{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 5000,
  "global_step": 855981,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0,
      "eval_loss": 1.2852673530578613,
      "eval_runtime": 554.3766,
      "eval_samples_per_second": 686.241,
      "eval_steps_per_second": 57.187,
      "step": 0
    },
    {
      "epoch": 3.5047506895596982e-06,
      "grad_norm": 4.8125,
      "learning_rate": 0.0,
      "loss": 1.4749,
      "step": 1
    },
    {
      "epoch": 3.504750689559698e-05,
      "grad_norm": 4.96875,
      "learning_rate": 5.257070760172432e-09,
      "loss": 1.4029,
      "step": 10
    },
    {
      "epoch": 7.009501379119396e-05,
      "grad_norm": 4.28125,
      "learning_rate": 1.1098260493697357e-08,
      "loss": 1.3243,
      "step": 20
    },
    {
      "epoch": 0.00010514252068679094,
      "grad_norm": 4.84375,
      "learning_rate": 1.693945022722228e-08,
      "loss": 1.3497,
      "step": 30
    },
    {
      "epoch": 0.00014019002758238792,
      "grad_norm": 4.5625,
      "learning_rate": 2.2780639960747208e-08,
      "loss": 1.3672,
      "step": 40
    },
    {
      "epoch": 0.00017523753447798492,
      "grad_norm": 4.6875,
      "learning_rate": 2.8621829694272132e-08,
      "loss": 1.3467,
      "step": 50
    },
    {
      "epoch": 0.00021028504137358188,
      "grad_norm": 4.5,
      "learning_rate": 3.446301942779705e-08,
      "loss": 1.3571,
      "step": 60
    },
    {
      "epoch": 0.0002453325482691789,
      "grad_norm": 4.15625,
      "learning_rate": 4.0304209161321976e-08,
      "loss": 1.3505,
      "step": 70
    },
    {
      "epoch": 0.00028038005516477585,
      "grad_norm": 4.53125,
      "learning_rate": 4.614539889484691e-08,
      "loss": 1.3329,
      "step": 80
    },
    {
      "epoch": 0.0003154275620603728,
      "grad_norm": 5.09375,
      "learning_rate": 5.198658862837183e-08,
      "loss": 1.3821,
      "step": 90
    },
    {
      "epoch": 0.00035047506895596983,
      "grad_norm": 4.59375,
      "learning_rate": 5.7827778361896754e-08,
      "loss": 1.3854,
      "step": 100
    },
    {
      "epoch": 0.0003855225758515668,
      "grad_norm": 4.90625,
      "learning_rate": 6.366896809542168e-08,
      "loss": 1.3262,
      "step": 110
    },
    {
      "epoch": 0.00042057008274716377,
      "grad_norm": 4.625,
      "learning_rate": 6.95101578289466e-08,
      "loss": 1.457,
      "step": 120
    },
    {
      "epoch": 0.00045561758964276074,
      "grad_norm": 7.625,
      "learning_rate": 7.535134756247153e-08,
      "loss": 1.3772,
      "step": 130
    },
    {
      "epoch": 0.0004906650965383578,
      "grad_norm": 4.9375,
      "learning_rate": 8.119253729599645e-08,
      "loss": 1.3406,
      "step": 140
    },
    {
      "epoch": 0.0005257126034339547,
      "grad_norm": 4.625,
      "learning_rate": 8.703372702952137e-08,
      "loss": 1.4249,
      "step": 150
    },
    {
      "epoch": 0.0005607601103295517,
      "grad_norm": 5.0625,
      "learning_rate": 9.28749167630463e-08,
      "loss": 1.4004,
      "step": 160
    },
    {
      "epoch": 0.0005958076172251487,
      "grad_norm": 4.59375,
      "learning_rate": 9.871610649657122e-08,
      "loss": 1.4337,
      "step": 170
    },
    {
      "epoch": 0.0006308551241207456,
      "grad_norm": 5.0,
      "learning_rate": 1.0455729623009615e-07,
      "loss": 1.4401,
      "step": 180
    },
    {
      "epoch": 0.0006659026310163427,
      "grad_norm": 4.53125,
      "learning_rate": 1.1039848596362107e-07,
      "loss": 1.3536,
      "step": 190
    },
    {
      "epoch": 0.0007009501379119397,
      "grad_norm": 4.5625,
      "learning_rate": 1.16239675697146e-07,
      "loss": 1.3119,
      "step": 200
    },
    {
      "epoch": 0.0007359976448075366,
      "grad_norm": 5.5,
      "learning_rate": 1.2208086543067092e-07,
      "loss": 1.359,
      "step": 210
    },
    {
      "epoch": 0.0007710451517031336,
      "grad_norm": 4.625,
      "learning_rate": 1.2792205516419584e-07,
      "loss": 1.3812,
      "step": 220
    },
    {
      "epoch": 0.0008060926585987306,
      "grad_norm": 4.21875,
      "learning_rate": 1.3376324489772077e-07,
      "loss": 1.2395,
      "step": 230
    },
    {
      "epoch": 0.0008411401654943275,
      "grad_norm": 5.15625,
      "learning_rate": 1.396044346312457e-07,
      "loss": 1.4554,
      "step": 240
    },
    {
      "epoch": 0.0008761876723899245,
      "grad_norm": 4.8125,
      "learning_rate": 1.4544562436477064e-07,
      "loss": 1.4196,
      "step": 250
    },
    {
      "epoch": 0.0009112351792855215,
      "grad_norm": 4.375,
      "learning_rate": 1.5128681409829556e-07,
      "loss": 1.3169,
      "step": 260
    },
    {
      "epoch": 0.0009462826861811185,
      "grad_norm": 4.34375,
      "learning_rate": 1.571280038318205e-07,
      "loss": 1.3657,
      "step": 270
    },
    {
      "epoch": 0.0009813301930767155,
      "grad_norm": 4.5625,
      "learning_rate": 1.629691935653454e-07,
      "loss": 1.4202,
      "step": 280
    },
    {
      "epoch": 0.0010163776999723125,
      "grad_norm": 5.0,
      "learning_rate": 1.6881038329887033e-07,
      "loss": 1.4188,
      "step": 290
    },
    {
      "epoch": 0.0010514252068679095,
      "grad_norm": 4.375,
      "learning_rate": 1.7465157303239526e-07,
      "loss": 1.3706,
      "step": 300
    },
    {
      "epoch": 0.0010864727137635064,
      "grad_norm": 4.59375,
      "learning_rate": 1.8049276276592016e-07,
      "loss": 1.3667,
      "step": 310
    },
    {
      "epoch": 0.0011215202206591034,
      "grad_norm": 4.21875,
      "learning_rate": 1.8633395249944508e-07,
      "loss": 1.3512,
      "step": 320
    },
    {
      "epoch": 0.0011565677275547004,
      "grad_norm": 4.75,
      "learning_rate": 1.9217514223297e-07,
      "loss": 1.3229,
      "step": 330
    },
    {
      "epoch": 0.0011916152344502973,
      "grad_norm": 3.71875,
      "learning_rate": 1.9801633196649495e-07,
      "loss": 1.2674,
      "step": 340
    },
    {
      "epoch": 0.0012266627413458943,
      "grad_norm": 4.5625,
      "learning_rate": 2.0385752170001988e-07,
      "loss": 1.4214,
      "step": 350
    },
    {
      "epoch": 0.0012617102482414913,
      "grad_norm": 4.75,
      "learning_rate": 2.096987114335448e-07,
      "loss": 1.2992,
      "step": 360
    },
    {
      "epoch": 0.0012967577551370882,
      "grad_norm": 4.53125,
      "learning_rate": 2.1553990116706973e-07,
      "loss": 1.465,
      "step": 370
    },
    {
      "epoch": 0.0013318052620326854,
      "grad_norm": 4.875,
      "learning_rate": 2.2138109090059462e-07,
      "loss": 1.3929,
      "step": 380
    },
    {
      "epoch": 0.0013668527689282824,
      "grad_norm": 4.28125,
      "learning_rate": 2.2722228063411957e-07,
      "loss": 1.2843,
      "step": 390
    },
    {
      "epoch": 0.0014019002758238793,
      "grad_norm": 4.40625,
      "learning_rate": 2.3306347036764447e-07,
      "loss": 1.3579,
      "step": 400
    },
    {
      "epoch": 0.0014369477827194763,
      "grad_norm": 4.84375,
      "learning_rate": 2.389046601011694e-07,
      "loss": 1.3937,
      "step": 410
    },
    {
      "epoch": 0.0014719952896150733,
      "grad_norm": 5.46875,
      "learning_rate": 2.447458498346943e-07,
      "loss": 1.2607,
      "step": 420
    },
    {
      "epoch": 0.0015070427965106702,
      "grad_norm": 4.53125,
      "learning_rate": 2.5058703956821927e-07,
      "loss": 1.3865,
      "step": 430
    },
    {
      "epoch": 0.0015420903034062672,
      "grad_norm": 4.1875,
      "learning_rate": 2.5642822930174417e-07,
      "loss": 1.3269,
      "step": 440
    },
    {
      "epoch": 0.0015771378103018642,
      "grad_norm": 4.6875,
      "learning_rate": 2.622694190352691e-07,
      "loss": 1.3974,
      "step": 450
    },
    {
      "epoch": 0.0016121853171974611,
      "grad_norm": 4.71875,
      "learning_rate": 2.68110608768794e-07,
      "loss": 1.3281,
      "step": 460
    },
    {
      "epoch": 0.0016472328240930581,
      "grad_norm": 4.4375,
      "learning_rate": 2.7395179850231897e-07,
      "loss": 1.3919,
      "step": 470
    },
    {
      "epoch": 0.001682280330988655,
      "grad_norm": 4.4375,
      "learning_rate": 2.7979298823584386e-07,
      "loss": 1.3301,
      "step": 480
    },
    {
      "epoch": 0.001717327837884252,
      "grad_norm": 4.6875,
      "learning_rate": 2.856341779693688e-07,
      "loss": 1.2537,
      "step": 490
    },
    {
      "epoch": 0.001752375344779849,
      "grad_norm": 4.15625,
      "learning_rate": 2.914753677028937e-07,
      "loss": 1.3503,
      "step": 500
    },
    {
      "epoch": 0.001787422851675446,
      "grad_norm": 4.1875,
      "learning_rate": 2.9731655743641866e-07,
      "loss": 1.2738,
      "step": 510
    },
    {
      "epoch": 0.001822470358571043,
      "grad_norm": 3.8125,
      "learning_rate": 3.031577471699436e-07,
      "loss": 1.2832,
      "step": 520
    },
    {
      "epoch": 0.0018575178654666401,
      "grad_norm": 5.53125,
      "learning_rate": 3.089989369034685e-07,
      "loss": 1.3249,
      "step": 530
    },
    {
      "epoch": 0.001892565372362237,
      "grad_norm": 4.375,
      "learning_rate": 3.1484012663699346e-07,
      "loss": 1.3928,
      "step": 540
    },
    {
      "epoch": 0.001927612879257834,
      "grad_norm": 4.40625,
      "learning_rate": 3.2068131637051836e-07,
      "loss": 1.2796,
      "step": 550
    },
    {
      "epoch": 0.001962660386153431,
      "grad_norm": 4.375,
      "learning_rate": 3.265225061040433e-07,
      "loss": 1.3563,
      "step": 560
    },
    {
      "epoch": 0.001997707893049028,
      "grad_norm": 4.25,
      "learning_rate": 3.323636958375682e-07,
      "loss": 1.2643,
      "step": 570
    },
    {
      "epoch": 0.002032755399944625,
      "grad_norm": 4.1875,
      "learning_rate": 3.3820488557109315e-07,
      "loss": 1.2355,
      "step": 580
    },
    {
      "epoch": 0.002067802906840222,
      "grad_norm": 4.28125,
      "learning_rate": 3.4404607530461805e-07,
      "loss": 1.2893,
      "step": 590
    },
    {
      "epoch": 0.002102850413735819,
      "grad_norm": 3.890625,
      "learning_rate": 3.49887265038143e-07,
      "loss": 1.3887,
      "step": 600
    },
    {
      "epoch": 0.002137897920631416,
      "grad_norm": 3.96875,
      "learning_rate": 3.557284547716679e-07,
      "loss": 1.3485,
      "step": 610
    },
    {
      "epoch": 0.002172945427527013,
      "grad_norm": 4.21875,
      "learning_rate": 3.6156964450519285e-07,
      "loss": 1.3813,
      "step": 620
    },
    {
      "epoch": 0.00220799293442261,
      "grad_norm": 4.46875,
      "learning_rate": 3.6741083423871775e-07,
      "loss": 1.4347,
      "step": 630
    },
    {
      "epoch": 0.0022430404413182068,
      "grad_norm": 4.28125,
      "learning_rate": 3.732520239722427e-07,
      "loss": 1.3492,
      "step": 640
    },
    {
      "epoch": 0.0022780879482138037,
      "grad_norm": 4.09375,
      "learning_rate": 3.790932137057676e-07,
      "loss": 1.2706,
      "step": 650
    },
    {
      "epoch": 0.0023131354551094007,
      "grad_norm": 4.0625,
      "learning_rate": 3.8493440343929255e-07,
      "loss": 1.4125,
      "step": 660
    },
    {
      "epoch": 0.0023481829620049977,
      "grad_norm": 4.09375,
      "learning_rate": 3.9077559317281744e-07,
      "loss": 1.2254,
      "step": 670
    },
    {
      "epoch": 0.0023832304689005946,
      "grad_norm": 4.65625,
      "learning_rate": 3.9661678290634234e-07,
      "loss": 1.3532,
      "step": 680
    },
    {
      "epoch": 0.0024182779757961916,
      "grad_norm": 3.703125,
      "learning_rate": 4.024579726398673e-07,
      "loss": 1.2794,
      "step": 690
    },
    {
      "epoch": 0.0024533254826917886,
      "grad_norm": 3.859375,
      "learning_rate": 4.0829916237339224e-07,
      "loss": 1.339,
      "step": 700
    },
    {
      "epoch": 0.0024883729895873855,
      "grad_norm": 4.25,
      "learning_rate": 4.1414035210691714e-07,
      "loss": 1.3245,
      "step": 710
    },
    {
      "epoch": 0.0025234204964829825,
      "grad_norm": 4.21875,
      "learning_rate": 4.199815418404421e-07,
      "loss": 1.2796,
      "step": 720
    },
    {
      "epoch": 0.0025584680033785795,
      "grad_norm": 3.9375,
      "learning_rate": 4.25822731573967e-07,
      "loss": 1.2798,
      "step": 730
    },
    {
      "epoch": 0.0025935155102741764,
      "grad_norm": 4.21875,
      "learning_rate": 4.3166392130749194e-07,
      "loss": 1.3004,
      "step": 740
    },
    {
      "epoch": 0.002628563017169774,
      "grad_norm": 4.0,
      "learning_rate": 4.375051110410169e-07,
      "loss": 1.2494,
      "step": 750
    },
    {
      "epoch": 0.002663610524065371,
      "grad_norm": 3.640625,
      "learning_rate": 4.433463007745418e-07,
      "loss": 1.3012,
      "step": 760
    },
    {
      "epoch": 0.0026986580309609678,
      "grad_norm": 3.921875,
      "learning_rate": 4.491874905080667e-07,
      "loss": 1.2445,
      "step": 770
    },
    {
      "epoch": 0.0027337055378565647,
      "grad_norm": 3.953125,
      "learning_rate": 4.550286802415917e-07,
      "loss": 1.2227,
      "step": 780
    },
    {
      "epoch": 0.0027687530447521617,
      "grad_norm": 4.0625,
      "learning_rate": 4.608698699751166e-07,
      "loss": 1.2987,
      "step": 790
    },
    {
      "epoch": 0.0028038005516477587,
      "grad_norm": 4.625,
      "learning_rate": 4.667110597086415e-07,
      "loss": 1.2869,
      "step": 800
    },
    {
      "epoch": 0.0028388480585433556,
      "grad_norm": 4.5625,
      "learning_rate": 4.725522494421664e-07,
      "loss": 1.3331,
      "step": 810
    },
    {
      "epoch": 0.0028738955654389526,
      "grad_norm": 4.28125,
      "learning_rate": 4.783934391756913e-07,
      "loss": 1.1956,
      "step": 820
    },
    {
      "epoch": 0.0029089430723345496,
      "grad_norm": 4.15625,
      "learning_rate": 4.842346289092163e-07,
      "loss": 1.392,
      "step": 830
    },
    {
      "epoch": 0.0029439905792301465,
      "grad_norm": 4.09375,
      "learning_rate": 4.900758186427412e-07,
      "loss": 1.3471,
      "step": 840
    },
    {
      "epoch": 0.0029790380861257435,
      "grad_norm": 4.9375,
      "learning_rate": 4.959170083762661e-07,
      "loss": 1.3204,
      "step": 850
    },
    {
      "epoch": 0.0030140855930213405,
      "grad_norm": 3.9375,
      "learning_rate": 5.01758198109791e-07,
      "loss": 1.3121,
      "step": 860
    },
    {
      "epoch": 0.0030491330999169374,
      "grad_norm": 4.71875,
      "learning_rate": 5.07599387843316e-07,
      "loss": 1.2584,
      "step": 870
    },
    {
      "epoch": 0.0030841806068125344,
      "grad_norm": 3.984375,
      "learning_rate": 5.134405775768409e-07,
      "loss": 1.2494,
      "step": 880
    },
    {
      "epoch": 0.0031192281137081314,
      "grad_norm": 4.1875,
      "learning_rate": 5.192817673103658e-07,
      "loss": 1.3484,
      "step": 890
    },
    {
      "epoch": 0.0031542756206037284,
      "grad_norm": 4.1875,
      "learning_rate": 5.251229570438907e-07,
      "loss": 1.3381,
      "step": 900
    },
    {
      "epoch": 0.0031893231274993253,
      "grad_norm": 4.15625,
      "learning_rate": 5.309641467774157e-07,
      "loss": 1.2396,
      "step": 910
    },
    {
      "epoch": 0.0032243706343949223,
      "grad_norm": 4.125,
      "learning_rate": 5.368053365109406e-07,
      "loss": 1.2685,
      "step": 920
    },
    {
      "epoch": 0.0032594181412905193,
      "grad_norm": 4.0,
      "learning_rate": 5.426465262444655e-07,
      "loss": 1.3526,
      "step": 930
    },
    {
      "epoch": 0.0032944656481861162,
      "grad_norm": 3.875,
      "learning_rate": 5.484877159779904e-07,
      "loss": 1.3846,
      "step": 940
    },
    {
      "epoch": 0.003329513155081713,
      "grad_norm": 3.890625,
      "learning_rate": 5.543289057115154e-07,
      "loss": 1.2707,
      "step": 950
    },
    {
      "epoch": 0.00336456066197731,
      "grad_norm": 3.890625,
      "learning_rate": 5.601700954450403e-07,
      "loss": 1.2836,
      "step": 960
    },
    {
      "epoch": 0.003399608168872907,
      "grad_norm": 3.984375,
      "learning_rate": 5.660112851785652e-07,
      "loss": 1.2548,
      "step": 970
    },
    {
      "epoch": 0.003434655675768504,
      "grad_norm": 4.15625,
      "learning_rate": 5.718524749120901e-07,
      "loss": 1.3702,
      "step": 980
    },
    {
      "epoch": 0.003469703182664101,
      "grad_norm": 4.21875,
      "learning_rate": 5.77693664645615e-07,
      "loss": 1.2847,
      "step": 990
    },
    {
      "epoch": 0.003504750689559698,
      "grad_norm": 4.15625,
      "learning_rate": 5.8353485437914e-07,
      "loss": 1.2682,
      "step": 1000
    },
    {
      "epoch": 0.003539798196455295,
      "grad_norm": 3.796875,
      "learning_rate": 5.893760441126649e-07,
      "loss": 1.2876,
      "step": 1010
    },
    {
      "epoch": 0.003574845703350892,
      "grad_norm": 5.09375,
      "learning_rate": 5.952172338461898e-07,
      "loss": 1.3502,
      "step": 1020
    },
    {
      "epoch": 0.003609893210246489,
      "grad_norm": 3.9375,
      "learning_rate": 6.010584235797148e-07,
      "loss": 1.3314,
      "step": 1030
    },
    {
      "epoch": 0.003644940717142086,
      "grad_norm": 3.984375,
      "learning_rate": 6.068996133132397e-07,
      "loss": 1.228,
      "step": 1040
    },
    {
      "epoch": 0.0036799882240376833,
      "grad_norm": 4.0625,
      "learning_rate": 6.127408030467647e-07,
      "loss": 1.3284,
      "step": 1050
    },
    {
      "epoch": 0.0037150357309332803,
      "grad_norm": 4.53125,
      "learning_rate": 6.185819927802895e-07,
      "loss": 1.3703,
      "step": 1060
    },
    {
      "epoch": 0.0037500832378288772,
      "grad_norm": 4.34375,
      "learning_rate": 6.244231825138145e-07,
      "loss": 1.19,
      "step": 1070
    },
    {
      "epoch": 0.003785130744724474,
      "grad_norm": 3.78125,
      "learning_rate": 6.302643722473394e-07,
      "loss": 1.2524,
      "step": 1080
    },
    {
      "epoch": 0.003820178251620071,
      "grad_norm": 3.890625,
      "learning_rate": 6.361055619808644e-07,
      "loss": 1.2958,
      "step": 1090
    },
    {
      "epoch": 0.003855225758515668,
      "grad_norm": 3.921875,
      "learning_rate": 6.419467517143892e-07,
      "loss": 1.2665,
      "step": 1100
    },
    {
      "epoch": 0.003890273265411265,
      "grad_norm": 3.734375,
      "learning_rate": 6.477879414479141e-07,
      "loss": 1.252,
      "step": 1110
    },
    {
      "epoch": 0.003925320772306862,
      "grad_norm": 3.875,
      "learning_rate": 6.536291311814391e-07,
      "loss": 1.3682,
      "step": 1120
    },
    {
      "epoch": 0.003960368279202459,
      "grad_norm": 4.5,
      "learning_rate": 6.59470320914964e-07,
      "loss": 1.2403,
      "step": 1130
    },
    {
      "epoch": 0.003995415786098056,
      "grad_norm": 4.5625,
      "learning_rate": 6.653115106484889e-07,
      "loss": 1.3019,
      "step": 1140
    },
    {
      "epoch": 0.0040304632929936525,
      "grad_norm": 4.09375,
      "learning_rate": 6.711527003820138e-07,
      "loss": 1.2779,
      "step": 1150
    },
    {
      "epoch": 0.00406551079988925,
      "grad_norm": 4.03125,
      "learning_rate": 6.769938901155387e-07,
      "loss": 1.3199,
      "step": 1160
    },
    {
      "epoch": 0.0041005583067848465,
      "grad_norm": 3.640625,
      "learning_rate": 6.828350798490637e-07,
      "loss": 1.2444,
      "step": 1170
    },
    {
      "epoch": 0.004135605813680444,
      "grad_norm": 3.78125,
      "learning_rate": 6.886762695825886e-07,
      "loss": 1.2411,
      "step": 1180
    },
    {
      "epoch": 0.00417065332057604,
      "grad_norm": 3.609375,
      "learning_rate": 6.945174593161135e-07,
      "loss": 1.3368,
      "step": 1190
    },
    {
      "epoch": 0.004205700827471638,
      "grad_norm": 4.1875,
      "learning_rate": 7.003586490496384e-07,
      "loss": 1.2768,
      "step": 1200
    },
    {
      "epoch": 0.004240748334367235,
      "grad_norm": 3.953125,
      "learning_rate": 7.061998387831634e-07,
      "loss": 1.2358,
      "step": 1210
    },
    {
      "epoch": 0.004275795841262832,
      "grad_norm": 3.984375,
      "learning_rate": 7.120410285166883e-07,
      "loss": 1.2483,
      "step": 1220
    },
    {
      "epoch": 0.004310843348158429,
      "grad_norm": 3.515625,
      "learning_rate": 7.178822182502132e-07,
      "loss": 1.2972,
      "step": 1230
    },
    {
      "epoch": 0.004345890855054026,
      "grad_norm": 4.34375,
      "learning_rate": 7.237234079837382e-07,
      "loss": 1.2439,
      "step": 1240
    },
    {
      "epoch": 0.004380938361949623,
      "grad_norm": 4.25,
      "learning_rate": 7.295645977172631e-07,
      "loss": 1.325,
      "step": 1250
    },
    {
      "epoch": 0.00441598586884522,
      "grad_norm": 3.8125,
      "learning_rate": 7.35405787450788e-07,
      "loss": 1.3036,
      "step": 1260
    },
    {
      "epoch": 0.004451033375740817,
      "grad_norm": 4.21875,
      "learning_rate": 7.412469771843129e-07,
      "loss": 1.3311,
      "step": 1270
    },
    {
      "epoch": 0.0044860808826364135,
      "grad_norm": 3.375,
      "learning_rate": 7.470881669178379e-07,
      "loss": 1.2239,
      "step": 1280
    },
    {
      "epoch": 0.004521128389532011,
      "grad_norm": 4.125,
      "learning_rate": 7.529293566513627e-07,
      "loss": 1.2785,
      "step": 1290
    },
    {
      "epoch": 0.0045561758964276075,
      "grad_norm": 3.875,
      "learning_rate": 7.587705463848878e-07,
      "loss": 1.2234,
      "step": 1300
    },
    {
      "epoch": 0.004591223403323205,
      "grad_norm": 4.125,
      "learning_rate": 7.646117361184126e-07,
      "loss": 1.309,
      "step": 1310
    },
    {
      "epoch": 0.004626270910218801,
      "grad_norm": 4.5625,
      "learning_rate": 7.704529258519376e-07,
      "loss": 1.2604,
      "step": 1320
    },
    {
      "epoch": 0.004661318417114399,
      "grad_norm": 3.875,
      "learning_rate": 7.762941155854624e-07,
      "loss": 1.2916,
      "step": 1330
    },
    {
      "epoch": 0.004696365924009995,
      "grad_norm": 3.984375,
      "learning_rate": 7.821353053189875e-07,
      "loss": 1.2586,
      "step": 1340
    },
    {
      "epoch": 0.004731413430905593,
      "grad_norm": 4.4375,
      "learning_rate": 7.879764950525123e-07,
      "loss": 1.3005,
      "step": 1350
    },
    {
      "epoch": 0.004766460937801189,
      "grad_norm": 4.0625,
      "learning_rate": 7.938176847860373e-07,
      "loss": 1.348,
      "step": 1360
    },
    {
      "epoch": 0.004801508444696787,
      "grad_norm": 3.578125,
      "learning_rate": 7.996588745195621e-07,
      "loss": 1.2855,
      "step": 1370
    },
    {
      "epoch": 0.004836555951592383,
      "grad_norm": 4.46875,
      "learning_rate": 8.055000642530871e-07,
      "loss": 1.3322,
      "step": 1380
    },
    {
      "epoch": 0.004871603458487981,
      "grad_norm": 4.625,
      "learning_rate": 8.113412539866119e-07,
      "loss": 1.2644,
      "step": 1390
    },
    {
      "epoch": 0.004906650965383577,
      "grad_norm": 3.828125,
      "learning_rate": 8.171824437201371e-07,
      "loss": 1.2613,
      "step": 1400
    },
    {
      "epoch": 0.0049416984722791745,
      "grad_norm": 4.125,
      "learning_rate": 8.230236334536619e-07,
      "loss": 1.3236,
      "step": 1410
    },
    {
      "epoch": 0.004976745979174771,
      "grad_norm": 4.28125,
      "learning_rate": 8.288648231871869e-07,
      "loss": 1.2956,
      "step": 1420
    },
    {
      "epoch": 0.0050117934860703685,
      "grad_norm": 4.0625,
      "learning_rate": 8.347060129207117e-07,
      "loss": 1.2378,
      "step": 1430
    },
    {
      "epoch": 0.005046840992965965,
      "grad_norm": 3.9375,
      "learning_rate": 8.405472026542367e-07,
      "loss": 1.2719,
      "step": 1440
    },
    {
      "epoch": 0.005081888499861562,
      "grad_norm": 4.0,
      "learning_rate": 8.463883923877616e-07,
      "loss": 1.1944,
      "step": 1450
    },
    {
      "epoch": 0.005116936006757159,
      "grad_norm": 4.03125,
      "learning_rate": 8.522295821212865e-07,
      "loss": 1.3122,
      "step": 1460
    },
    {
      "epoch": 0.005151983513652756,
      "grad_norm": 4.09375,
      "learning_rate": 8.580707718548114e-07,
      "loss": 1.1991,
      "step": 1470
    },
    {
      "epoch": 0.005187031020548353,
      "grad_norm": 4.25,
      "learning_rate": 8.639119615883363e-07,
      "loss": 1.2985,
      "step": 1480
    },
    {
      "epoch": 0.00522207852744395,
      "grad_norm": 4.28125,
      "learning_rate": 8.697531513218613e-07,
      "loss": 1.259,
      "step": 1490
    },
    {
      "epoch": 0.005257126034339548,
      "grad_norm": 4.09375,
      "learning_rate": 8.755943410553863e-07,
      "loss": 1.2906,
      "step": 1500
    },
    {
      "epoch": 0.005292173541235144,
      "grad_norm": 4.03125,
      "learning_rate": 8.814355307889112e-07,
      "loss": 1.3169,
      "step": 1510
    },
    {
      "epoch": 0.005327221048130742,
      "grad_norm": 4.0625,
      "learning_rate": 8.872767205224361e-07,
      "loss": 1.3387,
      "step": 1520
    },
    {
      "epoch": 0.005362268555026338,
      "grad_norm": 4.34375,
      "learning_rate": 8.93117910255961e-07,
      "loss": 1.2484,
      "step": 1530
    },
    {
      "epoch": 0.0053973160619219356,
      "grad_norm": 4.1875,
      "learning_rate": 8.989590999894858e-07,
      "loss": 1.2923,
      "step": 1540
    },
    {
      "epoch": 0.005432363568817532,
      "grad_norm": 3.984375,
      "learning_rate": 9.048002897230108e-07,
      "loss": 1.374,
      "step": 1550
    },
    {
      "epoch": 0.0054674110757131295,
      "grad_norm": 3.609375,
      "learning_rate": 9.106414794565356e-07,
      "loss": 1.2217,
      "step": 1560
    },
    {
      "epoch": 0.005502458582608726,
      "grad_norm": 3.75,
      "learning_rate": 9.164826691900608e-07,
      "loss": 1.3026,
      "step": 1570
    },
    {
      "epoch": 0.005537506089504323,
      "grad_norm": 3.984375,
      "learning_rate": 9.223238589235856e-07,
      "loss": 1.2189,
      "step": 1580
    },
    {
      "epoch": 0.00557255359639992,
      "grad_norm": 3.9375,
      "learning_rate": 9.281650486571106e-07,
      "loss": 1.2333,
      "step": 1590
    },
    {
      "epoch": 0.005607601103295517,
      "grad_norm": 3.15625,
      "learning_rate": 9.340062383906354e-07,
      "loss": 1.2729,
      "step": 1600
    },
    {
      "epoch": 0.005642648610191114,
      "grad_norm": 4.03125,
      "learning_rate": 9.398474281241604e-07,
      "loss": 1.3141,
      "step": 1610
    },
    {
      "epoch": 0.005677696117086711,
      "grad_norm": 3.9375,
      "learning_rate": 9.456886178576852e-07,
      "loss": 1.2533,
      "step": 1620
    },
    {
      "epoch": 0.005712743623982308,
      "grad_norm": 4.375,
      "learning_rate": 9.515298075912102e-07,
      "loss": 1.2692,
      "step": 1630
    },
    {
      "epoch": 0.005747791130877905,
      "grad_norm": 4.125,
      "learning_rate": 9.573709973247351e-07,
      "loss": 1.2788,
      "step": 1640
    },
    {
      "epoch": 0.005782838637773502,
      "grad_norm": 4.0,
      "learning_rate": 9.6321218705826e-07,
      "loss": 1.2277,
      "step": 1650
    },
    {
      "epoch": 0.005817886144669099,
      "grad_norm": 3.84375,
      "learning_rate": 9.69053376791785e-07,
      "loss": 1.2383,
      "step": 1660
    },
    {
      "epoch": 0.005852933651564696,
      "grad_norm": 3.9375,
      "learning_rate": 9.748945665253099e-07,
      "loss": 1.2382,
      "step": 1670
    },
    {
      "epoch": 0.005887981158460293,
      "grad_norm": 4.0,
      "learning_rate": 9.80735756258835e-07,
      "loss": 1.2902,
      "step": 1680
    },
    {
      "epoch": 0.00592302866535589,
      "grad_norm": 3.90625,
      "learning_rate": 9.865769459923598e-07,
      "loss": 1.3712,
      "step": 1690
    },
    {
      "epoch": 0.005958076172251487,
      "grad_norm": 3.953125,
      "learning_rate": 9.924181357258846e-07,
      "loss": 1.2482,
      "step": 1700
    },
    {
      "epoch": 0.0059931236791470836,
      "grad_norm": 3.5625,
      "learning_rate": 9.982593254594097e-07,
      "loss": 1.1942,
      "step": 1710
    },
    {
      "epoch": 0.006028171186042681,
      "grad_norm": 3.671875,
      "learning_rate": 1.0041005151929345e-06,
      "loss": 1.2739,
      "step": 1720
    },
    {
      "epoch": 0.0060632186929382775,
      "grad_norm": 3.65625,
      "learning_rate": 1.0099417049264594e-06,
      "loss": 1.2746,
      "step": 1730
    },
    {
      "epoch": 0.006098266199833875,
      "grad_norm": 3.609375,
      "learning_rate": 1.0157828946599842e-06,
      "loss": 1.2221,
      "step": 1740
    },
    {
      "epoch": 0.006133313706729471,
      "grad_norm": 4.03125,
      "learning_rate": 1.0216240843935095e-06,
      "loss": 1.2841,
      "step": 1750
    },
    {
      "epoch": 0.006168361213625069,
      "grad_norm": 3.703125,
      "learning_rate": 1.0274652741270343e-06,
      "loss": 1.2398,
      "step": 1760
    },
    {
      "epoch": 0.006203408720520665,
      "grad_norm": 3.890625,
      "learning_rate": 1.0333064638605592e-06,
      "loss": 1.3061,
      "step": 1770
    },
    {
      "epoch": 0.006238456227416263,
      "grad_norm": 4.4375,
      "learning_rate": 1.039147653594084e-06,
      "loss": 1.3077,
      "step": 1780
    },
    {
      "epoch": 0.006273503734311859,
      "grad_norm": 3.90625,
      "learning_rate": 1.044988843327609e-06,
      "loss": 1.2418,
      "step": 1790
    },
    {
      "epoch": 0.006308551241207457,
      "grad_norm": 4.0625,
      "learning_rate": 1.050830033061134e-06,
      "loss": 1.2803,
      "step": 1800
    },
    {
      "epoch": 0.006343598748103054,
      "grad_norm": 4.125,
      "learning_rate": 1.0566712227946588e-06,
      "loss": 1.2755,
      "step": 1810
    },
    {
      "epoch": 0.006378646254998651,
      "grad_norm": 4.34375,
      "learning_rate": 1.0625124125281838e-06,
      "loss": 1.2358,
      "step": 1820
    },
    {
      "epoch": 0.006413693761894248,
      "grad_norm": 3.71875,
      "learning_rate": 1.0683536022617089e-06,
      "loss": 1.3115,
      "step": 1830
    },
    {
      "epoch": 0.006448741268789845,
      "grad_norm": 3.9375,
      "learning_rate": 1.0741947919952337e-06,
      "loss": 1.2324,
      "step": 1840
    },
    {
      "epoch": 0.006483788775685442,
      "grad_norm": 4.1875,
      "learning_rate": 1.0800359817287586e-06,
      "loss": 1.2848,
      "step": 1850
    },
    {
      "epoch": 0.0065188362825810385,
      "grad_norm": 3.90625,
      "learning_rate": 1.0858771714622834e-06,
      "loss": 1.3072,
      "step": 1860
    },
    {
      "epoch": 0.006553883789476636,
      "grad_norm": 3.8125,
      "learning_rate": 1.0917183611958085e-06,
      "loss": 1.2629,
      "step": 1870
    },
    {
      "epoch": 0.0065889312963722324,
      "grad_norm": 4.125,
      "learning_rate": 1.0975595509293333e-06,
      "loss": 1.1718,
      "step": 1880
    },
    {
      "epoch": 0.00662397880326783,
      "grad_norm": 3.828125,
      "learning_rate": 1.1034007406628582e-06,
      "loss": 1.2755,
      "step": 1890
    },
    {
      "epoch": 0.006659026310163426,
      "grad_norm": 4.375,
      "learning_rate": 1.1092419303963832e-06,
      "loss": 1.3185,
      "step": 1900
    },
    {
      "epoch": 0.006694073817059024,
      "grad_norm": 4.0,
      "learning_rate": 1.115083120129908e-06,
      "loss": 1.2396,
      "step": 1910
    },
    {
      "epoch": 0.00672912132395462,
      "grad_norm": 3.53125,
      "learning_rate": 1.1209243098634331e-06,
      "loss": 1.2945,
      "step": 1920
    },
    {
      "epoch": 0.006764168830850218,
      "grad_norm": 3.78125,
      "learning_rate": 1.126765499596958e-06,
      "loss": 1.2563,
      "step": 1930
    },
    {
      "epoch": 0.006799216337745814,
      "grad_norm": 4.125,
      "learning_rate": 1.132606689330483e-06,
      "loss": 1.27,
      "step": 1940
    },
    {
      "epoch": 0.006834263844641412,
      "grad_norm": 4.09375,
      "learning_rate": 1.1384478790640079e-06,
      "loss": 1.291,
      "step": 1950
    },
    {
      "epoch": 0.006869311351537008,
      "grad_norm": 5.125,
      "learning_rate": 1.1442890687975327e-06,
      "loss": 1.2212,
      "step": 1960
    },
    {
      "epoch": 0.006904358858432606,
      "grad_norm": 4.125,
      "learning_rate": 1.1501302585310575e-06,
      "loss": 1.2854,
      "step": 1970
    },
    {
      "epoch": 0.006939406365328202,
      "grad_norm": 3.84375,
      "learning_rate": 1.1559714482645826e-06,
      "loss": 1.2214,
      "step": 1980
    },
    {
      "epoch": 0.0069744538722237995,
      "grad_norm": 3.90625,
      "learning_rate": 1.1618126379981074e-06,
      "loss": 1.182,
      "step": 1990
    },
    {
      "epoch": 0.007009501379119396,
      "grad_norm": 3.84375,
      "learning_rate": 1.1676538277316325e-06,
      "loss": 1.1865,
      "step": 2000
    },
    {
      "epoch": 0.0070445488860149934,
      "grad_norm": 4.15625,
      "learning_rate": 1.1734950174651574e-06,
      "loss": 1.2276,
      "step": 2010
    },
    {
      "epoch": 0.00707959639291059,
      "grad_norm": 4.78125,
      "learning_rate": 1.1793362071986824e-06,
      "loss": 1.182,
      "step": 2020
    },
    {
      "epoch": 0.007114643899806187,
      "grad_norm": 3.8125,
      "learning_rate": 1.1851773969322073e-06,
      "loss": 1.208,
      "step": 2030
    },
    {
      "epoch": 0.007149691406701784,
      "grad_norm": 3.46875,
      "learning_rate": 1.191018586665732e-06,
      "loss": 1.2976,
      "step": 2040
    },
    {
      "epoch": 0.007184738913597381,
      "grad_norm": 4.125,
      "learning_rate": 1.196859776399257e-06,
      "loss": 1.1951,
      "step": 2050
    },
    {
      "epoch": 0.007219786420492978,
      "grad_norm": 3.578125,
      "learning_rate": 1.202700966132782e-06,
      "loss": 1.2523,
      "step": 2060
    },
    {
      "epoch": 0.007254833927388575,
      "grad_norm": 4.03125,
      "learning_rate": 1.2085421558663068e-06,
      "loss": 1.2906,
      "step": 2070
    },
    {
      "epoch": 0.007289881434284172,
      "grad_norm": 3.515625,
      "learning_rate": 1.2143833455998317e-06,
      "loss": 1.2772,
      "step": 2080
    },
    {
      "epoch": 0.007324928941179769,
      "grad_norm": 3.75,
      "learning_rate": 1.2202245353333567e-06,
      "loss": 1.2619,
      "step": 2090
    },
    {
      "epoch": 0.007359976448075367,
      "grad_norm": 3.625,
      "learning_rate": 1.2260657250668818e-06,
      "loss": 1.1864,
      "step": 2100
    },
    {
      "epoch": 0.007395023954970963,
      "grad_norm": 4.1875,
      "learning_rate": 1.2319069148004066e-06,
      "loss": 1.1677,
      "step": 2110
    },
    {
      "epoch": 0.0074300714618665605,
      "grad_norm": 4.65625,
      "learning_rate": 1.2377481045339315e-06,
      "loss": 1.3169,
      "step": 2120
    },
    {
      "epoch": 0.007465118968762157,
      "grad_norm": 3.71875,
      "learning_rate": 1.2435892942674565e-06,
      "loss": 1.185,
      "step": 2130
    },
    {
      "epoch": 0.0075001664756577545,
      "grad_norm": 3.96875,
      "learning_rate": 1.2494304840009814e-06,
      "loss": 1.2313,
      "step": 2140
    },
    {
      "epoch": 0.007535213982553351,
      "grad_norm": 3.90625,
      "learning_rate": 1.2552716737345062e-06,
      "loss": 1.2889,
      "step": 2150
    },
    {
      "epoch": 0.007570261489448948,
      "grad_norm": 3.671875,
      "learning_rate": 1.261112863468031e-06,
      "loss": 1.2634,
      "step": 2160
    },
    {
      "epoch": 0.007605308996344545,
      "grad_norm": 3.90625,
      "learning_rate": 1.2669540532015563e-06,
      "loss": 1.2542,
      "step": 2170
    },
    {
      "epoch": 0.007640356503240142,
      "grad_norm": 3.6875,
      "learning_rate": 1.2727952429350812e-06,
      "loss": 1.2247,
      "step": 2180
    },
    {
      "epoch": 0.007675404010135739,
      "grad_norm": 3.53125,
      "learning_rate": 1.278636432668606e-06,
      "loss": 1.1701,
      "step": 2190
    },
    {
      "epoch": 0.007710451517031336,
      "grad_norm": 3.78125,
      "learning_rate": 1.2844776224021309e-06,
      "loss": 1.1866,
      "step": 2200
    },
    {
      "epoch": 0.007745499023926933,
      "grad_norm": 3.703125,
      "learning_rate": 1.290318812135656e-06,
      "loss": 1.2506,
      "step": 2210
    },
    {
      "epoch": 0.00778054653082253,
      "grad_norm": 3.984375,
      "learning_rate": 1.2961600018691808e-06,
      "loss": 1.1811,
      "step": 2220
    },
    {
      "epoch": 0.007815594037718128,
      "grad_norm": 3.71875,
      "learning_rate": 1.3020011916027056e-06,
      "loss": 1.2965,
      "step": 2230
    },
    {
      "epoch": 0.007850641544613724,
      "grad_norm": 3.4375,
      "learning_rate": 1.3078423813362305e-06,
      "loss": 1.2175,
      "step": 2240
    },
    {
      "epoch": 0.00788568905150932,
      "grad_norm": 3.84375,
      "learning_rate": 1.3136835710697555e-06,
      "loss": 1.2238,
      "step": 2250
    },
    {
      "epoch": 0.007920736558404917,
      "grad_norm": 3.78125,
      "learning_rate": 1.3195247608032806e-06,
      "loss": 1.1872,
      "step": 2260
    },
    {
      "epoch": 0.007955784065300515,
      "grad_norm": 3.484375,
      "learning_rate": 1.3253659505368054e-06,
      "loss": 1.168,
      "step": 2270
    },
    {
      "epoch": 0.007990831572196112,
      "grad_norm": 3.8125,
      "learning_rate": 1.3312071402703303e-06,
      "loss": 1.1551,
      "step": 2280
    },
    {
      "epoch": 0.008025879079091709,
      "grad_norm": 3.5625,
      "learning_rate": 1.3370483300038553e-06,
      "loss": 1.2804,
      "step": 2290
    },
    {
      "epoch": 0.008060926585987305,
      "grad_norm": 4.3125,
      "learning_rate": 1.3428895197373802e-06,
      "loss": 1.3171,
      "step": 2300
    },
    {
      "epoch": 0.008095974092882903,
      "grad_norm": 3.453125,
      "learning_rate": 1.348730709470905e-06,
      "loss": 1.21,
      "step": 2310
    },
    {
      "epoch": 0.0081310215997785,
      "grad_norm": 3.75,
      "learning_rate": 1.35457189920443e-06,
      "loss": 1.2286,
      "step": 2320
    },
    {
      "epoch": 0.008166069106674096,
      "grad_norm": 3.703125,
      "learning_rate": 1.360413088937955e-06,
      "loss": 1.2133,
      "step": 2330
    },
    {
      "epoch": 0.008201116613569693,
      "grad_norm": 4.03125,
      "learning_rate": 1.3662542786714798e-06,
      "loss": 1.3554,
      "step": 2340
    },
    {
      "epoch": 0.008236164120465291,
      "grad_norm": 3.625,
      "learning_rate": 1.3720954684050048e-06,
      "loss": 1.2122,
      "step": 2350
    },
    {
      "epoch": 0.008271211627360888,
      "grad_norm": 3.796875,
      "learning_rate": 1.3779366581385299e-06,
      "loss": 1.3047,
      "step": 2360
    },
    {
      "epoch": 0.008306259134256484,
      "grad_norm": 4.25,
      "learning_rate": 1.3837778478720547e-06,
      "loss": 1.1766,
      "step": 2370
    },
    {
      "epoch": 0.00834130664115208,
      "grad_norm": 4.0,
      "learning_rate": 1.3896190376055796e-06,
      "loss": 1.2283,
      "step": 2380
    },
    {
      "epoch": 0.008376354148047679,
      "grad_norm": 5.15625,
      "learning_rate": 1.3954602273391044e-06,
      "loss": 1.2715,
      "step": 2390
    },
    {
      "epoch": 0.008411401654943276,
      "grad_norm": 4.34375,
      "learning_rate": 1.4013014170726295e-06,
      "loss": 1.2689,
      "step": 2400
    },
    {
      "epoch": 0.008446449161838872,
      "grad_norm": 3.59375,
      "learning_rate": 1.4071426068061543e-06,
      "loss": 1.257,
      "step": 2410
    },
    {
      "epoch": 0.00848149666873447,
      "grad_norm": 3.6875,
      "learning_rate": 1.4129837965396792e-06,
      "loss": 1.2295,
      "step": 2420
    },
    {
      "epoch": 0.008516544175630067,
      "grad_norm": 4.4375,
      "learning_rate": 1.4188249862732042e-06,
      "loss": 1.3149,
      "step": 2430
    },
    {
      "epoch": 0.008551591682525663,
      "grad_norm": 3.984375,
      "learning_rate": 1.4246661760067293e-06,
      "loss": 1.2018,
      "step": 2440
    },
    {
      "epoch": 0.00858663918942126,
      "grad_norm": 4.03125,
      "learning_rate": 1.4305073657402541e-06,
      "loss": 1.1884,
      "step": 2450
    },
    {
      "epoch": 0.008621686696316858,
      "grad_norm": 3.75,
      "learning_rate": 1.436348555473779e-06,
      "loss": 1.2398,
      "step": 2460
    },
    {
      "epoch": 0.008656734203212455,
      "grad_norm": 3.609375,
      "learning_rate": 1.4421897452073038e-06,
      "loss": 1.3156,
      "step": 2470
    },
    {
      "epoch": 0.008691781710108051,
      "grad_norm": 3.625,
      "learning_rate": 1.4480309349408289e-06,
      "loss": 1.2216,
      "step": 2480
    },
    {
      "epoch": 0.008726829217003648,
      "grad_norm": 3.9375,
      "learning_rate": 1.4538721246743537e-06,
      "loss": 1.2751,
      "step": 2490
    },
    {
      "epoch": 0.008761876723899246,
      "grad_norm": 3.8125,
      "learning_rate": 1.4597133144078785e-06,
      "loss": 1.2715,
      "step": 2500
    },
    {
      "epoch": 0.008796924230794843,
      "grad_norm": 4.0,
      "learning_rate": 1.4655545041414036e-06,
      "loss": 1.2811,
      "step": 2510
    },
    {
      "epoch": 0.00883197173769044,
      "grad_norm": 3.96875,
      "learning_rate": 1.4713956938749287e-06,
      "loss": 1.2545,
      "step": 2520
    },
    {
      "epoch": 0.008867019244586036,
      "grad_norm": 3.59375,
      "learning_rate": 1.4772368836084535e-06,
      "loss": 1.1997,
      "step": 2530
    },
    {
      "epoch": 0.008902066751481634,
      "grad_norm": 4.15625,
      "learning_rate": 1.4830780733419783e-06,
      "loss": 1.2431,
      "step": 2540
    },
    {
      "epoch": 0.00893711425837723,
      "grad_norm": 3.90625,
      "learning_rate": 1.4889192630755034e-06,
      "loss": 1.199,
      "step": 2550
    },
    {
      "epoch": 0.008972161765272827,
      "grad_norm": 3.75,
      "learning_rate": 1.4947604528090282e-06,
      "loss": 1.2498,
      "step": 2560
    },
    {
      "epoch": 0.009007209272168424,
      "grad_norm": 3.90625,
      "learning_rate": 1.500601642542553e-06,
      "loss": 1.159,
      "step": 2570
    },
    {
      "epoch": 0.009042256779064022,
      "grad_norm": 3.78125,
      "learning_rate": 1.506442832276078e-06,
      "loss": 1.3202,
      "step": 2580
    },
    {
      "epoch": 0.009077304285959618,
      "grad_norm": 3.875,
      "learning_rate": 1.512284022009603e-06,
      "loss": 1.3833,
      "step": 2590
    },
    {
      "epoch": 0.009112351792855215,
      "grad_norm": 3.78125,
      "learning_rate": 1.518125211743128e-06,
      "loss": 1.2864,
      "step": 2600
    },
    {
      "epoch": 0.009147399299750811,
      "grad_norm": 3.90625,
      "learning_rate": 1.5239664014766529e-06,
      "loss": 1.1563,
      "step": 2610
    },
    {
      "epoch": 0.00918244680664641,
      "grad_norm": 3.78125,
      "learning_rate": 1.5298075912101777e-06,
      "loss": 1.3196,
      "step": 2620
    },
    {
      "epoch": 0.009217494313542006,
      "grad_norm": 3.59375,
      "learning_rate": 1.5356487809437028e-06,
      "loss": 1.2191,
      "step": 2630
    },
    {
      "epoch": 0.009252541820437603,
      "grad_norm": 4.21875,
      "learning_rate": 1.5414899706772276e-06,
      "loss": 1.268,
      "step": 2640
    },
    {
      "epoch": 0.0092875893273332,
      "grad_norm": 3.90625,
      "learning_rate": 1.5473311604107525e-06,
      "loss": 1.1647,
      "step": 2650
    },
    {
      "epoch": 0.009322636834228798,
      "grad_norm": 3.859375,
      "learning_rate": 1.5531723501442773e-06,
      "loss": 1.1709,
      "step": 2660
    },
    {
      "epoch": 0.009357684341124394,
      "grad_norm": 4.0,
      "learning_rate": 1.5590135398778024e-06,
      "loss": 1.2574,
      "step": 2670
    },
    {
      "epoch": 0.00939273184801999,
      "grad_norm": 3.5,
      "learning_rate": 1.5648547296113272e-06,
      "loss": 1.2796,
      "step": 2680
    },
    {
      "epoch": 0.009427779354915587,
      "grad_norm": 3.921875,
      "learning_rate": 1.570695919344852e-06,
      "loss": 1.1925,
      "step": 2690
    },
    {
      "epoch": 0.009462826861811185,
      "grad_norm": 4.09375,
      "learning_rate": 1.5765371090783771e-06,
      "loss": 1.2839,
      "step": 2700
    },
    {
      "epoch": 0.009497874368706782,
      "grad_norm": 4.1875,
      "learning_rate": 1.582378298811902e-06,
      "loss": 1.2527,
      "step": 2710
    },
    {
      "epoch": 0.009532921875602379,
      "grad_norm": 3.375,
      "learning_rate": 1.5882194885454268e-06,
      "loss": 1.2493,
      "step": 2720
    },
    {
      "epoch": 0.009567969382497977,
      "grad_norm": 3.234375,
      "learning_rate": 1.594060678278952e-06,
      "loss": 1.2062,
      "step": 2730
    },
    {
      "epoch": 0.009603016889393573,
      "grad_norm": 3.921875,
      "learning_rate": 1.599901868012477e-06,
      "loss": 1.2966,
      "step": 2740
    },
    {
      "epoch": 0.00963806439628917,
      "grad_norm": 3.984375,
      "learning_rate": 1.605743057746002e-06,
      "loss": 1.2175,
      "step": 2750
    },
    {
      "epoch": 0.009673111903184766,
      "grad_norm": 3.484375,
      "learning_rate": 1.6115842474795268e-06,
      "loss": 1.2375,
      "step": 2760
    },
    {
      "epoch": 0.009708159410080365,
      "grad_norm": 4.15625,
      "learning_rate": 1.6174254372130517e-06,
      "loss": 1.2873,
      "step": 2770
    },
    {
      "epoch": 0.009743206916975961,
      "grad_norm": 6.6875,
      "learning_rate": 1.6232666269465765e-06,
      "loss": 1.1151,
      "step": 2780
    },
    {
      "epoch": 0.009778254423871558,
      "grad_norm": 4.03125,
      "learning_rate": 1.6291078166801016e-06,
      "loss": 1.2868,
      "step": 2790
    },
    {
      "epoch": 0.009813301930767154,
      "grad_norm": 4.3125,
      "learning_rate": 1.6349490064136264e-06,
      "loss": 1.2032,
      "step": 2800
    },
    {
      "epoch": 0.009848349437662753,
      "grad_norm": 3.265625,
      "learning_rate": 1.6407901961471513e-06,
      "loss": 1.1874,
      "step": 2810
    },
    {
      "epoch": 0.009883396944558349,
      "grad_norm": 3.765625,
      "learning_rate": 1.6466313858806763e-06,
      "loss": 1.2579,
      "step": 2820
    },
    {
      "epoch": 0.009918444451453946,
      "grad_norm": 3.4375,
      "learning_rate": 1.6524725756142012e-06,
      "loss": 1.236,
      "step": 2830
    },
    {
      "epoch": 0.009953491958349542,
      "grad_norm": 3.921875,
      "learning_rate": 1.658313765347726e-06,
      "loss": 1.2084,
      "step": 2840
    },
    {
      "epoch": 0.00998853946524514,
      "grad_norm": 4.375,
      "learning_rate": 1.6641549550812509e-06,
      "loss": 1.2057,
      "step": 2850
    },
    {
      "epoch": 0.010023586972140737,
      "grad_norm": 4.53125,
      "learning_rate": 1.669996144814776e-06,
      "loss": 1.1973,
      "step": 2860
    },
    {
      "epoch": 0.010058634479036333,
      "grad_norm": 3.859375,
      "learning_rate": 1.6758373345483008e-06,
      "loss": 1.2453,
      "step": 2870
    },
    {
      "epoch": 0.01009368198593193,
      "grad_norm": 3.9375,
      "learning_rate": 1.6816785242818256e-06,
      "loss": 1.218,
      "step": 2880
    },
    {
      "epoch": 0.010128729492827528,
      "grad_norm": 3.390625,
      "learning_rate": 1.6875197140153507e-06,
      "loss": 1.1795,
      "step": 2890
    },
    {
      "epoch": 0.010163776999723125,
      "grad_norm": 4.4375,
      "learning_rate": 1.693360903748876e-06,
      "loss": 1.292,
      "step": 2900
    },
    {
      "epoch": 0.010198824506618721,
      "grad_norm": 3.5,
      "learning_rate": 1.6992020934824008e-06,
      "loss": 1.1494,
      "step": 2910
    },
    {
      "epoch": 0.010233872013514318,
      "grad_norm": 3.921875,
      "learning_rate": 1.7050432832159256e-06,
      "loss": 1.2038,
      "step": 2920
    },
    {
      "epoch": 0.010268919520409916,
      "grad_norm": 4.09375,
      "learning_rate": 1.7108844729494505e-06,
      "loss": 1.2235,
      "step": 2930
    },
    {
      "epoch": 0.010303967027305513,
      "grad_norm": 3.578125,
      "learning_rate": 1.7167256626829755e-06,
      "loss": 1.2316,
      "step": 2940
    },
    {
      "epoch": 0.01033901453420111,
      "grad_norm": 4.0,
      "learning_rate": 1.7225668524165004e-06,
      "loss": 1.262,
      "step": 2950
    },
    {
      "epoch": 0.010374062041096706,
      "grad_norm": 3.75,
      "learning_rate": 1.7284080421500252e-06,
      "loss": 1.173,
      "step": 2960
    },
    {
      "epoch": 0.010409109547992304,
      "grad_norm": 4.09375,
      "learning_rate": 1.7342492318835503e-06,
      "loss": 1.2467,
      "step": 2970
    },
    {
      "epoch": 0.0104441570548879,
      "grad_norm": 3.953125,
      "learning_rate": 1.740090421617075e-06,
      "loss": 1.2304,
      "step": 2980
    },
    {
      "epoch": 0.010479204561783497,
      "grad_norm": 4.1875,
      "learning_rate": 1.7459316113506e-06,
      "loss": 1.2172,
      "step": 2990
    },
    {
      "epoch": 0.010514252068679095,
      "grad_norm": 3.625,
      "learning_rate": 1.7517728010841248e-06,
      "loss": 1.1824,
      "step": 3000
    },
    {
      "epoch": 0.010549299575574692,
      "grad_norm": 3.765625,
      "learning_rate": 1.7576139908176498e-06,
      "loss": 1.217,
      "step": 3010
    },
    {
      "epoch": 0.010584347082470288,
      "grad_norm": 3.625,
      "learning_rate": 1.7634551805511747e-06,
      "loss": 1.2171,
      "step": 3020
    },
    {
      "epoch": 0.010619394589365885,
      "grad_norm": 3.5,
      "learning_rate": 1.7692963702846995e-06,
      "loss": 1.1777,
      "step": 3030
    },
    {
      "epoch": 0.010654442096261483,
      "grad_norm": 3.828125,
      "learning_rate": 1.7751375600182244e-06,
      "loss": 1.2173,
      "step": 3040
    },
    {
      "epoch": 0.01068948960315708,
      "grad_norm": 3.625,
      "learning_rate": 1.7809787497517494e-06,
      "loss": 1.212,
      "step": 3050
    },
    {
      "epoch": 0.010724537110052676,
      "grad_norm": 3.796875,
      "learning_rate": 1.7868199394852743e-06,
      "loss": 1.1151,
      "step": 3060
    },
    {
      "epoch": 0.010759584616948273,
      "grad_norm": 3.828125,
      "learning_rate": 1.7926611292187995e-06,
      "loss": 1.166,
      "step": 3070
    },
    {
      "epoch": 0.010794632123843871,
      "grad_norm": 3.96875,
      "learning_rate": 1.7985023189523244e-06,
      "loss": 1.2892,
      "step": 3080
    },
    {
      "epoch": 0.010829679630739468,
      "grad_norm": 4.375,
      "learning_rate": 1.8043435086858494e-06,
      "loss": 1.1566,
      "step": 3090
    },
    {
      "epoch": 0.010864727137635064,
      "grad_norm": 4.28125,
      "learning_rate": 1.8101846984193743e-06,
      "loss": 1.239,
      "step": 3100
    },
    {
      "epoch": 0.01089977464453066,
      "grad_norm": 3.265625,
      "learning_rate": 1.8160258881528991e-06,
      "loss": 1.1715,
      "step": 3110
    },
    {
      "epoch": 0.010934822151426259,
      "grad_norm": 3.9375,
      "learning_rate": 1.821867077886424e-06,
      "loss": 1.1702,
      "step": 3120
    },
    {
      "epoch": 0.010969869658321856,
      "grad_norm": 3.34375,
      "learning_rate": 1.827708267619949e-06,
      "loss": 1.2321,
      "step": 3130
    },
    {
      "epoch": 0.011004917165217452,
      "grad_norm": 3.921875,
      "learning_rate": 1.8335494573534739e-06,
      "loss": 1.3136,
      "step": 3140
    },
    {
      "epoch": 0.011039964672113049,
      "grad_norm": 3.765625,
      "learning_rate": 1.8393906470869987e-06,
      "loss": 1.2448,
      "step": 3150
    },
    {
      "epoch": 0.011075012179008647,
      "grad_norm": 3.84375,
      "learning_rate": 1.8452318368205238e-06,
      "loss": 1.1944,
      "step": 3160
    },
    {
      "epoch": 0.011110059685904243,
      "grad_norm": 3.296875,
      "learning_rate": 1.8510730265540486e-06,
      "loss": 1.1258,
      "step": 3170
    },
    {
      "epoch": 0.01114510719279984,
      "grad_norm": 3.578125,
      "learning_rate": 1.8569142162875735e-06,
      "loss": 1.1763,
      "step": 3180
    },
    {
      "epoch": 0.011180154699695436,
      "grad_norm": 4.0,
      "learning_rate": 1.8627554060210983e-06,
      "loss": 1.253,
      "step": 3190
    },
    {
      "epoch": 0.011215202206591035,
      "grad_norm": 3.84375,
      "learning_rate": 1.8685965957546234e-06,
      "loss": 1.2056,
      "step": 3200
    },
    {
      "epoch": 0.011250249713486631,
      "grad_norm": 3.890625,
      "learning_rate": 1.8744377854881482e-06,
      "loss": 1.2974,
      "step": 3210
    },
    {
      "epoch": 0.011285297220382228,
      "grad_norm": 3.859375,
      "learning_rate": 1.880278975221673e-06,
      "loss": 1.217,
      "step": 3220
    },
    {
      "epoch": 0.011320344727277824,
      "grad_norm": 3.71875,
      "learning_rate": 1.8861201649551981e-06,
      "loss": 1.1648,
      "step": 3230
    },
    {
      "epoch": 0.011355392234173423,
      "grad_norm": 3.890625,
      "learning_rate": 1.891961354688723e-06,
      "loss": 1.2712,
      "step": 3240
    },
    {
      "epoch": 0.011390439741069019,
      "grad_norm": 3.90625,
      "learning_rate": 1.8978025444222482e-06,
      "loss": 1.2225,
      "step": 3250
    },
    {
      "epoch": 0.011425487247964616,
      "grad_norm": 3.75,
      "learning_rate": 1.903643734155773e-06,
      "loss": 1.2973,
      "step": 3260
    },
    {
      "epoch": 0.011460534754860212,
      "grad_norm": 3.59375,
      "learning_rate": 1.909484923889298e-06,
      "loss": 1.2205,
      "step": 3270
    },
    {
      "epoch": 0.01149558226175581,
      "grad_norm": 3.890625,
      "learning_rate": 1.9153261136228228e-06,
      "loss": 1.1852,
      "step": 3280
    },
    {
      "epoch": 0.011530629768651407,
      "grad_norm": 3.34375,
      "learning_rate": 1.9211673033563476e-06,
      "loss": 1.2224,
      "step": 3290
    },
    {
      "epoch": 0.011565677275547004,
      "grad_norm": 3.625,
      "learning_rate": 1.927008493089873e-06,
      "loss": 1.2277,
      "step": 3300
    },
    {
      "epoch": 0.011600724782442602,
      "grad_norm": 3.796875,
      "learning_rate": 1.9328496828233977e-06,
      "loss": 1.1863,
      "step": 3310
    },
    {
      "epoch": 0.011635772289338198,
      "grad_norm": 3.796875,
      "learning_rate": 1.9386908725569226e-06,
      "loss": 1.1488,
      "step": 3320
    },
    {
      "epoch": 0.011670819796233795,
      "grad_norm": 3.796875,
      "learning_rate": 1.9445320622904474e-06,
      "loss": 1.2326,
      "step": 3330
    },
    {
      "epoch": 0.011705867303129391,
      "grad_norm": 3.75,
      "learning_rate": 1.9503732520239723e-06,
      "loss": 1.201,
      "step": 3340
    },
    {
      "epoch": 0.01174091481002499,
      "grad_norm": 3.5625,
      "learning_rate": 1.956214441757497e-06,
      "loss": 1.157,
      "step": 3350
    },
    {
      "epoch": 0.011775962316920586,
      "grad_norm": 3.9375,
      "learning_rate": 1.962055631491022e-06,
      "loss": 1.1607,
      "step": 3360
    },
    {
      "epoch": 0.011811009823816183,
      "grad_norm": 3.671875,
      "learning_rate": 1.967896821224547e-06,
      "loss": 1.3172,
      "step": 3370
    },
    {
      "epoch": 0.01184605733071178,
      "grad_norm": 3.9375,
      "learning_rate": 1.973738010958072e-06,
      "loss": 1.1845,
      "step": 3380
    },
    {
      "epoch": 0.011881104837607378,
      "grad_norm": 4.0,
      "learning_rate": 1.979579200691597e-06,
      "loss": 1.2146,
      "step": 3390
    },
    {
      "epoch": 0.011916152344502974,
      "grad_norm": 4.0625,
      "learning_rate": 1.9854203904251217e-06,
      "loss": 1.2332,
      "step": 3400
    },
    {
      "epoch": 0.01195119985139857,
      "grad_norm": 3.59375,
      "learning_rate": 1.9912615801586466e-06,
      "loss": 1.2372,
      "step": 3410
    },
    {
      "epoch": 0.011986247358294167,
      "grad_norm": 3.765625,
      "learning_rate": 1.997102769892172e-06,
      "loss": 1.2318,
      "step": 3420
    },
    {
      "epoch": 0.012021294865189765,
      "grad_norm": 4.0625,
      "learning_rate": 2.0029439596256967e-06,
      "loss": 1.245,
      "step": 3430
    },
    {
      "epoch": 0.012056342372085362,
      "grad_norm": 3.40625,
      "learning_rate": 2.0087851493592215e-06,
      "loss": 1.2217,
      "step": 3440
    },
    {
      "epoch": 0.012091389878980958,
      "grad_norm": 3.875,
      "learning_rate": 2.014626339092747e-06,
      "loss": 1.2188,
      "step": 3450
    },
    {
      "epoch": 0.012126437385876555,
      "grad_norm": 3.84375,
      "learning_rate": 2.0204675288262717e-06,
      "loss": 1.2478,
      "step": 3460
    },
    {
      "epoch": 0.012161484892772153,
      "grad_norm": 3.5625,
      "learning_rate": 2.0263087185597965e-06,
      "loss": 1.1454,
      "step": 3470
    },
    {
      "epoch": 0.01219653239966775,
      "grad_norm": 3.546875,
      "learning_rate": 2.0321499082933213e-06,
      "loss": 1.237,
      "step": 3480
    },
    {
      "epoch": 0.012231579906563346,
      "grad_norm": 3.90625,
      "learning_rate": 2.037991098026846e-06,
      "loss": 1.1924,
      "step": 3490
    },
    {
      "epoch": 0.012266627413458943,
      "grad_norm": 3.59375,
      "learning_rate": 2.043832287760371e-06,
      "loss": 1.0486,
      "step": 3500
    },
    {
      "epoch": 0.012301674920354541,
      "grad_norm": 3.84375,
      "learning_rate": 2.049673477493896e-06,
      "loss": 1.2603,
      "step": 3510
    },
    {
      "epoch": 0.012336722427250138,
      "grad_norm": 3.578125,
      "learning_rate": 2.0555146672274207e-06,
      "loss": 1.2649,
      "step": 3520
    },
    {
      "epoch": 0.012371769934145734,
      "grad_norm": 3.75,
      "learning_rate": 2.061355856960946e-06,
      "loss": 1.1787,
      "step": 3530
    },
    {
      "epoch": 0.01240681744104133,
      "grad_norm": 3.609375,
      "learning_rate": 2.067197046694471e-06,
      "loss": 1.2454,
      "step": 3540
    },
    {
      "epoch": 0.012441864947936929,
      "grad_norm": 4.0,
      "learning_rate": 2.0730382364279957e-06,
      "loss": 1.2218,
      "step": 3550
    },
    {
      "epoch": 0.012476912454832526,
      "grad_norm": 3.1875,
      "learning_rate": 2.0788794261615205e-06,
      "loss": 1.2373,
      "step": 3560
    },
    {
      "epoch": 0.012511959961728122,
      "grad_norm": 3.953125,
      "learning_rate": 2.0847206158950454e-06,
      "loss": 1.2485,
      "step": 3570
    },
    {
      "epoch": 0.012547007468623719,
      "grad_norm": 4.0,
      "learning_rate": 2.0905618056285702e-06,
      "loss": 1.2069,
      "step": 3580
    },
    {
      "epoch": 0.012582054975519317,
      "grad_norm": 3.65625,
      "learning_rate": 2.0964029953620955e-06,
      "loss": 1.2313,
      "step": 3590
    },
    {
      "epoch": 0.012617102482414913,
      "grad_norm": 3.890625,
      "learning_rate": 2.1022441850956203e-06,
      "loss": 1.1869,
      "step": 3600
    },
    {
      "epoch": 0.01265214998931051,
      "grad_norm": 4.0625,
      "learning_rate": 2.1080853748291456e-06,
      "loss": 1.2106,
      "step": 3610
    },
    {
      "epoch": 0.012687197496206108,
      "grad_norm": 3.609375,
      "learning_rate": 2.1139265645626704e-06,
      "loss": 1.2525,
      "step": 3620
    },
    {
      "epoch": 0.012722245003101705,
      "grad_norm": 3.453125,
      "learning_rate": 2.1197677542961953e-06,
      "loss": 1.2019,
      "step": 3630
    },
    {
      "epoch": 0.012757292509997301,
      "grad_norm": 3.375,
      "learning_rate": 2.12560894402972e-06,
      "loss": 1.1489,
      "step": 3640
    },
    {
      "epoch": 0.012792340016892898,
      "grad_norm": 3.53125,
      "learning_rate": 2.131450133763245e-06,
      "loss": 1.2217,
      "step": 3650
    },
    {
      "epoch": 0.012827387523788496,
      "grad_norm": 3.671875,
      "learning_rate": 2.13729132349677e-06,
      "loss": 1.1562,
      "step": 3660
    },
    {
      "epoch": 0.012862435030684093,
      "grad_norm": 4.0,
      "learning_rate": 2.1431325132302947e-06,
      "loss": 1.2099,
      "step": 3670
    },
    {
      "epoch": 0.01289748253757969,
      "grad_norm": 3.984375,
      "learning_rate": 2.14897370296382e-06,
      "loss": 1.1778,
      "step": 3680
    },
    {
      "epoch": 0.012932530044475286,
      "grad_norm": 4.1875,
      "learning_rate": 2.1548148926973448e-06,
      "loss": 1.3122,
      "step": 3690
    },
    {
      "epoch": 0.012967577551370884,
      "grad_norm": 3.734375,
      "learning_rate": 2.1606560824308696e-06,
      "loss": 1.1224,
      "step": 3700
    },
    {
      "epoch": 0.01300262505826648,
      "grad_norm": 3.75,
      "learning_rate": 2.1664972721643945e-06,
      "loss": 1.2286,
      "step": 3710
    },
    {
      "epoch": 0.013037672565162077,
      "grad_norm": 3.609375,
      "learning_rate": 2.1723384618979193e-06,
      "loss": 1.2871,
      "step": 3720
    },
    {
      "epoch": 0.013072720072057674,
      "grad_norm": 3.171875,
      "learning_rate": 2.178179651631444e-06,
      "loss": 1.1597,
      "step": 3730
    },
    {
      "epoch": 0.013107767578953272,
      "grad_norm": 3.71875,
      "learning_rate": 2.184020841364969e-06,
      "loss": 1.138,
      "step": 3740
    },
    {
      "epoch": 0.013142815085848868,
      "grad_norm": 3.515625,
      "learning_rate": 2.1898620310984943e-06,
      "loss": 1.0802,
      "step": 3750
    },
    {
      "epoch": 0.013177862592744465,
      "grad_norm": 3.8125,
      "learning_rate": 2.1957032208320195e-06,
      "loss": 1.2381,
      "step": 3760
    },
    {
      "epoch": 0.013212910099640061,
      "grad_norm": 3.8125,
      "learning_rate": 2.2015444105655444e-06,
      "loss": 1.2064,
      "step": 3770
    },
    {
      "epoch": 0.01324795760653566,
      "grad_norm": 3.046875,
      "learning_rate": 2.2073856002990692e-06,
      "loss": 1.1777,
      "step": 3780
    },
    {
      "epoch": 0.013283005113431256,
      "grad_norm": 3.78125,
      "learning_rate": 2.213226790032594e-06,
      "loss": 1.2305,
      "step": 3790
    },
    {
      "epoch": 0.013318052620326853,
      "grad_norm": 4.1875,
      "learning_rate": 2.219067979766119e-06,
      "loss": 1.268,
      "step": 3800
    },
    {
      "epoch": 0.01335310012722245,
      "grad_norm": 3.609375,
      "learning_rate": 2.2249091694996438e-06,
      "loss": 1.2349,
      "step": 3810
    },
    {
      "epoch": 0.013388147634118048,
      "grad_norm": 3.625,
      "learning_rate": 2.2307503592331686e-06,
      "loss": 1.215,
      "step": 3820
    },
    {
      "epoch": 0.013423195141013644,
      "grad_norm": 3.625,
      "learning_rate": 2.236591548966694e-06,
      "loss": 1.1407,
      "step": 3830
    },
    {
      "epoch": 0.01345824264790924,
      "grad_norm": 3.53125,
      "learning_rate": 2.2424327387002187e-06,
      "loss": 1.2265,
      "step": 3840
    },
    {
      "epoch": 0.013493290154804837,
      "grad_norm": 4.09375,
      "learning_rate": 2.2482739284337436e-06,
      "loss": 1.2498,
      "step": 3850
    },
    {
      "epoch": 0.013528337661700435,
      "grad_norm": 3.84375,
      "learning_rate": 2.2541151181672684e-06,
      "loss": 1.1382,
      "step": 3860
    },
    {
      "epoch": 0.013563385168596032,
      "grad_norm": 5.59375,
      "learning_rate": 2.2599563079007932e-06,
      "loss": 1.1957,
      "step": 3870
    },
    {
      "epoch": 0.013598432675491628,
      "grad_norm": 3.625,
      "learning_rate": 2.265797497634318e-06,
      "loss": 1.1283,
      "step": 3880
    },
    {
      "epoch": 0.013633480182387225,
      "grad_norm": 4.40625,
      "learning_rate": 2.271638687367843e-06,
      "loss": 1.1961,
      "step": 3890
    },
    {
      "epoch": 0.013668527689282823,
      "grad_norm": 4.1875,
      "learning_rate": 2.277479877101368e-06,
      "loss": 1.2105,
      "step": 3900
    },
    {
      "epoch": 0.01370357519617842,
      "grad_norm": 3.71875,
      "learning_rate": 2.283321066834893e-06,
      "loss": 1.203,
      "step": 3910
    },
    {
      "epoch": 0.013738622703074016,
      "grad_norm": 3.125,
      "learning_rate": 2.289162256568418e-06,
      "loss": 1.1612,
      "step": 3920
    },
    {
      "epoch": 0.013773670209969615,
      "grad_norm": 3.734375,
      "learning_rate": 2.295003446301943e-06,
      "loss": 1.1747,
      "step": 3930
    },
    {
      "epoch": 0.013808717716865211,
      "grad_norm": 3.8125,
      "learning_rate": 2.300844636035468e-06,
      "loss": 1.1285,
      "step": 3940
    },
    {
      "epoch": 0.013843765223760808,
      "grad_norm": 3.484375,
      "learning_rate": 2.306685825768993e-06,
      "loss": 1.2298,
      "step": 3950
    },
    {
      "epoch": 0.013878812730656404,
      "grad_norm": 4.03125,
      "learning_rate": 2.3125270155025177e-06,
      "loss": 1.1821,
      "step": 3960
    },
    {
      "epoch": 0.013913860237552002,
      "grad_norm": 4.0625,
      "learning_rate": 2.3183682052360425e-06,
      "loss": 1.2322,
      "step": 3970
    },
    {
      "epoch": 0.013948907744447599,
      "grad_norm": 3.84375,
      "learning_rate": 2.3242093949695674e-06,
      "loss": 1.3585,
      "step": 3980
    },
    {
      "epoch": 0.013983955251343196,
      "grad_norm": 3.984375,
      "learning_rate": 2.3300505847030927e-06,
      "loss": 1.1761,
      "step": 3990
    },
    {
      "epoch": 0.014019002758238792,
      "grad_norm": 3.0625,
      "learning_rate": 2.3358917744366175e-06,
      "loss": 1.0994,
      "step": 4000
    },
    {
      "epoch": 0.01405405026513439,
      "grad_norm": 3.515625,
      "learning_rate": 2.3417329641701423e-06,
      "loss": 1.2828,
      "step": 4010
    },
    {
      "epoch": 0.014089097772029987,
      "grad_norm": 3.6875,
      "learning_rate": 2.347574153903667e-06,
      "loss": 1.1849,
      "step": 4020
    },
    {
      "epoch": 0.014124145278925583,
      "grad_norm": 3.59375,
      "learning_rate": 2.353415343637192e-06,
      "loss": 1.177,
      "step": 4030
    },
    {
      "epoch": 0.01415919278582118,
      "grad_norm": 3.71875,
      "learning_rate": 2.359256533370717e-06,
      "loss": 1.3044,
      "step": 4040
    },
    {
      "epoch": 0.014194240292716778,
      "grad_norm": 3.5625,
      "learning_rate": 2.3650977231042417e-06,
      "loss": 1.1551,
      "step": 4050
    },
    {
      "epoch": 0.014229287799612375,
      "grad_norm": 4.0,
      "learning_rate": 2.370938912837767e-06,
      "loss": 1.1854,
      "step": 4060
    },
    {
      "epoch": 0.014264335306507971,
      "grad_norm": 4.09375,
      "learning_rate": 2.376780102571292e-06,
      "loss": 1.2075,
      "step": 4070
    },
    {
      "epoch": 0.014299382813403568,
      "grad_norm": 4.1875,
      "learning_rate": 2.3826212923048167e-06,
      "loss": 1.2452,
      "step": 4080
    },
    {
      "epoch": 0.014334430320299166,
      "grad_norm": 3.859375,
      "learning_rate": 2.3884624820383415e-06,
      "loss": 1.1289,
      "step": 4090
    },
    {
      "epoch": 0.014369477827194763,
      "grad_norm": 3.984375,
      "learning_rate": 2.394303671771867e-06,
      "loss": 1.2168,
      "step": 4100
    },
    {
      "epoch": 0.01440452533409036,
      "grad_norm": 3.515625,
      "learning_rate": 2.4001448615053916e-06,
      "loss": 1.169,
      "step": 4110
    },
    {
      "epoch": 0.014439572840985956,
      "grad_norm": 4.03125,
      "learning_rate": 2.4059860512389165e-06,
      "loss": 1.223,
      "step": 4120
    },
    {
      "epoch": 0.014474620347881554,
      "grad_norm": 3.609375,
      "learning_rate": 2.4118272409724413e-06,
      "loss": 1.1742,
      "step": 4130
    },
    {
      "epoch": 0.01450966785477715,
      "grad_norm": 3.65625,
      "learning_rate": 2.4176684307059666e-06,
      "loss": 1.1101,
      "step": 4140
    },
    {
      "epoch": 0.014544715361672747,
      "grad_norm": 3.796875,
      "learning_rate": 2.4235096204394914e-06,
      "loss": 1.2079,
      "step": 4150
    },
    {
      "epoch": 0.014579762868568344,
      "grad_norm": 3.65625,
      "learning_rate": 2.4293508101730163e-06,
      "loss": 1.1668,
      "step": 4160
    },
    {
      "epoch": 0.014614810375463942,
      "grad_norm": 3.78125,
      "learning_rate": 2.435191999906541e-06,
      "loss": 1.2143,
      "step": 4170
    },
    {
      "epoch": 0.014649857882359538,
      "grad_norm": 3.453125,
      "learning_rate": 2.441033189640066e-06,
      "loss": 1.237,
      "step": 4180
    },
    {
      "epoch": 0.014684905389255135,
      "grad_norm": 3.5625,
      "learning_rate": 2.446874379373591e-06,
      "loss": 1.1156,
      "step": 4190
    },
    {
      "epoch": 0.014719952896150733,
      "grad_norm": 3.75,
      "learning_rate": 2.4527155691071157e-06,
      "loss": 1.1717,
      "step": 4200
    },
    {
      "epoch": 0.01475500040304633,
      "grad_norm": 3.078125,
      "learning_rate": 2.458556758840641e-06,
      "loss": 1.1452,
      "step": 4210
    },
    {
      "epoch": 0.014790047909941926,
      "grad_norm": 3.578125,
      "learning_rate": 2.4643979485741658e-06,
      "loss": 1.1304,
      "step": 4220
    },
    {
      "epoch": 0.014825095416837523,
      "grad_norm": 3.8125,
      "learning_rate": 2.4702391383076906e-06,
      "loss": 1.2026,
      "step": 4230
    },
    {
      "epoch": 0.014860142923733121,
      "grad_norm": 3.796875,
      "learning_rate": 2.4760803280412155e-06,
      "loss": 1.1445,
      "step": 4240
    },
    {
      "epoch": 0.014895190430628718,
      "grad_norm": 3.90625,
      "learning_rate": 2.4819215177747403e-06,
      "loss": 1.1723,
      "step": 4250
    },
    {
      "epoch": 0.014930237937524314,
      "grad_norm": 4.03125,
      "learning_rate": 2.487762707508265e-06,
      "loss": 1.2797,
      "step": 4260
    },
    {
      "epoch": 0.01496528544441991,
      "grad_norm": 3.6875,
      "learning_rate": 2.4936038972417904e-06,
      "loss": 1.1616,
      "step": 4270
    },
    {
      "epoch": 0.015000332951315509,
      "grad_norm": 3.78125,
      "learning_rate": 2.4994450869753153e-06,
      "loss": 1.12,
      "step": 4280
    },
    {
      "epoch": 0.015035380458211105,
      "grad_norm": 3.859375,
      "learning_rate": 2.50528627670884e-06,
      "loss": 1.257,
      "step": 4290
    },
    {
      "epoch": 0.015070427965106702,
      "grad_norm": 3.78125,
      "learning_rate": 2.5111274664423654e-06,
      "loss": 1.1718,
      "step": 4300
    },
    {
      "epoch": 0.015105475472002299,
      "grad_norm": 3.71875,
      "learning_rate": 2.5169686561758902e-06,
      "loss": 1.1953,
      "step": 4310
    },
    {
      "epoch": 0.015140522978897897,
      "grad_norm": 3.8125,
      "learning_rate": 2.522809845909415e-06,
      "loss": 1.1608,
      "step": 4320
    },
    {
      "epoch": 0.015175570485793493,
      "grad_norm": 3.90625,
      "learning_rate": 2.52865103564294e-06,
      "loss": 1.1746,
      "step": 4330
    },
    {
      "epoch": 0.01521061799268909,
      "grad_norm": 3.640625,
      "learning_rate": 2.5344922253764647e-06,
      "loss": 1.2052,
      "step": 4340
    },
    {
      "epoch": 0.015245665499584686,
      "grad_norm": 3.34375,
      "learning_rate": 2.5403334151099896e-06,
      "loss": 1.1457,
      "step": 4350
    },
    {
      "epoch": 0.015280713006480285,
      "grad_norm": 3.90625,
      "learning_rate": 2.5461746048435144e-06,
      "loss": 1.2197,
      "step": 4360
    },
    {
      "epoch": 0.015315760513375881,
      "grad_norm": 3.65625,
      "learning_rate": 2.5520157945770397e-06,
      "loss": 1.2338,
      "step": 4370
    },
    {
      "epoch": 0.015350808020271478,
      "grad_norm": 3.875,
      "learning_rate": 2.5578569843105646e-06,
      "loss": 1.144,
      "step": 4380
    },
    {
      "epoch": 0.015385855527167074,
      "grad_norm": 3.75,
      "learning_rate": 2.5636981740440894e-06,
      "loss": 1.1744,
      "step": 4390
    },
    {
      "epoch": 0.015420903034062673,
      "grad_norm": 3.53125,
      "learning_rate": 2.5695393637776142e-06,
      "loss": 1.1521,
      "step": 4400
    },
    {
      "epoch": 0.015455950540958269,
      "grad_norm": 3.40625,
      "learning_rate": 2.575380553511139e-06,
      "loss": 1.1932,
      "step": 4410
    },
    {
      "epoch": 0.015490998047853866,
      "grad_norm": 4.09375,
      "learning_rate": 2.581221743244664e-06,
      "loss": 1.1118,
      "step": 4420
    },
    {
      "epoch": 0.015526045554749462,
      "grad_norm": 3.828125,
      "learning_rate": 2.5870629329781888e-06,
      "loss": 1.2309,
      "step": 4430
    },
    {
      "epoch": 0.01556109306164506,
      "grad_norm": 3.90625,
      "learning_rate": 2.592904122711714e-06,
      "loss": 1.2156,
      "step": 4440
    },
    {
      "epoch": 0.015596140568540657,
      "grad_norm": 3.40625,
      "learning_rate": 2.5987453124452393e-06,
      "loss": 1.0869,
      "step": 4450
    },
    {
      "epoch": 0.015631188075436255,
      "grad_norm": 4.28125,
      "learning_rate": 2.604586502178764e-06,
      "loss": 1.2042,
      "step": 4460
    },
    {
      "epoch": 0.01566623558233185,
      "grad_norm": 3.46875,
      "learning_rate": 2.610427691912289e-06,
      "loss": 1.2383,
      "step": 4470
    },
    {
      "epoch": 0.015701283089227448,
      "grad_norm": 3.640625,
      "learning_rate": 2.616268881645814e-06,
      "loss": 1.1165,
      "step": 4480
    },
    {
      "epoch": 0.015736330596123047,
      "grad_norm": 4.0,
      "learning_rate": 2.6221100713793387e-06,
      "loss": 1.2286,
      "step": 4490
    },
    {
      "epoch": 0.01577137810301864,
      "grad_norm": 3.765625,
      "learning_rate": 2.6279512611128635e-06,
      "loss": 1.1071,
      "step": 4500
    },
    {
      "epoch": 0.01580642560991424,
      "grad_norm": 3.34375,
      "learning_rate": 2.6337924508463884e-06,
      "loss": 1.1776,
      "step": 4510
    },
    {
      "epoch": 0.015841473116809834,
      "grad_norm": 3.703125,
      "learning_rate": 2.6396336405799136e-06,
      "loss": 1.191,
      "step": 4520
    },
    {
      "epoch": 0.015876520623705433,
      "grad_norm": 3.515625,
      "learning_rate": 2.6454748303134385e-06,
      "loss": 1.2141,
      "step": 4530
    },
    {
      "epoch": 0.01591156813060103,
      "grad_norm": 3.734375,
      "learning_rate": 2.6513160200469633e-06,
      "loss": 1.1512,
      "step": 4540
    },
    {
      "epoch": 0.015946615637496626,
      "grad_norm": 3.234375,
      "learning_rate": 2.657157209780488e-06,
      "loss": 1.1117,
      "step": 4550
    },
    {
      "epoch": 0.015981663144392224,
      "grad_norm": 3.828125,
      "learning_rate": 2.662998399514013e-06,
      "loss": 1.2832,
      "step": 4560
    },
    {
      "epoch": 0.016016710651287822,
      "grad_norm": 3.375,
      "learning_rate": 2.668839589247538e-06,
      "loss": 1.196,
      "step": 4570
    },
    {
      "epoch": 0.016051758158183417,
      "grad_norm": 3.53125,
      "learning_rate": 2.6746807789810627e-06,
      "loss": 1.1544,
      "step": 4580
    },
    {
      "epoch": 0.016086805665079015,
      "grad_norm": 3.484375,
      "learning_rate": 2.680521968714588e-06,
      "loss": 1.1778,
      "step": 4590
    },
    {
      "epoch": 0.01612185317197461,
      "grad_norm": 3.640625,
      "learning_rate": 2.686363158448113e-06,
      "loss": 1.1357,
      "step": 4600
    },
    {
      "epoch": 0.01615690067887021,
      "grad_norm": 4.03125,
      "learning_rate": 2.6922043481816377e-06,
      "loss": 1.1561,
      "step": 4610
    },
    {
      "epoch": 0.016191948185765807,
      "grad_norm": 2.9375,
      "learning_rate": 2.698045537915163e-06,
      "loss": 1.152,
      "step": 4620
    },
    {
      "epoch": 0.0162269956926614,
      "grad_norm": 3.90625,
      "learning_rate": 2.7038867276486878e-06,
      "loss": 1.159,
      "step": 4630
    },
    {
      "epoch": 0.016262043199557,
      "grad_norm": 3.734375,
      "learning_rate": 2.7097279173822126e-06,
      "loss": 1.1807,
      "step": 4640
    },
    {
      "epoch": 0.016297090706452598,
      "grad_norm": 3.78125,
      "learning_rate": 2.7155691071157375e-06,
      "loss": 1.1699,
      "step": 4650
    },
    {
      "epoch": 0.016332138213348193,
      "grad_norm": 3.90625,
      "learning_rate": 2.7214102968492623e-06,
      "loss": 1.2702,
      "step": 4660
    },
    {
      "epoch": 0.01636718572024379,
      "grad_norm": 3.75,
      "learning_rate": 2.727251486582787e-06,
      "loss": 1.2446,
      "step": 4670
    },
    {
      "epoch": 0.016402233227139386,
      "grad_norm": 3.90625,
      "learning_rate": 2.7330926763163124e-06,
      "loss": 1.1173,
      "step": 4680
    },
    {
      "epoch": 0.016437280734034984,
      "grad_norm": 3.875,
      "learning_rate": 2.7389338660498373e-06,
      "loss": 1.2168,
      "step": 4690
    },
    {
      "epoch": 0.016472328240930582,
      "grad_norm": 3.734375,
      "learning_rate": 2.744775055783362e-06,
      "loss": 1.1121,
      "step": 4700
    },
    {
      "epoch": 0.016507375747826177,
      "grad_norm": 3.484375,
      "learning_rate": 2.750616245516887e-06,
      "loss": 1.1425,
      "step": 4710
    },
    {
      "epoch": 0.016542423254721775,
      "grad_norm": 4.0625,
      "learning_rate": 2.756457435250412e-06,
      "loss": 1.1904,
      "step": 4720
    },
    {
      "epoch": 0.016577470761617374,
      "grad_norm": 3.46875,
      "learning_rate": 2.7622986249839366e-06,
      "loss": 1.1872,
      "step": 4730
    },
    {
      "epoch": 0.01661251826851297,
      "grad_norm": 3.90625,
      "learning_rate": 2.7681398147174615e-06,
      "loss": 1.1298,
      "step": 4740
    },
    {
      "epoch": 0.016647565775408567,
      "grad_norm": 3.640625,
      "learning_rate": 2.7739810044509868e-06,
      "loss": 1.2097,
      "step": 4750
    },
    {
      "epoch": 0.01668261328230416,
      "grad_norm": 3.625,
      "learning_rate": 2.7798221941845116e-06,
      "loss": 1.1833,
      "step": 4760
    },
    {
      "epoch": 0.01671766078919976,
      "grad_norm": 3.734375,
      "learning_rate": 2.7856633839180365e-06,
      "loss": 1.1933,
      "step": 4770
    },
    {
      "epoch": 0.016752708296095358,
      "grad_norm": 3.515625,
      "learning_rate": 2.7915045736515613e-06,
      "loss": 1.1777,
      "step": 4780
    },
    {
      "epoch": 0.016787755802990953,
      "grad_norm": 3.53125,
      "learning_rate": 2.7973457633850866e-06,
      "loss": 1.17,
      "step": 4790
    },
    {
      "epoch": 0.01682280330988655,
      "grad_norm": 3.515625,
      "learning_rate": 2.8031869531186114e-06,
      "loss": 1.1375,
      "step": 4800
    },
    {
      "epoch": 0.01685785081678215,
      "grad_norm": 3.875,
      "learning_rate": 2.8090281428521363e-06,
      "loss": 1.1272,
      "step": 4810
    },
    {
      "epoch": 0.016892898323677744,
      "grad_norm": 3.53125,
      "learning_rate": 2.814869332585661e-06,
      "loss": 1.1998,
      "step": 4820
    },
    {
      "epoch": 0.016927945830573343,
      "grad_norm": 3.421875,
      "learning_rate": 2.8207105223191864e-06,
      "loss": 1.0996,
      "step": 4830
    },
    {
      "epoch": 0.01696299333746894,
      "grad_norm": 3.828125,
      "learning_rate": 2.826551712052711e-06,
      "loss": 1.1051,
      "step": 4840
    },
    {
      "epoch": 0.016998040844364536,
      "grad_norm": 3.984375,
      "learning_rate": 2.832392901786236e-06,
      "loss": 1.2817,
      "step": 4850
    },
    {
      "epoch": 0.017033088351260134,
      "grad_norm": 3.8125,
      "learning_rate": 2.838234091519761e-06,
      "loss": 1.1651,
      "step": 4860
    },
    {
      "epoch": 0.01706813585815573,
      "grad_norm": 3.671875,
      "learning_rate": 2.8440752812532857e-06,
      "loss": 1.159,
      "step": 4870
    },
    {
      "epoch": 0.017103183365051327,
      "grad_norm": 3.546875,
      "learning_rate": 2.8499164709868106e-06,
      "loss": 1.1446,
      "step": 4880
    },
    {
      "epoch": 0.017138230871946925,
      "grad_norm": 3.8125,
      "learning_rate": 2.8557576607203354e-06,
      "loss": 1.1218,
      "step": 4890
    },
    {
      "epoch": 0.01717327837884252,
      "grad_norm": 3.546875,
      "learning_rate": 2.8615988504538607e-06,
      "loss": 1.1148,
      "step": 4900
    },
    {
      "epoch": 0.01720832588573812,
      "grad_norm": 3.671875,
      "learning_rate": 2.8674400401873855e-06,
      "loss": 1.2485,
      "step": 4910
    },
    {
      "epoch": 0.017243373392633717,
      "grad_norm": 4.03125,
      "learning_rate": 2.8732812299209104e-06,
      "loss": 1.2309,
      "step": 4920
    },
    {
      "epoch": 0.01727842089952931,
      "grad_norm": 2.9375,
      "learning_rate": 2.8791224196544352e-06,
      "loss": 1.1042,
      "step": 4930
    },
    {
      "epoch": 0.01731346840642491,
      "grad_norm": 4.5625,
      "learning_rate": 2.88496360938796e-06,
      "loss": 1.2375,
      "step": 4940
    },
    {
      "epoch": 0.017348515913320504,
      "grad_norm": 4.0625,
      "learning_rate": 2.890804799121485e-06,
      "loss": 1.1184,
      "step": 4950
    },
    {
      "epoch": 0.017383563420216103,
      "grad_norm": 3.796875,
      "learning_rate": 2.89664598885501e-06,
      "loss": 1.1578,
      "step": 4960
    },
    {
      "epoch": 0.0174186109271117,
      "grad_norm": 3.390625,
      "learning_rate": 2.902487178588535e-06,
      "loss": 1.0854,
      "step": 4970
    },
    {
      "epoch": 0.017453658434007296,
      "grad_norm": 3.703125,
      "learning_rate": 2.9083283683220603e-06,
      "loss": 1.1557,
      "step": 4980
    },
    {
      "epoch": 0.017488705940902894,
      "grad_norm": 3.421875,
      "learning_rate": 2.914169558055585e-06,
      "loss": 1.1743,
      "step": 4990
    },
    {
      "epoch": 0.017523753447798492,
      "grad_norm": 3.5,
      "learning_rate": 2.92001074778911e-06,
      "loss": 1.149,
      "step": 5000
    },
    {
      "epoch": 0.017523753447798492,
      "eval_loss": 1.1074479818344116,
      "eval_runtime": 554.0478,
      "eval_samples_per_second": 686.648,
      "eval_steps_per_second": 57.221,
      "step": 5000
    },
    {
      "epoch": 0.017558800954694087,
      "grad_norm": 4.03125,
      "learning_rate": 2.925851937522635e-06,
      "loss": 1.2694,
      "step": 5010
    },
    {
      "epoch": 0.017593848461589685,
      "grad_norm": 3.34375,
      "learning_rate": 2.9316931272561597e-06,
      "loss": 1.1752,
      "step": 5020
    },
    {
      "epoch": 0.01762889596848528,
      "grad_norm": 3.375,
      "learning_rate": 2.9375343169896845e-06,
      "loss": 1.196,
      "step": 5030
    },
    {
      "epoch": 0.01766394347538088,
      "grad_norm": 3.640625,
      "learning_rate": 2.9433755067232094e-06,
      "loss": 1.1984,
      "step": 5040
    },
    {
      "epoch": 0.017698990982276477,
      "grad_norm": 4.09375,
      "learning_rate": 2.9492166964567346e-06,
      "loss": 1.2349,
      "step": 5050
    },
    {
      "epoch": 0.01773403848917207,
      "grad_norm": 4.0,
      "learning_rate": 2.9550578861902595e-06,
      "loss": 1.2226,
      "step": 5060
    },
    {
      "epoch": 0.01776908599606767,
      "grad_norm": 4.09375,
      "learning_rate": 2.9608990759237843e-06,
      "loss": 1.1386,
      "step": 5070
    },
    {
      "epoch": 0.017804133502963268,
      "grad_norm": 3.34375,
      "learning_rate": 2.966740265657309e-06,
      "loss": 1.1921,
      "step": 5080
    },
    {
      "epoch": 0.017839181009858863,
      "grad_norm": 3.71875,
      "learning_rate": 2.972581455390834e-06,
      "loss": 1.2117,
      "step": 5090
    },
    {
      "epoch": 0.01787422851675446,
      "grad_norm": 3.640625,
      "learning_rate": 2.978422645124359e-06,
      "loss": 1.2538,
      "step": 5100
    },
    {
      "epoch": 0.01790927602365006,
      "grad_norm": 3.484375,
      "learning_rate": 2.9842638348578837e-06,
      "loss": 1.1777,
      "step": 5110
    },
    {
      "epoch": 0.017944323530545654,
      "grad_norm": 3.78125,
      "learning_rate": 2.9901050245914085e-06,
      "loss": 1.2432,
      "step": 5120
    },
    {
      "epoch": 0.017979371037441252,
      "grad_norm": 4.03125,
      "learning_rate": 2.995946214324934e-06,
      "loss": 1.1508,
      "step": 5130
    },
    {
      "epoch": 0.018014418544336847,
      "grad_norm": 3.421875,
      "learning_rate": 3.001787404058459e-06,
      "loss": 1.1764,
      "step": 5140
    },
    {
      "epoch": 0.018049466051232445,
      "grad_norm": 4.09375,
      "learning_rate": 3.007628593791984e-06,
      "loss": 1.1897,
      "step": 5150
    },
    {
      "epoch": 0.018084513558128044,
      "grad_norm": 3.890625,
      "learning_rate": 3.0134697835255088e-06,
      "loss": 1.1553,
      "step": 5160
    },
    {
      "epoch": 0.01811956106502364,
      "grad_norm": 3.78125,
      "learning_rate": 3.0193109732590336e-06,
      "loss": 1.1614,
      "step": 5170
    },
    {
      "epoch": 0.018154608571919237,
      "grad_norm": 3.40625,
      "learning_rate": 3.0251521629925585e-06,
      "loss": 1.236,
      "step": 5180
    },
    {
      "epoch": 0.018189656078814835,
      "grad_norm": 3.515625,
      "learning_rate": 3.0309933527260833e-06,
      "loss": 1.1573,
      "step": 5190
    },
    {
      "epoch": 0.01822470358571043,
      "grad_norm": 3.421875,
      "learning_rate": 3.036834542459608e-06,
      "loss": 1.1687,
      "step": 5200
    },
    {
      "epoch": 0.018259751092606028,
      "grad_norm": 4.0,
      "learning_rate": 3.0426757321931334e-06,
      "loss": 1.1852,
      "step": 5210
    },
    {
      "epoch": 0.018294798599501623,
      "grad_norm": 3.6875,
      "learning_rate": 3.0485169219266583e-06,
      "loss": 1.1999,
      "step": 5220
    },
    {
      "epoch": 0.01832984610639722,
      "grad_norm": 3.921875,
      "learning_rate": 3.054358111660183e-06,
      "loss": 1.1901,
      "step": 5230
    },
    {
      "epoch": 0.01836489361329282,
      "grad_norm": 2.984375,
      "learning_rate": 3.060199301393708e-06,
      "loss": 0.987,
      "step": 5240
    },
    {
      "epoch": 0.018399941120188414,
      "grad_norm": 3.6875,
      "learning_rate": 3.066040491127233e-06,
      "loss": 1.2503,
      "step": 5250
    },
    {
      "epoch": 0.018434988627084013,
      "grad_norm": 3.875,
      "learning_rate": 3.0718816808607576e-06,
      "loss": 1.2244,
      "step": 5260
    },
    {
      "epoch": 0.01847003613397961,
      "grad_norm": 3.1875,
      "learning_rate": 3.0777228705942825e-06,
      "loss": 1.1639,
      "step": 5270
    },
    {
      "epoch": 0.018505083640875206,
      "grad_norm": 3.359375,
      "learning_rate": 3.0835640603278078e-06,
      "loss": 1.2237,
      "step": 5280
    },
    {
      "epoch": 0.018540131147770804,
      "grad_norm": 3.390625,
      "learning_rate": 3.0894052500613326e-06,
      "loss": 1.0953,
      "step": 5290
    },
    {
      "epoch": 0.0185751786546664,
      "grad_norm": 3.625,
      "learning_rate": 3.095246439794858e-06,
      "loss": 1.2516,
      "step": 5300
    },
    {
      "epoch": 0.018610226161561997,
      "grad_norm": 3.546875,
      "learning_rate": 3.1010876295283827e-06,
      "loss": 1.1364,
      "step": 5310
    },
    {
      "epoch": 0.018645273668457595,
      "grad_norm": 3.90625,
      "learning_rate": 3.1069288192619076e-06,
      "loss": 1.2182,
      "step": 5320
    },
    {
      "epoch": 0.01868032117535319,
      "grad_norm": 3.46875,
      "learning_rate": 3.1127700089954324e-06,
      "loss": 1.2012,
      "step": 5330
    },
    {
      "epoch": 0.01871536868224879,
      "grad_norm": 3.8125,
      "learning_rate": 3.1186111987289572e-06,
      "loss": 1.1873,
      "step": 5340
    },
    {
      "epoch": 0.018750416189144387,
      "grad_norm": 3.3125,
      "learning_rate": 3.124452388462482e-06,
      "loss": 1.17,
      "step": 5350
    },
    {
      "epoch": 0.01878546369603998,
      "grad_norm": 3.125,
      "learning_rate": 3.1302935781960074e-06,
      "loss": 1.1591,
      "step": 5360
    },
    {
      "epoch": 0.01882051120293558,
      "grad_norm": 3.34375,
      "learning_rate": 3.136134767929532e-06,
      "loss": 1.133,
      "step": 5370
    },
    {
      "epoch": 0.018855558709831174,
      "grad_norm": 3.375,
      "learning_rate": 3.141975957663057e-06,
      "loss": 1.0765,
      "step": 5380
    },
    {
      "epoch": 0.018890606216726773,
      "grad_norm": 4.5,
      "learning_rate": 3.147817147396582e-06,
      "loss": 1.1664,
      "step": 5390
    },
    {
      "epoch": 0.01892565372362237,
      "grad_norm": 4.03125,
      "learning_rate": 3.1536583371301067e-06,
      "loss": 1.1702,
      "step": 5400
    },
    {
      "epoch": 0.018960701230517966,
      "grad_norm": 3.546875,
      "learning_rate": 3.1594995268636316e-06,
      "loss": 1.1832,
      "step": 5410
    },
    {
      "epoch": 0.018995748737413564,
      "grad_norm": 3.828125,
      "learning_rate": 3.1653407165971564e-06,
      "loss": 1.1391,
      "step": 5420
    },
    {
      "epoch": 0.019030796244309162,
      "grad_norm": 3.71875,
      "learning_rate": 3.1711819063306817e-06,
      "loss": 1.1781,
      "step": 5430
    },
    {
      "epoch": 0.019065843751204757,
      "grad_norm": 3.515625,
      "learning_rate": 3.1770230960642065e-06,
      "loss": 1.2388,
      "step": 5440
    },
    {
      "epoch": 0.019100891258100355,
      "grad_norm": 4.125,
      "learning_rate": 3.1828642857977314e-06,
      "loss": 1.1634,
      "step": 5450
    },
    {
      "epoch": 0.019135938764995954,
      "grad_norm": 3.671875,
      "learning_rate": 3.1887054755312562e-06,
      "loss": 1.2749,
      "step": 5460
    },
    {
      "epoch": 0.01917098627189155,
      "grad_norm": 3.25,
      "learning_rate": 3.194546665264781e-06,
      "loss": 1.1219,
      "step": 5470
    },
    {
      "epoch": 0.019206033778787147,
      "grad_norm": 3.9375,
      "learning_rate": 3.200387854998306e-06,
      "loss": 1.1893,
      "step": 5480
    },
    {
      "epoch": 0.01924108128568274,
      "grad_norm": 3.5625,
      "learning_rate": 3.2062290447318308e-06,
      "loss": 1.1694,
      "step": 5490
    },
    {
      "epoch": 0.01927612879257834,
      "grad_norm": 3.734375,
      "learning_rate": 3.212070234465356e-06,
      "loss": 1.1955,
      "step": 5500
    },
    {
      "epoch": 0.019311176299473938,
      "grad_norm": 3.90625,
      "learning_rate": 3.217911424198881e-06,
      "loss": 1.2888,
      "step": 5510
    },
    {
      "epoch": 0.019346223806369533,
      "grad_norm": 3.453125,
      "learning_rate": 3.2237526139324057e-06,
      "loss": 1.1964,
      "step": 5520
    },
    {
      "epoch": 0.01938127131326513,
      "grad_norm": 3.109375,
      "learning_rate": 3.2295938036659306e-06,
      "loss": 1.124,
      "step": 5530
    },
    {
      "epoch": 0.01941631882016073,
      "grad_norm": 3.5625,
      "learning_rate": 3.2354349933994554e-06,
      "loss": 1.2353,
      "step": 5540
    },
    {
      "epoch": 0.019451366327056324,
      "grad_norm": 3.890625,
      "learning_rate": 3.2412761831329802e-06,
      "loss": 1.1682,
      "step": 5550
    },
    {
      "epoch": 0.019486413833951922,
      "grad_norm": 3.640625,
      "learning_rate": 3.247117372866506e-06,
      "loss": 1.0558,
      "step": 5560
    },
    {
      "epoch": 0.019521461340847517,
      "grad_norm": 3.796875,
      "learning_rate": 3.2529585626000308e-06,
      "loss": 1.2119,
      "step": 5570
    },
    {
      "epoch": 0.019556508847743116,
      "grad_norm": 3.5625,
      "learning_rate": 3.2587997523335556e-06,
      "loss": 1.1555,
      "step": 5580
    },
    {
      "epoch": 0.019591556354638714,
      "grad_norm": 4.03125,
      "learning_rate": 3.2646409420670805e-06,
      "loss": 1.1321,
      "step": 5590
    },
    {
      "epoch": 0.01962660386153431,
      "grad_norm": 3.671875,
      "learning_rate": 3.2704821318006057e-06,
      "loss": 1.1717,
      "step": 5600
    },
    {
      "epoch": 0.019661651368429907,
      "grad_norm": 3.625,
      "learning_rate": 3.2763233215341306e-06,
      "loss": 1.1367,
      "step": 5610
    },
    {
      "epoch": 0.019696698875325505,
      "grad_norm": 3.875,
      "learning_rate": 3.2821645112676554e-06,
      "loss": 1.1903,
      "step": 5620
    },
    {
      "epoch": 0.0197317463822211,
      "grad_norm": 3.46875,
      "learning_rate": 3.2880057010011803e-06,
      "loss": 1.0492,
      "step": 5630
    },
    {
      "epoch": 0.019766793889116698,
      "grad_norm": 3.96875,
      "learning_rate": 3.293846890734705e-06,
      "loss": 1.0902,
      "step": 5640
    },
    {
      "epoch": 0.019801841396012293,
      "grad_norm": 3.984375,
      "learning_rate": 3.29968808046823e-06,
      "loss": 1.1576,
      "step": 5650
    },
    {
      "epoch": 0.01983688890290789,
      "grad_norm": 3.328125,
      "learning_rate": 3.305529270201755e-06,
      "loss": 1.2355,
      "step": 5660
    },
    {
      "epoch": 0.01987193640980349,
      "grad_norm": 3.890625,
      "learning_rate": 3.31137045993528e-06,
      "loss": 1.1613,
      "step": 5670
    },
    {
      "epoch": 0.019906983916699084,
      "grad_norm": 3.578125,
      "learning_rate": 3.317211649668805e-06,
      "loss": 1.14,
      "step": 5680
    },
    {
      "epoch": 0.019942031423594683,
      "grad_norm": 3.46875,
      "learning_rate": 3.3230528394023298e-06,
      "loss": 1.1246,
      "step": 5690
    },
    {
      "epoch": 0.01997707893049028,
      "grad_norm": 4.3125,
      "learning_rate": 3.3288940291358546e-06,
      "loss": 1.176,
      "step": 5700
    },
    {
      "epoch": 0.020012126437385876,
      "grad_norm": 3.84375,
      "learning_rate": 3.3347352188693795e-06,
      "loss": 1.1648,
      "step": 5710
    },
    {
      "epoch": 0.020047173944281474,
      "grad_norm": 3.53125,
      "learning_rate": 3.3405764086029043e-06,
      "loss": 1.1669,
      "step": 5720
    },
    {
      "epoch": 0.020082221451177072,
      "grad_norm": 3.5625,
      "learning_rate": 3.346417598336429e-06,
      "loss": 1.0505,
      "step": 5730
    },
    {
      "epoch": 0.020117268958072667,
      "grad_norm": 3.640625,
      "learning_rate": 3.3522587880699544e-06,
      "loss": 1.1329,
      "step": 5740
    },
    {
      "epoch": 0.020152316464968265,
      "grad_norm": 3.40625,
      "learning_rate": 3.3580999778034793e-06,
      "loss": 1.1477,
      "step": 5750
    },
    {
      "epoch": 0.02018736397186386,
      "grad_norm": 3.640625,
      "learning_rate": 3.363941167537004e-06,
      "loss": 1.1755,
      "step": 5760
    },
    {
      "epoch": 0.02022241147875946,
      "grad_norm": 3.625,
      "learning_rate": 3.369782357270529e-06,
      "loss": 1.1444,
      "step": 5770
    },
    {
      "epoch": 0.020257458985655057,
      "grad_norm": 3.5625,
      "learning_rate": 3.375623547004054e-06,
      "loss": 1.1532,
      "step": 5780
    },
    {
      "epoch": 0.02029250649255065,
      "grad_norm": 3.78125,
      "learning_rate": 3.3814647367375786e-06,
      "loss": 1.1271,
      "step": 5790
    },
    {
      "epoch": 0.02032755399944625,
      "grad_norm": 3.59375,
      "learning_rate": 3.3873059264711035e-06,
      "loss": 1.0991,
      "step": 5800
    },
    {
      "epoch": 0.020362601506341848,
      "grad_norm": 3.421875,
      "learning_rate": 3.3931471162046287e-06,
      "loss": 1.0839,
      "step": 5810
    },
    {
      "epoch": 0.020397649013237443,
      "grad_norm": 3.484375,
      "learning_rate": 3.3989883059381536e-06,
      "loss": 1.2049,
      "step": 5820
    },
    {
      "epoch": 0.02043269652013304,
      "grad_norm": 3.6875,
      "learning_rate": 3.4048294956716784e-06,
      "loss": 1.1289,
      "step": 5830
    },
    {
      "epoch": 0.020467744027028636,
      "grad_norm": 3.890625,
      "learning_rate": 3.4106706854052033e-06,
      "loss": 1.1085,
      "step": 5840
    },
    {
      "epoch": 0.020502791533924234,
      "grad_norm": 3.25,
      "learning_rate": 3.416511875138728e-06,
      "loss": 1.1592,
      "step": 5850
    },
    {
      "epoch": 0.020537839040819832,
      "grad_norm": 3.71875,
      "learning_rate": 3.422353064872253e-06,
      "loss": 1.1295,
      "step": 5860
    },
    {
      "epoch": 0.020572886547715427,
      "grad_norm": 3.53125,
      "learning_rate": 3.428194254605778e-06,
      "loss": 1.1432,
      "step": 5870
    },
    {
      "epoch": 0.020607934054611025,
      "grad_norm": 3.40625,
      "learning_rate": 3.434035444339303e-06,
      "loss": 1.1458,
      "step": 5880
    },
    {
      "epoch": 0.020642981561506624,
      "grad_norm": 3.765625,
      "learning_rate": 3.439876634072828e-06,
      "loss": 1.1498,
      "step": 5890
    },
    {
      "epoch": 0.02067802906840222,
      "grad_norm": 4.1875,
      "learning_rate": 3.445717823806353e-06,
      "loss": 1.2009,
      "step": 5900
    },
    {
      "epoch": 0.020713076575297817,
      "grad_norm": 3.921875,
      "learning_rate": 3.4515590135398785e-06,
      "loss": 1.2159,
      "step": 5910
    },
    {
      "epoch": 0.02074812408219341,
      "grad_norm": 3.609375,
      "learning_rate": 3.4574002032734033e-06,
      "loss": 1.158,
      "step": 5920
    },
    {
      "epoch": 0.02078317158908901,
      "grad_norm": 3.453125,
      "learning_rate": 3.463241393006928e-06,
      "loss": 1.1719,
      "step": 5930
    },
    {
      "epoch": 0.020818219095984608,
      "grad_norm": 3.71875,
      "learning_rate": 3.469082582740453e-06,
      "loss": 1.1377,
      "step": 5940
    },
    {
      "epoch": 0.020853266602880203,
      "grad_norm": 3.640625,
      "learning_rate": 3.474923772473978e-06,
      "loss": 1.1065,
      "step": 5950
    },
    {
      "epoch": 0.0208883141097758,
      "grad_norm": 3.640625,
      "learning_rate": 3.4807649622075027e-06,
      "loss": 1.177,
      "step": 5960
    },
    {
      "epoch": 0.0209233616166714,
      "grad_norm": 3.953125,
      "learning_rate": 3.4866061519410275e-06,
      "loss": 1.1754,
      "step": 5970
    },
    {
      "epoch": 0.020958409123566994,
      "grad_norm": 3.3125,
      "learning_rate": 3.492447341674553e-06,
      "loss": 1.0349,
      "step": 5980
    },
    {
      "epoch": 0.020993456630462592,
      "grad_norm": 3.3125,
      "learning_rate": 3.4982885314080776e-06,
      "loss": 1.1428,
      "step": 5990
    },
    {
      "epoch": 0.02102850413735819,
      "grad_norm": 4.375,
      "learning_rate": 3.5041297211416025e-06,
      "loss": 1.2052,
      "step": 6000
    },
    {
      "epoch": 0.021063551644253786,
      "grad_norm": 3.953125,
      "learning_rate": 3.5099709108751273e-06,
      "loss": 1.2162,
      "step": 6010
    },
    {
      "epoch": 0.021098599151149384,
      "grad_norm": 3.734375,
      "learning_rate": 3.515812100608652e-06,
      "loss": 1.1787,
      "step": 6020
    },
    {
      "epoch": 0.02113364665804498,
      "grad_norm": 4.03125,
      "learning_rate": 3.521653290342177e-06,
      "loss": 1.1619,
      "step": 6030
    },
    {
      "epoch": 0.021168694164940577,
      "grad_norm": 3.65625,
      "learning_rate": 3.527494480075702e-06,
      "loss": 1.1198,
      "step": 6040
    },
    {
      "epoch": 0.021203741671836175,
      "grad_norm": 3.515625,
      "learning_rate": 3.533335669809227e-06,
      "loss": 1.2226,
      "step": 6050
    },
    {
      "epoch": 0.02123878917873177,
      "grad_norm": 3.90625,
      "learning_rate": 3.539176859542752e-06,
      "loss": 1.11,
      "step": 6060
    },
    {
      "epoch": 0.021273836685627368,
      "grad_norm": 3.6875,
      "learning_rate": 3.545018049276277e-06,
      "loss": 1.1966,
      "step": 6070
    },
    {
      "epoch": 0.021308884192522966,
      "grad_norm": 3.5625,
      "learning_rate": 3.5508592390098017e-06,
      "loss": 1.1925,
      "step": 6080
    },
    {
      "epoch": 0.02134393169941856,
      "grad_norm": 3.359375,
      "learning_rate": 3.5567004287433265e-06,
      "loss": 1.1936,
      "step": 6090
    },
    {
      "epoch": 0.02137897920631416,
      "grad_norm": 3.796875,
      "learning_rate": 3.5625416184768514e-06,
      "loss": 1.0465,
      "step": 6100
    },
    {
      "epoch": 0.021414026713209754,
      "grad_norm": 3.546875,
      "learning_rate": 3.568382808210376e-06,
      "loss": 1.1588,
      "step": 6110
    },
    {
      "epoch": 0.021449074220105353,
      "grad_norm": 4.0625,
      "learning_rate": 3.5742239979439015e-06,
      "loss": 1.1222,
      "step": 6120
    },
    {
      "epoch": 0.02148412172700095,
      "grad_norm": 3.640625,
      "learning_rate": 3.5800651876774263e-06,
      "loss": 1.1474,
      "step": 6130
    },
    {
      "epoch": 0.021519169233896546,
      "grad_norm": 3.84375,
      "learning_rate": 3.585906377410951e-06,
      "loss": 1.1739,
      "step": 6140
    },
    {
      "epoch": 0.021554216740792144,
      "grad_norm": 3.84375,
      "learning_rate": 3.591747567144476e-06,
      "loss": 1.1602,
      "step": 6150
    },
    {
      "epoch": 0.021589264247687742,
      "grad_norm": 3.953125,
      "learning_rate": 3.597588756878001e-06,
      "loss": 1.1335,
      "step": 6160
    },
    {
      "epoch": 0.021624311754583337,
      "grad_norm": 3.953125,
      "learning_rate": 3.6034299466115257e-06,
      "loss": 1.1475,
      "step": 6170
    },
    {
      "epoch": 0.021659359261478935,
      "grad_norm": 3.5,
      "learning_rate": 3.6092711363450505e-06,
      "loss": 1.1781,
      "step": 6180
    },
    {
      "epoch": 0.02169440676837453,
      "grad_norm": 3.875,
      "learning_rate": 3.615112326078576e-06,
      "loss": 1.1542,
      "step": 6190
    },
    {
      "epoch": 0.02172945427527013,
      "grad_norm": 4.0625,
      "learning_rate": 3.6209535158121006e-06,
      "loss": 1.1791,
      "step": 6200
    },
    {
      "epoch": 0.021764501782165727,
      "grad_norm": 3.34375,
      "learning_rate": 3.6267947055456255e-06,
      "loss": 1.1562,
      "step": 6210
    },
    {
      "epoch": 0.02179954928906132,
      "grad_norm": 3.515625,
      "learning_rate": 3.6326358952791503e-06,
      "loss": 1.1792,
      "step": 6220
    },
    {
      "epoch": 0.02183459679595692,
      "grad_norm": 3.5,
      "learning_rate": 3.638477085012675e-06,
      "loss": 1.1717,
      "step": 6230
    },
    {
      "epoch": 0.021869644302852518,
      "grad_norm": 3.5,
      "learning_rate": 3.6443182747462e-06,
      "loss": 1.1224,
      "step": 6240
    },
    {
      "epoch": 0.021904691809748113,
      "grad_norm": 3.96875,
      "learning_rate": 3.6501594644797257e-06,
      "loss": 1.1801,
      "step": 6250
    },
    {
      "epoch": 0.02193973931664371,
      "grad_norm": 3.53125,
      "learning_rate": 3.6560006542132506e-06,
      "loss": 1.1869,
      "step": 6260
    },
    {
      "epoch": 0.021974786823539306,
      "grad_norm": 3.5,
      "learning_rate": 3.6618418439467754e-06,
      "loss": 1.1483,
      "step": 6270
    },
    {
      "epoch": 0.022009834330434904,
      "grad_norm": 3.21875,
      "learning_rate": 3.6676830336803003e-06,
      "loss": 1.065,
      "step": 6280
    },
    {
      "epoch": 0.022044881837330502,
      "grad_norm": 4.0,
      "learning_rate": 3.6735242234138255e-06,
      "loss": 1.1741,
      "step": 6290
    },
    {
      "epoch": 0.022079929344226097,
      "grad_norm": 4.0625,
      "learning_rate": 3.6793654131473504e-06,
      "loss": 1.1894,
      "step": 6300
    },
    {
      "epoch": 0.022114976851121695,
      "grad_norm": 3.875,
      "learning_rate": 3.685206602880875e-06,
      "loss": 1.2036,
      "step": 6310
    },
    {
      "epoch": 0.022150024358017294,
      "grad_norm": 3.375,
      "learning_rate": 3.6910477926144e-06,
      "loss": 1.1242,
      "step": 6320
    },
    {
      "epoch": 0.02218507186491289,
      "grad_norm": 3.453125,
      "learning_rate": 3.696888982347925e-06,
      "loss": 1.1421,
      "step": 6330
    },
    {
      "epoch": 0.022220119371808487,
      "grad_norm": 3.796875,
      "learning_rate": 3.7027301720814497e-06,
      "loss": 1.2331,
      "step": 6340
    },
    {
      "epoch": 0.022255166878704085,
      "grad_norm": 3.8125,
      "learning_rate": 3.7085713618149746e-06,
      "loss": 1.2441,
      "step": 6350
    },
    {
      "epoch": 0.02229021438559968,
      "grad_norm": 3.484375,
      "learning_rate": 3.7144125515485e-06,
      "loss": 1.057,
      "step": 6360
    },
    {
      "epoch": 0.022325261892495278,
      "grad_norm": 3.765625,
      "learning_rate": 3.7202537412820247e-06,
      "loss": 1.2616,
      "step": 6370
    },
    {
      "epoch": 0.022360309399390873,
      "grad_norm": 3.890625,
      "learning_rate": 3.7260949310155495e-06,
      "loss": 1.1418,
      "step": 6380
    },
    {
      "epoch": 0.02239535690628647,
      "grad_norm": 3.328125,
      "learning_rate": 3.7319361207490744e-06,
      "loss": 1.1897,
      "step": 6390
    },
    {
      "epoch": 0.02243040441318207,
      "grad_norm": 3.4375,
      "learning_rate": 3.7377773104825992e-06,
      "loss": 1.1278,
      "step": 6400
    },
    {
      "epoch": 0.022465451920077664,
      "grad_norm": 3.90625,
      "learning_rate": 3.743618500216124e-06,
      "loss": 1.1962,
      "step": 6410
    },
    {
      "epoch": 0.022500499426973262,
      "grad_norm": 3.546875,
      "learning_rate": 3.749459689949649e-06,
      "loss": 1.1702,
      "step": 6420
    },
    {
      "epoch": 0.02253554693386886,
      "grad_norm": 3.296875,
      "learning_rate": 3.755300879683174e-06,
      "loss": 1.1937,
      "step": 6430
    },
    {
      "epoch": 0.022570594440764456,
      "grad_norm": 3.546875,
      "learning_rate": 3.761142069416699e-06,
      "loss": 1.1665,
      "step": 6440
    },
    {
      "epoch": 0.022605641947660054,
      "grad_norm": 3.578125,
      "learning_rate": 3.766983259150224e-06,
      "loss": 1.1674,
      "step": 6450
    },
    {
      "epoch": 0.02264068945455565,
      "grad_norm": 3.828125,
      "learning_rate": 3.7728244488837487e-06,
      "loss": 1.1425,
      "step": 6460
    },
    {
      "epoch": 0.022675736961451247,
      "grad_norm": 3.515625,
      "learning_rate": 3.7786656386172736e-06,
      "loss": 1.1698,
      "step": 6470
    },
    {
      "epoch": 0.022710784468346845,
      "grad_norm": 3.765625,
      "learning_rate": 3.7845068283507984e-06,
      "loss": 1.1401,
      "step": 6480
    },
    {
      "epoch": 0.02274583197524244,
      "grad_norm": 3.4375,
      "learning_rate": 3.7903480180843233e-06,
      "loss": 1.0876,
      "step": 6490
    },
    {
      "epoch": 0.022780879482138038,
      "grad_norm": 3.765625,
      "learning_rate": 3.7961892078178485e-06,
      "loss": 1.0856,
      "step": 6500
    },
    {
      "epoch": 0.022815926989033637,
      "grad_norm": 3.703125,
      "learning_rate": 3.8020303975513734e-06,
      "loss": 1.097,
      "step": 6510
    },
    {
      "epoch": 0.02285097449592923,
      "grad_norm": 3.546875,
      "learning_rate": 3.807871587284898e-06,
      "loss": 1.2038,
      "step": 6520
    },
    {
      "epoch": 0.02288602200282483,
      "grad_norm": 3.546875,
      "learning_rate": 3.813712777018423e-06,
      "loss": 1.2406,
      "step": 6530
    },
    {
      "epoch": 0.022921069509720424,
      "grad_norm": 3.578125,
      "learning_rate": 3.819553966751948e-06,
      "loss": 1.1294,
      "step": 6540
    },
    {
      "epoch": 0.022956117016616023,
      "grad_norm": 3.578125,
      "learning_rate": 3.825395156485473e-06,
      "loss": 1.1158,
      "step": 6550
    },
    {
      "epoch": 0.02299116452351162,
      "grad_norm": 3.59375,
      "learning_rate": 3.831236346218998e-06,
      "loss": 1.1662,
      "step": 6560
    },
    {
      "epoch": 0.023026212030407216,
      "grad_norm": 3.1875,
      "learning_rate": 3.837077535952523e-06,
      "loss": 1.0295,
      "step": 6570
    },
    {
      "epoch": 0.023061259537302814,
      "grad_norm": 3.65625,
      "learning_rate": 3.842918725686048e-06,
      "loss": 1.2242,
      "step": 6580
    },
    {
      "epoch": 0.023096307044198412,
      "grad_norm": 3.703125,
      "learning_rate": 3.848759915419573e-06,
      "loss": 1.0996,
      "step": 6590
    },
    {
      "epoch": 0.023131354551094007,
      "grad_norm": 3.453125,
      "learning_rate": 3.854601105153098e-06,
      "loss": 1.0991,
      "step": 6600
    },
    {
      "epoch": 0.023166402057989605,
      "grad_norm": 3.0,
      "learning_rate": 3.860442294886623e-06,
      "loss": 1.1168,
      "step": 6610
    },
    {
      "epoch": 0.023201449564885204,
      "grad_norm": 3.46875,
      "learning_rate": 3.866283484620148e-06,
      "loss": 1.1919,
      "step": 6620
    },
    {
      "epoch": 0.0232364970717808,
      "grad_norm": 3.765625,
      "learning_rate": 3.872124674353673e-06,
      "loss": 1.1447,
      "step": 6630
    },
    {
      "epoch": 0.023271544578676397,
      "grad_norm": 3.75,
      "learning_rate": 3.877965864087198e-06,
      "loss": 1.1859,
      "step": 6640
    },
    {
      "epoch": 0.02330659208557199,
      "grad_norm": 3.5,
      "learning_rate": 3.8838070538207225e-06,
      "loss": 1.1635,
      "step": 6650
    },
    {
      "epoch": 0.02334163959246759,
      "grad_norm": 3.765625,
      "learning_rate": 3.889648243554247e-06,
      "loss": 1.1775,
      "step": 6660
    },
    {
      "epoch": 0.023376687099363188,
      "grad_norm": 4.59375,
      "learning_rate": 3.895489433287772e-06,
      "loss": 1.1912,
      "step": 6670
    },
    {
      "epoch": 0.023411734606258783,
      "grad_norm": 4.0625,
      "learning_rate": 3.901330623021297e-06,
      "loss": 1.2073,
      "step": 6680
    },
    {
      "epoch": 0.02344678211315438,
      "grad_norm": 3.9375,
      "learning_rate": 3.907171812754822e-06,
      "loss": 1.0825,
      "step": 6690
    },
    {
      "epoch": 0.02348182962004998,
      "grad_norm": 3.59375,
      "learning_rate": 3.9130130024883475e-06,
      "loss": 1.1037,
      "step": 6700
    },
    {
      "epoch": 0.023516877126945574,
      "grad_norm": 3.703125,
      "learning_rate": 3.918854192221872e-06,
      "loss": 1.1335,
      "step": 6710
    },
    {
      "epoch": 0.023551924633841172,
      "grad_norm": 3.375,
      "learning_rate": 3.924695381955397e-06,
      "loss": 1.171,
      "step": 6720
    },
    {
      "epoch": 0.023586972140736767,
      "grad_norm": 3.59375,
      "learning_rate": 3.930536571688922e-06,
      "loss": 1.1763,
      "step": 6730
    },
    {
      "epoch": 0.023622019647632365,
      "grad_norm": 3.6875,
      "learning_rate": 3.936377761422447e-06,
      "loss": 1.1499,
      "step": 6740
    },
    {
      "epoch": 0.023657067154527964,
      "grad_norm": 3.390625,
      "learning_rate": 3.942218951155972e-06,
      "loss": 1.1408,
      "step": 6750
    },
    {
      "epoch": 0.02369211466142356,
      "grad_norm": 3.75,
      "learning_rate": 3.948060140889497e-06,
      "loss": 1.1957,
      "step": 6760
    },
    {
      "epoch": 0.023727162168319157,
      "grad_norm": 3.9375,
      "learning_rate": 3.9539013306230214e-06,
      "loss": 1.1449,
      "step": 6770
    },
    {
      "epoch": 0.023762209675214755,
      "grad_norm": 3.578125,
      "learning_rate": 3.959742520356546e-06,
      "loss": 1.2308,
      "step": 6780
    },
    {
      "epoch": 0.02379725718211035,
      "grad_norm": 3.171875,
      "learning_rate": 3.965583710090071e-06,
      "loss": 1.0782,
      "step": 6790
    },
    {
      "epoch": 0.023832304689005948,
      "grad_norm": 3.453125,
      "learning_rate": 3.971424899823596e-06,
      "loss": 1.1461,
      "step": 6800
    },
    {
      "epoch": 0.023867352195901543,
      "grad_norm": 3.984375,
      "learning_rate": 3.977266089557121e-06,
      "loss": 1.1127,
      "step": 6810
    },
    {
      "epoch": 0.02390239970279714,
      "grad_norm": 4.03125,
      "learning_rate": 3.983107279290646e-06,
      "loss": 1.253,
      "step": 6820
    },
    {
      "epoch": 0.02393744720969274,
      "grad_norm": 3.84375,
      "learning_rate": 3.9889484690241705e-06,
      "loss": 1.2077,
      "step": 6830
    },
    {
      "epoch": 0.023972494716588334,
      "grad_norm": 3.578125,
      "learning_rate": 3.994789658757696e-06,
      "loss": 1.1154,
      "step": 6840
    },
    {
      "epoch": 0.024007542223483933,
      "grad_norm": 3.765625,
      "learning_rate": 4.000630848491221e-06,
      "loss": 1.1699,
      "step": 6850
    },
    {
      "epoch": 0.02404258973037953,
      "grad_norm": 3.5625,
      "learning_rate": 4.006472038224746e-06,
      "loss": 1.1092,
      "step": 6860
    },
    {
      "epoch": 0.024077637237275126,
      "grad_norm": 3.40625,
      "learning_rate": 4.012313227958271e-06,
      "loss": 1.115,
      "step": 6870
    },
    {
      "epoch": 0.024112684744170724,
      "grad_norm": 3.515625,
      "learning_rate": 4.018154417691796e-06,
      "loss": 1.1714,
      "step": 6880
    },
    {
      "epoch": 0.024147732251066322,
      "grad_norm": 3.171875,
      "learning_rate": 4.02399560742532e-06,
      "loss": 1.0649,
      "step": 6890
    },
    {
      "epoch": 0.024182779757961917,
      "grad_norm": 3.953125,
      "learning_rate": 4.029836797158845e-06,
      "loss": 1.2031,
      "step": 6900
    },
    {
      "epoch": 0.024217827264857515,
      "grad_norm": 3.625,
      "learning_rate": 4.03567798689237e-06,
      "loss": 1.0897,
      "step": 6910
    },
    {
      "epoch": 0.02425287477175311,
      "grad_norm": 3.671875,
      "learning_rate": 4.041519176625895e-06,
      "loss": 1.1854,
      "step": 6920
    },
    {
      "epoch": 0.024287922278648708,
      "grad_norm": 3.640625,
      "learning_rate": 4.047360366359421e-06,
      "loss": 1.1161,
      "step": 6930
    },
    {
      "epoch": 0.024322969785544307,
      "grad_norm": 4.0625,
      "learning_rate": 4.0532015560929455e-06,
      "loss": 1.1486,
      "step": 6940
    },
    {
      "epoch": 0.0243580172924399,
      "grad_norm": 3.953125,
      "learning_rate": 4.05904274582647e-06,
      "loss": 1.2059,
      "step": 6950
    },
    {
      "epoch": 0.0243930647993355,
      "grad_norm": 3.484375,
      "learning_rate": 4.064883935559995e-06,
      "loss": 1.1317,
      "step": 6960
    },
    {
      "epoch": 0.024428112306231098,
      "grad_norm": 3.453125,
      "learning_rate": 4.07072512529352e-06,
      "loss": 1.2198,
      "step": 6970
    },
    {
      "epoch": 0.024463159813126693,
      "grad_norm": 3.4375,
      "learning_rate": 4.076566315027045e-06,
      "loss": 1.1787,
      "step": 6980
    },
    {
      "epoch": 0.02449820732002229,
      "grad_norm": 3.65625,
      "learning_rate": 4.08240750476057e-06,
      "loss": 1.1984,
      "step": 6990
    },
    {
      "epoch": 0.024533254826917886,
      "grad_norm": 3.609375,
      "learning_rate": 4.0882486944940946e-06,
      "loss": 1.1885,
      "step": 7000
    },
    {
      "epoch": 0.024568302333813484,
      "grad_norm": 3.671875,
      "learning_rate": 4.09408988422762e-06,
      "loss": 1.1885,
      "step": 7010
    },
    {
      "epoch": 0.024603349840709082,
      "grad_norm": 3.921875,
      "learning_rate": 4.099931073961145e-06,
      "loss": 1.2106,
      "step": 7020
    },
    {
      "epoch": 0.024638397347604677,
      "grad_norm": 3.5,
      "learning_rate": 4.10577226369467e-06,
      "loss": 1.1336,
      "step": 7030
    },
    {
      "epoch": 0.024673444854500275,
      "grad_norm": 3.734375,
      "learning_rate": 4.111613453428195e-06,
      "loss": 1.149,
      "step": 7040
    },
    {
      "epoch": 0.024708492361395874,
      "grad_norm": 3.234375,
      "learning_rate": 4.11745464316172e-06,
      "loss": 1.153,
      "step": 7050
    },
    {
      "epoch": 0.02474353986829147,
      "grad_norm": 3.8125,
      "learning_rate": 4.1232958328952445e-06,
      "loss": 1.2077,
      "step": 7060
    },
    {
      "epoch": 0.024778587375187067,
      "grad_norm": 3.5,
      "learning_rate": 4.129137022628769e-06,
      "loss": 1.1183,
      "step": 7070
    },
    {
      "epoch": 0.02481363488208266,
      "grad_norm": 3.421875,
      "learning_rate": 4.134978212362294e-06,
      "loss": 1.2072,
      "step": 7080
    },
    {
      "epoch": 0.02484868238897826,
      "grad_norm": 3.28125,
      "learning_rate": 4.140819402095819e-06,
      "loss": 1.1574,
      "step": 7090
    },
    {
      "epoch": 0.024883729895873858,
      "grad_norm": 3.375,
      "learning_rate": 4.146660591829344e-06,
      "loss": 1.1223,
      "step": 7100
    },
    {
      "epoch": 0.024918777402769453,
      "grad_norm": 4.15625,
      "learning_rate": 4.152501781562869e-06,
      "loss": 1.0931,
      "step": 7110
    },
    {
      "epoch": 0.02495382490966505,
      "grad_norm": 3.4375,
      "learning_rate": 4.1583429712963935e-06,
      "loss": 1.0921,
      "step": 7120
    },
    {
      "epoch": 0.02498887241656065,
      "grad_norm": 3.546875,
      "learning_rate": 4.164184161029918e-06,
      "loss": 1.1691,
      "step": 7130
    },
    {
      "epoch": 0.025023919923456244,
      "grad_norm": 3.9375,
      "learning_rate": 4.170025350763443e-06,
      "loss": 1.1592,
      "step": 7140
    },
    {
      "epoch": 0.025058967430351842,
      "grad_norm": 4.125,
      "learning_rate": 4.175866540496969e-06,
      "loss": 1.1151,
      "step": 7150
    },
    {
      "epoch": 0.025094014937247437,
      "grad_norm": 3.21875,
      "learning_rate": 4.181707730230494e-06,
      "loss": 1.1948,
      "step": 7160
    },
    {
      "epoch": 0.025129062444143035,
      "grad_norm": 3.3125,
      "learning_rate": 4.187548919964019e-06,
      "loss": 1.1119,
      "step": 7170
    },
    {
      "epoch": 0.025164109951038634,
      "grad_norm": 3.625,
      "learning_rate": 4.1933901096975435e-06,
      "loss": 1.1792,
      "step": 7180
    },
    {
      "epoch": 0.02519915745793423,
      "grad_norm": 3.5,
      "learning_rate": 4.199231299431068e-06,
      "loss": 1.2019,
      "step": 7190
    },
    {
      "epoch": 0.025234204964829827,
      "grad_norm": 3.5625,
      "learning_rate": 4.205072489164593e-06,
      "loss": 1.1012,
      "step": 7200
    },
    {
      "epoch": 0.025269252471725425,
      "grad_norm": 3.375,
      "learning_rate": 4.210913678898118e-06,
      "loss": 1.2119,
      "step": 7210
    },
    {
      "epoch": 0.02530429997862102,
      "grad_norm": 3.1875,
      "learning_rate": 4.216754868631643e-06,
      "loss": 1.1868,
      "step": 7220
    },
    {
      "epoch": 0.025339347485516618,
      "grad_norm": 3.5,
      "learning_rate": 4.222596058365168e-06,
      "loss": 1.1462,
      "step": 7230
    },
    {
      "epoch": 0.025374394992412216,
      "grad_norm": 4.125,
      "learning_rate": 4.2284372480986925e-06,
      "loss": 1.1781,
      "step": 7240
    },
    {
      "epoch": 0.02540944249930781,
      "grad_norm": 3.640625,
      "learning_rate": 4.234278437832217e-06,
      "loss": 1.1452,
      "step": 7250
    },
    {
      "epoch": 0.02544449000620341,
      "grad_norm": 3.78125,
      "learning_rate": 4.240119627565742e-06,
      "loss": 1.1167,
      "step": 7260
    },
    {
      "epoch": 0.025479537513099004,
      "grad_norm": 3.625,
      "learning_rate": 4.245960817299268e-06,
      "loss": 1.1193,
      "step": 7270
    },
    {
      "epoch": 0.025514585019994603,
      "grad_norm": 3.28125,
      "learning_rate": 4.251802007032793e-06,
      "loss": 1.1249,
      "step": 7280
    },
    {
      "epoch": 0.0255496325268902,
      "grad_norm": 3.96875,
      "learning_rate": 4.257643196766318e-06,
      "loss": 1.1944,
      "step": 7290
    },
    {
      "epoch": 0.025584680033785796,
      "grad_norm": 3.78125,
      "learning_rate": 4.2634843864998424e-06,
      "loss": 1.1182,
      "step": 7300
    },
    {
      "epoch": 0.025619727540681394,
      "grad_norm": 3.390625,
      "learning_rate": 4.269325576233367e-06,
      "loss": 1.0982,
      "step": 7310
    },
    {
      "epoch": 0.025654775047576992,
      "grad_norm": 3.796875,
      "learning_rate": 4.275166765966893e-06,
      "loss": 1.1034,
      "step": 7320
    },
    {
      "epoch": 0.025689822554472587,
      "grad_norm": 3.859375,
      "learning_rate": 4.281007955700418e-06,
      "loss": 1.1332,
      "step": 7330
    },
    {
      "epoch": 0.025724870061368185,
      "grad_norm": 3.6875,
      "learning_rate": 4.286849145433943e-06,
      "loss": 1.1345,
      "step": 7340
    },
    {
      "epoch": 0.02575991756826378,
      "grad_norm": 3.625,
      "learning_rate": 4.2926903351674675e-06,
      "loss": 1.0872,
      "step": 7350
    },
    {
      "epoch": 0.02579496507515938,
      "grad_norm": 3.46875,
      "learning_rate": 4.298531524900992e-06,
      "loss": 1.0851,
      "step": 7360
    },
    {
      "epoch": 0.025830012582054977,
      "grad_norm": 3.296875,
      "learning_rate": 4.304372714634517e-06,
      "loss": 1.1986,
      "step": 7370
    },
    {
      "epoch": 0.02586506008895057,
      "grad_norm": 3.609375,
      "learning_rate": 4.310213904368042e-06,
      "loss": 1.1256,
      "step": 7380
    },
    {
      "epoch": 0.02590010759584617,
      "grad_norm": 3.40625,
      "learning_rate": 4.316055094101567e-06,
      "loss": 1.0826,
      "step": 7390
    },
    {
      "epoch": 0.025935155102741768,
      "grad_norm": 3.96875,
      "learning_rate": 4.321896283835092e-06,
      "loss": 1.2013,
      "step": 7400
    },
    {
      "epoch": 0.025970202609637363,
      "grad_norm": 3.53125,
      "learning_rate": 4.3277374735686166e-06,
      "loss": 1.1925,
      "step": 7410
    },
    {
      "epoch": 0.02600525011653296,
      "grad_norm": 3.828125,
      "learning_rate": 4.333578663302141e-06,
      "loss": 1.1652,
      "step": 7420
    },
    {
      "epoch": 0.026040297623428556,
      "grad_norm": 3.65625,
      "learning_rate": 4.339419853035666e-06,
      "loss": 1.0575,
      "step": 7430
    },
    {
      "epoch": 0.026075345130324154,
      "grad_norm": 3.6875,
      "learning_rate": 4.345261042769191e-06,
      "loss": 1.1544,
      "step": 7440
    },
    {
      "epoch": 0.026110392637219752,
      "grad_norm": 4.15625,
      "learning_rate": 4.351102232502716e-06,
      "loss": 1.1647,
      "step": 7450
    },
    {
      "epoch": 0.026145440144115347,
      "grad_norm": 3.453125,
      "learning_rate": 4.356943422236242e-06,
      "loss": 1.0672,
      "step": 7460
    },
    {
      "epoch": 0.026180487651010945,
      "grad_norm": 3.828125,
      "learning_rate": 4.3627846119697665e-06,
      "loss": 1.1451,
      "step": 7470
    },
    {
      "epoch": 0.026215535157906544,
      "grad_norm": 3.453125,
      "learning_rate": 4.368625801703291e-06,
      "loss": 1.1466,
      "step": 7480
    },
    {
      "epoch": 0.02625058266480214,
      "grad_norm": 3.6875,
      "learning_rate": 4.374466991436816e-06,
      "loss": 1.1027,
      "step": 7490
    },
    {
      "epoch": 0.026285630171697737,
      "grad_norm": 3.953125,
      "learning_rate": 4.380308181170341e-06,
      "loss": 1.1391,
      "step": 7500
    },
    {
      "epoch": 0.026320677678593335,
      "grad_norm": 3.484375,
      "learning_rate": 4.386149370903866e-06,
      "loss": 1.212,
      "step": 7510
    },
    {
      "epoch": 0.02635572518548893,
      "grad_norm": 3.4375,
      "learning_rate": 4.391990560637391e-06,
      "loss": 1.1403,
      "step": 7520
    },
    {
      "epoch": 0.026390772692384528,
      "grad_norm": 3.484375,
      "learning_rate": 4.3978317503709156e-06,
      "loss": 1.1876,
      "step": 7530
    },
    {
      "epoch": 0.026425820199280123,
      "grad_norm": 3.859375,
      "learning_rate": 4.40367294010444e-06,
      "loss": 1.1348,
      "step": 7540
    },
    {
      "epoch": 0.02646086770617572,
      "grad_norm": 3.5625,
      "learning_rate": 4.409514129837965e-06,
      "loss": 1.1565,
      "step": 7550
    },
    {
      "epoch": 0.02649591521307132,
      "grad_norm": 4.46875,
      "learning_rate": 4.41535531957149e-06,
      "loss": 1.1403,
      "step": 7560
    },
    {
      "epoch": 0.026530962719966914,
      "grad_norm": 4.28125,
      "learning_rate": 4.421196509305015e-06,
      "loss": 1.0704,
      "step": 7570
    },
    {
      "epoch": 0.026566010226862512,
      "grad_norm": 3.5,
      "learning_rate": 4.42703769903854e-06,
      "loss": 1.1983,
      "step": 7580
    },
    {
      "epoch": 0.02660105773375811,
      "grad_norm": 3.25,
      "learning_rate": 4.432878888772065e-06,
      "loss": 1.17,
      "step": 7590
    },
    {
      "epoch": 0.026636105240653705,
      "grad_norm": 3.765625,
      "learning_rate": 4.43872007850559e-06,
      "loss": 1.157,
      "step": 7600
    },
    {
      "epoch": 0.026671152747549304,
      "grad_norm": 3.90625,
      "learning_rate": 4.444561268239115e-06,
      "loss": 1.0941,
      "step": 7610
    },
    {
      "epoch": 0.0267062002544449,
      "grad_norm": 3.34375,
      "learning_rate": 4.45040245797264e-06,
      "loss": 1.106,
      "step": 7620
    },
    {
      "epoch": 0.026741247761340497,
      "grad_norm": 3.53125,
      "learning_rate": 4.456243647706166e-06,
      "loss": 1.0735,
      "step": 7630
    },
    {
      "epoch": 0.026776295268236095,
      "grad_norm": 3.5625,
      "learning_rate": 4.4620848374396905e-06,
      "loss": 1.1602,
      "step": 7640
    },
    {
      "epoch": 0.02681134277513169,
      "grad_norm": 3.421875,
      "learning_rate": 4.467926027173215e-06,
      "loss": 1.1744,
      "step": 7650
    },
    {
      "epoch": 0.026846390282027288,
      "grad_norm": 3.65625,
      "learning_rate": 4.47376721690674e-06,
      "loss": 1.1755,
      "step": 7660
    },
    {
      "epoch": 0.026881437788922886,
      "grad_norm": 3.6875,
      "learning_rate": 4.479608406640265e-06,
      "loss": 1.1972,
      "step": 7670
    },
    {
      "epoch": 0.02691648529581848,
      "grad_norm": 3.96875,
      "learning_rate": 4.48544959637379e-06,
      "loss": 1.1485,
      "step": 7680
    },
    {
      "epoch": 0.02695153280271408,
      "grad_norm": 3.703125,
      "learning_rate": 4.491290786107315e-06,
      "loss": 1.078,
      "step": 7690
    },
    {
      "epoch": 0.026986580309609674,
      "grad_norm": 3.3125,
      "learning_rate": 4.49713197584084e-06,
      "loss": 1.1455,
      "step": 7700
    },
    {
      "epoch": 0.027021627816505273,
      "grad_norm": 3.265625,
      "learning_rate": 4.5029731655743644e-06,
      "loss": 1.0966,
      "step": 7710
    },
    {
      "epoch": 0.02705667532340087,
      "grad_norm": 3.46875,
      "learning_rate": 4.508814355307889e-06,
      "loss": 1.144,
      "step": 7720
    },
    {
      "epoch": 0.027091722830296466,
      "grad_norm": 3.328125,
      "learning_rate": 4.514655545041414e-06,
      "loss": 1.1746,
      "step": 7730
    },
    {
      "epoch": 0.027126770337192064,
      "grad_norm": 3.828125,
      "learning_rate": 4.520496734774939e-06,
      "loss": 1.1783,
      "step": 7740
    },
    {
      "epoch": 0.027161817844087662,
      "grad_norm": 3.171875,
      "learning_rate": 4.526337924508464e-06,
      "loss": 1.2066,
      "step": 7750
    },
    {
      "epoch": 0.027196865350983257,
      "grad_norm": 3.796875,
      "learning_rate": 4.532179114241989e-06,
      "loss": 1.1627,
      "step": 7760
    },
    {
      "epoch": 0.027231912857878855,
      "grad_norm": 3.109375,
      "learning_rate": 4.538020303975514e-06,
      "loss": 1.0757,
      "step": 7770
    },
    {
      "epoch": 0.02726696036477445,
      "grad_norm": 3.859375,
      "learning_rate": 4.543861493709039e-06,
      "loss": 1.1972,
      "step": 7780
    },
    {
      "epoch": 0.02730200787167005,
      "grad_norm": 3.96875,
      "learning_rate": 4.549702683442564e-06,
      "loss": 1.1168,
      "step": 7790
    },
    {
      "epoch": 0.027337055378565647,
      "grad_norm": 3.84375,
      "learning_rate": 4.555543873176089e-06,
      "loss": 1.1393,
      "step": 7800
    },
    {
      "epoch": 0.02737210288546124,
      "grad_norm": 3.09375,
      "learning_rate": 4.561385062909614e-06,
      "loss": 1.2183,
      "step": 7810
    },
    {
      "epoch": 0.02740715039235684,
      "grad_norm": 3.78125,
      "learning_rate": 4.567226252643139e-06,
      "loss": 1.181,
      "step": 7820
    },
    {
      "epoch": 0.027442197899252438,
      "grad_norm": 4.0625,
      "learning_rate": 4.5730674423766634e-06,
      "loss": 1.2109,
      "step": 7830
    },
    {
      "epoch": 0.027477245406148033,
      "grad_norm": 3.40625,
      "learning_rate": 4.578908632110188e-06,
      "loss": 1.2366,
      "step": 7840
    },
    {
      "epoch": 0.02751229291304363,
      "grad_norm": 3.46875,
      "learning_rate": 4.584749821843713e-06,
      "loss": 1.1216,
      "step": 7850
    },
    {
      "epoch": 0.02754734041993923,
      "grad_norm": 3.5625,
      "learning_rate": 4.590591011577238e-06,
      "loss": 1.1087,
      "step": 7860
    },
    {
      "epoch": 0.027582387926834824,
      "grad_norm": 3.59375,
      "learning_rate": 4.596432201310763e-06,
      "loss": 1.1291,
      "step": 7870
    },
    {
      "epoch": 0.027617435433730422,
      "grad_norm": 3.390625,
      "learning_rate": 4.602273391044288e-06,
      "loss": 1.0916,
      "step": 7880
    },
    {
      "epoch": 0.027652482940626017,
      "grad_norm": 4.15625,
      "learning_rate": 4.6081145807778125e-06,
      "loss": 1.1222,
      "step": 7890
    },
    {
      "epoch": 0.027687530447521615,
      "grad_norm": 3.765625,
      "learning_rate": 4.613955770511337e-06,
      "loss": 1.1314,
      "step": 7900
    },
    {
      "epoch": 0.027722577954417214,
      "grad_norm": 3.71875,
      "learning_rate": 4.619796960244863e-06,
      "loss": 1.1051,
      "step": 7910
    },
    {
      "epoch": 0.02775762546131281,
      "grad_norm": 3.4375,
      "learning_rate": 4.625638149978388e-06,
      "loss": 1.1395,
      "step": 7920
    },
    {
      "epoch": 0.027792672968208407,
      "grad_norm": 3.171875,
      "learning_rate": 4.631479339711913e-06,
      "loss": 1.1263,
      "step": 7930
    },
    {
      "epoch": 0.027827720475104005,
      "grad_norm": 3.703125,
      "learning_rate": 4.6373205294454376e-06,
      "loss": 1.0773,
      "step": 7940
    },
    {
      "epoch": 0.0278627679819996,
      "grad_norm": 3.546875,
      "learning_rate": 4.643161719178962e-06,
      "loss": 1.2699,
      "step": 7950
    },
    {
      "epoch": 0.027897815488895198,
      "grad_norm": 3.984375,
      "learning_rate": 4.649002908912488e-06,
      "loss": 1.1851,
      "step": 7960
    },
    {
      "epoch": 0.027932862995790793,
      "grad_norm": 3.375,
      "learning_rate": 4.654844098646013e-06,
      "loss": 1.1311,
      "step": 7970
    },
    {
      "epoch": 0.02796791050268639,
      "grad_norm": 3.234375,
      "learning_rate": 4.660685288379538e-06,
      "loss": 1.0461,
      "step": 7980
    },
    {
      "epoch": 0.02800295800958199,
      "grad_norm": 3.484375,
      "learning_rate": 4.666526478113063e-06,
      "loss": 1.145,
      "step": 7990
    },
    {
      "epoch": 0.028038005516477584,
      "grad_norm": 3.75,
      "learning_rate": 4.6723676678465875e-06,
      "loss": 1.1754,
      "step": 8000
    },
    {
      "epoch": 0.028073053023373182,
      "grad_norm": 3.421875,
      "learning_rate": 4.678208857580112e-06,
      "loss": 1.0488,
      "step": 8010
    },
    {
      "epoch": 0.02810810053026878,
      "grad_norm": 3.671875,
      "learning_rate": 4.684050047313637e-06,
      "loss": 1.1291,
      "step": 8020
    },
    {
      "epoch": 0.028143148037164376,
      "grad_norm": 3.609375,
      "learning_rate": 4.689891237047162e-06,
      "loss": 1.12,
      "step": 8030
    },
    {
      "epoch": 0.028178195544059974,
      "grad_norm": 3.734375,
      "learning_rate": 4.695732426780687e-06,
      "loss": 1.1245,
      "step": 8040
    },
    {
      "epoch": 0.02821324305095557,
      "grad_norm": 3.6875,
      "learning_rate": 4.701573616514212e-06,
      "loss": 1.1582,
      "step": 8050
    },
    {
      "epoch": 0.028248290557851167,
      "grad_norm": 3.734375,
      "learning_rate": 4.7074148062477365e-06,
      "loss": 1.1673,
      "step": 8060
    },
    {
      "epoch": 0.028283338064746765,
      "grad_norm": 3.828125,
      "learning_rate": 4.713255995981261e-06,
      "loss": 1.1917,
      "step": 8070
    },
    {
      "epoch": 0.02831838557164236,
      "grad_norm": 3.46875,
      "learning_rate": 4.719097185714787e-06,
      "loss": 1.1234,
      "step": 8080
    },
    {
      "epoch": 0.028353433078537958,
      "grad_norm": 4.09375,
      "learning_rate": 4.724938375448312e-06,
      "loss": 1.1628,
      "step": 8090
    },
    {
      "epoch": 0.028388480585433556,
      "grad_norm": 3.59375,
      "learning_rate": 4.730779565181837e-06,
      "loss": 1.1488,
      "step": 8100
    },
    {
      "epoch": 0.02842352809232915,
      "grad_norm": 3.703125,
      "learning_rate": 4.736620754915362e-06,
      "loss": 1.1222,
      "step": 8110
    },
    {
      "epoch": 0.02845857559922475,
      "grad_norm": 3.4375,
      "learning_rate": 4.7424619446488865e-06,
      "loss": 1.1611,
      "step": 8120
    },
    {
      "epoch": 0.028493623106120348,
      "grad_norm": 3.53125,
      "learning_rate": 4.748303134382411e-06,
      "loss": 1.193,
      "step": 8130
    },
    {
      "epoch": 0.028528670613015943,
      "grad_norm": 3.984375,
      "learning_rate": 4.754144324115936e-06,
      "loss": 1.167,
      "step": 8140
    },
    {
      "epoch": 0.02856371811991154,
      "grad_norm": 3.671875,
      "learning_rate": 4.759985513849461e-06,
      "loss": 1.1373,
      "step": 8150
    },
    {
      "epoch": 0.028598765626807136,
      "grad_norm": 3.421875,
      "learning_rate": 4.765826703582986e-06,
      "loss": 1.1093,
      "step": 8160
    },
    {
      "epoch": 0.028633813133702734,
      "grad_norm": 3.0625,
      "learning_rate": 4.771667893316511e-06,
      "loss": 1.1565,
      "step": 8170
    },
    {
      "epoch": 0.028668860640598332,
      "grad_norm": 3.5,
      "learning_rate": 4.7775090830500355e-06,
      "loss": 1.1486,
      "step": 8180
    },
    {
      "epoch": 0.028703908147493927,
      "grad_norm": 3.359375,
      "learning_rate": 4.78335027278356e-06,
      "loss": 1.1588,
      "step": 8190
    },
    {
      "epoch": 0.028738955654389525,
      "grad_norm": 3.59375,
      "learning_rate": 4.789191462517085e-06,
      "loss": 1.1611,
      "step": 8200
    },
    {
      "epoch": 0.028774003161285124,
      "grad_norm": 3.765625,
      "learning_rate": 4.79503265225061e-06,
      "loss": 1.1628,
      "step": 8210
    },
    {
      "epoch": 0.02880905066818072,
      "grad_norm": 3.359375,
      "learning_rate": 4.800873841984136e-06,
      "loss": 1.1452,
      "step": 8220
    },
    {
      "epoch": 0.028844098175076317,
      "grad_norm": 3.96875,
      "learning_rate": 4.806715031717661e-06,
      "loss": 1.1713,
      "step": 8230
    },
    {
      "epoch": 0.02887914568197191,
      "grad_norm": 3.578125,
      "learning_rate": 4.8125562214511854e-06,
      "loss": 1.147,
      "step": 8240
    },
    {
      "epoch": 0.02891419318886751,
      "grad_norm": 3.75,
      "learning_rate": 4.81839741118471e-06,
      "loss": 1.0852,
      "step": 8250
    },
    {
      "epoch": 0.028949240695763108,
      "grad_norm": 3.953125,
      "learning_rate": 4.824238600918235e-06,
      "loss": 1.0929,
      "step": 8260
    },
    {
      "epoch": 0.028984288202658703,
      "grad_norm": 3.671875,
      "learning_rate": 4.83007979065176e-06,
      "loss": 1.1412,
      "step": 8270
    },
    {
      "epoch": 0.0290193357095543,
      "grad_norm": 10.0625,
      "learning_rate": 4.835920980385285e-06,
      "loss": 1.1371,
      "step": 8280
    },
    {
      "epoch": 0.0290543832164499,
      "grad_norm": 4.0625,
      "learning_rate": 4.84176217011881e-06,
      "loss": 1.1666,
      "step": 8290
    },
    {
      "epoch": 0.029089430723345494,
      "grad_norm": 3.84375,
      "learning_rate": 4.847603359852335e-06,
      "loss": 1.1842,
      "step": 8300
    },
    {
      "epoch": 0.029124478230241092,
      "grad_norm": 4.0625,
      "learning_rate": 4.85344454958586e-06,
      "loss": 1.1711,
      "step": 8310
    },
    {
      "epoch": 0.029159525737136687,
      "grad_norm": 3.6875,
      "learning_rate": 4.859285739319385e-06,
      "loss": 1.072,
      "step": 8320
    },
    {
      "epoch": 0.029194573244032285,
      "grad_norm": 4.0625,
      "learning_rate": 4.86512692905291e-06,
      "loss": 1.1591,
      "step": 8330
    },
    {
      "epoch": 0.029229620750927884,
      "grad_norm": 3.59375,
      "learning_rate": 4.870968118786435e-06,
      "loss": 1.1314,
      "step": 8340
    },
    {
      "epoch": 0.02926466825782348,
      "grad_norm": 4.03125,
      "learning_rate": 4.8768093085199596e-06,
      "loss": 1.096,
      "step": 8350
    },
    {
      "epoch": 0.029299715764719077,
      "grad_norm": 3.875,
      "learning_rate": 4.882650498253484e-06,
      "loss": 1.2901,
      "step": 8360
    },
    {
      "epoch": 0.029334763271614675,
      "grad_norm": 3.625,
      "learning_rate": 4.888491687987009e-06,
      "loss": 1.149,
      "step": 8370
    },
    {
      "epoch": 0.02936981077851027,
      "grad_norm": 3.671875,
      "learning_rate": 4.894332877720534e-06,
      "loss": 1.0637,
      "step": 8380
    },
    {
      "epoch": 0.029404858285405868,
      "grad_norm": 3.6875,
      "learning_rate": 4.90017406745406e-06,
      "loss": 1.1659,
      "step": 8390
    },
    {
      "epoch": 0.029439905792301466,
      "grad_norm": 3.640625,
      "learning_rate": 4.906015257187585e-06,
      "loss": 1.1539,
      "step": 8400
    },
    {
      "epoch": 0.02947495329919706,
      "grad_norm": 3.6875,
      "learning_rate": 4.9118564469211095e-06,
      "loss": 1.1279,
      "step": 8410
    },
    {
      "epoch": 0.02951000080609266,
      "grad_norm": 3.984375,
      "learning_rate": 4.917697636654634e-06,
      "loss": 1.1325,
      "step": 8420
    },
    {
      "epoch": 0.029545048312988254,
      "grad_norm": 3.84375,
      "learning_rate": 4.923538826388159e-06,
      "loss": 1.063,
      "step": 8430
    },
    {
      "epoch": 0.029580095819883852,
      "grad_norm": 3.6875,
      "learning_rate": 4.929380016121684e-06,
      "loss": 1.0853,
      "step": 8440
    },
    {
      "epoch": 0.02961514332677945,
      "grad_norm": 3.328125,
      "learning_rate": 4.935221205855209e-06,
      "loss": 1.1243,
      "step": 8450
    },
    {
      "epoch": 0.029650190833675046,
      "grad_norm": 3.53125,
      "learning_rate": 4.941062395588734e-06,
      "loss": 1.0824,
      "step": 8460
    },
    {
      "epoch": 0.029685238340570644,
      "grad_norm": 3.578125,
      "learning_rate": 4.9469035853222586e-06,
      "loss": 1.2376,
      "step": 8470
    },
    {
      "epoch": 0.029720285847466242,
      "grad_norm": 3.609375,
      "learning_rate": 4.952744775055783e-06,
      "loss": 1.0531,
      "step": 8480
    },
    {
      "epoch": 0.029755333354361837,
      "grad_norm": 3.40625,
      "learning_rate": 4.958585964789308e-06,
      "loss": 1.1943,
      "step": 8490
    },
    {
      "epoch": 0.029790380861257435,
      "grad_norm": 3.5,
      "learning_rate": 4.964427154522833e-06,
      "loss": 1.152,
      "step": 8500
    },
    {
      "epoch": 0.02982542836815303,
      "grad_norm": 4.0,
      "learning_rate": 4.970268344256358e-06,
      "loss": 1.1525,
      "step": 8510
    },
    {
      "epoch": 0.029860475875048628,
      "grad_norm": 3.546875,
      "learning_rate": 4.976109533989883e-06,
      "loss": 1.1532,
      "step": 8520
    },
    {
      "epoch": 0.029895523381944226,
      "grad_norm": 3.640625,
      "learning_rate": 4.9819507237234085e-06,
      "loss": 1.1424,
      "step": 8530
    },
    {
      "epoch": 0.02993057088883982,
      "grad_norm": 3.921875,
      "learning_rate": 4.987791913456933e-06,
      "loss": 1.0636,
      "step": 8540
    },
    {
      "epoch": 0.02996561839573542,
      "grad_norm": 3.890625,
      "learning_rate": 4.993633103190458e-06,
      "loss": 1.1366,
      "step": 8550
    },
    {
      "epoch": 0.030000665902631018,
      "grad_norm": 3.25,
      "learning_rate": 4.999474292923983e-06,
      "loss": 1.0878,
      "step": 8560
    },
    {
      "epoch": 0.030035713409526613,
      "grad_norm": 3.546875,
      "learning_rate": 5.005315482657508e-06,
      "loss": 1.0553,
      "step": 8570
    },
    {
      "epoch": 0.03007076091642221,
      "grad_norm": 3.8125,
      "learning_rate": 5.011156672391033e-06,
      "loss": 1.165,
      "step": 8580
    },
    {
      "epoch": 0.030105808423317806,
      "grad_norm": 3.78125,
      "learning_rate": 5.0169978621245575e-06,
      "loss": 1.1638,
      "step": 8590
    },
    {
      "epoch": 0.030140855930213404,
      "grad_norm": 3.78125,
      "learning_rate": 5.022839051858082e-06,
      "loss": 1.2988,
      "step": 8600
    },
    {
      "epoch": 0.030175903437109002,
      "grad_norm": 3.484375,
      "learning_rate": 5.028680241591607e-06,
      "loss": 1.1026,
      "step": 8610
    },
    {
      "epoch": 0.030210950944004597,
      "grad_norm": 3.28125,
      "learning_rate": 5.034521431325132e-06,
      "loss": 1.0809,
      "step": 8620
    },
    {
      "epoch": 0.030245998450900195,
      "grad_norm": 3.609375,
      "learning_rate": 5.040362621058657e-06,
      "loss": 1.1218,
      "step": 8630
    },
    {
      "epoch": 0.030281045957795794,
      "grad_norm": 4.03125,
      "learning_rate": 5.046203810792183e-06,
      "loss": 1.161,
      "step": 8640
    },
    {
      "epoch": 0.03031609346469139,
      "grad_norm": 4.03125,
      "learning_rate": 5.0520450005257075e-06,
      "loss": 1.1336,
      "step": 8650
    },
    {
      "epoch": 0.030351140971586987,
      "grad_norm": 3.671875,
      "learning_rate": 5.057886190259232e-06,
      "loss": 1.1594,
      "step": 8660
    },
    {
      "epoch": 0.03038618847848258,
      "grad_norm": 3.515625,
      "learning_rate": 5.063727379992757e-06,
      "loss": 1.1846,
      "step": 8670
    },
    {
      "epoch": 0.03042123598537818,
      "grad_norm": 3.71875,
      "learning_rate": 5.069568569726282e-06,
      "loss": 1.127,
      "step": 8680
    },
    {
      "epoch": 0.030456283492273778,
      "grad_norm": 3.609375,
      "learning_rate": 5.075409759459808e-06,
      "loss": 1.105,
      "step": 8690
    },
    {
      "epoch": 0.030491330999169373,
      "grad_norm": 3.703125,
      "learning_rate": 5.0812509491933325e-06,
      "loss": 1.1252,
      "step": 8700
    },
    {
      "epoch": 0.03052637850606497,
      "grad_norm": 3.625,
      "learning_rate": 5.087092138926857e-06,
      "loss": 1.2248,
      "step": 8710
    },
    {
      "epoch": 0.03056142601296057,
      "grad_norm": 3.3125,
      "learning_rate": 5.092933328660382e-06,
      "loss": 1.1236,
      "step": 8720
    },
    {
      "epoch": 0.030596473519856164,
      "grad_norm": 3.75,
      "learning_rate": 5.098774518393907e-06,
      "loss": 1.0842,
      "step": 8730
    },
    {
      "epoch": 0.030631521026751762,
      "grad_norm": 3.890625,
      "learning_rate": 5.104615708127432e-06,
      "loss": 1.0849,
      "step": 8740
    },
    {
      "epoch": 0.03066656853364736,
      "grad_norm": 3.671875,
      "learning_rate": 5.110456897860957e-06,
      "loss": 1.1737,
      "step": 8750
    },
    {
      "epoch": 0.030701616040542955,
      "grad_norm": 4.0,
      "learning_rate": 5.116298087594482e-06,
      "loss": 1.1123,
      "step": 8760
    },
    {
      "epoch": 0.030736663547438554,
      "grad_norm": 3.421875,
      "learning_rate": 5.1221392773280064e-06,
      "loss": 1.1291,
      "step": 8770
    },
    {
      "epoch": 0.03077171105433415,
      "grad_norm": 3.34375,
      "learning_rate": 5.127980467061531e-06,
      "loss": 1.1753,
      "step": 8780
    },
    {
      "epoch": 0.030806758561229747,
      "grad_norm": 3.578125,
      "learning_rate": 5.133821656795056e-06,
      "loss": 1.0929,
      "step": 8790
    },
    {
      "epoch": 0.030841806068125345,
      "grad_norm": 3.828125,
      "learning_rate": 5.139662846528581e-06,
      "loss": 1.1914,
      "step": 8800
    },
    {
      "epoch": 0.03087685357502094,
      "grad_norm": 3.390625,
      "learning_rate": 5.145504036262106e-06,
      "loss": 1.0829,
      "step": 8810
    },
    {
      "epoch": 0.030911901081916538,
      "grad_norm": 3.625,
      "learning_rate": 5.151345225995631e-06,
      "loss": 1.1698,
      "step": 8820
    },
    {
      "epoch": 0.030946948588812136,
      "grad_norm": 3.546875,
      "learning_rate": 5.157186415729156e-06,
      "loss": 1.1512,
      "step": 8830
    },
    {
      "epoch": 0.03098199609570773,
      "grad_norm": 3.375,
      "learning_rate": 5.163027605462681e-06,
      "loss": 1.0874,
      "step": 8840
    },
    {
      "epoch": 0.03101704360260333,
      "grad_norm": 3.90625,
      "learning_rate": 5.168868795196206e-06,
      "loss": 1.1068,
      "step": 8850
    },
    {
      "epoch": 0.031052091109498924,
      "grad_norm": 3.359375,
      "learning_rate": 5.174709984929731e-06,
      "loss": 1.1444,
      "step": 8860
    },
    {
      "epoch": 0.031087138616394523,
      "grad_norm": 3.046875,
      "learning_rate": 5.180551174663256e-06,
      "loss": 1.09,
      "step": 8870
    },
    {
      "epoch": 0.03112218612329012,
      "grad_norm": 4.125,
      "learning_rate": 5.1863923643967806e-06,
      "loss": 1.1045,
      "step": 8880
    },
    {
      "epoch": 0.031157233630185716,
      "grad_norm": 3.0,
      "learning_rate": 5.192233554130305e-06,
      "loss": 1.0865,
      "step": 8890
    },
    {
      "epoch": 0.031192281137081314,
      "grad_norm": 3.515625,
      "learning_rate": 5.19807474386383e-06,
      "loss": 1.0509,
      "step": 8900
    },
    {
      "epoch": 0.031227328643976912,
      "grad_norm": 3.546875,
      "learning_rate": 5.203915933597355e-06,
      "loss": 1.1633,
      "step": 8910
    },
    {
      "epoch": 0.03126237615087251,
      "grad_norm": 3.21875,
      "learning_rate": 5.20975712333088e-06,
      "loss": 1.1569,
      "step": 8920
    },
    {
      "epoch": 0.0312974236577681,
      "grad_norm": 3.53125,
      "learning_rate": 5.215598313064405e-06,
      "loss": 1.1418,
      "step": 8930
    },
    {
      "epoch": 0.0313324711646637,
      "grad_norm": 3.859375,
      "learning_rate": 5.22143950279793e-06,
      "loss": 1.069,
      "step": 8940
    },
    {
      "epoch": 0.0313675186715593,
      "grad_norm": 3.4375,
      "learning_rate": 5.2272806925314545e-06,
      "loss": 1.1292,
      "step": 8950
    },
    {
      "epoch": 0.031402566178454897,
      "grad_norm": 4.125,
      "learning_rate": 5.233121882264979e-06,
      "loss": 1.1053,
      "step": 8960
    },
    {
      "epoch": 0.031437613685350495,
      "grad_norm": 3.703125,
      "learning_rate": 5.238963071998504e-06,
      "loss": 1.1166,
      "step": 8970
    },
    {
      "epoch": 0.03147266119224609,
      "grad_norm": 3.640625,
      "learning_rate": 5.24480426173203e-06,
      "loss": 1.1308,
      "step": 8980
    },
    {
      "epoch": 0.031507708699141684,
      "grad_norm": 3.703125,
      "learning_rate": 5.250645451465555e-06,
      "loss": 1.1279,
      "step": 8990
    },
    {
      "epoch": 0.03154275620603728,
      "grad_norm": 4.65625,
      "learning_rate": 5.25648664119908e-06,
      "loss": 1.1687,
      "step": 9000
    },
    {
      "epoch": 0.03157780371293288,
      "grad_norm": 3.546875,
      "learning_rate": 5.262327830932605e-06,
      "loss": 1.1034,
      "step": 9010
    },
    {
      "epoch": 0.03161285121982848,
      "grad_norm": 3.40625,
      "learning_rate": 5.26816902066613e-06,
      "loss": 1.139,
      "step": 9020
    },
    {
      "epoch": 0.03164789872672408,
      "grad_norm": 4.0625,
      "learning_rate": 5.274010210399655e-06,
      "loss": 1.1362,
      "step": 9030
    },
    {
      "epoch": 0.03168294623361967,
      "grad_norm": 3.4375,
      "learning_rate": 5.27985140013318e-06,
      "loss": 1.095,
      "step": 9040
    },
    {
      "epoch": 0.03171799374051527,
      "grad_norm": 3.359375,
      "learning_rate": 5.285692589866705e-06,
      "loss": 1.1073,
      "step": 9050
    },
    {
      "epoch": 0.031753041247410865,
      "grad_norm": 3.171875,
      "learning_rate": 5.2915337796002295e-06,
      "loss": 1.1733,
      "step": 9060
    },
    {
      "epoch": 0.031788088754306464,
      "grad_norm": 3.546875,
      "learning_rate": 5.297374969333754e-06,
      "loss": 1.1576,
      "step": 9070
    },
    {
      "epoch": 0.03182313626120206,
      "grad_norm": 3.59375,
      "learning_rate": 5.303216159067279e-06,
      "loss": 1.2078,
      "step": 9080
    },
    {
      "epoch": 0.03185818376809765,
      "grad_norm": 3.484375,
      "learning_rate": 5.309057348800804e-06,
      "loss": 1.1009,
      "step": 9090
    },
    {
      "epoch": 0.03189323127499325,
      "grad_norm": 3.46875,
      "learning_rate": 5.314898538534329e-06,
      "loss": 1.0853,
      "step": 9100
    },
    {
      "epoch": 0.03192827878188885,
      "grad_norm": 3.140625,
      "learning_rate": 5.320739728267854e-06,
      "loss": 1.1497,
      "step": 9110
    },
    {
      "epoch": 0.03196332628878445,
      "grad_norm": 3.828125,
      "learning_rate": 5.3265809180013785e-06,
      "loss": 1.1136,
      "step": 9120
    },
    {
      "epoch": 0.031998373795680046,
      "grad_norm": 3.40625,
      "learning_rate": 5.332422107734903e-06,
      "loss": 1.1686,
      "step": 9130
    },
    {
      "epoch": 0.032033421302575645,
      "grad_norm": 3.828125,
      "learning_rate": 5.338263297468429e-06,
      "loss": 1.1367,
      "step": 9140
    },
    {
      "epoch": 0.032068468809471236,
      "grad_norm": 3.546875,
      "learning_rate": 5.344104487201954e-06,
      "loss": 1.1044,
      "step": 9150
    },
    {
      "epoch": 0.032103516316366834,
      "grad_norm": 3.96875,
      "learning_rate": 5.349945676935479e-06,
      "loss": 1.1266,
      "step": 9160
    },
    {
      "epoch": 0.03213856382326243,
      "grad_norm": 3.40625,
      "learning_rate": 5.355786866669004e-06,
      "loss": 1.0896,
      "step": 9170
    },
    {
      "epoch": 0.03217361133015803,
      "grad_norm": 3.53125,
      "learning_rate": 5.3616280564025284e-06,
      "loss": 1.1562,
      "step": 9180
    },
    {
      "epoch": 0.03220865883705363,
      "grad_norm": 3.25,
      "learning_rate": 5.367469246136053e-06,
      "loss": 1.1583,
      "step": 9190
    },
    {
      "epoch": 0.03224370634394922,
      "grad_norm": 3.453125,
      "learning_rate": 5.373310435869578e-06,
      "loss": 1.1849,
      "step": 9200
    },
    {
      "epoch": 0.03227875385084482,
      "grad_norm": 3.796875,
      "learning_rate": 5.379151625603103e-06,
      "loss": 1.1458,
      "step": 9210
    },
    {
      "epoch": 0.03231380135774042,
      "grad_norm": 3.859375,
      "learning_rate": 5.384992815336628e-06,
      "loss": 1.1494,
      "step": 9220
    },
    {
      "epoch": 0.032348848864636015,
      "grad_norm": 3.640625,
      "learning_rate": 5.390834005070153e-06,
      "loss": 1.1348,
      "step": 9230
    },
    {
      "epoch": 0.03238389637153161,
      "grad_norm": 3.484375,
      "learning_rate": 5.3966751948036775e-06,
      "loss": 1.0636,
      "step": 9240
    },
    {
      "epoch": 0.032418943878427205,
      "grad_norm": 3.734375,
      "learning_rate": 5.402516384537202e-06,
      "loss": 1.1062,
      "step": 9250
    },
    {
      "epoch": 0.0324539913853228,
      "grad_norm": 3.40625,
      "learning_rate": 5.408357574270727e-06,
      "loss": 1.1588,
      "step": 9260
    },
    {
      "epoch": 0.0324890388922184,
      "grad_norm": 3.96875,
      "learning_rate": 5.414198764004252e-06,
      "loss": 1.1116,
      "step": 9270
    },
    {
      "epoch": 0.032524086399114,
      "grad_norm": 3.546875,
      "learning_rate": 5.420039953737778e-06,
      "loss": 1.1071,
      "step": 9280
    },
    {
      "epoch": 0.0325591339060096,
      "grad_norm": 3.1875,
      "learning_rate": 5.425881143471303e-06,
      "loss": 1.1153,
      "step": 9290
    },
    {
      "epoch": 0.032594181412905196,
      "grad_norm": 3.953125,
      "learning_rate": 5.4317223332048274e-06,
      "loss": 1.1788,
      "step": 9300
    },
    {
      "epoch": 0.03262922891980079,
      "grad_norm": 3.71875,
      "learning_rate": 5.437563522938352e-06,
      "loss": 1.1715,
      "step": 9310
    },
    {
      "epoch": 0.032664276426696386,
      "grad_norm": 3.484375,
      "learning_rate": 5.443404712671877e-06,
      "loss": 1.0969,
      "step": 9320
    },
    {
      "epoch": 0.032699323933591984,
      "grad_norm": 3.21875,
      "learning_rate": 5.449245902405403e-06,
      "loss": 1.1078,
      "step": 9330
    },
    {
      "epoch": 0.03273437144048758,
      "grad_norm": 3.921875,
      "learning_rate": 5.455087092138928e-06,
      "loss": 1.14,
      "step": 9340
    },
    {
      "epoch": 0.03276941894738318,
      "grad_norm": 3.15625,
      "learning_rate": 5.4609282818724525e-06,
      "loss": 1.1144,
      "step": 9350
    },
    {
      "epoch": 0.03280446645427877,
      "grad_norm": 3.78125,
      "learning_rate": 5.466769471605977e-06,
      "loss": 1.1365,
      "step": 9360
    },
    {
      "epoch": 0.03283951396117437,
      "grad_norm": 3.71875,
      "learning_rate": 5.472610661339502e-06,
      "loss": 1.1932,
      "step": 9370
    },
    {
      "epoch": 0.03287456146806997,
      "grad_norm": 3.15625,
      "learning_rate": 5.478451851073027e-06,
      "loss": 1.1586,
      "step": 9380
    },
    {
      "epoch": 0.032909608974965567,
      "grad_norm": 3.734375,
      "learning_rate": 5.484293040806552e-06,
      "loss": 1.1858,
      "step": 9390
    },
    {
      "epoch": 0.032944656481861165,
      "grad_norm": 2.859375,
      "learning_rate": 5.490134230540077e-06,
      "loss": 1.1164,
      "step": 9400
    },
    {
      "epoch": 0.03297970398875676,
      "grad_norm": 3.328125,
      "learning_rate": 5.4959754202736016e-06,
      "loss": 1.1835,
      "step": 9410
    },
    {
      "epoch": 0.033014751495652354,
      "grad_norm": 3.6875,
      "learning_rate": 5.501816610007126e-06,
      "loss": 1.1378,
      "step": 9420
    },
    {
      "epoch": 0.03304979900254795,
      "grad_norm": 3.40625,
      "learning_rate": 5.507657799740651e-06,
      "loss": 1.1562,
      "step": 9430
    },
    {
      "epoch": 0.03308484650944355,
      "grad_norm": 3.375,
      "learning_rate": 5.513498989474176e-06,
      "loss": 1.0921,
      "step": 9440
    },
    {
      "epoch": 0.03311989401633915,
      "grad_norm": 3.59375,
      "learning_rate": 5.519340179207702e-06,
      "loss": 1.1806,
      "step": 9450
    },
    {
      "epoch": 0.03315494152323475,
      "grad_norm": 4.0625,
      "learning_rate": 5.525181368941227e-06,
      "loss": 1.1402,
      "step": 9460
    },
    {
      "epoch": 0.03318998903013034,
      "grad_norm": 4.0,
      "learning_rate": 5.5310225586747515e-06,
      "loss": 1.1263,
      "step": 9470
    },
    {
      "epoch": 0.03322503653702594,
      "grad_norm": 4.0625,
      "learning_rate": 5.536863748408276e-06,
      "loss": 1.1866,
      "step": 9480
    },
    {
      "epoch": 0.033260084043921535,
      "grad_norm": 3.359375,
      "learning_rate": 5.542704938141801e-06,
      "loss": 1.1276,
      "step": 9490
    },
    {
      "epoch": 0.033295131550817134,
      "grad_norm": 3.5,
      "learning_rate": 5.548546127875326e-06,
      "loss": 1.0934,
      "step": 9500
    },
    {
      "epoch": 0.03333017905771273,
      "grad_norm": 3.515625,
      "learning_rate": 5.554387317608851e-06,
      "loss": 1.0589,
      "step": 9510
    },
    {
      "epoch": 0.03336522656460832,
      "grad_norm": 3.578125,
      "learning_rate": 5.560228507342376e-06,
      "loss": 1.134,
      "step": 9520
    },
    {
      "epoch": 0.03340027407150392,
      "grad_norm": 3.125,
      "learning_rate": 5.5660696970759005e-06,
      "loss": 1.0922,
      "step": 9530
    },
    {
      "epoch": 0.03343532157839952,
      "grad_norm": 3.234375,
      "learning_rate": 5.571910886809425e-06,
      "loss": 1.0883,
      "step": 9540
    },
    {
      "epoch": 0.03347036908529512,
      "grad_norm": 3.53125,
      "learning_rate": 5.57775207654295e-06,
      "loss": 1.1196,
      "step": 9550
    },
    {
      "epoch": 0.033505416592190716,
      "grad_norm": 3.4375,
      "learning_rate": 5.583593266276475e-06,
      "loss": 1.1307,
      "step": 9560
    },
    {
      "epoch": 0.033540464099086315,
      "grad_norm": 3.59375,
      "learning_rate": 5.58943445601e-06,
      "loss": 1.0557,
      "step": 9570
    },
    {
      "epoch": 0.033575511605981906,
      "grad_norm": 3.59375,
      "learning_rate": 5.595275645743525e-06,
      "loss": 1.1369,
      "step": 9580
    },
    {
      "epoch": 0.033610559112877504,
      "grad_norm": 3.21875,
      "learning_rate": 5.6011168354770505e-06,
      "loss": 1.1367,
      "step": 9590
    },
    {
      "epoch": 0.0336456066197731,
      "grad_norm": 3.703125,
      "learning_rate": 5.606958025210575e-06,
      "loss": 1.136,
      "step": 9600
    },
    {
      "epoch": 0.0336806541266687,
      "grad_norm": 3.359375,
      "learning_rate": 5.6127992149441e-06,
      "loss": 1.1301,
      "step": 9610
    },
    {
      "epoch": 0.0337157016335643,
      "grad_norm": 3.390625,
      "learning_rate": 5.618640404677625e-06,
      "loss": 1.1299,
      "step": 9620
    },
    {
      "epoch": 0.03375074914045989,
      "grad_norm": 3.125,
      "learning_rate": 5.62448159441115e-06,
      "loss": 1.0506,
      "step": 9630
    },
    {
      "epoch": 0.03378579664735549,
      "grad_norm": 2.921875,
      "learning_rate": 5.630322784144675e-06,
      "loss": 1.0624,
      "step": 9640
    },
    {
      "epoch": 0.03382084415425109,
      "grad_norm": 3.53125,
      "learning_rate": 5.6361639738781995e-06,
      "loss": 1.1265,
      "step": 9650
    },
    {
      "epoch": 0.033855891661146685,
      "grad_norm": 3.453125,
      "learning_rate": 5.642005163611724e-06,
      "loss": 1.1452,
      "step": 9660
    },
    {
      "epoch": 0.03389093916804228,
      "grad_norm": 4.15625,
      "learning_rate": 5.64784635334525e-06,
      "loss": 1.0861,
      "step": 9670
    },
    {
      "epoch": 0.03392598667493788,
      "grad_norm": 3.078125,
      "learning_rate": 5.653687543078775e-06,
      "loss": 1.1052,
      "step": 9680
    },
    {
      "epoch": 0.03396103418183347,
      "grad_norm": 3.34375,
      "learning_rate": 5.6595287328123e-06,
      "loss": 1.2087,
      "step": 9690
    },
    {
      "epoch": 0.03399608168872907,
      "grad_norm": 3.78125,
      "learning_rate": 5.665369922545825e-06,
      "loss": 1.1954,
      "step": 9700
    },
    {
      "epoch": 0.03403112919562467,
      "grad_norm": 3.71875,
      "learning_rate": 5.6712111122793494e-06,
      "loss": 1.0982,
      "step": 9710
    },
    {
      "epoch": 0.03406617670252027,
      "grad_norm": 3.4375,
      "learning_rate": 5.677052302012874e-06,
      "loss": 1.1215,
      "step": 9720
    },
    {
      "epoch": 0.034101224209415866,
      "grad_norm": 4.09375,
      "learning_rate": 5.682893491746399e-06,
      "loss": 1.0625,
      "step": 9730
    },
    {
      "epoch": 0.03413627171631146,
      "grad_norm": 3.765625,
      "learning_rate": 5.688734681479924e-06,
      "loss": 1.1223,
      "step": 9740
    },
    {
      "epoch": 0.034171319223207056,
      "grad_norm": 3.578125,
      "learning_rate": 5.694575871213449e-06,
      "loss": 1.1215,
      "step": 9750
    },
    {
      "epoch": 0.034206366730102654,
      "grad_norm": 3.8125,
      "learning_rate": 5.7004170609469745e-06,
      "loss": 1.2243,
      "step": 9760
    },
    {
      "epoch": 0.03424141423699825,
      "grad_norm": 3.65625,
      "learning_rate": 5.706258250680499e-06,
      "loss": 1.2364,
      "step": 9770
    },
    {
      "epoch": 0.03427646174389385,
      "grad_norm": 3.453125,
      "learning_rate": 5.712099440414024e-06,
      "loss": 1.0251,
      "step": 9780
    },
    {
      "epoch": 0.03431150925078944,
      "grad_norm": 3.609375,
      "learning_rate": 5.717940630147549e-06,
      "loss": 1.097,
      "step": 9790
    },
    {
      "epoch": 0.03434655675768504,
      "grad_norm": 3.671875,
      "learning_rate": 5.723781819881074e-06,
      "loss": 1.1613,
      "step": 9800
    },
    {
      "epoch": 0.03438160426458064,
      "grad_norm": 3.421875,
      "learning_rate": 5.729623009614599e-06,
      "loss": 1.1742,
      "step": 9810
    },
    {
      "epoch": 0.03441665177147624,
      "grad_norm": 3.671875,
      "learning_rate": 5.7354641993481236e-06,
      "loss": 1.1428,
      "step": 9820
    },
    {
      "epoch": 0.034451699278371835,
      "grad_norm": 3.578125,
      "learning_rate": 5.741305389081648e-06,
      "loss": 1.0864,
      "step": 9830
    },
    {
      "epoch": 0.03448674678526743,
      "grad_norm": 3.453125,
      "learning_rate": 5.747146578815173e-06,
      "loss": 1.1226,
      "step": 9840
    },
    {
      "epoch": 0.034521794292163024,
      "grad_norm": 3.046875,
      "learning_rate": 5.752987768548698e-06,
      "loss": 1.0405,
      "step": 9850
    },
    {
      "epoch": 0.03455684179905862,
      "grad_norm": 4.03125,
      "learning_rate": 5.758828958282223e-06,
      "loss": 1.1329,
      "step": 9860
    },
    {
      "epoch": 0.03459188930595422,
      "grad_norm": 3.53125,
      "learning_rate": 5.764670148015748e-06,
      "loss": 1.0441,
      "step": 9870
    },
    {
      "epoch": 0.03462693681284982,
      "grad_norm": 4.09375,
      "learning_rate": 5.770511337749273e-06,
      "loss": 1.1868,
      "step": 9880
    },
    {
      "epoch": 0.03466198431974542,
      "grad_norm": 3.265625,
      "learning_rate": 5.7763525274827975e-06,
      "loss": 1.1004,
      "step": 9890
    },
    {
      "epoch": 0.03469703182664101,
      "grad_norm": 3.65625,
      "learning_rate": 5.782193717216323e-06,
      "loss": 1.2033,
      "step": 9900
    },
    {
      "epoch": 0.03473207933353661,
      "grad_norm": 3.71875,
      "learning_rate": 5.788034906949848e-06,
      "loss": 1.0413,
      "step": 9910
    },
    {
      "epoch": 0.034767126840432205,
      "grad_norm": 3.546875,
      "learning_rate": 5.793876096683373e-06,
      "loss": 1.101,
      "step": 9920
    },
    {
      "epoch": 0.034802174347327804,
      "grad_norm": 3.40625,
      "learning_rate": 5.799717286416898e-06,
      "loss": 1.1309,
      "step": 9930
    },
    {
      "epoch": 0.0348372218542234,
      "grad_norm": 3.328125,
      "learning_rate": 5.8055584761504226e-06,
      "loss": 1.1448,
      "step": 9940
    },
    {
      "epoch": 0.034872269361119,
      "grad_norm": 3.703125,
      "learning_rate": 5.811399665883947e-06,
      "loss": 1.1755,
      "step": 9950
    },
    {
      "epoch": 0.03490731686801459,
      "grad_norm": 3.625,
      "learning_rate": 5.817240855617472e-06,
      "loss": 1.1356,
      "step": 9960
    },
    {
      "epoch": 0.03494236437491019,
      "grad_norm": 4.0,
      "learning_rate": 5.823082045350997e-06,
      "loss": 1.1528,
      "step": 9970
    },
    {
      "epoch": 0.03497741188180579,
      "grad_norm": 3.40625,
      "learning_rate": 5.828923235084522e-06,
      "loss": 1.1688,
      "step": 9980
    },
    {
      "epoch": 0.035012459388701386,
      "grad_norm": 3.375,
      "learning_rate": 5.834764424818047e-06,
      "loss": 1.149,
      "step": 9990
    },
    {
      "epoch": 0.035047506895596985,
      "grad_norm": 3.765625,
      "learning_rate": 5.840605614551572e-06,
      "loss": 1.0137,
      "step": 10000
    },
    {
      "epoch": 0.035047506895596985,
      "eval_loss": 1.057034969329834,
      "eval_runtime": 563.5543,
      "eval_samples_per_second": 675.065,
      "eval_steps_per_second": 56.255,
      "step": 10000
    },
    {
      "epoch": 0.035082554402492576,
      "grad_norm": 3.578125,
      "learning_rate": 5.8464468042850965e-06,
      "loss": 1.1128,
      "step": 10010
    },
    {
      "epoch": 0.035117601909388174,
      "grad_norm": 3.390625,
      "learning_rate": 5.852287994018622e-06,
      "loss": 1.0475,
      "step": 10020
    },
    {
      "epoch": 0.03515264941628377,
      "grad_norm": 3.6875,
      "learning_rate": 5.858129183752147e-06,
      "loss": 1.0807,
      "step": 10030
    },
    {
      "epoch": 0.03518769692317937,
      "grad_norm": 3.703125,
      "learning_rate": 5.863970373485672e-06,
      "loss": 1.1529,
      "step": 10040
    },
    {
      "epoch": 0.03522274443007497,
      "grad_norm": 3.734375,
      "learning_rate": 5.869811563219197e-06,
      "loss": 1.1631,
      "step": 10050
    },
    {
      "epoch": 0.03525779193697056,
      "grad_norm": 4.3125,
      "learning_rate": 5.8756527529527215e-06,
      "loss": 1.2045,
      "step": 10060
    },
    {
      "epoch": 0.03529283944386616,
      "grad_norm": 3.171875,
      "learning_rate": 5.881493942686247e-06,
      "loss": 1.136,
      "step": 10070
    },
    {
      "epoch": 0.03532788695076176,
      "grad_norm": 3.515625,
      "learning_rate": 5.887335132419772e-06,
      "loss": 1.0269,
      "step": 10080
    },
    {
      "epoch": 0.035362934457657355,
      "grad_norm": 3.8125,
      "learning_rate": 5.893176322153297e-06,
      "loss": 1.0481,
      "step": 10090
    },
    {
      "epoch": 0.03539798196455295,
      "grad_norm": 3.546875,
      "learning_rate": 5.899017511886822e-06,
      "loss": 1.066,
      "step": 10100
    },
    {
      "epoch": 0.03543302947144855,
      "grad_norm": 3.71875,
      "learning_rate": 5.904858701620347e-06,
      "loss": 1.2217,
      "step": 10110
    },
    {
      "epoch": 0.03546807697834414,
      "grad_norm": 3.703125,
      "learning_rate": 5.9106998913538714e-06,
      "loss": 1.1839,
      "step": 10120
    },
    {
      "epoch": 0.03550312448523974,
      "grad_norm": 3.609375,
      "learning_rate": 5.916541081087396e-06,
      "loss": 1.1237,
      "step": 10130
    },
    {
      "epoch": 0.03553817199213534,
      "grad_norm": 3.359375,
      "learning_rate": 5.922382270820921e-06,
      "loss": 1.1788,
      "step": 10140
    },
    {
      "epoch": 0.03557321949903094,
      "grad_norm": 3.15625,
      "learning_rate": 5.928223460554446e-06,
      "loss": 1.093,
      "step": 10150
    },
    {
      "epoch": 0.035608267005926536,
      "grad_norm": 3.171875,
      "learning_rate": 5.934064650287971e-06,
      "loss": 1.1224,
      "step": 10160
    },
    {
      "epoch": 0.03564331451282213,
      "grad_norm": 3.5,
      "learning_rate": 5.939905840021496e-06,
      "loss": 1.0651,
      "step": 10170
    },
    {
      "epoch": 0.035678362019717726,
      "grad_norm": 3.5625,
      "learning_rate": 5.9457470297550205e-06,
      "loss": 1.1401,
      "step": 10180
    },
    {
      "epoch": 0.035713409526613324,
      "grad_norm": 3.859375,
      "learning_rate": 5.951588219488545e-06,
      "loss": 1.1919,
      "step": 10190
    },
    {
      "epoch": 0.03574845703350892,
      "grad_norm": 3.296875,
      "learning_rate": 5.95742940922207e-06,
      "loss": 1.1371,
      "step": 10200
    },
    {
      "epoch": 0.03578350454040452,
      "grad_norm": 3.296875,
      "learning_rate": 5.963270598955596e-06,
      "loss": 1.1453,
      "step": 10210
    },
    {
      "epoch": 0.03581855204730012,
      "grad_norm": 4.0,
      "learning_rate": 5.969111788689121e-06,
      "loss": 1.1938,
      "step": 10220
    },
    {
      "epoch": 0.03585359955419571,
      "grad_norm": 3.375,
      "learning_rate": 5.974952978422646e-06,
      "loss": 1.0891,
      "step": 10230
    },
    {
      "epoch": 0.03588864706109131,
      "grad_norm": 3.5,
      "learning_rate": 5.9807941681561704e-06,
      "loss": 1.1251,
      "step": 10240
    },
    {
      "epoch": 0.03592369456798691,
      "grad_norm": 3.5625,
      "learning_rate": 5.986635357889695e-06,
      "loss": 1.0911,
      "step": 10250
    },
    {
      "epoch": 0.035958742074882505,
      "grad_norm": 3.84375,
      "learning_rate": 5.99247654762322e-06,
      "loss": 1.1508,
      "step": 10260
    },
    {
      "epoch": 0.0359937895817781,
      "grad_norm": 3.5,
      "learning_rate": 5.998317737356745e-06,
      "loss": 1.0437,
      "step": 10270
    },
    {
      "epoch": 0.036028837088673694,
      "grad_norm": 3.796875,
      "learning_rate": 6.00415892709027e-06,
      "loss": 1.1869,
      "step": 10280
    },
    {
      "epoch": 0.03606388459556929,
      "grad_norm": 3.09375,
      "learning_rate": 6.010000116823795e-06,
      "loss": 1.0959,
      "step": 10290
    },
    {
      "epoch": 0.03609893210246489,
      "grad_norm": 3.015625,
      "learning_rate": 6.0158413065573195e-06,
      "loss": 1.0734,
      "step": 10300
    },
    {
      "epoch": 0.03613397960936049,
      "grad_norm": 3.171875,
      "learning_rate": 6.021682496290844e-06,
      "loss": 1.117,
      "step": 10310
    },
    {
      "epoch": 0.03616902711625609,
      "grad_norm": 4.59375,
      "learning_rate": 6.027523686024369e-06,
      "loss": 1.1044,
      "step": 10320
    },
    {
      "epoch": 0.03620407462315168,
      "grad_norm": 3.828125,
      "learning_rate": 6.033364875757894e-06,
      "loss": 1.0694,
      "step": 10330
    },
    {
      "epoch": 0.03623912213004728,
      "grad_norm": 3.625,
      "learning_rate": 6.039206065491419e-06,
      "loss": 1.0635,
      "step": 10340
    },
    {
      "epoch": 0.036274169636942875,
      "grad_norm": 3.5,
      "learning_rate": 6.0450472552249446e-06,
      "loss": 1.091,
      "step": 10350
    },
    {
      "epoch": 0.036309217143838474,
      "grad_norm": 3.625,
      "learning_rate": 6.050888444958469e-06,
      "loss": 1.1377,
      "step": 10360
    },
    {
      "epoch": 0.03634426465073407,
      "grad_norm": 3.609375,
      "learning_rate": 6.056729634691994e-06,
      "loss": 1.1206,
      "step": 10370
    },
    {
      "epoch": 0.03637931215762967,
      "grad_norm": 3.5,
      "learning_rate": 6.06257082442552e-06,
      "loss": 1.0872,
      "step": 10380
    },
    {
      "epoch": 0.03641435966452526,
      "grad_norm": 3.65625,
      "learning_rate": 6.068412014159045e-06,
      "loss": 1.1346,
      "step": 10390
    },
    {
      "epoch": 0.03644940717142086,
      "grad_norm": 3.59375,
      "learning_rate": 6.07425320389257e-06,
      "loss": 1.165,
      "step": 10400
    },
    {
      "epoch": 0.03648445467831646,
      "grad_norm": 3.421875,
      "learning_rate": 6.0800943936260945e-06,
      "loss": 1.1379,
      "step": 10410
    },
    {
      "epoch": 0.036519502185212056,
      "grad_norm": 3.796875,
      "learning_rate": 6.085935583359619e-06,
      "loss": 1.0894,
      "step": 10420
    },
    {
      "epoch": 0.036554549692107655,
      "grad_norm": 3.546875,
      "learning_rate": 6.091776773093144e-06,
      "loss": 1.1161,
      "step": 10430
    },
    {
      "epoch": 0.036589597199003246,
      "grad_norm": 3.734375,
      "learning_rate": 6.097617962826669e-06,
      "loss": 1.1209,
      "step": 10440
    },
    {
      "epoch": 0.036624644705898844,
      "grad_norm": 3.484375,
      "learning_rate": 6.103459152560194e-06,
      "loss": 1.0726,
      "step": 10450
    },
    {
      "epoch": 0.03665969221279444,
      "grad_norm": 3.484375,
      "learning_rate": 6.109300342293719e-06,
      "loss": 1.1277,
      "step": 10460
    },
    {
      "epoch": 0.03669473971969004,
      "grad_norm": 3.234375,
      "learning_rate": 6.1151415320272435e-06,
      "loss": 1.1036,
      "step": 10470
    },
    {
      "epoch": 0.03672978722658564,
      "grad_norm": 3.734375,
      "learning_rate": 6.120982721760768e-06,
      "loss": 1.151,
      "step": 10480
    },
    {
      "epoch": 0.03676483473348124,
      "grad_norm": 3.640625,
      "learning_rate": 6.126823911494293e-06,
      "loss": 1.1294,
      "step": 10490
    },
    {
      "epoch": 0.03679988224037683,
      "grad_norm": 3.265625,
      "learning_rate": 6.132665101227818e-06,
      "loss": 1.0735,
      "step": 10500
    },
    {
      "epoch": 0.03683492974727243,
      "grad_norm": 3.546875,
      "learning_rate": 6.138506290961343e-06,
      "loss": 1.1301,
      "step": 10510
    },
    {
      "epoch": 0.036869977254168025,
      "grad_norm": 3.421875,
      "learning_rate": 6.144347480694869e-06,
      "loss": 1.0698,
      "step": 10520
    },
    {
      "epoch": 0.03690502476106362,
      "grad_norm": 3.609375,
      "learning_rate": 6.1501886704283935e-06,
      "loss": 1.113,
      "step": 10530
    },
    {
      "epoch": 0.03694007226795922,
      "grad_norm": 4.75,
      "learning_rate": 6.156029860161918e-06,
      "loss": 1.0113,
      "step": 10540
    },
    {
      "epoch": 0.03697511977485481,
      "grad_norm": 3.59375,
      "learning_rate": 6.161871049895443e-06,
      "loss": 1.1709,
      "step": 10550
    },
    {
      "epoch": 0.03701016728175041,
      "grad_norm": 3.53125,
      "learning_rate": 6.167712239628968e-06,
      "loss": 1.1338,
      "step": 10560
    },
    {
      "epoch": 0.03704521478864601,
      "grad_norm": 3.25,
      "learning_rate": 6.173553429362493e-06,
      "loss": 1.1383,
      "step": 10570
    },
    {
      "epoch": 0.03708026229554161,
      "grad_norm": 3.640625,
      "learning_rate": 6.179394619096018e-06,
      "loss": 1.0781,
      "step": 10580
    },
    {
      "epoch": 0.037115309802437206,
      "grad_norm": 3.0,
      "learning_rate": 6.1852358088295425e-06,
      "loss": 1.133,
      "step": 10590
    },
    {
      "epoch": 0.0371503573093328,
      "grad_norm": 3.59375,
      "learning_rate": 6.191076998563067e-06,
      "loss": 1.15,
      "step": 10600
    },
    {
      "epoch": 0.037185404816228396,
      "grad_norm": 3.015625,
      "learning_rate": 6.196918188296592e-06,
      "loss": 1.0747,
      "step": 10610
    },
    {
      "epoch": 0.037220452323123994,
      "grad_norm": 3.4375,
      "learning_rate": 6.202759378030117e-06,
      "loss": 1.0918,
      "step": 10620
    },
    {
      "epoch": 0.03725549983001959,
      "grad_norm": 3.796875,
      "learning_rate": 6.208600567763642e-06,
      "loss": 1.1743,
      "step": 10630
    },
    {
      "epoch": 0.03729054733691519,
      "grad_norm": 3.78125,
      "learning_rate": 6.214441757497167e-06,
      "loss": 1.158,
      "step": 10640
    },
    {
      "epoch": 0.03732559484381079,
      "grad_norm": 3.78125,
      "learning_rate": 6.220282947230692e-06,
      "loss": 1.1178,
      "step": 10650
    },
    {
      "epoch": 0.03736064235070638,
      "grad_norm": 3.53125,
      "learning_rate": 6.226124136964217e-06,
      "loss": 1.2014,
      "step": 10660
    },
    {
      "epoch": 0.03739568985760198,
      "grad_norm": 3.65625,
      "learning_rate": 6.231965326697742e-06,
      "loss": 1.1322,
      "step": 10670
    },
    {
      "epoch": 0.03743073736449758,
      "grad_norm": 3.703125,
      "learning_rate": 6.237806516431267e-06,
      "loss": 1.0407,
      "step": 10680
    },
    {
      "epoch": 0.037465784871393175,
      "grad_norm": 3.734375,
      "learning_rate": 6.243647706164792e-06,
      "loss": 1.1105,
      "step": 10690
    },
    {
      "epoch": 0.03750083237828877,
      "grad_norm": 3.703125,
      "learning_rate": 6.2494888958983175e-06,
      "loss": 1.1459,
      "step": 10700
    },
    {
      "epoch": 0.037535879885184364,
      "grad_norm": 3.296875,
      "learning_rate": 6.2553300856318415e-06,
      "loss": 1.1306,
      "step": 10710
    },
    {
      "epoch": 0.03757092739207996,
      "grad_norm": 3.65625,
      "learning_rate": 6.261171275365367e-06,
      "loss": 1.2035,
      "step": 10720
    },
    {
      "epoch": 0.03760597489897556,
      "grad_norm": 3.984375,
      "learning_rate": 6.267012465098891e-06,
      "loss": 1.0615,
      "step": 10730
    },
    {
      "epoch": 0.03764102240587116,
      "grad_norm": 3.421875,
      "learning_rate": 6.272853654832417e-06,
      "loss": 1.0948,
      "step": 10740
    },
    {
      "epoch": 0.03767606991276676,
      "grad_norm": 3.296875,
      "learning_rate": 6.278694844565941e-06,
      "loss": 1.1395,
      "step": 10750
    },
    {
      "epoch": 0.03771111741966235,
      "grad_norm": 3.578125,
      "learning_rate": 6.284536034299467e-06,
      "loss": 1.0817,
      "step": 10760
    },
    {
      "epoch": 0.03774616492655795,
      "grad_norm": 3.375,
      "learning_rate": 6.2903772240329906e-06,
      "loss": 1.0529,
      "step": 10770
    },
    {
      "epoch": 0.037781212433453545,
      "grad_norm": 3.484375,
      "learning_rate": 6.296218413766516e-06,
      "loss": 1.0674,
      "step": 10780
    },
    {
      "epoch": 0.037816259940349144,
      "grad_norm": 3.484375,
      "learning_rate": 6.30205960350004e-06,
      "loss": 1.1048,
      "step": 10790
    },
    {
      "epoch": 0.03785130744724474,
      "grad_norm": 3.515625,
      "learning_rate": 6.307900793233566e-06,
      "loss": 1.152,
      "step": 10800
    },
    {
      "epoch": 0.03788635495414034,
      "grad_norm": 4.09375,
      "learning_rate": 6.313741982967091e-06,
      "loss": 1.1024,
      "step": 10810
    },
    {
      "epoch": 0.03792140246103593,
      "grad_norm": 3.46875,
      "learning_rate": 6.319583172700616e-06,
      "loss": 1.18,
      "step": 10820
    },
    {
      "epoch": 0.03795644996793153,
      "grad_norm": 3.375,
      "learning_rate": 6.3254243624341405e-06,
      "loss": 1.1678,
      "step": 10830
    },
    {
      "epoch": 0.03799149747482713,
      "grad_norm": 3.375,
      "learning_rate": 6.331265552167666e-06,
      "loss": 1.0432,
      "step": 10840
    },
    {
      "epoch": 0.038026544981722726,
      "grad_norm": 3.328125,
      "learning_rate": 6.33710674190119e-06,
      "loss": 1.1757,
      "step": 10850
    },
    {
      "epoch": 0.038061592488618325,
      "grad_norm": 3.796875,
      "learning_rate": 6.342947931634716e-06,
      "loss": 1.0545,
      "step": 10860
    },
    {
      "epoch": 0.038096639995513916,
      "grad_norm": 3.859375,
      "learning_rate": 6.3487891213682416e-06,
      "loss": 1.1565,
      "step": 10870
    },
    {
      "epoch": 0.038131687502409514,
      "grad_norm": 3.71875,
      "learning_rate": 6.3546303111017656e-06,
      "loss": 1.1684,
      "step": 10880
    },
    {
      "epoch": 0.03816673500930511,
      "grad_norm": 3.640625,
      "learning_rate": 6.360471500835291e-06,
      "loss": 1.1615,
      "step": 10890
    },
    {
      "epoch": 0.03820178251620071,
      "grad_norm": 3.3125,
      "learning_rate": 6.366312690568815e-06,
      "loss": 1.0126,
      "step": 10900
    },
    {
      "epoch": 0.03823683002309631,
      "grad_norm": 3.46875,
      "learning_rate": 6.372153880302341e-06,
      "loss": 1.1274,
      "step": 10910
    },
    {
      "epoch": 0.03827187752999191,
      "grad_norm": 3.484375,
      "learning_rate": 6.377995070035865e-06,
      "loss": 1.2206,
      "step": 10920
    },
    {
      "epoch": 0.0383069250368875,
      "grad_norm": 3.4375,
      "learning_rate": 6.383836259769391e-06,
      "loss": 1.0946,
      "step": 10930
    },
    {
      "epoch": 0.0383419725437831,
      "grad_norm": 3.75,
      "learning_rate": 6.389677449502915e-06,
      "loss": 1.1416,
      "step": 10940
    },
    {
      "epoch": 0.038377020050678695,
      "grad_norm": 3.875,
      "learning_rate": 6.39551863923644e-06,
      "loss": 1.1387,
      "step": 10950
    },
    {
      "epoch": 0.03841206755757429,
      "grad_norm": 3.375,
      "learning_rate": 6.401359828969964e-06,
      "loss": 1.0482,
      "step": 10960
    },
    {
      "epoch": 0.03844711506446989,
      "grad_norm": 3.640625,
      "learning_rate": 6.40720101870349e-06,
      "loss": 1.05,
      "step": 10970
    },
    {
      "epoch": 0.03848216257136548,
      "grad_norm": 3.921875,
      "learning_rate": 6.413042208437015e-06,
      "loss": 1.1931,
      "step": 10980
    },
    {
      "epoch": 0.03851721007826108,
      "grad_norm": 3.78125,
      "learning_rate": 6.4188833981705405e-06,
      "loss": 1.17,
      "step": 10990
    },
    {
      "epoch": 0.03855225758515668,
      "grad_norm": 3.671875,
      "learning_rate": 6.4247245879040645e-06,
      "loss": 1.1194,
      "step": 11000
    },
    {
      "epoch": 0.03858730509205228,
      "grad_norm": 3.296875,
      "learning_rate": 6.43056577763759e-06,
      "loss": 1.1244,
      "step": 11010
    },
    {
      "epoch": 0.038622352598947876,
      "grad_norm": 3.3125,
      "learning_rate": 6.436406967371114e-06,
      "loss": 1.0865,
      "step": 11020
    },
    {
      "epoch": 0.03865740010584347,
      "grad_norm": 3.53125,
      "learning_rate": 6.44224815710464e-06,
      "loss": 1.1451,
      "step": 11030
    },
    {
      "epoch": 0.038692447612739066,
      "grad_norm": 3.546875,
      "learning_rate": 6.448089346838164e-06,
      "loss": 1.1656,
      "step": 11040
    },
    {
      "epoch": 0.038727495119634664,
      "grad_norm": 3.421875,
      "learning_rate": 6.45393053657169e-06,
      "loss": 1.1501,
      "step": 11050
    },
    {
      "epoch": 0.03876254262653026,
      "grad_norm": 3.578125,
      "learning_rate": 6.459771726305214e-06,
      "loss": 1.1128,
      "step": 11060
    },
    {
      "epoch": 0.03879759013342586,
      "grad_norm": 3.796875,
      "learning_rate": 6.465612916038739e-06,
      "loss": 1.1053,
      "step": 11070
    },
    {
      "epoch": 0.03883263764032146,
      "grad_norm": 3.5,
      "learning_rate": 6.471454105772263e-06,
      "loss": 1.0509,
      "step": 11080
    },
    {
      "epoch": 0.03886768514721705,
      "grad_norm": 3.953125,
      "learning_rate": 6.477295295505789e-06,
      "loss": 1.1391,
      "step": 11090
    },
    {
      "epoch": 0.03890273265411265,
      "grad_norm": 3.453125,
      "learning_rate": 6.483136485239313e-06,
      "loss": 1.0638,
      "step": 11100
    },
    {
      "epoch": 0.03893778016100825,
      "grad_norm": 3.46875,
      "learning_rate": 6.488977674972839e-06,
      "loss": 1.1602,
      "step": 11110
    },
    {
      "epoch": 0.038972827667903845,
      "grad_norm": 3.9375,
      "learning_rate": 6.4948188647063635e-06,
      "loss": 1.1247,
      "step": 11120
    },
    {
      "epoch": 0.03900787517479944,
      "grad_norm": 3.484375,
      "learning_rate": 6.500660054439889e-06,
      "loss": 1.0715,
      "step": 11130
    },
    {
      "epoch": 0.039042922681695034,
      "grad_norm": 3.4375,
      "learning_rate": 6.506501244173413e-06,
      "loss": 1.0557,
      "step": 11140
    },
    {
      "epoch": 0.03907797018859063,
      "grad_norm": 3.828125,
      "learning_rate": 6.512342433906939e-06,
      "loss": 1.1238,
      "step": 11150
    },
    {
      "epoch": 0.03911301769548623,
      "grad_norm": 3.5625,
      "learning_rate": 6.518183623640463e-06,
      "loss": 1.0569,
      "step": 11160
    },
    {
      "epoch": 0.03914806520238183,
      "grad_norm": 3.578125,
      "learning_rate": 6.524024813373989e-06,
      "loss": 1.1544,
      "step": 11170
    },
    {
      "epoch": 0.03918311270927743,
      "grad_norm": 3.546875,
      "learning_rate": 6.529866003107513e-06,
      "loss": 1.09,
      "step": 11180
    },
    {
      "epoch": 0.039218160216173026,
      "grad_norm": 3.75,
      "learning_rate": 6.535707192841038e-06,
      "loss": 1.2055,
      "step": 11190
    },
    {
      "epoch": 0.03925320772306862,
      "grad_norm": 3.59375,
      "learning_rate": 6.541548382574562e-06,
      "loss": 1.1033,
      "step": 11200
    },
    {
      "epoch": 0.039288255229964215,
      "grad_norm": 3.703125,
      "learning_rate": 6.547389572308088e-06,
      "loss": 1.1487,
      "step": 11210
    },
    {
      "epoch": 0.039323302736859814,
      "grad_norm": 3.46875,
      "learning_rate": 6.553230762041614e-06,
      "loss": 1.1561,
      "step": 11220
    },
    {
      "epoch": 0.03935835024375541,
      "grad_norm": 3.125,
      "learning_rate": 6.559071951775138e-06,
      "loss": 1.0682,
      "step": 11230
    },
    {
      "epoch": 0.03939339775065101,
      "grad_norm": 3.546875,
      "learning_rate": 6.564913141508663e-06,
      "loss": 1.089,
      "step": 11240
    },
    {
      "epoch": 0.0394284452575466,
      "grad_norm": 3.984375,
      "learning_rate": 6.570754331242187e-06,
      "loss": 1.1998,
      "step": 11250
    },
    {
      "epoch": 0.0394634927644422,
      "grad_norm": 3.28125,
      "learning_rate": 6.576595520975713e-06,
      "loss": 1.1749,
      "step": 11260
    },
    {
      "epoch": 0.0394985402713378,
      "grad_norm": 3.3125,
      "learning_rate": 6.582436710709237e-06,
      "loss": 1.0635,
      "step": 11270
    },
    {
      "epoch": 0.039533587778233396,
      "grad_norm": 3.234375,
      "learning_rate": 6.588277900442763e-06,
      "loss": 1.1926,
      "step": 11280
    },
    {
      "epoch": 0.039568635285128995,
      "grad_norm": 3.640625,
      "learning_rate": 6.5941190901762876e-06,
      "loss": 1.1217,
      "step": 11290
    },
    {
      "epoch": 0.039603682792024586,
      "grad_norm": 4.4375,
      "learning_rate": 6.599960279909813e-06,
      "loss": 1.0477,
      "step": 11300
    },
    {
      "epoch": 0.039638730298920184,
      "grad_norm": 3.5625,
      "learning_rate": 6.605801469643337e-06,
      "loss": 1.1203,
      "step": 11310
    },
    {
      "epoch": 0.03967377780581578,
      "grad_norm": 3.5625,
      "learning_rate": 6.611642659376863e-06,
      "loss": 1.1395,
      "step": 11320
    },
    {
      "epoch": 0.03970882531271138,
      "grad_norm": 3.6875,
      "learning_rate": 6.617483849110387e-06,
      "loss": 1.1264,
      "step": 11330
    },
    {
      "epoch": 0.03974387281960698,
      "grad_norm": 3.296875,
      "learning_rate": 6.623325038843913e-06,
      "loss": 1.0538,
      "step": 11340
    },
    {
      "epoch": 0.03977892032650258,
      "grad_norm": 3.796875,
      "learning_rate": 6.629166228577437e-06,
      "loss": 1.0996,
      "step": 11350
    },
    {
      "epoch": 0.03981396783339817,
      "grad_norm": 4.125,
      "learning_rate": 6.635007418310962e-06,
      "loss": 1.0978,
      "step": 11360
    },
    {
      "epoch": 0.03984901534029377,
      "grad_norm": 3.46875,
      "learning_rate": 6.640848608044486e-06,
      "loss": 1.0613,
      "step": 11370
    },
    {
      "epoch": 0.039884062847189365,
      "grad_norm": 3.546875,
      "learning_rate": 6.646689797778012e-06,
      "loss": 1.1135,
      "step": 11380
    },
    {
      "epoch": 0.03991911035408496,
      "grad_norm": 3.4375,
      "learning_rate": 6.652530987511536e-06,
      "loss": 1.1487,
      "step": 11390
    },
    {
      "epoch": 0.03995415786098056,
      "grad_norm": 4.0,
      "learning_rate": 6.658372177245062e-06,
      "loss": 1.1436,
      "step": 11400
    },
    {
      "epoch": 0.03998920536787615,
      "grad_norm": 3.703125,
      "learning_rate": 6.664213366978586e-06,
      "loss": 1.167,
      "step": 11410
    },
    {
      "epoch": 0.04002425287477175,
      "grad_norm": 3.75,
      "learning_rate": 6.670054556712111e-06,
      "loss": 1.0639,
      "step": 11420
    },
    {
      "epoch": 0.04005930038166735,
      "grad_norm": 3.59375,
      "learning_rate": 6.675895746445636e-06,
      "loss": 1.1012,
      "step": 11430
    },
    {
      "epoch": 0.04009434788856295,
      "grad_norm": 3.484375,
      "learning_rate": 6.681736936179162e-06,
      "loss": 1.0554,
      "step": 11440
    },
    {
      "epoch": 0.040129395395458546,
      "grad_norm": 3.359375,
      "learning_rate": 6.687578125912686e-06,
      "loss": 1.1294,
      "step": 11450
    },
    {
      "epoch": 0.040164442902354144,
      "grad_norm": 3.25,
      "learning_rate": 6.693419315646212e-06,
      "loss": 1.0756,
      "step": 11460
    },
    {
      "epoch": 0.040199490409249736,
      "grad_norm": 3.5625,
      "learning_rate": 6.699260505379736e-06,
      "loss": 1.0966,
      "step": 11470
    },
    {
      "epoch": 0.040234537916145334,
      "grad_norm": 3.515625,
      "learning_rate": 6.705101695113261e-06,
      "loss": 1.0804,
      "step": 11480
    },
    {
      "epoch": 0.04026958542304093,
      "grad_norm": 4.0625,
      "learning_rate": 6.710942884846785e-06,
      "loss": 1.1292,
      "step": 11490
    },
    {
      "epoch": 0.04030463292993653,
      "grad_norm": 3.4375,
      "learning_rate": 6.716784074580311e-06,
      "loss": 1.0916,
      "step": 11500
    },
    {
      "epoch": 0.04033968043683213,
      "grad_norm": 3.3125,
      "learning_rate": 6.722625264313835e-06,
      "loss": 1.1323,
      "step": 11510
    },
    {
      "epoch": 0.04037472794372772,
      "grad_norm": 3.265625,
      "learning_rate": 6.728466454047361e-06,
      "loss": 1.1281,
      "step": 11520
    },
    {
      "epoch": 0.04040977545062332,
      "grad_norm": 3.546875,
      "learning_rate": 6.734307643780885e-06,
      "loss": 1.1583,
      "step": 11530
    },
    {
      "epoch": 0.04044482295751892,
      "grad_norm": 3.25,
      "learning_rate": 6.74014883351441e-06,
      "loss": 1.102,
      "step": 11540
    },
    {
      "epoch": 0.040479870464414515,
      "grad_norm": 3.3125,
      "learning_rate": 6.745990023247934e-06,
      "loss": 1.0696,
      "step": 11550
    },
    {
      "epoch": 0.04051491797131011,
      "grad_norm": 3.4375,
      "learning_rate": 6.75183121298146e-06,
      "loss": 1.0875,
      "step": 11560
    },
    {
      "epoch": 0.040549965478205705,
      "grad_norm": 3.40625,
      "learning_rate": 6.757672402714986e-06,
      "loss": 1.1013,
      "step": 11570
    },
    {
      "epoch": 0.0405850129851013,
      "grad_norm": 3.953125,
      "learning_rate": 6.763513592448511e-06,
      "loss": 1.0801,
      "step": 11580
    },
    {
      "epoch": 0.0406200604919969,
      "grad_norm": 3.375,
      "learning_rate": 6.7693547821820354e-06,
      "loss": 1.0873,
      "step": 11590
    },
    {
      "epoch": 0.0406551079988925,
      "grad_norm": 3.59375,
      "learning_rate": 6.77519597191556e-06,
      "loss": 1.1772,
      "step": 11600
    },
    {
      "epoch": 0.0406901555057881,
      "grad_norm": 3.53125,
      "learning_rate": 6.781037161649086e-06,
      "loss": 1.1717,
      "step": 11610
    },
    {
      "epoch": 0.040725203012683696,
      "grad_norm": 3.59375,
      "learning_rate": 6.78687835138261e-06,
      "loss": 1.205,
      "step": 11620
    },
    {
      "epoch": 0.04076025051957929,
      "grad_norm": 18.625,
      "learning_rate": 6.792719541116136e-06,
      "loss": 1.0641,
      "step": 11630
    },
    {
      "epoch": 0.040795298026474885,
      "grad_norm": 3.21875,
      "learning_rate": 6.79856073084966e-06,
      "loss": 1.0731,
      "step": 11640
    },
    {
      "epoch": 0.040830345533370484,
      "grad_norm": 3.515625,
      "learning_rate": 6.804401920583185e-06,
      "loss": 1.1465,
      "step": 11650
    },
    {
      "epoch": 0.04086539304026608,
      "grad_norm": 3.875,
      "learning_rate": 6.810243110316709e-06,
      "loss": 1.0566,
      "step": 11660
    },
    {
      "epoch": 0.04090044054716168,
      "grad_norm": 3.6875,
      "learning_rate": 6.816084300050235e-06,
      "loss": 1.1258,
      "step": 11670
    },
    {
      "epoch": 0.04093548805405727,
      "grad_norm": 3.71875,
      "learning_rate": 6.821925489783759e-06,
      "loss": 1.0687,
      "step": 11680
    },
    {
      "epoch": 0.04097053556095287,
      "grad_norm": 3.6875,
      "learning_rate": 6.827766679517285e-06,
      "loss": 1.1021,
      "step": 11690
    },
    {
      "epoch": 0.04100558306784847,
      "grad_norm": 3.546875,
      "learning_rate": 6.833607869250809e-06,
      "loss": 1.0469,
      "step": 11700
    },
    {
      "epoch": 0.041040630574744066,
      "grad_norm": 3.703125,
      "learning_rate": 6.8394490589843344e-06,
      "loss": 1.1766,
      "step": 11710
    },
    {
      "epoch": 0.041075678081639665,
      "grad_norm": 3.46875,
      "learning_rate": 6.8452902487178584e-06,
      "loss": 1.0672,
      "step": 11720
    },
    {
      "epoch": 0.04111072558853526,
      "grad_norm": 3.765625,
      "learning_rate": 6.851131438451384e-06,
      "loss": 1.1327,
      "step": 11730
    },
    {
      "epoch": 0.041145773095430854,
      "grad_norm": 3.359375,
      "learning_rate": 6.856972628184909e-06,
      "loss": 1.1198,
      "step": 11740
    },
    {
      "epoch": 0.04118082060232645,
      "grad_norm": 3.546875,
      "learning_rate": 6.862813817918435e-06,
      "loss": 1.1041,
      "step": 11750
    },
    {
      "epoch": 0.04121586810922205,
      "grad_norm": 3.71875,
      "learning_rate": 6.868655007651959e-06,
      "loss": 1.0639,
      "step": 11760
    },
    {
      "epoch": 0.04125091561611765,
      "grad_norm": 3.734375,
      "learning_rate": 6.874496197385484e-06,
      "loss": 1.1126,
      "step": 11770
    },
    {
      "epoch": 0.04128596312301325,
      "grad_norm": 3.65625,
      "learning_rate": 6.880337387119008e-06,
      "loss": 1.1642,
      "step": 11780
    },
    {
      "epoch": 0.04132101062990884,
      "grad_norm": 3.3125,
      "learning_rate": 6.886178576852534e-06,
      "loss": 1.1974,
      "step": 11790
    },
    {
      "epoch": 0.04135605813680444,
      "grad_norm": 3.890625,
      "learning_rate": 6.892019766586058e-06,
      "loss": 1.1495,
      "step": 11800
    },
    {
      "epoch": 0.041391105643700035,
      "grad_norm": 3.84375,
      "learning_rate": 6.897860956319584e-06,
      "loss": 1.1512,
      "step": 11810
    },
    {
      "epoch": 0.04142615315059563,
      "grad_norm": 3.640625,
      "learning_rate": 6.903702146053108e-06,
      "loss": 1.0891,
      "step": 11820
    },
    {
      "epoch": 0.04146120065749123,
      "grad_norm": 3.78125,
      "learning_rate": 6.909543335786633e-06,
      "loss": 1.1226,
      "step": 11830
    },
    {
      "epoch": 0.04149624816438682,
      "grad_norm": 3.5625,
      "learning_rate": 6.915384525520157e-06,
      "loss": 1.1494,
      "step": 11840
    },
    {
      "epoch": 0.04153129567128242,
      "grad_norm": 3.65625,
      "learning_rate": 6.921225715253683e-06,
      "loss": 1.0761,
      "step": 11850
    },
    {
      "epoch": 0.04156634317817802,
      "grad_norm": 4.40625,
      "learning_rate": 6.927066904987207e-06,
      "loss": 1.0505,
      "step": 11860
    },
    {
      "epoch": 0.04160139068507362,
      "grad_norm": 3.65625,
      "learning_rate": 6.932908094720733e-06,
      "loss": 1.1765,
      "step": 11870
    },
    {
      "epoch": 0.041636438191969216,
      "grad_norm": 3.203125,
      "learning_rate": 6.938749284454258e-06,
      "loss": 1.1054,
      "step": 11880
    },
    {
      "epoch": 0.041671485698864814,
      "grad_norm": 3.84375,
      "learning_rate": 6.944590474187783e-06,
      "loss": 1.1778,
      "step": 11890
    },
    {
      "epoch": 0.041706533205760406,
      "grad_norm": 3.65625,
      "learning_rate": 6.950431663921308e-06,
      "loss": 1.1529,
      "step": 11900
    },
    {
      "epoch": 0.041741580712656004,
      "grad_norm": 3.734375,
      "learning_rate": 6.956272853654833e-06,
      "loss": 1.1778,
      "step": 11910
    },
    {
      "epoch": 0.0417766282195516,
      "grad_norm": 3.34375,
      "learning_rate": 6.962114043388359e-06,
      "loss": 1.0043,
      "step": 11920
    },
    {
      "epoch": 0.0418116757264472,
      "grad_norm": 3.890625,
      "learning_rate": 6.967955233121883e-06,
      "loss": 1.1762,
      "step": 11930
    },
    {
      "epoch": 0.0418467232333428,
      "grad_norm": 3.359375,
      "learning_rate": 6.973796422855408e-06,
      "loss": 1.1033,
      "step": 11940
    },
    {
      "epoch": 0.04188177074023839,
      "grad_norm": 3.296875,
      "learning_rate": 6.979637612588932e-06,
      "loss": 1.1341,
      "step": 11950
    },
    {
      "epoch": 0.04191681824713399,
      "grad_norm": 3.609375,
      "learning_rate": 6.985478802322458e-06,
      "loss": 1.0624,
      "step": 11960
    },
    {
      "epoch": 0.04195186575402959,
      "grad_norm": 3.703125,
      "learning_rate": 6.991319992055982e-06,
      "loss": 1.0952,
      "step": 11970
    },
    {
      "epoch": 0.041986913260925185,
      "grad_norm": 3.40625,
      "learning_rate": 6.997161181789508e-06,
      "loss": 1.126,
      "step": 11980
    },
    {
      "epoch": 0.04202196076782078,
      "grad_norm": 3.859375,
      "learning_rate": 7.003002371523032e-06,
      "loss": 1.153,
      "step": 11990
    },
    {
      "epoch": 0.04205700827471638,
      "grad_norm": 3.609375,
      "learning_rate": 7.0088435612565575e-06,
      "loss": 1.0757,
      "step": 12000
    },
    {
      "epoch": 0.04209205578161197,
      "grad_norm": 3.515625,
      "learning_rate": 7.0146847509900815e-06,
      "loss": 1.0971,
      "step": 12010
    },
    {
      "epoch": 0.04212710328850757,
      "grad_norm": 3.296875,
      "learning_rate": 7.020525940723607e-06,
      "loss": 1.1282,
      "step": 12020
    },
    {
      "epoch": 0.04216215079540317,
      "grad_norm": 3.53125,
      "learning_rate": 7.026367130457132e-06,
      "loss": 1.0993,
      "step": 12030
    },
    {
      "epoch": 0.04219719830229877,
      "grad_norm": 3.28125,
      "learning_rate": 7.032208320190657e-06,
      "loss": 1.0991,
      "step": 12040
    },
    {
      "epoch": 0.042232245809194366,
      "grad_norm": 3.96875,
      "learning_rate": 7.038049509924182e-06,
      "loss": 1.0567,
      "step": 12050
    },
    {
      "epoch": 0.04226729331608996,
      "grad_norm": 3.8125,
      "learning_rate": 7.043890699657707e-06,
      "loss": 1.1404,
      "step": 12060
    },
    {
      "epoch": 0.042302340822985555,
      "grad_norm": 3.578125,
      "learning_rate": 7.049731889391231e-06,
      "loss": 1.1304,
      "step": 12070
    },
    {
      "epoch": 0.042337388329881154,
      "grad_norm": 3.625,
      "learning_rate": 7.055573079124757e-06,
      "loss": 1.073,
      "step": 12080
    },
    {
      "epoch": 0.04237243583677675,
      "grad_norm": 3.4375,
      "learning_rate": 7.061414268858281e-06,
      "loss": 1.0735,
      "step": 12090
    },
    {
      "epoch": 0.04240748334367235,
      "grad_norm": 3.828125,
      "learning_rate": 7.067255458591807e-06,
      "loss": 1.1019,
      "step": 12100
    },
    {
      "epoch": 0.04244253085056794,
      "grad_norm": 4.28125,
      "learning_rate": 7.073096648325331e-06,
      "loss": 1.1076,
      "step": 12110
    },
    {
      "epoch": 0.04247757835746354,
      "grad_norm": 3.703125,
      "learning_rate": 7.0789378380588564e-06,
      "loss": 1.0453,
      "step": 12120
    },
    {
      "epoch": 0.04251262586435914,
      "grad_norm": 3.59375,
      "learning_rate": 7.0847790277923804e-06,
      "loss": 1.1216,
      "step": 12130
    },
    {
      "epoch": 0.042547673371254736,
      "grad_norm": 3.390625,
      "learning_rate": 7.090620217525906e-06,
      "loss": 1.097,
      "step": 12140
    },
    {
      "epoch": 0.042582720878150335,
      "grad_norm": 3.296875,
      "learning_rate": 7.09646140725943e-06,
      "loss": 1.0303,
      "step": 12150
    },
    {
      "epoch": 0.04261776838504593,
      "grad_norm": 3.78125,
      "learning_rate": 7.102302596992956e-06,
      "loss": 1.1287,
      "step": 12160
    },
    {
      "epoch": 0.042652815891941524,
      "grad_norm": 3.171875,
      "learning_rate": 7.10814378672648e-06,
      "loss": 1.113,
      "step": 12170
    },
    {
      "epoch": 0.04268786339883712,
      "grad_norm": 3.734375,
      "learning_rate": 7.1139849764600055e-06,
      "loss": 1.2335,
      "step": 12180
    },
    {
      "epoch": 0.04272291090573272,
      "grad_norm": 3.71875,
      "learning_rate": 7.11982616619353e-06,
      "loss": 1.1362,
      "step": 12190
    },
    {
      "epoch": 0.04275795841262832,
      "grad_norm": 3.46875,
      "learning_rate": 7.125667355927056e-06,
      "loss": 1.1572,
      "step": 12200
    },
    {
      "epoch": 0.04279300591952392,
      "grad_norm": 3.078125,
      "learning_rate": 7.13150854566058e-06,
      "loss": 1.1123,
      "step": 12210
    },
    {
      "epoch": 0.04282805342641951,
      "grad_norm": 3.6875,
      "learning_rate": 7.137349735394106e-06,
      "loss": 1.1332,
      "step": 12220
    },
    {
      "epoch": 0.04286310093331511,
      "grad_norm": 3.484375,
      "learning_rate": 7.14319092512763e-06,
      "loss": 1.1184,
      "step": 12230
    },
    {
      "epoch": 0.042898148440210705,
      "grad_norm": 3.46875,
      "learning_rate": 7.149032114861155e-06,
      "loss": 1.0717,
      "step": 12240
    },
    {
      "epoch": 0.042933195947106303,
      "grad_norm": 3.46875,
      "learning_rate": 7.154873304594681e-06,
      "loss": 1.1466,
      "step": 12250
    },
    {
      "epoch": 0.0429682434540019,
      "grad_norm": 3.375,
      "learning_rate": 7.160714494328205e-06,
      "loss": 1.0349,
      "step": 12260
    },
    {
      "epoch": 0.04300329096089749,
      "grad_norm": 3.234375,
      "learning_rate": 7.166555684061731e-06,
      "loss": 1.1194,
      "step": 12270
    },
    {
      "epoch": 0.04303833846779309,
      "grad_norm": 3.4375,
      "learning_rate": 7.172396873795255e-06,
      "loss": 1.146,
      "step": 12280
    },
    {
      "epoch": 0.04307338597468869,
      "grad_norm": 3.765625,
      "learning_rate": 7.1782380635287805e-06,
      "loss": 1.1667,
      "step": 12290
    },
    {
      "epoch": 0.04310843348158429,
      "grad_norm": 3.390625,
      "learning_rate": 7.1840792532623045e-06,
      "loss": 1.0567,
      "step": 12300
    },
    {
      "epoch": 0.043143480988479886,
      "grad_norm": 3.921875,
      "learning_rate": 7.18992044299583e-06,
      "loss": 1.0284,
      "step": 12310
    },
    {
      "epoch": 0.043178528495375484,
      "grad_norm": 4.0,
      "learning_rate": 7.195761632729354e-06,
      "loss": 1.1674,
      "step": 12320
    },
    {
      "epoch": 0.043213576002271076,
      "grad_norm": 3.0625,
      "learning_rate": 7.20160282246288e-06,
      "loss": 1.0775,
      "step": 12330
    },
    {
      "epoch": 0.043248623509166674,
      "grad_norm": 3.3125,
      "learning_rate": 7.207444012196405e-06,
      "loss": 1.0616,
      "step": 12340
    },
    {
      "epoch": 0.04328367101606227,
      "grad_norm": 3.609375,
      "learning_rate": 7.2132852019299296e-06,
      "loss": 1.0874,
      "step": 12350
    },
    {
      "epoch": 0.04331871852295787,
      "grad_norm": 2.984375,
      "learning_rate": 7.219126391663454e-06,
      "loss": 1.1146,
      "step": 12360
    },
    {
      "epoch": 0.04335376602985347,
      "grad_norm": 3.8125,
      "learning_rate": 7.22496758139698e-06,
      "loss": 1.1229,
      "step": 12370
    },
    {
      "epoch": 0.04338881353674906,
      "grad_norm": 3.5,
      "learning_rate": 7.230808771130504e-06,
      "loss": 1.0767,
      "step": 12380
    },
    {
      "epoch": 0.04342386104364466,
      "grad_norm": 3.546875,
      "learning_rate": 7.23664996086403e-06,
      "loss": 1.1513,
      "step": 12390
    },
    {
      "epoch": 0.04345890855054026,
      "grad_norm": 4.1875,
      "learning_rate": 7.242491150597554e-06,
      "loss": 0.9748,
      "step": 12400
    },
    {
      "epoch": 0.043493956057435855,
      "grad_norm": 3.75,
      "learning_rate": 7.2483323403310795e-06,
      "loss": 1.0954,
      "step": 12410
    },
    {
      "epoch": 0.04352900356433145,
      "grad_norm": 3.640625,
      "learning_rate": 7.2541735300646035e-06,
      "loss": 1.0552,
      "step": 12420
    },
    {
      "epoch": 0.04356405107122705,
      "grad_norm": 3.453125,
      "learning_rate": 7.260014719798129e-06,
      "loss": 1.0456,
      "step": 12430
    },
    {
      "epoch": 0.04359909857812264,
      "grad_norm": 3.203125,
      "learning_rate": 7.265855909531653e-06,
      "loss": 1.066,
      "step": 12440
    },
    {
      "epoch": 0.04363414608501824,
      "grad_norm": 3.75,
      "learning_rate": 7.271697099265179e-06,
      "loss": 1.0254,
      "step": 12450
    },
    {
      "epoch": 0.04366919359191384,
      "grad_norm": 3.71875,
      "learning_rate": 7.277538288998703e-06,
      "loss": 1.0851,
      "step": 12460
    },
    {
      "epoch": 0.04370424109880944,
      "grad_norm": 3.359375,
      "learning_rate": 7.2833794787322285e-06,
      "loss": 1.1992,
      "step": 12470
    },
    {
      "epoch": 0.043739288605705036,
      "grad_norm": 4.0625,
      "learning_rate": 7.289220668465753e-06,
      "loss": 1.0694,
      "step": 12480
    },
    {
      "epoch": 0.04377433611260063,
      "grad_norm": 3.53125,
      "learning_rate": 7.295061858199278e-06,
      "loss": 1.127,
      "step": 12490
    },
    {
      "epoch": 0.043809383619496226,
      "grad_norm": 3.15625,
      "learning_rate": 7.300903047932803e-06,
      "loss": 1.1255,
      "step": 12500
    },
    {
      "epoch": 0.043844431126391824,
      "grad_norm": 3.09375,
      "learning_rate": 7.306744237666329e-06,
      "loss": 1.0051,
      "step": 12510
    },
    {
      "epoch": 0.04387947863328742,
      "grad_norm": 3.6875,
      "learning_rate": 7.312585427399853e-06,
      "loss": 1.118,
      "step": 12520
    },
    {
      "epoch": 0.04391452614018302,
      "grad_norm": 3.375,
      "learning_rate": 7.3184266171333785e-06,
      "loss": 1.0558,
      "step": 12530
    },
    {
      "epoch": 0.04394957364707861,
      "grad_norm": 3.765625,
      "learning_rate": 7.3242678068669024e-06,
      "loss": 1.054,
      "step": 12540
    },
    {
      "epoch": 0.04398462115397421,
      "grad_norm": 3.71875,
      "learning_rate": 7.330108996600428e-06,
      "loss": 1.0847,
      "step": 12550
    },
    {
      "epoch": 0.04401966866086981,
      "grad_norm": 3.8125,
      "learning_rate": 7.335950186333952e-06,
      "loss": 1.0783,
      "step": 12560
    },
    {
      "epoch": 0.044054716167765406,
      "grad_norm": 3.796875,
      "learning_rate": 7.341791376067478e-06,
      "loss": 1.1259,
      "step": 12570
    },
    {
      "epoch": 0.044089763674661005,
      "grad_norm": 3.671875,
      "learning_rate": 7.347632565801002e-06,
      "loss": 1.053,
      "step": 12580
    },
    {
      "epoch": 0.0441248111815566,
      "grad_norm": 3.125,
      "learning_rate": 7.3534737555345275e-06,
      "loss": 1.1125,
      "step": 12590
    },
    {
      "epoch": 0.044159858688452194,
      "grad_norm": 3.640625,
      "learning_rate": 7.359314945268053e-06,
      "loss": 1.1963,
      "step": 12600
    },
    {
      "epoch": 0.04419490619534779,
      "grad_norm": 3.453125,
      "learning_rate": 7.365156135001577e-06,
      "loss": 1.1146,
      "step": 12610
    },
    {
      "epoch": 0.04422995370224339,
      "grad_norm": 3.484375,
      "learning_rate": 7.370997324735103e-06,
      "loss": 1.0866,
      "step": 12620
    },
    {
      "epoch": 0.04426500120913899,
      "grad_norm": 3.265625,
      "learning_rate": 7.376838514468627e-06,
      "loss": 1.0654,
      "step": 12630
    },
    {
      "epoch": 0.04430004871603459,
      "grad_norm": 3.609375,
      "learning_rate": 7.382679704202153e-06,
      "loss": 1.1384,
      "step": 12640
    },
    {
      "epoch": 0.04433509622293018,
      "grad_norm": 3.546875,
      "learning_rate": 7.3885208939356774e-06,
      "loss": 1.1579,
      "step": 12650
    },
    {
      "epoch": 0.04437014372982578,
      "grad_norm": 3.9375,
      "learning_rate": 7.394362083669202e-06,
      "loss": 1.1931,
      "step": 12660
    },
    {
      "epoch": 0.044405191236721375,
      "grad_norm": 3.65625,
      "learning_rate": 7.400203273402727e-06,
      "loss": 1.1133,
      "step": 12670
    },
    {
      "epoch": 0.044440238743616974,
      "grad_norm": 3.625,
      "learning_rate": 7.406044463136253e-06,
      "loss": 1.0746,
      "step": 12680
    },
    {
      "epoch": 0.04447528625051257,
      "grad_norm": 3.546875,
      "learning_rate": 7.411885652869777e-06,
      "loss": 1.0646,
      "step": 12690
    },
    {
      "epoch": 0.04451033375740817,
      "grad_norm": 3.9375,
      "learning_rate": 7.4177268426033025e-06,
      "loss": 1.1509,
      "step": 12700
    },
    {
      "epoch": 0.04454538126430376,
      "grad_norm": 3.3125,
      "learning_rate": 7.4235680323368265e-06,
      "loss": 1.0945,
      "step": 12710
    },
    {
      "epoch": 0.04458042877119936,
      "grad_norm": 3.21875,
      "learning_rate": 7.429409222070352e-06,
      "loss": 1.0471,
      "step": 12720
    },
    {
      "epoch": 0.04461547627809496,
      "grad_norm": 3.828125,
      "learning_rate": 7.435250411803876e-06,
      "loss": 1.1544,
      "step": 12730
    },
    {
      "epoch": 0.044650523784990556,
      "grad_norm": 3.328125,
      "learning_rate": 7.441091601537402e-06,
      "loss": 1.1572,
      "step": 12740
    },
    {
      "epoch": 0.044685571291886154,
      "grad_norm": 3.125,
      "learning_rate": 7.446932791270926e-06,
      "loss": 1.0623,
      "step": 12750
    },
    {
      "epoch": 0.044720618798781746,
      "grad_norm": 4.40625,
      "learning_rate": 7.4527739810044516e-06,
      "loss": 1.015,
      "step": 12760
    },
    {
      "epoch": 0.044755666305677344,
      "grad_norm": 3.421875,
      "learning_rate": 7.4586151707379756e-06,
      "loss": 1.2194,
      "step": 12770
    },
    {
      "epoch": 0.04479071381257294,
      "grad_norm": 3.0,
      "learning_rate": 7.464456360471501e-06,
      "loss": 1.001,
      "step": 12780
    },
    {
      "epoch": 0.04482576131946854,
      "grad_norm": 3.640625,
      "learning_rate": 7.470297550205026e-06,
      "loss": 1.2073,
      "step": 12790
    },
    {
      "epoch": 0.04486080882636414,
      "grad_norm": 3.453125,
      "learning_rate": 7.476138739938551e-06,
      "loss": 1.0702,
      "step": 12800
    },
    {
      "epoch": 0.04489585633325973,
      "grad_norm": 3.65625,
      "learning_rate": 7.481979929672076e-06,
      "loss": 1.0502,
      "step": 12810
    },
    {
      "epoch": 0.04493090384015533,
      "grad_norm": 3.765625,
      "learning_rate": 7.4878211194056015e-06,
      "loss": 1.1105,
      "step": 12820
    },
    {
      "epoch": 0.04496595134705093,
      "grad_norm": 3.765625,
      "learning_rate": 7.4936623091391255e-06,
      "loss": 1.0579,
      "step": 12830
    },
    {
      "epoch": 0.045000998853946525,
      "grad_norm": 3.375,
      "learning_rate": 7.499503498872651e-06,
      "loss": 1.0804,
      "step": 12840
    },
    {
      "epoch": 0.04503604636084212,
      "grad_norm": 3.609375,
      "learning_rate": 7.505344688606175e-06,
      "loss": 1.1216,
      "step": 12850
    },
    {
      "epoch": 0.04507109386773772,
      "grad_norm": 4.0625,
      "learning_rate": 7.511185878339701e-06,
      "loss": 1.1649,
      "step": 12860
    },
    {
      "epoch": 0.04510614137463331,
      "grad_norm": 3.390625,
      "learning_rate": 7.517027068073225e-06,
      "loss": 1.103,
      "step": 12870
    },
    {
      "epoch": 0.04514118888152891,
      "grad_norm": 3.609375,
      "learning_rate": 7.5228682578067505e-06,
      "loss": 1.1205,
      "step": 12880
    },
    {
      "epoch": 0.04517623638842451,
      "grad_norm": 3.5,
      "learning_rate": 7.5287094475402745e-06,
      "loss": 1.0864,
      "step": 12890
    },
    {
      "epoch": 0.04521128389532011,
      "grad_norm": 4.0625,
      "learning_rate": 7.5345506372738e-06,
      "loss": 1.0805,
      "step": 12900
    },
    {
      "epoch": 0.045246331402215706,
      "grad_norm": 3.8125,
      "learning_rate": 7.540391827007324e-06,
      "loss": 1.1525,
      "step": 12910
    },
    {
      "epoch": 0.0452813789091113,
      "grad_norm": 3.203125,
      "learning_rate": 7.54623301674085e-06,
      "loss": 1.0848,
      "step": 12920
    },
    {
      "epoch": 0.045316426416006896,
      "grad_norm": 3.921875,
      "learning_rate": 7.552074206474376e-06,
      "loss": 1.2061,
      "step": 12930
    },
    {
      "epoch": 0.045351473922902494,
      "grad_norm": 3.875,
      "learning_rate": 7.5579153962079e-06,
      "loss": 1.076,
      "step": 12940
    },
    {
      "epoch": 0.04538652142979809,
      "grad_norm": 2.90625,
      "learning_rate": 7.563756585941425e-06,
      "loss": 1.1158,
      "step": 12950
    },
    {
      "epoch": 0.04542156893669369,
      "grad_norm": 3.859375,
      "learning_rate": 7.56959777567495e-06,
      "loss": 1.1316,
      "step": 12960
    },
    {
      "epoch": 0.04545661644358929,
      "grad_norm": 3.71875,
      "learning_rate": 7.575438965408475e-06,
      "loss": 1.0581,
      "step": 12970
    },
    {
      "epoch": 0.04549166395048488,
      "grad_norm": 3.5625,
      "learning_rate": 7.581280155142e-06,
      "loss": 1.0686,
      "step": 12980
    },
    {
      "epoch": 0.04552671145738048,
      "grad_norm": 3.1875,
      "learning_rate": 7.5871213448755255e-06,
      "loss": 1.0526,
      "step": 12990
    },
    {
      "epoch": 0.045561758964276076,
      "grad_norm": 3.09375,
      "learning_rate": 7.5929625346090495e-06,
      "loss": 1.0282,
      "step": 13000
    },
    {
      "epoch": 0.045596806471171675,
      "grad_norm": 3.328125,
      "learning_rate": 7.598803724342575e-06,
      "loss": 1.1176,
      "step": 13010
    },
    {
      "epoch": 0.04563185397806727,
      "grad_norm": 3.59375,
      "learning_rate": 7.604644914076099e-06,
      "loss": 1.0811,
      "step": 13020
    },
    {
      "epoch": 0.045666901484962864,
      "grad_norm": 3.375,
      "learning_rate": 7.610486103809625e-06,
      "loss": 1.1589,
      "step": 13030
    },
    {
      "epoch": 0.04570194899185846,
      "grad_norm": 3.640625,
      "learning_rate": 7.616327293543149e-06,
      "loss": 1.1237,
      "step": 13040
    },
    {
      "epoch": 0.04573699649875406,
      "grad_norm": 3.53125,
      "learning_rate": 7.622168483276675e-06,
      "loss": 1.0576,
      "step": 13050
    },
    {
      "epoch": 0.04577204400564966,
      "grad_norm": 3.09375,
      "learning_rate": 7.628009673010199e-06,
      "loss": 1.0121,
      "step": 13060
    },
    {
      "epoch": 0.04580709151254526,
      "grad_norm": 3.796875,
      "learning_rate": 7.633850862743724e-06,
      "loss": 1.1598,
      "step": 13070
    },
    {
      "epoch": 0.04584213901944085,
      "grad_norm": 3.75,
      "learning_rate": 7.639692052477249e-06,
      "loss": 1.1338,
      "step": 13080
    },
    {
      "epoch": 0.04587718652633645,
      "grad_norm": 3.421875,
      "learning_rate": 7.645533242210774e-06,
      "loss": 1.0978,
      "step": 13090
    },
    {
      "epoch": 0.045912234033232045,
      "grad_norm": 3.09375,
      "learning_rate": 7.651374431944299e-06,
      "loss": 1.1091,
      "step": 13100
    },
    {
      "epoch": 0.045947281540127644,
      "grad_norm": 3.59375,
      "learning_rate": 7.657215621677824e-06,
      "loss": 1.0405,
      "step": 13110
    },
    {
      "epoch": 0.04598232904702324,
      "grad_norm": 3.84375,
      "learning_rate": 7.663056811411349e-06,
      "loss": 1.0967,
      "step": 13120
    },
    {
      "epoch": 0.04601737655391884,
      "grad_norm": 3.625,
      "learning_rate": 7.668898001144873e-06,
      "loss": 1.0872,
      "step": 13130
    },
    {
      "epoch": 0.04605242406081443,
      "grad_norm": 3.171875,
      "learning_rate": 7.674739190878398e-06,
      "loss": 1.0284,
      "step": 13140
    },
    {
      "epoch": 0.04608747156771003,
      "grad_norm": 4.25,
      "learning_rate": 7.680580380611923e-06,
      "loss": 1.192,
      "step": 13150
    },
    {
      "epoch": 0.04612251907460563,
      "grad_norm": 3.40625,
      "learning_rate": 7.686421570345448e-06,
      "loss": 1.0384,
      "step": 13160
    },
    {
      "epoch": 0.046157566581501226,
      "grad_norm": 3.375,
      "learning_rate": 7.692262760078973e-06,
      "loss": 1.0908,
      "step": 13170
    },
    {
      "epoch": 0.046192614088396824,
      "grad_norm": 3.90625,
      "learning_rate": 7.698103949812498e-06,
      "loss": 1.0836,
      "step": 13180
    },
    {
      "epoch": 0.046227661595292416,
      "grad_norm": 3.65625,
      "learning_rate": 7.703945139546022e-06,
      "loss": 1.1081,
      "step": 13190
    },
    {
      "epoch": 0.046262709102188014,
      "grad_norm": 3.796875,
      "learning_rate": 7.709786329279547e-06,
      "loss": 1.0585,
      "step": 13200
    },
    {
      "epoch": 0.04629775660908361,
      "grad_norm": 3.515625,
      "learning_rate": 7.715627519013074e-06,
      "loss": 1.1271,
      "step": 13210
    },
    {
      "epoch": 0.04633280411597921,
      "grad_norm": 3.359375,
      "learning_rate": 7.721468708746597e-06,
      "loss": 1.1214,
      "step": 13220
    },
    {
      "epoch": 0.04636785162287481,
      "grad_norm": 3.953125,
      "learning_rate": 7.727309898480123e-06,
      "loss": 1.1393,
      "step": 13230
    },
    {
      "epoch": 0.04640289912977041,
      "grad_norm": 3.78125,
      "learning_rate": 7.733151088213647e-06,
      "loss": 1.0877,
      "step": 13240
    },
    {
      "epoch": 0.046437946636666,
      "grad_norm": 3.578125,
      "learning_rate": 7.738992277947173e-06,
      "loss": 1.0792,
      "step": 13250
    },
    {
      "epoch": 0.0464729941435616,
      "grad_norm": 3.453125,
      "learning_rate": 7.744833467680696e-06,
      "loss": 1.0731,
      "step": 13260
    },
    {
      "epoch": 0.046508041650457195,
      "grad_norm": 3.453125,
      "learning_rate": 7.750674657414223e-06,
      "loss": 1.0991,
      "step": 13270
    },
    {
      "epoch": 0.04654308915735279,
      "grad_norm": 3.265625,
      "learning_rate": 7.756515847147748e-06,
      "loss": 1.0655,
      "step": 13280
    },
    {
      "epoch": 0.04657813666424839,
      "grad_norm": 3.328125,
      "learning_rate": 7.762357036881273e-06,
      "loss": 1.1406,
      "step": 13290
    },
    {
      "epoch": 0.04661318417114398,
      "grad_norm": 3.609375,
      "learning_rate": 7.768198226614797e-06,
      "loss": 1.0899,
      "step": 13300
    },
    {
      "epoch": 0.04664823167803958,
      "grad_norm": 3.546875,
      "learning_rate": 7.774039416348322e-06,
      "loss": 1.1852,
      "step": 13310
    },
    {
      "epoch": 0.04668327918493518,
      "grad_norm": 3.453125,
      "learning_rate": 7.779880606081847e-06,
      "loss": 1.0499,
      "step": 13320
    },
    {
      "epoch": 0.04671832669183078,
      "grad_norm": 3.6875,
      "learning_rate": 7.785721795815372e-06,
      "loss": 1.1212,
      "step": 13330
    },
    {
      "epoch": 0.046753374198726376,
      "grad_norm": 3.6875,
      "learning_rate": 7.791562985548897e-06,
      "loss": 1.1323,
      "step": 13340
    },
    {
      "epoch": 0.04678842170562197,
      "grad_norm": 3.5625,
      "learning_rate": 7.797404175282422e-06,
      "loss": 1.0632,
      "step": 13350
    },
    {
      "epoch": 0.046823469212517566,
      "grad_norm": 3.125,
      "learning_rate": 7.803245365015948e-06,
      "loss": 1.1631,
      "step": 13360
    },
    {
      "epoch": 0.046858516719413164,
      "grad_norm": 3.40625,
      "learning_rate": 7.809086554749471e-06,
      "loss": 1.0674,
      "step": 13370
    },
    {
      "epoch": 0.04689356422630876,
      "grad_norm": 2.890625,
      "learning_rate": 7.814927744482998e-06,
      "loss": 1.061,
      "step": 13380
    },
    {
      "epoch": 0.04692861173320436,
      "grad_norm": 3.703125,
      "learning_rate": 7.820768934216521e-06,
      "loss": 1.1064,
      "step": 13390
    },
    {
      "epoch": 0.04696365924009996,
      "grad_norm": 3.578125,
      "learning_rate": 7.826610123950048e-06,
      "loss": 1.0725,
      "step": 13400
    },
    {
      "epoch": 0.04699870674699555,
      "grad_norm": 3.5,
      "learning_rate": 7.83245131368357e-06,
      "loss": 1.112,
      "step": 13410
    },
    {
      "epoch": 0.04703375425389115,
      "grad_norm": 3.84375,
      "learning_rate": 7.838292503417097e-06,
      "loss": 1.0846,
      "step": 13420
    },
    {
      "epoch": 0.047068801760786746,
      "grad_norm": 3.578125,
      "learning_rate": 7.84413369315062e-06,
      "loss": 1.0513,
      "step": 13430
    },
    {
      "epoch": 0.047103849267682345,
      "grad_norm": 3.484375,
      "learning_rate": 7.849974882884147e-06,
      "loss": 1.0745,
      "step": 13440
    },
    {
      "epoch": 0.04713889677457794,
      "grad_norm": 3.203125,
      "learning_rate": 7.85581607261767e-06,
      "loss": 1.1326,
      "step": 13450
    },
    {
      "epoch": 0.047173944281473534,
      "grad_norm": 4.0,
      "learning_rate": 7.861657262351197e-06,
      "loss": 1.0567,
      "step": 13460
    },
    {
      "epoch": 0.04720899178836913,
      "grad_norm": 6.8125,
      "learning_rate": 7.86749845208472e-06,
      "loss": 1.1071,
      "step": 13470
    },
    {
      "epoch": 0.04724403929526473,
      "grad_norm": 3.5,
      "learning_rate": 7.873339641818246e-06,
      "loss": 1.0784,
      "step": 13480
    },
    {
      "epoch": 0.04727908680216033,
      "grad_norm": 3.484375,
      "learning_rate": 7.879180831551771e-06,
      "loss": 0.9857,
      "step": 13490
    },
    {
      "epoch": 0.04731413430905593,
      "grad_norm": 3.328125,
      "learning_rate": 7.885022021285296e-06,
      "loss": 1.0664,
      "step": 13500
    },
    {
      "epoch": 0.047349181815951526,
      "grad_norm": 3.3125,
      "learning_rate": 7.89086321101882e-06,
      "loss": 1.1032,
      "step": 13510
    },
    {
      "epoch": 0.04738422932284712,
      "grad_norm": 3.609375,
      "learning_rate": 7.896704400752346e-06,
      "loss": 1.0527,
      "step": 13520
    },
    {
      "epoch": 0.047419276829742715,
      "grad_norm": 3.5,
      "learning_rate": 7.90254559048587e-06,
      "loss": 1.0527,
      "step": 13530
    },
    {
      "epoch": 0.047454324336638314,
      "grad_norm": 3.9375,
      "learning_rate": 7.908386780219395e-06,
      "loss": 1.076,
      "step": 13540
    },
    {
      "epoch": 0.04748937184353391,
      "grad_norm": 3.484375,
      "learning_rate": 7.91422796995292e-06,
      "loss": 1.1128,
      "step": 13550
    },
    {
      "epoch": 0.04752441935042951,
      "grad_norm": 3.1875,
      "learning_rate": 7.920069159686445e-06,
      "loss": 1.0741,
      "step": 13560
    },
    {
      "epoch": 0.0475594668573251,
      "grad_norm": 3.5,
      "learning_rate": 7.92591034941997e-06,
      "loss": 1.0205,
      "step": 13570
    },
    {
      "epoch": 0.0475945143642207,
      "grad_norm": 3.515625,
      "learning_rate": 7.931751539153495e-06,
      "loss": 1.0259,
      "step": 13580
    },
    {
      "epoch": 0.0476295618711163,
      "grad_norm": 3.53125,
      "learning_rate": 7.93759272888702e-06,
      "loss": 1.0235,
      "step": 13590
    },
    {
      "epoch": 0.047664609378011896,
      "grad_norm": 3.234375,
      "learning_rate": 7.943433918620544e-06,
      "loss": 1.1243,
      "step": 13600
    },
    {
      "epoch": 0.047699656884907495,
      "grad_norm": 3.25,
      "learning_rate": 7.94927510835407e-06,
      "loss": 1.0724,
      "step": 13610
    },
    {
      "epoch": 0.047734704391803086,
      "grad_norm": 3.25,
      "learning_rate": 7.955116298087594e-06,
      "loss": 1.1074,
      "step": 13620
    },
    {
      "epoch": 0.047769751898698684,
      "grad_norm": 3.609375,
      "learning_rate": 7.96095748782112e-06,
      "loss": 1.0698,
      "step": 13630
    },
    {
      "epoch": 0.04780479940559428,
      "grad_norm": 3.421875,
      "learning_rate": 7.966798677554644e-06,
      "loss": 1.0872,
      "step": 13640
    },
    {
      "epoch": 0.04783984691248988,
      "grad_norm": 3.578125,
      "learning_rate": 7.97263986728817e-06,
      "loss": 1.1309,
      "step": 13650
    },
    {
      "epoch": 0.04787489441938548,
      "grad_norm": 3.5,
      "learning_rate": 7.978481057021695e-06,
      "loss": 1.0953,
      "step": 13660
    },
    {
      "epoch": 0.04790994192628108,
      "grad_norm": 3.328125,
      "learning_rate": 7.98432224675522e-06,
      "loss": 1.108,
      "step": 13670
    },
    {
      "epoch": 0.04794498943317667,
      "grad_norm": 3.59375,
      "learning_rate": 7.990163436488745e-06,
      "loss": 1.0874,
      "step": 13680
    },
    {
      "epoch": 0.04798003694007227,
      "grad_norm": 3.53125,
      "learning_rate": 7.99600462622227e-06,
      "loss": 1.1639,
      "step": 13690
    },
    {
      "epoch": 0.048015084446967865,
      "grad_norm": 3.765625,
      "learning_rate": 8.001845815955795e-06,
      "loss": 1.1198,
      "step": 13700
    },
    {
      "epoch": 0.04805013195386346,
      "grad_norm": 3.640625,
      "learning_rate": 8.00768700568932e-06,
      "loss": 1.0899,
      "step": 13710
    },
    {
      "epoch": 0.04808517946075906,
      "grad_norm": 3.78125,
      "learning_rate": 8.013528195422844e-06,
      "loss": 1.0145,
      "step": 13720
    },
    {
      "epoch": 0.04812022696765465,
      "grad_norm": 4.1875,
      "learning_rate": 8.019369385156369e-06,
      "loss": 1.1159,
      "step": 13730
    },
    {
      "epoch": 0.04815527447455025,
      "grad_norm": 3.1875,
      "learning_rate": 8.025210574889894e-06,
      "loss": 1.1265,
      "step": 13740
    },
    {
      "epoch": 0.04819032198144585,
      "grad_norm": 3.234375,
      "learning_rate": 8.031051764623419e-06,
      "loss": 1.1182,
      "step": 13750
    },
    {
      "epoch": 0.04822536948834145,
      "grad_norm": 3.671875,
      "learning_rate": 8.036892954356944e-06,
      "loss": 1.0903,
      "step": 13760
    },
    {
      "epoch": 0.048260416995237046,
      "grad_norm": 3.90625,
      "learning_rate": 8.042734144090468e-06,
      "loss": 1.0688,
      "step": 13770
    },
    {
      "epoch": 0.048295464502132644,
      "grad_norm": 3.9375,
      "learning_rate": 8.048575333823993e-06,
      "loss": 1.0769,
      "step": 13780
    },
    {
      "epoch": 0.048330512009028236,
      "grad_norm": 3.890625,
      "learning_rate": 8.054416523557518e-06,
      "loss": 1.0857,
      "step": 13790
    },
    {
      "epoch": 0.048365559515923834,
      "grad_norm": 3.625,
      "learning_rate": 8.060257713291043e-06,
      "loss": 1.0177,
      "step": 13800
    },
    {
      "epoch": 0.04840060702281943,
      "grad_norm": 3.703125,
      "learning_rate": 8.06609890302457e-06,
      "loss": 1.0969,
      "step": 13810
    },
    {
      "epoch": 0.04843565452971503,
      "grad_norm": 4.03125,
      "learning_rate": 8.071940092758093e-06,
      "loss": 1.1124,
      "step": 13820
    },
    {
      "epoch": 0.04847070203661063,
      "grad_norm": 3.796875,
      "learning_rate": 8.07778128249162e-06,
      "loss": 1.0754,
      "step": 13830
    },
    {
      "epoch": 0.04850574954350622,
      "grad_norm": 3.703125,
      "learning_rate": 8.083622472225142e-06,
      "loss": 1.0377,
      "step": 13840
    },
    {
      "epoch": 0.04854079705040182,
      "grad_norm": 3.734375,
      "learning_rate": 8.089463661958669e-06,
      "loss": 1.1226,
      "step": 13850
    },
    {
      "epoch": 0.048575844557297417,
      "grad_norm": 3.515625,
      "learning_rate": 8.095304851692192e-06,
      "loss": 1.1359,
      "step": 13860
    },
    {
      "epoch": 0.048610892064193015,
      "grad_norm": 3.234375,
      "learning_rate": 8.101146041425719e-06,
      "loss": 1.0395,
      "step": 13870
    },
    {
      "epoch": 0.04864593957108861,
      "grad_norm": 3.515625,
      "learning_rate": 8.106987231159242e-06,
      "loss": 1.0632,
      "step": 13880
    },
    {
      "epoch": 0.048680987077984204,
      "grad_norm": 3.578125,
      "learning_rate": 8.112828420892768e-06,
      "loss": 1.1437,
      "step": 13890
    },
    {
      "epoch": 0.0487160345848798,
      "grad_norm": 4.03125,
      "learning_rate": 8.118669610626291e-06,
      "loss": 1.1655,
      "step": 13900
    },
    {
      "epoch": 0.0487510820917754,
      "grad_norm": 3.34375,
      "learning_rate": 8.124510800359818e-06,
      "loss": 1.0847,
      "step": 13910
    },
    {
      "epoch": 0.048786129598671,
      "grad_norm": 3.671875,
      "learning_rate": 8.130351990093341e-06,
      "loss": 1.102,
      "step": 13920
    },
    {
      "epoch": 0.0488211771055666,
      "grad_norm": 3.609375,
      "learning_rate": 8.136193179826868e-06,
      "loss": 1.1333,
      "step": 13930
    },
    {
      "epoch": 0.048856224612462196,
      "grad_norm": 3.421875,
      "learning_rate": 8.142034369560393e-06,
      "loss": 1.1219,
      "step": 13940
    },
    {
      "epoch": 0.04889127211935779,
      "grad_norm": 3.703125,
      "learning_rate": 8.147875559293917e-06,
      "loss": 1.1481,
      "step": 13950
    },
    {
      "epoch": 0.048926319626253385,
      "grad_norm": 3.25,
      "learning_rate": 8.153716749027442e-06,
      "loss": 1.0081,
      "step": 13960
    },
    {
      "epoch": 0.048961367133148984,
      "grad_norm": 3.265625,
      "learning_rate": 8.159557938760967e-06,
      "loss": 1.0854,
      "step": 13970
    },
    {
      "epoch": 0.04899641464004458,
      "grad_norm": 3.875,
      "learning_rate": 8.165399128494494e-06,
      "loss": 1.0691,
      "step": 13980
    },
    {
      "epoch": 0.04903146214694018,
      "grad_norm": 3.25,
      "learning_rate": 8.171240318228017e-06,
      "loss": 1.1137,
      "step": 13990
    },
    {
      "epoch": 0.04906650965383577,
      "grad_norm": 3.5,
      "learning_rate": 8.177081507961543e-06,
      "loss": 1.103,
      "step": 14000
    },
    {
      "epoch": 0.04910155716073137,
      "grad_norm": 3.75,
      "learning_rate": 8.182922697695066e-06,
      "loss": 1.0591,
      "step": 14010
    },
    {
      "epoch": 0.04913660466762697,
      "grad_norm": 3.65625,
      "learning_rate": 8.188763887428593e-06,
      "loss": 1.0585,
      "step": 14020
    },
    {
      "epoch": 0.049171652174522566,
      "grad_norm": 3.796875,
      "learning_rate": 8.194605077162116e-06,
      "loss": 1.124,
      "step": 14030
    },
    {
      "epoch": 0.049206699681418165,
      "grad_norm": 3.375,
      "learning_rate": 8.200446266895643e-06,
      "loss": 1.0517,
      "step": 14040
    },
    {
      "epoch": 0.049241747188313756,
      "grad_norm": 3.453125,
      "learning_rate": 8.206287456629166e-06,
      "loss": 1.073,
      "step": 14050
    },
    {
      "epoch": 0.049276794695209354,
      "grad_norm": 3.71875,
      "learning_rate": 8.212128646362692e-06,
      "loss": 1.1375,
      "step": 14060
    },
    {
      "epoch": 0.04931184220210495,
      "grad_norm": 3.90625,
      "learning_rate": 8.217969836096216e-06,
      "loss": 1.1181,
      "step": 14070
    },
    {
      "epoch": 0.04934688970900055,
      "grad_norm": 4.28125,
      "learning_rate": 8.223811025829742e-06,
      "loss": 1.2397,
      "step": 14080
    },
    {
      "epoch": 0.04938193721589615,
      "grad_norm": 3.359375,
      "learning_rate": 8.229652215563265e-06,
      "loss": 1.1363,
      "step": 14090
    },
    {
      "epoch": 0.04941698472279175,
      "grad_norm": 3.34375,
      "learning_rate": 8.235493405296792e-06,
      "loss": 1.0544,
      "step": 14100
    },
    {
      "epoch": 0.04945203222968734,
      "grad_norm": 3.5625,
      "learning_rate": 8.241334595030317e-06,
      "loss": 1.0652,
      "step": 14110
    },
    {
      "epoch": 0.04948707973658294,
      "grad_norm": 3.578125,
      "learning_rate": 8.247175784763841e-06,
      "loss": 1.0573,
      "step": 14120
    },
    {
      "epoch": 0.049522127243478535,
      "grad_norm": 3.1875,
      "learning_rate": 8.253016974497366e-06,
      "loss": 1.1812,
      "step": 14130
    },
    {
      "epoch": 0.04955717475037413,
      "grad_norm": 3.140625,
      "learning_rate": 8.258858164230891e-06,
      "loss": 1.0884,
      "step": 14140
    },
    {
      "epoch": 0.04959222225726973,
      "grad_norm": 3.640625,
      "learning_rate": 8.264699353964416e-06,
      "loss": 1.1262,
      "step": 14150
    },
    {
      "epoch": 0.04962726976416532,
      "grad_norm": 3.359375,
      "learning_rate": 8.27054054369794e-06,
      "loss": 1.0555,
      "step": 14160
    },
    {
      "epoch": 0.04966231727106092,
      "grad_norm": 3.359375,
      "learning_rate": 8.276381733431466e-06,
      "loss": 0.9955,
      "step": 14170
    },
    {
      "epoch": 0.04969736477795652,
      "grad_norm": 3.515625,
      "learning_rate": 8.28222292316499e-06,
      "loss": 1.1415,
      "step": 14180
    },
    {
      "epoch": 0.04973241228485212,
      "grad_norm": 3.734375,
      "learning_rate": 8.288064112898515e-06,
      "loss": 1.0253,
      "step": 14190
    },
    {
      "epoch": 0.049767459791747716,
      "grad_norm": 3.703125,
      "learning_rate": 8.29390530263204e-06,
      "loss": 1.102,
      "step": 14200
    },
    {
      "epoch": 0.049802507298643314,
      "grad_norm": 3.625,
      "learning_rate": 8.299746492365565e-06,
      "loss": 1.133,
      "step": 14210
    },
    {
      "epoch": 0.049837554805538906,
      "grad_norm": 3.890625,
      "learning_rate": 8.30558768209909e-06,
      "loss": 1.1123,
      "step": 14220
    },
    {
      "epoch": 0.049872602312434504,
      "grad_norm": 3.5,
      "learning_rate": 8.311428871832615e-06,
      "loss": 1.1316,
      "step": 14230
    },
    {
      "epoch": 0.0499076498193301,
      "grad_norm": 3.421875,
      "learning_rate": 8.31727006156614e-06,
      "loss": 1.1358,
      "step": 14240
    },
    {
      "epoch": 0.0499426973262257,
      "grad_norm": 3.421875,
      "learning_rate": 8.323111251299664e-06,
      "loss": 1.1297,
      "step": 14250
    },
    {
      "epoch": 0.0499777448331213,
      "grad_norm": 3.390625,
      "learning_rate": 8.328952441033191e-06,
      "loss": 1.1464,
      "step": 14260
    },
    {
      "epoch": 0.05001279234001689,
      "grad_norm": 3.359375,
      "learning_rate": 8.334793630766714e-06,
      "loss": 1.0418,
      "step": 14270
    },
    {
      "epoch": 0.05004783984691249,
      "grad_norm": 3.40625,
      "learning_rate": 8.34063482050024e-06,
      "loss": 1.0972,
      "step": 14280
    },
    {
      "epoch": 0.05008288735380809,
      "grad_norm": 3.40625,
      "learning_rate": 8.346476010233764e-06,
      "loss": 1.1116,
      "step": 14290
    },
    {
      "epoch": 0.050117934860703685,
      "grad_norm": 3.5,
      "learning_rate": 8.35231719996729e-06,
      "loss": 1.0738,
      "step": 14300
    },
    {
      "epoch": 0.05015298236759928,
      "grad_norm": 4.0,
      "learning_rate": 8.358158389700815e-06,
      "loss": 1.0926,
      "step": 14310
    },
    {
      "epoch": 0.050188029874494874,
      "grad_norm": 3.65625,
      "learning_rate": 8.36399957943434e-06,
      "loss": 1.0404,
      "step": 14320
    },
    {
      "epoch": 0.05022307738139047,
      "grad_norm": 4.03125,
      "learning_rate": 8.369840769167865e-06,
      "loss": 1.135,
      "step": 14330
    },
    {
      "epoch": 0.05025812488828607,
      "grad_norm": 3.75,
      "learning_rate": 8.37568195890139e-06,
      "loss": 1.0383,
      "step": 14340
    },
    {
      "epoch": 0.05029317239518167,
      "grad_norm": 3.5,
      "learning_rate": 8.381523148634915e-06,
      "loss": 1.0464,
      "step": 14350
    },
    {
      "epoch": 0.05032821990207727,
      "grad_norm": 4.21875,
      "learning_rate": 8.38736433836844e-06,
      "loss": 1.0613,
      "step": 14360
    },
    {
      "epoch": 0.050363267408972866,
      "grad_norm": 3.3125,
      "learning_rate": 8.393205528101964e-06,
      "loss": 1.0803,
      "step": 14370
    },
    {
      "epoch": 0.05039831491586846,
      "grad_norm": 3.90625,
      "learning_rate": 8.399046717835489e-06,
      "loss": 1.1717,
      "step": 14380
    },
    {
      "epoch": 0.050433362422764055,
      "grad_norm": 3.515625,
      "learning_rate": 8.404887907569014e-06,
      "loss": 1.0981,
      "step": 14390
    },
    {
      "epoch": 0.050468409929659654,
      "grad_norm": 3.59375,
      "learning_rate": 8.410729097302539e-06,
      "loss": 1.0888,
      "step": 14400
    },
    {
      "epoch": 0.05050345743655525,
      "grad_norm": 3.546875,
      "learning_rate": 8.416570287036064e-06,
      "loss": 1.0403,
      "step": 14410
    },
    {
      "epoch": 0.05053850494345085,
      "grad_norm": 3.5,
      "learning_rate": 8.422411476769588e-06,
      "loss": 1.0698,
      "step": 14420
    },
    {
      "epoch": 0.05057355245034644,
      "grad_norm": 3.53125,
      "learning_rate": 8.428252666503115e-06,
      "loss": 1.082,
      "step": 14430
    },
    {
      "epoch": 0.05060859995724204,
      "grad_norm": 3.65625,
      "learning_rate": 8.434093856236638e-06,
      "loss": 1.0855,
      "step": 14440
    },
    {
      "epoch": 0.05064364746413764,
      "grad_norm": 3.1875,
      "learning_rate": 8.439935045970165e-06,
      "loss": 1.0425,
      "step": 14450
    },
    {
      "epoch": 0.050678694971033236,
      "grad_norm": 4.1875,
      "learning_rate": 8.445776235703688e-06,
      "loss": 1.1024,
      "step": 14460
    },
    {
      "epoch": 0.050713742477928835,
      "grad_norm": 3.515625,
      "learning_rate": 8.451617425437214e-06,
      "loss": 1.0661,
      "step": 14470
    },
    {
      "epoch": 0.05074878998482443,
      "grad_norm": 3.453125,
      "learning_rate": 8.457458615170738e-06,
      "loss": 1.0205,
      "step": 14480
    },
    {
      "epoch": 0.050783837491720024,
      "grad_norm": 3.09375,
      "learning_rate": 8.463299804904264e-06,
      "loss": 1.0826,
      "step": 14490
    },
    {
      "epoch": 0.05081888499861562,
      "grad_norm": 4.0625,
      "learning_rate": 8.469140994637787e-06,
      "loss": 1.1077,
      "step": 14500
    },
    {
      "epoch": 0.05085393250551122,
      "grad_norm": 3.671875,
      "learning_rate": 8.474982184371314e-06,
      "loss": 1.0502,
      "step": 14510
    },
    {
      "epoch": 0.05088898001240682,
      "grad_norm": 3.453125,
      "learning_rate": 8.480823374104837e-06,
      "loss": 1.0649,
      "step": 14520
    },
    {
      "epoch": 0.05092402751930242,
      "grad_norm": 3.4375,
      "learning_rate": 8.486664563838363e-06,
      "loss": 1.0573,
      "step": 14530
    },
    {
      "epoch": 0.05095907502619801,
      "grad_norm": 3.59375,
      "learning_rate": 8.492505753571887e-06,
      "loss": 1.0077,
      "step": 14540
    },
    {
      "epoch": 0.05099412253309361,
      "grad_norm": 3.578125,
      "learning_rate": 8.498346943305413e-06,
      "loss": 1.0578,
      "step": 14550
    },
    {
      "epoch": 0.051029170039989205,
      "grad_norm": 3.21875,
      "learning_rate": 8.504188133038938e-06,
      "loss": 1.0302,
      "step": 14560
    },
    {
      "epoch": 0.0510642175468848,
      "grad_norm": 3.78125,
      "learning_rate": 8.510029322772463e-06,
      "loss": 1.1177,
      "step": 14570
    },
    {
      "epoch": 0.0510992650537804,
      "grad_norm": 3.1875,
      "learning_rate": 8.515870512505988e-06,
      "loss": 1.1273,
      "step": 14580
    },
    {
      "epoch": 0.05113431256067599,
      "grad_norm": 3.265625,
      "learning_rate": 8.521711702239513e-06,
      "loss": 1.0962,
      "step": 14590
    },
    {
      "epoch": 0.05116936006757159,
      "grad_norm": 3.453125,
      "learning_rate": 8.527552891973037e-06,
      "loss": 1.1257,
      "step": 14600
    },
    {
      "epoch": 0.05120440757446719,
      "grad_norm": 3.46875,
      "learning_rate": 8.533394081706562e-06,
      "loss": 1.1046,
      "step": 14610
    },
    {
      "epoch": 0.05123945508136279,
      "grad_norm": 3.375,
      "learning_rate": 8.539235271440087e-06,
      "loss": 1.1034,
      "step": 14620
    },
    {
      "epoch": 0.051274502588258386,
      "grad_norm": 3.1875,
      "learning_rate": 8.545076461173612e-06,
      "loss": 1.0102,
      "step": 14630
    },
    {
      "epoch": 0.051309550095153984,
      "grad_norm": 3.75,
      "learning_rate": 8.550917650907138e-06,
      "loss": 1.0768,
      "step": 14640
    },
    {
      "epoch": 0.051344597602049576,
      "grad_norm": 3.84375,
      "learning_rate": 8.556758840640662e-06,
      "loss": 1.0466,
      "step": 14650
    },
    {
      "epoch": 0.051379645108945174,
      "grad_norm": 3.21875,
      "learning_rate": 8.562600030374188e-06,
      "loss": 1.0033,
      "step": 14660
    },
    {
      "epoch": 0.05141469261584077,
      "grad_norm": 4.1875,
      "learning_rate": 8.568441220107711e-06,
      "loss": 1.0589,
      "step": 14670
    },
    {
      "epoch": 0.05144974012273637,
      "grad_norm": 3.5625,
      "learning_rate": 8.574282409841238e-06,
      "loss": 1.2166,
      "step": 14680
    },
    {
      "epoch": 0.05148478762963197,
      "grad_norm": 3.421875,
      "learning_rate": 8.580123599574761e-06,
      "loss": 1.0158,
      "step": 14690
    },
    {
      "epoch": 0.05151983513652756,
      "grad_norm": 3.15625,
      "learning_rate": 8.585964789308287e-06,
      "loss": 1.0852,
      "step": 14700
    },
    {
      "epoch": 0.05155488264342316,
      "grad_norm": 3.484375,
      "learning_rate": 8.591805979041812e-06,
      "loss": 1.0856,
      "step": 14710
    },
    {
      "epoch": 0.05158993015031876,
      "grad_norm": 3.46875,
      "learning_rate": 8.597647168775337e-06,
      "loss": 1.1413,
      "step": 14720
    },
    {
      "epoch": 0.051624977657214355,
      "grad_norm": 3.765625,
      "learning_rate": 8.603488358508862e-06,
      "loss": 1.0625,
      "step": 14730
    },
    {
      "epoch": 0.05166002516410995,
      "grad_norm": 2.984375,
      "learning_rate": 8.609329548242387e-06,
      "loss": 1.0304,
      "step": 14740
    },
    {
      "epoch": 0.05169507267100555,
      "grad_norm": 3.984375,
      "learning_rate": 8.615170737975912e-06,
      "loss": 1.0352,
      "step": 14750
    },
    {
      "epoch": 0.05173012017790114,
      "grad_norm": 3.34375,
      "learning_rate": 8.621011927709437e-06,
      "loss": 1.0884,
      "step": 14760
    },
    {
      "epoch": 0.05176516768479674,
      "grad_norm": 3.015625,
      "learning_rate": 8.626853117442961e-06,
      "loss": 1.087,
      "step": 14770
    },
    {
      "epoch": 0.05180021519169234,
      "grad_norm": 3.8125,
      "learning_rate": 8.632694307176486e-06,
      "loss": 1.0772,
      "step": 14780
    },
    {
      "epoch": 0.05183526269858794,
      "grad_norm": 3.203125,
      "learning_rate": 8.638535496910011e-06,
      "loss": 1.0822,
      "step": 14790
    },
    {
      "epoch": 0.051870310205483536,
      "grad_norm": 3.5625,
      "learning_rate": 8.644376686643536e-06,
      "loss": 1.0392,
      "step": 14800
    },
    {
      "epoch": 0.05190535771237913,
      "grad_norm": 3.21875,
      "learning_rate": 8.65021787637706e-06,
      "loss": 1.0522,
      "step": 14810
    },
    {
      "epoch": 0.051940405219274725,
      "grad_norm": 3.515625,
      "learning_rate": 8.656059066110586e-06,
      "loss": 1.0172,
      "step": 14820
    },
    {
      "epoch": 0.051975452726170324,
      "grad_norm": 3.53125,
      "learning_rate": 8.66190025584411e-06,
      "loss": 1.0176,
      "step": 14830
    },
    {
      "epoch": 0.05201050023306592,
      "grad_norm": 3.5625,
      "learning_rate": 8.667741445577635e-06,
      "loss": 1.0585,
      "step": 14840
    },
    {
      "epoch": 0.05204554773996152,
      "grad_norm": 4.0,
      "learning_rate": 8.67358263531116e-06,
      "loss": 1.0869,
      "step": 14850
    },
    {
      "epoch": 0.05208059524685711,
      "grad_norm": 3.25,
      "learning_rate": 8.679423825044685e-06,
      "loss": 1.1365,
      "step": 14860
    },
    {
      "epoch": 0.05211564275375271,
      "grad_norm": 3.5,
      "learning_rate": 8.68526501477821e-06,
      "loss": 1.0823,
      "step": 14870
    },
    {
      "epoch": 0.05215069026064831,
      "grad_norm": 3.5,
      "learning_rate": 8.691106204511736e-06,
      "loss": 1.0352,
      "step": 14880
    },
    {
      "epoch": 0.052185737767543906,
      "grad_norm": 3.71875,
      "learning_rate": 8.69694739424526e-06,
      "loss": 1.0625,
      "step": 14890
    },
    {
      "epoch": 0.052220785274439505,
      "grad_norm": 3.296875,
      "learning_rate": 8.702788583978786e-06,
      "loss": 1.0526,
      "step": 14900
    },
    {
      "epoch": 0.0522558327813351,
      "grad_norm": 4.21875,
      "learning_rate": 8.70862977371231e-06,
      "loss": 1.1023,
      "step": 14910
    },
    {
      "epoch": 0.052290880288230694,
      "grad_norm": 3.265625,
      "learning_rate": 8.714470963445836e-06,
      "loss": 1.0711,
      "step": 14920
    },
    {
      "epoch": 0.05232592779512629,
      "grad_norm": 3.703125,
      "learning_rate": 8.720312153179359e-06,
      "loss": 1.038,
      "step": 14930
    },
    {
      "epoch": 0.05236097530202189,
      "grad_norm": 3.578125,
      "learning_rate": 8.726153342912885e-06,
      "loss": 1.042,
      "step": 14940
    },
    {
      "epoch": 0.05239602280891749,
      "grad_norm": 3.515625,
      "learning_rate": 8.731994532646409e-06,
      "loss": 1.1574,
      "step": 14950
    },
    {
      "epoch": 0.05243107031581309,
      "grad_norm": 3.265625,
      "learning_rate": 8.737835722379935e-06,
      "loss": 1.0988,
      "step": 14960
    },
    {
      "epoch": 0.05246611782270868,
      "grad_norm": 3.59375,
      "learning_rate": 8.743676912113458e-06,
      "loss": 1.0289,
      "step": 14970
    },
    {
      "epoch": 0.05250116532960428,
      "grad_norm": 3.71875,
      "learning_rate": 8.749518101846985e-06,
      "loss": 1.1811,
      "step": 14980
    },
    {
      "epoch": 0.052536212836499875,
      "grad_norm": 3.78125,
      "learning_rate": 8.75535929158051e-06,
      "loss": 1.0936,
      "step": 14990
    },
    {
      "epoch": 0.05257126034339547,
      "grad_norm": 3.6875,
      "learning_rate": 8.761200481314035e-06,
      "loss": 1.0699,
      "step": 15000
    },
    {
      "epoch": 0.05257126034339547,
      "eval_loss": 1.0205786228179932,
      "eval_runtime": 562.3523,
      "eval_samples_per_second": 676.508,
      "eval_steps_per_second": 56.376,
      "step": 15000
    },
    {
      "epoch": 0.05260630785029107,
      "grad_norm": 3.609375,
      "learning_rate": 8.76704167104756e-06,
      "loss": 1.1214,
      "step": 15010
    },
    {
      "epoch": 0.05264135535718667,
      "grad_norm": 3.359375,
      "learning_rate": 8.772882860781084e-06,
      "loss": 1.0743,
      "step": 15020
    },
    {
      "epoch": 0.05267640286408226,
      "grad_norm": 3.125,
      "learning_rate": 8.778724050514609e-06,
      "loss": 1.085,
      "step": 15030
    },
    {
      "epoch": 0.05271145037097786,
      "grad_norm": 3.5,
      "learning_rate": 8.784565240248134e-06,
      "loss": 1.0763,
      "step": 15040
    },
    {
      "epoch": 0.05274649787787346,
      "grad_norm": 3.65625,
      "learning_rate": 8.79040642998166e-06,
      "loss": 1.0163,
      "step": 15050
    },
    {
      "epoch": 0.052781545384769056,
      "grad_norm": 3.4375,
      "learning_rate": 8.796247619715184e-06,
      "loss": 1.051,
      "step": 15060
    },
    {
      "epoch": 0.052816592891664654,
      "grad_norm": 3.75,
      "learning_rate": 8.80208880944871e-06,
      "loss": 1.1153,
      "step": 15070
    },
    {
      "epoch": 0.052851640398560246,
      "grad_norm": 3.6875,
      "learning_rate": 8.807929999182233e-06,
      "loss": 1.1582,
      "step": 15080
    },
    {
      "epoch": 0.052886687905455844,
      "grad_norm": 3.734375,
      "learning_rate": 8.81377118891576e-06,
      "loss": 1.1419,
      "step": 15090
    },
    {
      "epoch": 0.05292173541235144,
      "grad_norm": 3.453125,
      "learning_rate": 8.819612378649283e-06,
      "loss": 1.1807,
      "step": 15100
    },
    {
      "epoch": 0.05295678291924704,
      "grad_norm": 4.0,
      "learning_rate": 8.82545356838281e-06,
      "loss": 1.118,
      "step": 15110
    },
    {
      "epoch": 0.05299183042614264,
      "grad_norm": 2.875,
      "learning_rate": 8.831294758116333e-06,
      "loss": 1.0495,
      "step": 15120
    },
    {
      "epoch": 0.05302687793303823,
      "grad_norm": 3.6875,
      "learning_rate": 8.83713594784986e-06,
      "loss": 0.9812,
      "step": 15130
    },
    {
      "epoch": 0.05306192543993383,
      "grad_norm": 3.359375,
      "learning_rate": 8.842977137583382e-06,
      "loss": 1.1133,
      "step": 15140
    },
    {
      "epoch": 0.05309697294682943,
      "grad_norm": 3.03125,
      "learning_rate": 8.848818327316909e-06,
      "loss": 1.0693,
      "step": 15150
    },
    {
      "epoch": 0.053132020453725025,
      "grad_norm": 3.5625,
      "learning_rate": 8.854659517050434e-06,
      "loss": 1.0651,
      "step": 15160
    },
    {
      "epoch": 0.05316706796062062,
      "grad_norm": 3.890625,
      "learning_rate": 8.860500706783959e-06,
      "loss": 1.0444,
      "step": 15170
    },
    {
      "epoch": 0.05320211546751622,
      "grad_norm": 3.25,
      "learning_rate": 8.866341896517483e-06,
      "loss": 1.1374,
      "step": 15180
    },
    {
      "epoch": 0.05323716297441181,
      "grad_norm": 3.484375,
      "learning_rate": 8.872183086251008e-06,
      "loss": 1.0351,
      "step": 15190
    },
    {
      "epoch": 0.05327221048130741,
      "grad_norm": 3.609375,
      "learning_rate": 8.878024275984533e-06,
      "loss": 1.1344,
      "step": 15200
    },
    {
      "epoch": 0.05330725798820301,
      "grad_norm": 3.46875,
      "learning_rate": 8.883865465718058e-06,
      "loss": 1.127,
      "step": 15210
    },
    {
      "epoch": 0.05334230549509861,
      "grad_norm": 3.34375,
      "learning_rate": 8.889706655451583e-06,
      "loss": 1.0604,
      "step": 15220
    },
    {
      "epoch": 0.053377353001994206,
      "grad_norm": 3.859375,
      "learning_rate": 8.895547845185108e-06,
      "loss": 1.0421,
      "step": 15230
    },
    {
      "epoch": 0.0534124005088898,
      "grad_norm": 3.578125,
      "learning_rate": 8.901389034918632e-06,
      "loss": 1.1315,
      "step": 15240
    },
    {
      "epoch": 0.053447448015785395,
      "grad_norm": 3.8125,
      "learning_rate": 8.907230224652157e-06,
      "loss": 1.1117,
      "step": 15250
    },
    {
      "epoch": 0.053482495522680994,
      "grad_norm": 3.828125,
      "learning_rate": 8.913071414385682e-06,
      "loss": 1.0313,
      "step": 15260
    },
    {
      "epoch": 0.05351754302957659,
      "grad_norm": 3.828125,
      "learning_rate": 8.918912604119207e-06,
      "loss": 1.1419,
      "step": 15270
    },
    {
      "epoch": 0.05355259053647219,
      "grad_norm": 4.0625,
      "learning_rate": 8.924753793852732e-06,
      "loss": 1.0606,
      "step": 15280
    },
    {
      "epoch": 0.05358763804336779,
      "grad_norm": 3.328125,
      "learning_rate": 8.930594983586257e-06,
      "loss": 1.1307,
      "step": 15290
    },
    {
      "epoch": 0.05362268555026338,
      "grad_norm": 3.625,
      "learning_rate": 8.936436173319782e-06,
      "loss": 1.0389,
      "step": 15300
    },
    {
      "epoch": 0.05365773305715898,
      "grad_norm": 3.140625,
      "learning_rate": 8.942277363053306e-06,
      "loss": 1.0754,
      "step": 15310
    },
    {
      "epoch": 0.053692780564054576,
      "grad_norm": 3.328125,
      "learning_rate": 8.948118552786831e-06,
      "loss": 1.0438,
      "step": 15320
    },
    {
      "epoch": 0.053727828070950175,
      "grad_norm": 3.859375,
      "learning_rate": 8.953959742520358e-06,
      "loss": 1.0701,
      "step": 15330
    },
    {
      "epoch": 0.05376287557784577,
      "grad_norm": 3.5,
      "learning_rate": 8.959800932253883e-06,
      "loss": 1.0712,
      "step": 15340
    },
    {
      "epoch": 0.053797923084741364,
      "grad_norm": 3.65625,
      "learning_rate": 8.965642121987407e-06,
      "loss": 1.1372,
      "step": 15350
    },
    {
      "epoch": 0.05383297059163696,
      "grad_norm": 3.578125,
      "learning_rate": 8.971483311720932e-06,
      "loss": 1.0666,
      "step": 15360
    },
    {
      "epoch": 0.05386801809853256,
      "grad_norm": 3.234375,
      "learning_rate": 8.977324501454457e-06,
      "loss": 1.1223,
      "step": 15370
    },
    {
      "epoch": 0.05390306560542816,
      "grad_norm": 3.65625,
      "learning_rate": 8.983165691187982e-06,
      "loss": 1.1374,
      "step": 15380
    },
    {
      "epoch": 0.05393811311232376,
      "grad_norm": 3.390625,
      "learning_rate": 8.989006880921507e-06,
      "loss": 1.1486,
      "step": 15390
    },
    {
      "epoch": 0.05397316061921935,
      "grad_norm": 3.28125,
      "learning_rate": 8.994848070655032e-06,
      "loss": 1.0655,
      "step": 15400
    },
    {
      "epoch": 0.05400820812611495,
      "grad_norm": 3.578125,
      "learning_rate": 9.000689260388557e-06,
      "loss": 0.971,
      "step": 15410
    },
    {
      "epoch": 0.054043255633010545,
      "grad_norm": 3.03125,
      "learning_rate": 9.006530450122081e-06,
      "loss": 1.0914,
      "step": 15420
    },
    {
      "epoch": 0.05407830313990614,
      "grad_norm": 3.65625,
      "learning_rate": 9.012371639855606e-06,
      "loss": 1.0749,
      "step": 15430
    },
    {
      "epoch": 0.05411335064680174,
      "grad_norm": 3.6875,
      "learning_rate": 9.018212829589131e-06,
      "loss": 1.0454,
      "step": 15440
    },
    {
      "epoch": 0.05414839815369734,
      "grad_norm": 4.0625,
      "learning_rate": 9.024054019322656e-06,
      "loss": 1.2002,
      "step": 15450
    },
    {
      "epoch": 0.05418344566059293,
      "grad_norm": 3.25,
      "learning_rate": 9.02989520905618e-06,
      "loss": 1.099,
      "step": 15460
    },
    {
      "epoch": 0.05421849316748853,
      "grad_norm": 3.6875,
      "learning_rate": 9.035736398789706e-06,
      "loss": 1.0044,
      "step": 15470
    },
    {
      "epoch": 0.05425354067438413,
      "grad_norm": 3.25,
      "learning_rate": 9.04157758852323e-06,
      "loss": 1.1522,
      "step": 15480
    },
    {
      "epoch": 0.054288588181279726,
      "grad_norm": 3.34375,
      "learning_rate": 9.047418778256755e-06,
      "loss": 1.1206,
      "step": 15490
    },
    {
      "epoch": 0.054323635688175324,
      "grad_norm": 3.703125,
      "learning_rate": 9.053259967990282e-06,
      "loss": 1.0654,
      "step": 15500
    },
    {
      "epoch": 0.054358683195070916,
      "grad_norm": 3.109375,
      "learning_rate": 9.059101157723805e-06,
      "loss": 1.1484,
      "step": 15510
    },
    {
      "epoch": 0.054393730701966514,
      "grad_norm": 3.640625,
      "learning_rate": 9.064942347457332e-06,
      "loss": 1.0664,
      "step": 15520
    },
    {
      "epoch": 0.05442877820886211,
      "grad_norm": 3.328125,
      "learning_rate": 9.070783537190855e-06,
      "loss": 1.0701,
      "step": 15530
    },
    {
      "epoch": 0.05446382571575771,
      "grad_norm": 3.5,
      "learning_rate": 9.076624726924381e-06,
      "loss": 1.1404,
      "step": 15540
    },
    {
      "epoch": 0.05449887322265331,
      "grad_norm": 3.984375,
      "learning_rate": 9.082465916657904e-06,
      "loss": 1.1304,
      "step": 15550
    },
    {
      "epoch": 0.0545339207295489,
      "grad_norm": 3.484375,
      "learning_rate": 9.088307106391431e-06,
      "loss": 1.105,
      "step": 15560
    },
    {
      "epoch": 0.0545689682364445,
      "grad_norm": 3.609375,
      "learning_rate": 9.094148296124954e-06,
      "loss": 1.163,
      "step": 15570
    },
    {
      "epoch": 0.0546040157433401,
      "grad_norm": 3.640625,
      "learning_rate": 9.09998948585848e-06,
      "loss": 0.9955,
      "step": 15580
    },
    {
      "epoch": 0.054639063250235695,
      "grad_norm": 3.1875,
      "learning_rate": 9.105830675592004e-06,
      "loss": 1.068,
      "step": 15590
    },
    {
      "epoch": 0.05467411075713129,
      "grad_norm": 3.453125,
      "learning_rate": 9.11167186532553e-06,
      "loss": 1.0259,
      "step": 15600
    },
    {
      "epoch": 0.05470915826402689,
      "grad_norm": 3.25,
      "learning_rate": 9.117513055059055e-06,
      "loss": 1.1136,
      "step": 15610
    },
    {
      "epoch": 0.05474420577092248,
      "grad_norm": 3.890625,
      "learning_rate": 9.12335424479258e-06,
      "loss": 1.078,
      "step": 15620
    },
    {
      "epoch": 0.05477925327781808,
      "grad_norm": 3.390625,
      "learning_rate": 9.129195434526105e-06,
      "loss": 1.1408,
      "step": 15630
    },
    {
      "epoch": 0.05481430078471368,
      "grad_norm": 3.578125,
      "learning_rate": 9.13503662425963e-06,
      "loss": 1.1014,
      "step": 15640
    },
    {
      "epoch": 0.05484934829160928,
      "grad_norm": 3.671875,
      "learning_rate": 9.140877813993154e-06,
      "loss": 1.0033,
      "step": 15650
    },
    {
      "epoch": 0.054884395798504876,
      "grad_norm": 3.359375,
      "learning_rate": 9.14671900372668e-06,
      "loss": 1.0036,
      "step": 15660
    },
    {
      "epoch": 0.05491944330540047,
      "grad_norm": 3.734375,
      "learning_rate": 9.152560193460206e-06,
      "loss": 1.0817,
      "step": 15670
    },
    {
      "epoch": 0.054954490812296065,
      "grad_norm": 3.71875,
      "learning_rate": 9.158401383193729e-06,
      "loss": 1.0311,
      "step": 15680
    },
    {
      "epoch": 0.054989538319191664,
      "grad_norm": 3.28125,
      "learning_rate": 9.164242572927256e-06,
      "loss": 1.0685,
      "step": 15690
    },
    {
      "epoch": 0.05502458582608726,
      "grad_norm": 3.5625,
      "learning_rate": 9.170083762660779e-06,
      "loss": 1.1587,
      "step": 15700
    },
    {
      "epoch": 0.05505963333298286,
      "grad_norm": 3.796875,
      "learning_rate": 9.175924952394305e-06,
      "loss": 1.1597,
      "step": 15710
    },
    {
      "epoch": 0.05509468083987846,
      "grad_norm": 3.859375,
      "learning_rate": 9.181766142127828e-06,
      "loss": 1.077,
      "step": 15720
    },
    {
      "epoch": 0.05512972834677405,
      "grad_norm": 3.796875,
      "learning_rate": 9.187607331861355e-06,
      "loss": 1.0609,
      "step": 15730
    },
    {
      "epoch": 0.05516477585366965,
      "grad_norm": 2.90625,
      "learning_rate": 9.193448521594878e-06,
      "loss": 1.0311,
      "step": 15740
    },
    {
      "epoch": 0.055199823360565246,
      "grad_norm": 3.375,
      "learning_rate": 9.199289711328405e-06,
      "loss": 1.1154,
      "step": 15750
    },
    {
      "epoch": 0.055234870867460845,
      "grad_norm": 3.21875,
      "learning_rate": 9.205130901061928e-06,
      "loss": 1.0803,
      "step": 15760
    },
    {
      "epoch": 0.05526991837435644,
      "grad_norm": 3.65625,
      "learning_rate": 9.210972090795454e-06,
      "loss": 1.0761,
      "step": 15770
    },
    {
      "epoch": 0.055304965881252034,
      "grad_norm": 3.578125,
      "learning_rate": 9.21681328052898e-06,
      "loss": 1.114,
      "step": 15780
    },
    {
      "epoch": 0.05534001338814763,
      "grad_norm": 3.8125,
      "learning_rate": 9.222654470262504e-06,
      "loss": 1.0317,
      "step": 15790
    },
    {
      "epoch": 0.05537506089504323,
      "grad_norm": 3.296875,
      "learning_rate": 9.228495659996029e-06,
      "loss": 1.0942,
      "step": 15800
    },
    {
      "epoch": 0.05541010840193883,
      "grad_norm": 3.65625,
      "learning_rate": 9.234336849729554e-06,
      "loss": 1.1167,
      "step": 15810
    },
    {
      "epoch": 0.05544515590883443,
      "grad_norm": 3.640625,
      "learning_rate": 9.240178039463079e-06,
      "loss": 1.0557,
      "step": 15820
    },
    {
      "epoch": 0.05548020341573002,
      "grad_norm": 3.3125,
      "learning_rate": 9.246019229196603e-06,
      "loss": 1.147,
      "step": 15830
    },
    {
      "epoch": 0.05551525092262562,
      "grad_norm": 3.65625,
      "learning_rate": 9.251860418930128e-06,
      "loss": 1.1228,
      "step": 15840
    },
    {
      "epoch": 0.055550298429521215,
      "grad_norm": 3.734375,
      "learning_rate": 9.257701608663653e-06,
      "loss": 1.1489,
      "step": 15850
    },
    {
      "epoch": 0.05558534593641681,
      "grad_norm": 3.34375,
      "learning_rate": 9.263542798397178e-06,
      "loss": 1.0826,
      "step": 15860
    },
    {
      "epoch": 0.05562039344331241,
      "grad_norm": 3.59375,
      "learning_rate": 9.269383988130703e-06,
      "loss": 1.0961,
      "step": 15870
    },
    {
      "epoch": 0.05565544095020801,
      "grad_norm": 3.21875,
      "learning_rate": 9.275225177864228e-06,
      "loss": 1.0565,
      "step": 15880
    },
    {
      "epoch": 0.0556904884571036,
      "grad_norm": 3.328125,
      "learning_rate": 9.281066367597752e-06,
      "loss": 1.0512,
      "step": 15890
    },
    {
      "epoch": 0.0557255359639992,
      "grad_norm": 3.5,
      "learning_rate": 9.286907557331277e-06,
      "loss": 1.0628,
      "step": 15900
    },
    {
      "epoch": 0.0557605834708948,
      "grad_norm": 3.671875,
      "learning_rate": 9.292748747064802e-06,
      "loss": 1.1071,
      "step": 15910
    },
    {
      "epoch": 0.055795630977790396,
      "grad_norm": 3.3125,
      "learning_rate": 9.298589936798327e-06,
      "loss": 1.0368,
      "step": 15920
    },
    {
      "epoch": 0.055830678484685994,
      "grad_norm": 3.65625,
      "learning_rate": 9.304431126531852e-06,
      "loss": 1.0878,
      "step": 15930
    },
    {
      "epoch": 0.055865725991581586,
      "grad_norm": 3.46875,
      "learning_rate": 9.310272316265377e-06,
      "loss": 1.0942,
      "step": 15940
    },
    {
      "epoch": 0.055900773498477184,
      "grad_norm": 3.890625,
      "learning_rate": 9.316113505998903e-06,
      "loss": 1.1309,
      "step": 15950
    },
    {
      "epoch": 0.05593582100537278,
      "grad_norm": 3.375,
      "learning_rate": 9.321954695732426e-06,
      "loss": 0.9832,
      "step": 15960
    },
    {
      "epoch": 0.05597086851226838,
      "grad_norm": 3.25,
      "learning_rate": 9.327795885465953e-06,
      "loss": 1.0893,
      "step": 15970
    },
    {
      "epoch": 0.05600591601916398,
      "grad_norm": 3.625,
      "learning_rate": 9.333637075199476e-06,
      "loss": 1.0747,
      "step": 15980
    },
    {
      "epoch": 0.05604096352605958,
      "grad_norm": 3.328125,
      "learning_rate": 9.339478264933003e-06,
      "loss": 1.0383,
      "step": 15990
    },
    {
      "epoch": 0.05607601103295517,
      "grad_norm": 3.75,
      "learning_rate": 9.345319454666526e-06,
      "loss": 1.0209,
      "step": 16000
    },
    {
      "epoch": 0.05611105853985077,
      "grad_norm": 3.3125,
      "learning_rate": 9.351160644400052e-06,
      "loss": 1.0631,
      "step": 16010
    },
    {
      "epoch": 0.056146106046746365,
      "grad_norm": 3.625,
      "learning_rate": 9.357001834133577e-06,
      "loss": 1.0887,
      "step": 16020
    },
    {
      "epoch": 0.05618115355364196,
      "grad_norm": 3.34375,
      "learning_rate": 9.362843023867102e-06,
      "loss": 1.0116,
      "step": 16030
    },
    {
      "epoch": 0.05621620106053756,
      "grad_norm": 3.296875,
      "learning_rate": 9.368684213600627e-06,
      "loss": 1.0893,
      "step": 16040
    },
    {
      "epoch": 0.05625124856743315,
      "grad_norm": 3.75,
      "learning_rate": 9.374525403334152e-06,
      "loss": 1.0312,
      "step": 16050
    },
    {
      "epoch": 0.05628629607432875,
      "grad_norm": 3.71875,
      "learning_rate": 9.380366593067677e-06,
      "loss": 1.1232,
      "step": 16060
    },
    {
      "epoch": 0.05632134358122435,
      "grad_norm": 3.453125,
      "learning_rate": 9.386207782801201e-06,
      "loss": 1.0677,
      "step": 16070
    },
    {
      "epoch": 0.05635639108811995,
      "grad_norm": 3.4375,
      "learning_rate": 9.392048972534726e-06,
      "loss": 1.0631,
      "step": 16080
    },
    {
      "epoch": 0.056391438595015546,
      "grad_norm": 3.34375,
      "learning_rate": 9.397890162268251e-06,
      "loss": 1.0611,
      "step": 16090
    },
    {
      "epoch": 0.05642648610191114,
      "grad_norm": 3.765625,
      "learning_rate": 9.403731352001776e-06,
      "loss": 1.09,
      "step": 16100
    },
    {
      "epoch": 0.056461533608806735,
      "grad_norm": 3.625,
      "learning_rate": 9.4095725417353e-06,
      "loss": 1.0892,
      "step": 16110
    },
    {
      "epoch": 0.056496581115702334,
      "grad_norm": 3.546875,
      "learning_rate": 9.415413731468827e-06,
      "loss": 1.1041,
      "step": 16120
    },
    {
      "epoch": 0.05653162862259793,
      "grad_norm": 3.453125,
      "learning_rate": 9.42125492120235e-06,
      "loss": 1.0886,
      "step": 16130
    },
    {
      "epoch": 0.05656667612949353,
      "grad_norm": 3.453125,
      "learning_rate": 9.427096110935877e-06,
      "loss": 1.0651,
      "step": 16140
    },
    {
      "epoch": 0.05660172363638913,
      "grad_norm": 3.40625,
      "learning_rate": 9.4329373006694e-06,
      "loss": 1.0521,
      "step": 16150
    },
    {
      "epoch": 0.05663677114328472,
      "grad_norm": 3.546875,
      "learning_rate": 9.438778490402927e-06,
      "loss": 1.0469,
      "step": 16160
    },
    {
      "epoch": 0.05667181865018032,
      "grad_norm": 3.078125,
      "learning_rate": 9.44461968013645e-06,
      "loss": 1.056,
      "step": 16170
    },
    {
      "epoch": 0.056706866157075916,
      "grad_norm": 3.578125,
      "learning_rate": 9.450460869869976e-06,
      "loss": 1.0081,
      "step": 16180
    },
    {
      "epoch": 0.056741913663971515,
      "grad_norm": 3.3125,
      "learning_rate": 9.4563020596035e-06,
      "loss": 1.083,
      "step": 16190
    },
    {
      "epoch": 0.05677696117086711,
      "grad_norm": 3.1875,
      "learning_rate": 9.462143249337026e-06,
      "loss": 1.0236,
      "step": 16200
    },
    {
      "epoch": 0.056812008677762704,
      "grad_norm": 3.328125,
      "learning_rate": 9.46798443907055e-06,
      "loss": 1.0979,
      "step": 16210
    },
    {
      "epoch": 0.0568470561846583,
      "grad_norm": 3.4375,
      "learning_rate": 9.473825628804076e-06,
      "loss": 1.0568,
      "step": 16220
    },
    {
      "epoch": 0.0568821036915539,
      "grad_norm": 3.59375,
      "learning_rate": 9.4796668185376e-06,
      "loss": 1.1145,
      "step": 16230
    },
    {
      "epoch": 0.0569171511984495,
      "grad_norm": 3.390625,
      "learning_rate": 9.485508008271125e-06,
      "loss": 1.092,
      "step": 16240
    },
    {
      "epoch": 0.0569521987053451,
      "grad_norm": 3.640625,
      "learning_rate": 9.49134919800465e-06,
      "loss": 1.1922,
      "step": 16250
    },
    {
      "epoch": 0.056987246212240696,
      "grad_norm": 3.203125,
      "learning_rate": 9.497190387738175e-06,
      "loss": 1.1132,
      "step": 16260
    },
    {
      "epoch": 0.05702229371913629,
      "grad_norm": 3.4375,
      "learning_rate": 9.5030315774717e-06,
      "loss": 1.143,
      "step": 16270
    },
    {
      "epoch": 0.057057341226031885,
      "grad_norm": 3.34375,
      "learning_rate": 9.508872767205225e-06,
      "loss": 1.124,
      "step": 16280
    },
    {
      "epoch": 0.05709238873292748,
      "grad_norm": 3.21875,
      "learning_rate": 9.51471395693875e-06,
      "loss": 1.0211,
      "step": 16290
    },
    {
      "epoch": 0.05712743623982308,
      "grad_norm": 3.8125,
      "learning_rate": 9.520555146672274e-06,
      "loss": 1.087,
      "step": 16300
    },
    {
      "epoch": 0.05716248374671868,
      "grad_norm": 3.359375,
      "learning_rate": 9.5263963364058e-06,
      "loss": 1.0754,
      "step": 16310
    },
    {
      "epoch": 0.05719753125361427,
      "grad_norm": 3.078125,
      "learning_rate": 9.532237526139324e-06,
      "loss": 1.089,
      "step": 16320
    },
    {
      "epoch": 0.05723257876050987,
      "grad_norm": 3.234375,
      "learning_rate": 9.538078715872849e-06,
      "loss": 1.0954,
      "step": 16330
    },
    {
      "epoch": 0.05726762626740547,
      "grad_norm": 3.34375,
      "learning_rate": 9.543919905606374e-06,
      "loss": 1.0508,
      "step": 16340
    },
    {
      "epoch": 0.057302673774301066,
      "grad_norm": 3.6875,
      "learning_rate": 9.549761095339899e-06,
      "loss": 1.0043,
      "step": 16350
    },
    {
      "epoch": 0.057337721281196664,
      "grad_norm": 3.4375,
      "learning_rate": 9.555602285073424e-06,
      "loss": 1.1144,
      "step": 16360
    },
    {
      "epoch": 0.057372768788092256,
      "grad_norm": 3.4375,
      "learning_rate": 9.56144347480695e-06,
      "loss": 1.157,
      "step": 16370
    },
    {
      "epoch": 0.057407816294987854,
      "grad_norm": 3.59375,
      "learning_rate": 9.567284664540473e-06,
      "loss": 1.1026,
      "step": 16380
    },
    {
      "epoch": 0.05744286380188345,
      "grad_norm": 3.5,
      "learning_rate": 9.573125854274e-06,
      "loss": 1.1129,
      "step": 16390
    },
    {
      "epoch": 0.05747791130877905,
      "grad_norm": 3.703125,
      "learning_rate": 9.578967044007525e-06,
      "loss": 1.1504,
      "step": 16400
    },
    {
      "epoch": 0.05751295881567465,
      "grad_norm": 3.90625,
      "learning_rate": 9.58480823374105e-06,
      "loss": 1.1396,
      "step": 16410
    },
    {
      "epoch": 0.05754800632257025,
      "grad_norm": 4.0,
      "learning_rate": 9.590649423474574e-06,
      "loss": 1.0418,
      "step": 16420
    },
    {
      "epoch": 0.05758305382946584,
      "grad_norm": 3.4375,
      "learning_rate": 9.596490613208099e-06,
      "loss": 1.0745,
      "step": 16430
    },
    {
      "epoch": 0.05761810133636144,
      "grad_norm": 3.4375,
      "learning_rate": 9.602331802941624e-06,
      "loss": 1.0831,
      "step": 16440
    },
    {
      "epoch": 0.057653148843257035,
      "grad_norm": 3.8125,
      "learning_rate": 9.608172992675149e-06,
      "loss": 1.1043,
      "step": 16450
    },
    {
      "epoch": 0.05768819635015263,
      "grad_norm": 3.21875,
      "learning_rate": 9.614014182408674e-06,
      "loss": 1.1262,
      "step": 16460
    },
    {
      "epoch": 0.05772324385704823,
      "grad_norm": 3.6875,
      "learning_rate": 9.619855372142199e-06,
      "loss": 1.0835,
      "step": 16470
    },
    {
      "epoch": 0.05775829136394382,
      "grad_norm": 3.34375,
      "learning_rate": 9.625696561875723e-06,
      "loss": 1.1015,
      "step": 16480
    },
    {
      "epoch": 0.05779333887083942,
      "grad_norm": 3.671875,
      "learning_rate": 9.631537751609248e-06,
      "loss": 1.1487,
      "step": 16490
    },
    {
      "epoch": 0.05782838637773502,
      "grad_norm": 4.03125,
      "learning_rate": 9.637378941342773e-06,
      "loss": 1.1606,
      "step": 16500
    },
    {
      "epoch": 0.05786343388463062,
      "grad_norm": 3.484375,
      "learning_rate": 9.643220131076298e-06,
      "loss": 1.1658,
      "step": 16510
    },
    {
      "epoch": 0.057898481391526216,
      "grad_norm": 3.765625,
      "learning_rate": 9.649061320809823e-06,
      "loss": 1.019,
      "step": 16520
    },
    {
      "epoch": 0.057933528898421814,
      "grad_norm": 3.640625,
      "learning_rate": 9.654902510543348e-06,
      "loss": 1.0866,
      "step": 16530
    },
    {
      "epoch": 0.057968576405317405,
      "grad_norm": 3.375,
      "learning_rate": 9.660743700276872e-06,
      "loss": 1.1002,
      "step": 16540
    },
    {
      "epoch": 0.058003623912213004,
      "grad_norm": 3.8125,
      "learning_rate": 9.666584890010397e-06,
      "loss": 1.061,
      "step": 16550
    },
    {
      "epoch": 0.0580386714191086,
      "grad_norm": 3.453125,
      "learning_rate": 9.672426079743922e-06,
      "loss": 1.0517,
      "step": 16560
    },
    {
      "epoch": 0.0580737189260042,
      "grad_norm": 2.78125,
      "learning_rate": 9.678267269477449e-06,
      "loss": 1.1005,
      "step": 16570
    },
    {
      "epoch": 0.0581087664328998,
      "grad_norm": 3.5625,
      "learning_rate": 9.684108459210972e-06,
      "loss": 1.0715,
      "step": 16580
    },
    {
      "epoch": 0.05814381393979539,
      "grad_norm": 3.6875,
      "learning_rate": 9.689949648944498e-06,
      "loss": 1.1084,
      "step": 16590
    },
    {
      "epoch": 0.05817886144669099,
      "grad_norm": 3.375,
      "learning_rate": 9.695790838678021e-06,
      "loss": 1.0936,
      "step": 16600
    },
    {
      "epoch": 0.058213908953586586,
      "grad_norm": 3.203125,
      "learning_rate": 9.701632028411548e-06,
      "loss": 1.0963,
      "step": 16610
    },
    {
      "epoch": 0.058248956460482185,
      "grad_norm": 3.34375,
      "learning_rate": 9.707473218145071e-06,
      "loss": 1.0868,
      "step": 16620
    },
    {
      "epoch": 0.05828400396737778,
      "grad_norm": 3.59375,
      "learning_rate": 9.713314407878598e-06,
      "loss": 1.0334,
      "step": 16630
    },
    {
      "epoch": 0.058319051474273374,
      "grad_norm": 3.578125,
      "learning_rate": 9.719155597612121e-06,
      "loss": 1.1011,
      "step": 16640
    },
    {
      "epoch": 0.05835409898116897,
      "grad_norm": 3.40625,
      "learning_rate": 9.724996787345647e-06,
      "loss": 1.0641,
      "step": 16650
    },
    {
      "epoch": 0.05838914648806457,
      "grad_norm": 3.4375,
      "learning_rate": 9.73083797707917e-06,
      "loss": 1.0541,
      "step": 16660
    },
    {
      "epoch": 0.05842419399496017,
      "grad_norm": 3.59375,
      "learning_rate": 9.736679166812697e-06,
      "loss": 1.0451,
      "step": 16670
    },
    {
      "epoch": 0.05845924150185577,
      "grad_norm": 3.34375,
      "learning_rate": 9.742520356546222e-06,
      "loss": 0.9827,
      "step": 16680
    },
    {
      "epoch": 0.058494289008751366,
      "grad_norm": 3.578125,
      "learning_rate": 9.748361546279747e-06,
      "loss": 0.9576,
      "step": 16690
    },
    {
      "epoch": 0.05852933651564696,
      "grad_norm": 3.390625,
      "learning_rate": 9.754202736013272e-06,
      "loss": 0.9926,
      "step": 16700
    },
    {
      "epoch": 0.058564384022542555,
      "grad_norm": 3.625,
      "learning_rate": 9.760043925746796e-06,
      "loss": 1.1294,
      "step": 16710
    },
    {
      "epoch": 0.058599431529438153,
      "grad_norm": 3.34375,
      "learning_rate": 9.765885115480321e-06,
      "loss": 1.0216,
      "step": 16720
    },
    {
      "epoch": 0.05863447903633375,
      "grad_norm": 3.0625,
      "learning_rate": 9.771726305213846e-06,
      "loss": 1.1476,
      "step": 16730
    },
    {
      "epoch": 0.05866952654322935,
      "grad_norm": 3.71875,
      "learning_rate": 9.777567494947373e-06,
      "loss": 1.0698,
      "step": 16740
    },
    {
      "epoch": 0.05870457405012494,
      "grad_norm": 3.265625,
      "learning_rate": 9.783408684680896e-06,
      "loss": 1.0826,
      "step": 16750
    },
    {
      "epoch": 0.05873962155702054,
      "grad_norm": 6.03125,
      "learning_rate": 9.789249874414422e-06,
      "loss": 1.0908,
      "step": 16760
    },
    {
      "epoch": 0.05877466906391614,
      "grad_norm": 3.109375,
      "learning_rate": 9.795091064147946e-06,
      "loss": 1.0275,
      "step": 16770
    },
    {
      "epoch": 0.058809716570811736,
      "grad_norm": 3.421875,
      "learning_rate": 9.800932253881472e-06,
      "loss": 1.0795,
      "step": 16780
    },
    {
      "epoch": 0.058844764077707334,
      "grad_norm": 3.71875,
      "learning_rate": 9.806773443614995e-06,
      "loss": 1.1201,
      "step": 16790
    },
    {
      "epoch": 0.05887981158460293,
      "grad_norm": 3.703125,
      "learning_rate": 9.812614633348522e-06,
      "loss": 1.0557,
      "step": 16800
    },
    {
      "epoch": 0.058914859091498524,
      "grad_norm": 3.140625,
      "learning_rate": 9.818455823082045e-06,
      "loss": 1.0291,
      "step": 16810
    },
    {
      "epoch": 0.05894990659839412,
      "grad_norm": 3.84375,
      "learning_rate": 9.824297012815571e-06,
      "loss": 1.0571,
      "step": 16820
    },
    {
      "epoch": 0.05898495410528972,
      "grad_norm": 3.578125,
      "learning_rate": 9.830138202549095e-06,
      "loss": 1.0663,
      "step": 16830
    },
    {
      "epoch": 0.05902000161218532,
      "grad_norm": 3.9375,
      "learning_rate": 9.835979392282621e-06,
      "loss": 1.1654,
      "step": 16840
    },
    {
      "epoch": 0.05905504911908092,
      "grad_norm": 3.296875,
      "learning_rate": 9.841820582016146e-06,
      "loss": 1.0594,
      "step": 16850
    },
    {
      "epoch": 0.05909009662597651,
      "grad_norm": 3.671875,
      "learning_rate": 9.84766177174967e-06,
      "loss": 1.0528,
      "step": 16860
    },
    {
      "epoch": 0.05912514413287211,
      "grad_norm": 3.296875,
      "learning_rate": 9.853502961483196e-06,
      "loss": 1.1084,
      "step": 16870
    },
    {
      "epoch": 0.059160191639767705,
      "grad_norm": 3.5,
      "learning_rate": 9.85934415121672e-06,
      "loss": 1.1124,
      "step": 16880
    },
    {
      "epoch": 0.0591952391466633,
      "grad_norm": 3.34375,
      "learning_rate": 9.865185340950245e-06,
      "loss": 1.0393,
      "step": 16890
    },
    {
      "epoch": 0.0592302866535589,
      "grad_norm": 3.390625,
      "learning_rate": 9.87102653068377e-06,
      "loss": 1.1265,
      "step": 16900
    },
    {
      "epoch": 0.05926533416045449,
      "grad_norm": 3.71875,
      "learning_rate": 9.876867720417295e-06,
      "loss": 1.0255,
      "step": 16910
    },
    {
      "epoch": 0.05930038166735009,
      "grad_norm": 3.421875,
      "learning_rate": 9.88270891015082e-06,
      "loss": 1.0408,
      "step": 16920
    },
    {
      "epoch": 0.05933542917424569,
      "grad_norm": 3.46875,
      "learning_rate": 9.888550099884345e-06,
      "loss": 1.0987,
      "step": 16930
    },
    {
      "epoch": 0.05937047668114129,
      "grad_norm": 3.234375,
      "learning_rate": 9.89439128961787e-06,
      "loss": 1.1326,
      "step": 16940
    },
    {
      "epoch": 0.059405524188036886,
      "grad_norm": 3.03125,
      "learning_rate": 9.900232479351394e-06,
      "loss": 1.0696,
      "step": 16950
    },
    {
      "epoch": 0.059440571694932484,
      "grad_norm": 3.546875,
      "learning_rate": 9.90607366908492e-06,
      "loss": 1.1059,
      "step": 16960
    },
    {
      "epoch": 0.059475619201828075,
      "grad_norm": 3.328125,
      "learning_rate": 9.911914858818444e-06,
      "loss": 1.0461,
      "step": 16970
    },
    {
      "epoch": 0.059510666708723674,
      "grad_norm": 3.21875,
      "learning_rate": 9.917756048551969e-06,
      "loss": 1.0839,
      "step": 16980
    },
    {
      "epoch": 0.05954571421561927,
      "grad_norm": 3.640625,
      "learning_rate": 9.923597238285494e-06,
      "loss": 1.1724,
      "step": 16990
    },
    {
      "epoch": 0.05958076172251487,
      "grad_norm": 3.625,
      "learning_rate": 9.929438428019019e-06,
      "loss": 1.1561,
      "step": 17000
    },
    {
      "epoch": 0.05961580922941047,
      "grad_norm": 3.3125,
      "learning_rate": 9.935279617752544e-06,
      "loss": 1.0249,
      "step": 17010
    },
    {
      "epoch": 0.05965085673630606,
      "grad_norm": 3.5,
      "learning_rate": 9.94112080748607e-06,
      "loss": 1.0957,
      "step": 17020
    },
    {
      "epoch": 0.05968590424320166,
      "grad_norm": 3.359375,
      "learning_rate": 9.946961997219593e-06,
      "loss": 1.042,
      "step": 17030
    },
    {
      "epoch": 0.059720951750097256,
      "grad_norm": 3.4375,
      "learning_rate": 9.95280318695312e-06,
      "loss": 1.0221,
      "step": 17040
    },
    {
      "epoch": 0.059755999256992855,
      "grad_norm": 4.21875,
      "learning_rate": 9.958644376686645e-06,
      "loss": 1.1455,
      "step": 17050
    },
    {
      "epoch": 0.05979104676388845,
      "grad_norm": 3.15625,
      "learning_rate": 9.96448556642017e-06,
      "loss": 0.9698,
      "step": 17060
    },
    {
      "epoch": 0.05982609427078405,
      "grad_norm": 3.375,
      "learning_rate": 9.970326756153694e-06,
      "loss": 1.1419,
      "step": 17070
    },
    {
      "epoch": 0.05986114177767964,
      "grad_norm": 3.671875,
      "learning_rate": 9.976167945887219e-06,
      "loss": 1.068,
      "step": 17080
    },
    {
      "epoch": 0.05989618928457524,
      "grad_norm": 3.796875,
      "learning_rate": 9.982009135620744e-06,
      "loss": 1.0543,
      "step": 17090
    },
    {
      "epoch": 0.05993123679147084,
      "grad_norm": 3.390625,
      "learning_rate": 9.987850325354269e-06,
      "loss": 1.043,
      "step": 17100
    },
    {
      "epoch": 0.05996628429836644,
      "grad_norm": 3.328125,
      "learning_rate": 9.993691515087794e-06,
      "loss": 1.0702,
      "step": 17110
    },
    {
      "epoch": 0.060001331805262036,
      "grad_norm": 3.515625,
      "learning_rate": 9.999532704821318e-06,
      "loss": 1.0697,
      "step": 17120
    },
    {
      "epoch": 0.06003637931215763,
      "grad_norm": 3.453125,
      "learning_rate": 1.0005373894554843e-05,
      "loss": 1.0471,
      "step": 17130
    },
    {
      "epoch": 0.060071426819053225,
      "grad_norm": 3.796875,
      "learning_rate": 1.0011215084288368e-05,
      "loss": 1.1201,
      "step": 17140
    },
    {
      "epoch": 0.060106474325948824,
      "grad_norm": 3.3125,
      "learning_rate": 1.0017056274021893e-05,
      "loss": 1.0712,
      "step": 17150
    },
    {
      "epoch": 0.06014152183284442,
      "grad_norm": 3.359375,
      "learning_rate": 1.0022897463755418e-05,
      "loss": 1.0648,
      "step": 17160
    },
    {
      "epoch": 0.06017656933974002,
      "grad_norm": 3.515625,
      "learning_rate": 1.0028738653488943e-05,
      "loss": 1.1024,
      "step": 17170
    },
    {
      "epoch": 0.06021161684663561,
      "grad_norm": 3.125,
      "learning_rate": 1.0034579843222468e-05,
      "loss": 1.1382,
      "step": 17180
    },
    {
      "epoch": 0.06024666435353121,
      "grad_norm": 3.515625,
      "learning_rate": 1.0040421032955994e-05,
      "loss": 1.0321,
      "step": 17190
    },
    {
      "epoch": 0.06028171186042681,
      "grad_norm": 3.390625,
      "learning_rate": 1.0046262222689517e-05,
      "loss": 1.0641,
      "step": 17200
    },
    {
      "epoch": 0.060316759367322406,
      "grad_norm": 3.765625,
      "learning_rate": 1.0052103412423044e-05,
      "loss": 1.0895,
      "step": 17210
    },
    {
      "epoch": 0.060351806874218004,
      "grad_norm": 3.5,
      "learning_rate": 1.0057944602156567e-05,
      "loss": 1.1401,
      "step": 17220
    },
    {
      "epoch": 0.0603868543811136,
      "grad_norm": 3.546875,
      "learning_rate": 1.0063785791890093e-05,
      "loss": 1.1212,
      "step": 17230
    },
    {
      "epoch": 0.060421901888009194,
      "grad_norm": 3.34375,
      "learning_rate": 1.0069626981623617e-05,
      "loss": 1.0347,
      "step": 17240
    },
    {
      "epoch": 0.06045694939490479,
      "grad_norm": 3.21875,
      "learning_rate": 1.0075468171357143e-05,
      "loss": 1.1416,
      "step": 17250
    },
    {
      "epoch": 0.06049199690180039,
      "grad_norm": 3.453125,
      "learning_rate": 1.0081309361090666e-05,
      "loss": 1.0643,
      "step": 17260
    },
    {
      "epoch": 0.06052704440869599,
      "grad_norm": 3.296875,
      "learning_rate": 1.0087150550824193e-05,
      "loss": 1.0643,
      "step": 17270
    },
    {
      "epoch": 0.06056209191559159,
      "grad_norm": 3.46875,
      "learning_rate": 1.0092991740557716e-05,
      "loss": 1.0306,
      "step": 17280
    },
    {
      "epoch": 0.06059713942248718,
      "grad_norm": 3.09375,
      "learning_rate": 1.0098832930291243e-05,
      "loss": 0.9609,
      "step": 17290
    },
    {
      "epoch": 0.06063218692938278,
      "grad_norm": 4.03125,
      "learning_rate": 1.0104674120024767e-05,
      "loss": 1.0925,
      "step": 17300
    },
    {
      "epoch": 0.060667234436278375,
      "grad_norm": 3.578125,
      "learning_rate": 1.0110515309758292e-05,
      "loss": 1.0427,
      "step": 17310
    },
    {
      "epoch": 0.06070228194317397,
      "grad_norm": 3.4375,
      "learning_rate": 1.0116356499491817e-05,
      "loss": 1.1852,
      "step": 17320
    },
    {
      "epoch": 0.06073732945006957,
      "grad_norm": 3.40625,
      "learning_rate": 1.0122197689225342e-05,
      "loss": 1.0768,
      "step": 17330
    },
    {
      "epoch": 0.06077237695696516,
      "grad_norm": 3.46875,
      "learning_rate": 1.0128038878958867e-05,
      "loss": 1.0464,
      "step": 17340
    },
    {
      "epoch": 0.06080742446386076,
      "grad_norm": 3.40625,
      "learning_rate": 1.0133880068692392e-05,
      "loss": 1.0341,
      "step": 17350
    },
    {
      "epoch": 0.06084247197075636,
      "grad_norm": 3.25,
      "learning_rate": 1.0139721258425916e-05,
      "loss": 1.136,
      "step": 17360
    },
    {
      "epoch": 0.06087751947765196,
      "grad_norm": 3.171875,
      "learning_rate": 1.0145562448159441e-05,
      "loss": 1.1126,
      "step": 17370
    },
    {
      "epoch": 0.060912566984547556,
      "grad_norm": 3.296875,
      "learning_rate": 1.0151403637892966e-05,
      "loss": 1.0593,
      "step": 17380
    },
    {
      "epoch": 0.060947614491443154,
      "grad_norm": 3.421875,
      "learning_rate": 1.0157244827626491e-05,
      "loss": 1.0465,
      "step": 17390
    },
    {
      "epoch": 0.060982661998338746,
      "grad_norm": 3.609375,
      "learning_rate": 1.0163086017360018e-05,
      "loss": 1.0659,
      "step": 17400
    },
    {
      "epoch": 0.061017709505234344,
      "grad_norm": 3.765625,
      "learning_rate": 1.016892720709354e-05,
      "loss": 1.0445,
      "step": 17410
    },
    {
      "epoch": 0.06105275701212994,
      "grad_norm": 3.1875,
      "learning_rate": 1.0174768396827067e-05,
      "loss": 1.0514,
      "step": 17420
    },
    {
      "epoch": 0.06108780451902554,
      "grad_norm": 3.78125,
      "learning_rate": 1.018060958656059e-05,
      "loss": 1.1064,
      "step": 17430
    },
    {
      "epoch": 0.06112285202592114,
      "grad_norm": 4.3125,
      "learning_rate": 1.0186450776294117e-05,
      "loss": 1.1008,
      "step": 17440
    },
    {
      "epoch": 0.06115789953281673,
      "grad_norm": 3.515625,
      "learning_rate": 1.019229196602764e-05,
      "loss": 1.0784,
      "step": 17450
    },
    {
      "epoch": 0.06119294703971233,
      "grad_norm": 4.0,
      "learning_rate": 1.0198133155761167e-05,
      "loss": 1.1057,
      "step": 17460
    },
    {
      "epoch": 0.061227994546607926,
      "grad_norm": 3.84375,
      "learning_rate": 1.0203974345494691e-05,
      "loss": 1.0948,
      "step": 17470
    },
    {
      "epoch": 0.061263042053503525,
      "grad_norm": 3.296875,
      "learning_rate": 1.0209815535228216e-05,
      "loss": 1.0738,
      "step": 17480
    },
    {
      "epoch": 0.06129808956039912,
      "grad_norm": 3.5625,
      "learning_rate": 1.0215656724961741e-05,
      "loss": 1.079,
      "step": 17490
    },
    {
      "epoch": 0.06133313706729472,
      "grad_norm": 3.21875,
      "learning_rate": 1.0221497914695266e-05,
      "loss": 1.0713,
      "step": 17500
    },
    {
      "epoch": 0.06136818457419031,
      "grad_norm": 3.6875,
      "learning_rate": 1.022733910442879e-05,
      "loss": 1.1008,
      "step": 17510
    },
    {
      "epoch": 0.06140323208108591,
      "grad_norm": 3.28125,
      "learning_rate": 1.0233180294162316e-05,
      "loss": 1.0913,
      "step": 17520
    },
    {
      "epoch": 0.06143827958798151,
      "grad_norm": 3.953125,
      "learning_rate": 1.023902148389584e-05,
      "loss": 1.0832,
      "step": 17530
    },
    {
      "epoch": 0.06147332709487711,
      "grad_norm": 3.203125,
      "learning_rate": 1.0244862673629365e-05,
      "loss": 0.9536,
      "step": 17540
    },
    {
      "epoch": 0.061508374601772706,
      "grad_norm": 3.609375,
      "learning_rate": 1.025070386336289e-05,
      "loss": 1.0451,
      "step": 17550
    },
    {
      "epoch": 0.0615434221086683,
      "grad_norm": 3.40625,
      "learning_rate": 1.0256545053096415e-05,
      "loss": 1.1544,
      "step": 17560
    },
    {
      "epoch": 0.061578469615563895,
      "grad_norm": 3.671875,
      "learning_rate": 1.026238624282994e-05,
      "loss": 1.0302,
      "step": 17570
    },
    {
      "epoch": 0.061613517122459494,
      "grad_norm": 3.484375,
      "learning_rate": 1.0268227432563465e-05,
      "loss": 1.0543,
      "step": 17580
    },
    {
      "epoch": 0.06164856462935509,
      "grad_norm": 3.6875,
      "learning_rate": 1.027406862229699e-05,
      "loss": 1.0757,
      "step": 17590
    },
    {
      "epoch": 0.06168361213625069,
      "grad_norm": 3.765625,
      "learning_rate": 1.0279909812030514e-05,
      "loss": 1.0363,
      "step": 17600
    },
    {
      "epoch": 0.06171865964314628,
      "grad_norm": 3.421875,
      "learning_rate": 1.028575100176404e-05,
      "loss": 1.1009,
      "step": 17610
    },
    {
      "epoch": 0.06175370715004188,
      "grad_norm": 3.140625,
      "learning_rate": 1.0291592191497564e-05,
      "loss": 1.0699,
      "step": 17620
    },
    {
      "epoch": 0.06178875465693748,
      "grad_norm": 3.359375,
      "learning_rate": 1.0297433381231089e-05,
      "loss": 1.0172,
      "step": 17630
    },
    {
      "epoch": 0.061823802163833076,
      "grad_norm": 7.71875,
      "learning_rate": 1.0303274570964615e-05,
      "loss": 1.0704,
      "step": 17640
    },
    {
      "epoch": 0.061858849670728674,
      "grad_norm": 3.34375,
      "learning_rate": 1.0309115760698139e-05,
      "loss": 1.0899,
      "step": 17650
    },
    {
      "epoch": 0.06189389717762427,
      "grad_norm": 3.75,
      "learning_rate": 1.0314956950431665e-05,
      "loss": 1.0377,
      "step": 17660
    },
    {
      "epoch": 0.061928944684519864,
      "grad_norm": 3.0,
      "learning_rate": 1.0320798140165188e-05,
      "loss": 1.0381,
      "step": 17670
    },
    {
      "epoch": 0.06196399219141546,
      "grad_norm": 3.203125,
      "learning_rate": 1.0326639329898715e-05,
      "loss": 1.0377,
      "step": 17680
    },
    {
      "epoch": 0.06199903969831106,
      "grad_norm": 3.65625,
      "learning_rate": 1.0332480519632238e-05,
      "loss": 1.064,
      "step": 17690
    },
    {
      "epoch": 0.06203408720520666,
      "grad_norm": 3.484375,
      "learning_rate": 1.0338321709365765e-05,
      "loss": 0.9959,
      "step": 17700
    },
    {
      "epoch": 0.06206913471210226,
      "grad_norm": 3.34375,
      "learning_rate": 1.0344162899099288e-05,
      "loss": 1.0604,
      "step": 17710
    },
    {
      "epoch": 0.06210418221899785,
      "grad_norm": 3.828125,
      "learning_rate": 1.0350004088832814e-05,
      "loss": 1.0623,
      "step": 17720
    },
    {
      "epoch": 0.06213922972589345,
      "grad_norm": 3.890625,
      "learning_rate": 1.0355845278566339e-05,
      "loss": 1.0354,
      "step": 17730
    },
    {
      "epoch": 0.062174277232789045,
      "grad_norm": 3.453125,
      "learning_rate": 1.0361686468299864e-05,
      "loss": 1.0419,
      "step": 17740
    },
    {
      "epoch": 0.06220932473968464,
      "grad_norm": 3.078125,
      "learning_rate": 1.0367527658033389e-05,
      "loss": 1.0819,
      "step": 17750
    },
    {
      "epoch": 0.06224437224658024,
      "grad_norm": 3.421875,
      "learning_rate": 1.0373368847766914e-05,
      "loss": 1.1075,
      "step": 17760
    },
    {
      "epoch": 0.06227941975347584,
      "grad_norm": 3.6875,
      "learning_rate": 1.0379210037500438e-05,
      "loss": 1.0172,
      "step": 17770
    },
    {
      "epoch": 0.06231446726037143,
      "grad_norm": 3.296875,
      "learning_rate": 1.0385051227233963e-05,
      "loss": 1.0891,
      "step": 17780
    },
    {
      "epoch": 0.06234951476726703,
      "grad_norm": 3.40625,
      "learning_rate": 1.0390892416967488e-05,
      "loss": 1.005,
      "step": 17790
    },
    {
      "epoch": 0.06238456227416263,
      "grad_norm": 3.75,
      "learning_rate": 1.0396733606701013e-05,
      "loss": 1.0972,
      "step": 17800
    },
    {
      "epoch": 0.062419609781058226,
      "grad_norm": 3.265625,
      "learning_rate": 1.040257479643454e-05,
      "loss": 1.1165,
      "step": 17810
    },
    {
      "epoch": 0.062454657287953824,
      "grad_norm": 3.796875,
      "learning_rate": 1.0408415986168063e-05,
      "loss": 1.121,
      "step": 17820
    },
    {
      "epoch": 0.062489704794849416,
      "grad_norm": 3.484375,
      "learning_rate": 1.041425717590159e-05,
      "loss": 1.007,
      "step": 17830
    },
    {
      "epoch": 0.06252475230174502,
      "grad_norm": 3.3125,
      "learning_rate": 1.0420098365635112e-05,
      "loss": 1.0116,
      "step": 17840
    },
    {
      "epoch": 0.06255979980864061,
      "grad_norm": 3.953125,
      "learning_rate": 1.0425939555368639e-05,
      "loss": 1.1079,
      "step": 17850
    },
    {
      "epoch": 0.0625948473155362,
      "grad_norm": 2.703125,
      "learning_rate": 1.0431780745102162e-05,
      "loss": 1.0376,
      "step": 17860
    },
    {
      "epoch": 0.06262989482243181,
      "grad_norm": 3.6875,
      "learning_rate": 1.0437621934835689e-05,
      "loss": 1.0997,
      "step": 17870
    },
    {
      "epoch": 0.0626649423293274,
      "grad_norm": 3.375,
      "learning_rate": 1.0443463124569212e-05,
      "loss": 1.0946,
      "step": 17880
    },
    {
      "epoch": 0.062699989836223,
      "grad_norm": 3.640625,
      "learning_rate": 1.0449304314302738e-05,
      "loss": 1.1541,
      "step": 17890
    },
    {
      "epoch": 0.0627350373431186,
      "grad_norm": 3.625,
      "learning_rate": 1.0455145504036261e-05,
      "loss": 1.0856,
      "step": 17900
    },
    {
      "epoch": 0.06277008485001419,
      "grad_norm": 3.453125,
      "learning_rate": 1.0460986693769788e-05,
      "loss": 1.1586,
      "step": 17910
    },
    {
      "epoch": 0.06280513235690979,
      "grad_norm": 3.34375,
      "learning_rate": 1.0466827883503313e-05,
      "loss": 1.0955,
      "step": 17920
    },
    {
      "epoch": 0.06284017986380538,
      "grad_norm": 3.296875,
      "learning_rate": 1.0472669073236838e-05,
      "loss": 1.0922,
      "step": 17930
    },
    {
      "epoch": 0.06287522737070099,
      "grad_norm": 3.890625,
      "learning_rate": 1.0478510262970363e-05,
      "loss": 1.0998,
      "step": 17940
    },
    {
      "epoch": 0.06291027487759658,
      "grad_norm": 3.546875,
      "learning_rate": 1.0484351452703887e-05,
      "loss": 1.0684,
      "step": 17950
    },
    {
      "epoch": 0.06294532238449219,
      "grad_norm": 2.96875,
      "learning_rate": 1.0490192642437412e-05,
      "loss": 0.9659,
      "step": 17960
    },
    {
      "epoch": 0.06298036989138778,
      "grad_norm": 3.28125,
      "learning_rate": 1.0496033832170937e-05,
      "loss": 1.1193,
      "step": 17970
    },
    {
      "epoch": 0.06301541739828337,
      "grad_norm": 3.15625,
      "learning_rate": 1.0501875021904462e-05,
      "loss": 1.0939,
      "step": 17980
    },
    {
      "epoch": 0.06305046490517897,
      "grad_norm": 3.359375,
      "learning_rate": 1.0507716211637987e-05,
      "loss": 1.1484,
      "step": 17990
    },
    {
      "epoch": 0.06308551241207457,
      "grad_norm": 3.765625,
      "learning_rate": 1.0513557401371512e-05,
      "loss": 1.1124,
      "step": 18000
    },
    {
      "epoch": 0.06312055991897017,
      "grad_norm": 3.625,
      "learning_rate": 1.0519398591105036e-05,
      "loss": 1.1269,
      "step": 18010
    },
    {
      "epoch": 0.06315560742586576,
      "grad_norm": 3.71875,
      "learning_rate": 1.0525239780838561e-05,
      "loss": 1.0779,
      "step": 18020
    },
    {
      "epoch": 0.06319065493276135,
      "grad_norm": 3.640625,
      "learning_rate": 1.0531080970572086e-05,
      "loss": 1.0785,
      "step": 18030
    },
    {
      "epoch": 0.06322570243965696,
      "grad_norm": 3.5,
      "learning_rate": 1.0536922160305611e-05,
      "loss": 1.0818,
      "step": 18040
    },
    {
      "epoch": 0.06326074994655255,
      "grad_norm": 3.484375,
      "learning_rate": 1.0542763350039136e-05,
      "loss": 1.1505,
      "step": 18050
    },
    {
      "epoch": 0.06329579745344815,
      "grad_norm": 3.234375,
      "learning_rate": 1.054860453977266e-05,
      "loss": 1.1269,
      "step": 18060
    },
    {
      "epoch": 0.06333084496034375,
      "grad_norm": 3.375,
      "learning_rate": 1.0554445729506185e-05,
      "loss": 1.1018,
      "step": 18070
    },
    {
      "epoch": 0.06336589246723934,
      "grad_norm": 3.75,
      "learning_rate": 1.0560286919239712e-05,
      "loss": 1.0931,
      "step": 18080
    },
    {
      "epoch": 0.06340093997413494,
      "grad_norm": 3.265625,
      "learning_rate": 1.0566128108973237e-05,
      "loss": 1.0889,
      "step": 18090
    },
    {
      "epoch": 0.06343598748103053,
      "grad_norm": 3.859375,
      "learning_rate": 1.0571969298706762e-05,
      "loss": 1.111,
      "step": 18100
    },
    {
      "epoch": 0.06347103498792614,
      "grad_norm": 3.234375,
      "learning_rate": 1.0577810488440287e-05,
      "loss": 1.0697,
      "step": 18110
    },
    {
      "epoch": 0.06350608249482173,
      "grad_norm": 3.96875,
      "learning_rate": 1.0583651678173811e-05,
      "loss": 1.0773,
      "step": 18120
    },
    {
      "epoch": 0.06354113000171732,
      "grad_norm": 3.25,
      "learning_rate": 1.0589492867907336e-05,
      "loss": 1.0763,
      "step": 18130
    },
    {
      "epoch": 0.06357617750861293,
      "grad_norm": 3.59375,
      "learning_rate": 1.0595334057640861e-05,
      "loss": 1.0277,
      "step": 18140
    },
    {
      "epoch": 0.06361122501550852,
      "grad_norm": 3.65625,
      "learning_rate": 1.0601175247374386e-05,
      "loss": 1.0575,
      "step": 18150
    },
    {
      "epoch": 0.06364627252240412,
      "grad_norm": 3.640625,
      "learning_rate": 1.060701643710791e-05,
      "loss": 1.0493,
      "step": 18160
    },
    {
      "epoch": 0.06368132002929972,
      "grad_norm": 3.421875,
      "learning_rate": 1.0612857626841436e-05,
      "loss": 1.1148,
      "step": 18170
    },
    {
      "epoch": 0.0637163675361953,
      "grad_norm": 3.640625,
      "learning_rate": 1.061869881657496e-05,
      "loss": 1.133,
      "step": 18180
    },
    {
      "epoch": 0.06375141504309091,
      "grad_norm": 3.484375,
      "learning_rate": 1.0624540006308485e-05,
      "loss": 1.0401,
      "step": 18190
    },
    {
      "epoch": 0.0637864625499865,
      "grad_norm": 4.0625,
      "learning_rate": 1.063038119604201e-05,
      "loss": 1.0552,
      "step": 18200
    },
    {
      "epoch": 0.06382151005688211,
      "grad_norm": 3.296875,
      "learning_rate": 1.0636222385775535e-05,
      "loss": 1.0833,
      "step": 18210
    },
    {
      "epoch": 0.0638565575637777,
      "grad_norm": 3.359375,
      "learning_rate": 1.064206357550906e-05,
      "loss": 1.0767,
      "step": 18220
    },
    {
      "epoch": 0.06389160507067329,
      "grad_norm": 3.453125,
      "learning_rate": 1.0647904765242585e-05,
      "loss": 1.0855,
      "step": 18230
    },
    {
      "epoch": 0.0639266525775689,
      "grad_norm": 3.984375,
      "learning_rate": 1.065374595497611e-05,
      "loss": 0.9823,
      "step": 18240
    },
    {
      "epoch": 0.06396170008446449,
      "grad_norm": 3.625,
      "learning_rate": 1.0659587144709634e-05,
      "loss": 1.0938,
      "step": 18250
    },
    {
      "epoch": 0.06399674759136009,
      "grad_norm": 3.703125,
      "learning_rate": 1.0665428334443161e-05,
      "loss": 0.9973,
      "step": 18260
    },
    {
      "epoch": 0.06403179509825568,
      "grad_norm": 3.734375,
      "learning_rate": 1.0671269524176684e-05,
      "loss": 1.0164,
      "step": 18270
    },
    {
      "epoch": 0.06406684260515129,
      "grad_norm": 3.609375,
      "learning_rate": 1.067711071391021e-05,
      "loss": 1.0279,
      "step": 18280
    },
    {
      "epoch": 0.06410189011204688,
      "grad_norm": 3.859375,
      "learning_rate": 1.0682951903643734e-05,
      "loss": 1.0273,
      "step": 18290
    },
    {
      "epoch": 0.06413693761894247,
      "grad_norm": 3.546875,
      "learning_rate": 1.068879309337726e-05,
      "loss": 1.108,
      "step": 18300
    },
    {
      "epoch": 0.06417198512583808,
      "grad_norm": 3.515625,
      "learning_rate": 1.0694634283110783e-05,
      "loss": 1.0799,
      "step": 18310
    },
    {
      "epoch": 0.06420703263273367,
      "grad_norm": 3.4375,
      "learning_rate": 1.070047547284431e-05,
      "loss": 1.1282,
      "step": 18320
    },
    {
      "epoch": 0.06424208013962927,
      "grad_norm": 3.296875,
      "learning_rate": 1.0706316662577833e-05,
      "loss": 1.0536,
      "step": 18330
    },
    {
      "epoch": 0.06427712764652486,
      "grad_norm": 3.296875,
      "learning_rate": 1.071215785231136e-05,
      "loss": 1.0785,
      "step": 18340
    },
    {
      "epoch": 0.06431217515342046,
      "grad_norm": 3.5625,
      "learning_rate": 1.0717999042044883e-05,
      "loss": 1.1246,
      "step": 18350
    },
    {
      "epoch": 0.06434722266031606,
      "grad_norm": 3.375,
      "learning_rate": 1.072384023177841e-05,
      "loss": 1.0824,
      "step": 18360
    },
    {
      "epoch": 0.06438227016721165,
      "grad_norm": 3.53125,
      "learning_rate": 1.0729681421511934e-05,
      "loss": 1.0637,
      "step": 18370
    },
    {
      "epoch": 0.06441731767410726,
      "grad_norm": 3.046875,
      "learning_rate": 1.0735522611245459e-05,
      "loss": 1.1046,
      "step": 18380
    },
    {
      "epoch": 0.06445236518100285,
      "grad_norm": 3.46875,
      "learning_rate": 1.0741363800978984e-05,
      "loss": 1.0338,
      "step": 18390
    },
    {
      "epoch": 0.06448741268789844,
      "grad_norm": 3.671875,
      "learning_rate": 1.0747204990712509e-05,
      "loss": 1.1058,
      "step": 18400
    },
    {
      "epoch": 0.06452246019479405,
      "grad_norm": 3.890625,
      "learning_rate": 1.0753046180446035e-05,
      "loss": 1.0757,
      "step": 18410
    },
    {
      "epoch": 0.06455750770168964,
      "grad_norm": 3.359375,
      "learning_rate": 1.0758887370179558e-05,
      "loss": 1.1215,
      "step": 18420
    },
    {
      "epoch": 0.06459255520858524,
      "grad_norm": 3.5625,
      "learning_rate": 1.0764728559913085e-05,
      "loss": 1.0513,
      "step": 18430
    },
    {
      "epoch": 0.06462760271548083,
      "grad_norm": 4.28125,
      "learning_rate": 1.0770569749646608e-05,
      "loss": 1.0053,
      "step": 18440
    },
    {
      "epoch": 0.06466265022237642,
      "grad_norm": 2.921875,
      "learning_rate": 1.0776410939380135e-05,
      "loss": 1.0601,
      "step": 18450
    },
    {
      "epoch": 0.06469769772927203,
      "grad_norm": 3.265625,
      "learning_rate": 1.0782252129113658e-05,
      "loss": 1.0554,
      "step": 18460
    },
    {
      "epoch": 0.06473274523616762,
      "grad_norm": 3.3125,
      "learning_rate": 1.0788093318847184e-05,
      "loss": 1.0107,
      "step": 18470
    },
    {
      "epoch": 0.06476779274306323,
      "grad_norm": 3.6875,
      "learning_rate": 1.0793934508580708e-05,
      "loss": 1.0842,
      "step": 18480
    },
    {
      "epoch": 0.06480284024995882,
      "grad_norm": 3.25,
      "learning_rate": 1.0799775698314234e-05,
      "loss": 1.0511,
      "step": 18490
    },
    {
      "epoch": 0.06483788775685441,
      "grad_norm": 3.8125,
      "learning_rate": 1.0805616888047757e-05,
      "loss": 1.0387,
      "step": 18500
    },
    {
      "epoch": 0.06487293526375001,
      "grad_norm": 3.6875,
      "learning_rate": 1.0811458077781284e-05,
      "loss": 1.1012,
      "step": 18510
    },
    {
      "epoch": 0.0649079827706456,
      "grad_norm": 3.625,
      "learning_rate": 1.0817299267514807e-05,
      "loss": 1.1013,
      "step": 18520
    },
    {
      "epoch": 0.06494303027754121,
      "grad_norm": 3.5625,
      "learning_rate": 1.0823140457248333e-05,
      "loss": 1.0696,
      "step": 18530
    },
    {
      "epoch": 0.0649780777844368,
      "grad_norm": 3.28125,
      "learning_rate": 1.0828981646981858e-05,
      "loss": 1.0497,
      "step": 18540
    },
    {
      "epoch": 0.06501312529133241,
      "grad_norm": 3.203125,
      "learning_rate": 1.0834822836715383e-05,
      "loss": 1.0156,
      "step": 18550
    },
    {
      "epoch": 0.065048172798228,
      "grad_norm": 3.6875,
      "learning_rate": 1.0840664026448908e-05,
      "loss": 1.0171,
      "step": 18560
    },
    {
      "epoch": 0.06508322030512359,
      "grad_norm": 3.5,
      "learning_rate": 1.0846505216182433e-05,
      "loss": 0.9519,
      "step": 18570
    },
    {
      "epoch": 0.0651182678120192,
      "grad_norm": 3.609375,
      "learning_rate": 1.0852346405915958e-05,
      "loss": 1.0864,
      "step": 18580
    },
    {
      "epoch": 0.06515331531891479,
      "grad_norm": 3.21875,
      "learning_rate": 1.0858187595649482e-05,
      "loss": 1.0589,
      "step": 18590
    },
    {
      "epoch": 0.06518836282581039,
      "grad_norm": 3.8125,
      "learning_rate": 1.0864028785383007e-05,
      "loss": 1.101,
      "step": 18600
    },
    {
      "epoch": 0.06522341033270598,
      "grad_norm": 3.921875,
      "learning_rate": 1.0869869975116532e-05,
      "loss": 1.1316,
      "step": 18610
    },
    {
      "epoch": 0.06525845783960157,
      "grad_norm": 3.90625,
      "learning_rate": 1.0875711164850057e-05,
      "loss": 1.1653,
      "step": 18620
    },
    {
      "epoch": 0.06529350534649718,
      "grad_norm": 3.125,
      "learning_rate": 1.0881552354583582e-05,
      "loss": 1.0549,
      "step": 18630
    },
    {
      "epoch": 0.06532855285339277,
      "grad_norm": 3.5625,
      "learning_rate": 1.0887393544317107e-05,
      "loss": 1.0542,
      "step": 18640
    },
    {
      "epoch": 0.06536360036028838,
      "grad_norm": 3.6875,
      "learning_rate": 1.0893234734050632e-05,
      "loss": 1.0753,
      "step": 18650
    },
    {
      "epoch": 0.06539864786718397,
      "grad_norm": 3.78125,
      "learning_rate": 1.0899075923784156e-05,
      "loss": 1.1004,
      "step": 18660
    },
    {
      "epoch": 0.06543369537407956,
      "grad_norm": 3.59375,
      "learning_rate": 1.0904917113517681e-05,
      "loss": 1.0434,
      "step": 18670
    },
    {
      "epoch": 0.06546874288097516,
      "grad_norm": 3.8125,
      "learning_rate": 1.0910758303251206e-05,
      "loss": 1.0721,
      "step": 18680
    },
    {
      "epoch": 0.06550379038787076,
      "grad_norm": 3.703125,
      "learning_rate": 1.0916599492984731e-05,
      "loss": 1.0943,
      "step": 18690
    },
    {
      "epoch": 0.06553883789476636,
      "grad_norm": 3.265625,
      "learning_rate": 1.0922440682718256e-05,
      "loss": 0.9993,
      "step": 18700
    },
    {
      "epoch": 0.06557388540166195,
      "grad_norm": 3.5,
      "learning_rate": 1.0928281872451782e-05,
      "loss": 1.0528,
      "step": 18710
    },
    {
      "epoch": 0.06560893290855754,
      "grad_norm": 3.953125,
      "learning_rate": 1.0934123062185305e-05,
      "loss": 1.0697,
      "step": 18720
    },
    {
      "epoch": 0.06564398041545315,
      "grad_norm": 3.390625,
      "learning_rate": 1.0939964251918832e-05,
      "loss": 1.0399,
      "step": 18730
    },
    {
      "epoch": 0.06567902792234874,
      "grad_norm": 3.796875,
      "learning_rate": 1.0945805441652355e-05,
      "loss": 1.0513,
      "step": 18740
    },
    {
      "epoch": 0.06571407542924435,
      "grad_norm": 2.953125,
      "learning_rate": 1.0951646631385882e-05,
      "loss": 1.0293,
      "step": 18750
    },
    {
      "epoch": 0.06574912293613994,
      "grad_norm": 3.359375,
      "learning_rate": 1.0957487821119407e-05,
      "loss": 1.0823,
      "step": 18760
    },
    {
      "epoch": 0.06578417044303553,
      "grad_norm": 3.8125,
      "learning_rate": 1.0963329010852931e-05,
      "loss": 1.0159,
      "step": 18770
    },
    {
      "epoch": 0.06581921794993113,
      "grad_norm": 3.390625,
      "learning_rate": 1.0969170200586456e-05,
      "loss": 1.0262,
      "step": 18780
    },
    {
      "epoch": 0.06585426545682672,
      "grad_norm": 3.6875,
      "learning_rate": 1.0975011390319981e-05,
      "loss": 1.1218,
      "step": 18790
    },
    {
      "epoch": 0.06588931296372233,
      "grad_norm": 3.6875,
      "learning_rate": 1.0980852580053506e-05,
      "loss": 1.0648,
      "step": 18800
    },
    {
      "epoch": 0.06592436047061792,
      "grad_norm": 3.390625,
      "learning_rate": 1.098669376978703e-05,
      "loss": 1.0394,
      "step": 18810
    },
    {
      "epoch": 0.06595940797751353,
      "grad_norm": 3.625,
      "learning_rate": 1.0992534959520556e-05,
      "loss": 1.0733,
      "step": 18820
    },
    {
      "epoch": 0.06599445548440912,
      "grad_norm": 3.640625,
      "learning_rate": 1.099837614925408e-05,
      "loss": 1.0919,
      "step": 18830
    },
    {
      "epoch": 0.06602950299130471,
      "grad_norm": 3.25,
      "learning_rate": 1.1004217338987605e-05,
      "loss": 1.005,
      "step": 18840
    },
    {
      "epoch": 0.06606455049820031,
      "grad_norm": 3.5,
      "learning_rate": 1.101005852872113e-05,
      "loss": 1.1111,
      "step": 18850
    },
    {
      "epoch": 0.0660995980050959,
      "grad_norm": 3.25,
      "learning_rate": 1.1015899718454657e-05,
      "loss": 1.1228,
      "step": 18860
    },
    {
      "epoch": 0.06613464551199151,
      "grad_norm": 3.828125,
      "learning_rate": 1.102174090818818e-05,
      "loss": 1.051,
      "step": 18870
    },
    {
      "epoch": 0.0661696930188871,
      "grad_norm": 2.984375,
      "learning_rate": 1.1027582097921706e-05,
      "loss": 1.0173,
      "step": 18880
    },
    {
      "epoch": 0.0662047405257827,
      "grad_norm": 4.03125,
      "learning_rate": 1.103342328765523e-05,
      "loss": 1.083,
      "step": 18890
    },
    {
      "epoch": 0.0662397880326783,
      "grad_norm": 3.5,
      "learning_rate": 1.1039264477388756e-05,
      "loss": 1.1009,
      "step": 18900
    },
    {
      "epoch": 0.06627483553957389,
      "grad_norm": 3.453125,
      "learning_rate": 1.104510566712228e-05,
      "loss": 1.0375,
      "step": 18910
    },
    {
      "epoch": 0.0663098830464695,
      "grad_norm": 3.671875,
      "learning_rate": 1.1050946856855806e-05,
      "loss": 1.0682,
      "step": 18920
    },
    {
      "epoch": 0.06634493055336509,
      "grad_norm": 3.53125,
      "learning_rate": 1.1056788046589329e-05,
      "loss": 1.1544,
      "step": 18930
    },
    {
      "epoch": 0.06637997806026068,
      "grad_norm": 3.953125,
      "learning_rate": 1.1062629236322855e-05,
      "loss": 1.0704,
      "step": 18940
    },
    {
      "epoch": 0.06641502556715628,
      "grad_norm": 3.578125,
      "learning_rate": 1.1068470426056379e-05,
      "loss": 1.0251,
      "step": 18950
    },
    {
      "epoch": 0.06645007307405187,
      "grad_norm": 3.1875,
      "learning_rate": 1.1074311615789905e-05,
      "loss": 0.996,
      "step": 18960
    },
    {
      "epoch": 0.06648512058094748,
      "grad_norm": 3.53125,
      "learning_rate": 1.1080152805523428e-05,
      "loss": 1.0463,
      "step": 18970
    },
    {
      "epoch": 0.06652016808784307,
      "grad_norm": 3.5,
      "learning_rate": 1.1085993995256955e-05,
      "loss": 1.0727,
      "step": 18980
    },
    {
      "epoch": 0.06655521559473866,
      "grad_norm": 3.65625,
      "learning_rate": 1.109183518499048e-05,
      "loss": 1.0511,
      "step": 18990
    },
    {
      "epoch": 0.06659026310163427,
      "grad_norm": 3.59375,
      "learning_rate": 1.1097676374724005e-05,
      "loss": 1.019,
      "step": 19000
    },
    {
      "epoch": 0.06662531060852986,
      "grad_norm": 3.53125,
      "learning_rate": 1.110351756445753e-05,
      "loss": 1.0301,
      "step": 19010
    },
    {
      "epoch": 0.06666035811542546,
      "grad_norm": 3.5,
      "learning_rate": 1.1109358754191054e-05,
      "loss": 1.0883,
      "step": 19020
    },
    {
      "epoch": 0.06669540562232106,
      "grad_norm": 3.921875,
      "learning_rate": 1.1115199943924579e-05,
      "loss": 1.087,
      "step": 19030
    },
    {
      "epoch": 0.06673045312921665,
      "grad_norm": 3.1875,
      "learning_rate": 1.1121041133658104e-05,
      "loss": 1.0219,
      "step": 19040
    },
    {
      "epoch": 0.06676550063611225,
      "grad_norm": 3.359375,
      "learning_rate": 1.1126882323391629e-05,
      "loss": 1.2198,
      "step": 19050
    },
    {
      "epoch": 0.06680054814300784,
      "grad_norm": 3.4375,
      "learning_rate": 1.1132723513125154e-05,
      "loss": 0.9659,
      "step": 19060
    },
    {
      "epoch": 0.06683559564990345,
      "grad_norm": 3.703125,
      "learning_rate": 1.1138564702858678e-05,
      "loss": 1.0721,
      "step": 19070
    },
    {
      "epoch": 0.06687064315679904,
      "grad_norm": 3.4375,
      "learning_rate": 1.1144405892592203e-05,
      "loss": 1.1015,
      "step": 19080
    },
    {
      "epoch": 0.06690569066369464,
      "grad_norm": 3.578125,
      "learning_rate": 1.1150247082325728e-05,
      "loss": 1.1133,
      "step": 19090
    },
    {
      "epoch": 0.06694073817059024,
      "grad_norm": 3.796875,
      "learning_rate": 1.1156088272059253e-05,
      "loss": 1.0769,
      "step": 19100
    },
    {
      "epoch": 0.06697578567748583,
      "grad_norm": 3.703125,
      "learning_rate": 1.116192946179278e-05,
      "loss": 1.1572,
      "step": 19110
    },
    {
      "epoch": 0.06701083318438143,
      "grad_norm": 3.640625,
      "learning_rate": 1.1167770651526303e-05,
      "loss": 1.145,
      "step": 19120
    },
    {
      "epoch": 0.06704588069127702,
      "grad_norm": 3.53125,
      "learning_rate": 1.117361184125983e-05,
      "loss": 1.055,
      "step": 19130
    },
    {
      "epoch": 0.06708092819817263,
      "grad_norm": 3.140625,
      "learning_rate": 1.1179453030993354e-05,
      "loss": 0.9817,
      "step": 19140
    },
    {
      "epoch": 0.06711597570506822,
      "grad_norm": 3.578125,
      "learning_rate": 1.1185294220726879e-05,
      "loss": 1.0985,
      "step": 19150
    },
    {
      "epoch": 0.06715102321196381,
      "grad_norm": 3.453125,
      "learning_rate": 1.1191135410460404e-05,
      "loss": 1.1162,
      "step": 19160
    },
    {
      "epoch": 0.06718607071885942,
      "grad_norm": 3.53125,
      "learning_rate": 1.1196976600193929e-05,
      "loss": 1.0523,
      "step": 19170
    },
    {
      "epoch": 0.06722111822575501,
      "grad_norm": 3.28125,
      "learning_rate": 1.1202817789927453e-05,
      "loss": 1.0339,
      "step": 19180
    },
    {
      "epoch": 0.06725616573265061,
      "grad_norm": 3.234375,
      "learning_rate": 1.1208658979660978e-05,
      "loss": 1.0108,
      "step": 19190
    },
    {
      "epoch": 0.0672912132395462,
      "grad_norm": 3.171875,
      "learning_rate": 1.1214500169394503e-05,
      "loss": 1.0999,
      "step": 19200
    },
    {
      "epoch": 0.0673262607464418,
      "grad_norm": 3.453125,
      "learning_rate": 1.1220341359128028e-05,
      "loss": 1.0319,
      "step": 19210
    },
    {
      "epoch": 0.0673613082533374,
      "grad_norm": 3.78125,
      "learning_rate": 1.1226182548861553e-05,
      "loss": 1.0224,
      "step": 19220
    },
    {
      "epoch": 0.06739635576023299,
      "grad_norm": 3.53125,
      "learning_rate": 1.1232023738595078e-05,
      "loss": 1.0962,
      "step": 19230
    },
    {
      "epoch": 0.0674314032671286,
      "grad_norm": 3.28125,
      "learning_rate": 1.1237864928328602e-05,
      "loss": 1.0598,
      "step": 19240
    },
    {
      "epoch": 0.06746645077402419,
      "grad_norm": 3.15625,
      "learning_rate": 1.1243706118062127e-05,
      "loss": 1.0297,
      "step": 19250
    },
    {
      "epoch": 0.06750149828091978,
      "grad_norm": 3.171875,
      "learning_rate": 1.1249547307795652e-05,
      "loss": 1.0402,
      "step": 19260
    },
    {
      "epoch": 0.06753654578781539,
      "grad_norm": 3.25,
      "learning_rate": 1.1255388497529177e-05,
      "loss": 1.0056,
      "step": 19270
    },
    {
      "epoch": 0.06757159329471098,
      "grad_norm": 3.6875,
      "learning_rate": 1.1261229687262702e-05,
      "loss": 0.9715,
      "step": 19280
    },
    {
      "epoch": 0.06760664080160658,
      "grad_norm": 3.28125,
      "learning_rate": 1.1267070876996227e-05,
      "loss": 1.0406,
      "step": 19290
    },
    {
      "epoch": 0.06764168830850217,
      "grad_norm": 3.21875,
      "learning_rate": 1.1272912066729752e-05,
      "loss": 0.9676,
      "step": 19300
    },
    {
      "epoch": 0.06767673581539776,
      "grad_norm": 3.546875,
      "learning_rate": 1.1278753256463278e-05,
      "loss": 1.079,
      "step": 19310
    },
    {
      "epoch": 0.06771178332229337,
      "grad_norm": 3.40625,
      "learning_rate": 1.1284594446196801e-05,
      "loss": 1.0947,
      "step": 19320
    },
    {
      "epoch": 0.06774683082918896,
      "grad_norm": 3.859375,
      "learning_rate": 1.1290435635930328e-05,
      "loss": 1.0097,
      "step": 19330
    },
    {
      "epoch": 0.06778187833608457,
      "grad_norm": 3.828125,
      "learning_rate": 1.1296276825663851e-05,
      "loss": 1.0763,
      "step": 19340
    },
    {
      "epoch": 0.06781692584298016,
      "grad_norm": 4.09375,
      "learning_rate": 1.1302118015397377e-05,
      "loss": 1.0519,
      "step": 19350
    },
    {
      "epoch": 0.06785197334987576,
      "grad_norm": 3.515625,
      "learning_rate": 1.13079592051309e-05,
      "loss": 1.0164,
      "step": 19360
    },
    {
      "epoch": 0.06788702085677135,
      "grad_norm": 3.515625,
      "learning_rate": 1.1313800394864427e-05,
      "loss": 1.1381,
      "step": 19370
    },
    {
      "epoch": 0.06792206836366695,
      "grad_norm": 3.078125,
      "learning_rate": 1.131964158459795e-05,
      "loss": 1.0432,
      "step": 19380
    },
    {
      "epoch": 0.06795711587056255,
      "grad_norm": 3.71875,
      "learning_rate": 1.1325482774331477e-05,
      "loss": 1.068,
      "step": 19390
    },
    {
      "epoch": 0.06799216337745814,
      "grad_norm": 3.4375,
      "learning_rate": 1.1331323964065e-05,
      "loss": 1.0047,
      "step": 19400
    },
    {
      "epoch": 0.06802721088435375,
      "grad_norm": 3.5,
      "learning_rate": 1.1337165153798527e-05,
      "loss": 1.0606,
      "step": 19410
    },
    {
      "epoch": 0.06806225839124934,
      "grad_norm": 3.4375,
      "learning_rate": 1.134300634353205e-05,
      "loss": 1.1035,
      "step": 19420
    },
    {
      "epoch": 0.06809730589814493,
      "grad_norm": 3.5,
      "learning_rate": 1.1348847533265576e-05,
      "loss": 1.0191,
      "step": 19430
    },
    {
      "epoch": 0.06813235340504054,
      "grad_norm": 3.34375,
      "learning_rate": 1.1354688722999101e-05,
      "loss": 1.0467,
      "step": 19440
    },
    {
      "epoch": 0.06816740091193613,
      "grad_norm": 3.21875,
      "learning_rate": 1.1360529912732626e-05,
      "loss": 1.117,
      "step": 19450
    },
    {
      "epoch": 0.06820244841883173,
      "grad_norm": 3.421875,
      "learning_rate": 1.136637110246615e-05,
      "loss": 1.0874,
      "step": 19460
    },
    {
      "epoch": 0.06823749592572732,
      "grad_norm": 3.25,
      "learning_rate": 1.1372212292199676e-05,
      "loss": 1.0724,
      "step": 19470
    },
    {
      "epoch": 0.06827254343262291,
      "grad_norm": 3.578125,
      "learning_rate": 1.1378053481933202e-05,
      "loss": 1.049,
      "step": 19480
    },
    {
      "epoch": 0.06830759093951852,
      "grad_norm": 3.78125,
      "learning_rate": 1.1383894671666725e-05,
      "loss": 1.1,
      "step": 19490
    },
    {
      "epoch": 0.06834263844641411,
      "grad_norm": 3.28125,
      "learning_rate": 1.1389735861400252e-05,
      "loss": 1.0428,
      "step": 19500
    },
    {
      "epoch": 0.06837768595330972,
      "grad_norm": 3.65625,
      "learning_rate": 1.1395577051133775e-05,
      "loss": 1.0383,
      "step": 19510
    },
    {
      "epoch": 0.06841273346020531,
      "grad_norm": 3.140625,
      "learning_rate": 1.1401418240867301e-05,
      "loss": 0.9934,
      "step": 19520
    },
    {
      "epoch": 0.0684477809671009,
      "grad_norm": 3.4375,
      "learning_rate": 1.1407259430600825e-05,
      "loss": 1.0549,
      "step": 19530
    },
    {
      "epoch": 0.0684828284739965,
      "grad_norm": 3.1875,
      "learning_rate": 1.1413100620334351e-05,
      "loss": 1.0285,
      "step": 19540
    },
    {
      "epoch": 0.0685178759808921,
      "grad_norm": 3.296875,
      "learning_rate": 1.1418941810067874e-05,
      "loss": 1.0805,
      "step": 19550
    },
    {
      "epoch": 0.0685529234877877,
      "grad_norm": 3.265625,
      "learning_rate": 1.1424782999801401e-05,
      "loss": 0.987,
      "step": 19560
    },
    {
      "epoch": 0.06858797099468329,
      "grad_norm": 3.828125,
      "learning_rate": 1.1430624189534924e-05,
      "loss": 1.0604,
      "step": 19570
    },
    {
      "epoch": 0.06862301850157888,
      "grad_norm": 3.84375,
      "learning_rate": 1.143646537926845e-05,
      "loss": 1.0006,
      "step": 19580
    },
    {
      "epoch": 0.06865806600847449,
      "grad_norm": 3.09375,
      "learning_rate": 1.1442306569001975e-05,
      "loss": 1.0632,
      "step": 19590
    },
    {
      "epoch": 0.06869311351537008,
      "grad_norm": 2.90625,
      "learning_rate": 1.14481477587355e-05,
      "loss": 1.0923,
      "step": 19600
    },
    {
      "epoch": 0.06872816102226569,
      "grad_norm": 3.171875,
      "learning_rate": 1.1453988948469025e-05,
      "loss": 1.0805,
      "step": 19610
    },
    {
      "epoch": 0.06876320852916128,
      "grad_norm": 3.328125,
      "learning_rate": 1.145983013820255e-05,
      "loss": 1.0997,
      "step": 19620
    },
    {
      "epoch": 0.06879825603605688,
      "grad_norm": 3.28125,
      "learning_rate": 1.1465671327936075e-05,
      "loss": 1.0598,
      "step": 19630
    },
    {
      "epoch": 0.06883330354295247,
      "grad_norm": 3.28125,
      "learning_rate": 1.14715125176696e-05,
      "loss": 0.9702,
      "step": 19640
    },
    {
      "epoch": 0.06886835104984806,
      "grad_norm": 3.40625,
      "learning_rate": 1.1477353707403124e-05,
      "loss": 1.0375,
      "step": 19650
    },
    {
      "epoch": 0.06890339855674367,
      "grad_norm": 3.859375,
      "learning_rate": 1.148319489713665e-05,
      "loss": 1.0911,
      "step": 19660
    },
    {
      "epoch": 0.06893844606363926,
      "grad_norm": 3.515625,
      "learning_rate": 1.1489036086870174e-05,
      "loss": 1.0833,
      "step": 19670
    },
    {
      "epoch": 0.06897349357053487,
      "grad_norm": 3.25,
      "learning_rate": 1.1494877276603699e-05,
      "loss": 0.9736,
      "step": 19680
    },
    {
      "epoch": 0.06900854107743046,
      "grad_norm": 3.0,
      "learning_rate": 1.1500718466337224e-05,
      "loss": 0.9954,
      "step": 19690
    },
    {
      "epoch": 0.06904358858432605,
      "grad_norm": 3.390625,
      "learning_rate": 1.1506559656070749e-05,
      "loss": 1.0508,
      "step": 19700
    },
    {
      "epoch": 0.06907863609122165,
      "grad_norm": 3.25,
      "learning_rate": 1.1512400845804274e-05,
      "loss": 1.0756,
      "step": 19710
    },
    {
      "epoch": 0.06911368359811725,
      "grad_norm": 3.890625,
      "learning_rate": 1.1518242035537798e-05,
      "loss": 1.0311,
      "step": 19720
    },
    {
      "epoch": 0.06914873110501285,
      "grad_norm": 3.609375,
      "learning_rate": 1.1524083225271323e-05,
      "loss": 1.0224,
      "step": 19730
    },
    {
      "epoch": 0.06918377861190844,
      "grad_norm": 3.125,
      "learning_rate": 1.1529924415004848e-05,
      "loss": 1.0051,
      "step": 19740
    },
    {
      "epoch": 0.06921882611880403,
      "grad_norm": 3.609375,
      "learning_rate": 1.1535765604738373e-05,
      "loss": 1.0261,
      "step": 19750
    },
    {
      "epoch": 0.06925387362569964,
      "grad_norm": 3.265625,
      "learning_rate": 1.15416067944719e-05,
      "loss": 1.0366,
      "step": 19760
    },
    {
      "epoch": 0.06928892113259523,
      "grad_norm": 3.5625,
      "learning_rate": 1.1547447984205423e-05,
      "loss": 1.0885,
      "step": 19770
    },
    {
      "epoch": 0.06932396863949083,
      "grad_norm": 3.53125,
      "learning_rate": 1.1553289173938949e-05,
      "loss": 1.0127,
      "step": 19780
    },
    {
      "epoch": 0.06935901614638643,
      "grad_norm": 3.5,
      "learning_rate": 1.1559130363672474e-05,
      "loss": 1.0269,
      "step": 19790
    },
    {
      "epoch": 0.06939406365328202,
      "grad_norm": 3.4375,
      "learning_rate": 1.1564971553405999e-05,
      "loss": 1.1646,
      "step": 19800
    },
    {
      "epoch": 0.06942911116017762,
      "grad_norm": 3.578125,
      "learning_rate": 1.1570812743139524e-05,
      "loss": 1.1402,
      "step": 19810
    },
    {
      "epoch": 0.06946415866707321,
      "grad_norm": 3.375,
      "learning_rate": 1.1576653932873049e-05,
      "loss": 1.0646,
      "step": 19820
    },
    {
      "epoch": 0.06949920617396882,
      "grad_norm": 3.390625,
      "learning_rate": 1.1582495122606573e-05,
      "loss": 1.0411,
      "step": 19830
    },
    {
      "epoch": 0.06953425368086441,
      "grad_norm": 3.703125,
      "learning_rate": 1.1588336312340098e-05,
      "loss": 0.9874,
      "step": 19840
    },
    {
      "epoch": 0.06956930118776,
      "grad_norm": 3.671875,
      "learning_rate": 1.1594177502073623e-05,
      "loss": 1.0353,
      "step": 19850
    },
    {
      "epoch": 0.06960434869465561,
      "grad_norm": 3.453125,
      "learning_rate": 1.1600018691807148e-05,
      "loss": 1.046,
      "step": 19860
    },
    {
      "epoch": 0.0696393962015512,
      "grad_norm": 3.1875,
      "learning_rate": 1.1605859881540673e-05,
      "loss": 1.0387,
      "step": 19870
    },
    {
      "epoch": 0.0696744437084468,
      "grad_norm": 3.140625,
      "learning_rate": 1.1611701071274198e-05,
      "loss": 0.9935,
      "step": 19880
    },
    {
      "epoch": 0.0697094912153424,
      "grad_norm": 4.125,
      "learning_rate": 1.1617542261007722e-05,
      "loss": 1.0212,
      "step": 19890
    },
    {
      "epoch": 0.069744538722238,
      "grad_norm": 3.109375,
      "learning_rate": 1.1623383450741247e-05,
      "loss": 1.0778,
      "step": 19900
    },
    {
      "epoch": 0.06977958622913359,
      "grad_norm": 2.890625,
      "learning_rate": 1.1629224640474772e-05,
      "loss": 1.0514,
      "step": 19910
    },
    {
      "epoch": 0.06981463373602918,
      "grad_norm": 3.28125,
      "learning_rate": 1.1635065830208297e-05,
      "loss": 1.0329,
      "step": 19920
    },
    {
      "epoch": 0.06984968124292479,
      "grad_norm": 3.359375,
      "learning_rate": 1.1640907019941824e-05,
      "loss": 0.9829,
      "step": 19930
    },
    {
      "epoch": 0.06988472874982038,
      "grad_norm": 3.171875,
      "learning_rate": 1.1646748209675347e-05,
      "loss": 1.0678,
      "step": 19940
    },
    {
      "epoch": 0.06991977625671598,
      "grad_norm": 3.4375,
      "learning_rate": 1.1652589399408873e-05,
      "loss": 1.0377,
      "step": 19950
    },
    {
      "epoch": 0.06995482376361158,
      "grad_norm": 3.234375,
      "learning_rate": 1.1658430589142396e-05,
      "loss": 1.0222,
      "step": 19960
    },
    {
      "epoch": 0.06998987127050717,
      "grad_norm": 3.296875,
      "learning_rate": 1.1664271778875923e-05,
      "loss": 0.9744,
      "step": 19970
    },
    {
      "epoch": 0.07002491877740277,
      "grad_norm": 3.265625,
      "learning_rate": 1.1670112968609446e-05,
      "loss": 1.0286,
      "step": 19980
    },
    {
      "epoch": 0.07005996628429836,
      "grad_norm": 3.859375,
      "learning_rate": 1.1675954158342973e-05,
      "loss": 1.1103,
      "step": 19990
    },
    {
      "epoch": 0.07009501379119397,
      "grad_norm": 3.65625,
      "learning_rate": 1.1681795348076496e-05,
      "loss": 0.9929,
      "step": 20000
    },
    {
      "epoch": 0.07009501379119397,
      "eval_loss": 0.9999563097953796,
      "eval_runtime": 553.9489,
      "eval_samples_per_second": 686.771,
      "eval_steps_per_second": 57.231,
      "step": 20000
    },
    {
      "epoch": 0.07013006129808956,
      "grad_norm": 3.640625,
      "learning_rate": 1.1687636537810022e-05,
      "loss": 1.1364,
      "step": 20010
    },
    {
      "epoch": 0.07016510880498515,
      "grad_norm": 3.1875,
      "learning_rate": 1.1693477727543545e-05,
      "loss": 1.0791,
      "step": 20020
    },
    {
      "epoch": 0.07020015631188076,
      "grad_norm": 3.109375,
      "learning_rate": 1.1699318917277072e-05,
      "loss": 1.0857,
      "step": 20030
    },
    {
      "epoch": 0.07023520381877635,
      "grad_norm": 3.109375,
      "learning_rate": 1.1705160107010597e-05,
      "loss": 1.0593,
      "step": 20040
    },
    {
      "epoch": 0.07027025132567195,
      "grad_norm": 3.734375,
      "learning_rate": 1.1711001296744122e-05,
      "loss": 1.0047,
      "step": 20050
    },
    {
      "epoch": 0.07030529883256754,
      "grad_norm": 3.296875,
      "learning_rate": 1.1716842486477646e-05,
      "loss": 1.07,
      "step": 20060
    },
    {
      "epoch": 0.07034034633946314,
      "grad_norm": 3.515625,
      "learning_rate": 1.1722683676211171e-05,
      "loss": 1.006,
      "step": 20070
    },
    {
      "epoch": 0.07037539384635874,
      "grad_norm": 3.375,
      "learning_rate": 1.1728524865944696e-05,
      "loss": 1.0254,
      "step": 20080
    },
    {
      "epoch": 0.07041044135325433,
      "grad_norm": 3.578125,
      "learning_rate": 1.1734366055678221e-05,
      "loss": 1.0904,
      "step": 20090
    },
    {
      "epoch": 0.07044548886014994,
      "grad_norm": 3.40625,
      "learning_rate": 1.1740207245411746e-05,
      "loss": 0.9954,
      "step": 20100
    },
    {
      "epoch": 0.07048053636704553,
      "grad_norm": 3.5625,
      "learning_rate": 1.174604843514527e-05,
      "loss": 1.0686,
      "step": 20110
    },
    {
      "epoch": 0.07051558387394112,
      "grad_norm": 3.125,
      "learning_rate": 1.1751889624878796e-05,
      "loss": 1.0023,
      "step": 20120
    },
    {
      "epoch": 0.07055063138083673,
      "grad_norm": 3.3125,
      "learning_rate": 1.175773081461232e-05,
      "loss": 1.0891,
      "step": 20130
    },
    {
      "epoch": 0.07058567888773232,
      "grad_norm": 3.59375,
      "learning_rate": 1.1763572004345847e-05,
      "loss": 1.0657,
      "step": 20140
    },
    {
      "epoch": 0.07062072639462792,
      "grad_norm": 3.796875,
      "learning_rate": 1.176941319407937e-05,
      "loss": 1.1317,
      "step": 20150
    },
    {
      "epoch": 0.07065577390152351,
      "grad_norm": 3.546875,
      "learning_rate": 1.1775254383812897e-05,
      "loss": 1.0764,
      "step": 20160
    },
    {
      "epoch": 0.07069082140841912,
      "grad_norm": 3.40625,
      "learning_rate": 1.178109557354642e-05,
      "loss": 1.0626,
      "step": 20170
    },
    {
      "epoch": 0.07072586891531471,
      "grad_norm": 3.21875,
      "learning_rate": 1.1786936763279946e-05,
      "loss": 1.0353,
      "step": 20180
    },
    {
      "epoch": 0.0707609164222103,
      "grad_norm": 3.625,
      "learning_rate": 1.179277795301347e-05,
      "loss": 1.0349,
      "step": 20190
    },
    {
      "epoch": 0.0707959639291059,
      "grad_norm": 3.25,
      "learning_rate": 1.1798619142746996e-05,
      "loss": 1.0879,
      "step": 20200
    },
    {
      "epoch": 0.0708310114360015,
      "grad_norm": 3.4375,
      "learning_rate": 1.1804460332480521e-05,
      "loss": 1.0538,
      "step": 20210
    },
    {
      "epoch": 0.0708660589428971,
      "grad_norm": 3.5,
      "learning_rate": 1.1810301522214046e-05,
      "loss": 1.0753,
      "step": 20220
    },
    {
      "epoch": 0.0709011064497927,
      "grad_norm": 3.65625,
      "learning_rate": 1.181614271194757e-05,
      "loss": 1.0471,
      "step": 20230
    },
    {
      "epoch": 0.07093615395668829,
      "grad_norm": 3.3125,
      "learning_rate": 1.1821983901681095e-05,
      "loss": 1.0783,
      "step": 20240
    },
    {
      "epoch": 0.07097120146358389,
      "grad_norm": 3.6875,
      "learning_rate": 1.182782509141462e-05,
      "loss": 1.0941,
      "step": 20250
    },
    {
      "epoch": 0.07100624897047948,
      "grad_norm": 3.65625,
      "learning_rate": 1.1833666281148145e-05,
      "loss": 1.0018,
      "step": 20260
    },
    {
      "epoch": 0.07104129647737509,
      "grad_norm": 4.0625,
      "learning_rate": 1.183950747088167e-05,
      "loss": 1.1443,
      "step": 20270
    },
    {
      "epoch": 0.07107634398427068,
      "grad_norm": 3.703125,
      "learning_rate": 1.1845348660615195e-05,
      "loss": 1.074,
      "step": 20280
    },
    {
      "epoch": 0.07111139149116627,
      "grad_norm": 3.4375,
      "learning_rate": 1.185118985034872e-05,
      "loss": 1.09,
      "step": 20290
    },
    {
      "epoch": 0.07114643899806188,
      "grad_norm": 3.65625,
      "learning_rate": 1.1857031040082244e-05,
      "loss": 1.0426,
      "step": 20300
    },
    {
      "epoch": 0.07118148650495747,
      "grad_norm": 3.578125,
      "learning_rate": 1.186287222981577e-05,
      "loss": 1.0324,
      "step": 20310
    },
    {
      "epoch": 0.07121653401185307,
      "grad_norm": 3.265625,
      "learning_rate": 1.1868713419549294e-05,
      "loss": 1.0653,
      "step": 20320
    },
    {
      "epoch": 0.07125158151874866,
      "grad_norm": 3.671875,
      "learning_rate": 1.1874554609282819e-05,
      "loss": 1.079,
      "step": 20330
    },
    {
      "epoch": 0.07128662902564425,
      "grad_norm": 2.953125,
      "learning_rate": 1.1880395799016344e-05,
      "loss": 1.0255,
      "step": 20340
    },
    {
      "epoch": 0.07132167653253986,
      "grad_norm": 3.40625,
      "learning_rate": 1.1886236988749869e-05,
      "loss": 1.043,
      "step": 20350
    },
    {
      "epoch": 0.07135672403943545,
      "grad_norm": 3.34375,
      "learning_rate": 1.1892078178483394e-05,
      "loss": 1.0115,
      "step": 20360
    },
    {
      "epoch": 0.07139177154633106,
      "grad_norm": 3.5,
      "learning_rate": 1.1897919368216918e-05,
      "loss": 1.1708,
      "step": 20370
    },
    {
      "epoch": 0.07142681905322665,
      "grad_norm": 3.375,
      "learning_rate": 1.1903760557950445e-05,
      "loss": 1.0602,
      "step": 20380
    },
    {
      "epoch": 0.07146186656012224,
      "grad_norm": 3.59375,
      "learning_rate": 1.1909601747683968e-05,
      "loss": 1.0726,
      "step": 20390
    },
    {
      "epoch": 0.07149691406701784,
      "grad_norm": 3.203125,
      "learning_rate": 1.1915442937417495e-05,
      "loss": 1.1171,
      "step": 20400
    },
    {
      "epoch": 0.07153196157391344,
      "grad_norm": 3.203125,
      "learning_rate": 1.1921284127151018e-05,
      "loss": 1.079,
      "step": 20410
    },
    {
      "epoch": 0.07156700908080904,
      "grad_norm": 3.390625,
      "learning_rate": 1.1927125316884544e-05,
      "loss": 1.1175,
      "step": 20420
    },
    {
      "epoch": 0.07160205658770463,
      "grad_norm": 3.421875,
      "learning_rate": 1.1932966506618067e-05,
      "loss": 1.0723,
      "step": 20430
    },
    {
      "epoch": 0.07163710409460024,
      "grad_norm": 3.109375,
      "learning_rate": 1.1938807696351594e-05,
      "loss": 1.0272,
      "step": 20440
    },
    {
      "epoch": 0.07167215160149583,
      "grad_norm": 3.734375,
      "learning_rate": 1.1944648886085117e-05,
      "loss": 1.0251,
      "step": 20450
    },
    {
      "epoch": 0.07170719910839142,
      "grad_norm": 3.5625,
      "learning_rate": 1.1950490075818644e-05,
      "loss": 1.0232,
      "step": 20460
    },
    {
      "epoch": 0.07174224661528703,
      "grad_norm": 3.203125,
      "learning_rate": 1.1956331265552168e-05,
      "loss": 0.946,
      "step": 20470
    },
    {
      "epoch": 0.07177729412218262,
      "grad_norm": 3.703125,
      "learning_rate": 1.1962172455285693e-05,
      "loss": 1.1618,
      "step": 20480
    },
    {
      "epoch": 0.07181234162907822,
      "grad_norm": 3.4375,
      "learning_rate": 1.1968013645019218e-05,
      "loss": 0.9993,
      "step": 20490
    },
    {
      "epoch": 0.07184738913597381,
      "grad_norm": 3.25,
      "learning_rate": 1.1973854834752743e-05,
      "loss": 0.987,
      "step": 20500
    },
    {
      "epoch": 0.0718824366428694,
      "grad_norm": 3.96875,
      "learning_rate": 1.1979696024486268e-05,
      "loss": 1.0867,
      "step": 20510
    },
    {
      "epoch": 0.07191748414976501,
      "grad_norm": 3.734375,
      "learning_rate": 1.1985537214219793e-05,
      "loss": 1.0441,
      "step": 20520
    },
    {
      "epoch": 0.0719525316566606,
      "grad_norm": 3.296875,
      "learning_rate": 1.1991378403953318e-05,
      "loss": 0.9916,
      "step": 20530
    },
    {
      "epoch": 0.0719875791635562,
      "grad_norm": 3.65625,
      "learning_rate": 1.1997219593686842e-05,
      "loss": 1.0879,
      "step": 20540
    },
    {
      "epoch": 0.0720226266704518,
      "grad_norm": 3.40625,
      "learning_rate": 1.2003060783420369e-05,
      "loss": 1.056,
      "step": 20550
    },
    {
      "epoch": 0.07205767417734739,
      "grad_norm": 3.375,
      "learning_rate": 1.2008901973153892e-05,
      "loss": 1.0844,
      "step": 20560
    },
    {
      "epoch": 0.072092721684243,
      "grad_norm": 3.4375,
      "learning_rate": 1.2014743162887419e-05,
      "loss": 1.0976,
      "step": 20570
    },
    {
      "epoch": 0.07212776919113859,
      "grad_norm": 3.421875,
      "learning_rate": 1.2020584352620942e-05,
      "loss": 1.1213,
      "step": 20580
    },
    {
      "epoch": 0.07216281669803419,
      "grad_norm": 3.4375,
      "learning_rate": 1.2026425542354468e-05,
      "loss": 1.0342,
      "step": 20590
    },
    {
      "epoch": 0.07219786420492978,
      "grad_norm": 3.546875,
      "learning_rate": 1.2032266732087991e-05,
      "loss": 1.0079,
      "step": 20600
    },
    {
      "epoch": 0.07223291171182537,
      "grad_norm": 3.265625,
      "learning_rate": 1.2038107921821518e-05,
      "loss": 1.0414,
      "step": 20610
    },
    {
      "epoch": 0.07226795921872098,
      "grad_norm": 3.71875,
      "learning_rate": 1.2043949111555041e-05,
      "loss": 1.0898,
      "step": 20620
    },
    {
      "epoch": 0.07230300672561657,
      "grad_norm": 3.1875,
      "learning_rate": 1.2049790301288568e-05,
      "loss": 1.0508,
      "step": 20630
    },
    {
      "epoch": 0.07233805423251218,
      "grad_norm": 3.546875,
      "learning_rate": 1.2055631491022091e-05,
      "loss": 1.0842,
      "step": 20640
    },
    {
      "epoch": 0.07237310173940777,
      "grad_norm": 3.40625,
      "learning_rate": 1.2061472680755617e-05,
      "loss": 1.0775,
      "step": 20650
    },
    {
      "epoch": 0.07240814924630336,
      "grad_norm": 3.40625,
      "learning_rate": 1.2067313870489142e-05,
      "loss": 1.0381,
      "step": 20660
    },
    {
      "epoch": 0.07244319675319896,
      "grad_norm": 3.640625,
      "learning_rate": 1.2073155060222667e-05,
      "loss": 1.0431,
      "step": 20670
    },
    {
      "epoch": 0.07247824426009455,
      "grad_norm": 3.59375,
      "learning_rate": 1.2078996249956192e-05,
      "loss": 0.9762,
      "step": 20680
    },
    {
      "epoch": 0.07251329176699016,
      "grad_norm": 3.65625,
      "learning_rate": 1.2084837439689717e-05,
      "loss": 1.0374,
      "step": 20690
    },
    {
      "epoch": 0.07254833927388575,
      "grad_norm": 3.609375,
      "learning_rate": 1.2090678629423242e-05,
      "loss": 1.0499,
      "step": 20700
    },
    {
      "epoch": 0.07258338678078136,
      "grad_norm": 7.0,
      "learning_rate": 1.2096519819156766e-05,
      "loss": 1.0682,
      "step": 20710
    },
    {
      "epoch": 0.07261843428767695,
      "grad_norm": 3.375,
      "learning_rate": 1.2102361008890291e-05,
      "loss": 1.1261,
      "step": 20720
    },
    {
      "epoch": 0.07265348179457254,
      "grad_norm": 3.125,
      "learning_rate": 1.2108202198623816e-05,
      "loss": 1.105,
      "step": 20730
    },
    {
      "epoch": 0.07268852930146814,
      "grad_norm": 3.21875,
      "learning_rate": 1.2114043388357341e-05,
      "loss": 1.0357,
      "step": 20740
    },
    {
      "epoch": 0.07272357680836374,
      "grad_norm": 3.890625,
      "learning_rate": 1.2119884578090866e-05,
      "loss": 1.0491,
      "step": 20750
    },
    {
      "epoch": 0.07275862431525934,
      "grad_norm": 3.609375,
      "learning_rate": 1.212572576782439e-05,
      "loss": 1.1098,
      "step": 20760
    },
    {
      "epoch": 0.07279367182215493,
      "grad_norm": 3.46875,
      "learning_rate": 1.2131566957557916e-05,
      "loss": 0.9966,
      "step": 20770
    },
    {
      "epoch": 0.07282871932905052,
      "grad_norm": 3.703125,
      "learning_rate": 1.213740814729144e-05,
      "loss": 1.0618,
      "step": 20780
    },
    {
      "epoch": 0.07286376683594613,
      "grad_norm": 3.4375,
      "learning_rate": 1.2143249337024965e-05,
      "loss": 1.0673,
      "step": 20790
    },
    {
      "epoch": 0.07289881434284172,
      "grad_norm": 3.40625,
      "learning_rate": 1.214909052675849e-05,
      "loss": 0.9878,
      "step": 20800
    },
    {
      "epoch": 0.07293386184973732,
      "grad_norm": 3.109375,
      "learning_rate": 1.2154931716492015e-05,
      "loss": 1.1214,
      "step": 20810
    },
    {
      "epoch": 0.07296890935663292,
      "grad_norm": 3.15625,
      "learning_rate": 1.2160772906225541e-05,
      "loss": 1.0165,
      "step": 20820
    },
    {
      "epoch": 0.07300395686352851,
      "grad_norm": 3.328125,
      "learning_rate": 1.2166614095959066e-05,
      "loss": 1.0587,
      "step": 20830
    },
    {
      "epoch": 0.07303900437042411,
      "grad_norm": 3.109375,
      "learning_rate": 1.2172455285692591e-05,
      "loss": 1.0544,
      "step": 20840
    },
    {
      "epoch": 0.0730740518773197,
      "grad_norm": 3.171875,
      "learning_rate": 1.2178296475426116e-05,
      "loss": 1.0549,
      "step": 20850
    },
    {
      "epoch": 0.07310909938421531,
      "grad_norm": 3.609375,
      "learning_rate": 1.218413766515964e-05,
      "loss": 1.067,
      "step": 20860
    },
    {
      "epoch": 0.0731441468911109,
      "grad_norm": 3.421875,
      "learning_rate": 1.2189978854893166e-05,
      "loss": 1.1192,
      "step": 20870
    },
    {
      "epoch": 0.07317919439800649,
      "grad_norm": 3.5625,
      "learning_rate": 1.219582004462669e-05,
      "loss": 1.1081,
      "step": 20880
    },
    {
      "epoch": 0.0732142419049021,
      "grad_norm": 3.828125,
      "learning_rate": 1.2201661234360215e-05,
      "loss": 1.0627,
      "step": 20890
    },
    {
      "epoch": 0.07324928941179769,
      "grad_norm": 3.890625,
      "learning_rate": 1.220750242409374e-05,
      "loss": 1.0746,
      "step": 20900
    },
    {
      "epoch": 0.0732843369186933,
      "grad_norm": 3.21875,
      "learning_rate": 1.2213343613827265e-05,
      "loss": 0.9967,
      "step": 20910
    },
    {
      "epoch": 0.07331938442558888,
      "grad_norm": 3.59375,
      "learning_rate": 1.221918480356079e-05,
      "loss": 1.0081,
      "step": 20920
    },
    {
      "epoch": 0.07335443193248448,
      "grad_norm": 3.375,
      "learning_rate": 1.2225025993294315e-05,
      "loss": 1.0639,
      "step": 20930
    },
    {
      "epoch": 0.07338947943938008,
      "grad_norm": 2.96875,
      "learning_rate": 1.223086718302784e-05,
      "loss": 1.0439,
      "step": 20940
    },
    {
      "epoch": 0.07342452694627567,
      "grad_norm": 3.515625,
      "learning_rate": 1.2236708372761364e-05,
      "loss": 1.0578,
      "step": 20950
    },
    {
      "epoch": 0.07345957445317128,
      "grad_norm": 3.765625,
      "learning_rate": 1.224254956249489e-05,
      "loss": 1.0439,
      "step": 20960
    },
    {
      "epoch": 0.07349462196006687,
      "grad_norm": 3.6875,
      "learning_rate": 1.2248390752228414e-05,
      "loss": 1.0517,
      "step": 20970
    },
    {
      "epoch": 0.07352966946696247,
      "grad_norm": 3.3125,
      "learning_rate": 1.2254231941961939e-05,
      "loss": 1.018,
      "step": 20980
    },
    {
      "epoch": 0.07356471697385807,
      "grad_norm": 3.625,
      "learning_rate": 1.2260073131695464e-05,
      "loss": 1.0643,
      "step": 20990
    },
    {
      "epoch": 0.07359976448075366,
      "grad_norm": 3.3125,
      "learning_rate": 1.226591432142899e-05,
      "loss": 1.1468,
      "step": 21000
    },
    {
      "epoch": 0.07363481198764926,
      "grad_norm": 3.296875,
      "learning_rate": 1.2271755511162513e-05,
      "loss": 1.0508,
      "step": 21010
    },
    {
      "epoch": 0.07366985949454485,
      "grad_norm": 3.03125,
      "learning_rate": 1.227759670089604e-05,
      "loss": 1.041,
      "step": 21020
    },
    {
      "epoch": 0.07370490700144046,
      "grad_norm": 3.484375,
      "learning_rate": 1.2283437890629563e-05,
      "loss": 1.0556,
      "step": 21030
    },
    {
      "epoch": 0.07373995450833605,
      "grad_norm": 3.6875,
      "learning_rate": 1.228927908036309e-05,
      "loss": 1.0333,
      "step": 21040
    },
    {
      "epoch": 0.07377500201523164,
      "grad_norm": 2.90625,
      "learning_rate": 1.2295120270096613e-05,
      "loss": 0.9887,
      "step": 21050
    },
    {
      "epoch": 0.07381004952212725,
      "grad_norm": 3.1875,
      "learning_rate": 1.230096145983014e-05,
      "loss": 1.0089,
      "step": 21060
    },
    {
      "epoch": 0.07384509702902284,
      "grad_norm": 3.53125,
      "learning_rate": 1.2306802649563663e-05,
      "loss": 1.0858,
      "step": 21070
    },
    {
      "epoch": 0.07388014453591844,
      "grad_norm": 3.328125,
      "learning_rate": 1.2312643839297189e-05,
      "loss": 1.0877,
      "step": 21080
    },
    {
      "epoch": 0.07391519204281403,
      "grad_norm": 3.125,
      "learning_rate": 1.2318485029030712e-05,
      "loss": 1.1286,
      "step": 21090
    },
    {
      "epoch": 0.07395023954970963,
      "grad_norm": 3.34375,
      "learning_rate": 1.2324326218764239e-05,
      "loss": 1.0343,
      "step": 21100
    },
    {
      "epoch": 0.07398528705660523,
      "grad_norm": 3.765625,
      "learning_rate": 1.2330167408497764e-05,
      "loss": 1.0931,
      "step": 21110
    },
    {
      "epoch": 0.07402033456350082,
      "grad_norm": 3.375,
      "learning_rate": 1.2336008598231288e-05,
      "loss": 1.1047,
      "step": 21120
    },
    {
      "epoch": 0.07405538207039643,
      "grad_norm": 3.640625,
      "learning_rate": 1.2341849787964813e-05,
      "loss": 1.0571,
      "step": 21130
    },
    {
      "epoch": 0.07409042957729202,
      "grad_norm": 4.125,
      "learning_rate": 1.2347690977698338e-05,
      "loss": 1.0775,
      "step": 21140
    },
    {
      "epoch": 0.07412547708418761,
      "grad_norm": 3.296875,
      "learning_rate": 1.2353532167431863e-05,
      "loss": 1.0332,
      "step": 21150
    },
    {
      "epoch": 0.07416052459108322,
      "grad_norm": 3.46875,
      "learning_rate": 1.2359373357165388e-05,
      "loss": 1.0486,
      "step": 21160
    },
    {
      "epoch": 0.0741955720979788,
      "grad_norm": 3.296875,
      "learning_rate": 1.2365214546898914e-05,
      "loss": 1.0423,
      "step": 21170
    },
    {
      "epoch": 0.07423061960487441,
      "grad_norm": 3.25,
      "learning_rate": 1.2371055736632438e-05,
      "loss": 1.0238,
      "step": 21180
    },
    {
      "epoch": 0.07426566711177,
      "grad_norm": 3.8125,
      "learning_rate": 1.2376896926365964e-05,
      "loss": 1.0534,
      "step": 21190
    },
    {
      "epoch": 0.0743007146186656,
      "grad_norm": 3.4375,
      "learning_rate": 1.2382738116099487e-05,
      "loss": 1.0321,
      "step": 21200
    },
    {
      "epoch": 0.0743357621255612,
      "grad_norm": 3.515625,
      "learning_rate": 1.2388579305833014e-05,
      "loss": 0.9799,
      "step": 21210
    },
    {
      "epoch": 0.07437080963245679,
      "grad_norm": 4.03125,
      "learning_rate": 1.2394420495566537e-05,
      "loss": 1.1425,
      "step": 21220
    },
    {
      "epoch": 0.0744058571393524,
      "grad_norm": 3.4375,
      "learning_rate": 1.2400261685300063e-05,
      "loss": 1.0525,
      "step": 21230
    },
    {
      "epoch": 0.07444090464624799,
      "grad_norm": 3.234375,
      "learning_rate": 1.2406102875033587e-05,
      "loss": 1.0461,
      "step": 21240
    },
    {
      "epoch": 0.07447595215314358,
      "grad_norm": 3.28125,
      "learning_rate": 1.2411944064767113e-05,
      "loss": 1.0628,
      "step": 21250
    },
    {
      "epoch": 0.07451099966003918,
      "grad_norm": 3.59375,
      "learning_rate": 1.2417785254500636e-05,
      "loss": 1.0725,
      "step": 21260
    },
    {
      "epoch": 0.07454604716693478,
      "grad_norm": 3.5625,
      "learning_rate": 1.2423626444234163e-05,
      "loss": 1.0733,
      "step": 21270
    },
    {
      "epoch": 0.07458109467383038,
      "grad_norm": 3.234375,
      "learning_rate": 1.2429467633967688e-05,
      "loss": 1.0518,
      "step": 21280
    },
    {
      "epoch": 0.07461614218072597,
      "grad_norm": 3.453125,
      "learning_rate": 1.2435308823701213e-05,
      "loss": 1.0033,
      "step": 21290
    },
    {
      "epoch": 0.07465118968762158,
      "grad_norm": 3.734375,
      "learning_rate": 1.2441150013434737e-05,
      "loss": 0.9319,
      "step": 21300
    },
    {
      "epoch": 0.07468623719451717,
      "grad_norm": 3.671875,
      "learning_rate": 1.2446991203168262e-05,
      "loss": 0.9904,
      "step": 21310
    },
    {
      "epoch": 0.07472128470141276,
      "grad_norm": 3.421875,
      "learning_rate": 1.2452832392901787e-05,
      "loss": 1.028,
      "step": 21320
    },
    {
      "epoch": 0.07475633220830837,
      "grad_norm": 3.46875,
      "learning_rate": 1.2458673582635312e-05,
      "loss": 1.0568,
      "step": 21330
    },
    {
      "epoch": 0.07479137971520396,
      "grad_norm": 3.359375,
      "learning_rate": 1.2464514772368837e-05,
      "loss": 1.0393,
      "step": 21340
    },
    {
      "epoch": 0.07482642722209956,
      "grad_norm": 3.40625,
      "learning_rate": 1.2470355962102362e-05,
      "loss": 1.0509,
      "step": 21350
    },
    {
      "epoch": 0.07486147472899515,
      "grad_norm": 3.453125,
      "learning_rate": 1.2476197151835886e-05,
      "loss": 1.0566,
      "step": 21360
    },
    {
      "epoch": 0.07489652223589074,
      "grad_norm": 3.359375,
      "learning_rate": 1.2482038341569411e-05,
      "loss": 0.9706,
      "step": 21370
    },
    {
      "epoch": 0.07493156974278635,
      "grad_norm": 3.234375,
      "learning_rate": 1.2487879531302936e-05,
      "loss": 1.0278,
      "step": 21380
    },
    {
      "epoch": 0.07496661724968194,
      "grad_norm": 3.375,
      "learning_rate": 1.2493720721036461e-05,
      "loss": 1.0112,
      "step": 21390
    },
    {
      "epoch": 0.07500166475657755,
      "grad_norm": 3.328125,
      "learning_rate": 1.2499561910769986e-05,
      "loss": 0.9733,
      "step": 21400
    },
    {
      "epoch": 0.07503671226347314,
      "grad_norm": 3.328125,
      "learning_rate": 1.250540310050351e-05,
      "loss": 1.0744,
      "step": 21410
    },
    {
      "epoch": 0.07507175977036873,
      "grad_norm": 2.953125,
      "learning_rate": 1.2511244290237037e-05,
      "loss": 1.0438,
      "step": 21420
    },
    {
      "epoch": 0.07510680727726433,
      "grad_norm": 3.625,
      "learning_rate": 1.251708547997056e-05,
      "loss": 1.1127,
      "step": 21430
    },
    {
      "epoch": 0.07514185478415993,
      "grad_norm": 4.0,
      "learning_rate": 1.2522926669704085e-05,
      "loss": 1.0521,
      "step": 21440
    },
    {
      "epoch": 0.07517690229105553,
      "grad_norm": 3.046875,
      "learning_rate": 1.252876785943761e-05,
      "loss": 1.1133,
      "step": 21450
    },
    {
      "epoch": 0.07521194979795112,
      "grad_norm": 3.484375,
      "learning_rate": 1.2534609049171137e-05,
      "loss": 1.1565,
      "step": 21460
    },
    {
      "epoch": 0.07524699730484671,
      "grad_norm": 3.4375,
      "learning_rate": 1.2540450238904661e-05,
      "loss": 1.0145,
      "step": 21470
    },
    {
      "epoch": 0.07528204481174232,
      "grad_norm": 3.734375,
      "learning_rate": 1.2546291428638185e-05,
      "loss": 1.0953,
      "step": 21480
    },
    {
      "epoch": 0.07531709231863791,
      "grad_norm": 3.578125,
      "learning_rate": 1.255213261837171e-05,
      "loss": 1.0772,
      "step": 21490
    },
    {
      "epoch": 0.07535213982553352,
      "grad_norm": 3.890625,
      "learning_rate": 1.2557973808105236e-05,
      "loss": 1.167,
      "step": 21500
    },
    {
      "epoch": 0.0753871873324291,
      "grad_norm": 3.25,
      "learning_rate": 1.256381499783876e-05,
      "loss": 0.9897,
      "step": 21510
    },
    {
      "epoch": 0.0754222348393247,
      "grad_norm": 3.828125,
      "learning_rate": 1.2569656187572284e-05,
      "loss": 1.1094,
      "step": 21520
    },
    {
      "epoch": 0.0754572823462203,
      "grad_norm": 3.671875,
      "learning_rate": 1.2575497377305812e-05,
      "loss": 1.0472,
      "step": 21530
    },
    {
      "epoch": 0.0754923298531159,
      "grad_norm": 3.71875,
      "learning_rate": 1.2581338567039335e-05,
      "loss": 1.0787,
      "step": 21540
    },
    {
      "epoch": 0.0755273773600115,
      "grad_norm": 3.453125,
      "learning_rate": 1.258717975677286e-05,
      "loss": 1.0217,
      "step": 21550
    },
    {
      "epoch": 0.07556242486690709,
      "grad_norm": 3.015625,
      "learning_rate": 1.2593020946506385e-05,
      "loss": 1.0294,
      "step": 21560
    },
    {
      "epoch": 0.0755974723738027,
      "grad_norm": 3.046875,
      "learning_rate": 1.2598862136239912e-05,
      "loss": 1.0774,
      "step": 21570
    },
    {
      "epoch": 0.07563251988069829,
      "grad_norm": 3.453125,
      "learning_rate": 1.2604703325973435e-05,
      "loss": 1.0166,
      "step": 21580
    },
    {
      "epoch": 0.07566756738759388,
      "grad_norm": 3.046875,
      "learning_rate": 1.261054451570696e-05,
      "loss": 1.0878,
      "step": 21590
    },
    {
      "epoch": 0.07570261489448948,
      "grad_norm": 3.59375,
      "learning_rate": 1.2616385705440484e-05,
      "loss": 1.0027,
      "step": 21600
    },
    {
      "epoch": 0.07573766240138508,
      "grad_norm": 3.140625,
      "learning_rate": 1.2622226895174011e-05,
      "loss": 0.9793,
      "step": 21610
    },
    {
      "epoch": 0.07577270990828068,
      "grad_norm": 3.34375,
      "learning_rate": 1.2628068084907536e-05,
      "loss": 1.0272,
      "step": 21620
    },
    {
      "epoch": 0.07580775741517627,
      "grad_norm": 4.5,
      "learning_rate": 1.2633909274641059e-05,
      "loss": 1.2008,
      "step": 21630
    },
    {
      "epoch": 0.07584280492207186,
      "grad_norm": 3.28125,
      "learning_rate": 1.2639750464374584e-05,
      "loss": 1.0739,
      "step": 21640
    },
    {
      "epoch": 0.07587785242896747,
      "grad_norm": 3.671875,
      "learning_rate": 1.264559165410811e-05,
      "loss": 1.04,
      "step": 21650
    },
    {
      "epoch": 0.07591289993586306,
      "grad_norm": 3.546875,
      "learning_rate": 1.2651432843841635e-05,
      "loss": 1.1139,
      "step": 21660
    },
    {
      "epoch": 0.07594794744275866,
      "grad_norm": 3.703125,
      "learning_rate": 1.2657274033575158e-05,
      "loss": 1.0308,
      "step": 21670
    },
    {
      "epoch": 0.07598299494965426,
      "grad_norm": 3.640625,
      "learning_rate": 1.2663115223308683e-05,
      "loss": 1.1069,
      "step": 21680
    },
    {
      "epoch": 0.07601804245654985,
      "grad_norm": 3.515625,
      "learning_rate": 1.266895641304221e-05,
      "loss": 1.0518,
      "step": 21690
    },
    {
      "epoch": 0.07605308996344545,
      "grad_norm": 3.28125,
      "learning_rate": 1.2674797602775735e-05,
      "loss": 1.0612,
      "step": 21700
    },
    {
      "epoch": 0.07608813747034104,
      "grad_norm": 3.65625,
      "learning_rate": 1.2680638792509258e-05,
      "loss": 1.0846,
      "step": 21710
    },
    {
      "epoch": 0.07612318497723665,
      "grad_norm": 3.46875,
      "learning_rate": 1.2686479982242783e-05,
      "loss": 1.0646,
      "step": 21720
    },
    {
      "epoch": 0.07615823248413224,
      "grad_norm": 3.25,
      "learning_rate": 1.2692321171976309e-05,
      "loss": 1.0461,
      "step": 21730
    },
    {
      "epoch": 0.07619327999102783,
      "grad_norm": 3.390625,
      "learning_rate": 1.2698162361709834e-05,
      "loss": 1.0307,
      "step": 21740
    },
    {
      "epoch": 0.07622832749792344,
      "grad_norm": 3.40625,
      "learning_rate": 1.2704003551443359e-05,
      "loss": 1.0247,
      "step": 21750
    },
    {
      "epoch": 0.07626337500481903,
      "grad_norm": 3.421875,
      "learning_rate": 1.2709844741176882e-05,
      "loss": 0.9977,
      "step": 21760
    },
    {
      "epoch": 0.07629842251171463,
      "grad_norm": 3.46875,
      "learning_rate": 1.2715685930910408e-05,
      "loss": 1.0536,
      "step": 21770
    },
    {
      "epoch": 0.07633347001861022,
      "grad_norm": 3.375,
      "learning_rate": 1.2721527120643933e-05,
      "loss": 1.0682,
      "step": 21780
    },
    {
      "epoch": 0.07636851752550582,
      "grad_norm": 4.125,
      "learning_rate": 1.2727368310377458e-05,
      "loss": 1.1452,
      "step": 21790
    },
    {
      "epoch": 0.07640356503240142,
      "grad_norm": 3.3125,
      "learning_rate": 1.2733209500110981e-05,
      "loss": 1.1098,
      "step": 21800
    },
    {
      "epoch": 0.07643861253929701,
      "grad_norm": 3.578125,
      "learning_rate": 1.273905068984451e-05,
      "loss": 1.0591,
      "step": 21810
    },
    {
      "epoch": 0.07647366004619262,
      "grad_norm": 3.5625,
      "learning_rate": 1.2744891879578033e-05,
      "loss": 0.9839,
      "step": 21820
    },
    {
      "epoch": 0.07650870755308821,
      "grad_norm": 3.234375,
      "learning_rate": 1.2750733069311558e-05,
      "loss": 1.0047,
      "step": 21830
    },
    {
      "epoch": 0.07654375505998381,
      "grad_norm": 3.125,
      "learning_rate": 1.2756574259045084e-05,
      "loss": 1.151,
      "step": 21840
    },
    {
      "epoch": 0.0765788025668794,
      "grad_norm": 3.578125,
      "learning_rate": 1.2762415448778609e-05,
      "loss": 1.0519,
      "step": 21850
    },
    {
      "epoch": 0.076613850073775,
      "grad_norm": 2.9375,
      "learning_rate": 1.2768256638512132e-05,
      "loss": 0.9897,
      "step": 21860
    },
    {
      "epoch": 0.0766488975806706,
      "grad_norm": 3.4375,
      "learning_rate": 1.2774097828245657e-05,
      "loss": 0.9849,
      "step": 21870
    },
    {
      "epoch": 0.0766839450875662,
      "grad_norm": 3.578125,
      "learning_rate": 1.2779939017979183e-05,
      "loss": 1.0062,
      "step": 21880
    },
    {
      "epoch": 0.0767189925944618,
      "grad_norm": 3.515625,
      "learning_rate": 1.2785780207712708e-05,
      "loss": 0.9378,
      "step": 21890
    },
    {
      "epoch": 0.07675404010135739,
      "grad_norm": 3.46875,
      "learning_rate": 1.2791621397446233e-05,
      "loss": 1.1356,
      "step": 21900
    },
    {
      "epoch": 0.07678908760825298,
      "grad_norm": 3.359375,
      "learning_rate": 1.2797462587179756e-05,
      "loss": 1.0089,
      "step": 21910
    },
    {
      "epoch": 0.07682413511514859,
      "grad_norm": 3.171875,
      "learning_rate": 1.2803303776913283e-05,
      "loss": 1.142,
      "step": 21920
    },
    {
      "epoch": 0.07685918262204418,
      "grad_norm": 3.5625,
      "learning_rate": 1.2809144966646808e-05,
      "loss": 1.0402,
      "step": 21930
    },
    {
      "epoch": 0.07689423012893978,
      "grad_norm": 3.828125,
      "learning_rate": 1.2814986156380332e-05,
      "loss": 1.0793,
      "step": 21940
    },
    {
      "epoch": 0.07692927763583537,
      "grad_norm": 3.53125,
      "learning_rate": 1.2820827346113856e-05,
      "loss": 1.061,
      "step": 21950
    },
    {
      "epoch": 0.07696432514273097,
      "grad_norm": 3.25,
      "learning_rate": 1.2826668535847384e-05,
      "loss": 1.0376,
      "step": 21960
    },
    {
      "epoch": 0.07699937264962657,
      "grad_norm": 3.625,
      "learning_rate": 1.2832509725580907e-05,
      "loss": 1.1075,
      "step": 21970
    },
    {
      "epoch": 0.07703442015652216,
      "grad_norm": 3.375,
      "learning_rate": 1.2838350915314432e-05,
      "loss": 1.101,
      "step": 21980
    },
    {
      "epoch": 0.07706946766341777,
      "grad_norm": 3.703125,
      "learning_rate": 1.2844192105047955e-05,
      "loss": 1.0865,
      "step": 21990
    },
    {
      "epoch": 0.07710451517031336,
      "grad_norm": 3.375,
      "learning_rate": 1.2850033294781483e-05,
      "loss": 1.0983,
      "step": 22000
    },
    {
      "epoch": 0.07713956267720895,
      "grad_norm": 3.609375,
      "learning_rate": 1.2855874484515006e-05,
      "loss": 1.0938,
      "step": 22010
    },
    {
      "epoch": 0.07717461018410456,
      "grad_norm": 3.1875,
      "learning_rate": 1.2861715674248531e-05,
      "loss": 1.0258,
      "step": 22020
    },
    {
      "epoch": 0.07720965769100015,
      "grad_norm": 3.328125,
      "learning_rate": 1.2867556863982056e-05,
      "loss": 1.1001,
      "step": 22030
    },
    {
      "epoch": 0.07724470519789575,
      "grad_norm": 3.6875,
      "learning_rate": 1.2873398053715583e-05,
      "loss": 1.0257,
      "step": 22040
    },
    {
      "epoch": 0.07727975270479134,
      "grad_norm": 3.296875,
      "learning_rate": 1.2879239243449106e-05,
      "loss": 0.9298,
      "step": 22050
    },
    {
      "epoch": 0.07731480021168693,
      "grad_norm": 3.921875,
      "learning_rate": 1.288508043318263e-05,
      "loss": 1.1454,
      "step": 22060
    },
    {
      "epoch": 0.07734984771858254,
      "grad_norm": 3.5,
      "learning_rate": 1.2890921622916155e-05,
      "loss": 1.0969,
      "step": 22070
    },
    {
      "epoch": 0.07738489522547813,
      "grad_norm": 3.78125,
      "learning_rate": 1.2896762812649682e-05,
      "loss": 1.0354,
      "step": 22080
    },
    {
      "epoch": 0.07741994273237374,
      "grad_norm": 3.390625,
      "learning_rate": 1.2902604002383207e-05,
      "loss": 1.0272,
      "step": 22090
    },
    {
      "epoch": 0.07745499023926933,
      "grad_norm": 3.328125,
      "learning_rate": 1.290844519211673e-05,
      "loss": 1.0699,
      "step": 22100
    },
    {
      "epoch": 0.07749003774616493,
      "grad_norm": 3.390625,
      "learning_rate": 1.2914286381850255e-05,
      "loss": 1.0545,
      "step": 22110
    },
    {
      "epoch": 0.07752508525306052,
      "grad_norm": 3.328125,
      "learning_rate": 1.2920127571583781e-05,
      "loss": 1.0286,
      "step": 22120
    },
    {
      "epoch": 0.07756013275995612,
      "grad_norm": 3.09375,
      "learning_rate": 1.2925968761317306e-05,
      "loss": 1.0726,
      "step": 22130
    },
    {
      "epoch": 0.07759518026685172,
      "grad_norm": 3.28125,
      "learning_rate": 1.293180995105083e-05,
      "loss": 1.043,
      "step": 22140
    },
    {
      "epoch": 0.07763022777374731,
      "grad_norm": 3.5625,
      "learning_rate": 1.2937651140784354e-05,
      "loss": 1.0066,
      "step": 22150
    },
    {
      "epoch": 0.07766527528064292,
      "grad_norm": 3.3125,
      "learning_rate": 1.294349233051788e-05,
      "loss": 0.9995,
      "step": 22160
    },
    {
      "epoch": 0.07770032278753851,
      "grad_norm": 3.0625,
      "learning_rate": 1.2949333520251406e-05,
      "loss": 1.1048,
      "step": 22170
    },
    {
      "epoch": 0.0777353702944341,
      "grad_norm": 3.421875,
      "learning_rate": 1.295517470998493e-05,
      "loss": 1.0538,
      "step": 22180
    },
    {
      "epoch": 0.0777704178013297,
      "grad_norm": 3.640625,
      "learning_rate": 1.2961015899718457e-05,
      "loss": 1.044,
      "step": 22190
    },
    {
      "epoch": 0.0778054653082253,
      "grad_norm": 3.75,
      "learning_rate": 1.296685708945198e-05,
      "loss": 1.0182,
      "step": 22200
    },
    {
      "epoch": 0.0778405128151209,
      "grad_norm": 3.375,
      "learning_rate": 1.2972698279185505e-05,
      "loss": 1.0228,
      "step": 22210
    },
    {
      "epoch": 0.0778755603220165,
      "grad_norm": 3.46875,
      "learning_rate": 1.297853946891903e-05,
      "loss": 1.0175,
      "step": 22220
    },
    {
      "epoch": 0.07791060782891208,
      "grad_norm": 3.515625,
      "learning_rate": 1.2984380658652556e-05,
      "loss": 1.0842,
      "step": 22230
    },
    {
      "epoch": 0.07794565533580769,
      "grad_norm": 3.625,
      "learning_rate": 1.2990221848386081e-05,
      "loss": 1.0352,
      "step": 22240
    },
    {
      "epoch": 0.07798070284270328,
      "grad_norm": 3.125,
      "learning_rate": 1.2996063038119604e-05,
      "loss": 0.9786,
      "step": 22250
    },
    {
      "epoch": 0.07801575034959889,
      "grad_norm": 3.84375,
      "learning_rate": 1.300190422785313e-05,
      "loss": 1.032,
      "step": 22260
    },
    {
      "epoch": 0.07805079785649448,
      "grad_norm": 3.453125,
      "learning_rate": 1.3007745417586656e-05,
      "loss": 1.0037,
      "step": 22270
    },
    {
      "epoch": 0.07808584536339007,
      "grad_norm": 3.5625,
      "learning_rate": 1.301358660732018e-05,
      "loss": 1.1114,
      "step": 22280
    },
    {
      "epoch": 0.07812089287028567,
      "grad_norm": 3.5625,
      "learning_rate": 1.3019427797053704e-05,
      "loss": 1.102,
      "step": 22290
    },
    {
      "epoch": 0.07815594037718127,
      "grad_norm": 3.578125,
      "learning_rate": 1.3025268986787229e-05,
      "loss": 1.0692,
      "step": 22300
    },
    {
      "epoch": 0.07819098788407687,
      "grad_norm": 3.671875,
      "learning_rate": 1.3031110176520755e-05,
      "loss": 0.994,
      "step": 22310
    },
    {
      "epoch": 0.07822603539097246,
      "grad_norm": 3.171875,
      "learning_rate": 1.303695136625428e-05,
      "loss": 1.0673,
      "step": 22320
    },
    {
      "epoch": 0.07826108289786805,
      "grad_norm": 3.734375,
      "learning_rate": 1.3042792555987803e-05,
      "loss": 1.0552,
      "step": 22330
    },
    {
      "epoch": 0.07829613040476366,
      "grad_norm": 2.96875,
      "learning_rate": 1.3048633745721328e-05,
      "loss": 0.9618,
      "step": 22340
    },
    {
      "epoch": 0.07833117791165925,
      "grad_norm": 4.0,
      "learning_rate": 1.3054474935454855e-05,
      "loss": 1.1318,
      "step": 22350
    },
    {
      "epoch": 0.07836622541855486,
      "grad_norm": 3.21875,
      "learning_rate": 1.306031612518838e-05,
      "loss": 0.9926,
      "step": 22360
    },
    {
      "epoch": 0.07840127292545045,
      "grad_norm": 2.6875,
      "learning_rate": 1.3066157314921904e-05,
      "loss": 1.0291,
      "step": 22370
    },
    {
      "epoch": 0.07843632043234605,
      "grad_norm": 3.5625,
      "learning_rate": 1.3071998504655427e-05,
      "loss": 1.0532,
      "step": 22380
    },
    {
      "epoch": 0.07847136793924164,
      "grad_norm": 3.609375,
      "learning_rate": 1.3077839694388954e-05,
      "loss": 0.9659,
      "step": 22390
    },
    {
      "epoch": 0.07850641544613723,
      "grad_norm": 3.703125,
      "learning_rate": 1.3083680884122479e-05,
      "loss": 0.9855,
      "step": 22400
    },
    {
      "epoch": 0.07854146295303284,
      "grad_norm": 3.40625,
      "learning_rate": 1.3089522073856004e-05,
      "loss": 1.0335,
      "step": 22410
    },
    {
      "epoch": 0.07857651045992843,
      "grad_norm": 3.140625,
      "learning_rate": 1.3095363263589527e-05,
      "loss": 0.9631,
      "step": 22420
    },
    {
      "epoch": 0.07861155796682404,
      "grad_norm": 3.125,
      "learning_rate": 1.3101204453323055e-05,
      "loss": 1.0533,
      "step": 22430
    },
    {
      "epoch": 0.07864660547371963,
      "grad_norm": 3.359375,
      "learning_rate": 1.3107045643056578e-05,
      "loss": 1.05,
      "step": 22440
    },
    {
      "epoch": 0.07868165298061522,
      "grad_norm": 3.734375,
      "learning_rate": 1.3112886832790103e-05,
      "loss": 1.078,
      "step": 22450
    },
    {
      "epoch": 0.07871670048751082,
      "grad_norm": 3.828125,
      "learning_rate": 1.3118728022523628e-05,
      "loss": 1.0465,
      "step": 22460
    },
    {
      "epoch": 0.07875174799440642,
      "grad_norm": 3.703125,
      "learning_rate": 1.3124569212257154e-05,
      "loss": 1.1187,
      "step": 22470
    },
    {
      "epoch": 0.07878679550130202,
      "grad_norm": 3.40625,
      "learning_rate": 1.3130410401990677e-05,
      "loss": 1.0753,
      "step": 22480
    },
    {
      "epoch": 0.07882184300819761,
      "grad_norm": 3.4375,
      "learning_rate": 1.3136251591724202e-05,
      "loss": 0.9813,
      "step": 22490
    },
    {
      "epoch": 0.0788568905150932,
      "grad_norm": 3.6875,
      "learning_rate": 1.3142092781457727e-05,
      "loss": 0.9762,
      "step": 22500
    },
    {
      "epoch": 0.07889193802198881,
      "grad_norm": 3.8125,
      "learning_rate": 1.3147933971191254e-05,
      "loss": 1.06,
      "step": 22510
    },
    {
      "epoch": 0.0789269855288844,
      "grad_norm": 3.21875,
      "learning_rate": 1.3153775160924779e-05,
      "loss": 1.0758,
      "step": 22520
    },
    {
      "epoch": 0.07896203303578,
      "grad_norm": 3.359375,
      "learning_rate": 1.3159616350658302e-05,
      "loss": 1.0425,
      "step": 22530
    },
    {
      "epoch": 0.0789970805426756,
      "grad_norm": 3.3125,
      "learning_rate": 1.3165457540391828e-05,
      "loss": 1.0084,
      "step": 22540
    },
    {
      "epoch": 0.07903212804957119,
      "grad_norm": 3.78125,
      "learning_rate": 1.3171298730125353e-05,
      "loss": 1.116,
      "step": 22550
    },
    {
      "epoch": 0.07906717555646679,
      "grad_norm": 3.109375,
      "learning_rate": 1.3177139919858878e-05,
      "loss": 1.0523,
      "step": 22560
    },
    {
      "epoch": 0.07910222306336238,
      "grad_norm": 3.328125,
      "learning_rate": 1.3182981109592401e-05,
      "loss": 1.024,
      "step": 22570
    },
    {
      "epoch": 0.07913727057025799,
      "grad_norm": 3.375,
      "learning_rate": 1.318882229932593e-05,
      "loss": 0.9982,
      "step": 22580
    },
    {
      "epoch": 0.07917231807715358,
      "grad_norm": 3.46875,
      "learning_rate": 1.3194663489059452e-05,
      "loss": 1.0667,
      "step": 22590
    },
    {
      "epoch": 0.07920736558404917,
      "grad_norm": 3.734375,
      "learning_rate": 1.3200504678792977e-05,
      "loss": 1.057,
      "step": 22600
    },
    {
      "epoch": 0.07924241309094478,
      "grad_norm": 3.203125,
      "learning_rate": 1.32063458685265e-05,
      "loss": 1.1171,
      "step": 22610
    },
    {
      "epoch": 0.07927746059784037,
      "grad_norm": 3.359375,
      "learning_rate": 1.3212187058260029e-05,
      "loss": 1.0901,
      "step": 22620
    },
    {
      "epoch": 0.07931250810473597,
      "grad_norm": 3.75,
      "learning_rate": 1.3218028247993552e-05,
      "loss": 1.0034,
      "step": 22630
    },
    {
      "epoch": 0.07934755561163157,
      "grad_norm": 3.375,
      "learning_rate": 1.3223869437727077e-05,
      "loss": 0.9887,
      "step": 22640
    },
    {
      "epoch": 0.07938260311852717,
      "grad_norm": 3.296875,
      "learning_rate": 1.3229710627460602e-05,
      "loss": 0.9992,
      "step": 22650
    },
    {
      "epoch": 0.07941765062542276,
      "grad_norm": 3.46875,
      "learning_rate": 1.3235551817194128e-05,
      "loss": 1.0253,
      "step": 22660
    },
    {
      "epoch": 0.07945269813231835,
      "grad_norm": 4.0,
      "learning_rate": 1.3241393006927651e-05,
      "loss": 1.1461,
      "step": 22670
    },
    {
      "epoch": 0.07948774563921396,
      "grad_norm": 4.15625,
      "learning_rate": 1.3247234196661176e-05,
      "loss": 1.0381,
      "step": 22680
    },
    {
      "epoch": 0.07952279314610955,
      "grad_norm": 3.015625,
      "learning_rate": 1.3253075386394701e-05,
      "loss": 1.0135,
      "step": 22690
    },
    {
      "epoch": 0.07955784065300515,
      "grad_norm": 3.78125,
      "learning_rate": 1.3258916576128227e-05,
      "loss": 1.0366,
      "step": 22700
    },
    {
      "epoch": 0.07959288815990075,
      "grad_norm": 3.375,
      "learning_rate": 1.3264757765861752e-05,
      "loss": 1.0601,
      "step": 22710
    },
    {
      "epoch": 0.07962793566679634,
      "grad_norm": 3.4375,
      "learning_rate": 1.3270598955595275e-05,
      "loss": 1.1022,
      "step": 22720
    },
    {
      "epoch": 0.07966298317369194,
      "grad_norm": 3.140625,
      "learning_rate": 1.32764401453288e-05,
      "loss": 1.0964,
      "step": 22730
    },
    {
      "epoch": 0.07969803068058753,
      "grad_norm": 3.5,
      "learning_rate": 1.3282281335062327e-05,
      "loss": 1.0724,
      "step": 22740
    },
    {
      "epoch": 0.07973307818748314,
      "grad_norm": 3.375,
      "learning_rate": 1.3288122524795852e-05,
      "loss": 1.0194,
      "step": 22750
    },
    {
      "epoch": 0.07976812569437873,
      "grad_norm": 3.328125,
      "learning_rate": 1.3293963714529375e-05,
      "loss": 1.0572,
      "step": 22760
    },
    {
      "epoch": 0.07980317320127432,
      "grad_norm": 3.34375,
      "learning_rate": 1.32998049042629e-05,
      "loss": 1.0182,
      "step": 22770
    },
    {
      "epoch": 0.07983822070816993,
      "grad_norm": 3.515625,
      "learning_rate": 1.3305646093996426e-05,
      "loss": 0.995,
      "step": 22780
    },
    {
      "epoch": 0.07987326821506552,
      "grad_norm": 3.359375,
      "learning_rate": 1.3311487283729951e-05,
      "loss": 1.0436,
      "step": 22790
    },
    {
      "epoch": 0.07990831572196112,
      "grad_norm": 3.4375,
      "learning_rate": 1.3317328473463476e-05,
      "loss": 1.0503,
      "step": 22800
    },
    {
      "epoch": 0.07994336322885671,
      "grad_norm": 3.28125,
      "learning_rate": 1.3323169663196999e-05,
      "loss": 1.0796,
      "step": 22810
    },
    {
      "epoch": 0.0799784107357523,
      "grad_norm": 3.359375,
      "learning_rate": 1.3329010852930526e-05,
      "loss": 0.998,
      "step": 22820
    },
    {
      "epoch": 0.08001345824264791,
      "grad_norm": 3.5,
      "learning_rate": 1.333485204266405e-05,
      "loss": 1.0538,
      "step": 22830
    },
    {
      "epoch": 0.0800485057495435,
      "grad_norm": 3.078125,
      "learning_rate": 1.3340693232397575e-05,
      "loss": 1.024,
      "step": 22840
    },
    {
      "epoch": 0.08008355325643911,
      "grad_norm": 3.671875,
      "learning_rate": 1.3346534422131098e-05,
      "loss": 1.1073,
      "step": 22850
    },
    {
      "epoch": 0.0801186007633347,
      "grad_norm": 3.046875,
      "learning_rate": 1.3352375611864627e-05,
      "loss": 1.03,
      "step": 22860
    },
    {
      "epoch": 0.08015364827023029,
      "grad_norm": 3.625,
      "learning_rate": 1.335821680159815e-05,
      "loss": 1.1103,
      "step": 22870
    },
    {
      "epoch": 0.0801886957771259,
      "grad_norm": 3.71875,
      "learning_rate": 1.3364057991331675e-05,
      "loss": 1.0698,
      "step": 22880
    },
    {
      "epoch": 0.08022374328402149,
      "grad_norm": 4.34375,
      "learning_rate": 1.3369899181065201e-05,
      "loss": 1.0233,
      "step": 22890
    },
    {
      "epoch": 0.08025879079091709,
      "grad_norm": 3.59375,
      "learning_rate": 1.3375740370798726e-05,
      "loss": 1.0169,
      "step": 22900
    },
    {
      "epoch": 0.08029383829781268,
      "grad_norm": 2.96875,
      "learning_rate": 1.338158156053225e-05,
      "loss": 1.0283,
      "step": 22910
    },
    {
      "epoch": 0.08032888580470829,
      "grad_norm": 3.390625,
      "learning_rate": 1.3387422750265774e-05,
      "loss": 1.2038,
      "step": 22920
    },
    {
      "epoch": 0.08036393331160388,
      "grad_norm": 3.578125,
      "learning_rate": 1.33932639399993e-05,
      "loss": 1.0331,
      "step": 22930
    },
    {
      "epoch": 0.08039898081849947,
      "grad_norm": 3.484375,
      "learning_rate": 1.3399105129732825e-05,
      "loss": 1.0253,
      "step": 22940
    },
    {
      "epoch": 0.08043402832539508,
      "grad_norm": 3.1875,
      "learning_rate": 1.3404946319466349e-05,
      "loss": 1.0601,
      "step": 22950
    },
    {
      "epoch": 0.08046907583229067,
      "grad_norm": 3.03125,
      "learning_rate": 1.3410787509199873e-05,
      "loss": 1.0227,
      "step": 22960
    },
    {
      "epoch": 0.08050412333918627,
      "grad_norm": 3.734375,
      "learning_rate": 1.34166286989334e-05,
      "loss": 1.0261,
      "step": 22970
    },
    {
      "epoch": 0.08053917084608186,
      "grad_norm": 3.09375,
      "learning_rate": 1.3422469888666925e-05,
      "loss": 1.0079,
      "step": 22980
    },
    {
      "epoch": 0.08057421835297746,
      "grad_norm": 3.6875,
      "learning_rate": 1.342831107840045e-05,
      "loss": 1.0878,
      "step": 22990
    },
    {
      "epoch": 0.08060926585987306,
      "grad_norm": 7.375,
      "learning_rate": 1.3434152268133973e-05,
      "loss": 1.0458,
      "step": 23000
    },
    {
      "epoch": 0.08064431336676865,
      "grad_norm": 3.078125,
      "learning_rate": 1.3439993457867501e-05,
      "loss": 1.107,
      "step": 23010
    },
    {
      "epoch": 0.08067936087366426,
      "grad_norm": 3.8125,
      "learning_rate": 1.3445834647601024e-05,
      "loss": 1.1151,
      "step": 23020
    },
    {
      "epoch": 0.08071440838055985,
      "grad_norm": 3.71875,
      "learning_rate": 1.3451675837334549e-05,
      "loss": 0.9899,
      "step": 23030
    },
    {
      "epoch": 0.08074945588745544,
      "grad_norm": 3.578125,
      "learning_rate": 1.3457517027068072e-05,
      "loss": 1.0233,
      "step": 23040
    },
    {
      "epoch": 0.08078450339435105,
      "grad_norm": 3.828125,
      "learning_rate": 1.34633582168016e-05,
      "loss": 1.0558,
      "step": 23050
    },
    {
      "epoch": 0.08081955090124664,
      "grad_norm": 3.71875,
      "learning_rate": 1.3469199406535124e-05,
      "loss": 1.0115,
      "step": 23060
    },
    {
      "epoch": 0.08085459840814224,
      "grad_norm": 3.578125,
      "learning_rate": 1.3475040596268648e-05,
      "loss": 1.0544,
      "step": 23070
    },
    {
      "epoch": 0.08088964591503783,
      "grad_norm": 3.734375,
      "learning_rate": 1.3480881786002173e-05,
      "loss": 1.0793,
      "step": 23080
    },
    {
      "epoch": 0.08092469342193342,
      "grad_norm": 3.046875,
      "learning_rate": 1.34867229757357e-05,
      "loss": 1.0925,
      "step": 23090
    },
    {
      "epoch": 0.08095974092882903,
      "grad_norm": 3.40625,
      "learning_rate": 1.3492564165469223e-05,
      "loss": 1.0683,
      "step": 23100
    },
    {
      "epoch": 0.08099478843572462,
      "grad_norm": 3.4375,
      "learning_rate": 1.3498405355202748e-05,
      "loss": 1.0658,
      "step": 23110
    },
    {
      "epoch": 0.08102983594262023,
      "grad_norm": 3.5625,
      "learning_rate": 1.3504246544936273e-05,
      "loss": 1.0231,
      "step": 23120
    },
    {
      "epoch": 0.08106488344951582,
      "grad_norm": 3.421875,
      "learning_rate": 1.35100877346698e-05,
      "loss": 1.0603,
      "step": 23130
    },
    {
      "epoch": 0.08109993095641141,
      "grad_norm": 3.484375,
      "learning_rate": 1.3515928924403324e-05,
      "loss": 1.1007,
      "step": 23140
    },
    {
      "epoch": 0.08113497846330701,
      "grad_norm": 3.34375,
      "learning_rate": 1.3521770114136847e-05,
      "loss": 1.0162,
      "step": 23150
    },
    {
      "epoch": 0.0811700259702026,
      "grad_norm": 3.765625,
      "learning_rate": 1.3527611303870372e-05,
      "loss": 0.9854,
      "step": 23160
    },
    {
      "epoch": 0.08120507347709821,
      "grad_norm": 3.359375,
      "learning_rate": 1.3533452493603899e-05,
      "loss": 1.0718,
      "step": 23170
    },
    {
      "epoch": 0.0812401209839938,
      "grad_norm": 3.3125,
      "learning_rate": 1.3539293683337423e-05,
      "loss": 1.0683,
      "step": 23180
    },
    {
      "epoch": 0.08127516849088941,
      "grad_norm": 3.4375,
      "learning_rate": 1.3545134873070947e-05,
      "loss": 1.0984,
      "step": 23190
    },
    {
      "epoch": 0.081310215997785,
      "grad_norm": 3.59375,
      "learning_rate": 1.3550976062804471e-05,
      "loss": 0.991,
      "step": 23200
    },
    {
      "epoch": 0.08134526350468059,
      "grad_norm": 3.328125,
      "learning_rate": 1.3556817252537998e-05,
      "loss": 1.0136,
      "step": 23210
    },
    {
      "epoch": 0.0813803110115762,
      "grad_norm": 3.15625,
      "learning_rate": 1.3562658442271523e-05,
      "loss": 1.0737,
      "step": 23220
    },
    {
      "epoch": 0.08141535851847179,
      "grad_norm": 3.15625,
      "learning_rate": 1.3568499632005046e-05,
      "loss": 1.0239,
      "step": 23230
    },
    {
      "epoch": 0.08145040602536739,
      "grad_norm": 3.125,
      "learning_rate": 1.3574340821738574e-05,
      "loss": 1.1096,
      "step": 23240
    },
    {
      "epoch": 0.08148545353226298,
      "grad_norm": 2.78125,
      "learning_rate": 1.3580182011472097e-05,
      "loss": 1.0341,
      "step": 23250
    },
    {
      "epoch": 0.08152050103915857,
      "grad_norm": 3.0,
      "learning_rate": 1.3586023201205622e-05,
      "loss": 1.0314,
      "step": 23260
    },
    {
      "epoch": 0.08155554854605418,
      "grad_norm": 3.65625,
      "learning_rate": 1.3591864390939147e-05,
      "loss": 1.0963,
      "step": 23270
    },
    {
      "epoch": 0.08159059605294977,
      "grad_norm": 3.59375,
      "learning_rate": 1.3597705580672674e-05,
      "loss": 1.0993,
      "step": 23280
    },
    {
      "epoch": 0.08162564355984538,
      "grad_norm": 3.109375,
      "learning_rate": 1.3603546770406198e-05,
      "loss": 1.0611,
      "step": 23290
    },
    {
      "epoch": 0.08166069106674097,
      "grad_norm": 3.40625,
      "learning_rate": 1.3609387960139722e-05,
      "loss": 1.0867,
      "step": 23300
    },
    {
      "epoch": 0.08169573857363656,
      "grad_norm": 3.46875,
      "learning_rate": 1.3615229149873246e-05,
      "loss": 1.128,
      "step": 23310
    },
    {
      "epoch": 0.08173078608053216,
      "grad_norm": 3.28125,
      "learning_rate": 1.3621070339606773e-05,
      "loss": 1.0354,
      "step": 23320
    },
    {
      "epoch": 0.08176583358742776,
      "grad_norm": 2.96875,
      "learning_rate": 1.3626911529340298e-05,
      "loss": 1.0674,
      "step": 23330
    },
    {
      "epoch": 0.08180088109432336,
      "grad_norm": 3.578125,
      "learning_rate": 1.3632752719073821e-05,
      "loss": 1.0013,
      "step": 23340
    },
    {
      "epoch": 0.08183592860121895,
      "grad_norm": 3.6875,
      "learning_rate": 1.3638593908807346e-05,
      "loss": 1.0736,
      "step": 23350
    },
    {
      "epoch": 0.08187097610811454,
      "grad_norm": 3.65625,
      "learning_rate": 1.3644435098540872e-05,
      "loss": 1.1594,
      "step": 23360
    },
    {
      "epoch": 0.08190602361501015,
      "grad_norm": 3.328125,
      "learning_rate": 1.3650276288274397e-05,
      "loss": 1.026,
      "step": 23370
    },
    {
      "epoch": 0.08194107112190574,
      "grad_norm": 3.203125,
      "learning_rate": 1.365611747800792e-05,
      "loss": 1.042,
      "step": 23380
    },
    {
      "epoch": 0.08197611862880134,
      "grad_norm": 3.0,
      "learning_rate": 1.3661958667741445e-05,
      "loss": 1.0034,
      "step": 23390
    },
    {
      "epoch": 0.08201116613569694,
      "grad_norm": 3.578125,
      "learning_rate": 1.3667799857474972e-05,
      "loss": 1.0571,
      "step": 23400
    },
    {
      "epoch": 0.08204621364259253,
      "grad_norm": 3.46875,
      "learning_rate": 1.3673641047208496e-05,
      "loss": 1.0345,
      "step": 23410
    },
    {
      "epoch": 0.08208126114948813,
      "grad_norm": 3.28125,
      "learning_rate": 1.3679482236942021e-05,
      "loss": 1.0051,
      "step": 23420
    },
    {
      "epoch": 0.08211630865638372,
      "grad_norm": 3.796875,
      "learning_rate": 1.3685323426675544e-05,
      "loss": 1.0992,
      "step": 23430
    },
    {
      "epoch": 0.08215135616327933,
      "grad_norm": 3.65625,
      "learning_rate": 1.3691164616409071e-05,
      "loss": 0.9988,
      "step": 23440
    },
    {
      "epoch": 0.08218640367017492,
      "grad_norm": 3.40625,
      "learning_rate": 1.3697005806142596e-05,
      "loss": 0.9988,
      "step": 23450
    },
    {
      "epoch": 0.08222145117707053,
      "grad_norm": 3.078125,
      "learning_rate": 1.370284699587612e-05,
      "loss": 1.09,
      "step": 23460
    },
    {
      "epoch": 0.08225649868396612,
      "grad_norm": 3.09375,
      "learning_rate": 1.3708688185609644e-05,
      "loss": 1.1007,
      "step": 23470
    },
    {
      "epoch": 0.08229154619086171,
      "grad_norm": 3.703125,
      "learning_rate": 1.3714529375343172e-05,
      "loss": 1.0362,
      "step": 23480
    },
    {
      "epoch": 0.08232659369775731,
      "grad_norm": 3.0625,
      "learning_rate": 1.3720370565076695e-05,
      "loss": 1.0109,
      "step": 23490
    },
    {
      "epoch": 0.0823616412046529,
      "grad_norm": 3.546875,
      "learning_rate": 1.372621175481022e-05,
      "loss": 1.0808,
      "step": 23500
    },
    {
      "epoch": 0.08239668871154851,
      "grad_norm": 3.46875,
      "learning_rate": 1.3732052944543743e-05,
      "loss": 1.0464,
      "step": 23510
    },
    {
      "epoch": 0.0824317362184441,
      "grad_norm": 3.65625,
      "learning_rate": 1.3737894134277271e-05,
      "loss": 1.0343,
      "step": 23520
    },
    {
      "epoch": 0.08246678372533969,
      "grad_norm": 3.734375,
      "learning_rate": 1.3743735324010795e-05,
      "loss": 1.0714,
      "step": 23530
    },
    {
      "epoch": 0.0825018312322353,
      "grad_norm": 3.375,
      "learning_rate": 1.374957651374432e-05,
      "loss": 1.009,
      "step": 23540
    },
    {
      "epoch": 0.08253687873913089,
      "grad_norm": 3.390625,
      "learning_rate": 1.3755417703477844e-05,
      "loss": 1.0489,
      "step": 23550
    },
    {
      "epoch": 0.0825719262460265,
      "grad_norm": 3.6875,
      "learning_rate": 1.3761258893211371e-05,
      "loss": 1.1209,
      "step": 23560
    },
    {
      "epoch": 0.08260697375292209,
      "grad_norm": 3.46875,
      "learning_rate": 1.3767100082944894e-05,
      "loss": 1.0642,
      "step": 23570
    },
    {
      "epoch": 0.08264202125981768,
      "grad_norm": 3.328125,
      "learning_rate": 1.3772941272678419e-05,
      "loss": 1.0451,
      "step": 23580
    },
    {
      "epoch": 0.08267706876671328,
      "grad_norm": 3.234375,
      "learning_rate": 1.3778782462411945e-05,
      "loss": 0.9368,
      "step": 23590
    },
    {
      "epoch": 0.08271211627360887,
      "grad_norm": 3.28125,
      "learning_rate": 1.378462365214547e-05,
      "loss": 1.0816,
      "step": 23600
    },
    {
      "epoch": 0.08274716378050448,
      "grad_norm": 3.703125,
      "learning_rate": 1.3790464841878995e-05,
      "loss": 1.0489,
      "step": 23610
    },
    {
      "epoch": 0.08278221128740007,
      "grad_norm": 3.890625,
      "learning_rate": 1.3796306031612518e-05,
      "loss": 1.0366,
      "step": 23620
    },
    {
      "epoch": 0.08281725879429566,
      "grad_norm": 3.453125,
      "learning_rate": 1.3802147221346046e-05,
      "loss": 1.0569,
      "step": 23630
    },
    {
      "epoch": 0.08285230630119127,
      "grad_norm": 3.8125,
      "learning_rate": 1.380798841107957e-05,
      "loss": 1.0183,
      "step": 23640
    },
    {
      "epoch": 0.08288735380808686,
      "grad_norm": 3.515625,
      "learning_rate": 1.3813829600813094e-05,
      "loss": 0.9655,
      "step": 23650
    },
    {
      "epoch": 0.08292240131498246,
      "grad_norm": 2.96875,
      "learning_rate": 1.3819670790546618e-05,
      "loss": 1.0724,
      "step": 23660
    },
    {
      "epoch": 0.08295744882187805,
      "grad_norm": 3.046875,
      "learning_rate": 1.3825511980280146e-05,
      "loss": 0.9764,
      "step": 23670
    },
    {
      "epoch": 0.08299249632877365,
      "grad_norm": 3.53125,
      "learning_rate": 1.3831353170013669e-05,
      "loss": 1.0697,
      "step": 23680
    },
    {
      "epoch": 0.08302754383566925,
      "grad_norm": 3.421875,
      "learning_rate": 1.3837194359747194e-05,
      "loss": 1.1237,
      "step": 23690
    },
    {
      "epoch": 0.08306259134256484,
      "grad_norm": 3.6875,
      "learning_rate": 1.3843035549480719e-05,
      "loss": 1.0956,
      "step": 23700
    },
    {
      "epoch": 0.08309763884946045,
      "grad_norm": 3.0,
      "learning_rate": 1.3848876739214245e-05,
      "loss": 0.9635,
      "step": 23710
    },
    {
      "epoch": 0.08313268635635604,
      "grad_norm": 3.75,
      "learning_rate": 1.3854717928947768e-05,
      "loss": 1.0705,
      "step": 23720
    },
    {
      "epoch": 0.08316773386325164,
      "grad_norm": 3.453125,
      "learning_rate": 1.3860559118681293e-05,
      "loss": 1.0053,
      "step": 23730
    },
    {
      "epoch": 0.08320278137014724,
      "grad_norm": 3.546875,
      "learning_rate": 1.3866400308414818e-05,
      "loss": 1.0172,
      "step": 23740
    },
    {
      "epoch": 0.08323782887704283,
      "grad_norm": 3.484375,
      "learning_rate": 1.3872241498148345e-05,
      "loss": 0.9993,
      "step": 23750
    },
    {
      "epoch": 0.08327287638393843,
      "grad_norm": 3.5,
      "learning_rate": 1.387808268788187e-05,
      "loss": 1.0796,
      "step": 23760
    },
    {
      "epoch": 0.08330792389083402,
      "grad_norm": 3.421875,
      "learning_rate": 1.3883923877615393e-05,
      "loss": 1.0581,
      "step": 23770
    },
    {
      "epoch": 0.08334297139772963,
      "grad_norm": 3.1875,
      "learning_rate": 1.3889765067348917e-05,
      "loss": 0.9834,
      "step": 23780
    },
    {
      "epoch": 0.08337801890462522,
      "grad_norm": 3.375,
      "learning_rate": 1.3895606257082444e-05,
      "loss": 1.0557,
      "step": 23790
    },
    {
      "epoch": 0.08341306641152081,
      "grad_norm": 3.609375,
      "learning_rate": 1.3901447446815969e-05,
      "loss": 1.0797,
      "step": 23800
    },
    {
      "epoch": 0.08344811391841642,
      "grad_norm": 3.1875,
      "learning_rate": 1.3907288636549492e-05,
      "loss": 1.0114,
      "step": 23810
    },
    {
      "epoch": 0.08348316142531201,
      "grad_norm": 3.40625,
      "learning_rate": 1.3913129826283017e-05,
      "loss": 1.0741,
      "step": 23820
    },
    {
      "epoch": 0.08351820893220761,
      "grad_norm": 3.359375,
      "learning_rate": 1.3918971016016543e-05,
      "loss": 1.0329,
      "step": 23830
    },
    {
      "epoch": 0.0835532564391032,
      "grad_norm": 3.71875,
      "learning_rate": 1.3924812205750068e-05,
      "loss": 1.0794,
      "step": 23840
    },
    {
      "epoch": 0.0835883039459988,
      "grad_norm": 3.203125,
      "learning_rate": 1.3930653395483591e-05,
      "loss": 1.0511,
      "step": 23850
    },
    {
      "epoch": 0.0836233514528944,
      "grad_norm": 3.078125,
      "learning_rate": 1.3936494585217116e-05,
      "loss": 1.0186,
      "step": 23860
    },
    {
      "epoch": 0.08365839895978999,
      "grad_norm": 3.15625,
      "learning_rate": 1.3942335774950643e-05,
      "loss": 1.0276,
      "step": 23870
    },
    {
      "epoch": 0.0836934464666856,
      "grad_norm": 3.546875,
      "learning_rate": 1.3948176964684168e-05,
      "loss": 1.0375,
      "step": 23880
    },
    {
      "epoch": 0.08372849397358119,
      "grad_norm": 3.484375,
      "learning_rate": 1.3954018154417692e-05,
      "loss": 1.0151,
      "step": 23890
    },
    {
      "epoch": 0.08376354148047678,
      "grad_norm": 3.75,
      "learning_rate": 1.3959859344151219e-05,
      "loss": 1.005,
      "step": 23900
    },
    {
      "epoch": 0.08379858898737239,
      "grad_norm": 3.5,
      "learning_rate": 1.3965700533884744e-05,
      "loss": 1.1193,
      "step": 23910
    },
    {
      "epoch": 0.08383363649426798,
      "grad_norm": 3.1875,
      "learning_rate": 1.3971541723618267e-05,
      "loss": 1.0323,
      "step": 23920
    },
    {
      "epoch": 0.08386868400116358,
      "grad_norm": 3.609375,
      "learning_rate": 1.3977382913351792e-05,
      "loss": 1.0439,
      "step": 23930
    },
    {
      "epoch": 0.08390373150805917,
      "grad_norm": 3.671875,
      "learning_rate": 1.3983224103085318e-05,
      "loss": 1.045,
      "step": 23940
    },
    {
      "epoch": 0.08393877901495476,
      "grad_norm": 3.703125,
      "learning_rate": 1.3989065292818843e-05,
      "loss": 0.9907,
      "step": 23950
    },
    {
      "epoch": 0.08397382652185037,
      "grad_norm": 3.1875,
      "learning_rate": 1.3994906482552366e-05,
      "loss": 1.0051,
      "step": 23960
    },
    {
      "epoch": 0.08400887402874596,
      "grad_norm": 3.203125,
      "learning_rate": 1.4000747672285891e-05,
      "loss": 0.9986,
      "step": 23970
    },
    {
      "epoch": 0.08404392153564157,
      "grad_norm": 3.375,
      "learning_rate": 1.4006588862019418e-05,
      "loss": 1.064,
      "step": 23980
    },
    {
      "epoch": 0.08407896904253716,
      "grad_norm": 3.703125,
      "learning_rate": 1.4012430051752943e-05,
      "loss": 1.0694,
      "step": 23990
    },
    {
      "epoch": 0.08411401654943276,
      "grad_norm": 3.65625,
      "learning_rate": 1.4018271241486466e-05,
      "loss": 1.1077,
      "step": 24000
    },
    {
      "epoch": 0.08414906405632835,
      "grad_norm": 4.0625,
      "learning_rate": 1.402411243121999e-05,
      "loss": 1.0417,
      "step": 24010
    },
    {
      "epoch": 0.08418411156322395,
      "grad_norm": 3.359375,
      "learning_rate": 1.4029953620953517e-05,
      "loss": 0.9978,
      "step": 24020
    },
    {
      "epoch": 0.08421915907011955,
      "grad_norm": 3.203125,
      "learning_rate": 1.4035794810687042e-05,
      "loss": 1.0658,
      "step": 24030
    },
    {
      "epoch": 0.08425420657701514,
      "grad_norm": 3.40625,
      "learning_rate": 1.4041636000420567e-05,
      "loss": 1.0317,
      "step": 24040
    },
    {
      "epoch": 0.08428925408391075,
      "grad_norm": 3.421875,
      "learning_rate": 1.404747719015409e-05,
      "loss": 1.0402,
      "step": 24050
    },
    {
      "epoch": 0.08432430159080634,
      "grad_norm": 3.53125,
      "learning_rate": 1.4053318379887616e-05,
      "loss": 1.087,
      "step": 24060
    },
    {
      "epoch": 0.08435934909770193,
      "grad_norm": 3.96875,
      "learning_rate": 1.4059159569621141e-05,
      "loss": 1.0336,
      "step": 24070
    },
    {
      "epoch": 0.08439439660459754,
      "grad_norm": 3.421875,
      "learning_rate": 1.4065000759354666e-05,
      "loss": 1.0398,
      "step": 24080
    },
    {
      "epoch": 0.08442944411149313,
      "grad_norm": 3.921875,
      "learning_rate": 1.407084194908819e-05,
      "loss": 1.0379,
      "step": 24090
    },
    {
      "epoch": 0.08446449161838873,
      "grad_norm": 3.25,
      "learning_rate": 1.4076683138821718e-05,
      "loss": 1.0097,
      "step": 24100
    },
    {
      "epoch": 0.08449953912528432,
      "grad_norm": 3.296875,
      "learning_rate": 1.408252432855524e-05,
      "loss": 1.0226,
      "step": 24110
    },
    {
      "epoch": 0.08453458663217991,
      "grad_norm": 3.40625,
      "learning_rate": 1.4088365518288766e-05,
      "loss": 1.0121,
      "step": 24120
    },
    {
      "epoch": 0.08456963413907552,
      "grad_norm": 4.03125,
      "learning_rate": 1.4094206708022289e-05,
      "loss": 1.0315,
      "step": 24130
    },
    {
      "epoch": 0.08460468164597111,
      "grad_norm": 3.125,
      "learning_rate": 1.4100047897755817e-05,
      "loss": 1.0107,
      "step": 24140
    },
    {
      "epoch": 0.08463972915286672,
      "grad_norm": 3.515625,
      "learning_rate": 1.410588908748934e-05,
      "loss": 1.0722,
      "step": 24150
    },
    {
      "epoch": 0.08467477665976231,
      "grad_norm": 3.375,
      "learning_rate": 1.4111730277222865e-05,
      "loss": 1.0285,
      "step": 24160
    },
    {
      "epoch": 0.0847098241666579,
      "grad_norm": 3.546875,
      "learning_rate": 1.411757146695639e-05,
      "loss": 1.0791,
      "step": 24170
    },
    {
      "epoch": 0.0847448716735535,
      "grad_norm": 3.46875,
      "learning_rate": 1.4123412656689916e-05,
      "loss": 1.0663,
      "step": 24180
    },
    {
      "epoch": 0.0847799191804491,
      "grad_norm": 3.203125,
      "learning_rate": 1.4129253846423441e-05,
      "loss": 1.1121,
      "step": 24190
    },
    {
      "epoch": 0.0848149666873447,
      "grad_norm": 3.375,
      "learning_rate": 1.4135095036156964e-05,
      "loss": 1.058,
      "step": 24200
    },
    {
      "epoch": 0.08485001419424029,
      "grad_norm": 3.453125,
      "learning_rate": 1.4140936225890489e-05,
      "loss": 1.0223,
      "step": 24210
    },
    {
      "epoch": 0.08488506170113588,
      "grad_norm": 3.203125,
      "learning_rate": 1.4146777415624016e-05,
      "loss": 1.0744,
      "step": 24220
    },
    {
      "epoch": 0.08492010920803149,
      "grad_norm": 3.671875,
      "learning_rate": 1.415261860535754e-05,
      "loss": 1.0499,
      "step": 24230
    },
    {
      "epoch": 0.08495515671492708,
      "grad_norm": 3.6875,
      "learning_rate": 1.4158459795091064e-05,
      "loss": 1.0447,
      "step": 24240
    },
    {
      "epoch": 0.08499020422182268,
      "grad_norm": 3.640625,
      "learning_rate": 1.4164300984824592e-05,
      "loss": 0.9573,
      "step": 24250
    },
    {
      "epoch": 0.08502525172871828,
      "grad_norm": 3.234375,
      "learning_rate": 1.4170142174558115e-05,
      "loss": 1.044,
      "step": 24260
    },
    {
      "epoch": 0.08506029923561388,
      "grad_norm": 3.5,
      "learning_rate": 1.417598336429164e-05,
      "loss": 1.0825,
      "step": 24270
    },
    {
      "epoch": 0.08509534674250947,
      "grad_norm": 3.640625,
      "learning_rate": 1.4181824554025163e-05,
      "loss": 1.0348,
      "step": 24280
    },
    {
      "epoch": 0.08513039424940506,
      "grad_norm": 3.265625,
      "learning_rate": 1.4187665743758691e-05,
      "loss": 1.0905,
      "step": 24290
    },
    {
      "epoch": 0.08516544175630067,
      "grad_norm": 3.75,
      "learning_rate": 1.4193506933492214e-05,
      "loss": 1.0732,
      "step": 24300
    },
    {
      "epoch": 0.08520048926319626,
      "grad_norm": 3.421875,
      "learning_rate": 1.419934812322574e-05,
      "loss": 1.0143,
      "step": 24310
    },
    {
      "epoch": 0.08523553677009187,
      "grad_norm": 3.671875,
      "learning_rate": 1.4205189312959264e-05,
      "loss": 1.067,
      "step": 24320
    },
    {
      "epoch": 0.08527058427698746,
      "grad_norm": 3.421875,
      "learning_rate": 1.421103050269279e-05,
      "loss": 0.8977,
      "step": 24330
    },
    {
      "epoch": 0.08530563178388305,
      "grad_norm": 3.6875,
      "learning_rate": 1.4216871692426314e-05,
      "loss": 0.9923,
      "step": 24340
    },
    {
      "epoch": 0.08534067929077865,
      "grad_norm": 3.40625,
      "learning_rate": 1.4222712882159839e-05,
      "loss": 1.141,
      "step": 24350
    },
    {
      "epoch": 0.08537572679767425,
      "grad_norm": 3.578125,
      "learning_rate": 1.4228554071893363e-05,
      "loss": 1.0413,
      "step": 24360
    },
    {
      "epoch": 0.08541077430456985,
      "grad_norm": 3.390625,
      "learning_rate": 1.423439526162689e-05,
      "loss": 1.1365,
      "step": 24370
    },
    {
      "epoch": 0.08544582181146544,
      "grad_norm": 2.96875,
      "learning_rate": 1.4240236451360415e-05,
      "loss": 0.9966,
      "step": 24380
    },
    {
      "epoch": 0.08548086931836103,
      "grad_norm": 3.265625,
      "learning_rate": 1.4246077641093938e-05,
      "loss": 1.1225,
      "step": 24390
    },
    {
      "epoch": 0.08551591682525664,
      "grad_norm": 3.34375,
      "learning_rate": 1.4251918830827463e-05,
      "loss": 1.0668,
      "step": 24400
    },
    {
      "epoch": 0.08555096433215223,
      "grad_norm": 3.296875,
      "learning_rate": 1.425776002056099e-05,
      "loss": 1.0528,
      "step": 24410
    },
    {
      "epoch": 0.08558601183904783,
      "grad_norm": 3.265625,
      "learning_rate": 1.4263601210294514e-05,
      "loss": 1.0439,
      "step": 24420
    },
    {
      "epoch": 0.08562105934594343,
      "grad_norm": 3.109375,
      "learning_rate": 1.4269442400028037e-05,
      "loss": 1.0302,
      "step": 24430
    },
    {
      "epoch": 0.08565610685283902,
      "grad_norm": 3.359375,
      "learning_rate": 1.4275283589761562e-05,
      "loss": 1.037,
      "step": 24440
    },
    {
      "epoch": 0.08569115435973462,
      "grad_norm": 2.984375,
      "learning_rate": 1.4281124779495089e-05,
      "loss": 1.0045,
      "step": 24450
    },
    {
      "epoch": 0.08572620186663021,
      "grad_norm": 3.53125,
      "learning_rate": 1.4286965969228614e-05,
      "loss": 0.974,
      "step": 24460
    },
    {
      "epoch": 0.08576124937352582,
      "grad_norm": 3.25,
      "learning_rate": 1.4292807158962137e-05,
      "loss": 1.0094,
      "step": 24470
    },
    {
      "epoch": 0.08579629688042141,
      "grad_norm": 3.234375,
      "learning_rate": 1.4298648348695662e-05,
      "loss": 1.0198,
      "step": 24480
    },
    {
      "epoch": 0.085831344387317,
      "grad_norm": 3.03125,
      "learning_rate": 1.4304489538429188e-05,
      "loss": 0.9791,
      "step": 24490
    },
    {
      "epoch": 0.08586639189421261,
      "grad_norm": 3.75,
      "learning_rate": 1.4310330728162713e-05,
      "loss": 1.0752,
      "step": 24500
    },
    {
      "epoch": 0.0859014394011082,
      "grad_norm": 3.390625,
      "learning_rate": 1.4316171917896238e-05,
      "loss": 1.0792,
      "step": 24510
    },
    {
      "epoch": 0.0859364869080038,
      "grad_norm": 3.515625,
      "learning_rate": 1.4322013107629761e-05,
      "loss": 1.1206,
      "step": 24520
    },
    {
      "epoch": 0.0859715344148994,
      "grad_norm": 3.078125,
      "learning_rate": 1.432785429736329e-05,
      "loss": 1.0405,
      "step": 24530
    },
    {
      "epoch": 0.08600658192179499,
      "grad_norm": 3.375,
      "learning_rate": 1.4333695487096812e-05,
      "loss": 0.9852,
      "step": 24540
    },
    {
      "epoch": 0.08604162942869059,
      "grad_norm": 3.171875,
      "learning_rate": 1.4339536676830337e-05,
      "loss": 1.0054,
      "step": 24550
    },
    {
      "epoch": 0.08607667693558618,
      "grad_norm": 3.671875,
      "learning_rate": 1.434537786656386e-05,
      "loss": 1.0903,
      "step": 24560
    },
    {
      "epoch": 0.08611172444248179,
      "grad_norm": 3.28125,
      "learning_rate": 1.4351219056297389e-05,
      "loss": 0.9744,
      "step": 24570
    },
    {
      "epoch": 0.08614677194937738,
      "grad_norm": 3.53125,
      "learning_rate": 1.4357060246030912e-05,
      "loss": 1.1032,
      "step": 24580
    },
    {
      "epoch": 0.08618181945627298,
      "grad_norm": 3.625,
      "learning_rate": 1.4362901435764437e-05,
      "loss": 1.0303,
      "step": 24590
    },
    {
      "epoch": 0.08621686696316858,
      "grad_norm": 3.09375,
      "learning_rate": 1.4368742625497963e-05,
      "loss": 1.0618,
      "step": 24600
    },
    {
      "epoch": 0.08625191447006417,
      "grad_norm": 3.21875,
      "learning_rate": 1.4374583815231488e-05,
      "loss": 1.0722,
      "step": 24610
    },
    {
      "epoch": 0.08628696197695977,
      "grad_norm": 3.640625,
      "learning_rate": 1.4380425004965011e-05,
      "loss": 1.0364,
      "step": 24620
    },
    {
      "epoch": 0.08632200948385536,
      "grad_norm": 3.390625,
      "learning_rate": 1.4386266194698536e-05,
      "loss": 1.0059,
      "step": 24630
    },
    {
      "epoch": 0.08635705699075097,
      "grad_norm": 3.40625,
      "learning_rate": 1.4392107384432063e-05,
      "loss": 1.0301,
      "step": 24640
    },
    {
      "epoch": 0.08639210449764656,
      "grad_norm": 4.75,
      "learning_rate": 1.4397948574165587e-05,
      "loss": 1.0266,
      "step": 24650
    },
    {
      "epoch": 0.08642715200454215,
      "grad_norm": 3.375,
      "learning_rate": 1.4403789763899112e-05,
      "loss": 1.0707,
      "step": 24660
    },
    {
      "epoch": 0.08646219951143776,
      "grad_norm": 3.3125,
      "learning_rate": 1.4409630953632635e-05,
      "loss": 1.0625,
      "step": 24670
    },
    {
      "epoch": 0.08649724701833335,
      "grad_norm": 3.90625,
      "learning_rate": 1.4415472143366162e-05,
      "loss": 1.1196,
      "step": 24680
    },
    {
      "epoch": 0.08653229452522895,
      "grad_norm": 3.828125,
      "learning_rate": 1.4421313333099687e-05,
      "loss": 1.1151,
      "step": 24690
    },
    {
      "epoch": 0.08656734203212454,
      "grad_norm": 3.4375,
      "learning_rate": 1.4427154522833212e-05,
      "loss": 1.046,
      "step": 24700
    },
    {
      "epoch": 0.08660238953902014,
      "grad_norm": 3.515625,
      "learning_rate": 1.4432995712566735e-05,
      "loss": 0.9855,
      "step": 24710
    },
    {
      "epoch": 0.08663743704591574,
      "grad_norm": 3.234375,
      "learning_rate": 1.4438836902300263e-05,
      "loss": 0.9714,
      "step": 24720
    },
    {
      "epoch": 0.08667248455281133,
      "grad_norm": 3.546875,
      "learning_rate": 1.4444678092033786e-05,
      "loss": 0.987,
      "step": 24730
    },
    {
      "epoch": 0.08670753205970694,
      "grad_norm": 3.296875,
      "learning_rate": 1.4450519281767311e-05,
      "loss": 1.0576,
      "step": 24740
    },
    {
      "epoch": 0.08674257956660253,
      "grad_norm": 3.765625,
      "learning_rate": 1.4456360471500834e-05,
      "loss": 1.0523,
      "step": 24750
    },
    {
      "epoch": 0.08677762707349812,
      "grad_norm": 3.53125,
      "learning_rate": 1.4462201661234362e-05,
      "loss": 1.0748,
      "step": 24760
    },
    {
      "epoch": 0.08681267458039373,
      "grad_norm": 3.40625,
      "learning_rate": 1.4468042850967886e-05,
      "loss": 1.01,
      "step": 24770
    },
    {
      "epoch": 0.08684772208728932,
      "grad_norm": 3.4375,
      "learning_rate": 1.447388404070141e-05,
      "loss": 1.0344,
      "step": 24780
    },
    {
      "epoch": 0.08688276959418492,
      "grad_norm": 3.546875,
      "learning_rate": 1.4479725230434935e-05,
      "loss": 1.0057,
      "step": 24790
    },
    {
      "epoch": 0.08691781710108051,
      "grad_norm": 3.25,
      "learning_rate": 1.4485566420168462e-05,
      "loss": 1.1412,
      "step": 24800
    },
    {
      "epoch": 0.0869528646079761,
      "grad_norm": 3.65625,
      "learning_rate": 1.4491407609901987e-05,
      "loss": 1.0139,
      "step": 24810
    },
    {
      "epoch": 0.08698791211487171,
      "grad_norm": 3.15625,
      "learning_rate": 1.449724879963551e-05,
      "loss": 0.9984,
      "step": 24820
    },
    {
      "epoch": 0.0870229596217673,
      "grad_norm": 3.953125,
      "learning_rate": 1.4503089989369035e-05,
      "loss": 1.1513,
      "step": 24830
    },
    {
      "epoch": 0.0870580071286629,
      "grad_norm": 3.171875,
      "learning_rate": 1.4508931179102561e-05,
      "loss": 1.0639,
      "step": 24840
    },
    {
      "epoch": 0.0870930546355585,
      "grad_norm": 3.421875,
      "learning_rate": 1.4514772368836086e-05,
      "loss": 0.9701,
      "step": 24850
    },
    {
      "epoch": 0.0871281021424541,
      "grad_norm": 3.046875,
      "learning_rate": 1.4520613558569609e-05,
      "loss": 1.0654,
      "step": 24860
    },
    {
      "epoch": 0.0871631496493497,
      "grad_norm": 3.203125,
      "learning_rate": 1.4526454748303134e-05,
      "loss": 0.9609,
      "step": 24870
    },
    {
      "epoch": 0.08719819715624529,
      "grad_norm": 3.265625,
      "learning_rate": 1.453229593803666e-05,
      "loss": 1.0411,
      "step": 24880
    },
    {
      "epoch": 0.08723324466314089,
      "grad_norm": 3.0,
      "learning_rate": 1.4538137127770185e-05,
      "loss": 1.0674,
      "step": 24890
    },
    {
      "epoch": 0.08726829217003648,
      "grad_norm": 3.328125,
      "learning_rate": 1.4543978317503708e-05,
      "loss": 1.089,
      "step": 24900
    },
    {
      "epoch": 0.08730333967693209,
      "grad_norm": 3.109375,
      "learning_rate": 1.4549819507237233e-05,
      "loss": 1.0332,
      "step": 24910
    },
    {
      "epoch": 0.08733838718382768,
      "grad_norm": 3.46875,
      "learning_rate": 1.455566069697076e-05,
      "loss": 1.016,
      "step": 24920
    },
    {
      "epoch": 0.08737343469072327,
      "grad_norm": 3.46875,
      "learning_rate": 1.4561501886704285e-05,
      "loss": 1.0759,
      "step": 24930
    },
    {
      "epoch": 0.08740848219761888,
      "grad_norm": 3.453125,
      "learning_rate": 1.456734307643781e-05,
      "loss": 1.0548,
      "step": 24940
    },
    {
      "epoch": 0.08744352970451447,
      "grad_norm": 3.71875,
      "learning_rate": 1.4573184266171336e-05,
      "loss": 0.9888,
      "step": 24950
    },
    {
      "epoch": 0.08747857721141007,
      "grad_norm": 3.265625,
      "learning_rate": 1.457902545590486e-05,
      "loss": 1.0473,
      "step": 24960
    },
    {
      "epoch": 0.08751362471830566,
      "grad_norm": 3.0625,
      "learning_rate": 1.4584866645638384e-05,
      "loss": 1.0806,
      "step": 24970
    },
    {
      "epoch": 0.08754867222520125,
      "grad_norm": 2.9375,
      "learning_rate": 1.4590707835371909e-05,
      "loss": 1.0466,
      "step": 24980
    },
    {
      "epoch": 0.08758371973209686,
      "grad_norm": 3.421875,
      "learning_rate": 1.4596549025105435e-05,
      "loss": 1.1006,
      "step": 24990
    },
    {
      "epoch": 0.08761876723899245,
      "grad_norm": 3.3125,
      "learning_rate": 1.460239021483896e-05,
      "loss": 1.0295,
      "step": 25000
    },
    {
      "epoch": 0.08761876723899245,
      "eval_loss": 0.9813523292541504,
      "eval_runtime": 568.7664,
      "eval_samples_per_second": 668.879,
      "eval_steps_per_second": 55.74,
      "step": 25000
    },
    {
      "epoch": 0.08765381474588806,
      "grad_norm": 3.40625,
      "learning_rate": 1.4608231404572483e-05,
      "loss": 0.9975,
      "step": 25010
    },
    {
      "epoch": 0.08768886225278365,
      "grad_norm": 3.96875,
      "learning_rate": 1.4614072594306008e-05,
      "loss": 1.0359,
      "step": 25020
    },
    {
      "epoch": 0.08772390975967924,
      "grad_norm": 3.25,
      "learning_rate": 1.4619913784039535e-05,
      "loss": 1.0448,
      "step": 25030
    },
    {
      "epoch": 0.08775895726657484,
      "grad_norm": 3.421875,
      "learning_rate": 1.462575497377306e-05,
      "loss": 1.0478,
      "step": 25040
    },
    {
      "epoch": 0.08779400477347044,
      "grad_norm": 3.25,
      "learning_rate": 1.4631596163506583e-05,
      "loss": 1.0054,
      "step": 25050
    },
    {
      "epoch": 0.08782905228036604,
      "grad_norm": 3.125,
      "learning_rate": 1.4637437353240108e-05,
      "loss": 1.054,
      "step": 25060
    },
    {
      "epoch": 0.08786409978726163,
      "grad_norm": 3.71875,
      "learning_rate": 1.4643278542973634e-05,
      "loss": 1.0802,
      "step": 25070
    },
    {
      "epoch": 0.08789914729415722,
      "grad_norm": 3.21875,
      "learning_rate": 1.4649119732707159e-05,
      "loss": 1.0931,
      "step": 25080
    },
    {
      "epoch": 0.08793419480105283,
      "grad_norm": 3.03125,
      "learning_rate": 1.4654960922440684e-05,
      "loss": 1.0674,
      "step": 25090
    },
    {
      "epoch": 0.08796924230794842,
      "grad_norm": 3.421875,
      "learning_rate": 1.4660802112174207e-05,
      "loss": 1.0149,
      "step": 25100
    },
    {
      "epoch": 0.08800428981484403,
      "grad_norm": 3.953125,
      "learning_rate": 1.4666643301907734e-05,
      "loss": 1.0968,
      "step": 25110
    },
    {
      "epoch": 0.08803933732173962,
      "grad_norm": 3.359375,
      "learning_rate": 1.4672484491641258e-05,
      "loss": 0.9955,
      "step": 25120
    },
    {
      "epoch": 0.08807438482863522,
      "grad_norm": 3.609375,
      "learning_rate": 1.4678325681374783e-05,
      "loss": 1.0896,
      "step": 25130
    },
    {
      "epoch": 0.08810943233553081,
      "grad_norm": 3.140625,
      "learning_rate": 1.4684166871108306e-05,
      "loss": 1.1004,
      "step": 25140
    },
    {
      "epoch": 0.0881444798424264,
      "grad_norm": 3.5625,
      "learning_rate": 1.4690008060841835e-05,
      "loss": 1.0635,
      "step": 25150
    },
    {
      "epoch": 0.08817952734932201,
      "grad_norm": 3.5,
      "learning_rate": 1.4695849250575358e-05,
      "loss": 1.0606,
      "step": 25160
    },
    {
      "epoch": 0.0882145748562176,
      "grad_norm": 3.421875,
      "learning_rate": 1.4701690440308883e-05,
      "loss": 1.0106,
      "step": 25170
    },
    {
      "epoch": 0.0882496223631132,
      "grad_norm": 3.4375,
      "learning_rate": 1.4707531630042406e-05,
      "loss": 1.0109,
      "step": 25180
    },
    {
      "epoch": 0.0882846698700088,
      "grad_norm": 3.890625,
      "learning_rate": 1.4713372819775934e-05,
      "loss": 1.0066,
      "step": 25190
    },
    {
      "epoch": 0.08831971737690439,
      "grad_norm": 3.28125,
      "learning_rate": 1.4719214009509457e-05,
      "loss": 0.9704,
      "step": 25200
    },
    {
      "epoch": 0.0883547648838,
      "grad_norm": 2.953125,
      "learning_rate": 1.4725055199242982e-05,
      "loss": 1.003,
      "step": 25210
    },
    {
      "epoch": 0.08838981239069559,
      "grad_norm": 3.71875,
      "learning_rate": 1.4730896388976507e-05,
      "loss": 1.0464,
      "step": 25220
    },
    {
      "epoch": 0.08842485989759119,
      "grad_norm": 3.390625,
      "learning_rate": 1.4736737578710033e-05,
      "loss": 1.0471,
      "step": 25230
    },
    {
      "epoch": 0.08845990740448678,
      "grad_norm": 3.84375,
      "learning_rate": 1.4742578768443557e-05,
      "loss": 1.0781,
      "step": 25240
    },
    {
      "epoch": 0.08849495491138237,
      "grad_norm": 3.359375,
      "learning_rate": 1.4748419958177081e-05,
      "loss": 0.9944,
      "step": 25250
    },
    {
      "epoch": 0.08853000241827798,
      "grad_norm": 3.578125,
      "learning_rate": 1.4754261147910606e-05,
      "loss": 1.1303,
      "step": 25260
    },
    {
      "epoch": 0.08856504992517357,
      "grad_norm": 3.4375,
      "learning_rate": 1.4760102337644133e-05,
      "loss": 1.0477,
      "step": 25270
    },
    {
      "epoch": 0.08860009743206917,
      "grad_norm": 3.4375,
      "learning_rate": 1.4765943527377658e-05,
      "loss": 0.9753,
      "step": 25280
    },
    {
      "epoch": 0.08863514493896477,
      "grad_norm": 3.59375,
      "learning_rate": 1.477178471711118e-05,
      "loss": 1.0356,
      "step": 25290
    },
    {
      "epoch": 0.08867019244586036,
      "grad_norm": 3.359375,
      "learning_rate": 1.4777625906844707e-05,
      "loss": 1.0631,
      "step": 25300
    },
    {
      "epoch": 0.08870523995275596,
      "grad_norm": 3.609375,
      "learning_rate": 1.4783467096578232e-05,
      "loss": 1.0114,
      "step": 25310
    },
    {
      "epoch": 0.08874028745965155,
      "grad_norm": 3.90625,
      "learning_rate": 1.4789308286311757e-05,
      "loss": 1.1092,
      "step": 25320
    },
    {
      "epoch": 0.08877533496654716,
      "grad_norm": 3.5,
      "learning_rate": 1.479514947604528e-05,
      "loss": 1.0445,
      "step": 25330
    },
    {
      "epoch": 0.08881038247344275,
      "grad_norm": 3.265625,
      "learning_rate": 1.4800990665778808e-05,
      "loss": 1.0314,
      "step": 25340
    },
    {
      "epoch": 0.08884542998033834,
      "grad_norm": 3.359375,
      "learning_rate": 1.4806831855512332e-05,
      "loss": 0.9848,
      "step": 25350
    },
    {
      "epoch": 0.08888047748723395,
      "grad_norm": 3.453125,
      "learning_rate": 1.4812673045245856e-05,
      "loss": 1.0521,
      "step": 25360
    },
    {
      "epoch": 0.08891552499412954,
      "grad_norm": 2.734375,
      "learning_rate": 1.481851423497938e-05,
      "loss": 1.0695,
      "step": 25370
    },
    {
      "epoch": 0.08895057250102514,
      "grad_norm": 3.328125,
      "learning_rate": 1.4824355424712908e-05,
      "loss": 0.9602,
      "step": 25380
    },
    {
      "epoch": 0.08898562000792073,
      "grad_norm": 3.234375,
      "learning_rate": 1.4830196614446431e-05,
      "loss": 1.0684,
      "step": 25390
    },
    {
      "epoch": 0.08902066751481634,
      "grad_norm": 3.5625,
      "learning_rate": 1.4836037804179956e-05,
      "loss": 1.0788,
      "step": 25400
    },
    {
      "epoch": 0.08905571502171193,
      "grad_norm": 3.21875,
      "learning_rate": 1.484187899391348e-05,
      "loss": 1.1147,
      "step": 25410
    },
    {
      "epoch": 0.08909076252860752,
      "grad_norm": 3.578125,
      "learning_rate": 1.4847720183647007e-05,
      "loss": 1.0938,
      "step": 25420
    },
    {
      "epoch": 0.08912581003550313,
      "grad_norm": 3.265625,
      "learning_rate": 1.4853561373380532e-05,
      "loss": 1.0199,
      "step": 25430
    },
    {
      "epoch": 0.08916085754239872,
      "grad_norm": 3.3125,
      "learning_rate": 1.4859402563114055e-05,
      "loss": 1.0419,
      "step": 25440
    },
    {
      "epoch": 0.08919590504929432,
      "grad_norm": 3.15625,
      "learning_rate": 1.486524375284758e-05,
      "loss": 1.0339,
      "step": 25450
    },
    {
      "epoch": 0.08923095255618992,
      "grad_norm": 3.296875,
      "learning_rate": 1.4871084942581107e-05,
      "loss": 0.9975,
      "step": 25460
    },
    {
      "epoch": 0.08926600006308551,
      "grad_norm": 3.0,
      "learning_rate": 1.4876926132314631e-05,
      "loss": 1.0326,
      "step": 25470
    },
    {
      "epoch": 0.08930104756998111,
      "grad_norm": 3.25,
      "learning_rate": 1.4882767322048155e-05,
      "loss": 1.0529,
      "step": 25480
    },
    {
      "epoch": 0.0893360950768767,
      "grad_norm": 3.296875,
      "learning_rate": 1.488860851178168e-05,
      "loss": 1.0313,
      "step": 25490
    },
    {
      "epoch": 0.08937114258377231,
      "grad_norm": 3.671875,
      "learning_rate": 1.4894449701515206e-05,
      "loss": 1.0761,
      "step": 25500
    },
    {
      "epoch": 0.0894061900906679,
      "grad_norm": 3.21875,
      "learning_rate": 1.490029089124873e-05,
      "loss": 0.9737,
      "step": 25510
    },
    {
      "epoch": 0.08944123759756349,
      "grad_norm": 3.421875,
      "learning_rate": 1.4906132080982254e-05,
      "loss": 1.0943,
      "step": 25520
    },
    {
      "epoch": 0.0894762851044591,
      "grad_norm": 3.46875,
      "learning_rate": 1.4911973270715779e-05,
      "loss": 1.0653,
      "step": 25530
    },
    {
      "epoch": 0.08951133261135469,
      "grad_norm": 2.984375,
      "learning_rate": 1.4917814460449305e-05,
      "loss": 1.003,
      "step": 25540
    },
    {
      "epoch": 0.0895463801182503,
      "grad_norm": 3.53125,
      "learning_rate": 1.492365565018283e-05,
      "loss": 1.0319,
      "step": 25550
    },
    {
      "epoch": 0.08958142762514588,
      "grad_norm": 3.578125,
      "learning_rate": 1.4929496839916355e-05,
      "loss": 1.0236,
      "step": 25560
    },
    {
      "epoch": 0.08961647513204148,
      "grad_norm": 3.703125,
      "learning_rate": 1.4935338029649878e-05,
      "loss": 1.0688,
      "step": 25570
    },
    {
      "epoch": 0.08965152263893708,
      "grad_norm": 3.625,
      "learning_rate": 1.4941179219383405e-05,
      "loss": 1.0731,
      "step": 25580
    },
    {
      "epoch": 0.08968657014583267,
      "grad_norm": 3.25,
      "learning_rate": 1.494702040911693e-05,
      "loss": 1.0157,
      "step": 25590
    },
    {
      "epoch": 0.08972161765272828,
      "grad_norm": 3.71875,
      "learning_rate": 1.4952861598850454e-05,
      "loss": 1.0517,
      "step": 25600
    },
    {
      "epoch": 0.08975666515962387,
      "grad_norm": 3.125,
      "learning_rate": 1.4958702788583981e-05,
      "loss": 1.056,
      "step": 25610
    },
    {
      "epoch": 0.08979171266651946,
      "grad_norm": 3.671875,
      "learning_rate": 1.4964543978317506e-05,
      "loss": 1.0539,
      "step": 25620
    },
    {
      "epoch": 0.08982676017341507,
      "grad_norm": 3.359375,
      "learning_rate": 1.4970385168051029e-05,
      "loss": 1.0449,
      "step": 25630
    },
    {
      "epoch": 0.08986180768031066,
      "grad_norm": 2.71875,
      "learning_rate": 1.4976226357784554e-05,
      "loss": 0.9866,
      "step": 25640
    },
    {
      "epoch": 0.08989685518720626,
      "grad_norm": 3.328125,
      "learning_rate": 1.498206754751808e-05,
      "loss": 1.0636,
      "step": 25650
    },
    {
      "epoch": 0.08993190269410185,
      "grad_norm": 3.125,
      "learning_rate": 1.4987908737251605e-05,
      "loss": 1.0732,
      "step": 25660
    },
    {
      "epoch": 0.08996695020099746,
      "grad_norm": 3.390625,
      "learning_rate": 1.4993749926985128e-05,
      "loss": 1.0255,
      "step": 25670
    },
    {
      "epoch": 0.09000199770789305,
      "grad_norm": 3.578125,
      "learning_rate": 1.4999591116718653e-05,
      "loss": 1.0527,
      "step": 25680
    },
    {
      "epoch": 0.09003704521478864,
      "grad_norm": 3.46875,
      "learning_rate": 1.500543230645218e-05,
      "loss": 1.092,
      "step": 25690
    },
    {
      "epoch": 0.09007209272168425,
      "grad_norm": 3.34375,
      "learning_rate": 1.5011273496185705e-05,
      "loss": 0.9711,
      "step": 25700
    },
    {
      "epoch": 0.09010714022857984,
      "grad_norm": 3.453125,
      "learning_rate": 1.501711468591923e-05,
      "loss": 1.0965,
      "step": 25710
    },
    {
      "epoch": 0.09014218773547544,
      "grad_norm": 3.5,
      "learning_rate": 1.5022955875652753e-05,
      "loss": 1.0469,
      "step": 25720
    },
    {
      "epoch": 0.09017723524237103,
      "grad_norm": 3.328125,
      "learning_rate": 1.5028797065386279e-05,
      "loss": 1.0418,
      "step": 25730
    },
    {
      "epoch": 0.09021228274926663,
      "grad_norm": 3.59375,
      "learning_rate": 1.5034638255119804e-05,
      "loss": 1.0182,
      "step": 25740
    },
    {
      "epoch": 0.09024733025616223,
      "grad_norm": 3.4375,
      "learning_rate": 1.5040479444853329e-05,
      "loss": 1.0745,
      "step": 25750
    },
    {
      "epoch": 0.09028237776305782,
      "grad_norm": 3.609375,
      "learning_rate": 1.5046320634586852e-05,
      "loss": 1.0673,
      "step": 25760
    },
    {
      "epoch": 0.09031742526995343,
      "grad_norm": 3.5,
      "learning_rate": 1.505216182432038e-05,
      "loss": 1.0582,
      "step": 25770
    },
    {
      "epoch": 0.09035247277684902,
      "grad_norm": 2.96875,
      "learning_rate": 1.5058003014053903e-05,
      "loss": 1.0704,
      "step": 25780
    },
    {
      "epoch": 0.09038752028374461,
      "grad_norm": 3.421875,
      "learning_rate": 1.5063844203787428e-05,
      "loss": 0.9959,
      "step": 25790
    },
    {
      "epoch": 0.09042256779064022,
      "grad_norm": 3.265625,
      "learning_rate": 1.5069685393520951e-05,
      "loss": 0.9625,
      "step": 25800
    },
    {
      "epoch": 0.0904576152975358,
      "grad_norm": 3.28125,
      "learning_rate": 1.507552658325448e-05,
      "loss": 1.0138,
      "step": 25810
    },
    {
      "epoch": 0.09049266280443141,
      "grad_norm": 3.0625,
      "learning_rate": 1.5081367772988003e-05,
      "loss": 1.073,
      "step": 25820
    },
    {
      "epoch": 0.090527710311327,
      "grad_norm": 3.1875,
      "learning_rate": 1.5087208962721527e-05,
      "loss": 1.0853,
      "step": 25830
    },
    {
      "epoch": 0.0905627578182226,
      "grad_norm": 3.1875,
      "learning_rate": 1.5093050152455052e-05,
      "loss": 1.1178,
      "step": 25840
    },
    {
      "epoch": 0.0905978053251182,
      "grad_norm": 3.0625,
      "learning_rate": 1.5098891342188579e-05,
      "loss": 1.0167,
      "step": 25850
    },
    {
      "epoch": 0.09063285283201379,
      "grad_norm": 3.359375,
      "learning_rate": 1.5104732531922102e-05,
      "loss": 1.0404,
      "step": 25860
    },
    {
      "epoch": 0.0906679003389094,
      "grad_norm": 3.6875,
      "learning_rate": 1.5110573721655627e-05,
      "loss": 1.0718,
      "step": 25870
    },
    {
      "epoch": 0.09070294784580499,
      "grad_norm": 3.53125,
      "learning_rate": 1.5116414911389152e-05,
      "loss": 0.9744,
      "step": 25880
    },
    {
      "epoch": 0.09073799535270058,
      "grad_norm": 3.171875,
      "learning_rate": 1.5122256101122678e-05,
      "loss": 1.012,
      "step": 25890
    },
    {
      "epoch": 0.09077304285959618,
      "grad_norm": 3.40625,
      "learning_rate": 1.5128097290856203e-05,
      "loss": 1.0042,
      "step": 25900
    },
    {
      "epoch": 0.09080809036649178,
      "grad_norm": 3.78125,
      "learning_rate": 1.5133938480589726e-05,
      "loss": 1.0841,
      "step": 25910
    },
    {
      "epoch": 0.09084313787338738,
      "grad_norm": 3.078125,
      "learning_rate": 1.5139779670323251e-05,
      "loss": 0.9857,
      "step": 25920
    },
    {
      "epoch": 0.09087818538028297,
      "grad_norm": 3.453125,
      "learning_rate": 1.5145620860056778e-05,
      "loss": 1.0497,
      "step": 25930
    },
    {
      "epoch": 0.09091323288717858,
      "grad_norm": 3.5625,
      "learning_rate": 1.5151462049790302e-05,
      "loss": 1.0121,
      "step": 25940
    },
    {
      "epoch": 0.09094828039407417,
      "grad_norm": 3.34375,
      "learning_rate": 1.5157303239523826e-05,
      "loss": 1.0437,
      "step": 25950
    },
    {
      "epoch": 0.09098332790096976,
      "grad_norm": 3.25,
      "learning_rate": 1.5163144429257354e-05,
      "loss": 1.0748,
      "step": 25960
    },
    {
      "epoch": 0.09101837540786537,
      "grad_norm": 3.578125,
      "learning_rate": 1.5168985618990877e-05,
      "loss": 1.0882,
      "step": 25970
    },
    {
      "epoch": 0.09105342291476096,
      "grad_norm": 3.5,
      "learning_rate": 1.5174826808724402e-05,
      "loss": 1.0755,
      "step": 25980
    },
    {
      "epoch": 0.09108847042165656,
      "grad_norm": 3.375,
      "learning_rate": 1.5180667998457927e-05,
      "loss": 1.0574,
      "step": 25990
    },
    {
      "epoch": 0.09112351792855215,
      "grad_norm": 3.4375,
      "learning_rate": 1.5186509188191453e-05,
      "loss": 1.0264,
      "step": 26000
    },
    {
      "epoch": 0.09115856543544774,
      "grad_norm": 3.265625,
      "learning_rate": 1.5192350377924976e-05,
      "loss": 1.0509,
      "step": 26010
    },
    {
      "epoch": 0.09119361294234335,
      "grad_norm": 3.5,
      "learning_rate": 1.5198191567658501e-05,
      "loss": 1.0633,
      "step": 26020
    },
    {
      "epoch": 0.09122866044923894,
      "grad_norm": 4.0,
      "learning_rate": 1.5204032757392026e-05,
      "loss": 1.0868,
      "step": 26030
    },
    {
      "epoch": 0.09126370795613455,
      "grad_norm": 3.453125,
      "learning_rate": 1.5209873947125553e-05,
      "loss": 1.1824,
      "step": 26040
    },
    {
      "epoch": 0.09129875546303014,
      "grad_norm": 3.40625,
      "learning_rate": 1.5215715136859077e-05,
      "loss": 1.0575,
      "step": 26050
    },
    {
      "epoch": 0.09133380296992573,
      "grad_norm": 3.171875,
      "learning_rate": 1.52215563265926e-05,
      "loss": 0.9444,
      "step": 26060
    },
    {
      "epoch": 0.09136885047682133,
      "grad_norm": 3.53125,
      "learning_rate": 1.5227397516326125e-05,
      "loss": 1.0956,
      "step": 26070
    },
    {
      "epoch": 0.09140389798371693,
      "grad_norm": 3.53125,
      "learning_rate": 1.5233238706059652e-05,
      "loss": 1.111,
      "step": 26080
    },
    {
      "epoch": 0.09143894549061253,
      "grad_norm": 3.0625,
      "learning_rate": 1.5239079895793177e-05,
      "loss": 1.0428,
      "step": 26090
    },
    {
      "epoch": 0.09147399299750812,
      "grad_norm": 3.296875,
      "learning_rate": 1.52449210855267e-05,
      "loss": 1.043,
      "step": 26100
    },
    {
      "epoch": 0.09150904050440371,
      "grad_norm": 3.625,
      "learning_rate": 1.5250762275260225e-05,
      "loss": 1.0576,
      "step": 26110
    },
    {
      "epoch": 0.09154408801129932,
      "grad_norm": 3.875,
      "learning_rate": 1.5256603464993751e-05,
      "loss": 0.9852,
      "step": 26120
    },
    {
      "epoch": 0.09157913551819491,
      "grad_norm": 3.375,
      "learning_rate": 1.5262444654727275e-05,
      "loss": 1.1415,
      "step": 26130
    },
    {
      "epoch": 0.09161418302509051,
      "grad_norm": 3.140625,
      "learning_rate": 1.52682858444608e-05,
      "loss": 1.0404,
      "step": 26140
    },
    {
      "epoch": 0.0916492305319861,
      "grad_norm": 3.5625,
      "learning_rate": 1.5274127034194324e-05,
      "loss": 0.9583,
      "step": 26150
    },
    {
      "epoch": 0.0916842780388817,
      "grad_norm": 3.4375,
      "learning_rate": 1.5279968223927852e-05,
      "loss": 0.9803,
      "step": 26160
    },
    {
      "epoch": 0.0917193255457773,
      "grad_norm": 3.3125,
      "learning_rate": 1.5285809413661374e-05,
      "loss": 1.0226,
      "step": 26170
    },
    {
      "epoch": 0.0917543730526729,
      "grad_norm": 3.5,
      "learning_rate": 1.52916506033949e-05,
      "loss": 0.9723,
      "step": 26180
    },
    {
      "epoch": 0.0917894205595685,
      "grad_norm": 3.390625,
      "learning_rate": 1.5297491793128424e-05,
      "loss": 1.0307,
      "step": 26190
    },
    {
      "epoch": 0.09182446806646409,
      "grad_norm": 3.53125,
      "learning_rate": 1.5303332982861952e-05,
      "loss": 1.0146,
      "step": 26200
    },
    {
      "epoch": 0.0918595155733597,
      "grad_norm": 3.234375,
      "learning_rate": 1.5309174172595477e-05,
      "loss": 1.0582,
      "step": 26210
    },
    {
      "epoch": 0.09189456308025529,
      "grad_norm": 3.34375,
      "learning_rate": 1.5315015362328998e-05,
      "loss": 1.0483,
      "step": 26220
    },
    {
      "epoch": 0.09192961058715088,
      "grad_norm": 3.65625,
      "learning_rate": 1.5320856552062523e-05,
      "loss": 1.0148,
      "step": 26230
    },
    {
      "epoch": 0.09196465809404648,
      "grad_norm": 3.328125,
      "learning_rate": 1.532669774179605e-05,
      "loss": 1.1092,
      "step": 26240
    },
    {
      "epoch": 0.09199970560094207,
      "grad_norm": 3.53125,
      "learning_rate": 1.5332538931529576e-05,
      "loss": 1.0141,
      "step": 26250
    },
    {
      "epoch": 0.09203475310783768,
      "grad_norm": 3.25,
      "learning_rate": 1.5338380121263098e-05,
      "loss": 1.0523,
      "step": 26260
    },
    {
      "epoch": 0.09206980061473327,
      "grad_norm": 3.359375,
      "learning_rate": 1.5344221310996622e-05,
      "loss": 0.9565,
      "step": 26270
    },
    {
      "epoch": 0.09210484812162886,
      "grad_norm": 3.671875,
      "learning_rate": 1.535006250073015e-05,
      "loss": 1.1332,
      "step": 26280
    },
    {
      "epoch": 0.09213989562852447,
      "grad_norm": 3.734375,
      "learning_rate": 1.5355903690463675e-05,
      "loss": 1.0401,
      "step": 26290
    },
    {
      "epoch": 0.09217494313542006,
      "grad_norm": 3.1875,
      "learning_rate": 1.53617448801972e-05,
      "loss": 0.9482,
      "step": 26300
    },
    {
      "epoch": 0.09220999064231566,
      "grad_norm": 3.421875,
      "learning_rate": 1.5367586069930725e-05,
      "loss": 1.0676,
      "step": 26310
    },
    {
      "epoch": 0.09224503814921126,
      "grad_norm": 3.34375,
      "learning_rate": 1.537342725966425e-05,
      "loss": 0.9833,
      "step": 26320
    },
    {
      "epoch": 0.09228008565610685,
      "grad_norm": 3.390625,
      "learning_rate": 1.5379268449397775e-05,
      "loss": 1.084,
      "step": 26330
    },
    {
      "epoch": 0.09231513316300245,
      "grad_norm": 3.1875,
      "learning_rate": 1.53851096391313e-05,
      "loss": 0.9992,
      "step": 26340
    },
    {
      "epoch": 0.09235018066989804,
      "grad_norm": 3.59375,
      "learning_rate": 1.5390950828864824e-05,
      "loss": 1.0194,
      "step": 26350
    },
    {
      "epoch": 0.09238522817679365,
      "grad_norm": 3.71875,
      "learning_rate": 1.539679201859835e-05,
      "loss": 1.014,
      "step": 26360
    },
    {
      "epoch": 0.09242027568368924,
      "grad_norm": 3.375,
      "learning_rate": 1.5402633208331874e-05,
      "loss": 1.0326,
      "step": 26370
    },
    {
      "epoch": 0.09245532319058483,
      "grad_norm": 3.296875,
      "learning_rate": 1.54084743980654e-05,
      "loss": 1.0286,
      "step": 26380
    },
    {
      "epoch": 0.09249037069748044,
      "grad_norm": 3.234375,
      "learning_rate": 1.5414315587798924e-05,
      "loss": 1.1069,
      "step": 26390
    },
    {
      "epoch": 0.09252541820437603,
      "grad_norm": 3.4375,
      "learning_rate": 1.542015677753245e-05,
      "loss": 1.0265,
      "step": 26400
    },
    {
      "epoch": 0.09256046571127163,
      "grad_norm": 3.171875,
      "learning_rate": 1.5425997967265974e-05,
      "loss": 1.0452,
      "step": 26410
    },
    {
      "epoch": 0.09259551321816722,
      "grad_norm": 3.15625,
      "learning_rate": 1.54318391569995e-05,
      "loss": 0.9587,
      "step": 26420
    },
    {
      "epoch": 0.09263056072506282,
      "grad_norm": 3.328125,
      "learning_rate": 1.5437680346733023e-05,
      "loss": 1.0261,
      "step": 26430
    },
    {
      "epoch": 0.09266560823195842,
      "grad_norm": 3.328125,
      "learning_rate": 1.5443521536466548e-05,
      "loss": 1.0058,
      "step": 26440
    },
    {
      "epoch": 0.09270065573885401,
      "grad_norm": 3.46875,
      "learning_rate": 1.5449362726200073e-05,
      "loss": 1.0317,
      "step": 26450
    },
    {
      "epoch": 0.09273570324574962,
      "grad_norm": 3.703125,
      "learning_rate": 1.5455203915933598e-05,
      "loss": 0.9369,
      "step": 26460
    },
    {
      "epoch": 0.09277075075264521,
      "grad_norm": 3.640625,
      "learning_rate": 1.5461045105667123e-05,
      "loss": 1.041,
      "step": 26470
    },
    {
      "epoch": 0.09280579825954081,
      "grad_norm": 3.59375,
      "learning_rate": 1.5466886295400647e-05,
      "loss": 1.079,
      "step": 26480
    },
    {
      "epoch": 0.0928408457664364,
      "grad_norm": 3.59375,
      "learning_rate": 1.5472727485134172e-05,
      "loss": 1.1315,
      "step": 26490
    },
    {
      "epoch": 0.092875893273332,
      "grad_norm": 3.21875,
      "learning_rate": 1.5478568674867697e-05,
      "loss": 1.0341,
      "step": 26500
    },
    {
      "epoch": 0.0929109407802276,
      "grad_norm": 3.234375,
      "learning_rate": 1.5484409864601222e-05,
      "loss": 1.0895,
      "step": 26510
    },
    {
      "epoch": 0.0929459882871232,
      "grad_norm": 3.171875,
      "learning_rate": 1.5490251054334747e-05,
      "loss": 1.0981,
      "step": 26520
    },
    {
      "epoch": 0.0929810357940188,
      "grad_norm": 3.5,
      "learning_rate": 1.549609224406827e-05,
      "loss": 1.0286,
      "step": 26530
    },
    {
      "epoch": 0.09301608330091439,
      "grad_norm": 3.46875,
      "learning_rate": 1.5501933433801797e-05,
      "loss": 1.0874,
      "step": 26540
    },
    {
      "epoch": 0.09305113080780998,
      "grad_norm": 3.9375,
      "learning_rate": 1.5507774623535325e-05,
      "loss": 1.08,
      "step": 26550
    },
    {
      "epoch": 0.09308617831470559,
      "grad_norm": 3.125,
      "learning_rate": 1.5513615813268846e-05,
      "loss": 1.0011,
      "step": 26560
    },
    {
      "epoch": 0.09312122582160118,
      "grad_norm": 3.546875,
      "learning_rate": 1.551945700300237e-05,
      "loss": 1.0361,
      "step": 26570
    },
    {
      "epoch": 0.09315627332849678,
      "grad_norm": 3.28125,
      "learning_rate": 1.5525298192735896e-05,
      "loss": 0.9966,
      "step": 26580
    },
    {
      "epoch": 0.09319132083539237,
      "grad_norm": 3.46875,
      "learning_rate": 1.5531139382469424e-05,
      "loss": 1.0037,
      "step": 26590
    },
    {
      "epoch": 0.09322636834228797,
      "grad_norm": 3.171875,
      "learning_rate": 1.5536980572202946e-05,
      "loss": 1.005,
      "step": 26600
    },
    {
      "epoch": 0.09326141584918357,
      "grad_norm": 3.578125,
      "learning_rate": 1.554282176193647e-05,
      "loss": 1.031,
      "step": 26610
    },
    {
      "epoch": 0.09329646335607916,
      "grad_norm": 3.703125,
      "learning_rate": 1.5548662951669995e-05,
      "loss": 0.9795,
      "step": 26620
    },
    {
      "epoch": 0.09333151086297477,
      "grad_norm": 3.765625,
      "learning_rate": 1.5554504141403524e-05,
      "loss": 1.0486,
      "step": 26630
    },
    {
      "epoch": 0.09336655836987036,
      "grad_norm": 3.171875,
      "learning_rate": 1.556034533113705e-05,
      "loss": 0.9882,
      "step": 26640
    },
    {
      "epoch": 0.09340160587676595,
      "grad_norm": 2.828125,
      "learning_rate": 1.556618652087057e-05,
      "loss": 1.1282,
      "step": 26650
    },
    {
      "epoch": 0.09343665338366156,
      "grad_norm": 3.59375,
      "learning_rate": 1.5572027710604098e-05,
      "loss": 1.0773,
      "step": 26660
    },
    {
      "epoch": 0.09347170089055715,
      "grad_norm": 3.640625,
      "learning_rate": 1.5577868900337623e-05,
      "loss": 1.1182,
      "step": 26670
    },
    {
      "epoch": 0.09350674839745275,
      "grad_norm": 3.5625,
      "learning_rate": 1.5583710090071148e-05,
      "loss": 0.9826,
      "step": 26680
    },
    {
      "epoch": 0.09354179590434834,
      "grad_norm": 3.453125,
      "learning_rate": 1.558955127980467e-05,
      "loss": 1.065,
      "step": 26690
    },
    {
      "epoch": 0.09357684341124393,
      "grad_norm": 3.140625,
      "learning_rate": 1.5595392469538197e-05,
      "loss": 1.045,
      "step": 26700
    },
    {
      "epoch": 0.09361189091813954,
      "grad_norm": 3.59375,
      "learning_rate": 1.5601233659271722e-05,
      "loss": 1.021,
      "step": 26710
    },
    {
      "epoch": 0.09364693842503513,
      "grad_norm": 3.21875,
      "learning_rate": 1.5607074849005247e-05,
      "loss": 1.1331,
      "step": 26720
    },
    {
      "epoch": 0.09368198593193074,
      "grad_norm": 3.046875,
      "learning_rate": 1.561291603873877e-05,
      "loss": 1.0391,
      "step": 26730
    },
    {
      "epoch": 0.09371703343882633,
      "grad_norm": 3.671875,
      "learning_rate": 1.5618757228472297e-05,
      "loss": 1.006,
      "step": 26740
    },
    {
      "epoch": 0.09375208094572193,
      "grad_norm": 3.484375,
      "learning_rate": 1.562459841820582e-05,
      "loss": 0.9381,
      "step": 26750
    },
    {
      "epoch": 0.09378712845261752,
      "grad_norm": 3.3125,
      "learning_rate": 1.5630439607939347e-05,
      "loss": 1.0896,
      "step": 26760
    },
    {
      "epoch": 0.09382217595951312,
      "grad_norm": 3.21875,
      "learning_rate": 1.563628079767287e-05,
      "loss": 0.9981,
      "step": 26770
    },
    {
      "epoch": 0.09385722346640872,
      "grad_norm": 3.25,
      "learning_rate": 1.5642121987406396e-05,
      "loss": 1.0984,
      "step": 26780
    },
    {
      "epoch": 0.09389227097330431,
      "grad_norm": 3.359375,
      "learning_rate": 1.564796317713992e-05,
      "loss": 0.978,
      "step": 26790
    },
    {
      "epoch": 0.09392731848019992,
      "grad_norm": 3.21875,
      "learning_rate": 1.5653804366873446e-05,
      "loss": 1.0061,
      "step": 26800
    },
    {
      "epoch": 0.09396236598709551,
      "grad_norm": 3.578125,
      "learning_rate": 1.565964555660697e-05,
      "loss": 1.077,
      "step": 26810
    },
    {
      "epoch": 0.0939974134939911,
      "grad_norm": 3.328125,
      "learning_rate": 1.5665486746340496e-05,
      "loss": 1.0308,
      "step": 26820
    },
    {
      "epoch": 0.0940324610008867,
      "grad_norm": 13.6875,
      "learning_rate": 1.567132793607402e-05,
      "loss": 1.0927,
      "step": 26830
    },
    {
      "epoch": 0.0940675085077823,
      "grad_norm": 3.703125,
      "learning_rate": 1.5677169125807545e-05,
      "loss": 1.0511,
      "step": 26840
    },
    {
      "epoch": 0.0941025560146779,
      "grad_norm": 3.46875,
      "learning_rate": 1.568301031554107e-05,
      "loss": 0.9889,
      "step": 26850
    },
    {
      "epoch": 0.09413760352157349,
      "grad_norm": 3.03125,
      "learning_rate": 1.5688851505274595e-05,
      "loss": 1.005,
      "step": 26860
    },
    {
      "epoch": 0.09417265102846908,
      "grad_norm": 3.703125,
      "learning_rate": 1.569469269500812e-05,
      "loss": 1.006,
      "step": 26870
    },
    {
      "epoch": 0.09420769853536469,
      "grad_norm": 3.203125,
      "learning_rate": 1.5700533884741645e-05,
      "loss": 1.0836,
      "step": 26880
    },
    {
      "epoch": 0.09424274604226028,
      "grad_norm": 3.625,
      "learning_rate": 1.570637507447517e-05,
      "loss": 1.1618,
      "step": 26890
    },
    {
      "epoch": 0.09427779354915589,
      "grad_norm": 3.203125,
      "learning_rate": 1.5712216264208694e-05,
      "loss": 1.0887,
      "step": 26900
    },
    {
      "epoch": 0.09431284105605148,
      "grad_norm": 3.796875,
      "learning_rate": 1.571805745394222e-05,
      "loss": 0.9613,
      "step": 26910
    },
    {
      "epoch": 0.09434788856294707,
      "grad_norm": 4.3125,
      "learning_rate": 1.5723898643675744e-05,
      "loss": 1.0515,
      "step": 26920
    },
    {
      "epoch": 0.09438293606984267,
      "grad_norm": 3.046875,
      "learning_rate": 1.572973983340927e-05,
      "loss": 0.9723,
      "step": 26930
    },
    {
      "epoch": 0.09441798357673827,
      "grad_norm": 3.71875,
      "learning_rate": 1.5735581023142794e-05,
      "loss": 0.9806,
      "step": 26940
    },
    {
      "epoch": 0.09445303108363387,
      "grad_norm": 3.28125,
      "learning_rate": 1.574142221287632e-05,
      "loss": 1.0634,
      "step": 26950
    },
    {
      "epoch": 0.09448807859052946,
      "grad_norm": 3.46875,
      "learning_rate": 1.5747263402609843e-05,
      "loss": 1.0366,
      "step": 26960
    },
    {
      "epoch": 0.09452312609742505,
      "grad_norm": 2.625,
      "learning_rate": 1.5753104592343368e-05,
      "loss": 0.9752,
      "step": 26970
    },
    {
      "epoch": 0.09455817360432066,
      "grad_norm": 3.46875,
      "learning_rate": 1.5758945782076896e-05,
      "loss": 0.9965,
      "step": 26980
    },
    {
      "epoch": 0.09459322111121625,
      "grad_norm": 3.328125,
      "learning_rate": 1.5764786971810418e-05,
      "loss": 1.1184,
      "step": 26990
    },
    {
      "epoch": 0.09462826861811185,
      "grad_norm": 2.96875,
      "learning_rate": 1.5770628161543943e-05,
      "loss": 0.9821,
      "step": 27000
    },
    {
      "epoch": 0.09466331612500745,
      "grad_norm": 3.78125,
      "learning_rate": 1.577646935127747e-05,
      "loss": 1.1287,
      "step": 27010
    },
    {
      "epoch": 0.09469836363190305,
      "grad_norm": 3.53125,
      "learning_rate": 1.5782310541010996e-05,
      "loss": 1.0515,
      "step": 27020
    },
    {
      "epoch": 0.09473341113879864,
      "grad_norm": 3.546875,
      "learning_rate": 1.5788151730744517e-05,
      "loss": 1.072,
      "step": 27030
    },
    {
      "epoch": 0.09476845864569423,
      "grad_norm": 3.484375,
      "learning_rate": 1.5793992920478042e-05,
      "loss": 1.0901,
      "step": 27040
    },
    {
      "epoch": 0.09480350615258984,
      "grad_norm": 3.4375,
      "learning_rate": 1.579983411021157e-05,
      "loss": 1.0409,
      "step": 27050
    },
    {
      "epoch": 0.09483855365948543,
      "grad_norm": 3.234375,
      "learning_rate": 1.5805675299945095e-05,
      "loss": 1.0017,
      "step": 27060
    },
    {
      "epoch": 0.09487360116638104,
      "grad_norm": 3.125,
      "learning_rate": 1.5811516489678617e-05,
      "loss": 1.053,
      "step": 27070
    },
    {
      "epoch": 0.09490864867327663,
      "grad_norm": 3.859375,
      "learning_rate": 1.581735767941214e-05,
      "loss": 0.9816,
      "step": 27080
    },
    {
      "epoch": 0.09494369618017222,
      "grad_norm": 3.28125,
      "learning_rate": 1.582319886914567e-05,
      "loss": 0.9895,
      "step": 27090
    },
    {
      "epoch": 0.09497874368706782,
      "grad_norm": 3.34375,
      "learning_rate": 1.5829040058879195e-05,
      "loss": 1.0377,
      "step": 27100
    },
    {
      "epoch": 0.09501379119396341,
      "grad_norm": 3.1875,
      "learning_rate": 1.583488124861272e-05,
      "loss": 1.0329,
      "step": 27110
    },
    {
      "epoch": 0.09504883870085902,
      "grad_norm": 3.234375,
      "learning_rate": 1.584072243834624e-05,
      "loss": 1.0503,
      "step": 27120
    },
    {
      "epoch": 0.09508388620775461,
      "grad_norm": 3.53125,
      "learning_rate": 1.584656362807977e-05,
      "loss": 0.996,
      "step": 27130
    },
    {
      "epoch": 0.0951189337146502,
      "grad_norm": 3.53125,
      "learning_rate": 1.5852404817813294e-05,
      "loss": 1.0203,
      "step": 27140
    },
    {
      "epoch": 0.09515398122154581,
      "grad_norm": 3.53125,
      "learning_rate": 1.585824600754682e-05,
      "loss": 1.089,
      "step": 27150
    },
    {
      "epoch": 0.0951890287284414,
      "grad_norm": 3.390625,
      "learning_rate": 1.586408719728034e-05,
      "loss": 1.0012,
      "step": 27160
    },
    {
      "epoch": 0.095224076235337,
      "grad_norm": 3.546875,
      "learning_rate": 1.586992838701387e-05,
      "loss": 0.982,
      "step": 27170
    },
    {
      "epoch": 0.0952591237422326,
      "grad_norm": 4.0625,
      "learning_rate": 1.5875769576747393e-05,
      "loss": 1.0353,
      "step": 27180
    },
    {
      "epoch": 0.09529417124912819,
      "grad_norm": 3.28125,
      "learning_rate": 1.5881610766480918e-05,
      "loss": 1.0855,
      "step": 27190
    },
    {
      "epoch": 0.09532921875602379,
      "grad_norm": 3.546875,
      "learning_rate": 1.5887451956214443e-05,
      "loss": 1.0689,
      "step": 27200
    },
    {
      "epoch": 0.09536426626291938,
      "grad_norm": 3.6875,
      "learning_rate": 1.5893293145947968e-05,
      "loss": 1.0545,
      "step": 27210
    },
    {
      "epoch": 0.09539931376981499,
      "grad_norm": 3.21875,
      "learning_rate": 1.5899134335681493e-05,
      "loss": 0.9456,
      "step": 27220
    },
    {
      "epoch": 0.09543436127671058,
      "grad_norm": 3.203125,
      "learning_rate": 1.5904975525415018e-05,
      "loss": 0.9654,
      "step": 27230
    },
    {
      "epoch": 0.09546940878360617,
      "grad_norm": 3.5,
      "learning_rate": 1.5910816715148542e-05,
      "loss": 1.0669,
      "step": 27240
    },
    {
      "epoch": 0.09550445629050178,
      "grad_norm": 3.390625,
      "learning_rate": 1.5916657904882067e-05,
      "loss": 1.0442,
      "step": 27250
    },
    {
      "epoch": 0.09553950379739737,
      "grad_norm": 3.46875,
      "learning_rate": 1.5922499094615592e-05,
      "loss": 1.0718,
      "step": 27260
    },
    {
      "epoch": 0.09557455130429297,
      "grad_norm": 3.046875,
      "learning_rate": 1.5928340284349117e-05,
      "loss": 1.0041,
      "step": 27270
    },
    {
      "epoch": 0.09560959881118856,
      "grad_norm": 3.25,
      "learning_rate": 1.5934181474082642e-05,
      "loss": 1.0388,
      "step": 27280
    },
    {
      "epoch": 0.09564464631808417,
      "grad_norm": 3.34375,
      "learning_rate": 1.5940022663816167e-05,
      "loss": 1.0296,
      "step": 27290
    },
    {
      "epoch": 0.09567969382497976,
      "grad_norm": 3.234375,
      "learning_rate": 1.594586385354969e-05,
      "loss": 1.0005,
      "step": 27300
    },
    {
      "epoch": 0.09571474133187535,
      "grad_norm": 3.765625,
      "learning_rate": 1.5951705043283216e-05,
      "loss": 1.0988,
      "step": 27310
    },
    {
      "epoch": 0.09574978883877096,
      "grad_norm": 3.4375,
      "learning_rate": 1.595754623301674e-05,
      "loss": 1.0729,
      "step": 27320
    },
    {
      "epoch": 0.09578483634566655,
      "grad_norm": 3.375,
      "learning_rate": 1.5963387422750266e-05,
      "loss": 1.0437,
      "step": 27330
    },
    {
      "epoch": 0.09581988385256215,
      "grad_norm": 3.6875,
      "learning_rate": 1.596922861248379e-05,
      "loss": 0.9665,
      "step": 27340
    },
    {
      "epoch": 0.09585493135945775,
      "grad_norm": 3.28125,
      "learning_rate": 1.5975069802217316e-05,
      "loss": 1.0065,
      "step": 27350
    },
    {
      "epoch": 0.09588997886635334,
      "grad_norm": 3.125,
      "learning_rate": 1.5980910991950844e-05,
      "loss": 1.1185,
      "step": 27360
    },
    {
      "epoch": 0.09592502637324894,
      "grad_norm": 3.3125,
      "learning_rate": 1.5986752181684365e-05,
      "loss": 1.0184,
      "step": 27370
    },
    {
      "epoch": 0.09596007388014453,
      "grad_norm": 3.0,
      "learning_rate": 1.599259337141789e-05,
      "loss": 1.1013,
      "step": 27380
    },
    {
      "epoch": 0.09599512138704014,
      "grad_norm": 3.90625,
      "learning_rate": 1.5998434561151415e-05,
      "loss": 0.9867,
      "step": 27390
    },
    {
      "epoch": 0.09603016889393573,
      "grad_norm": 3.8125,
      "learning_rate": 1.6004275750884943e-05,
      "loss": 1.002,
      "step": 27400
    },
    {
      "epoch": 0.09606521640083132,
      "grad_norm": 3.390625,
      "learning_rate": 1.6010116940618465e-05,
      "loss": 0.9941,
      "step": 27410
    },
    {
      "epoch": 0.09610026390772693,
      "grad_norm": 3.203125,
      "learning_rate": 1.601595813035199e-05,
      "loss": 1.0099,
      "step": 27420
    },
    {
      "epoch": 0.09613531141462252,
      "grad_norm": 3.078125,
      "learning_rate": 1.6021799320085514e-05,
      "loss": 0.9545,
      "step": 27430
    },
    {
      "epoch": 0.09617035892151812,
      "grad_norm": 3.265625,
      "learning_rate": 1.6027640509819043e-05,
      "loss": 0.9896,
      "step": 27440
    },
    {
      "epoch": 0.09620540642841371,
      "grad_norm": 3.5625,
      "learning_rate": 1.6033481699552568e-05,
      "loss": 1.0902,
      "step": 27450
    },
    {
      "epoch": 0.0962404539353093,
      "grad_norm": 3.71875,
      "learning_rate": 1.603932288928609e-05,
      "loss": 1.0139,
      "step": 27460
    },
    {
      "epoch": 0.09627550144220491,
      "grad_norm": 3.546875,
      "learning_rate": 1.6045164079019614e-05,
      "loss": 1.0502,
      "step": 27470
    },
    {
      "epoch": 0.0963105489491005,
      "grad_norm": 3.28125,
      "learning_rate": 1.6051005268753142e-05,
      "loss": 0.9813,
      "step": 27480
    },
    {
      "epoch": 0.09634559645599611,
      "grad_norm": 3.90625,
      "learning_rate": 1.6056846458486667e-05,
      "loss": 1.1345,
      "step": 27490
    },
    {
      "epoch": 0.0963806439628917,
      "grad_norm": 3.390625,
      "learning_rate": 1.606268764822019e-05,
      "loss": 1.0019,
      "step": 27500
    },
    {
      "epoch": 0.09641569146978729,
      "grad_norm": 3.8125,
      "learning_rate": 1.6068528837953713e-05,
      "loss": 1.0295,
      "step": 27510
    },
    {
      "epoch": 0.0964507389766829,
      "grad_norm": 3.1875,
      "learning_rate": 1.607437002768724e-05,
      "loss": 1.0233,
      "step": 27520
    },
    {
      "epoch": 0.09648578648357849,
      "grad_norm": 3.125,
      "learning_rate": 1.6080211217420766e-05,
      "loss": 1.0875,
      "step": 27530
    },
    {
      "epoch": 0.09652083399047409,
      "grad_norm": 3.328125,
      "learning_rate": 1.608605240715429e-05,
      "loss": 1.0489,
      "step": 27540
    },
    {
      "epoch": 0.09655588149736968,
      "grad_norm": 3.34375,
      "learning_rate": 1.6091893596887813e-05,
      "loss": 1.0872,
      "step": 27550
    },
    {
      "epoch": 0.09659092900426529,
      "grad_norm": 3.203125,
      "learning_rate": 1.609773478662134e-05,
      "loss": 0.9986,
      "step": 27560
    },
    {
      "epoch": 0.09662597651116088,
      "grad_norm": 3.34375,
      "learning_rate": 1.6103575976354866e-05,
      "loss": 1.1007,
      "step": 27570
    },
    {
      "epoch": 0.09666102401805647,
      "grad_norm": 3.53125,
      "learning_rate": 1.610941716608839e-05,
      "loss": 1.0239,
      "step": 27580
    },
    {
      "epoch": 0.09669607152495208,
      "grad_norm": 3.6875,
      "learning_rate": 1.6115258355821912e-05,
      "loss": 1.0316,
      "step": 27590
    },
    {
      "epoch": 0.09673111903184767,
      "grad_norm": 3.625,
      "learning_rate": 1.612109954555544e-05,
      "loss": 1.0659,
      "step": 27600
    },
    {
      "epoch": 0.09676616653874327,
      "grad_norm": 3.515625,
      "learning_rate": 1.6126940735288965e-05,
      "loss": 1.0719,
      "step": 27610
    },
    {
      "epoch": 0.09680121404563886,
      "grad_norm": 3.1875,
      "learning_rate": 1.613278192502249e-05,
      "loss": 1.0858,
      "step": 27620
    },
    {
      "epoch": 0.09683626155253446,
      "grad_norm": 3.25,
      "learning_rate": 1.613862311475601e-05,
      "loss": 1.0216,
      "step": 27630
    },
    {
      "epoch": 0.09687130905943006,
      "grad_norm": 3.078125,
      "learning_rate": 1.614446430448954e-05,
      "loss": 1.084,
      "step": 27640
    },
    {
      "epoch": 0.09690635656632565,
      "grad_norm": 3.609375,
      "learning_rate": 1.6150305494223064e-05,
      "loss": 1.0202,
      "step": 27650
    },
    {
      "epoch": 0.09694140407322126,
      "grad_norm": 3.453125,
      "learning_rate": 1.615614668395659e-05,
      "loss": 1.0363,
      "step": 27660
    },
    {
      "epoch": 0.09697645158011685,
      "grad_norm": 3.578125,
      "learning_rate": 1.6161987873690114e-05,
      "loss": 1.0479,
      "step": 27670
    },
    {
      "epoch": 0.09701149908701244,
      "grad_norm": 3.453125,
      "learning_rate": 1.616782906342364e-05,
      "loss": 1.0243,
      "step": 27680
    },
    {
      "epoch": 0.09704654659390805,
      "grad_norm": 3.5,
      "learning_rate": 1.6173670253157164e-05,
      "loss": 1.077,
      "step": 27690
    },
    {
      "epoch": 0.09708159410080364,
      "grad_norm": 3.234375,
      "learning_rate": 1.617951144289069e-05,
      "loss": 1.0316,
      "step": 27700
    },
    {
      "epoch": 0.09711664160769924,
      "grad_norm": 3.34375,
      "learning_rate": 1.6185352632624214e-05,
      "loss": 1.0213,
      "step": 27710
    },
    {
      "epoch": 0.09715168911459483,
      "grad_norm": 3.75,
      "learning_rate": 1.619119382235774e-05,
      "loss": 1.0232,
      "step": 27720
    },
    {
      "epoch": 0.09718673662149042,
      "grad_norm": 2.734375,
      "learning_rate": 1.6197035012091263e-05,
      "loss": 0.9401,
      "step": 27730
    },
    {
      "epoch": 0.09722178412838603,
      "grad_norm": 3.6875,
      "learning_rate": 1.6202876201824788e-05,
      "loss": 1.056,
      "step": 27740
    },
    {
      "epoch": 0.09725683163528162,
      "grad_norm": 3.71875,
      "learning_rate": 1.6208717391558316e-05,
      "loss": 0.9945,
      "step": 27750
    },
    {
      "epoch": 0.09729187914217723,
      "grad_norm": 3.34375,
      "learning_rate": 1.6214558581291838e-05,
      "loss": 1.0354,
      "step": 27760
    },
    {
      "epoch": 0.09732692664907282,
      "grad_norm": 3.265625,
      "learning_rate": 1.6220399771025363e-05,
      "loss": 1.0675,
      "step": 27770
    },
    {
      "epoch": 0.09736197415596841,
      "grad_norm": 3.3125,
      "learning_rate": 1.6226240960758887e-05,
      "loss": 0.9992,
      "step": 27780
    },
    {
      "epoch": 0.09739702166286401,
      "grad_norm": 3.125,
      "learning_rate": 1.6232082150492416e-05,
      "loss": 1.0363,
      "step": 27790
    },
    {
      "epoch": 0.0974320691697596,
      "grad_norm": 3.21875,
      "learning_rate": 1.6237923340225937e-05,
      "loss": 1.0176,
      "step": 27800
    },
    {
      "epoch": 0.09746711667665521,
      "grad_norm": 3.703125,
      "learning_rate": 1.6243764529959462e-05,
      "loss": 1.0593,
      "step": 27810
    },
    {
      "epoch": 0.0975021641835508,
      "grad_norm": 3.484375,
      "learning_rate": 1.6249605719692987e-05,
      "loss": 1.0377,
      "step": 27820
    },
    {
      "epoch": 0.0975372116904464,
      "grad_norm": 2.84375,
      "learning_rate": 1.6255446909426515e-05,
      "loss": 0.9682,
      "step": 27830
    },
    {
      "epoch": 0.097572259197342,
      "grad_norm": 3.265625,
      "learning_rate": 1.6261288099160036e-05,
      "loss": 1.0615,
      "step": 27840
    },
    {
      "epoch": 0.09760730670423759,
      "grad_norm": 3.390625,
      "learning_rate": 1.626712928889356e-05,
      "loss": 0.9924,
      "step": 27850
    },
    {
      "epoch": 0.0976423542111332,
      "grad_norm": 3.140625,
      "learning_rate": 1.6272970478627086e-05,
      "loss": 0.979,
      "step": 27860
    },
    {
      "epoch": 0.09767740171802879,
      "grad_norm": 3.375,
      "learning_rate": 1.6278811668360614e-05,
      "loss": 0.9746,
      "step": 27870
    },
    {
      "epoch": 0.09771244922492439,
      "grad_norm": 3.28125,
      "learning_rate": 1.628465285809414e-05,
      "loss": 1.0304,
      "step": 27880
    },
    {
      "epoch": 0.09774749673181998,
      "grad_norm": 3.0625,
      "learning_rate": 1.629049404782766e-05,
      "loss": 1.0649,
      "step": 27890
    },
    {
      "epoch": 0.09778254423871557,
      "grad_norm": 3.578125,
      "learning_rate": 1.6296335237561186e-05,
      "loss": 1.0662,
      "step": 27900
    },
    {
      "epoch": 0.09781759174561118,
      "grad_norm": 4.0,
      "learning_rate": 1.6302176427294714e-05,
      "loss": 1.1207,
      "step": 27910
    },
    {
      "epoch": 0.09785263925250677,
      "grad_norm": 3.71875,
      "learning_rate": 1.630801761702824e-05,
      "loss": 0.9893,
      "step": 27920
    },
    {
      "epoch": 0.09788768675940238,
      "grad_norm": 3.4375,
      "learning_rate": 1.631385880676176e-05,
      "loss": 1.0288,
      "step": 27930
    },
    {
      "epoch": 0.09792273426629797,
      "grad_norm": 3.21875,
      "learning_rate": 1.6319699996495285e-05,
      "loss": 1.0582,
      "step": 27940
    },
    {
      "epoch": 0.09795778177319356,
      "grad_norm": 3.765625,
      "learning_rate": 1.6325541186228813e-05,
      "loss": 1.0828,
      "step": 27950
    },
    {
      "epoch": 0.09799282928008916,
      "grad_norm": 3.3125,
      "learning_rate": 1.6331382375962338e-05,
      "loss": 1.0296,
      "step": 27960
    },
    {
      "epoch": 0.09802787678698476,
      "grad_norm": 3.765625,
      "learning_rate": 1.633722356569586e-05,
      "loss": 1.0162,
      "step": 27970
    },
    {
      "epoch": 0.09806292429388036,
      "grad_norm": 3.65625,
      "learning_rate": 1.6343064755429384e-05,
      "loss": 0.9763,
      "step": 27980
    },
    {
      "epoch": 0.09809797180077595,
      "grad_norm": 3.5625,
      "learning_rate": 1.6348905945162913e-05,
      "loss": 1.0535,
      "step": 27990
    },
    {
      "epoch": 0.09813301930767154,
      "grad_norm": 3.125,
      "learning_rate": 1.6354747134896437e-05,
      "loss": 1.0553,
      "step": 28000
    },
    {
      "epoch": 0.09816806681456715,
      "grad_norm": 3.296875,
      "learning_rate": 1.6360588324629962e-05,
      "loss": 1.0011,
      "step": 28010
    },
    {
      "epoch": 0.09820311432146274,
      "grad_norm": 3.296875,
      "learning_rate": 1.6366429514363487e-05,
      "loss": 1.0439,
      "step": 28020
    },
    {
      "epoch": 0.09823816182835834,
      "grad_norm": 3.09375,
      "learning_rate": 1.6372270704097012e-05,
      "loss": 0.9997,
      "step": 28030
    },
    {
      "epoch": 0.09827320933525394,
      "grad_norm": 3.1875,
      "learning_rate": 1.6378111893830537e-05,
      "loss": 1.0531,
      "step": 28040
    },
    {
      "epoch": 0.09830825684214953,
      "grad_norm": 3.40625,
      "learning_rate": 1.638395308356406e-05,
      "loss": 1.0587,
      "step": 28050
    },
    {
      "epoch": 0.09834330434904513,
      "grad_norm": 3.296875,
      "learning_rate": 1.6389794273297586e-05,
      "loss": 1.096,
      "step": 28060
    },
    {
      "epoch": 0.09837835185594072,
      "grad_norm": 4.15625,
      "learning_rate": 1.639563546303111e-05,
      "loss": 1.1267,
      "step": 28070
    },
    {
      "epoch": 0.09841339936283633,
      "grad_norm": 3.5625,
      "learning_rate": 1.6401476652764636e-05,
      "loss": 0.9941,
      "step": 28080
    },
    {
      "epoch": 0.09844844686973192,
      "grad_norm": 3.671875,
      "learning_rate": 1.640731784249816e-05,
      "loss": 1.1308,
      "step": 28090
    },
    {
      "epoch": 0.09848349437662751,
      "grad_norm": 3.296875,
      "learning_rate": 1.6413159032231686e-05,
      "loss": 1.0257,
      "step": 28100
    },
    {
      "epoch": 0.09851854188352312,
      "grad_norm": 3.5,
      "learning_rate": 1.641900022196521e-05,
      "loss": 1.138,
      "step": 28110
    },
    {
      "epoch": 0.09855358939041871,
      "grad_norm": 3.703125,
      "learning_rate": 1.6424841411698736e-05,
      "loss": 0.9782,
      "step": 28120
    },
    {
      "epoch": 0.09858863689731431,
      "grad_norm": 3.75,
      "learning_rate": 1.643068260143226e-05,
      "loss": 1.0749,
      "step": 28130
    },
    {
      "epoch": 0.0986236844042099,
      "grad_norm": 2.90625,
      "learning_rate": 1.6436523791165785e-05,
      "loss": 0.9514,
      "step": 28140
    },
    {
      "epoch": 0.09865873191110551,
      "grad_norm": 3.21875,
      "learning_rate": 1.644236498089931e-05,
      "loss": 1.0333,
      "step": 28150
    },
    {
      "epoch": 0.0986937794180011,
      "grad_norm": 3.34375,
      "learning_rate": 1.6448206170632835e-05,
      "loss": 1.0241,
      "step": 28160
    },
    {
      "epoch": 0.09872882692489669,
      "grad_norm": 3.4375,
      "learning_rate": 1.645404736036636e-05,
      "loss": 1.0708,
      "step": 28170
    },
    {
      "epoch": 0.0987638744317923,
      "grad_norm": 3.953125,
      "learning_rate": 1.6459888550099885e-05,
      "loss": 1.003,
      "step": 28180
    },
    {
      "epoch": 0.09879892193868789,
      "grad_norm": 3.625,
      "learning_rate": 1.646572973983341e-05,
      "loss": 1.0423,
      "step": 28190
    },
    {
      "epoch": 0.0988339694455835,
      "grad_norm": 3.3125,
      "learning_rate": 1.6471570929566934e-05,
      "loss": 1.0426,
      "step": 28200
    },
    {
      "epoch": 0.09886901695247909,
      "grad_norm": 3.53125,
      "learning_rate": 1.647741211930046e-05,
      "loss": 1.1077,
      "step": 28210
    },
    {
      "epoch": 0.09890406445937468,
      "grad_norm": 3.515625,
      "learning_rate": 1.6483253309033987e-05,
      "loss": 0.9827,
      "step": 28220
    },
    {
      "epoch": 0.09893911196627028,
      "grad_norm": 3.40625,
      "learning_rate": 1.648909449876751e-05,
      "loss": 0.9933,
      "step": 28230
    },
    {
      "epoch": 0.09897415947316587,
      "grad_norm": 3.40625,
      "learning_rate": 1.6494935688501034e-05,
      "loss": 1.0318,
      "step": 28240
    },
    {
      "epoch": 0.09900920698006148,
      "grad_norm": 3.15625,
      "learning_rate": 1.650077687823456e-05,
      "loss": 1.0834,
      "step": 28250
    },
    {
      "epoch": 0.09904425448695707,
      "grad_norm": 3.359375,
      "learning_rate": 1.6506618067968087e-05,
      "loss": 1.0353,
      "step": 28260
    },
    {
      "epoch": 0.09907930199385266,
      "grad_norm": 3.21875,
      "learning_rate": 1.6512459257701608e-05,
      "loss": 1.0041,
      "step": 28270
    },
    {
      "epoch": 0.09911434950074827,
      "grad_norm": 3.65625,
      "learning_rate": 1.6518300447435133e-05,
      "loss": 1.1226,
      "step": 28280
    },
    {
      "epoch": 0.09914939700764386,
      "grad_norm": 3.453125,
      "learning_rate": 1.6524141637168658e-05,
      "loss": 1.1015,
      "step": 28290
    },
    {
      "epoch": 0.09918444451453946,
      "grad_norm": 4.0,
      "learning_rate": 1.6529982826902186e-05,
      "loss": 1.1975,
      "step": 28300
    },
    {
      "epoch": 0.09921949202143505,
      "grad_norm": 3.734375,
      "learning_rate": 1.6535824016635708e-05,
      "loss": 1.046,
      "step": 28310
    },
    {
      "epoch": 0.09925453952833065,
      "grad_norm": 3.515625,
      "learning_rate": 1.6541665206369232e-05,
      "loss": 1.0259,
      "step": 28320
    },
    {
      "epoch": 0.09928958703522625,
      "grad_norm": 3.25,
      "learning_rate": 1.6547506396102757e-05,
      "loss": 1.0425,
      "step": 28330
    },
    {
      "epoch": 0.09932463454212184,
      "grad_norm": 3.625,
      "learning_rate": 1.6553347585836285e-05,
      "loss": 1.0573,
      "step": 28340
    },
    {
      "epoch": 0.09935968204901745,
      "grad_norm": 3.8125,
      "learning_rate": 1.655918877556981e-05,
      "loss": 1.0888,
      "step": 28350
    },
    {
      "epoch": 0.09939472955591304,
      "grad_norm": 3.71875,
      "learning_rate": 1.6565029965303332e-05,
      "loss": 0.9738,
      "step": 28360
    },
    {
      "epoch": 0.09942977706280863,
      "grad_norm": 3.703125,
      "learning_rate": 1.657087115503686e-05,
      "loss": 1.0005,
      "step": 28370
    },
    {
      "epoch": 0.09946482456970424,
      "grad_norm": 3.046875,
      "learning_rate": 1.6576712344770385e-05,
      "loss": 1.0514,
      "step": 28380
    },
    {
      "epoch": 0.09949987207659983,
      "grad_norm": 3.328125,
      "learning_rate": 1.658255353450391e-05,
      "loss": 1.0988,
      "step": 28390
    },
    {
      "epoch": 0.09953491958349543,
      "grad_norm": 3.40625,
      "learning_rate": 1.658839472423743e-05,
      "loss": 1.0139,
      "step": 28400
    },
    {
      "epoch": 0.09956996709039102,
      "grad_norm": 3.296875,
      "learning_rate": 1.659423591397096e-05,
      "loss": 1.0421,
      "step": 28410
    },
    {
      "epoch": 0.09960501459728663,
      "grad_norm": 3.375,
      "learning_rate": 1.6600077103704484e-05,
      "loss": 1.044,
      "step": 28420
    },
    {
      "epoch": 0.09964006210418222,
      "grad_norm": 3.484375,
      "learning_rate": 1.660591829343801e-05,
      "loss": 1.0201,
      "step": 28430
    },
    {
      "epoch": 0.09967510961107781,
      "grad_norm": 3.171875,
      "learning_rate": 1.6611759483171534e-05,
      "loss": 1.0285,
      "step": 28440
    },
    {
      "epoch": 0.09971015711797342,
      "grad_norm": 3.359375,
      "learning_rate": 1.661760067290506e-05,
      "loss": 1.0356,
      "step": 28450
    },
    {
      "epoch": 0.09974520462486901,
      "grad_norm": 3.15625,
      "learning_rate": 1.6623441862638584e-05,
      "loss": 1.0803,
      "step": 28460
    },
    {
      "epoch": 0.09978025213176461,
      "grad_norm": 3.3125,
      "learning_rate": 1.662928305237211e-05,
      "loss": 1.0459,
      "step": 28470
    },
    {
      "epoch": 0.0998152996386602,
      "grad_norm": 3.296875,
      "learning_rate": 1.6635124242105633e-05,
      "loss": 1.0218,
      "step": 28480
    },
    {
      "epoch": 0.0998503471455558,
      "grad_norm": 2.921875,
      "learning_rate": 1.6640965431839158e-05,
      "loss": 1.0543,
      "step": 28490
    },
    {
      "epoch": 0.0998853946524514,
      "grad_norm": 3.296875,
      "learning_rate": 1.6646806621572683e-05,
      "loss": 0.9873,
      "step": 28500
    },
    {
      "epoch": 0.09992044215934699,
      "grad_norm": 3.640625,
      "learning_rate": 1.6652647811306208e-05,
      "loss": 1.0626,
      "step": 28510
    },
    {
      "epoch": 0.0999554896662426,
      "grad_norm": 3.296875,
      "learning_rate": 1.6658489001039733e-05,
      "loss": 0.9812,
      "step": 28520
    },
    {
      "epoch": 0.09999053717313819,
      "grad_norm": 3.75,
      "learning_rate": 1.6664330190773258e-05,
      "loss": 1.0582,
      "step": 28530
    },
    {
      "epoch": 0.10002558468003378,
      "grad_norm": 3.34375,
      "learning_rate": 1.6670171380506782e-05,
      "loss": 1.0887,
      "step": 28540
    },
    {
      "epoch": 0.10006063218692939,
      "grad_norm": 3.5625,
      "learning_rate": 1.6676012570240307e-05,
      "loss": 1.0081,
      "step": 28550
    },
    {
      "epoch": 0.10009567969382498,
      "grad_norm": 3.171875,
      "learning_rate": 1.6681853759973832e-05,
      "loss": 1.0674,
      "step": 28560
    },
    {
      "epoch": 0.10013072720072058,
      "grad_norm": 3.890625,
      "learning_rate": 1.6687694949707357e-05,
      "loss": 1.0785,
      "step": 28570
    },
    {
      "epoch": 0.10016577470761617,
      "grad_norm": 3.078125,
      "learning_rate": 1.6693536139440882e-05,
      "loss": 1.0053,
      "step": 28580
    },
    {
      "epoch": 0.10020082221451176,
      "grad_norm": 3.421875,
      "learning_rate": 1.6699377329174407e-05,
      "loss": 1.0182,
      "step": 28590
    },
    {
      "epoch": 0.10023586972140737,
      "grad_norm": 3.328125,
      "learning_rate": 1.670521851890793e-05,
      "loss": 1.0306,
      "step": 28600
    },
    {
      "epoch": 0.10027091722830296,
      "grad_norm": 3.453125,
      "learning_rate": 1.6711059708641456e-05,
      "loss": 1.062,
      "step": 28610
    },
    {
      "epoch": 0.10030596473519857,
      "grad_norm": 3.65625,
      "learning_rate": 1.671690089837498e-05,
      "loss": 1.0579,
      "step": 28620
    },
    {
      "epoch": 0.10034101224209416,
      "grad_norm": 3.359375,
      "learning_rate": 1.6722742088108506e-05,
      "loss": 1.0201,
      "step": 28630
    },
    {
      "epoch": 0.10037605974898975,
      "grad_norm": 3.09375,
      "learning_rate": 1.672858327784203e-05,
      "loss": 0.9763,
      "step": 28640
    },
    {
      "epoch": 0.10041110725588535,
      "grad_norm": 3.296875,
      "learning_rate": 1.673442446757556e-05,
      "loss": 1.0599,
      "step": 28650
    },
    {
      "epoch": 0.10044615476278095,
      "grad_norm": 2.953125,
      "learning_rate": 1.674026565730908e-05,
      "loss": 0.9872,
      "step": 28660
    },
    {
      "epoch": 0.10048120226967655,
      "grad_norm": 3.234375,
      "learning_rate": 1.6746106847042605e-05,
      "loss": 1.0865,
      "step": 28670
    },
    {
      "epoch": 0.10051624977657214,
      "grad_norm": 2.953125,
      "learning_rate": 1.675194803677613e-05,
      "loss": 0.9953,
      "step": 28680
    },
    {
      "epoch": 0.10055129728346775,
      "grad_norm": 3.09375,
      "learning_rate": 1.675778922650966e-05,
      "loss": 0.9843,
      "step": 28690
    },
    {
      "epoch": 0.10058634479036334,
      "grad_norm": 3.75,
      "learning_rate": 1.676363041624318e-05,
      "loss": 1.0187,
      "step": 28700
    },
    {
      "epoch": 0.10062139229725893,
      "grad_norm": 3.234375,
      "learning_rate": 1.6769471605976705e-05,
      "loss": 1.052,
      "step": 28710
    },
    {
      "epoch": 0.10065643980415453,
      "grad_norm": 3.59375,
      "learning_rate": 1.6775312795710233e-05,
      "loss": 1.0303,
      "step": 28720
    },
    {
      "epoch": 0.10069148731105013,
      "grad_norm": 3.65625,
      "learning_rate": 1.6781153985443758e-05,
      "loss": 1.0793,
      "step": 28730
    },
    {
      "epoch": 0.10072653481794573,
      "grad_norm": 3.0,
      "learning_rate": 1.678699517517728e-05,
      "loss": 0.9529,
      "step": 28740
    },
    {
      "epoch": 0.10076158232484132,
      "grad_norm": 3.171875,
      "learning_rate": 1.6792836364910804e-05,
      "loss": 1.0141,
      "step": 28750
    },
    {
      "epoch": 0.10079662983173691,
      "grad_norm": 2.984375,
      "learning_rate": 1.6798677554644332e-05,
      "loss": 0.9815,
      "step": 28760
    },
    {
      "epoch": 0.10083167733863252,
      "grad_norm": 3.625,
      "learning_rate": 1.6804518744377857e-05,
      "loss": 0.9518,
      "step": 28770
    },
    {
      "epoch": 0.10086672484552811,
      "grad_norm": 2.96875,
      "learning_rate": 1.6810359934111382e-05,
      "loss": 0.8844,
      "step": 28780
    },
    {
      "epoch": 0.10090177235242372,
      "grad_norm": 3.9375,
      "learning_rate": 1.6816201123844903e-05,
      "loss": 1.1045,
      "step": 28790
    },
    {
      "epoch": 0.10093681985931931,
      "grad_norm": 4.15625,
      "learning_rate": 1.6822042313578432e-05,
      "loss": 1.0851,
      "step": 28800
    },
    {
      "epoch": 0.1009718673662149,
      "grad_norm": 3.25,
      "learning_rate": 1.6827883503311957e-05,
      "loss": 0.9922,
      "step": 28810
    },
    {
      "epoch": 0.1010069148731105,
      "grad_norm": 3.515625,
      "learning_rate": 1.683372469304548e-05,
      "loss": 1.0995,
      "step": 28820
    },
    {
      "epoch": 0.1010419623800061,
      "grad_norm": 3.8125,
      "learning_rate": 1.6839565882779003e-05,
      "loss": 1.0168,
      "step": 28830
    },
    {
      "epoch": 0.1010770098869017,
      "grad_norm": 3.1875,
      "learning_rate": 1.684540707251253e-05,
      "loss": 1.0424,
      "step": 28840
    },
    {
      "epoch": 0.10111205739379729,
      "grad_norm": 3.703125,
      "learning_rate": 1.6851248262246056e-05,
      "loss": 1.0189,
      "step": 28850
    },
    {
      "epoch": 0.10114710490069288,
      "grad_norm": 3.375,
      "learning_rate": 1.685708945197958e-05,
      "loss": 1.0071,
      "step": 28860
    },
    {
      "epoch": 0.10118215240758849,
      "grad_norm": 3.34375,
      "learning_rate": 1.6862930641713102e-05,
      "loss": 1.0504,
      "step": 28870
    },
    {
      "epoch": 0.10121719991448408,
      "grad_norm": 3.890625,
      "learning_rate": 1.686877183144663e-05,
      "loss": 1.0221,
      "step": 28880
    },
    {
      "epoch": 0.10125224742137968,
      "grad_norm": 3.28125,
      "learning_rate": 1.6874613021180155e-05,
      "loss": 1.0125,
      "step": 28890
    },
    {
      "epoch": 0.10128729492827528,
      "grad_norm": 3.203125,
      "learning_rate": 1.688045421091368e-05,
      "loss": 0.9946,
      "step": 28900
    },
    {
      "epoch": 0.10132234243517087,
      "grad_norm": 3.140625,
      "learning_rate": 1.6886295400647205e-05,
      "loss": 1.0048,
      "step": 28910
    },
    {
      "epoch": 0.10135738994206647,
      "grad_norm": 3.421875,
      "learning_rate": 1.689213659038073e-05,
      "loss": 1.0546,
      "step": 28920
    },
    {
      "epoch": 0.10139243744896206,
      "grad_norm": 2.796875,
      "learning_rate": 1.6897977780114255e-05,
      "loss": 0.9396,
      "step": 28930
    },
    {
      "epoch": 0.10142748495585767,
      "grad_norm": 3.53125,
      "learning_rate": 1.690381896984778e-05,
      "loss": 0.9781,
      "step": 28940
    },
    {
      "epoch": 0.10146253246275326,
      "grad_norm": 3.484375,
      "learning_rate": 1.6909660159581304e-05,
      "loss": 1.0159,
      "step": 28950
    },
    {
      "epoch": 0.10149757996964887,
      "grad_norm": 3.34375,
      "learning_rate": 1.691550134931483e-05,
      "loss": 1.0723,
      "step": 28960
    },
    {
      "epoch": 0.10153262747654446,
      "grad_norm": 3.640625,
      "learning_rate": 1.6921342539048354e-05,
      "loss": 0.9932,
      "step": 28970
    },
    {
      "epoch": 0.10156767498344005,
      "grad_norm": 2.953125,
      "learning_rate": 1.692718372878188e-05,
      "loss": 0.9868,
      "step": 28980
    },
    {
      "epoch": 0.10160272249033565,
      "grad_norm": 3.328125,
      "learning_rate": 1.6933024918515404e-05,
      "loss": 1.0493,
      "step": 28990
    },
    {
      "epoch": 0.10163776999723124,
      "grad_norm": 3.28125,
      "learning_rate": 1.693886610824893e-05,
      "loss": 1.0515,
      "step": 29000
    },
    {
      "epoch": 0.10167281750412685,
      "grad_norm": 3.265625,
      "learning_rate": 1.6944707297982453e-05,
      "loss": 1.0549,
      "step": 29010
    },
    {
      "epoch": 0.10170786501102244,
      "grad_norm": 3.515625,
      "learning_rate": 1.6950548487715978e-05,
      "loss": 1.0991,
      "step": 29020
    },
    {
      "epoch": 0.10174291251791803,
      "grad_norm": 3.453125,
      "learning_rate": 1.6956389677449503e-05,
      "loss": 1.037,
      "step": 29030
    },
    {
      "epoch": 0.10177796002481364,
      "grad_norm": 3.140625,
      "learning_rate": 1.6962230867183028e-05,
      "loss": 0.9646,
      "step": 29040
    },
    {
      "epoch": 0.10181300753170923,
      "grad_norm": 3.578125,
      "learning_rate": 1.6968072056916553e-05,
      "loss": 1.031,
      "step": 29050
    },
    {
      "epoch": 0.10184805503860483,
      "grad_norm": 3.453125,
      "learning_rate": 1.6973913246650078e-05,
      "loss": 0.9949,
      "step": 29060
    },
    {
      "epoch": 0.10188310254550043,
      "grad_norm": 3.609375,
      "learning_rate": 1.6979754436383606e-05,
      "loss": 1.0711,
      "step": 29070
    },
    {
      "epoch": 0.10191815005239602,
      "grad_norm": 3.390625,
      "learning_rate": 1.6985595626117127e-05,
      "loss": 1.0267,
      "step": 29080
    },
    {
      "epoch": 0.10195319755929162,
      "grad_norm": 3.40625,
      "learning_rate": 1.6991436815850652e-05,
      "loss": 1.0455,
      "step": 29090
    },
    {
      "epoch": 0.10198824506618721,
      "grad_norm": 3.28125,
      "learning_rate": 1.6997278005584177e-05,
      "loss": 1.0483,
      "step": 29100
    },
    {
      "epoch": 0.10202329257308282,
      "grad_norm": 3.421875,
      "learning_rate": 1.7003119195317705e-05,
      "loss": 1.0459,
      "step": 29110
    },
    {
      "epoch": 0.10205834007997841,
      "grad_norm": 3.1875,
      "learning_rate": 1.700896038505123e-05,
      "loss": 0.9539,
      "step": 29120
    },
    {
      "epoch": 0.102093387586874,
      "grad_norm": 3.0,
      "learning_rate": 1.701480157478475e-05,
      "loss": 0.966,
      "step": 29130
    },
    {
      "epoch": 0.1021284350937696,
      "grad_norm": 4.125,
      "learning_rate": 1.7020642764518276e-05,
      "loss": 1.0029,
      "step": 29140
    },
    {
      "epoch": 0.1021634826006652,
      "grad_norm": 3.578125,
      "learning_rate": 1.7026483954251805e-05,
      "loss": 1.0096,
      "step": 29150
    },
    {
      "epoch": 0.1021985301075608,
      "grad_norm": 3.203125,
      "learning_rate": 1.703232514398533e-05,
      "loss": 1.0225,
      "step": 29160
    },
    {
      "epoch": 0.1022335776144564,
      "grad_norm": 3.640625,
      "learning_rate": 1.703816633371885e-05,
      "loss": 0.9995,
      "step": 29170
    },
    {
      "epoch": 0.10226862512135199,
      "grad_norm": 3.515625,
      "learning_rate": 1.7044007523452376e-05,
      "loss": 1.0207,
      "step": 29180
    },
    {
      "epoch": 0.10230367262824759,
      "grad_norm": 3.296875,
      "learning_rate": 1.7049848713185904e-05,
      "loss": 0.9462,
      "step": 29190
    },
    {
      "epoch": 0.10233872013514318,
      "grad_norm": 3.421875,
      "learning_rate": 1.705568990291943e-05,
      "loss": 1.0064,
      "step": 29200
    },
    {
      "epoch": 0.10237376764203879,
      "grad_norm": 3.265625,
      "learning_rate": 1.706153109265295e-05,
      "loss": 1.0566,
      "step": 29210
    },
    {
      "epoch": 0.10240881514893438,
      "grad_norm": 3.421875,
      "learning_rate": 1.7067372282386475e-05,
      "loss": 0.924,
      "step": 29220
    },
    {
      "epoch": 0.10244386265582998,
      "grad_norm": 3.015625,
      "learning_rate": 1.7073213472120003e-05,
      "loss": 0.9999,
      "step": 29230
    },
    {
      "epoch": 0.10247891016272558,
      "grad_norm": 3.28125,
      "learning_rate": 1.7079054661853528e-05,
      "loss": 1.0048,
      "step": 29240
    },
    {
      "epoch": 0.10251395766962117,
      "grad_norm": 3.328125,
      "learning_rate": 1.7084895851587053e-05,
      "loss": 1.1199,
      "step": 29250
    },
    {
      "epoch": 0.10254900517651677,
      "grad_norm": 3.28125,
      "learning_rate": 1.7090737041320575e-05,
      "loss": 1.0436,
      "step": 29260
    },
    {
      "epoch": 0.10258405268341236,
      "grad_norm": 3.984375,
      "learning_rate": 1.7096578231054103e-05,
      "loss": 1.038,
      "step": 29270
    },
    {
      "epoch": 0.10261910019030797,
      "grad_norm": 3.5,
      "learning_rate": 1.7102419420787628e-05,
      "loss": 1.0555,
      "step": 29280
    },
    {
      "epoch": 0.10265414769720356,
      "grad_norm": 4.0,
      "learning_rate": 1.7108260610521152e-05,
      "loss": 1.0432,
      "step": 29290
    },
    {
      "epoch": 0.10268919520409915,
      "grad_norm": 3.296875,
      "learning_rate": 1.7114101800254674e-05,
      "loss": 0.971,
      "step": 29300
    },
    {
      "epoch": 0.10272424271099476,
      "grad_norm": 3.4375,
      "learning_rate": 1.7119942989988202e-05,
      "loss": 1.0531,
      "step": 29310
    },
    {
      "epoch": 0.10275929021789035,
      "grad_norm": 3.6875,
      "learning_rate": 1.7125784179721727e-05,
      "loss": 1.0514,
      "step": 29320
    },
    {
      "epoch": 0.10279433772478595,
      "grad_norm": 3.46875,
      "learning_rate": 1.7131625369455252e-05,
      "loss": 1.1216,
      "step": 29330
    },
    {
      "epoch": 0.10282938523168154,
      "grad_norm": 3.125,
      "learning_rate": 1.7137466559188777e-05,
      "loss": 1.0428,
      "step": 29340
    },
    {
      "epoch": 0.10286443273857714,
      "grad_norm": 3.40625,
      "learning_rate": 1.71433077489223e-05,
      "loss": 1.0559,
      "step": 29350
    },
    {
      "epoch": 0.10289948024547274,
      "grad_norm": 3.0625,
      "learning_rate": 1.7149148938655826e-05,
      "loss": 0.9739,
      "step": 29360
    },
    {
      "epoch": 0.10293452775236833,
      "grad_norm": 3.5,
      "learning_rate": 1.715499012838935e-05,
      "loss": 1.071,
      "step": 29370
    },
    {
      "epoch": 0.10296957525926394,
      "grad_norm": 3.3125,
      "learning_rate": 1.7160831318122876e-05,
      "loss": 1.0576,
      "step": 29380
    },
    {
      "epoch": 0.10300462276615953,
      "grad_norm": 2.875,
      "learning_rate": 1.71666725078564e-05,
      "loss": 1.0083,
      "step": 29390
    },
    {
      "epoch": 0.10303967027305512,
      "grad_norm": 3.078125,
      "learning_rate": 1.7172513697589926e-05,
      "loss": 1.0174,
      "step": 29400
    },
    {
      "epoch": 0.10307471777995073,
      "grad_norm": 3.109375,
      "learning_rate": 1.717835488732345e-05,
      "loss": 0.9656,
      "step": 29410
    },
    {
      "epoch": 0.10310976528684632,
      "grad_norm": 3.28125,
      "learning_rate": 1.7184196077056975e-05,
      "loss": 1.001,
      "step": 29420
    },
    {
      "epoch": 0.10314481279374192,
      "grad_norm": 3.46875,
      "learning_rate": 1.71900372667905e-05,
      "loss": 1.0154,
      "step": 29430
    },
    {
      "epoch": 0.10317986030063751,
      "grad_norm": 3.484375,
      "learning_rate": 1.7195878456524025e-05,
      "loss": 1.0345,
      "step": 29440
    },
    {
      "epoch": 0.1032149078075331,
      "grad_norm": 3.328125,
      "learning_rate": 1.720171964625755e-05,
      "loss": 1.031,
      "step": 29450
    },
    {
      "epoch": 0.10324995531442871,
      "grad_norm": 3.828125,
      "learning_rate": 1.7207560835991078e-05,
      "loss": 1.0159,
      "step": 29460
    },
    {
      "epoch": 0.1032850028213243,
      "grad_norm": 3.28125,
      "learning_rate": 1.72134020257246e-05,
      "loss": 0.9945,
      "step": 29470
    },
    {
      "epoch": 0.1033200503282199,
      "grad_norm": 3.234375,
      "learning_rate": 1.7219243215458125e-05,
      "loss": 1.0448,
      "step": 29480
    },
    {
      "epoch": 0.1033550978351155,
      "grad_norm": 2.984375,
      "learning_rate": 1.722508440519165e-05,
      "loss": 1.0535,
      "step": 29490
    },
    {
      "epoch": 0.1033901453420111,
      "grad_norm": 3.5,
      "learning_rate": 1.7230925594925178e-05,
      "loss": 1.0053,
      "step": 29500
    },
    {
      "epoch": 0.1034251928489067,
      "grad_norm": 3.734375,
      "learning_rate": 1.72367667846587e-05,
      "loss": 1.0965,
      "step": 29510
    },
    {
      "epoch": 0.10346024035580229,
      "grad_norm": 2.859375,
      "learning_rate": 1.7242607974392224e-05,
      "loss": 1.0543,
      "step": 29520
    },
    {
      "epoch": 0.10349528786269789,
      "grad_norm": 3.140625,
      "learning_rate": 1.724844916412575e-05,
      "loss": 0.9977,
      "step": 29530
    },
    {
      "epoch": 0.10353033536959348,
      "grad_norm": 3.609375,
      "learning_rate": 1.7254290353859277e-05,
      "loss": 1.0772,
      "step": 29540
    },
    {
      "epoch": 0.10356538287648909,
      "grad_norm": 3.71875,
      "learning_rate": 1.7260131543592802e-05,
      "loss": 1.1031,
      "step": 29550
    },
    {
      "epoch": 0.10360043038338468,
      "grad_norm": 3.234375,
      "learning_rate": 1.7265972733326323e-05,
      "loss": 1.0071,
      "step": 29560
    },
    {
      "epoch": 0.10363547789028027,
      "grad_norm": 3.03125,
      "learning_rate": 1.7271813923059848e-05,
      "loss": 1.0442,
      "step": 29570
    },
    {
      "epoch": 0.10367052539717588,
      "grad_norm": 3.46875,
      "learning_rate": 1.7277655112793376e-05,
      "loss": 1.0493,
      "step": 29580
    },
    {
      "epoch": 0.10370557290407147,
      "grad_norm": 3.25,
      "learning_rate": 1.72834963025269e-05,
      "loss": 1.063,
      "step": 29590
    },
    {
      "epoch": 0.10374062041096707,
      "grad_norm": 3.578125,
      "learning_rate": 1.7289337492260423e-05,
      "loss": 0.9528,
      "step": 29600
    },
    {
      "epoch": 0.10377566791786266,
      "grad_norm": 3.65625,
      "learning_rate": 1.7295178681993948e-05,
      "loss": 0.9929,
      "step": 29610
    },
    {
      "epoch": 0.10381071542475825,
      "grad_norm": 3.140625,
      "learning_rate": 1.7301019871727476e-05,
      "loss": 1.0946,
      "step": 29620
    },
    {
      "epoch": 0.10384576293165386,
      "grad_norm": 4.5,
      "learning_rate": 1.7306861061461e-05,
      "loss": 1.0825,
      "step": 29630
    },
    {
      "epoch": 0.10388081043854945,
      "grad_norm": 3.1875,
      "learning_rate": 1.7312702251194522e-05,
      "loss": 0.9916,
      "step": 29640
    },
    {
      "epoch": 0.10391585794544506,
      "grad_norm": 3.1875,
      "learning_rate": 1.7318543440928047e-05,
      "loss": 1.1412,
      "step": 29650
    },
    {
      "epoch": 0.10395090545234065,
      "grad_norm": 3.21875,
      "learning_rate": 1.7324384630661575e-05,
      "loss": 0.9461,
      "step": 29660
    },
    {
      "epoch": 0.10398595295923624,
      "grad_norm": 3.546875,
      "learning_rate": 1.73302258203951e-05,
      "loss": 1.0742,
      "step": 29670
    },
    {
      "epoch": 0.10402100046613184,
      "grad_norm": 3.703125,
      "learning_rate": 1.7336067010128625e-05,
      "loss": 0.9784,
      "step": 29680
    },
    {
      "epoch": 0.10405604797302744,
      "grad_norm": 3.6875,
      "learning_rate": 1.7341908199862146e-05,
      "loss": 1.067,
      "step": 29690
    },
    {
      "epoch": 0.10409109547992304,
      "grad_norm": 3.390625,
      "learning_rate": 1.7347749389595674e-05,
      "loss": 1.0542,
      "step": 29700
    },
    {
      "epoch": 0.10412614298681863,
      "grad_norm": 3.234375,
      "learning_rate": 1.73535905793292e-05,
      "loss": 0.9529,
      "step": 29710
    },
    {
      "epoch": 0.10416119049371422,
      "grad_norm": 3.84375,
      "learning_rate": 1.7359431769062724e-05,
      "loss": 1.089,
      "step": 29720
    },
    {
      "epoch": 0.10419623800060983,
      "grad_norm": 3.40625,
      "learning_rate": 1.736527295879625e-05,
      "loss": 0.963,
      "step": 29730
    },
    {
      "epoch": 0.10423128550750542,
      "grad_norm": 4.1875,
      "learning_rate": 1.7371114148529774e-05,
      "loss": 1.0128,
      "step": 29740
    },
    {
      "epoch": 0.10426633301440102,
      "grad_norm": 3.109375,
      "learning_rate": 1.73769553382633e-05,
      "loss": 0.9839,
      "step": 29750
    },
    {
      "epoch": 0.10430138052129662,
      "grad_norm": 3.359375,
      "learning_rate": 1.7382796527996824e-05,
      "loss": 1.0361,
      "step": 29760
    },
    {
      "epoch": 0.10433642802819222,
      "grad_norm": 3.453125,
      "learning_rate": 1.738863771773035e-05,
      "loss": 0.9889,
      "step": 29770
    },
    {
      "epoch": 0.10437147553508781,
      "grad_norm": 3.421875,
      "learning_rate": 1.7394478907463873e-05,
      "loss": 0.9944,
      "step": 29780
    },
    {
      "epoch": 0.1044065230419834,
      "grad_norm": 3.21875,
      "learning_rate": 1.7400320097197398e-05,
      "loss": 1.0691,
      "step": 29790
    },
    {
      "epoch": 0.10444157054887901,
      "grad_norm": 3.3125,
      "learning_rate": 1.7406161286930923e-05,
      "loss": 1.0457,
      "step": 29800
    },
    {
      "epoch": 0.1044766180557746,
      "grad_norm": 3.296875,
      "learning_rate": 1.7412002476664448e-05,
      "loss": 1.0729,
      "step": 29810
    },
    {
      "epoch": 0.1045116655626702,
      "grad_norm": 3.65625,
      "learning_rate": 1.7417843666397973e-05,
      "loss": 1.0969,
      "step": 29820
    },
    {
      "epoch": 0.1045467130695658,
      "grad_norm": 3.390625,
      "learning_rate": 1.7423684856131497e-05,
      "loss": 1.038,
      "step": 29830
    },
    {
      "epoch": 0.10458176057646139,
      "grad_norm": 3.984375,
      "learning_rate": 1.7429526045865022e-05,
      "loss": 1.1071,
      "step": 29840
    },
    {
      "epoch": 0.104616808083357,
      "grad_norm": 3.625,
      "learning_rate": 1.7435367235598547e-05,
      "loss": 1.0393,
      "step": 29850
    },
    {
      "epoch": 0.10465185559025258,
      "grad_norm": 3.078125,
      "learning_rate": 1.7441208425332072e-05,
      "loss": 1.0847,
      "step": 29860
    },
    {
      "epoch": 0.10468690309714819,
      "grad_norm": 3.234375,
      "learning_rate": 1.7447049615065597e-05,
      "loss": 1.0467,
      "step": 29870
    },
    {
      "epoch": 0.10472195060404378,
      "grad_norm": 3.1875,
      "learning_rate": 1.7452890804799122e-05,
      "loss": 1.0619,
      "step": 29880
    },
    {
      "epoch": 0.10475699811093937,
      "grad_norm": 3.140625,
      "learning_rate": 1.745873199453265e-05,
      "loss": 0.9974,
      "step": 29890
    },
    {
      "epoch": 0.10479204561783498,
      "grad_norm": 3.375,
      "learning_rate": 1.746457318426617e-05,
      "loss": 0.9909,
      "step": 29900
    },
    {
      "epoch": 0.10482709312473057,
      "grad_norm": 3.25,
      "learning_rate": 1.7470414373999696e-05,
      "loss": 0.9922,
      "step": 29910
    },
    {
      "epoch": 0.10486214063162617,
      "grad_norm": 3.359375,
      "learning_rate": 1.747625556373322e-05,
      "loss": 1.0942,
      "step": 29920
    },
    {
      "epoch": 0.10489718813852177,
      "grad_norm": 3.515625,
      "learning_rate": 1.748209675346675e-05,
      "loss": 1.0368,
      "step": 29930
    },
    {
      "epoch": 0.10493223564541736,
      "grad_norm": 3.484375,
      "learning_rate": 1.748793794320027e-05,
      "loss": 1.0586,
      "step": 29940
    },
    {
      "epoch": 0.10496728315231296,
      "grad_norm": 3.484375,
      "learning_rate": 1.7493779132933796e-05,
      "loss": 1.0152,
      "step": 29950
    },
    {
      "epoch": 0.10500233065920855,
      "grad_norm": 3.265625,
      "learning_rate": 1.749962032266732e-05,
      "loss": 1.0217,
      "step": 29960
    },
    {
      "epoch": 0.10503737816610416,
      "grad_norm": 3.28125,
      "learning_rate": 1.750546151240085e-05,
      "loss": 1.0573,
      "step": 29970
    },
    {
      "epoch": 0.10507242567299975,
      "grad_norm": 3.53125,
      "learning_rate": 1.751130270213437e-05,
      "loss": 0.9621,
      "step": 29980
    },
    {
      "epoch": 0.10510747317989534,
      "grad_norm": 3.265625,
      "learning_rate": 1.7517143891867895e-05,
      "loss": 1.1137,
      "step": 29990
    },
    {
      "epoch": 0.10514252068679095,
      "grad_norm": 3.3125,
      "learning_rate": 1.752298508160142e-05,
      "loss": 0.9618,
      "step": 30000
    },
    {
      "epoch": 0.10514252068679095,
      "eval_loss": 0.9692790508270264,
      "eval_runtime": 558.5828,
      "eval_samples_per_second": 681.074,
      "eval_steps_per_second": 56.756,
      "step": 30000
    },
    {
      "epoch": 0.10517756819368654,
      "grad_norm": 3.1875,
      "learning_rate": 1.7528826271334948e-05,
      "loss": 1.0871,
      "step": 30010
    },
    {
      "epoch": 0.10521261570058214,
      "grad_norm": 3.09375,
      "learning_rate": 1.7534667461068473e-05,
      "loss": 1.0207,
      "step": 30020
    },
    {
      "epoch": 0.10524766320747773,
      "grad_norm": 3.40625,
      "learning_rate": 1.7540508650801994e-05,
      "loss": 1.0621,
      "step": 30030
    },
    {
      "epoch": 0.10528271071437334,
      "grad_norm": 3.390625,
      "learning_rate": 1.754634984053552e-05,
      "loss": 1.0876,
      "step": 30040
    },
    {
      "epoch": 0.10531775822126893,
      "grad_norm": 3.296875,
      "learning_rate": 1.7552191030269047e-05,
      "loss": 1.0282,
      "step": 30050
    },
    {
      "epoch": 0.10535280572816452,
      "grad_norm": 3.5,
      "learning_rate": 1.7558032220002572e-05,
      "loss": 1.0292,
      "step": 30060
    },
    {
      "epoch": 0.10538785323506013,
      "grad_norm": 3.625,
      "learning_rate": 1.7563873409736094e-05,
      "loss": 1.029,
      "step": 30070
    },
    {
      "epoch": 0.10542290074195572,
      "grad_norm": 3.390625,
      "learning_rate": 1.7569714599469622e-05,
      "loss": 1.026,
      "step": 30080
    },
    {
      "epoch": 0.10545794824885132,
      "grad_norm": 3.640625,
      "learning_rate": 1.7575555789203147e-05,
      "loss": 1.0453,
      "step": 30090
    },
    {
      "epoch": 0.10549299575574692,
      "grad_norm": 3.09375,
      "learning_rate": 1.758139697893667e-05,
      "loss": 1.0264,
      "step": 30100
    },
    {
      "epoch": 0.1055280432626425,
      "grad_norm": 3.390625,
      "learning_rate": 1.7587238168670193e-05,
      "loss": 0.9601,
      "step": 30110
    },
    {
      "epoch": 0.10556309076953811,
      "grad_norm": 3.40625,
      "learning_rate": 1.759307935840372e-05,
      "loss": 1.0313,
      "step": 30120
    },
    {
      "epoch": 0.1055981382764337,
      "grad_norm": 3.03125,
      "learning_rate": 1.7598920548137246e-05,
      "loss": 0.9738,
      "step": 30130
    },
    {
      "epoch": 0.10563318578332931,
      "grad_norm": 2.875,
      "learning_rate": 1.760476173787077e-05,
      "loss": 1.0077,
      "step": 30140
    },
    {
      "epoch": 0.1056682332902249,
      "grad_norm": 3.28125,
      "learning_rate": 1.7610602927604296e-05,
      "loss": 1.0615,
      "step": 30150
    },
    {
      "epoch": 0.10570328079712049,
      "grad_norm": 4.125,
      "learning_rate": 1.761644411733782e-05,
      "loss": 0.9592,
      "step": 30160
    },
    {
      "epoch": 0.1057383283040161,
      "grad_norm": 3.40625,
      "learning_rate": 1.7622285307071346e-05,
      "loss": 1.0639,
      "step": 30170
    },
    {
      "epoch": 0.10577337581091169,
      "grad_norm": 3.15625,
      "learning_rate": 1.762812649680487e-05,
      "loss": 1.0035,
      "step": 30180
    },
    {
      "epoch": 0.10580842331780729,
      "grad_norm": 3.359375,
      "learning_rate": 1.7633967686538395e-05,
      "loss": 1.0564,
      "step": 30190
    },
    {
      "epoch": 0.10584347082470288,
      "grad_norm": 3.609375,
      "learning_rate": 1.763980887627192e-05,
      "loss": 0.9957,
      "step": 30200
    },
    {
      "epoch": 0.10587851833159848,
      "grad_norm": 3.34375,
      "learning_rate": 1.7645650066005445e-05,
      "loss": 1.0553,
      "step": 30210
    },
    {
      "epoch": 0.10591356583849408,
      "grad_norm": 3.46875,
      "learning_rate": 1.765149125573897e-05,
      "loss": 0.9723,
      "step": 30220
    },
    {
      "epoch": 0.10594861334538967,
      "grad_norm": 3.25,
      "learning_rate": 1.7657332445472495e-05,
      "loss": 1.0289,
      "step": 30230
    },
    {
      "epoch": 0.10598366085228528,
      "grad_norm": 3.375,
      "learning_rate": 1.766317363520602e-05,
      "loss": 0.9929,
      "step": 30240
    },
    {
      "epoch": 0.10601870835918087,
      "grad_norm": 3.109375,
      "learning_rate": 1.7669014824939544e-05,
      "loss": 1.021,
      "step": 30250
    },
    {
      "epoch": 0.10605375586607646,
      "grad_norm": 3.15625,
      "learning_rate": 1.767485601467307e-05,
      "loss": 1.0541,
      "step": 30260
    },
    {
      "epoch": 0.10608880337297207,
      "grad_norm": 3.84375,
      "learning_rate": 1.7680697204406594e-05,
      "loss": 1.0701,
      "step": 30270
    },
    {
      "epoch": 0.10612385087986766,
      "grad_norm": 3.390625,
      "learning_rate": 1.768653839414012e-05,
      "loss": 1.0069,
      "step": 30280
    },
    {
      "epoch": 0.10615889838676326,
      "grad_norm": 3.421875,
      "learning_rate": 1.7692379583873644e-05,
      "loss": 1.001,
      "step": 30290
    },
    {
      "epoch": 0.10619394589365885,
      "grad_norm": 3.328125,
      "learning_rate": 1.769822077360717e-05,
      "loss": 1.0904,
      "step": 30300
    },
    {
      "epoch": 0.10622899340055446,
      "grad_norm": 3.265625,
      "learning_rate": 1.7704061963340693e-05,
      "loss": 1.0212,
      "step": 30310
    },
    {
      "epoch": 0.10626404090745005,
      "grad_norm": 3.359375,
      "learning_rate": 1.7709903153074218e-05,
      "loss": 1.0556,
      "step": 30320
    },
    {
      "epoch": 0.10629908841434564,
      "grad_norm": 3.375,
      "learning_rate": 1.7715744342807743e-05,
      "loss": 0.9685,
      "step": 30330
    },
    {
      "epoch": 0.10633413592124125,
      "grad_norm": 3.515625,
      "learning_rate": 1.7721585532541268e-05,
      "loss": 1.1592,
      "step": 30340
    },
    {
      "epoch": 0.10636918342813684,
      "grad_norm": 3.5625,
      "learning_rate": 1.7727426722274793e-05,
      "loss": 1.0248,
      "step": 30350
    },
    {
      "epoch": 0.10640423093503244,
      "grad_norm": 3.234375,
      "learning_rate": 1.773326791200832e-05,
      "loss": 1.0175,
      "step": 30360
    },
    {
      "epoch": 0.10643927844192803,
      "grad_norm": 3.765625,
      "learning_rate": 1.7739109101741842e-05,
      "loss": 1.0315,
      "step": 30370
    },
    {
      "epoch": 0.10647432594882363,
      "grad_norm": 3.015625,
      "learning_rate": 1.7744950291475367e-05,
      "loss": 1.008,
      "step": 30380
    },
    {
      "epoch": 0.10650937345571923,
      "grad_norm": 3.25,
      "learning_rate": 1.7750791481208892e-05,
      "loss": 1.0059,
      "step": 30390
    },
    {
      "epoch": 0.10654442096261482,
      "grad_norm": 3.171875,
      "learning_rate": 1.775663267094242e-05,
      "loss": 1.0289,
      "step": 30400
    },
    {
      "epoch": 0.10657946846951043,
      "grad_norm": 3.390625,
      "learning_rate": 1.7762473860675942e-05,
      "loss": 1.0249,
      "step": 30410
    },
    {
      "epoch": 0.10661451597640602,
      "grad_norm": 3.703125,
      "learning_rate": 1.7768315050409467e-05,
      "loss": 1.0713,
      "step": 30420
    },
    {
      "epoch": 0.10664956348330161,
      "grad_norm": 3.453125,
      "learning_rate": 1.7774156240142995e-05,
      "loss": 0.9193,
      "step": 30430
    },
    {
      "epoch": 0.10668461099019722,
      "grad_norm": 3.5,
      "learning_rate": 1.777999742987652e-05,
      "loss": 0.9551,
      "step": 30440
    },
    {
      "epoch": 0.1067196584970928,
      "grad_norm": 3.59375,
      "learning_rate": 1.7785838619610045e-05,
      "loss": 1.0298,
      "step": 30450
    },
    {
      "epoch": 0.10675470600398841,
      "grad_norm": 3.09375,
      "learning_rate": 1.7791679809343566e-05,
      "loss": 0.986,
      "step": 30460
    },
    {
      "epoch": 0.106789753510884,
      "grad_norm": 3.453125,
      "learning_rate": 1.7797520999077094e-05,
      "loss": 1.1077,
      "step": 30470
    },
    {
      "epoch": 0.1068248010177796,
      "grad_norm": 3.203125,
      "learning_rate": 1.780336218881062e-05,
      "loss": 0.996,
      "step": 30480
    },
    {
      "epoch": 0.1068598485246752,
      "grad_norm": 3.328125,
      "learning_rate": 1.7809203378544144e-05,
      "loss": 1.0635,
      "step": 30490
    },
    {
      "epoch": 0.10689489603157079,
      "grad_norm": 3.328125,
      "learning_rate": 1.7815044568277665e-05,
      "loss": 0.9458,
      "step": 30500
    },
    {
      "epoch": 0.1069299435384664,
      "grad_norm": 3.03125,
      "learning_rate": 1.7820885758011194e-05,
      "loss": 0.966,
      "step": 30510
    },
    {
      "epoch": 0.10696499104536199,
      "grad_norm": 3.328125,
      "learning_rate": 1.782672694774472e-05,
      "loss": 1.0461,
      "step": 30520
    },
    {
      "epoch": 0.10700003855225758,
      "grad_norm": 2.96875,
      "learning_rate": 1.7832568137478243e-05,
      "loss": 0.9673,
      "step": 30530
    },
    {
      "epoch": 0.10703508605915318,
      "grad_norm": 2.953125,
      "learning_rate": 1.7838409327211765e-05,
      "loss": 1.0835,
      "step": 30540
    },
    {
      "epoch": 0.10707013356604878,
      "grad_norm": 3.328125,
      "learning_rate": 1.7844250516945293e-05,
      "loss": 1.1002,
      "step": 30550
    },
    {
      "epoch": 0.10710518107294438,
      "grad_norm": 3.671875,
      "learning_rate": 1.7850091706678818e-05,
      "loss": 1.0369,
      "step": 30560
    },
    {
      "epoch": 0.10714022857983997,
      "grad_norm": 3.609375,
      "learning_rate": 1.7855932896412343e-05,
      "loss": 1.034,
      "step": 30570
    },
    {
      "epoch": 0.10717527608673558,
      "grad_norm": 3.453125,
      "learning_rate": 1.7861774086145868e-05,
      "loss": 1.0877,
      "step": 30580
    },
    {
      "epoch": 0.10721032359363117,
      "grad_norm": 3.28125,
      "learning_rate": 1.7867615275879392e-05,
      "loss": 0.9622,
      "step": 30590
    },
    {
      "epoch": 0.10724537110052676,
      "grad_norm": 3.53125,
      "learning_rate": 1.7873456465612917e-05,
      "loss": 0.9938,
      "step": 30600
    },
    {
      "epoch": 0.10728041860742236,
      "grad_norm": 3.625,
      "learning_rate": 1.7879297655346442e-05,
      "loss": 1.0093,
      "step": 30610
    },
    {
      "epoch": 0.10731546611431796,
      "grad_norm": 3.5625,
      "learning_rate": 1.7885138845079967e-05,
      "loss": 1.0398,
      "step": 30620
    },
    {
      "epoch": 0.10735051362121356,
      "grad_norm": 3.625,
      "learning_rate": 1.7890980034813492e-05,
      "loss": 1.0657,
      "step": 30630
    },
    {
      "epoch": 0.10738556112810915,
      "grad_norm": 3.390625,
      "learning_rate": 1.7896821224547017e-05,
      "loss": 1.0387,
      "step": 30640
    },
    {
      "epoch": 0.10742060863500474,
      "grad_norm": 3.234375,
      "learning_rate": 1.790266241428054e-05,
      "loss": 1.0898,
      "step": 30650
    },
    {
      "epoch": 0.10745565614190035,
      "grad_norm": 3.375,
      "learning_rate": 1.7908503604014066e-05,
      "loss": 0.9972,
      "step": 30660
    },
    {
      "epoch": 0.10749070364879594,
      "grad_norm": 3.5625,
      "learning_rate": 1.791434479374759e-05,
      "loss": 0.9889,
      "step": 30670
    },
    {
      "epoch": 0.10752575115569155,
      "grad_norm": 3.28125,
      "learning_rate": 1.7920185983481116e-05,
      "loss": 0.8887,
      "step": 30680
    },
    {
      "epoch": 0.10756079866258714,
      "grad_norm": 3.453125,
      "learning_rate": 1.792602717321464e-05,
      "loss": 0.9848,
      "step": 30690
    },
    {
      "epoch": 0.10759584616948273,
      "grad_norm": 3.40625,
      "learning_rate": 1.7931868362948166e-05,
      "loss": 0.9571,
      "step": 30700
    },
    {
      "epoch": 0.10763089367637833,
      "grad_norm": 3.25,
      "learning_rate": 1.793770955268169e-05,
      "loss": 1.0087,
      "step": 30710
    },
    {
      "epoch": 0.10766594118327392,
      "grad_norm": 3.53125,
      "learning_rate": 1.7943550742415215e-05,
      "loss": 1.0377,
      "step": 30720
    },
    {
      "epoch": 0.10770098869016953,
      "grad_norm": 3.03125,
      "learning_rate": 1.794939193214874e-05,
      "loss": 0.9457,
      "step": 30730
    },
    {
      "epoch": 0.10773603619706512,
      "grad_norm": 3.25,
      "learning_rate": 1.7955233121882265e-05,
      "loss": 1.0406,
      "step": 30740
    },
    {
      "epoch": 0.10777108370396071,
      "grad_norm": 3.5,
      "learning_rate": 1.796107431161579e-05,
      "loss": 1.0347,
      "step": 30750
    },
    {
      "epoch": 0.10780613121085632,
      "grad_norm": 3.09375,
      "learning_rate": 1.7966915501349315e-05,
      "loss": 0.9988,
      "step": 30760
    },
    {
      "epoch": 0.10784117871775191,
      "grad_norm": 2.8125,
      "learning_rate": 1.797275669108284e-05,
      "loss": 0.9416,
      "step": 30770
    },
    {
      "epoch": 0.10787622622464751,
      "grad_norm": 3.171875,
      "learning_rate": 1.7978597880816368e-05,
      "loss": 1.076,
      "step": 30780
    },
    {
      "epoch": 0.1079112737315431,
      "grad_norm": 3.296875,
      "learning_rate": 1.7984439070549893e-05,
      "loss": 1.0531,
      "step": 30790
    },
    {
      "epoch": 0.1079463212384387,
      "grad_norm": 3.484375,
      "learning_rate": 1.7990280260283414e-05,
      "loss": 1.0988,
      "step": 30800
    },
    {
      "epoch": 0.1079813687453343,
      "grad_norm": 3.53125,
      "learning_rate": 1.799612145001694e-05,
      "loss": 1.0315,
      "step": 30810
    },
    {
      "epoch": 0.1080164162522299,
      "grad_norm": 3.171875,
      "learning_rate": 1.8001962639750467e-05,
      "loss": 1.025,
      "step": 30820
    },
    {
      "epoch": 0.1080514637591255,
      "grad_norm": 3.53125,
      "learning_rate": 1.8007803829483992e-05,
      "loss": 1.0584,
      "step": 30830
    },
    {
      "epoch": 0.10808651126602109,
      "grad_norm": 3.78125,
      "learning_rate": 1.8013645019217514e-05,
      "loss": 1.032,
      "step": 30840
    },
    {
      "epoch": 0.1081215587729167,
      "grad_norm": 3.25,
      "learning_rate": 1.801948620895104e-05,
      "loss": 1.0075,
      "step": 30850
    },
    {
      "epoch": 0.10815660627981229,
      "grad_norm": 3.375,
      "learning_rate": 1.8025327398684567e-05,
      "loss": 1.0661,
      "step": 30860
    },
    {
      "epoch": 0.10819165378670788,
      "grad_norm": 3.34375,
      "learning_rate": 1.803116858841809e-05,
      "loss": 1.0339,
      "step": 30870
    },
    {
      "epoch": 0.10822670129360348,
      "grad_norm": 3.484375,
      "learning_rate": 1.8037009778151613e-05,
      "loss": 1.0307,
      "step": 30880
    },
    {
      "epoch": 0.10826174880049907,
      "grad_norm": 3.734375,
      "learning_rate": 1.8042850967885138e-05,
      "loss": 0.9948,
      "step": 30890
    },
    {
      "epoch": 0.10829679630739468,
      "grad_norm": 3.203125,
      "learning_rate": 1.8048692157618666e-05,
      "loss": 0.9801,
      "step": 30900
    },
    {
      "epoch": 0.10833184381429027,
      "grad_norm": 3.28125,
      "learning_rate": 1.805453334735219e-05,
      "loss": 0.9799,
      "step": 30910
    },
    {
      "epoch": 0.10836689132118586,
      "grad_norm": 3.265625,
      "learning_rate": 1.8060374537085716e-05,
      "loss": 1.0004,
      "step": 30920
    },
    {
      "epoch": 0.10840193882808147,
      "grad_norm": 3.578125,
      "learning_rate": 1.8066215726819237e-05,
      "loss": 1.0513,
      "step": 30930
    },
    {
      "epoch": 0.10843698633497706,
      "grad_norm": 3.34375,
      "learning_rate": 1.8072056916552765e-05,
      "loss": 1.0013,
      "step": 30940
    },
    {
      "epoch": 0.10847203384187266,
      "grad_norm": 3.46875,
      "learning_rate": 1.807789810628629e-05,
      "loss": 1.0835,
      "step": 30950
    },
    {
      "epoch": 0.10850708134876826,
      "grad_norm": 3.40625,
      "learning_rate": 1.8083739296019815e-05,
      "loss": 1.0335,
      "step": 30960
    },
    {
      "epoch": 0.10854212885566385,
      "grad_norm": 3.296875,
      "learning_rate": 1.8089580485753337e-05,
      "loss": 1.0057,
      "step": 30970
    },
    {
      "epoch": 0.10857717636255945,
      "grad_norm": 3.625,
      "learning_rate": 1.8095421675486865e-05,
      "loss": 1.0807,
      "step": 30980
    },
    {
      "epoch": 0.10861222386945504,
      "grad_norm": 4.0,
      "learning_rate": 1.810126286522039e-05,
      "loss": 0.9623,
      "step": 30990
    },
    {
      "epoch": 0.10864727137635065,
      "grad_norm": 3.296875,
      "learning_rate": 1.8107104054953914e-05,
      "loss": 0.9795,
      "step": 31000
    },
    {
      "epoch": 0.10868231888324624,
      "grad_norm": 3.390625,
      "learning_rate": 1.8112945244687436e-05,
      "loss": 1.0161,
      "step": 31010
    },
    {
      "epoch": 0.10871736639014183,
      "grad_norm": 3.84375,
      "learning_rate": 1.8118786434420964e-05,
      "loss": 0.9697,
      "step": 31020
    },
    {
      "epoch": 0.10875241389703744,
      "grad_norm": 3.046875,
      "learning_rate": 1.812462762415449e-05,
      "loss": 1.0221,
      "step": 31030
    },
    {
      "epoch": 0.10878746140393303,
      "grad_norm": 3.0625,
      "learning_rate": 1.8130468813888014e-05,
      "loss": 1.043,
      "step": 31040
    },
    {
      "epoch": 0.10882250891082863,
      "grad_norm": 3.4375,
      "learning_rate": 1.813631000362154e-05,
      "loss": 1.0372,
      "step": 31050
    },
    {
      "epoch": 0.10885755641772422,
      "grad_norm": 3.28125,
      "learning_rate": 1.8142151193355064e-05,
      "loss": 0.9711,
      "step": 31060
    },
    {
      "epoch": 0.10889260392461982,
      "grad_norm": 3.828125,
      "learning_rate": 1.814799238308859e-05,
      "loss": 1.0861,
      "step": 31070
    },
    {
      "epoch": 0.10892765143151542,
      "grad_norm": 3.40625,
      "learning_rate": 1.8153833572822113e-05,
      "loss": 1.0842,
      "step": 31080
    },
    {
      "epoch": 0.10896269893841101,
      "grad_norm": 3.296875,
      "learning_rate": 1.8159674762555638e-05,
      "loss": 1.0892,
      "step": 31090
    },
    {
      "epoch": 0.10899774644530662,
      "grad_norm": 3.140625,
      "learning_rate": 1.8165515952289163e-05,
      "loss": 1.0588,
      "step": 31100
    },
    {
      "epoch": 0.10903279395220221,
      "grad_norm": 3.203125,
      "learning_rate": 1.8171357142022688e-05,
      "loss": 1.052,
      "step": 31110
    },
    {
      "epoch": 0.1090678414590978,
      "grad_norm": 3.15625,
      "learning_rate": 1.8177198331756213e-05,
      "loss": 1.0,
      "step": 31120
    },
    {
      "epoch": 0.1091028889659934,
      "grad_norm": 3.359375,
      "learning_rate": 1.818303952148974e-05,
      "loss": 1.0726,
      "step": 31130
    },
    {
      "epoch": 0.109137936472889,
      "grad_norm": 3.078125,
      "learning_rate": 1.8188880711223262e-05,
      "loss": 1.0644,
      "step": 31140
    },
    {
      "epoch": 0.1091729839797846,
      "grad_norm": 3.328125,
      "learning_rate": 1.8194721900956787e-05,
      "loss": 1.0136,
      "step": 31150
    },
    {
      "epoch": 0.1092080314866802,
      "grad_norm": 3.421875,
      "learning_rate": 1.8200563090690312e-05,
      "loss": 0.9805,
      "step": 31160
    },
    {
      "epoch": 0.1092430789935758,
      "grad_norm": 3.703125,
      "learning_rate": 1.820640428042384e-05,
      "loss": 0.9894,
      "step": 31170
    },
    {
      "epoch": 0.10927812650047139,
      "grad_norm": 3.671875,
      "learning_rate": 1.821224547015736e-05,
      "loss": 0.9534,
      "step": 31180
    },
    {
      "epoch": 0.10931317400736698,
      "grad_norm": 3.65625,
      "learning_rate": 1.8218086659890886e-05,
      "loss": 1.1062,
      "step": 31190
    },
    {
      "epoch": 0.10934822151426259,
      "grad_norm": 3.984375,
      "learning_rate": 1.822392784962441e-05,
      "loss": 1.0367,
      "step": 31200
    },
    {
      "epoch": 0.10938326902115818,
      "grad_norm": 3.140625,
      "learning_rate": 1.822976903935794e-05,
      "loss": 1.0849,
      "step": 31210
    },
    {
      "epoch": 0.10941831652805378,
      "grad_norm": 3.28125,
      "learning_rate": 1.823561022909146e-05,
      "loss": 1.1213,
      "step": 31220
    },
    {
      "epoch": 0.10945336403494937,
      "grad_norm": 3.40625,
      "learning_rate": 1.8241451418824986e-05,
      "loss": 1.06,
      "step": 31230
    },
    {
      "epoch": 0.10948841154184497,
      "grad_norm": 3.171875,
      "learning_rate": 1.824729260855851e-05,
      "loss": 0.9467,
      "step": 31240
    },
    {
      "epoch": 0.10952345904874057,
      "grad_norm": 3.125,
      "learning_rate": 1.825313379829204e-05,
      "loss": 1.0497,
      "step": 31250
    },
    {
      "epoch": 0.10955850655563616,
      "grad_norm": 3.046875,
      "learning_rate": 1.8258974988025564e-05,
      "loss": 1.0192,
      "step": 31260
    },
    {
      "epoch": 0.10959355406253177,
      "grad_norm": 3.5625,
      "learning_rate": 1.8264816177759085e-05,
      "loss": 1.0534,
      "step": 31270
    },
    {
      "epoch": 0.10962860156942736,
      "grad_norm": 3.484375,
      "learning_rate": 1.827065736749261e-05,
      "loss": 1.0492,
      "step": 31280
    },
    {
      "epoch": 0.10966364907632295,
      "grad_norm": 3.875,
      "learning_rate": 1.827649855722614e-05,
      "loss": 1.0272,
      "step": 31290
    },
    {
      "epoch": 0.10969869658321856,
      "grad_norm": 3.09375,
      "learning_rate": 1.8282339746959663e-05,
      "loss": 0.9533,
      "step": 31300
    },
    {
      "epoch": 0.10973374409011415,
      "grad_norm": 3.59375,
      "learning_rate": 1.8288180936693185e-05,
      "loss": 1.0424,
      "step": 31310
    },
    {
      "epoch": 0.10976879159700975,
      "grad_norm": 3.796875,
      "learning_rate": 1.829402212642671e-05,
      "loss": 1.0134,
      "step": 31320
    },
    {
      "epoch": 0.10980383910390534,
      "grad_norm": 3.34375,
      "learning_rate": 1.8299863316160238e-05,
      "loss": 1.0831,
      "step": 31330
    },
    {
      "epoch": 0.10983888661080093,
      "grad_norm": 2.953125,
      "learning_rate": 1.8305704505893763e-05,
      "loss": 1.0492,
      "step": 31340
    },
    {
      "epoch": 0.10987393411769654,
      "grad_norm": 3.78125,
      "learning_rate": 1.8311545695627287e-05,
      "loss": 1.0216,
      "step": 31350
    },
    {
      "epoch": 0.10990898162459213,
      "grad_norm": 3.640625,
      "learning_rate": 1.831738688536081e-05,
      "loss": 1.0518,
      "step": 31360
    },
    {
      "epoch": 0.10994402913148774,
      "grad_norm": 3.421875,
      "learning_rate": 1.8323228075094337e-05,
      "loss": 1.0466,
      "step": 31370
    },
    {
      "epoch": 0.10997907663838333,
      "grad_norm": 2.859375,
      "learning_rate": 1.8329069264827862e-05,
      "loss": 0.9633,
      "step": 31380
    },
    {
      "epoch": 0.11001412414527892,
      "grad_norm": 3.359375,
      "learning_rate": 1.8334910454561387e-05,
      "loss": 1.068,
      "step": 31390
    },
    {
      "epoch": 0.11004917165217452,
      "grad_norm": 3.1875,
      "learning_rate": 1.8340751644294908e-05,
      "loss": 1.0397,
      "step": 31400
    },
    {
      "epoch": 0.11008421915907012,
      "grad_norm": 3.453125,
      "learning_rate": 1.8346592834028436e-05,
      "loss": 1.0665,
      "step": 31410
    },
    {
      "epoch": 0.11011926666596572,
      "grad_norm": 3.15625,
      "learning_rate": 1.835243402376196e-05,
      "loss": 0.9965,
      "step": 31420
    },
    {
      "epoch": 0.11015431417286131,
      "grad_norm": 3.78125,
      "learning_rate": 1.8358275213495486e-05,
      "loss": 1.0943,
      "step": 31430
    },
    {
      "epoch": 0.11018936167975692,
      "grad_norm": 3.09375,
      "learning_rate": 1.836411640322901e-05,
      "loss": 1.016,
      "step": 31440
    },
    {
      "epoch": 0.11022440918665251,
      "grad_norm": 3.625,
      "learning_rate": 1.8369957592962536e-05,
      "loss": 1.1018,
      "step": 31450
    },
    {
      "epoch": 0.1102594566935481,
      "grad_norm": 3.640625,
      "learning_rate": 1.837579878269606e-05,
      "loss": 1.0604,
      "step": 31460
    },
    {
      "epoch": 0.1102945042004437,
      "grad_norm": 3.390625,
      "learning_rate": 1.8381639972429586e-05,
      "loss": 1.0281,
      "step": 31470
    },
    {
      "epoch": 0.1103295517073393,
      "grad_norm": 3.90625,
      "learning_rate": 1.838748116216311e-05,
      "loss": 1.0899,
      "step": 31480
    },
    {
      "epoch": 0.1103645992142349,
      "grad_norm": 3.59375,
      "learning_rate": 1.8393322351896635e-05,
      "loss": 1.0041,
      "step": 31490
    },
    {
      "epoch": 0.11039964672113049,
      "grad_norm": 3.71875,
      "learning_rate": 1.839916354163016e-05,
      "loss": 1.0777,
      "step": 31500
    },
    {
      "epoch": 0.11043469422802608,
      "grad_norm": 3.75,
      "learning_rate": 1.8405004731363685e-05,
      "loss": 1.189,
      "step": 31510
    },
    {
      "epoch": 0.11046974173492169,
      "grad_norm": 3.40625,
      "learning_rate": 1.841084592109721e-05,
      "loss": 1.0148,
      "step": 31520
    },
    {
      "epoch": 0.11050478924181728,
      "grad_norm": 3.0,
      "learning_rate": 1.8416687110830735e-05,
      "loss": 0.9652,
      "step": 31530
    },
    {
      "epoch": 0.11053983674871289,
      "grad_norm": 2.921875,
      "learning_rate": 1.842252830056426e-05,
      "loss": 1.0413,
      "step": 31540
    },
    {
      "epoch": 0.11057488425560848,
      "grad_norm": 3.90625,
      "learning_rate": 1.8428369490297784e-05,
      "loss": 1.0685,
      "step": 31550
    },
    {
      "epoch": 0.11060993176250407,
      "grad_norm": 3.390625,
      "learning_rate": 1.843421068003131e-05,
      "loss": 0.9865,
      "step": 31560
    },
    {
      "epoch": 0.11064497926939967,
      "grad_norm": 3.265625,
      "learning_rate": 1.8440051869764834e-05,
      "loss": 0.971,
      "step": 31570
    },
    {
      "epoch": 0.11068002677629526,
      "grad_norm": 3.703125,
      "learning_rate": 1.844589305949836e-05,
      "loss": 0.9905,
      "step": 31580
    },
    {
      "epoch": 0.11071507428319087,
      "grad_norm": 3.484375,
      "learning_rate": 1.8451734249231884e-05,
      "loss": 1.0336,
      "step": 31590
    },
    {
      "epoch": 0.11075012179008646,
      "grad_norm": 3.546875,
      "learning_rate": 1.8457575438965412e-05,
      "loss": 1.0477,
      "step": 31600
    },
    {
      "epoch": 0.11078516929698205,
      "grad_norm": 3.609375,
      "learning_rate": 1.8463416628698933e-05,
      "loss": 1.0,
      "step": 31610
    },
    {
      "epoch": 0.11082021680387766,
      "grad_norm": 3.25,
      "learning_rate": 1.8469257818432458e-05,
      "loss": 1.0077,
      "step": 31620
    },
    {
      "epoch": 0.11085526431077325,
      "grad_norm": 3.640625,
      "learning_rate": 1.8475099008165983e-05,
      "loss": 1.0862,
      "step": 31630
    },
    {
      "epoch": 0.11089031181766885,
      "grad_norm": 3.9375,
      "learning_rate": 1.848094019789951e-05,
      "loss": 1.0147,
      "step": 31640
    },
    {
      "epoch": 0.11092535932456445,
      "grad_norm": 3.1875,
      "learning_rate": 1.8486781387633033e-05,
      "loss": 1.0045,
      "step": 31650
    },
    {
      "epoch": 0.11096040683146004,
      "grad_norm": 2.9375,
      "learning_rate": 1.8492622577366558e-05,
      "loss": 0.9885,
      "step": 31660
    },
    {
      "epoch": 0.11099545433835564,
      "grad_norm": 4.0,
      "learning_rate": 1.8498463767100082e-05,
      "loss": 1.0632,
      "step": 31670
    },
    {
      "epoch": 0.11103050184525123,
      "grad_norm": 3.28125,
      "learning_rate": 1.850430495683361e-05,
      "loss": 0.9572,
      "step": 31680
    },
    {
      "epoch": 0.11106554935214684,
      "grad_norm": 3.484375,
      "learning_rate": 1.8510146146567135e-05,
      "loss": 1.0559,
      "step": 31690
    },
    {
      "epoch": 0.11110059685904243,
      "grad_norm": 3.25,
      "learning_rate": 1.8515987336300657e-05,
      "loss": 0.9417,
      "step": 31700
    },
    {
      "epoch": 0.11113564436593804,
      "grad_norm": 3.296875,
      "learning_rate": 1.8521828526034182e-05,
      "loss": 1.0723,
      "step": 31710
    },
    {
      "epoch": 0.11117069187283363,
      "grad_norm": 3.59375,
      "learning_rate": 1.852766971576771e-05,
      "loss": 1.0148,
      "step": 31720
    },
    {
      "epoch": 0.11120573937972922,
      "grad_norm": 2.765625,
      "learning_rate": 1.8533510905501235e-05,
      "loss": 0.9744,
      "step": 31730
    },
    {
      "epoch": 0.11124078688662482,
      "grad_norm": 3.234375,
      "learning_rate": 1.8539352095234756e-05,
      "loss": 1.0342,
      "step": 31740
    },
    {
      "epoch": 0.11127583439352041,
      "grad_norm": 3.421875,
      "learning_rate": 1.854519328496828e-05,
      "loss": 0.9846,
      "step": 31750
    },
    {
      "epoch": 0.11131088190041602,
      "grad_norm": 3.21875,
      "learning_rate": 1.855103447470181e-05,
      "loss": 0.9919,
      "step": 31760
    },
    {
      "epoch": 0.11134592940731161,
      "grad_norm": 3.84375,
      "learning_rate": 1.8556875664435334e-05,
      "loss": 0.991,
      "step": 31770
    },
    {
      "epoch": 0.1113809769142072,
      "grad_norm": 3.6875,
      "learning_rate": 1.8562716854168856e-05,
      "loss": 1.1006,
      "step": 31780
    },
    {
      "epoch": 0.11141602442110281,
      "grad_norm": 3.875,
      "learning_rate": 1.8568558043902384e-05,
      "loss": 1.1479,
      "step": 31790
    },
    {
      "epoch": 0.1114510719279984,
      "grad_norm": 3.25,
      "learning_rate": 1.857439923363591e-05,
      "loss": 0.9764,
      "step": 31800
    },
    {
      "epoch": 0.111486119434894,
      "grad_norm": 3.234375,
      "learning_rate": 1.8580240423369434e-05,
      "loss": 0.958,
      "step": 31810
    },
    {
      "epoch": 0.1115211669417896,
      "grad_norm": 3.015625,
      "learning_rate": 1.858608161310296e-05,
      "loss": 0.978,
      "step": 31820
    },
    {
      "epoch": 0.11155621444868519,
      "grad_norm": 3.25,
      "learning_rate": 1.8591922802836483e-05,
      "loss": 1.0463,
      "step": 31830
    },
    {
      "epoch": 0.11159126195558079,
      "grad_norm": 3.0625,
      "learning_rate": 1.8597763992570008e-05,
      "loss": 0.9568,
      "step": 31840
    },
    {
      "epoch": 0.11162630946247638,
      "grad_norm": 3.46875,
      "learning_rate": 1.8603605182303533e-05,
      "loss": 0.9534,
      "step": 31850
    },
    {
      "epoch": 0.11166135696937199,
      "grad_norm": 3.296875,
      "learning_rate": 1.8609446372037058e-05,
      "loss": 0.9572,
      "step": 31860
    },
    {
      "epoch": 0.11169640447626758,
      "grad_norm": 3.40625,
      "learning_rate": 1.8615287561770583e-05,
      "loss": 0.9808,
      "step": 31870
    },
    {
      "epoch": 0.11173145198316317,
      "grad_norm": 3.609375,
      "learning_rate": 1.8621128751504108e-05,
      "loss": 1.0477,
      "step": 31880
    },
    {
      "epoch": 0.11176649949005878,
      "grad_norm": 3.421875,
      "learning_rate": 1.8626969941237632e-05,
      "loss": 0.9842,
      "step": 31890
    },
    {
      "epoch": 0.11180154699695437,
      "grad_norm": 3.53125,
      "learning_rate": 1.8632811130971157e-05,
      "loss": 1.0581,
      "step": 31900
    },
    {
      "epoch": 0.11183659450384997,
      "grad_norm": 3.59375,
      "learning_rate": 1.8638652320704682e-05,
      "loss": 1.0022,
      "step": 31910
    },
    {
      "epoch": 0.11187164201074556,
      "grad_norm": 3.40625,
      "learning_rate": 1.8644493510438207e-05,
      "loss": 0.9938,
      "step": 31920
    },
    {
      "epoch": 0.11190668951764116,
      "grad_norm": 3.859375,
      "learning_rate": 1.8650334700171732e-05,
      "loss": 1.0298,
      "step": 31930
    },
    {
      "epoch": 0.11194173702453676,
      "grad_norm": 3.953125,
      "learning_rate": 1.8656175889905257e-05,
      "loss": 1.0465,
      "step": 31940
    },
    {
      "epoch": 0.11197678453143235,
      "grad_norm": 3.71875,
      "learning_rate": 1.866201707963878e-05,
      "loss": 1.0347,
      "step": 31950
    },
    {
      "epoch": 0.11201183203832796,
      "grad_norm": 3.609375,
      "learning_rate": 1.8667858269372306e-05,
      "loss": 1.0183,
      "step": 31960
    },
    {
      "epoch": 0.11204687954522355,
      "grad_norm": 3.765625,
      "learning_rate": 1.867369945910583e-05,
      "loss": 1.0052,
      "step": 31970
    },
    {
      "epoch": 0.11208192705211915,
      "grad_norm": 3.421875,
      "learning_rate": 1.8679540648839356e-05,
      "loss": 1.0323,
      "step": 31980
    },
    {
      "epoch": 0.11211697455901475,
      "grad_norm": 3.125,
      "learning_rate": 1.868538183857288e-05,
      "loss": 0.9959,
      "step": 31990
    },
    {
      "epoch": 0.11215202206591034,
      "grad_norm": 3.28125,
      "learning_rate": 1.8691223028306406e-05,
      "loss": 1.0902,
      "step": 32000
    },
    {
      "epoch": 0.11218706957280594,
      "grad_norm": 3.46875,
      "learning_rate": 1.869706421803993e-05,
      "loss": 1.0879,
      "step": 32010
    },
    {
      "epoch": 0.11222211707970153,
      "grad_norm": 3.375,
      "learning_rate": 1.8702905407773455e-05,
      "loss": 0.9556,
      "step": 32020
    },
    {
      "epoch": 0.11225716458659714,
      "grad_norm": 3.15625,
      "learning_rate": 1.8708746597506984e-05,
      "loss": 1.0818,
      "step": 32030
    },
    {
      "epoch": 0.11229221209349273,
      "grad_norm": 3.4375,
      "learning_rate": 1.8714587787240505e-05,
      "loss": 0.9323,
      "step": 32040
    },
    {
      "epoch": 0.11232725960038832,
      "grad_norm": 3.609375,
      "learning_rate": 1.872042897697403e-05,
      "loss": 1.0086,
      "step": 32050
    },
    {
      "epoch": 0.11236230710728393,
      "grad_norm": 3.046875,
      "learning_rate": 1.8726270166707555e-05,
      "loss": 1.0165,
      "step": 32060
    },
    {
      "epoch": 0.11239735461417952,
      "grad_norm": 2.6875,
      "learning_rate": 1.8732111356441083e-05,
      "loss": 0.9947,
      "step": 32070
    },
    {
      "epoch": 0.11243240212107512,
      "grad_norm": 3.546875,
      "learning_rate": 1.8737952546174604e-05,
      "loss": 1.0794,
      "step": 32080
    },
    {
      "epoch": 0.11246744962797071,
      "grad_norm": 3.6875,
      "learning_rate": 1.874379373590813e-05,
      "loss": 1.0263,
      "step": 32090
    },
    {
      "epoch": 0.1125024971348663,
      "grad_norm": 3.296875,
      "learning_rate": 1.8749634925641654e-05,
      "loss": 1.0625,
      "step": 32100
    },
    {
      "epoch": 0.11253754464176191,
      "grad_norm": 3.09375,
      "learning_rate": 1.8755476115375182e-05,
      "loss": 0.9855,
      "step": 32110
    },
    {
      "epoch": 0.1125725921486575,
      "grad_norm": 3.484375,
      "learning_rate": 1.8761317305108704e-05,
      "loss": 0.991,
      "step": 32120
    },
    {
      "epoch": 0.11260763965555311,
      "grad_norm": 3.46875,
      "learning_rate": 1.876715849484223e-05,
      "loss": 1.0029,
      "step": 32130
    },
    {
      "epoch": 0.1126426871624487,
      "grad_norm": 3.40625,
      "learning_rate": 1.8772999684575757e-05,
      "loss": 1.0238,
      "step": 32140
    },
    {
      "epoch": 0.11267773466934429,
      "grad_norm": 3.21875,
      "learning_rate": 1.8778840874309282e-05,
      "loss": 1.0368,
      "step": 32150
    },
    {
      "epoch": 0.1127127821762399,
      "grad_norm": 3.234375,
      "learning_rate": 1.8784682064042807e-05,
      "loss": 1.0451,
      "step": 32160
    },
    {
      "epoch": 0.11274782968313549,
      "grad_norm": 3.671875,
      "learning_rate": 1.8790523253776328e-05,
      "loss": 1.0064,
      "step": 32170
    },
    {
      "epoch": 0.11278287719003109,
      "grad_norm": 4.0,
      "learning_rate": 1.8796364443509856e-05,
      "loss": 1.1028,
      "step": 32180
    },
    {
      "epoch": 0.11281792469692668,
      "grad_norm": 3.109375,
      "learning_rate": 1.880220563324338e-05,
      "loss": 1.0556,
      "step": 32190
    },
    {
      "epoch": 0.11285297220382227,
      "grad_norm": 3.84375,
      "learning_rate": 1.8808046822976906e-05,
      "loss": 1.0358,
      "step": 32200
    },
    {
      "epoch": 0.11288801971071788,
      "grad_norm": 2.9375,
      "learning_rate": 1.8813888012710427e-05,
      "loss": 0.9824,
      "step": 32210
    },
    {
      "epoch": 0.11292306721761347,
      "grad_norm": 3.75,
      "learning_rate": 1.8819729202443956e-05,
      "loss": 1.0537,
      "step": 32220
    },
    {
      "epoch": 0.11295811472450908,
      "grad_norm": 3.59375,
      "learning_rate": 1.882557039217748e-05,
      "loss": 1.1033,
      "step": 32230
    },
    {
      "epoch": 0.11299316223140467,
      "grad_norm": 3.4375,
      "learning_rate": 1.8831411581911005e-05,
      "loss": 0.9274,
      "step": 32240
    },
    {
      "epoch": 0.11302820973830027,
      "grad_norm": 3.59375,
      "learning_rate": 1.883725277164453e-05,
      "loss": 0.9848,
      "step": 32250
    },
    {
      "epoch": 0.11306325724519586,
      "grad_norm": 3.484375,
      "learning_rate": 1.8843093961378055e-05,
      "loss": 0.9945,
      "step": 32260
    },
    {
      "epoch": 0.11309830475209146,
      "grad_norm": 3.1875,
      "learning_rate": 1.884893515111158e-05,
      "loss": 1.1224,
      "step": 32270
    },
    {
      "epoch": 0.11313335225898706,
      "grad_norm": 3.71875,
      "learning_rate": 1.8854776340845105e-05,
      "loss": 1.0304,
      "step": 32280
    },
    {
      "epoch": 0.11316839976588265,
      "grad_norm": 3.390625,
      "learning_rate": 1.886061753057863e-05,
      "loss": 1.031,
      "step": 32290
    },
    {
      "epoch": 0.11320344727277826,
      "grad_norm": 3.28125,
      "learning_rate": 1.8866458720312154e-05,
      "loss": 1.0302,
      "step": 32300
    },
    {
      "epoch": 0.11323849477967385,
      "grad_norm": 3.78125,
      "learning_rate": 1.887229991004568e-05,
      "loss": 1.0993,
      "step": 32310
    },
    {
      "epoch": 0.11327354228656944,
      "grad_norm": 3.734375,
      "learning_rate": 1.8878141099779204e-05,
      "loss": 1.0816,
      "step": 32320
    },
    {
      "epoch": 0.11330858979346504,
      "grad_norm": 3.828125,
      "learning_rate": 1.888398228951273e-05,
      "loss": 1.011,
      "step": 32330
    },
    {
      "epoch": 0.11334363730036064,
      "grad_norm": 3.8125,
      "learning_rate": 1.8889823479246254e-05,
      "loss": 1.0607,
      "step": 32340
    },
    {
      "epoch": 0.11337868480725624,
      "grad_norm": 3.453125,
      "learning_rate": 1.889566466897978e-05,
      "loss": 1.0562,
      "step": 32350
    },
    {
      "epoch": 0.11341373231415183,
      "grad_norm": 3.65625,
      "learning_rate": 1.8901505858713303e-05,
      "loss": 1.0355,
      "step": 32360
    },
    {
      "epoch": 0.11344877982104742,
      "grad_norm": 3.171875,
      "learning_rate": 1.8907347048446828e-05,
      "loss": 0.9665,
      "step": 32370
    },
    {
      "epoch": 0.11348382732794303,
      "grad_norm": 3.296875,
      "learning_rate": 1.8913188238180353e-05,
      "loss": 0.9995,
      "step": 32380
    },
    {
      "epoch": 0.11351887483483862,
      "grad_norm": 3.265625,
      "learning_rate": 1.8919029427913878e-05,
      "loss": 0.9692,
      "step": 32390
    },
    {
      "epoch": 0.11355392234173423,
      "grad_norm": 3.578125,
      "learning_rate": 1.8924870617647403e-05,
      "loss": 1.013,
      "step": 32400
    },
    {
      "epoch": 0.11358896984862982,
      "grad_norm": 3.46875,
      "learning_rate": 1.8930711807380928e-05,
      "loss": 1.0278,
      "step": 32410
    },
    {
      "epoch": 0.11362401735552541,
      "grad_norm": 3.3125,
      "learning_rate": 1.8936552997114453e-05,
      "loss": 1.0261,
      "step": 32420
    },
    {
      "epoch": 0.11365906486242101,
      "grad_norm": 3.203125,
      "learning_rate": 1.8942394186847977e-05,
      "loss": 0.9381,
      "step": 32430
    },
    {
      "epoch": 0.1136941123693166,
      "grad_norm": 3.484375,
      "learning_rate": 1.8948235376581502e-05,
      "loss": 1.1126,
      "step": 32440
    },
    {
      "epoch": 0.11372915987621221,
      "grad_norm": 3.390625,
      "learning_rate": 1.8954076566315027e-05,
      "loss": 1.0183,
      "step": 32450
    },
    {
      "epoch": 0.1137642073831078,
      "grad_norm": 3.375,
      "learning_rate": 1.8959917756048552e-05,
      "loss": 1.0121,
      "step": 32460
    },
    {
      "epoch": 0.11379925489000339,
      "grad_norm": 2.984375,
      "learning_rate": 1.8965758945782077e-05,
      "loss": 1.0859,
      "step": 32470
    },
    {
      "epoch": 0.113834302396899,
      "grad_norm": 3.09375,
      "learning_rate": 1.89716001355156e-05,
      "loss": 1.0203,
      "step": 32480
    },
    {
      "epoch": 0.11386934990379459,
      "grad_norm": 3.46875,
      "learning_rate": 1.897744132524913e-05,
      "loss": 1.0242,
      "step": 32490
    },
    {
      "epoch": 0.1139043974106902,
      "grad_norm": 3.28125,
      "learning_rate": 1.8983282514982655e-05,
      "loss": 0.9072,
      "step": 32500
    },
    {
      "epoch": 0.11393944491758579,
      "grad_norm": 3.5,
      "learning_rate": 1.8989123704716176e-05,
      "loss": 1.0578,
      "step": 32510
    },
    {
      "epoch": 0.11397449242448139,
      "grad_norm": 3.75,
      "learning_rate": 1.89949648944497e-05,
      "loss": 0.986,
      "step": 32520
    },
    {
      "epoch": 0.11400953993137698,
      "grad_norm": 3.40625,
      "learning_rate": 1.900080608418323e-05,
      "loss": 1.0263,
      "step": 32530
    },
    {
      "epoch": 0.11404458743827257,
      "grad_norm": 3.015625,
      "learning_rate": 1.9006647273916754e-05,
      "loss": 0.9875,
      "step": 32540
    },
    {
      "epoch": 0.11407963494516818,
      "grad_norm": 3.515625,
      "learning_rate": 1.9012488463650276e-05,
      "loss": 0.9568,
      "step": 32550
    },
    {
      "epoch": 0.11411468245206377,
      "grad_norm": 3.25,
      "learning_rate": 1.90183296533838e-05,
      "loss": 0.9852,
      "step": 32560
    },
    {
      "epoch": 0.11414972995895938,
      "grad_norm": 3.078125,
      "learning_rate": 1.902417084311733e-05,
      "loss": 1.0943,
      "step": 32570
    },
    {
      "epoch": 0.11418477746585497,
      "grad_norm": 3.109375,
      "learning_rate": 1.9030012032850853e-05,
      "loss": 1.0121,
      "step": 32580
    },
    {
      "epoch": 0.11421982497275056,
      "grad_norm": 3.4375,
      "learning_rate": 1.9035853222584378e-05,
      "loss": 1.0216,
      "step": 32590
    },
    {
      "epoch": 0.11425487247964616,
      "grad_norm": 2.921875,
      "learning_rate": 1.90416944123179e-05,
      "loss": 0.9211,
      "step": 32600
    },
    {
      "epoch": 0.11428991998654175,
      "grad_norm": 3.765625,
      "learning_rate": 1.9047535602051428e-05,
      "loss": 1.001,
      "step": 32610
    },
    {
      "epoch": 0.11432496749343736,
      "grad_norm": 3.515625,
      "learning_rate": 1.9053376791784953e-05,
      "loss": 1.0547,
      "step": 32620
    },
    {
      "epoch": 0.11436001500033295,
      "grad_norm": 3.03125,
      "learning_rate": 1.9059217981518478e-05,
      "loss": 0.9718,
      "step": 32630
    },
    {
      "epoch": 0.11439506250722854,
      "grad_norm": 3.234375,
      "learning_rate": 1.9065059171252e-05,
      "loss": 1.1093,
      "step": 32640
    },
    {
      "epoch": 0.11443011001412415,
      "grad_norm": 3.140625,
      "learning_rate": 1.9070900360985527e-05,
      "loss": 0.9626,
      "step": 32650
    },
    {
      "epoch": 0.11446515752101974,
      "grad_norm": 3.65625,
      "learning_rate": 1.9076741550719052e-05,
      "loss": 1.023,
      "step": 32660
    },
    {
      "epoch": 0.11450020502791534,
      "grad_norm": 3.234375,
      "learning_rate": 1.9082582740452577e-05,
      "loss": 1.0016,
      "step": 32670
    },
    {
      "epoch": 0.11453525253481094,
      "grad_norm": 3.625,
      "learning_rate": 1.90884239301861e-05,
      "loss": 1.0536,
      "step": 32680
    },
    {
      "epoch": 0.11457030004170653,
      "grad_norm": 3.453125,
      "learning_rate": 1.9094265119919627e-05,
      "loss": 0.9218,
      "step": 32690
    },
    {
      "epoch": 0.11460534754860213,
      "grad_norm": 3.109375,
      "learning_rate": 1.910010630965315e-05,
      "loss": 1.0723,
      "step": 32700
    },
    {
      "epoch": 0.11464039505549772,
      "grad_norm": 3.375,
      "learning_rate": 1.9105947499386676e-05,
      "loss": 0.9988,
      "step": 32710
    },
    {
      "epoch": 0.11467544256239333,
      "grad_norm": 3.21875,
      "learning_rate": 1.91117886891202e-05,
      "loss": 0.9826,
      "step": 32720
    },
    {
      "epoch": 0.11471049006928892,
      "grad_norm": 3.328125,
      "learning_rate": 1.9117629878853726e-05,
      "loss": 0.9667,
      "step": 32730
    },
    {
      "epoch": 0.11474553757618451,
      "grad_norm": 3.453125,
      "learning_rate": 1.912347106858725e-05,
      "loss": 1.0304,
      "step": 32740
    },
    {
      "epoch": 0.11478058508308012,
      "grad_norm": 3.140625,
      "learning_rate": 1.9129312258320776e-05,
      "loss": 0.939,
      "step": 32750
    },
    {
      "epoch": 0.11481563258997571,
      "grad_norm": 3.125,
      "learning_rate": 1.91351534480543e-05,
      "loss": 1.0193,
      "step": 32760
    },
    {
      "epoch": 0.11485068009687131,
      "grad_norm": 3.46875,
      "learning_rate": 1.9140994637787825e-05,
      "loss": 0.9998,
      "step": 32770
    },
    {
      "epoch": 0.1148857276037669,
      "grad_norm": 3.546875,
      "learning_rate": 1.914683582752135e-05,
      "loss": 1.0264,
      "step": 32780
    },
    {
      "epoch": 0.11492077511066251,
      "grad_norm": 3.421875,
      "learning_rate": 1.9152677017254875e-05,
      "loss": 1.06,
      "step": 32790
    },
    {
      "epoch": 0.1149558226175581,
      "grad_norm": 3.40625,
      "learning_rate": 1.91585182069884e-05,
      "loss": 1.0281,
      "step": 32800
    },
    {
      "epoch": 0.11499087012445369,
      "grad_norm": 3.6875,
      "learning_rate": 1.9164359396721925e-05,
      "loss": 1.0957,
      "step": 32810
    },
    {
      "epoch": 0.1150259176313493,
      "grad_norm": 3.125,
      "learning_rate": 1.917020058645545e-05,
      "loss": 0.9731,
      "step": 32820
    },
    {
      "epoch": 0.11506096513824489,
      "grad_norm": 3.4375,
      "learning_rate": 1.9176041776188975e-05,
      "loss": 1.0739,
      "step": 32830
    },
    {
      "epoch": 0.1150960126451405,
      "grad_norm": 3.6875,
      "learning_rate": 1.9181882965922503e-05,
      "loss": 0.9468,
      "step": 32840
    },
    {
      "epoch": 0.11513106015203609,
      "grad_norm": 3.484375,
      "learning_rate": 1.9187724155656024e-05,
      "loss": 1.0559,
      "step": 32850
    },
    {
      "epoch": 0.11516610765893168,
      "grad_norm": 3.359375,
      "learning_rate": 1.919356534538955e-05,
      "loss": 1.0464,
      "step": 32860
    },
    {
      "epoch": 0.11520115516582728,
      "grad_norm": 3.5625,
      "learning_rate": 1.9199406535123074e-05,
      "loss": 0.9891,
      "step": 32870
    },
    {
      "epoch": 0.11523620267272287,
      "grad_norm": 3.3125,
      "learning_rate": 1.9205247724856602e-05,
      "loss": 1.0446,
      "step": 32880
    },
    {
      "epoch": 0.11527125017961848,
      "grad_norm": 3.203125,
      "learning_rate": 1.9211088914590124e-05,
      "loss": 0.9459,
      "step": 32890
    },
    {
      "epoch": 0.11530629768651407,
      "grad_norm": 3.625,
      "learning_rate": 1.921693010432365e-05,
      "loss": 1.0574,
      "step": 32900
    },
    {
      "epoch": 0.11534134519340966,
      "grad_norm": 2.953125,
      "learning_rate": 1.9222771294057173e-05,
      "loss": 1.0173,
      "step": 32910
    },
    {
      "epoch": 0.11537639270030527,
      "grad_norm": 3.515625,
      "learning_rate": 1.92286124837907e-05,
      "loss": 1.0253,
      "step": 32920
    },
    {
      "epoch": 0.11541144020720086,
      "grad_norm": 3.203125,
      "learning_rate": 1.9234453673524226e-05,
      "loss": 1.0767,
      "step": 32930
    },
    {
      "epoch": 0.11544648771409646,
      "grad_norm": 3.4375,
      "learning_rate": 1.9240294863257748e-05,
      "loss": 0.9886,
      "step": 32940
    },
    {
      "epoch": 0.11548153522099205,
      "grad_norm": 3.515625,
      "learning_rate": 1.9246136052991273e-05,
      "loss": 1.0858,
      "step": 32950
    },
    {
      "epoch": 0.11551658272788765,
      "grad_norm": 3.5,
      "learning_rate": 1.92519772427248e-05,
      "loss": 1.0102,
      "step": 32960
    },
    {
      "epoch": 0.11555163023478325,
      "grad_norm": 3.046875,
      "learning_rate": 1.9257818432458326e-05,
      "loss": 0.9679,
      "step": 32970
    },
    {
      "epoch": 0.11558667774167884,
      "grad_norm": 3.40625,
      "learning_rate": 1.9263659622191847e-05,
      "loss": 1.0751,
      "step": 32980
    },
    {
      "epoch": 0.11562172524857445,
      "grad_norm": 3.421875,
      "learning_rate": 1.9269500811925372e-05,
      "loss": 0.9403,
      "step": 32990
    },
    {
      "epoch": 0.11565677275547004,
      "grad_norm": 3.34375,
      "learning_rate": 1.92753420016589e-05,
      "loss": 0.9855,
      "step": 33000
    },
    {
      "epoch": 0.11569182026236563,
      "grad_norm": 3.6875,
      "learning_rate": 1.9281183191392425e-05,
      "loss": 1.0197,
      "step": 33010
    },
    {
      "epoch": 0.11572686776926124,
      "grad_norm": 3.078125,
      "learning_rate": 1.9287024381125947e-05,
      "loss": 1.055,
      "step": 33020
    },
    {
      "epoch": 0.11576191527615683,
      "grad_norm": 3.203125,
      "learning_rate": 1.929286557085947e-05,
      "loss": 1.0095,
      "step": 33030
    },
    {
      "epoch": 0.11579696278305243,
      "grad_norm": 3.40625,
      "learning_rate": 1.9298706760593e-05,
      "loss": 0.9644,
      "step": 33040
    },
    {
      "epoch": 0.11583201028994802,
      "grad_norm": 3.40625,
      "learning_rate": 1.9304547950326525e-05,
      "loss": 1.0661,
      "step": 33050
    },
    {
      "epoch": 0.11586705779684363,
      "grad_norm": 3.234375,
      "learning_rate": 1.931038914006005e-05,
      "loss": 1.0386,
      "step": 33060
    },
    {
      "epoch": 0.11590210530373922,
      "grad_norm": 3.09375,
      "learning_rate": 1.931623032979357e-05,
      "loss": 0.952,
      "step": 33070
    },
    {
      "epoch": 0.11593715281063481,
      "grad_norm": 3.75,
      "learning_rate": 1.93220715195271e-05,
      "loss": 1.0241,
      "step": 33080
    },
    {
      "epoch": 0.11597220031753042,
      "grad_norm": 3.734375,
      "learning_rate": 1.9327912709260624e-05,
      "loss": 0.9604,
      "step": 33090
    },
    {
      "epoch": 0.11600724782442601,
      "grad_norm": 3.28125,
      "learning_rate": 1.933375389899415e-05,
      "loss": 1.0546,
      "step": 33100
    },
    {
      "epoch": 0.11604229533132161,
      "grad_norm": 3.75,
      "learning_rate": 1.933959508872767e-05,
      "loss": 1.0505,
      "step": 33110
    },
    {
      "epoch": 0.1160773428382172,
      "grad_norm": 3.484375,
      "learning_rate": 1.93454362784612e-05,
      "loss": 0.9938,
      "step": 33120
    },
    {
      "epoch": 0.1161123903451128,
      "grad_norm": 3.484375,
      "learning_rate": 1.9351277468194723e-05,
      "loss": 1.0791,
      "step": 33130
    },
    {
      "epoch": 0.1161474378520084,
      "grad_norm": 3.109375,
      "learning_rate": 1.9357118657928248e-05,
      "loss": 0.9974,
      "step": 33140
    },
    {
      "epoch": 0.11618248535890399,
      "grad_norm": 2.96875,
      "learning_rate": 1.9362959847661773e-05,
      "loss": 0.9233,
      "step": 33150
    },
    {
      "epoch": 0.1162175328657996,
      "grad_norm": 3.515625,
      "learning_rate": 1.9368801037395298e-05,
      "loss": 1.0689,
      "step": 33160
    },
    {
      "epoch": 0.11625258037269519,
      "grad_norm": 3.53125,
      "learning_rate": 1.9374642227128823e-05,
      "loss": 0.9901,
      "step": 33170
    },
    {
      "epoch": 0.11628762787959078,
      "grad_norm": 3.3125,
      "learning_rate": 1.9380483416862347e-05,
      "loss": 1.0268,
      "step": 33180
    },
    {
      "epoch": 0.11632267538648638,
      "grad_norm": 3.65625,
      "learning_rate": 1.9386324606595872e-05,
      "loss": 1.0453,
      "step": 33190
    },
    {
      "epoch": 0.11635772289338198,
      "grad_norm": 3.125,
      "learning_rate": 1.9392165796329397e-05,
      "loss": 1.0329,
      "step": 33200
    },
    {
      "epoch": 0.11639277040027758,
      "grad_norm": 3.28125,
      "learning_rate": 1.9398006986062922e-05,
      "loss": 1.0425,
      "step": 33210
    },
    {
      "epoch": 0.11642781790717317,
      "grad_norm": 3.609375,
      "learning_rate": 1.9403848175796447e-05,
      "loss": 1.0862,
      "step": 33220
    },
    {
      "epoch": 0.11646286541406876,
      "grad_norm": 3.421875,
      "learning_rate": 1.9409689365529972e-05,
      "loss": 1.0177,
      "step": 33230
    },
    {
      "epoch": 0.11649791292096437,
      "grad_norm": 3.0,
      "learning_rate": 1.9415530555263497e-05,
      "loss": 0.9975,
      "step": 33240
    },
    {
      "epoch": 0.11653296042785996,
      "grad_norm": 3.4375,
      "learning_rate": 1.942137174499702e-05,
      "loss": 1.0533,
      "step": 33250
    },
    {
      "epoch": 0.11656800793475557,
      "grad_norm": 3.40625,
      "learning_rate": 1.9427212934730546e-05,
      "loss": 1.0297,
      "step": 33260
    },
    {
      "epoch": 0.11660305544165116,
      "grad_norm": 3.140625,
      "learning_rate": 1.9433054124464074e-05,
      "loss": 0.9589,
      "step": 33270
    },
    {
      "epoch": 0.11663810294854675,
      "grad_norm": 2.96875,
      "learning_rate": 1.9438895314197596e-05,
      "loss": 1.0067,
      "step": 33280
    },
    {
      "epoch": 0.11667315045544235,
      "grad_norm": 3.140625,
      "learning_rate": 1.944473650393112e-05,
      "loss": 0.9701,
      "step": 33290
    },
    {
      "epoch": 0.11670819796233795,
      "grad_norm": 3.09375,
      "learning_rate": 1.9450577693664646e-05,
      "loss": 1.0788,
      "step": 33300
    },
    {
      "epoch": 0.11674324546923355,
      "grad_norm": 2.90625,
      "learning_rate": 1.9456418883398174e-05,
      "loss": 1.0461,
      "step": 33310
    },
    {
      "epoch": 0.11677829297612914,
      "grad_norm": 3.515625,
      "learning_rate": 1.9462260073131695e-05,
      "loss": 1.07,
      "step": 33320
    },
    {
      "epoch": 0.11681334048302475,
      "grad_norm": 3.265625,
      "learning_rate": 1.946810126286522e-05,
      "loss": 1.0833,
      "step": 33330
    },
    {
      "epoch": 0.11684838798992034,
      "grad_norm": 3.5625,
      "learning_rate": 1.9473942452598745e-05,
      "loss": 1.085,
      "step": 33340
    },
    {
      "epoch": 0.11688343549681593,
      "grad_norm": 3.046875,
      "learning_rate": 1.9479783642332273e-05,
      "loss": 1.0308,
      "step": 33350
    },
    {
      "epoch": 0.11691848300371153,
      "grad_norm": 3.15625,
      "learning_rate": 1.9485624832065795e-05,
      "loss": 1.0349,
      "step": 33360
    },
    {
      "epoch": 0.11695353051060713,
      "grad_norm": 3.4375,
      "learning_rate": 1.949146602179932e-05,
      "loss": 1.0075,
      "step": 33370
    },
    {
      "epoch": 0.11698857801750273,
      "grad_norm": 2.90625,
      "learning_rate": 1.9497307211532844e-05,
      "loss": 0.9664,
      "step": 33380
    },
    {
      "epoch": 0.11702362552439832,
      "grad_norm": 3.453125,
      "learning_rate": 1.9503148401266373e-05,
      "loss": 1.022,
      "step": 33390
    },
    {
      "epoch": 0.11705867303129391,
      "grad_norm": 3.484375,
      "learning_rate": 1.9508989590999897e-05,
      "loss": 0.9986,
      "step": 33400
    },
    {
      "epoch": 0.11709372053818952,
      "grad_norm": 3.296875,
      "learning_rate": 1.951483078073342e-05,
      "loss": 1.0255,
      "step": 33410
    },
    {
      "epoch": 0.11712876804508511,
      "grad_norm": 3.6875,
      "learning_rate": 1.9520671970466944e-05,
      "loss": 1.1008,
      "step": 33420
    },
    {
      "epoch": 0.11716381555198072,
      "grad_norm": 3.5625,
      "learning_rate": 1.9526513160200472e-05,
      "loss": 1.13,
      "step": 33430
    },
    {
      "epoch": 0.11719886305887631,
      "grad_norm": 3.140625,
      "learning_rate": 1.9532354349933997e-05,
      "loss": 1.0184,
      "step": 33440
    },
    {
      "epoch": 0.1172339105657719,
      "grad_norm": 3.546875,
      "learning_rate": 1.9538195539667518e-05,
      "loss": 1.0377,
      "step": 33450
    },
    {
      "epoch": 0.1172689580726675,
      "grad_norm": 3.359375,
      "learning_rate": 1.9544036729401043e-05,
      "loss": 1.0617,
      "step": 33460
    },
    {
      "epoch": 0.1173040055795631,
      "grad_norm": 3.71875,
      "learning_rate": 1.954987791913457e-05,
      "loss": 1.051,
      "step": 33470
    },
    {
      "epoch": 0.1173390530864587,
      "grad_norm": 3.296875,
      "learning_rate": 1.9555719108868096e-05,
      "loss": 0.9914,
      "step": 33480
    },
    {
      "epoch": 0.11737410059335429,
      "grad_norm": 3.734375,
      "learning_rate": 1.956156029860162e-05,
      "loss": 1.0202,
      "step": 33490
    },
    {
      "epoch": 0.11740914810024988,
      "grad_norm": 3.109375,
      "learning_rate": 1.9567401488335146e-05,
      "loss": 0.988,
      "step": 33500
    },
    {
      "epoch": 0.11744419560714549,
      "grad_norm": 3.015625,
      "learning_rate": 1.957324267806867e-05,
      "loss": 1.0437,
      "step": 33510
    },
    {
      "epoch": 0.11747924311404108,
      "grad_norm": 3.140625,
      "learning_rate": 1.9579083867802196e-05,
      "loss": 1.1162,
      "step": 33520
    },
    {
      "epoch": 0.11751429062093668,
      "grad_norm": 3.28125,
      "learning_rate": 1.958492505753572e-05,
      "loss": 1.0369,
      "step": 33530
    },
    {
      "epoch": 0.11754933812783228,
      "grad_norm": 3.1875,
      "learning_rate": 1.9590766247269245e-05,
      "loss": 1.0543,
      "step": 33540
    },
    {
      "epoch": 0.11758438563472787,
      "grad_norm": 3.21875,
      "learning_rate": 1.959660743700277e-05,
      "loss": 0.9858,
      "step": 33550
    },
    {
      "epoch": 0.11761943314162347,
      "grad_norm": 3.265625,
      "learning_rate": 1.9602448626736295e-05,
      "loss": 1.0264,
      "step": 33560
    },
    {
      "epoch": 0.11765448064851906,
      "grad_norm": 3.0625,
      "learning_rate": 1.960828981646982e-05,
      "loss": 0.9737,
      "step": 33570
    },
    {
      "epoch": 0.11768952815541467,
      "grad_norm": 2.890625,
      "learning_rate": 1.9614131006203345e-05,
      "loss": 1.0224,
      "step": 33580
    },
    {
      "epoch": 0.11772457566231026,
      "grad_norm": 3.21875,
      "learning_rate": 1.961997219593687e-05,
      "loss": 0.9834,
      "step": 33590
    },
    {
      "epoch": 0.11775962316920587,
      "grad_norm": 3.75,
      "learning_rate": 1.9625813385670394e-05,
      "loss": 0.989,
      "step": 33600
    },
    {
      "epoch": 0.11779467067610146,
      "grad_norm": 3.625,
      "learning_rate": 1.963165457540392e-05,
      "loss": 0.9703,
      "step": 33610
    },
    {
      "epoch": 0.11782971818299705,
      "grad_norm": 3.0,
      "learning_rate": 1.9637495765137444e-05,
      "loss": 0.9739,
      "step": 33620
    },
    {
      "epoch": 0.11786476568989265,
      "grad_norm": 3.484375,
      "learning_rate": 1.964333695487097e-05,
      "loss": 1.0061,
      "step": 33630
    },
    {
      "epoch": 0.11789981319678824,
      "grad_norm": 2.953125,
      "learning_rate": 1.9649178144604494e-05,
      "loss": 0.9989,
      "step": 33640
    },
    {
      "epoch": 0.11793486070368385,
      "grad_norm": 3.015625,
      "learning_rate": 1.965501933433802e-05,
      "loss": 0.9771,
      "step": 33650
    },
    {
      "epoch": 0.11796990821057944,
      "grad_norm": 3.234375,
      "learning_rate": 1.9660860524071543e-05,
      "loss": 1.0932,
      "step": 33660
    },
    {
      "epoch": 0.11800495571747503,
      "grad_norm": 3.65625,
      "learning_rate": 1.9666701713805068e-05,
      "loss": 1.0456,
      "step": 33670
    },
    {
      "epoch": 0.11804000322437064,
      "grad_norm": 3.03125,
      "learning_rate": 1.9672542903538593e-05,
      "loss": 0.9878,
      "step": 33680
    },
    {
      "epoch": 0.11807505073126623,
      "grad_norm": 3.765625,
      "learning_rate": 1.9678384093272118e-05,
      "loss": 1.0705,
      "step": 33690
    },
    {
      "epoch": 0.11811009823816183,
      "grad_norm": 3.078125,
      "learning_rate": 1.9684225283005646e-05,
      "loss": 1.0702,
      "step": 33700
    },
    {
      "epoch": 0.11814514574505743,
      "grad_norm": 3.15625,
      "learning_rate": 1.9690066472739168e-05,
      "loss": 0.9802,
      "step": 33710
    },
    {
      "epoch": 0.11818019325195302,
      "grad_norm": 3.1875,
      "learning_rate": 1.9695907662472692e-05,
      "loss": 0.977,
      "step": 33720
    },
    {
      "epoch": 0.11821524075884862,
      "grad_norm": 3.75,
      "learning_rate": 1.9701748852206217e-05,
      "loss": 1.0966,
      "step": 33730
    },
    {
      "epoch": 0.11825028826574421,
      "grad_norm": 3.375,
      "learning_rate": 1.9707590041939746e-05,
      "loss": 0.9776,
      "step": 33740
    },
    {
      "epoch": 0.11828533577263982,
      "grad_norm": 3.1875,
      "learning_rate": 1.9713431231673267e-05,
      "loss": 0.9561,
      "step": 33750
    },
    {
      "epoch": 0.11832038327953541,
      "grad_norm": 3.25,
      "learning_rate": 1.9719272421406792e-05,
      "loss": 1.1589,
      "step": 33760
    },
    {
      "epoch": 0.118355430786431,
      "grad_norm": 3.40625,
      "learning_rate": 1.9725113611140317e-05,
      "loss": 1.0682,
      "step": 33770
    },
    {
      "epoch": 0.1183904782933266,
      "grad_norm": 3.5,
      "learning_rate": 1.9730954800873845e-05,
      "loss": 0.966,
      "step": 33780
    },
    {
      "epoch": 0.1184255258002222,
      "grad_norm": 3.34375,
      "learning_rate": 1.9736795990607366e-05,
      "loss": 1.068,
      "step": 33790
    },
    {
      "epoch": 0.1184605733071178,
      "grad_norm": 3.46875,
      "learning_rate": 1.974263718034089e-05,
      "loss": 1.1185,
      "step": 33800
    },
    {
      "epoch": 0.1184956208140134,
      "grad_norm": 3.40625,
      "learning_rate": 1.9748478370074416e-05,
      "loss": 0.992,
      "step": 33810
    },
    {
      "epoch": 0.11853066832090899,
      "grad_norm": 3.140625,
      "learning_rate": 1.9754319559807944e-05,
      "loss": 1.026,
      "step": 33820
    },
    {
      "epoch": 0.11856571582780459,
      "grad_norm": 3.46875,
      "learning_rate": 1.976016074954147e-05,
      "loss": 1.0521,
      "step": 33830
    },
    {
      "epoch": 0.11860076333470018,
      "grad_norm": 3.609375,
      "learning_rate": 1.976600193927499e-05,
      "loss": 0.9927,
      "step": 33840
    },
    {
      "epoch": 0.11863581084159579,
      "grad_norm": 3.234375,
      "learning_rate": 1.977184312900852e-05,
      "loss": 1.085,
      "step": 33850
    },
    {
      "epoch": 0.11867085834849138,
      "grad_norm": 3.765625,
      "learning_rate": 1.9777684318742044e-05,
      "loss": 1.0382,
      "step": 33860
    },
    {
      "epoch": 0.11870590585538698,
      "grad_norm": 3.515625,
      "learning_rate": 1.978352550847557e-05,
      "loss": 0.9766,
      "step": 33870
    },
    {
      "epoch": 0.11874095336228258,
      "grad_norm": 3.796875,
      "learning_rate": 1.978936669820909e-05,
      "loss": 1.0346,
      "step": 33880
    },
    {
      "epoch": 0.11877600086917817,
      "grad_norm": 3.6875,
      "learning_rate": 1.9795207887942618e-05,
      "loss": 1.0591,
      "step": 33890
    },
    {
      "epoch": 0.11881104837607377,
      "grad_norm": 9.0625,
      "learning_rate": 1.9801049077676143e-05,
      "loss": 0.9695,
      "step": 33900
    },
    {
      "epoch": 0.11884609588296936,
      "grad_norm": 3.25,
      "learning_rate": 1.9806890267409668e-05,
      "loss": 1.055,
      "step": 33910
    },
    {
      "epoch": 0.11888114338986497,
      "grad_norm": 3.640625,
      "learning_rate": 1.981273145714319e-05,
      "loss": 1.0875,
      "step": 33920
    },
    {
      "epoch": 0.11891619089676056,
      "grad_norm": 3.546875,
      "learning_rate": 1.9818572646876718e-05,
      "loss": 1.0322,
      "step": 33930
    },
    {
      "epoch": 0.11895123840365615,
      "grad_norm": 3.265625,
      "learning_rate": 1.9824413836610242e-05,
      "loss": 1.0068,
      "step": 33940
    },
    {
      "epoch": 0.11898628591055176,
      "grad_norm": 3.625,
      "learning_rate": 1.9830255026343767e-05,
      "loss": 0.971,
      "step": 33950
    },
    {
      "epoch": 0.11902133341744735,
      "grad_norm": 3.328125,
      "learning_rate": 1.9836096216077292e-05,
      "loss": 1.0347,
      "step": 33960
    },
    {
      "epoch": 0.11905638092434295,
      "grad_norm": 3.515625,
      "learning_rate": 1.9841937405810817e-05,
      "loss": 0.9969,
      "step": 33970
    },
    {
      "epoch": 0.11909142843123854,
      "grad_norm": 3.125,
      "learning_rate": 1.9847778595544342e-05,
      "loss": 0.9457,
      "step": 33980
    },
    {
      "epoch": 0.11912647593813414,
      "grad_norm": 3.078125,
      "learning_rate": 1.9853619785277867e-05,
      "loss": 0.9882,
      "step": 33990
    },
    {
      "epoch": 0.11916152344502974,
      "grad_norm": 3.71875,
      "learning_rate": 1.985946097501139e-05,
      "loss": 0.9867,
      "step": 34000
    },
    {
      "epoch": 0.11919657095192533,
      "grad_norm": 3.0,
      "learning_rate": 1.9865302164744916e-05,
      "loss": 1.0366,
      "step": 34010
    },
    {
      "epoch": 0.11923161845882094,
      "grad_norm": 3.359375,
      "learning_rate": 1.987114335447844e-05,
      "loss": 1.021,
      "step": 34020
    },
    {
      "epoch": 0.11926666596571653,
      "grad_norm": 2.953125,
      "learning_rate": 1.9876984544211966e-05,
      "loss": 1.0442,
      "step": 34030
    },
    {
      "epoch": 0.11930171347261212,
      "grad_norm": 3.171875,
      "learning_rate": 1.988282573394549e-05,
      "loss": 0.969,
      "step": 34040
    },
    {
      "epoch": 0.11933676097950772,
      "grad_norm": 3.046875,
      "learning_rate": 1.9888666923679016e-05,
      "loss": 1.0746,
      "step": 34050
    },
    {
      "epoch": 0.11937180848640332,
      "grad_norm": 3.140625,
      "learning_rate": 1.989450811341254e-05,
      "loss": 1.0935,
      "step": 34060
    },
    {
      "epoch": 0.11940685599329892,
      "grad_norm": 3.140625,
      "learning_rate": 1.9900349303146065e-05,
      "loss": 1.0487,
      "step": 34070
    },
    {
      "epoch": 0.11944190350019451,
      "grad_norm": 3.3125,
      "learning_rate": 1.990619049287959e-05,
      "loss": 0.9776,
      "step": 34080
    },
    {
      "epoch": 0.1194769510070901,
      "grad_norm": 3.59375,
      "learning_rate": 1.9912031682613115e-05,
      "loss": 1.0475,
      "step": 34090
    },
    {
      "epoch": 0.11951199851398571,
      "grad_norm": 3.453125,
      "learning_rate": 1.991787287234664e-05,
      "loss": 0.9102,
      "step": 34100
    },
    {
      "epoch": 0.1195470460208813,
      "grad_norm": 3.1875,
      "learning_rate": 1.9923714062080165e-05,
      "loss": 1.0418,
      "step": 34110
    },
    {
      "epoch": 0.1195820935277769,
      "grad_norm": 3.34375,
      "learning_rate": 1.992955525181369e-05,
      "loss": 1.0636,
      "step": 34120
    },
    {
      "epoch": 0.1196171410346725,
      "grad_norm": 2.921875,
      "learning_rate": 1.9935396441547214e-05,
      "loss": 0.9504,
      "step": 34130
    },
    {
      "epoch": 0.1196521885415681,
      "grad_norm": 3.703125,
      "learning_rate": 1.994123763128074e-05,
      "loss": 1.0959,
      "step": 34140
    },
    {
      "epoch": 0.1196872360484637,
      "grad_norm": 3.625,
      "learning_rate": 1.9947078821014264e-05,
      "loss": 0.9652,
      "step": 34150
    },
    {
      "epoch": 0.11972228355535929,
      "grad_norm": 3.296875,
      "learning_rate": 1.995292001074779e-05,
      "loss": 1.0041,
      "step": 34160
    },
    {
      "epoch": 0.11975733106225489,
      "grad_norm": 3.28125,
      "learning_rate": 1.9958761200481317e-05,
      "loss": 0.9552,
      "step": 34170
    },
    {
      "epoch": 0.11979237856915048,
      "grad_norm": 3.6875,
      "learning_rate": 1.996460239021484e-05,
      "loss": 1.1044,
      "step": 34180
    },
    {
      "epoch": 0.11982742607604609,
      "grad_norm": 3.375,
      "learning_rate": 1.9970443579948364e-05,
      "loss": 1.0439,
      "step": 34190
    },
    {
      "epoch": 0.11986247358294168,
      "grad_norm": 2.78125,
      "learning_rate": 1.9976284769681892e-05,
      "loss": 0.9669,
      "step": 34200
    },
    {
      "epoch": 0.11989752108983727,
      "grad_norm": 3.671875,
      "learning_rate": 1.9982125959415417e-05,
      "loss": 1.0443,
      "step": 34210
    },
    {
      "epoch": 0.11993256859673287,
      "grad_norm": 3.125,
      "learning_rate": 1.9987967149148938e-05,
      "loss": 1.0491,
      "step": 34220
    },
    {
      "epoch": 0.11996761610362847,
      "grad_norm": 3.28125,
      "learning_rate": 1.9993808338882463e-05,
      "loss": 1.0966,
      "step": 34230
    },
    {
      "epoch": 0.12000266361052407,
      "grad_norm": 3.25,
      "learning_rate": 1.999964952861599e-05,
      "loss": 0.9796,
      "step": 34240
    },
    {
      "epoch": 0.12003771111741966,
      "grad_norm": 3.875,
      "learning_rate": 2.0005490718349516e-05,
      "loss": 1.018,
      "step": 34250
    },
    {
      "epoch": 0.12007275862431525,
      "grad_norm": 3.0625,
      "learning_rate": 2.0011331908083037e-05,
      "loss": 0.9978,
      "step": 34260
    },
    {
      "epoch": 0.12010780613121086,
      "grad_norm": 3.671875,
      "learning_rate": 2.0017173097816562e-05,
      "loss": 0.9794,
      "step": 34270
    },
    {
      "epoch": 0.12014285363810645,
      "grad_norm": 2.90625,
      "learning_rate": 2.002301428755009e-05,
      "loss": 0.9879,
      "step": 34280
    },
    {
      "epoch": 0.12017790114500206,
      "grad_norm": 3.125,
      "learning_rate": 2.0028855477283615e-05,
      "loss": 1.0353,
      "step": 34290
    },
    {
      "epoch": 0.12021294865189765,
      "grad_norm": 3.640625,
      "learning_rate": 2.003469666701714e-05,
      "loss": 1.0962,
      "step": 34300
    },
    {
      "epoch": 0.12024799615879324,
      "grad_norm": 3.953125,
      "learning_rate": 2.004053785675066e-05,
      "loss": 1.0474,
      "step": 34310
    },
    {
      "epoch": 0.12028304366568884,
      "grad_norm": 3.65625,
      "learning_rate": 2.004637904648419e-05,
      "loss": 0.9898,
      "step": 34320
    },
    {
      "epoch": 0.12031809117258443,
      "grad_norm": 3.015625,
      "learning_rate": 2.0052220236217715e-05,
      "loss": 0.9934,
      "step": 34330
    },
    {
      "epoch": 0.12035313867948004,
      "grad_norm": 3.34375,
      "learning_rate": 2.005806142595124e-05,
      "loss": 0.9976,
      "step": 34340
    },
    {
      "epoch": 0.12038818618637563,
      "grad_norm": 3.359375,
      "learning_rate": 2.006390261568476e-05,
      "loss": 1.1314,
      "step": 34350
    },
    {
      "epoch": 0.12042323369327122,
      "grad_norm": 3.40625,
      "learning_rate": 2.006974380541829e-05,
      "loss": 1.0277,
      "step": 34360
    },
    {
      "epoch": 0.12045828120016683,
      "grad_norm": 3.515625,
      "learning_rate": 2.0075584995151814e-05,
      "loss": 1.0699,
      "step": 34370
    },
    {
      "epoch": 0.12049332870706242,
      "grad_norm": 3.15625,
      "learning_rate": 2.008142618488534e-05,
      "loss": 1.0638,
      "step": 34380
    },
    {
      "epoch": 0.12052837621395802,
      "grad_norm": 3.71875,
      "learning_rate": 2.0087267374618864e-05,
      "loss": 1.1069,
      "step": 34390
    },
    {
      "epoch": 0.12056342372085362,
      "grad_norm": 4.1875,
      "learning_rate": 2.009310856435239e-05,
      "loss": 0.9612,
      "step": 34400
    },
    {
      "epoch": 0.12059847122774921,
      "grad_norm": 3.4375,
      "learning_rate": 2.0098949754085914e-05,
      "loss": 1.0014,
      "step": 34410
    },
    {
      "epoch": 0.12063351873464481,
      "grad_norm": 3.484375,
      "learning_rate": 2.010479094381944e-05,
      "loss": 1.0675,
      "step": 34420
    },
    {
      "epoch": 0.1206685662415404,
      "grad_norm": 3.328125,
      "learning_rate": 2.0110632133552963e-05,
      "loss": 0.9593,
      "step": 34430
    },
    {
      "epoch": 0.12070361374843601,
      "grad_norm": 3.46875,
      "learning_rate": 2.0116473323286488e-05,
      "loss": 1.0561,
      "step": 34440
    },
    {
      "epoch": 0.1207386612553316,
      "grad_norm": 3.59375,
      "learning_rate": 2.0122314513020013e-05,
      "loss": 1.0412,
      "step": 34450
    },
    {
      "epoch": 0.1207737087622272,
      "grad_norm": 3.421875,
      "learning_rate": 2.0128155702753538e-05,
      "loss": 1.0193,
      "step": 34460
    },
    {
      "epoch": 0.1208087562691228,
      "grad_norm": 3.3125,
      "learning_rate": 2.0133996892487063e-05,
      "loss": 1.0089,
      "step": 34470
    },
    {
      "epoch": 0.12084380377601839,
      "grad_norm": 3.46875,
      "learning_rate": 2.0139838082220587e-05,
      "loss": 1.0998,
      "step": 34480
    },
    {
      "epoch": 0.120878851282914,
      "grad_norm": 3.140625,
      "learning_rate": 2.0145679271954112e-05,
      "loss": 0.9491,
      "step": 34490
    },
    {
      "epoch": 0.12091389878980958,
      "grad_norm": 3.484375,
      "learning_rate": 2.0151520461687637e-05,
      "loss": 1.0027,
      "step": 34500
    },
    {
      "epoch": 0.12094894629670519,
      "grad_norm": 3.328125,
      "learning_rate": 2.0157361651421162e-05,
      "loss": 0.9639,
      "step": 34510
    },
    {
      "epoch": 0.12098399380360078,
      "grad_norm": 2.953125,
      "learning_rate": 2.0163202841154687e-05,
      "loss": 0.9591,
      "step": 34520
    },
    {
      "epoch": 0.12101904131049637,
      "grad_norm": 3.265625,
      "learning_rate": 2.016904403088821e-05,
      "loss": 1.0972,
      "step": 34530
    },
    {
      "epoch": 0.12105408881739198,
      "grad_norm": 3.390625,
      "learning_rate": 2.0174885220621736e-05,
      "loss": 1.057,
      "step": 34540
    },
    {
      "epoch": 0.12108913632428757,
      "grad_norm": 3.5625,
      "learning_rate": 2.0180726410355265e-05,
      "loss": 1.0259,
      "step": 34550
    },
    {
      "epoch": 0.12112418383118317,
      "grad_norm": 3.546875,
      "learning_rate": 2.0186567600088786e-05,
      "loss": 1.0781,
      "step": 34560
    },
    {
      "epoch": 0.12115923133807877,
      "grad_norm": 3.578125,
      "learning_rate": 2.019240878982231e-05,
      "loss": 1.0616,
      "step": 34570
    },
    {
      "epoch": 0.12119427884497436,
      "grad_norm": 3.109375,
      "learning_rate": 2.0198249979555836e-05,
      "loss": 1.003,
      "step": 34580
    },
    {
      "epoch": 0.12122932635186996,
      "grad_norm": 3.109375,
      "learning_rate": 2.0204091169289364e-05,
      "loss": 0.9922,
      "step": 34590
    },
    {
      "epoch": 0.12126437385876555,
      "grad_norm": 3.203125,
      "learning_rate": 2.020993235902289e-05,
      "loss": 0.9971,
      "step": 34600
    },
    {
      "epoch": 0.12129942136566116,
      "grad_norm": 3.1875,
      "learning_rate": 2.021577354875641e-05,
      "loss": 1.0197,
      "step": 34610
    },
    {
      "epoch": 0.12133446887255675,
      "grad_norm": 3.71875,
      "learning_rate": 2.0221614738489935e-05,
      "loss": 0.9835,
      "step": 34620
    },
    {
      "epoch": 0.12136951637945234,
      "grad_norm": 3.1875,
      "learning_rate": 2.0227455928223463e-05,
      "loss": 0.9607,
      "step": 34630
    },
    {
      "epoch": 0.12140456388634795,
      "grad_norm": 3.78125,
      "learning_rate": 2.023329711795699e-05,
      "loss": 0.9374,
      "step": 34640
    },
    {
      "epoch": 0.12143961139324354,
      "grad_norm": 3.15625,
      "learning_rate": 2.023913830769051e-05,
      "loss": 1.0508,
      "step": 34650
    },
    {
      "epoch": 0.12147465890013914,
      "grad_norm": 3.125,
      "learning_rate": 2.0244979497424035e-05,
      "loss": 1.0912,
      "step": 34660
    },
    {
      "epoch": 0.12150970640703473,
      "grad_norm": 3.6875,
      "learning_rate": 2.0250820687157563e-05,
      "loss": 1.0169,
      "step": 34670
    },
    {
      "epoch": 0.12154475391393033,
      "grad_norm": 3.53125,
      "learning_rate": 2.0256661876891088e-05,
      "loss": 0.9634,
      "step": 34680
    },
    {
      "epoch": 0.12157980142082593,
      "grad_norm": 3.328125,
      "learning_rate": 2.026250306662461e-05,
      "loss": 1.0516,
      "step": 34690
    },
    {
      "epoch": 0.12161484892772152,
      "grad_norm": 4.15625,
      "learning_rate": 2.0268344256358134e-05,
      "loss": 0.9858,
      "step": 34700
    },
    {
      "epoch": 0.12164989643461713,
      "grad_norm": 3.265625,
      "learning_rate": 2.0274185446091662e-05,
      "loss": 1.0287,
      "step": 34710
    },
    {
      "epoch": 0.12168494394151272,
      "grad_norm": 3.03125,
      "learning_rate": 2.0280026635825187e-05,
      "loss": 1.0065,
      "step": 34720
    },
    {
      "epoch": 0.12171999144840832,
      "grad_norm": 3.203125,
      "learning_rate": 2.0285867825558712e-05,
      "loss": 0.9761,
      "step": 34730
    },
    {
      "epoch": 0.12175503895530392,
      "grad_norm": 3.515625,
      "learning_rate": 2.0291709015292233e-05,
      "loss": 1.0029,
      "step": 34740
    },
    {
      "epoch": 0.1217900864621995,
      "grad_norm": 3.296875,
      "learning_rate": 2.029755020502576e-05,
      "loss": 0.9974,
      "step": 34750
    },
    {
      "epoch": 0.12182513396909511,
      "grad_norm": 2.96875,
      "learning_rate": 2.0303391394759286e-05,
      "loss": 0.9553,
      "step": 34760
    },
    {
      "epoch": 0.1218601814759907,
      "grad_norm": 3.328125,
      "learning_rate": 2.030923258449281e-05,
      "loss": 1.0091,
      "step": 34770
    },
    {
      "epoch": 0.12189522898288631,
      "grad_norm": 3.203125,
      "learning_rate": 2.0315073774226333e-05,
      "loss": 0.9621,
      "step": 34780
    },
    {
      "epoch": 0.1219302764897819,
      "grad_norm": 3.28125,
      "learning_rate": 2.032091496395986e-05,
      "loss": 0.9888,
      "step": 34790
    },
    {
      "epoch": 0.12196532399667749,
      "grad_norm": 3.609375,
      "learning_rate": 2.0326756153693386e-05,
      "loss": 1.0293,
      "step": 34800
    },
    {
      "epoch": 0.1220003715035731,
      "grad_norm": 3.3125,
      "learning_rate": 2.033259734342691e-05,
      "loss": 0.9602,
      "step": 34810
    },
    {
      "epoch": 0.12203541901046869,
      "grad_norm": 3.578125,
      "learning_rate": 2.0338438533160432e-05,
      "loss": 0.9633,
      "step": 34820
    },
    {
      "epoch": 0.12207046651736429,
      "grad_norm": 4.3125,
      "learning_rate": 2.034427972289396e-05,
      "loss": 1.0176,
      "step": 34830
    },
    {
      "epoch": 0.12210551402425988,
      "grad_norm": 3.09375,
      "learning_rate": 2.0350120912627485e-05,
      "loss": 1.022,
      "step": 34840
    },
    {
      "epoch": 0.12214056153115548,
      "grad_norm": 3.578125,
      "learning_rate": 2.035596210236101e-05,
      "loss": 1.0677,
      "step": 34850
    },
    {
      "epoch": 0.12217560903805108,
      "grad_norm": 3.53125,
      "learning_rate": 2.0361803292094535e-05,
      "loss": 1.0801,
      "step": 34860
    },
    {
      "epoch": 0.12221065654494667,
      "grad_norm": 3.203125,
      "learning_rate": 2.036764448182806e-05,
      "loss": 0.9812,
      "step": 34870
    },
    {
      "epoch": 0.12224570405184228,
      "grad_norm": 3.5,
      "learning_rate": 2.0373485671561585e-05,
      "loss": 1.0443,
      "step": 34880
    },
    {
      "epoch": 0.12228075155873787,
      "grad_norm": 3.625,
      "learning_rate": 2.037932686129511e-05,
      "loss": 1.0512,
      "step": 34890
    },
    {
      "epoch": 0.12231579906563346,
      "grad_norm": 3.859375,
      "learning_rate": 2.0385168051028634e-05,
      "loss": 1.0599,
      "step": 34900
    },
    {
      "epoch": 0.12235084657252907,
      "grad_norm": 3.25,
      "learning_rate": 2.039100924076216e-05,
      "loss": 1.0169,
      "step": 34910
    },
    {
      "epoch": 0.12238589407942466,
      "grad_norm": 2.828125,
      "learning_rate": 2.0396850430495684e-05,
      "loss": 0.9767,
      "step": 34920
    },
    {
      "epoch": 0.12242094158632026,
      "grad_norm": 3.3125,
      "learning_rate": 2.040269162022921e-05,
      "loss": 1.0441,
      "step": 34930
    },
    {
      "epoch": 0.12245598909321585,
      "grad_norm": 3.078125,
      "learning_rate": 2.0408532809962737e-05,
      "loss": 0.9673,
      "step": 34940
    },
    {
      "epoch": 0.12249103660011144,
      "grad_norm": 4.96875,
      "learning_rate": 2.041437399969626e-05,
      "loss": 1.0487,
      "step": 34950
    },
    {
      "epoch": 0.12252608410700705,
      "grad_norm": 3.984375,
      "learning_rate": 2.0420215189429783e-05,
      "loss": 1.0983,
      "step": 34960
    },
    {
      "epoch": 0.12256113161390264,
      "grad_norm": 3.25,
      "learning_rate": 2.0426056379163308e-05,
      "loss": 0.9989,
      "step": 34970
    },
    {
      "epoch": 0.12259617912079825,
      "grad_norm": 3.359375,
      "learning_rate": 2.0431897568896836e-05,
      "loss": 1.0335,
      "step": 34980
    },
    {
      "epoch": 0.12263122662769384,
      "grad_norm": 3.453125,
      "learning_rate": 2.0437738758630358e-05,
      "loss": 1.0918,
      "step": 34990
    },
    {
      "epoch": 0.12266627413458944,
      "grad_norm": 3.6875,
      "learning_rate": 2.0443579948363883e-05,
      "loss": 1.0257,
      "step": 35000
    },
    {
      "epoch": 0.12266627413458944,
      "eval_loss": 0.9606387615203857,
      "eval_runtime": 559.3255,
      "eval_samples_per_second": 680.169,
      "eval_steps_per_second": 56.681,
      "step": 35000
    },
    {
      "epoch": 0.12270132164148503,
      "grad_norm": 3.59375,
      "learning_rate": 2.0449421138097408e-05,
      "loss": 1.1015,
      "step": 35010
    },
    {
      "epoch": 0.12273636914838063,
      "grad_norm": 3.234375,
      "learning_rate": 2.0455262327830936e-05,
      "loss": 1.0426,
      "step": 35020
    },
    {
      "epoch": 0.12277141665527623,
      "grad_norm": 3.765625,
      "learning_rate": 2.0461103517564457e-05,
      "loss": 1.0952,
      "step": 35030
    },
    {
      "epoch": 0.12280646416217182,
      "grad_norm": 3.21875,
      "learning_rate": 2.0466944707297982e-05,
      "loss": 1.0266,
      "step": 35040
    },
    {
      "epoch": 0.12284151166906743,
      "grad_norm": 3.40625,
      "learning_rate": 2.0472785897031507e-05,
      "loss": 1.0346,
      "step": 35050
    },
    {
      "epoch": 0.12287655917596302,
      "grad_norm": 3.390625,
      "learning_rate": 2.0478627086765035e-05,
      "loss": 1.0387,
      "step": 35060
    },
    {
      "epoch": 0.12291160668285861,
      "grad_norm": 3.234375,
      "learning_rate": 2.048446827649856e-05,
      "loss": 1.0709,
      "step": 35070
    },
    {
      "epoch": 0.12294665418975421,
      "grad_norm": 3.515625,
      "learning_rate": 2.049030946623208e-05,
      "loss": 1.0353,
      "step": 35080
    },
    {
      "epoch": 0.1229817016966498,
      "grad_norm": 3.453125,
      "learning_rate": 2.0496150655965606e-05,
      "loss": 1.047,
      "step": 35090
    },
    {
      "epoch": 0.12301674920354541,
      "grad_norm": 3.46875,
      "learning_rate": 2.0501991845699135e-05,
      "loss": 0.9301,
      "step": 35100
    },
    {
      "epoch": 0.123051796710441,
      "grad_norm": 3.53125,
      "learning_rate": 2.050783303543266e-05,
      "loss": 0.9994,
      "step": 35110
    },
    {
      "epoch": 0.1230868442173366,
      "grad_norm": 3.4375,
      "learning_rate": 2.051367422516618e-05,
      "loss": 0.9914,
      "step": 35120
    },
    {
      "epoch": 0.1231218917242322,
      "grad_norm": 3.015625,
      "learning_rate": 2.0519515414899706e-05,
      "loss": 0.9746,
      "step": 35130
    },
    {
      "epoch": 0.12315693923112779,
      "grad_norm": 3.1875,
      "learning_rate": 2.0525356604633234e-05,
      "loss": 1.0235,
      "step": 35140
    },
    {
      "epoch": 0.1231919867380234,
      "grad_norm": 2.9375,
      "learning_rate": 2.053119779436676e-05,
      "loss": 0.9803,
      "step": 35150
    },
    {
      "epoch": 0.12322703424491899,
      "grad_norm": 3.125,
      "learning_rate": 2.053703898410028e-05,
      "loss": 0.9909,
      "step": 35160
    },
    {
      "epoch": 0.12326208175181458,
      "grad_norm": 3.328125,
      "learning_rate": 2.0542880173833805e-05,
      "loss": 0.9327,
      "step": 35170
    },
    {
      "epoch": 0.12329712925871018,
      "grad_norm": 3.59375,
      "learning_rate": 2.0548721363567333e-05,
      "loss": 1.0494,
      "step": 35180
    },
    {
      "epoch": 0.12333217676560577,
      "grad_norm": 3.515625,
      "learning_rate": 2.0554562553300858e-05,
      "loss": 1.0426,
      "step": 35190
    },
    {
      "epoch": 0.12336722427250138,
      "grad_norm": 3.25,
      "learning_rate": 2.0560403743034383e-05,
      "loss": 0.9917,
      "step": 35200
    },
    {
      "epoch": 0.12340227177939697,
      "grad_norm": 3.515625,
      "learning_rate": 2.0566244932767908e-05,
      "loss": 1.0158,
      "step": 35210
    },
    {
      "epoch": 0.12343731928629256,
      "grad_norm": 3.640625,
      "learning_rate": 2.0572086122501433e-05,
      "loss": 1.0907,
      "step": 35220
    },
    {
      "epoch": 0.12347236679318817,
      "grad_norm": 3.375,
      "learning_rate": 2.0577927312234958e-05,
      "loss": 1.0539,
      "step": 35230
    },
    {
      "epoch": 0.12350741430008376,
      "grad_norm": 3.359375,
      "learning_rate": 2.0583768501968482e-05,
      "loss": 0.9892,
      "step": 35240
    },
    {
      "epoch": 0.12354246180697936,
      "grad_norm": 3.328125,
      "learning_rate": 2.0589609691702007e-05,
      "loss": 1.0471,
      "step": 35250
    },
    {
      "epoch": 0.12357750931387496,
      "grad_norm": 3.59375,
      "learning_rate": 2.0595450881435532e-05,
      "loss": 1.0102,
      "step": 35260
    },
    {
      "epoch": 0.12361255682077056,
      "grad_norm": 2.921875,
      "learning_rate": 2.0601292071169057e-05,
      "loss": 0.9471,
      "step": 35270
    },
    {
      "epoch": 0.12364760432766615,
      "grad_norm": 3.453125,
      "learning_rate": 2.0607133260902582e-05,
      "loss": 1.0561,
      "step": 35280
    },
    {
      "epoch": 0.12368265183456174,
      "grad_norm": 3.40625,
      "learning_rate": 2.0612974450636107e-05,
      "loss": 0.9634,
      "step": 35290
    },
    {
      "epoch": 0.12371769934145735,
      "grad_norm": 3.25,
      "learning_rate": 2.061881564036963e-05,
      "loss": 1.0643,
      "step": 35300
    },
    {
      "epoch": 0.12375274684835294,
      "grad_norm": 3.515625,
      "learning_rate": 2.0624656830103156e-05,
      "loss": 0.9933,
      "step": 35310
    },
    {
      "epoch": 0.12378779435524855,
      "grad_norm": 3.15625,
      "learning_rate": 2.063049801983668e-05,
      "loss": 0.9781,
      "step": 35320
    },
    {
      "epoch": 0.12382284186214414,
      "grad_norm": 3.359375,
      "learning_rate": 2.0636339209570206e-05,
      "loss": 0.9992,
      "step": 35330
    },
    {
      "epoch": 0.12385788936903973,
      "grad_norm": 3.578125,
      "learning_rate": 2.064218039930373e-05,
      "loss": 0.9909,
      "step": 35340
    },
    {
      "epoch": 0.12389293687593533,
      "grad_norm": 3.78125,
      "learning_rate": 2.0648021589037256e-05,
      "loss": 1.127,
      "step": 35350
    },
    {
      "epoch": 0.12392798438283092,
      "grad_norm": 2.84375,
      "learning_rate": 2.065386277877078e-05,
      "loss": 0.9823,
      "step": 35360
    },
    {
      "epoch": 0.12396303188972653,
      "grad_norm": 3.609375,
      "learning_rate": 2.0659703968504305e-05,
      "loss": 0.9784,
      "step": 35370
    },
    {
      "epoch": 0.12399807939662212,
      "grad_norm": 3.390625,
      "learning_rate": 2.066554515823783e-05,
      "loss": 0.9263,
      "step": 35380
    },
    {
      "epoch": 0.12403312690351771,
      "grad_norm": 3.078125,
      "learning_rate": 2.0671386347971355e-05,
      "loss": 1.0862,
      "step": 35390
    },
    {
      "epoch": 0.12406817441041332,
      "grad_norm": 3.578125,
      "learning_rate": 2.067722753770488e-05,
      "loss": 0.9406,
      "step": 35400
    },
    {
      "epoch": 0.12410322191730891,
      "grad_norm": 3.421875,
      "learning_rate": 2.0683068727438408e-05,
      "loss": 1.0373,
      "step": 35410
    },
    {
      "epoch": 0.12413826942420451,
      "grad_norm": 3.703125,
      "learning_rate": 2.068890991717193e-05,
      "loss": 1.0145,
      "step": 35420
    },
    {
      "epoch": 0.1241733169311001,
      "grad_norm": 3.0625,
      "learning_rate": 2.0694751106905454e-05,
      "loss": 1.0396,
      "step": 35430
    },
    {
      "epoch": 0.1242083644379957,
      "grad_norm": 3.421875,
      "learning_rate": 2.070059229663898e-05,
      "loss": 1.0096,
      "step": 35440
    },
    {
      "epoch": 0.1242434119448913,
      "grad_norm": 3.328125,
      "learning_rate": 2.0706433486372508e-05,
      "loss": 1.0449,
      "step": 35450
    },
    {
      "epoch": 0.1242784594517869,
      "grad_norm": 3.21875,
      "learning_rate": 2.071227467610603e-05,
      "loss": 1.0442,
      "step": 35460
    },
    {
      "epoch": 0.1243135069586825,
      "grad_norm": 2.984375,
      "learning_rate": 2.0718115865839554e-05,
      "loss": 1.0109,
      "step": 35470
    },
    {
      "epoch": 0.12434855446557809,
      "grad_norm": 3.109375,
      "learning_rate": 2.072395705557308e-05,
      "loss": 0.9294,
      "step": 35480
    },
    {
      "epoch": 0.12438360197247368,
      "grad_norm": 3.03125,
      "learning_rate": 2.0729798245306607e-05,
      "loss": 0.9836,
      "step": 35490
    },
    {
      "epoch": 0.12441864947936929,
      "grad_norm": 2.984375,
      "learning_rate": 2.0735639435040132e-05,
      "loss": 0.9819,
      "step": 35500
    },
    {
      "epoch": 0.12445369698626488,
      "grad_norm": 3.640625,
      "learning_rate": 2.0741480624773653e-05,
      "loss": 1.0003,
      "step": 35510
    },
    {
      "epoch": 0.12448874449316048,
      "grad_norm": 3.4375,
      "learning_rate": 2.0747321814507178e-05,
      "loss": 1.0679,
      "step": 35520
    },
    {
      "epoch": 0.12452379200005607,
      "grad_norm": 3.28125,
      "learning_rate": 2.0753163004240706e-05,
      "loss": 0.975,
      "step": 35530
    },
    {
      "epoch": 0.12455883950695168,
      "grad_norm": 3.140625,
      "learning_rate": 2.075900419397423e-05,
      "loss": 1.035,
      "step": 35540
    },
    {
      "epoch": 0.12459388701384727,
      "grad_norm": 3.328125,
      "learning_rate": 2.0764845383707753e-05,
      "loss": 1.1284,
      "step": 35550
    },
    {
      "epoch": 0.12462893452074286,
      "grad_norm": 3.3125,
      "learning_rate": 2.077068657344128e-05,
      "loss": 1.0077,
      "step": 35560
    },
    {
      "epoch": 0.12466398202763847,
      "grad_norm": 2.953125,
      "learning_rate": 2.0776527763174806e-05,
      "loss": 0.984,
      "step": 35570
    },
    {
      "epoch": 0.12469902953453406,
      "grad_norm": 3.078125,
      "learning_rate": 2.078236895290833e-05,
      "loss": 1.089,
      "step": 35580
    },
    {
      "epoch": 0.12473407704142966,
      "grad_norm": 3.453125,
      "learning_rate": 2.0788210142641852e-05,
      "loss": 0.9836,
      "step": 35590
    },
    {
      "epoch": 0.12476912454832526,
      "grad_norm": 3.46875,
      "learning_rate": 2.079405133237538e-05,
      "loss": 1.0978,
      "step": 35600
    },
    {
      "epoch": 0.12480417205522085,
      "grad_norm": 3.015625,
      "learning_rate": 2.0799892522108905e-05,
      "loss": 0.9347,
      "step": 35610
    },
    {
      "epoch": 0.12483921956211645,
      "grad_norm": 3.03125,
      "learning_rate": 2.080573371184243e-05,
      "loss": 1.0039,
      "step": 35620
    },
    {
      "epoch": 0.12487426706901204,
      "grad_norm": 3.359375,
      "learning_rate": 2.0811574901575955e-05,
      "loss": 0.9461,
      "step": 35630
    },
    {
      "epoch": 0.12490931457590765,
      "grad_norm": 3.203125,
      "learning_rate": 2.081741609130948e-05,
      "loss": 1.0529,
      "step": 35640
    },
    {
      "epoch": 0.12494436208280324,
      "grad_norm": 3.515625,
      "learning_rate": 2.0823257281043004e-05,
      "loss": 1.008,
      "step": 35650
    },
    {
      "epoch": 0.12497940958969883,
      "grad_norm": 3.0625,
      "learning_rate": 2.082909847077653e-05,
      "loss": 1.0337,
      "step": 35660
    },
    {
      "epoch": 0.12501445709659442,
      "grad_norm": 3.40625,
      "learning_rate": 2.0834939660510054e-05,
      "loss": 0.9499,
      "step": 35670
    },
    {
      "epoch": 0.12504950460349004,
      "grad_norm": 3.359375,
      "learning_rate": 2.084078085024358e-05,
      "loss": 1.0146,
      "step": 35680
    },
    {
      "epoch": 0.12508455211038563,
      "grad_norm": 3.453125,
      "learning_rate": 2.0846622039977104e-05,
      "loss": 1.0322,
      "step": 35690
    },
    {
      "epoch": 0.12511959961728122,
      "grad_norm": 3.953125,
      "learning_rate": 2.085246322971063e-05,
      "loss": 1.0444,
      "step": 35700
    },
    {
      "epoch": 0.12515464712417682,
      "grad_norm": 3.625,
      "learning_rate": 2.0858304419444153e-05,
      "loss": 0.999,
      "step": 35710
    },
    {
      "epoch": 0.1251896946310724,
      "grad_norm": 3.25,
      "learning_rate": 2.086414560917768e-05,
      "loss": 0.9988,
      "step": 35720
    },
    {
      "epoch": 0.12522474213796803,
      "grad_norm": 3.125,
      "learning_rate": 2.0869986798911203e-05,
      "loss": 1.0558,
      "step": 35730
    },
    {
      "epoch": 0.12525978964486362,
      "grad_norm": 3.234375,
      "learning_rate": 2.0875827988644728e-05,
      "loss": 0.989,
      "step": 35740
    },
    {
      "epoch": 0.1252948371517592,
      "grad_norm": 3.4375,
      "learning_rate": 2.0881669178378253e-05,
      "loss": 1.046,
      "step": 35750
    },
    {
      "epoch": 0.1253298846586548,
      "grad_norm": 3.875,
      "learning_rate": 2.0887510368111778e-05,
      "loss": 1.0891,
      "step": 35760
    },
    {
      "epoch": 0.1253649321655504,
      "grad_norm": 3.625,
      "learning_rate": 2.0893351557845303e-05,
      "loss": 0.9972,
      "step": 35770
    },
    {
      "epoch": 0.125399979672446,
      "grad_norm": 3.28125,
      "learning_rate": 2.0899192747578827e-05,
      "loss": 0.9947,
      "step": 35780
    },
    {
      "epoch": 0.1254350271793416,
      "grad_norm": 3.46875,
      "learning_rate": 2.0905033937312352e-05,
      "loss": 0.9793,
      "step": 35790
    },
    {
      "epoch": 0.1254700746862372,
      "grad_norm": 3.5,
      "learning_rate": 2.0910875127045877e-05,
      "loss": 0.997,
      "step": 35800
    },
    {
      "epoch": 0.12550512219313278,
      "grad_norm": 3.125,
      "learning_rate": 2.0916716316779402e-05,
      "loss": 1.0201,
      "step": 35810
    },
    {
      "epoch": 0.12554016970002838,
      "grad_norm": 3.359375,
      "learning_rate": 2.0922557506512927e-05,
      "loss": 1.1034,
      "step": 35820
    },
    {
      "epoch": 0.125575217206924,
      "grad_norm": 3.203125,
      "learning_rate": 2.092839869624645e-05,
      "loss": 1.0481,
      "step": 35830
    },
    {
      "epoch": 0.12561026471381959,
      "grad_norm": 3.09375,
      "learning_rate": 2.093423988597998e-05,
      "loss": 0.9885,
      "step": 35840
    },
    {
      "epoch": 0.12564531222071518,
      "grad_norm": 3.53125,
      "learning_rate": 2.09400810757135e-05,
      "loss": 1.0037,
      "step": 35850
    },
    {
      "epoch": 0.12568035972761077,
      "grad_norm": 3.421875,
      "learning_rate": 2.0945922265447026e-05,
      "loss": 0.979,
      "step": 35860
    },
    {
      "epoch": 0.12571540723450636,
      "grad_norm": 3.28125,
      "learning_rate": 2.095176345518055e-05,
      "loss": 0.9606,
      "step": 35870
    },
    {
      "epoch": 0.12575045474140198,
      "grad_norm": 2.6875,
      "learning_rate": 2.095760464491408e-05,
      "loss": 1.0902,
      "step": 35880
    },
    {
      "epoch": 0.12578550224829757,
      "grad_norm": 3.40625,
      "learning_rate": 2.09634458346476e-05,
      "loss": 1.0099,
      "step": 35890
    },
    {
      "epoch": 0.12582054975519316,
      "grad_norm": 3.15625,
      "learning_rate": 2.0969287024381126e-05,
      "loss": 1.0103,
      "step": 35900
    },
    {
      "epoch": 0.12585559726208875,
      "grad_norm": 2.75,
      "learning_rate": 2.0975128214114654e-05,
      "loss": 0.9321,
      "step": 35910
    },
    {
      "epoch": 0.12589064476898437,
      "grad_norm": 3.59375,
      "learning_rate": 2.098096940384818e-05,
      "loss": 1.0636,
      "step": 35920
    },
    {
      "epoch": 0.12592569227587996,
      "grad_norm": 3.546875,
      "learning_rate": 2.09868105935817e-05,
      "loss": 1.0226,
      "step": 35930
    },
    {
      "epoch": 0.12596073978277555,
      "grad_norm": 3.421875,
      "learning_rate": 2.0992651783315225e-05,
      "loss": 0.9972,
      "step": 35940
    },
    {
      "epoch": 0.12599578728967115,
      "grad_norm": 3.546875,
      "learning_rate": 2.0998492973048753e-05,
      "loss": 1.0708,
      "step": 35950
    },
    {
      "epoch": 0.12603083479656674,
      "grad_norm": 3.484375,
      "learning_rate": 2.1004334162782278e-05,
      "loss": 1.0427,
      "step": 35960
    },
    {
      "epoch": 0.12606588230346236,
      "grad_norm": 3.046875,
      "learning_rate": 2.1010175352515803e-05,
      "loss": 1.0297,
      "step": 35970
    },
    {
      "epoch": 0.12610092981035795,
      "grad_norm": 3.265625,
      "learning_rate": 2.1016016542249324e-05,
      "loss": 1.0331,
      "step": 35980
    },
    {
      "epoch": 0.12613597731725354,
      "grad_norm": 3.265625,
      "learning_rate": 2.1021857731982852e-05,
      "loss": 1.0034,
      "step": 35990
    },
    {
      "epoch": 0.12617102482414913,
      "grad_norm": 3.21875,
      "learning_rate": 2.1027698921716377e-05,
      "loss": 1.0217,
      "step": 36000
    },
    {
      "epoch": 0.12620607233104472,
      "grad_norm": 3.515625,
      "learning_rate": 2.1033540111449902e-05,
      "loss": 0.9956,
      "step": 36010
    },
    {
      "epoch": 0.12624111983794034,
      "grad_norm": 3.03125,
      "learning_rate": 2.1039381301183424e-05,
      "loss": 0.9898,
      "step": 36020
    },
    {
      "epoch": 0.12627616734483593,
      "grad_norm": 3.140625,
      "learning_rate": 2.1045222490916952e-05,
      "loss": 1.0337,
      "step": 36030
    },
    {
      "epoch": 0.12631121485173152,
      "grad_norm": 2.9375,
      "learning_rate": 2.1051063680650477e-05,
      "loss": 0.9889,
      "step": 36040
    },
    {
      "epoch": 0.12634626235862711,
      "grad_norm": 3.546875,
      "learning_rate": 2.1056904870384e-05,
      "loss": 0.9289,
      "step": 36050
    },
    {
      "epoch": 0.1263813098655227,
      "grad_norm": 2.828125,
      "learning_rate": 2.1062746060117523e-05,
      "loss": 0.9159,
      "step": 36060
    },
    {
      "epoch": 0.12641635737241833,
      "grad_norm": 3.296875,
      "learning_rate": 2.106858724985105e-05,
      "loss": 1.0482,
      "step": 36070
    },
    {
      "epoch": 0.12645140487931392,
      "grad_norm": 3.296875,
      "learning_rate": 2.1074428439584576e-05,
      "loss": 1.0692,
      "step": 36080
    },
    {
      "epoch": 0.1264864523862095,
      "grad_norm": 2.78125,
      "learning_rate": 2.10802696293181e-05,
      "loss": 1.0448,
      "step": 36090
    },
    {
      "epoch": 0.1265214998931051,
      "grad_norm": 3.265625,
      "learning_rate": 2.1086110819051626e-05,
      "loss": 1.0809,
      "step": 36100
    },
    {
      "epoch": 0.1265565474000007,
      "grad_norm": 3.375,
      "learning_rate": 2.109195200878515e-05,
      "loss": 0.9962,
      "step": 36110
    },
    {
      "epoch": 0.1265915949068963,
      "grad_norm": 3.625,
      "learning_rate": 2.1097793198518675e-05,
      "loss": 0.9666,
      "step": 36120
    },
    {
      "epoch": 0.1266266424137919,
      "grad_norm": 3.21875,
      "learning_rate": 2.11036343882522e-05,
      "loss": 0.9762,
      "step": 36130
    },
    {
      "epoch": 0.1266616899206875,
      "grad_norm": 3.0,
      "learning_rate": 2.1109475577985725e-05,
      "loss": 0.9957,
      "step": 36140
    },
    {
      "epoch": 0.12669673742758308,
      "grad_norm": 2.828125,
      "learning_rate": 2.111531676771925e-05,
      "loss": 1.001,
      "step": 36150
    },
    {
      "epoch": 0.12673178493447868,
      "grad_norm": 3.234375,
      "learning_rate": 2.1121157957452775e-05,
      "loss": 1.0935,
      "step": 36160
    },
    {
      "epoch": 0.1267668324413743,
      "grad_norm": 3.8125,
      "learning_rate": 2.11269991471863e-05,
      "loss": 1.1234,
      "step": 36170
    },
    {
      "epoch": 0.12680187994826989,
      "grad_norm": 3.828125,
      "learning_rate": 2.1132840336919825e-05,
      "loss": 1.0597,
      "step": 36180
    },
    {
      "epoch": 0.12683692745516548,
      "grad_norm": 3.5,
      "learning_rate": 2.113868152665335e-05,
      "loss": 1.0794,
      "step": 36190
    },
    {
      "epoch": 0.12687197496206107,
      "grad_norm": 3.34375,
      "learning_rate": 2.1144522716386874e-05,
      "loss": 0.9875,
      "step": 36200
    },
    {
      "epoch": 0.12690702246895666,
      "grad_norm": 3.171875,
      "learning_rate": 2.11503639061204e-05,
      "loss": 1.0079,
      "step": 36210
    },
    {
      "epoch": 0.12694206997585228,
      "grad_norm": 3.40625,
      "learning_rate": 2.1156205095853924e-05,
      "loss": 1.0273,
      "step": 36220
    },
    {
      "epoch": 0.12697711748274787,
      "grad_norm": 3.140625,
      "learning_rate": 2.116204628558745e-05,
      "loss": 0.9939,
      "step": 36230
    },
    {
      "epoch": 0.12701216498964346,
      "grad_norm": 3.375,
      "learning_rate": 2.1167887475320974e-05,
      "loss": 1.0327,
      "step": 36240
    },
    {
      "epoch": 0.12704721249653905,
      "grad_norm": 3.203125,
      "learning_rate": 2.11737286650545e-05,
      "loss": 1.0126,
      "step": 36250
    },
    {
      "epoch": 0.12708226000343464,
      "grad_norm": 2.90625,
      "learning_rate": 2.1179569854788027e-05,
      "loss": 1.0276,
      "step": 36260
    },
    {
      "epoch": 0.12711730751033026,
      "grad_norm": 3.078125,
      "learning_rate": 2.1185411044521548e-05,
      "loss": 0.9844,
      "step": 36270
    },
    {
      "epoch": 0.12715235501722585,
      "grad_norm": 2.65625,
      "learning_rate": 2.1191252234255073e-05,
      "loss": 0.9992,
      "step": 36280
    },
    {
      "epoch": 0.12718740252412145,
      "grad_norm": 3.984375,
      "learning_rate": 2.1197093423988598e-05,
      "loss": 1.0434,
      "step": 36290
    },
    {
      "epoch": 0.12722245003101704,
      "grad_norm": 3.25,
      "learning_rate": 2.1202934613722126e-05,
      "loss": 1.032,
      "step": 36300
    },
    {
      "epoch": 0.12725749753791263,
      "grad_norm": 3.296875,
      "learning_rate": 2.120877580345565e-05,
      "loss": 1.0213,
      "step": 36310
    },
    {
      "epoch": 0.12729254504480825,
      "grad_norm": 3.5,
      "learning_rate": 2.1214616993189172e-05,
      "loss": 1.1013,
      "step": 36320
    },
    {
      "epoch": 0.12732759255170384,
      "grad_norm": 3.40625,
      "learning_rate": 2.1220458182922697e-05,
      "loss": 1.0351,
      "step": 36330
    },
    {
      "epoch": 0.12736264005859943,
      "grad_norm": 3.40625,
      "learning_rate": 2.1226299372656225e-05,
      "loss": 0.9647,
      "step": 36340
    },
    {
      "epoch": 0.12739768756549502,
      "grad_norm": 3.28125,
      "learning_rate": 2.123214056238975e-05,
      "loss": 1.0885,
      "step": 36350
    },
    {
      "epoch": 0.1274327350723906,
      "grad_norm": 3.25,
      "learning_rate": 2.1237981752123272e-05,
      "loss": 1.0507,
      "step": 36360
    },
    {
      "epoch": 0.12746778257928623,
      "grad_norm": 3.890625,
      "learning_rate": 2.1243822941856797e-05,
      "loss": 1.0335,
      "step": 36370
    },
    {
      "epoch": 0.12750283008618182,
      "grad_norm": 3.46875,
      "learning_rate": 2.1249664131590325e-05,
      "loss": 1.0036,
      "step": 36380
    },
    {
      "epoch": 0.12753787759307741,
      "grad_norm": 3.375,
      "learning_rate": 2.125550532132385e-05,
      "loss": 0.9846,
      "step": 36390
    },
    {
      "epoch": 0.127572925099973,
      "grad_norm": 2.65625,
      "learning_rate": 2.1261346511057375e-05,
      "loss": 0.9657,
      "step": 36400
    },
    {
      "epoch": 0.1276079726068686,
      "grad_norm": 3.0625,
      "learning_rate": 2.1267187700790896e-05,
      "loss": 1.1394,
      "step": 36410
    },
    {
      "epoch": 0.12764302011376422,
      "grad_norm": 3.21875,
      "learning_rate": 2.1273028890524424e-05,
      "loss": 1.0115,
      "step": 36420
    },
    {
      "epoch": 0.1276780676206598,
      "grad_norm": 3.265625,
      "learning_rate": 2.127887008025795e-05,
      "loss": 0.9887,
      "step": 36430
    },
    {
      "epoch": 0.1277131151275554,
      "grad_norm": 3.5,
      "learning_rate": 2.1284711269991474e-05,
      "loss": 1.0837,
      "step": 36440
    },
    {
      "epoch": 0.127748162634451,
      "grad_norm": 3.109375,
      "learning_rate": 2.1290552459724995e-05,
      "loss": 1.0177,
      "step": 36450
    },
    {
      "epoch": 0.12778321014134658,
      "grad_norm": 3.40625,
      "learning_rate": 2.1296393649458524e-05,
      "loss": 1.0794,
      "step": 36460
    },
    {
      "epoch": 0.1278182576482422,
      "grad_norm": 3.21875,
      "learning_rate": 2.130223483919205e-05,
      "loss": 0.9949,
      "step": 36470
    },
    {
      "epoch": 0.1278533051551378,
      "grad_norm": 3.359375,
      "learning_rate": 2.1308076028925573e-05,
      "loss": 1.0102,
      "step": 36480
    },
    {
      "epoch": 0.12788835266203338,
      "grad_norm": 3.484375,
      "learning_rate": 2.1313917218659095e-05,
      "loss": 1.0693,
      "step": 36490
    },
    {
      "epoch": 0.12792340016892897,
      "grad_norm": 2.9375,
      "learning_rate": 2.1319758408392623e-05,
      "loss": 1.026,
      "step": 36500
    },
    {
      "epoch": 0.1279584476758246,
      "grad_norm": 3.34375,
      "learning_rate": 2.1325599598126148e-05,
      "loss": 0.9893,
      "step": 36510
    },
    {
      "epoch": 0.12799349518272018,
      "grad_norm": 3.5,
      "learning_rate": 2.1331440787859673e-05,
      "loss": 0.9771,
      "step": 36520
    },
    {
      "epoch": 0.12802854268961578,
      "grad_norm": 3.3125,
      "learning_rate": 2.1337281977593197e-05,
      "loss": 1.0324,
      "step": 36530
    },
    {
      "epoch": 0.12806359019651137,
      "grad_norm": 3.28125,
      "learning_rate": 2.1343123167326722e-05,
      "loss": 1.0213,
      "step": 36540
    },
    {
      "epoch": 0.12809863770340696,
      "grad_norm": 3.21875,
      "learning_rate": 2.1348964357060247e-05,
      "loss": 1.0584,
      "step": 36550
    },
    {
      "epoch": 0.12813368521030258,
      "grad_norm": 3.15625,
      "learning_rate": 2.1354805546793772e-05,
      "loss": 1.0158,
      "step": 36560
    },
    {
      "epoch": 0.12816873271719817,
      "grad_norm": 3.328125,
      "learning_rate": 2.1360646736527297e-05,
      "loss": 1.0106,
      "step": 36570
    },
    {
      "epoch": 0.12820378022409376,
      "grad_norm": 3.328125,
      "learning_rate": 2.1366487926260822e-05,
      "loss": 1.0118,
      "step": 36580
    },
    {
      "epoch": 0.12823882773098935,
      "grad_norm": 3.515625,
      "learning_rate": 2.1372329115994347e-05,
      "loss": 1.0712,
      "step": 36590
    },
    {
      "epoch": 0.12827387523788494,
      "grad_norm": 3.453125,
      "learning_rate": 2.137817030572787e-05,
      "loss": 1.0143,
      "step": 36600
    },
    {
      "epoch": 0.12830892274478056,
      "grad_norm": 3.203125,
      "learning_rate": 2.1384011495461396e-05,
      "loss": 1.0057,
      "step": 36610
    },
    {
      "epoch": 0.12834397025167615,
      "grad_norm": 3.328125,
      "learning_rate": 2.138985268519492e-05,
      "loss": 1.078,
      "step": 36620
    },
    {
      "epoch": 0.12837901775857175,
      "grad_norm": 3.4375,
      "learning_rate": 2.1395693874928446e-05,
      "loss": 1.1037,
      "step": 36630
    },
    {
      "epoch": 0.12841406526546734,
      "grad_norm": 3.125,
      "learning_rate": 2.140153506466197e-05,
      "loss": 0.9692,
      "step": 36640
    },
    {
      "epoch": 0.12844911277236293,
      "grad_norm": 3.578125,
      "learning_rate": 2.14073762543955e-05,
      "loss": 1.0316,
      "step": 36650
    },
    {
      "epoch": 0.12848416027925855,
      "grad_norm": 3.546875,
      "learning_rate": 2.141321744412902e-05,
      "loss": 1.0573,
      "step": 36660
    },
    {
      "epoch": 0.12851920778615414,
      "grad_norm": 2.953125,
      "learning_rate": 2.1419058633862545e-05,
      "loss": 0.9357,
      "step": 36670
    },
    {
      "epoch": 0.12855425529304973,
      "grad_norm": 3.3125,
      "learning_rate": 2.142489982359607e-05,
      "loss": 0.9318,
      "step": 36680
    },
    {
      "epoch": 0.12858930279994532,
      "grad_norm": 3.15625,
      "learning_rate": 2.14307410133296e-05,
      "loss": 0.9862,
      "step": 36690
    },
    {
      "epoch": 0.1286243503068409,
      "grad_norm": 3.390625,
      "learning_rate": 2.143658220306312e-05,
      "loss": 1.1054,
      "step": 36700
    },
    {
      "epoch": 0.12865939781373653,
      "grad_norm": 3.75,
      "learning_rate": 2.1442423392796645e-05,
      "loss": 0.9881,
      "step": 36710
    },
    {
      "epoch": 0.12869444532063212,
      "grad_norm": 3.265625,
      "learning_rate": 2.144826458253017e-05,
      "loss": 1.0455,
      "step": 36720
    },
    {
      "epoch": 0.12872949282752771,
      "grad_norm": 3.1875,
      "learning_rate": 2.1454105772263698e-05,
      "loss": 0.9882,
      "step": 36730
    },
    {
      "epoch": 0.1287645403344233,
      "grad_norm": 3.3125,
      "learning_rate": 2.1459946961997223e-05,
      "loss": 1.0173,
      "step": 36740
    },
    {
      "epoch": 0.1287995878413189,
      "grad_norm": 3.46875,
      "learning_rate": 2.1465788151730744e-05,
      "loss": 1.075,
      "step": 36750
    },
    {
      "epoch": 0.12883463534821452,
      "grad_norm": 3.578125,
      "learning_rate": 2.147162934146427e-05,
      "loss": 0.9649,
      "step": 36760
    },
    {
      "epoch": 0.1288696828551101,
      "grad_norm": 3.59375,
      "learning_rate": 2.1477470531197797e-05,
      "loss": 1.0446,
      "step": 36770
    },
    {
      "epoch": 0.1289047303620057,
      "grad_norm": 3.3125,
      "learning_rate": 2.1483311720931322e-05,
      "loss": 1.0058,
      "step": 36780
    },
    {
      "epoch": 0.1289397778689013,
      "grad_norm": 3.265625,
      "learning_rate": 2.1489152910664843e-05,
      "loss": 1.0487,
      "step": 36790
    },
    {
      "epoch": 0.12897482537579688,
      "grad_norm": 3.203125,
      "learning_rate": 2.1494994100398368e-05,
      "loss": 0.9858,
      "step": 36800
    },
    {
      "epoch": 0.1290098728826925,
      "grad_norm": 3.53125,
      "learning_rate": 2.1500835290131897e-05,
      "loss": 0.9831,
      "step": 36810
    },
    {
      "epoch": 0.1290449203895881,
      "grad_norm": 3.5,
      "learning_rate": 2.150667647986542e-05,
      "loss": 1.0483,
      "step": 36820
    },
    {
      "epoch": 0.12907996789648368,
      "grad_norm": 3.546875,
      "learning_rate": 2.1512517669598943e-05,
      "loss": 1.0798,
      "step": 36830
    },
    {
      "epoch": 0.12911501540337927,
      "grad_norm": 3.265625,
      "learning_rate": 2.1518358859332468e-05,
      "loss": 1.0365,
      "step": 36840
    },
    {
      "epoch": 0.12915006291027487,
      "grad_norm": 3.59375,
      "learning_rate": 2.1524200049065996e-05,
      "loss": 1.0156,
      "step": 36850
    },
    {
      "epoch": 0.12918511041717048,
      "grad_norm": 3.390625,
      "learning_rate": 2.153004123879952e-05,
      "loss": 0.943,
      "step": 36860
    },
    {
      "epoch": 0.12922015792406608,
      "grad_norm": 3.40625,
      "learning_rate": 2.1535882428533046e-05,
      "loss": 0.9412,
      "step": 36870
    },
    {
      "epoch": 0.12925520543096167,
      "grad_norm": 2.90625,
      "learning_rate": 2.1541723618266567e-05,
      "loss": 0.9561,
      "step": 36880
    },
    {
      "epoch": 0.12929025293785726,
      "grad_norm": 3.375,
      "learning_rate": 2.1547564808000095e-05,
      "loss": 1.0451,
      "step": 36890
    },
    {
      "epoch": 0.12932530044475285,
      "grad_norm": 3.21875,
      "learning_rate": 2.155340599773362e-05,
      "loss": 0.907,
      "step": 36900
    },
    {
      "epoch": 0.12936034795164847,
      "grad_norm": 3.859375,
      "learning_rate": 2.1559247187467145e-05,
      "loss": 1.0093,
      "step": 36910
    },
    {
      "epoch": 0.12939539545854406,
      "grad_norm": 3.34375,
      "learning_rate": 2.1565088377200666e-05,
      "loss": 0.9427,
      "step": 36920
    },
    {
      "epoch": 0.12943044296543965,
      "grad_norm": 3.734375,
      "learning_rate": 2.1570929566934195e-05,
      "loss": 1.03,
      "step": 36930
    },
    {
      "epoch": 0.12946549047233524,
      "grad_norm": 3.5625,
      "learning_rate": 2.157677075666772e-05,
      "loss": 0.985,
      "step": 36940
    },
    {
      "epoch": 0.12950053797923083,
      "grad_norm": 3.375,
      "learning_rate": 2.1582611946401244e-05,
      "loss": 1.0694,
      "step": 36950
    },
    {
      "epoch": 0.12953558548612645,
      "grad_norm": 3.0,
      "learning_rate": 2.158845313613477e-05,
      "loss": 0.9932,
      "step": 36960
    },
    {
      "epoch": 0.12957063299302204,
      "grad_norm": 3.578125,
      "learning_rate": 2.1594294325868294e-05,
      "loss": 1.1077,
      "step": 36970
    },
    {
      "epoch": 0.12960568049991764,
      "grad_norm": 2.828125,
      "learning_rate": 2.160013551560182e-05,
      "loss": 1.0198,
      "step": 36980
    },
    {
      "epoch": 0.12964072800681323,
      "grad_norm": 3.03125,
      "learning_rate": 2.1605976705335344e-05,
      "loss": 0.9642,
      "step": 36990
    },
    {
      "epoch": 0.12967577551370882,
      "grad_norm": 3.421875,
      "learning_rate": 2.161181789506887e-05,
      "loss": 1.1193,
      "step": 37000
    },
    {
      "epoch": 0.12971082302060444,
      "grad_norm": 3.34375,
      "learning_rate": 2.1617659084802393e-05,
      "loss": 1.0403,
      "step": 37010
    },
    {
      "epoch": 0.12974587052750003,
      "grad_norm": 2.859375,
      "learning_rate": 2.1623500274535918e-05,
      "loss": 1.0403,
      "step": 37020
    },
    {
      "epoch": 0.12978091803439562,
      "grad_norm": 3.140625,
      "learning_rate": 2.1629341464269443e-05,
      "loss": 0.9952,
      "step": 37030
    },
    {
      "epoch": 0.1298159655412912,
      "grad_norm": 3.53125,
      "learning_rate": 2.1635182654002968e-05,
      "loss": 0.9861,
      "step": 37040
    },
    {
      "epoch": 0.12985101304818683,
      "grad_norm": 3.15625,
      "learning_rate": 2.1641023843736493e-05,
      "loss": 1.1239,
      "step": 37050
    },
    {
      "epoch": 0.12988606055508242,
      "grad_norm": 3.421875,
      "learning_rate": 2.1646865033470018e-05,
      "loss": 1.0331,
      "step": 37060
    },
    {
      "epoch": 0.129921108061978,
      "grad_norm": 3.28125,
      "learning_rate": 2.1652706223203542e-05,
      "loss": 0.9965,
      "step": 37070
    },
    {
      "epoch": 0.1299561555688736,
      "grad_norm": 3.328125,
      "learning_rate": 2.165854741293707e-05,
      "loss": 0.965,
      "step": 37080
    },
    {
      "epoch": 0.1299912030757692,
      "grad_norm": 3.34375,
      "learning_rate": 2.1664388602670592e-05,
      "loss": 1.0125,
      "step": 37090
    },
    {
      "epoch": 0.13002625058266482,
      "grad_norm": 3.71875,
      "learning_rate": 2.1670229792404117e-05,
      "loss": 1.0934,
      "step": 37100
    },
    {
      "epoch": 0.1300612980895604,
      "grad_norm": 3.40625,
      "learning_rate": 2.1676070982137642e-05,
      "loss": 1.0026,
      "step": 37110
    },
    {
      "epoch": 0.130096345596456,
      "grad_norm": 3.09375,
      "learning_rate": 2.168191217187117e-05,
      "loss": 1.0047,
      "step": 37120
    },
    {
      "epoch": 0.1301313931033516,
      "grad_norm": 3.171875,
      "learning_rate": 2.168775336160469e-05,
      "loss": 0.9778,
      "step": 37130
    },
    {
      "epoch": 0.13016644061024718,
      "grad_norm": 3.6875,
      "learning_rate": 2.1693594551338216e-05,
      "loss": 0.9625,
      "step": 37140
    },
    {
      "epoch": 0.1302014881171428,
      "grad_norm": 3.328125,
      "learning_rate": 2.169943574107174e-05,
      "loss": 0.9495,
      "step": 37150
    },
    {
      "epoch": 0.1302365356240384,
      "grad_norm": 3.28125,
      "learning_rate": 2.170527693080527e-05,
      "loss": 1.1408,
      "step": 37160
    },
    {
      "epoch": 0.13027158313093398,
      "grad_norm": 3.28125,
      "learning_rate": 2.171111812053879e-05,
      "loss": 1.0046,
      "step": 37170
    },
    {
      "epoch": 0.13030663063782957,
      "grad_norm": 3.4375,
      "learning_rate": 2.1716959310272316e-05,
      "loss": 0.9991,
      "step": 37180
    },
    {
      "epoch": 0.13034167814472516,
      "grad_norm": 3.3125,
      "learning_rate": 2.172280050000584e-05,
      "loss": 0.9473,
      "step": 37190
    },
    {
      "epoch": 0.13037672565162078,
      "grad_norm": 3.09375,
      "learning_rate": 2.172864168973937e-05,
      "loss": 0.9935,
      "step": 37200
    },
    {
      "epoch": 0.13041177315851638,
      "grad_norm": 3.390625,
      "learning_rate": 2.1734482879472894e-05,
      "loss": 1.0122,
      "step": 37210
    },
    {
      "epoch": 0.13044682066541197,
      "grad_norm": 3.515625,
      "learning_rate": 2.1740324069206415e-05,
      "loss": 0.9561,
      "step": 37220
    },
    {
      "epoch": 0.13048186817230756,
      "grad_norm": 3.359375,
      "learning_rate": 2.174616525893994e-05,
      "loss": 1.0685,
      "step": 37230
    },
    {
      "epoch": 0.13051691567920315,
      "grad_norm": 3.203125,
      "learning_rate": 2.1752006448673468e-05,
      "loss": 0.9468,
      "step": 37240
    },
    {
      "epoch": 0.13055196318609877,
      "grad_norm": 3.359375,
      "learning_rate": 2.1757847638406993e-05,
      "loss": 1.0786,
      "step": 37250
    },
    {
      "epoch": 0.13058701069299436,
      "grad_norm": 3.28125,
      "learning_rate": 2.1763688828140515e-05,
      "loss": 1.0244,
      "step": 37260
    },
    {
      "epoch": 0.13062205819988995,
      "grad_norm": 3.359375,
      "learning_rate": 2.1769530017874043e-05,
      "loss": 0.9984,
      "step": 37270
    },
    {
      "epoch": 0.13065710570678554,
      "grad_norm": 3.4375,
      "learning_rate": 2.1775371207607568e-05,
      "loss": 1.0302,
      "step": 37280
    },
    {
      "epoch": 0.13069215321368113,
      "grad_norm": 3.734375,
      "learning_rate": 2.1781212397341092e-05,
      "loss": 0.9295,
      "step": 37290
    },
    {
      "epoch": 0.13072720072057675,
      "grad_norm": 3.703125,
      "learning_rate": 2.1787053587074617e-05,
      "loss": 1.0162,
      "step": 37300
    },
    {
      "epoch": 0.13076224822747234,
      "grad_norm": 3.78125,
      "learning_rate": 2.1792894776808142e-05,
      "loss": 1.0821,
      "step": 37310
    },
    {
      "epoch": 0.13079729573436794,
      "grad_norm": 3.15625,
      "learning_rate": 2.1798735966541667e-05,
      "loss": 0.9448,
      "step": 37320
    },
    {
      "epoch": 0.13083234324126353,
      "grad_norm": 3.53125,
      "learning_rate": 2.1804577156275192e-05,
      "loss": 1.0041,
      "step": 37330
    },
    {
      "epoch": 0.13086739074815912,
      "grad_norm": 3.359375,
      "learning_rate": 2.1810418346008717e-05,
      "loss": 1.0285,
      "step": 37340
    },
    {
      "epoch": 0.13090243825505474,
      "grad_norm": 3.625,
      "learning_rate": 2.181625953574224e-05,
      "loss": 1.0242,
      "step": 37350
    },
    {
      "epoch": 0.13093748576195033,
      "grad_norm": 3.59375,
      "learning_rate": 2.1822100725475766e-05,
      "loss": 0.9814,
      "step": 37360
    },
    {
      "epoch": 0.13097253326884592,
      "grad_norm": 3.34375,
      "learning_rate": 2.182794191520929e-05,
      "loss": 1.0332,
      "step": 37370
    },
    {
      "epoch": 0.1310075807757415,
      "grad_norm": 3.59375,
      "learning_rate": 2.1833783104942816e-05,
      "loss": 0.9978,
      "step": 37380
    },
    {
      "epoch": 0.1310426282826371,
      "grad_norm": 3.265625,
      "learning_rate": 2.183962429467634e-05,
      "loss": 0.9891,
      "step": 37390
    },
    {
      "epoch": 0.13107767578953272,
      "grad_norm": 3.125,
      "learning_rate": 2.1845465484409866e-05,
      "loss": 1.0273,
      "step": 37400
    },
    {
      "epoch": 0.1311127232964283,
      "grad_norm": 3.6875,
      "learning_rate": 2.185130667414339e-05,
      "loss": 1.0012,
      "step": 37410
    },
    {
      "epoch": 0.1311477708033239,
      "grad_norm": 3.359375,
      "learning_rate": 2.1857147863876915e-05,
      "loss": 1.0133,
      "step": 37420
    },
    {
      "epoch": 0.1311828183102195,
      "grad_norm": 3.5,
      "learning_rate": 2.186298905361044e-05,
      "loss": 0.9301,
      "step": 37430
    },
    {
      "epoch": 0.1312178658171151,
      "grad_norm": 3.28125,
      "learning_rate": 2.1868830243343965e-05,
      "loss": 1.0322,
      "step": 37440
    },
    {
      "epoch": 0.1312529133240107,
      "grad_norm": 3.203125,
      "learning_rate": 2.187467143307749e-05,
      "loss": 0.9999,
      "step": 37450
    },
    {
      "epoch": 0.1312879608309063,
      "grad_norm": 3.203125,
      "learning_rate": 2.1880512622811015e-05,
      "loss": 0.9918,
      "step": 37460
    },
    {
      "epoch": 0.1313230083378019,
      "grad_norm": 3.375,
      "learning_rate": 2.188635381254454e-05,
      "loss": 1.0277,
      "step": 37470
    },
    {
      "epoch": 0.13135805584469748,
      "grad_norm": 4.1875,
      "learning_rate": 2.1892195002278064e-05,
      "loss": 1.0033,
      "step": 37480
    },
    {
      "epoch": 0.13139310335159307,
      "grad_norm": 3.4375,
      "learning_rate": 2.189803619201159e-05,
      "loss": 1.0153,
      "step": 37490
    },
    {
      "epoch": 0.1314281508584887,
      "grad_norm": 3.53125,
      "learning_rate": 2.1903877381745114e-05,
      "loss": 1.1086,
      "step": 37500
    },
    {
      "epoch": 0.13146319836538428,
      "grad_norm": 3.484375,
      "learning_rate": 2.190971857147864e-05,
      "loss": 1.0394,
      "step": 37510
    },
    {
      "epoch": 0.13149824587227987,
      "grad_norm": 3.515625,
      "learning_rate": 2.1915559761212164e-05,
      "loss": 1.1168,
      "step": 37520
    },
    {
      "epoch": 0.13153329337917546,
      "grad_norm": 3.234375,
      "learning_rate": 2.192140095094569e-05,
      "loss": 1.0319,
      "step": 37530
    },
    {
      "epoch": 0.13156834088607106,
      "grad_norm": 3.203125,
      "learning_rate": 2.1927242140679214e-05,
      "loss": 1.0234,
      "step": 37540
    },
    {
      "epoch": 0.13160338839296667,
      "grad_norm": 3.5,
      "learning_rate": 2.1933083330412742e-05,
      "loss": 1.0333,
      "step": 37550
    },
    {
      "epoch": 0.13163843589986227,
      "grad_norm": 3.671875,
      "learning_rate": 2.1938924520146263e-05,
      "loss": 1.0264,
      "step": 37560
    },
    {
      "epoch": 0.13167348340675786,
      "grad_norm": 3.390625,
      "learning_rate": 2.1944765709879788e-05,
      "loss": 1.0161,
      "step": 37570
    },
    {
      "epoch": 0.13170853091365345,
      "grad_norm": 3.078125,
      "learning_rate": 2.1950606899613313e-05,
      "loss": 1.0485,
      "step": 37580
    },
    {
      "epoch": 0.13174357842054907,
      "grad_norm": 3.453125,
      "learning_rate": 2.195644808934684e-05,
      "loss": 1.0652,
      "step": 37590
    },
    {
      "epoch": 0.13177862592744466,
      "grad_norm": 3.453125,
      "learning_rate": 2.1962289279080363e-05,
      "loss": 0.9414,
      "step": 37600
    },
    {
      "epoch": 0.13181367343434025,
      "grad_norm": 3.171875,
      "learning_rate": 2.1968130468813887e-05,
      "loss": 1.0252,
      "step": 37610
    },
    {
      "epoch": 0.13184872094123584,
      "grad_norm": 3.359375,
      "learning_rate": 2.1973971658547416e-05,
      "loss": 1.0208,
      "step": 37620
    },
    {
      "epoch": 0.13188376844813143,
      "grad_norm": 3.34375,
      "learning_rate": 2.197981284828094e-05,
      "loss": 1.0653,
      "step": 37630
    },
    {
      "epoch": 0.13191881595502705,
      "grad_norm": 3.515625,
      "learning_rate": 2.1985654038014465e-05,
      "loss": 1.0034,
      "step": 37640
    },
    {
      "epoch": 0.13195386346192264,
      "grad_norm": 3.71875,
      "learning_rate": 2.1991495227747987e-05,
      "loss": 1.0023,
      "step": 37650
    },
    {
      "epoch": 0.13198891096881823,
      "grad_norm": 3.53125,
      "learning_rate": 2.1997336417481515e-05,
      "loss": 0.9845,
      "step": 37660
    },
    {
      "epoch": 0.13202395847571383,
      "grad_norm": 3.71875,
      "learning_rate": 2.200317760721504e-05,
      "loss": 0.9758,
      "step": 37670
    },
    {
      "epoch": 0.13205900598260942,
      "grad_norm": 3.5,
      "learning_rate": 2.2009018796948565e-05,
      "loss": 1.1003,
      "step": 37680
    },
    {
      "epoch": 0.13209405348950504,
      "grad_norm": 3.234375,
      "learning_rate": 2.2014859986682086e-05,
      "loss": 1.0042,
      "step": 37690
    },
    {
      "epoch": 0.13212910099640063,
      "grad_norm": 3.34375,
      "learning_rate": 2.2020701176415614e-05,
      "loss": 1.0586,
      "step": 37700
    },
    {
      "epoch": 0.13216414850329622,
      "grad_norm": 3.296875,
      "learning_rate": 2.202654236614914e-05,
      "loss": 1.0399,
      "step": 37710
    },
    {
      "epoch": 0.1321991960101918,
      "grad_norm": 3.21875,
      "learning_rate": 2.2032383555882664e-05,
      "loss": 0.989,
      "step": 37720
    },
    {
      "epoch": 0.1322342435170874,
      "grad_norm": 3.15625,
      "learning_rate": 2.2038224745616186e-05,
      "loss": 0.9323,
      "step": 37730
    },
    {
      "epoch": 0.13226929102398302,
      "grad_norm": 2.953125,
      "learning_rate": 2.2044065935349714e-05,
      "loss": 1.087,
      "step": 37740
    },
    {
      "epoch": 0.1323043385308786,
      "grad_norm": 2.9375,
      "learning_rate": 2.204990712508324e-05,
      "loss": 0.9408,
      "step": 37750
    },
    {
      "epoch": 0.1323393860377742,
      "grad_norm": 3.59375,
      "learning_rate": 2.2055748314816764e-05,
      "loss": 1.0547,
      "step": 37760
    },
    {
      "epoch": 0.1323744335446698,
      "grad_norm": 3.625,
      "learning_rate": 2.206158950455029e-05,
      "loss": 1.0108,
      "step": 37770
    },
    {
      "epoch": 0.1324094810515654,
      "grad_norm": 3.3125,
      "learning_rate": 2.2067430694283813e-05,
      "loss": 1.0951,
      "step": 37780
    },
    {
      "epoch": 0.132444528558461,
      "grad_norm": 3.59375,
      "learning_rate": 2.2073271884017338e-05,
      "loss": 1.0041,
      "step": 37790
    },
    {
      "epoch": 0.1324795760653566,
      "grad_norm": 3.640625,
      "learning_rate": 2.2079113073750863e-05,
      "loss": 1.002,
      "step": 37800
    },
    {
      "epoch": 0.1325146235722522,
      "grad_norm": 3.0,
      "learning_rate": 2.2084954263484388e-05,
      "loss": 0.9908,
      "step": 37810
    },
    {
      "epoch": 0.13254967107914778,
      "grad_norm": 3.234375,
      "learning_rate": 2.2090795453217913e-05,
      "loss": 1.0174,
      "step": 37820
    },
    {
      "epoch": 0.13258471858604337,
      "grad_norm": 3.8125,
      "learning_rate": 2.2096636642951437e-05,
      "loss": 1.102,
      "step": 37830
    },
    {
      "epoch": 0.132619766092939,
      "grad_norm": 3.546875,
      "learning_rate": 2.2102477832684962e-05,
      "loss": 1.0291,
      "step": 37840
    },
    {
      "epoch": 0.13265481359983458,
      "grad_norm": 3.171875,
      "learning_rate": 2.2108319022418487e-05,
      "loss": 1.0042,
      "step": 37850
    },
    {
      "epoch": 0.13268986110673017,
      "grad_norm": 3.359375,
      "learning_rate": 2.2114160212152012e-05,
      "loss": 0.9917,
      "step": 37860
    },
    {
      "epoch": 0.13272490861362576,
      "grad_norm": 3.0,
      "learning_rate": 2.2120001401885537e-05,
      "loss": 0.9645,
      "step": 37870
    },
    {
      "epoch": 0.13275995612052136,
      "grad_norm": 3.078125,
      "learning_rate": 2.212584259161906e-05,
      "loss": 0.997,
      "step": 37880
    },
    {
      "epoch": 0.13279500362741697,
      "grad_norm": 3.359375,
      "learning_rate": 2.2131683781352587e-05,
      "loss": 0.9675,
      "step": 37890
    },
    {
      "epoch": 0.13283005113431257,
      "grad_norm": 3.40625,
      "learning_rate": 2.213752497108611e-05,
      "loss": 1.0677,
      "step": 37900
    },
    {
      "epoch": 0.13286509864120816,
      "grad_norm": 3.59375,
      "learning_rate": 2.2143366160819636e-05,
      "loss": 0.9763,
      "step": 37910
    },
    {
      "epoch": 0.13290014614810375,
      "grad_norm": 3.6875,
      "learning_rate": 2.214920735055316e-05,
      "loss": 1.0958,
      "step": 37920
    },
    {
      "epoch": 0.13293519365499934,
      "grad_norm": 3.546875,
      "learning_rate": 2.2155048540286686e-05,
      "loss": 1.0181,
      "step": 37930
    },
    {
      "epoch": 0.13297024116189496,
      "grad_norm": 3.703125,
      "learning_rate": 2.216088973002021e-05,
      "loss": 0.9377,
      "step": 37940
    },
    {
      "epoch": 0.13300528866879055,
      "grad_norm": 3.234375,
      "learning_rate": 2.2166730919753736e-05,
      "loss": 0.9247,
      "step": 37950
    },
    {
      "epoch": 0.13304033617568614,
      "grad_norm": 3.65625,
      "learning_rate": 2.217257210948726e-05,
      "loss": 0.964,
      "step": 37960
    },
    {
      "epoch": 0.13307538368258173,
      "grad_norm": 3.078125,
      "learning_rate": 2.217841329922079e-05,
      "loss": 0.986,
      "step": 37970
    },
    {
      "epoch": 0.13311043118947732,
      "grad_norm": 3.046875,
      "learning_rate": 2.2184254488954313e-05,
      "loss": 0.9484,
      "step": 37980
    },
    {
      "epoch": 0.13314547869637294,
      "grad_norm": 3.109375,
      "learning_rate": 2.2190095678687835e-05,
      "loss": 1.022,
      "step": 37990
    },
    {
      "epoch": 0.13318052620326853,
      "grad_norm": 3.8125,
      "learning_rate": 2.219593686842136e-05,
      "loss": 1.0354,
      "step": 38000
    },
    {
      "epoch": 0.13321557371016413,
      "grad_norm": 3.421875,
      "learning_rate": 2.2201778058154888e-05,
      "loss": 0.9916,
      "step": 38010
    },
    {
      "epoch": 0.13325062121705972,
      "grad_norm": 3.484375,
      "learning_rate": 2.2207619247888413e-05,
      "loss": 1.074,
      "step": 38020
    },
    {
      "epoch": 0.1332856687239553,
      "grad_norm": 3.375,
      "learning_rate": 2.2213460437621934e-05,
      "loss": 1.0551,
      "step": 38030
    },
    {
      "epoch": 0.13332071623085093,
      "grad_norm": 3.296875,
      "learning_rate": 2.221930162735546e-05,
      "loss": 1.0072,
      "step": 38040
    },
    {
      "epoch": 0.13335576373774652,
      "grad_norm": 3.328125,
      "learning_rate": 2.2225142817088987e-05,
      "loss": 1.033,
      "step": 38050
    },
    {
      "epoch": 0.1333908112446421,
      "grad_norm": 3.46875,
      "learning_rate": 2.2230984006822512e-05,
      "loss": 0.9783,
      "step": 38060
    },
    {
      "epoch": 0.1334258587515377,
      "grad_norm": 3.15625,
      "learning_rate": 2.2236825196556034e-05,
      "loss": 0.9912,
      "step": 38070
    },
    {
      "epoch": 0.1334609062584333,
      "grad_norm": 2.90625,
      "learning_rate": 2.224266638628956e-05,
      "loss": 1.0408,
      "step": 38080
    },
    {
      "epoch": 0.1334959537653289,
      "grad_norm": 3.125,
      "learning_rate": 2.2248507576023087e-05,
      "loss": 1.0719,
      "step": 38090
    },
    {
      "epoch": 0.1335310012722245,
      "grad_norm": 3.328125,
      "learning_rate": 2.225434876575661e-05,
      "loss": 1.0563,
      "step": 38100
    },
    {
      "epoch": 0.1335660487791201,
      "grad_norm": 3.390625,
      "learning_rate": 2.2260189955490136e-05,
      "loss": 1.0068,
      "step": 38110
    },
    {
      "epoch": 0.13360109628601569,
      "grad_norm": 3.28125,
      "learning_rate": 2.2266031145223658e-05,
      "loss": 1.0224,
      "step": 38120
    },
    {
      "epoch": 0.1336361437929113,
      "grad_norm": 3.34375,
      "learning_rate": 2.2271872334957186e-05,
      "loss": 1.0718,
      "step": 38130
    },
    {
      "epoch": 0.1336711912998069,
      "grad_norm": 2.859375,
      "learning_rate": 2.227771352469071e-05,
      "loss": 1.0022,
      "step": 38140
    },
    {
      "epoch": 0.1337062388067025,
      "grad_norm": 3.328125,
      "learning_rate": 2.2283554714424236e-05,
      "loss": 1.0468,
      "step": 38150
    },
    {
      "epoch": 0.13374128631359808,
      "grad_norm": 3.296875,
      "learning_rate": 2.2289395904157757e-05,
      "loss": 1.0092,
      "step": 38160
    },
    {
      "epoch": 0.13377633382049367,
      "grad_norm": 3.203125,
      "learning_rate": 2.2295237093891286e-05,
      "loss": 1.0081,
      "step": 38170
    },
    {
      "epoch": 0.1338113813273893,
      "grad_norm": 3.109375,
      "learning_rate": 2.230107828362481e-05,
      "loss": 0.9342,
      "step": 38180
    },
    {
      "epoch": 0.13384642883428488,
      "grad_norm": 3.46875,
      "learning_rate": 2.2306919473358335e-05,
      "loss": 0.9924,
      "step": 38190
    },
    {
      "epoch": 0.13388147634118047,
      "grad_norm": 3.3125,
      "learning_rate": 2.231276066309186e-05,
      "loss": 1.0479,
      "step": 38200
    },
    {
      "epoch": 0.13391652384807606,
      "grad_norm": 3.484375,
      "learning_rate": 2.2318601852825385e-05,
      "loss": 1.0071,
      "step": 38210
    },
    {
      "epoch": 0.13395157135497165,
      "grad_norm": 3.75,
      "learning_rate": 2.232444304255891e-05,
      "loss": 1.0529,
      "step": 38220
    },
    {
      "epoch": 0.13398661886186727,
      "grad_norm": 3.578125,
      "learning_rate": 2.2330284232292435e-05,
      "loss": 1.0439,
      "step": 38230
    },
    {
      "epoch": 0.13402166636876287,
      "grad_norm": 3.65625,
      "learning_rate": 2.233612542202596e-05,
      "loss": 1.043,
      "step": 38240
    },
    {
      "epoch": 0.13405671387565846,
      "grad_norm": 3.0625,
      "learning_rate": 2.2341966611759484e-05,
      "loss": 0.9837,
      "step": 38250
    },
    {
      "epoch": 0.13409176138255405,
      "grad_norm": 3.0,
      "learning_rate": 2.234780780149301e-05,
      "loss": 0.985,
      "step": 38260
    },
    {
      "epoch": 0.13412680888944964,
      "grad_norm": 3.046875,
      "learning_rate": 2.2353648991226534e-05,
      "loss": 1.0254,
      "step": 38270
    },
    {
      "epoch": 0.13416185639634526,
      "grad_norm": 3.671875,
      "learning_rate": 2.235949018096006e-05,
      "loss": 1.0182,
      "step": 38280
    },
    {
      "epoch": 0.13419690390324085,
      "grad_norm": 3.703125,
      "learning_rate": 2.2365331370693584e-05,
      "loss": 1.0471,
      "step": 38290
    },
    {
      "epoch": 0.13423195141013644,
      "grad_norm": 3.40625,
      "learning_rate": 2.237117256042711e-05,
      "loss": 1.06,
      "step": 38300
    },
    {
      "epoch": 0.13426699891703203,
      "grad_norm": 3.1875,
      "learning_rate": 2.2377013750160633e-05,
      "loss": 0.9723,
      "step": 38310
    },
    {
      "epoch": 0.13430204642392762,
      "grad_norm": 3.21875,
      "learning_rate": 2.238285493989416e-05,
      "loss": 0.9467,
      "step": 38320
    },
    {
      "epoch": 0.13433709393082324,
      "grad_norm": 3.375,
      "learning_rate": 2.2388696129627683e-05,
      "loss": 1.067,
      "step": 38330
    },
    {
      "epoch": 0.13437214143771883,
      "grad_norm": 3.40625,
      "learning_rate": 2.2394537319361208e-05,
      "loss": 1.1298,
      "step": 38340
    },
    {
      "epoch": 0.13440718894461443,
      "grad_norm": 3.28125,
      "learning_rate": 2.2400378509094733e-05,
      "loss": 1.0002,
      "step": 38350
    },
    {
      "epoch": 0.13444223645151002,
      "grad_norm": 3.109375,
      "learning_rate": 2.240621969882826e-05,
      "loss": 0.9801,
      "step": 38360
    },
    {
      "epoch": 0.1344772839584056,
      "grad_norm": 3.671875,
      "learning_rate": 2.2412060888561782e-05,
      "loss": 1.0013,
      "step": 38370
    },
    {
      "epoch": 0.13451233146530123,
      "grad_norm": 3.140625,
      "learning_rate": 2.2417902078295307e-05,
      "loss": 0.9721,
      "step": 38380
    },
    {
      "epoch": 0.13454737897219682,
      "grad_norm": 3.453125,
      "learning_rate": 2.2423743268028832e-05,
      "loss": 1.0768,
      "step": 38390
    },
    {
      "epoch": 0.1345824264790924,
      "grad_norm": 3.359375,
      "learning_rate": 2.242958445776236e-05,
      "loss": 0.9567,
      "step": 38400
    },
    {
      "epoch": 0.134617473985988,
      "grad_norm": 3.359375,
      "learning_rate": 2.2435425647495882e-05,
      "loss": 1.0203,
      "step": 38410
    },
    {
      "epoch": 0.1346525214928836,
      "grad_norm": 3.0625,
      "learning_rate": 2.2441266837229407e-05,
      "loss": 1.0772,
      "step": 38420
    },
    {
      "epoch": 0.1346875689997792,
      "grad_norm": 3.265625,
      "learning_rate": 2.244710802696293e-05,
      "loss": 0.9316,
      "step": 38430
    },
    {
      "epoch": 0.1347226165066748,
      "grad_norm": 3.265625,
      "learning_rate": 2.245294921669646e-05,
      "loss": 1.0298,
      "step": 38440
    },
    {
      "epoch": 0.1347576640135704,
      "grad_norm": 3.234375,
      "learning_rate": 2.2458790406429985e-05,
      "loss": 0.981,
      "step": 38450
    },
    {
      "epoch": 0.13479271152046599,
      "grad_norm": 2.796875,
      "learning_rate": 2.2464631596163506e-05,
      "loss": 1.0119,
      "step": 38460
    },
    {
      "epoch": 0.13482775902736158,
      "grad_norm": 3.109375,
      "learning_rate": 2.247047278589703e-05,
      "loss": 0.9764,
      "step": 38470
    },
    {
      "epoch": 0.1348628065342572,
      "grad_norm": 3.5625,
      "learning_rate": 2.247631397563056e-05,
      "loss": 1.0659,
      "step": 38480
    },
    {
      "epoch": 0.1348978540411528,
      "grad_norm": 3.453125,
      "learning_rate": 2.2482155165364084e-05,
      "loss": 1.0115,
      "step": 38490
    },
    {
      "epoch": 0.13493290154804838,
      "grad_norm": 3.328125,
      "learning_rate": 2.2487996355097605e-05,
      "loss": 1.0623,
      "step": 38500
    },
    {
      "epoch": 0.13496794905494397,
      "grad_norm": 3.078125,
      "learning_rate": 2.249383754483113e-05,
      "loss": 0.9758,
      "step": 38510
    },
    {
      "epoch": 0.13500299656183956,
      "grad_norm": 3.046875,
      "learning_rate": 2.249967873456466e-05,
      "loss": 1.0081,
      "step": 38520
    },
    {
      "epoch": 0.13503804406873518,
      "grad_norm": 3.140625,
      "learning_rate": 2.2505519924298183e-05,
      "loss": 0.9752,
      "step": 38530
    },
    {
      "epoch": 0.13507309157563077,
      "grad_norm": 3.28125,
      "learning_rate": 2.2511361114031708e-05,
      "loss": 1.0958,
      "step": 38540
    },
    {
      "epoch": 0.13510813908252636,
      "grad_norm": 3.015625,
      "learning_rate": 2.251720230376523e-05,
      "loss": 1.0523,
      "step": 38550
    },
    {
      "epoch": 0.13514318658942195,
      "grad_norm": 3.359375,
      "learning_rate": 2.2523043493498758e-05,
      "loss": 0.9978,
      "step": 38560
    },
    {
      "epoch": 0.13517823409631755,
      "grad_norm": 3.578125,
      "learning_rate": 2.2528884683232283e-05,
      "loss": 0.9377,
      "step": 38570
    },
    {
      "epoch": 0.13521328160321316,
      "grad_norm": 3.265625,
      "learning_rate": 2.2534725872965808e-05,
      "loss": 1.0078,
      "step": 38580
    },
    {
      "epoch": 0.13524832911010876,
      "grad_norm": 3.28125,
      "learning_rate": 2.254056706269933e-05,
      "loss": 1.0057,
      "step": 38590
    },
    {
      "epoch": 0.13528337661700435,
      "grad_norm": 3.609375,
      "learning_rate": 2.2546408252432857e-05,
      "loss": 1.0367,
      "step": 38600
    },
    {
      "epoch": 0.13531842412389994,
      "grad_norm": 3.40625,
      "learning_rate": 2.2552249442166382e-05,
      "loss": 1.0572,
      "step": 38610
    },
    {
      "epoch": 0.13535347163079553,
      "grad_norm": 3.5,
      "learning_rate": 2.2558090631899907e-05,
      "loss": 1.077,
      "step": 38620
    },
    {
      "epoch": 0.13538851913769115,
      "grad_norm": 3.09375,
      "learning_rate": 2.256393182163343e-05,
      "loss": 1.0314,
      "step": 38630
    },
    {
      "epoch": 0.13542356664458674,
      "grad_norm": 3.25,
      "learning_rate": 2.2569773011366957e-05,
      "loss": 1.0313,
      "step": 38640
    },
    {
      "epoch": 0.13545861415148233,
      "grad_norm": 3.28125,
      "learning_rate": 2.257561420110048e-05,
      "loss": 1.0265,
      "step": 38650
    },
    {
      "epoch": 0.13549366165837792,
      "grad_norm": 3.03125,
      "learning_rate": 2.2581455390834006e-05,
      "loss": 0.9369,
      "step": 38660
    },
    {
      "epoch": 0.13552870916527354,
      "grad_norm": 3.09375,
      "learning_rate": 2.258729658056753e-05,
      "loss": 0.9974,
      "step": 38670
    },
    {
      "epoch": 0.13556375667216913,
      "grad_norm": 3.171875,
      "learning_rate": 2.2593137770301056e-05,
      "loss": 0.9582,
      "step": 38680
    },
    {
      "epoch": 0.13559880417906472,
      "grad_norm": 3.3125,
      "learning_rate": 2.259897896003458e-05,
      "loss": 0.9835,
      "step": 38690
    },
    {
      "epoch": 0.13563385168596032,
      "grad_norm": 3.125,
      "learning_rate": 2.2604820149768106e-05,
      "loss": 1.012,
      "step": 38700
    },
    {
      "epoch": 0.1356688991928559,
      "grad_norm": 2.875,
      "learning_rate": 2.261066133950163e-05,
      "loss": 1.0556,
      "step": 38710
    },
    {
      "epoch": 0.13570394669975153,
      "grad_norm": 3.34375,
      "learning_rate": 2.2616502529235155e-05,
      "loss": 1.0308,
      "step": 38720
    },
    {
      "epoch": 0.13573899420664712,
      "grad_norm": 3.421875,
      "learning_rate": 2.262234371896868e-05,
      "loss": 1.0557,
      "step": 38730
    },
    {
      "epoch": 0.1357740417135427,
      "grad_norm": 3.1875,
      "learning_rate": 2.2628184908702205e-05,
      "loss": 0.9911,
      "step": 38740
    },
    {
      "epoch": 0.1358090892204383,
      "grad_norm": 3.234375,
      "learning_rate": 2.2634026098435733e-05,
      "loss": 1.029,
      "step": 38750
    },
    {
      "epoch": 0.1358441367273339,
      "grad_norm": 3.3125,
      "learning_rate": 2.2639867288169255e-05,
      "loss": 0.9646,
      "step": 38760
    },
    {
      "epoch": 0.1358791842342295,
      "grad_norm": 3.109375,
      "learning_rate": 2.264570847790278e-05,
      "loss": 0.9959,
      "step": 38770
    },
    {
      "epoch": 0.1359142317411251,
      "grad_norm": 3.703125,
      "learning_rate": 2.2651549667636304e-05,
      "loss": 0.955,
      "step": 38780
    },
    {
      "epoch": 0.1359492792480207,
      "grad_norm": 3.515625,
      "learning_rate": 2.2657390857369833e-05,
      "loss": 1.0065,
      "step": 38790
    },
    {
      "epoch": 0.13598432675491628,
      "grad_norm": 3.0,
      "learning_rate": 2.2663232047103354e-05,
      "loss": 1.0626,
      "step": 38800
    },
    {
      "epoch": 0.13601937426181188,
      "grad_norm": 3.4375,
      "learning_rate": 2.266907323683688e-05,
      "loss": 1.0067,
      "step": 38810
    },
    {
      "epoch": 0.1360544217687075,
      "grad_norm": 3.5,
      "learning_rate": 2.2674914426570404e-05,
      "loss": 0.8965,
      "step": 38820
    },
    {
      "epoch": 0.1360894692756031,
      "grad_norm": 3.296875,
      "learning_rate": 2.2680755616303932e-05,
      "loss": 1.0378,
      "step": 38830
    },
    {
      "epoch": 0.13612451678249868,
      "grad_norm": 3.296875,
      "learning_rate": 2.2686596806037454e-05,
      "loss": 1.0171,
      "step": 38840
    },
    {
      "epoch": 0.13615956428939427,
      "grad_norm": 3.4375,
      "learning_rate": 2.269243799577098e-05,
      "loss": 1.0187,
      "step": 38850
    },
    {
      "epoch": 0.13619461179628986,
      "grad_norm": 3.1875,
      "learning_rate": 2.2698279185504503e-05,
      "loss": 1.0183,
      "step": 38860
    },
    {
      "epoch": 0.13622965930318548,
      "grad_norm": 2.9375,
      "learning_rate": 2.270412037523803e-05,
      "loss": 1.0315,
      "step": 38870
    },
    {
      "epoch": 0.13626470681008107,
      "grad_norm": 3.5,
      "learning_rate": 2.2709961564971556e-05,
      "loss": 0.9886,
      "step": 38880
    },
    {
      "epoch": 0.13629975431697666,
      "grad_norm": 3.6875,
      "learning_rate": 2.2715802754705078e-05,
      "loss": 1.0275,
      "step": 38890
    },
    {
      "epoch": 0.13633480182387225,
      "grad_norm": 3.203125,
      "learning_rate": 2.2721643944438603e-05,
      "loss": 0.9791,
      "step": 38900
    },
    {
      "epoch": 0.13636984933076784,
      "grad_norm": 3.921875,
      "learning_rate": 2.272748513417213e-05,
      "loss": 0.9478,
      "step": 38910
    },
    {
      "epoch": 0.13640489683766346,
      "grad_norm": 3.515625,
      "learning_rate": 2.2733326323905656e-05,
      "loss": 1.0429,
      "step": 38920
    },
    {
      "epoch": 0.13643994434455906,
      "grad_norm": 3.421875,
      "learning_rate": 2.2739167513639177e-05,
      "loss": 0.9467,
      "step": 38930
    },
    {
      "epoch": 0.13647499185145465,
      "grad_norm": 3.265625,
      "learning_rate": 2.2745008703372702e-05,
      "loss": 0.9781,
      "step": 38940
    },
    {
      "epoch": 0.13651003935835024,
      "grad_norm": 3.265625,
      "learning_rate": 2.275084989310623e-05,
      "loss": 0.9943,
      "step": 38950
    },
    {
      "epoch": 0.13654508686524583,
      "grad_norm": 3.296875,
      "learning_rate": 2.2756691082839755e-05,
      "loss": 0.9768,
      "step": 38960
    },
    {
      "epoch": 0.13658013437214145,
      "grad_norm": 3.46875,
      "learning_rate": 2.2762532272573276e-05,
      "loss": 1.0022,
      "step": 38970
    },
    {
      "epoch": 0.13661518187903704,
      "grad_norm": 3.296875,
      "learning_rate": 2.2768373462306805e-05,
      "loss": 1.0437,
      "step": 38980
    },
    {
      "epoch": 0.13665022938593263,
      "grad_norm": 3.0,
      "learning_rate": 2.277421465204033e-05,
      "loss": 1.0269,
      "step": 38990
    },
    {
      "epoch": 0.13668527689282822,
      "grad_norm": 3.21875,
      "learning_rate": 2.2780055841773854e-05,
      "loss": 0.9824,
      "step": 39000
    },
    {
      "epoch": 0.1367203243997238,
      "grad_norm": 2.875,
      "learning_rate": 2.278589703150738e-05,
      "loss": 0.9154,
      "step": 39010
    },
    {
      "epoch": 0.13675537190661943,
      "grad_norm": 3.109375,
      "learning_rate": 2.2791738221240904e-05,
      "loss": 0.9299,
      "step": 39020
    },
    {
      "epoch": 0.13679041941351502,
      "grad_norm": 3.515625,
      "learning_rate": 2.279757941097443e-05,
      "loss": 1.0295,
      "step": 39030
    },
    {
      "epoch": 0.13682546692041062,
      "grad_norm": 2.84375,
      "learning_rate": 2.2803420600707954e-05,
      "loss": 0.9285,
      "step": 39040
    },
    {
      "epoch": 0.1368605144273062,
      "grad_norm": 3.171875,
      "learning_rate": 2.280926179044148e-05,
      "loss": 1.0042,
      "step": 39050
    },
    {
      "epoch": 0.1368955619342018,
      "grad_norm": 2.984375,
      "learning_rate": 2.2815102980175003e-05,
      "loss": 0.9288,
      "step": 39060
    },
    {
      "epoch": 0.13693060944109742,
      "grad_norm": 2.875,
      "learning_rate": 2.282094416990853e-05,
      "loss": 1.0052,
      "step": 39070
    },
    {
      "epoch": 0.136965656947993,
      "grad_norm": 3.328125,
      "learning_rate": 2.2826785359642053e-05,
      "loss": 1.0226,
      "step": 39080
    },
    {
      "epoch": 0.1370007044548886,
      "grad_norm": 3.203125,
      "learning_rate": 2.2832626549375578e-05,
      "loss": 0.9568,
      "step": 39090
    },
    {
      "epoch": 0.1370357519617842,
      "grad_norm": 3.5625,
      "learning_rate": 2.2838467739109103e-05,
      "loss": 1.0554,
      "step": 39100
    },
    {
      "epoch": 0.13707079946867978,
      "grad_norm": 3.359375,
      "learning_rate": 2.2844308928842628e-05,
      "loss": 1.086,
      "step": 39110
    },
    {
      "epoch": 0.1371058469755754,
      "grad_norm": 3.078125,
      "learning_rate": 2.2850150118576153e-05,
      "loss": 0.9532,
      "step": 39120
    },
    {
      "epoch": 0.137140894482471,
      "grad_norm": 3.421875,
      "learning_rate": 2.2855991308309677e-05,
      "loss": 1.0276,
      "step": 39130
    },
    {
      "epoch": 0.13717594198936658,
      "grad_norm": 3.453125,
      "learning_rate": 2.2861832498043202e-05,
      "loss": 1.0096,
      "step": 39140
    },
    {
      "epoch": 0.13721098949626218,
      "grad_norm": 3.59375,
      "learning_rate": 2.2867673687776727e-05,
      "loss": 1.1012,
      "step": 39150
    },
    {
      "epoch": 0.13724603700315777,
      "grad_norm": 4.25,
      "learning_rate": 2.2873514877510252e-05,
      "loss": 1.0805,
      "step": 39160
    },
    {
      "epoch": 0.13728108451005339,
      "grad_norm": 3.265625,
      "learning_rate": 2.2879356067243777e-05,
      "loss": 1.0023,
      "step": 39170
    },
    {
      "epoch": 0.13731613201694898,
      "grad_norm": 3.40625,
      "learning_rate": 2.28851972569773e-05,
      "loss": 1.0581,
      "step": 39180
    },
    {
      "epoch": 0.13735117952384457,
      "grad_norm": 3.984375,
      "learning_rate": 2.2891038446710826e-05,
      "loss": 0.9735,
      "step": 39190
    },
    {
      "epoch": 0.13738622703074016,
      "grad_norm": 3.015625,
      "learning_rate": 2.289687963644435e-05,
      "loss": 0.936,
      "step": 39200
    },
    {
      "epoch": 0.13742127453763578,
      "grad_norm": 3.15625,
      "learning_rate": 2.2902720826177876e-05,
      "loss": 1.0642,
      "step": 39210
    },
    {
      "epoch": 0.13745632204453137,
      "grad_norm": 3.515625,
      "learning_rate": 2.2908562015911404e-05,
      "loss": 1.1262,
      "step": 39220
    },
    {
      "epoch": 0.13749136955142696,
      "grad_norm": 2.765625,
      "learning_rate": 2.2914403205644926e-05,
      "loss": 1.0388,
      "step": 39230
    },
    {
      "epoch": 0.13752641705832255,
      "grad_norm": 2.84375,
      "learning_rate": 2.292024439537845e-05,
      "loss": 0.9248,
      "step": 39240
    },
    {
      "epoch": 0.13756146456521814,
      "grad_norm": 3.203125,
      "learning_rate": 2.2926085585111976e-05,
      "loss": 0.9416,
      "step": 39250
    },
    {
      "epoch": 0.13759651207211376,
      "grad_norm": 3.46875,
      "learning_rate": 2.2931926774845504e-05,
      "loss": 1.0277,
      "step": 39260
    },
    {
      "epoch": 0.13763155957900935,
      "grad_norm": 3.390625,
      "learning_rate": 2.2937767964579025e-05,
      "loss": 1.088,
      "step": 39270
    },
    {
      "epoch": 0.13766660708590495,
      "grad_norm": 3.78125,
      "learning_rate": 2.294360915431255e-05,
      "loss": 1.004,
      "step": 39280
    },
    {
      "epoch": 0.13770165459280054,
      "grad_norm": 3.609375,
      "learning_rate": 2.2949450344046075e-05,
      "loss": 0.9497,
      "step": 39290
    },
    {
      "epoch": 0.13773670209969613,
      "grad_norm": 2.96875,
      "learning_rate": 2.2955291533779603e-05,
      "loss": 0.9855,
      "step": 39300
    },
    {
      "epoch": 0.13777174960659175,
      "grad_norm": 3.015625,
      "learning_rate": 2.2961132723513125e-05,
      "loss": 1.0213,
      "step": 39310
    },
    {
      "epoch": 0.13780679711348734,
      "grad_norm": 2.96875,
      "learning_rate": 2.296697391324665e-05,
      "loss": 0.9449,
      "step": 39320
    },
    {
      "epoch": 0.13784184462038293,
      "grad_norm": 3.453125,
      "learning_rate": 2.2972815102980178e-05,
      "loss": 1.0063,
      "step": 39330
    },
    {
      "epoch": 0.13787689212727852,
      "grad_norm": 2.71875,
      "learning_rate": 2.2978656292713703e-05,
      "loss": 1.0214,
      "step": 39340
    },
    {
      "epoch": 0.1379119396341741,
      "grad_norm": 3.234375,
      "learning_rate": 2.2984497482447227e-05,
      "loss": 1.0653,
      "step": 39350
    },
    {
      "epoch": 0.13794698714106973,
      "grad_norm": 3.46875,
      "learning_rate": 2.299033867218075e-05,
      "loss": 1.0027,
      "step": 39360
    },
    {
      "epoch": 0.13798203464796532,
      "grad_norm": 3.984375,
      "learning_rate": 2.2996179861914277e-05,
      "loss": 1.0023,
      "step": 39370
    },
    {
      "epoch": 0.13801708215486092,
      "grad_norm": 3.328125,
      "learning_rate": 2.3002021051647802e-05,
      "loss": 0.9775,
      "step": 39380
    },
    {
      "epoch": 0.1380521296617565,
      "grad_norm": 3.671875,
      "learning_rate": 2.3007862241381327e-05,
      "loss": 1.0412,
      "step": 39390
    },
    {
      "epoch": 0.1380871771686521,
      "grad_norm": 2.90625,
      "learning_rate": 2.3013703431114848e-05,
      "loss": 1.0158,
      "step": 39400
    },
    {
      "epoch": 0.13812222467554772,
      "grad_norm": 3.25,
      "learning_rate": 2.3019544620848376e-05,
      "loss": 1.0327,
      "step": 39410
    },
    {
      "epoch": 0.1381572721824433,
      "grad_norm": 3.53125,
      "learning_rate": 2.30253858105819e-05,
      "loss": 1.063,
      "step": 39420
    },
    {
      "epoch": 0.1381923196893389,
      "grad_norm": 3.203125,
      "learning_rate": 2.3031227000315426e-05,
      "loss": 1.0308,
      "step": 39430
    },
    {
      "epoch": 0.1382273671962345,
      "grad_norm": 3.03125,
      "learning_rate": 2.303706819004895e-05,
      "loss": 1.0138,
      "step": 39440
    },
    {
      "epoch": 0.13826241470313008,
      "grad_norm": 3.234375,
      "learning_rate": 2.3042909379782476e-05,
      "loss": 0.9991,
      "step": 39450
    },
    {
      "epoch": 0.1382974622100257,
      "grad_norm": 3.5,
      "learning_rate": 2.3048750569516e-05,
      "loss": 1.0501,
      "step": 39460
    },
    {
      "epoch": 0.1383325097169213,
      "grad_norm": 3.15625,
      "learning_rate": 2.3054591759249525e-05,
      "loss": 0.9529,
      "step": 39470
    },
    {
      "epoch": 0.13836755722381688,
      "grad_norm": 2.96875,
      "learning_rate": 2.306043294898305e-05,
      "loss": 0.9536,
      "step": 39480
    },
    {
      "epoch": 0.13840260473071248,
      "grad_norm": 3.515625,
      "learning_rate": 2.3066274138716575e-05,
      "loss": 0.9962,
      "step": 39490
    },
    {
      "epoch": 0.13843765223760807,
      "grad_norm": 3.0,
      "learning_rate": 2.30721153284501e-05,
      "loss": 1.0178,
      "step": 39500
    },
    {
      "epoch": 0.13847269974450369,
      "grad_norm": 3.515625,
      "learning_rate": 2.3077956518183625e-05,
      "loss": 1.0982,
      "step": 39510
    },
    {
      "epoch": 0.13850774725139928,
      "grad_norm": 3.71875,
      "learning_rate": 2.308379770791715e-05,
      "loss": 1.0807,
      "step": 39520
    },
    {
      "epoch": 0.13854279475829487,
      "grad_norm": 3.03125,
      "learning_rate": 2.3089638897650675e-05,
      "loss": 0.9703,
      "step": 39530
    },
    {
      "epoch": 0.13857784226519046,
      "grad_norm": 3.25,
      "learning_rate": 2.30954800873842e-05,
      "loss": 1.0581,
      "step": 39540
    },
    {
      "epoch": 0.13861288977208605,
      "grad_norm": 3.21875,
      "learning_rate": 2.3101321277117724e-05,
      "loss": 1.0501,
      "step": 39550
    },
    {
      "epoch": 0.13864793727898167,
      "grad_norm": 3.546875,
      "learning_rate": 2.310716246685125e-05,
      "loss": 1.0185,
      "step": 39560
    },
    {
      "epoch": 0.13868298478587726,
      "grad_norm": 3.484375,
      "learning_rate": 2.3113003656584774e-05,
      "loss": 0.9883,
      "step": 39570
    },
    {
      "epoch": 0.13871803229277285,
      "grad_norm": 3.578125,
      "learning_rate": 2.31188448463183e-05,
      "loss": 1.0418,
      "step": 39580
    },
    {
      "epoch": 0.13875307979966844,
      "grad_norm": 3.59375,
      "learning_rate": 2.3124686036051824e-05,
      "loss": 1.0341,
      "step": 39590
    },
    {
      "epoch": 0.13878812730656404,
      "grad_norm": 3.359375,
      "learning_rate": 2.313052722578535e-05,
      "loss": 1.0143,
      "step": 39600
    },
    {
      "epoch": 0.13882317481345965,
      "grad_norm": 3.1875,
      "learning_rate": 2.3136368415518873e-05,
      "loss": 1.0017,
      "step": 39610
    },
    {
      "epoch": 0.13885822232035525,
      "grad_norm": 2.875,
      "learning_rate": 2.3142209605252398e-05,
      "loss": 0.9618,
      "step": 39620
    },
    {
      "epoch": 0.13889326982725084,
      "grad_norm": 2.984375,
      "learning_rate": 2.3148050794985923e-05,
      "loss": 1.13,
      "step": 39630
    },
    {
      "epoch": 0.13892831733414643,
      "grad_norm": 3.5625,
      "learning_rate": 2.3153891984719448e-05,
      "loss": 1.024,
      "step": 39640
    },
    {
      "epoch": 0.13896336484104202,
      "grad_norm": 3.296875,
      "learning_rate": 2.3159733174452976e-05,
      "loss": 1.0595,
      "step": 39650
    },
    {
      "epoch": 0.13899841234793764,
      "grad_norm": 3.703125,
      "learning_rate": 2.3165574364186498e-05,
      "loss": 1.0167,
      "step": 39660
    },
    {
      "epoch": 0.13903345985483323,
      "grad_norm": 3.390625,
      "learning_rate": 2.3171415553920022e-05,
      "loss": 1.0557,
      "step": 39670
    },
    {
      "epoch": 0.13906850736172882,
      "grad_norm": 3.4375,
      "learning_rate": 2.317725674365355e-05,
      "loss": 1.0124,
      "step": 39680
    },
    {
      "epoch": 0.1391035548686244,
      "grad_norm": 3.125,
      "learning_rate": 2.3183097933387075e-05,
      "loss": 1.037,
      "step": 39690
    },
    {
      "epoch": 0.13913860237552,
      "grad_norm": 2.84375,
      "learning_rate": 2.3188939123120597e-05,
      "loss": 0.9668,
      "step": 39700
    },
    {
      "epoch": 0.13917364988241562,
      "grad_norm": 3.484375,
      "learning_rate": 2.3194780312854122e-05,
      "loss": 1.0875,
      "step": 39710
    },
    {
      "epoch": 0.13920869738931121,
      "grad_norm": 3.828125,
      "learning_rate": 2.320062150258765e-05,
      "loss": 0.962,
      "step": 39720
    },
    {
      "epoch": 0.1392437448962068,
      "grad_norm": 3.453125,
      "learning_rate": 2.3206462692321175e-05,
      "loss": 1.019,
      "step": 39730
    },
    {
      "epoch": 0.1392787924031024,
      "grad_norm": 3.34375,
      "learning_rate": 2.3212303882054696e-05,
      "loss": 1.0178,
      "step": 39740
    },
    {
      "epoch": 0.139313839909998,
      "grad_norm": 3.59375,
      "learning_rate": 2.321814507178822e-05,
      "loss": 1.0807,
      "step": 39750
    },
    {
      "epoch": 0.1393488874168936,
      "grad_norm": 3.0,
      "learning_rate": 2.322398626152175e-05,
      "loss": 0.9338,
      "step": 39760
    },
    {
      "epoch": 0.1393839349237892,
      "grad_norm": 3.46875,
      "learning_rate": 2.3229827451255274e-05,
      "loss": 1.022,
      "step": 39770
    },
    {
      "epoch": 0.1394189824306848,
      "grad_norm": 3.15625,
      "learning_rate": 2.32356686409888e-05,
      "loss": 0.9502,
      "step": 39780
    },
    {
      "epoch": 0.13945402993758038,
      "grad_norm": 3.203125,
      "learning_rate": 2.324150983072232e-05,
      "loss": 1.0174,
      "step": 39790
    },
    {
      "epoch": 0.139489077444476,
      "grad_norm": 2.90625,
      "learning_rate": 2.324735102045585e-05,
      "loss": 0.9453,
      "step": 39800
    },
    {
      "epoch": 0.1395241249513716,
      "grad_norm": 3.296875,
      "learning_rate": 2.3253192210189374e-05,
      "loss": 1.0575,
      "step": 39810
    },
    {
      "epoch": 0.13955917245826718,
      "grad_norm": 3.015625,
      "learning_rate": 2.32590333999229e-05,
      "loss": 1.0996,
      "step": 39820
    },
    {
      "epoch": 0.13959421996516277,
      "grad_norm": 3.234375,
      "learning_rate": 2.326487458965642e-05,
      "loss": 1.019,
      "step": 39830
    },
    {
      "epoch": 0.13962926747205837,
      "grad_norm": 3.53125,
      "learning_rate": 2.3270715779389948e-05,
      "loss": 0.958,
      "step": 39840
    },
    {
      "epoch": 0.13966431497895399,
      "grad_norm": 3.296875,
      "learning_rate": 2.3276556969123473e-05,
      "loss": 0.9994,
      "step": 39850
    },
    {
      "epoch": 0.13969936248584958,
      "grad_norm": 3.453125,
      "learning_rate": 2.3282398158856998e-05,
      "loss": 1.0142,
      "step": 39860
    },
    {
      "epoch": 0.13973440999274517,
      "grad_norm": 3.1875,
      "learning_rate": 2.328823934859052e-05,
      "loss": 0.9357,
      "step": 39870
    },
    {
      "epoch": 0.13976945749964076,
      "grad_norm": 3.546875,
      "learning_rate": 2.3294080538324047e-05,
      "loss": 1.0757,
      "step": 39880
    },
    {
      "epoch": 0.13980450500653635,
      "grad_norm": 3.046875,
      "learning_rate": 2.3299921728057572e-05,
      "loss": 0.9963,
      "step": 39890
    },
    {
      "epoch": 0.13983955251343197,
      "grad_norm": 3.234375,
      "learning_rate": 2.3305762917791097e-05,
      "loss": 0.984,
      "step": 39900
    },
    {
      "epoch": 0.13987460002032756,
      "grad_norm": 3.421875,
      "learning_rate": 2.3311604107524622e-05,
      "loss": 1.0122,
      "step": 39910
    },
    {
      "epoch": 0.13990964752722315,
      "grad_norm": 3.5,
      "learning_rate": 2.3317445297258147e-05,
      "loss": 0.9744,
      "step": 39920
    },
    {
      "epoch": 0.13994469503411874,
      "grad_norm": 3.453125,
      "learning_rate": 2.3323286486991672e-05,
      "loss": 1.0132,
      "step": 39930
    },
    {
      "epoch": 0.13997974254101433,
      "grad_norm": 3.15625,
      "learning_rate": 2.3329127676725197e-05,
      "loss": 0.9685,
      "step": 39940
    },
    {
      "epoch": 0.14001479004790995,
      "grad_norm": 3.59375,
      "learning_rate": 2.333496886645872e-05,
      "loss": 1.0183,
      "step": 39950
    },
    {
      "epoch": 0.14004983755480555,
      "grad_norm": 3.5,
      "learning_rate": 2.3340810056192246e-05,
      "loss": 1.0177,
      "step": 39960
    },
    {
      "epoch": 0.14008488506170114,
      "grad_norm": 3.3125,
      "learning_rate": 2.334665124592577e-05,
      "loss": 0.9916,
      "step": 39970
    },
    {
      "epoch": 0.14011993256859673,
      "grad_norm": 3.0,
      "learning_rate": 2.3352492435659296e-05,
      "loss": 0.9157,
      "step": 39980
    },
    {
      "epoch": 0.14015498007549232,
      "grad_norm": 3.296875,
      "learning_rate": 2.335833362539282e-05,
      "loss": 0.9894,
      "step": 39990
    },
    {
      "epoch": 0.14019002758238794,
      "grad_norm": 3.21875,
      "learning_rate": 2.3364174815126346e-05,
      "loss": 0.9682,
      "step": 40000
    },
    {
      "epoch": 0.14019002758238794,
      "eval_loss": 0.954192578792572,
      "eval_runtime": 562.7529,
      "eval_samples_per_second": 676.027,
      "eval_steps_per_second": 56.336,
      "step": 40000
    },
    {
      "epoch": 0.14022507508928353,
      "grad_norm": 3.265625,
      "learning_rate": 2.337001600485987e-05,
      "loss": 1.0204,
      "step": 40010
    },
    {
      "epoch": 0.14026012259617912,
      "grad_norm": 3.84375,
      "learning_rate": 2.3375857194593395e-05,
      "loss": 1.0564,
      "step": 40020
    },
    {
      "epoch": 0.1402951701030747,
      "grad_norm": 3.453125,
      "learning_rate": 2.3381698384326924e-05,
      "loss": 1.0805,
      "step": 40030
    },
    {
      "epoch": 0.1403302176099703,
      "grad_norm": 3.578125,
      "learning_rate": 2.3387539574060445e-05,
      "loss": 1.022,
      "step": 40040
    },
    {
      "epoch": 0.14036526511686592,
      "grad_norm": 3.015625,
      "learning_rate": 2.339338076379397e-05,
      "loss": 1.0362,
      "step": 40050
    },
    {
      "epoch": 0.14040031262376151,
      "grad_norm": 3.34375,
      "learning_rate": 2.3399221953527495e-05,
      "loss": 1.0787,
      "step": 40060
    },
    {
      "epoch": 0.1404353601306571,
      "grad_norm": 3.375,
      "learning_rate": 2.3405063143261023e-05,
      "loss": 1.1076,
      "step": 40070
    },
    {
      "epoch": 0.1404704076375527,
      "grad_norm": 3.546875,
      "learning_rate": 2.3410904332994544e-05,
      "loss": 1.0454,
      "step": 40080
    },
    {
      "epoch": 0.1405054551444483,
      "grad_norm": 3.25,
      "learning_rate": 2.341674552272807e-05,
      "loss": 1.0622,
      "step": 40090
    },
    {
      "epoch": 0.1405405026513439,
      "grad_norm": 3.765625,
      "learning_rate": 2.3422586712461594e-05,
      "loss": 1.0292,
      "step": 40100
    },
    {
      "epoch": 0.1405755501582395,
      "grad_norm": 3.15625,
      "learning_rate": 2.3428427902195122e-05,
      "loss": 1.0546,
      "step": 40110
    },
    {
      "epoch": 0.1406105976651351,
      "grad_norm": 3.171875,
      "learning_rate": 2.3434269091928647e-05,
      "loss": 1.0637,
      "step": 40120
    },
    {
      "epoch": 0.14064564517203068,
      "grad_norm": 3.03125,
      "learning_rate": 2.344011028166217e-05,
      "loss": 1.0332,
      "step": 40130
    },
    {
      "epoch": 0.14068069267892627,
      "grad_norm": 2.890625,
      "learning_rate": 2.3445951471395693e-05,
      "loss": 0.9727,
      "step": 40140
    },
    {
      "epoch": 0.1407157401858219,
      "grad_norm": 3.265625,
      "learning_rate": 2.345179266112922e-05,
      "loss": 0.9957,
      "step": 40150
    },
    {
      "epoch": 0.14075078769271748,
      "grad_norm": 2.96875,
      "learning_rate": 2.3457633850862747e-05,
      "loss": 0.9665,
      "step": 40160
    },
    {
      "epoch": 0.14078583519961307,
      "grad_norm": 3.4375,
      "learning_rate": 2.3463475040596268e-05,
      "loss": 1.1147,
      "step": 40170
    },
    {
      "epoch": 0.14082088270650867,
      "grad_norm": 4.0625,
      "learning_rate": 2.3469316230329793e-05,
      "loss": 0.9752,
      "step": 40180
    },
    {
      "epoch": 0.14085593021340426,
      "grad_norm": 3.1875,
      "learning_rate": 2.347515742006332e-05,
      "loss": 1.1355,
      "step": 40190
    },
    {
      "epoch": 0.14089097772029988,
      "grad_norm": 3.109375,
      "learning_rate": 2.3480998609796846e-05,
      "loss": 0.9872,
      "step": 40200
    },
    {
      "epoch": 0.14092602522719547,
      "grad_norm": 3.53125,
      "learning_rate": 2.348683979953037e-05,
      "loss": 1.0631,
      "step": 40210
    },
    {
      "epoch": 0.14096107273409106,
      "grad_norm": 3.703125,
      "learning_rate": 2.3492680989263892e-05,
      "loss": 1.003,
      "step": 40220
    },
    {
      "epoch": 0.14099612024098665,
      "grad_norm": 2.921875,
      "learning_rate": 2.349852217899742e-05,
      "loss": 0.9963,
      "step": 40230
    },
    {
      "epoch": 0.14103116774788224,
      "grad_norm": 2.921875,
      "learning_rate": 2.3504363368730945e-05,
      "loss": 0.9735,
      "step": 40240
    },
    {
      "epoch": 0.14106621525477786,
      "grad_norm": 3.21875,
      "learning_rate": 2.351020455846447e-05,
      "loss": 0.9971,
      "step": 40250
    },
    {
      "epoch": 0.14110126276167345,
      "grad_norm": 3.46875,
      "learning_rate": 2.351604574819799e-05,
      "loss": 1.0291,
      "step": 40260
    },
    {
      "epoch": 0.14113631026856904,
      "grad_norm": 3.578125,
      "learning_rate": 2.352188693793152e-05,
      "loss": 0.9557,
      "step": 40270
    },
    {
      "epoch": 0.14117135777546463,
      "grad_norm": 3.46875,
      "learning_rate": 2.3527728127665045e-05,
      "loss": 1.0056,
      "step": 40280
    },
    {
      "epoch": 0.14120640528236023,
      "grad_norm": 3.265625,
      "learning_rate": 2.353356931739857e-05,
      "loss": 0.9653,
      "step": 40290
    },
    {
      "epoch": 0.14124145278925584,
      "grad_norm": 3.796875,
      "learning_rate": 2.353941050713209e-05,
      "loss": 0.9964,
      "step": 40300
    },
    {
      "epoch": 0.14127650029615144,
      "grad_norm": 3.609375,
      "learning_rate": 2.354525169686562e-05,
      "loss": 1.0533,
      "step": 40310
    },
    {
      "epoch": 0.14131154780304703,
      "grad_norm": 3.515625,
      "learning_rate": 2.3551092886599144e-05,
      "loss": 1.0779,
      "step": 40320
    },
    {
      "epoch": 0.14134659530994262,
      "grad_norm": 3.359375,
      "learning_rate": 2.355693407633267e-05,
      "loss": 0.9843,
      "step": 40330
    },
    {
      "epoch": 0.14138164281683824,
      "grad_norm": 3.453125,
      "learning_rate": 2.3562775266066194e-05,
      "loss": 1.0823,
      "step": 40340
    },
    {
      "epoch": 0.14141669032373383,
      "grad_norm": 3.09375,
      "learning_rate": 2.356861645579972e-05,
      "loss": 1.1045,
      "step": 40350
    },
    {
      "epoch": 0.14145173783062942,
      "grad_norm": 3.078125,
      "learning_rate": 2.3574457645533243e-05,
      "loss": 1.0649,
      "step": 40360
    },
    {
      "epoch": 0.141486785337525,
      "grad_norm": 3.140625,
      "learning_rate": 2.3580298835266768e-05,
      "loss": 1.0532,
      "step": 40370
    },
    {
      "epoch": 0.1415218328444206,
      "grad_norm": 3.1875,
      "learning_rate": 2.3586140025000293e-05,
      "loss": 1.0081,
      "step": 40380
    },
    {
      "epoch": 0.14155688035131622,
      "grad_norm": 3.1875,
      "learning_rate": 2.3591981214733818e-05,
      "loss": 1.0367,
      "step": 40390
    },
    {
      "epoch": 0.1415919278582118,
      "grad_norm": 3.1875,
      "learning_rate": 2.3597822404467343e-05,
      "loss": 0.9994,
      "step": 40400
    },
    {
      "epoch": 0.1416269753651074,
      "grad_norm": 2.765625,
      "learning_rate": 2.3603663594200868e-05,
      "loss": 1.0317,
      "step": 40410
    },
    {
      "epoch": 0.141662022872003,
      "grad_norm": 3.0625,
      "learning_rate": 2.3609504783934392e-05,
      "loss": 0.9678,
      "step": 40420
    },
    {
      "epoch": 0.1416970703788986,
      "grad_norm": 2.921875,
      "learning_rate": 2.3615345973667917e-05,
      "loss": 0.9955,
      "step": 40430
    },
    {
      "epoch": 0.1417321178857942,
      "grad_norm": 3.375,
      "learning_rate": 2.3621187163401442e-05,
      "loss": 1.0945,
      "step": 40440
    },
    {
      "epoch": 0.1417671653926898,
      "grad_norm": 3.421875,
      "learning_rate": 2.3627028353134967e-05,
      "loss": 0.9912,
      "step": 40450
    },
    {
      "epoch": 0.1418022128995854,
      "grad_norm": 3.21875,
      "learning_rate": 2.3632869542868495e-05,
      "loss": 0.9696,
      "step": 40460
    },
    {
      "epoch": 0.14183726040648098,
      "grad_norm": 3.03125,
      "learning_rate": 2.3638710732602017e-05,
      "loss": 0.9658,
      "step": 40470
    },
    {
      "epoch": 0.14187230791337657,
      "grad_norm": 3.25,
      "learning_rate": 2.364455192233554e-05,
      "loss": 1.0958,
      "step": 40480
    },
    {
      "epoch": 0.1419073554202722,
      "grad_norm": 3.328125,
      "learning_rate": 2.3650393112069066e-05,
      "loss": 1.0495,
      "step": 40490
    },
    {
      "epoch": 0.14194240292716778,
      "grad_norm": 3.25,
      "learning_rate": 2.3656234301802595e-05,
      "loss": 0.9981,
      "step": 40500
    },
    {
      "epoch": 0.14197745043406337,
      "grad_norm": 3.0625,
      "learning_rate": 2.3662075491536116e-05,
      "loss": 0.946,
      "step": 40510
    },
    {
      "epoch": 0.14201249794095896,
      "grad_norm": 3.171875,
      "learning_rate": 2.366791668126964e-05,
      "loss": 0.9521,
      "step": 40520
    },
    {
      "epoch": 0.14204754544785456,
      "grad_norm": 3.875,
      "learning_rate": 2.3673757871003166e-05,
      "loss": 1.0771,
      "step": 40530
    },
    {
      "epoch": 0.14208259295475018,
      "grad_norm": 2.828125,
      "learning_rate": 2.3679599060736694e-05,
      "loss": 1.0521,
      "step": 40540
    },
    {
      "epoch": 0.14211764046164577,
      "grad_norm": 3.390625,
      "learning_rate": 2.368544025047022e-05,
      "loss": 0.9698,
      "step": 40550
    },
    {
      "epoch": 0.14215268796854136,
      "grad_norm": 3.296875,
      "learning_rate": 2.369128144020374e-05,
      "loss": 0.9925,
      "step": 40560
    },
    {
      "epoch": 0.14218773547543695,
      "grad_norm": 3.34375,
      "learning_rate": 2.3697122629937265e-05,
      "loss": 0.9872,
      "step": 40570
    },
    {
      "epoch": 0.14222278298233254,
      "grad_norm": 3.546875,
      "learning_rate": 2.3702963819670793e-05,
      "loss": 1.0456,
      "step": 40580
    },
    {
      "epoch": 0.14225783048922816,
      "grad_norm": 3.21875,
      "learning_rate": 2.3708805009404318e-05,
      "loss": 1.0491,
      "step": 40590
    },
    {
      "epoch": 0.14229287799612375,
      "grad_norm": 3.375,
      "learning_rate": 2.371464619913784e-05,
      "loss": 1.1007,
      "step": 40600
    },
    {
      "epoch": 0.14232792550301934,
      "grad_norm": 2.953125,
      "learning_rate": 2.3720487388871365e-05,
      "loss": 1.0162,
      "step": 40610
    },
    {
      "epoch": 0.14236297300991493,
      "grad_norm": 3.078125,
      "learning_rate": 2.3726328578604893e-05,
      "loss": 1.0407,
      "step": 40620
    },
    {
      "epoch": 0.14239802051681053,
      "grad_norm": 3.375,
      "learning_rate": 2.3732169768338418e-05,
      "loss": 0.9395,
      "step": 40630
    },
    {
      "epoch": 0.14243306802370614,
      "grad_norm": 3.15625,
      "learning_rate": 2.373801095807194e-05,
      "loss": 1.0456,
      "step": 40640
    },
    {
      "epoch": 0.14246811553060174,
      "grad_norm": 2.90625,
      "learning_rate": 2.3743852147805464e-05,
      "loss": 0.9234,
      "step": 40650
    },
    {
      "epoch": 0.14250316303749733,
      "grad_norm": 3.1875,
      "learning_rate": 2.3749693337538992e-05,
      "loss": 0.9587,
      "step": 40660
    },
    {
      "epoch": 0.14253821054439292,
      "grad_norm": 3.4375,
      "learning_rate": 2.3755534527272517e-05,
      "loss": 0.9776,
      "step": 40670
    },
    {
      "epoch": 0.1425732580512885,
      "grad_norm": 3.296875,
      "learning_rate": 2.3761375717006042e-05,
      "loss": 1.0078,
      "step": 40680
    },
    {
      "epoch": 0.14260830555818413,
      "grad_norm": 3.625,
      "learning_rate": 2.3767216906739563e-05,
      "loss": 0.9855,
      "step": 40690
    },
    {
      "epoch": 0.14264335306507972,
      "grad_norm": 3.21875,
      "learning_rate": 2.377305809647309e-05,
      "loss": 0.9885,
      "step": 40700
    },
    {
      "epoch": 0.1426784005719753,
      "grad_norm": 3.5625,
      "learning_rate": 2.3778899286206616e-05,
      "loss": 0.9858,
      "step": 40710
    },
    {
      "epoch": 0.1427134480788709,
      "grad_norm": 2.953125,
      "learning_rate": 2.378474047594014e-05,
      "loss": 0.9486,
      "step": 40720
    },
    {
      "epoch": 0.1427484955857665,
      "grad_norm": 3.171875,
      "learning_rate": 2.3790581665673666e-05,
      "loss": 1.001,
      "step": 40730
    },
    {
      "epoch": 0.1427835430926621,
      "grad_norm": 3.125,
      "learning_rate": 2.379642285540719e-05,
      "loss": 0.9316,
      "step": 40740
    },
    {
      "epoch": 0.1428185905995577,
      "grad_norm": 3.609375,
      "learning_rate": 2.3802264045140716e-05,
      "loss": 1.0683,
      "step": 40750
    },
    {
      "epoch": 0.1428536381064533,
      "grad_norm": 3.0625,
      "learning_rate": 2.380810523487424e-05,
      "loss": 1.0589,
      "step": 40760
    },
    {
      "epoch": 0.1428886856133489,
      "grad_norm": 3.0,
      "learning_rate": 2.3813946424607765e-05,
      "loss": 1.0846,
      "step": 40770
    },
    {
      "epoch": 0.14292373312024448,
      "grad_norm": 3.03125,
      "learning_rate": 2.381978761434129e-05,
      "loss": 1.0323,
      "step": 40780
    },
    {
      "epoch": 0.1429587806271401,
      "grad_norm": 3.359375,
      "learning_rate": 2.3825628804074815e-05,
      "loss": 0.9655,
      "step": 40790
    },
    {
      "epoch": 0.1429938281340357,
      "grad_norm": 3.09375,
      "learning_rate": 2.383146999380834e-05,
      "loss": 1.0488,
      "step": 40800
    },
    {
      "epoch": 0.14302887564093128,
      "grad_norm": 3.1875,
      "learning_rate": 2.3837311183541865e-05,
      "loss": 1.0138,
      "step": 40810
    },
    {
      "epoch": 0.14306392314782687,
      "grad_norm": 3.09375,
      "learning_rate": 2.384315237327539e-05,
      "loss": 1.0133,
      "step": 40820
    },
    {
      "epoch": 0.14309897065472246,
      "grad_norm": 3.21875,
      "learning_rate": 2.3848993563008914e-05,
      "loss": 1.0602,
      "step": 40830
    },
    {
      "epoch": 0.14313401816161808,
      "grad_norm": 3.0,
      "learning_rate": 2.385483475274244e-05,
      "loss": 0.9707,
      "step": 40840
    },
    {
      "epoch": 0.14316906566851367,
      "grad_norm": 3.53125,
      "learning_rate": 2.3860675942475964e-05,
      "loss": 0.9682,
      "step": 40850
    },
    {
      "epoch": 0.14320411317540926,
      "grad_norm": 3.140625,
      "learning_rate": 2.386651713220949e-05,
      "loss": 1.0117,
      "step": 40860
    },
    {
      "epoch": 0.14323916068230486,
      "grad_norm": 3.296875,
      "learning_rate": 2.3872358321943014e-05,
      "loss": 1.0399,
      "step": 40870
    },
    {
      "epoch": 0.14327420818920047,
      "grad_norm": 3.4375,
      "learning_rate": 2.387819951167654e-05,
      "loss": 1.0369,
      "step": 40880
    },
    {
      "epoch": 0.14330925569609607,
      "grad_norm": 2.984375,
      "learning_rate": 2.3884040701410067e-05,
      "loss": 0.9929,
      "step": 40890
    },
    {
      "epoch": 0.14334430320299166,
      "grad_norm": 3.109375,
      "learning_rate": 2.388988189114359e-05,
      "loss": 1.0343,
      "step": 40900
    },
    {
      "epoch": 0.14337935070988725,
      "grad_norm": 3.484375,
      "learning_rate": 2.3895723080877113e-05,
      "loss": 1.0364,
      "step": 40910
    },
    {
      "epoch": 0.14341439821678284,
      "grad_norm": 3.296875,
      "learning_rate": 2.3901564270610638e-05,
      "loss": 0.9602,
      "step": 40920
    },
    {
      "epoch": 0.14344944572367846,
      "grad_norm": 3.109375,
      "learning_rate": 2.3907405460344166e-05,
      "loss": 1.0054,
      "step": 40930
    },
    {
      "epoch": 0.14348449323057405,
      "grad_norm": 3.140625,
      "learning_rate": 2.3913246650077688e-05,
      "loss": 0.9588,
      "step": 40940
    },
    {
      "epoch": 0.14351954073746964,
      "grad_norm": 3.703125,
      "learning_rate": 2.3919087839811213e-05,
      "loss": 0.9878,
      "step": 40950
    },
    {
      "epoch": 0.14355458824436523,
      "grad_norm": 3.484375,
      "learning_rate": 2.3924929029544737e-05,
      "loss": 1.0743,
      "step": 40960
    },
    {
      "epoch": 0.14358963575126082,
      "grad_norm": 3.203125,
      "learning_rate": 2.3930770219278266e-05,
      "loss": 1.0598,
      "step": 40970
    },
    {
      "epoch": 0.14362468325815644,
      "grad_norm": 3.4375,
      "learning_rate": 2.3936611409011787e-05,
      "loss": 0.9451,
      "step": 40980
    },
    {
      "epoch": 0.14365973076505203,
      "grad_norm": 3.734375,
      "learning_rate": 2.3942452598745312e-05,
      "loss": 0.9851,
      "step": 40990
    },
    {
      "epoch": 0.14369477827194763,
      "grad_norm": 3.5,
      "learning_rate": 2.3948293788478837e-05,
      "loss": 1.016,
      "step": 41000
    },
    {
      "epoch": 0.14372982577884322,
      "grad_norm": 3.015625,
      "learning_rate": 2.3954134978212365e-05,
      "loss": 1.0137,
      "step": 41010
    },
    {
      "epoch": 0.1437648732857388,
      "grad_norm": 3.171875,
      "learning_rate": 2.395997616794589e-05,
      "loss": 1.0322,
      "step": 41020
    },
    {
      "epoch": 0.14379992079263443,
      "grad_norm": 3.0,
      "learning_rate": 2.396581735767941e-05,
      "loss": 1.0105,
      "step": 41030
    },
    {
      "epoch": 0.14383496829953002,
      "grad_norm": 3.171875,
      "learning_rate": 2.397165854741294e-05,
      "loss": 1.0111,
      "step": 41040
    },
    {
      "epoch": 0.1438700158064256,
      "grad_norm": 3.78125,
      "learning_rate": 2.3977499737146464e-05,
      "loss": 0.9871,
      "step": 41050
    },
    {
      "epoch": 0.1439050633133212,
      "grad_norm": 3.109375,
      "learning_rate": 2.398334092687999e-05,
      "loss": 0.9967,
      "step": 41060
    },
    {
      "epoch": 0.1439401108202168,
      "grad_norm": 3.109375,
      "learning_rate": 2.398918211661351e-05,
      "loss": 0.9143,
      "step": 41070
    },
    {
      "epoch": 0.1439751583271124,
      "grad_norm": 3.484375,
      "learning_rate": 2.399502330634704e-05,
      "loss": 0.9977,
      "step": 41080
    },
    {
      "epoch": 0.144010205834008,
      "grad_norm": 3.734375,
      "learning_rate": 2.4000864496080564e-05,
      "loss": 1.0108,
      "step": 41090
    },
    {
      "epoch": 0.1440452533409036,
      "grad_norm": 3.5,
      "learning_rate": 2.400670568581409e-05,
      "loss": 1.0935,
      "step": 41100
    },
    {
      "epoch": 0.1440803008477992,
      "grad_norm": 3.34375,
      "learning_rate": 2.4012546875547614e-05,
      "loss": 1.0654,
      "step": 41110
    },
    {
      "epoch": 0.14411534835469478,
      "grad_norm": 3.6875,
      "learning_rate": 2.401838806528114e-05,
      "loss": 0.9973,
      "step": 41120
    },
    {
      "epoch": 0.1441503958615904,
      "grad_norm": 3.21875,
      "learning_rate": 2.4024229255014663e-05,
      "loss": 1.0271,
      "step": 41130
    },
    {
      "epoch": 0.144185443368486,
      "grad_norm": 3.03125,
      "learning_rate": 2.4030070444748188e-05,
      "loss": 1.0042,
      "step": 41140
    },
    {
      "epoch": 0.14422049087538158,
      "grad_norm": 3.421875,
      "learning_rate": 2.4035911634481713e-05,
      "loss": 1.0026,
      "step": 41150
    },
    {
      "epoch": 0.14425553838227717,
      "grad_norm": 3.484375,
      "learning_rate": 2.4041752824215238e-05,
      "loss": 1.0518,
      "step": 41160
    },
    {
      "epoch": 0.14429058588917276,
      "grad_norm": 3.1875,
      "learning_rate": 2.4047594013948763e-05,
      "loss": 0.9254,
      "step": 41170
    },
    {
      "epoch": 0.14432563339606838,
      "grad_norm": 3.296875,
      "learning_rate": 2.4053435203682287e-05,
      "loss": 1.0259,
      "step": 41180
    },
    {
      "epoch": 0.14436068090296397,
      "grad_norm": 3.546875,
      "learning_rate": 2.4059276393415812e-05,
      "loss": 1.0953,
      "step": 41190
    },
    {
      "epoch": 0.14439572840985956,
      "grad_norm": 3.046875,
      "learning_rate": 2.4065117583149337e-05,
      "loss": 0.9827,
      "step": 41200
    },
    {
      "epoch": 0.14443077591675516,
      "grad_norm": 2.984375,
      "learning_rate": 2.4070958772882862e-05,
      "loss": 0.9389,
      "step": 41210
    },
    {
      "epoch": 0.14446582342365075,
      "grad_norm": 3.28125,
      "learning_rate": 2.4076799962616387e-05,
      "loss": 1.0219,
      "step": 41220
    },
    {
      "epoch": 0.14450087093054637,
      "grad_norm": 3.65625,
      "learning_rate": 2.408264115234991e-05,
      "loss": 1.091,
      "step": 41230
    },
    {
      "epoch": 0.14453591843744196,
      "grad_norm": 3.546875,
      "learning_rate": 2.4088482342083437e-05,
      "loss": 1.0484,
      "step": 41240
    },
    {
      "epoch": 0.14457096594433755,
      "grad_norm": 3.1875,
      "learning_rate": 2.409432353181696e-05,
      "loss": 0.9467,
      "step": 41250
    },
    {
      "epoch": 0.14460601345123314,
      "grad_norm": 3.234375,
      "learning_rate": 2.4100164721550486e-05,
      "loss": 0.9752,
      "step": 41260
    },
    {
      "epoch": 0.14464106095812873,
      "grad_norm": 3.359375,
      "learning_rate": 2.410600591128401e-05,
      "loss": 1.0638,
      "step": 41270
    },
    {
      "epoch": 0.14467610846502435,
      "grad_norm": 3.34375,
      "learning_rate": 2.4111847101017536e-05,
      "loss": 1.0201,
      "step": 41280
    },
    {
      "epoch": 0.14471115597191994,
      "grad_norm": 3.203125,
      "learning_rate": 2.411768829075106e-05,
      "loss": 0.9184,
      "step": 41290
    },
    {
      "epoch": 0.14474620347881553,
      "grad_norm": 3.625,
      "learning_rate": 2.4123529480484586e-05,
      "loss": 0.9746,
      "step": 41300
    },
    {
      "epoch": 0.14478125098571112,
      "grad_norm": 3.8125,
      "learning_rate": 2.412937067021811e-05,
      "loss": 1.0701,
      "step": 41310
    },
    {
      "epoch": 0.14481629849260672,
      "grad_norm": 3.765625,
      "learning_rate": 2.4135211859951635e-05,
      "loss": 1.0666,
      "step": 41320
    },
    {
      "epoch": 0.14485134599950233,
      "grad_norm": 3.078125,
      "learning_rate": 2.414105304968516e-05,
      "loss": 1.0204,
      "step": 41330
    },
    {
      "epoch": 0.14488639350639793,
      "grad_norm": 3.375,
      "learning_rate": 2.4146894239418685e-05,
      "loss": 0.9467,
      "step": 41340
    },
    {
      "epoch": 0.14492144101329352,
      "grad_norm": 3.265625,
      "learning_rate": 2.415273542915221e-05,
      "loss": 1.0068,
      "step": 41350
    },
    {
      "epoch": 0.1449564885201891,
      "grad_norm": 2.921875,
      "learning_rate": 2.4158576618885738e-05,
      "loss": 0.935,
      "step": 41360
    },
    {
      "epoch": 0.1449915360270847,
      "grad_norm": 3.4375,
      "learning_rate": 2.416441780861926e-05,
      "loss": 0.9486,
      "step": 41370
    },
    {
      "epoch": 0.14502658353398032,
      "grad_norm": 3.421875,
      "learning_rate": 2.4170258998352784e-05,
      "loss": 1.0786,
      "step": 41380
    },
    {
      "epoch": 0.1450616310408759,
      "grad_norm": 3.421875,
      "learning_rate": 2.4176100188086313e-05,
      "loss": 1.0627,
      "step": 41390
    },
    {
      "epoch": 0.1450966785477715,
      "grad_norm": 3.25,
      "learning_rate": 2.4181941377819837e-05,
      "loss": 1.0644,
      "step": 41400
    },
    {
      "epoch": 0.1451317260546671,
      "grad_norm": 3.109375,
      "learning_rate": 2.418778256755336e-05,
      "loss": 1.1216,
      "step": 41410
    },
    {
      "epoch": 0.1451667735615627,
      "grad_norm": 3.578125,
      "learning_rate": 2.4193623757286884e-05,
      "loss": 0.9837,
      "step": 41420
    },
    {
      "epoch": 0.1452018210684583,
      "grad_norm": 3.5,
      "learning_rate": 2.4199464947020412e-05,
      "loss": 1.0086,
      "step": 41430
    },
    {
      "epoch": 0.1452368685753539,
      "grad_norm": 3.34375,
      "learning_rate": 2.4205306136753937e-05,
      "loss": 1.1216,
      "step": 41440
    },
    {
      "epoch": 0.14527191608224949,
      "grad_norm": 3.4375,
      "learning_rate": 2.421114732648746e-05,
      "loss": 1.0769,
      "step": 41450
    },
    {
      "epoch": 0.14530696358914508,
      "grad_norm": 2.5,
      "learning_rate": 2.4216988516220983e-05,
      "loss": 0.904,
      "step": 41460
    },
    {
      "epoch": 0.1453420110960407,
      "grad_norm": 3.453125,
      "learning_rate": 2.422282970595451e-05,
      "loss": 1.0157,
      "step": 41470
    },
    {
      "epoch": 0.1453770586029363,
      "grad_norm": 3.484375,
      "learning_rate": 2.4228670895688036e-05,
      "loss": 1.0552,
      "step": 41480
    },
    {
      "epoch": 0.14541210610983188,
      "grad_norm": 3.046875,
      "learning_rate": 2.423451208542156e-05,
      "loss": 1.0508,
      "step": 41490
    },
    {
      "epoch": 0.14544715361672747,
      "grad_norm": 3.15625,
      "learning_rate": 2.4240353275155082e-05,
      "loss": 0.9931,
      "step": 41500
    },
    {
      "epoch": 0.14548220112362306,
      "grad_norm": 3.03125,
      "learning_rate": 2.424619446488861e-05,
      "loss": 1.0147,
      "step": 41510
    },
    {
      "epoch": 0.14551724863051868,
      "grad_norm": 3.046875,
      "learning_rate": 2.4252035654622136e-05,
      "loss": 0.978,
      "step": 41520
    },
    {
      "epoch": 0.14555229613741427,
      "grad_norm": 3.4375,
      "learning_rate": 2.425787684435566e-05,
      "loss": 1.0133,
      "step": 41530
    },
    {
      "epoch": 0.14558734364430986,
      "grad_norm": 3.4375,
      "learning_rate": 2.4263718034089182e-05,
      "loss": 0.973,
      "step": 41540
    },
    {
      "epoch": 0.14562239115120545,
      "grad_norm": 3.59375,
      "learning_rate": 2.426955922382271e-05,
      "loss": 0.9747,
      "step": 41550
    },
    {
      "epoch": 0.14565743865810105,
      "grad_norm": 3.21875,
      "learning_rate": 2.4275400413556235e-05,
      "loss": 0.9124,
      "step": 41560
    },
    {
      "epoch": 0.14569248616499667,
      "grad_norm": 3.4375,
      "learning_rate": 2.428124160328976e-05,
      "loss": 1.0443,
      "step": 41570
    },
    {
      "epoch": 0.14572753367189226,
      "grad_norm": 2.875,
      "learning_rate": 2.4287082793023285e-05,
      "loss": 1.0152,
      "step": 41580
    },
    {
      "epoch": 0.14576258117878785,
      "grad_norm": 3.515625,
      "learning_rate": 2.429292398275681e-05,
      "loss": 1.022,
      "step": 41590
    },
    {
      "epoch": 0.14579762868568344,
      "grad_norm": 3.625,
      "learning_rate": 2.4298765172490334e-05,
      "loss": 0.9623,
      "step": 41600
    },
    {
      "epoch": 0.14583267619257903,
      "grad_norm": 3.890625,
      "learning_rate": 2.430460636222386e-05,
      "loss": 0.982,
      "step": 41610
    },
    {
      "epoch": 0.14586772369947465,
      "grad_norm": 3.40625,
      "learning_rate": 2.4310447551957384e-05,
      "loss": 1.0617,
      "step": 41620
    },
    {
      "epoch": 0.14590277120637024,
      "grad_norm": 3.90625,
      "learning_rate": 2.431628874169091e-05,
      "loss": 1.0597,
      "step": 41630
    },
    {
      "epoch": 0.14593781871326583,
      "grad_norm": 3.421875,
      "learning_rate": 2.4322129931424434e-05,
      "loss": 0.9785,
      "step": 41640
    },
    {
      "epoch": 0.14597286622016142,
      "grad_norm": 3.1875,
      "learning_rate": 2.432797112115796e-05,
      "loss": 1.0383,
      "step": 41650
    },
    {
      "epoch": 0.14600791372705701,
      "grad_norm": 2.84375,
      "learning_rate": 2.4333812310891483e-05,
      "loss": 1.087,
      "step": 41660
    },
    {
      "epoch": 0.14604296123395263,
      "grad_norm": 3.09375,
      "learning_rate": 2.4339653500625008e-05,
      "loss": 0.9866,
      "step": 41670
    },
    {
      "epoch": 0.14607800874084823,
      "grad_norm": 3.0625,
      "learning_rate": 2.4345494690358533e-05,
      "loss": 1.0214,
      "step": 41680
    },
    {
      "epoch": 0.14611305624774382,
      "grad_norm": 3.53125,
      "learning_rate": 2.4351335880092058e-05,
      "loss": 1.0989,
      "step": 41690
    },
    {
      "epoch": 0.1461481037546394,
      "grad_norm": 3.140625,
      "learning_rate": 2.4357177069825583e-05,
      "loss": 1.0066,
      "step": 41700
    },
    {
      "epoch": 0.146183151261535,
      "grad_norm": 3.65625,
      "learning_rate": 2.4363018259559108e-05,
      "loss": 1.0415,
      "step": 41710
    },
    {
      "epoch": 0.14621819876843062,
      "grad_norm": 3.65625,
      "learning_rate": 2.4368859449292632e-05,
      "loss": 1.101,
      "step": 41720
    },
    {
      "epoch": 0.1462532462753262,
      "grad_norm": 3.0,
      "learning_rate": 2.4374700639026157e-05,
      "loss": 0.9405,
      "step": 41730
    },
    {
      "epoch": 0.1462882937822218,
      "grad_norm": 3.40625,
      "learning_rate": 2.4380541828759686e-05,
      "loss": 0.9933,
      "step": 41740
    },
    {
      "epoch": 0.1463233412891174,
      "grad_norm": 3.28125,
      "learning_rate": 2.4386383018493207e-05,
      "loss": 1.0663,
      "step": 41750
    },
    {
      "epoch": 0.14635838879601298,
      "grad_norm": 3.078125,
      "learning_rate": 2.4392224208226732e-05,
      "loss": 1.0213,
      "step": 41760
    },
    {
      "epoch": 0.1463934363029086,
      "grad_norm": 3.515625,
      "learning_rate": 2.4398065397960257e-05,
      "loss": 1.0359,
      "step": 41770
    },
    {
      "epoch": 0.1464284838098042,
      "grad_norm": 3.046875,
      "learning_rate": 2.4403906587693785e-05,
      "loss": 1.0167,
      "step": 41780
    },
    {
      "epoch": 0.14646353131669979,
      "grad_norm": 3.53125,
      "learning_rate": 2.440974777742731e-05,
      "loss": 1.0223,
      "step": 41790
    },
    {
      "epoch": 0.14649857882359538,
      "grad_norm": 3.6875,
      "learning_rate": 2.441558896716083e-05,
      "loss": 0.9647,
      "step": 41800
    },
    {
      "epoch": 0.14653362633049097,
      "grad_norm": 3.3125,
      "learning_rate": 2.4421430156894356e-05,
      "loss": 1.0929,
      "step": 41810
    },
    {
      "epoch": 0.1465686738373866,
      "grad_norm": 3.171875,
      "learning_rate": 2.4427271346627884e-05,
      "loss": 0.9728,
      "step": 41820
    },
    {
      "epoch": 0.14660372134428218,
      "grad_norm": 3.171875,
      "learning_rate": 2.443311253636141e-05,
      "loss": 0.9691,
      "step": 41830
    },
    {
      "epoch": 0.14663876885117777,
      "grad_norm": 2.8125,
      "learning_rate": 2.443895372609493e-05,
      "loss": 0.9634,
      "step": 41840
    },
    {
      "epoch": 0.14667381635807336,
      "grad_norm": 3.421875,
      "learning_rate": 2.4444794915828455e-05,
      "loss": 1.0571,
      "step": 41850
    },
    {
      "epoch": 0.14670886386496895,
      "grad_norm": 3.671875,
      "learning_rate": 2.4450636105561984e-05,
      "loss": 1.0516,
      "step": 41860
    },
    {
      "epoch": 0.14674391137186457,
      "grad_norm": 3.34375,
      "learning_rate": 2.445647729529551e-05,
      "loss": 1.0354,
      "step": 41870
    },
    {
      "epoch": 0.14677895887876016,
      "grad_norm": 3.203125,
      "learning_rate": 2.446231848502903e-05,
      "loss": 0.9208,
      "step": 41880
    },
    {
      "epoch": 0.14681400638565575,
      "grad_norm": 3.65625,
      "learning_rate": 2.4468159674762555e-05,
      "loss": 1.025,
      "step": 41890
    },
    {
      "epoch": 0.14684905389255135,
      "grad_norm": 3.234375,
      "learning_rate": 2.4474000864496083e-05,
      "loss": 0.9963,
      "step": 41900
    },
    {
      "epoch": 0.14688410139944694,
      "grad_norm": 3.28125,
      "learning_rate": 2.4479842054229608e-05,
      "loss": 0.9795,
      "step": 41910
    },
    {
      "epoch": 0.14691914890634256,
      "grad_norm": 3.015625,
      "learning_rate": 2.4485683243963133e-05,
      "loss": 1.0226,
      "step": 41920
    },
    {
      "epoch": 0.14695419641323815,
      "grad_norm": 3.046875,
      "learning_rate": 2.4491524433696654e-05,
      "loss": 0.9671,
      "step": 41930
    },
    {
      "epoch": 0.14698924392013374,
      "grad_norm": 3.46875,
      "learning_rate": 2.4497365623430182e-05,
      "loss": 1.0287,
      "step": 41940
    },
    {
      "epoch": 0.14702429142702933,
      "grad_norm": 3.25,
      "learning_rate": 2.4503206813163707e-05,
      "loss": 0.943,
      "step": 41950
    },
    {
      "epoch": 0.14705933893392495,
      "grad_norm": 3.109375,
      "learning_rate": 2.4509048002897232e-05,
      "loss": 0.9691,
      "step": 41960
    },
    {
      "epoch": 0.14709438644082054,
      "grad_norm": 3.28125,
      "learning_rate": 2.4514889192630754e-05,
      "loss": 0.9934,
      "step": 41970
    },
    {
      "epoch": 0.14712943394771613,
      "grad_norm": 3.0625,
      "learning_rate": 2.4520730382364282e-05,
      "loss": 0.9468,
      "step": 41980
    },
    {
      "epoch": 0.14716448145461172,
      "grad_norm": 2.890625,
      "learning_rate": 2.4526571572097807e-05,
      "loss": 0.9586,
      "step": 41990
    },
    {
      "epoch": 0.14719952896150731,
      "grad_norm": 3.734375,
      "learning_rate": 2.453241276183133e-05,
      "loss": 1.0756,
      "step": 42000
    },
    {
      "epoch": 0.14723457646840293,
      "grad_norm": 3.078125,
      "learning_rate": 2.4538253951564856e-05,
      "loss": 1.0646,
      "step": 42010
    },
    {
      "epoch": 0.14726962397529852,
      "grad_norm": 2.984375,
      "learning_rate": 2.454409514129838e-05,
      "loss": 0.9948,
      "step": 42020
    },
    {
      "epoch": 0.14730467148219412,
      "grad_norm": 2.71875,
      "learning_rate": 2.4549936331031906e-05,
      "loss": 1.0303,
      "step": 42030
    },
    {
      "epoch": 0.1473397189890897,
      "grad_norm": 3.234375,
      "learning_rate": 2.455577752076543e-05,
      "loss": 0.9654,
      "step": 42040
    },
    {
      "epoch": 0.1473747664959853,
      "grad_norm": 3.609375,
      "learning_rate": 2.4561618710498956e-05,
      "loss": 0.9812,
      "step": 42050
    },
    {
      "epoch": 0.14740981400288092,
      "grad_norm": 3.0,
      "learning_rate": 2.456745990023248e-05,
      "loss": 1.1014,
      "step": 42060
    },
    {
      "epoch": 0.1474448615097765,
      "grad_norm": 3.234375,
      "learning_rate": 2.4573301089966005e-05,
      "loss": 0.9804,
      "step": 42070
    },
    {
      "epoch": 0.1474799090166721,
      "grad_norm": 3.359375,
      "learning_rate": 2.457914227969953e-05,
      "loss": 1.0977,
      "step": 42080
    },
    {
      "epoch": 0.1475149565235677,
      "grad_norm": 3.53125,
      "learning_rate": 2.4584983469433055e-05,
      "loss": 1.0582,
      "step": 42090
    },
    {
      "epoch": 0.14755000403046328,
      "grad_norm": 2.734375,
      "learning_rate": 2.459082465916658e-05,
      "loss": 1.0268,
      "step": 42100
    },
    {
      "epoch": 0.1475850515373589,
      "grad_norm": 3.015625,
      "learning_rate": 2.4596665848900105e-05,
      "loss": 0.9969,
      "step": 42110
    },
    {
      "epoch": 0.1476200990442545,
      "grad_norm": 3.25,
      "learning_rate": 2.460250703863363e-05,
      "loss": 1.012,
      "step": 42120
    },
    {
      "epoch": 0.14765514655115008,
      "grad_norm": 3.765625,
      "learning_rate": 2.4608348228367158e-05,
      "loss": 1.0168,
      "step": 42130
    },
    {
      "epoch": 0.14769019405804568,
      "grad_norm": 3.265625,
      "learning_rate": 2.461418941810068e-05,
      "loss": 1.1287,
      "step": 42140
    },
    {
      "epoch": 0.14772524156494127,
      "grad_norm": 3.515625,
      "learning_rate": 2.4620030607834204e-05,
      "loss": 1.0573,
      "step": 42150
    },
    {
      "epoch": 0.1477602890718369,
      "grad_norm": 3.96875,
      "learning_rate": 2.462587179756773e-05,
      "loss": 1.0056,
      "step": 42160
    },
    {
      "epoch": 0.14779533657873248,
      "grad_norm": 3.40625,
      "learning_rate": 2.4631712987301257e-05,
      "loss": 0.9886,
      "step": 42170
    },
    {
      "epoch": 0.14783038408562807,
      "grad_norm": 3.828125,
      "learning_rate": 2.463755417703478e-05,
      "loss": 0.9664,
      "step": 42180
    },
    {
      "epoch": 0.14786543159252366,
      "grad_norm": 2.953125,
      "learning_rate": 2.4643395366768304e-05,
      "loss": 1.0459,
      "step": 42190
    },
    {
      "epoch": 0.14790047909941925,
      "grad_norm": 3.140625,
      "learning_rate": 2.464923655650183e-05,
      "loss": 0.9868,
      "step": 42200
    },
    {
      "epoch": 0.14793552660631487,
      "grad_norm": 3.34375,
      "learning_rate": 2.4655077746235357e-05,
      "loss": 0.9877,
      "step": 42210
    },
    {
      "epoch": 0.14797057411321046,
      "grad_norm": 3.515625,
      "learning_rate": 2.4660918935968878e-05,
      "loss": 0.9541,
      "step": 42220
    },
    {
      "epoch": 0.14800562162010605,
      "grad_norm": 4.09375,
      "learning_rate": 2.4666760125702403e-05,
      "loss": 1.1153,
      "step": 42230
    },
    {
      "epoch": 0.14804066912700165,
      "grad_norm": 3.5,
      "learning_rate": 2.4672601315435928e-05,
      "loss": 1.0281,
      "step": 42240
    },
    {
      "epoch": 0.14807571663389724,
      "grad_norm": 3.15625,
      "learning_rate": 2.4678442505169456e-05,
      "loss": 1.0326,
      "step": 42250
    },
    {
      "epoch": 0.14811076414079286,
      "grad_norm": 3.03125,
      "learning_rate": 2.468428369490298e-05,
      "loss": 1.0198,
      "step": 42260
    },
    {
      "epoch": 0.14814581164768845,
      "grad_norm": 3.0625,
      "learning_rate": 2.4690124884636502e-05,
      "loss": 0.954,
      "step": 42270
    },
    {
      "epoch": 0.14818085915458404,
      "grad_norm": 3.03125,
      "learning_rate": 2.4695966074370027e-05,
      "loss": 0.9924,
      "step": 42280
    },
    {
      "epoch": 0.14821590666147963,
      "grad_norm": 3.828125,
      "learning_rate": 2.4701807264103555e-05,
      "loss": 1.0608,
      "step": 42290
    },
    {
      "epoch": 0.14825095416837522,
      "grad_norm": 3.484375,
      "learning_rate": 2.470764845383708e-05,
      "loss": 1.0679,
      "step": 42300
    },
    {
      "epoch": 0.14828600167527084,
      "grad_norm": 3.34375,
      "learning_rate": 2.47134896435706e-05,
      "loss": 1.0276,
      "step": 42310
    },
    {
      "epoch": 0.14832104918216643,
      "grad_norm": 3.53125,
      "learning_rate": 2.4719330833304126e-05,
      "loss": 0.9769,
      "step": 42320
    },
    {
      "epoch": 0.14835609668906202,
      "grad_norm": 3.484375,
      "learning_rate": 2.4725172023037655e-05,
      "loss": 1.1063,
      "step": 42330
    },
    {
      "epoch": 0.1483911441959576,
      "grad_norm": 3.25,
      "learning_rate": 2.473101321277118e-05,
      "loss": 1.0151,
      "step": 42340
    },
    {
      "epoch": 0.1484261917028532,
      "grad_norm": 2.9375,
      "learning_rate": 2.4736854402504704e-05,
      "loss": 1.0188,
      "step": 42350
    },
    {
      "epoch": 0.14846123920974882,
      "grad_norm": 3.75,
      "learning_rate": 2.4742695592238226e-05,
      "loss": 1.0395,
      "step": 42360
    },
    {
      "epoch": 0.14849628671664442,
      "grad_norm": 3.46875,
      "learning_rate": 2.4748536781971754e-05,
      "loss": 0.995,
      "step": 42370
    },
    {
      "epoch": 0.14853133422354,
      "grad_norm": 4.09375,
      "learning_rate": 2.475437797170528e-05,
      "loss": 1.0602,
      "step": 42380
    },
    {
      "epoch": 0.1485663817304356,
      "grad_norm": 3.078125,
      "learning_rate": 2.4760219161438804e-05,
      "loss": 1.0727,
      "step": 42390
    },
    {
      "epoch": 0.1486014292373312,
      "grad_norm": 3.390625,
      "learning_rate": 2.4766060351172325e-05,
      "loss": 0.9795,
      "step": 42400
    },
    {
      "epoch": 0.1486364767442268,
      "grad_norm": 3.390625,
      "learning_rate": 2.4771901540905853e-05,
      "loss": 1.059,
      "step": 42410
    },
    {
      "epoch": 0.1486715242511224,
      "grad_norm": 3.21875,
      "learning_rate": 2.477774273063938e-05,
      "loss": 1.0072,
      "step": 42420
    },
    {
      "epoch": 0.148706571758018,
      "grad_norm": 3.671875,
      "learning_rate": 2.4783583920372903e-05,
      "loss": 0.995,
      "step": 42430
    },
    {
      "epoch": 0.14874161926491358,
      "grad_norm": 3.46875,
      "learning_rate": 2.4789425110106428e-05,
      "loss": 1.0058,
      "step": 42440
    },
    {
      "epoch": 0.14877666677180917,
      "grad_norm": 3.328125,
      "learning_rate": 2.4795266299839953e-05,
      "loss": 1.0062,
      "step": 42450
    },
    {
      "epoch": 0.1488117142787048,
      "grad_norm": 3.40625,
      "learning_rate": 2.4801107489573478e-05,
      "loss": 0.9738,
      "step": 42460
    },
    {
      "epoch": 0.14884676178560038,
      "grad_norm": 2.96875,
      "learning_rate": 2.4806948679307003e-05,
      "loss": 0.9252,
      "step": 42470
    },
    {
      "epoch": 0.14888180929249598,
      "grad_norm": 3.234375,
      "learning_rate": 2.4812789869040527e-05,
      "loss": 0.9454,
      "step": 42480
    },
    {
      "epoch": 0.14891685679939157,
      "grad_norm": 2.96875,
      "learning_rate": 2.4818631058774052e-05,
      "loss": 0.977,
      "step": 42490
    },
    {
      "epoch": 0.14895190430628716,
      "grad_norm": 3.453125,
      "learning_rate": 2.4824472248507577e-05,
      "loss": 0.9795,
      "step": 42500
    },
    {
      "epoch": 0.14898695181318278,
      "grad_norm": 3.0625,
      "learning_rate": 2.4830313438241102e-05,
      "loss": 0.987,
      "step": 42510
    },
    {
      "epoch": 0.14902199932007837,
      "grad_norm": 3.28125,
      "learning_rate": 2.4836154627974627e-05,
      "loss": 1.0228,
      "step": 42520
    },
    {
      "epoch": 0.14905704682697396,
      "grad_norm": 3.296875,
      "learning_rate": 2.484199581770815e-05,
      "loss": 1.0894,
      "step": 42530
    },
    {
      "epoch": 0.14909209433386955,
      "grad_norm": 3.546875,
      "learning_rate": 2.4847837007441676e-05,
      "loss": 1.0511,
      "step": 42540
    },
    {
      "epoch": 0.14912714184076517,
      "grad_norm": 3.5,
      "learning_rate": 2.48536781971752e-05,
      "loss": 1.0606,
      "step": 42550
    },
    {
      "epoch": 0.14916218934766076,
      "grad_norm": 3.046875,
      "learning_rate": 2.4859519386908726e-05,
      "loss": 0.9766,
      "step": 42560
    },
    {
      "epoch": 0.14919723685455635,
      "grad_norm": 3.234375,
      "learning_rate": 2.486536057664225e-05,
      "loss": 0.9942,
      "step": 42570
    },
    {
      "epoch": 0.14923228436145194,
      "grad_norm": 3.5,
      "learning_rate": 2.4871201766375776e-05,
      "loss": 1.0228,
      "step": 42580
    },
    {
      "epoch": 0.14926733186834754,
      "grad_norm": 3.03125,
      "learning_rate": 2.48770429561093e-05,
      "loss": 0.905,
      "step": 42590
    },
    {
      "epoch": 0.14930237937524315,
      "grad_norm": 3.8125,
      "learning_rate": 2.488288414584283e-05,
      "loss": 1.1007,
      "step": 42600
    },
    {
      "epoch": 0.14933742688213875,
      "grad_norm": 3.375,
      "learning_rate": 2.488872533557635e-05,
      "loss": 0.9988,
      "step": 42610
    },
    {
      "epoch": 0.14937247438903434,
      "grad_norm": 3.453125,
      "learning_rate": 2.4894566525309875e-05,
      "loss": 0.9672,
      "step": 42620
    },
    {
      "epoch": 0.14940752189592993,
      "grad_norm": 3.1875,
      "learning_rate": 2.49004077150434e-05,
      "loss": 1.0281,
      "step": 42630
    },
    {
      "epoch": 0.14944256940282552,
      "grad_norm": 3.59375,
      "learning_rate": 2.4906248904776928e-05,
      "loss": 0.9934,
      "step": 42640
    },
    {
      "epoch": 0.14947761690972114,
      "grad_norm": 3.328125,
      "learning_rate": 2.491209009451045e-05,
      "loss": 0.9399,
      "step": 42650
    },
    {
      "epoch": 0.14951266441661673,
      "grad_norm": 3.421875,
      "learning_rate": 2.4917931284243975e-05,
      "loss": 1.052,
      "step": 42660
    },
    {
      "epoch": 0.14954771192351232,
      "grad_norm": 3.296875,
      "learning_rate": 2.49237724739775e-05,
      "loss": 1.0525,
      "step": 42670
    },
    {
      "epoch": 0.1495827594304079,
      "grad_norm": 3.390625,
      "learning_rate": 2.4929613663711028e-05,
      "loss": 0.981,
      "step": 42680
    },
    {
      "epoch": 0.1496178069373035,
      "grad_norm": 3.296875,
      "learning_rate": 2.4935454853444553e-05,
      "loss": 1.0606,
      "step": 42690
    },
    {
      "epoch": 0.14965285444419912,
      "grad_norm": 3.21875,
      "learning_rate": 2.4941296043178074e-05,
      "loss": 0.9544,
      "step": 42700
    },
    {
      "epoch": 0.14968790195109472,
      "grad_norm": 3.59375,
      "learning_rate": 2.49471372329116e-05,
      "loss": 1.0794,
      "step": 42710
    },
    {
      "epoch": 0.1497229494579903,
      "grad_norm": 3.15625,
      "learning_rate": 2.4952978422645127e-05,
      "loss": 0.9949,
      "step": 42720
    },
    {
      "epoch": 0.1497579969648859,
      "grad_norm": 3.078125,
      "learning_rate": 2.4958819612378652e-05,
      "loss": 1.0817,
      "step": 42730
    },
    {
      "epoch": 0.1497930444717815,
      "grad_norm": 3.34375,
      "learning_rate": 2.4964660802112173e-05,
      "loss": 0.9914,
      "step": 42740
    },
    {
      "epoch": 0.1498280919786771,
      "grad_norm": 3.03125,
      "learning_rate": 2.49705019918457e-05,
      "loss": 1.0929,
      "step": 42750
    },
    {
      "epoch": 0.1498631394855727,
      "grad_norm": 3.640625,
      "learning_rate": 2.4976343181579226e-05,
      "loss": 1.0036,
      "step": 42760
    },
    {
      "epoch": 0.1498981869924683,
      "grad_norm": 3.421875,
      "learning_rate": 2.498218437131275e-05,
      "loss": 1.0724,
      "step": 42770
    },
    {
      "epoch": 0.14993323449936388,
      "grad_norm": 3.265625,
      "learning_rate": 2.4988025561046273e-05,
      "loss": 1.0969,
      "step": 42780
    },
    {
      "epoch": 0.14996828200625947,
      "grad_norm": 3.296875,
      "learning_rate": 2.49938667507798e-05,
      "loss": 1.0278,
      "step": 42790
    },
    {
      "epoch": 0.1500033295131551,
      "grad_norm": 3.0,
      "learning_rate": 2.4999707940513326e-05,
      "loss": 0.9942,
      "step": 42800
    },
    {
      "epoch": 0.15003837702005068,
      "grad_norm": 3.15625,
      "learning_rate": 2.5005549130246854e-05,
      "loss": 0.9277,
      "step": 42810
    },
    {
      "epoch": 0.15007342452694628,
      "grad_norm": 2.796875,
      "learning_rate": 2.5011390319980375e-05,
      "loss": 1.0089,
      "step": 42820
    },
    {
      "epoch": 0.15010847203384187,
      "grad_norm": 3.3125,
      "learning_rate": 2.50172315097139e-05,
      "loss": 0.9277,
      "step": 42830
    },
    {
      "epoch": 0.15014351954073746,
      "grad_norm": 3.390625,
      "learning_rate": 2.5023072699447425e-05,
      "loss": 1.0188,
      "step": 42840
    },
    {
      "epoch": 0.15017856704763308,
      "grad_norm": 3.234375,
      "learning_rate": 2.502891388918095e-05,
      "loss": 1.1081,
      "step": 42850
    },
    {
      "epoch": 0.15021361455452867,
      "grad_norm": 3.015625,
      "learning_rate": 2.5034755078914475e-05,
      "loss": 0.9882,
      "step": 42860
    },
    {
      "epoch": 0.15024866206142426,
      "grad_norm": 3.34375,
      "learning_rate": 2.5040596268647996e-05,
      "loss": 0.9956,
      "step": 42870
    },
    {
      "epoch": 0.15028370956831985,
      "grad_norm": 3.328125,
      "learning_rate": 2.504643745838152e-05,
      "loss": 1.0688,
      "step": 42880
    },
    {
      "epoch": 0.15031875707521544,
      "grad_norm": 3.828125,
      "learning_rate": 2.5052278648115053e-05,
      "loss": 1.0211,
      "step": 42890
    },
    {
      "epoch": 0.15035380458211106,
      "grad_norm": 3.46875,
      "learning_rate": 2.5058119837848578e-05,
      "loss": 1.0229,
      "step": 42900
    },
    {
      "epoch": 0.15038885208900665,
      "grad_norm": 3.140625,
      "learning_rate": 2.50639610275821e-05,
      "loss": 1.0371,
      "step": 42910
    },
    {
      "epoch": 0.15042389959590224,
      "grad_norm": 3.203125,
      "learning_rate": 2.5069802217315624e-05,
      "loss": 0.9279,
      "step": 42920
    },
    {
      "epoch": 0.15045894710279784,
      "grad_norm": 3.015625,
      "learning_rate": 2.507564340704915e-05,
      "loss": 0.9918,
      "step": 42930
    },
    {
      "epoch": 0.15049399460969343,
      "grad_norm": 3.359375,
      "learning_rate": 2.5081484596782674e-05,
      "loss": 0.9858,
      "step": 42940
    },
    {
      "epoch": 0.15052904211658905,
      "grad_norm": 3.65625,
      "learning_rate": 2.50873257865162e-05,
      "loss": 1.1297,
      "step": 42950
    },
    {
      "epoch": 0.15056408962348464,
      "grad_norm": 3.4375,
      "learning_rate": 2.509316697624972e-05,
      "loss": 0.9662,
      "step": 42960
    },
    {
      "epoch": 0.15059913713038023,
      "grad_norm": 3.046875,
      "learning_rate": 2.509900816598325e-05,
      "loss": 1.0365,
      "step": 42970
    },
    {
      "epoch": 0.15063418463727582,
      "grad_norm": 3.046875,
      "learning_rate": 2.5104849355716776e-05,
      "loss": 0.977,
      "step": 42980
    },
    {
      "epoch": 0.1506692321441714,
      "grad_norm": 3.234375,
      "learning_rate": 2.5110690545450298e-05,
      "loss": 0.972,
      "step": 42990
    },
    {
      "epoch": 0.15070427965106703,
      "grad_norm": 3.21875,
      "learning_rate": 2.5116531735183823e-05,
      "loss": 0.9611,
      "step": 43000
    },
    {
      "epoch": 0.15073932715796262,
      "grad_norm": 3.09375,
      "learning_rate": 2.5122372924917348e-05,
      "loss": 1.0013,
      "step": 43010
    },
    {
      "epoch": 0.1507743746648582,
      "grad_norm": 3.296875,
      "learning_rate": 2.5128214114650872e-05,
      "loss": 1.0241,
      "step": 43020
    },
    {
      "epoch": 0.1508094221717538,
      "grad_norm": 3.28125,
      "learning_rate": 2.5134055304384397e-05,
      "loss": 0.9373,
      "step": 43030
    },
    {
      "epoch": 0.1508444696786494,
      "grad_norm": 3.625,
      "learning_rate": 2.5139896494117922e-05,
      "loss": 0.993,
      "step": 43040
    },
    {
      "epoch": 0.15087951718554501,
      "grad_norm": 2.875,
      "learning_rate": 2.514573768385145e-05,
      "loss": 0.9111,
      "step": 43050
    },
    {
      "epoch": 0.1509145646924406,
      "grad_norm": 3.625,
      "learning_rate": 2.5151578873584975e-05,
      "loss": 1.0518,
      "step": 43060
    },
    {
      "epoch": 0.1509496121993362,
      "grad_norm": 3.53125,
      "learning_rate": 2.51574200633185e-05,
      "loss": 1.0711,
      "step": 43070
    },
    {
      "epoch": 0.1509846597062318,
      "grad_norm": 4.0,
      "learning_rate": 2.516326125305202e-05,
      "loss": 1.0393,
      "step": 43080
    },
    {
      "epoch": 0.1510197072131274,
      "grad_norm": 3.015625,
      "learning_rate": 2.5169102442785546e-05,
      "loss": 0.9611,
      "step": 43090
    },
    {
      "epoch": 0.151054754720023,
      "grad_norm": 3.78125,
      "learning_rate": 2.517494363251907e-05,
      "loss": 0.9653,
      "step": 43100
    },
    {
      "epoch": 0.1510898022269186,
      "grad_norm": 2.96875,
      "learning_rate": 2.5180784822252596e-05,
      "loss": 1.0669,
      "step": 43110
    },
    {
      "epoch": 0.15112484973381418,
      "grad_norm": 3.203125,
      "learning_rate": 2.5186626011986124e-05,
      "loss": 1.0168,
      "step": 43120
    },
    {
      "epoch": 0.15115989724070977,
      "grad_norm": 3.484375,
      "learning_rate": 2.519246720171965e-05,
      "loss": 1.0761,
      "step": 43130
    },
    {
      "epoch": 0.1511949447476054,
      "grad_norm": 3.34375,
      "learning_rate": 2.5198308391453174e-05,
      "loss": 1.0083,
      "step": 43140
    },
    {
      "epoch": 0.15122999225450098,
      "grad_norm": 3.390625,
      "learning_rate": 2.52041495811867e-05,
      "loss": 1.0113,
      "step": 43150
    },
    {
      "epoch": 0.15126503976139657,
      "grad_norm": 2.765625,
      "learning_rate": 2.5209990770920224e-05,
      "loss": 0.9555,
      "step": 43160
    },
    {
      "epoch": 0.15130008726829217,
      "grad_norm": 3.5625,
      "learning_rate": 2.5215831960653745e-05,
      "loss": 0.9243,
      "step": 43170
    },
    {
      "epoch": 0.15133513477518776,
      "grad_norm": 3.59375,
      "learning_rate": 2.522167315038727e-05,
      "loss": 0.9964,
      "step": 43180
    },
    {
      "epoch": 0.15137018228208338,
      "grad_norm": 3.140625,
      "learning_rate": 2.5227514340120795e-05,
      "loss": 0.9836,
      "step": 43190
    },
    {
      "epoch": 0.15140522978897897,
      "grad_norm": 3.46875,
      "learning_rate": 2.5233355529854323e-05,
      "loss": 1.0253,
      "step": 43200
    },
    {
      "epoch": 0.15144027729587456,
      "grad_norm": 3.34375,
      "learning_rate": 2.5239196719587848e-05,
      "loss": 1.0006,
      "step": 43210
    },
    {
      "epoch": 0.15147532480277015,
      "grad_norm": 3.03125,
      "learning_rate": 2.5245037909321373e-05,
      "loss": 0.9871,
      "step": 43220
    },
    {
      "epoch": 0.15151037230966574,
      "grad_norm": 3.0,
      "learning_rate": 2.5250879099054898e-05,
      "loss": 1.0188,
      "step": 43230
    },
    {
      "epoch": 0.15154541981656136,
      "grad_norm": 3.5,
      "learning_rate": 2.5256720288788422e-05,
      "loss": 1.0543,
      "step": 43240
    },
    {
      "epoch": 0.15158046732345695,
      "grad_norm": 3.5625,
      "learning_rate": 2.5262561478521947e-05,
      "loss": 1.0168,
      "step": 43250
    },
    {
      "epoch": 0.15161551483035254,
      "grad_norm": 3.3125,
      "learning_rate": 2.526840266825547e-05,
      "loss": 0.9441,
      "step": 43260
    },
    {
      "epoch": 0.15165056233724813,
      "grad_norm": 3.484375,
      "learning_rate": 2.5274243857988993e-05,
      "loss": 1.022,
      "step": 43270
    },
    {
      "epoch": 0.15168560984414373,
      "grad_norm": 3.5,
      "learning_rate": 2.5280085047722525e-05,
      "loss": 1.0537,
      "step": 43280
    },
    {
      "epoch": 0.15172065735103935,
      "grad_norm": 2.84375,
      "learning_rate": 2.5285926237456047e-05,
      "loss": 0.9296,
      "step": 43290
    },
    {
      "epoch": 0.15175570485793494,
      "grad_norm": 3.390625,
      "learning_rate": 2.529176742718957e-05,
      "loss": 0.913,
      "step": 43300
    },
    {
      "epoch": 0.15179075236483053,
      "grad_norm": 3.5,
      "learning_rate": 2.5297608616923096e-05,
      "loss": 0.9941,
      "step": 43310
    },
    {
      "epoch": 0.15182579987172612,
      "grad_norm": 3.09375,
      "learning_rate": 2.530344980665662e-05,
      "loss": 1.0555,
      "step": 43320
    },
    {
      "epoch": 0.1518608473786217,
      "grad_norm": 3.5625,
      "learning_rate": 2.5309290996390146e-05,
      "loss": 0.9898,
      "step": 43330
    },
    {
      "epoch": 0.15189589488551733,
      "grad_norm": 3.375,
      "learning_rate": 2.5315132186123667e-05,
      "loss": 1.0136,
      "step": 43340
    },
    {
      "epoch": 0.15193094239241292,
      "grad_norm": 3.671875,
      "learning_rate": 2.5320973375857192e-05,
      "loss": 1.0215,
      "step": 43350
    },
    {
      "epoch": 0.1519659898993085,
      "grad_norm": 3.171875,
      "learning_rate": 2.5326814565590724e-05,
      "loss": 0.9751,
      "step": 43360
    },
    {
      "epoch": 0.1520010374062041,
      "grad_norm": 3.046875,
      "learning_rate": 2.533265575532425e-05,
      "loss": 0.9653,
      "step": 43370
    },
    {
      "epoch": 0.1520360849130997,
      "grad_norm": 3.203125,
      "learning_rate": 2.533849694505777e-05,
      "loss": 0.9992,
      "step": 43380
    },
    {
      "epoch": 0.15207113241999531,
      "grad_norm": 3.125,
      "learning_rate": 2.5344338134791295e-05,
      "loss": 0.9929,
      "step": 43390
    },
    {
      "epoch": 0.1521061799268909,
      "grad_norm": 3.203125,
      "learning_rate": 2.535017932452482e-05,
      "loss": 1.0051,
      "step": 43400
    },
    {
      "epoch": 0.1521412274337865,
      "grad_norm": 3.46875,
      "learning_rate": 2.5356020514258345e-05,
      "loss": 1.0271,
      "step": 43410
    },
    {
      "epoch": 0.1521762749406821,
      "grad_norm": 3.71875,
      "learning_rate": 2.536186170399187e-05,
      "loss": 1.097,
      "step": 43420
    },
    {
      "epoch": 0.15221132244757768,
      "grad_norm": 3.34375,
      "learning_rate": 2.536770289372539e-05,
      "loss": 0.9609,
      "step": 43430
    },
    {
      "epoch": 0.1522463699544733,
      "grad_norm": 3.4375,
      "learning_rate": 2.5373544083458923e-05,
      "loss": 0.9652,
      "step": 43440
    },
    {
      "epoch": 0.1522814174613689,
      "grad_norm": 3.1875,
      "learning_rate": 2.5379385273192447e-05,
      "loss": 1.1,
      "step": 43450
    },
    {
      "epoch": 0.15231646496826448,
      "grad_norm": 3.25,
      "learning_rate": 2.538522646292597e-05,
      "loss": 1.058,
      "step": 43460
    },
    {
      "epoch": 0.15235151247516007,
      "grad_norm": 3.296875,
      "learning_rate": 2.5391067652659494e-05,
      "loss": 1.0003,
      "step": 43470
    },
    {
      "epoch": 0.15238655998205566,
      "grad_norm": 3.875,
      "learning_rate": 2.539690884239302e-05,
      "loss": 1.0114,
      "step": 43480
    },
    {
      "epoch": 0.15242160748895128,
      "grad_norm": 3.171875,
      "learning_rate": 2.5402750032126543e-05,
      "loss": 0.9965,
      "step": 43490
    },
    {
      "epoch": 0.15245665499584687,
      "grad_norm": 2.875,
      "learning_rate": 2.5408591221860068e-05,
      "loss": 0.9548,
      "step": 43500
    },
    {
      "epoch": 0.15249170250274247,
      "grad_norm": 3.359375,
      "learning_rate": 2.5414432411593597e-05,
      "loss": 0.9286,
      "step": 43510
    },
    {
      "epoch": 0.15252675000963806,
      "grad_norm": 3.0625,
      "learning_rate": 2.542027360132712e-05,
      "loss": 1.0317,
      "step": 43520
    },
    {
      "epoch": 0.15256179751653365,
      "grad_norm": 3.40625,
      "learning_rate": 2.5426114791060646e-05,
      "loss": 1.0567,
      "step": 43530
    },
    {
      "epoch": 0.15259684502342927,
      "grad_norm": 3.03125,
      "learning_rate": 2.543195598079417e-05,
      "loss": 0.9261,
      "step": 43540
    },
    {
      "epoch": 0.15263189253032486,
      "grad_norm": 2.984375,
      "learning_rate": 2.5437797170527693e-05,
      "loss": 1.0777,
      "step": 43550
    },
    {
      "epoch": 0.15266694003722045,
      "grad_norm": 3.203125,
      "learning_rate": 2.5443638360261217e-05,
      "loss": 0.9819,
      "step": 43560
    },
    {
      "epoch": 0.15270198754411604,
      "grad_norm": 3.390625,
      "learning_rate": 2.5449479549994742e-05,
      "loss": 1.0345,
      "step": 43570
    },
    {
      "epoch": 0.15273703505101163,
      "grad_norm": 3.53125,
      "learning_rate": 2.5455320739728267e-05,
      "loss": 0.914,
      "step": 43580
    },
    {
      "epoch": 0.15277208255790725,
      "grad_norm": 3.28125,
      "learning_rate": 2.5461161929461795e-05,
      "loss": 0.9079,
      "step": 43590
    },
    {
      "epoch": 0.15280713006480284,
      "grad_norm": 3.53125,
      "learning_rate": 2.546700311919532e-05,
      "loss": 1.1015,
      "step": 43600
    },
    {
      "epoch": 0.15284217757169843,
      "grad_norm": 3.09375,
      "learning_rate": 2.5472844308928845e-05,
      "loss": 1.0704,
      "step": 43610
    },
    {
      "epoch": 0.15287722507859403,
      "grad_norm": 3.453125,
      "learning_rate": 2.547868549866237e-05,
      "loss": 0.9857,
      "step": 43620
    },
    {
      "epoch": 0.15291227258548964,
      "grad_norm": 2.84375,
      "learning_rate": 2.5484526688395895e-05,
      "loss": 0.9604,
      "step": 43630
    },
    {
      "epoch": 0.15294732009238524,
      "grad_norm": 3.75,
      "learning_rate": 2.5490367878129416e-05,
      "loss": 1.0001,
      "step": 43640
    },
    {
      "epoch": 0.15298236759928083,
      "grad_norm": 3.109375,
      "learning_rate": 2.549620906786294e-05,
      "loss": 0.948,
      "step": 43650
    },
    {
      "epoch": 0.15301741510617642,
      "grad_norm": 3.25,
      "learning_rate": 2.5502050257596466e-05,
      "loss": 1.0034,
      "step": 43660
    },
    {
      "epoch": 0.153052462613072,
      "grad_norm": 3.34375,
      "learning_rate": 2.5507891447329994e-05,
      "loss": 0.9957,
      "step": 43670
    },
    {
      "epoch": 0.15308751011996763,
      "grad_norm": 3.65625,
      "learning_rate": 2.551373263706352e-05,
      "loss": 1.0042,
      "step": 43680
    },
    {
      "epoch": 0.15312255762686322,
      "grad_norm": 3.15625,
      "learning_rate": 2.5519573826797044e-05,
      "loss": 1.0772,
      "step": 43690
    },
    {
      "epoch": 0.1531576051337588,
      "grad_norm": 3.078125,
      "learning_rate": 2.552541501653057e-05,
      "loss": 0.9951,
      "step": 43700
    },
    {
      "epoch": 0.1531926526406544,
      "grad_norm": 3.21875,
      "learning_rate": 2.5531256206264093e-05,
      "loss": 1.0203,
      "step": 43710
    },
    {
      "epoch": 0.15322770014755,
      "grad_norm": 3.359375,
      "learning_rate": 2.5537097395997618e-05,
      "loss": 1.0228,
      "step": 43720
    },
    {
      "epoch": 0.1532627476544456,
      "grad_norm": 2.875,
      "learning_rate": 2.554293858573114e-05,
      "loss": 1.0782,
      "step": 43730
    },
    {
      "epoch": 0.1532977951613412,
      "grad_norm": 3.640625,
      "learning_rate": 2.5548779775464665e-05,
      "loss": 1.0328,
      "step": 43740
    },
    {
      "epoch": 0.1533328426682368,
      "grad_norm": 3.453125,
      "learning_rate": 2.5554620965198196e-05,
      "loss": 0.9996,
      "step": 43750
    },
    {
      "epoch": 0.1533678901751324,
      "grad_norm": 3.046875,
      "learning_rate": 2.5560462154931718e-05,
      "loss": 1.0244,
      "step": 43760
    },
    {
      "epoch": 0.15340293768202798,
      "grad_norm": 3.171875,
      "learning_rate": 2.5566303344665242e-05,
      "loss": 0.996,
      "step": 43770
    },
    {
      "epoch": 0.1534379851889236,
      "grad_norm": 2.921875,
      "learning_rate": 2.5572144534398767e-05,
      "loss": 0.9747,
      "step": 43780
    },
    {
      "epoch": 0.1534730326958192,
      "grad_norm": 3.828125,
      "learning_rate": 2.5577985724132292e-05,
      "loss": 1.0452,
      "step": 43790
    },
    {
      "epoch": 0.15350808020271478,
      "grad_norm": 3.40625,
      "learning_rate": 2.5583826913865817e-05,
      "loss": 0.98,
      "step": 43800
    },
    {
      "epoch": 0.15354312770961037,
      "grad_norm": 3.21875,
      "learning_rate": 2.5589668103599342e-05,
      "loss": 0.9939,
      "step": 43810
    },
    {
      "epoch": 0.15357817521650596,
      "grad_norm": 2.890625,
      "learning_rate": 2.559550929333287e-05,
      "loss": 1.029,
      "step": 43820
    },
    {
      "epoch": 0.15361322272340158,
      "grad_norm": 3.0625,
      "learning_rate": 2.5601350483066395e-05,
      "loss": 0.9934,
      "step": 43830
    },
    {
      "epoch": 0.15364827023029717,
      "grad_norm": 2.984375,
      "learning_rate": 2.560719167279992e-05,
      "loss": 1.1124,
      "step": 43840
    },
    {
      "epoch": 0.15368331773719277,
      "grad_norm": 3.15625,
      "learning_rate": 2.561303286253344e-05,
      "loss": 1.0866,
      "step": 43850
    },
    {
      "epoch": 0.15371836524408836,
      "grad_norm": 3.1875,
      "learning_rate": 2.5618874052266966e-05,
      "loss": 0.9764,
      "step": 43860
    },
    {
      "epoch": 0.15375341275098395,
      "grad_norm": 3.453125,
      "learning_rate": 2.562471524200049e-05,
      "loss": 0.9834,
      "step": 43870
    },
    {
      "epoch": 0.15378846025787957,
      "grad_norm": 3.5625,
      "learning_rate": 2.5630556431734016e-05,
      "loss": 0.9939,
      "step": 43880
    },
    {
      "epoch": 0.15382350776477516,
      "grad_norm": 3.34375,
      "learning_rate": 2.563639762146754e-05,
      "loss": 0.9663,
      "step": 43890
    },
    {
      "epoch": 0.15385855527167075,
      "grad_norm": 3.34375,
      "learning_rate": 2.564223881120107e-05,
      "loss": 1.0458,
      "step": 43900
    },
    {
      "epoch": 0.15389360277856634,
      "grad_norm": 3.328125,
      "learning_rate": 2.5648080000934594e-05,
      "loss": 1.0615,
      "step": 43910
    },
    {
      "epoch": 0.15392865028546193,
      "grad_norm": 3.53125,
      "learning_rate": 2.565392119066812e-05,
      "loss": 1.0303,
      "step": 43920
    },
    {
      "epoch": 0.15396369779235755,
      "grad_norm": 3.5,
      "learning_rate": 2.5659762380401643e-05,
      "loss": 1.0873,
      "step": 43930
    },
    {
      "epoch": 0.15399874529925314,
      "grad_norm": 3.0,
      "learning_rate": 2.5665603570135165e-05,
      "loss": 1.005,
      "step": 43940
    },
    {
      "epoch": 0.15403379280614873,
      "grad_norm": 3.140625,
      "learning_rate": 2.567144475986869e-05,
      "loss": 1.0275,
      "step": 43950
    },
    {
      "epoch": 0.15406884031304433,
      "grad_norm": 3.171875,
      "learning_rate": 2.5677285949602215e-05,
      "loss": 1.0595,
      "step": 43960
    },
    {
      "epoch": 0.15410388781993992,
      "grad_norm": 2.96875,
      "learning_rate": 2.568312713933574e-05,
      "loss": 0.9714,
      "step": 43970
    },
    {
      "epoch": 0.15413893532683554,
      "grad_norm": 3.34375,
      "learning_rate": 2.5688968329069268e-05,
      "loss": 0.9857,
      "step": 43980
    },
    {
      "epoch": 0.15417398283373113,
      "grad_norm": 3.53125,
      "learning_rate": 2.5694809518802792e-05,
      "loss": 1.0565,
      "step": 43990
    },
    {
      "epoch": 0.15420903034062672,
      "grad_norm": 3.46875,
      "learning_rate": 2.5700650708536317e-05,
      "loss": 1.0535,
      "step": 44000
    },
    {
      "epoch": 0.1542440778475223,
      "grad_norm": 3.09375,
      "learning_rate": 2.5706491898269842e-05,
      "loss": 1.0077,
      "step": 44010
    },
    {
      "epoch": 0.1542791253544179,
      "grad_norm": 3.421875,
      "learning_rate": 2.5712333088003364e-05,
      "loss": 1.029,
      "step": 44020
    },
    {
      "epoch": 0.15431417286131352,
      "grad_norm": 3.5,
      "learning_rate": 2.571817427773689e-05,
      "loss": 0.9845,
      "step": 44030
    },
    {
      "epoch": 0.1543492203682091,
      "grad_norm": 3.265625,
      "learning_rate": 2.5724015467470413e-05,
      "loss": 1.0328,
      "step": 44040
    },
    {
      "epoch": 0.1543842678751047,
      "grad_norm": 3.25,
      "learning_rate": 2.5729856657203938e-05,
      "loss": 0.9532,
      "step": 44050
    },
    {
      "epoch": 0.1544193153820003,
      "grad_norm": 3.5625,
      "learning_rate": 2.5735697846937466e-05,
      "loss": 1.0903,
      "step": 44060
    },
    {
      "epoch": 0.15445436288889589,
      "grad_norm": 3.234375,
      "learning_rate": 2.574153903667099e-05,
      "loss": 1.0044,
      "step": 44070
    },
    {
      "epoch": 0.1544894103957915,
      "grad_norm": 3.375,
      "learning_rate": 2.5747380226404516e-05,
      "loss": 1.0252,
      "step": 44080
    },
    {
      "epoch": 0.1545244579026871,
      "grad_norm": 2.953125,
      "learning_rate": 2.575322141613804e-05,
      "loss": 1.008,
      "step": 44090
    },
    {
      "epoch": 0.1545595054095827,
      "grad_norm": 3.328125,
      "learning_rate": 2.5759062605871566e-05,
      "loss": 0.9758,
      "step": 44100
    },
    {
      "epoch": 0.15459455291647828,
      "grad_norm": 3.5,
      "learning_rate": 2.5764903795605087e-05,
      "loss": 1.081,
      "step": 44110
    },
    {
      "epoch": 0.15462960042337387,
      "grad_norm": 3.421875,
      "learning_rate": 2.5770744985338612e-05,
      "loss": 1.0261,
      "step": 44120
    },
    {
      "epoch": 0.1546646479302695,
      "grad_norm": 3.515625,
      "learning_rate": 2.5776586175072144e-05,
      "loss": 1.0981,
      "step": 44130
    },
    {
      "epoch": 0.15469969543716508,
      "grad_norm": 2.90625,
      "learning_rate": 2.578242736480567e-05,
      "loss": 0.9835,
      "step": 44140
    },
    {
      "epoch": 0.15473474294406067,
      "grad_norm": 3.28125,
      "learning_rate": 2.578826855453919e-05,
      "loss": 1.0624,
      "step": 44150
    },
    {
      "epoch": 0.15476979045095626,
      "grad_norm": 3.53125,
      "learning_rate": 2.5794109744272715e-05,
      "loss": 0.9609,
      "step": 44160
    },
    {
      "epoch": 0.15480483795785188,
      "grad_norm": 3.640625,
      "learning_rate": 2.579995093400624e-05,
      "loss": 0.9868,
      "step": 44170
    },
    {
      "epoch": 0.15483988546474747,
      "grad_norm": 2.953125,
      "learning_rate": 2.5805792123739765e-05,
      "loss": 1.0169,
      "step": 44180
    },
    {
      "epoch": 0.15487493297164306,
      "grad_norm": 3.546875,
      "learning_rate": 2.581163331347329e-05,
      "loss": 1.0287,
      "step": 44190
    },
    {
      "epoch": 0.15490998047853866,
      "grad_norm": 3.140625,
      "learning_rate": 2.581747450320681e-05,
      "loss": 0.9575,
      "step": 44200
    },
    {
      "epoch": 0.15494502798543425,
      "grad_norm": 2.96875,
      "learning_rate": 2.5823315692940342e-05,
      "loss": 0.918,
      "step": 44210
    },
    {
      "epoch": 0.15498007549232987,
      "grad_norm": 3.625,
      "learning_rate": 2.5829156882673867e-05,
      "loss": 0.9639,
      "step": 44220
    },
    {
      "epoch": 0.15501512299922546,
      "grad_norm": 3.28125,
      "learning_rate": 2.583499807240739e-05,
      "loss": 0.9847,
      "step": 44230
    },
    {
      "epoch": 0.15505017050612105,
      "grad_norm": 3.625,
      "learning_rate": 2.5840839262140914e-05,
      "loss": 1.0391,
      "step": 44240
    },
    {
      "epoch": 0.15508521801301664,
      "grad_norm": 3.40625,
      "learning_rate": 2.584668045187444e-05,
      "loss": 0.9909,
      "step": 44250
    },
    {
      "epoch": 0.15512026551991223,
      "grad_norm": 3.390625,
      "learning_rate": 2.5852521641607963e-05,
      "loss": 0.9914,
      "step": 44260
    },
    {
      "epoch": 0.15515531302680785,
      "grad_norm": 3.421875,
      "learning_rate": 2.5858362831341488e-05,
      "loss": 1.0846,
      "step": 44270
    },
    {
      "epoch": 0.15519036053370344,
      "grad_norm": 3.375,
      "learning_rate": 2.5864204021075013e-05,
      "loss": 1.092,
      "step": 44280
    },
    {
      "epoch": 0.15522540804059903,
      "grad_norm": 3.0625,
      "learning_rate": 2.587004521080854e-05,
      "loss": 1.0457,
      "step": 44290
    },
    {
      "epoch": 0.15526045554749462,
      "grad_norm": 3.25,
      "learning_rate": 2.5875886400542066e-05,
      "loss": 1.1162,
      "step": 44300
    },
    {
      "epoch": 0.15529550305439022,
      "grad_norm": 3.34375,
      "learning_rate": 2.588172759027559e-05,
      "loss": 1.0705,
      "step": 44310
    },
    {
      "epoch": 0.15533055056128584,
      "grad_norm": 3.390625,
      "learning_rate": 2.5887568780009112e-05,
      "loss": 0.9209,
      "step": 44320
    },
    {
      "epoch": 0.15536559806818143,
      "grad_norm": 2.953125,
      "learning_rate": 2.5893409969742637e-05,
      "loss": 1.0348,
      "step": 44330
    },
    {
      "epoch": 0.15540064557507702,
      "grad_norm": 3.296875,
      "learning_rate": 2.5899251159476162e-05,
      "loss": 1.0622,
      "step": 44340
    },
    {
      "epoch": 0.1554356930819726,
      "grad_norm": 3.453125,
      "learning_rate": 2.5905092349209687e-05,
      "loss": 1.0899,
      "step": 44350
    },
    {
      "epoch": 0.1554707405888682,
      "grad_norm": 3.125,
      "learning_rate": 2.5910933538943212e-05,
      "loss": 0.9813,
      "step": 44360
    },
    {
      "epoch": 0.15550578809576382,
      "grad_norm": 3.359375,
      "learning_rate": 2.591677472867674e-05,
      "loss": 1.0817,
      "step": 44370
    },
    {
      "epoch": 0.1555408356026594,
      "grad_norm": 3.46875,
      "learning_rate": 2.5922615918410265e-05,
      "loss": 1.0319,
      "step": 44380
    },
    {
      "epoch": 0.155575883109555,
      "grad_norm": 3.34375,
      "learning_rate": 2.592845710814379e-05,
      "loss": 1.0784,
      "step": 44390
    },
    {
      "epoch": 0.1556109306164506,
      "grad_norm": 3.1875,
      "learning_rate": 2.5934298297877314e-05,
      "loss": 1.0422,
      "step": 44400
    },
    {
      "epoch": 0.15564597812334618,
      "grad_norm": 3.125,
      "learning_rate": 2.5940139487610836e-05,
      "loss": 1.0149,
      "step": 44410
    },
    {
      "epoch": 0.1556810256302418,
      "grad_norm": 3.828125,
      "learning_rate": 2.594598067734436e-05,
      "loss": 1.1604,
      "step": 44420
    },
    {
      "epoch": 0.1557160731371374,
      "grad_norm": 3.0,
      "learning_rate": 2.5951821867077886e-05,
      "loss": 0.9386,
      "step": 44430
    },
    {
      "epoch": 0.155751120644033,
      "grad_norm": 3.171875,
      "learning_rate": 2.595766305681141e-05,
      "loss": 1.0563,
      "step": 44440
    },
    {
      "epoch": 0.15578616815092858,
      "grad_norm": 3.28125,
      "learning_rate": 2.596350424654494e-05,
      "loss": 1.0391,
      "step": 44450
    },
    {
      "epoch": 0.15582121565782417,
      "grad_norm": 3.328125,
      "learning_rate": 2.5969345436278464e-05,
      "loss": 1.0596,
      "step": 44460
    },
    {
      "epoch": 0.1558562631647198,
      "grad_norm": 2.875,
      "learning_rate": 2.597518662601199e-05,
      "loss": 0.9588,
      "step": 44470
    },
    {
      "epoch": 0.15589131067161538,
      "grad_norm": 3.71875,
      "learning_rate": 2.5981027815745513e-05,
      "loss": 0.985,
      "step": 44480
    },
    {
      "epoch": 0.15592635817851097,
      "grad_norm": 3.53125,
      "learning_rate": 2.5986869005479038e-05,
      "loss": 0.9995,
      "step": 44490
    },
    {
      "epoch": 0.15596140568540656,
      "grad_norm": 3.171875,
      "learning_rate": 2.599271019521256e-05,
      "loss": 1.0846,
      "step": 44500
    },
    {
      "epoch": 0.15599645319230215,
      "grad_norm": 3.421875,
      "learning_rate": 2.5998551384946084e-05,
      "loss": 1.0893,
      "step": 44510
    },
    {
      "epoch": 0.15603150069919777,
      "grad_norm": 3.078125,
      "learning_rate": 2.6004392574679616e-05,
      "loss": 0.9913,
      "step": 44520
    },
    {
      "epoch": 0.15606654820609336,
      "grad_norm": 3.078125,
      "learning_rate": 2.6010233764413137e-05,
      "loss": 1.0061,
      "step": 44530
    },
    {
      "epoch": 0.15610159571298896,
      "grad_norm": 3.515625,
      "learning_rate": 2.6016074954146662e-05,
      "loss": 1.0211,
      "step": 44540
    },
    {
      "epoch": 0.15613664321988455,
      "grad_norm": 3.546875,
      "learning_rate": 2.6021916143880187e-05,
      "loss": 0.9747,
      "step": 44550
    },
    {
      "epoch": 0.15617169072678014,
      "grad_norm": 3.609375,
      "learning_rate": 2.6027757333613712e-05,
      "loss": 0.9463,
      "step": 44560
    },
    {
      "epoch": 0.15620673823367576,
      "grad_norm": 3.171875,
      "learning_rate": 2.6033598523347237e-05,
      "loss": 1.0131,
      "step": 44570
    },
    {
      "epoch": 0.15624178574057135,
      "grad_norm": 2.984375,
      "learning_rate": 2.6039439713080758e-05,
      "loss": 1.0135,
      "step": 44580
    },
    {
      "epoch": 0.15627683324746694,
      "grad_norm": 3.28125,
      "learning_rate": 2.6045280902814283e-05,
      "loss": 1.0052,
      "step": 44590
    },
    {
      "epoch": 0.15631188075436253,
      "grad_norm": 3.078125,
      "learning_rate": 2.6051122092547815e-05,
      "loss": 0.9894,
      "step": 44600
    },
    {
      "epoch": 0.15634692826125812,
      "grad_norm": 3.140625,
      "learning_rate": 2.605696328228134e-05,
      "loss": 0.9877,
      "step": 44610
    },
    {
      "epoch": 0.15638197576815374,
      "grad_norm": 3.453125,
      "learning_rate": 2.606280447201486e-05,
      "loss": 1.0683,
      "step": 44620
    },
    {
      "epoch": 0.15641702327504933,
      "grad_norm": 3.28125,
      "learning_rate": 2.6068645661748386e-05,
      "loss": 0.9599,
      "step": 44630
    },
    {
      "epoch": 0.15645207078194492,
      "grad_norm": 3.140625,
      "learning_rate": 2.607448685148191e-05,
      "loss": 1.0046,
      "step": 44640
    },
    {
      "epoch": 0.15648711828884052,
      "grad_norm": 3.703125,
      "learning_rate": 2.6080328041215436e-05,
      "loss": 1.0424,
      "step": 44650
    },
    {
      "epoch": 0.1565221657957361,
      "grad_norm": 3.453125,
      "learning_rate": 2.608616923094896e-05,
      "loss": 1.1042,
      "step": 44660
    },
    {
      "epoch": 0.15655721330263173,
      "grad_norm": 3.609375,
      "learning_rate": 2.6092010420682482e-05,
      "loss": 1.0408,
      "step": 44670
    },
    {
      "epoch": 0.15659226080952732,
      "grad_norm": 3.5625,
      "learning_rate": 2.6097851610416014e-05,
      "loss": 0.9678,
      "step": 44680
    },
    {
      "epoch": 0.1566273083164229,
      "grad_norm": 3.140625,
      "learning_rate": 2.610369280014954e-05,
      "loss": 0.9964,
      "step": 44690
    },
    {
      "epoch": 0.1566623558233185,
      "grad_norm": 3.765625,
      "learning_rate": 2.6109533989883063e-05,
      "loss": 0.9997,
      "step": 44700
    },
    {
      "epoch": 0.15669740333021412,
      "grad_norm": 3.046875,
      "learning_rate": 2.6115375179616585e-05,
      "loss": 0.9617,
      "step": 44710
    },
    {
      "epoch": 0.1567324508371097,
      "grad_norm": 2.984375,
      "learning_rate": 2.612121636935011e-05,
      "loss": 0.9705,
      "step": 44720
    },
    {
      "epoch": 0.1567674983440053,
      "grad_norm": 3.21875,
      "learning_rate": 2.6127057559083634e-05,
      "loss": 1.0529,
      "step": 44730
    },
    {
      "epoch": 0.1568025458509009,
      "grad_norm": 3.359375,
      "learning_rate": 2.613289874881716e-05,
      "loss": 0.9925,
      "step": 44740
    },
    {
      "epoch": 0.15683759335779648,
      "grad_norm": 3.171875,
      "learning_rate": 2.6138739938550684e-05,
      "loss": 1.038,
      "step": 44750
    },
    {
      "epoch": 0.1568726408646921,
      "grad_norm": 3.25,
      "learning_rate": 2.6144581128284212e-05,
      "loss": 1.0212,
      "step": 44760
    },
    {
      "epoch": 0.1569076883715877,
      "grad_norm": 3.359375,
      "learning_rate": 2.6150422318017737e-05,
      "loss": 1.0699,
      "step": 44770
    },
    {
      "epoch": 0.15694273587848329,
      "grad_norm": 3.171875,
      "learning_rate": 2.6156263507751262e-05,
      "loss": 1.0152,
      "step": 44780
    },
    {
      "epoch": 0.15697778338537888,
      "grad_norm": 3.53125,
      "learning_rate": 2.6162104697484783e-05,
      "loss": 1.06,
      "step": 44790
    },
    {
      "epoch": 0.15701283089227447,
      "grad_norm": 3.421875,
      "learning_rate": 2.6167945887218308e-05,
      "loss": 1.1437,
      "step": 44800
    },
    {
      "epoch": 0.1570478783991701,
      "grad_norm": 3.03125,
      "learning_rate": 2.6173787076951833e-05,
      "loss": 0.9267,
      "step": 44810
    },
    {
      "epoch": 0.15708292590606568,
      "grad_norm": 3.65625,
      "learning_rate": 2.6179628266685358e-05,
      "loss": 1.0598,
      "step": 44820
    },
    {
      "epoch": 0.15711797341296127,
      "grad_norm": 3.375,
      "learning_rate": 2.6185469456418886e-05,
      "loss": 0.9529,
      "step": 44830
    },
    {
      "epoch": 0.15715302091985686,
      "grad_norm": 3.046875,
      "learning_rate": 2.619131064615241e-05,
      "loss": 0.9758,
      "step": 44840
    },
    {
      "epoch": 0.15718806842675245,
      "grad_norm": 3.3125,
      "learning_rate": 2.6197151835885936e-05,
      "loss": 0.9163,
      "step": 44850
    },
    {
      "epoch": 0.15722311593364807,
      "grad_norm": 3.125,
      "learning_rate": 2.620299302561946e-05,
      "loss": 1.0692,
      "step": 44860
    },
    {
      "epoch": 0.15725816344054366,
      "grad_norm": 3.5625,
      "learning_rate": 2.6208834215352986e-05,
      "loss": 1.0455,
      "step": 44870
    },
    {
      "epoch": 0.15729321094743925,
      "grad_norm": 3.328125,
      "learning_rate": 2.6214675405086507e-05,
      "loss": 1.0358,
      "step": 44880
    },
    {
      "epoch": 0.15732825845433485,
      "grad_norm": 3.53125,
      "learning_rate": 2.6220516594820032e-05,
      "loss": 0.9583,
      "step": 44890
    },
    {
      "epoch": 0.15736330596123044,
      "grad_norm": 3.671875,
      "learning_rate": 2.6226357784553557e-05,
      "loss": 1.0662,
      "step": 44900
    },
    {
      "epoch": 0.15739835346812606,
      "grad_norm": 3.265625,
      "learning_rate": 2.6232198974287085e-05,
      "loss": 1.0434,
      "step": 44910
    },
    {
      "epoch": 0.15743340097502165,
      "grad_norm": 3.609375,
      "learning_rate": 2.623804016402061e-05,
      "loss": 1.061,
      "step": 44920
    },
    {
      "epoch": 0.15746844848191724,
      "grad_norm": 3.53125,
      "learning_rate": 2.6243881353754135e-05,
      "loss": 0.9918,
      "step": 44930
    },
    {
      "epoch": 0.15750349598881283,
      "grad_norm": 3.796875,
      "learning_rate": 2.624972254348766e-05,
      "loss": 1.046,
      "step": 44940
    },
    {
      "epoch": 0.15753854349570842,
      "grad_norm": 3.359375,
      "learning_rate": 2.6255563733221184e-05,
      "loss": 1.0009,
      "step": 44950
    },
    {
      "epoch": 0.15757359100260404,
      "grad_norm": 2.984375,
      "learning_rate": 2.626140492295471e-05,
      "loss": 1.0056,
      "step": 44960
    },
    {
      "epoch": 0.15760863850949963,
      "grad_norm": 3.171875,
      "learning_rate": 2.626724611268823e-05,
      "loss": 1.1135,
      "step": 44970
    },
    {
      "epoch": 0.15764368601639522,
      "grad_norm": 3.703125,
      "learning_rate": 2.6273087302421755e-05,
      "loss": 1.0119,
      "step": 44980
    },
    {
      "epoch": 0.15767873352329081,
      "grad_norm": 3.359375,
      "learning_rate": 2.6278928492155287e-05,
      "loss": 0.974,
      "step": 44990
    },
    {
      "epoch": 0.1577137810301864,
      "grad_norm": 3.234375,
      "learning_rate": 2.628476968188881e-05,
      "loss": 1.0069,
      "step": 45000
    },
    {
      "epoch": 0.1577137810301864,
      "eval_loss": 0.9447451233863831,
      "eval_runtime": 562.5866,
      "eval_samples_per_second": 676.227,
      "eval_steps_per_second": 56.352,
      "step": 45000
    },
    {
      "epoch": 0.15774882853708203,
      "grad_norm": 3.3125,
      "learning_rate": 2.6290610871622333e-05,
      "loss": 1.0083,
      "step": 45010
    },
    {
      "epoch": 0.15778387604397762,
      "grad_norm": 3.0625,
      "learning_rate": 2.6296452061355858e-05,
      "loss": 1.0608,
      "step": 45020
    },
    {
      "epoch": 0.1578189235508732,
      "grad_norm": 3.5625,
      "learning_rate": 2.6302293251089383e-05,
      "loss": 1.0495,
      "step": 45030
    },
    {
      "epoch": 0.1578539710577688,
      "grad_norm": 2.953125,
      "learning_rate": 2.6308134440822908e-05,
      "loss": 0.9789,
      "step": 45040
    },
    {
      "epoch": 0.1578890185646644,
      "grad_norm": 2.984375,
      "learning_rate": 2.6313975630556433e-05,
      "loss": 0.9828,
      "step": 45050
    },
    {
      "epoch": 0.15792406607156,
      "grad_norm": 3.21875,
      "learning_rate": 2.6319816820289954e-05,
      "loss": 1.0088,
      "step": 45060
    },
    {
      "epoch": 0.1579591135784556,
      "grad_norm": 3.046875,
      "learning_rate": 2.6325658010023486e-05,
      "loss": 0.9942,
      "step": 45070
    },
    {
      "epoch": 0.1579941610853512,
      "grad_norm": 3.453125,
      "learning_rate": 2.633149919975701e-05,
      "loss": 0.9337,
      "step": 45080
    },
    {
      "epoch": 0.15802920859224678,
      "grad_norm": 3.34375,
      "learning_rate": 2.6337340389490532e-05,
      "loss": 0.9889,
      "step": 45090
    },
    {
      "epoch": 0.15806425609914238,
      "grad_norm": 3.15625,
      "learning_rate": 2.6343181579224057e-05,
      "loss": 1.0621,
      "step": 45100
    },
    {
      "epoch": 0.158099303606038,
      "grad_norm": 3.53125,
      "learning_rate": 2.6349022768957582e-05,
      "loss": 1.0145,
      "step": 45110
    },
    {
      "epoch": 0.15813435111293359,
      "grad_norm": 2.84375,
      "learning_rate": 2.6354863958691107e-05,
      "loss": 0.9058,
      "step": 45120
    },
    {
      "epoch": 0.15816939861982918,
      "grad_norm": 3.34375,
      "learning_rate": 2.636070514842463e-05,
      "loss": 1.0435,
      "step": 45130
    },
    {
      "epoch": 0.15820444612672477,
      "grad_norm": 3.078125,
      "learning_rate": 2.6366546338158153e-05,
      "loss": 0.9884,
      "step": 45140
    },
    {
      "epoch": 0.15823949363362036,
      "grad_norm": 4.21875,
      "learning_rate": 2.6372387527891685e-05,
      "loss": 1.0145,
      "step": 45150
    },
    {
      "epoch": 0.15827454114051598,
      "grad_norm": 3.203125,
      "learning_rate": 2.637822871762521e-05,
      "loss": 0.9908,
      "step": 45160
    },
    {
      "epoch": 0.15830958864741157,
      "grad_norm": 4.25,
      "learning_rate": 2.6384069907358734e-05,
      "loss": 0.9994,
      "step": 45170
    },
    {
      "epoch": 0.15834463615430716,
      "grad_norm": 2.796875,
      "learning_rate": 2.6389911097092256e-05,
      "loss": 1.046,
      "step": 45180
    },
    {
      "epoch": 0.15837968366120275,
      "grad_norm": 3.265625,
      "learning_rate": 2.639575228682578e-05,
      "loss": 0.9892,
      "step": 45190
    },
    {
      "epoch": 0.15841473116809834,
      "grad_norm": 3.21875,
      "learning_rate": 2.6401593476559305e-05,
      "loss": 1.0066,
      "step": 45200
    },
    {
      "epoch": 0.15844977867499396,
      "grad_norm": 3.34375,
      "learning_rate": 2.640743466629283e-05,
      "loss": 1.0839,
      "step": 45210
    },
    {
      "epoch": 0.15848482618188955,
      "grad_norm": 3.90625,
      "learning_rate": 2.641327585602636e-05,
      "loss": 1.0547,
      "step": 45220
    },
    {
      "epoch": 0.15851987368878515,
      "grad_norm": 3.125,
      "learning_rate": 2.6419117045759883e-05,
      "loss": 1.0769,
      "step": 45230
    },
    {
      "epoch": 0.15855492119568074,
      "grad_norm": 3.125,
      "learning_rate": 2.6424958235493408e-05,
      "loss": 1.0162,
      "step": 45240
    },
    {
      "epoch": 0.15858996870257636,
      "grad_norm": 3.453125,
      "learning_rate": 2.6430799425226933e-05,
      "loss": 1.0347,
      "step": 45250
    },
    {
      "epoch": 0.15862501620947195,
      "grad_norm": 3.484375,
      "learning_rate": 2.6436640614960458e-05,
      "loss": 0.985,
      "step": 45260
    },
    {
      "epoch": 0.15866006371636754,
      "grad_norm": 3.046875,
      "learning_rate": 2.644248180469398e-05,
      "loss": 0.9729,
      "step": 45270
    },
    {
      "epoch": 0.15869511122326313,
      "grad_norm": 3.25,
      "learning_rate": 2.6448322994427504e-05,
      "loss": 0.9862,
      "step": 45280
    },
    {
      "epoch": 0.15873015873015872,
      "grad_norm": 3.09375,
      "learning_rate": 2.645416418416103e-05,
      "loss": 1.0067,
      "step": 45290
    },
    {
      "epoch": 0.15876520623705434,
      "grad_norm": 3.296875,
      "learning_rate": 2.6460005373894557e-05,
      "loss": 1.0133,
      "step": 45300
    },
    {
      "epoch": 0.15880025374394993,
      "grad_norm": 3.296875,
      "learning_rate": 2.6465846563628082e-05,
      "loss": 0.9847,
      "step": 45310
    },
    {
      "epoch": 0.15883530125084552,
      "grad_norm": 2.765625,
      "learning_rate": 2.6471687753361607e-05,
      "loss": 0.9409,
      "step": 45320
    },
    {
      "epoch": 0.15887034875774111,
      "grad_norm": 3.140625,
      "learning_rate": 2.6477528943095132e-05,
      "loss": 1.063,
      "step": 45330
    },
    {
      "epoch": 0.1589053962646367,
      "grad_norm": 3.265625,
      "learning_rate": 2.6483370132828657e-05,
      "loss": 1.0809,
      "step": 45340
    },
    {
      "epoch": 0.15894044377153232,
      "grad_norm": 3.28125,
      "learning_rate": 2.6489211322562178e-05,
      "loss": 1.0183,
      "step": 45350
    },
    {
      "epoch": 0.15897549127842792,
      "grad_norm": 3.171875,
      "learning_rate": 2.6495052512295703e-05,
      "loss": 0.9842,
      "step": 45360
    },
    {
      "epoch": 0.1590105387853235,
      "grad_norm": 3.71875,
      "learning_rate": 2.6500893702029228e-05,
      "loss": 1.0185,
      "step": 45370
    },
    {
      "epoch": 0.1590455862922191,
      "grad_norm": 3.65625,
      "learning_rate": 2.650673489176276e-05,
      "loss": 1.1111,
      "step": 45380
    },
    {
      "epoch": 0.1590806337991147,
      "grad_norm": 3.796875,
      "learning_rate": 2.651257608149628e-05,
      "loss": 0.9782,
      "step": 45390
    },
    {
      "epoch": 0.1591156813060103,
      "grad_norm": 3.828125,
      "learning_rate": 2.6518417271229806e-05,
      "loss": 1.0485,
      "step": 45400
    },
    {
      "epoch": 0.1591507288129059,
      "grad_norm": 3.15625,
      "learning_rate": 2.652425846096333e-05,
      "loss": 1.0529,
      "step": 45410
    },
    {
      "epoch": 0.1591857763198015,
      "grad_norm": 2.953125,
      "learning_rate": 2.6530099650696855e-05,
      "loss": 1.1206,
      "step": 45420
    },
    {
      "epoch": 0.15922082382669708,
      "grad_norm": 3.375,
      "learning_rate": 2.653594084043038e-05,
      "loss": 1.0275,
      "step": 45430
    },
    {
      "epoch": 0.15925587133359267,
      "grad_norm": 3.09375,
      "learning_rate": 2.65417820301639e-05,
      "loss": 1.0188,
      "step": 45440
    },
    {
      "epoch": 0.1592909188404883,
      "grad_norm": 3.40625,
      "learning_rate": 2.6547623219897427e-05,
      "loss": 0.9252,
      "step": 45450
    },
    {
      "epoch": 0.15932596634738388,
      "grad_norm": 3.46875,
      "learning_rate": 2.6553464409630958e-05,
      "loss": 0.9483,
      "step": 45460
    },
    {
      "epoch": 0.15936101385427948,
      "grad_norm": 3.515625,
      "learning_rate": 2.655930559936448e-05,
      "loss": 1.0233,
      "step": 45470
    },
    {
      "epoch": 0.15939606136117507,
      "grad_norm": 3.453125,
      "learning_rate": 2.6565146789098004e-05,
      "loss": 0.99,
      "step": 45480
    },
    {
      "epoch": 0.15943110886807066,
      "grad_norm": 2.84375,
      "learning_rate": 2.657098797883153e-05,
      "loss": 1.0366,
      "step": 45490
    },
    {
      "epoch": 0.15946615637496628,
      "grad_norm": 3.546875,
      "learning_rate": 2.6576829168565054e-05,
      "loss": 0.9845,
      "step": 45500
    },
    {
      "epoch": 0.15950120388186187,
      "grad_norm": 3.203125,
      "learning_rate": 2.658267035829858e-05,
      "loss": 1.0065,
      "step": 45510
    },
    {
      "epoch": 0.15953625138875746,
      "grad_norm": 3.125,
      "learning_rate": 2.6588511548032104e-05,
      "loss": 1.038,
      "step": 45520
    },
    {
      "epoch": 0.15957129889565305,
      "grad_norm": 3.28125,
      "learning_rate": 2.6594352737765632e-05,
      "loss": 0.9792,
      "step": 45530
    },
    {
      "epoch": 0.15960634640254864,
      "grad_norm": 3.5625,
      "learning_rate": 2.6600193927499157e-05,
      "loss": 1.0103,
      "step": 45540
    },
    {
      "epoch": 0.15964139390944426,
      "grad_norm": 3.53125,
      "learning_rate": 2.6606035117232682e-05,
      "loss": 1.0737,
      "step": 45550
    },
    {
      "epoch": 0.15967644141633985,
      "grad_norm": 3.546875,
      "learning_rate": 2.6611876306966203e-05,
      "loss": 1.0021,
      "step": 45560
    },
    {
      "epoch": 0.15971148892323545,
      "grad_norm": 3.359375,
      "learning_rate": 2.6617717496699728e-05,
      "loss": 0.9505,
      "step": 45570
    },
    {
      "epoch": 0.15974653643013104,
      "grad_norm": 3.078125,
      "learning_rate": 2.6623558686433253e-05,
      "loss": 0.9954,
      "step": 45580
    },
    {
      "epoch": 0.15978158393702663,
      "grad_norm": 3.625,
      "learning_rate": 2.6629399876166778e-05,
      "loss": 0.9735,
      "step": 45590
    },
    {
      "epoch": 0.15981663144392225,
      "grad_norm": 3.359375,
      "learning_rate": 2.6635241065900303e-05,
      "loss": 1.0313,
      "step": 45600
    },
    {
      "epoch": 0.15985167895081784,
      "grad_norm": 3.328125,
      "learning_rate": 2.664108225563383e-05,
      "loss": 0.9955,
      "step": 45610
    },
    {
      "epoch": 0.15988672645771343,
      "grad_norm": 3.328125,
      "learning_rate": 2.6646923445367356e-05,
      "loss": 0.926,
      "step": 45620
    },
    {
      "epoch": 0.15992177396460902,
      "grad_norm": 3.125,
      "learning_rate": 2.665276463510088e-05,
      "loss": 0.9835,
      "step": 45630
    },
    {
      "epoch": 0.1599568214715046,
      "grad_norm": 3.40625,
      "learning_rate": 2.6658605824834405e-05,
      "loss": 1.0039,
      "step": 45640
    },
    {
      "epoch": 0.15999186897840023,
      "grad_norm": 3.359375,
      "learning_rate": 2.6664447014567927e-05,
      "loss": 0.9765,
      "step": 45650
    },
    {
      "epoch": 0.16002691648529582,
      "grad_norm": 3.59375,
      "learning_rate": 2.667028820430145e-05,
      "loss": 1.031,
      "step": 45660
    },
    {
      "epoch": 0.16006196399219141,
      "grad_norm": 3.609375,
      "learning_rate": 2.6676129394034976e-05,
      "loss": 0.9878,
      "step": 45670
    },
    {
      "epoch": 0.160097011499087,
      "grad_norm": 3.34375,
      "learning_rate": 2.66819705837685e-05,
      "loss": 0.9952,
      "step": 45680
    },
    {
      "epoch": 0.1601320590059826,
      "grad_norm": 3.234375,
      "learning_rate": 2.668781177350203e-05,
      "loss": 1.0052,
      "step": 45690
    },
    {
      "epoch": 0.16016710651287822,
      "grad_norm": 2.84375,
      "learning_rate": 2.6693652963235554e-05,
      "loss": 0.9787,
      "step": 45700
    },
    {
      "epoch": 0.1602021540197738,
      "grad_norm": 3.25,
      "learning_rate": 2.669949415296908e-05,
      "loss": 1.0527,
      "step": 45710
    },
    {
      "epoch": 0.1602372015266694,
      "grad_norm": 2.796875,
      "learning_rate": 2.6705335342702604e-05,
      "loss": 0.9379,
      "step": 45720
    },
    {
      "epoch": 0.160272249033565,
      "grad_norm": 2.9375,
      "learning_rate": 2.671117653243613e-05,
      "loss": 0.9675,
      "step": 45730
    },
    {
      "epoch": 0.16030729654046058,
      "grad_norm": 2.828125,
      "learning_rate": 2.671701772216965e-05,
      "loss": 0.9252,
      "step": 45740
    },
    {
      "epoch": 0.1603423440473562,
      "grad_norm": 2.6875,
      "learning_rate": 2.6722858911903175e-05,
      "loss": 0.9934,
      "step": 45750
    },
    {
      "epoch": 0.1603773915542518,
      "grad_norm": 3.59375,
      "learning_rate": 2.67287001016367e-05,
      "loss": 1.0901,
      "step": 45760
    },
    {
      "epoch": 0.16041243906114738,
      "grad_norm": 2.953125,
      "learning_rate": 2.673454129137023e-05,
      "loss": 1.0326,
      "step": 45770
    },
    {
      "epoch": 0.16044748656804297,
      "grad_norm": 2.984375,
      "learning_rate": 2.6740382481103753e-05,
      "loss": 1.0023,
      "step": 45780
    },
    {
      "epoch": 0.16048253407493857,
      "grad_norm": 3.125,
      "learning_rate": 2.6746223670837278e-05,
      "loss": 1.0384,
      "step": 45790
    },
    {
      "epoch": 0.16051758158183418,
      "grad_norm": 3.59375,
      "learning_rate": 2.6752064860570803e-05,
      "loss": 1.0277,
      "step": 45800
    },
    {
      "epoch": 0.16055262908872978,
      "grad_norm": 3.5625,
      "learning_rate": 2.6757906050304328e-05,
      "loss": 0.9773,
      "step": 45810
    },
    {
      "epoch": 0.16058767659562537,
      "grad_norm": 3.03125,
      "learning_rate": 2.676374724003785e-05,
      "loss": 0.9849,
      "step": 45820
    },
    {
      "epoch": 0.16062272410252096,
      "grad_norm": 3.078125,
      "learning_rate": 2.6769588429771374e-05,
      "loss": 0.966,
      "step": 45830
    },
    {
      "epoch": 0.16065777160941658,
      "grad_norm": 3.5,
      "learning_rate": 2.6775429619504906e-05,
      "loss": 1.0444,
      "step": 45840
    },
    {
      "epoch": 0.16069281911631217,
      "grad_norm": 3.234375,
      "learning_rate": 2.678127080923843e-05,
      "loss": 1.0395,
      "step": 45850
    },
    {
      "epoch": 0.16072786662320776,
      "grad_norm": 3.125,
      "learning_rate": 2.6787111998971952e-05,
      "loss": 1.0223,
      "step": 45860
    },
    {
      "epoch": 0.16076291413010335,
      "grad_norm": 3.375,
      "learning_rate": 2.6792953188705477e-05,
      "loss": 1.0064,
      "step": 45870
    },
    {
      "epoch": 0.16079796163699894,
      "grad_norm": 3.21875,
      "learning_rate": 2.6798794378439e-05,
      "loss": 0.9495,
      "step": 45880
    },
    {
      "epoch": 0.16083300914389456,
      "grad_norm": 3.375,
      "learning_rate": 2.6804635568172526e-05,
      "loss": 0.9996,
      "step": 45890
    },
    {
      "epoch": 0.16086805665079015,
      "grad_norm": 3.109375,
      "learning_rate": 2.681047675790605e-05,
      "loss": 0.9787,
      "step": 45900
    },
    {
      "epoch": 0.16090310415768574,
      "grad_norm": 3.296875,
      "learning_rate": 2.6816317947639573e-05,
      "loss": 0.9793,
      "step": 45910
    },
    {
      "epoch": 0.16093815166458134,
      "grad_norm": 2.9375,
      "learning_rate": 2.6822159137373104e-05,
      "loss": 1.0142,
      "step": 45920
    },
    {
      "epoch": 0.16097319917147693,
      "grad_norm": 3.25,
      "learning_rate": 2.682800032710663e-05,
      "loss": 1.0792,
      "step": 45930
    },
    {
      "epoch": 0.16100824667837255,
      "grad_norm": 3.984375,
      "learning_rate": 2.6833841516840154e-05,
      "loss": 0.9973,
      "step": 45940
    },
    {
      "epoch": 0.16104329418526814,
      "grad_norm": 3.171875,
      "learning_rate": 2.6839682706573676e-05,
      "loss": 0.9679,
      "step": 45950
    },
    {
      "epoch": 0.16107834169216373,
      "grad_norm": 2.84375,
      "learning_rate": 2.68455238963072e-05,
      "loss": 1.0203,
      "step": 45960
    },
    {
      "epoch": 0.16111338919905932,
      "grad_norm": 3.203125,
      "learning_rate": 2.6851365086040725e-05,
      "loss": 0.9189,
      "step": 45970
    },
    {
      "epoch": 0.1611484367059549,
      "grad_norm": 3.453125,
      "learning_rate": 2.685720627577425e-05,
      "loss": 1.0345,
      "step": 45980
    },
    {
      "epoch": 0.16118348421285053,
      "grad_norm": 3.390625,
      "learning_rate": 2.6863047465507775e-05,
      "loss": 0.9924,
      "step": 45990
    },
    {
      "epoch": 0.16121853171974612,
      "grad_norm": 3.15625,
      "learning_rate": 2.6868888655241303e-05,
      "loss": 1.0081,
      "step": 46000
    },
    {
      "epoch": 0.1612535792266417,
      "grad_norm": 3.046875,
      "learning_rate": 2.6874729844974828e-05,
      "loss": 0.9935,
      "step": 46010
    },
    {
      "epoch": 0.1612886267335373,
      "grad_norm": 3.453125,
      "learning_rate": 2.6880571034708353e-05,
      "loss": 1.0361,
      "step": 46020
    },
    {
      "epoch": 0.1613236742404329,
      "grad_norm": 3.453125,
      "learning_rate": 2.6886412224441874e-05,
      "loss": 0.9952,
      "step": 46030
    },
    {
      "epoch": 0.16135872174732852,
      "grad_norm": 3.375,
      "learning_rate": 2.68922534141754e-05,
      "loss": 1.0128,
      "step": 46040
    },
    {
      "epoch": 0.1613937692542241,
      "grad_norm": 3.421875,
      "learning_rate": 2.6898094603908924e-05,
      "loss": 1.0929,
      "step": 46050
    },
    {
      "epoch": 0.1614288167611197,
      "grad_norm": 3.53125,
      "learning_rate": 2.690393579364245e-05,
      "loss": 0.9639,
      "step": 46060
    },
    {
      "epoch": 0.1614638642680153,
      "grad_norm": 3.484375,
      "learning_rate": 2.6909776983375974e-05,
      "loss": 1.0791,
      "step": 46070
    },
    {
      "epoch": 0.16149891177491088,
      "grad_norm": 3.78125,
      "learning_rate": 2.6915618173109502e-05,
      "loss": 0.9959,
      "step": 46080
    },
    {
      "epoch": 0.1615339592818065,
      "grad_norm": 3.53125,
      "learning_rate": 2.6921459362843027e-05,
      "loss": 0.9415,
      "step": 46090
    },
    {
      "epoch": 0.1615690067887021,
      "grad_norm": 3.046875,
      "learning_rate": 2.692730055257655e-05,
      "loss": 0.9284,
      "step": 46100
    },
    {
      "epoch": 0.16160405429559768,
      "grad_norm": 3.453125,
      "learning_rate": 2.6933141742310076e-05,
      "loss": 1.0065,
      "step": 46110
    },
    {
      "epoch": 0.16163910180249327,
      "grad_norm": 3.71875,
      "learning_rate": 2.6938982932043598e-05,
      "loss": 0.9465,
      "step": 46120
    },
    {
      "epoch": 0.16167414930938886,
      "grad_norm": 3.5625,
      "learning_rate": 2.6944824121777123e-05,
      "loss": 1.0505,
      "step": 46130
    },
    {
      "epoch": 0.16170919681628448,
      "grad_norm": 3.125,
      "learning_rate": 2.6950665311510648e-05,
      "loss": 1.0007,
      "step": 46140
    },
    {
      "epoch": 0.16174424432318008,
      "grad_norm": 3.578125,
      "learning_rate": 2.6956506501244172e-05,
      "loss": 1.0079,
      "step": 46150
    },
    {
      "epoch": 0.16177929183007567,
      "grad_norm": 3.15625,
      "learning_rate": 2.69623476909777e-05,
      "loss": 0.9728,
      "step": 46160
    },
    {
      "epoch": 0.16181433933697126,
      "grad_norm": 3.421875,
      "learning_rate": 2.6968188880711225e-05,
      "loss": 0.9571,
      "step": 46170
    },
    {
      "epoch": 0.16184938684386685,
      "grad_norm": 4.0,
      "learning_rate": 2.697403007044475e-05,
      "loss": 1.0625,
      "step": 46180
    },
    {
      "epoch": 0.16188443435076247,
      "grad_norm": 3.734375,
      "learning_rate": 2.6979871260178275e-05,
      "loss": 1.0196,
      "step": 46190
    },
    {
      "epoch": 0.16191948185765806,
      "grad_norm": 3.453125,
      "learning_rate": 2.69857124499118e-05,
      "loss": 1.0501,
      "step": 46200
    },
    {
      "epoch": 0.16195452936455365,
      "grad_norm": 3.328125,
      "learning_rate": 2.699155363964532e-05,
      "loss": 1.0133,
      "step": 46210
    },
    {
      "epoch": 0.16198957687144924,
      "grad_norm": 3.421875,
      "learning_rate": 2.6997394829378846e-05,
      "loss": 1.0213,
      "step": 46220
    },
    {
      "epoch": 0.16202462437834483,
      "grad_norm": 3.453125,
      "learning_rate": 2.7003236019112378e-05,
      "loss": 1.0211,
      "step": 46230
    },
    {
      "epoch": 0.16205967188524045,
      "grad_norm": 3.140625,
      "learning_rate": 2.70090772088459e-05,
      "loss": 0.9838,
      "step": 46240
    },
    {
      "epoch": 0.16209471939213604,
      "grad_norm": 3.421875,
      "learning_rate": 2.7014918398579424e-05,
      "loss": 1.0683,
      "step": 46250
    },
    {
      "epoch": 0.16212976689903164,
      "grad_norm": 3.359375,
      "learning_rate": 2.702075958831295e-05,
      "loss": 0.9734,
      "step": 46260
    },
    {
      "epoch": 0.16216481440592723,
      "grad_norm": 3.40625,
      "learning_rate": 2.7026600778046474e-05,
      "loss": 0.9949,
      "step": 46270
    },
    {
      "epoch": 0.16219986191282282,
      "grad_norm": 3.375,
      "learning_rate": 2.703244196778e-05,
      "loss": 0.9284,
      "step": 46280
    },
    {
      "epoch": 0.16223490941971844,
      "grad_norm": 3.25,
      "learning_rate": 2.7038283157513524e-05,
      "loss": 0.9573,
      "step": 46290
    },
    {
      "epoch": 0.16226995692661403,
      "grad_norm": 3.25,
      "learning_rate": 2.7044124347247045e-05,
      "loss": 0.9356,
      "step": 46300
    },
    {
      "epoch": 0.16230500443350962,
      "grad_norm": 3.34375,
      "learning_rate": 2.7049965536980577e-05,
      "loss": 1.0812,
      "step": 46310
    },
    {
      "epoch": 0.1623400519404052,
      "grad_norm": 3.03125,
      "learning_rate": 2.70558067267141e-05,
      "loss": 0.994,
      "step": 46320
    },
    {
      "epoch": 0.1623750994473008,
      "grad_norm": 3.21875,
      "learning_rate": 2.7061647916447623e-05,
      "loss": 1.0062,
      "step": 46330
    },
    {
      "epoch": 0.16241014695419642,
      "grad_norm": 3.296875,
      "learning_rate": 2.7067489106181148e-05,
      "loss": 0.9853,
      "step": 46340
    },
    {
      "epoch": 0.162445194461092,
      "grad_norm": 3.40625,
      "learning_rate": 2.7073330295914673e-05,
      "loss": 0.98,
      "step": 46350
    },
    {
      "epoch": 0.1624802419679876,
      "grad_norm": 3.1875,
      "learning_rate": 2.7079171485648198e-05,
      "loss": 0.9925,
      "step": 46360
    },
    {
      "epoch": 0.1625152894748832,
      "grad_norm": 3.296875,
      "learning_rate": 2.7085012675381722e-05,
      "loss": 1.0498,
      "step": 46370
    },
    {
      "epoch": 0.16255033698177881,
      "grad_norm": 3.75,
      "learning_rate": 2.7090853865115244e-05,
      "loss": 0.9686,
      "step": 46380
    },
    {
      "epoch": 0.1625853844886744,
      "grad_norm": 2.921875,
      "learning_rate": 2.7096695054848775e-05,
      "loss": 0.9755,
      "step": 46390
    },
    {
      "epoch": 0.16262043199557,
      "grad_norm": 3.015625,
      "learning_rate": 2.71025362445823e-05,
      "loss": 0.8992,
      "step": 46400
    },
    {
      "epoch": 0.1626554795024656,
      "grad_norm": 3.34375,
      "learning_rate": 2.7108377434315825e-05,
      "loss": 0.9465,
      "step": 46410
    },
    {
      "epoch": 0.16269052700936118,
      "grad_norm": 3.140625,
      "learning_rate": 2.7114218624049347e-05,
      "loss": 1.0815,
      "step": 46420
    },
    {
      "epoch": 0.1627255745162568,
      "grad_norm": 3.1875,
      "learning_rate": 2.712005981378287e-05,
      "loss": 1.0748,
      "step": 46430
    },
    {
      "epoch": 0.1627606220231524,
      "grad_norm": 3.234375,
      "learning_rate": 2.7125901003516396e-05,
      "loss": 1.0096,
      "step": 46440
    },
    {
      "epoch": 0.16279566953004798,
      "grad_norm": 3.046875,
      "learning_rate": 2.713174219324992e-05,
      "loss": 0.9328,
      "step": 46450
    },
    {
      "epoch": 0.16283071703694357,
      "grad_norm": 3.453125,
      "learning_rate": 2.7137583382983446e-05,
      "loss": 1.0297,
      "step": 46460
    },
    {
      "epoch": 0.16286576454383916,
      "grad_norm": 3.59375,
      "learning_rate": 2.7143424572716974e-05,
      "loss": 1.026,
      "step": 46470
    },
    {
      "epoch": 0.16290081205073478,
      "grad_norm": 3.515625,
      "learning_rate": 2.71492657624505e-05,
      "loss": 0.9942,
      "step": 46480
    },
    {
      "epoch": 0.16293585955763037,
      "grad_norm": 3.5625,
      "learning_rate": 2.7155106952184024e-05,
      "loss": 1.0892,
      "step": 46490
    },
    {
      "epoch": 0.16297090706452597,
      "grad_norm": 3.46875,
      "learning_rate": 2.716094814191755e-05,
      "loss": 0.9519,
      "step": 46500
    },
    {
      "epoch": 0.16300595457142156,
      "grad_norm": 3.046875,
      "learning_rate": 2.716678933165107e-05,
      "loss": 1.0445,
      "step": 46510
    },
    {
      "epoch": 0.16304100207831715,
      "grad_norm": 3.75,
      "learning_rate": 2.7172630521384595e-05,
      "loss": 1.0336,
      "step": 46520
    },
    {
      "epoch": 0.16307604958521277,
      "grad_norm": 3.34375,
      "learning_rate": 2.717847171111812e-05,
      "loss": 1.0355,
      "step": 46530
    },
    {
      "epoch": 0.16311109709210836,
      "grad_norm": 3.421875,
      "learning_rate": 2.7184312900851648e-05,
      "loss": 0.9985,
      "step": 46540
    },
    {
      "epoch": 0.16314614459900395,
      "grad_norm": 3.734375,
      "learning_rate": 2.7190154090585173e-05,
      "loss": 1.0463,
      "step": 46550
    },
    {
      "epoch": 0.16318119210589954,
      "grad_norm": 3.140625,
      "learning_rate": 2.7195995280318698e-05,
      "loss": 0.939,
      "step": 46560
    },
    {
      "epoch": 0.16321623961279513,
      "grad_norm": 3.15625,
      "learning_rate": 2.7201836470052223e-05,
      "loss": 1.0799,
      "step": 46570
    },
    {
      "epoch": 0.16325128711969075,
      "grad_norm": 3.21875,
      "learning_rate": 2.7207677659785748e-05,
      "loss": 1.0071,
      "step": 46580
    },
    {
      "epoch": 0.16328633462658634,
      "grad_norm": 3.546875,
      "learning_rate": 2.721351884951927e-05,
      "loss": 0.992,
      "step": 46590
    },
    {
      "epoch": 0.16332138213348193,
      "grad_norm": 3.265625,
      "learning_rate": 2.7219360039252794e-05,
      "loss": 0.9842,
      "step": 46600
    },
    {
      "epoch": 0.16335642964037753,
      "grad_norm": 3.515625,
      "learning_rate": 2.722520122898632e-05,
      "loss": 1.0379,
      "step": 46610
    },
    {
      "epoch": 0.16339147714727312,
      "grad_norm": 3.046875,
      "learning_rate": 2.723104241871985e-05,
      "loss": 1.0316,
      "step": 46620
    },
    {
      "epoch": 0.16342652465416874,
      "grad_norm": 3.328125,
      "learning_rate": 2.7236883608453372e-05,
      "loss": 0.957,
      "step": 46630
    },
    {
      "epoch": 0.16346157216106433,
      "grad_norm": 3.71875,
      "learning_rate": 2.7242724798186897e-05,
      "loss": 1.013,
      "step": 46640
    },
    {
      "epoch": 0.16349661966795992,
      "grad_norm": 3.09375,
      "learning_rate": 2.724856598792042e-05,
      "loss": 0.8805,
      "step": 46650
    },
    {
      "epoch": 0.1635316671748555,
      "grad_norm": 3.171875,
      "learning_rate": 2.7254407177653946e-05,
      "loss": 1.088,
      "step": 46660
    },
    {
      "epoch": 0.1635667146817511,
      "grad_norm": 3.046875,
      "learning_rate": 2.726024836738747e-05,
      "loss": 1.0345,
      "step": 46670
    },
    {
      "epoch": 0.16360176218864672,
      "grad_norm": 3.546875,
      "learning_rate": 2.7266089557120993e-05,
      "loss": 1.0722,
      "step": 46680
    },
    {
      "epoch": 0.1636368096955423,
      "grad_norm": 3.3125,
      "learning_rate": 2.7271930746854517e-05,
      "loss": 1.0249,
      "step": 46690
    },
    {
      "epoch": 0.1636718572024379,
      "grad_norm": 3.140625,
      "learning_rate": 2.727777193658805e-05,
      "loss": 0.9781,
      "step": 46700
    },
    {
      "epoch": 0.1637069047093335,
      "grad_norm": 3.546875,
      "learning_rate": 2.728361312632157e-05,
      "loss": 1.0429,
      "step": 46710
    },
    {
      "epoch": 0.1637419522162291,
      "grad_norm": 2.984375,
      "learning_rate": 2.7289454316055095e-05,
      "loss": 0.9874,
      "step": 46720
    },
    {
      "epoch": 0.1637769997231247,
      "grad_norm": 3.359375,
      "learning_rate": 2.729529550578862e-05,
      "loss": 0.9998,
      "step": 46730
    },
    {
      "epoch": 0.1638120472300203,
      "grad_norm": 3.046875,
      "learning_rate": 2.7301136695522145e-05,
      "loss": 0.9785,
      "step": 46740
    },
    {
      "epoch": 0.1638470947369159,
      "grad_norm": 3.1875,
      "learning_rate": 2.730697788525567e-05,
      "loss": 1.0505,
      "step": 46750
    },
    {
      "epoch": 0.16388214224381148,
      "grad_norm": 3.171875,
      "learning_rate": 2.7312819074989195e-05,
      "loss": 1.0743,
      "step": 46760
    },
    {
      "epoch": 0.16391718975070707,
      "grad_norm": 3.328125,
      "learning_rate": 2.7318660264722716e-05,
      "loss": 1.061,
      "step": 46770
    },
    {
      "epoch": 0.1639522372576027,
      "grad_norm": 3.796875,
      "learning_rate": 2.7324501454456248e-05,
      "loss": 1.0608,
      "step": 46780
    },
    {
      "epoch": 0.16398728476449828,
      "grad_norm": 3.1875,
      "learning_rate": 2.7330342644189773e-05,
      "loss": 1.0942,
      "step": 46790
    },
    {
      "epoch": 0.16402233227139387,
      "grad_norm": 3.4375,
      "learning_rate": 2.7336183833923294e-05,
      "loss": 0.9932,
      "step": 46800
    },
    {
      "epoch": 0.16405737977828946,
      "grad_norm": 2.921875,
      "learning_rate": 2.734202502365682e-05,
      "loss": 1.0743,
      "step": 46810
    },
    {
      "epoch": 0.16409242728518506,
      "grad_norm": 3.359375,
      "learning_rate": 2.7347866213390344e-05,
      "loss": 1.0654,
      "step": 46820
    },
    {
      "epoch": 0.16412747479208067,
      "grad_norm": 3.328125,
      "learning_rate": 2.735370740312387e-05,
      "loss": 1.0188,
      "step": 46830
    },
    {
      "epoch": 0.16416252229897627,
      "grad_norm": 3.609375,
      "learning_rate": 2.7359548592857393e-05,
      "loss": 1.0065,
      "step": 46840
    },
    {
      "epoch": 0.16419756980587186,
      "grad_norm": 3.3125,
      "learning_rate": 2.736538978259092e-05,
      "loss": 0.9682,
      "step": 46850
    },
    {
      "epoch": 0.16423261731276745,
      "grad_norm": 3.734375,
      "learning_rate": 2.7371230972324447e-05,
      "loss": 1.0054,
      "step": 46860
    },
    {
      "epoch": 0.16426766481966304,
      "grad_norm": 3.359375,
      "learning_rate": 2.737707216205797e-05,
      "loss": 0.9917,
      "step": 46870
    },
    {
      "epoch": 0.16430271232655866,
      "grad_norm": 3.296875,
      "learning_rate": 2.7382913351791496e-05,
      "loss": 0.9381,
      "step": 46880
    },
    {
      "epoch": 0.16433775983345425,
      "grad_norm": 3.828125,
      "learning_rate": 2.7388754541525018e-05,
      "loss": 1.0608,
      "step": 46890
    },
    {
      "epoch": 0.16437280734034984,
      "grad_norm": 3.421875,
      "learning_rate": 2.7394595731258543e-05,
      "loss": 1.013,
      "step": 46900
    },
    {
      "epoch": 0.16440785484724543,
      "grad_norm": 3.328125,
      "learning_rate": 2.7400436920992067e-05,
      "loss": 0.9822,
      "step": 46910
    },
    {
      "epoch": 0.16444290235414105,
      "grad_norm": 3.359375,
      "learning_rate": 2.7406278110725592e-05,
      "loss": 1.0269,
      "step": 46920
    },
    {
      "epoch": 0.16447794986103664,
      "grad_norm": 3.078125,
      "learning_rate": 2.741211930045912e-05,
      "loss": 0.9622,
      "step": 46930
    },
    {
      "epoch": 0.16451299736793223,
      "grad_norm": 3.6875,
      "learning_rate": 2.7417960490192645e-05,
      "loss": 1.0352,
      "step": 46940
    },
    {
      "epoch": 0.16454804487482783,
      "grad_norm": 2.625,
      "learning_rate": 2.742380167992617e-05,
      "loss": 1.0388,
      "step": 46950
    },
    {
      "epoch": 0.16458309238172342,
      "grad_norm": 2.890625,
      "learning_rate": 2.7429642869659695e-05,
      "loss": 0.9488,
      "step": 46960
    },
    {
      "epoch": 0.16461813988861904,
      "grad_norm": 3.21875,
      "learning_rate": 2.743548405939322e-05,
      "loss": 0.9935,
      "step": 46970
    },
    {
      "epoch": 0.16465318739551463,
      "grad_norm": 3.1875,
      "learning_rate": 2.744132524912674e-05,
      "loss": 0.9949,
      "step": 46980
    },
    {
      "epoch": 0.16468823490241022,
      "grad_norm": 3.328125,
      "learning_rate": 2.7447166438860266e-05,
      "loss": 1.0673,
      "step": 46990
    },
    {
      "epoch": 0.1647232824093058,
      "grad_norm": 3.34375,
      "learning_rate": 2.745300762859379e-05,
      "loss": 1.0125,
      "step": 47000
    },
    {
      "epoch": 0.1647583299162014,
      "grad_norm": 3.46875,
      "learning_rate": 2.745884881832732e-05,
      "loss": 1.0483,
      "step": 47010
    },
    {
      "epoch": 0.16479337742309702,
      "grad_norm": 3.078125,
      "learning_rate": 2.7464690008060844e-05,
      "loss": 1.0867,
      "step": 47020
    },
    {
      "epoch": 0.1648284249299926,
      "grad_norm": 3.15625,
      "learning_rate": 2.747053119779437e-05,
      "loss": 0.9378,
      "step": 47030
    },
    {
      "epoch": 0.1648634724368882,
      "grad_norm": 2.796875,
      "learning_rate": 2.7476372387527894e-05,
      "loss": 0.9189,
      "step": 47040
    },
    {
      "epoch": 0.1648985199437838,
      "grad_norm": 3.46875,
      "learning_rate": 2.748221357726142e-05,
      "loss": 1.0126,
      "step": 47050
    },
    {
      "epoch": 0.16493356745067939,
      "grad_norm": 3.4375,
      "learning_rate": 2.7488054766994943e-05,
      "loss": 1.0509,
      "step": 47060
    },
    {
      "epoch": 0.164968614957575,
      "grad_norm": 2.828125,
      "learning_rate": 2.7493895956728465e-05,
      "loss": 0.9387,
      "step": 47070
    },
    {
      "epoch": 0.1650036624644706,
      "grad_norm": 3.53125,
      "learning_rate": 2.749973714646199e-05,
      "loss": 0.9436,
      "step": 47080
    },
    {
      "epoch": 0.1650387099713662,
      "grad_norm": 3.8125,
      "learning_rate": 2.750557833619552e-05,
      "loss": 1.0062,
      "step": 47090
    },
    {
      "epoch": 0.16507375747826178,
      "grad_norm": 3.234375,
      "learning_rate": 2.7511419525929043e-05,
      "loss": 1.0485,
      "step": 47100
    },
    {
      "epoch": 0.16510880498515737,
      "grad_norm": 3.546875,
      "learning_rate": 2.7517260715662568e-05,
      "loss": 0.9513,
      "step": 47110
    },
    {
      "epoch": 0.165143852492053,
      "grad_norm": 3.296875,
      "learning_rate": 2.7523101905396093e-05,
      "loss": 1.0393,
      "step": 47120
    },
    {
      "epoch": 0.16517889999894858,
      "grad_norm": 3.09375,
      "learning_rate": 2.7528943095129617e-05,
      "loss": 0.9587,
      "step": 47130
    },
    {
      "epoch": 0.16521394750584417,
      "grad_norm": 3.078125,
      "learning_rate": 2.7534784284863142e-05,
      "loss": 0.9647,
      "step": 47140
    },
    {
      "epoch": 0.16524899501273976,
      "grad_norm": 3.25,
      "learning_rate": 2.7540625474596664e-05,
      "loss": 1.0084,
      "step": 47150
    },
    {
      "epoch": 0.16528404251963535,
      "grad_norm": 3.515625,
      "learning_rate": 2.754646666433019e-05,
      "loss": 1.0407,
      "step": 47160
    },
    {
      "epoch": 0.16531909002653097,
      "grad_norm": 3.265625,
      "learning_rate": 2.755230785406372e-05,
      "loss": 1.0436,
      "step": 47170
    },
    {
      "epoch": 0.16535413753342657,
      "grad_norm": 3.6875,
      "learning_rate": 2.7558149043797245e-05,
      "loss": 1.0934,
      "step": 47180
    },
    {
      "epoch": 0.16538918504032216,
      "grad_norm": 3.6875,
      "learning_rate": 2.7563990233530766e-05,
      "loss": 0.9221,
      "step": 47190
    },
    {
      "epoch": 0.16542423254721775,
      "grad_norm": 3.203125,
      "learning_rate": 2.756983142326429e-05,
      "loss": 1.0042,
      "step": 47200
    },
    {
      "epoch": 0.16545928005411334,
      "grad_norm": 3.5,
      "learning_rate": 2.7575672612997816e-05,
      "loss": 1.0242,
      "step": 47210
    },
    {
      "epoch": 0.16549432756100896,
      "grad_norm": 3.578125,
      "learning_rate": 2.758151380273134e-05,
      "loss": 0.9686,
      "step": 47220
    },
    {
      "epoch": 0.16552937506790455,
      "grad_norm": 3.171875,
      "learning_rate": 2.7587354992464866e-05,
      "loss": 1.1186,
      "step": 47230
    },
    {
      "epoch": 0.16556442257480014,
      "grad_norm": 2.90625,
      "learning_rate": 2.7593196182198394e-05,
      "loss": 1.016,
      "step": 47240
    },
    {
      "epoch": 0.16559947008169573,
      "grad_norm": 3.21875,
      "learning_rate": 2.759903737193192e-05,
      "loss": 0.9403,
      "step": 47250
    },
    {
      "epoch": 0.16563451758859132,
      "grad_norm": 3.125,
      "learning_rate": 2.7604878561665444e-05,
      "loss": 0.9805,
      "step": 47260
    },
    {
      "epoch": 0.16566956509548694,
      "grad_norm": 3.703125,
      "learning_rate": 2.7610719751398965e-05,
      "loss": 1.0332,
      "step": 47270
    },
    {
      "epoch": 0.16570461260238253,
      "grad_norm": 2.65625,
      "learning_rate": 2.761656094113249e-05,
      "loss": 0.9475,
      "step": 47280
    },
    {
      "epoch": 0.16573966010927813,
      "grad_norm": 3.46875,
      "learning_rate": 2.7622402130866015e-05,
      "loss": 1.0035,
      "step": 47290
    },
    {
      "epoch": 0.16577470761617372,
      "grad_norm": 2.953125,
      "learning_rate": 2.762824332059954e-05,
      "loss": 0.9699,
      "step": 47300
    },
    {
      "epoch": 0.1658097551230693,
      "grad_norm": 3.015625,
      "learning_rate": 2.7634084510333065e-05,
      "loss": 0.9678,
      "step": 47310
    },
    {
      "epoch": 0.16584480262996493,
      "grad_norm": 3.171875,
      "learning_rate": 2.7639925700066593e-05,
      "loss": 1.0166,
      "step": 47320
    },
    {
      "epoch": 0.16587985013686052,
      "grad_norm": 3.53125,
      "learning_rate": 2.7645766889800118e-05,
      "loss": 0.9839,
      "step": 47330
    },
    {
      "epoch": 0.1659148976437561,
      "grad_norm": 3.1875,
      "learning_rate": 2.7651608079533642e-05,
      "loss": 1.0472,
      "step": 47340
    },
    {
      "epoch": 0.1659499451506517,
      "grad_norm": 3.53125,
      "learning_rate": 2.7657449269267167e-05,
      "loss": 1.0088,
      "step": 47350
    },
    {
      "epoch": 0.1659849926575473,
      "grad_norm": 3.75,
      "learning_rate": 2.766329045900069e-05,
      "loss": 0.9996,
      "step": 47360
    },
    {
      "epoch": 0.1660200401644429,
      "grad_norm": 3.578125,
      "learning_rate": 2.7669131648734214e-05,
      "loss": 1.1685,
      "step": 47370
    },
    {
      "epoch": 0.1660550876713385,
      "grad_norm": 3.140625,
      "learning_rate": 2.767497283846774e-05,
      "loss": 0.9534,
      "step": 47380
    },
    {
      "epoch": 0.1660901351782341,
      "grad_norm": 3.5,
      "learning_rate": 2.7680814028201263e-05,
      "loss": 0.9858,
      "step": 47390
    },
    {
      "epoch": 0.16612518268512969,
      "grad_norm": 3.046875,
      "learning_rate": 2.768665521793479e-05,
      "loss": 0.9495,
      "step": 47400
    },
    {
      "epoch": 0.16616023019202528,
      "grad_norm": 3.21875,
      "learning_rate": 2.7692496407668316e-05,
      "loss": 0.9874,
      "step": 47410
    },
    {
      "epoch": 0.1661952776989209,
      "grad_norm": 3.28125,
      "learning_rate": 2.769833759740184e-05,
      "loss": 1.0175,
      "step": 47420
    },
    {
      "epoch": 0.1662303252058165,
      "grad_norm": 3.484375,
      "learning_rate": 2.7704178787135366e-05,
      "loss": 1.0177,
      "step": 47430
    },
    {
      "epoch": 0.16626537271271208,
      "grad_norm": 3.53125,
      "learning_rate": 2.771001997686889e-05,
      "loss": 1.0432,
      "step": 47440
    },
    {
      "epoch": 0.16630042021960767,
      "grad_norm": 3.328125,
      "learning_rate": 2.7715861166602412e-05,
      "loss": 1.0225,
      "step": 47450
    },
    {
      "epoch": 0.1663354677265033,
      "grad_norm": 3.09375,
      "learning_rate": 2.7721702356335937e-05,
      "loss": 0.977,
      "step": 47460
    },
    {
      "epoch": 0.16637051523339888,
      "grad_norm": 3.609375,
      "learning_rate": 2.7727543546069462e-05,
      "loss": 1.0401,
      "step": 47470
    },
    {
      "epoch": 0.16640556274029447,
      "grad_norm": 2.828125,
      "learning_rate": 2.773338473580299e-05,
      "loss": 1.0035,
      "step": 47480
    },
    {
      "epoch": 0.16644061024719006,
      "grad_norm": 3.203125,
      "learning_rate": 2.7739225925536515e-05,
      "loss": 0.9978,
      "step": 47490
    },
    {
      "epoch": 0.16647565775408565,
      "grad_norm": 3.3125,
      "learning_rate": 2.774506711527004e-05,
      "loss": 0.9959,
      "step": 47500
    },
    {
      "epoch": 0.16651070526098127,
      "grad_norm": 3.484375,
      "learning_rate": 2.7750908305003565e-05,
      "loss": 0.9938,
      "step": 47510
    },
    {
      "epoch": 0.16654575276787686,
      "grad_norm": 2.578125,
      "learning_rate": 2.775674949473709e-05,
      "loss": 1.0385,
      "step": 47520
    },
    {
      "epoch": 0.16658080027477246,
      "grad_norm": 2.703125,
      "learning_rate": 2.7762590684470615e-05,
      "loss": 0.9567,
      "step": 47530
    },
    {
      "epoch": 0.16661584778166805,
      "grad_norm": 3.109375,
      "learning_rate": 2.7768431874204136e-05,
      "loss": 0.9607,
      "step": 47540
    },
    {
      "epoch": 0.16665089528856364,
      "grad_norm": 3.0625,
      "learning_rate": 2.7774273063937668e-05,
      "loss": 1.0514,
      "step": 47550
    },
    {
      "epoch": 0.16668594279545926,
      "grad_norm": 3.875,
      "learning_rate": 2.7780114253671192e-05,
      "loss": 1.0065,
      "step": 47560
    },
    {
      "epoch": 0.16672099030235485,
      "grad_norm": 3.390625,
      "learning_rate": 2.7785955443404714e-05,
      "loss": 1.0209,
      "step": 47570
    },
    {
      "epoch": 0.16675603780925044,
      "grad_norm": 3.453125,
      "learning_rate": 2.779179663313824e-05,
      "loss": 0.9853,
      "step": 47580
    },
    {
      "epoch": 0.16679108531614603,
      "grad_norm": 3.3125,
      "learning_rate": 2.7797637822871764e-05,
      "loss": 0.8968,
      "step": 47590
    },
    {
      "epoch": 0.16682613282304162,
      "grad_norm": 3.671875,
      "learning_rate": 2.780347901260529e-05,
      "loss": 0.9993,
      "step": 47600
    },
    {
      "epoch": 0.16686118032993724,
      "grad_norm": 3.71875,
      "learning_rate": 2.7809320202338813e-05,
      "loss": 0.9748,
      "step": 47610
    },
    {
      "epoch": 0.16689622783683283,
      "grad_norm": 3.078125,
      "learning_rate": 2.7815161392072335e-05,
      "loss": 0.9181,
      "step": 47620
    },
    {
      "epoch": 0.16693127534372842,
      "grad_norm": 3.4375,
      "learning_rate": 2.7821002581805866e-05,
      "loss": 1.0013,
      "step": 47630
    },
    {
      "epoch": 0.16696632285062402,
      "grad_norm": 3.3125,
      "learning_rate": 2.782684377153939e-05,
      "loss": 1.047,
      "step": 47640
    },
    {
      "epoch": 0.1670013703575196,
      "grad_norm": 3.109375,
      "learning_rate": 2.7832684961272916e-05,
      "loss": 1.0331,
      "step": 47650
    },
    {
      "epoch": 0.16703641786441523,
      "grad_norm": 3.3125,
      "learning_rate": 2.7838526151006437e-05,
      "loss": 1.0228,
      "step": 47660
    },
    {
      "epoch": 0.16707146537131082,
      "grad_norm": 3.0,
      "learning_rate": 2.7844367340739962e-05,
      "loss": 0.9656,
      "step": 47670
    },
    {
      "epoch": 0.1671065128782064,
      "grad_norm": 3.296875,
      "learning_rate": 2.7850208530473487e-05,
      "loss": 1.059,
      "step": 47680
    },
    {
      "epoch": 0.167141560385102,
      "grad_norm": 3.375,
      "learning_rate": 2.7856049720207012e-05,
      "loss": 1.0789,
      "step": 47690
    },
    {
      "epoch": 0.1671766078919976,
      "grad_norm": 3.3125,
      "learning_rate": 2.7861890909940537e-05,
      "loss": 1.0165,
      "step": 47700
    },
    {
      "epoch": 0.1672116553988932,
      "grad_norm": 3.515625,
      "learning_rate": 2.7867732099674065e-05,
      "loss": 1.0265,
      "step": 47710
    },
    {
      "epoch": 0.1672467029057888,
      "grad_norm": 3.296875,
      "learning_rate": 2.787357328940759e-05,
      "loss": 1.0135,
      "step": 47720
    },
    {
      "epoch": 0.1672817504126844,
      "grad_norm": 3.21875,
      "learning_rate": 2.7879414479141115e-05,
      "loss": 1.0295,
      "step": 47730
    },
    {
      "epoch": 0.16731679791957998,
      "grad_norm": 3.171875,
      "learning_rate": 2.788525566887464e-05,
      "loss": 1.0466,
      "step": 47740
    },
    {
      "epoch": 0.16735184542647558,
      "grad_norm": 3.078125,
      "learning_rate": 2.789109685860816e-05,
      "loss": 1.0459,
      "step": 47750
    },
    {
      "epoch": 0.1673868929333712,
      "grad_norm": 2.984375,
      "learning_rate": 2.7896938048341686e-05,
      "loss": 0.922,
      "step": 47760
    },
    {
      "epoch": 0.1674219404402668,
      "grad_norm": 3.640625,
      "learning_rate": 2.790277923807521e-05,
      "loss": 1.0303,
      "step": 47770
    },
    {
      "epoch": 0.16745698794716238,
      "grad_norm": 3.1875,
      "learning_rate": 2.7908620427808736e-05,
      "loss": 1.021,
      "step": 47780
    },
    {
      "epoch": 0.16749203545405797,
      "grad_norm": 2.859375,
      "learning_rate": 2.7914461617542264e-05,
      "loss": 0.906,
      "step": 47790
    },
    {
      "epoch": 0.16752708296095356,
      "grad_norm": 3.375,
      "learning_rate": 2.792030280727579e-05,
      "loss": 1.0551,
      "step": 47800
    },
    {
      "epoch": 0.16756213046784918,
      "grad_norm": 3.78125,
      "learning_rate": 2.7926143997009314e-05,
      "loss": 0.996,
      "step": 47810
    },
    {
      "epoch": 0.16759717797474477,
      "grad_norm": 3.296875,
      "learning_rate": 2.793198518674284e-05,
      "loss": 1.0217,
      "step": 47820
    },
    {
      "epoch": 0.16763222548164036,
      "grad_norm": 3.78125,
      "learning_rate": 2.793782637647636e-05,
      "loss": 1.0067,
      "step": 47830
    },
    {
      "epoch": 0.16766727298853595,
      "grad_norm": 3.3125,
      "learning_rate": 2.7943667566209885e-05,
      "loss": 0.9705,
      "step": 47840
    },
    {
      "epoch": 0.16770232049543154,
      "grad_norm": 3.75,
      "learning_rate": 2.794950875594341e-05,
      "loss": 0.9552,
      "step": 47850
    },
    {
      "epoch": 0.16773736800232716,
      "grad_norm": 3.078125,
      "learning_rate": 2.7955349945676934e-05,
      "loss": 0.9672,
      "step": 47860
    },
    {
      "epoch": 0.16777241550922276,
      "grad_norm": 3.21875,
      "learning_rate": 2.7961191135410463e-05,
      "loss": 0.9745,
      "step": 47870
    },
    {
      "epoch": 0.16780746301611835,
      "grad_norm": 3.203125,
      "learning_rate": 2.7967032325143987e-05,
      "loss": 1.0166,
      "step": 47880
    },
    {
      "epoch": 0.16784251052301394,
      "grad_norm": 3.515625,
      "learning_rate": 2.7972873514877512e-05,
      "loss": 1.0485,
      "step": 47890
    },
    {
      "epoch": 0.16787755802990953,
      "grad_norm": 2.953125,
      "learning_rate": 2.7978714704611037e-05,
      "loss": 0.9753,
      "step": 47900
    },
    {
      "epoch": 0.16791260553680515,
      "grad_norm": 2.90625,
      "learning_rate": 2.7984555894344562e-05,
      "loss": 0.9845,
      "step": 47910
    },
    {
      "epoch": 0.16794765304370074,
      "grad_norm": 2.890625,
      "learning_rate": 2.7990397084078083e-05,
      "loss": 0.9089,
      "step": 47920
    },
    {
      "epoch": 0.16798270055059633,
      "grad_norm": 3.328125,
      "learning_rate": 2.7996238273811608e-05,
      "loss": 0.9211,
      "step": 47930
    },
    {
      "epoch": 0.16801774805749192,
      "grad_norm": 3.203125,
      "learning_rate": 2.800207946354514e-05,
      "loss": 0.9699,
      "step": 47940
    },
    {
      "epoch": 0.1680527955643875,
      "grad_norm": 3.140625,
      "learning_rate": 2.8007920653278665e-05,
      "loss": 0.9243,
      "step": 47950
    },
    {
      "epoch": 0.16808784307128313,
      "grad_norm": 3.421875,
      "learning_rate": 2.8013761843012186e-05,
      "loss": 1.0488,
      "step": 47960
    },
    {
      "epoch": 0.16812289057817872,
      "grad_norm": 3.296875,
      "learning_rate": 2.801960303274571e-05,
      "loss": 0.9783,
      "step": 47970
    },
    {
      "epoch": 0.16815793808507432,
      "grad_norm": 3.8125,
      "learning_rate": 2.8025444222479236e-05,
      "loss": 1.0078,
      "step": 47980
    },
    {
      "epoch": 0.1681929855919699,
      "grad_norm": 3.28125,
      "learning_rate": 2.803128541221276e-05,
      "loss": 1.0053,
      "step": 47990
    },
    {
      "epoch": 0.16822803309886553,
      "grad_norm": 3.015625,
      "learning_rate": 2.8037126601946286e-05,
      "loss": 0.9979,
      "step": 48000
    },
    {
      "epoch": 0.16826308060576112,
      "grad_norm": 3.265625,
      "learning_rate": 2.8042967791679807e-05,
      "loss": 0.9728,
      "step": 48010
    },
    {
      "epoch": 0.1682981281126567,
      "grad_norm": 3.203125,
      "learning_rate": 2.804880898141334e-05,
      "loss": 1.0459,
      "step": 48020
    },
    {
      "epoch": 0.1683331756195523,
      "grad_norm": 3.296875,
      "learning_rate": 2.8054650171146864e-05,
      "loss": 0.9712,
      "step": 48030
    },
    {
      "epoch": 0.1683682231264479,
      "grad_norm": 3.015625,
      "learning_rate": 2.8060491360880385e-05,
      "loss": 1.0018,
      "step": 48040
    },
    {
      "epoch": 0.1684032706333435,
      "grad_norm": 3.75,
      "learning_rate": 2.806633255061391e-05,
      "loss": 0.9432,
      "step": 48050
    },
    {
      "epoch": 0.1684383181402391,
      "grad_norm": 3.234375,
      "learning_rate": 2.8072173740347435e-05,
      "loss": 1.0396,
      "step": 48060
    },
    {
      "epoch": 0.1684733656471347,
      "grad_norm": 3.625,
      "learning_rate": 2.807801493008096e-05,
      "loss": 1.0346,
      "step": 48070
    },
    {
      "epoch": 0.16850841315403028,
      "grad_norm": 3.75,
      "learning_rate": 2.8083856119814484e-05,
      "loss": 1.0373,
      "step": 48080
    },
    {
      "epoch": 0.16854346066092588,
      "grad_norm": 3.296875,
      "learning_rate": 2.808969730954801e-05,
      "loss": 0.952,
      "step": 48090
    },
    {
      "epoch": 0.1685785081678215,
      "grad_norm": 2.859375,
      "learning_rate": 2.8095538499281537e-05,
      "loss": 1.0037,
      "step": 48100
    },
    {
      "epoch": 0.16861355567471709,
      "grad_norm": 3.0625,
      "learning_rate": 2.8101379689015062e-05,
      "loss": 0.9595,
      "step": 48110
    },
    {
      "epoch": 0.16864860318161268,
      "grad_norm": 3.078125,
      "learning_rate": 2.8107220878748587e-05,
      "loss": 0.9948,
      "step": 48120
    },
    {
      "epoch": 0.16868365068850827,
      "grad_norm": 3.9375,
      "learning_rate": 2.811306206848211e-05,
      "loss": 1.0358,
      "step": 48130
    },
    {
      "epoch": 0.16871869819540386,
      "grad_norm": 3.0625,
      "learning_rate": 2.8118903258215633e-05,
      "loss": 1.0057,
      "step": 48140
    },
    {
      "epoch": 0.16875374570229948,
      "grad_norm": 3.546875,
      "learning_rate": 2.8124744447949158e-05,
      "loss": 1.0377,
      "step": 48150
    },
    {
      "epoch": 0.16878879320919507,
      "grad_norm": 3.515625,
      "learning_rate": 2.8130585637682683e-05,
      "loss": 0.9867,
      "step": 48160
    },
    {
      "epoch": 0.16882384071609066,
      "grad_norm": 3.4375,
      "learning_rate": 2.8136426827416208e-05,
      "loss": 1.0276,
      "step": 48170
    },
    {
      "epoch": 0.16885888822298625,
      "grad_norm": 3.21875,
      "learning_rate": 2.8142268017149736e-05,
      "loss": 0.9883,
      "step": 48180
    },
    {
      "epoch": 0.16889393572988184,
      "grad_norm": 3.234375,
      "learning_rate": 2.814810920688326e-05,
      "loss": 1.0056,
      "step": 48190
    },
    {
      "epoch": 0.16892898323677746,
      "grad_norm": 3.078125,
      "learning_rate": 2.8153950396616786e-05,
      "loss": 0.9558,
      "step": 48200
    },
    {
      "epoch": 0.16896403074367305,
      "grad_norm": 3.21875,
      "learning_rate": 2.815979158635031e-05,
      "loss": 1.0188,
      "step": 48210
    },
    {
      "epoch": 0.16899907825056865,
      "grad_norm": 2.9375,
      "learning_rate": 2.8165632776083832e-05,
      "loss": 0.9765,
      "step": 48220
    },
    {
      "epoch": 0.16903412575746424,
      "grad_norm": 3.59375,
      "learning_rate": 2.8171473965817357e-05,
      "loss": 1.0106,
      "step": 48230
    },
    {
      "epoch": 0.16906917326435983,
      "grad_norm": 3.484375,
      "learning_rate": 2.8177315155550882e-05,
      "loss": 0.9607,
      "step": 48240
    },
    {
      "epoch": 0.16910422077125545,
      "grad_norm": 2.953125,
      "learning_rate": 2.818315634528441e-05,
      "loss": 0.9897,
      "step": 48250
    },
    {
      "epoch": 0.16913926827815104,
      "grad_norm": 3.4375,
      "learning_rate": 2.8188997535017935e-05,
      "loss": 0.9865,
      "step": 48260
    },
    {
      "epoch": 0.16917431578504663,
      "grad_norm": 3.140625,
      "learning_rate": 2.819483872475146e-05,
      "loss": 0.9065,
      "step": 48270
    },
    {
      "epoch": 0.16920936329194222,
      "grad_norm": 3.046875,
      "learning_rate": 2.8200679914484985e-05,
      "loss": 1.0001,
      "step": 48280
    },
    {
      "epoch": 0.1692444107988378,
      "grad_norm": 3.421875,
      "learning_rate": 2.820652110421851e-05,
      "loss": 0.9952,
      "step": 48290
    },
    {
      "epoch": 0.16927945830573343,
      "grad_norm": 3.3125,
      "learning_rate": 2.8212362293952034e-05,
      "loss": 1.0138,
      "step": 48300
    },
    {
      "epoch": 0.16931450581262902,
      "grad_norm": 3.328125,
      "learning_rate": 2.8218203483685556e-05,
      "loss": 1.0103,
      "step": 48310
    },
    {
      "epoch": 0.16934955331952461,
      "grad_norm": 3.203125,
      "learning_rate": 2.822404467341908e-05,
      "loss": 1.0233,
      "step": 48320
    },
    {
      "epoch": 0.1693846008264202,
      "grad_norm": 2.84375,
      "learning_rate": 2.8229885863152612e-05,
      "loss": 0.9734,
      "step": 48330
    },
    {
      "epoch": 0.1694196483333158,
      "grad_norm": 3.265625,
      "learning_rate": 2.8235727052886134e-05,
      "loss": 1.0169,
      "step": 48340
    },
    {
      "epoch": 0.16945469584021142,
      "grad_norm": 3.53125,
      "learning_rate": 2.824156824261966e-05,
      "loss": 1.0538,
      "step": 48350
    },
    {
      "epoch": 0.169489743347107,
      "grad_norm": 3.578125,
      "learning_rate": 2.8247409432353183e-05,
      "loss": 0.9833,
      "step": 48360
    },
    {
      "epoch": 0.1695247908540026,
      "grad_norm": 3.109375,
      "learning_rate": 2.8253250622086708e-05,
      "loss": 0.9627,
      "step": 48370
    },
    {
      "epoch": 0.1695598383608982,
      "grad_norm": 3.46875,
      "learning_rate": 2.8259091811820233e-05,
      "loss": 1.1027,
      "step": 48380
    },
    {
      "epoch": 0.16959488586779378,
      "grad_norm": 3.015625,
      "learning_rate": 2.8264933001553755e-05,
      "loss": 1.0195,
      "step": 48390
    },
    {
      "epoch": 0.1696299333746894,
      "grad_norm": 3.140625,
      "learning_rate": 2.827077419128728e-05,
      "loss": 1.0119,
      "step": 48400
    },
    {
      "epoch": 0.169664980881585,
      "grad_norm": 3.328125,
      "learning_rate": 2.827661538102081e-05,
      "loss": 1.0313,
      "step": 48410
    },
    {
      "epoch": 0.16970002838848058,
      "grad_norm": 3.53125,
      "learning_rate": 2.8282456570754336e-05,
      "loss": 1.0273,
      "step": 48420
    },
    {
      "epoch": 0.16973507589537618,
      "grad_norm": 3.578125,
      "learning_rate": 2.8288297760487857e-05,
      "loss": 0.9279,
      "step": 48430
    },
    {
      "epoch": 0.16977012340227177,
      "grad_norm": 2.8125,
      "learning_rate": 2.8294138950221382e-05,
      "loss": 0.9359,
      "step": 48440
    },
    {
      "epoch": 0.16980517090916739,
      "grad_norm": 2.984375,
      "learning_rate": 2.8299980139954907e-05,
      "loss": 0.941,
      "step": 48450
    },
    {
      "epoch": 0.16984021841606298,
      "grad_norm": 2.96875,
      "learning_rate": 2.8305821329688432e-05,
      "loss": 1.0454,
      "step": 48460
    },
    {
      "epoch": 0.16987526592295857,
      "grad_norm": 2.96875,
      "learning_rate": 2.8311662519421957e-05,
      "loss": 0.923,
      "step": 48470
    },
    {
      "epoch": 0.16991031342985416,
      "grad_norm": 3.4375,
      "learning_rate": 2.8317503709155478e-05,
      "loss": 1.057,
      "step": 48480
    },
    {
      "epoch": 0.16994536093674975,
      "grad_norm": 3.09375,
      "learning_rate": 2.832334489888901e-05,
      "loss": 0.9466,
      "step": 48490
    },
    {
      "epoch": 0.16998040844364537,
      "grad_norm": 3.21875,
      "learning_rate": 2.8329186088622535e-05,
      "loss": 1.0326,
      "step": 48500
    },
    {
      "epoch": 0.17001545595054096,
      "grad_norm": 2.90625,
      "learning_rate": 2.833502727835606e-05,
      "loss": 0.8857,
      "step": 48510
    },
    {
      "epoch": 0.17005050345743655,
      "grad_norm": 3.625,
      "learning_rate": 2.834086846808958e-05,
      "loss": 1.0741,
      "step": 48520
    },
    {
      "epoch": 0.17008555096433214,
      "grad_norm": 3.046875,
      "learning_rate": 2.8346709657823106e-05,
      "loss": 0.9752,
      "step": 48530
    },
    {
      "epoch": 0.17012059847122776,
      "grad_norm": 3.109375,
      "learning_rate": 2.835255084755663e-05,
      "loss": 1.0066,
      "step": 48540
    },
    {
      "epoch": 0.17015564597812335,
      "grad_norm": 3.140625,
      "learning_rate": 2.8358392037290155e-05,
      "loss": 0.9918,
      "step": 48550
    },
    {
      "epoch": 0.17019069348501895,
      "grad_norm": 3.265625,
      "learning_rate": 2.836423322702368e-05,
      "loss": 0.9637,
      "step": 48560
    },
    {
      "epoch": 0.17022574099191454,
      "grad_norm": 3.0625,
      "learning_rate": 2.837007441675721e-05,
      "loss": 0.9968,
      "step": 48570
    },
    {
      "epoch": 0.17026078849881013,
      "grad_norm": 3.109375,
      "learning_rate": 2.8375915606490733e-05,
      "loss": 0.9966,
      "step": 48580
    },
    {
      "epoch": 0.17029583600570575,
      "grad_norm": 3.25,
      "learning_rate": 2.8381756796224258e-05,
      "loss": 1.0883,
      "step": 48590
    },
    {
      "epoch": 0.17033088351260134,
      "grad_norm": 3.21875,
      "learning_rate": 2.838759798595778e-05,
      "loss": 1.0607,
      "step": 48600
    },
    {
      "epoch": 0.17036593101949693,
      "grad_norm": 3.5625,
      "learning_rate": 2.8393439175691304e-05,
      "loss": 0.9689,
      "step": 48610
    },
    {
      "epoch": 0.17040097852639252,
      "grad_norm": 3.453125,
      "learning_rate": 2.839928036542483e-05,
      "loss": 1.0165,
      "step": 48620
    },
    {
      "epoch": 0.1704360260332881,
      "grad_norm": 3.46875,
      "learning_rate": 2.8405121555158354e-05,
      "loss": 0.9609,
      "step": 48630
    },
    {
      "epoch": 0.17047107354018373,
      "grad_norm": 3.421875,
      "learning_rate": 2.8410962744891882e-05,
      "loss": 0.9925,
      "step": 48640
    },
    {
      "epoch": 0.17050612104707932,
      "grad_norm": 3.53125,
      "learning_rate": 2.8416803934625407e-05,
      "loss": 1.0213,
      "step": 48650
    },
    {
      "epoch": 0.17054116855397491,
      "grad_norm": 3.109375,
      "learning_rate": 2.8422645124358932e-05,
      "loss": 1.0433,
      "step": 48660
    },
    {
      "epoch": 0.1705762160608705,
      "grad_norm": 2.84375,
      "learning_rate": 2.8428486314092457e-05,
      "loss": 0.9345,
      "step": 48670
    },
    {
      "epoch": 0.1706112635677661,
      "grad_norm": 3.40625,
      "learning_rate": 2.8434327503825982e-05,
      "loss": 0.9748,
      "step": 48680
    },
    {
      "epoch": 0.17064631107466172,
      "grad_norm": 3.421875,
      "learning_rate": 2.8440168693559503e-05,
      "loss": 1.0538,
      "step": 48690
    },
    {
      "epoch": 0.1706813585815573,
      "grad_norm": 3.3125,
      "learning_rate": 2.8446009883293028e-05,
      "loss": 1.0733,
      "step": 48700
    },
    {
      "epoch": 0.1707164060884529,
      "grad_norm": 3.625,
      "learning_rate": 2.8451851073026553e-05,
      "loss": 0.9443,
      "step": 48710
    },
    {
      "epoch": 0.1707514535953485,
      "grad_norm": 3.25,
      "learning_rate": 2.845769226276008e-05,
      "loss": 0.9351,
      "step": 48720
    },
    {
      "epoch": 0.17078650110224408,
      "grad_norm": 2.9375,
      "learning_rate": 2.8463533452493606e-05,
      "loss": 1.018,
      "step": 48730
    },
    {
      "epoch": 0.1708215486091397,
      "grad_norm": 3.5,
      "learning_rate": 2.846937464222713e-05,
      "loss": 0.996,
      "step": 48740
    },
    {
      "epoch": 0.1708565961160353,
      "grad_norm": 3.5625,
      "learning_rate": 2.8475215831960656e-05,
      "loss": 1.0122,
      "step": 48750
    },
    {
      "epoch": 0.17089164362293088,
      "grad_norm": 3.453125,
      "learning_rate": 2.848105702169418e-05,
      "loss": 0.9795,
      "step": 48760
    },
    {
      "epoch": 0.17092669112982647,
      "grad_norm": 3.078125,
      "learning_rate": 2.8486898211427705e-05,
      "loss": 0.9642,
      "step": 48770
    },
    {
      "epoch": 0.17096173863672207,
      "grad_norm": 3.015625,
      "learning_rate": 2.8492739401161227e-05,
      "loss": 0.9151,
      "step": 48780
    },
    {
      "epoch": 0.17099678614361769,
      "grad_norm": 3.671875,
      "learning_rate": 2.8498580590894752e-05,
      "loss": 1.0415,
      "step": 48790
    },
    {
      "epoch": 0.17103183365051328,
      "grad_norm": 3.125,
      "learning_rate": 2.8504421780628283e-05,
      "loss": 0.9505,
      "step": 48800
    },
    {
      "epoch": 0.17106688115740887,
      "grad_norm": 3.328125,
      "learning_rate": 2.8510262970361805e-05,
      "loss": 1.0356,
      "step": 48810
    },
    {
      "epoch": 0.17110192866430446,
      "grad_norm": 3.171875,
      "learning_rate": 2.851610416009533e-05,
      "loss": 0.9715,
      "step": 48820
    },
    {
      "epoch": 0.17113697617120005,
      "grad_norm": 3.203125,
      "learning_rate": 2.8521945349828854e-05,
      "loss": 1.1118,
      "step": 48830
    },
    {
      "epoch": 0.17117202367809567,
      "grad_norm": 3.515625,
      "learning_rate": 2.852778653956238e-05,
      "loss": 1.0122,
      "step": 48840
    },
    {
      "epoch": 0.17120707118499126,
      "grad_norm": 3.03125,
      "learning_rate": 2.8533627729295904e-05,
      "loss": 1.0263,
      "step": 48850
    },
    {
      "epoch": 0.17124211869188685,
      "grad_norm": 3.234375,
      "learning_rate": 2.853946891902943e-05,
      "loss": 0.9388,
      "step": 48860
    },
    {
      "epoch": 0.17127716619878244,
      "grad_norm": 3.34375,
      "learning_rate": 2.854531010876295e-05,
      "loss": 0.9167,
      "step": 48870
    },
    {
      "epoch": 0.17131221370567803,
      "grad_norm": 3.515625,
      "learning_rate": 2.8551151298496482e-05,
      "loss": 1.0385,
      "step": 48880
    },
    {
      "epoch": 0.17134726121257365,
      "grad_norm": 3.390625,
      "learning_rate": 2.8556992488230007e-05,
      "loss": 1.0696,
      "step": 48890
    },
    {
      "epoch": 0.17138230871946925,
      "grad_norm": 3.46875,
      "learning_rate": 2.856283367796353e-05,
      "loss": 0.9919,
      "step": 48900
    },
    {
      "epoch": 0.17141735622636484,
      "grad_norm": 3.390625,
      "learning_rate": 2.8568674867697053e-05,
      "loss": 0.938,
      "step": 48910
    },
    {
      "epoch": 0.17145240373326043,
      "grad_norm": 3.140625,
      "learning_rate": 2.8574516057430578e-05,
      "loss": 1.0287,
      "step": 48920
    },
    {
      "epoch": 0.17148745124015602,
      "grad_norm": 3.28125,
      "learning_rate": 2.8580357247164103e-05,
      "loss": 1.0583,
      "step": 48930
    },
    {
      "epoch": 0.17152249874705164,
      "grad_norm": 2.9375,
      "learning_rate": 2.8586198436897628e-05,
      "loss": 1.0245,
      "step": 48940
    },
    {
      "epoch": 0.17155754625394723,
      "grad_norm": 3.453125,
      "learning_rate": 2.8592039626631156e-05,
      "loss": 1.049,
      "step": 48950
    },
    {
      "epoch": 0.17159259376084282,
      "grad_norm": 2.671875,
      "learning_rate": 2.859788081636468e-05,
      "loss": 0.9564,
      "step": 48960
    },
    {
      "epoch": 0.1716276412677384,
      "grad_norm": 3.078125,
      "learning_rate": 2.8603722006098206e-05,
      "loss": 0.8916,
      "step": 48970
    },
    {
      "epoch": 0.171662688774634,
      "grad_norm": 2.96875,
      "learning_rate": 2.860956319583173e-05,
      "loss": 1.0287,
      "step": 48980
    },
    {
      "epoch": 0.17169773628152962,
      "grad_norm": 3.359375,
      "learning_rate": 2.8615404385565252e-05,
      "loss": 1.0246,
      "step": 48990
    },
    {
      "epoch": 0.17173278378842521,
      "grad_norm": 3.421875,
      "learning_rate": 2.8621245575298777e-05,
      "loss": 0.9954,
      "step": 49000
    },
    {
      "epoch": 0.1717678312953208,
      "grad_norm": 3.65625,
      "learning_rate": 2.86270867650323e-05,
      "loss": 0.9996,
      "step": 49010
    },
    {
      "epoch": 0.1718028788022164,
      "grad_norm": 3.015625,
      "learning_rate": 2.8632927954765827e-05,
      "loss": 1.0744,
      "step": 49020
    },
    {
      "epoch": 0.171837926309112,
      "grad_norm": 3.609375,
      "learning_rate": 2.8638769144499355e-05,
      "loss": 0.9883,
      "step": 49030
    },
    {
      "epoch": 0.1718729738160076,
      "grad_norm": 3.296875,
      "learning_rate": 2.864461033423288e-05,
      "loss": 0.9405,
      "step": 49040
    },
    {
      "epoch": 0.1719080213229032,
      "grad_norm": 3.625,
      "learning_rate": 2.8650451523966404e-05,
      "loss": 1.0077,
      "step": 49050
    },
    {
      "epoch": 0.1719430688297988,
      "grad_norm": 3.15625,
      "learning_rate": 2.865629271369993e-05,
      "loss": 1.0166,
      "step": 49060
    },
    {
      "epoch": 0.17197811633669438,
      "grad_norm": 3.1875,
      "learning_rate": 2.866213390343345e-05,
      "loss": 1.0089,
      "step": 49070
    },
    {
      "epoch": 0.17201316384358997,
      "grad_norm": 3.3125,
      "learning_rate": 2.8667975093166976e-05,
      "loss": 1.049,
      "step": 49080
    },
    {
      "epoch": 0.1720482113504856,
      "grad_norm": 3.3125,
      "learning_rate": 2.86738162829005e-05,
      "loss": 0.9837,
      "step": 49090
    },
    {
      "epoch": 0.17208325885738118,
      "grad_norm": 3.359375,
      "learning_rate": 2.8679657472634025e-05,
      "loss": 1.0044,
      "step": 49100
    },
    {
      "epoch": 0.17211830636427677,
      "grad_norm": 3.09375,
      "learning_rate": 2.8685498662367553e-05,
      "loss": 0.9504,
      "step": 49110
    },
    {
      "epoch": 0.17215335387117237,
      "grad_norm": 3.359375,
      "learning_rate": 2.869133985210108e-05,
      "loss": 0.9782,
      "step": 49120
    },
    {
      "epoch": 0.17218840137806798,
      "grad_norm": 3.453125,
      "learning_rate": 2.8697181041834603e-05,
      "loss": 1.0047,
      "step": 49130
    },
    {
      "epoch": 0.17222344888496358,
      "grad_norm": 2.859375,
      "learning_rate": 2.8703022231568128e-05,
      "loss": 1.0678,
      "step": 49140
    },
    {
      "epoch": 0.17225849639185917,
      "grad_norm": 3.234375,
      "learning_rate": 2.8708863421301653e-05,
      "loss": 0.9703,
      "step": 49150
    },
    {
      "epoch": 0.17229354389875476,
      "grad_norm": 3.828125,
      "learning_rate": 2.8714704611035174e-05,
      "loss": 0.9655,
      "step": 49160
    },
    {
      "epoch": 0.17232859140565035,
      "grad_norm": 3.28125,
      "learning_rate": 2.87205458007687e-05,
      "loss": 0.976,
      "step": 49170
    },
    {
      "epoch": 0.17236363891254597,
      "grad_norm": 3.59375,
      "learning_rate": 2.8726386990502224e-05,
      "loss": 1.0671,
      "step": 49180
    },
    {
      "epoch": 0.17239868641944156,
      "grad_norm": 3.75,
      "learning_rate": 2.8732228180235756e-05,
      "loss": 0.9659,
      "step": 49190
    },
    {
      "epoch": 0.17243373392633715,
      "grad_norm": 3.375,
      "learning_rate": 2.8738069369969277e-05,
      "loss": 1.0193,
      "step": 49200
    },
    {
      "epoch": 0.17246878143323274,
      "grad_norm": 3.015625,
      "learning_rate": 2.8743910559702802e-05,
      "loss": 0.975,
      "step": 49210
    },
    {
      "epoch": 0.17250382894012833,
      "grad_norm": 2.921875,
      "learning_rate": 2.8749751749436327e-05,
      "loss": 0.9573,
      "step": 49220
    },
    {
      "epoch": 0.17253887644702395,
      "grad_norm": 3.359375,
      "learning_rate": 2.875559293916985e-05,
      "loss": 1.0204,
      "step": 49230
    },
    {
      "epoch": 0.17257392395391954,
      "grad_norm": 3.140625,
      "learning_rate": 2.8761434128903376e-05,
      "loss": 0.9565,
      "step": 49240
    },
    {
      "epoch": 0.17260897146081514,
      "grad_norm": 3.46875,
      "learning_rate": 2.8767275318636898e-05,
      "loss": 0.9646,
      "step": 49250
    },
    {
      "epoch": 0.17264401896771073,
      "grad_norm": 3.28125,
      "learning_rate": 2.8773116508370423e-05,
      "loss": 1.0266,
      "step": 49260
    },
    {
      "epoch": 0.17267906647460632,
      "grad_norm": 3.734375,
      "learning_rate": 2.8778957698103954e-05,
      "loss": 1.0513,
      "step": 49270
    },
    {
      "epoch": 0.17271411398150194,
      "grad_norm": 3.4375,
      "learning_rate": 2.8784798887837476e-05,
      "loss": 1.0226,
      "step": 49280
    },
    {
      "epoch": 0.17274916148839753,
      "grad_norm": 2.78125,
      "learning_rate": 2.8790640077571e-05,
      "loss": 0.9894,
      "step": 49290
    },
    {
      "epoch": 0.17278420899529312,
      "grad_norm": 3.0625,
      "learning_rate": 2.8796481267304526e-05,
      "loss": 0.9668,
      "step": 49300
    },
    {
      "epoch": 0.1728192565021887,
      "grad_norm": 4.0,
      "learning_rate": 2.880232245703805e-05,
      "loss": 0.987,
      "step": 49310
    },
    {
      "epoch": 0.1728543040090843,
      "grad_norm": 3.734375,
      "learning_rate": 2.8808163646771575e-05,
      "loss": 1.0362,
      "step": 49320
    },
    {
      "epoch": 0.17288935151597992,
      "grad_norm": 3.40625,
      "learning_rate": 2.88140048365051e-05,
      "loss": 0.9596,
      "step": 49330
    },
    {
      "epoch": 0.1729243990228755,
      "grad_norm": 3.328125,
      "learning_rate": 2.8819846026238628e-05,
      "loss": 0.9456,
      "step": 49340
    },
    {
      "epoch": 0.1729594465297711,
      "grad_norm": 3.59375,
      "learning_rate": 2.8825687215972153e-05,
      "loss": 1.0341,
      "step": 49350
    },
    {
      "epoch": 0.1729944940366667,
      "grad_norm": 3.5625,
      "learning_rate": 2.8831528405705678e-05,
      "loss": 1.0192,
      "step": 49360
    },
    {
      "epoch": 0.1730295415435623,
      "grad_norm": 3.359375,
      "learning_rate": 2.88373695954392e-05,
      "loss": 0.9641,
      "step": 49370
    },
    {
      "epoch": 0.1730645890504579,
      "grad_norm": 3.21875,
      "learning_rate": 2.8843210785172724e-05,
      "loss": 1.0339,
      "step": 49380
    },
    {
      "epoch": 0.1730996365573535,
      "grad_norm": 3.3125,
      "learning_rate": 2.884905197490625e-05,
      "loss": 1.0403,
      "step": 49390
    },
    {
      "epoch": 0.1731346840642491,
      "grad_norm": 3.171875,
      "learning_rate": 2.8854893164639774e-05,
      "loss": 1.0574,
      "step": 49400
    },
    {
      "epoch": 0.17316973157114468,
      "grad_norm": 3.25,
      "learning_rate": 2.88607343543733e-05,
      "loss": 1.1507,
      "step": 49410
    },
    {
      "epoch": 0.17320477907804027,
      "grad_norm": 3.4375,
      "learning_rate": 2.8866575544106827e-05,
      "loss": 0.9462,
      "step": 49420
    },
    {
      "epoch": 0.1732398265849359,
      "grad_norm": 3.09375,
      "learning_rate": 2.8872416733840352e-05,
      "loss": 1.0305,
      "step": 49430
    },
    {
      "epoch": 0.17327487409183148,
      "grad_norm": 3.21875,
      "learning_rate": 2.8878257923573877e-05,
      "loss": 0.9661,
      "step": 49440
    },
    {
      "epoch": 0.17330992159872707,
      "grad_norm": 3.5625,
      "learning_rate": 2.88840991133074e-05,
      "loss": 1.0762,
      "step": 49450
    },
    {
      "epoch": 0.17334496910562266,
      "grad_norm": 3.203125,
      "learning_rate": 2.8889940303040923e-05,
      "loss": 0.9533,
      "step": 49460
    },
    {
      "epoch": 0.17338001661251826,
      "grad_norm": 3.140625,
      "learning_rate": 2.8895781492774448e-05,
      "loss": 0.9801,
      "step": 49470
    },
    {
      "epoch": 0.17341506411941388,
      "grad_norm": 3.234375,
      "learning_rate": 2.8901622682507973e-05,
      "loss": 0.9711,
      "step": 49480
    },
    {
      "epoch": 0.17345011162630947,
      "grad_norm": 3.3125,
      "learning_rate": 2.8907463872241498e-05,
      "loss": 0.9772,
      "step": 49490
    },
    {
      "epoch": 0.17348515913320506,
      "grad_norm": 2.9375,
      "learning_rate": 2.8913305061975026e-05,
      "loss": 1.028,
      "step": 49500
    },
    {
      "epoch": 0.17352020664010065,
      "grad_norm": 3.234375,
      "learning_rate": 2.891914625170855e-05,
      "loss": 0.9546,
      "step": 49510
    },
    {
      "epoch": 0.17355525414699624,
      "grad_norm": 3.375,
      "learning_rate": 2.8924987441442076e-05,
      "loss": 1.0066,
      "step": 49520
    },
    {
      "epoch": 0.17359030165389186,
      "grad_norm": 3.609375,
      "learning_rate": 2.89308286311756e-05,
      "loss": 0.985,
      "step": 49530
    },
    {
      "epoch": 0.17362534916078745,
      "grad_norm": 3.296875,
      "learning_rate": 2.8936669820909125e-05,
      "loss": 1.0111,
      "step": 49540
    },
    {
      "epoch": 0.17366039666768304,
      "grad_norm": 3.28125,
      "learning_rate": 2.8942511010642647e-05,
      "loss": 1.086,
      "step": 49550
    },
    {
      "epoch": 0.17369544417457863,
      "grad_norm": 2.75,
      "learning_rate": 2.894835220037617e-05,
      "loss": 0.9321,
      "step": 49560
    },
    {
      "epoch": 0.17373049168147423,
      "grad_norm": 2.984375,
      "learning_rate": 2.8954193390109696e-05,
      "loss": 0.9951,
      "step": 49570
    },
    {
      "epoch": 0.17376553918836984,
      "grad_norm": 3.59375,
      "learning_rate": 2.8960034579843225e-05,
      "loss": 0.9665,
      "step": 49580
    },
    {
      "epoch": 0.17380058669526544,
      "grad_norm": 3.578125,
      "learning_rate": 2.896587576957675e-05,
      "loss": 0.9944,
      "step": 49590
    },
    {
      "epoch": 0.17383563420216103,
      "grad_norm": 3.296875,
      "learning_rate": 2.8971716959310274e-05,
      "loss": 0.9643,
      "step": 49600
    },
    {
      "epoch": 0.17387068170905662,
      "grad_norm": 3.203125,
      "learning_rate": 2.89775581490438e-05,
      "loss": 1.1167,
      "step": 49610
    },
    {
      "epoch": 0.1739057292159522,
      "grad_norm": 3.578125,
      "learning_rate": 2.8983399338777324e-05,
      "loss": 0.9311,
      "step": 49620
    },
    {
      "epoch": 0.17394077672284783,
      "grad_norm": 2.71875,
      "learning_rate": 2.8989240528510845e-05,
      "loss": 1.0004,
      "step": 49630
    },
    {
      "epoch": 0.17397582422974342,
      "grad_norm": 3.046875,
      "learning_rate": 2.899508171824437e-05,
      "loss": 0.9209,
      "step": 49640
    },
    {
      "epoch": 0.174010871736639,
      "grad_norm": 3.578125,
      "learning_rate": 2.9000922907977902e-05,
      "loss": 1.0204,
      "step": 49650
    },
    {
      "epoch": 0.1740459192435346,
      "grad_norm": 3.328125,
      "learning_rate": 2.9006764097711427e-05,
      "loss": 0.9561,
      "step": 49660
    },
    {
      "epoch": 0.17408096675043022,
      "grad_norm": 3.28125,
      "learning_rate": 2.9012605287444948e-05,
      "loss": 1.0524,
      "step": 49670
    },
    {
      "epoch": 0.1741160142573258,
      "grad_norm": 3.546875,
      "learning_rate": 2.9018446477178473e-05,
      "loss": 0.9756,
      "step": 49680
    },
    {
      "epoch": 0.1741510617642214,
      "grad_norm": 3.640625,
      "learning_rate": 2.9024287666911998e-05,
      "loss": 1.0309,
      "step": 49690
    },
    {
      "epoch": 0.174186109271117,
      "grad_norm": 3.84375,
      "learning_rate": 2.9030128856645523e-05,
      "loss": 1.086,
      "step": 49700
    },
    {
      "epoch": 0.1742211567780126,
      "grad_norm": 3.234375,
      "learning_rate": 2.9035970046379048e-05,
      "loss": 0.9971,
      "step": 49710
    },
    {
      "epoch": 0.1742562042849082,
      "grad_norm": 3.21875,
      "learning_rate": 2.904181123611257e-05,
      "loss": 1.0531,
      "step": 49720
    },
    {
      "epoch": 0.1742912517918038,
      "grad_norm": 3.3125,
      "learning_rate": 2.90476524258461e-05,
      "loss": 1.1043,
      "step": 49730
    },
    {
      "epoch": 0.1743262992986994,
      "grad_norm": 3.859375,
      "learning_rate": 2.9053493615579625e-05,
      "loss": 1.0146,
      "step": 49740
    },
    {
      "epoch": 0.17436134680559498,
      "grad_norm": 2.71875,
      "learning_rate": 2.905933480531315e-05,
      "loss": 0.9629,
      "step": 49750
    },
    {
      "epoch": 0.17439639431249057,
      "grad_norm": 3.453125,
      "learning_rate": 2.9065175995046672e-05,
      "loss": 0.9983,
      "step": 49760
    },
    {
      "epoch": 0.1744314418193862,
      "grad_norm": 3.5625,
      "learning_rate": 2.9071017184780197e-05,
      "loss": 0.981,
      "step": 49770
    },
    {
      "epoch": 0.17446648932628178,
      "grad_norm": 3.1875,
      "learning_rate": 2.907685837451372e-05,
      "loss": 0.9905,
      "step": 49780
    },
    {
      "epoch": 0.17450153683317737,
      "grad_norm": 3.0625,
      "learning_rate": 2.9082699564247246e-05,
      "loss": 0.9456,
      "step": 49790
    },
    {
      "epoch": 0.17453658434007296,
      "grad_norm": 3.640625,
      "learning_rate": 2.908854075398077e-05,
      "loss": 1.0596,
      "step": 49800
    },
    {
      "epoch": 0.17457163184696856,
      "grad_norm": 3.34375,
      "learning_rate": 2.90943819437143e-05,
      "loss": 1.0812,
      "step": 49810
    },
    {
      "epoch": 0.17460667935386417,
      "grad_norm": 3.484375,
      "learning_rate": 2.9100223133447824e-05,
      "loss": 1.1086,
      "step": 49820
    },
    {
      "epoch": 0.17464172686075977,
      "grad_norm": 3.453125,
      "learning_rate": 2.910606432318135e-05,
      "loss": 0.9721,
      "step": 49830
    },
    {
      "epoch": 0.17467677436765536,
      "grad_norm": 3.140625,
      "learning_rate": 2.911190551291487e-05,
      "loss": 0.9293,
      "step": 49840
    },
    {
      "epoch": 0.17471182187455095,
      "grad_norm": 3.421875,
      "learning_rate": 2.9117746702648395e-05,
      "loss": 0.9924,
      "step": 49850
    },
    {
      "epoch": 0.17474686938144654,
      "grad_norm": 4.125,
      "learning_rate": 2.912358789238192e-05,
      "loss": 0.9857,
      "step": 49860
    },
    {
      "epoch": 0.17478191688834216,
      "grad_norm": 3.046875,
      "learning_rate": 2.9129429082115445e-05,
      "loss": 0.9808,
      "step": 49870
    },
    {
      "epoch": 0.17481696439523775,
      "grad_norm": 3.21875,
      "learning_rate": 2.913527027184897e-05,
      "loss": 1.0012,
      "step": 49880
    },
    {
      "epoch": 0.17485201190213334,
      "grad_norm": 3.40625,
      "learning_rate": 2.9141111461582498e-05,
      "loss": 0.997,
      "step": 49890
    },
    {
      "epoch": 0.17488705940902893,
      "grad_norm": 3.015625,
      "learning_rate": 2.9146952651316023e-05,
      "loss": 0.9911,
      "step": 49900
    },
    {
      "epoch": 0.17492210691592452,
      "grad_norm": 3.453125,
      "learning_rate": 2.9152793841049548e-05,
      "loss": 1.02,
      "step": 49910
    },
    {
      "epoch": 0.17495715442282014,
      "grad_norm": 2.984375,
      "learning_rate": 2.9158635030783073e-05,
      "loss": 0.9441,
      "step": 49920
    },
    {
      "epoch": 0.17499220192971573,
      "grad_norm": 3.234375,
      "learning_rate": 2.9164476220516594e-05,
      "loss": 0.9773,
      "step": 49930
    },
    {
      "epoch": 0.17502724943661133,
      "grad_norm": 3.046875,
      "learning_rate": 2.917031741025012e-05,
      "loss": 0.9988,
      "step": 49940
    },
    {
      "epoch": 0.17506229694350692,
      "grad_norm": 3.1875,
      "learning_rate": 2.9176158599983644e-05,
      "loss": 1.0181,
      "step": 49950
    },
    {
      "epoch": 0.1750973444504025,
      "grad_norm": 3.5,
      "learning_rate": 2.9181999789717172e-05,
      "loss": 1.0238,
      "step": 49960
    },
    {
      "epoch": 0.17513239195729813,
      "grad_norm": 3.046875,
      "learning_rate": 2.9187840979450697e-05,
      "loss": 1.0248,
      "step": 49970
    },
    {
      "epoch": 0.17516743946419372,
      "grad_norm": 3.328125,
      "learning_rate": 2.9193682169184222e-05,
      "loss": 0.9943,
      "step": 49980
    },
    {
      "epoch": 0.1752024869710893,
      "grad_norm": 3.421875,
      "learning_rate": 2.9199523358917747e-05,
      "loss": 1.0246,
      "step": 49990
    },
    {
      "epoch": 0.1752375344779849,
      "grad_norm": 2.734375,
      "learning_rate": 2.920536454865127e-05,
      "loss": 1.0262,
      "step": 50000
    },
    {
      "epoch": 0.1752375344779849,
      "eval_loss": 0.9381079077720642,
      "eval_runtime": 557.1895,
      "eval_samples_per_second": 682.777,
      "eval_steps_per_second": 56.898,
      "step": 50000
    },
    {
      "epoch": 0.1752725819848805,
      "grad_norm": 3.4375,
      "learning_rate": 2.9211205738384796e-05,
      "loss": 1.0419,
      "step": 50010
    },
    {
      "epoch": 0.1753076294917761,
      "grad_norm": 2.765625,
      "learning_rate": 2.9217046928118318e-05,
      "loss": 0.9305,
      "step": 50020
    },
    {
      "epoch": 0.1753426769986717,
      "grad_norm": 3.03125,
      "learning_rate": 2.9222888117851843e-05,
      "loss": 0.9811,
      "step": 50030
    },
    {
      "epoch": 0.1753777245055673,
      "grad_norm": 3.359375,
      "learning_rate": 2.9228729307585374e-05,
      "loss": 0.9619,
      "step": 50040
    },
    {
      "epoch": 0.1754127720124629,
      "grad_norm": 3.890625,
      "learning_rate": 2.9234570497318896e-05,
      "loss": 1.0192,
      "step": 50050
    },
    {
      "epoch": 0.17544781951935848,
      "grad_norm": 2.84375,
      "learning_rate": 2.924041168705242e-05,
      "loss": 1.0114,
      "step": 50060
    },
    {
      "epoch": 0.1754828670262541,
      "grad_norm": 3.0625,
      "learning_rate": 2.9246252876785945e-05,
      "loss": 1.0089,
      "step": 50070
    },
    {
      "epoch": 0.1755179145331497,
      "grad_norm": 3.28125,
      "learning_rate": 2.925209406651947e-05,
      "loss": 1.0089,
      "step": 50080
    },
    {
      "epoch": 0.17555296204004528,
      "grad_norm": 3.359375,
      "learning_rate": 2.9257935256252995e-05,
      "loss": 1.0116,
      "step": 50090
    },
    {
      "epoch": 0.17558800954694087,
      "grad_norm": 2.78125,
      "learning_rate": 2.926377644598652e-05,
      "loss": 1.0702,
      "step": 50100
    },
    {
      "epoch": 0.17562305705383646,
      "grad_norm": 2.90625,
      "learning_rate": 2.926961763572004e-05,
      "loss": 1.0471,
      "step": 50110
    },
    {
      "epoch": 0.17565810456073208,
      "grad_norm": 2.953125,
      "learning_rate": 2.9275458825453573e-05,
      "loss": 1.0375,
      "step": 50120
    },
    {
      "epoch": 0.17569315206762767,
      "grad_norm": 3.6875,
      "learning_rate": 2.9281300015187098e-05,
      "loss": 1.0553,
      "step": 50130
    },
    {
      "epoch": 0.17572819957452326,
      "grad_norm": 3.046875,
      "learning_rate": 2.928714120492062e-05,
      "loss": 1.0006,
      "step": 50140
    },
    {
      "epoch": 0.17576324708141886,
      "grad_norm": 3.078125,
      "learning_rate": 2.9292982394654144e-05,
      "loss": 0.998,
      "step": 50150
    },
    {
      "epoch": 0.17579829458831445,
      "grad_norm": 3.484375,
      "learning_rate": 2.929882358438767e-05,
      "loss": 0.997,
      "step": 50160
    },
    {
      "epoch": 0.17583334209521007,
      "grad_norm": 3.078125,
      "learning_rate": 2.9304664774121194e-05,
      "loss": 0.9366,
      "step": 50170
    },
    {
      "epoch": 0.17586838960210566,
      "grad_norm": 3.015625,
      "learning_rate": 2.931050596385472e-05,
      "loss": 0.8994,
      "step": 50180
    },
    {
      "epoch": 0.17590343710900125,
      "grad_norm": 3.5,
      "learning_rate": 2.931634715358824e-05,
      "loss": 0.956,
      "step": 50190
    },
    {
      "epoch": 0.17593848461589684,
      "grad_norm": 3.734375,
      "learning_rate": 2.9322188343321772e-05,
      "loss": 1.0007,
      "step": 50200
    },
    {
      "epoch": 0.17597353212279246,
      "grad_norm": 3.28125,
      "learning_rate": 2.9328029533055297e-05,
      "loss": 0.9817,
      "step": 50210
    },
    {
      "epoch": 0.17600857962968805,
      "grad_norm": 3.28125,
      "learning_rate": 2.933387072278882e-05,
      "loss": 0.9582,
      "step": 50220
    },
    {
      "epoch": 0.17604362713658364,
      "grad_norm": 3.484375,
      "learning_rate": 2.9339711912522343e-05,
      "loss": 0.9313,
      "step": 50230
    },
    {
      "epoch": 0.17607867464347923,
      "grad_norm": 3.546875,
      "learning_rate": 2.9345553102255868e-05,
      "loss": 0.9966,
      "step": 50240
    },
    {
      "epoch": 0.17611372215037482,
      "grad_norm": 3.015625,
      "learning_rate": 2.9351394291989393e-05,
      "loss": 0.9899,
      "step": 50250
    },
    {
      "epoch": 0.17614876965727044,
      "grad_norm": 3.5625,
      "learning_rate": 2.9357235481722917e-05,
      "loss": 1.0116,
      "step": 50260
    },
    {
      "epoch": 0.17618381716416603,
      "grad_norm": 3.21875,
      "learning_rate": 2.9363076671456442e-05,
      "loss": 0.982,
      "step": 50270
    },
    {
      "epoch": 0.17621886467106163,
      "grad_norm": 3.03125,
      "learning_rate": 2.936891786118997e-05,
      "loss": 0.9632,
      "step": 50280
    },
    {
      "epoch": 0.17625391217795722,
      "grad_norm": 3.296875,
      "learning_rate": 2.9374759050923495e-05,
      "loss": 1.0073,
      "step": 50290
    },
    {
      "epoch": 0.1762889596848528,
      "grad_norm": 3.28125,
      "learning_rate": 2.938060024065702e-05,
      "loss": 1.0086,
      "step": 50300
    },
    {
      "epoch": 0.17632400719174843,
      "grad_norm": 3.3125,
      "learning_rate": 2.9386441430390545e-05,
      "loss": 1.0167,
      "step": 50310
    },
    {
      "epoch": 0.17635905469864402,
      "grad_norm": 3.40625,
      "learning_rate": 2.9392282620124066e-05,
      "loss": 1.0602,
      "step": 50320
    },
    {
      "epoch": 0.1763941022055396,
      "grad_norm": 3.40625,
      "learning_rate": 2.939812380985759e-05,
      "loss": 0.9678,
      "step": 50330
    },
    {
      "epoch": 0.1764291497124352,
      "grad_norm": 3.453125,
      "learning_rate": 2.9403964999591116e-05,
      "loss": 1.0064,
      "step": 50340
    },
    {
      "epoch": 0.1764641972193308,
      "grad_norm": 2.765625,
      "learning_rate": 2.9409806189324644e-05,
      "loss": 0.8789,
      "step": 50350
    },
    {
      "epoch": 0.1764992447262264,
      "grad_norm": 3.640625,
      "learning_rate": 2.941564737905817e-05,
      "loss": 1.0633,
      "step": 50360
    },
    {
      "epoch": 0.176534292233122,
      "grad_norm": 3.171875,
      "learning_rate": 2.9421488568791694e-05,
      "loss": 0.9957,
      "step": 50370
    },
    {
      "epoch": 0.1765693397400176,
      "grad_norm": 3.25,
      "learning_rate": 2.942732975852522e-05,
      "loss": 1.0673,
      "step": 50380
    },
    {
      "epoch": 0.17660438724691319,
      "grad_norm": 3.3125,
      "learning_rate": 2.9433170948258744e-05,
      "loss": 1.0428,
      "step": 50390
    },
    {
      "epoch": 0.17663943475380878,
      "grad_norm": 3.640625,
      "learning_rate": 2.9439012137992265e-05,
      "loss": 1.0443,
      "step": 50400
    },
    {
      "epoch": 0.1766744822607044,
      "grad_norm": 3.09375,
      "learning_rate": 2.944485332772579e-05,
      "loss": 1.1044,
      "step": 50410
    },
    {
      "epoch": 0.1767095297676,
      "grad_norm": 3.40625,
      "learning_rate": 2.9450694517459315e-05,
      "loss": 0.9768,
      "step": 50420
    },
    {
      "epoch": 0.17674457727449558,
      "grad_norm": 3.140625,
      "learning_rate": 2.9456535707192847e-05,
      "loss": 1.0159,
      "step": 50430
    },
    {
      "epoch": 0.17677962478139117,
      "grad_norm": 3.40625,
      "learning_rate": 2.9462376896926368e-05,
      "loss": 0.9473,
      "step": 50440
    },
    {
      "epoch": 0.17681467228828676,
      "grad_norm": 2.921875,
      "learning_rate": 2.9468218086659893e-05,
      "loss": 0.9652,
      "step": 50450
    },
    {
      "epoch": 0.17684971979518238,
      "grad_norm": 3.125,
      "learning_rate": 2.9474059276393418e-05,
      "loss": 0.9789,
      "step": 50460
    },
    {
      "epoch": 0.17688476730207797,
      "grad_norm": 3.015625,
      "learning_rate": 2.9479900466126943e-05,
      "loss": 1.0139,
      "step": 50470
    },
    {
      "epoch": 0.17691981480897356,
      "grad_norm": 3.359375,
      "learning_rate": 2.9485741655860467e-05,
      "loss": 0.959,
      "step": 50480
    },
    {
      "epoch": 0.17695486231586915,
      "grad_norm": 3.640625,
      "learning_rate": 2.949158284559399e-05,
      "loss": 1.0737,
      "step": 50490
    },
    {
      "epoch": 0.17698990982276475,
      "grad_norm": 3.21875,
      "learning_rate": 2.9497424035327514e-05,
      "loss": 0.9822,
      "step": 50500
    },
    {
      "epoch": 0.17702495732966037,
      "grad_norm": 3.109375,
      "learning_rate": 2.9503265225061045e-05,
      "loss": 0.9883,
      "step": 50510
    },
    {
      "epoch": 0.17706000483655596,
      "grad_norm": 3.46875,
      "learning_rate": 2.9509106414794567e-05,
      "loss": 1.0248,
      "step": 50520
    },
    {
      "epoch": 0.17709505234345155,
      "grad_norm": 3.40625,
      "learning_rate": 2.951494760452809e-05,
      "loss": 1.0581,
      "step": 50530
    },
    {
      "epoch": 0.17713009985034714,
      "grad_norm": 3.375,
      "learning_rate": 2.9520788794261616e-05,
      "loss": 0.9663,
      "step": 50540
    },
    {
      "epoch": 0.17716514735724273,
      "grad_norm": 3.375,
      "learning_rate": 2.952662998399514e-05,
      "loss": 0.9678,
      "step": 50550
    },
    {
      "epoch": 0.17720019486413835,
      "grad_norm": 3.046875,
      "learning_rate": 2.9532471173728666e-05,
      "loss": 1.0106,
      "step": 50560
    },
    {
      "epoch": 0.17723524237103394,
      "grad_norm": 3.1875,
      "learning_rate": 2.953831236346219e-05,
      "loss": 1.07,
      "step": 50570
    },
    {
      "epoch": 0.17727028987792953,
      "grad_norm": 3.234375,
      "learning_rate": 2.9544153553195712e-05,
      "loss": 1.034,
      "step": 50580
    },
    {
      "epoch": 0.17730533738482512,
      "grad_norm": 3.328125,
      "learning_rate": 2.9549994742929244e-05,
      "loss": 1.0249,
      "step": 50590
    },
    {
      "epoch": 0.17734038489172071,
      "grad_norm": 3.5,
      "learning_rate": 2.955583593266277e-05,
      "loss": 0.9567,
      "step": 50600
    },
    {
      "epoch": 0.17737543239861633,
      "grad_norm": 2.921875,
      "learning_rate": 2.956167712239629e-05,
      "loss": 0.9868,
      "step": 50610
    },
    {
      "epoch": 0.17741047990551193,
      "grad_norm": 3.46875,
      "learning_rate": 2.9567518312129815e-05,
      "loss": 1.0166,
      "step": 50620
    },
    {
      "epoch": 0.17744552741240752,
      "grad_norm": 3.234375,
      "learning_rate": 2.957335950186334e-05,
      "loss": 0.9794,
      "step": 50630
    },
    {
      "epoch": 0.1774805749193031,
      "grad_norm": 3.296875,
      "learning_rate": 2.9579200691596865e-05,
      "loss": 0.9889,
      "step": 50640
    },
    {
      "epoch": 0.1775156224261987,
      "grad_norm": 3.03125,
      "learning_rate": 2.958504188133039e-05,
      "loss": 1.015,
      "step": 50650
    },
    {
      "epoch": 0.17755066993309432,
      "grad_norm": 2.765625,
      "learning_rate": 2.9590883071063918e-05,
      "loss": 0.9664,
      "step": 50660
    },
    {
      "epoch": 0.1775857174399899,
      "grad_norm": 3.21875,
      "learning_rate": 2.9596724260797443e-05,
      "loss": 0.9837,
      "step": 50670
    },
    {
      "epoch": 0.1776207649468855,
      "grad_norm": 3.4375,
      "learning_rate": 2.9602565450530968e-05,
      "loss": 1.0219,
      "step": 50680
    },
    {
      "epoch": 0.1776558124537811,
      "grad_norm": 3.046875,
      "learning_rate": 2.9608406640264492e-05,
      "loss": 0.9567,
      "step": 50690
    },
    {
      "epoch": 0.17769085996067668,
      "grad_norm": 3.4375,
      "learning_rate": 2.9614247829998014e-05,
      "loss": 1.0508,
      "step": 50700
    },
    {
      "epoch": 0.1777259074675723,
      "grad_norm": 2.953125,
      "learning_rate": 2.962008901973154e-05,
      "loss": 1.1171,
      "step": 50710
    },
    {
      "epoch": 0.1777609549744679,
      "grad_norm": 4.25,
      "learning_rate": 2.9625930209465064e-05,
      "loss": 1.0337,
      "step": 50720
    },
    {
      "epoch": 0.17779600248136349,
      "grad_norm": 3.484375,
      "learning_rate": 2.963177139919859e-05,
      "loss": 0.9703,
      "step": 50730
    },
    {
      "epoch": 0.17783104998825908,
      "grad_norm": 3.21875,
      "learning_rate": 2.9637612588932117e-05,
      "loss": 0.9976,
      "step": 50740
    },
    {
      "epoch": 0.1778660974951547,
      "grad_norm": 3.5,
      "learning_rate": 2.964345377866564e-05,
      "loss": 1.0163,
      "step": 50750
    },
    {
      "epoch": 0.1779011450020503,
      "grad_norm": 3.5,
      "learning_rate": 2.9649294968399166e-05,
      "loss": 0.996,
      "step": 50760
    },
    {
      "epoch": 0.17793619250894588,
      "grad_norm": 3.609375,
      "learning_rate": 2.965513615813269e-05,
      "loss": 0.9821,
      "step": 50770
    },
    {
      "epoch": 0.17797124001584147,
      "grad_norm": 3.1875,
      "learning_rate": 2.9660977347866216e-05,
      "loss": 1.0962,
      "step": 50780
    },
    {
      "epoch": 0.17800628752273706,
      "grad_norm": 3.796875,
      "learning_rate": 2.9666818537599738e-05,
      "loss": 1.0222,
      "step": 50790
    },
    {
      "epoch": 0.17804133502963268,
      "grad_norm": 2.90625,
      "learning_rate": 2.9672659727333262e-05,
      "loss": 0.9348,
      "step": 50800
    },
    {
      "epoch": 0.17807638253652827,
      "grad_norm": 3.375,
      "learning_rate": 2.9678500917066787e-05,
      "loss": 1.0895,
      "step": 50810
    },
    {
      "epoch": 0.17811143004342386,
      "grad_norm": 3.0,
      "learning_rate": 2.9684342106800315e-05,
      "loss": 1.0382,
      "step": 50820
    },
    {
      "epoch": 0.17814647755031945,
      "grad_norm": 3.734375,
      "learning_rate": 2.969018329653384e-05,
      "loss": 1.0547,
      "step": 50830
    },
    {
      "epoch": 0.17818152505721505,
      "grad_norm": 3.390625,
      "learning_rate": 2.9696024486267365e-05,
      "loss": 1.042,
      "step": 50840
    },
    {
      "epoch": 0.17821657256411066,
      "grad_norm": 3.140625,
      "learning_rate": 2.970186567600089e-05,
      "loss": 0.9609,
      "step": 50850
    },
    {
      "epoch": 0.17825162007100626,
      "grad_norm": 3.3125,
      "learning_rate": 2.9707706865734415e-05,
      "loss": 1.0538,
      "step": 50860
    },
    {
      "epoch": 0.17828666757790185,
      "grad_norm": 3.03125,
      "learning_rate": 2.9713548055467936e-05,
      "loss": 0.9673,
      "step": 50870
    },
    {
      "epoch": 0.17832171508479744,
      "grad_norm": 3.328125,
      "learning_rate": 2.971938924520146e-05,
      "loss": 1.0304,
      "step": 50880
    },
    {
      "epoch": 0.17835676259169303,
      "grad_norm": 3.46875,
      "learning_rate": 2.9725230434934986e-05,
      "loss": 0.9294,
      "step": 50890
    },
    {
      "epoch": 0.17839181009858865,
      "grad_norm": 3.140625,
      "learning_rate": 2.9731071624668518e-05,
      "loss": 1.0661,
      "step": 50900
    },
    {
      "epoch": 0.17842685760548424,
      "grad_norm": 3.8125,
      "learning_rate": 2.973691281440204e-05,
      "loss": 1.0724,
      "step": 50910
    },
    {
      "epoch": 0.17846190511237983,
      "grad_norm": 3.328125,
      "learning_rate": 2.9742754004135564e-05,
      "loss": 1.017,
      "step": 50920
    },
    {
      "epoch": 0.17849695261927542,
      "grad_norm": 3.296875,
      "learning_rate": 2.974859519386909e-05,
      "loss": 1.0281,
      "step": 50930
    },
    {
      "epoch": 0.17853200012617101,
      "grad_norm": 3.265625,
      "learning_rate": 2.9754436383602614e-05,
      "loss": 0.9735,
      "step": 50940
    },
    {
      "epoch": 0.17856704763306663,
      "grad_norm": 3.359375,
      "learning_rate": 2.976027757333614e-05,
      "loss": 0.9322,
      "step": 50950
    },
    {
      "epoch": 0.17860209513996222,
      "grad_norm": 3.078125,
      "learning_rate": 2.976611876306966e-05,
      "loss": 0.9293,
      "step": 50960
    },
    {
      "epoch": 0.17863714264685782,
      "grad_norm": 2.9375,
      "learning_rate": 2.9771959952803185e-05,
      "loss": 1.0003,
      "step": 50970
    },
    {
      "epoch": 0.1786721901537534,
      "grad_norm": 3.25,
      "learning_rate": 2.9777801142536716e-05,
      "loss": 0.9052,
      "step": 50980
    },
    {
      "epoch": 0.178707237660649,
      "grad_norm": 3.28125,
      "learning_rate": 2.978364233227024e-05,
      "loss": 0.9891,
      "step": 50990
    },
    {
      "epoch": 0.17874228516754462,
      "grad_norm": 3.03125,
      "learning_rate": 2.9789483522003763e-05,
      "loss": 1.0095,
      "step": 51000
    },
    {
      "epoch": 0.1787773326744402,
      "grad_norm": 3.609375,
      "learning_rate": 2.9795324711737287e-05,
      "loss": 1.0621,
      "step": 51010
    },
    {
      "epoch": 0.1788123801813358,
      "grad_norm": 3.234375,
      "learning_rate": 2.9801165901470812e-05,
      "loss": 0.9435,
      "step": 51020
    },
    {
      "epoch": 0.1788474276882314,
      "grad_norm": 2.9375,
      "learning_rate": 2.9807007091204337e-05,
      "loss": 0.9553,
      "step": 51030
    },
    {
      "epoch": 0.17888247519512698,
      "grad_norm": 4.28125,
      "learning_rate": 2.9812848280937862e-05,
      "loss": 1.0129,
      "step": 51040
    },
    {
      "epoch": 0.1789175227020226,
      "grad_norm": 3.4375,
      "learning_rate": 2.981868947067139e-05,
      "loss": 1.0201,
      "step": 51050
    },
    {
      "epoch": 0.1789525702089182,
      "grad_norm": 3.171875,
      "learning_rate": 2.9824530660404915e-05,
      "loss": 1.002,
      "step": 51060
    },
    {
      "epoch": 0.17898761771581378,
      "grad_norm": 3.765625,
      "learning_rate": 2.983037185013844e-05,
      "loss": 0.9872,
      "step": 51070
    },
    {
      "epoch": 0.17902266522270938,
      "grad_norm": 4.0,
      "learning_rate": 2.983621303987196e-05,
      "loss": 1.117,
      "step": 51080
    },
    {
      "epoch": 0.17905771272960497,
      "grad_norm": 3.4375,
      "learning_rate": 2.9842054229605486e-05,
      "loss": 1.0626,
      "step": 51090
    },
    {
      "epoch": 0.1790927602365006,
      "grad_norm": 3.6875,
      "learning_rate": 2.984789541933901e-05,
      "loss": 1.0357,
      "step": 51100
    },
    {
      "epoch": 0.17912780774339618,
      "grad_norm": 3.34375,
      "learning_rate": 2.9853736609072536e-05,
      "loss": 1.0408,
      "step": 51110
    },
    {
      "epoch": 0.17916285525029177,
      "grad_norm": 3.34375,
      "learning_rate": 2.985957779880606e-05,
      "loss": 1.0158,
      "step": 51120
    },
    {
      "epoch": 0.17919790275718736,
      "grad_norm": 2.65625,
      "learning_rate": 2.986541898853959e-05,
      "loss": 1.0079,
      "step": 51130
    },
    {
      "epoch": 0.17923295026408295,
      "grad_norm": 3.0,
      "learning_rate": 2.9871260178273114e-05,
      "loss": 1.0418,
      "step": 51140
    },
    {
      "epoch": 0.17926799777097857,
      "grad_norm": 3.578125,
      "learning_rate": 2.987710136800664e-05,
      "loss": 0.9745,
      "step": 51150
    },
    {
      "epoch": 0.17930304527787416,
      "grad_norm": 3.34375,
      "learning_rate": 2.9882942557740164e-05,
      "loss": 1.072,
      "step": 51160
    },
    {
      "epoch": 0.17933809278476975,
      "grad_norm": 3.609375,
      "learning_rate": 2.9888783747473685e-05,
      "loss": 1.071,
      "step": 51170
    },
    {
      "epoch": 0.17937314029166535,
      "grad_norm": 3.078125,
      "learning_rate": 2.989462493720721e-05,
      "loss": 0.9563,
      "step": 51180
    },
    {
      "epoch": 0.17940818779856094,
      "grad_norm": 3.0,
      "learning_rate": 2.9900466126940735e-05,
      "loss": 1.0153,
      "step": 51190
    },
    {
      "epoch": 0.17944323530545656,
      "grad_norm": 3.9375,
      "learning_rate": 2.990630731667426e-05,
      "loss": 0.9742,
      "step": 51200
    },
    {
      "epoch": 0.17947828281235215,
      "grad_norm": 3.046875,
      "learning_rate": 2.9912148506407788e-05,
      "loss": 0.9214,
      "step": 51210
    },
    {
      "epoch": 0.17951333031924774,
      "grad_norm": 3.046875,
      "learning_rate": 2.9917989696141313e-05,
      "loss": 1.0605,
      "step": 51220
    },
    {
      "epoch": 0.17954837782614333,
      "grad_norm": 2.9375,
      "learning_rate": 2.9923830885874837e-05,
      "loss": 1.022,
      "step": 51230
    },
    {
      "epoch": 0.17958342533303892,
      "grad_norm": 3.46875,
      "learning_rate": 2.9929672075608362e-05,
      "loss": 1.0922,
      "step": 51240
    },
    {
      "epoch": 0.17961847283993454,
      "grad_norm": 3.09375,
      "learning_rate": 2.9935513265341887e-05,
      "loss": 1.0011,
      "step": 51250
    },
    {
      "epoch": 0.17965352034683013,
      "grad_norm": 3.6875,
      "learning_rate": 2.994135445507541e-05,
      "loss": 0.9479,
      "step": 51260
    },
    {
      "epoch": 0.17968856785372572,
      "grad_norm": 3.171875,
      "learning_rate": 2.9947195644808933e-05,
      "loss": 1.0035,
      "step": 51270
    },
    {
      "epoch": 0.1797236153606213,
      "grad_norm": 3.265625,
      "learning_rate": 2.9953036834542458e-05,
      "loss": 1.0636,
      "step": 51280
    },
    {
      "epoch": 0.17975866286751693,
      "grad_norm": 3.21875,
      "learning_rate": 2.9958878024275987e-05,
      "loss": 0.9721,
      "step": 51290
    },
    {
      "epoch": 0.17979371037441252,
      "grad_norm": 3.796875,
      "learning_rate": 2.996471921400951e-05,
      "loss": 1.0703,
      "step": 51300
    },
    {
      "epoch": 0.17982875788130812,
      "grad_norm": 3.34375,
      "learning_rate": 2.9970560403743036e-05,
      "loss": 0.9101,
      "step": 51310
    },
    {
      "epoch": 0.1798638053882037,
      "grad_norm": 3.046875,
      "learning_rate": 2.997640159347656e-05,
      "loss": 0.9332,
      "step": 51320
    },
    {
      "epoch": 0.1798988528950993,
      "grad_norm": 3.359375,
      "learning_rate": 2.9982242783210086e-05,
      "loss": 0.9588,
      "step": 51330
    },
    {
      "epoch": 0.17993390040199492,
      "grad_norm": 3.96875,
      "learning_rate": 2.998808397294361e-05,
      "loss": 0.9782,
      "step": 51340
    },
    {
      "epoch": 0.1799689479088905,
      "grad_norm": 3.65625,
      "learning_rate": 2.9993925162677132e-05,
      "loss": 1.0279,
      "step": 51350
    },
    {
      "epoch": 0.1800039954157861,
      "grad_norm": 3.390625,
      "learning_rate": 2.9999766352410664e-05,
      "loss": 1.0278,
      "step": 51360
    },
    {
      "epoch": 0.1800390429226817,
      "grad_norm": 3.453125,
      "learning_rate": 3.000560754214419e-05,
      "loss": 1.0185,
      "step": 51370
    },
    {
      "epoch": 0.18007409042957728,
      "grad_norm": 3.578125,
      "learning_rate": 3.001144873187771e-05,
      "loss": 1.085,
      "step": 51380
    },
    {
      "epoch": 0.1801091379364729,
      "grad_norm": 3.09375,
      "learning_rate": 3.0017289921611235e-05,
      "loss": 0.9456,
      "step": 51390
    },
    {
      "epoch": 0.1801441854433685,
      "grad_norm": 3.375,
      "learning_rate": 3.002313111134476e-05,
      "loss": 1.0715,
      "step": 51400
    },
    {
      "epoch": 0.18017923295026408,
      "grad_norm": 3.515625,
      "learning_rate": 3.0028972301078285e-05,
      "loss": 0.9404,
      "step": 51410
    },
    {
      "epoch": 0.18021428045715968,
      "grad_norm": 2.984375,
      "learning_rate": 3.003481349081181e-05,
      "loss": 0.9823,
      "step": 51420
    },
    {
      "epoch": 0.18024932796405527,
      "grad_norm": 3.140625,
      "learning_rate": 3.004065468054533e-05,
      "loss": 0.9309,
      "step": 51430
    },
    {
      "epoch": 0.18028437547095089,
      "grad_norm": 3.421875,
      "learning_rate": 3.0046495870278863e-05,
      "loss": 0.997,
      "step": 51440
    },
    {
      "epoch": 0.18031942297784648,
      "grad_norm": 3.3125,
      "learning_rate": 3.0052337060012387e-05,
      "loss": 0.9698,
      "step": 51450
    },
    {
      "epoch": 0.18035447048474207,
      "grad_norm": 3.734375,
      "learning_rate": 3.0058178249745912e-05,
      "loss": 0.9919,
      "step": 51460
    },
    {
      "epoch": 0.18038951799163766,
      "grad_norm": 3.453125,
      "learning_rate": 3.0064019439479434e-05,
      "loss": 0.9575,
      "step": 51470
    },
    {
      "epoch": 0.18042456549853325,
      "grad_norm": 3.375,
      "learning_rate": 3.006986062921296e-05,
      "loss": 1.0296,
      "step": 51480
    },
    {
      "epoch": 0.18045961300542887,
      "grad_norm": 2.6875,
      "learning_rate": 3.0075701818946483e-05,
      "loss": 0.9239,
      "step": 51490
    },
    {
      "epoch": 0.18049466051232446,
      "grad_norm": 3.078125,
      "learning_rate": 3.0081543008680008e-05,
      "loss": 1.033,
      "step": 51500
    },
    {
      "epoch": 0.18052970801922005,
      "grad_norm": 3.359375,
      "learning_rate": 3.0087384198413533e-05,
      "loss": 1.0356,
      "step": 51510
    },
    {
      "epoch": 0.18056475552611564,
      "grad_norm": 2.796875,
      "learning_rate": 3.009322538814706e-05,
      "loss": 0.9311,
      "step": 51520
    },
    {
      "epoch": 0.18059980303301124,
      "grad_norm": 2.984375,
      "learning_rate": 3.0099066577880586e-05,
      "loss": 1.037,
      "step": 51530
    },
    {
      "epoch": 0.18063485053990685,
      "grad_norm": 3.265625,
      "learning_rate": 3.010490776761411e-05,
      "loss": 1.0213,
      "step": 51540
    },
    {
      "epoch": 0.18066989804680245,
      "grad_norm": 3.6875,
      "learning_rate": 3.0110748957347636e-05,
      "loss": 1.0471,
      "step": 51550
    },
    {
      "epoch": 0.18070494555369804,
      "grad_norm": 2.828125,
      "learning_rate": 3.0116590147081157e-05,
      "loss": 0.928,
      "step": 51560
    },
    {
      "epoch": 0.18073999306059363,
      "grad_norm": 3.0625,
      "learning_rate": 3.0122431336814682e-05,
      "loss": 0.9255,
      "step": 51570
    },
    {
      "epoch": 0.18077504056748922,
      "grad_norm": 3.390625,
      "learning_rate": 3.0128272526548207e-05,
      "loss": 0.9899,
      "step": 51580
    },
    {
      "epoch": 0.18081008807438484,
      "grad_norm": 2.953125,
      "learning_rate": 3.0134113716281732e-05,
      "loss": 0.9724,
      "step": 51590
    },
    {
      "epoch": 0.18084513558128043,
      "grad_norm": 3.53125,
      "learning_rate": 3.013995490601526e-05,
      "loss": 1.0489,
      "step": 51600
    },
    {
      "epoch": 0.18088018308817602,
      "grad_norm": 3.03125,
      "learning_rate": 3.0145796095748785e-05,
      "loss": 1.0143,
      "step": 51610
    },
    {
      "epoch": 0.1809152305950716,
      "grad_norm": 2.953125,
      "learning_rate": 3.015163728548231e-05,
      "loss": 0.9655,
      "step": 51620
    },
    {
      "epoch": 0.1809502781019672,
      "grad_norm": 3.125,
      "learning_rate": 3.0157478475215835e-05,
      "loss": 0.9708,
      "step": 51630
    },
    {
      "epoch": 0.18098532560886282,
      "grad_norm": 3.703125,
      "learning_rate": 3.0163319664949356e-05,
      "loss": 1.035,
      "step": 51640
    },
    {
      "epoch": 0.18102037311575842,
      "grad_norm": 3.109375,
      "learning_rate": 3.016916085468288e-05,
      "loss": 0.9885,
      "step": 51650
    },
    {
      "epoch": 0.181055420622654,
      "grad_norm": 3.234375,
      "learning_rate": 3.0175002044416406e-05,
      "loss": 0.944,
      "step": 51660
    },
    {
      "epoch": 0.1810904681295496,
      "grad_norm": 3.375,
      "learning_rate": 3.0180843234149937e-05,
      "loss": 1.0561,
      "step": 51670
    },
    {
      "epoch": 0.1811255156364452,
      "grad_norm": 3.1875,
      "learning_rate": 3.018668442388346e-05,
      "loss": 0.94,
      "step": 51680
    },
    {
      "epoch": 0.1811605631433408,
      "grad_norm": 2.953125,
      "learning_rate": 3.0192525613616984e-05,
      "loss": 0.9973,
      "step": 51690
    },
    {
      "epoch": 0.1811956106502364,
      "grad_norm": 3.203125,
      "learning_rate": 3.019836680335051e-05,
      "loss": 0.9866,
      "step": 51700
    },
    {
      "epoch": 0.181230658157132,
      "grad_norm": 2.96875,
      "learning_rate": 3.0204207993084033e-05,
      "loss": 1.024,
      "step": 51710
    },
    {
      "epoch": 0.18126570566402758,
      "grad_norm": 3.265625,
      "learning_rate": 3.0210049182817558e-05,
      "loss": 0.9979,
      "step": 51720
    },
    {
      "epoch": 0.18130075317092317,
      "grad_norm": 3.75,
      "learning_rate": 3.021589037255108e-05,
      "loss": 0.985,
      "step": 51730
    },
    {
      "epoch": 0.1813358006778188,
      "grad_norm": 3.0625,
      "learning_rate": 3.0221731562284605e-05,
      "loss": 1.027,
      "step": 51740
    },
    {
      "epoch": 0.18137084818471438,
      "grad_norm": 2.921875,
      "learning_rate": 3.0227572752018136e-05,
      "loss": 1.0248,
      "step": 51750
    },
    {
      "epoch": 0.18140589569160998,
      "grad_norm": 3.03125,
      "learning_rate": 3.023341394175166e-05,
      "loss": 0.9636,
      "step": 51760
    },
    {
      "epoch": 0.18144094319850557,
      "grad_norm": 3.25,
      "learning_rate": 3.0239255131485182e-05,
      "loss": 0.984,
      "step": 51770
    },
    {
      "epoch": 0.18147599070540116,
      "grad_norm": 3.484375,
      "learning_rate": 3.0245096321218707e-05,
      "loss": 1.0091,
      "step": 51780
    },
    {
      "epoch": 0.18151103821229678,
      "grad_norm": 3.390625,
      "learning_rate": 3.0250937510952232e-05,
      "loss": 0.9659,
      "step": 51790
    },
    {
      "epoch": 0.18154608571919237,
      "grad_norm": 3.109375,
      "learning_rate": 3.0256778700685757e-05,
      "loss": 1.0743,
      "step": 51800
    },
    {
      "epoch": 0.18158113322608796,
      "grad_norm": 3.1875,
      "learning_rate": 3.0262619890419282e-05,
      "loss": 1.0111,
      "step": 51810
    },
    {
      "epoch": 0.18161618073298355,
      "grad_norm": 3.390625,
      "learning_rate": 3.0268461080152803e-05,
      "loss": 0.9704,
      "step": 51820
    },
    {
      "epoch": 0.18165122823987917,
      "grad_norm": 2.578125,
      "learning_rate": 3.0274302269886335e-05,
      "loss": 1.0008,
      "step": 51830
    },
    {
      "epoch": 0.18168627574677476,
      "grad_norm": 3.375,
      "learning_rate": 3.028014345961986e-05,
      "loss": 1.0611,
      "step": 51840
    },
    {
      "epoch": 0.18172132325367035,
      "grad_norm": 3.296875,
      "learning_rate": 3.028598464935338e-05,
      "loss": 1.0004,
      "step": 51850
    },
    {
      "epoch": 0.18175637076056594,
      "grad_norm": 3.296875,
      "learning_rate": 3.0291825839086906e-05,
      "loss": 0.9248,
      "step": 51860
    },
    {
      "epoch": 0.18179141826746154,
      "grad_norm": 3.296875,
      "learning_rate": 3.029766702882043e-05,
      "loss": 1.0571,
      "step": 51870
    },
    {
      "epoch": 0.18182646577435715,
      "grad_norm": 3.296875,
      "learning_rate": 3.0303508218553956e-05,
      "loss": 1.0192,
      "step": 51880
    },
    {
      "epoch": 0.18186151328125275,
      "grad_norm": 3.53125,
      "learning_rate": 3.030934940828748e-05,
      "loss": 0.9912,
      "step": 51890
    },
    {
      "epoch": 0.18189656078814834,
      "grad_norm": 3.515625,
      "learning_rate": 3.0315190598021005e-05,
      "loss": 1.0526,
      "step": 51900
    },
    {
      "epoch": 0.18193160829504393,
      "grad_norm": 2.921875,
      "learning_rate": 3.0321031787754534e-05,
      "loss": 0.9473,
      "step": 51910
    },
    {
      "epoch": 0.18196665580193952,
      "grad_norm": 3.203125,
      "learning_rate": 3.032687297748806e-05,
      "loss": 1.0476,
      "step": 51920
    },
    {
      "epoch": 0.18200170330883514,
      "grad_norm": 2.953125,
      "learning_rate": 3.0332714167221583e-05,
      "loss": 1.0063,
      "step": 51930
    },
    {
      "epoch": 0.18203675081573073,
      "grad_norm": 3.328125,
      "learning_rate": 3.0338555356955105e-05,
      "loss": 1.0477,
      "step": 51940
    },
    {
      "epoch": 0.18207179832262632,
      "grad_norm": 3.28125,
      "learning_rate": 3.034439654668863e-05,
      "loss": 1.0324,
      "step": 51950
    },
    {
      "epoch": 0.1821068458295219,
      "grad_norm": 3.109375,
      "learning_rate": 3.0350237736422155e-05,
      "loss": 0.9539,
      "step": 51960
    },
    {
      "epoch": 0.1821418933364175,
      "grad_norm": 3.453125,
      "learning_rate": 3.035607892615568e-05,
      "loss": 1.0184,
      "step": 51970
    },
    {
      "epoch": 0.18217694084331312,
      "grad_norm": 3.359375,
      "learning_rate": 3.0361920115889204e-05,
      "loss": 0.9611,
      "step": 51980
    },
    {
      "epoch": 0.18221198835020871,
      "grad_norm": 3.1875,
      "learning_rate": 3.0367761305622732e-05,
      "loss": 0.9707,
      "step": 51990
    },
    {
      "epoch": 0.1822470358571043,
      "grad_norm": 3.71875,
      "learning_rate": 3.0373602495356257e-05,
      "loss": 1.0327,
      "step": 52000
    },
    {
      "epoch": 0.1822820833639999,
      "grad_norm": 2.953125,
      "learning_rate": 3.0379443685089782e-05,
      "loss": 1.0031,
      "step": 52010
    },
    {
      "epoch": 0.1823171308708955,
      "grad_norm": 3.4375,
      "learning_rate": 3.0385284874823307e-05,
      "loss": 1.0651,
      "step": 52020
    },
    {
      "epoch": 0.1823521783777911,
      "grad_norm": 2.796875,
      "learning_rate": 3.039112606455683e-05,
      "loss": 0.9431,
      "step": 52030
    },
    {
      "epoch": 0.1823872258846867,
      "grad_norm": 3.734375,
      "learning_rate": 3.0396967254290353e-05,
      "loss": 1.0753,
      "step": 52040
    },
    {
      "epoch": 0.1824222733915823,
      "grad_norm": 3.015625,
      "learning_rate": 3.0402808444023878e-05,
      "loss": 0.9409,
      "step": 52050
    },
    {
      "epoch": 0.18245732089847788,
      "grad_norm": 3.1875,
      "learning_rate": 3.0408649633757406e-05,
      "loss": 0.9654,
      "step": 52060
    },
    {
      "epoch": 0.18249236840537347,
      "grad_norm": 3.359375,
      "learning_rate": 3.041449082349093e-05,
      "loss": 1.0116,
      "step": 52070
    },
    {
      "epoch": 0.1825274159122691,
      "grad_norm": 3.609375,
      "learning_rate": 3.0420332013224456e-05,
      "loss": 1.0,
      "step": 52080
    },
    {
      "epoch": 0.18256246341916468,
      "grad_norm": 2.953125,
      "learning_rate": 3.042617320295798e-05,
      "loss": 1.0443,
      "step": 52090
    },
    {
      "epoch": 0.18259751092606027,
      "grad_norm": 3.328125,
      "learning_rate": 3.0432014392691506e-05,
      "loss": 0.9808,
      "step": 52100
    },
    {
      "epoch": 0.18263255843295587,
      "grad_norm": 3.390625,
      "learning_rate": 3.043785558242503e-05,
      "loss": 1.0553,
      "step": 52110
    },
    {
      "epoch": 0.18266760593985146,
      "grad_norm": 3.546875,
      "learning_rate": 3.0443696772158552e-05,
      "loss": 1.0359,
      "step": 52120
    },
    {
      "epoch": 0.18270265344674708,
      "grad_norm": 3.171875,
      "learning_rate": 3.0449537961892077e-05,
      "loss": 1.0041,
      "step": 52130
    },
    {
      "epoch": 0.18273770095364267,
      "grad_norm": 3.046875,
      "learning_rate": 3.045537915162561e-05,
      "loss": 0.9035,
      "step": 52140
    },
    {
      "epoch": 0.18277274846053826,
      "grad_norm": 3.40625,
      "learning_rate": 3.046122034135913e-05,
      "loss": 0.9858,
      "step": 52150
    },
    {
      "epoch": 0.18280779596743385,
      "grad_norm": 3.609375,
      "learning_rate": 3.0467061531092655e-05,
      "loss": 1.0496,
      "step": 52160
    },
    {
      "epoch": 0.18284284347432944,
      "grad_norm": 3.5,
      "learning_rate": 3.047290272082618e-05,
      "loss": 1.0525,
      "step": 52170
    },
    {
      "epoch": 0.18287789098122506,
      "grad_norm": 3.109375,
      "learning_rate": 3.0478743910559704e-05,
      "loss": 1.0128,
      "step": 52180
    },
    {
      "epoch": 0.18291293848812065,
      "grad_norm": 3.71875,
      "learning_rate": 3.048458510029323e-05,
      "loss": 1.0525,
      "step": 52190
    },
    {
      "epoch": 0.18294798599501624,
      "grad_norm": 2.703125,
      "learning_rate": 3.049042629002675e-05,
      "loss": 0.9476,
      "step": 52200
    },
    {
      "epoch": 0.18298303350191183,
      "grad_norm": 3.3125,
      "learning_rate": 3.0496267479760276e-05,
      "loss": 0.9019,
      "step": 52210
    },
    {
      "epoch": 0.18301808100880743,
      "grad_norm": 3.515625,
      "learning_rate": 3.0502108669493807e-05,
      "loss": 0.9739,
      "step": 52220
    },
    {
      "epoch": 0.18305312851570305,
      "grad_norm": 3.25,
      "learning_rate": 3.0507949859227332e-05,
      "loss": 0.9191,
      "step": 52230
    },
    {
      "epoch": 0.18308817602259864,
      "grad_norm": 3.484375,
      "learning_rate": 3.0513791048960854e-05,
      "loss": 1.0524,
      "step": 52240
    },
    {
      "epoch": 0.18312322352949423,
      "grad_norm": 3.125,
      "learning_rate": 3.051963223869438e-05,
      "loss": 1.0069,
      "step": 52250
    },
    {
      "epoch": 0.18315827103638982,
      "grad_norm": 3.234375,
      "learning_rate": 3.05254734284279e-05,
      "loss": 0.973,
      "step": 52260
    },
    {
      "epoch": 0.1831933185432854,
      "grad_norm": 3.28125,
      "learning_rate": 3.053131461816143e-05,
      "loss": 1.0123,
      "step": 52270
    },
    {
      "epoch": 0.18322836605018103,
      "grad_norm": 3.140625,
      "learning_rate": 3.053715580789495e-05,
      "loss": 0.9585,
      "step": 52280
    },
    {
      "epoch": 0.18326341355707662,
      "grad_norm": 3.5,
      "learning_rate": 3.054299699762848e-05,
      "loss": 0.9969,
      "step": 52290
    },
    {
      "epoch": 0.1832984610639722,
      "grad_norm": 2.984375,
      "learning_rate": 3.0548838187362006e-05,
      "loss": 0.975,
      "step": 52300
    },
    {
      "epoch": 0.1833335085708678,
      "grad_norm": 3.140625,
      "learning_rate": 3.055467937709553e-05,
      "loss": 1.0725,
      "step": 52310
    },
    {
      "epoch": 0.1833685560777634,
      "grad_norm": 3.609375,
      "learning_rate": 3.0560520566829056e-05,
      "loss": 1.0186,
      "step": 52320
    },
    {
      "epoch": 0.18340360358465901,
      "grad_norm": 3.25,
      "learning_rate": 3.056636175656258e-05,
      "loss": 0.9707,
      "step": 52330
    },
    {
      "epoch": 0.1834386510915546,
      "grad_norm": 3.328125,
      "learning_rate": 3.0572202946296105e-05,
      "loss": 0.9835,
      "step": 52340
    },
    {
      "epoch": 0.1834736985984502,
      "grad_norm": 3.296875,
      "learning_rate": 3.057804413602963e-05,
      "loss": 1.0217,
      "step": 52350
    },
    {
      "epoch": 0.1835087461053458,
      "grad_norm": 3.546875,
      "learning_rate": 3.058388532576315e-05,
      "loss": 1.0025,
      "step": 52360
    },
    {
      "epoch": 0.18354379361224138,
      "grad_norm": 3.109375,
      "learning_rate": 3.058972651549668e-05,
      "loss": 1.0365,
      "step": 52370
    },
    {
      "epoch": 0.183578841119137,
      "grad_norm": 3.328125,
      "learning_rate": 3.0595567705230205e-05,
      "loss": 0.9644,
      "step": 52380
    },
    {
      "epoch": 0.1836138886260326,
      "grad_norm": 3.3125,
      "learning_rate": 3.0601408894963726e-05,
      "loss": 0.9512,
      "step": 52390
    },
    {
      "epoch": 0.18364893613292818,
      "grad_norm": 2.796875,
      "learning_rate": 3.0607250084697254e-05,
      "loss": 0.9891,
      "step": 52400
    },
    {
      "epoch": 0.18368398363982377,
      "grad_norm": 3.3125,
      "learning_rate": 3.0613091274430776e-05,
      "loss": 0.9148,
      "step": 52410
    },
    {
      "epoch": 0.1837190311467194,
      "grad_norm": 3.4375,
      "learning_rate": 3.0618932464164304e-05,
      "loss": 1.0062,
      "step": 52420
    },
    {
      "epoch": 0.18375407865361498,
      "grad_norm": 2.546875,
      "learning_rate": 3.0624773653897826e-05,
      "loss": 1.0105,
      "step": 52430
    },
    {
      "epoch": 0.18378912616051057,
      "grad_norm": 2.8125,
      "learning_rate": 3.063061484363135e-05,
      "loss": 0.9596,
      "step": 52440
    },
    {
      "epoch": 0.18382417366740617,
      "grad_norm": 3.1875,
      "learning_rate": 3.063645603336488e-05,
      "loss": 0.9696,
      "step": 52450
    },
    {
      "epoch": 0.18385922117430176,
      "grad_norm": 3.359375,
      "learning_rate": 3.0642297223098404e-05,
      "loss": 0.9285,
      "step": 52460
    },
    {
      "epoch": 0.18389426868119738,
      "grad_norm": 3.78125,
      "learning_rate": 3.0648138412831925e-05,
      "loss": 1.0356,
      "step": 52470
    },
    {
      "epoch": 0.18392931618809297,
      "grad_norm": 3.203125,
      "learning_rate": 3.065397960256545e-05,
      "loss": 0.9479,
      "step": 52480
    },
    {
      "epoch": 0.18396436369498856,
      "grad_norm": 3.515625,
      "learning_rate": 3.0659820792298975e-05,
      "loss": 0.98,
      "step": 52490
    },
    {
      "epoch": 0.18399941120188415,
      "grad_norm": 2.984375,
      "learning_rate": 3.06656619820325e-05,
      "loss": 0.9463,
      "step": 52500
    },
    {
      "epoch": 0.18403445870877974,
      "grad_norm": 2.984375,
      "learning_rate": 3.0671503171766024e-05,
      "loss": 0.9591,
      "step": 52510
    },
    {
      "epoch": 0.18406950621567536,
      "grad_norm": 3.8125,
      "learning_rate": 3.067734436149955e-05,
      "loss": 1.1392,
      "step": 52520
    },
    {
      "epoch": 0.18410455372257095,
      "grad_norm": 3.078125,
      "learning_rate": 3.068318555123308e-05,
      "loss": 1.0709,
      "step": 52530
    },
    {
      "epoch": 0.18413960122946654,
      "grad_norm": 3.53125,
      "learning_rate": 3.06890267409666e-05,
      "loss": 1.0514,
      "step": 52540
    },
    {
      "epoch": 0.18417464873636213,
      "grad_norm": 3.296875,
      "learning_rate": 3.069486793070013e-05,
      "loss": 1.0161,
      "step": 52550
    },
    {
      "epoch": 0.18420969624325773,
      "grad_norm": 3.5,
      "learning_rate": 3.070070912043365e-05,
      "loss": 0.9581,
      "step": 52560
    },
    {
      "epoch": 0.18424474375015334,
      "grad_norm": 2.890625,
      "learning_rate": 3.0706550310167173e-05,
      "loss": 0.9304,
      "step": 52570
    },
    {
      "epoch": 0.18427979125704894,
      "grad_norm": 3.3125,
      "learning_rate": 3.07123914999007e-05,
      "loss": 1.0734,
      "step": 52580
    },
    {
      "epoch": 0.18431483876394453,
      "grad_norm": 3.3125,
      "learning_rate": 3.071823268963422e-05,
      "loss": 1.0281,
      "step": 52590
    },
    {
      "epoch": 0.18434988627084012,
      "grad_norm": 3.21875,
      "learning_rate": 3.072407387936775e-05,
      "loss": 1.0103,
      "step": 52600
    },
    {
      "epoch": 0.1843849337777357,
      "grad_norm": 2.984375,
      "learning_rate": 3.072991506910128e-05,
      "loss": 0.9395,
      "step": 52610
    },
    {
      "epoch": 0.18441998128463133,
      "grad_norm": 3.1875,
      "learning_rate": 3.07357562588348e-05,
      "loss": 0.9809,
      "step": 52620
    },
    {
      "epoch": 0.18445502879152692,
      "grad_norm": 3.734375,
      "learning_rate": 3.074159744856833e-05,
      "loss": 0.9732,
      "step": 52630
    },
    {
      "epoch": 0.1844900762984225,
      "grad_norm": 3.328125,
      "learning_rate": 3.074743863830185e-05,
      "loss": 1.0515,
      "step": 52640
    },
    {
      "epoch": 0.1845251238053181,
      "grad_norm": 3.5625,
      "learning_rate": 3.075327982803537e-05,
      "loss": 0.978,
      "step": 52650
    },
    {
      "epoch": 0.1845601713122137,
      "grad_norm": 3.421875,
      "learning_rate": 3.07591210177689e-05,
      "loss": 1.0022,
      "step": 52660
    },
    {
      "epoch": 0.1845952188191093,
      "grad_norm": 3.578125,
      "learning_rate": 3.076496220750242e-05,
      "loss": 1.1309,
      "step": 52670
    },
    {
      "epoch": 0.1846302663260049,
      "grad_norm": 3.34375,
      "learning_rate": 3.077080339723595e-05,
      "loss": 0.9853,
      "step": 52680
    },
    {
      "epoch": 0.1846653138329005,
      "grad_norm": 3.09375,
      "learning_rate": 3.077664458696948e-05,
      "loss": 0.9582,
      "step": 52690
    },
    {
      "epoch": 0.1847003613397961,
      "grad_norm": 3.015625,
      "learning_rate": 3.0782485776703e-05,
      "loss": 1.0318,
      "step": 52700
    },
    {
      "epoch": 0.18473540884669168,
      "grad_norm": 3.390625,
      "learning_rate": 3.078832696643653e-05,
      "loss": 0.908,
      "step": 52710
    },
    {
      "epoch": 0.1847704563535873,
      "grad_norm": 3.125,
      "learning_rate": 3.079416815617005e-05,
      "loss": 1.0394,
      "step": 52720
    },
    {
      "epoch": 0.1848055038604829,
      "grad_norm": 3.265625,
      "learning_rate": 3.080000934590358e-05,
      "loss": 1.0926,
      "step": 52730
    },
    {
      "epoch": 0.18484055136737848,
      "grad_norm": 3.4375,
      "learning_rate": 3.08058505356371e-05,
      "loss": 0.9529,
      "step": 52740
    },
    {
      "epoch": 0.18487559887427407,
      "grad_norm": 2.65625,
      "learning_rate": 3.081169172537062e-05,
      "loss": 1.031,
      "step": 52750
    },
    {
      "epoch": 0.18491064638116966,
      "grad_norm": 3.1875,
      "learning_rate": 3.0817532915104156e-05,
      "loss": 1.0497,
      "step": 52760
    },
    {
      "epoch": 0.18494569388806528,
      "grad_norm": 3.515625,
      "learning_rate": 3.082337410483768e-05,
      "loss": 1.007,
      "step": 52770
    },
    {
      "epoch": 0.18498074139496087,
      "grad_norm": 3.078125,
      "learning_rate": 3.08292152945712e-05,
      "loss": 0.9729,
      "step": 52780
    },
    {
      "epoch": 0.18501578890185646,
      "grad_norm": 3.328125,
      "learning_rate": 3.083505648430473e-05,
      "loss": 1.1043,
      "step": 52790
    },
    {
      "epoch": 0.18505083640875206,
      "grad_norm": 3.4375,
      "learning_rate": 3.084089767403825e-05,
      "loss": 1.0306,
      "step": 52800
    },
    {
      "epoch": 0.18508588391564765,
      "grad_norm": 3.0625,
      "learning_rate": 3.0846738863771776e-05,
      "loss": 1.0121,
      "step": 52810
    },
    {
      "epoch": 0.18512093142254327,
      "grad_norm": 3.34375,
      "learning_rate": 3.08525800535053e-05,
      "loss": 1.0486,
      "step": 52820
    },
    {
      "epoch": 0.18515597892943886,
      "grad_norm": 3.234375,
      "learning_rate": 3.085842124323882e-05,
      "loss": 1.0936,
      "step": 52830
    },
    {
      "epoch": 0.18519102643633445,
      "grad_norm": 3.171875,
      "learning_rate": 3.0864262432972354e-05,
      "loss": 1.0601,
      "step": 52840
    },
    {
      "epoch": 0.18522607394323004,
      "grad_norm": 5.53125,
      "learning_rate": 3.0870103622705876e-05,
      "loss": 1.0527,
      "step": 52850
    },
    {
      "epoch": 0.18526112145012563,
      "grad_norm": 3.1875,
      "learning_rate": 3.08759448124394e-05,
      "loss": 1.0355,
      "step": 52860
    },
    {
      "epoch": 0.18529616895702125,
      "grad_norm": 3.46875,
      "learning_rate": 3.0881786002172926e-05,
      "loss": 0.8971,
      "step": 52870
    },
    {
      "epoch": 0.18533121646391684,
      "grad_norm": 3.171875,
      "learning_rate": 3.088762719190645e-05,
      "loss": 0.9587,
      "step": 52880
    },
    {
      "epoch": 0.18536626397081243,
      "grad_norm": 2.96875,
      "learning_rate": 3.0893468381639975e-05,
      "loss": 0.9497,
      "step": 52890
    },
    {
      "epoch": 0.18540131147770803,
      "grad_norm": 3.40625,
      "learning_rate": 3.08993095713735e-05,
      "loss": 1.0542,
      "step": 52900
    },
    {
      "epoch": 0.18543635898460362,
      "grad_norm": 3.1875,
      "learning_rate": 3.090515076110702e-05,
      "loss": 1.0096,
      "step": 52910
    },
    {
      "epoch": 0.18547140649149924,
      "grad_norm": 3.609375,
      "learning_rate": 3.091099195084055e-05,
      "loss": 1.0376,
      "step": 52920
    },
    {
      "epoch": 0.18550645399839483,
      "grad_norm": 3.078125,
      "learning_rate": 3.0916833140574075e-05,
      "loss": 0.9191,
      "step": 52930
    },
    {
      "epoch": 0.18554150150529042,
      "grad_norm": 3.1875,
      "learning_rate": 3.0922674330307596e-05,
      "loss": 0.9857,
      "step": 52940
    },
    {
      "epoch": 0.185576549012186,
      "grad_norm": 3.40625,
      "learning_rate": 3.0928515520041124e-05,
      "loss": 1.0124,
      "step": 52950
    },
    {
      "epoch": 0.18561159651908163,
      "grad_norm": 3.328125,
      "learning_rate": 3.0934356709774646e-05,
      "loss": 0.8708,
      "step": 52960
    },
    {
      "epoch": 0.18564664402597722,
      "grad_norm": 2.78125,
      "learning_rate": 3.0940197899508174e-05,
      "loss": 0.9051,
      "step": 52970
    },
    {
      "epoch": 0.1856816915328728,
      "grad_norm": 3.59375,
      "learning_rate": 3.0946039089241695e-05,
      "loss": 0.9656,
      "step": 52980
    },
    {
      "epoch": 0.1857167390397684,
      "grad_norm": 3.359375,
      "learning_rate": 3.0951880278975224e-05,
      "loss": 0.9557,
      "step": 52990
    },
    {
      "epoch": 0.185751786546664,
      "grad_norm": 3.359375,
      "learning_rate": 3.095772146870875e-05,
      "loss": 0.947,
      "step": 53000
    },
    {
      "epoch": 0.1857868340535596,
      "grad_norm": 3.203125,
      "learning_rate": 3.096356265844227e-05,
      "loss": 0.9883,
      "step": 53010
    },
    {
      "epoch": 0.1858218815604552,
      "grad_norm": 3.1875,
      "learning_rate": 3.09694038481758e-05,
      "loss": 0.9772,
      "step": 53020
    },
    {
      "epoch": 0.1858569290673508,
      "grad_norm": 2.578125,
      "learning_rate": 3.097524503790932e-05,
      "loss": 0.9267,
      "step": 53030
    },
    {
      "epoch": 0.1858919765742464,
      "grad_norm": 3.171875,
      "learning_rate": 3.0981086227642844e-05,
      "loss": 1.0689,
      "step": 53040
    },
    {
      "epoch": 0.18592702408114198,
      "grad_norm": 2.9375,
      "learning_rate": 3.098692741737637e-05,
      "loss": 0.9733,
      "step": 53050
    },
    {
      "epoch": 0.1859620715880376,
      "grad_norm": 2.890625,
      "learning_rate": 3.0992768607109894e-05,
      "loss": 0.9878,
      "step": 53060
    },
    {
      "epoch": 0.1859971190949332,
      "grad_norm": 3.265625,
      "learning_rate": 3.099860979684342e-05,
      "loss": 0.9952,
      "step": 53070
    },
    {
      "epoch": 0.18603216660182878,
      "grad_norm": 3.3125,
      "learning_rate": 3.100445098657695e-05,
      "loss": 0.9749,
      "step": 53080
    },
    {
      "epoch": 0.18606721410872437,
      "grad_norm": 3.4375,
      "learning_rate": 3.101029217631047e-05,
      "loss": 1.0292,
      "step": 53090
    },
    {
      "epoch": 0.18610226161561996,
      "grad_norm": 3.109375,
      "learning_rate": 3.1016133366044e-05,
      "loss": 0.9735,
      "step": 53100
    },
    {
      "epoch": 0.18613730912251558,
      "grad_norm": 3.1875,
      "learning_rate": 3.102197455577752e-05,
      "loss": 1.0075,
      "step": 53110
    },
    {
      "epoch": 0.18617235662941117,
      "grad_norm": 4.125,
      "learning_rate": 3.102781574551104e-05,
      "loss": 0.9995,
      "step": 53120
    },
    {
      "epoch": 0.18620740413630676,
      "grad_norm": 3.140625,
      "learning_rate": 3.103365693524457e-05,
      "loss": 1.0022,
      "step": 53130
    },
    {
      "epoch": 0.18624245164320236,
      "grad_norm": 3.71875,
      "learning_rate": 3.103949812497809e-05,
      "loss": 0.9526,
      "step": 53140
    },
    {
      "epoch": 0.18627749915009795,
      "grad_norm": 3.0,
      "learning_rate": 3.104533931471162e-05,
      "loss": 1.0449,
      "step": 53150
    },
    {
      "epoch": 0.18631254665699357,
      "grad_norm": 3.21875,
      "learning_rate": 3.105118050444515e-05,
      "loss": 0.9939,
      "step": 53160
    },
    {
      "epoch": 0.18634759416388916,
      "grad_norm": 3.171875,
      "learning_rate": 3.105702169417867e-05,
      "loss": 0.987,
      "step": 53170
    },
    {
      "epoch": 0.18638264167078475,
      "grad_norm": 3.15625,
      "learning_rate": 3.10628628839122e-05,
      "loss": 1.0149,
      "step": 53180
    },
    {
      "epoch": 0.18641768917768034,
      "grad_norm": 3.3125,
      "learning_rate": 3.106870407364572e-05,
      "loss": 0.9654,
      "step": 53190
    },
    {
      "epoch": 0.18645273668457593,
      "grad_norm": 2.984375,
      "learning_rate": 3.107454526337925e-05,
      "loss": 0.9289,
      "step": 53200
    },
    {
      "epoch": 0.18648778419147155,
      "grad_norm": 3.421875,
      "learning_rate": 3.108038645311277e-05,
      "loss": 1.0389,
      "step": 53210
    },
    {
      "epoch": 0.18652283169836714,
      "grad_norm": 3.140625,
      "learning_rate": 3.108622764284629e-05,
      "loss": 1.0266,
      "step": 53220
    },
    {
      "epoch": 0.18655787920526273,
      "grad_norm": 3.125,
      "learning_rate": 3.109206883257983e-05,
      "loss": 0.8978,
      "step": 53230
    },
    {
      "epoch": 0.18659292671215832,
      "grad_norm": 3.21875,
      "learning_rate": 3.109791002231335e-05,
      "loss": 0.9277,
      "step": 53240
    },
    {
      "epoch": 0.18662797421905392,
      "grad_norm": 3.640625,
      "learning_rate": 3.110375121204687e-05,
      "loss": 1.0415,
      "step": 53250
    },
    {
      "epoch": 0.18666302172594954,
      "grad_norm": 3.46875,
      "learning_rate": 3.11095924017804e-05,
      "loss": 1.0098,
      "step": 53260
    },
    {
      "epoch": 0.18669806923284513,
      "grad_norm": 2.9375,
      "learning_rate": 3.111543359151392e-05,
      "loss": 0.9428,
      "step": 53270
    },
    {
      "epoch": 0.18673311673974072,
      "grad_norm": 3.65625,
      "learning_rate": 3.112127478124745e-05,
      "loss": 1.0172,
      "step": 53280
    },
    {
      "epoch": 0.1867681642466363,
      "grad_norm": 3.140625,
      "learning_rate": 3.112711597098097e-05,
      "loss": 1.0394,
      "step": 53290
    },
    {
      "epoch": 0.1868032117535319,
      "grad_norm": 3.359375,
      "learning_rate": 3.113295716071449e-05,
      "loss": 0.9605,
      "step": 53300
    },
    {
      "epoch": 0.18683825926042752,
      "grad_norm": 4.0,
      "learning_rate": 3.1138798350448025e-05,
      "loss": 0.9755,
      "step": 53310
    },
    {
      "epoch": 0.1868733067673231,
      "grad_norm": 3.1875,
      "learning_rate": 3.114463954018155e-05,
      "loss": 1.0041,
      "step": 53320
    },
    {
      "epoch": 0.1869083542742187,
      "grad_norm": 3.203125,
      "learning_rate": 3.115048072991507e-05,
      "loss": 1.1108,
      "step": 53330
    },
    {
      "epoch": 0.1869434017811143,
      "grad_norm": 3.609375,
      "learning_rate": 3.1156321919648597e-05,
      "loss": 1.1341,
      "step": 53340
    },
    {
      "epoch": 0.18697844928800988,
      "grad_norm": 3.140625,
      "learning_rate": 3.116216310938212e-05,
      "loss": 1.0513,
      "step": 53350
    },
    {
      "epoch": 0.1870134967949055,
      "grad_norm": 3.40625,
      "learning_rate": 3.1168004299115646e-05,
      "loss": 1.003,
      "step": 53360
    },
    {
      "epoch": 0.1870485443018011,
      "grad_norm": 3.203125,
      "learning_rate": 3.117384548884917e-05,
      "loss": 1.033,
      "step": 53370
    },
    {
      "epoch": 0.1870835918086967,
      "grad_norm": 3.671875,
      "learning_rate": 3.1179686678582696e-05,
      "loss": 0.9787,
      "step": 53380
    },
    {
      "epoch": 0.18711863931559228,
      "grad_norm": 3.234375,
      "learning_rate": 3.1185527868316224e-05,
      "loss": 0.9,
      "step": 53390
    },
    {
      "epoch": 0.18715368682248787,
      "grad_norm": 3.296875,
      "learning_rate": 3.1191369058049746e-05,
      "loss": 0.9567,
      "step": 53400
    },
    {
      "epoch": 0.1871887343293835,
      "grad_norm": 2.9375,
      "learning_rate": 3.1197210247783274e-05,
      "loss": 0.9937,
      "step": 53410
    },
    {
      "epoch": 0.18722378183627908,
      "grad_norm": 3.21875,
      "learning_rate": 3.1203051437516795e-05,
      "loss": 1.0257,
      "step": 53420
    },
    {
      "epoch": 0.18725882934317467,
      "grad_norm": 3.546875,
      "learning_rate": 3.120889262725032e-05,
      "loss": 1.0227,
      "step": 53430
    },
    {
      "epoch": 0.18729387685007026,
      "grad_norm": 3.0,
      "learning_rate": 3.1214733816983845e-05,
      "loss": 0.9895,
      "step": 53440
    },
    {
      "epoch": 0.18732892435696585,
      "grad_norm": 3.453125,
      "learning_rate": 3.1220575006717366e-05,
      "loss": 1.0014,
      "step": 53450
    },
    {
      "epoch": 0.18736397186386147,
      "grad_norm": 3.671875,
      "learning_rate": 3.1226416196450895e-05,
      "loss": 0.9996,
      "step": 53460
    },
    {
      "epoch": 0.18739901937075706,
      "grad_norm": 2.53125,
      "learning_rate": 3.123225738618442e-05,
      "loss": 1.0041,
      "step": 53470
    },
    {
      "epoch": 0.18743406687765266,
      "grad_norm": 3.375,
      "learning_rate": 3.1238098575917944e-05,
      "loss": 0.9923,
      "step": 53480
    },
    {
      "epoch": 0.18746911438454825,
      "grad_norm": 2.828125,
      "learning_rate": 3.124393976565147e-05,
      "loss": 0.9812,
      "step": 53490
    },
    {
      "epoch": 0.18750416189144387,
      "grad_norm": 3.25,
      "learning_rate": 3.1249780955384994e-05,
      "loss": 0.9405,
      "step": 53500
    },
    {
      "epoch": 0.18753920939833946,
      "grad_norm": 3.265625,
      "learning_rate": 3.1255622145118516e-05,
      "loss": 1.0271,
      "step": 53510
    },
    {
      "epoch": 0.18757425690523505,
      "grad_norm": 3.328125,
      "learning_rate": 3.1261463334852044e-05,
      "loss": 1.0194,
      "step": 53520
    },
    {
      "epoch": 0.18760930441213064,
      "grad_norm": 2.84375,
      "learning_rate": 3.1267304524585565e-05,
      "loss": 0.9487,
      "step": 53530
    },
    {
      "epoch": 0.18764435191902623,
      "grad_norm": 3.109375,
      "learning_rate": 3.1273145714319093e-05,
      "loss": 1.0605,
      "step": 53540
    },
    {
      "epoch": 0.18767939942592185,
      "grad_norm": 3.59375,
      "learning_rate": 3.127898690405262e-05,
      "loss": 1.0031,
      "step": 53550
    },
    {
      "epoch": 0.18771444693281744,
      "grad_norm": 2.71875,
      "learning_rate": 3.128482809378614e-05,
      "loss": 0.9839,
      "step": 53560
    },
    {
      "epoch": 0.18774949443971303,
      "grad_norm": 3.453125,
      "learning_rate": 3.129066928351967e-05,
      "loss": 0.9688,
      "step": 53570
    },
    {
      "epoch": 0.18778454194660862,
      "grad_norm": 2.71875,
      "learning_rate": 3.129651047325319e-05,
      "loss": 0.9596,
      "step": 53580
    },
    {
      "epoch": 0.18781958945350422,
      "grad_norm": 3.375,
      "learning_rate": 3.1302351662986714e-05,
      "loss": 0.9827,
      "step": 53590
    },
    {
      "epoch": 0.18785463696039983,
      "grad_norm": 3.1875,
      "learning_rate": 3.130819285272024e-05,
      "loss": 0.9801,
      "step": 53600
    },
    {
      "epoch": 0.18788968446729543,
      "grad_norm": 3.78125,
      "learning_rate": 3.1314034042453764e-05,
      "loss": 0.9359,
      "step": 53610
    },
    {
      "epoch": 0.18792473197419102,
      "grad_norm": 3.203125,
      "learning_rate": 3.13198752321873e-05,
      "loss": 1.0602,
      "step": 53620
    },
    {
      "epoch": 0.1879597794810866,
      "grad_norm": 2.984375,
      "learning_rate": 3.132571642192082e-05,
      "loss": 1.034,
      "step": 53630
    },
    {
      "epoch": 0.1879948269879822,
      "grad_norm": 3.15625,
      "learning_rate": 3.133155761165434e-05,
      "loss": 0.9607,
      "step": 53640
    },
    {
      "epoch": 0.18802987449487782,
      "grad_norm": 3.34375,
      "learning_rate": 3.133739880138787e-05,
      "loss": 0.9849,
      "step": 53650
    },
    {
      "epoch": 0.1880649220017734,
      "grad_norm": 3.0625,
      "learning_rate": 3.134323999112139e-05,
      "loss": 1.0368,
      "step": 53660
    },
    {
      "epoch": 0.188099969508669,
      "grad_norm": 2.796875,
      "learning_rate": 3.134908118085492e-05,
      "loss": 0.8429,
      "step": 53670
    },
    {
      "epoch": 0.1881350170155646,
      "grad_norm": 3.453125,
      "learning_rate": 3.135492237058844e-05,
      "loss": 0.9754,
      "step": 53680
    },
    {
      "epoch": 0.18817006452246018,
      "grad_norm": 3.140625,
      "learning_rate": 3.136076356032196e-05,
      "loss": 0.9711,
      "step": 53690
    },
    {
      "epoch": 0.1882051120293558,
      "grad_norm": 3.125,
      "learning_rate": 3.13666047500555e-05,
      "loss": 0.9829,
      "step": 53700
    },
    {
      "epoch": 0.1882401595362514,
      "grad_norm": 3.328125,
      "learning_rate": 3.137244593978902e-05,
      "loss": 0.9293,
      "step": 53710
    },
    {
      "epoch": 0.18827520704314699,
      "grad_norm": 3.296875,
      "learning_rate": 3.137828712952254e-05,
      "loss": 0.9827,
      "step": 53720
    },
    {
      "epoch": 0.18831025455004258,
      "grad_norm": 3.515625,
      "learning_rate": 3.138412831925607e-05,
      "loss": 1.0655,
      "step": 53730
    },
    {
      "epoch": 0.18834530205693817,
      "grad_norm": 3.328125,
      "learning_rate": 3.138996950898959e-05,
      "loss": 1.0343,
      "step": 53740
    },
    {
      "epoch": 0.1883803495638338,
      "grad_norm": 3.59375,
      "learning_rate": 3.139581069872312e-05,
      "loss": 0.9877,
      "step": 53750
    },
    {
      "epoch": 0.18841539707072938,
      "grad_norm": 3.28125,
      "learning_rate": 3.140165188845664e-05,
      "loss": 0.954,
      "step": 53760
    },
    {
      "epoch": 0.18845044457762497,
      "grad_norm": 3.796875,
      "learning_rate": 3.140749307819017e-05,
      "loss": 1.0463,
      "step": 53770
    },
    {
      "epoch": 0.18848549208452056,
      "grad_norm": 3.046875,
      "learning_rate": 3.1413334267923697e-05,
      "loss": 0.9659,
      "step": 53780
    },
    {
      "epoch": 0.18852053959141615,
      "grad_norm": 2.96875,
      "learning_rate": 3.141917545765722e-05,
      "loss": 0.9996,
      "step": 53790
    },
    {
      "epoch": 0.18855558709831177,
      "grad_norm": 3.390625,
      "learning_rate": 3.142501664739074e-05,
      "loss": 0.9738,
      "step": 53800
    },
    {
      "epoch": 0.18859063460520736,
      "grad_norm": 3.328125,
      "learning_rate": 3.143085783712427e-05,
      "loss": 1.1175,
      "step": 53810
    },
    {
      "epoch": 0.18862568211210295,
      "grad_norm": 3.109375,
      "learning_rate": 3.143669902685779e-05,
      "loss": 1.0399,
      "step": 53820
    },
    {
      "epoch": 0.18866072961899855,
      "grad_norm": 2.984375,
      "learning_rate": 3.144254021659132e-05,
      "loss": 0.9397,
      "step": 53830
    },
    {
      "epoch": 0.18869577712589414,
      "grad_norm": 3.1875,
      "learning_rate": 3.144838140632484e-05,
      "loss": 0.9775,
      "step": 53840
    },
    {
      "epoch": 0.18873082463278976,
      "grad_norm": 3.390625,
      "learning_rate": 3.145422259605837e-05,
      "loss": 0.9616,
      "step": 53850
    },
    {
      "epoch": 0.18876587213968535,
      "grad_norm": 3.0,
      "learning_rate": 3.1460063785791895e-05,
      "loss": 0.9528,
      "step": 53860
    },
    {
      "epoch": 0.18880091964658094,
      "grad_norm": 3.46875,
      "learning_rate": 3.146590497552542e-05,
      "loss": 1.0181,
      "step": 53870
    },
    {
      "epoch": 0.18883596715347653,
      "grad_norm": 3.359375,
      "learning_rate": 3.1471746165258945e-05,
      "loss": 1.0066,
      "step": 53880
    },
    {
      "epoch": 0.18887101466037212,
      "grad_norm": 3.53125,
      "learning_rate": 3.1477587354992466e-05,
      "loss": 0.8959,
      "step": 53890
    },
    {
      "epoch": 0.18890606216726774,
      "grad_norm": 2.890625,
      "learning_rate": 3.148342854472599e-05,
      "loss": 1.0115,
      "step": 53900
    },
    {
      "epoch": 0.18894110967416333,
      "grad_norm": 3.46875,
      "learning_rate": 3.1489269734459516e-05,
      "loss": 1.0119,
      "step": 53910
    },
    {
      "epoch": 0.18897615718105892,
      "grad_norm": 3.0625,
      "learning_rate": 3.149511092419304e-05,
      "loss": 1.0657,
      "step": 53920
    },
    {
      "epoch": 0.18901120468795451,
      "grad_norm": 3.0625,
      "learning_rate": 3.1500952113926566e-05,
      "loss": 1.0041,
      "step": 53930
    },
    {
      "epoch": 0.1890462521948501,
      "grad_norm": 2.9375,
      "learning_rate": 3.1506793303660094e-05,
      "loss": 0.9163,
      "step": 53940
    },
    {
      "epoch": 0.18908129970174573,
      "grad_norm": 3.578125,
      "learning_rate": 3.1512634493393615e-05,
      "loss": 0.9148,
      "step": 53950
    },
    {
      "epoch": 0.18911634720864132,
      "grad_norm": 3.296875,
      "learning_rate": 3.1518475683127144e-05,
      "loss": 1.0392,
      "step": 53960
    },
    {
      "epoch": 0.1891513947155369,
      "grad_norm": 3.015625,
      "learning_rate": 3.1524316872860665e-05,
      "loss": 0.9417,
      "step": 53970
    },
    {
      "epoch": 0.1891864422224325,
      "grad_norm": 3.28125,
      "learning_rate": 3.153015806259419e-05,
      "loss": 0.9463,
      "step": 53980
    },
    {
      "epoch": 0.1892214897293281,
      "grad_norm": 3.3125,
      "learning_rate": 3.1535999252327715e-05,
      "loss": 1.0021,
      "step": 53990
    },
    {
      "epoch": 0.1892565372362237,
      "grad_norm": 3.109375,
      "learning_rate": 3.1541840442061236e-05,
      "loss": 0.9356,
      "step": 54000
    },
    {
      "epoch": 0.1892915847431193,
      "grad_norm": 3.5625,
      "learning_rate": 3.1547681631794765e-05,
      "loss": 1.0268,
      "step": 54010
    },
    {
      "epoch": 0.1893266322500149,
      "grad_norm": 3.921875,
      "learning_rate": 3.155352282152829e-05,
      "loss": 0.9293,
      "step": 54020
    },
    {
      "epoch": 0.18936167975691048,
      "grad_norm": 2.890625,
      "learning_rate": 3.1559364011261814e-05,
      "loss": 1.0034,
      "step": 54030
    },
    {
      "epoch": 0.1893967272638061,
      "grad_norm": 3.34375,
      "learning_rate": 3.156520520099534e-05,
      "loss": 0.967,
      "step": 54040
    },
    {
      "epoch": 0.1894317747707017,
      "grad_norm": 2.96875,
      "learning_rate": 3.1571046390728864e-05,
      "loss": 1.0216,
      "step": 54050
    },
    {
      "epoch": 0.18946682227759729,
      "grad_norm": 2.625,
      "learning_rate": 3.1576887580462385e-05,
      "loss": 0.958,
      "step": 54060
    },
    {
      "epoch": 0.18950186978449288,
      "grad_norm": 3.015625,
      "learning_rate": 3.1582728770195914e-05,
      "loss": 1.0071,
      "step": 54070
    },
    {
      "epoch": 0.18953691729138847,
      "grad_norm": 2.765625,
      "learning_rate": 3.158856995992944e-05,
      "loss": 1.0188,
      "step": 54080
    },
    {
      "epoch": 0.1895719647982841,
      "grad_norm": 3.390625,
      "learning_rate": 3.159441114966297e-05,
      "loss": 1.0358,
      "step": 54090
    },
    {
      "epoch": 0.18960701230517968,
      "grad_norm": 3.015625,
      "learning_rate": 3.160025233939649e-05,
      "loss": 0.9957,
      "step": 54100
    },
    {
      "epoch": 0.18964205981207527,
      "grad_norm": 3.171875,
      "learning_rate": 3.160609352913001e-05,
      "loss": 0.9695,
      "step": 54110
    },
    {
      "epoch": 0.18967710731897086,
      "grad_norm": 3.5,
      "learning_rate": 3.161193471886354e-05,
      "loss": 1.0737,
      "step": 54120
    },
    {
      "epoch": 0.18971215482586645,
      "grad_norm": 3.125,
      "learning_rate": 3.161777590859706e-05,
      "loss": 0.8964,
      "step": 54130
    },
    {
      "epoch": 0.18974720233276207,
      "grad_norm": 3.578125,
      "learning_rate": 3.162361709833059e-05,
      "loss": 0.8815,
      "step": 54140
    },
    {
      "epoch": 0.18978224983965766,
      "grad_norm": 3.34375,
      "learning_rate": 3.162945828806411e-05,
      "loss": 0.9655,
      "step": 54150
    },
    {
      "epoch": 0.18981729734655325,
      "grad_norm": 2.84375,
      "learning_rate": 3.163529947779764e-05,
      "loss": 0.9783,
      "step": 54160
    },
    {
      "epoch": 0.18985234485344885,
      "grad_norm": 3.09375,
      "learning_rate": 3.164114066753117e-05,
      "loss": 1.0078,
      "step": 54170
    },
    {
      "epoch": 0.18988739236034444,
      "grad_norm": 3.09375,
      "learning_rate": 3.164698185726469e-05,
      "loss": 0.9077,
      "step": 54180
    },
    {
      "epoch": 0.18992243986724006,
      "grad_norm": 2.984375,
      "learning_rate": 3.165282304699821e-05,
      "loss": 0.9618,
      "step": 54190
    },
    {
      "epoch": 0.18995748737413565,
      "grad_norm": 2.96875,
      "learning_rate": 3.165866423673174e-05,
      "loss": 0.9858,
      "step": 54200
    },
    {
      "epoch": 0.18999253488103124,
      "grad_norm": 3.578125,
      "learning_rate": 3.166450542646526e-05,
      "loss": 0.9713,
      "step": 54210
    },
    {
      "epoch": 0.19002758238792683,
      "grad_norm": 2.96875,
      "learning_rate": 3.167034661619879e-05,
      "loss": 0.967,
      "step": 54220
    },
    {
      "epoch": 0.19006262989482242,
      "grad_norm": 3.78125,
      "learning_rate": 3.167618780593231e-05,
      "loss": 1.073,
      "step": 54230
    },
    {
      "epoch": 0.19009767740171804,
      "grad_norm": 3.0625,
      "learning_rate": 3.168202899566584e-05,
      "loss": 1.0387,
      "step": 54240
    },
    {
      "epoch": 0.19013272490861363,
      "grad_norm": 2.953125,
      "learning_rate": 3.168787018539937e-05,
      "loss": 1.0111,
      "step": 54250
    },
    {
      "epoch": 0.19016777241550922,
      "grad_norm": 3.265625,
      "learning_rate": 3.169371137513289e-05,
      "loss": 1.0034,
      "step": 54260
    },
    {
      "epoch": 0.19020281992240481,
      "grad_norm": 3.28125,
      "learning_rate": 3.169955256486641e-05,
      "loss": 0.9619,
      "step": 54270
    },
    {
      "epoch": 0.1902378674293004,
      "grad_norm": 3.421875,
      "learning_rate": 3.170539375459994e-05,
      "loss": 1.0309,
      "step": 54280
    },
    {
      "epoch": 0.19027291493619602,
      "grad_norm": 3.515625,
      "learning_rate": 3.171123494433346e-05,
      "loss": 1.0179,
      "step": 54290
    },
    {
      "epoch": 0.19030796244309162,
      "grad_norm": 2.859375,
      "learning_rate": 3.171707613406699e-05,
      "loss": 1.0102,
      "step": 54300
    },
    {
      "epoch": 0.1903430099499872,
      "grad_norm": 2.8125,
      "learning_rate": 3.172291732380051e-05,
      "loss": 0.9751,
      "step": 54310
    },
    {
      "epoch": 0.1903780574568828,
      "grad_norm": 3.296875,
      "learning_rate": 3.172875851353404e-05,
      "loss": 0.9684,
      "step": 54320
    },
    {
      "epoch": 0.1904131049637784,
      "grad_norm": 4.28125,
      "learning_rate": 3.1734599703267566e-05,
      "loss": 0.9745,
      "step": 54330
    },
    {
      "epoch": 0.190448152470674,
      "grad_norm": 3.0625,
      "learning_rate": 3.174044089300109e-05,
      "loss": 1.0053,
      "step": 54340
    },
    {
      "epoch": 0.1904831999775696,
      "grad_norm": 3.328125,
      "learning_rate": 3.1746282082734616e-05,
      "loss": 0.9543,
      "step": 54350
    },
    {
      "epoch": 0.1905182474844652,
      "grad_norm": 3.015625,
      "learning_rate": 3.175212327246814e-05,
      "loss": 1.112,
      "step": 54360
    },
    {
      "epoch": 0.19055329499136078,
      "grad_norm": 3.1875,
      "learning_rate": 3.175796446220166e-05,
      "loss": 1.0119,
      "step": 54370
    },
    {
      "epoch": 0.19058834249825637,
      "grad_norm": 3.078125,
      "learning_rate": 3.176380565193519e-05,
      "loss": 0.9648,
      "step": 54380
    },
    {
      "epoch": 0.190623390005152,
      "grad_norm": 3.9375,
      "learning_rate": 3.176964684166871e-05,
      "loss": 1.0282,
      "step": 54390
    },
    {
      "epoch": 0.19065843751204758,
      "grad_norm": 3.515625,
      "learning_rate": 3.177548803140224e-05,
      "loss": 1.0397,
      "step": 54400
    },
    {
      "epoch": 0.19069348501894318,
      "grad_norm": 3.734375,
      "learning_rate": 3.1781329221135765e-05,
      "loss": 0.9972,
      "step": 54410
    },
    {
      "epoch": 0.19072853252583877,
      "grad_norm": 2.625,
      "learning_rate": 3.1787170410869287e-05,
      "loss": 0.9865,
      "step": 54420
    },
    {
      "epoch": 0.19076358003273436,
      "grad_norm": 3.390625,
      "learning_rate": 3.1793011600602815e-05,
      "loss": 1.0609,
      "step": 54430
    },
    {
      "epoch": 0.19079862753962998,
      "grad_norm": 3.234375,
      "learning_rate": 3.1798852790336336e-05,
      "loss": 0.9717,
      "step": 54440
    },
    {
      "epoch": 0.19083367504652557,
      "grad_norm": 3.640625,
      "learning_rate": 3.180469398006986e-05,
      "loss": 0.9917,
      "step": 54450
    },
    {
      "epoch": 0.19086872255342116,
      "grad_norm": 3.1875,
      "learning_rate": 3.1810535169803386e-05,
      "loss": 0.9249,
      "step": 54460
    },
    {
      "epoch": 0.19090377006031675,
      "grad_norm": 3.1875,
      "learning_rate": 3.1816376359536914e-05,
      "loss": 1.0523,
      "step": 54470
    },
    {
      "epoch": 0.19093881756721234,
      "grad_norm": 3.40625,
      "learning_rate": 3.1822217549270436e-05,
      "loss": 1.0304,
      "step": 54480
    },
    {
      "epoch": 0.19097386507410796,
      "grad_norm": 3.109375,
      "learning_rate": 3.1828058739003964e-05,
      "loss": 0.9399,
      "step": 54490
    },
    {
      "epoch": 0.19100891258100355,
      "grad_norm": 3.609375,
      "learning_rate": 3.1833899928737485e-05,
      "loss": 1.0686,
      "step": 54500
    },
    {
      "epoch": 0.19104396008789915,
      "grad_norm": 3.328125,
      "learning_rate": 3.1839741118471014e-05,
      "loss": 0.9209,
      "step": 54510
    },
    {
      "epoch": 0.19107900759479474,
      "grad_norm": 2.796875,
      "learning_rate": 3.1845582308204535e-05,
      "loss": 0.9757,
      "step": 54520
    },
    {
      "epoch": 0.19111405510169033,
      "grad_norm": 3.140625,
      "learning_rate": 3.185142349793806e-05,
      "loss": 0.9736,
      "step": 54530
    },
    {
      "epoch": 0.19114910260858595,
      "grad_norm": 2.765625,
      "learning_rate": 3.1857264687671585e-05,
      "loss": 0.9104,
      "step": 54540
    },
    {
      "epoch": 0.19118415011548154,
      "grad_norm": 3.6875,
      "learning_rate": 3.186310587740511e-05,
      "loss": 0.9721,
      "step": 54550
    },
    {
      "epoch": 0.19121919762237713,
      "grad_norm": 2.828125,
      "learning_rate": 3.186894706713864e-05,
      "loss": 1.0958,
      "step": 54560
    },
    {
      "epoch": 0.19125424512927272,
      "grad_norm": 3.265625,
      "learning_rate": 3.187478825687216e-05,
      "loss": 1.0037,
      "step": 54570
    },
    {
      "epoch": 0.19128929263616834,
      "grad_norm": 3.515625,
      "learning_rate": 3.1880629446605684e-05,
      "loss": 1.0064,
      "step": 54580
    },
    {
      "epoch": 0.19132434014306393,
      "grad_norm": 3.296875,
      "learning_rate": 3.188647063633921e-05,
      "loss": 1.1134,
      "step": 54590
    },
    {
      "epoch": 0.19135938764995952,
      "grad_norm": 3.203125,
      "learning_rate": 3.1892311826072734e-05,
      "loss": 0.9738,
      "step": 54600
    },
    {
      "epoch": 0.1913944351568551,
      "grad_norm": 3.5625,
      "learning_rate": 3.189815301580626e-05,
      "loss": 0.9995,
      "step": 54610
    },
    {
      "epoch": 0.1914294826637507,
      "grad_norm": 3.109375,
      "learning_rate": 3.1903994205539783e-05,
      "loss": 1.0249,
      "step": 54620
    },
    {
      "epoch": 0.19146453017064632,
      "grad_norm": 3.21875,
      "learning_rate": 3.190983539527331e-05,
      "loss": 0.931,
      "step": 54630
    },
    {
      "epoch": 0.19149957767754192,
      "grad_norm": 3.59375,
      "learning_rate": 3.191567658500684e-05,
      "loss": 1.047,
      "step": 54640
    },
    {
      "epoch": 0.1915346251844375,
      "grad_norm": 3.484375,
      "learning_rate": 3.192151777474036e-05,
      "loss": 1.0087,
      "step": 54650
    },
    {
      "epoch": 0.1915696726913331,
      "grad_norm": 2.953125,
      "learning_rate": 3.192735896447388e-05,
      "loss": 0.9574,
      "step": 54660
    },
    {
      "epoch": 0.1916047201982287,
      "grad_norm": 3.03125,
      "learning_rate": 3.193320015420741e-05,
      "loss": 0.9503,
      "step": 54670
    },
    {
      "epoch": 0.1916397677051243,
      "grad_norm": 3.390625,
      "learning_rate": 3.193904134394093e-05,
      "loss": 1.0385,
      "step": 54680
    },
    {
      "epoch": 0.1916748152120199,
      "grad_norm": 3.046875,
      "learning_rate": 3.194488253367446e-05,
      "loss": 1.0119,
      "step": 54690
    },
    {
      "epoch": 0.1917098627189155,
      "grad_norm": 3.515625,
      "learning_rate": 3.195072372340798e-05,
      "loss": 1.0093,
      "step": 54700
    },
    {
      "epoch": 0.19174491022581108,
      "grad_norm": 3.375,
      "learning_rate": 3.195656491314151e-05,
      "loss": 0.8925,
      "step": 54710
    },
    {
      "epoch": 0.19177995773270667,
      "grad_norm": 2.75,
      "learning_rate": 3.196240610287504e-05,
      "loss": 1.0218,
      "step": 54720
    },
    {
      "epoch": 0.1918150052396023,
      "grad_norm": 3.25,
      "learning_rate": 3.196824729260856e-05,
      "loss": 0.9886,
      "step": 54730
    },
    {
      "epoch": 0.19185005274649788,
      "grad_norm": 3.21875,
      "learning_rate": 3.197408848234208e-05,
      "loss": 0.9839,
      "step": 54740
    },
    {
      "epoch": 0.19188510025339348,
      "grad_norm": 3.234375,
      "learning_rate": 3.197992967207561e-05,
      "loss": 0.9772,
      "step": 54750
    },
    {
      "epoch": 0.19192014776028907,
      "grad_norm": 3.421875,
      "learning_rate": 3.198577086180913e-05,
      "loss": 1.0002,
      "step": 54760
    },
    {
      "epoch": 0.19195519526718466,
      "grad_norm": 2.890625,
      "learning_rate": 3.199161205154266e-05,
      "loss": 0.9598,
      "step": 54770
    },
    {
      "epoch": 0.19199024277408028,
      "grad_norm": 3.390625,
      "learning_rate": 3.199745324127619e-05,
      "loss": 1.0539,
      "step": 54780
    },
    {
      "epoch": 0.19202529028097587,
      "grad_norm": 3.453125,
      "learning_rate": 3.200329443100971e-05,
      "loss": 0.9708,
      "step": 54790
    },
    {
      "epoch": 0.19206033778787146,
      "grad_norm": 2.765625,
      "learning_rate": 3.200913562074324e-05,
      "loss": 0.9102,
      "step": 54800
    },
    {
      "epoch": 0.19209538529476705,
      "grad_norm": 2.9375,
      "learning_rate": 3.201497681047676e-05,
      "loss": 0.9676,
      "step": 54810
    },
    {
      "epoch": 0.19213043280166264,
      "grad_norm": 3.71875,
      "learning_rate": 3.202081800021029e-05,
      "loss": 0.9422,
      "step": 54820
    },
    {
      "epoch": 0.19216548030855826,
      "grad_norm": 3.0625,
      "learning_rate": 3.202665918994381e-05,
      "loss": 0.9668,
      "step": 54830
    },
    {
      "epoch": 0.19220052781545385,
      "grad_norm": 3.09375,
      "learning_rate": 3.203250037967733e-05,
      "loss": 0.9567,
      "step": 54840
    },
    {
      "epoch": 0.19223557532234944,
      "grad_norm": 3.21875,
      "learning_rate": 3.203834156941086e-05,
      "loss": 0.9512,
      "step": 54850
    },
    {
      "epoch": 0.19227062282924504,
      "grad_norm": 3.140625,
      "learning_rate": 3.2044182759144387e-05,
      "loss": 0.9236,
      "step": 54860
    },
    {
      "epoch": 0.19230567033614063,
      "grad_norm": 3.6875,
      "learning_rate": 3.205002394887791e-05,
      "loss": 1.0613,
      "step": 54870
    },
    {
      "epoch": 0.19234071784303625,
      "grad_norm": 2.9375,
      "learning_rate": 3.2055865138611436e-05,
      "loss": 1.044,
      "step": 54880
    },
    {
      "epoch": 0.19237576534993184,
      "grad_norm": 3.234375,
      "learning_rate": 3.206170632834496e-05,
      "loss": 1.0429,
      "step": 54890
    },
    {
      "epoch": 0.19241081285682743,
      "grad_norm": 3.53125,
      "learning_rate": 3.2067547518078486e-05,
      "loss": 1.0191,
      "step": 54900
    },
    {
      "epoch": 0.19244586036372302,
      "grad_norm": 3.234375,
      "learning_rate": 3.207338870781201e-05,
      "loss": 0.9896,
      "step": 54910
    },
    {
      "epoch": 0.1924809078706186,
      "grad_norm": 3.640625,
      "learning_rate": 3.207922989754553e-05,
      "loss": 0.9994,
      "step": 54920
    },
    {
      "epoch": 0.19251595537751423,
      "grad_norm": 2.6875,
      "learning_rate": 3.208507108727906e-05,
      "loss": 1.0209,
      "step": 54930
    },
    {
      "epoch": 0.19255100288440982,
      "grad_norm": 3.234375,
      "learning_rate": 3.2090912277012585e-05,
      "loss": 1.0605,
      "step": 54940
    },
    {
      "epoch": 0.1925860503913054,
      "grad_norm": 3.5,
      "learning_rate": 3.209675346674611e-05,
      "loss": 1.0213,
      "step": 54950
    },
    {
      "epoch": 0.192621097898201,
      "grad_norm": 3.796875,
      "learning_rate": 3.2102594656479635e-05,
      "loss": 1.0038,
      "step": 54960
    },
    {
      "epoch": 0.1926561454050966,
      "grad_norm": 3.53125,
      "learning_rate": 3.2108435846213156e-05,
      "loss": 1.0299,
      "step": 54970
    },
    {
      "epoch": 0.19269119291199222,
      "grad_norm": 2.9375,
      "learning_rate": 3.2114277035946685e-05,
      "loss": 1.0587,
      "step": 54980
    },
    {
      "epoch": 0.1927262404188878,
      "grad_norm": 3.5625,
      "learning_rate": 3.2120118225680206e-05,
      "loss": 0.8965,
      "step": 54990
    },
    {
      "epoch": 0.1927612879257834,
      "grad_norm": 3.421875,
      "learning_rate": 3.2125959415413734e-05,
      "loss": 1.0479,
      "step": 55000
    },
    {
      "epoch": 0.1927612879257834,
      "eval_loss": 0.9363110065460205,
      "eval_runtime": 559.9662,
      "eval_samples_per_second": 679.391,
      "eval_steps_per_second": 56.616,
      "step": 55000
    },
    {
      "epoch": 0.192796335432679,
      "grad_norm": 3.109375,
      "learning_rate": 3.2131800605147256e-05,
      "loss": 1.039,
      "step": 55010
    },
    {
      "epoch": 0.19283138293957458,
      "grad_norm": 3.09375,
      "learning_rate": 3.2137641794880784e-05,
      "loss": 0.9297,
      "step": 55020
    },
    {
      "epoch": 0.1928664304464702,
      "grad_norm": 3.515625,
      "learning_rate": 3.214348298461431e-05,
      "loss": 1.0664,
      "step": 55030
    },
    {
      "epoch": 0.1929014779533658,
      "grad_norm": 3.25,
      "learning_rate": 3.2149324174347834e-05,
      "loss": 0.9894,
      "step": 55040
    },
    {
      "epoch": 0.19293652546026138,
      "grad_norm": 3.59375,
      "learning_rate": 3.2155165364081355e-05,
      "loss": 0.9293,
      "step": 55050
    },
    {
      "epoch": 0.19297157296715697,
      "grad_norm": 3.046875,
      "learning_rate": 3.2161006553814883e-05,
      "loss": 1.018,
      "step": 55060
    },
    {
      "epoch": 0.19300662047405256,
      "grad_norm": 3.171875,
      "learning_rate": 3.2166847743548405e-05,
      "loss": 1.0197,
      "step": 55070
    },
    {
      "epoch": 0.19304166798094818,
      "grad_norm": 2.90625,
      "learning_rate": 3.217268893328193e-05,
      "loss": 0.9957,
      "step": 55080
    },
    {
      "epoch": 0.19307671548784378,
      "grad_norm": 3.5625,
      "learning_rate": 3.2178530123015455e-05,
      "loss": 0.9691,
      "step": 55090
    },
    {
      "epoch": 0.19311176299473937,
      "grad_norm": 3.203125,
      "learning_rate": 3.218437131274898e-05,
      "loss": 0.9825,
      "step": 55100
    },
    {
      "epoch": 0.19314681050163496,
      "grad_norm": 3.171875,
      "learning_rate": 3.219021250248251e-05,
      "loss": 0.8861,
      "step": 55110
    },
    {
      "epoch": 0.19318185800853058,
      "grad_norm": 3.328125,
      "learning_rate": 3.219605369221603e-05,
      "loss": 0.9266,
      "step": 55120
    },
    {
      "epoch": 0.19321690551542617,
      "grad_norm": 3.75,
      "learning_rate": 3.2201894881949554e-05,
      "loss": 0.9654,
      "step": 55130
    },
    {
      "epoch": 0.19325195302232176,
      "grad_norm": 3.09375,
      "learning_rate": 3.220773607168308e-05,
      "loss": 0.9769,
      "step": 55140
    },
    {
      "epoch": 0.19328700052921735,
      "grad_norm": 3.703125,
      "learning_rate": 3.2213577261416604e-05,
      "loss": 1.0184,
      "step": 55150
    },
    {
      "epoch": 0.19332204803611294,
      "grad_norm": 3.21875,
      "learning_rate": 3.221941845115013e-05,
      "loss": 1.0147,
      "step": 55160
    },
    {
      "epoch": 0.19335709554300856,
      "grad_norm": 3.03125,
      "learning_rate": 3.222525964088366e-05,
      "loss": 0.9138,
      "step": 55170
    },
    {
      "epoch": 0.19339214304990415,
      "grad_norm": 3.140625,
      "learning_rate": 3.223110083061718e-05,
      "loss": 1.008,
      "step": 55180
    },
    {
      "epoch": 0.19342719055679974,
      "grad_norm": 3.1875,
      "learning_rate": 3.223694202035071e-05,
      "loss": 1.1035,
      "step": 55190
    },
    {
      "epoch": 0.19346223806369534,
      "grad_norm": 3.234375,
      "learning_rate": 3.224278321008423e-05,
      "loss": 0.9779,
      "step": 55200
    },
    {
      "epoch": 0.19349728557059093,
      "grad_norm": 3.28125,
      "learning_rate": 3.224862439981776e-05,
      "loss": 1.0551,
      "step": 55210
    },
    {
      "epoch": 0.19353233307748655,
      "grad_norm": 3.21875,
      "learning_rate": 3.225446558955128e-05,
      "loss": 0.967,
      "step": 55220
    },
    {
      "epoch": 0.19356738058438214,
      "grad_norm": 3.125,
      "learning_rate": 3.22603067792848e-05,
      "loss": 0.8868,
      "step": 55230
    },
    {
      "epoch": 0.19360242809127773,
      "grad_norm": 3.484375,
      "learning_rate": 3.226614796901833e-05,
      "loss": 0.9733,
      "step": 55240
    },
    {
      "epoch": 0.19363747559817332,
      "grad_norm": 3.484375,
      "learning_rate": 3.227198915875186e-05,
      "loss": 0.98,
      "step": 55250
    },
    {
      "epoch": 0.1936725231050689,
      "grad_norm": 3.171875,
      "learning_rate": 3.227783034848538e-05,
      "loss": 1.0339,
      "step": 55260
    },
    {
      "epoch": 0.19370757061196453,
      "grad_norm": 3.390625,
      "learning_rate": 3.228367153821891e-05,
      "loss": 0.9146,
      "step": 55270
    },
    {
      "epoch": 0.19374261811886012,
      "grad_norm": 3.21875,
      "learning_rate": 3.228951272795243e-05,
      "loss": 0.9223,
      "step": 55280
    },
    {
      "epoch": 0.1937776656257557,
      "grad_norm": 3.484375,
      "learning_rate": 3.229535391768596e-05,
      "loss": 0.9555,
      "step": 55290
    },
    {
      "epoch": 0.1938127131326513,
      "grad_norm": 3.140625,
      "learning_rate": 3.230119510741948e-05,
      "loss": 0.9831,
      "step": 55300
    },
    {
      "epoch": 0.1938477606395469,
      "grad_norm": 3.125,
      "learning_rate": 3.2307036297153e-05,
      "loss": 1.0199,
      "step": 55310
    },
    {
      "epoch": 0.19388280814644251,
      "grad_norm": 3.265625,
      "learning_rate": 3.231287748688653e-05,
      "loss": 1.0674,
      "step": 55320
    },
    {
      "epoch": 0.1939178556533381,
      "grad_norm": 3.078125,
      "learning_rate": 3.231871867662006e-05,
      "loss": 1.067,
      "step": 55330
    },
    {
      "epoch": 0.1939529031602337,
      "grad_norm": 3.203125,
      "learning_rate": 3.232455986635358e-05,
      "loss": 0.9918,
      "step": 55340
    },
    {
      "epoch": 0.1939879506671293,
      "grad_norm": 3.46875,
      "learning_rate": 3.233040105608711e-05,
      "loss": 1.03,
      "step": 55350
    },
    {
      "epoch": 0.19402299817402488,
      "grad_norm": 2.953125,
      "learning_rate": 3.233624224582063e-05,
      "loss": 1.0363,
      "step": 55360
    },
    {
      "epoch": 0.1940580456809205,
      "grad_norm": 3.1875,
      "learning_rate": 3.234208343555416e-05,
      "loss": 1.0158,
      "step": 55370
    },
    {
      "epoch": 0.1940930931878161,
      "grad_norm": 3.375,
      "learning_rate": 3.234792462528768e-05,
      "loss": 1.0035,
      "step": 55380
    },
    {
      "epoch": 0.19412814069471168,
      "grad_norm": 3.28125,
      "learning_rate": 3.23537658150212e-05,
      "loss": 0.9332,
      "step": 55390
    },
    {
      "epoch": 0.19416318820160727,
      "grad_norm": 3.421875,
      "learning_rate": 3.235960700475473e-05,
      "loss": 0.9242,
      "step": 55400
    },
    {
      "epoch": 0.19419823570850286,
      "grad_norm": 3.015625,
      "learning_rate": 3.2365448194488256e-05,
      "loss": 0.9309,
      "step": 55410
    },
    {
      "epoch": 0.19423328321539848,
      "grad_norm": 3.1875,
      "learning_rate": 3.2371289384221785e-05,
      "loss": 1.0813,
      "step": 55420
    },
    {
      "epoch": 0.19426833072229407,
      "grad_norm": 3.65625,
      "learning_rate": 3.2377130573955306e-05,
      "loss": 0.9486,
      "step": 55430
    },
    {
      "epoch": 0.19430337822918967,
      "grad_norm": 3.25,
      "learning_rate": 3.238297176368883e-05,
      "loss": 0.9783,
      "step": 55440
    },
    {
      "epoch": 0.19433842573608526,
      "grad_norm": 2.96875,
      "learning_rate": 3.2388812953422356e-05,
      "loss": 1.0126,
      "step": 55450
    },
    {
      "epoch": 0.19437347324298085,
      "grad_norm": 3.296875,
      "learning_rate": 3.239465414315588e-05,
      "loss": 0.8659,
      "step": 55460
    },
    {
      "epoch": 0.19440852074987647,
      "grad_norm": 3.484375,
      "learning_rate": 3.2400495332889405e-05,
      "loss": 1.0958,
      "step": 55470
    },
    {
      "epoch": 0.19444356825677206,
      "grad_norm": 3.625,
      "learning_rate": 3.2406336522622934e-05,
      "loss": 1.0192,
      "step": 55480
    },
    {
      "epoch": 0.19447861576366765,
      "grad_norm": 3.015625,
      "learning_rate": 3.2412177712356455e-05,
      "loss": 1.0322,
      "step": 55490
    },
    {
      "epoch": 0.19451366327056324,
      "grad_norm": 2.84375,
      "learning_rate": 3.241801890208998e-05,
      "loss": 0.9706,
      "step": 55500
    },
    {
      "epoch": 0.19454871077745883,
      "grad_norm": 3.765625,
      "learning_rate": 3.2423860091823505e-05,
      "loss": 0.9642,
      "step": 55510
    },
    {
      "epoch": 0.19458375828435445,
      "grad_norm": 3.0625,
      "learning_rate": 3.2429701281557026e-05,
      "loss": 1.0106,
      "step": 55520
    },
    {
      "epoch": 0.19461880579125004,
      "grad_norm": 3.109375,
      "learning_rate": 3.2435542471290554e-05,
      "loss": 1.0331,
      "step": 55530
    },
    {
      "epoch": 0.19465385329814563,
      "grad_norm": 3.46875,
      "learning_rate": 3.2441383661024076e-05,
      "loss": 0.9769,
      "step": 55540
    },
    {
      "epoch": 0.19468890080504123,
      "grad_norm": 5.03125,
      "learning_rate": 3.2447224850757604e-05,
      "loss": 1.0564,
      "step": 55550
    },
    {
      "epoch": 0.19472394831193682,
      "grad_norm": 3.78125,
      "learning_rate": 3.245306604049113e-05,
      "loss": 0.9748,
      "step": 55560
    },
    {
      "epoch": 0.19475899581883244,
      "grad_norm": 3.34375,
      "learning_rate": 3.2458907230224654e-05,
      "loss": 0.9966,
      "step": 55570
    },
    {
      "epoch": 0.19479404332572803,
      "grad_norm": 3.46875,
      "learning_rate": 3.246474841995818e-05,
      "loss": 1.0307,
      "step": 55580
    },
    {
      "epoch": 0.19482909083262362,
      "grad_norm": 3.4375,
      "learning_rate": 3.2470589609691704e-05,
      "loss": 1.0793,
      "step": 55590
    },
    {
      "epoch": 0.1948641383395192,
      "grad_norm": 2.984375,
      "learning_rate": 3.2476430799425225e-05,
      "loss": 0.9836,
      "step": 55600
    },
    {
      "epoch": 0.1948991858464148,
      "grad_norm": 3.390625,
      "learning_rate": 3.248227198915875e-05,
      "loss": 0.9064,
      "step": 55610
    },
    {
      "epoch": 0.19493423335331042,
      "grad_norm": 3.171875,
      "learning_rate": 3.2488113178892275e-05,
      "loss": 0.9844,
      "step": 55620
    },
    {
      "epoch": 0.194969280860206,
      "grad_norm": 3.359375,
      "learning_rate": 3.24939543686258e-05,
      "loss": 0.9877,
      "step": 55630
    },
    {
      "epoch": 0.1950043283671016,
      "grad_norm": 2.9375,
      "learning_rate": 3.249979555835933e-05,
      "loss": 0.9118,
      "step": 55640
    },
    {
      "epoch": 0.1950393758739972,
      "grad_norm": 3.359375,
      "learning_rate": 3.250563674809285e-05,
      "loss": 0.9416,
      "step": 55650
    },
    {
      "epoch": 0.1950744233808928,
      "grad_norm": 3.25,
      "learning_rate": 3.251147793782638e-05,
      "loss": 1.0527,
      "step": 55660
    },
    {
      "epoch": 0.1951094708877884,
      "grad_norm": 3.328125,
      "learning_rate": 3.25173191275599e-05,
      "loss": 1.0015,
      "step": 55670
    },
    {
      "epoch": 0.195144518394684,
      "grad_norm": 3.375,
      "learning_rate": 3.252316031729343e-05,
      "loss": 1.0088,
      "step": 55680
    },
    {
      "epoch": 0.1951795659015796,
      "grad_norm": 3.421875,
      "learning_rate": 3.252900150702695e-05,
      "loss": 1.0704,
      "step": 55690
    },
    {
      "epoch": 0.19521461340847518,
      "grad_norm": 3.359375,
      "learning_rate": 3.2534842696760473e-05,
      "loss": 1.0369,
      "step": 55700
    },
    {
      "epoch": 0.1952496609153708,
      "grad_norm": 3.09375,
      "learning_rate": 3.2540683886494e-05,
      "loss": 0.9356,
      "step": 55710
    },
    {
      "epoch": 0.1952847084222664,
      "grad_norm": 3.53125,
      "learning_rate": 3.254652507622753e-05,
      "loss": 0.9341,
      "step": 55720
    },
    {
      "epoch": 0.19531975592916198,
      "grad_norm": 2.859375,
      "learning_rate": 3.255236626596105e-05,
      "loss": 1.0292,
      "step": 55730
    },
    {
      "epoch": 0.19535480343605757,
      "grad_norm": 3.140625,
      "learning_rate": 3.255820745569458e-05,
      "loss": 0.9711,
      "step": 55740
    },
    {
      "epoch": 0.19538985094295316,
      "grad_norm": 3.0625,
      "learning_rate": 3.25640486454281e-05,
      "loss": 1.018,
      "step": 55750
    },
    {
      "epoch": 0.19542489844984878,
      "grad_norm": 3.796875,
      "learning_rate": 3.256988983516163e-05,
      "loss": 1.0239,
      "step": 55760
    },
    {
      "epoch": 0.19545994595674437,
      "grad_norm": 3.390625,
      "learning_rate": 3.257573102489515e-05,
      "loss": 1.0067,
      "step": 55770
    },
    {
      "epoch": 0.19549499346363997,
      "grad_norm": 3.203125,
      "learning_rate": 3.258157221462867e-05,
      "loss": 0.9632,
      "step": 55780
    },
    {
      "epoch": 0.19553004097053556,
      "grad_norm": 3.078125,
      "learning_rate": 3.258741340436221e-05,
      "loss": 0.9856,
      "step": 55790
    },
    {
      "epoch": 0.19556508847743115,
      "grad_norm": 3.265625,
      "learning_rate": 3.259325459409573e-05,
      "loss": 0.9531,
      "step": 55800
    },
    {
      "epoch": 0.19560013598432677,
      "grad_norm": 3.890625,
      "learning_rate": 3.259909578382925e-05,
      "loss": 1.0204,
      "step": 55810
    },
    {
      "epoch": 0.19563518349122236,
      "grad_norm": 2.9375,
      "learning_rate": 3.260493697356278e-05,
      "loss": 0.8832,
      "step": 55820
    },
    {
      "epoch": 0.19567023099811795,
      "grad_norm": 3.53125,
      "learning_rate": 3.26107781632963e-05,
      "loss": 1.0756,
      "step": 55830
    },
    {
      "epoch": 0.19570527850501354,
      "grad_norm": 2.703125,
      "learning_rate": 3.261661935302983e-05,
      "loss": 0.9925,
      "step": 55840
    },
    {
      "epoch": 0.19574032601190913,
      "grad_norm": 2.96875,
      "learning_rate": 3.262246054276335e-05,
      "loss": 0.9774,
      "step": 55850
    },
    {
      "epoch": 0.19577537351880475,
      "grad_norm": 2.9375,
      "learning_rate": 3.262830173249687e-05,
      "loss": 0.9397,
      "step": 55860
    },
    {
      "epoch": 0.19581042102570034,
      "grad_norm": 3.0625,
      "learning_rate": 3.2634142922230406e-05,
      "loss": 0.9727,
      "step": 55870
    },
    {
      "epoch": 0.19584546853259593,
      "grad_norm": 3.484375,
      "learning_rate": 3.263998411196393e-05,
      "loss": 1.0504,
      "step": 55880
    },
    {
      "epoch": 0.19588051603949153,
      "grad_norm": 3.140625,
      "learning_rate": 3.2645825301697456e-05,
      "loss": 1.0009,
      "step": 55890
    },
    {
      "epoch": 0.19591556354638712,
      "grad_norm": 3.0,
      "learning_rate": 3.265166649143098e-05,
      "loss": 0.9809,
      "step": 55900
    },
    {
      "epoch": 0.19595061105328274,
      "grad_norm": 2.984375,
      "learning_rate": 3.26575076811645e-05,
      "loss": 0.9776,
      "step": 55910
    },
    {
      "epoch": 0.19598565856017833,
      "grad_norm": 3.28125,
      "learning_rate": 3.266334887089803e-05,
      "loss": 0.9428,
      "step": 55920
    },
    {
      "epoch": 0.19602070606707392,
      "grad_norm": 3.203125,
      "learning_rate": 3.266919006063155e-05,
      "loss": 0.9729,
      "step": 55930
    },
    {
      "epoch": 0.1960557535739695,
      "grad_norm": 3.140625,
      "learning_rate": 3.2675031250365076e-05,
      "loss": 0.9235,
      "step": 55940
    },
    {
      "epoch": 0.1960908010808651,
      "grad_norm": 3.40625,
      "learning_rate": 3.2680872440098605e-05,
      "loss": 0.9655,
      "step": 55950
    },
    {
      "epoch": 0.19612584858776072,
      "grad_norm": 2.90625,
      "learning_rate": 3.2686713629832126e-05,
      "loss": 0.9442,
      "step": 55960
    },
    {
      "epoch": 0.1961608960946563,
      "grad_norm": 3.125,
      "learning_rate": 3.2692554819565654e-05,
      "loss": 0.9673,
      "step": 55970
    },
    {
      "epoch": 0.1961959436015519,
      "grad_norm": 3.296875,
      "learning_rate": 3.2698396009299176e-05,
      "loss": 0.9468,
      "step": 55980
    },
    {
      "epoch": 0.1962309911084475,
      "grad_norm": 3.359375,
      "learning_rate": 3.27042371990327e-05,
      "loss": 1.0645,
      "step": 55990
    },
    {
      "epoch": 0.19626603861534309,
      "grad_norm": 3.46875,
      "learning_rate": 3.2710078388766226e-05,
      "loss": 0.9921,
      "step": 56000
    },
    {
      "epoch": 0.1963010861222387,
      "grad_norm": 3.546875,
      "learning_rate": 3.271591957849975e-05,
      "loss": 1.0125,
      "step": 56010
    },
    {
      "epoch": 0.1963361336291343,
      "grad_norm": 3.71875,
      "learning_rate": 3.2721760768233275e-05,
      "loss": 0.9653,
      "step": 56020
    },
    {
      "epoch": 0.1963711811360299,
      "grad_norm": 3.28125,
      "learning_rate": 3.2727601957966803e-05,
      "loss": 1.0233,
      "step": 56030
    },
    {
      "epoch": 0.19640622864292548,
      "grad_norm": 3.375,
      "learning_rate": 3.2733443147700325e-05,
      "loss": 1.0253,
      "step": 56040
    },
    {
      "epoch": 0.19644127614982107,
      "grad_norm": 3.171875,
      "learning_rate": 3.273928433743385e-05,
      "loss": 0.9315,
      "step": 56050
    },
    {
      "epoch": 0.1964763236567167,
      "grad_norm": 3.15625,
      "learning_rate": 3.2745125527167375e-05,
      "loss": 0.9674,
      "step": 56060
    },
    {
      "epoch": 0.19651137116361228,
      "grad_norm": 3.875,
      "learning_rate": 3.2750966716900896e-05,
      "loss": 0.9941,
      "step": 56070
    },
    {
      "epoch": 0.19654641867050787,
      "grad_norm": 4.0,
      "learning_rate": 3.2756807906634424e-05,
      "loss": 0.9514,
      "step": 56080
    },
    {
      "epoch": 0.19658146617740346,
      "grad_norm": 3.4375,
      "learning_rate": 3.2762649096367946e-05,
      "loss": 1.0118,
      "step": 56090
    },
    {
      "epoch": 0.19661651368429905,
      "grad_norm": 2.859375,
      "learning_rate": 3.2768490286101474e-05,
      "loss": 1.0287,
      "step": 56100
    },
    {
      "epoch": 0.19665156119119467,
      "grad_norm": 3.296875,
      "learning_rate": 3.2774331475835e-05,
      "loss": 0.9861,
      "step": 56110
    },
    {
      "epoch": 0.19668660869809027,
      "grad_norm": 3.25,
      "learning_rate": 3.2780172665568524e-05,
      "loss": 1.0413,
      "step": 56120
    },
    {
      "epoch": 0.19672165620498586,
      "grad_norm": 3.265625,
      "learning_rate": 3.278601385530205e-05,
      "loss": 0.9905,
      "step": 56130
    },
    {
      "epoch": 0.19675670371188145,
      "grad_norm": 3.25,
      "learning_rate": 3.279185504503557e-05,
      "loss": 1.0498,
      "step": 56140
    },
    {
      "epoch": 0.19679175121877704,
      "grad_norm": 3.671875,
      "learning_rate": 3.27976962347691e-05,
      "loss": 0.9711,
      "step": 56150
    },
    {
      "epoch": 0.19682679872567266,
      "grad_norm": 3.140625,
      "learning_rate": 3.280353742450262e-05,
      "loss": 0.9749,
      "step": 56160
    },
    {
      "epoch": 0.19686184623256825,
      "grad_norm": 3.0,
      "learning_rate": 3.2809378614236145e-05,
      "loss": 0.9495,
      "step": 56170
    },
    {
      "epoch": 0.19689689373946384,
      "grad_norm": 3.0,
      "learning_rate": 3.281521980396968e-05,
      "loss": 1.0109,
      "step": 56180
    },
    {
      "epoch": 0.19693194124635943,
      "grad_norm": 3.140625,
      "learning_rate": 3.28210609937032e-05,
      "loss": 1.0332,
      "step": 56190
    },
    {
      "epoch": 0.19696698875325502,
      "grad_norm": 3.34375,
      "learning_rate": 3.282690218343672e-05,
      "loss": 1.1287,
      "step": 56200
    },
    {
      "epoch": 0.19700203626015064,
      "grad_norm": 3.296875,
      "learning_rate": 3.283274337317025e-05,
      "loss": 1.002,
      "step": 56210
    },
    {
      "epoch": 0.19703708376704623,
      "grad_norm": 2.890625,
      "learning_rate": 3.283858456290377e-05,
      "loss": 1.0256,
      "step": 56220
    },
    {
      "epoch": 0.19707213127394183,
      "grad_norm": 3.28125,
      "learning_rate": 3.28444257526373e-05,
      "loss": 1.0285,
      "step": 56230
    },
    {
      "epoch": 0.19710717878083742,
      "grad_norm": 3.15625,
      "learning_rate": 3.285026694237082e-05,
      "loss": 1.0118,
      "step": 56240
    },
    {
      "epoch": 0.19714222628773304,
      "grad_norm": 3.71875,
      "learning_rate": 3.285610813210434e-05,
      "loss": 0.9297,
      "step": 56250
    },
    {
      "epoch": 0.19717727379462863,
      "grad_norm": 3.125,
      "learning_rate": 3.286194932183788e-05,
      "loss": 0.9871,
      "step": 56260
    },
    {
      "epoch": 0.19721232130152422,
      "grad_norm": 3.203125,
      "learning_rate": 3.28677905115714e-05,
      "loss": 0.9165,
      "step": 56270
    },
    {
      "epoch": 0.1972473688084198,
      "grad_norm": 3.328125,
      "learning_rate": 3.287363170130492e-05,
      "loss": 1.0183,
      "step": 56280
    },
    {
      "epoch": 0.1972824163153154,
      "grad_norm": 2.90625,
      "learning_rate": 3.287947289103845e-05,
      "loss": 0.9595,
      "step": 56290
    },
    {
      "epoch": 0.19731746382221102,
      "grad_norm": 3.453125,
      "learning_rate": 3.288531408077197e-05,
      "loss": 0.9897,
      "step": 56300
    },
    {
      "epoch": 0.1973525113291066,
      "grad_norm": 3.25,
      "learning_rate": 3.28911552705055e-05,
      "loss": 0.9948,
      "step": 56310
    },
    {
      "epoch": 0.1973875588360022,
      "grad_norm": 3.390625,
      "learning_rate": 3.289699646023902e-05,
      "loss": 1.0366,
      "step": 56320
    },
    {
      "epoch": 0.1974226063428978,
      "grad_norm": 3.09375,
      "learning_rate": 3.290283764997255e-05,
      "loss": 1.0,
      "step": 56330
    },
    {
      "epoch": 0.19745765384979339,
      "grad_norm": 3.0,
      "learning_rate": 3.290867883970608e-05,
      "loss": 0.9765,
      "step": 56340
    },
    {
      "epoch": 0.197492701356689,
      "grad_norm": 3.09375,
      "learning_rate": 3.29145200294396e-05,
      "loss": 0.9855,
      "step": 56350
    },
    {
      "epoch": 0.1975277488635846,
      "grad_norm": 3.203125,
      "learning_rate": 3.292036121917313e-05,
      "loss": 0.9552,
      "step": 56360
    },
    {
      "epoch": 0.1975627963704802,
      "grad_norm": 3.078125,
      "learning_rate": 3.292620240890665e-05,
      "loss": 0.9665,
      "step": 56370
    },
    {
      "epoch": 0.19759784387737578,
      "grad_norm": 3.140625,
      "learning_rate": 3.293204359864017e-05,
      "loss": 1.0504,
      "step": 56380
    },
    {
      "epoch": 0.19763289138427137,
      "grad_norm": 3.328125,
      "learning_rate": 3.29378847883737e-05,
      "loss": 0.9984,
      "step": 56390
    },
    {
      "epoch": 0.197667938891167,
      "grad_norm": 3.328125,
      "learning_rate": 3.294372597810722e-05,
      "loss": 1.0031,
      "step": 56400
    },
    {
      "epoch": 0.19770298639806258,
      "grad_norm": 3.046875,
      "learning_rate": 3.294956716784075e-05,
      "loss": 0.9692,
      "step": 56410
    },
    {
      "epoch": 0.19773803390495817,
      "grad_norm": 3.09375,
      "learning_rate": 3.2955408357574276e-05,
      "loss": 0.9277,
      "step": 56420
    },
    {
      "epoch": 0.19777308141185376,
      "grad_norm": 3.390625,
      "learning_rate": 3.29612495473078e-05,
      "loss": 0.955,
      "step": 56430
    },
    {
      "epoch": 0.19780812891874935,
      "grad_norm": 3.171875,
      "learning_rate": 3.2967090737041325e-05,
      "loss": 0.936,
      "step": 56440
    },
    {
      "epoch": 0.19784317642564497,
      "grad_norm": 3.078125,
      "learning_rate": 3.297293192677485e-05,
      "loss": 0.9959,
      "step": 56450
    },
    {
      "epoch": 0.19787822393254056,
      "grad_norm": 3.125,
      "learning_rate": 3.297877311650837e-05,
      "loss": 0.9553,
      "step": 56460
    },
    {
      "epoch": 0.19791327143943616,
      "grad_norm": 3.796875,
      "learning_rate": 3.29846143062419e-05,
      "loss": 0.9623,
      "step": 56470
    },
    {
      "epoch": 0.19794831894633175,
      "grad_norm": 2.96875,
      "learning_rate": 3.299045549597542e-05,
      "loss": 1.009,
      "step": 56480
    },
    {
      "epoch": 0.19798336645322734,
      "grad_norm": 3.625,
      "learning_rate": 3.2996296685708946e-05,
      "loss": 1.0354,
      "step": 56490
    },
    {
      "epoch": 0.19801841396012296,
      "grad_norm": 3.296875,
      "learning_rate": 3.3002137875442475e-05,
      "loss": 0.9241,
      "step": 56500
    },
    {
      "epoch": 0.19805346146701855,
      "grad_norm": 2.9375,
      "learning_rate": 3.3007979065175996e-05,
      "loss": 0.8719,
      "step": 56510
    },
    {
      "epoch": 0.19808850897391414,
      "grad_norm": 2.875,
      "learning_rate": 3.3013820254909524e-05,
      "loss": 0.9952,
      "step": 56520
    },
    {
      "epoch": 0.19812355648080973,
      "grad_norm": 3.140625,
      "learning_rate": 3.3019661444643046e-05,
      "loss": 1.0252,
      "step": 56530
    },
    {
      "epoch": 0.19815860398770532,
      "grad_norm": 3.25,
      "learning_rate": 3.3025502634376574e-05,
      "loss": 0.9422,
      "step": 56540
    },
    {
      "epoch": 0.19819365149460094,
      "grad_norm": 2.921875,
      "learning_rate": 3.3031343824110095e-05,
      "loss": 0.9903,
      "step": 56550
    },
    {
      "epoch": 0.19822869900149653,
      "grad_norm": 3.421875,
      "learning_rate": 3.303718501384362e-05,
      "loss": 1.0974,
      "step": 56560
    },
    {
      "epoch": 0.19826374650839212,
      "grad_norm": 3.4375,
      "learning_rate": 3.304302620357715e-05,
      "loss": 1.0894,
      "step": 56570
    },
    {
      "epoch": 0.19829879401528772,
      "grad_norm": 3.296875,
      "learning_rate": 3.304886739331067e-05,
      "loss": 0.9784,
      "step": 56580
    },
    {
      "epoch": 0.1983338415221833,
      "grad_norm": 3.4375,
      "learning_rate": 3.3054708583044195e-05,
      "loss": 0.9778,
      "step": 56590
    },
    {
      "epoch": 0.19836888902907893,
      "grad_norm": 3.203125,
      "learning_rate": 3.306054977277772e-05,
      "loss": 0.9983,
      "step": 56600
    },
    {
      "epoch": 0.19840393653597452,
      "grad_norm": 3.359375,
      "learning_rate": 3.3066390962511244e-05,
      "loss": 0.982,
      "step": 56610
    },
    {
      "epoch": 0.1984389840428701,
      "grad_norm": 3.0625,
      "learning_rate": 3.307223215224477e-05,
      "loss": 0.9496,
      "step": 56620
    },
    {
      "epoch": 0.1984740315497657,
      "grad_norm": 3.515625,
      "learning_rate": 3.3078073341978294e-05,
      "loss": 1.0719,
      "step": 56630
    },
    {
      "epoch": 0.1985090790566613,
      "grad_norm": 3.640625,
      "learning_rate": 3.3083914531711816e-05,
      "loss": 1.0248,
      "step": 56640
    },
    {
      "epoch": 0.1985441265635569,
      "grad_norm": 3.1875,
      "learning_rate": 3.308975572144535e-05,
      "loss": 0.9822,
      "step": 56650
    },
    {
      "epoch": 0.1985791740704525,
      "grad_norm": 3.25,
      "learning_rate": 3.309559691117887e-05,
      "loss": 1.0386,
      "step": 56660
    },
    {
      "epoch": 0.1986142215773481,
      "grad_norm": 3.359375,
      "learning_rate": 3.3101438100912394e-05,
      "loss": 1.0329,
      "step": 56670
    },
    {
      "epoch": 0.19864926908424368,
      "grad_norm": 3.359375,
      "learning_rate": 3.310727929064592e-05,
      "loss": 1.0202,
      "step": 56680
    },
    {
      "epoch": 0.19868431659113928,
      "grad_norm": 3.265625,
      "learning_rate": 3.311312048037944e-05,
      "loss": 0.9893,
      "step": 56690
    },
    {
      "epoch": 0.1987193640980349,
      "grad_norm": 2.984375,
      "learning_rate": 3.311896167011297e-05,
      "loss": 1.0067,
      "step": 56700
    },
    {
      "epoch": 0.1987544116049305,
      "grad_norm": 3.265625,
      "learning_rate": 3.312480285984649e-05,
      "loss": 0.9885,
      "step": 56710
    },
    {
      "epoch": 0.19878945911182608,
      "grad_norm": 3.359375,
      "learning_rate": 3.3130644049580014e-05,
      "loss": 0.9133,
      "step": 56720
    },
    {
      "epoch": 0.19882450661872167,
      "grad_norm": 3.03125,
      "learning_rate": 3.313648523931355e-05,
      "loss": 1.0045,
      "step": 56730
    },
    {
      "epoch": 0.19885955412561726,
      "grad_norm": 3.265625,
      "learning_rate": 3.314232642904707e-05,
      "loss": 0.9638,
      "step": 56740
    },
    {
      "epoch": 0.19889460163251288,
      "grad_norm": 2.671875,
      "learning_rate": 3.314816761878059e-05,
      "loss": 0.9236,
      "step": 56750
    },
    {
      "epoch": 0.19892964913940847,
      "grad_norm": 3.296875,
      "learning_rate": 3.315400880851412e-05,
      "loss": 0.9891,
      "step": 56760
    },
    {
      "epoch": 0.19896469664630406,
      "grad_norm": 3.296875,
      "learning_rate": 3.315984999824764e-05,
      "loss": 0.9487,
      "step": 56770
    },
    {
      "epoch": 0.19899974415319965,
      "grad_norm": 3.28125,
      "learning_rate": 3.316569118798117e-05,
      "loss": 1.0116,
      "step": 56780
    },
    {
      "epoch": 0.19903479166009527,
      "grad_norm": 3.1875,
      "learning_rate": 3.317153237771469e-05,
      "loss": 0.9833,
      "step": 56790
    },
    {
      "epoch": 0.19906983916699086,
      "grad_norm": 3.203125,
      "learning_rate": 3.317737356744822e-05,
      "loss": 1.0708,
      "step": 56800
    },
    {
      "epoch": 0.19910488667388646,
      "grad_norm": 3.515625,
      "learning_rate": 3.318321475718175e-05,
      "loss": 0.9838,
      "step": 56810
    },
    {
      "epoch": 0.19913993418078205,
      "grad_norm": 3.390625,
      "learning_rate": 3.318905594691527e-05,
      "loss": 1.0088,
      "step": 56820
    },
    {
      "epoch": 0.19917498168767764,
      "grad_norm": 3.34375,
      "learning_rate": 3.31948971366488e-05,
      "loss": 1.0683,
      "step": 56830
    },
    {
      "epoch": 0.19921002919457326,
      "grad_norm": 3.4375,
      "learning_rate": 3.320073832638232e-05,
      "loss": 1.023,
      "step": 56840
    },
    {
      "epoch": 0.19924507670146885,
      "grad_norm": 3.09375,
      "learning_rate": 3.320657951611584e-05,
      "loss": 1.0089,
      "step": 56850
    },
    {
      "epoch": 0.19928012420836444,
      "grad_norm": 3.328125,
      "learning_rate": 3.321242070584937e-05,
      "loss": 0.9786,
      "step": 56860
    },
    {
      "epoch": 0.19931517171526003,
      "grad_norm": 3.5,
      "learning_rate": 3.321826189558289e-05,
      "loss": 0.9799,
      "step": 56870
    },
    {
      "epoch": 0.19935021922215562,
      "grad_norm": 3.484375,
      "learning_rate": 3.322410308531642e-05,
      "loss": 1.0169,
      "step": 56880
    },
    {
      "epoch": 0.19938526672905124,
      "grad_norm": 3.25,
      "learning_rate": 3.322994427504995e-05,
      "loss": 1.022,
      "step": 56890
    },
    {
      "epoch": 0.19942031423594683,
      "grad_norm": 3.28125,
      "learning_rate": 3.323578546478347e-05,
      "loss": 0.9336,
      "step": 56900
    },
    {
      "epoch": 0.19945536174284242,
      "grad_norm": 3.15625,
      "learning_rate": 3.3241626654516997e-05,
      "loss": 1.0026,
      "step": 56910
    },
    {
      "epoch": 0.19949040924973802,
      "grad_norm": 3.796875,
      "learning_rate": 3.324746784425052e-05,
      "loss": 0.9939,
      "step": 56920
    },
    {
      "epoch": 0.1995254567566336,
      "grad_norm": 3.0,
      "learning_rate": 3.325330903398404e-05,
      "loss": 1.0092,
      "step": 56930
    },
    {
      "epoch": 0.19956050426352923,
      "grad_norm": 3.59375,
      "learning_rate": 3.325915022371757e-05,
      "loss": 1.0388,
      "step": 56940
    },
    {
      "epoch": 0.19959555177042482,
      "grad_norm": 3.5625,
      "learning_rate": 3.326499141345109e-05,
      "loss": 0.929,
      "step": 56950
    },
    {
      "epoch": 0.1996305992773204,
      "grad_norm": 3.046875,
      "learning_rate": 3.327083260318462e-05,
      "loss": 1.0566,
      "step": 56960
    },
    {
      "epoch": 0.199665646784216,
      "grad_norm": 3.125,
      "learning_rate": 3.3276673792918146e-05,
      "loss": 1.0098,
      "step": 56970
    },
    {
      "epoch": 0.1997006942911116,
      "grad_norm": 3.3125,
      "learning_rate": 3.328251498265167e-05,
      "loss": 0.9383,
      "step": 56980
    },
    {
      "epoch": 0.1997357417980072,
      "grad_norm": 3.390625,
      "learning_rate": 3.3288356172385195e-05,
      "loss": 0.9827,
      "step": 56990
    },
    {
      "epoch": 0.1997707893049028,
      "grad_norm": 3.046875,
      "learning_rate": 3.329419736211872e-05,
      "loss": 1.0255,
      "step": 57000
    },
    {
      "epoch": 0.1998058368117984,
      "grad_norm": 3.109375,
      "learning_rate": 3.3300038551852245e-05,
      "loss": 1.0106,
      "step": 57010
    },
    {
      "epoch": 0.19984088431869398,
      "grad_norm": 3.046875,
      "learning_rate": 3.3305879741585766e-05,
      "loss": 0.9913,
      "step": 57020
    },
    {
      "epoch": 0.19987593182558958,
      "grad_norm": 3.015625,
      "learning_rate": 3.331172093131929e-05,
      "loss": 1.0981,
      "step": 57030
    },
    {
      "epoch": 0.1999109793324852,
      "grad_norm": 3.9375,
      "learning_rate": 3.331756212105282e-05,
      "loss": 0.971,
      "step": 57040
    },
    {
      "epoch": 0.19994602683938079,
      "grad_norm": 3.234375,
      "learning_rate": 3.3323403310786344e-05,
      "loss": 0.9827,
      "step": 57050
    },
    {
      "epoch": 0.19998107434627638,
      "grad_norm": 3.296875,
      "learning_rate": 3.3329244500519866e-05,
      "loss": 1.0108,
      "step": 57060
    },
    {
      "epoch": 0.20001612185317197,
      "grad_norm": 3.734375,
      "learning_rate": 3.3335085690253394e-05,
      "loss": 1.0691,
      "step": 57070
    },
    {
      "epoch": 0.20005116936006756,
      "grad_norm": 3.09375,
      "learning_rate": 3.3340926879986916e-05,
      "loss": 0.9833,
      "step": 57080
    },
    {
      "epoch": 0.20008621686696318,
      "grad_norm": 3.5,
      "learning_rate": 3.3346768069720444e-05,
      "loss": 0.9557,
      "step": 57090
    },
    {
      "epoch": 0.20012126437385877,
      "grad_norm": 3.109375,
      "learning_rate": 3.3352609259453965e-05,
      "loss": 0.9945,
      "step": 57100
    },
    {
      "epoch": 0.20015631188075436,
      "grad_norm": 3.6875,
      "learning_rate": 3.335845044918749e-05,
      "loss": 0.9505,
      "step": 57110
    },
    {
      "epoch": 0.20019135938764995,
      "grad_norm": 3.359375,
      "learning_rate": 3.336429163892102e-05,
      "loss": 0.9466,
      "step": 57120
    },
    {
      "epoch": 0.20022640689454554,
      "grad_norm": 3.40625,
      "learning_rate": 3.337013282865454e-05,
      "loss": 1.0025,
      "step": 57130
    },
    {
      "epoch": 0.20026145440144116,
      "grad_norm": 3.546875,
      "learning_rate": 3.3375974018388065e-05,
      "loss": 0.9848,
      "step": 57140
    },
    {
      "epoch": 0.20029650190833675,
      "grad_norm": 3.3125,
      "learning_rate": 3.338181520812159e-05,
      "loss": 0.9601,
      "step": 57150
    },
    {
      "epoch": 0.20033154941523235,
      "grad_norm": 3.15625,
      "learning_rate": 3.3387656397855114e-05,
      "loss": 0.9399,
      "step": 57160
    },
    {
      "epoch": 0.20036659692212794,
      "grad_norm": 3.375,
      "learning_rate": 3.339349758758864e-05,
      "loss": 1.0455,
      "step": 57170
    },
    {
      "epoch": 0.20040164442902353,
      "grad_norm": 3.171875,
      "learning_rate": 3.3399338777322164e-05,
      "loss": 0.9332,
      "step": 57180
    },
    {
      "epoch": 0.20043669193591915,
      "grad_norm": 3.515625,
      "learning_rate": 3.340517996705569e-05,
      "loss": 0.9843,
      "step": 57190
    },
    {
      "epoch": 0.20047173944281474,
      "grad_norm": 3.28125,
      "learning_rate": 3.341102115678922e-05,
      "loss": 1.0154,
      "step": 57200
    },
    {
      "epoch": 0.20050678694971033,
      "grad_norm": 2.90625,
      "learning_rate": 3.341686234652274e-05,
      "loss": 1.0554,
      "step": 57210
    },
    {
      "epoch": 0.20054183445660592,
      "grad_norm": 2.859375,
      "learning_rate": 3.342270353625627e-05,
      "loss": 0.9396,
      "step": 57220
    },
    {
      "epoch": 0.2005768819635015,
      "grad_norm": 3.078125,
      "learning_rate": 3.342854472598979e-05,
      "loss": 0.9064,
      "step": 57230
    },
    {
      "epoch": 0.20061192947039713,
      "grad_norm": 3.0625,
      "learning_rate": 3.343438591572331e-05,
      "loss": 0.994,
      "step": 57240
    },
    {
      "epoch": 0.20064697697729272,
      "grad_norm": 3.171875,
      "learning_rate": 3.344022710545684e-05,
      "loss": 1.0368,
      "step": 57250
    },
    {
      "epoch": 0.20068202448418831,
      "grad_norm": 3.28125,
      "learning_rate": 3.344606829519036e-05,
      "loss": 0.991,
      "step": 57260
    },
    {
      "epoch": 0.2007170719910839,
      "grad_norm": 3.1875,
      "learning_rate": 3.345190948492389e-05,
      "loss": 0.9444,
      "step": 57270
    },
    {
      "epoch": 0.2007521194979795,
      "grad_norm": 3.34375,
      "learning_rate": 3.345775067465742e-05,
      "loss": 1.0625,
      "step": 57280
    },
    {
      "epoch": 0.20078716700487512,
      "grad_norm": 3.296875,
      "learning_rate": 3.346359186439094e-05,
      "loss": 0.9276,
      "step": 57290
    },
    {
      "epoch": 0.2008222145117707,
      "grad_norm": 3.484375,
      "learning_rate": 3.346943305412447e-05,
      "loss": 0.9563,
      "step": 57300
    },
    {
      "epoch": 0.2008572620186663,
      "grad_norm": 3.125,
      "learning_rate": 3.347527424385799e-05,
      "loss": 0.9445,
      "step": 57310
    },
    {
      "epoch": 0.2008923095255619,
      "grad_norm": 3.171875,
      "learning_rate": 3.348111543359151e-05,
      "loss": 0.9349,
      "step": 57320
    },
    {
      "epoch": 0.2009273570324575,
      "grad_norm": 3.265625,
      "learning_rate": 3.348695662332504e-05,
      "loss": 1.0031,
      "step": 57330
    },
    {
      "epoch": 0.2009624045393531,
      "grad_norm": 3.515625,
      "learning_rate": 3.349279781305856e-05,
      "loss": 0.9752,
      "step": 57340
    },
    {
      "epoch": 0.2009974520462487,
      "grad_norm": 3.3125,
      "learning_rate": 3.349863900279209e-05,
      "loss": 1.041,
      "step": 57350
    },
    {
      "epoch": 0.20103249955314428,
      "grad_norm": 3.234375,
      "learning_rate": 3.350448019252562e-05,
      "loss": 1.0895,
      "step": 57360
    },
    {
      "epoch": 0.20106754706003988,
      "grad_norm": 3.15625,
      "learning_rate": 3.351032138225914e-05,
      "loss": 0.9582,
      "step": 57370
    },
    {
      "epoch": 0.2011025945669355,
      "grad_norm": 3.578125,
      "learning_rate": 3.351616257199267e-05,
      "loss": 1.0058,
      "step": 57380
    },
    {
      "epoch": 0.20113764207383109,
      "grad_norm": 2.515625,
      "learning_rate": 3.352200376172619e-05,
      "loss": 0.9576,
      "step": 57390
    },
    {
      "epoch": 0.20117268958072668,
      "grad_norm": 3.125,
      "learning_rate": 3.352784495145971e-05,
      "loss": 1.0083,
      "step": 57400
    },
    {
      "epoch": 0.20120773708762227,
      "grad_norm": 3.4375,
      "learning_rate": 3.353368614119324e-05,
      "loss": 0.983,
      "step": 57410
    },
    {
      "epoch": 0.20124278459451786,
      "grad_norm": 3.203125,
      "learning_rate": 3.353952733092676e-05,
      "loss": 0.9917,
      "step": 57420
    },
    {
      "epoch": 0.20127783210141348,
      "grad_norm": 3.359375,
      "learning_rate": 3.3545368520660295e-05,
      "loss": 0.9945,
      "step": 57430
    },
    {
      "epoch": 0.20131287960830907,
      "grad_norm": 3.4375,
      "learning_rate": 3.355120971039382e-05,
      "loss": 0.9359,
      "step": 57440
    },
    {
      "epoch": 0.20134792711520466,
      "grad_norm": 3.140625,
      "learning_rate": 3.355705090012734e-05,
      "loss": 1.0487,
      "step": 57450
    },
    {
      "epoch": 0.20138297462210025,
      "grad_norm": 3.09375,
      "learning_rate": 3.3562892089860866e-05,
      "loss": 0.9348,
      "step": 57460
    },
    {
      "epoch": 0.20141802212899584,
      "grad_norm": 3.96875,
      "learning_rate": 3.356873327959439e-05,
      "loss": 1.0497,
      "step": 57470
    },
    {
      "epoch": 0.20145306963589146,
      "grad_norm": 2.9375,
      "learning_rate": 3.3574574469327916e-05,
      "loss": 1.0141,
      "step": 57480
    },
    {
      "epoch": 0.20148811714278705,
      "grad_norm": 3.375,
      "learning_rate": 3.358041565906144e-05,
      "loss": 0.91,
      "step": 57490
    },
    {
      "epoch": 0.20152316464968265,
      "grad_norm": 2.875,
      "learning_rate": 3.3586256848794966e-05,
      "loss": 1.0087,
      "step": 57500
    },
    {
      "epoch": 0.20155821215657824,
      "grad_norm": 3.390625,
      "learning_rate": 3.3592098038528494e-05,
      "loss": 0.9898,
      "step": 57510
    },
    {
      "epoch": 0.20159325966347383,
      "grad_norm": 3.125,
      "learning_rate": 3.3597939228262015e-05,
      "loss": 0.9211,
      "step": 57520
    },
    {
      "epoch": 0.20162830717036945,
      "grad_norm": 3.015625,
      "learning_rate": 3.360378041799554e-05,
      "loss": 1.0473,
      "step": 57530
    },
    {
      "epoch": 0.20166335467726504,
      "grad_norm": 2.984375,
      "learning_rate": 3.3609621607729065e-05,
      "loss": 0.9985,
      "step": 57540
    },
    {
      "epoch": 0.20169840218416063,
      "grad_norm": 3.25,
      "learning_rate": 3.361546279746259e-05,
      "loss": 1.0413,
      "step": 57550
    },
    {
      "epoch": 0.20173344969105622,
      "grad_norm": 3.625,
      "learning_rate": 3.3621303987196115e-05,
      "loss": 0.9451,
      "step": 57560
    },
    {
      "epoch": 0.2017684971979518,
      "grad_norm": 3.53125,
      "learning_rate": 3.3627145176929636e-05,
      "loss": 0.9753,
      "step": 57570
    },
    {
      "epoch": 0.20180354470484743,
      "grad_norm": 3.390625,
      "learning_rate": 3.3632986366663165e-05,
      "loss": 0.9909,
      "step": 57580
    },
    {
      "epoch": 0.20183859221174302,
      "grad_norm": 3.09375,
      "learning_rate": 3.363882755639669e-05,
      "loss": 0.9651,
      "step": 57590
    },
    {
      "epoch": 0.20187363971863861,
      "grad_norm": 2.953125,
      "learning_rate": 3.3644668746130214e-05,
      "loss": 0.9672,
      "step": 57600
    },
    {
      "epoch": 0.2019086872255342,
      "grad_norm": 3.5625,
      "learning_rate": 3.3650509935863736e-05,
      "loss": 1.0268,
      "step": 57610
    },
    {
      "epoch": 0.2019437347324298,
      "grad_norm": 3.078125,
      "learning_rate": 3.3656351125597264e-05,
      "loss": 1.0112,
      "step": 57620
    },
    {
      "epoch": 0.20197878223932542,
      "grad_norm": 3.1875,
      "learning_rate": 3.3662192315330785e-05,
      "loss": 0.9437,
      "step": 57630
    },
    {
      "epoch": 0.202013829746221,
      "grad_norm": 2.9375,
      "learning_rate": 3.3668033505064314e-05,
      "loss": 1.05,
      "step": 57640
    },
    {
      "epoch": 0.2020488772531166,
      "grad_norm": 3.546875,
      "learning_rate": 3.3673874694797835e-05,
      "loss": 0.9355,
      "step": 57650
    },
    {
      "epoch": 0.2020839247600122,
      "grad_norm": 3.59375,
      "learning_rate": 3.367971588453136e-05,
      "loss": 0.9869,
      "step": 57660
    },
    {
      "epoch": 0.20211897226690778,
      "grad_norm": 2.96875,
      "learning_rate": 3.368555707426489e-05,
      "loss": 0.9902,
      "step": 57670
    },
    {
      "epoch": 0.2021540197738034,
      "grad_norm": 2.796875,
      "learning_rate": 3.369139826399841e-05,
      "loss": 0.9853,
      "step": 57680
    },
    {
      "epoch": 0.202189067280699,
      "grad_norm": 3.265625,
      "learning_rate": 3.369723945373194e-05,
      "loss": 1.0513,
      "step": 57690
    },
    {
      "epoch": 0.20222411478759458,
      "grad_norm": 3.25,
      "learning_rate": 3.370308064346546e-05,
      "loss": 0.9766,
      "step": 57700
    },
    {
      "epoch": 0.20225916229449017,
      "grad_norm": 3.046875,
      "learning_rate": 3.3708921833198984e-05,
      "loss": 1.0029,
      "step": 57710
    },
    {
      "epoch": 0.20229420980138577,
      "grad_norm": 3.671875,
      "learning_rate": 3.371476302293251e-05,
      "loss": 0.9624,
      "step": 57720
    },
    {
      "epoch": 0.20232925730828138,
      "grad_norm": 3.421875,
      "learning_rate": 3.3720604212666034e-05,
      "loss": 0.9413,
      "step": 57730
    },
    {
      "epoch": 0.20236430481517698,
      "grad_norm": 3.328125,
      "learning_rate": 3.372644540239956e-05,
      "loss": 0.9803,
      "step": 57740
    },
    {
      "epoch": 0.20239935232207257,
      "grad_norm": 2.53125,
      "learning_rate": 3.373228659213309e-05,
      "loss": 1.0097,
      "step": 57750
    },
    {
      "epoch": 0.20243439982896816,
      "grad_norm": 3.65625,
      "learning_rate": 3.373812778186661e-05,
      "loss": 1.0238,
      "step": 57760
    },
    {
      "epoch": 0.20246944733586375,
      "grad_norm": 3.390625,
      "learning_rate": 3.374396897160014e-05,
      "loss": 0.9404,
      "step": 57770
    },
    {
      "epoch": 0.20250449484275937,
      "grad_norm": 3.5,
      "learning_rate": 3.374981016133366e-05,
      "loss": 0.9801,
      "step": 57780
    },
    {
      "epoch": 0.20253954234965496,
      "grad_norm": 3.25,
      "learning_rate": 3.375565135106718e-05,
      "loss": 1.0326,
      "step": 57790
    },
    {
      "epoch": 0.20257458985655055,
      "grad_norm": 3.671875,
      "learning_rate": 3.376149254080071e-05,
      "loss": 1.0253,
      "step": 57800
    },
    {
      "epoch": 0.20260963736344614,
      "grad_norm": 2.703125,
      "learning_rate": 3.376733373053423e-05,
      "loss": 0.982,
      "step": 57810
    },
    {
      "epoch": 0.20264468487034173,
      "grad_norm": 3.21875,
      "learning_rate": 3.377317492026776e-05,
      "loss": 0.9491,
      "step": 57820
    },
    {
      "epoch": 0.20267973237723735,
      "grad_norm": 3.328125,
      "learning_rate": 3.377901611000129e-05,
      "loss": 1.016,
      "step": 57830
    },
    {
      "epoch": 0.20271477988413295,
      "grad_norm": 3.21875,
      "learning_rate": 3.378485729973481e-05,
      "loss": 0.9981,
      "step": 57840
    },
    {
      "epoch": 0.20274982739102854,
      "grad_norm": 3.09375,
      "learning_rate": 3.379069848946834e-05,
      "loss": 0.9072,
      "step": 57850
    },
    {
      "epoch": 0.20278487489792413,
      "grad_norm": 3.5625,
      "learning_rate": 3.379653967920186e-05,
      "loss": 1.0741,
      "step": 57860
    },
    {
      "epoch": 0.20281992240481975,
      "grad_norm": 3.0,
      "learning_rate": 3.380238086893538e-05,
      "loss": 1.03,
      "step": 57870
    },
    {
      "epoch": 0.20285496991171534,
      "grad_norm": 3.40625,
      "learning_rate": 3.380822205866891e-05,
      "loss": 1.0196,
      "step": 57880
    },
    {
      "epoch": 0.20289001741861093,
      "grad_norm": 3.0625,
      "learning_rate": 3.381406324840244e-05,
      "loss": 0.9554,
      "step": 57890
    },
    {
      "epoch": 0.20292506492550652,
      "grad_norm": 3.328125,
      "learning_rate": 3.3819904438135966e-05,
      "loss": 1.0079,
      "step": 57900
    },
    {
      "epoch": 0.2029601124324021,
      "grad_norm": 3.171875,
      "learning_rate": 3.382574562786949e-05,
      "loss": 0.999,
      "step": 57910
    },
    {
      "epoch": 0.20299515993929773,
      "grad_norm": 3.09375,
      "learning_rate": 3.383158681760301e-05,
      "loss": 0.975,
      "step": 57920
    },
    {
      "epoch": 0.20303020744619332,
      "grad_norm": 2.875,
      "learning_rate": 3.383742800733654e-05,
      "loss": 1.0184,
      "step": 57930
    },
    {
      "epoch": 0.20306525495308891,
      "grad_norm": 2.984375,
      "learning_rate": 3.384326919707006e-05,
      "loss": 0.9444,
      "step": 57940
    },
    {
      "epoch": 0.2031003024599845,
      "grad_norm": 3.0,
      "learning_rate": 3.384911038680359e-05,
      "loss": 0.9365,
      "step": 57950
    },
    {
      "epoch": 0.2031353499668801,
      "grad_norm": 3.375,
      "learning_rate": 3.385495157653711e-05,
      "loss": 1.0269,
      "step": 57960
    },
    {
      "epoch": 0.20317039747377572,
      "grad_norm": 3.609375,
      "learning_rate": 3.386079276627064e-05,
      "loss": 1.04,
      "step": 57970
    },
    {
      "epoch": 0.2032054449806713,
      "grad_norm": 3.296875,
      "learning_rate": 3.3866633956004165e-05,
      "loss": 0.9795,
      "step": 57980
    },
    {
      "epoch": 0.2032404924875669,
      "grad_norm": 3.234375,
      "learning_rate": 3.3872475145737687e-05,
      "loss": 1.005,
      "step": 57990
    },
    {
      "epoch": 0.2032755399944625,
      "grad_norm": 3.171875,
      "learning_rate": 3.387831633547121e-05,
      "loss": 1.0081,
      "step": 58000
    },
    {
      "epoch": 0.20331058750135808,
      "grad_norm": 2.96875,
      "learning_rate": 3.3884157525204736e-05,
      "loss": 1.0059,
      "step": 58010
    },
    {
      "epoch": 0.2033456350082537,
      "grad_norm": 3.09375,
      "learning_rate": 3.388999871493826e-05,
      "loss": 0.9315,
      "step": 58020
    },
    {
      "epoch": 0.2033806825151493,
      "grad_norm": 3.0,
      "learning_rate": 3.3895839904671786e-05,
      "loss": 1.0491,
      "step": 58030
    },
    {
      "epoch": 0.20341573002204488,
      "grad_norm": 3.421875,
      "learning_rate": 3.390168109440531e-05,
      "loss": 0.9751,
      "step": 58040
    },
    {
      "epoch": 0.20345077752894047,
      "grad_norm": 3.421875,
      "learning_rate": 3.3907522284138836e-05,
      "loss": 0.9789,
      "step": 58050
    },
    {
      "epoch": 0.20348582503583607,
      "grad_norm": 3.09375,
      "learning_rate": 3.3913363473872364e-05,
      "loss": 0.9753,
      "step": 58060
    },
    {
      "epoch": 0.20352087254273168,
      "grad_norm": 2.96875,
      "learning_rate": 3.3919204663605885e-05,
      "loss": 0.9933,
      "step": 58070
    },
    {
      "epoch": 0.20355592004962728,
      "grad_norm": 3.171875,
      "learning_rate": 3.392504585333941e-05,
      "loss": 1.0353,
      "step": 58080
    },
    {
      "epoch": 0.20359096755652287,
      "grad_norm": 3.21875,
      "learning_rate": 3.3930887043072935e-05,
      "loss": 1.0176,
      "step": 58090
    },
    {
      "epoch": 0.20362601506341846,
      "grad_norm": 3.015625,
      "learning_rate": 3.3936728232806456e-05,
      "loss": 1.0005,
      "step": 58100
    },
    {
      "epoch": 0.20366106257031405,
      "grad_norm": 3.0,
      "learning_rate": 3.3942569422539985e-05,
      "loss": 1.0088,
      "step": 58110
    },
    {
      "epoch": 0.20369611007720967,
      "grad_norm": 3.40625,
      "learning_rate": 3.3948410612273506e-05,
      "loss": 0.9962,
      "step": 58120
    },
    {
      "epoch": 0.20373115758410526,
      "grad_norm": 3.0,
      "learning_rate": 3.3954251802007034e-05,
      "loss": 1.0583,
      "step": 58130
    },
    {
      "epoch": 0.20376620509100085,
      "grad_norm": 2.890625,
      "learning_rate": 3.396009299174056e-05,
      "loss": 0.9821,
      "step": 58140
    },
    {
      "epoch": 0.20380125259789644,
      "grad_norm": 2.828125,
      "learning_rate": 3.3965934181474084e-05,
      "loss": 1.0645,
      "step": 58150
    },
    {
      "epoch": 0.20383630010479203,
      "grad_norm": 2.84375,
      "learning_rate": 3.397177537120761e-05,
      "loss": 1.0188,
      "step": 58160
    },
    {
      "epoch": 0.20387134761168765,
      "grad_norm": 2.6875,
      "learning_rate": 3.3977616560941134e-05,
      "loss": 0.9274,
      "step": 58170
    },
    {
      "epoch": 0.20390639511858324,
      "grad_norm": 2.953125,
      "learning_rate": 3.3983457750674655e-05,
      "loss": 0.9484,
      "step": 58180
    },
    {
      "epoch": 0.20394144262547884,
      "grad_norm": 3.546875,
      "learning_rate": 3.3989298940408183e-05,
      "loss": 1.0113,
      "step": 58190
    },
    {
      "epoch": 0.20397649013237443,
      "grad_norm": 3.140625,
      "learning_rate": 3.399514013014171e-05,
      "loss": 1.0214,
      "step": 58200
    },
    {
      "epoch": 0.20401153763927002,
      "grad_norm": 3.21875,
      "learning_rate": 3.400098131987523e-05,
      "loss": 0.9711,
      "step": 58210
    },
    {
      "epoch": 0.20404658514616564,
      "grad_norm": 3.234375,
      "learning_rate": 3.400682250960876e-05,
      "loss": 1.0124,
      "step": 58220
    },
    {
      "epoch": 0.20408163265306123,
      "grad_norm": 3.046875,
      "learning_rate": 3.401266369934228e-05,
      "loss": 1.0088,
      "step": 58230
    },
    {
      "epoch": 0.20411668015995682,
      "grad_norm": 3.15625,
      "learning_rate": 3.401850488907581e-05,
      "loss": 0.9131,
      "step": 58240
    },
    {
      "epoch": 0.2041517276668524,
      "grad_norm": 3.3125,
      "learning_rate": 3.402434607880933e-05,
      "loss": 1.0154,
      "step": 58250
    },
    {
      "epoch": 0.204186775173748,
      "grad_norm": 3.21875,
      "learning_rate": 3.4030187268542854e-05,
      "loss": 1.0459,
      "step": 58260
    },
    {
      "epoch": 0.20422182268064362,
      "grad_norm": 3.1875,
      "learning_rate": 3.403602845827638e-05,
      "loss": 0.95,
      "step": 58270
    },
    {
      "epoch": 0.2042568701875392,
      "grad_norm": 3.15625,
      "learning_rate": 3.404186964800991e-05,
      "loss": 0.94,
      "step": 58280
    },
    {
      "epoch": 0.2042919176944348,
      "grad_norm": 3.015625,
      "learning_rate": 3.404771083774343e-05,
      "loss": 0.9734,
      "step": 58290
    },
    {
      "epoch": 0.2043269652013304,
      "grad_norm": 3.328125,
      "learning_rate": 3.405355202747696e-05,
      "loss": 1.0163,
      "step": 58300
    },
    {
      "epoch": 0.204362012708226,
      "grad_norm": 3.09375,
      "learning_rate": 3.405939321721048e-05,
      "loss": 1.1024,
      "step": 58310
    },
    {
      "epoch": 0.2043970602151216,
      "grad_norm": 2.984375,
      "learning_rate": 3.406523440694401e-05,
      "loss": 0.9519,
      "step": 58320
    },
    {
      "epoch": 0.2044321077220172,
      "grad_norm": 3.453125,
      "learning_rate": 3.407107559667753e-05,
      "loss": 0.9678,
      "step": 58330
    },
    {
      "epoch": 0.2044671552289128,
      "grad_norm": 3.203125,
      "learning_rate": 3.407691678641106e-05,
      "loss": 1.1009,
      "step": 58340
    },
    {
      "epoch": 0.20450220273580838,
      "grad_norm": 2.921875,
      "learning_rate": 3.408275797614458e-05,
      "loss": 0.9696,
      "step": 58350
    },
    {
      "epoch": 0.20453725024270397,
      "grad_norm": 3.28125,
      "learning_rate": 3.408859916587811e-05,
      "loss": 1.0659,
      "step": 58360
    },
    {
      "epoch": 0.2045722977495996,
      "grad_norm": 3.4375,
      "learning_rate": 3.409444035561164e-05,
      "loss": 0.9769,
      "step": 58370
    },
    {
      "epoch": 0.20460734525649518,
      "grad_norm": 3.390625,
      "learning_rate": 3.410028154534516e-05,
      "loss": 0.9349,
      "step": 58380
    },
    {
      "epoch": 0.20464239276339077,
      "grad_norm": 3.0625,
      "learning_rate": 3.410612273507868e-05,
      "loss": 1.0752,
      "step": 58390
    },
    {
      "epoch": 0.20467744027028636,
      "grad_norm": 3.15625,
      "learning_rate": 3.411196392481221e-05,
      "loss": 1.0479,
      "step": 58400
    },
    {
      "epoch": 0.20471248777718198,
      "grad_norm": 3.296875,
      "learning_rate": 3.411780511454573e-05,
      "loss": 0.9905,
      "step": 58410
    },
    {
      "epoch": 0.20474753528407758,
      "grad_norm": 3.21875,
      "learning_rate": 3.412364630427926e-05,
      "loss": 0.9673,
      "step": 58420
    },
    {
      "epoch": 0.20478258279097317,
      "grad_norm": 3.109375,
      "learning_rate": 3.412948749401278e-05,
      "loss": 1.0278,
      "step": 58430
    },
    {
      "epoch": 0.20481763029786876,
      "grad_norm": 3.359375,
      "learning_rate": 3.413532868374631e-05,
      "loss": 0.9619,
      "step": 58440
    },
    {
      "epoch": 0.20485267780476435,
      "grad_norm": 2.9375,
      "learning_rate": 3.4141169873479836e-05,
      "loss": 1.0426,
      "step": 58450
    },
    {
      "epoch": 0.20488772531165997,
      "grad_norm": 3.53125,
      "learning_rate": 3.414701106321336e-05,
      "loss": 0.9745,
      "step": 58460
    },
    {
      "epoch": 0.20492277281855556,
      "grad_norm": 3.15625,
      "learning_rate": 3.415285225294688e-05,
      "loss": 0.9637,
      "step": 58470
    },
    {
      "epoch": 0.20495782032545115,
      "grad_norm": 3.0,
      "learning_rate": 3.415869344268041e-05,
      "loss": 1.0707,
      "step": 58480
    },
    {
      "epoch": 0.20499286783234674,
      "grad_norm": 3.375,
      "learning_rate": 3.416453463241393e-05,
      "loss": 1.0666,
      "step": 58490
    },
    {
      "epoch": 0.20502791533924233,
      "grad_norm": 3.890625,
      "learning_rate": 3.417037582214746e-05,
      "loss": 1.0277,
      "step": 58500
    },
    {
      "epoch": 0.20506296284613795,
      "grad_norm": 3.515625,
      "learning_rate": 3.417621701188098e-05,
      "loss": 1.0166,
      "step": 58510
    },
    {
      "epoch": 0.20509801035303354,
      "grad_norm": 2.6875,
      "learning_rate": 3.418205820161451e-05,
      "loss": 0.9715,
      "step": 58520
    },
    {
      "epoch": 0.20513305785992914,
      "grad_norm": 3.40625,
      "learning_rate": 3.4187899391348035e-05,
      "loss": 1.0262,
      "step": 58530
    },
    {
      "epoch": 0.20516810536682473,
      "grad_norm": 3.046875,
      "learning_rate": 3.4193740581081556e-05,
      "loss": 1.0367,
      "step": 58540
    },
    {
      "epoch": 0.20520315287372032,
      "grad_norm": 3.15625,
      "learning_rate": 3.419958177081508e-05,
      "loss": 1.0236,
      "step": 58550
    },
    {
      "epoch": 0.20523820038061594,
      "grad_norm": 3.078125,
      "learning_rate": 3.4205422960548606e-05,
      "loss": 0.9313,
      "step": 58560
    },
    {
      "epoch": 0.20527324788751153,
      "grad_norm": 3.03125,
      "learning_rate": 3.421126415028213e-05,
      "loss": 1.0343,
      "step": 58570
    },
    {
      "epoch": 0.20530829539440712,
      "grad_norm": 3.15625,
      "learning_rate": 3.4217105340015656e-05,
      "loss": 0.9859,
      "step": 58580
    },
    {
      "epoch": 0.2053433429013027,
      "grad_norm": 3.484375,
      "learning_rate": 3.4222946529749184e-05,
      "loss": 1.0541,
      "step": 58590
    },
    {
      "epoch": 0.2053783904081983,
      "grad_norm": 3.46875,
      "learning_rate": 3.4228787719482705e-05,
      "loss": 1.0233,
      "step": 58600
    },
    {
      "epoch": 0.20541343791509392,
      "grad_norm": 3.390625,
      "learning_rate": 3.4234628909216234e-05,
      "loss": 0.9567,
      "step": 58610
    },
    {
      "epoch": 0.2054484854219895,
      "grad_norm": 2.984375,
      "learning_rate": 3.4240470098949755e-05,
      "loss": 1.0029,
      "step": 58620
    },
    {
      "epoch": 0.2054835329288851,
      "grad_norm": 3.484375,
      "learning_rate": 3.424631128868328e-05,
      "loss": 1.0151,
      "step": 58630
    },
    {
      "epoch": 0.2055185804357807,
      "grad_norm": 3.234375,
      "learning_rate": 3.4252152478416805e-05,
      "loss": 0.9539,
      "step": 58640
    },
    {
      "epoch": 0.2055536279426763,
      "grad_norm": 3.125,
      "learning_rate": 3.4257993668150326e-05,
      "loss": 1.0282,
      "step": 58650
    },
    {
      "epoch": 0.2055886754495719,
      "grad_norm": 2.734375,
      "learning_rate": 3.4263834857883855e-05,
      "loss": 0.9553,
      "step": 58660
    },
    {
      "epoch": 0.2056237229564675,
      "grad_norm": 3.375,
      "learning_rate": 3.426967604761738e-05,
      "loss": 0.9905,
      "step": 58670
    },
    {
      "epoch": 0.2056587704633631,
      "grad_norm": 3.140625,
      "learning_rate": 3.4275517237350904e-05,
      "loss": 1.0389,
      "step": 58680
    },
    {
      "epoch": 0.20569381797025868,
      "grad_norm": 3.59375,
      "learning_rate": 3.428135842708443e-05,
      "loss": 0.8444,
      "step": 58690
    },
    {
      "epoch": 0.20572886547715427,
      "grad_norm": 3.4375,
      "learning_rate": 3.4287199616817954e-05,
      "loss": 0.9872,
      "step": 58700
    },
    {
      "epoch": 0.2057639129840499,
      "grad_norm": 3.28125,
      "learning_rate": 3.429304080655148e-05,
      "loss": 0.9402,
      "step": 58710
    },
    {
      "epoch": 0.20579896049094548,
      "grad_norm": 3.34375,
      "learning_rate": 3.4298881996285004e-05,
      "loss": 0.9988,
      "step": 58720
    },
    {
      "epoch": 0.20583400799784107,
      "grad_norm": 3.421875,
      "learning_rate": 3.4304723186018525e-05,
      "loss": 0.9796,
      "step": 58730
    },
    {
      "epoch": 0.20586905550473666,
      "grad_norm": 3.3125,
      "learning_rate": 3.431056437575205e-05,
      "loss": 1.0414,
      "step": 58740
    },
    {
      "epoch": 0.20590410301163226,
      "grad_norm": 3.125,
      "learning_rate": 3.431640556548558e-05,
      "loss": 0.9524,
      "step": 58750
    },
    {
      "epoch": 0.20593915051852787,
      "grad_norm": 3.5,
      "learning_rate": 3.43222467552191e-05,
      "loss": 0.992,
      "step": 58760
    },
    {
      "epoch": 0.20597419802542347,
      "grad_norm": 3.125,
      "learning_rate": 3.432808794495263e-05,
      "loss": 0.9682,
      "step": 58770
    },
    {
      "epoch": 0.20600924553231906,
      "grad_norm": 2.921875,
      "learning_rate": 3.433392913468615e-05,
      "loss": 1.0335,
      "step": 58780
    },
    {
      "epoch": 0.20604429303921465,
      "grad_norm": 3.3125,
      "learning_rate": 3.433977032441968e-05,
      "loss": 1.0557,
      "step": 58790
    },
    {
      "epoch": 0.20607934054611024,
      "grad_norm": 3.65625,
      "learning_rate": 3.43456115141532e-05,
      "loss": 1.0209,
      "step": 58800
    },
    {
      "epoch": 0.20611438805300586,
      "grad_norm": 3.09375,
      "learning_rate": 3.435145270388673e-05,
      "loss": 0.9937,
      "step": 58810
    },
    {
      "epoch": 0.20614943555990145,
      "grad_norm": 2.78125,
      "learning_rate": 3.435729389362025e-05,
      "loss": 0.9472,
      "step": 58820
    },
    {
      "epoch": 0.20618448306679704,
      "grad_norm": 3.34375,
      "learning_rate": 3.436313508335378e-05,
      "loss": 1.0564,
      "step": 58830
    },
    {
      "epoch": 0.20621953057369263,
      "grad_norm": 3.359375,
      "learning_rate": 3.436897627308731e-05,
      "loss": 0.9842,
      "step": 58840
    },
    {
      "epoch": 0.20625457808058822,
      "grad_norm": 3.53125,
      "learning_rate": 3.437481746282083e-05,
      "loss": 0.9826,
      "step": 58850
    },
    {
      "epoch": 0.20628962558748384,
      "grad_norm": 3.671875,
      "learning_rate": 3.438065865255435e-05,
      "loss": 1.0376,
      "step": 58860
    },
    {
      "epoch": 0.20632467309437943,
      "grad_norm": 2.9375,
      "learning_rate": 3.438649984228788e-05,
      "loss": 1.0312,
      "step": 58870
    },
    {
      "epoch": 0.20635972060127503,
      "grad_norm": 3.015625,
      "learning_rate": 3.43923410320214e-05,
      "loss": 1.0457,
      "step": 58880
    },
    {
      "epoch": 0.20639476810817062,
      "grad_norm": 3.296875,
      "learning_rate": 3.439818222175493e-05,
      "loss": 0.8923,
      "step": 58890
    },
    {
      "epoch": 0.2064298156150662,
      "grad_norm": 2.890625,
      "learning_rate": 3.440402341148846e-05,
      "loss": 1.0362,
      "step": 58900
    },
    {
      "epoch": 0.20646486312196183,
      "grad_norm": 3.15625,
      "learning_rate": 3.440986460122198e-05,
      "loss": 0.9958,
      "step": 58910
    },
    {
      "epoch": 0.20649991062885742,
      "grad_norm": 3.0625,
      "learning_rate": 3.441570579095551e-05,
      "loss": 0.9988,
      "step": 58920
    },
    {
      "epoch": 0.206534958135753,
      "grad_norm": 3.0625,
      "learning_rate": 3.442154698068903e-05,
      "loss": 1.0634,
      "step": 58930
    },
    {
      "epoch": 0.2065700056426486,
      "grad_norm": 3.671875,
      "learning_rate": 3.442738817042255e-05,
      "loss": 1.0422,
      "step": 58940
    },
    {
      "epoch": 0.2066050531495442,
      "grad_norm": 3.25,
      "learning_rate": 3.443322936015608e-05,
      "loss": 0.9249,
      "step": 58950
    },
    {
      "epoch": 0.2066401006564398,
      "grad_norm": 3.234375,
      "learning_rate": 3.44390705498896e-05,
      "loss": 1.0003,
      "step": 58960
    },
    {
      "epoch": 0.2066751481633354,
      "grad_norm": 2.984375,
      "learning_rate": 3.444491173962313e-05,
      "loss": 0.9737,
      "step": 58970
    },
    {
      "epoch": 0.206710195670231,
      "grad_norm": 3.234375,
      "learning_rate": 3.4450752929356656e-05,
      "loss": 1.0132,
      "step": 58980
    },
    {
      "epoch": 0.2067452431771266,
      "grad_norm": 3.28125,
      "learning_rate": 3.445659411909018e-05,
      "loss": 0.9845,
      "step": 58990
    },
    {
      "epoch": 0.2067802906840222,
      "grad_norm": 3.3125,
      "learning_rate": 3.4462435308823706e-05,
      "loss": 0.9876,
      "step": 59000
    },
    {
      "epoch": 0.2068153381909178,
      "grad_norm": 3.171875,
      "learning_rate": 3.446827649855723e-05,
      "loss": 0.9775,
      "step": 59010
    },
    {
      "epoch": 0.2068503856978134,
      "grad_norm": 3.5625,
      "learning_rate": 3.4474117688290756e-05,
      "loss": 1.0182,
      "step": 59020
    },
    {
      "epoch": 0.20688543320470898,
      "grad_norm": 3.5,
      "learning_rate": 3.447995887802428e-05,
      "loss": 0.9605,
      "step": 59030
    },
    {
      "epoch": 0.20692048071160457,
      "grad_norm": 3.203125,
      "learning_rate": 3.44858000677578e-05,
      "loss": 0.9571,
      "step": 59040
    },
    {
      "epoch": 0.2069555282185002,
      "grad_norm": 3.5,
      "learning_rate": 3.449164125749133e-05,
      "loss": 0.9873,
      "step": 59050
    },
    {
      "epoch": 0.20699057572539578,
      "grad_norm": 3.140625,
      "learning_rate": 3.4497482447224855e-05,
      "loss": 1.0345,
      "step": 59060
    },
    {
      "epoch": 0.20702562323229137,
      "grad_norm": 3.28125,
      "learning_rate": 3.4503323636958377e-05,
      "loss": 1.0606,
      "step": 59070
    },
    {
      "epoch": 0.20706067073918696,
      "grad_norm": 3.21875,
      "learning_rate": 3.4509164826691905e-05,
      "loss": 0.9961,
      "step": 59080
    },
    {
      "epoch": 0.20709571824608256,
      "grad_norm": 3.375,
      "learning_rate": 3.4515006016425426e-05,
      "loss": 0.9063,
      "step": 59090
    },
    {
      "epoch": 0.20713076575297817,
      "grad_norm": 2.90625,
      "learning_rate": 3.4520847206158954e-05,
      "loss": 1.0012,
      "step": 59100
    },
    {
      "epoch": 0.20716581325987377,
      "grad_norm": 3.84375,
      "learning_rate": 3.4526688395892476e-05,
      "loss": 1.0513,
      "step": 59110
    },
    {
      "epoch": 0.20720086076676936,
      "grad_norm": 3.140625,
      "learning_rate": 3.4532529585626e-05,
      "loss": 0.9824,
      "step": 59120
    },
    {
      "epoch": 0.20723590827366495,
      "grad_norm": 3.296875,
      "learning_rate": 3.4538370775359526e-05,
      "loss": 1.0156,
      "step": 59130
    },
    {
      "epoch": 0.20727095578056054,
      "grad_norm": 3.125,
      "learning_rate": 3.4544211965093054e-05,
      "loss": 0.9772,
      "step": 59140
    },
    {
      "epoch": 0.20730600328745616,
      "grad_norm": 3.203125,
      "learning_rate": 3.4550053154826575e-05,
      "loss": 0.9482,
      "step": 59150
    },
    {
      "epoch": 0.20734105079435175,
      "grad_norm": 3.1875,
      "learning_rate": 3.4555894344560104e-05,
      "loss": 1.0009,
      "step": 59160
    },
    {
      "epoch": 0.20737609830124734,
      "grad_norm": 2.796875,
      "learning_rate": 3.4561735534293625e-05,
      "loss": 0.9957,
      "step": 59170
    },
    {
      "epoch": 0.20741114580814293,
      "grad_norm": 2.9375,
      "learning_rate": 3.456757672402715e-05,
      "loss": 0.9369,
      "step": 59180
    },
    {
      "epoch": 0.20744619331503852,
      "grad_norm": 3.21875,
      "learning_rate": 3.4573417913760675e-05,
      "loss": 1.1134,
      "step": 59190
    },
    {
      "epoch": 0.20748124082193414,
      "grad_norm": 2.859375,
      "learning_rate": 3.4579259103494196e-05,
      "loss": 0.9391,
      "step": 59200
    },
    {
      "epoch": 0.20751628832882973,
      "grad_norm": 2.84375,
      "learning_rate": 3.458510029322773e-05,
      "loss": 0.9866,
      "step": 59210
    },
    {
      "epoch": 0.20755133583572533,
      "grad_norm": 3.171875,
      "learning_rate": 3.459094148296125e-05,
      "loss": 0.9209,
      "step": 59220
    },
    {
      "epoch": 0.20758638334262092,
      "grad_norm": 2.921875,
      "learning_rate": 3.459678267269478e-05,
      "loss": 0.957,
      "step": 59230
    },
    {
      "epoch": 0.2076214308495165,
      "grad_norm": 3.25,
      "learning_rate": 3.46026238624283e-05,
      "loss": 0.9246,
      "step": 59240
    },
    {
      "epoch": 0.20765647835641213,
      "grad_norm": 3.421875,
      "learning_rate": 3.4608465052161824e-05,
      "loss": 1.05,
      "step": 59250
    },
    {
      "epoch": 0.20769152586330772,
      "grad_norm": 3.078125,
      "learning_rate": 3.461430624189535e-05,
      "loss": 1.0657,
      "step": 59260
    },
    {
      "epoch": 0.2077265733702033,
      "grad_norm": 3.140625,
      "learning_rate": 3.4620147431628873e-05,
      "loss": 0.9735,
      "step": 59270
    },
    {
      "epoch": 0.2077616208770989,
      "grad_norm": 2.953125,
      "learning_rate": 3.46259886213624e-05,
      "loss": 0.9124,
      "step": 59280
    },
    {
      "epoch": 0.2077966683839945,
      "grad_norm": 3.359375,
      "learning_rate": 3.463182981109593e-05,
      "loss": 1.0408,
      "step": 59290
    },
    {
      "epoch": 0.2078317158908901,
      "grad_norm": 3.140625,
      "learning_rate": 3.463767100082945e-05,
      "loss": 0.9758,
      "step": 59300
    },
    {
      "epoch": 0.2078667633977857,
      "grad_norm": 3.046875,
      "learning_rate": 3.464351219056298e-05,
      "loss": 0.9076,
      "step": 59310
    },
    {
      "epoch": 0.2079018109046813,
      "grad_norm": 2.84375,
      "learning_rate": 3.46493533802965e-05,
      "loss": 0.8992,
      "step": 59320
    },
    {
      "epoch": 0.20793685841157689,
      "grad_norm": 3.25,
      "learning_rate": 3.465519457003002e-05,
      "loss": 0.9029,
      "step": 59330
    },
    {
      "epoch": 0.20797190591847248,
      "grad_norm": 3.4375,
      "learning_rate": 3.466103575976355e-05,
      "loss": 1.0828,
      "step": 59340
    },
    {
      "epoch": 0.2080069534253681,
      "grad_norm": 3.15625,
      "learning_rate": 3.466687694949707e-05,
      "loss": 1.0189,
      "step": 59350
    },
    {
      "epoch": 0.2080420009322637,
      "grad_norm": 3.234375,
      "learning_rate": 3.46727181392306e-05,
      "loss": 0.947,
      "step": 59360
    },
    {
      "epoch": 0.20807704843915928,
      "grad_norm": 2.984375,
      "learning_rate": 3.467855932896413e-05,
      "loss": 1.0043,
      "step": 59370
    },
    {
      "epoch": 0.20811209594605487,
      "grad_norm": 2.984375,
      "learning_rate": 3.468440051869765e-05,
      "loss": 0.9661,
      "step": 59380
    },
    {
      "epoch": 0.20814714345295046,
      "grad_norm": 3.5625,
      "learning_rate": 3.469024170843118e-05,
      "loss": 0.888,
      "step": 59390
    },
    {
      "epoch": 0.20818219095984608,
      "grad_norm": 2.890625,
      "learning_rate": 3.46960828981647e-05,
      "loss": 0.9857,
      "step": 59400
    },
    {
      "epoch": 0.20821723846674167,
      "grad_norm": 3.359375,
      "learning_rate": 3.470192408789822e-05,
      "loss": 1.043,
      "step": 59410
    },
    {
      "epoch": 0.20825228597363726,
      "grad_norm": 3.046875,
      "learning_rate": 3.470776527763175e-05,
      "loss": 1.063,
      "step": 59420
    },
    {
      "epoch": 0.20828733348053285,
      "grad_norm": 3.5,
      "learning_rate": 3.471360646736527e-05,
      "loss": 1.0343,
      "step": 59430
    },
    {
      "epoch": 0.20832238098742845,
      "grad_norm": 2.640625,
      "learning_rate": 3.47194476570988e-05,
      "loss": 0.9025,
      "step": 59440
    },
    {
      "epoch": 0.20835742849432407,
      "grad_norm": 2.953125,
      "learning_rate": 3.472528884683233e-05,
      "loss": 0.9243,
      "step": 59450
    },
    {
      "epoch": 0.20839247600121966,
      "grad_norm": 3.53125,
      "learning_rate": 3.473113003656585e-05,
      "loss": 1.0377,
      "step": 59460
    },
    {
      "epoch": 0.20842752350811525,
      "grad_norm": 2.953125,
      "learning_rate": 3.473697122629938e-05,
      "loss": 0.9537,
      "step": 59470
    },
    {
      "epoch": 0.20846257101501084,
      "grad_norm": 3.34375,
      "learning_rate": 3.47428124160329e-05,
      "loss": 1.041,
      "step": 59480
    },
    {
      "epoch": 0.20849761852190643,
      "grad_norm": 3.46875,
      "learning_rate": 3.474865360576643e-05,
      "loss": 0.9626,
      "step": 59490
    },
    {
      "epoch": 0.20853266602880205,
      "grad_norm": 3.40625,
      "learning_rate": 3.475449479549995e-05,
      "loss": 0.9988,
      "step": 59500
    },
    {
      "epoch": 0.20856771353569764,
      "grad_norm": 3.140625,
      "learning_rate": 3.476033598523347e-05,
      "loss": 1.0086,
      "step": 59510
    },
    {
      "epoch": 0.20860276104259323,
      "grad_norm": 3.015625,
      "learning_rate": 3.4766177174967e-05,
      "loss": 1.0732,
      "step": 59520
    },
    {
      "epoch": 0.20863780854948882,
      "grad_norm": 2.890625,
      "learning_rate": 3.4772018364700526e-05,
      "loss": 0.9703,
      "step": 59530
    },
    {
      "epoch": 0.20867285605638444,
      "grad_norm": 3.328125,
      "learning_rate": 3.477785955443405e-05,
      "loss": 0.9065,
      "step": 59540
    },
    {
      "epoch": 0.20870790356328003,
      "grad_norm": 3.3125,
      "learning_rate": 3.4783700744167576e-05,
      "loss": 1.0176,
      "step": 59550
    },
    {
      "epoch": 0.20874295107017563,
      "grad_norm": 2.703125,
      "learning_rate": 3.47895419339011e-05,
      "loss": 1.0185,
      "step": 59560
    },
    {
      "epoch": 0.20877799857707122,
      "grad_norm": 3.671875,
      "learning_rate": 3.4795383123634626e-05,
      "loss": 0.9047,
      "step": 59570
    },
    {
      "epoch": 0.2088130460839668,
      "grad_norm": 3.234375,
      "learning_rate": 3.480122431336815e-05,
      "loss": 0.9338,
      "step": 59580
    },
    {
      "epoch": 0.20884809359086243,
      "grad_norm": 3.46875,
      "learning_rate": 3.480706550310167e-05,
      "loss": 1.0058,
      "step": 59590
    },
    {
      "epoch": 0.20888314109775802,
      "grad_norm": 2.90625,
      "learning_rate": 3.4812906692835203e-05,
      "loss": 1.0513,
      "step": 59600
    },
    {
      "epoch": 0.2089181886046536,
      "grad_norm": 3.296875,
      "learning_rate": 3.4818747882568725e-05,
      "loss": 0.9685,
      "step": 59610
    },
    {
      "epoch": 0.2089532361115492,
      "grad_norm": 3.390625,
      "learning_rate": 3.4824589072302246e-05,
      "loss": 0.9294,
      "step": 59620
    },
    {
      "epoch": 0.2089882836184448,
      "grad_norm": 2.71875,
      "learning_rate": 3.4830430262035775e-05,
      "loss": 0.9634,
      "step": 59630
    },
    {
      "epoch": 0.2090233311253404,
      "grad_norm": 3.453125,
      "learning_rate": 3.4836271451769296e-05,
      "loss": 0.9744,
      "step": 59640
    },
    {
      "epoch": 0.209058378632236,
      "grad_norm": 3.234375,
      "learning_rate": 3.4842112641502824e-05,
      "loss": 0.9752,
      "step": 59650
    },
    {
      "epoch": 0.2090934261391316,
      "grad_norm": 2.921875,
      "learning_rate": 3.4847953831236346e-05,
      "loss": 0.9243,
      "step": 59660
    },
    {
      "epoch": 0.20912847364602719,
      "grad_norm": 3.09375,
      "learning_rate": 3.485379502096987e-05,
      "loss": 1.042,
      "step": 59670
    },
    {
      "epoch": 0.20916352115292278,
      "grad_norm": 2.875,
      "learning_rate": 3.48596362107034e-05,
      "loss": 0.9632,
      "step": 59680
    },
    {
      "epoch": 0.2091985686598184,
      "grad_norm": 3.453125,
      "learning_rate": 3.4865477400436924e-05,
      "loss": 0.9538,
      "step": 59690
    },
    {
      "epoch": 0.209233616166714,
      "grad_norm": 2.984375,
      "learning_rate": 3.487131859017045e-05,
      "loss": 1.0035,
      "step": 59700
    },
    {
      "epoch": 0.20926866367360958,
      "grad_norm": 3.4375,
      "learning_rate": 3.487715977990397e-05,
      "loss": 0.9065,
      "step": 59710
    },
    {
      "epoch": 0.20930371118050517,
      "grad_norm": 2.9375,
      "learning_rate": 3.4883000969637495e-05,
      "loss": 1.0141,
      "step": 59720
    },
    {
      "epoch": 0.20933875868740076,
      "grad_norm": 3.328125,
      "learning_rate": 3.488884215937102e-05,
      "loss": 0.9515,
      "step": 59730
    },
    {
      "epoch": 0.20937380619429638,
      "grad_norm": 2.953125,
      "learning_rate": 3.4894683349104544e-05,
      "loss": 0.982,
      "step": 59740
    },
    {
      "epoch": 0.20940885370119197,
      "grad_norm": 2.859375,
      "learning_rate": 3.490052453883807e-05,
      "loss": 1.0251,
      "step": 59750
    },
    {
      "epoch": 0.20944390120808756,
      "grad_norm": 3.5625,
      "learning_rate": 3.49063657285716e-05,
      "loss": 1.0756,
      "step": 59760
    },
    {
      "epoch": 0.20947894871498315,
      "grad_norm": 3.765625,
      "learning_rate": 3.491220691830512e-05,
      "loss": 1.0182,
      "step": 59770
    },
    {
      "epoch": 0.20951399622187875,
      "grad_norm": 2.859375,
      "learning_rate": 3.491804810803865e-05,
      "loss": 0.9818,
      "step": 59780
    },
    {
      "epoch": 0.20954904372877436,
      "grad_norm": 3.171875,
      "learning_rate": 3.492388929777217e-05,
      "loss": 0.9824,
      "step": 59790
    },
    {
      "epoch": 0.20958409123566996,
      "grad_norm": 3.265625,
      "learning_rate": 3.4929730487505694e-05,
      "loss": 1.0478,
      "step": 59800
    },
    {
      "epoch": 0.20961913874256555,
      "grad_norm": 3.28125,
      "learning_rate": 3.493557167723922e-05,
      "loss": 0.9191,
      "step": 59810
    },
    {
      "epoch": 0.20965418624946114,
      "grad_norm": 3.1875,
      "learning_rate": 3.494141286697274e-05,
      "loss": 0.9864,
      "step": 59820
    },
    {
      "epoch": 0.20968923375635673,
      "grad_norm": 3.03125,
      "learning_rate": 3.494725405670627e-05,
      "loss": 0.9558,
      "step": 59830
    },
    {
      "epoch": 0.20972428126325235,
      "grad_norm": 2.890625,
      "learning_rate": 3.49530952464398e-05,
      "loss": 0.9901,
      "step": 59840
    },
    {
      "epoch": 0.20975932877014794,
      "grad_norm": 3.046875,
      "learning_rate": 3.495893643617332e-05,
      "loss": 0.98,
      "step": 59850
    },
    {
      "epoch": 0.20979437627704353,
      "grad_norm": 3.171875,
      "learning_rate": 3.496477762590685e-05,
      "loss": 1.032,
      "step": 59860
    },
    {
      "epoch": 0.20982942378393912,
      "grad_norm": 3.234375,
      "learning_rate": 3.497061881564037e-05,
      "loss": 0.9758,
      "step": 59870
    },
    {
      "epoch": 0.20986447129083471,
      "grad_norm": 3.28125,
      "learning_rate": 3.497646000537389e-05,
      "loss": 0.985,
      "step": 59880
    },
    {
      "epoch": 0.20989951879773033,
      "grad_norm": 3.0625,
      "learning_rate": 3.498230119510742e-05,
      "loss": 0.9344,
      "step": 59890
    },
    {
      "epoch": 0.20993456630462592,
      "grad_norm": 3.453125,
      "learning_rate": 3.498814238484094e-05,
      "loss": 1.0115,
      "step": 59900
    },
    {
      "epoch": 0.20996961381152152,
      "grad_norm": 3.171875,
      "learning_rate": 3.499398357457448e-05,
      "loss": 1.0121,
      "step": 59910
    },
    {
      "epoch": 0.2100046613184171,
      "grad_norm": 3.03125,
      "learning_rate": 3.4999824764308e-05,
      "loss": 0.9835,
      "step": 59920
    },
    {
      "epoch": 0.2100397088253127,
      "grad_norm": 3.375,
      "learning_rate": 3.500566595404152e-05,
      "loss": 1.0418,
      "step": 59930
    },
    {
      "epoch": 0.21007475633220832,
      "grad_norm": 2.8125,
      "learning_rate": 3.501150714377505e-05,
      "loss": 0.9595,
      "step": 59940
    },
    {
      "epoch": 0.2101098038391039,
      "grad_norm": 3.328125,
      "learning_rate": 3.501734833350857e-05,
      "loss": 1.0841,
      "step": 59950
    },
    {
      "epoch": 0.2101448513459995,
      "grad_norm": 2.984375,
      "learning_rate": 3.50231895232421e-05,
      "loss": 0.9657,
      "step": 59960
    },
    {
      "epoch": 0.2101798988528951,
      "grad_norm": 3.453125,
      "learning_rate": 3.502903071297562e-05,
      "loss": 0.9895,
      "step": 59970
    },
    {
      "epoch": 0.21021494635979068,
      "grad_norm": 3.15625,
      "learning_rate": 3.503487190270914e-05,
      "loss": 1.1138,
      "step": 59980
    },
    {
      "epoch": 0.2102499938666863,
      "grad_norm": 3.640625,
      "learning_rate": 3.5040713092442676e-05,
      "loss": 1.0616,
      "step": 59990
    },
    {
      "epoch": 0.2102850413735819,
      "grad_norm": 3.484375,
      "learning_rate": 3.50465542821762e-05,
      "loss": 1.0346,
      "step": 60000
    },
    {
      "epoch": 0.2102850413735819,
      "eval_loss": 0.9314480423927307,
      "eval_runtime": 551.1251,
      "eval_samples_per_second": 690.29,
      "eval_steps_per_second": 57.524,
      "step": 60000
    },
    {
      "epoch": 0.21032008888047748,
      "grad_norm": 3.328125,
      "learning_rate": 3.505239547190972e-05,
      "loss": 1.0707,
      "step": 60010
    },
    {
      "epoch": 0.21035513638737308,
      "grad_norm": 3.140625,
      "learning_rate": 3.505823666164325e-05,
      "loss": 0.9178,
      "step": 60020
    },
    {
      "epoch": 0.21039018389426867,
      "grad_norm": 3.3125,
      "learning_rate": 3.506407785137677e-05,
      "loss": 1.0152,
      "step": 60030
    },
    {
      "epoch": 0.2104252314011643,
      "grad_norm": 3.390625,
      "learning_rate": 3.5069919041110297e-05,
      "loss": 1.008,
      "step": 60040
    },
    {
      "epoch": 0.21046027890805988,
      "grad_norm": 2.828125,
      "learning_rate": 3.507576023084382e-05,
      "loss": 1.0009,
      "step": 60050
    },
    {
      "epoch": 0.21049532641495547,
      "grad_norm": 3.515625,
      "learning_rate": 3.508160142057734e-05,
      "loss": 0.9393,
      "step": 60060
    },
    {
      "epoch": 0.21053037392185106,
      "grad_norm": 3.578125,
      "learning_rate": 3.5087442610310875e-05,
      "loss": 1.0068,
      "step": 60070
    },
    {
      "epoch": 0.21056542142874668,
      "grad_norm": 3.671875,
      "learning_rate": 3.5093283800044396e-05,
      "loss": 1.0023,
      "step": 60080
    },
    {
      "epoch": 0.21060046893564227,
      "grad_norm": 3.28125,
      "learning_rate": 3.509912498977792e-05,
      "loss": 0.9836,
      "step": 60090
    },
    {
      "epoch": 0.21063551644253786,
      "grad_norm": 2.71875,
      "learning_rate": 3.5104966179511446e-05,
      "loss": 0.9826,
      "step": 60100
    },
    {
      "epoch": 0.21067056394943345,
      "grad_norm": 3.546875,
      "learning_rate": 3.511080736924497e-05,
      "loss": 1.001,
      "step": 60110
    },
    {
      "epoch": 0.21070561145632904,
      "grad_norm": 3.40625,
      "learning_rate": 3.5116648558978495e-05,
      "loss": 1.0644,
      "step": 60120
    },
    {
      "epoch": 0.21074065896322466,
      "grad_norm": 3.4375,
      "learning_rate": 3.512248974871202e-05,
      "loss": 1.1309,
      "step": 60130
    },
    {
      "epoch": 0.21077570647012026,
      "grad_norm": 3.390625,
      "learning_rate": 3.5128330938445545e-05,
      "loss": 0.9607,
      "step": 60140
    },
    {
      "epoch": 0.21081075397701585,
      "grad_norm": 3.484375,
      "learning_rate": 3.513417212817907e-05,
      "loss": 0.9726,
      "step": 60150
    },
    {
      "epoch": 0.21084580148391144,
      "grad_norm": 3.609375,
      "learning_rate": 3.5140013317912595e-05,
      "loss": 1.0552,
      "step": 60160
    },
    {
      "epoch": 0.21088084899080703,
      "grad_norm": 3.015625,
      "learning_rate": 3.514585450764612e-05,
      "loss": 1.0729,
      "step": 60170
    },
    {
      "epoch": 0.21091589649770265,
      "grad_norm": 3.015625,
      "learning_rate": 3.5151695697379644e-05,
      "loss": 1.105,
      "step": 60180
    },
    {
      "epoch": 0.21095094400459824,
      "grad_norm": 3.71875,
      "learning_rate": 3.5157536887113166e-05,
      "loss": 0.9619,
      "step": 60190
    },
    {
      "epoch": 0.21098599151149383,
      "grad_norm": 2.9375,
      "learning_rate": 3.5163378076846694e-05,
      "loss": 1.0557,
      "step": 60200
    },
    {
      "epoch": 0.21102103901838942,
      "grad_norm": 3.0,
      "learning_rate": 3.5169219266580216e-05,
      "loss": 0.938,
      "step": 60210
    },
    {
      "epoch": 0.211056086525285,
      "grad_norm": 3.421875,
      "learning_rate": 3.5175060456313744e-05,
      "loss": 0.9859,
      "step": 60220
    },
    {
      "epoch": 0.21109113403218063,
      "grad_norm": 3.3125,
      "learning_rate": 3.518090164604727e-05,
      "loss": 0.9751,
      "step": 60230
    },
    {
      "epoch": 0.21112618153907622,
      "grad_norm": 3.5625,
      "learning_rate": 3.5186742835780793e-05,
      "loss": 1.1284,
      "step": 60240
    },
    {
      "epoch": 0.21116122904597182,
      "grad_norm": 3.03125,
      "learning_rate": 3.519258402551432e-05,
      "loss": 0.9582,
      "step": 60250
    },
    {
      "epoch": 0.2111962765528674,
      "grad_norm": 3.578125,
      "learning_rate": 3.519842521524784e-05,
      "loss": 1.0448,
      "step": 60260
    },
    {
      "epoch": 0.211231324059763,
      "grad_norm": 3.265625,
      "learning_rate": 3.5204266404981365e-05,
      "loss": 0.9555,
      "step": 60270
    },
    {
      "epoch": 0.21126637156665862,
      "grad_norm": 3.15625,
      "learning_rate": 3.521010759471489e-05,
      "loss": 1.0046,
      "step": 60280
    },
    {
      "epoch": 0.2113014190735542,
      "grad_norm": 2.859375,
      "learning_rate": 3.5215948784448414e-05,
      "loss": 0.9986,
      "step": 60290
    },
    {
      "epoch": 0.2113364665804498,
      "grad_norm": 2.9375,
      "learning_rate": 3.522178997418194e-05,
      "loss": 0.9127,
      "step": 60300
    },
    {
      "epoch": 0.2113715140873454,
      "grad_norm": 2.953125,
      "learning_rate": 3.522763116391547e-05,
      "loss": 1.0201,
      "step": 60310
    },
    {
      "epoch": 0.21140656159424098,
      "grad_norm": 2.96875,
      "learning_rate": 3.523347235364899e-05,
      "loss": 0.9767,
      "step": 60320
    },
    {
      "epoch": 0.2114416091011366,
      "grad_norm": 3.0625,
      "learning_rate": 3.523931354338252e-05,
      "loss": 0.9804,
      "step": 60330
    },
    {
      "epoch": 0.2114766566080322,
      "grad_norm": 3.1875,
      "learning_rate": 3.524515473311604e-05,
      "loss": 0.9045,
      "step": 60340
    },
    {
      "epoch": 0.21151170411492778,
      "grad_norm": 3.296875,
      "learning_rate": 3.5250995922849563e-05,
      "loss": 0.9402,
      "step": 60350
    },
    {
      "epoch": 0.21154675162182338,
      "grad_norm": 2.6875,
      "learning_rate": 3.525683711258309e-05,
      "loss": 0.9738,
      "step": 60360
    },
    {
      "epoch": 0.21158179912871897,
      "grad_norm": 3.53125,
      "learning_rate": 3.526267830231661e-05,
      "loss": 1.0092,
      "step": 60370
    },
    {
      "epoch": 0.21161684663561459,
      "grad_norm": 3.5625,
      "learning_rate": 3.526851949205015e-05,
      "loss": 1.0052,
      "step": 60380
    },
    {
      "epoch": 0.21165189414251018,
      "grad_norm": 2.796875,
      "learning_rate": 3.527436068178367e-05,
      "loss": 0.9789,
      "step": 60390
    },
    {
      "epoch": 0.21168694164940577,
      "grad_norm": 3.234375,
      "learning_rate": 3.528020187151719e-05,
      "loss": 1.0112,
      "step": 60400
    },
    {
      "epoch": 0.21172198915630136,
      "grad_norm": 2.9375,
      "learning_rate": 3.528604306125072e-05,
      "loss": 0.9728,
      "step": 60410
    },
    {
      "epoch": 0.21175703666319695,
      "grad_norm": 3.421875,
      "learning_rate": 3.529188425098424e-05,
      "loss": 0.9203,
      "step": 60420
    },
    {
      "epoch": 0.21179208417009257,
      "grad_norm": 3.0625,
      "learning_rate": 3.529772544071777e-05,
      "loss": 1.0235,
      "step": 60430
    },
    {
      "epoch": 0.21182713167698816,
      "grad_norm": 4.78125,
      "learning_rate": 3.530356663045129e-05,
      "loss": 0.971,
      "step": 60440
    },
    {
      "epoch": 0.21186217918388375,
      "grad_norm": 3.375,
      "learning_rate": 3.530940782018481e-05,
      "loss": 0.9518,
      "step": 60450
    },
    {
      "epoch": 0.21189722669077934,
      "grad_norm": 2.875,
      "learning_rate": 3.531524900991835e-05,
      "loss": 1.0173,
      "step": 60460
    },
    {
      "epoch": 0.21193227419767494,
      "grad_norm": 3.34375,
      "learning_rate": 3.532109019965187e-05,
      "loss": 1.0573,
      "step": 60470
    },
    {
      "epoch": 0.21196732170457055,
      "grad_norm": 2.984375,
      "learning_rate": 3.532693138938539e-05,
      "loss": 1.0238,
      "step": 60480
    },
    {
      "epoch": 0.21200236921146615,
      "grad_norm": 2.703125,
      "learning_rate": 3.533277257911892e-05,
      "loss": 0.9118,
      "step": 60490
    },
    {
      "epoch": 0.21203741671836174,
      "grad_norm": 3.078125,
      "learning_rate": 3.533861376885244e-05,
      "loss": 0.9645,
      "step": 60500
    },
    {
      "epoch": 0.21207246422525733,
      "grad_norm": 3.09375,
      "learning_rate": 3.534445495858597e-05,
      "loss": 1.0449,
      "step": 60510
    },
    {
      "epoch": 0.21210751173215292,
      "grad_norm": 2.703125,
      "learning_rate": 3.535029614831949e-05,
      "loss": 1.0293,
      "step": 60520
    },
    {
      "epoch": 0.21214255923904854,
      "grad_norm": 3.359375,
      "learning_rate": 3.535613733805301e-05,
      "loss": 0.9785,
      "step": 60530
    },
    {
      "epoch": 0.21217760674594413,
      "grad_norm": 3.25,
      "learning_rate": 3.5361978527786546e-05,
      "loss": 0.9853,
      "step": 60540
    },
    {
      "epoch": 0.21221265425283972,
      "grad_norm": 2.9375,
      "learning_rate": 3.536781971752007e-05,
      "loss": 1.004,
      "step": 60550
    },
    {
      "epoch": 0.2122477017597353,
      "grad_norm": 3.328125,
      "learning_rate": 3.537366090725359e-05,
      "loss": 0.983,
      "step": 60560
    },
    {
      "epoch": 0.2122827492666309,
      "grad_norm": 3.015625,
      "learning_rate": 3.537950209698712e-05,
      "loss": 0.9204,
      "step": 60570
    },
    {
      "epoch": 0.21231779677352652,
      "grad_norm": 3.078125,
      "learning_rate": 3.538534328672064e-05,
      "loss": 0.9763,
      "step": 60580
    },
    {
      "epoch": 0.21235284428042212,
      "grad_norm": 3.3125,
      "learning_rate": 3.5391184476454166e-05,
      "loss": 1.0156,
      "step": 60590
    },
    {
      "epoch": 0.2123878917873177,
      "grad_norm": 3.5,
      "learning_rate": 3.539702566618769e-05,
      "loss": 0.9789,
      "step": 60600
    },
    {
      "epoch": 0.2124229392942133,
      "grad_norm": 2.640625,
      "learning_rate": 3.5402866855921216e-05,
      "loss": 0.9876,
      "step": 60610
    },
    {
      "epoch": 0.21245798680110892,
      "grad_norm": 3.21875,
      "learning_rate": 3.5408708045654744e-05,
      "loss": 0.9941,
      "step": 60620
    },
    {
      "epoch": 0.2124930343080045,
      "grad_norm": 3.46875,
      "learning_rate": 3.5414549235388266e-05,
      "loss": 1.0343,
      "step": 60630
    },
    {
      "epoch": 0.2125280818149001,
      "grad_norm": 3.359375,
      "learning_rate": 3.5420390425121794e-05,
      "loss": 1.0748,
      "step": 60640
    },
    {
      "epoch": 0.2125631293217957,
      "grad_norm": 3.140625,
      "learning_rate": 3.5426231614855316e-05,
      "loss": 1.0255,
      "step": 60650
    },
    {
      "epoch": 0.21259817682869128,
      "grad_norm": 3.28125,
      "learning_rate": 3.543207280458884e-05,
      "loss": 0.9727,
      "step": 60660
    },
    {
      "epoch": 0.2126332243355869,
      "grad_norm": 3.453125,
      "learning_rate": 3.5437913994322365e-05,
      "loss": 1.0825,
      "step": 60670
    },
    {
      "epoch": 0.2126682718424825,
      "grad_norm": 3.34375,
      "learning_rate": 3.544375518405589e-05,
      "loss": 1.0541,
      "step": 60680
    },
    {
      "epoch": 0.21270331934937808,
      "grad_norm": 3.21875,
      "learning_rate": 3.5449596373789415e-05,
      "loss": 0.9608,
      "step": 60690
    },
    {
      "epoch": 0.21273836685627368,
      "grad_norm": 3.21875,
      "learning_rate": 3.545543756352294e-05,
      "loss": 0.9096,
      "step": 60700
    },
    {
      "epoch": 0.21277341436316927,
      "grad_norm": 3.03125,
      "learning_rate": 3.5461278753256465e-05,
      "loss": 0.9427,
      "step": 60710
    },
    {
      "epoch": 0.21280846187006489,
      "grad_norm": 3.078125,
      "learning_rate": 3.546711994298999e-05,
      "loss": 1.0512,
      "step": 60720
    },
    {
      "epoch": 0.21284350937696048,
      "grad_norm": 3.234375,
      "learning_rate": 3.5472961132723514e-05,
      "loss": 1.0385,
      "step": 60730
    },
    {
      "epoch": 0.21287855688385607,
      "grad_norm": 3.234375,
      "learning_rate": 3.5478802322457036e-05,
      "loss": 1.0055,
      "step": 60740
    },
    {
      "epoch": 0.21291360439075166,
      "grad_norm": 3.328125,
      "learning_rate": 3.5484643512190564e-05,
      "loss": 1.0077,
      "step": 60750
    },
    {
      "epoch": 0.21294865189764725,
      "grad_norm": 3.1875,
      "learning_rate": 3.5490484701924085e-05,
      "loss": 0.9883,
      "step": 60760
    },
    {
      "epoch": 0.21298369940454287,
      "grad_norm": 3.171875,
      "learning_rate": 3.5496325891657614e-05,
      "loss": 1.0076,
      "step": 60770
    },
    {
      "epoch": 0.21301874691143846,
      "grad_norm": 3.15625,
      "learning_rate": 3.550216708139114e-05,
      "loss": 1.0307,
      "step": 60780
    },
    {
      "epoch": 0.21305379441833405,
      "grad_norm": 3.09375,
      "learning_rate": 3.550800827112466e-05,
      "loss": 0.9909,
      "step": 60790
    },
    {
      "epoch": 0.21308884192522964,
      "grad_norm": 2.9375,
      "learning_rate": 3.551384946085819e-05,
      "loss": 0.9692,
      "step": 60800
    },
    {
      "epoch": 0.21312388943212524,
      "grad_norm": 3.09375,
      "learning_rate": 3.551969065059171e-05,
      "loss": 1.0184,
      "step": 60810
    },
    {
      "epoch": 0.21315893693902085,
      "grad_norm": 3.0625,
      "learning_rate": 3.552553184032524e-05,
      "loss": 1.0334,
      "step": 60820
    },
    {
      "epoch": 0.21319398444591645,
      "grad_norm": 3.265625,
      "learning_rate": 3.553137303005876e-05,
      "loss": 0.9468,
      "step": 60830
    },
    {
      "epoch": 0.21322903195281204,
      "grad_norm": 3.140625,
      "learning_rate": 3.5537214219792284e-05,
      "loss": 0.9131,
      "step": 60840
    },
    {
      "epoch": 0.21326407945970763,
      "grad_norm": 3.09375,
      "learning_rate": 3.554305540952582e-05,
      "loss": 0.9949,
      "step": 60850
    },
    {
      "epoch": 0.21329912696660322,
      "grad_norm": 3.109375,
      "learning_rate": 3.554889659925934e-05,
      "loss": 1.0313,
      "step": 60860
    },
    {
      "epoch": 0.21333417447349884,
      "grad_norm": 3.328125,
      "learning_rate": 3.555473778899286e-05,
      "loss": 1.0255,
      "step": 60870
    },
    {
      "epoch": 0.21336922198039443,
      "grad_norm": 3.078125,
      "learning_rate": 3.556057897872639e-05,
      "loss": 0.8862,
      "step": 60880
    },
    {
      "epoch": 0.21340426948729002,
      "grad_norm": 3.390625,
      "learning_rate": 3.556642016845991e-05,
      "loss": 1.0283,
      "step": 60890
    },
    {
      "epoch": 0.2134393169941856,
      "grad_norm": 3.53125,
      "learning_rate": 3.557226135819344e-05,
      "loss": 0.9825,
      "step": 60900
    },
    {
      "epoch": 0.2134743645010812,
      "grad_norm": 3.28125,
      "learning_rate": 3.557810254792696e-05,
      "loss": 1.0021,
      "step": 60910
    },
    {
      "epoch": 0.21350941200797682,
      "grad_norm": 3.390625,
      "learning_rate": 3.558394373766049e-05,
      "loss": 0.9729,
      "step": 60920
    },
    {
      "epoch": 0.21354445951487241,
      "grad_norm": 3.0625,
      "learning_rate": 3.558978492739402e-05,
      "loss": 0.9813,
      "step": 60930
    },
    {
      "epoch": 0.213579507021768,
      "grad_norm": 3.109375,
      "learning_rate": 3.559562611712754e-05,
      "loss": 1.0331,
      "step": 60940
    },
    {
      "epoch": 0.2136145545286636,
      "grad_norm": 3.015625,
      "learning_rate": 3.560146730686106e-05,
      "loss": 0.9565,
      "step": 60950
    },
    {
      "epoch": 0.2136496020355592,
      "grad_norm": 3.28125,
      "learning_rate": 3.560730849659459e-05,
      "loss": 0.9117,
      "step": 60960
    },
    {
      "epoch": 0.2136846495424548,
      "grad_norm": 3.546875,
      "learning_rate": 3.561314968632811e-05,
      "loss": 0.9912,
      "step": 60970
    },
    {
      "epoch": 0.2137196970493504,
      "grad_norm": 3.03125,
      "learning_rate": 3.561899087606164e-05,
      "loss": 1.0124,
      "step": 60980
    },
    {
      "epoch": 0.213754744556246,
      "grad_norm": 3.765625,
      "learning_rate": 3.562483206579516e-05,
      "loss": 1.0306,
      "step": 60990
    },
    {
      "epoch": 0.21378979206314158,
      "grad_norm": 3.078125,
      "learning_rate": 3.563067325552869e-05,
      "loss": 0.9641,
      "step": 61000
    },
    {
      "epoch": 0.21382483957003717,
      "grad_norm": 3.046875,
      "learning_rate": 3.563651444526222e-05,
      "loss": 0.9937,
      "step": 61010
    },
    {
      "epoch": 0.2138598870769328,
      "grad_norm": 3.375,
      "learning_rate": 3.564235563499574e-05,
      "loss": 0.9371,
      "step": 61020
    },
    {
      "epoch": 0.21389493458382838,
      "grad_norm": 3.390625,
      "learning_rate": 3.5648196824729266e-05,
      "loss": 0.9733,
      "step": 61030
    },
    {
      "epoch": 0.21392998209072397,
      "grad_norm": 3.21875,
      "learning_rate": 3.565403801446279e-05,
      "loss": 1.0235,
      "step": 61040
    },
    {
      "epoch": 0.21396502959761957,
      "grad_norm": 3.171875,
      "learning_rate": 3.565987920419631e-05,
      "loss": 0.9267,
      "step": 61050
    },
    {
      "epoch": 0.21400007710451516,
      "grad_norm": 3.015625,
      "learning_rate": 3.566572039392984e-05,
      "loss": 0.9722,
      "step": 61060
    },
    {
      "epoch": 0.21403512461141078,
      "grad_norm": 3.5625,
      "learning_rate": 3.567156158366336e-05,
      "loss": 0.9666,
      "step": 61070
    },
    {
      "epoch": 0.21407017211830637,
      "grad_norm": 3.0,
      "learning_rate": 3.567740277339689e-05,
      "loss": 0.9425,
      "step": 61080
    },
    {
      "epoch": 0.21410521962520196,
      "grad_norm": 3.46875,
      "learning_rate": 3.5683243963130415e-05,
      "loss": 1.0219,
      "step": 61090
    },
    {
      "epoch": 0.21414026713209755,
      "grad_norm": 3.421875,
      "learning_rate": 3.568908515286394e-05,
      "loss": 1.0057,
      "step": 61100
    },
    {
      "epoch": 0.21417531463899314,
      "grad_norm": 2.515625,
      "learning_rate": 3.5694926342597465e-05,
      "loss": 0.911,
      "step": 61110
    },
    {
      "epoch": 0.21421036214588876,
      "grad_norm": 3.40625,
      "learning_rate": 3.5700767532330987e-05,
      "loss": 1.0895,
      "step": 61120
    },
    {
      "epoch": 0.21424540965278435,
      "grad_norm": 2.890625,
      "learning_rate": 3.570660872206451e-05,
      "loss": 1.0614,
      "step": 61130
    },
    {
      "epoch": 0.21428045715967994,
      "grad_norm": 3.15625,
      "learning_rate": 3.5712449911798036e-05,
      "loss": 0.9536,
      "step": 61140
    },
    {
      "epoch": 0.21431550466657553,
      "grad_norm": 3.171875,
      "learning_rate": 3.571829110153156e-05,
      "loss": 1.0461,
      "step": 61150
    },
    {
      "epoch": 0.21435055217347115,
      "grad_norm": 3.265625,
      "learning_rate": 3.5724132291265086e-05,
      "loss": 0.9645,
      "step": 61160
    },
    {
      "epoch": 0.21438559968036675,
      "grad_norm": 3.640625,
      "learning_rate": 3.5729973480998614e-05,
      "loss": 1.0276,
      "step": 61170
    },
    {
      "epoch": 0.21442064718726234,
      "grad_norm": 3.0,
      "learning_rate": 3.5735814670732136e-05,
      "loss": 0.9282,
      "step": 61180
    },
    {
      "epoch": 0.21445569469415793,
      "grad_norm": 3.046875,
      "learning_rate": 3.5741655860465664e-05,
      "loss": 0.9663,
      "step": 61190
    },
    {
      "epoch": 0.21449074220105352,
      "grad_norm": 3.328125,
      "learning_rate": 3.5747497050199185e-05,
      "loss": 0.9954,
      "step": 61200
    },
    {
      "epoch": 0.21452578970794914,
      "grad_norm": 3.984375,
      "learning_rate": 3.575333823993271e-05,
      "loss": 0.9909,
      "step": 61210
    },
    {
      "epoch": 0.21456083721484473,
      "grad_norm": 3.21875,
      "learning_rate": 3.5759179429666235e-05,
      "loss": 0.9885,
      "step": 61220
    },
    {
      "epoch": 0.21459588472174032,
      "grad_norm": 3.15625,
      "learning_rate": 3.5765020619399756e-05,
      "loss": 1.0361,
      "step": 61230
    },
    {
      "epoch": 0.2146309322286359,
      "grad_norm": 3.25,
      "learning_rate": 3.5770861809133285e-05,
      "loss": 0.9606,
      "step": 61240
    },
    {
      "epoch": 0.2146659797355315,
      "grad_norm": 3.125,
      "learning_rate": 3.577670299886681e-05,
      "loss": 0.9575,
      "step": 61250
    },
    {
      "epoch": 0.21470102724242712,
      "grad_norm": 2.71875,
      "learning_rate": 3.5782544188600334e-05,
      "loss": 0.9926,
      "step": 61260
    },
    {
      "epoch": 0.21473607474932271,
      "grad_norm": 3.1875,
      "learning_rate": 3.578838537833386e-05,
      "loss": 0.9477,
      "step": 61270
    },
    {
      "epoch": 0.2147711222562183,
      "grad_norm": 3.25,
      "learning_rate": 3.5794226568067384e-05,
      "loss": 1.0188,
      "step": 61280
    },
    {
      "epoch": 0.2148061697631139,
      "grad_norm": 3.296875,
      "learning_rate": 3.580006775780091e-05,
      "loss": 0.98,
      "step": 61290
    },
    {
      "epoch": 0.2148412172700095,
      "grad_norm": 3.234375,
      "learning_rate": 3.5805908947534434e-05,
      "loss": 0.9486,
      "step": 61300
    },
    {
      "epoch": 0.2148762647769051,
      "grad_norm": 3.03125,
      "learning_rate": 3.581175013726796e-05,
      "loss": 1.059,
      "step": 61310
    },
    {
      "epoch": 0.2149113122838007,
      "grad_norm": 3.484375,
      "learning_rate": 3.581759132700149e-05,
      "loss": 0.9634,
      "step": 61320
    },
    {
      "epoch": 0.2149463597906963,
      "grad_norm": 3.53125,
      "learning_rate": 3.582343251673501e-05,
      "loss": 0.9691,
      "step": 61330
    },
    {
      "epoch": 0.21498140729759188,
      "grad_norm": 3.1875,
      "learning_rate": 3.582927370646853e-05,
      "loss": 0.9946,
      "step": 61340
    },
    {
      "epoch": 0.21501645480448747,
      "grad_norm": 3.234375,
      "learning_rate": 3.583511489620206e-05,
      "loss": 0.9238,
      "step": 61350
    },
    {
      "epoch": 0.2150515023113831,
      "grad_norm": 3.265625,
      "learning_rate": 3.584095608593558e-05,
      "loss": 1.0821,
      "step": 61360
    },
    {
      "epoch": 0.21508654981827868,
      "grad_norm": 3.375,
      "learning_rate": 3.584679727566911e-05,
      "loss": 1.0322,
      "step": 61370
    },
    {
      "epoch": 0.21512159732517427,
      "grad_norm": 3.15625,
      "learning_rate": 3.585263846540263e-05,
      "loss": 0.9878,
      "step": 61380
    },
    {
      "epoch": 0.21515664483206987,
      "grad_norm": 3.359375,
      "learning_rate": 3.585847965513616e-05,
      "loss": 0.9764,
      "step": 61390
    },
    {
      "epoch": 0.21519169233896546,
      "grad_norm": 3.390625,
      "learning_rate": 3.586432084486969e-05,
      "loss": 0.9101,
      "step": 61400
    },
    {
      "epoch": 0.21522673984586108,
      "grad_norm": 3.1875,
      "learning_rate": 3.587016203460321e-05,
      "loss": 1.0313,
      "step": 61410
    },
    {
      "epoch": 0.21526178735275667,
      "grad_norm": 3.46875,
      "learning_rate": 3.587600322433673e-05,
      "loss": 1.0295,
      "step": 61420
    },
    {
      "epoch": 0.21529683485965226,
      "grad_norm": 2.921875,
      "learning_rate": 3.588184441407026e-05,
      "loss": 0.9605,
      "step": 61430
    },
    {
      "epoch": 0.21533188236654785,
      "grad_norm": 2.953125,
      "learning_rate": 3.588768560380378e-05,
      "loss": 0.9745,
      "step": 61440
    },
    {
      "epoch": 0.21536692987344344,
      "grad_norm": 3.296875,
      "learning_rate": 3.589352679353731e-05,
      "loss": 0.9781,
      "step": 61450
    },
    {
      "epoch": 0.21540197738033906,
      "grad_norm": 3.265625,
      "learning_rate": 3.589936798327083e-05,
      "loss": 1.0041,
      "step": 61460
    },
    {
      "epoch": 0.21543702488723465,
      "grad_norm": 3.0625,
      "learning_rate": 3.590520917300436e-05,
      "loss": 0.9663,
      "step": 61470
    },
    {
      "epoch": 0.21547207239413024,
      "grad_norm": 3.28125,
      "learning_rate": 3.591105036273789e-05,
      "loss": 0.9357,
      "step": 61480
    },
    {
      "epoch": 0.21550711990102583,
      "grad_norm": 3.0625,
      "learning_rate": 3.591689155247141e-05,
      "loss": 1.0011,
      "step": 61490
    },
    {
      "epoch": 0.21554216740792143,
      "grad_norm": 2.828125,
      "learning_rate": 3.592273274220494e-05,
      "loss": 1.0123,
      "step": 61500
    },
    {
      "epoch": 0.21557721491481704,
      "grad_norm": 3.0625,
      "learning_rate": 3.592857393193846e-05,
      "loss": 0.9936,
      "step": 61510
    },
    {
      "epoch": 0.21561226242171264,
      "grad_norm": 3.453125,
      "learning_rate": 3.593441512167198e-05,
      "loss": 1.0506,
      "step": 61520
    },
    {
      "epoch": 0.21564730992860823,
      "grad_norm": 2.6875,
      "learning_rate": 3.594025631140551e-05,
      "loss": 0.9137,
      "step": 61530
    },
    {
      "epoch": 0.21568235743550382,
      "grad_norm": 3.625,
      "learning_rate": 3.594609750113903e-05,
      "loss": 0.9658,
      "step": 61540
    },
    {
      "epoch": 0.2157174049423994,
      "grad_norm": 3.671875,
      "learning_rate": 3.595193869087256e-05,
      "loss": 1.0573,
      "step": 61550
    },
    {
      "epoch": 0.21575245244929503,
      "grad_norm": 3.15625,
      "learning_rate": 3.5957779880606087e-05,
      "loss": 1.0606,
      "step": 61560
    },
    {
      "epoch": 0.21578749995619062,
      "grad_norm": 3.15625,
      "learning_rate": 3.596362107033961e-05,
      "loss": 1.0079,
      "step": 61570
    },
    {
      "epoch": 0.2158225474630862,
      "grad_norm": 3.0625,
      "learning_rate": 3.5969462260073136e-05,
      "loss": 0.9954,
      "step": 61580
    },
    {
      "epoch": 0.2158575949699818,
      "grad_norm": 3.34375,
      "learning_rate": 3.597530344980666e-05,
      "loss": 0.9612,
      "step": 61590
    },
    {
      "epoch": 0.2158926424768774,
      "grad_norm": 3.453125,
      "learning_rate": 3.598114463954018e-05,
      "loss": 1.0011,
      "step": 61600
    },
    {
      "epoch": 0.215927689983773,
      "grad_norm": 3.671875,
      "learning_rate": 3.598698582927371e-05,
      "loss": 0.9813,
      "step": 61610
    },
    {
      "epoch": 0.2159627374906686,
      "grad_norm": 2.640625,
      "learning_rate": 3.5992827019007236e-05,
      "loss": 1.0028,
      "step": 61620
    },
    {
      "epoch": 0.2159977849975642,
      "grad_norm": 3.3125,
      "learning_rate": 3.599866820874076e-05,
      "loss": 1.0417,
      "step": 61630
    },
    {
      "epoch": 0.2160328325044598,
      "grad_norm": 2.96875,
      "learning_rate": 3.6004509398474285e-05,
      "loss": 1.0365,
      "step": 61640
    },
    {
      "epoch": 0.21606788001135538,
      "grad_norm": 2.984375,
      "learning_rate": 3.601035058820781e-05,
      "loss": 1.0212,
      "step": 61650
    },
    {
      "epoch": 0.216102927518251,
      "grad_norm": 3.015625,
      "learning_rate": 3.6016191777941335e-05,
      "loss": 1.0247,
      "step": 61660
    },
    {
      "epoch": 0.2161379750251466,
      "grad_norm": 3.296875,
      "learning_rate": 3.6022032967674856e-05,
      "loss": 1.0451,
      "step": 61670
    },
    {
      "epoch": 0.21617302253204218,
      "grad_norm": 3.140625,
      "learning_rate": 3.602787415740838e-05,
      "loss": 0.9575,
      "step": 61680
    },
    {
      "epoch": 0.21620807003893777,
      "grad_norm": 2.90625,
      "learning_rate": 3.6033715347141906e-05,
      "loss": 1.0124,
      "step": 61690
    },
    {
      "epoch": 0.2162431175458334,
      "grad_norm": 2.90625,
      "learning_rate": 3.6039556536875434e-05,
      "loss": 1.0435,
      "step": 61700
    },
    {
      "epoch": 0.21627816505272898,
      "grad_norm": 3.203125,
      "learning_rate": 3.604539772660896e-05,
      "loss": 1.0162,
      "step": 61710
    },
    {
      "epoch": 0.21631321255962457,
      "grad_norm": 3.1875,
      "learning_rate": 3.6051238916342484e-05,
      "loss": 1.013,
      "step": 61720
    },
    {
      "epoch": 0.21634826006652016,
      "grad_norm": 3.46875,
      "learning_rate": 3.6057080106076005e-05,
      "loss": 0.9963,
      "step": 61730
    },
    {
      "epoch": 0.21638330757341576,
      "grad_norm": 3.265625,
      "learning_rate": 3.6062921295809534e-05,
      "loss": 1.0752,
      "step": 61740
    },
    {
      "epoch": 0.21641835508031138,
      "grad_norm": 3.5,
      "learning_rate": 3.6068762485543055e-05,
      "loss": 0.97,
      "step": 61750
    },
    {
      "epoch": 0.21645340258720697,
      "grad_norm": 2.921875,
      "learning_rate": 3.6074603675276583e-05,
      "loss": 0.9361,
      "step": 61760
    },
    {
      "epoch": 0.21648845009410256,
      "grad_norm": 3.59375,
      "learning_rate": 3.6080444865010105e-05,
      "loss": 1.0424,
      "step": 61770
    },
    {
      "epoch": 0.21652349760099815,
      "grad_norm": 3.390625,
      "learning_rate": 3.608628605474363e-05,
      "loss": 1.0176,
      "step": 61780
    },
    {
      "epoch": 0.21655854510789374,
      "grad_norm": 3.09375,
      "learning_rate": 3.609212724447716e-05,
      "loss": 1.0278,
      "step": 61790
    },
    {
      "epoch": 0.21659359261478936,
      "grad_norm": 3.109375,
      "learning_rate": 3.609796843421068e-05,
      "loss": 1.0847,
      "step": 61800
    },
    {
      "epoch": 0.21662864012168495,
      "grad_norm": 3.453125,
      "learning_rate": 3.6103809623944204e-05,
      "loss": 1.0545,
      "step": 61810
    },
    {
      "epoch": 0.21666368762858054,
      "grad_norm": 3.375,
      "learning_rate": 3.610965081367773e-05,
      "loss": 1.0213,
      "step": 61820
    },
    {
      "epoch": 0.21669873513547613,
      "grad_norm": 3.46875,
      "learning_rate": 3.6115492003411254e-05,
      "loss": 1.0444,
      "step": 61830
    },
    {
      "epoch": 0.21673378264237173,
      "grad_norm": 2.71875,
      "learning_rate": 3.612133319314478e-05,
      "loss": 0.9434,
      "step": 61840
    },
    {
      "epoch": 0.21676883014926734,
      "grad_norm": 2.96875,
      "learning_rate": 3.6127174382878304e-05,
      "loss": 0.931,
      "step": 61850
    },
    {
      "epoch": 0.21680387765616294,
      "grad_norm": 2.59375,
      "learning_rate": 3.613301557261183e-05,
      "loss": 0.9415,
      "step": 61860
    },
    {
      "epoch": 0.21683892516305853,
      "grad_norm": 3.09375,
      "learning_rate": 3.613885676234536e-05,
      "loss": 0.9608,
      "step": 61870
    },
    {
      "epoch": 0.21687397266995412,
      "grad_norm": 3.0625,
      "learning_rate": 3.614469795207888e-05,
      "loss": 0.9444,
      "step": 61880
    },
    {
      "epoch": 0.2169090201768497,
      "grad_norm": 2.890625,
      "learning_rate": 3.61505391418124e-05,
      "loss": 0.9314,
      "step": 61890
    },
    {
      "epoch": 0.21694406768374533,
      "grad_norm": 3.265625,
      "learning_rate": 3.615638033154593e-05,
      "loss": 0.8882,
      "step": 61900
    },
    {
      "epoch": 0.21697911519064092,
      "grad_norm": 3.3125,
      "learning_rate": 3.616222152127945e-05,
      "loss": 0.9736,
      "step": 61910
    },
    {
      "epoch": 0.2170141626975365,
      "grad_norm": 3.328125,
      "learning_rate": 3.616806271101298e-05,
      "loss": 0.9628,
      "step": 61920
    },
    {
      "epoch": 0.2170492102044321,
      "grad_norm": 3.046875,
      "learning_rate": 3.61739039007465e-05,
      "loss": 0.9318,
      "step": 61930
    },
    {
      "epoch": 0.2170842577113277,
      "grad_norm": 3.375,
      "learning_rate": 3.617974509048003e-05,
      "loss": 1.0508,
      "step": 61940
    },
    {
      "epoch": 0.2171193052182233,
      "grad_norm": 2.65625,
      "learning_rate": 3.618558628021356e-05,
      "loss": 0.9155,
      "step": 61950
    },
    {
      "epoch": 0.2171543527251189,
      "grad_norm": 3.015625,
      "learning_rate": 3.619142746994708e-05,
      "loss": 1.0027,
      "step": 61960
    },
    {
      "epoch": 0.2171894002320145,
      "grad_norm": 3.15625,
      "learning_rate": 3.619726865968061e-05,
      "loss": 1.0115,
      "step": 61970
    },
    {
      "epoch": 0.2172244477389101,
      "grad_norm": 3.515625,
      "learning_rate": 3.620310984941413e-05,
      "loss": 0.9866,
      "step": 61980
    },
    {
      "epoch": 0.21725949524580568,
      "grad_norm": 2.984375,
      "learning_rate": 3.620895103914765e-05,
      "loss": 0.9088,
      "step": 61990
    },
    {
      "epoch": 0.2172945427527013,
      "grad_norm": 3.25,
      "learning_rate": 3.621479222888118e-05,
      "loss": 1.0079,
      "step": 62000
    },
    {
      "epoch": 0.2173295902595969,
      "grad_norm": 2.9375,
      "learning_rate": 3.622063341861471e-05,
      "loss": 1.0003,
      "step": 62010
    },
    {
      "epoch": 0.21736463776649248,
      "grad_norm": 3.203125,
      "learning_rate": 3.622647460834823e-05,
      "loss": 0.9899,
      "step": 62020
    },
    {
      "epoch": 0.21739968527338807,
      "grad_norm": 3.3125,
      "learning_rate": 3.623231579808176e-05,
      "loss": 0.9313,
      "step": 62030
    },
    {
      "epoch": 0.21743473278028366,
      "grad_norm": 3.09375,
      "learning_rate": 3.623815698781528e-05,
      "loss": 1.0329,
      "step": 62040
    },
    {
      "epoch": 0.21746978028717928,
      "grad_norm": 2.78125,
      "learning_rate": 3.624399817754881e-05,
      "loss": 0.9268,
      "step": 62050
    },
    {
      "epoch": 0.21750482779407487,
      "grad_norm": 3.203125,
      "learning_rate": 3.624983936728233e-05,
      "loss": 0.9936,
      "step": 62060
    },
    {
      "epoch": 0.21753987530097046,
      "grad_norm": 3.3125,
      "learning_rate": 3.625568055701585e-05,
      "loss": 1.0334,
      "step": 62070
    },
    {
      "epoch": 0.21757492280786606,
      "grad_norm": 3.234375,
      "learning_rate": 3.626152174674938e-05,
      "loss": 1.0008,
      "step": 62080
    },
    {
      "epoch": 0.21760997031476165,
      "grad_norm": 3.265625,
      "learning_rate": 3.626736293648291e-05,
      "loss": 0.9708,
      "step": 62090
    },
    {
      "epoch": 0.21764501782165727,
      "grad_norm": 3.265625,
      "learning_rate": 3.627320412621643e-05,
      "loss": 0.9438,
      "step": 62100
    },
    {
      "epoch": 0.21768006532855286,
      "grad_norm": 3.328125,
      "learning_rate": 3.6279045315949956e-05,
      "loss": 0.9676,
      "step": 62110
    },
    {
      "epoch": 0.21771511283544845,
      "grad_norm": 3.515625,
      "learning_rate": 3.628488650568348e-05,
      "loss": 1.122,
      "step": 62120
    },
    {
      "epoch": 0.21775016034234404,
      "grad_norm": 3.390625,
      "learning_rate": 3.6290727695417006e-05,
      "loss": 1.0067,
      "step": 62130
    },
    {
      "epoch": 0.21778520784923963,
      "grad_norm": 3.40625,
      "learning_rate": 3.629656888515053e-05,
      "loss": 1.0142,
      "step": 62140
    },
    {
      "epoch": 0.21782025535613525,
      "grad_norm": 3.359375,
      "learning_rate": 3.630241007488405e-05,
      "loss": 0.9531,
      "step": 62150
    },
    {
      "epoch": 0.21785530286303084,
      "grad_norm": 3.390625,
      "learning_rate": 3.630825126461758e-05,
      "loss": 0.9856,
      "step": 62160
    },
    {
      "epoch": 0.21789035036992643,
      "grad_norm": 3.484375,
      "learning_rate": 3.6314092454351105e-05,
      "loss": 1.0541,
      "step": 62170
    },
    {
      "epoch": 0.21792539787682202,
      "grad_norm": 3.046875,
      "learning_rate": 3.6319933644084634e-05,
      "loss": 0.9902,
      "step": 62180
    },
    {
      "epoch": 0.21796044538371762,
      "grad_norm": 3.0625,
      "learning_rate": 3.6325774833818155e-05,
      "loss": 0.9731,
      "step": 62190
    },
    {
      "epoch": 0.21799549289061323,
      "grad_norm": 3.25,
      "learning_rate": 3.6331616023551677e-05,
      "loss": 1.0301,
      "step": 62200
    },
    {
      "epoch": 0.21803054039750883,
      "grad_norm": 3.015625,
      "learning_rate": 3.6337457213285205e-05,
      "loss": 0.9716,
      "step": 62210
    },
    {
      "epoch": 0.21806558790440442,
      "grad_norm": 3.1875,
      "learning_rate": 3.6343298403018726e-05,
      "loss": 0.9419,
      "step": 62220
    },
    {
      "epoch": 0.2181006354113,
      "grad_norm": 3.46875,
      "learning_rate": 3.6349139592752254e-05,
      "loss": 1.0805,
      "step": 62230
    },
    {
      "epoch": 0.2181356829181956,
      "grad_norm": 3.328125,
      "learning_rate": 3.6354980782485776e-05,
      "loss": 0.9726,
      "step": 62240
    },
    {
      "epoch": 0.21817073042509122,
      "grad_norm": 3.375,
      "learning_rate": 3.6360821972219304e-05,
      "loss": 1.0059,
      "step": 62250
    },
    {
      "epoch": 0.2182057779319868,
      "grad_norm": 3.78125,
      "learning_rate": 3.636666316195283e-05,
      "loss": 0.9966,
      "step": 62260
    },
    {
      "epoch": 0.2182408254388824,
      "grad_norm": 3.78125,
      "learning_rate": 3.6372504351686354e-05,
      "loss": 0.9344,
      "step": 62270
    },
    {
      "epoch": 0.218275872945778,
      "grad_norm": 3.484375,
      "learning_rate": 3.6378345541419875e-05,
      "loss": 1.0894,
      "step": 62280
    },
    {
      "epoch": 0.2183109204526736,
      "grad_norm": 3.125,
      "learning_rate": 3.6384186731153404e-05,
      "loss": 1.0547,
      "step": 62290
    },
    {
      "epoch": 0.2183459679595692,
      "grad_norm": 3.078125,
      "learning_rate": 3.6390027920886925e-05,
      "loss": 0.9893,
      "step": 62300
    },
    {
      "epoch": 0.2183810154664648,
      "grad_norm": 3.09375,
      "learning_rate": 3.639586911062045e-05,
      "loss": 0.9912,
      "step": 62310
    },
    {
      "epoch": 0.2184160629733604,
      "grad_norm": 2.921875,
      "learning_rate": 3.640171030035398e-05,
      "loss": 1.014,
      "step": 62320
    },
    {
      "epoch": 0.21845111048025598,
      "grad_norm": 2.875,
      "learning_rate": 3.64075514900875e-05,
      "loss": 0.9426,
      "step": 62330
    },
    {
      "epoch": 0.2184861579871516,
      "grad_norm": 2.671875,
      "learning_rate": 3.641339267982103e-05,
      "loss": 0.993,
      "step": 62340
    },
    {
      "epoch": 0.2185212054940472,
      "grad_norm": 3.4375,
      "learning_rate": 3.641923386955455e-05,
      "loss": 1.0344,
      "step": 62350
    },
    {
      "epoch": 0.21855625300094278,
      "grad_norm": 3.421875,
      "learning_rate": 3.6425075059288074e-05,
      "loss": 0.9443,
      "step": 62360
    },
    {
      "epoch": 0.21859130050783837,
      "grad_norm": 2.828125,
      "learning_rate": 3.64309162490216e-05,
      "loss": 1.0137,
      "step": 62370
    },
    {
      "epoch": 0.21862634801473396,
      "grad_norm": 3.40625,
      "learning_rate": 3.6436757438755124e-05,
      "loss": 1.0614,
      "step": 62380
    },
    {
      "epoch": 0.21866139552162958,
      "grad_norm": 3.328125,
      "learning_rate": 3.644259862848865e-05,
      "loss": 0.9574,
      "step": 62390
    },
    {
      "epoch": 0.21869644302852517,
      "grad_norm": 3.421875,
      "learning_rate": 3.644843981822218e-05,
      "loss": 1.0997,
      "step": 62400
    },
    {
      "epoch": 0.21873149053542076,
      "grad_norm": 2.90625,
      "learning_rate": 3.64542810079557e-05,
      "loss": 0.9834,
      "step": 62410
    },
    {
      "epoch": 0.21876653804231636,
      "grad_norm": 3.453125,
      "learning_rate": 3.646012219768923e-05,
      "loss": 1.0444,
      "step": 62420
    },
    {
      "epoch": 0.21880158554921195,
      "grad_norm": 3.328125,
      "learning_rate": 3.646596338742275e-05,
      "loss": 0.9574,
      "step": 62430
    },
    {
      "epoch": 0.21883663305610757,
      "grad_norm": 2.9375,
      "learning_rate": 3.647180457715628e-05,
      "loss": 0.9042,
      "step": 62440
    },
    {
      "epoch": 0.21887168056300316,
      "grad_norm": 3.359375,
      "learning_rate": 3.64776457668898e-05,
      "loss": 1.0671,
      "step": 62450
    },
    {
      "epoch": 0.21890672806989875,
      "grad_norm": 3.0,
      "learning_rate": 3.648348695662332e-05,
      "loss": 0.9737,
      "step": 62460
    },
    {
      "epoch": 0.21894177557679434,
      "grad_norm": 3.296875,
      "learning_rate": 3.648932814635685e-05,
      "loss": 0.9757,
      "step": 62470
    },
    {
      "epoch": 0.21897682308368993,
      "grad_norm": 3.046875,
      "learning_rate": 3.649516933609038e-05,
      "loss": 1.0255,
      "step": 62480
    },
    {
      "epoch": 0.21901187059058555,
      "grad_norm": 3.234375,
      "learning_rate": 3.65010105258239e-05,
      "loss": 0.9736,
      "step": 62490
    },
    {
      "epoch": 0.21904691809748114,
      "grad_norm": 3.34375,
      "learning_rate": 3.650685171555743e-05,
      "loss": 0.9106,
      "step": 62500
    },
    {
      "epoch": 0.21908196560437673,
      "grad_norm": 3.0625,
      "learning_rate": 3.651269290529095e-05,
      "loss": 0.9381,
      "step": 62510
    },
    {
      "epoch": 0.21911701311127232,
      "grad_norm": 3.125,
      "learning_rate": 3.651853409502448e-05,
      "loss": 1.0738,
      "step": 62520
    },
    {
      "epoch": 0.21915206061816792,
      "grad_norm": 4.03125,
      "learning_rate": 3.6524375284758e-05,
      "loss": 1.0946,
      "step": 62530
    },
    {
      "epoch": 0.21918710812506353,
      "grad_norm": 2.765625,
      "learning_rate": 3.653021647449152e-05,
      "loss": 0.9545,
      "step": 62540
    },
    {
      "epoch": 0.21922215563195913,
      "grad_norm": 3.0625,
      "learning_rate": 3.653605766422505e-05,
      "loss": 1.0235,
      "step": 62550
    },
    {
      "epoch": 0.21925720313885472,
      "grad_norm": 2.921875,
      "learning_rate": 3.654189885395858e-05,
      "loss": 0.9346,
      "step": 62560
    },
    {
      "epoch": 0.2192922506457503,
      "grad_norm": 3.078125,
      "learning_rate": 3.65477400436921e-05,
      "loss": 0.9757,
      "step": 62570
    },
    {
      "epoch": 0.2193272981526459,
      "grad_norm": 2.71875,
      "learning_rate": 3.655358123342563e-05,
      "loss": 0.8778,
      "step": 62580
    },
    {
      "epoch": 0.21936234565954152,
      "grad_norm": 3.109375,
      "learning_rate": 3.655942242315915e-05,
      "loss": 0.9821,
      "step": 62590
    },
    {
      "epoch": 0.2193973931664371,
      "grad_norm": 3.25,
      "learning_rate": 3.656526361289268e-05,
      "loss": 0.9675,
      "step": 62600
    },
    {
      "epoch": 0.2194324406733327,
      "grad_norm": 3.0625,
      "learning_rate": 3.65711048026262e-05,
      "loss": 1.012,
      "step": 62610
    },
    {
      "epoch": 0.2194674881802283,
      "grad_norm": 3.0,
      "learning_rate": 3.657694599235973e-05,
      "loss": 0.9638,
      "step": 62620
    },
    {
      "epoch": 0.21950253568712388,
      "grad_norm": 3.46875,
      "learning_rate": 3.658278718209325e-05,
      "loss": 0.8994,
      "step": 62630
    },
    {
      "epoch": 0.2195375831940195,
      "grad_norm": 3.4375,
      "learning_rate": 3.6588628371826777e-05,
      "loss": 1.0218,
      "step": 62640
    },
    {
      "epoch": 0.2195726307009151,
      "grad_norm": 2.8125,
      "learning_rate": 3.6594469561560305e-05,
      "loss": 0.9709,
      "step": 62650
    },
    {
      "epoch": 0.21960767820781069,
      "grad_norm": 2.859375,
      "learning_rate": 3.6600310751293826e-05,
      "loss": 0.8748,
      "step": 62660
    },
    {
      "epoch": 0.21964272571470628,
      "grad_norm": 2.796875,
      "learning_rate": 3.660615194102735e-05,
      "loss": 0.9671,
      "step": 62670
    },
    {
      "epoch": 0.21967777322160187,
      "grad_norm": 2.609375,
      "learning_rate": 3.6611993130760876e-05,
      "loss": 0.9261,
      "step": 62680
    },
    {
      "epoch": 0.2197128207284975,
      "grad_norm": 3.28125,
      "learning_rate": 3.66178343204944e-05,
      "loss": 0.9982,
      "step": 62690
    },
    {
      "epoch": 0.21974786823539308,
      "grad_norm": 3.25,
      "learning_rate": 3.6623675510227926e-05,
      "loss": 0.9851,
      "step": 62700
    },
    {
      "epoch": 0.21978291574228867,
      "grad_norm": 3.375,
      "learning_rate": 3.6629516699961454e-05,
      "loss": 1.0715,
      "step": 62710
    },
    {
      "epoch": 0.21981796324918426,
      "grad_norm": 3.5,
      "learning_rate": 3.6635357889694975e-05,
      "loss": 0.8999,
      "step": 62720
    },
    {
      "epoch": 0.21985301075607985,
      "grad_norm": 3.21875,
      "learning_rate": 3.6641199079428503e-05,
      "loss": 0.9385,
      "step": 62730
    },
    {
      "epoch": 0.21988805826297547,
      "grad_norm": 3.109375,
      "learning_rate": 3.6647040269162025e-05,
      "loss": 0.9887,
      "step": 62740
    },
    {
      "epoch": 0.21992310576987106,
      "grad_norm": 3.328125,
      "learning_rate": 3.6652881458895546e-05,
      "loss": 1.1293,
      "step": 62750
    },
    {
      "epoch": 0.21995815327676665,
      "grad_norm": 3.0,
      "learning_rate": 3.6658722648629075e-05,
      "loss": 0.9139,
      "step": 62760
    },
    {
      "epoch": 0.21999320078366225,
      "grad_norm": 3.015625,
      "learning_rate": 3.6664563838362596e-05,
      "loss": 1.018,
      "step": 62770
    },
    {
      "epoch": 0.22002824829055784,
      "grad_norm": 2.96875,
      "learning_rate": 3.6670405028096124e-05,
      "loss": 1.005,
      "step": 62780
    },
    {
      "epoch": 0.22006329579745346,
      "grad_norm": 3.4375,
      "learning_rate": 3.667624621782965e-05,
      "loss": 0.9457,
      "step": 62790
    },
    {
      "epoch": 0.22009834330434905,
      "grad_norm": 3.125,
      "learning_rate": 3.6682087407563174e-05,
      "loss": 0.9707,
      "step": 62800
    },
    {
      "epoch": 0.22013339081124464,
      "grad_norm": 2.984375,
      "learning_rate": 3.66879285972967e-05,
      "loss": 0.9895,
      "step": 62810
    },
    {
      "epoch": 0.22016843831814023,
      "grad_norm": 3.09375,
      "learning_rate": 3.6693769787030224e-05,
      "loss": 1.0104,
      "step": 62820
    },
    {
      "epoch": 0.22020348582503585,
      "grad_norm": 2.90625,
      "learning_rate": 3.669961097676375e-05,
      "loss": 0.9252,
      "step": 62830
    },
    {
      "epoch": 0.22023853333193144,
      "grad_norm": 3.515625,
      "learning_rate": 3.6705452166497273e-05,
      "loss": 0.9833,
      "step": 62840
    },
    {
      "epoch": 0.22027358083882703,
      "grad_norm": 2.953125,
      "learning_rate": 3.6711293356230795e-05,
      "loss": 0.9469,
      "step": 62850
    },
    {
      "epoch": 0.22030862834572262,
      "grad_norm": 3.3125,
      "learning_rate": 3.671713454596432e-05,
      "loss": 0.9259,
      "step": 62860
    },
    {
      "epoch": 0.22034367585261821,
      "grad_norm": 3.234375,
      "learning_rate": 3.672297573569785e-05,
      "loss": 0.9802,
      "step": 62870
    },
    {
      "epoch": 0.22037872335951383,
      "grad_norm": 3.046875,
      "learning_rate": 3.672881692543137e-05,
      "loss": 1.0592,
      "step": 62880
    },
    {
      "epoch": 0.22041377086640943,
      "grad_norm": 2.953125,
      "learning_rate": 3.67346581151649e-05,
      "loss": 0.9518,
      "step": 62890
    },
    {
      "epoch": 0.22044881837330502,
      "grad_norm": 3.15625,
      "learning_rate": 3.674049930489842e-05,
      "loss": 0.9421,
      "step": 62900
    },
    {
      "epoch": 0.2204838658802006,
      "grad_norm": 3.484375,
      "learning_rate": 3.674634049463195e-05,
      "loss": 0.9395,
      "step": 62910
    },
    {
      "epoch": 0.2205189133870962,
      "grad_norm": 3.34375,
      "learning_rate": 3.675218168436547e-05,
      "loss": 0.9352,
      "step": 62920
    },
    {
      "epoch": 0.22055396089399182,
      "grad_norm": 2.65625,
      "learning_rate": 3.6758022874098994e-05,
      "loss": 0.8913,
      "step": 62930
    },
    {
      "epoch": 0.2205890084008874,
      "grad_norm": 3.25,
      "learning_rate": 3.676386406383252e-05,
      "loss": 0.9635,
      "step": 62940
    },
    {
      "epoch": 0.220624055907783,
      "grad_norm": 3.4375,
      "learning_rate": 3.676970525356605e-05,
      "loss": 0.9701,
      "step": 62950
    },
    {
      "epoch": 0.2206591034146786,
      "grad_norm": 3.171875,
      "learning_rate": 3.677554644329957e-05,
      "loss": 0.9611,
      "step": 62960
    },
    {
      "epoch": 0.22069415092157418,
      "grad_norm": 2.953125,
      "learning_rate": 3.67813876330331e-05,
      "loss": 0.9728,
      "step": 62970
    },
    {
      "epoch": 0.2207291984284698,
      "grad_norm": 3.265625,
      "learning_rate": 3.678722882276662e-05,
      "loss": 1.0024,
      "step": 62980
    },
    {
      "epoch": 0.2207642459353654,
      "grad_norm": 3.4375,
      "learning_rate": 3.679307001250015e-05,
      "loss": 1.0833,
      "step": 62990
    },
    {
      "epoch": 0.22079929344226099,
      "grad_norm": 3.203125,
      "learning_rate": 3.679891120223367e-05,
      "loss": 0.9926,
      "step": 63000
    },
    {
      "epoch": 0.22083434094915658,
      "grad_norm": 2.609375,
      "learning_rate": 3.680475239196719e-05,
      "loss": 1.0184,
      "step": 63010
    },
    {
      "epoch": 0.22086938845605217,
      "grad_norm": 3.125,
      "learning_rate": 3.681059358170073e-05,
      "loss": 0.9821,
      "step": 63020
    },
    {
      "epoch": 0.2209044359629478,
      "grad_norm": 2.75,
      "learning_rate": 3.681643477143425e-05,
      "loss": 1.0119,
      "step": 63030
    },
    {
      "epoch": 0.22093948346984338,
      "grad_norm": 2.734375,
      "learning_rate": 3.682227596116778e-05,
      "loss": 0.9406,
      "step": 63040
    },
    {
      "epoch": 0.22097453097673897,
      "grad_norm": 3.53125,
      "learning_rate": 3.68281171509013e-05,
      "loss": 1.0112,
      "step": 63050
    },
    {
      "epoch": 0.22100957848363456,
      "grad_norm": 2.59375,
      "learning_rate": 3.683395834063482e-05,
      "loss": 0.8541,
      "step": 63060
    },
    {
      "epoch": 0.22104462599053015,
      "grad_norm": 3.15625,
      "learning_rate": 3.683979953036835e-05,
      "loss": 1.0256,
      "step": 63070
    },
    {
      "epoch": 0.22107967349742577,
      "grad_norm": 3.015625,
      "learning_rate": 3.684564072010187e-05,
      "loss": 0.9926,
      "step": 63080
    },
    {
      "epoch": 0.22111472100432136,
      "grad_norm": 3.296875,
      "learning_rate": 3.68514819098354e-05,
      "loss": 0.9999,
      "step": 63090
    },
    {
      "epoch": 0.22114976851121695,
      "grad_norm": 3.203125,
      "learning_rate": 3.6857323099568926e-05,
      "loss": 0.966,
      "step": 63100
    },
    {
      "epoch": 0.22118481601811255,
      "grad_norm": 3.21875,
      "learning_rate": 3.686316428930245e-05,
      "loss": 1.0197,
      "step": 63110
    },
    {
      "epoch": 0.22121986352500814,
      "grad_norm": 3.25,
      "learning_rate": 3.6869005479035976e-05,
      "loss": 0.9617,
      "step": 63120
    },
    {
      "epoch": 0.22125491103190376,
      "grad_norm": 3.265625,
      "learning_rate": 3.68748466687695e-05,
      "loss": 1.0312,
      "step": 63130
    },
    {
      "epoch": 0.22128995853879935,
      "grad_norm": 3.015625,
      "learning_rate": 3.688068785850302e-05,
      "loss": 1.0265,
      "step": 63140
    },
    {
      "epoch": 0.22132500604569494,
      "grad_norm": 2.921875,
      "learning_rate": 3.688652904823655e-05,
      "loss": 1.0536,
      "step": 63150
    },
    {
      "epoch": 0.22136005355259053,
      "grad_norm": 3.25,
      "learning_rate": 3.689237023797007e-05,
      "loss": 0.9471,
      "step": 63160
    },
    {
      "epoch": 0.22139510105948612,
      "grad_norm": 3.234375,
      "learning_rate": 3.68982114277036e-05,
      "loss": 0.9881,
      "step": 63170
    },
    {
      "epoch": 0.22143014856638174,
      "grad_norm": 3.390625,
      "learning_rate": 3.6904052617437125e-05,
      "loss": 0.9887,
      "step": 63180
    },
    {
      "epoch": 0.22146519607327733,
      "grad_norm": 3.375,
      "learning_rate": 3.6909893807170646e-05,
      "loss": 1.0847,
      "step": 63190
    },
    {
      "epoch": 0.22150024358017292,
      "grad_norm": 3.078125,
      "learning_rate": 3.6915734996904175e-05,
      "loss": 1.0052,
      "step": 63200
    },
    {
      "epoch": 0.22153529108706851,
      "grad_norm": 3.484375,
      "learning_rate": 3.6921576186637696e-05,
      "loss": 1.006,
      "step": 63210
    },
    {
      "epoch": 0.2215703385939641,
      "grad_norm": 3.09375,
      "learning_rate": 3.692741737637122e-05,
      "loss": 0.9571,
      "step": 63220
    },
    {
      "epoch": 0.22160538610085972,
      "grad_norm": 3.359375,
      "learning_rate": 3.6933258566104746e-05,
      "loss": 0.9845,
      "step": 63230
    },
    {
      "epoch": 0.22164043360775532,
      "grad_norm": 3.78125,
      "learning_rate": 3.693909975583827e-05,
      "loss": 1.0619,
      "step": 63240
    },
    {
      "epoch": 0.2216754811146509,
      "grad_norm": 2.703125,
      "learning_rate": 3.6944940945571795e-05,
      "loss": 0.9762,
      "step": 63250
    },
    {
      "epoch": 0.2217105286215465,
      "grad_norm": 2.78125,
      "learning_rate": 3.6950782135305324e-05,
      "loss": 0.9736,
      "step": 63260
    },
    {
      "epoch": 0.2217455761284421,
      "grad_norm": 3.109375,
      "learning_rate": 3.6956623325038845e-05,
      "loss": 0.9789,
      "step": 63270
    },
    {
      "epoch": 0.2217806236353377,
      "grad_norm": 3.125,
      "learning_rate": 3.696246451477237e-05,
      "loss": 0.9981,
      "step": 63280
    },
    {
      "epoch": 0.2218156711422333,
      "grad_norm": 3.59375,
      "learning_rate": 3.6968305704505895e-05,
      "loss": 1.0187,
      "step": 63290
    },
    {
      "epoch": 0.2218507186491289,
      "grad_norm": 2.921875,
      "learning_rate": 3.697414689423942e-05,
      "loss": 0.9001,
      "step": 63300
    },
    {
      "epoch": 0.22188576615602448,
      "grad_norm": 3.59375,
      "learning_rate": 3.6979988083972944e-05,
      "loss": 0.9971,
      "step": 63310
    },
    {
      "epoch": 0.22192081366292007,
      "grad_norm": 3.421875,
      "learning_rate": 3.6985829273706466e-05,
      "loss": 0.9513,
      "step": 63320
    },
    {
      "epoch": 0.2219558611698157,
      "grad_norm": 3.578125,
      "learning_rate": 3.699167046344e-05,
      "loss": 1.0229,
      "step": 63330
    },
    {
      "epoch": 0.22199090867671128,
      "grad_norm": 3.21875,
      "learning_rate": 3.699751165317352e-05,
      "loss": 0.8748,
      "step": 63340
    },
    {
      "epoch": 0.22202595618360688,
      "grad_norm": 3.265625,
      "learning_rate": 3.7003352842907044e-05,
      "loss": 1.029,
      "step": 63350
    },
    {
      "epoch": 0.22206100369050247,
      "grad_norm": 3.46875,
      "learning_rate": 3.700919403264057e-05,
      "loss": 1.1022,
      "step": 63360
    },
    {
      "epoch": 0.2220960511973981,
      "grad_norm": 3.40625,
      "learning_rate": 3.7015035222374094e-05,
      "loss": 0.9896,
      "step": 63370
    },
    {
      "epoch": 0.22213109870429368,
      "grad_norm": 3.703125,
      "learning_rate": 3.702087641210762e-05,
      "loss": 1.0118,
      "step": 63380
    },
    {
      "epoch": 0.22216614621118927,
      "grad_norm": 3.359375,
      "learning_rate": 3.702671760184114e-05,
      "loss": 0.8528,
      "step": 63390
    },
    {
      "epoch": 0.22220119371808486,
      "grad_norm": 3.125,
      "learning_rate": 3.7032558791574665e-05,
      "loss": 1.0289,
      "step": 63400
    },
    {
      "epoch": 0.22223624122498045,
      "grad_norm": 2.875,
      "learning_rate": 3.70383999813082e-05,
      "loss": 0.9813,
      "step": 63410
    },
    {
      "epoch": 0.22227128873187607,
      "grad_norm": 3.203125,
      "learning_rate": 3.704424117104172e-05,
      "loss": 0.9697,
      "step": 63420
    },
    {
      "epoch": 0.22230633623877166,
      "grad_norm": 3.390625,
      "learning_rate": 3.705008236077524e-05,
      "loss": 0.9954,
      "step": 63430
    },
    {
      "epoch": 0.22234138374566725,
      "grad_norm": 3.40625,
      "learning_rate": 3.705592355050877e-05,
      "loss": 1.0032,
      "step": 63440
    },
    {
      "epoch": 0.22237643125256285,
      "grad_norm": 3.03125,
      "learning_rate": 3.706176474024229e-05,
      "loss": 0.9851,
      "step": 63450
    },
    {
      "epoch": 0.22241147875945844,
      "grad_norm": 3.40625,
      "learning_rate": 3.706760592997582e-05,
      "loss": 1.0096,
      "step": 63460
    },
    {
      "epoch": 0.22244652626635406,
      "grad_norm": 3.09375,
      "learning_rate": 3.707344711970934e-05,
      "loss": 0.9265,
      "step": 63470
    },
    {
      "epoch": 0.22248157377324965,
      "grad_norm": 3.1875,
      "learning_rate": 3.7079288309442863e-05,
      "loss": 0.9618,
      "step": 63480
    },
    {
      "epoch": 0.22251662128014524,
      "grad_norm": 3.078125,
      "learning_rate": 3.70851294991764e-05,
      "loss": 0.8844,
      "step": 63490
    },
    {
      "epoch": 0.22255166878704083,
      "grad_norm": 3.125,
      "learning_rate": 3.709097068890992e-05,
      "loss": 0.9936,
      "step": 63500
    },
    {
      "epoch": 0.22258671629393642,
      "grad_norm": 2.75,
      "learning_rate": 3.709681187864345e-05,
      "loss": 0.9531,
      "step": 63510
    },
    {
      "epoch": 0.22262176380083204,
      "grad_norm": 3.015625,
      "learning_rate": 3.710265306837697e-05,
      "loss": 0.9715,
      "step": 63520
    },
    {
      "epoch": 0.22265681130772763,
      "grad_norm": 2.84375,
      "learning_rate": 3.710849425811049e-05,
      "loss": 0.9843,
      "step": 63530
    },
    {
      "epoch": 0.22269185881462322,
      "grad_norm": 3.09375,
      "learning_rate": 3.711433544784402e-05,
      "loss": 0.9229,
      "step": 63540
    },
    {
      "epoch": 0.2227269063215188,
      "grad_norm": 2.828125,
      "learning_rate": 3.712017663757754e-05,
      "loss": 0.988,
      "step": 63550
    },
    {
      "epoch": 0.2227619538284144,
      "grad_norm": 3.546875,
      "learning_rate": 3.712601782731107e-05,
      "loss": 1.0468,
      "step": 63560
    },
    {
      "epoch": 0.22279700133531002,
      "grad_norm": 3.015625,
      "learning_rate": 3.71318590170446e-05,
      "loss": 1.024,
      "step": 63570
    },
    {
      "epoch": 0.22283204884220562,
      "grad_norm": 3.453125,
      "learning_rate": 3.713770020677812e-05,
      "loss": 1.0009,
      "step": 63580
    },
    {
      "epoch": 0.2228670963491012,
      "grad_norm": 3.078125,
      "learning_rate": 3.714354139651165e-05,
      "loss": 0.9419,
      "step": 63590
    },
    {
      "epoch": 0.2229021438559968,
      "grad_norm": 3.4375,
      "learning_rate": 3.714938258624517e-05,
      "loss": 0.9866,
      "step": 63600
    },
    {
      "epoch": 0.2229371913628924,
      "grad_norm": 2.96875,
      "learning_rate": 3.715522377597869e-05,
      "loss": 1.0144,
      "step": 63610
    },
    {
      "epoch": 0.222972238869788,
      "grad_norm": 3.0,
      "learning_rate": 3.716106496571222e-05,
      "loss": 1.0292,
      "step": 63620
    },
    {
      "epoch": 0.2230072863766836,
      "grad_norm": 3.03125,
      "learning_rate": 3.716690615544574e-05,
      "loss": 1.0189,
      "step": 63630
    },
    {
      "epoch": 0.2230423338835792,
      "grad_norm": 3.15625,
      "learning_rate": 3.717274734517927e-05,
      "loss": 1.0332,
      "step": 63640
    },
    {
      "epoch": 0.22307738139047478,
      "grad_norm": 2.90625,
      "learning_rate": 3.7178588534912796e-05,
      "loss": 0.937,
      "step": 63650
    },
    {
      "epoch": 0.22311242889737037,
      "grad_norm": 3.09375,
      "learning_rate": 3.718442972464632e-05,
      "loss": 0.9529,
      "step": 63660
    },
    {
      "epoch": 0.223147476404266,
      "grad_norm": 3.140625,
      "learning_rate": 3.7190270914379846e-05,
      "loss": 1.0346,
      "step": 63670
    },
    {
      "epoch": 0.22318252391116158,
      "grad_norm": 3.21875,
      "learning_rate": 3.719611210411337e-05,
      "loss": 1.0008,
      "step": 63680
    },
    {
      "epoch": 0.22321757141805718,
      "grad_norm": 3.203125,
      "learning_rate": 3.720195329384689e-05,
      "loss": 0.9345,
      "step": 63690
    },
    {
      "epoch": 0.22325261892495277,
      "grad_norm": 3.359375,
      "learning_rate": 3.720779448358042e-05,
      "loss": 0.9257,
      "step": 63700
    },
    {
      "epoch": 0.22328766643184836,
      "grad_norm": 2.71875,
      "learning_rate": 3.721363567331394e-05,
      "loss": 0.8882,
      "step": 63710
    },
    {
      "epoch": 0.22332271393874398,
      "grad_norm": 3.09375,
      "learning_rate": 3.721947686304747e-05,
      "loss": 1.0356,
      "step": 63720
    },
    {
      "epoch": 0.22335776144563957,
      "grad_norm": 3.0625,
      "learning_rate": 3.7225318052780995e-05,
      "loss": 0.9066,
      "step": 63730
    },
    {
      "epoch": 0.22339280895253516,
      "grad_norm": 3.15625,
      "learning_rate": 3.7231159242514516e-05,
      "loss": 1.0516,
      "step": 63740
    },
    {
      "epoch": 0.22342785645943075,
      "grad_norm": 3.359375,
      "learning_rate": 3.7237000432248044e-05,
      "loss": 0.9015,
      "step": 63750
    },
    {
      "epoch": 0.22346290396632634,
      "grad_norm": 3.203125,
      "learning_rate": 3.7242841621981566e-05,
      "loss": 0.9761,
      "step": 63760
    },
    {
      "epoch": 0.22349795147322196,
      "grad_norm": 3.375,
      "learning_rate": 3.7248682811715094e-05,
      "loss": 1.0304,
      "step": 63770
    },
    {
      "epoch": 0.22353299898011755,
      "grad_norm": 3.453125,
      "learning_rate": 3.7254524001448616e-05,
      "loss": 0.9746,
      "step": 63780
    },
    {
      "epoch": 0.22356804648701314,
      "grad_norm": 3.3125,
      "learning_rate": 3.726036519118214e-05,
      "loss": 0.9494,
      "step": 63790
    },
    {
      "epoch": 0.22360309399390874,
      "grad_norm": 3.078125,
      "learning_rate": 3.726620638091567e-05,
      "loss": 0.9679,
      "step": 63800
    },
    {
      "epoch": 0.22363814150080433,
      "grad_norm": 3.765625,
      "learning_rate": 3.7272047570649193e-05,
      "loss": 1.0004,
      "step": 63810
    },
    {
      "epoch": 0.22367318900769995,
      "grad_norm": 3.3125,
      "learning_rate": 3.7277888760382715e-05,
      "loss": 0.9773,
      "step": 63820
    },
    {
      "epoch": 0.22370823651459554,
      "grad_norm": 2.953125,
      "learning_rate": 3.728372995011624e-05,
      "loss": 0.8801,
      "step": 63830
    },
    {
      "epoch": 0.22374328402149113,
      "grad_norm": 3.359375,
      "learning_rate": 3.7289571139849765e-05,
      "loss": 1.0211,
      "step": 63840
    },
    {
      "epoch": 0.22377833152838672,
      "grad_norm": 3.015625,
      "learning_rate": 3.729541232958329e-05,
      "loss": 0.9718,
      "step": 63850
    },
    {
      "epoch": 0.2238133790352823,
      "grad_norm": 3.578125,
      "learning_rate": 3.7301253519316814e-05,
      "loss": 0.975,
      "step": 63860
    },
    {
      "epoch": 0.22384842654217793,
      "grad_norm": 3.4375,
      "learning_rate": 3.7307094709050336e-05,
      "loss": 1.0575,
      "step": 63870
    },
    {
      "epoch": 0.22388347404907352,
      "grad_norm": 2.921875,
      "learning_rate": 3.731293589878387e-05,
      "loss": 0.9388,
      "step": 63880
    },
    {
      "epoch": 0.2239185215559691,
      "grad_norm": 3.28125,
      "learning_rate": 3.731877708851739e-05,
      "loss": 1.0738,
      "step": 63890
    },
    {
      "epoch": 0.2239535690628647,
      "grad_norm": 3.09375,
      "learning_rate": 3.7324618278250914e-05,
      "loss": 0.9698,
      "step": 63900
    },
    {
      "epoch": 0.22398861656976032,
      "grad_norm": 3.359375,
      "learning_rate": 3.733045946798444e-05,
      "loss": 0.9796,
      "step": 63910
    },
    {
      "epoch": 0.22402366407665592,
      "grad_norm": 3.3125,
      "learning_rate": 3.733630065771796e-05,
      "loss": 1.045,
      "step": 63920
    },
    {
      "epoch": 0.2240587115835515,
      "grad_norm": 3.234375,
      "learning_rate": 3.734214184745149e-05,
      "loss": 0.9567,
      "step": 63930
    },
    {
      "epoch": 0.2240937590904471,
      "grad_norm": 3.484375,
      "learning_rate": 3.734798303718501e-05,
      "loss": 0.995,
      "step": 63940
    },
    {
      "epoch": 0.2241288065973427,
      "grad_norm": 3.09375,
      "learning_rate": 3.7353824226918535e-05,
      "loss": 1.0832,
      "step": 63950
    },
    {
      "epoch": 0.2241638541042383,
      "grad_norm": 3.109375,
      "learning_rate": 3.735966541665207e-05,
      "loss": 0.9038,
      "step": 63960
    },
    {
      "epoch": 0.2241989016111339,
      "grad_norm": 3.15625,
      "learning_rate": 3.736550660638559e-05,
      "loss": 0.9189,
      "step": 63970
    },
    {
      "epoch": 0.2242339491180295,
      "grad_norm": 3.140625,
      "learning_rate": 3.737134779611912e-05,
      "loss": 0.9708,
      "step": 63980
    },
    {
      "epoch": 0.22426899662492508,
      "grad_norm": 3.328125,
      "learning_rate": 3.737718898585264e-05,
      "loss": 0.9305,
      "step": 63990
    },
    {
      "epoch": 0.22430404413182067,
      "grad_norm": 3.15625,
      "learning_rate": 3.738303017558616e-05,
      "loss": 1.0349,
      "step": 64000
    },
    {
      "epoch": 0.2243390916387163,
      "grad_norm": 3.375,
      "learning_rate": 3.738887136531969e-05,
      "loss": 0.96,
      "step": 64010
    },
    {
      "epoch": 0.22437413914561188,
      "grad_norm": 2.859375,
      "learning_rate": 3.739471255505321e-05,
      "loss": 0.9701,
      "step": 64020
    },
    {
      "epoch": 0.22440918665250748,
      "grad_norm": 3.09375,
      "learning_rate": 3.740055374478674e-05,
      "loss": 0.9836,
      "step": 64030
    },
    {
      "epoch": 0.22444423415940307,
      "grad_norm": 3.375,
      "learning_rate": 3.740639493452027e-05,
      "loss": 1.0063,
      "step": 64040
    },
    {
      "epoch": 0.22447928166629866,
      "grad_norm": 3.265625,
      "learning_rate": 3.741223612425379e-05,
      "loss": 0.9607,
      "step": 64050
    },
    {
      "epoch": 0.22451432917319428,
      "grad_norm": 2.828125,
      "learning_rate": 3.741807731398732e-05,
      "loss": 0.9645,
      "step": 64060
    },
    {
      "epoch": 0.22454937668008987,
      "grad_norm": 3.15625,
      "learning_rate": 3.742391850372084e-05,
      "loss": 1.0029,
      "step": 64070
    },
    {
      "epoch": 0.22458442418698546,
      "grad_norm": 3.15625,
      "learning_rate": 3.742975969345436e-05,
      "loss": 0.9787,
      "step": 64080
    },
    {
      "epoch": 0.22461947169388105,
      "grad_norm": 2.96875,
      "learning_rate": 3.743560088318789e-05,
      "loss": 0.9997,
      "step": 64090
    },
    {
      "epoch": 0.22465451920077664,
      "grad_norm": 3.46875,
      "learning_rate": 3.744144207292141e-05,
      "loss": 1.0081,
      "step": 64100
    },
    {
      "epoch": 0.22468956670767226,
      "grad_norm": 2.875,
      "learning_rate": 3.744728326265494e-05,
      "loss": 1.0751,
      "step": 64110
    },
    {
      "epoch": 0.22472461421456785,
      "grad_norm": 3.0,
      "learning_rate": 3.745312445238847e-05,
      "loss": 1.0011,
      "step": 64120
    },
    {
      "epoch": 0.22475966172146344,
      "grad_norm": 3.078125,
      "learning_rate": 3.745896564212199e-05,
      "loss": 0.944,
      "step": 64130
    },
    {
      "epoch": 0.22479470922835904,
      "grad_norm": 2.453125,
      "learning_rate": 3.746480683185552e-05,
      "loss": 0.9363,
      "step": 64140
    },
    {
      "epoch": 0.22482975673525463,
      "grad_norm": 3.140625,
      "learning_rate": 3.747064802158904e-05,
      "loss": 0.9461,
      "step": 64150
    },
    {
      "epoch": 0.22486480424215025,
      "grad_norm": 2.984375,
      "learning_rate": 3.747648921132256e-05,
      "loss": 0.9609,
      "step": 64160
    },
    {
      "epoch": 0.22489985174904584,
      "grad_norm": 3.359375,
      "learning_rate": 3.748233040105609e-05,
      "loss": 1.0091,
      "step": 64170
    },
    {
      "epoch": 0.22493489925594143,
      "grad_norm": 3.40625,
      "learning_rate": 3.748817159078961e-05,
      "loss": 1.0049,
      "step": 64180
    },
    {
      "epoch": 0.22496994676283702,
      "grad_norm": 2.78125,
      "learning_rate": 3.7494012780523144e-05,
      "loss": 0.972,
      "step": 64190
    },
    {
      "epoch": 0.2250049942697326,
      "grad_norm": 3.03125,
      "learning_rate": 3.7499853970256666e-05,
      "loss": 0.8672,
      "step": 64200
    },
    {
      "epoch": 0.22504004177662823,
      "grad_norm": 2.859375,
      "learning_rate": 3.750569515999019e-05,
      "loss": 0.963,
      "step": 64210
    },
    {
      "epoch": 0.22507508928352382,
      "grad_norm": 3.375,
      "learning_rate": 3.7511536349723715e-05,
      "loss": 1.072,
      "step": 64220
    },
    {
      "epoch": 0.2251101367904194,
      "grad_norm": 3.15625,
      "learning_rate": 3.751737753945724e-05,
      "loss": 1.0075,
      "step": 64230
    },
    {
      "epoch": 0.225145184297315,
      "grad_norm": 3.1875,
      "learning_rate": 3.7523218729190765e-05,
      "loss": 1.0043,
      "step": 64240
    },
    {
      "epoch": 0.2251802318042106,
      "grad_norm": 3.109375,
      "learning_rate": 3.752905991892429e-05,
      "loss": 1.0734,
      "step": 64250
    },
    {
      "epoch": 0.22521527931110621,
      "grad_norm": 3.015625,
      "learning_rate": 3.753490110865781e-05,
      "loss": 0.966,
      "step": 64260
    },
    {
      "epoch": 0.2252503268180018,
      "grad_norm": 3.265625,
      "learning_rate": 3.754074229839134e-05,
      "loss": 1.0209,
      "step": 64270
    },
    {
      "epoch": 0.2252853743248974,
      "grad_norm": 3.15625,
      "learning_rate": 3.7546583488124865e-05,
      "loss": 0.9311,
      "step": 64280
    },
    {
      "epoch": 0.225320421831793,
      "grad_norm": 3.375,
      "learning_rate": 3.7552424677858386e-05,
      "loss": 0.9024,
      "step": 64290
    },
    {
      "epoch": 0.22535546933868858,
      "grad_norm": 2.90625,
      "learning_rate": 3.7558265867591914e-05,
      "loss": 1.0278,
      "step": 64300
    },
    {
      "epoch": 0.2253905168455842,
      "grad_norm": 3.390625,
      "learning_rate": 3.7564107057325436e-05,
      "loss": 0.9848,
      "step": 64310
    },
    {
      "epoch": 0.2254255643524798,
      "grad_norm": 3.234375,
      "learning_rate": 3.7569948247058964e-05,
      "loss": 1.0309,
      "step": 64320
    },
    {
      "epoch": 0.22546061185937538,
      "grad_norm": 3.4375,
      "learning_rate": 3.7575789436792485e-05,
      "loss": 0.9363,
      "step": 64330
    },
    {
      "epoch": 0.22549565936627097,
      "grad_norm": 3.25,
      "learning_rate": 3.758163062652601e-05,
      "loss": 0.9916,
      "step": 64340
    },
    {
      "epoch": 0.22553070687316656,
      "grad_norm": 3.21875,
      "learning_rate": 3.758747181625954e-05,
      "loss": 0.9523,
      "step": 64350
    },
    {
      "epoch": 0.22556575438006218,
      "grad_norm": 3.265625,
      "learning_rate": 3.759331300599306e-05,
      "loss": 1.006,
      "step": 64360
    },
    {
      "epoch": 0.22560080188695777,
      "grad_norm": 3.0625,
      "learning_rate": 3.7599154195726585e-05,
      "loss": 1.1067,
      "step": 64370
    },
    {
      "epoch": 0.22563584939385337,
      "grad_norm": 3.140625,
      "learning_rate": 3.760499538546011e-05,
      "loss": 0.9212,
      "step": 64380
    },
    {
      "epoch": 0.22567089690074896,
      "grad_norm": 3.21875,
      "learning_rate": 3.7610836575193634e-05,
      "loss": 0.9676,
      "step": 64390
    },
    {
      "epoch": 0.22570594440764455,
      "grad_norm": 3.5,
      "learning_rate": 3.761667776492716e-05,
      "loss": 0.9125,
      "step": 64400
    },
    {
      "epoch": 0.22574099191454017,
      "grad_norm": 3.5,
      "learning_rate": 3.7622518954660684e-05,
      "loss": 1.046,
      "step": 64410
    },
    {
      "epoch": 0.22577603942143576,
      "grad_norm": 3.453125,
      "learning_rate": 3.762836014439421e-05,
      "loss": 0.9824,
      "step": 64420
    },
    {
      "epoch": 0.22581108692833135,
      "grad_norm": 3.3125,
      "learning_rate": 3.763420133412774e-05,
      "loss": 1.0857,
      "step": 64430
    },
    {
      "epoch": 0.22584613443522694,
      "grad_norm": 3.09375,
      "learning_rate": 3.764004252386126e-05,
      "loss": 1.0038,
      "step": 64440
    },
    {
      "epoch": 0.22588118194212256,
      "grad_norm": 3.015625,
      "learning_rate": 3.764588371359479e-05,
      "loss": 0.9423,
      "step": 64450
    },
    {
      "epoch": 0.22591622944901815,
      "grad_norm": 3.15625,
      "learning_rate": 3.765172490332831e-05,
      "loss": 0.9967,
      "step": 64460
    },
    {
      "epoch": 0.22595127695591374,
      "grad_norm": 3.328125,
      "learning_rate": 3.765756609306183e-05,
      "loss": 0.9877,
      "step": 64470
    },
    {
      "epoch": 0.22598632446280933,
      "grad_norm": 2.84375,
      "learning_rate": 3.766340728279536e-05,
      "loss": 0.9684,
      "step": 64480
    },
    {
      "epoch": 0.22602137196970493,
      "grad_norm": 3.078125,
      "learning_rate": 3.766924847252888e-05,
      "loss": 0.9554,
      "step": 64490
    },
    {
      "epoch": 0.22605641947660055,
      "grad_norm": 3.046875,
      "learning_rate": 3.767508966226241e-05,
      "loss": 0.908,
      "step": 64500
    },
    {
      "epoch": 0.22609146698349614,
      "grad_norm": 3.59375,
      "learning_rate": 3.768093085199594e-05,
      "loss": 1.0304,
      "step": 64510
    },
    {
      "epoch": 0.22612651449039173,
      "grad_norm": 3.328125,
      "learning_rate": 3.768677204172946e-05,
      "loss": 0.9972,
      "step": 64520
    },
    {
      "epoch": 0.22616156199728732,
      "grad_norm": 3.28125,
      "learning_rate": 3.769261323146299e-05,
      "loss": 1.0494,
      "step": 64530
    },
    {
      "epoch": 0.2261966095041829,
      "grad_norm": 3.53125,
      "learning_rate": 3.769845442119651e-05,
      "loss": 1.0827,
      "step": 64540
    },
    {
      "epoch": 0.22623165701107853,
      "grad_norm": 3.59375,
      "learning_rate": 3.770429561093003e-05,
      "loss": 0.9554,
      "step": 64550
    },
    {
      "epoch": 0.22626670451797412,
      "grad_norm": 3.640625,
      "learning_rate": 3.771013680066356e-05,
      "loss": 0.9901,
      "step": 64560
    },
    {
      "epoch": 0.2263017520248697,
      "grad_norm": 3.53125,
      "learning_rate": 3.771597799039708e-05,
      "loss": 0.9865,
      "step": 64570
    },
    {
      "epoch": 0.2263367995317653,
      "grad_norm": 3.359375,
      "learning_rate": 3.772181918013061e-05,
      "loss": 1.0549,
      "step": 64580
    },
    {
      "epoch": 0.2263718470386609,
      "grad_norm": 3.328125,
      "learning_rate": 3.772766036986414e-05,
      "loss": 1.0005,
      "step": 64590
    },
    {
      "epoch": 0.22640689454555651,
      "grad_norm": 3.515625,
      "learning_rate": 3.773350155959766e-05,
      "loss": 0.9331,
      "step": 64600
    },
    {
      "epoch": 0.2264419420524521,
      "grad_norm": 3.171875,
      "learning_rate": 3.773934274933119e-05,
      "loss": 1.0069,
      "step": 64610
    },
    {
      "epoch": 0.2264769895593477,
      "grad_norm": 3.296875,
      "learning_rate": 3.774518393906471e-05,
      "loss": 0.9043,
      "step": 64620
    },
    {
      "epoch": 0.2265120370662433,
      "grad_norm": 3.03125,
      "learning_rate": 3.775102512879824e-05,
      "loss": 0.9658,
      "step": 64630
    },
    {
      "epoch": 0.22654708457313888,
      "grad_norm": 2.96875,
      "learning_rate": 3.775686631853176e-05,
      "loss": 0.9457,
      "step": 64640
    },
    {
      "epoch": 0.2265821320800345,
      "grad_norm": 2.875,
      "learning_rate": 3.776270750826528e-05,
      "loss": 0.953,
      "step": 64650
    },
    {
      "epoch": 0.2266171795869301,
      "grad_norm": 3.0625,
      "learning_rate": 3.7768548697998815e-05,
      "loss": 0.9804,
      "step": 64660
    },
    {
      "epoch": 0.22665222709382568,
      "grad_norm": 3.09375,
      "learning_rate": 3.777438988773234e-05,
      "loss": 1.0639,
      "step": 64670
    },
    {
      "epoch": 0.22668727460072127,
      "grad_norm": 3.171875,
      "learning_rate": 3.778023107746586e-05,
      "loss": 0.9075,
      "step": 64680
    },
    {
      "epoch": 0.22672232210761686,
      "grad_norm": 2.890625,
      "learning_rate": 3.7786072267199387e-05,
      "loss": 1.0477,
      "step": 64690
    },
    {
      "epoch": 0.22675736961451248,
      "grad_norm": 3.28125,
      "learning_rate": 3.779191345693291e-05,
      "loss": 0.9986,
      "step": 64700
    },
    {
      "epoch": 0.22679241712140807,
      "grad_norm": 3.671875,
      "learning_rate": 3.7797754646666436e-05,
      "loss": 1.0231,
      "step": 64710
    },
    {
      "epoch": 0.22682746462830367,
      "grad_norm": 3.203125,
      "learning_rate": 3.780359583639996e-05,
      "loss": 0.9378,
      "step": 64720
    },
    {
      "epoch": 0.22686251213519926,
      "grad_norm": 2.734375,
      "learning_rate": 3.7809437026133486e-05,
      "loss": 0.9211,
      "step": 64730
    },
    {
      "epoch": 0.22689755964209485,
      "grad_norm": 3.34375,
      "learning_rate": 3.7815278215867014e-05,
      "loss": 0.9312,
      "step": 64740
    },
    {
      "epoch": 0.22693260714899047,
      "grad_norm": 3.5,
      "learning_rate": 3.7821119405600536e-05,
      "loss": 0.9846,
      "step": 64750
    },
    {
      "epoch": 0.22696765465588606,
      "grad_norm": 2.90625,
      "learning_rate": 3.782696059533406e-05,
      "loss": 0.9299,
      "step": 64760
    },
    {
      "epoch": 0.22700270216278165,
      "grad_norm": 3.390625,
      "learning_rate": 3.7832801785067585e-05,
      "loss": 0.9897,
      "step": 64770
    },
    {
      "epoch": 0.22703774966967724,
      "grad_norm": 2.859375,
      "learning_rate": 3.783864297480111e-05,
      "loss": 0.9919,
      "step": 64780
    },
    {
      "epoch": 0.22707279717657283,
      "grad_norm": 3.296875,
      "learning_rate": 3.7844484164534635e-05,
      "loss": 0.9412,
      "step": 64790
    },
    {
      "epoch": 0.22710784468346845,
      "grad_norm": 3.46875,
      "learning_rate": 3.7850325354268156e-05,
      "loss": 1.0209,
      "step": 64800
    },
    {
      "epoch": 0.22714289219036404,
      "grad_norm": 3.1875,
      "learning_rate": 3.7856166544001685e-05,
      "loss": 1.0101,
      "step": 64810
    },
    {
      "epoch": 0.22717793969725963,
      "grad_norm": 3.34375,
      "learning_rate": 3.786200773373521e-05,
      "loss": 0.9916,
      "step": 64820
    },
    {
      "epoch": 0.22721298720415523,
      "grad_norm": 2.625,
      "learning_rate": 3.7867848923468734e-05,
      "loss": 0.9511,
      "step": 64830
    },
    {
      "epoch": 0.22724803471105082,
      "grad_norm": 3.171875,
      "learning_rate": 3.787369011320226e-05,
      "loss": 0.9801,
      "step": 64840
    },
    {
      "epoch": 0.22728308221794644,
      "grad_norm": 3.109375,
      "learning_rate": 3.7879531302935784e-05,
      "loss": 0.9679,
      "step": 64850
    },
    {
      "epoch": 0.22731812972484203,
      "grad_norm": 3.109375,
      "learning_rate": 3.7885372492669306e-05,
      "loss": 0.9342,
      "step": 64860
    },
    {
      "epoch": 0.22735317723173762,
      "grad_norm": 3.03125,
      "learning_rate": 3.7891213682402834e-05,
      "loss": 1.0019,
      "step": 64870
    },
    {
      "epoch": 0.2273882247386332,
      "grad_norm": 3.015625,
      "learning_rate": 3.7897054872136355e-05,
      "loss": 0.9922,
      "step": 64880
    },
    {
      "epoch": 0.2274232722455288,
      "grad_norm": 3.390625,
      "learning_rate": 3.7902896061869883e-05,
      "loss": 1.0019,
      "step": 64890
    },
    {
      "epoch": 0.22745831975242442,
      "grad_norm": 3.328125,
      "learning_rate": 3.790873725160341e-05,
      "loss": 0.9705,
      "step": 64900
    },
    {
      "epoch": 0.22749336725932,
      "grad_norm": 3.078125,
      "learning_rate": 3.791457844133693e-05,
      "loss": 1.0189,
      "step": 64910
    },
    {
      "epoch": 0.2275284147662156,
      "grad_norm": 2.921875,
      "learning_rate": 3.792041963107046e-05,
      "loss": 0.9472,
      "step": 64920
    },
    {
      "epoch": 0.2275634622731112,
      "grad_norm": 3.0,
      "learning_rate": 3.792626082080398e-05,
      "loss": 0.9683,
      "step": 64930
    },
    {
      "epoch": 0.22759850978000679,
      "grad_norm": 2.953125,
      "learning_rate": 3.7932102010537504e-05,
      "loss": 1.0254,
      "step": 64940
    },
    {
      "epoch": 0.2276335572869024,
      "grad_norm": 2.84375,
      "learning_rate": 3.793794320027103e-05,
      "loss": 0.9875,
      "step": 64950
    },
    {
      "epoch": 0.227668604793798,
      "grad_norm": 2.796875,
      "learning_rate": 3.7943784390004554e-05,
      "loss": 0.9848,
      "step": 64960
    },
    {
      "epoch": 0.2277036523006936,
      "grad_norm": 3.578125,
      "learning_rate": 3.794962557973808e-05,
      "loss": 1.0258,
      "step": 64970
    },
    {
      "epoch": 0.22773869980758918,
      "grad_norm": 2.953125,
      "learning_rate": 3.795546676947161e-05,
      "loss": 1.0092,
      "step": 64980
    },
    {
      "epoch": 0.2277737473144848,
      "grad_norm": 3.5,
      "learning_rate": 3.796130795920513e-05,
      "loss": 0.9919,
      "step": 64990
    },
    {
      "epoch": 0.2278087948213804,
      "grad_norm": 3.1875,
      "learning_rate": 3.796714914893866e-05,
      "loss": 1.033,
      "step": 65000
    },
    {
      "epoch": 0.2278087948213804,
      "eval_loss": 0.9266122579574585,
      "eval_runtime": 552.9942,
      "eval_samples_per_second": 687.957,
      "eval_steps_per_second": 57.33,
      "step": 65000
    },
    {
      "epoch": 0.22784384232827598,
      "grad_norm": 2.9375,
      "learning_rate": 3.797299033867218e-05,
      "loss": 0.9527,
      "step": 65010
    },
    {
      "epoch": 0.22787888983517157,
      "grad_norm": 3.0,
      "learning_rate": 3.79788315284057e-05,
      "loss": 0.9796,
      "step": 65020
    },
    {
      "epoch": 0.22791393734206716,
      "grad_norm": 3.0625,
      "learning_rate": 3.798467271813923e-05,
      "loss": 0.9756,
      "step": 65030
    },
    {
      "epoch": 0.22794898484896278,
      "grad_norm": 3.28125,
      "learning_rate": 3.799051390787276e-05,
      "loss": 1.0105,
      "step": 65040
    },
    {
      "epoch": 0.22798403235585837,
      "grad_norm": 3.53125,
      "learning_rate": 3.799635509760628e-05,
      "loss": 0.9095,
      "step": 65050
    },
    {
      "epoch": 0.22801907986275397,
      "grad_norm": 3.34375,
      "learning_rate": 3.800219628733981e-05,
      "loss": 0.9336,
      "step": 65060
    },
    {
      "epoch": 0.22805412736964956,
      "grad_norm": 3.296875,
      "learning_rate": 3.800803747707333e-05,
      "loss": 0.9978,
      "step": 65070
    },
    {
      "epoch": 0.22808917487654515,
      "grad_norm": 3.34375,
      "learning_rate": 3.801387866680686e-05,
      "loss": 0.9206,
      "step": 65080
    },
    {
      "epoch": 0.22812422238344077,
      "grad_norm": 3.53125,
      "learning_rate": 3.801971985654038e-05,
      "loss": 0.9881,
      "step": 65090
    },
    {
      "epoch": 0.22815926989033636,
      "grad_norm": 3.5625,
      "learning_rate": 3.802556104627391e-05,
      "loss": 1.1002,
      "step": 65100
    },
    {
      "epoch": 0.22819431739723195,
      "grad_norm": 3.265625,
      "learning_rate": 3.803140223600743e-05,
      "loss": 1.0746,
      "step": 65110
    },
    {
      "epoch": 0.22822936490412754,
      "grad_norm": 2.96875,
      "learning_rate": 3.803724342574096e-05,
      "loss": 0.9208,
      "step": 65120
    },
    {
      "epoch": 0.22826441241102313,
      "grad_norm": 3.3125,
      "learning_rate": 3.8043084615474486e-05,
      "loss": 1.0011,
      "step": 65130
    },
    {
      "epoch": 0.22829945991791875,
      "grad_norm": 3.078125,
      "learning_rate": 3.804892580520801e-05,
      "loss": 0.9908,
      "step": 65140
    },
    {
      "epoch": 0.22833450742481434,
      "grad_norm": 3.25,
      "learning_rate": 3.805476699494153e-05,
      "loss": 0.9549,
      "step": 65150
    },
    {
      "epoch": 0.22836955493170993,
      "grad_norm": 3.125,
      "learning_rate": 3.806060818467506e-05,
      "loss": 1.0221,
      "step": 65160
    },
    {
      "epoch": 0.22840460243860553,
      "grad_norm": 3.296875,
      "learning_rate": 3.806644937440858e-05,
      "loss": 0.943,
      "step": 65170
    },
    {
      "epoch": 0.22843964994550112,
      "grad_norm": 3.09375,
      "learning_rate": 3.807229056414211e-05,
      "loss": 0.9266,
      "step": 65180
    },
    {
      "epoch": 0.22847469745239674,
      "grad_norm": 2.90625,
      "learning_rate": 3.807813175387563e-05,
      "loss": 0.9953,
      "step": 65190
    },
    {
      "epoch": 0.22850974495929233,
      "grad_norm": 3.46875,
      "learning_rate": 3.808397294360916e-05,
      "loss": 1.0182,
      "step": 65200
    },
    {
      "epoch": 0.22854479246618792,
      "grad_norm": 2.65625,
      "learning_rate": 3.8089814133342685e-05,
      "loss": 0.8418,
      "step": 65210
    },
    {
      "epoch": 0.2285798399730835,
      "grad_norm": 3.234375,
      "learning_rate": 3.809565532307621e-05,
      "loss": 0.9977,
      "step": 65220
    },
    {
      "epoch": 0.2286148874799791,
      "grad_norm": 3.40625,
      "learning_rate": 3.810149651280973e-05,
      "loss": 1.1158,
      "step": 65230
    },
    {
      "epoch": 0.22864993498687472,
      "grad_norm": 3.4375,
      "learning_rate": 3.8107337702543256e-05,
      "loss": 0.9291,
      "step": 65240
    },
    {
      "epoch": 0.2286849824937703,
      "grad_norm": 3.09375,
      "learning_rate": 3.811317889227678e-05,
      "loss": 1.0356,
      "step": 65250
    },
    {
      "epoch": 0.2287200300006659,
      "grad_norm": 3.609375,
      "learning_rate": 3.8119020082010306e-05,
      "loss": 1.0443,
      "step": 65260
    },
    {
      "epoch": 0.2287550775075615,
      "grad_norm": 3.078125,
      "learning_rate": 3.812486127174383e-05,
      "loss": 0.895,
      "step": 65270
    },
    {
      "epoch": 0.22879012501445709,
      "grad_norm": 4.09375,
      "learning_rate": 3.8130702461477356e-05,
      "loss": 0.9934,
      "step": 65280
    },
    {
      "epoch": 0.2288251725213527,
      "grad_norm": 3.234375,
      "learning_rate": 3.8136543651210884e-05,
      "loss": 1.0223,
      "step": 65290
    },
    {
      "epoch": 0.2288602200282483,
      "grad_norm": 2.765625,
      "learning_rate": 3.8142384840944405e-05,
      "loss": 0.9198,
      "step": 65300
    },
    {
      "epoch": 0.2288952675351439,
      "grad_norm": 3.109375,
      "learning_rate": 3.8148226030677934e-05,
      "loss": 0.958,
      "step": 65310
    },
    {
      "epoch": 0.22893031504203948,
      "grad_norm": 3.390625,
      "learning_rate": 3.8154067220411455e-05,
      "loss": 1.0539,
      "step": 65320
    },
    {
      "epoch": 0.22896536254893507,
      "grad_norm": 2.4375,
      "learning_rate": 3.8159908410144977e-05,
      "loss": 0.9762,
      "step": 65330
    },
    {
      "epoch": 0.2290004100558307,
      "grad_norm": 3.546875,
      "learning_rate": 3.8165749599878505e-05,
      "loss": 0.9824,
      "step": 65340
    },
    {
      "epoch": 0.22903545756272628,
      "grad_norm": 3.203125,
      "learning_rate": 3.8171590789612026e-05,
      "loss": 1.0333,
      "step": 65350
    },
    {
      "epoch": 0.22907050506962187,
      "grad_norm": 3.1875,
      "learning_rate": 3.8177431979345555e-05,
      "loss": 1.0054,
      "step": 65360
    },
    {
      "epoch": 0.22910555257651746,
      "grad_norm": 3.546875,
      "learning_rate": 3.818327316907908e-05,
      "loss": 0.9201,
      "step": 65370
    },
    {
      "epoch": 0.22914060008341305,
      "grad_norm": 3.484375,
      "learning_rate": 3.8189114358812604e-05,
      "loss": 0.9854,
      "step": 65380
    },
    {
      "epoch": 0.22917564759030867,
      "grad_norm": 3.3125,
      "learning_rate": 3.819495554854613e-05,
      "loss": 0.9878,
      "step": 65390
    },
    {
      "epoch": 0.22921069509720426,
      "grad_norm": 2.921875,
      "learning_rate": 3.8200796738279654e-05,
      "loss": 1.0911,
      "step": 65400
    },
    {
      "epoch": 0.22924574260409986,
      "grad_norm": 2.875,
      "learning_rate": 3.8206637928013175e-05,
      "loss": 0.9516,
      "step": 65410
    },
    {
      "epoch": 0.22928079011099545,
      "grad_norm": 3.21875,
      "learning_rate": 3.8212479117746704e-05,
      "loss": 1.0179,
      "step": 65420
    },
    {
      "epoch": 0.22931583761789104,
      "grad_norm": 3.546875,
      "learning_rate": 3.821832030748023e-05,
      "loss": 0.9789,
      "step": 65430
    },
    {
      "epoch": 0.22935088512478666,
      "grad_norm": 3.53125,
      "learning_rate": 3.822416149721375e-05,
      "loss": 1.0052,
      "step": 65440
    },
    {
      "epoch": 0.22938593263168225,
      "grad_norm": 2.703125,
      "learning_rate": 3.823000268694728e-05,
      "loss": 0.9803,
      "step": 65450
    },
    {
      "epoch": 0.22942098013857784,
      "grad_norm": 3.25,
      "learning_rate": 3.82358438766808e-05,
      "loss": 1.0517,
      "step": 65460
    },
    {
      "epoch": 0.22945602764547343,
      "grad_norm": 3.1875,
      "learning_rate": 3.824168506641433e-05,
      "loss": 0.8985,
      "step": 65470
    },
    {
      "epoch": 0.22949107515236902,
      "grad_norm": 3.015625,
      "learning_rate": 3.824752625614785e-05,
      "loss": 0.9651,
      "step": 65480
    },
    {
      "epoch": 0.22952612265926464,
      "grad_norm": 3.265625,
      "learning_rate": 3.8253367445881374e-05,
      "loss": 0.9204,
      "step": 65490
    },
    {
      "epoch": 0.22956117016616023,
      "grad_norm": 3.140625,
      "learning_rate": 3.82592086356149e-05,
      "loss": 1.0242,
      "step": 65500
    },
    {
      "epoch": 0.22959621767305582,
      "grad_norm": 3.171875,
      "learning_rate": 3.826504982534843e-05,
      "loss": 0.962,
      "step": 65510
    },
    {
      "epoch": 0.22963126517995142,
      "grad_norm": 3.5,
      "learning_rate": 3.827089101508196e-05,
      "loss": 1.0706,
      "step": 65520
    },
    {
      "epoch": 0.229666312686847,
      "grad_norm": 2.921875,
      "learning_rate": 3.827673220481548e-05,
      "loss": 0.8833,
      "step": 65530
    },
    {
      "epoch": 0.22970136019374263,
      "grad_norm": 2.53125,
      "learning_rate": 3.8282573394549e-05,
      "loss": 0.931,
      "step": 65540
    },
    {
      "epoch": 0.22973640770063822,
      "grad_norm": 2.9375,
      "learning_rate": 3.828841458428253e-05,
      "loss": 1.0145,
      "step": 65550
    },
    {
      "epoch": 0.2297714552075338,
      "grad_norm": 3.328125,
      "learning_rate": 3.829425577401605e-05,
      "loss": 1.0213,
      "step": 65560
    },
    {
      "epoch": 0.2298065027144294,
      "grad_norm": 2.921875,
      "learning_rate": 3.830009696374958e-05,
      "loss": 1.0073,
      "step": 65570
    },
    {
      "epoch": 0.22984155022132502,
      "grad_norm": 3.34375,
      "learning_rate": 3.83059381534831e-05,
      "loss": 1.0189,
      "step": 65580
    },
    {
      "epoch": 0.2298765977282206,
      "grad_norm": 3.234375,
      "learning_rate": 3.831177934321663e-05,
      "loss": 0.9684,
      "step": 65590
    },
    {
      "epoch": 0.2299116452351162,
      "grad_norm": 3.546875,
      "learning_rate": 3.831762053295016e-05,
      "loss": 1.0281,
      "step": 65600
    },
    {
      "epoch": 0.2299466927420118,
      "grad_norm": 3.015625,
      "learning_rate": 3.832346172268368e-05,
      "loss": 0.982,
      "step": 65610
    },
    {
      "epoch": 0.22998174024890738,
      "grad_norm": 2.96875,
      "learning_rate": 3.83293029124172e-05,
      "loss": 1.0786,
      "step": 65620
    },
    {
      "epoch": 0.230016787755803,
      "grad_norm": 3.4375,
      "learning_rate": 3.833514410215073e-05,
      "loss": 1.0812,
      "step": 65630
    },
    {
      "epoch": 0.2300518352626986,
      "grad_norm": 3.078125,
      "learning_rate": 3.834098529188425e-05,
      "loss": 0.967,
      "step": 65640
    },
    {
      "epoch": 0.2300868827695942,
      "grad_norm": 3.40625,
      "learning_rate": 3.834682648161778e-05,
      "loss": 1.0336,
      "step": 65650
    },
    {
      "epoch": 0.23012193027648978,
      "grad_norm": 3.515625,
      "learning_rate": 3.83526676713513e-05,
      "loss": 0.9809,
      "step": 65660
    },
    {
      "epoch": 0.23015697778338537,
      "grad_norm": 3.078125,
      "learning_rate": 3.835850886108483e-05,
      "loss": 1.0055,
      "step": 65670
    },
    {
      "epoch": 0.230192025290281,
      "grad_norm": 2.890625,
      "learning_rate": 3.8364350050818356e-05,
      "loss": 1.0013,
      "step": 65680
    },
    {
      "epoch": 0.23022707279717658,
      "grad_norm": 3.046875,
      "learning_rate": 3.837019124055188e-05,
      "loss": 0.9648,
      "step": 65690
    },
    {
      "epoch": 0.23026212030407217,
      "grad_norm": 3.390625,
      "learning_rate": 3.83760324302854e-05,
      "loss": 0.9126,
      "step": 65700
    },
    {
      "epoch": 0.23029716781096776,
      "grad_norm": 2.890625,
      "learning_rate": 3.838187362001893e-05,
      "loss": 0.93,
      "step": 65710
    },
    {
      "epoch": 0.23033221531786335,
      "grad_norm": 3.5625,
      "learning_rate": 3.838771480975245e-05,
      "loss": 0.9965,
      "step": 65720
    },
    {
      "epoch": 0.23036726282475897,
      "grad_norm": 3.546875,
      "learning_rate": 3.839355599948598e-05,
      "loss": 1.0235,
      "step": 65730
    },
    {
      "epoch": 0.23040231033165456,
      "grad_norm": 2.984375,
      "learning_rate": 3.8399397189219505e-05,
      "loss": 0.9883,
      "step": 65740
    },
    {
      "epoch": 0.23043735783855016,
      "grad_norm": 2.953125,
      "learning_rate": 3.840523837895303e-05,
      "loss": 0.9953,
      "step": 65750
    },
    {
      "epoch": 0.23047240534544575,
      "grad_norm": 3.90625,
      "learning_rate": 3.8411079568686555e-05,
      "loss": 0.9298,
      "step": 65760
    },
    {
      "epoch": 0.23050745285234134,
      "grad_norm": 2.984375,
      "learning_rate": 3.8416920758420077e-05,
      "loss": 0.9618,
      "step": 65770
    },
    {
      "epoch": 0.23054250035923696,
      "grad_norm": 3.140625,
      "learning_rate": 3.8422761948153605e-05,
      "loss": 1.0278,
      "step": 65780
    },
    {
      "epoch": 0.23057754786613255,
      "grad_norm": 3.65625,
      "learning_rate": 3.8428603137887126e-05,
      "loss": 1.0253,
      "step": 65790
    },
    {
      "epoch": 0.23061259537302814,
      "grad_norm": 3.140625,
      "learning_rate": 3.843444432762065e-05,
      "loss": 0.9861,
      "step": 65800
    },
    {
      "epoch": 0.23064764287992373,
      "grad_norm": 3.65625,
      "learning_rate": 3.8440285517354176e-05,
      "loss": 0.9606,
      "step": 65810
    },
    {
      "epoch": 0.23068269038681932,
      "grad_norm": 3.109375,
      "learning_rate": 3.8446126707087704e-05,
      "loss": 0.9699,
      "step": 65820
    },
    {
      "epoch": 0.23071773789371494,
      "grad_norm": 3.40625,
      "learning_rate": 3.8451967896821226e-05,
      "loss": 0.9802,
      "step": 65830
    },
    {
      "epoch": 0.23075278540061053,
      "grad_norm": 3.0,
      "learning_rate": 3.8457809086554754e-05,
      "loss": 0.9719,
      "step": 65840
    },
    {
      "epoch": 0.23078783290750612,
      "grad_norm": 3.5,
      "learning_rate": 3.8463650276288275e-05,
      "loss": 0.9524,
      "step": 65850
    },
    {
      "epoch": 0.23082288041440172,
      "grad_norm": 3.0625,
      "learning_rate": 3.8469491466021804e-05,
      "loss": 1.0203,
      "step": 65860
    },
    {
      "epoch": 0.2308579279212973,
      "grad_norm": 3.328125,
      "learning_rate": 3.8475332655755325e-05,
      "loss": 1.0371,
      "step": 65870
    },
    {
      "epoch": 0.23089297542819293,
      "grad_norm": 3.484375,
      "learning_rate": 3.8481173845488846e-05,
      "loss": 1.0066,
      "step": 65880
    },
    {
      "epoch": 0.23092802293508852,
      "grad_norm": 2.953125,
      "learning_rate": 3.8487015035222375e-05,
      "loss": 0.9954,
      "step": 65890
    },
    {
      "epoch": 0.2309630704419841,
      "grad_norm": 3.4375,
      "learning_rate": 3.84928562249559e-05,
      "loss": 1.049,
      "step": 65900
    },
    {
      "epoch": 0.2309981179488797,
      "grad_norm": 3.34375,
      "learning_rate": 3.8498697414689424e-05,
      "loss": 1.0897,
      "step": 65910
    },
    {
      "epoch": 0.2310331654557753,
      "grad_norm": 3.140625,
      "learning_rate": 3.850453860442295e-05,
      "loss": 1.0662,
      "step": 65920
    },
    {
      "epoch": 0.2310682129626709,
      "grad_norm": 2.75,
      "learning_rate": 3.8510379794156474e-05,
      "loss": 0.981,
      "step": 65930
    },
    {
      "epoch": 0.2311032604695665,
      "grad_norm": 3.140625,
      "learning_rate": 3.851622098389e-05,
      "loss": 0.9593,
      "step": 65940
    },
    {
      "epoch": 0.2311383079764621,
      "grad_norm": 2.953125,
      "learning_rate": 3.8522062173623524e-05,
      "loss": 1.0402,
      "step": 65950
    },
    {
      "epoch": 0.23117335548335768,
      "grad_norm": 3.328125,
      "learning_rate": 3.8527903363357045e-05,
      "loss": 0.9264,
      "step": 65960
    },
    {
      "epoch": 0.23120840299025328,
      "grad_norm": 3.234375,
      "learning_rate": 3.8533744553090573e-05,
      "loss": 1.0043,
      "step": 65970
    },
    {
      "epoch": 0.2312434504971489,
      "grad_norm": 3.453125,
      "learning_rate": 3.85395857428241e-05,
      "loss": 1.0434,
      "step": 65980
    },
    {
      "epoch": 0.23127849800404449,
      "grad_norm": 3.8125,
      "learning_rate": 3.854542693255763e-05,
      "loss": 0.995,
      "step": 65990
    },
    {
      "epoch": 0.23131354551094008,
      "grad_norm": 3.203125,
      "learning_rate": 3.855126812229115e-05,
      "loss": 1.0054,
      "step": 66000
    },
    {
      "epoch": 0.23134859301783567,
      "grad_norm": 3.109375,
      "learning_rate": 3.855710931202467e-05,
      "loss": 1.0414,
      "step": 66010
    },
    {
      "epoch": 0.23138364052473126,
      "grad_norm": 3.03125,
      "learning_rate": 3.85629505017582e-05,
      "loss": 0.9491,
      "step": 66020
    },
    {
      "epoch": 0.23141868803162688,
      "grad_norm": 3.4375,
      "learning_rate": 3.856879169149172e-05,
      "loss": 1.0,
      "step": 66030
    },
    {
      "epoch": 0.23145373553852247,
      "grad_norm": 3.296875,
      "learning_rate": 3.857463288122525e-05,
      "loss": 1.0218,
      "step": 66040
    },
    {
      "epoch": 0.23148878304541806,
      "grad_norm": 3.515625,
      "learning_rate": 3.858047407095877e-05,
      "loss": 0.918,
      "step": 66050
    },
    {
      "epoch": 0.23152383055231365,
      "grad_norm": 3.015625,
      "learning_rate": 3.85863152606923e-05,
      "loss": 0.9404,
      "step": 66060
    },
    {
      "epoch": 0.23155887805920924,
      "grad_norm": 4.375,
      "learning_rate": 3.859215645042583e-05,
      "loss": 1.0124,
      "step": 66070
    },
    {
      "epoch": 0.23159392556610486,
      "grad_norm": 3.1875,
      "learning_rate": 3.859799764015935e-05,
      "loss": 0.9281,
      "step": 66080
    },
    {
      "epoch": 0.23162897307300045,
      "grad_norm": 3.390625,
      "learning_rate": 3.860383882989287e-05,
      "loss": 1.0287,
      "step": 66090
    },
    {
      "epoch": 0.23166402057989605,
      "grad_norm": 2.796875,
      "learning_rate": 3.86096800196264e-05,
      "loss": 1.0065,
      "step": 66100
    },
    {
      "epoch": 0.23169906808679164,
      "grad_norm": 3.046875,
      "learning_rate": 3.861552120935992e-05,
      "loss": 1.0056,
      "step": 66110
    },
    {
      "epoch": 0.23173411559368726,
      "grad_norm": 2.984375,
      "learning_rate": 3.862136239909345e-05,
      "loss": 0.9711,
      "step": 66120
    },
    {
      "epoch": 0.23176916310058285,
      "grad_norm": 2.9375,
      "learning_rate": 3.862720358882698e-05,
      "loss": 0.9125,
      "step": 66130
    },
    {
      "epoch": 0.23180421060747844,
      "grad_norm": 3.265625,
      "learning_rate": 3.86330447785605e-05,
      "loss": 1.0015,
      "step": 66140
    },
    {
      "epoch": 0.23183925811437403,
      "grad_norm": 3.234375,
      "learning_rate": 3.863888596829403e-05,
      "loss": 0.9446,
      "step": 66150
    },
    {
      "epoch": 0.23187430562126962,
      "grad_norm": 3.359375,
      "learning_rate": 3.864472715802755e-05,
      "loss": 0.9267,
      "step": 66160
    },
    {
      "epoch": 0.23190935312816524,
      "grad_norm": 3.234375,
      "learning_rate": 3.865056834776107e-05,
      "loss": 0.9146,
      "step": 66170
    },
    {
      "epoch": 0.23194440063506083,
      "grad_norm": 3.15625,
      "learning_rate": 3.86564095374946e-05,
      "loss": 1.053,
      "step": 66180
    },
    {
      "epoch": 0.23197944814195642,
      "grad_norm": 3.0,
      "learning_rate": 3.866225072722812e-05,
      "loss": 0.9558,
      "step": 66190
    },
    {
      "epoch": 0.23201449564885201,
      "grad_norm": 3.25,
      "learning_rate": 3.866809191696165e-05,
      "loss": 0.9709,
      "step": 66200
    },
    {
      "epoch": 0.2320495431557476,
      "grad_norm": 3.078125,
      "learning_rate": 3.8673933106695176e-05,
      "loss": 1.0006,
      "step": 66210
    },
    {
      "epoch": 0.23208459066264323,
      "grad_norm": 2.96875,
      "learning_rate": 3.86797742964287e-05,
      "loss": 0.9702,
      "step": 66220
    },
    {
      "epoch": 0.23211963816953882,
      "grad_norm": 3.265625,
      "learning_rate": 3.8685615486162226e-05,
      "loss": 0.9719,
      "step": 66230
    },
    {
      "epoch": 0.2321546856764344,
      "grad_norm": 2.84375,
      "learning_rate": 3.869145667589575e-05,
      "loss": 0.9563,
      "step": 66240
    },
    {
      "epoch": 0.23218973318333,
      "grad_norm": 3.09375,
      "learning_rate": 3.8697297865629276e-05,
      "loss": 0.9757,
      "step": 66250
    },
    {
      "epoch": 0.2322247806902256,
      "grad_norm": 3.328125,
      "learning_rate": 3.87031390553628e-05,
      "loss": 0.9913,
      "step": 66260
    },
    {
      "epoch": 0.2322598281971212,
      "grad_norm": 3.25,
      "learning_rate": 3.870898024509632e-05,
      "loss": 0.9396,
      "step": 66270
    },
    {
      "epoch": 0.2322948757040168,
      "grad_norm": 3.40625,
      "learning_rate": 3.871482143482985e-05,
      "loss": 0.9948,
      "step": 66280
    },
    {
      "epoch": 0.2323299232109124,
      "grad_norm": 3.625,
      "learning_rate": 3.8720662624563375e-05,
      "loss": 1.0737,
      "step": 66290
    },
    {
      "epoch": 0.23236497071780798,
      "grad_norm": 3.0625,
      "learning_rate": 3.87265038142969e-05,
      "loss": 0.9959,
      "step": 66300
    },
    {
      "epoch": 0.23240001822470358,
      "grad_norm": 2.625,
      "learning_rate": 3.8732345004030425e-05,
      "loss": 0.9538,
      "step": 66310
    },
    {
      "epoch": 0.2324350657315992,
      "grad_norm": 3.265625,
      "learning_rate": 3.8738186193763946e-05,
      "loss": 0.9878,
      "step": 66320
    },
    {
      "epoch": 0.23247011323849479,
      "grad_norm": 3.390625,
      "learning_rate": 3.8744027383497475e-05,
      "loss": 1.0041,
      "step": 66330
    },
    {
      "epoch": 0.23250516074539038,
      "grad_norm": 3.6875,
      "learning_rate": 3.8749868573230996e-05,
      "loss": 1.0083,
      "step": 66340
    },
    {
      "epoch": 0.23254020825228597,
      "grad_norm": 3.296875,
      "learning_rate": 3.875570976296452e-05,
      "loss": 1.0219,
      "step": 66350
    },
    {
      "epoch": 0.23257525575918156,
      "grad_norm": 3.875,
      "learning_rate": 3.8761550952698046e-05,
      "loss": 0.9542,
      "step": 66360
    },
    {
      "epoch": 0.23261030326607718,
      "grad_norm": 3.09375,
      "learning_rate": 3.8767392142431574e-05,
      "loss": 0.9574,
      "step": 66370
    },
    {
      "epoch": 0.23264535077297277,
      "grad_norm": 3.390625,
      "learning_rate": 3.8773233332165095e-05,
      "loss": 0.9341,
      "step": 66380
    },
    {
      "epoch": 0.23268039827986836,
      "grad_norm": 3.578125,
      "learning_rate": 3.8779074521898624e-05,
      "loss": 1.0009,
      "step": 66390
    },
    {
      "epoch": 0.23271544578676395,
      "grad_norm": 3.03125,
      "learning_rate": 3.8784915711632145e-05,
      "loss": 1.0396,
      "step": 66400
    },
    {
      "epoch": 0.23275049329365954,
      "grad_norm": 3.0625,
      "learning_rate": 3.879075690136567e-05,
      "loss": 0.9972,
      "step": 66410
    },
    {
      "epoch": 0.23278554080055516,
      "grad_norm": 3.265625,
      "learning_rate": 3.8796598091099195e-05,
      "loss": 0.9086,
      "step": 66420
    },
    {
      "epoch": 0.23282058830745075,
      "grad_norm": 3.25,
      "learning_rate": 3.880243928083272e-05,
      "loss": 0.9255,
      "step": 66430
    },
    {
      "epoch": 0.23285563581434635,
      "grad_norm": 3.234375,
      "learning_rate": 3.880828047056625e-05,
      "loss": 0.9479,
      "step": 66440
    },
    {
      "epoch": 0.23289068332124194,
      "grad_norm": 3.109375,
      "learning_rate": 3.881412166029977e-05,
      "loss": 0.9541,
      "step": 66450
    },
    {
      "epoch": 0.23292573082813753,
      "grad_norm": 3.203125,
      "learning_rate": 3.88199628500333e-05,
      "loss": 0.9955,
      "step": 66460
    },
    {
      "epoch": 0.23296077833503315,
      "grad_norm": 3.09375,
      "learning_rate": 3.882580403976682e-05,
      "loss": 0.9441,
      "step": 66470
    },
    {
      "epoch": 0.23299582584192874,
      "grad_norm": 3.171875,
      "learning_rate": 3.8831645229500344e-05,
      "loss": 0.9937,
      "step": 66480
    },
    {
      "epoch": 0.23303087334882433,
      "grad_norm": 3.140625,
      "learning_rate": 3.883748641923387e-05,
      "loss": 1.026,
      "step": 66490
    },
    {
      "epoch": 0.23306592085571992,
      "grad_norm": 3.3125,
      "learning_rate": 3.8843327608967394e-05,
      "loss": 1.0249,
      "step": 66500
    },
    {
      "epoch": 0.2331009683626155,
      "grad_norm": 3.359375,
      "learning_rate": 3.884916879870092e-05,
      "loss": 1.0182,
      "step": 66510
    },
    {
      "epoch": 0.23313601586951113,
      "grad_norm": 3.609375,
      "learning_rate": 3.885500998843445e-05,
      "loss": 0.9719,
      "step": 66520
    },
    {
      "epoch": 0.23317106337640672,
      "grad_norm": 3.28125,
      "learning_rate": 3.886085117816797e-05,
      "loss": 0.987,
      "step": 66530
    },
    {
      "epoch": 0.23320611088330231,
      "grad_norm": 3.4375,
      "learning_rate": 3.88666923679015e-05,
      "loss": 0.9647,
      "step": 66540
    },
    {
      "epoch": 0.2332411583901979,
      "grad_norm": 3.375,
      "learning_rate": 3.887253355763502e-05,
      "loss": 1.02,
      "step": 66550
    },
    {
      "epoch": 0.2332762058970935,
      "grad_norm": 3.34375,
      "learning_rate": 3.887837474736854e-05,
      "loss": 0.9502,
      "step": 66560
    },
    {
      "epoch": 0.23331125340398912,
      "grad_norm": 3.265625,
      "learning_rate": 3.888421593710207e-05,
      "loss": 0.9746,
      "step": 66570
    },
    {
      "epoch": 0.2333463009108847,
      "grad_norm": 2.859375,
      "learning_rate": 3.889005712683559e-05,
      "loss": 0.9619,
      "step": 66580
    },
    {
      "epoch": 0.2333813484177803,
      "grad_norm": 3.328125,
      "learning_rate": 3.889589831656912e-05,
      "loss": 0.9406,
      "step": 66590
    },
    {
      "epoch": 0.2334163959246759,
      "grad_norm": 3.609375,
      "learning_rate": 3.890173950630265e-05,
      "loss": 1.0179,
      "step": 66600
    },
    {
      "epoch": 0.23345144343157148,
      "grad_norm": 3.46875,
      "learning_rate": 3.890758069603617e-05,
      "loss": 1.1147,
      "step": 66610
    },
    {
      "epoch": 0.2334864909384671,
      "grad_norm": 3.3125,
      "learning_rate": 3.89134218857697e-05,
      "loss": 0.9862,
      "step": 66620
    },
    {
      "epoch": 0.2335215384453627,
      "grad_norm": 3.390625,
      "learning_rate": 3.891926307550322e-05,
      "loss": 0.9901,
      "step": 66630
    },
    {
      "epoch": 0.23355658595225828,
      "grad_norm": 3.0,
      "learning_rate": 3.892510426523675e-05,
      "loss": 1.0413,
      "step": 66640
    },
    {
      "epoch": 0.23359163345915387,
      "grad_norm": 3.21875,
      "learning_rate": 3.893094545497027e-05,
      "loss": 0.9838,
      "step": 66650
    },
    {
      "epoch": 0.2336266809660495,
      "grad_norm": 2.84375,
      "learning_rate": 3.893678664470379e-05,
      "loss": 0.9815,
      "step": 66660
    },
    {
      "epoch": 0.23366172847294508,
      "grad_norm": 3.21875,
      "learning_rate": 3.894262783443732e-05,
      "loss": 0.9555,
      "step": 66670
    },
    {
      "epoch": 0.23369677597984068,
      "grad_norm": 2.796875,
      "learning_rate": 3.894846902417085e-05,
      "loss": 1.0447,
      "step": 66680
    },
    {
      "epoch": 0.23373182348673627,
      "grad_norm": 3.375,
      "learning_rate": 3.895431021390437e-05,
      "loss": 0.9415,
      "step": 66690
    },
    {
      "epoch": 0.23376687099363186,
      "grad_norm": 3.015625,
      "learning_rate": 3.89601514036379e-05,
      "loss": 0.9791,
      "step": 66700
    },
    {
      "epoch": 0.23380191850052748,
      "grad_norm": 3.015625,
      "learning_rate": 3.896599259337142e-05,
      "loss": 0.9709,
      "step": 66710
    },
    {
      "epoch": 0.23383696600742307,
      "grad_norm": 3.5625,
      "learning_rate": 3.897183378310495e-05,
      "loss": 0.9598,
      "step": 66720
    },
    {
      "epoch": 0.23387201351431866,
      "grad_norm": 2.96875,
      "learning_rate": 3.897767497283847e-05,
      "loss": 0.9327,
      "step": 66730
    },
    {
      "epoch": 0.23390706102121425,
      "grad_norm": 2.671875,
      "learning_rate": 3.898351616257199e-05,
      "loss": 0.9732,
      "step": 66740
    },
    {
      "epoch": 0.23394210852810984,
      "grad_norm": 3.5625,
      "learning_rate": 3.8989357352305525e-05,
      "loss": 1.041,
      "step": 66750
    },
    {
      "epoch": 0.23397715603500546,
      "grad_norm": 3.0,
      "learning_rate": 3.8995198542039046e-05,
      "loss": 0.9709,
      "step": 66760
    },
    {
      "epoch": 0.23401220354190105,
      "grad_norm": 3.15625,
      "learning_rate": 3.900103973177257e-05,
      "loss": 0.9651,
      "step": 66770
    },
    {
      "epoch": 0.23404725104879665,
      "grad_norm": 3.0,
      "learning_rate": 3.9006880921506096e-05,
      "loss": 0.9154,
      "step": 66780
    },
    {
      "epoch": 0.23408229855569224,
      "grad_norm": 2.984375,
      "learning_rate": 3.901272211123962e-05,
      "loss": 1.012,
      "step": 66790
    },
    {
      "epoch": 0.23411734606258783,
      "grad_norm": 3.140625,
      "learning_rate": 3.9018563300973146e-05,
      "loss": 0.9957,
      "step": 66800
    },
    {
      "epoch": 0.23415239356948345,
      "grad_norm": 3.1875,
      "learning_rate": 3.902440449070667e-05,
      "loss": 0.9896,
      "step": 66810
    },
    {
      "epoch": 0.23418744107637904,
      "grad_norm": 2.921875,
      "learning_rate": 3.903024568044019e-05,
      "loss": 1.0285,
      "step": 66820
    },
    {
      "epoch": 0.23422248858327463,
      "grad_norm": 2.8125,
      "learning_rate": 3.9036086870173724e-05,
      "loss": 0.9128,
      "step": 66830
    },
    {
      "epoch": 0.23425753609017022,
      "grad_norm": 3.171875,
      "learning_rate": 3.9041928059907245e-05,
      "loss": 0.9338,
      "step": 66840
    },
    {
      "epoch": 0.2342925835970658,
      "grad_norm": 3.4375,
      "learning_rate": 3.9047769249640767e-05,
      "loss": 0.9899,
      "step": 66850
    },
    {
      "epoch": 0.23432763110396143,
      "grad_norm": 3.15625,
      "learning_rate": 3.9053610439374295e-05,
      "loss": 1.0154,
      "step": 66860
    },
    {
      "epoch": 0.23436267861085702,
      "grad_norm": 3.4375,
      "learning_rate": 3.9059451629107816e-05,
      "loss": 0.9403,
      "step": 66870
    },
    {
      "epoch": 0.23439772611775261,
      "grad_norm": 3.0625,
      "learning_rate": 3.9065292818841344e-05,
      "loss": 0.9439,
      "step": 66880
    },
    {
      "epoch": 0.2344327736246482,
      "grad_norm": 3.25,
      "learning_rate": 3.9071134008574866e-05,
      "loss": 0.9439,
      "step": 66890
    },
    {
      "epoch": 0.2344678211315438,
      "grad_norm": 3.421875,
      "learning_rate": 3.9076975198308394e-05,
      "loss": 1.0604,
      "step": 66900
    },
    {
      "epoch": 0.23450286863843942,
      "grad_norm": 3.046875,
      "learning_rate": 3.908281638804192e-05,
      "loss": 1.0205,
      "step": 66910
    },
    {
      "epoch": 0.234537916145335,
      "grad_norm": 3.46875,
      "learning_rate": 3.9088657577775444e-05,
      "loss": 0.9298,
      "step": 66920
    },
    {
      "epoch": 0.2345729636522306,
      "grad_norm": 3.109375,
      "learning_rate": 3.909449876750897e-05,
      "loss": 1.0552,
      "step": 66930
    },
    {
      "epoch": 0.2346080111591262,
      "grad_norm": 3.25,
      "learning_rate": 3.9100339957242494e-05,
      "loss": 1.0137,
      "step": 66940
    },
    {
      "epoch": 0.23464305866602178,
      "grad_norm": 3.25,
      "learning_rate": 3.9106181146976015e-05,
      "loss": 0.9141,
      "step": 66950
    },
    {
      "epoch": 0.2346781061729174,
      "grad_norm": 3.046875,
      "learning_rate": 3.911202233670954e-05,
      "loss": 0.9369,
      "step": 66960
    },
    {
      "epoch": 0.234713153679813,
      "grad_norm": 3.203125,
      "learning_rate": 3.9117863526443065e-05,
      "loss": 0.995,
      "step": 66970
    },
    {
      "epoch": 0.23474820118670858,
      "grad_norm": 3.359375,
      "learning_rate": 3.912370471617659e-05,
      "loss": 1.004,
      "step": 66980
    },
    {
      "epoch": 0.23478324869360417,
      "grad_norm": 3.8125,
      "learning_rate": 3.912954590591012e-05,
      "loss": 0.9905,
      "step": 66990
    },
    {
      "epoch": 0.23481829620049977,
      "grad_norm": 3.4375,
      "learning_rate": 3.913538709564364e-05,
      "loss": 0.982,
      "step": 67000
    },
    {
      "epoch": 0.23485334370739538,
      "grad_norm": 3.078125,
      "learning_rate": 3.914122828537717e-05,
      "loss": 0.9954,
      "step": 67010
    },
    {
      "epoch": 0.23488839121429098,
      "grad_norm": 3.140625,
      "learning_rate": 3.914706947511069e-05,
      "loss": 0.9596,
      "step": 67020
    },
    {
      "epoch": 0.23492343872118657,
      "grad_norm": 3.140625,
      "learning_rate": 3.9152910664844214e-05,
      "loss": 0.984,
      "step": 67030
    },
    {
      "epoch": 0.23495848622808216,
      "grad_norm": 3.015625,
      "learning_rate": 3.915875185457774e-05,
      "loss": 0.9679,
      "step": 67040
    },
    {
      "epoch": 0.23499353373497775,
      "grad_norm": 3.1875,
      "learning_rate": 3.9164593044311263e-05,
      "loss": 0.973,
      "step": 67050
    },
    {
      "epoch": 0.23502858124187337,
      "grad_norm": 3.109375,
      "learning_rate": 3.917043423404479e-05,
      "loss": 0.9086,
      "step": 67060
    },
    {
      "epoch": 0.23506362874876896,
      "grad_norm": 3.65625,
      "learning_rate": 3.917627542377832e-05,
      "loss": 0.9761,
      "step": 67070
    },
    {
      "epoch": 0.23509867625566455,
      "grad_norm": 3.34375,
      "learning_rate": 3.918211661351184e-05,
      "loss": 1.0329,
      "step": 67080
    },
    {
      "epoch": 0.23513372376256014,
      "grad_norm": 3.359375,
      "learning_rate": 3.918795780324537e-05,
      "loss": 1.0008,
      "step": 67090
    },
    {
      "epoch": 0.23516877126945573,
      "grad_norm": 3.625,
      "learning_rate": 3.919379899297889e-05,
      "loss": 0.9553,
      "step": 67100
    },
    {
      "epoch": 0.23520381877635135,
      "grad_norm": 2.984375,
      "learning_rate": 3.919964018271242e-05,
      "loss": 0.949,
      "step": 67110
    },
    {
      "epoch": 0.23523886628324694,
      "grad_norm": 3.265625,
      "learning_rate": 3.920548137244594e-05,
      "loss": 0.9705,
      "step": 67120
    },
    {
      "epoch": 0.23527391379014254,
      "grad_norm": 2.953125,
      "learning_rate": 3.921132256217946e-05,
      "loss": 0.9828,
      "step": 67130
    },
    {
      "epoch": 0.23530896129703813,
      "grad_norm": 4.0,
      "learning_rate": 3.9217163751913e-05,
      "loss": 1.0522,
      "step": 67140
    },
    {
      "epoch": 0.23534400880393372,
      "grad_norm": 3.53125,
      "learning_rate": 3.922300494164652e-05,
      "loss": 0.9093,
      "step": 67150
    },
    {
      "epoch": 0.23537905631082934,
      "grad_norm": 3.09375,
      "learning_rate": 3.922884613138004e-05,
      "loss": 1.0489,
      "step": 67160
    },
    {
      "epoch": 0.23541410381772493,
      "grad_norm": 3.0625,
      "learning_rate": 3.923468732111357e-05,
      "loss": 0.973,
      "step": 67170
    },
    {
      "epoch": 0.23544915132462052,
      "grad_norm": 2.859375,
      "learning_rate": 3.924052851084709e-05,
      "loss": 0.9459,
      "step": 67180
    },
    {
      "epoch": 0.2354841988315161,
      "grad_norm": 3.171875,
      "learning_rate": 3.924636970058062e-05,
      "loss": 1.0024,
      "step": 67190
    },
    {
      "epoch": 0.23551924633841173,
      "grad_norm": 3.25,
      "learning_rate": 3.925221089031414e-05,
      "loss": 1.0201,
      "step": 67200
    },
    {
      "epoch": 0.23555429384530732,
      "grad_norm": 2.703125,
      "learning_rate": 3.925805208004766e-05,
      "loss": 0.9792,
      "step": 67210
    },
    {
      "epoch": 0.2355893413522029,
      "grad_norm": 3.453125,
      "learning_rate": 3.9263893269781196e-05,
      "loss": 1.0285,
      "step": 67220
    },
    {
      "epoch": 0.2356243888590985,
      "grad_norm": 3.390625,
      "learning_rate": 3.926973445951472e-05,
      "loss": 1.018,
      "step": 67230
    },
    {
      "epoch": 0.2356594363659941,
      "grad_norm": 2.84375,
      "learning_rate": 3.927557564924824e-05,
      "loss": 0.9969,
      "step": 67240
    },
    {
      "epoch": 0.23569448387288972,
      "grad_norm": 3.15625,
      "learning_rate": 3.928141683898177e-05,
      "loss": 0.9661,
      "step": 67250
    },
    {
      "epoch": 0.2357295313797853,
      "grad_norm": 3.203125,
      "learning_rate": 3.928725802871529e-05,
      "loss": 0.8929,
      "step": 67260
    },
    {
      "epoch": 0.2357645788866809,
      "grad_norm": 3.359375,
      "learning_rate": 3.929309921844882e-05,
      "loss": 1.0282,
      "step": 67270
    },
    {
      "epoch": 0.2357996263935765,
      "grad_norm": 3.203125,
      "learning_rate": 3.929894040818234e-05,
      "loss": 0.9835,
      "step": 67280
    },
    {
      "epoch": 0.23583467390047208,
      "grad_norm": 2.96875,
      "learning_rate": 3.930478159791586e-05,
      "loss": 0.9417,
      "step": 67290
    },
    {
      "epoch": 0.2358697214073677,
      "grad_norm": 3.34375,
      "learning_rate": 3.9310622787649395e-05,
      "loss": 0.9516,
      "step": 67300
    },
    {
      "epoch": 0.2359047689142633,
      "grad_norm": 3.0,
      "learning_rate": 3.9316463977382916e-05,
      "loss": 0.966,
      "step": 67310
    },
    {
      "epoch": 0.23593981642115888,
      "grad_norm": 2.96875,
      "learning_rate": 3.9322305167116444e-05,
      "loss": 1.0277,
      "step": 67320
    },
    {
      "epoch": 0.23597486392805447,
      "grad_norm": 3.234375,
      "learning_rate": 3.9328146356849966e-05,
      "loss": 0.8967,
      "step": 67330
    },
    {
      "epoch": 0.23600991143495006,
      "grad_norm": 3.484375,
      "learning_rate": 3.933398754658349e-05,
      "loss": 1.0013,
      "step": 67340
    },
    {
      "epoch": 0.23604495894184568,
      "grad_norm": 2.921875,
      "learning_rate": 3.9339828736317016e-05,
      "loss": 1.0484,
      "step": 67350
    },
    {
      "epoch": 0.23608000644874128,
      "grad_norm": 3.1875,
      "learning_rate": 3.934566992605054e-05,
      "loss": 0.946,
      "step": 67360
    },
    {
      "epoch": 0.23611505395563687,
      "grad_norm": 3.125,
      "learning_rate": 3.9351511115784065e-05,
      "loss": 0.9665,
      "step": 67370
    },
    {
      "epoch": 0.23615010146253246,
      "grad_norm": 3.109375,
      "learning_rate": 3.9357352305517593e-05,
      "loss": 0.9397,
      "step": 67380
    },
    {
      "epoch": 0.23618514896942805,
      "grad_norm": 3.375,
      "learning_rate": 3.9363193495251115e-05,
      "loss": 0.9763,
      "step": 67390
    },
    {
      "epoch": 0.23622019647632367,
      "grad_norm": 3.34375,
      "learning_rate": 3.936903468498464e-05,
      "loss": 1.0601,
      "step": 67400
    },
    {
      "epoch": 0.23625524398321926,
      "grad_norm": 3.140625,
      "learning_rate": 3.9374875874718165e-05,
      "loss": 1.0543,
      "step": 67410
    },
    {
      "epoch": 0.23629029149011485,
      "grad_norm": 3.0625,
      "learning_rate": 3.9380717064451686e-05,
      "loss": 0.934,
      "step": 67420
    },
    {
      "epoch": 0.23632533899701044,
      "grad_norm": 2.96875,
      "learning_rate": 3.9386558254185214e-05,
      "loss": 0.9385,
      "step": 67430
    },
    {
      "epoch": 0.23636038650390603,
      "grad_norm": 3.59375,
      "learning_rate": 3.9392399443918736e-05,
      "loss": 1.0682,
      "step": 67440
    },
    {
      "epoch": 0.23639543401080165,
      "grad_norm": 3.25,
      "learning_rate": 3.9398240633652264e-05,
      "loss": 1.0938,
      "step": 67450
    },
    {
      "epoch": 0.23643048151769724,
      "grad_norm": 3.109375,
      "learning_rate": 3.940408182338579e-05,
      "loss": 1.0417,
      "step": 67460
    },
    {
      "epoch": 0.23646552902459284,
      "grad_norm": 2.953125,
      "learning_rate": 3.9409923013119314e-05,
      "loss": 1.012,
      "step": 67470
    },
    {
      "epoch": 0.23650057653148843,
      "grad_norm": 3.3125,
      "learning_rate": 3.941576420285284e-05,
      "loss": 0.9805,
      "step": 67480
    },
    {
      "epoch": 0.23653562403838402,
      "grad_norm": 3.3125,
      "learning_rate": 3.942160539258636e-05,
      "loss": 0.9769,
      "step": 67490
    },
    {
      "epoch": 0.23657067154527964,
      "grad_norm": 3.109375,
      "learning_rate": 3.9427446582319885e-05,
      "loss": 0.946,
      "step": 67500
    },
    {
      "epoch": 0.23660571905217523,
      "grad_norm": 3.421875,
      "learning_rate": 3.943328777205341e-05,
      "loss": 0.994,
      "step": 67510
    },
    {
      "epoch": 0.23664076655907082,
      "grad_norm": 2.75,
      "learning_rate": 3.9439128961786934e-05,
      "loss": 0.8886,
      "step": 67520
    },
    {
      "epoch": 0.2366758140659664,
      "grad_norm": 3.59375,
      "learning_rate": 3.944497015152047e-05,
      "loss": 0.9869,
      "step": 67530
    },
    {
      "epoch": 0.236710861572862,
      "grad_norm": 3.078125,
      "learning_rate": 3.945081134125399e-05,
      "loss": 1.0543,
      "step": 67540
    },
    {
      "epoch": 0.23674590907975762,
      "grad_norm": 3.140625,
      "learning_rate": 3.945665253098751e-05,
      "loss": 0.9233,
      "step": 67550
    },
    {
      "epoch": 0.2367809565866532,
      "grad_norm": 3.125,
      "learning_rate": 3.946249372072104e-05,
      "loss": 0.9651,
      "step": 67560
    },
    {
      "epoch": 0.2368160040935488,
      "grad_norm": 3.15625,
      "learning_rate": 3.946833491045456e-05,
      "loss": 1.0115,
      "step": 67570
    },
    {
      "epoch": 0.2368510516004444,
      "grad_norm": 3.015625,
      "learning_rate": 3.947417610018809e-05,
      "loss": 0.9299,
      "step": 67580
    },
    {
      "epoch": 0.23688609910734,
      "grad_norm": 3.203125,
      "learning_rate": 3.948001728992161e-05,
      "loss": 1.0152,
      "step": 67590
    },
    {
      "epoch": 0.2369211466142356,
      "grad_norm": 2.984375,
      "learning_rate": 3.948585847965513e-05,
      "loss": 0.9238,
      "step": 67600
    },
    {
      "epoch": 0.2369561941211312,
      "grad_norm": 3.15625,
      "learning_rate": 3.949169966938867e-05,
      "loss": 0.9984,
      "step": 67610
    },
    {
      "epoch": 0.2369912416280268,
      "grad_norm": 3.03125,
      "learning_rate": 3.949754085912219e-05,
      "loss": 0.9823,
      "step": 67620
    },
    {
      "epoch": 0.23702628913492238,
      "grad_norm": 3.453125,
      "learning_rate": 3.950338204885571e-05,
      "loss": 0.9513,
      "step": 67630
    },
    {
      "epoch": 0.23706133664181797,
      "grad_norm": 2.78125,
      "learning_rate": 3.950922323858924e-05,
      "loss": 0.955,
      "step": 67640
    },
    {
      "epoch": 0.2370963841487136,
      "grad_norm": 2.921875,
      "learning_rate": 3.951506442832276e-05,
      "loss": 0.9259,
      "step": 67650
    },
    {
      "epoch": 0.23713143165560918,
      "grad_norm": 3.703125,
      "learning_rate": 3.952090561805629e-05,
      "loss": 0.9869,
      "step": 67660
    },
    {
      "epoch": 0.23716647916250477,
      "grad_norm": 3.515625,
      "learning_rate": 3.952674680778981e-05,
      "loss": 1.0131,
      "step": 67670
    },
    {
      "epoch": 0.23720152666940036,
      "grad_norm": 3.0,
      "learning_rate": 3.953258799752333e-05,
      "loss": 0.9285,
      "step": 67680
    },
    {
      "epoch": 0.23723657417629596,
      "grad_norm": 3.359375,
      "learning_rate": 3.953842918725687e-05,
      "loss": 0.9262,
      "step": 67690
    },
    {
      "epoch": 0.23727162168319157,
      "grad_norm": 3.265625,
      "learning_rate": 3.954427037699039e-05,
      "loss": 1.058,
      "step": 67700
    },
    {
      "epoch": 0.23730666919008717,
      "grad_norm": 3.09375,
      "learning_rate": 3.955011156672391e-05,
      "loss": 0.9458,
      "step": 67710
    },
    {
      "epoch": 0.23734171669698276,
      "grad_norm": 2.890625,
      "learning_rate": 3.955595275645744e-05,
      "loss": 0.975,
      "step": 67720
    },
    {
      "epoch": 0.23737676420387835,
      "grad_norm": 3.28125,
      "learning_rate": 3.956179394619096e-05,
      "loss": 1.0263,
      "step": 67730
    },
    {
      "epoch": 0.23741181171077397,
      "grad_norm": 3.296875,
      "learning_rate": 3.956763513592449e-05,
      "loss": 1.007,
      "step": 67740
    },
    {
      "epoch": 0.23744685921766956,
      "grad_norm": 3.234375,
      "learning_rate": 3.957347632565801e-05,
      "loss": 0.9963,
      "step": 67750
    },
    {
      "epoch": 0.23748190672456515,
      "grad_norm": 3.515625,
      "learning_rate": 3.957931751539153e-05,
      "loss": 0.983,
      "step": 67760
    },
    {
      "epoch": 0.23751695423146074,
      "grad_norm": 3.125,
      "learning_rate": 3.9585158705125066e-05,
      "loss": 0.9299,
      "step": 67770
    },
    {
      "epoch": 0.23755200173835633,
      "grad_norm": 3.484375,
      "learning_rate": 3.959099989485859e-05,
      "loss": 0.9877,
      "step": 67780
    },
    {
      "epoch": 0.23758704924525195,
      "grad_norm": 3.234375,
      "learning_rate": 3.9596841084592115e-05,
      "loss": 1.0162,
      "step": 67790
    },
    {
      "epoch": 0.23762209675214754,
      "grad_norm": 3.078125,
      "learning_rate": 3.960268227432564e-05,
      "loss": 0.982,
      "step": 67800
    },
    {
      "epoch": 0.23765714425904313,
      "grad_norm": 3.03125,
      "learning_rate": 3.960852346405916e-05,
      "loss": 0.9936,
      "step": 67810
    },
    {
      "epoch": 0.23769219176593873,
      "grad_norm": 3.1875,
      "learning_rate": 3.9614364653792687e-05,
      "loss": 1.0003,
      "step": 67820
    },
    {
      "epoch": 0.23772723927283432,
      "grad_norm": 3.28125,
      "learning_rate": 3.962020584352621e-05,
      "loss": 0.9544,
      "step": 67830
    },
    {
      "epoch": 0.23776228677972994,
      "grad_norm": 3.828125,
      "learning_rate": 3.9626047033259736e-05,
      "loss": 0.9365,
      "step": 67840
    },
    {
      "epoch": 0.23779733428662553,
      "grad_norm": 2.859375,
      "learning_rate": 3.9631888222993265e-05,
      "loss": 0.9479,
      "step": 67850
    },
    {
      "epoch": 0.23783238179352112,
      "grad_norm": 3.484375,
      "learning_rate": 3.9637729412726786e-05,
      "loss": 0.98,
      "step": 67860
    },
    {
      "epoch": 0.2378674293004167,
      "grad_norm": 3.21875,
      "learning_rate": 3.9643570602460314e-05,
      "loss": 1.0576,
      "step": 67870
    },
    {
      "epoch": 0.2379024768073123,
      "grad_norm": 3.25,
      "learning_rate": 3.9649411792193836e-05,
      "loss": 0.9122,
      "step": 67880
    },
    {
      "epoch": 0.23793752431420792,
      "grad_norm": 2.9375,
      "learning_rate": 3.965525298192736e-05,
      "loss": 0.9759,
      "step": 67890
    },
    {
      "epoch": 0.2379725718211035,
      "grad_norm": 2.828125,
      "learning_rate": 3.9661094171660885e-05,
      "loss": 0.9311,
      "step": 67900
    },
    {
      "epoch": 0.2380076193279991,
      "grad_norm": 3.453125,
      "learning_rate": 3.966693536139441e-05,
      "loss": 0.9938,
      "step": 67910
    },
    {
      "epoch": 0.2380426668348947,
      "grad_norm": 3.5625,
      "learning_rate": 3.9672776551127935e-05,
      "loss": 1.093,
      "step": 67920
    },
    {
      "epoch": 0.2380777143417903,
      "grad_norm": 3.25,
      "learning_rate": 3.967861774086146e-05,
      "loss": 0.9479,
      "step": 67930
    },
    {
      "epoch": 0.2381127618486859,
      "grad_norm": 3.15625,
      "learning_rate": 3.9684458930594985e-05,
      "loss": 0.9518,
      "step": 67940
    },
    {
      "epoch": 0.2381478093555815,
      "grad_norm": 2.921875,
      "learning_rate": 3.969030012032851e-05,
      "loss": 1.0422,
      "step": 67950
    },
    {
      "epoch": 0.2381828568624771,
      "grad_norm": 3.21875,
      "learning_rate": 3.9696141310062034e-05,
      "loss": 0.9798,
      "step": 67960
    },
    {
      "epoch": 0.23821790436937268,
      "grad_norm": 3.1875,
      "learning_rate": 3.9701982499795556e-05,
      "loss": 1.0278,
      "step": 67970
    },
    {
      "epoch": 0.23825295187626827,
      "grad_norm": 3.265625,
      "learning_rate": 3.9707823689529084e-05,
      "loss": 1.0419,
      "step": 67980
    },
    {
      "epoch": 0.2382879993831639,
      "grad_norm": 3.265625,
      "learning_rate": 3.9713664879262606e-05,
      "loss": 0.9846,
      "step": 67990
    },
    {
      "epoch": 0.23832304689005948,
      "grad_norm": 3.15625,
      "learning_rate": 3.971950606899614e-05,
      "loss": 0.9972,
      "step": 68000
    },
    {
      "epoch": 0.23835809439695507,
      "grad_norm": 3.28125,
      "learning_rate": 3.972534725872966e-05,
      "loss": 0.974,
      "step": 68010
    },
    {
      "epoch": 0.23839314190385066,
      "grad_norm": 3.421875,
      "learning_rate": 3.9731188448463183e-05,
      "loss": 1.0471,
      "step": 68020
    },
    {
      "epoch": 0.23842818941074626,
      "grad_norm": 3.28125,
      "learning_rate": 3.973702963819671e-05,
      "loss": 0.996,
      "step": 68030
    },
    {
      "epoch": 0.23846323691764187,
      "grad_norm": 3.140625,
      "learning_rate": 3.974287082793023e-05,
      "loss": 0.9793,
      "step": 68040
    },
    {
      "epoch": 0.23849828442453747,
      "grad_norm": 3.375,
      "learning_rate": 3.974871201766376e-05,
      "loss": 0.9891,
      "step": 68050
    },
    {
      "epoch": 0.23853333193143306,
      "grad_norm": 5.03125,
      "learning_rate": 3.975455320739728e-05,
      "loss": 0.8719,
      "step": 68060
    },
    {
      "epoch": 0.23856837943832865,
      "grad_norm": 3.484375,
      "learning_rate": 3.9760394397130804e-05,
      "loss": 0.909,
      "step": 68070
    },
    {
      "epoch": 0.23860342694522424,
      "grad_norm": 2.890625,
      "learning_rate": 3.976623558686434e-05,
      "loss": 1.0896,
      "step": 68080
    },
    {
      "epoch": 0.23863847445211986,
      "grad_norm": 3.390625,
      "learning_rate": 3.977207677659786e-05,
      "loss": 1.0365,
      "step": 68090
    },
    {
      "epoch": 0.23867352195901545,
      "grad_norm": 2.828125,
      "learning_rate": 3.977791796633138e-05,
      "loss": 1.029,
      "step": 68100
    },
    {
      "epoch": 0.23870856946591104,
      "grad_norm": 2.96875,
      "learning_rate": 3.978375915606491e-05,
      "loss": 0.9754,
      "step": 68110
    },
    {
      "epoch": 0.23874361697280663,
      "grad_norm": 3.359375,
      "learning_rate": 3.978960034579843e-05,
      "loss": 0.9859,
      "step": 68120
    },
    {
      "epoch": 0.23877866447970222,
      "grad_norm": 3.078125,
      "learning_rate": 3.979544153553196e-05,
      "loss": 1.0256,
      "step": 68130
    },
    {
      "epoch": 0.23881371198659784,
      "grad_norm": 3.40625,
      "learning_rate": 3.980128272526548e-05,
      "loss": 0.9043,
      "step": 68140
    },
    {
      "epoch": 0.23884875949349343,
      "grad_norm": 3.140625,
      "learning_rate": 3.980712391499901e-05,
      "loss": 0.9839,
      "step": 68150
    },
    {
      "epoch": 0.23888380700038903,
      "grad_norm": 3.4375,
      "learning_rate": 3.981296510473254e-05,
      "loss": 1.0444,
      "step": 68160
    },
    {
      "epoch": 0.23891885450728462,
      "grad_norm": 3.046875,
      "learning_rate": 3.981880629446606e-05,
      "loss": 0.9741,
      "step": 68170
    },
    {
      "epoch": 0.2389539020141802,
      "grad_norm": 2.90625,
      "learning_rate": 3.982464748419958e-05,
      "loss": 0.9999,
      "step": 68180
    },
    {
      "epoch": 0.23898894952107583,
      "grad_norm": 3.15625,
      "learning_rate": 3.983048867393311e-05,
      "loss": 0.9189,
      "step": 68190
    },
    {
      "epoch": 0.23902399702797142,
      "grad_norm": 3.375,
      "learning_rate": 3.983632986366663e-05,
      "loss": 1.0341,
      "step": 68200
    },
    {
      "epoch": 0.239059044534867,
      "grad_norm": 3.46875,
      "learning_rate": 3.984217105340016e-05,
      "loss": 1.0812,
      "step": 68210
    },
    {
      "epoch": 0.2390940920417626,
      "grad_norm": 3.296875,
      "learning_rate": 3.984801224313368e-05,
      "loss": 1.0594,
      "step": 68220
    },
    {
      "epoch": 0.2391291395486582,
      "grad_norm": 3.015625,
      "learning_rate": 3.985385343286721e-05,
      "loss": 0.998,
      "step": 68230
    },
    {
      "epoch": 0.2391641870555538,
      "grad_norm": 3.609375,
      "learning_rate": 3.985969462260074e-05,
      "loss": 1.0216,
      "step": 68240
    },
    {
      "epoch": 0.2391992345624494,
      "grad_norm": 3.328125,
      "learning_rate": 3.986553581233426e-05,
      "loss": 0.9886,
      "step": 68250
    },
    {
      "epoch": 0.239234282069345,
      "grad_norm": 2.9375,
      "learning_rate": 3.9871377002067787e-05,
      "loss": 1.0556,
      "step": 68260
    },
    {
      "epoch": 0.23926932957624059,
      "grad_norm": 2.703125,
      "learning_rate": 3.987721819180131e-05,
      "loss": 0.9326,
      "step": 68270
    },
    {
      "epoch": 0.2393043770831362,
      "grad_norm": 3.3125,
      "learning_rate": 3.988305938153483e-05,
      "loss": 1.0053,
      "step": 68280
    },
    {
      "epoch": 0.2393394245900318,
      "grad_norm": 3.46875,
      "learning_rate": 3.988890057126836e-05,
      "loss": 1.0873,
      "step": 68290
    },
    {
      "epoch": 0.2393744720969274,
      "grad_norm": 2.890625,
      "learning_rate": 3.989474176100188e-05,
      "loss": 0.953,
      "step": 68300
    },
    {
      "epoch": 0.23940951960382298,
      "grad_norm": 3.109375,
      "learning_rate": 3.990058295073541e-05,
      "loss": 0.9742,
      "step": 68310
    },
    {
      "epoch": 0.23944456711071857,
      "grad_norm": 3.6875,
      "learning_rate": 3.9906424140468936e-05,
      "loss": 1.0439,
      "step": 68320
    },
    {
      "epoch": 0.2394796146176142,
      "grad_norm": 3.5625,
      "learning_rate": 3.991226533020246e-05,
      "loss": 1.0719,
      "step": 68330
    },
    {
      "epoch": 0.23951466212450978,
      "grad_norm": 3.828125,
      "learning_rate": 3.9918106519935985e-05,
      "loss": 1.029,
      "step": 68340
    },
    {
      "epoch": 0.23954970963140537,
      "grad_norm": 2.796875,
      "learning_rate": 3.992394770966951e-05,
      "loss": 0.9823,
      "step": 68350
    },
    {
      "epoch": 0.23958475713830096,
      "grad_norm": 2.9375,
      "learning_rate": 3.992978889940303e-05,
      "loss": 0.9689,
      "step": 68360
    },
    {
      "epoch": 0.23961980464519655,
      "grad_norm": 3.28125,
      "learning_rate": 3.9935630089136556e-05,
      "loss": 0.9296,
      "step": 68370
    },
    {
      "epoch": 0.23965485215209217,
      "grad_norm": 3.65625,
      "learning_rate": 3.994147127887008e-05,
      "loss": 1.0579,
      "step": 68380
    },
    {
      "epoch": 0.23968989965898777,
      "grad_norm": 3.109375,
      "learning_rate": 3.9947312468603606e-05,
      "loss": 0.9648,
      "step": 68390
    },
    {
      "epoch": 0.23972494716588336,
      "grad_norm": 3.34375,
      "learning_rate": 3.9953153658337134e-05,
      "loss": 0.941,
      "step": 68400
    },
    {
      "epoch": 0.23975999467277895,
      "grad_norm": 2.859375,
      "learning_rate": 3.9958994848070656e-05,
      "loss": 0.972,
      "step": 68410
    },
    {
      "epoch": 0.23979504217967454,
      "grad_norm": 3.453125,
      "learning_rate": 3.9964836037804184e-05,
      "loss": 0.9969,
      "step": 68420
    },
    {
      "epoch": 0.23983008968657016,
      "grad_norm": 2.890625,
      "learning_rate": 3.9970677227537706e-05,
      "loss": 0.9988,
      "step": 68430
    },
    {
      "epoch": 0.23986513719346575,
      "grad_norm": 3.359375,
      "learning_rate": 3.9976518417271234e-05,
      "loss": 1.0201,
      "step": 68440
    },
    {
      "epoch": 0.23990018470036134,
      "grad_norm": 2.609375,
      "learning_rate": 3.9982359607004755e-05,
      "loss": 0.9257,
      "step": 68450
    },
    {
      "epoch": 0.23993523220725693,
      "grad_norm": 3.25,
      "learning_rate": 3.9988200796738283e-05,
      "loss": 1.002,
      "step": 68460
    },
    {
      "epoch": 0.23997027971415252,
      "grad_norm": 3.0,
      "learning_rate": 3.999404198647181e-05,
      "loss": 0.9608,
      "step": 68470
    },
    {
      "epoch": 0.24000532722104814,
      "grad_norm": 3.71875,
      "learning_rate": 3.999988317620533e-05,
      "loss": 0.9304,
      "step": 68480
    },
    {
      "epoch": 0.24004037472794373,
      "grad_norm": 3.28125,
      "learning_rate": 4.0005724365938855e-05,
      "loss": 0.9566,
      "step": 68490
    },
    {
      "epoch": 0.24007542223483933,
      "grad_norm": 3.890625,
      "learning_rate": 4.001156555567238e-05,
      "loss": 1.0499,
      "step": 68500
    },
    {
      "epoch": 0.24011046974173492,
      "grad_norm": 3.5,
      "learning_rate": 4.0017406745405904e-05,
      "loss": 0.9472,
      "step": 68510
    },
    {
      "epoch": 0.2401455172486305,
      "grad_norm": 3.484375,
      "learning_rate": 4.002324793513943e-05,
      "loss": 1.0706,
      "step": 68520
    },
    {
      "epoch": 0.24018056475552613,
      "grad_norm": 3.3125,
      "learning_rate": 4.0029089124872954e-05,
      "loss": 1.0126,
      "step": 68530
    },
    {
      "epoch": 0.24021561226242172,
      "grad_norm": 3.203125,
      "learning_rate": 4.003493031460648e-05,
      "loss": 0.9632,
      "step": 68540
    },
    {
      "epoch": 0.2402506597693173,
      "grad_norm": 3.296875,
      "learning_rate": 4.004077150434001e-05,
      "loss": 0.95,
      "step": 68550
    },
    {
      "epoch": 0.2402857072762129,
      "grad_norm": 3.078125,
      "learning_rate": 4.004661269407353e-05,
      "loss": 0.9313,
      "step": 68560
    },
    {
      "epoch": 0.2403207547831085,
      "grad_norm": 3.140625,
      "learning_rate": 4.005245388380705e-05,
      "loss": 0.9757,
      "step": 68570
    },
    {
      "epoch": 0.2403558022900041,
      "grad_norm": 3.453125,
      "learning_rate": 4.005829507354058e-05,
      "loss": 0.9075,
      "step": 68580
    },
    {
      "epoch": 0.2403908497968997,
      "grad_norm": 3.03125,
      "learning_rate": 4.00641362632741e-05,
      "loss": 0.9197,
      "step": 68590
    },
    {
      "epoch": 0.2404258973037953,
      "grad_norm": 3.125,
      "learning_rate": 4.006997745300763e-05,
      "loss": 1.055,
      "step": 68600
    },
    {
      "epoch": 0.24046094481069089,
      "grad_norm": 3.0625,
      "learning_rate": 4.007581864274115e-05,
      "loss": 0.9181,
      "step": 68610
    },
    {
      "epoch": 0.24049599231758648,
      "grad_norm": 2.984375,
      "learning_rate": 4.008165983247468e-05,
      "loss": 0.8961,
      "step": 68620
    },
    {
      "epoch": 0.2405310398244821,
      "grad_norm": 3.234375,
      "learning_rate": 4.008750102220821e-05,
      "loss": 0.8863,
      "step": 68630
    },
    {
      "epoch": 0.2405660873313777,
      "grad_norm": 3.1875,
      "learning_rate": 4.009334221194173e-05,
      "loss": 0.9808,
      "step": 68640
    },
    {
      "epoch": 0.24060113483827328,
      "grad_norm": 3.078125,
      "learning_rate": 4.009918340167525e-05,
      "loss": 0.9528,
      "step": 68650
    },
    {
      "epoch": 0.24063618234516887,
      "grad_norm": 3.03125,
      "learning_rate": 4.010502459140878e-05,
      "loss": 0.9626,
      "step": 68660
    },
    {
      "epoch": 0.24067122985206446,
      "grad_norm": 3.515625,
      "learning_rate": 4.01108657811423e-05,
      "loss": 1.0007,
      "step": 68670
    },
    {
      "epoch": 0.24070627735896008,
      "grad_norm": 3.328125,
      "learning_rate": 4.011670697087583e-05,
      "loss": 1.0192,
      "step": 68680
    },
    {
      "epoch": 0.24074132486585567,
      "grad_norm": 3.0,
      "learning_rate": 4.012254816060935e-05,
      "loss": 0.9915,
      "step": 68690
    },
    {
      "epoch": 0.24077637237275126,
      "grad_norm": 3.125,
      "learning_rate": 4.012838935034288e-05,
      "loss": 0.9906,
      "step": 68700
    },
    {
      "epoch": 0.24081141987964685,
      "grad_norm": 3.078125,
      "learning_rate": 4.013423054007641e-05,
      "loss": 1.0338,
      "step": 68710
    },
    {
      "epoch": 0.24084646738654245,
      "grad_norm": 3.734375,
      "learning_rate": 4.014007172980993e-05,
      "loss": 1.0082,
      "step": 68720
    },
    {
      "epoch": 0.24088151489343806,
      "grad_norm": 3.515625,
      "learning_rate": 4.014591291954346e-05,
      "loss": 1.0337,
      "step": 68730
    },
    {
      "epoch": 0.24091656240033366,
      "grad_norm": 2.59375,
      "learning_rate": 4.015175410927698e-05,
      "loss": 0.9777,
      "step": 68740
    },
    {
      "epoch": 0.24095160990722925,
      "grad_norm": 3.125,
      "learning_rate": 4.01575952990105e-05,
      "loss": 1.0017,
      "step": 68750
    },
    {
      "epoch": 0.24098665741412484,
      "grad_norm": 3.015625,
      "learning_rate": 4.016343648874403e-05,
      "loss": 0.9131,
      "step": 68760
    },
    {
      "epoch": 0.24102170492102043,
      "grad_norm": 3.234375,
      "learning_rate": 4.016927767847755e-05,
      "loss": 0.9106,
      "step": 68770
    },
    {
      "epoch": 0.24105675242791605,
      "grad_norm": 3.046875,
      "learning_rate": 4.017511886821108e-05,
      "loss": 1.0416,
      "step": 68780
    },
    {
      "epoch": 0.24109179993481164,
      "grad_norm": 3.078125,
      "learning_rate": 4.018096005794461e-05,
      "loss": 0.9501,
      "step": 68790
    },
    {
      "epoch": 0.24112684744170723,
      "grad_norm": 3.125,
      "learning_rate": 4.018680124767813e-05,
      "loss": 0.9354,
      "step": 68800
    },
    {
      "epoch": 0.24116189494860282,
      "grad_norm": 3.28125,
      "learning_rate": 4.0192642437411656e-05,
      "loss": 1.0995,
      "step": 68810
    },
    {
      "epoch": 0.24119694245549841,
      "grad_norm": 3.234375,
      "learning_rate": 4.019848362714518e-05,
      "loss": 0.9877,
      "step": 68820
    },
    {
      "epoch": 0.24123198996239403,
      "grad_norm": 2.890625,
      "learning_rate": 4.02043248168787e-05,
      "loss": 0.9459,
      "step": 68830
    },
    {
      "epoch": 0.24126703746928962,
      "grad_norm": 2.859375,
      "learning_rate": 4.021016600661223e-05,
      "loss": 1.0394,
      "step": 68840
    },
    {
      "epoch": 0.24130208497618522,
      "grad_norm": 3.59375,
      "learning_rate": 4.0216007196345756e-05,
      "loss": 0.9707,
      "step": 68850
    },
    {
      "epoch": 0.2413371324830808,
      "grad_norm": 3.53125,
      "learning_rate": 4.022184838607928e-05,
      "loss": 0.9709,
      "step": 68860
    },
    {
      "epoch": 0.24137217998997643,
      "grad_norm": 3.546875,
      "learning_rate": 4.0227689575812805e-05,
      "loss": 0.9245,
      "step": 68870
    },
    {
      "epoch": 0.24140722749687202,
      "grad_norm": 3.0625,
      "learning_rate": 4.023353076554633e-05,
      "loss": 0.9153,
      "step": 68880
    },
    {
      "epoch": 0.2414422750037676,
      "grad_norm": 2.8125,
      "learning_rate": 4.0239371955279855e-05,
      "loss": 1.0577,
      "step": 68890
    },
    {
      "epoch": 0.2414773225106632,
      "grad_norm": 3.3125,
      "learning_rate": 4.0245213145013377e-05,
      "loss": 1.0173,
      "step": 68900
    },
    {
      "epoch": 0.2415123700175588,
      "grad_norm": 3.0625,
      "learning_rate": 4.0251054334746905e-05,
      "loss": 0.9509,
      "step": 68910
    },
    {
      "epoch": 0.2415474175244544,
      "grad_norm": 3.453125,
      "learning_rate": 4.0256895524480426e-05,
      "loss": 1.0002,
      "step": 68920
    },
    {
      "epoch": 0.24158246503135,
      "grad_norm": 3.28125,
      "learning_rate": 4.0262736714213955e-05,
      "loss": 1.0256,
      "step": 68930
    },
    {
      "epoch": 0.2416175125382456,
      "grad_norm": 3.484375,
      "learning_rate": 4.026857790394748e-05,
      "loss": 0.9499,
      "step": 68940
    },
    {
      "epoch": 0.24165256004514118,
      "grad_norm": 2.65625,
      "learning_rate": 4.0274419093681004e-05,
      "loss": 0.8994,
      "step": 68950
    },
    {
      "epoch": 0.24168760755203678,
      "grad_norm": 2.84375,
      "learning_rate": 4.0280260283414526e-05,
      "loss": 0.9938,
      "step": 68960
    },
    {
      "epoch": 0.2417226550589324,
      "grad_norm": 3.390625,
      "learning_rate": 4.0286101473148054e-05,
      "loss": 1.0023,
      "step": 68970
    },
    {
      "epoch": 0.241757702565828,
      "grad_norm": 3.5,
      "learning_rate": 4.0291942662881575e-05,
      "loss": 1.0115,
      "step": 68980
    },
    {
      "epoch": 0.24179275007272358,
      "grad_norm": 2.5625,
      "learning_rate": 4.0297783852615104e-05,
      "loss": 0.9318,
      "step": 68990
    },
    {
      "epoch": 0.24182779757961917,
      "grad_norm": 3.03125,
      "learning_rate": 4.0303625042348625e-05,
      "loss": 0.9501,
      "step": 69000
    },
    {
      "epoch": 0.24186284508651476,
      "grad_norm": 3.765625,
      "learning_rate": 4.030946623208215e-05,
      "loss": 1.0957,
      "step": 69010
    },
    {
      "epoch": 0.24189789259341038,
      "grad_norm": 3.25,
      "learning_rate": 4.031530742181568e-05,
      "loss": 0.9815,
      "step": 69020
    },
    {
      "epoch": 0.24193294010030597,
      "grad_norm": 3.15625,
      "learning_rate": 4.03211486115492e-05,
      "loss": 0.9797,
      "step": 69030
    },
    {
      "epoch": 0.24196798760720156,
      "grad_norm": 3.0,
      "learning_rate": 4.0326989801282724e-05,
      "loss": 0.987,
      "step": 69040
    },
    {
      "epoch": 0.24200303511409715,
      "grad_norm": 2.859375,
      "learning_rate": 4.033283099101625e-05,
      "loss": 0.985,
      "step": 69050
    },
    {
      "epoch": 0.24203808262099274,
      "grad_norm": 2.796875,
      "learning_rate": 4.0338672180749774e-05,
      "loss": 0.9948,
      "step": 69060
    },
    {
      "epoch": 0.24207313012788836,
      "grad_norm": 3.28125,
      "learning_rate": 4.03445133704833e-05,
      "loss": 1.0073,
      "step": 69070
    },
    {
      "epoch": 0.24210817763478396,
      "grad_norm": 3.296875,
      "learning_rate": 4.0350354560216824e-05,
      "loss": 0.9605,
      "step": 69080
    },
    {
      "epoch": 0.24214322514167955,
      "grad_norm": 3.203125,
      "learning_rate": 4.035619574995035e-05,
      "loss": 0.9208,
      "step": 69090
    },
    {
      "epoch": 0.24217827264857514,
      "grad_norm": 3.1875,
      "learning_rate": 4.036203693968388e-05,
      "loss": 0.9076,
      "step": 69100
    },
    {
      "epoch": 0.24221332015547073,
      "grad_norm": 3.109375,
      "learning_rate": 4.03678781294174e-05,
      "loss": 1.0109,
      "step": 69110
    },
    {
      "epoch": 0.24224836766236635,
      "grad_norm": 3.34375,
      "learning_rate": 4.037371931915093e-05,
      "loss": 0.9762,
      "step": 69120
    },
    {
      "epoch": 0.24228341516926194,
      "grad_norm": 3.796875,
      "learning_rate": 4.037956050888445e-05,
      "loss": 0.9959,
      "step": 69130
    },
    {
      "epoch": 0.24231846267615753,
      "grad_norm": 3.140625,
      "learning_rate": 4.038540169861797e-05,
      "loss": 0.9331,
      "step": 69140
    },
    {
      "epoch": 0.24235351018305312,
      "grad_norm": 3.21875,
      "learning_rate": 4.03912428883515e-05,
      "loss": 0.9688,
      "step": 69150
    },
    {
      "epoch": 0.2423885576899487,
      "grad_norm": 3.65625,
      "learning_rate": 4.039708407808503e-05,
      "loss": 1.0764,
      "step": 69160
    },
    {
      "epoch": 0.24242360519684433,
      "grad_norm": 2.953125,
      "learning_rate": 4.040292526781855e-05,
      "loss": 0.9345,
      "step": 69170
    },
    {
      "epoch": 0.24245865270373992,
      "grad_norm": 3.984375,
      "learning_rate": 4.040876645755208e-05,
      "loss": 0.9912,
      "step": 69180
    },
    {
      "epoch": 0.24249370021063552,
      "grad_norm": 2.96875,
      "learning_rate": 4.04146076472856e-05,
      "loss": 0.9277,
      "step": 69190
    },
    {
      "epoch": 0.2425287477175311,
      "grad_norm": 3.28125,
      "learning_rate": 4.042044883701913e-05,
      "loss": 0.9837,
      "step": 69200
    },
    {
      "epoch": 0.2425637952244267,
      "grad_norm": 3.25,
      "learning_rate": 4.042629002675265e-05,
      "loss": 1.1058,
      "step": 69210
    },
    {
      "epoch": 0.24259884273132232,
      "grad_norm": 2.9375,
      "learning_rate": 4.043213121648617e-05,
      "loss": 0.9745,
      "step": 69220
    },
    {
      "epoch": 0.2426338902382179,
      "grad_norm": 3.09375,
      "learning_rate": 4.04379724062197e-05,
      "loss": 0.981,
      "step": 69230
    },
    {
      "epoch": 0.2426689377451135,
      "grad_norm": 3.28125,
      "learning_rate": 4.044381359595323e-05,
      "loss": 0.9892,
      "step": 69240
    },
    {
      "epoch": 0.2427039852520091,
      "grad_norm": 3.0,
      "learning_rate": 4.044965478568675e-05,
      "loss": 1.0563,
      "step": 69250
    },
    {
      "epoch": 0.24273903275890468,
      "grad_norm": 2.890625,
      "learning_rate": 4.045549597542028e-05,
      "loss": 1.0764,
      "step": 69260
    },
    {
      "epoch": 0.2427740802658003,
      "grad_norm": 2.90625,
      "learning_rate": 4.04613371651538e-05,
      "loss": 0.9914,
      "step": 69270
    },
    {
      "epoch": 0.2428091277726959,
      "grad_norm": 3.359375,
      "learning_rate": 4.046717835488733e-05,
      "loss": 1.0274,
      "step": 69280
    },
    {
      "epoch": 0.24284417527959148,
      "grad_norm": 3.34375,
      "learning_rate": 4.047301954462085e-05,
      "loss": 1.0301,
      "step": 69290
    },
    {
      "epoch": 0.24287922278648708,
      "grad_norm": 3.28125,
      "learning_rate": 4.047886073435437e-05,
      "loss": 0.982,
      "step": 69300
    },
    {
      "epoch": 0.24291427029338267,
      "grad_norm": 2.90625,
      "learning_rate": 4.04847019240879e-05,
      "loss": 0.9567,
      "step": 69310
    },
    {
      "epoch": 0.24294931780027829,
      "grad_norm": 3.46875,
      "learning_rate": 4.049054311382143e-05,
      "loss": 1.0002,
      "step": 69320
    },
    {
      "epoch": 0.24298436530717388,
      "grad_norm": 3.28125,
      "learning_rate": 4.0496384303554955e-05,
      "loss": 0.9509,
      "step": 69330
    },
    {
      "epoch": 0.24301941281406947,
      "grad_norm": 3.0625,
      "learning_rate": 4.0502225493288477e-05,
      "loss": 1.0188,
      "step": 69340
    },
    {
      "epoch": 0.24305446032096506,
      "grad_norm": 3.5,
      "learning_rate": 4.0508066683022e-05,
      "loss": 1.0612,
      "step": 69350
    },
    {
      "epoch": 0.24308950782786065,
      "grad_norm": 3.1875,
      "learning_rate": 4.0513907872755526e-05,
      "loss": 0.9876,
      "step": 69360
    },
    {
      "epoch": 0.24312455533475627,
      "grad_norm": 3.03125,
      "learning_rate": 4.051974906248905e-05,
      "loss": 0.966,
      "step": 69370
    },
    {
      "epoch": 0.24315960284165186,
      "grad_norm": 3.171875,
      "learning_rate": 4.0525590252222576e-05,
      "loss": 0.9528,
      "step": 69380
    },
    {
      "epoch": 0.24319465034854745,
      "grad_norm": 3.140625,
      "learning_rate": 4.05314314419561e-05,
      "loss": 0.987,
      "step": 69390
    },
    {
      "epoch": 0.24322969785544304,
      "grad_norm": 3.390625,
      "learning_rate": 4.0537272631689626e-05,
      "loss": 0.9681,
      "step": 69400
    },
    {
      "epoch": 0.24326474536233866,
      "grad_norm": 3.296875,
      "learning_rate": 4.0543113821423154e-05,
      "loss": 0.9914,
      "step": 69410
    },
    {
      "epoch": 0.24329979286923425,
      "grad_norm": 2.984375,
      "learning_rate": 4.0548955011156675e-05,
      "loss": 1.0422,
      "step": 69420
    },
    {
      "epoch": 0.24333484037612985,
      "grad_norm": 3.15625,
      "learning_rate": 4.05547962008902e-05,
      "loss": 0.9855,
      "step": 69430
    },
    {
      "epoch": 0.24336988788302544,
      "grad_norm": 3.109375,
      "learning_rate": 4.0560637390623725e-05,
      "loss": 0.9541,
      "step": 69440
    },
    {
      "epoch": 0.24340493538992103,
      "grad_norm": 3.296875,
      "learning_rate": 4.0566478580357246e-05,
      "loss": 1.033,
      "step": 69450
    },
    {
      "epoch": 0.24343998289681665,
      "grad_norm": 3.4375,
      "learning_rate": 4.0572319770090775e-05,
      "loss": 1.042,
      "step": 69460
    },
    {
      "epoch": 0.24347503040371224,
      "grad_norm": 3.296875,
      "learning_rate": 4.0578160959824296e-05,
      "loss": 0.9736,
      "step": 69470
    },
    {
      "epoch": 0.24351007791060783,
      "grad_norm": 3.515625,
      "learning_rate": 4.0584002149557824e-05,
      "loss": 0.9861,
      "step": 69480
    },
    {
      "epoch": 0.24354512541750342,
      "grad_norm": 3.140625,
      "learning_rate": 4.058984333929135e-05,
      "loss": 0.9528,
      "step": 69490
    },
    {
      "epoch": 0.243580172924399,
      "grad_norm": 2.796875,
      "learning_rate": 4.0595684529024874e-05,
      "loss": 1.0361,
      "step": 69500
    },
    {
      "epoch": 0.24361522043129463,
      "grad_norm": 3.671875,
      "learning_rate": 4.0601525718758395e-05,
      "loss": 1.0779,
      "step": 69510
    },
    {
      "epoch": 0.24365026793819022,
      "grad_norm": 3.0,
      "learning_rate": 4.0607366908491924e-05,
      "loss": 0.9767,
      "step": 69520
    },
    {
      "epoch": 0.24368531544508581,
      "grad_norm": 2.875,
      "learning_rate": 4.0613208098225445e-05,
      "loss": 0.9679,
      "step": 69530
    },
    {
      "epoch": 0.2437203629519814,
      "grad_norm": 3.046875,
      "learning_rate": 4.0619049287958973e-05,
      "loss": 0.983,
      "step": 69540
    },
    {
      "epoch": 0.243755410458877,
      "grad_norm": 3.1875,
      "learning_rate": 4.06248904776925e-05,
      "loss": 0.8909,
      "step": 69550
    },
    {
      "epoch": 0.24379045796577262,
      "grad_norm": 3.0625,
      "learning_rate": 4.063073166742602e-05,
      "loss": 0.9929,
      "step": 69560
    },
    {
      "epoch": 0.2438255054726682,
      "grad_norm": 3.625,
      "learning_rate": 4.063657285715955e-05,
      "loss": 0.9852,
      "step": 69570
    },
    {
      "epoch": 0.2438605529795638,
      "grad_norm": 3.046875,
      "learning_rate": 4.064241404689307e-05,
      "loss": 1.008,
      "step": 69580
    },
    {
      "epoch": 0.2438956004864594,
      "grad_norm": 2.78125,
      "learning_rate": 4.06482552366266e-05,
      "loss": 1.0394,
      "step": 69590
    },
    {
      "epoch": 0.24393064799335498,
      "grad_norm": 3.21875,
      "learning_rate": 4.065409642636012e-05,
      "loss": 0.9331,
      "step": 69600
    },
    {
      "epoch": 0.2439656955002506,
      "grad_norm": 3.46875,
      "learning_rate": 4.0659937616093644e-05,
      "loss": 0.9913,
      "step": 69610
    },
    {
      "epoch": 0.2440007430071462,
      "grad_norm": 2.875,
      "learning_rate": 4.066577880582717e-05,
      "loss": 0.8904,
      "step": 69620
    },
    {
      "epoch": 0.24403579051404178,
      "grad_norm": 3.1875,
      "learning_rate": 4.06716199955607e-05,
      "loss": 1.0189,
      "step": 69630
    },
    {
      "epoch": 0.24407083802093738,
      "grad_norm": 3.1875,
      "learning_rate": 4.067746118529422e-05,
      "loss": 0.9697,
      "step": 69640
    },
    {
      "epoch": 0.24410588552783297,
      "grad_norm": 3.4375,
      "learning_rate": 4.068330237502775e-05,
      "loss": 0.9912,
      "step": 69650
    },
    {
      "epoch": 0.24414093303472859,
      "grad_norm": 2.9375,
      "learning_rate": 4.068914356476127e-05,
      "loss": 0.9304,
      "step": 69660
    },
    {
      "epoch": 0.24417598054162418,
      "grad_norm": 3.265625,
      "learning_rate": 4.06949847544948e-05,
      "loss": 0.9369,
      "step": 69670
    },
    {
      "epoch": 0.24421102804851977,
      "grad_norm": 2.890625,
      "learning_rate": 4.070082594422832e-05,
      "loss": 0.9398,
      "step": 69680
    },
    {
      "epoch": 0.24424607555541536,
      "grad_norm": 3.25,
      "learning_rate": 4.070666713396184e-05,
      "loss": 0.9863,
      "step": 69690
    },
    {
      "epoch": 0.24428112306231095,
      "grad_norm": 3.046875,
      "learning_rate": 4.071250832369537e-05,
      "loss": 1.0119,
      "step": 69700
    },
    {
      "epoch": 0.24431617056920657,
      "grad_norm": 3.234375,
      "learning_rate": 4.07183495134289e-05,
      "loss": 0.9988,
      "step": 69710
    },
    {
      "epoch": 0.24435121807610216,
      "grad_norm": 3.375,
      "learning_rate": 4.072419070316242e-05,
      "loss": 0.9936,
      "step": 69720
    },
    {
      "epoch": 0.24438626558299775,
      "grad_norm": 3.3125,
      "learning_rate": 4.073003189289595e-05,
      "loss": 1.0288,
      "step": 69730
    },
    {
      "epoch": 0.24442131308989334,
      "grad_norm": 3.609375,
      "learning_rate": 4.073587308262947e-05,
      "loss": 1.0,
      "step": 69740
    },
    {
      "epoch": 0.24445636059678894,
      "grad_norm": 3.03125,
      "learning_rate": 4.0741714272363e-05,
      "loss": 0.9242,
      "step": 69750
    },
    {
      "epoch": 0.24449140810368455,
      "grad_norm": 3.453125,
      "learning_rate": 4.074755546209652e-05,
      "loss": 0.978,
      "step": 69760
    },
    {
      "epoch": 0.24452645561058015,
      "grad_norm": 3.171875,
      "learning_rate": 4.075339665183004e-05,
      "loss": 0.9745,
      "step": 69770
    },
    {
      "epoch": 0.24456150311747574,
      "grad_norm": 3.03125,
      "learning_rate": 4.075923784156357e-05,
      "loss": 1.0244,
      "step": 69780
    },
    {
      "epoch": 0.24459655062437133,
      "grad_norm": 3.421875,
      "learning_rate": 4.07650790312971e-05,
      "loss": 1.0317,
      "step": 69790
    },
    {
      "epoch": 0.24463159813126692,
      "grad_norm": 3.1875,
      "learning_rate": 4.0770920221030626e-05,
      "loss": 1.0426,
      "step": 69800
    },
    {
      "epoch": 0.24466664563816254,
      "grad_norm": 3.046875,
      "learning_rate": 4.077676141076415e-05,
      "loss": 1.0393,
      "step": 69810
    },
    {
      "epoch": 0.24470169314505813,
      "grad_norm": 2.78125,
      "learning_rate": 4.078260260049767e-05,
      "loss": 1.06,
      "step": 69820
    },
    {
      "epoch": 0.24473674065195372,
      "grad_norm": 3.109375,
      "learning_rate": 4.07884437902312e-05,
      "loss": 1.0459,
      "step": 69830
    },
    {
      "epoch": 0.2447717881588493,
      "grad_norm": 3.03125,
      "learning_rate": 4.079428497996472e-05,
      "loss": 0.9954,
      "step": 69840
    },
    {
      "epoch": 0.2448068356657449,
      "grad_norm": 3.1875,
      "learning_rate": 4.080012616969825e-05,
      "loss": 1.0233,
      "step": 69850
    },
    {
      "epoch": 0.24484188317264052,
      "grad_norm": 3.09375,
      "learning_rate": 4.0805967359431775e-05,
      "loss": 1.0117,
      "step": 69860
    },
    {
      "epoch": 0.24487693067953611,
      "grad_norm": 3.046875,
      "learning_rate": 4.08118085491653e-05,
      "loss": 0.9672,
      "step": 69870
    },
    {
      "epoch": 0.2449119781864317,
      "grad_norm": 2.78125,
      "learning_rate": 4.0817649738898825e-05,
      "loss": 0.9441,
      "step": 69880
    },
    {
      "epoch": 0.2449470256933273,
      "grad_norm": 2.71875,
      "learning_rate": 4.0823490928632346e-05,
      "loss": 0.9072,
      "step": 69890
    },
    {
      "epoch": 0.2449820732002229,
      "grad_norm": 4.5625,
      "learning_rate": 4.082933211836587e-05,
      "loss": 0.9761,
      "step": 69900
    },
    {
      "epoch": 0.2450171207071185,
      "grad_norm": 2.9375,
      "learning_rate": 4.0835173308099396e-05,
      "loss": 0.9961,
      "step": 69910
    },
    {
      "epoch": 0.2450521682140141,
      "grad_norm": 3.59375,
      "learning_rate": 4.084101449783292e-05,
      "loss": 1.0073,
      "step": 69920
    },
    {
      "epoch": 0.2450872157209097,
      "grad_norm": 3.328125,
      "learning_rate": 4.0846855687566446e-05,
      "loss": 0.9377,
      "step": 69930
    },
    {
      "epoch": 0.24512226322780528,
      "grad_norm": 3.4375,
      "learning_rate": 4.0852696877299974e-05,
      "loss": 0.902,
      "step": 69940
    },
    {
      "epoch": 0.2451573107347009,
      "grad_norm": 3.015625,
      "learning_rate": 4.0858538067033495e-05,
      "loss": 0.9807,
      "step": 69950
    },
    {
      "epoch": 0.2451923582415965,
      "grad_norm": 3.390625,
      "learning_rate": 4.0864379256767024e-05,
      "loss": 0.9879,
      "step": 69960
    },
    {
      "epoch": 0.24522740574849208,
      "grad_norm": 3.4375,
      "learning_rate": 4.0870220446500545e-05,
      "loss": 0.9699,
      "step": 69970
    },
    {
      "epoch": 0.24526245325538767,
      "grad_norm": 3.15625,
      "learning_rate": 4.0876061636234067e-05,
      "loss": 0.8958,
      "step": 69980
    },
    {
      "epoch": 0.24529750076228327,
      "grad_norm": 3.375,
      "learning_rate": 4.0881902825967595e-05,
      "loss": 0.9731,
      "step": 69990
    },
    {
      "epoch": 0.24533254826917889,
      "grad_norm": 3.5,
      "learning_rate": 4.0887744015701116e-05,
      "loss": 1.063,
      "step": 70000
    },
    {
      "epoch": 0.24533254826917889,
      "eval_loss": 0.9233139157295227,
      "eval_runtime": 548.6643,
      "eval_samples_per_second": 693.386,
      "eval_steps_per_second": 57.782,
      "step": 70000
    },
    {
      "epoch": 0.24536759577607448,
      "grad_norm": 3.703125,
      "learning_rate": 4.0893585205434644e-05,
      "loss": 0.9741,
      "step": 70010
    },
    {
      "epoch": 0.24540264328297007,
      "grad_norm": 3.359375,
      "learning_rate": 4.089942639516817e-05,
      "loss": 0.9923,
      "step": 70020
    },
    {
      "epoch": 0.24543769078986566,
      "grad_norm": 2.65625,
      "learning_rate": 4.0905267584901694e-05,
      "loss": 0.9499,
      "step": 70030
    },
    {
      "epoch": 0.24547273829676125,
      "grad_norm": 3.140625,
      "learning_rate": 4.091110877463522e-05,
      "loss": 0.8751,
      "step": 70040
    },
    {
      "epoch": 0.24550778580365687,
      "grad_norm": 3.140625,
      "learning_rate": 4.0916949964368744e-05,
      "loss": 1.0201,
      "step": 70050
    },
    {
      "epoch": 0.24554283331055246,
      "grad_norm": 3.140625,
      "learning_rate": 4.092279115410227e-05,
      "loss": 1.084,
      "step": 70060
    },
    {
      "epoch": 0.24557788081744805,
      "grad_norm": 3.3125,
      "learning_rate": 4.0928632343835794e-05,
      "loss": 0.8796,
      "step": 70070
    },
    {
      "epoch": 0.24561292832434364,
      "grad_norm": 2.96875,
      "learning_rate": 4.0934473533569315e-05,
      "loss": 0.9339,
      "step": 70080
    },
    {
      "epoch": 0.24564797583123923,
      "grad_norm": 3.234375,
      "learning_rate": 4.094031472330284e-05,
      "loss": 0.9821,
      "step": 70090
    },
    {
      "epoch": 0.24568302333813485,
      "grad_norm": 3.28125,
      "learning_rate": 4.094615591303637e-05,
      "loss": 0.991,
      "step": 70100
    },
    {
      "epoch": 0.24571807084503045,
      "grad_norm": 3.109375,
      "learning_rate": 4.095199710276989e-05,
      "loss": 1.0963,
      "step": 70110
    },
    {
      "epoch": 0.24575311835192604,
      "grad_norm": 3.34375,
      "learning_rate": 4.095783829250342e-05,
      "loss": 1.0578,
      "step": 70120
    },
    {
      "epoch": 0.24578816585882163,
      "grad_norm": 2.75,
      "learning_rate": 4.096367948223694e-05,
      "loss": 0.9923,
      "step": 70130
    },
    {
      "epoch": 0.24582321336571722,
      "grad_norm": 2.90625,
      "learning_rate": 4.096952067197047e-05,
      "loss": 0.9289,
      "step": 70140
    },
    {
      "epoch": 0.24585826087261284,
      "grad_norm": 3.265625,
      "learning_rate": 4.097536186170399e-05,
      "loss": 0.9293,
      "step": 70150
    },
    {
      "epoch": 0.24589330837950843,
      "grad_norm": 3.609375,
      "learning_rate": 4.0981203051437514e-05,
      "loss": 0.9749,
      "step": 70160
    },
    {
      "epoch": 0.24592835588640402,
      "grad_norm": 3.109375,
      "learning_rate": 4.098704424117104e-05,
      "loss": 1.0045,
      "step": 70170
    },
    {
      "epoch": 0.2459634033932996,
      "grad_norm": 3.109375,
      "learning_rate": 4.099288543090457e-05,
      "loss": 0.9706,
      "step": 70180
    },
    {
      "epoch": 0.2459984509001952,
      "grad_norm": 2.640625,
      "learning_rate": 4.099872662063809e-05,
      "loss": 0.9398,
      "step": 70190
    },
    {
      "epoch": 0.24603349840709082,
      "grad_norm": 3.734375,
      "learning_rate": 4.100456781037162e-05,
      "loss": 0.9468,
      "step": 70200
    },
    {
      "epoch": 0.24606854591398641,
      "grad_norm": 3.34375,
      "learning_rate": 4.101040900010514e-05,
      "loss": 1.0473,
      "step": 70210
    },
    {
      "epoch": 0.246103593420882,
      "grad_norm": 3.1875,
      "learning_rate": 4.101625018983867e-05,
      "loss": 1.0676,
      "step": 70220
    },
    {
      "epoch": 0.2461386409277776,
      "grad_norm": 3.578125,
      "learning_rate": 4.102209137957219e-05,
      "loss": 0.9713,
      "step": 70230
    },
    {
      "epoch": 0.2461736884346732,
      "grad_norm": 2.875,
      "learning_rate": 4.102793256930572e-05,
      "loss": 0.9756,
      "step": 70240
    },
    {
      "epoch": 0.2462087359415688,
      "grad_norm": 3.34375,
      "learning_rate": 4.103377375903925e-05,
      "loss": 0.9701,
      "step": 70250
    },
    {
      "epoch": 0.2462437834484644,
      "grad_norm": 3.484375,
      "learning_rate": 4.103961494877277e-05,
      "loss": 0.9773,
      "step": 70260
    },
    {
      "epoch": 0.24627883095536,
      "grad_norm": 3.203125,
      "learning_rate": 4.10454561385063e-05,
      "loss": 0.9672,
      "step": 70270
    },
    {
      "epoch": 0.24631387846225558,
      "grad_norm": 2.71875,
      "learning_rate": 4.105129732823982e-05,
      "loss": 1.0633,
      "step": 70280
    },
    {
      "epoch": 0.24634892596915117,
      "grad_norm": 3.765625,
      "learning_rate": 4.105713851797334e-05,
      "loss": 1.0255,
      "step": 70290
    },
    {
      "epoch": 0.2463839734760468,
      "grad_norm": 3.078125,
      "learning_rate": 4.106297970770687e-05,
      "loss": 0.9819,
      "step": 70300
    },
    {
      "epoch": 0.24641902098294238,
      "grad_norm": 2.828125,
      "learning_rate": 4.106882089744039e-05,
      "loss": 0.9427,
      "step": 70310
    },
    {
      "epoch": 0.24645406848983797,
      "grad_norm": 2.703125,
      "learning_rate": 4.107466208717392e-05,
      "loss": 0.9901,
      "step": 70320
    },
    {
      "epoch": 0.24648911599673357,
      "grad_norm": 3.265625,
      "learning_rate": 4.1080503276907446e-05,
      "loss": 0.973,
      "step": 70330
    },
    {
      "epoch": 0.24652416350362916,
      "grad_norm": 3.375,
      "learning_rate": 4.108634446664097e-05,
      "loss": 0.9847,
      "step": 70340
    },
    {
      "epoch": 0.24655921101052478,
      "grad_norm": 3.3125,
      "learning_rate": 4.1092185656374496e-05,
      "loss": 0.9758,
      "step": 70350
    },
    {
      "epoch": 0.24659425851742037,
      "grad_norm": 3.484375,
      "learning_rate": 4.109802684610802e-05,
      "loss": 1.0148,
      "step": 70360
    },
    {
      "epoch": 0.24662930602431596,
      "grad_norm": 2.984375,
      "learning_rate": 4.110386803584154e-05,
      "loss": 1.0056,
      "step": 70370
    },
    {
      "epoch": 0.24666435353121155,
      "grad_norm": 3.234375,
      "learning_rate": 4.110970922557507e-05,
      "loss": 0.9175,
      "step": 70380
    },
    {
      "epoch": 0.24669940103810714,
      "grad_norm": 3.09375,
      "learning_rate": 4.111555041530859e-05,
      "loss": 0.9943,
      "step": 70390
    },
    {
      "epoch": 0.24673444854500276,
      "grad_norm": 3.25,
      "learning_rate": 4.112139160504212e-05,
      "loss": 0.9787,
      "step": 70400
    },
    {
      "epoch": 0.24676949605189835,
      "grad_norm": 3.109375,
      "learning_rate": 4.1127232794775645e-05,
      "loss": 0.9537,
      "step": 70410
    },
    {
      "epoch": 0.24680454355879394,
      "grad_norm": 3.34375,
      "learning_rate": 4.1133073984509166e-05,
      "loss": 0.9857,
      "step": 70420
    },
    {
      "epoch": 0.24683959106568953,
      "grad_norm": 3.140625,
      "learning_rate": 4.1138915174242695e-05,
      "loss": 1.06,
      "step": 70430
    },
    {
      "epoch": 0.24687463857258513,
      "grad_norm": 3.59375,
      "learning_rate": 4.1144756363976216e-05,
      "loss": 0.9553,
      "step": 70440
    },
    {
      "epoch": 0.24690968607948074,
      "grad_norm": 3.0625,
      "learning_rate": 4.115059755370974e-05,
      "loss": 0.9604,
      "step": 70450
    },
    {
      "epoch": 0.24694473358637634,
      "grad_norm": 3.1875,
      "learning_rate": 4.1156438743443266e-05,
      "loss": 1.0242,
      "step": 70460
    },
    {
      "epoch": 0.24697978109327193,
      "grad_norm": 3.28125,
      "learning_rate": 4.116227993317679e-05,
      "loss": 0.8897,
      "step": 70470
    },
    {
      "epoch": 0.24701482860016752,
      "grad_norm": 3.1875,
      "learning_rate": 4.1168121122910316e-05,
      "loss": 1.0003,
      "step": 70480
    },
    {
      "epoch": 0.24704987610706314,
      "grad_norm": 3.390625,
      "learning_rate": 4.1173962312643844e-05,
      "loss": 0.9915,
      "step": 70490
    },
    {
      "epoch": 0.24708492361395873,
      "grad_norm": 3.140625,
      "learning_rate": 4.1179803502377365e-05,
      "loss": 0.9724,
      "step": 70500
    },
    {
      "epoch": 0.24711997112085432,
      "grad_norm": 3.453125,
      "learning_rate": 4.1185644692110893e-05,
      "loss": 0.9511,
      "step": 70510
    },
    {
      "epoch": 0.2471550186277499,
      "grad_norm": 3.46875,
      "learning_rate": 4.1191485881844415e-05,
      "loss": 1.0282,
      "step": 70520
    },
    {
      "epoch": 0.2471900661346455,
      "grad_norm": 3.4375,
      "learning_rate": 4.119732707157794e-05,
      "loss": 0.9263,
      "step": 70530
    },
    {
      "epoch": 0.24722511364154112,
      "grad_norm": 3.328125,
      "learning_rate": 4.1203168261311465e-05,
      "loss": 1.0261,
      "step": 70540
    },
    {
      "epoch": 0.2472601611484367,
      "grad_norm": 3.0,
      "learning_rate": 4.1209009451044986e-05,
      "loss": 0.9991,
      "step": 70550
    },
    {
      "epoch": 0.2472952086553323,
      "grad_norm": 2.921875,
      "learning_rate": 4.121485064077852e-05,
      "loss": 0.9723,
      "step": 70560
    },
    {
      "epoch": 0.2473302561622279,
      "grad_norm": 3.1875,
      "learning_rate": 4.122069183051204e-05,
      "loss": 1.0221,
      "step": 70570
    },
    {
      "epoch": 0.2473653036691235,
      "grad_norm": 3.171875,
      "learning_rate": 4.1226533020245564e-05,
      "loss": 0.8983,
      "step": 70580
    },
    {
      "epoch": 0.2474003511760191,
      "grad_norm": 3.03125,
      "learning_rate": 4.123237420997909e-05,
      "loss": 1.0042,
      "step": 70590
    },
    {
      "epoch": 0.2474353986829147,
      "grad_norm": 3.34375,
      "learning_rate": 4.1238215399712614e-05,
      "loss": 0.9811,
      "step": 70600
    },
    {
      "epoch": 0.2474704461898103,
      "grad_norm": 3.140625,
      "learning_rate": 4.124405658944614e-05,
      "loss": 0.9416,
      "step": 70610
    },
    {
      "epoch": 0.24750549369670588,
      "grad_norm": 4.4375,
      "learning_rate": 4.124989777917966e-05,
      "loss": 0.9945,
      "step": 70620
    },
    {
      "epoch": 0.24754054120360147,
      "grad_norm": 2.890625,
      "learning_rate": 4.1255738968913185e-05,
      "loss": 0.8947,
      "step": 70630
    },
    {
      "epoch": 0.2475755887104971,
      "grad_norm": 3.171875,
      "learning_rate": 4.126158015864672e-05,
      "loss": 0.9716,
      "step": 70640
    },
    {
      "epoch": 0.24761063621739268,
      "grad_norm": 3.25,
      "learning_rate": 4.126742134838024e-05,
      "loss": 0.9746,
      "step": 70650
    },
    {
      "epoch": 0.24764568372428827,
      "grad_norm": 2.96875,
      "learning_rate": 4.127326253811376e-05,
      "loss": 0.9409,
      "step": 70660
    },
    {
      "epoch": 0.24768073123118386,
      "grad_norm": 3.546875,
      "learning_rate": 4.127910372784729e-05,
      "loss": 0.9782,
      "step": 70670
    },
    {
      "epoch": 0.24771577873807946,
      "grad_norm": 3.015625,
      "learning_rate": 4.128494491758081e-05,
      "loss": 1.0245,
      "step": 70680
    },
    {
      "epoch": 0.24775082624497508,
      "grad_norm": 2.984375,
      "learning_rate": 4.129078610731434e-05,
      "loss": 0.982,
      "step": 70690
    },
    {
      "epoch": 0.24778587375187067,
      "grad_norm": 2.84375,
      "learning_rate": 4.129662729704786e-05,
      "loss": 0.9002,
      "step": 70700
    },
    {
      "epoch": 0.24782092125876626,
      "grad_norm": 3.1875,
      "learning_rate": 4.130246848678139e-05,
      "loss": 1.0576,
      "step": 70710
    },
    {
      "epoch": 0.24785596876566185,
      "grad_norm": 2.890625,
      "learning_rate": 4.130830967651492e-05,
      "loss": 0.9738,
      "step": 70720
    },
    {
      "epoch": 0.24789101627255744,
      "grad_norm": 3.078125,
      "learning_rate": 4.131415086624844e-05,
      "loss": 0.9852,
      "step": 70730
    },
    {
      "epoch": 0.24792606377945306,
      "grad_norm": 3.3125,
      "learning_rate": 4.131999205598197e-05,
      "loss": 1.0369,
      "step": 70740
    },
    {
      "epoch": 0.24796111128634865,
      "grad_norm": 3.0625,
      "learning_rate": 4.132583324571549e-05,
      "loss": 0.9995,
      "step": 70750
    },
    {
      "epoch": 0.24799615879324424,
      "grad_norm": 3.265625,
      "learning_rate": 4.133167443544901e-05,
      "loss": 1.0141,
      "step": 70760
    },
    {
      "epoch": 0.24803120630013983,
      "grad_norm": 3.171875,
      "learning_rate": 4.133751562518254e-05,
      "loss": 0.8985,
      "step": 70770
    },
    {
      "epoch": 0.24806625380703543,
      "grad_norm": 3.234375,
      "learning_rate": 4.134335681491606e-05,
      "loss": 1.0326,
      "step": 70780
    },
    {
      "epoch": 0.24810130131393104,
      "grad_norm": 3.296875,
      "learning_rate": 4.134919800464959e-05,
      "loss": 0.9933,
      "step": 70790
    },
    {
      "epoch": 0.24813634882082664,
      "grad_norm": 3.046875,
      "learning_rate": 4.135503919438312e-05,
      "loss": 0.927,
      "step": 70800
    },
    {
      "epoch": 0.24817139632772223,
      "grad_norm": 3.421875,
      "learning_rate": 4.136088038411664e-05,
      "loss": 1.0488,
      "step": 70810
    },
    {
      "epoch": 0.24820644383461782,
      "grad_norm": 3.125,
      "learning_rate": 4.136672157385017e-05,
      "loss": 0.9185,
      "step": 70820
    },
    {
      "epoch": 0.2482414913415134,
      "grad_norm": 3.0,
      "learning_rate": 4.137256276358369e-05,
      "loss": 0.9923,
      "step": 70830
    },
    {
      "epoch": 0.24827653884840903,
      "grad_norm": 2.5625,
      "learning_rate": 4.137840395331721e-05,
      "loss": 0.9256,
      "step": 70840
    },
    {
      "epoch": 0.24831158635530462,
      "grad_norm": 2.953125,
      "learning_rate": 4.138424514305074e-05,
      "loss": 0.9527,
      "step": 70850
    },
    {
      "epoch": 0.2483466338622002,
      "grad_norm": 2.96875,
      "learning_rate": 4.139008633278426e-05,
      "loss": 0.9624,
      "step": 70860
    },
    {
      "epoch": 0.2483816813690958,
      "grad_norm": 3.203125,
      "learning_rate": 4.139592752251779e-05,
      "loss": 1.0289,
      "step": 70870
    },
    {
      "epoch": 0.2484167288759914,
      "grad_norm": 2.859375,
      "learning_rate": 4.1401768712251316e-05,
      "loss": 0.9934,
      "step": 70880
    },
    {
      "epoch": 0.248451776382887,
      "grad_norm": 2.90625,
      "learning_rate": 4.140760990198484e-05,
      "loss": 0.9073,
      "step": 70890
    },
    {
      "epoch": 0.2484868238897826,
      "grad_norm": 3.171875,
      "learning_rate": 4.1413451091718366e-05,
      "loss": 1.0058,
      "step": 70900
    },
    {
      "epoch": 0.2485218713966782,
      "grad_norm": 2.84375,
      "learning_rate": 4.141929228145189e-05,
      "loss": 0.9999,
      "step": 70910
    },
    {
      "epoch": 0.2485569189035738,
      "grad_norm": 2.65625,
      "learning_rate": 4.1425133471185415e-05,
      "loss": 1.0135,
      "step": 70920
    },
    {
      "epoch": 0.24859196641046938,
      "grad_norm": 3.5625,
      "learning_rate": 4.143097466091894e-05,
      "loss": 1.0161,
      "step": 70930
    },
    {
      "epoch": 0.248627013917365,
      "grad_norm": 2.953125,
      "learning_rate": 4.143681585065246e-05,
      "loss": 0.9363,
      "step": 70940
    },
    {
      "epoch": 0.2486620614242606,
      "grad_norm": 2.703125,
      "learning_rate": 4.1442657040385993e-05,
      "loss": 0.9914,
      "step": 70950
    },
    {
      "epoch": 0.24869710893115618,
      "grad_norm": 3.171875,
      "learning_rate": 4.1448498230119515e-05,
      "loss": 1.0281,
      "step": 70960
    },
    {
      "epoch": 0.24873215643805177,
      "grad_norm": 3.453125,
      "learning_rate": 4.1454339419853036e-05,
      "loss": 0.9976,
      "step": 70970
    },
    {
      "epoch": 0.24876720394494736,
      "grad_norm": 3.578125,
      "learning_rate": 4.1460180609586565e-05,
      "loss": 1.0476,
      "step": 70980
    },
    {
      "epoch": 0.24880225145184298,
      "grad_norm": 2.984375,
      "learning_rate": 4.1466021799320086e-05,
      "loss": 1.0529,
      "step": 70990
    },
    {
      "epoch": 0.24883729895873857,
      "grad_norm": 3.125,
      "learning_rate": 4.1471862989053614e-05,
      "loss": 0.9635,
      "step": 71000
    },
    {
      "epoch": 0.24887234646563416,
      "grad_norm": 3.09375,
      "learning_rate": 4.1477704178787136e-05,
      "loss": 0.9868,
      "step": 71010
    },
    {
      "epoch": 0.24890739397252976,
      "grad_norm": 2.484375,
      "learning_rate": 4.148354536852066e-05,
      "loss": 0.9324,
      "step": 71020
    },
    {
      "epoch": 0.24894244147942537,
      "grad_norm": 3.0625,
      "learning_rate": 4.148938655825419e-05,
      "loss": 0.9866,
      "step": 71030
    },
    {
      "epoch": 0.24897748898632097,
      "grad_norm": 3.046875,
      "learning_rate": 4.1495227747987714e-05,
      "loss": 1.0388,
      "step": 71040
    },
    {
      "epoch": 0.24901253649321656,
      "grad_norm": 3.1875,
      "learning_rate": 4.1501068937721235e-05,
      "loss": 0.9947,
      "step": 71050
    },
    {
      "epoch": 0.24904758400011215,
      "grad_norm": 2.78125,
      "learning_rate": 4.150691012745476e-05,
      "loss": 0.9328,
      "step": 71060
    },
    {
      "epoch": 0.24908263150700774,
      "grad_norm": 3.296875,
      "learning_rate": 4.1512751317188285e-05,
      "loss": 0.9842,
      "step": 71070
    },
    {
      "epoch": 0.24911767901390336,
      "grad_norm": 3.765625,
      "learning_rate": 4.151859250692181e-05,
      "loss": 0.9845,
      "step": 71080
    },
    {
      "epoch": 0.24915272652079895,
      "grad_norm": 2.859375,
      "learning_rate": 4.1524433696655334e-05,
      "loss": 0.9835,
      "step": 71090
    },
    {
      "epoch": 0.24918777402769454,
      "grad_norm": 3.265625,
      "learning_rate": 4.1530274886388856e-05,
      "loss": 1.0158,
      "step": 71100
    },
    {
      "epoch": 0.24922282153459013,
      "grad_norm": 2.8125,
      "learning_rate": 4.153611607612239e-05,
      "loss": 0.903,
      "step": 71110
    },
    {
      "epoch": 0.24925786904148572,
      "grad_norm": 3.140625,
      "learning_rate": 4.154195726585591e-05,
      "loss": 1.0011,
      "step": 71120
    },
    {
      "epoch": 0.24929291654838134,
      "grad_norm": 3.3125,
      "learning_rate": 4.154779845558944e-05,
      "loss": 0.9912,
      "step": 71130
    },
    {
      "epoch": 0.24932796405527693,
      "grad_norm": 3.390625,
      "learning_rate": 4.155363964532296e-05,
      "loss": 0.9724,
      "step": 71140
    },
    {
      "epoch": 0.24936301156217253,
      "grad_norm": 3.28125,
      "learning_rate": 4.1559480835056484e-05,
      "loss": 0.8814,
      "step": 71150
    },
    {
      "epoch": 0.24939805906906812,
      "grad_norm": 3.109375,
      "learning_rate": 4.156532202479001e-05,
      "loss": 1.0437,
      "step": 71160
    },
    {
      "epoch": 0.2494331065759637,
      "grad_norm": 3.21875,
      "learning_rate": 4.157116321452353e-05,
      "loss": 1.0031,
      "step": 71170
    },
    {
      "epoch": 0.24946815408285933,
      "grad_norm": 3.6875,
      "learning_rate": 4.157700440425706e-05,
      "loss": 1.0652,
      "step": 71180
    },
    {
      "epoch": 0.24950320158975492,
      "grad_norm": 3.484375,
      "learning_rate": 4.158284559399059e-05,
      "loss": 1.0212,
      "step": 71190
    },
    {
      "epoch": 0.2495382490966505,
      "grad_norm": 3.546875,
      "learning_rate": 4.158868678372411e-05,
      "loss": 0.9581,
      "step": 71200
    },
    {
      "epoch": 0.2495732966035461,
      "grad_norm": 2.96875,
      "learning_rate": 4.159452797345764e-05,
      "loss": 0.9397,
      "step": 71210
    },
    {
      "epoch": 0.2496083441104417,
      "grad_norm": 2.890625,
      "learning_rate": 4.160036916319116e-05,
      "loss": 1.0145,
      "step": 71220
    },
    {
      "epoch": 0.2496433916173373,
      "grad_norm": 2.96875,
      "learning_rate": 4.160621035292468e-05,
      "loss": 1.0246,
      "step": 71230
    },
    {
      "epoch": 0.2496784391242329,
      "grad_norm": 3.46875,
      "learning_rate": 4.161205154265821e-05,
      "loss": 0.9704,
      "step": 71240
    },
    {
      "epoch": 0.2497134866311285,
      "grad_norm": 3.109375,
      "learning_rate": 4.161789273239173e-05,
      "loss": 0.9877,
      "step": 71250
    },
    {
      "epoch": 0.2497485341380241,
      "grad_norm": 2.984375,
      "learning_rate": 4.162373392212526e-05,
      "loss": 0.994,
      "step": 71260
    },
    {
      "epoch": 0.24978358164491968,
      "grad_norm": 3.171875,
      "learning_rate": 4.162957511185879e-05,
      "loss": 0.9466,
      "step": 71270
    },
    {
      "epoch": 0.2498186291518153,
      "grad_norm": 2.84375,
      "learning_rate": 4.163541630159231e-05,
      "loss": 0.9637,
      "step": 71280
    },
    {
      "epoch": 0.2498536766587109,
      "grad_norm": 3.234375,
      "learning_rate": 4.164125749132584e-05,
      "loss": 0.9855,
      "step": 71290
    },
    {
      "epoch": 0.24988872416560648,
      "grad_norm": 3.09375,
      "learning_rate": 4.164709868105936e-05,
      "loss": 1.0075,
      "step": 71300
    },
    {
      "epoch": 0.24992377167250207,
      "grad_norm": 3.46875,
      "learning_rate": 4.165293987079288e-05,
      "loss": 0.9865,
      "step": 71310
    },
    {
      "epoch": 0.24995881917939766,
      "grad_norm": 2.953125,
      "learning_rate": 4.165878106052641e-05,
      "loss": 1.0132,
      "step": 71320
    },
    {
      "epoch": 0.24999386668629328,
      "grad_norm": 3.046875,
      "learning_rate": 4.166462225025993e-05,
      "loss": 0.9444,
      "step": 71330
    },
    {
      "epoch": 0.25002891419318884,
      "grad_norm": 2.75,
      "learning_rate": 4.1670463439993466e-05,
      "loss": 1.0322,
      "step": 71340
    },
    {
      "epoch": 0.2500639617000845,
      "grad_norm": 3.328125,
      "learning_rate": 4.167630462972699e-05,
      "loss": 0.9328,
      "step": 71350
    },
    {
      "epoch": 0.2500990092069801,
      "grad_norm": 2.984375,
      "learning_rate": 4.168214581946051e-05,
      "loss": 0.9162,
      "step": 71360
    },
    {
      "epoch": 0.2501340567138757,
      "grad_norm": 3.671875,
      "learning_rate": 4.168798700919404e-05,
      "loss": 0.9989,
      "step": 71370
    },
    {
      "epoch": 0.25016910422077127,
      "grad_norm": 2.96875,
      "learning_rate": 4.169382819892756e-05,
      "loss": 1.0501,
      "step": 71380
    },
    {
      "epoch": 0.25020415172766686,
      "grad_norm": 3.203125,
      "learning_rate": 4.1699669388661087e-05,
      "loss": 1.0292,
      "step": 71390
    },
    {
      "epoch": 0.25023919923456245,
      "grad_norm": 3.203125,
      "learning_rate": 4.170551057839461e-05,
      "loss": 1.0688,
      "step": 71400
    },
    {
      "epoch": 0.25027424674145804,
      "grad_norm": 3.328125,
      "learning_rate": 4.171135176812813e-05,
      "loss": 1.0119,
      "step": 71410
    },
    {
      "epoch": 0.25030929424835363,
      "grad_norm": 3.125,
      "learning_rate": 4.1717192957861664e-05,
      "loss": 0.93,
      "step": 71420
    },
    {
      "epoch": 0.2503443417552492,
      "grad_norm": 3.53125,
      "learning_rate": 4.1723034147595186e-05,
      "loss": 1.0229,
      "step": 71430
    },
    {
      "epoch": 0.2503793892621448,
      "grad_norm": 3.6875,
      "learning_rate": 4.172887533732871e-05,
      "loss": 1.0437,
      "step": 71440
    },
    {
      "epoch": 0.25041443676904046,
      "grad_norm": 3.265625,
      "learning_rate": 4.1734716527062236e-05,
      "loss": 0.9331,
      "step": 71450
    },
    {
      "epoch": 0.25044948427593605,
      "grad_norm": 3.578125,
      "learning_rate": 4.174055771679576e-05,
      "loss": 1.0179,
      "step": 71460
    },
    {
      "epoch": 0.25048453178283164,
      "grad_norm": 3.390625,
      "learning_rate": 4.1746398906529285e-05,
      "loss": 0.9693,
      "step": 71470
    },
    {
      "epoch": 0.25051957928972723,
      "grad_norm": 3.015625,
      "learning_rate": 4.175224009626281e-05,
      "loss": 0.907,
      "step": 71480
    },
    {
      "epoch": 0.2505546267966228,
      "grad_norm": 2.984375,
      "learning_rate": 4.175808128599633e-05,
      "loss": 1.0223,
      "step": 71490
    },
    {
      "epoch": 0.2505896743035184,
      "grad_norm": 3.515625,
      "learning_rate": 4.176392247572986e-05,
      "loss": 0.9687,
      "step": 71500
    },
    {
      "epoch": 0.250624721810414,
      "grad_norm": 2.984375,
      "learning_rate": 4.1769763665463385e-05,
      "loss": 0.9944,
      "step": 71510
    },
    {
      "epoch": 0.2506597693173096,
      "grad_norm": 3.015625,
      "learning_rate": 4.1775604855196906e-05,
      "loss": 0.9994,
      "step": 71520
    },
    {
      "epoch": 0.2506948168242052,
      "grad_norm": 3.25,
      "learning_rate": 4.1781446044930434e-05,
      "loss": 1.042,
      "step": 71530
    },
    {
      "epoch": 0.2507298643311008,
      "grad_norm": 3.359375,
      "learning_rate": 4.1787287234663956e-05,
      "loss": 1.0856,
      "step": 71540
    },
    {
      "epoch": 0.25076491183799643,
      "grad_norm": 3.0,
      "learning_rate": 4.1793128424397484e-05,
      "loss": 0.9578,
      "step": 71550
    },
    {
      "epoch": 0.250799959344892,
      "grad_norm": 3.125,
      "learning_rate": 4.1798969614131006e-05,
      "loss": 0.9926,
      "step": 71560
    },
    {
      "epoch": 0.2508350068517876,
      "grad_norm": 3.34375,
      "learning_rate": 4.1804810803864534e-05,
      "loss": 1.0082,
      "step": 71570
    },
    {
      "epoch": 0.2508700543586832,
      "grad_norm": 3.0,
      "learning_rate": 4.181065199359806e-05,
      "loss": 1.0453,
      "step": 71580
    },
    {
      "epoch": 0.2509051018655788,
      "grad_norm": 3.078125,
      "learning_rate": 4.1816493183331583e-05,
      "loss": 0.9441,
      "step": 71590
    },
    {
      "epoch": 0.2509401493724744,
      "grad_norm": 3.796875,
      "learning_rate": 4.182233437306511e-05,
      "loss": 0.9746,
      "step": 71600
    },
    {
      "epoch": 0.25097519687937,
      "grad_norm": 3.03125,
      "learning_rate": 4.182817556279863e-05,
      "loss": 1.0083,
      "step": 71610
    },
    {
      "epoch": 0.25101024438626557,
      "grad_norm": 3.109375,
      "learning_rate": 4.1834016752532155e-05,
      "loss": 1.0087,
      "step": 71620
    },
    {
      "epoch": 0.25104529189316116,
      "grad_norm": 2.90625,
      "learning_rate": 4.183985794226568e-05,
      "loss": 1.0618,
      "step": 71630
    },
    {
      "epoch": 0.25108033940005675,
      "grad_norm": 3.234375,
      "learning_rate": 4.1845699131999204e-05,
      "loss": 1.02,
      "step": 71640
    },
    {
      "epoch": 0.2511153869069524,
      "grad_norm": 2.859375,
      "learning_rate": 4.185154032173273e-05,
      "loss": 0.9918,
      "step": 71650
    },
    {
      "epoch": 0.251150434413848,
      "grad_norm": 3.09375,
      "learning_rate": 4.185738151146626e-05,
      "loss": 0.9964,
      "step": 71660
    },
    {
      "epoch": 0.2511854819207436,
      "grad_norm": 3.21875,
      "learning_rate": 4.186322270119978e-05,
      "loss": 0.9531,
      "step": 71670
    },
    {
      "epoch": 0.25122052942763917,
      "grad_norm": 3.078125,
      "learning_rate": 4.186906389093331e-05,
      "loss": 1.0441,
      "step": 71680
    },
    {
      "epoch": 0.25125557693453476,
      "grad_norm": 2.9375,
      "learning_rate": 4.187490508066683e-05,
      "loss": 0.9873,
      "step": 71690
    },
    {
      "epoch": 0.25129062444143035,
      "grad_norm": 3.40625,
      "learning_rate": 4.188074627040035e-05,
      "loss": 1.0899,
      "step": 71700
    },
    {
      "epoch": 0.25132567194832595,
      "grad_norm": 2.859375,
      "learning_rate": 4.188658746013388e-05,
      "loss": 0.9956,
      "step": 71710
    },
    {
      "epoch": 0.25136071945522154,
      "grad_norm": 3.515625,
      "learning_rate": 4.18924286498674e-05,
      "loss": 1.0471,
      "step": 71720
    },
    {
      "epoch": 0.25139576696211713,
      "grad_norm": 3.3125,
      "learning_rate": 4.189826983960093e-05,
      "loss": 1.018,
      "step": 71730
    },
    {
      "epoch": 0.2514308144690127,
      "grad_norm": 3.546875,
      "learning_rate": 4.190411102933446e-05,
      "loss": 0.9622,
      "step": 71740
    },
    {
      "epoch": 0.25146586197590837,
      "grad_norm": 2.921875,
      "learning_rate": 4.190995221906798e-05,
      "loss": 1.0703,
      "step": 71750
    },
    {
      "epoch": 0.25150090948280396,
      "grad_norm": 3.015625,
      "learning_rate": 4.191579340880151e-05,
      "loss": 0.9485,
      "step": 71760
    },
    {
      "epoch": 0.25153595698969955,
      "grad_norm": 3.15625,
      "learning_rate": 4.192163459853503e-05,
      "loss": 0.9498,
      "step": 71770
    },
    {
      "epoch": 0.25157100449659514,
      "grad_norm": 3.046875,
      "learning_rate": 4.192747578826855e-05,
      "loss": 0.9854,
      "step": 71780
    },
    {
      "epoch": 0.25160605200349073,
      "grad_norm": 4.4375,
      "learning_rate": 4.193331697800208e-05,
      "loss": 1.0136,
      "step": 71790
    },
    {
      "epoch": 0.2516410995103863,
      "grad_norm": 3.171875,
      "learning_rate": 4.19391581677356e-05,
      "loss": 1.0291,
      "step": 71800
    },
    {
      "epoch": 0.2516761470172819,
      "grad_norm": 3.328125,
      "learning_rate": 4.194499935746914e-05,
      "loss": 0.9697,
      "step": 71810
    },
    {
      "epoch": 0.2517111945241775,
      "grad_norm": 2.890625,
      "learning_rate": 4.195084054720266e-05,
      "loss": 1.074,
      "step": 71820
    },
    {
      "epoch": 0.2517462420310731,
      "grad_norm": 2.828125,
      "learning_rate": 4.195668173693618e-05,
      "loss": 0.9359,
      "step": 71830
    },
    {
      "epoch": 0.25178128953796874,
      "grad_norm": 3.15625,
      "learning_rate": 4.196252292666971e-05,
      "loss": 0.989,
      "step": 71840
    },
    {
      "epoch": 0.25181633704486434,
      "grad_norm": 3.203125,
      "learning_rate": 4.196836411640323e-05,
      "loss": 0.9642,
      "step": 71850
    },
    {
      "epoch": 0.2518513845517599,
      "grad_norm": 3.40625,
      "learning_rate": 4.197420530613676e-05,
      "loss": 0.9945,
      "step": 71860
    },
    {
      "epoch": 0.2518864320586555,
      "grad_norm": 3.109375,
      "learning_rate": 4.198004649587028e-05,
      "loss": 0.9454,
      "step": 71870
    },
    {
      "epoch": 0.2519214795655511,
      "grad_norm": 3.515625,
      "learning_rate": 4.19858876856038e-05,
      "loss": 0.959,
      "step": 71880
    },
    {
      "epoch": 0.2519565270724467,
      "grad_norm": 3.296875,
      "learning_rate": 4.1991728875337336e-05,
      "loss": 1.0433,
      "step": 71890
    },
    {
      "epoch": 0.2519915745793423,
      "grad_norm": 3.34375,
      "learning_rate": 4.199757006507086e-05,
      "loss": 0.9971,
      "step": 71900
    },
    {
      "epoch": 0.2520266220862379,
      "grad_norm": 3.15625,
      "learning_rate": 4.200341125480438e-05,
      "loss": 1.018,
      "step": 71910
    },
    {
      "epoch": 0.2520616695931335,
      "grad_norm": 2.96875,
      "learning_rate": 4.200925244453791e-05,
      "loss": 0.924,
      "step": 71920
    },
    {
      "epoch": 0.25209671710002907,
      "grad_norm": 3.65625,
      "learning_rate": 4.201509363427143e-05,
      "loss": 0.9981,
      "step": 71930
    },
    {
      "epoch": 0.2521317646069247,
      "grad_norm": 3.0,
      "learning_rate": 4.2020934824004956e-05,
      "loss": 0.9897,
      "step": 71940
    },
    {
      "epoch": 0.2521668121138203,
      "grad_norm": 3.34375,
      "learning_rate": 4.202677601373848e-05,
      "loss": 1.0044,
      "step": 71950
    },
    {
      "epoch": 0.2522018596207159,
      "grad_norm": 3.265625,
      "learning_rate": 4.2032617203472006e-05,
      "loss": 1.0218,
      "step": 71960
    },
    {
      "epoch": 0.2522369071276115,
      "grad_norm": 3.203125,
      "learning_rate": 4.2038458393205534e-05,
      "loss": 0.9867,
      "step": 71970
    },
    {
      "epoch": 0.2522719546345071,
      "grad_norm": 3.359375,
      "learning_rate": 4.2044299582939056e-05,
      "loss": 0.9602,
      "step": 71980
    },
    {
      "epoch": 0.25230700214140267,
      "grad_norm": 3.296875,
      "learning_rate": 4.205014077267258e-05,
      "loss": 0.9802,
      "step": 71990
    },
    {
      "epoch": 0.25234204964829826,
      "grad_norm": 3.421875,
      "learning_rate": 4.2055981962406105e-05,
      "loss": 0.9181,
      "step": 72000
    },
    {
      "epoch": 0.25237709715519385,
      "grad_norm": 3.03125,
      "learning_rate": 4.206182315213963e-05,
      "loss": 0.9693,
      "step": 72010
    },
    {
      "epoch": 0.25241214466208944,
      "grad_norm": 3.484375,
      "learning_rate": 4.2067664341873155e-05,
      "loss": 0.9995,
      "step": 72020
    },
    {
      "epoch": 0.25244719216898504,
      "grad_norm": 3.375,
      "learning_rate": 4.207350553160668e-05,
      "loss": 1.0539,
      "step": 72030
    },
    {
      "epoch": 0.2524822396758807,
      "grad_norm": 3.40625,
      "learning_rate": 4.2079346721340205e-05,
      "loss": 1.0471,
      "step": 72040
    },
    {
      "epoch": 0.2525172871827763,
      "grad_norm": 3.40625,
      "learning_rate": 4.208518791107373e-05,
      "loss": 0.9208,
      "step": 72050
    },
    {
      "epoch": 0.25255233468967186,
      "grad_norm": 3.1875,
      "learning_rate": 4.2091029100807255e-05,
      "loss": 0.9748,
      "step": 72060
    },
    {
      "epoch": 0.25258738219656746,
      "grad_norm": 3.453125,
      "learning_rate": 4.209687029054078e-05,
      "loss": 0.9352,
      "step": 72070
    },
    {
      "epoch": 0.25262242970346305,
      "grad_norm": 3.609375,
      "learning_rate": 4.2102711480274304e-05,
      "loss": 1.0367,
      "step": 72080
    },
    {
      "epoch": 0.25265747721035864,
      "grad_norm": 3.09375,
      "learning_rate": 4.2108552670007826e-05,
      "loss": 0.9628,
      "step": 72090
    },
    {
      "epoch": 0.25269252471725423,
      "grad_norm": 3.578125,
      "learning_rate": 4.2114393859741354e-05,
      "loss": 0.893,
      "step": 72100
    },
    {
      "epoch": 0.2527275722241498,
      "grad_norm": 3.484375,
      "learning_rate": 4.2120235049474875e-05,
      "loss": 0.9968,
      "step": 72110
    },
    {
      "epoch": 0.2527626197310454,
      "grad_norm": 3.125,
      "learning_rate": 4.2126076239208404e-05,
      "loss": 1.0615,
      "step": 72120
    },
    {
      "epoch": 0.252797667237941,
      "grad_norm": 3.671875,
      "learning_rate": 4.213191742894193e-05,
      "loss": 1.0305,
      "step": 72130
    },
    {
      "epoch": 0.25283271474483665,
      "grad_norm": 3.296875,
      "learning_rate": 4.213775861867545e-05,
      "loss": 0.9332,
      "step": 72140
    },
    {
      "epoch": 0.25286776225173224,
      "grad_norm": 3.8125,
      "learning_rate": 4.214359980840898e-05,
      "loss": 0.9977,
      "step": 72150
    },
    {
      "epoch": 0.25290280975862783,
      "grad_norm": 3.34375,
      "learning_rate": 4.21494409981425e-05,
      "loss": 1.0248,
      "step": 72160
    },
    {
      "epoch": 0.2529378572655234,
      "grad_norm": 2.90625,
      "learning_rate": 4.2155282187876024e-05,
      "loss": 0.9877,
      "step": 72170
    },
    {
      "epoch": 0.252972904772419,
      "grad_norm": 3.4375,
      "learning_rate": 4.216112337760955e-05,
      "loss": 0.9108,
      "step": 72180
    },
    {
      "epoch": 0.2530079522793146,
      "grad_norm": 3.234375,
      "learning_rate": 4.2166964567343074e-05,
      "loss": 0.9271,
      "step": 72190
    },
    {
      "epoch": 0.2530429997862102,
      "grad_norm": 2.875,
      "learning_rate": 4.21728057570766e-05,
      "loss": 0.9678,
      "step": 72200
    },
    {
      "epoch": 0.2530780472931058,
      "grad_norm": 3.15625,
      "learning_rate": 4.217864694681013e-05,
      "loss": 0.9202,
      "step": 72210
    },
    {
      "epoch": 0.2531130948000014,
      "grad_norm": 3.34375,
      "learning_rate": 4.218448813654365e-05,
      "loss": 0.9453,
      "step": 72220
    },
    {
      "epoch": 0.253148142306897,
      "grad_norm": 2.96875,
      "learning_rate": 4.219032932627718e-05,
      "loss": 0.9422,
      "step": 72230
    },
    {
      "epoch": 0.2531831898137926,
      "grad_norm": 3.546875,
      "learning_rate": 4.21961705160107e-05,
      "loss": 0.8826,
      "step": 72240
    },
    {
      "epoch": 0.2532182373206882,
      "grad_norm": 2.96875,
      "learning_rate": 4.220201170574422e-05,
      "loss": 0.9872,
      "step": 72250
    },
    {
      "epoch": 0.2532532848275838,
      "grad_norm": 2.859375,
      "learning_rate": 4.220785289547775e-05,
      "loss": 0.9251,
      "step": 72260
    },
    {
      "epoch": 0.2532883323344794,
      "grad_norm": 3.03125,
      "learning_rate": 4.221369408521128e-05,
      "loss": 0.9854,
      "step": 72270
    },
    {
      "epoch": 0.253323379841375,
      "grad_norm": 3.34375,
      "learning_rate": 4.221953527494481e-05,
      "loss": 1.0291,
      "step": 72280
    },
    {
      "epoch": 0.2533584273482706,
      "grad_norm": 3.140625,
      "learning_rate": 4.222537646467833e-05,
      "loss": 0.9864,
      "step": 72290
    },
    {
      "epoch": 0.25339347485516617,
      "grad_norm": 4.28125,
      "learning_rate": 4.223121765441185e-05,
      "loss": 0.9689,
      "step": 72300
    },
    {
      "epoch": 0.25342852236206176,
      "grad_norm": 2.59375,
      "learning_rate": 4.223705884414538e-05,
      "loss": 0.9648,
      "step": 72310
    },
    {
      "epoch": 0.25346356986895735,
      "grad_norm": 3.421875,
      "learning_rate": 4.22429000338789e-05,
      "loss": 1.0091,
      "step": 72320
    },
    {
      "epoch": 0.25349861737585294,
      "grad_norm": 3.265625,
      "learning_rate": 4.224874122361243e-05,
      "loss": 0.9638,
      "step": 72330
    },
    {
      "epoch": 0.2535336648827486,
      "grad_norm": 3.3125,
      "learning_rate": 4.225458241334595e-05,
      "loss": 1.0345,
      "step": 72340
    },
    {
      "epoch": 0.2535687123896442,
      "grad_norm": 3.03125,
      "learning_rate": 4.226042360307948e-05,
      "loss": 0.9889,
      "step": 72350
    },
    {
      "epoch": 0.25360375989653977,
      "grad_norm": 3.359375,
      "learning_rate": 4.226626479281301e-05,
      "loss": 1.0111,
      "step": 72360
    },
    {
      "epoch": 0.25363880740343536,
      "grad_norm": 2.875,
      "learning_rate": 4.227210598254653e-05,
      "loss": 0.9175,
      "step": 72370
    },
    {
      "epoch": 0.25367385491033095,
      "grad_norm": 2.953125,
      "learning_rate": 4.227794717228005e-05,
      "loss": 1.0084,
      "step": 72380
    },
    {
      "epoch": 0.25370890241722655,
      "grad_norm": 3.09375,
      "learning_rate": 4.228378836201358e-05,
      "loss": 1.0247,
      "step": 72390
    },
    {
      "epoch": 0.25374394992412214,
      "grad_norm": 3.5,
      "learning_rate": 4.22896295517471e-05,
      "loss": 1.0582,
      "step": 72400
    },
    {
      "epoch": 0.2537789974310177,
      "grad_norm": 3.3125,
      "learning_rate": 4.229547074148063e-05,
      "loss": 1.0313,
      "step": 72410
    },
    {
      "epoch": 0.2538140449379133,
      "grad_norm": 3.046875,
      "learning_rate": 4.230131193121415e-05,
      "loss": 0.9774,
      "step": 72420
    },
    {
      "epoch": 0.25384909244480897,
      "grad_norm": 2.953125,
      "learning_rate": 4.230715312094768e-05,
      "loss": 1.057,
      "step": 72430
    },
    {
      "epoch": 0.25388413995170456,
      "grad_norm": 3.40625,
      "learning_rate": 4.2312994310681205e-05,
      "loss": 0.9876,
      "step": 72440
    },
    {
      "epoch": 0.25391918745860015,
      "grad_norm": 3.1875,
      "learning_rate": 4.231883550041473e-05,
      "loss": 0.8957,
      "step": 72450
    },
    {
      "epoch": 0.25395423496549574,
      "grad_norm": 3.421875,
      "learning_rate": 4.232467669014825e-05,
      "loss": 1.0558,
      "step": 72460
    },
    {
      "epoch": 0.25398928247239133,
      "grad_norm": 3.421875,
      "learning_rate": 4.2330517879881777e-05,
      "loss": 1.0175,
      "step": 72470
    },
    {
      "epoch": 0.2540243299792869,
      "grad_norm": 3.5625,
      "learning_rate": 4.23363590696153e-05,
      "loss": 0.9951,
      "step": 72480
    },
    {
      "epoch": 0.2540593774861825,
      "grad_norm": 3.0625,
      "learning_rate": 4.2342200259348826e-05,
      "loss": 0.898,
      "step": 72490
    },
    {
      "epoch": 0.2540944249930781,
      "grad_norm": 3.34375,
      "learning_rate": 4.234804144908235e-05,
      "loss": 1.0306,
      "step": 72500
    },
    {
      "epoch": 0.2541294724999737,
      "grad_norm": 3.171875,
      "learning_rate": 4.2353882638815876e-05,
      "loss": 0.9421,
      "step": 72510
    },
    {
      "epoch": 0.2541645200068693,
      "grad_norm": 3.34375,
      "learning_rate": 4.2359723828549404e-05,
      "loss": 1.003,
      "step": 72520
    },
    {
      "epoch": 0.25419956751376493,
      "grad_norm": 3.21875,
      "learning_rate": 4.2365565018282926e-05,
      "loss": 1.02,
      "step": 72530
    },
    {
      "epoch": 0.2542346150206605,
      "grad_norm": 3.265625,
      "learning_rate": 4.2371406208016454e-05,
      "loss": 1.0378,
      "step": 72540
    },
    {
      "epoch": 0.2542696625275561,
      "grad_norm": 3.453125,
      "learning_rate": 4.2377247397749975e-05,
      "loss": 1.0066,
      "step": 72550
    },
    {
      "epoch": 0.2543047100344517,
      "grad_norm": 2.921875,
      "learning_rate": 4.23830885874835e-05,
      "loss": 0.9855,
      "step": 72560
    },
    {
      "epoch": 0.2543397575413473,
      "grad_norm": 2.890625,
      "learning_rate": 4.2388929777217025e-05,
      "loss": 0.936,
      "step": 72570
    },
    {
      "epoch": 0.2543748050482429,
      "grad_norm": 2.734375,
      "learning_rate": 4.239477096695055e-05,
      "loss": 0.9468,
      "step": 72580
    },
    {
      "epoch": 0.2544098525551385,
      "grad_norm": 2.984375,
      "learning_rate": 4.2400612156684075e-05,
      "loss": 0.9374,
      "step": 72590
    },
    {
      "epoch": 0.2544449000620341,
      "grad_norm": 3.40625,
      "learning_rate": 4.24064533464176e-05,
      "loss": 0.9863,
      "step": 72600
    },
    {
      "epoch": 0.25447994756892967,
      "grad_norm": 3.375,
      "learning_rate": 4.2412294536151124e-05,
      "loss": 1.0914,
      "step": 72610
    },
    {
      "epoch": 0.25451499507582526,
      "grad_norm": 2.765625,
      "learning_rate": 4.241813572588465e-05,
      "loss": 0.9458,
      "step": 72620
    },
    {
      "epoch": 0.2545500425827209,
      "grad_norm": 3.40625,
      "learning_rate": 4.2423976915618174e-05,
      "loss": 0.9704,
      "step": 72630
    },
    {
      "epoch": 0.2545850900896165,
      "grad_norm": 2.984375,
      "learning_rate": 4.2429818105351696e-05,
      "loss": 0.9373,
      "step": 72640
    },
    {
      "epoch": 0.2546201375965121,
      "grad_norm": 3.234375,
      "learning_rate": 4.2435659295085224e-05,
      "loss": 0.8778,
      "step": 72650
    },
    {
      "epoch": 0.2546551851034077,
      "grad_norm": 3.21875,
      "learning_rate": 4.244150048481875e-05,
      "loss": 0.9281,
      "step": 72660
    },
    {
      "epoch": 0.25469023261030327,
      "grad_norm": 2.75,
      "learning_rate": 4.2447341674552273e-05,
      "loss": 1.0391,
      "step": 72670
    },
    {
      "epoch": 0.25472528011719886,
      "grad_norm": 3.5,
      "learning_rate": 4.24531828642858e-05,
      "loss": 0.9742,
      "step": 72680
    },
    {
      "epoch": 0.25476032762409445,
      "grad_norm": 3.34375,
      "learning_rate": 4.245902405401932e-05,
      "loss": 0.9565,
      "step": 72690
    },
    {
      "epoch": 0.25479537513099004,
      "grad_norm": 3.3125,
      "learning_rate": 4.246486524375285e-05,
      "loss": 1.0807,
      "step": 72700
    },
    {
      "epoch": 0.25483042263788563,
      "grad_norm": 2.890625,
      "learning_rate": 4.247070643348637e-05,
      "loss": 0.97,
      "step": 72710
    },
    {
      "epoch": 0.2548654701447812,
      "grad_norm": 2.953125,
      "learning_rate": 4.24765476232199e-05,
      "loss": 0.9849,
      "step": 72720
    },
    {
      "epoch": 0.2549005176516769,
      "grad_norm": 3.609375,
      "learning_rate": 4.248238881295342e-05,
      "loss": 0.9449,
      "step": 72730
    },
    {
      "epoch": 0.25493556515857246,
      "grad_norm": 3.40625,
      "learning_rate": 4.248823000268695e-05,
      "loss": 0.9852,
      "step": 72740
    },
    {
      "epoch": 0.25497061266546805,
      "grad_norm": 3.265625,
      "learning_rate": 4.249407119242048e-05,
      "loss": 1.0428,
      "step": 72750
    },
    {
      "epoch": 0.25500566017236365,
      "grad_norm": 3.53125,
      "learning_rate": 4.2499912382154e-05,
      "loss": 0.971,
      "step": 72760
    },
    {
      "epoch": 0.25504070767925924,
      "grad_norm": 3.125,
      "learning_rate": 4.250575357188752e-05,
      "loss": 0.9544,
      "step": 72770
    },
    {
      "epoch": 0.25507575518615483,
      "grad_norm": 3.359375,
      "learning_rate": 4.251159476162105e-05,
      "loss": 0.9302,
      "step": 72780
    },
    {
      "epoch": 0.2551108026930504,
      "grad_norm": 2.984375,
      "learning_rate": 4.251743595135457e-05,
      "loss": 0.9575,
      "step": 72790
    },
    {
      "epoch": 0.255145850199946,
      "grad_norm": 5.34375,
      "learning_rate": 4.25232771410881e-05,
      "loss": 0.9576,
      "step": 72800
    },
    {
      "epoch": 0.2551808977068416,
      "grad_norm": 3.046875,
      "learning_rate": 4.252911833082162e-05,
      "loss": 0.9875,
      "step": 72810
    },
    {
      "epoch": 0.2552159452137372,
      "grad_norm": 3.375,
      "learning_rate": 4.253495952055515e-05,
      "loss": 0.987,
      "step": 72820
    },
    {
      "epoch": 0.25525099272063284,
      "grad_norm": 3.140625,
      "learning_rate": 4.254080071028868e-05,
      "loss": 0.9448,
      "step": 72830
    },
    {
      "epoch": 0.25528604022752843,
      "grad_norm": 3.296875,
      "learning_rate": 4.25466419000222e-05,
      "loss": 0.9945,
      "step": 72840
    },
    {
      "epoch": 0.255321087734424,
      "grad_norm": 2.859375,
      "learning_rate": 4.255248308975572e-05,
      "loss": 0.9433,
      "step": 72850
    },
    {
      "epoch": 0.2553561352413196,
      "grad_norm": 3.046875,
      "learning_rate": 4.255832427948925e-05,
      "loss": 1.0244,
      "step": 72860
    },
    {
      "epoch": 0.2553911827482152,
      "grad_norm": 3.203125,
      "learning_rate": 4.256416546922277e-05,
      "loss": 0.9231,
      "step": 72870
    },
    {
      "epoch": 0.2554262302551108,
      "grad_norm": 3.15625,
      "learning_rate": 4.25700066589563e-05,
      "loss": 0.9846,
      "step": 72880
    },
    {
      "epoch": 0.2554612777620064,
      "grad_norm": 3.234375,
      "learning_rate": 4.257584784868982e-05,
      "loss": 1.0371,
      "step": 72890
    },
    {
      "epoch": 0.255496325268902,
      "grad_norm": 2.6875,
      "learning_rate": 4.258168903842335e-05,
      "loss": 0.8371,
      "step": 72900
    },
    {
      "epoch": 0.25553137277579757,
      "grad_norm": 2.9375,
      "learning_rate": 4.2587530228156876e-05,
      "loss": 0.9214,
      "step": 72910
    },
    {
      "epoch": 0.25556642028269316,
      "grad_norm": 2.921875,
      "learning_rate": 4.25933714178904e-05,
      "loss": 0.9964,
      "step": 72920
    },
    {
      "epoch": 0.2556014677895888,
      "grad_norm": 3.125,
      "learning_rate": 4.2599212607623926e-05,
      "loss": 0.9967,
      "step": 72930
    },
    {
      "epoch": 0.2556365152964844,
      "grad_norm": 3.171875,
      "learning_rate": 4.260505379735745e-05,
      "loss": 0.9893,
      "step": 72940
    },
    {
      "epoch": 0.25567156280338,
      "grad_norm": 3.296875,
      "learning_rate": 4.261089498709097e-05,
      "loss": 0.9811,
      "step": 72950
    },
    {
      "epoch": 0.2557066103102756,
      "grad_norm": 3.453125,
      "learning_rate": 4.26167361768245e-05,
      "loss": 0.869,
      "step": 72960
    },
    {
      "epoch": 0.2557416578171712,
      "grad_norm": 3.328125,
      "learning_rate": 4.2622577366558026e-05,
      "loss": 1.0023,
      "step": 72970
    },
    {
      "epoch": 0.25577670532406677,
      "grad_norm": 3.140625,
      "learning_rate": 4.262841855629155e-05,
      "loss": 1.0752,
      "step": 72980
    },
    {
      "epoch": 0.25581175283096236,
      "grad_norm": 2.984375,
      "learning_rate": 4.2634259746025075e-05,
      "loss": 0.9208,
      "step": 72990
    },
    {
      "epoch": 0.25584680033785795,
      "grad_norm": 3.421875,
      "learning_rate": 4.26401009357586e-05,
      "loss": 0.9404,
      "step": 73000
    },
    {
      "epoch": 0.25588184784475354,
      "grad_norm": 3.359375,
      "learning_rate": 4.2645942125492125e-05,
      "loss": 1.037,
      "step": 73010
    },
    {
      "epoch": 0.2559168953516492,
      "grad_norm": 3.078125,
      "learning_rate": 4.2651783315225646e-05,
      "loss": 0.9215,
      "step": 73020
    },
    {
      "epoch": 0.2559519428585448,
      "grad_norm": 3.09375,
      "learning_rate": 4.265762450495917e-05,
      "loss": 0.9547,
      "step": 73030
    },
    {
      "epoch": 0.25598699036544037,
      "grad_norm": 3.0625,
      "learning_rate": 4.2663465694692696e-05,
      "loss": 1.0059,
      "step": 73040
    },
    {
      "epoch": 0.25602203787233596,
      "grad_norm": 3.046875,
      "learning_rate": 4.2669306884426224e-05,
      "loss": 1.0549,
      "step": 73050
    },
    {
      "epoch": 0.25605708537923155,
      "grad_norm": 3.046875,
      "learning_rate": 4.2675148074159746e-05,
      "loss": 0.9136,
      "step": 73060
    },
    {
      "epoch": 0.25609213288612714,
      "grad_norm": 3.296875,
      "learning_rate": 4.2680989263893274e-05,
      "loss": 0.9458,
      "step": 73070
    },
    {
      "epoch": 0.25612718039302274,
      "grad_norm": 2.859375,
      "learning_rate": 4.2686830453626795e-05,
      "loss": 0.9314,
      "step": 73080
    },
    {
      "epoch": 0.2561622278999183,
      "grad_norm": 3.1875,
      "learning_rate": 4.2692671643360324e-05,
      "loss": 1.019,
      "step": 73090
    },
    {
      "epoch": 0.2561972754068139,
      "grad_norm": 3.203125,
      "learning_rate": 4.2698512833093845e-05,
      "loss": 0.9473,
      "step": 73100
    },
    {
      "epoch": 0.2562323229137095,
      "grad_norm": 3.078125,
      "learning_rate": 4.2704354022827367e-05,
      "loss": 0.9822,
      "step": 73110
    },
    {
      "epoch": 0.25626737042060516,
      "grad_norm": 3.21875,
      "learning_rate": 4.2710195212560895e-05,
      "loss": 0.9744,
      "step": 73120
    },
    {
      "epoch": 0.25630241792750075,
      "grad_norm": 2.96875,
      "learning_rate": 4.271603640229442e-05,
      "loss": 0.891,
      "step": 73130
    },
    {
      "epoch": 0.25633746543439634,
      "grad_norm": 2.984375,
      "learning_rate": 4.272187759202795e-05,
      "loss": 1.0272,
      "step": 73140
    },
    {
      "epoch": 0.25637251294129193,
      "grad_norm": 3.328125,
      "learning_rate": 4.272771878176147e-05,
      "loss": 0.9769,
      "step": 73150
    },
    {
      "epoch": 0.2564075604481875,
      "grad_norm": 3.078125,
      "learning_rate": 4.2733559971494994e-05,
      "loss": 0.9876,
      "step": 73160
    },
    {
      "epoch": 0.2564426079550831,
      "grad_norm": 2.921875,
      "learning_rate": 4.273940116122852e-05,
      "loss": 0.9911,
      "step": 73170
    },
    {
      "epoch": 0.2564776554619787,
      "grad_norm": 2.53125,
      "learning_rate": 4.2745242350962044e-05,
      "loss": 1.0841,
      "step": 73180
    },
    {
      "epoch": 0.2565127029688743,
      "grad_norm": 3.1875,
      "learning_rate": 4.275108354069557e-05,
      "loss": 0.9226,
      "step": 73190
    },
    {
      "epoch": 0.2565477504757699,
      "grad_norm": 3.328125,
      "learning_rate": 4.2756924730429094e-05,
      "loss": 1.0169,
      "step": 73200
    },
    {
      "epoch": 0.2565827979826655,
      "grad_norm": 3.125,
      "learning_rate": 4.276276592016262e-05,
      "loss": 0.9835,
      "step": 73210
    },
    {
      "epoch": 0.2566178454895611,
      "grad_norm": 3.625,
      "learning_rate": 4.276860710989615e-05,
      "loss": 1.0185,
      "step": 73220
    },
    {
      "epoch": 0.2566528929964567,
      "grad_norm": 3.3125,
      "learning_rate": 4.277444829962967e-05,
      "loss": 1.028,
      "step": 73230
    },
    {
      "epoch": 0.2566879405033523,
      "grad_norm": 3.078125,
      "learning_rate": 4.278028948936319e-05,
      "loss": 0.94,
      "step": 73240
    },
    {
      "epoch": 0.2567229880102479,
      "grad_norm": 3.203125,
      "learning_rate": 4.278613067909672e-05,
      "loss": 0.9324,
      "step": 73250
    },
    {
      "epoch": 0.2567580355171435,
      "grad_norm": 3.109375,
      "learning_rate": 4.279197186883024e-05,
      "loss": 0.9867,
      "step": 73260
    },
    {
      "epoch": 0.2567930830240391,
      "grad_norm": 3.265625,
      "learning_rate": 4.279781305856377e-05,
      "loss": 0.9353,
      "step": 73270
    },
    {
      "epoch": 0.2568281305309347,
      "grad_norm": 3.296875,
      "learning_rate": 4.28036542482973e-05,
      "loss": 1.0091,
      "step": 73280
    },
    {
      "epoch": 0.25686317803783026,
      "grad_norm": 3.21875,
      "learning_rate": 4.280949543803082e-05,
      "loss": 0.9193,
      "step": 73290
    },
    {
      "epoch": 0.25689822554472586,
      "grad_norm": 2.90625,
      "learning_rate": 4.281533662776435e-05,
      "loss": 1.0691,
      "step": 73300
    },
    {
      "epoch": 0.25693327305162145,
      "grad_norm": 3.375,
      "learning_rate": 4.282117781749787e-05,
      "loss": 0.9831,
      "step": 73310
    },
    {
      "epoch": 0.2569683205585171,
      "grad_norm": 2.734375,
      "learning_rate": 4.282701900723139e-05,
      "loss": 0.9247,
      "step": 73320
    },
    {
      "epoch": 0.2570033680654127,
      "grad_norm": 3.03125,
      "learning_rate": 4.283286019696492e-05,
      "loss": 0.8919,
      "step": 73330
    },
    {
      "epoch": 0.2570384155723083,
      "grad_norm": 2.890625,
      "learning_rate": 4.283870138669844e-05,
      "loss": 0.9469,
      "step": 73340
    },
    {
      "epoch": 0.25707346307920387,
      "grad_norm": 2.703125,
      "learning_rate": 4.284454257643197e-05,
      "loss": 0.8331,
      "step": 73350
    },
    {
      "epoch": 0.25710851058609946,
      "grad_norm": 2.859375,
      "learning_rate": 4.28503837661655e-05,
      "loss": 1.0826,
      "step": 73360
    },
    {
      "epoch": 0.25714355809299505,
      "grad_norm": 3.09375,
      "learning_rate": 4.285622495589902e-05,
      "loss": 0.9169,
      "step": 73370
    },
    {
      "epoch": 0.25717860559989064,
      "grad_norm": 3.21875,
      "learning_rate": 4.286206614563255e-05,
      "loss": 1.0423,
      "step": 73380
    },
    {
      "epoch": 0.25721365310678623,
      "grad_norm": 3.375,
      "learning_rate": 4.286790733536607e-05,
      "loss": 1.0103,
      "step": 73390
    },
    {
      "epoch": 0.2572487006136818,
      "grad_norm": 3.09375,
      "learning_rate": 4.28737485250996e-05,
      "loss": 0.9849,
      "step": 73400
    },
    {
      "epoch": 0.2572837481205774,
      "grad_norm": 3.40625,
      "learning_rate": 4.287958971483312e-05,
      "loss": 1.0088,
      "step": 73410
    },
    {
      "epoch": 0.25731879562747306,
      "grad_norm": 3.1875,
      "learning_rate": 4.288543090456664e-05,
      "loss": 0.9584,
      "step": 73420
    },
    {
      "epoch": 0.25735384313436865,
      "grad_norm": 3.03125,
      "learning_rate": 4.289127209430017e-05,
      "loss": 1.0775,
      "step": 73430
    },
    {
      "epoch": 0.25738889064126425,
      "grad_norm": 3.125,
      "learning_rate": 4.28971132840337e-05,
      "loss": 0.9854,
      "step": 73440
    },
    {
      "epoch": 0.25742393814815984,
      "grad_norm": 2.78125,
      "learning_rate": 4.290295447376722e-05,
      "loss": 0.9341,
      "step": 73450
    },
    {
      "epoch": 0.25745898565505543,
      "grad_norm": 2.96875,
      "learning_rate": 4.2908795663500746e-05,
      "loss": 1.0107,
      "step": 73460
    },
    {
      "epoch": 0.257494033161951,
      "grad_norm": 3.25,
      "learning_rate": 4.291463685323427e-05,
      "loss": 1.0322,
      "step": 73470
    },
    {
      "epoch": 0.2575290806688466,
      "grad_norm": 3.015625,
      "learning_rate": 4.2920478042967796e-05,
      "loss": 0.9589,
      "step": 73480
    },
    {
      "epoch": 0.2575641281757422,
      "grad_norm": 3.390625,
      "learning_rate": 4.292631923270132e-05,
      "loss": 0.9434,
      "step": 73490
    },
    {
      "epoch": 0.2575991756826378,
      "grad_norm": 3.140625,
      "learning_rate": 4.293216042243484e-05,
      "loss": 0.9977,
      "step": 73500
    },
    {
      "epoch": 0.25763422318953344,
      "grad_norm": 3.09375,
      "learning_rate": 4.293800161216837e-05,
      "loss": 0.8964,
      "step": 73510
    },
    {
      "epoch": 0.25766927069642903,
      "grad_norm": 3.328125,
      "learning_rate": 4.2943842801901895e-05,
      "loss": 1.0456,
      "step": 73520
    },
    {
      "epoch": 0.2577043182033246,
      "grad_norm": 3.125,
      "learning_rate": 4.294968399163542e-05,
      "loss": 0.9398,
      "step": 73530
    },
    {
      "epoch": 0.2577393657102202,
      "grad_norm": 3.15625,
      "learning_rate": 4.2955525181368945e-05,
      "loss": 1.0811,
      "step": 73540
    },
    {
      "epoch": 0.2577744132171158,
      "grad_norm": 2.75,
      "learning_rate": 4.2961366371102467e-05,
      "loss": 0.9526,
      "step": 73550
    },
    {
      "epoch": 0.2578094607240114,
      "grad_norm": 3.28125,
      "learning_rate": 4.2967207560835995e-05,
      "loss": 0.9891,
      "step": 73560
    },
    {
      "epoch": 0.257844508230907,
      "grad_norm": 3.046875,
      "learning_rate": 4.2973048750569516e-05,
      "loss": 0.9298,
      "step": 73570
    },
    {
      "epoch": 0.2578795557378026,
      "grad_norm": 2.765625,
      "learning_rate": 4.297888994030304e-05,
      "loss": 0.9455,
      "step": 73580
    },
    {
      "epoch": 0.25791460324469817,
      "grad_norm": 3.234375,
      "learning_rate": 4.2984731130036566e-05,
      "loss": 0.9074,
      "step": 73590
    },
    {
      "epoch": 0.25794965075159376,
      "grad_norm": 3.40625,
      "learning_rate": 4.2990572319770094e-05,
      "loss": 1.043,
      "step": 73600
    },
    {
      "epoch": 0.2579846982584894,
      "grad_norm": 3.03125,
      "learning_rate": 4.299641350950362e-05,
      "loss": 1.0478,
      "step": 73610
    },
    {
      "epoch": 0.258019745765385,
      "grad_norm": 3.0625,
      "learning_rate": 4.3002254699237144e-05,
      "loss": 0.9587,
      "step": 73620
    },
    {
      "epoch": 0.2580547932722806,
      "grad_norm": 3.421875,
      "learning_rate": 4.3008095888970665e-05,
      "loss": 1.076,
      "step": 73630
    },
    {
      "epoch": 0.2580898407791762,
      "grad_norm": 3.40625,
      "learning_rate": 4.3013937078704194e-05,
      "loss": 0.9661,
      "step": 73640
    },
    {
      "epoch": 0.2581248882860718,
      "grad_norm": 3.3125,
      "learning_rate": 4.3019778268437715e-05,
      "loss": 0.9574,
      "step": 73650
    },
    {
      "epoch": 0.25815993579296737,
      "grad_norm": 3.578125,
      "learning_rate": 4.302561945817124e-05,
      "loss": 1.0273,
      "step": 73660
    },
    {
      "epoch": 0.25819498329986296,
      "grad_norm": 3.15625,
      "learning_rate": 4.303146064790477e-05,
      "loss": 0.9746,
      "step": 73670
    },
    {
      "epoch": 0.25823003080675855,
      "grad_norm": 3.0,
      "learning_rate": 4.303730183763829e-05,
      "loss": 0.9596,
      "step": 73680
    },
    {
      "epoch": 0.25826507831365414,
      "grad_norm": 3.015625,
      "learning_rate": 4.304314302737182e-05,
      "loss": 1.0146,
      "step": 73690
    },
    {
      "epoch": 0.25830012582054973,
      "grad_norm": 3.125,
      "learning_rate": 4.304898421710534e-05,
      "loss": 1.0125,
      "step": 73700
    },
    {
      "epoch": 0.2583351733274454,
      "grad_norm": 3.109375,
      "learning_rate": 4.3054825406838864e-05,
      "loss": 1.0444,
      "step": 73710
    },
    {
      "epoch": 0.25837022083434097,
      "grad_norm": 3.53125,
      "learning_rate": 4.306066659657239e-05,
      "loss": 0.9769,
      "step": 73720
    },
    {
      "epoch": 0.25840526834123656,
      "grad_norm": 3.1875,
      "learning_rate": 4.3066507786305914e-05,
      "loss": 0.9584,
      "step": 73730
    },
    {
      "epoch": 0.25844031584813215,
      "grad_norm": 3.4375,
      "learning_rate": 4.307234897603944e-05,
      "loss": 1.0026,
      "step": 73740
    },
    {
      "epoch": 0.25847536335502774,
      "grad_norm": 2.921875,
      "learning_rate": 4.307819016577297e-05,
      "loss": 0.9088,
      "step": 73750
    },
    {
      "epoch": 0.25851041086192333,
      "grad_norm": 3.609375,
      "learning_rate": 4.308403135550649e-05,
      "loss": 0.9886,
      "step": 73760
    },
    {
      "epoch": 0.2585454583688189,
      "grad_norm": 3.203125,
      "learning_rate": 4.308987254524002e-05,
      "loss": 1.0219,
      "step": 73770
    },
    {
      "epoch": 0.2585805058757145,
      "grad_norm": 3.25,
      "learning_rate": 4.309571373497354e-05,
      "loss": 0.9626,
      "step": 73780
    },
    {
      "epoch": 0.2586155533826101,
      "grad_norm": 2.796875,
      "learning_rate": 4.310155492470706e-05,
      "loss": 1.0062,
      "step": 73790
    },
    {
      "epoch": 0.2586506008895057,
      "grad_norm": 2.875,
      "learning_rate": 4.310739611444059e-05,
      "loss": 1.0025,
      "step": 73800
    },
    {
      "epoch": 0.25868564839640135,
      "grad_norm": 2.9375,
      "learning_rate": 4.311323730417411e-05,
      "loss": 1.0025,
      "step": 73810
    },
    {
      "epoch": 0.25872069590329694,
      "grad_norm": 3.203125,
      "learning_rate": 4.311907849390764e-05,
      "loss": 1.0344,
      "step": 73820
    },
    {
      "epoch": 0.25875574341019253,
      "grad_norm": 2.84375,
      "learning_rate": 4.312491968364117e-05,
      "loss": 0.9862,
      "step": 73830
    },
    {
      "epoch": 0.2587907909170881,
      "grad_norm": 2.8125,
      "learning_rate": 4.313076087337469e-05,
      "loss": 0.9604,
      "step": 73840
    },
    {
      "epoch": 0.2588258384239837,
      "grad_norm": 3.203125,
      "learning_rate": 4.313660206310822e-05,
      "loss": 0.9999,
      "step": 73850
    },
    {
      "epoch": 0.2588608859308793,
      "grad_norm": 3.109375,
      "learning_rate": 4.314244325284174e-05,
      "loss": 0.9098,
      "step": 73860
    },
    {
      "epoch": 0.2588959334377749,
      "grad_norm": 3.015625,
      "learning_rate": 4.314828444257527e-05,
      "loss": 1.0602,
      "step": 73870
    },
    {
      "epoch": 0.2589309809446705,
      "grad_norm": 2.828125,
      "learning_rate": 4.315412563230879e-05,
      "loss": 1.0034,
      "step": 73880
    },
    {
      "epoch": 0.2589660284515661,
      "grad_norm": 3.484375,
      "learning_rate": 4.315996682204231e-05,
      "loss": 1.0226,
      "step": 73890
    },
    {
      "epoch": 0.25900107595846167,
      "grad_norm": 3.390625,
      "learning_rate": 4.316580801177584e-05,
      "loss": 1.025,
      "step": 73900
    },
    {
      "epoch": 0.2590361234653573,
      "grad_norm": 3.15625,
      "learning_rate": 4.317164920150937e-05,
      "loss": 0.9422,
      "step": 73910
    },
    {
      "epoch": 0.2590711709722529,
      "grad_norm": 3.328125,
      "learning_rate": 4.317749039124289e-05,
      "loss": 0.8963,
      "step": 73920
    },
    {
      "epoch": 0.2591062184791485,
      "grad_norm": 2.71875,
      "learning_rate": 4.318333158097642e-05,
      "loss": 0.9444,
      "step": 73930
    },
    {
      "epoch": 0.2591412659860441,
      "grad_norm": 3.1875,
      "learning_rate": 4.318917277070994e-05,
      "loss": 0.9642,
      "step": 73940
    },
    {
      "epoch": 0.2591763134929397,
      "grad_norm": 3.15625,
      "learning_rate": 4.319501396044347e-05,
      "loss": 1.0214,
      "step": 73950
    },
    {
      "epoch": 0.25921136099983527,
      "grad_norm": 3.359375,
      "learning_rate": 4.320085515017699e-05,
      "loss": 0.9905,
      "step": 73960
    },
    {
      "epoch": 0.25924640850673086,
      "grad_norm": 2.671875,
      "learning_rate": 4.320669633991051e-05,
      "loss": 0.876,
      "step": 73970
    },
    {
      "epoch": 0.25928145601362645,
      "grad_norm": 3.359375,
      "learning_rate": 4.3212537529644045e-05,
      "loss": 0.9934,
      "step": 73980
    },
    {
      "epoch": 0.25931650352052205,
      "grad_norm": 2.5,
      "learning_rate": 4.3218378719377566e-05,
      "loss": 0.8726,
      "step": 73990
    },
    {
      "epoch": 0.25935155102741764,
      "grad_norm": 4.53125,
      "learning_rate": 4.322421990911109e-05,
      "loss": 1.0118,
      "step": 74000
    },
    {
      "epoch": 0.2593865985343133,
      "grad_norm": 3.25,
      "learning_rate": 4.3230061098844616e-05,
      "loss": 0.9333,
      "step": 74010
    },
    {
      "epoch": 0.2594216460412089,
      "grad_norm": 3.359375,
      "learning_rate": 4.323590228857814e-05,
      "loss": 0.8872,
      "step": 74020
    },
    {
      "epoch": 0.25945669354810447,
      "grad_norm": 3.421875,
      "learning_rate": 4.3241743478311666e-05,
      "loss": 0.9578,
      "step": 74030
    },
    {
      "epoch": 0.25949174105500006,
      "grad_norm": 3.796875,
      "learning_rate": 4.324758466804519e-05,
      "loss": 1.0219,
      "step": 74040
    },
    {
      "epoch": 0.25952678856189565,
      "grad_norm": 3.328125,
      "learning_rate": 4.325342585777871e-05,
      "loss": 1.0037,
      "step": 74050
    },
    {
      "epoch": 0.25956183606879124,
      "grad_norm": 3.296875,
      "learning_rate": 4.3259267047512244e-05,
      "loss": 0.9686,
      "step": 74060
    },
    {
      "epoch": 0.25959688357568683,
      "grad_norm": 3.015625,
      "learning_rate": 4.3265108237245765e-05,
      "loss": 1.0618,
      "step": 74070
    },
    {
      "epoch": 0.2596319310825824,
      "grad_norm": 3.40625,
      "learning_rate": 4.3270949426979293e-05,
      "loss": 1.0871,
      "step": 74080
    },
    {
      "epoch": 0.259666978589478,
      "grad_norm": 2.859375,
      "learning_rate": 4.3276790616712815e-05,
      "loss": 0.9092,
      "step": 74090
    },
    {
      "epoch": 0.25970202609637366,
      "grad_norm": 3.1875,
      "learning_rate": 4.3282631806446336e-05,
      "loss": 1.0337,
      "step": 74100
    },
    {
      "epoch": 0.25973707360326925,
      "grad_norm": 3.3125,
      "learning_rate": 4.3288472996179865e-05,
      "loss": 0.9882,
      "step": 74110
    },
    {
      "epoch": 0.25977212111016484,
      "grad_norm": 3.4375,
      "learning_rate": 4.3294314185913386e-05,
      "loss": 0.9943,
      "step": 74120
    },
    {
      "epoch": 0.25980716861706044,
      "grad_norm": 2.984375,
      "learning_rate": 4.3300155375646914e-05,
      "loss": 0.9821,
      "step": 74130
    },
    {
      "epoch": 0.259842216123956,
      "grad_norm": 3.171875,
      "learning_rate": 4.330599656538044e-05,
      "loss": 1.0405,
      "step": 74140
    },
    {
      "epoch": 0.2598772636308516,
      "grad_norm": 3.59375,
      "learning_rate": 4.3311837755113964e-05,
      "loss": 1.0394,
      "step": 74150
    },
    {
      "epoch": 0.2599123111377472,
      "grad_norm": 3.546875,
      "learning_rate": 4.331767894484749e-05,
      "loss": 1.0178,
      "step": 74160
    },
    {
      "epoch": 0.2599473586446428,
      "grad_norm": 3.1875,
      "learning_rate": 4.3323520134581014e-05,
      "loss": 0.9563,
      "step": 74170
    },
    {
      "epoch": 0.2599824061515384,
      "grad_norm": 3.234375,
      "learning_rate": 4.3329361324314535e-05,
      "loss": 1.0484,
      "step": 74180
    },
    {
      "epoch": 0.260017453658434,
      "grad_norm": 2.921875,
      "learning_rate": 4.333520251404806e-05,
      "loss": 0.901,
      "step": 74190
    },
    {
      "epoch": 0.26005250116532963,
      "grad_norm": 3.21875,
      "learning_rate": 4.3341043703781585e-05,
      "loss": 1.0364,
      "step": 74200
    },
    {
      "epoch": 0.2600875486722252,
      "grad_norm": 3.1875,
      "learning_rate": 4.334688489351511e-05,
      "loss": 1.0518,
      "step": 74210
    },
    {
      "epoch": 0.2601225961791208,
      "grad_norm": 3.609375,
      "learning_rate": 4.335272608324864e-05,
      "loss": 1.0358,
      "step": 74220
    },
    {
      "epoch": 0.2601576436860164,
      "grad_norm": 3.21875,
      "learning_rate": 4.335856727298216e-05,
      "loss": 1.0595,
      "step": 74230
    },
    {
      "epoch": 0.260192691192912,
      "grad_norm": 3.03125,
      "learning_rate": 4.336440846271569e-05,
      "loss": 0.9452,
      "step": 74240
    },
    {
      "epoch": 0.2602277386998076,
      "grad_norm": 3.265625,
      "learning_rate": 4.337024965244921e-05,
      "loss": 1.056,
      "step": 74250
    },
    {
      "epoch": 0.2602627862067032,
      "grad_norm": 3.265625,
      "learning_rate": 4.3376090842182734e-05,
      "loss": 1.0174,
      "step": 74260
    },
    {
      "epoch": 0.26029783371359877,
      "grad_norm": 3.015625,
      "learning_rate": 4.338193203191626e-05,
      "loss": 0.9972,
      "step": 74270
    },
    {
      "epoch": 0.26033288122049436,
      "grad_norm": 3.578125,
      "learning_rate": 4.3387773221649784e-05,
      "loss": 1.0035,
      "step": 74280
    },
    {
      "epoch": 0.26036792872738995,
      "grad_norm": 3.1875,
      "learning_rate": 4.339361441138332e-05,
      "loss": 1.0052,
      "step": 74290
    },
    {
      "epoch": 0.2604029762342856,
      "grad_norm": 2.671875,
      "learning_rate": 4.339945560111684e-05,
      "loss": 0.9783,
      "step": 74300
    },
    {
      "epoch": 0.2604380237411812,
      "grad_norm": 3.28125,
      "learning_rate": 4.340529679085036e-05,
      "loss": 0.9294,
      "step": 74310
    },
    {
      "epoch": 0.2604730712480768,
      "grad_norm": 3.171875,
      "learning_rate": 4.341113798058389e-05,
      "loss": 0.9468,
      "step": 74320
    },
    {
      "epoch": 0.2605081187549724,
      "grad_norm": 3.3125,
      "learning_rate": 4.341697917031741e-05,
      "loss": 1.0505,
      "step": 74330
    },
    {
      "epoch": 0.26054316626186796,
      "grad_norm": 3.390625,
      "learning_rate": 4.342282036005094e-05,
      "loss": 0.9369,
      "step": 74340
    },
    {
      "epoch": 0.26057821376876356,
      "grad_norm": 2.578125,
      "learning_rate": 4.342866154978446e-05,
      "loss": 0.9737,
      "step": 74350
    },
    {
      "epoch": 0.26061326127565915,
      "grad_norm": 3.078125,
      "learning_rate": 4.343450273951798e-05,
      "loss": 1.0312,
      "step": 74360
    },
    {
      "epoch": 0.26064830878255474,
      "grad_norm": 3.375,
      "learning_rate": 4.344034392925152e-05,
      "loss": 0.957,
      "step": 74370
    },
    {
      "epoch": 0.26068335628945033,
      "grad_norm": 3.34375,
      "learning_rate": 4.344618511898504e-05,
      "loss": 0.9853,
      "step": 74380
    },
    {
      "epoch": 0.2607184037963459,
      "grad_norm": 3.296875,
      "learning_rate": 4.345202630871856e-05,
      "loss": 0.9511,
      "step": 74390
    },
    {
      "epoch": 0.26075345130324157,
      "grad_norm": 2.9375,
      "learning_rate": 4.345786749845209e-05,
      "loss": 0.9308,
      "step": 74400
    },
    {
      "epoch": 0.26078849881013716,
      "grad_norm": 3.609375,
      "learning_rate": 4.346370868818561e-05,
      "loss": 0.9853,
      "step": 74410
    },
    {
      "epoch": 0.26082354631703275,
      "grad_norm": 3.234375,
      "learning_rate": 4.346954987791914e-05,
      "loss": 0.9847,
      "step": 74420
    },
    {
      "epoch": 0.26085859382392834,
      "grad_norm": 3.515625,
      "learning_rate": 4.347539106765266e-05,
      "loss": 0.9966,
      "step": 74430
    },
    {
      "epoch": 0.26089364133082393,
      "grad_norm": 3.15625,
      "learning_rate": 4.348123225738618e-05,
      "loss": 0.9791,
      "step": 74440
    },
    {
      "epoch": 0.2609286888377195,
      "grad_norm": 2.921875,
      "learning_rate": 4.3487073447119716e-05,
      "loss": 0.9701,
      "step": 74450
    },
    {
      "epoch": 0.2609637363446151,
      "grad_norm": 2.96875,
      "learning_rate": 4.349291463685324e-05,
      "loss": 0.9568,
      "step": 74460
    },
    {
      "epoch": 0.2609987838515107,
      "grad_norm": 3.40625,
      "learning_rate": 4.349875582658676e-05,
      "loss": 1.003,
      "step": 74470
    },
    {
      "epoch": 0.2610338313584063,
      "grad_norm": 3.03125,
      "learning_rate": 4.350459701632029e-05,
      "loss": 0.9147,
      "step": 74480
    },
    {
      "epoch": 0.2610688788653019,
      "grad_norm": 2.96875,
      "learning_rate": 4.351043820605381e-05,
      "loss": 1.0466,
      "step": 74490
    },
    {
      "epoch": 0.26110392637219754,
      "grad_norm": 3.171875,
      "learning_rate": 4.351627939578734e-05,
      "loss": 0.9401,
      "step": 74500
    },
    {
      "epoch": 0.26113897387909313,
      "grad_norm": 2.78125,
      "learning_rate": 4.352212058552086e-05,
      "loss": 0.9597,
      "step": 74510
    },
    {
      "epoch": 0.2611740213859887,
      "grad_norm": 3.5,
      "learning_rate": 4.352796177525439e-05,
      "loss": 0.998,
      "step": 74520
    },
    {
      "epoch": 0.2612090688928843,
      "grad_norm": 3.40625,
      "learning_rate": 4.3533802964987915e-05,
      "loss": 0.9975,
      "step": 74530
    },
    {
      "epoch": 0.2612441163997799,
      "grad_norm": 3.671875,
      "learning_rate": 4.3539644154721436e-05,
      "loss": 1.0505,
      "step": 74540
    },
    {
      "epoch": 0.2612791639066755,
      "grad_norm": 2.96875,
      "learning_rate": 4.3545485344454965e-05,
      "loss": 0.9425,
      "step": 74550
    },
    {
      "epoch": 0.2613142114135711,
      "grad_norm": 3.6875,
      "learning_rate": 4.3551326534188486e-05,
      "loss": 0.9284,
      "step": 74560
    },
    {
      "epoch": 0.2613492589204667,
      "grad_norm": 3.328125,
      "learning_rate": 4.355716772392201e-05,
      "loss": 0.9581,
      "step": 74570
    },
    {
      "epoch": 0.26138430642736227,
      "grad_norm": 3.375,
      "learning_rate": 4.3563008913655536e-05,
      "loss": 0.9892,
      "step": 74580
    },
    {
      "epoch": 0.2614193539342579,
      "grad_norm": 3.171875,
      "learning_rate": 4.356885010338906e-05,
      "loss": 0.9116,
      "step": 74590
    },
    {
      "epoch": 0.2614544014411535,
      "grad_norm": 3.40625,
      "learning_rate": 4.3574691293122585e-05,
      "loss": 0.9369,
      "step": 74600
    },
    {
      "epoch": 0.2614894489480491,
      "grad_norm": 3.28125,
      "learning_rate": 4.3580532482856114e-05,
      "loss": 0.9496,
      "step": 74610
    },
    {
      "epoch": 0.2615244964549447,
      "grad_norm": 3.453125,
      "learning_rate": 4.3586373672589635e-05,
      "loss": 0.9411,
      "step": 74620
    },
    {
      "epoch": 0.2615595439618403,
      "grad_norm": 2.65625,
      "learning_rate": 4.359221486232316e-05,
      "loss": 0.9758,
      "step": 74630
    },
    {
      "epoch": 0.26159459146873587,
      "grad_norm": 3.265625,
      "learning_rate": 4.3598056052056685e-05,
      "loss": 0.9085,
      "step": 74640
    },
    {
      "epoch": 0.26162963897563146,
      "grad_norm": 3.109375,
      "learning_rate": 4.3603897241790206e-05,
      "loss": 1.0447,
      "step": 74650
    },
    {
      "epoch": 0.26166468648252705,
      "grad_norm": 3.21875,
      "learning_rate": 4.3609738431523734e-05,
      "loss": 1.1195,
      "step": 74660
    },
    {
      "epoch": 0.26169973398942264,
      "grad_norm": 3.40625,
      "learning_rate": 4.3615579621257256e-05,
      "loss": 1.0194,
      "step": 74670
    },
    {
      "epoch": 0.26173478149631824,
      "grad_norm": 3.421875,
      "learning_rate": 4.3621420810990784e-05,
      "loss": 0.9314,
      "step": 74680
    },
    {
      "epoch": 0.2617698290032139,
      "grad_norm": 2.96875,
      "learning_rate": 4.362726200072431e-05,
      "loss": 0.9838,
      "step": 74690
    },
    {
      "epoch": 0.2618048765101095,
      "grad_norm": 3.328125,
      "learning_rate": 4.3633103190457834e-05,
      "loss": 0.9937,
      "step": 74700
    },
    {
      "epoch": 0.26183992401700507,
      "grad_norm": 3.171875,
      "learning_rate": 4.363894438019136e-05,
      "loss": 0.9863,
      "step": 74710
    },
    {
      "epoch": 0.26187497152390066,
      "grad_norm": 3.1875,
      "learning_rate": 4.3644785569924884e-05,
      "loss": 1.0218,
      "step": 74720
    },
    {
      "epoch": 0.26191001903079625,
      "grad_norm": 3.0,
      "learning_rate": 4.365062675965841e-05,
      "loss": 0.9578,
      "step": 74730
    },
    {
      "epoch": 0.26194506653769184,
      "grad_norm": 2.875,
      "learning_rate": 4.365646794939193e-05,
      "loss": 0.9634,
      "step": 74740
    },
    {
      "epoch": 0.26198011404458743,
      "grad_norm": 3.109375,
      "learning_rate": 4.3662309139125455e-05,
      "loss": 0.9544,
      "step": 74750
    },
    {
      "epoch": 0.262015161551483,
      "grad_norm": 3.0625,
      "learning_rate": 4.366815032885899e-05,
      "loss": 0.944,
      "step": 74760
    },
    {
      "epoch": 0.2620502090583786,
      "grad_norm": 2.921875,
      "learning_rate": 4.367399151859251e-05,
      "loss": 0.8941,
      "step": 74770
    },
    {
      "epoch": 0.2620852565652742,
      "grad_norm": 3.3125,
      "learning_rate": 4.367983270832603e-05,
      "loss": 1.0033,
      "step": 74780
    },
    {
      "epoch": 0.26212030407216985,
      "grad_norm": 3.578125,
      "learning_rate": 4.368567389805956e-05,
      "loss": 0.9754,
      "step": 74790
    },
    {
      "epoch": 0.26215535157906544,
      "grad_norm": 3.015625,
      "learning_rate": 4.369151508779308e-05,
      "loss": 0.9758,
      "step": 74800
    },
    {
      "epoch": 0.26219039908596103,
      "grad_norm": 3.65625,
      "learning_rate": 4.369735627752661e-05,
      "loss": 1.0096,
      "step": 74810
    },
    {
      "epoch": 0.2622254465928566,
      "grad_norm": 3.40625,
      "learning_rate": 4.370319746726013e-05,
      "loss": 0.9448,
      "step": 74820
    },
    {
      "epoch": 0.2622604940997522,
      "grad_norm": 3.140625,
      "learning_rate": 4.3709038656993653e-05,
      "loss": 1.0369,
      "step": 74830
    },
    {
      "epoch": 0.2622955416066478,
      "grad_norm": 3.1875,
      "learning_rate": 4.371487984672719e-05,
      "loss": 0.9136,
      "step": 74840
    },
    {
      "epoch": 0.2623305891135434,
      "grad_norm": 3.140625,
      "learning_rate": 4.372072103646071e-05,
      "loss": 1.0263,
      "step": 74850
    },
    {
      "epoch": 0.262365636620439,
      "grad_norm": 3.828125,
      "learning_rate": 4.372656222619423e-05,
      "loss": 0.9634,
      "step": 74860
    },
    {
      "epoch": 0.2624006841273346,
      "grad_norm": 2.953125,
      "learning_rate": 4.373240341592776e-05,
      "loss": 0.9027,
      "step": 74870
    },
    {
      "epoch": 0.2624357316342302,
      "grad_norm": 3.40625,
      "learning_rate": 4.373824460566128e-05,
      "loss": 0.9358,
      "step": 74880
    },
    {
      "epoch": 0.2624707791411258,
      "grad_norm": 3.671875,
      "learning_rate": 4.374408579539481e-05,
      "loss": 1.0325,
      "step": 74890
    },
    {
      "epoch": 0.2625058266480214,
      "grad_norm": 3.1875,
      "learning_rate": 4.374992698512833e-05,
      "loss": 1.0097,
      "step": 74900
    },
    {
      "epoch": 0.262540874154917,
      "grad_norm": 3.53125,
      "learning_rate": 4.375576817486185e-05,
      "loss": 0.9693,
      "step": 74910
    },
    {
      "epoch": 0.2625759216618126,
      "grad_norm": 3.3125,
      "learning_rate": 4.376160936459539e-05,
      "loss": 0.9391,
      "step": 74920
    },
    {
      "epoch": 0.2626109691687082,
      "grad_norm": 3.890625,
      "learning_rate": 4.376745055432891e-05,
      "loss": 1.0458,
      "step": 74930
    },
    {
      "epoch": 0.2626460166756038,
      "grad_norm": 3.625,
      "learning_rate": 4.377329174406244e-05,
      "loss": 1.0721,
      "step": 74940
    },
    {
      "epoch": 0.26268106418249937,
      "grad_norm": 3.28125,
      "learning_rate": 4.377913293379596e-05,
      "loss": 0.9688,
      "step": 74950
    },
    {
      "epoch": 0.26271611168939496,
      "grad_norm": 3.34375,
      "learning_rate": 4.378497412352948e-05,
      "loss": 0.9348,
      "step": 74960
    },
    {
      "epoch": 0.26275115919629055,
      "grad_norm": 2.734375,
      "learning_rate": 4.379081531326301e-05,
      "loss": 0.991,
      "step": 74970
    },
    {
      "epoch": 0.26278620670318614,
      "grad_norm": 3.5,
      "learning_rate": 4.379665650299653e-05,
      "loss": 1.0094,
      "step": 74980
    },
    {
      "epoch": 0.2628212542100818,
      "grad_norm": 3.1875,
      "learning_rate": 4.380249769273006e-05,
      "loss": 0.9387,
      "step": 74990
    },
    {
      "epoch": 0.2628563017169774,
      "grad_norm": 3.09375,
      "learning_rate": 4.3808338882463586e-05,
      "loss": 0.9424,
      "step": 75000
    },
    {
      "epoch": 0.2628563017169774,
      "eval_loss": 0.9188407063484192,
      "eval_runtime": 559.5784,
      "eval_samples_per_second": 679.862,
      "eval_steps_per_second": 56.655,
      "step": 75000
    },
    {
      "epoch": 0.26289134922387297,
      "grad_norm": 4.25,
      "learning_rate": 4.381418007219711e-05,
      "loss": 0.98,
      "step": 75010
    },
    {
      "epoch": 0.26292639673076856,
      "grad_norm": 3.125,
      "learning_rate": 4.3820021261930636e-05,
      "loss": 0.9705,
      "step": 75020
    },
    {
      "epoch": 0.26296144423766415,
      "grad_norm": 3.421875,
      "learning_rate": 4.382586245166416e-05,
      "loss": 0.91,
      "step": 75030
    },
    {
      "epoch": 0.26299649174455975,
      "grad_norm": 3.15625,
      "learning_rate": 4.383170364139768e-05,
      "loss": 0.964,
      "step": 75040
    },
    {
      "epoch": 0.26303153925145534,
      "grad_norm": 2.734375,
      "learning_rate": 4.383754483113121e-05,
      "loss": 0.9813,
      "step": 75050
    },
    {
      "epoch": 0.26306658675835093,
      "grad_norm": 3.484375,
      "learning_rate": 4.384338602086473e-05,
      "loss": 1.0037,
      "step": 75060
    },
    {
      "epoch": 0.2631016342652465,
      "grad_norm": 3.5,
      "learning_rate": 4.3849227210598256e-05,
      "loss": 1.0279,
      "step": 75070
    },
    {
      "epoch": 0.2631366817721421,
      "grad_norm": 3.6875,
      "learning_rate": 4.3855068400331785e-05,
      "loss": 1.0243,
      "step": 75080
    },
    {
      "epoch": 0.26317172927903776,
      "grad_norm": 3.375,
      "learning_rate": 4.3860909590065306e-05,
      "loss": 0.9317,
      "step": 75090
    },
    {
      "epoch": 0.26320677678593335,
      "grad_norm": 3.34375,
      "learning_rate": 4.3866750779798834e-05,
      "loss": 1.0758,
      "step": 75100
    },
    {
      "epoch": 0.26324182429282894,
      "grad_norm": 3.015625,
      "learning_rate": 4.3872591969532356e-05,
      "loss": 0.945,
      "step": 75110
    },
    {
      "epoch": 0.26327687179972453,
      "grad_norm": 3.234375,
      "learning_rate": 4.387843315926588e-05,
      "loss": 0.9114,
      "step": 75120
    },
    {
      "epoch": 0.2633119193066201,
      "grad_norm": 3.234375,
      "learning_rate": 4.3884274348999406e-05,
      "loss": 0.9626,
      "step": 75130
    },
    {
      "epoch": 0.2633469668135157,
      "grad_norm": 2.671875,
      "learning_rate": 4.389011553873293e-05,
      "loss": 0.9429,
      "step": 75140
    },
    {
      "epoch": 0.2633820143204113,
      "grad_norm": 3.28125,
      "learning_rate": 4.3895956728466455e-05,
      "loss": 0.99,
      "step": 75150
    },
    {
      "epoch": 0.2634170618273069,
      "grad_norm": 2.921875,
      "learning_rate": 4.3901797918199983e-05,
      "loss": 0.9407,
      "step": 75160
    },
    {
      "epoch": 0.2634521093342025,
      "grad_norm": 3.234375,
      "learning_rate": 4.3907639107933505e-05,
      "loss": 1.0293,
      "step": 75170
    },
    {
      "epoch": 0.26348715684109814,
      "grad_norm": 3.046875,
      "learning_rate": 4.391348029766703e-05,
      "loss": 1.0159,
      "step": 75180
    },
    {
      "epoch": 0.2635222043479937,
      "grad_norm": 3.28125,
      "learning_rate": 4.3919321487400555e-05,
      "loss": 1.0606,
      "step": 75190
    },
    {
      "epoch": 0.2635572518548893,
      "grad_norm": 3.328125,
      "learning_rate": 4.392516267713408e-05,
      "loss": 0.9685,
      "step": 75200
    },
    {
      "epoch": 0.2635922993617849,
      "grad_norm": 3.296875,
      "learning_rate": 4.3931003866867604e-05,
      "loss": 0.9718,
      "step": 75210
    },
    {
      "epoch": 0.2636273468686805,
      "grad_norm": 3.25,
      "learning_rate": 4.3936845056601126e-05,
      "loss": 1.0219,
      "step": 75220
    },
    {
      "epoch": 0.2636623943755761,
      "grad_norm": 3.421875,
      "learning_rate": 4.394268624633466e-05,
      "loss": 1.0242,
      "step": 75230
    },
    {
      "epoch": 0.2636974418824717,
      "grad_norm": 3.3125,
      "learning_rate": 4.394852743606818e-05,
      "loss": 1.0095,
      "step": 75240
    },
    {
      "epoch": 0.2637324893893673,
      "grad_norm": 2.859375,
      "learning_rate": 4.3954368625801704e-05,
      "loss": 0.9792,
      "step": 75250
    },
    {
      "epoch": 0.26376753689626287,
      "grad_norm": 3.515625,
      "learning_rate": 4.396020981553523e-05,
      "loss": 0.9764,
      "step": 75260
    },
    {
      "epoch": 0.26380258440315846,
      "grad_norm": 3.328125,
      "learning_rate": 4.396605100526875e-05,
      "loss": 0.9575,
      "step": 75270
    },
    {
      "epoch": 0.2638376319100541,
      "grad_norm": 3.3125,
      "learning_rate": 4.397189219500228e-05,
      "loss": 1.0021,
      "step": 75280
    },
    {
      "epoch": 0.2638726794169497,
      "grad_norm": 3.546875,
      "learning_rate": 4.39777333847358e-05,
      "loss": 1.0432,
      "step": 75290
    },
    {
      "epoch": 0.2639077269238453,
      "grad_norm": 3.109375,
      "learning_rate": 4.3983574574469324e-05,
      "loss": 0.9461,
      "step": 75300
    },
    {
      "epoch": 0.2639427744307409,
      "grad_norm": 3.65625,
      "learning_rate": 4.398941576420286e-05,
      "loss": 0.9983,
      "step": 75310
    },
    {
      "epoch": 0.26397782193763647,
      "grad_norm": 3.0,
      "learning_rate": 4.399525695393638e-05,
      "loss": 1.0988,
      "step": 75320
    },
    {
      "epoch": 0.26401286944453206,
      "grad_norm": 3.546875,
      "learning_rate": 4.40010981436699e-05,
      "loss": 1.0248,
      "step": 75330
    },
    {
      "epoch": 0.26404791695142765,
      "grad_norm": 2.9375,
      "learning_rate": 4.400693933340343e-05,
      "loss": 1.0423,
      "step": 75340
    },
    {
      "epoch": 0.26408296445832324,
      "grad_norm": 3.640625,
      "learning_rate": 4.401278052313695e-05,
      "loss": 1.0682,
      "step": 75350
    },
    {
      "epoch": 0.26411801196521884,
      "grad_norm": 3.046875,
      "learning_rate": 4.401862171287048e-05,
      "loss": 0.924,
      "step": 75360
    },
    {
      "epoch": 0.2641530594721144,
      "grad_norm": 2.671875,
      "learning_rate": 4.4024462902604e-05,
      "loss": 0.9452,
      "step": 75370
    },
    {
      "epoch": 0.2641881069790101,
      "grad_norm": 3.03125,
      "learning_rate": 4.403030409233753e-05,
      "loss": 0.9868,
      "step": 75380
    },
    {
      "epoch": 0.26422315448590566,
      "grad_norm": 3.453125,
      "learning_rate": 4.403614528207106e-05,
      "loss": 1.0146,
      "step": 75390
    },
    {
      "epoch": 0.26425820199280126,
      "grad_norm": 3.296875,
      "learning_rate": 4.404198647180458e-05,
      "loss": 1.033,
      "step": 75400
    },
    {
      "epoch": 0.26429324949969685,
      "grad_norm": 3.015625,
      "learning_rate": 4.404782766153811e-05,
      "loss": 0.9105,
      "step": 75410
    },
    {
      "epoch": 0.26432829700659244,
      "grad_norm": 3.171875,
      "learning_rate": 4.405366885127163e-05,
      "loss": 0.9315,
      "step": 75420
    },
    {
      "epoch": 0.26436334451348803,
      "grad_norm": 3.53125,
      "learning_rate": 4.405951004100515e-05,
      "loss": 0.9875,
      "step": 75430
    },
    {
      "epoch": 0.2643983920203836,
      "grad_norm": 3.453125,
      "learning_rate": 4.406535123073868e-05,
      "loss": 0.9745,
      "step": 75440
    },
    {
      "epoch": 0.2644334395272792,
      "grad_norm": 3.171875,
      "learning_rate": 4.40711924204722e-05,
      "loss": 0.9984,
      "step": 75450
    },
    {
      "epoch": 0.2644684870341748,
      "grad_norm": 3.0,
      "learning_rate": 4.407703361020573e-05,
      "loss": 0.9869,
      "step": 75460
    },
    {
      "epoch": 0.2645035345410704,
      "grad_norm": 3.203125,
      "learning_rate": 4.408287479993926e-05,
      "loss": 0.9908,
      "step": 75470
    },
    {
      "epoch": 0.26453858204796604,
      "grad_norm": 3.140625,
      "learning_rate": 4.408871598967278e-05,
      "loss": 1.0293,
      "step": 75480
    },
    {
      "epoch": 0.26457362955486163,
      "grad_norm": 3.296875,
      "learning_rate": 4.409455717940631e-05,
      "loss": 0.9644,
      "step": 75490
    },
    {
      "epoch": 0.2646086770617572,
      "grad_norm": 3.1875,
      "learning_rate": 4.410039836913983e-05,
      "loss": 1.0416,
      "step": 75500
    },
    {
      "epoch": 0.2646437245686528,
      "grad_norm": 3.3125,
      "learning_rate": 4.410623955887335e-05,
      "loss": 1.0194,
      "step": 75510
    },
    {
      "epoch": 0.2646787720755484,
      "grad_norm": 2.78125,
      "learning_rate": 4.411208074860688e-05,
      "loss": 0.9146,
      "step": 75520
    },
    {
      "epoch": 0.264713819582444,
      "grad_norm": 3.46875,
      "learning_rate": 4.41179219383404e-05,
      "loss": 1.0007,
      "step": 75530
    },
    {
      "epoch": 0.2647488670893396,
      "grad_norm": 3.015625,
      "learning_rate": 4.412376312807393e-05,
      "loss": 0.9589,
      "step": 75540
    },
    {
      "epoch": 0.2647839145962352,
      "grad_norm": 3.046875,
      "learning_rate": 4.4129604317807456e-05,
      "loss": 0.9842,
      "step": 75550
    },
    {
      "epoch": 0.2648189621031308,
      "grad_norm": 3.265625,
      "learning_rate": 4.413544550754098e-05,
      "loss": 1.0529,
      "step": 75560
    },
    {
      "epoch": 0.26485400961002636,
      "grad_norm": 3.328125,
      "learning_rate": 4.4141286697274505e-05,
      "loss": 0.9581,
      "step": 75570
    },
    {
      "epoch": 0.264889057116922,
      "grad_norm": 3.40625,
      "learning_rate": 4.414712788700803e-05,
      "loss": 0.9217,
      "step": 75580
    },
    {
      "epoch": 0.2649241046238176,
      "grad_norm": 2.734375,
      "learning_rate": 4.415296907674155e-05,
      "loss": 0.9704,
      "step": 75590
    },
    {
      "epoch": 0.2649591521307132,
      "grad_norm": 3.25,
      "learning_rate": 4.4158810266475077e-05,
      "loss": 0.9272,
      "step": 75600
    },
    {
      "epoch": 0.2649941996376088,
      "grad_norm": 3.390625,
      "learning_rate": 4.41646514562086e-05,
      "loss": 1.0048,
      "step": 75610
    },
    {
      "epoch": 0.2650292471445044,
      "grad_norm": 3.484375,
      "learning_rate": 4.417049264594213e-05,
      "loss": 1.0179,
      "step": 75620
    },
    {
      "epoch": 0.26506429465139997,
      "grad_norm": 2.875,
      "learning_rate": 4.4176333835675655e-05,
      "loss": 0.9141,
      "step": 75630
    },
    {
      "epoch": 0.26509934215829556,
      "grad_norm": 2.78125,
      "learning_rate": 4.4182175025409176e-05,
      "loss": 0.9655,
      "step": 75640
    },
    {
      "epoch": 0.26513438966519115,
      "grad_norm": 2.9375,
      "learning_rate": 4.4188016215142704e-05,
      "loss": 0.9315,
      "step": 75650
    },
    {
      "epoch": 0.26516943717208674,
      "grad_norm": 3.015625,
      "learning_rate": 4.4193857404876226e-05,
      "loss": 0.9587,
      "step": 75660
    },
    {
      "epoch": 0.26520448467898233,
      "grad_norm": 3.125,
      "learning_rate": 4.4199698594609754e-05,
      "loss": 1.0008,
      "step": 75670
    },
    {
      "epoch": 0.265239532185878,
      "grad_norm": 3.109375,
      "learning_rate": 4.4205539784343275e-05,
      "loss": 0.9859,
      "step": 75680
    },
    {
      "epoch": 0.26527457969277357,
      "grad_norm": 3.140625,
      "learning_rate": 4.4211380974076804e-05,
      "loss": 0.9826,
      "step": 75690
    },
    {
      "epoch": 0.26530962719966916,
      "grad_norm": 3.625,
      "learning_rate": 4.421722216381033e-05,
      "loss": 1.063,
      "step": 75700
    },
    {
      "epoch": 0.26534467470656475,
      "grad_norm": 3.359375,
      "learning_rate": 4.422306335354385e-05,
      "loss": 0.9696,
      "step": 75710
    },
    {
      "epoch": 0.26537972221346035,
      "grad_norm": 2.953125,
      "learning_rate": 4.4228904543277375e-05,
      "loss": 0.9577,
      "step": 75720
    },
    {
      "epoch": 0.26541476972035594,
      "grad_norm": 3.0625,
      "learning_rate": 4.42347457330109e-05,
      "loss": 0.9953,
      "step": 75730
    },
    {
      "epoch": 0.2654498172272515,
      "grad_norm": 3.4375,
      "learning_rate": 4.4240586922744424e-05,
      "loss": 0.9957,
      "step": 75740
    },
    {
      "epoch": 0.2654848647341471,
      "grad_norm": 3.109375,
      "learning_rate": 4.424642811247795e-05,
      "loss": 0.9693,
      "step": 75750
    },
    {
      "epoch": 0.2655199122410427,
      "grad_norm": 3.21875,
      "learning_rate": 4.4252269302211474e-05,
      "loss": 0.9182,
      "step": 75760
    },
    {
      "epoch": 0.26555495974793836,
      "grad_norm": 2.75,
      "learning_rate": 4.4258110491945e-05,
      "loss": 0.8985,
      "step": 75770
    },
    {
      "epoch": 0.26559000725483395,
      "grad_norm": 2.96875,
      "learning_rate": 4.426395168167853e-05,
      "loss": 0.8847,
      "step": 75780
    },
    {
      "epoch": 0.26562505476172954,
      "grad_norm": 3.140625,
      "learning_rate": 4.426979287141205e-05,
      "loss": 0.953,
      "step": 75790
    },
    {
      "epoch": 0.26566010226862513,
      "grad_norm": 3.59375,
      "learning_rate": 4.4275634061145573e-05,
      "loss": 0.9982,
      "step": 75800
    },
    {
      "epoch": 0.2656951497755207,
      "grad_norm": 3.15625,
      "learning_rate": 4.42814752508791e-05,
      "loss": 0.8802,
      "step": 75810
    },
    {
      "epoch": 0.2657301972824163,
      "grad_norm": 2.890625,
      "learning_rate": 4.428731644061262e-05,
      "loss": 1.0357,
      "step": 75820
    },
    {
      "epoch": 0.2657652447893119,
      "grad_norm": 3.640625,
      "learning_rate": 4.429315763034615e-05,
      "loss": 1.0237,
      "step": 75830
    },
    {
      "epoch": 0.2658002922962075,
      "grad_norm": 3.453125,
      "learning_rate": 4.429899882007967e-05,
      "loss": 1.036,
      "step": 75840
    },
    {
      "epoch": 0.2658353398031031,
      "grad_norm": 3.078125,
      "learning_rate": 4.43048400098132e-05,
      "loss": 0.9679,
      "step": 75850
    },
    {
      "epoch": 0.2658703873099987,
      "grad_norm": 3.140625,
      "learning_rate": 4.431068119954673e-05,
      "loss": 0.91,
      "step": 75860
    },
    {
      "epoch": 0.2659054348168943,
      "grad_norm": 3.3125,
      "learning_rate": 4.431652238928025e-05,
      "loss": 0.9665,
      "step": 75870
    },
    {
      "epoch": 0.2659404823237899,
      "grad_norm": 3.15625,
      "learning_rate": 4.432236357901378e-05,
      "loss": 1.0159,
      "step": 75880
    },
    {
      "epoch": 0.2659755298306855,
      "grad_norm": 3.59375,
      "learning_rate": 4.43282047687473e-05,
      "loss": 1.0206,
      "step": 75890
    },
    {
      "epoch": 0.2660105773375811,
      "grad_norm": 2.859375,
      "learning_rate": 4.433404595848082e-05,
      "loss": 0.9779,
      "step": 75900
    },
    {
      "epoch": 0.2660456248444767,
      "grad_norm": 3.171875,
      "learning_rate": 4.433988714821435e-05,
      "loss": 0.9331,
      "step": 75910
    },
    {
      "epoch": 0.2660806723513723,
      "grad_norm": 2.96875,
      "learning_rate": 4.434572833794787e-05,
      "loss": 1.057,
      "step": 75920
    },
    {
      "epoch": 0.2661157198582679,
      "grad_norm": 2.828125,
      "learning_rate": 4.43515695276814e-05,
      "loss": 0.9996,
      "step": 75930
    },
    {
      "epoch": 0.26615076736516347,
      "grad_norm": 3.0625,
      "learning_rate": 4.435741071741493e-05,
      "loss": 0.9723,
      "step": 75940
    },
    {
      "epoch": 0.26618581487205906,
      "grad_norm": 3.453125,
      "learning_rate": 4.436325190714845e-05,
      "loss": 0.9844,
      "step": 75950
    },
    {
      "epoch": 0.26622086237895465,
      "grad_norm": 3.296875,
      "learning_rate": 4.436909309688198e-05,
      "loss": 0.9695,
      "step": 75960
    },
    {
      "epoch": 0.2662559098858503,
      "grad_norm": 2.84375,
      "learning_rate": 4.43749342866155e-05,
      "loss": 0.968,
      "step": 75970
    },
    {
      "epoch": 0.2662909573927459,
      "grad_norm": 3.609375,
      "learning_rate": 4.438077547634902e-05,
      "loss": 0.956,
      "step": 75980
    },
    {
      "epoch": 0.2663260048996415,
      "grad_norm": 3.125,
      "learning_rate": 4.438661666608255e-05,
      "loss": 0.9926,
      "step": 75990
    },
    {
      "epoch": 0.26636105240653707,
      "grad_norm": 3.109375,
      "learning_rate": 4.439245785581607e-05,
      "loss": 1.047,
      "step": 76000
    },
    {
      "epoch": 0.26639609991343266,
      "grad_norm": 3.140625,
      "learning_rate": 4.43982990455496e-05,
      "loss": 0.9273,
      "step": 76010
    },
    {
      "epoch": 0.26643114742032825,
      "grad_norm": 3.203125,
      "learning_rate": 4.440414023528313e-05,
      "loss": 0.9599,
      "step": 76020
    },
    {
      "epoch": 0.26646619492722384,
      "grad_norm": 3.078125,
      "learning_rate": 4.440998142501665e-05,
      "loss": 0.9494,
      "step": 76030
    },
    {
      "epoch": 0.26650124243411943,
      "grad_norm": 3.40625,
      "learning_rate": 4.4415822614750177e-05,
      "loss": 0.9919,
      "step": 76040
    },
    {
      "epoch": 0.266536289941015,
      "grad_norm": 2.859375,
      "learning_rate": 4.44216638044837e-05,
      "loss": 0.8597,
      "step": 76050
    },
    {
      "epoch": 0.2665713374479106,
      "grad_norm": 3.203125,
      "learning_rate": 4.442750499421722e-05,
      "loss": 0.9673,
      "step": 76060
    },
    {
      "epoch": 0.26660638495480626,
      "grad_norm": 3.015625,
      "learning_rate": 4.443334618395075e-05,
      "loss": 0.9686,
      "step": 76070
    },
    {
      "epoch": 0.26664143246170185,
      "grad_norm": 3.265625,
      "learning_rate": 4.4439187373684276e-05,
      "loss": 0.9657,
      "step": 76080
    },
    {
      "epoch": 0.26667647996859745,
      "grad_norm": 3.15625,
      "learning_rate": 4.4445028563417804e-05,
      "loss": 1.0647,
      "step": 76090
    },
    {
      "epoch": 0.26671152747549304,
      "grad_norm": 3.359375,
      "learning_rate": 4.4450869753151326e-05,
      "loss": 0.9871,
      "step": 76100
    },
    {
      "epoch": 0.26674657498238863,
      "grad_norm": 3.34375,
      "learning_rate": 4.445671094288485e-05,
      "loss": 1.0581,
      "step": 76110
    },
    {
      "epoch": 0.2667816224892842,
      "grad_norm": 2.96875,
      "learning_rate": 4.4462552132618375e-05,
      "loss": 0.9846,
      "step": 76120
    },
    {
      "epoch": 0.2668166699961798,
      "grad_norm": 3.0,
      "learning_rate": 4.44683933223519e-05,
      "loss": 0.9164,
      "step": 76130
    },
    {
      "epoch": 0.2668517175030754,
      "grad_norm": 3.125,
      "learning_rate": 4.4474234512085425e-05,
      "loss": 1.0025,
      "step": 76140
    },
    {
      "epoch": 0.266886765009971,
      "grad_norm": 3.421875,
      "learning_rate": 4.4480075701818946e-05,
      "loss": 0.9933,
      "step": 76150
    },
    {
      "epoch": 0.2669218125168666,
      "grad_norm": 3.4375,
      "learning_rate": 4.4485916891552475e-05,
      "loss": 0.9371,
      "step": 76160
    },
    {
      "epoch": 0.26695686002376223,
      "grad_norm": 3.375,
      "learning_rate": 4.4491758081286e-05,
      "loss": 0.9251,
      "step": 76170
    },
    {
      "epoch": 0.2669919075306578,
      "grad_norm": 2.890625,
      "learning_rate": 4.4497599271019524e-05,
      "loss": 0.9309,
      "step": 76180
    },
    {
      "epoch": 0.2670269550375534,
      "grad_norm": 3.25,
      "learning_rate": 4.4503440460753046e-05,
      "loss": 1.0251,
      "step": 76190
    },
    {
      "epoch": 0.267062002544449,
      "grad_norm": 3.046875,
      "learning_rate": 4.4509281650486574e-05,
      "loss": 1.0323,
      "step": 76200
    },
    {
      "epoch": 0.2670970500513446,
      "grad_norm": 3.375,
      "learning_rate": 4.4515122840220095e-05,
      "loss": 0.98,
      "step": 76210
    },
    {
      "epoch": 0.2671320975582402,
      "grad_norm": 3.8125,
      "learning_rate": 4.4520964029953624e-05,
      "loss": 0.9961,
      "step": 76220
    },
    {
      "epoch": 0.2671671450651358,
      "grad_norm": 3.375,
      "learning_rate": 4.4526805219687145e-05,
      "loss": 0.9541,
      "step": 76230
    },
    {
      "epoch": 0.26720219257203137,
      "grad_norm": 3.0625,
      "learning_rate": 4.4532646409420673e-05,
      "loss": 1.0515,
      "step": 76240
    },
    {
      "epoch": 0.26723724007892696,
      "grad_norm": 2.8125,
      "learning_rate": 4.45384875991542e-05,
      "loss": 1.0404,
      "step": 76250
    },
    {
      "epoch": 0.2672722875858226,
      "grad_norm": 3.21875,
      "learning_rate": 4.454432878888772e-05,
      "loss": 0.9512,
      "step": 76260
    },
    {
      "epoch": 0.2673073350927182,
      "grad_norm": 2.703125,
      "learning_rate": 4.4550169978621245e-05,
      "loss": 0.9537,
      "step": 76270
    },
    {
      "epoch": 0.2673423825996138,
      "grad_norm": 2.75,
      "learning_rate": 4.455601116835477e-05,
      "loss": 0.975,
      "step": 76280
    },
    {
      "epoch": 0.2673774301065094,
      "grad_norm": 2.984375,
      "learning_rate": 4.4561852358088294e-05,
      "loss": 0.9688,
      "step": 76290
    },
    {
      "epoch": 0.267412477613405,
      "grad_norm": 3.296875,
      "learning_rate": 4.456769354782182e-05,
      "loss": 0.9594,
      "step": 76300
    },
    {
      "epoch": 0.26744752512030057,
      "grad_norm": 3.078125,
      "learning_rate": 4.4573534737555344e-05,
      "loss": 1.0234,
      "step": 76310
    },
    {
      "epoch": 0.26748257262719616,
      "grad_norm": 2.96875,
      "learning_rate": 4.457937592728887e-05,
      "loss": 1.0621,
      "step": 76320
    },
    {
      "epoch": 0.26751762013409175,
      "grad_norm": 2.828125,
      "learning_rate": 4.45852171170224e-05,
      "loss": 0.9791,
      "step": 76330
    },
    {
      "epoch": 0.26755266764098734,
      "grad_norm": 3.4375,
      "learning_rate": 4.459105830675592e-05,
      "loss": 1.0237,
      "step": 76340
    },
    {
      "epoch": 0.26758771514788293,
      "grad_norm": 2.96875,
      "learning_rate": 4.459689949648945e-05,
      "loss": 0.881,
      "step": 76350
    },
    {
      "epoch": 0.2676227626547786,
      "grad_norm": 3.09375,
      "learning_rate": 4.460274068622297e-05,
      "loss": 1.0368,
      "step": 76360
    },
    {
      "epoch": 0.26765781016167417,
      "grad_norm": 3.03125,
      "learning_rate": 4.460858187595649e-05,
      "loss": 1.0293,
      "step": 76370
    },
    {
      "epoch": 0.26769285766856976,
      "grad_norm": 2.828125,
      "learning_rate": 4.461442306569002e-05,
      "loss": 0.9875,
      "step": 76380
    },
    {
      "epoch": 0.26772790517546535,
      "grad_norm": 3.203125,
      "learning_rate": 4.462026425542355e-05,
      "loss": 0.9447,
      "step": 76390
    },
    {
      "epoch": 0.26776295268236094,
      "grad_norm": 3.15625,
      "learning_rate": 4.462610544515707e-05,
      "loss": 1.0038,
      "step": 76400
    },
    {
      "epoch": 0.26779800018925654,
      "grad_norm": 3.0,
      "learning_rate": 4.46319466348906e-05,
      "loss": 0.9535,
      "step": 76410
    },
    {
      "epoch": 0.2678330476961521,
      "grad_norm": 3.296875,
      "learning_rate": 4.463778782462412e-05,
      "loss": 0.9993,
      "step": 76420
    },
    {
      "epoch": 0.2678680952030477,
      "grad_norm": 2.96875,
      "learning_rate": 4.464362901435765e-05,
      "loss": 0.9485,
      "step": 76430
    },
    {
      "epoch": 0.2679031427099433,
      "grad_norm": 3.59375,
      "learning_rate": 4.464947020409117e-05,
      "loss": 0.9766,
      "step": 76440
    },
    {
      "epoch": 0.2679381902168389,
      "grad_norm": 3.09375,
      "learning_rate": 4.465531139382469e-05,
      "loss": 0.9008,
      "step": 76450
    },
    {
      "epoch": 0.26797323772373455,
      "grad_norm": 2.9375,
      "learning_rate": 4.466115258355822e-05,
      "loss": 0.976,
      "step": 76460
    },
    {
      "epoch": 0.26800828523063014,
      "grad_norm": 2.609375,
      "learning_rate": 4.466699377329175e-05,
      "loss": 1.0401,
      "step": 76470
    },
    {
      "epoch": 0.26804333273752573,
      "grad_norm": 2.609375,
      "learning_rate": 4.467283496302527e-05,
      "loss": 0.9633,
      "step": 76480
    },
    {
      "epoch": 0.2680783802444213,
      "grad_norm": 3.203125,
      "learning_rate": 4.46786761527588e-05,
      "loss": 1.0186,
      "step": 76490
    },
    {
      "epoch": 0.2681134277513169,
      "grad_norm": 3.671875,
      "learning_rate": 4.468451734249232e-05,
      "loss": 0.9649,
      "step": 76500
    },
    {
      "epoch": 0.2681484752582125,
      "grad_norm": 3.421875,
      "learning_rate": 4.469035853222585e-05,
      "loss": 1.0331,
      "step": 76510
    },
    {
      "epoch": 0.2681835227651081,
      "grad_norm": 3.09375,
      "learning_rate": 4.469619972195937e-05,
      "loss": 0.9514,
      "step": 76520
    },
    {
      "epoch": 0.2682185702720037,
      "grad_norm": 3.515625,
      "learning_rate": 4.47020409116929e-05,
      "loss": 1.0035,
      "step": 76530
    },
    {
      "epoch": 0.2682536177788993,
      "grad_norm": 3.515625,
      "learning_rate": 4.470788210142642e-05,
      "loss": 0.9466,
      "step": 76540
    },
    {
      "epoch": 0.26828866528579487,
      "grad_norm": 3.359375,
      "learning_rate": 4.471372329115995e-05,
      "loss": 0.8931,
      "step": 76550
    },
    {
      "epoch": 0.2683237127926905,
      "grad_norm": 3.703125,
      "learning_rate": 4.4719564480893475e-05,
      "loss": 1.0778,
      "step": 76560
    },
    {
      "epoch": 0.2683587602995861,
      "grad_norm": 2.78125,
      "learning_rate": 4.4725405670627e-05,
      "loss": 0.876,
      "step": 76570
    },
    {
      "epoch": 0.2683938078064817,
      "grad_norm": 3.328125,
      "learning_rate": 4.473124686036052e-05,
      "loss": 0.9759,
      "step": 76580
    },
    {
      "epoch": 0.2684288553133773,
      "grad_norm": 2.765625,
      "learning_rate": 4.4737088050094046e-05,
      "loss": 0.8791,
      "step": 76590
    },
    {
      "epoch": 0.2684639028202729,
      "grad_norm": 3.34375,
      "learning_rate": 4.474292923982757e-05,
      "loss": 0.9216,
      "step": 76600
    },
    {
      "epoch": 0.2684989503271685,
      "grad_norm": 2.84375,
      "learning_rate": 4.4748770429561096e-05,
      "loss": 0.9263,
      "step": 76610
    },
    {
      "epoch": 0.26853399783406406,
      "grad_norm": 2.828125,
      "learning_rate": 4.475461161929462e-05,
      "loss": 0.91,
      "step": 76620
    },
    {
      "epoch": 0.26856904534095966,
      "grad_norm": 3.171875,
      "learning_rate": 4.4760452809028146e-05,
      "loss": 0.994,
      "step": 76630
    },
    {
      "epoch": 0.26860409284785525,
      "grad_norm": 3.453125,
      "learning_rate": 4.4766293998761674e-05,
      "loss": 0.9953,
      "step": 76640
    },
    {
      "epoch": 0.26863914035475084,
      "grad_norm": 3.5625,
      "learning_rate": 4.4772135188495195e-05,
      "loss": 1.0196,
      "step": 76650
    },
    {
      "epoch": 0.2686741878616465,
      "grad_norm": 3.171875,
      "learning_rate": 4.477797637822872e-05,
      "loss": 0.92,
      "step": 76660
    },
    {
      "epoch": 0.2687092353685421,
      "grad_norm": 2.78125,
      "learning_rate": 4.4783817567962245e-05,
      "loss": 1.0599,
      "step": 76670
    },
    {
      "epoch": 0.26874428287543767,
      "grad_norm": 3.09375,
      "learning_rate": 4.4789658757695767e-05,
      "loss": 1.0102,
      "step": 76680
    },
    {
      "epoch": 0.26877933038233326,
      "grad_norm": 3.28125,
      "learning_rate": 4.4795499947429295e-05,
      "loss": 0.9894,
      "step": 76690
    },
    {
      "epoch": 0.26881437788922885,
      "grad_norm": 3.0625,
      "learning_rate": 4.480134113716282e-05,
      "loss": 0.9254,
      "step": 76700
    },
    {
      "epoch": 0.26884942539612444,
      "grad_norm": 2.8125,
      "learning_rate": 4.4807182326896344e-05,
      "loss": 0.9771,
      "step": 76710
    },
    {
      "epoch": 0.26888447290302003,
      "grad_norm": 3.171875,
      "learning_rate": 4.481302351662987e-05,
      "loss": 1.0285,
      "step": 76720
    },
    {
      "epoch": 0.2689195204099156,
      "grad_norm": 3.328125,
      "learning_rate": 4.4818864706363394e-05,
      "loss": 0.9866,
      "step": 76730
    },
    {
      "epoch": 0.2689545679168112,
      "grad_norm": 3.265625,
      "learning_rate": 4.482470589609692e-05,
      "loss": 1.0226,
      "step": 76740
    },
    {
      "epoch": 0.2689896154237068,
      "grad_norm": 3.3125,
      "learning_rate": 4.4830547085830444e-05,
      "loss": 0.9739,
      "step": 76750
    },
    {
      "epoch": 0.26902466293060245,
      "grad_norm": 2.875,
      "learning_rate": 4.4836388275563965e-05,
      "loss": 0.9034,
      "step": 76760
    },
    {
      "epoch": 0.26905971043749805,
      "grad_norm": 3.609375,
      "learning_rate": 4.4842229465297494e-05,
      "loss": 0.9009,
      "step": 76770
    },
    {
      "epoch": 0.26909475794439364,
      "grad_norm": 3.390625,
      "learning_rate": 4.484807065503102e-05,
      "loss": 0.9897,
      "step": 76780
    },
    {
      "epoch": 0.26912980545128923,
      "grad_norm": 3.234375,
      "learning_rate": 4.485391184476454e-05,
      "loss": 0.9516,
      "step": 76790
    },
    {
      "epoch": 0.2691648529581848,
      "grad_norm": 3.03125,
      "learning_rate": 4.485975303449807e-05,
      "loss": 1.0113,
      "step": 76800
    },
    {
      "epoch": 0.2691999004650804,
      "grad_norm": 2.96875,
      "learning_rate": 4.486559422423159e-05,
      "loss": 1.0404,
      "step": 76810
    },
    {
      "epoch": 0.269234947971976,
      "grad_norm": 2.90625,
      "learning_rate": 4.487143541396512e-05,
      "loss": 1.0594,
      "step": 76820
    },
    {
      "epoch": 0.2692699954788716,
      "grad_norm": 3.234375,
      "learning_rate": 4.487727660369864e-05,
      "loss": 0.9976,
      "step": 76830
    },
    {
      "epoch": 0.2693050429857672,
      "grad_norm": 3.046875,
      "learning_rate": 4.4883117793432164e-05,
      "loss": 0.9794,
      "step": 76840
    },
    {
      "epoch": 0.26934009049266283,
      "grad_norm": 3.46875,
      "learning_rate": 4.488895898316569e-05,
      "loss": 0.9947,
      "step": 76850
    },
    {
      "epoch": 0.2693751379995584,
      "grad_norm": 3.1875,
      "learning_rate": 4.489480017289922e-05,
      "loss": 1.0034,
      "step": 76860
    },
    {
      "epoch": 0.269410185506454,
      "grad_norm": 3.125,
      "learning_rate": 4.490064136263274e-05,
      "loss": 0.9671,
      "step": 76870
    },
    {
      "epoch": 0.2694452330133496,
      "grad_norm": 2.9375,
      "learning_rate": 4.490648255236627e-05,
      "loss": 0.9984,
      "step": 76880
    },
    {
      "epoch": 0.2694802805202452,
      "grad_norm": 3.03125,
      "learning_rate": 4.491232374209979e-05,
      "loss": 0.9483,
      "step": 76890
    },
    {
      "epoch": 0.2695153280271408,
      "grad_norm": 3.28125,
      "learning_rate": 4.491816493183332e-05,
      "loss": 0.9285,
      "step": 76900
    },
    {
      "epoch": 0.2695503755340364,
      "grad_norm": 2.828125,
      "learning_rate": 4.492400612156684e-05,
      "loss": 0.9196,
      "step": 76910
    },
    {
      "epoch": 0.26958542304093197,
      "grad_norm": 3.0625,
      "learning_rate": 4.492984731130036e-05,
      "loss": 1.042,
      "step": 76920
    },
    {
      "epoch": 0.26962047054782756,
      "grad_norm": 3.03125,
      "learning_rate": 4.493568850103389e-05,
      "loss": 1.0411,
      "step": 76930
    },
    {
      "epoch": 0.26965551805472315,
      "grad_norm": 3.03125,
      "learning_rate": 4.494152969076742e-05,
      "loss": 1.0516,
      "step": 76940
    },
    {
      "epoch": 0.2696905655616188,
      "grad_norm": 3.15625,
      "learning_rate": 4.494737088050094e-05,
      "loss": 0.9209,
      "step": 76950
    },
    {
      "epoch": 0.2697256130685144,
      "grad_norm": 3.28125,
      "learning_rate": 4.495321207023447e-05,
      "loss": 0.9713,
      "step": 76960
    },
    {
      "epoch": 0.26976066057541,
      "grad_norm": 2.875,
      "learning_rate": 4.495905325996799e-05,
      "loss": 0.9321,
      "step": 76970
    },
    {
      "epoch": 0.2697957080823056,
      "grad_norm": 3.1875,
      "learning_rate": 4.496489444970152e-05,
      "loss": 0.9492,
      "step": 76980
    },
    {
      "epoch": 0.26983075558920117,
      "grad_norm": 3.34375,
      "learning_rate": 4.497073563943504e-05,
      "loss": 0.9107,
      "step": 76990
    },
    {
      "epoch": 0.26986580309609676,
      "grad_norm": 3.234375,
      "learning_rate": 4.497657682916857e-05,
      "loss": 0.9645,
      "step": 77000
    },
    {
      "epoch": 0.26990085060299235,
      "grad_norm": 3.390625,
      "learning_rate": 4.498241801890209e-05,
      "loss": 1.0647,
      "step": 77010
    },
    {
      "epoch": 0.26993589810988794,
      "grad_norm": 3.1875,
      "learning_rate": 4.498825920863562e-05,
      "loss": 1.0261,
      "step": 77020
    },
    {
      "epoch": 0.26997094561678353,
      "grad_norm": 3.328125,
      "learning_rate": 4.4994100398369146e-05,
      "loss": 1.0518,
      "step": 77030
    },
    {
      "epoch": 0.2700059931236791,
      "grad_norm": 3.109375,
      "learning_rate": 4.499994158810267e-05,
      "loss": 0.9049,
      "step": 77040
    },
    {
      "epoch": 0.27004104063057477,
      "grad_norm": 3.25,
      "learning_rate": 4.500578277783619e-05,
      "loss": 0.9991,
      "step": 77050
    },
    {
      "epoch": 0.27007608813747036,
      "grad_norm": 3.078125,
      "learning_rate": 4.501162396756972e-05,
      "loss": 1.0329,
      "step": 77060
    },
    {
      "epoch": 0.27011113564436595,
      "grad_norm": 3.09375,
      "learning_rate": 4.501746515730324e-05,
      "loss": 1.0169,
      "step": 77070
    },
    {
      "epoch": 0.27014618315126154,
      "grad_norm": 3.40625,
      "learning_rate": 4.502330634703677e-05,
      "loss": 1.0318,
      "step": 77080
    },
    {
      "epoch": 0.27018123065815713,
      "grad_norm": 3.375,
      "learning_rate": 4.5029147536770295e-05,
      "loss": 0.9725,
      "step": 77090
    },
    {
      "epoch": 0.2702162781650527,
      "grad_norm": 2.734375,
      "learning_rate": 4.503498872650382e-05,
      "loss": 0.9096,
      "step": 77100
    },
    {
      "epoch": 0.2702513256719483,
      "grad_norm": 2.859375,
      "learning_rate": 4.5040829916237345e-05,
      "loss": 1.0636,
      "step": 77110
    },
    {
      "epoch": 0.2702863731788439,
      "grad_norm": 3.234375,
      "learning_rate": 4.5046671105970867e-05,
      "loss": 1.0433,
      "step": 77120
    },
    {
      "epoch": 0.2703214206857395,
      "grad_norm": 3.125,
      "learning_rate": 4.505251229570439e-05,
      "loss": 0.9631,
      "step": 77130
    },
    {
      "epoch": 0.2703564681926351,
      "grad_norm": 2.828125,
      "learning_rate": 4.5058353485437916e-05,
      "loss": 0.9551,
      "step": 77140
    },
    {
      "epoch": 0.27039151569953074,
      "grad_norm": 3.375,
      "learning_rate": 4.506419467517144e-05,
      "loss": 1.0114,
      "step": 77150
    },
    {
      "epoch": 0.27042656320642633,
      "grad_norm": 2.953125,
      "learning_rate": 4.5070035864904966e-05,
      "loss": 1.0029,
      "step": 77160
    },
    {
      "epoch": 0.2704616107133219,
      "grad_norm": 2.8125,
      "learning_rate": 4.5075877054638494e-05,
      "loss": 1.0226,
      "step": 77170
    },
    {
      "epoch": 0.2704966582202175,
      "grad_norm": 3.0625,
      "learning_rate": 4.5081718244372016e-05,
      "loss": 1.0291,
      "step": 77180
    },
    {
      "epoch": 0.2705317057271131,
      "grad_norm": 3.21875,
      "learning_rate": 4.5087559434105544e-05,
      "loss": 0.9328,
      "step": 77190
    },
    {
      "epoch": 0.2705667532340087,
      "grad_norm": 3.109375,
      "learning_rate": 4.5093400623839065e-05,
      "loss": 0.9733,
      "step": 77200
    },
    {
      "epoch": 0.2706018007409043,
      "grad_norm": 3.125,
      "learning_rate": 4.5099241813572593e-05,
      "loss": 1.0419,
      "step": 77210
    },
    {
      "epoch": 0.2706368482477999,
      "grad_norm": 3.359375,
      "learning_rate": 4.5105083003306115e-05,
      "loss": 0.9985,
      "step": 77220
    },
    {
      "epoch": 0.27067189575469547,
      "grad_norm": 3.03125,
      "learning_rate": 4.5110924193039636e-05,
      "loss": 0.9947,
      "step": 77230
    },
    {
      "epoch": 0.27070694326159106,
      "grad_norm": 3.21875,
      "learning_rate": 4.5116765382773165e-05,
      "loss": 0.9827,
      "step": 77240
    },
    {
      "epoch": 0.2707419907684867,
      "grad_norm": 3.125,
      "learning_rate": 4.512260657250669e-05,
      "loss": 1.0,
      "step": 77250
    },
    {
      "epoch": 0.2707770382753823,
      "grad_norm": 3.0,
      "learning_rate": 4.5128447762240214e-05,
      "loss": 0.9455,
      "step": 77260
    },
    {
      "epoch": 0.2708120857822779,
      "grad_norm": 3.078125,
      "learning_rate": 4.513428895197374e-05,
      "loss": 0.9457,
      "step": 77270
    },
    {
      "epoch": 0.2708471332891735,
      "grad_norm": 3.1875,
      "learning_rate": 4.5140130141707264e-05,
      "loss": 0.9881,
      "step": 77280
    },
    {
      "epoch": 0.27088218079606907,
      "grad_norm": 3.25,
      "learning_rate": 4.514597133144079e-05,
      "loss": 0.9481,
      "step": 77290
    },
    {
      "epoch": 0.27091722830296466,
      "grad_norm": 3.359375,
      "learning_rate": 4.5151812521174314e-05,
      "loss": 0.8951,
      "step": 77300
    },
    {
      "epoch": 0.27095227580986025,
      "grad_norm": 3.078125,
      "learning_rate": 4.5157653710907835e-05,
      "loss": 1.0143,
      "step": 77310
    },
    {
      "epoch": 0.27098732331675585,
      "grad_norm": 3.28125,
      "learning_rate": 4.5163494900641363e-05,
      "loss": 0.9716,
      "step": 77320
    },
    {
      "epoch": 0.27102237082365144,
      "grad_norm": 3.21875,
      "learning_rate": 4.516933609037489e-05,
      "loss": 1.0116,
      "step": 77330
    },
    {
      "epoch": 0.2710574183305471,
      "grad_norm": 3.09375,
      "learning_rate": 4.517517728010841e-05,
      "loss": 0.959,
      "step": 77340
    },
    {
      "epoch": 0.2710924658374427,
      "grad_norm": 2.984375,
      "learning_rate": 4.518101846984194e-05,
      "loss": 1.0035,
      "step": 77350
    },
    {
      "epoch": 0.27112751334433827,
      "grad_norm": 3.078125,
      "learning_rate": 4.518685965957546e-05,
      "loss": 1.0106,
      "step": 77360
    },
    {
      "epoch": 0.27116256085123386,
      "grad_norm": 3.078125,
      "learning_rate": 4.519270084930899e-05,
      "loss": 0.9729,
      "step": 77370
    },
    {
      "epoch": 0.27119760835812945,
      "grad_norm": 3.21875,
      "learning_rate": 4.519854203904251e-05,
      "loss": 0.9244,
      "step": 77380
    },
    {
      "epoch": 0.27123265586502504,
      "grad_norm": 3.4375,
      "learning_rate": 4.5204383228776034e-05,
      "loss": 0.9526,
      "step": 77390
    },
    {
      "epoch": 0.27126770337192063,
      "grad_norm": 3.578125,
      "learning_rate": 4.521022441850957e-05,
      "loss": 1.0629,
      "step": 77400
    },
    {
      "epoch": 0.2713027508788162,
      "grad_norm": 3.09375,
      "learning_rate": 4.521606560824309e-05,
      "loss": 0.9166,
      "step": 77410
    },
    {
      "epoch": 0.2713377983857118,
      "grad_norm": 3.125,
      "learning_rate": 4.522190679797662e-05,
      "loss": 0.9401,
      "step": 77420
    },
    {
      "epoch": 0.2713728458926074,
      "grad_norm": 3.21875,
      "learning_rate": 4.522774798771014e-05,
      "loss": 0.954,
      "step": 77430
    },
    {
      "epoch": 0.27140789339950305,
      "grad_norm": 2.9375,
      "learning_rate": 4.523358917744366e-05,
      "loss": 0.9959,
      "step": 77440
    },
    {
      "epoch": 0.27144294090639864,
      "grad_norm": 2.8125,
      "learning_rate": 4.523943036717719e-05,
      "loss": 0.9482,
      "step": 77450
    },
    {
      "epoch": 0.27147798841329424,
      "grad_norm": 3.21875,
      "learning_rate": 4.524527155691071e-05,
      "loss": 0.9808,
      "step": 77460
    },
    {
      "epoch": 0.2715130359201898,
      "grad_norm": 2.921875,
      "learning_rate": 4.525111274664424e-05,
      "loss": 0.9125,
      "step": 77470
    },
    {
      "epoch": 0.2715480834270854,
      "grad_norm": 3.140625,
      "learning_rate": 4.525695393637777e-05,
      "loss": 1.0215,
      "step": 77480
    },
    {
      "epoch": 0.271583130933981,
      "grad_norm": 3.5,
      "learning_rate": 4.526279512611129e-05,
      "loss": 0.9705,
      "step": 77490
    },
    {
      "epoch": 0.2716181784408766,
      "grad_norm": 3.109375,
      "learning_rate": 4.526863631584482e-05,
      "loss": 1.005,
      "step": 77500
    },
    {
      "epoch": 0.2716532259477722,
      "grad_norm": 3.609375,
      "learning_rate": 4.527447750557834e-05,
      "loss": 1.0293,
      "step": 77510
    },
    {
      "epoch": 0.2716882734546678,
      "grad_norm": 3.5,
      "learning_rate": 4.528031869531186e-05,
      "loss": 0.991,
      "step": 77520
    },
    {
      "epoch": 0.2717233209615634,
      "grad_norm": 3.015625,
      "learning_rate": 4.528615988504539e-05,
      "loss": 0.9935,
      "step": 77530
    },
    {
      "epoch": 0.271758368468459,
      "grad_norm": 3.859375,
      "learning_rate": 4.529200107477891e-05,
      "loss": 1.0269,
      "step": 77540
    },
    {
      "epoch": 0.2717934159753546,
      "grad_norm": 2.953125,
      "learning_rate": 4.529784226451244e-05,
      "loss": 1.0469,
      "step": 77550
    },
    {
      "epoch": 0.2718284634822502,
      "grad_norm": 3.3125,
      "learning_rate": 4.5303683454245966e-05,
      "loss": 0.9523,
      "step": 77560
    },
    {
      "epoch": 0.2718635109891458,
      "grad_norm": 3.90625,
      "learning_rate": 4.530952464397949e-05,
      "loss": 0.9999,
      "step": 77570
    },
    {
      "epoch": 0.2718985584960414,
      "grad_norm": 3.015625,
      "learning_rate": 4.5315365833713016e-05,
      "loss": 1.051,
      "step": 77580
    },
    {
      "epoch": 0.271933606002937,
      "grad_norm": 2.9375,
      "learning_rate": 4.532120702344654e-05,
      "loss": 1.0216,
      "step": 77590
    },
    {
      "epoch": 0.27196865350983257,
      "grad_norm": 2.96875,
      "learning_rate": 4.532704821318006e-05,
      "loss": 1.0309,
      "step": 77600
    },
    {
      "epoch": 0.27200370101672816,
      "grad_norm": 3.109375,
      "learning_rate": 4.533288940291359e-05,
      "loss": 1.002,
      "step": 77610
    },
    {
      "epoch": 0.27203874852362375,
      "grad_norm": 3.359375,
      "learning_rate": 4.533873059264711e-05,
      "loss": 1.0503,
      "step": 77620
    },
    {
      "epoch": 0.27207379603051934,
      "grad_norm": 3.46875,
      "learning_rate": 4.534457178238064e-05,
      "loss": 0.9576,
      "step": 77630
    },
    {
      "epoch": 0.272108843537415,
      "grad_norm": 2.875,
      "learning_rate": 4.5350412972114165e-05,
      "loss": 1.011,
      "step": 77640
    },
    {
      "epoch": 0.2721438910443106,
      "grad_norm": 3.078125,
      "learning_rate": 4.535625416184769e-05,
      "loss": 1.0053,
      "step": 77650
    },
    {
      "epoch": 0.2721789385512062,
      "grad_norm": 3.328125,
      "learning_rate": 4.5362095351581215e-05,
      "loss": 0.9582,
      "step": 77660
    },
    {
      "epoch": 0.27221398605810176,
      "grad_norm": 3.328125,
      "learning_rate": 4.5367936541314736e-05,
      "loss": 1.0003,
      "step": 77670
    },
    {
      "epoch": 0.27224903356499736,
      "grad_norm": 3.1875,
      "learning_rate": 4.5373777731048265e-05,
      "loss": 0.9727,
      "step": 77680
    },
    {
      "epoch": 0.27228408107189295,
      "grad_norm": 3.140625,
      "learning_rate": 4.5379618920781786e-05,
      "loss": 1.0196,
      "step": 77690
    },
    {
      "epoch": 0.27231912857878854,
      "grad_norm": 3.609375,
      "learning_rate": 4.538546011051531e-05,
      "loss": 1.1175,
      "step": 77700
    },
    {
      "epoch": 0.27235417608568413,
      "grad_norm": 3.234375,
      "learning_rate": 4.5391301300248836e-05,
      "loss": 1.0561,
      "step": 77710
    },
    {
      "epoch": 0.2723892235925797,
      "grad_norm": 2.828125,
      "learning_rate": 4.5397142489982364e-05,
      "loss": 0.9895,
      "step": 77720
    },
    {
      "epoch": 0.2724242710994753,
      "grad_norm": 3.125,
      "learning_rate": 4.5402983679715885e-05,
      "loss": 0.9347,
      "step": 77730
    },
    {
      "epoch": 0.27245931860637096,
      "grad_norm": 3.953125,
      "learning_rate": 4.5408824869449414e-05,
      "loss": 1.0674,
      "step": 77740
    },
    {
      "epoch": 0.27249436611326655,
      "grad_norm": 2.734375,
      "learning_rate": 4.5414666059182935e-05,
      "loss": 0.9884,
      "step": 77750
    },
    {
      "epoch": 0.27252941362016214,
      "grad_norm": 3.265625,
      "learning_rate": 4.542050724891646e-05,
      "loss": 1.0338,
      "step": 77760
    },
    {
      "epoch": 0.27256446112705773,
      "grad_norm": 3.125,
      "learning_rate": 4.5426348438649985e-05,
      "loss": 0.9626,
      "step": 77770
    },
    {
      "epoch": 0.2725995086339533,
      "grad_norm": 3.421875,
      "learning_rate": 4.5432189628383506e-05,
      "loss": 0.9808,
      "step": 77780
    },
    {
      "epoch": 0.2726345561408489,
      "grad_norm": 3.078125,
      "learning_rate": 4.543803081811704e-05,
      "loss": 0.9693,
      "step": 77790
    },
    {
      "epoch": 0.2726696036477445,
      "grad_norm": 3.609375,
      "learning_rate": 4.544387200785056e-05,
      "loss": 0.9645,
      "step": 77800
    },
    {
      "epoch": 0.2727046511546401,
      "grad_norm": 2.921875,
      "learning_rate": 4.5449713197584084e-05,
      "loss": 0.8953,
      "step": 77810
    },
    {
      "epoch": 0.2727396986615357,
      "grad_norm": 3.171875,
      "learning_rate": 4.545555438731761e-05,
      "loss": 1.0057,
      "step": 77820
    },
    {
      "epoch": 0.2727747461684313,
      "grad_norm": 3.171875,
      "learning_rate": 4.5461395577051134e-05,
      "loss": 0.9969,
      "step": 77830
    },
    {
      "epoch": 0.27280979367532693,
      "grad_norm": 2.921875,
      "learning_rate": 4.546723676678466e-05,
      "loss": 0.9909,
      "step": 77840
    },
    {
      "epoch": 0.2728448411822225,
      "grad_norm": 3.640625,
      "learning_rate": 4.5473077956518184e-05,
      "loss": 1.0598,
      "step": 77850
    },
    {
      "epoch": 0.2728798886891181,
      "grad_norm": 2.9375,
      "learning_rate": 4.5478919146251705e-05,
      "loss": 0.9086,
      "step": 77860
    },
    {
      "epoch": 0.2729149361960137,
      "grad_norm": 3.171875,
      "learning_rate": 4.548476033598524e-05,
      "loss": 0.9945,
      "step": 77870
    },
    {
      "epoch": 0.2729499837029093,
      "grad_norm": 3.40625,
      "learning_rate": 4.549060152571876e-05,
      "loss": 0.9758,
      "step": 77880
    },
    {
      "epoch": 0.2729850312098049,
      "grad_norm": 3.171875,
      "learning_rate": 4.549644271545229e-05,
      "loss": 1.0333,
      "step": 77890
    },
    {
      "epoch": 0.2730200787167005,
      "grad_norm": 3.28125,
      "learning_rate": 4.550228390518581e-05,
      "loss": 1.0292,
      "step": 77900
    },
    {
      "epoch": 0.27305512622359607,
      "grad_norm": 2.703125,
      "learning_rate": 4.550812509491933e-05,
      "loss": 0.9348,
      "step": 77910
    },
    {
      "epoch": 0.27309017373049166,
      "grad_norm": 2.953125,
      "learning_rate": 4.551396628465286e-05,
      "loss": 0.9786,
      "step": 77920
    },
    {
      "epoch": 0.2731252212373873,
      "grad_norm": 3.015625,
      "learning_rate": 4.551980747438638e-05,
      "loss": 0.8863,
      "step": 77930
    },
    {
      "epoch": 0.2731602687442829,
      "grad_norm": 3.109375,
      "learning_rate": 4.552564866411991e-05,
      "loss": 0.9589,
      "step": 77940
    },
    {
      "epoch": 0.2731953162511785,
      "grad_norm": 3.171875,
      "learning_rate": 4.553148985385344e-05,
      "loss": 0.9663,
      "step": 77950
    },
    {
      "epoch": 0.2732303637580741,
      "grad_norm": 2.65625,
      "learning_rate": 4.553733104358696e-05,
      "loss": 0.8561,
      "step": 77960
    },
    {
      "epoch": 0.27326541126496967,
      "grad_norm": 3.453125,
      "learning_rate": 4.554317223332049e-05,
      "loss": 0.9712,
      "step": 77970
    },
    {
      "epoch": 0.27330045877186526,
      "grad_norm": 2.734375,
      "learning_rate": 4.554901342305401e-05,
      "loss": 0.8801,
      "step": 77980
    },
    {
      "epoch": 0.27333550627876085,
      "grad_norm": 3.453125,
      "learning_rate": 4.555485461278753e-05,
      "loss": 1.0003,
      "step": 77990
    },
    {
      "epoch": 0.27337055378565644,
      "grad_norm": 3.515625,
      "learning_rate": 4.556069580252106e-05,
      "loss": 0.9765,
      "step": 78000
    },
    {
      "epoch": 0.27340560129255204,
      "grad_norm": 3.234375,
      "learning_rate": 4.556653699225458e-05,
      "loss": 1.009,
      "step": 78010
    },
    {
      "epoch": 0.2734406487994476,
      "grad_norm": 2.796875,
      "learning_rate": 4.557237818198811e-05,
      "loss": 0.87,
      "step": 78020
    },
    {
      "epoch": 0.2734756963063433,
      "grad_norm": 3.203125,
      "learning_rate": 4.557821937172164e-05,
      "loss": 0.9596,
      "step": 78030
    },
    {
      "epoch": 0.27351074381323887,
      "grad_norm": 2.875,
      "learning_rate": 4.558406056145516e-05,
      "loss": 0.9474,
      "step": 78040
    },
    {
      "epoch": 0.27354579132013446,
      "grad_norm": 2.921875,
      "learning_rate": 4.558990175118869e-05,
      "loss": 0.9133,
      "step": 78050
    },
    {
      "epoch": 0.27358083882703005,
      "grad_norm": 3.375,
      "learning_rate": 4.559574294092221e-05,
      "loss": 0.9075,
      "step": 78060
    },
    {
      "epoch": 0.27361588633392564,
      "grad_norm": 3.03125,
      "learning_rate": 4.560158413065573e-05,
      "loss": 1.0629,
      "step": 78070
    },
    {
      "epoch": 0.27365093384082123,
      "grad_norm": 3.234375,
      "learning_rate": 4.560742532038926e-05,
      "loss": 0.9826,
      "step": 78080
    },
    {
      "epoch": 0.2736859813477168,
      "grad_norm": 3.109375,
      "learning_rate": 4.561326651012278e-05,
      "loss": 0.9727,
      "step": 78090
    },
    {
      "epoch": 0.2737210288546124,
      "grad_norm": 3.046875,
      "learning_rate": 4.5619107699856315e-05,
      "loss": 0.9731,
      "step": 78100
    },
    {
      "epoch": 0.273756076361508,
      "grad_norm": 3.359375,
      "learning_rate": 4.5624948889589836e-05,
      "loss": 0.9737,
      "step": 78110
    },
    {
      "epoch": 0.2737911238684036,
      "grad_norm": 2.6875,
      "learning_rate": 4.563079007932336e-05,
      "loss": 0.9178,
      "step": 78120
    },
    {
      "epoch": 0.27382617137529924,
      "grad_norm": 3.609375,
      "learning_rate": 4.5636631269056886e-05,
      "loss": 0.9568,
      "step": 78130
    },
    {
      "epoch": 0.27386121888219483,
      "grad_norm": 3.109375,
      "learning_rate": 4.564247245879041e-05,
      "loss": 1.0806,
      "step": 78140
    },
    {
      "epoch": 0.2738962663890904,
      "grad_norm": 3.125,
      "learning_rate": 4.5648313648523936e-05,
      "loss": 0.9622,
      "step": 78150
    },
    {
      "epoch": 0.273931313895986,
      "grad_norm": 3.09375,
      "learning_rate": 4.565415483825746e-05,
      "loss": 0.9279,
      "step": 78160
    },
    {
      "epoch": 0.2739663614028816,
      "grad_norm": 3.375,
      "learning_rate": 4.565999602799098e-05,
      "loss": 0.9236,
      "step": 78170
    },
    {
      "epoch": 0.2740014089097772,
      "grad_norm": 3.21875,
      "learning_rate": 4.5665837217724514e-05,
      "loss": 0.9528,
      "step": 78180
    },
    {
      "epoch": 0.2740364564166728,
      "grad_norm": 2.765625,
      "learning_rate": 4.5671678407458035e-05,
      "loss": 0.9289,
      "step": 78190
    },
    {
      "epoch": 0.2740715039235684,
      "grad_norm": 3.078125,
      "learning_rate": 4.5677519597191556e-05,
      "loss": 1.0468,
      "step": 78200
    },
    {
      "epoch": 0.274106551430464,
      "grad_norm": 2.859375,
      "learning_rate": 4.5683360786925085e-05,
      "loss": 0.9453,
      "step": 78210
    },
    {
      "epoch": 0.27414159893735957,
      "grad_norm": 3.296875,
      "learning_rate": 4.5689201976658606e-05,
      "loss": 1.078,
      "step": 78220
    },
    {
      "epoch": 0.2741766464442552,
      "grad_norm": 3.265625,
      "learning_rate": 4.5695043166392134e-05,
      "loss": 0.9629,
      "step": 78230
    },
    {
      "epoch": 0.2742116939511508,
      "grad_norm": 3.640625,
      "learning_rate": 4.5700884356125656e-05,
      "loss": 1.0484,
      "step": 78240
    },
    {
      "epoch": 0.2742467414580464,
      "grad_norm": 3.140625,
      "learning_rate": 4.570672554585918e-05,
      "loss": 1.0163,
      "step": 78250
    },
    {
      "epoch": 0.274281788964942,
      "grad_norm": 3.140625,
      "learning_rate": 4.571256673559271e-05,
      "loss": 0.9559,
      "step": 78260
    },
    {
      "epoch": 0.2743168364718376,
      "grad_norm": 3.109375,
      "learning_rate": 4.5718407925326234e-05,
      "loss": 0.9604,
      "step": 78270
    },
    {
      "epoch": 0.27435188397873317,
      "grad_norm": 3.09375,
      "learning_rate": 4.5724249115059755e-05,
      "loss": 0.9466,
      "step": 78280
    },
    {
      "epoch": 0.27438693148562876,
      "grad_norm": 3.421875,
      "learning_rate": 4.5730090304793283e-05,
      "loss": 1.0045,
      "step": 78290
    },
    {
      "epoch": 0.27442197899252435,
      "grad_norm": 3.484375,
      "learning_rate": 4.5735931494526805e-05,
      "loss": 1.0307,
      "step": 78300
    },
    {
      "epoch": 0.27445702649941994,
      "grad_norm": 2.984375,
      "learning_rate": 4.574177268426033e-05,
      "loss": 1.0045,
      "step": 78310
    },
    {
      "epoch": 0.27449207400631553,
      "grad_norm": 3.484375,
      "learning_rate": 4.5747613873993855e-05,
      "loss": 1.0138,
      "step": 78320
    },
    {
      "epoch": 0.2745271215132112,
      "grad_norm": 2.859375,
      "learning_rate": 4.575345506372738e-05,
      "loss": 0.9186,
      "step": 78330
    },
    {
      "epoch": 0.27456216902010677,
      "grad_norm": 3.078125,
      "learning_rate": 4.575929625346091e-05,
      "loss": 0.9658,
      "step": 78340
    },
    {
      "epoch": 0.27459721652700236,
      "grad_norm": 3.015625,
      "learning_rate": 4.576513744319443e-05,
      "loss": 1.0143,
      "step": 78350
    },
    {
      "epoch": 0.27463226403389795,
      "grad_norm": 3.3125,
      "learning_rate": 4.577097863292796e-05,
      "loss": 0.9679,
      "step": 78360
    },
    {
      "epoch": 0.27466731154079355,
      "grad_norm": 3.0,
      "learning_rate": 4.577681982266148e-05,
      "loss": 1.0504,
      "step": 78370
    },
    {
      "epoch": 0.27470235904768914,
      "grad_norm": 2.6875,
      "learning_rate": 4.5782661012395004e-05,
      "loss": 0.9923,
      "step": 78380
    },
    {
      "epoch": 0.27473740655458473,
      "grad_norm": 3.203125,
      "learning_rate": 4.578850220212853e-05,
      "loss": 1.0454,
      "step": 78390
    },
    {
      "epoch": 0.2747724540614803,
      "grad_norm": 2.890625,
      "learning_rate": 4.579434339186205e-05,
      "loss": 0.9587,
      "step": 78400
    },
    {
      "epoch": 0.2748075015683759,
      "grad_norm": 3.15625,
      "learning_rate": 4.580018458159558e-05,
      "loss": 0.9364,
      "step": 78410
    },
    {
      "epoch": 0.27484254907527156,
      "grad_norm": 3.265625,
      "learning_rate": 4.580602577132911e-05,
      "loss": 0.9232,
      "step": 78420
    },
    {
      "epoch": 0.27487759658216715,
      "grad_norm": 3.015625,
      "learning_rate": 4.581186696106263e-05,
      "loss": 0.9526,
      "step": 78430
    },
    {
      "epoch": 0.27491264408906274,
      "grad_norm": 3.453125,
      "learning_rate": 4.581770815079616e-05,
      "loss": 1.0121,
      "step": 78440
    },
    {
      "epoch": 0.27494769159595833,
      "grad_norm": 3.171875,
      "learning_rate": 4.582354934052968e-05,
      "loss": 0.9588,
      "step": 78450
    },
    {
      "epoch": 0.2749827391028539,
      "grad_norm": 2.75,
      "learning_rate": 4.58293905302632e-05,
      "loss": 0.9505,
      "step": 78460
    },
    {
      "epoch": 0.2750177866097495,
      "grad_norm": 3.40625,
      "learning_rate": 4.583523171999673e-05,
      "loss": 1.1004,
      "step": 78470
    },
    {
      "epoch": 0.2750528341166451,
      "grad_norm": 3.265625,
      "learning_rate": 4.584107290973025e-05,
      "loss": 1.0229,
      "step": 78480
    },
    {
      "epoch": 0.2750878816235407,
      "grad_norm": 2.734375,
      "learning_rate": 4.584691409946378e-05,
      "loss": 0.9396,
      "step": 78490
    },
    {
      "epoch": 0.2751229291304363,
      "grad_norm": 3.328125,
      "learning_rate": 4.585275528919731e-05,
      "loss": 0.9694,
      "step": 78500
    },
    {
      "epoch": 0.2751579766373319,
      "grad_norm": 3.203125,
      "learning_rate": 4.585859647893083e-05,
      "loss": 0.937,
      "step": 78510
    },
    {
      "epoch": 0.2751930241442275,
      "grad_norm": 2.53125,
      "learning_rate": 4.586443766866436e-05,
      "loss": 0.9448,
      "step": 78520
    },
    {
      "epoch": 0.2752280716511231,
      "grad_norm": 3.375,
      "learning_rate": 4.587027885839788e-05,
      "loss": 0.9972,
      "step": 78530
    },
    {
      "epoch": 0.2752631191580187,
      "grad_norm": 3.0,
      "learning_rate": 4.587612004813141e-05,
      "loss": 0.9433,
      "step": 78540
    },
    {
      "epoch": 0.2752981666649143,
      "grad_norm": 3.046875,
      "learning_rate": 4.588196123786493e-05,
      "loss": 0.9114,
      "step": 78550
    },
    {
      "epoch": 0.2753332141718099,
      "grad_norm": 2.875,
      "learning_rate": 4.588780242759845e-05,
      "loss": 0.9274,
      "step": 78560
    },
    {
      "epoch": 0.2753682616787055,
      "grad_norm": 3.015625,
      "learning_rate": 4.5893643617331986e-05,
      "loss": 0.9988,
      "step": 78570
    },
    {
      "epoch": 0.2754033091856011,
      "grad_norm": 3.234375,
      "learning_rate": 4.589948480706551e-05,
      "loss": 1.0252,
      "step": 78580
    },
    {
      "epoch": 0.27543835669249667,
      "grad_norm": 2.921875,
      "learning_rate": 4.590532599679903e-05,
      "loss": 0.985,
      "step": 78590
    },
    {
      "epoch": 0.27547340419939226,
      "grad_norm": 2.953125,
      "learning_rate": 4.591116718653256e-05,
      "loss": 1.0332,
      "step": 78600
    },
    {
      "epoch": 0.27550845170628785,
      "grad_norm": 2.984375,
      "learning_rate": 4.591700837626608e-05,
      "loss": 0.8998,
      "step": 78610
    },
    {
      "epoch": 0.2755434992131835,
      "grad_norm": 3.125,
      "learning_rate": 4.592284956599961e-05,
      "loss": 0.9641,
      "step": 78620
    },
    {
      "epoch": 0.2755785467200791,
      "grad_norm": 3.078125,
      "learning_rate": 4.592869075573313e-05,
      "loss": 0.9182,
      "step": 78630
    },
    {
      "epoch": 0.2756135942269747,
      "grad_norm": 3.03125,
      "learning_rate": 4.593453194546665e-05,
      "loss": 0.918,
      "step": 78640
    },
    {
      "epoch": 0.27564864173387027,
      "grad_norm": 3.390625,
      "learning_rate": 4.5940373135200185e-05,
      "loss": 1.0852,
      "step": 78650
    },
    {
      "epoch": 0.27568368924076586,
      "grad_norm": 3.359375,
      "learning_rate": 4.5946214324933706e-05,
      "loss": 0.9604,
      "step": 78660
    },
    {
      "epoch": 0.27571873674766145,
      "grad_norm": 2.890625,
      "learning_rate": 4.595205551466723e-05,
      "loss": 1.0133,
      "step": 78670
    },
    {
      "epoch": 0.27575378425455704,
      "grad_norm": 3.265625,
      "learning_rate": 4.5957896704400756e-05,
      "loss": 0.9994,
      "step": 78680
    },
    {
      "epoch": 0.27578883176145264,
      "grad_norm": 3.140625,
      "learning_rate": 4.596373789413428e-05,
      "loss": 1.0226,
      "step": 78690
    },
    {
      "epoch": 0.2758238792683482,
      "grad_norm": 2.9375,
      "learning_rate": 4.5969579083867805e-05,
      "loss": 1.0677,
      "step": 78700
    },
    {
      "epoch": 0.2758589267752438,
      "grad_norm": 3.46875,
      "learning_rate": 4.597542027360133e-05,
      "loss": 0.9707,
      "step": 78710
    },
    {
      "epoch": 0.27589397428213946,
      "grad_norm": 3.328125,
      "learning_rate": 4.598126146333485e-05,
      "loss": 0.8831,
      "step": 78720
    },
    {
      "epoch": 0.27592902178903506,
      "grad_norm": 3.296875,
      "learning_rate": 4.5987102653068383e-05,
      "loss": 1.0563,
      "step": 78730
    },
    {
      "epoch": 0.27596406929593065,
      "grad_norm": 3.203125,
      "learning_rate": 4.5992943842801905e-05,
      "loss": 0.9873,
      "step": 78740
    },
    {
      "epoch": 0.27599911680282624,
      "grad_norm": 3.015625,
      "learning_rate": 4.5998785032535426e-05,
      "loss": 1.0199,
      "step": 78750
    },
    {
      "epoch": 0.27603416430972183,
      "grad_norm": 3.5625,
      "learning_rate": 4.6004626222268955e-05,
      "loss": 1.0458,
      "step": 78760
    },
    {
      "epoch": 0.2760692118166174,
      "grad_norm": 3.4375,
      "learning_rate": 4.6010467412002476e-05,
      "loss": 0.9396,
      "step": 78770
    },
    {
      "epoch": 0.276104259323513,
      "grad_norm": 3.078125,
      "learning_rate": 4.6016308601736004e-05,
      "loss": 0.9955,
      "step": 78780
    },
    {
      "epoch": 0.2761393068304086,
      "grad_norm": 3.046875,
      "learning_rate": 4.6022149791469526e-05,
      "loss": 0.9854,
      "step": 78790
    },
    {
      "epoch": 0.2761743543373042,
      "grad_norm": 3.125,
      "learning_rate": 4.6027990981203054e-05,
      "loss": 0.9576,
      "step": 78800
    },
    {
      "epoch": 0.2762094018441998,
      "grad_norm": 3.25,
      "learning_rate": 4.603383217093658e-05,
      "loss": 0.9296,
      "step": 78810
    },
    {
      "epoch": 0.27624444935109543,
      "grad_norm": 3.359375,
      "learning_rate": 4.6039673360670104e-05,
      "loss": 0.9912,
      "step": 78820
    },
    {
      "epoch": 0.276279496857991,
      "grad_norm": 3.4375,
      "learning_rate": 4.604551455040363e-05,
      "loss": 1.0651,
      "step": 78830
    },
    {
      "epoch": 0.2763145443648866,
      "grad_norm": 3.046875,
      "learning_rate": 4.605135574013715e-05,
      "loss": 0.9431,
      "step": 78840
    },
    {
      "epoch": 0.2763495918717822,
      "grad_norm": 2.890625,
      "learning_rate": 4.6057196929870675e-05,
      "loss": 0.9745,
      "step": 78850
    },
    {
      "epoch": 0.2763846393786778,
      "grad_norm": 2.96875,
      "learning_rate": 4.60630381196042e-05,
      "loss": 0.8969,
      "step": 78860
    },
    {
      "epoch": 0.2764196868855734,
      "grad_norm": 3.421875,
      "learning_rate": 4.6068879309337724e-05,
      "loss": 0.9859,
      "step": 78870
    },
    {
      "epoch": 0.276454734392469,
      "grad_norm": 3.546875,
      "learning_rate": 4.607472049907125e-05,
      "loss": 0.9228,
      "step": 78880
    },
    {
      "epoch": 0.2764897818993646,
      "grad_norm": 3.265625,
      "learning_rate": 4.608056168880478e-05,
      "loss": 0.9398,
      "step": 78890
    },
    {
      "epoch": 0.27652482940626016,
      "grad_norm": 2.890625,
      "learning_rate": 4.60864028785383e-05,
      "loss": 0.9354,
      "step": 78900
    },
    {
      "epoch": 0.27655987691315576,
      "grad_norm": 3.421875,
      "learning_rate": 4.609224406827183e-05,
      "loss": 1.0303,
      "step": 78910
    },
    {
      "epoch": 0.2765949244200514,
      "grad_norm": 2.921875,
      "learning_rate": 4.609808525800535e-05,
      "loss": 0.953,
      "step": 78920
    },
    {
      "epoch": 0.276629971926947,
      "grad_norm": 3.015625,
      "learning_rate": 4.6103926447738874e-05,
      "loss": 0.9554,
      "step": 78930
    },
    {
      "epoch": 0.2766650194338426,
      "grad_norm": 3.5625,
      "learning_rate": 4.61097676374724e-05,
      "loss": 1.013,
      "step": 78940
    },
    {
      "epoch": 0.2767000669407382,
      "grad_norm": 3.125,
      "learning_rate": 4.611560882720592e-05,
      "loss": 0.9651,
      "step": 78950
    },
    {
      "epoch": 0.27673511444763377,
      "grad_norm": 3.078125,
      "learning_rate": 4.612145001693945e-05,
      "loss": 1.0182,
      "step": 78960
    },
    {
      "epoch": 0.27677016195452936,
      "grad_norm": 3.140625,
      "learning_rate": 4.612729120667298e-05,
      "loss": 1.0167,
      "step": 78970
    },
    {
      "epoch": 0.27680520946142495,
      "grad_norm": 3.28125,
      "learning_rate": 4.61331323964065e-05,
      "loss": 0.9648,
      "step": 78980
    },
    {
      "epoch": 0.27684025696832054,
      "grad_norm": 2.984375,
      "learning_rate": 4.613897358614003e-05,
      "loss": 0.9892,
      "step": 78990
    },
    {
      "epoch": 0.27687530447521613,
      "grad_norm": 3.15625,
      "learning_rate": 4.614481477587355e-05,
      "loss": 0.9782,
      "step": 79000
    },
    {
      "epoch": 0.2769103519821118,
      "grad_norm": 2.65625,
      "learning_rate": 4.615065596560708e-05,
      "loss": 0.995,
      "step": 79010
    },
    {
      "epoch": 0.27694539948900737,
      "grad_norm": 3.21875,
      "learning_rate": 4.61564971553406e-05,
      "loss": 0.9733,
      "step": 79020
    },
    {
      "epoch": 0.27698044699590296,
      "grad_norm": 3.296875,
      "learning_rate": 4.616233834507412e-05,
      "loss": 0.999,
      "step": 79030
    },
    {
      "epoch": 0.27701549450279855,
      "grad_norm": 3.296875,
      "learning_rate": 4.616817953480766e-05,
      "loss": 0.9951,
      "step": 79040
    },
    {
      "epoch": 0.27705054200969415,
      "grad_norm": 2.796875,
      "learning_rate": 4.617402072454118e-05,
      "loss": 1.0534,
      "step": 79050
    },
    {
      "epoch": 0.27708558951658974,
      "grad_norm": 2.921875,
      "learning_rate": 4.61798619142747e-05,
      "loss": 1.0154,
      "step": 79060
    },
    {
      "epoch": 0.27712063702348533,
      "grad_norm": 3.328125,
      "learning_rate": 4.618570310400823e-05,
      "loss": 1.0084,
      "step": 79070
    },
    {
      "epoch": 0.2771556845303809,
      "grad_norm": 3.203125,
      "learning_rate": 4.619154429374175e-05,
      "loss": 0.9769,
      "step": 79080
    },
    {
      "epoch": 0.2771907320372765,
      "grad_norm": 2.84375,
      "learning_rate": 4.619738548347528e-05,
      "loss": 0.9347,
      "step": 79090
    },
    {
      "epoch": 0.2772257795441721,
      "grad_norm": 3.296875,
      "learning_rate": 4.62032266732088e-05,
      "loss": 0.9494,
      "step": 79100
    },
    {
      "epoch": 0.27726082705106775,
      "grad_norm": 2.625,
      "learning_rate": 4.620906786294233e-05,
      "loss": 1.0748,
      "step": 79110
    },
    {
      "epoch": 0.27729587455796334,
      "grad_norm": 2.953125,
      "learning_rate": 4.6214909052675856e-05,
      "loss": 0.9604,
      "step": 79120
    },
    {
      "epoch": 0.27733092206485893,
      "grad_norm": 3.046875,
      "learning_rate": 4.622075024240938e-05,
      "loss": 0.9724,
      "step": 79130
    },
    {
      "epoch": 0.2773659695717545,
      "grad_norm": 3.015625,
      "learning_rate": 4.62265914321429e-05,
      "loss": 0.9914,
      "step": 79140
    },
    {
      "epoch": 0.2774010170786501,
      "grad_norm": 3.234375,
      "learning_rate": 4.623243262187643e-05,
      "loss": 0.9274,
      "step": 79150
    },
    {
      "epoch": 0.2774360645855457,
      "grad_norm": 3.421875,
      "learning_rate": 4.623827381160995e-05,
      "loss": 1.0435,
      "step": 79160
    },
    {
      "epoch": 0.2774711120924413,
      "grad_norm": 2.8125,
      "learning_rate": 4.6244115001343477e-05,
      "loss": 0.9096,
      "step": 79170
    },
    {
      "epoch": 0.2775061595993369,
      "grad_norm": 3.0625,
      "learning_rate": 4.6249956191077e-05,
      "loss": 0.9551,
      "step": 79180
    },
    {
      "epoch": 0.2775412071062325,
      "grad_norm": 2.9375,
      "learning_rate": 4.6255797380810526e-05,
      "loss": 0.9279,
      "step": 79190
    },
    {
      "epoch": 0.27757625461312807,
      "grad_norm": 3.4375,
      "learning_rate": 4.6261638570544054e-05,
      "loss": 0.9589,
      "step": 79200
    },
    {
      "epoch": 0.2776113021200237,
      "grad_norm": 2.96875,
      "learning_rate": 4.6267479760277576e-05,
      "loss": 0.9285,
      "step": 79210
    },
    {
      "epoch": 0.2776463496269193,
      "grad_norm": 3.03125,
      "learning_rate": 4.6273320950011104e-05,
      "loss": 1.0645,
      "step": 79220
    },
    {
      "epoch": 0.2776813971338149,
      "grad_norm": 3.265625,
      "learning_rate": 4.6279162139744626e-05,
      "loss": 0.8803,
      "step": 79230
    },
    {
      "epoch": 0.2777164446407105,
      "grad_norm": 3.421875,
      "learning_rate": 4.628500332947815e-05,
      "loss": 1.0444,
      "step": 79240
    },
    {
      "epoch": 0.2777514921476061,
      "grad_norm": 3.5625,
      "learning_rate": 4.6290844519211675e-05,
      "loss": 1.0451,
      "step": 79250
    },
    {
      "epoch": 0.2777865396545017,
      "grad_norm": 3.515625,
      "learning_rate": 4.62966857089452e-05,
      "loss": 0.9216,
      "step": 79260
    },
    {
      "epoch": 0.27782158716139727,
      "grad_norm": 3.421875,
      "learning_rate": 4.6302526898678725e-05,
      "loss": 0.9386,
      "step": 79270
    },
    {
      "epoch": 0.27785663466829286,
      "grad_norm": 4.0625,
      "learning_rate": 4.630836808841225e-05,
      "loss": 1.0359,
      "step": 79280
    },
    {
      "epoch": 0.27789168217518845,
      "grad_norm": 3.03125,
      "learning_rate": 4.6314209278145775e-05,
      "loss": 0.9578,
      "step": 79290
    },
    {
      "epoch": 0.27792672968208404,
      "grad_norm": 3.28125,
      "learning_rate": 4.63200504678793e-05,
      "loss": 0.9515,
      "step": 79300
    },
    {
      "epoch": 0.2779617771889797,
      "grad_norm": 2.859375,
      "learning_rate": 4.6325891657612824e-05,
      "loss": 0.9486,
      "step": 79310
    },
    {
      "epoch": 0.2779968246958753,
      "grad_norm": 3.546875,
      "learning_rate": 4.6331732847346346e-05,
      "loss": 0.9896,
      "step": 79320
    },
    {
      "epoch": 0.27803187220277087,
      "grad_norm": 3.703125,
      "learning_rate": 4.6337574037079874e-05,
      "loss": 0.9804,
      "step": 79330
    },
    {
      "epoch": 0.27806691970966646,
      "grad_norm": 3.09375,
      "learning_rate": 4.6343415226813396e-05,
      "loss": 1.0194,
      "step": 79340
    },
    {
      "epoch": 0.27810196721656205,
      "grad_norm": 3.125,
      "learning_rate": 4.6349256416546924e-05,
      "loss": 1.0023,
      "step": 79350
    },
    {
      "epoch": 0.27813701472345764,
      "grad_norm": 3.0625,
      "learning_rate": 4.635509760628045e-05,
      "loss": 1.0149,
      "step": 79360
    },
    {
      "epoch": 0.27817206223035323,
      "grad_norm": 3.578125,
      "learning_rate": 4.6360938796013973e-05,
      "loss": 0.9274,
      "step": 79370
    },
    {
      "epoch": 0.2782071097372488,
      "grad_norm": 3.421875,
      "learning_rate": 4.63667799857475e-05,
      "loss": 1.0345,
      "step": 79380
    },
    {
      "epoch": 0.2782421572441444,
      "grad_norm": 3.171875,
      "learning_rate": 4.637262117548102e-05,
      "loss": 0.9296,
      "step": 79390
    },
    {
      "epoch": 0.27827720475104,
      "grad_norm": 3.09375,
      "learning_rate": 4.6378462365214545e-05,
      "loss": 0.9372,
      "step": 79400
    },
    {
      "epoch": 0.27831225225793566,
      "grad_norm": 3.5,
      "learning_rate": 4.638430355494807e-05,
      "loss": 1.0214,
      "step": 79410
    },
    {
      "epoch": 0.27834729976483125,
      "grad_norm": 3.15625,
      "learning_rate": 4.6390144744681594e-05,
      "loss": 0.9923,
      "step": 79420
    },
    {
      "epoch": 0.27838234727172684,
      "grad_norm": 3.140625,
      "learning_rate": 4.639598593441513e-05,
      "loss": 1.0461,
      "step": 79430
    },
    {
      "epoch": 0.27841739477862243,
      "grad_norm": 3.234375,
      "learning_rate": 4.640182712414865e-05,
      "loss": 1.0481,
      "step": 79440
    },
    {
      "epoch": 0.278452442285518,
      "grad_norm": 2.984375,
      "learning_rate": 4.640766831388217e-05,
      "loss": 0.9871,
      "step": 79450
    },
    {
      "epoch": 0.2784874897924136,
      "grad_norm": 2.984375,
      "learning_rate": 4.64135095036157e-05,
      "loss": 0.9087,
      "step": 79460
    },
    {
      "epoch": 0.2785225372993092,
      "grad_norm": 2.703125,
      "learning_rate": 4.641935069334922e-05,
      "loss": 1.0003,
      "step": 79470
    },
    {
      "epoch": 0.2785575848062048,
      "grad_norm": 3.328125,
      "learning_rate": 4.642519188308275e-05,
      "loss": 0.9516,
      "step": 79480
    },
    {
      "epoch": 0.2785926323131004,
      "grad_norm": 3.3125,
      "learning_rate": 4.643103307281627e-05,
      "loss": 0.9475,
      "step": 79490
    },
    {
      "epoch": 0.278627679819996,
      "grad_norm": 3.296875,
      "learning_rate": 4.64368742625498e-05,
      "loss": 0.9074,
      "step": 79500
    },
    {
      "epoch": 0.2786627273268916,
      "grad_norm": 3.34375,
      "learning_rate": 4.644271545228333e-05,
      "loss": 0.9595,
      "step": 79510
    },
    {
      "epoch": 0.2786977748337872,
      "grad_norm": 3.0625,
      "learning_rate": 4.644855664201685e-05,
      "loss": 0.9904,
      "step": 79520
    },
    {
      "epoch": 0.2787328223406828,
      "grad_norm": 3.1875,
      "learning_rate": 4.645439783175037e-05,
      "loss": 0.9445,
      "step": 79530
    },
    {
      "epoch": 0.2787678698475784,
      "grad_norm": 3.578125,
      "learning_rate": 4.64602390214839e-05,
      "loss": 0.9229,
      "step": 79540
    },
    {
      "epoch": 0.278802917354474,
      "grad_norm": 3.15625,
      "learning_rate": 4.646608021121742e-05,
      "loss": 0.9353,
      "step": 79550
    },
    {
      "epoch": 0.2788379648613696,
      "grad_norm": 3.671875,
      "learning_rate": 4.647192140095095e-05,
      "loss": 0.9923,
      "step": 79560
    },
    {
      "epoch": 0.27887301236826517,
      "grad_norm": 2.953125,
      "learning_rate": 4.647776259068447e-05,
      "loss": 0.9371,
      "step": 79570
    },
    {
      "epoch": 0.27890805987516076,
      "grad_norm": 3.09375,
      "learning_rate": 4.6483603780418e-05,
      "loss": 0.9924,
      "step": 79580
    },
    {
      "epoch": 0.27894310738205635,
      "grad_norm": 3.109375,
      "learning_rate": 4.648944497015153e-05,
      "loss": 0.9904,
      "step": 79590
    },
    {
      "epoch": 0.278978154888952,
      "grad_norm": 3.609375,
      "learning_rate": 4.649528615988505e-05,
      "loss": 0.9832,
      "step": 79600
    },
    {
      "epoch": 0.2790132023958476,
      "grad_norm": 3.390625,
      "learning_rate": 4.650112734961857e-05,
      "loss": 1.0324,
      "step": 79610
    },
    {
      "epoch": 0.2790482499027432,
      "grad_norm": 3.328125,
      "learning_rate": 4.65069685393521e-05,
      "loss": 0.9697,
      "step": 79620
    },
    {
      "epoch": 0.2790832974096388,
      "grad_norm": 3.375,
      "learning_rate": 4.651280972908562e-05,
      "loss": 0.9346,
      "step": 79630
    },
    {
      "epoch": 0.27911834491653437,
      "grad_norm": 3.21875,
      "learning_rate": 4.651865091881915e-05,
      "loss": 1.0219,
      "step": 79640
    },
    {
      "epoch": 0.27915339242342996,
      "grad_norm": 3.453125,
      "learning_rate": 4.652449210855267e-05,
      "loss": 0.9387,
      "step": 79650
    },
    {
      "epoch": 0.27918843993032555,
      "grad_norm": 3.328125,
      "learning_rate": 4.65303332982862e-05,
      "loss": 0.9256,
      "step": 79660
    },
    {
      "epoch": 0.27922348743722114,
      "grad_norm": 3.171875,
      "learning_rate": 4.6536174488019726e-05,
      "loss": 1.0126,
      "step": 79670
    },
    {
      "epoch": 0.27925853494411673,
      "grad_norm": 3.046875,
      "learning_rate": 4.654201567775325e-05,
      "loss": 1.0383,
      "step": 79680
    },
    {
      "epoch": 0.2792935824510123,
      "grad_norm": 2.59375,
      "learning_rate": 4.6547856867486775e-05,
      "loss": 1.0231,
      "step": 79690
    },
    {
      "epoch": 0.27932862995790797,
      "grad_norm": 3.296875,
      "learning_rate": 4.65536980572203e-05,
      "loss": 0.9809,
      "step": 79700
    },
    {
      "epoch": 0.27936367746480356,
      "grad_norm": 3.046875,
      "learning_rate": 4.655953924695382e-05,
      "loss": 1.012,
      "step": 79710
    },
    {
      "epoch": 0.27939872497169915,
      "grad_norm": 3.21875,
      "learning_rate": 4.6565380436687346e-05,
      "loss": 0.9863,
      "step": 79720
    },
    {
      "epoch": 0.27943377247859474,
      "grad_norm": 3.046875,
      "learning_rate": 4.657122162642087e-05,
      "loss": 0.9151,
      "step": 79730
    },
    {
      "epoch": 0.27946881998549034,
      "grad_norm": 3.0625,
      "learning_rate": 4.6577062816154396e-05,
      "loss": 0.993,
      "step": 79740
    },
    {
      "epoch": 0.2795038674923859,
      "grad_norm": 3.59375,
      "learning_rate": 4.6582904005887924e-05,
      "loss": 0.9947,
      "step": 79750
    },
    {
      "epoch": 0.2795389149992815,
      "grad_norm": 3.5,
      "learning_rate": 4.6588745195621446e-05,
      "loss": 1.0203,
      "step": 79760
    },
    {
      "epoch": 0.2795739625061771,
      "grad_norm": 3.171875,
      "learning_rate": 4.6594586385354974e-05,
      "loss": 0.9437,
      "step": 79770
    },
    {
      "epoch": 0.2796090100130727,
      "grad_norm": 3.21875,
      "learning_rate": 4.6600427575088495e-05,
      "loss": 1.0625,
      "step": 79780
    },
    {
      "epoch": 0.2796440575199683,
      "grad_norm": 3.453125,
      "learning_rate": 4.660626876482202e-05,
      "loss": 0.9839,
      "step": 79790
    },
    {
      "epoch": 0.27967910502686394,
      "grad_norm": 3.0625,
      "learning_rate": 4.6612109954555545e-05,
      "loss": 0.9981,
      "step": 79800
    },
    {
      "epoch": 0.27971415253375953,
      "grad_norm": 3.3125,
      "learning_rate": 4.6617951144289073e-05,
      "loss": 0.9503,
      "step": 79810
    },
    {
      "epoch": 0.2797492000406551,
      "grad_norm": 3.046875,
      "learning_rate": 4.6623792334022595e-05,
      "loss": 0.96,
      "step": 79820
    },
    {
      "epoch": 0.2797842475475507,
      "grad_norm": 3.140625,
      "learning_rate": 4.662963352375612e-05,
      "loss": 0.9995,
      "step": 79830
    },
    {
      "epoch": 0.2798192950544463,
      "grad_norm": 3.59375,
      "learning_rate": 4.6635474713489645e-05,
      "loss": 0.9941,
      "step": 79840
    },
    {
      "epoch": 0.2798543425613419,
      "grad_norm": 2.953125,
      "learning_rate": 4.664131590322317e-05,
      "loss": 1.1349,
      "step": 79850
    },
    {
      "epoch": 0.2798893900682375,
      "grad_norm": 2.828125,
      "learning_rate": 4.6647157092956694e-05,
      "loss": 1.0756,
      "step": 79860
    },
    {
      "epoch": 0.2799244375751331,
      "grad_norm": 3.421875,
      "learning_rate": 4.6652998282690216e-05,
      "loss": 0.9601,
      "step": 79870
    },
    {
      "epoch": 0.27995948508202867,
      "grad_norm": 3.484375,
      "learning_rate": 4.6658839472423744e-05,
      "loss": 1.0253,
      "step": 79880
    },
    {
      "epoch": 0.27999453258892426,
      "grad_norm": 3.703125,
      "learning_rate": 4.666468066215727e-05,
      "loss": 0.9993,
      "step": 79890
    },
    {
      "epoch": 0.2800295800958199,
      "grad_norm": 3.15625,
      "learning_rate": 4.66705218518908e-05,
      "loss": 0.9088,
      "step": 79900
    },
    {
      "epoch": 0.2800646276027155,
      "grad_norm": 3.703125,
      "learning_rate": 4.667636304162432e-05,
      "loss": 0.978,
      "step": 79910
    },
    {
      "epoch": 0.2800996751096111,
      "grad_norm": 3.09375,
      "learning_rate": 4.668220423135784e-05,
      "loss": 0.934,
      "step": 79920
    },
    {
      "epoch": 0.2801347226165067,
      "grad_norm": 3.34375,
      "learning_rate": 4.668804542109137e-05,
      "loss": 0.9569,
      "step": 79930
    },
    {
      "epoch": 0.2801697701234023,
      "grad_norm": 3.09375,
      "learning_rate": 4.669388661082489e-05,
      "loss": 0.9657,
      "step": 79940
    },
    {
      "epoch": 0.28020481763029786,
      "grad_norm": 3.109375,
      "learning_rate": 4.669972780055842e-05,
      "loss": 0.9696,
      "step": 79950
    },
    {
      "epoch": 0.28023986513719346,
      "grad_norm": 2.921875,
      "learning_rate": 4.670556899029194e-05,
      "loss": 1.0286,
      "step": 79960
    },
    {
      "epoch": 0.28027491264408905,
      "grad_norm": 3.703125,
      "learning_rate": 4.671141018002547e-05,
      "loss": 1.0555,
      "step": 79970
    },
    {
      "epoch": 0.28030996015098464,
      "grad_norm": 3.15625,
      "learning_rate": 4.6717251369759e-05,
      "loss": 1.002,
      "step": 79980
    },
    {
      "epoch": 0.28034500765788023,
      "grad_norm": 2.984375,
      "learning_rate": 4.672309255949252e-05,
      "loss": 0.9738,
      "step": 79990
    },
    {
      "epoch": 0.2803800551647759,
      "grad_norm": 3.5625,
      "learning_rate": 4.672893374922604e-05,
      "loss": 0.9135,
      "step": 80000
    },
    {
      "epoch": 0.2803800551647759,
      "eval_loss": 0.9190837144851685,
      "eval_runtime": 559.3189,
      "eval_samples_per_second": 680.177,
      "eval_steps_per_second": 56.681,
      "step": 80000
    },
    {
      "epoch": 0.28041510267167147,
      "grad_norm": 3.25,
      "learning_rate": 4.673477493895957e-05,
      "loss": 1.0028,
      "step": 80010
    },
    {
      "epoch": 0.28045015017856706,
      "grad_norm": 2.796875,
      "learning_rate": 4.674061612869309e-05,
      "loss": 1.0533,
      "step": 80020
    },
    {
      "epoch": 0.28048519768546265,
      "grad_norm": 3.0625,
      "learning_rate": 4.674645731842662e-05,
      "loss": 0.9284,
      "step": 80030
    },
    {
      "epoch": 0.28052024519235824,
      "grad_norm": 3.171875,
      "learning_rate": 4.675229850816014e-05,
      "loss": 0.9618,
      "step": 80040
    },
    {
      "epoch": 0.28055529269925383,
      "grad_norm": 2.96875,
      "learning_rate": 4.675813969789367e-05,
      "loss": 0.9246,
      "step": 80050
    },
    {
      "epoch": 0.2805903402061494,
      "grad_norm": 3.03125,
      "learning_rate": 4.67639808876272e-05,
      "loss": 1.0401,
      "step": 80060
    },
    {
      "epoch": 0.280625387713045,
      "grad_norm": 3.1875,
      "learning_rate": 4.676982207736072e-05,
      "loss": 1.1003,
      "step": 80070
    },
    {
      "epoch": 0.2806604352199406,
      "grad_norm": 2.875,
      "learning_rate": 4.677566326709424e-05,
      "loss": 1.0322,
      "step": 80080
    },
    {
      "epoch": 0.28069548272683625,
      "grad_norm": 2.8125,
      "learning_rate": 4.678150445682777e-05,
      "loss": 1.0661,
      "step": 80090
    },
    {
      "epoch": 0.28073053023373185,
      "grad_norm": 3.234375,
      "learning_rate": 4.678734564656129e-05,
      "loss": 0.9527,
      "step": 80100
    },
    {
      "epoch": 0.28076557774062744,
      "grad_norm": 3.203125,
      "learning_rate": 4.679318683629482e-05,
      "loss": 0.9019,
      "step": 80110
    },
    {
      "epoch": 0.28080062524752303,
      "grad_norm": 2.96875,
      "learning_rate": 4.679902802602835e-05,
      "loss": 0.9555,
      "step": 80120
    },
    {
      "epoch": 0.2808356727544186,
      "grad_norm": 2.640625,
      "learning_rate": 4.680486921576187e-05,
      "loss": 0.9014,
      "step": 80130
    },
    {
      "epoch": 0.2808707202613142,
      "grad_norm": 3.484375,
      "learning_rate": 4.68107104054954e-05,
      "loss": 0.967,
      "step": 80140
    },
    {
      "epoch": 0.2809057677682098,
      "grad_norm": 3.125,
      "learning_rate": 4.681655159522892e-05,
      "loss": 1.0003,
      "step": 80150
    },
    {
      "epoch": 0.2809408152751054,
      "grad_norm": 2.921875,
      "learning_rate": 4.6822392784962446e-05,
      "loss": 0.9713,
      "step": 80160
    },
    {
      "epoch": 0.280975862782001,
      "grad_norm": 2.953125,
      "learning_rate": 4.682823397469597e-05,
      "loss": 0.9566,
      "step": 80170
    },
    {
      "epoch": 0.2810109102888966,
      "grad_norm": 3.5625,
      "learning_rate": 4.683407516442949e-05,
      "loss": 1.0166,
      "step": 80180
    },
    {
      "epoch": 0.2810459577957922,
      "grad_norm": 3.078125,
      "learning_rate": 4.683991635416302e-05,
      "loss": 0.9751,
      "step": 80190
    },
    {
      "epoch": 0.2810810053026878,
      "grad_norm": 3.0,
      "learning_rate": 4.6845757543896546e-05,
      "loss": 1.0103,
      "step": 80200
    },
    {
      "epoch": 0.2811160528095834,
      "grad_norm": 3.359375,
      "learning_rate": 4.685159873363007e-05,
      "loss": 0.9447,
      "step": 80210
    },
    {
      "epoch": 0.281151100316479,
      "grad_norm": 3.53125,
      "learning_rate": 4.6857439923363595e-05,
      "loss": 1.0056,
      "step": 80220
    },
    {
      "epoch": 0.2811861478233746,
      "grad_norm": 2.9375,
      "learning_rate": 4.686328111309712e-05,
      "loss": 0.9767,
      "step": 80230
    },
    {
      "epoch": 0.2812211953302702,
      "grad_norm": 3.65625,
      "learning_rate": 4.6869122302830645e-05,
      "loss": 1.0194,
      "step": 80240
    },
    {
      "epoch": 0.28125624283716577,
      "grad_norm": 3.21875,
      "learning_rate": 4.6874963492564167e-05,
      "loss": 0.9298,
      "step": 80250
    },
    {
      "epoch": 0.28129129034406136,
      "grad_norm": 3.171875,
      "learning_rate": 4.688080468229769e-05,
      "loss": 0.9386,
      "step": 80260
    },
    {
      "epoch": 0.28132633785095695,
      "grad_norm": 3.0625,
      "learning_rate": 4.6886645872031216e-05,
      "loss": 0.989,
      "step": 80270
    },
    {
      "epoch": 0.28136138535785254,
      "grad_norm": 3.109375,
      "learning_rate": 4.6892487061764744e-05,
      "loss": 0.9077,
      "step": 80280
    },
    {
      "epoch": 0.2813964328647482,
      "grad_norm": 2.921875,
      "learning_rate": 4.6898328251498266e-05,
      "loss": 0.9996,
      "step": 80290
    },
    {
      "epoch": 0.2814314803716438,
      "grad_norm": 3.046875,
      "learning_rate": 4.6904169441231794e-05,
      "loss": 0.9952,
      "step": 80300
    },
    {
      "epoch": 0.2814665278785394,
      "grad_norm": 3.125,
      "learning_rate": 4.6910010630965316e-05,
      "loss": 1.0179,
      "step": 80310
    },
    {
      "epoch": 0.28150157538543497,
      "grad_norm": 3.28125,
      "learning_rate": 4.6915851820698844e-05,
      "loss": 0.9281,
      "step": 80320
    },
    {
      "epoch": 0.28153662289233056,
      "grad_norm": 3.046875,
      "learning_rate": 4.6921693010432365e-05,
      "loss": 0.9173,
      "step": 80330
    },
    {
      "epoch": 0.28157167039922615,
      "grad_norm": 3.171875,
      "learning_rate": 4.6927534200165894e-05,
      "loss": 1.0312,
      "step": 80340
    },
    {
      "epoch": 0.28160671790612174,
      "grad_norm": 2.71875,
      "learning_rate": 4.6933375389899415e-05,
      "loss": 0.9213,
      "step": 80350
    },
    {
      "epoch": 0.28164176541301733,
      "grad_norm": 3.265625,
      "learning_rate": 4.693921657963294e-05,
      "loss": 0.9508,
      "step": 80360
    },
    {
      "epoch": 0.2816768129199129,
      "grad_norm": 3.375,
      "learning_rate": 4.694505776936647e-05,
      "loss": 1.0062,
      "step": 80370
    },
    {
      "epoch": 0.2817118604268085,
      "grad_norm": 2.953125,
      "learning_rate": 4.695089895909999e-05,
      "loss": 0.9342,
      "step": 80380
    },
    {
      "epoch": 0.28174690793370416,
      "grad_norm": 3.21875,
      "learning_rate": 4.6956740148833514e-05,
      "loss": 1.0178,
      "step": 80390
    },
    {
      "epoch": 0.28178195544059975,
      "grad_norm": 3.515625,
      "learning_rate": 4.696258133856704e-05,
      "loss": 0.9754,
      "step": 80400
    },
    {
      "epoch": 0.28181700294749534,
      "grad_norm": 3.28125,
      "learning_rate": 4.6968422528300564e-05,
      "loss": 0.9443,
      "step": 80410
    },
    {
      "epoch": 0.28185205045439093,
      "grad_norm": 2.96875,
      "learning_rate": 4.697426371803409e-05,
      "loss": 0.9409,
      "step": 80420
    },
    {
      "epoch": 0.2818870979612865,
      "grad_norm": 2.921875,
      "learning_rate": 4.6980104907767614e-05,
      "loss": 0.9838,
      "step": 80430
    },
    {
      "epoch": 0.2819221454681821,
      "grad_norm": 3.28125,
      "learning_rate": 4.698594609750114e-05,
      "loss": 0.9152,
      "step": 80440
    },
    {
      "epoch": 0.2819571929750777,
      "grad_norm": 3.359375,
      "learning_rate": 4.699178728723467e-05,
      "loss": 1.0517,
      "step": 80450
    },
    {
      "epoch": 0.2819922404819733,
      "grad_norm": 3.3125,
      "learning_rate": 4.699762847696819e-05,
      "loss": 0.9549,
      "step": 80460
    },
    {
      "epoch": 0.2820272879888689,
      "grad_norm": 3.59375,
      "learning_rate": 4.700346966670171e-05,
      "loss": 0.9547,
      "step": 80470
    },
    {
      "epoch": 0.2820623354957645,
      "grad_norm": 3.5,
      "learning_rate": 4.700931085643524e-05,
      "loss": 0.944,
      "step": 80480
    },
    {
      "epoch": 0.28209738300266013,
      "grad_norm": 3.0625,
      "learning_rate": 4.701515204616876e-05,
      "loss": 0.9144,
      "step": 80490
    },
    {
      "epoch": 0.2821324305095557,
      "grad_norm": 2.796875,
      "learning_rate": 4.702099323590229e-05,
      "loss": 1.0123,
      "step": 80500
    },
    {
      "epoch": 0.2821674780164513,
      "grad_norm": 2.921875,
      "learning_rate": 4.702683442563582e-05,
      "loss": 1.0272,
      "step": 80510
    },
    {
      "epoch": 0.2822025255233469,
      "grad_norm": 2.65625,
      "learning_rate": 4.703267561536934e-05,
      "loss": 0.9928,
      "step": 80520
    },
    {
      "epoch": 0.2822375730302425,
      "grad_norm": 3.171875,
      "learning_rate": 4.703851680510287e-05,
      "loss": 1.0448,
      "step": 80530
    },
    {
      "epoch": 0.2822726205371381,
      "grad_norm": 3.6875,
      "learning_rate": 4.704435799483639e-05,
      "loss": 0.9947,
      "step": 80540
    },
    {
      "epoch": 0.2823076680440337,
      "grad_norm": 3.328125,
      "learning_rate": 4.705019918456991e-05,
      "loss": 0.9951,
      "step": 80550
    },
    {
      "epoch": 0.28234271555092927,
      "grad_norm": 3.078125,
      "learning_rate": 4.705604037430344e-05,
      "loss": 0.9204,
      "step": 80560
    },
    {
      "epoch": 0.28237776305782486,
      "grad_norm": 3.5,
      "learning_rate": 4.706188156403696e-05,
      "loss": 0.9672,
      "step": 80570
    },
    {
      "epoch": 0.28241281056472045,
      "grad_norm": 3.125,
      "learning_rate": 4.706772275377049e-05,
      "loss": 0.9872,
      "step": 80580
    },
    {
      "epoch": 0.2824478580716161,
      "grad_norm": 3.09375,
      "learning_rate": 4.707356394350402e-05,
      "loss": 0.9535,
      "step": 80590
    },
    {
      "epoch": 0.2824829055785117,
      "grad_norm": 3.203125,
      "learning_rate": 4.707940513323754e-05,
      "loss": 1.0222,
      "step": 80600
    },
    {
      "epoch": 0.2825179530854073,
      "grad_norm": 2.890625,
      "learning_rate": 4.708524632297107e-05,
      "loss": 0.901,
      "step": 80610
    },
    {
      "epoch": 0.28255300059230287,
      "grad_norm": 3.078125,
      "learning_rate": 4.709108751270459e-05,
      "loss": 0.9606,
      "step": 80620
    },
    {
      "epoch": 0.28258804809919846,
      "grad_norm": 3.15625,
      "learning_rate": 4.709692870243812e-05,
      "loss": 1.0115,
      "step": 80630
    },
    {
      "epoch": 0.28262309560609405,
      "grad_norm": 3.59375,
      "learning_rate": 4.710276989217164e-05,
      "loss": 0.9317,
      "step": 80640
    },
    {
      "epoch": 0.28265814311298965,
      "grad_norm": 3.25,
      "learning_rate": 4.710861108190516e-05,
      "loss": 0.9909,
      "step": 80650
    },
    {
      "epoch": 0.28269319061988524,
      "grad_norm": 3.125,
      "learning_rate": 4.711445227163869e-05,
      "loss": 0.9968,
      "step": 80660
    },
    {
      "epoch": 0.28272823812678083,
      "grad_norm": 3.25,
      "learning_rate": 4.712029346137222e-05,
      "loss": 0.9635,
      "step": 80670
    },
    {
      "epoch": 0.2827632856336765,
      "grad_norm": 3.125,
      "learning_rate": 4.712613465110574e-05,
      "loss": 0.9673,
      "step": 80680
    },
    {
      "epoch": 0.28279833314057207,
      "grad_norm": 3.5,
      "learning_rate": 4.7131975840839266e-05,
      "loss": 0.8845,
      "step": 80690
    },
    {
      "epoch": 0.28283338064746766,
      "grad_norm": 3.15625,
      "learning_rate": 4.713781703057279e-05,
      "loss": 0.9981,
      "step": 80700
    },
    {
      "epoch": 0.28286842815436325,
      "grad_norm": 3.34375,
      "learning_rate": 4.7143658220306316e-05,
      "loss": 1.0404,
      "step": 80710
    },
    {
      "epoch": 0.28290347566125884,
      "grad_norm": 3.171875,
      "learning_rate": 4.714949941003984e-05,
      "loss": 1.0096,
      "step": 80720
    },
    {
      "epoch": 0.28293852316815443,
      "grad_norm": 2.75,
      "learning_rate": 4.715534059977336e-05,
      "loss": 1.0632,
      "step": 80730
    },
    {
      "epoch": 0.28297357067505,
      "grad_norm": 3.421875,
      "learning_rate": 4.716118178950689e-05,
      "loss": 0.9932,
      "step": 80740
    },
    {
      "epoch": 0.2830086181819456,
      "grad_norm": 2.875,
      "learning_rate": 4.7167022979240416e-05,
      "loss": 1.0334,
      "step": 80750
    },
    {
      "epoch": 0.2830436656888412,
      "grad_norm": 2.875,
      "learning_rate": 4.717286416897394e-05,
      "loss": 1.01,
      "step": 80760
    },
    {
      "epoch": 0.2830787131957368,
      "grad_norm": 3.5,
      "learning_rate": 4.7178705358707465e-05,
      "loss": 0.9203,
      "step": 80770
    },
    {
      "epoch": 0.28311376070263244,
      "grad_norm": 3.0625,
      "learning_rate": 4.718454654844099e-05,
      "loss": 0.9529,
      "step": 80780
    },
    {
      "epoch": 0.28314880820952804,
      "grad_norm": 3.640625,
      "learning_rate": 4.7190387738174515e-05,
      "loss": 1.0661,
      "step": 80790
    },
    {
      "epoch": 0.2831838557164236,
      "grad_norm": 3.203125,
      "learning_rate": 4.7196228927908036e-05,
      "loss": 0.9954,
      "step": 80800
    },
    {
      "epoch": 0.2832189032233192,
      "grad_norm": 3.203125,
      "learning_rate": 4.7202070117641565e-05,
      "loss": 0.9805,
      "step": 80810
    },
    {
      "epoch": 0.2832539507302148,
      "grad_norm": 3.0625,
      "learning_rate": 4.720791130737509e-05,
      "loss": 1.0014,
      "step": 80820
    },
    {
      "epoch": 0.2832889982371104,
      "grad_norm": 3.203125,
      "learning_rate": 4.7213752497108614e-05,
      "loss": 0.9779,
      "step": 80830
    },
    {
      "epoch": 0.283324045744006,
      "grad_norm": 2.984375,
      "learning_rate": 4.721959368684214e-05,
      "loss": 1.0146,
      "step": 80840
    },
    {
      "epoch": 0.2833590932509016,
      "grad_norm": 3.453125,
      "learning_rate": 4.7225434876575664e-05,
      "loss": 0.9816,
      "step": 80850
    },
    {
      "epoch": 0.2833941407577972,
      "grad_norm": 2.96875,
      "learning_rate": 4.7231276066309185e-05,
      "loss": 0.9694,
      "step": 80860
    },
    {
      "epoch": 0.28342918826469277,
      "grad_norm": 3.390625,
      "learning_rate": 4.7237117256042714e-05,
      "loss": 1.0484,
      "step": 80870
    },
    {
      "epoch": 0.2834642357715884,
      "grad_norm": 3.359375,
      "learning_rate": 4.7242958445776235e-05,
      "loss": 0.9862,
      "step": 80880
    },
    {
      "epoch": 0.283499283278484,
      "grad_norm": 3.03125,
      "learning_rate": 4.724879963550976e-05,
      "loss": 0.9229,
      "step": 80890
    },
    {
      "epoch": 0.2835343307853796,
      "grad_norm": 3.296875,
      "learning_rate": 4.725464082524329e-05,
      "loss": 0.9863,
      "step": 80900
    },
    {
      "epoch": 0.2835693782922752,
      "grad_norm": 3.0,
      "learning_rate": 4.726048201497681e-05,
      "loss": 1.0148,
      "step": 80910
    },
    {
      "epoch": 0.2836044257991708,
      "grad_norm": 3.28125,
      "learning_rate": 4.726632320471034e-05,
      "loss": 0.9027,
      "step": 80920
    },
    {
      "epoch": 0.28363947330606637,
      "grad_norm": 3.078125,
      "learning_rate": 4.727216439444386e-05,
      "loss": 0.9901,
      "step": 80930
    },
    {
      "epoch": 0.28367452081296196,
      "grad_norm": 3.25,
      "learning_rate": 4.7278005584177384e-05,
      "loss": 0.9951,
      "step": 80940
    },
    {
      "epoch": 0.28370956831985755,
      "grad_norm": 3.5,
      "learning_rate": 4.728384677391091e-05,
      "loss": 1.0412,
      "step": 80950
    },
    {
      "epoch": 0.28374461582675314,
      "grad_norm": 2.671875,
      "learning_rate": 4.7289687963644434e-05,
      "loss": 0.9107,
      "step": 80960
    },
    {
      "epoch": 0.28377966333364874,
      "grad_norm": 2.921875,
      "learning_rate": 4.729552915337796e-05,
      "loss": 0.943,
      "step": 80970
    },
    {
      "epoch": 0.2838147108405444,
      "grad_norm": 2.953125,
      "learning_rate": 4.730137034311149e-05,
      "loss": 0.8955,
      "step": 80980
    },
    {
      "epoch": 0.28384975834744,
      "grad_norm": 3.515625,
      "learning_rate": 4.730721153284501e-05,
      "loss": 0.9857,
      "step": 80990
    },
    {
      "epoch": 0.28388480585433556,
      "grad_norm": 3.484375,
      "learning_rate": 4.731305272257854e-05,
      "loss": 0.9609,
      "step": 81000
    },
    {
      "epoch": 0.28391985336123116,
      "grad_norm": 2.75,
      "learning_rate": 4.731889391231206e-05,
      "loss": 0.9296,
      "step": 81010
    },
    {
      "epoch": 0.28395490086812675,
      "grad_norm": 3.1875,
      "learning_rate": 4.732473510204559e-05,
      "loss": 0.9309,
      "step": 81020
    },
    {
      "epoch": 0.28398994837502234,
      "grad_norm": 3.234375,
      "learning_rate": 4.733057629177911e-05,
      "loss": 1.0118,
      "step": 81030
    },
    {
      "epoch": 0.28402499588191793,
      "grad_norm": 3.46875,
      "learning_rate": 4.733641748151263e-05,
      "loss": 1.0078,
      "step": 81040
    },
    {
      "epoch": 0.2840600433888135,
      "grad_norm": 3.234375,
      "learning_rate": 4.734225867124616e-05,
      "loss": 0.9258,
      "step": 81050
    },
    {
      "epoch": 0.2840950908957091,
      "grad_norm": 2.796875,
      "learning_rate": 4.734809986097969e-05,
      "loss": 0.9731,
      "step": 81060
    },
    {
      "epoch": 0.2841301384026047,
      "grad_norm": 3.40625,
      "learning_rate": 4.735394105071321e-05,
      "loss": 1.0336,
      "step": 81070
    },
    {
      "epoch": 0.28416518590950035,
      "grad_norm": 3.234375,
      "learning_rate": 4.735978224044674e-05,
      "loss": 0.9961,
      "step": 81080
    },
    {
      "epoch": 0.28420023341639594,
      "grad_norm": 4.6875,
      "learning_rate": 4.736562343018026e-05,
      "loss": 1.0292,
      "step": 81090
    },
    {
      "epoch": 0.28423528092329153,
      "grad_norm": 2.890625,
      "learning_rate": 4.737146461991379e-05,
      "loss": 0.9327,
      "step": 81100
    },
    {
      "epoch": 0.2842703284301871,
      "grad_norm": 2.796875,
      "learning_rate": 4.737730580964731e-05,
      "loss": 0.9245,
      "step": 81110
    },
    {
      "epoch": 0.2843053759370827,
      "grad_norm": 3.28125,
      "learning_rate": 4.738314699938083e-05,
      "loss": 0.9097,
      "step": 81120
    },
    {
      "epoch": 0.2843404234439783,
      "grad_norm": 3.421875,
      "learning_rate": 4.738898818911436e-05,
      "loss": 0.9476,
      "step": 81130
    },
    {
      "epoch": 0.2843754709508739,
      "grad_norm": 3.296875,
      "learning_rate": 4.739482937884789e-05,
      "loss": 1.0663,
      "step": 81140
    },
    {
      "epoch": 0.2844105184577695,
      "grad_norm": 3.4375,
      "learning_rate": 4.740067056858141e-05,
      "loss": 0.9688,
      "step": 81150
    },
    {
      "epoch": 0.2844455659646651,
      "grad_norm": 2.671875,
      "learning_rate": 4.740651175831494e-05,
      "loss": 0.8674,
      "step": 81160
    },
    {
      "epoch": 0.28448061347156073,
      "grad_norm": 2.9375,
      "learning_rate": 4.741235294804846e-05,
      "loss": 0.9142,
      "step": 81170
    },
    {
      "epoch": 0.2845156609784563,
      "grad_norm": 2.828125,
      "learning_rate": 4.741819413778199e-05,
      "loss": 0.9799,
      "step": 81180
    },
    {
      "epoch": 0.2845507084853519,
      "grad_norm": 2.578125,
      "learning_rate": 4.742403532751551e-05,
      "loss": 0.9653,
      "step": 81190
    },
    {
      "epoch": 0.2845857559922475,
      "grad_norm": 3.140625,
      "learning_rate": 4.742987651724903e-05,
      "loss": 0.9976,
      "step": 81200
    },
    {
      "epoch": 0.2846208034991431,
      "grad_norm": 3.84375,
      "learning_rate": 4.7435717706982565e-05,
      "loss": 1.0809,
      "step": 81210
    },
    {
      "epoch": 0.2846558510060387,
      "grad_norm": 3.265625,
      "learning_rate": 4.744155889671609e-05,
      "loss": 1.0415,
      "step": 81220
    },
    {
      "epoch": 0.2846908985129343,
      "grad_norm": 2.71875,
      "learning_rate": 4.7447400086449615e-05,
      "loss": 0.9748,
      "step": 81230
    },
    {
      "epoch": 0.28472594601982987,
      "grad_norm": 3.03125,
      "learning_rate": 4.7453241276183136e-05,
      "loss": 0.876,
      "step": 81240
    },
    {
      "epoch": 0.28476099352672546,
      "grad_norm": 2.984375,
      "learning_rate": 4.745908246591666e-05,
      "loss": 0.9242,
      "step": 81250
    },
    {
      "epoch": 0.28479604103362105,
      "grad_norm": 3.3125,
      "learning_rate": 4.7464923655650186e-05,
      "loss": 1.012,
      "step": 81260
    },
    {
      "epoch": 0.2848310885405167,
      "grad_norm": 2.875,
      "learning_rate": 4.747076484538371e-05,
      "loss": 0.9449,
      "step": 81270
    },
    {
      "epoch": 0.2848661360474123,
      "grad_norm": 3.4375,
      "learning_rate": 4.7476606035117236e-05,
      "loss": 0.9765,
      "step": 81280
    },
    {
      "epoch": 0.2849011835543079,
      "grad_norm": 3.109375,
      "learning_rate": 4.7482447224850764e-05,
      "loss": 0.952,
      "step": 81290
    },
    {
      "epoch": 0.28493623106120347,
      "grad_norm": 3.65625,
      "learning_rate": 4.7488288414584285e-05,
      "loss": 0.9838,
      "step": 81300
    },
    {
      "epoch": 0.28497127856809906,
      "grad_norm": 3.765625,
      "learning_rate": 4.7494129604317814e-05,
      "loss": 0.9903,
      "step": 81310
    },
    {
      "epoch": 0.28500632607499465,
      "grad_norm": 3.25,
      "learning_rate": 4.7499970794051335e-05,
      "loss": 1.0019,
      "step": 81320
    },
    {
      "epoch": 0.28504137358189024,
      "grad_norm": 2.796875,
      "learning_rate": 4.7505811983784857e-05,
      "loss": 0.9475,
      "step": 81330
    },
    {
      "epoch": 0.28507642108878584,
      "grad_norm": 2.34375,
      "learning_rate": 4.7511653173518385e-05,
      "loss": 0.928,
      "step": 81340
    },
    {
      "epoch": 0.2851114685956814,
      "grad_norm": 3.140625,
      "learning_rate": 4.7517494363251906e-05,
      "loss": 0.9982,
      "step": 81350
    },
    {
      "epoch": 0.285146516102577,
      "grad_norm": 3.328125,
      "learning_rate": 4.7523335552985434e-05,
      "loss": 0.9587,
      "step": 81360
    },
    {
      "epoch": 0.28518156360947267,
      "grad_norm": 3.1875,
      "learning_rate": 4.752917674271896e-05,
      "loss": 0.8489,
      "step": 81370
    },
    {
      "epoch": 0.28521661111636826,
      "grad_norm": 2.75,
      "learning_rate": 4.7535017932452484e-05,
      "loss": 0.911,
      "step": 81380
    },
    {
      "epoch": 0.28525165862326385,
      "grad_norm": 2.734375,
      "learning_rate": 4.754085912218601e-05,
      "loss": 0.9852,
      "step": 81390
    },
    {
      "epoch": 0.28528670613015944,
      "grad_norm": 2.890625,
      "learning_rate": 4.7546700311919534e-05,
      "loss": 0.944,
      "step": 81400
    },
    {
      "epoch": 0.28532175363705503,
      "grad_norm": 3.390625,
      "learning_rate": 4.7552541501653055e-05,
      "loss": 0.8469,
      "step": 81410
    },
    {
      "epoch": 0.2853568011439506,
      "grad_norm": 2.90625,
      "learning_rate": 4.7558382691386584e-05,
      "loss": 0.92,
      "step": 81420
    },
    {
      "epoch": 0.2853918486508462,
      "grad_norm": 3.59375,
      "learning_rate": 4.7564223881120105e-05,
      "loss": 1.0341,
      "step": 81430
    },
    {
      "epoch": 0.2854268961577418,
      "grad_norm": 3.015625,
      "learning_rate": 4.757006507085363e-05,
      "loss": 0.9383,
      "step": 81440
    },
    {
      "epoch": 0.2854619436646374,
      "grad_norm": 3.28125,
      "learning_rate": 4.757590626058716e-05,
      "loss": 1.0052,
      "step": 81450
    },
    {
      "epoch": 0.285496991171533,
      "grad_norm": 3.359375,
      "learning_rate": 4.758174745032068e-05,
      "loss": 0.9618,
      "step": 81460
    },
    {
      "epoch": 0.28553203867842863,
      "grad_norm": 3.203125,
      "learning_rate": 4.758758864005421e-05,
      "loss": 0.949,
      "step": 81470
    },
    {
      "epoch": 0.2855670861853242,
      "grad_norm": 3.234375,
      "learning_rate": 4.759342982978773e-05,
      "loss": 1.0286,
      "step": 81480
    },
    {
      "epoch": 0.2856021336922198,
      "grad_norm": 3.0625,
      "learning_rate": 4.759927101952126e-05,
      "loss": 0.8657,
      "step": 81490
    },
    {
      "epoch": 0.2856371811991154,
      "grad_norm": 2.875,
      "learning_rate": 4.760511220925478e-05,
      "loss": 0.9514,
      "step": 81500
    },
    {
      "epoch": 0.285672228706011,
      "grad_norm": 3.1875,
      "learning_rate": 4.7610953398988304e-05,
      "loss": 0.9396,
      "step": 81510
    },
    {
      "epoch": 0.2857072762129066,
      "grad_norm": 3.34375,
      "learning_rate": 4.761679458872184e-05,
      "loss": 0.929,
      "step": 81520
    },
    {
      "epoch": 0.2857423237198022,
      "grad_norm": 2.796875,
      "learning_rate": 4.762263577845536e-05,
      "loss": 0.9274,
      "step": 81530
    },
    {
      "epoch": 0.2857773712266978,
      "grad_norm": 3.15625,
      "learning_rate": 4.762847696818888e-05,
      "loss": 0.9602,
      "step": 81540
    },
    {
      "epoch": 0.28581241873359337,
      "grad_norm": 3.484375,
      "learning_rate": 4.763431815792241e-05,
      "loss": 1.023,
      "step": 81550
    },
    {
      "epoch": 0.28584746624048896,
      "grad_norm": 3.125,
      "learning_rate": 4.764015934765593e-05,
      "loss": 1.0052,
      "step": 81560
    },
    {
      "epoch": 0.2858825137473846,
      "grad_norm": 3.03125,
      "learning_rate": 4.764600053738946e-05,
      "loss": 0.9324,
      "step": 81570
    },
    {
      "epoch": 0.2859175612542802,
      "grad_norm": 3.640625,
      "learning_rate": 4.765184172712298e-05,
      "loss": 0.9452,
      "step": 81580
    },
    {
      "epoch": 0.2859526087611758,
      "grad_norm": 3.34375,
      "learning_rate": 4.76576829168565e-05,
      "loss": 1.0028,
      "step": 81590
    },
    {
      "epoch": 0.2859876562680714,
      "grad_norm": 3.09375,
      "learning_rate": 4.766352410659004e-05,
      "loss": 0.965,
      "step": 81600
    },
    {
      "epoch": 0.28602270377496697,
      "grad_norm": 3.03125,
      "learning_rate": 4.766936529632356e-05,
      "loss": 0.9856,
      "step": 81610
    },
    {
      "epoch": 0.28605775128186256,
      "grad_norm": 3.265625,
      "learning_rate": 4.767520648605708e-05,
      "loss": 0.9785,
      "step": 81620
    },
    {
      "epoch": 0.28609279878875815,
      "grad_norm": 2.921875,
      "learning_rate": 4.768104767579061e-05,
      "loss": 1.0289,
      "step": 81630
    },
    {
      "epoch": 0.28612784629565374,
      "grad_norm": 3.046875,
      "learning_rate": 4.768688886552413e-05,
      "loss": 0.961,
      "step": 81640
    },
    {
      "epoch": 0.28616289380254933,
      "grad_norm": 3.78125,
      "learning_rate": 4.769273005525766e-05,
      "loss": 1.0592,
      "step": 81650
    },
    {
      "epoch": 0.2861979413094449,
      "grad_norm": 2.796875,
      "learning_rate": 4.769857124499118e-05,
      "loss": 0.996,
      "step": 81660
    },
    {
      "epoch": 0.28623298881634057,
      "grad_norm": 2.9375,
      "learning_rate": 4.77044124347247e-05,
      "loss": 0.9185,
      "step": 81670
    },
    {
      "epoch": 0.28626803632323616,
      "grad_norm": 2.890625,
      "learning_rate": 4.7710253624458236e-05,
      "loss": 0.9147,
      "step": 81680
    },
    {
      "epoch": 0.28630308383013175,
      "grad_norm": 3.515625,
      "learning_rate": 4.771609481419176e-05,
      "loss": 0.9974,
      "step": 81690
    },
    {
      "epoch": 0.28633813133702735,
      "grad_norm": 3.546875,
      "learning_rate": 4.7721936003925286e-05,
      "loss": 1.0482,
      "step": 81700
    },
    {
      "epoch": 0.28637317884392294,
      "grad_norm": 3.234375,
      "learning_rate": 4.772777719365881e-05,
      "loss": 0.9349,
      "step": 81710
    },
    {
      "epoch": 0.28640822635081853,
      "grad_norm": 3.203125,
      "learning_rate": 4.773361838339233e-05,
      "loss": 0.9835,
      "step": 81720
    },
    {
      "epoch": 0.2864432738577141,
      "grad_norm": 2.96875,
      "learning_rate": 4.773945957312586e-05,
      "loss": 0.9373,
      "step": 81730
    },
    {
      "epoch": 0.2864783213646097,
      "grad_norm": 3.015625,
      "learning_rate": 4.774530076285938e-05,
      "loss": 1.0454,
      "step": 81740
    },
    {
      "epoch": 0.2865133688715053,
      "grad_norm": 3.5625,
      "learning_rate": 4.775114195259291e-05,
      "loss": 0.9686,
      "step": 81750
    },
    {
      "epoch": 0.28654841637840095,
      "grad_norm": 2.90625,
      "learning_rate": 4.7756983142326435e-05,
      "loss": 1.0323,
      "step": 81760
    },
    {
      "epoch": 0.28658346388529654,
      "grad_norm": 3.390625,
      "learning_rate": 4.7762824332059956e-05,
      "loss": 0.956,
      "step": 81770
    },
    {
      "epoch": 0.28661851139219213,
      "grad_norm": 2.984375,
      "learning_rate": 4.7768665521793485e-05,
      "loss": 0.9439,
      "step": 81780
    },
    {
      "epoch": 0.2866535588990877,
      "grad_norm": 2.78125,
      "learning_rate": 4.7774506711527006e-05,
      "loss": 1.0375,
      "step": 81790
    },
    {
      "epoch": 0.2866886064059833,
      "grad_norm": 3.078125,
      "learning_rate": 4.778034790126053e-05,
      "loss": 0.9631,
      "step": 81800
    },
    {
      "epoch": 0.2867236539128789,
      "grad_norm": 3.1875,
      "learning_rate": 4.7786189090994056e-05,
      "loss": 1.0396,
      "step": 81810
    },
    {
      "epoch": 0.2867587014197745,
      "grad_norm": 3.359375,
      "learning_rate": 4.779203028072758e-05,
      "loss": 0.9924,
      "step": 81820
    },
    {
      "epoch": 0.2867937489266701,
      "grad_norm": 3.4375,
      "learning_rate": 4.7797871470461106e-05,
      "loss": 0.9778,
      "step": 81830
    },
    {
      "epoch": 0.2868287964335657,
      "grad_norm": 3.53125,
      "learning_rate": 4.7803712660194634e-05,
      "loss": 0.9665,
      "step": 81840
    },
    {
      "epoch": 0.28686384394046127,
      "grad_norm": 3.03125,
      "learning_rate": 4.7809553849928155e-05,
      "loss": 0.9764,
      "step": 81850
    },
    {
      "epoch": 0.2868988914473569,
      "grad_norm": 3.015625,
      "learning_rate": 4.7815395039661683e-05,
      "loss": 1.012,
      "step": 81860
    },
    {
      "epoch": 0.2869339389542525,
      "grad_norm": 4.09375,
      "learning_rate": 4.7821236229395205e-05,
      "loss": 0.8958,
      "step": 81870
    },
    {
      "epoch": 0.2869689864611481,
      "grad_norm": 2.890625,
      "learning_rate": 4.7827077419128726e-05,
      "loss": 1.0222,
      "step": 81880
    },
    {
      "epoch": 0.2870040339680437,
      "grad_norm": 2.65625,
      "learning_rate": 4.7832918608862255e-05,
      "loss": 0.944,
      "step": 81890
    },
    {
      "epoch": 0.2870390814749393,
      "grad_norm": 2.8125,
      "learning_rate": 4.7838759798595776e-05,
      "loss": 0.9255,
      "step": 81900
    },
    {
      "epoch": 0.2870741289818349,
      "grad_norm": 3.125,
      "learning_rate": 4.784460098832931e-05,
      "loss": 1.0301,
      "step": 81910
    },
    {
      "epoch": 0.28710917648873047,
      "grad_norm": 3.125,
      "learning_rate": 4.785044217806283e-05,
      "loss": 1.0567,
      "step": 81920
    },
    {
      "epoch": 0.28714422399562606,
      "grad_norm": 3.203125,
      "learning_rate": 4.7856283367796354e-05,
      "loss": 1.0182,
      "step": 81930
    },
    {
      "epoch": 0.28717927150252165,
      "grad_norm": 3.109375,
      "learning_rate": 4.786212455752988e-05,
      "loss": 1.0478,
      "step": 81940
    },
    {
      "epoch": 0.28721431900941724,
      "grad_norm": 3.046875,
      "learning_rate": 4.7867965747263404e-05,
      "loss": 0.9668,
      "step": 81950
    },
    {
      "epoch": 0.2872493665163129,
      "grad_norm": 2.921875,
      "learning_rate": 4.787380693699693e-05,
      "loss": 0.9467,
      "step": 81960
    },
    {
      "epoch": 0.2872844140232085,
      "grad_norm": 2.609375,
      "learning_rate": 4.787964812673045e-05,
      "loss": 0.9055,
      "step": 81970
    },
    {
      "epoch": 0.28731946153010407,
      "grad_norm": 3.015625,
      "learning_rate": 4.7885489316463975e-05,
      "loss": 1.0059,
      "step": 81980
    },
    {
      "epoch": 0.28735450903699966,
      "grad_norm": 2.578125,
      "learning_rate": 4.789133050619751e-05,
      "loss": 1.0119,
      "step": 81990
    },
    {
      "epoch": 0.28738955654389525,
      "grad_norm": 3.375,
      "learning_rate": 4.789717169593103e-05,
      "loss": 0.9526,
      "step": 82000
    },
    {
      "epoch": 0.28742460405079084,
      "grad_norm": 3.0625,
      "learning_rate": 4.790301288566455e-05,
      "loss": 0.9224,
      "step": 82010
    },
    {
      "epoch": 0.28745965155768644,
      "grad_norm": 3.3125,
      "learning_rate": 4.790885407539808e-05,
      "loss": 0.906,
      "step": 82020
    },
    {
      "epoch": 0.287494699064582,
      "grad_norm": 3.21875,
      "learning_rate": 4.79146952651316e-05,
      "loss": 0.9342,
      "step": 82030
    },
    {
      "epoch": 0.2875297465714776,
      "grad_norm": 3.171875,
      "learning_rate": 4.792053645486513e-05,
      "loss": 0.9935,
      "step": 82040
    },
    {
      "epoch": 0.2875647940783732,
      "grad_norm": 3.546875,
      "learning_rate": 4.792637764459865e-05,
      "loss": 0.9573,
      "step": 82050
    },
    {
      "epoch": 0.28759984158526886,
      "grad_norm": 2.890625,
      "learning_rate": 4.7932218834332174e-05,
      "loss": 0.9988,
      "step": 82060
    },
    {
      "epoch": 0.28763488909216445,
      "grad_norm": 3.109375,
      "learning_rate": 4.793806002406571e-05,
      "loss": 0.9665,
      "step": 82070
    },
    {
      "epoch": 0.28766993659906004,
      "grad_norm": 3.28125,
      "learning_rate": 4.794390121379923e-05,
      "loss": 0.9927,
      "step": 82080
    },
    {
      "epoch": 0.28770498410595563,
      "grad_norm": 3.265625,
      "learning_rate": 4.794974240353275e-05,
      "loss": 0.9782,
      "step": 82090
    },
    {
      "epoch": 0.2877400316128512,
      "grad_norm": 3.1875,
      "learning_rate": 4.795558359326628e-05,
      "loss": 0.975,
      "step": 82100
    },
    {
      "epoch": 0.2877750791197468,
      "grad_norm": 2.875,
      "learning_rate": 4.79614247829998e-05,
      "loss": 1.0292,
      "step": 82110
    },
    {
      "epoch": 0.2878101266266424,
      "grad_norm": 3.046875,
      "learning_rate": 4.796726597273333e-05,
      "loss": 0.9467,
      "step": 82120
    },
    {
      "epoch": 0.287845174133538,
      "grad_norm": 2.6875,
      "learning_rate": 4.797310716246685e-05,
      "loss": 0.899,
      "step": 82130
    },
    {
      "epoch": 0.2878802216404336,
      "grad_norm": 2.96875,
      "learning_rate": 4.797894835220038e-05,
      "loss": 0.9328,
      "step": 82140
    },
    {
      "epoch": 0.2879152691473292,
      "grad_norm": 3.390625,
      "learning_rate": 4.798478954193391e-05,
      "loss": 1.0568,
      "step": 82150
    },
    {
      "epoch": 0.2879503166542248,
      "grad_norm": 3.03125,
      "learning_rate": 4.799063073166743e-05,
      "loss": 0.9552,
      "step": 82160
    },
    {
      "epoch": 0.2879853641611204,
      "grad_norm": 3.328125,
      "learning_rate": 4.799647192140096e-05,
      "loss": 1.0107,
      "step": 82170
    },
    {
      "epoch": 0.288020411668016,
      "grad_norm": 3.0625,
      "learning_rate": 4.800231311113448e-05,
      "loss": 0.9739,
      "step": 82180
    },
    {
      "epoch": 0.2880554591749116,
      "grad_norm": 2.96875,
      "learning_rate": 4.8008154300868e-05,
      "loss": 0.9833,
      "step": 82190
    },
    {
      "epoch": 0.2880905066818072,
      "grad_norm": 3.296875,
      "learning_rate": 4.801399549060153e-05,
      "loss": 0.9205,
      "step": 82200
    },
    {
      "epoch": 0.2881255541887028,
      "grad_norm": 2.65625,
      "learning_rate": 4.801983668033505e-05,
      "loss": 0.9691,
      "step": 82210
    },
    {
      "epoch": 0.2881606016955984,
      "grad_norm": 2.953125,
      "learning_rate": 4.802567787006858e-05,
      "loss": 0.8778,
      "step": 82220
    },
    {
      "epoch": 0.28819564920249396,
      "grad_norm": 3.15625,
      "learning_rate": 4.8031519059802106e-05,
      "loss": 1.0117,
      "step": 82230
    },
    {
      "epoch": 0.28823069670938956,
      "grad_norm": 3.109375,
      "learning_rate": 4.803736024953563e-05,
      "loss": 0.9659,
      "step": 82240
    },
    {
      "epoch": 0.28826574421628515,
      "grad_norm": 3.3125,
      "learning_rate": 4.8043201439269156e-05,
      "loss": 0.9893,
      "step": 82250
    },
    {
      "epoch": 0.2883007917231808,
      "grad_norm": 3.109375,
      "learning_rate": 4.804904262900268e-05,
      "loss": 1.045,
      "step": 82260
    },
    {
      "epoch": 0.2883358392300764,
      "grad_norm": 2.890625,
      "learning_rate": 4.80548838187362e-05,
      "loss": 0.8657,
      "step": 82270
    },
    {
      "epoch": 0.288370886736972,
      "grad_norm": 3.140625,
      "learning_rate": 4.806072500846973e-05,
      "loss": 0.9101,
      "step": 82280
    },
    {
      "epoch": 0.28840593424386757,
      "grad_norm": 3.203125,
      "learning_rate": 4.806656619820325e-05,
      "loss": 0.9734,
      "step": 82290
    },
    {
      "epoch": 0.28844098175076316,
      "grad_norm": 3.359375,
      "learning_rate": 4.8072407387936777e-05,
      "loss": 1.0081,
      "step": 82300
    },
    {
      "epoch": 0.28847602925765875,
      "grad_norm": 3.15625,
      "learning_rate": 4.8078248577670305e-05,
      "loss": 0.9077,
      "step": 82310
    },
    {
      "epoch": 0.28851107676455434,
      "grad_norm": 2.875,
      "learning_rate": 4.8084089767403826e-05,
      "loss": 1.0611,
      "step": 82320
    },
    {
      "epoch": 0.28854612427144993,
      "grad_norm": 3.328125,
      "learning_rate": 4.8089930957137355e-05,
      "loss": 0.9281,
      "step": 82330
    },
    {
      "epoch": 0.2885811717783455,
      "grad_norm": 3.46875,
      "learning_rate": 4.8095772146870876e-05,
      "loss": 0.9884,
      "step": 82340
    },
    {
      "epoch": 0.28861621928524117,
      "grad_norm": 3.265625,
      "learning_rate": 4.8101613336604404e-05,
      "loss": 0.8932,
      "step": 82350
    },
    {
      "epoch": 0.28865126679213676,
      "grad_norm": 3.109375,
      "learning_rate": 4.8107454526337926e-05,
      "loss": 1.0063,
      "step": 82360
    },
    {
      "epoch": 0.28868631429903235,
      "grad_norm": 3.0,
      "learning_rate": 4.811329571607145e-05,
      "loss": 0.9928,
      "step": 82370
    },
    {
      "epoch": 0.28872136180592795,
      "grad_norm": 3.359375,
      "learning_rate": 4.811913690580498e-05,
      "loss": 1.0091,
      "step": 82380
    },
    {
      "epoch": 0.28875640931282354,
      "grad_norm": 3.53125,
      "learning_rate": 4.8124978095538504e-05,
      "loss": 0.9459,
      "step": 82390
    },
    {
      "epoch": 0.28879145681971913,
      "grad_norm": 2.875,
      "learning_rate": 4.8130819285272025e-05,
      "loss": 0.9478,
      "step": 82400
    },
    {
      "epoch": 0.2888265043266147,
      "grad_norm": 3.25,
      "learning_rate": 4.813666047500555e-05,
      "loss": 0.9896,
      "step": 82410
    },
    {
      "epoch": 0.2888615518335103,
      "grad_norm": 3.015625,
      "learning_rate": 4.8142501664739075e-05,
      "loss": 0.9349,
      "step": 82420
    },
    {
      "epoch": 0.2888965993404059,
      "grad_norm": 2.984375,
      "learning_rate": 4.81483428544726e-05,
      "loss": 0.952,
      "step": 82430
    },
    {
      "epoch": 0.2889316468473015,
      "grad_norm": 3.171875,
      "learning_rate": 4.8154184044206124e-05,
      "loss": 0.9556,
      "step": 82440
    },
    {
      "epoch": 0.28896669435419714,
      "grad_norm": 3.03125,
      "learning_rate": 4.8160025233939646e-05,
      "loss": 0.9956,
      "step": 82450
    },
    {
      "epoch": 0.28900174186109273,
      "grad_norm": 3.09375,
      "learning_rate": 4.816586642367318e-05,
      "loss": 0.9936,
      "step": 82460
    },
    {
      "epoch": 0.2890367893679883,
      "grad_norm": 3.15625,
      "learning_rate": 4.81717076134067e-05,
      "loss": 1.0387,
      "step": 82470
    },
    {
      "epoch": 0.2890718368748839,
      "grad_norm": 2.9375,
      "learning_rate": 4.8177548803140224e-05,
      "loss": 0.992,
      "step": 82480
    },
    {
      "epoch": 0.2891068843817795,
      "grad_norm": 3.140625,
      "learning_rate": 4.818338999287375e-05,
      "loss": 0.8877,
      "step": 82490
    },
    {
      "epoch": 0.2891419318886751,
      "grad_norm": 3.375,
      "learning_rate": 4.8189231182607274e-05,
      "loss": 0.9826,
      "step": 82500
    },
    {
      "epoch": 0.2891769793955707,
      "grad_norm": 2.890625,
      "learning_rate": 4.81950723723408e-05,
      "loss": 1.0099,
      "step": 82510
    },
    {
      "epoch": 0.2892120269024663,
      "grad_norm": 3.75,
      "learning_rate": 4.820091356207432e-05,
      "loss": 1.0435,
      "step": 82520
    },
    {
      "epoch": 0.28924707440936187,
      "grad_norm": 3.078125,
      "learning_rate": 4.820675475180785e-05,
      "loss": 1.0445,
      "step": 82530
    },
    {
      "epoch": 0.28928212191625746,
      "grad_norm": 3.28125,
      "learning_rate": 4.821259594154138e-05,
      "loss": 0.9555,
      "step": 82540
    },
    {
      "epoch": 0.2893171694231531,
      "grad_norm": 3.15625,
      "learning_rate": 4.82184371312749e-05,
      "loss": 0.8994,
      "step": 82550
    },
    {
      "epoch": 0.2893522169300487,
      "grad_norm": 3.59375,
      "learning_rate": 4.822427832100842e-05,
      "loss": 0.9422,
      "step": 82560
    },
    {
      "epoch": 0.2893872644369443,
      "grad_norm": 3.171875,
      "learning_rate": 4.823011951074195e-05,
      "loss": 1.0116,
      "step": 82570
    },
    {
      "epoch": 0.2894223119438399,
      "grad_norm": 2.828125,
      "learning_rate": 4.823596070047547e-05,
      "loss": 0.9463,
      "step": 82580
    },
    {
      "epoch": 0.2894573594507355,
      "grad_norm": 2.96875,
      "learning_rate": 4.8241801890209e-05,
      "loss": 0.9519,
      "step": 82590
    },
    {
      "epoch": 0.28949240695763107,
      "grad_norm": 3.453125,
      "learning_rate": 4.824764307994252e-05,
      "loss": 1.0192,
      "step": 82600
    },
    {
      "epoch": 0.28952745446452666,
      "grad_norm": 3.015625,
      "learning_rate": 4.825348426967605e-05,
      "loss": 1.0022,
      "step": 82610
    },
    {
      "epoch": 0.28956250197142225,
      "grad_norm": 3.0,
      "learning_rate": 4.825932545940958e-05,
      "loss": 0.9627,
      "step": 82620
    },
    {
      "epoch": 0.28959754947831784,
      "grad_norm": 3.28125,
      "learning_rate": 4.82651666491431e-05,
      "loss": 1.0318,
      "step": 82630
    },
    {
      "epoch": 0.28963259698521343,
      "grad_norm": 2.875,
      "learning_rate": 4.827100783887663e-05,
      "loss": 0.935,
      "step": 82640
    },
    {
      "epoch": 0.2896676444921091,
      "grad_norm": 2.796875,
      "learning_rate": 4.827684902861015e-05,
      "loss": 0.91,
      "step": 82650
    },
    {
      "epoch": 0.28970269199900467,
      "grad_norm": 3.484375,
      "learning_rate": 4.828269021834367e-05,
      "loss": 1.0589,
      "step": 82660
    },
    {
      "epoch": 0.28973773950590026,
      "grad_norm": 2.890625,
      "learning_rate": 4.82885314080772e-05,
      "loss": 0.9474,
      "step": 82670
    },
    {
      "epoch": 0.28977278701279585,
      "grad_norm": 2.921875,
      "learning_rate": 4.829437259781072e-05,
      "loss": 0.9434,
      "step": 82680
    },
    {
      "epoch": 0.28980783451969144,
      "grad_norm": 3.5,
      "learning_rate": 4.830021378754425e-05,
      "loss": 1.0843,
      "step": 82690
    },
    {
      "epoch": 0.28984288202658703,
      "grad_norm": 3.65625,
      "learning_rate": 4.830605497727778e-05,
      "loss": 0.9889,
      "step": 82700
    },
    {
      "epoch": 0.2898779295334826,
      "grad_norm": 3.25,
      "learning_rate": 4.83118961670113e-05,
      "loss": 0.9347,
      "step": 82710
    },
    {
      "epoch": 0.2899129770403782,
      "grad_norm": 2.921875,
      "learning_rate": 4.831773735674483e-05,
      "loss": 0.9083,
      "step": 82720
    },
    {
      "epoch": 0.2899480245472738,
      "grad_norm": 3.171875,
      "learning_rate": 4.832357854647835e-05,
      "loss": 1.0003,
      "step": 82730
    },
    {
      "epoch": 0.2899830720541694,
      "grad_norm": 3.0625,
      "learning_rate": 4.832941973621187e-05,
      "loss": 0.9176,
      "step": 82740
    },
    {
      "epoch": 0.29001811956106505,
      "grad_norm": 3.046875,
      "learning_rate": 4.83352609259454e-05,
      "loss": 0.9715,
      "step": 82750
    },
    {
      "epoch": 0.29005316706796064,
      "grad_norm": 3.4375,
      "learning_rate": 4.834110211567892e-05,
      "loss": 1.0276,
      "step": 82760
    },
    {
      "epoch": 0.29008821457485623,
      "grad_norm": 3.203125,
      "learning_rate": 4.834694330541245e-05,
      "loss": 0.9649,
      "step": 82770
    },
    {
      "epoch": 0.2901232620817518,
      "grad_norm": 3.5625,
      "learning_rate": 4.8352784495145976e-05,
      "loss": 1.0051,
      "step": 82780
    },
    {
      "epoch": 0.2901583095886474,
      "grad_norm": 2.921875,
      "learning_rate": 4.83586256848795e-05,
      "loss": 1.0605,
      "step": 82790
    },
    {
      "epoch": 0.290193357095543,
      "grad_norm": 3.109375,
      "learning_rate": 4.8364466874613026e-05,
      "loss": 1.022,
      "step": 82800
    },
    {
      "epoch": 0.2902284046024386,
      "grad_norm": 3.15625,
      "learning_rate": 4.837030806434655e-05,
      "loss": 0.9059,
      "step": 82810
    },
    {
      "epoch": 0.2902634521093342,
      "grad_norm": 3.125,
      "learning_rate": 4.8376149254080075e-05,
      "loss": 0.9534,
      "step": 82820
    },
    {
      "epoch": 0.2902984996162298,
      "grad_norm": 2.8125,
      "learning_rate": 4.83819904438136e-05,
      "loss": 0.9586,
      "step": 82830
    },
    {
      "epoch": 0.2903335471231254,
      "grad_norm": 3.21875,
      "learning_rate": 4.838783163354712e-05,
      "loss": 0.9931,
      "step": 82840
    },
    {
      "epoch": 0.290368594630021,
      "grad_norm": 3.390625,
      "learning_rate": 4.839367282328065e-05,
      "loss": 0.9596,
      "step": 82850
    },
    {
      "epoch": 0.2904036421369166,
      "grad_norm": 3.171875,
      "learning_rate": 4.8399514013014175e-05,
      "loss": 0.996,
      "step": 82860
    },
    {
      "epoch": 0.2904386896438122,
      "grad_norm": 3.125,
      "learning_rate": 4.8405355202747696e-05,
      "loss": 0.9062,
      "step": 82870
    },
    {
      "epoch": 0.2904737371507078,
      "grad_norm": 3.109375,
      "learning_rate": 4.8411196392481224e-05,
      "loss": 0.9574,
      "step": 82880
    },
    {
      "epoch": 0.2905087846576034,
      "grad_norm": 3.15625,
      "learning_rate": 4.8417037582214746e-05,
      "loss": 0.9734,
      "step": 82890
    },
    {
      "epoch": 0.29054383216449897,
      "grad_norm": 3.09375,
      "learning_rate": 4.8422878771948274e-05,
      "loss": 0.8839,
      "step": 82900
    },
    {
      "epoch": 0.29057887967139456,
      "grad_norm": 2.8125,
      "learning_rate": 4.8428719961681796e-05,
      "loss": 0.9812,
      "step": 82910
    },
    {
      "epoch": 0.29061392717829015,
      "grad_norm": 3.3125,
      "learning_rate": 4.8434561151415324e-05,
      "loss": 0.9432,
      "step": 82920
    },
    {
      "epoch": 0.29064897468518575,
      "grad_norm": 3.046875,
      "learning_rate": 4.844040234114885e-05,
      "loss": 0.9921,
      "step": 82930
    },
    {
      "epoch": 0.2906840221920814,
      "grad_norm": 3.234375,
      "learning_rate": 4.8446243530882373e-05,
      "loss": 0.9623,
      "step": 82940
    },
    {
      "epoch": 0.290719069698977,
      "grad_norm": 3.546875,
      "learning_rate": 4.8452084720615895e-05,
      "loss": 0.9627,
      "step": 82950
    },
    {
      "epoch": 0.2907541172058726,
      "grad_norm": 2.890625,
      "learning_rate": 4.845792591034942e-05,
      "loss": 0.9294,
      "step": 82960
    },
    {
      "epoch": 0.29078916471276817,
      "grad_norm": 2.890625,
      "learning_rate": 4.8463767100082945e-05,
      "loss": 0.9486,
      "step": 82970
    },
    {
      "epoch": 0.29082421221966376,
      "grad_norm": 2.53125,
      "learning_rate": 4.846960828981647e-05,
      "loss": 0.921,
      "step": 82980
    },
    {
      "epoch": 0.29085925972655935,
      "grad_norm": 2.9375,
      "learning_rate": 4.8475449479549994e-05,
      "loss": 0.9902,
      "step": 82990
    },
    {
      "epoch": 0.29089430723345494,
      "grad_norm": 3.15625,
      "learning_rate": 4.848129066928352e-05,
      "loss": 0.907,
      "step": 83000
    },
    {
      "epoch": 0.29092935474035053,
      "grad_norm": 3.234375,
      "learning_rate": 4.848713185901705e-05,
      "loss": 1.0539,
      "step": 83010
    },
    {
      "epoch": 0.2909644022472461,
      "grad_norm": 2.953125,
      "learning_rate": 4.849297304875057e-05,
      "loss": 0.9081,
      "step": 83020
    },
    {
      "epoch": 0.2909994497541417,
      "grad_norm": 3.203125,
      "learning_rate": 4.84988142384841e-05,
      "loss": 1.0041,
      "step": 83030
    },
    {
      "epoch": 0.29103449726103736,
      "grad_norm": 3.109375,
      "learning_rate": 4.850465542821762e-05,
      "loss": 0.9063,
      "step": 83040
    },
    {
      "epoch": 0.29106954476793295,
      "grad_norm": 3.3125,
      "learning_rate": 4.851049661795114e-05,
      "loss": 0.9485,
      "step": 83050
    },
    {
      "epoch": 0.29110459227482854,
      "grad_norm": 3.046875,
      "learning_rate": 4.851633780768467e-05,
      "loss": 0.8933,
      "step": 83060
    },
    {
      "epoch": 0.29113963978172414,
      "grad_norm": 3.15625,
      "learning_rate": 4.852217899741819e-05,
      "loss": 0.9694,
      "step": 83070
    },
    {
      "epoch": 0.2911746872886197,
      "grad_norm": 2.984375,
      "learning_rate": 4.852802018715172e-05,
      "loss": 0.9928,
      "step": 83080
    },
    {
      "epoch": 0.2912097347955153,
      "grad_norm": 3.21875,
      "learning_rate": 4.853386137688525e-05,
      "loss": 1.0723,
      "step": 83090
    },
    {
      "epoch": 0.2912447823024109,
      "grad_norm": 2.859375,
      "learning_rate": 4.853970256661877e-05,
      "loss": 0.9637,
      "step": 83100
    },
    {
      "epoch": 0.2912798298093065,
      "grad_norm": 3.09375,
      "learning_rate": 4.85455437563523e-05,
      "loss": 0.9917,
      "step": 83110
    },
    {
      "epoch": 0.2913148773162021,
      "grad_norm": 3.453125,
      "learning_rate": 4.855138494608582e-05,
      "loss": 1.1106,
      "step": 83120
    },
    {
      "epoch": 0.2913499248230977,
      "grad_norm": 3.484375,
      "learning_rate": 4.855722613581934e-05,
      "loss": 0.9857,
      "step": 83130
    },
    {
      "epoch": 0.29138497232999333,
      "grad_norm": 3.203125,
      "learning_rate": 4.856306732555287e-05,
      "loss": 1.0528,
      "step": 83140
    },
    {
      "epoch": 0.2914200198368889,
      "grad_norm": 3.125,
      "learning_rate": 4.856890851528639e-05,
      "loss": 0.9907,
      "step": 83150
    },
    {
      "epoch": 0.2914550673437845,
      "grad_norm": 3.3125,
      "learning_rate": 4.857474970501992e-05,
      "loss": 0.9977,
      "step": 83160
    },
    {
      "epoch": 0.2914901148506801,
      "grad_norm": 3.09375,
      "learning_rate": 4.858059089475345e-05,
      "loss": 0.9305,
      "step": 83170
    },
    {
      "epoch": 0.2915251623575757,
      "grad_norm": 2.890625,
      "learning_rate": 4.858643208448697e-05,
      "loss": 0.929,
      "step": 83180
    },
    {
      "epoch": 0.2915602098644713,
      "grad_norm": 3.140625,
      "learning_rate": 4.85922732742205e-05,
      "loss": 1.0337,
      "step": 83190
    },
    {
      "epoch": 0.2915952573713669,
      "grad_norm": 3.296875,
      "learning_rate": 4.859811446395402e-05,
      "loss": 1.0209,
      "step": 83200
    },
    {
      "epoch": 0.29163030487826247,
      "grad_norm": 3.0625,
      "learning_rate": 4.860395565368754e-05,
      "loss": 0.8925,
      "step": 83210
    },
    {
      "epoch": 0.29166535238515806,
      "grad_norm": 3.03125,
      "learning_rate": 4.860979684342107e-05,
      "loss": 0.9296,
      "step": 83220
    },
    {
      "epoch": 0.29170039989205365,
      "grad_norm": 3.125,
      "learning_rate": 4.86156380331546e-05,
      "loss": 0.9317,
      "step": 83230
    },
    {
      "epoch": 0.2917354473989493,
      "grad_norm": 3.390625,
      "learning_rate": 4.8621479222888126e-05,
      "loss": 1.0388,
      "step": 83240
    },
    {
      "epoch": 0.2917704949058449,
      "grad_norm": 2.9375,
      "learning_rate": 4.862732041262165e-05,
      "loss": 0.8567,
      "step": 83250
    },
    {
      "epoch": 0.2918055424127405,
      "grad_norm": 3.09375,
      "learning_rate": 4.863316160235517e-05,
      "loss": 1.0601,
      "step": 83260
    },
    {
      "epoch": 0.2918405899196361,
      "grad_norm": 3.75,
      "learning_rate": 4.86390027920887e-05,
      "loss": 0.9951,
      "step": 83270
    },
    {
      "epoch": 0.29187563742653166,
      "grad_norm": 3.078125,
      "learning_rate": 4.864484398182222e-05,
      "loss": 0.9062,
      "step": 83280
    },
    {
      "epoch": 0.29191068493342726,
      "grad_norm": 3.234375,
      "learning_rate": 4.8650685171555746e-05,
      "loss": 0.9839,
      "step": 83290
    },
    {
      "epoch": 0.29194573244032285,
      "grad_norm": 3.21875,
      "learning_rate": 4.865652636128927e-05,
      "loss": 0.971,
      "step": 83300
    },
    {
      "epoch": 0.29198077994721844,
      "grad_norm": 3.25,
      "learning_rate": 4.8662367551022796e-05,
      "loss": 0.9494,
      "step": 83310
    },
    {
      "epoch": 0.29201582745411403,
      "grad_norm": 3.296875,
      "learning_rate": 4.8668208740756324e-05,
      "loss": 0.8957,
      "step": 83320
    },
    {
      "epoch": 0.2920508749610096,
      "grad_norm": 3.03125,
      "learning_rate": 4.8674049930489846e-05,
      "loss": 0.9786,
      "step": 83330
    },
    {
      "epoch": 0.29208592246790527,
      "grad_norm": 2.859375,
      "learning_rate": 4.867989112022337e-05,
      "loss": 0.9585,
      "step": 83340
    },
    {
      "epoch": 0.29212096997480086,
      "grad_norm": 3.21875,
      "learning_rate": 4.8685732309956895e-05,
      "loss": 0.9743,
      "step": 83350
    },
    {
      "epoch": 0.29215601748169645,
      "grad_norm": 3.078125,
      "learning_rate": 4.869157349969042e-05,
      "loss": 0.9281,
      "step": 83360
    },
    {
      "epoch": 0.29219106498859204,
      "grad_norm": 3.515625,
      "learning_rate": 4.8697414689423945e-05,
      "loss": 0.9999,
      "step": 83370
    },
    {
      "epoch": 0.29222611249548763,
      "grad_norm": 3.0625,
      "learning_rate": 4.8703255879157467e-05,
      "loss": 0.9513,
      "step": 83380
    },
    {
      "epoch": 0.2922611600023832,
      "grad_norm": 2.75,
      "learning_rate": 4.8709097068890995e-05,
      "loss": 0.9493,
      "step": 83390
    },
    {
      "epoch": 0.2922962075092788,
      "grad_norm": 2.671875,
      "learning_rate": 4.871493825862452e-05,
      "loss": 0.9594,
      "step": 83400
    },
    {
      "epoch": 0.2923312550161744,
      "grad_norm": 3.3125,
      "learning_rate": 4.8720779448358045e-05,
      "loss": 0.9481,
      "step": 83410
    },
    {
      "epoch": 0.29236630252307,
      "grad_norm": 3.109375,
      "learning_rate": 4.8726620638091566e-05,
      "loss": 0.9519,
      "step": 83420
    },
    {
      "epoch": 0.29240135002996565,
      "grad_norm": 2.765625,
      "learning_rate": 4.8732461827825094e-05,
      "loss": 0.9426,
      "step": 83430
    },
    {
      "epoch": 0.29243639753686124,
      "grad_norm": 2.578125,
      "learning_rate": 4.8738303017558616e-05,
      "loss": 0.9868,
      "step": 83440
    },
    {
      "epoch": 0.29247144504375683,
      "grad_norm": 3.71875,
      "learning_rate": 4.8744144207292144e-05,
      "loss": 1.0049,
      "step": 83450
    },
    {
      "epoch": 0.2925064925506524,
      "grad_norm": 3.171875,
      "learning_rate": 4.8749985397025665e-05,
      "loss": 1.0019,
      "step": 83460
    },
    {
      "epoch": 0.292541540057548,
      "grad_norm": 2.890625,
      "learning_rate": 4.8755826586759194e-05,
      "loss": 0.9597,
      "step": 83470
    },
    {
      "epoch": 0.2925765875644436,
      "grad_norm": 2.90625,
      "learning_rate": 4.876166777649272e-05,
      "loss": 1.0344,
      "step": 83480
    },
    {
      "epoch": 0.2926116350713392,
      "grad_norm": 3.046875,
      "learning_rate": 4.876750896622624e-05,
      "loss": 0.939,
      "step": 83490
    },
    {
      "epoch": 0.2926466825782348,
      "grad_norm": 3.140625,
      "learning_rate": 4.877335015595977e-05,
      "loss": 0.9618,
      "step": 83500
    },
    {
      "epoch": 0.2926817300851304,
      "grad_norm": 3.109375,
      "learning_rate": 4.877919134569329e-05,
      "loss": 0.9483,
      "step": 83510
    },
    {
      "epoch": 0.29271677759202597,
      "grad_norm": 2.765625,
      "learning_rate": 4.8785032535426814e-05,
      "loss": 0.9955,
      "step": 83520
    },
    {
      "epoch": 0.2927518250989216,
      "grad_norm": 3.078125,
      "learning_rate": 4.879087372516034e-05,
      "loss": 0.9601,
      "step": 83530
    },
    {
      "epoch": 0.2927868726058172,
      "grad_norm": 3.0,
      "learning_rate": 4.8796714914893864e-05,
      "loss": 1.008,
      "step": 83540
    },
    {
      "epoch": 0.2928219201127128,
      "grad_norm": 3.421875,
      "learning_rate": 4.880255610462739e-05,
      "loss": 0.9154,
      "step": 83550
    },
    {
      "epoch": 0.2928569676196084,
      "grad_norm": 3.59375,
      "learning_rate": 4.880839729436092e-05,
      "loss": 0.9719,
      "step": 83560
    },
    {
      "epoch": 0.292892015126504,
      "grad_norm": 3.09375,
      "learning_rate": 4.881423848409444e-05,
      "loss": 1.017,
      "step": 83570
    },
    {
      "epoch": 0.29292706263339957,
      "grad_norm": 3.3125,
      "learning_rate": 4.882007967382797e-05,
      "loss": 0.948,
      "step": 83580
    },
    {
      "epoch": 0.29296211014029516,
      "grad_norm": 2.75,
      "learning_rate": 4.882592086356149e-05,
      "loss": 0.8983,
      "step": 83590
    },
    {
      "epoch": 0.29299715764719075,
      "grad_norm": 2.734375,
      "learning_rate": 4.883176205329501e-05,
      "loss": 1.0141,
      "step": 83600
    },
    {
      "epoch": 0.29303220515408634,
      "grad_norm": 3.078125,
      "learning_rate": 4.883760324302854e-05,
      "loss": 1.0255,
      "step": 83610
    },
    {
      "epoch": 0.29306725266098194,
      "grad_norm": 3.078125,
      "learning_rate": 4.884344443276207e-05,
      "loss": 1.0157,
      "step": 83620
    },
    {
      "epoch": 0.2931023001678776,
      "grad_norm": 3.171875,
      "learning_rate": 4.884928562249559e-05,
      "loss": 0.9792,
      "step": 83630
    },
    {
      "epoch": 0.2931373476747732,
      "grad_norm": 3.109375,
      "learning_rate": 4.885512681222912e-05,
      "loss": 1.0763,
      "step": 83640
    },
    {
      "epoch": 0.29317239518166877,
      "grad_norm": 2.90625,
      "learning_rate": 4.886096800196264e-05,
      "loss": 0.9111,
      "step": 83650
    },
    {
      "epoch": 0.29320744268856436,
      "grad_norm": 3.40625,
      "learning_rate": 4.886680919169617e-05,
      "loss": 0.9711,
      "step": 83660
    },
    {
      "epoch": 0.29324249019545995,
      "grad_norm": 3.3125,
      "learning_rate": 4.887265038142969e-05,
      "loss": 0.8788,
      "step": 83670
    },
    {
      "epoch": 0.29327753770235554,
      "grad_norm": 2.921875,
      "learning_rate": 4.887849157116321e-05,
      "loss": 0.9898,
      "step": 83680
    },
    {
      "epoch": 0.29331258520925113,
      "grad_norm": 3.59375,
      "learning_rate": 4.888433276089674e-05,
      "loss": 0.902,
      "step": 83690
    },
    {
      "epoch": 0.2933476327161467,
      "grad_norm": 3.328125,
      "learning_rate": 4.889017395063027e-05,
      "loss": 0.9551,
      "step": 83700
    },
    {
      "epoch": 0.2933826802230423,
      "grad_norm": 3.0,
      "learning_rate": 4.88960151403638e-05,
      "loss": 0.8794,
      "step": 83710
    },
    {
      "epoch": 0.2934177277299379,
      "grad_norm": 3.21875,
      "learning_rate": 4.890185633009732e-05,
      "loss": 0.8917,
      "step": 83720
    },
    {
      "epoch": 0.29345277523683355,
      "grad_norm": 2.703125,
      "learning_rate": 4.890769751983084e-05,
      "loss": 0.9613,
      "step": 83730
    },
    {
      "epoch": 0.29348782274372914,
      "grad_norm": 2.890625,
      "learning_rate": 4.891353870956437e-05,
      "loss": 0.9956,
      "step": 83740
    },
    {
      "epoch": 0.29352287025062473,
      "grad_norm": 2.703125,
      "learning_rate": 4.891937989929789e-05,
      "loss": 0.9872,
      "step": 83750
    },
    {
      "epoch": 0.2935579177575203,
      "grad_norm": 3.234375,
      "learning_rate": 4.892522108903142e-05,
      "loss": 1.0052,
      "step": 83760
    },
    {
      "epoch": 0.2935929652644159,
      "grad_norm": 3.296875,
      "learning_rate": 4.893106227876494e-05,
      "loss": 0.9444,
      "step": 83770
    },
    {
      "epoch": 0.2936280127713115,
      "grad_norm": 3.078125,
      "learning_rate": 4.893690346849847e-05,
      "loss": 0.9615,
      "step": 83780
    },
    {
      "epoch": 0.2936630602782071,
      "grad_norm": 3.265625,
      "learning_rate": 4.8942744658231995e-05,
      "loss": 0.9536,
      "step": 83790
    },
    {
      "epoch": 0.2936981077851027,
      "grad_norm": 3.453125,
      "learning_rate": 4.894858584796552e-05,
      "loss": 0.9767,
      "step": 83800
    },
    {
      "epoch": 0.2937331552919983,
      "grad_norm": 2.921875,
      "learning_rate": 4.895442703769904e-05,
      "loss": 0.9906,
      "step": 83810
    },
    {
      "epoch": 0.2937682027988939,
      "grad_norm": 2.84375,
      "learning_rate": 4.8960268227432567e-05,
      "loss": 1.0173,
      "step": 83820
    },
    {
      "epoch": 0.2938032503057895,
      "grad_norm": 3.078125,
      "learning_rate": 4.896610941716609e-05,
      "loss": 0.913,
      "step": 83830
    },
    {
      "epoch": 0.2938382978126851,
      "grad_norm": 3.0625,
      "learning_rate": 4.8971950606899616e-05,
      "loss": 0.912,
      "step": 83840
    },
    {
      "epoch": 0.2938733453195807,
      "grad_norm": 3.125,
      "learning_rate": 4.897779179663314e-05,
      "loss": 0.988,
      "step": 83850
    },
    {
      "epoch": 0.2939083928264763,
      "grad_norm": 2.984375,
      "learning_rate": 4.8983632986366666e-05,
      "loss": 0.952,
      "step": 83860
    },
    {
      "epoch": 0.2939434403333719,
      "grad_norm": 3.34375,
      "learning_rate": 4.8989474176100194e-05,
      "loss": 0.9216,
      "step": 83870
    },
    {
      "epoch": 0.2939784878402675,
      "grad_norm": 2.953125,
      "learning_rate": 4.8995315365833716e-05,
      "loss": 0.9079,
      "step": 83880
    },
    {
      "epoch": 0.29401353534716307,
      "grad_norm": 3.1875,
      "learning_rate": 4.900115655556724e-05,
      "loss": 0.9866,
      "step": 83890
    },
    {
      "epoch": 0.29404858285405866,
      "grad_norm": 3.734375,
      "learning_rate": 4.9006997745300765e-05,
      "loss": 0.925,
      "step": 83900
    },
    {
      "epoch": 0.29408363036095425,
      "grad_norm": 3.640625,
      "learning_rate": 4.901283893503429e-05,
      "loss": 0.9808,
      "step": 83910
    },
    {
      "epoch": 0.2941186778678499,
      "grad_norm": 3.046875,
      "learning_rate": 4.9018680124767815e-05,
      "loss": 0.9831,
      "step": 83920
    },
    {
      "epoch": 0.2941537253747455,
      "grad_norm": 3.140625,
      "learning_rate": 4.902452131450134e-05,
      "loss": 1.0451,
      "step": 83930
    },
    {
      "epoch": 0.2941887728816411,
      "grad_norm": 2.984375,
      "learning_rate": 4.9030362504234865e-05,
      "loss": 0.9912,
      "step": 83940
    },
    {
      "epoch": 0.29422382038853667,
      "grad_norm": 3.0625,
      "learning_rate": 4.903620369396839e-05,
      "loss": 0.9629,
      "step": 83950
    },
    {
      "epoch": 0.29425886789543226,
      "grad_norm": 3.734375,
      "learning_rate": 4.9042044883701914e-05,
      "loss": 0.9063,
      "step": 83960
    },
    {
      "epoch": 0.29429391540232785,
      "grad_norm": 2.90625,
      "learning_rate": 4.904788607343544e-05,
      "loss": 1.0308,
      "step": 83970
    },
    {
      "epoch": 0.29432896290922345,
      "grad_norm": 3.265625,
      "learning_rate": 4.9053727263168964e-05,
      "loss": 0.9575,
      "step": 83980
    },
    {
      "epoch": 0.29436401041611904,
      "grad_norm": 3.46875,
      "learning_rate": 4.9059568452902485e-05,
      "loss": 1.1197,
      "step": 83990
    },
    {
      "epoch": 0.29439905792301463,
      "grad_norm": 3.34375,
      "learning_rate": 4.9065409642636014e-05,
      "loss": 1.019,
      "step": 84000
    },
    {
      "epoch": 0.2944341054299102,
      "grad_norm": 3.328125,
      "learning_rate": 4.907125083236954e-05,
      "loss": 0.9613,
      "step": 84010
    },
    {
      "epoch": 0.29446915293680587,
      "grad_norm": 3.125,
      "learning_rate": 4.9077092022103063e-05,
      "loss": 0.9341,
      "step": 84020
    },
    {
      "epoch": 0.29450420044370146,
      "grad_norm": 3.21875,
      "learning_rate": 4.908293321183659e-05,
      "loss": 0.9844,
      "step": 84030
    },
    {
      "epoch": 0.29453924795059705,
      "grad_norm": 2.953125,
      "learning_rate": 4.908877440157011e-05,
      "loss": 1.0427,
      "step": 84040
    },
    {
      "epoch": 0.29457429545749264,
      "grad_norm": 3.40625,
      "learning_rate": 4.909461559130364e-05,
      "loss": 0.9298,
      "step": 84050
    },
    {
      "epoch": 0.29460934296438823,
      "grad_norm": 3.0,
      "learning_rate": 4.910045678103716e-05,
      "loss": 0.9506,
      "step": 84060
    },
    {
      "epoch": 0.2946443904712838,
      "grad_norm": 3.359375,
      "learning_rate": 4.9106297970770684e-05,
      "loss": 0.9703,
      "step": 84070
    },
    {
      "epoch": 0.2946794379781794,
      "grad_norm": 3.09375,
      "learning_rate": 4.911213916050421e-05,
      "loss": 1.0567,
      "step": 84080
    },
    {
      "epoch": 0.294714485485075,
      "grad_norm": 3.140625,
      "learning_rate": 4.911798035023774e-05,
      "loss": 0.9528,
      "step": 84090
    },
    {
      "epoch": 0.2947495329919706,
      "grad_norm": 3.0625,
      "learning_rate": 4.912382153997126e-05,
      "loss": 0.9633,
      "step": 84100
    },
    {
      "epoch": 0.2947845804988662,
      "grad_norm": 2.90625,
      "learning_rate": 4.912966272970479e-05,
      "loss": 0.9042,
      "step": 84110
    },
    {
      "epoch": 0.29481962800576184,
      "grad_norm": 3.15625,
      "learning_rate": 4.913550391943831e-05,
      "loss": 0.9515,
      "step": 84120
    },
    {
      "epoch": 0.2948546755126574,
      "grad_norm": 3.34375,
      "learning_rate": 4.914134510917184e-05,
      "loss": 1.0184,
      "step": 84130
    },
    {
      "epoch": 0.294889723019553,
      "grad_norm": 3.09375,
      "learning_rate": 4.914718629890536e-05,
      "loss": 1.0256,
      "step": 84140
    },
    {
      "epoch": 0.2949247705264486,
      "grad_norm": 2.625,
      "learning_rate": 4.915302748863889e-05,
      "loss": 0.9563,
      "step": 84150
    },
    {
      "epoch": 0.2949598180333442,
      "grad_norm": 3.265625,
      "learning_rate": 4.915886867837241e-05,
      "loss": 0.9918,
      "step": 84160
    },
    {
      "epoch": 0.2949948655402398,
      "grad_norm": 2.765625,
      "learning_rate": 4.916470986810594e-05,
      "loss": 0.9083,
      "step": 84170
    },
    {
      "epoch": 0.2950299130471354,
      "grad_norm": 2.953125,
      "learning_rate": 4.917055105783947e-05,
      "loss": 0.9541,
      "step": 84180
    },
    {
      "epoch": 0.295064960554031,
      "grad_norm": 2.984375,
      "learning_rate": 4.917639224757299e-05,
      "loss": 0.9771,
      "step": 84190
    },
    {
      "epoch": 0.29510000806092657,
      "grad_norm": 3.046875,
      "learning_rate": 4.918223343730651e-05,
      "loss": 1.0002,
      "step": 84200
    },
    {
      "epoch": 0.29513505556782216,
      "grad_norm": 2.96875,
      "learning_rate": 4.918807462704004e-05,
      "loss": 0.9718,
      "step": 84210
    },
    {
      "epoch": 0.2951701030747178,
      "grad_norm": 2.984375,
      "learning_rate": 4.919391581677356e-05,
      "loss": 1.0006,
      "step": 84220
    },
    {
      "epoch": 0.2952051505816134,
      "grad_norm": 2.796875,
      "learning_rate": 4.919975700650709e-05,
      "loss": 0.8752,
      "step": 84230
    },
    {
      "epoch": 0.295240198088509,
      "grad_norm": 2.921875,
      "learning_rate": 4.920559819624062e-05,
      "loss": 0.8488,
      "step": 84240
    },
    {
      "epoch": 0.2952752455954046,
      "grad_norm": 3.125,
      "learning_rate": 4.921143938597414e-05,
      "loss": 1.0069,
      "step": 84250
    },
    {
      "epoch": 0.29531029310230017,
      "grad_norm": 2.96875,
      "learning_rate": 4.9217280575707666e-05,
      "loss": 0.8907,
      "step": 84260
    },
    {
      "epoch": 0.29534534060919576,
      "grad_norm": 2.625,
      "learning_rate": 4.922312176544119e-05,
      "loss": 1.0806,
      "step": 84270
    },
    {
      "epoch": 0.29538038811609135,
      "grad_norm": 3.109375,
      "learning_rate": 4.922896295517471e-05,
      "loss": 0.9519,
      "step": 84280
    },
    {
      "epoch": 0.29541543562298694,
      "grad_norm": 2.6875,
      "learning_rate": 4.923480414490824e-05,
      "loss": 0.9672,
      "step": 84290
    },
    {
      "epoch": 0.29545048312988254,
      "grad_norm": 3.078125,
      "learning_rate": 4.924064533464176e-05,
      "loss": 0.9362,
      "step": 84300
    },
    {
      "epoch": 0.2954855306367781,
      "grad_norm": 3.25,
      "learning_rate": 4.924648652437529e-05,
      "loss": 0.9977,
      "step": 84310
    },
    {
      "epoch": 0.2955205781436738,
      "grad_norm": 3.65625,
      "learning_rate": 4.9252327714108816e-05,
      "loss": 1.022,
      "step": 84320
    },
    {
      "epoch": 0.29555562565056936,
      "grad_norm": 3.328125,
      "learning_rate": 4.925816890384234e-05,
      "loss": 1.0619,
      "step": 84330
    },
    {
      "epoch": 0.29559067315746496,
      "grad_norm": 3.078125,
      "learning_rate": 4.9264010093575865e-05,
      "loss": 1.05,
      "step": 84340
    },
    {
      "epoch": 0.29562572066436055,
      "grad_norm": 3.5,
      "learning_rate": 4.926985128330939e-05,
      "loss": 0.9241,
      "step": 84350
    },
    {
      "epoch": 0.29566076817125614,
      "grad_norm": 3.9375,
      "learning_rate": 4.927569247304291e-05,
      "loss": 1.0581,
      "step": 84360
    },
    {
      "epoch": 0.29569581567815173,
      "grad_norm": 3.421875,
      "learning_rate": 4.9281533662776436e-05,
      "loss": 0.978,
      "step": 84370
    },
    {
      "epoch": 0.2957308631850473,
      "grad_norm": 3.03125,
      "learning_rate": 4.928737485250996e-05,
      "loss": 0.9341,
      "step": 84380
    },
    {
      "epoch": 0.2957659106919429,
      "grad_norm": 2.984375,
      "learning_rate": 4.9293216042243486e-05,
      "loss": 0.9511,
      "step": 84390
    },
    {
      "epoch": 0.2958009581988385,
      "grad_norm": 3.359375,
      "learning_rate": 4.9299057231977014e-05,
      "loss": 1.0402,
      "step": 84400
    },
    {
      "epoch": 0.2958360057057341,
      "grad_norm": 3.546875,
      "learning_rate": 4.9304898421710536e-05,
      "loss": 0.9494,
      "step": 84410
    },
    {
      "epoch": 0.29587105321262974,
      "grad_norm": 2.71875,
      "learning_rate": 4.9310739611444064e-05,
      "loss": 0.9143,
      "step": 84420
    },
    {
      "epoch": 0.29590610071952533,
      "grad_norm": 2.828125,
      "learning_rate": 4.9316580801177585e-05,
      "loss": 0.9931,
      "step": 84430
    },
    {
      "epoch": 0.2959411482264209,
      "grad_norm": 3.484375,
      "learning_rate": 4.9322421990911114e-05,
      "loss": 1.035,
      "step": 84440
    },
    {
      "epoch": 0.2959761957333165,
      "grad_norm": 3.296875,
      "learning_rate": 4.9328263180644635e-05,
      "loss": 0.9311,
      "step": 84450
    },
    {
      "epoch": 0.2960112432402121,
      "grad_norm": 3.5625,
      "learning_rate": 4.9334104370378157e-05,
      "loss": 0.9693,
      "step": 84460
    },
    {
      "epoch": 0.2960462907471077,
      "grad_norm": 3.609375,
      "learning_rate": 4.9339945560111685e-05,
      "loss": 0.9516,
      "step": 84470
    },
    {
      "epoch": 0.2960813382540033,
      "grad_norm": 3.171875,
      "learning_rate": 4.934578674984521e-05,
      "loss": 0.9988,
      "step": 84480
    },
    {
      "epoch": 0.2961163857608989,
      "grad_norm": 2.90625,
      "learning_rate": 4.9351627939578734e-05,
      "loss": 0.9404,
      "step": 84490
    },
    {
      "epoch": 0.2961514332677945,
      "grad_norm": 3.0625,
      "learning_rate": 4.935746912931226e-05,
      "loss": 0.9646,
      "step": 84500
    },
    {
      "epoch": 0.2961864807746901,
      "grad_norm": 2.953125,
      "learning_rate": 4.9363310319045784e-05,
      "loss": 0.94,
      "step": 84510
    },
    {
      "epoch": 0.2962215282815857,
      "grad_norm": 2.921875,
      "learning_rate": 4.936915150877931e-05,
      "loss": 0.9361,
      "step": 84520
    },
    {
      "epoch": 0.2962565757884813,
      "grad_norm": 3.265625,
      "learning_rate": 4.9374992698512834e-05,
      "loss": 0.914,
      "step": 84530
    },
    {
      "epoch": 0.2962916232953769,
      "grad_norm": 3.125,
      "learning_rate": 4.9380833888246355e-05,
      "loss": 0.9703,
      "step": 84540
    },
    {
      "epoch": 0.2963266708022725,
      "grad_norm": 2.9375,
      "learning_rate": 4.9386675077979884e-05,
      "loss": 0.9252,
      "step": 84550
    },
    {
      "epoch": 0.2963617183091681,
      "grad_norm": 2.890625,
      "learning_rate": 4.939251626771341e-05,
      "loss": 0.9686,
      "step": 84560
    },
    {
      "epoch": 0.29639676581606367,
      "grad_norm": 3.078125,
      "learning_rate": 4.939835745744693e-05,
      "loss": 1.0157,
      "step": 84570
    },
    {
      "epoch": 0.29643181332295926,
      "grad_norm": 3.453125,
      "learning_rate": 4.940419864718046e-05,
      "loss": 0.9509,
      "step": 84580
    },
    {
      "epoch": 0.29646686082985485,
      "grad_norm": 2.921875,
      "learning_rate": 4.941003983691398e-05,
      "loss": 1.0482,
      "step": 84590
    },
    {
      "epoch": 0.29650190833675044,
      "grad_norm": 3.234375,
      "learning_rate": 4.941588102664751e-05,
      "loss": 0.9254,
      "step": 84600
    },
    {
      "epoch": 0.2965369558436461,
      "grad_norm": 3.328125,
      "learning_rate": 4.942172221638103e-05,
      "loss": 0.9911,
      "step": 84610
    },
    {
      "epoch": 0.2965720033505417,
      "grad_norm": 2.890625,
      "learning_rate": 4.942756340611456e-05,
      "loss": 0.9694,
      "step": 84620
    },
    {
      "epoch": 0.29660705085743727,
      "grad_norm": 3.375,
      "learning_rate": 4.943340459584809e-05,
      "loss": 0.9115,
      "step": 84630
    },
    {
      "epoch": 0.29664209836433286,
      "grad_norm": 3.921875,
      "learning_rate": 4.943924578558161e-05,
      "loss": 0.9526,
      "step": 84640
    },
    {
      "epoch": 0.29667714587122845,
      "grad_norm": 3.203125,
      "learning_rate": 4.944508697531514e-05,
      "loss": 0.9372,
      "step": 84650
    },
    {
      "epoch": 0.29671219337812405,
      "grad_norm": 3.09375,
      "learning_rate": 4.945092816504866e-05,
      "loss": 0.9529,
      "step": 84660
    },
    {
      "epoch": 0.29674724088501964,
      "grad_norm": 3.5625,
      "learning_rate": 4.945676935478218e-05,
      "loss": 0.8866,
      "step": 84670
    },
    {
      "epoch": 0.2967822883919152,
      "grad_norm": 2.78125,
      "learning_rate": 4.946261054451571e-05,
      "loss": 0.9817,
      "step": 84680
    },
    {
      "epoch": 0.2968173358988108,
      "grad_norm": 2.484375,
      "learning_rate": 4.946845173424923e-05,
      "loss": 0.9927,
      "step": 84690
    },
    {
      "epoch": 0.2968523834057064,
      "grad_norm": 2.875,
      "learning_rate": 4.947429292398276e-05,
      "loss": 0.9726,
      "step": 84700
    },
    {
      "epoch": 0.29688743091260206,
      "grad_norm": 3.234375,
      "learning_rate": 4.948013411371629e-05,
      "loss": 0.9771,
      "step": 84710
    },
    {
      "epoch": 0.29692247841949765,
      "grad_norm": 2.859375,
      "learning_rate": 4.948597530344981e-05,
      "loss": 0.9528,
      "step": 84720
    },
    {
      "epoch": 0.29695752592639324,
      "grad_norm": 2.953125,
      "learning_rate": 4.949181649318334e-05,
      "loss": 0.842,
      "step": 84730
    },
    {
      "epoch": 0.29699257343328883,
      "grad_norm": 3.265625,
      "learning_rate": 4.949765768291686e-05,
      "loss": 0.958,
      "step": 84740
    },
    {
      "epoch": 0.2970276209401844,
      "grad_norm": 3.1875,
      "learning_rate": 4.950349887265038e-05,
      "loss": 0.9447,
      "step": 84750
    },
    {
      "epoch": 0.29706266844708,
      "grad_norm": 3.578125,
      "learning_rate": 4.950934006238391e-05,
      "loss": 0.9232,
      "step": 84760
    },
    {
      "epoch": 0.2970977159539756,
      "grad_norm": 3.296875,
      "learning_rate": 4.951518125211743e-05,
      "loss": 0.9681,
      "step": 84770
    },
    {
      "epoch": 0.2971327634608712,
      "grad_norm": 3.390625,
      "learning_rate": 4.952102244185096e-05,
      "loss": 0.9829,
      "step": 84780
    },
    {
      "epoch": 0.2971678109677668,
      "grad_norm": 3.21875,
      "learning_rate": 4.9526863631584487e-05,
      "loss": 0.9245,
      "step": 84790
    },
    {
      "epoch": 0.2972028584746624,
      "grad_norm": 3.1875,
      "learning_rate": 4.953270482131801e-05,
      "loss": 0.9822,
      "step": 84800
    },
    {
      "epoch": 0.297237905981558,
      "grad_norm": 2.625,
      "learning_rate": 4.9538546011051536e-05,
      "loss": 0.9465,
      "step": 84810
    },
    {
      "epoch": 0.2972729534884536,
      "grad_norm": 2.984375,
      "learning_rate": 4.954438720078506e-05,
      "loss": 0.9089,
      "step": 84820
    },
    {
      "epoch": 0.2973080009953492,
      "grad_norm": 3.578125,
      "learning_rate": 4.9550228390518586e-05,
      "loss": 1.0242,
      "step": 84830
    },
    {
      "epoch": 0.2973430485022448,
      "grad_norm": 2.875,
      "learning_rate": 4.955606958025211e-05,
      "loss": 0.9302,
      "step": 84840
    },
    {
      "epoch": 0.2973780960091404,
      "grad_norm": 3.109375,
      "learning_rate": 4.956191076998563e-05,
      "loss": 0.9158,
      "step": 84850
    },
    {
      "epoch": 0.297413143516036,
      "grad_norm": 3.078125,
      "learning_rate": 4.956775195971916e-05,
      "loss": 1.1152,
      "step": 84860
    },
    {
      "epoch": 0.2974481910229316,
      "grad_norm": 3.171875,
      "learning_rate": 4.9573593149452685e-05,
      "loss": 0.9213,
      "step": 84870
    },
    {
      "epoch": 0.29748323852982717,
      "grad_norm": 2.6875,
      "learning_rate": 4.957943433918621e-05,
      "loss": 0.9145,
      "step": 84880
    },
    {
      "epoch": 0.29751828603672276,
      "grad_norm": 3.8125,
      "learning_rate": 4.9585275528919735e-05,
      "loss": 1.0773,
      "step": 84890
    },
    {
      "epoch": 0.29755333354361835,
      "grad_norm": 3.296875,
      "learning_rate": 4.9591116718653257e-05,
      "loss": 0.9975,
      "step": 84900
    },
    {
      "epoch": 0.297588381050514,
      "grad_norm": 3.0625,
      "learning_rate": 4.9596957908386785e-05,
      "loss": 0.9468,
      "step": 84910
    },
    {
      "epoch": 0.2976234285574096,
      "grad_norm": 3.203125,
      "learning_rate": 4.9602799098120306e-05,
      "loss": 0.9858,
      "step": 84920
    },
    {
      "epoch": 0.2976584760643052,
      "grad_norm": 3.015625,
      "learning_rate": 4.960864028785383e-05,
      "loss": 1.0433,
      "step": 84930
    },
    {
      "epoch": 0.29769352357120077,
      "grad_norm": 3.25,
      "learning_rate": 4.961448147758736e-05,
      "loss": 0.9749,
      "step": 84940
    },
    {
      "epoch": 0.29772857107809636,
      "grad_norm": 2.734375,
      "learning_rate": 4.9620322667320884e-05,
      "loss": 0.9589,
      "step": 84950
    },
    {
      "epoch": 0.29776361858499195,
      "grad_norm": 2.96875,
      "learning_rate": 4.9626163857054406e-05,
      "loss": 0.9618,
      "step": 84960
    },
    {
      "epoch": 0.29779866609188754,
      "grad_norm": 3.0,
      "learning_rate": 4.9632005046787934e-05,
      "loss": 0.9883,
      "step": 84970
    },
    {
      "epoch": 0.29783371359878313,
      "grad_norm": 3.375,
      "learning_rate": 4.9637846236521455e-05,
      "loss": 1.0337,
      "step": 84980
    },
    {
      "epoch": 0.2978687611056787,
      "grad_norm": 3.421875,
      "learning_rate": 4.9643687426254983e-05,
      "loss": 0.9892,
      "step": 84990
    },
    {
      "epoch": 0.2979038086125743,
      "grad_norm": 2.890625,
      "learning_rate": 4.9649528615988505e-05,
      "loss": 0.9167,
      "step": 85000
    },
    {
      "epoch": 0.2979038086125743,
      "eval_loss": 0.9150681495666504,
      "eval_runtime": 555.2857,
      "eval_samples_per_second": 685.118,
      "eval_steps_per_second": 57.093,
      "step": 85000
    },
    {
      "epoch": 0.29793885611946996,
      "grad_norm": 3.5,
      "learning_rate": 4.9655369805722026e-05,
      "loss": 0.9935,
      "step": 85010
    },
    {
      "epoch": 0.29797390362636555,
      "grad_norm": 3.0625,
      "learning_rate": 4.966121099545556e-05,
      "loss": 0.9074,
      "step": 85020
    },
    {
      "epoch": 0.29800895113326115,
      "grad_norm": 3.0625,
      "learning_rate": 4.966705218518908e-05,
      "loss": 0.9248,
      "step": 85030
    },
    {
      "epoch": 0.29804399864015674,
      "grad_norm": 2.90625,
      "learning_rate": 4.967289337492261e-05,
      "loss": 0.9846,
      "step": 85040
    },
    {
      "epoch": 0.29807904614705233,
      "grad_norm": 3.234375,
      "learning_rate": 4.967873456465613e-05,
      "loss": 1.0168,
      "step": 85050
    },
    {
      "epoch": 0.2981140936539479,
      "grad_norm": 2.796875,
      "learning_rate": 4.9684575754389654e-05,
      "loss": 1.0115,
      "step": 85060
    },
    {
      "epoch": 0.2981491411608435,
      "grad_norm": 3.609375,
      "learning_rate": 4.969041694412318e-05,
      "loss": 0.9863,
      "step": 85070
    },
    {
      "epoch": 0.2981841886677391,
      "grad_norm": 2.875,
      "learning_rate": 4.9696258133856704e-05,
      "loss": 0.9119,
      "step": 85080
    },
    {
      "epoch": 0.2982192361746347,
      "grad_norm": 3.40625,
      "learning_rate": 4.970209932359023e-05,
      "loss": 1.0337,
      "step": 85090
    },
    {
      "epoch": 0.29825428368153034,
      "grad_norm": 3.265625,
      "learning_rate": 4.970794051332376e-05,
      "loss": 0.9497,
      "step": 85100
    },
    {
      "epoch": 0.29828933118842593,
      "grad_norm": 2.796875,
      "learning_rate": 4.971378170305728e-05,
      "loss": 0.9743,
      "step": 85110
    },
    {
      "epoch": 0.2983243786953215,
      "grad_norm": 2.796875,
      "learning_rate": 4.971962289279081e-05,
      "loss": 1.0244,
      "step": 85120
    },
    {
      "epoch": 0.2983594262022171,
      "grad_norm": 3.171875,
      "learning_rate": 4.972546408252433e-05,
      "loss": 1.0578,
      "step": 85130
    },
    {
      "epoch": 0.2983944737091127,
      "grad_norm": 2.703125,
      "learning_rate": 4.973130527225785e-05,
      "loss": 0.9786,
      "step": 85140
    },
    {
      "epoch": 0.2984295212160083,
      "grad_norm": 3.515625,
      "learning_rate": 4.973714646199138e-05,
      "loss": 0.9594,
      "step": 85150
    },
    {
      "epoch": 0.2984645687229039,
      "grad_norm": 3.46875,
      "learning_rate": 4.97429876517249e-05,
      "loss": 1.0148,
      "step": 85160
    },
    {
      "epoch": 0.2984996162297995,
      "grad_norm": 2.84375,
      "learning_rate": 4.974882884145843e-05,
      "loss": 0.994,
      "step": 85170
    },
    {
      "epoch": 0.29853466373669507,
      "grad_norm": 3.265625,
      "learning_rate": 4.975467003119196e-05,
      "loss": 0.9482,
      "step": 85180
    },
    {
      "epoch": 0.29856971124359066,
      "grad_norm": 3.1875,
      "learning_rate": 4.976051122092548e-05,
      "loss": 0.9596,
      "step": 85190
    },
    {
      "epoch": 0.2986047587504863,
      "grad_norm": 3.046875,
      "learning_rate": 4.976635241065901e-05,
      "loss": 0.9103,
      "step": 85200
    },
    {
      "epoch": 0.2986398062573819,
      "grad_norm": 3.515625,
      "learning_rate": 4.977219360039253e-05,
      "loss": 0.9056,
      "step": 85210
    },
    {
      "epoch": 0.2986748537642775,
      "grad_norm": 3.4375,
      "learning_rate": 4.977803479012605e-05,
      "loss": 1.0565,
      "step": 85220
    },
    {
      "epoch": 0.2987099012711731,
      "grad_norm": 2.953125,
      "learning_rate": 4.978387597985958e-05,
      "loss": 0.9497,
      "step": 85230
    },
    {
      "epoch": 0.2987449487780687,
      "grad_norm": 3.34375,
      "learning_rate": 4.97897171695931e-05,
      "loss": 0.9096,
      "step": 85240
    },
    {
      "epoch": 0.29877999628496427,
      "grad_norm": 2.828125,
      "learning_rate": 4.979555835932663e-05,
      "loss": 0.969,
      "step": 85250
    },
    {
      "epoch": 0.29881504379185986,
      "grad_norm": 3.046875,
      "learning_rate": 4.980139954906016e-05,
      "loss": 0.9605,
      "step": 85260
    },
    {
      "epoch": 0.29885009129875545,
      "grad_norm": 3.078125,
      "learning_rate": 4.980724073879368e-05,
      "loss": 0.9375,
      "step": 85270
    },
    {
      "epoch": 0.29888513880565104,
      "grad_norm": 2.78125,
      "learning_rate": 4.981308192852721e-05,
      "loss": 0.9848,
      "step": 85280
    },
    {
      "epoch": 0.29892018631254663,
      "grad_norm": 3.28125,
      "learning_rate": 4.981892311826073e-05,
      "loss": 0.9857,
      "step": 85290
    },
    {
      "epoch": 0.2989552338194423,
      "grad_norm": 3.3125,
      "learning_rate": 4.982476430799426e-05,
      "loss": 1.0519,
      "step": 85300
    },
    {
      "epoch": 0.29899028132633787,
      "grad_norm": 2.953125,
      "learning_rate": 4.983060549772778e-05,
      "loss": 0.9388,
      "step": 85310
    },
    {
      "epoch": 0.29902532883323346,
      "grad_norm": 3.484375,
      "learning_rate": 4.98364466874613e-05,
      "loss": 0.9189,
      "step": 85320
    },
    {
      "epoch": 0.29906037634012905,
      "grad_norm": 3.34375,
      "learning_rate": 4.9842287877194835e-05,
      "loss": 0.9469,
      "step": 85330
    },
    {
      "epoch": 0.29909542384702464,
      "grad_norm": 2.859375,
      "learning_rate": 4.9848129066928356e-05,
      "loss": 1.0074,
      "step": 85340
    },
    {
      "epoch": 0.29913047135392024,
      "grad_norm": 2.625,
      "learning_rate": 4.985397025666188e-05,
      "loss": 0.9161,
      "step": 85350
    },
    {
      "epoch": 0.2991655188608158,
      "grad_norm": 3.09375,
      "learning_rate": 4.9859811446395406e-05,
      "loss": 1.1018,
      "step": 85360
    },
    {
      "epoch": 0.2992005663677114,
      "grad_norm": 3.109375,
      "learning_rate": 4.986565263612893e-05,
      "loss": 1.0521,
      "step": 85370
    },
    {
      "epoch": 0.299235613874607,
      "grad_norm": 2.765625,
      "learning_rate": 4.9871493825862456e-05,
      "loss": 0.9805,
      "step": 85380
    },
    {
      "epoch": 0.2992706613815026,
      "grad_norm": 3.203125,
      "learning_rate": 4.987733501559598e-05,
      "loss": 0.9749,
      "step": 85390
    },
    {
      "epoch": 0.29930570888839825,
      "grad_norm": 3.25,
      "learning_rate": 4.98831762053295e-05,
      "loss": 1.0044,
      "step": 85400
    },
    {
      "epoch": 0.29934075639529384,
      "grad_norm": 3.1875,
      "learning_rate": 4.9889017395063034e-05,
      "loss": 1.0374,
      "step": 85410
    },
    {
      "epoch": 0.29937580390218943,
      "grad_norm": 3.125,
      "learning_rate": 4.9894858584796555e-05,
      "loss": 0.9955,
      "step": 85420
    },
    {
      "epoch": 0.299410851409085,
      "grad_norm": 2.84375,
      "learning_rate": 4.990069977453008e-05,
      "loss": 0.9599,
      "step": 85430
    },
    {
      "epoch": 0.2994458989159806,
      "grad_norm": 2.984375,
      "learning_rate": 4.9906540964263605e-05,
      "loss": 0.9974,
      "step": 85440
    },
    {
      "epoch": 0.2994809464228762,
      "grad_norm": 2.859375,
      "learning_rate": 4.9912382153997126e-05,
      "loss": 1.0179,
      "step": 85450
    },
    {
      "epoch": 0.2995159939297718,
      "grad_norm": 2.859375,
      "learning_rate": 4.9918223343730655e-05,
      "loss": 0.8574,
      "step": 85460
    },
    {
      "epoch": 0.2995510414366674,
      "grad_norm": 3.1875,
      "learning_rate": 4.9924064533464176e-05,
      "loss": 1.0412,
      "step": 85470
    },
    {
      "epoch": 0.299586088943563,
      "grad_norm": 2.78125,
      "learning_rate": 4.99299057231977e-05,
      "loss": 1.0061,
      "step": 85480
    },
    {
      "epoch": 0.29962113645045857,
      "grad_norm": 3.0,
      "learning_rate": 4.993574691293123e-05,
      "loss": 0.9123,
      "step": 85490
    },
    {
      "epoch": 0.2996561839573542,
      "grad_norm": 2.90625,
      "learning_rate": 4.9941588102664754e-05,
      "loss": 1.0526,
      "step": 85500
    },
    {
      "epoch": 0.2996912314642498,
      "grad_norm": 3.265625,
      "learning_rate": 4.994742929239828e-05,
      "loss": 1.0339,
      "step": 85510
    },
    {
      "epoch": 0.2997262789711454,
      "grad_norm": 3.234375,
      "learning_rate": 4.9953270482131804e-05,
      "loss": 0.9344,
      "step": 85520
    },
    {
      "epoch": 0.299761326478041,
      "grad_norm": 3.1875,
      "learning_rate": 4.9959111671865325e-05,
      "loss": 0.9182,
      "step": 85530
    },
    {
      "epoch": 0.2997963739849366,
      "grad_norm": 3.375,
      "learning_rate": 4.996495286159885e-05,
      "loss": 0.9493,
      "step": 85540
    },
    {
      "epoch": 0.2998314214918322,
      "grad_norm": 3.28125,
      "learning_rate": 4.9970794051332375e-05,
      "loss": 1.015,
      "step": 85550
    },
    {
      "epoch": 0.29986646899872776,
      "grad_norm": 3.296875,
      "learning_rate": 4.99766352410659e-05,
      "loss": 1.0292,
      "step": 85560
    },
    {
      "epoch": 0.29990151650562336,
      "grad_norm": 3.171875,
      "learning_rate": 4.998247643079943e-05,
      "loss": 0.9511,
      "step": 85570
    },
    {
      "epoch": 0.29993656401251895,
      "grad_norm": 3.421875,
      "learning_rate": 4.998831762053295e-05,
      "loss": 1.0409,
      "step": 85580
    },
    {
      "epoch": 0.2999716115194146,
      "grad_norm": 2.96875,
      "learning_rate": 4.999415881026648e-05,
      "loss": 0.9348,
      "step": 85590
    },
    {
      "epoch": 0.3000066590263102,
      "grad_norm": 3.328125,
      "learning_rate": 5e-05,
      "loss": 0.9729,
      "step": 85600
    },
    {
      "epoch": 0.3000417065332058,
      "grad_norm": 4.40625,
      "learning_rate": 4.9999350971336304e-05,
      "loss": 1.0503,
      "step": 85610
    },
    {
      "epoch": 0.30007675404010137,
      "grad_norm": 3.328125,
      "learning_rate": 4.99987019426726e-05,
      "loss": 0.9708,
      "step": 85620
    },
    {
      "epoch": 0.30011180154699696,
      "grad_norm": 3.03125,
      "learning_rate": 4.99980529140089e-05,
      "loss": 0.9999,
      "step": 85630
    },
    {
      "epoch": 0.30014684905389255,
      "grad_norm": 3.046875,
      "learning_rate": 4.9997403885345194e-05,
      "loss": 1.0124,
      "step": 85640
    },
    {
      "epoch": 0.30018189656078814,
      "grad_norm": 2.859375,
      "learning_rate": 4.9996754856681496e-05,
      "loss": 1.0136,
      "step": 85650
    },
    {
      "epoch": 0.30021694406768373,
      "grad_norm": 3.109375,
      "learning_rate": 4.999610582801779e-05,
      "loss": 0.9364,
      "step": 85660
    },
    {
      "epoch": 0.3002519915745793,
      "grad_norm": 3.546875,
      "learning_rate": 4.999545679935409e-05,
      "loss": 0.9899,
      "step": 85670
    },
    {
      "epoch": 0.3002870390814749,
      "grad_norm": 2.78125,
      "learning_rate": 4.9994807770690386e-05,
      "loss": 0.9205,
      "step": 85680
    },
    {
      "epoch": 0.30032208658837056,
      "grad_norm": 2.984375,
      "learning_rate": 4.999415874202669e-05,
      "loss": 0.9655,
      "step": 85690
    },
    {
      "epoch": 0.30035713409526615,
      "grad_norm": 3.203125,
      "learning_rate": 4.999350971336298e-05,
      "loss": 1.018,
      "step": 85700
    },
    {
      "epoch": 0.30039218160216175,
      "grad_norm": 2.875,
      "learning_rate": 4.9992860684699284e-05,
      "loss": 0.9924,
      "step": 85710
    },
    {
      "epoch": 0.30042722910905734,
      "grad_norm": 3.3125,
      "learning_rate": 4.999221165603558e-05,
      "loss": 1.0021,
      "step": 85720
    },
    {
      "epoch": 0.30046227661595293,
      "grad_norm": 2.6875,
      "learning_rate": 4.999156262737188e-05,
      "loss": 0.9743,
      "step": 85730
    },
    {
      "epoch": 0.3004973241228485,
      "grad_norm": 3.34375,
      "learning_rate": 4.999091359870818e-05,
      "loss": 0.9952,
      "step": 85740
    },
    {
      "epoch": 0.3005323716297441,
      "grad_norm": 3.53125,
      "learning_rate": 4.9990264570044476e-05,
      "loss": 0.9675,
      "step": 85750
    },
    {
      "epoch": 0.3005674191366397,
      "grad_norm": 3.0625,
      "learning_rate": 4.998961554138078e-05,
      "loss": 0.9111,
      "step": 85760
    },
    {
      "epoch": 0.3006024666435353,
      "grad_norm": 3.125,
      "learning_rate": 4.998896651271707e-05,
      "loss": 0.9933,
      "step": 85770
    },
    {
      "epoch": 0.3006375141504309,
      "grad_norm": 2.984375,
      "learning_rate": 4.998831748405337e-05,
      "loss": 0.9335,
      "step": 85780
    },
    {
      "epoch": 0.30067256165732653,
      "grad_norm": 2.78125,
      "learning_rate": 4.998766845538966e-05,
      "loss": 0.9865,
      "step": 85790
    },
    {
      "epoch": 0.3007076091642221,
      "grad_norm": 3.046875,
      "learning_rate": 4.998701942672596e-05,
      "loss": 0.9936,
      "step": 85800
    },
    {
      "epoch": 0.3007426566711177,
      "grad_norm": 2.890625,
      "learning_rate": 4.998637039806226e-05,
      "loss": 0.9632,
      "step": 85810
    },
    {
      "epoch": 0.3007777041780133,
      "grad_norm": 3.046875,
      "learning_rate": 4.998572136939856e-05,
      "loss": 1.0133,
      "step": 85820
    },
    {
      "epoch": 0.3008127516849089,
      "grad_norm": 2.796875,
      "learning_rate": 4.998507234073486e-05,
      "loss": 0.9659,
      "step": 85830
    },
    {
      "epoch": 0.3008477991918045,
      "grad_norm": 2.890625,
      "learning_rate": 4.9984423312071154e-05,
      "loss": 0.9616,
      "step": 85840
    },
    {
      "epoch": 0.3008828466987001,
      "grad_norm": 3.296875,
      "learning_rate": 4.9983774283407456e-05,
      "loss": 0.9983,
      "step": 85850
    },
    {
      "epoch": 0.30091789420559567,
      "grad_norm": 2.890625,
      "learning_rate": 4.998312525474375e-05,
      "loss": 1.0018,
      "step": 85860
    },
    {
      "epoch": 0.30095294171249126,
      "grad_norm": 3.265625,
      "learning_rate": 4.998247622608005e-05,
      "loss": 0.9867,
      "step": 85870
    },
    {
      "epoch": 0.30098798921938685,
      "grad_norm": 3.296875,
      "learning_rate": 4.9981827197416346e-05,
      "loss": 1.02,
      "step": 85880
    },
    {
      "epoch": 0.3010230367262825,
      "grad_norm": 3.03125,
      "learning_rate": 4.998117816875265e-05,
      "loss": 1.0918,
      "step": 85890
    },
    {
      "epoch": 0.3010580842331781,
      "grad_norm": 3.484375,
      "learning_rate": 4.998052914008894e-05,
      "loss": 0.972,
      "step": 85900
    },
    {
      "epoch": 0.3010931317400737,
      "grad_norm": 3.3125,
      "learning_rate": 4.9979880111425244e-05,
      "loss": 1.0248,
      "step": 85910
    },
    {
      "epoch": 0.3011281792469693,
      "grad_norm": 3.171875,
      "learning_rate": 4.997923108276154e-05,
      "loss": 0.9343,
      "step": 85920
    },
    {
      "epoch": 0.30116322675386487,
      "grad_norm": 2.859375,
      "learning_rate": 4.997858205409784e-05,
      "loss": 0.9773,
      "step": 85930
    },
    {
      "epoch": 0.30119827426076046,
      "grad_norm": 3.140625,
      "learning_rate": 4.9977933025434134e-05,
      "loss": 0.8676,
      "step": 85940
    },
    {
      "epoch": 0.30123332176765605,
      "grad_norm": 2.8125,
      "learning_rate": 4.9977283996770436e-05,
      "loss": 0.9823,
      "step": 85950
    },
    {
      "epoch": 0.30126836927455164,
      "grad_norm": 2.78125,
      "learning_rate": 4.997663496810673e-05,
      "loss": 0.9745,
      "step": 85960
    },
    {
      "epoch": 0.30130341678144723,
      "grad_norm": 3.1875,
      "learning_rate": 4.997598593944303e-05,
      "loss": 1.0221,
      "step": 85970
    },
    {
      "epoch": 0.3013384642883428,
      "grad_norm": 3.15625,
      "learning_rate": 4.997533691077933e-05,
      "loss": 0.9411,
      "step": 85980
    },
    {
      "epoch": 0.30137351179523847,
      "grad_norm": 2.546875,
      "learning_rate": 4.997468788211563e-05,
      "loss": 0.9901,
      "step": 85990
    },
    {
      "epoch": 0.30140855930213406,
      "grad_norm": 3.140625,
      "learning_rate": 4.997403885345193e-05,
      "loss": 0.9793,
      "step": 86000
    },
    {
      "epoch": 0.30144360680902965,
      "grad_norm": 3.765625,
      "learning_rate": 4.9973389824788224e-05,
      "loss": 1.0324,
      "step": 86010
    },
    {
      "epoch": 0.30147865431592524,
      "grad_norm": 3.25,
      "learning_rate": 4.9972740796124525e-05,
      "loss": 0.9527,
      "step": 86020
    },
    {
      "epoch": 0.30151370182282083,
      "grad_norm": 3.21875,
      "learning_rate": 4.997209176746082e-05,
      "loss": 0.9748,
      "step": 86030
    },
    {
      "epoch": 0.3015487493297164,
      "grad_norm": 2.734375,
      "learning_rate": 4.997144273879712e-05,
      "loss": 0.8956,
      "step": 86040
    },
    {
      "epoch": 0.301583796836612,
      "grad_norm": 3.09375,
      "learning_rate": 4.9970793710133416e-05,
      "loss": 0.9852,
      "step": 86050
    },
    {
      "epoch": 0.3016188443435076,
      "grad_norm": 2.75,
      "learning_rate": 4.997014468146972e-05,
      "loss": 1.0065,
      "step": 86060
    },
    {
      "epoch": 0.3016538918504032,
      "grad_norm": 2.90625,
      "learning_rate": 4.996949565280601e-05,
      "loss": 0.958,
      "step": 86070
    },
    {
      "epoch": 0.3016889393572988,
      "grad_norm": 2.75,
      "learning_rate": 4.996884662414231e-05,
      "loss": 0.9949,
      "step": 86080
    },
    {
      "epoch": 0.30172398686419444,
      "grad_norm": 3.109375,
      "learning_rate": 4.996819759547861e-05,
      "loss": 1.0776,
      "step": 86090
    },
    {
      "epoch": 0.30175903437109003,
      "grad_norm": 3.03125,
      "learning_rate": 4.996754856681491e-05,
      "loss": 0.9298,
      "step": 86100
    },
    {
      "epoch": 0.3017940818779856,
      "grad_norm": 3.046875,
      "learning_rate": 4.996689953815121e-05,
      "loss": 0.9379,
      "step": 86110
    },
    {
      "epoch": 0.3018291293848812,
      "grad_norm": 3.03125,
      "learning_rate": 4.9966250509487505e-05,
      "loss": 0.9778,
      "step": 86120
    },
    {
      "epoch": 0.3018641768917768,
      "grad_norm": 3.015625,
      "learning_rate": 4.9965601480823807e-05,
      "loss": 1.0184,
      "step": 86130
    },
    {
      "epoch": 0.3018992243986724,
      "grad_norm": 2.625,
      "learning_rate": 4.99649524521601e-05,
      "loss": 0.9236,
      "step": 86140
    },
    {
      "epoch": 0.301934271905568,
      "grad_norm": 3.0,
      "learning_rate": 4.99643034234964e-05,
      "loss": 0.9745,
      "step": 86150
    },
    {
      "epoch": 0.3019693194124636,
      "grad_norm": 3.28125,
      "learning_rate": 4.99636543948327e-05,
      "loss": 0.9006,
      "step": 86160
    },
    {
      "epoch": 0.30200436691935917,
      "grad_norm": 3.171875,
      "learning_rate": 4.996300536616899e-05,
      "loss": 0.9316,
      "step": 86170
    },
    {
      "epoch": 0.3020394144262548,
      "grad_norm": 3.109375,
      "learning_rate": 4.9962356337505286e-05,
      "loss": 0.9473,
      "step": 86180
    },
    {
      "epoch": 0.3020744619331504,
      "grad_norm": 3.421875,
      "learning_rate": 4.996170730884159e-05,
      "loss": 0.9817,
      "step": 86190
    },
    {
      "epoch": 0.302109509440046,
      "grad_norm": 2.671875,
      "learning_rate": 4.996105828017789e-05,
      "loss": 0.9423,
      "step": 86200
    },
    {
      "epoch": 0.3021445569469416,
      "grad_norm": 3.09375,
      "learning_rate": 4.9960409251514184e-05,
      "loss": 1.0179,
      "step": 86210
    },
    {
      "epoch": 0.3021796044538372,
      "grad_norm": 2.953125,
      "learning_rate": 4.9959760222850485e-05,
      "loss": 0.9889,
      "step": 86220
    },
    {
      "epoch": 0.30221465196073277,
      "grad_norm": 3.171875,
      "learning_rate": 4.995911119418678e-05,
      "loss": 0.9426,
      "step": 86230
    },
    {
      "epoch": 0.30224969946762836,
      "grad_norm": 3.015625,
      "learning_rate": 4.995846216552308e-05,
      "loss": 1.0719,
      "step": 86240
    },
    {
      "epoch": 0.30228474697452395,
      "grad_norm": 2.875,
      "learning_rate": 4.9957813136859376e-05,
      "loss": 0.8944,
      "step": 86250
    },
    {
      "epoch": 0.30231979448141955,
      "grad_norm": 2.703125,
      "learning_rate": 4.995716410819568e-05,
      "loss": 0.9568,
      "step": 86260
    },
    {
      "epoch": 0.30235484198831514,
      "grad_norm": 3.21875,
      "learning_rate": 4.995651507953197e-05,
      "loss": 0.9316,
      "step": 86270
    },
    {
      "epoch": 0.3023898894952108,
      "grad_norm": 2.71875,
      "learning_rate": 4.995586605086827e-05,
      "loss": 0.9903,
      "step": 86280
    },
    {
      "epoch": 0.3024249370021064,
      "grad_norm": 2.90625,
      "learning_rate": 4.995521702220457e-05,
      "loss": 1.0326,
      "step": 86290
    },
    {
      "epoch": 0.30245998450900197,
      "grad_norm": 2.984375,
      "learning_rate": 4.995456799354087e-05,
      "loss": 1.07,
      "step": 86300
    },
    {
      "epoch": 0.30249503201589756,
      "grad_norm": 3.28125,
      "learning_rate": 4.9953918964877164e-05,
      "loss": 1.0426,
      "step": 86310
    },
    {
      "epoch": 0.30253007952279315,
      "grad_norm": 3.0,
      "learning_rate": 4.9953269936213465e-05,
      "loss": 1.0293,
      "step": 86320
    },
    {
      "epoch": 0.30256512702968874,
      "grad_norm": 3.0,
      "learning_rate": 4.995262090754976e-05,
      "loss": 0.8714,
      "step": 86330
    },
    {
      "epoch": 0.30260017453658433,
      "grad_norm": 2.765625,
      "learning_rate": 4.995197187888606e-05,
      "loss": 0.9639,
      "step": 86340
    },
    {
      "epoch": 0.3026352220434799,
      "grad_norm": 2.984375,
      "learning_rate": 4.995132285022236e-05,
      "loss": 0.9082,
      "step": 86350
    },
    {
      "epoch": 0.3026702695503755,
      "grad_norm": 3.09375,
      "learning_rate": 4.995067382155866e-05,
      "loss": 0.9469,
      "step": 86360
    },
    {
      "epoch": 0.3027053170572711,
      "grad_norm": 3.046875,
      "learning_rate": 4.995002479289496e-05,
      "loss": 0.9913,
      "step": 86370
    },
    {
      "epoch": 0.30274036456416675,
      "grad_norm": 2.96875,
      "learning_rate": 4.994937576423125e-05,
      "loss": 0.854,
      "step": 86380
    },
    {
      "epoch": 0.30277541207106234,
      "grad_norm": 2.90625,
      "learning_rate": 4.9948726735567555e-05,
      "loss": 0.946,
      "step": 86390
    },
    {
      "epoch": 0.30281045957795794,
      "grad_norm": 2.890625,
      "learning_rate": 4.994807770690385e-05,
      "loss": 0.9276,
      "step": 86400
    },
    {
      "epoch": 0.3028455070848535,
      "grad_norm": 3.359375,
      "learning_rate": 4.994742867824015e-05,
      "loss": 0.9884,
      "step": 86410
    },
    {
      "epoch": 0.3028805545917491,
      "grad_norm": 3.0625,
      "learning_rate": 4.9946779649576445e-05,
      "loss": 1.0089,
      "step": 86420
    },
    {
      "epoch": 0.3029156020986447,
      "grad_norm": 2.75,
      "learning_rate": 4.9946130620912747e-05,
      "loss": 0.9447,
      "step": 86430
    },
    {
      "epoch": 0.3029506496055403,
      "grad_norm": 2.90625,
      "learning_rate": 4.994548159224904e-05,
      "loss": 1.0638,
      "step": 86440
    },
    {
      "epoch": 0.3029856971124359,
      "grad_norm": 3.296875,
      "learning_rate": 4.994483256358534e-05,
      "loss": 1.1,
      "step": 86450
    },
    {
      "epoch": 0.3030207446193315,
      "grad_norm": 2.984375,
      "learning_rate": 4.994418353492164e-05,
      "loss": 0.9035,
      "step": 86460
    },
    {
      "epoch": 0.3030557921262271,
      "grad_norm": 3.25,
      "learning_rate": 4.994353450625794e-05,
      "loss": 1.0364,
      "step": 86470
    },
    {
      "epoch": 0.3030908396331227,
      "grad_norm": 3.0,
      "learning_rate": 4.994288547759424e-05,
      "loss": 1.0015,
      "step": 86480
    },
    {
      "epoch": 0.3031258871400183,
      "grad_norm": 2.84375,
      "learning_rate": 4.9942236448930535e-05,
      "loss": 0.9145,
      "step": 86490
    },
    {
      "epoch": 0.3031609346469139,
      "grad_norm": 3.203125,
      "learning_rate": 4.9941587420266836e-05,
      "loss": 0.9887,
      "step": 86500
    },
    {
      "epoch": 0.3031959821538095,
      "grad_norm": 3.5,
      "learning_rate": 4.994093839160313e-05,
      "loss": 1.0558,
      "step": 86510
    },
    {
      "epoch": 0.3032310296607051,
      "grad_norm": 3.28125,
      "learning_rate": 4.994028936293943e-05,
      "loss": 1.0127,
      "step": 86520
    },
    {
      "epoch": 0.3032660771676007,
      "grad_norm": 3.015625,
      "learning_rate": 4.9939640334275727e-05,
      "loss": 0.9575,
      "step": 86530
    },
    {
      "epoch": 0.30330112467449627,
      "grad_norm": 2.859375,
      "learning_rate": 4.993899130561203e-05,
      "loss": 1.0013,
      "step": 86540
    },
    {
      "epoch": 0.30333617218139186,
      "grad_norm": 3.4375,
      "learning_rate": 4.9938342276948316e-05,
      "loss": 1.0493,
      "step": 86550
    },
    {
      "epoch": 0.30337121968828745,
      "grad_norm": 3.203125,
      "learning_rate": 4.993769324828462e-05,
      "loss": 0.9937,
      "step": 86560
    },
    {
      "epoch": 0.30340626719518304,
      "grad_norm": 3.734375,
      "learning_rate": 4.993704421962092e-05,
      "loss": 1.0333,
      "step": 86570
    },
    {
      "epoch": 0.3034413147020787,
      "grad_norm": 3.203125,
      "learning_rate": 4.993639519095721e-05,
      "loss": 1.0842,
      "step": 86580
    },
    {
      "epoch": 0.3034763622089743,
      "grad_norm": 3.015625,
      "learning_rate": 4.9935746162293515e-05,
      "loss": 0.9408,
      "step": 86590
    },
    {
      "epoch": 0.3035114097158699,
      "grad_norm": 2.671875,
      "learning_rate": 4.993509713362981e-05,
      "loss": 0.9749,
      "step": 86600
    },
    {
      "epoch": 0.30354645722276546,
      "grad_norm": 2.734375,
      "learning_rate": 4.993444810496611e-05,
      "loss": 0.9487,
      "step": 86610
    },
    {
      "epoch": 0.30358150472966106,
      "grad_norm": 3.1875,
      "learning_rate": 4.9933799076302405e-05,
      "loss": 1.0624,
      "step": 86620
    },
    {
      "epoch": 0.30361655223655665,
      "grad_norm": 3.59375,
      "learning_rate": 4.9933150047638707e-05,
      "loss": 1.0788,
      "step": 86630
    },
    {
      "epoch": 0.30365159974345224,
      "grad_norm": 2.6875,
      "learning_rate": 4.9932501018975e-05,
      "loss": 1.0129,
      "step": 86640
    },
    {
      "epoch": 0.30368664725034783,
      "grad_norm": 3.09375,
      "learning_rate": 4.99318519903113e-05,
      "loss": 0.9446,
      "step": 86650
    },
    {
      "epoch": 0.3037216947572434,
      "grad_norm": 3.5,
      "learning_rate": 4.99312029616476e-05,
      "loss": 0.8685,
      "step": 86660
    },
    {
      "epoch": 0.30375674226413907,
      "grad_norm": 3.203125,
      "learning_rate": 4.99305539329839e-05,
      "loss": 0.9563,
      "step": 86670
    },
    {
      "epoch": 0.30379178977103466,
      "grad_norm": 3.296875,
      "learning_rate": 4.992990490432019e-05,
      "loss": 0.9663,
      "step": 86680
    },
    {
      "epoch": 0.30382683727793025,
      "grad_norm": 3.25,
      "learning_rate": 4.9929255875656495e-05,
      "loss": 0.9264,
      "step": 86690
    },
    {
      "epoch": 0.30386188478482584,
      "grad_norm": 2.8125,
      "learning_rate": 4.9928606846992796e-05,
      "loss": 0.9427,
      "step": 86700
    },
    {
      "epoch": 0.30389693229172143,
      "grad_norm": 3.078125,
      "learning_rate": 4.992795781832909e-05,
      "loss": 0.8685,
      "step": 86710
    },
    {
      "epoch": 0.303931979798617,
      "grad_norm": 3.171875,
      "learning_rate": 4.992730878966539e-05,
      "loss": 0.986,
      "step": 86720
    },
    {
      "epoch": 0.3039670273055126,
      "grad_norm": 2.984375,
      "learning_rate": 4.9926659761001687e-05,
      "loss": 1.0234,
      "step": 86730
    },
    {
      "epoch": 0.3040020748124082,
      "grad_norm": 3.125,
      "learning_rate": 4.992601073233799e-05,
      "loss": 0.9203,
      "step": 86740
    },
    {
      "epoch": 0.3040371223193038,
      "grad_norm": 3.171875,
      "learning_rate": 4.992536170367428e-05,
      "loss": 1.0527,
      "step": 86750
    },
    {
      "epoch": 0.3040721698261994,
      "grad_norm": 3.25,
      "learning_rate": 4.9924712675010584e-05,
      "loss": 1.0289,
      "step": 86760
    },
    {
      "epoch": 0.30410721733309504,
      "grad_norm": 3.5,
      "learning_rate": 4.992406364634688e-05,
      "loss": 1.0702,
      "step": 86770
    },
    {
      "epoch": 0.30414226483999063,
      "grad_norm": 3.15625,
      "learning_rate": 4.992341461768318e-05,
      "loss": 1.0123,
      "step": 86780
    },
    {
      "epoch": 0.3041773123468862,
      "grad_norm": 2.890625,
      "learning_rate": 4.9922765589019475e-05,
      "loss": 0.9361,
      "step": 86790
    },
    {
      "epoch": 0.3042123598537818,
      "grad_norm": 3.140625,
      "learning_rate": 4.9922116560355776e-05,
      "loss": 0.9401,
      "step": 86800
    },
    {
      "epoch": 0.3042474073606774,
      "grad_norm": 3.09375,
      "learning_rate": 4.992146753169207e-05,
      "loss": 0.9241,
      "step": 86810
    },
    {
      "epoch": 0.304282454867573,
      "grad_norm": 2.5,
      "learning_rate": 4.992081850302837e-05,
      "loss": 0.9006,
      "step": 86820
    },
    {
      "epoch": 0.3043175023744686,
      "grad_norm": 3.328125,
      "learning_rate": 4.9920169474364667e-05,
      "loss": 0.9707,
      "step": 86830
    },
    {
      "epoch": 0.3043525498813642,
      "grad_norm": 3.1875,
      "learning_rate": 4.991952044570097e-05,
      "loss": 0.9066,
      "step": 86840
    },
    {
      "epoch": 0.30438759738825977,
      "grad_norm": 3.15625,
      "learning_rate": 4.991887141703727e-05,
      "loss": 0.9201,
      "step": 86850
    },
    {
      "epoch": 0.30442264489515536,
      "grad_norm": 3.25,
      "learning_rate": 4.9918222388373564e-05,
      "loss": 0.9363,
      "step": 86860
    },
    {
      "epoch": 0.304457692402051,
      "grad_norm": 2.90625,
      "learning_rate": 4.9917573359709865e-05,
      "loss": 0.939,
      "step": 86870
    },
    {
      "epoch": 0.3044927399089466,
      "grad_norm": 3.046875,
      "learning_rate": 4.991692433104616e-05,
      "loss": 0.9915,
      "step": 86880
    },
    {
      "epoch": 0.3045277874158422,
      "grad_norm": 2.734375,
      "learning_rate": 4.991627530238246e-05,
      "loss": 0.8757,
      "step": 86890
    },
    {
      "epoch": 0.3045628349227378,
      "grad_norm": 2.828125,
      "learning_rate": 4.9915626273718756e-05,
      "loss": 0.9983,
      "step": 86900
    },
    {
      "epoch": 0.30459788242963337,
      "grad_norm": 3.421875,
      "learning_rate": 4.991497724505506e-05,
      "loss": 1.0188,
      "step": 86910
    },
    {
      "epoch": 0.30463292993652896,
      "grad_norm": 2.703125,
      "learning_rate": 4.9914328216391345e-05,
      "loss": 0.9858,
      "step": 86920
    },
    {
      "epoch": 0.30466797744342455,
      "grad_norm": 3.09375,
      "learning_rate": 4.9913679187727647e-05,
      "loss": 0.9959,
      "step": 86930
    },
    {
      "epoch": 0.30470302495032014,
      "grad_norm": 3.015625,
      "learning_rate": 4.991303015906395e-05,
      "loss": 0.9085,
      "step": 86940
    },
    {
      "epoch": 0.30473807245721574,
      "grad_norm": 3.484375,
      "learning_rate": 4.991238113040024e-05,
      "loss": 1.0216,
      "step": 86950
    },
    {
      "epoch": 0.3047731199641113,
      "grad_norm": 3.4375,
      "learning_rate": 4.9911732101736544e-05,
      "loss": 0.9636,
      "step": 86960
    },
    {
      "epoch": 0.304808167471007,
      "grad_norm": 3.546875,
      "learning_rate": 4.991108307307284e-05,
      "loss": 1.0296,
      "step": 86970
    },
    {
      "epoch": 0.30484321497790257,
      "grad_norm": 3.296875,
      "learning_rate": 4.991043404440914e-05,
      "loss": 1.0202,
      "step": 86980
    },
    {
      "epoch": 0.30487826248479816,
      "grad_norm": 3.234375,
      "learning_rate": 4.9909785015745435e-05,
      "loss": 1.0586,
      "step": 86990
    },
    {
      "epoch": 0.30491330999169375,
      "grad_norm": 3.4375,
      "learning_rate": 4.9909135987081736e-05,
      "loss": 0.9714,
      "step": 87000
    },
    {
      "epoch": 0.30494835749858934,
      "grad_norm": 3.3125,
      "learning_rate": 4.990848695841803e-05,
      "loss": 0.9394,
      "step": 87010
    },
    {
      "epoch": 0.30498340500548493,
      "grad_norm": 2.953125,
      "learning_rate": 4.990783792975433e-05,
      "loss": 1.0223,
      "step": 87020
    },
    {
      "epoch": 0.3050184525123805,
      "grad_norm": 3.09375,
      "learning_rate": 4.9907188901090627e-05,
      "loss": 1.004,
      "step": 87030
    },
    {
      "epoch": 0.3050535000192761,
      "grad_norm": 3.203125,
      "learning_rate": 4.990653987242693e-05,
      "loss": 1.0301,
      "step": 87040
    },
    {
      "epoch": 0.3050885475261717,
      "grad_norm": 3.0625,
      "learning_rate": 4.990589084376322e-05,
      "loss": 0.9356,
      "step": 87050
    },
    {
      "epoch": 0.3051235950330673,
      "grad_norm": 3.0,
      "learning_rate": 4.9905241815099524e-05,
      "loss": 0.8198,
      "step": 87060
    },
    {
      "epoch": 0.30515864253996294,
      "grad_norm": 3.046875,
      "learning_rate": 4.9904592786435825e-05,
      "loss": 1.0151,
      "step": 87070
    },
    {
      "epoch": 0.30519369004685853,
      "grad_norm": 3.296875,
      "learning_rate": 4.990394375777212e-05,
      "loss": 0.9901,
      "step": 87080
    },
    {
      "epoch": 0.3052287375537541,
      "grad_norm": 2.875,
      "learning_rate": 4.990329472910842e-05,
      "loss": 0.9611,
      "step": 87090
    },
    {
      "epoch": 0.3052637850606497,
      "grad_norm": 2.9375,
      "learning_rate": 4.9902645700444716e-05,
      "loss": 1.0155,
      "step": 87100
    },
    {
      "epoch": 0.3052988325675453,
      "grad_norm": 3.046875,
      "learning_rate": 4.990199667178102e-05,
      "loss": 0.8927,
      "step": 87110
    },
    {
      "epoch": 0.3053338800744409,
      "grad_norm": 3.203125,
      "learning_rate": 4.990134764311731e-05,
      "loss": 0.9804,
      "step": 87120
    },
    {
      "epoch": 0.3053689275813365,
      "grad_norm": 3.078125,
      "learning_rate": 4.990069861445361e-05,
      "loss": 0.9441,
      "step": 87130
    },
    {
      "epoch": 0.3054039750882321,
      "grad_norm": 3.4375,
      "learning_rate": 4.990004958578991e-05,
      "loss": 0.9422,
      "step": 87140
    },
    {
      "epoch": 0.3054390225951277,
      "grad_norm": 3.65625,
      "learning_rate": 4.989940055712621e-05,
      "loss": 0.9539,
      "step": 87150
    },
    {
      "epoch": 0.30547407010202327,
      "grad_norm": 2.8125,
      "learning_rate": 4.9898751528462504e-05,
      "loss": 1.0058,
      "step": 87160
    },
    {
      "epoch": 0.3055091176089189,
      "grad_norm": 3.34375,
      "learning_rate": 4.9898102499798805e-05,
      "loss": 0.9435,
      "step": 87170
    },
    {
      "epoch": 0.3055441651158145,
      "grad_norm": 3.109375,
      "learning_rate": 4.98974534711351e-05,
      "loss": 0.9444,
      "step": 87180
    },
    {
      "epoch": 0.3055792126227101,
      "grad_norm": 3.296875,
      "learning_rate": 4.98968044424714e-05,
      "loss": 0.9703,
      "step": 87190
    },
    {
      "epoch": 0.3056142601296057,
      "grad_norm": 3.171875,
      "learning_rate": 4.9896155413807696e-05,
      "loss": 1.0608,
      "step": 87200
    },
    {
      "epoch": 0.3056493076365013,
      "grad_norm": 3.71875,
      "learning_rate": 4.9895506385144e-05,
      "loss": 0.9965,
      "step": 87210
    },
    {
      "epoch": 0.30568435514339687,
      "grad_norm": 3.09375,
      "learning_rate": 4.98948573564803e-05,
      "loss": 1.0095,
      "step": 87220
    },
    {
      "epoch": 0.30571940265029246,
      "grad_norm": 2.828125,
      "learning_rate": 4.989420832781659e-05,
      "loss": 0.9717,
      "step": 87230
    },
    {
      "epoch": 0.30575445015718805,
      "grad_norm": 2.828125,
      "learning_rate": 4.9893559299152895e-05,
      "loss": 0.914,
      "step": 87240
    },
    {
      "epoch": 0.30578949766408364,
      "grad_norm": 2.734375,
      "learning_rate": 4.989291027048919e-05,
      "loss": 0.9799,
      "step": 87250
    },
    {
      "epoch": 0.3058245451709793,
      "grad_norm": 3.34375,
      "learning_rate": 4.989226124182549e-05,
      "loss": 1.0543,
      "step": 87260
    },
    {
      "epoch": 0.3058595926778749,
      "grad_norm": 2.71875,
      "learning_rate": 4.9891612213161785e-05,
      "loss": 0.9811,
      "step": 87270
    },
    {
      "epoch": 0.30589464018477047,
      "grad_norm": 3.234375,
      "learning_rate": 4.989096318449809e-05,
      "loss": 0.9594,
      "step": 87280
    },
    {
      "epoch": 0.30592968769166606,
      "grad_norm": 3.484375,
      "learning_rate": 4.989031415583438e-05,
      "loss": 0.9629,
      "step": 87290
    },
    {
      "epoch": 0.30596473519856165,
      "grad_norm": 3.828125,
      "learning_rate": 4.9889665127170676e-05,
      "loss": 1.0149,
      "step": 87300
    },
    {
      "epoch": 0.30599978270545725,
      "grad_norm": 3.125,
      "learning_rate": 4.988901609850698e-05,
      "loss": 1.0115,
      "step": 87310
    },
    {
      "epoch": 0.30603483021235284,
      "grad_norm": 3.296875,
      "learning_rate": 4.988836706984327e-05,
      "loss": 1.0235,
      "step": 87320
    },
    {
      "epoch": 0.30606987771924843,
      "grad_norm": 2.78125,
      "learning_rate": 4.988771804117957e-05,
      "loss": 1.0035,
      "step": 87330
    },
    {
      "epoch": 0.306104925226144,
      "grad_norm": 3.3125,
      "learning_rate": 4.988706901251587e-05,
      "loss": 1.0125,
      "step": 87340
    },
    {
      "epoch": 0.3061399727330396,
      "grad_norm": 3.1875,
      "learning_rate": 4.988641998385217e-05,
      "loss": 1.0815,
      "step": 87350
    },
    {
      "epoch": 0.30617502023993526,
      "grad_norm": 2.890625,
      "learning_rate": 4.9885770955188464e-05,
      "loss": 0.9701,
      "step": 87360
    },
    {
      "epoch": 0.30621006774683085,
      "grad_norm": 2.578125,
      "learning_rate": 4.9885121926524765e-05,
      "loss": 0.9269,
      "step": 87370
    },
    {
      "epoch": 0.30624511525372644,
      "grad_norm": 3.15625,
      "learning_rate": 4.988447289786106e-05,
      "loss": 0.9522,
      "step": 87380
    },
    {
      "epoch": 0.30628016276062203,
      "grad_norm": 3.265625,
      "learning_rate": 4.988382386919736e-05,
      "loss": 1.019,
      "step": 87390
    },
    {
      "epoch": 0.3063152102675176,
      "grad_norm": 3.265625,
      "learning_rate": 4.9883174840533656e-05,
      "loss": 0.9122,
      "step": 87400
    },
    {
      "epoch": 0.3063502577744132,
      "grad_norm": 2.890625,
      "learning_rate": 4.988252581186996e-05,
      "loss": 1.0009,
      "step": 87410
    },
    {
      "epoch": 0.3063853052813088,
      "grad_norm": 3.046875,
      "learning_rate": 4.988187678320625e-05,
      "loss": 0.9303,
      "step": 87420
    },
    {
      "epoch": 0.3064203527882044,
      "grad_norm": 4.0,
      "learning_rate": 4.988122775454255e-05,
      "loss": 0.9991,
      "step": 87430
    },
    {
      "epoch": 0.3064554002951,
      "grad_norm": 2.875,
      "learning_rate": 4.9880578725878855e-05,
      "loss": 0.9547,
      "step": 87440
    },
    {
      "epoch": 0.3064904478019956,
      "grad_norm": 2.984375,
      "learning_rate": 4.987992969721515e-05,
      "loss": 0.9774,
      "step": 87450
    },
    {
      "epoch": 0.3065254953088912,
      "grad_norm": 3.203125,
      "learning_rate": 4.987928066855145e-05,
      "loss": 0.952,
      "step": 87460
    },
    {
      "epoch": 0.3065605428157868,
      "grad_norm": 3.21875,
      "learning_rate": 4.9878631639887745e-05,
      "loss": 1.0153,
      "step": 87470
    },
    {
      "epoch": 0.3065955903226824,
      "grad_norm": 3.390625,
      "learning_rate": 4.987798261122405e-05,
      "loss": 0.9275,
      "step": 87480
    },
    {
      "epoch": 0.306630637829578,
      "grad_norm": 2.828125,
      "learning_rate": 4.987733358256034e-05,
      "loss": 0.9225,
      "step": 87490
    },
    {
      "epoch": 0.3066656853364736,
      "grad_norm": 2.84375,
      "learning_rate": 4.987668455389664e-05,
      "loss": 1.0252,
      "step": 87500
    },
    {
      "epoch": 0.3067007328433692,
      "grad_norm": 3.453125,
      "learning_rate": 4.987603552523294e-05,
      "loss": 1.0126,
      "step": 87510
    },
    {
      "epoch": 0.3067357803502648,
      "grad_norm": 3.203125,
      "learning_rate": 4.987538649656924e-05,
      "loss": 0.9937,
      "step": 87520
    },
    {
      "epoch": 0.30677082785716037,
      "grad_norm": 3.125,
      "learning_rate": 4.987473746790553e-05,
      "loss": 0.9768,
      "step": 87530
    },
    {
      "epoch": 0.30680587536405596,
      "grad_norm": 2.71875,
      "learning_rate": 4.9874088439241835e-05,
      "loss": 0.981,
      "step": 87540
    },
    {
      "epoch": 0.30684092287095155,
      "grad_norm": 3.171875,
      "learning_rate": 4.987343941057813e-05,
      "loss": 1.0138,
      "step": 87550
    },
    {
      "epoch": 0.3068759703778472,
      "grad_norm": 3.21875,
      "learning_rate": 4.987279038191443e-05,
      "loss": 0.9389,
      "step": 87560
    },
    {
      "epoch": 0.3069110178847428,
      "grad_norm": 3.453125,
      "learning_rate": 4.9872141353250725e-05,
      "loss": 1.0443,
      "step": 87570
    },
    {
      "epoch": 0.3069460653916384,
      "grad_norm": 3.109375,
      "learning_rate": 4.987149232458703e-05,
      "loss": 0.9659,
      "step": 87580
    },
    {
      "epoch": 0.30698111289853397,
      "grad_norm": 3.09375,
      "learning_rate": 4.987084329592333e-05,
      "loss": 0.9503,
      "step": 87590
    },
    {
      "epoch": 0.30701616040542956,
      "grad_norm": 2.90625,
      "learning_rate": 4.987019426725962e-05,
      "loss": 0.9619,
      "step": 87600
    },
    {
      "epoch": 0.30705120791232515,
      "grad_norm": 2.9375,
      "learning_rate": 4.9869545238595924e-05,
      "loss": 0.942,
      "step": 87610
    },
    {
      "epoch": 0.30708625541922074,
      "grad_norm": 3.5,
      "learning_rate": 4.986889620993222e-05,
      "loss": 1.0381,
      "step": 87620
    },
    {
      "epoch": 0.30712130292611634,
      "grad_norm": 2.9375,
      "learning_rate": 4.986824718126852e-05,
      "loss": 0.9614,
      "step": 87630
    },
    {
      "epoch": 0.3071563504330119,
      "grad_norm": 3.234375,
      "learning_rate": 4.9867598152604815e-05,
      "loss": 0.9658,
      "step": 87640
    },
    {
      "epoch": 0.3071913979399075,
      "grad_norm": 3.0,
      "learning_rate": 4.9866949123941116e-05,
      "loss": 0.9718,
      "step": 87650
    },
    {
      "epoch": 0.30722644544680316,
      "grad_norm": 2.734375,
      "learning_rate": 4.986630009527741e-05,
      "loss": 0.9095,
      "step": 87660
    },
    {
      "epoch": 0.30726149295369876,
      "grad_norm": 3.3125,
      "learning_rate": 4.9865651066613705e-05,
      "loss": 0.9525,
      "step": 87670
    },
    {
      "epoch": 0.30729654046059435,
      "grad_norm": 3.09375,
      "learning_rate": 4.986500203795001e-05,
      "loss": 0.9726,
      "step": 87680
    },
    {
      "epoch": 0.30733158796748994,
      "grad_norm": 3.4375,
      "learning_rate": 4.98643530092863e-05,
      "loss": 0.9497,
      "step": 87690
    },
    {
      "epoch": 0.30736663547438553,
      "grad_norm": 3.03125,
      "learning_rate": 4.98637039806226e-05,
      "loss": 1.0071,
      "step": 87700
    },
    {
      "epoch": 0.3074016829812811,
      "grad_norm": 3.0625,
      "learning_rate": 4.98630549519589e-05,
      "loss": 1.0396,
      "step": 87710
    },
    {
      "epoch": 0.3074367304881767,
      "grad_norm": 3.40625,
      "learning_rate": 4.98624059232952e-05,
      "loss": 1.0304,
      "step": 87720
    },
    {
      "epoch": 0.3074717779950723,
      "grad_norm": 3.390625,
      "learning_rate": 4.986175689463149e-05,
      "loss": 1.0145,
      "step": 87730
    },
    {
      "epoch": 0.3075068255019679,
      "grad_norm": 2.9375,
      "learning_rate": 4.9861107865967795e-05,
      "loss": 0.9526,
      "step": 87740
    },
    {
      "epoch": 0.30754187300886354,
      "grad_norm": 3.15625,
      "learning_rate": 4.986045883730409e-05,
      "loss": 0.9974,
      "step": 87750
    },
    {
      "epoch": 0.30757692051575913,
      "grad_norm": 3.203125,
      "learning_rate": 4.985980980864039e-05,
      "loss": 1.0615,
      "step": 87760
    },
    {
      "epoch": 0.3076119680226547,
      "grad_norm": 2.578125,
      "learning_rate": 4.9859160779976685e-05,
      "loss": 0.9139,
      "step": 87770
    },
    {
      "epoch": 0.3076470155295503,
      "grad_norm": 3.140625,
      "learning_rate": 4.985851175131299e-05,
      "loss": 0.9099,
      "step": 87780
    },
    {
      "epoch": 0.3076820630364459,
      "grad_norm": 3.203125,
      "learning_rate": 4.985786272264928e-05,
      "loss": 0.9031,
      "step": 87790
    },
    {
      "epoch": 0.3077171105433415,
      "grad_norm": 2.859375,
      "learning_rate": 4.985721369398558e-05,
      "loss": 1.0022,
      "step": 87800
    },
    {
      "epoch": 0.3077521580502371,
      "grad_norm": 2.796875,
      "learning_rate": 4.9856564665321884e-05,
      "loss": 0.9286,
      "step": 87810
    },
    {
      "epoch": 0.3077872055571327,
      "grad_norm": 3.25,
      "learning_rate": 4.985591563665818e-05,
      "loss": 1.0101,
      "step": 87820
    },
    {
      "epoch": 0.3078222530640283,
      "grad_norm": 3.25,
      "learning_rate": 4.985526660799448e-05,
      "loss": 0.9993,
      "step": 87830
    },
    {
      "epoch": 0.30785730057092386,
      "grad_norm": 3.484375,
      "learning_rate": 4.9854617579330775e-05,
      "loss": 0.9981,
      "step": 87840
    },
    {
      "epoch": 0.3078923480778195,
      "grad_norm": 3.125,
      "learning_rate": 4.9853968550667076e-05,
      "loss": 1.0471,
      "step": 87850
    },
    {
      "epoch": 0.3079273955847151,
      "grad_norm": 3.421875,
      "learning_rate": 4.985331952200337e-05,
      "loss": 0.9518,
      "step": 87860
    },
    {
      "epoch": 0.3079624430916107,
      "grad_norm": 3.359375,
      "learning_rate": 4.985267049333967e-05,
      "loss": 1.0094,
      "step": 87870
    },
    {
      "epoch": 0.3079974905985063,
      "grad_norm": 2.96875,
      "learning_rate": 4.985202146467597e-05,
      "loss": 1.0049,
      "step": 87880
    },
    {
      "epoch": 0.3080325381054019,
      "grad_norm": 2.828125,
      "learning_rate": 4.985137243601227e-05,
      "loss": 0.9454,
      "step": 87890
    },
    {
      "epoch": 0.30806758561229747,
      "grad_norm": 2.890625,
      "learning_rate": 4.985072340734856e-05,
      "loss": 0.8835,
      "step": 87900
    },
    {
      "epoch": 0.30810263311919306,
      "grad_norm": 3.46875,
      "learning_rate": 4.9850074378684864e-05,
      "loss": 1.0217,
      "step": 87910
    },
    {
      "epoch": 0.30813768062608865,
      "grad_norm": 3.484375,
      "learning_rate": 4.984942535002116e-05,
      "loss": 1.0257,
      "step": 87920
    },
    {
      "epoch": 0.30817272813298424,
      "grad_norm": 3.015625,
      "learning_rate": 4.984877632135746e-05,
      "loss": 0.9534,
      "step": 87930
    },
    {
      "epoch": 0.30820777563987983,
      "grad_norm": 3.109375,
      "learning_rate": 4.984812729269376e-05,
      "loss": 0.9941,
      "step": 87940
    },
    {
      "epoch": 0.3082428231467755,
      "grad_norm": 2.90625,
      "learning_rate": 4.9847478264030056e-05,
      "loss": 0.9564,
      "step": 87950
    },
    {
      "epoch": 0.30827787065367107,
      "grad_norm": 3.21875,
      "learning_rate": 4.984682923536636e-05,
      "loss": 0.9661,
      "step": 87960
    },
    {
      "epoch": 0.30831291816056666,
      "grad_norm": 3.203125,
      "learning_rate": 4.984618020670265e-05,
      "loss": 0.8924,
      "step": 87970
    },
    {
      "epoch": 0.30834796566746225,
      "grad_norm": 3.046875,
      "learning_rate": 4.9845531178038953e-05,
      "loss": 0.9989,
      "step": 87980
    },
    {
      "epoch": 0.30838301317435785,
      "grad_norm": 2.890625,
      "learning_rate": 4.984488214937525e-05,
      "loss": 1.0033,
      "step": 87990
    },
    {
      "epoch": 0.30841806068125344,
      "grad_norm": 3.0,
      "learning_rate": 4.984423312071155e-05,
      "loss": 1.0119,
      "step": 88000
    },
    {
      "epoch": 0.30845310818814903,
      "grad_norm": 3.171875,
      "learning_rate": 4.9843584092047844e-05,
      "loss": 0.9491,
      "step": 88010
    },
    {
      "epoch": 0.3084881556950446,
      "grad_norm": 3.0,
      "learning_rate": 4.9842935063384145e-05,
      "loss": 0.8861,
      "step": 88020
    },
    {
      "epoch": 0.3085232032019402,
      "grad_norm": 3.5625,
      "learning_rate": 4.984228603472044e-05,
      "loss": 0.9633,
      "step": 88030
    },
    {
      "epoch": 0.3085582507088358,
      "grad_norm": 3.21875,
      "learning_rate": 4.984163700605674e-05,
      "loss": 0.8904,
      "step": 88040
    },
    {
      "epoch": 0.30859329821573145,
      "grad_norm": 3.21875,
      "learning_rate": 4.9840987977393036e-05,
      "loss": 1.0582,
      "step": 88050
    },
    {
      "epoch": 0.30862834572262704,
      "grad_norm": 3.296875,
      "learning_rate": 4.984033894872933e-05,
      "loss": 0.9862,
      "step": 88060
    },
    {
      "epoch": 0.30866339322952263,
      "grad_norm": 3.5,
      "learning_rate": 4.983968992006563e-05,
      "loss": 0.9817,
      "step": 88070
    },
    {
      "epoch": 0.3086984407364182,
      "grad_norm": 3.3125,
      "learning_rate": 4.983904089140193e-05,
      "loss": 1.0357,
      "step": 88080
    },
    {
      "epoch": 0.3087334882433138,
      "grad_norm": 3.09375,
      "learning_rate": 4.983839186273823e-05,
      "loss": 0.872,
      "step": 88090
    },
    {
      "epoch": 0.3087685357502094,
      "grad_norm": 2.984375,
      "learning_rate": 4.983774283407452e-05,
      "loss": 0.957,
      "step": 88100
    },
    {
      "epoch": 0.308803583257105,
      "grad_norm": 3.015625,
      "learning_rate": 4.9837093805410824e-05,
      "loss": 1.0682,
      "step": 88110
    },
    {
      "epoch": 0.3088386307640006,
      "grad_norm": 3.0625,
      "learning_rate": 4.983644477674712e-05,
      "loss": 0.9688,
      "step": 88120
    },
    {
      "epoch": 0.3088736782708962,
      "grad_norm": 2.890625,
      "learning_rate": 4.983579574808342e-05,
      "loss": 0.9685,
      "step": 88130
    },
    {
      "epoch": 0.30890872577779177,
      "grad_norm": 2.890625,
      "learning_rate": 4.9835146719419715e-05,
      "loss": 0.9255,
      "step": 88140
    },
    {
      "epoch": 0.3089437732846874,
      "grad_norm": 3.0625,
      "learning_rate": 4.9834497690756016e-05,
      "loss": 0.9864,
      "step": 88150
    },
    {
      "epoch": 0.308978820791583,
      "grad_norm": 2.984375,
      "learning_rate": 4.983384866209231e-05,
      "loss": 0.9711,
      "step": 88160
    },
    {
      "epoch": 0.3090138682984786,
      "grad_norm": 2.984375,
      "learning_rate": 4.983319963342861e-05,
      "loss": 0.9911,
      "step": 88170
    },
    {
      "epoch": 0.3090489158053742,
      "grad_norm": 2.765625,
      "learning_rate": 4.9832550604764913e-05,
      "loss": 0.917,
      "step": 88180
    },
    {
      "epoch": 0.3090839633122698,
      "grad_norm": 2.984375,
      "learning_rate": 4.983190157610121e-05,
      "loss": 1.0163,
      "step": 88190
    },
    {
      "epoch": 0.3091190108191654,
      "grad_norm": 3.53125,
      "learning_rate": 4.983125254743751e-05,
      "loss": 0.9622,
      "step": 88200
    },
    {
      "epoch": 0.30915405832606097,
      "grad_norm": 2.9375,
      "learning_rate": 4.9830603518773804e-05,
      "loss": 0.8801,
      "step": 88210
    },
    {
      "epoch": 0.30918910583295656,
      "grad_norm": 3.140625,
      "learning_rate": 4.9829954490110105e-05,
      "loss": 0.9949,
      "step": 88220
    },
    {
      "epoch": 0.30922415333985215,
      "grad_norm": 3.65625,
      "learning_rate": 4.98293054614464e-05,
      "loss": 1.0176,
      "step": 88230
    },
    {
      "epoch": 0.30925920084674774,
      "grad_norm": 2.5,
      "learning_rate": 4.98286564327827e-05,
      "loss": 0.8777,
      "step": 88240
    },
    {
      "epoch": 0.3092942483536434,
      "grad_norm": 2.84375,
      "learning_rate": 4.9828007404118996e-05,
      "loss": 0.9491,
      "step": 88250
    },
    {
      "epoch": 0.309329295860539,
      "grad_norm": 3.09375,
      "learning_rate": 4.98273583754553e-05,
      "loss": 0.9827,
      "step": 88260
    },
    {
      "epoch": 0.30936434336743457,
      "grad_norm": 3.140625,
      "learning_rate": 4.982670934679159e-05,
      "loss": 0.934,
      "step": 88270
    },
    {
      "epoch": 0.30939939087433016,
      "grad_norm": 3.03125,
      "learning_rate": 4.9826060318127893e-05,
      "loss": 0.9635,
      "step": 88280
    },
    {
      "epoch": 0.30943443838122575,
      "grad_norm": 3.359375,
      "learning_rate": 4.982541128946419e-05,
      "loss": 1.0145,
      "step": 88290
    },
    {
      "epoch": 0.30946948588812134,
      "grad_norm": 3.265625,
      "learning_rate": 4.982476226080049e-05,
      "loss": 0.9888,
      "step": 88300
    },
    {
      "epoch": 0.30950453339501693,
      "grad_norm": 3.25,
      "learning_rate": 4.982411323213679e-05,
      "loss": 0.9418,
      "step": 88310
    },
    {
      "epoch": 0.3095395809019125,
      "grad_norm": 3.578125,
      "learning_rate": 4.9823464203473085e-05,
      "loss": 0.9249,
      "step": 88320
    },
    {
      "epoch": 0.3095746284088081,
      "grad_norm": 3.078125,
      "learning_rate": 4.982281517480939e-05,
      "loss": 0.9289,
      "step": 88330
    },
    {
      "epoch": 0.30960967591570376,
      "grad_norm": 2.671875,
      "learning_rate": 4.982216614614568e-05,
      "loss": 0.9146,
      "step": 88340
    },
    {
      "epoch": 0.30964472342259936,
      "grad_norm": 3.71875,
      "learning_rate": 4.982151711748198e-05,
      "loss": 0.9543,
      "step": 88350
    },
    {
      "epoch": 0.30967977092949495,
      "grad_norm": 2.890625,
      "learning_rate": 4.982086808881828e-05,
      "loss": 0.9397,
      "step": 88360
    },
    {
      "epoch": 0.30971481843639054,
      "grad_norm": 3.28125,
      "learning_rate": 4.982021906015458e-05,
      "loss": 0.9382,
      "step": 88370
    },
    {
      "epoch": 0.30974986594328613,
      "grad_norm": 3.484375,
      "learning_rate": 4.9819570031490873e-05,
      "loss": 0.9943,
      "step": 88380
    },
    {
      "epoch": 0.3097849134501817,
      "grad_norm": 3.1875,
      "learning_rate": 4.9818921002827175e-05,
      "loss": 0.9879,
      "step": 88390
    },
    {
      "epoch": 0.3098199609570773,
      "grad_norm": 2.65625,
      "learning_rate": 4.981827197416347e-05,
      "loss": 0.9771,
      "step": 88400
    },
    {
      "epoch": 0.3098550084639729,
      "grad_norm": 3.875,
      "learning_rate": 4.981762294549977e-05,
      "loss": 0.9551,
      "step": 88410
    },
    {
      "epoch": 0.3098900559708685,
      "grad_norm": 2.90625,
      "learning_rate": 4.9816973916836065e-05,
      "loss": 0.9823,
      "step": 88420
    },
    {
      "epoch": 0.3099251034777641,
      "grad_norm": 3.5,
      "learning_rate": 4.981632488817236e-05,
      "loss": 0.9657,
      "step": 88430
    },
    {
      "epoch": 0.30996015098465973,
      "grad_norm": 3.0625,
      "learning_rate": 4.981567585950866e-05,
      "loss": 0.9279,
      "step": 88440
    },
    {
      "epoch": 0.3099951984915553,
      "grad_norm": 2.9375,
      "learning_rate": 4.9815026830844956e-05,
      "loss": 1.0042,
      "step": 88450
    },
    {
      "epoch": 0.3100302459984509,
      "grad_norm": 3.125,
      "learning_rate": 4.981437780218126e-05,
      "loss": 1.0418,
      "step": 88460
    },
    {
      "epoch": 0.3100652935053465,
      "grad_norm": 3.015625,
      "learning_rate": 4.981372877351755e-05,
      "loss": 1.0209,
      "step": 88470
    },
    {
      "epoch": 0.3101003410122421,
      "grad_norm": 2.9375,
      "learning_rate": 4.9813079744853853e-05,
      "loss": 1.0258,
      "step": 88480
    },
    {
      "epoch": 0.3101353885191377,
      "grad_norm": 3.28125,
      "learning_rate": 4.981243071619015e-05,
      "loss": 0.9589,
      "step": 88490
    },
    {
      "epoch": 0.3101704360260333,
      "grad_norm": 2.765625,
      "learning_rate": 4.981178168752645e-05,
      "loss": 0.8883,
      "step": 88500
    },
    {
      "epoch": 0.31020548353292887,
      "grad_norm": 3.0,
      "learning_rate": 4.9811132658862744e-05,
      "loss": 0.9563,
      "step": 88510
    },
    {
      "epoch": 0.31024053103982446,
      "grad_norm": 2.96875,
      "learning_rate": 4.9810483630199045e-05,
      "loss": 1.0428,
      "step": 88520
    },
    {
      "epoch": 0.31027557854672005,
      "grad_norm": 2.640625,
      "learning_rate": 4.980983460153534e-05,
      "loss": 0.9436,
      "step": 88530
    },
    {
      "epoch": 0.3103106260536157,
      "grad_norm": 2.640625,
      "learning_rate": 4.980918557287164e-05,
      "loss": 0.9306,
      "step": 88540
    },
    {
      "epoch": 0.3103456735605113,
      "grad_norm": 3.125,
      "learning_rate": 4.980853654420794e-05,
      "loss": 0.9583,
      "step": 88550
    },
    {
      "epoch": 0.3103807210674069,
      "grad_norm": 2.421875,
      "learning_rate": 4.980788751554424e-05,
      "loss": 0.9677,
      "step": 88560
    },
    {
      "epoch": 0.3104157685743025,
      "grad_norm": 3.53125,
      "learning_rate": 4.980723848688054e-05,
      "loss": 1.0336,
      "step": 88570
    },
    {
      "epoch": 0.31045081608119807,
      "grad_norm": 3.109375,
      "learning_rate": 4.9806589458216833e-05,
      "loss": 0.9591,
      "step": 88580
    },
    {
      "epoch": 0.31048586358809366,
      "grad_norm": 3.703125,
      "learning_rate": 4.9805940429553135e-05,
      "loss": 0.9618,
      "step": 88590
    },
    {
      "epoch": 0.31052091109498925,
      "grad_norm": 3.234375,
      "learning_rate": 4.980529140088943e-05,
      "loss": 0.9657,
      "step": 88600
    },
    {
      "epoch": 0.31055595860188484,
      "grad_norm": 3.015625,
      "learning_rate": 4.980464237222573e-05,
      "loss": 0.9193,
      "step": 88610
    },
    {
      "epoch": 0.31059100610878043,
      "grad_norm": 3.296875,
      "learning_rate": 4.9803993343562025e-05,
      "loss": 0.9345,
      "step": 88620
    },
    {
      "epoch": 0.310626053615676,
      "grad_norm": 3.9375,
      "learning_rate": 4.980334431489833e-05,
      "loss": 1.0336,
      "step": 88630
    },
    {
      "epoch": 0.31066110112257167,
      "grad_norm": 3.671875,
      "learning_rate": 4.980269528623462e-05,
      "loss": 0.973,
      "step": 88640
    },
    {
      "epoch": 0.31069614862946726,
      "grad_norm": 3.09375,
      "learning_rate": 4.980204625757092e-05,
      "loss": 0.9007,
      "step": 88650
    },
    {
      "epoch": 0.31073119613636285,
      "grad_norm": 3.109375,
      "learning_rate": 4.980139722890722e-05,
      "loss": 0.9362,
      "step": 88660
    },
    {
      "epoch": 0.31076624364325844,
      "grad_norm": 3.546875,
      "learning_rate": 4.980074820024352e-05,
      "loss": 0.9506,
      "step": 88670
    },
    {
      "epoch": 0.31080129115015404,
      "grad_norm": 3.28125,
      "learning_rate": 4.980009917157982e-05,
      "loss": 0.9482,
      "step": 88680
    },
    {
      "epoch": 0.3108363386570496,
      "grad_norm": 3.375,
      "learning_rate": 4.9799450142916115e-05,
      "loss": 1.0594,
      "step": 88690
    },
    {
      "epoch": 0.3108713861639452,
      "grad_norm": 3.265625,
      "learning_rate": 4.9798801114252416e-05,
      "loss": 1.0153,
      "step": 88700
    },
    {
      "epoch": 0.3109064336708408,
      "grad_norm": 3.3125,
      "learning_rate": 4.979815208558871e-05,
      "loss": 1.0138,
      "step": 88710
    },
    {
      "epoch": 0.3109414811777364,
      "grad_norm": 3.34375,
      "learning_rate": 4.979750305692501e-05,
      "loss": 0.9085,
      "step": 88720
    },
    {
      "epoch": 0.310976528684632,
      "grad_norm": 2.8125,
      "learning_rate": 4.979685402826131e-05,
      "loss": 0.9236,
      "step": 88730
    },
    {
      "epoch": 0.31101157619152764,
      "grad_norm": 2.84375,
      "learning_rate": 4.979620499959761e-05,
      "loss": 0.9504,
      "step": 88740
    },
    {
      "epoch": 0.31104662369842323,
      "grad_norm": 2.96875,
      "learning_rate": 4.97955559709339e-05,
      "loss": 0.9986,
      "step": 88750
    },
    {
      "epoch": 0.3110816712053188,
      "grad_norm": 2.84375,
      "learning_rate": 4.9794906942270204e-05,
      "loss": 0.982,
      "step": 88760
    },
    {
      "epoch": 0.3111167187122144,
      "grad_norm": 2.953125,
      "learning_rate": 4.97942579136065e-05,
      "loss": 0.9798,
      "step": 88770
    },
    {
      "epoch": 0.31115176621911,
      "grad_norm": 3.328125,
      "learning_rate": 4.97936088849428e-05,
      "loss": 0.9965,
      "step": 88780
    },
    {
      "epoch": 0.3111868137260056,
      "grad_norm": 3.453125,
      "learning_rate": 4.9792959856279095e-05,
      "loss": 1.1241,
      "step": 88790
    },
    {
      "epoch": 0.3112218612329012,
      "grad_norm": 3.34375,
      "learning_rate": 4.979231082761539e-05,
      "loss": 0.9607,
      "step": 88800
    },
    {
      "epoch": 0.3112569087397968,
      "grad_norm": 2.796875,
      "learning_rate": 4.979166179895169e-05,
      "loss": 0.9163,
      "step": 88810
    },
    {
      "epoch": 0.31129195624669237,
      "grad_norm": 3.0,
      "learning_rate": 4.9791012770287985e-05,
      "loss": 0.9952,
      "step": 88820
    },
    {
      "epoch": 0.31132700375358796,
      "grad_norm": 3.15625,
      "learning_rate": 4.979036374162429e-05,
      "loss": 0.9009,
      "step": 88830
    },
    {
      "epoch": 0.3113620512604836,
      "grad_norm": 2.84375,
      "learning_rate": 4.978971471296058e-05,
      "loss": 0.9465,
      "step": 88840
    },
    {
      "epoch": 0.3113970987673792,
      "grad_norm": 3.515625,
      "learning_rate": 4.978906568429688e-05,
      "loss": 0.9634,
      "step": 88850
    },
    {
      "epoch": 0.3114321462742748,
      "grad_norm": 2.9375,
      "learning_rate": 4.978841665563318e-05,
      "loss": 1.027,
      "step": 88860
    },
    {
      "epoch": 0.3114671937811704,
      "grad_norm": 2.828125,
      "learning_rate": 4.978776762696948e-05,
      "loss": 0.9099,
      "step": 88870
    },
    {
      "epoch": 0.311502241288066,
      "grad_norm": 2.953125,
      "learning_rate": 4.9787118598305773e-05,
      "loss": 0.9512,
      "step": 88880
    },
    {
      "epoch": 0.31153728879496156,
      "grad_norm": 3.03125,
      "learning_rate": 4.9786469569642075e-05,
      "loss": 0.9452,
      "step": 88890
    },
    {
      "epoch": 0.31157233630185716,
      "grad_norm": 3.015625,
      "learning_rate": 4.9785820540978376e-05,
      "loss": 0.9084,
      "step": 88900
    },
    {
      "epoch": 0.31160738380875275,
      "grad_norm": 3.015625,
      "learning_rate": 4.978517151231467e-05,
      "loss": 1.0132,
      "step": 88910
    },
    {
      "epoch": 0.31164243131564834,
      "grad_norm": 2.921875,
      "learning_rate": 4.978452248365097e-05,
      "loss": 1.0066,
      "step": 88920
    },
    {
      "epoch": 0.311677478822544,
      "grad_norm": 3.125,
      "learning_rate": 4.978387345498727e-05,
      "loss": 0.9741,
      "step": 88930
    },
    {
      "epoch": 0.3117125263294396,
      "grad_norm": 3.40625,
      "learning_rate": 4.978322442632357e-05,
      "loss": 0.9386,
      "step": 88940
    },
    {
      "epoch": 0.31174757383633517,
      "grad_norm": 3.28125,
      "learning_rate": 4.978257539765986e-05,
      "loss": 0.9968,
      "step": 88950
    },
    {
      "epoch": 0.31178262134323076,
      "grad_norm": 2.859375,
      "learning_rate": 4.9781926368996164e-05,
      "loss": 0.9946,
      "step": 88960
    },
    {
      "epoch": 0.31181766885012635,
      "grad_norm": 2.921875,
      "learning_rate": 4.978127734033246e-05,
      "loss": 0.9878,
      "step": 88970
    },
    {
      "epoch": 0.31185271635702194,
      "grad_norm": 3.09375,
      "learning_rate": 4.978062831166876e-05,
      "loss": 1.0193,
      "step": 88980
    },
    {
      "epoch": 0.31188776386391753,
      "grad_norm": 2.8125,
      "learning_rate": 4.9779979283005055e-05,
      "loss": 0.9443,
      "step": 88990
    },
    {
      "epoch": 0.3119228113708131,
      "grad_norm": 2.578125,
      "learning_rate": 4.9779330254341356e-05,
      "loss": 0.9465,
      "step": 89000
    },
    {
      "epoch": 0.3119578588777087,
      "grad_norm": 3.03125,
      "learning_rate": 4.977868122567765e-05,
      "loss": 0.9378,
      "step": 89010
    },
    {
      "epoch": 0.3119929063846043,
      "grad_norm": 2.90625,
      "learning_rate": 4.977803219701395e-05,
      "loss": 0.9561,
      "step": 89020
    },
    {
      "epoch": 0.31202795389149995,
      "grad_norm": 3.171875,
      "learning_rate": 4.977738316835025e-05,
      "loss": 1.0074,
      "step": 89030
    },
    {
      "epoch": 0.31206300139839555,
      "grad_norm": 2.84375,
      "learning_rate": 4.977673413968655e-05,
      "loss": 0.934,
      "step": 89040
    },
    {
      "epoch": 0.31209804890529114,
      "grad_norm": 3.234375,
      "learning_rate": 4.977608511102285e-05,
      "loss": 0.9939,
      "step": 89050
    },
    {
      "epoch": 0.31213309641218673,
      "grad_norm": 2.859375,
      "learning_rate": 4.9775436082359144e-05,
      "loss": 0.9617,
      "step": 89060
    },
    {
      "epoch": 0.3121681439190823,
      "grad_norm": 3.1875,
      "learning_rate": 4.9774787053695446e-05,
      "loss": 1.0159,
      "step": 89070
    },
    {
      "epoch": 0.3122031914259779,
      "grad_norm": 3.203125,
      "learning_rate": 4.977413802503174e-05,
      "loss": 0.9823,
      "step": 89080
    },
    {
      "epoch": 0.3122382389328735,
      "grad_norm": 3.203125,
      "learning_rate": 4.977348899636804e-05,
      "loss": 0.981,
      "step": 89090
    },
    {
      "epoch": 0.3122732864397691,
      "grad_norm": 2.984375,
      "learning_rate": 4.9772839967704336e-05,
      "loss": 0.9486,
      "step": 89100
    },
    {
      "epoch": 0.3123083339466647,
      "grad_norm": 2.734375,
      "learning_rate": 4.977219093904064e-05,
      "loss": 0.9786,
      "step": 89110
    },
    {
      "epoch": 0.3123433814535603,
      "grad_norm": 2.953125,
      "learning_rate": 4.977154191037693e-05,
      "loss": 0.9515,
      "step": 89120
    },
    {
      "epoch": 0.3123784289604559,
      "grad_norm": 3.0,
      "learning_rate": 4.9770892881713234e-05,
      "loss": 0.9803,
      "step": 89130
    },
    {
      "epoch": 0.3124134764673515,
      "grad_norm": 3.0625,
      "learning_rate": 4.977024385304953e-05,
      "loss": 0.9991,
      "step": 89140
    },
    {
      "epoch": 0.3124485239742471,
      "grad_norm": 3.4375,
      "learning_rate": 4.976959482438583e-05,
      "loss": 0.9581,
      "step": 89150
    },
    {
      "epoch": 0.3124835714811427,
      "grad_norm": 3.046875,
      "learning_rate": 4.9768945795722124e-05,
      "loss": 0.9951,
      "step": 89160
    },
    {
      "epoch": 0.3125186189880383,
      "grad_norm": 3.421875,
      "learning_rate": 4.9768296767058426e-05,
      "loss": 1.0261,
      "step": 89170
    },
    {
      "epoch": 0.3125536664949339,
      "grad_norm": 3.0,
      "learning_rate": 4.976764773839472e-05,
      "loss": 0.9968,
      "step": 89180
    },
    {
      "epoch": 0.31258871400182947,
      "grad_norm": 3.1875,
      "learning_rate": 4.9766998709731015e-05,
      "loss": 0.939,
      "step": 89190
    },
    {
      "epoch": 0.31262376150872506,
      "grad_norm": 3.21875,
      "learning_rate": 4.9766349681067316e-05,
      "loss": 0.9873,
      "step": 89200
    },
    {
      "epoch": 0.31265880901562065,
      "grad_norm": 3.109375,
      "learning_rate": 4.976570065240361e-05,
      "loss": 1.0214,
      "step": 89210
    },
    {
      "epoch": 0.31269385652251624,
      "grad_norm": 2.84375,
      "learning_rate": 4.976505162373991e-05,
      "loss": 0.975,
      "step": 89220
    },
    {
      "epoch": 0.3127289040294119,
      "grad_norm": 2.484375,
      "learning_rate": 4.976440259507621e-05,
      "loss": 0.9585,
      "step": 89230
    },
    {
      "epoch": 0.3127639515363075,
      "grad_norm": 3.21875,
      "learning_rate": 4.976375356641251e-05,
      "loss": 1.0371,
      "step": 89240
    },
    {
      "epoch": 0.3127989990432031,
      "grad_norm": 3.203125,
      "learning_rate": 4.97631045377488e-05,
      "loss": 1.015,
      "step": 89250
    },
    {
      "epoch": 0.31283404655009867,
      "grad_norm": 3.34375,
      "learning_rate": 4.9762455509085104e-05,
      "loss": 0.9877,
      "step": 89260
    },
    {
      "epoch": 0.31286909405699426,
      "grad_norm": 2.8125,
      "learning_rate": 4.9761806480421406e-05,
      "loss": 0.9223,
      "step": 89270
    },
    {
      "epoch": 0.31290414156388985,
      "grad_norm": 3.25,
      "learning_rate": 4.97611574517577e-05,
      "loss": 1.0007,
      "step": 89280
    },
    {
      "epoch": 0.31293918907078544,
      "grad_norm": 2.921875,
      "learning_rate": 4.9760508423094e-05,
      "loss": 1.0479,
      "step": 89290
    },
    {
      "epoch": 0.31297423657768103,
      "grad_norm": 3.140625,
      "learning_rate": 4.9759859394430296e-05,
      "loss": 0.905,
      "step": 89300
    },
    {
      "epoch": 0.3130092840845766,
      "grad_norm": 3.03125,
      "learning_rate": 4.97592103657666e-05,
      "loss": 0.9586,
      "step": 89310
    },
    {
      "epoch": 0.3130443315914722,
      "grad_norm": 3.46875,
      "learning_rate": 4.975856133710289e-05,
      "loss": 1.0122,
      "step": 89320
    },
    {
      "epoch": 0.31307937909836786,
      "grad_norm": 2.90625,
      "learning_rate": 4.9757912308439194e-05,
      "loss": 0.9925,
      "step": 89330
    },
    {
      "epoch": 0.31311442660526345,
      "grad_norm": 3.359375,
      "learning_rate": 4.975726327977549e-05,
      "loss": 1.0373,
      "step": 89340
    },
    {
      "epoch": 0.31314947411215904,
      "grad_norm": 3.015625,
      "learning_rate": 4.975661425111179e-05,
      "loss": 0.9839,
      "step": 89350
    },
    {
      "epoch": 0.31318452161905463,
      "grad_norm": 3.015625,
      "learning_rate": 4.9755965222448084e-05,
      "loss": 1.0026,
      "step": 89360
    },
    {
      "epoch": 0.3132195691259502,
      "grad_norm": 2.9375,
      "learning_rate": 4.9755316193784386e-05,
      "loss": 0.919,
      "step": 89370
    },
    {
      "epoch": 0.3132546166328458,
      "grad_norm": 3.15625,
      "learning_rate": 4.975466716512068e-05,
      "loss": 0.947,
      "step": 89380
    },
    {
      "epoch": 0.3132896641397414,
      "grad_norm": 3.25,
      "learning_rate": 4.975401813645698e-05,
      "loss": 1.0172,
      "step": 89390
    },
    {
      "epoch": 0.313324711646637,
      "grad_norm": 3.078125,
      "learning_rate": 4.9753369107793276e-05,
      "loss": 1.013,
      "step": 89400
    },
    {
      "epoch": 0.3133597591535326,
      "grad_norm": 3.0625,
      "learning_rate": 4.975272007912958e-05,
      "loss": 0.937,
      "step": 89410
    },
    {
      "epoch": 0.31339480666042824,
      "grad_norm": 3.109375,
      "learning_rate": 4.975207105046588e-05,
      "loss": 0.9412,
      "step": 89420
    },
    {
      "epoch": 0.31342985416732383,
      "grad_norm": 2.984375,
      "learning_rate": 4.9751422021802174e-05,
      "loss": 1.0269,
      "step": 89430
    },
    {
      "epoch": 0.3134649016742194,
      "grad_norm": 3.03125,
      "learning_rate": 4.9750772993138475e-05,
      "loss": 0.945,
      "step": 89440
    },
    {
      "epoch": 0.313499949181115,
      "grad_norm": 3.0,
      "learning_rate": 4.975012396447477e-05,
      "loss": 0.9299,
      "step": 89450
    },
    {
      "epoch": 0.3135349966880106,
      "grad_norm": 3.203125,
      "learning_rate": 4.974947493581107e-05,
      "loss": 0.9959,
      "step": 89460
    },
    {
      "epoch": 0.3135700441949062,
      "grad_norm": 3.09375,
      "learning_rate": 4.9748825907147366e-05,
      "loss": 0.9392,
      "step": 89470
    },
    {
      "epoch": 0.3136050917018018,
      "grad_norm": 2.921875,
      "learning_rate": 4.974817687848367e-05,
      "loss": 0.8326,
      "step": 89480
    },
    {
      "epoch": 0.3136401392086974,
      "grad_norm": 3.515625,
      "learning_rate": 4.974752784981996e-05,
      "loss": 0.9549,
      "step": 89490
    },
    {
      "epoch": 0.31367518671559297,
      "grad_norm": 2.859375,
      "learning_rate": 4.974687882115626e-05,
      "loss": 1.0202,
      "step": 89500
    },
    {
      "epoch": 0.31371023422248856,
      "grad_norm": 2.5,
      "learning_rate": 4.974622979249256e-05,
      "loss": 0.9706,
      "step": 89510
    },
    {
      "epoch": 0.3137452817293842,
      "grad_norm": 2.6875,
      "learning_rate": 4.974558076382886e-05,
      "loss": 0.9185,
      "step": 89520
    },
    {
      "epoch": 0.3137803292362798,
      "grad_norm": 3.5625,
      "learning_rate": 4.9744931735165154e-05,
      "loss": 1.0135,
      "step": 89530
    },
    {
      "epoch": 0.3138153767431754,
      "grad_norm": 2.8125,
      "learning_rate": 4.9744282706501455e-05,
      "loss": 0.9743,
      "step": 89540
    },
    {
      "epoch": 0.313850424250071,
      "grad_norm": 3.140625,
      "learning_rate": 4.974363367783775e-05,
      "loss": 1.0451,
      "step": 89550
    },
    {
      "epoch": 0.31388547175696657,
      "grad_norm": 2.921875,
      "learning_rate": 4.9742984649174044e-05,
      "loss": 0.8786,
      "step": 89560
    },
    {
      "epoch": 0.31392051926386216,
      "grad_norm": 3.34375,
      "learning_rate": 4.9742335620510346e-05,
      "loss": 0.9651,
      "step": 89570
    },
    {
      "epoch": 0.31395556677075775,
      "grad_norm": 3.46875,
      "learning_rate": 4.974168659184664e-05,
      "loss": 1.0161,
      "step": 89580
    },
    {
      "epoch": 0.31399061427765335,
      "grad_norm": 3.4375,
      "learning_rate": 4.974103756318294e-05,
      "loss": 0.9837,
      "step": 89590
    },
    {
      "epoch": 0.31402566178454894,
      "grad_norm": 3.65625,
      "learning_rate": 4.9740388534519236e-05,
      "loss": 0.9957,
      "step": 89600
    },
    {
      "epoch": 0.31406070929144453,
      "grad_norm": 3.40625,
      "learning_rate": 4.973973950585554e-05,
      "loss": 1.0489,
      "step": 89610
    },
    {
      "epoch": 0.3140957567983402,
      "grad_norm": 3.09375,
      "learning_rate": 4.973909047719183e-05,
      "loss": 0.9856,
      "step": 89620
    },
    {
      "epoch": 0.31413080430523577,
      "grad_norm": 2.75,
      "learning_rate": 4.9738441448528134e-05,
      "loss": 0.9253,
      "step": 89630
    },
    {
      "epoch": 0.31416585181213136,
      "grad_norm": 2.984375,
      "learning_rate": 4.9737792419864435e-05,
      "loss": 0.9395,
      "step": 89640
    },
    {
      "epoch": 0.31420089931902695,
      "grad_norm": 2.84375,
      "learning_rate": 4.973714339120073e-05,
      "loss": 0.8881,
      "step": 89650
    },
    {
      "epoch": 0.31423594682592254,
      "grad_norm": 2.96875,
      "learning_rate": 4.973649436253703e-05,
      "loss": 1.0179,
      "step": 89660
    },
    {
      "epoch": 0.31427099433281813,
      "grad_norm": 3.265625,
      "learning_rate": 4.9735845333873326e-05,
      "loss": 0.99,
      "step": 89670
    },
    {
      "epoch": 0.3143060418397137,
      "grad_norm": 3.578125,
      "learning_rate": 4.973519630520963e-05,
      "loss": 1.0108,
      "step": 89680
    },
    {
      "epoch": 0.3143410893466093,
      "grad_norm": 3.421875,
      "learning_rate": 4.973454727654592e-05,
      "loss": 0.9908,
      "step": 89690
    },
    {
      "epoch": 0.3143761368535049,
      "grad_norm": 2.765625,
      "learning_rate": 4.973389824788222e-05,
      "loss": 0.9703,
      "step": 89700
    },
    {
      "epoch": 0.3144111843604005,
      "grad_norm": 3.4375,
      "learning_rate": 4.973324921921852e-05,
      "loss": 0.9465,
      "step": 89710
    },
    {
      "epoch": 0.31444623186729614,
      "grad_norm": 3.0625,
      "learning_rate": 4.973260019055482e-05,
      "loss": 0.9873,
      "step": 89720
    },
    {
      "epoch": 0.31448127937419174,
      "grad_norm": 3.5,
      "learning_rate": 4.9731951161891114e-05,
      "loss": 0.9861,
      "step": 89730
    },
    {
      "epoch": 0.3145163268810873,
      "grad_norm": 3.53125,
      "learning_rate": 4.9731302133227415e-05,
      "loss": 0.9912,
      "step": 89740
    },
    {
      "epoch": 0.3145513743879829,
      "grad_norm": 3.265625,
      "learning_rate": 4.973065310456371e-05,
      "loss": 0.9788,
      "step": 89750
    },
    {
      "epoch": 0.3145864218948785,
      "grad_norm": 2.984375,
      "learning_rate": 4.973000407590001e-05,
      "loss": 0.9816,
      "step": 89760
    },
    {
      "epoch": 0.3146214694017741,
      "grad_norm": 3.25,
      "learning_rate": 4.9729355047236306e-05,
      "loss": 0.9628,
      "step": 89770
    },
    {
      "epoch": 0.3146565169086697,
      "grad_norm": 3.359375,
      "learning_rate": 4.972870601857261e-05,
      "loss": 0.9804,
      "step": 89780
    },
    {
      "epoch": 0.3146915644155653,
      "grad_norm": 2.921875,
      "learning_rate": 4.972805698990891e-05,
      "loss": 0.9312,
      "step": 89790
    },
    {
      "epoch": 0.3147266119224609,
      "grad_norm": 2.859375,
      "learning_rate": 4.97274079612452e-05,
      "loss": 0.9548,
      "step": 89800
    },
    {
      "epoch": 0.31476165942935647,
      "grad_norm": 3.203125,
      "learning_rate": 4.9726758932581504e-05,
      "loss": 0.9315,
      "step": 89810
    },
    {
      "epoch": 0.3147967069362521,
      "grad_norm": 2.859375,
      "learning_rate": 4.97261099039178e-05,
      "loss": 0.9047,
      "step": 89820
    },
    {
      "epoch": 0.3148317544431477,
      "grad_norm": 3.03125,
      "learning_rate": 4.97254608752541e-05,
      "loss": 0.9857,
      "step": 89830
    },
    {
      "epoch": 0.3148668019500433,
      "grad_norm": 3.125,
      "learning_rate": 4.9724811846590395e-05,
      "loss": 1.018,
      "step": 89840
    },
    {
      "epoch": 0.3149018494569389,
      "grad_norm": 3.234375,
      "learning_rate": 4.9724162817926696e-05,
      "loss": 1.0662,
      "step": 89850
    },
    {
      "epoch": 0.3149368969638345,
      "grad_norm": 2.828125,
      "learning_rate": 4.972351378926299e-05,
      "loss": 0.9199,
      "step": 89860
    },
    {
      "epoch": 0.31497194447073007,
      "grad_norm": 2.984375,
      "learning_rate": 4.972286476059929e-05,
      "loss": 0.9377,
      "step": 89870
    },
    {
      "epoch": 0.31500699197762566,
      "grad_norm": 2.9375,
      "learning_rate": 4.972221573193559e-05,
      "loss": 0.9612,
      "step": 89880
    },
    {
      "epoch": 0.31504203948452125,
      "grad_norm": 3.421875,
      "learning_rate": 4.972156670327189e-05,
      "loss": 0.9741,
      "step": 89890
    },
    {
      "epoch": 0.31507708699141684,
      "grad_norm": 2.9375,
      "learning_rate": 4.972091767460818e-05,
      "loss": 1.0221,
      "step": 89900
    },
    {
      "epoch": 0.31511213449831244,
      "grad_norm": 3.0,
      "learning_rate": 4.9720268645944484e-05,
      "loss": 0.9164,
      "step": 89910
    },
    {
      "epoch": 0.3151471820052081,
      "grad_norm": 3.28125,
      "learning_rate": 4.9719619617280786e-05,
      "loss": 1.0813,
      "step": 89920
    },
    {
      "epoch": 0.3151822295121037,
      "grad_norm": 3.625,
      "learning_rate": 4.9718970588617074e-05,
      "loss": 0.9543,
      "step": 89930
    },
    {
      "epoch": 0.31521727701899926,
      "grad_norm": 3.3125,
      "learning_rate": 4.9718321559953375e-05,
      "loss": 1.0718,
      "step": 89940
    },
    {
      "epoch": 0.31525232452589486,
      "grad_norm": 2.953125,
      "learning_rate": 4.971767253128967e-05,
      "loss": 0.9912,
      "step": 89950
    },
    {
      "epoch": 0.31528737203279045,
      "grad_norm": 3.203125,
      "learning_rate": 4.971702350262597e-05,
      "loss": 1.0322,
      "step": 89960
    },
    {
      "epoch": 0.31532241953968604,
      "grad_norm": 2.65625,
      "learning_rate": 4.9716374473962266e-05,
      "loss": 0.9424,
      "step": 89970
    },
    {
      "epoch": 0.31535746704658163,
      "grad_norm": 3.234375,
      "learning_rate": 4.971572544529857e-05,
      "loss": 0.9605,
      "step": 89980
    },
    {
      "epoch": 0.3153925145534772,
      "grad_norm": 3.25,
      "learning_rate": 4.971507641663486e-05,
      "loss": 0.9875,
      "step": 89990
    },
    {
      "epoch": 0.3154275620603728,
      "grad_norm": 2.9375,
      "learning_rate": 4.971442738797116e-05,
      "loss": 0.9867,
      "step": 90000
    },
    {
      "epoch": 0.3154275620603728,
      "eval_loss": 0.9095386862754822,
      "eval_runtime": 553.437,
      "eval_samples_per_second": 687.406,
      "eval_steps_per_second": 57.284,
      "step": 90000
    },
    {
      "epoch": 0.31546260956726846,
      "grad_norm": 3.25,
      "learning_rate": 4.9713778359307464e-05,
      "loss": 0.9903,
      "step": 90010
    },
    {
      "epoch": 0.31549765707416405,
      "grad_norm": 2.78125,
      "learning_rate": 4.971312933064376e-05,
      "loss": 1.0132,
      "step": 90020
    },
    {
      "epoch": 0.31553270458105964,
      "grad_norm": 3.09375,
      "learning_rate": 4.971248030198006e-05,
      "loss": 0.9007,
      "step": 90030
    },
    {
      "epoch": 0.31556775208795523,
      "grad_norm": 3.359375,
      "learning_rate": 4.9711831273316355e-05,
      "loss": 0.9911,
      "step": 90040
    },
    {
      "epoch": 0.3156027995948508,
      "grad_norm": 3.078125,
      "learning_rate": 4.9711182244652656e-05,
      "loss": 0.948,
      "step": 90050
    },
    {
      "epoch": 0.3156378471017464,
      "grad_norm": 2.875,
      "learning_rate": 4.971053321598895e-05,
      "loss": 0.9848,
      "step": 90060
    },
    {
      "epoch": 0.315672894608642,
      "grad_norm": 3.015625,
      "learning_rate": 4.970988418732525e-05,
      "loss": 0.9673,
      "step": 90070
    },
    {
      "epoch": 0.3157079421155376,
      "grad_norm": 2.609375,
      "learning_rate": 4.970923515866155e-05,
      "loss": 0.9651,
      "step": 90080
    },
    {
      "epoch": 0.3157429896224332,
      "grad_norm": 2.96875,
      "learning_rate": 4.970858612999785e-05,
      "loss": 0.9331,
      "step": 90090
    },
    {
      "epoch": 0.3157780371293288,
      "grad_norm": 2.953125,
      "learning_rate": 4.970793710133414e-05,
      "loss": 0.9177,
      "step": 90100
    },
    {
      "epoch": 0.31581308463622443,
      "grad_norm": 3.203125,
      "learning_rate": 4.9707288072670444e-05,
      "loss": 0.887,
      "step": 90110
    },
    {
      "epoch": 0.31584813214312,
      "grad_norm": 3.015625,
      "learning_rate": 4.970663904400674e-05,
      "loss": 1.0279,
      "step": 90120
    },
    {
      "epoch": 0.3158831796500156,
      "grad_norm": 3.140625,
      "learning_rate": 4.970599001534304e-05,
      "loss": 0.9131,
      "step": 90130
    },
    {
      "epoch": 0.3159182271569112,
      "grad_norm": 2.640625,
      "learning_rate": 4.970534098667934e-05,
      "loss": 0.9175,
      "step": 90140
    },
    {
      "epoch": 0.3159532746638068,
      "grad_norm": 3.234375,
      "learning_rate": 4.9704691958015636e-05,
      "loss": 0.8952,
      "step": 90150
    },
    {
      "epoch": 0.3159883221707024,
      "grad_norm": 3.46875,
      "learning_rate": 4.970404292935194e-05,
      "loss": 1.0136,
      "step": 90160
    },
    {
      "epoch": 0.316023369677598,
      "grad_norm": 3.3125,
      "learning_rate": 4.970339390068823e-05,
      "loss": 0.9994,
      "step": 90170
    },
    {
      "epoch": 0.31605841718449357,
      "grad_norm": 2.9375,
      "learning_rate": 4.9702744872024534e-05,
      "loss": 0.9775,
      "step": 90180
    },
    {
      "epoch": 0.31609346469138916,
      "grad_norm": 3.234375,
      "learning_rate": 4.970209584336083e-05,
      "loss": 0.9795,
      "step": 90190
    },
    {
      "epoch": 0.31612851219828475,
      "grad_norm": 3.125,
      "learning_rate": 4.970144681469713e-05,
      "loss": 0.9252,
      "step": 90200
    },
    {
      "epoch": 0.3161635597051804,
      "grad_norm": 2.953125,
      "learning_rate": 4.9700797786033424e-05,
      "loss": 1.0517,
      "step": 90210
    },
    {
      "epoch": 0.316198607212076,
      "grad_norm": 3.171875,
      "learning_rate": 4.9700148757369726e-05,
      "loss": 0.9642,
      "step": 90220
    },
    {
      "epoch": 0.3162336547189716,
      "grad_norm": 3.25,
      "learning_rate": 4.969949972870602e-05,
      "loss": 0.9577,
      "step": 90230
    },
    {
      "epoch": 0.31626870222586717,
      "grad_norm": 3.15625,
      "learning_rate": 4.969885070004232e-05,
      "loss": 0.9971,
      "step": 90240
    },
    {
      "epoch": 0.31630374973276276,
      "grad_norm": 3.546875,
      "learning_rate": 4.9698201671378616e-05,
      "loss": 0.9773,
      "step": 90250
    },
    {
      "epoch": 0.31633879723965835,
      "grad_norm": 2.953125,
      "learning_rate": 4.969755264271492e-05,
      "loss": 0.9215,
      "step": 90260
    },
    {
      "epoch": 0.31637384474655394,
      "grad_norm": 3.4375,
      "learning_rate": 4.969690361405121e-05,
      "loss": 0.976,
      "step": 90270
    },
    {
      "epoch": 0.31640889225344954,
      "grad_norm": 3.15625,
      "learning_rate": 4.9696254585387514e-05,
      "loss": 0.9465,
      "step": 90280
    },
    {
      "epoch": 0.3164439397603451,
      "grad_norm": 3.265625,
      "learning_rate": 4.9695605556723815e-05,
      "loss": 0.9704,
      "step": 90290
    },
    {
      "epoch": 0.3164789872672407,
      "grad_norm": 2.90625,
      "learning_rate": 4.969495652806011e-05,
      "loss": 0.906,
      "step": 90300
    },
    {
      "epoch": 0.31651403477413637,
      "grad_norm": 3.53125,
      "learning_rate": 4.9694307499396404e-05,
      "loss": 0.9794,
      "step": 90310
    },
    {
      "epoch": 0.31654908228103196,
      "grad_norm": 2.9375,
      "learning_rate": 4.96936584707327e-05,
      "loss": 0.9785,
      "step": 90320
    },
    {
      "epoch": 0.31658412978792755,
      "grad_norm": 3.703125,
      "learning_rate": 4.9693009442069e-05,
      "loss": 0.9775,
      "step": 90330
    },
    {
      "epoch": 0.31661917729482314,
      "grad_norm": 3.125,
      "learning_rate": 4.9692360413405295e-05,
      "loss": 0.9843,
      "step": 90340
    },
    {
      "epoch": 0.31665422480171873,
      "grad_norm": 3.625,
      "learning_rate": 4.9691711384741596e-05,
      "loss": 1.0673,
      "step": 90350
    },
    {
      "epoch": 0.3166892723086143,
      "grad_norm": 2.9375,
      "learning_rate": 4.969106235607789e-05,
      "loss": 0.9817,
      "step": 90360
    },
    {
      "epoch": 0.3167243198155099,
      "grad_norm": 3.28125,
      "learning_rate": 4.969041332741419e-05,
      "loss": 1.0267,
      "step": 90370
    },
    {
      "epoch": 0.3167593673224055,
      "grad_norm": 3.28125,
      "learning_rate": 4.9689764298750494e-05,
      "loss": 1.0102,
      "step": 90380
    },
    {
      "epoch": 0.3167944148293011,
      "grad_norm": 2.578125,
      "learning_rate": 4.968911527008679e-05,
      "loss": 0.9297,
      "step": 90390
    },
    {
      "epoch": 0.3168294623361967,
      "grad_norm": 3.015625,
      "learning_rate": 4.968846624142309e-05,
      "loss": 1.0104,
      "step": 90400
    },
    {
      "epoch": 0.31686450984309233,
      "grad_norm": 2.671875,
      "learning_rate": 4.9687817212759384e-05,
      "loss": 0.9066,
      "step": 90410
    },
    {
      "epoch": 0.3168995573499879,
      "grad_norm": 3.90625,
      "learning_rate": 4.9687168184095686e-05,
      "loss": 1.0481,
      "step": 90420
    },
    {
      "epoch": 0.3169346048568835,
      "grad_norm": 3.359375,
      "learning_rate": 4.968651915543198e-05,
      "loss": 0.9996,
      "step": 90430
    },
    {
      "epoch": 0.3169696523637791,
      "grad_norm": 3.15625,
      "learning_rate": 4.968587012676828e-05,
      "loss": 1.036,
      "step": 90440
    },
    {
      "epoch": 0.3170046998706747,
      "grad_norm": 3.25,
      "learning_rate": 4.9685221098104576e-05,
      "loss": 0.9803,
      "step": 90450
    },
    {
      "epoch": 0.3170397473775703,
      "grad_norm": 3.25,
      "learning_rate": 4.968457206944088e-05,
      "loss": 1.0642,
      "step": 90460
    },
    {
      "epoch": 0.3170747948844659,
      "grad_norm": 2.984375,
      "learning_rate": 4.968392304077717e-05,
      "loss": 0.9561,
      "step": 90470
    },
    {
      "epoch": 0.3171098423913615,
      "grad_norm": 3.328125,
      "learning_rate": 4.9683274012113474e-05,
      "loss": 1.0099,
      "step": 90480
    },
    {
      "epoch": 0.31714488989825707,
      "grad_norm": 3.125,
      "learning_rate": 4.968262498344977e-05,
      "loss": 0.9671,
      "step": 90490
    },
    {
      "epoch": 0.3171799374051527,
      "grad_norm": 2.890625,
      "learning_rate": 4.968197595478607e-05,
      "loss": 0.885,
      "step": 90500
    },
    {
      "epoch": 0.3172149849120483,
      "grad_norm": 3.359375,
      "learning_rate": 4.968132692612237e-05,
      "loss": 1.052,
      "step": 90510
    },
    {
      "epoch": 0.3172500324189439,
      "grad_norm": 3.234375,
      "learning_rate": 4.9680677897458666e-05,
      "loss": 0.9635,
      "step": 90520
    },
    {
      "epoch": 0.3172850799258395,
      "grad_norm": 23.0,
      "learning_rate": 4.968002886879497e-05,
      "loss": 0.9058,
      "step": 90530
    },
    {
      "epoch": 0.3173201274327351,
      "grad_norm": 3.078125,
      "learning_rate": 4.967937984013126e-05,
      "loss": 0.9302,
      "step": 90540
    },
    {
      "epoch": 0.31735517493963067,
      "grad_norm": 3.015625,
      "learning_rate": 4.967873081146756e-05,
      "loss": 0.9719,
      "step": 90550
    },
    {
      "epoch": 0.31739022244652626,
      "grad_norm": 3.234375,
      "learning_rate": 4.967808178280386e-05,
      "loss": 0.9183,
      "step": 90560
    },
    {
      "epoch": 0.31742526995342185,
      "grad_norm": 4.28125,
      "learning_rate": 4.967743275414016e-05,
      "loss": 1.0541,
      "step": 90570
    },
    {
      "epoch": 0.31746031746031744,
      "grad_norm": 3.09375,
      "learning_rate": 4.9676783725476454e-05,
      "loss": 0.9903,
      "step": 90580
    },
    {
      "epoch": 0.31749536496721303,
      "grad_norm": 3.203125,
      "learning_rate": 4.9676134696812755e-05,
      "loss": 0.9934,
      "step": 90590
    },
    {
      "epoch": 0.3175304124741087,
      "grad_norm": 3.4375,
      "learning_rate": 4.967548566814905e-05,
      "loss": 0.938,
      "step": 90600
    },
    {
      "epoch": 0.31756545998100427,
      "grad_norm": 3.484375,
      "learning_rate": 4.967483663948535e-05,
      "loss": 0.9766,
      "step": 90610
    },
    {
      "epoch": 0.31760050748789986,
      "grad_norm": 3.296875,
      "learning_rate": 4.9674187610821646e-05,
      "loss": 0.9778,
      "step": 90620
    },
    {
      "epoch": 0.31763555499479545,
      "grad_norm": 3.109375,
      "learning_rate": 4.967353858215795e-05,
      "loss": 0.8575,
      "step": 90630
    },
    {
      "epoch": 0.31767060250169105,
      "grad_norm": 3.34375,
      "learning_rate": 4.967288955349424e-05,
      "loss": 0.997,
      "step": 90640
    },
    {
      "epoch": 0.31770565000858664,
      "grad_norm": 3.015625,
      "learning_rate": 4.967224052483054e-05,
      "loss": 0.9646,
      "step": 90650
    },
    {
      "epoch": 0.31774069751548223,
      "grad_norm": 3.375,
      "learning_rate": 4.9671591496166845e-05,
      "loss": 0.9982,
      "step": 90660
    },
    {
      "epoch": 0.3177757450223778,
      "grad_norm": 3.265625,
      "learning_rate": 4.967094246750314e-05,
      "loss": 0.9476,
      "step": 90670
    },
    {
      "epoch": 0.3178107925292734,
      "grad_norm": 3.546875,
      "learning_rate": 4.9670293438839434e-05,
      "loss": 1.0342,
      "step": 90680
    },
    {
      "epoch": 0.317845840036169,
      "grad_norm": 2.78125,
      "learning_rate": 4.966964441017573e-05,
      "loss": 0.89,
      "step": 90690
    },
    {
      "epoch": 0.31788088754306465,
      "grad_norm": 3.34375,
      "learning_rate": 4.966899538151203e-05,
      "loss": 0.955,
      "step": 90700
    },
    {
      "epoch": 0.31791593504996024,
      "grad_norm": 2.953125,
      "learning_rate": 4.9668346352848324e-05,
      "loss": 0.9403,
      "step": 90710
    },
    {
      "epoch": 0.31795098255685583,
      "grad_norm": 2.78125,
      "learning_rate": 4.9667697324184626e-05,
      "loss": 1.0002,
      "step": 90720
    },
    {
      "epoch": 0.3179860300637514,
      "grad_norm": 3.140625,
      "learning_rate": 4.966704829552092e-05,
      "loss": 0.9815,
      "step": 90730
    },
    {
      "epoch": 0.318021077570647,
      "grad_norm": 2.984375,
      "learning_rate": 4.966639926685722e-05,
      "loss": 1.0188,
      "step": 90740
    },
    {
      "epoch": 0.3180561250775426,
      "grad_norm": 3.3125,
      "learning_rate": 4.966575023819352e-05,
      "loss": 0.9613,
      "step": 90750
    },
    {
      "epoch": 0.3180911725844382,
      "grad_norm": 3.28125,
      "learning_rate": 4.966510120952982e-05,
      "loss": 1.057,
      "step": 90760
    },
    {
      "epoch": 0.3181262200913338,
      "grad_norm": 3.15625,
      "learning_rate": 4.966445218086612e-05,
      "loss": 0.9649,
      "step": 90770
    },
    {
      "epoch": 0.3181612675982294,
      "grad_norm": 3.265625,
      "learning_rate": 4.9663803152202414e-05,
      "loss": 0.9971,
      "step": 90780
    },
    {
      "epoch": 0.31819631510512497,
      "grad_norm": 3.0,
      "learning_rate": 4.9663154123538715e-05,
      "loss": 1.0608,
      "step": 90790
    },
    {
      "epoch": 0.3182313626120206,
      "grad_norm": 3.140625,
      "learning_rate": 4.966250509487501e-05,
      "loss": 1.008,
      "step": 90800
    },
    {
      "epoch": 0.3182664101189162,
      "grad_norm": 3.515625,
      "learning_rate": 4.966185606621131e-05,
      "loss": 1.0232,
      "step": 90810
    },
    {
      "epoch": 0.3183014576258118,
      "grad_norm": 2.921875,
      "learning_rate": 4.9661207037547606e-05,
      "loss": 0.9789,
      "step": 90820
    },
    {
      "epoch": 0.3183365051327074,
      "grad_norm": 3.015625,
      "learning_rate": 4.966055800888391e-05,
      "loss": 0.97,
      "step": 90830
    },
    {
      "epoch": 0.318371552639603,
      "grad_norm": 3.03125,
      "learning_rate": 4.96599089802202e-05,
      "loss": 0.9865,
      "step": 90840
    },
    {
      "epoch": 0.3184066001464986,
      "grad_norm": 3.234375,
      "learning_rate": 4.96592599515565e-05,
      "loss": 0.9524,
      "step": 90850
    },
    {
      "epoch": 0.31844164765339417,
      "grad_norm": 3.484375,
      "learning_rate": 4.96586109228928e-05,
      "loss": 0.961,
      "step": 90860
    },
    {
      "epoch": 0.31847669516028976,
      "grad_norm": 3.40625,
      "learning_rate": 4.96579618942291e-05,
      "loss": 1.0386,
      "step": 90870
    },
    {
      "epoch": 0.31851174266718535,
      "grad_norm": 3.203125,
      "learning_rate": 4.96573128655654e-05,
      "loss": 0.9958,
      "step": 90880
    },
    {
      "epoch": 0.31854679017408094,
      "grad_norm": 3.265625,
      "learning_rate": 4.9656663836901695e-05,
      "loss": 0.9425,
      "step": 90890
    },
    {
      "epoch": 0.3185818376809766,
      "grad_norm": 3.484375,
      "learning_rate": 4.9656014808237997e-05,
      "loss": 0.9872,
      "step": 90900
    },
    {
      "epoch": 0.3186168851878722,
      "grad_norm": 2.9375,
      "learning_rate": 4.965536577957429e-05,
      "loss": 0.9965,
      "step": 90910
    },
    {
      "epoch": 0.31865193269476777,
      "grad_norm": 2.546875,
      "learning_rate": 4.965471675091059e-05,
      "loss": 0.9331,
      "step": 90920
    },
    {
      "epoch": 0.31868698020166336,
      "grad_norm": 3.109375,
      "learning_rate": 4.965406772224689e-05,
      "loss": 0.9893,
      "step": 90930
    },
    {
      "epoch": 0.31872202770855895,
      "grad_norm": 3.125,
      "learning_rate": 4.965341869358319e-05,
      "loss": 0.9754,
      "step": 90940
    },
    {
      "epoch": 0.31875707521545454,
      "grad_norm": 3.265625,
      "learning_rate": 4.965276966491948e-05,
      "loss": 1.0289,
      "step": 90950
    },
    {
      "epoch": 0.31879212272235014,
      "grad_norm": 3.015625,
      "learning_rate": 4.9652120636255785e-05,
      "loss": 0.9862,
      "step": 90960
    },
    {
      "epoch": 0.3188271702292457,
      "grad_norm": 3.125,
      "learning_rate": 4.965147160759208e-05,
      "loss": 0.995,
      "step": 90970
    },
    {
      "epoch": 0.3188622177361413,
      "grad_norm": 3.515625,
      "learning_rate": 4.965082257892838e-05,
      "loss": 0.8896,
      "step": 90980
    },
    {
      "epoch": 0.3188972652430369,
      "grad_norm": 3.03125,
      "learning_rate": 4.9650173550264675e-05,
      "loss": 1.0025,
      "step": 90990
    },
    {
      "epoch": 0.31893231274993256,
      "grad_norm": 3.0625,
      "learning_rate": 4.9649524521600977e-05,
      "loss": 0.9594,
      "step": 91000
    },
    {
      "epoch": 0.31896736025682815,
      "grad_norm": 3.484375,
      "learning_rate": 4.964887549293728e-05,
      "loss": 0.9201,
      "step": 91010
    },
    {
      "epoch": 0.31900240776372374,
      "grad_norm": 3.46875,
      "learning_rate": 4.964822646427357e-05,
      "loss": 0.9925,
      "step": 91020
    },
    {
      "epoch": 0.31903745527061933,
      "grad_norm": 3.1875,
      "learning_rate": 4.9647577435609874e-05,
      "loss": 0.987,
      "step": 91030
    },
    {
      "epoch": 0.3190725027775149,
      "grad_norm": 3.125,
      "learning_rate": 4.964692840694617e-05,
      "loss": 0.9501,
      "step": 91040
    },
    {
      "epoch": 0.3191075502844105,
      "grad_norm": 2.890625,
      "learning_rate": 4.964627937828247e-05,
      "loss": 0.9471,
      "step": 91050
    },
    {
      "epoch": 0.3191425977913061,
      "grad_norm": 3.28125,
      "learning_rate": 4.964563034961876e-05,
      "loss": 1.1027,
      "step": 91060
    },
    {
      "epoch": 0.3191776452982017,
      "grad_norm": 3.171875,
      "learning_rate": 4.964498132095506e-05,
      "loss": 0.9297,
      "step": 91070
    },
    {
      "epoch": 0.3192126928050973,
      "grad_norm": 3.390625,
      "learning_rate": 4.9644332292291354e-05,
      "loss": 1.0538,
      "step": 91080
    },
    {
      "epoch": 0.31924774031199293,
      "grad_norm": 3.390625,
      "learning_rate": 4.9643683263627655e-05,
      "loss": 1.0394,
      "step": 91090
    },
    {
      "epoch": 0.3192827878188885,
      "grad_norm": 3.1875,
      "learning_rate": 4.9643034234963957e-05,
      "loss": 0.9837,
      "step": 91100
    },
    {
      "epoch": 0.3193178353257841,
      "grad_norm": 3.40625,
      "learning_rate": 4.964238520630025e-05,
      "loss": 0.9967,
      "step": 91110
    },
    {
      "epoch": 0.3193528828326797,
      "grad_norm": 2.953125,
      "learning_rate": 4.964173617763655e-05,
      "loss": 1.0509,
      "step": 91120
    },
    {
      "epoch": 0.3193879303395753,
      "grad_norm": 3.21875,
      "learning_rate": 4.964108714897285e-05,
      "loss": 0.996,
      "step": 91130
    },
    {
      "epoch": 0.3194229778464709,
      "grad_norm": 3.046875,
      "learning_rate": 4.964043812030915e-05,
      "loss": 1.0084,
      "step": 91140
    },
    {
      "epoch": 0.3194580253533665,
      "grad_norm": 2.734375,
      "learning_rate": 4.963978909164544e-05,
      "loss": 0.9357,
      "step": 91150
    },
    {
      "epoch": 0.3194930728602621,
      "grad_norm": 3.40625,
      "learning_rate": 4.9639140062981745e-05,
      "loss": 1.045,
      "step": 91160
    },
    {
      "epoch": 0.31952812036715766,
      "grad_norm": 2.875,
      "learning_rate": 4.963849103431804e-05,
      "loss": 0.9413,
      "step": 91170
    },
    {
      "epoch": 0.31956316787405326,
      "grad_norm": 3.140625,
      "learning_rate": 4.963784200565434e-05,
      "loss": 0.9417,
      "step": 91180
    },
    {
      "epoch": 0.3195982153809489,
      "grad_norm": 3.125,
      "learning_rate": 4.9637192976990635e-05,
      "loss": 1.0196,
      "step": 91190
    },
    {
      "epoch": 0.3196332628878445,
      "grad_norm": 3.0,
      "learning_rate": 4.9636543948326937e-05,
      "loss": 0.9478,
      "step": 91200
    },
    {
      "epoch": 0.3196683103947401,
      "grad_norm": 3.3125,
      "learning_rate": 4.963589491966323e-05,
      "loss": 0.9683,
      "step": 91210
    },
    {
      "epoch": 0.3197033579016357,
      "grad_norm": 3.1875,
      "learning_rate": 4.963524589099953e-05,
      "loss": 0.9549,
      "step": 91220
    },
    {
      "epoch": 0.31973840540853127,
      "grad_norm": 2.921875,
      "learning_rate": 4.963459686233583e-05,
      "loss": 1.0514,
      "step": 91230
    },
    {
      "epoch": 0.31977345291542686,
      "grad_norm": 3.234375,
      "learning_rate": 4.963394783367213e-05,
      "loss": 0.992,
      "step": 91240
    },
    {
      "epoch": 0.31980850042232245,
      "grad_norm": 3.0,
      "learning_rate": 4.963329880500843e-05,
      "loss": 0.9522,
      "step": 91250
    },
    {
      "epoch": 0.31984354792921804,
      "grad_norm": 3.265625,
      "learning_rate": 4.9632649776344725e-05,
      "loss": 1.0012,
      "step": 91260
    },
    {
      "epoch": 0.31987859543611363,
      "grad_norm": 3.078125,
      "learning_rate": 4.9632000747681026e-05,
      "loss": 0.9406,
      "step": 91270
    },
    {
      "epoch": 0.3199136429430092,
      "grad_norm": 2.921875,
      "learning_rate": 4.963135171901732e-05,
      "loss": 0.8863,
      "step": 91280
    },
    {
      "epoch": 0.31994869044990487,
      "grad_norm": 3.3125,
      "learning_rate": 4.963070269035362e-05,
      "loss": 0.9859,
      "step": 91290
    },
    {
      "epoch": 0.31998373795680046,
      "grad_norm": 3.0,
      "learning_rate": 4.9630053661689917e-05,
      "loss": 0.9885,
      "step": 91300
    },
    {
      "epoch": 0.32001878546369605,
      "grad_norm": 3.0,
      "learning_rate": 4.962940463302622e-05,
      "loss": 0.9472,
      "step": 91310
    },
    {
      "epoch": 0.32005383297059165,
      "grad_norm": 3.1875,
      "learning_rate": 4.962875560436251e-05,
      "loss": 0.9815,
      "step": 91320
    },
    {
      "epoch": 0.32008888047748724,
      "grad_norm": 3.21875,
      "learning_rate": 4.9628106575698814e-05,
      "loss": 1.0528,
      "step": 91330
    },
    {
      "epoch": 0.32012392798438283,
      "grad_norm": 2.96875,
      "learning_rate": 4.962745754703511e-05,
      "loss": 1.0711,
      "step": 91340
    },
    {
      "epoch": 0.3201589754912784,
      "grad_norm": 3.4375,
      "learning_rate": 4.962680851837141e-05,
      "loss": 0.9855,
      "step": 91350
    },
    {
      "epoch": 0.320194022998174,
      "grad_norm": 2.84375,
      "learning_rate": 4.9626159489707705e-05,
      "loss": 1.0446,
      "step": 91360
    },
    {
      "epoch": 0.3202290705050696,
      "grad_norm": 2.671875,
      "learning_rate": 4.9625510461044006e-05,
      "loss": 0.9808,
      "step": 91370
    },
    {
      "epoch": 0.3202641180119652,
      "grad_norm": 3.21875,
      "learning_rate": 4.962486143238031e-05,
      "loss": 0.9471,
      "step": 91380
    },
    {
      "epoch": 0.32029916551886084,
      "grad_norm": 3.609375,
      "learning_rate": 4.96242124037166e-05,
      "loss": 0.9618,
      "step": 91390
    },
    {
      "epoch": 0.32033421302575643,
      "grad_norm": 3.1875,
      "learning_rate": 4.96235633750529e-05,
      "loss": 0.9486,
      "step": 91400
    },
    {
      "epoch": 0.320369260532652,
      "grad_norm": 2.84375,
      "learning_rate": 4.96229143463892e-05,
      "loss": 0.9779,
      "step": 91410
    },
    {
      "epoch": 0.3204043080395476,
      "grad_norm": 2.859375,
      "learning_rate": 4.96222653177255e-05,
      "loss": 0.9064,
      "step": 91420
    },
    {
      "epoch": 0.3204393555464432,
      "grad_norm": 2.921875,
      "learning_rate": 4.9621616289061794e-05,
      "loss": 1.0141,
      "step": 91430
    },
    {
      "epoch": 0.3204744030533388,
      "grad_norm": 2.765625,
      "learning_rate": 4.962096726039809e-05,
      "loss": 0.9736,
      "step": 91440
    },
    {
      "epoch": 0.3205094505602344,
      "grad_norm": 3.03125,
      "learning_rate": 4.962031823173438e-05,
      "loss": 0.97,
      "step": 91450
    },
    {
      "epoch": 0.32054449806713,
      "grad_norm": 2.96875,
      "learning_rate": 4.9619669203070685e-05,
      "loss": 0.8956,
      "step": 91460
    },
    {
      "epoch": 0.32057954557402557,
      "grad_norm": 3.140625,
      "learning_rate": 4.9619020174406986e-05,
      "loss": 0.9517,
      "step": 91470
    },
    {
      "epoch": 0.32061459308092116,
      "grad_norm": 3.0625,
      "learning_rate": 4.961837114574328e-05,
      "loss": 0.9755,
      "step": 91480
    },
    {
      "epoch": 0.3206496405878168,
      "grad_norm": 3.203125,
      "learning_rate": 4.961772211707958e-05,
      "loss": 0.9492,
      "step": 91490
    },
    {
      "epoch": 0.3206846880947124,
      "grad_norm": 3.03125,
      "learning_rate": 4.9617073088415877e-05,
      "loss": 0.9272,
      "step": 91500
    },
    {
      "epoch": 0.320719735601608,
      "grad_norm": 2.921875,
      "learning_rate": 4.961642405975218e-05,
      "loss": 0.9667,
      "step": 91510
    },
    {
      "epoch": 0.3207547831085036,
      "grad_norm": 3.265625,
      "learning_rate": 4.961577503108847e-05,
      "loss": 0.9547,
      "step": 91520
    },
    {
      "epoch": 0.3207898306153992,
      "grad_norm": 3.078125,
      "learning_rate": 4.9615126002424774e-05,
      "loss": 0.9754,
      "step": 91530
    },
    {
      "epoch": 0.32082487812229477,
      "grad_norm": 3.0625,
      "learning_rate": 4.961447697376107e-05,
      "loss": 0.9848,
      "step": 91540
    },
    {
      "epoch": 0.32085992562919036,
      "grad_norm": 3.390625,
      "learning_rate": 4.961382794509737e-05,
      "loss": 0.918,
      "step": 91550
    },
    {
      "epoch": 0.32089497313608595,
      "grad_norm": 3.5,
      "learning_rate": 4.9613178916433665e-05,
      "loss": 0.9812,
      "step": 91560
    },
    {
      "epoch": 0.32093002064298154,
      "grad_norm": 3.421875,
      "learning_rate": 4.9612529887769966e-05,
      "loss": 0.9848,
      "step": 91570
    },
    {
      "epoch": 0.32096506814987713,
      "grad_norm": 3.34375,
      "learning_rate": 4.961188085910626e-05,
      "loss": 1.0107,
      "step": 91580
    },
    {
      "epoch": 0.3210001156567728,
      "grad_norm": 2.8125,
      "learning_rate": 4.961123183044256e-05,
      "loss": 1.0006,
      "step": 91590
    },
    {
      "epoch": 0.32103516316366837,
      "grad_norm": 3.21875,
      "learning_rate": 4.9610582801778857e-05,
      "loss": 0.9702,
      "step": 91600
    },
    {
      "epoch": 0.32107021067056396,
      "grad_norm": 2.90625,
      "learning_rate": 4.960993377311516e-05,
      "loss": 0.9744,
      "step": 91610
    },
    {
      "epoch": 0.32110525817745955,
      "grad_norm": 3.375,
      "learning_rate": 4.960928474445146e-05,
      "loss": 0.9281,
      "step": 91620
    },
    {
      "epoch": 0.32114030568435514,
      "grad_norm": 3.375,
      "learning_rate": 4.9608635715787754e-05,
      "loss": 0.9298,
      "step": 91630
    },
    {
      "epoch": 0.32117535319125073,
      "grad_norm": 2.609375,
      "learning_rate": 4.9607986687124055e-05,
      "loss": 0.963,
      "step": 91640
    },
    {
      "epoch": 0.3212104006981463,
      "grad_norm": 2.734375,
      "learning_rate": 4.960733765846035e-05,
      "loss": 0.8912,
      "step": 91650
    },
    {
      "epoch": 0.3212454482050419,
      "grad_norm": 3.734375,
      "learning_rate": 4.960668862979665e-05,
      "loss": 1.0397,
      "step": 91660
    },
    {
      "epoch": 0.3212804957119375,
      "grad_norm": 3.359375,
      "learning_rate": 4.9606039601132946e-05,
      "loss": 1.0155,
      "step": 91670
    },
    {
      "epoch": 0.32131554321883316,
      "grad_norm": 3.65625,
      "learning_rate": 4.960539057246925e-05,
      "loss": 1.0034,
      "step": 91680
    },
    {
      "epoch": 0.32135059072572875,
      "grad_norm": 3.078125,
      "learning_rate": 4.960474154380554e-05,
      "loss": 1.0058,
      "step": 91690
    },
    {
      "epoch": 0.32138563823262434,
      "grad_norm": 3.15625,
      "learning_rate": 4.960409251514184e-05,
      "loss": 0.8979,
      "step": 91700
    },
    {
      "epoch": 0.32142068573951993,
      "grad_norm": 3.0,
      "learning_rate": 4.960344348647814e-05,
      "loss": 0.9279,
      "step": 91710
    },
    {
      "epoch": 0.3214557332464155,
      "grad_norm": 2.8125,
      "learning_rate": 4.960279445781444e-05,
      "loss": 0.9488,
      "step": 91720
    },
    {
      "epoch": 0.3214907807533111,
      "grad_norm": 3.0,
      "learning_rate": 4.9602145429150734e-05,
      "loss": 0.9685,
      "step": 91730
    },
    {
      "epoch": 0.3215258282602067,
      "grad_norm": 2.515625,
      "learning_rate": 4.9601496400487035e-05,
      "loss": 0.9043,
      "step": 91740
    },
    {
      "epoch": 0.3215608757671023,
      "grad_norm": 2.875,
      "learning_rate": 4.960084737182334e-05,
      "loss": 0.9125,
      "step": 91750
    },
    {
      "epoch": 0.3215959232739979,
      "grad_norm": 3.03125,
      "learning_rate": 4.960019834315963e-05,
      "loss": 0.944,
      "step": 91760
    },
    {
      "epoch": 0.3216309707808935,
      "grad_norm": 2.828125,
      "learning_rate": 4.959954931449593e-05,
      "loss": 0.9035,
      "step": 91770
    },
    {
      "epoch": 0.3216660182877891,
      "grad_norm": 2.90625,
      "learning_rate": 4.959890028583223e-05,
      "loss": 0.9613,
      "step": 91780
    },
    {
      "epoch": 0.3217010657946847,
      "grad_norm": 3.3125,
      "learning_rate": 4.959825125716853e-05,
      "loss": 1.0056,
      "step": 91790
    },
    {
      "epoch": 0.3217361133015803,
      "grad_norm": 3.046875,
      "learning_rate": 4.959760222850482e-05,
      "loss": 1.0007,
      "step": 91800
    },
    {
      "epoch": 0.3217711608084759,
      "grad_norm": 3.15625,
      "learning_rate": 4.959695319984112e-05,
      "loss": 1.0097,
      "step": 91810
    },
    {
      "epoch": 0.3218062083153715,
      "grad_norm": 3.109375,
      "learning_rate": 4.959630417117741e-05,
      "loss": 0.9648,
      "step": 91820
    },
    {
      "epoch": 0.3218412558222671,
      "grad_norm": 2.96875,
      "learning_rate": 4.9595655142513714e-05,
      "loss": 1.0081,
      "step": 91830
    },
    {
      "epoch": 0.32187630332916267,
      "grad_norm": 3.765625,
      "learning_rate": 4.9595006113850015e-05,
      "loss": 1.1172,
      "step": 91840
    },
    {
      "epoch": 0.32191135083605826,
      "grad_norm": 2.8125,
      "learning_rate": 4.959435708518631e-05,
      "loss": 0.9289,
      "step": 91850
    },
    {
      "epoch": 0.32194639834295385,
      "grad_norm": 3.125,
      "learning_rate": 4.959370805652261e-05,
      "loss": 0.949,
      "step": 91860
    },
    {
      "epoch": 0.32198144584984945,
      "grad_norm": 3.46875,
      "learning_rate": 4.9593059027858906e-05,
      "loss": 0.9439,
      "step": 91870
    },
    {
      "epoch": 0.3220164933567451,
      "grad_norm": 3.03125,
      "learning_rate": 4.959240999919521e-05,
      "loss": 0.9828,
      "step": 91880
    },
    {
      "epoch": 0.3220515408636407,
      "grad_norm": 3.265625,
      "learning_rate": 4.95917609705315e-05,
      "loss": 0.9375,
      "step": 91890
    },
    {
      "epoch": 0.3220865883705363,
      "grad_norm": 2.875,
      "learning_rate": 4.95911119418678e-05,
      "loss": 0.9576,
      "step": 91900
    },
    {
      "epoch": 0.32212163587743187,
      "grad_norm": 2.90625,
      "learning_rate": 4.95904629132041e-05,
      "loss": 0.9963,
      "step": 91910
    },
    {
      "epoch": 0.32215668338432746,
      "grad_norm": 3.109375,
      "learning_rate": 4.95898138845404e-05,
      "loss": 0.9373,
      "step": 91920
    },
    {
      "epoch": 0.32219173089122305,
      "grad_norm": 3.53125,
      "learning_rate": 4.9589164855876694e-05,
      "loss": 0.9538,
      "step": 91930
    },
    {
      "epoch": 0.32222677839811864,
      "grad_norm": 3.046875,
      "learning_rate": 4.9588515827212995e-05,
      "loss": 0.9134,
      "step": 91940
    },
    {
      "epoch": 0.32226182590501423,
      "grad_norm": 2.53125,
      "learning_rate": 4.958786679854929e-05,
      "loss": 0.9123,
      "step": 91950
    },
    {
      "epoch": 0.3222968734119098,
      "grad_norm": 3.125,
      "learning_rate": 4.958721776988559e-05,
      "loss": 0.9241,
      "step": 91960
    },
    {
      "epoch": 0.3223319209188054,
      "grad_norm": 3.5625,
      "learning_rate": 4.958656874122189e-05,
      "loss": 0.9229,
      "step": 91970
    },
    {
      "epoch": 0.32236696842570106,
      "grad_norm": 3.09375,
      "learning_rate": 4.958591971255819e-05,
      "loss": 0.8712,
      "step": 91980
    },
    {
      "epoch": 0.32240201593259665,
      "grad_norm": 3.09375,
      "learning_rate": 4.958527068389449e-05,
      "loss": 0.9142,
      "step": 91990
    },
    {
      "epoch": 0.32243706343949224,
      "grad_norm": 3.328125,
      "learning_rate": 4.958462165523078e-05,
      "loss": 1.0153,
      "step": 92000
    },
    {
      "epoch": 0.32247211094638784,
      "grad_norm": 3.359375,
      "learning_rate": 4.9583972626567085e-05,
      "loss": 1.0529,
      "step": 92010
    },
    {
      "epoch": 0.3225071584532834,
      "grad_norm": 3.421875,
      "learning_rate": 4.958332359790338e-05,
      "loss": 0.9559,
      "step": 92020
    },
    {
      "epoch": 0.322542205960179,
      "grad_norm": 2.96875,
      "learning_rate": 4.958267456923968e-05,
      "loss": 0.9232,
      "step": 92030
    },
    {
      "epoch": 0.3225772534670746,
      "grad_norm": 3.078125,
      "learning_rate": 4.9582025540575975e-05,
      "loss": 0.9853,
      "step": 92040
    },
    {
      "epoch": 0.3226123009739702,
      "grad_norm": 2.9375,
      "learning_rate": 4.958137651191228e-05,
      "loss": 0.9637,
      "step": 92050
    },
    {
      "epoch": 0.3226473484808658,
      "grad_norm": 2.875,
      "learning_rate": 4.958072748324857e-05,
      "loss": 0.9602,
      "step": 92060
    },
    {
      "epoch": 0.3226823959877614,
      "grad_norm": 3.109375,
      "learning_rate": 4.958007845458487e-05,
      "loss": 1.0915,
      "step": 92070
    },
    {
      "epoch": 0.32271744349465703,
      "grad_norm": 3.5625,
      "learning_rate": 4.957942942592117e-05,
      "loss": 0.9777,
      "step": 92080
    },
    {
      "epoch": 0.3227524910015526,
      "grad_norm": 2.578125,
      "learning_rate": 4.957878039725747e-05,
      "loss": 0.9092,
      "step": 92090
    },
    {
      "epoch": 0.3227875385084482,
      "grad_norm": 2.6875,
      "learning_rate": 4.957813136859376e-05,
      "loss": 1.0108,
      "step": 92100
    },
    {
      "epoch": 0.3228225860153438,
      "grad_norm": 3.171875,
      "learning_rate": 4.9577482339930065e-05,
      "loss": 0.9247,
      "step": 92110
    },
    {
      "epoch": 0.3228576335222394,
      "grad_norm": 3.203125,
      "learning_rate": 4.9576833311266366e-05,
      "loss": 0.9437,
      "step": 92120
    },
    {
      "epoch": 0.322892681029135,
      "grad_norm": 3.34375,
      "learning_rate": 4.957618428260266e-05,
      "loss": 1.0005,
      "step": 92130
    },
    {
      "epoch": 0.3229277285360306,
      "grad_norm": 3.15625,
      "learning_rate": 4.957553525393896e-05,
      "loss": 0.9552,
      "step": 92140
    },
    {
      "epoch": 0.32296277604292617,
      "grad_norm": 3.265625,
      "learning_rate": 4.957488622527526e-05,
      "loss": 1.0181,
      "step": 92150
    },
    {
      "epoch": 0.32299782354982176,
      "grad_norm": 2.765625,
      "learning_rate": 4.957423719661156e-05,
      "loss": 0.9399,
      "step": 92160
    },
    {
      "epoch": 0.3230328710567174,
      "grad_norm": 2.59375,
      "learning_rate": 4.957358816794785e-05,
      "loss": 0.8797,
      "step": 92170
    },
    {
      "epoch": 0.323067918563613,
      "grad_norm": 3.265625,
      "learning_rate": 4.9572939139284154e-05,
      "loss": 0.8945,
      "step": 92180
    },
    {
      "epoch": 0.3231029660705086,
      "grad_norm": 3.53125,
      "learning_rate": 4.957229011062044e-05,
      "loss": 0.9575,
      "step": 92190
    },
    {
      "epoch": 0.3231380135774042,
      "grad_norm": 3.171875,
      "learning_rate": 4.957164108195674e-05,
      "loss": 1.0038,
      "step": 92200
    },
    {
      "epoch": 0.3231730610842998,
      "grad_norm": 3.03125,
      "learning_rate": 4.9570992053293045e-05,
      "loss": 0.9574,
      "step": 92210
    },
    {
      "epoch": 0.32320810859119536,
      "grad_norm": 3.21875,
      "learning_rate": 4.957034302462934e-05,
      "loss": 0.9977,
      "step": 92220
    },
    {
      "epoch": 0.32324315609809096,
      "grad_norm": 3.359375,
      "learning_rate": 4.956969399596564e-05,
      "loss": 1.0578,
      "step": 92230
    },
    {
      "epoch": 0.32327820360498655,
      "grad_norm": 3.703125,
      "learning_rate": 4.9569044967301935e-05,
      "loss": 1.1016,
      "step": 92240
    },
    {
      "epoch": 0.32331325111188214,
      "grad_norm": 3.1875,
      "learning_rate": 4.956839593863824e-05,
      "loss": 0.9197,
      "step": 92250
    },
    {
      "epoch": 0.32334829861877773,
      "grad_norm": 3.046875,
      "learning_rate": 4.956774690997453e-05,
      "loss": 0.9404,
      "step": 92260
    },
    {
      "epoch": 0.3233833461256734,
      "grad_norm": 3.078125,
      "learning_rate": 4.956709788131083e-05,
      "loss": 1.0774,
      "step": 92270
    },
    {
      "epoch": 0.32341839363256897,
      "grad_norm": 2.78125,
      "learning_rate": 4.956644885264713e-05,
      "loss": 0.9234,
      "step": 92280
    },
    {
      "epoch": 0.32345344113946456,
      "grad_norm": 3.5,
      "learning_rate": 4.956579982398343e-05,
      "loss": 0.9831,
      "step": 92290
    },
    {
      "epoch": 0.32348848864636015,
      "grad_norm": 3.28125,
      "learning_rate": 4.956515079531972e-05,
      "loss": 0.9197,
      "step": 92300
    },
    {
      "epoch": 0.32352353615325574,
      "grad_norm": 3.671875,
      "learning_rate": 4.9564501766656025e-05,
      "loss": 0.9868,
      "step": 92310
    },
    {
      "epoch": 0.32355858366015133,
      "grad_norm": 2.796875,
      "learning_rate": 4.956385273799232e-05,
      "loss": 0.9852,
      "step": 92320
    },
    {
      "epoch": 0.3235936311670469,
      "grad_norm": 3.203125,
      "learning_rate": 4.956320370932862e-05,
      "loss": 0.9712,
      "step": 92330
    },
    {
      "epoch": 0.3236286786739425,
      "grad_norm": 3.25,
      "learning_rate": 4.956255468066492e-05,
      "loss": 0.9574,
      "step": 92340
    },
    {
      "epoch": 0.3236637261808381,
      "grad_norm": 3.203125,
      "learning_rate": 4.956190565200122e-05,
      "loss": 0.9079,
      "step": 92350
    },
    {
      "epoch": 0.3236987736877337,
      "grad_norm": 3.40625,
      "learning_rate": 4.956125662333752e-05,
      "loss": 0.915,
      "step": 92360
    },
    {
      "epoch": 0.32373382119462935,
      "grad_norm": 2.9375,
      "learning_rate": 4.956060759467381e-05,
      "loss": 0.8962,
      "step": 92370
    },
    {
      "epoch": 0.32376886870152494,
      "grad_norm": 3.203125,
      "learning_rate": 4.9559958566010114e-05,
      "loss": 1.0523,
      "step": 92380
    },
    {
      "epoch": 0.32380391620842053,
      "grad_norm": 3.28125,
      "learning_rate": 4.955930953734641e-05,
      "loss": 1.0297,
      "step": 92390
    },
    {
      "epoch": 0.3238389637153161,
      "grad_norm": 3.078125,
      "learning_rate": 4.955866050868271e-05,
      "loss": 0.956,
      "step": 92400
    },
    {
      "epoch": 0.3238740112222117,
      "grad_norm": 3.59375,
      "learning_rate": 4.9558011480019005e-05,
      "loss": 1.0085,
      "step": 92410
    },
    {
      "epoch": 0.3239090587291073,
      "grad_norm": 2.515625,
      "learning_rate": 4.9557362451355306e-05,
      "loss": 0.9651,
      "step": 92420
    },
    {
      "epoch": 0.3239441062360029,
      "grad_norm": 3.265625,
      "learning_rate": 4.95567134226916e-05,
      "loss": 1.0206,
      "step": 92430
    },
    {
      "epoch": 0.3239791537428985,
      "grad_norm": 2.546875,
      "learning_rate": 4.95560643940279e-05,
      "loss": 1.0185,
      "step": 92440
    },
    {
      "epoch": 0.3240142012497941,
      "grad_norm": 3.328125,
      "learning_rate": 4.95554153653642e-05,
      "loss": 1.0423,
      "step": 92450
    },
    {
      "epoch": 0.32404924875668967,
      "grad_norm": 3.09375,
      "learning_rate": 4.95547663367005e-05,
      "loss": 0.9553,
      "step": 92460
    },
    {
      "epoch": 0.3240842962635853,
      "grad_norm": 3.421875,
      "learning_rate": 4.955411730803679e-05,
      "loss": 1.0034,
      "step": 92470
    },
    {
      "epoch": 0.3241193437704809,
      "grad_norm": 3.375,
      "learning_rate": 4.9553468279373094e-05,
      "loss": 0.9279,
      "step": 92480
    },
    {
      "epoch": 0.3241543912773765,
      "grad_norm": 3.28125,
      "learning_rate": 4.9552819250709395e-05,
      "loss": 0.992,
      "step": 92490
    },
    {
      "epoch": 0.3241894387842721,
      "grad_norm": 2.890625,
      "learning_rate": 4.955217022204569e-05,
      "loss": 1.0169,
      "step": 92500
    },
    {
      "epoch": 0.3242244862911677,
      "grad_norm": 3.46875,
      "learning_rate": 4.955152119338199e-05,
      "loss": 0.8882,
      "step": 92510
    },
    {
      "epoch": 0.32425953379806327,
      "grad_norm": 3.15625,
      "learning_rate": 4.9550872164718286e-05,
      "loss": 0.9323,
      "step": 92520
    },
    {
      "epoch": 0.32429458130495886,
      "grad_norm": 3.84375,
      "learning_rate": 4.955022313605459e-05,
      "loss": 0.9852,
      "step": 92530
    },
    {
      "epoch": 0.32432962881185445,
      "grad_norm": 2.71875,
      "learning_rate": 4.954957410739088e-05,
      "loss": 0.964,
      "step": 92540
    },
    {
      "epoch": 0.32436467631875004,
      "grad_norm": 2.875,
      "learning_rate": 4.9548925078727183e-05,
      "loss": 0.9687,
      "step": 92550
    },
    {
      "epoch": 0.32439972382564564,
      "grad_norm": 2.859375,
      "learning_rate": 4.954827605006347e-05,
      "loss": 0.9305,
      "step": 92560
    },
    {
      "epoch": 0.3244347713325413,
      "grad_norm": 3.015625,
      "learning_rate": 4.954762702139977e-05,
      "loss": 1.0745,
      "step": 92570
    },
    {
      "epoch": 0.3244698188394369,
      "grad_norm": 3.390625,
      "learning_rate": 4.9546977992736074e-05,
      "loss": 0.8981,
      "step": 92580
    },
    {
      "epoch": 0.32450486634633247,
      "grad_norm": 3.125,
      "learning_rate": 4.954632896407237e-05,
      "loss": 0.9089,
      "step": 92590
    },
    {
      "epoch": 0.32453991385322806,
      "grad_norm": 2.8125,
      "learning_rate": 4.954567993540867e-05,
      "loss": 0.905,
      "step": 92600
    },
    {
      "epoch": 0.32457496136012365,
      "grad_norm": 2.40625,
      "learning_rate": 4.9545030906744965e-05,
      "loss": 0.9479,
      "step": 92610
    },
    {
      "epoch": 0.32461000886701924,
      "grad_norm": 3.359375,
      "learning_rate": 4.9544381878081266e-05,
      "loss": 1.0333,
      "step": 92620
    },
    {
      "epoch": 0.32464505637391483,
      "grad_norm": 2.625,
      "learning_rate": 4.954373284941756e-05,
      "loss": 0.9982,
      "step": 92630
    },
    {
      "epoch": 0.3246801038808104,
      "grad_norm": 3.046875,
      "learning_rate": 4.954308382075386e-05,
      "loss": 0.8894,
      "step": 92640
    },
    {
      "epoch": 0.324715151387706,
      "grad_norm": 3.5,
      "learning_rate": 4.954243479209016e-05,
      "loss": 0.9443,
      "step": 92650
    },
    {
      "epoch": 0.3247501988946016,
      "grad_norm": 3.09375,
      "learning_rate": 4.954178576342646e-05,
      "loss": 0.9597,
      "step": 92660
    },
    {
      "epoch": 0.32478524640149725,
      "grad_norm": 2.796875,
      "learning_rate": 4.954113673476275e-05,
      "loss": 1.0206,
      "step": 92670
    },
    {
      "epoch": 0.32482029390839284,
      "grad_norm": 2.859375,
      "learning_rate": 4.9540487706099054e-05,
      "loss": 0.929,
      "step": 92680
    },
    {
      "epoch": 0.32485534141528843,
      "grad_norm": 3.125,
      "learning_rate": 4.953983867743535e-05,
      "loss": 0.948,
      "step": 92690
    },
    {
      "epoch": 0.324890388922184,
      "grad_norm": 3.203125,
      "learning_rate": 4.953918964877165e-05,
      "loss": 1.0002,
      "step": 92700
    },
    {
      "epoch": 0.3249254364290796,
      "grad_norm": 2.9375,
      "learning_rate": 4.953854062010795e-05,
      "loss": 0.9572,
      "step": 92710
    },
    {
      "epoch": 0.3249604839359752,
      "grad_norm": 2.578125,
      "learning_rate": 4.9537891591444246e-05,
      "loss": 0.93,
      "step": 92720
    },
    {
      "epoch": 0.3249955314428708,
      "grad_norm": 3.484375,
      "learning_rate": 4.953724256278055e-05,
      "loss": 0.9872,
      "step": 92730
    },
    {
      "epoch": 0.3250305789497664,
      "grad_norm": 3.0,
      "learning_rate": 4.953659353411684e-05,
      "loss": 0.9631,
      "step": 92740
    },
    {
      "epoch": 0.325065626456662,
      "grad_norm": 2.765625,
      "learning_rate": 4.9535944505453143e-05,
      "loss": 1.0654,
      "step": 92750
    },
    {
      "epoch": 0.32510067396355763,
      "grad_norm": 2.921875,
      "learning_rate": 4.953529547678944e-05,
      "loss": 0.9883,
      "step": 92760
    },
    {
      "epoch": 0.3251357214704532,
      "grad_norm": 3.4375,
      "learning_rate": 4.953464644812574e-05,
      "loss": 0.965,
      "step": 92770
    },
    {
      "epoch": 0.3251707689773488,
      "grad_norm": 2.859375,
      "learning_rate": 4.9533997419462034e-05,
      "loss": 0.9964,
      "step": 92780
    },
    {
      "epoch": 0.3252058164842444,
      "grad_norm": 2.8125,
      "learning_rate": 4.9533348390798335e-05,
      "loss": 0.9237,
      "step": 92790
    },
    {
      "epoch": 0.32524086399114,
      "grad_norm": 3.171875,
      "learning_rate": 4.953269936213463e-05,
      "loss": 1.0105,
      "step": 92800
    },
    {
      "epoch": 0.3252759114980356,
      "grad_norm": 3.171875,
      "learning_rate": 4.953205033347093e-05,
      "loss": 0.9843,
      "step": 92810
    },
    {
      "epoch": 0.3253109590049312,
      "grad_norm": 3.5625,
      "learning_rate": 4.9531401304807226e-05,
      "loss": 1.0131,
      "step": 92820
    },
    {
      "epoch": 0.32534600651182677,
      "grad_norm": 3.078125,
      "learning_rate": 4.953075227614353e-05,
      "loss": 0.9899,
      "step": 92830
    },
    {
      "epoch": 0.32538105401872236,
      "grad_norm": 2.828125,
      "learning_rate": 4.953010324747982e-05,
      "loss": 1.0383,
      "step": 92840
    },
    {
      "epoch": 0.32541610152561795,
      "grad_norm": 2.8125,
      "learning_rate": 4.9529454218816123e-05,
      "loss": 0.8838,
      "step": 92850
    },
    {
      "epoch": 0.3254511490325136,
      "grad_norm": 3.265625,
      "learning_rate": 4.9528805190152425e-05,
      "loss": 0.8889,
      "step": 92860
    },
    {
      "epoch": 0.3254861965394092,
      "grad_norm": 2.65625,
      "learning_rate": 4.952815616148872e-05,
      "loss": 1.0035,
      "step": 92870
    },
    {
      "epoch": 0.3255212440463048,
      "grad_norm": 3.234375,
      "learning_rate": 4.952750713282502e-05,
      "loss": 0.9733,
      "step": 92880
    },
    {
      "epoch": 0.32555629155320037,
      "grad_norm": 2.8125,
      "learning_rate": 4.9526858104161315e-05,
      "loss": 0.8527,
      "step": 92890
    },
    {
      "epoch": 0.32559133906009596,
      "grad_norm": 3.328125,
      "learning_rate": 4.952620907549762e-05,
      "loss": 0.931,
      "step": 92900
    },
    {
      "epoch": 0.32562638656699155,
      "grad_norm": 3.1875,
      "learning_rate": 4.952556004683391e-05,
      "loss": 0.9568,
      "step": 92910
    },
    {
      "epoch": 0.32566143407388715,
      "grad_norm": 3.0,
      "learning_rate": 4.952491101817021e-05,
      "loss": 0.9689,
      "step": 92920
    },
    {
      "epoch": 0.32569648158078274,
      "grad_norm": 3.5,
      "learning_rate": 4.952426198950651e-05,
      "loss": 0.9723,
      "step": 92930
    },
    {
      "epoch": 0.32573152908767833,
      "grad_norm": 2.9375,
      "learning_rate": 4.95236129608428e-05,
      "loss": 0.9804,
      "step": 92940
    },
    {
      "epoch": 0.3257665765945739,
      "grad_norm": 3.28125,
      "learning_rate": 4.9522963932179103e-05,
      "loss": 0.955,
      "step": 92950
    },
    {
      "epoch": 0.32580162410146957,
      "grad_norm": 3.046875,
      "learning_rate": 4.95223149035154e-05,
      "loss": 0.9502,
      "step": 92960
    },
    {
      "epoch": 0.32583667160836516,
      "grad_norm": 2.9375,
      "learning_rate": 4.95216658748517e-05,
      "loss": 0.8922,
      "step": 92970
    },
    {
      "epoch": 0.32587171911526075,
      "grad_norm": 2.875,
      "learning_rate": 4.9521016846187994e-05,
      "loss": 0.946,
      "step": 92980
    },
    {
      "epoch": 0.32590676662215634,
      "grad_norm": 2.8125,
      "learning_rate": 4.9520367817524295e-05,
      "loss": 0.9765,
      "step": 92990
    },
    {
      "epoch": 0.32594181412905193,
      "grad_norm": 3.09375,
      "learning_rate": 4.951971878886059e-05,
      "loss": 1.0467,
      "step": 93000
    },
    {
      "epoch": 0.3259768616359475,
      "grad_norm": 2.890625,
      "learning_rate": 4.951906976019689e-05,
      "loss": 0.9105,
      "step": 93010
    },
    {
      "epoch": 0.3260119091428431,
      "grad_norm": 3.203125,
      "learning_rate": 4.9518420731533186e-05,
      "loss": 0.9986,
      "step": 93020
    },
    {
      "epoch": 0.3260469566497387,
      "grad_norm": 3.234375,
      "learning_rate": 4.951777170286949e-05,
      "loss": 0.9988,
      "step": 93030
    },
    {
      "epoch": 0.3260820041566343,
      "grad_norm": 3.140625,
      "learning_rate": 4.951712267420578e-05,
      "loss": 0.8921,
      "step": 93040
    },
    {
      "epoch": 0.3261170516635299,
      "grad_norm": 2.984375,
      "learning_rate": 4.9516473645542083e-05,
      "loss": 0.9656,
      "step": 93050
    },
    {
      "epoch": 0.32615209917042554,
      "grad_norm": 2.84375,
      "learning_rate": 4.951582461687838e-05,
      "loss": 0.9499,
      "step": 93060
    },
    {
      "epoch": 0.3261871466773211,
      "grad_norm": 2.9375,
      "learning_rate": 4.951517558821468e-05,
      "loss": 0.9722,
      "step": 93070
    },
    {
      "epoch": 0.3262221941842167,
      "grad_norm": 3.125,
      "learning_rate": 4.951452655955098e-05,
      "loss": 0.9939,
      "step": 93080
    },
    {
      "epoch": 0.3262572416911123,
      "grad_norm": 2.84375,
      "learning_rate": 4.9513877530887275e-05,
      "loss": 0.9781,
      "step": 93090
    },
    {
      "epoch": 0.3262922891980079,
      "grad_norm": 3.21875,
      "learning_rate": 4.951322850222358e-05,
      "loss": 0.9773,
      "step": 93100
    },
    {
      "epoch": 0.3263273367049035,
      "grad_norm": 3.25,
      "learning_rate": 4.951257947355987e-05,
      "loss": 0.9637,
      "step": 93110
    },
    {
      "epoch": 0.3263623842117991,
      "grad_norm": 3.25,
      "learning_rate": 4.951193044489617e-05,
      "loss": 0.9262,
      "step": 93120
    },
    {
      "epoch": 0.3263974317186947,
      "grad_norm": 3.0,
      "learning_rate": 4.951128141623247e-05,
      "loss": 0.8885,
      "step": 93130
    },
    {
      "epoch": 0.32643247922559027,
      "grad_norm": 3.09375,
      "learning_rate": 4.951063238756877e-05,
      "loss": 0.9517,
      "step": 93140
    },
    {
      "epoch": 0.32646752673248586,
      "grad_norm": 3.359375,
      "learning_rate": 4.9509983358905063e-05,
      "loss": 0.9304,
      "step": 93150
    },
    {
      "epoch": 0.3265025742393815,
      "grad_norm": 3.234375,
      "learning_rate": 4.9509334330241365e-05,
      "loss": 1.0166,
      "step": 93160
    },
    {
      "epoch": 0.3265376217462771,
      "grad_norm": 2.875,
      "learning_rate": 4.950868530157766e-05,
      "loss": 0.9824,
      "step": 93170
    },
    {
      "epoch": 0.3265726692531727,
      "grad_norm": 3.171875,
      "learning_rate": 4.950803627291396e-05,
      "loss": 1.0129,
      "step": 93180
    },
    {
      "epoch": 0.3266077167600683,
      "grad_norm": 2.890625,
      "learning_rate": 4.9507387244250255e-05,
      "loss": 0.9504,
      "step": 93190
    },
    {
      "epoch": 0.32664276426696387,
      "grad_norm": 3.40625,
      "learning_rate": 4.950673821558656e-05,
      "loss": 0.959,
      "step": 93200
    },
    {
      "epoch": 0.32667781177385946,
      "grad_norm": 3.09375,
      "learning_rate": 4.950608918692286e-05,
      "loss": 0.9536,
      "step": 93210
    },
    {
      "epoch": 0.32671285928075505,
      "grad_norm": 3.03125,
      "learning_rate": 4.950544015825915e-05,
      "loss": 0.9492,
      "step": 93220
    },
    {
      "epoch": 0.32674790678765064,
      "grad_norm": 3.234375,
      "learning_rate": 4.9504791129595454e-05,
      "loss": 0.9454,
      "step": 93230
    },
    {
      "epoch": 0.32678295429454624,
      "grad_norm": 3.234375,
      "learning_rate": 4.950414210093175e-05,
      "loss": 0.9778,
      "step": 93240
    },
    {
      "epoch": 0.3268180018014419,
      "grad_norm": 4.03125,
      "learning_rate": 4.950349307226805e-05,
      "loss": 0.9849,
      "step": 93250
    },
    {
      "epoch": 0.3268530493083375,
      "grad_norm": 3.25,
      "learning_rate": 4.9502844043604345e-05,
      "loss": 1.0302,
      "step": 93260
    },
    {
      "epoch": 0.32688809681523306,
      "grad_norm": 3.03125,
      "learning_rate": 4.9502195014940646e-05,
      "loss": 0.9729,
      "step": 93270
    },
    {
      "epoch": 0.32692314432212866,
      "grad_norm": 3.09375,
      "learning_rate": 4.950154598627694e-05,
      "loss": 0.9996,
      "step": 93280
    },
    {
      "epoch": 0.32695819182902425,
      "grad_norm": 3.5625,
      "learning_rate": 4.950089695761324e-05,
      "loss": 0.981,
      "step": 93290
    },
    {
      "epoch": 0.32699323933591984,
      "grad_norm": 3.015625,
      "learning_rate": 4.950024792894954e-05,
      "loss": 0.9967,
      "step": 93300
    },
    {
      "epoch": 0.32702828684281543,
      "grad_norm": 3.09375,
      "learning_rate": 4.949959890028584e-05,
      "loss": 0.9586,
      "step": 93310
    },
    {
      "epoch": 0.327063334349711,
      "grad_norm": 3.15625,
      "learning_rate": 4.949894987162213e-05,
      "loss": 0.9816,
      "step": 93320
    },
    {
      "epoch": 0.3270983818566066,
      "grad_norm": 3.8125,
      "learning_rate": 4.949830084295843e-05,
      "loss": 0.9631,
      "step": 93330
    },
    {
      "epoch": 0.3271334293635022,
      "grad_norm": 3.0,
      "learning_rate": 4.949765181429473e-05,
      "loss": 1.0022,
      "step": 93340
    },
    {
      "epoch": 0.32716847687039785,
      "grad_norm": 3.171875,
      "learning_rate": 4.9497002785631023e-05,
      "loss": 0.9589,
      "step": 93350
    },
    {
      "epoch": 0.32720352437729344,
      "grad_norm": 3.578125,
      "learning_rate": 4.9496353756967325e-05,
      "loss": 0.9397,
      "step": 93360
    },
    {
      "epoch": 0.32723857188418903,
      "grad_norm": 3.09375,
      "learning_rate": 4.949570472830362e-05,
      "loss": 0.9568,
      "step": 93370
    },
    {
      "epoch": 0.3272736193910846,
      "grad_norm": 2.9375,
      "learning_rate": 4.949505569963992e-05,
      "loss": 0.9307,
      "step": 93380
    },
    {
      "epoch": 0.3273086668979802,
      "grad_norm": 2.734375,
      "learning_rate": 4.9494406670976215e-05,
      "loss": 0.9705,
      "step": 93390
    },
    {
      "epoch": 0.3273437144048758,
      "grad_norm": 3.234375,
      "learning_rate": 4.949375764231252e-05,
      "loss": 0.9737,
      "step": 93400
    },
    {
      "epoch": 0.3273787619117714,
      "grad_norm": 3.21875,
      "learning_rate": 4.949310861364881e-05,
      "loss": 0.9422,
      "step": 93410
    },
    {
      "epoch": 0.327413809418667,
      "grad_norm": 3.46875,
      "learning_rate": 4.949245958498511e-05,
      "loss": 1.0186,
      "step": 93420
    },
    {
      "epoch": 0.3274488569255626,
      "grad_norm": 3.03125,
      "learning_rate": 4.949181055632141e-05,
      "loss": 0.9755,
      "step": 93430
    },
    {
      "epoch": 0.3274839044324582,
      "grad_norm": 2.9375,
      "learning_rate": 4.949116152765771e-05,
      "loss": 1.0257,
      "step": 93440
    },
    {
      "epoch": 0.3275189519393538,
      "grad_norm": 3.5625,
      "learning_rate": 4.949051249899401e-05,
      "loss": 0.9744,
      "step": 93450
    },
    {
      "epoch": 0.3275539994462494,
      "grad_norm": 3.28125,
      "learning_rate": 4.9489863470330305e-05,
      "loss": 0.9432,
      "step": 93460
    },
    {
      "epoch": 0.327589046953145,
      "grad_norm": 3.03125,
      "learning_rate": 4.9489214441666606e-05,
      "loss": 0.9473,
      "step": 93470
    },
    {
      "epoch": 0.3276240944600406,
      "grad_norm": 3.09375,
      "learning_rate": 4.94885654130029e-05,
      "loss": 0.9793,
      "step": 93480
    },
    {
      "epoch": 0.3276591419669362,
      "grad_norm": 3.0,
      "learning_rate": 4.94879163843392e-05,
      "loss": 1.0014,
      "step": 93490
    },
    {
      "epoch": 0.3276941894738318,
      "grad_norm": 3.46875,
      "learning_rate": 4.94872673556755e-05,
      "loss": 0.9032,
      "step": 93500
    },
    {
      "epoch": 0.32772923698072737,
      "grad_norm": 3.265625,
      "learning_rate": 4.94866183270118e-05,
      "loss": 0.9955,
      "step": 93510
    },
    {
      "epoch": 0.32776428448762296,
      "grad_norm": 3.25,
      "learning_rate": 4.948596929834809e-05,
      "loss": 1.0052,
      "step": 93520
    },
    {
      "epoch": 0.32779933199451855,
      "grad_norm": 3.046875,
      "learning_rate": 4.9485320269684394e-05,
      "loss": 0.9903,
      "step": 93530
    },
    {
      "epoch": 0.32783437950141414,
      "grad_norm": 3.125,
      "learning_rate": 4.948467124102069e-05,
      "loss": 0.9672,
      "step": 93540
    },
    {
      "epoch": 0.3278694270083098,
      "grad_norm": 2.703125,
      "learning_rate": 4.948402221235699e-05,
      "loss": 0.9217,
      "step": 93550
    },
    {
      "epoch": 0.3279044745152054,
      "grad_norm": 3.390625,
      "learning_rate": 4.9483373183693285e-05,
      "loss": 0.9431,
      "step": 93560
    },
    {
      "epoch": 0.32793952202210097,
      "grad_norm": 2.671875,
      "learning_rate": 4.9482724155029586e-05,
      "loss": 0.9877,
      "step": 93570
    },
    {
      "epoch": 0.32797456952899656,
      "grad_norm": 3.1875,
      "learning_rate": 4.948207512636589e-05,
      "loss": 0.9486,
      "step": 93580
    },
    {
      "epoch": 0.32800961703589215,
      "grad_norm": 2.9375,
      "learning_rate": 4.948142609770218e-05,
      "loss": 1.0183,
      "step": 93590
    },
    {
      "epoch": 0.32804466454278775,
      "grad_norm": 2.75,
      "learning_rate": 4.9480777069038484e-05,
      "loss": 1.0264,
      "step": 93600
    },
    {
      "epoch": 0.32807971204968334,
      "grad_norm": 2.765625,
      "learning_rate": 4.948012804037478e-05,
      "loss": 0.9283,
      "step": 93610
    },
    {
      "epoch": 0.3281147595565789,
      "grad_norm": 2.890625,
      "learning_rate": 4.947947901171108e-05,
      "loss": 0.9951,
      "step": 93620
    },
    {
      "epoch": 0.3281498070634745,
      "grad_norm": 3.3125,
      "learning_rate": 4.9478829983047374e-05,
      "loss": 0.9298,
      "step": 93630
    },
    {
      "epoch": 0.3281848545703701,
      "grad_norm": 3.328125,
      "learning_rate": 4.9478180954383676e-05,
      "loss": 0.9951,
      "step": 93640
    },
    {
      "epoch": 0.32821990207726576,
      "grad_norm": 3.015625,
      "learning_rate": 4.947753192571997e-05,
      "loss": 1.0254,
      "step": 93650
    },
    {
      "epoch": 0.32825494958416135,
      "grad_norm": 2.71875,
      "learning_rate": 4.947688289705627e-05,
      "loss": 1.0196,
      "step": 93660
    },
    {
      "epoch": 0.32828999709105694,
      "grad_norm": 2.75,
      "learning_rate": 4.9476233868392566e-05,
      "loss": 0.891,
      "step": 93670
    },
    {
      "epoch": 0.32832504459795253,
      "grad_norm": 2.5625,
      "learning_rate": 4.947558483972887e-05,
      "loss": 0.9351,
      "step": 93680
    },
    {
      "epoch": 0.3283600921048481,
      "grad_norm": 2.953125,
      "learning_rate": 4.947493581106516e-05,
      "loss": 0.9458,
      "step": 93690
    },
    {
      "epoch": 0.3283951396117437,
      "grad_norm": 2.90625,
      "learning_rate": 4.947428678240146e-05,
      "loss": 1.0021,
      "step": 93700
    },
    {
      "epoch": 0.3284301871186393,
      "grad_norm": 2.984375,
      "learning_rate": 4.947363775373776e-05,
      "loss": 0.9277,
      "step": 93710
    },
    {
      "epoch": 0.3284652346255349,
      "grad_norm": 3.140625,
      "learning_rate": 4.947298872507405e-05,
      "loss": 0.9137,
      "step": 93720
    },
    {
      "epoch": 0.3285002821324305,
      "grad_norm": 3.4375,
      "learning_rate": 4.9472339696410354e-05,
      "loss": 0.9632,
      "step": 93730
    },
    {
      "epoch": 0.3285353296393261,
      "grad_norm": 2.484375,
      "learning_rate": 4.947169066774665e-05,
      "loss": 0.898,
      "step": 93740
    },
    {
      "epoch": 0.3285703771462217,
      "grad_norm": 3.125,
      "learning_rate": 4.947104163908295e-05,
      "loss": 0.9334,
      "step": 93750
    },
    {
      "epoch": 0.3286054246531173,
      "grad_norm": 2.859375,
      "learning_rate": 4.9470392610419245e-05,
      "loss": 0.9352,
      "step": 93760
    },
    {
      "epoch": 0.3286404721600129,
      "grad_norm": 2.796875,
      "learning_rate": 4.9469743581755546e-05,
      "loss": 0.9278,
      "step": 93770
    },
    {
      "epoch": 0.3286755196669085,
      "grad_norm": 3.234375,
      "learning_rate": 4.946909455309184e-05,
      "loss": 0.9454,
      "step": 93780
    },
    {
      "epoch": 0.3287105671738041,
      "grad_norm": 3.4375,
      "learning_rate": 4.946844552442814e-05,
      "loss": 1.046,
      "step": 93790
    },
    {
      "epoch": 0.3287456146806997,
      "grad_norm": 3.328125,
      "learning_rate": 4.946779649576444e-05,
      "loss": 0.9875,
      "step": 93800
    },
    {
      "epoch": 0.3287806621875953,
      "grad_norm": 3.328125,
      "learning_rate": 4.946714746710074e-05,
      "loss": 0.9604,
      "step": 93810
    },
    {
      "epoch": 0.32881570969449087,
      "grad_norm": 2.921875,
      "learning_rate": 4.946649843843704e-05,
      "loss": 0.928,
      "step": 93820
    },
    {
      "epoch": 0.32885075720138646,
      "grad_norm": 3.0,
      "learning_rate": 4.9465849409773334e-05,
      "loss": 1.0231,
      "step": 93830
    },
    {
      "epoch": 0.3288858047082821,
      "grad_norm": 3.375,
      "learning_rate": 4.9465200381109636e-05,
      "loss": 1.0213,
      "step": 93840
    },
    {
      "epoch": 0.3289208522151777,
      "grad_norm": 3.375,
      "learning_rate": 4.946455135244593e-05,
      "loss": 0.9531,
      "step": 93850
    },
    {
      "epoch": 0.3289558997220733,
      "grad_norm": 3.1875,
      "learning_rate": 4.946390232378223e-05,
      "loss": 1.0587,
      "step": 93860
    },
    {
      "epoch": 0.3289909472289689,
      "grad_norm": 3.25,
      "learning_rate": 4.9463253295118526e-05,
      "loss": 1.0607,
      "step": 93870
    },
    {
      "epoch": 0.32902599473586447,
      "grad_norm": 3.203125,
      "learning_rate": 4.946260426645483e-05,
      "loss": 0.957,
      "step": 93880
    },
    {
      "epoch": 0.32906104224276006,
      "grad_norm": 2.796875,
      "learning_rate": 4.946195523779112e-05,
      "loss": 0.9482,
      "step": 93890
    },
    {
      "epoch": 0.32909608974965565,
      "grad_norm": 3.21875,
      "learning_rate": 4.9461306209127424e-05,
      "loss": 0.9574,
      "step": 93900
    },
    {
      "epoch": 0.32913113725655124,
      "grad_norm": 2.96875,
      "learning_rate": 4.946065718046372e-05,
      "loss": 0.9473,
      "step": 93910
    },
    {
      "epoch": 0.32916618476344683,
      "grad_norm": 2.90625,
      "learning_rate": 4.946000815180002e-05,
      "loss": 0.9296,
      "step": 93920
    },
    {
      "epoch": 0.3292012322703424,
      "grad_norm": 2.75,
      "learning_rate": 4.9459359123136314e-05,
      "loss": 0.9668,
      "step": 93930
    },
    {
      "epoch": 0.3292362797772381,
      "grad_norm": 3.0625,
      "learning_rate": 4.9458710094472616e-05,
      "loss": 0.9682,
      "step": 93940
    },
    {
      "epoch": 0.32927132728413366,
      "grad_norm": 3.453125,
      "learning_rate": 4.945806106580892e-05,
      "loss": 1.0539,
      "step": 93950
    },
    {
      "epoch": 0.32930637479102925,
      "grad_norm": 3.1875,
      "learning_rate": 4.945741203714521e-05,
      "loss": 0.974,
      "step": 93960
    },
    {
      "epoch": 0.32934142229792485,
      "grad_norm": 3.34375,
      "learning_rate": 4.945676300848151e-05,
      "loss": 0.9957,
      "step": 93970
    },
    {
      "epoch": 0.32937646980482044,
      "grad_norm": 3.3125,
      "learning_rate": 4.945611397981781e-05,
      "loss": 0.9106,
      "step": 93980
    },
    {
      "epoch": 0.32941151731171603,
      "grad_norm": 3.296875,
      "learning_rate": 4.945546495115411e-05,
      "loss": 0.99,
      "step": 93990
    },
    {
      "epoch": 0.3294465648186116,
      "grad_norm": 3.796875,
      "learning_rate": 4.9454815922490404e-05,
      "loss": 1.0482,
      "step": 94000
    },
    {
      "epoch": 0.3294816123255072,
      "grad_norm": 3.359375,
      "learning_rate": 4.9454166893826705e-05,
      "loss": 1.0273,
      "step": 94010
    },
    {
      "epoch": 0.3295166598324028,
      "grad_norm": 3.125,
      "learning_rate": 4.9453517865163e-05,
      "loss": 0.9612,
      "step": 94020
    },
    {
      "epoch": 0.3295517073392984,
      "grad_norm": 3.671875,
      "learning_rate": 4.94528688364993e-05,
      "loss": 0.9898,
      "step": 94030
    },
    {
      "epoch": 0.32958675484619404,
      "grad_norm": 2.953125,
      "learning_rate": 4.9452219807835596e-05,
      "loss": 0.927,
      "step": 94040
    },
    {
      "epoch": 0.32962180235308963,
      "grad_norm": 3.53125,
      "learning_rate": 4.94515707791719e-05,
      "loss": 0.9793,
      "step": 94050
    },
    {
      "epoch": 0.3296568498599852,
      "grad_norm": 3.484375,
      "learning_rate": 4.945092175050819e-05,
      "loss": 0.9414,
      "step": 94060
    },
    {
      "epoch": 0.3296918973668808,
      "grad_norm": 3.09375,
      "learning_rate": 4.9450272721844486e-05,
      "loss": 0.9283,
      "step": 94070
    },
    {
      "epoch": 0.3297269448737764,
      "grad_norm": 2.5,
      "learning_rate": 4.944962369318079e-05,
      "loss": 0.9608,
      "step": 94080
    },
    {
      "epoch": 0.329761992380672,
      "grad_norm": 3.140625,
      "learning_rate": 4.944897466451708e-05,
      "loss": 1.0287,
      "step": 94090
    },
    {
      "epoch": 0.3297970398875676,
      "grad_norm": 3.453125,
      "learning_rate": 4.9448325635853384e-05,
      "loss": 0.9944,
      "step": 94100
    },
    {
      "epoch": 0.3298320873944632,
      "grad_norm": 3.109375,
      "learning_rate": 4.944767660718968e-05,
      "loss": 0.9149,
      "step": 94110
    },
    {
      "epoch": 0.32986713490135877,
      "grad_norm": 3.078125,
      "learning_rate": 4.944702757852598e-05,
      "loss": 0.9974,
      "step": 94120
    },
    {
      "epoch": 0.32990218240825436,
      "grad_norm": 3.015625,
      "learning_rate": 4.9446378549862274e-05,
      "loss": 1.0398,
      "step": 94130
    },
    {
      "epoch": 0.32993722991515,
      "grad_norm": 3.171875,
      "learning_rate": 4.9445729521198576e-05,
      "loss": 1.0011,
      "step": 94140
    },
    {
      "epoch": 0.3299722774220456,
      "grad_norm": 3.09375,
      "learning_rate": 4.944508049253487e-05,
      "loss": 0.9442,
      "step": 94150
    },
    {
      "epoch": 0.3300073249289412,
      "grad_norm": 3.078125,
      "learning_rate": 4.944443146387117e-05,
      "loss": 0.9906,
      "step": 94160
    },
    {
      "epoch": 0.3300423724358368,
      "grad_norm": 2.953125,
      "learning_rate": 4.944378243520747e-05,
      "loss": 0.968,
      "step": 94170
    },
    {
      "epoch": 0.3300774199427324,
      "grad_norm": 2.6875,
      "learning_rate": 4.944313340654377e-05,
      "loss": 1.0052,
      "step": 94180
    },
    {
      "epoch": 0.33011246744962797,
      "grad_norm": 2.9375,
      "learning_rate": 4.944248437788007e-05,
      "loss": 0.9414,
      "step": 94190
    },
    {
      "epoch": 0.33014751495652356,
      "grad_norm": 3.109375,
      "learning_rate": 4.9441835349216364e-05,
      "loss": 1.021,
      "step": 94200
    },
    {
      "epoch": 0.33018256246341915,
      "grad_norm": 2.796875,
      "learning_rate": 4.9441186320552665e-05,
      "loss": 0.963,
      "step": 94210
    },
    {
      "epoch": 0.33021760997031474,
      "grad_norm": 2.921875,
      "learning_rate": 4.944053729188896e-05,
      "loss": 0.969,
      "step": 94220
    },
    {
      "epoch": 0.33025265747721033,
      "grad_norm": 3.671875,
      "learning_rate": 4.943988826322526e-05,
      "loss": 0.9864,
      "step": 94230
    },
    {
      "epoch": 0.330287704984106,
      "grad_norm": 2.96875,
      "learning_rate": 4.9439239234561556e-05,
      "loss": 0.9787,
      "step": 94240
    },
    {
      "epoch": 0.33032275249100157,
      "grad_norm": 3.46875,
      "learning_rate": 4.943859020589786e-05,
      "loss": 0.9899,
      "step": 94250
    },
    {
      "epoch": 0.33035779999789716,
      "grad_norm": 3.53125,
      "learning_rate": 4.943794117723415e-05,
      "loss": 0.9823,
      "step": 94260
    },
    {
      "epoch": 0.33039284750479275,
      "grad_norm": 2.5625,
      "learning_rate": 4.943729214857045e-05,
      "loss": 0.9111,
      "step": 94270
    },
    {
      "epoch": 0.33042789501168834,
      "grad_norm": 3.4375,
      "learning_rate": 4.943664311990675e-05,
      "loss": 1.0229,
      "step": 94280
    },
    {
      "epoch": 0.33046294251858394,
      "grad_norm": 3.359375,
      "learning_rate": 4.943599409124305e-05,
      "loss": 0.9789,
      "step": 94290
    },
    {
      "epoch": 0.3304979900254795,
      "grad_norm": 2.90625,
      "learning_rate": 4.9435345062579344e-05,
      "loss": 0.9757,
      "step": 94300
    },
    {
      "epoch": 0.3305330375323751,
      "grad_norm": 3.40625,
      "learning_rate": 4.9434696033915645e-05,
      "loss": 1.0007,
      "step": 94310
    },
    {
      "epoch": 0.3305680850392707,
      "grad_norm": 3.375,
      "learning_rate": 4.9434047005251946e-05,
      "loss": 0.9732,
      "step": 94320
    },
    {
      "epoch": 0.33060313254616636,
      "grad_norm": 3.15625,
      "learning_rate": 4.943339797658824e-05,
      "loss": 1.0295,
      "step": 94330
    },
    {
      "epoch": 0.33063818005306195,
      "grad_norm": 2.71875,
      "learning_rate": 4.943274894792454e-05,
      "loss": 0.9309,
      "step": 94340
    },
    {
      "epoch": 0.33067322755995754,
      "grad_norm": 3.125,
      "learning_rate": 4.943209991926084e-05,
      "loss": 0.9495,
      "step": 94350
    },
    {
      "epoch": 0.33070827506685313,
      "grad_norm": 2.953125,
      "learning_rate": 4.943145089059714e-05,
      "loss": 0.9506,
      "step": 94360
    },
    {
      "epoch": 0.3307433225737487,
      "grad_norm": 3.3125,
      "learning_rate": 4.943080186193343e-05,
      "loss": 0.9072,
      "step": 94370
    },
    {
      "epoch": 0.3307783700806443,
      "grad_norm": 2.96875,
      "learning_rate": 4.9430152833269734e-05,
      "loss": 0.9756,
      "step": 94380
    },
    {
      "epoch": 0.3308134175875399,
      "grad_norm": 3.125,
      "learning_rate": 4.942950380460603e-05,
      "loss": 0.9302,
      "step": 94390
    },
    {
      "epoch": 0.3308484650944355,
      "grad_norm": 3.0,
      "learning_rate": 4.942885477594233e-05,
      "loss": 0.8709,
      "step": 94400
    },
    {
      "epoch": 0.3308835126013311,
      "grad_norm": 2.9375,
      "learning_rate": 4.9428205747278625e-05,
      "loss": 0.9319,
      "step": 94410
    },
    {
      "epoch": 0.3309185601082267,
      "grad_norm": 2.890625,
      "learning_rate": 4.9427556718614926e-05,
      "loss": 1.015,
      "step": 94420
    },
    {
      "epoch": 0.3309536076151223,
      "grad_norm": 2.484375,
      "learning_rate": 4.942690768995122e-05,
      "loss": 0.8232,
      "step": 94430
    },
    {
      "epoch": 0.3309886551220179,
      "grad_norm": 3.21875,
      "learning_rate": 4.9426258661287516e-05,
      "loss": 1.029,
      "step": 94440
    },
    {
      "epoch": 0.3310237026289135,
      "grad_norm": 2.828125,
      "learning_rate": 4.942560963262382e-05,
      "loss": 0.8951,
      "step": 94450
    },
    {
      "epoch": 0.3310587501358091,
      "grad_norm": 3.1875,
      "learning_rate": 4.942496060396011e-05,
      "loss": 0.9566,
      "step": 94460
    },
    {
      "epoch": 0.3310937976427047,
      "grad_norm": 3.0,
      "learning_rate": 4.942431157529641e-05,
      "loss": 0.9803,
      "step": 94470
    },
    {
      "epoch": 0.3311288451496003,
      "grad_norm": 3.140625,
      "learning_rate": 4.942366254663271e-05,
      "loss": 1.0003,
      "step": 94480
    },
    {
      "epoch": 0.3311638926564959,
      "grad_norm": 2.84375,
      "learning_rate": 4.942301351796901e-05,
      "loss": 0.9268,
      "step": 94490
    },
    {
      "epoch": 0.33119894016339146,
      "grad_norm": 3.109375,
      "learning_rate": 4.9422364489305304e-05,
      "loss": 0.9865,
      "step": 94500
    },
    {
      "epoch": 0.33123398767028706,
      "grad_norm": 2.75,
      "learning_rate": 4.9421715460641605e-05,
      "loss": 1.0059,
      "step": 94510
    },
    {
      "epoch": 0.33126903517718265,
      "grad_norm": 2.65625,
      "learning_rate": 4.94210664319779e-05,
      "loss": 0.9369,
      "step": 94520
    },
    {
      "epoch": 0.3313040826840783,
      "grad_norm": 2.734375,
      "learning_rate": 4.94204174033142e-05,
      "loss": 0.8611,
      "step": 94530
    },
    {
      "epoch": 0.3313391301909739,
      "grad_norm": 3.40625,
      "learning_rate": 4.94197683746505e-05,
      "loss": 1.0225,
      "step": 94540
    },
    {
      "epoch": 0.3313741776978695,
      "grad_norm": 3.171875,
      "learning_rate": 4.94191193459868e-05,
      "loss": 0.9802,
      "step": 94550
    },
    {
      "epoch": 0.33140922520476507,
      "grad_norm": 3.28125,
      "learning_rate": 4.94184703173231e-05,
      "loss": 0.9998,
      "step": 94560
    },
    {
      "epoch": 0.33144427271166066,
      "grad_norm": 3.046875,
      "learning_rate": 4.941782128865939e-05,
      "loss": 1.0156,
      "step": 94570
    },
    {
      "epoch": 0.33147932021855625,
      "grad_norm": 3.46875,
      "learning_rate": 4.9417172259995694e-05,
      "loss": 1.0205,
      "step": 94580
    },
    {
      "epoch": 0.33151436772545184,
      "grad_norm": 2.59375,
      "learning_rate": 4.941652323133199e-05,
      "loss": 0.9573,
      "step": 94590
    },
    {
      "epoch": 0.33154941523234743,
      "grad_norm": 2.84375,
      "learning_rate": 4.941587420266829e-05,
      "loss": 1.0099,
      "step": 94600
    },
    {
      "epoch": 0.331584462739243,
      "grad_norm": 2.78125,
      "learning_rate": 4.9415225174004585e-05,
      "loss": 0.9588,
      "step": 94610
    },
    {
      "epoch": 0.3316195102461386,
      "grad_norm": 3.875,
      "learning_rate": 4.9414576145340886e-05,
      "loss": 0.9103,
      "step": 94620
    },
    {
      "epoch": 0.33165455775303426,
      "grad_norm": 3.21875,
      "learning_rate": 4.941392711667718e-05,
      "loss": 0.9419,
      "step": 94630
    },
    {
      "epoch": 0.33168960525992985,
      "grad_norm": 2.765625,
      "learning_rate": 4.941327808801348e-05,
      "loss": 0.9226,
      "step": 94640
    },
    {
      "epoch": 0.33172465276682545,
      "grad_norm": 2.90625,
      "learning_rate": 4.941262905934978e-05,
      "loss": 0.9123,
      "step": 94650
    },
    {
      "epoch": 0.33175970027372104,
      "grad_norm": 3.09375,
      "learning_rate": 4.941198003068608e-05,
      "loss": 1.0712,
      "step": 94660
    },
    {
      "epoch": 0.33179474778061663,
      "grad_norm": 2.75,
      "learning_rate": 4.941133100202237e-05,
      "loss": 0.9243,
      "step": 94670
    },
    {
      "epoch": 0.3318297952875122,
      "grad_norm": 3.21875,
      "learning_rate": 4.9410681973358674e-05,
      "loss": 0.9235,
      "step": 94680
    },
    {
      "epoch": 0.3318648427944078,
      "grad_norm": 3.265625,
      "learning_rate": 4.9410032944694976e-05,
      "loss": 1.0239,
      "step": 94690
    },
    {
      "epoch": 0.3318998903013034,
      "grad_norm": 3.078125,
      "learning_rate": 4.940938391603127e-05,
      "loss": 0.9355,
      "step": 94700
    },
    {
      "epoch": 0.331934937808199,
      "grad_norm": 2.828125,
      "learning_rate": 4.940873488736757e-05,
      "loss": 0.9578,
      "step": 94710
    },
    {
      "epoch": 0.3319699853150946,
      "grad_norm": 3.328125,
      "learning_rate": 4.9408085858703866e-05,
      "loss": 0.9733,
      "step": 94720
    },
    {
      "epoch": 0.33200503282199023,
      "grad_norm": 2.875,
      "learning_rate": 4.940743683004017e-05,
      "loss": 0.882,
      "step": 94730
    },
    {
      "epoch": 0.3320400803288858,
      "grad_norm": 2.984375,
      "learning_rate": 4.940678780137646e-05,
      "loss": 0.9575,
      "step": 94740
    },
    {
      "epoch": 0.3320751278357814,
      "grad_norm": 3.640625,
      "learning_rate": 4.9406138772712764e-05,
      "loss": 0.942,
      "step": 94750
    },
    {
      "epoch": 0.332110175342677,
      "grad_norm": 3.15625,
      "learning_rate": 4.940548974404906e-05,
      "loss": 1.0183,
      "step": 94760
    },
    {
      "epoch": 0.3321452228495726,
      "grad_norm": 2.8125,
      "learning_rate": 4.940484071538536e-05,
      "loss": 0.8601,
      "step": 94770
    },
    {
      "epoch": 0.3321802703564682,
      "grad_norm": 3.296875,
      "learning_rate": 4.9404191686721654e-05,
      "loss": 0.9018,
      "step": 94780
    },
    {
      "epoch": 0.3322153178633638,
      "grad_norm": 3.3125,
      "learning_rate": 4.9403542658057956e-05,
      "loss": 0.9408,
      "step": 94790
    },
    {
      "epoch": 0.33225036537025937,
      "grad_norm": 2.84375,
      "learning_rate": 4.940289362939425e-05,
      "loss": 1.0259,
      "step": 94800
    },
    {
      "epoch": 0.33228541287715496,
      "grad_norm": 3.25,
      "learning_rate": 4.940224460073055e-05,
      "loss": 0.9882,
      "step": 94810
    },
    {
      "epoch": 0.33232046038405055,
      "grad_norm": 3.0,
      "learning_rate": 4.9401595572066846e-05,
      "loss": 0.9588,
      "step": 94820
    },
    {
      "epoch": 0.3323555078909462,
      "grad_norm": 3.015625,
      "learning_rate": 4.940094654340314e-05,
      "loss": 0.8773,
      "step": 94830
    },
    {
      "epoch": 0.3323905553978418,
      "grad_norm": 3.03125,
      "learning_rate": 4.940029751473944e-05,
      "loss": 1.0008,
      "step": 94840
    },
    {
      "epoch": 0.3324256029047374,
      "grad_norm": 2.609375,
      "learning_rate": 4.939964848607574e-05,
      "loss": 0.9373,
      "step": 94850
    },
    {
      "epoch": 0.332460650411633,
      "grad_norm": 2.609375,
      "learning_rate": 4.939899945741204e-05,
      "loss": 1.0073,
      "step": 94860
    },
    {
      "epoch": 0.33249569791852857,
      "grad_norm": 3.203125,
      "learning_rate": 4.939835042874833e-05,
      "loss": 1.0066,
      "step": 94870
    },
    {
      "epoch": 0.33253074542542416,
      "grad_norm": 3.125,
      "learning_rate": 4.9397701400084634e-05,
      "loss": 1.071,
      "step": 94880
    },
    {
      "epoch": 0.33256579293231975,
      "grad_norm": 3.1875,
      "learning_rate": 4.939705237142093e-05,
      "loss": 0.9507,
      "step": 94890
    },
    {
      "epoch": 0.33260084043921534,
      "grad_norm": 3.046875,
      "learning_rate": 4.939640334275723e-05,
      "loss": 0.944,
      "step": 94900
    },
    {
      "epoch": 0.33263588794611093,
      "grad_norm": 3.109375,
      "learning_rate": 4.939575431409353e-05,
      "loss": 0.9263,
      "step": 94910
    },
    {
      "epoch": 0.3326709354530066,
      "grad_norm": 2.609375,
      "learning_rate": 4.9395105285429826e-05,
      "loss": 0.9189,
      "step": 94920
    },
    {
      "epoch": 0.33270598295990217,
      "grad_norm": 2.96875,
      "learning_rate": 4.939445625676613e-05,
      "loss": 0.9135,
      "step": 94930
    },
    {
      "epoch": 0.33274103046679776,
      "grad_norm": 2.53125,
      "learning_rate": 4.939380722810242e-05,
      "loss": 0.9441,
      "step": 94940
    },
    {
      "epoch": 0.33277607797369335,
      "grad_norm": 3.578125,
      "learning_rate": 4.9393158199438724e-05,
      "loss": 0.9331,
      "step": 94950
    },
    {
      "epoch": 0.33281112548058894,
      "grad_norm": 3.046875,
      "learning_rate": 4.939250917077502e-05,
      "loss": 0.9817,
      "step": 94960
    },
    {
      "epoch": 0.33284617298748453,
      "grad_norm": 2.609375,
      "learning_rate": 4.939186014211132e-05,
      "loss": 0.9245,
      "step": 94970
    },
    {
      "epoch": 0.3328812204943801,
      "grad_norm": 2.96875,
      "learning_rate": 4.9391211113447614e-05,
      "loss": 0.9326,
      "step": 94980
    },
    {
      "epoch": 0.3329162680012757,
      "grad_norm": 3.0625,
      "learning_rate": 4.9390562084783916e-05,
      "loss": 0.9144,
      "step": 94990
    },
    {
      "epoch": 0.3329513155081713,
      "grad_norm": 2.9375,
      "learning_rate": 4.938991305612021e-05,
      "loss": 0.9436,
      "step": 95000
    },
    {
      "epoch": 0.3329513155081713,
      "eval_loss": 0.906897783279419,
      "eval_runtime": 553.3105,
      "eval_samples_per_second": 687.563,
      "eval_steps_per_second": 57.297,
      "step": 95000
    },
    {
      "epoch": 0.3329863630150669,
      "grad_norm": 3.171875,
      "learning_rate": 4.938926402745651e-05,
      "loss": 1.0486,
      "step": 95010
    },
    {
      "epoch": 0.33302141052196255,
      "grad_norm": 3.3125,
      "learning_rate": 4.9388614998792806e-05,
      "loss": 0.9445,
      "step": 95020
    },
    {
      "epoch": 0.33305645802885814,
      "grad_norm": 2.96875,
      "learning_rate": 4.938796597012911e-05,
      "loss": 1.043,
      "step": 95030
    },
    {
      "epoch": 0.33309150553575373,
      "grad_norm": 3.140625,
      "learning_rate": 4.93873169414654e-05,
      "loss": 0.9158,
      "step": 95040
    },
    {
      "epoch": 0.3331265530426493,
      "grad_norm": 3.296875,
      "learning_rate": 4.9386667912801704e-05,
      "loss": 1.0361,
      "step": 95050
    },
    {
      "epoch": 0.3331616005495449,
      "grad_norm": 3.25,
      "learning_rate": 4.9386018884138005e-05,
      "loss": 1.0576,
      "step": 95060
    },
    {
      "epoch": 0.3331966480564405,
      "grad_norm": 3.078125,
      "learning_rate": 4.93853698554743e-05,
      "loss": 0.9294,
      "step": 95070
    },
    {
      "epoch": 0.3332316955633361,
      "grad_norm": 2.78125,
      "learning_rate": 4.93847208268106e-05,
      "loss": 0.9306,
      "step": 95080
    },
    {
      "epoch": 0.3332667430702317,
      "grad_norm": 3.40625,
      "learning_rate": 4.9384071798146896e-05,
      "loss": 1.0382,
      "step": 95090
    },
    {
      "epoch": 0.3333017905771273,
      "grad_norm": 3.171875,
      "learning_rate": 4.93834227694832e-05,
      "loss": 1.0187,
      "step": 95100
    },
    {
      "epoch": 0.33333683808402287,
      "grad_norm": 2.953125,
      "learning_rate": 4.938277374081949e-05,
      "loss": 0.8917,
      "step": 95110
    },
    {
      "epoch": 0.3333718855909185,
      "grad_norm": 5.75,
      "learning_rate": 4.938212471215579e-05,
      "loss": 0.9142,
      "step": 95120
    },
    {
      "epoch": 0.3334069330978141,
      "grad_norm": 3.375,
      "learning_rate": 4.938147568349209e-05,
      "loss": 0.9604,
      "step": 95130
    },
    {
      "epoch": 0.3334419806047097,
      "grad_norm": 3.484375,
      "learning_rate": 4.938082665482839e-05,
      "loss": 0.9235,
      "step": 95140
    },
    {
      "epoch": 0.3334770281116053,
      "grad_norm": 3.40625,
      "learning_rate": 4.9380177626164684e-05,
      "loss": 0.9486,
      "step": 95150
    },
    {
      "epoch": 0.3335120756185009,
      "grad_norm": 3.375,
      "learning_rate": 4.9379528597500985e-05,
      "loss": 1.0158,
      "step": 95160
    },
    {
      "epoch": 0.33354712312539647,
      "grad_norm": 2.921875,
      "learning_rate": 4.937887956883728e-05,
      "loss": 1.0338,
      "step": 95170
    },
    {
      "epoch": 0.33358217063229206,
      "grad_norm": 2.59375,
      "learning_rate": 4.937823054017358e-05,
      "loss": 0.9455,
      "step": 95180
    },
    {
      "epoch": 0.33361721813918765,
      "grad_norm": 2.734375,
      "learning_rate": 4.937758151150988e-05,
      "loss": 0.8371,
      "step": 95190
    },
    {
      "epoch": 0.33365226564608325,
      "grad_norm": 2.578125,
      "learning_rate": 4.937693248284617e-05,
      "loss": 1.0022,
      "step": 95200
    },
    {
      "epoch": 0.33368731315297884,
      "grad_norm": 3.0625,
      "learning_rate": 4.937628345418247e-05,
      "loss": 0.9396,
      "step": 95210
    },
    {
      "epoch": 0.3337223606598745,
      "grad_norm": 3.234375,
      "learning_rate": 4.9375634425518766e-05,
      "loss": 1.0541,
      "step": 95220
    },
    {
      "epoch": 0.3337574081667701,
      "grad_norm": 2.71875,
      "learning_rate": 4.937498539685507e-05,
      "loss": 0.9359,
      "step": 95230
    },
    {
      "epoch": 0.33379245567366567,
      "grad_norm": 2.859375,
      "learning_rate": 4.937433636819136e-05,
      "loss": 0.9599,
      "step": 95240
    },
    {
      "epoch": 0.33382750318056126,
      "grad_norm": 3.59375,
      "learning_rate": 4.9373687339527664e-05,
      "loss": 0.9789,
      "step": 95250
    },
    {
      "epoch": 0.33386255068745685,
      "grad_norm": 3.21875,
      "learning_rate": 4.937303831086396e-05,
      "loss": 0.9544,
      "step": 95260
    },
    {
      "epoch": 0.33389759819435244,
      "grad_norm": 2.953125,
      "learning_rate": 4.937238928220026e-05,
      "loss": 0.9625,
      "step": 95270
    },
    {
      "epoch": 0.33393264570124803,
      "grad_norm": 2.921875,
      "learning_rate": 4.937174025353656e-05,
      "loss": 0.9343,
      "step": 95280
    },
    {
      "epoch": 0.3339676932081436,
      "grad_norm": 3.203125,
      "learning_rate": 4.9371091224872856e-05,
      "loss": 0.9715,
      "step": 95290
    },
    {
      "epoch": 0.3340027407150392,
      "grad_norm": 3.40625,
      "learning_rate": 4.937044219620916e-05,
      "loss": 1.0164,
      "step": 95300
    },
    {
      "epoch": 0.3340377882219348,
      "grad_norm": 3.125,
      "learning_rate": 4.936979316754545e-05,
      "loss": 0.9397,
      "step": 95310
    },
    {
      "epoch": 0.33407283572883045,
      "grad_norm": 2.9375,
      "learning_rate": 4.936914413888175e-05,
      "loss": 1.0156,
      "step": 95320
    },
    {
      "epoch": 0.33410788323572604,
      "grad_norm": 3.125,
      "learning_rate": 4.936849511021805e-05,
      "loss": 0.9894,
      "step": 95330
    },
    {
      "epoch": 0.33414293074262164,
      "grad_norm": 3.0625,
      "learning_rate": 4.936784608155435e-05,
      "loss": 0.867,
      "step": 95340
    },
    {
      "epoch": 0.3341779782495172,
      "grad_norm": 3.375,
      "learning_rate": 4.9367197052890644e-05,
      "loss": 0.8964,
      "step": 95350
    },
    {
      "epoch": 0.3342130257564128,
      "grad_norm": 3.078125,
      "learning_rate": 4.9366548024226945e-05,
      "loss": 0.9474,
      "step": 95360
    },
    {
      "epoch": 0.3342480732633084,
      "grad_norm": 3.265625,
      "learning_rate": 4.936589899556324e-05,
      "loss": 1.0229,
      "step": 95370
    },
    {
      "epoch": 0.334283120770204,
      "grad_norm": 3.78125,
      "learning_rate": 4.936524996689954e-05,
      "loss": 0.9599,
      "step": 95380
    },
    {
      "epoch": 0.3343181682770996,
      "grad_norm": 3.171875,
      "learning_rate": 4.9364600938235836e-05,
      "loss": 0.9968,
      "step": 95390
    },
    {
      "epoch": 0.3343532157839952,
      "grad_norm": 2.765625,
      "learning_rate": 4.936395190957214e-05,
      "loss": 0.9734,
      "step": 95400
    },
    {
      "epoch": 0.3343882632908908,
      "grad_norm": 3.421875,
      "learning_rate": 4.936330288090844e-05,
      "loss": 0.9716,
      "step": 95410
    },
    {
      "epoch": 0.3344233107977864,
      "grad_norm": 3.25,
      "learning_rate": 4.936265385224473e-05,
      "loss": 0.904,
      "step": 95420
    },
    {
      "epoch": 0.334458358304682,
      "grad_norm": 3.03125,
      "learning_rate": 4.9362004823581035e-05,
      "loss": 0.9574,
      "step": 95430
    },
    {
      "epoch": 0.3344934058115776,
      "grad_norm": 3.1875,
      "learning_rate": 4.936135579491733e-05,
      "loss": 0.96,
      "step": 95440
    },
    {
      "epoch": 0.3345284533184732,
      "grad_norm": 2.828125,
      "learning_rate": 4.936070676625363e-05,
      "loss": 0.9246,
      "step": 95450
    },
    {
      "epoch": 0.3345635008253688,
      "grad_norm": 3.140625,
      "learning_rate": 4.9360057737589925e-05,
      "loss": 0.9708,
      "step": 95460
    },
    {
      "epoch": 0.3345985483322644,
      "grad_norm": 3.28125,
      "learning_rate": 4.9359408708926227e-05,
      "loss": 1.0454,
      "step": 95470
    },
    {
      "epoch": 0.33463359583915997,
      "grad_norm": 3.03125,
      "learning_rate": 4.935875968026252e-05,
      "loss": 0.9218,
      "step": 95480
    },
    {
      "epoch": 0.33466864334605556,
      "grad_norm": 3.28125,
      "learning_rate": 4.935811065159882e-05,
      "loss": 1.1393,
      "step": 95490
    },
    {
      "epoch": 0.33470369085295115,
      "grad_norm": 3.21875,
      "learning_rate": 4.935746162293512e-05,
      "loss": 0.999,
      "step": 95500
    },
    {
      "epoch": 0.3347387383598468,
      "grad_norm": 3.203125,
      "learning_rate": 4.935681259427142e-05,
      "loss": 0.9958,
      "step": 95510
    },
    {
      "epoch": 0.3347737858667424,
      "grad_norm": 2.859375,
      "learning_rate": 4.935616356560771e-05,
      "loss": 0.939,
      "step": 95520
    },
    {
      "epoch": 0.334808833373638,
      "grad_norm": 2.890625,
      "learning_rate": 4.9355514536944015e-05,
      "loss": 0.998,
      "step": 95530
    },
    {
      "epoch": 0.3348438808805336,
      "grad_norm": 3.140625,
      "learning_rate": 4.935486550828031e-05,
      "loss": 0.9644,
      "step": 95540
    },
    {
      "epoch": 0.33487892838742916,
      "grad_norm": 3.140625,
      "learning_rate": 4.935421647961661e-05,
      "loss": 1.017,
      "step": 95550
    },
    {
      "epoch": 0.33491397589432476,
      "grad_norm": 2.78125,
      "learning_rate": 4.935356745095291e-05,
      "loss": 0.9036,
      "step": 95560
    },
    {
      "epoch": 0.33494902340122035,
      "grad_norm": 3.21875,
      "learning_rate": 4.93529184222892e-05,
      "loss": 0.978,
      "step": 95570
    },
    {
      "epoch": 0.33498407090811594,
      "grad_norm": 3.671875,
      "learning_rate": 4.93522693936255e-05,
      "loss": 0.9846,
      "step": 95580
    },
    {
      "epoch": 0.33501911841501153,
      "grad_norm": 3.46875,
      "learning_rate": 4.9351620364961796e-05,
      "loss": 0.9742,
      "step": 95590
    },
    {
      "epoch": 0.3350541659219071,
      "grad_norm": 3.1875,
      "learning_rate": 4.93509713362981e-05,
      "loss": 0.9744,
      "step": 95600
    },
    {
      "epoch": 0.33508921342880277,
      "grad_norm": 3.265625,
      "learning_rate": 4.935032230763439e-05,
      "loss": 1.0446,
      "step": 95610
    },
    {
      "epoch": 0.33512426093569836,
      "grad_norm": 3.3125,
      "learning_rate": 4.934967327897069e-05,
      "loss": 0.9399,
      "step": 95620
    },
    {
      "epoch": 0.33515930844259395,
      "grad_norm": 2.890625,
      "learning_rate": 4.934902425030699e-05,
      "loss": 0.9565,
      "step": 95630
    },
    {
      "epoch": 0.33519435594948954,
      "grad_norm": 2.859375,
      "learning_rate": 4.934837522164329e-05,
      "loss": 0.9529,
      "step": 95640
    },
    {
      "epoch": 0.33522940345638513,
      "grad_norm": 3.0625,
      "learning_rate": 4.934772619297959e-05,
      "loss": 0.9156,
      "step": 95650
    },
    {
      "epoch": 0.3352644509632807,
      "grad_norm": 3.21875,
      "learning_rate": 4.9347077164315885e-05,
      "loss": 0.996,
      "step": 95660
    },
    {
      "epoch": 0.3352994984701763,
      "grad_norm": 3.4375,
      "learning_rate": 4.9346428135652187e-05,
      "loss": 0.9663,
      "step": 95670
    },
    {
      "epoch": 0.3353345459770719,
      "grad_norm": 3.296875,
      "learning_rate": 4.934577910698848e-05,
      "loss": 0.9365,
      "step": 95680
    },
    {
      "epoch": 0.3353695934839675,
      "grad_norm": 2.765625,
      "learning_rate": 4.934513007832478e-05,
      "loss": 0.9303,
      "step": 95690
    },
    {
      "epoch": 0.3354046409908631,
      "grad_norm": 3.203125,
      "learning_rate": 4.934448104966108e-05,
      "loss": 1.0092,
      "step": 95700
    },
    {
      "epoch": 0.33543968849775874,
      "grad_norm": 3.390625,
      "learning_rate": 4.934383202099738e-05,
      "loss": 0.9681,
      "step": 95710
    },
    {
      "epoch": 0.33547473600465433,
      "grad_norm": 2.953125,
      "learning_rate": 4.934318299233367e-05,
      "loss": 0.972,
      "step": 95720
    },
    {
      "epoch": 0.3355097835115499,
      "grad_norm": 3.484375,
      "learning_rate": 4.9342533963669975e-05,
      "loss": 0.9494,
      "step": 95730
    },
    {
      "epoch": 0.3355448310184455,
      "grad_norm": 3.21875,
      "learning_rate": 4.934188493500627e-05,
      "loss": 0.9874,
      "step": 95740
    },
    {
      "epoch": 0.3355798785253411,
      "grad_norm": 3.25,
      "learning_rate": 4.934123590634257e-05,
      "loss": 1.003,
      "step": 95750
    },
    {
      "epoch": 0.3356149260322367,
      "grad_norm": 3.28125,
      "learning_rate": 4.9340586877678865e-05,
      "loss": 0.9447,
      "step": 95760
    },
    {
      "epoch": 0.3356499735391323,
      "grad_norm": 2.796875,
      "learning_rate": 4.9339937849015167e-05,
      "loss": 0.9723,
      "step": 95770
    },
    {
      "epoch": 0.3356850210460279,
      "grad_norm": 3.34375,
      "learning_rate": 4.933928882035147e-05,
      "loss": 0.9858,
      "step": 95780
    },
    {
      "epoch": 0.33572006855292347,
      "grad_norm": 3.21875,
      "learning_rate": 4.933863979168776e-05,
      "loss": 0.9276,
      "step": 95790
    },
    {
      "epoch": 0.33575511605981906,
      "grad_norm": 3.203125,
      "learning_rate": 4.9337990763024064e-05,
      "loss": 0.9445,
      "step": 95800
    },
    {
      "epoch": 0.3357901635667147,
      "grad_norm": 3.515625,
      "learning_rate": 4.933734173436036e-05,
      "loss": 0.9867,
      "step": 95810
    },
    {
      "epoch": 0.3358252110736103,
      "grad_norm": 3.546875,
      "learning_rate": 4.933669270569666e-05,
      "loss": 1.0878,
      "step": 95820
    },
    {
      "epoch": 0.3358602585805059,
      "grad_norm": 2.65625,
      "learning_rate": 4.9336043677032955e-05,
      "loss": 0.9708,
      "step": 95830
    },
    {
      "epoch": 0.3358953060874015,
      "grad_norm": 3.34375,
      "learning_rate": 4.9335394648369256e-05,
      "loss": 0.9462,
      "step": 95840
    },
    {
      "epoch": 0.33593035359429707,
      "grad_norm": 2.890625,
      "learning_rate": 4.933474561970555e-05,
      "loss": 1.0088,
      "step": 95850
    },
    {
      "epoch": 0.33596540110119266,
      "grad_norm": 2.9375,
      "learning_rate": 4.933409659104185e-05,
      "loss": 0.9975,
      "step": 95860
    },
    {
      "epoch": 0.33600044860808825,
      "grad_norm": 3.515625,
      "learning_rate": 4.9333447562378147e-05,
      "loss": 0.8875,
      "step": 95870
    },
    {
      "epoch": 0.33603549611498384,
      "grad_norm": 3.078125,
      "learning_rate": 4.933279853371445e-05,
      "loss": 1.0146,
      "step": 95880
    },
    {
      "epoch": 0.33607054362187944,
      "grad_norm": 3.109375,
      "learning_rate": 4.933214950505074e-05,
      "loss": 0.9666,
      "step": 95890
    },
    {
      "epoch": 0.336105591128775,
      "grad_norm": 3.28125,
      "learning_rate": 4.9331500476387044e-05,
      "loss": 0.934,
      "step": 95900
    },
    {
      "epoch": 0.3361406386356707,
      "grad_norm": 3.03125,
      "learning_rate": 4.933085144772334e-05,
      "loss": 0.9456,
      "step": 95910
    },
    {
      "epoch": 0.33617568614256627,
      "grad_norm": 3.296875,
      "learning_rate": 4.933020241905964e-05,
      "loss": 0.9444,
      "step": 95920
    },
    {
      "epoch": 0.33621073364946186,
      "grad_norm": 3.0625,
      "learning_rate": 4.932955339039594e-05,
      "loss": 0.9558,
      "step": 95930
    },
    {
      "epoch": 0.33624578115635745,
      "grad_norm": 3.3125,
      "learning_rate": 4.9328904361732236e-05,
      "loss": 0.9929,
      "step": 95940
    },
    {
      "epoch": 0.33628082866325304,
      "grad_norm": 3.1875,
      "learning_rate": 4.932825533306853e-05,
      "loss": 1.0684,
      "step": 95950
    },
    {
      "epoch": 0.33631587617014863,
      "grad_norm": 3.265625,
      "learning_rate": 4.9327606304404825e-05,
      "loss": 0.9458,
      "step": 95960
    },
    {
      "epoch": 0.3363509236770442,
      "grad_norm": 3.15625,
      "learning_rate": 4.9326957275741127e-05,
      "loss": 0.965,
      "step": 95970
    },
    {
      "epoch": 0.3363859711839398,
      "grad_norm": 3.421875,
      "learning_rate": 4.932630824707742e-05,
      "loss": 0.9442,
      "step": 95980
    },
    {
      "epoch": 0.3364210186908354,
      "grad_norm": 2.546875,
      "learning_rate": 4.932565921841372e-05,
      "loss": 0.9525,
      "step": 95990
    },
    {
      "epoch": 0.33645606619773105,
      "grad_norm": 2.6875,
      "learning_rate": 4.932501018975002e-05,
      "loss": 0.9966,
      "step": 96000
    },
    {
      "epoch": 0.33649111370462664,
      "grad_norm": 3.046875,
      "learning_rate": 4.932436116108632e-05,
      "loss": 0.9885,
      "step": 96010
    },
    {
      "epoch": 0.33652616121152223,
      "grad_norm": 3.234375,
      "learning_rate": 4.932371213242262e-05,
      "loss": 1.0345,
      "step": 96020
    },
    {
      "epoch": 0.3365612087184178,
      "grad_norm": 3.0625,
      "learning_rate": 4.9323063103758915e-05,
      "loss": 0.9116,
      "step": 96030
    },
    {
      "epoch": 0.3365962562253134,
      "grad_norm": 3.53125,
      "learning_rate": 4.9322414075095216e-05,
      "loss": 1.0553,
      "step": 96040
    },
    {
      "epoch": 0.336631303732209,
      "grad_norm": 2.84375,
      "learning_rate": 4.932176504643151e-05,
      "loss": 0.9912,
      "step": 96050
    },
    {
      "epoch": 0.3366663512391046,
      "grad_norm": 2.609375,
      "learning_rate": 4.932111601776781e-05,
      "loss": 0.9562,
      "step": 96060
    },
    {
      "epoch": 0.3367013987460002,
      "grad_norm": 3.171875,
      "learning_rate": 4.9320466989104107e-05,
      "loss": 0.8988,
      "step": 96070
    },
    {
      "epoch": 0.3367364462528958,
      "grad_norm": 3.078125,
      "learning_rate": 4.931981796044041e-05,
      "loss": 0.9634,
      "step": 96080
    },
    {
      "epoch": 0.3367714937597914,
      "grad_norm": 2.5,
      "learning_rate": 4.93191689317767e-05,
      "loss": 0.9478,
      "step": 96090
    },
    {
      "epoch": 0.336806541266687,
      "grad_norm": 2.84375,
      "learning_rate": 4.9318519903113004e-05,
      "loss": 0.9214,
      "step": 96100
    },
    {
      "epoch": 0.3368415887735826,
      "grad_norm": 2.625,
      "learning_rate": 4.93178708744493e-05,
      "loss": 0.9359,
      "step": 96110
    },
    {
      "epoch": 0.3368766362804782,
      "grad_norm": 2.34375,
      "learning_rate": 4.93172218457856e-05,
      "loss": 0.9536,
      "step": 96120
    },
    {
      "epoch": 0.3369116837873738,
      "grad_norm": 2.921875,
      "learning_rate": 4.9316572817121895e-05,
      "loss": 1.0108,
      "step": 96130
    },
    {
      "epoch": 0.3369467312942694,
      "grad_norm": 2.8125,
      "learning_rate": 4.9315923788458196e-05,
      "loss": 1.0169,
      "step": 96140
    },
    {
      "epoch": 0.336981778801165,
      "grad_norm": 2.578125,
      "learning_rate": 4.93152747597945e-05,
      "loss": 0.9391,
      "step": 96150
    },
    {
      "epoch": 0.33701682630806057,
      "grad_norm": 2.921875,
      "learning_rate": 4.931462573113079e-05,
      "loss": 0.9793,
      "step": 96160
    },
    {
      "epoch": 0.33705187381495616,
      "grad_norm": 2.890625,
      "learning_rate": 4.931397670246709e-05,
      "loss": 0.942,
      "step": 96170
    },
    {
      "epoch": 0.33708692132185175,
      "grad_norm": 2.8125,
      "learning_rate": 4.931332767380339e-05,
      "loss": 1.0366,
      "step": 96180
    },
    {
      "epoch": 0.33712196882874734,
      "grad_norm": 3.625,
      "learning_rate": 4.931267864513969e-05,
      "loss": 0.983,
      "step": 96190
    },
    {
      "epoch": 0.337157016335643,
      "grad_norm": 3.296875,
      "learning_rate": 4.9312029616475984e-05,
      "loss": 0.9315,
      "step": 96200
    },
    {
      "epoch": 0.3371920638425386,
      "grad_norm": 2.8125,
      "learning_rate": 4.9311380587812285e-05,
      "loss": 0.9126,
      "step": 96210
    },
    {
      "epoch": 0.33722711134943417,
      "grad_norm": 3.171875,
      "learning_rate": 4.931073155914858e-05,
      "loss": 1.0343,
      "step": 96220
    },
    {
      "epoch": 0.33726215885632976,
      "grad_norm": 3.171875,
      "learning_rate": 4.931008253048488e-05,
      "loss": 1.0074,
      "step": 96230
    },
    {
      "epoch": 0.33729720636322535,
      "grad_norm": 3.078125,
      "learning_rate": 4.9309433501821176e-05,
      "loss": 1.014,
      "step": 96240
    },
    {
      "epoch": 0.33733225387012095,
      "grad_norm": 2.859375,
      "learning_rate": 4.930878447315748e-05,
      "loss": 0.9061,
      "step": 96250
    },
    {
      "epoch": 0.33736730137701654,
      "grad_norm": 3.125,
      "learning_rate": 4.930813544449377e-05,
      "loss": 0.8752,
      "step": 96260
    },
    {
      "epoch": 0.33740234888391213,
      "grad_norm": 3.0625,
      "learning_rate": 4.930748641583007e-05,
      "loss": 0.9516,
      "step": 96270
    },
    {
      "epoch": 0.3374373963908077,
      "grad_norm": 2.984375,
      "learning_rate": 4.9306837387166375e-05,
      "loss": 0.957,
      "step": 96280
    },
    {
      "epoch": 0.3374724438977033,
      "grad_norm": 2.953125,
      "learning_rate": 4.930618835850267e-05,
      "loss": 0.9899,
      "step": 96290
    },
    {
      "epoch": 0.33750749140459896,
      "grad_norm": 3.328125,
      "learning_rate": 4.930553932983897e-05,
      "loss": 1.033,
      "step": 96300
    },
    {
      "epoch": 0.33754253891149455,
      "grad_norm": 3.484375,
      "learning_rate": 4.9304890301175265e-05,
      "loss": 0.9993,
      "step": 96310
    },
    {
      "epoch": 0.33757758641839014,
      "grad_norm": 3.234375,
      "learning_rate": 4.930424127251157e-05,
      "loss": 0.9742,
      "step": 96320
    },
    {
      "epoch": 0.33761263392528573,
      "grad_norm": 3.328125,
      "learning_rate": 4.9303592243847855e-05,
      "loss": 0.9345,
      "step": 96330
    },
    {
      "epoch": 0.3376476814321813,
      "grad_norm": 3.125,
      "learning_rate": 4.9302943215184156e-05,
      "loss": 0.9607,
      "step": 96340
    },
    {
      "epoch": 0.3376827289390769,
      "grad_norm": 2.796875,
      "learning_rate": 4.930229418652045e-05,
      "loss": 0.9287,
      "step": 96350
    },
    {
      "epoch": 0.3377177764459725,
      "grad_norm": 2.6875,
      "learning_rate": 4.930164515785675e-05,
      "loss": 0.8979,
      "step": 96360
    },
    {
      "epoch": 0.3377528239528681,
      "grad_norm": 3.234375,
      "learning_rate": 4.930099612919305e-05,
      "loss": 1.0245,
      "step": 96370
    },
    {
      "epoch": 0.3377878714597637,
      "grad_norm": 2.9375,
      "learning_rate": 4.930034710052935e-05,
      "loss": 0.9628,
      "step": 96380
    },
    {
      "epoch": 0.3378229189666593,
      "grad_norm": 3.125,
      "learning_rate": 4.929969807186565e-05,
      "loss": 0.914,
      "step": 96390
    },
    {
      "epoch": 0.3378579664735549,
      "grad_norm": 3.015625,
      "learning_rate": 4.9299049043201944e-05,
      "loss": 0.9584,
      "step": 96400
    },
    {
      "epoch": 0.3378930139804505,
      "grad_norm": 2.9375,
      "learning_rate": 4.9298400014538245e-05,
      "loss": 0.931,
      "step": 96410
    },
    {
      "epoch": 0.3379280614873461,
      "grad_norm": 3.125,
      "learning_rate": 4.929775098587454e-05,
      "loss": 0.9356,
      "step": 96420
    },
    {
      "epoch": 0.3379631089942417,
      "grad_norm": 2.953125,
      "learning_rate": 4.929710195721084e-05,
      "loss": 0.8697,
      "step": 96430
    },
    {
      "epoch": 0.3379981565011373,
      "grad_norm": 3.0,
      "learning_rate": 4.9296452928547136e-05,
      "loss": 1.04,
      "step": 96440
    },
    {
      "epoch": 0.3380332040080329,
      "grad_norm": 3.109375,
      "learning_rate": 4.929580389988344e-05,
      "loss": 0.9448,
      "step": 96450
    },
    {
      "epoch": 0.3380682515149285,
      "grad_norm": 2.96875,
      "learning_rate": 4.929515487121973e-05,
      "loss": 0.8808,
      "step": 96460
    },
    {
      "epoch": 0.33810329902182407,
      "grad_norm": 2.984375,
      "learning_rate": 4.929450584255603e-05,
      "loss": 0.9244,
      "step": 96470
    },
    {
      "epoch": 0.33813834652871966,
      "grad_norm": 3.375,
      "learning_rate": 4.929385681389233e-05,
      "loss": 1.0329,
      "step": 96480
    },
    {
      "epoch": 0.33817339403561525,
      "grad_norm": 2.765625,
      "learning_rate": 4.929320778522863e-05,
      "loss": 0.963,
      "step": 96490
    },
    {
      "epoch": 0.3382084415425109,
      "grad_norm": 2.828125,
      "learning_rate": 4.9292558756564924e-05,
      "loss": 0.9821,
      "step": 96500
    },
    {
      "epoch": 0.3382434890494065,
      "grad_norm": 2.90625,
      "learning_rate": 4.9291909727901225e-05,
      "loss": 0.8805,
      "step": 96510
    },
    {
      "epoch": 0.3382785365563021,
      "grad_norm": 3.34375,
      "learning_rate": 4.929126069923753e-05,
      "loss": 1.0147,
      "step": 96520
    },
    {
      "epoch": 0.33831358406319767,
      "grad_norm": 3.203125,
      "learning_rate": 4.929061167057382e-05,
      "loss": 0.9322,
      "step": 96530
    },
    {
      "epoch": 0.33834863157009326,
      "grad_norm": 2.921875,
      "learning_rate": 4.928996264191012e-05,
      "loss": 1.021,
      "step": 96540
    },
    {
      "epoch": 0.33838367907698885,
      "grad_norm": 2.890625,
      "learning_rate": 4.928931361324642e-05,
      "loss": 0.99,
      "step": 96550
    },
    {
      "epoch": 0.33841872658388444,
      "grad_norm": 3.296875,
      "learning_rate": 4.928866458458272e-05,
      "loss": 1.0096,
      "step": 96560
    },
    {
      "epoch": 0.33845377409078004,
      "grad_norm": 3.25,
      "learning_rate": 4.928801555591901e-05,
      "loss": 1.0532,
      "step": 96570
    },
    {
      "epoch": 0.3384888215976756,
      "grad_norm": 3.265625,
      "learning_rate": 4.9287366527255315e-05,
      "loss": 0.8438,
      "step": 96580
    },
    {
      "epoch": 0.3385238691045713,
      "grad_norm": 2.84375,
      "learning_rate": 4.928671749859161e-05,
      "loss": 0.9599,
      "step": 96590
    },
    {
      "epoch": 0.33855891661146686,
      "grad_norm": 3.109375,
      "learning_rate": 4.928606846992791e-05,
      "loss": 0.9213,
      "step": 96600
    },
    {
      "epoch": 0.33859396411836246,
      "grad_norm": 3.140625,
      "learning_rate": 4.9285419441264205e-05,
      "loss": 0.926,
      "step": 96610
    },
    {
      "epoch": 0.33862901162525805,
      "grad_norm": 2.984375,
      "learning_rate": 4.928477041260051e-05,
      "loss": 1.0099,
      "step": 96620
    },
    {
      "epoch": 0.33866405913215364,
      "grad_norm": 3.203125,
      "learning_rate": 4.92841213839368e-05,
      "loss": 0.9528,
      "step": 96630
    },
    {
      "epoch": 0.33869910663904923,
      "grad_norm": 3.109375,
      "learning_rate": 4.92834723552731e-05,
      "loss": 1.0098,
      "step": 96640
    },
    {
      "epoch": 0.3387341541459448,
      "grad_norm": 2.90625,
      "learning_rate": 4.9282823326609404e-05,
      "loss": 0.9681,
      "step": 96650
    },
    {
      "epoch": 0.3387692016528404,
      "grad_norm": 3.0625,
      "learning_rate": 4.92821742979457e-05,
      "loss": 1.0245,
      "step": 96660
    },
    {
      "epoch": 0.338804249159736,
      "grad_norm": 2.984375,
      "learning_rate": 4.9281525269282e-05,
      "loss": 0.9698,
      "step": 96670
    },
    {
      "epoch": 0.3388392966666316,
      "grad_norm": 3.234375,
      "learning_rate": 4.9280876240618295e-05,
      "loss": 0.9087,
      "step": 96680
    },
    {
      "epoch": 0.33887434417352724,
      "grad_norm": 3.28125,
      "learning_rate": 4.9280227211954596e-05,
      "loss": 0.9603,
      "step": 96690
    },
    {
      "epoch": 0.33890939168042283,
      "grad_norm": 2.6875,
      "learning_rate": 4.9279578183290884e-05,
      "loss": 1.0532,
      "step": 96700
    },
    {
      "epoch": 0.3389444391873184,
      "grad_norm": 3.53125,
      "learning_rate": 4.9278929154627185e-05,
      "loss": 0.9847,
      "step": 96710
    },
    {
      "epoch": 0.338979486694214,
      "grad_norm": 2.984375,
      "learning_rate": 4.927828012596348e-05,
      "loss": 1.0277,
      "step": 96720
    },
    {
      "epoch": 0.3390145342011096,
      "grad_norm": 3.5,
      "learning_rate": 4.927763109729978e-05,
      "loss": 0.9185,
      "step": 96730
    },
    {
      "epoch": 0.3390495817080052,
      "grad_norm": 2.71875,
      "learning_rate": 4.927698206863608e-05,
      "loss": 0.897,
      "step": 96740
    },
    {
      "epoch": 0.3390846292149008,
      "grad_norm": 3.125,
      "learning_rate": 4.927633303997238e-05,
      "loss": 1.0418,
      "step": 96750
    },
    {
      "epoch": 0.3391196767217964,
      "grad_norm": 3.171875,
      "learning_rate": 4.927568401130868e-05,
      "loss": 1.0652,
      "step": 96760
    },
    {
      "epoch": 0.339154724228692,
      "grad_norm": 3.0625,
      "learning_rate": 4.927503498264497e-05,
      "loss": 0.9928,
      "step": 96770
    },
    {
      "epoch": 0.33918977173558756,
      "grad_norm": 3.296875,
      "learning_rate": 4.9274385953981275e-05,
      "loss": 0.9842,
      "step": 96780
    },
    {
      "epoch": 0.3392248192424832,
      "grad_norm": 2.703125,
      "learning_rate": 4.927373692531757e-05,
      "loss": 0.8696,
      "step": 96790
    },
    {
      "epoch": 0.3392598667493788,
      "grad_norm": 2.765625,
      "learning_rate": 4.927308789665387e-05,
      "loss": 1.0047,
      "step": 96800
    },
    {
      "epoch": 0.3392949142562744,
      "grad_norm": 2.9375,
      "learning_rate": 4.9272438867990165e-05,
      "loss": 0.9318,
      "step": 96810
    },
    {
      "epoch": 0.33932996176317,
      "grad_norm": 3.140625,
      "learning_rate": 4.927178983932647e-05,
      "loss": 0.9714,
      "step": 96820
    },
    {
      "epoch": 0.3393650092700656,
      "grad_norm": 3.078125,
      "learning_rate": 4.927114081066276e-05,
      "loss": 0.9658,
      "step": 96830
    },
    {
      "epoch": 0.33940005677696117,
      "grad_norm": 2.84375,
      "learning_rate": 4.927049178199906e-05,
      "loss": 0.9398,
      "step": 96840
    },
    {
      "epoch": 0.33943510428385676,
      "grad_norm": 4.09375,
      "learning_rate": 4.926984275333536e-05,
      "loss": 0.9915,
      "step": 96850
    },
    {
      "epoch": 0.33947015179075235,
      "grad_norm": 3.21875,
      "learning_rate": 4.926919372467166e-05,
      "loss": 0.901,
      "step": 96860
    },
    {
      "epoch": 0.33950519929764794,
      "grad_norm": 2.984375,
      "learning_rate": 4.926854469600795e-05,
      "loss": 0.9808,
      "step": 96870
    },
    {
      "epoch": 0.33954024680454353,
      "grad_norm": 3.15625,
      "learning_rate": 4.9267895667344255e-05,
      "loss": 1.0246,
      "step": 96880
    },
    {
      "epoch": 0.3395752943114392,
      "grad_norm": 2.984375,
      "learning_rate": 4.9267246638680556e-05,
      "loss": 0.9719,
      "step": 96890
    },
    {
      "epoch": 0.33961034181833477,
      "grad_norm": 3.21875,
      "learning_rate": 4.926659761001685e-05,
      "loss": 1.0591,
      "step": 96900
    },
    {
      "epoch": 0.33964538932523036,
      "grad_norm": 3.390625,
      "learning_rate": 4.926594858135315e-05,
      "loss": 1.0111,
      "step": 96910
    },
    {
      "epoch": 0.33968043683212595,
      "grad_norm": 3.125,
      "learning_rate": 4.926529955268945e-05,
      "loss": 0.9272,
      "step": 96920
    },
    {
      "epoch": 0.33971548433902155,
      "grad_norm": 3.25,
      "learning_rate": 4.926465052402575e-05,
      "loss": 0.9885,
      "step": 96930
    },
    {
      "epoch": 0.33975053184591714,
      "grad_norm": 3.078125,
      "learning_rate": 4.926400149536204e-05,
      "loss": 0.9761,
      "step": 96940
    },
    {
      "epoch": 0.3397855793528127,
      "grad_norm": 3.234375,
      "learning_rate": 4.9263352466698344e-05,
      "loss": 1.0153,
      "step": 96950
    },
    {
      "epoch": 0.3398206268597083,
      "grad_norm": 3.359375,
      "learning_rate": 4.926270343803464e-05,
      "loss": 0.9992,
      "step": 96960
    },
    {
      "epoch": 0.3398556743666039,
      "grad_norm": 3.453125,
      "learning_rate": 4.926205440937094e-05,
      "loss": 1.0056,
      "step": 96970
    },
    {
      "epoch": 0.3398907218734995,
      "grad_norm": 3.390625,
      "learning_rate": 4.9261405380707235e-05,
      "loss": 1.0282,
      "step": 96980
    },
    {
      "epoch": 0.33992576938039515,
      "grad_norm": 3.4375,
      "learning_rate": 4.9260756352043536e-05,
      "loss": 0.9694,
      "step": 96990
    },
    {
      "epoch": 0.33996081688729074,
      "grad_norm": 2.875,
      "learning_rate": 4.926010732337983e-05,
      "loss": 1.088,
      "step": 97000
    },
    {
      "epoch": 0.33999586439418633,
      "grad_norm": 3.328125,
      "learning_rate": 4.925945829471613e-05,
      "loss": 0.9848,
      "step": 97010
    },
    {
      "epoch": 0.3400309119010819,
      "grad_norm": 3.265625,
      "learning_rate": 4.9258809266052433e-05,
      "loss": 1.0314,
      "step": 97020
    },
    {
      "epoch": 0.3400659594079775,
      "grad_norm": 3.546875,
      "learning_rate": 4.925816023738873e-05,
      "loss": 0.9125,
      "step": 97030
    },
    {
      "epoch": 0.3401010069148731,
      "grad_norm": 3.453125,
      "learning_rate": 4.925751120872503e-05,
      "loss": 1.0787,
      "step": 97040
    },
    {
      "epoch": 0.3401360544217687,
      "grad_norm": 2.90625,
      "learning_rate": 4.9256862180061324e-05,
      "loss": 0.9634,
      "step": 97050
    },
    {
      "epoch": 0.3401711019286643,
      "grad_norm": 3.03125,
      "learning_rate": 4.9256213151397625e-05,
      "loss": 0.9764,
      "step": 97060
    },
    {
      "epoch": 0.3402061494355599,
      "grad_norm": 3.0625,
      "learning_rate": 4.925556412273392e-05,
      "loss": 0.9766,
      "step": 97070
    },
    {
      "epoch": 0.3402411969424555,
      "grad_norm": 2.78125,
      "learning_rate": 4.9254915094070215e-05,
      "loss": 0.962,
      "step": 97080
    },
    {
      "epoch": 0.3402762444493511,
      "grad_norm": 2.546875,
      "learning_rate": 4.925426606540651e-05,
      "loss": 0.9841,
      "step": 97090
    },
    {
      "epoch": 0.3403112919562467,
      "grad_norm": 3.0625,
      "learning_rate": 4.925361703674281e-05,
      "loss": 0.9148,
      "step": 97100
    },
    {
      "epoch": 0.3403463394631423,
      "grad_norm": 3.328125,
      "learning_rate": 4.925296800807911e-05,
      "loss": 1.0535,
      "step": 97110
    },
    {
      "epoch": 0.3403813869700379,
      "grad_norm": 2.75,
      "learning_rate": 4.925231897941541e-05,
      "loss": 0.9322,
      "step": 97120
    },
    {
      "epoch": 0.3404164344769335,
      "grad_norm": 3.109375,
      "learning_rate": 4.925166995075171e-05,
      "loss": 0.9462,
      "step": 97130
    },
    {
      "epoch": 0.3404514819838291,
      "grad_norm": 3.484375,
      "learning_rate": 4.9251020922088e-05,
      "loss": 0.8599,
      "step": 97140
    },
    {
      "epoch": 0.34048652949072467,
      "grad_norm": 2.875,
      "learning_rate": 4.9250371893424304e-05,
      "loss": 0.8534,
      "step": 97150
    },
    {
      "epoch": 0.34052157699762026,
      "grad_norm": 3.375,
      "learning_rate": 4.92497228647606e-05,
      "loss": 0.9674,
      "step": 97160
    },
    {
      "epoch": 0.34055662450451585,
      "grad_norm": 3.0,
      "learning_rate": 4.92490738360969e-05,
      "loss": 0.9773,
      "step": 97170
    },
    {
      "epoch": 0.3405916720114115,
      "grad_norm": 3.28125,
      "learning_rate": 4.9248424807433195e-05,
      "loss": 0.9117,
      "step": 97180
    },
    {
      "epoch": 0.3406267195183071,
      "grad_norm": 3.15625,
      "learning_rate": 4.9247775778769496e-05,
      "loss": 0.8936,
      "step": 97190
    },
    {
      "epoch": 0.3406617670252027,
      "grad_norm": 2.734375,
      "learning_rate": 4.924712675010579e-05,
      "loss": 0.8985,
      "step": 97200
    },
    {
      "epoch": 0.34069681453209827,
      "grad_norm": 3.1875,
      "learning_rate": 4.924647772144209e-05,
      "loss": 1.0343,
      "step": 97210
    },
    {
      "epoch": 0.34073186203899386,
      "grad_norm": 3.453125,
      "learning_rate": 4.924582869277839e-05,
      "loss": 1.0272,
      "step": 97220
    },
    {
      "epoch": 0.34076690954588945,
      "grad_norm": 3.015625,
      "learning_rate": 4.924517966411469e-05,
      "loss": 0.9862,
      "step": 97230
    },
    {
      "epoch": 0.34080195705278504,
      "grad_norm": 3.265625,
      "learning_rate": 4.924453063545099e-05,
      "loss": 0.9986,
      "step": 97240
    },
    {
      "epoch": 0.34083700455968063,
      "grad_norm": 3.09375,
      "learning_rate": 4.9243881606787284e-05,
      "loss": 0.9218,
      "step": 97250
    },
    {
      "epoch": 0.3408720520665762,
      "grad_norm": 2.546875,
      "learning_rate": 4.9243232578123585e-05,
      "loss": 0.9573,
      "step": 97260
    },
    {
      "epoch": 0.3409070995734718,
      "grad_norm": 3.28125,
      "learning_rate": 4.924258354945988e-05,
      "loss": 1.0626,
      "step": 97270
    },
    {
      "epoch": 0.34094214708036746,
      "grad_norm": 3.28125,
      "learning_rate": 4.924193452079618e-05,
      "loss": 1.0276,
      "step": 97280
    },
    {
      "epoch": 0.34097719458726305,
      "grad_norm": 2.84375,
      "learning_rate": 4.9241285492132476e-05,
      "loss": 0.9359,
      "step": 97290
    },
    {
      "epoch": 0.34101224209415865,
      "grad_norm": 3.296875,
      "learning_rate": 4.924063646346878e-05,
      "loss": 1.0311,
      "step": 97300
    },
    {
      "epoch": 0.34104728960105424,
      "grad_norm": 2.953125,
      "learning_rate": 4.923998743480507e-05,
      "loss": 1.0034,
      "step": 97310
    },
    {
      "epoch": 0.34108233710794983,
      "grad_norm": 3.40625,
      "learning_rate": 4.9239338406141373e-05,
      "loss": 0.97,
      "step": 97320
    },
    {
      "epoch": 0.3411173846148454,
      "grad_norm": 3.046875,
      "learning_rate": 4.923868937747767e-05,
      "loss": 0.935,
      "step": 97330
    },
    {
      "epoch": 0.341152432121741,
      "grad_norm": 3.359375,
      "learning_rate": 4.923804034881397e-05,
      "loss": 1.0812,
      "step": 97340
    },
    {
      "epoch": 0.3411874796286366,
      "grad_norm": 3.109375,
      "learning_rate": 4.9237391320150264e-05,
      "loss": 0.926,
      "step": 97350
    },
    {
      "epoch": 0.3412225271355322,
      "grad_norm": 3.21875,
      "learning_rate": 4.9236742291486565e-05,
      "loss": 0.9223,
      "step": 97360
    },
    {
      "epoch": 0.3412575746424278,
      "grad_norm": 3.328125,
      "learning_rate": 4.923609326282286e-05,
      "loss": 0.9589,
      "step": 97370
    },
    {
      "epoch": 0.34129262214932343,
      "grad_norm": 3.171875,
      "learning_rate": 4.923544423415916e-05,
      "loss": 1.008,
      "step": 97380
    },
    {
      "epoch": 0.341327669656219,
      "grad_norm": 3.1875,
      "learning_rate": 4.923479520549546e-05,
      "loss": 0.9643,
      "step": 97390
    },
    {
      "epoch": 0.3413627171631146,
      "grad_norm": 2.859375,
      "learning_rate": 4.923414617683176e-05,
      "loss": 0.8808,
      "step": 97400
    },
    {
      "epoch": 0.3413977646700102,
      "grad_norm": 2.8125,
      "learning_rate": 4.923349714816806e-05,
      "loss": 0.904,
      "step": 97410
    },
    {
      "epoch": 0.3414328121769058,
      "grad_norm": 3.46875,
      "learning_rate": 4.9232848119504353e-05,
      "loss": 0.9362,
      "step": 97420
    },
    {
      "epoch": 0.3414678596838014,
      "grad_norm": 2.9375,
      "learning_rate": 4.9232199090840655e-05,
      "loss": 1.0229,
      "step": 97430
    },
    {
      "epoch": 0.341502907190697,
      "grad_norm": 3.609375,
      "learning_rate": 4.923155006217695e-05,
      "loss": 1.0154,
      "step": 97440
    },
    {
      "epoch": 0.34153795469759257,
      "grad_norm": 2.6875,
      "learning_rate": 4.9230901033513244e-05,
      "loss": 0.9341,
      "step": 97450
    },
    {
      "epoch": 0.34157300220448816,
      "grad_norm": 3.015625,
      "learning_rate": 4.923025200484954e-05,
      "loss": 1.0701,
      "step": 97460
    },
    {
      "epoch": 0.34160804971138375,
      "grad_norm": 3.171875,
      "learning_rate": 4.922960297618584e-05,
      "loss": 0.9874,
      "step": 97470
    },
    {
      "epoch": 0.3416430972182794,
      "grad_norm": 3.1875,
      "learning_rate": 4.922895394752214e-05,
      "loss": 0.9915,
      "step": 97480
    },
    {
      "epoch": 0.341678144725175,
      "grad_norm": 2.421875,
      "learning_rate": 4.9228304918858436e-05,
      "loss": 0.949,
      "step": 97490
    },
    {
      "epoch": 0.3417131922320706,
      "grad_norm": 2.890625,
      "learning_rate": 4.922765589019474e-05,
      "loss": 0.9465,
      "step": 97500
    },
    {
      "epoch": 0.3417482397389662,
      "grad_norm": 3.1875,
      "learning_rate": 4.922700686153103e-05,
      "loss": 1.0251,
      "step": 97510
    },
    {
      "epoch": 0.34178328724586177,
      "grad_norm": 3.0625,
      "learning_rate": 4.9226357832867333e-05,
      "loss": 1.0115,
      "step": 97520
    },
    {
      "epoch": 0.34181833475275736,
      "grad_norm": 2.796875,
      "learning_rate": 4.922570880420363e-05,
      "loss": 0.8911,
      "step": 97530
    },
    {
      "epoch": 0.34185338225965295,
      "grad_norm": 2.65625,
      "learning_rate": 4.922505977553993e-05,
      "loss": 0.8848,
      "step": 97540
    },
    {
      "epoch": 0.34188842976654854,
      "grad_norm": 2.890625,
      "learning_rate": 4.9224410746876224e-05,
      "loss": 0.9005,
      "step": 97550
    },
    {
      "epoch": 0.34192347727344413,
      "grad_norm": 2.8125,
      "learning_rate": 4.9223761718212525e-05,
      "loss": 0.9605,
      "step": 97560
    },
    {
      "epoch": 0.3419585247803397,
      "grad_norm": 2.796875,
      "learning_rate": 4.922311268954882e-05,
      "loss": 0.9609,
      "step": 97570
    },
    {
      "epoch": 0.34199357228723537,
      "grad_norm": 2.875,
      "learning_rate": 4.922246366088512e-05,
      "loss": 0.9614,
      "step": 97580
    },
    {
      "epoch": 0.34202861979413096,
      "grad_norm": 3.40625,
      "learning_rate": 4.9221814632221416e-05,
      "loss": 1.0499,
      "step": 97590
    },
    {
      "epoch": 0.34206366730102655,
      "grad_norm": 2.828125,
      "learning_rate": 4.922116560355772e-05,
      "loss": 0.9268,
      "step": 97600
    },
    {
      "epoch": 0.34209871480792214,
      "grad_norm": 3.15625,
      "learning_rate": 4.922051657489402e-05,
      "loss": 0.9736,
      "step": 97610
    },
    {
      "epoch": 0.34213376231481774,
      "grad_norm": 2.984375,
      "learning_rate": 4.9219867546230313e-05,
      "loss": 1.0942,
      "step": 97620
    },
    {
      "epoch": 0.3421688098217133,
      "grad_norm": 3.203125,
      "learning_rate": 4.9219218517566615e-05,
      "loss": 0.8994,
      "step": 97630
    },
    {
      "epoch": 0.3422038573286089,
      "grad_norm": 3.03125,
      "learning_rate": 4.921856948890291e-05,
      "loss": 0.9376,
      "step": 97640
    },
    {
      "epoch": 0.3422389048355045,
      "grad_norm": 3.15625,
      "learning_rate": 4.921792046023921e-05,
      "loss": 0.9891,
      "step": 97650
    },
    {
      "epoch": 0.3422739523424001,
      "grad_norm": 2.65625,
      "learning_rate": 4.9217271431575505e-05,
      "loss": 0.9041,
      "step": 97660
    },
    {
      "epoch": 0.34230899984929575,
      "grad_norm": 3.046875,
      "learning_rate": 4.921662240291181e-05,
      "loss": 0.9882,
      "step": 97670
    },
    {
      "epoch": 0.34234404735619134,
      "grad_norm": 2.890625,
      "learning_rate": 4.92159733742481e-05,
      "loss": 0.9352,
      "step": 97680
    },
    {
      "epoch": 0.34237909486308693,
      "grad_norm": 3.09375,
      "learning_rate": 4.92153243455844e-05,
      "loss": 0.9112,
      "step": 97690
    },
    {
      "epoch": 0.3424141423699825,
      "grad_norm": 3.34375,
      "learning_rate": 4.92146753169207e-05,
      "loss": 0.946,
      "step": 97700
    },
    {
      "epoch": 0.3424491898768781,
      "grad_norm": 3.125,
      "learning_rate": 4.9214026288257e-05,
      "loss": 0.9112,
      "step": 97710
    },
    {
      "epoch": 0.3424842373837737,
      "grad_norm": 2.984375,
      "learning_rate": 4.9213377259593293e-05,
      "loss": 0.9141,
      "step": 97720
    },
    {
      "epoch": 0.3425192848906693,
      "grad_norm": 3.609375,
      "learning_rate": 4.9212728230929595e-05,
      "loss": 0.9772,
      "step": 97730
    },
    {
      "epoch": 0.3425543323975649,
      "grad_norm": 3.3125,
      "learning_rate": 4.921207920226589e-05,
      "loss": 0.9235,
      "step": 97740
    },
    {
      "epoch": 0.3425893799044605,
      "grad_norm": 2.953125,
      "learning_rate": 4.921143017360219e-05,
      "loss": 0.9106,
      "step": 97750
    },
    {
      "epoch": 0.34262442741135607,
      "grad_norm": 3.375,
      "learning_rate": 4.921078114493849e-05,
      "loss": 0.9441,
      "step": 97760
    },
    {
      "epoch": 0.3426594749182517,
      "grad_norm": 2.796875,
      "learning_rate": 4.921013211627479e-05,
      "loss": 0.9214,
      "step": 97770
    },
    {
      "epoch": 0.3426945224251473,
      "grad_norm": 2.609375,
      "learning_rate": 4.920948308761109e-05,
      "loss": 0.9881,
      "step": 97780
    },
    {
      "epoch": 0.3427295699320429,
      "grad_norm": 3.109375,
      "learning_rate": 4.920883405894738e-05,
      "loss": 0.9927,
      "step": 97790
    },
    {
      "epoch": 0.3427646174389385,
      "grad_norm": 3.265625,
      "learning_rate": 4.9208185030283684e-05,
      "loss": 0.9501,
      "step": 97800
    },
    {
      "epoch": 0.3427996649458341,
      "grad_norm": 2.890625,
      "learning_rate": 4.920753600161998e-05,
      "loss": 0.9236,
      "step": 97810
    },
    {
      "epoch": 0.3428347124527297,
      "grad_norm": 3.0,
      "learning_rate": 4.920688697295628e-05,
      "loss": 0.941,
      "step": 97820
    },
    {
      "epoch": 0.34286975995962526,
      "grad_norm": 3.125,
      "learning_rate": 4.920623794429257e-05,
      "loss": 0.9711,
      "step": 97830
    },
    {
      "epoch": 0.34290480746652086,
      "grad_norm": 2.96875,
      "learning_rate": 4.920558891562887e-05,
      "loss": 0.9917,
      "step": 97840
    },
    {
      "epoch": 0.34293985497341645,
      "grad_norm": 3.140625,
      "learning_rate": 4.920493988696517e-05,
      "loss": 0.985,
      "step": 97850
    },
    {
      "epoch": 0.34297490248031204,
      "grad_norm": 2.84375,
      "learning_rate": 4.9204290858301465e-05,
      "loss": 0.9109,
      "step": 97860
    },
    {
      "epoch": 0.3430099499872077,
      "grad_norm": 2.765625,
      "learning_rate": 4.920364182963777e-05,
      "loss": 0.9241,
      "step": 97870
    },
    {
      "epoch": 0.3430449974941033,
      "grad_norm": 3.546875,
      "learning_rate": 4.920299280097406e-05,
      "loss": 0.9989,
      "step": 97880
    },
    {
      "epoch": 0.34308004500099887,
      "grad_norm": 3.40625,
      "learning_rate": 4.920234377231036e-05,
      "loss": 0.927,
      "step": 97890
    },
    {
      "epoch": 0.34311509250789446,
      "grad_norm": 3.015625,
      "learning_rate": 4.920169474364666e-05,
      "loss": 0.9329,
      "step": 97900
    },
    {
      "epoch": 0.34315014001479005,
      "grad_norm": 2.53125,
      "learning_rate": 4.920104571498296e-05,
      "loss": 0.9707,
      "step": 97910
    },
    {
      "epoch": 0.34318518752168564,
      "grad_norm": 3.53125,
      "learning_rate": 4.9200396686319253e-05,
      "loss": 1.0031,
      "step": 97920
    },
    {
      "epoch": 0.34322023502858123,
      "grad_norm": 2.625,
      "learning_rate": 4.9199747657655555e-05,
      "loss": 1.0106,
      "step": 97930
    },
    {
      "epoch": 0.3432552825354768,
      "grad_norm": 2.96875,
      "learning_rate": 4.919909862899185e-05,
      "loss": 0.9085,
      "step": 97940
    },
    {
      "epoch": 0.3432903300423724,
      "grad_norm": 2.828125,
      "learning_rate": 4.919844960032815e-05,
      "loss": 0.9435,
      "step": 97950
    },
    {
      "epoch": 0.343325377549268,
      "grad_norm": 3.421875,
      "learning_rate": 4.9197800571664445e-05,
      "loss": 1.1063,
      "step": 97960
    },
    {
      "epoch": 0.34336042505616365,
      "grad_norm": 2.6875,
      "learning_rate": 4.919715154300075e-05,
      "loss": 0.9615,
      "step": 97970
    },
    {
      "epoch": 0.34339547256305925,
      "grad_norm": 3.46875,
      "learning_rate": 4.919650251433705e-05,
      "loss": 1.0551,
      "step": 97980
    },
    {
      "epoch": 0.34343052006995484,
      "grad_norm": 2.9375,
      "learning_rate": 4.919585348567334e-05,
      "loss": 0.934,
      "step": 97990
    },
    {
      "epoch": 0.34346556757685043,
      "grad_norm": 3.109375,
      "learning_rate": 4.9195204457009644e-05,
      "loss": 0.9704,
      "step": 98000
    },
    {
      "epoch": 0.343500615083746,
      "grad_norm": 3.140625,
      "learning_rate": 4.919455542834594e-05,
      "loss": 0.9904,
      "step": 98010
    },
    {
      "epoch": 0.3435356625906416,
      "grad_norm": 3.0,
      "learning_rate": 4.919390639968224e-05,
      "loss": 0.9564,
      "step": 98020
    },
    {
      "epoch": 0.3435707100975372,
      "grad_norm": 2.859375,
      "learning_rate": 4.9193257371018535e-05,
      "loss": 0.9359,
      "step": 98030
    },
    {
      "epoch": 0.3436057576044328,
      "grad_norm": 3.0625,
      "learning_rate": 4.9192608342354836e-05,
      "loss": 0.9414,
      "step": 98040
    },
    {
      "epoch": 0.3436408051113284,
      "grad_norm": 2.875,
      "learning_rate": 4.919195931369113e-05,
      "loss": 0.9841,
      "step": 98050
    },
    {
      "epoch": 0.343675852618224,
      "grad_norm": 2.78125,
      "learning_rate": 4.919131028502743e-05,
      "loss": 0.9952,
      "step": 98060
    },
    {
      "epoch": 0.3437109001251196,
      "grad_norm": 3.28125,
      "learning_rate": 4.919066125636373e-05,
      "loss": 1.0108,
      "step": 98070
    },
    {
      "epoch": 0.3437459476320152,
      "grad_norm": 3.28125,
      "learning_rate": 4.919001222770003e-05,
      "loss": 0.8838,
      "step": 98080
    },
    {
      "epoch": 0.3437809951389108,
      "grad_norm": 3.0625,
      "learning_rate": 4.918936319903632e-05,
      "loss": 0.9918,
      "step": 98090
    },
    {
      "epoch": 0.3438160426458064,
      "grad_norm": 3.03125,
      "learning_rate": 4.9188714170372624e-05,
      "loss": 0.9247,
      "step": 98100
    },
    {
      "epoch": 0.343851090152702,
      "grad_norm": 3.59375,
      "learning_rate": 4.918806514170892e-05,
      "loss": 1.0581,
      "step": 98110
    },
    {
      "epoch": 0.3438861376595976,
      "grad_norm": 2.96875,
      "learning_rate": 4.918741611304522e-05,
      "loss": 0.9477,
      "step": 98120
    },
    {
      "epoch": 0.34392118516649317,
      "grad_norm": 2.859375,
      "learning_rate": 4.918676708438152e-05,
      "loss": 0.8771,
      "step": 98130
    },
    {
      "epoch": 0.34395623267338876,
      "grad_norm": 3.15625,
      "learning_rate": 4.9186118055717816e-05,
      "loss": 1.0691,
      "step": 98140
    },
    {
      "epoch": 0.34399128018028435,
      "grad_norm": 2.375,
      "learning_rate": 4.918546902705412e-05,
      "loss": 0.9473,
      "step": 98150
    },
    {
      "epoch": 0.34402632768717994,
      "grad_norm": 3.1875,
      "learning_rate": 4.918481999839041e-05,
      "loss": 0.9162,
      "step": 98160
    },
    {
      "epoch": 0.3440613751940756,
      "grad_norm": 3.1875,
      "learning_rate": 4.9184170969726714e-05,
      "loss": 1.0186,
      "step": 98170
    },
    {
      "epoch": 0.3440964227009712,
      "grad_norm": 3.140625,
      "learning_rate": 4.918352194106301e-05,
      "loss": 0.9273,
      "step": 98180
    },
    {
      "epoch": 0.3441314702078668,
      "grad_norm": 2.921875,
      "learning_rate": 4.918287291239931e-05,
      "loss": 1.0257,
      "step": 98190
    },
    {
      "epoch": 0.34416651771476237,
      "grad_norm": 2.96875,
      "learning_rate": 4.9182223883735604e-05,
      "loss": 0.9567,
      "step": 98200
    },
    {
      "epoch": 0.34420156522165796,
      "grad_norm": 2.84375,
      "learning_rate": 4.91815748550719e-05,
      "loss": 0.9455,
      "step": 98210
    },
    {
      "epoch": 0.34423661272855355,
      "grad_norm": 3.15625,
      "learning_rate": 4.91809258264082e-05,
      "loss": 0.9933,
      "step": 98220
    },
    {
      "epoch": 0.34427166023544914,
      "grad_norm": 3.078125,
      "learning_rate": 4.9180276797744495e-05,
      "loss": 0.9533,
      "step": 98230
    },
    {
      "epoch": 0.34430670774234473,
      "grad_norm": 3.3125,
      "learning_rate": 4.9179627769080796e-05,
      "loss": 0.9557,
      "step": 98240
    },
    {
      "epoch": 0.3443417552492403,
      "grad_norm": 3.015625,
      "learning_rate": 4.917897874041709e-05,
      "loss": 0.9174,
      "step": 98250
    },
    {
      "epoch": 0.34437680275613597,
      "grad_norm": 3.171875,
      "learning_rate": 4.917832971175339e-05,
      "loss": 0.9555,
      "step": 98260
    },
    {
      "epoch": 0.34441185026303156,
      "grad_norm": 3.203125,
      "learning_rate": 4.917768068308969e-05,
      "loss": 0.971,
      "step": 98270
    },
    {
      "epoch": 0.34444689776992715,
      "grad_norm": 2.5625,
      "learning_rate": 4.917703165442599e-05,
      "loss": 0.9045,
      "step": 98280
    },
    {
      "epoch": 0.34448194527682274,
      "grad_norm": 3.0,
      "learning_rate": 4.917638262576228e-05,
      "loss": 0.9565,
      "step": 98290
    },
    {
      "epoch": 0.34451699278371833,
      "grad_norm": 3.34375,
      "learning_rate": 4.9175733597098584e-05,
      "loss": 0.9596,
      "step": 98300
    },
    {
      "epoch": 0.3445520402906139,
      "grad_norm": 3.015625,
      "learning_rate": 4.917508456843488e-05,
      "loss": 0.9656,
      "step": 98310
    },
    {
      "epoch": 0.3445870877975095,
      "grad_norm": 2.875,
      "learning_rate": 4.917443553977118e-05,
      "loss": 1.012,
      "step": 98320
    },
    {
      "epoch": 0.3446221353044051,
      "grad_norm": 2.640625,
      "learning_rate": 4.9173786511107475e-05,
      "loss": 1.0203,
      "step": 98330
    },
    {
      "epoch": 0.3446571828113007,
      "grad_norm": 2.703125,
      "learning_rate": 4.9173137482443776e-05,
      "loss": 1.0081,
      "step": 98340
    },
    {
      "epoch": 0.3446922303181963,
      "grad_norm": 2.875,
      "learning_rate": 4.917248845378008e-05,
      "loss": 0.9894,
      "step": 98350
    },
    {
      "epoch": 0.34472727782509194,
      "grad_norm": 2.609375,
      "learning_rate": 4.917183942511637e-05,
      "loss": 0.971,
      "step": 98360
    },
    {
      "epoch": 0.34476232533198753,
      "grad_norm": 3.0625,
      "learning_rate": 4.9171190396452674e-05,
      "loss": 1.0384,
      "step": 98370
    },
    {
      "epoch": 0.3447973728388831,
      "grad_norm": 2.84375,
      "learning_rate": 4.917054136778897e-05,
      "loss": 1.0367,
      "step": 98380
    },
    {
      "epoch": 0.3448324203457787,
      "grad_norm": 2.65625,
      "learning_rate": 4.916989233912527e-05,
      "loss": 0.953,
      "step": 98390
    },
    {
      "epoch": 0.3448674678526743,
      "grad_norm": 2.734375,
      "learning_rate": 4.9169243310461564e-05,
      "loss": 0.933,
      "step": 98400
    },
    {
      "epoch": 0.3449025153595699,
      "grad_norm": 2.984375,
      "learning_rate": 4.9168594281797866e-05,
      "loss": 0.9537,
      "step": 98410
    },
    {
      "epoch": 0.3449375628664655,
      "grad_norm": 3.0625,
      "learning_rate": 4.916794525313416e-05,
      "loss": 0.9521,
      "step": 98420
    },
    {
      "epoch": 0.3449726103733611,
      "grad_norm": 2.90625,
      "learning_rate": 4.916729622447046e-05,
      "loss": 0.9186,
      "step": 98430
    },
    {
      "epoch": 0.34500765788025667,
      "grad_norm": 2.875,
      "learning_rate": 4.9166647195806756e-05,
      "loss": 1.0426,
      "step": 98440
    },
    {
      "epoch": 0.34504270538715226,
      "grad_norm": 2.65625,
      "learning_rate": 4.916599816714306e-05,
      "loss": 0.9339,
      "step": 98450
    },
    {
      "epoch": 0.3450777528940479,
      "grad_norm": 2.578125,
      "learning_rate": 4.916534913847935e-05,
      "loss": 0.9764,
      "step": 98460
    },
    {
      "epoch": 0.3451128004009435,
      "grad_norm": 3.0,
      "learning_rate": 4.9164700109815654e-05,
      "loss": 0.9718,
      "step": 98470
    },
    {
      "epoch": 0.3451478479078391,
      "grad_norm": 2.828125,
      "learning_rate": 4.9164051081151955e-05,
      "loss": 0.9394,
      "step": 98480
    },
    {
      "epoch": 0.3451828954147347,
      "grad_norm": 3.609375,
      "learning_rate": 4.916340205248825e-05,
      "loss": 0.9968,
      "step": 98490
    },
    {
      "epoch": 0.34521794292163027,
      "grad_norm": 2.953125,
      "learning_rate": 4.916275302382455e-05,
      "loss": 0.8839,
      "step": 98500
    },
    {
      "epoch": 0.34525299042852586,
      "grad_norm": 2.984375,
      "learning_rate": 4.9162103995160846e-05,
      "loss": 0.9231,
      "step": 98510
    },
    {
      "epoch": 0.34528803793542145,
      "grad_norm": 3.5,
      "learning_rate": 4.916145496649715e-05,
      "loss": 0.8911,
      "step": 98520
    },
    {
      "epoch": 0.34532308544231705,
      "grad_norm": 2.8125,
      "learning_rate": 4.916080593783344e-05,
      "loss": 0.9786,
      "step": 98530
    },
    {
      "epoch": 0.34535813294921264,
      "grad_norm": 2.9375,
      "learning_rate": 4.916015690916974e-05,
      "loss": 0.9814,
      "step": 98540
    },
    {
      "epoch": 0.34539318045610823,
      "grad_norm": 3.359375,
      "learning_rate": 4.915950788050604e-05,
      "loss": 0.9431,
      "step": 98550
    },
    {
      "epoch": 0.3454282279630039,
      "grad_norm": 3.34375,
      "learning_rate": 4.915885885184234e-05,
      "loss": 0.9887,
      "step": 98560
    },
    {
      "epoch": 0.34546327546989947,
      "grad_norm": 3.390625,
      "learning_rate": 4.9158209823178634e-05,
      "loss": 1.0183,
      "step": 98570
    },
    {
      "epoch": 0.34549832297679506,
      "grad_norm": 2.71875,
      "learning_rate": 4.915756079451493e-05,
      "loss": 0.8774,
      "step": 98580
    },
    {
      "epoch": 0.34553337048369065,
      "grad_norm": 2.9375,
      "learning_rate": 4.915691176585123e-05,
      "loss": 0.9666,
      "step": 98590
    },
    {
      "epoch": 0.34556841799058624,
      "grad_norm": 2.84375,
      "learning_rate": 4.9156262737187524e-05,
      "loss": 0.9155,
      "step": 98600
    },
    {
      "epoch": 0.34560346549748183,
      "grad_norm": 3.046875,
      "learning_rate": 4.9155613708523826e-05,
      "loss": 1.0071,
      "step": 98610
    },
    {
      "epoch": 0.3456385130043774,
      "grad_norm": 2.6875,
      "learning_rate": 4.915496467986012e-05,
      "loss": 0.8822,
      "step": 98620
    },
    {
      "epoch": 0.345673560511273,
      "grad_norm": 3.03125,
      "learning_rate": 4.915431565119642e-05,
      "loss": 0.9075,
      "step": 98630
    },
    {
      "epoch": 0.3457086080181686,
      "grad_norm": 2.96875,
      "learning_rate": 4.9153666622532716e-05,
      "loss": 0.9136,
      "step": 98640
    },
    {
      "epoch": 0.3457436555250642,
      "grad_norm": 3.109375,
      "learning_rate": 4.915301759386902e-05,
      "loss": 0.9098,
      "step": 98650
    },
    {
      "epoch": 0.34577870303195984,
      "grad_norm": 3.25,
      "learning_rate": 4.915236856520531e-05,
      "loss": 0.9458,
      "step": 98660
    },
    {
      "epoch": 0.34581375053885544,
      "grad_norm": 3.046875,
      "learning_rate": 4.9151719536541614e-05,
      "loss": 0.9767,
      "step": 98670
    },
    {
      "epoch": 0.345848798045751,
      "grad_norm": 3.03125,
      "learning_rate": 4.915107050787791e-05,
      "loss": 0.9447,
      "step": 98680
    },
    {
      "epoch": 0.3458838455526466,
      "grad_norm": 3.421875,
      "learning_rate": 4.915042147921421e-05,
      "loss": 0.8701,
      "step": 98690
    },
    {
      "epoch": 0.3459188930595422,
      "grad_norm": 3.078125,
      "learning_rate": 4.9149772450550504e-05,
      "loss": 0.9252,
      "step": 98700
    },
    {
      "epoch": 0.3459539405664378,
      "grad_norm": 2.734375,
      "learning_rate": 4.9149123421886806e-05,
      "loss": 0.8959,
      "step": 98710
    },
    {
      "epoch": 0.3459889880733334,
      "grad_norm": 2.984375,
      "learning_rate": 4.914847439322311e-05,
      "loss": 0.908,
      "step": 98720
    },
    {
      "epoch": 0.346024035580229,
      "grad_norm": 2.84375,
      "learning_rate": 4.91478253645594e-05,
      "loss": 0.9504,
      "step": 98730
    },
    {
      "epoch": 0.3460590830871246,
      "grad_norm": 3.1875,
      "learning_rate": 4.91471763358957e-05,
      "loss": 1.008,
      "step": 98740
    },
    {
      "epoch": 0.3460941305940202,
      "grad_norm": 3.25,
      "learning_rate": 4.9146527307232e-05,
      "loss": 0.95,
      "step": 98750
    },
    {
      "epoch": 0.3461291781009158,
      "grad_norm": 2.640625,
      "learning_rate": 4.91458782785683e-05,
      "loss": 0.9618,
      "step": 98760
    },
    {
      "epoch": 0.3461642256078114,
      "grad_norm": 3.5,
      "learning_rate": 4.9145229249904594e-05,
      "loss": 0.9641,
      "step": 98770
    },
    {
      "epoch": 0.346199273114707,
      "grad_norm": 2.96875,
      "learning_rate": 4.9144580221240895e-05,
      "loss": 0.9626,
      "step": 98780
    },
    {
      "epoch": 0.3462343206216026,
      "grad_norm": 2.984375,
      "learning_rate": 4.914393119257719e-05,
      "loss": 0.9803,
      "step": 98790
    },
    {
      "epoch": 0.3462693681284982,
      "grad_norm": 3.109375,
      "learning_rate": 4.914328216391349e-05,
      "loss": 0.94,
      "step": 98800
    },
    {
      "epoch": 0.34630441563539377,
      "grad_norm": 3.0625,
      "learning_rate": 4.9142633135249786e-05,
      "loss": 0.9494,
      "step": 98810
    },
    {
      "epoch": 0.34633946314228936,
      "grad_norm": 2.984375,
      "learning_rate": 4.914198410658609e-05,
      "loss": 0.9279,
      "step": 98820
    },
    {
      "epoch": 0.34637451064918495,
      "grad_norm": 3.171875,
      "learning_rate": 4.914133507792238e-05,
      "loss": 0.9939,
      "step": 98830
    },
    {
      "epoch": 0.34640955815608054,
      "grad_norm": 3.078125,
      "learning_rate": 4.914068604925868e-05,
      "loss": 0.9071,
      "step": 98840
    },
    {
      "epoch": 0.3464446056629762,
      "grad_norm": 3.203125,
      "learning_rate": 4.9140037020594984e-05,
      "loss": 0.9405,
      "step": 98850
    },
    {
      "epoch": 0.3464796531698718,
      "grad_norm": 3.453125,
      "learning_rate": 4.913938799193128e-05,
      "loss": 0.9511,
      "step": 98860
    },
    {
      "epoch": 0.3465147006767674,
      "grad_norm": 2.890625,
      "learning_rate": 4.913873896326758e-05,
      "loss": 0.9697,
      "step": 98870
    },
    {
      "epoch": 0.34654974818366296,
      "grad_norm": 3.203125,
      "learning_rate": 4.9138089934603875e-05,
      "loss": 0.9632,
      "step": 98880
    },
    {
      "epoch": 0.34658479569055856,
      "grad_norm": 2.84375,
      "learning_rate": 4.9137440905940176e-05,
      "loss": 1.0767,
      "step": 98890
    },
    {
      "epoch": 0.34661984319745415,
      "grad_norm": 3.09375,
      "learning_rate": 4.913679187727647e-05,
      "loss": 0.9039,
      "step": 98900
    },
    {
      "epoch": 0.34665489070434974,
      "grad_norm": 2.6875,
      "learning_rate": 4.913614284861277e-05,
      "loss": 0.9041,
      "step": 98910
    },
    {
      "epoch": 0.34668993821124533,
      "grad_norm": 3.109375,
      "learning_rate": 4.913549381994907e-05,
      "loss": 0.975,
      "step": 98920
    },
    {
      "epoch": 0.3467249857181409,
      "grad_norm": 3.0625,
      "learning_rate": 4.913484479128537e-05,
      "loss": 0.8888,
      "step": 98930
    },
    {
      "epoch": 0.3467600332250365,
      "grad_norm": 3.34375,
      "learning_rate": 4.913419576262166e-05,
      "loss": 1.0385,
      "step": 98940
    },
    {
      "epoch": 0.34679508073193216,
      "grad_norm": 2.96875,
      "learning_rate": 4.9133546733957964e-05,
      "loss": 0.9784,
      "step": 98950
    },
    {
      "epoch": 0.34683012823882775,
      "grad_norm": 3.078125,
      "learning_rate": 4.913289770529426e-05,
      "loss": 1.0113,
      "step": 98960
    },
    {
      "epoch": 0.34686517574572334,
      "grad_norm": 3.28125,
      "learning_rate": 4.9132248676630554e-05,
      "loss": 0.9896,
      "step": 98970
    },
    {
      "epoch": 0.34690022325261893,
      "grad_norm": 3.328125,
      "learning_rate": 4.9131599647966855e-05,
      "loss": 0.9889,
      "step": 98980
    },
    {
      "epoch": 0.3469352707595145,
      "grad_norm": 3.125,
      "learning_rate": 4.913095061930315e-05,
      "loss": 0.9402,
      "step": 98990
    },
    {
      "epoch": 0.3469703182664101,
      "grad_norm": 3.546875,
      "learning_rate": 4.913030159063945e-05,
      "loss": 0.9664,
      "step": 99000
    },
    {
      "epoch": 0.3470053657733057,
      "grad_norm": 3.109375,
      "learning_rate": 4.9129652561975746e-05,
      "loss": 0.9715,
      "step": 99010
    },
    {
      "epoch": 0.3470404132802013,
      "grad_norm": 3.546875,
      "learning_rate": 4.912900353331205e-05,
      "loss": 0.9873,
      "step": 99020
    },
    {
      "epoch": 0.3470754607870969,
      "grad_norm": 3.265625,
      "learning_rate": 4.912835450464834e-05,
      "loss": 0.9658,
      "step": 99030
    },
    {
      "epoch": 0.3471105082939925,
      "grad_norm": 3.203125,
      "learning_rate": 4.912770547598464e-05,
      "loss": 1.0099,
      "step": 99040
    },
    {
      "epoch": 0.34714555580088813,
      "grad_norm": 2.96875,
      "learning_rate": 4.912705644732094e-05,
      "loss": 0.9662,
      "step": 99050
    },
    {
      "epoch": 0.3471806033077837,
      "grad_norm": 3.078125,
      "learning_rate": 4.912640741865724e-05,
      "loss": 0.9709,
      "step": 99060
    },
    {
      "epoch": 0.3472156508146793,
      "grad_norm": 3.171875,
      "learning_rate": 4.9125758389993534e-05,
      "loss": 0.9089,
      "step": 99070
    },
    {
      "epoch": 0.3472506983215749,
      "grad_norm": 3.125,
      "learning_rate": 4.9125109361329835e-05,
      "loss": 0.992,
      "step": 99080
    },
    {
      "epoch": 0.3472857458284705,
      "grad_norm": 2.953125,
      "learning_rate": 4.9124460332666136e-05,
      "loss": 1.0893,
      "step": 99090
    },
    {
      "epoch": 0.3473207933353661,
      "grad_norm": 3.375,
      "learning_rate": 4.912381130400243e-05,
      "loss": 0.9125,
      "step": 99100
    },
    {
      "epoch": 0.3473558408422617,
      "grad_norm": 3.125,
      "learning_rate": 4.912316227533873e-05,
      "loss": 0.945,
      "step": 99110
    },
    {
      "epoch": 0.34739088834915727,
      "grad_norm": 3.765625,
      "learning_rate": 4.912251324667503e-05,
      "loss": 0.9412,
      "step": 99120
    },
    {
      "epoch": 0.34742593585605286,
      "grad_norm": 2.859375,
      "learning_rate": 4.912186421801133e-05,
      "loss": 1.0163,
      "step": 99130
    },
    {
      "epoch": 0.34746098336294845,
      "grad_norm": 3.40625,
      "learning_rate": 4.912121518934762e-05,
      "loss": 0.965,
      "step": 99140
    },
    {
      "epoch": 0.3474960308698441,
      "grad_norm": 2.75,
      "learning_rate": 4.9120566160683924e-05,
      "loss": 0.9108,
      "step": 99150
    },
    {
      "epoch": 0.3475310783767397,
      "grad_norm": 3.359375,
      "learning_rate": 4.911991713202022e-05,
      "loss": 0.9599,
      "step": 99160
    },
    {
      "epoch": 0.3475661258836353,
      "grad_norm": 3.296875,
      "learning_rate": 4.911926810335652e-05,
      "loss": 1.01,
      "step": 99170
    },
    {
      "epoch": 0.34760117339053087,
      "grad_norm": 3.0625,
      "learning_rate": 4.9118619074692815e-05,
      "loss": 0.977,
      "step": 99180
    },
    {
      "epoch": 0.34763622089742646,
      "grad_norm": 3.296875,
      "learning_rate": 4.9117970046029116e-05,
      "loss": 0.9709,
      "step": 99190
    },
    {
      "epoch": 0.34767126840432205,
      "grad_norm": 3.46875,
      "learning_rate": 4.911732101736541e-05,
      "loss": 1.0498,
      "step": 99200
    },
    {
      "epoch": 0.34770631591121764,
      "grad_norm": 3.1875,
      "learning_rate": 4.911667198870171e-05,
      "loss": 0.9612,
      "step": 99210
    },
    {
      "epoch": 0.34774136341811324,
      "grad_norm": 3.03125,
      "learning_rate": 4.9116022960038014e-05,
      "loss": 0.9653,
      "step": 99220
    },
    {
      "epoch": 0.3477764109250088,
      "grad_norm": 2.75,
      "learning_rate": 4.911537393137431e-05,
      "loss": 0.9696,
      "step": 99230
    },
    {
      "epoch": 0.3478114584319044,
      "grad_norm": 2.859375,
      "learning_rate": 4.911472490271061e-05,
      "loss": 0.9039,
      "step": 99240
    },
    {
      "epoch": 0.34784650593880007,
      "grad_norm": 3.125,
      "learning_rate": 4.9114075874046904e-05,
      "loss": 0.9941,
      "step": 99250
    },
    {
      "epoch": 0.34788155344569566,
      "grad_norm": 3.203125,
      "learning_rate": 4.9113426845383206e-05,
      "loss": 1.0015,
      "step": 99260
    },
    {
      "epoch": 0.34791660095259125,
      "grad_norm": 3.0625,
      "learning_rate": 4.91127778167195e-05,
      "loss": 0.9116,
      "step": 99270
    },
    {
      "epoch": 0.34795164845948684,
      "grad_norm": 2.71875,
      "learning_rate": 4.91121287880558e-05,
      "loss": 0.9242,
      "step": 99280
    },
    {
      "epoch": 0.34798669596638243,
      "grad_norm": 3.03125,
      "learning_rate": 4.9111479759392096e-05,
      "loss": 0.9414,
      "step": 99290
    },
    {
      "epoch": 0.348021743473278,
      "grad_norm": 2.859375,
      "learning_rate": 4.91108307307284e-05,
      "loss": 0.9251,
      "step": 99300
    },
    {
      "epoch": 0.3480567909801736,
      "grad_norm": 3.515625,
      "learning_rate": 4.911018170206469e-05,
      "loss": 0.9931,
      "step": 99310
    },
    {
      "epoch": 0.3480918384870692,
      "grad_norm": 2.921875,
      "learning_rate": 4.9109532673400994e-05,
      "loss": 0.8944,
      "step": 99320
    },
    {
      "epoch": 0.3481268859939648,
      "grad_norm": 3.15625,
      "learning_rate": 4.910888364473729e-05,
      "loss": 1.0179,
      "step": 99330
    },
    {
      "epoch": 0.34816193350086044,
      "grad_norm": 3.390625,
      "learning_rate": 4.910823461607358e-05,
      "loss": 0.9414,
      "step": 99340
    },
    {
      "epoch": 0.34819698100775603,
      "grad_norm": 2.890625,
      "learning_rate": 4.9107585587409884e-05,
      "loss": 0.9534,
      "step": 99350
    },
    {
      "epoch": 0.3482320285146516,
      "grad_norm": 3.0625,
      "learning_rate": 4.910693655874618e-05,
      "loss": 0.9268,
      "step": 99360
    },
    {
      "epoch": 0.3482670760215472,
      "grad_norm": 3.171875,
      "learning_rate": 4.910628753008248e-05,
      "loss": 0.9334,
      "step": 99370
    },
    {
      "epoch": 0.3483021235284428,
      "grad_norm": 3.09375,
      "learning_rate": 4.9105638501418775e-05,
      "loss": 0.9807,
      "step": 99380
    },
    {
      "epoch": 0.3483371710353384,
      "grad_norm": 3.09375,
      "learning_rate": 4.9104989472755076e-05,
      "loss": 0.938,
      "step": 99390
    },
    {
      "epoch": 0.348372218542234,
      "grad_norm": 3.296875,
      "learning_rate": 4.910434044409137e-05,
      "loss": 1.0771,
      "step": 99400
    },
    {
      "epoch": 0.3484072660491296,
      "grad_norm": 3.046875,
      "learning_rate": 4.910369141542767e-05,
      "loss": 0.9613,
      "step": 99410
    },
    {
      "epoch": 0.3484423135560252,
      "grad_norm": 2.71875,
      "learning_rate": 4.910304238676397e-05,
      "loss": 1.0138,
      "step": 99420
    },
    {
      "epoch": 0.34847736106292077,
      "grad_norm": 2.828125,
      "learning_rate": 4.910239335810027e-05,
      "loss": 1.0008,
      "step": 99430
    },
    {
      "epoch": 0.3485124085698164,
      "grad_norm": 3.0,
      "learning_rate": 4.910174432943657e-05,
      "loss": 0.9577,
      "step": 99440
    },
    {
      "epoch": 0.348547456076712,
      "grad_norm": 3.21875,
      "learning_rate": 4.9101095300772864e-05,
      "loss": 1.0455,
      "step": 99450
    },
    {
      "epoch": 0.3485825035836076,
      "grad_norm": 3.203125,
      "learning_rate": 4.9100446272109166e-05,
      "loss": 0.9805,
      "step": 99460
    },
    {
      "epoch": 0.3486175510905032,
      "grad_norm": 3.15625,
      "learning_rate": 4.909979724344546e-05,
      "loss": 0.9459,
      "step": 99470
    },
    {
      "epoch": 0.3486525985973988,
      "grad_norm": 2.984375,
      "learning_rate": 4.909914821478176e-05,
      "loss": 0.9815,
      "step": 99480
    },
    {
      "epoch": 0.34868764610429437,
      "grad_norm": 2.984375,
      "learning_rate": 4.9098499186118056e-05,
      "loss": 0.9413,
      "step": 99490
    },
    {
      "epoch": 0.34872269361118996,
      "grad_norm": 3.28125,
      "learning_rate": 4.909785015745436e-05,
      "loss": 0.9755,
      "step": 99500
    },
    {
      "epoch": 0.34875774111808555,
      "grad_norm": 3.015625,
      "learning_rate": 4.909720112879065e-05,
      "loss": 0.9405,
      "step": 99510
    },
    {
      "epoch": 0.34879278862498114,
      "grad_norm": 2.84375,
      "learning_rate": 4.9096552100126954e-05,
      "loss": 0.883,
      "step": 99520
    },
    {
      "epoch": 0.34882783613187673,
      "grad_norm": 3.203125,
      "learning_rate": 4.909590307146325e-05,
      "loss": 1.0078,
      "step": 99530
    },
    {
      "epoch": 0.3488628836387724,
      "grad_norm": 3.234375,
      "learning_rate": 4.909525404279955e-05,
      "loss": 0.9675,
      "step": 99540
    },
    {
      "epoch": 0.34889793114566797,
      "grad_norm": 3.28125,
      "learning_rate": 4.9094605014135844e-05,
      "loss": 0.9525,
      "step": 99550
    },
    {
      "epoch": 0.34893297865256356,
      "grad_norm": 2.53125,
      "learning_rate": 4.9093955985472146e-05,
      "loss": 0.8807,
      "step": 99560
    },
    {
      "epoch": 0.34896802615945915,
      "grad_norm": 2.640625,
      "learning_rate": 4.909330695680844e-05,
      "loss": 0.9826,
      "step": 99570
    },
    {
      "epoch": 0.34900307366635475,
      "grad_norm": 2.90625,
      "learning_rate": 4.909265792814474e-05,
      "loss": 0.8867,
      "step": 99580
    },
    {
      "epoch": 0.34903812117325034,
      "grad_norm": 2.9375,
      "learning_rate": 4.909200889948104e-05,
      "loss": 1.006,
      "step": 99590
    },
    {
      "epoch": 0.34907316868014593,
      "grad_norm": 2.984375,
      "learning_rate": 4.909135987081734e-05,
      "loss": 0.8425,
      "step": 99600
    },
    {
      "epoch": 0.3491082161870415,
      "grad_norm": 3.1875,
      "learning_rate": 4.909071084215364e-05,
      "loss": 0.953,
      "step": 99610
    },
    {
      "epoch": 0.3491432636939371,
      "grad_norm": 2.796875,
      "learning_rate": 4.9090061813489934e-05,
      "loss": 0.969,
      "step": 99620
    },
    {
      "epoch": 0.3491783112008327,
      "grad_norm": 4.0625,
      "learning_rate": 4.9089412784826235e-05,
      "loss": 1.1114,
      "step": 99630
    },
    {
      "epoch": 0.34921335870772835,
      "grad_norm": 3.25,
      "learning_rate": 4.908876375616253e-05,
      "loss": 0.9516,
      "step": 99640
    },
    {
      "epoch": 0.34924840621462394,
      "grad_norm": 3.65625,
      "learning_rate": 4.908811472749883e-05,
      "loss": 0.9865,
      "step": 99650
    },
    {
      "epoch": 0.34928345372151953,
      "grad_norm": 2.9375,
      "learning_rate": 4.9087465698835126e-05,
      "loss": 0.9508,
      "step": 99660
    },
    {
      "epoch": 0.3493185012284151,
      "grad_norm": 3.015625,
      "learning_rate": 4.908681667017143e-05,
      "loss": 0.9959,
      "step": 99670
    },
    {
      "epoch": 0.3493535487353107,
      "grad_norm": 3.25,
      "learning_rate": 4.908616764150772e-05,
      "loss": 1.0756,
      "step": 99680
    },
    {
      "epoch": 0.3493885962422063,
      "grad_norm": 3.28125,
      "learning_rate": 4.908551861284402e-05,
      "loss": 1.0288,
      "step": 99690
    },
    {
      "epoch": 0.3494236437491019,
      "grad_norm": 3.484375,
      "learning_rate": 4.908486958418032e-05,
      "loss": 0.988,
      "step": 99700
    },
    {
      "epoch": 0.3494586912559975,
      "grad_norm": 3.1875,
      "learning_rate": 4.908422055551661e-05,
      "loss": 0.9087,
      "step": 99710
    },
    {
      "epoch": 0.3494937387628931,
      "grad_norm": 3.0,
      "learning_rate": 4.9083571526852914e-05,
      "loss": 0.9296,
      "step": 99720
    },
    {
      "epoch": 0.34952878626978867,
      "grad_norm": 3.015625,
      "learning_rate": 4.908292249818921e-05,
      "loss": 1.0154,
      "step": 99730
    },
    {
      "epoch": 0.3495638337766843,
      "grad_norm": 2.671875,
      "learning_rate": 4.908227346952551e-05,
      "loss": 0.9706,
      "step": 99740
    },
    {
      "epoch": 0.3495988812835799,
      "grad_norm": 3.046875,
      "learning_rate": 4.9081624440861804e-05,
      "loss": 0.9873,
      "step": 99750
    },
    {
      "epoch": 0.3496339287904755,
      "grad_norm": 3.34375,
      "learning_rate": 4.9080975412198106e-05,
      "loss": 0.9773,
      "step": 99760
    },
    {
      "epoch": 0.3496689762973711,
      "grad_norm": 3.171875,
      "learning_rate": 4.90803263835344e-05,
      "loss": 0.9349,
      "step": 99770
    },
    {
      "epoch": 0.3497040238042667,
      "grad_norm": 3.203125,
      "learning_rate": 4.90796773548707e-05,
      "loss": 0.9148,
      "step": 99780
    },
    {
      "epoch": 0.3497390713111623,
      "grad_norm": 2.6875,
      "learning_rate": 4.9079028326206996e-05,
      "loss": 0.9612,
      "step": 99790
    },
    {
      "epoch": 0.34977411881805787,
      "grad_norm": 2.921875,
      "learning_rate": 4.90783792975433e-05,
      "loss": 0.983,
      "step": 99800
    },
    {
      "epoch": 0.34980916632495346,
      "grad_norm": 2.953125,
      "learning_rate": 4.90777302688796e-05,
      "loss": 0.8621,
      "step": 99810
    },
    {
      "epoch": 0.34984421383184905,
      "grad_norm": 3.203125,
      "learning_rate": 4.9077081240215894e-05,
      "loss": 0.9464,
      "step": 99820
    },
    {
      "epoch": 0.3498792613387447,
      "grad_norm": 3.0,
      "learning_rate": 4.9076432211552195e-05,
      "loss": 0.9755,
      "step": 99830
    },
    {
      "epoch": 0.3499143088456403,
      "grad_norm": 3.1875,
      "learning_rate": 4.907578318288849e-05,
      "loss": 0.9635,
      "step": 99840
    },
    {
      "epoch": 0.3499493563525359,
      "grad_norm": 3.171875,
      "learning_rate": 4.907513415422479e-05,
      "loss": 0.9837,
      "step": 99850
    },
    {
      "epoch": 0.34998440385943147,
      "grad_norm": 2.828125,
      "learning_rate": 4.9074485125561086e-05,
      "loss": 0.8925,
      "step": 99860
    },
    {
      "epoch": 0.35001945136632706,
      "grad_norm": 3.0625,
      "learning_rate": 4.907383609689739e-05,
      "loss": 0.915,
      "step": 99870
    },
    {
      "epoch": 0.35005449887322265,
      "grad_norm": 3.296875,
      "learning_rate": 4.907318706823368e-05,
      "loss": 0.9765,
      "step": 99880
    },
    {
      "epoch": 0.35008954638011824,
      "grad_norm": 3.078125,
      "learning_rate": 4.907253803956998e-05,
      "loss": 0.9429,
      "step": 99890
    },
    {
      "epoch": 0.35012459388701384,
      "grad_norm": 2.90625,
      "learning_rate": 4.907188901090628e-05,
      "loss": 0.9787,
      "step": 99900
    },
    {
      "epoch": 0.3501596413939094,
      "grad_norm": 2.953125,
      "learning_rate": 4.907123998224258e-05,
      "loss": 0.9732,
      "step": 99910
    },
    {
      "epoch": 0.350194688900805,
      "grad_norm": 3.1875,
      "learning_rate": 4.9070590953578874e-05,
      "loss": 0.9911,
      "step": 99920
    },
    {
      "epoch": 0.35022973640770066,
      "grad_norm": 3.109375,
      "learning_rate": 4.9069941924915175e-05,
      "loss": 0.9072,
      "step": 99930
    },
    {
      "epoch": 0.35026478391459626,
      "grad_norm": 3.34375,
      "learning_rate": 4.906929289625147e-05,
      "loss": 0.9371,
      "step": 99940
    },
    {
      "epoch": 0.35029983142149185,
      "grad_norm": 3.1875,
      "learning_rate": 4.906864386758777e-05,
      "loss": 0.8699,
      "step": 99950
    },
    {
      "epoch": 0.35033487892838744,
      "grad_norm": 3.515625,
      "learning_rate": 4.906799483892407e-05,
      "loss": 0.9163,
      "step": 99960
    },
    {
      "epoch": 0.35036992643528303,
      "grad_norm": 3.53125,
      "learning_rate": 4.906734581026037e-05,
      "loss": 0.9799,
      "step": 99970
    },
    {
      "epoch": 0.3504049739421786,
      "grad_norm": 3.359375,
      "learning_rate": 4.906669678159667e-05,
      "loss": 0.9455,
      "step": 99980
    },
    {
      "epoch": 0.3504400214490742,
      "grad_norm": 3.0,
      "learning_rate": 4.906604775293296e-05,
      "loss": 1.0116,
      "step": 99990
    },
    {
      "epoch": 0.3504750689559698,
      "grad_norm": 3.015625,
      "learning_rate": 4.9065398724269265e-05,
      "loss": 1.0282,
      "step": 100000
    },
    {
      "epoch": 0.3504750689559698,
      "eval_loss": 0.899395763874054,
      "eval_runtime": 551.4837,
      "eval_samples_per_second": 689.841,
      "eval_steps_per_second": 57.487,
      "step": 100000
    },
    {
      "epoch": 0.3505101164628654,
      "grad_norm": 3.21875,
      "learning_rate": 4.906474969560556e-05,
      "loss": 0.9658,
      "step": 100010
    },
    {
      "epoch": 0.350545163969761,
      "grad_norm": 3.21875,
      "learning_rate": 4.906410066694186e-05,
      "loss": 0.8984,
      "step": 100020
    },
    {
      "epoch": 0.35058021147665663,
      "grad_norm": 2.78125,
      "learning_rate": 4.9063451638278155e-05,
      "loss": 0.9753,
      "step": 100030
    },
    {
      "epoch": 0.3506152589835522,
      "grad_norm": 2.625,
      "learning_rate": 4.9062802609614457e-05,
      "loss": 0.8818,
      "step": 100040
    },
    {
      "epoch": 0.3506503064904478,
      "grad_norm": 3.296875,
      "learning_rate": 4.906215358095075e-05,
      "loss": 1.0367,
      "step": 100050
    },
    {
      "epoch": 0.3506853539973434,
      "grad_norm": 3.1875,
      "learning_rate": 4.906150455228705e-05,
      "loss": 0.8418,
      "step": 100060
    },
    {
      "epoch": 0.350720401504239,
      "grad_norm": 3.953125,
      "learning_rate": 4.906085552362335e-05,
      "loss": 1.0065,
      "step": 100070
    },
    {
      "epoch": 0.3507554490111346,
      "grad_norm": 2.84375,
      "learning_rate": 4.906020649495965e-05,
      "loss": 0.8562,
      "step": 100080
    },
    {
      "epoch": 0.3507904965180302,
      "grad_norm": 3.5,
      "learning_rate": 4.905955746629594e-05,
      "loss": 0.9351,
      "step": 100090
    },
    {
      "epoch": 0.3508255440249258,
      "grad_norm": 3.203125,
      "learning_rate": 4.905890843763224e-05,
      "loss": 1.0203,
      "step": 100100
    },
    {
      "epoch": 0.35086059153182136,
      "grad_norm": 2.921875,
      "learning_rate": 4.905825940896854e-05,
      "loss": 0.9288,
      "step": 100110
    },
    {
      "epoch": 0.35089563903871696,
      "grad_norm": 2.703125,
      "learning_rate": 4.9057610380304834e-05,
      "loss": 0.9098,
      "step": 100120
    },
    {
      "epoch": 0.3509306865456126,
      "grad_norm": 2.84375,
      "learning_rate": 4.9056961351641135e-05,
      "loss": 0.9802,
      "step": 100130
    },
    {
      "epoch": 0.3509657340525082,
      "grad_norm": 3.203125,
      "learning_rate": 4.905631232297743e-05,
      "loss": 0.9403,
      "step": 100140
    },
    {
      "epoch": 0.3510007815594038,
      "grad_norm": 3.203125,
      "learning_rate": 4.905566329431373e-05,
      "loss": 0.959,
      "step": 100150
    },
    {
      "epoch": 0.3510358290662994,
      "grad_norm": 2.703125,
      "learning_rate": 4.9055014265650026e-05,
      "loss": 0.9115,
      "step": 100160
    },
    {
      "epoch": 0.35107087657319497,
      "grad_norm": 2.6875,
      "learning_rate": 4.905436523698633e-05,
      "loss": 0.9338,
      "step": 100170
    },
    {
      "epoch": 0.35110592408009056,
      "grad_norm": 2.953125,
      "learning_rate": 4.905371620832263e-05,
      "loss": 0.8869,
      "step": 100180
    },
    {
      "epoch": 0.35114097158698615,
      "grad_norm": 3.34375,
      "learning_rate": 4.905306717965892e-05,
      "loss": 0.9825,
      "step": 100190
    },
    {
      "epoch": 0.35117601909388174,
      "grad_norm": 3.546875,
      "learning_rate": 4.9052418150995225e-05,
      "loss": 1.0015,
      "step": 100200
    },
    {
      "epoch": 0.35121106660077733,
      "grad_norm": 3.1875,
      "learning_rate": 4.905176912233152e-05,
      "loss": 0.9269,
      "step": 100210
    },
    {
      "epoch": 0.3512461141076729,
      "grad_norm": 2.875,
      "learning_rate": 4.905112009366782e-05,
      "loss": 1.0342,
      "step": 100220
    },
    {
      "epoch": 0.35128116161456857,
      "grad_norm": 3.078125,
      "learning_rate": 4.9050471065004115e-05,
      "loss": 0.9971,
      "step": 100230
    },
    {
      "epoch": 0.35131620912146416,
      "grad_norm": 2.703125,
      "learning_rate": 4.9049822036340417e-05,
      "loss": 0.9354,
      "step": 100240
    },
    {
      "epoch": 0.35135125662835975,
      "grad_norm": 3.1875,
      "learning_rate": 4.904917300767671e-05,
      "loss": 0.9686,
      "step": 100250
    },
    {
      "epoch": 0.35138630413525535,
      "grad_norm": 3.515625,
      "learning_rate": 4.904852397901301e-05,
      "loss": 1.005,
      "step": 100260
    },
    {
      "epoch": 0.35142135164215094,
      "grad_norm": 3.28125,
      "learning_rate": 4.904787495034931e-05,
      "loss": 0.9728,
      "step": 100270
    },
    {
      "epoch": 0.35145639914904653,
      "grad_norm": 3.0625,
      "learning_rate": 4.904722592168561e-05,
      "loss": 1.0281,
      "step": 100280
    },
    {
      "epoch": 0.3514914466559421,
      "grad_norm": 2.71875,
      "learning_rate": 4.90465768930219e-05,
      "loss": 0.9167,
      "step": 100290
    },
    {
      "epoch": 0.3515264941628377,
      "grad_norm": 3.1875,
      "learning_rate": 4.9045927864358205e-05,
      "loss": 1.0281,
      "step": 100300
    },
    {
      "epoch": 0.3515615416697333,
      "grad_norm": 3.078125,
      "learning_rate": 4.9045278835694506e-05,
      "loss": 0.9889,
      "step": 100310
    },
    {
      "epoch": 0.3515965891766289,
      "grad_norm": 2.75,
      "learning_rate": 4.90446298070308e-05,
      "loss": 0.9141,
      "step": 100320
    },
    {
      "epoch": 0.35163163668352454,
      "grad_norm": 3.359375,
      "learning_rate": 4.90439807783671e-05,
      "loss": 0.9779,
      "step": 100330
    },
    {
      "epoch": 0.35166668419042013,
      "grad_norm": 3.5625,
      "learning_rate": 4.9043331749703397e-05,
      "loss": 0.9792,
      "step": 100340
    },
    {
      "epoch": 0.3517017316973157,
      "grad_norm": 2.796875,
      "learning_rate": 4.90426827210397e-05,
      "loss": 0.9039,
      "step": 100350
    },
    {
      "epoch": 0.3517367792042113,
      "grad_norm": 3.1875,
      "learning_rate": 4.904203369237599e-05,
      "loss": 0.9937,
      "step": 100360
    },
    {
      "epoch": 0.3517718267111069,
      "grad_norm": 2.828125,
      "learning_rate": 4.9041384663712294e-05,
      "loss": 0.8912,
      "step": 100370
    },
    {
      "epoch": 0.3518068742180025,
      "grad_norm": 3.046875,
      "learning_rate": 4.904073563504859e-05,
      "loss": 0.9704,
      "step": 100380
    },
    {
      "epoch": 0.3518419217248981,
      "grad_norm": 2.875,
      "learning_rate": 4.904008660638489e-05,
      "loss": 0.9241,
      "step": 100390
    },
    {
      "epoch": 0.3518769692317937,
      "grad_norm": 2.9375,
      "learning_rate": 4.9039437577721185e-05,
      "loss": 0.9356,
      "step": 100400
    },
    {
      "epoch": 0.35191201673868927,
      "grad_norm": 2.578125,
      "learning_rate": 4.9038788549057486e-05,
      "loss": 0.9969,
      "step": 100410
    },
    {
      "epoch": 0.3519470642455849,
      "grad_norm": 2.96875,
      "learning_rate": 4.903813952039378e-05,
      "loss": 0.913,
      "step": 100420
    },
    {
      "epoch": 0.3519821117524805,
      "grad_norm": 2.984375,
      "learning_rate": 4.903749049173008e-05,
      "loss": 0.9581,
      "step": 100430
    },
    {
      "epoch": 0.3520171592593761,
      "grad_norm": 3.34375,
      "learning_rate": 4.9036841463066377e-05,
      "loss": 0.8968,
      "step": 100440
    },
    {
      "epoch": 0.3520522067662717,
      "grad_norm": 3.09375,
      "learning_rate": 4.903619243440268e-05,
      "loss": 0.9564,
      "step": 100450
    },
    {
      "epoch": 0.3520872542731673,
      "grad_norm": 3.171875,
      "learning_rate": 4.903554340573897e-05,
      "loss": 0.9772,
      "step": 100460
    },
    {
      "epoch": 0.3521223017800629,
      "grad_norm": 2.75,
      "learning_rate": 4.903489437707527e-05,
      "loss": 0.9026,
      "step": 100470
    },
    {
      "epoch": 0.35215734928695847,
      "grad_norm": 3.125,
      "learning_rate": 4.903424534841157e-05,
      "loss": 0.9927,
      "step": 100480
    },
    {
      "epoch": 0.35219239679385406,
      "grad_norm": 3.171875,
      "learning_rate": 4.903359631974786e-05,
      "loss": 0.9704,
      "step": 100490
    },
    {
      "epoch": 0.35222744430074965,
      "grad_norm": 3.21875,
      "learning_rate": 4.9032947291084165e-05,
      "loss": 1.0203,
      "step": 100500
    },
    {
      "epoch": 0.35226249180764524,
      "grad_norm": 3.625,
      "learning_rate": 4.903229826242046e-05,
      "loss": 1.037,
      "step": 100510
    },
    {
      "epoch": 0.3522975393145409,
      "grad_norm": 2.953125,
      "learning_rate": 4.903164923375676e-05,
      "loss": 0.9225,
      "step": 100520
    },
    {
      "epoch": 0.3523325868214365,
      "grad_norm": 3.15625,
      "learning_rate": 4.9031000205093055e-05,
      "loss": 0.9538,
      "step": 100530
    },
    {
      "epoch": 0.35236763432833207,
      "grad_norm": 3.03125,
      "learning_rate": 4.9030351176429357e-05,
      "loss": 0.9584,
      "step": 100540
    },
    {
      "epoch": 0.35240268183522766,
      "grad_norm": 3.296875,
      "learning_rate": 4.902970214776566e-05,
      "loss": 0.9215,
      "step": 100550
    },
    {
      "epoch": 0.35243772934212325,
      "grad_norm": 3.0625,
      "learning_rate": 4.902905311910195e-05,
      "loss": 0.8863,
      "step": 100560
    },
    {
      "epoch": 0.35247277684901884,
      "grad_norm": 3.5,
      "learning_rate": 4.9028404090438254e-05,
      "loss": 0.9787,
      "step": 100570
    },
    {
      "epoch": 0.35250782435591443,
      "grad_norm": 3.359375,
      "learning_rate": 4.902775506177455e-05,
      "loss": 0.9507,
      "step": 100580
    },
    {
      "epoch": 0.35254287186281,
      "grad_norm": 2.5,
      "learning_rate": 4.902710603311085e-05,
      "loss": 0.9982,
      "step": 100590
    },
    {
      "epoch": 0.3525779193697056,
      "grad_norm": 3.015625,
      "learning_rate": 4.9026457004447145e-05,
      "loss": 0.989,
      "step": 100600
    },
    {
      "epoch": 0.3526129668766012,
      "grad_norm": 3.46875,
      "learning_rate": 4.9025807975783446e-05,
      "loss": 1.0152,
      "step": 100610
    },
    {
      "epoch": 0.35264801438349686,
      "grad_norm": 2.90625,
      "learning_rate": 4.902515894711974e-05,
      "loss": 0.9866,
      "step": 100620
    },
    {
      "epoch": 0.35268306189039245,
      "grad_norm": 2.875,
      "learning_rate": 4.902450991845604e-05,
      "loss": 0.9703,
      "step": 100630
    },
    {
      "epoch": 0.35271810939728804,
      "grad_norm": 3.3125,
      "learning_rate": 4.9023860889792337e-05,
      "loss": 0.9101,
      "step": 100640
    },
    {
      "epoch": 0.35275315690418363,
      "grad_norm": 2.921875,
      "learning_rate": 4.902321186112864e-05,
      "loss": 1.0473,
      "step": 100650
    },
    {
      "epoch": 0.3527882044110792,
      "grad_norm": 3.046875,
      "learning_rate": 4.902256283246493e-05,
      "loss": 0.9914,
      "step": 100660
    },
    {
      "epoch": 0.3528232519179748,
      "grad_norm": 3.171875,
      "learning_rate": 4.9021913803801234e-05,
      "loss": 0.9849,
      "step": 100670
    },
    {
      "epoch": 0.3528582994248704,
      "grad_norm": 2.9375,
      "learning_rate": 4.9021264775137535e-05,
      "loss": 1.006,
      "step": 100680
    },
    {
      "epoch": 0.352893346931766,
      "grad_norm": 3.0,
      "learning_rate": 4.902061574647383e-05,
      "loss": 0.9728,
      "step": 100690
    },
    {
      "epoch": 0.3529283944386616,
      "grad_norm": 2.984375,
      "learning_rate": 4.901996671781013e-05,
      "loss": 0.9439,
      "step": 100700
    },
    {
      "epoch": 0.3529634419455572,
      "grad_norm": 2.890625,
      "learning_rate": 4.9019317689146426e-05,
      "loss": 0.9796,
      "step": 100710
    },
    {
      "epoch": 0.3529984894524528,
      "grad_norm": 3.5,
      "learning_rate": 4.901866866048273e-05,
      "loss": 1.0548,
      "step": 100720
    },
    {
      "epoch": 0.3530335369593484,
      "grad_norm": 2.9375,
      "learning_rate": 4.901801963181902e-05,
      "loss": 0.9557,
      "step": 100730
    },
    {
      "epoch": 0.353068584466244,
      "grad_norm": 3.03125,
      "learning_rate": 4.901737060315532e-05,
      "loss": 0.8843,
      "step": 100740
    },
    {
      "epoch": 0.3531036319731396,
      "grad_norm": 2.828125,
      "learning_rate": 4.901672157449162e-05,
      "loss": 0.9974,
      "step": 100750
    },
    {
      "epoch": 0.3531386794800352,
      "grad_norm": 2.703125,
      "learning_rate": 4.901607254582792e-05,
      "loss": 0.9322,
      "step": 100760
    },
    {
      "epoch": 0.3531737269869308,
      "grad_norm": 3.40625,
      "learning_rate": 4.9015423517164214e-05,
      "loss": 0.915,
      "step": 100770
    },
    {
      "epoch": 0.35320877449382637,
      "grad_norm": 2.859375,
      "learning_rate": 4.9014774488500515e-05,
      "loss": 0.928,
      "step": 100780
    },
    {
      "epoch": 0.35324382200072196,
      "grad_norm": 3.140625,
      "learning_rate": 4.901412545983681e-05,
      "loss": 1.012,
      "step": 100790
    },
    {
      "epoch": 0.35327886950761755,
      "grad_norm": 3.25,
      "learning_rate": 4.901347643117311e-05,
      "loss": 0.9502,
      "step": 100800
    },
    {
      "epoch": 0.35331391701451315,
      "grad_norm": 2.9375,
      "learning_rate": 4.9012827402509406e-05,
      "loss": 0.9754,
      "step": 100810
    },
    {
      "epoch": 0.3533489645214088,
      "grad_norm": 3.34375,
      "learning_rate": 4.901217837384571e-05,
      "loss": 1.008,
      "step": 100820
    },
    {
      "epoch": 0.3533840120283044,
      "grad_norm": 3.21875,
      "learning_rate": 4.901152934518201e-05,
      "loss": 0.9975,
      "step": 100830
    },
    {
      "epoch": 0.3534190595352,
      "grad_norm": 3.078125,
      "learning_rate": 4.9010880316518297e-05,
      "loss": 0.9213,
      "step": 100840
    },
    {
      "epoch": 0.35345410704209557,
      "grad_norm": 2.796875,
      "learning_rate": 4.90102312878546e-05,
      "loss": 0.9687,
      "step": 100850
    },
    {
      "epoch": 0.35348915454899116,
      "grad_norm": 2.828125,
      "learning_rate": 4.900958225919089e-05,
      "loss": 0.9699,
      "step": 100860
    },
    {
      "epoch": 0.35352420205588675,
      "grad_norm": 2.796875,
      "learning_rate": 4.9008933230527194e-05,
      "loss": 0.9294,
      "step": 100870
    },
    {
      "epoch": 0.35355924956278234,
      "grad_norm": 3.015625,
      "learning_rate": 4.900828420186349e-05,
      "loss": 0.9107,
      "step": 100880
    },
    {
      "epoch": 0.35359429706967793,
      "grad_norm": 3.140625,
      "learning_rate": 4.900763517319979e-05,
      "loss": 0.9403,
      "step": 100890
    },
    {
      "epoch": 0.3536293445765735,
      "grad_norm": 2.96875,
      "learning_rate": 4.9006986144536085e-05,
      "loss": 1.0009,
      "step": 100900
    },
    {
      "epoch": 0.35366439208346917,
      "grad_norm": 3.0625,
      "learning_rate": 4.9006337115872386e-05,
      "loss": 0.9777,
      "step": 100910
    },
    {
      "epoch": 0.35369943959036476,
      "grad_norm": 4.125,
      "learning_rate": 4.900568808720869e-05,
      "loss": 0.9931,
      "step": 100920
    },
    {
      "epoch": 0.35373448709726035,
      "grad_norm": 3.296875,
      "learning_rate": 4.900503905854498e-05,
      "loss": 0.9187,
      "step": 100930
    },
    {
      "epoch": 0.35376953460415594,
      "grad_norm": 3.1875,
      "learning_rate": 4.900439002988128e-05,
      "loss": 0.8753,
      "step": 100940
    },
    {
      "epoch": 0.35380458211105154,
      "grad_norm": 3.015625,
      "learning_rate": 4.900374100121758e-05,
      "loss": 0.9858,
      "step": 100950
    },
    {
      "epoch": 0.3538396296179471,
      "grad_norm": 3.203125,
      "learning_rate": 4.900309197255388e-05,
      "loss": 0.8887,
      "step": 100960
    },
    {
      "epoch": 0.3538746771248427,
      "grad_norm": 3.078125,
      "learning_rate": 4.9002442943890174e-05,
      "loss": 1.0293,
      "step": 100970
    },
    {
      "epoch": 0.3539097246317383,
      "grad_norm": 3.265625,
      "learning_rate": 4.9001793915226475e-05,
      "loss": 0.9111,
      "step": 100980
    },
    {
      "epoch": 0.3539447721386339,
      "grad_norm": 3.125,
      "learning_rate": 4.900114488656277e-05,
      "loss": 0.937,
      "step": 100990
    },
    {
      "epoch": 0.3539798196455295,
      "grad_norm": 3.25,
      "learning_rate": 4.900049585789907e-05,
      "loss": 0.9477,
      "step": 101000
    },
    {
      "epoch": 0.35401486715242514,
      "grad_norm": 3.0625,
      "learning_rate": 4.8999846829235366e-05,
      "loss": 1.0218,
      "step": 101010
    },
    {
      "epoch": 0.35404991465932073,
      "grad_norm": 3.09375,
      "learning_rate": 4.899919780057167e-05,
      "loss": 0.9385,
      "step": 101020
    },
    {
      "epoch": 0.3540849621662163,
      "grad_norm": 3.171875,
      "learning_rate": 4.899854877190796e-05,
      "loss": 0.9586,
      "step": 101030
    },
    {
      "epoch": 0.3541200096731119,
      "grad_norm": 3.140625,
      "learning_rate": 4.899789974324426e-05,
      "loss": 0.9462,
      "step": 101040
    },
    {
      "epoch": 0.3541550571800075,
      "grad_norm": 3.328125,
      "learning_rate": 4.8997250714580565e-05,
      "loss": 0.9399,
      "step": 101050
    },
    {
      "epoch": 0.3541901046869031,
      "grad_norm": 2.65625,
      "learning_rate": 4.899660168591686e-05,
      "loss": 0.9409,
      "step": 101060
    },
    {
      "epoch": 0.3542251521937987,
      "grad_norm": 3.1875,
      "learning_rate": 4.899595265725316e-05,
      "loss": 0.8939,
      "step": 101070
    },
    {
      "epoch": 0.3542601997006943,
      "grad_norm": 3.390625,
      "learning_rate": 4.8995303628589455e-05,
      "loss": 1.0232,
      "step": 101080
    },
    {
      "epoch": 0.35429524720758987,
      "grad_norm": 2.84375,
      "learning_rate": 4.899465459992576e-05,
      "loss": 0.9892,
      "step": 101090
    },
    {
      "epoch": 0.35433029471448546,
      "grad_norm": 3.375,
      "learning_rate": 4.899400557126205e-05,
      "loss": 0.9192,
      "step": 101100
    },
    {
      "epoch": 0.3543653422213811,
      "grad_norm": 2.765625,
      "learning_rate": 4.899335654259835e-05,
      "loss": 0.9248,
      "step": 101110
    },
    {
      "epoch": 0.3544003897282767,
      "grad_norm": 3.09375,
      "learning_rate": 4.899270751393465e-05,
      "loss": 0.9523,
      "step": 101120
    },
    {
      "epoch": 0.3544354372351723,
      "grad_norm": 3.296875,
      "learning_rate": 4.899205848527095e-05,
      "loss": 1.0231,
      "step": 101130
    },
    {
      "epoch": 0.3544704847420679,
      "grad_norm": 2.671875,
      "learning_rate": 4.899140945660724e-05,
      "loss": 0.9118,
      "step": 101140
    },
    {
      "epoch": 0.3545055322489635,
      "grad_norm": 2.984375,
      "learning_rate": 4.8990760427943545e-05,
      "loss": 0.9671,
      "step": 101150
    },
    {
      "epoch": 0.35454057975585906,
      "grad_norm": 2.875,
      "learning_rate": 4.899011139927984e-05,
      "loss": 1.0549,
      "step": 101160
    },
    {
      "epoch": 0.35457562726275466,
      "grad_norm": 3.25,
      "learning_rate": 4.898946237061614e-05,
      "loss": 0.9842,
      "step": 101170
    },
    {
      "epoch": 0.35461067476965025,
      "grad_norm": 3.59375,
      "learning_rate": 4.8988813341952435e-05,
      "loss": 1.0157,
      "step": 101180
    },
    {
      "epoch": 0.35464572227654584,
      "grad_norm": 2.9375,
      "learning_rate": 4.898816431328874e-05,
      "loss": 0.9486,
      "step": 101190
    },
    {
      "epoch": 0.35468076978344143,
      "grad_norm": 2.984375,
      "learning_rate": 4.898751528462504e-05,
      "loss": 1.013,
      "step": 101200
    },
    {
      "epoch": 0.3547158172903371,
      "grad_norm": 3.015625,
      "learning_rate": 4.898686625596133e-05,
      "loss": 0.9913,
      "step": 101210
    },
    {
      "epoch": 0.35475086479723267,
      "grad_norm": 3.15625,
      "learning_rate": 4.898621722729763e-05,
      "loss": 0.9148,
      "step": 101220
    },
    {
      "epoch": 0.35478591230412826,
      "grad_norm": 3.234375,
      "learning_rate": 4.898556819863392e-05,
      "loss": 0.9473,
      "step": 101230
    },
    {
      "epoch": 0.35482095981102385,
      "grad_norm": 3.171875,
      "learning_rate": 4.898491916997022e-05,
      "loss": 0.9743,
      "step": 101240
    },
    {
      "epoch": 0.35485600731791944,
      "grad_norm": 2.953125,
      "learning_rate": 4.898427014130652e-05,
      "loss": 0.951,
      "step": 101250
    },
    {
      "epoch": 0.35489105482481503,
      "grad_norm": 3.015625,
      "learning_rate": 4.898362111264282e-05,
      "loss": 0.9125,
      "step": 101260
    },
    {
      "epoch": 0.3549261023317106,
      "grad_norm": 3.171875,
      "learning_rate": 4.8982972083979114e-05,
      "loss": 0.9578,
      "step": 101270
    },
    {
      "epoch": 0.3549611498386062,
      "grad_norm": 2.9375,
      "learning_rate": 4.8982323055315415e-05,
      "loss": 0.9789,
      "step": 101280
    },
    {
      "epoch": 0.3549961973455018,
      "grad_norm": 2.828125,
      "learning_rate": 4.898167402665172e-05,
      "loss": 0.9323,
      "step": 101290
    },
    {
      "epoch": 0.3550312448523974,
      "grad_norm": 2.921875,
      "learning_rate": 4.898102499798801e-05,
      "loss": 0.9975,
      "step": 101300
    },
    {
      "epoch": 0.35506629235929305,
      "grad_norm": 3.3125,
      "learning_rate": 4.898037596932431e-05,
      "loss": 1.0056,
      "step": 101310
    },
    {
      "epoch": 0.35510133986618864,
      "grad_norm": 2.8125,
      "learning_rate": 4.897972694066061e-05,
      "loss": 0.9118,
      "step": 101320
    },
    {
      "epoch": 0.35513638737308423,
      "grad_norm": 2.875,
      "learning_rate": 4.897907791199691e-05,
      "loss": 1.0128,
      "step": 101330
    },
    {
      "epoch": 0.3551714348799798,
      "grad_norm": 2.796875,
      "learning_rate": 4.89784288833332e-05,
      "loss": 1.1173,
      "step": 101340
    },
    {
      "epoch": 0.3552064823868754,
      "grad_norm": 3.40625,
      "learning_rate": 4.8977779854669505e-05,
      "loss": 0.9382,
      "step": 101350
    },
    {
      "epoch": 0.355241529893771,
      "grad_norm": 2.953125,
      "learning_rate": 4.89771308260058e-05,
      "loss": 0.9255,
      "step": 101360
    },
    {
      "epoch": 0.3552765774006666,
      "grad_norm": 3.046875,
      "learning_rate": 4.89764817973421e-05,
      "loss": 1.0135,
      "step": 101370
    },
    {
      "epoch": 0.3553116249075622,
      "grad_norm": 2.875,
      "learning_rate": 4.8975832768678395e-05,
      "loss": 0.9791,
      "step": 101380
    },
    {
      "epoch": 0.3553466724144578,
      "grad_norm": 2.78125,
      "learning_rate": 4.89751837400147e-05,
      "loss": 0.9669,
      "step": 101390
    },
    {
      "epoch": 0.35538171992135337,
      "grad_norm": 3.515625,
      "learning_rate": 4.897453471135099e-05,
      "loss": 0.9641,
      "step": 101400
    },
    {
      "epoch": 0.355416767428249,
      "grad_norm": 3.046875,
      "learning_rate": 4.897388568268729e-05,
      "loss": 0.9702,
      "step": 101410
    },
    {
      "epoch": 0.3554518149351446,
      "grad_norm": 3.046875,
      "learning_rate": 4.8973236654023594e-05,
      "loss": 0.9402,
      "step": 101420
    },
    {
      "epoch": 0.3554868624420402,
      "grad_norm": 2.96875,
      "learning_rate": 4.897258762535989e-05,
      "loss": 1.0123,
      "step": 101430
    },
    {
      "epoch": 0.3555219099489358,
      "grad_norm": 3.015625,
      "learning_rate": 4.897193859669619e-05,
      "loss": 1.0673,
      "step": 101440
    },
    {
      "epoch": 0.3555569574558314,
      "grad_norm": 2.65625,
      "learning_rate": 4.8971289568032485e-05,
      "loss": 1.0142,
      "step": 101450
    },
    {
      "epoch": 0.35559200496272697,
      "grad_norm": 3.0625,
      "learning_rate": 4.8970640539368786e-05,
      "loss": 0.9975,
      "step": 101460
    },
    {
      "epoch": 0.35562705246962256,
      "grad_norm": 3.140625,
      "learning_rate": 4.896999151070508e-05,
      "loss": 0.9425,
      "step": 101470
    },
    {
      "epoch": 0.35566209997651815,
      "grad_norm": 2.796875,
      "learning_rate": 4.896934248204138e-05,
      "loss": 0.8995,
      "step": 101480
    },
    {
      "epoch": 0.35569714748341374,
      "grad_norm": 3.140625,
      "learning_rate": 4.896869345337768e-05,
      "loss": 0.928,
      "step": 101490
    },
    {
      "epoch": 0.3557321949903094,
      "grad_norm": 3.46875,
      "learning_rate": 4.896804442471398e-05,
      "loss": 0.9852,
      "step": 101500
    },
    {
      "epoch": 0.355767242497205,
      "grad_norm": 2.96875,
      "learning_rate": 4.896739539605027e-05,
      "loss": 0.8941,
      "step": 101510
    },
    {
      "epoch": 0.3558022900041006,
      "grad_norm": 2.8125,
      "learning_rate": 4.8966746367386574e-05,
      "loss": 0.9411,
      "step": 101520
    },
    {
      "epoch": 0.35583733751099617,
      "grad_norm": 2.890625,
      "learning_rate": 4.896609733872287e-05,
      "loss": 0.9202,
      "step": 101530
    },
    {
      "epoch": 0.35587238501789176,
      "grad_norm": 2.859375,
      "learning_rate": 4.896544831005917e-05,
      "loss": 0.9831,
      "step": 101540
    },
    {
      "epoch": 0.35590743252478735,
      "grad_norm": 3.609375,
      "learning_rate": 4.896479928139547e-05,
      "loss": 1.0564,
      "step": 101550
    },
    {
      "epoch": 0.35594248003168294,
      "grad_norm": 3.0625,
      "learning_rate": 4.8964150252731766e-05,
      "loss": 1.0022,
      "step": 101560
    },
    {
      "epoch": 0.35597752753857853,
      "grad_norm": 2.640625,
      "learning_rate": 4.896350122406807e-05,
      "loss": 0.9484,
      "step": 101570
    },
    {
      "epoch": 0.3560125750454741,
      "grad_norm": 2.953125,
      "learning_rate": 4.896285219540436e-05,
      "loss": 0.9635,
      "step": 101580
    },
    {
      "epoch": 0.3560476225523697,
      "grad_norm": 2.984375,
      "learning_rate": 4.896220316674066e-05,
      "loss": 0.9422,
      "step": 101590
    },
    {
      "epoch": 0.35608267005926536,
      "grad_norm": 3.1875,
      "learning_rate": 4.896155413807695e-05,
      "loss": 0.9198,
      "step": 101600
    },
    {
      "epoch": 0.35611771756616095,
      "grad_norm": 3.046875,
      "learning_rate": 4.896090510941325e-05,
      "loss": 0.964,
      "step": 101610
    },
    {
      "epoch": 0.35615276507305654,
      "grad_norm": 2.734375,
      "learning_rate": 4.896025608074955e-05,
      "loss": 0.8681,
      "step": 101620
    },
    {
      "epoch": 0.35618781257995213,
      "grad_norm": 3.390625,
      "learning_rate": 4.895960705208585e-05,
      "loss": 0.9849,
      "step": 101630
    },
    {
      "epoch": 0.3562228600868477,
      "grad_norm": 3.3125,
      "learning_rate": 4.895895802342215e-05,
      "loss": 0.9555,
      "step": 101640
    },
    {
      "epoch": 0.3562579075937433,
      "grad_norm": 3.0,
      "learning_rate": 4.8958308994758445e-05,
      "loss": 1.0205,
      "step": 101650
    },
    {
      "epoch": 0.3562929551006389,
      "grad_norm": 2.875,
      "learning_rate": 4.8957659966094746e-05,
      "loss": 0.9305,
      "step": 101660
    },
    {
      "epoch": 0.3563280026075345,
      "grad_norm": 2.875,
      "learning_rate": 4.895701093743104e-05,
      "loss": 0.9955,
      "step": 101670
    },
    {
      "epoch": 0.3563630501144301,
      "grad_norm": 2.9375,
      "learning_rate": 4.895636190876734e-05,
      "loss": 0.8659,
      "step": 101680
    },
    {
      "epoch": 0.3563980976213257,
      "grad_norm": 3.140625,
      "learning_rate": 4.895571288010364e-05,
      "loss": 1.0464,
      "step": 101690
    },
    {
      "epoch": 0.35643314512822133,
      "grad_norm": 2.96875,
      "learning_rate": 4.895506385143994e-05,
      "loss": 0.9302,
      "step": 101700
    },
    {
      "epoch": 0.3564681926351169,
      "grad_norm": 3.203125,
      "learning_rate": 4.895441482277623e-05,
      "loss": 0.9224,
      "step": 101710
    },
    {
      "epoch": 0.3565032401420125,
      "grad_norm": 3.0,
      "learning_rate": 4.8953765794112534e-05,
      "loss": 1.0237,
      "step": 101720
    },
    {
      "epoch": 0.3565382876489081,
      "grad_norm": 2.6875,
      "learning_rate": 4.895311676544883e-05,
      "loss": 0.9451,
      "step": 101730
    },
    {
      "epoch": 0.3565733351558037,
      "grad_norm": 3.421875,
      "learning_rate": 4.895246773678513e-05,
      "loss": 0.9212,
      "step": 101740
    },
    {
      "epoch": 0.3566083826626993,
      "grad_norm": 3.4375,
      "learning_rate": 4.8951818708121425e-05,
      "loss": 1.008,
      "step": 101750
    },
    {
      "epoch": 0.3566434301695949,
      "grad_norm": 2.890625,
      "learning_rate": 4.8951169679457726e-05,
      "loss": 0.9439,
      "step": 101760
    },
    {
      "epoch": 0.35667847767649047,
      "grad_norm": 3.3125,
      "learning_rate": 4.895052065079402e-05,
      "loss": 0.9691,
      "step": 101770
    },
    {
      "epoch": 0.35671352518338606,
      "grad_norm": 3.40625,
      "learning_rate": 4.894987162213032e-05,
      "loss": 0.9528,
      "step": 101780
    },
    {
      "epoch": 0.35674857269028165,
      "grad_norm": 3.03125,
      "learning_rate": 4.8949222593466623e-05,
      "loss": 1.0072,
      "step": 101790
    },
    {
      "epoch": 0.3567836201971773,
      "grad_norm": 3.375,
      "learning_rate": 4.894857356480292e-05,
      "loss": 0.9189,
      "step": 101800
    },
    {
      "epoch": 0.3568186677040729,
      "grad_norm": 3.796875,
      "learning_rate": 4.894792453613922e-05,
      "loss": 0.9923,
      "step": 101810
    },
    {
      "epoch": 0.3568537152109685,
      "grad_norm": 3.140625,
      "learning_rate": 4.8947275507475514e-05,
      "loss": 1.0111,
      "step": 101820
    },
    {
      "epoch": 0.35688876271786407,
      "grad_norm": 3.078125,
      "learning_rate": 4.8946626478811815e-05,
      "loss": 0.993,
      "step": 101830
    },
    {
      "epoch": 0.35692381022475966,
      "grad_norm": 3.234375,
      "learning_rate": 4.894597745014811e-05,
      "loss": 1.0353,
      "step": 101840
    },
    {
      "epoch": 0.35695885773165525,
      "grad_norm": 2.96875,
      "learning_rate": 4.894532842148441e-05,
      "loss": 0.9304,
      "step": 101850
    },
    {
      "epoch": 0.35699390523855085,
      "grad_norm": 3.125,
      "learning_rate": 4.8944679392820706e-05,
      "loss": 1.0105,
      "step": 101860
    },
    {
      "epoch": 0.35702895274544644,
      "grad_norm": 2.953125,
      "learning_rate": 4.894403036415701e-05,
      "loss": 0.895,
      "step": 101870
    },
    {
      "epoch": 0.35706400025234203,
      "grad_norm": 3.09375,
      "learning_rate": 4.89433813354933e-05,
      "loss": 1.0343,
      "step": 101880
    },
    {
      "epoch": 0.3570990477592376,
      "grad_norm": 2.796875,
      "learning_rate": 4.8942732306829603e-05,
      "loss": 0.9468,
      "step": 101890
    },
    {
      "epoch": 0.35713409526613327,
      "grad_norm": 3.03125,
      "learning_rate": 4.89420832781659e-05,
      "loss": 0.9313,
      "step": 101900
    },
    {
      "epoch": 0.35716914277302886,
      "grad_norm": 3.375,
      "learning_rate": 4.89414342495022e-05,
      "loss": 0.9962,
      "step": 101910
    },
    {
      "epoch": 0.35720419027992445,
      "grad_norm": 2.796875,
      "learning_rate": 4.89407852208385e-05,
      "loss": 0.9701,
      "step": 101920
    },
    {
      "epoch": 0.35723923778682004,
      "grad_norm": 2.9375,
      "learning_rate": 4.8940136192174795e-05,
      "loss": 1.0162,
      "step": 101930
    },
    {
      "epoch": 0.35727428529371563,
      "grad_norm": 3.078125,
      "learning_rate": 4.89394871635111e-05,
      "loss": 0.9934,
      "step": 101940
    },
    {
      "epoch": 0.3573093328006112,
      "grad_norm": 2.96875,
      "learning_rate": 4.893883813484739e-05,
      "loss": 0.9916,
      "step": 101950
    },
    {
      "epoch": 0.3573443803075068,
      "grad_norm": 2.96875,
      "learning_rate": 4.893818910618369e-05,
      "loss": 1.0113,
      "step": 101960
    },
    {
      "epoch": 0.3573794278144024,
      "grad_norm": 2.875,
      "learning_rate": 4.893754007751998e-05,
      "loss": 1.0216,
      "step": 101970
    },
    {
      "epoch": 0.357414475321298,
      "grad_norm": 2.828125,
      "learning_rate": 4.893689104885628e-05,
      "loss": 0.9434,
      "step": 101980
    },
    {
      "epoch": 0.3574495228281936,
      "grad_norm": 3.0,
      "learning_rate": 4.893624202019258e-05,
      "loss": 0.9555,
      "step": 101990
    },
    {
      "epoch": 0.35748457033508924,
      "grad_norm": 3.078125,
      "learning_rate": 4.893559299152888e-05,
      "loss": 0.91,
      "step": 102000
    },
    {
      "epoch": 0.3575196178419848,
      "grad_norm": 2.625,
      "learning_rate": 4.893494396286518e-05,
      "loss": 0.8986,
      "step": 102010
    },
    {
      "epoch": 0.3575546653488804,
      "grad_norm": 3.015625,
      "learning_rate": 4.8934294934201474e-05,
      "loss": 0.9477,
      "step": 102020
    },
    {
      "epoch": 0.357589712855776,
      "grad_norm": 3.46875,
      "learning_rate": 4.8933645905537775e-05,
      "loss": 0.94,
      "step": 102030
    },
    {
      "epoch": 0.3576247603626716,
      "grad_norm": 3.03125,
      "learning_rate": 4.893299687687407e-05,
      "loss": 0.9487,
      "step": 102040
    },
    {
      "epoch": 0.3576598078695672,
      "grad_norm": 2.515625,
      "learning_rate": 4.893234784821037e-05,
      "loss": 0.8941,
      "step": 102050
    },
    {
      "epoch": 0.3576948553764628,
      "grad_norm": 2.875,
      "learning_rate": 4.8931698819546666e-05,
      "loss": 0.9259,
      "step": 102060
    },
    {
      "epoch": 0.3577299028833584,
      "grad_norm": 3.265625,
      "learning_rate": 4.893104979088297e-05,
      "loss": 0.9213,
      "step": 102070
    },
    {
      "epoch": 0.35776495039025397,
      "grad_norm": 2.5625,
      "learning_rate": 4.893040076221926e-05,
      "loss": 0.9876,
      "step": 102080
    },
    {
      "epoch": 0.3577999978971496,
      "grad_norm": 3.03125,
      "learning_rate": 4.8929751733555563e-05,
      "loss": 0.9592,
      "step": 102090
    },
    {
      "epoch": 0.3578350454040452,
      "grad_norm": 2.6875,
      "learning_rate": 4.892910270489186e-05,
      "loss": 1.0175,
      "step": 102100
    },
    {
      "epoch": 0.3578700929109408,
      "grad_norm": 3.5,
      "learning_rate": 4.892845367622816e-05,
      "loss": 0.9984,
      "step": 102110
    },
    {
      "epoch": 0.3579051404178364,
      "grad_norm": 3.046875,
      "learning_rate": 4.8927804647564454e-05,
      "loss": 0.9363,
      "step": 102120
    },
    {
      "epoch": 0.357940187924732,
      "grad_norm": 3.046875,
      "learning_rate": 4.8927155618900755e-05,
      "loss": 0.9263,
      "step": 102130
    },
    {
      "epoch": 0.35797523543162757,
      "grad_norm": 3.515625,
      "learning_rate": 4.892650659023705e-05,
      "loss": 0.9889,
      "step": 102140
    },
    {
      "epoch": 0.35801028293852316,
      "grad_norm": 2.671875,
      "learning_rate": 4.892585756157335e-05,
      "loss": 0.8953,
      "step": 102150
    },
    {
      "epoch": 0.35804533044541875,
      "grad_norm": 3.28125,
      "learning_rate": 4.892520853290965e-05,
      "loss": 0.9555,
      "step": 102160
    },
    {
      "epoch": 0.35808037795231434,
      "grad_norm": 2.484375,
      "learning_rate": 4.892455950424595e-05,
      "loss": 0.9557,
      "step": 102170
    },
    {
      "epoch": 0.35811542545920994,
      "grad_norm": 3.140625,
      "learning_rate": 4.892391047558225e-05,
      "loss": 1.0066,
      "step": 102180
    },
    {
      "epoch": 0.3581504729661056,
      "grad_norm": 2.796875,
      "learning_rate": 4.8923261446918543e-05,
      "loss": 0.9155,
      "step": 102190
    },
    {
      "epoch": 0.3581855204730012,
      "grad_norm": 3.109375,
      "learning_rate": 4.8922612418254845e-05,
      "loss": 0.9373,
      "step": 102200
    },
    {
      "epoch": 0.35822056797989676,
      "grad_norm": 2.890625,
      "learning_rate": 4.892196338959114e-05,
      "loss": 0.9916,
      "step": 102210
    },
    {
      "epoch": 0.35825561548679236,
      "grad_norm": 3.375,
      "learning_rate": 4.892131436092744e-05,
      "loss": 0.9066,
      "step": 102220
    },
    {
      "epoch": 0.35829066299368795,
      "grad_norm": 2.8125,
      "learning_rate": 4.8920665332263735e-05,
      "loss": 0.9273,
      "step": 102230
    },
    {
      "epoch": 0.35832571050058354,
      "grad_norm": 3.09375,
      "learning_rate": 4.892001630360004e-05,
      "loss": 0.9248,
      "step": 102240
    },
    {
      "epoch": 0.35836075800747913,
      "grad_norm": 3.15625,
      "learning_rate": 4.891936727493633e-05,
      "loss": 0.8947,
      "step": 102250
    },
    {
      "epoch": 0.3583958055143747,
      "grad_norm": 2.890625,
      "learning_rate": 4.891871824627263e-05,
      "loss": 0.9153,
      "step": 102260
    },
    {
      "epoch": 0.3584308530212703,
      "grad_norm": 2.6875,
      "learning_rate": 4.891806921760893e-05,
      "loss": 0.9431,
      "step": 102270
    },
    {
      "epoch": 0.3584659005281659,
      "grad_norm": 3.390625,
      "learning_rate": 4.891742018894523e-05,
      "loss": 0.9749,
      "step": 102280
    },
    {
      "epoch": 0.35850094803506155,
      "grad_norm": 3.21875,
      "learning_rate": 4.891677116028153e-05,
      "loss": 1.024,
      "step": 102290
    },
    {
      "epoch": 0.35853599554195714,
      "grad_norm": 3.171875,
      "learning_rate": 4.8916122131617825e-05,
      "loss": 0.9471,
      "step": 102300
    },
    {
      "epoch": 0.35857104304885273,
      "grad_norm": 3.046875,
      "learning_rate": 4.8915473102954126e-05,
      "loss": 0.9148,
      "step": 102310
    },
    {
      "epoch": 0.3586060905557483,
      "grad_norm": 2.96875,
      "learning_rate": 4.891482407429042e-05,
      "loss": 0.9955,
      "step": 102320
    },
    {
      "epoch": 0.3586411380626439,
      "grad_norm": 2.953125,
      "learning_rate": 4.891417504562672e-05,
      "loss": 0.9272,
      "step": 102330
    },
    {
      "epoch": 0.3586761855695395,
      "grad_norm": 2.796875,
      "learning_rate": 4.891352601696301e-05,
      "loss": 0.8549,
      "step": 102340
    },
    {
      "epoch": 0.3587112330764351,
      "grad_norm": 3.46875,
      "learning_rate": 4.891287698829931e-05,
      "loss": 0.9716,
      "step": 102350
    },
    {
      "epoch": 0.3587462805833307,
      "grad_norm": 3.640625,
      "learning_rate": 4.8912227959635606e-05,
      "loss": 1.0125,
      "step": 102360
    },
    {
      "epoch": 0.3587813280902263,
      "grad_norm": 2.734375,
      "learning_rate": 4.891157893097191e-05,
      "loss": 0.9121,
      "step": 102370
    },
    {
      "epoch": 0.3588163755971219,
      "grad_norm": 3.0,
      "learning_rate": 4.891092990230821e-05,
      "loss": 1.0216,
      "step": 102380
    },
    {
      "epoch": 0.3588514231040175,
      "grad_norm": 2.890625,
      "learning_rate": 4.8910280873644503e-05,
      "loss": 1.0181,
      "step": 102390
    },
    {
      "epoch": 0.3588864706109131,
      "grad_norm": 2.84375,
      "learning_rate": 4.8909631844980805e-05,
      "loss": 1.0136,
      "step": 102400
    },
    {
      "epoch": 0.3589215181178087,
      "grad_norm": 2.96875,
      "learning_rate": 4.89089828163171e-05,
      "loss": 0.9853,
      "step": 102410
    },
    {
      "epoch": 0.3589565656247043,
      "grad_norm": 3.265625,
      "learning_rate": 4.89083337876534e-05,
      "loss": 0.8921,
      "step": 102420
    },
    {
      "epoch": 0.3589916131315999,
      "grad_norm": 3.1875,
      "learning_rate": 4.8907684758989695e-05,
      "loss": 0.9487,
      "step": 102430
    },
    {
      "epoch": 0.3590266606384955,
      "grad_norm": 3.5,
      "learning_rate": 4.8907035730326e-05,
      "loss": 1.0014,
      "step": 102440
    },
    {
      "epoch": 0.35906170814539107,
      "grad_norm": 3.4375,
      "learning_rate": 4.890638670166229e-05,
      "loss": 0.9916,
      "step": 102450
    },
    {
      "epoch": 0.35909675565228666,
      "grad_norm": 3.8125,
      "learning_rate": 4.890573767299859e-05,
      "loss": 0.8869,
      "step": 102460
    },
    {
      "epoch": 0.35913180315918225,
      "grad_norm": 2.96875,
      "learning_rate": 4.890508864433489e-05,
      "loss": 0.9891,
      "step": 102470
    },
    {
      "epoch": 0.35916685066607784,
      "grad_norm": 3.1875,
      "learning_rate": 4.890443961567119e-05,
      "loss": 1.0226,
      "step": 102480
    },
    {
      "epoch": 0.3592018981729735,
      "grad_norm": 2.5625,
      "learning_rate": 4.8903790587007483e-05,
      "loss": 1.0462,
      "step": 102490
    },
    {
      "epoch": 0.3592369456798691,
      "grad_norm": 2.828125,
      "learning_rate": 4.8903141558343785e-05,
      "loss": 0.947,
      "step": 102500
    },
    {
      "epoch": 0.35927199318676467,
      "grad_norm": 3.4375,
      "learning_rate": 4.8902492529680086e-05,
      "loss": 0.9997,
      "step": 102510
    },
    {
      "epoch": 0.35930704069366026,
      "grad_norm": 2.859375,
      "learning_rate": 4.890184350101638e-05,
      "loss": 0.9907,
      "step": 102520
    },
    {
      "epoch": 0.35934208820055585,
      "grad_norm": 3.625,
      "learning_rate": 4.890119447235268e-05,
      "loss": 1.0454,
      "step": 102530
    },
    {
      "epoch": 0.35937713570745144,
      "grad_norm": 3.265625,
      "learning_rate": 4.890054544368898e-05,
      "loss": 0.9696,
      "step": 102540
    },
    {
      "epoch": 0.35941218321434704,
      "grad_norm": 3.109375,
      "learning_rate": 4.889989641502528e-05,
      "loss": 0.9181,
      "step": 102550
    },
    {
      "epoch": 0.3594472307212426,
      "grad_norm": 3.09375,
      "learning_rate": 4.889924738636157e-05,
      "loss": 0.945,
      "step": 102560
    },
    {
      "epoch": 0.3594822782281382,
      "grad_norm": 3.046875,
      "learning_rate": 4.8898598357697874e-05,
      "loss": 1.0024,
      "step": 102570
    },
    {
      "epoch": 0.35951732573503387,
      "grad_norm": 3.234375,
      "learning_rate": 4.889794932903417e-05,
      "loss": 0.981,
      "step": 102580
    },
    {
      "epoch": 0.35955237324192946,
      "grad_norm": 3.21875,
      "learning_rate": 4.889730030037047e-05,
      "loss": 0.8983,
      "step": 102590
    },
    {
      "epoch": 0.35958742074882505,
      "grad_norm": 3.0,
      "learning_rate": 4.8896651271706765e-05,
      "loss": 0.9328,
      "step": 102600
    },
    {
      "epoch": 0.35962246825572064,
      "grad_norm": 3.109375,
      "learning_rate": 4.8896002243043066e-05,
      "loss": 0.9731,
      "step": 102610
    },
    {
      "epoch": 0.35965751576261623,
      "grad_norm": 2.796875,
      "learning_rate": 4.889535321437936e-05,
      "loss": 1.0394,
      "step": 102620
    },
    {
      "epoch": 0.3596925632695118,
      "grad_norm": 2.90625,
      "learning_rate": 4.889470418571566e-05,
      "loss": 0.9511,
      "step": 102630
    },
    {
      "epoch": 0.3597276107764074,
      "grad_norm": 2.921875,
      "learning_rate": 4.889405515705196e-05,
      "loss": 1.0768,
      "step": 102640
    },
    {
      "epoch": 0.359762658283303,
      "grad_norm": 2.84375,
      "learning_rate": 4.889340612838826e-05,
      "loss": 0.9584,
      "step": 102650
    },
    {
      "epoch": 0.3597977057901986,
      "grad_norm": 3.1875,
      "learning_rate": 4.889275709972456e-05,
      "loss": 0.9935,
      "step": 102660
    },
    {
      "epoch": 0.3598327532970942,
      "grad_norm": 3.40625,
      "learning_rate": 4.8892108071060854e-05,
      "loss": 0.9203,
      "step": 102670
    },
    {
      "epoch": 0.35986780080398983,
      "grad_norm": 2.84375,
      "learning_rate": 4.8891459042397156e-05,
      "loss": 0.94,
      "step": 102680
    },
    {
      "epoch": 0.3599028483108854,
      "grad_norm": 3.3125,
      "learning_rate": 4.889081001373345e-05,
      "loss": 1.0114,
      "step": 102690
    },
    {
      "epoch": 0.359937895817781,
      "grad_norm": 3.265625,
      "learning_rate": 4.889016098506975e-05,
      "loss": 0.9232,
      "step": 102700
    },
    {
      "epoch": 0.3599729433246766,
      "grad_norm": 3.09375,
      "learning_rate": 4.8889511956406046e-05,
      "loss": 0.8458,
      "step": 102710
    },
    {
      "epoch": 0.3600079908315722,
      "grad_norm": 3.546875,
      "learning_rate": 4.888886292774234e-05,
      "loss": 0.9023,
      "step": 102720
    },
    {
      "epoch": 0.3600430383384678,
      "grad_norm": 3.40625,
      "learning_rate": 4.8888213899078635e-05,
      "loss": 1.0267,
      "step": 102730
    },
    {
      "epoch": 0.3600780858453634,
      "grad_norm": 3.109375,
      "learning_rate": 4.888756487041494e-05,
      "loss": 0.9366,
      "step": 102740
    },
    {
      "epoch": 0.360113133352259,
      "grad_norm": 3.609375,
      "learning_rate": 4.888691584175124e-05,
      "loss": 1.0256,
      "step": 102750
    },
    {
      "epoch": 0.36014818085915457,
      "grad_norm": 2.953125,
      "learning_rate": 4.888626681308753e-05,
      "loss": 0.9684,
      "step": 102760
    },
    {
      "epoch": 0.36018322836605016,
      "grad_norm": 2.90625,
      "learning_rate": 4.8885617784423834e-05,
      "loss": 0.9391,
      "step": 102770
    },
    {
      "epoch": 0.3602182758729458,
      "grad_norm": 3.1875,
      "learning_rate": 4.888496875576013e-05,
      "loss": 0.8823,
      "step": 102780
    },
    {
      "epoch": 0.3602533233798414,
      "grad_norm": 3.078125,
      "learning_rate": 4.888431972709643e-05,
      "loss": 0.9425,
      "step": 102790
    },
    {
      "epoch": 0.360288370886737,
      "grad_norm": 3.109375,
      "learning_rate": 4.8883670698432725e-05,
      "loss": 0.9922,
      "step": 102800
    },
    {
      "epoch": 0.3603234183936326,
      "grad_norm": 2.859375,
      "learning_rate": 4.8883021669769026e-05,
      "loss": 0.9274,
      "step": 102810
    },
    {
      "epoch": 0.36035846590052817,
      "grad_norm": 3.09375,
      "learning_rate": 4.888237264110532e-05,
      "loss": 0.9896,
      "step": 102820
    },
    {
      "epoch": 0.36039351340742376,
      "grad_norm": 3.65625,
      "learning_rate": 4.888172361244162e-05,
      "loss": 0.9056,
      "step": 102830
    },
    {
      "epoch": 0.36042856091431935,
      "grad_norm": 2.984375,
      "learning_rate": 4.888107458377792e-05,
      "loss": 1.0078,
      "step": 102840
    },
    {
      "epoch": 0.36046360842121494,
      "grad_norm": 3.4375,
      "learning_rate": 4.888042555511422e-05,
      "loss": 0.961,
      "step": 102850
    },
    {
      "epoch": 0.36049865592811053,
      "grad_norm": 3.265625,
      "learning_rate": 4.887977652645051e-05,
      "loss": 0.9808,
      "step": 102860
    },
    {
      "epoch": 0.3605337034350061,
      "grad_norm": 3.0625,
      "learning_rate": 4.8879127497786814e-05,
      "loss": 0.938,
      "step": 102870
    },
    {
      "epoch": 0.36056875094190177,
      "grad_norm": 3.1875,
      "learning_rate": 4.8878478469123116e-05,
      "loss": 1.0409,
      "step": 102880
    },
    {
      "epoch": 0.36060379844879736,
      "grad_norm": 2.703125,
      "learning_rate": 4.887782944045941e-05,
      "loss": 1.01,
      "step": 102890
    },
    {
      "epoch": 0.36063884595569295,
      "grad_norm": 3.59375,
      "learning_rate": 4.887718041179571e-05,
      "loss": 1.0403,
      "step": 102900
    },
    {
      "epoch": 0.36067389346258855,
      "grad_norm": 3.03125,
      "learning_rate": 4.8876531383132006e-05,
      "loss": 1.0095,
      "step": 102910
    },
    {
      "epoch": 0.36070894096948414,
      "grad_norm": 3.0,
      "learning_rate": 4.887588235446831e-05,
      "loss": 0.9787,
      "step": 102920
    },
    {
      "epoch": 0.36074398847637973,
      "grad_norm": 2.953125,
      "learning_rate": 4.88752333258046e-05,
      "loss": 1.0419,
      "step": 102930
    },
    {
      "epoch": 0.3607790359832753,
      "grad_norm": 3.140625,
      "learning_rate": 4.8874584297140904e-05,
      "loss": 0.9327,
      "step": 102940
    },
    {
      "epoch": 0.3608140834901709,
      "grad_norm": 3.3125,
      "learning_rate": 4.88739352684772e-05,
      "loss": 1.0241,
      "step": 102950
    },
    {
      "epoch": 0.3608491309970665,
      "grad_norm": 3.203125,
      "learning_rate": 4.88732862398135e-05,
      "loss": 0.9762,
      "step": 102960
    },
    {
      "epoch": 0.3608841785039621,
      "grad_norm": 3.15625,
      "learning_rate": 4.8872637211149794e-05,
      "loss": 0.9465,
      "step": 102970
    },
    {
      "epoch": 0.36091922601085774,
      "grad_norm": 3.53125,
      "learning_rate": 4.8871988182486096e-05,
      "loss": 1.0203,
      "step": 102980
    },
    {
      "epoch": 0.36095427351775333,
      "grad_norm": 3.328125,
      "learning_rate": 4.887133915382239e-05,
      "loss": 0.929,
      "step": 102990
    },
    {
      "epoch": 0.3609893210246489,
      "grad_norm": 3.046875,
      "learning_rate": 4.887069012515869e-05,
      "loss": 0.9192,
      "step": 103000
    },
    {
      "epoch": 0.3610243685315445,
      "grad_norm": 2.8125,
      "learning_rate": 4.8870041096494986e-05,
      "loss": 1.0161,
      "step": 103010
    },
    {
      "epoch": 0.3610594160384401,
      "grad_norm": 2.515625,
      "learning_rate": 4.886939206783129e-05,
      "loss": 0.8771,
      "step": 103020
    },
    {
      "epoch": 0.3610944635453357,
      "grad_norm": 3.265625,
      "learning_rate": 4.886874303916759e-05,
      "loss": 0.9197,
      "step": 103030
    },
    {
      "epoch": 0.3611295110522313,
      "grad_norm": 3.125,
      "learning_rate": 4.8868094010503884e-05,
      "loss": 0.981,
      "step": 103040
    },
    {
      "epoch": 0.3611645585591269,
      "grad_norm": 3.171875,
      "learning_rate": 4.8867444981840185e-05,
      "loss": 1.0636,
      "step": 103050
    },
    {
      "epoch": 0.36119960606602247,
      "grad_norm": 3.015625,
      "learning_rate": 4.886679595317648e-05,
      "loss": 0.8818,
      "step": 103060
    },
    {
      "epoch": 0.36123465357291806,
      "grad_norm": 3.234375,
      "learning_rate": 4.886614692451278e-05,
      "loss": 0.9326,
      "step": 103070
    },
    {
      "epoch": 0.3612697010798137,
      "grad_norm": 2.875,
      "learning_rate": 4.8865497895849076e-05,
      "loss": 0.9736,
      "step": 103080
    },
    {
      "epoch": 0.3613047485867093,
      "grad_norm": 3.328125,
      "learning_rate": 4.886484886718538e-05,
      "loss": 1.0094,
      "step": 103090
    },
    {
      "epoch": 0.3613397960936049,
      "grad_norm": 3.484375,
      "learning_rate": 4.8864199838521665e-05,
      "loss": 0.9587,
      "step": 103100
    },
    {
      "epoch": 0.3613748436005005,
      "grad_norm": 3.03125,
      "learning_rate": 4.8863550809857966e-05,
      "loss": 0.9588,
      "step": 103110
    },
    {
      "epoch": 0.3614098911073961,
      "grad_norm": 3.03125,
      "learning_rate": 4.886290178119427e-05,
      "loss": 0.9348,
      "step": 103120
    },
    {
      "epoch": 0.36144493861429167,
      "grad_norm": 3.203125,
      "learning_rate": 4.886225275253056e-05,
      "loss": 0.9328,
      "step": 103130
    },
    {
      "epoch": 0.36147998612118726,
      "grad_norm": 2.84375,
      "learning_rate": 4.8861603723866864e-05,
      "loss": 0.9072,
      "step": 103140
    },
    {
      "epoch": 0.36151503362808285,
      "grad_norm": 3.21875,
      "learning_rate": 4.886095469520316e-05,
      "loss": 0.9135,
      "step": 103150
    },
    {
      "epoch": 0.36155008113497844,
      "grad_norm": 3.0625,
      "learning_rate": 4.886030566653946e-05,
      "loss": 0.9814,
      "step": 103160
    },
    {
      "epoch": 0.3615851286418741,
      "grad_norm": 2.921875,
      "learning_rate": 4.8859656637875754e-05,
      "loss": 0.9669,
      "step": 103170
    },
    {
      "epoch": 0.3616201761487697,
      "grad_norm": 2.890625,
      "learning_rate": 4.8859007609212056e-05,
      "loss": 0.9762,
      "step": 103180
    },
    {
      "epoch": 0.36165522365566527,
      "grad_norm": 3.15625,
      "learning_rate": 4.885835858054835e-05,
      "loss": 0.9455,
      "step": 103190
    },
    {
      "epoch": 0.36169027116256086,
      "grad_norm": 3.15625,
      "learning_rate": 4.885770955188465e-05,
      "loss": 0.9889,
      "step": 103200
    },
    {
      "epoch": 0.36172531866945645,
      "grad_norm": 3.03125,
      "learning_rate": 4.8857060523220946e-05,
      "loss": 0.9526,
      "step": 103210
    },
    {
      "epoch": 0.36176036617635204,
      "grad_norm": 3.015625,
      "learning_rate": 4.885641149455725e-05,
      "loss": 0.9717,
      "step": 103220
    },
    {
      "epoch": 0.36179541368324764,
      "grad_norm": 3.140625,
      "learning_rate": 4.885576246589354e-05,
      "loss": 1.0345,
      "step": 103230
    },
    {
      "epoch": 0.3618304611901432,
      "grad_norm": 3.015625,
      "learning_rate": 4.8855113437229844e-05,
      "loss": 0.9369,
      "step": 103240
    },
    {
      "epoch": 0.3618655086970388,
      "grad_norm": 2.609375,
      "learning_rate": 4.8854464408566145e-05,
      "loss": 0.9801,
      "step": 103250
    },
    {
      "epoch": 0.3619005562039344,
      "grad_norm": 3.296875,
      "learning_rate": 4.885381537990244e-05,
      "loss": 0.9524,
      "step": 103260
    },
    {
      "epoch": 0.36193560371083006,
      "grad_norm": 2.953125,
      "learning_rate": 4.885316635123874e-05,
      "loss": 0.9226,
      "step": 103270
    },
    {
      "epoch": 0.36197065121772565,
      "grad_norm": 2.875,
      "learning_rate": 4.8852517322575036e-05,
      "loss": 0.9553,
      "step": 103280
    },
    {
      "epoch": 0.36200569872462124,
      "grad_norm": 3.0,
      "learning_rate": 4.885186829391134e-05,
      "loss": 1.0477,
      "step": 103290
    },
    {
      "epoch": 0.36204074623151683,
      "grad_norm": 3.703125,
      "learning_rate": 4.885121926524763e-05,
      "loss": 1.0122,
      "step": 103300
    },
    {
      "epoch": 0.3620757937384124,
      "grad_norm": 3.1875,
      "learning_rate": 4.885057023658393e-05,
      "loss": 0.9713,
      "step": 103310
    },
    {
      "epoch": 0.362110841245308,
      "grad_norm": 2.625,
      "learning_rate": 4.884992120792023e-05,
      "loss": 0.9074,
      "step": 103320
    },
    {
      "epoch": 0.3621458887522036,
      "grad_norm": 2.953125,
      "learning_rate": 4.884927217925653e-05,
      "loss": 0.9777,
      "step": 103330
    },
    {
      "epoch": 0.3621809362590992,
      "grad_norm": 2.59375,
      "learning_rate": 4.8848623150592824e-05,
      "loss": 0.9609,
      "step": 103340
    },
    {
      "epoch": 0.3622159837659948,
      "grad_norm": 3.234375,
      "learning_rate": 4.8847974121929125e-05,
      "loss": 1.0224,
      "step": 103350
    },
    {
      "epoch": 0.3622510312728904,
      "grad_norm": 2.734375,
      "learning_rate": 4.884732509326542e-05,
      "loss": 0.9594,
      "step": 103360
    },
    {
      "epoch": 0.362286078779786,
      "grad_norm": 2.796875,
      "learning_rate": 4.884667606460172e-05,
      "loss": 0.9642,
      "step": 103370
    },
    {
      "epoch": 0.3623211262866816,
      "grad_norm": 3.1875,
      "learning_rate": 4.8846027035938016e-05,
      "loss": 0.9643,
      "step": 103380
    },
    {
      "epoch": 0.3623561737935772,
      "grad_norm": 2.953125,
      "learning_rate": 4.884537800727432e-05,
      "loss": 0.9753,
      "step": 103390
    },
    {
      "epoch": 0.3623912213004728,
      "grad_norm": 3.078125,
      "learning_rate": 4.884472897861062e-05,
      "loss": 0.9134,
      "step": 103400
    },
    {
      "epoch": 0.3624262688073684,
      "grad_norm": 3.40625,
      "learning_rate": 4.884407994994691e-05,
      "loss": 0.932,
      "step": 103410
    },
    {
      "epoch": 0.362461316314264,
      "grad_norm": 3.296875,
      "learning_rate": 4.8843430921283214e-05,
      "loss": 1.0,
      "step": 103420
    },
    {
      "epoch": 0.3624963638211596,
      "grad_norm": 2.75,
      "learning_rate": 4.884278189261951e-05,
      "loss": 0.9649,
      "step": 103430
    },
    {
      "epoch": 0.36253141132805516,
      "grad_norm": 3.171875,
      "learning_rate": 4.884213286395581e-05,
      "loss": 0.9862,
      "step": 103440
    },
    {
      "epoch": 0.36256645883495076,
      "grad_norm": 3.078125,
      "learning_rate": 4.8841483835292105e-05,
      "loss": 0.9985,
      "step": 103450
    },
    {
      "epoch": 0.36260150634184635,
      "grad_norm": 3.109375,
      "learning_rate": 4.8840834806628406e-05,
      "loss": 1.0639,
      "step": 103460
    },
    {
      "epoch": 0.362636553848742,
      "grad_norm": 2.90625,
      "learning_rate": 4.88401857779647e-05,
      "loss": 0.899,
      "step": 103470
    },
    {
      "epoch": 0.3626716013556376,
      "grad_norm": 3.046875,
      "learning_rate": 4.8839536749300996e-05,
      "loss": 0.8785,
      "step": 103480
    },
    {
      "epoch": 0.3627066488625332,
      "grad_norm": 2.90625,
      "learning_rate": 4.88388877206373e-05,
      "loss": 0.9818,
      "step": 103490
    },
    {
      "epoch": 0.36274169636942877,
      "grad_norm": 3.5625,
      "learning_rate": 4.883823869197359e-05,
      "loss": 0.9611,
      "step": 103500
    },
    {
      "epoch": 0.36277674387632436,
      "grad_norm": 3.25,
      "learning_rate": 4.883758966330989e-05,
      "loss": 0.912,
      "step": 103510
    },
    {
      "epoch": 0.36281179138321995,
      "grad_norm": 3.640625,
      "learning_rate": 4.883694063464619e-05,
      "loss": 0.9605,
      "step": 103520
    },
    {
      "epoch": 0.36284683889011554,
      "grad_norm": 2.640625,
      "learning_rate": 4.883629160598249e-05,
      "loss": 1.0134,
      "step": 103530
    },
    {
      "epoch": 0.36288188639701113,
      "grad_norm": 2.921875,
      "learning_rate": 4.8835642577318784e-05,
      "loss": 0.9335,
      "step": 103540
    },
    {
      "epoch": 0.3629169339039067,
      "grad_norm": 2.6875,
      "learning_rate": 4.8834993548655085e-05,
      "loss": 0.9417,
      "step": 103550
    },
    {
      "epoch": 0.3629519814108023,
      "grad_norm": 2.875,
      "learning_rate": 4.883434451999138e-05,
      "loss": 0.9457,
      "step": 103560
    },
    {
      "epoch": 0.36298702891769796,
      "grad_norm": 3.3125,
      "learning_rate": 4.883369549132768e-05,
      "loss": 0.9546,
      "step": 103570
    },
    {
      "epoch": 0.36302207642459355,
      "grad_norm": 3.203125,
      "learning_rate": 4.8833046462663976e-05,
      "loss": 1.0844,
      "step": 103580
    },
    {
      "epoch": 0.36305712393148915,
      "grad_norm": 3.640625,
      "learning_rate": 4.883239743400028e-05,
      "loss": 0.9456,
      "step": 103590
    },
    {
      "epoch": 0.36309217143838474,
      "grad_norm": 3.53125,
      "learning_rate": 4.883174840533657e-05,
      "loss": 0.9632,
      "step": 103600
    },
    {
      "epoch": 0.36312721894528033,
      "grad_norm": 3.234375,
      "learning_rate": 4.883109937667287e-05,
      "loss": 0.9967,
      "step": 103610
    },
    {
      "epoch": 0.3631622664521759,
      "grad_norm": 2.953125,
      "learning_rate": 4.8830450348009174e-05,
      "loss": 0.9721,
      "step": 103620
    },
    {
      "epoch": 0.3631973139590715,
      "grad_norm": 2.96875,
      "learning_rate": 4.882980131934547e-05,
      "loss": 0.9853,
      "step": 103630
    },
    {
      "epoch": 0.3632323614659671,
      "grad_norm": 3.515625,
      "learning_rate": 4.882915229068177e-05,
      "loss": 0.9772,
      "step": 103640
    },
    {
      "epoch": 0.3632674089728627,
      "grad_norm": 3.109375,
      "learning_rate": 4.8828503262018065e-05,
      "loss": 0.9769,
      "step": 103650
    },
    {
      "epoch": 0.36330245647975834,
      "grad_norm": 3.234375,
      "learning_rate": 4.8827854233354366e-05,
      "loss": 1.0565,
      "step": 103660
    },
    {
      "epoch": 0.36333750398665393,
      "grad_norm": 2.90625,
      "learning_rate": 4.882720520469066e-05,
      "loss": 0.9199,
      "step": 103670
    },
    {
      "epoch": 0.3633725514935495,
      "grad_norm": 3.1875,
      "learning_rate": 4.882655617602696e-05,
      "loss": 0.9029,
      "step": 103680
    },
    {
      "epoch": 0.3634075990004451,
      "grad_norm": 2.84375,
      "learning_rate": 4.882590714736326e-05,
      "loss": 0.9635,
      "step": 103690
    },
    {
      "epoch": 0.3634426465073407,
      "grad_norm": 3.09375,
      "learning_rate": 4.882525811869956e-05,
      "loss": 0.9,
      "step": 103700
    },
    {
      "epoch": 0.3634776940142363,
      "grad_norm": 3.03125,
      "learning_rate": 4.882460909003585e-05,
      "loss": 0.9803,
      "step": 103710
    },
    {
      "epoch": 0.3635127415211319,
      "grad_norm": 2.40625,
      "learning_rate": 4.8823960061372154e-05,
      "loss": 0.9422,
      "step": 103720
    },
    {
      "epoch": 0.3635477890280275,
      "grad_norm": 3.125,
      "learning_rate": 4.882331103270845e-05,
      "loss": 0.9807,
      "step": 103730
    },
    {
      "epoch": 0.36358283653492307,
      "grad_norm": 2.984375,
      "learning_rate": 4.882266200404475e-05,
      "loss": 0.9496,
      "step": 103740
    },
    {
      "epoch": 0.36361788404181866,
      "grad_norm": 3.03125,
      "learning_rate": 4.882201297538105e-05,
      "loss": 0.9744,
      "step": 103750
    },
    {
      "epoch": 0.3636529315487143,
      "grad_norm": 3.015625,
      "learning_rate": 4.8821363946717346e-05,
      "loss": 0.9743,
      "step": 103760
    },
    {
      "epoch": 0.3636879790556099,
      "grad_norm": 3.25,
      "learning_rate": 4.882071491805365e-05,
      "loss": 0.9891,
      "step": 103770
    },
    {
      "epoch": 0.3637230265625055,
      "grad_norm": 2.75,
      "learning_rate": 4.882006588938994e-05,
      "loss": 0.8922,
      "step": 103780
    },
    {
      "epoch": 0.3637580740694011,
      "grad_norm": 3.109375,
      "learning_rate": 4.8819416860726244e-05,
      "loss": 0.9307,
      "step": 103790
    },
    {
      "epoch": 0.3637931215762967,
      "grad_norm": 2.875,
      "learning_rate": 4.881876783206254e-05,
      "loss": 0.9707,
      "step": 103800
    },
    {
      "epoch": 0.36382816908319227,
      "grad_norm": 2.9375,
      "learning_rate": 4.881811880339884e-05,
      "loss": 0.9737,
      "step": 103810
    },
    {
      "epoch": 0.36386321659008786,
      "grad_norm": 3.03125,
      "learning_rate": 4.8817469774735134e-05,
      "loss": 1.0465,
      "step": 103820
    },
    {
      "epoch": 0.36389826409698345,
      "grad_norm": 3.453125,
      "learning_rate": 4.8816820746071436e-05,
      "loss": 0.9733,
      "step": 103830
    },
    {
      "epoch": 0.36393331160387904,
      "grad_norm": 3.15625,
      "learning_rate": 4.881617171740773e-05,
      "loss": 0.9379,
      "step": 103840
    },
    {
      "epoch": 0.36396835911077463,
      "grad_norm": 2.9375,
      "learning_rate": 4.8815522688744025e-05,
      "loss": 0.9577,
      "step": 103850
    },
    {
      "epoch": 0.3640034066176703,
      "grad_norm": 3.625,
      "learning_rate": 4.8814873660080326e-05,
      "loss": 0.8929,
      "step": 103860
    },
    {
      "epoch": 0.36403845412456587,
      "grad_norm": 2.953125,
      "learning_rate": 4.881422463141662e-05,
      "loss": 0.9121,
      "step": 103870
    },
    {
      "epoch": 0.36407350163146146,
      "grad_norm": 3.34375,
      "learning_rate": 4.881357560275292e-05,
      "loss": 0.9526,
      "step": 103880
    },
    {
      "epoch": 0.36410854913835705,
      "grad_norm": 3.0,
      "learning_rate": 4.881292657408922e-05,
      "loss": 0.9771,
      "step": 103890
    },
    {
      "epoch": 0.36414359664525264,
      "grad_norm": 3.1875,
      "learning_rate": 4.881227754542552e-05,
      "loss": 0.9565,
      "step": 103900
    },
    {
      "epoch": 0.36417864415214823,
      "grad_norm": 3.359375,
      "learning_rate": 4.881162851676181e-05,
      "loss": 0.9255,
      "step": 103910
    },
    {
      "epoch": 0.3642136916590438,
      "grad_norm": 3.328125,
      "learning_rate": 4.8810979488098114e-05,
      "loss": 0.9698,
      "step": 103920
    },
    {
      "epoch": 0.3642487391659394,
      "grad_norm": 2.71875,
      "learning_rate": 4.881033045943441e-05,
      "loss": 0.9882,
      "step": 103930
    },
    {
      "epoch": 0.364283786672835,
      "grad_norm": 3.28125,
      "learning_rate": 4.880968143077071e-05,
      "loss": 0.9893,
      "step": 103940
    },
    {
      "epoch": 0.3643188341797306,
      "grad_norm": 2.828125,
      "learning_rate": 4.8809032402107005e-05,
      "loss": 0.9198,
      "step": 103950
    },
    {
      "epoch": 0.36435388168662625,
      "grad_norm": 3.328125,
      "learning_rate": 4.8808383373443306e-05,
      "loss": 0.9728,
      "step": 103960
    },
    {
      "epoch": 0.36438892919352184,
      "grad_norm": 3.21875,
      "learning_rate": 4.88077343447796e-05,
      "loss": 0.9809,
      "step": 103970
    },
    {
      "epoch": 0.36442397670041743,
      "grad_norm": 3.125,
      "learning_rate": 4.88070853161159e-05,
      "loss": 1.0129,
      "step": 103980
    },
    {
      "epoch": 0.364459024207313,
      "grad_norm": 3.296875,
      "learning_rate": 4.8806436287452204e-05,
      "loss": 0.968,
      "step": 103990
    },
    {
      "epoch": 0.3644940717142086,
      "grad_norm": 2.546875,
      "learning_rate": 4.88057872587885e-05,
      "loss": 0.8448,
      "step": 104000
    },
    {
      "epoch": 0.3645291192211042,
      "grad_norm": 3.140625,
      "learning_rate": 4.88051382301248e-05,
      "loss": 0.9021,
      "step": 104010
    },
    {
      "epoch": 0.3645641667279998,
      "grad_norm": 2.9375,
      "learning_rate": 4.8804489201461094e-05,
      "loss": 0.998,
      "step": 104020
    },
    {
      "epoch": 0.3645992142348954,
      "grad_norm": 3.1875,
      "learning_rate": 4.8803840172797396e-05,
      "loss": 0.9218,
      "step": 104030
    },
    {
      "epoch": 0.364634261741791,
      "grad_norm": 3.234375,
      "learning_rate": 4.880319114413369e-05,
      "loss": 0.8751,
      "step": 104040
    },
    {
      "epoch": 0.36466930924868657,
      "grad_norm": 2.484375,
      "learning_rate": 4.880254211546999e-05,
      "loss": 0.9768,
      "step": 104050
    },
    {
      "epoch": 0.3647043567555822,
      "grad_norm": 3.0,
      "learning_rate": 4.8801893086806286e-05,
      "loss": 0.9918,
      "step": 104060
    },
    {
      "epoch": 0.3647394042624778,
      "grad_norm": 3.53125,
      "learning_rate": 4.880124405814259e-05,
      "loss": 0.9088,
      "step": 104070
    },
    {
      "epoch": 0.3647744517693734,
      "grad_norm": 2.875,
      "learning_rate": 4.880059502947888e-05,
      "loss": 0.995,
      "step": 104080
    },
    {
      "epoch": 0.364809499276269,
      "grad_norm": 2.546875,
      "learning_rate": 4.8799946000815184e-05,
      "loss": 0.9541,
      "step": 104090
    },
    {
      "epoch": 0.3648445467831646,
      "grad_norm": 3.140625,
      "learning_rate": 4.879929697215148e-05,
      "loss": 0.8783,
      "step": 104100
    },
    {
      "epoch": 0.36487959429006017,
      "grad_norm": 2.390625,
      "learning_rate": 4.879864794348778e-05,
      "loss": 0.9126,
      "step": 104110
    },
    {
      "epoch": 0.36491464179695576,
      "grad_norm": 3.0,
      "learning_rate": 4.879799891482408e-05,
      "loss": 0.9221,
      "step": 104120
    },
    {
      "epoch": 0.36494968930385135,
      "grad_norm": 2.9375,
      "learning_rate": 4.8797349886160376e-05,
      "loss": 1.0278,
      "step": 104130
    },
    {
      "epoch": 0.36498473681074695,
      "grad_norm": 3.109375,
      "learning_rate": 4.879670085749668e-05,
      "loss": 0.8953,
      "step": 104140
    },
    {
      "epoch": 0.36501978431764254,
      "grad_norm": 3.203125,
      "learning_rate": 4.879605182883297e-05,
      "loss": 0.9181,
      "step": 104150
    },
    {
      "epoch": 0.3650548318245382,
      "grad_norm": 2.765625,
      "learning_rate": 4.879540280016927e-05,
      "loss": 0.9879,
      "step": 104160
    },
    {
      "epoch": 0.3650898793314338,
      "grad_norm": 3.015625,
      "learning_rate": 4.879475377150557e-05,
      "loss": 1.0199,
      "step": 104170
    },
    {
      "epoch": 0.36512492683832937,
      "grad_norm": 3.265625,
      "learning_rate": 4.879410474284187e-05,
      "loss": 0.9819,
      "step": 104180
    },
    {
      "epoch": 0.36515997434522496,
      "grad_norm": 3.1875,
      "learning_rate": 4.8793455714178164e-05,
      "loss": 0.9197,
      "step": 104190
    },
    {
      "epoch": 0.36519502185212055,
      "grad_norm": 3.484375,
      "learning_rate": 4.8792806685514465e-05,
      "loss": 0.9585,
      "step": 104200
    },
    {
      "epoch": 0.36523006935901614,
      "grad_norm": 2.9375,
      "learning_rate": 4.879215765685076e-05,
      "loss": 0.9195,
      "step": 104210
    },
    {
      "epoch": 0.36526511686591173,
      "grad_norm": 3.03125,
      "learning_rate": 4.879150862818706e-05,
      "loss": 0.938,
      "step": 104220
    },
    {
      "epoch": 0.3653001643728073,
      "grad_norm": 2.90625,
      "learning_rate": 4.8790859599523356e-05,
      "loss": 1.0534,
      "step": 104230
    },
    {
      "epoch": 0.3653352118797029,
      "grad_norm": 3.171875,
      "learning_rate": 4.879021057085965e-05,
      "loss": 0.947,
      "step": 104240
    },
    {
      "epoch": 0.36537025938659856,
      "grad_norm": 3.390625,
      "learning_rate": 4.878956154219595e-05,
      "loss": 0.9319,
      "step": 104250
    },
    {
      "epoch": 0.36540530689349415,
      "grad_norm": 2.921875,
      "learning_rate": 4.8788912513532246e-05,
      "loss": 0.936,
      "step": 104260
    },
    {
      "epoch": 0.36544035440038974,
      "grad_norm": 2.9375,
      "learning_rate": 4.878826348486855e-05,
      "loss": 1.0018,
      "step": 104270
    },
    {
      "epoch": 0.36547540190728534,
      "grad_norm": 2.734375,
      "learning_rate": 4.878761445620484e-05,
      "loss": 0.9798,
      "step": 104280
    },
    {
      "epoch": 0.3655104494141809,
      "grad_norm": 2.59375,
      "learning_rate": 4.8786965427541144e-05,
      "loss": 0.904,
      "step": 104290
    },
    {
      "epoch": 0.3655454969210765,
      "grad_norm": 3.21875,
      "learning_rate": 4.878631639887744e-05,
      "loss": 0.9859,
      "step": 104300
    },
    {
      "epoch": 0.3655805444279721,
      "grad_norm": 3.21875,
      "learning_rate": 4.878566737021374e-05,
      "loss": 0.982,
      "step": 104310
    },
    {
      "epoch": 0.3656155919348677,
      "grad_norm": 3.921875,
      "learning_rate": 4.8785018341550034e-05,
      "loss": 0.9884,
      "step": 104320
    },
    {
      "epoch": 0.3656506394417633,
      "grad_norm": 2.84375,
      "learning_rate": 4.8784369312886336e-05,
      "loss": 1.001,
      "step": 104330
    },
    {
      "epoch": 0.3656856869486589,
      "grad_norm": 2.984375,
      "learning_rate": 4.878372028422263e-05,
      "loss": 0.9531,
      "step": 104340
    },
    {
      "epoch": 0.36572073445555453,
      "grad_norm": 3.21875,
      "learning_rate": 4.878307125555893e-05,
      "loss": 0.9887,
      "step": 104350
    },
    {
      "epoch": 0.3657557819624501,
      "grad_norm": 3.125,
      "learning_rate": 4.878242222689523e-05,
      "loss": 0.945,
      "step": 104360
    },
    {
      "epoch": 0.3657908294693457,
      "grad_norm": 3.328125,
      "learning_rate": 4.878177319823153e-05,
      "loss": 1.0031,
      "step": 104370
    },
    {
      "epoch": 0.3658258769762413,
      "grad_norm": 3.1875,
      "learning_rate": 4.878112416956783e-05,
      "loss": 1.0027,
      "step": 104380
    },
    {
      "epoch": 0.3658609244831369,
      "grad_norm": 3.453125,
      "learning_rate": 4.8780475140904124e-05,
      "loss": 0.9043,
      "step": 104390
    },
    {
      "epoch": 0.3658959719900325,
      "grad_norm": 2.796875,
      "learning_rate": 4.8779826112240425e-05,
      "loss": 0.956,
      "step": 104400
    },
    {
      "epoch": 0.3659310194969281,
      "grad_norm": 3.109375,
      "learning_rate": 4.877917708357672e-05,
      "loss": 0.9879,
      "step": 104410
    },
    {
      "epoch": 0.36596606700382367,
      "grad_norm": 3.296875,
      "learning_rate": 4.877852805491302e-05,
      "loss": 0.9036,
      "step": 104420
    },
    {
      "epoch": 0.36600111451071926,
      "grad_norm": 2.828125,
      "learning_rate": 4.8777879026249316e-05,
      "loss": 0.9274,
      "step": 104430
    },
    {
      "epoch": 0.36603616201761485,
      "grad_norm": 3.46875,
      "learning_rate": 4.877722999758562e-05,
      "loss": 1.0019,
      "step": 104440
    },
    {
      "epoch": 0.3660712095245105,
      "grad_norm": 2.609375,
      "learning_rate": 4.877658096892191e-05,
      "loss": 0.9288,
      "step": 104450
    },
    {
      "epoch": 0.3661062570314061,
      "grad_norm": 3.0625,
      "learning_rate": 4.877593194025821e-05,
      "loss": 0.9142,
      "step": 104460
    },
    {
      "epoch": 0.3661413045383017,
      "grad_norm": 2.6875,
      "learning_rate": 4.877528291159451e-05,
      "loss": 1.0016,
      "step": 104470
    },
    {
      "epoch": 0.3661763520451973,
      "grad_norm": 2.3125,
      "learning_rate": 4.877463388293081e-05,
      "loss": 0.9315,
      "step": 104480
    },
    {
      "epoch": 0.36621139955209286,
      "grad_norm": 3.234375,
      "learning_rate": 4.877398485426711e-05,
      "loss": 1.0013,
      "step": 104490
    },
    {
      "epoch": 0.36624644705898846,
      "grad_norm": 3.40625,
      "learning_rate": 4.8773335825603405e-05,
      "loss": 1.018,
      "step": 104500
    },
    {
      "epoch": 0.36628149456588405,
      "grad_norm": 2.859375,
      "learning_rate": 4.8772686796939707e-05,
      "loss": 1.0236,
      "step": 104510
    },
    {
      "epoch": 0.36631654207277964,
      "grad_norm": 2.96875,
      "learning_rate": 4.8772037768276e-05,
      "loss": 0.9369,
      "step": 104520
    },
    {
      "epoch": 0.36635158957967523,
      "grad_norm": 2.875,
      "learning_rate": 4.87713887396123e-05,
      "loss": 0.9936,
      "step": 104530
    },
    {
      "epoch": 0.3663866370865708,
      "grad_norm": 3.28125,
      "learning_rate": 4.87707397109486e-05,
      "loss": 0.9513,
      "step": 104540
    },
    {
      "epoch": 0.36642168459346647,
      "grad_norm": 2.96875,
      "learning_rate": 4.87700906822849e-05,
      "loss": 0.9489,
      "step": 104550
    },
    {
      "epoch": 0.36645673210036206,
      "grad_norm": 3.34375,
      "learning_rate": 4.876944165362119e-05,
      "loss": 0.9624,
      "step": 104560
    },
    {
      "epoch": 0.36649177960725765,
      "grad_norm": 3.046875,
      "learning_rate": 4.8768792624957495e-05,
      "loss": 0.9745,
      "step": 104570
    },
    {
      "epoch": 0.36652682711415324,
      "grad_norm": 3.625,
      "learning_rate": 4.876814359629379e-05,
      "loss": 0.9878,
      "step": 104580
    },
    {
      "epoch": 0.36656187462104883,
      "grad_norm": 2.78125,
      "learning_rate": 4.876749456763009e-05,
      "loss": 0.9964,
      "step": 104590
    },
    {
      "epoch": 0.3665969221279444,
      "grad_norm": 2.828125,
      "learning_rate": 4.8766845538966385e-05,
      "loss": 0.9906,
      "step": 104600
    },
    {
      "epoch": 0.36663196963484,
      "grad_norm": 3.296875,
      "learning_rate": 4.876619651030268e-05,
      "loss": 1.0361,
      "step": 104610
    },
    {
      "epoch": 0.3666670171417356,
      "grad_norm": 3.140625,
      "learning_rate": 4.876554748163898e-05,
      "loss": 0.946,
      "step": 104620
    },
    {
      "epoch": 0.3667020646486312,
      "grad_norm": 2.5625,
      "learning_rate": 4.8764898452975276e-05,
      "loss": 0.9488,
      "step": 104630
    },
    {
      "epoch": 0.3667371121555268,
      "grad_norm": 2.75,
      "learning_rate": 4.876424942431158e-05,
      "loss": 1.0232,
      "step": 104640
    },
    {
      "epoch": 0.36677215966242244,
      "grad_norm": 2.96875,
      "learning_rate": 4.876360039564787e-05,
      "loss": 0.9782,
      "step": 104650
    },
    {
      "epoch": 0.36680720716931803,
      "grad_norm": 3.015625,
      "learning_rate": 4.876295136698417e-05,
      "loss": 0.9121,
      "step": 104660
    },
    {
      "epoch": 0.3668422546762136,
      "grad_norm": 3.109375,
      "learning_rate": 4.876230233832047e-05,
      "loss": 1.0008,
      "step": 104670
    },
    {
      "epoch": 0.3668773021831092,
      "grad_norm": 3.15625,
      "learning_rate": 4.876165330965677e-05,
      "loss": 0.9526,
      "step": 104680
    },
    {
      "epoch": 0.3669123496900048,
      "grad_norm": 3.640625,
      "learning_rate": 4.8761004280993064e-05,
      "loss": 1.0525,
      "step": 104690
    },
    {
      "epoch": 0.3669473971969004,
      "grad_norm": 3.140625,
      "learning_rate": 4.8760355252329365e-05,
      "loss": 0.9329,
      "step": 104700
    },
    {
      "epoch": 0.366982444703796,
      "grad_norm": 3.0625,
      "learning_rate": 4.8759706223665667e-05,
      "loss": 1.0025,
      "step": 104710
    },
    {
      "epoch": 0.3670174922106916,
      "grad_norm": 3.265625,
      "learning_rate": 4.875905719500196e-05,
      "loss": 1.0144,
      "step": 104720
    },
    {
      "epoch": 0.36705253971758717,
      "grad_norm": 2.984375,
      "learning_rate": 4.875840816633826e-05,
      "loss": 0.9749,
      "step": 104730
    },
    {
      "epoch": 0.36708758722448276,
      "grad_norm": 2.53125,
      "learning_rate": 4.875775913767456e-05,
      "loss": 0.9133,
      "step": 104740
    },
    {
      "epoch": 0.3671226347313784,
      "grad_norm": 3.15625,
      "learning_rate": 4.875711010901086e-05,
      "loss": 0.9651,
      "step": 104750
    },
    {
      "epoch": 0.367157682238274,
      "grad_norm": 3.0625,
      "learning_rate": 4.875646108034715e-05,
      "loss": 1.0199,
      "step": 104760
    },
    {
      "epoch": 0.3671927297451696,
      "grad_norm": 3.28125,
      "learning_rate": 4.8755812051683455e-05,
      "loss": 0.9902,
      "step": 104770
    },
    {
      "epoch": 0.3672277772520652,
      "grad_norm": 3.296875,
      "learning_rate": 4.875516302301975e-05,
      "loss": 0.9564,
      "step": 104780
    },
    {
      "epoch": 0.36726282475896077,
      "grad_norm": 3.359375,
      "learning_rate": 4.875451399435605e-05,
      "loss": 0.9917,
      "step": 104790
    },
    {
      "epoch": 0.36729787226585636,
      "grad_norm": 2.9375,
      "learning_rate": 4.8753864965692345e-05,
      "loss": 0.9413,
      "step": 104800
    },
    {
      "epoch": 0.36733291977275195,
      "grad_norm": 3.34375,
      "learning_rate": 4.8753215937028647e-05,
      "loss": 0.8942,
      "step": 104810
    },
    {
      "epoch": 0.36736796727964754,
      "grad_norm": 2.984375,
      "learning_rate": 4.875256690836494e-05,
      "loss": 1.0306,
      "step": 104820
    },
    {
      "epoch": 0.36740301478654314,
      "grad_norm": 3.078125,
      "learning_rate": 4.875191787970124e-05,
      "loss": 0.9473,
      "step": 104830
    },
    {
      "epoch": 0.3674380622934388,
      "grad_norm": 3.59375,
      "learning_rate": 4.875126885103754e-05,
      "loss": 1.0082,
      "step": 104840
    },
    {
      "epoch": 0.3674731098003344,
      "grad_norm": 2.96875,
      "learning_rate": 4.875061982237384e-05,
      "loss": 0.9388,
      "step": 104850
    },
    {
      "epoch": 0.36750815730722997,
      "grad_norm": 2.859375,
      "learning_rate": 4.874997079371014e-05,
      "loss": 0.9277,
      "step": 104860
    },
    {
      "epoch": 0.36754320481412556,
      "grad_norm": 3.34375,
      "learning_rate": 4.8749321765046435e-05,
      "loss": 0.9327,
      "step": 104870
    },
    {
      "epoch": 0.36757825232102115,
      "grad_norm": 2.421875,
      "learning_rate": 4.8748672736382736e-05,
      "loss": 0.9543,
      "step": 104880
    },
    {
      "epoch": 0.36761329982791674,
      "grad_norm": 2.84375,
      "learning_rate": 4.874802370771903e-05,
      "loss": 0.9858,
      "step": 104890
    },
    {
      "epoch": 0.36764834733481233,
      "grad_norm": 3.265625,
      "learning_rate": 4.874737467905533e-05,
      "loss": 1.0064,
      "step": 104900
    },
    {
      "epoch": 0.3676833948417079,
      "grad_norm": 3.140625,
      "learning_rate": 4.8746725650391627e-05,
      "loss": 0.9267,
      "step": 104910
    },
    {
      "epoch": 0.3677184423486035,
      "grad_norm": 3.046875,
      "learning_rate": 4.874607662172793e-05,
      "loss": 0.9401,
      "step": 104920
    },
    {
      "epoch": 0.3677534898554991,
      "grad_norm": 2.953125,
      "learning_rate": 4.874542759306422e-05,
      "loss": 0.8986,
      "step": 104930
    },
    {
      "epoch": 0.36778853736239475,
      "grad_norm": 3.140625,
      "learning_rate": 4.8744778564400524e-05,
      "loss": 0.9483,
      "step": 104940
    },
    {
      "epoch": 0.36782358486929034,
      "grad_norm": 3.125,
      "learning_rate": 4.874412953573682e-05,
      "loss": 0.9961,
      "step": 104950
    },
    {
      "epoch": 0.36785863237618593,
      "grad_norm": 3.265625,
      "learning_rate": 4.874348050707312e-05,
      "loss": 0.9351,
      "step": 104960
    },
    {
      "epoch": 0.3678936798830815,
      "grad_norm": 2.9375,
      "learning_rate": 4.8742831478409415e-05,
      "loss": 0.884,
      "step": 104970
    },
    {
      "epoch": 0.3679287273899771,
      "grad_norm": 3.0625,
      "learning_rate": 4.874218244974571e-05,
      "loss": 0.9986,
      "step": 104980
    },
    {
      "epoch": 0.3679637748968727,
      "grad_norm": 2.75,
      "learning_rate": 4.874153342108201e-05,
      "loss": 0.9764,
      "step": 104990
    },
    {
      "epoch": 0.3679988224037683,
      "grad_norm": 2.84375,
      "learning_rate": 4.8740884392418305e-05,
      "loss": 0.9002,
      "step": 105000
    },
    {
      "epoch": 0.3679988224037683,
      "eval_loss": 0.8996362686157227,
      "eval_runtime": 555.7791,
      "eval_samples_per_second": 684.509,
      "eval_steps_per_second": 57.042,
      "step": 105000
    },
    {
      "epoch": 0.3680338699106639,
      "grad_norm": 3.109375,
      "learning_rate": 4.8740235363754607e-05,
      "loss": 1.0072,
      "step": 105010
    },
    {
      "epoch": 0.3680689174175595,
      "grad_norm": 3.1875,
      "learning_rate": 4.87395863350909e-05,
      "loss": 0.9773,
      "step": 105020
    },
    {
      "epoch": 0.3681039649244551,
      "grad_norm": 3.515625,
      "learning_rate": 4.87389373064272e-05,
      "loss": 0.988,
      "step": 105030
    },
    {
      "epoch": 0.3681390124313507,
      "grad_norm": 2.765625,
      "learning_rate": 4.87382882777635e-05,
      "loss": 0.9598,
      "step": 105040
    },
    {
      "epoch": 0.3681740599382463,
      "grad_norm": 2.9375,
      "learning_rate": 4.87376392490998e-05,
      "loss": 0.9267,
      "step": 105050
    },
    {
      "epoch": 0.3682091074451419,
      "grad_norm": 3.5625,
      "learning_rate": 4.873699022043609e-05,
      "loss": 0.8667,
      "step": 105060
    },
    {
      "epoch": 0.3682441549520375,
      "grad_norm": 3.4375,
      "learning_rate": 4.8736341191772395e-05,
      "loss": 0.9776,
      "step": 105070
    },
    {
      "epoch": 0.3682792024589331,
      "grad_norm": 3.15625,
      "learning_rate": 4.8735692163108696e-05,
      "loss": 0.8747,
      "step": 105080
    },
    {
      "epoch": 0.3683142499658287,
      "grad_norm": 3.5625,
      "learning_rate": 4.873504313444499e-05,
      "loss": 1.0013,
      "step": 105090
    },
    {
      "epoch": 0.36834929747272427,
      "grad_norm": 3.328125,
      "learning_rate": 4.873439410578129e-05,
      "loss": 0.9722,
      "step": 105100
    },
    {
      "epoch": 0.36838434497961986,
      "grad_norm": 2.828125,
      "learning_rate": 4.8733745077117587e-05,
      "loss": 0.9985,
      "step": 105110
    },
    {
      "epoch": 0.36841939248651545,
      "grad_norm": 2.703125,
      "learning_rate": 4.873309604845389e-05,
      "loss": 0.8902,
      "step": 105120
    },
    {
      "epoch": 0.36845443999341104,
      "grad_norm": 2.9375,
      "learning_rate": 4.873244701979018e-05,
      "loss": 0.8879,
      "step": 105130
    },
    {
      "epoch": 0.3684894875003067,
      "grad_norm": 2.828125,
      "learning_rate": 4.8731797991126484e-05,
      "loss": 0.9519,
      "step": 105140
    },
    {
      "epoch": 0.3685245350072023,
      "grad_norm": 3.03125,
      "learning_rate": 4.873114896246278e-05,
      "loss": 1.0008,
      "step": 105150
    },
    {
      "epoch": 0.36855958251409787,
      "grad_norm": 3.109375,
      "learning_rate": 4.873049993379908e-05,
      "loss": 0.9354,
      "step": 105160
    },
    {
      "epoch": 0.36859463002099346,
      "grad_norm": 3.28125,
      "learning_rate": 4.8729850905135375e-05,
      "loss": 0.8919,
      "step": 105170
    },
    {
      "epoch": 0.36862967752788905,
      "grad_norm": 2.703125,
      "learning_rate": 4.8729201876471676e-05,
      "loss": 1.022,
      "step": 105180
    },
    {
      "epoch": 0.36866472503478465,
      "grad_norm": 3.609375,
      "learning_rate": 4.872855284780797e-05,
      "loss": 0.9196,
      "step": 105190
    },
    {
      "epoch": 0.36869977254168024,
      "grad_norm": 3.078125,
      "learning_rate": 4.872790381914427e-05,
      "loss": 0.9397,
      "step": 105200
    },
    {
      "epoch": 0.36873482004857583,
      "grad_norm": 2.859375,
      "learning_rate": 4.8727254790480567e-05,
      "loss": 0.9434,
      "step": 105210
    },
    {
      "epoch": 0.3687698675554714,
      "grad_norm": 2.96875,
      "learning_rate": 4.872660576181687e-05,
      "loss": 0.9333,
      "step": 105220
    },
    {
      "epoch": 0.368804915062367,
      "grad_norm": 2.5625,
      "learning_rate": 4.872595673315317e-05,
      "loss": 0.9552,
      "step": 105230
    },
    {
      "epoch": 0.36883996256926266,
      "grad_norm": 2.953125,
      "learning_rate": 4.8725307704489464e-05,
      "loss": 0.9476,
      "step": 105240
    },
    {
      "epoch": 0.36887501007615825,
      "grad_norm": 2.96875,
      "learning_rate": 4.8724658675825765e-05,
      "loss": 0.8987,
      "step": 105250
    },
    {
      "epoch": 0.36891005758305384,
      "grad_norm": 3.0625,
      "learning_rate": 4.872400964716206e-05,
      "loss": 1.0471,
      "step": 105260
    },
    {
      "epoch": 0.36894510508994943,
      "grad_norm": 2.640625,
      "learning_rate": 4.872336061849836e-05,
      "loss": 0.9611,
      "step": 105270
    },
    {
      "epoch": 0.368980152596845,
      "grad_norm": 3.25,
      "learning_rate": 4.8722711589834656e-05,
      "loss": 0.8999,
      "step": 105280
    },
    {
      "epoch": 0.3690152001037406,
      "grad_norm": 3.328125,
      "learning_rate": 4.872206256117096e-05,
      "loss": 0.9519,
      "step": 105290
    },
    {
      "epoch": 0.3690502476106362,
      "grad_norm": 2.953125,
      "learning_rate": 4.872141353250725e-05,
      "loss": 0.8994,
      "step": 105300
    },
    {
      "epoch": 0.3690852951175318,
      "grad_norm": 2.96875,
      "learning_rate": 4.872076450384355e-05,
      "loss": 0.9414,
      "step": 105310
    },
    {
      "epoch": 0.3691203426244274,
      "grad_norm": 2.59375,
      "learning_rate": 4.872011547517985e-05,
      "loss": 0.9284,
      "step": 105320
    },
    {
      "epoch": 0.36915539013132304,
      "grad_norm": 3.53125,
      "learning_rate": 4.871946644651615e-05,
      "loss": 1.0276,
      "step": 105330
    },
    {
      "epoch": 0.3691904376382186,
      "grad_norm": 2.828125,
      "learning_rate": 4.8718817417852444e-05,
      "loss": 0.9075,
      "step": 105340
    },
    {
      "epoch": 0.3692254851451142,
      "grad_norm": 2.453125,
      "learning_rate": 4.871816838918874e-05,
      "loss": 0.8973,
      "step": 105350
    },
    {
      "epoch": 0.3692605326520098,
      "grad_norm": 3.015625,
      "learning_rate": 4.871751936052504e-05,
      "loss": 0.9687,
      "step": 105360
    },
    {
      "epoch": 0.3692955801589054,
      "grad_norm": 3.03125,
      "learning_rate": 4.8716870331861335e-05,
      "loss": 1.055,
      "step": 105370
    },
    {
      "epoch": 0.369330627665801,
      "grad_norm": 3.015625,
      "learning_rate": 4.8716221303197636e-05,
      "loss": 1.0081,
      "step": 105380
    },
    {
      "epoch": 0.3693656751726966,
      "grad_norm": 2.9375,
      "learning_rate": 4.871557227453393e-05,
      "loss": 0.9542,
      "step": 105390
    },
    {
      "epoch": 0.3694007226795922,
      "grad_norm": 2.71875,
      "learning_rate": 4.871492324587023e-05,
      "loss": 0.9678,
      "step": 105400
    },
    {
      "epoch": 0.36943577018648777,
      "grad_norm": 2.859375,
      "learning_rate": 4.8714274217206527e-05,
      "loss": 0.9612,
      "step": 105410
    },
    {
      "epoch": 0.36947081769338336,
      "grad_norm": 3.0625,
      "learning_rate": 4.871362518854283e-05,
      "loss": 0.8972,
      "step": 105420
    },
    {
      "epoch": 0.369505865200279,
      "grad_norm": 3.4375,
      "learning_rate": 4.871297615987912e-05,
      "loss": 0.9706,
      "step": 105430
    },
    {
      "epoch": 0.3695409127071746,
      "grad_norm": 3.390625,
      "learning_rate": 4.8712327131215424e-05,
      "loss": 0.9782,
      "step": 105440
    },
    {
      "epoch": 0.3695759602140702,
      "grad_norm": 3.046875,
      "learning_rate": 4.8711678102551725e-05,
      "loss": 0.9058,
      "step": 105450
    },
    {
      "epoch": 0.3696110077209658,
      "grad_norm": 4.125,
      "learning_rate": 4.871102907388802e-05,
      "loss": 1.04,
      "step": 105460
    },
    {
      "epoch": 0.36964605522786137,
      "grad_norm": 3.1875,
      "learning_rate": 4.871038004522432e-05,
      "loss": 0.988,
      "step": 105470
    },
    {
      "epoch": 0.36968110273475696,
      "grad_norm": 2.875,
      "learning_rate": 4.8709731016560616e-05,
      "loss": 0.9667,
      "step": 105480
    },
    {
      "epoch": 0.36971615024165255,
      "grad_norm": 3.171875,
      "learning_rate": 4.870908198789692e-05,
      "loss": 1.0147,
      "step": 105490
    },
    {
      "epoch": 0.36975119774854814,
      "grad_norm": 3.484375,
      "learning_rate": 4.870843295923321e-05,
      "loss": 0.9601,
      "step": 105500
    },
    {
      "epoch": 0.36978624525544374,
      "grad_norm": 2.859375,
      "learning_rate": 4.870778393056951e-05,
      "loss": 0.95,
      "step": 105510
    },
    {
      "epoch": 0.3698212927623393,
      "grad_norm": 2.96875,
      "learning_rate": 4.870713490190581e-05,
      "loss": 0.9431,
      "step": 105520
    },
    {
      "epoch": 0.369856340269235,
      "grad_norm": 3.71875,
      "learning_rate": 4.870648587324211e-05,
      "loss": 0.9259,
      "step": 105530
    },
    {
      "epoch": 0.36989138777613056,
      "grad_norm": 2.921875,
      "learning_rate": 4.8705836844578404e-05,
      "loss": 0.9167,
      "step": 105540
    },
    {
      "epoch": 0.36992643528302616,
      "grad_norm": 3.078125,
      "learning_rate": 4.8705187815914705e-05,
      "loss": 0.9124,
      "step": 105550
    },
    {
      "epoch": 0.36996148278992175,
      "grad_norm": 2.5625,
      "learning_rate": 4.8704538787251e-05,
      "loss": 0.8985,
      "step": 105560
    },
    {
      "epoch": 0.36999653029681734,
      "grad_norm": 3.015625,
      "learning_rate": 4.87038897585873e-05,
      "loss": 1.0186,
      "step": 105570
    },
    {
      "epoch": 0.37003157780371293,
      "grad_norm": 2.625,
      "learning_rate": 4.87032407299236e-05,
      "loss": 0.9858,
      "step": 105580
    },
    {
      "epoch": 0.3700666253106085,
      "grad_norm": 2.671875,
      "learning_rate": 4.87025917012599e-05,
      "loss": 0.8901,
      "step": 105590
    },
    {
      "epoch": 0.3701016728175041,
      "grad_norm": 3.03125,
      "learning_rate": 4.87019426725962e-05,
      "loss": 1.0213,
      "step": 105600
    },
    {
      "epoch": 0.3701367203243997,
      "grad_norm": 3.53125,
      "learning_rate": 4.870129364393249e-05,
      "loss": 1.0067,
      "step": 105610
    },
    {
      "epoch": 0.3701717678312953,
      "grad_norm": 3.125,
      "learning_rate": 4.8700644615268795e-05,
      "loss": 0.9394,
      "step": 105620
    },
    {
      "epoch": 0.37020681533819094,
      "grad_norm": 2.8125,
      "learning_rate": 4.869999558660509e-05,
      "loss": 0.9336,
      "step": 105630
    },
    {
      "epoch": 0.37024186284508653,
      "grad_norm": 3.15625,
      "learning_rate": 4.869934655794139e-05,
      "loss": 1.0529,
      "step": 105640
    },
    {
      "epoch": 0.3702769103519821,
      "grad_norm": 3.109375,
      "learning_rate": 4.8698697529277685e-05,
      "loss": 0.9892,
      "step": 105650
    },
    {
      "epoch": 0.3703119578588777,
      "grad_norm": 2.921875,
      "learning_rate": 4.869804850061399e-05,
      "loss": 0.8734,
      "step": 105660
    },
    {
      "epoch": 0.3703470053657733,
      "grad_norm": 2.984375,
      "learning_rate": 4.869739947195028e-05,
      "loss": 1.0149,
      "step": 105670
    },
    {
      "epoch": 0.3703820528726689,
      "grad_norm": 3.125,
      "learning_rate": 4.869675044328658e-05,
      "loss": 0.9808,
      "step": 105680
    },
    {
      "epoch": 0.3704171003795645,
      "grad_norm": 2.84375,
      "learning_rate": 4.869610141462288e-05,
      "loss": 0.9364,
      "step": 105690
    },
    {
      "epoch": 0.3704521478864601,
      "grad_norm": 2.953125,
      "learning_rate": 4.869545238595918e-05,
      "loss": 0.9622,
      "step": 105700
    },
    {
      "epoch": 0.3704871953933557,
      "grad_norm": 3.03125,
      "learning_rate": 4.869480335729547e-05,
      "loss": 0.937,
      "step": 105710
    },
    {
      "epoch": 0.37052224290025126,
      "grad_norm": 3.296875,
      "learning_rate": 4.8694154328631775e-05,
      "loss": 1.0088,
      "step": 105720
    },
    {
      "epoch": 0.3705572904071469,
      "grad_norm": 3.546875,
      "learning_rate": 4.869350529996807e-05,
      "loss": 0.9533,
      "step": 105730
    },
    {
      "epoch": 0.3705923379140425,
      "grad_norm": 3.1875,
      "learning_rate": 4.8692856271304364e-05,
      "loss": 1.0263,
      "step": 105740
    },
    {
      "epoch": 0.3706273854209381,
      "grad_norm": 3.484375,
      "learning_rate": 4.8692207242640665e-05,
      "loss": 0.9511,
      "step": 105750
    },
    {
      "epoch": 0.3706624329278337,
      "grad_norm": 2.890625,
      "learning_rate": 4.869155821397696e-05,
      "loss": 1.0049,
      "step": 105760
    },
    {
      "epoch": 0.3706974804347293,
      "grad_norm": 3.25,
      "learning_rate": 4.869090918531326e-05,
      "loss": 0.9465,
      "step": 105770
    },
    {
      "epoch": 0.37073252794162487,
      "grad_norm": 2.84375,
      "learning_rate": 4.8690260156649556e-05,
      "loss": 1.024,
      "step": 105780
    },
    {
      "epoch": 0.37076757544852046,
      "grad_norm": 2.625,
      "learning_rate": 4.868961112798586e-05,
      "loss": 0.9883,
      "step": 105790
    },
    {
      "epoch": 0.37080262295541605,
      "grad_norm": 3.421875,
      "learning_rate": 4.868896209932215e-05,
      "loss": 1.0505,
      "step": 105800
    },
    {
      "epoch": 0.37083767046231164,
      "grad_norm": 3.078125,
      "learning_rate": 4.868831307065845e-05,
      "loss": 0.9907,
      "step": 105810
    },
    {
      "epoch": 0.37087271796920723,
      "grad_norm": 2.859375,
      "learning_rate": 4.8687664041994755e-05,
      "loss": 0.9124,
      "step": 105820
    },
    {
      "epoch": 0.3709077654761029,
      "grad_norm": 3.03125,
      "learning_rate": 4.868701501333105e-05,
      "loss": 0.9888,
      "step": 105830
    },
    {
      "epoch": 0.37094281298299847,
      "grad_norm": 3.09375,
      "learning_rate": 4.868636598466735e-05,
      "loss": 0.9669,
      "step": 105840
    },
    {
      "epoch": 0.37097786048989406,
      "grad_norm": 2.859375,
      "learning_rate": 4.8685716956003645e-05,
      "loss": 0.984,
      "step": 105850
    },
    {
      "epoch": 0.37101290799678965,
      "grad_norm": 2.578125,
      "learning_rate": 4.868506792733995e-05,
      "loss": 0.9443,
      "step": 105860
    },
    {
      "epoch": 0.37104795550368525,
      "grad_norm": 3.53125,
      "learning_rate": 4.868441889867624e-05,
      "loss": 0.9647,
      "step": 105870
    },
    {
      "epoch": 0.37108300301058084,
      "grad_norm": 3.234375,
      "learning_rate": 4.868376987001254e-05,
      "loss": 1.0306,
      "step": 105880
    },
    {
      "epoch": 0.3711180505174764,
      "grad_norm": 2.984375,
      "learning_rate": 4.868312084134884e-05,
      "loss": 1.0397,
      "step": 105890
    },
    {
      "epoch": 0.371153098024372,
      "grad_norm": 2.765625,
      "learning_rate": 4.868247181268514e-05,
      "loss": 1.0343,
      "step": 105900
    },
    {
      "epoch": 0.3711881455312676,
      "grad_norm": 2.78125,
      "learning_rate": 4.868182278402143e-05,
      "loss": 1.0065,
      "step": 105910
    },
    {
      "epoch": 0.37122319303816326,
      "grad_norm": 3.171875,
      "learning_rate": 4.8681173755357735e-05,
      "loss": 0.9476,
      "step": 105920
    },
    {
      "epoch": 0.37125824054505885,
      "grad_norm": 2.890625,
      "learning_rate": 4.868052472669403e-05,
      "loss": 0.9146,
      "step": 105930
    },
    {
      "epoch": 0.37129328805195444,
      "grad_norm": 3.125,
      "learning_rate": 4.867987569803033e-05,
      "loss": 0.9433,
      "step": 105940
    },
    {
      "epoch": 0.37132833555885003,
      "grad_norm": 3.109375,
      "learning_rate": 4.867922666936663e-05,
      "loss": 0.9488,
      "step": 105950
    },
    {
      "epoch": 0.3713633830657456,
      "grad_norm": 3.0,
      "learning_rate": 4.867857764070293e-05,
      "loss": 0.9676,
      "step": 105960
    },
    {
      "epoch": 0.3713984305726412,
      "grad_norm": 3.03125,
      "learning_rate": 4.867792861203923e-05,
      "loss": 0.94,
      "step": 105970
    },
    {
      "epoch": 0.3714334780795368,
      "grad_norm": 3.015625,
      "learning_rate": 4.867727958337552e-05,
      "loss": 1.0216,
      "step": 105980
    },
    {
      "epoch": 0.3714685255864324,
      "grad_norm": 2.96875,
      "learning_rate": 4.8676630554711824e-05,
      "loss": 1.0136,
      "step": 105990
    },
    {
      "epoch": 0.371503573093328,
      "grad_norm": 2.984375,
      "learning_rate": 4.867598152604812e-05,
      "loss": 0.9337,
      "step": 106000
    },
    {
      "epoch": 0.3715386206002236,
      "grad_norm": 2.953125,
      "learning_rate": 4.867533249738442e-05,
      "loss": 0.8933,
      "step": 106010
    },
    {
      "epoch": 0.3715736681071192,
      "grad_norm": 2.65625,
      "learning_rate": 4.8674683468720715e-05,
      "loss": 0.9127,
      "step": 106020
    },
    {
      "epoch": 0.3716087156140148,
      "grad_norm": 3.09375,
      "learning_rate": 4.8674034440057016e-05,
      "loss": 0.9618,
      "step": 106030
    },
    {
      "epoch": 0.3716437631209104,
      "grad_norm": 2.65625,
      "learning_rate": 4.867338541139331e-05,
      "loss": 0.9916,
      "step": 106040
    },
    {
      "epoch": 0.371678810627806,
      "grad_norm": 2.703125,
      "learning_rate": 4.867273638272961e-05,
      "loss": 0.9366,
      "step": 106050
    },
    {
      "epoch": 0.3717138581347016,
      "grad_norm": 2.78125,
      "learning_rate": 4.867208735406591e-05,
      "loss": 0.8682,
      "step": 106060
    },
    {
      "epoch": 0.3717489056415972,
      "grad_norm": 3.03125,
      "learning_rate": 4.867143832540221e-05,
      "loss": 0.9513,
      "step": 106070
    },
    {
      "epoch": 0.3717839531484928,
      "grad_norm": 3.15625,
      "learning_rate": 4.86707892967385e-05,
      "loss": 0.9353,
      "step": 106080
    },
    {
      "epoch": 0.37181900065538837,
      "grad_norm": 2.859375,
      "learning_rate": 4.8670140268074804e-05,
      "loss": 0.9443,
      "step": 106090
    },
    {
      "epoch": 0.37185404816228396,
      "grad_norm": 2.875,
      "learning_rate": 4.8669491239411105e-05,
      "loss": 0.8305,
      "step": 106100
    },
    {
      "epoch": 0.37188909566917955,
      "grad_norm": 3.140625,
      "learning_rate": 4.866884221074739e-05,
      "loss": 0.9216,
      "step": 106110
    },
    {
      "epoch": 0.3719241431760752,
      "grad_norm": 3.25,
      "learning_rate": 4.8668193182083695e-05,
      "loss": 1.0224,
      "step": 106120
    },
    {
      "epoch": 0.3719591906829708,
      "grad_norm": 2.859375,
      "learning_rate": 4.866754415341999e-05,
      "loss": 0.963,
      "step": 106130
    },
    {
      "epoch": 0.3719942381898664,
      "grad_norm": 3.390625,
      "learning_rate": 4.866689512475629e-05,
      "loss": 1.053,
      "step": 106140
    },
    {
      "epoch": 0.37202928569676197,
      "grad_norm": 2.828125,
      "learning_rate": 4.8666246096092585e-05,
      "loss": 0.9795,
      "step": 106150
    },
    {
      "epoch": 0.37206433320365756,
      "grad_norm": 3.09375,
      "learning_rate": 4.866559706742889e-05,
      "loss": 1.0353,
      "step": 106160
    },
    {
      "epoch": 0.37209938071055315,
      "grad_norm": 3.4375,
      "learning_rate": 4.866494803876518e-05,
      "loss": 0.8968,
      "step": 106170
    },
    {
      "epoch": 0.37213442821744874,
      "grad_norm": 2.984375,
      "learning_rate": 4.866429901010148e-05,
      "loss": 0.9471,
      "step": 106180
    },
    {
      "epoch": 0.37216947572434433,
      "grad_norm": 2.8125,
      "learning_rate": 4.8663649981437784e-05,
      "loss": 0.9356,
      "step": 106190
    },
    {
      "epoch": 0.3722045232312399,
      "grad_norm": 3.515625,
      "learning_rate": 4.866300095277408e-05,
      "loss": 0.8779,
      "step": 106200
    },
    {
      "epoch": 0.3722395707381355,
      "grad_norm": 2.984375,
      "learning_rate": 4.866235192411038e-05,
      "loss": 1.0005,
      "step": 106210
    },
    {
      "epoch": 0.37227461824503116,
      "grad_norm": 2.640625,
      "learning_rate": 4.8661702895446675e-05,
      "loss": 0.9423,
      "step": 106220
    },
    {
      "epoch": 0.37230966575192675,
      "grad_norm": 3.09375,
      "learning_rate": 4.8661053866782976e-05,
      "loss": 1.0364,
      "step": 106230
    },
    {
      "epoch": 0.37234471325882235,
      "grad_norm": 3.15625,
      "learning_rate": 4.866040483811927e-05,
      "loss": 0.9695,
      "step": 106240
    },
    {
      "epoch": 0.37237976076571794,
      "grad_norm": 3.375,
      "learning_rate": 4.865975580945557e-05,
      "loss": 0.9366,
      "step": 106250
    },
    {
      "epoch": 0.37241480827261353,
      "grad_norm": 2.96875,
      "learning_rate": 4.865910678079187e-05,
      "loss": 0.9804,
      "step": 106260
    },
    {
      "epoch": 0.3724498557795091,
      "grad_norm": 2.984375,
      "learning_rate": 4.865845775212817e-05,
      "loss": 0.9749,
      "step": 106270
    },
    {
      "epoch": 0.3724849032864047,
      "grad_norm": 2.59375,
      "learning_rate": 4.865780872346446e-05,
      "loss": 0.9225,
      "step": 106280
    },
    {
      "epoch": 0.3725199507933003,
      "grad_norm": 3.5,
      "learning_rate": 4.8657159694800764e-05,
      "loss": 0.9715,
      "step": 106290
    },
    {
      "epoch": 0.3725549983001959,
      "grad_norm": 3.390625,
      "learning_rate": 4.865651066613706e-05,
      "loss": 0.9505,
      "step": 106300
    },
    {
      "epoch": 0.3725900458070915,
      "grad_norm": 3.125,
      "learning_rate": 4.865586163747336e-05,
      "loss": 0.9712,
      "step": 106310
    },
    {
      "epoch": 0.37262509331398713,
      "grad_norm": 2.78125,
      "learning_rate": 4.865521260880966e-05,
      "loss": 0.9356,
      "step": 106320
    },
    {
      "epoch": 0.3726601408208827,
      "grad_norm": 3.40625,
      "learning_rate": 4.8654563580145956e-05,
      "loss": 0.9989,
      "step": 106330
    },
    {
      "epoch": 0.3726951883277783,
      "grad_norm": 2.796875,
      "learning_rate": 4.865391455148226e-05,
      "loss": 0.9261,
      "step": 106340
    },
    {
      "epoch": 0.3727302358346739,
      "grad_norm": 3.296875,
      "learning_rate": 4.865326552281855e-05,
      "loss": 0.9204,
      "step": 106350
    },
    {
      "epoch": 0.3727652833415695,
      "grad_norm": 2.9375,
      "learning_rate": 4.8652616494154853e-05,
      "loss": 0.9069,
      "step": 106360
    },
    {
      "epoch": 0.3728003308484651,
      "grad_norm": 2.84375,
      "learning_rate": 4.865196746549115e-05,
      "loss": 1.0263,
      "step": 106370
    },
    {
      "epoch": 0.3728353783553607,
      "grad_norm": 3.140625,
      "learning_rate": 4.865131843682745e-05,
      "loss": 0.9643,
      "step": 106380
    },
    {
      "epoch": 0.37287042586225627,
      "grad_norm": 3.28125,
      "learning_rate": 4.8650669408163744e-05,
      "loss": 0.9843,
      "step": 106390
    },
    {
      "epoch": 0.37290547336915186,
      "grad_norm": 2.4375,
      "learning_rate": 4.8650020379500045e-05,
      "loss": 0.9754,
      "step": 106400
    },
    {
      "epoch": 0.3729405208760475,
      "grad_norm": 2.578125,
      "learning_rate": 4.864937135083634e-05,
      "loss": 0.9585,
      "step": 106410
    },
    {
      "epoch": 0.3729755683829431,
      "grad_norm": 2.84375,
      "learning_rate": 4.864872232217264e-05,
      "loss": 1.0194,
      "step": 106420
    },
    {
      "epoch": 0.3730106158898387,
      "grad_norm": 3.109375,
      "learning_rate": 4.8648073293508936e-05,
      "loss": 0.9785,
      "step": 106430
    },
    {
      "epoch": 0.3730456633967343,
      "grad_norm": 3.0,
      "learning_rate": 4.864742426484524e-05,
      "loss": 0.9299,
      "step": 106440
    },
    {
      "epoch": 0.3730807109036299,
      "grad_norm": 3.296875,
      "learning_rate": 4.864677523618153e-05,
      "loss": 0.9707,
      "step": 106450
    },
    {
      "epoch": 0.37311575841052547,
      "grad_norm": 3.125,
      "learning_rate": 4.8646126207517833e-05,
      "loss": 1.0113,
      "step": 106460
    },
    {
      "epoch": 0.37315080591742106,
      "grad_norm": 2.875,
      "learning_rate": 4.8645477178854135e-05,
      "loss": 0.9315,
      "step": 106470
    },
    {
      "epoch": 0.37318585342431665,
      "grad_norm": 3.046875,
      "learning_rate": 4.864482815019042e-05,
      "loss": 1.0198,
      "step": 106480
    },
    {
      "epoch": 0.37322090093121224,
      "grad_norm": 3.09375,
      "learning_rate": 4.8644179121526724e-05,
      "loss": 1.0022,
      "step": 106490
    },
    {
      "epoch": 0.37325594843810783,
      "grad_norm": 3.15625,
      "learning_rate": 4.864353009286302e-05,
      "loss": 0.9499,
      "step": 106500
    },
    {
      "epoch": 0.3732909959450035,
      "grad_norm": 3.140625,
      "learning_rate": 4.864288106419932e-05,
      "loss": 0.9303,
      "step": 106510
    },
    {
      "epoch": 0.37332604345189907,
      "grad_norm": 3.46875,
      "learning_rate": 4.8642232035535615e-05,
      "loss": 1.0172,
      "step": 106520
    },
    {
      "epoch": 0.37336109095879466,
      "grad_norm": 2.828125,
      "learning_rate": 4.8641583006871916e-05,
      "loss": 0.9844,
      "step": 106530
    },
    {
      "epoch": 0.37339613846569025,
      "grad_norm": 3.078125,
      "learning_rate": 4.864093397820821e-05,
      "loss": 0.9477,
      "step": 106540
    },
    {
      "epoch": 0.37343118597258584,
      "grad_norm": 2.90625,
      "learning_rate": 4.864028494954451e-05,
      "loss": 0.9745,
      "step": 106550
    },
    {
      "epoch": 0.37346623347948144,
      "grad_norm": 2.59375,
      "learning_rate": 4.8639635920880813e-05,
      "loss": 0.9526,
      "step": 106560
    },
    {
      "epoch": 0.373501280986377,
      "grad_norm": 3.25,
      "learning_rate": 4.863898689221711e-05,
      "loss": 0.973,
      "step": 106570
    },
    {
      "epoch": 0.3735363284932726,
      "grad_norm": 3.28125,
      "learning_rate": 4.863833786355341e-05,
      "loss": 0.95,
      "step": 106580
    },
    {
      "epoch": 0.3735713760001682,
      "grad_norm": 3.015625,
      "learning_rate": 4.8637688834889704e-05,
      "loss": 0.971,
      "step": 106590
    },
    {
      "epoch": 0.3736064235070638,
      "grad_norm": 2.53125,
      "learning_rate": 4.8637039806226005e-05,
      "loss": 0.9719,
      "step": 106600
    },
    {
      "epoch": 0.37364147101395945,
      "grad_norm": 3.25,
      "learning_rate": 4.86363907775623e-05,
      "loss": 0.8949,
      "step": 106610
    },
    {
      "epoch": 0.37367651852085504,
      "grad_norm": 3.125,
      "learning_rate": 4.86357417488986e-05,
      "loss": 0.8884,
      "step": 106620
    },
    {
      "epoch": 0.37371156602775063,
      "grad_norm": 2.828125,
      "learning_rate": 4.8635092720234896e-05,
      "loss": 1.0643,
      "step": 106630
    },
    {
      "epoch": 0.3737466135346462,
      "grad_norm": 3.109375,
      "learning_rate": 4.86344436915712e-05,
      "loss": 1.0083,
      "step": 106640
    },
    {
      "epoch": 0.3737816610415418,
      "grad_norm": 2.671875,
      "learning_rate": 4.863379466290749e-05,
      "loss": 0.9441,
      "step": 106650
    },
    {
      "epoch": 0.3738167085484374,
      "grad_norm": 2.53125,
      "learning_rate": 4.8633145634243793e-05,
      "loss": 0.9328,
      "step": 106660
    },
    {
      "epoch": 0.373851756055333,
      "grad_norm": 2.796875,
      "learning_rate": 4.863249660558009e-05,
      "loss": 1.0377,
      "step": 106670
    },
    {
      "epoch": 0.3738868035622286,
      "grad_norm": 3.359375,
      "learning_rate": 4.863184757691639e-05,
      "loss": 0.8481,
      "step": 106680
    },
    {
      "epoch": 0.3739218510691242,
      "grad_norm": 3.078125,
      "learning_rate": 4.863119854825269e-05,
      "loss": 0.9226,
      "step": 106690
    },
    {
      "epoch": 0.37395689857601977,
      "grad_norm": 3.015625,
      "learning_rate": 4.8630549519588985e-05,
      "loss": 0.9825,
      "step": 106700
    },
    {
      "epoch": 0.3739919460829154,
      "grad_norm": 3.078125,
      "learning_rate": 4.862990049092529e-05,
      "loss": 0.9119,
      "step": 106710
    },
    {
      "epoch": 0.374026993589811,
      "grad_norm": 3.03125,
      "learning_rate": 4.862925146226158e-05,
      "loss": 1.0847,
      "step": 106720
    },
    {
      "epoch": 0.3740620410967066,
      "grad_norm": 2.828125,
      "learning_rate": 4.862860243359788e-05,
      "loss": 0.9357,
      "step": 106730
    },
    {
      "epoch": 0.3740970886036022,
      "grad_norm": 2.875,
      "learning_rate": 4.862795340493418e-05,
      "loss": 1.0159,
      "step": 106740
    },
    {
      "epoch": 0.3741321361104978,
      "grad_norm": 3.078125,
      "learning_rate": 4.862730437627048e-05,
      "loss": 0.9304,
      "step": 106750
    },
    {
      "epoch": 0.3741671836173934,
      "grad_norm": 3.21875,
      "learning_rate": 4.8626655347606773e-05,
      "loss": 1.0072,
      "step": 106760
    },
    {
      "epoch": 0.37420223112428896,
      "grad_norm": 3.140625,
      "learning_rate": 4.8626006318943075e-05,
      "loss": 1.0013,
      "step": 106770
    },
    {
      "epoch": 0.37423727863118456,
      "grad_norm": 3.203125,
      "learning_rate": 4.862535729027937e-05,
      "loss": 0.9726,
      "step": 106780
    },
    {
      "epoch": 0.37427232613808015,
      "grad_norm": 2.515625,
      "learning_rate": 4.862470826161567e-05,
      "loss": 0.965,
      "step": 106790
    },
    {
      "epoch": 0.37430737364497574,
      "grad_norm": 2.9375,
      "learning_rate": 4.8624059232951965e-05,
      "loss": 0.959,
      "step": 106800
    },
    {
      "epoch": 0.3743424211518714,
      "grad_norm": 2.625,
      "learning_rate": 4.862341020428827e-05,
      "loss": 1.01,
      "step": 106810
    },
    {
      "epoch": 0.374377468658767,
      "grad_norm": 3.21875,
      "learning_rate": 4.862276117562457e-05,
      "loss": 1.0143,
      "step": 106820
    },
    {
      "epoch": 0.37441251616566257,
      "grad_norm": 2.59375,
      "learning_rate": 4.862211214696086e-05,
      "loss": 0.9339,
      "step": 106830
    },
    {
      "epoch": 0.37444756367255816,
      "grad_norm": 3.171875,
      "learning_rate": 4.8621463118297164e-05,
      "loss": 0.9488,
      "step": 106840
    },
    {
      "epoch": 0.37448261117945375,
      "grad_norm": 2.875,
      "learning_rate": 4.862081408963346e-05,
      "loss": 0.9507,
      "step": 106850
    },
    {
      "epoch": 0.37451765868634934,
      "grad_norm": 2.953125,
      "learning_rate": 4.8620165060969753e-05,
      "loss": 0.9468,
      "step": 106860
    },
    {
      "epoch": 0.37455270619324493,
      "grad_norm": 3.171875,
      "learning_rate": 4.861951603230605e-05,
      "loss": 1.0179,
      "step": 106870
    },
    {
      "epoch": 0.3745877537001405,
      "grad_norm": 2.9375,
      "learning_rate": 4.861886700364235e-05,
      "loss": 0.976,
      "step": 106880
    },
    {
      "epoch": 0.3746228012070361,
      "grad_norm": 2.828125,
      "learning_rate": 4.8618217974978644e-05,
      "loss": 1.0067,
      "step": 106890
    },
    {
      "epoch": 0.3746578487139317,
      "grad_norm": 3.125,
      "learning_rate": 4.8617568946314945e-05,
      "loss": 0.9515,
      "step": 106900
    },
    {
      "epoch": 0.37469289622082735,
      "grad_norm": 3.328125,
      "learning_rate": 4.861691991765125e-05,
      "loss": 0.9926,
      "step": 106910
    },
    {
      "epoch": 0.37472794372772295,
      "grad_norm": 2.96875,
      "learning_rate": 4.861627088898754e-05,
      "loss": 0.8826,
      "step": 106920
    },
    {
      "epoch": 0.37476299123461854,
      "grad_norm": 2.40625,
      "learning_rate": 4.861562186032384e-05,
      "loss": 0.9122,
      "step": 106930
    },
    {
      "epoch": 0.37479803874151413,
      "grad_norm": 3.53125,
      "learning_rate": 4.861497283166014e-05,
      "loss": 1.0234,
      "step": 106940
    },
    {
      "epoch": 0.3748330862484097,
      "grad_norm": 18.125,
      "learning_rate": 4.861432380299644e-05,
      "loss": 0.9405,
      "step": 106950
    },
    {
      "epoch": 0.3748681337553053,
      "grad_norm": 3.234375,
      "learning_rate": 4.8613674774332733e-05,
      "loss": 0.944,
      "step": 106960
    },
    {
      "epoch": 0.3749031812622009,
      "grad_norm": 3.59375,
      "learning_rate": 4.8613025745669035e-05,
      "loss": 0.877,
      "step": 106970
    },
    {
      "epoch": 0.3749382287690965,
      "grad_norm": 3.125,
      "learning_rate": 4.861237671700533e-05,
      "loss": 0.9148,
      "step": 106980
    },
    {
      "epoch": 0.3749732762759921,
      "grad_norm": 3.15625,
      "learning_rate": 4.861172768834163e-05,
      "loss": 0.8968,
      "step": 106990
    },
    {
      "epoch": 0.37500832378288773,
      "grad_norm": 2.84375,
      "learning_rate": 4.8611078659677925e-05,
      "loss": 0.9126,
      "step": 107000
    },
    {
      "epoch": 0.3750433712897833,
      "grad_norm": 3.328125,
      "learning_rate": 4.861042963101423e-05,
      "loss": 0.9382,
      "step": 107010
    },
    {
      "epoch": 0.3750784187966789,
      "grad_norm": 3.03125,
      "learning_rate": 4.860978060235052e-05,
      "loss": 0.823,
      "step": 107020
    },
    {
      "epoch": 0.3751134663035745,
      "grad_norm": 3.171875,
      "learning_rate": 4.860913157368682e-05,
      "loss": 0.9538,
      "step": 107030
    },
    {
      "epoch": 0.3751485138104701,
      "grad_norm": 3.03125,
      "learning_rate": 4.860848254502312e-05,
      "loss": 0.9383,
      "step": 107040
    },
    {
      "epoch": 0.3751835613173657,
      "grad_norm": 2.828125,
      "learning_rate": 4.860783351635942e-05,
      "loss": 1.0956,
      "step": 107050
    },
    {
      "epoch": 0.3752186088242613,
      "grad_norm": 2.921875,
      "learning_rate": 4.860718448769572e-05,
      "loss": 0.9895,
      "step": 107060
    },
    {
      "epoch": 0.37525365633115687,
      "grad_norm": 2.921875,
      "learning_rate": 4.8606535459032015e-05,
      "loss": 0.9686,
      "step": 107070
    },
    {
      "epoch": 0.37528870383805246,
      "grad_norm": 3.3125,
      "learning_rate": 4.8605886430368316e-05,
      "loss": 0.9493,
      "step": 107080
    },
    {
      "epoch": 0.37532375134494805,
      "grad_norm": 3.5,
      "learning_rate": 4.860523740170461e-05,
      "loss": 0.9851,
      "step": 107090
    },
    {
      "epoch": 0.3753587988518437,
      "grad_norm": 2.703125,
      "learning_rate": 4.860458837304091e-05,
      "loss": 1.0016,
      "step": 107100
    },
    {
      "epoch": 0.3753938463587393,
      "grad_norm": 3.359375,
      "learning_rate": 4.860393934437721e-05,
      "loss": 0.9869,
      "step": 107110
    },
    {
      "epoch": 0.3754288938656349,
      "grad_norm": 2.703125,
      "learning_rate": 4.860329031571351e-05,
      "loss": 0.8665,
      "step": 107120
    },
    {
      "epoch": 0.3754639413725305,
      "grad_norm": 3.25,
      "learning_rate": 4.86026412870498e-05,
      "loss": 1.006,
      "step": 107130
    },
    {
      "epoch": 0.37549898887942607,
      "grad_norm": 2.828125,
      "learning_rate": 4.8601992258386104e-05,
      "loss": 0.9931,
      "step": 107140
    },
    {
      "epoch": 0.37553403638632166,
      "grad_norm": 2.78125,
      "learning_rate": 4.86013432297224e-05,
      "loss": 0.8797,
      "step": 107150
    },
    {
      "epoch": 0.37556908389321725,
      "grad_norm": 3.5625,
      "learning_rate": 4.86006942010587e-05,
      "loss": 0.8695,
      "step": 107160
    },
    {
      "epoch": 0.37560413140011284,
      "grad_norm": 2.8125,
      "learning_rate": 4.8600045172394995e-05,
      "loss": 0.9371,
      "step": 107170
    },
    {
      "epoch": 0.37563917890700843,
      "grad_norm": 3.046875,
      "learning_rate": 4.8599396143731296e-05,
      "loss": 0.9445,
      "step": 107180
    },
    {
      "epoch": 0.375674226413904,
      "grad_norm": 2.953125,
      "learning_rate": 4.85987471150676e-05,
      "loss": 0.9294,
      "step": 107190
    },
    {
      "epoch": 0.37570927392079967,
      "grad_norm": 3.09375,
      "learning_rate": 4.859809808640389e-05,
      "loss": 1.0223,
      "step": 107200
    },
    {
      "epoch": 0.37574432142769526,
      "grad_norm": 3.421875,
      "learning_rate": 4.8597449057740194e-05,
      "loss": 0.9622,
      "step": 107210
    },
    {
      "epoch": 0.37577936893459085,
      "grad_norm": 2.71875,
      "learning_rate": 4.859680002907649e-05,
      "loss": 0.8841,
      "step": 107220
    },
    {
      "epoch": 0.37581441644148644,
      "grad_norm": 3.3125,
      "learning_rate": 4.859615100041278e-05,
      "loss": 0.9718,
      "step": 107230
    },
    {
      "epoch": 0.37584946394838203,
      "grad_norm": 2.75,
      "learning_rate": 4.859550197174908e-05,
      "loss": 1.0382,
      "step": 107240
    },
    {
      "epoch": 0.3758845114552776,
      "grad_norm": 2.9375,
      "learning_rate": 4.859485294308538e-05,
      "loss": 1.018,
      "step": 107250
    },
    {
      "epoch": 0.3759195589621732,
      "grad_norm": 3.171875,
      "learning_rate": 4.8594203914421673e-05,
      "loss": 1.0468,
      "step": 107260
    },
    {
      "epoch": 0.3759546064690688,
      "grad_norm": 2.84375,
      "learning_rate": 4.8593554885757975e-05,
      "loss": 0.8806,
      "step": 107270
    },
    {
      "epoch": 0.3759896539759644,
      "grad_norm": 3.78125,
      "learning_rate": 4.8592905857094276e-05,
      "loss": 1.0001,
      "step": 107280
    },
    {
      "epoch": 0.37602470148286,
      "grad_norm": 2.90625,
      "learning_rate": 4.859225682843057e-05,
      "loss": 0.9781,
      "step": 107290
    },
    {
      "epoch": 0.37605974898975564,
      "grad_norm": 3.078125,
      "learning_rate": 4.859160779976687e-05,
      "loss": 1.011,
      "step": 107300
    },
    {
      "epoch": 0.37609479649665123,
      "grad_norm": 3.015625,
      "learning_rate": 4.859095877110317e-05,
      "loss": 0.953,
      "step": 107310
    },
    {
      "epoch": 0.3761298440035468,
      "grad_norm": 3.203125,
      "learning_rate": 4.859030974243947e-05,
      "loss": 0.9886,
      "step": 107320
    },
    {
      "epoch": 0.3761648915104424,
      "grad_norm": 3.25,
      "learning_rate": 4.858966071377576e-05,
      "loss": 0.9177,
      "step": 107330
    },
    {
      "epoch": 0.376199939017338,
      "grad_norm": 3.359375,
      "learning_rate": 4.8589011685112064e-05,
      "loss": 0.9159,
      "step": 107340
    },
    {
      "epoch": 0.3762349865242336,
      "grad_norm": 2.859375,
      "learning_rate": 4.858836265644836e-05,
      "loss": 0.9316,
      "step": 107350
    },
    {
      "epoch": 0.3762700340311292,
      "grad_norm": 2.828125,
      "learning_rate": 4.858771362778466e-05,
      "loss": 1.0094,
      "step": 107360
    },
    {
      "epoch": 0.3763050815380248,
      "grad_norm": 3.078125,
      "learning_rate": 4.8587064599120955e-05,
      "loss": 0.9355,
      "step": 107370
    },
    {
      "epoch": 0.37634012904492037,
      "grad_norm": 2.796875,
      "learning_rate": 4.8586415570457256e-05,
      "loss": 0.8879,
      "step": 107380
    },
    {
      "epoch": 0.37637517655181596,
      "grad_norm": 2.796875,
      "learning_rate": 4.858576654179355e-05,
      "loss": 0.9932,
      "step": 107390
    },
    {
      "epoch": 0.3764102240587116,
      "grad_norm": 2.6875,
      "learning_rate": 4.858511751312985e-05,
      "loss": 0.8578,
      "step": 107400
    },
    {
      "epoch": 0.3764452715656072,
      "grad_norm": 3.0,
      "learning_rate": 4.858446848446615e-05,
      "loss": 0.8879,
      "step": 107410
    },
    {
      "epoch": 0.3764803190725028,
      "grad_norm": 2.890625,
      "learning_rate": 4.858381945580245e-05,
      "loss": 1.0346,
      "step": 107420
    },
    {
      "epoch": 0.3765153665793984,
      "grad_norm": 3.0,
      "learning_rate": 4.858317042713875e-05,
      "loss": 1.0024,
      "step": 107430
    },
    {
      "epoch": 0.37655041408629397,
      "grad_norm": 3.234375,
      "learning_rate": 4.8582521398475044e-05,
      "loss": 1.0634,
      "step": 107440
    },
    {
      "epoch": 0.37658546159318956,
      "grad_norm": 3.28125,
      "learning_rate": 4.8581872369811346e-05,
      "loss": 1.0206,
      "step": 107450
    },
    {
      "epoch": 0.37662050910008515,
      "grad_norm": 2.9375,
      "learning_rate": 4.858122334114764e-05,
      "loss": 0.9772,
      "step": 107460
    },
    {
      "epoch": 0.37665555660698075,
      "grad_norm": 3.640625,
      "learning_rate": 4.858057431248394e-05,
      "loss": 0.9546,
      "step": 107470
    },
    {
      "epoch": 0.37669060411387634,
      "grad_norm": 3.53125,
      "learning_rate": 4.8579925283820236e-05,
      "loss": 0.9855,
      "step": 107480
    },
    {
      "epoch": 0.376725651620772,
      "grad_norm": 3.0625,
      "learning_rate": 4.857927625515654e-05,
      "loss": 0.8705,
      "step": 107490
    },
    {
      "epoch": 0.3767606991276676,
      "grad_norm": 2.859375,
      "learning_rate": 4.857862722649283e-05,
      "loss": 0.9386,
      "step": 107500
    },
    {
      "epoch": 0.37679574663456317,
      "grad_norm": 3.171875,
      "learning_rate": 4.8577978197829134e-05,
      "loss": 0.9298,
      "step": 107510
    },
    {
      "epoch": 0.37683079414145876,
      "grad_norm": 2.984375,
      "learning_rate": 4.857732916916543e-05,
      "loss": 0.946,
      "step": 107520
    },
    {
      "epoch": 0.37686584164835435,
      "grad_norm": 2.8125,
      "learning_rate": 4.857668014050173e-05,
      "loss": 0.9768,
      "step": 107530
    },
    {
      "epoch": 0.37690088915524994,
      "grad_norm": 3.34375,
      "learning_rate": 4.8576031111838024e-05,
      "loss": 0.94,
      "step": 107540
    },
    {
      "epoch": 0.37693593666214553,
      "grad_norm": 2.84375,
      "learning_rate": 4.8575382083174326e-05,
      "loss": 1.0068,
      "step": 107550
    },
    {
      "epoch": 0.3769709841690411,
      "grad_norm": 3.21875,
      "learning_rate": 4.857473305451063e-05,
      "loss": 1.0142,
      "step": 107560
    },
    {
      "epoch": 0.3770060316759367,
      "grad_norm": 2.546875,
      "learning_rate": 4.857408402584692e-05,
      "loss": 0.9317,
      "step": 107570
    },
    {
      "epoch": 0.3770410791828323,
      "grad_norm": 3.265625,
      "learning_rate": 4.857343499718322e-05,
      "loss": 0.9729,
      "step": 107580
    },
    {
      "epoch": 0.37707612668972795,
      "grad_norm": 3.25,
      "learning_rate": 4.857278596851952e-05,
      "loss": 0.9747,
      "step": 107590
    },
    {
      "epoch": 0.37711117419662354,
      "grad_norm": 3.25,
      "learning_rate": 4.857213693985582e-05,
      "loss": 0.9014,
      "step": 107600
    },
    {
      "epoch": 0.37714622170351914,
      "grad_norm": 3.421875,
      "learning_rate": 4.857148791119211e-05,
      "loss": 0.909,
      "step": 107610
    },
    {
      "epoch": 0.3771812692104147,
      "grad_norm": 2.625,
      "learning_rate": 4.857083888252841e-05,
      "loss": 0.9017,
      "step": 107620
    },
    {
      "epoch": 0.3772163167173103,
      "grad_norm": 3.296875,
      "learning_rate": 4.85701898538647e-05,
      "loss": 1.0006,
      "step": 107630
    },
    {
      "epoch": 0.3772513642242059,
      "grad_norm": 3.015625,
      "learning_rate": 4.8569540825201004e-05,
      "loss": 0.8962,
      "step": 107640
    },
    {
      "epoch": 0.3772864117311015,
      "grad_norm": 3.015625,
      "learning_rate": 4.8568891796537306e-05,
      "loss": 1.0088,
      "step": 107650
    },
    {
      "epoch": 0.3773214592379971,
      "grad_norm": 3.40625,
      "learning_rate": 4.85682427678736e-05,
      "loss": 0.9656,
      "step": 107660
    },
    {
      "epoch": 0.3773565067448927,
      "grad_norm": 3.0,
      "learning_rate": 4.85675937392099e-05,
      "loss": 0.9903,
      "step": 107670
    },
    {
      "epoch": 0.3773915542517883,
      "grad_norm": 3.046875,
      "learning_rate": 4.8566944710546196e-05,
      "loss": 0.9904,
      "step": 107680
    },
    {
      "epoch": 0.3774266017586839,
      "grad_norm": 3.1875,
      "learning_rate": 4.85662956818825e-05,
      "loss": 1.0032,
      "step": 107690
    },
    {
      "epoch": 0.3774616492655795,
      "grad_norm": 3.125,
      "learning_rate": 4.856564665321879e-05,
      "loss": 0.9686,
      "step": 107700
    },
    {
      "epoch": 0.3774966967724751,
      "grad_norm": 3.03125,
      "learning_rate": 4.8564997624555094e-05,
      "loss": 0.9802,
      "step": 107710
    },
    {
      "epoch": 0.3775317442793707,
      "grad_norm": 3.203125,
      "learning_rate": 4.856434859589139e-05,
      "loss": 0.9383,
      "step": 107720
    },
    {
      "epoch": 0.3775667917862663,
      "grad_norm": 3.234375,
      "learning_rate": 4.856369956722769e-05,
      "loss": 0.9263,
      "step": 107730
    },
    {
      "epoch": 0.3776018392931619,
      "grad_norm": 3.28125,
      "learning_rate": 4.8563050538563984e-05,
      "loss": 0.9599,
      "step": 107740
    },
    {
      "epoch": 0.37763688680005747,
      "grad_norm": 3.109375,
      "learning_rate": 4.8562401509900286e-05,
      "loss": 0.9778,
      "step": 107750
    },
    {
      "epoch": 0.37767193430695306,
      "grad_norm": 3.296875,
      "learning_rate": 4.856175248123658e-05,
      "loss": 0.9905,
      "step": 107760
    },
    {
      "epoch": 0.37770698181384865,
      "grad_norm": 2.90625,
      "learning_rate": 4.856110345257288e-05,
      "loss": 0.9415,
      "step": 107770
    },
    {
      "epoch": 0.37774202932074424,
      "grad_norm": 3.078125,
      "learning_rate": 4.856045442390918e-05,
      "loss": 1.005,
      "step": 107780
    },
    {
      "epoch": 0.3777770768276399,
      "grad_norm": 2.671875,
      "learning_rate": 4.855980539524548e-05,
      "loss": 1.0015,
      "step": 107790
    },
    {
      "epoch": 0.3778121243345355,
      "grad_norm": 2.4375,
      "learning_rate": 4.855915636658178e-05,
      "loss": 0.8931,
      "step": 107800
    },
    {
      "epoch": 0.3778471718414311,
      "grad_norm": 3.171875,
      "learning_rate": 4.8558507337918074e-05,
      "loss": 0.9324,
      "step": 107810
    },
    {
      "epoch": 0.37788221934832666,
      "grad_norm": 3.125,
      "learning_rate": 4.8557858309254375e-05,
      "loss": 0.9371,
      "step": 107820
    },
    {
      "epoch": 0.37791726685522226,
      "grad_norm": 4.03125,
      "learning_rate": 4.855720928059067e-05,
      "loss": 0.9431,
      "step": 107830
    },
    {
      "epoch": 0.37795231436211785,
      "grad_norm": 3.40625,
      "learning_rate": 4.855656025192697e-05,
      "loss": 0.9703,
      "step": 107840
    },
    {
      "epoch": 0.37798736186901344,
      "grad_norm": 3.09375,
      "learning_rate": 4.8555911223263266e-05,
      "loss": 1.0184,
      "step": 107850
    },
    {
      "epoch": 0.37802240937590903,
      "grad_norm": 2.96875,
      "learning_rate": 4.855526219459957e-05,
      "loss": 0.9595,
      "step": 107860
    },
    {
      "epoch": 0.3780574568828046,
      "grad_norm": 2.6875,
      "learning_rate": 4.855461316593586e-05,
      "loss": 0.9994,
      "step": 107870
    },
    {
      "epoch": 0.3780925043897002,
      "grad_norm": 3.234375,
      "learning_rate": 4.855396413727216e-05,
      "loss": 0.9483,
      "step": 107880
    },
    {
      "epoch": 0.37812755189659586,
      "grad_norm": 2.84375,
      "learning_rate": 4.855331510860846e-05,
      "loss": 0.9738,
      "step": 107890
    },
    {
      "epoch": 0.37816259940349145,
      "grad_norm": 2.875,
      "learning_rate": 4.855266607994476e-05,
      "loss": 0.9145,
      "step": 107900
    },
    {
      "epoch": 0.37819764691038704,
      "grad_norm": 3.359375,
      "learning_rate": 4.8552017051281054e-05,
      "loss": 0.9179,
      "step": 107910
    },
    {
      "epoch": 0.37823269441728263,
      "grad_norm": 2.953125,
      "learning_rate": 4.8551368022617355e-05,
      "loss": 0.9859,
      "step": 107920
    },
    {
      "epoch": 0.3782677419241782,
      "grad_norm": 2.734375,
      "learning_rate": 4.8550718993953656e-05,
      "loss": 0.9623,
      "step": 107930
    },
    {
      "epoch": 0.3783027894310738,
      "grad_norm": 3.0625,
      "learning_rate": 4.855006996528995e-05,
      "loss": 0.8884,
      "step": 107940
    },
    {
      "epoch": 0.3783378369379694,
      "grad_norm": 2.609375,
      "learning_rate": 4.854942093662625e-05,
      "loss": 0.9734,
      "step": 107950
    },
    {
      "epoch": 0.378372884444865,
      "grad_norm": 2.796875,
      "learning_rate": 4.854877190796255e-05,
      "loss": 0.9804,
      "step": 107960
    },
    {
      "epoch": 0.3784079319517606,
      "grad_norm": 3.53125,
      "learning_rate": 4.854812287929885e-05,
      "loss": 0.9215,
      "step": 107970
    },
    {
      "epoch": 0.3784429794586562,
      "grad_norm": 2.90625,
      "learning_rate": 4.854747385063514e-05,
      "loss": 1.0422,
      "step": 107980
    },
    {
      "epoch": 0.37847802696555183,
      "grad_norm": 3.203125,
      "learning_rate": 4.854682482197144e-05,
      "loss": 0.9489,
      "step": 107990
    },
    {
      "epoch": 0.3785130744724474,
      "grad_norm": 2.46875,
      "learning_rate": 4.854617579330773e-05,
      "loss": 0.9348,
      "step": 108000
    },
    {
      "epoch": 0.378548121979343,
      "grad_norm": 3.296875,
      "learning_rate": 4.8545526764644034e-05,
      "loss": 0.9115,
      "step": 108010
    },
    {
      "epoch": 0.3785831694862386,
      "grad_norm": 2.890625,
      "learning_rate": 4.8544877735980335e-05,
      "loss": 1.0029,
      "step": 108020
    },
    {
      "epoch": 0.3786182169931342,
      "grad_norm": 2.921875,
      "learning_rate": 4.854422870731663e-05,
      "loss": 0.8568,
      "step": 108030
    },
    {
      "epoch": 0.3786532645000298,
      "grad_norm": 2.828125,
      "learning_rate": 4.854357967865293e-05,
      "loss": 0.9601,
      "step": 108040
    },
    {
      "epoch": 0.3786883120069254,
      "grad_norm": 3.375,
      "learning_rate": 4.8542930649989226e-05,
      "loss": 1.0247,
      "step": 108050
    },
    {
      "epoch": 0.37872335951382097,
      "grad_norm": 3.34375,
      "learning_rate": 4.854228162132553e-05,
      "loss": 0.9121,
      "step": 108060
    },
    {
      "epoch": 0.37875840702071656,
      "grad_norm": 2.84375,
      "learning_rate": 4.854163259266182e-05,
      "loss": 0.9531,
      "step": 108070
    },
    {
      "epoch": 0.3787934545276122,
      "grad_norm": 3.09375,
      "learning_rate": 4.854098356399812e-05,
      "loss": 0.9362,
      "step": 108080
    },
    {
      "epoch": 0.3788285020345078,
      "grad_norm": 3.125,
      "learning_rate": 4.854033453533442e-05,
      "loss": 0.9444,
      "step": 108090
    },
    {
      "epoch": 0.3788635495414034,
      "grad_norm": 2.953125,
      "learning_rate": 4.853968550667072e-05,
      "loss": 0.933,
      "step": 108100
    },
    {
      "epoch": 0.378898597048299,
      "grad_norm": 2.953125,
      "learning_rate": 4.8539036478007014e-05,
      "loss": 0.9275,
      "step": 108110
    },
    {
      "epoch": 0.37893364455519457,
      "grad_norm": 3.703125,
      "learning_rate": 4.8538387449343315e-05,
      "loss": 0.9539,
      "step": 108120
    },
    {
      "epoch": 0.37896869206209016,
      "grad_norm": 3.109375,
      "learning_rate": 4.853773842067961e-05,
      "loss": 0.9045,
      "step": 108130
    },
    {
      "epoch": 0.37900373956898575,
      "grad_norm": 3.234375,
      "learning_rate": 4.853708939201591e-05,
      "loss": 0.925,
      "step": 108140
    },
    {
      "epoch": 0.37903878707588134,
      "grad_norm": 3.296875,
      "learning_rate": 4.853644036335221e-05,
      "loss": 0.9294,
      "step": 108150
    },
    {
      "epoch": 0.37907383458277694,
      "grad_norm": 3.15625,
      "learning_rate": 4.853579133468851e-05,
      "loss": 1.0106,
      "step": 108160
    },
    {
      "epoch": 0.3791088820896725,
      "grad_norm": 3.28125,
      "learning_rate": 4.853514230602481e-05,
      "loss": 1.014,
      "step": 108170
    },
    {
      "epoch": 0.3791439295965682,
      "grad_norm": 2.671875,
      "learning_rate": 4.85344932773611e-05,
      "loss": 0.9521,
      "step": 108180
    },
    {
      "epoch": 0.37917897710346377,
      "grad_norm": 2.96875,
      "learning_rate": 4.8533844248697404e-05,
      "loss": 0.9898,
      "step": 108190
    },
    {
      "epoch": 0.37921402461035936,
      "grad_norm": 2.96875,
      "learning_rate": 4.85331952200337e-05,
      "loss": 0.9743,
      "step": 108200
    },
    {
      "epoch": 0.37924907211725495,
      "grad_norm": 3.015625,
      "learning_rate": 4.853254619137e-05,
      "loss": 0.9389,
      "step": 108210
    },
    {
      "epoch": 0.37928411962415054,
      "grad_norm": 3.21875,
      "learning_rate": 4.8531897162706295e-05,
      "loss": 0.9821,
      "step": 108220
    },
    {
      "epoch": 0.37931916713104613,
      "grad_norm": 3.078125,
      "learning_rate": 4.8531248134042596e-05,
      "loss": 0.994,
      "step": 108230
    },
    {
      "epoch": 0.3793542146379417,
      "grad_norm": 2.859375,
      "learning_rate": 4.853059910537889e-05,
      "loss": 0.9416,
      "step": 108240
    },
    {
      "epoch": 0.3793892621448373,
      "grad_norm": 3.015625,
      "learning_rate": 4.852995007671519e-05,
      "loss": 0.9954,
      "step": 108250
    },
    {
      "epoch": 0.3794243096517329,
      "grad_norm": 2.828125,
      "learning_rate": 4.852930104805149e-05,
      "loss": 1.0075,
      "step": 108260
    },
    {
      "epoch": 0.3794593571586285,
      "grad_norm": 3.1875,
      "learning_rate": 4.852865201938779e-05,
      "loss": 0.9545,
      "step": 108270
    },
    {
      "epoch": 0.37949440466552414,
      "grad_norm": 2.9375,
      "learning_rate": 4.852800299072408e-05,
      "loss": 1.0008,
      "step": 108280
    },
    {
      "epoch": 0.37952945217241973,
      "grad_norm": 3.375,
      "learning_rate": 4.8527353962060384e-05,
      "loss": 0.9922,
      "step": 108290
    },
    {
      "epoch": 0.3795644996793153,
      "grad_norm": 3.0,
      "learning_rate": 4.8526704933396686e-05,
      "loss": 0.9627,
      "step": 108300
    },
    {
      "epoch": 0.3795995471862109,
      "grad_norm": 2.734375,
      "learning_rate": 4.852605590473298e-05,
      "loss": 0.9237,
      "step": 108310
    },
    {
      "epoch": 0.3796345946931065,
      "grad_norm": 2.8125,
      "learning_rate": 4.852540687606928e-05,
      "loss": 0.9804,
      "step": 108320
    },
    {
      "epoch": 0.3796696422000021,
      "grad_norm": 2.84375,
      "learning_rate": 4.8524757847405576e-05,
      "loss": 0.9858,
      "step": 108330
    },
    {
      "epoch": 0.3797046897068977,
      "grad_norm": 3.359375,
      "learning_rate": 4.852410881874188e-05,
      "loss": 0.9901,
      "step": 108340
    },
    {
      "epoch": 0.3797397372137933,
      "grad_norm": 2.765625,
      "learning_rate": 4.852345979007817e-05,
      "loss": 1.0451,
      "step": 108350
    },
    {
      "epoch": 0.3797747847206889,
      "grad_norm": 2.921875,
      "learning_rate": 4.852281076141447e-05,
      "loss": 0.9176,
      "step": 108360
    },
    {
      "epoch": 0.37980983222758447,
      "grad_norm": 2.9375,
      "learning_rate": 4.852216173275076e-05,
      "loss": 0.8997,
      "step": 108370
    },
    {
      "epoch": 0.3798448797344801,
      "grad_norm": 3.015625,
      "learning_rate": 4.852151270408706e-05,
      "loss": 0.9995,
      "step": 108380
    },
    {
      "epoch": 0.3798799272413757,
      "grad_norm": 2.703125,
      "learning_rate": 4.8520863675423364e-05,
      "loss": 0.8974,
      "step": 108390
    },
    {
      "epoch": 0.3799149747482713,
      "grad_norm": 3.140625,
      "learning_rate": 4.852021464675966e-05,
      "loss": 1.0536,
      "step": 108400
    },
    {
      "epoch": 0.3799500222551669,
      "grad_norm": 3.203125,
      "learning_rate": 4.851956561809596e-05,
      "loss": 1.0021,
      "step": 108410
    },
    {
      "epoch": 0.3799850697620625,
      "grad_norm": 2.921875,
      "learning_rate": 4.8518916589432255e-05,
      "loss": 0.9404,
      "step": 108420
    },
    {
      "epoch": 0.38002011726895807,
      "grad_norm": 3.390625,
      "learning_rate": 4.8518267560768556e-05,
      "loss": 0.9974,
      "step": 108430
    },
    {
      "epoch": 0.38005516477585366,
      "grad_norm": 3.046875,
      "learning_rate": 4.851761853210485e-05,
      "loss": 0.9454,
      "step": 108440
    },
    {
      "epoch": 0.38009021228274925,
      "grad_norm": 2.78125,
      "learning_rate": 4.851696950344115e-05,
      "loss": 0.9606,
      "step": 108450
    },
    {
      "epoch": 0.38012525978964484,
      "grad_norm": 2.875,
      "learning_rate": 4.851632047477745e-05,
      "loss": 0.9599,
      "step": 108460
    },
    {
      "epoch": 0.38016030729654043,
      "grad_norm": 3.0625,
      "learning_rate": 4.851567144611375e-05,
      "loss": 0.9463,
      "step": 108470
    },
    {
      "epoch": 0.3801953548034361,
      "grad_norm": 3.515625,
      "learning_rate": 4.851502241745004e-05,
      "loss": 0.9488,
      "step": 108480
    },
    {
      "epoch": 0.38023040231033167,
      "grad_norm": 2.921875,
      "learning_rate": 4.8514373388786344e-05,
      "loss": 0.9831,
      "step": 108490
    },
    {
      "epoch": 0.38026544981722726,
      "grad_norm": 3.109375,
      "learning_rate": 4.851372436012264e-05,
      "loss": 1.0687,
      "step": 108500
    },
    {
      "epoch": 0.38030049732412285,
      "grad_norm": 3.328125,
      "learning_rate": 4.851307533145894e-05,
      "loss": 0.8864,
      "step": 108510
    },
    {
      "epoch": 0.38033554483101845,
      "grad_norm": 2.609375,
      "learning_rate": 4.851242630279524e-05,
      "loss": 0.9496,
      "step": 108520
    },
    {
      "epoch": 0.38037059233791404,
      "grad_norm": 2.953125,
      "learning_rate": 4.8511777274131536e-05,
      "loss": 0.8889,
      "step": 108530
    },
    {
      "epoch": 0.38040563984480963,
      "grad_norm": 3.171875,
      "learning_rate": 4.851112824546784e-05,
      "loss": 0.9284,
      "step": 108540
    },
    {
      "epoch": 0.3804406873517052,
      "grad_norm": 2.859375,
      "learning_rate": 4.851047921680413e-05,
      "loss": 0.9216,
      "step": 108550
    },
    {
      "epoch": 0.3804757348586008,
      "grad_norm": 2.703125,
      "learning_rate": 4.8509830188140434e-05,
      "loss": 0.9972,
      "step": 108560
    },
    {
      "epoch": 0.3805107823654964,
      "grad_norm": 3.09375,
      "learning_rate": 4.850918115947673e-05,
      "loss": 0.9884,
      "step": 108570
    },
    {
      "epoch": 0.38054582987239205,
      "grad_norm": 3.03125,
      "learning_rate": 4.850853213081303e-05,
      "loss": 0.9929,
      "step": 108580
    },
    {
      "epoch": 0.38058087737928764,
      "grad_norm": 2.9375,
      "learning_rate": 4.8507883102149324e-05,
      "loss": 0.9901,
      "step": 108590
    },
    {
      "epoch": 0.38061592488618323,
      "grad_norm": 2.921875,
      "learning_rate": 4.8507234073485626e-05,
      "loss": 0.9408,
      "step": 108600
    },
    {
      "epoch": 0.3806509723930788,
      "grad_norm": 3.203125,
      "learning_rate": 4.850658504482192e-05,
      "loss": 0.9499,
      "step": 108610
    },
    {
      "epoch": 0.3806860198999744,
      "grad_norm": 2.90625,
      "learning_rate": 4.850593601615822e-05,
      "loss": 0.977,
      "step": 108620
    },
    {
      "epoch": 0.38072106740687,
      "grad_norm": 2.859375,
      "learning_rate": 4.8505286987494516e-05,
      "loss": 0.9651,
      "step": 108630
    },
    {
      "epoch": 0.3807561149137656,
      "grad_norm": 2.875,
      "learning_rate": 4.850463795883082e-05,
      "loss": 0.9288,
      "step": 108640
    },
    {
      "epoch": 0.3807911624206612,
      "grad_norm": 3.015625,
      "learning_rate": 4.850398893016711e-05,
      "loss": 1.0479,
      "step": 108650
    },
    {
      "epoch": 0.3808262099275568,
      "grad_norm": 3.03125,
      "learning_rate": 4.8503339901503414e-05,
      "loss": 1.0612,
      "step": 108660
    },
    {
      "epoch": 0.3808612574344524,
      "grad_norm": 2.734375,
      "learning_rate": 4.8502690872839715e-05,
      "loss": 0.919,
      "step": 108670
    },
    {
      "epoch": 0.380896304941348,
      "grad_norm": 2.875,
      "learning_rate": 4.850204184417601e-05,
      "loss": 0.9931,
      "step": 108680
    },
    {
      "epoch": 0.3809313524482436,
      "grad_norm": 3.421875,
      "learning_rate": 4.850139281551231e-05,
      "loss": 0.929,
      "step": 108690
    },
    {
      "epoch": 0.3809663999551392,
      "grad_norm": 3.28125,
      "learning_rate": 4.8500743786848606e-05,
      "loss": 0.9011,
      "step": 108700
    },
    {
      "epoch": 0.3810014474620348,
      "grad_norm": 3.328125,
      "learning_rate": 4.850009475818491e-05,
      "loss": 1.0024,
      "step": 108710
    },
    {
      "epoch": 0.3810364949689304,
      "grad_norm": 2.765625,
      "learning_rate": 4.84994457295212e-05,
      "loss": 0.8568,
      "step": 108720
    },
    {
      "epoch": 0.381071542475826,
      "grad_norm": 3.171875,
      "learning_rate": 4.84987967008575e-05,
      "loss": 0.9575,
      "step": 108730
    },
    {
      "epoch": 0.38110658998272157,
      "grad_norm": 3.1875,
      "learning_rate": 4.84981476721938e-05,
      "loss": 0.9999,
      "step": 108740
    },
    {
      "epoch": 0.38114163748961716,
      "grad_norm": 2.734375,
      "learning_rate": 4.849749864353009e-05,
      "loss": 0.9357,
      "step": 108750
    },
    {
      "epoch": 0.38117668499651275,
      "grad_norm": 3.234375,
      "learning_rate": 4.8496849614866394e-05,
      "loss": 0.9277,
      "step": 108760
    },
    {
      "epoch": 0.3812117325034084,
      "grad_norm": 2.5,
      "learning_rate": 4.849620058620269e-05,
      "loss": 0.9357,
      "step": 108770
    },
    {
      "epoch": 0.381246780010304,
      "grad_norm": 2.625,
      "learning_rate": 4.849555155753899e-05,
      "loss": 0.9106,
      "step": 108780
    },
    {
      "epoch": 0.3812818275171996,
      "grad_norm": 3.109375,
      "learning_rate": 4.8494902528875284e-05,
      "loss": 0.9281,
      "step": 108790
    },
    {
      "epoch": 0.38131687502409517,
      "grad_norm": 2.84375,
      "learning_rate": 4.8494253500211586e-05,
      "loss": 0.8403,
      "step": 108800
    },
    {
      "epoch": 0.38135192253099076,
      "grad_norm": 3.09375,
      "learning_rate": 4.849360447154788e-05,
      "loss": 0.9438,
      "step": 108810
    },
    {
      "epoch": 0.38138697003788635,
      "grad_norm": 3.0,
      "learning_rate": 4.849295544288418e-05,
      "loss": 0.9004,
      "step": 108820
    },
    {
      "epoch": 0.38142201754478194,
      "grad_norm": 3.1875,
      "learning_rate": 4.8492306414220476e-05,
      "loss": 0.9582,
      "step": 108830
    },
    {
      "epoch": 0.38145706505167754,
      "grad_norm": 3.0625,
      "learning_rate": 4.849165738555678e-05,
      "loss": 0.9324,
      "step": 108840
    },
    {
      "epoch": 0.3814921125585731,
      "grad_norm": 3.625,
      "learning_rate": 4.849100835689307e-05,
      "loss": 1.0226,
      "step": 108850
    },
    {
      "epoch": 0.3815271600654687,
      "grad_norm": 3.0625,
      "learning_rate": 4.8490359328229374e-05,
      "loss": 0.9568,
      "step": 108860
    },
    {
      "epoch": 0.38156220757236436,
      "grad_norm": 3.34375,
      "learning_rate": 4.848971029956567e-05,
      "loss": 1.023,
      "step": 108870
    },
    {
      "epoch": 0.38159725507925996,
      "grad_norm": 2.890625,
      "learning_rate": 4.848906127090197e-05,
      "loss": 0.9489,
      "step": 108880
    },
    {
      "epoch": 0.38163230258615555,
      "grad_norm": 3.078125,
      "learning_rate": 4.848841224223827e-05,
      "loss": 0.964,
      "step": 108890
    },
    {
      "epoch": 0.38166735009305114,
      "grad_norm": 2.859375,
      "learning_rate": 4.8487763213574566e-05,
      "loss": 0.8943,
      "step": 108900
    },
    {
      "epoch": 0.38170239759994673,
      "grad_norm": 2.890625,
      "learning_rate": 4.848711418491087e-05,
      "loss": 0.9524,
      "step": 108910
    },
    {
      "epoch": 0.3817374451068423,
      "grad_norm": 3.203125,
      "learning_rate": 4.848646515624716e-05,
      "loss": 0.9361,
      "step": 108920
    },
    {
      "epoch": 0.3817724926137379,
      "grad_norm": 3.1875,
      "learning_rate": 4.848581612758346e-05,
      "loss": 0.9628,
      "step": 108930
    },
    {
      "epoch": 0.3818075401206335,
      "grad_norm": 2.90625,
      "learning_rate": 4.848516709891976e-05,
      "loss": 0.9028,
      "step": 108940
    },
    {
      "epoch": 0.3818425876275291,
      "grad_norm": 2.953125,
      "learning_rate": 4.848451807025606e-05,
      "loss": 0.9764,
      "step": 108950
    },
    {
      "epoch": 0.3818776351344247,
      "grad_norm": 3.171875,
      "learning_rate": 4.8483869041592354e-05,
      "loss": 0.9466,
      "step": 108960
    },
    {
      "epoch": 0.38191268264132033,
      "grad_norm": 3.03125,
      "learning_rate": 4.8483220012928655e-05,
      "loss": 0.9069,
      "step": 108970
    },
    {
      "epoch": 0.3819477301482159,
      "grad_norm": 3.34375,
      "learning_rate": 4.848257098426495e-05,
      "loss": 0.9913,
      "step": 108980
    },
    {
      "epoch": 0.3819827776551115,
      "grad_norm": 3.109375,
      "learning_rate": 4.848192195560125e-05,
      "loss": 0.8854,
      "step": 108990
    },
    {
      "epoch": 0.3820178251620071,
      "grad_norm": 2.96875,
      "learning_rate": 4.8481272926937546e-05,
      "loss": 0.902,
      "step": 109000
    },
    {
      "epoch": 0.3820528726689027,
      "grad_norm": 2.984375,
      "learning_rate": 4.848062389827385e-05,
      "loss": 0.9352,
      "step": 109010
    },
    {
      "epoch": 0.3820879201757983,
      "grad_norm": 3.265625,
      "learning_rate": 4.847997486961015e-05,
      "loss": 1.0296,
      "step": 109020
    },
    {
      "epoch": 0.3821229676826939,
      "grad_norm": 3.265625,
      "learning_rate": 4.847932584094644e-05,
      "loss": 1.0088,
      "step": 109030
    },
    {
      "epoch": 0.3821580151895895,
      "grad_norm": 2.5625,
      "learning_rate": 4.8478676812282745e-05,
      "loss": 0.8817,
      "step": 109040
    },
    {
      "epoch": 0.38219306269648506,
      "grad_norm": 2.796875,
      "learning_rate": 4.847802778361904e-05,
      "loss": 0.9584,
      "step": 109050
    },
    {
      "epoch": 0.38222811020338066,
      "grad_norm": 2.578125,
      "learning_rate": 4.847737875495534e-05,
      "loss": 0.9069,
      "step": 109060
    },
    {
      "epoch": 0.3822631577102763,
      "grad_norm": 3.078125,
      "learning_rate": 4.8476729726291635e-05,
      "loss": 0.9552,
      "step": 109070
    },
    {
      "epoch": 0.3822982052171719,
      "grad_norm": 2.921875,
      "learning_rate": 4.8476080697627937e-05,
      "loss": 0.9216,
      "step": 109080
    },
    {
      "epoch": 0.3823332527240675,
      "grad_norm": 2.6875,
      "learning_rate": 4.847543166896423e-05,
      "loss": 1.0379,
      "step": 109090
    },
    {
      "epoch": 0.3823683002309631,
      "grad_norm": 3.578125,
      "learning_rate": 4.847478264030053e-05,
      "loss": 0.9165,
      "step": 109100
    },
    {
      "epoch": 0.38240334773785867,
      "grad_norm": 3.109375,
      "learning_rate": 4.847413361163683e-05,
      "loss": 0.8615,
      "step": 109110
    },
    {
      "epoch": 0.38243839524475426,
      "grad_norm": 3.328125,
      "learning_rate": 4.847348458297312e-05,
      "loss": 0.9157,
      "step": 109120
    },
    {
      "epoch": 0.38247344275164985,
      "grad_norm": 3.09375,
      "learning_rate": 4.847283555430942e-05,
      "loss": 0.9496,
      "step": 109130
    },
    {
      "epoch": 0.38250849025854544,
      "grad_norm": 3.140625,
      "learning_rate": 4.847218652564572e-05,
      "loss": 0.9135,
      "step": 109140
    },
    {
      "epoch": 0.38254353776544103,
      "grad_norm": 2.984375,
      "learning_rate": 4.847153749698202e-05,
      "loss": 0.8833,
      "step": 109150
    },
    {
      "epoch": 0.3825785852723367,
      "grad_norm": 3.4375,
      "learning_rate": 4.8470888468318314e-05,
      "loss": 0.92,
      "step": 109160
    },
    {
      "epoch": 0.38261363277923227,
      "grad_norm": 3.71875,
      "learning_rate": 4.8470239439654615e-05,
      "loss": 0.9381,
      "step": 109170
    },
    {
      "epoch": 0.38264868028612786,
      "grad_norm": 2.65625,
      "learning_rate": 4.846959041099091e-05,
      "loss": 1.0051,
      "step": 109180
    },
    {
      "epoch": 0.38268372779302345,
      "grad_norm": 2.890625,
      "learning_rate": 4.846894138232721e-05,
      "loss": 1.0017,
      "step": 109190
    },
    {
      "epoch": 0.38271877529991905,
      "grad_norm": 2.96875,
      "learning_rate": 4.8468292353663506e-05,
      "loss": 0.9252,
      "step": 109200
    },
    {
      "epoch": 0.38275382280681464,
      "grad_norm": 2.828125,
      "learning_rate": 4.846764332499981e-05,
      "loss": 0.9865,
      "step": 109210
    },
    {
      "epoch": 0.3827888703137102,
      "grad_norm": 3.0625,
      "learning_rate": 4.84669942963361e-05,
      "loss": 0.8627,
      "step": 109220
    },
    {
      "epoch": 0.3828239178206058,
      "grad_norm": 3.65625,
      "learning_rate": 4.84663452676724e-05,
      "loss": 0.9281,
      "step": 109230
    },
    {
      "epoch": 0.3828589653275014,
      "grad_norm": 2.921875,
      "learning_rate": 4.84656962390087e-05,
      "loss": 0.9007,
      "step": 109240
    },
    {
      "epoch": 0.382894012834397,
      "grad_norm": 3.25,
      "learning_rate": 4.8465047210345e-05,
      "loss": 0.9913,
      "step": 109250
    },
    {
      "epoch": 0.38292906034129265,
      "grad_norm": 2.78125,
      "learning_rate": 4.84643981816813e-05,
      "loss": 0.97,
      "step": 109260
    },
    {
      "epoch": 0.38296410784818824,
      "grad_norm": 3.015625,
      "learning_rate": 4.8463749153017595e-05,
      "loss": 0.965,
      "step": 109270
    },
    {
      "epoch": 0.38299915535508383,
      "grad_norm": 3.0,
      "learning_rate": 4.8463100124353897e-05,
      "loss": 0.9238,
      "step": 109280
    },
    {
      "epoch": 0.3830342028619794,
      "grad_norm": 2.671875,
      "learning_rate": 4.846245109569019e-05,
      "loss": 0.9459,
      "step": 109290
    },
    {
      "epoch": 0.383069250368875,
      "grad_norm": 3.21875,
      "learning_rate": 4.846180206702649e-05,
      "loss": 0.9554,
      "step": 109300
    },
    {
      "epoch": 0.3831042978757706,
      "grad_norm": 2.96875,
      "learning_rate": 4.846115303836279e-05,
      "loss": 0.9559,
      "step": 109310
    },
    {
      "epoch": 0.3831393453826662,
      "grad_norm": 3.21875,
      "learning_rate": 4.846050400969909e-05,
      "loss": 0.9417,
      "step": 109320
    },
    {
      "epoch": 0.3831743928895618,
      "grad_norm": 3.125,
      "learning_rate": 4.845985498103538e-05,
      "loss": 0.8164,
      "step": 109330
    },
    {
      "epoch": 0.3832094403964574,
      "grad_norm": 3.0,
      "learning_rate": 4.8459205952371685e-05,
      "loss": 0.9484,
      "step": 109340
    },
    {
      "epoch": 0.38324448790335297,
      "grad_norm": 3.0,
      "learning_rate": 4.845855692370798e-05,
      "loss": 0.8001,
      "step": 109350
    },
    {
      "epoch": 0.3832795354102486,
      "grad_norm": 3.734375,
      "learning_rate": 4.845790789504428e-05,
      "loss": 0.9614,
      "step": 109360
    },
    {
      "epoch": 0.3833145829171442,
      "grad_norm": 3.015625,
      "learning_rate": 4.8457258866380575e-05,
      "loss": 0.9475,
      "step": 109370
    },
    {
      "epoch": 0.3833496304240398,
      "grad_norm": 2.890625,
      "learning_rate": 4.8456609837716877e-05,
      "loss": 0.9977,
      "step": 109380
    },
    {
      "epoch": 0.3833846779309354,
      "grad_norm": 3.0,
      "learning_rate": 4.845596080905318e-05,
      "loss": 0.9402,
      "step": 109390
    },
    {
      "epoch": 0.383419725437831,
      "grad_norm": 3.046875,
      "learning_rate": 4.845531178038947e-05,
      "loss": 1.006,
      "step": 109400
    },
    {
      "epoch": 0.3834547729447266,
      "grad_norm": 2.6875,
      "learning_rate": 4.8454662751725774e-05,
      "loss": 0.958,
      "step": 109410
    },
    {
      "epoch": 0.38348982045162217,
      "grad_norm": 2.40625,
      "learning_rate": 4.845401372306207e-05,
      "loss": 0.9735,
      "step": 109420
    },
    {
      "epoch": 0.38352486795851776,
      "grad_norm": 2.9375,
      "learning_rate": 4.845336469439837e-05,
      "loss": 0.8922,
      "step": 109430
    },
    {
      "epoch": 0.38355991546541335,
      "grad_norm": 3.296875,
      "learning_rate": 4.8452715665734665e-05,
      "loss": 0.8538,
      "step": 109440
    },
    {
      "epoch": 0.38359496297230894,
      "grad_norm": 3.21875,
      "learning_rate": 4.8452066637070966e-05,
      "loss": 0.9118,
      "step": 109450
    },
    {
      "epoch": 0.3836300104792046,
      "grad_norm": 3.140625,
      "learning_rate": 4.845141760840726e-05,
      "loss": 0.9369,
      "step": 109460
    },
    {
      "epoch": 0.3836650579861002,
      "grad_norm": 2.953125,
      "learning_rate": 4.845076857974356e-05,
      "loss": 0.9835,
      "step": 109470
    },
    {
      "epoch": 0.38370010549299577,
      "grad_norm": 3.25,
      "learning_rate": 4.8450119551079857e-05,
      "loss": 0.9769,
      "step": 109480
    },
    {
      "epoch": 0.38373515299989136,
      "grad_norm": 2.9375,
      "learning_rate": 4.844947052241615e-05,
      "loss": 1.0517,
      "step": 109490
    },
    {
      "epoch": 0.38377020050678695,
      "grad_norm": 3.4375,
      "learning_rate": 4.844882149375245e-05,
      "loss": 1.0037,
      "step": 109500
    },
    {
      "epoch": 0.38380524801368254,
      "grad_norm": 2.46875,
      "learning_rate": 4.844817246508875e-05,
      "loss": 1.0279,
      "step": 109510
    },
    {
      "epoch": 0.38384029552057813,
      "grad_norm": 2.84375,
      "learning_rate": 4.844752343642505e-05,
      "loss": 0.9955,
      "step": 109520
    },
    {
      "epoch": 0.3838753430274737,
      "grad_norm": 2.828125,
      "learning_rate": 4.844687440776134e-05,
      "loss": 1.024,
      "step": 109530
    },
    {
      "epoch": 0.3839103905343693,
      "grad_norm": 3.21875,
      "learning_rate": 4.8446225379097645e-05,
      "loss": 0.941,
      "step": 109540
    },
    {
      "epoch": 0.3839454380412649,
      "grad_norm": 3.125,
      "learning_rate": 4.844557635043394e-05,
      "loss": 0.9742,
      "step": 109550
    },
    {
      "epoch": 0.38398048554816055,
      "grad_norm": 2.609375,
      "learning_rate": 4.844492732177024e-05,
      "loss": 0.8924,
      "step": 109560
    },
    {
      "epoch": 0.38401553305505615,
      "grad_norm": 3.21875,
      "learning_rate": 4.8444278293106535e-05,
      "loss": 0.9816,
      "step": 109570
    },
    {
      "epoch": 0.38405058056195174,
      "grad_norm": 3.53125,
      "learning_rate": 4.8443629264442837e-05,
      "loss": 0.952,
      "step": 109580
    },
    {
      "epoch": 0.38408562806884733,
      "grad_norm": 3.109375,
      "learning_rate": 4.844298023577913e-05,
      "loss": 0.9597,
      "step": 109590
    },
    {
      "epoch": 0.3841206755757429,
      "grad_norm": 2.984375,
      "learning_rate": 4.844233120711543e-05,
      "loss": 0.9995,
      "step": 109600
    },
    {
      "epoch": 0.3841557230826385,
      "grad_norm": 2.921875,
      "learning_rate": 4.844168217845173e-05,
      "loss": 0.9637,
      "step": 109610
    },
    {
      "epoch": 0.3841907705895341,
      "grad_norm": 3.234375,
      "learning_rate": 4.844103314978803e-05,
      "loss": 0.9994,
      "step": 109620
    },
    {
      "epoch": 0.3842258180964297,
      "grad_norm": 3.265625,
      "learning_rate": 4.844038412112433e-05,
      "loss": 0.9254,
      "step": 109630
    },
    {
      "epoch": 0.3842608656033253,
      "grad_norm": 3.109375,
      "learning_rate": 4.8439735092460625e-05,
      "loss": 0.9359,
      "step": 109640
    },
    {
      "epoch": 0.3842959131102209,
      "grad_norm": 3.46875,
      "learning_rate": 4.8439086063796926e-05,
      "loss": 0.9944,
      "step": 109650
    },
    {
      "epoch": 0.3843309606171165,
      "grad_norm": 2.921875,
      "learning_rate": 4.843843703513322e-05,
      "loss": 1.0139,
      "step": 109660
    },
    {
      "epoch": 0.3843660081240121,
      "grad_norm": 3.0625,
      "learning_rate": 4.843778800646952e-05,
      "loss": 0.9357,
      "step": 109670
    },
    {
      "epoch": 0.3844010556309077,
      "grad_norm": 2.96875,
      "learning_rate": 4.8437138977805817e-05,
      "loss": 1.0134,
      "step": 109680
    },
    {
      "epoch": 0.3844361031378033,
      "grad_norm": 3.25,
      "learning_rate": 4.843648994914212e-05,
      "loss": 1.0011,
      "step": 109690
    },
    {
      "epoch": 0.3844711506446989,
      "grad_norm": 3.015625,
      "learning_rate": 4.843584092047841e-05,
      "loss": 0.9756,
      "step": 109700
    },
    {
      "epoch": 0.3845061981515945,
      "grad_norm": 3.34375,
      "learning_rate": 4.8435191891814714e-05,
      "loss": 1.0116,
      "step": 109710
    },
    {
      "epoch": 0.38454124565849007,
      "grad_norm": 3.1875,
      "learning_rate": 4.843454286315101e-05,
      "loss": 0.9707,
      "step": 109720
    },
    {
      "epoch": 0.38457629316538566,
      "grad_norm": 2.828125,
      "learning_rate": 4.843389383448731e-05,
      "loss": 0.8505,
      "step": 109730
    },
    {
      "epoch": 0.38461134067228125,
      "grad_norm": 2.9375,
      "learning_rate": 4.8433244805823605e-05,
      "loss": 0.947,
      "step": 109740
    },
    {
      "epoch": 0.3846463881791769,
      "grad_norm": 3.046875,
      "learning_rate": 4.8432595777159906e-05,
      "loss": 0.9334,
      "step": 109750
    },
    {
      "epoch": 0.3846814356860725,
      "grad_norm": 2.90625,
      "learning_rate": 4.843194674849621e-05,
      "loss": 0.9554,
      "step": 109760
    },
    {
      "epoch": 0.3847164831929681,
      "grad_norm": 3.328125,
      "learning_rate": 4.84312977198325e-05,
      "loss": 0.8777,
      "step": 109770
    },
    {
      "epoch": 0.3847515306998637,
      "grad_norm": 2.71875,
      "learning_rate": 4.84306486911688e-05,
      "loss": 1.0398,
      "step": 109780
    },
    {
      "epoch": 0.38478657820675927,
      "grad_norm": 2.578125,
      "learning_rate": 4.84299996625051e-05,
      "loss": 0.9193,
      "step": 109790
    },
    {
      "epoch": 0.38482162571365486,
      "grad_norm": 2.65625,
      "learning_rate": 4.84293506338414e-05,
      "loss": 0.9046,
      "step": 109800
    },
    {
      "epoch": 0.38485667322055045,
      "grad_norm": 3.15625,
      "learning_rate": 4.8428701605177694e-05,
      "loss": 0.9581,
      "step": 109810
    },
    {
      "epoch": 0.38489172072744604,
      "grad_norm": 3.28125,
      "learning_rate": 4.8428052576513995e-05,
      "loss": 0.9534,
      "step": 109820
    },
    {
      "epoch": 0.38492676823434163,
      "grad_norm": 3.5625,
      "learning_rate": 4.842740354785029e-05,
      "loss": 1.0121,
      "step": 109830
    },
    {
      "epoch": 0.3849618157412372,
      "grad_norm": 2.890625,
      "learning_rate": 4.842675451918659e-05,
      "loss": 0.8704,
      "step": 109840
    },
    {
      "epoch": 0.38499686324813287,
      "grad_norm": 2.8125,
      "learning_rate": 4.8426105490522886e-05,
      "loss": 0.9857,
      "step": 109850
    },
    {
      "epoch": 0.38503191075502846,
      "grad_norm": 3.03125,
      "learning_rate": 4.842545646185919e-05,
      "loss": 0.9843,
      "step": 109860
    },
    {
      "epoch": 0.38506695826192405,
      "grad_norm": 3.171875,
      "learning_rate": 4.842480743319548e-05,
      "loss": 0.979,
      "step": 109870
    },
    {
      "epoch": 0.38510200576881964,
      "grad_norm": 2.828125,
      "learning_rate": 4.8424158404531777e-05,
      "loss": 0.944,
      "step": 109880
    },
    {
      "epoch": 0.38513705327571524,
      "grad_norm": 2.75,
      "learning_rate": 4.842350937586808e-05,
      "loss": 0.8905,
      "step": 109890
    },
    {
      "epoch": 0.3851721007826108,
      "grad_norm": 2.875,
      "learning_rate": 4.842286034720437e-05,
      "loss": 0.922,
      "step": 109900
    },
    {
      "epoch": 0.3852071482895064,
      "grad_norm": 3.28125,
      "learning_rate": 4.8422211318540674e-05,
      "loss": 0.9154,
      "step": 109910
    },
    {
      "epoch": 0.385242195796402,
      "grad_norm": 3.125,
      "learning_rate": 4.842156228987697e-05,
      "loss": 0.9996,
      "step": 109920
    },
    {
      "epoch": 0.3852772433032976,
      "grad_norm": 3.296875,
      "learning_rate": 4.842091326121327e-05,
      "loss": 0.9703,
      "step": 109930
    },
    {
      "epoch": 0.3853122908101932,
      "grad_norm": 2.984375,
      "learning_rate": 4.8420264232549565e-05,
      "loss": 0.9293,
      "step": 109940
    },
    {
      "epoch": 0.38534733831708884,
      "grad_norm": 3.4375,
      "learning_rate": 4.8419615203885866e-05,
      "loss": 1.0454,
      "step": 109950
    },
    {
      "epoch": 0.38538238582398443,
      "grad_norm": 2.796875,
      "learning_rate": 4.841896617522216e-05,
      "loss": 1.012,
      "step": 109960
    },
    {
      "epoch": 0.38541743333088,
      "grad_norm": 3.046875,
      "learning_rate": 4.841831714655846e-05,
      "loss": 1.0424,
      "step": 109970
    },
    {
      "epoch": 0.3854524808377756,
      "grad_norm": 2.703125,
      "learning_rate": 4.841766811789476e-05,
      "loss": 0.9469,
      "step": 109980
    },
    {
      "epoch": 0.3854875283446712,
      "grad_norm": 3.171875,
      "learning_rate": 4.841701908923106e-05,
      "loss": 1.0042,
      "step": 109990
    },
    {
      "epoch": 0.3855225758515668,
      "grad_norm": 3.453125,
      "learning_rate": 4.841637006056736e-05,
      "loss": 0.9184,
      "step": 110000
    },
    {
      "epoch": 0.3855225758515668,
      "eval_loss": 0.8933005332946777,
      "eval_runtime": 552.3456,
      "eval_samples_per_second": 688.764,
      "eval_steps_per_second": 57.397,
      "step": 110000
    },
    {
      "epoch": 0.3855576233584624,
      "grad_norm": 2.953125,
      "learning_rate": 4.8415721031903654e-05,
      "loss": 1.0356,
      "step": 110010
    },
    {
      "epoch": 0.385592670865358,
      "grad_norm": 2.921875,
      "learning_rate": 4.8415072003239955e-05,
      "loss": 0.8869,
      "step": 110020
    },
    {
      "epoch": 0.38562771837225357,
      "grad_norm": 3.421875,
      "learning_rate": 4.841442297457625e-05,
      "loss": 0.9164,
      "step": 110030
    },
    {
      "epoch": 0.38566276587914916,
      "grad_norm": 3.703125,
      "learning_rate": 4.841377394591255e-05,
      "loss": 0.9337,
      "step": 110040
    },
    {
      "epoch": 0.3856978133860448,
      "grad_norm": 2.78125,
      "learning_rate": 4.8413124917248846e-05,
      "loss": 0.9618,
      "step": 110050
    },
    {
      "epoch": 0.3857328608929404,
      "grad_norm": 3.078125,
      "learning_rate": 4.841247588858515e-05,
      "loss": 1.0139,
      "step": 110060
    },
    {
      "epoch": 0.385767908399836,
      "grad_norm": 3.03125,
      "learning_rate": 4.841182685992144e-05,
      "loss": 0.8961,
      "step": 110070
    },
    {
      "epoch": 0.3858029559067316,
      "grad_norm": 2.859375,
      "learning_rate": 4.841117783125774e-05,
      "loss": 0.968,
      "step": 110080
    },
    {
      "epoch": 0.3858380034136272,
      "grad_norm": 2.984375,
      "learning_rate": 4.841052880259404e-05,
      "loss": 0.9918,
      "step": 110090
    },
    {
      "epoch": 0.38587305092052276,
      "grad_norm": 3.015625,
      "learning_rate": 4.840987977393034e-05,
      "loss": 0.8832,
      "step": 110100
    },
    {
      "epoch": 0.38590809842741836,
      "grad_norm": 3.53125,
      "learning_rate": 4.8409230745266634e-05,
      "loss": 0.9985,
      "step": 110110
    },
    {
      "epoch": 0.38594314593431395,
      "grad_norm": 3.171875,
      "learning_rate": 4.8408581716602935e-05,
      "loss": 1.0079,
      "step": 110120
    },
    {
      "epoch": 0.38597819344120954,
      "grad_norm": 2.953125,
      "learning_rate": 4.840793268793924e-05,
      "loss": 0.9456,
      "step": 110130
    },
    {
      "epoch": 0.38601324094810513,
      "grad_norm": 2.953125,
      "learning_rate": 4.840728365927553e-05,
      "loss": 1.0261,
      "step": 110140
    },
    {
      "epoch": 0.3860482884550008,
      "grad_norm": 2.859375,
      "learning_rate": 4.840663463061183e-05,
      "loss": 0.9448,
      "step": 110150
    },
    {
      "epoch": 0.38608333596189637,
      "grad_norm": 3.171875,
      "learning_rate": 4.840598560194813e-05,
      "loss": 0.9876,
      "step": 110160
    },
    {
      "epoch": 0.38611838346879196,
      "grad_norm": 2.984375,
      "learning_rate": 4.840533657328443e-05,
      "loss": 0.981,
      "step": 110170
    },
    {
      "epoch": 0.38615343097568755,
      "grad_norm": 3.140625,
      "learning_rate": 4.840468754462072e-05,
      "loss": 0.9615,
      "step": 110180
    },
    {
      "epoch": 0.38618847848258314,
      "grad_norm": 3.0,
      "learning_rate": 4.8404038515957025e-05,
      "loss": 0.9496,
      "step": 110190
    },
    {
      "epoch": 0.38622352598947873,
      "grad_norm": 2.84375,
      "learning_rate": 4.840338948729332e-05,
      "loss": 0.9772,
      "step": 110200
    },
    {
      "epoch": 0.3862585734963743,
      "grad_norm": 3.09375,
      "learning_rate": 4.840274045862962e-05,
      "loss": 0.9806,
      "step": 110210
    },
    {
      "epoch": 0.3862936210032699,
      "grad_norm": 3.109375,
      "learning_rate": 4.8402091429965915e-05,
      "loss": 0.9323,
      "step": 110220
    },
    {
      "epoch": 0.3863286685101655,
      "grad_norm": 3.0625,
      "learning_rate": 4.840144240130222e-05,
      "loss": 0.9676,
      "step": 110230
    },
    {
      "epoch": 0.38636371601706115,
      "grad_norm": 3.078125,
      "learning_rate": 4.840079337263851e-05,
      "loss": 0.9508,
      "step": 110240
    },
    {
      "epoch": 0.38639876352395675,
      "grad_norm": 2.90625,
      "learning_rate": 4.8400144343974806e-05,
      "loss": 0.9613,
      "step": 110250
    },
    {
      "epoch": 0.38643381103085234,
      "grad_norm": 2.796875,
      "learning_rate": 4.839949531531111e-05,
      "loss": 1.0289,
      "step": 110260
    },
    {
      "epoch": 0.38646885853774793,
      "grad_norm": 2.984375,
      "learning_rate": 4.83988462866474e-05,
      "loss": 0.9318,
      "step": 110270
    },
    {
      "epoch": 0.3865039060446435,
      "grad_norm": 3.296875,
      "learning_rate": 4.83981972579837e-05,
      "loss": 0.8917,
      "step": 110280
    },
    {
      "epoch": 0.3865389535515391,
      "grad_norm": 2.515625,
      "learning_rate": 4.839754822932e-05,
      "loss": 0.958,
      "step": 110290
    },
    {
      "epoch": 0.3865740010584347,
      "grad_norm": 3.171875,
      "learning_rate": 4.83968992006563e-05,
      "loss": 0.9354,
      "step": 110300
    },
    {
      "epoch": 0.3866090485653303,
      "grad_norm": 3.203125,
      "learning_rate": 4.8396250171992594e-05,
      "loss": 0.9272,
      "step": 110310
    },
    {
      "epoch": 0.3866440960722259,
      "grad_norm": 2.984375,
      "learning_rate": 4.8395601143328895e-05,
      "loss": 0.8861,
      "step": 110320
    },
    {
      "epoch": 0.3866791435791215,
      "grad_norm": 3.375,
      "learning_rate": 4.839495211466519e-05,
      "loss": 0.8938,
      "step": 110330
    },
    {
      "epoch": 0.3867141910860171,
      "grad_norm": 2.765625,
      "learning_rate": 4.839430308600149e-05,
      "loss": 0.9972,
      "step": 110340
    },
    {
      "epoch": 0.3867492385929127,
      "grad_norm": 2.890625,
      "learning_rate": 4.839365405733779e-05,
      "loss": 0.9297,
      "step": 110350
    },
    {
      "epoch": 0.3867842860998083,
      "grad_norm": 3.046875,
      "learning_rate": 4.839300502867409e-05,
      "loss": 0.9573,
      "step": 110360
    },
    {
      "epoch": 0.3868193336067039,
      "grad_norm": 3.71875,
      "learning_rate": 4.839235600001039e-05,
      "loss": 0.9995,
      "step": 110370
    },
    {
      "epoch": 0.3868543811135995,
      "grad_norm": 3.328125,
      "learning_rate": 4.839170697134668e-05,
      "loss": 0.9669,
      "step": 110380
    },
    {
      "epoch": 0.3868894286204951,
      "grad_norm": 3.046875,
      "learning_rate": 4.8391057942682985e-05,
      "loss": 0.8964,
      "step": 110390
    },
    {
      "epoch": 0.38692447612739067,
      "grad_norm": 3.234375,
      "learning_rate": 4.839040891401928e-05,
      "loss": 0.8858,
      "step": 110400
    },
    {
      "epoch": 0.38695952363428626,
      "grad_norm": 2.90625,
      "learning_rate": 4.838975988535558e-05,
      "loss": 0.9741,
      "step": 110410
    },
    {
      "epoch": 0.38699457114118185,
      "grad_norm": 3.578125,
      "learning_rate": 4.8389110856691875e-05,
      "loss": 0.9141,
      "step": 110420
    },
    {
      "epoch": 0.38702961864807744,
      "grad_norm": 3.3125,
      "learning_rate": 4.838846182802818e-05,
      "loss": 0.9773,
      "step": 110430
    },
    {
      "epoch": 0.3870646661549731,
      "grad_norm": 2.96875,
      "learning_rate": 4.838781279936447e-05,
      "loss": 0.9748,
      "step": 110440
    },
    {
      "epoch": 0.3870997136618687,
      "grad_norm": 3.203125,
      "learning_rate": 4.838716377070077e-05,
      "loss": 0.9435,
      "step": 110450
    },
    {
      "epoch": 0.3871347611687643,
      "grad_norm": 2.84375,
      "learning_rate": 4.838651474203707e-05,
      "loss": 0.9944,
      "step": 110460
    },
    {
      "epoch": 0.38716980867565987,
      "grad_norm": 3.109375,
      "learning_rate": 4.838586571337337e-05,
      "loss": 0.8863,
      "step": 110470
    },
    {
      "epoch": 0.38720485618255546,
      "grad_norm": 3.296875,
      "learning_rate": 4.838521668470966e-05,
      "loss": 0.9575,
      "step": 110480
    },
    {
      "epoch": 0.38723990368945105,
      "grad_norm": 3.09375,
      "learning_rate": 4.8384567656045965e-05,
      "loss": 0.9121,
      "step": 110490
    },
    {
      "epoch": 0.38727495119634664,
      "grad_norm": 3.546875,
      "learning_rate": 4.8383918627382266e-05,
      "loss": 1.036,
      "step": 110500
    },
    {
      "epoch": 0.38730999870324223,
      "grad_norm": 3.09375,
      "learning_rate": 4.838326959871856e-05,
      "loss": 0.993,
      "step": 110510
    },
    {
      "epoch": 0.3873450462101378,
      "grad_norm": 3.015625,
      "learning_rate": 4.838262057005486e-05,
      "loss": 0.9428,
      "step": 110520
    },
    {
      "epoch": 0.3873800937170334,
      "grad_norm": 3.25,
      "learning_rate": 4.838197154139116e-05,
      "loss": 0.9895,
      "step": 110530
    },
    {
      "epoch": 0.38741514122392906,
      "grad_norm": 3.15625,
      "learning_rate": 4.838132251272746e-05,
      "loss": 0.9244,
      "step": 110540
    },
    {
      "epoch": 0.38745018873082465,
      "grad_norm": 2.53125,
      "learning_rate": 4.838067348406375e-05,
      "loss": 0.9471,
      "step": 110550
    },
    {
      "epoch": 0.38748523623772024,
      "grad_norm": 2.984375,
      "learning_rate": 4.8380024455400054e-05,
      "loss": 0.9825,
      "step": 110560
    },
    {
      "epoch": 0.38752028374461583,
      "grad_norm": 3.546875,
      "learning_rate": 4.837937542673635e-05,
      "loss": 0.9858,
      "step": 110570
    },
    {
      "epoch": 0.3875553312515114,
      "grad_norm": 3.015625,
      "learning_rate": 4.837872639807265e-05,
      "loss": 0.9819,
      "step": 110580
    },
    {
      "epoch": 0.387590378758407,
      "grad_norm": 3.375,
      "learning_rate": 4.8378077369408945e-05,
      "loss": 1.0017,
      "step": 110590
    },
    {
      "epoch": 0.3876254262653026,
      "grad_norm": 2.734375,
      "learning_rate": 4.8377428340745246e-05,
      "loss": 0.9393,
      "step": 110600
    },
    {
      "epoch": 0.3876604737721982,
      "grad_norm": 3.5625,
      "learning_rate": 4.837677931208154e-05,
      "loss": 0.8867,
      "step": 110610
    },
    {
      "epoch": 0.3876955212790938,
      "grad_norm": 2.859375,
      "learning_rate": 4.8376130283417835e-05,
      "loss": 0.9284,
      "step": 110620
    },
    {
      "epoch": 0.3877305687859894,
      "grad_norm": 2.96875,
      "learning_rate": 4.837548125475414e-05,
      "loss": 0.9763,
      "step": 110630
    },
    {
      "epoch": 0.38776561629288503,
      "grad_norm": 2.890625,
      "learning_rate": 4.837483222609043e-05,
      "loss": 0.879,
      "step": 110640
    },
    {
      "epoch": 0.3878006637997806,
      "grad_norm": 2.515625,
      "learning_rate": 4.837418319742673e-05,
      "loss": 0.9775,
      "step": 110650
    },
    {
      "epoch": 0.3878357113066762,
      "grad_norm": 2.8125,
      "learning_rate": 4.837353416876303e-05,
      "loss": 0.9777,
      "step": 110660
    },
    {
      "epoch": 0.3878707588135718,
      "grad_norm": 3.15625,
      "learning_rate": 4.837288514009933e-05,
      "loss": 0.9974,
      "step": 110670
    },
    {
      "epoch": 0.3879058063204674,
      "grad_norm": 3.125,
      "learning_rate": 4.837223611143562e-05,
      "loss": 0.9416,
      "step": 110680
    },
    {
      "epoch": 0.387940853827363,
      "grad_norm": 3.0625,
      "learning_rate": 4.8371587082771925e-05,
      "loss": 0.9432,
      "step": 110690
    },
    {
      "epoch": 0.3879759013342586,
      "grad_norm": 2.796875,
      "learning_rate": 4.837093805410822e-05,
      "loss": 0.9548,
      "step": 110700
    },
    {
      "epoch": 0.38801094884115417,
      "grad_norm": 2.875,
      "learning_rate": 4.837028902544452e-05,
      "loss": 0.893,
      "step": 110710
    },
    {
      "epoch": 0.38804599634804976,
      "grad_norm": 2.90625,
      "learning_rate": 4.836963999678082e-05,
      "loss": 0.921,
      "step": 110720
    },
    {
      "epoch": 0.38808104385494535,
      "grad_norm": 3.28125,
      "learning_rate": 4.836899096811712e-05,
      "loss": 0.96,
      "step": 110730
    },
    {
      "epoch": 0.388116091361841,
      "grad_norm": 3.140625,
      "learning_rate": 4.836834193945342e-05,
      "loss": 0.9969,
      "step": 110740
    },
    {
      "epoch": 0.3881511388687366,
      "grad_norm": 2.9375,
      "learning_rate": 4.836769291078971e-05,
      "loss": 1.0185,
      "step": 110750
    },
    {
      "epoch": 0.3881861863756322,
      "grad_norm": 3.203125,
      "learning_rate": 4.8367043882126014e-05,
      "loss": 1.0501,
      "step": 110760
    },
    {
      "epoch": 0.38822123388252777,
      "grad_norm": 2.578125,
      "learning_rate": 4.836639485346231e-05,
      "loss": 0.9706,
      "step": 110770
    },
    {
      "epoch": 0.38825628138942336,
      "grad_norm": 3.171875,
      "learning_rate": 4.836574582479861e-05,
      "loss": 0.9595,
      "step": 110780
    },
    {
      "epoch": 0.38829132889631895,
      "grad_norm": 3.265625,
      "learning_rate": 4.8365096796134905e-05,
      "loss": 0.9479,
      "step": 110790
    },
    {
      "epoch": 0.38832637640321455,
      "grad_norm": 2.765625,
      "learning_rate": 4.8364447767471206e-05,
      "loss": 0.9513,
      "step": 110800
    },
    {
      "epoch": 0.38836142391011014,
      "grad_norm": 3.0625,
      "learning_rate": 4.83637987388075e-05,
      "loss": 0.9748,
      "step": 110810
    },
    {
      "epoch": 0.38839647141700573,
      "grad_norm": 3.0625,
      "learning_rate": 4.83631497101438e-05,
      "loss": 1.0349,
      "step": 110820
    },
    {
      "epoch": 0.3884315189239014,
      "grad_norm": 3.28125,
      "learning_rate": 4.83625006814801e-05,
      "loss": 0.9705,
      "step": 110830
    },
    {
      "epoch": 0.38846656643079697,
      "grad_norm": 2.609375,
      "learning_rate": 4.83618516528164e-05,
      "loss": 0.9417,
      "step": 110840
    },
    {
      "epoch": 0.38850161393769256,
      "grad_norm": 3.328125,
      "learning_rate": 4.83612026241527e-05,
      "loss": 0.9712,
      "step": 110850
    },
    {
      "epoch": 0.38853666144458815,
      "grad_norm": 3.296875,
      "learning_rate": 4.8360553595488994e-05,
      "loss": 0.9538,
      "step": 110860
    },
    {
      "epoch": 0.38857170895148374,
      "grad_norm": 3.203125,
      "learning_rate": 4.8359904566825295e-05,
      "loss": 0.9086,
      "step": 110870
    },
    {
      "epoch": 0.38860675645837933,
      "grad_norm": 3.171875,
      "learning_rate": 4.835925553816159e-05,
      "loss": 0.9063,
      "step": 110880
    },
    {
      "epoch": 0.3886418039652749,
      "grad_norm": 2.609375,
      "learning_rate": 4.835860650949789e-05,
      "loss": 0.9365,
      "step": 110890
    },
    {
      "epoch": 0.3886768514721705,
      "grad_norm": 2.875,
      "learning_rate": 4.8357957480834186e-05,
      "loss": 1.0164,
      "step": 110900
    },
    {
      "epoch": 0.3887118989790661,
      "grad_norm": 3.09375,
      "learning_rate": 4.835730845217049e-05,
      "loss": 0.9479,
      "step": 110910
    },
    {
      "epoch": 0.3887469464859617,
      "grad_norm": 2.953125,
      "learning_rate": 4.835665942350678e-05,
      "loss": 0.9204,
      "step": 110920
    },
    {
      "epoch": 0.38878199399285734,
      "grad_norm": 3.046875,
      "learning_rate": 4.8356010394843083e-05,
      "loss": 0.908,
      "step": 110930
    },
    {
      "epoch": 0.38881704149975294,
      "grad_norm": 3.09375,
      "learning_rate": 4.835536136617938e-05,
      "loss": 0.9922,
      "step": 110940
    },
    {
      "epoch": 0.3888520890066485,
      "grad_norm": 2.984375,
      "learning_rate": 4.835471233751568e-05,
      "loss": 0.8781,
      "step": 110950
    },
    {
      "epoch": 0.3888871365135441,
      "grad_norm": 2.890625,
      "learning_rate": 4.8354063308851974e-05,
      "loss": 0.9506,
      "step": 110960
    },
    {
      "epoch": 0.3889221840204397,
      "grad_norm": 3.125,
      "learning_rate": 4.8353414280188275e-05,
      "loss": 0.8997,
      "step": 110970
    },
    {
      "epoch": 0.3889572315273353,
      "grad_norm": 2.84375,
      "learning_rate": 4.835276525152457e-05,
      "loss": 0.9082,
      "step": 110980
    },
    {
      "epoch": 0.3889922790342309,
      "grad_norm": 2.5625,
      "learning_rate": 4.835211622286087e-05,
      "loss": 0.9413,
      "step": 110990
    },
    {
      "epoch": 0.3890273265411265,
      "grad_norm": 3.125,
      "learning_rate": 4.8351467194197166e-05,
      "loss": 0.9474,
      "step": 111000
    },
    {
      "epoch": 0.3890623740480221,
      "grad_norm": 3.09375,
      "learning_rate": 4.835081816553346e-05,
      "loss": 0.9079,
      "step": 111010
    },
    {
      "epoch": 0.38909742155491767,
      "grad_norm": 3.109375,
      "learning_rate": 4.835016913686976e-05,
      "loss": 0.9609,
      "step": 111020
    },
    {
      "epoch": 0.3891324690618133,
      "grad_norm": 3.34375,
      "learning_rate": 4.834952010820606e-05,
      "loss": 0.9083,
      "step": 111030
    },
    {
      "epoch": 0.3891675165687089,
      "grad_norm": 3.34375,
      "learning_rate": 4.834887107954236e-05,
      "loss": 0.9322,
      "step": 111040
    },
    {
      "epoch": 0.3892025640756045,
      "grad_norm": 3.640625,
      "learning_rate": 4.834822205087865e-05,
      "loss": 0.9845,
      "step": 111050
    },
    {
      "epoch": 0.3892376115825001,
      "grad_norm": 3.234375,
      "learning_rate": 4.8347573022214954e-05,
      "loss": 0.967,
      "step": 111060
    },
    {
      "epoch": 0.3892726590893957,
      "grad_norm": 2.921875,
      "learning_rate": 4.834692399355125e-05,
      "loss": 0.9437,
      "step": 111070
    },
    {
      "epoch": 0.38930770659629127,
      "grad_norm": 3.21875,
      "learning_rate": 4.834627496488755e-05,
      "loss": 0.9764,
      "step": 111080
    },
    {
      "epoch": 0.38934275410318686,
      "grad_norm": 2.734375,
      "learning_rate": 4.834562593622385e-05,
      "loss": 0.8446,
      "step": 111090
    },
    {
      "epoch": 0.38937780161008245,
      "grad_norm": 2.984375,
      "learning_rate": 4.8344976907560146e-05,
      "loss": 0.9826,
      "step": 111100
    },
    {
      "epoch": 0.38941284911697804,
      "grad_norm": 3.109375,
      "learning_rate": 4.834432787889645e-05,
      "loss": 1.0271,
      "step": 111110
    },
    {
      "epoch": 0.38944789662387364,
      "grad_norm": 3.125,
      "learning_rate": 4.834367885023274e-05,
      "loss": 0.9548,
      "step": 111120
    },
    {
      "epoch": 0.3894829441307693,
      "grad_norm": 3.1875,
      "learning_rate": 4.8343029821569043e-05,
      "loss": 0.9045,
      "step": 111130
    },
    {
      "epoch": 0.3895179916376649,
      "grad_norm": 2.921875,
      "learning_rate": 4.834238079290534e-05,
      "loss": 0.9634,
      "step": 111140
    },
    {
      "epoch": 0.38955303914456046,
      "grad_norm": 3.015625,
      "learning_rate": 4.834173176424164e-05,
      "loss": 0.9655,
      "step": 111150
    },
    {
      "epoch": 0.38958808665145606,
      "grad_norm": 3.09375,
      "learning_rate": 4.8341082735577934e-05,
      "loss": 0.964,
      "step": 111160
    },
    {
      "epoch": 0.38962313415835165,
      "grad_norm": 3.125,
      "learning_rate": 4.8340433706914235e-05,
      "loss": 1.011,
      "step": 111170
    },
    {
      "epoch": 0.38965818166524724,
      "grad_norm": 3.234375,
      "learning_rate": 4.833978467825053e-05,
      "loss": 0.9183,
      "step": 111180
    },
    {
      "epoch": 0.38969322917214283,
      "grad_norm": 3.21875,
      "learning_rate": 4.833913564958683e-05,
      "loss": 0.9492,
      "step": 111190
    },
    {
      "epoch": 0.3897282766790384,
      "grad_norm": 3.09375,
      "learning_rate": 4.8338486620923126e-05,
      "loss": 0.9253,
      "step": 111200
    },
    {
      "epoch": 0.389763324185934,
      "grad_norm": 3.390625,
      "learning_rate": 4.833783759225943e-05,
      "loss": 1.0443,
      "step": 111210
    },
    {
      "epoch": 0.3897983716928296,
      "grad_norm": 3.515625,
      "learning_rate": 4.833718856359573e-05,
      "loss": 0.9603,
      "step": 111220
    },
    {
      "epoch": 0.38983341919972525,
      "grad_norm": 3.40625,
      "learning_rate": 4.8336539534932023e-05,
      "loss": 0.9549,
      "step": 111230
    },
    {
      "epoch": 0.38986846670662084,
      "grad_norm": 3.34375,
      "learning_rate": 4.8335890506268325e-05,
      "loss": 0.9309,
      "step": 111240
    },
    {
      "epoch": 0.38990351421351643,
      "grad_norm": 3.078125,
      "learning_rate": 4.833524147760462e-05,
      "loss": 0.9901,
      "step": 111250
    },
    {
      "epoch": 0.389938561720412,
      "grad_norm": 2.53125,
      "learning_rate": 4.833459244894092e-05,
      "loss": 0.9447,
      "step": 111260
    },
    {
      "epoch": 0.3899736092273076,
      "grad_norm": 3.0,
      "learning_rate": 4.8333943420277215e-05,
      "loss": 0.9325,
      "step": 111270
    },
    {
      "epoch": 0.3900086567342032,
      "grad_norm": 3.28125,
      "learning_rate": 4.833329439161352e-05,
      "loss": 1.0483,
      "step": 111280
    },
    {
      "epoch": 0.3900437042410988,
      "grad_norm": 3.734375,
      "learning_rate": 4.833264536294981e-05,
      "loss": 0.9773,
      "step": 111290
    },
    {
      "epoch": 0.3900787517479944,
      "grad_norm": 3.125,
      "learning_rate": 4.833199633428611e-05,
      "loss": 0.9742,
      "step": 111300
    },
    {
      "epoch": 0.39011379925489,
      "grad_norm": 2.921875,
      "learning_rate": 4.833134730562241e-05,
      "loss": 0.8703,
      "step": 111310
    },
    {
      "epoch": 0.3901488467617856,
      "grad_norm": 3.25,
      "learning_rate": 4.833069827695871e-05,
      "loss": 1.0598,
      "step": 111320
    },
    {
      "epoch": 0.3901838942686812,
      "grad_norm": 3.078125,
      "learning_rate": 4.8330049248295003e-05,
      "loss": 0.9829,
      "step": 111330
    },
    {
      "epoch": 0.3902189417755768,
      "grad_norm": 3.125,
      "learning_rate": 4.8329400219631305e-05,
      "loss": 0.9944,
      "step": 111340
    },
    {
      "epoch": 0.3902539892824724,
      "grad_norm": 3.3125,
      "learning_rate": 4.83287511909676e-05,
      "loss": 0.9496,
      "step": 111350
    },
    {
      "epoch": 0.390289036789368,
      "grad_norm": 3.03125,
      "learning_rate": 4.83281021623039e-05,
      "loss": 0.9089,
      "step": 111360
    },
    {
      "epoch": 0.3903240842962636,
      "grad_norm": 3.3125,
      "learning_rate": 4.8327453133640195e-05,
      "loss": 1.0606,
      "step": 111370
    },
    {
      "epoch": 0.3903591318031592,
      "grad_norm": 2.609375,
      "learning_rate": 4.832680410497649e-05,
      "loss": 0.8762,
      "step": 111380
    },
    {
      "epoch": 0.39039417931005477,
      "grad_norm": 3.078125,
      "learning_rate": 4.832615507631279e-05,
      "loss": 1.0119,
      "step": 111390
    },
    {
      "epoch": 0.39042922681695036,
      "grad_norm": 2.734375,
      "learning_rate": 4.8325506047649086e-05,
      "loss": 0.8867,
      "step": 111400
    },
    {
      "epoch": 0.39046427432384595,
      "grad_norm": 3.171875,
      "learning_rate": 4.832485701898539e-05,
      "loss": 0.8977,
      "step": 111410
    },
    {
      "epoch": 0.3904993218307416,
      "grad_norm": 2.625,
      "learning_rate": 4.832420799032168e-05,
      "loss": 0.9202,
      "step": 111420
    },
    {
      "epoch": 0.3905343693376372,
      "grad_norm": 3.640625,
      "learning_rate": 4.8323558961657983e-05,
      "loss": 1.0294,
      "step": 111430
    },
    {
      "epoch": 0.3905694168445328,
      "grad_norm": 2.6875,
      "learning_rate": 4.832290993299428e-05,
      "loss": 0.9871,
      "step": 111440
    },
    {
      "epoch": 0.39060446435142837,
      "grad_norm": 3.09375,
      "learning_rate": 4.832226090433058e-05,
      "loss": 1.0092,
      "step": 111450
    },
    {
      "epoch": 0.39063951185832396,
      "grad_norm": 2.890625,
      "learning_rate": 4.832161187566688e-05,
      "loss": 0.9462,
      "step": 111460
    },
    {
      "epoch": 0.39067455936521955,
      "grad_norm": 2.8125,
      "learning_rate": 4.8320962847003175e-05,
      "loss": 0.9043,
      "step": 111470
    },
    {
      "epoch": 0.39070960687211514,
      "grad_norm": 3.046875,
      "learning_rate": 4.832031381833948e-05,
      "loss": 0.9664,
      "step": 111480
    },
    {
      "epoch": 0.39074465437901074,
      "grad_norm": 3.015625,
      "learning_rate": 4.831966478967577e-05,
      "loss": 0.9588,
      "step": 111490
    },
    {
      "epoch": 0.3907797018859063,
      "grad_norm": 3.421875,
      "learning_rate": 4.831901576101207e-05,
      "loss": 1.0271,
      "step": 111500
    },
    {
      "epoch": 0.3908147493928019,
      "grad_norm": 3.34375,
      "learning_rate": 4.831836673234837e-05,
      "loss": 1.0165,
      "step": 111510
    },
    {
      "epoch": 0.39084979689969757,
      "grad_norm": 3.046875,
      "learning_rate": 4.831771770368467e-05,
      "loss": 0.9632,
      "step": 111520
    },
    {
      "epoch": 0.39088484440659316,
      "grad_norm": 3.296875,
      "learning_rate": 4.8317068675020963e-05,
      "loss": 0.9527,
      "step": 111530
    },
    {
      "epoch": 0.39091989191348875,
      "grad_norm": 2.9375,
      "learning_rate": 4.8316419646357265e-05,
      "loss": 0.9944,
      "step": 111540
    },
    {
      "epoch": 0.39095493942038434,
      "grad_norm": 2.875,
      "learning_rate": 4.831577061769356e-05,
      "loss": 0.9404,
      "step": 111550
    },
    {
      "epoch": 0.39098998692727993,
      "grad_norm": 3.109375,
      "learning_rate": 4.831512158902986e-05,
      "loss": 0.9568,
      "step": 111560
    },
    {
      "epoch": 0.3910250344341755,
      "grad_norm": 3.03125,
      "learning_rate": 4.8314472560366155e-05,
      "loss": 1.0029,
      "step": 111570
    },
    {
      "epoch": 0.3910600819410711,
      "grad_norm": 2.515625,
      "learning_rate": 4.831382353170246e-05,
      "loss": 1.0468,
      "step": 111580
    },
    {
      "epoch": 0.3910951294479667,
      "grad_norm": 2.859375,
      "learning_rate": 4.831317450303876e-05,
      "loss": 0.9098,
      "step": 111590
    },
    {
      "epoch": 0.3911301769548623,
      "grad_norm": 3.140625,
      "learning_rate": 4.831252547437505e-05,
      "loss": 0.8733,
      "step": 111600
    },
    {
      "epoch": 0.3911652244617579,
      "grad_norm": 3.421875,
      "learning_rate": 4.8311876445711354e-05,
      "loss": 0.9033,
      "step": 111610
    },
    {
      "epoch": 0.39120027196865353,
      "grad_norm": 2.640625,
      "learning_rate": 4.831122741704765e-05,
      "loss": 0.8692,
      "step": 111620
    },
    {
      "epoch": 0.3912353194755491,
      "grad_norm": 3.28125,
      "learning_rate": 4.831057838838395e-05,
      "loss": 0.9898,
      "step": 111630
    },
    {
      "epoch": 0.3912703669824447,
      "grad_norm": 3.0625,
      "learning_rate": 4.8309929359720245e-05,
      "loss": 0.9461,
      "step": 111640
    },
    {
      "epoch": 0.3913054144893403,
      "grad_norm": 3.125,
      "learning_rate": 4.8309280331056546e-05,
      "loss": 0.9284,
      "step": 111650
    },
    {
      "epoch": 0.3913404619962359,
      "grad_norm": 2.875,
      "learning_rate": 4.830863130239284e-05,
      "loss": 0.9682,
      "step": 111660
    },
    {
      "epoch": 0.3913755095031315,
      "grad_norm": 3.046875,
      "learning_rate": 4.830798227372914e-05,
      "loss": 0.8965,
      "step": 111670
    },
    {
      "epoch": 0.3914105570100271,
      "grad_norm": 3.0,
      "learning_rate": 4.830733324506544e-05,
      "loss": 0.9226,
      "step": 111680
    },
    {
      "epoch": 0.3914456045169227,
      "grad_norm": 2.890625,
      "learning_rate": 4.830668421640174e-05,
      "loss": 0.9286,
      "step": 111690
    },
    {
      "epoch": 0.39148065202381827,
      "grad_norm": 3.625,
      "learning_rate": 4.830603518773803e-05,
      "loss": 1.0015,
      "step": 111700
    },
    {
      "epoch": 0.39151569953071386,
      "grad_norm": 3.28125,
      "learning_rate": 4.8305386159074334e-05,
      "loss": 0.9331,
      "step": 111710
    },
    {
      "epoch": 0.3915507470376095,
      "grad_norm": 2.9375,
      "learning_rate": 4.830473713041063e-05,
      "loss": 0.9909,
      "step": 111720
    },
    {
      "epoch": 0.3915857945445051,
      "grad_norm": 2.890625,
      "learning_rate": 4.830408810174693e-05,
      "loss": 0.9472,
      "step": 111730
    },
    {
      "epoch": 0.3916208420514007,
      "grad_norm": 3.359375,
      "learning_rate": 4.830343907308323e-05,
      "loss": 1.0557,
      "step": 111740
    },
    {
      "epoch": 0.3916558895582963,
      "grad_norm": 3.359375,
      "learning_rate": 4.830279004441952e-05,
      "loss": 0.9308,
      "step": 111750
    },
    {
      "epoch": 0.39169093706519187,
      "grad_norm": 3.234375,
      "learning_rate": 4.830214101575582e-05,
      "loss": 0.8913,
      "step": 111760
    },
    {
      "epoch": 0.39172598457208746,
      "grad_norm": 3.109375,
      "learning_rate": 4.8301491987092115e-05,
      "loss": 0.9744,
      "step": 111770
    },
    {
      "epoch": 0.39176103207898305,
      "grad_norm": 2.578125,
      "learning_rate": 4.830084295842842e-05,
      "loss": 0.8952,
      "step": 111780
    },
    {
      "epoch": 0.39179607958587864,
      "grad_norm": 3.21875,
      "learning_rate": 4.830019392976471e-05,
      "loss": 0.9169,
      "step": 111790
    },
    {
      "epoch": 0.39183112709277423,
      "grad_norm": 3.203125,
      "learning_rate": 4.829954490110101e-05,
      "loss": 0.9398,
      "step": 111800
    },
    {
      "epoch": 0.3918661745996698,
      "grad_norm": 3.109375,
      "learning_rate": 4.829889587243731e-05,
      "loss": 0.9247,
      "step": 111810
    },
    {
      "epoch": 0.39190122210656547,
      "grad_norm": 2.734375,
      "learning_rate": 4.829824684377361e-05,
      "loss": 0.9445,
      "step": 111820
    },
    {
      "epoch": 0.39193626961346106,
      "grad_norm": 2.875,
      "learning_rate": 4.829759781510991e-05,
      "loss": 1.0442,
      "step": 111830
    },
    {
      "epoch": 0.39197131712035665,
      "grad_norm": 2.65625,
      "learning_rate": 4.8296948786446205e-05,
      "loss": 0.9981,
      "step": 111840
    },
    {
      "epoch": 0.39200636462725225,
      "grad_norm": 2.890625,
      "learning_rate": 4.8296299757782506e-05,
      "loss": 0.8665,
      "step": 111850
    },
    {
      "epoch": 0.39204141213414784,
      "grad_norm": 2.75,
      "learning_rate": 4.82956507291188e-05,
      "loss": 0.9534,
      "step": 111860
    },
    {
      "epoch": 0.39207645964104343,
      "grad_norm": 3.265625,
      "learning_rate": 4.82950017004551e-05,
      "loss": 0.8917,
      "step": 111870
    },
    {
      "epoch": 0.392111507147939,
      "grad_norm": 2.734375,
      "learning_rate": 4.82943526717914e-05,
      "loss": 0.9545,
      "step": 111880
    },
    {
      "epoch": 0.3921465546548346,
      "grad_norm": 2.9375,
      "learning_rate": 4.82937036431277e-05,
      "loss": 0.9825,
      "step": 111890
    },
    {
      "epoch": 0.3921816021617302,
      "grad_norm": 3.125,
      "learning_rate": 4.829305461446399e-05,
      "loss": 0.9968,
      "step": 111900
    },
    {
      "epoch": 0.39221664966862585,
      "grad_norm": 3.125,
      "learning_rate": 4.8292405585800294e-05,
      "loss": 0.9478,
      "step": 111910
    },
    {
      "epoch": 0.39225169717552144,
      "grad_norm": 3.453125,
      "learning_rate": 4.829175655713659e-05,
      "loss": 0.999,
      "step": 111920
    },
    {
      "epoch": 0.39228674468241703,
      "grad_norm": 3.125,
      "learning_rate": 4.829110752847289e-05,
      "loss": 1.0206,
      "step": 111930
    },
    {
      "epoch": 0.3923217921893126,
      "grad_norm": 2.953125,
      "learning_rate": 4.8290458499809185e-05,
      "loss": 0.984,
      "step": 111940
    },
    {
      "epoch": 0.3923568396962082,
      "grad_norm": 3.296875,
      "learning_rate": 4.8289809471145486e-05,
      "loss": 0.997,
      "step": 111950
    },
    {
      "epoch": 0.3923918872031038,
      "grad_norm": 2.625,
      "learning_rate": 4.828916044248179e-05,
      "loss": 0.9909,
      "step": 111960
    },
    {
      "epoch": 0.3924269347099994,
      "grad_norm": 2.734375,
      "learning_rate": 4.828851141381808e-05,
      "loss": 0.9309,
      "step": 111970
    },
    {
      "epoch": 0.392461982216895,
      "grad_norm": 3.390625,
      "learning_rate": 4.8287862385154384e-05,
      "loss": 1.0716,
      "step": 111980
    },
    {
      "epoch": 0.3924970297237906,
      "grad_norm": 2.625,
      "learning_rate": 4.828721335649068e-05,
      "loss": 0.9053,
      "step": 111990
    },
    {
      "epoch": 0.39253207723068617,
      "grad_norm": 2.671875,
      "learning_rate": 4.828656432782698e-05,
      "loss": 1.0053,
      "step": 112000
    },
    {
      "epoch": 0.3925671247375818,
      "grad_norm": 3.078125,
      "learning_rate": 4.8285915299163274e-05,
      "loss": 0.8439,
      "step": 112010
    },
    {
      "epoch": 0.3926021722444774,
      "grad_norm": 2.8125,
      "learning_rate": 4.8285266270499576e-05,
      "loss": 0.965,
      "step": 112020
    },
    {
      "epoch": 0.392637219751373,
      "grad_norm": 3.203125,
      "learning_rate": 4.828461724183587e-05,
      "loss": 0.9997,
      "step": 112030
    },
    {
      "epoch": 0.3926722672582686,
      "grad_norm": 2.828125,
      "learning_rate": 4.828396821317217e-05,
      "loss": 1.0141,
      "step": 112040
    },
    {
      "epoch": 0.3927073147651642,
      "grad_norm": 3.5625,
      "learning_rate": 4.8283319184508466e-05,
      "loss": 1.026,
      "step": 112050
    },
    {
      "epoch": 0.3927423622720598,
      "grad_norm": 3.25,
      "learning_rate": 4.828267015584477e-05,
      "loss": 0.9174,
      "step": 112060
    },
    {
      "epoch": 0.39277740977895537,
      "grad_norm": 2.90625,
      "learning_rate": 4.828202112718106e-05,
      "loss": 0.9661,
      "step": 112070
    },
    {
      "epoch": 0.39281245728585096,
      "grad_norm": 2.921875,
      "learning_rate": 4.8281372098517364e-05,
      "loss": 0.9749,
      "step": 112080
    },
    {
      "epoch": 0.39284750479274655,
      "grad_norm": 3.09375,
      "learning_rate": 4.8280723069853665e-05,
      "loss": 0.9166,
      "step": 112090
    },
    {
      "epoch": 0.39288255229964214,
      "grad_norm": 2.75,
      "learning_rate": 4.828007404118996e-05,
      "loss": 0.9428,
      "step": 112100
    },
    {
      "epoch": 0.3929175998065378,
      "grad_norm": 3.0625,
      "learning_rate": 4.827942501252626e-05,
      "loss": 0.9169,
      "step": 112110
    },
    {
      "epoch": 0.3929526473134334,
      "grad_norm": 3.328125,
      "learning_rate": 4.827877598386255e-05,
      "loss": 1.0262,
      "step": 112120
    },
    {
      "epoch": 0.39298769482032897,
      "grad_norm": 2.890625,
      "learning_rate": 4.827812695519885e-05,
      "loss": 0.9326,
      "step": 112130
    },
    {
      "epoch": 0.39302274232722456,
      "grad_norm": 2.703125,
      "learning_rate": 4.8277477926535145e-05,
      "loss": 0.9967,
      "step": 112140
    },
    {
      "epoch": 0.39305778983412015,
      "grad_norm": 2.625,
      "learning_rate": 4.8276828897871446e-05,
      "loss": 0.8862,
      "step": 112150
    },
    {
      "epoch": 0.39309283734101574,
      "grad_norm": 2.734375,
      "learning_rate": 4.827617986920774e-05,
      "loss": 0.9544,
      "step": 112160
    },
    {
      "epoch": 0.39312788484791134,
      "grad_norm": 3.21875,
      "learning_rate": 4.827553084054404e-05,
      "loss": 0.988,
      "step": 112170
    },
    {
      "epoch": 0.3931629323548069,
      "grad_norm": 3.078125,
      "learning_rate": 4.8274881811880344e-05,
      "loss": 1.0086,
      "step": 112180
    },
    {
      "epoch": 0.3931979798617025,
      "grad_norm": 3.0,
      "learning_rate": 4.827423278321664e-05,
      "loss": 0.9153,
      "step": 112190
    },
    {
      "epoch": 0.3932330273685981,
      "grad_norm": 2.96875,
      "learning_rate": 4.827358375455294e-05,
      "loss": 0.9834,
      "step": 112200
    },
    {
      "epoch": 0.39326807487549376,
      "grad_norm": 3.4375,
      "learning_rate": 4.8272934725889234e-05,
      "loss": 0.9715,
      "step": 112210
    },
    {
      "epoch": 0.39330312238238935,
      "grad_norm": 2.859375,
      "learning_rate": 4.8272285697225536e-05,
      "loss": 1.0216,
      "step": 112220
    },
    {
      "epoch": 0.39333816988928494,
      "grad_norm": 2.59375,
      "learning_rate": 4.827163666856183e-05,
      "loss": 0.8771,
      "step": 112230
    },
    {
      "epoch": 0.39337321739618053,
      "grad_norm": 3.359375,
      "learning_rate": 4.827098763989813e-05,
      "loss": 0.9552,
      "step": 112240
    },
    {
      "epoch": 0.3934082649030761,
      "grad_norm": 2.875,
      "learning_rate": 4.8270338611234426e-05,
      "loss": 0.9904,
      "step": 112250
    },
    {
      "epoch": 0.3934433124099717,
      "grad_norm": 3.078125,
      "learning_rate": 4.826968958257073e-05,
      "loss": 0.951,
      "step": 112260
    },
    {
      "epoch": 0.3934783599168673,
      "grad_norm": 3.015625,
      "learning_rate": 4.826904055390702e-05,
      "loss": 0.9631,
      "step": 112270
    },
    {
      "epoch": 0.3935134074237629,
      "grad_norm": 2.640625,
      "learning_rate": 4.8268391525243324e-05,
      "loss": 0.9263,
      "step": 112280
    },
    {
      "epoch": 0.3935484549306585,
      "grad_norm": 3.125,
      "learning_rate": 4.826774249657962e-05,
      "loss": 0.9345,
      "step": 112290
    },
    {
      "epoch": 0.3935835024375541,
      "grad_norm": 2.765625,
      "learning_rate": 4.826709346791592e-05,
      "loss": 0.9559,
      "step": 112300
    },
    {
      "epoch": 0.3936185499444497,
      "grad_norm": 2.984375,
      "learning_rate": 4.8266444439252214e-05,
      "loss": 0.9602,
      "step": 112310
    },
    {
      "epoch": 0.3936535974513453,
      "grad_norm": 3.0,
      "learning_rate": 4.8265795410588516e-05,
      "loss": 1.0368,
      "step": 112320
    },
    {
      "epoch": 0.3936886449582409,
      "grad_norm": 3.0,
      "learning_rate": 4.826514638192482e-05,
      "loss": 0.9412,
      "step": 112330
    },
    {
      "epoch": 0.3937236924651365,
      "grad_norm": 3.28125,
      "learning_rate": 4.826449735326111e-05,
      "loss": 0.9204,
      "step": 112340
    },
    {
      "epoch": 0.3937587399720321,
      "grad_norm": 3.390625,
      "learning_rate": 4.826384832459741e-05,
      "loss": 0.8943,
      "step": 112350
    },
    {
      "epoch": 0.3937937874789277,
      "grad_norm": 3.046875,
      "learning_rate": 4.826319929593371e-05,
      "loss": 0.9839,
      "step": 112360
    },
    {
      "epoch": 0.3938288349858233,
      "grad_norm": 3.0,
      "learning_rate": 4.826255026727001e-05,
      "loss": 0.9311,
      "step": 112370
    },
    {
      "epoch": 0.39386388249271886,
      "grad_norm": 3.3125,
      "learning_rate": 4.8261901238606304e-05,
      "loss": 0.8925,
      "step": 112380
    },
    {
      "epoch": 0.39389892999961446,
      "grad_norm": 2.953125,
      "learning_rate": 4.8261252209942605e-05,
      "loss": 0.9538,
      "step": 112390
    },
    {
      "epoch": 0.39393397750651005,
      "grad_norm": 3.140625,
      "learning_rate": 4.82606031812789e-05,
      "loss": 0.9739,
      "step": 112400
    },
    {
      "epoch": 0.3939690250134057,
      "grad_norm": 3.0,
      "learning_rate": 4.82599541526152e-05,
      "loss": 0.9946,
      "step": 112410
    },
    {
      "epoch": 0.3940040725203013,
      "grad_norm": 3.234375,
      "learning_rate": 4.8259305123951496e-05,
      "loss": 0.9124,
      "step": 112420
    },
    {
      "epoch": 0.3940391200271969,
      "grad_norm": 3.03125,
      "learning_rate": 4.82586560952878e-05,
      "loss": 0.9086,
      "step": 112430
    },
    {
      "epoch": 0.39407416753409247,
      "grad_norm": 3.0,
      "learning_rate": 4.825800706662409e-05,
      "loss": 0.999,
      "step": 112440
    },
    {
      "epoch": 0.39410921504098806,
      "grad_norm": 2.984375,
      "learning_rate": 4.825735803796039e-05,
      "loss": 0.9648,
      "step": 112450
    },
    {
      "epoch": 0.39414426254788365,
      "grad_norm": 3.203125,
      "learning_rate": 4.8256709009296694e-05,
      "loss": 0.988,
      "step": 112460
    },
    {
      "epoch": 0.39417931005477924,
      "grad_norm": 3.515625,
      "learning_rate": 4.825605998063299e-05,
      "loss": 1.0212,
      "step": 112470
    },
    {
      "epoch": 0.39421435756167483,
      "grad_norm": 2.8125,
      "learning_rate": 4.825541095196929e-05,
      "loss": 0.9235,
      "step": 112480
    },
    {
      "epoch": 0.3942494050685704,
      "grad_norm": 3.109375,
      "learning_rate": 4.8254761923305585e-05,
      "loss": 0.9113,
      "step": 112490
    },
    {
      "epoch": 0.39428445257546607,
      "grad_norm": 3.171875,
      "learning_rate": 4.825411289464188e-05,
      "loss": 0.9911,
      "step": 112500
    },
    {
      "epoch": 0.39431950008236166,
      "grad_norm": 2.9375,
      "learning_rate": 4.8253463865978174e-05,
      "loss": 0.9193,
      "step": 112510
    },
    {
      "epoch": 0.39435454758925725,
      "grad_norm": 3.453125,
      "learning_rate": 4.8252814837314476e-05,
      "loss": 1.0162,
      "step": 112520
    },
    {
      "epoch": 0.39438959509615285,
      "grad_norm": 2.6875,
      "learning_rate": 4.825216580865077e-05,
      "loss": 0.8852,
      "step": 112530
    },
    {
      "epoch": 0.39442464260304844,
      "grad_norm": 2.53125,
      "learning_rate": 4.825151677998707e-05,
      "loss": 0.8874,
      "step": 112540
    },
    {
      "epoch": 0.39445969010994403,
      "grad_norm": 3.09375,
      "learning_rate": 4.825086775132337e-05,
      "loss": 0.9498,
      "step": 112550
    },
    {
      "epoch": 0.3944947376168396,
      "grad_norm": 3.015625,
      "learning_rate": 4.825021872265967e-05,
      "loss": 0.9513,
      "step": 112560
    },
    {
      "epoch": 0.3945297851237352,
      "grad_norm": 3.078125,
      "learning_rate": 4.824956969399597e-05,
      "loss": 0.9307,
      "step": 112570
    },
    {
      "epoch": 0.3945648326306308,
      "grad_norm": 3.1875,
      "learning_rate": 4.8248920665332264e-05,
      "loss": 1.0408,
      "step": 112580
    },
    {
      "epoch": 0.3945998801375264,
      "grad_norm": 3.0,
      "learning_rate": 4.8248271636668565e-05,
      "loss": 0.9603,
      "step": 112590
    },
    {
      "epoch": 0.39463492764442204,
      "grad_norm": 2.984375,
      "learning_rate": 4.824762260800486e-05,
      "loss": 0.9586,
      "step": 112600
    },
    {
      "epoch": 0.39466997515131763,
      "grad_norm": 3.15625,
      "learning_rate": 4.824697357934116e-05,
      "loss": 1.0127,
      "step": 112610
    },
    {
      "epoch": 0.3947050226582132,
      "grad_norm": 3.03125,
      "learning_rate": 4.8246324550677456e-05,
      "loss": 0.9598,
      "step": 112620
    },
    {
      "epoch": 0.3947400701651088,
      "grad_norm": 3.03125,
      "learning_rate": 4.824567552201376e-05,
      "loss": 0.9486,
      "step": 112630
    },
    {
      "epoch": 0.3947751176720044,
      "grad_norm": 2.796875,
      "learning_rate": 4.824502649335005e-05,
      "loss": 0.9338,
      "step": 112640
    },
    {
      "epoch": 0.3948101651789,
      "grad_norm": 3.1875,
      "learning_rate": 4.824437746468635e-05,
      "loss": 0.9133,
      "step": 112650
    },
    {
      "epoch": 0.3948452126857956,
      "grad_norm": 2.921875,
      "learning_rate": 4.824372843602265e-05,
      "loss": 1.0097,
      "step": 112660
    },
    {
      "epoch": 0.3948802601926912,
      "grad_norm": 3.0,
      "learning_rate": 4.824307940735895e-05,
      "loss": 0.9002,
      "step": 112670
    },
    {
      "epoch": 0.39491530769958677,
      "grad_norm": 2.78125,
      "learning_rate": 4.8242430378695244e-05,
      "loss": 0.958,
      "step": 112680
    },
    {
      "epoch": 0.39495035520648236,
      "grad_norm": 3.5625,
      "learning_rate": 4.8241781350031545e-05,
      "loss": 0.9202,
      "step": 112690
    },
    {
      "epoch": 0.394985402713378,
      "grad_norm": 3.265625,
      "learning_rate": 4.8241132321367846e-05,
      "loss": 0.9809,
      "step": 112700
    },
    {
      "epoch": 0.3950204502202736,
      "grad_norm": 3.125,
      "learning_rate": 4.824048329270414e-05,
      "loss": 0.9998,
      "step": 112710
    },
    {
      "epoch": 0.3950554977271692,
      "grad_norm": 3.359375,
      "learning_rate": 4.823983426404044e-05,
      "loss": 0.8937,
      "step": 112720
    },
    {
      "epoch": 0.3950905452340648,
      "grad_norm": 3.46875,
      "learning_rate": 4.823918523537674e-05,
      "loss": 1.0259,
      "step": 112730
    },
    {
      "epoch": 0.3951255927409604,
      "grad_norm": 2.734375,
      "learning_rate": 4.823853620671304e-05,
      "loss": 0.9814,
      "step": 112740
    },
    {
      "epoch": 0.39516064024785597,
      "grad_norm": 3.0625,
      "learning_rate": 4.823788717804933e-05,
      "loss": 0.9305,
      "step": 112750
    },
    {
      "epoch": 0.39519568775475156,
      "grad_norm": 2.890625,
      "learning_rate": 4.8237238149385634e-05,
      "loss": 0.9198,
      "step": 112760
    },
    {
      "epoch": 0.39523073526164715,
      "grad_norm": 2.84375,
      "learning_rate": 4.823658912072193e-05,
      "loss": 0.9403,
      "step": 112770
    },
    {
      "epoch": 0.39526578276854274,
      "grad_norm": 2.96875,
      "learning_rate": 4.823594009205823e-05,
      "loss": 1.0248,
      "step": 112780
    },
    {
      "epoch": 0.39530083027543833,
      "grad_norm": 2.828125,
      "learning_rate": 4.8235291063394525e-05,
      "loss": 0.9679,
      "step": 112790
    },
    {
      "epoch": 0.395335877782334,
      "grad_norm": 3.21875,
      "learning_rate": 4.8234642034730826e-05,
      "loss": 0.9283,
      "step": 112800
    },
    {
      "epoch": 0.39537092528922957,
      "grad_norm": 3.109375,
      "learning_rate": 4.823399300606712e-05,
      "loss": 1.0027,
      "step": 112810
    },
    {
      "epoch": 0.39540597279612516,
      "grad_norm": 2.796875,
      "learning_rate": 4.823334397740342e-05,
      "loss": 0.9521,
      "step": 112820
    },
    {
      "epoch": 0.39544102030302075,
      "grad_norm": 3.234375,
      "learning_rate": 4.8232694948739724e-05,
      "loss": 0.9483,
      "step": 112830
    },
    {
      "epoch": 0.39547606780991634,
      "grad_norm": 2.84375,
      "learning_rate": 4.823204592007602e-05,
      "loss": 0.9197,
      "step": 112840
    },
    {
      "epoch": 0.39551111531681193,
      "grad_norm": 3.375,
      "learning_rate": 4.823139689141232e-05,
      "loss": 0.9393,
      "step": 112850
    },
    {
      "epoch": 0.3955461628237075,
      "grad_norm": 2.9375,
      "learning_rate": 4.8230747862748614e-05,
      "loss": 0.8313,
      "step": 112860
    },
    {
      "epoch": 0.3955812103306031,
      "grad_norm": 3.4375,
      "learning_rate": 4.8230098834084916e-05,
      "loss": 0.9737,
      "step": 112870
    },
    {
      "epoch": 0.3956162578374987,
      "grad_norm": 3.0625,
      "learning_rate": 4.8229449805421204e-05,
      "loss": 0.9464,
      "step": 112880
    },
    {
      "epoch": 0.3956513053443943,
      "grad_norm": 3.171875,
      "learning_rate": 4.8228800776757505e-05,
      "loss": 0.9817,
      "step": 112890
    },
    {
      "epoch": 0.39568635285128995,
      "grad_norm": 3.109375,
      "learning_rate": 4.82281517480938e-05,
      "loss": 0.9558,
      "step": 112900
    },
    {
      "epoch": 0.39572140035818554,
      "grad_norm": 2.90625,
      "learning_rate": 4.82275027194301e-05,
      "loss": 1.0019,
      "step": 112910
    },
    {
      "epoch": 0.39575644786508113,
      "grad_norm": 3.4375,
      "learning_rate": 4.82268536907664e-05,
      "loss": 1.0209,
      "step": 112920
    },
    {
      "epoch": 0.3957914953719767,
      "grad_norm": 3.453125,
      "learning_rate": 4.82262046621027e-05,
      "loss": 0.9588,
      "step": 112930
    },
    {
      "epoch": 0.3958265428788723,
      "grad_norm": 3.09375,
      "learning_rate": 4.8225555633439e-05,
      "loss": 0.9309,
      "step": 112940
    },
    {
      "epoch": 0.3958615903857679,
      "grad_norm": 3.59375,
      "learning_rate": 4.822490660477529e-05,
      "loss": 0.9712,
      "step": 112950
    },
    {
      "epoch": 0.3958966378926635,
      "grad_norm": 3.25,
      "learning_rate": 4.8224257576111594e-05,
      "loss": 0.896,
      "step": 112960
    },
    {
      "epoch": 0.3959316853995591,
      "grad_norm": 3.0625,
      "learning_rate": 4.822360854744789e-05,
      "loss": 0.956,
      "step": 112970
    },
    {
      "epoch": 0.3959667329064547,
      "grad_norm": 3.171875,
      "learning_rate": 4.822295951878419e-05,
      "loss": 0.9228,
      "step": 112980
    },
    {
      "epoch": 0.3960017804133503,
      "grad_norm": 2.828125,
      "learning_rate": 4.8222310490120485e-05,
      "loss": 0.9743,
      "step": 112990
    },
    {
      "epoch": 0.3960368279202459,
      "grad_norm": 3.65625,
      "learning_rate": 4.8221661461456786e-05,
      "loss": 0.9603,
      "step": 113000
    },
    {
      "epoch": 0.3960718754271415,
      "grad_norm": 3.40625,
      "learning_rate": 4.822101243279308e-05,
      "loss": 0.9772,
      "step": 113010
    },
    {
      "epoch": 0.3961069229340371,
      "grad_norm": 3.109375,
      "learning_rate": 4.822036340412938e-05,
      "loss": 1.0204,
      "step": 113020
    },
    {
      "epoch": 0.3961419704409327,
      "grad_norm": 3.015625,
      "learning_rate": 4.821971437546568e-05,
      "loss": 0.956,
      "step": 113030
    },
    {
      "epoch": 0.3961770179478283,
      "grad_norm": 2.5625,
      "learning_rate": 4.821906534680198e-05,
      "loss": 1.0611,
      "step": 113040
    },
    {
      "epoch": 0.39621206545472387,
      "grad_norm": 5.59375,
      "learning_rate": 4.821841631813828e-05,
      "loss": 1.0791,
      "step": 113050
    },
    {
      "epoch": 0.39624711296161946,
      "grad_norm": 2.953125,
      "learning_rate": 4.8217767289474574e-05,
      "loss": 0.9285,
      "step": 113060
    },
    {
      "epoch": 0.39628216046851505,
      "grad_norm": 3.15625,
      "learning_rate": 4.8217118260810876e-05,
      "loss": 0.89,
      "step": 113070
    },
    {
      "epoch": 0.39631720797541065,
      "grad_norm": 3.03125,
      "learning_rate": 4.821646923214717e-05,
      "loss": 1.0694,
      "step": 113080
    },
    {
      "epoch": 0.3963522554823063,
      "grad_norm": 3.5,
      "learning_rate": 4.821582020348347e-05,
      "loss": 0.9873,
      "step": 113090
    },
    {
      "epoch": 0.3963873029892019,
      "grad_norm": 3.375,
      "learning_rate": 4.8215171174819766e-05,
      "loss": 0.9522,
      "step": 113100
    },
    {
      "epoch": 0.3964223504960975,
      "grad_norm": 2.921875,
      "learning_rate": 4.821452214615607e-05,
      "loss": 0.8944,
      "step": 113110
    },
    {
      "epoch": 0.39645739800299307,
      "grad_norm": 3.09375,
      "learning_rate": 4.821387311749236e-05,
      "loss": 0.9517,
      "step": 113120
    },
    {
      "epoch": 0.39649244550988866,
      "grad_norm": 2.984375,
      "learning_rate": 4.8213224088828664e-05,
      "loss": 0.868,
      "step": 113130
    },
    {
      "epoch": 0.39652749301678425,
      "grad_norm": 3.03125,
      "learning_rate": 4.821257506016496e-05,
      "loss": 0.9546,
      "step": 113140
    },
    {
      "epoch": 0.39656254052367984,
      "grad_norm": 2.984375,
      "learning_rate": 4.821192603150126e-05,
      "loss": 0.97,
      "step": 113150
    },
    {
      "epoch": 0.39659758803057543,
      "grad_norm": 3.046875,
      "learning_rate": 4.8211277002837554e-05,
      "loss": 0.9993,
      "step": 113160
    },
    {
      "epoch": 0.396632635537471,
      "grad_norm": 2.578125,
      "learning_rate": 4.8210627974173856e-05,
      "loss": 0.9152,
      "step": 113170
    },
    {
      "epoch": 0.3966676830443666,
      "grad_norm": 3.46875,
      "learning_rate": 4.820997894551015e-05,
      "loss": 0.9456,
      "step": 113180
    },
    {
      "epoch": 0.39670273055126226,
      "grad_norm": 3.34375,
      "learning_rate": 4.820932991684645e-05,
      "loss": 0.9906,
      "step": 113190
    },
    {
      "epoch": 0.39673777805815785,
      "grad_norm": 3.15625,
      "learning_rate": 4.820868088818275e-05,
      "loss": 1.0032,
      "step": 113200
    },
    {
      "epoch": 0.39677282556505344,
      "grad_norm": 3.046875,
      "learning_rate": 4.820803185951905e-05,
      "loss": 0.9637,
      "step": 113210
    },
    {
      "epoch": 0.39680787307194904,
      "grad_norm": 2.84375,
      "learning_rate": 4.820738283085535e-05,
      "loss": 0.8878,
      "step": 113220
    },
    {
      "epoch": 0.3968429205788446,
      "grad_norm": 2.90625,
      "learning_rate": 4.8206733802191644e-05,
      "loss": 0.9798,
      "step": 113230
    },
    {
      "epoch": 0.3968779680857402,
      "grad_norm": 3.4375,
      "learning_rate": 4.8206084773527945e-05,
      "loss": 1.0142,
      "step": 113240
    },
    {
      "epoch": 0.3969130155926358,
      "grad_norm": 3.203125,
      "learning_rate": 4.820543574486423e-05,
      "loss": 0.8825,
      "step": 113250
    },
    {
      "epoch": 0.3969480630995314,
      "grad_norm": 2.8125,
      "learning_rate": 4.8204786716200534e-05,
      "loss": 0.9156,
      "step": 113260
    },
    {
      "epoch": 0.396983110606427,
      "grad_norm": 2.796875,
      "learning_rate": 4.820413768753683e-05,
      "loss": 0.8793,
      "step": 113270
    },
    {
      "epoch": 0.3970181581133226,
      "grad_norm": 2.53125,
      "learning_rate": 4.820348865887313e-05,
      "loss": 0.8201,
      "step": 113280
    },
    {
      "epoch": 0.39705320562021823,
      "grad_norm": 3.3125,
      "learning_rate": 4.820283963020943e-05,
      "loss": 0.9792,
      "step": 113290
    },
    {
      "epoch": 0.3970882531271138,
      "grad_norm": 3.578125,
      "learning_rate": 4.8202190601545726e-05,
      "loss": 0.9928,
      "step": 113300
    },
    {
      "epoch": 0.3971233006340094,
      "grad_norm": 3.0,
      "learning_rate": 4.820154157288203e-05,
      "loss": 0.9251,
      "step": 113310
    },
    {
      "epoch": 0.397158348140905,
      "grad_norm": 2.890625,
      "learning_rate": 4.820089254421832e-05,
      "loss": 0.9427,
      "step": 113320
    },
    {
      "epoch": 0.3971933956478006,
      "grad_norm": 2.828125,
      "learning_rate": 4.8200243515554624e-05,
      "loss": 0.9468,
      "step": 113330
    },
    {
      "epoch": 0.3972284431546962,
      "grad_norm": 3.28125,
      "learning_rate": 4.819959448689092e-05,
      "loss": 0.9558,
      "step": 113340
    },
    {
      "epoch": 0.3972634906615918,
      "grad_norm": 3.0625,
      "learning_rate": 4.819894545822722e-05,
      "loss": 1.0142,
      "step": 113350
    },
    {
      "epoch": 0.39729853816848737,
      "grad_norm": 3.0625,
      "learning_rate": 4.8198296429563514e-05,
      "loss": 1.0049,
      "step": 113360
    },
    {
      "epoch": 0.39733358567538296,
      "grad_norm": 3.5,
      "learning_rate": 4.8197647400899816e-05,
      "loss": 1.0219,
      "step": 113370
    },
    {
      "epoch": 0.39736863318227855,
      "grad_norm": 3.25,
      "learning_rate": 4.819699837223611e-05,
      "loss": 0.9734,
      "step": 113380
    },
    {
      "epoch": 0.3974036806891742,
      "grad_norm": 2.984375,
      "learning_rate": 4.819634934357241e-05,
      "loss": 0.8754,
      "step": 113390
    },
    {
      "epoch": 0.3974387281960698,
      "grad_norm": 2.65625,
      "learning_rate": 4.8195700314908706e-05,
      "loss": 0.8386,
      "step": 113400
    },
    {
      "epoch": 0.3974737757029654,
      "grad_norm": 2.90625,
      "learning_rate": 4.819505128624501e-05,
      "loss": 0.9155,
      "step": 113410
    },
    {
      "epoch": 0.397508823209861,
      "grad_norm": 3.34375,
      "learning_rate": 4.819440225758131e-05,
      "loss": 0.9126,
      "step": 113420
    },
    {
      "epoch": 0.39754387071675656,
      "grad_norm": 3.28125,
      "learning_rate": 4.8193753228917604e-05,
      "loss": 0.8178,
      "step": 113430
    },
    {
      "epoch": 0.39757891822365216,
      "grad_norm": 3.09375,
      "learning_rate": 4.8193104200253905e-05,
      "loss": 1.0168,
      "step": 113440
    },
    {
      "epoch": 0.39761396573054775,
      "grad_norm": 2.90625,
      "learning_rate": 4.81924551715902e-05,
      "loss": 0.9727,
      "step": 113450
    },
    {
      "epoch": 0.39764901323744334,
      "grad_norm": 2.90625,
      "learning_rate": 4.81918061429265e-05,
      "loss": 0.9188,
      "step": 113460
    },
    {
      "epoch": 0.39768406074433893,
      "grad_norm": 2.890625,
      "learning_rate": 4.8191157114262796e-05,
      "loss": 0.9708,
      "step": 113470
    },
    {
      "epoch": 0.3977191082512345,
      "grad_norm": 3.078125,
      "learning_rate": 4.81905080855991e-05,
      "loss": 0.9451,
      "step": 113480
    },
    {
      "epoch": 0.39775415575813017,
      "grad_norm": 2.890625,
      "learning_rate": 4.818985905693539e-05,
      "loss": 0.958,
      "step": 113490
    },
    {
      "epoch": 0.39778920326502576,
      "grad_norm": 3.140625,
      "learning_rate": 4.818921002827169e-05,
      "loss": 0.9875,
      "step": 113500
    },
    {
      "epoch": 0.39782425077192135,
      "grad_norm": 3.3125,
      "learning_rate": 4.818856099960799e-05,
      "loss": 0.9405,
      "step": 113510
    },
    {
      "epoch": 0.39785929827881694,
      "grad_norm": 3.453125,
      "learning_rate": 4.818791197094429e-05,
      "loss": 0.9772,
      "step": 113520
    },
    {
      "epoch": 0.39789434578571253,
      "grad_norm": 2.75,
      "learning_rate": 4.8187262942280584e-05,
      "loss": 0.9869,
      "step": 113530
    },
    {
      "epoch": 0.3979293932926081,
      "grad_norm": 3.21875,
      "learning_rate": 4.8186613913616885e-05,
      "loss": 0.944,
      "step": 113540
    },
    {
      "epoch": 0.3979644407995037,
      "grad_norm": 3.0,
      "learning_rate": 4.818596488495318e-05,
      "loss": 0.9886,
      "step": 113550
    },
    {
      "epoch": 0.3979994883063993,
      "grad_norm": 3.09375,
      "learning_rate": 4.818531585628948e-05,
      "loss": 0.9696,
      "step": 113560
    },
    {
      "epoch": 0.3980345358132949,
      "grad_norm": 2.625,
      "learning_rate": 4.818466682762578e-05,
      "loss": 0.9573,
      "step": 113570
    },
    {
      "epoch": 0.39806958332019055,
      "grad_norm": 2.796875,
      "learning_rate": 4.818401779896208e-05,
      "loss": 0.9156,
      "step": 113580
    },
    {
      "epoch": 0.39810463082708614,
      "grad_norm": 2.984375,
      "learning_rate": 4.818336877029838e-05,
      "loss": 0.9689,
      "step": 113590
    },
    {
      "epoch": 0.39813967833398173,
      "grad_norm": 3.15625,
      "learning_rate": 4.818271974163467e-05,
      "loss": 0.9386,
      "step": 113600
    },
    {
      "epoch": 0.3981747258408773,
      "grad_norm": 3.453125,
      "learning_rate": 4.8182070712970975e-05,
      "loss": 0.9945,
      "step": 113610
    },
    {
      "epoch": 0.3982097733477729,
      "grad_norm": 3.125,
      "learning_rate": 4.818142168430727e-05,
      "loss": 0.9323,
      "step": 113620
    },
    {
      "epoch": 0.3982448208546685,
      "grad_norm": 2.90625,
      "learning_rate": 4.8180772655643564e-05,
      "loss": 0.9494,
      "step": 113630
    },
    {
      "epoch": 0.3982798683615641,
      "grad_norm": 3.421875,
      "learning_rate": 4.818012362697986e-05,
      "loss": 1.0197,
      "step": 113640
    },
    {
      "epoch": 0.3983149158684597,
      "grad_norm": 2.6875,
      "learning_rate": 4.817947459831616e-05,
      "loss": 0.9095,
      "step": 113650
    },
    {
      "epoch": 0.3983499633753553,
      "grad_norm": 2.875,
      "learning_rate": 4.817882556965246e-05,
      "loss": 0.9527,
      "step": 113660
    },
    {
      "epoch": 0.39838501088225087,
      "grad_norm": 3.15625,
      "learning_rate": 4.8178176540988756e-05,
      "loss": 0.9313,
      "step": 113670
    },
    {
      "epoch": 0.3984200583891465,
      "grad_norm": 3.296875,
      "learning_rate": 4.817752751232506e-05,
      "loss": 0.9522,
      "step": 113680
    },
    {
      "epoch": 0.3984551058960421,
      "grad_norm": 2.71875,
      "learning_rate": 4.817687848366135e-05,
      "loss": 0.9513,
      "step": 113690
    },
    {
      "epoch": 0.3984901534029377,
      "grad_norm": 3.265625,
      "learning_rate": 4.817622945499765e-05,
      "loss": 0.9646,
      "step": 113700
    },
    {
      "epoch": 0.3985252009098333,
      "grad_norm": 3.09375,
      "learning_rate": 4.817558042633395e-05,
      "loss": 1.0471,
      "step": 113710
    },
    {
      "epoch": 0.3985602484167289,
      "grad_norm": 3.328125,
      "learning_rate": 4.817493139767025e-05,
      "loss": 0.8758,
      "step": 113720
    },
    {
      "epoch": 0.39859529592362447,
      "grad_norm": 3.046875,
      "learning_rate": 4.8174282369006544e-05,
      "loss": 0.9193,
      "step": 113730
    },
    {
      "epoch": 0.39863034343052006,
      "grad_norm": 2.796875,
      "learning_rate": 4.8173633340342845e-05,
      "loss": 0.9694,
      "step": 113740
    },
    {
      "epoch": 0.39866539093741565,
      "grad_norm": 3.265625,
      "learning_rate": 4.817298431167914e-05,
      "loss": 0.9156,
      "step": 113750
    },
    {
      "epoch": 0.39870043844431124,
      "grad_norm": 3.203125,
      "learning_rate": 4.817233528301544e-05,
      "loss": 0.9563,
      "step": 113760
    },
    {
      "epoch": 0.39873548595120684,
      "grad_norm": 3.078125,
      "learning_rate": 4.8171686254351736e-05,
      "loss": 1.0251,
      "step": 113770
    },
    {
      "epoch": 0.3987705334581025,
      "grad_norm": 3.265625,
      "learning_rate": 4.817103722568804e-05,
      "loss": 0.9464,
      "step": 113780
    },
    {
      "epoch": 0.3988055809649981,
      "grad_norm": 2.9375,
      "learning_rate": 4.817038819702434e-05,
      "loss": 0.9695,
      "step": 113790
    },
    {
      "epoch": 0.39884062847189367,
      "grad_norm": 3.421875,
      "learning_rate": 4.816973916836063e-05,
      "loss": 0.9886,
      "step": 113800
    },
    {
      "epoch": 0.39887567597878926,
      "grad_norm": 2.9375,
      "learning_rate": 4.8169090139696935e-05,
      "loss": 0.9332,
      "step": 113810
    },
    {
      "epoch": 0.39891072348568485,
      "grad_norm": 3.53125,
      "learning_rate": 4.816844111103323e-05,
      "loss": 0.9956,
      "step": 113820
    },
    {
      "epoch": 0.39894577099258044,
      "grad_norm": 3.484375,
      "learning_rate": 4.816779208236953e-05,
      "loss": 0.9576,
      "step": 113830
    },
    {
      "epoch": 0.39898081849947603,
      "grad_norm": 2.578125,
      "learning_rate": 4.8167143053705825e-05,
      "loss": 0.9561,
      "step": 113840
    },
    {
      "epoch": 0.3990158660063716,
      "grad_norm": 2.640625,
      "learning_rate": 4.8166494025042127e-05,
      "loss": 0.9316,
      "step": 113850
    },
    {
      "epoch": 0.3990509135132672,
      "grad_norm": 3.171875,
      "learning_rate": 4.816584499637842e-05,
      "loss": 0.9279,
      "step": 113860
    },
    {
      "epoch": 0.3990859610201628,
      "grad_norm": 3.109375,
      "learning_rate": 4.816519596771472e-05,
      "loss": 0.8895,
      "step": 113870
    },
    {
      "epoch": 0.39912100852705845,
      "grad_norm": 3.03125,
      "learning_rate": 4.816454693905102e-05,
      "loss": 0.952,
      "step": 113880
    },
    {
      "epoch": 0.39915605603395404,
      "grad_norm": 3.3125,
      "learning_rate": 4.816389791038732e-05,
      "loss": 0.8854,
      "step": 113890
    },
    {
      "epoch": 0.39919110354084963,
      "grad_norm": 3.484375,
      "learning_rate": 4.816324888172361e-05,
      "loss": 0.9549,
      "step": 113900
    },
    {
      "epoch": 0.3992261510477452,
      "grad_norm": 2.96875,
      "learning_rate": 4.8162599853059915e-05,
      "loss": 0.9106,
      "step": 113910
    },
    {
      "epoch": 0.3992611985546408,
      "grad_norm": 2.90625,
      "learning_rate": 4.816195082439621e-05,
      "loss": 0.9379,
      "step": 113920
    },
    {
      "epoch": 0.3992962460615364,
      "grad_norm": 3.21875,
      "learning_rate": 4.816130179573251e-05,
      "loss": 0.8872,
      "step": 113930
    },
    {
      "epoch": 0.399331293568432,
      "grad_norm": 3.40625,
      "learning_rate": 4.816065276706881e-05,
      "loss": 0.9266,
      "step": 113940
    },
    {
      "epoch": 0.3993663410753276,
      "grad_norm": 2.921875,
      "learning_rate": 4.8160003738405107e-05,
      "loss": 0.8752,
      "step": 113950
    },
    {
      "epoch": 0.3994013885822232,
      "grad_norm": 3.390625,
      "learning_rate": 4.815935470974141e-05,
      "loss": 0.9522,
      "step": 113960
    },
    {
      "epoch": 0.3994364360891188,
      "grad_norm": 3.15625,
      "learning_rate": 4.81587056810777e-05,
      "loss": 0.9574,
      "step": 113970
    },
    {
      "epoch": 0.3994714835960144,
      "grad_norm": 3.3125,
      "learning_rate": 4.8158056652414004e-05,
      "loss": 0.9599,
      "step": 113980
    },
    {
      "epoch": 0.39950653110291,
      "grad_norm": 2.859375,
      "learning_rate": 4.81574076237503e-05,
      "loss": 0.999,
      "step": 113990
    },
    {
      "epoch": 0.3995415786098056,
      "grad_norm": 2.875,
      "learning_rate": 4.81567585950866e-05,
      "loss": 0.9949,
      "step": 114000
    },
    {
      "epoch": 0.3995766261167012,
      "grad_norm": 2.78125,
      "learning_rate": 4.8156109566422895e-05,
      "loss": 0.9849,
      "step": 114010
    },
    {
      "epoch": 0.3996116736235968,
      "grad_norm": 2.9375,
      "learning_rate": 4.815546053775919e-05,
      "loss": 0.9668,
      "step": 114020
    },
    {
      "epoch": 0.3996467211304924,
      "grad_norm": 3.046875,
      "learning_rate": 4.815481150909549e-05,
      "loss": 0.8949,
      "step": 114030
    },
    {
      "epoch": 0.39968176863738797,
      "grad_norm": 3.140625,
      "learning_rate": 4.8154162480431785e-05,
      "loss": 0.938,
      "step": 114040
    },
    {
      "epoch": 0.39971681614428356,
      "grad_norm": 2.828125,
      "learning_rate": 4.8153513451768087e-05,
      "loss": 0.9902,
      "step": 114050
    },
    {
      "epoch": 0.39975186365117915,
      "grad_norm": 2.890625,
      "learning_rate": 4.815286442310438e-05,
      "loss": 0.9718,
      "step": 114060
    },
    {
      "epoch": 0.3997869111580748,
      "grad_norm": 2.828125,
      "learning_rate": 4.815221539444068e-05,
      "loss": 0.9651,
      "step": 114070
    },
    {
      "epoch": 0.3998219586649704,
      "grad_norm": 3.59375,
      "learning_rate": 4.815156636577698e-05,
      "loss": 0.985,
      "step": 114080
    },
    {
      "epoch": 0.399857006171866,
      "grad_norm": 3.34375,
      "learning_rate": 4.815091733711328e-05,
      "loss": 0.9586,
      "step": 114090
    },
    {
      "epoch": 0.39989205367876157,
      "grad_norm": 3.375,
      "learning_rate": 4.815026830844957e-05,
      "loss": 0.9727,
      "step": 114100
    },
    {
      "epoch": 0.39992710118565716,
      "grad_norm": 3.46875,
      "learning_rate": 4.8149619279785875e-05,
      "loss": 1.006,
      "step": 114110
    },
    {
      "epoch": 0.39996214869255275,
      "grad_norm": 3.109375,
      "learning_rate": 4.814897025112217e-05,
      "loss": 0.9777,
      "step": 114120
    },
    {
      "epoch": 0.39999719619944835,
      "grad_norm": 3.1875,
      "learning_rate": 4.814832122245847e-05,
      "loss": 0.9192,
      "step": 114130
    },
    {
      "epoch": 0.40003224370634394,
      "grad_norm": 2.8125,
      "learning_rate": 4.8147672193794765e-05,
      "loss": 0.9664,
      "step": 114140
    },
    {
      "epoch": 0.40006729121323953,
      "grad_norm": 2.671875,
      "learning_rate": 4.8147023165131067e-05,
      "loss": 0.9638,
      "step": 114150
    },
    {
      "epoch": 0.4001023387201351,
      "grad_norm": 3.078125,
      "learning_rate": 4.814637413646737e-05,
      "loss": 0.8722,
      "step": 114160
    },
    {
      "epoch": 0.40013738622703077,
      "grad_norm": 2.953125,
      "learning_rate": 4.814572510780366e-05,
      "loss": 0.9039,
      "step": 114170
    },
    {
      "epoch": 0.40017243373392636,
      "grad_norm": 3.75,
      "learning_rate": 4.8145076079139964e-05,
      "loss": 0.9991,
      "step": 114180
    },
    {
      "epoch": 0.40020748124082195,
      "grad_norm": 3.28125,
      "learning_rate": 4.814442705047626e-05,
      "loss": 0.9171,
      "step": 114190
    },
    {
      "epoch": 0.40024252874771754,
      "grad_norm": 2.96875,
      "learning_rate": 4.814377802181256e-05,
      "loss": 0.9108,
      "step": 114200
    },
    {
      "epoch": 0.40027757625461313,
      "grad_norm": 3.28125,
      "learning_rate": 4.8143128993148855e-05,
      "loss": 0.9671,
      "step": 114210
    },
    {
      "epoch": 0.4003126237615087,
      "grad_norm": 3.25,
      "learning_rate": 4.8142479964485156e-05,
      "loss": 0.9715,
      "step": 114220
    },
    {
      "epoch": 0.4003476712684043,
      "grad_norm": 3.25,
      "learning_rate": 4.814183093582145e-05,
      "loss": 0.9488,
      "step": 114230
    },
    {
      "epoch": 0.4003827187752999,
      "grad_norm": 3.296875,
      "learning_rate": 4.814118190715775e-05,
      "loss": 0.9581,
      "step": 114240
    },
    {
      "epoch": 0.4004177662821955,
      "grad_norm": 2.953125,
      "learning_rate": 4.8140532878494047e-05,
      "loss": 0.9564,
      "step": 114250
    },
    {
      "epoch": 0.4004528137890911,
      "grad_norm": 2.6875,
      "learning_rate": 4.813988384983035e-05,
      "loss": 0.9136,
      "step": 114260
    },
    {
      "epoch": 0.40048786129598674,
      "grad_norm": 3.21875,
      "learning_rate": 4.813923482116664e-05,
      "loss": 0.9687,
      "step": 114270
    },
    {
      "epoch": 0.4005229088028823,
      "grad_norm": 3.40625,
      "learning_rate": 4.8138585792502944e-05,
      "loss": 0.9596,
      "step": 114280
    },
    {
      "epoch": 0.4005579563097779,
      "grad_norm": 3.15625,
      "learning_rate": 4.8137936763839245e-05,
      "loss": 0.9723,
      "step": 114290
    },
    {
      "epoch": 0.4005930038166735,
      "grad_norm": 2.828125,
      "learning_rate": 4.813728773517554e-05,
      "loss": 0.9203,
      "step": 114300
    },
    {
      "epoch": 0.4006280513235691,
      "grad_norm": 2.5,
      "learning_rate": 4.813663870651184e-05,
      "loss": 0.9329,
      "step": 114310
    },
    {
      "epoch": 0.4006630988304647,
      "grad_norm": 2.796875,
      "learning_rate": 4.8135989677848136e-05,
      "loss": 1.0165,
      "step": 114320
    },
    {
      "epoch": 0.4006981463373603,
      "grad_norm": 2.734375,
      "learning_rate": 4.813534064918444e-05,
      "loss": 0.958,
      "step": 114330
    },
    {
      "epoch": 0.4007331938442559,
      "grad_norm": 3.03125,
      "learning_rate": 4.813469162052073e-05,
      "loss": 0.9311,
      "step": 114340
    },
    {
      "epoch": 0.40076824135115147,
      "grad_norm": 3.078125,
      "learning_rate": 4.813404259185703e-05,
      "loss": 0.9544,
      "step": 114350
    },
    {
      "epoch": 0.40080328885804706,
      "grad_norm": 3.25,
      "learning_rate": 4.813339356319333e-05,
      "loss": 0.9481,
      "step": 114360
    },
    {
      "epoch": 0.4008383363649427,
      "grad_norm": 3.3125,
      "learning_rate": 4.813274453452963e-05,
      "loss": 1.0393,
      "step": 114370
    },
    {
      "epoch": 0.4008733838718383,
      "grad_norm": 3.0625,
      "learning_rate": 4.8132095505865924e-05,
      "loss": 0.96,
      "step": 114380
    },
    {
      "epoch": 0.4009084313787339,
      "grad_norm": 2.65625,
      "learning_rate": 4.813144647720222e-05,
      "loss": 0.9371,
      "step": 114390
    },
    {
      "epoch": 0.4009434788856295,
      "grad_norm": 2.875,
      "learning_rate": 4.813079744853852e-05,
      "loss": 0.9199,
      "step": 114400
    },
    {
      "epoch": 0.40097852639252507,
      "grad_norm": 3.234375,
      "learning_rate": 4.8130148419874815e-05,
      "loss": 0.8958,
      "step": 114410
    },
    {
      "epoch": 0.40101357389942066,
      "grad_norm": 3.515625,
      "learning_rate": 4.8129499391211116e-05,
      "loss": 0.9657,
      "step": 114420
    },
    {
      "epoch": 0.40104862140631625,
      "grad_norm": 3.09375,
      "learning_rate": 4.812885036254741e-05,
      "loss": 0.9442,
      "step": 114430
    },
    {
      "epoch": 0.40108366891321184,
      "grad_norm": 3.15625,
      "learning_rate": 4.812820133388371e-05,
      "loss": 0.9312,
      "step": 114440
    },
    {
      "epoch": 0.40111871642010744,
      "grad_norm": 2.984375,
      "learning_rate": 4.8127552305220007e-05,
      "loss": 0.9585,
      "step": 114450
    },
    {
      "epoch": 0.401153763927003,
      "grad_norm": 2.984375,
      "learning_rate": 4.812690327655631e-05,
      "loss": 1.0447,
      "step": 114460
    },
    {
      "epoch": 0.4011888114338987,
      "grad_norm": 2.796875,
      "learning_rate": 4.81262542478926e-05,
      "loss": 0.9081,
      "step": 114470
    },
    {
      "epoch": 0.40122385894079426,
      "grad_norm": 3.203125,
      "learning_rate": 4.8125605219228904e-05,
      "loss": 0.9405,
      "step": 114480
    },
    {
      "epoch": 0.40125890644768986,
      "grad_norm": 2.9375,
      "learning_rate": 4.81249561905652e-05,
      "loss": 0.9126,
      "step": 114490
    },
    {
      "epoch": 0.40129395395458545,
      "grad_norm": 3.671875,
      "learning_rate": 4.81243071619015e-05,
      "loss": 0.9796,
      "step": 114500
    },
    {
      "epoch": 0.40132900146148104,
      "grad_norm": 3.203125,
      "learning_rate": 4.8123658133237795e-05,
      "loss": 0.944,
      "step": 114510
    },
    {
      "epoch": 0.40136404896837663,
      "grad_norm": 3.171875,
      "learning_rate": 4.8123009104574096e-05,
      "loss": 0.8352,
      "step": 114520
    },
    {
      "epoch": 0.4013990964752722,
      "grad_norm": 3.625,
      "learning_rate": 4.81223600759104e-05,
      "loss": 0.9726,
      "step": 114530
    },
    {
      "epoch": 0.4014341439821678,
      "grad_norm": 3.484375,
      "learning_rate": 4.812171104724669e-05,
      "loss": 0.8991,
      "step": 114540
    },
    {
      "epoch": 0.4014691914890634,
      "grad_norm": 3.1875,
      "learning_rate": 4.812106201858299e-05,
      "loss": 1.0015,
      "step": 114550
    },
    {
      "epoch": 0.401504238995959,
      "grad_norm": 3.375,
      "learning_rate": 4.812041298991929e-05,
      "loss": 0.9832,
      "step": 114560
    },
    {
      "epoch": 0.40153928650285464,
      "grad_norm": 3.171875,
      "learning_rate": 4.811976396125559e-05,
      "loss": 0.9573,
      "step": 114570
    },
    {
      "epoch": 0.40157433400975023,
      "grad_norm": 2.796875,
      "learning_rate": 4.8119114932591884e-05,
      "loss": 0.9696,
      "step": 114580
    },
    {
      "epoch": 0.4016093815166458,
      "grad_norm": 2.734375,
      "learning_rate": 4.8118465903928185e-05,
      "loss": 0.8948,
      "step": 114590
    },
    {
      "epoch": 0.4016444290235414,
      "grad_norm": 3.34375,
      "learning_rate": 4.811781687526448e-05,
      "loss": 0.9292,
      "step": 114600
    },
    {
      "epoch": 0.401679476530437,
      "grad_norm": 3.078125,
      "learning_rate": 4.811716784660078e-05,
      "loss": 0.9123,
      "step": 114610
    },
    {
      "epoch": 0.4017145240373326,
      "grad_norm": 2.703125,
      "learning_rate": 4.8116518817937076e-05,
      "loss": 0.972,
      "step": 114620
    },
    {
      "epoch": 0.4017495715442282,
      "grad_norm": 2.703125,
      "learning_rate": 4.811586978927338e-05,
      "loss": 0.8914,
      "step": 114630
    },
    {
      "epoch": 0.4017846190511238,
      "grad_norm": 3.125,
      "learning_rate": 4.811522076060967e-05,
      "loss": 0.9413,
      "step": 114640
    },
    {
      "epoch": 0.4018196665580194,
      "grad_norm": 3.21875,
      "learning_rate": 4.811457173194597e-05,
      "loss": 0.9267,
      "step": 114650
    },
    {
      "epoch": 0.401854714064915,
      "grad_norm": 2.984375,
      "learning_rate": 4.8113922703282275e-05,
      "loss": 0.9472,
      "step": 114660
    },
    {
      "epoch": 0.4018897615718106,
      "grad_norm": 2.484375,
      "learning_rate": 4.811327367461857e-05,
      "loss": 0.8752,
      "step": 114670
    },
    {
      "epoch": 0.4019248090787062,
      "grad_norm": 2.953125,
      "learning_rate": 4.811262464595487e-05,
      "loss": 0.9122,
      "step": 114680
    },
    {
      "epoch": 0.4019598565856018,
      "grad_norm": 3.328125,
      "learning_rate": 4.8111975617291165e-05,
      "loss": 1.0147,
      "step": 114690
    },
    {
      "epoch": 0.4019949040924974,
      "grad_norm": 3.421875,
      "learning_rate": 4.811132658862747e-05,
      "loss": 0.9721,
      "step": 114700
    },
    {
      "epoch": 0.402029951599393,
      "grad_norm": 4.15625,
      "learning_rate": 4.811067755996376e-05,
      "loss": 0.9973,
      "step": 114710
    },
    {
      "epoch": 0.40206499910628857,
      "grad_norm": 2.75,
      "learning_rate": 4.811002853130006e-05,
      "loss": 1.0405,
      "step": 114720
    },
    {
      "epoch": 0.40210004661318416,
      "grad_norm": 2.953125,
      "learning_rate": 4.810937950263636e-05,
      "loss": 0.9711,
      "step": 114730
    },
    {
      "epoch": 0.40213509412007975,
      "grad_norm": 2.859375,
      "learning_rate": 4.810873047397266e-05,
      "loss": 0.9351,
      "step": 114740
    },
    {
      "epoch": 0.40217014162697534,
      "grad_norm": 3.25,
      "learning_rate": 4.810808144530895e-05,
      "loss": 0.885,
      "step": 114750
    },
    {
      "epoch": 0.402205189133871,
      "grad_norm": 3.359375,
      "learning_rate": 4.810743241664525e-05,
      "loss": 0.9731,
      "step": 114760
    },
    {
      "epoch": 0.4022402366407666,
      "grad_norm": 3.125,
      "learning_rate": 4.810678338798155e-05,
      "loss": 0.9997,
      "step": 114770
    },
    {
      "epoch": 0.40227528414766217,
      "grad_norm": 3.28125,
      "learning_rate": 4.8106134359317844e-05,
      "loss": 0.9774,
      "step": 114780
    },
    {
      "epoch": 0.40231033165455776,
      "grad_norm": 2.859375,
      "learning_rate": 4.8105485330654145e-05,
      "loss": 0.982,
      "step": 114790
    },
    {
      "epoch": 0.40234537916145335,
      "grad_norm": 3.296875,
      "learning_rate": 4.810483630199044e-05,
      "loss": 1.0568,
      "step": 114800
    },
    {
      "epoch": 0.40238042666834895,
      "grad_norm": 3.34375,
      "learning_rate": 4.810418727332674e-05,
      "loss": 1.0518,
      "step": 114810
    },
    {
      "epoch": 0.40241547417524454,
      "grad_norm": 3.390625,
      "learning_rate": 4.8103538244663036e-05,
      "loss": 1.0975,
      "step": 114820
    },
    {
      "epoch": 0.4024505216821401,
      "grad_norm": 3.203125,
      "learning_rate": 4.810288921599934e-05,
      "loss": 0.8829,
      "step": 114830
    },
    {
      "epoch": 0.4024855691890357,
      "grad_norm": 2.578125,
      "learning_rate": 4.810224018733563e-05,
      "loss": 0.9639,
      "step": 114840
    },
    {
      "epoch": 0.4025206166959313,
      "grad_norm": 2.859375,
      "learning_rate": 4.810159115867193e-05,
      "loss": 0.9348,
      "step": 114850
    },
    {
      "epoch": 0.40255566420282696,
      "grad_norm": 2.71875,
      "learning_rate": 4.810094213000823e-05,
      "loss": 1.005,
      "step": 114860
    },
    {
      "epoch": 0.40259071170972255,
      "grad_norm": 3.0,
      "learning_rate": 4.810029310134453e-05,
      "loss": 0.9055,
      "step": 114870
    },
    {
      "epoch": 0.40262575921661814,
      "grad_norm": 2.890625,
      "learning_rate": 4.8099644072680824e-05,
      "loss": 0.9434,
      "step": 114880
    },
    {
      "epoch": 0.40266080672351373,
      "grad_norm": 2.671875,
      "learning_rate": 4.8098995044017125e-05,
      "loss": 0.9785,
      "step": 114890
    },
    {
      "epoch": 0.4026958542304093,
      "grad_norm": 2.921875,
      "learning_rate": 4.809834601535343e-05,
      "loss": 0.9626,
      "step": 114900
    },
    {
      "epoch": 0.4027309017373049,
      "grad_norm": 3.0625,
      "learning_rate": 4.809769698668972e-05,
      "loss": 0.992,
      "step": 114910
    },
    {
      "epoch": 0.4027659492442005,
      "grad_norm": 2.96875,
      "learning_rate": 4.809704795802602e-05,
      "loss": 0.9619,
      "step": 114920
    },
    {
      "epoch": 0.4028009967510961,
      "grad_norm": 2.90625,
      "learning_rate": 4.809639892936232e-05,
      "loss": 0.889,
      "step": 114930
    },
    {
      "epoch": 0.4028360442579917,
      "grad_norm": 3.296875,
      "learning_rate": 4.809574990069862e-05,
      "loss": 0.9411,
      "step": 114940
    },
    {
      "epoch": 0.4028710917648873,
      "grad_norm": 3.296875,
      "learning_rate": 4.809510087203491e-05,
      "loss": 0.9668,
      "step": 114950
    },
    {
      "epoch": 0.4029061392717829,
      "grad_norm": 3.21875,
      "learning_rate": 4.8094451843371215e-05,
      "loss": 0.88,
      "step": 114960
    },
    {
      "epoch": 0.4029411867786785,
      "grad_norm": 3.046875,
      "learning_rate": 4.809380281470751e-05,
      "loss": 0.877,
      "step": 114970
    },
    {
      "epoch": 0.4029762342855741,
      "grad_norm": 3.125,
      "learning_rate": 4.809315378604381e-05,
      "loss": 0.977,
      "step": 114980
    },
    {
      "epoch": 0.4030112817924697,
      "grad_norm": 2.75,
      "learning_rate": 4.8092504757380105e-05,
      "loss": 1.0194,
      "step": 114990
    },
    {
      "epoch": 0.4030463292993653,
      "grad_norm": 2.9375,
      "learning_rate": 4.809185572871641e-05,
      "loss": 0.9196,
      "step": 115000
    },
    {
      "epoch": 0.4030463292993653,
      "eval_loss": 0.889788031578064,
      "eval_runtime": 558.8434,
      "eval_samples_per_second": 680.756,
      "eval_steps_per_second": 56.73,
      "step": 115000
    },
    {
      "epoch": 0.4030813768062609,
      "grad_norm": 2.84375,
      "learning_rate": 4.80912067000527e-05,
      "loss": 0.9877,
      "step": 115010
    },
    {
      "epoch": 0.4031164243131565,
      "grad_norm": 2.890625,
      "learning_rate": 4.8090557671389e-05,
      "loss": 1.0125,
      "step": 115020
    },
    {
      "epoch": 0.40315147182005207,
      "grad_norm": 3.1875,
      "learning_rate": 4.8089908642725304e-05,
      "loss": 0.9121,
      "step": 115030
    },
    {
      "epoch": 0.40318651932694766,
      "grad_norm": 3.40625,
      "learning_rate": 4.80892596140616e-05,
      "loss": 1.0085,
      "step": 115040
    },
    {
      "epoch": 0.40322156683384325,
      "grad_norm": 2.984375,
      "learning_rate": 4.80886105853979e-05,
      "loss": 0.9884,
      "step": 115050
    },
    {
      "epoch": 0.4032566143407389,
      "grad_norm": 2.90625,
      "learning_rate": 4.8087961556734195e-05,
      "loss": 0.9391,
      "step": 115060
    },
    {
      "epoch": 0.4032916618476345,
      "grad_norm": 2.859375,
      "learning_rate": 4.8087312528070496e-05,
      "loss": 0.9766,
      "step": 115070
    },
    {
      "epoch": 0.4033267093545301,
      "grad_norm": 3.140625,
      "learning_rate": 4.808666349940679e-05,
      "loss": 0.979,
      "step": 115080
    },
    {
      "epoch": 0.40336175686142567,
      "grad_norm": 2.78125,
      "learning_rate": 4.808601447074309e-05,
      "loss": 0.951,
      "step": 115090
    },
    {
      "epoch": 0.40339680436832126,
      "grad_norm": 2.78125,
      "learning_rate": 4.808536544207939e-05,
      "loss": 0.9831,
      "step": 115100
    },
    {
      "epoch": 0.40343185187521685,
      "grad_norm": 3.234375,
      "learning_rate": 4.808471641341569e-05,
      "loss": 0.9838,
      "step": 115110
    },
    {
      "epoch": 0.40346689938211244,
      "grad_norm": 3.125,
      "learning_rate": 4.808406738475198e-05,
      "loss": 0.9067,
      "step": 115120
    },
    {
      "epoch": 0.40350194688900803,
      "grad_norm": 3.203125,
      "learning_rate": 4.808341835608828e-05,
      "loss": 0.8814,
      "step": 115130
    },
    {
      "epoch": 0.4035369943959036,
      "grad_norm": 2.796875,
      "learning_rate": 4.808276932742458e-05,
      "loss": 0.8944,
      "step": 115140
    },
    {
      "epoch": 0.4035720419027992,
      "grad_norm": 2.75,
      "learning_rate": 4.808212029876087e-05,
      "loss": 0.9154,
      "step": 115150
    },
    {
      "epoch": 0.40360708940969486,
      "grad_norm": 2.84375,
      "learning_rate": 4.8081471270097175e-05,
      "loss": 0.8694,
      "step": 115160
    },
    {
      "epoch": 0.40364213691659045,
      "grad_norm": 2.828125,
      "learning_rate": 4.808082224143347e-05,
      "loss": 0.9578,
      "step": 115170
    },
    {
      "epoch": 0.40367718442348605,
      "grad_norm": 3.0625,
      "learning_rate": 4.808017321276977e-05,
      "loss": 0.9812,
      "step": 115180
    },
    {
      "epoch": 0.40371223193038164,
      "grad_norm": 3.234375,
      "learning_rate": 4.8079524184106065e-05,
      "loss": 0.8853,
      "step": 115190
    },
    {
      "epoch": 0.40374727943727723,
      "grad_norm": 3.59375,
      "learning_rate": 4.807887515544237e-05,
      "loss": 0.9506,
      "step": 115200
    },
    {
      "epoch": 0.4037823269441728,
      "grad_norm": 2.75,
      "learning_rate": 4.807822612677866e-05,
      "loss": 0.8836,
      "step": 115210
    },
    {
      "epoch": 0.4038173744510684,
      "grad_norm": 3.078125,
      "learning_rate": 4.807757709811496e-05,
      "loss": 0.954,
      "step": 115220
    },
    {
      "epoch": 0.403852421957964,
      "grad_norm": 3.234375,
      "learning_rate": 4.807692806945126e-05,
      "loss": 0.8951,
      "step": 115230
    },
    {
      "epoch": 0.4038874694648596,
      "grad_norm": 3.140625,
      "learning_rate": 4.807627904078756e-05,
      "loss": 0.9957,
      "step": 115240
    },
    {
      "epoch": 0.40392251697175524,
      "grad_norm": 3.1875,
      "learning_rate": 4.807563001212386e-05,
      "loss": 0.8726,
      "step": 115250
    },
    {
      "epoch": 0.40395756447865083,
      "grad_norm": 2.84375,
      "learning_rate": 4.8074980983460155e-05,
      "loss": 0.9609,
      "step": 115260
    },
    {
      "epoch": 0.4039926119855464,
      "grad_norm": 2.9375,
      "learning_rate": 4.8074331954796456e-05,
      "loss": 0.9743,
      "step": 115270
    },
    {
      "epoch": 0.404027659492442,
      "grad_norm": 3.109375,
      "learning_rate": 4.807368292613275e-05,
      "loss": 0.9514,
      "step": 115280
    },
    {
      "epoch": 0.4040627069993376,
      "grad_norm": 3.0625,
      "learning_rate": 4.807303389746905e-05,
      "loss": 0.9015,
      "step": 115290
    },
    {
      "epoch": 0.4040977545062332,
      "grad_norm": 2.96875,
      "learning_rate": 4.807238486880535e-05,
      "loss": 0.9197,
      "step": 115300
    },
    {
      "epoch": 0.4041328020131288,
      "grad_norm": 2.78125,
      "learning_rate": 4.807173584014165e-05,
      "loss": 0.9831,
      "step": 115310
    },
    {
      "epoch": 0.4041678495200244,
      "grad_norm": 3.015625,
      "learning_rate": 4.807108681147794e-05,
      "loss": 0.9395,
      "step": 115320
    },
    {
      "epoch": 0.40420289702691997,
      "grad_norm": 2.9375,
      "learning_rate": 4.8070437782814244e-05,
      "loss": 0.8473,
      "step": 115330
    },
    {
      "epoch": 0.40423794453381556,
      "grad_norm": 2.65625,
      "learning_rate": 4.806978875415054e-05,
      "loss": 0.8455,
      "step": 115340
    },
    {
      "epoch": 0.4042729920407112,
      "grad_norm": 3.296875,
      "learning_rate": 4.806913972548684e-05,
      "loss": 0.8885,
      "step": 115350
    },
    {
      "epoch": 0.4043080395476068,
      "grad_norm": 3.421875,
      "learning_rate": 4.8068490696823135e-05,
      "loss": 0.8766,
      "step": 115360
    },
    {
      "epoch": 0.4043430870545024,
      "grad_norm": 2.9375,
      "learning_rate": 4.8067841668159436e-05,
      "loss": 0.8901,
      "step": 115370
    },
    {
      "epoch": 0.404378134561398,
      "grad_norm": 3.203125,
      "learning_rate": 4.806719263949573e-05,
      "loss": 0.9913,
      "step": 115380
    },
    {
      "epoch": 0.4044131820682936,
      "grad_norm": 2.921875,
      "learning_rate": 4.806654361083203e-05,
      "loss": 1.0945,
      "step": 115390
    },
    {
      "epoch": 0.40444822957518917,
      "grad_norm": 2.953125,
      "learning_rate": 4.8065894582168333e-05,
      "loss": 0.9204,
      "step": 115400
    },
    {
      "epoch": 0.40448327708208476,
      "grad_norm": 3.296875,
      "learning_rate": 4.806524555350463e-05,
      "loss": 0.9155,
      "step": 115410
    },
    {
      "epoch": 0.40451832458898035,
      "grad_norm": 2.734375,
      "learning_rate": 4.806459652484093e-05,
      "loss": 0.8583,
      "step": 115420
    },
    {
      "epoch": 0.40455337209587594,
      "grad_norm": 2.734375,
      "learning_rate": 4.8063947496177224e-05,
      "loss": 0.9169,
      "step": 115430
    },
    {
      "epoch": 0.40458841960277153,
      "grad_norm": 2.90625,
      "learning_rate": 4.8063298467513525e-05,
      "loss": 1.0069,
      "step": 115440
    },
    {
      "epoch": 0.4046234671096672,
      "grad_norm": 3.09375,
      "learning_rate": 4.806264943884982e-05,
      "loss": 0.9352,
      "step": 115450
    },
    {
      "epoch": 0.40465851461656277,
      "grad_norm": 3.015625,
      "learning_rate": 4.806200041018612e-05,
      "loss": 0.9198,
      "step": 115460
    },
    {
      "epoch": 0.40469356212345836,
      "grad_norm": 2.921875,
      "learning_rate": 4.8061351381522416e-05,
      "loss": 0.8877,
      "step": 115470
    },
    {
      "epoch": 0.40472860963035395,
      "grad_norm": 3.34375,
      "learning_rate": 4.806070235285872e-05,
      "loss": 0.9198,
      "step": 115480
    },
    {
      "epoch": 0.40476365713724954,
      "grad_norm": 3.0625,
      "learning_rate": 4.806005332419501e-05,
      "loss": 0.8417,
      "step": 115490
    },
    {
      "epoch": 0.40479870464414514,
      "grad_norm": 3.171875,
      "learning_rate": 4.8059404295531313e-05,
      "loss": 0.905,
      "step": 115500
    },
    {
      "epoch": 0.4048337521510407,
      "grad_norm": 2.734375,
      "learning_rate": 4.805875526686761e-05,
      "loss": 0.9892,
      "step": 115510
    },
    {
      "epoch": 0.4048687996579363,
      "grad_norm": 3.03125,
      "learning_rate": 4.80581062382039e-05,
      "loss": 0.9193,
      "step": 115520
    },
    {
      "epoch": 0.4049038471648319,
      "grad_norm": 3.265625,
      "learning_rate": 4.8057457209540204e-05,
      "loss": 0.9659,
      "step": 115530
    },
    {
      "epoch": 0.4049388946717275,
      "grad_norm": 3.34375,
      "learning_rate": 4.80568081808765e-05,
      "loss": 0.8897,
      "step": 115540
    },
    {
      "epoch": 0.40497394217862315,
      "grad_norm": 3.09375,
      "learning_rate": 4.80561591522128e-05,
      "loss": 1.0178,
      "step": 115550
    },
    {
      "epoch": 0.40500898968551874,
      "grad_norm": 2.953125,
      "learning_rate": 4.8055510123549095e-05,
      "loss": 0.9724,
      "step": 115560
    },
    {
      "epoch": 0.40504403719241433,
      "grad_norm": 3.0625,
      "learning_rate": 4.8054861094885396e-05,
      "loss": 0.9626,
      "step": 115570
    },
    {
      "epoch": 0.4050790846993099,
      "grad_norm": 3.25,
      "learning_rate": 4.805421206622169e-05,
      "loss": 0.9756,
      "step": 115580
    },
    {
      "epoch": 0.4051141322062055,
      "grad_norm": 3.171875,
      "learning_rate": 4.805356303755799e-05,
      "loss": 0.8894,
      "step": 115590
    },
    {
      "epoch": 0.4051491797131011,
      "grad_norm": 3.4375,
      "learning_rate": 4.805291400889429e-05,
      "loss": 0.993,
      "step": 115600
    },
    {
      "epoch": 0.4051842272199967,
      "grad_norm": 2.71875,
      "learning_rate": 4.805226498023059e-05,
      "loss": 0.934,
      "step": 115610
    },
    {
      "epoch": 0.4052192747268923,
      "grad_norm": 3.0,
      "learning_rate": 4.805161595156689e-05,
      "loss": 0.8815,
      "step": 115620
    },
    {
      "epoch": 0.4052543222337879,
      "grad_norm": 3.515625,
      "learning_rate": 4.8050966922903184e-05,
      "loss": 0.994,
      "step": 115630
    },
    {
      "epoch": 0.40528936974068347,
      "grad_norm": 3.71875,
      "learning_rate": 4.8050317894239485e-05,
      "loss": 0.8973,
      "step": 115640
    },
    {
      "epoch": 0.4053244172475791,
      "grad_norm": 3.09375,
      "learning_rate": 4.804966886557578e-05,
      "loss": 0.9823,
      "step": 115650
    },
    {
      "epoch": 0.4053594647544747,
      "grad_norm": 2.828125,
      "learning_rate": 4.804901983691208e-05,
      "loss": 0.9531,
      "step": 115660
    },
    {
      "epoch": 0.4053945122613703,
      "grad_norm": 3.078125,
      "learning_rate": 4.8048370808248376e-05,
      "loss": 0.9679,
      "step": 115670
    },
    {
      "epoch": 0.4054295597682659,
      "grad_norm": 2.84375,
      "learning_rate": 4.804772177958468e-05,
      "loss": 0.8343,
      "step": 115680
    },
    {
      "epoch": 0.4054646072751615,
      "grad_norm": 2.984375,
      "learning_rate": 4.804707275092097e-05,
      "loss": 0.9947,
      "step": 115690
    },
    {
      "epoch": 0.4054996547820571,
      "grad_norm": 2.890625,
      "learning_rate": 4.8046423722257273e-05,
      "loss": 0.9822,
      "step": 115700
    },
    {
      "epoch": 0.40553470228895266,
      "grad_norm": 3.015625,
      "learning_rate": 4.804577469359357e-05,
      "loss": 0.9315,
      "step": 115710
    },
    {
      "epoch": 0.40556974979584826,
      "grad_norm": 3.40625,
      "learning_rate": 4.804512566492987e-05,
      "loss": 1.0629,
      "step": 115720
    },
    {
      "epoch": 0.40560479730274385,
      "grad_norm": 3.09375,
      "learning_rate": 4.8044476636266164e-05,
      "loss": 0.9724,
      "step": 115730
    },
    {
      "epoch": 0.4056398448096395,
      "grad_norm": 3.265625,
      "learning_rate": 4.8043827607602465e-05,
      "loss": 0.9113,
      "step": 115740
    },
    {
      "epoch": 0.4056748923165351,
      "grad_norm": 2.90625,
      "learning_rate": 4.804317857893876e-05,
      "loss": 0.9197,
      "step": 115750
    },
    {
      "epoch": 0.4057099398234307,
      "grad_norm": 3.234375,
      "learning_rate": 4.804252955027506e-05,
      "loss": 1.0252,
      "step": 115760
    },
    {
      "epoch": 0.40574498733032627,
      "grad_norm": 3.015625,
      "learning_rate": 4.804188052161136e-05,
      "loss": 0.9116,
      "step": 115770
    },
    {
      "epoch": 0.40578003483722186,
      "grad_norm": 3.359375,
      "learning_rate": 4.804123149294766e-05,
      "loss": 0.9697,
      "step": 115780
    },
    {
      "epoch": 0.40581508234411745,
      "grad_norm": 3.0625,
      "learning_rate": 4.804058246428396e-05,
      "loss": 1.0168,
      "step": 115790
    },
    {
      "epoch": 0.40585012985101304,
      "grad_norm": 3.328125,
      "learning_rate": 4.8039933435620253e-05,
      "loss": 0.9643,
      "step": 115800
    },
    {
      "epoch": 0.40588517735790863,
      "grad_norm": 3.09375,
      "learning_rate": 4.8039284406956555e-05,
      "loss": 0.916,
      "step": 115810
    },
    {
      "epoch": 0.4059202248648042,
      "grad_norm": 3.28125,
      "learning_rate": 4.803863537829285e-05,
      "loss": 1.0407,
      "step": 115820
    },
    {
      "epoch": 0.4059552723716998,
      "grad_norm": 2.65625,
      "learning_rate": 4.803798634962915e-05,
      "loss": 0.8663,
      "step": 115830
    },
    {
      "epoch": 0.40599031987859546,
      "grad_norm": 3.21875,
      "learning_rate": 4.8037337320965445e-05,
      "loss": 0.9946,
      "step": 115840
    },
    {
      "epoch": 0.40602536738549105,
      "grad_norm": 3.0625,
      "learning_rate": 4.803668829230175e-05,
      "loss": 0.9007,
      "step": 115850
    },
    {
      "epoch": 0.40606041489238665,
      "grad_norm": 3.1875,
      "learning_rate": 4.803603926363804e-05,
      "loss": 0.9755,
      "step": 115860
    },
    {
      "epoch": 0.40609546239928224,
      "grad_norm": 2.828125,
      "learning_rate": 4.803539023497434e-05,
      "loss": 0.8606,
      "step": 115870
    },
    {
      "epoch": 0.40613050990617783,
      "grad_norm": 3.328125,
      "learning_rate": 4.803474120631064e-05,
      "loss": 0.9692,
      "step": 115880
    },
    {
      "epoch": 0.4061655574130734,
      "grad_norm": 3.21875,
      "learning_rate": 4.803409217764693e-05,
      "loss": 0.9334,
      "step": 115890
    },
    {
      "epoch": 0.406200604919969,
      "grad_norm": 2.9375,
      "learning_rate": 4.8033443148983233e-05,
      "loss": 0.9024,
      "step": 115900
    },
    {
      "epoch": 0.4062356524268646,
      "grad_norm": 3.5625,
      "learning_rate": 4.803279412031953e-05,
      "loss": 0.8927,
      "step": 115910
    },
    {
      "epoch": 0.4062706999337602,
      "grad_norm": 2.96875,
      "learning_rate": 4.803214509165583e-05,
      "loss": 0.8755,
      "step": 115920
    },
    {
      "epoch": 0.4063057474406558,
      "grad_norm": 3.0625,
      "learning_rate": 4.8031496062992124e-05,
      "loss": 0.9377,
      "step": 115930
    },
    {
      "epoch": 0.40634079494755143,
      "grad_norm": 3.0,
      "learning_rate": 4.8030847034328425e-05,
      "loss": 0.9724,
      "step": 115940
    },
    {
      "epoch": 0.406375842454447,
      "grad_norm": 3.03125,
      "learning_rate": 4.803019800566472e-05,
      "loss": 0.9085,
      "step": 115950
    },
    {
      "epoch": 0.4064108899613426,
      "grad_norm": 3.046875,
      "learning_rate": 4.802954897700102e-05,
      "loss": 0.9353,
      "step": 115960
    },
    {
      "epoch": 0.4064459374682382,
      "grad_norm": 3.234375,
      "learning_rate": 4.8028899948337316e-05,
      "loss": 0.9697,
      "step": 115970
    },
    {
      "epoch": 0.4064809849751338,
      "grad_norm": 2.640625,
      "learning_rate": 4.802825091967362e-05,
      "loss": 0.9446,
      "step": 115980
    },
    {
      "epoch": 0.4065160324820294,
      "grad_norm": 2.6875,
      "learning_rate": 4.802760189100992e-05,
      "loss": 0.9486,
      "step": 115990
    },
    {
      "epoch": 0.406551079988925,
      "grad_norm": 3.0625,
      "learning_rate": 4.8026952862346213e-05,
      "loss": 0.9306,
      "step": 116000
    },
    {
      "epoch": 0.40658612749582057,
      "grad_norm": 3.453125,
      "learning_rate": 4.8026303833682515e-05,
      "loss": 1.0038,
      "step": 116010
    },
    {
      "epoch": 0.40662117500271616,
      "grad_norm": 3.390625,
      "learning_rate": 4.802565480501881e-05,
      "loss": 0.9639,
      "step": 116020
    },
    {
      "epoch": 0.40665622250961175,
      "grad_norm": 2.71875,
      "learning_rate": 4.802500577635511e-05,
      "loss": 0.979,
      "step": 116030
    },
    {
      "epoch": 0.4066912700165074,
      "grad_norm": 3.296875,
      "learning_rate": 4.8024356747691405e-05,
      "loss": 0.9517,
      "step": 116040
    },
    {
      "epoch": 0.406726317523403,
      "grad_norm": 3.328125,
      "learning_rate": 4.802370771902771e-05,
      "loss": 0.9111,
      "step": 116050
    },
    {
      "epoch": 0.4067613650302986,
      "grad_norm": 3.09375,
      "learning_rate": 4.8023058690364e-05,
      "loss": 0.9433,
      "step": 116060
    },
    {
      "epoch": 0.4067964125371942,
      "grad_norm": 3.203125,
      "learning_rate": 4.80224096617003e-05,
      "loss": 0.9284,
      "step": 116070
    },
    {
      "epoch": 0.40683146004408977,
      "grad_norm": 2.890625,
      "learning_rate": 4.80217606330366e-05,
      "loss": 0.8843,
      "step": 116080
    },
    {
      "epoch": 0.40686650755098536,
      "grad_norm": 2.984375,
      "learning_rate": 4.80211116043729e-05,
      "loss": 0.9124,
      "step": 116090
    },
    {
      "epoch": 0.40690155505788095,
      "grad_norm": 3.34375,
      "learning_rate": 4.8020462575709193e-05,
      "loss": 0.9899,
      "step": 116100
    },
    {
      "epoch": 0.40693660256477654,
      "grad_norm": 2.953125,
      "learning_rate": 4.8019813547045495e-05,
      "loss": 0.9063,
      "step": 116110
    },
    {
      "epoch": 0.40697165007167213,
      "grad_norm": 2.796875,
      "learning_rate": 4.8019164518381796e-05,
      "loss": 0.896,
      "step": 116120
    },
    {
      "epoch": 0.4070066975785677,
      "grad_norm": 3.34375,
      "learning_rate": 4.801851548971809e-05,
      "loss": 0.9354,
      "step": 116130
    },
    {
      "epoch": 0.40704174508546337,
      "grad_norm": 3.265625,
      "learning_rate": 4.801786646105439e-05,
      "loss": 1.0085,
      "step": 116140
    },
    {
      "epoch": 0.40707679259235896,
      "grad_norm": 3.421875,
      "learning_rate": 4.801721743239069e-05,
      "loss": 0.9673,
      "step": 116150
    },
    {
      "epoch": 0.40711184009925455,
      "grad_norm": 3.484375,
      "learning_rate": 4.801656840372699e-05,
      "loss": 0.9601,
      "step": 116160
    },
    {
      "epoch": 0.40714688760615014,
      "grad_norm": 2.8125,
      "learning_rate": 4.801591937506328e-05,
      "loss": 0.9299,
      "step": 116170
    },
    {
      "epoch": 0.40718193511304573,
      "grad_norm": 2.953125,
      "learning_rate": 4.8015270346399584e-05,
      "loss": 0.9453,
      "step": 116180
    },
    {
      "epoch": 0.4072169826199413,
      "grad_norm": 3.265625,
      "learning_rate": 4.801462131773588e-05,
      "loss": 0.9832,
      "step": 116190
    },
    {
      "epoch": 0.4072520301268369,
      "grad_norm": 3.28125,
      "learning_rate": 4.801397228907218e-05,
      "loss": 0.8978,
      "step": 116200
    },
    {
      "epoch": 0.4072870776337325,
      "grad_norm": 3.09375,
      "learning_rate": 4.8013323260408475e-05,
      "loss": 0.9209,
      "step": 116210
    },
    {
      "epoch": 0.4073221251406281,
      "grad_norm": 3.515625,
      "learning_rate": 4.8012674231744776e-05,
      "loss": 0.9672,
      "step": 116220
    },
    {
      "epoch": 0.4073571726475237,
      "grad_norm": 3.03125,
      "learning_rate": 4.801202520308107e-05,
      "loss": 0.9612,
      "step": 116230
    },
    {
      "epoch": 0.40739222015441934,
      "grad_norm": 3.03125,
      "learning_rate": 4.801137617441737e-05,
      "loss": 0.8344,
      "step": 116240
    },
    {
      "epoch": 0.40742726766131493,
      "grad_norm": 3.296875,
      "learning_rate": 4.801072714575367e-05,
      "loss": 1.0633,
      "step": 116250
    },
    {
      "epoch": 0.4074623151682105,
      "grad_norm": 3.765625,
      "learning_rate": 4.801007811708996e-05,
      "loss": 1.0258,
      "step": 116260
    },
    {
      "epoch": 0.4074973626751061,
      "grad_norm": 2.96875,
      "learning_rate": 4.800942908842626e-05,
      "loss": 0.8887,
      "step": 116270
    },
    {
      "epoch": 0.4075324101820017,
      "grad_norm": 2.859375,
      "learning_rate": 4.800878005976256e-05,
      "loss": 0.9372,
      "step": 116280
    },
    {
      "epoch": 0.4075674576888973,
      "grad_norm": 2.8125,
      "learning_rate": 4.800813103109886e-05,
      "loss": 0.9138,
      "step": 116290
    },
    {
      "epoch": 0.4076025051957929,
      "grad_norm": 3.3125,
      "learning_rate": 4.8007482002435153e-05,
      "loss": 0.9936,
      "step": 116300
    },
    {
      "epoch": 0.4076375527026885,
      "grad_norm": 3.234375,
      "learning_rate": 4.8006832973771455e-05,
      "loss": 0.877,
      "step": 116310
    },
    {
      "epoch": 0.40767260020958407,
      "grad_norm": 6.8125,
      "learning_rate": 4.800618394510775e-05,
      "loss": 0.9984,
      "step": 116320
    },
    {
      "epoch": 0.4077076477164797,
      "grad_norm": 3.390625,
      "learning_rate": 4.800553491644405e-05,
      "loss": 1.0039,
      "step": 116330
    },
    {
      "epoch": 0.4077426952233753,
      "grad_norm": 2.859375,
      "learning_rate": 4.8004885887780345e-05,
      "loss": 0.9164,
      "step": 116340
    },
    {
      "epoch": 0.4077777427302709,
      "grad_norm": 3.015625,
      "learning_rate": 4.800423685911665e-05,
      "loss": 0.9355,
      "step": 116350
    },
    {
      "epoch": 0.4078127902371665,
      "grad_norm": 3.140625,
      "learning_rate": 4.800358783045295e-05,
      "loss": 0.9147,
      "step": 116360
    },
    {
      "epoch": 0.4078478377440621,
      "grad_norm": 3.09375,
      "learning_rate": 4.800293880178924e-05,
      "loss": 0.953,
      "step": 116370
    },
    {
      "epoch": 0.40788288525095767,
      "grad_norm": 3.109375,
      "learning_rate": 4.8002289773125544e-05,
      "loss": 0.9796,
      "step": 116380
    },
    {
      "epoch": 0.40791793275785326,
      "grad_norm": 3.15625,
      "learning_rate": 4.800164074446184e-05,
      "loss": 0.9315,
      "step": 116390
    },
    {
      "epoch": 0.40795298026474885,
      "grad_norm": 3.5625,
      "learning_rate": 4.800099171579814e-05,
      "loss": 1.0134,
      "step": 116400
    },
    {
      "epoch": 0.40798802777164445,
      "grad_norm": 2.953125,
      "learning_rate": 4.8000342687134435e-05,
      "loss": 0.9033,
      "step": 116410
    },
    {
      "epoch": 0.40802307527854004,
      "grad_norm": 3.34375,
      "learning_rate": 4.7999693658470736e-05,
      "loss": 1.0126,
      "step": 116420
    },
    {
      "epoch": 0.4080581227854357,
      "grad_norm": 3.109375,
      "learning_rate": 4.799904462980703e-05,
      "loss": 0.8097,
      "step": 116430
    },
    {
      "epoch": 0.4080931702923313,
      "grad_norm": 2.4375,
      "learning_rate": 4.799839560114333e-05,
      "loss": 0.8571,
      "step": 116440
    },
    {
      "epoch": 0.40812821779922687,
      "grad_norm": 2.703125,
      "learning_rate": 4.799774657247963e-05,
      "loss": 0.9293,
      "step": 116450
    },
    {
      "epoch": 0.40816326530612246,
      "grad_norm": 2.984375,
      "learning_rate": 4.799709754381593e-05,
      "loss": 0.9053,
      "step": 116460
    },
    {
      "epoch": 0.40819831281301805,
      "grad_norm": 2.515625,
      "learning_rate": 4.799644851515222e-05,
      "loss": 0.9528,
      "step": 116470
    },
    {
      "epoch": 0.40823336031991364,
      "grad_norm": 3.078125,
      "learning_rate": 4.7995799486488524e-05,
      "loss": 0.8934,
      "step": 116480
    },
    {
      "epoch": 0.40826840782680923,
      "grad_norm": 2.953125,
      "learning_rate": 4.7995150457824826e-05,
      "loss": 0.9753,
      "step": 116490
    },
    {
      "epoch": 0.4083034553337048,
      "grad_norm": 2.765625,
      "learning_rate": 4.799450142916112e-05,
      "loss": 0.9097,
      "step": 116500
    },
    {
      "epoch": 0.4083385028406004,
      "grad_norm": 2.84375,
      "learning_rate": 4.799385240049742e-05,
      "loss": 1.0079,
      "step": 116510
    },
    {
      "epoch": 0.408373550347496,
      "grad_norm": 3.28125,
      "learning_rate": 4.7993203371833716e-05,
      "loss": 0.9512,
      "step": 116520
    },
    {
      "epoch": 0.40840859785439165,
      "grad_norm": 3.15625,
      "learning_rate": 4.799255434317002e-05,
      "loss": 1.0425,
      "step": 116530
    },
    {
      "epoch": 0.40844364536128724,
      "grad_norm": 3.0625,
      "learning_rate": 4.799190531450631e-05,
      "loss": 0.9315,
      "step": 116540
    },
    {
      "epoch": 0.40847869286818284,
      "grad_norm": 3.125,
      "learning_rate": 4.7991256285842614e-05,
      "loss": 0.8585,
      "step": 116550
    },
    {
      "epoch": 0.4085137403750784,
      "grad_norm": 2.984375,
      "learning_rate": 4.799060725717891e-05,
      "loss": 0.9241,
      "step": 116560
    },
    {
      "epoch": 0.408548787881974,
      "grad_norm": 3.65625,
      "learning_rate": 4.798995822851521e-05,
      "loss": 0.9694,
      "step": 116570
    },
    {
      "epoch": 0.4085838353888696,
      "grad_norm": 2.8125,
      "learning_rate": 4.7989309199851504e-05,
      "loss": 0.8942,
      "step": 116580
    },
    {
      "epoch": 0.4086188828957652,
      "grad_norm": 2.828125,
      "learning_rate": 4.7988660171187806e-05,
      "loss": 0.9886,
      "step": 116590
    },
    {
      "epoch": 0.4086539304026608,
      "grad_norm": 2.859375,
      "learning_rate": 4.79880111425241e-05,
      "loss": 0.957,
      "step": 116600
    },
    {
      "epoch": 0.4086889779095564,
      "grad_norm": 2.671875,
      "learning_rate": 4.79873621138604e-05,
      "loss": 0.8796,
      "step": 116610
    },
    {
      "epoch": 0.408724025416452,
      "grad_norm": 2.984375,
      "learning_rate": 4.7986713085196696e-05,
      "loss": 0.9803,
      "step": 116620
    },
    {
      "epoch": 0.4087590729233476,
      "grad_norm": 3.21875,
      "learning_rate": 4.7986064056533e-05,
      "loss": 0.9807,
      "step": 116630
    },
    {
      "epoch": 0.4087941204302432,
      "grad_norm": 3.15625,
      "learning_rate": 4.798541502786929e-05,
      "loss": 1.0387,
      "step": 116640
    },
    {
      "epoch": 0.4088291679371388,
      "grad_norm": 3.28125,
      "learning_rate": 4.798476599920559e-05,
      "loss": 0.9438,
      "step": 116650
    },
    {
      "epoch": 0.4088642154440344,
      "grad_norm": 3.40625,
      "learning_rate": 4.798411697054189e-05,
      "loss": 0.9115,
      "step": 116660
    },
    {
      "epoch": 0.40889926295093,
      "grad_norm": 3.375,
      "learning_rate": 4.798346794187818e-05,
      "loss": 0.9784,
      "step": 116670
    },
    {
      "epoch": 0.4089343104578256,
      "grad_norm": 2.796875,
      "learning_rate": 4.7982818913214484e-05,
      "loss": 0.9444,
      "step": 116680
    },
    {
      "epoch": 0.40896935796472117,
      "grad_norm": 2.796875,
      "learning_rate": 4.798216988455078e-05,
      "loss": 0.9364,
      "step": 116690
    },
    {
      "epoch": 0.40900440547161676,
      "grad_norm": 2.96875,
      "learning_rate": 4.798152085588708e-05,
      "loss": 0.91,
      "step": 116700
    },
    {
      "epoch": 0.40903945297851235,
      "grad_norm": 3.0625,
      "learning_rate": 4.7980871827223375e-05,
      "loss": 1.0668,
      "step": 116710
    },
    {
      "epoch": 0.40907450048540794,
      "grad_norm": 2.890625,
      "learning_rate": 4.7980222798559676e-05,
      "loss": 0.9624,
      "step": 116720
    },
    {
      "epoch": 0.4091095479923036,
      "grad_norm": 2.90625,
      "learning_rate": 4.797957376989598e-05,
      "loss": 0.9311,
      "step": 116730
    },
    {
      "epoch": 0.4091445954991992,
      "grad_norm": 3.015625,
      "learning_rate": 4.797892474123227e-05,
      "loss": 0.9124,
      "step": 116740
    },
    {
      "epoch": 0.4091796430060948,
      "grad_norm": 3.0625,
      "learning_rate": 4.7978275712568574e-05,
      "loss": 0.9631,
      "step": 116750
    },
    {
      "epoch": 0.40921469051299036,
      "grad_norm": 3.359375,
      "learning_rate": 4.797762668390487e-05,
      "loss": 0.9168,
      "step": 116760
    },
    {
      "epoch": 0.40924973801988596,
      "grad_norm": 3.078125,
      "learning_rate": 4.797697765524117e-05,
      "loss": 0.9396,
      "step": 116770
    },
    {
      "epoch": 0.40928478552678155,
      "grad_norm": 3.671875,
      "learning_rate": 4.7976328626577464e-05,
      "loss": 1.0026,
      "step": 116780
    },
    {
      "epoch": 0.40931983303367714,
      "grad_norm": 3.25,
      "learning_rate": 4.7975679597913766e-05,
      "loss": 0.972,
      "step": 116790
    },
    {
      "epoch": 0.40935488054057273,
      "grad_norm": 3.03125,
      "learning_rate": 4.797503056925006e-05,
      "loss": 0.9833,
      "step": 116800
    },
    {
      "epoch": 0.4093899280474683,
      "grad_norm": 2.546875,
      "learning_rate": 4.797438154058636e-05,
      "loss": 0.8699,
      "step": 116810
    },
    {
      "epoch": 0.40942497555436397,
      "grad_norm": 3.109375,
      "learning_rate": 4.7973732511922656e-05,
      "loss": 0.9271,
      "step": 116820
    },
    {
      "epoch": 0.40946002306125956,
      "grad_norm": 2.9375,
      "learning_rate": 4.797308348325896e-05,
      "loss": 0.9275,
      "step": 116830
    },
    {
      "epoch": 0.40949507056815515,
      "grad_norm": 2.765625,
      "learning_rate": 4.797243445459525e-05,
      "loss": 0.9329,
      "step": 116840
    },
    {
      "epoch": 0.40953011807505074,
      "grad_norm": 3.015625,
      "learning_rate": 4.7971785425931554e-05,
      "loss": 0.9346,
      "step": 116850
    },
    {
      "epoch": 0.40956516558194633,
      "grad_norm": 3.5,
      "learning_rate": 4.7971136397267855e-05,
      "loss": 1.0389,
      "step": 116860
    },
    {
      "epoch": 0.4096002130888419,
      "grad_norm": 3.046875,
      "learning_rate": 4.797048736860415e-05,
      "loss": 0.9427,
      "step": 116870
    },
    {
      "epoch": 0.4096352605957375,
      "grad_norm": 3.296875,
      "learning_rate": 4.796983833994045e-05,
      "loss": 0.9517,
      "step": 116880
    },
    {
      "epoch": 0.4096703081026331,
      "grad_norm": 3.515625,
      "learning_rate": 4.7969189311276746e-05,
      "loss": 1.0133,
      "step": 116890
    },
    {
      "epoch": 0.4097053556095287,
      "grad_norm": 3.28125,
      "learning_rate": 4.796854028261305e-05,
      "loss": 0.9196,
      "step": 116900
    },
    {
      "epoch": 0.4097404031164243,
      "grad_norm": 2.9375,
      "learning_rate": 4.796789125394934e-05,
      "loss": 0.9283,
      "step": 116910
    },
    {
      "epoch": 0.40977545062331994,
      "grad_norm": 2.875,
      "learning_rate": 4.796724222528564e-05,
      "loss": 0.9093,
      "step": 116920
    },
    {
      "epoch": 0.40981049813021553,
      "grad_norm": 3.140625,
      "learning_rate": 4.796659319662194e-05,
      "loss": 1.0119,
      "step": 116930
    },
    {
      "epoch": 0.4098455456371111,
      "grad_norm": 2.859375,
      "learning_rate": 4.796594416795824e-05,
      "loss": 0.8099,
      "step": 116940
    },
    {
      "epoch": 0.4098805931440067,
      "grad_norm": 3.078125,
      "learning_rate": 4.7965295139294534e-05,
      "loss": 0.9832,
      "step": 116950
    },
    {
      "epoch": 0.4099156406509023,
      "grad_norm": 2.765625,
      "learning_rate": 4.7964646110630835e-05,
      "loss": 0.9073,
      "step": 116960
    },
    {
      "epoch": 0.4099506881577979,
      "grad_norm": 2.984375,
      "learning_rate": 4.796399708196713e-05,
      "loss": 1.0149,
      "step": 116970
    },
    {
      "epoch": 0.4099857356646935,
      "grad_norm": 3.0,
      "learning_rate": 4.796334805330343e-05,
      "loss": 0.926,
      "step": 116980
    },
    {
      "epoch": 0.4100207831715891,
      "grad_norm": 3.09375,
      "learning_rate": 4.7962699024639726e-05,
      "loss": 0.9236,
      "step": 116990
    },
    {
      "epoch": 0.41005583067848467,
      "grad_norm": 3.03125,
      "learning_rate": 4.796204999597603e-05,
      "loss": 1.0127,
      "step": 117000
    },
    {
      "epoch": 0.41009087818538026,
      "grad_norm": 2.546875,
      "learning_rate": 4.796140096731232e-05,
      "loss": 0.9044,
      "step": 117010
    },
    {
      "epoch": 0.4101259256922759,
      "grad_norm": 2.84375,
      "learning_rate": 4.7960751938648616e-05,
      "loss": 0.9489,
      "step": 117020
    },
    {
      "epoch": 0.4101609731991715,
      "grad_norm": 2.671875,
      "learning_rate": 4.796010290998492e-05,
      "loss": 0.843,
      "step": 117030
    },
    {
      "epoch": 0.4101960207060671,
      "grad_norm": 3.53125,
      "learning_rate": 4.795945388132121e-05,
      "loss": 0.8908,
      "step": 117040
    },
    {
      "epoch": 0.4102310682129627,
      "grad_norm": 3.21875,
      "learning_rate": 4.7958804852657514e-05,
      "loss": 1.0382,
      "step": 117050
    },
    {
      "epoch": 0.41026611571985827,
      "grad_norm": 2.828125,
      "learning_rate": 4.795815582399381e-05,
      "loss": 1.0308,
      "step": 117060
    },
    {
      "epoch": 0.41030116322675386,
      "grad_norm": 2.921875,
      "learning_rate": 4.795750679533011e-05,
      "loss": 1.0063,
      "step": 117070
    },
    {
      "epoch": 0.41033621073364945,
      "grad_norm": 3.3125,
      "learning_rate": 4.795685776666641e-05,
      "loss": 0.89,
      "step": 117080
    },
    {
      "epoch": 0.41037125824054504,
      "grad_norm": 3.28125,
      "learning_rate": 4.7956208738002706e-05,
      "loss": 0.9842,
      "step": 117090
    },
    {
      "epoch": 0.41040630574744064,
      "grad_norm": 2.984375,
      "learning_rate": 4.795555970933901e-05,
      "loss": 0.9412,
      "step": 117100
    },
    {
      "epoch": 0.4104413532543362,
      "grad_norm": 3.265625,
      "learning_rate": 4.79549106806753e-05,
      "loss": 0.9396,
      "step": 117110
    },
    {
      "epoch": 0.4104764007612319,
      "grad_norm": 3.1875,
      "learning_rate": 4.79542616520116e-05,
      "loss": 0.9199,
      "step": 117120
    },
    {
      "epoch": 0.41051144826812747,
      "grad_norm": 3.515625,
      "learning_rate": 4.79536126233479e-05,
      "loss": 1.0075,
      "step": 117130
    },
    {
      "epoch": 0.41054649577502306,
      "grad_norm": 3.4375,
      "learning_rate": 4.79529635946842e-05,
      "loss": 0.9409,
      "step": 117140
    },
    {
      "epoch": 0.41058154328191865,
      "grad_norm": 2.921875,
      "learning_rate": 4.7952314566020494e-05,
      "loss": 0.9767,
      "step": 117150
    },
    {
      "epoch": 0.41061659078881424,
      "grad_norm": 3.15625,
      "learning_rate": 4.7951665537356795e-05,
      "loss": 0.9229,
      "step": 117160
    },
    {
      "epoch": 0.41065163829570983,
      "grad_norm": 2.53125,
      "learning_rate": 4.795101650869309e-05,
      "loss": 0.9895,
      "step": 117170
    },
    {
      "epoch": 0.4106866858026054,
      "grad_norm": 2.84375,
      "learning_rate": 4.795036748002939e-05,
      "loss": 0.8913,
      "step": 117180
    },
    {
      "epoch": 0.410721733309501,
      "grad_norm": 3.140625,
      "learning_rate": 4.7949718451365686e-05,
      "loss": 1.0267,
      "step": 117190
    },
    {
      "epoch": 0.4107567808163966,
      "grad_norm": 3.25,
      "learning_rate": 4.794906942270199e-05,
      "loss": 0.9635,
      "step": 117200
    },
    {
      "epoch": 0.4107918283232922,
      "grad_norm": 2.953125,
      "learning_rate": 4.794842039403828e-05,
      "loss": 1.0038,
      "step": 117210
    },
    {
      "epoch": 0.41082687583018784,
      "grad_norm": 2.78125,
      "learning_rate": 4.794777136537458e-05,
      "loss": 1.022,
      "step": 117220
    },
    {
      "epoch": 0.41086192333708343,
      "grad_norm": 2.796875,
      "learning_rate": 4.7947122336710884e-05,
      "loss": 0.9411,
      "step": 117230
    },
    {
      "epoch": 0.410896970843979,
      "grad_norm": 2.875,
      "learning_rate": 4.794647330804718e-05,
      "loss": 0.8529,
      "step": 117240
    },
    {
      "epoch": 0.4109320183508746,
      "grad_norm": 3.578125,
      "learning_rate": 4.794582427938348e-05,
      "loss": 0.9592,
      "step": 117250
    },
    {
      "epoch": 0.4109670658577702,
      "grad_norm": 2.984375,
      "learning_rate": 4.7945175250719775e-05,
      "loss": 0.9673,
      "step": 117260
    },
    {
      "epoch": 0.4110021133646658,
      "grad_norm": 2.5625,
      "learning_rate": 4.7944526222056076e-05,
      "loss": 0.9214,
      "step": 117270
    },
    {
      "epoch": 0.4110371608715614,
      "grad_norm": 3.109375,
      "learning_rate": 4.794387719339237e-05,
      "loss": 1.0395,
      "step": 117280
    },
    {
      "epoch": 0.411072208378457,
      "grad_norm": 2.96875,
      "learning_rate": 4.794322816472867e-05,
      "loss": 0.9108,
      "step": 117290
    },
    {
      "epoch": 0.4111072558853526,
      "grad_norm": 3.046875,
      "learning_rate": 4.794257913606497e-05,
      "loss": 0.8752,
      "step": 117300
    },
    {
      "epoch": 0.41114230339224817,
      "grad_norm": 2.8125,
      "learning_rate": 4.794193010740127e-05,
      "loss": 0.9581,
      "step": 117310
    },
    {
      "epoch": 0.4111773508991438,
      "grad_norm": 2.6875,
      "learning_rate": 4.794128107873756e-05,
      "loss": 0.9546,
      "step": 117320
    },
    {
      "epoch": 0.4112123984060394,
      "grad_norm": 3.296875,
      "learning_rate": 4.7940632050073864e-05,
      "loss": 1.0259,
      "step": 117330
    },
    {
      "epoch": 0.411247445912935,
      "grad_norm": 2.984375,
      "learning_rate": 4.793998302141016e-05,
      "loss": 0.9597,
      "step": 117340
    },
    {
      "epoch": 0.4112824934198306,
      "grad_norm": 3.0,
      "learning_rate": 4.793933399274646e-05,
      "loss": 0.9439,
      "step": 117350
    },
    {
      "epoch": 0.4113175409267262,
      "grad_norm": 3.046875,
      "learning_rate": 4.793868496408276e-05,
      "loss": 0.9717,
      "step": 117360
    },
    {
      "epoch": 0.41135258843362177,
      "grad_norm": 3.09375,
      "learning_rate": 4.7938035935419056e-05,
      "loss": 0.9893,
      "step": 117370
    },
    {
      "epoch": 0.41138763594051736,
      "grad_norm": 2.875,
      "learning_rate": 4.793738690675536e-05,
      "loss": 1.0012,
      "step": 117380
    },
    {
      "epoch": 0.41142268344741295,
      "grad_norm": 2.84375,
      "learning_rate": 4.7936737878091646e-05,
      "loss": 0.9732,
      "step": 117390
    },
    {
      "epoch": 0.41145773095430854,
      "grad_norm": 3.015625,
      "learning_rate": 4.793608884942795e-05,
      "loss": 0.9601,
      "step": 117400
    },
    {
      "epoch": 0.4114927784612042,
      "grad_norm": 2.671875,
      "learning_rate": 4.793543982076424e-05,
      "loss": 0.9413,
      "step": 117410
    },
    {
      "epoch": 0.4115278259680998,
      "grad_norm": 2.765625,
      "learning_rate": 4.793479079210054e-05,
      "loss": 0.9706,
      "step": 117420
    },
    {
      "epoch": 0.41156287347499537,
      "grad_norm": 2.984375,
      "learning_rate": 4.793414176343684e-05,
      "loss": 0.9054,
      "step": 117430
    },
    {
      "epoch": 0.41159792098189096,
      "grad_norm": 3.09375,
      "learning_rate": 4.793349273477314e-05,
      "loss": 0.9595,
      "step": 117440
    },
    {
      "epoch": 0.41163296848878655,
      "grad_norm": 2.921875,
      "learning_rate": 4.793284370610944e-05,
      "loss": 1.038,
      "step": 117450
    },
    {
      "epoch": 0.41166801599568215,
      "grad_norm": 3.15625,
      "learning_rate": 4.7932194677445735e-05,
      "loss": 0.9117,
      "step": 117460
    },
    {
      "epoch": 0.41170306350257774,
      "grad_norm": 2.75,
      "learning_rate": 4.7931545648782036e-05,
      "loss": 0.8972,
      "step": 117470
    },
    {
      "epoch": 0.41173811100947333,
      "grad_norm": 3.328125,
      "learning_rate": 4.793089662011833e-05,
      "loss": 0.9966,
      "step": 117480
    },
    {
      "epoch": 0.4117731585163689,
      "grad_norm": 3.09375,
      "learning_rate": 4.793024759145463e-05,
      "loss": 0.9485,
      "step": 117490
    },
    {
      "epoch": 0.4118082060232645,
      "grad_norm": 2.4375,
      "learning_rate": 4.792959856279093e-05,
      "loss": 0.9896,
      "step": 117500
    },
    {
      "epoch": 0.41184325353016016,
      "grad_norm": 3.0,
      "learning_rate": 4.792894953412723e-05,
      "loss": 1.0319,
      "step": 117510
    },
    {
      "epoch": 0.41187830103705575,
      "grad_norm": 2.984375,
      "learning_rate": 4.792830050546352e-05,
      "loss": 0.9415,
      "step": 117520
    },
    {
      "epoch": 0.41191334854395134,
      "grad_norm": 2.859375,
      "learning_rate": 4.7927651476799824e-05,
      "loss": 0.9092,
      "step": 117530
    },
    {
      "epoch": 0.41194839605084693,
      "grad_norm": 2.875,
      "learning_rate": 4.792700244813612e-05,
      "loss": 0.9974,
      "step": 117540
    },
    {
      "epoch": 0.4119834435577425,
      "grad_norm": 3.3125,
      "learning_rate": 4.792635341947242e-05,
      "loss": 0.981,
      "step": 117550
    },
    {
      "epoch": 0.4120184910646381,
      "grad_norm": 2.953125,
      "learning_rate": 4.7925704390808715e-05,
      "loss": 0.9475,
      "step": 117560
    },
    {
      "epoch": 0.4120535385715337,
      "grad_norm": 3.28125,
      "learning_rate": 4.7925055362145016e-05,
      "loss": 1.0307,
      "step": 117570
    },
    {
      "epoch": 0.4120885860784293,
      "grad_norm": 3.109375,
      "learning_rate": 4.792440633348131e-05,
      "loss": 0.9751,
      "step": 117580
    },
    {
      "epoch": 0.4121236335853249,
      "grad_norm": 2.984375,
      "learning_rate": 4.792375730481761e-05,
      "loss": 0.9757,
      "step": 117590
    },
    {
      "epoch": 0.4121586810922205,
      "grad_norm": 2.5625,
      "learning_rate": 4.7923108276153914e-05,
      "loss": 0.9722,
      "step": 117600
    },
    {
      "epoch": 0.4121937285991161,
      "grad_norm": 2.71875,
      "learning_rate": 4.792245924749021e-05,
      "loss": 0.9477,
      "step": 117610
    },
    {
      "epoch": 0.4122287761060117,
      "grad_norm": 3.0625,
      "learning_rate": 4.792181021882651e-05,
      "loss": 0.9902,
      "step": 117620
    },
    {
      "epoch": 0.4122638236129073,
      "grad_norm": 3.171875,
      "learning_rate": 4.7921161190162804e-05,
      "loss": 0.9434,
      "step": 117630
    },
    {
      "epoch": 0.4122988711198029,
      "grad_norm": 2.734375,
      "learning_rate": 4.7920512161499106e-05,
      "loss": 0.9763,
      "step": 117640
    },
    {
      "epoch": 0.4123339186266985,
      "grad_norm": 2.84375,
      "learning_rate": 4.79198631328354e-05,
      "loss": 0.8958,
      "step": 117650
    },
    {
      "epoch": 0.4123689661335941,
      "grad_norm": 2.625,
      "learning_rate": 4.79192141041717e-05,
      "loss": 1.0606,
      "step": 117660
    },
    {
      "epoch": 0.4124040136404897,
      "grad_norm": 2.828125,
      "learning_rate": 4.7918565075507996e-05,
      "loss": 0.9873,
      "step": 117670
    },
    {
      "epoch": 0.41243906114738527,
      "grad_norm": 3.0625,
      "learning_rate": 4.79179160468443e-05,
      "loss": 0.9794,
      "step": 117680
    },
    {
      "epoch": 0.41247410865428086,
      "grad_norm": 3.234375,
      "learning_rate": 4.791726701818059e-05,
      "loss": 0.9088,
      "step": 117690
    },
    {
      "epoch": 0.41250915616117645,
      "grad_norm": 2.890625,
      "learning_rate": 4.7916617989516894e-05,
      "loss": 0.9356,
      "step": 117700
    },
    {
      "epoch": 0.4125442036680721,
      "grad_norm": 3.21875,
      "learning_rate": 4.791596896085319e-05,
      "loss": 0.8994,
      "step": 117710
    },
    {
      "epoch": 0.4125792511749677,
      "grad_norm": 2.984375,
      "learning_rate": 4.791531993218949e-05,
      "loss": 1.0584,
      "step": 117720
    },
    {
      "epoch": 0.4126142986818633,
      "grad_norm": 2.6875,
      "learning_rate": 4.791467090352579e-05,
      "loss": 0.976,
      "step": 117730
    },
    {
      "epoch": 0.41264934618875887,
      "grad_norm": 2.96875,
      "learning_rate": 4.7914021874862086e-05,
      "loss": 0.9688,
      "step": 117740
    },
    {
      "epoch": 0.41268439369565446,
      "grad_norm": 3.21875,
      "learning_rate": 4.791337284619839e-05,
      "loss": 0.9272,
      "step": 117750
    },
    {
      "epoch": 0.41271944120255005,
      "grad_norm": 3.0,
      "learning_rate": 4.791272381753468e-05,
      "loss": 0.887,
      "step": 117760
    },
    {
      "epoch": 0.41275448870944564,
      "grad_norm": 2.953125,
      "learning_rate": 4.7912074788870976e-05,
      "loss": 1.0027,
      "step": 117770
    },
    {
      "epoch": 0.41278953621634124,
      "grad_norm": 3.03125,
      "learning_rate": 4.791142576020727e-05,
      "loss": 1.0567,
      "step": 117780
    },
    {
      "epoch": 0.4128245837232368,
      "grad_norm": 3.03125,
      "learning_rate": 4.791077673154357e-05,
      "loss": 1.0099,
      "step": 117790
    },
    {
      "epoch": 0.4128596312301324,
      "grad_norm": 3.09375,
      "learning_rate": 4.791012770287987e-05,
      "loss": 0.9057,
      "step": 117800
    },
    {
      "epoch": 0.41289467873702806,
      "grad_norm": 2.8125,
      "learning_rate": 4.790947867421617e-05,
      "loss": 0.9292,
      "step": 117810
    },
    {
      "epoch": 0.41292972624392366,
      "grad_norm": 2.59375,
      "learning_rate": 4.790882964555247e-05,
      "loss": 0.8588,
      "step": 117820
    },
    {
      "epoch": 0.41296477375081925,
      "grad_norm": 2.796875,
      "learning_rate": 4.7908180616888764e-05,
      "loss": 0.9525,
      "step": 117830
    },
    {
      "epoch": 0.41299982125771484,
      "grad_norm": 2.90625,
      "learning_rate": 4.7907531588225066e-05,
      "loss": 0.9509,
      "step": 117840
    },
    {
      "epoch": 0.41303486876461043,
      "grad_norm": 2.921875,
      "learning_rate": 4.790688255956136e-05,
      "loss": 0.9692,
      "step": 117850
    },
    {
      "epoch": 0.413069916271506,
      "grad_norm": 2.9375,
      "learning_rate": 4.790623353089766e-05,
      "loss": 0.9385,
      "step": 117860
    },
    {
      "epoch": 0.4131049637784016,
      "grad_norm": 2.46875,
      "learning_rate": 4.7905584502233956e-05,
      "loss": 0.9412,
      "step": 117870
    },
    {
      "epoch": 0.4131400112852972,
      "grad_norm": 2.8125,
      "learning_rate": 4.790493547357026e-05,
      "loss": 0.8717,
      "step": 117880
    },
    {
      "epoch": 0.4131750587921928,
      "grad_norm": 4.8125,
      "learning_rate": 4.790428644490655e-05,
      "loss": 0.9476,
      "step": 117890
    },
    {
      "epoch": 0.4132101062990884,
      "grad_norm": 3.109375,
      "learning_rate": 4.7903637416242854e-05,
      "loss": 0.9136,
      "step": 117900
    },
    {
      "epoch": 0.41324515380598403,
      "grad_norm": 2.890625,
      "learning_rate": 4.790298838757915e-05,
      "loss": 0.9276,
      "step": 117910
    },
    {
      "epoch": 0.4132802013128796,
      "grad_norm": 2.828125,
      "learning_rate": 4.790233935891545e-05,
      "loss": 1.0129,
      "step": 117920
    },
    {
      "epoch": 0.4133152488197752,
      "grad_norm": 3.21875,
      "learning_rate": 4.7901690330251744e-05,
      "loss": 0.9739,
      "step": 117930
    },
    {
      "epoch": 0.4133502963266708,
      "grad_norm": 3.078125,
      "learning_rate": 4.7901041301588046e-05,
      "loss": 0.945,
      "step": 117940
    },
    {
      "epoch": 0.4133853438335664,
      "grad_norm": 3.03125,
      "learning_rate": 4.790039227292434e-05,
      "loss": 0.9268,
      "step": 117950
    },
    {
      "epoch": 0.413420391340462,
      "grad_norm": 2.59375,
      "learning_rate": 4.789974324426064e-05,
      "loss": 0.9644,
      "step": 117960
    },
    {
      "epoch": 0.4134554388473576,
      "grad_norm": 3.203125,
      "learning_rate": 4.789909421559694e-05,
      "loss": 0.8712,
      "step": 117970
    },
    {
      "epoch": 0.4134904863542532,
      "grad_norm": 2.9375,
      "learning_rate": 4.789844518693324e-05,
      "loss": 0.9416,
      "step": 117980
    },
    {
      "epoch": 0.41352553386114876,
      "grad_norm": 3.15625,
      "learning_rate": 4.789779615826954e-05,
      "loss": 0.9659,
      "step": 117990
    },
    {
      "epoch": 0.4135605813680444,
      "grad_norm": 2.84375,
      "learning_rate": 4.7897147129605834e-05,
      "loss": 0.9471,
      "step": 118000
    },
    {
      "epoch": 0.41359562887494,
      "grad_norm": 3.171875,
      "learning_rate": 4.7896498100942135e-05,
      "loss": 0.8367,
      "step": 118010
    },
    {
      "epoch": 0.4136306763818356,
      "grad_norm": 2.890625,
      "learning_rate": 4.789584907227843e-05,
      "loss": 0.9259,
      "step": 118020
    },
    {
      "epoch": 0.4136657238887312,
      "grad_norm": 3.046875,
      "learning_rate": 4.789520004361473e-05,
      "loss": 1.0069,
      "step": 118030
    },
    {
      "epoch": 0.4137007713956268,
      "grad_norm": 3.1875,
      "learning_rate": 4.7894551014951026e-05,
      "loss": 0.9198,
      "step": 118040
    },
    {
      "epoch": 0.41373581890252237,
      "grad_norm": 2.765625,
      "learning_rate": 4.789390198628733e-05,
      "loss": 0.9253,
      "step": 118050
    },
    {
      "epoch": 0.41377086640941796,
      "grad_norm": 2.921875,
      "learning_rate": 4.789325295762362e-05,
      "loss": 0.983,
      "step": 118060
    },
    {
      "epoch": 0.41380591391631355,
      "grad_norm": 3.734375,
      "learning_rate": 4.789260392895992e-05,
      "loss": 1.0075,
      "step": 118070
    },
    {
      "epoch": 0.41384096142320914,
      "grad_norm": 2.96875,
      "learning_rate": 4.789195490029622e-05,
      "loss": 0.9817,
      "step": 118080
    },
    {
      "epoch": 0.41387600893010473,
      "grad_norm": 3.03125,
      "learning_rate": 4.789130587163252e-05,
      "loss": 0.8989,
      "step": 118090
    },
    {
      "epoch": 0.4139110564370004,
      "grad_norm": 3.40625,
      "learning_rate": 4.789065684296882e-05,
      "loss": 1.013,
      "step": 118100
    },
    {
      "epoch": 0.41394610394389597,
      "grad_norm": 3.15625,
      "learning_rate": 4.7890007814305115e-05,
      "loss": 0.9895,
      "step": 118110
    },
    {
      "epoch": 0.41398115145079156,
      "grad_norm": 2.984375,
      "learning_rate": 4.7889358785641417e-05,
      "loss": 0.9989,
      "step": 118120
    },
    {
      "epoch": 0.41401619895768715,
      "grad_norm": 2.828125,
      "learning_rate": 4.788870975697771e-05,
      "loss": 1.0155,
      "step": 118130
    },
    {
      "epoch": 0.41405124646458275,
      "grad_norm": 2.828125,
      "learning_rate": 4.7888060728314006e-05,
      "loss": 0.9253,
      "step": 118140
    },
    {
      "epoch": 0.41408629397147834,
      "grad_norm": 3.28125,
      "learning_rate": 4.78874116996503e-05,
      "loss": 0.9195,
      "step": 118150
    },
    {
      "epoch": 0.4141213414783739,
      "grad_norm": 3.765625,
      "learning_rate": 4.78867626709866e-05,
      "loss": 0.9752,
      "step": 118160
    },
    {
      "epoch": 0.4141563889852695,
      "grad_norm": 3.171875,
      "learning_rate": 4.7886113642322896e-05,
      "loss": 0.931,
      "step": 118170
    },
    {
      "epoch": 0.4141914364921651,
      "grad_norm": 2.625,
      "learning_rate": 4.78854646136592e-05,
      "loss": 0.9431,
      "step": 118180
    },
    {
      "epoch": 0.4142264839990607,
      "grad_norm": 2.859375,
      "learning_rate": 4.78848155849955e-05,
      "loss": 0.9861,
      "step": 118190
    },
    {
      "epoch": 0.41426153150595635,
      "grad_norm": 3.3125,
      "learning_rate": 4.7884166556331794e-05,
      "loss": 0.9612,
      "step": 118200
    },
    {
      "epoch": 0.41429657901285194,
      "grad_norm": 2.8125,
      "learning_rate": 4.7883517527668095e-05,
      "loss": 0.8724,
      "step": 118210
    },
    {
      "epoch": 0.41433162651974753,
      "grad_norm": 3.390625,
      "learning_rate": 4.788286849900439e-05,
      "loss": 0.9682,
      "step": 118220
    },
    {
      "epoch": 0.4143666740266431,
      "grad_norm": 3.703125,
      "learning_rate": 4.788221947034069e-05,
      "loss": 1.021,
      "step": 118230
    },
    {
      "epoch": 0.4144017215335387,
      "grad_norm": 2.828125,
      "learning_rate": 4.7881570441676986e-05,
      "loss": 0.9594,
      "step": 118240
    },
    {
      "epoch": 0.4144367690404343,
      "grad_norm": 2.796875,
      "learning_rate": 4.788092141301329e-05,
      "loss": 0.9386,
      "step": 118250
    },
    {
      "epoch": 0.4144718165473299,
      "grad_norm": 2.953125,
      "learning_rate": 4.788027238434958e-05,
      "loss": 1.0242,
      "step": 118260
    },
    {
      "epoch": 0.4145068640542255,
      "grad_norm": 2.875,
      "learning_rate": 4.787962335568588e-05,
      "loss": 0.9042,
      "step": 118270
    },
    {
      "epoch": 0.4145419115611211,
      "grad_norm": 3.1875,
      "learning_rate": 4.787897432702218e-05,
      "loss": 0.9248,
      "step": 118280
    },
    {
      "epoch": 0.41457695906801667,
      "grad_norm": 2.890625,
      "learning_rate": 4.787832529835848e-05,
      "loss": 0.9051,
      "step": 118290
    },
    {
      "epoch": 0.4146120065749123,
      "grad_norm": 3.265625,
      "learning_rate": 4.7877676269694774e-05,
      "loss": 0.9747,
      "step": 118300
    },
    {
      "epoch": 0.4146470540818079,
      "grad_norm": 3.3125,
      "learning_rate": 4.7877027241031075e-05,
      "loss": 0.8554,
      "step": 118310
    },
    {
      "epoch": 0.4146821015887035,
      "grad_norm": 2.96875,
      "learning_rate": 4.7876378212367377e-05,
      "loss": 0.9037,
      "step": 118320
    },
    {
      "epoch": 0.4147171490955991,
      "grad_norm": 3.125,
      "learning_rate": 4.787572918370367e-05,
      "loss": 0.9342,
      "step": 118330
    },
    {
      "epoch": 0.4147521966024947,
      "grad_norm": 2.78125,
      "learning_rate": 4.787508015503997e-05,
      "loss": 0.9042,
      "step": 118340
    },
    {
      "epoch": 0.4147872441093903,
      "grad_norm": 2.78125,
      "learning_rate": 4.787443112637627e-05,
      "loss": 0.916,
      "step": 118350
    },
    {
      "epoch": 0.41482229161628587,
      "grad_norm": 2.71875,
      "learning_rate": 4.787378209771257e-05,
      "loss": 0.9985,
      "step": 118360
    },
    {
      "epoch": 0.41485733912318146,
      "grad_norm": 3.140625,
      "learning_rate": 4.787313306904886e-05,
      "loss": 0.826,
      "step": 118370
    },
    {
      "epoch": 0.41489238663007705,
      "grad_norm": 3.21875,
      "learning_rate": 4.7872484040385165e-05,
      "loss": 1.0236,
      "step": 118380
    },
    {
      "epoch": 0.41492743413697264,
      "grad_norm": 3.375,
      "learning_rate": 4.787183501172146e-05,
      "loss": 0.9491,
      "step": 118390
    },
    {
      "epoch": 0.4149624816438683,
      "grad_norm": 3.328125,
      "learning_rate": 4.787118598305776e-05,
      "loss": 0.9472,
      "step": 118400
    },
    {
      "epoch": 0.4149975291507639,
      "grad_norm": 3.125,
      "learning_rate": 4.7870536954394055e-05,
      "loss": 0.9663,
      "step": 118410
    },
    {
      "epoch": 0.41503257665765947,
      "grad_norm": 2.90625,
      "learning_rate": 4.7869887925730357e-05,
      "loss": 0.9819,
      "step": 118420
    },
    {
      "epoch": 0.41506762416455506,
      "grad_norm": 3.84375,
      "learning_rate": 4.786923889706665e-05,
      "loss": 0.8943,
      "step": 118430
    },
    {
      "epoch": 0.41510267167145065,
      "grad_norm": 2.734375,
      "learning_rate": 4.786858986840295e-05,
      "loss": 0.9527,
      "step": 118440
    },
    {
      "epoch": 0.41513771917834624,
      "grad_norm": 2.875,
      "learning_rate": 4.786794083973925e-05,
      "loss": 0.9129,
      "step": 118450
    },
    {
      "epoch": 0.41517276668524183,
      "grad_norm": 2.59375,
      "learning_rate": 4.786729181107555e-05,
      "loss": 0.8821,
      "step": 118460
    },
    {
      "epoch": 0.4152078141921374,
      "grad_norm": 3.34375,
      "learning_rate": 4.786664278241185e-05,
      "loss": 0.9639,
      "step": 118470
    },
    {
      "epoch": 0.415242861699033,
      "grad_norm": 3.3125,
      "learning_rate": 4.7865993753748145e-05,
      "loss": 0.8979,
      "step": 118480
    },
    {
      "epoch": 0.41527790920592866,
      "grad_norm": 2.875,
      "learning_rate": 4.7865344725084446e-05,
      "loss": 0.9395,
      "step": 118490
    },
    {
      "epoch": 0.41531295671282425,
      "grad_norm": 3.171875,
      "learning_rate": 4.786469569642074e-05,
      "loss": 0.9679,
      "step": 118500
    },
    {
      "epoch": 0.41534800421971985,
      "grad_norm": 2.890625,
      "learning_rate": 4.786404666775704e-05,
      "loss": 0.9828,
      "step": 118510
    },
    {
      "epoch": 0.41538305172661544,
      "grad_norm": 3.09375,
      "learning_rate": 4.786339763909333e-05,
      "loss": 0.9644,
      "step": 118520
    },
    {
      "epoch": 0.41541809923351103,
      "grad_norm": 3.4375,
      "learning_rate": 4.786274861042963e-05,
      "loss": 0.9224,
      "step": 118530
    },
    {
      "epoch": 0.4154531467404066,
      "grad_norm": 3.1875,
      "learning_rate": 4.7862099581765926e-05,
      "loss": 0.9526,
      "step": 118540
    },
    {
      "epoch": 0.4154881942473022,
      "grad_norm": 2.859375,
      "learning_rate": 4.786145055310223e-05,
      "loss": 0.992,
      "step": 118550
    },
    {
      "epoch": 0.4155232417541978,
      "grad_norm": 3.5,
      "learning_rate": 4.786080152443853e-05,
      "loss": 0.9314,
      "step": 118560
    },
    {
      "epoch": 0.4155582892610934,
      "grad_norm": 2.875,
      "learning_rate": 4.786015249577482e-05,
      "loss": 0.9835,
      "step": 118570
    },
    {
      "epoch": 0.415593336767989,
      "grad_norm": 2.859375,
      "learning_rate": 4.7859503467111125e-05,
      "loss": 0.9487,
      "step": 118580
    },
    {
      "epoch": 0.41562838427488463,
      "grad_norm": 3.203125,
      "learning_rate": 4.785885443844742e-05,
      "loss": 0.9789,
      "step": 118590
    },
    {
      "epoch": 0.4156634317817802,
      "grad_norm": 3.140625,
      "learning_rate": 4.785820540978372e-05,
      "loss": 0.903,
      "step": 118600
    },
    {
      "epoch": 0.4156984792886758,
      "grad_norm": 3.515625,
      "learning_rate": 4.7857556381120015e-05,
      "loss": 0.9936,
      "step": 118610
    },
    {
      "epoch": 0.4157335267955714,
      "grad_norm": 3.21875,
      "learning_rate": 4.7856907352456317e-05,
      "loss": 1.0448,
      "step": 118620
    },
    {
      "epoch": 0.415768574302467,
      "grad_norm": 2.8125,
      "learning_rate": 4.785625832379261e-05,
      "loss": 0.9468,
      "step": 118630
    },
    {
      "epoch": 0.4158036218093626,
      "grad_norm": 3.359375,
      "learning_rate": 4.785560929512891e-05,
      "loss": 0.8749,
      "step": 118640
    },
    {
      "epoch": 0.4158386693162582,
      "grad_norm": 3.359375,
      "learning_rate": 4.785496026646521e-05,
      "loss": 1.007,
      "step": 118650
    },
    {
      "epoch": 0.41587371682315377,
      "grad_norm": 3.09375,
      "learning_rate": 4.785431123780151e-05,
      "loss": 0.9388,
      "step": 118660
    },
    {
      "epoch": 0.41590876433004936,
      "grad_norm": 2.953125,
      "learning_rate": 4.78536622091378e-05,
      "loss": 0.9835,
      "step": 118670
    },
    {
      "epoch": 0.41594381183694495,
      "grad_norm": 2.703125,
      "learning_rate": 4.7853013180474105e-05,
      "loss": 0.9381,
      "step": 118680
    },
    {
      "epoch": 0.4159788593438406,
      "grad_norm": 2.65625,
      "learning_rate": 4.7852364151810406e-05,
      "loss": 0.9407,
      "step": 118690
    },
    {
      "epoch": 0.4160139068507362,
      "grad_norm": 3.359375,
      "learning_rate": 4.78517151231467e-05,
      "loss": 0.9224,
      "step": 118700
    },
    {
      "epoch": 0.4160489543576318,
      "grad_norm": 3.359375,
      "learning_rate": 4.7851066094483e-05,
      "loss": 0.9719,
      "step": 118710
    },
    {
      "epoch": 0.4160840018645274,
      "grad_norm": 3.109375,
      "learning_rate": 4.7850417065819297e-05,
      "loss": 0.8829,
      "step": 118720
    },
    {
      "epoch": 0.41611904937142297,
      "grad_norm": 3.140625,
      "learning_rate": 4.78497680371556e-05,
      "loss": 0.996,
      "step": 118730
    },
    {
      "epoch": 0.41615409687831856,
      "grad_norm": 3.265625,
      "learning_rate": 4.784911900849189e-05,
      "loss": 1.0567,
      "step": 118740
    },
    {
      "epoch": 0.41618914438521415,
      "grad_norm": 3.375,
      "learning_rate": 4.7848469979828194e-05,
      "loss": 1.0194,
      "step": 118750
    },
    {
      "epoch": 0.41622419189210974,
      "grad_norm": 2.609375,
      "learning_rate": 4.784782095116449e-05,
      "loss": 0.9619,
      "step": 118760
    },
    {
      "epoch": 0.41625923939900533,
      "grad_norm": 3.078125,
      "learning_rate": 4.784717192250079e-05,
      "loss": 1.0046,
      "step": 118770
    },
    {
      "epoch": 0.4162942869059009,
      "grad_norm": 2.734375,
      "learning_rate": 4.7846522893837085e-05,
      "loss": 0.8965,
      "step": 118780
    },
    {
      "epoch": 0.41632933441279657,
      "grad_norm": 3.1875,
      "learning_rate": 4.7845873865173386e-05,
      "loss": 0.9433,
      "step": 118790
    },
    {
      "epoch": 0.41636438191969216,
      "grad_norm": 3.15625,
      "learning_rate": 4.784522483650968e-05,
      "loss": 0.9314,
      "step": 118800
    },
    {
      "epoch": 0.41639942942658775,
      "grad_norm": 3.109375,
      "learning_rate": 4.784457580784598e-05,
      "loss": 0.8838,
      "step": 118810
    },
    {
      "epoch": 0.41643447693348334,
      "grad_norm": 3.515625,
      "learning_rate": 4.7843926779182277e-05,
      "loss": 0.9704,
      "step": 118820
    },
    {
      "epoch": 0.41646952444037894,
      "grad_norm": 2.609375,
      "learning_rate": 4.784327775051858e-05,
      "loss": 0.9241,
      "step": 118830
    },
    {
      "epoch": 0.4165045719472745,
      "grad_norm": 2.8125,
      "learning_rate": 4.784262872185488e-05,
      "loss": 0.9852,
      "step": 118840
    },
    {
      "epoch": 0.4165396194541701,
      "grad_norm": 3.34375,
      "learning_rate": 4.7841979693191174e-05,
      "loss": 1.0049,
      "step": 118850
    },
    {
      "epoch": 0.4165746669610657,
      "grad_norm": 3.25,
      "learning_rate": 4.7841330664527475e-05,
      "loss": 0.9024,
      "step": 118860
    },
    {
      "epoch": 0.4166097144679613,
      "grad_norm": 2.828125,
      "learning_rate": 4.784068163586377e-05,
      "loss": 0.9591,
      "step": 118870
    },
    {
      "epoch": 0.4166447619748569,
      "grad_norm": 2.875,
      "learning_rate": 4.784003260720007e-05,
      "loss": 0.9185,
      "step": 118880
    },
    {
      "epoch": 0.41667980948175254,
      "grad_norm": 3.09375,
      "learning_rate": 4.7839383578536366e-05,
      "loss": 0.9049,
      "step": 118890
    },
    {
      "epoch": 0.41671485698864813,
      "grad_norm": 3.203125,
      "learning_rate": 4.783873454987266e-05,
      "loss": 0.9709,
      "step": 118900
    },
    {
      "epoch": 0.4167499044955437,
      "grad_norm": 2.890625,
      "learning_rate": 4.7838085521208955e-05,
      "loss": 0.9754,
      "step": 118910
    },
    {
      "epoch": 0.4167849520024393,
      "grad_norm": 3.078125,
      "learning_rate": 4.7837436492545257e-05,
      "loss": 0.9401,
      "step": 118920
    },
    {
      "epoch": 0.4168199995093349,
      "grad_norm": 3.09375,
      "learning_rate": 4.783678746388156e-05,
      "loss": 0.9863,
      "step": 118930
    },
    {
      "epoch": 0.4168550470162305,
      "grad_norm": 2.796875,
      "learning_rate": 4.783613843521785e-05,
      "loss": 0.9593,
      "step": 118940
    },
    {
      "epoch": 0.4168900945231261,
      "grad_norm": 2.90625,
      "learning_rate": 4.7835489406554154e-05,
      "loss": 0.9555,
      "step": 118950
    },
    {
      "epoch": 0.4169251420300217,
      "grad_norm": 2.875,
      "learning_rate": 4.783484037789045e-05,
      "loss": 0.9617,
      "step": 118960
    },
    {
      "epoch": 0.41696018953691727,
      "grad_norm": 3.015625,
      "learning_rate": 4.783419134922675e-05,
      "loss": 0.9726,
      "step": 118970
    },
    {
      "epoch": 0.41699523704381286,
      "grad_norm": 2.890625,
      "learning_rate": 4.7833542320563045e-05,
      "loss": 1.022,
      "step": 118980
    },
    {
      "epoch": 0.4170302845507085,
      "grad_norm": 3.140625,
      "learning_rate": 4.7832893291899346e-05,
      "loss": 0.9713,
      "step": 118990
    },
    {
      "epoch": 0.4170653320576041,
      "grad_norm": 3.203125,
      "learning_rate": 4.783224426323564e-05,
      "loss": 1.0259,
      "step": 119000
    },
    {
      "epoch": 0.4171003795644997,
      "grad_norm": 3.0625,
      "learning_rate": 4.783159523457194e-05,
      "loss": 0.896,
      "step": 119010
    },
    {
      "epoch": 0.4171354270713953,
      "grad_norm": 3.203125,
      "learning_rate": 4.7830946205908237e-05,
      "loss": 1.0037,
      "step": 119020
    },
    {
      "epoch": 0.4171704745782909,
      "grad_norm": 3.296875,
      "learning_rate": 4.783029717724454e-05,
      "loss": 0.985,
      "step": 119030
    },
    {
      "epoch": 0.41720552208518646,
      "grad_norm": 2.984375,
      "learning_rate": 4.782964814858083e-05,
      "loss": 0.9192,
      "step": 119040
    },
    {
      "epoch": 0.41724056959208206,
      "grad_norm": 2.671875,
      "learning_rate": 4.7828999119917134e-05,
      "loss": 0.9643,
      "step": 119050
    },
    {
      "epoch": 0.41727561709897765,
      "grad_norm": 3.078125,
      "learning_rate": 4.7828350091253435e-05,
      "loss": 0.9086,
      "step": 119060
    },
    {
      "epoch": 0.41731066460587324,
      "grad_norm": 3.5,
      "learning_rate": 4.782770106258973e-05,
      "loss": 0.9479,
      "step": 119070
    },
    {
      "epoch": 0.4173457121127689,
      "grad_norm": 3.03125,
      "learning_rate": 4.782705203392603e-05,
      "loss": 0.9164,
      "step": 119080
    },
    {
      "epoch": 0.4173807596196645,
      "grad_norm": 2.9375,
      "learning_rate": 4.7826403005262326e-05,
      "loss": 0.9457,
      "step": 119090
    },
    {
      "epoch": 0.41741580712656007,
      "grad_norm": 3.03125,
      "learning_rate": 4.782575397659863e-05,
      "loss": 0.9704,
      "step": 119100
    },
    {
      "epoch": 0.41745085463345566,
      "grad_norm": 3.1875,
      "learning_rate": 4.782510494793492e-05,
      "loss": 0.9303,
      "step": 119110
    },
    {
      "epoch": 0.41748590214035125,
      "grad_norm": 3.15625,
      "learning_rate": 4.782445591927122e-05,
      "loss": 1.1076,
      "step": 119120
    },
    {
      "epoch": 0.41752094964724684,
      "grad_norm": 3.25,
      "learning_rate": 4.782380689060752e-05,
      "loss": 0.9105,
      "step": 119130
    },
    {
      "epoch": 0.41755599715414243,
      "grad_norm": 3.359375,
      "learning_rate": 4.782315786194382e-05,
      "loss": 0.8801,
      "step": 119140
    },
    {
      "epoch": 0.417591044661038,
      "grad_norm": 3.09375,
      "learning_rate": 4.7822508833280114e-05,
      "loss": 0.9244,
      "step": 119150
    },
    {
      "epoch": 0.4176260921679336,
      "grad_norm": 2.6875,
      "learning_rate": 4.7821859804616415e-05,
      "loss": 0.8618,
      "step": 119160
    },
    {
      "epoch": 0.4176611396748292,
      "grad_norm": 2.890625,
      "learning_rate": 4.782121077595271e-05,
      "loss": 0.9492,
      "step": 119170
    },
    {
      "epoch": 0.41769618718172485,
      "grad_norm": 2.90625,
      "learning_rate": 4.782056174728901e-05,
      "loss": 0.8721,
      "step": 119180
    },
    {
      "epoch": 0.41773123468862045,
      "grad_norm": 2.90625,
      "learning_rate": 4.7819912718625306e-05,
      "loss": 0.9699,
      "step": 119190
    },
    {
      "epoch": 0.41776628219551604,
      "grad_norm": 3.390625,
      "learning_rate": 4.781926368996161e-05,
      "loss": 0.8574,
      "step": 119200
    },
    {
      "epoch": 0.41780132970241163,
      "grad_norm": 3.125,
      "learning_rate": 4.781861466129791e-05,
      "loss": 0.9293,
      "step": 119210
    },
    {
      "epoch": 0.4178363772093072,
      "grad_norm": 3.09375,
      "learning_rate": 4.78179656326342e-05,
      "loss": 1.0015,
      "step": 119220
    },
    {
      "epoch": 0.4178714247162028,
      "grad_norm": 2.59375,
      "learning_rate": 4.7817316603970505e-05,
      "loss": 0.9384,
      "step": 119230
    },
    {
      "epoch": 0.4179064722230984,
      "grad_norm": 2.5625,
      "learning_rate": 4.78166675753068e-05,
      "loss": 0.9753,
      "step": 119240
    },
    {
      "epoch": 0.417941519729994,
      "grad_norm": 3.5,
      "learning_rate": 4.78160185466431e-05,
      "loss": 0.98,
      "step": 119250
    },
    {
      "epoch": 0.4179765672368896,
      "grad_norm": 2.875,
      "learning_rate": 4.7815369517979395e-05,
      "loss": 0.8922,
      "step": 119260
    },
    {
      "epoch": 0.4180116147437852,
      "grad_norm": 2.828125,
      "learning_rate": 4.781472048931569e-05,
      "loss": 0.8505,
      "step": 119270
    },
    {
      "epoch": 0.4180466622506808,
      "grad_norm": 3.03125,
      "learning_rate": 4.781407146065199e-05,
      "loss": 0.9144,
      "step": 119280
    },
    {
      "epoch": 0.4180817097575764,
      "grad_norm": 3.234375,
      "learning_rate": 4.7813422431988286e-05,
      "loss": 0.8647,
      "step": 119290
    },
    {
      "epoch": 0.418116757264472,
      "grad_norm": 4.03125,
      "learning_rate": 4.781277340332459e-05,
      "loss": 0.9296,
      "step": 119300
    },
    {
      "epoch": 0.4181518047713676,
      "grad_norm": 2.875,
      "learning_rate": 4.781212437466088e-05,
      "loss": 0.8941,
      "step": 119310
    },
    {
      "epoch": 0.4181868522782632,
      "grad_norm": 3.59375,
      "learning_rate": 4.781147534599718e-05,
      "loss": 0.9888,
      "step": 119320
    },
    {
      "epoch": 0.4182218997851588,
      "grad_norm": 2.828125,
      "learning_rate": 4.781082631733348e-05,
      "loss": 0.935,
      "step": 119330
    },
    {
      "epoch": 0.41825694729205437,
      "grad_norm": 2.875,
      "learning_rate": 4.781017728866978e-05,
      "loss": 0.9288,
      "step": 119340
    },
    {
      "epoch": 0.41829199479894996,
      "grad_norm": 3.140625,
      "learning_rate": 4.7809528260006074e-05,
      "loss": 0.9054,
      "step": 119350
    },
    {
      "epoch": 0.41832704230584555,
      "grad_norm": 3.09375,
      "learning_rate": 4.7808879231342375e-05,
      "loss": 1.0765,
      "step": 119360
    },
    {
      "epoch": 0.41836208981274114,
      "grad_norm": 7.65625,
      "learning_rate": 4.780823020267867e-05,
      "loss": 1.0904,
      "step": 119370
    },
    {
      "epoch": 0.4183971373196368,
      "grad_norm": 3.546875,
      "learning_rate": 4.780758117401497e-05,
      "loss": 0.9945,
      "step": 119380
    },
    {
      "epoch": 0.4184321848265324,
      "grad_norm": 3.234375,
      "learning_rate": 4.7806932145351266e-05,
      "loss": 0.9498,
      "step": 119390
    },
    {
      "epoch": 0.418467232333428,
      "grad_norm": 2.828125,
      "learning_rate": 4.780628311668757e-05,
      "loss": 0.9206,
      "step": 119400
    },
    {
      "epoch": 0.41850227984032357,
      "grad_norm": 2.875,
      "learning_rate": 4.780563408802386e-05,
      "loss": 1.0345,
      "step": 119410
    },
    {
      "epoch": 0.41853732734721916,
      "grad_norm": 2.734375,
      "learning_rate": 4.780498505936016e-05,
      "loss": 0.9467,
      "step": 119420
    },
    {
      "epoch": 0.41857237485411475,
      "grad_norm": 3.421875,
      "learning_rate": 4.7804336030696465e-05,
      "loss": 0.9589,
      "step": 119430
    },
    {
      "epoch": 0.41860742236101034,
      "grad_norm": 2.671875,
      "learning_rate": 4.780368700203276e-05,
      "loss": 0.9709,
      "step": 119440
    },
    {
      "epoch": 0.41864246986790593,
      "grad_norm": 2.640625,
      "learning_rate": 4.780303797336906e-05,
      "loss": 0.8956,
      "step": 119450
    },
    {
      "epoch": 0.4186775173748015,
      "grad_norm": 3.265625,
      "learning_rate": 4.7802388944705355e-05,
      "loss": 0.9153,
      "step": 119460
    },
    {
      "epoch": 0.4187125648816971,
      "grad_norm": 3.1875,
      "learning_rate": 4.780173991604166e-05,
      "loss": 0.934,
      "step": 119470
    },
    {
      "epoch": 0.41874761238859276,
      "grad_norm": 2.546875,
      "learning_rate": 4.780109088737795e-05,
      "loss": 1.0264,
      "step": 119480
    },
    {
      "epoch": 0.41878265989548835,
      "grad_norm": 3.5625,
      "learning_rate": 4.780044185871425e-05,
      "loss": 1.024,
      "step": 119490
    },
    {
      "epoch": 0.41881770740238394,
      "grad_norm": 3.125,
      "learning_rate": 4.779979283005055e-05,
      "loss": 1.0162,
      "step": 119500
    },
    {
      "epoch": 0.41885275490927953,
      "grad_norm": 3.21875,
      "learning_rate": 4.779914380138685e-05,
      "loss": 1.028,
      "step": 119510
    },
    {
      "epoch": 0.4188878024161751,
      "grad_norm": 3.171875,
      "learning_rate": 4.779849477272314e-05,
      "loss": 0.9971,
      "step": 119520
    },
    {
      "epoch": 0.4189228499230707,
      "grad_norm": 2.453125,
      "learning_rate": 4.7797845744059445e-05,
      "loss": 0.9439,
      "step": 119530
    },
    {
      "epoch": 0.4189578974299663,
      "grad_norm": 2.671875,
      "learning_rate": 4.779719671539574e-05,
      "loss": 0.9216,
      "step": 119540
    },
    {
      "epoch": 0.4189929449368619,
      "grad_norm": 3.03125,
      "learning_rate": 4.779654768673204e-05,
      "loss": 0.9736,
      "step": 119550
    },
    {
      "epoch": 0.4190279924437575,
      "grad_norm": 2.625,
      "learning_rate": 4.779589865806834e-05,
      "loss": 0.923,
      "step": 119560
    },
    {
      "epoch": 0.41906303995065314,
      "grad_norm": 3.84375,
      "learning_rate": 4.779524962940464e-05,
      "loss": 0.9357,
      "step": 119570
    },
    {
      "epoch": 0.41909808745754873,
      "grad_norm": 3.390625,
      "learning_rate": 4.779460060074094e-05,
      "loss": 0.8758,
      "step": 119580
    },
    {
      "epoch": 0.4191331349644443,
      "grad_norm": 2.84375,
      "learning_rate": 4.779395157207723e-05,
      "loss": 0.9168,
      "step": 119590
    },
    {
      "epoch": 0.4191681824713399,
      "grad_norm": 3.046875,
      "learning_rate": 4.7793302543413534e-05,
      "loss": 0.9807,
      "step": 119600
    },
    {
      "epoch": 0.4192032299782355,
      "grad_norm": 3.359375,
      "learning_rate": 4.779265351474983e-05,
      "loss": 0.9023,
      "step": 119610
    },
    {
      "epoch": 0.4192382774851311,
      "grad_norm": 3.015625,
      "learning_rate": 4.779200448608613e-05,
      "loss": 0.9674,
      "step": 119620
    },
    {
      "epoch": 0.4192733249920267,
      "grad_norm": 3.15625,
      "learning_rate": 4.7791355457422425e-05,
      "loss": 0.9346,
      "step": 119630
    },
    {
      "epoch": 0.4193083724989223,
      "grad_norm": 3.265625,
      "learning_rate": 4.7790706428758726e-05,
      "loss": 1.0037,
      "step": 119640
    },
    {
      "epoch": 0.41934342000581787,
      "grad_norm": 2.90625,
      "learning_rate": 4.779005740009502e-05,
      "loss": 0.8315,
      "step": 119650
    },
    {
      "epoch": 0.41937846751271346,
      "grad_norm": 3.328125,
      "learning_rate": 4.7789408371431315e-05,
      "loss": 0.9705,
      "step": 119660
    },
    {
      "epoch": 0.4194135150196091,
      "grad_norm": 2.75,
      "learning_rate": 4.778875934276762e-05,
      "loss": 0.9398,
      "step": 119670
    },
    {
      "epoch": 0.4194485625265047,
      "grad_norm": 3.203125,
      "learning_rate": 4.778811031410391e-05,
      "loss": 0.9958,
      "step": 119680
    },
    {
      "epoch": 0.4194836100334003,
      "grad_norm": 2.796875,
      "learning_rate": 4.778746128544021e-05,
      "loss": 0.9752,
      "step": 119690
    },
    {
      "epoch": 0.4195186575402959,
      "grad_norm": 3.40625,
      "learning_rate": 4.778681225677651e-05,
      "loss": 0.9156,
      "step": 119700
    },
    {
      "epoch": 0.41955370504719147,
      "grad_norm": 3.34375,
      "learning_rate": 4.778616322811281e-05,
      "loss": 0.9337,
      "step": 119710
    },
    {
      "epoch": 0.41958875255408706,
      "grad_norm": 2.4375,
      "learning_rate": 4.77855141994491e-05,
      "loss": 0.912,
      "step": 119720
    },
    {
      "epoch": 0.41962380006098265,
      "grad_norm": 2.828125,
      "learning_rate": 4.7784865170785405e-05,
      "loss": 0.9559,
      "step": 119730
    },
    {
      "epoch": 0.41965884756787825,
      "grad_norm": 3.234375,
      "learning_rate": 4.77842161421217e-05,
      "loss": 0.9727,
      "step": 119740
    },
    {
      "epoch": 0.41969389507477384,
      "grad_norm": 3.40625,
      "learning_rate": 4.7783567113458e-05,
      "loss": 0.9627,
      "step": 119750
    },
    {
      "epoch": 0.41972894258166943,
      "grad_norm": 2.953125,
      "learning_rate": 4.7782918084794295e-05,
      "loss": 0.9322,
      "step": 119760
    },
    {
      "epoch": 0.4197639900885651,
      "grad_norm": 3.25,
      "learning_rate": 4.77822690561306e-05,
      "loss": 0.9906,
      "step": 119770
    },
    {
      "epoch": 0.41979903759546067,
      "grad_norm": 3.28125,
      "learning_rate": 4.778162002746689e-05,
      "loss": 0.964,
      "step": 119780
    },
    {
      "epoch": 0.41983408510235626,
      "grad_norm": 3.265625,
      "learning_rate": 4.778097099880319e-05,
      "loss": 0.9561,
      "step": 119790
    },
    {
      "epoch": 0.41986913260925185,
      "grad_norm": 2.75,
      "learning_rate": 4.7780321970139494e-05,
      "loss": 0.9205,
      "step": 119800
    },
    {
      "epoch": 0.41990418011614744,
      "grad_norm": 3.234375,
      "learning_rate": 4.777967294147579e-05,
      "loss": 0.9131,
      "step": 119810
    },
    {
      "epoch": 0.41993922762304303,
      "grad_norm": 3.046875,
      "learning_rate": 4.777902391281209e-05,
      "loss": 0.9174,
      "step": 119820
    },
    {
      "epoch": 0.4199742751299386,
      "grad_norm": 3.125,
      "learning_rate": 4.7778374884148385e-05,
      "loss": 0.9761,
      "step": 119830
    },
    {
      "epoch": 0.4200093226368342,
      "grad_norm": 3.0,
      "learning_rate": 4.7777725855484686e-05,
      "loss": 0.9815,
      "step": 119840
    },
    {
      "epoch": 0.4200443701437298,
      "grad_norm": 3.0,
      "learning_rate": 4.777707682682098e-05,
      "loss": 0.9895,
      "step": 119850
    },
    {
      "epoch": 0.4200794176506254,
      "grad_norm": 3.15625,
      "learning_rate": 4.777642779815728e-05,
      "loss": 1.0168,
      "step": 119860
    },
    {
      "epoch": 0.42011446515752104,
      "grad_norm": 3.03125,
      "learning_rate": 4.777577876949358e-05,
      "loss": 0.9664,
      "step": 119870
    },
    {
      "epoch": 0.42014951266441664,
      "grad_norm": 2.84375,
      "learning_rate": 4.777512974082988e-05,
      "loss": 0.9506,
      "step": 119880
    },
    {
      "epoch": 0.4201845601713122,
      "grad_norm": 3.703125,
      "learning_rate": 4.777448071216617e-05,
      "loss": 0.9204,
      "step": 119890
    },
    {
      "epoch": 0.4202196076782078,
      "grad_norm": 3.09375,
      "learning_rate": 4.7773831683502474e-05,
      "loss": 0.9964,
      "step": 119900
    },
    {
      "epoch": 0.4202546551851034,
      "grad_norm": 2.78125,
      "learning_rate": 4.777318265483877e-05,
      "loss": 0.8896,
      "step": 119910
    },
    {
      "epoch": 0.420289702691999,
      "grad_norm": 3.078125,
      "learning_rate": 4.777253362617507e-05,
      "loss": 0.9233,
      "step": 119920
    },
    {
      "epoch": 0.4203247501988946,
      "grad_norm": 2.796875,
      "learning_rate": 4.777188459751137e-05,
      "loss": 0.8944,
      "step": 119930
    },
    {
      "epoch": 0.4203597977057902,
      "grad_norm": 3.015625,
      "learning_rate": 4.7771235568847666e-05,
      "loss": 0.9896,
      "step": 119940
    },
    {
      "epoch": 0.4203948452126858,
      "grad_norm": 3.03125,
      "learning_rate": 4.777058654018397e-05,
      "loss": 0.9241,
      "step": 119950
    },
    {
      "epoch": 0.42042989271958137,
      "grad_norm": 3.265625,
      "learning_rate": 4.776993751152026e-05,
      "loss": 0.9713,
      "step": 119960
    },
    {
      "epoch": 0.420464940226477,
      "grad_norm": 3.109375,
      "learning_rate": 4.7769288482856563e-05,
      "loss": 0.9339,
      "step": 119970
    },
    {
      "epoch": 0.4204999877333726,
      "grad_norm": 2.984375,
      "learning_rate": 4.776863945419286e-05,
      "loss": 0.9066,
      "step": 119980
    },
    {
      "epoch": 0.4205350352402682,
      "grad_norm": 2.90625,
      "learning_rate": 4.776799042552916e-05,
      "loss": 0.9463,
      "step": 119990
    },
    {
      "epoch": 0.4205700827471638,
      "grad_norm": 3.1875,
      "learning_rate": 4.7767341396865454e-05,
      "loss": 0.954,
      "step": 120000
    },
    {
      "epoch": 0.4205700827471638,
      "eval_loss": 0.8873227834701538,
      "eval_runtime": 551.1856,
      "eval_samples_per_second": 690.214,
      "eval_steps_per_second": 57.518,
      "step": 120000
    },
    {
      "epoch": 0.4206051302540594,
      "grad_norm": 3.03125,
      "learning_rate": 4.7766692368201755e-05,
      "loss": 0.9849,
      "step": 120010
    },
    {
      "epoch": 0.42064017776095497,
      "grad_norm": 3.21875,
      "learning_rate": 4.776604333953805e-05,
      "loss": 0.975,
      "step": 120020
    },
    {
      "epoch": 0.42067522526785056,
      "grad_norm": 3.171875,
      "learning_rate": 4.7765394310874345e-05,
      "loss": 0.9149,
      "step": 120030
    },
    {
      "epoch": 0.42071027277474615,
      "grad_norm": 3.015625,
      "learning_rate": 4.7764745282210646e-05,
      "loss": 0.9975,
      "step": 120040
    },
    {
      "epoch": 0.42074532028164174,
      "grad_norm": 2.984375,
      "learning_rate": 4.776409625354694e-05,
      "loss": 0.9623,
      "step": 120050
    },
    {
      "epoch": 0.42078036778853734,
      "grad_norm": 3.265625,
      "learning_rate": 4.776344722488324e-05,
      "loss": 0.9437,
      "step": 120060
    },
    {
      "epoch": 0.420815415295433,
      "grad_norm": 2.6875,
      "learning_rate": 4.776279819621954e-05,
      "loss": 0.8813,
      "step": 120070
    },
    {
      "epoch": 0.4208504628023286,
      "grad_norm": 3.03125,
      "learning_rate": 4.776214916755584e-05,
      "loss": 1.0706,
      "step": 120080
    },
    {
      "epoch": 0.42088551030922416,
      "grad_norm": 3.125,
      "learning_rate": 4.776150013889213e-05,
      "loss": 0.9788,
      "step": 120090
    },
    {
      "epoch": 0.42092055781611976,
      "grad_norm": 3.265625,
      "learning_rate": 4.7760851110228434e-05,
      "loss": 0.8824,
      "step": 120100
    },
    {
      "epoch": 0.42095560532301535,
      "grad_norm": 3.046875,
      "learning_rate": 4.776020208156473e-05,
      "loss": 0.9143,
      "step": 120110
    },
    {
      "epoch": 0.42099065282991094,
      "grad_norm": 2.703125,
      "learning_rate": 4.775955305290103e-05,
      "loss": 0.9276,
      "step": 120120
    },
    {
      "epoch": 0.42102570033680653,
      "grad_norm": 3.3125,
      "learning_rate": 4.7758904024237325e-05,
      "loss": 1.0622,
      "step": 120130
    },
    {
      "epoch": 0.4210607478437021,
      "grad_norm": 3.25,
      "learning_rate": 4.7758254995573626e-05,
      "loss": 0.9917,
      "step": 120140
    },
    {
      "epoch": 0.4210957953505977,
      "grad_norm": 3.09375,
      "learning_rate": 4.775760596690992e-05,
      "loss": 0.9639,
      "step": 120150
    },
    {
      "epoch": 0.42113084285749336,
      "grad_norm": 2.625,
      "learning_rate": 4.775695693824622e-05,
      "loss": 0.9516,
      "step": 120160
    },
    {
      "epoch": 0.42116589036438895,
      "grad_norm": 2.9375,
      "learning_rate": 4.7756307909582523e-05,
      "loss": 0.9457,
      "step": 120170
    },
    {
      "epoch": 0.42120093787128454,
      "grad_norm": 2.625,
      "learning_rate": 4.775565888091882e-05,
      "loss": 0.9498,
      "step": 120180
    },
    {
      "epoch": 0.42123598537818013,
      "grad_norm": 3.296875,
      "learning_rate": 4.775500985225512e-05,
      "loss": 0.9639,
      "step": 120190
    },
    {
      "epoch": 0.4212710328850757,
      "grad_norm": 3.03125,
      "learning_rate": 4.7754360823591414e-05,
      "loss": 0.98,
      "step": 120200
    },
    {
      "epoch": 0.4213060803919713,
      "grad_norm": 2.78125,
      "learning_rate": 4.7753711794927715e-05,
      "loss": 0.9755,
      "step": 120210
    },
    {
      "epoch": 0.4213411278988669,
      "grad_norm": 2.546875,
      "learning_rate": 4.775306276626401e-05,
      "loss": 0.8983,
      "step": 120220
    },
    {
      "epoch": 0.4213761754057625,
      "grad_norm": 3.015625,
      "learning_rate": 4.775241373760031e-05,
      "loss": 0.943,
      "step": 120230
    },
    {
      "epoch": 0.4214112229126581,
      "grad_norm": 2.546875,
      "learning_rate": 4.7751764708936606e-05,
      "loss": 0.8981,
      "step": 120240
    },
    {
      "epoch": 0.4214462704195537,
      "grad_norm": 3.4375,
      "learning_rate": 4.775111568027291e-05,
      "loss": 0.9907,
      "step": 120250
    },
    {
      "epoch": 0.42148131792644933,
      "grad_norm": 3.109375,
      "learning_rate": 4.77504666516092e-05,
      "loss": 0.9564,
      "step": 120260
    },
    {
      "epoch": 0.4215163654333449,
      "grad_norm": 3.265625,
      "learning_rate": 4.7749817622945503e-05,
      "loss": 0.973,
      "step": 120270
    },
    {
      "epoch": 0.4215514129402405,
      "grad_norm": 2.640625,
      "learning_rate": 4.77491685942818e-05,
      "loss": 0.9837,
      "step": 120280
    },
    {
      "epoch": 0.4215864604471361,
      "grad_norm": 3.09375,
      "learning_rate": 4.77485195656181e-05,
      "loss": 0.9472,
      "step": 120290
    },
    {
      "epoch": 0.4216215079540317,
      "grad_norm": 2.890625,
      "learning_rate": 4.77478705369544e-05,
      "loss": 0.9291,
      "step": 120300
    },
    {
      "epoch": 0.4216565554609273,
      "grad_norm": 2.859375,
      "learning_rate": 4.7747221508290695e-05,
      "loss": 0.9479,
      "step": 120310
    },
    {
      "epoch": 0.4216916029678229,
      "grad_norm": 3.109375,
      "learning_rate": 4.7746572479627e-05,
      "loss": 0.9092,
      "step": 120320
    },
    {
      "epoch": 0.42172665047471847,
      "grad_norm": 2.859375,
      "learning_rate": 4.774592345096329e-05,
      "loss": 1.0095,
      "step": 120330
    },
    {
      "epoch": 0.42176169798161406,
      "grad_norm": 2.828125,
      "learning_rate": 4.774527442229959e-05,
      "loss": 0.8923,
      "step": 120340
    },
    {
      "epoch": 0.42179674548850965,
      "grad_norm": 3.46875,
      "learning_rate": 4.774462539363589e-05,
      "loss": 0.898,
      "step": 120350
    },
    {
      "epoch": 0.4218317929954053,
      "grad_norm": 2.96875,
      "learning_rate": 4.774397636497219e-05,
      "loss": 0.9613,
      "step": 120360
    },
    {
      "epoch": 0.4218668405023009,
      "grad_norm": 3.4375,
      "learning_rate": 4.7743327336308483e-05,
      "loss": 1.0649,
      "step": 120370
    },
    {
      "epoch": 0.4219018880091965,
      "grad_norm": 3.34375,
      "learning_rate": 4.7742678307644785e-05,
      "loss": 1.0367,
      "step": 120380
    },
    {
      "epoch": 0.42193693551609207,
      "grad_norm": 3.015625,
      "learning_rate": 4.774202927898108e-05,
      "loss": 1.0167,
      "step": 120390
    },
    {
      "epoch": 0.42197198302298766,
      "grad_norm": 2.96875,
      "learning_rate": 4.7741380250317374e-05,
      "loss": 0.91,
      "step": 120400
    },
    {
      "epoch": 0.42200703052988325,
      "grad_norm": 3.625,
      "learning_rate": 4.7740731221653675e-05,
      "loss": 0.9763,
      "step": 120410
    },
    {
      "epoch": 0.42204207803677884,
      "grad_norm": 3.203125,
      "learning_rate": 4.774008219298997e-05,
      "loss": 0.9189,
      "step": 120420
    },
    {
      "epoch": 0.42207712554367444,
      "grad_norm": 2.875,
      "learning_rate": 4.773943316432627e-05,
      "loss": 0.8991,
      "step": 120430
    },
    {
      "epoch": 0.42211217305057,
      "grad_norm": 3.640625,
      "learning_rate": 4.7738784135662566e-05,
      "loss": 0.9561,
      "step": 120440
    },
    {
      "epoch": 0.4221472205574656,
      "grad_norm": 3.5,
      "learning_rate": 4.773813510699887e-05,
      "loss": 1.0157,
      "step": 120450
    },
    {
      "epoch": 0.42218226806436127,
      "grad_norm": 2.765625,
      "learning_rate": 4.773748607833516e-05,
      "loss": 0.9915,
      "step": 120460
    },
    {
      "epoch": 0.42221731557125686,
      "grad_norm": 2.875,
      "learning_rate": 4.7736837049671463e-05,
      "loss": 0.9022,
      "step": 120470
    },
    {
      "epoch": 0.42225236307815245,
      "grad_norm": 3.640625,
      "learning_rate": 4.773618802100776e-05,
      "loss": 1.0086,
      "step": 120480
    },
    {
      "epoch": 0.42228741058504804,
      "grad_norm": 3.265625,
      "learning_rate": 4.773553899234406e-05,
      "loss": 0.9092,
      "step": 120490
    },
    {
      "epoch": 0.42232245809194363,
      "grad_norm": 2.921875,
      "learning_rate": 4.7734889963680354e-05,
      "loss": 0.8644,
      "step": 120500
    },
    {
      "epoch": 0.4223575055988392,
      "grad_norm": 2.828125,
      "learning_rate": 4.7734240935016655e-05,
      "loss": 0.9586,
      "step": 120510
    },
    {
      "epoch": 0.4223925531057348,
      "grad_norm": 2.9375,
      "learning_rate": 4.773359190635296e-05,
      "loss": 0.9207,
      "step": 120520
    },
    {
      "epoch": 0.4224276006126304,
      "grad_norm": 3.3125,
      "learning_rate": 4.773294287768925e-05,
      "loss": 0.9034,
      "step": 120530
    },
    {
      "epoch": 0.422462648119526,
      "grad_norm": 3.015625,
      "learning_rate": 4.773229384902555e-05,
      "loss": 1.0172,
      "step": 120540
    },
    {
      "epoch": 0.4224976956264216,
      "grad_norm": 2.890625,
      "learning_rate": 4.773164482036185e-05,
      "loss": 0.9585,
      "step": 120550
    },
    {
      "epoch": 0.42253274313331723,
      "grad_norm": 3.3125,
      "learning_rate": 4.773099579169815e-05,
      "loss": 1.0027,
      "step": 120560
    },
    {
      "epoch": 0.4225677906402128,
      "grad_norm": 2.921875,
      "learning_rate": 4.7730346763034443e-05,
      "loss": 0.908,
      "step": 120570
    },
    {
      "epoch": 0.4226028381471084,
      "grad_norm": 3.234375,
      "learning_rate": 4.7729697734370745e-05,
      "loss": 0.9434,
      "step": 120580
    },
    {
      "epoch": 0.422637885654004,
      "grad_norm": 3.109375,
      "learning_rate": 4.772904870570704e-05,
      "loss": 0.9323,
      "step": 120590
    },
    {
      "epoch": 0.4226729331608996,
      "grad_norm": 3.1875,
      "learning_rate": 4.772839967704334e-05,
      "loss": 0.9595,
      "step": 120600
    },
    {
      "epoch": 0.4227079806677952,
      "grad_norm": 3.109375,
      "learning_rate": 4.7727750648379635e-05,
      "loss": 0.9353,
      "step": 120610
    },
    {
      "epoch": 0.4227430281746908,
      "grad_norm": 3.1875,
      "learning_rate": 4.772710161971594e-05,
      "loss": 0.9185,
      "step": 120620
    },
    {
      "epoch": 0.4227780756815864,
      "grad_norm": 3.046875,
      "learning_rate": 4.772645259105223e-05,
      "loss": 0.9132,
      "step": 120630
    },
    {
      "epoch": 0.42281312318848197,
      "grad_norm": 2.890625,
      "learning_rate": 4.772580356238853e-05,
      "loss": 0.933,
      "step": 120640
    },
    {
      "epoch": 0.4228481706953776,
      "grad_norm": 2.5625,
      "learning_rate": 4.772515453372483e-05,
      "loss": 0.9812,
      "step": 120650
    },
    {
      "epoch": 0.4228832182022732,
      "grad_norm": 2.875,
      "learning_rate": 4.772450550506113e-05,
      "loss": 0.9184,
      "step": 120660
    },
    {
      "epoch": 0.4229182657091688,
      "grad_norm": 3.640625,
      "learning_rate": 4.772385647639743e-05,
      "loss": 0.9922,
      "step": 120670
    },
    {
      "epoch": 0.4229533132160644,
      "grad_norm": 2.84375,
      "learning_rate": 4.7723207447733725e-05,
      "loss": 0.9772,
      "step": 120680
    },
    {
      "epoch": 0.42298836072296,
      "grad_norm": 2.828125,
      "learning_rate": 4.7722558419070026e-05,
      "loss": 0.8914,
      "step": 120690
    },
    {
      "epoch": 0.42302340822985557,
      "grad_norm": 3.078125,
      "learning_rate": 4.772190939040632e-05,
      "loss": 0.8932,
      "step": 120700
    },
    {
      "epoch": 0.42305845573675116,
      "grad_norm": 3.046875,
      "learning_rate": 4.772126036174262e-05,
      "loss": 0.9523,
      "step": 120710
    },
    {
      "epoch": 0.42309350324364675,
      "grad_norm": 3.203125,
      "learning_rate": 4.772061133307892e-05,
      "loss": 1.0322,
      "step": 120720
    },
    {
      "epoch": 0.42312855075054234,
      "grad_norm": 3.53125,
      "learning_rate": 4.771996230441522e-05,
      "loss": 0.9657,
      "step": 120730
    },
    {
      "epoch": 0.42316359825743793,
      "grad_norm": 3.296875,
      "learning_rate": 4.771931327575151e-05,
      "loss": 0.938,
      "step": 120740
    },
    {
      "epoch": 0.4231986457643336,
      "grad_norm": 3.296875,
      "learning_rate": 4.7718664247087814e-05,
      "loss": 1.0395,
      "step": 120750
    },
    {
      "epoch": 0.42323369327122917,
      "grad_norm": 3.015625,
      "learning_rate": 4.771801521842411e-05,
      "loss": 0.8943,
      "step": 120760
    },
    {
      "epoch": 0.42326874077812476,
      "grad_norm": 2.75,
      "learning_rate": 4.771736618976041e-05,
      "loss": 0.9677,
      "step": 120770
    },
    {
      "epoch": 0.42330378828502035,
      "grad_norm": 2.9375,
      "learning_rate": 4.7716717161096705e-05,
      "loss": 0.9075,
      "step": 120780
    },
    {
      "epoch": 0.42333883579191595,
      "grad_norm": 3.515625,
      "learning_rate": 4.7716068132433e-05,
      "loss": 0.9741,
      "step": 120790
    },
    {
      "epoch": 0.42337388329881154,
      "grad_norm": 2.859375,
      "learning_rate": 4.77154191037693e-05,
      "loss": 0.9005,
      "step": 120800
    },
    {
      "epoch": 0.42340893080570713,
      "grad_norm": 4.09375,
      "learning_rate": 4.7714770075105595e-05,
      "loss": 0.9635,
      "step": 120810
    },
    {
      "epoch": 0.4234439783126027,
      "grad_norm": 3.109375,
      "learning_rate": 4.77141210464419e-05,
      "loss": 0.9645,
      "step": 120820
    },
    {
      "epoch": 0.4234790258194983,
      "grad_norm": 2.875,
      "learning_rate": 4.771347201777819e-05,
      "loss": 0.9534,
      "step": 120830
    },
    {
      "epoch": 0.4235140733263939,
      "grad_norm": 3.34375,
      "learning_rate": 4.771282298911449e-05,
      "loss": 0.8524,
      "step": 120840
    },
    {
      "epoch": 0.42354912083328955,
      "grad_norm": 3.015625,
      "learning_rate": 4.771217396045079e-05,
      "loss": 0.9936,
      "step": 120850
    },
    {
      "epoch": 0.42358416834018514,
      "grad_norm": 2.8125,
      "learning_rate": 4.771152493178709e-05,
      "loss": 0.8979,
      "step": 120860
    },
    {
      "epoch": 0.42361921584708073,
      "grad_norm": 2.78125,
      "learning_rate": 4.7710875903123383e-05,
      "loss": 0.9009,
      "step": 120870
    },
    {
      "epoch": 0.4236542633539763,
      "grad_norm": 2.78125,
      "learning_rate": 4.7710226874459685e-05,
      "loss": 1.0248,
      "step": 120880
    },
    {
      "epoch": 0.4236893108608719,
      "grad_norm": 2.984375,
      "learning_rate": 4.7709577845795986e-05,
      "loss": 1.0116,
      "step": 120890
    },
    {
      "epoch": 0.4237243583677675,
      "grad_norm": 3.09375,
      "learning_rate": 4.770892881713228e-05,
      "loss": 0.9646,
      "step": 120900
    },
    {
      "epoch": 0.4237594058746631,
      "grad_norm": 2.71875,
      "learning_rate": 4.770827978846858e-05,
      "loss": 0.9935,
      "step": 120910
    },
    {
      "epoch": 0.4237944533815587,
      "grad_norm": 3.421875,
      "learning_rate": 4.770763075980488e-05,
      "loss": 0.9389,
      "step": 120920
    },
    {
      "epoch": 0.4238295008884543,
      "grad_norm": 3.140625,
      "learning_rate": 4.770698173114118e-05,
      "loss": 1.0346,
      "step": 120930
    },
    {
      "epoch": 0.42386454839534987,
      "grad_norm": 3.265625,
      "learning_rate": 4.770633270247747e-05,
      "loss": 0.9022,
      "step": 120940
    },
    {
      "epoch": 0.4238995959022455,
      "grad_norm": 2.703125,
      "learning_rate": 4.7705683673813774e-05,
      "loss": 0.9248,
      "step": 120950
    },
    {
      "epoch": 0.4239346434091411,
      "grad_norm": 2.765625,
      "learning_rate": 4.770503464515007e-05,
      "loss": 0.9325,
      "step": 120960
    },
    {
      "epoch": 0.4239696909160367,
      "grad_norm": 3.171875,
      "learning_rate": 4.770438561648637e-05,
      "loss": 0.9139,
      "step": 120970
    },
    {
      "epoch": 0.4240047384229323,
      "grad_norm": 2.90625,
      "learning_rate": 4.7703736587822665e-05,
      "loss": 0.9158,
      "step": 120980
    },
    {
      "epoch": 0.4240397859298279,
      "grad_norm": 3.375,
      "learning_rate": 4.7703087559158966e-05,
      "loss": 0.9715,
      "step": 120990
    },
    {
      "epoch": 0.4240748334367235,
      "grad_norm": 2.84375,
      "learning_rate": 4.770243853049526e-05,
      "loss": 0.9273,
      "step": 121000
    },
    {
      "epoch": 0.42410988094361907,
      "grad_norm": 2.609375,
      "learning_rate": 4.770178950183156e-05,
      "loss": 0.953,
      "step": 121010
    },
    {
      "epoch": 0.42414492845051466,
      "grad_norm": 2.6875,
      "learning_rate": 4.770114047316786e-05,
      "loss": 0.8937,
      "step": 121020
    },
    {
      "epoch": 0.42417997595741025,
      "grad_norm": 3.359375,
      "learning_rate": 4.770049144450416e-05,
      "loss": 0.9373,
      "step": 121030
    },
    {
      "epoch": 0.42421502346430584,
      "grad_norm": 3.15625,
      "learning_rate": 4.769984241584046e-05,
      "loss": 0.9624,
      "step": 121040
    },
    {
      "epoch": 0.4242500709712015,
      "grad_norm": 2.796875,
      "learning_rate": 4.7699193387176754e-05,
      "loss": 0.9069,
      "step": 121050
    },
    {
      "epoch": 0.4242851184780971,
      "grad_norm": 2.8125,
      "learning_rate": 4.7698544358513056e-05,
      "loss": 0.8587,
      "step": 121060
    },
    {
      "epoch": 0.42432016598499267,
      "grad_norm": 2.921875,
      "learning_rate": 4.769789532984935e-05,
      "loss": 0.8973,
      "step": 121070
    },
    {
      "epoch": 0.42435521349188826,
      "grad_norm": 3.0625,
      "learning_rate": 4.769724630118565e-05,
      "loss": 0.9799,
      "step": 121080
    },
    {
      "epoch": 0.42439026099878385,
      "grad_norm": 2.578125,
      "learning_rate": 4.7696597272521946e-05,
      "loss": 0.9984,
      "step": 121090
    },
    {
      "epoch": 0.42442530850567944,
      "grad_norm": 2.8125,
      "learning_rate": 4.769594824385825e-05,
      "loss": 0.9559,
      "step": 121100
    },
    {
      "epoch": 0.42446035601257504,
      "grad_norm": 2.953125,
      "learning_rate": 4.769529921519454e-05,
      "loss": 0.9254,
      "step": 121110
    },
    {
      "epoch": 0.4244954035194706,
      "grad_norm": 3.21875,
      "learning_rate": 4.7694650186530844e-05,
      "loss": 0.9464,
      "step": 121120
    },
    {
      "epoch": 0.4245304510263662,
      "grad_norm": 3.40625,
      "learning_rate": 4.769400115786714e-05,
      "loss": 1.0253,
      "step": 121130
    },
    {
      "epoch": 0.4245654985332618,
      "grad_norm": 3.25,
      "learning_rate": 4.769335212920344e-05,
      "loss": 0.8913,
      "step": 121140
    },
    {
      "epoch": 0.42460054604015746,
      "grad_norm": 3.328125,
      "learning_rate": 4.7692703100539734e-05,
      "loss": 0.972,
      "step": 121150
    },
    {
      "epoch": 0.42463559354705305,
      "grad_norm": 3.25,
      "learning_rate": 4.769205407187603e-05,
      "loss": 0.9548,
      "step": 121160
    },
    {
      "epoch": 0.42467064105394864,
      "grad_norm": 3.234375,
      "learning_rate": 4.769140504321233e-05,
      "loss": 0.906,
      "step": 121170
    },
    {
      "epoch": 0.42470568856084423,
      "grad_norm": 3.140625,
      "learning_rate": 4.7690756014548625e-05,
      "loss": 0.9285,
      "step": 121180
    },
    {
      "epoch": 0.4247407360677398,
      "grad_norm": 3.09375,
      "learning_rate": 4.7690106985884926e-05,
      "loss": 0.9655,
      "step": 121190
    },
    {
      "epoch": 0.4247757835746354,
      "grad_norm": 2.765625,
      "learning_rate": 4.768945795722122e-05,
      "loss": 0.8941,
      "step": 121200
    },
    {
      "epoch": 0.424810831081531,
      "grad_norm": 2.578125,
      "learning_rate": 4.768880892855752e-05,
      "loss": 0.9219,
      "step": 121210
    },
    {
      "epoch": 0.4248458785884266,
      "grad_norm": 3.078125,
      "learning_rate": 4.768815989989382e-05,
      "loss": 0.876,
      "step": 121220
    },
    {
      "epoch": 0.4248809260953222,
      "grad_norm": 3.09375,
      "learning_rate": 4.768751087123012e-05,
      "loss": 0.9037,
      "step": 121230
    },
    {
      "epoch": 0.42491597360221783,
      "grad_norm": 2.875,
      "learning_rate": 4.768686184256641e-05,
      "loss": 0.9519,
      "step": 121240
    },
    {
      "epoch": 0.4249510211091134,
      "grad_norm": 3.15625,
      "learning_rate": 4.7686212813902714e-05,
      "loss": 1.0075,
      "step": 121250
    },
    {
      "epoch": 0.424986068616009,
      "grad_norm": 3.234375,
      "learning_rate": 4.7685563785239016e-05,
      "loss": 1.0736,
      "step": 121260
    },
    {
      "epoch": 0.4250211161229046,
      "grad_norm": 2.671875,
      "learning_rate": 4.768491475657531e-05,
      "loss": 0.914,
      "step": 121270
    },
    {
      "epoch": 0.4250561636298002,
      "grad_norm": 3.40625,
      "learning_rate": 4.768426572791161e-05,
      "loss": 0.9337,
      "step": 121280
    },
    {
      "epoch": 0.4250912111366958,
      "grad_norm": 3.0625,
      "learning_rate": 4.7683616699247906e-05,
      "loss": 0.8768,
      "step": 121290
    },
    {
      "epoch": 0.4251262586435914,
      "grad_norm": 2.96875,
      "learning_rate": 4.768296767058421e-05,
      "loss": 0.9727,
      "step": 121300
    },
    {
      "epoch": 0.425161306150487,
      "grad_norm": 2.875,
      "learning_rate": 4.76823186419205e-05,
      "loss": 0.8981,
      "step": 121310
    },
    {
      "epoch": 0.42519635365738256,
      "grad_norm": 2.5,
      "learning_rate": 4.7681669613256804e-05,
      "loss": 0.9308,
      "step": 121320
    },
    {
      "epoch": 0.42523140116427816,
      "grad_norm": 2.875,
      "learning_rate": 4.76810205845931e-05,
      "loss": 0.9505,
      "step": 121330
    },
    {
      "epoch": 0.4252664486711738,
      "grad_norm": 2.984375,
      "learning_rate": 4.76803715559294e-05,
      "loss": 0.8566,
      "step": 121340
    },
    {
      "epoch": 0.4253014961780694,
      "grad_norm": 2.953125,
      "learning_rate": 4.7679722527265694e-05,
      "loss": 0.9602,
      "step": 121350
    },
    {
      "epoch": 0.425336543684965,
      "grad_norm": 2.84375,
      "learning_rate": 4.7679073498601996e-05,
      "loss": 1.0271,
      "step": 121360
    },
    {
      "epoch": 0.4253715911918606,
      "grad_norm": 3.078125,
      "learning_rate": 4.767842446993829e-05,
      "loss": 0.9443,
      "step": 121370
    },
    {
      "epoch": 0.42540663869875617,
      "grad_norm": 3.15625,
      "learning_rate": 4.767777544127459e-05,
      "loss": 0.8717,
      "step": 121380
    },
    {
      "epoch": 0.42544168620565176,
      "grad_norm": 3.140625,
      "learning_rate": 4.767712641261089e-05,
      "loss": 0.9189,
      "step": 121390
    },
    {
      "epoch": 0.42547673371254735,
      "grad_norm": 2.375,
      "learning_rate": 4.767647738394719e-05,
      "loss": 0.9661,
      "step": 121400
    },
    {
      "epoch": 0.42551178121944294,
      "grad_norm": 3.328125,
      "learning_rate": 4.767582835528349e-05,
      "loss": 1.0072,
      "step": 121410
    },
    {
      "epoch": 0.42554682872633853,
      "grad_norm": 3.28125,
      "learning_rate": 4.7675179326619784e-05,
      "loss": 0.9709,
      "step": 121420
    },
    {
      "epoch": 0.4255818762332341,
      "grad_norm": 2.890625,
      "learning_rate": 4.7674530297956085e-05,
      "loss": 0.9943,
      "step": 121430
    },
    {
      "epoch": 0.42561692374012977,
      "grad_norm": 2.703125,
      "learning_rate": 4.767388126929238e-05,
      "loss": 0.8906,
      "step": 121440
    },
    {
      "epoch": 0.42565197124702536,
      "grad_norm": 2.984375,
      "learning_rate": 4.767323224062868e-05,
      "loss": 0.8557,
      "step": 121450
    },
    {
      "epoch": 0.42568701875392095,
      "grad_norm": 3.21875,
      "learning_rate": 4.7672583211964976e-05,
      "loss": 0.9836,
      "step": 121460
    },
    {
      "epoch": 0.42572206626081655,
      "grad_norm": 3.453125,
      "learning_rate": 4.767193418330128e-05,
      "loss": 0.9208,
      "step": 121470
    },
    {
      "epoch": 0.42575711376771214,
      "grad_norm": 3.125,
      "learning_rate": 4.767128515463757e-05,
      "loss": 0.9174,
      "step": 121480
    },
    {
      "epoch": 0.42579216127460773,
      "grad_norm": 2.84375,
      "learning_rate": 4.767063612597387e-05,
      "loss": 0.888,
      "step": 121490
    },
    {
      "epoch": 0.4258272087815033,
      "grad_norm": 2.890625,
      "learning_rate": 4.766998709731017e-05,
      "loss": 0.9725,
      "step": 121500
    },
    {
      "epoch": 0.4258622562883989,
      "grad_norm": 3.265625,
      "learning_rate": 4.766933806864647e-05,
      "loss": 0.9722,
      "step": 121510
    },
    {
      "epoch": 0.4258973037952945,
      "grad_norm": 3.46875,
      "learning_rate": 4.7668689039982764e-05,
      "loss": 0.978,
      "step": 121520
    },
    {
      "epoch": 0.4259323513021901,
      "grad_norm": 3.0625,
      "learning_rate": 4.766804001131906e-05,
      "loss": 1.0142,
      "step": 121530
    },
    {
      "epoch": 0.42596739880908574,
      "grad_norm": 2.859375,
      "learning_rate": 4.766739098265536e-05,
      "loss": 0.8563,
      "step": 121540
    },
    {
      "epoch": 0.42600244631598133,
      "grad_norm": 3.328125,
      "learning_rate": 4.7666741953991654e-05,
      "loss": 0.9642,
      "step": 121550
    },
    {
      "epoch": 0.4260374938228769,
      "grad_norm": 3.296875,
      "learning_rate": 4.7666092925327956e-05,
      "loss": 0.9551,
      "step": 121560
    },
    {
      "epoch": 0.4260725413297725,
      "grad_norm": 2.828125,
      "learning_rate": 4.766544389666425e-05,
      "loss": 0.9071,
      "step": 121570
    },
    {
      "epoch": 0.4261075888366681,
      "grad_norm": 2.984375,
      "learning_rate": 4.766479486800055e-05,
      "loss": 0.9817,
      "step": 121580
    },
    {
      "epoch": 0.4261426363435637,
      "grad_norm": 2.84375,
      "learning_rate": 4.7664145839336846e-05,
      "loss": 0.9131,
      "step": 121590
    },
    {
      "epoch": 0.4261776838504593,
      "grad_norm": 2.890625,
      "learning_rate": 4.766349681067315e-05,
      "loss": 0.9383,
      "step": 121600
    },
    {
      "epoch": 0.4262127313573549,
      "grad_norm": 2.96875,
      "learning_rate": 4.766284778200944e-05,
      "loss": 0.9056,
      "step": 121610
    },
    {
      "epoch": 0.42624777886425047,
      "grad_norm": 4.21875,
      "learning_rate": 4.7662198753345744e-05,
      "loss": 0.8963,
      "step": 121620
    },
    {
      "epoch": 0.42628282637114606,
      "grad_norm": 2.859375,
      "learning_rate": 4.7661549724682045e-05,
      "loss": 0.8458,
      "step": 121630
    },
    {
      "epoch": 0.4263178738780417,
      "grad_norm": 2.96875,
      "learning_rate": 4.766090069601834e-05,
      "loss": 0.9567,
      "step": 121640
    },
    {
      "epoch": 0.4263529213849373,
      "grad_norm": 2.875,
      "learning_rate": 4.766025166735464e-05,
      "loss": 0.9284,
      "step": 121650
    },
    {
      "epoch": 0.4263879688918329,
      "grad_norm": 2.984375,
      "learning_rate": 4.7659602638690936e-05,
      "loss": 0.86,
      "step": 121660
    },
    {
      "epoch": 0.4264230163987285,
      "grad_norm": 2.765625,
      "learning_rate": 4.765895361002724e-05,
      "loss": 0.9266,
      "step": 121670
    },
    {
      "epoch": 0.4264580639056241,
      "grad_norm": 2.984375,
      "learning_rate": 4.765830458136353e-05,
      "loss": 0.9439,
      "step": 121680
    },
    {
      "epoch": 0.42649311141251967,
      "grad_norm": 3.203125,
      "learning_rate": 4.765765555269983e-05,
      "loss": 0.9863,
      "step": 121690
    },
    {
      "epoch": 0.42652815891941526,
      "grad_norm": 3.203125,
      "learning_rate": 4.765700652403613e-05,
      "loss": 0.951,
      "step": 121700
    },
    {
      "epoch": 0.42656320642631085,
      "grad_norm": 2.65625,
      "learning_rate": 4.765635749537243e-05,
      "loss": 0.8999,
      "step": 121710
    },
    {
      "epoch": 0.42659825393320644,
      "grad_norm": 3.015625,
      "learning_rate": 4.7655708466708724e-05,
      "loss": 0.9537,
      "step": 121720
    },
    {
      "epoch": 0.42663330144010203,
      "grad_norm": 3.28125,
      "learning_rate": 4.7655059438045025e-05,
      "loss": 0.9435,
      "step": 121730
    },
    {
      "epoch": 0.4266683489469977,
      "grad_norm": 2.796875,
      "learning_rate": 4.765441040938132e-05,
      "loss": 0.8708,
      "step": 121740
    },
    {
      "epoch": 0.42670339645389327,
      "grad_norm": 3.0,
      "learning_rate": 4.765376138071762e-05,
      "loss": 0.9155,
      "step": 121750
    },
    {
      "epoch": 0.42673844396078886,
      "grad_norm": 2.96875,
      "learning_rate": 4.765311235205392e-05,
      "loss": 0.9316,
      "step": 121760
    },
    {
      "epoch": 0.42677349146768445,
      "grad_norm": 2.890625,
      "learning_rate": 4.765246332339022e-05,
      "loss": 0.9701,
      "step": 121770
    },
    {
      "epoch": 0.42680853897458004,
      "grad_norm": 3.28125,
      "learning_rate": 4.765181429472652e-05,
      "loss": 0.9806,
      "step": 121780
    },
    {
      "epoch": 0.42684358648147563,
      "grad_norm": 2.875,
      "learning_rate": 4.765116526606281e-05,
      "loss": 0.9902,
      "step": 121790
    },
    {
      "epoch": 0.4268786339883712,
      "grad_norm": 3.15625,
      "learning_rate": 4.7650516237399114e-05,
      "loss": 0.979,
      "step": 121800
    },
    {
      "epoch": 0.4269136814952668,
      "grad_norm": 3.109375,
      "learning_rate": 4.764986720873541e-05,
      "loss": 0.8939,
      "step": 121810
    },
    {
      "epoch": 0.4269487290021624,
      "grad_norm": 2.671875,
      "learning_rate": 4.764921818007171e-05,
      "loss": 0.8962,
      "step": 121820
    },
    {
      "epoch": 0.42698377650905806,
      "grad_norm": 3.390625,
      "learning_rate": 4.7648569151408005e-05,
      "loss": 1.0241,
      "step": 121830
    },
    {
      "epoch": 0.42701882401595365,
      "grad_norm": 2.96875,
      "learning_rate": 4.7647920122744306e-05,
      "loss": 0.884,
      "step": 121840
    },
    {
      "epoch": 0.42705387152284924,
      "grad_norm": 4.125,
      "learning_rate": 4.76472710940806e-05,
      "loss": 0.8979,
      "step": 121850
    },
    {
      "epoch": 0.42708891902974483,
      "grad_norm": 2.796875,
      "learning_rate": 4.76466220654169e-05,
      "loss": 0.9794,
      "step": 121860
    },
    {
      "epoch": 0.4271239665366404,
      "grad_norm": 3.25,
      "learning_rate": 4.76459730367532e-05,
      "loss": 0.9223,
      "step": 121870
    },
    {
      "epoch": 0.427159014043536,
      "grad_norm": 3.046875,
      "learning_rate": 4.76453240080895e-05,
      "loss": 1.0153,
      "step": 121880
    },
    {
      "epoch": 0.4271940615504316,
      "grad_norm": 3.140625,
      "learning_rate": 4.764467497942579e-05,
      "loss": 0.9476,
      "step": 121890
    },
    {
      "epoch": 0.4272291090573272,
      "grad_norm": 3.4375,
      "learning_rate": 4.764402595076209e-05,
      "loss": 1.0781,
      "step": 121900
    },
    {
      "epoch": 0.4272641565642228,
      "grad_norm": 2.890625,
      "learning_rate": 4.764337692209839e-05,
      "loss": 0.8795,
      "step": 121910
    },
    {
      "epoch": 0.4272992040711184,
      "grad_norm": 2.90625,
      "learning_rate": 4.7642727893434684e-05,
      "loss": 0.9871,
      "step": 121920
    },
    {
      "epoch": 0.427334251578014,
      "grad_norm": 3.4375,
      "learning_rate": 4.7642078864770985e-05,
      "loss": 0.9165,
      "step": 121930
    },
    {
      "epoch": 0.4273692990849096,
      "grad_norm": 3.015625,
      "learning_rate": 4.764142983610728e-05,
      "loss": 0.9694,
      "step": 121940
    },
    {
      "epoch": 0.4274043465918052,
      "grad_norm": 3.546875,
      "learning_rate": 4.764078080744358e-05,
      "loss": 0.9365,
      "step": 121950
    },
    {
      "epoch": 0.4274393940987008,
      "grad_norm": 3.296875,
      "learning_rate": 4.7640131778779876e-05,
      "loss": 0.9394,
      "step": 121960
    },
    {
      "epoch": 0.4274744416055964,
      "grad_norm": 3.203125,
      "learning_rate": 4.763948275011618e-05,
      "loss": 1.0997,
      "step": 121970
    },
    {
      "epoch": 0.427509489112492,
      "grad_norm": 3.203125,
      "learning_rate": 4.763883372145247e-05,
      "loss": 0.9605,
      "step": 121980
    },
    {
      "epoch": 0.42754453661938757,
      "grad_norm": 2.75,
      "learning_rate": 4.763818469278877e-05,
      "loss": 0.9933,
      "step": 121990
    },
    {
      "epoch": 0.42757958412628316,
      "grad_norm": 3.03125,
      "learning_rate": 4.7637535664125074e-05,
      "loss": 0.9047,
      "step": 122000
    },
    {
      "epoch": 0.42761463163317875,
      "grad_norm": 3.203125,
      "learning_rate": 4.763688663546137e-05,
      "loss": 0.9388,
      "step": 122010
    },
    {
      "epoch": 0.42764967914007435,
      "grad_norm": 3.3125,
      "learning_rate": 4.763623760679767e-05,
      "loss": 0.9298,
      "step": 122020
    },
    {
      "epoch": 0.42768472664697,
      "grad_norm": 2.890625,
      "learning_rate": 4.7635588578133965e-05,
      "loss": 0.9372,
      "step": 122030
    },
    {
      "epoch": 0.4277197741538656,
      "grad_norm": 3.09375,
      "learning_rate": 4.7634939549470266e-05,
      "loss": 0.9737,
      "step": 122040
    },
    {
      "epoch": 0.4277548216607612,
      "grad_norm": 2.96875,
      "learning_rate": 4.763429052080656e-05,
      "loss": 0.9142,
      "step": 122050
    },
    {
      "epoch": 0.42778986916765677,
      "grad_norm": 3.21875,
      "learning_rate": 4.763364149214286e-05,
      "loss": 0.909,
      "step": 122060
    },
    {
      "epoch": 0.42782491667455236,
      "grad_norm": 2.5625,
      "learning_rate": 4.763299246347916e-05,
      "loss": 0.9654,
      "step": 122070
    },
    {
      "epoch": 0.42785996418144795,
      "grad_norm": 3.21875,
      "learning_rate": 4.763234343481546e-05,
      "loss": 0.8823,
      "step": 122080
    },
    {
      "epoch": 0.42789501168834354,
      "grad_norm": 3.25,
      "learning_rate": 4.763169440615175e-05,
      "loss": 0.9133,
      "step": 122090
    },
    {
      "epoch": 0.42793005919523913,
      "grad_norm": 3.3125,
      "learning_rate": 4.7631045377488054e-05,
      "loss": 0.9874,
      "step": 122100
    },
    {
      "epoch": 0.4279651067021347,
      "grad_norm": 2.859375,
      "learning_rate": 4.763039634882435e-05,
      "loss": 0.9628,
      "step": 122110
    },
    {
      "epoch": 0.4280001542090303,
      "grad_norm": 2.90625,
      "learning_rate": 4.762974732016065e-05,
      "loss": 0.921,
      "step": 122120
    },
    {
      "epoch": 0.42803520171592596,
      "grad_norm": 3.0,
      "learning_rate": 4.762909829149695e-05,
      "loss": 0.9259,
      "step": 122130
    },
    {
      "epoch": 0.42807024922282155,
      "grad_norm": 2.65625,
      "learning_rate": 4.7628449262833246e-05,
      "loss": 0.9667,
      "step": 122140
    },
    {
      "epoch": 0.42810529672971714,
      "grad_norm": 2.96875,
      "learning_rate": 4.762780023416955e-05,
      "loss": 0.9776,
      "step": 122150
    },
    {
      "epoch": 0.42814034423661274,
      "grad_norm": 3.359375,
      "learning_rate": 4.762715120550584e-05,
      "loss": 1.0519,
      "step": 122160
    },
    {
      "epoch": 0.4281753917435083,
      "grad_norm": 3.03125,
      "learning_rate": 4.7626502176842144e-05,
      "loss": 0.8638,
      "step": 122170
    },
    {
      "epoch": 0.4282104392504039,
      "grad_norm": 3.09375,
      "learning_rate": 4.762585314817844e-05,
      "loss": 0.9034,
      "step": 122180
    },
    {
      "epoch": 0.4282454867572995,
      "grad_norm": 3.1875,
      "learning_rate": 4.762520411951474e-05,
      "loss": 0.8961,
      "step": 122190
    },
    {
      "epoch": 0.4282805342641951,
      "grad_norm": 2.9375,
      "learning_rate": 4.7624555090851034e-05,
      "loss": 0.8202,
      "step": 122200
    },
    {
      "epoch": 0.4283155817710907,
      "grad_norm": 3.4375,
      "learning_rate": 4.7623906062187336e-05,
      "loss": 1.0061,
      "step": 122210
    },
    {
      "epoch": 0.4283506292779863,
      "grad_norm": 2.984375,
      "learning_rate": 4.762325703352363e-05,
      "loss": 0.9969,
      "step": 122220
    },
    {
      "epoch": 0.42838567678488193,
      "grad_norm": 3.0625,
      "learning_rate": 4.762260800485993e-05,
      "loss": 0.8938,
      "step": 122230
    },
    {
      "epoch": 0.4284207242917775,
      "grad_norm": 3.3125,
      "learning_rate": 4.7621958976196226e-05,
      "loss": 0.928,
      "step": 122240
    },
    {
      "epoch": 0.4284557717986731,
      "grad_norm": 3.015625,
      "learning_rate": 4.762130994753253e-05,
      "loss": 0.9311,
      "step": 122250
    },
    {
      "epoch": 0.4284908193055687,
      "grad_norm": 3.21875,
      "learning_rate": 4.762066091886882e-05,
      "loss": 0.9043,
      "step": 122260
    },
    {
      "epoch": 0.4285258668124643,
      "grad_norm": 2.75,
      "learning_rate": 4.7620011890205124e-05,
      "loss": 0.9286,
      "step": 122270
    },
    {
      "epoch": 0.4285609143193599,
      "grad_norm": 3.453125,
      "learning_rate": 4.761936286154142e-05,
      "loss": 0.9493,
      "step": 122280
    },
    {
      "epoch": 0.4285959618262555,
      "grad_norm": 2.75,
      "learning_rate": 4.761871383287771e-05,
      "loss": 0.873,
      "step": 122290
    },
    {
      "epoch": 0.42863100933315107,
      "grad_norm": 3.0625,
      "learning_rate": 4.7618064804214014e-05,
      "loss": 0.8585,
      "step": 122300
    },
    {
      "epoch": 0.42866605684004666,
      "grad_norm": 3.203125,
      "learning_rate": 4.761741577555031e-05,
      "loss": 0.9262,
      "step": 122310
    },
    {
      "epoch": 0.4287011043469423,
      "grad_norm": 3.390625,
      "learning_rate": 4.761676674688661e-05,
      "loss": 0.9768,
      "step": 122320
    },
    {
      "epoch": 0.4287361518538379,
      "grad_norm": 3.25,
      "learning_rate": 4.7616117718222905e-05,
      "loss": 0.9098,
      "step": 122330
    },
    {
      "epoch": 0.4287711993607335,
      "grad_norm": 3.3125,
      "learning_rate": 4.7615468689559206e-05,
      "loss": 1.0421,
      "step": 122340
    },
    {
      "epoch": 0.4288062468676291,
      "grad_norm": 2.828125,
      "learning_rate": 4.761481966089551e-05,
      "loss": 0.9518,
      "step": 122350
    },
    {
      "epoch": 0.4288412943745247,
      "grad_norm": 2.75,
      "learning_rate": 4.76141706322318e-05,
      "loss": 0.9344,
      "step": 122360
    },
    {
      "epoch": 0.42887634188142026,
      "grad_norm": 3.375,
      "learning_rate": 4.7613521603568104e-05,
      "loss": 0.9209,
      "step": 122370
    },
    {
      "epoch": 0.42891138938831586,
      "grad_norm": 2.828125,
      "learning_rate": 4.76128725749044e-05,
      "loss": 0.8786,
      "step": 122380
    },
    {
      "epoch": 0.42894643689521145,
      "grad_norm": 3.046875,
      "learning_rate": 4.76122235462407e-05,
      "loss": 0.95,
      "step": 122390
    },
    {
      "epoch": 0.42898148440210704,
      "grad_norm": 3.15625,
      "learning_rate": 4.7611574517576994e-05,
      "loss": 0.9779,
      "step": 122400
    },
    {
      "epoch": 0.42901653190900263,
      "grad_norm": 2.828125,
      "learning_rate": 4.7610925488913296e-05,
      "loss": 1.0285,
      "step": 122410
    },
    {
      "epoch": 0.4290515794158983,
      "grad_norm": 2.59375,
      "learning_rate": 4.761027646024959e-05,
      "loss": 0.9074,
      "step": 122420
    },
    {
      "epoch": 0.42908662692279387,
      "grad_norm": 3.375,
      "learning_rate": 4.760962743158589e-05,
      "loss": 1.0983,
      "step": 122430
    },
    {
      "epoch": 0.42912167442968946,
      "grad_norm": 3.1875,
      "learning_rate": 4.7608978402922186e-05,
      "loss": 0.9494,
      "step": 122440
    },
    {
      "epoch": 0.42915672193658505,
      "grad_norm": 2.953125,
      "learning_rate": 4.760832937425849e-05,
      "loss": 0.9516,
      "step": 122450
    },
    {
      "epoch": 0.42919176944348064,
      "grad_norm": 3.203125,
      "learning_rate": 4.760768034559478e-05,
      "loss": 0.9723,
      "step": 122460
    },
    {
      "epoch": 0.42922681695037623,
      "grad_norm": 3.78125,
      "learning_rate": 4.7607031316931084e-05,
      "loss": 0.8469,
      "step": 122470
    },
    {
      "epoch": 0.4292618644572718,
      "grad_norm": 2.5,
      "learning_rate": 4.760638228826738e-05,
      "loss": 0.8837,
      "step": 122480
    },
    {
      "epoch": 0.4292969119641674,
      "grad_norm": 2.875,
      "learning_rate": 4.760573325960368e-05,
      "loss": 0.9779,
      "step": 122490
    },
    {
      "epoch": 0.429331959471063,
      "grad_norm": 3.34375,
      "learning_rate": 4.760508423093998e-05,
      "loss": 0.9983,
      "step": 122500
    },
    {
      "epoch": 0.4293670069779586,
      "grad_norm": 2.984375,
      "learning_rate": 4.7604435202276276e-05,
      "loss": 0.9116,
      "step": 122510
    },
    {
      "epoch": 0.42940205448485425,
      "grad_norm": 3.03125,
      "learning_rate": 4.760378617361258e-05,
      "loss": 0.8942,
      "step": 122520
    },
    {
      "epoch": 0.42943710199174984,
      "grad_norm": 2.984375,
      "learning_rate": 4.760313714494887e-05,
      "loss": 0.9344,
      "step": 122530
    },
    {
      "epoch": 0.42947214949864543,
      "grad_norm": 3.34375,
      "learning_rate": 4.760248811628517e-05,
      "loss": 0.9405,
      "step": 122540
    },
    {
      "epoch": 0.429507197005541,
      "grad_norm": 3.46875,
      "learning_rate": 4.760183908762147e-05,
      "loss": 0.9257,
      "step": 122550
    },
    {
      "epoch": 0.4295422445124366,
      "grad_norm": 3.109375,
      "learning_rate": 4.760119005895777e-05,
      "loss": 0.9328,
      "step": 122560
    },
    {
      "epoch": 0.4295772920193322,
      "grad_norm": 2.984375,
      "learning_rate": 4.7600541030294064e-05,
      "loss": 0.9582,
      "step": 122570
    },
    {
      "epoch": 0.4296123395262278,
      "grad_norm": 3.046875,
      "learning_rate": 4.7599892001630365e-05,
      "loss": 0.9195,
      "step": 122580
    },
    {
      "epoch": 0.4296473870331234,
      "grad_norm": 2.671875,
      "learning_rate": 4.759924297296666e-05,
      "loss": 1.0733,
      "step": 122590
    },
    {
      "epoch": 0.429682434540019,
      "grad_norm": 3.03125,
      "learning_rate": 4.759859394430296e-05,
      "loss": 0.9476,
      "step": 122600
    },
    {
      "epoch": 0.42971748204691457,
      "grad_norm": 2.671875,
      "learning_rate": 4.7597944915639256e-05,
      "loss": 0.9522,
      "step": 122610
    },
    {
      "epoch": 0.4297525295538102,
      "grad_norm": 3.125,
      "learning_rate": 4.759729588697556e-05,
      "loss": 0.9186,
      "step": 122620
    },
    {
      "epoch": 0.4297875770607058,
      "grad_norm": 3.3125,
      "learning_rate": 4.759664685831186e-05,
      "loss": 0.9123,
      "step": 122630
    },
    {
      "epoch": 0.4298226245676014,
      "grad_norm": 2.53125,
      "learning_rate": 4.759599782964815e-05,
      "loss": 0.9437,
      "step": 122640
    },
    {
      "epoch": 0.429857672074497,
      "grad_norm": 3.59375,
      "learning_rate": 4.7595348800984455e-05,
      "loss": 0.9452,
      "step": 122650
    },
    {
      "epoch": 0.4298927195813926,
      "grad_norm": 2.484375,
      "learning_rate": 4.759469977232074e-05,
      "loss": 0.9155,
      "step": 122660
    },
    {
      "epoch": 0.42992776708828817,
      "grad_norm": 2.6875,
      "learning_rate": 4.7594050743657044e-05,
      "loss": 0.9438,
      "step": 122670
    },
    {
      "epoch": 0.42996281459518376,
      "grad_norm": 2.71875,
      "learning_rate": 4.759340171499334e-05,
      "loss": 0.9492,
      "step": 122680
    },
    {
      "epoch": 0.42999786210207935,
      "grad_norm": 3.03125,
      "learning_rate": 4.759275268632964e-05,
      "loss": 0.8145,
      "step": 122690
    },
    {
      "epoch": 0.43003290960897494,
      "grad_norm": 3.140625,
      "learning_rate": 4.7592103657665934e-05,
      "loss": 0.9793,
      "step": 122700
    },
    {
      "epoch": 0.43006795711587054,
      "grad_norm": 2.9375,
      "learning_rate": 4.7591454629002236e-05,
      "loss": 0.9546,
      "step": 122710
    },
    {
      "epoch": 0.4301030046227662,
      "grad_norm": 3.03125,
      "learning_rate": 4.759080560033854e-05,
      "loss": 0.9603,
      "step": 122720
    },
    {
      "epoch": 0.4301380521296618,
      "grad_norm": 3.046875,
      "learning_rate": 4.759015657167483e-05,
      "loss": 0.9499,
      "step": 122730
    },
    {
      "epoch": 0.43017309963655737,
      "grad_norm": 2.921875,
      "learning_rate": 4.758950754301113e-05,
      "loss": 0.8755,
      "step": 122740
    },
    {
      "epoch": 0.43020814714345296,
      "grad_norm": 3.015625,
      "learning_rate": 4.758885851434743e-05,
      "loss": 0.8757,
      "step": 122750
    },
    {
      "epoch": 0.43024319465034855,
      "grad_norm": 3.1875,
      "learning_rate": 4.758820948568373e-05,
      "loss": 1.0237,
      "step": 122760
    },
    {
      "epoch": 0.43027824215724414,
      "grad_norm": 3.34375,
      "learning_rate": 4.7587560457020024e-05,
      "loss": 0.9995,
      "step": 122770
    },
    {
      "epoch": 0.43031328966413973,
      "grad_norm": 3.0625,
      "learning_rate": 4.7586911428356325e-05,
      "loss": 0.913,
      "step": 122780
    },
    {
      "epoch": 0.4303483371710353,
      "grad_norm": 2.671875,
      "learning_rate": 4.758626239969262e-05,
      "loss": 0.944,
      "step": 122790
    },
    {
      "epoch": 0.4303833846779309,
      "grad_norm": 3.40625,
      "learning_rate": 4.758561337102892e-05,
      "loss": 0.9786,
      "step": 122800
    },
    {
      "epoch": 0.4304184321848265,
      "grad_norm": 3.75,
      "learning_rate": 4.7584964342365216e-05,
      "loss": 0.9432,
      "step": 122810
    },
    {
      "epoch": 0.43045347969172215,
      "grad_norm": 3.421875,
      "learning_rate": 4.758431531370152e-05,
      "loss": 0.9213,
      "step": 122820
    },
    {
      "epoch": 0.43048852719861774,
      "grad_norm": 2.953125,
      "learning_rate": 4.758366628503781e-05,
      "loss": 0.9662,
      "step": 122830
    },
    {
      "epoch": 0.43052357470551333,
      "grad_norm": 3.015625,
      "learning_rate": 4.758301725637411e-05,
      "loss": 0.9238,
      "step": 122840
    },
    {
      "epoch": 0.4305586222124089,
      "grad_norm": 3.03125,
      "learning_rate": 4.758236822771041e-05,
      "loss": 0.9308,
      "step": 122850
    },
    {
      "epoch": 0.4305936697193045,
      "grad_norm": 2.65625,
      "learning_rate": 4.758171919904671e-05,
      "loss": 0.9741,
      "step": 122860
    },
    {
      "epoch": 0.4306287172262001,
      "grad_norm": 3.0,
      "learning_rate": 4.758107017038301e-05,
      "loss": 0.9403,
      "step": 122870
    },
    {
      "epoch": 0.4306637647330957,
      "grad_norm": 2.984375,
      "learning_rate": 4.7580421141719305e-05,
      "loss": 0.9175,
      "step": 122880
    },
    {
      "epoch": 0.4306988122399913,
      "grad_norm": 2.984375,
      "learning_rate": 4.7579772113055607e-05,
      "loss": 0.9301,
      "step": 122890
    },
    {
      "epoch": 0.4307338597468869,
      "grad_norm": 3.1875,
      "learning_rate": 4.75791230843919e-05,
      "loss": 0.9351,
      "step": 122900
    },
    {
      "epoch": 0.43076890725378253,
      "grad_norm": 2.453125,
      "learning_rate": 4.75784740557282e-05,
      "loss": 0.8962,
      "step": 122910
    },
    {
      "epoch": 0.4308039547606781,
      "grad_norm": 2.90625,
      "learning_rate": 4.75778250270645e-05,
      "loss": 1.0033,
      "step": 122920
    },
    {
      "epoch": 0.4308390022675737,
      "grad_norm": 3.015625,
      "learning_rate": 4.75771759984008e-05,
      "loss": 0.9418,
      "step": 122930
    },
    {
      "epoch": 0.4308740497744693,
      "grad_norm": 2.625,
      "learning_rate": 4.757652696973709e-05,
      "loss": 0.8731,
      "step": 122940
    },
    {
      "epoch": 0.4309090972813649,
      "grad_norm": 3.546875,
      "learning_rate": 4.7575877941073395e-05,
      "loss": 0.9347,
      "step": 122950
    },
    {
      "epoch": 0.4309441447882605,
      "grad_norm": 3.40625,
      "learning_rate": 4.757522891240969e-05,
      "loss": 0.9761,
      "step": 122960
    },
    {
      "epoch": 0.4309791922951561,
      "grad_norm": 2.75,
      "learning_rate": 4.757457988374599e-05,
      "loss": 0.9019,
      "step": 122970
    },
    {
      "epoch": 0.43101423980205167,
      "grad_norm": 2.75,
      "learning_rate": 4.7573930855082285e-05,
      "loss": 0.9158,
      "step": 122980
    },
    {
      "epoch": 0.43104928730894726,
      "grad_norm": 3.140625,
      "learning_rate": 4.7573281826418587e-05,
      "loss": 0.9574,
      "step": 122990
    },
    {
      "epoch": 0.43108433481584285,
      "grad_norm": 3.171875,
      "learning_rate": 4.757263279775489e-05,
      "loss": 1.0122,
      "step": 123000
    },
    {
      "epoch": 0.4311193823227385,
      "grad_norm": 3.203125,
      "learning_rate": 4.757198376909118e-05,
      "loss": 1.0271,
      "step": 123010
    },
    {
      "epoch": 0.4311544298296341,
      "grad_norm": 2.953125,
      "learning_rate": 4.7571334740427484e-05,
      "loss": 1.0502,
      "step": 123020
    },
    {
      "epoch": 0.4311894773365297,
      "grad_norm": 2.84375,
      "learning_rate": 4.757068571176377e-05,
      "loss": 0.8695,
      "step": 123030
    },
    {
      "epoch": 0.43122452484342527,
      "grad_norm": 3.28125,
      "learning_rate": 4.757003668310007e-05,
      "loss": 0.9243,
      "step": 123040
    },
    {
      "epoch": 0.43125957235032086,
      "grad_norm": 2.640625,
      "learning_rate": 4.756938765443637e-05,
      "loss": 0.8728,
      "step": 123050
    },
    {
      "epoch": 0.43129461985721645,
      "grad_norm": 3.21875,
      "learning_rate": 4.756873862577267e-05,
      "loss": 0.963,
      "step": 123060
    },
    {
      "epoch": 0.43132966736411205,
      "grad_norm": 2.953125,
      "learning_rate": 4.7568089597108964e-05,
      "loss": 0.9302,
      "step": 123070
    },
    {
      "epoch": 0.43136471487100764,
      "grad_norm": 2.84375,
      "learning_rate": 4.7567440568445265e-05,
      "loss": 0.9761,
      "step": 123080
    },
    {
      "epoch": 0.43139976237790323,
      "grad_norm": 2.578125,
      "learning_rate": 4.7566791539781567e-05,
      "loss": 0.8367,
      "step": 123090
    },
    {
      "epoch": 0.4314348098847988,
      "grad_norm": 2.921875,
      "learning_rate": 4.756614251111786e-05,
      "loss": 0.9298,
      "step": 123100
    },
    {
      "epoch": 0.43146985739169447,
      "grad_norm": 3.25,
      "learning_rate": 4.756549348245416e-05,
      "loss": 0.9718,
      "step": 123110
    },
    {
      "epoch": 0.43150490489859006,
      "grad_norm": 2.8125,
      "learning_rate": 4.756484445379046e-05,
      "loss": 0.9841,
      "step": 123120
    },
    {
      "epoch": 0.43153995240548565,
      "grad_norm": 2.9375,
      "learning_rate": 4.756419542512676e-05,
      "loss": 1.0005,
      "step": 123130
    },
    {
      "epoch": 0.43157499991238124,
      "grad_norm": 3.390625,
      "learning_rate": 4.756354639646305e-05,
      "loss": 0.8796,
      "step": 123140
    },
    {
      "epoch": 0.43161004741927683,
      "grad_norm": 2.734375,
      "learning_rate": 4.7562897367799355e-05,
      "loss": 1.0006,
      "step": 123150
    },
    {
      "epoch": 0.4316450949261724,
      "grad_norm": 3.234375,
      "learning_rate": 4.756224833913565e-05,
      "loss": 0.9259,
      "step": 123160
    },
    {
      "epoch": 0.431680142433068,
      "grad_norm": 3.0625,
      "learning_rate": 4.756159931047195e-05,
      "loss": 0.9484,
      "step": 123170
    },
    {
      "epoch": 0.4317151899399636,
      "grad_norm": 3.4375,
      "learning_rate": 4.7560950281808245e-05,
      "loss": 0.9009,
      "step": 123180
    },
    {
      "epoch": 0.4317502374468592,
      "grad_norm": 2.96875,
      "learning_rate": 4.7560301253144547e-05,
      "loss": 0.9498,
      "step": 123190
    },
    {
      "epoch": 0.4317852849537548,
      "grad_norm": 3.03125,
      "learning_rate": 4.755965222448084e-05,
      "loss": 0.9073,
      "step": 123200
    },
    {
      "epoch": 0.43182033246065044,
      "grad_norm": 3.5625,
      "learning_rate": 4.755900319581714e-05,
      "loss": 1.0256,
      "step": 123210
    },
    {
      "epoch": 0.431855379967546,
      "grad_norm": 2.578125,
      "learning_rate": 4.755835416715344e-05,
      "loss": 0.9035,
      "step": 123220
    },
    {
      "epoch": 0.4318904274744416,
      "grad_norm": 2.59375,
      "learning_rate": 4.755770513848974e-05,
      "loss": 0.8931,
      "step": 123230
    },
    {
      "epoch": 0.4319254749813372,
      "grad_norm": 2.796875,
      "learning_rate": 4.755705610982604e-05,
      "loss": 0.8632,
      "step": 123240
    },
    {
      "epoch": 0.4319605224882328,
      "grad_norm": 2.84375,
      "learning_rate": 4.7556407081162335e-05,
      "loss": 0.9842,
      "step": 123250
    },
    {
      "epoch": 0.4319955699951284,
      "grad_norm": 2.890625,
      "learning_rate": 4.7555758052498636e-05,
      "loss": 0.9779,
      "step": 123260
    },
    {
      "epoch": 0.432030617502024,
      "grad_norm": 3.15625,
      "learning_rate": 4.755510902383493e-05,
      "loss": 0.9738,
      "step": 123270
    },
    {
      "epoch": 0.4320656650089196,
      "grad_norm": 3.21875,
      "learning_rate": 4.755445999517123e-05,
      "loss": 0.9991,
      "step": 123280
    },
    {
      "epoch": 0.43210071251581517,
      "grad_norm": 3.28125,
      "learning_rate": 4.7553810966507527e-05,
      "loss": 1.01,
      "step": 123290
    },
    {
      "epoch": 0.43213576002271076,
      "grad_norm": 2.9375,
      "learning_rate": 4.755316193784383e-05,
      "loss": 0.8131,
      "step": 123300
    },
    {
      "epoch": 0.4321708075296064,
      "grad_norm": 2.859375,
      "learning_rate": 4.755251290918012e-05,
      "loss": 0.925,
      "step": 123310
    },
    {
      "epoch": 0.432205855036502,
      "grad_norm": 3.03125,
      "learning_rate": 4.7551863880516424e-05,
      "loss": 0.9963,
      "step": 123320
    },
    {
      "epoch": 0.4322409025433976,
      "grad_norm": 2.6875,
      "learning_rate": 4.755121485185272e-05,
      "loss": 0.9226,
      "step": 123330
    },
    {
      "epoch": 0.4322759500502932,
      "grad_norm": 3.421875,
      "learning_rate": 4.755056582318902e-05,
      "loss": 1.0282,
      "step": 123340
    },
    {
      "epoch": 0.43231099755718877,
      "grad_norm": 2.734375,
      "learning_rate": 4.7549916794525315e-05,
      "loss": 0.9908,
      "step": 123350
    },
    {
      "epoch": 0.43234604506408436,
      "grad_norm": 2.953125,
      "learning_rate": 4.7549267765861616e-05,
      "loss": 0.863,
      "step": 123360
    },
    {
      "epoch": 0.43238109257097995,
      "grad_norm": 3.265625,
      "learning_rate": 4.754861873719792e-05,
      "loss": 1.0632,
      "step": 123370
    },
    {
      "epoch": 0.43241614007787554,
      "grad_norm": 2.734375,
      "learning_rate": 4.754796970853421e-05,
      "loss": 0.8871,
      "step": 123380
    },
    {
      "epoch": 0.43245118758477114,
      "grad_norm": 3.078125,
      "learning_rate": 4.754732067987051e-05,
      "loss": 0.9976,
      "step": 123390
    },
    {
      "epoch": 0.4324862350916668,
      "grad_norm": 2.484375,
      "learning_rate": 4.754667165120681e-05,
      "loss": 0.9205,
      "step": 123400
    },
    {
      "epoch": 0.4325212825985624,
      "grad_norm": 3.109375,
      "learning_rate": 4.75460226225431e-05,
      "loss": 0.9854,
      "step": 123410
    },
    {
      "epoch": 0.43255633010545796,
      "grad_norm": 2.78125,
      "learning_rate": 4.75453735938794e-05,
      "loss": 0.9297,
      "step": 123420
    },
    {
      "epoch": 0.43259137761235356,
      "grad_norm": 3.328125,
      "learning_rate": 4.75447245652157e-05,
      "loss": 0.9571,
      "step": 123430
    },
    {
      "epoch": 0.43262642511924915,
      "grad_norm": 2.953125,
      "learning_rate": 4.754407553655199e-05,
      "loss": 0.8782,
      "step": 123440
    },
    {
      "epoch": 0.43266147262614474,
      "grad_norm": 3.09375,
      "learning_rate": 4.7543426507888295e-05,
      "loss": 0.9686,
      "step": 123450
    },
    {
      "epoch": 0.43269652013304033,
      "grad_norm": 3.0625,
      "learning_rate": 4.7542777479224596e-05,
      "loss": 0.9555,
      "step": 123460
    },
    {
      "epoch": 0.4327315676399359,
      "grad_norm": 3.296875,
      "learning_rate": 4.754212845056089e-05,
      "loss": 0.8828,
      "step": 123470
    },
    {
      "epoch": 0.4327666151468315,
      "grad_norm": 3.515625,
      "learning_rate": 4.754147942189719e-05,
      "loss": 0.9819,
      "step": 123480
    },
    {
      "epoch": 0.4328016626537271,
      "grad_norm": 3.125,
      "learning_rate": 4.7540830393233487e-05,
      "loss": 0.8849,
      "step": 123490
    },
    {
      "epoch": 0.43283671016062275,
      "grad_norm": 2.90625,
      "learning_rate": 4.754018136456979e-05,
      "loss": 0.9191,
      "step": 123500
    },
    {
      "epoch": 0.43287175766751834,
      "grad_norm": 2.84375,
      "learning_rate": 4.753953233590608e-05,
      "loss": 0.8498,
      "step": 123510
    },
    {
      "epoch": 0.43290680517441393,
      "grad_norm": 2.9375,
      "learning_rate": 4.7538883307242384e-05,
      "loss": 0.857,
      "step": 123520
    },
    {
      "epoch": 0.4329418526813095,
      "grad_norm": 3.265625,
      "learning_rate": 4.753823427857868e-05,
      "loss": 0.947,
      "step": 123530
    },
    {
      "epoch": 0.4329769001882051,
      "grad_norm": 2.828125,
      "learning_rate": 4.753758524991498e-05,
      "loss": 0.9679,
      "step": 123540
    },
    {
      "epoch": 0.4330119476951007,
      "grad_norm": 3.28125,
      "learning_rate": 4.7536936221251275e-05,
      "loss": 0.8961,
      "step": 123550
    },
    {
      "epoch": 0.4330469952019963,
      "grad_norm": 3.375,
      "learning_rate": 4.7536287192587576e-05,
      "loss": 0.9137,
      "step": 123560
    },
    {
      "epoch": 0.4330820427088919,
      "grad_norm": 3.1875,
      "learning_rate": 4.753563816392387e-05,
      "loss": 0.9097,
      "step": 123570
    },
    {
      "epoch": 0.4331170902157875,
      "grad_norm": 2.5,
      "learning_rate": 4.753498913526017e-05,
      "loss": 0.9478,
      "step": 123580
    },
    {
      "epoch": 0.4331521377226831,
      "grad_norm": 3.25,
      "learning_rate": 4.753434010659647e-05,
      "loss": 1.0258,
      "step": 123590
    },
    {
      "epoch": 0.4331871852295787,
      "grad_norm": 2.828125,
      "learning_rate": 4.753369107793277e-05,
      "loss": 0.9291,
      "step": 123600
    },
    {
      "epoch": 0.4332222327364743,
      "grad_norm": 3.421875,
      "learning_rate": 4.753304204926907e-05,
      "loss": 1.0252,
      "step": 123610
    },
    {
      "epoch": 0.4332572802433699,
      "grad_norm": 3.21875,
      "learning_rate": 4.7532393020605364e-05,
      "loss": 0.944,
      "step": 123620
    },
    {
      "epoch": 0.4332923277502655,
      "grad_norm": 3.3125,
      "learning_rate": 4.7531743991941665e-05,
      "loss": 0.9002,
      "step": 123630
    },
    {
      "epoch": 0.4333273752571611,
      "grad_norm": 2.59375,
      "learning_rate": 4.753109496327796e-05,
      "loss": 0.977,
      "step": 123640
    },
    {
      "epoch": 0.4333624227640567,
      "grad_norm": 3.46875,
      "learning_rate": 4.753044593461426e-05,
      "loss": 0.9564,
      "step": 123650
    },
    {
      "epoch": 0.43339747027095227,
      "grad_norm": 3.09375,
      "learning_rate": 4.7529796905950556e-05,
      "loss": 0.8472,
      "step": 123660
    },
    {
      "epoch": 0.43343251777784786,
      "grad_norm": 3.078125,
      "learning_rate": 4.752914787728686e-05,
      "loss": 0.9096,
      "step": 123670
    },
    {
      "epoch": 0.43346756528474345,
      "grad_norm": 2.828125,
      "learning_rate": 4.752849884862315e-05,
      "loss": 0.9712,
      "step": 123680
    },
    {
      "epoch": 0.43350261279163904,
      "grad_norm": 3.015625,
      "learning_rate": 4.752784981995945e-05,
      "loss": 0.9202,
      "step": 123690
    },
    {
      "epoch": 0.4335376602985347,
      "grad_norm": 2.734375,
      "learning_rate": 4.752720079129575e-05,
      "loss": 0.9372,
      "step": 123700
    },
    {
      "epoch": 0.4335727078054303,
      "grad_norm": 2.609375,
      "learning_rate": 4.752655176263205e-05,
      "loss": 0.9004,
      "step": 123710
    },
    {
      "epoch": 0.43360775531232587,
      "grad_norm": 3.203125,
      "learning_rate": 4.7525902733968344e-05,
      "loss": 0.919,
      "step": 123720
    },
    {
      "epoch": 0.43364280281922146,
      "grad_norm": 3.59375,
      "learning_rate": 4.7525253705304645e-05,
      "loss": 1.0339,
      "step": 123730
    },
    {
      "epoch": 0.43367785032611705,
      "grad_norm": 3.0,
      "learning_rate": 4.752460467664095e-05,
      "loss": 0.9741,
      "step": 123740
    },
    {
      "epoch": 0.43371289783301264,
      "grad_norm": 3.1875,
      "learning_rate": 4.752395564797724e-05,
      "loss": 0.9184,
      "step": 123750
    },
    {
      "epoch": 0.43374794533990824,
      "grad_norm": 2.671875,
      "learning_rate": 4.752330661931354e-05,
      "loss": 0.9353,
      "step": 123760
    },
    {
      "epoch": 0.4337829928468038,
      "grad_norm": 3.15625,
      "learning_rate": 4.752265759064984e-05,
      "loss": 0.9314,
      "step": 123770
    },
    {
      "epoch": 0.4338180403536994,
      "grad_norm": 2.96875,
      "learning_rate": 4.752200856198614e-05,
      "loss": 0.9938,
      "step": 123780
    },
    {
      "epoch": 0.433853087860595,
      "grad_norm": 2.96875,
      "learning_rate": 4.7521359533322427e-05,
      "loss": 0.9872,
      "step": 123790
    },
    {
      "epoch": 0.43388813536749066,
      "grad_norm": 2.875,
      "learning_rate": 4.752071050465873e-05,
      "loss": 0.9716,
      "step": 123800
    },
    {
      "epoch": 0.43392318287438625,
      "grad_norm": 2.984375,
      "learning_rate": 4.752006147599502e-05,
      "loss": 0.9348,
      "step": 123810
    },
    {
      "epoch": 0.43395823038128184,
      "grad_norm": 3.15625,
      "learning_rate": 4.7519412447331324e-05,
      "loss": 0.9043,
      "step": 123820
    },
    {
      "epoch": 0.43399327788817743,
      "grad_norm": 3.03125,
      "learning_rate": 4.7518763418667625e-05,
      "loss": 0.9958,
      "step": 123830
    },
    {
      "epoch": 0.434028325395073,
      "grad_norm": 3.1875,
      "learning_rate": 4.751811439000392e-05,
      "loss": 1.0166,
      "step": 123840
    },
    {
      "epoch": 0.4340633729019686,
      "grad_norm": 3.390625,
      "learning_rate": 4.751746536134022e-05,
      "loss": 1.0069,
      "step": 123850
    },
    {
      "epoch": 0.4340984204088642,
      "grad_norm": 3.28125,
      "learning_rate": 4.7516816332676516e-05,
      "loss": 1.0448,
      "step": 123860
    },
    {
      "epoch": 0.4341334679157598,
      "grad_norm": 3.265625,
      "learning_rate": 4.751616730401282e-05,
      "loss": 0.9797,
      "step": 123870
    },
    {
      "epoch": 0.4341685154226554,
      "grad_norm": 2.890625,
      "learning_rate": 4.751551827534911e-05,
      "loss": 0.979,
      "step": 123880
    },
    {
      "epoch": 0.434203562929551,
      "grad_norm": 2.890625,
      "learning_rate": 4.751486924668541e-05,
      "loss": 1.0405,
      "step": 123890
    },
    {
      "epoch": 0.4342386104364466,
      "grad_norm": 3.296875,
      "learning_rate": 4.751422021802171e-05,
      "loss": 0.9722,
      "step": 123900
    },
    {
      "epoch": 0.4342736579433422,
      "grad_norm": 3.1875,
      "learning_rate": 4.751357118935801e-05,
      "loss": 0.8905,
      "step": 123910
    },
    {
      "epoch": 0.4343087054502378,
      "grad_norm": 3.015625,
      "learning_rate": 4.7512922160694304e-05,
      "loss": 0.9373,
      "step": 123920
    },
    {
      "epoch": 0.4343437529571334,
      "grad_norm": 3.375,
      "learning_rate": 4.7512273132030605e-05,
      "loss": 0.9699,
      "step": 123930
    },
    {
      "epoch": 0.434378800464029,
      "grad_norm": 3.328125,
      "learning_rate": 4.75116241033669e-05,
      "loss": 0.8822,
      "step": 123940
    },
    {
      "epoch": 0.4344138479709246,
      "grad_norm": 2.953125,
      "learning_rate": 4.75109750747032e-05,
      "loss": 0.9207,
      "step": 123950
    },
    {
      "epoch": 0.4344488954778202,
      "grad_norm": 3.375,
      "learning_rate": 4.75103260460395e-05,
      "loss": 0.9049,
      "step": 123960
    },
    {
      "epoch": 0.43448394298471577,
      "grad_norm": 3.109375,
      "learning_rate": 4.75096770173758e-05,
      "loss": 0.9277,
      "step": 123970
    },
    {
      "epoch": 0.43451899049161136,
      "grad_norm": 3.203125,
      "learning_rate": 4.75090279887121e-05,
      "loss": 0.9995,
      "step": 123980
    },
    {
      "epoch": 0.434554037998507,
      "grad_norm": 3.015625,
      "learning_rate": 4.750837896004839e-05,
      "loss": 0.9237,
      "step": 123990
    },
    {
      "epoch": 0.4345890855054026,
      "grad_norm": 3.328125,
      "learning_rate": 4.7507729931384695e-05,
      "loss": 1.0171,
      "step": 124000
    },
    {
      "epoch": 0.4346241330122982,
      "grad_norm": 3.65625,
      "learning_rate": 4.750708090272099e-05,
      "loss": 1.0377,
      "step": 124010
    },
    {
      "epoch": 0.4346591805191938,
      "grad_norm": 2.96875,
      "learning_rate": 4.750643187405729e-05,
      "loss": 0.9172,
      "step": 124020
    },
    {
      "epoch": 0.43469422802608937,
      "grad_norm": 3.1875,
      "learning_rate": 4.7505782845393585e-05,
      "loss": 0.9258,
      "step": 124030
    },
    {
      "epoch": 0.43472927553298496,
      "grad_norm": 2.953125,
      "learning_rate": 4.750513381672989e-05,
      "loss": 0.9514,
      "step": 124040
    },
    {
      "epoch": 0.43476432303988055,
      "grad_norm": 3.125,
      "learning_rate": 4.750448478806618e-05,
      "loss": 0.9301,
      "step": 124050
    },
    {
      "epoch": 0.43479937054677614,
      "grad_norm": 3.0,
      "learning_rate": 4.750383575940248e-05,
      "loss": 1.0757,
      "step": 124060
    },
    {
      "epoch": 0.43483441805367173,
      "grad_norm": 3.078125,
      "learning_rate": 4.750318673073878e-05,
      "loss": 0.9117,
      "step": 124070
    },
    {
      "epoch": 0.4348694655605673,
      "grad_norm": 3.03125,
      "learning_rate": 4.750253770207508e-05,
      "loss": 0.9588,
      "step": 124080
    },
    {
      "epoch": 0.43490451306746297,
      "grad_norm": 3.5,
      "learning_rate": 4.750188867341137e-05,
      "loss": 0.88,
      "step": 124090
    },
    {
      "epoch": 0.43493956057435856,
      "grad_norm": 3.34375,
      "learning_rate": 4.7501239644747675e-05,
      "loss": 0.8989,
      "step": 124100
    },
    {
      "epoch": 0.43497460808125415,
      "grad_norm": 2.5,
      "learning_rate": 4.7500590616083976e-05,
      "loss": 0.8611,
      "step": 124110
    },
    {
      "epoch": 0.43500965558814975,
      "grad_norm": 3.078125,
      "learning_rate": 4.749994158742027e-05,
      "loss": 1.0035,
      "step": 124120
    },
    {
      "epoch": 0.43504470309504534,
      "grad_norm": 2.734375,
      "learning_rate": 4.749929255875657e-05,
      "loss": 0.9664,
      "step": 124130
    },
    {
      "epoch": 0.43507975060194093,
      "grad_norm": 3.171875,
      "learning_rate": 4.749864353009287e-05,
      "loss": 0.9481,
      "step": 124140
    },
    {
      "epoch": 0.4351147981088365,
      "grad_norm": 3.1875,
      "learning_rate": 4.749799450142917e-05,
      "loss": 0.9708,
      "step": 124150
    },
    {
      "epoch": 0.4351498456157321,
      "grad_norm": 3.109375,
      "learning_rate": 4.7497345472765456e-05,
      "loss": 0.9929,
      "step": 124160
    },
    {
      "epoch": 0.4351848931226277,
      "grad_norm": 3.484375,
      "learning_rate": 4.749669644410176e-05,
      "loss": 0.9295,
      "step": 124170
    },
    {
      "epoch": 0.4352199406295233,
      "grad_norm": 2.921875,
      "learning_rate": 4.749604741543805e-05,
      "loss": 0.9273,
      "step": 124180
    },
    {
      "epoch": 0.43525498813641894,
      "grad_norm": 3.171875,
      "learning_rate": 4.749539838677435e-05,
      "loss": 0.8875,
      "step": 124190
    },
    {
      "epoch": 0.43529003564331453,
      "grad_norm": 2.953125,
      "learning_rate": 4.7494749358110655e-05,
      "loss": 0.9914,
      "step": 124200
    },
    {
      "epoch": 0.4353250831502101,
      "grad_norm": 2.984375,
      "learning_rate": 4.749410032944695e-05,
      "loss": 0.911,
      "step": 124210
    },
    {
      "epoch": 0.4353601306571057,
      "grad_norm": 2.71875,
      "learning_rate": 4.749345130078325e-05,
      "loss": 0.9178,
      "step": 124220
    },
    {
      "epoch": 0.4353951781640013,
      "grad_norm": 2.828125,
      "learning_rate": 4.7492802272119545e-05,
      "loss": 0.9711,
      "step": 124230
    },
    {
      "epoch": 0.4354302256708969,
      "grad_norm": 2.9375,
      "learning_rate": 4.749215324345585e-05,
      "loss": 0.9183,
      "step": 124240
    },
    {
      "epoch": 0.4354652731777925,
      "grad_norm": 3.21875,
      "learning_rate": 4.749150421479214e-05,
      "loss": 1.0133,
      "step": 124250
    },
    {
      "epoch": 0.4355003206846881,
      "grad_norm": 2.78125,
      "learning_rate": 4.749085518612844e-05,
      "loss": 0.9638,
      "step": 124260
    },
    {
      "epoch": 0.43553536819158367,
      "grad_norm": 2.75,
      "learning_rate": 4.749020615746474e-05,
      "loss": 0.9651,
      "step": 124270
    },
    {
      "epoch": 0.43557041569847926,
      "grad_norm": 2.921875,
      "learning_rate": 4.748955712880104e-05,
      "loss": 0.9482,
      "step": 124280
    },
    {
      "epoch": 0.4356054632053749,
      "grad_norm": 2.921875,
      "learning_rate": 4.748890810013733e-05,
      "loss": 0.913,
      "step": 124290
    },
    {
      "epoch": 0.4356405107122705,
      "grad_norm": 2.6875,
      "learning_rate": 4.7488259071473635e-05,
      "loss": 0.9431,
      "step": 124300
    },
    {
      "epoch": 0.4356755582191661,
      "grad_norm": 2.875,
      "learning_rate": 4.748761004280993e-05,
      "loss": 0.9689,
      "step": 124310
    },
    {
      "epoch": 0.4357106057260617,
      "grad_norm": 2.703125,
      "learning_rate": 4.748696101414623e-05,
      "loss": 0.9246,
      "step": 124320
    },
    {
      "epoch": 0.4357456532329573,
      "grad_norm": 3.390625,
      "learning_rate": 4.748631198548253e-05,
      "loss": 0.9715,
      "step": 124330
    },
    {
      "epoch": 0.43578070073985287,
      "grad_norm": 2.796875,
      "learning_rate": 4.748566295681883e-05,
      "loss": 0.9145,
      "step": 124340
    },
    {
      "epoch": 0.43581574824674846,
      "grad_norm": 2.5625,
      "learning_rate": 4.748501392815513e-05,
      "loss": 0.9338,
      "step": 124350
    },
    {
      "epoch": 0.43585079575364405,
      "grad_norm": 3.203125,
      "learning_rate": 4.748436489949142e-05,
      "loss": 0.8995,
      "step": 124360
    },
    {
      "epoch": 0.43588584326053964,
      "grad_norm": 2.96875,
      "learning_rate": 4.7483715870827724e-05,
      "loss": 0.9644,
      "step": 124370
    },
    {
      "epoch": 0.43592089076743523,
      "grad_norm": 2.75,
      "learning_rate": 4.748306684216402e-05,
      "loss": 1.0083,
      "step": 124380
    },
    {
      "epoch": 0.4359559382743309,
      "grad_norm": 3.765625,
      "learning_rate": 4.748241781350032e-05,
      "loss": 0.9713,
      "step": 124390
    },
    {
      "epoch": 0.43599098578122647,
      "grad_norm": 3.390625,
      "learning_rate": 4.7481768784836615e-05,
      "loss": 0.9823,
      "step": 124400
    },
    {
      "epoch": 0.43602603328812206,
      "grad_norm": 2.8125,
      "learning_rate": 4.7481119756172916e-05,
      "loss": 0.9312,
      "step": 124410
    },
    {
      "epoch": 0.43606108079501765,
      "grad_norm": 3.046875,
      "learning_rate": 4.748047072750921e-05,
      "loss": 0.9343,
      "step": 124420
    },
    {
      "epoch": 0.43609612830191324,
      "grad_norm": 3.046875,
      "learning_rate": 4.747982169884551e-05,
      "loss": 0.9941,
      "step": 124430
    },
    {
      "epoch": 0.43613117580880884,
      "grad_norm": 2.78125,
      "learning_rate": 4.747917267018181e-05,
      "loss": 0.8737,
      "step": 124440
    },
    {
      "epoch": 0.4361662233157044,
      "grad_norm": 2.5625,
      "learning_rate": 4.747852364151811e-05,
      "loss": 0.9404,
      "step": 124450
    },
    {
      "epoch": 0.4362012708226,
      "grad_norm": 2.96875,
      "learning_rate": 4.747787461285441e-05,
      "loss": 0.9982,
      "step": 124460
    },
    {
      "epoch": 0.4362363183294956,
      "grad_norm": 2.984375,
      "learning_rate": 4.7477225584190704e-05,
      "loss": 0.9133,
      "step": 124470
    },
    {
      "epoch": 0.4362713658363912,
      "grad_norm": 3.109375,
      "learning_rate": 4.7476576555527005e-05,
      "loss": 0.9356,
      "step": 124480
    },
    {
      "epoch": 0.43630641334328685,
      "grad_norm": 2.5625,
      "learning_rate": 4.74759275268633e-05,
      "loss": 0.9055,
      "step": 124490
    },
    {
      "epoch": 0.43634146085018244,
      "grad_norm": 3.28125,
      "learning_rate": 4.74752784981996e-05,
      "loss": 0.9332,
      "step": 124500
    },
    {
      "epoch": 0.43637650835707803,
      "grad_norm": 3.296875,
      "learning_rate": 4.7474629469535896e-05,
      "loss": 0.9665,
      "step": 124510
    },
    {
      "epoch": 0.4364115558639736,
      "grad_norm": 3.3125,
      "learning_rate": 4.74739804408722e-05,
      "loss": 0.99,
      "step": 124520
    },
    {
      "epoch": 0.4364466033708692,
      "grad_norm": 2.84375,
      "learning_rate": 4.747333141220849e-05,
      "loss": 0.9257,
      "step": 124530
    },
    {
      "epoch": 0.4364816508777648,
      "grad_norm": 3.234375,
      "learning_rate": 4.747268238354479e-05,
      "loss": 0.8973,
      "step": 124540
    },
    {
      "epoch": 0.4365166983846604,
      "grad_norm": 3.0625,
      "learning_rate": 4.747203335488109e-05,
      "loss": 0.8844,
      "step": 124550
    },
    {
      "epoch": 0.436551745891556,
      "grad_norm": 2.765625,
      "learning_rate": 4.747138432621738e-05,
      "loss": 0.9651,
      "step": 124560
    },
    {
      "epoch": 0.4365867933984516,
      "grad_norm": 2.421875,
      "learning_rate": 4.7470735297553684e-05,
      "loss": 0.8843,
      "step": 124570
    },
    {
      "epoch": 0.4366218409053472,
      "grad_norm": 3.15625,
      "learning_rate": 4.747008626888998e-05,
      "loss": 0.8681,
      "step": 124580
    },
    {
      "epoch": 0.4366568884122428,
      "grad_norm": 3.09375,
      "learning_rate": 4.746943724022628e-05,
      "loss": 0.9696,
      "step": 124590
    },
    {
      "epoch": 0.4366919359191384,
      "grad_norm": 3.375,
      "learning_rate": 4.7468788211562575e-05,
      "loss": 0.9572,
      "step": 124600
    },
    {
      "epoch": 0.436726983426034,
      "grad_norm": 2.875,
      "learning_rate": 4.7468139182898876e-05,
      "loss": 0.9878,
      "step": 124610
    },
    {
      "epoch": 0.4367620309329296,
      "grad_norm": 3.640625,
      "learning_rate": 4.746749015423517e-05,
      "loss": 1.0184,
      "step": 124620
    },
    {
      "epoch": 0.4367970784398252,
      "grad_norm": 3.25,
      "learning_rate": 4.746684112557147e-05,
      "loss": 0.8797,
      "step": 124630
    },
    {
      "epoch": 0.4368321259467208,
      "grad_norm": 3.015625,
      "learning_rate": 4.746619209690777e-05,
      "loss": 0.9337,
      "step": 124640
    },
    {
      "epoch": 0.43686717345361636,
      "grad_norm": 3.09375,
      "learning_rate": 4.746554306824407e-05,
      "loss": 0.9516,
      "step": 124650
    },
    {
      "epoch": 0.43690222096051196,
      "grad_norm": 2.265625,
      "learning_rate": 4.746489403958036e-05,
      "loss": 0.9313,
      "step": 124660
    },
    {
      "epoch": 0.43693726846740755,
      "grad_norm": 2.859375,
      "learning_rate": 4.7464245010916664e-05,
      "loss": 1.0647,
      "step": 124670
    },
    {
      "epoch": 0.4369723159743032,
      "grad_norm": 3.03125,
      "learning_rate": 4.746359598225296e-05,
      "loss": 0.9586,
      "step": 124680
    },
    {
      "epoch": 0.4370073634811988,
      "grad_norm": 2.90625,
      "learning_rate": 4.746294695358926e-05,
      "loss": 0.9171,
      "step": 124690
    },
    {
      "epoch": 0.4370424109880944,
      "grad_norm": 3.171875,
      "learning_rate": 4.746229792492556e-05,
      "loss": 0.9079,
      "step": 124700
    },
    {
      "epoch": 0.43707745849498997,
      "grad_norm": 2.765625,
      "learning_rate": 4.7461648896261856e-05,
      "loss": 0.9351,
      "step": 124710
    },
    {
      "epoch": 0.43711250600188556,
      "grad_norm": 3.390625,
      "learning_rate": 4.746099986759816e-05,
      "loss": 0.8676,
      "step": 124720
    },
    {
      "epoch": 0.43714755350878115,
      "grad_norm": 2.953125,
      "learning_rate": 4.746035083893445e-05,
      "loss": 0.8917,
      "step": 124730
    },
    {
      "epoch": 0.43718260101567674,
      "grad_norm": 3.078125,
      "learning_rate": 4.7459701810270753e-05,
      "loss": 0.8895,
      "step": 124740
    },
    {
      "epoch": 0.43721764852257233,
      "grad_norm": 2.9375,
      "learning_rate": 4.745905278160705e-05,
      "loss": 1.0117,
      "step": 124750
    },
    {
      "epoch": 0.4372526960294679,
      "grad_norm": 3.15625,
      "learning_rate": 4.745840375294335e-05,
      "loss": 1.0592,
      "step": 124760
    },
    {
      "epoch": 0.4372877435363635,
      "grad_norm": 3.0,
      "learning_rate": 4.7457754724279644e-05,
      "loss": 0.9891,
      "step": 124770
    },
    {
      "epoch": 0.43732279104325916,
      "grad_norm": 3.34375,
      "learning_rate": 4.7457105695615945e-05,
      "loss": 0.9283,
      "step": 124780
    },
    {
      "epoch": 0.43735783855015475,
      "grad_norm": 3.421875,
      "learning_rate": 4.745645666695224e-05,
      "loss": 1.0028,
      "step": 124790
    },
    {
      "epoch": 0.43739288605705035,
      "grad_norm": 3.5625,
      "learning_rate": 4.745580763828854e-05,
      "loss": 0.8854,
      "step": 124800
    },
    {
      "epoch": 0.43742793356394594,
      "grad_norm": 3.15625,
      "learning_rate": 4.7455158609624836e-05,
      "loss": 0.8868,
      "step": 124810
    },
    {
      "epoch": 0.43746298107084153,
      "grad_norm": 2.9375,
      "learning_rate": 4.745450958096114e-05,
      "loss": 0.9769,
      "step": 124820
    },
    {
      "epoch": 0.4374980285777371,
      "grad_norm": 2.8125,
      "learning_rate": 4.745386055229744e-05,
      "loss": 0.9528,
      "step": 124830
    },
    {
      "epoch": 0.4375330760846327,
      "grad_norm": 2.96875,
      "learning_rate": 4.7453211523633733e-05,
      "loss": 0.9362,
      "step": 124840
    },
    {
      "epoch": 0.4375681235915283,
      "grad_norm": 3.0,
      "learning_rate": 4.7452562494970035e-05,
      "loss": 0.9622,
      "step": 124850
    },
    {
      "epoch": 0.4376031710984239,
      "grad_norm": 2.703125,
      "learning_rate": 4.745191346630633e-05,
      "loss": 0.8511,
      "step": 124860
    },
    {
      "epoch": 0.4376382186053195,
      "grad_norm": 3.875,
      "learning_rate": 4.745126443764263e-05,
      "loss": 0.8926,
      "step": 124870
    },
    {
      "epoch": 0.43767326611221513,
      "grad_norm": 2.90625,
      "learning_rate": 4.7450615408978925e-05,
      "loss": 1.0328,
      "step": 124880
    },
    {
      "epoch": 0.4377083136191107,
      "grad_norm": 2.875,
      "learning_rate": 4.744996638031523e-05,
      "loss": 0.9651,
      "step": 124890
    },
    {
      "epoch": 0.4377433611260063,
      "grad_norm": 4.03125,
      "learning_rate": 4.744931735165152e-05,
      "loss": 0.972,
      "step": 124900
    },
    {
      "epoch": 0.4377784086329019,
      "grad_norm": 3.328125,
      "learning_rate": 4.7448668322987816e-05,
      "loss": 0.9774,
      "step": 124910
    },
    {
      "epoch": 0.4378134561397975,
      "grad_norm": 2.75,
      "learning_rate": 4.744801929432412e-05,
      "loss": 0.9162,
      "step": 124920
    },
    {
      "epoch": 0.4378485036466931,
      "grad_norm": 3.078125,
      "learning_rate": 4.744737026566041e-05,
      "loss": 0.9667,
      "step": 124930
    },
    {
      "epoch": 0.4378835511535887,
      "grad_norm": 3.09375,
      "learning_rate": 4.7446721236996713e-05,
      "loss": 0.9795,
      "step": 124940
    },
    {
      "epoch": 0.43791859866048427,
      "grad_norm": 3.359375,
      "learning_rate": 4.744607220833301e-05,
      "loss": 0.9242,
      "step": 124950
    },
    {
      "epoch": 0.43795364616737986,
      "grad_norm": 2.96875,
      "learning_rate": 4.744542317966931e-05,
      "loss": 1.0041,
      "step": 124960
    },
    {
      "epoch": 0.43798869367427545,
      "grad_norm": 2.625,
      "learning_rate": 4.7444774151005604e-05,
      "loss": 0.9211,
      "step": 124970
    },
    {
      "epoch": 0.4380237411811711,
      "grad_norm": 3.015625,
      "learning_rate": 4.7444125122341905e-05,
      "loss": 0.9889,
      "step": 124980
    },
    {
      "epoch": 0.4380587886880667,
      "grad_norm": 3.15625,
      "learning_rate": 4.74434760936782e-05,
      "loss": 0.9218,
      "step": 124990
    },
    {
      "epoch": 0.4380938361949623,
      "grad_norm": 2.78125,
      "learning_rate": 4.74428270650145e-05,
      "loss": 0.9514,
      "step": 125000
    },
    {
      "epoch": 0.4380938361949623,
      "eval_loss": 0.8830316662788391,
      "eval_runtime": 550.566,
      "eval_samples_per_second": 690.991,
      "eval_steps_per_second": 57.583,
      "step": 125000
    },
    {
      "epoch": 0.4381288837018579,
      "grad_norm": 2.9375,
      "learning_rate": 4.7442178036350796e-05,
      "loss": 0.9021,
      "step": 125010
    },
    {
      "epoch": 0.43816393120875347,
      "grad_norm": 3.265625,
      "learning_rate": 4.74415290076871e-05,
      "loss": 0.9299,
      "step": 125020
    },
    {
      "epoch": 0.43819897871564906,
      "grad_norm": 3.28125,
      "learning_rate": 4.744087997902339e-05,
      "loss": 0.9809,
      "step": 125030
    },
    {
      "epoch": 0.43823402622254465,
      "grad_norm": 2.46875,
      "learning_rate": 4.7440230950359693e-05,
      "loss": 0.9467,
      "step": 125040
    },
    {
      "epoch": 0.43826907372944024,
      "grad_norm": 3.0,
      "learning_rate": 4.743958192169599e-05,
      "loss": 0.9634,
      "step": 125050
    },
    {
      "epoch": 0.43830412123633583,
      "grad_norm": 3.3125,
      "learning_rate": 4.743893289303229e-05,
      "loss": 0.9352,
      "step": 125060
    },
    {
      "epoch": 0.4383391687432315,
      "grad_norm": 3.046875,
      "learning_rate": 4.743828386436859e-05,
      "loss": 0.8946,
      "step": 125070
    },
    {
      "epoch": 0.43837421625012707,
      "grad_norm": 2.734375,
      "learning_rate": 4.7437634835704885e-05,
      "loss": 0.9344,
      "step": 125080
    },
    {
      "epoch": 0.43840926375702266,
      "grad_norm": 2.625,
      "learning_rate": 4.743698580704119e-05,
      "loss": 0.8981,
      "step": 125090
    },
    {
      "epoch": 0.43844431126391825,
      "grad_norm": 2.765625,
      "learning_rate": 4.743633677837748e-05,
      "loss": 0.9583,
      "step": 125100
    },
    {
      "epoch": 0.43847935877081384,
      "grad_norm": 3.046875,
      "learning_rate": 4.743568774971378e-05,
      "loss": 0.9294,
      "step": 125110
    },
    {
      "epoch": 0.43851440627770943,
      "grad_norm": 2.96875,
      "learning_rate": 4.743503872105008e-05,
      "loss": 0.9597,
      "step": 125120
    },
    {
      "epoch": 0.438549453784605,
      "grad_norm": 2.953125,
      "learning_rate": 4.743438969238638e-05,
      "loss": 0.9528,
      "step": 125130
    },
    {
      "epoch": 0.4385845012915006,
      "grad_norm": 2.953125,
      "learning_rate": 4.7433740663722673e-05,
      "loss": 0.9753,
      "step": 125140
    },
    {
      "epoch": 0.4386195487983962,
      "grad_norm": 3.28125,
      "learning_rate": 4.7433091635058975e-05,
      "loss": 0.9307,
      "step": 125150
    },
    {
      "epoch": 0.4386545963052918,
      "grad_norm": 3.125,
      "learning_rate": 4.743244260639527e-05,
      "loss": 0.9055,
      "step": 125160
    },
    {
      "epoch": 0.43868964381218745,
      "grad_norm": 2.28125,
      "learning_rate": 4.743179357773157e-05,
      "loss": 0.8689,
      "step": 125170
    },
    {
      "epoch": 0.43872469131908304,
      "grad_norm": 3.265625,
      "learning_rate": 4.7431144549067865e-05,
      "loss": 0.9776,
      "step": 125180
    },
    {
      "epoch": 0.43875973882597863,
      "grad_norm": 3.390625,
      "learning_rate": 4.743049552040417e-05,
      "loss": 0.9058,
      "step": 125190
    },
    {
      "epoch": 0.4387947863328742,
      "grad_norm": 3.453125,
      "learning_rate": 4.742984649174047e-05,
      "loss": 0.9287,
      "step": 125200
    },
    {
      "epoch": 0.4388298338397698,
      "grad_norm": 2.8125,
      "learning_rate": 4.742919746307676e-05,
      "loss": 0.9686,
      "step": 125210
    },
    {
      "epoch": 0.4388648813466654,
      "grad_norm": 3.375,
      "learning_rate": 4.7428548434413064e-05,
      "loss": 0.937,
      "step": 125220
    },
    {
      "epoch": 0.438899928853561,
      "grad_norm": 3.15625,
      "learning_rate": 4.742789940574936e-05,
      "loss": 1.0429,
      "step": 125230
    },
    {
      "epoch": 0.4389349763604566,
      "grad_norm": 3.5,
      "learning_rate": 4.742725037708566e-05,
      "loss": 0.9167,
      "step": 125240
    },
    {
      "epoch": 0.4389700238673522,
      "grad_norm": 2.8125,
      "learning_rate": 4.7426601348421955e-05,
      "loss": 0.911,
      "step": 125250
    },
    {
      "epoch": 0.43900507137424777,
      "grad_norm": 3.0625,
      "learning_rate": 4.7425952319758256e-05,
      "loss": 0.9453,
      "step": 125260
    },
    {
      "epoch": 0.4390401188811434,
      "grad_norm": 3.421875,
      "learning_rate": 4.742530329109455e-05,
      "loss": 1.0474,
      "step": 125270
    },
    {
      "epoch": 0.439075166388039,
      "grad_norm": 2.984375,
      "learning_rate": 4.742465426243085e-05,
      "loss": 0.9798,
      "step": 125280
    },
    {
      "epoch": 0.4391102138949346,
      "grad_norm": 3.59375,
      "learning_rate": 4.742400523376715e-05,
      "loss": 0.9118,
      "step": 125290
    },
    {
      "epoch": 0.4391452614018302,
      "grad_norm": 3.390625,
      "learning_rate": 4.742335620510344e-05,
      "loss": 1.0218,
      "step": 125300
    },
    {
      "epoch": 0.4391803089087258,
      "grad_norm": 2.859375,
      "learning_rate": 4.742270717643974e-05,
      "loss": 0.8821,
      "step": 125310
    },
    {
      "epoch": 0.43921535641562137,
      "grad_norm": 3.03125,
      "learning_rate": 4.742205814777604e-05,
      "loss": 0.9722,
      "step": 125320
    },
    {
      "epoch": 0.43925040392251696,
      "grad_norm": 2.875,
      "learning_rate": 4.742140911911234e-05,
      "loss": 0.9589,
      "step": 125330
    },
    {
      "epoch": 0.43928545142941255,
      "grad_norm": 3.15625,
      "learning_rate": 4.7420760090448633e-05,
      "loss": 0.9324,
      "step": 125340
    },
    {
      "epoch": 0.43932049893630815,
      "grad_norm": 2.78125,
      "learning_rate": 4.7420111061784935e-05,
      "loss": 0.8649,
      "step": 125350
    },
    {
      "epoch": 0.43935554644320374,
      "grad_norm": 3.1875,
      "learning_rate": 4.741946203312123e-05,
      "loss": 0.9024,
      "step": 125360
    },
    {
      "epoch": 0.4393905939500994,
      "grad_norm": 3.0625,
      "learning_rate": 4.741881300445753e-05,
      "loss": 0.9317,
      "step": 125370
    },
    {
      "epoch": 0.439425641456995,
      "grad_norm": 2.75,
      "learning_rate": 4.7418163975793825e-05,
      "loss": 0.9965,
      "step": 125380
    },
    {
      "epoch": 0.43946068896389057,
      "grad_norm": 2.953125,
      "learning_rate": 4.741751494713013e-05,
      "loss": 1.0245,
      "step": 125390
    },
    {
      "epoch": 0.43949573647078616,
      "grad_norm": 3.390625,
      "learning_rate": 4.741686591846642e-05,
      "loss": 0.9533,
      "step": 125400
    },
    {
      "epoch": 0.43953078397768175,
      "grad_norm": 2.953125,
      "learning_rate": 4.741621688980272e-05,
      "loss": 0.8551,
      "step": 125410
    },
    {
      "epoch": 0.43956583148457734,
      "grad_norm": 3.15625,
      "learning_rate": 4.741556786113902e-05,
      "loss": 0.9847,
      "step": 125420
    },
    {
      "epoch": 0.43960087899147293,
      "grad_norm": 2.890625,
      "learning_rate": 4.741491883247532e-05,
      "loss": 0.895,
      "step": 125430
    },
    {
      "epoch": 0.4396359264983685,
      "grad_norm": 3.171875,
      "learning_rate": 4.741426980381162e-05,
      "loss": 0.9615,
      "step": 125440
    },
    {
      "epoch": 0.4396709740052641,
      "grad_norm": 3.015625,
      "learning_rate": 4.7413620775147915e-05,
      "loss": 0.9489,
      "step": 125450
    },
    {
      "epoch": 0.4397060215121597,
      "grad_norm": 3.0625,
      "learning_rate": 4.7412971746484216e-05,
      "loss": 0.9379,
      "step": 125460
    },
    {
      "epoch": 0.43974106901905535,
      "grad_norm": 3.234375,
      "learning_rate": 4.741232271782051e-05,
      "loss": 0.9101,
      "step": 125470
    },
    {
      "epoch": 0.43977611652595094,
      "grad_norm": 2.796875,
      "learning_rate": 4.741167368915681e-05,
      "loss": 0.9087,
      "step": 125480
    },
    {
      "epoch": 0.43981116403284654,
      "grad_norm": 3.125,
      "learning_rate": 4.741102466049311e-05,
      "loss": 0.9624,
      "step": 125490
    },
    {
      "epoch": 0.4398462115397421,
      "grad_norm": 2.578125,
      "learning_rate": 4.741037563182941e-05,
      "loss": 0.8641,
      "step": 125500
    },
    {
      "epoch": 0.4398812590466377,
      "grad_norm": 2.703125,
      "learning_rate": 4.74097266031657e-05,
      "loss": 0.9692,
      "step": 125510
    },
    {
      "epoch": 0.4399163065535333,
      "grad_norm": 2.984375,
      "learning_rate": 4.7409077574502004e-05,
      "loss": 0.962,
      "step": 125520
    },
    {
      "epoch": 0.4399513540604289,
      "grad_norm": 3.21875,
      "learning_rate": 4.74084285458383e-05,
      "loss": 0.9674,
      "step": 125530
    },
    {
      "epoch": 0.4399864015673245,
      "grad_norm": 3.09375,
      "learning_rate": 4.74077795171746e-05,
      "loss": 0.8993,
      "step": 125540
    },
    {
      "epoch": 0.4400214490742201,
      "grad_norm": 3.46875,
      "learning_rate": 4.7407130488510895e-05,
      "loss": 0.9264,
      "step": 125550
    },
    {
      "epoch": 0.4400564965811157,
      "grad_norm": 2.890625,
      "learning_rate": 4.7406481459847196e-05,
      "loss": 0.9572,
      "step": 125560
    },
    {
      "epoch": 0.4400915440880113,
      "grad_norm": 3.109375,
      "learning_rate": 4.74058324311835e-05,
      "loss": 0.9589,
      "step": 125570
    },
    {
      "epoch": 0.4401265915949069,
      "grad_norm": 3.25,
      "learning_rate": 4.740518340251979e-05,
      "loss": 0.9127,
      "step": 125580
    },
    {
      "epoch": 0.4401616391018025,
      "grad_norm": 2.890625,
      "learning_rate": 4.7404534373856094e-05,
      "loss": 0.9364,
      "step": 125590
    },
    {
      "epoch": 0.4401966866086981,
      "grad_norm": 3.140625,
      "learning_rate": 4.740388534519239e-05,
      "loss": 0.9101,
      "step": 125600
    },
    {
      "epoch": 0.4402317341155937,
      "grad_norm": 2.8125,
      "learning_rate": 4.740323631652869e-05,
      "loss": 0.9613,
      "step": 125610
    },
    {
      "epoch": 0.4402667816224893,
      "grad_norm": 3.078125,
      "learning_rate": 4.7402587287864984e-05,
      "loss": 0.9596,
      "step": 125620
    },
    {
      "epoch": 0.44030182912938487,
      "grad_norm": 2.953125,
      "learning_rate": 4.7401938259201286e-05,
      "loss": 0.9415,
      "step": 125630
    },
    {
      "epoch": 0.44033687663628046,
      "grad_norm": 2.546875,
      "learning_rate": 4.740128923053758e-05,
      "loss": 0.9041,
      "step": 125640
    },
    {
      "epoch": 0.44037192414317605,
      "grad_norm": 3.171875,
      "learning_rate": 4.740064020187388e-05,
      "loss": 0.9554,
      "step": 125650
    },
    {
      "epoch": 0.4404069716500717,
      "grad_norm": 3.21875,
      "learning_rate": 4.7399991173210176e-05,
      "loss": 1.0443,
      "step": 125660
    },
    {
      "epoch": 0.4404420191569673,
      "grad_norm": 2.921875,
      "learning_rate": 4.739934214454647e-05,
      "loss": 1.0022,
      "step": 125670
    },
    {
      "epoch": 0.4404770666638629,
      "grad_norm": 3.03125,
      "learning_rate": 4.739869311588277e-05,
      "loss": 0.9891,
      "step": 125680
    },
    {
      "epoch": 0.4405121141707585,
      "grad_norm": 3.015625,
      "learning_rate": 4.739804408721907e-05,
      "loss": 0.9407,
      "step": 125690
    },
    {
      "epoch": 0.44054716167765406,
      "grad_norm": 2.734375,
      "learning_rate": 4.739739505855537e-05,
      "loss": 0.9496,
      "step": 125700
    },
    {
      "epoch": 0.44058220918454966,
      "grad_norm": 3.296875,
      "learning_rate": 4.739674602989166e-05,
      "loss": 0.9471,
      "step": 125710
    },
    {
      "epoch": 0.44061725669144525,
      "grad_norm": 3.25,
      "learning_rate": 4.7396097001227964e-05,
      "loss": 0.9116,
      "step": 125720
    },
    {
      "epoch": 0.44065230419834084,
      "grad_norm": 3.09375,
      "learning_rate": 4.739544797256426e-05,
      "loss": 0.9418,
      "step": 125730
    },
    {
      "epoch": 0.44068735170523643,
      "grad_norm": 2.75,
      "learning_rate": 4.739479894390056e-05,
      "loss": 0.9268,
      "step": 125740
    },
    {
      "epoch": 0.440722399212132,
      "grad_norm": 2.96875,
      "learning_rate": 4.7394149915236855e-05,
      "loss": 0.8644,
      "step": 125750
    },
    {
      "epoch": 0.44075744671902767,
      "grad_norm": 2.625,
      "learning_rate": 4.7393500886573156e-05,
      "loss": 0.958,
      "step": 125760
    },
    {
      "epoch": 0.44079249422592326,
      "grad_norm": 2.90625,
      "learning_rate": 4.739285185790945e-05,
      "loss": 0.9885,
      "step": 125770
    },
    {
      "epoch": 0.44082754173281885,
      "grad_norm": 3.15625,
      "learning_rate": 4.739220282924575e-05,
      "loss": 0.9743,
      "step": 125780
    },
    {
      "epoch": 0.44086258923971444,
      "grad_norm": 2.984375,
      "learning_rate": 4.7391553800582054e-05,
      "loss": 0.9556,
      "step": 125790
    },
    {
      "epoch": 0.44089763674661003,
      "grad_norm": 3.0,
      "learning_rate": 4.739090477191835e-05,
      "loss": 0.9446,
      "step": 125800
    },
    {
      "epoch": 0.4409326842535056,
      "grad_norm": 3.265625,
      "learning_rate": 4.739025574325465e-05,
      "loss": 0.9824,
      "step": 125810
    },
    {
      "epoch": 0.4409677317604012,
      "grad_norm": 3.109375,
      "learning_rate": 4.7389606714590944e-05,
      "loss": 0.948,
      "step": 125820
    },
    {
      "epoch": 0.4410027792672968,
      "grad_norm": 3.046875,
      "learning_rate": 4.7388957685927246e-05,
      "loss": 1.0002,
      "step": 125830
    },
    {
      "epoch": 0.4410378267741924,
      "grad_norm": 3.21875,
      "learning_rate": 4.738830865726354e-05,
      "loss": 0.9882,
      "step": 125840
    },
    {
      "epoch": 0.441072874281088,
      "grad_norm": 3.34375,
      "learning_rate": 4.738765962859984e-05,
      "loss": 0.8926,
      "step": 125850
    },
    {
      "epoch": 0.44110792178798364,
      "grad_norm": 3.5625,
      "learning_rate": 4.7387010599936136e-05,
      "loss": 0.8912,
      "step": 125860
    },
    {
      "epoch": 0.44114296929487923,
      "grad_norm": 2.921875,
      "learning_rate": 4.738636157127244e-05,
      "loss": 0.8969,
      "step": 125870
    },
    {
      "epoch": 0.4411780168017748,
      "grad_norm": 3.375,
      "learning_rate": 4.738571254260873e-05,
      "loss": 1.0218,
      "step": 125880
    },
    {
      "epoch": 0.4412130643086704,
      "grad_norm": 2.984375,
      "learning_rate": 4.7385063513945034e-05,
      "loss": 0.8999,
      "step": 125890
    },
    {
      "epoch": 0.441248111815566,
      "grad_norm": 3.125,
      "learning_rate": 4.738441448528133e-05,
      "loss": 1.0039,
      "step": 125900
    },
    {
      "epoch": 0.4412831593224616,
      "grad_norm": 3.0,
      "learning_rate": 4.738376545661763e-05,
      "loss": 0.9029,
      "step": 125910
    },
    {
      "epoch": 0.4413182068293572,
      "grad_norm": 2.65625,
      "learning_rate": 4.7383116427953924e-05,
      "loss": 0.9,
      "step": 125920
    },
    {
      "epoch": 0.4413532543362528,
      "grad_norm": 3.515625,
      "learning_rate": 4.7382467399290226e-05,
      "loss": 0.9921,
      "step": 125930
    },
    {
      "epoch": 0.44138830184314837,
      "grad_norm": 3.109375,
      "learning_rate": 4.738181837062653e-05,
      "loss": 0.9606,
      "step": 125940
    },
    {
      "epoch": 0.44142334935004396,
      "grad_norm": 3.265625,
      "learning_rate": 4.738116934196282e-05,
      "loss": 0.9921,
      "step": 125950
    },
    {
      "epoch": 0.4414583968569396,
      "grad_norm": 3.03125,
      "learning_rate": 4.738052031329912e-05,
      "loss": 1.0112,
      "step": 125960
    },
    {
      "epoch": 0.4414934443638352,
      "grad_norm": 2.953125,
      "learning_rate": 4.737987128463542e-05,
      "loss": 0.9102,
      "step": 125970
    },
    {
      "epoch": 0.4415284918707308,
      "grad_norm": 2.859375,
      "learning_rate": 4.737922225597172e-05,
      "loss": 0.9029,
      "step": 125980
    },
    {
      "epoch": 0.4415635393776264,
      "grad_norm": 2.953125,
      "learning_rate": 4.7378573227308014e-05,
      "loss": 0.9134,
      "step": 125990
    },
    {
      "epoch": 0.44159858688452197,
      "grad_norm": 2.84375,
      "learning_rate": 4.7377924198644315e-05,
      "loss": 0.9029,
      "step": 126000
    },
    {
      "epoch": 0.44163363439141756,
      "grad_norm": 2.71875,
      "learning_rate": 4.737727516998061e-05,
      "loss": 1.0268,
      "step": 126010
    },
    {
      "epoch": 0.44166868189831315,
      "grad_norm": 2.875,
      "learning_rate": 4.737662614131691e-05,
      "loss": 0.898,
      "step": 126020
    },
    {
      "epoch": 0.44170372940520874,
      "grad_norm": 3.09375,
      "learning_rate": 4.7375977112653206e-05,
      "loss": 0.9286,
      "step": 126030
    },
    {
      "epoch": 0.44173877691210434,
      "grad_norm": 3.109375,
      "learning_rate": 4.73753280839895e-05,
      "loss": 0.9673,
      "step": 126040
    },
    {
      "epoch": 0.4417738244189999,
      "grad_norm": 3.21875,
      "learning_rate": 4.73746790553258e-05,
      "loss": 0.9149,
      "step": 126050
    },
    {
      "epoch": 0.4418088719258956,
      "grad_norm": 2.90625,
      "learning_rate": 4.7374030026662096e-05,
      "loss": 0.9292,
      "step": 126060
    },
    {
      "epoch": 0.44184391943279117,
      "grad_norm": 2.734375,
      "learning_rate": 4.73733809979984e-05,
      "loss": 0.891,
      "step": 126070
    },
    {
      "epoch": 0.44187896693968676,
      "grad_norm": 2.90625,
      "learning_rate": 4.737273196933469e-05,
      "loss": 0.9008,
      "step": 126080
    },
    {
      "epoch": 0.44191401444658235,
      "grad_norm": 3.09375,
      "learning_rate": 4.7372082940670994e-05,
      "loss": 1.0198,
      "step": 126090
    },
    {
      "epoch": 0.44194906195347794,
      "grad_norm": 3.03125,
      "learning_rate": 4.737143391200729e-05,
      "loss": 1.0709,
      "step": 126100
    },
    {
      "epoch": 0.44198410946037353,
      "grad_norm": 3.046875,
      "learning_rate": 4.737078488334359e-05,
      "loss": 0.9702,
      "step": 126110
    },
    {
      "epoch": 0.4420191569672691,
      "grad_norm": 2.96875,
      "learning_rate": 4.7370135854679884e-05,
      "loss": 0.9903,
      "step": 126120
    },
    {
      "epoch": 0.4420542044741647,
      "grad_norm": 2.96875,
      "learning_rate": 4.7369486826016186e-05,
      "loss": 0.9182,
      "step": 126130
    },
    {
      "epoch": 0.4420892519810603,
      "grad_norm": 3.0,
      "learning_rate": 4.736883779735248e-05,
      "loss": 0.9221,
      "step": 126140
    },
    {
      "epoch": 0.44212429948795595,
      "grad_norm": 3.484375,
      "learning_rate": 4.736818876868878e-05,
      "loss": 0.9193,
      "step": 126150
    },
    {
      "epoch": 0.44215934699485154,
      "grad_norm": 2.671875,
      "learning_rate": 4.736753974002508e-05,
      "loss": 0.8933,
      "step": 126160
    },
    {
      "epoch": 0.44219439450174713,
      "grad_norm": 3.578125,
      "learning_rate": 4.736689071136138e-05,
      "loss": 0.9174,
      "step": 126170
    },
    {
      "epoch": 0.4422294420086427,
      "grad_norm": 2.828125,
      "learning_rate": 4.736624168269768e-05,
      "loss": 0.9339,
      "step": 126180
    },
    {
      "epoch": 0.4422644895155383,
      "grad_norm": 2.53125,
      "learning_rate": 4.7365592654033974e-05,
      "loss": 0.9271,
      "step": 126190
    },
    {
      "epoch": 0.4422995370224339,
      "grad_norm": 3.46875,
      "learning_rate": 4.7364943625370275e-05,
      "loss": 1.0465,
      "step": 126200
    },
    {
      "epoch": 0.4423345845293295,
      "grad_norm": 3.171875,
      "learning_rate": 4.736429459670657e-05,
      "loss": 0.9811,
      "step": 126210
    },
    {
      "epoch": 0.4423696320362251,
      "grad_norm": 3.234375,
      "learning_rate": 4.736364556804287e-05,
      "loss": 0.9943,
      "step": 126220
    },
    {
      "epoch": 0.4424046795431207,
      "grad_norm": 3.0,
      "learning_rate": 4.7362996539379166e-05,
      "loss": 1.0007,
      "step": 126230
    },
    {
      "epoch": 0.4424397270500163,
      "grad_norm": 2.796875,
      "learning_rate": 4.736234751071547e-05,
      "loss": 0.9375,
      "step": 126240
    },
    {
      "epoch": 0.4424747745569119,
      "grad_norm": 2.984375,
      "learning_rate": 4.736169848205176e-05,
      "loss": 0.9224,
      "step": 126250
    },
    {
      "epoch": 0.4425098220638075,
      "grad_norm": 3.3125,
      "learning_rate": 4.736104945338806e-05,
      "loss": 0.9169,
      "step": 126260
    },
    {
      "epoch": 0.4425448695707031,
      "grad_norm": 2.984375,
      "learning_rate": 4.736040042472436e-05,
      "loss": 0.9356,
      "step": 126270
    },
    {
      "epoch": 0.4425799170775987,
      "grad_norm": 3.421875,
      "learning_rate": 4.735975139606066e-05,
      "loss": 0.926,
      "step": 126280
    },
    {
      "epoch": 0.4426149645844943,
      "grad_norm": 2.71875,
      "learning_rate": 4.7359102367396954e-05,
      "loss": 0.9204,
      "step": 126290
    },
    {
      "epoch": 0.4426500120913899,
      "grad_norm": 3.53125,
      "learning_rate": 4.7358453338733255e-05,
      "loss": 0.9639,
      "step": 126300
    },
    {
      "epoch": 0.44268505959828547,
      "grad_norm": 3.015625,
      "learning_rate": 4.7357804310069556e-05,
      "loss": 1.0268,
      "step": 126310
    },
    {
      "epoch": 0.44272010710518106,
      "grad_norm": 3.171875,
      "learning_rate": 4.735715528140585e-05,
      "loss": 0.9193,
      "step": 126320
    },
    {
      "epoch": 0.44275515461207665,
      "grad_norm": 2.984375,
      "learning_rate": 4.735650625274215e-05,
      "loss": 1.0255,
      "step": 126330
    },
    {
      "epoch": 0.44279020211897224,
      "grad_norm": 3.015625,
      "learning_rate": 4.735585722407845e-05,
      "loss": 1.0665,
      "step": 126340
    },
    {
      "epoch": 0.4428252496258679,
      "grad_norm": 3.109375,
      "learning_rate": 4.735520819541475e-05,
      "loss": 1.0065,
      "step": 126350
    },
    {
      "epoch": 0.4428602971327635,
      "grad_norm": 2.890625,
      "learning_rate": 4.735455916675104e-05,
      "loss": 0.9616,
      "step": 126360
    },
    {
      "epoch": 0.44289534463965907,
      "grad_norm": 3.234375,
      "learning_rate": 4.7353910138087344e-05,
      "loss": 0.9722,
      "step": 126370
    },
    {
      "epoch": 0.44293039214655466,
      "grad_norm": 3.5,
      "learning_rate": 4.735326110942364e-05,
      "loss": 0.9411,
      "step": 126380
    },
    {
      "epoch": 0.44296543965345025,
      "grad_norm": 3.21875,
      "learning_rate": 4.735261208075994e-05,
      "loss": 0.8719,
      "step": 126390
    },
    {
      "epoch": 0.44300048716034585,
      "grad_norm": 3.078125,
      "learning_rate": 4.7351963052096235e-05,
      "loss": 0.9733,
      "step": 126400
    },
    {
      "epoch": 0.44303553466724144,
      "grad_norm": 2.78125,
      "learning_rate": 4.7351314023432536e-05,
      "loss": 1.0417,
      "step": 126410
    },
    {
      "epoch": 0.44307058217413703,
      "grad_norm": 3.09375,
      "learning_rate": 4.735066499476883e-05,
      "loss": 0.9328,
      "step": 126420
    },
    {
      "epoch": 0.4431056296810326,
      "grad_norm": 2.75,
      "learning_rate": 4.7350015966105126e-05,
      "loss": 1.0159,
      "step": 126430
    },
    {
      "epoch": 0.4431406771879282,
      "grad_norm": 3.15625,
      "learning_rate": 4.734936693744143e-05,
      "loss": 1.0038,
      "step": 126440
    },
    {
      "epoch": 0.44317572469482386,
      "grad_norm": 2.953125,
      "learning_rate": 4.734871790877772e-05,
      "loss": 1.0408,
      "step": 126450
    },
    {
      "epoch": 0.44321077220171945,
      "grad_norm": 5.0625,
      "learning_rate": 4.734806888011402e-05,
      "loss": 0.9634,
      "step": 126460
    },
    {
      "epoch": 0.44324581970861504,
      "grad_norm": 3.140625,
      "learning_rate": 4.734741985145032e-05,
      "loss": 0.9353,
      "step": 126470
    },
    {
      "epoch": 0.44328086721551063,
      "grad_norm": 3.0,
      "learning_rate": 4.734677082278662e-05,
      "loss": 0.9615,
      "step": 126480
    },
    {
      "epoch": 0.4433159147224062,
      "grad_norm": 3.453125,
      "learning_rate": 4.7346121794122914e-05,
      "loss": 0.9185,
      "step": 126490
    },
    {
      "epoch": 0.4433509622293018,
      "grad_norm": 3.34375,
      "learning_rate": 4.7345472765459215e-05,
      "loss": 0.9163,
      "step": 126500
    },
    {
      "epoch": 0.4433860097361974,
      "grad_norm": 3.21875,
      "learning_rate": 4.734482373679551e-05,
      "loss": 1.0047,
      "step": 126510
    },
    {
      "epoch": 0.443421057243093,
      "grad_norm": 3.15625,
      "learning_rate": 4.734417470813181e-05,
      "loss": 0.9483,
      "step": 126520
    },
    {
      "epoch": 0.4434561047499886,
      "grad_norm": 3.5,
      "learning_rate": 4.734352567946811e-05,
      "loss": 0.8608,
      "step": 126530
    },
    {
      "epoch": 0.4434911522568842,
      "grad_norm": 2.796875,
      "learning_rate": 4.734287665080441e-05,
      "loss": 0.9522,
      "step": 126540
    },
    {
      "epoch": 0.4435261997637798,
      "grad_norm": 3.1875,
      "learning_rate": 4.734222762214071e-05,
      "loss": 1.0121,
      "step": 126550
    },
    {
      "epoch": 0.4435612472706754,
      "grad_norm": 2.90625,
      "learning_rate": 4.7341578593477e-05,
      "loss": 0.9533,
      "step": 126560
    },
    {
      "epoch": 0.443596294777571,
      "grad_norm": 2.578125,
      "learning_rate": 4.7340929564813304e-05,
      "loss": 0.9988,
      "step": 126570
    },
    {
      "epoch": 0.4436313422844666,
      "grad_norm": 3.359375,
      "learning_rate": 4.73402805361496e-05,
      "loss": 0.9892,
      "step": 126580
    },
    {
      "epoch": 0.4436663897913622,
      "grad_norm": 2.59375,
      "learning_rate": 4.73396315074859e-05,
      "loss": 0.9491,
      "step": 126590
    },
    {
      "epoch": 0.4437014372982578,
      "grad_norm": 2.796875,
      "learning_rate": 4.7338982478822195e-05,
      "loss": 0.9655,
      "step": 126600
    },
    {
      "epoch": 0.4437364848051534,
      "grad_norm": 3.03125,
      "learning_rate": 4.7338333450158496e-05,
      "loss": 0.9195,
      "step": 126610
    },
    {
      "epoch": 0.44377153231204897,
      "grad_norm": 2.75,
      "learning_rate": 4.733768442149479e-05,
      "loss": 0.934,
      "step": 126620
    },
    {
      "epoch": 0.44380657981894456,
      "grad_norm": 3.4375,
      "learning_rate": 4.733703539283109e-05,
      "loss": 0.9694,
      "step": 126630
    },
    {
      "epoch": 0.44384162732584015,
      "grad_norm": 3.171875,
      "learning_rate": 4.733638636416739e-05,
      "loss": 1.0145,
      "step": 126640
    },
    {
      "epoch": 0.4438766748327358,
      "grad_norm": 2.625,
      "learning_rate": 4.733573733550369e-05,
      "loss": 0.9668,
      "step": 126650
    },
    {
      "epoch": 0.4439117223396314,
      "grad_norm": 2.859375,
      "learning_rate": 4.733508830683999e-05,
      "loss": 1.0056,
      "step": 126660
    },
    {
      "epoch": 0.443946769846527,
      "grad_norm": 3.390625,
      "learning_rate": 4.7334439278176284e-05,
      "loss": 0.9378,
      "step": 126670
    },
    {
      "epoch": 0.44398181735342257,
      "grad_norm": 2.921875,
      "learning_rate": 4.7333790249512586e-05,
      "loss": 0.9063,
      "step": 126680
    },
    {
      "epoch": 0.44401686486031816,
      "grad_norm": 2.90625,
      "learning_rate": 4.733314122084888e-05,
      "loss": 0.9752,
      "step": 126690
    },
    {
      "epoch": 0.44405191236721375,
      "grad_norm": 2.96875,
      "learning_rate": 4.733249219218518e-05,
      "loss": 0.915,
      "step": 126700
    },
    {
      "epoch": 0.44408695987410934,
      "grad_norm": 2.796875,
      "learning_rate": 4.7331843163521476e-05,
      "loss": 0.893,
      "step": 126710
    },
    {
      "epoch": 0.44412200738100494,
      "grad_norm": 2.859375,
      "learning_rate": 4.733119413485778e-05,
      "loss": 0.8236,
      "step": 126720
    },
    {
      "epoch": 0.4441570548879005,
      "grad_norm": 3.40625,
      "learning_rate": 4.733054510619407e-05,
      "loss": 1.0064,
      "step": 126730
    },
    {
      "epoch": 0.4441921023947962,
      "grad_norm": 3.046875,
      "learning_rate": 4.7329896077530374e-05,
      "loss": 0.9647,
      "step": 126740
    },
    {
      "epoch": 0.44422714990169176,
      "grad_norm": 3.34375,
      "learning_rate": 4.732924704886667e-05,
      "loss": 1.036,
      "step": 126750
    },
    {
      "epoch": 0.44426219740858736,
      "grad_norm": 3.03125,
      "learning_rate": 4.732859802020297e-05,
      "loss": 1.0061,
      "step": 126760
    },
    {
      "epoch": 0.44429724491548295,
      "grad_norm": 3.296875,
      "learning_rate": 4.7327948991539264e-05,
      "loss": 0.9399,
      "step": 126770
    },
    {
      "epoch": 0.44433229242237854,
      "grad_norm": 3.125,
      "learning_rate": 4.7327299962875566e-05,
      "loss": 0.9014,
      "step": 126780
    },
    {
      "epoch": 0.44436733992927413,
      "grad_norm": 3.109375,
      "learning_rate": 4.732665093421186e-05,
      "loss": 0.9745,
      "step": 126790
    },
    {
      "epoch": 0.4444023874361697,
      "grad_norm": 3.015625,
      "learning_rate": 4.7326001905548155e-05,
      "loss": 0.9747,
      "step": 126800
    },
    {
      "epoch": 0.4444374349430653,
      "grad_norm": 3.046875,
      "learning_rate": 4.7325352876884456e-05,
      "loss": 0.8543,
      "step": 126810
    },
    {
      "epoch": 0.4444724824499609,
      "grad_norm": 2.8125,
      "learning_rate": 4.732470384822075e-05,
      "loss": 0.9167,
      "step": 126820
    },
    {
      "epoch": 0.4445075299568565,
      "grad_norm": 3.109375,
      "learning_rate": 4.732405481955705e-05,
      "loss": 0.9826,
      "step": 126830
    },
    {
      "epoch": 0.44454257746375214,
      "grad_norm": 2.671875,
      "learning_rate": 4.732340579089335e-05,
      "loss": 0.9856,
      "step": 126840
    },
    {
      "epoch": 0.44457762497064773,
      "grad_norm": 2.734375,
      "learning_rate": 4.732275676222965e-05,
      "loss": 0.9851,
      "step": 126850
    },
    {
      "epoch": 0.4446126724775433,
      "grad_norm": 2.859375,
      "learning_rate": 4.732210773356594e-05,
      "loss": 0.8879,
      "step": 126860
    },
    {
      "epoch": 0.4446477199844389,
      "grad_norm": 2.953125,
      "learning_rate": 4.7321458704902244e-05,
      "loss": 0.9329,
      "step": 126870
    },
    {
      "epoch": 0.4446827674913345,
      "grad_norm": 2.546875,
      "learning_rate": 4.732080967623854e-05,
      "loss": 0.9812,
      "step": 126880
    },
    {
      "epoch": 0.4447178149982301,
      "grad_norm": 3.109375,
      "learning_rate": 4.732016064757484e-05,
      "loss": 0.9312,
      "step": 126890
    },
    {
      "epoch": 0.4447528625051257,
      "grad_norm": 2.5625,
      "learning_rate": 4.731951161891114e-05,
      "loss": 0.8693,
      "step": 126900
    },
    {
      "epoch": 0.4447879100120213,
      "grad_norm": 2.703125,
      "learning_rate": 4.7318862590247436e-05,
      "loss": 0.9323,
      "step": 126910
    },
    {
      "epoch": 0.4448229575189169,
      "grad_norm": 2.6875,
      "learning_rate": 4.731821356158374e-05,
      "loss": 0.9992,
      "step": 126920
    },
    {
      "epoch": 0.44485800502581246,
      "grad_norm": 3.015625,
      "learning_rate": 4.731756453292003e-05,
      "loss": 0.9295,
      "step": 126930
    },
    {
      "epoch": 0.4448930525327081,
      "grad_norm": 3.25,
      "learning_rate": 4.7316915504256334e-05,
      "loss": 1.009,
      "step": 126940
    },
    {
      "epoch": 0.4449281000396037,
      "grad_norm": 3.03125,
      "learning_rate": 4.731626647559263e-05,
      "loss": 0.8564,
      "step": 126950
    },
    {
      "epoch": 0.4449631475464993,
      "grad_norm": 2.609375,
      "learning_rate": 4.731561744692893e-05,
      "loss": 0.9521,
      "step": 126960
    },
    {
      "epoch": 0.4449981950533949,
      "grad_norm": 3.453125,
      "learning_rate": 4.7314968418265224e-05,
      "loss": 0.9847,
      "step": 126970
    },
    {
      "epoch": 0.4450332425602905,
      "grad_norm": 2.671875,
      "learning_rate": 4.7314319389601526e-05,
      "loss": 0.9129,
      "step": 126980
    },
    {
      "epoch": 0.44506829006718607,
      "grad_norm": 3.25,
      "learning_rate": 4.731367036093782e-05,
      "loss": 1.0563,
      "step": 126990
    },
    {
      "epoch": 0.44510333757408166,
      "grad_norm": 3.03125,
      "learning_rate": 4.731302133227412e-05,
      "loss": 0.9144,
      "step": 127000
    },
    {
      "epoch": 0.44513838508097725,
      "grad_norm": 3.71875,
      "learning_rate": 4.7312372303610416e-05,
      "loss": 1.0216,
      "step": 127010
    },
    {
      "epoch": 0.44517343258787284,
      "grad_norm": 3.015625,
      "learning_rate": 4.731172327494672e-05,
      "loss": 0.9803,
      "step": 127020
    },
    {
      "epoch": 0.44520848009476843,
      "grad_norm": 3.046875,
      "learning_rate": 4.731107424628302e-05,
      "loss": 0.9331,
      "step": 127030
    },
    {
      "epoch": 0.4452435276016641,
      "grad_norm": 2.78125,
      "learning_rate": 4.7310425217619314e-05,
      "loss": 0.9716,
      "step": 127040
    },
    {
      "epoch": 0.44527857510855967,
      "grad_norm": 3.109375,
      "learning_rate": 4.7309776188955615e-05,
      "loss": 0.9109,
      "step": 127050
    },
    {
      "epoch": 0.44531362261545526,
      "grad_norm": 3.1875,
      "learning_rate": 4.730912716029191e-05,
      "loss": 0.9263,
      "step": 127060
    },
    {
      "epoch": 0.44534867012235085,
      "grad_norm": 3.0625,
      "learning_rate": 4.730847813162821e-05,
      "loss": 1.0416,
      "step": 127070
    },
    {
      "epoch": 0.44538371762924645,
      "grad_norm": 2.828125,
      "learning_rate": 4.7307829102964506e-05,
      "loss": 1.0224,
      "step": 127080
    },
    {
      "epoch": 0.44541876513614204,
      "grad_norm": 2.96875,
      "learning_rate": 4.730718007430081e-05,
      "loss": 0.9389,
      "step": 127090
    },
    {
      "epoch": 0.4454538126430376,
      "grad_norm": 2.84375,
      "learning_rate": 4.73065310456371e-05,
      "loss": 0.9739,
      "step": 127100
    },
    {
      "epoch": 0.4454888601499332,
      "grad_norm": 2.671875,
      "learning_rate": 4.73058820169734e-05,
      "loss": 1.0327,
      "step": 127110
    },
    {
      "epoch": 0.4455239076568288,
      "grad_norm": 3.390625,
      "learning_rate": 4.73052329883097e-05,
      "loss": 0.9737,
      "step": 127120
    },
    {
      "epoch": 0.4455589551637244,
      "grad_norm": 2.953125,
      "learning_rate": 4.7304583959646e-05,
      "loss": 0.8971,
      "step": 127130
    },
    {
      "epoch": 0.44559400267062005,
      "grad_norm": 3.0,
      "learning_rate": 4.7303934930982294e-05,
      "loss": 0.925,
      "step": 127140
    },
    {
      "epoch": 0.44562905017751564,
      "grad_norm": 2.9375,
      "learning_rate": 4.7303285902318595e-05,
      "loss": 1.067,
      "step": 127150
    },
    {
      "epoch": 0.44566409768441123,
      "grad_norm": 2.875,
      "learning_rate": 4.730263687365489e-05,
      "loss": 0.9149,
      "step": 127160
    },
    {
      "epoch": 0.4456991451913068,
      "grad_norm": 2.9375,
      "learning_rate": 4.7301987844991184e-05,
      "loss": 0.9365,
      "step": 127170
    },
    {
      "epoch": 0.4457341926982024,
      "grad_norm": 3.125,
      "learning_rate": 4.7301338816327486e-05,
      "loss": 0.8593,
      "step": 127180
    },
    {
      "epoch": 0.445769240205098,
      "grad_norm": 3.0,
      "learning_rate": 4.730068978766378e-05,
      "loss": 1.038,
      "step": 127190
    },
    {
      "epoch": 0.4458042877119936,
      "grad_norm": 3.234375,
      "learning_rate": 4.730004075900008e-05,
      "loss": 0.9944,
      "step": 127200
    },
    {
      "epoch": 0.4458393352188892,
      "grad_norm": 3.375,
      "learning_rate": 4.7299391730336376e-05,
      "loss": 0.9546,
      "step": 127210
    },
    {
      "epoch": 0.4458743827257848,
      "grad_norm": 2.625,
      "learning_rate": 4.729874270167268e-05,
      "loss": 1.0747,
      "step": 127220
    },
    {
      "epoch": 0.4459094302326804,
      "grad_norm": 2.828125,
      "learning_rate": 4.729809367300897e-05,
      "loss": 0.9458,
      "step": 127230
    },
    {
      "epoch": 0.445944477739576,
      "grad_norm": 3.15625,
      "learning_rate": 4.7297444644345274e-05,
      "loss": 0.9979,
      "step": 127240
    },
    {
      "epoch": 0.4459795252464716,
      "grad_norm": 3.0625,
      "learning_rate": 4.729679561568157e-05,
      "loss": 0.9672,
      "step": 127250
    },
    {
      "epoch": 0.4460145727533672,
      "grad_norm": 3.34375,
      "learning_rate": 4.729614658701787e-05,
      "loss": 0.9813,
      "step": 127260
    },
    {
      "epoch": 0.4460496202602628,
      "grad_norm": 3.15625,
      "learning_rate": 4.729549755835417e-05,
      "loss": 1.0244,
      "step": 127270
    },
    {
      "epoch": 0.4460846677671584,
      "grad_norm": 2.8125,
      "learning_rate": 4.7294848529690466e-05,
      "loss": 0.9371,
      "step": 127280
    },
    {
      "epoch": 0.446119715274054,
      "grad_norm": 2.765625,
      "learning_rate": 4.729419950102677e-05,
      "loss": 0.8855,
      "step": 127290
    },
    {
      "epoch": 0.44615476278094957,
      "grad_norm": 2.671875,
      "learning_rate": 4.729355047236306e-05,
      "loss": 0.9574,
      "step": 127300
    },
    {
      "epoch": 0.44618981028784516,
      "grad_norm": 3.046875,
      "learning_rate": 4.729290144369936e-05,
      "loss": 0.8982,
      "step": 127310
    },
    {
      "epoch": 0.44622485779474075,
      "grad_norm": 2.984375,
      "learning_rate": 4.729225241503566e-05,
      "loss": 0.9405,
      "step": 127320
    },
    {
      "epoch": 0.4462599053016364,
      "grad_norm": 2.78125,
      "learning_rate": 4.729160338637196e-05,
      "loss": 0.9698,
      "step": 127330
    },
    {
      "epoch": 0.446294952808532,
      "grad_norm": 2.859375,
      "learning_rate": 4.7290954357708254e-05,
      "loss": 0.9364,
      "step": 127340
    },
    {
      "epoch": 0.4463300003154276,
      "grad_norm": 3.140625,
      "learning_rate": 4.7290305329044555e-05,
      "loss": 0.9543,
      "step": 127350
    },
    {
      "epoch": 0.44636504782232317,
      "grad_norm": 2.890625,
      "learning_rate": 4.728965630038085e-05,
      "loss": 0.9394,
      "step": 127360
    },
    {
      "epoch": 0.44640009532921876,
      "grad_norm": 3.125,
      "learning_rate": 4.728900727171715e-05,
      "loss": 0.9697,
      "step": 127370
    },
    {
      "epoch": 0.44643514283611435,
      "grad_norm": 2.921875,
      "learning_rate": 4.7288358243053446e-05,
      "loss": 0.9519,
      "step": 127380
    },
    {
      "epoch": 0.44647019034300994,
      "grad_norm": 3.09375,
      "learning_rate": 4.728770921438975e-05,
      "loss": 0.9468,
      "step": 127390
    },
    {
      "epoch": 0.44650523784990553,
      "grad_norm": 2.765625,
      "learning_rate": 4.728706018572605e-05,
      "loss": 0.9182,
      "step": 127400
    },
    {
      "epoch": 0.4465402853568011,
      "grad_norm": 3.203125,
      "learning_rate": 4.728641115706234e-05,
      "loss": 0.9779,
      "step": 127410
    },
    {
      "epoch": 0.4465753328636967,
      "grad_norm": 3.0,
      "learning_rate": 4.7285762128398645e-05,
      "loss": 0.8811,
      "step": 127420
    },
    {
      "epoch": 0.44661038037059236,
      "grad_norm": 3.28125,
      "learning_rate": 4.728511309973494e-05,
      "loss": 0.9794,
      "step": 127430
    },
    {
      "epoch": 0.44664542787748795,
      "grad_norm": 3.265625,
      "learning_rate": 4.728446407107124e-05,
      "loss": 1.0542,
      "step": 127440
    },
    {
      "epoch": 0.44668047538438355,
      "grad_norm": 2.640625,
      "learning_rate": 4.7283815042407535e-05,
      "loss": 0.962,
      "step": 127450
    },
    {
      "epoch": 0.44671552289127914,
      "grad_norm": 3.53125,
      "learning_rate": 4.7283166013743837e-05,
      "loss": 0.9538,
      "step": 127460
    },
    {
      "epoch": 0.44675057039817473,
      "grad_norm": 2.96875,
      "learning_rate": 4.728251698508013e-05,
      "loss": 0.9631,
      "step": 127470
    },
    {
      "epoch": 0.4467856179050703,
      "grad_norm": 3.671875,
      "learning_rate": 4.728186795641643e-05,
      "loss": 0.9864,
      "step": 127480
    },
    {
      "epoch": 0.4468206654119659,
      "grad_norm": 3.15625,
      "learning_rate": 4.728121892775273e-05,
      "loss": 0.9443,
      "step": 127490
    },
    {
      "epoch": 0.4468557129188615,
      "grad_norm": 2.765625,
      "learning_rate": 4.728056989908903e-05,
      "loss": 0.8291,
      "step": 127500
    },
    {
      "epoch": 0.4468907604257571,
      "grad_norm": 3.0,
      "learning_rate": 4.727992087042532e-05,
      "loss": 0.8418,
      "step": 127510
    },
    {
      "epoch": 0.4469258079326527,
      "grad_norm": 3.4375,
      "learning_rate": 4.7279271841761625e-05,
      "loss": 0.9037,
      "step": 127520
    },
    {
      "epoch": 0.44696085543954833,
      "grad_norm": 3.25,
      "learning_rate": 4.727862281309792e-05,
      "loss": 0.9118,
      "step": 127530
    },
    {
      "epoch": 0.4469959029464439,
      "grad_norm": 3.390625,
      "learning_rate": 4.727797378443422e-05,
      "loss": 0.9493,
      "step": 127540
    },
    {
      "epoch": 0.4470309504533395,
      "grad_norm": 2.984375,
      "learning_rate": 4.7277324755770515e-05,
      "loss": 1.0223,
      "step": 127550
    },
    {
      "epoch": 0.4470659979602351,
      "grad_norm": 3.203125,
      "learning_rate": 4.727667572710681e-05,
      "loss": 0.9685,
      "step": 127560
    },
    {
      "epoch": 0.4471010454671307,
      "grad_norm": 3.09375,
      "learning_rate": 4.727602669844311e-05,
      "loss": 0.8319,
      "step": 127570
    },
    {
      "epoch": 0.4471360929740263,
      "grad_norm": 3.109375,
      "learning_rate": 4.7275377669779406e-05,
      "loss": 0.9802,
      "step": 127580
    },
    {
      "epoch": 0.4471711404809219,
      "grad_norm": 3.015625,
      "learning_rate": 4.727472864111571e-05,
      "loss": 0.822,
      "step": 127590
    },
    {
      "epoch": 0.44720618798781747,
      "grad_norm": 3.296875,
      "learning_rate": 4.7274079612452e-05,
      "loss": 0.9809,
      "step": 127600
    },
    {
      "epoch": 0.44724123549471306,
      "grad_norm": 3.125,
      "learning_rate": 4.72734305837883e-05,
      "loss": 0.9396,
      "step": 127610
    },
    {
      "epoch": 0.44727628300160865,
      "grad_norm": 3.109375,
      "learning_rate": 4.7272781555124605e-05,
      "loss": 0.8977,
      "step": 127620
    },
    {
      "epoch": 0.4473113305085043,
      "grad_norm": 3.21875,
      "learning_rate": 4.72721325264609e-05,
      "loss": 0.8719,
      "step": 127630
    },
    {
      "epoch": 0.4473463780153999,
      "grad_norm": 3.03125,
      "learning_rate": 4.72714834977972e-05,
      "loss": 0.932,
      "step": 127640
    },
    {
      "epoch": 0.4473814255222955,
      "grad_norm": 3.015625,
      "learning_rate": 4.7270834469133495e-05,
      "loss": 1.0138,
      "step": 127650
    },
    {
      "epoch": 0.4474164730291911,
      "grad_norm": 3.015625,
      "learning_rate": 4.7270185440469797e-05,
      "loss": 0.9376,
      "step": 127660
    },
    {
      "epoch": 0.44745152053608667,
      "grad_norm": 3.140625,
      "learning_rate": 4.726953641180609e-05,
      "loss": 0.9576,
      "step": 127670
    },
    {
      "epoch": 0.44748656804298226,
      "grad_norm": 3.234375,
      "learning_rate": 4.726888738314239e-05,
      "loss": 1.0518,
      "step": 127680
    },
    {
      "epoch": 0.44752161554987785,
      "grad_norm": 3.015625,
      "learning_rate": 4.726823835447869e-05,
      "loss": 0.9119,
      "step": 127690
    },
    {
      "epoch": 0.44755666305677344,
      "grad_norm": 2.734375,
      "learning_rate": 4.726758932581499e-05,
      "loss": 0.9059,
      "step": 127700
    },
    {
      "epoch": 0.44759171056366903,
      "grad_norm": 2.71875,
      "learning_rate": 4.726694029715128e-05,
      "loss": 0.9048,
      "step": 127710
    },
    {
      "epoch": 0.4476267580705646,
      "grad_norm": 2.9375,
      "learning_rate": 4.7266291268487585e-05,
      "loss": 0.9621,
      "step": 127720
    },
    {
      "epoch": 0.44766180557746027,
      "grad_norm": 2.703125,
      "learning_rate": 4.726564223982388e-05,
      "loss": 0.9109,
      "step": 127730
    },
    {
      "epoch": 0.44769685308435586,
      "grad_norm": 3.390625,
      "learning_rate": 4.726499321116018e-05,
      "loss": 0.9867,
      "step": 127740
    },
    {
      "epoch": 0.44773190059125145,
      "grad_norm": 2.46875,
      "learning_rate": 4.7264344182496475e-05,
      "loss": 0.8603,
      "step": 127750
    },
    {
      "epoch": 0.44776694809814704,
      "grad_norm": 2.75,
      "learning_rate": 4.7263695153832777e-05,
      "loss": 0.9329,
      "step": 127760
    },
    {
      "epoch": 0.44780199560504264,
      "grad_norm": 2.921875,
      "learning_rate": 4.726304612516908e-05,
      "loss": 1.0174,
      "step": 127770
    },
    {
      "epoch": 0.4478370431119382,
      "grad_norm": 3.3125,
      "learning_rate": 4.726239709650537e-05,
      "loss": 1.0478,
      "step": 127780
    },
    {
      "epoch": 0.4478720906188338,
      "grad_norm": 2.828125,
      "learning_rate": 4.7261748067841674e-05,
      "loss": 0.9434,
      "step": 127790
    },
    {
      "epoch": 0.4479071381257294,
      "grad_norm": 2.59375,
      "learning_rate": 4.726109903917797e-05,
      "loss": 0.8946,
      "step": 127800
    },
    {
      "epoch": 0.447942185632625,
      "grad_norm": 2.921875,
      "learning_rate": 4.726045001051427e-05,
      "loss": 0.8046,
      "step": 127810
    },
    {
      "epoch": 0.44797723313952065,
      "grad_norm": 3.0,
      "learning_rate": 4.7259800981850565e-05,
      "loss": 0.9471,
      "step": 127820
    },
    {
      "epoch": 0.44801228064641624,
      "grad_norm": 3.3125,
      "learning_rate": 4.7259151953186866e-05,
      "loss": 0.9073,
      "step": 127830
    },
    {
      "epoch": 0.44804732815331183,
      "grad_norm": 2.90625,
      "learning_rate": 4.725850292452316e-05,
      "loss": 0.8938,
      "step": 127840
    },
    {
      "epoch": 0.4480823756602074,
      "grad_norm": 3.0625,
      "learning_rate": 4.725785389585946e-05,
      "loss": 0.9391,
      "step": 127850
    },
    {
      "epoch": 0.448117423167103,
      "grad_norm": 3.0625,
      "learning_rate": 4.7257204867195757e-05,
      "loss": 0.9494,
      "step": 127860
    },
    {
      "epoch": 0.4481524706739986,
      "grad_norm": 2.953125,
      "learning_rate": 4.725655583853206e-05,
      "loss": 0.9558,
      "step": 127870
    },
    {
      "epoch": 0.4481875181808942,
      "grad_norm": 3.125,
      "learning_rate": 4.725590680986835e-05,
      "loss": 1.0269,
      "step": 127880
    },
    {
      "epoch": 0.4482225656877898,
      "grad_norm": 3.375,
      "learning_rate": 4.7255257781204654e-05,
      "loss": 1.0185,
      "step": 127890
    },
    {
      "epoch": 0.4482576131946854,
      "grad_norm": 3.5,
      "learning_rate": 4.7254608752540955e-05,
      "loss": 0.9295,
      "step": 127900
    },
    {
      "epoch": 0.44829266070158097,
      "grad_norm": 2.8125,
      "learning_rate": 4.725395972387725e-05,
      "loss": 0.9006,
      "step": 127910
    },
    {
      "epoch": 0.4483277082084766,
      "grad_norm": 2.734375,
      "learning_rate": 4.7253310695213545e-05,
      "loss": 0.9166,
      "step": 127920
    },
    {
      "epoch": 0.4483627557153722,
      "grad_norm": 2.875,
      "learning_rate": 4.725266166654984e-05,
      "loss": 0.9747,
      "step": 127930
    },
    {
      "epoch": 0.4483978032222678,
      "grad_norm": 3.015625,
      "learning_rate": 4.725201263788614e-05,
      "loss": 0.9536,
      "step": 127940
    },
    {
      "epoch": 0.4484328507291634,
      "grad_norm": 3.109375,
      "learning_rate": 4.7251363609222435e-05,
      "loss": 0.9738,
      "step": 127950
    },
    {
      "epoch": 0.448467898236059,
      "grad_norm": 3.015625,
      "learning_rate": 4.7250714580558737e-05,
      "loss": 1.004,
      "step": 127960
    },
    {
      "epoch": 0.4485029457429546,
      "grad_norm": 2.84375,
      "learning_rate": 4.725006555189503e-05,
      "loss": 0.9036,
      "step": 127970
    },
    {
      "epoch": 0.44853799324985016,
      "grad_norm": 2.921875,
      "learning_rate": 4.724941652323133e-05,
      "loss": 0.9471,
      "step": 127980
    },
    {
      "epoch": 0.44857304075674576,
      "grad_norm": 3.15625,
      "learning_rate": 4.7248767494567634e-05,
      "loss": 1.0568,
      "step": 127990
    },
    {
      "epoch": 0.44860808826364135,
      "grad_norm": 3.28125,
      "learning_rate": 4.724811846590393e-05,
      "loss": 1.0383,
      "step": 128000
    },
    {
      "epoch": 0.44864313577053694,
      "grad_norm": 3.390625,
      "learning_rate": 4.724746943724023e-05,
      "loss": 0.9087,
      "step": 128010
    },
    {
      "epoch": 0.4486781832774326,
      "grad_norm": 3.21875,
      "learning_rate": 4.7246820408576525e-05,
      "loss": 0.8997,
      "step": 128020
    },
    {
      "epoch": 0.4487132307843282,
      "grad_norm": 2.859375,
      "learning_rate": 4.7246171379912826e-05,
      "loss": 0.8674,
      "step": 128030
    },
    {
      "epoch": 0.44874827829122377,
      "grad_norm": 3.0625,
      "learning_rate": 4.724552235124912e-05,
      "loss": 0.9713,
      "step": 128040
    },
    {
      "epoch": 0.44878332579811936,
      "grad_norm": 2.828125,
      "learning_rate": 4.724487332258542e-05,
      "loss": 0.8983,
      "step": 128050
    },
    {
      "epoch": 0.44881837330501495,
      "grad_norm": 2.46875,
      "learning_rate": 4.7244224293921717e-05,
      "loss": 0.9387,
      "step": 128060
    },
    {
      "epoch": 0.44885342081191054,
      "grad_norm": 2.984375,
      "learning_rate": 4.724357526525802e-05,
      "loss": 0.9268,
      "step": 128070
    },
    {
      "epoch": 0.44888846831880613,
      "grad_norm": 8.3125,
      "learning_rate": 4.724292623659431e-05,
      "loss": 0.8376,
      "step": 128080
    },
    {
      "epoch": 0.4489235158257017,
      "grad_norm": 3.375,
      "learning_rate": 4.7242277207930614e-05,
      "loss": 0.9675,
      "step": 128090
    },
    {
      "epoch": 0.4489585633325973,
      "grad_norm": 2.8125,
      "learning_rate": 4.724162817926691e-05,
      "loss": 0.965,
      "step": 128100
    },
    {
      "epoch": 0.4489936108394929,
      "grad_norm": 2.921875,
      "learning_rate": 4.724097915060321e-05,
      "loss": 0.9386,
      "step": 128110
    },
    {
      "epoch": 0.44902865834638855,
      "grad_norm": 3.0625,
      "learning_rate": 4.7240330121939505e-05,
      "loss": 0.941,
      "step": 128120
    },
    {
      "epoch": 0.44906370585328415,
      "grad_norm": 3.40625,
      "learning_rate": 4.7239681093275806e-05,
      "loss": 0.9364,
      "step": 128130
    },
    {
      "epoch": 0.44909875336017974,
      "grad_norm": 2.3125,
      "learning_rate": 4.723903206461211e-05,
      "loss": 0.998,
      "step": 128140
    },
    {
      "epoch": 0.44913380086707533,
      "grad_norm": 3.375,
      "learning_rate": 4.72383830359484e-05,
      "loss": 0.9072,
      "step": 128150
    },
    {
      "epoch": 0.4491688483739709,
      "grad_norm": 2.9375,
      "learning_rate": 4.72377340072847e-05,
      "loss": 0.9955,
      "step": 128160
    },
    {
      "epoch": 0.4492038958808665,
      "grad_norm": 3.125,
      "learning_rate": 4.7237084978621e-05,
      "loss": 0.9166,
      "step": 128170
    },
    {
      "epoch": 0.4492389433877621,
      "grad_norm": 3.234375,
      "learning_rate": 4.72364359499573e-05,
      "loss": 0.9655,
      "step": 128180
    },
    {
      "epoch": 0.4492739908946577,
      "grad_norm": 2.796875,
      "learning_rate": 4.7235786921293594e-05,
      "loss": 0.9468,
      "step": 128190
    },
    {
      "epoch": 0.4493090384015533,
      "grad_norm": 3.25,
      "learning_rate": 4.7235137892629895e-05,
      "loss": 1.0158,
      "step": 128200
    },
    {
      "epoch": 0.4493440859084489,
      "grad_norm": 2.96875,
      "learning_rate": 4.723448886396619e-05,
      "loss": 0.8933,
      "step": 128210
    },
    {
      "epoch": 0.4493791334153445,
      "grad_norm": 3.0625,
      "learning_rate": 4.723383983530249e-05,
      "loss": 0.9379,
      "step": 128220
    },
    {
      "epoch": 0.4494141809222401,
      "grad_norm": 3.265625,
      "learning_rate": 4.7233190806638786e-05,
      "loss": 0.9417,
      "step": 128230
    },
    {
      "epoch": 0.4494492284291357,
      "grad_norm": 3.203125,
      "learning_rate": 4.723254177797509e-05,
      "loss": 0.9512,
      "step": 128240
    },
    {
      "epoch": 0.4494842759360313,
      "grad_norm": 3.1875,
      "learning_rate": 4.723189274931138e-05,
      "loss": 0.9655,
      "step": 128250
    },
    {
      "epoch": 0.4495193234429269,
      "grad_norm": 2.84375,
      "learning_rate": 4.723124372064768e-05,
      "loss": 0.9806,
      "step": 128260
    },
    {
      "epoch": 0.4495543709498225,
      "grad_norm": 3.1875,
      "learning_rate": 4.7230594691983985e-05,
      "loss": 0.9326,
      "step": 128270
    },
    {
      "epoch": 0.44958941845671807,
      "grad_norm": 3.078125,
      "learning_rate": 4.722994566332028e-05,
      "loss": 1.0225,
      "step": 128280
    },
    {
      "epoch": 0.44962446596361366,
      "grad_norm": 2.890625,
      "learning_rate": 4.722929663465658e-05,
      "loss": 0.9127,
      "step": 128290
    },
    {
      "epoch": 0.44965951347050925,
      "grad_norm": 3.390625,
      "learning_rate": 4.722864760599287e-05,
      "loss": 0.9275,
      "step": 128300
    },
    {
      "epoch": 0.44969456097740484,
      "grad_norm": 3.296875,
      "learning_rate": 4.722799857732917e-05,
      "loss": 0.9302,
      "step": 128310
    },
    {
      "epoch": 0.4497296084843005,
      "grad_norm": 3.0,
      "learning_rate": 4.7227349548665465e-05,
      "loss": 0.9358,
      "step": 128320
    },
    {
      "epoch": 0.4497646559911961,
      "grad_norm": 2.859375,
      "learning_rate": 4.7226700520001766e-05,
      "loss": 0.9412,
      "step": 128330
    },
    {
      "epoch": 0.4497997034980917,
      "grad_norm": 2.71875,
      "learning_rate": 4.722605149133806e-05,
      "loss": 0.9441,
      "step": 128340
    },
    {
      "epoch": 0.44983475100498727,
      "grad_norm": 3.15625,
      "learning_rate": 4.722540246267436e-05,
      "loss": 1.0159,
      "step": 128350
    },
    {
      "epoch": 0.44986979851188286,
      "grad_norm": 2.859375,
      "learning_rate": 4.722475343401066e-05,
      "loss": 0.9192,
      "step": 128360
    },
    {
      "epoch": 0.44990484601877845,
      "grad_norm": 3.59375,
      "learning_rate": 4.722410440534696e-05,
      "loss": 1.0088,
      "step": 128370
    },
    {
      "epoch": 0.44993989352567404,
      "grad_norm": 2.984375,
      "learning_rate": 4.722345537668326e-05,
      "loss": 0.8978,
      "step": 128380
    },
    {
      "epoch": 0.44997494103256963,
      "grad_norm": 2.890625,
      "learning_rate": 4.7222806348019554e-05,
      "loss": 0.8982,
      "step": 128390
    },
    {
      "epoch": 0.4500099885394652,
      "grad_norm": 2.9375,
      "learning_rate": 4.7222157319355855e-05,
      "loss": 0.9155,
      "step": 128400
    },
    {
      "epoch": 0.45004503604636087,
      "grad_norm": 3.0625,
      "learning_rate": 4.722150829069215e-05,
      "loss": 1.0034,
      "step": 128410
    },
    {
      "epoch": 0.45008008355325646,
      "grad_norm": 3.0625,
      "learning_rate": 4.722085926202845e-05,
      "loss": 1.0148,
      "step": 128420
    },
    {
      "epoch": 0.45011513106015205,
      "grad_norm": 3.375,
      "learning_rate": 4.7220210233364746e-05,
      "loss": 1.0631,
      "step": 128430
    },
    {
      "epoch": 0.45015017856704764,
      "grad_norm": 2.671875,
      "learning_rate": 4.721956120470105e-05,
      "loss": 0.8162,
      "step": 128440
    },
    {
      "epoch": 0.45018522607394323,
      "grad_norm": 3.03125,
      "learning_rate": 4.721891217603734e-05,
      "loss": 0.9793,
      "step": 128450
    },
    {
      "epoch": 0.4502202735808388,
      "grad_norm": 3.03125,
      "learning_rate": 4.721826314737364e-05,
      "loss": 0.9913,
      "step": 128460
    },
    {
      "epoch": 0.4502553210877344,
      "grad_norm": 2.78125,
      "learning_rate": 4.721761411870994e-05,
      "loss": 0.9387,
      "step": 128470
    },
    {
      "epoch": 0.45029036859463,
      "grad_norm": 3.671875,
      "learning_rate": 4.721696509004624e-05,
      "loss": 0.8946,
      "step": 128480
    },
    {
      "epoch": 0.4503254161015256,
      "grad_norm": 2.9375,
      "learning_rate": 4.7216316061382534e-05,
      "loss": 0.9272,
      "step": 128490
    },
    {
      "epoch": 0.4503604636084212,
      "grad_norm": 3.203125,
      "learning_rate": 4.7215667032718835e-05,
      "loss": 0.9354,
      "step": 128500
    },
    {
      "epoch": 0.45039551111531684,
      "grad_norm": 3.015625,
      "learning_rate": 4.721501800405514e-05,
      "loss": 0.9194,
      "step": 128510
    },
    {
      "epoch": 0.45043055862221243,
      "grad_norm": 3.109375,
      "learning_rate": 4.721436897539143e-05,
      "loss": 0.9414,
      "step": 128520
    },
    {
      "epoch": 0.450465606129108,
      "grad_norm": 2.609375,
      "learning_rate": 4.721371994672773e-05,
      "loss": 0.9195,
      "step": 128530
    },
    {
      "epoch": 0.4505006536360036,
      "grad_norm": 3.390625,
      "learning_rate": 4.721307091806403e-05,
      "loss": 1.0003,
      "step": 128540
    },
    {
      "epoch": 0.4505357011428992,
      "grad_norm": 3.03125,
      "learning_rate": 4.721242188940033e-05,
      "loss": 0.9533,
      "step": 128550
    },
    {
      "epoch": 0.4505707486497948,
      "grad_norm": 3.03125,
      "learning_rate": 4.721177286073662e-05,
      "loss": 1.0568,
      "step": 128560
    },
    {
      "epoch": 0.4506057961566904,
      "grad_norm": 3.359375,
      "learning_rate": 4.7211123832072925e-05,
      "loss": 0.9529,
      "step": 128570
    },
    {
      "epoch": 0.450640843663586,
      "grad_norm": 3.171875,
      "learning_rate": 4.721047480340922e-05,
      "loss": 0.9736,
      "step": 128580
    },
    {
      "epoch": 0.45067589117048157,
      "grad_norm": 2.984375,
      "learning_rate": 4.720982577474552e-05,
      "loss": 0.9709,
      "step": 128590
    },
    {
      "epoch": 0.45071093867737716,
      "grad_norm": 2.625,
      "learning_rate": 4.7209176746081815e-05,
      "loss": 0.866,
      "step": 128600
    },
    {
      "epoch": 0.4507459861842728,
      "grad_norm": 2.90625,
      "learning_rate": 4.720852771741812e-05,
      "loss": 0.9717,
      "step": 128610
    },
    {
      "epoch": 0.4507810336911684,
      "grad_norm": 2.953125,
      "learning_rate": 4.720787868875441e-05,
      "loss": 0.8881,
      "step": 128620
    },
    {
      "epoch": 0.450816081198064,
      "grad_norm": 3.171875,
      "learning_rate": 4.720722966009071e-05,
      "loss": 0.913,
      "step": 128630
    },
    {
      "epoch": 0.4508511287049596,
      "grad_norm": 2.875,
      "learning_rate": 4.7206580631427014e-05,
      "loss": 0.9836,
      "step": 128640
    },
    {
      "epoch": 0.45088617621185517,
      "grad_norm": 3.109375,
      "learning_rate": 4.720593160276331e-05,
      "loss": 0.9123,
      "step": 128650
    },
    {
      "epoch": 0.45092122371875076,
      "grad_norm": 3.109375,
      "learning_rate": 4.720528257409961e-05,
      "loss": 0.9632,
      "step": 128660
    },
    {
      "epoch": 0.45095627122564635,
      "grad_norm": 3.75,
      "learning_rate": 4.7204633545435905e-05,
      "loss": 0.935,
      "step": 128670
    },
    {
      "epoch": 0.45099131873254195,
      "grad_norm": 2.84375,
      "learning_rate": 4.72039845167722e-05,
      "loss": 0.9403,
      "step": 128680
    },
    {
      "epoch": 0.45102636623943754,
      "grad_norm": 2.90625,
      "learning_rate": 4.7203335488108494e-05,
      "loss": 0.9406,
      "step": 128690
    },
    {
      "epoch": 0.45106141374633313,
      "grad_norm": 3.109375,
      "learning_rate": 4.7202686459444795e-05,
      "loss": 0.9763,
      "step": 128700
    },
    {
      "epoch": 0.4510964612532288,
      "grad_norm": 2.828125,
      "learning_rate": 4.720203743078109e-05,
      "loss": 0.9879,
      "step": 128710
    },
    {
      "epoch": 0.45113150876012437,
      "grad_norm": 3.15625,
      "learning_rate": 4.720138840211739e-05,
      "loss": 0.9665,
      "step": 128720
    },
    {
      "epoch": 0.45116655626701996,
      "grad_norm": 2.859375,
      "learning_rate": 4.720073937345369e-05,
      "loss": 0.9421,
      "step": 128730
    },
    {
      "epoch": 0.45120160377391555,
      "grad_norm": 3.046875,
      "learning_rate": 4.720009034478999e-05,
      "loss": 1.0359,
      "step": 128740
    },
    {
      "epoch": 0.45123665128081114,
      "grad_norm": 2.9375,
      "learning_rate": 4.719944131612629e-05,
      "loss": 0.8926,
      "step": 128750
    },
    {
      "epoch": 0.45127169878770673,
      "grad_norm": 3.0625,
      "learning_rate": 4.719879228746258e-05,
      "loss": 0.8741,
      "step": 128760
    },
    {
      "epoch": 0.4513067462946023,
      "grad_norm": 2.765625,
      "learning_rate": 4.7198143258798885e-05,
      "loss": 0.9276,
      "step": 128770
    },
    {
      "epoch": 0.4513417938014979,
      "grad_norm": 2.96875,
      "learning_rate": 4.719749423013518e-05,
      "loss": 0.9304,
      "step": 128780
    },
    {
      "epoch": 0.4513768413083935,
      "grad_norm": 2.859375,
      "learning_rate": 4.719684520147148e-05,
      "loss": 0.9751,
      "step": 128790
    },
    {
      "epoch": 0.4514118888152891,
      "grad_norm": 2.78125,
      "learning_rate": 4.7196196172807775e-05,
      "loss": 0.9704,
      "step": 128800
    },
    {
      "epoch": 0.45144693632218474,
      "grad_norm": 2.59375,
      "learning_rate": 4.719554714414408e-05,
      "loss": 0.9911,
      "step": 128810
    },
    {
      "epoch": 0.45148198382908034,
      "grad_norm": 3.28125,
      "learning_rate": 4.719489811548037e-05,
      "loss": 0.8837,
      "step": 128820
    },
    {
      "epoch": 0.4515170313359759,
      "grad_norm": 3.03125,
      "learning_rate": 4.719424908681667e-05,
      "loss": 0.9268,
      "step": 128830
    },
    {
      "epoch": 0.4515520788428715,
      "grad_norm": 3.25,
      "learning_rate": 4.719360005815297e-05,
      "loss": 1.0215,
      "step": 128840
    },
    {
      "epoch": 0.4515871263497671,
      "grad_norm": 3.515625,
      "learning_rate": 4.719295102948927e-05,
      "loss": 0.9459,
      "step": 128850
    },
    {
      "epoch": 0.4516221738566627,
      "grad_norm": 2.9375,
      "learning_rate": 4.719230200082557e-05,
      "loss": 0.9806,
      "step": 128860
    },
    {
      "epoch": 0.4516572213635583,
      "grad_norm": 2.828125,
      "learning_rate": 4.7191652972161865e-05,
      "loss": 0.8499,
      "step": 128870
    },
    {
      "epoch": 0.4516922688704539,
      "grad_norm": 3.125,
      "learning_rate": 4.7191003943498166e-05,
      "loss": 0.9183,
      "step": 128880
    },
    {
      "epoch": 0.4517273163773495,
      "grad_norm": 2.96875,
      "learning_rate": 4.719035491483446e-05,
      "loss": 0.8966,
      "step": 128890
    },
    {
      "epoch": 0.4517623638842451,
      "grad_norm": 2.890625,
      "learning_rate": 4.718970588617076e-05,
      "loss": 1.0266,
      "step": 128900
    },
    {
      "epoch": 0.4517974113911407,
      "grad_norm": 3.265625,
      "learning_rate": 4.718905685750706e-05,
      "loss": 0.9323,
      "step": 128910
    },
    {
      "epoch": 0.4518324588980363,
      "grad_norm": 2.671875,
      "learning_rate": 4.718840782884336e-05,
      "loss": 0.9103,
      "step": 128920
    },
    {
      "epoch": 0.4518675064049319,
      "grad_norm": 2.734375,
      "learning_rate": 4.718775880017965e-05,
      "loss": 0.8974,
      "step": 128930
    },
    {
      "epoch": 0.4519025539118275,
      "grad_norm": 3.3125,
      "learning_rate": 4.7187109771515954e-05,
      "loss": 0.9713,
      "step": 128940
    },
    {
      "epoch": 0.4519376014187231,
      "grad_norm": 3.328125,
      "learning_rate": 4.718646074285225e-05,
      "loss": 0.9124,
      "step": 128950
    },
    {
      "epoch": 0.45197264892561867,
      "grad_norm": 3.140625,
      "learning_rate": 4.718581171418855e-05,
      "loss": 0.8541,
      "step": 128960
    },
    {
      "epoch": 0.45200769643251426,
      "grad_norm": 3.390625,
      "learning_rate": 4.7185162685524845e-05,
      "loss": 0.9614,
      "step": 128970
    },
    {
      "epoch": 0.45204274393940985,
      "grad_norm": 2.703125,
      "learning_rate": 4.7184513656861146e-05,
      "loss": 0.8689,
      "step": 128980
    },
    {
      "epoch": 0.45207779144630544,
      "grad_norm": 3.1875,
      "learning_rate": 4.718386462819744e-05,
      "loss": 0.8908,
      "step": 128990
    },
    {
      "epoch": 0.4521128389532011,
      "grad_norm": 3.453125,
      "learning_rate": 4.718321559953374e-05,
      "loss": 0.8304,
      "step": 129000
    },
    {
      "epoch": 0.4521478864600967,
      "grad_norm": 3.109375,
      "learning_rate": 4.7182566570870043e-05,
      "loss": 1.0286,
      "step": 129010
    },
    {
      "epoch": 0.4521829339669923,
      "grad_norm": 3.109375,
      "learning_rate": 4.718191754220634e-05,
      "loss": 0.9771,
      "step": 129020
    },
    {
      "epoch": 0.45221798147388786,
      "grad_norm": 3.375,
      "learning_rate": 4.718126851354264e-05,
      "loss": 1.0525,
      "step": 129030
    },
    {
      "epoch": 0.45225302898078346,
      "grad_norm": 2.9375,
      "learning_rate": 4.7180619484878934e-05,
      "loss": 0.8411,
      "step": 129040
    },
    {
      "epoch": 0.45228807648767905,
      "grad_norm": 3.046875,
      "learning_rate": 4.717997045621523e-05,
      "loss": 1.0439,
      "step": 129050
    },
    {
      "epoch": 0.45232312399457464,
      "grad_norm": 3.25,
      "learning_rate": 4.717932142755152e-05,
      "loss": 0.9088,
      "step": 129060
    },
    {
      "epoch": 0.45235817150147023,
      "grad_norm": 2.9375,
      "learning_rate": 4.7178672398887825e-05,
      "loss": 0.9145,
      "step": 129070
    },
    {
      "epoch": 0.4523932190083658,
      "grad_norm": 2.640625,
      "learning_rate": 4.717802337022412e-05,
      "loss": 0.8528,
      "step": 129080
    },
    {
      "epoch": 0.4524282665152614,
      "grad_norm": 2.796875,
      "learning_rate": 4.717737434156042e-05,
      "loss": 0.925,
      "step": 129090
    },
    {
      "epoch": 0.45246331402215706,
      "grad_norm": 2.890625,
      "learning_rate": 4.717672531289672e-05,
      "loss": 0.9045,
      "step": 129100
    },
    {
      "epoch": 0.45249836152905265,
      "grad_norm": 2.671875,
      "learning_rate": 4.717607628423302e-05,
      "loss": 0.9645,
      "step": 129110
    },
    {
      "epoch": 0.45253340903594824,
      "grad_norm": 3.703125,
      "learning_rate": 4.717542725556932e-05,
      "loss": 1.0218,
      "step": 129120
    },
    {
      "epoch": 0.45256845654284383,
      "grad_norm": 3.0625,
      "learning_rate": 4.717477822690561e-05,
      "loss": 0.9227,
      "step": 129130
    },
    {
      "epoch": 0.4526035040497394,
      "grad_norm": 3.4375,
      "learning_rate": 4.7174129198241914e-05,
      "loss": 0.9003,
      "step": 129140
    },
    {
      "epoch": 0.452638551556635,
      "grad_norm": 3.125,
      "learning_rate": 4.717348016957821e-05,
      "loss": 0.9243,
      "step": 129150
    },
    {
      "epoch": 0.4526735990635306,
      "grad_norm": 3.0625,
      "learning_rate": 4.717283114091451e-05,
      "loss": 0.9084,
      "step": 129160
    },
    {
      "epoch": 0.4527086465704262,
      "grad_norm": 3.28125,
      "learning_rate": 4.7172182112250805e-05,
      "loss": 0.9438,
      "step": 129170
    },
    {
      "epoch": 0.4527436940773218,
      "grad_norm": 3.09375,
      "learning_rate": 4.7171533083587106e-05,
      "loss": 0.9298,
      "step": 129180
    },
    {
      "epoch": 0.4527787415842174,
      "grad_norm": 3.09375,
      "learning_rate": 4.71708840549234e-05,
      "loss": 0.9893,
      "step": 129190
    },
    {
      "epoch": 0.45281378909111303,
      "grad_norm": 3.515625,
      "learning_rate": 4.71702350262597e-05,
      "loss": 0.9678,
      "step": 129200
    },
    {
      "epoch": 0.4528488365980086,
      "grad_norm": 3.015625,
      "learning_rate": 4.7169585997596e-05,
      "loss": 0.8911,
      "step": 129210
    },
    {
      "epoch": 0.4528838841049042,
      "grad_norm": 3.359375,
      "learning_rate": 4.71689369689323e-05,
      "loss": 0.9049,
      "step": 129220
    },
    {
      "epoch": 0.4529189316117998,
      "grad_norm": 2.921875,
      "learning_rate": 4.71682879402686e-05,
      "loss": 1.0498,
      "step": 129230
    },
    {
      "epoch": 0.4529539791186954,
      "grad_norm": 2.953125,
      "learning_rate": 4.7167638911604894e-05,
      "loss": 0.9683,
      "step": 129240
    },
    {
      "epoch": 0.452989026625591,
      "grad_norm": 2.9375,
      "learning_rate": 4.7166989882941195e-05,
      "loss": 0.9715,
      "step": 129250
    },
    {
      "epoch": 0.4530240741324866,
      "grad_norm": 3.03125,
      "learning_rate": 4.716634085427749e-05,
      "loss": 0.8793,
      "step": 129260
    },
    {
      "epoch": 0.45305912163938217,
      "grad_norm": 2.484375,
      "learning_rate": 4.716569182561379e-05,
      "loss": 0.898,
      "step": 129270
    },
    {
      "epoch": 0.45309416914627776,
      "grad_norm": 2.765625,
      "learning_rate": 4.7165042796950086e-05,
      "loss": 0.9789,
      "step": 129280
    },
    {
      "epoch": 0.45312921665317335,
      "grad_norm": 3.359375,
      "learning_rate": 4.716439376828639e-05,
      "loss": 0.984,
      "step": 129290
    },
    {
      "epoch": 0.453164264160069,
      "grad_norm": 2.765625,
      "learning_rate": 4.716374473962268e-05,
      "loss": 0.8847,
      "step": 129300
    },
    {
      "epoch": 0.4531993116669646,
      "grad_norm": 3.03125,
      "learning_rate": 4.7163095710958983e-05,
      "loss": 0.8954,
      "step": 129310
    },
    {
      "epoch": 0.4532343591738602,
      "grad_norm": 2.90625,
      "learning_rate": 4.716244668229528e-05,
      "loss": 0.9669,
      "step": 129320
    },
    {
      "epoch": 0.45326940668075577,
      "grad_norm": 3.71875,
      "learning_rate": 4.716179765363158e-05,
      "loss": 0.9472,
      "step": 129330
    },
    {
      "epoch": 0.45330445418765136,
      "grad_norm": 3.125,
      "learning_rate": 4.7161148624967874e-05,
      "loss": 0.965,
      "step": 129340
    },
    {
      "epoch": 0.45333950169454695,
      "grad_norm": 3.125,
      "learning_rate": 4.7160499596304175e-05,
      "loss": 0.9576,
      "step": 129350
    },
    {
      "epoch": 0.45337454920144254,
      "grad_norm": 3.125,
      "learning_rate": 4.715985056764047e-05,
      "loss": 0.9455,
      "step": 129360
    },
    {
      "epoch": 0.45340959670833814,
      "grad_norm": 3.03125,
      "learning_rate": 4.715920153897677e-05,
      "loss": 0.9852,
      "step": 129370
    },
    {
      "epoch": 0.4534446442152337,
      "grad_norm": 3.234375,
      "learning_rate": 4.715855251031307e-05,
      "loss": 0.987,
      "step": 129380
    },
    {
      "epoch": 0.4534796917221293,
      "grad_norm": 3.265625,
      "learning_rate": 4.715790348164937e-05,
      "loss": 0.9009,
      "step": 129390
    },
    {
      "epoch": 0.45351473922902497,
      "grad_norm": 3.1875,
      "learning_rate": 4.715725445298567e-05,
      "loss": 0.8392,
      "step": 129400
    },
    {
      "epoch": 0.45354978673592056,
      "grad_norm": 3.28125,
      "learning_rate": 4.7156605424321963e-05,
      "loss": 1.0329,
      "step": 129410
    },
    {
      "epoch": 0.45358483424281615,
      "grad_norm": 3.15625,
      "learning_rate": 4.7155956395658265e-05,
      "loss": 0.877,
      "step": 129420
    },
    {
      "epoch": 0.45361988174971174,
      "grad_norm": 3.015625,
      "learning_rate": 4.715530736699455e-05,
      "loss": 0.894,
      "step": 129430
    },
    {
      "epoch": 0.45365492925660733,
      "grad_norm": 2.859375,
      "learning_rate": 4.7154658338330854e-05,
      "loss": 0.868,
      "step": 129440
    },
    {
      "epoch": 0.4536899767635029,
      "grad_norm": 2.5625,
      "learning_rate": 4.715400930966715e-05,
      "loss": 0.8546,
      "step": 129450
    },
    {
      "epoch": 0.4537250242703985,
      "grad_norm": 3.078125,
      "learning_rate": 4.715336028100345e-05,
      "loss": 0.9691,
      "step": 129460
    },
    {
      "epoch": 0.4537600717772941,
      "grad_norm": 2.78125,
      "learning_rate": 4.715271125233975e-05,
      "loss": 0.9413,
      "step": 129470
    },
    {
      "epoch": 0.4537951192841897,
      "grad_norm": 3.0625,
      "learning_rate": 4.7152062223676046e-05,
      "loss": 0.9511,
      "step": 129480
    },
    {
      "epoch": 0.45383016679108534,
      "grad_norm": 3.1875,
      "learning_rate": 4.715141319501235e-05,
      "loss": 1.0286,
      "step": 129490
    },
    {
      "epoch": 0.45386521429798093,
      "grad_norm": 2.640625,
      "learning_rate": 4.715076416634864e-05,
      "loss": 0.9225,
      "step": 129500
    },
    {
      "epoch": 0.4539002618048765,
      "grad_norm": 3.34375,
      "learning_rate": 4.7150115137684943e-05,
      "loss": 0.9302,
      "step": 129510
    },
    {
      "epoch": 0.4539353093117721,
      "grad_norm": 2.984375,
      "learning_rate": 4.714946610902124e-05,
      "loss": 1.0032,
      "step": 129520
    },
    {
      "epoch": 0.4539703568186677,
      "grad_norm": 2.9375,
      "learning_rate": 4.714881708035754e-05,
      "loss": 0.8732,
      "step": 129530
    },
    {
      "epoch": 0.4540054043255633,
      "grad_norm": 3.046875,
      "learning_rate": 4.7148168051693834e-05,
      "loss": 0.9282,
      "step": 129540
    },
    {
      "epoch": 0.4540404518324589,
      "grad_norm": 2.5625,
      "learning_rate": 4.7147519023030135e-05,
      "loss": 0.9134,
      "step": 129550
    },
    {
      "epoch": 0.4540754993393545,
      "grad_norm": 2.765625,
      "learning_rate": 4.714686999436643e-05,
      "loss": 0.9596,
      "step": 129560
    },
    {
      "epoch": 0.4541105468462501,
      "grad_norm": 3.28125,
      "learning_rate": 4.714622096570273e-05,
      "loss": 0.9734,
      "step": 129570
    },
    {
      "epoch": 0.45414559435314567,
      "grad_norm": 3.375,
      "learning_rate": 4.7145571937039026e-05,
      "loss": 0.9067,
      "step": 129580
    },
    {
      "epoch": 0.4541806418600413,
      "grad_norm": 2.671875,
      "learning_rate": 4.714492290837533e-05,
      "loss": 0.9296,
      "step": 129590
    },
    {
      "epoch": 0.4542156893669369,
      "grad_norm": 3.359375,
      "learning_rate": 4.714427387971163e-05,
      "loss": 0.8332,
      "step": 129600
    },
    {
      "epoch": 0.4542507368738325,
      "grad_norm": 2.875,
      "learning_rate": 4.7143624851047923e-05,
      "loss": 0.9444,
      "step": 129610
    },
    {
      "epoch": 0.4542857843807281,
      "grad_norm": 3.0625,
      "learning_rate": 4.7142975822384225e-05,
      "loss": 0.947,
      "step": 129620
    },
    {
      "epoch": 0.4543208318876237,
      "grad_norm": 3.140625,
      "learning_rate": 4.714232679372052e-05,
      "loss": 0.9377,
      "step": 129630
    },
    {
      "epoch": 0.45435587939451927,
      "grad_norm": 3.078125,
      "learning_rate": 4.714167776505682e-05,
      "loss": 0.919,
      "step": 129640
    },
    {
      "epoch": 0.45439092690141486,
      "grad_norm": 3.09375,
      "learning_rate": 4.7141028736393115e-05,
      "loss": 0.9544,
      "step": 129650
    },
    {
      "epoch": 0.45442597440831045,
      "grad_norm": 2.890625,
      "learning_rate": 4.714037970772942e-05,
      "loss": 0.9956,
      "step": 129660
    },
    {
      "epoch": 0.45446102191520604,
      "grad_norm": 3.453125,
      "learning_rate": 4.713973067906571e-05,
      "loss": 0.9282,
      "step": 129670
    },
    {
      "epoch": 0.45449606942210163,
      "grad_norm": 3.234375,
      "learning_rate": 4.713908165040201e-05,
      "loss": 0.9712,
      "step": 129680
    },
    {
      "epoch": 0.4545311169289973,
      "grad_norm": 2.703125,
      "learning_rate": 4.713843262173831e-05,
      "loss": 0.9374,
      "step": 129690
    },
    {
      "epoch": 0.45456616443589287,
      "grad_norm": 2.734375,
      "learning_rate": 4.713778359307461e-05,
      "loss": 0.866,
      "step": 129700
    },
    {
      "epoch": 0.45460121194278846,
      "grad_norm": 3.28125,
      "learning_rate": 4.7137134564410903e-05,
      "loss": 0.9538,
      "step": 129710
    },
    {
      "epoch": 0.45463625944968405,
      "grad_norm": 2.734375,
      "learning_rate": 4.7136485535747205e-05,
      "loss": 0.909,
      "step": 129720
    },
    {
      "epoch": 0.45467130695657965,
      "grad_norm": 3.75,
      "learning_rate": 4.7135836507083506e-05,
      "loss": 1.0322,
      "step": 129730
    },
    {
      "epoch": 0.45470635446347524,
      "grad_norm": 3.40625,
      "learning_rate": 4.71351874784198e-05,
      "loss": 1.0004,
      "step": 129740
    },
    {
      "epoch": 0.45474140197037083,
      "grad_norm": 3.3125,
      "learning_rate": 4.71345384497561e-05,
      "loss": 0.9734,
      "step": 129750
    },
    {
      "epoch": 0.4547764494772664,
      "grad_norm": 2.828125,
      "learning_rate": 4.71338894210924e-05,
      "loss": 0.9461,
      "step": 129760
    },
    {
      "epoch": 0.454811496984162,
      "grad_norm": 2.828125,
      "learning_rate": 4.71332403924287e-05,
      "loss": 0.9877,
      "step": 129770
    },
    {
      "epoch": 0.4548465444910576,
      "grad_norm": 2.84375,
      "learning_rate": 4.713259136376499e-05,
      "loss": 0.8652,
      "step": 129780
    },
    {
      "epoch": 0.45488159199795325,
      "grad_norm": 2.953125,
      "learning_rate": 4.7131942335101294e-05,
      "loss": 0.876,
      "step": 129790
    },
    {
      "epoch": 0.45491663950484884,
      "grad_norm": 2.796875,
      "learning_rate": 4.713129330643758e-05,
      "loss": 0.8926,
      "step": 129800
    },
    {
      "epoch": 0.45495168701174443,
      "grad_norm": 2.875,
      "learning_rate": 4.7130644277773883e-05,
      "loss": 0.9241,
      "step": 129810
    },
    {
      "epoch": 0.45498673451864,
      "grad_norm": 3.140625,
      "learning_rate": 4.7129995249110185e-05,
      "loss": 0.996,
      "step": 129820
    },
    {
      "epoch": 0.4550217820255356,
      "grad_norm": 2.9375,
      "learning_rate": 4.712934622044648e-05,
      "loss": 0.9353,
      "step": 129830
    },
    {
      "epoch": 0.4550568295324312,
      "grad_norm": 3.5,
      "learning_rate": 4.712869719178278e-05,
      "loss": 0.9307,
      "step": 129840
    },
    {
      "epoch": 0.4550918770393268,
      "grad_norm": 2.453125,
      "learning_rate": 4.7128048163119075e-05,
      "loss": 0.9929,
      "step": 129850
    },
    {
      "epoch": 0.4551269245462224,
      "grad_norm": 2.59375,
      "learning_rate": 4.712739913445538e-05,
      "loss": 0.9537,
      "step": 129860
    },
    {
      "epoch": 0.455161972053118,
      "grad_norm": 3.0,
      "learning_rate": 4.712675010579167e-05,
      "loss": 1.0058,
      "step": 129870
    },
    {
      "epoch": 0.45519701956001357,
      "grad_norm": 3.171875,
      "learning_rate": 4.712610107712797e-05,
      "loss": 0.9037,
      "step": 129880
    },
    {
      "epoch": 0.4552320670669092,
      "grad_norm": 3.078125,
      "learning_rate": 4.712545204846427e-05,
      "loss": 0.9317,
      "step": 129890
    },
    {
      "epoch": 0.4552671145738048,
      "grad_norm": 3.375,
      "learning_rate": 4.712480301980057e-05,
      "loss": 0.9475,
      "step": 129900
    },
    {
      "epoch": 0.4553021620807004,
      "grad_norm": 3.15625,
      "learning_rate": 4.7124153991136863e-05,
      "loss": 0.878,
      "step": 129910
    },
    {
      "epoch": 0.455337209587596,
      "grad_norm": 3.390625,
      "learning_rate": 4.7123504962473165e-05,
      "loss": 0.9966,
      "step": 129920
    },
    {
      "epoch": 0.4553722570944916,
      "grad_norm": 2.4375,
      "learning_rate": 4.712285593380946e-05,
      "loss": 0.9014,
      "step": 129930
    },
    {
      "epoch": 0.4554073046013872,
      "grad_norm": 3.0,
      "learning_rate": 4.712220690514576e-05,
      "loss": 0.8797,
      "step": 129940
    },
    {
      "epoch": 0.45544235210828277,
      "grad_norm": 3.71875,
      "learning_rate": 4.7121557876482055e-05,
      "loss": 0.9369,
      "step": 129950
    },
    {
      "epoch": 0.45547739961517836,
      "grad_norm": 3.5,
      "learning_rate": 4.712090884781836e-05,
      "loss": 0.966,
      "step": 129960
    },
    {
      "epoch": 0.45551244712207395,
      "grad_norm": 2.703125,
      "learning_rate": 4.712025981915466e-05,
      "loss": 0.9127,
      "step": 129970
    },
    {
      "epoch": 0.4555474946289696,
      "grad_norm": 2.671875,
      "learning_rate": 4.711961079049095e-05,
      "loss": 0.9351,
      "step": 129980
    },
    {
      "epoch": 0.4555825421358652,
      "grad_norm": 3.15625,
      "learning_rate": 4.7118961761827254e-05,
      "loss": 0.8768,
      "step": 129990
    },
    {
      "epoch": 0.4556175896427608,
      "grad_norm": 3.0625,
      "learning_rate": 4.711831273316355e-05,
      "loss": 0.9009,
      "step": 130000
    },
    {
      "epoch": 0.4556175896427608,
      "eval_loss": 0.8807840943336487,
      "eval_runtime": 564.5873,
      "eval_samples_per_second": 673.83,
      "eval_steps_per_second": 56.153,
      "step": 130000
    },
    {
      "epoch": 0.45565263714965637,
      "grad_norm": 2.765625,
      "learning_rate": 4.711766370449985e-05,
      "loss": 0.9725,
      "step": 130010
    },
    {
      "epoch": 0.45568768465655196,
      "grad_norm": 2.765625,
      "learning_rate": 4.7117014675836145e-05,
      "loss": 0.9452,
      "step": 130020
    },
    {
      "epoch": 0.45572273216344755,
      "grad_norm": 2.828125,
      "learning_rate": 4.7116365647172446e-05,
      "loss": 0.9148,
      "step": 130030
    },
    {
      "epoch": 0.45575777967034314,
      "grad_norm": 3.21875,
      "learning_rate": 4.711571661850874e-05,
      "loss": 0.9232,
      "step": 130040
    },
    {
      "epoch": 0.45579282717723874,
      "grad_norm": 2.875,
      "learning_rate": 4.711506758984504e-05,
      "loss": 0.9057,
      "step": 130050
    },
    {
      "epoch": 0.4558278746841343,
      "grad_norm": 3.015625,
      "learning_rate": 4.711441856118134e-05,
      "loss": 0.9986,
      "step": 130060
    },
    {
      "epoch": 0.4558629221910299,
      "grad_norm": 2.8125,
      "learning_rate": 4.711376953251764e-05,
      "loss": 0.9649,
      "step": 130070
    },
    {
      "epoch": 0.45589796969792556,
      "grad_norm": 2.765625,
      "learning_rate": 4.711312050385393e-05,
      "loss": 0.9619,
      "step": 130080
    },
    {
      "epoch": 0.45593301720482116,
      "grad_norm": 3.796875,
      "learning_rate": 4.7112471475190234e-05,
      "loss": 0.9642,
      "step": 130090
    },
    {
      "epoch": 0.45596806471171675,
      "grad_norm": 3.109375,
      "learning_rate": 4.7111822446526536e-05,
      "loss": 0.9617,
      "step": 130100
    },
    {
      "epoch": 0.45600311221861234,
      "grad_norm": 2.96875,
      "learning_rate": 4.711117341786283e-05,
      "loss": 0.938,
      "step": 130110
    },
    {
      "epoch": 0.45603815972550793,
      "grad_norm": 2.921875,
      "learning_rate": 4.711052438919913e-05,
      "loss": 0.9978,
      "step": 130120
    },
    {
      "epoch": 0.4560732072324035,
      "grad_norm": 2.953125,
      "learning_rate": 4.7109875360535426e-05,
      "loss": 0.8716,
      "step": 130130
    },
    {
      "epoch": 0.4561082547392991,
      "grad_norm": 2.890625,
      "learning_rate": 4.710922633187173e-05,
      "loss": 0.9389,
      "step": 130140
    },
    {
      "epoch": 0.4561433022461947,
      "grad_norm": 3.078125,
      "learning_rate": 4.710857730320802e-05,
      "loss": 0.9004,
      "step": 130150
    },
    {
      "epoch": 0.4561783497530903,
      "grad_norm": 3.125,
      "learning_rate": 4.7107928274544324e-05,
      "loss": 0.9359,
      "step": 130160
    },
    {
      "epoch": 0.4562133972599859,
      "grad_norm": 3.4375,
      "learning_rate": 4.710727924588062e-05,
      "loss": 0.9998,
      "step": 130170
    },
    {
      "epoch": 0.45624844476688153,
      "grad_norm": 3.21875,
      "learning_rate": 4.710663021721691e-05,
      "loss": 0.9158,
      "step": 130180
    },
    {
      "epoch": 0.4562834922737771,
      "grad_norm": 2.828125,
      "learning_rate": 4.7105981188553214e-05,
      "loss": 0.9066,
      "step": 130190
    },
    {
      "epoch": 0.4563185397806727,
      "grad_norm": 2.78125,
      "learning_rate": 4.710533215988951e-05,
      "loss": 1.0181,
      "step": 130200
    },
    {
      "epoch": 0.4563535872875683,
      "grad_norm": 2.921875,
      "learning_rate": 4.710468313122581e-05,
      "loss": 0.9329,
      "step": 130210
    },
    {
      "epoch": 0.4563886347944639,
      "grad_norm": 3.203125,
      "learning_rate": 4.7104034102562105e-05,
      "loss": 0.9648,
      "step": 130220
    },
    {
      "epoch": 0.4564236823013595,
      "grad_norm": 3.546875,
      "learning_rate": 4.7103385073898406e-05,
      "loss": 0.9055,
      "step": 130230
    },
    {
      "epoch": 0.4564587298082551,
      "grad_norm": 2.890625,
      "learning_rate": 4.71027360452347e-05,
      "loss": 0.891,
      "step": 130240
    },
    {
      "epoch": 0.4564937773151507,
      "grad_norm": 2.9375,
      "learning_rate": 4.7102087016571e-05,
      "loss": 0.9272,
      "step": 130250
    },
    {
      "epoch": 0.45652882482204626,
      "grad_norm": 2.796875,
      "learning_rate": 4.71014379879073e-05,
      "loss": 0.9906,
      "step": 130260
    },
    {
      "epoch": 0.45656387232894186,
      "grad_norm": 3.109375,
      "learning_rate": 4.71007889592436e-05,
      "loss": 0.847,
      "step": 130270
    },
    {
      "epoch": 0.4565989198358375,
      "grad_norm": 2.640625,
      "learning_rate": 4.710013993057989e-05,
      "loss": 1.0338,
      "step": 130280
    },
    {
      "epoch": 0.4566339673427331,
      "grad_norm": 2.984375,
      "learning_rate": 4.7099490901916194e-05,
      "loss": 0.9355,
      "step": 130290
    },
    {
      "epoch": 0.4566690148496287,
      "grad_norm": 2.625,
      "learning_rate": 4.709884187325249e-05,
      "loss": 0.8503,
      "step": 130300
    },
    {
      "epoch": 0.4567040623565243,
      "grad_norm": 3.125,
      "learning_rate": 4.709819284458879e-05,
      "loss": 0.9715,
      "step": 130310
    },
    {
      "epoch": 0.45673910986341987,
      "grad_norm": 3.34375,
      "learning_rate": 4.7097543815925085e-05,
      "loss": 0.984,
      "step": 130320
    },
    {
      "epoch": 0.45677415737031546,
      "grad_norm": 3.0,
      "learning_rate": 4.7096894787261386e-05,
      "loss": 0.9245,
      "step": 130330
    },
    {
      "epoch": 0.45680920487721105,
      "grad_norm": 2.765625,
      "learning_rate": 4.709624575859769e-05,
      "loss": 0.8685,
      "step": 130340
    },
    {
      "epoch": 0.45684425238410664,
      "grad_norm": 3.328125,
      "learning_rate": 4.709559672993398e-05,
      "loss": 0.9298,
      "step": 130350
    },
    {
      "epoch": 0.45687929989100223,
      "grad_norm": 3.515625,
      "learning_rate": 4.7094947701270284e-05,
      "loss": 1.0104,
      "step": 130360
    },
    {
      "epoch": 0.4569143473978978,
      "grad_norm": 3.015625,
      "learning_rate": 4.709429867260658e-05,
      "loss": 0.8973,
      "step": 130370
    },
    {
      "epoch": 0.45694939490479347,
      "grad_norm": 3.109375,
      "learning_rate": 4.709364964394288e-05,
      "loss": 0.9266,
      "step": 130380
    },
    {
      "epoch": 0.45698444241168906,
      "grad_norm": 3.75,
      "learning_rate": 4.7093000615279174e-05,
      "loss": 0.975,
      "step": 130390
    },
    {
      "epoch": 0.45701948991858465,
      "grad_norm": 2.5,
      "learning_rate": 4.7092351586615476e-05,
      "loss": 0.8504,
      "step": 130400
    },
    {
      "epoch": 0.45705453742548025,
      "grad_norm": 3.21875,
      "learning_rate": 4.709170255795177e-05,
      "loss": 0.9857,
      "step": 130410
    },
    {
      "epoch": 0.45708958493237584,
      "grad_norm": 3.5625,
      "learning_rate": 4.709105352928807e-05,
      "loss": 0.9084,
      "step": 130420
    },
    {
      "epoch": 0.4571246324392714,
      "grad_norm": 2.859375,
      "learning_rate": 4.7090404500624366e-05,
      "loss": 0.9048,
      "step": 130430
    },
    {
      "epoch": 0.457159679946167,
      "grad_norm": 3.265625,
      "learning_rate": 4.708975547196067e-05,
      "loss": 0.9518,
      "step": 130440
    },
    {
      "epoch": 0.4571947274530626,
      "grad_norm": 3.28125,
      "learning_rate": 4.708910644329696e-05,
      "loss": 0.9626,
      "step": 130450
    },
    {
      "epoch": 0.4572297749599582,
      "grad_norm": 2.75,
      "learning_rate": 4.7088457414633264e-05,
      "loss": 0.9531,
      "step": 130460
    },
    {
      "epoch": 0.4572648224668538,
      "grad_norm": 4.4375,
      "learning_rate": 4.7087808385969565e-05,
      "loss": 0.8529,
      "step": 130470
    },
    {
      "epoch": 0.45729986997374944,
      "grad_norm": 3.25,
      "learning_rate": 4.708715935730586e-05,
      "loss": 0.9802,
      "step": 130480
    },
    {
      "epoch": 0.45733491748064503,
      "grad_norm": 3.0625,
      "learning_rate": 4.708651032864216e-05,
      "loss": 0.9577,
      "step": 130490
    },
    {
      "epoch": 0.4573699649875406,
      "grad_norm": 3.046875,
      "learning_rate": 4.7085861299978456e-05,
      "loss": 0.9974,
      "step": 130500
    },
    {
      "epoch": 0.4574050124944362,
      "grad_norm": 3.15625,
      "learning_rate": 4.708521227131476e-05,
      "loss": 0.9709,
      "step": 130510
    },
    {
      "epoch": 0.4574400600013318,
      "grad_norm": 3.578125,
      "learning_rate": 4.708456324265105e-05,
      "loss": 0.9747,
      "step": 130520
    },
    {
      "epoch": 0.4574751075082274,
      "grad_norm": 3.5,
      "learning_rate": 4.708391421398735e-05,
      "loss": 0.8649,
      "step": 130530
    },
    {
      "epoch": 0.457510155015123,
      "grad_norm": 3.21875,
      "learning_rate": 4.708326518532365e-05,
      "loss": 0.9294,
      "step": 130540
    },
    {
      "epoch": 0.4575452025220186,
      "grad_norm": 2.703125,
      "learning_rate": 4.708261615665995e-05,
      "loss": 0.9153,
      "step": 130550
    },
    {
      "epoch": 0.45758025002891417,
      "grad_norm": 3.171875,
      "learning_rate": 4.7081967127996244e-05,
      "loss": 1.0062,
      "step": 130560
    },
    {
      "epoch": 0.4576152975358098,
      "grad_norm": 3.09375,
      "learning_rate": 4.708131809933254e-05,
      "loss": 0.8818,
      "step": 130570
    },
    {
      "epoch": 0.4576503450427054,
      "grad_norm": 2.71875,
      "learning_rate": 4.708066907066884e-05,
      "loss": 0.9393,
      "step": 130580
    },
    {
      "epoch": 0.457685392549601,
      "grad_norm": 2.734375,
      "learning_rate": 4.7080020042005134e-05,
      "loss": 0.9532,
      "step": 130590
    },
    {
      "epoch": 0.4577204400564966,
      "grad_norm": 3.0625,
      "learning_rate": 4.7079371013341436e-05,
      "loss": 0.9646,
      "step": 130600
    },
    {
      "epoch": 0.4577554875633922,
      "grad_norm": 2.859375,
      "learning_rate": 4.707872198467773e-05,
      "loss": 0.9199,
      "step": 130610
    },
    {
      "epoch": 0.4577905350702878,
      "grad_norm": 3.203125,
      "learning_rate": 4.707807295601403e-05,
      "loss": 0.9527,
      "step": 130620
    },
    {
      "epoch": 0.45782558257718337,
      "grad_norm": 2.765625,
      "learning_rate": 4.7077423927350326e-05,
      "loss": 0.9276,
      "step": 130630
    },
    {
      "epoch": 0.45786063008407896,
      "grad_norm": 2.96875,
      "learning_rate": 4.707677489868663e-05,
      "loss": 0.9334,
      "step": 130640
    },
    {
      "epoch": 0.45789567759097455,
      "grad_norm": 3.484375,
      "learning_rate": 4.707612587002292e-05,
      "loss": 0.9839,
      "step": 130650
    },
    {
      "epoch": 0.45793072509787014,
      "grad_norm": 3.0,
      "learning_rate": 4.7075476841359224e-05,
      "loss": 0.9713,
      "step": 130660
    },
    {
      "epoch": 0.4579657726047658,
      "grad_norm": 3.71875,
      "learning_rate": 4.707482781269552e-05,
      "loss": 0.9489,
      "step": 130670
    },
    {
      "epoch": 0.4580008201116614,
      "grad_norm": 2.96875,
      "learning_rate": 4.707417878403182e-05,
      "loss": 0.8692,
      "step": 130680
    },
    {
      "epoch": 0.45803586761855697,
      "grad_norm": 2.796875,
      "learning_rate": 4.7073529755368114e-05,
      "loss": 0.9399,
      "step": 130690
    },
    {
      "epoch": 0.45807091512545256,
      "grad_norm": 3.203125,
      "learning_rate": 4.7072880726704416e-05,
      "loss": 0.9441,
      "step": 130700
    },
    {
      "epoch": 0.45810596263234815,
      "grad_norm": 2.9375,
      "learning_rate": 4.707223169804072e-05,
      "loss": 0.974,
      "step": 130710
    },
    {
      "epoch": 0.45814101013924374,
      "grad_norm": 2.796875,
      "learning_rate": 4.707158266937701e-05,
      "loss": 0.8506,
      "step": 130720
    },
    {
      "epoch": 0.45817605764613933,
      "grad_norm": 2.90625,
      "learning_rate": 4.707093364071331e-05,
      "loss": 0.8928,
      "step": 130730
    },
    {
      "epoch": 0.4582111051530349,
      "grad_norm": 3.703125,
      "learning_rate": 4.707028461204961e-05,
      "loss": 0.9068,
      "step": 130740
    },
    {
      "epoch": 0.4582461526599305,
      "grad_norm": 3.015625,
      "learning_rate": 4.706963558338591e-05,
      "loss": 1.0229,
      "step": 130750
    },
    {
      "epoch": 0.4582812001668261,
      "grad_norm": 3.328125,
      "learning_rate": 4.7068986554722204e-05,
      "loss": 1.0058,
      "step": 130760
    },
    {
      "epoch": 0.45831624767372175,
      "grad_norm": 2.859375,
      "learning_rate": 4.7068337526058505e-05,
      "loss": 0.9972,
      "step": 130770
    },
    {
      "epoch": 0.45835129518061735,
      "grad_norm": 2.734375,
      "learning_rate": 4.70676884973948e-05,
      "loss": 0.8798,
      "step": 130780
    },
    {
      "epoch": 0.45838634268751294,
      "grad_norm": 3.25,
      "learning_rate": 4.70670394687311e-05,
      "loss": 1.0351,
      "step": 130790
    },
    {
      "epoch": 0.45842139019440853,
      "grad_norm": 3.140625,
      "learning_rate": 4.7066390440067396e-05,
      "loss": 0.9708,
      "step": 130800
    },
    {
      "epoch": 0.4584564377013041,
      "grad_norm": 2.84375,
      "learning_rate": 4.70657414114037e-05,
      "loss": 0.9332,
      "step": 130810
    },
    {
      "epoch": 0.4584914852081997,
      "grad_norm": 3.15625,
      "learning_rate": 4.706509238273999e-05,
      "loss": 1.0109,
      "step": 130820
    },
    {
      "epoch": 0.4585265327150953,
      "grad_norm": 2.875,
      "learning_rate": 4.706444335407629e-05,
      "loss": 0.9396,
      "step": 130830
    },
    {
      "epoch": 0.4585615802219909,
      "grad_norm": 2.921875,
      "learning_rate": 4.7063794325412594e-05,
      "loss": 0.9373,
      "step": 130840
    },
    {
      "epoch": 0.4585966277288865,
      "grad_norm": 3.40625,
      "learning_rate": 4.706314529674889e-05,
      "loss": 0.9096,
      "step": 130850
    },
    {
      "epoch": 0.4586316752357821,
      "grad_norm": 3.0625,
      "learning_rate": 4.706249626808519e-05,
      "loss": 1.0013,
      "step": 130860
    },
    {
      "epoch": 0.4586667227426777,
      "grad_norm": 2.8125,
      "learning_rate": 4.7061847239421485e-05,
      "loss": 0.9525,
      "step": 130870
    },
    {
      "epoch": 0.4587017702495733,
      "grad_norm": 3.28125,
      "learning_rate": 4.7061198210757786e-05,
      "loss": 0.9082,
      "step": 130880
    },
    {
      "epoch": 0.4587368177564689,
      "grad_norm": 2.71875,
      "learning_rate": 4.706054918209408e-05,
      "loss": 0.924,
      "step": 130890
    },
    {
      "epoch": 0.4587718652633645,
      "grad_norm": 2.765625,
      "learning_rate": 4.705990015343038e-05,
      "loss": 0.9084,
      "step": 130900
    },
    {
      "epoch": 0.4588069127702601,
      "grad_norm": 3.125,
      "learning_rate": 4.705925112476668e-05,
      "loss": 0.9651,
      "step": 130910
    },
    {
      "epoch": 0.4588419602771557,
      "grad_norm": 2.390625,
      "learning_rate": 4.705860209610298e-05,
      "loss": 0.9228,
      "step": 130920
    },
    {
      "epoch": 0.45887700778405127,
      "grad_norm": 3.046875,
      "learning_rate": 4.705795306743927e-05,
      "loss": 0.8945,
      "step": 130930
    },
    {
      "epoch": 0.45891205529094686,
      "grad_norm": 2.859375,
      "learning_rate": 4.705730403877557e-05,
      "loss": 0.927,
      "step": 130940
    },
    {
      "epoch": 0.45894710279784245,
      "grad_norm": 3.421875,
      "learning_rate": 4.705665501011187e-05,
      "loss": 0.9723,
      "step": 130950
    },
    {
      "epoch": 0.45898215030473805,
      "grad_norm": 3.234375,
      "learning_rate": 4.7056005981448164e-05,
      "loss": 0.9749,
      "step": 130960
    },
    {
      "epoch": 0.4590171978116337,
      "grad_norm": 2.96875,
      "learning_rate": 4.7055356952784465e-05,
      "loss": 1.0336,
      "step": 130970
    },
    {
      "epoch": 0.4590522453185293,
      "grad_norm": 3.078125,
      "learning_rate": 4.705470792412076e-05,
      "loss": 1.0512,
      "step": 130980
    },
    {
      "epoch": 0.4590872928254249,
      "grad_norm": 3.203125,
      "learning_rate": 4.705405889545706e-05,
      "loss": 0.921,
      "step": 130990
    },
    {
      "epoch": 0.45912234033232047,
      "grad_norm": 2.953125,
      "learning_rate": 4.7053409866793356e-05,
      "loss": 0.9621,
      "step": 131000
    },
    {
      "epoch": 0.45915738783921606,
      "grad_norm": 2.953125,
      "learning_rate": 4.705276083812966e-05,
      "loss": 0.9479,
      "step": 131010
    },
    {
      "epoch": 0.45919243534611165,
      "grad_norm": 3.203125,
      "learning_rate": 4.705211180946595e-05,
      "loss": 0.9504,
      "step": 131020
    },
    {
      "epoch": 0.45922748285300724,
      "grad_norm": 2.640625,
      "learning_rate": 4.705146278080225e-05,
      "loss": 0.8308,
      "step": 131030
    },
    {
      "epoch": 0.45926253035990283,
      "grad_norm": 3.296875,
      "learning_rate": 4.705081375213855e-05,
      "loss": 1.0387,
      "step": 131040
    },
    {
      "epoch": 0.4592975778667984,
      "grad_norm": 3.15625,
      "learning_rate": 4.705016472347485e-05,
      "loss": 0.9914,
      "step": 131050
    },
    {
      "epoch": 0.459332625373694,
      "grad_norm": 3.078125,
      "learning_rate": 4.704951569481115e-05,
      "loss": 0.94,
      "step": 131060
    },
    {
      "epoch": 0.45936767288058966,
      "grad_norm": 2.546875,
      "learning_rate": 4.7048866666147445e-05,
      "loss": 0.8457,
      "step": 131070
    },
    {
      "epoch": 0.45940272038748525,
      "grad_norm": 3.25,
      "learning_rate": 4.7048217637483746e-05,
      "loss": 0.926,
      "step": 131080
    },
    {
      "epoch": 0.45943776789438084,
      "grad_norm": 3.03125,
      "learning_rate": 4.704756860882004e-05,
      "loss": 0.8955,
      "step": 131090
    },
    {
      "epoch": 0.45947281540127644,
      "grad_norm": 3.515625,
      "learning_rate": 4.704691958015634e-05,
      "loss": 0.9514,
      "step": 131100
    },
    {
      "epoch": 0.459507862908172,
      "grad_norm": 2.6875,
      "learning_rate": 4.704627055149264e-05,
      "loss": 0.8813,
      "step": 131110
    },
    {
      "epoch": 0.4595429104150676,
      "grad_norm": 2.96875,
      "learning_rate": 4.704562152282894e-05,
      "loss": 0.8677,
      "step": 131120
    },
    {
      "epoch": 0.4595779579219632,
      "grad_norm": 3.125,
      "learning_rate": 4.704497249416523e-05,
      "loss": 0.8669,
      "step": 131130
    },
    {
      "epoch": 0.4596130054288588,
      "grad_norm": 3.09375,
      "learning_rate": 4.7044323465501534e-05,
      "loss": 0.9426,
      "step": 131140
    },
    {
      "epoch": 0.4596480529357544,
      "grad_norm": 2.75,
      "learning_rate": 4.704367443683783e-05,
      "loss": 0.9459,
      "step": 131150
    },
    {
      "epoch": 0.45968310044265004,
      "grad_norm": 3.078125,
      "learning_rate": 4.704302540817413e-05,
      "loss": 0.9042,
      "step": 131160
    },
    {
      "epoch": 0.45971814794954563,
      "grad_norm": 3.25,
      "learning_rate": 4.7042376379510425e-05,
      "loss": 1.0092,
      "step": 131170
    },
    {
      "epoch": 0.4597531954564412,
      "grad_norm": 3.25,
      "learning_rate": 4.7041727350846726e-05,
      "loss": 0.9368,
      "step": 131180
    },
    {
      "epoch": 0.4597882429633368,
      "grad_norm": 3.125,
      "learning_rate": 4.704107832218302e-05,
      "loss": 0.9518,
      "step": 131190
    },
    {
      "epoch": 0.4598232904702324,
      "grad_norm": 2.84375,
      "learning_rate": 4.704042929351932e-05,
      "loss": 0.9139,
      "step": 131200
    },
    {
      "epoch": 0.459858337977128,
      "grad_norm": 2.796875,
      "learning_rate": 4.7039780264855624e-05,
      "loss": 1.009,
      "step": 131210
    },
    {
      "epoch": 0.4598933854840236,
      "grad_norm": 2.96875,
      "learning_rate": 4.703913123619192e-05,
      "loss": 0.8563,
      "step": 131220
    },
    {
      "epoch": 0.4599284329909192,
      "grad_norm": 3.140625,
      "learning_rate": 4.703848220752822e-05,
      "loss": 0.9082,
      "step": 131230
    },
    {
      "epoch": 0.45996348049781477,
      "grad_norm": 3.0625,
      "learning_rate": 4.7037833178864514e-05,
      "loss": 0.9885,
      "step": 131240
    },
    {
      "epoch": 0.45999852800471036,
      "grad_norm": 3.15625,
      "learning_rate": 4.7037184150200816e-05,
      "loss": 0.9459,
      "step": 131250
    },
    {
      "epoch": 0.460033575511606,
      "grad_norm": 3.046875,
      "learning_rate": 4.703653512153711e-05,
      "loss": 0.9262,
      "step": 131260
    },
    {
      "epoch": 0.4600686230185016,
      "grad_norm": 2.90625,
      "learning_rate": 4.703588609287341e-05,
      "loss": 0.93,
      "step": 131270
    },
    {
      "epoch": 0.4601036705253972,
      "grad_norm": 2.59375,
      "learning_rate": 4.7035237064209706e-05,
      "loss": 0.8928,
      "step": 131280
    },
    {
      "epoch": 0.4601387180322928,
      "grad_norm": 3.078125,
      "learning_rate": 4.703458803554601e-05,
      "loss": 0.9027,
      "step": 131290
    },
    {
      "epoch": 0.4601737655391884,
      "grad_norm": 3.75,
      "learning_rate": 4.70339390068823e-05,
      "loss": 0.9434,
      "step": 131300
    },
    {
      "epoch": 0.46020881304608396,
      "grad_norm": 3.140625,
      "learning_rate": 4.70332899782186e-05,
      "loss": 0.974,
      "step": 131310
    },
    {
      "epoch": 0.46024386055297956,
      "grad_norm": 3.25,
      "learning_rate": 4.70326409495549e-05,
      "loss": 1.0006,
      "step": 131320
    },
    {
      "epoch": 0.46027890805987515,
      "grad_norm": 2.921875,
      "learning_rate": 4.703199192089119e-05,
      "loss": 1.0154,
      "step": 131330
    },
    {
      "epoch": 0.46031395556677074,
      "grad_norm": 2.84375,
      "learning_rate": 4.7031342892227494e-05,
      "loss": 0.8995,
      "step": 131340
    },
    {
      "epoch": 0.46034900307366633,
      "grad_norm": 3.03125,
      "learning_rate": 4.703069386356379e-05,
      "loss": 0.9518,
      "step": 131350
    },
    {
      "epoch": 0.460384050580562,
      "grad_norm": 2.5,
      "learning_rate": 4.703004483490009e-05,
      "loss": 0.9263,
      "step": 131360
    },
    {
      "epoch": 0.46041909808745757,
      "grad_norm": 2.953125,
      "learning_rate": 4.7029395806236385e-05,
      "loss": 0.9215,
      "step": 131370
    },
    {
      "epoch": 0.46045414559435316,
      "grad_norm": 3.109375,
      "learning_rate": 4.7028746777572686e-05,
      "loss": 0.949,
      "step": 131380
    },
    {
      "epoch": 0.46048919310124875,
      "grad_norm": 2.6875,
      "learning_rate": 4.702809774890898e-05,
      "loss": 0.9007,
      "step": 131390
    },
    {
      "epoch": 0.46052424060814434,
      "grad_norm": 3.1875,
      "learning_rate": 4.702744872024528e-05,
      "loss": 0.9488,
      "step": 131400
    },
    {
      "epoch": 0.46055928811503993,
      "grad_norm": 3.59375,
      "learning_rate": 4.702679969158158e-05,
      "loss": 0.9401,
      "step": 131410
    },
    {
      "epoch": 0.4605943356219355,
      "grad_norm": 3.234375,
      "learning_rate": 4.702615066291788e-05,
      "loss": 0.9449,
      "step": 131420
    },
    {
      "epoch": 0.4606293831288311,
      "grad_norm": 3.265625,
      "learning_rate": 4.702550163425418e-05,
      "loss": 1.0142,
      "step": 131430
    },
    {
      "epoch": 0.4606644306357267,
      "grad_norm": 3.25,
      "learning_rate": 4.7024852605590474e-05,
      "loss": 0.9423,
      "step": 131440
    },
    {
      "epoch": 0.4606994781426223,
      "grad_norm": 3.140625,
      "learning_rate": 4.7024203576926776e-05,
      "loss": 0.892,
      "step": 131450
    },
    {
      "epoch": 0.46073452564951795,
      "grad_norm": 3.234375,
      "learning_rate": 4.702355454826307e-05,
      "loss": 0.9312,
      "step": 131460
    },
    {
      "epoch": 0.46076957315641354,
      "grad_norm": 3.140625,
      "learning_rate": 4.702290551959937e-05,
      "loss": 0.9664,
      "step": 131470
    },
    {
      "epoch": 0.46080462066330913,
      "grad_norm": 3.0625,
      "learning_rate": 4.7022256490935666e-05,
      "loss": 0.9722,
      "step": 131480
    },
    {
      "epoch": 0.4608396681702047,
      "grad_norm": 3.25,
      "learning_rate": 4.702160746227197e-05,
      "loss": 0.9288,
      "step": 131490
    },
    {
      "epoch": 0.4608747156771003,
      "grad_norm": 3.0625,
      "learning_rate": 4.702095843360826e-05,
      "loss": 0.8365,
      "step": 131500
    },
    {
      "epoch": 0.4609097631839959,
      "grad_norm": 3.203125,
      "learning_rate": 4.7020309404944564e-05,
      "loss": 0.9549,
      "step": 131510
    },
    {
      "epoch": 0.4609448106908915,
      "grad_norm": 2.703125,
      "learning_rate": 4.701966037628086e-05,
      "loss": 0.9553,
      "step": 131520
    },
    {
      "epoch": 0.4609798581977871,
      "grad_norm": 3.21875,
      "learning_rate": 4.701901134761716e-05,
      "loss": 0.9044,
      "step": 131530
    },
    {
      "epoch": 0.4610149057046827,
      "grad_norm": 2.90625,
      "learning_rate": 4.7018362318953454e-05,
      "loss": 0.9518,
      "step": 131540
    },
    {
      "epoch": 0.46104995321157827,
      "grad_norm": 2.625,
      "learning_rate": 4.7017713290289756e-05,
      "loss": 0.9499,
      "step": 131550
    },
    {
      "epoch": 0.4610850007184739,
      "grad_norm": 3.046875,
      "learning_rate": 4.701706426162605e-05,
      "loss": 0.9831,
      "step": 131560
    },
    {
      "epoch": 0.4611200482253695,
      "grad_norm": 3.015625,
      "learning_rate": 4.701641523296235e-05,
      "loss": 0.9018,
      "step": 131570
    },
    {
      "epoch": 0.4611550957322651,
      "grad_norm": 2.984375,
      "learning_rate": 4.701576620429865e-05,
      "loss": 0.9537,
      "step": 131580
    },
    {
      "epoch": 0.4611901432391607,
      "grad_norm": 2.953125,
      "learning_rate": 4.701511717563495e-05,
      "loss": 0.9476,
      "step": 131590
    },
    {
      "epoch": 0.4612251907460563,
      "grad_norm": 3.03125,
      "learning_rate": 4.701446814697125e-05,
      "loss": 0.9275,
      "step": 131600
    },
    {
      "epoch": 0.46126023825295187,
      "grad_norm": 3.34375,
      "learning_rate": 4.7013819118307544e-05,
      "loss": 0.9159,
      "step": 131610
    },
    {
      "epoch": 0.46129528575984746,
      "grad_norm": 2.9375,
      "learning_rate": 4.7013170089643845e-05,
      "loss": 0.8354,
      "step": 131620
    },
    {
      "epoch": 0.46133033326674305,
      "grad_norm": 2.65625,
      "learning_rate": 4.701252106098014e-05,
      "loss": 0.9169,
      "step": 131630
    },
    {
      "epoch": 0.46136538077363864,
      "grad_norm": 3.140625,
      "learning_rate": 4.701187203231644e-05,
      "loss": 0.9099,
      "step": 131640
    },
    {
      "epoch": 0.4614004282805343,
      "grad_norm": 3.265625,
      "learning_rate": 4.7011223003652736e-05,
      "loss": 0.9692,
      "step": 131650
    },
    {
      "epoch": 0.4614354757874299,
      "grad_norm": 2.8125,
      "learning_rate": 4.701057397498904e-05,
      "loss": 0.9026,
      "step": 131660
    },
    {
      "epoch": 0.4614705232943255,
      "grad_norm": 3.0625,
      "learning_rate": 4.700992494632533e-05,
      "loss": 0.9438,
      "step": 131670
    },
    {
      "epoch": 0.46150557080122107,
      "grad_norm": 3.46875,
      "learning_rate": 4.7009275917661626e-05,
      "loss": 0.903,
      "step": 131680
    },
    {
      "epoch": 0.46154061830811666,
      "grad_norm": 3.625,
      "learning_rate": 4.700862688899793e-05,
      "loss": 0.853,
      "step": 131690
    },
    {
      "epoch": 0.46157566581501225,
      "grad_norm": 3.09375,
      "learning_rate": 4.700797786033422e-05,
      "loss": 0.8487,
      "step": 131700
    },
    {
      "epoch": 0.46161071332190784,
      "grad_norm": 3.03125,
      "learning_rate": 4.7007328831670524e-05,
      "loss": 0.9416,
      "step": 131710
    },
    {
      "epoch": 0.46164576082880343,
      "grad_norm": 2.765625,
      "learning_rate": 4.700667980300682e-05,
      "loss": 0.8388,
      "step": 131720
    },
    {
      "epoch": 0.461680808335699,
      "grad_norm": 2.78125,
      "learning_rate": 4.700603077434312e-05,
      "loss": 0.9119,
      "step": 131730
    },
    {
      "epoch": 0.4617158558425946,
      "grad_norm": 2.96875,
      "learning_rate": 4.7005381745679414e-05,
      "loss": 0.9385,
      "step": 131740
    },
    {
      "epoch": 0.46175090334949026,
      "grad_norm": 3.234375,
      "learning_rate": 4.7004732717015716e-05,
      "loss": 0.9034,
      "step": 131750
    },
    {
      "epoch": 0.46178595085638585,
      "grad_norm": 3.09375,
      "learning_rate": 4.700408368835201e-05,
      "loss": 0.9664,
      "step": 131760
    },
    {
      "epoch": 0.46182099836328144,
      "grad_norm": 2.8125,
      "learning_rate": 4.700343465968831e-05,
      "loss": 0.9938,
      "step": 131770
    },
    {
      "epoch": 0.46185604587017703,
      "grad_norm": 3.234375,
      "learning_rate": 4.7002785631024606e-05,
      "loss": 1.0048,
      "step": 131780
    },
    {
      "epoch": 0.4618910933770726,
      "grad_norm": 3.515625,
      "learning_rate": 4.700213660236091e-05,
      "loss": 0.9383,
      "step": 131790
    },
    {
      "epoch": 0.4619261408839682,
      "grad_norm": 3.28125,
      "learning_rate": 4.700148757369721e-05,
      "loss": 0.9168,
      "step": 131800
    },
    {
      "epoch": 0.4619611883908638,
      "grad_norm": 2.703125,
      "learning_rate": 4.7000838545033504e-05,
      "loss": 0.9703,
      "step": 131810
    },
    {
      "epoch": 0.4619962358977594,
      "grad_norm": 2.671875,
      "learning_rate": 4.7000189516369805e-05,
      "loss": 0.9947,
      "step": 131820
    },
    {
      "epoch": 0.462031283404655,
      "grad_norm": 3.125,
      "learning_rate": 4.69995404877061e-05,
      "loss": 1.0072,
      "step": 131830
    },
    {
      "epoch": 0.4620663309115506,
      "grad_norm": 3.234375,
      "learning_rate": 4.69988914590424e-05,
      "loss": 0.9064,
      "step": 131840
    },
    {
      "epoch": 0.46210137841844623,
      "grad_norm": 2.921875,
      "learning_rate": 4.6998242430378696e-05,
      "loss": 0.8764,
      "step": 131850
    },
    {
      "epoch": 0.4621364259253418,
      "grad_norm": 3.078125,
      "learning_rate": 4.6997593401715e-05,
      "loss": 0.977,
      "step": 131860
    },
    {
      "epoch": 0.4621714734322374,
      "grad_norm": 3.203125,
      "learning_rate": 4.699694437305129e-05,
      "loss": 0.95,
      "step": 131870
    },
    {
      "epoch": 0.462206520939133,
      "grad_norm": 3.0625,
      "learning_rate": 4.699629534438759e-05,
      "loss": 0.9918,
      "step": 131880
    },
    {
      "epoch": 0.4622415684460286,
      "grad_norm": 3.3125,
      "learning_rate": 4.699564631572389e-05,
      "loss": 0.8911,
      "step": 131890
    },
    {
      "epoch": 0.4622766159529242,
      "grad_norm": 3.125,
      "learning_rate": 4.699499728706019e-05,
      "loss": 0.8995,
      "step": 131900
    },
    {
      "epoch": 0.4623116634598198,
      "grad_norm": 2.96875,
      "learning_rate": 4.6994348258396484e-05,
      "loss": 0.9507,
      "step": 131910
    },
    {
      "epoch": 0.46234671096671537,
      "grad_norm": 2.828125,
      "learning_rate": 4.6993699229732785e-05,
      "loss": 0.9085,
      "step": 131920
    },
    {
      "epoch": 0.46238175847361096,
      "grad_norm": 3.125,
      "learning_rate": 4.6993050201069087e-05,
      "loss": 0.9264,
      "step": 131930
    },
    {
      "epoch": 0.46241680598050655,
      "grad_norm": 3.203125,
      "learning_rate": 4.699240117240538e-05,
      "loss": 0.8574,
      "step": 131940
    },
    {
      "epoch": 0.4624518534874022,
      "grad_norm": 2.828125,
      "learning_rate": 4.699175214374168e-05,
      "loss": 1.0129,
      "step": 131950
    },
    {
      "epoch": 0.4624869009942978,
      "grad_norm": 2.671875,
      "learning_rate": 4.699110311507798e-05,
      "loss": 0.9213,
      "step": 131960
    },
    {
      "epoch": 0.4625219485011934,
      "grad_norm": 2.875,
      "learning_rate": 4.699045408641428e-05,
      "loss": 0.9368,
      "step": 131970
    },
    {
      "epoch": 0.46255699600808897,
      "grad_norm": 3.171875,
      "learning_rate": 4.698980505775057e-05,
      "loss": 0.9805,
      "step": 131980
    },
    {
      "epoch": 0.46259204351498456,
      "grad_norm": 2.859375,
      "learning_rate": 4.6989156029086875e-05,
      "loss": 0.8814,
      "step": 131990
    },
    {
      "epoch": 0.46262709102188015,
      "grad_norm": 2.484375,
      "learning_rate": 4.698850700042317e-05,
      "loss": 0.8938,
      "step": 132000
    },
    {
      "epoch": 0.46266213852877575,
      "grad_norm": 2.875,
      "learning_rate": 4.698785797175947e-05,
      "loss": 0.896,
      "step": 132010
    },
    {
      "epoch": 0.46269718603567134,
      "grad_norm": 3.859375,
      "learning_rate": 4.6987208943095765e-05,
      "loss": 0.9406,
      "step": 132020
    },
    {
      "epoch": 0.46273223354256693,
      "grad_norm": 2.75,
      "learning_rate": 4.6986559914432067e-05,
      "loss": 0.9232,
      "step": 132030
    },
    {
      "epoch": 0.4627672810494625,
      "grad_norm": 2.9375,
      "learning_rate": 4.698591088576836e-05,
      "loss": 0.9069,
      "step": 132040
    },
    {
      "epoch": 0.46280232855635817,
      "grad_norm": 2.953125,
      "learning_rate": 4.698526185710466e-05,
      "loss": 0.9595,
      "step": 132050
    },
    {
      "epoch": 0.46283737606325376,
      "grad_norm": 3.234375,
      "learning_rate": 4.698461282844096e-05,
      "loss": 0.9982,
      "step": 132060
    },
    {
      "epoch": 0.46287242357014935,
      "grad_norm": 2.859375,
      "learning_rate": 4.698396379977725e-05,
      "loss": 0.9813,
      "step": 132070
    },
    {
      "epoch": 0.46290747107704494,
      "grad_norm": 3.09375,
      "learning_rate": 4.698331477111355e-05,
      "loss": 0.9592,
      "step": 132080
    },
    {
      "epoch": 0.46294251858394053,
      "grad_norm": 3.015625,
      "learning_rate": 4.698266574244985e-05,
      "loss": 0.971,
      "step": 132090
    },
    {
      "epoch": 0.4629775660908361,
      "grad_norm": 2.765625,
      "learning_rate": 4.698201671378615e-05,
      "loss": 0.9231,
      "step": 132100
    },
    {
      "epoch": 0.4630126135977317,
      "grad_norm": 2.96875,
      "learning_rate": 4.6981367685122444e-05,
      "loss": 0.9724,
      "step": 132110
    },
    {
      "epoch": 0.4630476611046273,
      "grad_norm": 3.234375,
      "learning_rate": 4.6980718656458745e-05,
      "loss": 0.9334,
      "step": 132120
    },
    {
      "epoch": 0.4630827086115229,
      "grad_norm": 3.234375,
      "learning_rate": 4.698006962779504e-05,
      "loss": 0.917,
      "step": 132130
    },
    {
      "epoch": 0.4631177561184185,
      "grad_norm": 2.796875,
      "learning_rate": 4.697942059913134e-05,
      "loss": 0.8741,
      "step": 132140
    },
    {
      "epoch": 0.46315280362531414,
      "grad_norm": 3.703125,
      "learning_rate": 4.6978771570467636e-05,
      "loss": 0.9314,
      "step": 132150
    },
    {
      "epoch": 0.4631878511322097,
      "grad_norm": 2.578125,
      "learning_rate": 4.697812254180394e-05,
      "loss": 0.8766,
      "step": 132160
    },
    {
      "epoch": 0.4632228986391053,
      "grad_norm": 3.046875,
      "learning_rate": 4.697747351314024e-05,
      "loss": 1.0291,
      "step": 132170
    },
    {
      "epoch": 0.4632579461460009,
      "grad_norm": 3.109375,
      "learning_rate": 4.697682448447653e-05,
      "loss": 0.9468,
      "step": 132180
    },
    {
      "epoch": 0.4632929936528965,
      "grad_norm": 2.890625,
      "learning_rate": 4.6976175455812835e-05,
      "loss": 0.9441,
      "step": 132190
    },
    {
      "epoch": 0.4633280411597921,
      "grad_norm": 2.875,
      "learning_rate": 4.697552642714913e-05,
      "loss": 0.9379,
      "step": 132200
    },
    {
      "epoch": 0.4633630886666877,
      "grad_norm": 3.359375,
      "learning_rate": 4.697487739848543e-05,
      "loss": 0.9314,
      "step": 132210
    },
    {
      "epoch": 0.4633981361735833,
      "grad_norm": 2.8125,
      "learning_rate": 4.6974228369821725e-05,
      "loss": 0.9755,
      "step": 132220
    },
    {
      "epoch": 0.46343318368047887,
      "grad_norm": 3.1875,
      "learning_rate": 4.6973579341158027e-05,
      "loss": 0.8253,
      "step": 132230
    },
    {
      "epoch": 0.4634682311873745,
      "grad_norm": 3.125,
      "learning_rate": 4.697293031249432e-05,
      "loss": 1.0441,
      "step": 132240
    },
    {
      "epoch": 0.4635032786942701,
      "grad_norm": 3.1875,
      "learning_rate": 4.697228128383062e-05,
      "loss": 0.8735,
      "step": 132250
    },
    {
      "epoch": 0.4635383262011657,
      "grad_norm": 2.671875,
      "learning_rate": 4.697163225516692e-05,
      "loss": 0.918,
      "step": 132260
    },
    {
      "epoch": 0.4635733737080613,
      "grad_norm": 2.53125,
      "learning_rate": 4.697098322650322e-05,
      "loss": 0.8703,
      "step": 132270
    },
    {
      "epoch": 0.4636084212149569,
      "grad_norm": 2.96875,
      "learning_rate": 4.697033419783951e-05,
      "loss": 1.0125,
      "step": 132280
    },
    {
      "epoch": 0.46364346872185247,
      "grad_norm": 3.078125,
      "learning_rate": 4.6969685169175815e-05,
      "loss": 0.9512,
      "step": 132290
    },
    {
      "epoch": 0.46367851622874806,
      "grad_norm": 3.328125,
      "learning_rate": 4.6969036140512116e-05,
      "loss": 0.9235,
      "step": 132300
    },
    {
      "epoch": 0.46371356373564365,
      "grad_norm": 3.171875,
      "learning_rate": 4.696838711184841e-05,
      "loss": 0.9844,
      "step": 132310
    },
    {
      "epoch": 0.46374861124253924,
      "grad_norm": 3.15625,
      "learning_rate": 4.696773808318471e-05,
      "loss": 0.9168,
      "step": 132320
    },
    {
      "epoch": 0.46378365874943484,
      "grad_norm": 3.109375,
      "learning_rate": 4.6967089054521007e-05,
      "loss": 0.9083,
      "step": 132330
    },
    {
      "epoch": 0.4638187062563305,
      "grad_norm": 3.15625,
      "learning_rate": 4.696644002585731e-05,
      "loss": 1.048,
      "step": 132340
    },
    {
      "epoch": 0.4638537537632261,
      "grad_norm": 2.796875,
      "learning_rate": 4.69657909971936e-05,
      "loss": 0.905,
      "step": 132350
    },
    {
      "epoch": 0.46388880127012166,
      "grad_norm": 2.796875,
      "learning_rate": 4.6965141968529904e-05,
      "loss": 0.9543,
      "step": 132360
    },
    {
      "epoch": 0.46392384877701726,
      "grad_norm": 3.015625,
      "learning_rate": 4.69644929398662e-05,
      "loss": 0.9741,
      "step": 132370
    },
    {
      "epoch": 0.46395889628391285,
      "grad_norm": 2.703125,
      "learning_rate": 4.69638439112025e-05,
      "loss": 0.895,
      "step": 132380
    },
    {
      "epoch": 0.46399394379080844,
      "grad_norm": 3.28125,
      "learning_rate": 4.6963194882538795e-05,
      "loss": 1.0024,
      "step": 132390
    },
    {
      "epoch": 0.46402899129770403,
      "grad_norm": 2.921875,
      "learning_rate": 4.6962545853875096e-05,
      "loss": 0.9769,
      "step": 132400
    },
    {
      "epoch": 0.4640640388045996,
      "grad_norm": 2.796875,
      "learning_rate": 4.696189682521139e-05,
      "loss": 0.8811,
      "step": 132410
    },
    {
      "epoch": 0.4640990863114952,
      "grad_norm": 3.28125,
      "learning_rate": 4.696124779654769e-05,
      "loss": 0.9279,
      "step": 132420
    },
    {
      "epoch": 0.4641341338183908,
      "grad_norm": 3.125,
      "learning_rate": 4.6960598767883987e-05,
      "loss": 0.9519,
      "step": 132430
    },
    {
      "epoch": 0.46416918132528645,
      "grad_norm": 2.875,
      "learning_rate": 4.695994973922028e-05,
      "loss": 0.9373,
      "step": 132440
    },
    {
      "epoch": 0.46420422883218204,
      "grad_norm": 3.078125,
      "learning_rate": 4.695930071055658e-05,
      "loss": 0.8832,
      "step": 132450
    },
    {
      "epoch": 0.46423927633907763,
      "grad_norm": 3.125,
      "learning_rate": 4.695865168189288e-05,
      "loss": 1.0092,
      "step": 132460
    },
    {
      "epoch": 0.4642743238459732,
      "grad_norm": 3.625,
      "learning_rate": 4.695800265322918e-05,
      "loss": 0.9364,
      "step": 132470
    },
    {
      "epoch": 0.4643093713528688,
      "grad_norm": 3.34375,
      "learning_rate": 4.695735362456547e-05,
      "loss": 0.8733,
      "step": 132480
    },
    {
      "epoch": 0.4643444188597644,
      "grad_norm": 2.953125,
      "learning_rate": 4.6956704595901775e-05,
      "loss": 0.9764,
      "step": 132490
    },
    {
      "epoch": 0.46437946636666,
      "grad_norm": 2.640625,
      "learning_rate": 4.695605556723807e-05,
      "loss": 0.9005,
      "step": 132500
    },
    {
      "epoch": 0.4644145138735556,
      "grad_norm": 2.71875,
      "learning_rate": 4.695540653857437e-05,
      "loss": 0.9306,
      "step": 132510
    },
    {
      "epoch": 0.4644495613804512,
      "grad_norm": 3.09375,
      "learning_rate": 4.6954757509910665e-05,
      "loss": 0.8949,
      "step": 132520
    },
    {
      "epoch": 0.4644846088873468,
      "grad_norm": 2.8125,
      "learning_rate": 4.6954108481246967e-05,
      "loss": 0.9739,
      "step": 132530
    },
    {
      "epoch": 0.4645196563942424,
      "grad_norm": 3.328125,
      "learning_rate": 4.695345945258327e-05,
      "loss": 0.873,
      "step": 132540
    },
    {
      "epoch": 0.464554703901138,
      "grad_norm": 2.796875,
      "learning_rate": 4.695281042391956e-05,
      "loss": 0.9403,
      "step": 132550
    },
    {
      "epoch": 0.4645897514080336,
      "grad_norm": 2.84375,
      "learning_rate": 4.6952161395255864e-05,
      "loss": 0.9036,
      "step": 132560
    },
    {
      "epoch": 0.4646247989149292,
      "grad_norm": 3.25,
      "learning_rate": 4.695151236659216e-05,
      "loss": 0.9996,
      "step": 132570
    },
    {
      "epoch": 0.4646598464218248,
      "grad_norm": 2.796875,
      "learning_rate": 4.695086333792846e-05,
      "loss": 0.8884,
      "step": 132580
    },
    {
      "epoch": 0.4646948939287204,
      "grad_norm": 2.625,
      "learning_rate": 4.6950214309264755e-05,
      "loss": 0.859,
      "step": 132590
    },
    {
      "epoch": 0.46472994143561597,
      "grad_norm": 2.796875,
      "learning_rate": 4.6949565280601056e-05,
      "loss": 1.0617,
      "step": 132600
    },
    {
      "epoch": 0.46476498894251156,
      "grad_norm": 3.15625,
      "learning_rate": 4.694891625193735e-05,
      "loss": 0.8838,
      "step": 132610
    },
    {
      "epoch": 0.46480003644940715,
      "grad_norm": 2.875,
      "learning_rate": 4.694826722327365e-05,
      "loss": 1.054,
      "step": 132620
    },
    {
      "epoch": 0.46483508395630274,
      "grad_norm": 3.40625,
      "learning_rate": 4.6947618194609947e-05,
      "loss": 0.9114,
      "step": 132630
    },
    {
      "epoch": 0.4648701314631984,
      "grad_norm": 2.890625,
      "learning_rate": 4.694696916594625e-05,
      "loss": 0.9731,
      "step": 132640
    },
    {
      "epoch": 0.464905178970094,
      "grad_norm": 3.09375,
      "learning_rate": 4.694632013728254e-05,
      "loss": 0.9321,
      "step": 132650
    },
    {
      "epoch": 0.46494022647698957,
      "grad_norm": 2.96875,
      "learning_rate": 4.6945671108618844e-05,
      "loss": 0.8839,
      "step": 132660
    },
    {
      "epoch": 0.46497527398388516,
      "grad_norm": 2.78125,
      "learning_rate": 4.6945022079955145e-05,
      "loss": 0.9259,
      "step": 132670
    },
    {
      "epoch": 0.46501032149078075,
      "grad_norm": 3.375,
      "learning_rate": 4.694437305129144e-05,
      "loss": 0.9544,
      "step": 132680
    },
    {
      "epoch": 0.46504536899767634,
      "grad_norm": 3.53125,
      "learning_rate": 4.694372402262774e-05,
      "loss": 0.9466,
      "step": 132690
    },
    {
      "epoch": 0.46508041650457194,
      "grad_norm": 2.96875,
      "learning_rate": 4.6943074993964036e-05,
      "loss": 0.8544,
      "step": 132700
    },
    {
      "epoch": 0.4651154640114675,
      "grad_norm": 2.859375,
      "learning_rate": 4.694242596530034e-05,
      "loss": 0.8959,
      "step": 132710
    },
    {
      "epoch": 0.4651505115183631,
      "grad_norm": 2.765625,
      "learning_rate": 4.694177693663663e-05,
      "loss": 0.9184,
      "step": 132720
    },
    {
      "epoch": 0.46518555902525877,
      "grad_norm": 2.796875,
      "learning_rate": 4.694112790797293e-05,
      "loss": 0.883,
      "step": 132730
    },
    {
      "epoch": 0.46522060653215436,
      "grad_norm": 3.21875,
      "learning_rate": 4.694047887930923e-05,
      "loss": 0.8718,
      "step": 132740
    },
    {
      "epoch": 0.46525565403904995,
      "grad_norm": 3.125,
      "learning_rate": 4.693982985064553e-05,
      "loss": 0.9436,
      "step": 132750
    },
    {
      "epoch": 0.46529070154594554,
      "grad_norm": 2.921875,
      "learning_rate": 4.6939180821981824e-05,
      "loss": 0.9424,
      "step": 132760
    },
    {
      "epoch": 0.46532574905284113,
      "grad_norm": 3.203125,
      "learning_rate": 4.6938531793318125e-05,
      "loss": 0.9082,
      "step": 132770
    },
    {
      "epoch": 0.4653607965597367,
      "grad_norm": 3.21875,
      "learning_rate": 4.693788276465442e-05,
      "loss": 1.0638,
      "step": 132780
    },
    {
      "epoch": 0.4653958440666323,
      "grad_norm": 2.859375,
      "learning_rate": 4.693723373599072e-05,
      "loss": 1.0485,
      "step": 132790
    },
    {
      "epoch": 0.4654308915735279,
      "grad_norm": 3.1875,
      "learning_rate": 4.6936584707327016e-05,
      "loss": 0.9564,
      "step": 132800
    },
    {
      "epoch": 0.4654659390804235,
      "grad_norm": 3.375,
      "learning_rate": 4.693593567866331e-05,
      "loss": 0.9119,
      "step": 132810
    },
    {
      "epoch": 0.4655009865873191,
      "grad_norm": 3.0,
      "learning_rate": 4.693528664999961e-05,
      "loss": 0.8694,
      "step": 132820
    },
    {
      "epoch": 0.46553603409421473,
      "grad_norm": 3.328125,
      "learning_rate": 4.6934637621335907e-05,
      "loss": 0.9485,
      "step": 132830
    },
    {
      "epoch": 0.4655710816011103,
      "grad_norm": 3.453125,
      "learning_rate": 4.693398859267221e-05,
      "loss": 0.9352,
      "step": 132840
    },
    {
      "epoch": 0.4656061291080059,
      "grad_norm": 3.203125,
      "learning_rate": 4.69333395640085e-05,
      "loss": 0.9645,
      "step": 132850
    },
    {
      "epoch": 0.4656411766149015,
      "grad_norm": 2.84375,
      "learning_rate": 4.6932690535344804e-05,
      "loss": 0.9822,
      "step": 132860
    },
    {
      "epoch": 0.4656762241217971,
      "grad_norm": 2.875,
      "learning_rate": 4.69320415066811e-05,
      "loss": 0.892,
      "step": 132870
    },
    {
      "epoch": 0.4657112716286927,
      "grad_norm": 2.90625,
      "learning_rate": 4.69313924780174e-05,
      "loss": 0.968,
      "step": 132880
    },
    {
      "epoch": 0.4657463191355883,
      "grad_norm": 3.03125,
      "learning_rate": 4.69307434493537e-05,
      "loss": 0.877,
      "step": 132890
    },
    {
      "epoch": 0.4657813666424839,
      "grad_norm": 2.9375,
      "learning_rate": 4.6930094420689996e-05,
      "loss": 0.8922,
      "step": 132900
    },
    {
      "epoch": 0.46581641414937947,
      "grad_norm": 3.0625,
      "learning_rate": 4.69294453920263e-05,
      "loss": 0.8927,
      "step": 132910
    },
    {
      "epoch": 0.46585146165627506,
      "grad_norm": 2.765625,
      "learning_rate": 4.692879636336259e-05,
      "loss": 0.9434,
      "step": 132920
    },
    {
      "epoch": 0.4658865091631707,
      "grad_norm": 2.953125,
      "learning_rate": 4.692814733469889e-05,
      "loss": 0.9853,
      "step": 132930
    },
    {
      "epoch": 0.4659215566700663,
      "grad_norm": 3.671875,
      "learning_rate": 4.692749830603519e-05,
      "loss": 0.8764,
      "step": 132940
    },
    {
      "epoch": 0.4659566041769619,
      "grad_norm": 2.78125,
      "learning_rate": 4.692684927737149e-05,
      "loss": 0.94,
      "step": 132950
    },
    {
      "epoch": 0.4659916516838575,
      "grad_norm": 3.0625,
      "learning_rate": 4.6926200248707784e-05,
      "loss": 0.9489,
      "step": 132960
    },
    {
      "epoch": 0.46602669919075307,
      "grad_norm": 2.890625,
      "learning_rate": 4.6925551220044085e-05,
      "loss": 0.8973,
      "step": 132970
    },
    {
      "epoch": 0.46606174669764866,
      "grad_norm": 3.5625,
      "learning_rate": 4.692490219138038e-05,
      "loss": 0.9687,
      "step": 132980
    },
    {
      "epoch": 0.46609679420454425,
      "grad_norm": 3.046875,
      "learning_rate": 4.692425316271668e-05,
      "loss": 0.9741,
      "step": 132990
    },
    {
      "epoch": 0.46613184171143984,
      "grad_norm": 3.1875,
      "learning_rate": 4.6923604134052976e-05,
      "loss": 0.9847,
      "step": 133000
    },
    {
      "epoch": 0.46616688921833543,
      "grad_norm": 2.90625,
      "learning_rate": 4.692295510538928e-05,
      "loss": 0.9027,
      "step": 133010
    },
    {
      "epoch": 0.466201936725231,
      "grad_norm": 3.09375,
      "learning_rate": 4.692230607672557e-05,
      "loss": 0.9384,
      "step": 133020
    },
    {
      "epoch": 0.46623698423212667,
      "grad_norm": 3.015625,
      "learning_rate": 4.692165704806187e-05,
      "loss": 0.9608,
      "step": 133030
    },
    {
      "epoch": 0.46627203173902226,
      "grad_norm": 3.296875,
      "learning_rate": 4.6921008019398175e-05,
      "loss": 0.8926,
      "step": 133040
    },
    {
      "epoch": 0.46630707924591785,
      "grad_norm": 2.78125,
      "learning_rate": 4.692035899073447e-05,
      "loss": 0.9347,
      "step": 133050
    },
    {
      "epoch": 0.46634212675281345,
      "grad_norm": 3.0,
      "learning_rate": 4.691970996207077e-05,
      "loss": 0.9297,
      "step": 133060
    },
    {
      "epoch": 0.46637717425970904,
      "grad_norm": 2.9375,
      "learning_rate": 4.6919060933407065e-05,
      "loss": 0.9129,
      "step": 133070
    },
    {
      "epoch": 0.46641222176660463,
      "grad_norm": 3.3125,
      "learning_rate": 4.691841190474337e-05,
      "loss": 0.9038,
      "step": 133080
    },
    {
      "epoch": 0.4664472692735002,
      "grad_norm": 2.765625,
      "learning_rate": 4.691776287607966e-05,
      "loss": 0.9007,
      "step": 133090
    },
    {
      "epoch": 0.4664823167803958,
      "grad_norm": 2.5,
      "learning_rate": 4.691711384741596e-05,
      "loss": 0.8838,
      "step": 133100
    },
    {
      "epoch": 0.4665173642872914,
      "grad_norm": 3.015625,
      "learning_rate": 4.691646481875226e-05,
      "loss": 0.8907,
      "step": 133110
    },
    {
      "epoch": 0.466552411794187,
      "grad_norm": 3.25,
      "learning_rate": 4.691581579008856e-05,
      "loss": 1.0081,
      "step": 133120
    },
    {
      "epoch": 0.46658745930108264,
      "grad_norm": 3.640625,
      "learning_rate": 4.691516676142485e-05,
      "loss": 0.9971,
      "step": 133130
    },
    {
      "epoch": 0.46662250680797823,
      "grad_norm": 2.5,
      "learning_rate": 4.6914517732761155e-05,
      "loss": 0.8777,
      "step": 133140
    },
    {
      "epoch": 0.4666575543148738,
      "grad_norm": 3.15625,
      "learning_rate": 4.691386870409745e-05,
      "loss": 0.9954,
      "step": 133150
    },
    {
      "epoch": 0.4666926018217694,
      "grad_norm": 3.03125,
      "learning_rate": 4.691321967543375e-05,
      "loss": 0.9178,
      "step": 133160
    },
    {
      "epoch": 0.466727649328665,
      "grad_norm": 3.015625,
      "learning_rate": 4.691257064677005e-05,
      "loss": 0.9358,
      "step": 133170
    },
    {
      "epoch": 0.4667626968355606,
      "grad_norm": 3.234375,
      "learning_rate": 4.691192161810635e-05,
      "loss": 0.9723,
      "step": 133180
    },
    {
      "epoch": 0.4667977443424562,
      "grad_norm": 3.140625,
      "learning_rate": 4.691127258944264e-05,
      "loss": 0.9124,
      "step": 133190
    },
    {
      "epoch": 0.4668327918493518,
      "grad_norm": 3.484375,
      "learning_rate": 4.6910623560778936e-05,
      "loss": 0.904,
      "step": 133200
    },
    {
      "epoch": 0.46686783935624737,
      "grad_norm": 2.90625,
      "learning_rate": 4.690997453211524e-05,
      "loss": 0.9751,
      "step": 133210
    },
    {
      "epoch": 0.46690288686314296,
      "grad_norm": 3.3125,
      "learning_rate": 4.690932550345153e-05,
      "loss": 0.9372,
      "step": 133220
    },
    {
      "epoch": 0.4669379343700386,
      "grad_norm": 3.25,
      "learning_rate": 4.690867647478783e-05,
      "loss": 0.9533,
      "step": 133230
    },
    {
      "epoch": 0.4669729818769342,
      "grad_norm": 3.09375,
      "learning_rate": 4.690802744612413e-05,
      "loss": 0.8968,
      "step": 133240
    },
    {
      "epoch": 0.4670080293838298,
      "grad_norm": 3.140625,
      "learning_rate": 4.690737841746043e-05,
      "loss": 1.0425,
      "step": 133250
    },
    {
      "epoch": 0.4670430768907254,
      "grad_norm": 2.8125,
      "learning_rate": 4.690672938879673e-05,
      "loss": 0.8984,
      "step": 133260
    },
    {
      "epoch": 0.467078124397621,
      "grad_norm": 2.765625,
      "learning_rate": 4.6906080360133025e-05,
      "loss": 0.9446,
      "step": 133270
    },
    {
      "epoch": 0.46711317190451657,
      "grad_norm": 2.578125,
      "learning_rate": 4.690543133146933e-05,
      "loss": 0.8668,
      "step": 133280
    },
    {
      "epoch": 0.46714821941141216,
      "grad_norm": 2.75,
      "learning_rate": 4.690478230280562e-05,
      "loss": 0.8931,
      "step": 133290
    },
    {
      "epoch": 0.46718326691830775,
      "grad_norm": 2.984375,
      "learning_rate": 4.690413327414192e-05,
      "loss": 0.8628,
      "step": 133300
    },
    {
      "epoch": 0.46721831442520334,
      "grad_norm": 2.96875,
      "learning_rate": 4.690348424547822e-05,
      "loss": 0.9076,
      "step": 133310
    },
    {
      "epoch": 0.467253361932099,
      "grad_norm": 2.84375,
      "learning_rate": 4.690283521681452e-05,
      "loss": 0.9652,
      "step": 133320
    },
    {
      "epoch": 0.4672884094389946,
      "grad_norm": 3.515625,
      "learning_rate": 4.690218618815081e-05,
      "loss": 0.9878,
      "step": 133330
    },
    {
      "epoch": 0.46732345694589017,
      "grad_norm": 3.515625,
      "learning_rate": 4.6901537159487115e-05,
      "loss": 1.0021,
      "step": 133340
    },
    {
      "epoch": 0.46735850445278576,
      "grad_norm": 3.28125,
      "learning_rate": 4.690088813082341e-05,
      "loss": 0.9231,
      "step": 133350
    },
    {
      "epoch": 0.46739355195968135,
      "grad_norm": 3.0,
      "learning_rate": 4.690023910215971e-05,
      "loss": 1.0007,
      "step": 133360
    },
    {
      "epoch": 0.46742859946657694,
      "grad_norm": 3.375,
      "learning_rate": 4.6899590073496005e-05,
      "loss": 0.9185,
      "step": 133370
    },
    {
      "epoch": 0.46746364697347254,
      "grad_norm": 2.828125,
      "learning_rate": 4.689894104483231e-05,
      "loss": 0.9772,
      "step": 133380
    },
    {
      "epoch": 0.4674986944803681,
      "grad_norm": 3.015625,
      "learning_rate": 4.68982920161686e-05,
      "loss": 1.0589,
      "step": 133390
    },
    {
      "epoch": 0.4675337419872637,
      "grad_norm": 2.546875,
      "learning_rate": 4.68976429875049e-05,
      "loss": 0.8791,
      "step": 133400
    },
    {
      "epoch": 0.4675687894941593,
      "grad_norm": 2.5,
      "learning_rate": 4.6896993958841204e-05,
      "loss": 0.8514,
      "step": 133410
    },
    {
      "epoch": 0.46760383700105496,
      "grad_norm": 2.875,
      "learning_rate": 4.68963449301775e-05,
      "loss": 0.9333,
      "step": 133420
    },
    {
      "epoch": 0.46763888450795055,
      "grad_norm": 3.484375,
      "learning_rate": 4.68956959015138e-05,
      "loss": 0.9063,
      "step": 133430
    },
    {
      "epoch": 0.46767393201484614,
      "grad_norm": 3.21875,
      "learning_rate": 4.6895046872850095e-05,
      "loss": 0.9416,
      "step": 133440
    },
    {
      "epoch": 0.46770897952174173,
      "grad_norm": 3.28125,
      "learning_rate": 4.6894397844186396e-05,
      "loss": 0.8651,
      "step": 133450
    },
    {
      "epoch": 0.4677440270286373,
      "grad_norm": 3.265625,
      "learning_rate": 4.689374881552269e-05,
      "loss": 0.943,
      "step": 133460
    },
    {
      "epoch": 0.4677790745355329,
      "grad_norm": 2.921875,
      "learning_rate": 4.689309978685899e-05,
      "loss": 0.9498,
      "step": 133470
    },
    {
      "epoch": 0.4678141220424285,
      "grad_norm": 2.875,
      "learning_rate": 4.689245075819529e-05,
      "loss": 0.9557,
      "step": 133480
    },
    {
      "epoch": 0.4678491695493241,
      "grad_norm": 3.0625,
      "learning_rate": 4.689180172953159e-05,
      "loss": 0.8628,
      "step": 133490
    },
    {
      "epoch": 0.4678842170562197,
      "grad_norm": 3.28125,
      "learning_rate": 4.689115270086788e-05,
      "loss": 0.9495,
      "step": 133500
    },
    {
      "epoch": 0.4679192645631153,
      "grad_norm": 3.0,
      "learning_rate": 4.6890503672204184e-05,
      "loss": 0.9096,
      "step": 133510
    },
    {
      "epoch": 0.4679543120700109,
      "grad_norm": 2.890625,
      "learning_rate": 4.688985464354048e-05,
      "loss": 1.0027,
      "step": 133520
    },
    {
      "epoch": 0.4679893595769065,
      "grad_norm": 3.359375,
      "learning_rate": 4.688920561487678e-05,
      "loss": 0.8622,
      "step": 133530
    },
    {
      "epoch": 0.4680244070838021,
      "grad_norm": 3.09375,
      "learning_rate": 4.688855658621308e-05,
      "loss": 0.9694,
      "step": 133540
    },
    {
      "epoch": 0.4680594545906977,
      "grad_norm": 2.734375,
      "learning_rate": 4.6887907557549376e-05,
      "loss": 0.9426,
      "step": 133550
    },
    {
      "epoch": 0.4680945020975933,
      "grad_norm": 2.734375,
      "learning_rate": 4.688725852888568e-05,
      "loss": 0.8943,
      "step": 133560
    },
    {
      "epoch": 0.4681295496044889,
      "grad_norm": 2.796875,
      "learning_rate": 4.6886609500221965e-05,
      "loss": 0.9424,
      "step": 133570
    },
    {
      "epoch": 0.4681645971113845,
      "grad_norm": 3.203125,
      "learning_rate": 4.688596047155827e-05,
      "loss": 0.9032,
      "step": 133580
    },
    {
      "epoch": 0.46819964461828006,
      "grad_norm": 2.765625,
      "learning_rate": 4.688531144289456e-05,
      "loss": 0.8947,
      "step": 133590
    },
    {
      "epoch": 0.46823469212517566,
      "grad_norm": 2.90625,
      "learning_rate": 4.688466241423086e-05,
      "loss": 0.9556,
      "step": 133600
    },
    {
      "epoch": 0.46826973963207125,
      "grad_norm": 2.546875,
      "learning_rate": 4.688401338556716e-05,
      "loss": 0.9098,
      "step": 133610
    },
    {
      "epoch": 0.4683047871389669,
      "grad_norm": 3.140625,
      "learning_rate": 4.688336435690346e-05,
      "loss": 0.9091,
      "step": 133620
    },
    {
      "epoch": 0.4683398346458625,
      "grad_norm": 2.890625,
      "learning_rate": 4.688271532823976e-05,
      "loss": 0.8605,
      "step": 133630
    },
    {
      "epoch": 0.4683748821527581,
      "grad_norm": 3.203125,
      "learning_rate": 4.6882066299576055e-05,
      "loss": 0.8819,
      "step": 133640
    },
    {
      "epoch": 0.46840992965965367,
      "grad_norm": 3.4375,
      "learning_rate": 4.6881417270912356e-05,
      "loss": 0.9779,
      "step": 133650
    },
    {
      "epoch": 0.46844497716654926,
      "grad_norm": 3.21875,
      "learning_rate": 4.688076824224865e-05,
      "loss": 0.9917,
      "step": 133660
    },
    {
      "epoch": 0.46848002467344485,
      "grad_norm": 2.828125,
      "learning_rate": 4.688011921358495e-05,
      "loss": 0.8965,
      "step": 133670
    },
    {
      "epoch": 0.46851507218034044,
      "grad_norm": 2.8125,
      "learning_rate": 4.687947018492125e-05,
      "loss": 0.8761,
      "step": 133680
    },
    {
      "epoch": 0.46855011968723603,
      "grad_norm": 3.109375,
      "learning_rate": 4.687882115625755e-05,
      "loss": 0.9302,
      "step": 133690
    },
    {
      "epoch": 0.4685851671941316,
      "grad_norm": 3.0,
      "learning_rate": 4.687817212759384e-05,
      "loss": 0.8893,
      "step": 133700
    },
    {
      "epoch": 0.4686202147010272,
      "grad_norm": 2.828125,
      "learning_rate": 4.6877523098930144e-05,
      "loss": 0.9445,
      "step": 133710
    },
    {
      "epoch": 0.46865526220792286,
      "grad_norm": 3.171875,
      "learning_rate": 4.687687407026644e-05,
      "loss": 0.9975,
      "step": 133720
    },
    {
      "epoch": 0.46869030971481845,
      "grad_norm": 3.890625,
      "learning_rate": 4.687622504160274e-05,
      "loss": 0.9301,
      "step": 133730
    },
    {
      "epoch": 0.46872535722171405,
      "grad_norm": 3.125,
      "learning_rate": 4.6875576012939035e-05,
      "loss": 0.9188,
      "step": 133740
    },
    {
      "epoch": 0.46876040472860964,
      "grad_norm": 3.125,
      "learning_rate": 4.6874926984275336e-05,
      "loss": 0.9764,
      "step": 133750
    },
    {
      "epoch": 0.46879545223550523,
      "grad_norm": 2.890625,
      "learning_rate": 4.687427795561163e-05,
      "loss": 0.921,
      "step": 133760
    },
    {
      "epoch": 0.4688304997424008,
      "grad_norm": 3.53125,
      "learning_rate": 4.687362892694793e-05,
      "loss": 0.9463,
      "step": 133770
    },
    {
      "epoch": 0.4688655472492964,
      "grad_norm": 3.0,
      "learning_rate": 4.6872979898284233e-05,
      "loss": 0.9395,
      "step": 133780
    },
    {
      "epoch": 0.468900594756192,
      "grad_norm": 3.25,
      "learning_rate": 4.687233086962053e-05,
      "loss": 0.8688,
      "step": 133790
    },
    {
      "epoch": 0.4689356422630876,
      "grad_norm": 3.40625,
      "learning_rate": 4.687168184095683e-05,
      "loss": 0.9538,
      "step": 133800
    },
    {
      "epoch": 0.46897068976998324,
      "grad_norm": 3.375,
      "learning_rate": 4.6871032812293124e-05,
      "loss": 0.8783,
      "step": 133810
    },
    {
      "epoch": 0.46900573727687883,
      "grad_norm": 3.03125,
      "learning_rate": 4.6870383783629425e-05,
      "loss": 0.9666,
      "step": 133820
    },
    {
      "epoch": 0.4690407847837744,
      "grad_norm": 3.09375,
      "learning_rate": 4.686973475496572e-05,
      "loss": 1.0506,
      "step": 133830
    },
    {
      "epoch": 0.46907583229067,
      "grad_norm": 3.34375,
      "learning_rate": 4.686908572630202e-05,
      "loss": 0.9828,
      "step": 133840
    },
    {
      "epoch": 0.4691108797975656,
      "grad_norm": 3.3125,
      "learning_rate": 4.6868436697638316e-05,
      "loss": 0.9447,
      "step": 133850
    },
    {
      "epoch": 0.4691459273044612,
      "grad_norm": 3.140625,
      "learning_rate": 4.686778766897462e-05,
      "loss": 0.99,
      "step": 133860
    },
    {
      "epoch": 0.4691809748113568,
      "grad_norm": 3.234375,
      "learning_rate": 4.686713864031091e-05,
      "loss": 0.9413,
      "step": 133870
    },
    {
      "epoch": 0.4692160223182524,
      "grad_norm": 3.03125,
      "learning_rate": 4.6866489611647213e-05,
      "loss": 0.8701,
      "step": 133880
    },
    {
      "epoch": 0.46925106982514797,
      "grad_norm": 2.9375,
      "learning_rate": 4.686584058298351e-05,
      "loss": 0.9418,
      "step": 133890
    },
    {
      "epoch": 0.46928611733204356,
      "grad_norm": 2.71875,
      "learning_rate": 4.686519155431981e-05,
      "loss": 0.9101,
      "step": 133900
    },
    {
      "epoch": 0.4693211648389392,
      "grad_norm": 3.078125,
      "learning_rate": 4.686454252565611e-05,
      "loss": 0.9816,
      "step": 133910
    },
    {
      "epoch": 0.4693562123458348,
      "grad_norm": 3.359375,
      "learning_rate": 4.6863893496992405e-05,
      "loss": 0.9389,
      "step": 133920
    },
    {
      "epoch": 0.4693912598527304,
      "grad_norm": 3.21875,
      "learning_rate": 4.686324446832871e-05,
      "loss": 0.9676,
      "step": 133930
    },
    {
      "epoch": 0.469426307359626,
      "grad_norm": 2.921875,
      "learning_rate": 4.6862595439664995e-05,
      "loss": 0.9723,
      "step": 133940
    },
    {
      "epoch": 0.4694613548665216,
      "grad_norm": 2.8125,
      "learning_rate": 4.6861946411001296e-05,
      "loss": 0.9401,
      "step": 133950
    },
    {
      "epoch": 0.46949640237341717,
      "grad_norm": 2.6875,
      "learning_rate": 4.686129738233759e-05,
      "loss": 0.9709,
      "step": 133960
    },
    {
      "epoch": 0.46953144988031276,
      "grad_norm": 2.890625,
      "learning_rate": 4.686064835367389e-05,
      "loss": 0.9689,
      "step": 133970
    },
    {
      "epoch": 0.46956649738720835,
      "grad_norm": 3.3125,
      "learning_rate": 4.685999932501019e-05,
      "loss": 0.9163,
      "step": 133980
    },
    {
      "epoch": 0.46960154489410394,
      "grad_norm": 2.890625,
      "learning_rate": 4.685935029634649e-05,
      "loss": 0.8921,
      "step": 133990
    },
    {
      "epoch": 0.46963659240099953,
      "grad_norm": 2.75,
      "learning_rate": 4.685870126768279e-05,
      "loss": 0.8129,
      "step": 134000
    },
    {
      "epoch": 0.4696716399078952,
      "grad_norm": 2.75,
      "learning_rate": 4.6858052239019084e-05,
      "loss": 0.9591,
      "step": 134010
    },
    {
      "epoch": 0.46970668741479077,
      "grad_norm": 2.90625,
      "learning_rate": 4.6857403210355385e-05,
      "loss": 0.9262,
      "step": 134020
    },
    {
      "epoch": 0.46974173492168636,
      "grad_norm": 3.203125,
      "learning_rate": 4.685675418169168e-05,
      "loss": 0.9182,
      "step": 134030
    },
    {
      "epoch": 0.46977678242858195,
      "grad_norm": 2.734375,
      "learning_rate": 4.685610515302798e-05,
      "loss": 0.9067,
      "step": 134040
    },
    {
      "epoch": 0.46981182993547754,
      "grad_norm": 3.5625,
      "learning_rate": 4.6855456124364276e-05,
      "loss": 0.9595,
      "step": 134050
    },
    {
      "epoch": 0.46984687744237313,
      "grad_norm": 2.671875,
      "learning_rate": 4.685480709570058e-05,
      "loss": 0.909,
      "step": 134060
    },
    {
      "epoch": 0.4698819249492687,
      "grad_norm": 2.359375,
      "learning_rate": 4.685415806703687e-05,
      "loss": 0.8558,
      "step": 134070
    },
    {
      "epoch": 0.4699169724561643,
      "grad_norm": 3.125,
      "learning_rate": 4.6853509038373173e-05,
      "loss": 0.966,
      "step": 134080
    },
    {
      "epoch": 0.4699520199630599,
      "grad_norm": 3.15625,
      "learning_rate": 4.685286000970947e-05,
      "loss": 0.8998,
      "step": 134090
    },
    {
      "epoch": 0.4699870674699555,
      "grad_norm": 2.9375,
      "learning_rate": 4.685221098104577e-05,
      "loss": 0.8629,
      "step": 134100
    },
    {
      "epoch": 0.47002211497685115,
      "grad_norm": 2.5,
      "learning_rate": 4.6851561952382064e-05,
      "loss": 0.9175,
      "step": 134110
    },
    {
      "epoch": 0.47005716248374674,
      "grad_norm": 3.0,
      "learning_rate": 4.6850912923718365e-05,
      "loss": 0.921,
      "step": 134120
    },
    {
      "epoch": 0.47009220999064233,
      "grad_norm": 2.84375,
      "learning_rate": 4.685026389505467e-05,
      "loss": 0.9124,
      "step": 134130
    },
    {
      "epoch": 0.4701272574975379,
      "grad_norm": 2.828125,
      "learning_rate": 4.684961486639096e-05,
      "loss": 0.876,
      "step": 134140
    },
    {
      "epoch": 0.4701623050044335,
      "grad_norm": 2.875,
      "learning_rate": 4.684896583772726e-05,
      "loss": 0.9071,
      "step": 134150
    },
    {
      "epoch": 0.4701973525113291,
      "grad_norm": 3.15625,
      "learning_rate": 4.684831680906356e-05,
      "loss": 0.9877,
      "step": 134160
    },
    {
      "epoch": 0.4702324000182247,
      "grad_norm": 2.765625,
      "learning_rate": 4.684766778039986e-05,
      "loss": 0.8869,
      "step": 134170
    },
    {
      "epoch": 0.4702674475251203,
      "grad_norm": 2.96875,
      "learning_rate": 4.6847018751736153e-05,
      "loss": 0.9847,
      "step": 134180
    },
    {
      "epoch": 0.4703024950320159,
      "grad_norm": 3.078125,
      "learning_rate": 4.6846369723072455e-05,
      "loss": 1.0087,
      "step": 134190
    },
    {
      "epoch": 0.47033754253891147,
      "grad_norm": 3.140625,
      "learning_rate": 4.684572069440875e-05,
      "loss": 0.9991,
      "step": 134200
    },
    {
      "epoch": 0.4703725900458071,
      "grad_norm": 2.859375,
      "learning_rate": 4.684507166574505e-05,
      "loss": 1.0245,
      "step": 134210
    },
    {
      "epoch": 0.4704076375527027,
      "grad_norm": 3.0,
      "learning_rate": 4.6844422637081345e-05,
      "loss": 0.9496,
      "step": 134220
    },
    {
      "epoch": 0.4704426850595983,
      "grad_norm": 3.03125,
      "learning_rate": 4.684377360841765e-05,
      "loss": 0.9297,
      "step": 134230
    },
    {
      "epoch": 0.4704777325664939,
      "grad_norm": 3.15625,
      "learning_rate": 4.684312457975394e-05,
      "loss": 0.9276,
      "step": 134240
    },
    {
      "epoch": 0.4705127800733895,
      "grad_norm": 3.234375,
      "learning_rate": 4.684247555109024e-05,
      "loss": 0.902,
      "step": 134250
    },
    {
      "epoch": 0.47054782758028507,
      "grad_norm": 3.0,
      "learning_rate": 4.684182652242654e-05,
      "loss": 0.9957,
      "step": 134260
    },
    {
      "epoch": 0.47058287508718066,
      "grad_norm": 2.890625,
      "learning_rate": 4.684117749376284e-05,
      "loss": 0.9514,
      "step": 134270
    },
    {
      "epoch": 0.47061792259407625,
      "grad_norm": 3.90625,
      "learning_rate": 4.684052846509914e-05,
      "loss": 0.9479,
      "step": 134280
    },
    {
      "epoch": 0.47065297010097185,
      "grad_norm": 3.140625,
      "learning_rate": 4.6839879436435435e-05,
      "loss": 0.931,
      "step": 134290
    },
    {
      "epoch": 0.47068801760786744,
      "grad_norm": 2.890625,
      "learning_rate": 4.6839230407771736e-05,
      "loss": 0.9232,
      "step": 134300
    },
    {
      "epoch": 0.4707230651147631,
      "grad_norm": 2.890625,
      "learning_rate": 4.683858137910803e-05,
      "loss": 0.9005,
      "step": 134310
    },
    {
      "epoch": 0.4707581126216587,
      "grad_norm": 3.03125,
      "learning_rate": 4.6837932350444325e-05,
      "loss": 0.9944,
      "step": 134320
    },
    {
      "epoch": 0.47079316012855427,
      "grad_norm": 2.96875,
      "learning_rate": 4.683728332178062e-05,
      "loss": 0.8966,
      "step": 134330
    },
    {
      "epoch": 0.47082820763544986,
      "grad_norm": 2.9375,
      "learning_rate": 4.683663429311692e-05,
      "loss": 0.9199,
      "step": 134340
    },
    {
      "epoch": 0.47086325514234545,
      "grad_norm": 3.078125,
      "learning_rate": 4.6835985264453216e-05,
      "loss": 0.9216,
      "step": 134350
    },
    {
      "epoch": 0.47089830264924104,
      "grad_norm": 3.34375,
      "learning_rate": 4.683533623578952e-05,
      "loss": 1.01,
      "step": 134360
    },
    {
      "epoch": 0.47093335015613663,
      "grad_norm": 2.671875,
      "learning_rate": 4.683468720712582e-05,
      "loss": 0.9653,
      "step": 134370
    },
    {
      "epoch": 0.4709683976630322,
      "grad_norm": 3.203125,
      "learning_rate": 4.6834038178462113e-05,
      "loss": 0.8958,
      "step": 134380
    },
    {
      "epoch": 0.4710034451699278,
      "grad_norm": 2.65625,
      "learning_rate": 4.6833389149798415e-05,
      "loss": 0.8834,
      "step": 134390
    },
    {
      "epoch": 0.47103849267682346,
      "grad_norm": 2.640625,
      "learning_rate": 4.683274012113471e-05,
      "loss": 0.8835,
      "step": 134400
    },
    {
      "epoch": 0.47107354018371905,
      "grad_norm": 3.015625,
      "learning_rate": 4.683209109247101e-05,
      "loss": 0.9089,
      "step": 134410
    },
    {
      "epoch": 0.47110858769061464,
      "grad_norm": 3.234375,
      "learning_rate": 4.6831442063807305e-05,
      "loss": 1.0049,
      "step": 134420
    },
    {
      "epoch": 0.47114363519751024,
      "grad_norm": 3.203125,
      "learning_rate": 4.683079303514361e-05,
      "loss": 0.9593,
      "step": 134430
    },
    {
      "epoch": 0.4711786827044058,
      "grad_norm": 3.375,
      "learning_rate": 4.68301440064799e-05,
      "loss": 0.9595,
      "step": 134440
    },
    {
      "epoch": 0.4712137302113014,
      "grad_norm": 2.96875,
      "learning_rate": 4.68294949778162e-05,
      "loss": 0.9094,
      "step": 134450
    },
    {
      "epoch": 0.471248777718197,
      "grad_norm": 3.015625,
      "learning_rate": 4.68288459491525e-05,
      "loss": 0.9351,
      "step": 134460
    },
    {
      "epoch": 0.4712838252250926,
      "grad_norm": 3.328125,
      "learning_rate": 4.68281969204888e-05,
      "loss": 1.0499,
      "step": 134470
    },
    {
      "epoch": 0.4713188727319882,
      "grad_norm": 3.328125,
      "learning_rate": 4.6827547891825093e-05,
      "loss": 0.8963,
      "step": 134480
    },
    {
      "epoch": 0.4713539202388838,
      "grad_norm": 4.3125,
      "learning_rate": 4.6826898863161395e-05,
      "loss": 1.024,
      "step": 134490
    },
    {
      "epoch": 0.47138896774577943,
      "grad_norm": 2.703125,
      "learning_rate": 4.6826249834497696e-05,
      "loss": 0.9309,
      "step": 134500
    },
    {
      "epoch": 0.471424015252675,
      "grad_norm": 3.078125,
      "learning_rate": 4.682560080583399e-05,
      "loss": 0.9752,
      "step": 134510
    },
    {
      "epoch": 0.4714590627595706,
      "grad_norm": 3.21875,
      "learning_rate": 4.682495177717029e-05,
      "loss": 1.0373,
      "step": 134520
    },
    {
      "epoch": 0.4714941102664662,
      "grad_norm": 2.703125,
      "learning_rate": 4.682430274850659e-05,
      "loss": 0.944,
      "step": 134530
    },
    {
      "epoch": 0.4715291577733618,
      "grad_norm": 2.75,
      "learning_rate": 4.682365371984289e-05,
      "loss": 0.9238,
      "step": 134540
    },
    {
      "epoch": 0.4715642052802574,
      "grad_norm": 3.03125,
      "learning_rate": 4.682300469117918e-05,
      "loss": 0.9342,
      "step": 134550
    },
    {
      "epoch": 0.471599252787153,
      "grad_norm": 2.953125,
      "learning_rate": 4.6822355662515484e-05,
      "loss": 0.9617,
      "step": 134560
    },
    {
      "epoch": 0.47163430029404857,
      "grad_norm": 2.953125,
      "learning_rate": 4.682170663385178e-05,
      "loss": 0.9494,
      "step": 134570
    },
    {
      "epoch": 0.47166934780094416,
      "grad_norm": 3.1875,
      "learning_rate": 4.682105760518808e-05,
      "loss": 0.9446,
      "step": 134580
    },
    {
      "epoch": 0.47170439530783975,
      "grad_norm": 2.5625,
      "learning_rate": 4.6820408576524375e-05,
      "loss": 0.9158,
      "step": 134590
    },
    {
      "epoch": 0.4717394428147354,
      "grad_norm": 2.984375,
      "learning_rate": 4.6819759547860676e-05,
      "loss": 0.9134,
      "step": 134600
    },
    {
      "epoch": 0.471774490321631,
      "grad_norm": 3.125,
      "learning_rate": 4.681911051919697e-05,
      "loss": 0.9159,
      "step": 134610
    },
    {
      "epoch": 0.4718095378285266,
      "grad_norm": 2.90625,
      "learning_rate": 4.681846149053327e-05,
      "loss": 0.9099,
      "step": 134620
    },
    {
      "epoch": 0.4718445853354222,
      "grad_norm": 2.75,
      "learning_rate": 4.681781246186957e-05,
      "loss": 0.9634,
      "step": 134630
    },
    {
      "epoch": 0.47187963284231776,
      "grad_norm": 2.453125,
      "learning_rate": 4.681716343320587e-05,
      "loss": 0.8011,
      "step": 134640
    },
    {
      "epoch": 0.47191468034921336,
      "grad_norm": 2.75,
      "learning_rate": 4.681651440454217e-05,
      "loss": 0.9514,
      "step": 134650
    },
    {
      "epoch": 0.47194972785610895,
      "grad_norm": 3.296875,
      "learning_rate": 4.6815865375878464e-05,
      "loss": 0.9673,
      "step": 134660
    },
    {
      "epoch": 0.47198477536300454,
      "grad_norm": 3.03125,
      "learning_rate": 4.6815216347214766e-05,
      "loss": 0.9991,
      "step": 134670
    },
    {
      "epoch": 0.47201982286990013,
      "grad_norm": 3.09375,
      "learning_rate": 4.681456731855106e-05,
      "loss": 1.0107,
      "step": 134680
    },
    {
      "epoch": 0.4720548703767957,
      "grad_norm": 2.71875,
      "learning_rate": 4.6813918289887355e-05,
      "loss": 0.9528,
      "step": 134690
    },
    {
      "epoch": 0.47208991788369137,
      "grad_norm": 2.890625,
      "learning_rate": 4.681326926122365e-05,
      "loss": 1.0394,
      "step": 134700
    },
    {
      "epoch": 0.47212496539058696,
      "grad_norm": 3.5,
      "learning_rate": 4.681262023255995e-05,
      "loss": 0.8548,
      "step": 134710
    },
    {
      "epoch": 0.47216001289748255,
      "grad_norm": 2.84375,
      "learning_rate": 4.6811971203896245e-05,
      "loss": 0.9253,
      "step": 134720
    },
    {
      "epoch": 0.47219506040437814,
      "grad_norm": 3.328125,
      "learning_rate": 4.681132217523255e-05,
      "loss": 0.943,
      "step": 134730
    },
    {
      "epoch": 0.47223010791127373,
      "grad_norm": 2.796875,
      "learning_rate": 4.681067314656885e-05,
      "loss": 0.9433,
      "step": 134740
    },
    {
      "epoch": 0.4722651554181693,
      "grad_norm": 2.875,
      "learning_rate": 4.681002411790514e-05,
      "loss": 0.9934,
      "step": 134750
    },
    {
      "epoch": 0.4723002029250649,
      "grad_norm": 3.09375,
      "learning_rate": 4.6809375089241444e-05,
      "loss": 0.9489,
      "step": 134760
    },
    {
      "epoch": 0.4723352504319605,
      "grad_norm": 3.203125,
      "learning_rate": 4.680872606057774e-05,
      "loss": 0.8681,
      "step": 134770
    },
    {
      "epoch": 0.4723702979388561,
      "grad_norm": 2.890625,
      "learning_rate": 4.680807703191404e-05,
      "loss": 0.9342,
      "step": 134780
    },
    {
      "epoch": 0.4724053454457517,
      "grad_norm": 2.46875,
      "learning_rate": 4.6807428003250335e-05,
      "loss": 0.9173,
      "step": 134790
    },
    {
      "epoch": 0.47244039295264734,
      "grad_norm": 3.140625,
      "learning_rate": 4.6806778974586636e-05,
      "loss": 0.9405,
      "step": 134800
    },
    {
      "epoch": 0.47247544045954293,
      "grad_norm": 3.53125,
      "learning_rate": 4.680612994592293e-05,
      "loss": 0.9042,
      "step": 134810
    },
    {
      "epoch": 0.4725104879664385,
      "grad_norm": 2.96875,
      "learning_rate": 4.680548091725923e-05,
      "loss": 0.8174,
      "step": 134820
    },
    {
      "epoch": 0.4725455354733341,
      "grad_norm": 3.09375,
      "learning_rate": 4.680483188859553e-05,
      "loss": 0.8779,
      "step": 134830
    },
    {
      "epoch": 0.4725805829802297,
      "grad_norm": 3.296875,
      "learning_rate": 4.680418285993183e-05,
      "loss": 0.9226,
      "step": 134840
    },
    {
      "epoch": 0.4726156304871253,
      "grad_norm": 3.28125,
      "learning_rate": 4.680353383126812e-05,
      "loss": 0.8894,
      "step": 134850
    },
    {
      "epoch": 0.4726506779940209,
      "grad_norm": 3.046875,
      "learning_rate": 4.6802884802604424e-05,
      "loss": 0.9616,
      "step": 134860
    },
    {
      "epoch": 0.4726857255009165,
      "grad_norm": 3.03125,
      "learning_rate": 4.6802235773940726e-05,
      "loss": 0.9601,
      "step": 134870
    },
    {
      "epoch": 0.47272077300781207,
      "grad_norm": 3.078125,
      "learning_rate": 4.680158674527702e-05,
      "loss": 0.9016,
      "step": 134880
    },
    {
      "epoch": 0.47275582051470766,
      "grad_norm": 2.9375,
      "learning_rate": 4.680093771661332e-05,
      "loss": 0.9185,
      "step": 134890
    },
    {
      "epoch": 0.4727908680216033,
      "grad_norm": 3.03125,
      "learning_rate": 4.6800288687949616e-05,
      "loss": 0.8953,
      "step": 134900
    },
    {
      "epoch": 0.4728259155284989,
      "grad_norm": 3.28125,
      "learning_rate": 4.679963965928592e-05,
      "loss": 0.8857,
      "step": 134910
    },
    {
      "epoch": 0.4728609630353945,
      "grad_norm": 3.015625,
      "learning_rate": 4.679899063062221e-05,
      "loss": 0.8551,
      "step": 134920
    },
    {
      "epoch": 0.4728960105422901,
      "grad_norm": 2.75,
      "learning_rate": 4.6798341601958514e-05,
      "loss": 0.8941,
      "step": 134930
    },
    {
      "epoch": 0.47293105804918567,
      "grad_norm": 3.234375,
      "learning_rate": 4.679769257329481e-05,
      "loss": 0.996,
      "step": 134940
    },
    {
      "epoch": 0.47296610555608126,
      "grad_norm": 3.40625,
      "learning_rate": 4.679704354463111e-05,
      "loss": 1.0071,
      "step": 134950
    },
    {
      "epoch": 0.47300115306297685,
      "grad_norm": 2.59375,
      "learning_rate": 4.6796394515967404e-05,
      "loss": 0.9425,
      "step": 134960
    },
    {
      "epoch": 0.47303620056987244,
      "grad_norm": 3.171875,
      "learning_rate": 4.6795745487303706e-05,
      "loss": 0.9191,
      "step": 134970
    },
    {
      "epoch": 0.47307124807676804,
      "grad_norm": 3.140625,
      "learning_rate": 4.679509645864e-05,
      "loss": 0.9566,
      "step": 134980
    },
    {
      "epoch": 0.4731062955836637,
      "grad_norm": 2.84375,
      "learning_rate": 4.67944474299763e-05,
      "loss": 0.9523,
      "step": 134990
    },
    {
      "epoch": 0.4731413430905593,
      "grad_norm": 2.671875,
      "learning_rate": 4.67937984013126e-05,
      "loss": 0.9006,
      "step": 135000
    },
    {
      "epoch": 0.4731413430905593,
      "eval_loss": 0.8760254979133606,
      "eval_runtime": 552.6822,
      "eval_samples_per_second": 688.345,
      "eval_steps_per_second": 57.362,
      "step": 135000
    },
    {
      "epoch": 0.47317639059745487,
      "grad_norm": 3.015625,
      "learning_rate": 4.67931493726489e-05,
      "loss": 1.0102,
      "step": 135010
    },
    {
      "epoch": 0.47321143810435046,
      "grad_norm": 3.125,
      "learning_rate": 4.67925003439852e-05,
      "loss": 1.0148,
      "step": 135020
    },
    {
      "epoch": 0.47324648561124605,
      "grad_norm": 2.734375,
      "learning_rate": 4.6791851315321494e-05,
      "loss": 0.956,
      "step": 135030
    },
    {
      "epoch": 0.47328153311814164,
      "grad_norm": 3.171875,
      "learning_rate": 4.6791202286657795e-05,
      "loss": 0.9569,
      "step": 135040
    },
    {
      "epoch": 0.47331658062503723,
      "grad_norm": 2.9375,
      "learning_rate": 4.679055325799409e-05,
      "loss": 0.9644,
      "step": 135050
    },
    {
      "epoch": 0.4733516281319328,
      "grad_norm": 2.875,
      "learning_rate": 4.678990422933039e-05,
      "loss": 0.9386,
      "step": 135060
    },
    {
      "epoch": 0.4733866756388284,
      "grad_norm": 3.28125,
      "learning_rate": 4.678925520066668e-05,
      "loss": 0.9639,
      "step": 135070
    },
    {
      "epoch": 0.473421723145724,
      "grad_norm": 3.78125,
      "learning_rate": 4.678860617200298e-05,
      "loss": 0.9432,
      "step": 135080
    },
    {
      "epoch": 0.47345677065261965,
      "grad_norm": 2.6875,
      "learning_rate": 4.678795714333928e-05,
      "loss": 0.8937,
      "step": 135090
    },
    {
      "epoch": 0.47349181815951524,
      "grad_norm": 2.96875,
      "learning_rate": 4.6787308114675576e-05,
      "loss": 0.9757,
      "step": 135100
    },
    {
      "epoch": 0.47352686566641083,
      "grad_norm": 2.84375,
      "learning_rate": 4.678665908601188e-05,
      "loss": 0.946,
      "step": 135110
    },
    {
      "epoch": 0.4735619131733064,
      "grad_norm": 3.078125,
      "learning_rate": 4.678601005734817e-05,
      "loss": 0.9747,
      "step": 135120
    },
    {
      "epoch": 0.473596960680202,
      "grad_norm": 3.09375,
      "learning_rate": 4.6785361028684474e-05,
      "loss": 0.9719,
      "step": 135130
    },
    {
      "epoch": 0.4736320081870976,
      "grad_norm": 3.03125,
      "learning_rate": 4.678471200002077e-05,
      "loss": 0.9278,
      "step": 135140
    },
    {
      "epoch": 0.4736670556939932,
      "grad_norm": 3.171875,
      "learning_rate": 4.678406297135707e-05,
      "loss": 0.8853,
      "step": 135150
    },
    {
      "epoch": 0.4737021032008888,
      "grad_norm": 2.828125,
      "learning_rate": 4.6783413942693364e-05,
      "loss": 0.9497,
      "step": 135160
    },
    {
      "epoch": 0.4737371507077844,
      "grad_norm": 2.78125,
      "learning_rate": 4.6782764914029666e-05,
      "loss": 0.8956,
      "step": 135170
    },
    {
      "epoch": 0.47377219821468,
      "grad_norm": 3.265625,
      "learning_rate": 4.678211588536596e-05,
      "loss": 0.9746,
      "step": 135180
    },
    {
      "epoch": 0.4738072457215756,
      "grad_norm": 3.375,
      "learning_rate": 4.678146685670226e-05,
      "loss": 0.9625,
      "step": 135190
    },
    {
      "epoch": 0.4738422932284712,
      "grad_norm": 3.125,
      "learning_rate": 4.6780817828038556e-05,
      "loss": 1.0177,
      "step": 135200
    },
    {
      "epoch": 0.4738773407353668,
      "grad_norm": 2.765625,
      "learning_rate": 4.678016879937486e-05,
      "loss": 1.0405,
      "step": 135210
    },
    {
      "epoch": 0.4739123882422624,
      "grad_norm": 2.78125,
      "learning_rate": 4.677951977071115e-05,
      "loss": 0.8548,
      "step": 135220
    },
    {
      "epoch": 0.473947435749158,
      "grad_norm": 2.953125,
      "learning_rate": 4.6778870742047454e-05,
      "loss": 0.8927,
      "step": 135230
    },
    {
      "epoch": 0.4739824832560536,
      "grad_norm": 3.078125,
      "learning_rate": 4.6778221713383755e-05,
      "loss": 0.92,
      "step": 135240
    },
    {
      "epoch": 0.47401753076294917,
      "grad_norm": 3.265625,
      "learning_rate": 4.677757268472005e-05,
      "loss": 0.9978,
      "step": 135250
    },
    {
      "epoch": 0.47405257826984476,
      "grad_norm": 3.390625,
      "learning_rate": 4.677692365605635e-05,
      "loss": 0.9314,
      "step": 135260
    },
    {
      "epoch": 0.47408762577674035,
      "grad_norm": 3.203125,
      "learning_rate": 4.6776274627392646e-05,
      "loss": 0.9994,
      "step": 135270
    },
    {
      "epoch": 0.47412267328363594,
      "grad_norm": 2.9375,
      "learning_rate": 4.677562559872895e-05,
      "loss": 0.9418,
      "step": 135280
    },
    {
      "epoch": 0.4741577207905316,
      "grad_norm": 2.8125,
      "learning_rate": 4.677497657006524e-05,
      "loss": 0.8839,
      "step": 135290
    },
    {
      "epoch": 0.4741927682974272,
      "grad_norm": 2.859375,
      "learning_rate": 4.677432754140154e-05,
      "loss": 0.8753,
      "step": 135300
    },
    {
      "epoch": 0.47422781580432277,
      "grad_norm": 3.125,
      "learning_rate": 4.677367851273784e-05,
      "loss": 0.9014,
      "step": 135310
    },
    {
      "epoch": 0.47426286331121836,
      "grad_norm": 2.453125,
      "learning_rate": 4.677302948407414e-05,
      "loss": 0.8968,
      "step": 135320
    },
    {
      "epoch": 0.47429791081811395,
      "grad_norm": 3.09375,
      "learning_rate": 4.6772380455410434e-05,
      "loss": 0.8737,
      "step": 135330
    },
    {
      "epoch": 0.47433295832500955,
      "grad_norm": 3.046875,
      "learning_rate": 4.6771731426746735e-05,
      "loss": 0.9293,
      "step": 135340
    },
    {
      "epoch": 0.47436800583190514,
      "grad_norm": 2.796875,
      "learning_rate": 4.677108239808303e-05,
      "loss": 0.9467,
      "step": 135350
    },
    {
      "epoch": 0.47440305333880073,
      "grad_norm": 2.671875,
      "learning_rate": 4.677043336941933e-05,
      "loss": 1.0013,
      "step": 135360
    },
    {
      "epoch": 0.4744381008456963,
      "grad_norm": 3.46875,
      "learning_rate": 4.676978434075563e-05,
      "loss": 0.9038,
      "step": 135370
    },
    {
      "epoch": 0.4744731483525919,
      "grad_norm": 2.96875,
      "learning_rate": 4.676913531209193e-05,
      "loss": 0.997,
      "step": 135380
    },
    {
      "epoch": 0.47450819585948756,
      "grad_norm": 2.984375,
      "learning_rate": 4.676848628342823e-05,
      "loss": 0.8905,
      "step": 135390
    },
    {
      "epoch": 0.47454324336638315,
      "grad_norm": 2.8125,
      "learning_rate": 4.676783725476452e-05,
      "loss": 0.8632,
      "step": 135400
    },
    {
      "epoch": 0.47457829087327874,
      "grad_norm": 3.125,
      "learning_rate": 4.6767188226100824e-05,
      "loss": 0.9225,
      "step": 135410
    },
    {
      "epoch": 0.47461333838017433,
      "grad_norm": 3.015625,
      "learning_rate": 4.676653919743712e-05,
      "loss": 1.0084,
      "step": 135420
    },
    {
      "epoch": 0.4746483858870699,
      "grad_norm": 3.109375,
      "learning_rate": 4.676589016877342e-05,
      "loss": 0.9839,
      "step": 135430
    },
    {
      "epoch": 0.4746834333939655,
      "grad_norm": 2.921875,
      "learning_rate": 4.6765241140109715e-05,
      "loss": 0.9846,
      "step": 135440
    },
    {
      "epoch": 0.4747184809008611,
      "grad_norm": 3.140625,
      "learning_rate": 4.676459211144601e-05,
      "loss": 0.8998,
      "step": 135450
    },
    {
      "epoch": 0.4747535284077567,
      "grad_norm": 3.234375,
      "learning_rate": 4.676394308278231e-05,
      "loss": 0.9199,
      "step": 135460
    },
    {
      "epoch": 0.4747885759146523,
      "grad_norm": 2.9375,
      "learning_rate": 4.6763294054118606e-05,
      "loss": 0.9127,
      "step": 135470
    },
    {
      "epoch": 0.47482362342154794,
      "grad_norm": 2.90625,
      "learning_rate": 4.676264502545491e-05,
      "loss": 0.8834,
      "step": 135480
    },
    {
      "epoch": 0.4748586709284435,
      "grad_norm": 3.28125,
      "learning_rate": 4.67619959967912e-05,
      "loss": 0.9714,
      "step": 135490
    },
    {
      "epoch": 0.4748937184353391,
      "grad_norm": 3.03125,
      "learning_rate": 4.67613469681275e-05,
      "loss": 0.9253,
      "step": 135500
    },
    {
      "epoch": 0.4749287659422347,
      "grad_norm": 3.015625,
      "learning_rate": 4.67606979394638e-05,
      "loss": 0.9501,
      "step": 135510
    },
    {
      "epoch": 0.4749638134491303,
      "grad_norm": 3.0,
      "learning_rate": 4.67600489108001e-05,
      "loss": 0.958,
      "step": 135520
    },
    {
      "epoch": 0.4749988609560259,
      "grad_norm": 3.140625,
      "learning_rate": 4.6759399882136394e-05,
      "loss": 0.9475,
      "step": 135530
    },
    {
      "epoch": 0.4750339084629215,
      "grad_norm": 3.0,
      "learning_rate": 4.6758750853472695e-05,
      "loss": 0.9727,
      "step": 135540
    },
    {
      "epoch": 0.4750689559698171,
      "grad_norm": 3.203125,
      "learning_rate": 4.675810182480899e-05,
      "loss": 1.0821,
      "step": 135550
    },
    {
      "epoch": 0.47510400347671267,
      "grad_norm": 2.734375,
      "learning_rate": 4.675745279614529e-05,
      "loss": 0.8953,
      "step": 135560
    },
    {
      "epoch": 0.47513905098360826,
      "grad_norm": 2.921875,
      "learning_rate": 4.6756803767481586e-05,
      "loss": 0.9777,
      "step": 135570
    },
    {
      "epoch": 0.4751740984905039,
      "grad_norm": 2.734375,
      "learning_rate": 4.675615473881789e-05,
      "loss": 0.9224,
      "step": 135580
    },
    {
      "epoch": 0.4752091459973995,
      "grad_norm": 2.65625,
      "learning_rate": 4.675550571015418e-05,
      "loss": 0.8694,
      "step": 135590
    },
    {
      "epoch": 0.4752441935042951,
      "grad_norm": 3.0,
      "learning_rate": 4.675485668149048e-05,
      "loss": 0.9784,
      "step": 135600
    },
    {
      "epoch": 0.4752792410111907,
      "grad_norm": 2.984375,
      "learning_rate": 4.6754207652826784e-05,
      "loss": 0.8792,
      "step": 135610
    },
    {
      "epoch": 0.47531428851808627,
      "grad_norm": 2.984375,
      "learning_rate": 4.675355862416308e-05,
      "loss": 0.8986,
      "step": 135620
    },
    {
      "epoch": 0.47534933602498186,
      "grad_norm": 2.71875,
      "learning_rate": 4.675290959549938e-05,
      "loss": 0.9437,
      "step": 135630
    },
    {
      "epoch": 0.47538438353187745,
      "grad_norm": 3.171875,
      "learning_rate": 4.6752260566835675e-05,
      "loss": 0.9752,
      "step": 135640
    },
    {
      "epoch": 0.47541943103877304,
      "grad_norm": 3.296875,
      "learning_rate": 4.6751611538171976e-05,
      "loss": 0.9394,
      "step": 135650
    },
    {
      "epoch": 0.47545447854566864,
      "grad_norm": 3.0,
      "learning_rate": 4.675096250950827e-05,
      "loss": 0.9064,
      "step": 135660
    },
    {
      "epoch": 0.4754895260525642,
      "grad_norm": 3.171875,
      "learning_rate": 4.675031348084457e-05,
      "loss": 0.9965,
      "step": 135670
    },
    {
      "epoch": 0.4755245735594599,
      "grad_norm": 3.140625,
      "learning_rate": 4.674966445218087e-05,
      "loss": 0.9664,
      "step": 135680
    },
    {
      "epoch": 0.47555962106635546,
      "grad_norm": 2.65625,
      "learning_rate": 4.674901542351717e-05,
      "loss": 0.9374,
      "step": 135690
    },
    {
      "epoch": 0.47559466857325106,
      "grad_norm": 2.953125,
      "learning_rate": 4.674836639485346e-05,
      "loss": 0.9078,
      "step": 135700
    },
    {
      "epoch": 0.47562971608014665,
      "grad_norm": 2.65625,
      "learning_rate": 4.6747717366189764e-05,
      "loss": 0.8502,
      "step": 135710
    },
    {
      "epoch": 0.47566476358704224,
      "grad_norm": 2.96875,
      "learning_rate": 4.674706833752606e-05,
      "loss": 0.9261,
      "step": 135720
    },
    {
      "epoch": 0.47569981109393783,
      "grad_norm": 2.765625,
      "learning_rate": 4.674641930886236e-05,
      "loss": 0.9787,
      "step": 135730
    },
    {
      "epoch": 0.4757348586008334,
      "grad_norm": 2.828125,
      "learning_rate": 4.674577028019866e-05,
      "loss": 0.9683,
      "step": 135740
    },
    {
      "epoch": 0.475769906107729,
      "grad_norm": 3.0625,
      "learning_rate": 4.6745121251534956e-05,
      "loss": 0.9423,
      "step": 135750
    },
    {
      "epoch": 0.4758049536146246,
      "grad_norm": 3.078125,
      "learning_rate": 4.674447222287126e-05,
      "loss": 0.8782,
      "step": 135760
    },
    {
      "epoch": 0.4758400011215202,
      "grad_norm": 2.796875,
      "learning_rate": 4.674382319420755e-05,
      "loss": 0.9733,
      "step": 135770
    },
    {
      "epoch": 0.47587504862841584,
      "grad_norm": 3.09375,
      "learning_rate": 4.6743174165543854e-05,
      "loss": 0.9765,
      "step": 135780
    },
    {
      "epoch": 0.47591009613531143,
      "grad_norm": 2.9375,
      "learning_rate": 4.674252513688015e-05,
      "loss": 0.8987,
      "step": 135790
    },
    {
      "epoch": 0.475945143642207,
      "grad_norm": 3.171875,
      "learning_rate": 4.674187610821645e-05,
      "loss": 0.9251,
      "step": 135800
    },
    {
      "epoch": 0.4759801911491026,
      "grad_norm": 2.890625,
      "learning_rate": 4.6741227079552744e-05,
      "loss": 0.8655,
      "step": 135810
    },
    {
      "epoch": 0.4760152386559982,
      "grad_norm": 3.28125,
      "learning_rate": 4.674057805088904e-05,
      "loss": 0.9062,
      "step": 135820
    },
    {
      "epoch": 0.4760502861628938,
      "grad_norm": 2.71875,
      "learning_rate": 4.673992902222534e-05,
      "loss": 0.8722,
      "step": 135830
    },
    {
      "epoch": 0.4760853336697894,
      "grad_norm": 3.046875,
      "learning_rate": 4.6739279993561635e-05,
      "loss": 0.9101,
      "step": 135840
    },
    {
      "epoch": 0.476120381176685,
      "grad_norm": 2.71875,
      "learning_rate": 4.6738630964897936e-05,
      "loss": 0.9894,
      "step": 135850
    },
    {
      "epoch": 0.4761554286835806,
      "grad_norm": 2.90625,
      "learning_rate": 4.673798193623423e-05,
      "loss": 0.8615,
      "step": 135860
    },
    {
      "epoch": 0.47619047619047616,
      "grad_norm": 3.046875,
      "learning_rate": 4.673733290757053e-05,
      "loss": 0.9117,
      "step": 135870
    },
    {
      "epoch": 0.4762255236973718,
      "grad_norm": 3.1875,
      "learning_rate": 4.673668387890683e-05,
      "loss": 0.9254,
      "step": 135880
    },
    {
      "epoch": 0.4762605712042674,
      "grad_norm": 2.875,
      "learning_rate": 4.673603485024313e-05,
      "loss": 0.9366,
      "step": 135890
    },
    {
      "epoch": 0.476295618711163,
      "grad_norm": 3.25,
      "learning_rate": 4.673538582157942e-05,
      "loss": 0.8954,
      "step": 135900
    },
    {
      "epoch": 0.4763306662180586,
      "grad_norm": 3.40625,
      "learning_rate": 4.6734736792915724e-05,
      "loss": 0.9976,
      "step": 135910
    },
    {
      "epoch": 0.4763657137249542,
      "grad_norm": 3.125,
      "learning_rate": 4.673408776425202e-05,
      "loss": 0.9202,
      "step": 135920
    },
    {
      "epoch": 0.47640076123184977,
      "grad_norm": 3.5,
      "learning_rate": 4.673343873558832e-05,
      "loss": 0.9389,
      "step": 135930
    },
    {
      "epoch": 0.47643580873874536,
      "grad_norm": 2.875,
      "learning_rate": 4.6732789706924615e-05,
      "loss": 0.8593,
      "step": 135940
    },
    {
      "epoch": 0.47647085624564095,
      "grad_norm": 3.125,
      "learning_rate": 4.6732140678260916e-05,
      "loss": 0.9814,
      "step": 135950
    },
    {
      "epoch": 0.47650590375253654,
      "grad_norm": 2.671875,
      "learning_rate": 4.673149164959722e-05,
      "loss": 0.9375,
      "step": 135960
    },
    {
      "epoch": 0.47654095125943213,
      "grad_norm": 3.28125,
      "learning_rate": 4.673084262093351e-05,
      "loss": 0.9635,
      "step": 135970
    },
    {
      "epoch": 0.4765759987663278,
      "grad_norm": 2.875,
      "learning_rate": 4.6730193592269814e-05,
      "loss": 0.9209,
      "step": 135980
    },
    {
      "epoch": 0.47661104627322337,
      "grad_norm": 3.28125,
      "learning_rate": 4.672954456360611e-05,
      "loss": 0.9396,
      "step": 135990
    },
    {
      "epoch": 0.47664609378011896,
      "grad_norm": 3.046875,
      "learning_rate": 4.672889553494241e-05,
      "loss": 0.8913,
      "step": 136000
    },
    {
      "epoch": 0.47668114128701455,
      "grad_norm": 3.03125,
      "learning_rate": 4.6728246506278704e-05,
      "loss": 0.9941,
      "step": 136010
    },
    {
      "epoch": 0.47671618879391015,
      "grad_norm": 3.09375,
      "learning_rate": 4.6727597477615006e-05,
      "loss": 0.9589,
      "step": 136020
    },
    {
      "epoch": 0.47675123630080574,
      "grad_norm": 2.765625,
      "learning_rate": 4.67269484489513e-05,
      "loss": 0.9455,
      "step": 136030
    },
    {
      "epoch": 0.4767862838077013,
      "grad_norm": 3.125,
      "learning_rate": 4.67262994202876e-05,
      "loss": 0.9239,
      "step": 136040
    },
    {
      "epoch": 0.4768213313145969,
      "grad_norm": 3.4375,
      "learning_rate": 4.6725650391623896e-05,
      "loss": 0.9779,
      "step": 136050
    },
    {
      "epoch": 0.4768563788214925,
      "grad_norm": 3.03125,
      "learning_rate": 4.67250013629602e-05,
      "loss": 1.0062,
      "step": 136060
    },
    {
      "epoch": 0.47689142632838816,
      "grad_norm": 2.9375,
      "learning_rate": 4.672435233429649e-05,
      "loss": 0.9876,
      "step": 136070
    },
    {
      "epoch": 0.47692647383528375,
      "grad_norm": 3.578125,
      "learning_rate": 4.6723703305632794e-05,
      "loss": 0.9717,
      "step": 136080
    },
    {
      "epoch": 0.47696152134217934,
      "grad_norm": 2.9375,
      "learning_rate": 4.672305427696909e-05,
      "loss": 0.9359,
      "step": 136090
    },
    {
      "epoch": 0.47699656884907493,
      "grad_norm": 3.125,
      "learning_rate": 4.672240524830539e-05,
      "loss": 0.8618,
      "step": 136100
    },
    {
      "epoch": 0.4770316163559705,
      "grad_norm": 3.03125,
      "learning_rate": 4.672175621964169e-05,
      "loss": 0.9198,
      "step": 136110
    },
    {
      "epoch": 0.4770666638628661,
      "grad_norm": 3.015625,
      "learning_rate": 4.6721107190977986e-05,
      "loss": 0.9805,
      "step": 136120
    },
    {
      "epoch": 0.4771017113697617,
      "grad_norm": 3.34375,
      "learning_rate": 4.672045816231429e-05,
      "loss": 0.9341,
      "step": 136130
    },
    {
      "epoch": 0.4771367588766573,
      "grad_norm": 3.25,
      "learning_rate": 4.671980913365058e-05,
      "loss": 0.8708,
      "step": 136140
    },
    {
      "epoch": 0.4771718063835529,
      "grad_norm": 3.34375,
      "learning_rate": 4.671916010498688e-05,
      "loss": 0.893,
      "step": 136150
    },
    {
      "epoch": 0.4772068538904485,
      "grad_norm": 2.703125,
      "learning_rate": 4.671851107632318e-05,
      "loss": 1.0174,
      "step": 136160
    },
    {
      "epoch": 0.4772419013973441,
      "grad_norm": 2.75,
      "learning_rate": 4.671786204765948e-05,
      "loss": 0.9116,
      "step": 136170
    },
    {
      "epoch": 0.4772769489042397,
      "grad_norm": 2.890625,
      "learning_rate": 4.6717213018995774e-05,
      "loss": 0.9857,
      "step": 136180
    },
    {
      "epoch": 0.4773119964111353,
      "grad_norm": 3.375,
      "learning_rate": 4.6716563990332075e-05,
      "loss": 0.9925,
      "step": 136190
    },
    {
      "epoch": 0.4773470439180309,
      "grad_norm": 3.0,
      "learning_rate": 4.671591496166837e-05,
      "loss": 0.9435,
      "step": 136200
    },
    {
      "epoch": 0.4773820914249265,
      "grad_norm": 3.046875,
      "learning_rate": 4.6715265933004664e-05,
      "loss": 0.9625,
      "step": 136210
    },
    {
      "epoch": 0.4774171389318221,
      "grad_norm": 2.96875,
      "learning_rate": 4.6714616904340966e-05,
      "loss": 0.9172,
      "step": 136220
    },
    {
      "epoch": 0.4774521864387177,
      "grad_norm": 3.203125,
      "learning_rate": 4.671396787567726e-05,
      "loss": 0.9375,
      "step": 136230
    },
    {
      "epoch": 0.47748723394561327,
      "grad_norm": 3.859375,
      "learning_rate": 4.671331884701356e-05,
      "loss": 1.0484,
      "step": 136240
    },
    {
      "epoch": 0.47752228145250886,
      "grad_norm": 2.703125,
      "learning_rate": 4.6712669818349856e-05,
      "loss": 0.9682,
      "step": 136250
    },
    {
      "epoch": 0.47755732895940445,
      "grad_norm": 2.421875,
      "learning_rate": 4.671202078968616e-05,
      "loss": 0.8307,
      "step": 136260
    },
    {
      "epoch": 0.4775923764663001,
      "grad_norm": 2.96875,
      "learning_rate": 4.671137176102245e-05,
      "loss": 0.907,
      "step": 136270
    },
    {
      "epoch": 0.4776274239731957,
      "grad_norm": 3.28125,
      "learning_rate": 4.6710722732358754e-05,
      "loss": 0.8911,
      "step": 136280
    },
    {
      "epoch": 0.4776624714800913,
      "grad_norm": 2.984375,
      "learning_rate": 4.671007370369505e-05,
      "loss": 0.9653,
      "step": 136290
    },
    {
      "epoch": 0.47769751898698687,
      "grad_norm": 2.71875,
      "learning_rate": 4.670942467503135e-05,
      "loss": 0.884,
      "step": 136300
    },
    {
      "epoch": 0.47773256649388246,
      "grad_norm": 3.078125,
      "learning_rate": 4.6708775646367644e-05,
      "loss": 0.9229,
      "step": 136310
    },
    {
      "epoch": 0.47776761400077805,
      "grad_norm": 3.34375,
      "learning_rate": 4.6708126617703946e-05,
      "loss": 0.8771,
      "step": 136320
    },
    {
      "epoch": 0.47780266150767364,
      "grad_norm": 3.484375,
      "learning_rate": 4.670747758904025e-05,
      "loss": 0.935,
      "step": 136330
    },
    {
      "epoch": 0.47783770901456923,
      "grad_norm": 3.09375,
      "learning_rate": 4.670682856037654e-05,
      "loss": 0.8683,
      "step": 136340
    },
    {
      "epoch": 0.4778727565214648,
      "grad_norm": 2.875,
      "learning_rate": 4.670617953171284e-05,
      "loss": 0.9629,
      "step": 136350
    },
    {
      "epoch": 0.4779078040283604,
      "grad_norm": 3.296875,
      "learning_rate": 4.670553050304914e-05,
      "loss": 0.925,
      "step": 136360
    },
    {
      "epoch": 0.47794285153525606,
      "grad_norm": 2.859375,
      "learning_rate": 4.670488147438544e-05,
      "loss": 0.9432,
      "step": 136370
    },
    {
      "epoch": 0.47797789904215165,
      "grad_norm": 3.171875,
      "learning_rate": 4.6704232445721734e-05,
      "loss": 0.9347,
      "step": 136380
    },
    {
      "epoch": 0.47801294654904725,
      "grad_norm": 2.671875,
      "learning_rate": 4.6703583417058035e-05,
      "loss": 0.8348,
      "step": 136390
    },
    {
      "epoch": 0.47804799405594284,
      "grad_norm": 3.234375,
      "learning_rate": 4.670293438839433e-05,
      "loss": 0.9298,
      "step": 136400
    },
    {
      "epoch": 0.47808304156283843,
      "grad_norm": 3.171875,
      "learning_rate": 4.670228535973063e-05,
      "loss": 0.9839,
      "step": 136410
    },
    {
      "epoch": 0.478118089069734,
      "grad_norm": 3.046875,
      "learning_rate": 4.6701636331066926e-05,
      "loss": 0.9277,
      "step": 136420
    },
    {
      "epoch": 0.4781531365766296,
      "grad_norm": 3.109375,
      "learning_rate": 4.670098730240323e-05,
      "loss": 1.05,
      "step": 136430
    },
    {
      "epoch": 0.4781881840835252,
      "grad_norm": 2.890625,
      "learning_rate": 4.670033827373952e-05,
      "loss": 0.8831,
      "step": 136440
    },
    {
      "epoch": 0.4782232315904208,
      "grad_norm": 3.234375,
      "learning_rate": 4.669968924507582e-05,
      "loss": 0.9646,
      "step": 136450
    },
    {
      "epoch": 0.4782582790973164,
      "grad_norm": 3.25,
      "learning_rate": 4.669904021641212e-05,
      "loss": 0.913,
      "step": 136460
    },
    {
      "epoch": 0.47829332660421203,
      "grad_norm": 3.140625,
      "learning_rate": 4.669839118774842e-05,
      "loss": 0.9636,
      "step": 136470
    },
    {
      "epoch": 0.4783283741111076,
      "grad_norm": 2.90625,
      "learning_rate": 4.669774215908472e-05,
      "loss": 0.9019,
      "step": 136480
    },
    {
      "epoch": 0.4783634216180032,
      "grad_norm": 3.03125,
      "learning_rate": 4.6697093130421015e-05,
      "loss": 0.9406,
      "step": 136490
    },
    {
      "epoch": 0.4783984691248988,
      "grad_norm": 3.390625,
      "learning_rate": 4.6696444101757317e-05,
      "loss": 0.9235,
      "step": 136500
    },
    {
      "epoch": 0.4784335166317944,
      "grad_norm": 4.25,
      "learning_rate": 4.669579507309361e-05,
      "loss": 0.9757,
      "step": 136510
    },
    {
      "epoch": 0.47846856413869,
      "grad_norm": 3.484375,
      "learning_rate": 4.669514604442991e-05,
      "loss": 1.0074,
      "step": 136520
    },
    {
      "epoch": 0.4785036116455856,
      "grad_norm": 3.71875,
      "learning_rate": 4.669449701576621e-05,
      "loss": 1.0061,
      "step": 136530
    },
    {
      "epoch": 0.47853865915248117,
      "grad_norm": 2.859375,
      "learning_rate": 4.669384798710251e-05,
      "loss": 0.8902,
      "step": 136540
    },
    {
      "epoch": 0.47857370665937676,
      "grad_norm": 3.140625,
      "learning_rate": 4.66931989584388e-05,
      "loss": 0.9223,
      "step": 136550
    },
    {
      "epoch": 0.4786087541662724,
      "grad_norm": 3.296875,
      "learning_rate": 4.6692549929775105e-05,
      "loss": 0.9302,
      "step": 136560
    },
    {
      "epoch": 0.478643801673168,
      "grad_norm": 2.609375,
      "learning_rate": 4.66919009011114e-05,
      "loss": 0.8965,
      "step": 136570
    },
    {
      "epoch": 0.4786788491800636,
      "grad_norm": 2.8125,
      "learning_rate": 4.6691251872447694e-05,
      "loss": 0.8512,
      "step": 136580
    },
    {
      "epoch": 0.4787138966869592,
      "grad_norm": 2.671875,
      "learning_rate": 4.6690602843783995e-05,
      "loss": 1.0012,
      "step": 136590
    },
    {
      "epoch": 0.4787489441938548,
      "grad_norm": 2.59375,
      "learning_rate": 4.668995381512029e-05,
      "loss": 0.909,
      "step": 136600
    },
    {
      "epoch": 0.47878399170075037,
      "grad_norm": 2.890625,
      "learning_rate": 4.668930478645659e-05,
      "loss": 0.9609,
      "step": 136610
    },
    {
      "epoch": 0.47881903920764596,
      "grad_norm": 3.234375,
      "learning_rate": 4.6688655757792886e-05,
      "loss": 0.9562,
      "step": 136620
    },
    {
      "epoch": 0.47885408671454155,
      "grad_norm": 2.6875,
      "learning_rate": 4.668800672912919e-05,
      "loss": 0.8943,
      "step": 136630
    },
    {
      "epoch": 0.47888913422143714,
      "grad_norm": 3.140625,
      "learning_rate": 4.668735770046548e-05,
      "loss": 0.9097,
      "step": 136640
    },
    {
      "epoch": 0.47892418172833273,
      "grad_norm": 3.109375,
      "learning_rate": 4.668670867180178e-05,
      "loss": 0.9058,
      "step": 136650
    },
    {
      "epoch": 0.4789592292352284,
      "grad_norm": 3.0625,
      "learning_rate": 4.668605964313808e-05,
      "loss": 0.9748,
      "step": 136660
    },
    {
      "epoch": 0.47899427674212397,
      "grad_norm": 2.921875,
      "learning_rate": 4.668541061447438e-05,
      "loss": 0.9521,
      "step": 136670
    },
    {
      "epoch": 0.47902932424901956,
      "grad_norm": 3.15625,
      "learning_rate": 4.6684761585810674e-05,
      "loss": 0.864,
      "step": 136680
    },
    {
      "epoch": 0.47906437175591515,
      "grad_norm": 2.921875,
      "learning_rate": 4.6684112557146975e-05,
      "loss": 0.9163,
      "step": 136690
    },
    {
      "epoch": 0.47909941926281074,
      "grad_norm": 3.375,
      "learning_rate": 4.6683463528483277e-05,
      "loss": 0.9452,
      "step": 136700
    },
    {
      "epoch": 0.47913446676970634,
      "grad_norm": 3.171875,
      "learning_rate": 4.668281449981957e-05,
      "loss": 0.9048,
      "step": 136710
    },
    {
      "epoch": 0.4791695142766019,
      "grad_norm": 2.84375,
      "learning_rate": 4.668216547115587e-05,
      "loss": 1.0029,
      "step": 136720
    },
    {
      "epoch": 0.4792045617834975,
      "grad_norm": 2.78125,
      "learning_rate": 4.668151644249217e-05,
      "loss": 0.8743,
      "step": 136730
    },
    {
      "epoch": 0.4792396092903931,
      "grad_norm": 2.84375,
      "learning_rate": 4.668086741382847e-05,
      "loss": 0.9153,
      "step": 136740
    },
    {
      "epoch": 0.4792746567972887,
      "grad_norm": 2.734375,
      "learning_rate": 4.668021838516476e-05,
      "loss": 0.9239,
      "step": 136750
    },
    {
      "epoch": 0.47930970430418435,
      "grad_norm": 2.484375,
      "learning_rate": 4.6679569356501065e-05,
      "loss": 0.9037,
      "step": 136760
    },
    {
      "epoch": 0.47934475181107994,
      "grad_norm": 2.5625,
      "learning_rate": 4.667892032783736e-05,
      "loss": 0.9055,
      "step": 136770
    },
    {
      "epoch": 0.47937979931797553,
      "grad_norm": 2.875,
      "learning_rate": 4.667827129917366e-05,
      "loss": 0.889,
      "step": 136780
    },
    {
      "epoch": 0.4794148468248711,
      "grad_norm": 2.640625,
      "learning_rate": 4.6677622270509955e-05,
      "loss": 0.8839,
      "step": 136790
    },
    {
      "epoch": 0.4794498943317667,
      "grad_norm": 3.109375,
      "learning_rate": 4.6676973241846257e-05,
      "loss": 0.9497,
      "step": 136800
    },
    {
      "epoch": 0.4794849418386623,
      "grad_norm": 3.328125,
      "learning_rate": 4.667632421318255e-05,
      "loss": 0.9544,
      "step": 136810
    },
    {
      "epoch": 0.4795199893455579,
      "grad_norm": 3.0,
      "learning_rate": 4.667567518451885e-05,
      "loss": 0.9577,
      "step": 136820
    },
    {
      "epoch": 0.4795550368524535,
      "grad_norm": 2.9375,
      "learning_rate": 4.667502615585515e-05,
      "loss": 0.9009,
      "step": 136830
    },
    {
      "epoch": 0.4795900843593491,
      "grad_norm": 2.40625,
      "learning_rate": 4.667437712719145e-05,
      "loss": 0.8378,
      "step": 136840
    },
    {
      "epoch": 0.47962513186624467,
      "grad_norm": 3.15625,
      "learning_rate": 4.667372809852775e-05,
      "loss": 0.9722,
      "step": 136850
    },
    {
      "epoch": 0.4796601793731403,
      "grad_norm": 3.0,
      "learning_rate": 4.6673079069864045e-05,
      "loss": 0.9052,
      "step": 136860
    },
    {
      "epoch": 0.4796952268800359,
      "grad_norm": 3.234375,
      "learning_rate": 4.6672430041200346e-05,
      "loss": 0.9069,
      "step": 136870
    },
    {
      "epoch": 0.4797302743869315,
      "grad_norm": 3.15625,
      "learning_rate": 4.667178101253664e-05,
      "loss": 0.873,
      "step": 136880
    },
    {
      "epoch": 0.4797653218938271,
      "grad_norm": 2.796875,
      "learning_rate": 4.667113198387294e-05,
      "loss": 0.866,
      "step": 136890
    },
    {
      "epoch": 0.4798003694007227,
      "grad_norm": 3.359375,
      "learning_rate": 4.6670482955209237e-05,
      "loss": 0.9386,
      "step": 136900
    },
    {
      "epoch": 0.4798354169076183,
      "grad_norm": 3.171875,
      "learning_rate": 4.666983392654554e-05,
      "loss": 0.9099,
      "step": 136910
    },
    {
      "epoch": 0.47987046441451386,
      "grad_norm": 2.921875,
      "learning_rate": 4.666918489788183e-05,
      "loss": 0.9626,
      "step": 136920
    },
    {
      "epoch": 0.47990551192140946,
      "grad_norm": 3.15625,
      "learning_rate": 4.6668535869218134e-05,
      "loss": 0.9758,
      "step": 136930
    },
    {
      "epoch": 0.47994055942830505,
      "grad_norm": 3.078125,
      "learning_rate": 4.666788684055443e-05,
      "loss": 0.9477,
      "step": 136940
    },
    {
      "epoch": 0.47997560693520064,
      "grad_norm": 2.890625,
      "learning_rate": 4.666723781189072e-05,
      "loss": 0.9031,
      "step": 136950
    },
    {
      "epoch": 0.4800106544420963,
      "grad_norm": 2.6875,
      "learning_rate": 4.6666588783227025e-05,
      "loss": 0.9416,
      "step": 136960
    },
    {
      "epoch": 0.4800457019489919,
      "grad_norm": 2.9375,
      "learning_rate": 4.666593975456332e-05,
      "loss": 0.8771,
      "step": 136970
    },
    {
      "epoch": 0.48008074945588747,
      "grad_norm": 3.59375,
      "learning_rate": 4.666529072589962e-05,
      "loss": 0.9403,
      "step": 136980
    },
    {
      "epoch": 0.48011579696278306,
      "grad_norm": 2.8125,
      "learning_rate": 4.6664641697235915e-05,
      "loss": 0.988,
      "step": 136990
    },
    {
      "epoch": 0.48015084446967865,
      "grad_norm": 3.0,
      "learning_rate": 4.6663992668572217e-05,
      "loss": 0.9257,
      "step": 137000
    },
    {
      "epoch": 0.48018589197657424,
      "grad_norm": 3.3125,
      "learning_rate": 4.666334363990851e-05,
      "loss": 0.965,
      "step": 137010
    },
    {
      "epoch": 0.48022093948346983,
      "grad_norm": 2.828125,
      "learning_rate": 4.666269461124481e-05,
      "loss": 0.9293,
      "step": 137020
    },
    {
      "epoch": 0.4802559869903654,
      "grad_norm": 3.171875,
      "learning_rate": 4.666204558258111e-05,
      "loss": 0.918,
      "step": 137030
    },
    {
      "epoch": 0.480291034497261,
      "grad_norm": 3.125,
      "learning_rate": 4.666139655391741e-05,
      "loss": 0.8994,
      "step": 137040
    },
    {
      "epoch": 0.4803260820041566,
      "grad_norm": 2.921875,
      "learning_rate": 4.66607475252537e-05,
      "loss": 0.9842,
      "step": 137050
    },
    {
      "epoch": 0.48036112951105225,
      "grad_norm": 3.0625,
      "learning_rate": 4.6660098496590005e-05,
      "loss": 1.0106,
      "step": 137060
    },
    {
      "epoch": 0.48039617701794785,
      "grad_norm": 2.890625,
      "learning_rate": 4.6659449467926306e-05,
      "loss": 0.9167,
      "step": 137070
    },
    {
      "epoch": 0.48043122452484344,
      "grad_norm": 2.546875,
      "learning_rate": 4.66588004392626e-05,
      "loss": 0.9378,
      "step": 137080
    },
    {
      "epoch": 0.48046627203173903,
      "grad_norm": 3.15625,
      "learning_rate": 4.66581514105989e-05,
      "loss": 1.0257,
      "step": 137090
    },
    {
      "epoch": 0.4805013195386346,
      "grad_norm": 2.609375,
      "learning_rate": 4.6657502381935197e-05,
      "loss": 0.9569,
      "step": 137100
    },
    {
      "epoch": 0.4805363670455302,
      "grad_norm": 2.6875,
      "learning_rate": 4.66568533532715e-05,
      "loss": 0.9188,
      "step": 137110
    },
    {
      "epoch": 0.4805714145524258,
      "grad_norm": 2.859375,
      "learning_rate": 4.665620432460779e-05,
      "loss": 0.9007,
      "step": 137120
    },
    {
      "epoch": 0.4806064620593214,
      "grad_norm": 2.84375,
      "learning_rate": 4.6655555295944094e-05,
      "loss": 0.9621,
      "step": 137130
    },
    {
      "epoch": 0.480641509566217,
      "grad_norm": 2.96875,
      "learning_rate": 4.665490626728039e-05,
      "loss": 0.9107,
      "step": 137140
    },
    {
      "epoch": 0.48067655707311263,
      "grad_norm": 2.859375,
      "learning_rate": 4.665425723861669e-05,
      "loss": 0.8989,
      "step": 137150
    },
    {
      "epoch": 0.4807116045800082,
      "grad_norm": 3.125,
      "learning_rate": 4.6653608209952985e-05,
      "loss": 0.9367,
      "step": 137160
    },
    {
      "epoch": 0.4807466520869038,
      "grad_norm": 2.828125,
      "learning_rate": 4.6652959181289286e-05,
      "loss": 0.8569,
      "step": 137170
    },
    {
      "epoch": 0.4807816995937994,
      "grad_norm": 3.09375,
      "learning_rate": 4.665231015262558e-05,
      "loss": 1.0315,
      "step": 137180
    },
    {
      "epoch": 0.480816747100695,
      "grad_norm": 2.921875,
      "learning_rate": 4.665166112396188e-05,
      "loss": 0.9453,
      "step": 137190
    },
    {
      "epoch": 0.4808517946075906,
      "grad_norm": 3.234375,
      "learning_rate": 4.665101209529818e-05,
      "loss": 0.9707,
      "step": 137200
    },
    {
      "epoch": 0.4808868421144862,
      "grad_norm": 3.171875,
      "learning_rate": 4.665036306663448e-05,
      "loss": 0.9301,
      "step": 137210
    },
    {
      "epoch": 0.48092188962138177,
      "grad_norm": 2.765625,
      "learning_rate": 4.664971403797078e-05,
      "loss": 0.9921,
      "step": 137220
    },
    {
      "epoch": 0.48095693712827736,
      "grad_norm": 3.15625,
      "learning_rate": 4.6649065009307074e-05,
      "loss": 0.8846,
      "step": 137230
    },
    {
      "epoch": 0.48099198463517295,
      "grad_norm": 2.734375,
      "learning_rate": 4.6648415980643375e-05,
      "loss": 0.9284,
      "step": 137240
    },
    {
      "epoch": 0.4810270321420686,
      "grad_norm": 2.75,
      "learning_rate": 4.664776695197967e-05,
      "loss": 0.9118,
      "step": 137250
    },
    {
      "epoch": 0.4810620796489642,
      "grad_norm": 3.125,
      "learning_rate": 4.664711792331597e-05,
      "loss": 1.039,
      "step": 137260
    },
    {
      "epoch": 0.4810971271558598,
      "grad_norm": 2.875,
      "learning_rate": 4.6646468894652266e-05,
      "loss": 0.9433,
      "step": 137270
    },
    {
      "epoch": 0.4811321746627554,
      "grad_norm": 2.96875,
      "learning_rate": 4.664581986598857e-05,
      "loss": 0.9635,
      "step": 137280
    },
    {
      "epoch": 0.48116722216965097,
      "grad_norm": 3.109375,
      "learning_rate": 4.664517083732486e-05,
      "loss": 1.0531,
      "step": 137290
    },
    {
      "epoch": 0.48120226967654656,
      "grad_norm": 2.65625,
      "learning_rate": 4.664452180866116e-05,
      "loss": 0.7768,
      "step": 137300
    },
    {
      "epoch": 0.48123731718344215,
      "grad_norm": 3.5,
      "learning_rate": 4.664387277999746e-05,
      "loss": 0.9695,
      "step": 137310
    },
    {
      "epoch": 0.48127236469033774,
      "grad_norm": 3.015625,
      "learning_rate": 4.664322375133376e-05,
      "loss": 0.9397,
      "step": 137320
    },
    {
      "epoch": 0.48130741219723333,
      "grad_norm": 2.875,
      "learning_rate": 4.6642574722670054e-05,
      "loss": 0.8672,
      "step": 137330
    },
    {
      "epoch": 0.4813424597041289,
      "grad_norm": 3.171875,
      "learning_rate": 4.664192569400635e-05,
      "loss": 0.9687,
      "step": 137340
    },
    {
      "epoch": 0.48137750721102457,
      "grad_norm": 3.0,
      "learning_rate": 4.664127666534265e-05,
      "loss": 0.8798,
      "step": 137350
    },
    {
      "epoch": 0.48141255471792016,
      "grad_norm": 2.5,
      "learning_rate": 4.6640627636678945e-05,
      "loss": 0.9294,
      "step": 137360
    },
    {
      "epoch": 0.48144760222481575,
      "grad_norm": 2.953125,
      "learning_rate": 4.6639978608015246e-05,
      "loss": 0.9336,
      "step": 137370
    },
    {
      "epoch": 0.48148264973171134,
      "grad_norm": 2.8125,
      "learning_rate": 4.663932957935154e-05,
      "loss": 0.9141,
      "step": 137380
    },
    {
      "epoch": 0.48151769723860693,
      "grad_norm": 3.609375,
      "learning_rate": 4.663868055068784e-05,
      "loss": 0.86,
      "step": 137390
    },
    {
      "epoch": 0.4815527447455025,
      "grad_norm": 2.875,
      "learning_rate": 4.6638031522024137e-05,
      "loss": 0.8504,
      "step": 137400
    },
    {
      "epoch": 0.4815877922523981,
      "grad_norm": 2.484375,
      "learning_rate": 4.663738249336044e-05,
      "loss": 0.8409,
      "step": 137410
    },
    {
      "epoch": 0.4816228397592937,
      "grad_norm": 2.953125,
      "learning_rate": 4.663673346469673e-05,
      "loss": 0.8985,
      "step": 137420
    },
    {
      "epoch": 0.4816578872661893,
      "grad_norm": 3.0625,
      "learning_rate": 4.6636084436033034e-05,
      "loss": 0.9034,
      "step": 137430
    },
    {
      "epoch": 0.4816929347730849,
      "grad_norm": 3.5,
      "learning_rate": 4.6635435407369335e-05,
      "loss": 0.9235,
      "step": 137440
    },
    {
      "epoch": 0.48172798227998054,
      "grad_norm": 2.59375,
      "learning_rate": 4.663478637870563e-05,
      "loss": 0.893,
      "step": 137450
    },
    {
      "epoch": 0.48176302978687613,
      "grad_norm": 3.109375,
      "learning_rate": 4.663413735004193e-05,
      "loss": 0.9519,
      "step": 137460
    },
    {
      "epoch": 0.4817980772937717,
      "grad_norm": 3.125,
      "learning_rate": 4.6633488321378226e-05,
      "loss": 1.0291,
      "step": 137470
    },
    {
      "epoch": 0.4818331248006673,
      "grad_norm": 3.09375,
      "learning_rate": 4.663283929271453e-05,
      "loss": 0.9448,
      "step": 137480
    },
    {
      "epoch": 0.4818681723075629,
      "grad_norm": 2.828125,
      "learning_rate": 4.663219026405082e-05,
      "loss": 0.8379,
      "step": 137490
    },
    {
      "epoch": 0.4819032198144585,
      "grad_norm": 3.28125,
      "learning_rate": 4.663154123538712e-05,
      "loss": 1.013,
      "step": 137500
    },
    {
      "epoch": 0.4819382673213541,
      "grad_norm": 3.078125,
      "learning_rate": 4.663089220672342e-05,
      "loss": 0.9296,
      "step": 137510
    },
    {
      "epoch": 0.4819733148282497,
      "grad_norm": 3.015625,
      "learning_rate": 4.663024317805972e-05,
      "loss": 0.9075,
      "step": 137520
    },
    {
      "epoch": 0.48200836233514527,
      "grad_norm": 2.796875,
      "learning_rate": 4.6629594149396014e-05,
      "loss": 0.9377,
      "step": 137530
    },
    {
      "epoch": 0.48204340984204086,
      "grad_norm": 2.9375,
      "learning_rate": 4.6628945120732315e-05,
      "loss": 0.9901,
      "step": 137540
    },
    {
      "epoch": 0.4820784573489365,
      "grad_norm": 3.03125,
      "learning_rate": 4.662829609206861e-05,
      "loss": 0.916,
      "step": 137550
    },
    {
      "epoch": 0.4821135048558321,
      "grad_norm": 2.875,
      "learning_rate": 4.662764706340491e-05,
      "loss": 0.8971,
      "step": 137560
    },
    {
      "epoch": 0.4821485523627277,
      "grad_norm": 2.6875,
      "learning_rate": 4.662699803474121e-05,
      "loss": 0.9176,
      "step": 137570
    },
    {
      "epoch": 0.4821835998696233,
      "grad_norm": 3.03125,
      "learning_rate": 4.662634900607751e-05,
      "loss": 0.983,
      "step": 137580
    },
    {
      "epoch": 0.48221864737651887,
      "grad_norm": 2.8125,
      "learning_rate": 4.662569997741381e-05,
      "loss": 0.9309,
      "step": 137590
    },
    {
      "epoch": 0.48225369488341446,
      "grad_norm": 2.78125,
      "learning_rate": 4.66250509487501e-05,
      "loss": 0.9206,
      "step": 137600
    },
    {
      "epoch": 0.48228874239031005,
      "grad_norm": 2.96875,
      "learning_rate": 4.6624401920086405e-05,
      "loss": 0.8776,
      "step": 137610
    },
    {
      "epoch": 0.48232378989720565,
      "grad_norm": 3.0625,
      "learning_rate": 4.66237528914227e-05,
      "loss": 0.9539,
      "step": 137620
    },
    {
      "epoch": 0.48235883740410124,
      "grad_norm": 2.796875,
      "learning_rate": 4.6623103862759e-05,
      "loss": 0.9496,
      "step": 137630
    },
    {
      "epoch": 0.48239388491099683,
      "grad_norm": 2.796875,
      "learning_rate": 4.6622454834095295e-05,
      "loss": 0.9378,
      "step": 137640
    },
    {
      "epoch": 0.4824289324178925,
      "grad_norm": 2.6875,
      "learning_rate": 4.66218058054316e-05,
      "loss": 0.8581,
      "step": 137650
    },
    {
      "epoch": 0.48246397992478807,
      "grad_norm": 2.921875,
      "learning_rate": 4.662115677676789e-05,
      "loss": 1.0008,
      "step": 137660
    },
    {
      "epoch": 0.48249902743168366,
      "grad_norm": 3.203125,
      "learning_rate": 4.662050774810419e-05,
      "loss": 0.9617,
      "step": 137670
    },
    {
      "epoch": 0.48253407493857925,
      "grad_norm": 3.328125,
      "learning_rate": 4.661985871944049e-05,
      "loss": 0.9288,
      "step": 137680
    },
    {
      "epoch": 0.48256912244547484,
      "grad_norm": 2.8125,
      "learning_rate": 4.661920969077679e-05,
      "loss": 0.908,
      "step": 137690
    },
    {
      "epoch": 0.48260416995237043,
      "grad_norm": 3.125,
      "learning_rate": 4.661856066211308e-05,
      "loss": 0.9095,
      "step": 137700
    },
    {
      "epoch": 0.482639217459266,
      "grad_norm": 2.5,
      "learning_rate": 4.661791163344938e-05,
      "loss": 0.8872,
      "step": 137710
    },
    {
      "epoch": 0.4826742649661616,
      "grad_norm": 3.140625,
      "learning_rate": 4.661726260478568e-05,
      "loss": 0.8951,
      "step": 137720
    },
    {
      "epoch": 0.4827093124730572,
      "grad_norm": 2.828125,
      "learning_rate": 4.6616613576121974e-05,
      "loss": 0.8759,
      "step": 137730
    },
    {
      "epoch": 0.48274435997995285,
      "grad_norm": 2.921875,
      "learning_rate": 4.6615964547458275e-05,
      "loss": 0.8965,
      "step": 137740
    },
    {
      "epoch": 0.48277940748684844,
      "grad_norm": 3.0,
      "learning_rate": 4.661531551879457e-05,
      "loss": 0.9856,
      "step": 137750
    },
    {
      "epoch": 0.48281445499374404,
      "grad_norm": 3.203125,
      "learning_rate": 4.661466649013087e-05,
      "loss": 0.8798,
      "step": 137760
    },
    {
      "epoch": 0.4828495025006396,
      "grad_norm": 3.15625,
      "learning_rate": 4.6614017461467166e-05,
      "loss": 0.943,
      "step": 137770
    },
    {
      "epoch": 0.4828845500075352,
      "grad_norm": 3.265625,
      "learning_rate": 4.661336843280347e-05,
      "loss": 0.9658,
      "step": 137780
    },
    {
      "epoch": 0.4829195975144308,
      "grad_norm": 3.28125,
      "learning_rate": 4.661271940413976e-05,
      "loss": 0.9925,
      "step": 137790
    },
    {
      "epoch": 0.4829546450213264,
      "grad_norm": 2.875,
      "learning_rate": 4.661207037547606e-05,
      "loss": 0.9583,
      "step": 137800
    },
    {
      "epoch": 0.482989692528222,
      "grad_norm": 2.765625,
      "learning_rate": 4.6611421346812365e-05,
      "loss": 0.9382,
      "step": 137810
    },
    {
      "epoch": 0.4830247400351176,
      "grad_norm": 3.328125,
      "learning_rate": 4.661077231814866e-05,
      "loss": 1.0185,
      "step": 137820
    },
    {
      "epoch": 0.4830597875420132,
      "grad_norm": 2.796875,
      "learning_rate": 4.661012328948496e-05,
      "loss": 0.916,
      "step": 137830
    },
    {
      "epoch": 0.4830948350489088,
      "grad_norm": 3.46875,
      "learning_rate": 4.6609474260821255e-05,
      "loss": 0.925,
      "step": 137840
    },
    {
      "epoch": 0.4831298825558044,
      "grad_norm": 3.234375,
      "learning_rate": 4.660882523215756e-05,
      "loss": 0.9451,
      "step": 137850
    },
    {
      "epoch": 0.4831649300627,
      "grad_norm": 3.296875,
      "learning_rate": 4.660817620349385e-05,
      "loss": 0.9572,
      "step": 137860
    },
    {
      "epoch": 0.4831999775695956,
      "grad_norm": 3.078125,
      "learning_rate": 4.660752717483015e-05,
      "loss": 0.8794,
      "step": 137870
    },
    {
      "epoch": 0.4832350250764912,
      "grad_norm": 2.953125,
      "learning_rate": 4.660687814616645e-05,
      "loss": 0.8869,
      "step": 137880
    },
    {
      "epoch": 0.4832700725833868,
      "grad_norm": 3.15625,
      "learning_rate": 4.660622911750275e-05,
      "loss": 0.9266,
      "step": 137890
    },
    {
      "epoch": 0.48330512009028237,
      "grad_norm": 3.0625,
      "learning_rate": 4.660558008883904e-05,
      "loss": 0.9544,
      "step": 137900
    },
    {
      "epoch": 0.48334016759717796,
      "grad_norm": 2.671875,
      "learning_rate": 4.6604931060175345e-05,
      "loss": 0.896,
      "step": 137910
    },
    {
      "epoch": 0.48337521510407355,
      "grad_norm": 3.234375,
      "learning_rate": 4.660428203151164e-05,
      "loss": 0.9311,
      "step": 137920
    },
    {
      "epoch": 0.48341026261096914,
      "grad_norm": 3.25,
      "learning_rate": 4.660363300284794e-05,
      "loss": 1.0157,
      "step": 137930
    },
    {
      "epoch": 0.4834453101178648,
      "grad_norm": 3.3125,
      "learning_rate": 4.660298397418424e-05,
      "loss": 0.9594,
      "step": 137940
    },
    {
      "epoch": 0.4834803576247604,
      "grad_norm": 2.828125,
      "learning_rate": 4.660233494552054e-05,
      "loss": 0.9598,
      "step": 137950
    },
    {
      "epoch": 0.483515405131656,
      "grad_norm": 3.265625,
      "learning_rate": 4.660168591685684e-05,
      "loss": 1.0144,
      "step": 137960
    },
    {
      "epoch": 0.48355045263855156,
      "grad_norm": 2.765625,
      "learning_rate": 4.660103688819313e-05,
      "loss": 0.892,
      "step": 137970
    },
    {
      "epoch": 0.48358550014544716,
      "grad_norm": 2.484375,
      "learning_rate": 4.6600387859529434e-05,
      "loss": 0.9399,
      "step": 137980
    },
    {
      "epoch": 0.48362054765234275,
      "grad_norm": 2.75,
      "learning_rate": 4.659973883086573e-05,
      "loss": 0.9607,
      "step": 137990
    },
    {
      "epoch": 0.48365559515923834,
      "grad_norm": 2.78125,
      "learning_rate": 4.659908980220203e-05,
      "loss": 0.8552,
      "step": 138000
    },
    {
      "epoch": 0.48369064266613393,
      "grad_norm": 2.796875,
      "learning_rate": 4.6598440773538325e-05,
      "loss": 0.9446,
      "step": 138010
    },
    {
      "epoch": 0.4837256901730295,
      "grad_norm": 3.21875,
      "learning_rate": 4.6597791744874626e-05,
      "loss": 0.9692,
      "step": 138020
    },
    {
      "epoch": 0.4837607376799251,
      "grad_norm": 2.703125,
      "learning_rate": 4.659714271621092e-05,
      "loss": 0.9562,
      "step": 138030
    },
    {
      "epoch": 0.48379578518682076,
      "grad_norm": 2.78125,
      "learning_rate": 4.659649368754722e-05,
      "loss": 0.8753,
      "step": 138040
    },
    {
      "epoch": 0.48383083269371635,
      "grad_norm": 2.9375,
      "learning_rate": 4.659584465888352e-05,
      "loss": 0.9712,
      "step": 138050
    },
    {
      "epoch": 0.48386588020061194,
      "grad_norm": 3.203125,
      "learning_rate": 4.659519563021982e-05,
      "loss": 0.9112,
      "step": 138060
    },
    {
      "epoch": 0.48390092770750753,
      "grad_norm": 2.859375,
      "learning_rate": 4.659454660155611e-05,
      "loss": 0.963,
      "step": 138070
    },
    {
      "epoch": 0.4839359752144031,
      "grad_norm": 3.0,
      "learning_rate": 4.659389757289241e-05,
      "loss": 0.9759,
      "step": 138080
    },
    {
      "epoch": 0.4839710227212987,
      "grad_norm": 3.25,
      "learning_rate": 4.659324854422871e-05,
      "loss": 0.9506,
      "step": 138090
    },
    {
      "epoch": 0.4840060702281943,
      "grad_norm": 3.15625,
      "learning_rate": 4.6592599515565e-05,
      "loss": 0.9051,
      "step": 138100
    },
    {
      "epoch": 0.4840411177350899,
      "grad_norm": 3.171875,
      "learning_rate": 4.6591950486901305e-05,
      "loss": 1.0717,
      "step": 138110
    },
    {
      "epoch": 0.4840761652419855,
      "grad_norm": 3.203125,
      "learning_rate": 4.65913014582376e-05,
      "loss": 0.8708,
      "step": 138120
    },
    {
      "epoch": 0.4841112127488811,
      "grad_norm": 2.921875,
      "learning_rate": 4.65906524295739e-05,
      "loss": 0.927,
      "step": 138130
    },
    {
      "epoch": 0.48414626025577673,
      "grad_norm": 2.859375,
      "learning_rate": 4.6590003400910195e-05,
      "loss": 0.9406,
      "step": 138140
    },
    {
      "epoch": 0.4841813077626723,
      "grad_norm": 3.03125,
      "learning_rate": 4.65893543722465e-05,
      "loss": 0.9176,
      "step": 138150
    },
    {
      "epoch": 0.4842163552695679,
      "grad_norm": 3.078125,
      "learning_rate": 4.65887053435828e-05,
      "loss": 0.8944,
      "step": 138160
    },
    {
      "epoch": 0.4842514027764635,
      "grad_norm": 2.6875,
      "learning_rate": 4.658805631491909e-05,
      "loss": 1.0182,
      "step": 138170
    },
    {
      "epoch": 0.4842864502833591,
      "grad_norm": 3.1875,
      "learning_rate": 4.6587407286255394e-05,
      "loss": 0.9497,
      "step": 138180
    },
    {
      "epoch": 0.4843214977902547,
      "grad_norm": 3.234375,
      "learning_rate": 4.658675825759169e-05,
      "loss": 0.8697,
      "step": 138190
    },
    {
      "epoch": 0.4843565452971503,
      "grad_norm": 3.1875,
      "learning_rate": 4.658610922892799e-05,
      "loss": 0.9458,
      "step": 138200
    },
    {
      "epoch": 0.48439159280404587,
      "grad_norm": 2.859375,
      "learning_rate": 4.6585460200264285e-05,
      "loss": 0.8703,
      "step": 138210
    },
    {
      "epoch": 0.48442664031094146,
      "grad_norm": 3.09375,
      "learning_rate": 4.6584811171600586e-05,
      "loss": 0.9689,
      "step": 138220
    },
    {
      "epoch": 0.4844616878178371,
      "grad_norm": 2.875,
      "learning_rate": 4.658416214293688e-05,
      "loss": 1.0332,
      "step": 138230
    },
    {
      "epoch": 0.4844967353247327,
      "grad_norm": 3.34375,
      "learning_rate": 4.658351311427318e-05,
      "loss": 0.9051,
      "step": 138240
    },
    {
      "epoch": 0.4845317828316283,
      "grad_norm": 2.703125,
      "learning_rate": 4.658286408560948e-05,
      "loss": 0.978,
      "step": 138250
    },
    {
      "epoch": 0.4845668303385239,
      "grad_norm": 2.875,
      "learning_rate": 4.658221505694578e-05,
      "loss": 0.917,
      "step": 138260
    },
    {
      "epoch": 0.48460187784541947,
      "grad_norm": 2.875,
      "learning_rate": 4.658156602828207e-05,
      "loss": 0.9978,
      "step": 138270
    },
    {
      "epoch": 0.48463692535231506,
      "grad_norm": 3.234375,
      "learning_rate": 4.6580916999618374e-05,
      "loss": 0.8928,
      "step": 138280
    },
    {
      "epoch": 0.48467197285921065,
      "grad_norm": 3.5,
      "learning_rate": 4.658026797095467e-05,
      "loss": 0.9939,
      "step": 138290
    },
    {
      "epoch": 0.48470702036610624,
      "grad_norm": 3.9375,
      "learning_rate": 4.657961894229097e-05,
      "loss": 0.9037,
      "step": 138300
    },
    {
      "epoch": 0.48474206787300184,
      "grad_norm": 2.84375,
      "learning_rate": 4.657896991362727e-05,
      "loss": 0.9396,
      "step": 138310
    },
    {
      "epoch": 0.4847771153798974,
      "grad_norm": 2.765625,
      "learning_rate": 4.6578320884963566e-05,
      "loss": 0.8936,
      "step": 138320
    },
    {
      "epoch": 0.4848121628867931,
      "grad_norm": 2.734375,
      "learning_rate": 4.657767185629987e-05,
      "loss": 0.9131,
      "step": 138330
    },
    {
      "epoch": 0.48484721039368867,
      "grad_norm": 2.65625,
      "learning_rate": 4.657702282763616e-05,
      "loss": 0.9547,
      "step": 138340
    },
    {
      "epoch": 0.48488225790058426,
      "grad_norm": 2.703125,
      "learning_rate": 4.6576373798972463e-05,
      "loss": 0.8903,
      "step": 138350
    },
    {
      "epoch": 0.48491730540747985,
      "grad_norm": 3.15625,
      "learning_rate": 4.657572477030876e-05,
      "loss": 0.974,
      "step": 138360
    },
    {
      "epoch": 0.48495235291437544,
      "grad_norm": 2.78125,
      "learning_rate": 4.657507574164506e-05,
      "loss": 0.7822,
      "step": 138370
    },
    {
      "epoch": 0.48498740042127103,
      "grad_norm": 3.140625,
      "learning_rate": 4.6574426712981354e-05,
      "loss": 0.9115,
      "step": 138380
    },
    {
      "epoch": 0.4850224479281666,
      "grad_norm": 2.953125,
      "learning_rate": 4.6573777684317655e-05,
      "loss": 0.9239,
      "step": 138390
    },
    {
      "epoch": 0.4850574954350622,
      "grad_norm": 2.65625,
      "learning_rate": 4.657312865565395e-05,
      "loss": 0.9103,
      "step": 138400
    },
    {
      "epoch": 0.4850925429419578,
      "grad_norm": 3.15625,
      "learning_rate": 4.657247962699025e-05,
      "loss": 1.0171,
      "step": 138410
    },
    {
      "epoch": 0.4851275904488534,
      "grad_norm": 2.859375,
      "learning_rate": 4.6571830598326546e-05,
      "loss": 0.9479,
      "step": 138420
    },
    {
      "epoch": 0.48516263795574904,
      "grad_norm": 2.578125,
      "learning_rate": 4.657118156966285e-05,
      "loss": 0.889,
      "step": 138430
    },
    {
      "epoch": 0.48519768546264463,
      "grad_norm": 2.59375,
      "learning_rate": 4.657053254099915e-05,
      "loss": 0.9383,
      "step": 138440
    },
    {
      "epoch": 0.4852327329695402,
      "grad_norm": 2.890625,
      "learning_rate": 4.6569883512335443e-05,
      "loss": 0.9143,
      "step": 138450
    },
    {
      "epoch": 0.4852677804764358,
      "grad_norm": 3.03125,
      "learning_rate": 4.656923448367174e-05,
      "loss": 0.8872,
      "step": 138460
    },
    {
      "epoch": 0.4853028279833314,
      "grad_norm": 3.4375,
      "learning_rate": 4.656858545500803e-05,
      "loss": 1.0263,
      "step": 138470
    },
    {
      "epoch": 0.485337875490227,
      "grad_norm": 2.796875,
      "learning_rate": 4.6567936426344334e-05,
      "loss": 0.9002,
      "step": 138480
    },
    {
      "epoch": 0.4853729229971226,
      "grad_norm": 3.40625,
      "learning_rate": 4.656728739768063e-05,
      "loss": 0.9454,
      "step": 138490
    },
    {
      "epoch": 0.4854079705040182,
      "grad_norm": 2.828125,
      "learning_rate": 4.656663836901693e-05,
      "loss": 0.9208,
      "step": 138500
    },
    {
      "epoch": 0.4854430180109138,
      "grad_norm": 3.296875,
      "learning_rate": 4.6565989340353225e-05,
      "loss": 0.9799,
      "step": 138510
    },
    {
      "epoch": 0.48547806551780937,
      "grad_norm": 2.84375,
      "learning_rate": 4.6565340311689526e-05,
      "loss": 0.8836,
      "step": 138520
    },
    {
      "epoch": 0.485513113024705,
      "grad_norm": 3.421875,
      "learning_rate": 4.656469128302583e-05,
      "loss": 0.9611,
      "step": 138530
    },
    {
      "epoch": 0.4855481605316006,
      "grad_norm": 2.75,
      "learning_rate": 4.656404225436212e-05,
      "loss": 0.9311,
      "step": 138540
    },
    {
      "epoch": 0.4855832080384962,
      "grad_norm": 2.859375,
      "learning_rate": 4.6563393225698423e-05,
      "loss": 0.9248,
      "step": 138550
    },
    {
      "epoch": 0.4856182555453918,
      "grad_norm": 3.140625,
      "learning_rate": 4.656274419703472e-05,
      "loss": 0.9151,
      "step": 138560
    },
    {
      "epoch": 0.4856533030522874,
      "grad_norm": 3.265625,
      "learning_rate": 4.656209516837102e-05,
      "loss": 0.9067,
      "step": 138570
    },
    {
      "epoch": 0.48568835055918297,
      "grad_norm": 3.109375,
      "learning_rate": 4.6561446139707314e-05,
      "loss": 0.8998,
      "step": 138580
    },
    {
      "epoch": 0.48572339806607856,
      "grad_norm": 3.34375,
      "learning_rate": 4.6560797111043615e-05,
      "loss": 0.9722,
      "step": 138590
    },
    {
      "epoch": 0.48575844557297415,
      "grad_norm": 2.765625,
      "learning_rate": 4.656014808237991e-05,
      "loss": 0.8652,
      "step": 138600
    },
    {
      "epoch": 0.48579349307986974,
      "grad_norm": 2.796875,
      "learning_rate": 4.655949905371621e-05,
      "loss": 0.9322,
      "step": 138610
    },
    {
      "epoch": 0.48582854058676533,
      "grad_norm": 2.453125,
      "learning_rate": 4.6558850025052506e-05,
      "loss": 0.9571,
      "step": 138620
    },
    {
      "epoch": 0.485863588093661,
      "grad_norm": 2.96875,
      "learning_rate": 4.655820099638881e-05,
      "loss": 0.8856,
      "step": 138630
    },
    {
      "epoch": 0.48589863560055657,
      "grad_norm": 3.484375,
      "learning_rate": 4.65575519677251e-05,
      "loss": 0.9169,
      "step": 138640
    },
    {
      "epoch": 0.48593368310745216,
      "grad_norm": 3.140625,
      "learning_rate": 4.6556902939061403e-05,
      "loss": 0.876,
      "step": 138650
    },
    {
      "epoch": 0.48596873061434775,
      "grad_norm": 2.84375,
      "learning_rate": 4.65562539103977e-05,
      "loss": 0.8313,
      "step": 138660
    },
    {
      "epoch": 0.48600377812124335,
      "grad_norm": 2.828125,
      "learning_rate": 4.6555604881734e-05,
      "loss": 0.8274,
      "step": 138670
    },
    {
      "epoch": 0.48603882562813894,
      "grad_norm": 2.875,
      "learning_rate": 4.65549558530703e-05,
      "loss": 0.9732,
      "step": 138680
    },
    {
      "epoch": 0.48607387313503453,
      "grad_norm": 3.328125,
      "learning_rate": 4.6554306824406595e-05,
      "loss": 0.9462,
      "step": 138690
    },
    {
      "epoch": 0.4861089206419301,
      "grad_norm": 2.75,
      "learning_rate": 4.65536577957429e-05,
      "loss": 0.9678,
      "step": 138700
    },
    {
      "epoch": 0.4861439681488257,
      "grad_norm": 2.6875,
      "learning_rate": 4.655300876707919e-05,
      "loss": 0.9367,
      "step": 138710
    },
    {
      "epoch": 0.4861790156557213,
      "grad_norm": 2.78125,
      "learning_rate": 4.655235973841549e-05,
      "loss": 0.9585,
      "step": 138720
    },
    {
      "epoch": 0.48621406316261695,
      "grad_norm": 3.125,
      "learning_rate": 4.655171070975179e-05,
      "loss": 0.984,
      "step": 138730
    },
    {
      "epoch": 0.48624911066951254,
      "grad_norm": 3.125,
      "learning_rate": 4.655106168108809e-05,
      "loss": 0.9463,
      "step": 138740
    },
    {
      "epoch": 0.48628415817640813,
      "grad_norm": 3.53125,
      "learning_rate": 4.6550412652424383e-05,
      "loss": 0.9256,
      "step": 138750
    },
    {
      "epoch": 0.4863192056833037,
      "grad_norm": 2.8125,
      "learning_rate": 4.6549763623760685e-05,
      "loss": 0.898,
      "step": 138760
    },
    {
      "epoch": 0.4863542531901993,
      "grad_norm": 2.84375,
      "learning_rate": 4.654911459509698e-05,
      "loss": 0.9364,
      "step": 138770
    },
    {
      "epoch": 0.4863893006970949,
      "grad_norm": 2.875,
      "learning_rate": 4.654846556643328e-05,
      "loss": 0.9969,
      "step": 138780
    },
    {
      "epoch": 0.4864243482039905,
      "grad_norm": 2.8125,
      "learning_rate": 4.6547816537769575e-05,
      "loss": 0.9301,
      "step": 138790
    },
    {
      "epoch": 0.4864593957108861,
      "grad_norm": 3.296875,
      "learning_rate": 4.654716750910588e-05,
      "loss": 0.9157,
      "step": 138800
    },
    {
      "epoch": 0.4864944432177817,
      "grad_norm": 2.953125,
      "learning_rate": 4.654651848044218e-05,
      "loss": 0.9575,
      "step": 138810
    },
    {
      "epoch": 0.4865294907246773,
      "grad_norm": 3.0625,
      "learning_rate": 4.654586945177847e-05,
      "loss": 0.9532,
      "step": 138820
    },
    {
      "epoch": 0.4865645382315729,
      "grad_norm": 2.90625,
      "learning_rate": 4.654522042311477e-05,
      "loss": 0.9778,
      "step": 138830
    },
    {
      "epoch": 0.4865995857384685,
      "grad_norm": 3.34375,
      "learning_rate": 4.654457139445106e-05,
      "loss": 0.9127,
      "step": 138840
    },
    {
      "epoch": 0.4866346332453641,
      "grad_norm": 2.734375,
      "learning_rate": 4.6543922365787363e-05,
      "loss": 0.9469,
      "step": 138850
    },
    {
      "epoch": 0.4866696807522597,
      "grad_norm": 2.703125,
      "learning_rate": 4.654327333712366e-05,
      "loss": 0.9237,
      "step": 138860
    },
    {
      "epoch": 0.4867047282591553,
      "grad_norm": 2.90625,
      "learning_rate": 4.654262430845996e-05,
      "loss": 0.9692,
      "step": 138870
    },
    {
      "epoch": 0.4867397757660509,
      "grad_norm": 2.953125,
      "learning_rate": 4.6541975279796254e-05,
      "loss": 0.9305,
      "step": 138880
    },
    {
      "epoch": 0.48677482327294647,
      "grad_norm": 3.546875,
      "learning_rate": 4.6541326251132555e-05,
      "loss": 0.9845,
      "step": 138890
    },
    {
      "epoch": 0.48680987077984206,
      "grad_norm": 3.0,
      "learning_rate": 4.654067722246886e-05,
      "loss": 1.0059,
      "step": 138900
    },
    {
      "epoch": 0.48684491828673765,
      "grad_norm": 2.828125,
      "learning_rate": 4.654002819380515e-05,
      "loss": 0.9571,
      "step": 138910
    },
    {
      "epoch": 0.4868799657936333,
      "grad_norm": 3.125,
      "learning_rate": 4.653937916514145e-05,
      "loss": 0.9801,
      "step": 138920
    },
    {
      "epoch": 0.4869150133005289,
      "grad_norm": 2.734375,
      "learning_rate": 4.653873013647775e-05,
      "loss": 0.9184,
      "step": 138930
    },
    {
      "epoch": 0.4869500608074245,
      "grad_norm": 2.703125,
      "learning_rate": 4.653808110781405e-05,
      "loss": 0.9913,
      "step": 138940
    },
    {
      "epoch": 0.48698510831432007,
      "grad_norm": 3.265625,
      "learning_rate": 4.6537432079150343e-05,
      "loss": 0.9361,
      "step": 138950
    },
    {
      "epoch": 0.48702015582121566,
      "grad_norm": 3.265625,
      "learning_rate": 4.6536783050486645e-05,
      "loss": 0.9162,
      "step": 138960
    },
    {
      "epoch": 0.48705520332811125,
      "grad_norm": 2.875,
      "learning_rate": 4.653613402182294e-05,
      "loss": 0.9278,
      "step": 138970
    },
    {
      "epoch": 0.48709025083500684,
      "grad_norm": 2.921875,
      "learning_rate": 4.653548499315924e-05,
      "loss": 0.9561,
      "step": 138980
    },
    {
      "epoch": 0.48712529834190244,
      "grad_norm": 3.296875,
      "learning_rate": 4.6534835964495535e-05,
      "loss": 0.9273,
      "step": 138990
    },
    {
      "epoch": 0.487160345848798,
      "grad_norm": 3.125,
      "learning_rate": 4.653418693583184e-05,
      "loss": 0.8818,
      "step": 139000
    },
    {
      "epoch": 0.4871953933556936,
      "grad_norm": 2.859375,
      "learning_rate": 4.653353790716813e-05,
      "loss": 1.0101,
      "step": 139010
    },
    {
      "epoch": 0.48723044086258926,
      "grad_norm": 2.96875,
      "learning_rate": 4.653288887850443e-05,
      "loss": 0.932,
      "step": 139020
    },
    {
      "epoch": 0.48726548836948486,
      "grad_norm": 3.34375,
      "learning_rate": 4.653223984984073e-05,
      "loss": 0.9631,
      "step": 139030
    },
    {
      "epoch": 0.48730053587638045,
      "grad_norm": 3.140625,
      "learning_rate": 4.653159082117703e-05,
      "loss": 0.9929,
      "step": 139040
    },
    {
      "epoch": 0.48733558338327604,
      "grad_norm": 2.921875,
      "learning_rate": 4.653094179251333e-05,
      "loss": 0.9103,
      "step": 139050
    },
    {
      "epoch": 0.48737063089017163,
      "grad_norm": 2.390625,
      "learning_rate": 4.6530292763849625e-05,
      "loss": 0.8593,
      "step": 139060
    },
    {
      "epoch": 0.4874056783970672,
      "grad_norm": 3.0,
      "learning_rate": 4.6529643735185926e-05,
      "loss": 0.9911,
      "step": 139070
    },
    {
      "epoch": 0.4874407259039628,
      "grad_norm": 2.640625,
      "learning_rate": 4.652899470652222e-05,
      "loss": 0.9101,
      "step": 139080
    },
    {
      "epoch": 0.4874757734108584,
      "grad_norm": 2.9375,
      "learning_rate": 4.652834567785852e-05,
      "loss": 0.9724,
      "step": 139090
    },
    {
      "epoch": 0.487510820917754,
      "grad_norm": 3.296875,
      "learning_rate": 4.652769664919482e-05,
      "loss": 0.8772,
      "step": 139100
    },
    {
      "epoch": 0.4875458684246496,
      "grad_norm": 3.1875,
      "learning_rate": 4.652704762053112e-05,
      "loss": 0.9185,
      "step": 139110
    },
    {
      "epoch": 0.48758091593154523,
      "grad_norm": 2.984375,
      "learning_rate": 4.652639859186741e-05,
      "loss": 0.8753,
      "step": 139120
    },
    {
      "epoch": 0.4876159634384408,
      "grad_norm": 2.75,
      "learning_rate": 4.6525749563203714e-05,
      "loss": 0.9507,
      "step": 139130
    },
    {
      "epoch": 0.4876510109453364,
      "grad_norm": 3.1875,
      "learning_rate": 4.652510053454001e-05,
      "loss": 0.9304,
      "step": 139140
    },
    {
      "epoch": 0.487686058452232,
      "grad_norm": 2.859375,
      "learning_rate": 4.652445150587631e-05,
      "loss": 0.8747,
      "step": 139150
    },
    {
      "epoch": 0.4877211059591276,
      "grad_norm": 3.0625,
      "learning_rate": 4.6523802477212605e-05,
      "loss": 0.9177,
      "step": 139160
    },
    {
      "epoch": 0.4877561534660232,
      "grad_norm": 3.046875,
      "learning_rate": 4.6523153448548906e-05,
      "loss": 0.9133,
      "step": 139170
    },
    {
      "epoch": 0.4877912009729188,
      "grad_norm": 2.765625,
      "learning_rate": 4.652250441988521e-05,
      "loss": 0.9701,
      "step": 139180
    },
    {
      "epoch": 0.4878262484798144,
      "grad_norm": 2.96875,
      "learning_rate": 4.65218553912215e-05,
      "loss": 0.9575,
      "step": 139190
    },
    {
      "epoch": 0.48786129598670996,
      "grad_norm": 2.953125,
      "learning_rate": 4.6521206362557804e-05,
      "loss": 0.9474,
      "step": 139200
    },
    {
      "epoch": 0.48789634349360556,
      "grad_norm": 3.265625,
      "learning_rate": 4.652055733389409e-05,
      "loss": 0.9176,
      "step": 139210
    },
    {
      "epoch": 0.4879313910005012,
      "grad_norm": 4.4375,
      "learning_rate": 4.651990830523039e-05,
      "loss": 0.9954,
      "step": 139220
    },
    {
      "epoch": 0.4879664385073968,
      "grad_norm": 3.390625,
      "learning_rate": 4.651925927656669e-05,
      "loss": 0.9709,
      "step": 139230
    },
    {
      "epoch": 0.4880014860142924,
      "grad_norm": 2.890625,
      "learning_rate": 4.651861024790299e-05,
      "loss": 0.9688,
      "step": 139240
    },
    {
      "epoch": 0.488036533521188,
      "grad_norm": 3.03125,
      "learning_rate": 4.6517961219239283e-05,
      "loss": 0.9433,
      "step": 139250
    },
    {
      "epoch": 0.48807158102808357,
      "grad_norm": 3.3125,
      "learning_rate": 4.6517312190575585e-05,
      "loss": 0.9525,
      "step": 139260
    },
    {
      "epoch": 0.48810662853497916,
      "grad_norm": 2.96875,
      "learning_rate": 4.6516663161911886e-05,
      "loss": 0.9422,
      "step": 139270
    },
    {
      "epoch": 0.48814167604187475,
      "grad_norm": 2.53125,
      "learning_rate": 4.651601413324818e-05,
      "loss": 0.9985,
      "step": 139280
    },
    {
      "epoch": 0.48817672354877034,
      "grad_norm": 2.9375,
      "learning_rate": 4.651536510458448e-05,
      "loss": 0.8933,
      "step": 139290
    },
    {
      "epoch": 0.48821177105566593,
      "grad_norm": 2.671875,
      "learning_rate": 4.651471607592078e-05,
      "loss": 1.0552,
      "step": 139300
    },
    {
      "epoch": 0.4882468185625616,
      "grad_norm": 2.8125,
      "learning_rate": 4.651406704725708e-05,
      "loss": 0.9496,
      "step": 139310
    },
    {
      "epoch": 0.48828186606945717,
      "grad_norm": 2.65625,
      "learning_rate": 4.651341801859337e-05,
      "loss": 0.9244,
      "step": 139320
    },
    {
      "epoch": 0.48831691357635276,
      "grad_norm": 2.6875,
      "learning_rate": 4.6512768989929674e-05,
      "loss": 0.8672,
      "step": 139330
    },
    {
      "epoch": 0.48835196108324835,
      "grad_norm": 2.921875,
      "learning_rate": 4.651211996126597e-05,
      "loss": 0.9107,
      "step": 139340
    },
    {
      "epoch": 0.48838700859014395,
      "grad_norm": 2.609375,
      "learning_rate": 4.651147093260227e-05,
      "loss": 0.924,
      "step": 139350
    },
    {
      "epoch": 0.48842205609703954,
      "grad_norm": 2.578125,
      "learning_rate": 4.6510821903938565e-05,
      "loss": 0.9357,
      "step": 139360
    },
    {
      "epoch": 0.4884571036039351,
      "grad_norm": 2.859375,
      "learning_rate": 4.6510172875274866e-05,
      "loss": 0.9292,
      "step": 139370
    },
    {
      "epoch": 0.4884921511108307,
      "grad_norm": 2.4375,
      "learning_rate": 4.650952384661116e-05,
      "loss": 0.9275,
      "step": 139380
    },
    {
      "epoch": 0.4885271986177263,
      "grad_norm": 2.765625,
      "learning_rate": 4.650887481794746e-05,
      "loss": 0.9772,
      "step": 139390
    },
    {
      "epoch": 0.4885622461246219,
      "grad_norm": 3.140625,
      "learning_rate": 4.6508225789283764e-05,
      "loss": 0.9622,
      "step": 139400
    },
    {
      "epoch": 0.48859729363151755,
      "grad_norm": 2.875,
      "learning_rate": 4.650757676062006e-05,
      "loss": 0.8872,
      "step": 139410
    },
    {
      "epoch": 0.48863234113841314,
      "grad_norm": 3.046875,
      "learning_rate": 4.650692773195636e-05,
      "loss": 1.0273,
      "step": 139420
    },
    {
      "epoch": 0.48866738864530873,
      "grad_norm": 3.0,
      "learning_rate": 4.6506278703292654e-05,
      "loss": 0.8752,
      "step": 139430
    },
    {
      "epoch": 0.4887024361522043,
      "grad_norm": 3.09375,
      "learning_rate": 4.6505629674628956e-05,
      "loss": 0.9762,
      "step": 139440
    },
    {
      "epoch": 0.4887374836590999,
      "grad_norm": 3.171875,
      "learning_rate": 4.650498064596525e-05,
      "loss": 0.9964,
      "step": 139450
    },
    {
      "epoch": 0.4887725311659955,
      "grad_norm": 2.96875,
      "learning_rate": 4.650433161730155e-05,
      "loss": 0.9023,
      "step": 139460
    },
    {
      "epoch": 0.4888075786728911,
      "grad_norm": 2.546875,
      "learning_rate": 4.6503682588637846e-05,
      "loss": 0.9129,
      "step": 139470
    },
    {
      "epoch": 0.4888426261797867,
      "grad_norm": 2.9375,
      "learning_rate": 4.650303355997415e-05,
      "loss": 0.9525,
      "step": 139480
    },
    {
      "epoch": 0.4888776736866823,
      "grad_norm": 3.375,
      "learning_rate": 4.650238453131044e-05,
      "loss": 0.9733,
      "step": 139490
    },
    {
      "epoch": 0.48891272119357787,
      "grad_norm": 2.96875,
      "learning_rate": 4.6501735502646744e-05,
      "loss": 0.8696,
      "step": 139500
    },
    {
      "epoch": 0.4889477687004735,
      "grad_norm": 2.90625,
      "learning_rate": 4.650108647398304e-05,
      "loss": 1.013,
      "step": 139510
    },
    {
      "epoch": 0.4889828162073691,
      "grad_norm": 3.046875,
      "learning_rate": 4.650043744531934e-05,
      "loss": 1.0009,
      "step": 139520
    },
    {
      "epoch": 0.4890178637142647,
      "grad_norm": 3.5,
      "learning_rate": 4.6499788416655634e-05,
      "loss": 0.9424,
      "step": 139530
    },
    {
      "epoch": 0.4890529112211603,
      "grad_norm": 3.40625,
      "learning_rate": 4.6499139387991936e-05,
      "loss": 0.9739,
      "step": 139540
    },
    {
      "epoch": 0.4890879587280559,
      "grad_norm": 3.140625,
      "learning_rate": 4.649849035932824e-05,
      "loss": 0.9825,
      "step": 139550
    },
    {
      "epoch": 0.4891230062349515,
      "grad_norm": 2.90625,
      "learning_rate": 4.649784133066453e-05,
      "loss": 0.9784,
      "step": 139560
    },
    {
      "epoch": 0.48915805374184707,
      "grad_norm": 2.9375,
      "learning_rate": 4.649719230200083e-05,
      "loss": 0.923,
      "step": 139570
    },
    {
      "epoch": 0.48919310124874266,
      "grad_norm": 3.375,
      "learning_rate": 4.649654327333712e-05,
      "loss": 0.9927,
      "step": 139580
    },
    {
      "epoch": 0.48922814875563825,
      "grad_norm": 2.734375,
      "learning_rate": 4.649589424467342e-05,
      "loss": 0.9298,
      "step": 139590
    },
    {
      "epoch": 0.48926319626253384,
      "grad_norm": 2.9375,
      "learning_rate": 4.649524521600972e-05,
      "loss": 0.846,
      "step": 139600
    },
    {
      "epoch": 0.4892982437694295,
      "grad_norm": 3.328125,
      "learning_rate": 4.649459618734602e-05,
      "loss": 0.9411,
      "step": 139610
    },
    {
      "epoch": 0.4893332912763251,
      "grad_norm": 2.890625,
      "learning_rate": 4.649394715868231e-05,
      "loss": 0.8767,
      "step": 139620
    },
    {
      "epoch": 0.48936833878322067,
      "grad_norm": 3.265625,
      "learning_rate": 4.6493298130018614e-05,
      "loss": 0.8832,
      "step": 139630
    },
    {
      "epoch": 0.48940338629011626,
      "grad_norm": 3.140625,
      "learning_rate": 4.6492649101354916e-05,
      "loss": 0.898,
      "step": 139640
    },
    {
      "epoch": 0.48943843379701185,
      "grad_norm": 2.65625,
      "learning_rate": 4.649200007269121e-05,
      "loss": 0.9185,
      "step": 139650
    },
    {
      "epoch": 0.48947348130390744,
      "grad_norm": 2.984375,
      "learning_rate": 4.649135104402751e-05,
      "loss": 0.8945,
      "step": 139660
    },
    {
      "epoch": 0.48950852881080303,
      "grad_norm": 2.859375,
      "learning_rate": 4.6490702015363806e-05,
      "loss": 0.9169,
      "step": 139670
    },
    {
      "epoch": 0.4895435763176986,
      "grad_norm": 2.96875,
      "learning_rate": 4.649005298670011e-05,
      "loss": 0.8931,
      "step": 139680
    },
    {
      "epoch": 0.4895786238245942,
      "grad_norm": 3.140625,
      "learning_rate": 4.64894039580364e-05,
      "loss": 0.8534,
      "step": 139690
    },
    {
      "epoch": 0.4896136713314898,
      "grad_norm": 3.109375,
      "learning_rate": 4.6488754929372704e-05,
      "loss": 0.9446,
      "step": 139700
    },
    {
      "epoch": 0.48964871883838545,
      "grad_norm": 2.8125,
      "learning_rate": 4.6488105900709e-05,
      "loss": 0.9255,
      "step": 139710
    },
    {
      "epoch": 0.48968376634528105,
      "grad_norm": 2.640625,
      "learning_rate": 4.64874568720453e-05,
      "loss": 0.9301,
      "step": 139720
    },
    {
      "epoch": 0.48971881385217664,
      "grad_norm": 3.109375,
      "learning_rate": 4.6486807843381594e-05,
      "loss": 1.0072,
      "step": 139730
    },
    {
      "epoch": 0.48975386135907223,
      "grad_norm": 2.875,
      "learning_rate": 4.6486158814717896e-05,
      "loss": 0.9198,
      "step": 139740
    },
    {
      "epoch": 0.4897889088659678,
      "grad_norm": 2.546875,
      "learning_rate": 4.648550978605419e-05,
      "loss": 0.8526,
      "step": 139750
    },
    {
      "epoch": 0.4898239563728634,
      "grad_norm": 2.953125,
      "learning_rate": 4.648486075739049e-05,
      "loss": 0.9664,
      "step": 139760
    },
    {
      "epoch": 0.489859003879759,
      "grad_norm": 3.171875,
      "learning_rate": 4.648421172872679e-05,
      "loss": 0.9278,
      "step": 139770
    },
    {
      "epoch": 0.4898940513866546,
      "grad_norm": 2.96875,
      "learning_rate": 4.648356270006309e-05,
      "loss": 0.9493,
      "step": 139780
    },
    {
      "epoch": 0.4899290988935502,
      "grad_norm": 4.1875,
      "learning_rate": 4.648291367139939e-05,
      "loss": 0.9476,
      "step": 139790
    },
    {
      "epoch": 0.4899641464004458,
      "grad_norm": 2.8125,
      "learning_rate": 4.6482264642735684e-05,
      "loss": 0.9161,
      "step": 139800
    },
    {
      "epoch": 0.4899991939073414,
      "grad_norm": 2.78125,
      "learning_rate": 4.6481615614071985e-05,
      "loss": 0.9052,
      "step": 139810
    },
    {
      "epoch": 0.490034241414237,
      "grad_norm": 2.9375,
      "learning_rate": 4.648096658540828e-05,
      "loss": 0.8946,
      "step": 139820
    },
    {
      "epoch": 0.4900692889211326,
      "grad_norm": 2.46875,
      "learning_rate": 4.648031755674458e-05,
      "loss": 0.8392,
      "step": 139830
    },
    {
      "epoch": 0.4901043364280282,
      "grad_norm": 2.90625,
      "learning_rate": 4.6479668528080876e-05,
      "loss": 0.9114,
      "step": 139840
    },
    {
      "epoch": 0.4901393839349238,
      "grad_norm": 3.34375,
      "learning_rate": 4.647901949941718e-05,
      "loss": 0.9627,
      "step": 139850
    },
    {
      "epoch": 0.4901744314418194,
      "grad_norm": 3.078125,
      "learning_rate": 4.647837047075347e-05,
      "loss": 0.8773,
      "step": 139860
    },
    {
      "epoch": 0.49020947894871497,
      "grad_norm": 3.0,
      "learning_rate": 4.647772144208977e-05,
      "loss": 0.9282,
      "step": 139870
    },
    {
      "epoch": 0.49024452645561056,
      "grad_norm": 3.265625,
      "learning_rate": 4.647707241342607e-05,
      "loss": 0.9455,
      "step": 139880
    },
    {
      "epoch": 0.49027957396250615,
      "grad_norm": 2.953125,
      "learning_rate": 4.647642338476237e-05,
      "loss": 0.9466,
      "step": 139890
    },
    {
      "epoch": 0.4903146214694018,
      "grad_norm": 2.984375,
      "learning_rate": 4.6475774356098664e-05,
      "loss": 0.9577,
      "step": 139900
    },
    {
      "epoch": 0.4903496689762974,
      "grad_norm": 2.78125,
      "learning_rate": 4.6475125327434965e-05,
      "loss": 0.9257,
      "step": 139910
    },
    {
      "epoch": 0.490384716483193,
      "grad_norm": 2.78125,
      "learning_rate": 4.6474476298771266e-05,
      "loss": 0.9641,
      "step": 139920
    },
    {
      "epoch": 0.4904197639900886,
      "grad_norm": 3.0625,
      "learning_rate": 4.647382727010756e-05,
      "loss": 0.9641,
      "step": 139930
    },
    {
      "epoch": 0.49045481149698417,
      "grad_norm": 2.859375,
      "learning_rate": 4.647317824144386e-05,
      "loss": 0.8829,
      "step": 139940
    },
    {
      "epoch": 0.49048985900387976,
      "grad_norm": 3.046875,
      "learning_rate": 4.647252921278016e-05,
      "loss": 0.9918,
      "step": 139950
    },
    {
      "epoch": 0.49052490651077535,
      "grad_norm": 2.984375,
      "learning_rate": 4.647188018411645e-05,
      "loss": 0.9208,
      "step": 139960
    },
    {
      "epoch": 0.49055995401767094,
      "grad_norm": 3.171875,
      "learning_rate": 4.6471231155452746e-05,
      "loss": 1.0111,
      "step": 139970
    },
    {
      "epoch": 0.49059500152456653,
      "grad_norm": 2.75,
      "learning_rate": 4.647058212678905e-05,
      "loss": 0.9,
      "step": 139980
    },
    {
      "epoch": 0.4906300490314621,
      "grad_norm": 2.5625,
      "learning_rate": 4.646993309812534e-05,
      "loss": 0.8438,
      "step": 139990
    },
    {
      "epoch": 0.49066509653835777,
      "grad_norm": 2.78125,
      "learning_rate": 4.6469284069461644e-05,
      "loss": 0.9467,
      "step": 140000
    },
    {
      "epoch": 0.49066509653835777,
      "eval_loss": 0.8742027878761292,
      "eval_runtime": 561.939,
      "eval_samples_per_second": 677.006,
      "eval_steps_per_second": 56.417,
      "step": 140000
    },
    {
      "epoch": 0.49070014404525336,
      "grad_norm": 3.46875,
      "learning_rate": 4.6468635040797945e-05,
      "loss": 0.9624,
      "step": 140010
    },
    {
      "epoch": 0.49073519155214895,
      "grad_norm": 2.703125,
      "learning_rate": 4.646798601213424e-05,
      "loss": 0.8019,
      "step": 140020
    },
    {
      "epoch": 0.49077023905904454,
      "grad_norm": 2.984375,
      "learning_rate": 4.646733698347054e-05,
      "loss": 0.8274,
      "step": 140030
    },
    {
      "epoch": 0.49080528656594014,
      "grad_norm": 2.90625,
      "learning_rate": 4.6466687954806836e-05,
      "loss": 0.9296,
      "step": 140040
    },
    {
      "epoch": 0.4908403340728357,
      "grad_norm": 3.546875,
      "learning_rate": 4.646603892614314e-05,
      "loss": 0.8858,
      "step": 140050
    },
    {
      "epoch": 0.4908753815797313,
      "grad_norm": 3.421875,
      "learning_rate": 4.646538989747943e-05,
      "loss": 0.9866,
      "step": 140060
    },
    {
      "epoch": 0.4909104290866269,
      "grad_norm": 3.234375,
      "learning_rate": 4.646474086881573e-05,
      "loss": 0.9618,
      "step": 140070
    },
    {
      "epoch": 0.4909454765935225,
      "grad_norm": 2.59375,
      "learning_rate": 4.646409184015203e-05,
      "loss": 0.9641,
      "step": 140080
    },
    {
      "epoch": 0.4909805241004181,
      "grad_norm": 2.8125,
      "learning_rate": 4.646344281148833e-05,
      "loss": 0.8933,
      "step": 140090
    },
    {
      "epoch": 0.49101557160731374,
      "grad_norm": 3.171875,
      "learning_rate": 4.6462793782824624e-05,
      "loss": 0.8934,
      "step": 140100
    },
    {
      "epoch": 0.49105061911420933,
      "grad_norm": 2.84375,
      "learning_rate": 4.6462144754160925e-05,
      "loss": 0.9206,
      "step": 140110
    },
    {
      "epoch": 0.4910856666211049,
      "grad_norm": 3.046875,
      "learning_rate": 4.646149572549722e-05,
      "loss": 0.961,
      "step": 140120
    },
    {
      "epoch": 0.4911207141280005,
      "grad_norm": 2.6875,
      "learning_rate": 4.646084669683352e-05,
      "loss": 0.9572,
      "step": 140130
    },
    {
      "epoch": 0.4911557616348961,
      "grad_norm": 3.09375,
      "learning_rate": 4.646019766816982e-05,
      "loss": 0.9426,
      "step": 140140
    },
    {
      "epoch": 0.4911908091417917,
      "grad_norm": 3.09375,
      "learning_rate": 4.645954863950612e-05,
      "loss": 0.8973,
      "step": 140150
    },
    {
      "epoch": 0.4912258566486873,
      "grad_norm": 2.765625,
      "learning_rate": 4.645889961084242e-05,
      "loss": 0.9745,
      "step": 140160
    },
    {
      "epoch": 0.4912609041555829,
      "grad_norm": 2.828125,
      "learning_rate": 4.645825058217871e-05,
      "loss": 0.9365,
      "step": 140170
    },
    {
      "epoch": 0.49129595166247847,
      "grad_norm": 3.328125,
      "learning_rate": 4.6457601553515014e-05,
      "loss": 0.9605,
      "step": 140180
    },
    {
      "epoch": 0.49133099916937406,
      "grad_norm": 2.84375,
      "learning_rate": 4.645695252485131e-05,
      "loss": 0.9199,
      "step": 140190
    },
    {
      "epoch": 0.4913660466762697,
      "grad_norm": 3.359375,
      "learning_rate": 4.645630349618761e-05,
      "loss": 0.9052,
      "step": 140200
    },
    {
      "epoch": 0.4914010941831653,
      "grad_norm": 3.34375,
      "learning_rate": 4.6455654467523905e-05,
      "loss": 0.9178,
      "step": 140210
    },
    {
      "epoch": 0.4914361416900609,
      "grad_norm": 3.140625,
      "learning_rate": 4.6455005438860206e-05,
      "loss": 0.9416,
      "step": 140220
    },
    {
      "epoch": 0.4914711891969565,
      "grad_norm": 3.53125,
      "learning_rate": 4.64543564101965e-05,
      "loss": 0.9206,
      "step": 140230
    },
    {
      "epoch": 0.4915062367038521,
      "grad_norm": 3.0625,
      "learning_rate": 4.64537073815328e-05,
      "loss": 0.9383,
      "step": 140240
    },
    {
      "epoch": 0.49154128421074766,
      "grad_norm": 2.515625,
      "learning_rate": 4.64530583528691e-05,
      "loss": 0.8732,
      "step": 140250
    },
    {
      "epoch": 0.49157633171764326,
      "grad_norm": 2.78125,
      "learning_rate": 4.64524093242054e-05,
      "loss": 0.9187,
      "step": 140260
    },
    {
      "epoch": 0.49161137922453885,
      "grad_norm": 3.328125,
      "learning_rate": 4.64517602955417e-05,
      "loss": 1.0302,
      "step": 140270
    },
    {
      "epoch": 0.49164642673143444,
      "grad_norm": 2.96875,
      "learning_rate": 4.6451111266877994e-05,
      "loss": 0.8607,
      "step": 140280
    },
    {
      "epoch": 0.49168147423833003,
      "grad_norm": 2.6875,
      "learning_rate": 4.6450462238214296e-05,
      "loss": 0.9241,
      "step": 140290
    },
    {
      "epoch": 0.4917165217452257,
      "grad_norm": 2.890625,
      "learning_rate": 4.644981320955059e-05,
      "loss": 1.0412,
      "step": 140300
    },
    {
      "epoch": 0.49175156925212127,
      "grad_norm": 2.984375,
      "learning_rate": 4.644916418088689e-05,
      "loss": 0.877,
      "step": 140310
    },
    {
      "epoch": 0.49178661675901686,
      "grad_norm": 2.984375,
      "learning_rate": 4.6448515152223186e-05,
      "loss": 0.9415,
      "step": 140320
    },
    {
      "epoch": 0.49182166426591245,
      "grad_norm": 2.84375,
      "learning_rate": 4.644786612355949e-05,
      "loss": 0.9375,
      "step": 140330
    },
    {
      "epoch": 0.49185671177280804,
      "grad_norm": 3.3125,
      "learning_rate": 4.6447217094895776e-05,
      "loss": 1.0031,
      "step": 140340
    },
    {
      "epoch": 0.49189175927970363,
      "grad_norm": 3.1875,
      "learning_rate": 4.644656806623208e-05,
      "loss": 0.8355,
      "step": 140350
    },
    {
      "epoch": 0.4919268067865992,
      "grad_norm": 3.0,
      "learning_rate": 4.644591903756838e-05,
      "loss": 1.0266,
      "step": 140360
    },
    {
      "epoch": 0.4919618542934948,
      "grad_norm": 2.65625,
      "learning_rate": 4.644527000890467e-05,
      "loss": 0.95,
      "step": 140370
    },
    {
      "epoch": 0.4919969018003904,
      "grad_norm": 3.109375,
      "learning_rate": 4.6444620980240974e-05,
      "loss": 0.8738,
      "step": 140380
    },
    {
      "epoch": 0.49203194930728605,
      "grad_norm": 3.34375,
      "learning_rate": 4.644397195157727e-05,
      "loss": 0.9286,
      "step": 140390
    },
    {
      "epoch": 0.49206699681418165,
      "grad_norm": 3.34375,
      "learning_rate": 4.644332292291357e-05,
      "loss": 0.9372,
      "step": 140400
    },
    {
      "epoch": 0.49210204432107724,
      "grad_norm": 3.265625,
      "learning_rate": 4.6442673894249865e-05,
      "loss": 0.9793,
      "step": 140410
    },
    {
      "epoch": 0.49213709182797283,
      "grad_norm": 2.96875,
      "learning_rate": 4.6442024865586166e-05,
      "loss": 0.9352,
      "step": 140420
    },
    {
      "epoch": 0.4921721393348684,
      "grad_norm": 3.515625,
      "learning_rate": 4.644137583692246e-05,
      "loss": 0.9646,
      "step": 140430
    },
    {
      "epoch": 0.492207186841764,
      "grad_norm": 3.234375,
      "learning_rate": 4.644072680825876e-05,
      "loss": 0.965,
      "step": 140440
    },
    {
      "epoch": 0.4922422343486596,
      "grad_norm": 2.53125,
      "learning_rate": 4.644007777959506e-05,
      "loss": 0.9633,
      "step": 140450
    },
    {
      "epoch": 0.4922772818555552,
      "grad_norm": 2.703125,
      "learning_rate": 4.643942875093136e-05,
      "loss": 0.9313,
      "step": 140460
    },
    {
      "epoch": 0.4923123293624508,
      "grad_norm": 3.125,
      "learning_rate": 4.643877972226765e-05,
      "loss": 0.9669,
      "step": 140470
    },
    {
      "epoch": 0.4923473768693464,
      "grad_norm": 3.03125,
      "learning_rate": 4.6438130693603954e-05,
      "loss": 0.9224,
      "step": 140480
    },
    {
      "epoch": 0.492382424376242,
      "grad_norm": 3.09375,
      "learning_rate": 4.643748166494025e-05,
      "loss": 0.9913,
      "step": 140490
    },
    {
      "epoch": 0.4924174718831376,
      "grad_norm": 3.203125,
      "learning_rate": 4.643683263627655e-05,
      "loss": 0.9353,
      "step": 140500
    },
    {
      "epoch": 0.4924525193900332,
      "grad_norm": 3.109375,
      "learning_rate": 4.643618360761285e-05,
      "loss": 0.9438,
      "step": 140510
    },
    {
      "epoch": 0.4924875668969288,
      "grad_norm": 3.21875,
      "learning_rate": 4.6435534578949146e-05,
      "loss": 0.9396,
      "step": 140520
    },
    {
      "epoch": 0.4925226144038244,
      "grad_norm": 2.90625,
      "learning_rate": 4.643488555028545e-05,
      "loss": 0.9258,
      "step": 140530
    },
    {
      "epoch": 0.49255766191072,
      "grad_norm": 3.140625,
      "learning_rate": 4.643423652162174e-05,
      "loss": 0.9769,
      "step": 140540
    },
    {
      "epoch": 0.49259270941761557,
      "grad_norm": 2.640625,
      "learning_rate": 4.6433587492958044e-05,
      "loss": 0.8754,
      "step": 140550
    },
    {
      "epoch": 0.49262775692451116,
      "grad_norm": 2.703125,
      "learning_rate": 4.643293846429434e-05,
      "loss": 0.965,
      "step": 140560
    },
    {
      "epoch": 0.49266280443140675,
      "grad_norm": 3.21875,
      "learning_rate": 4.643228943563064e-05,
      "loss": 0.8962,
      "step": 140570
    },
    {
      "epoch": 0.49269785193830234,
      "grad_norm": 2.734375,
      "learning_rate": 4.6431640406966934e-05,
      "loss": 0.9388,
      "step": 140580
    },
    {
      "epoch": 0.492732899445198,
      "grad_norm": 2.703125,
      "learning_rate": 4.6430991378303236e-05,
      "loss": 0.9193,
      "step": 140590
    },
    {
      "epoch": 0.4927679469520936,
      "grad_norm": 3.046875,
      "learning_rate": 4.643034234963953e-05,
      "loss": 0.8919,
      "step": 140600
    },
    {
      "epoch": 0.4928029944589892,
      "grad_norm": 3.125,
      "learning_rate": 4.642969332097583e-05,
      "loss": 0.8747,
      "step": 140610
    },
    {
      "epoch": 0.49283804196588477,
      "grad_norm": 3.046875,
      "learning_rate": 4.6429044292312126e-05,
      "loss": 1.0089,
      "step": 140620
    },
    {
      "epoch": 0.49287308947278036,
      "grad_norm": 3.28125,
      "learning_rate": 4.642839526364843e-05,
      "loss": 0.922,
      "step": 140630
    },
    {
      "epoch": 0.49290813697967595,
      "grad_norm": 2.984375,
      "learning_rate": 4.642774623498473e-05,
      "loss": 0.9509,
      "step": 140640
    },
    {
      "epoch": 0.49294318448657154,
      "grad_norm": 2.859375,
      "learning_rate": 4.6427097206321024e-05,
      "loss": 0.9452,
      "step": 140650
    },
    {
      "epoch": 0.49297823199346713,
      "grad_norm": 3.125,
      "learning_rate": 4.6426448177657325e-05,
      "loss": 0.9421,
      "step": 140660
    },
    {
      "epoch": 0.4930132795003627,
      "grad_norm": 3.1875,
      "learning_rate": 4.642579914899362e-05,
      "loss": 0.9767,
      "step": 140670
    },
    {
      "epoch": 0.4930483270072583,
      "grad_norm": 3.015625,
      "learning_rate": 4.642515012032992e-05,
      "loss": 0.9341,
      "step": 140680
    },
    {
      "epoch": 0.49308337451415396,
      "grad_norm": 2.875,
      "learning_rate": 4.6424501091666216e-05,
      "loss": 0.8965,
      "step": 140690
    },
    {
      "epoch": 0.49311842202104955,
      "grad_norm": 3.015625,
      "learning_rate": 4.642385206300252e-05,
      "loss": 0.9905,
      "step": 140700
    },
    {
      "epoch": 0.49315346952794514,
      "grad_norm": 3.109375,
      "learning_rate": 4.6423203034338805e-05,
      "loss": 0.9423,
      "step": 140710
    },
    {
      "epoch": 0.49318851703484073,
      "grad_norm": 2.65625,
      "learning_rate": 4.6422554005675106e-05,
      "loss": 0.8865,
      "step": 140720
    },
    {
      "epoch": 0.4932235645417363,
      "grad_norm": 3.265625,
      "learning_rate": 4.642190497701141e-05,
      "loss": 0.9131,
      "step": 140730
    },
    {
      "epoch": 0.4932586120486319,
      "grad_norm": 2.890625,
      "learning_rate": 4.64212559483477e-05,
      "loss": 1.01,
      "step": 140740
    },
    {
      "epoch": 0.4932936595555275,
      "grad_norm": 2.71875,
      "learning_rate": 4.6420606919684004e-05,
      "loss": 0.8302,
      "step": 140750
    },
    {
      "epoch": 0.4933287070624231,
      "grad_norm": 2.953125,
      "learning_rate": 4.64199578910203e-05,
      "loss": 0.996,
      "step": 140760
    },
    {
      "epoch": 0.4933637545693187,
      "grad_norm": 2.375,
      "learning_rate": 4.64193088623566e-05,
      "loss": 0.9156,
      "step": 140770
    },
    {
      "epoch": 0.4933988020762143,
      "grad_norm": 3.234375,
      "learning_rate": 4.6418659833692894e-05,
      "loss": 0.9053,
      "step": 140780
    },
    {
      "epoch": 0.49343384958310993,
      "grad_norm": 2.875,
      "learning_rate": 4.6418010805029196e-05,
      "loss": 0.9507,
      "step": 140790
    },
    {
      "epoch": 0.4934688970900055,
      "grad_norm": 2.65625,
      "learning_rate": 4.641736177636549e-05,
      "loss": 0.7977,
      "step": 140800
    },
    {
      "epoch": 0.4935039445969011,
      "grad_norm": 3.078125,
      "learning_rate": 4.641671274770179e-05,
      "loss": 0.9048,
      "step": 140810
    },
    {
      "epoch": 0.4935389921037967,
      "grad_norm": 2.25,
      "learning_rate": 4.6416063719038086e-05,
      "loss": 0.9576,
      "step": 140820
    },
    {
      "epoch": 0.4935740396106923,
      "grad_norm": 2.953125,
      "learning_rate": 4.641541469037439e-05,
      "loss": 0.9281,
      "step": 140830
    },
    {
      "epoch": 0.4936090871175879,
      "grad_norm": 3.09375,
      "learning_rate": 4.641476566171068e-05,
      "loss": 0.8981,
      "step": 140840
    },
    {
      "epoch": 0.4936441346244835,
      "grad_norm": 3.015625,
      "learning_rate": 4.6414116633046984e-05,
      "loss": 0.9348,
      "step": 140850
    },
    {
      "epoch": 0.49367918213137907,
      "grad_norm": 2.71875,
      "learning_rate": 4.641346760438328e-05,
      "loss": 0.9202,
      "step": 140860
    },
    {
      "epoch": 0.49371422963827466,
      "grad_norm": 3.140625,
      "learning_rate": 4.641281857571958e-05,
      "loss": 1.0494,
      "step": 140870
    },
    {
      "epoch": 0.49374927714517025,
      "grad_norm": 2.953125,
      "learning_rate": 4.641216954705588e-05,
      "loss": 0.9292,
      "step": 140880
    },
    {
      "epoch": 0.4937843246520659,
      "grad_norm": 3.046875,
      "learning_rate": 4.6411520518392176e-05,
      "loss": 0.9544,
      "step": 140890
    },
    {
      "epoch": 0.4938193721589615,
      "grad_norm": 2.609375,
      "learning_rate": 4.641087148972848e-05,
      "loss": 0.8887,
      "step": 140900
    },
    {
      "epoch": 0.4938544196658571,
      "grad_norm": 2.96875,
      "learning_rate": 4.641022246106477e-05,
      "loss": 0.9323,
      "step": 140910
    },
    {
      "epoch": 0.49388946717275267,
      "grad_norm": 2.9375,
      "learning_rate": 4.640957343240107e-05,
      "loss": 0.9935,
      "step": 140920
    },
    {
      "epoch": 0.49392451467964826,
      "grad_norm": 2.59375,
      "learning_rate": 4.640892440373737e-05,
      "loss": 0.935,
      "step": 140930
    },
    {
      "epoch": 0.49395956218654385,
      "grad_norm": 3.0625,
      "learning_rate": 4.640827537507367e-05,
      "loss": 1.0053,
      "step": 140940
    },
    {
      "epoch": 0.49399460969343945,
      "grad_norm": 3.09375,
      "learning_rate": 4.6407626346409964e-05,
      "loss": 0.9428,
      "step": 140950
    },
    {
      "epoch": 0.49402965720033504,
      "grad_norm": 2.90625,
      "learning_rate": 4.6406977317746265e-05,
      "loss": 0.9148,
      "step": 140960
    },
    {
      "epoch": 0.49406470470723063,
      "grad_norm": 3.125,
      "learning_rate": 4.640632828908256e-05,
      "loss": 0.9771,
      "step": 140970
    },
    {
      "epoch": 0.4940997522141263,
      "grad_norm": 3.03125,
      "learning_rate": 4.640567926041886e-05,
      "loss": 0.9457,
      "step": 140980
    },
    {
      "epoch": 0.49413479972102187,
      "grad_norm": 3.28125,
      "learning_rate": 4.6405030231755156e-05,
      "loss": 0.9786,
      "step": 140990
    },
    {
      "epoch": 0.49416984722791746,
      "grad_norm": 2.78125,
      "learning_rate": 4.640438120309146e-05,
      "loss": 0.8676,
      "step": 141000
    },
    {
      "epoch": 0.49420489473481305,
      "grad_norm": 3.671875,
      "learning_rate": 4.640373217442776e-05,
      "loss": 0.8665,
      "step": 141010
    },
    {
      "epoch": 0.49423994224170864,
      "grad_norm": 3.359375,
      "learning_rate": 4.640308314576405e-05,
      "loss": 0.9796,
      "step": 141020
    },
    {
      "epoch": 0.49427498974860423,
      "grad_norm": 2.703125,
      "learning_rate": 4.6402434117100355e-05,
      "loss": 0.8915,
      "step": 141030
    },
    {
      "epoch": 0.4943100372554998,
      "grad_norm": 2.90625,
      "learning_rate": 4.640178508843665e-05,
      "loss": 0.8631,
      "step": 141040
    },
    {
      "epoch": 0.4943450847623954,
      "grad_norm": 3.046875,
      "learning_rate": 4.640113605977295e-05,
      "loss": 0.9541,
      "step": 141050
    },
    {
      "epoch": 0.494380132269291,
      "grad_norm": 2.609375,
      "learning_rate": 4.6400487031109245e-05,
      "loss": 0.9526,
      "step": 141060
    },
    {
      "epoch": 0.4944151797761866,
      "grad_norm": 2.84375,
      "learning_rate": 4.6399838002445547e-05,
      "loss": 0.9127,
      "step": 141070
    },
    {
      "epoch": 0.49445022728308224,
      "grad_norm": 3.125,
      "learning_rate": 4.639918897378184e-05,
      "loss": 0.9892,
      "step": 141080
    },
    {
      "epoch": 0.49448527478997784,
      "grad_norm": 2.9375,
      "learning_rate": 4.6398539945118136e-05,
      "loss": 0.9658,
      "step": 141090
    },
    {
      "epoch": 0.4945203222968734,
      "grad_norm": 2.984375,
      "learning_rate": 4.639789091645444e-05,
      "loss": 0.9346,
      "step": 141100
    },
    {
      "epoch": 0.494555369803769,
      "grad_norm": 2.734375,
      "learning_rate": 4.639724188779073e-05,
      "loss": 0.916,
      "step": 141110
    },
    {
      "epoch": 0.4945904173106646,
      "grad_norm": 3.328125,
      "learning_rate": 4.639659285912703e-05,
      "loss": 1.009,
      "step": 141120
    },
    {
      "epoch": 0.4946254648175602,
      "grad_norm": 2.9375,
      "learning_rate": 4.639594383046333e-05,
      "loss": 0.9329,
      "step": 141130
    },
    {
      "epoch": 0.4946605123244558,
      "grad_norm": 3.015625,
      "learning_rate": 4.639529480179963e-05,
      "loss": 0.885,
      "step": 141140
    },
    {
      "epoch": 0.4946955598313514,
      "grad_norm": 2.953125,
      "learning_rate": 4.6394645773135924e-05,
      "loss": 0.9307,
      "step": 141150
    },
    {
      "epoch": 0.494730607338247,
      "grad_norm": 2.921875,
      "learning_rate": 4.6393996744472225e-05,
      "loss": 0.8511,
      "step": 141160
    },
    {
      "epoch": 0.49476565484514257,
      "grad_norm": 3.25,
      "learning_rate": 4.639334771580852e-05,
      "loss": 0.9555,
      "step": 141170
    },
    {
      "epoch": 0.4948007023520382,
      "grad_norm": 2.796875,
      "learning_rate": 4.639269868714482e-05,
      "loss": 0.927,
      "step": 141180
    },
    {
      "epoch": 0.4948357498589338,
      "grad_norm": 3.03125,
      "learning_rate": 4.6392049658481116e-05,
      "loss": 0.9632,
      "step": 141190
    },
    {
      "epoch": 0.4948707973658294,
      "grad_norm": 2.640625,
      "learning_rate": 4.639140062981742e-05,
      "loss": 0.9933,
      "step": 141200
    },
    {
      "epoch": 0.494905844872725,
      "grad_norm": 2.765625,
      "learning_rate": 4.639075160115371e-05,
      "loss": 0.9616,
      "step": 141210
    },
    {
      "epoch": 0.4949408923796206,
      "grad_norm": 3.28125,
      "learning_rate": 4.639010257249001e-05,
      "loss": 0.8541,
      "step": 141220
    },
    {
      "epoch": 0.49497593988651617,
      "grad_norm": 3.078125,
      "learning_rate": 4.6389453543826315e-05,
      "loss": 0.8995,
      "step": 141230
    },
    {
      "epoch": 0.49501098739341176,
      "grad_norm": 3.171875,
      "learning_rate": 4.638880451516261e-05,
      "loss": 0.8613,
      "step": 141240
    },
    {
      "epoch": 0.49504603490030735,
      "grad_norm": 2.71875,
      "learning_rate": 4.638815548649891e-05,
      "loss": 0.9125,
      "step": 141250
    },
    {
      "epoch": 0.49508108240720294,
      "grad_norm": 3.390625,
      "learning_rate": 4.6387506457835205e-05,
      "loss": 0.9525,
      "step": 141260
    },
    {
      "epoch": 0.49511612991409854,
      "grad_norm": 3.015625,
      "learning_rate": 4.6386857429171507e-05,
      "loss": 0.8882,
      "step": 141270
    },
    {
      "epoch": 0.4951511774209942,
      "grad_norm": 3.328125,
      "learning_rate": 4.63862084005078e-05,
      "loss": 0.9893,
      "step": 141280
    },
    {
      "epoch": 0.4951862249278898,
      "grad_norm": 3.0625,
      "learning_rate": 4.63855593718441e-05,
      "loss": 0.9589,
      "step": 141290
    },
    {
      "epoch": 0.49522127243478536,
      "grad_norm": 3.4375,
      "learning_rate": 4.63849103431804e-05,
      "loss": 0.9975,
      "step": 141300
    },
    {
      "epoch": 0.49525631994168096,
      "grad_norm": 2.6875,
      "learning_rate": 4.63842613145167e-05,
      "loss": 0.8957,
      "step": 141310
    },
    {
      "epoch": 0.49529136744857655,
      "grad_norm": 2.90625,
      "learning_rate": 4.638361228585299e-05,
      "loss": 0.9432,
      "step": 141320
    },
    {
      "epoch": 0.49532641495547214,
      "grad_norm": 2.6875,
      "learning_rate": 4.6382963257189295e-05,
      "loss": 0.9112,
      "step": 141330
    },
    {
      "epoch": 0.49536146246236773,
      "grad_norm": 3.1875,
      "learning_rate": 4.638231422852559e-05,
      "loss": 0.864,
      "step": 141340
    },
    {
      "epoch": 0.4953965099692633,
      "grad_norm": 2.640625,
      "learning_rate": 4.638166519986189e-05,
      "loss": 0.8973,
      "step": 141350
    },
    {
      "epoch": 0.4954315574761589,
      "grad_norm": 3.09375,
      "learning_rate": 4.6381016171198185e-05,
      "loss": 0.9042,
      "step": 141360
    },
    {
      "epoch": 0.4954666049830545,
      "grad_norm": 3.46875,
      "learning_rate": 4.6380367142534487e-05,
      "loss": 0.9951,
      "step": 141370
    },
    {
      "epoch": 0.49550165248995015,
      "grad_norm": 2.8125,
      "learning_rate": 4.637971811387079e-05,
      "loss": 0.8825,
      "step": 141380
    },
    {
      "epoch": 0.49553669999684574,
      "grad_norm": 2.828125,
      "learning_rate": 4.637906908520708e-05,
      "loss": 0.9638,
      "step": 141390
    },
    {
      "epoch": 0.49557174750374133,
      "grad_norm": 2.71875,
      "learning_rate": 4.6378420056543384e-05,
      "loss": 0.9799,
      "step": 141400
    },
    {
      "epoch": 0.4956067950106369,
      "grad_norm": 3.140625,
      "learning_rate": 4.637777102787968e-05,
      "loss": 0.9039,
      "step": 141410
    },
    {
      "epoch": 0.4956418425175325,
      "grad_norm": 2.9375,
      "learning_rate": 4.637712199921598e-05,
      "loss": 0.9061,
      "step": 141420
    },
    {
      "epoch": 0.4956768900244281,
      "grad_norm": 2.84375,
      "learning_rate": 4.6376472970552275e-05,
      "loss": 0.9078,
      "step": 141430
    },
    {
      "epoch": 0.4957119375313237,
      "grad_norm": 3.171875,
      "learning_rate": 4.6375823941888576e-05,
      "loss": 0.9712,
      "step": 141440
    },
    {
      "epoch": 0.4957469850382193,
      "grad_norm": 3.234375,
      "learning_rate": 4.637517491322487e-05,
      "loss": 0.9333,
      "step": 141450
    },
    {
      "epoch": 0.4957820325451149,
      "grad_norm": 2.8125,
      "learning_rate": 4.637452588456117e-05,
      "loss": 0.963,
      "step": 141460
    },
    {
      "epoch": 0.4958170800520105,
      "grad_norm": 2.671875,
      "learning_rate": 4.6373876855897467e-05,
      "loss": 0.9438,
      "step": 141470
    },
    {
      "epoch": 0.4958521275589061,
      "grad_norm": 3.046875,
      "learning_rate": 4.637322782723376e-05,
      "loss": 1.0145,
      "step": 141480
    },
    {
      "epoch": 0.4958871750658017,
      "grad_norm": 3.140625,
      "learning_rate": 4.637257879857006e-05,
      "loss": 0.9733,
      "step": 141490
    },
    {
      "epoch": 0.4959222225726973,
      "grad_norm": 3.203125,
      "learning_rate": 4.637192976990636e-05,
      "loss": 0.9098,
      "step": 141500
    },
    {
      "epoch": 0.4959572700795929,
      "grad_norm": 3.25,
      "learning_rate": 4.637128074124266e-05,
      "loss": 0.9623,
      "step": 141510
    },
    {
      "epoch": 0.4959923175864885,
      "grad_norm": 2.9375,
      "learning_rate": 4.637063171257895e-05,
      "loss": 0.9944,
      "step": 141520
    },
    {
      "epoch": 0.4960273650933841,
      "grad_norm": 2.71875,
      "learning_rate": 4.6369982683915255e-05,
      "loss": 0.8814,
      "step": 141530
    },
    {
      "epoch": 0.49606241260027967,
      "grad_norm": 3.21875,
      "learning_rate": 4.636933365525155e-05,
      "loss": 0.9946,
      "step": 141540
    },
    {
      "epoch": 0.49609746010717526,
      "grad_norm": 2.84375,
      "learning_rate": 4.636868462658785e-05,
      "loss": 0.9372,
      "step": 141550
    },
    {
      "epoch": 0.49613250761407085,
      "grad_norm": 3.046875,
      "learning_rate": 4.6368035597924145e-05,
      "loss": 0.9016,
      "step": 141560
    },
    {
      "epoch": 0.4961675551209665,
      "grad_norm": 2.84375,
      "learning_rate": 4.6367386569260447e-05,
      "loss": 0.7779,
      "step": 141570
    },
    {
      "epoch": 0.4962026026278621,
      "grad_norm": 3.1875,
      "learning_rate": 4.636673754059674e-05,
      "loss": 1.0109,
      "step": 141580
    },
    {
      "epoch": 0.4962376501347577,
      "grad_norm": 3.0,
      "learning_rate": 4.636608851193304e-05,
      "loss": 0.9657,
      "step": 141590
    },
    {
      "epoch": 0.49627269764165327,
      "grad_norm": 2.984375,
      "learning_rate": 4.6365439483269344e-05,
      "loss": 0.9582,
      "step": 141600
    },
    {
      "epoch": 0.49630774514854886,
      "grad_norm": 3.03125,
      "learning_rate": 4.636479045460564e-05,
      "loss": 0.8705,
      "step": 141610
    },
    {
      "epoch": 0.49634279265544445,
      "grad_norm": 3.09375,
      "learning_rate": 4.636414142594194e-05,
      "loss": 0.9798,
      "step": 141620
    },
    {
      "epoch": 0.49637784016234004,
      "grad_norm": 2.921875,
      "learning_rate": 4.6363492397278235e-05,
      "loss": 0.9663,
      "step": 141630
    },
    {
      "epoch": 0.49641288766923564,
      "grad_norm": 3.21875,
      "learning_rate": 4.6362843368614536e-05,
      "loss": 0.9955,
      "step": 141640
    },
    {
      "epoch": 0.4964479351761312,
      "grad_norm": 3.0625,
      "learning_rate": 4.636219433995083e-05,
      "loss": 0.9498,
      "step": 141650
    },
    {
      "epoch": 0.4964829826830268,
      "grad_norm": 3.03125,
      "learning_rate": 4.636154531128713e-05,
      "loss": 0.9661,
      "step": 141660
    },
    {
      "epoch": 0.49651803018992247,
      "grad_norm": 3.125,
      "learning_rate": 4.6360896282623427e-05,
      "loss": 0.9876,
      "step": 141670
    },
    {
      "epoch": 0.49655307769681806,
      "grad_norm": 2.84375,
      "learning_rate": 4.636024725395973e-05,
      "loss": 0.9672,
      "step": 141680
    },
    {
      "epoch": 0.49658812520371365,
      "grad_norm": 2.65625,
      "learning_rate": 4.635959822529602e-05,
      "loss": 0.9232,
      "step": 141690
    },
    {
      "epoch": 0.49662317271060924,
      "grad_norm": 2.96875,
      "learning_rate": 4.6358949196632324e-05,
      "loss": 0.9158,
      "step": 141700
    },
    {
      "epoch": 0.49665822021750483,
      "grad_norm": 2.78125,
      "learning_rate": 4.635830016796862e-05,
      "loss": 1.006,
      "step": 141710
    },
    {
      "epoch": 0.4966932677244004,
      "grad_norm": 3.125,
      "learning_rate": 4.635765113930492e-05,
      "loss": 0.9775,
      "step": 141720
    },
    {
      "epoch": 0.496728315231296,
      "grad_norm": 3.125,
      "learning_rate": 4.6357002110641215e-05,
      "loss": 0.9239,
      "step": 141730
    },
    {
      "epoch": 0.4967633627381916,
      "grad_norm": 3.15625,
      "learning_rate": 4.6356353081977516e-05,
      "loss": 0.9317,
      "step": 141740
    },
    {
      "epoch": 0.4967984102450872,
      "grad_norm": 3.171875,
      "learning_rate": 4.635570405331382e-05,
      "loss": 0.9592,
      "step": 141750
    },
    {
      "epoch": 0.4968334577519828,
      "grad_norm": 2.65625,
      "learning_rate": 4.635505502465011e-05,
      "loss": 0.8882,
      "step": 141760
    },
    {
      "epoch": 0.49686850525887843,
      "grad_norm": 3.03125,
      "learning_rate": 4.635440599598641e-05,
      "loss": 0.8671,
      "step": 141770
    },
    {
      "epoch": 0.496903552765774,
      "grad_norm": 2.875,
      "learning_rate": 4.635375696732271e-05,
      "loss": 0.8974,
      "step": 141780
    },
    {
      "epoch": 0.4969386002726696,
      "grad_norm": 3.015625,
      "learning_rate": 4.635310793865901e-05,
      "loss": 0.9582,
      "step": 141790
    },
    {
      "epoch": 0.4969736477795652,
      "grad_norm": 2.828125,
      "learning_rate": 4.6352458909995304e-05,
      "loss": 0.8575,
      "step": 141800
    },
    {
      "epoch": 0.4970086952864608,
      "grad_norm": 3.375,
      "learning_rate": 4.6351809881331605e-05,
      "loss": 0.8926,
      "step": 141810
    },
    {
      "epoch": 0.4970437427933564,
      "grad_norm": 3.03125,
      "learning_rate": 4.63511608526679e-05,
      "loss": 0.9041,
      "step": 141820
    },
    {
      "epoch": 0.497078790300252,
      "grad_norm": 3.21875,
      "learning_rate": 4.63505118240042e-05,
      "loss": 0.9454,
      "step": 141830
    },
    {
      "epoch": 0.4971138378071476,
      "grad_norm": 2.984375,
      "learning_rate": 4.6349862795340496e-05,
      "loss": 0.8809,
      "step": 141840
    },
    {
      "epoch": 0.49714888531404317,
      "grad_norm": 3.015625,
      "learning_rate": 4.634921376667679e-05,
      "loss": 0.9025,
      "step": 141850
    },
    {
      "epoch": 0.49718393282093876,
      "grad_norm": 2.640625,
      "learning_rate": 4.634856473801309e-05,
      "loss": 0.9082,
      "step": 141860
    },
    {
      "epoch": 0.4972189803278344,
      "grad_norm": 2.9375,
      "learning_rate": 4.6347915709349387e-05,
      "loss": 0.8393,
      "step": 141870
    },
    {
      "epoch": 0.49725402783473,
      "grad_norm": 2.453125,
      "learning_rate": 4.634726668068569e-05,
      "loss": 0.9556,
      "step": 141880
    },
    {
      "epoch": 0.4972890753416256,
      "grad_norm": 2.96875,
      "learning_rate": 4.634661765202198e-05,
      "loss": 0.8591,
      "step": 141890
    },
    {
      "epoch": 0.4973241228485212,
      "grad_norm": 2.75,
      "learning_rate": 4.6345968623358284e-05,
      "loss": 0.8728,
      "step": 141900
    },
    {
      "epoch": 0.49735917035541677,
      "grad_norm": 3.59375,
      "learning_rate": 4.634531959469458e-05,
      "loss": 1.0098,
      "step": 141910
    },
    {
      "epoch": 0.49739421786231236,
      "grad_norm": 2.78125,
      "learning_rate": 4.634467056603088e-05,
      "loss": 0.9108,
      "step": 141920
    },
    {
      "epoch": 0.49742926536920795,
      "grad_norm": 2.96875,
      "learning_rate": 4.6344021537367175e-05,
      "loss": 0.948,
      "step": 141930
    },
    {
      "epoch": 0.49746431287610354,
      "grad_norm": 2.953125,
      "learning_rate": 4.6343372508703476e-05,
      "loss": 0.9334,
      "step": 141940
    },
    {
      "epoch": 0.49749936038299913,
      "grad_norm": 2.96875,
      "learning_rate": 4.634272348003977e-05,
      "loss": 0.896,
      "step": 141950
    },
    {
      "epoch": 0.4975344078898947,
      "grad_norm": 2.640625,
      "learning_rate": 4.634207445137607e-05,
      "loss": 0.8756,
      "step": 141960
    },
    {
      "epoch": 0.49756945539679037,
      "grad_norm": 2.96875,
      "learning_rate": 4.634142542271237e-05,
      "loss": 0.8786,
      "step": 141970
    },
    {
      "epoch": 0.49760450290368596,
      "grad_norm": 2.796875,
      "learning_rate": 4.634077639404867e-05,
      "loss": 0.8569,
      "step": 141980
    },
    {
      "epoch": 0.49763955041058155,
      "grad_norm": 2.8125,
      "learning_rate": 4.634012736538497e-05,
      "loss": 0.9071,
      "step": 141990
    },
    {
      "epoch": 0.49767459791747715,
      "grad_norm": 3.171875,
      "learning_rate": 4.6339478336721264e-05,
      "loss": 0.8814,
      "step": 142000
    },
    {
      "epoch": 0.49770964542437274,
      "grad_norm": 3.0625,
      "learning_rate": 4.6338829308057565e-05,
      "loss": 0.9376,
      "step": 142010
    },
    {
      "epoch": 0.49774469293126833,
      "grad_norm": 2.796875,
      "learning_rate": 4.633818027939386e-05,
      "loss": 0.8674,
      "step": 142020
    },
    {
      "epoch": 0.4977797404381639,
      "grad_norm": 2.890625,
      "learning_rate": 4.633753125073016e-05,
      "loss": 0.8751,
      "step": 142030
    },
    {
      "epoch": 0.4978147879450595,
      "grad_norm": 3.28125,
      "learning_rate": 4.6336882222066456e-05,
      "loss": 0.8774,
      "step": 142040
    },
    {
      "epoch": 0.4978498354519551,
      "grad_norm": 2.9375,
      "learning_rate": 4.633623319340276e-05,
      "loss": 0.8886,
      "step": 142050
    },
    {
      "epoch": 0.49788488295885075,
      "grad_norm": 3.09375,
      "learning_rate": 4.633558416473905e-05,
      "loss": 0.8763,
      "step": 142060
    },
    {
      "epoch": 0.49791993046574634,
      "grad_norm": 2.953125,
      "learning_rate": 4.633493513607535e-05,
      "loss": 1.0254,
      "step": 142070
    },
    {
      "epoch": 0.49795497797264193,
      "grad_norm": 3.015625,
      "learning_rate": 4.633428610741165e-05,
      "loss": 0.9459,
      "step": 142080
    },
    {
      "epoch": 0.4979900254795375,
      "grad_norm": 3.21875,
      "learning_rate": 4.633363707874795e-05,
      "loss": 1.0174,
      "step": 142090
    },
    {
      "epoch": 0.4980250729864331,
      "grad_norm": 2.890625,
      "learning_rate": 4.6332988050084244e-05,
      "loss": 0.8877,
      "step": 142100
    },
    {
      "epoch": 0.4980601204933287,
      "grad_norm": 2.890625,
      "learning_rate": 4.6332339021420545e-05,
      "loss": 0.9634,
      "step": 142110
    },
    {
      "epoch": 0.4980951680002243,
      "grad_norm": 3.1875,
      "learning_rate": 4.633168999275685e-05,
      "loss": 0.9187,
      "step": 142120
    },
    {
      "epoch": 0.4981302155071199,
      "grad_norm": 2.671875,
      "learning_rate": 4.633104096409314e-05,
      "loss": 0.8886,
      "step": 142130
    },
    {
      "epoch": 0.4981652630140155,
      "grad_norm": 2.734375,
      "learning_rate": 4.633039193542944e-05,
      "loss": 0.8798,
      "step": 142140
    },
    {
      "epoch": 0.49820031052091107,
      "grad_norm": 3.28125,
      "learning_rate": 4.632974290676574e-05,
      "loss": 0.9199,
      "step": 142150
    },
    {
      "epoch": 0.4982353580278067,
      "grad_norm": 3.0,
      "learning_rate": 4.632909387810204e-05,
      "loss": 0.8896,
      "step": 142160
    },
    {
      "epoch": 0.4982704055347023,
      "grad_norm": 3.4375,
      "learning_rate": 4.632844484943833e-05,
      "loss": 0.8743,
      "step": 142170
    },
    {
      "epoch": 0.4983054530415979,
      "grad_norm": 3.296875,
      "learning_rate": 4.6327795820774635e-05,
      "loss": 0.9176,
      "step": 142180
    },
    {
      "epoch": 0.4983405005484935,
      "grad_norm": 2.875,
      "learning_rate": 4.632714679211093e-05,
      "loss": 0.9194,
      "step": 142190
    },
    {
      "epoch": 0.4983755480553891,
      "grad_norm": 2.9375,
      "learning_rate": 4.632649776344723e-05,
      "loss": 0.9375,
      "step": 142200
    },
    {
      "epoch": 0.4984105955622847,
      "grad_norm": 3.140625,
      "learning_rate": 4.6325848734783525e-05,
      "loss": 0.877,
      "step": 142210
    },
    {
      "epoch": 0.49844564306918027,
      "grad_norm": 2.828125,
      "learning_rate": 4.632519970611982e-05,
      "loss": 0.9419,
      "step": 142220
    },
    {
      "epoch": 0.49848069057607586,
      "grad_norm": 3.1875,
      "learning_rate": 4.632455067745612e-05,
      "loss": 1.0778,
      "step": 142230
    },
    {
      "epoch": 0.49851573808297145,
      "grad_norm": 3.234375,
      "learning_rate": 4.6323901648792416e-05,
      "loss": 0.9796,
      "step": 142240
    },
    {
      "epoch": 0.49855078558986704,
      "grad_norm": 3.0625,
      "learning_rate": 4.632325262012872e-05,
      "loss": 0.929,
      "step": 142250
    },
    {
      "epoch": 0.4985858330967627,
      "grad_norm": 2.671875,
      "learning_rate": 4.632260359146501e-05,
      "loss": 0.9572,
      "step": 142260
    },
    {
      "epoch": 0.4986208806036583,
      "grad_norm": 3.109375,
      "learning_rate": 4.632195456280131e-05,
      "loss": 1.003,
      "step": 142270
    },
    {
      "epoch": 0.49865592811055387,
      "grad_norm": 2.8125,
      "learning_rate": 4.632130553413761e-05,
      "loss": 0.9421,
      "step": 142280
    },
    {
      "epoch": 0.49869097561744946,
      "grad_norm": 3.1875,
      "learning_rate": 4.632065650547391e-05,
      "loss": 0.9523,
      "step": 142290
    },
    {
      "epoch": 0.49872602312434505,
      "grad_norm": 2.640625,
      "learning_rate": 4.6320007476810204e-05,
      "loss": 0.9023,
      "step": 142300
    },
    {
      "epoch": 0.49876107063124064,
      "grad_norm": 3.0625,
      "learning_rate": 4.6319358448146505e-05,
      "loss": 1.0443,
      "step": 142310
    },
    {
      "epoch": 0.49879611813813624,
      "grad_norm": 2.9375,
      "learning_rate": 4.63187094194828e-05,
      "loss": 0.9602,
      "step": 142320
    },
    {
      "epoch": 0.4988311656450318,
      "grad_norm": 3.0,
      "learning_rate": 4.63180603908191e-05,
      "loss": 0.8864,
      "step": 142330
    },
    {
      "epoch": 0.4988662131519274,
      "grad_norm": 2.828125,
      "learning_rate": 4.63174113621554e-05,
      "loss": 0.9074,
      "step": 142340
    },
    {
      "epoch": 0.498901260658823,
      "grad_norm": 3.03125,
      "learning_rate": 4.63167623334917e-05,
      "loss": 0.9627,
      "step": 142350
    },
    {
      "epoch": 0.49893630816571866,
      "grad_norm": 2.71875,
      "learning_rate": 4.6316113304828e-05,
      "loss": 0.8584,
      "step": 142360
    },
    {
      "epoch": 0.49897135567261425,
      "grad_norm": 2.796875,
      "learning_rate": 4.631546427616429e-05,
      "loss": 1.0478,
      "step": 142370
    },
    {
      "epoch": 0.49900640317950984,
      "grad_norm": 2.703125,
      "learning_rate": 4.6314815247500595e-05,
      "loss": 0.8798,
      "step": 142380
    },
    {
      "epoch": 0.49904145068640543,
      "grad_norm": 3.5,
      "learning_rate": 4.631416621883689e-05,
      "loss": 0.8851,
      "step": 142390
    },
    {
      "epoch": 0.499076498193301,
      "grad_norm": 3.5,
      "learning_rate": 4.631351719017319e-05,
      "loss": 0.9697,
      "step": 142400
    },
    {
      "epoch": 0.4991115457001966,
      "grad_norm": 2.78125,
      "learning_rate": 4.6312868161509485e-05,
      "loss": 0.9449,
      "step": 142410
    },
    {
      "epoch": 0.4991465932070922,
      "grad_norm": 2.984375,
      "learning_rate": 4.631221913284579e-05,
      "loss": 0.9916,
      "step": 142420
    },
    {
      "epoch": 0.4991816407139878,
      "grad_norm": 2.9375,
      "learning_rate": 4.631157010418208e-05,
      "loss": 0.9136,
      "step": 142430
    },
    {
      "epoch": 0.4992166882208834,
      "grad_norm": 3.265625,
      "learning_rate": 4.631092107551838e-05,
      "loss": 0.901,
      "step": 142440
    },
    {
      "epoch": 0.499251735727779,
      "grad_norm": 2.984375,
      "learning_rate": 4.631027204685468e-05,
      "loss": 0.9331,
      "step": 142450
    },
    {
      "epoch": 0.4992867832346746,
      "grad_norm": 2.921875,
      "learning_rate": 4.630962301819098e-05,
      "loss": 0.9995,
      "step": 142460
    },
    {
      "epoch": 0.4993218307415702,
      "grad_norm": 3.234375,
      "learning_rate": 4.630897398952728e-05,
      "loss": 0.9318,
      "step": 142470
    },
    {
      "epoch": 0.4993568782484658,
      "grad_norm": 3.359375,
      "learning_rate": 4.6308324960863575e-05,
      "loss": 0.9485,
      "step": 142480
    },
    {
      "epoch": 0.4993919257553614,
      "grad_norm": 2.71875,
      "learning_rate": 4.6307675932199876e-05,
      "loss": 0.9709,
      "step": 142490
    },
    {
      "epoch": 0.499426973262257,
      "grad_norm": 3.21875,
      "learning_rate": 4.630702690353617e-05,
      "loss": 0.9381,
      "step": 142500
    },
    {
      "epoch": 0.4994620207691526,
      "grad_norm": 2.96875,
      "learning_rate": 4.630637787487247e-05,
      "loss": 0.8481,
      "step": 142510
    },
    {
      "epoch": 0.4994970682760482,
      "grad_norm": 3.125,
      "learning_rate": 4.630572884620877e-05,
      "loss": 0.8303,
      "step": 142520
    },
    {
      "epoch": 0.49953211578294376,
      "grad_norm": 3.28125,
      "learning_rate": 4.630507981754507e-05,
      "loss": 0.9568,
      "step": 142530
    },
    {
      "epoch": 0.49956716328983936,
      "grad_norm": 3.421875,
      "learning_rate": 4.630443078888136e-05,
      "loss": 0.9484,
      "step": 142540
    },
    {
      "epoch": 0.49960221079673495,
      "grad_norm": 2.984375,
      "learning_rate": 4.6303781760217664e-05,
      "loss": 0.8931,
      "step": 142550
    },
    {
      "epoch": 0.4996372583036306,
      "grad_norm": 3.25,
      "learning_rate": 4.630313273155396e-05,
      "loss": 0.9452,
      "step": 142560
    },
    {
      "epoch": 0.4996723058105262,
      "grad_norm": 2.78125,
      "learning_rate": 4.630248370289026e-05,
      "loss": 1.0157,
      "step": 142570
    },
    {
      "epoch": 0.4997073533174218,
      "grad_norm": 3.421875,
      "learning_rate": 4.6301834674226555e-05,
      "loss": 0.9429,
      "step": 142580
    },
    {
      "epoch": 0.49974240082431737,
      "grad_norm": 2.75,
      "learning_rate": 4.630118564556285e-05,
      "loss": 0.8696,
      "step": 142590
    },
    {
      "epoch": 0.49977744833121296,
      "grad_norm": 2.90625,
      "learning_rate": 4.630053661689915e-05,
      "loss": 0.9305,
      "step": 142600
    },
    {
      "epoch": 0.49981249583810855,
      "grad_norm": 3.078125,
      "learning_rate": 4.6299887588235445e-05,
      "loss": 0.9441,
      "step": 142610
    },
    {
      "epoch": 0.49984754334500414,
      "grad_norm": 2.484375,
      "learning_rate": 4.629923855957175e-05,
      "loss": 0.908,
      "step": 142620
    },
    {
      "epoch": 0.49988259085189973,
      "grad_norm": 2.921875,
      "learning_rate": 4.629858953090804e-05,
      "loss": 0.9349,
      "step": 142630
    },
    {
      "epoch": 0.4999176383587953,
      "grad_norm": 3.109375,
      "learning_rate": 4.629794050224434e-05,
      "loss": 0.9109,
      "step": 142640
    },
    {
      "epoch": 0.49995268586569097,
      "grad_norm": 3.125,
      "learning_rate": 4.629729147358064e-05,
      "loss": 1.0215,
      "step": 142650
    },
    {
      "epoch": 0.49998773337258656,
      "grad_norm": 3.078125,
      "learning_rate": 4.629664244491694e-05,
      "loss": 0.9751,
      "step": 142660
    },
    {
      "epoch": 0.5000227808794822,
      "grad_norm": 2.78125,
      "learning_rate": 4.629599341625323e-05,
      "loss": 0.8792,
      "step": 142670
    },
    {
      "epoch": 0.5000578283863777,
      "grad_norm": 3.03125,
      "learning_rate": 4.6295344387589535e-05,
      "loss": 0.9007,
      "step": 142680
    },
    {
      "epoch": 0.5000928758932733,
      "grad_norm": 3.015625,
      "learning_rate": 4.629469535892583e-05,
      "loss": 0.895,
      "step": 142690
    },
    {
      "epoch": 0.500127923400169,
      "grad_norm": 2.953125,
      "learning_rate": 4.629404633026213e-05,
      "loss": 0.9374,
      "step": 142700
    },
    {
      "epoch": 0.5001629709070645,
      "grad_norm": 2.84375,
      "learning_rate": 4.629339730159843e-05,
      "loss": 0.9282,
      "step": 142710
    },
    {
      "epoch": 0.5001980184139602,
      "grad_norm": 3.0625,
      "learning_rate": 4.629274827293473e-05,
      "loss": 0.9546,
      "step": 142720
    },
    {
      "epoch": 0.5002330659208557,
      "grad_norm": 2.828125,
      "learning_rate": 4.629209924427103e-05,
      "loss": 0.8897,
      "step": 142730
    },
    {
      "epoch": 0.5002681134277513,
      "grad_norm": 2.90625,
      "learning_rate": 4.629145021560732e-05,
      "loss": 0.9294,
      "step": 142740
    },
    {
      "epoch": 0.5003031609346469,
      "grad_norm": 2.859375,
      "learning_rate": 4.6290801186943624e-05,
      "loss": 0.9233,
      "step": 142750
    },
    {
      "epoch": 0.5003382084415425,
      "grad_norm": 2.984375,
      "learning_rate": 4.629015215827992e-05,
      "loss": 0.8994,
      "step": 142760
    },
    {
      "epoch": 0.5003732559484381,
      "grad_norm": 3.078125,
      "learning_rate": 4.628950312961622e-05,
      "loss": 0.9906,
      "step": 142770
    },
    {
      "epoch": 0.5004083034553337,
      "grad_norm": 3.546875,
      "learning_rate": 4.6288854100952515e-05,
      "loss": 0.9062,
      "step": 142780
    },
    {
      "epoch": 0.5004433509622292,
      "grad_norm": 3.25,
      "learning_rate": 4.6288205072288816e-05,
      "loss": 0.948,
      "step": 142790
    },
    {
      "epoch": 0.5004783984691249,
      "grad_norm": 3.171875,
      "learning_rate": 4.628755604362511e-05,
      "loss": 0.8959,
      "step": 142800
    },
    {
      "epoch": 0.5005134459760205,
      "grad_norm": 3.125,
      "learning_rate": 4.628690701496141e-05,
      "loss": 0.9471,
      "step": 142810
    },
    {
      "epoch": 0.5005484934829161,
      "grad_norm": 2.96875,
      "learning_rate": 4.628625798629771e-05,
      "loss": 0.9023,
      "step": 142820
    },
    {
      "epoch": 0.5005835409898117,
      "grad_norm": 3.046875,
      "learning_rate": 4.628560895763401e-05,
      "loss": 0.8808,
      "step": 142830
    },
    {
      "epoch": 0.5006185884967073,
      "grad_norm": 3.0,
      "learning_rate": 4.628495992897031e-05,
      "loss": 0.9942,
      "step": 142840
    },
    {
      "epoch": 0.5006536360036029,
      "grad_norm": 2.625,
      "learning_rate": 4.6284310900306604e-05,
      "loss": 0.8836,
      "step": 142850
    },
    {
      "epoch": 0.5006886835104984,
      "grad_norm": 3.375,
      "learning_rate": 4.6283661871642905e-05,
      "loss": 0.9658,
      "step": 142860
    },
    {
      "epoch": 0.5007237310173941,
      "grad_norm": 3.0,
      "learning_rate": 4.62830128429792e-05,
      "loss": 0.92,
      "step": 142870
    },
    {
      "epoch": 0.5007587785242896,
      "grad_norm": 6.0625,
      "learning_rate": 4.62823638143155e-05,
      "loss": 0.979,
      "step": 142880
    },
    {
      "epoch": 0.5007938260311853,
      "grad_norm": 2.9375,
      "learning_rate": 4.6281714785651796e-05,
      "loss": 0.996,
      "step": 142890
    },
    {
      "epoch": 0.5008288735380809,
      "grad_norm": 2.84375,
      "learning_rate": 4.62810657569881e-05,
      "loss": 0.8202,
      "step": 142900
    },
    {
      "epoch": 0.5008639210449765,
      "grad_norm": 3.28125,
      "learning_rate": 4.628041672832439e-05,
      "loss": 0.9476,
      "step": 142910
    },
    {
      "epoch": 0.5008989685518721,
      "grad_norm": 2.578125,
      "learning_rate": 4.6279767699660693e-05,
      "loss": 0.9266,
      "step": 142920
    },
    {
      "epoch": 0.5009340160587676,
      "grad_norm": 3.40625,
      "learning_rate": 4.627911867099699e-05,
      "loss": 0.9066,
      "step": 142930
    },
    {
      "epoch": 0.5009690635656633,
      "grad_norm": 2.9375,
      "learning_rate": 4.627846964233329e-05,
      "loss": 0.9812,
      "step": 142940
    },
    {
      "epoch": 0.5010041110725588,
      "grad_norm": 3.140625,
      "learning_rate": 4.6277820613669584e-05,
      "loss": 0.889,
      "step": 142950
    },
    {
      "epoch": 0.5010391585794545,
      "grad_norm": 2.90625,
      "learning_rate": 4.6277171585005885e-05,
      "loss": 0.995,
      "step": 142960
    },
    {
      "epoch": 0.50107420608635,
      "grad_norm": 2.921875,
      "learning_rate": 4.627652255634218e-05,
      "loss": 0.8594,
      "step": 142970
    },
    {
      "epoch": 0.5011092535932457,
      "grad_norm": 3.015625,
      "learning_rate": 4.6275873527678475e-05,
      "loss": 0.8519,
      "step": 142980
    },
    {
      "epoch": 0.5011443011001412,
      "grad_norm": 2.65625,
      "learning_rate": 4.6275224499014776e-05,
      "loss": 0.9385,
      "step": 142990
    },
    {
      "epoch": 0.5011793486070368,
      "grad_norm": 3.125,
      "learning_rate": 4.627457547035107e-05,
      "loss": 0.8931,
      "step": 143000
    },
    {
      "epoch": 0.5012143961139325,
      "grad_norm": 2.921875,
      "learning_rate": 4.627392644168737e-05,
      "loss": 0.8989,
      "step": 143010
    },
    {
      "epoch": 0.501249443620828,
      "grad_norm": 3.375,
      "learning_rate": 4.627327741302367e-05,
      "loss": 0.9008,
      "step": 143020
    },
    {
      "epoch": 0.5012844911277237,
      "grad_norm": 2.78125,
      "learning_rate": 4.627262838435997e-05,
      "loss": 0.9234,
      "step": 143030
    },
    {
      "epoch": 0.5013195386346192,
      "grad_norm": 3.28125,
      "learning_rate": 4.627197935569626e-05,
      "loss": 0.9481,
      "step": 143040
    },
    {
      "epoch": 0.5013545861415148,
      "grad_norm": 3.203125,
      "learning_rate": 4.6271330327032564e-05,
      "loss": 0.9747,
      "step": 143050
    },
    {
      "epoch": 0.5013896336484104,
      "grad_norm": 3.125,
      "learning_rate": 4.627068129836886e-05,
      "loss": 0.8853,
      "step": 143060
    },
    {
      "epoch": 0.501424681155306,
      "grad_norm": 3.0,
      "learning_rate": 4.627003226970516e-05,
      "loss": 0.9386,
      "step": 143070
    },
    {
      "epoch": 0.5014597286622016,
      "grad_norm": 3.0625,
      "learning_rate": 4.626938324104146e-05,
      "loss": 0.9752,
      "step": 143080
    },
    {
      "epoch": 0.5014947761690972,
      "grad_norm": 2.90625,
      "learning_rate": 4.6268734212377756e-05,
      "loss": 0.915,
      "step": 143090
    },
    {
      "epoch": 0.5015298236759929,
      "grad_norm": 2.9375,
      "learning_rate": 4.626808518371406e-05,
      "loss": 0.8646,
      "step": 143100
    },
    {
      "epoch": 0.5015648711828884,
      "grad_norm": 2.84375,
      "learning_rate": 4.626743615505035e-05,
      "loss": 0.9223,
      "step": 143110
    },
    {
      "epoch": 0.501599918689784,
      "grad_norm": 2.96875,
      "learning_rate": 4.6266787126386653e-05,
      "loss": 0.9087,
      "step": 143120
    },
    {
      "epoch": 0.5016349661966796,
      "grad_norm": 3.171875,
      "learning_rate": 4.626613809772295e-05,
      "loss": 0.9304,
      "step": 143130
    },
    {
      "epoch": 0.5016700137035752,
      "grad_norm": 2.75,
      "learning_rate": 4.626548906905925e-05,
      "loss": 0.8797,
      "step": 143140
    },
    {
      "epoch": 0.5017050612104708,
      "grad_norm": 3.1875,
      "learning_rate": 4.6264840040395544e-05,
      "loss": 1.0029,
      "step": 143150
    },
    {
      "epoch": 0.5017401087173664,
      "grad_norm": 2.71875,
      "learning_rate": 4.6264191011731845e-05,
      "loss": 0.911,
      "step": 143160
    },
    {
      "epoch": 0.5017751562242619,
      "grad_norm": 2.96875,
      "learning_rate": 4.626354198306814e-05,
      "loss": 0.973,
      "step": 143170
    },
    {
      "epoch": 0.5018102037311576,
      "grad_norm": 2.75,
      "learning_rate": 4.626289295440444e-05,
      "loss": 0.9229,
      "step": 143180
    },
    {
      "epoch": 0.5018452512380532,
      "grad_norm": 3.46875,
      "learning_rate": 4.6262243925740736e-05,
      "loss": 0.8762,
      "step": 143190
    },
    {
      "epoch": 0.5018802987449488,
      "grad_norm": 3.625,
      "learning_rate": 4.626159489707704e-05,
      "loss": 1.0086,
      "step": 143200
    },
    {
      "epoch": 0.5019153462518444,
      "grad_norm": 2.71875,
      "learning_rate": 4.626094586841334e-05,
      "loss": 0.9169,
      "step": 143210
    },
    {
      "epoch": 0.50195039375874,
      "grad_norm": 3.109375,
      "learning_rate": 4.6260296839749633e-05,
      "loss": 0.9383,
      "step": 143220
    },
    {
      "epoch": 0.5019854412656356,
      "grad_norm": 3.109375,
      "learning_rate": 4.6259647811085935e-05,
      "loss": 0.9247,
      "step": 143230
    },
    {
      "epoch": 0.5020204887725311,
      "grad_norm": 2.875,
      "learning_rate": 4.625899878242223e-05,
      "loss": 0.8565,
      "step": 143240
    },
    {
      "epoch": 0.5020555362794268,
      "grad_norm": 2.703125,
      "learning_rate": 4.625834975375853e-05,
      "loss": 0.9931,
      "step": 143250
    },
    {
      "epoch": 0.5020905837863223,
      "grad_norm": 3.21875,
      "learning_rate": 4.6257700725094825e-05,
      "loss": 0.9733,
      "step": 143260
    },
    {
      "epoch": 0.502125631293218,
      "grad_norm": 2.9375,
      "learning_rate": 4.625705169643113e-05,
      "loss": 0.9732,
      "step": 143270
    },
    {
      "epoch": 0.5021606788001135,
      "grad_norm": 2.921875,
      "learning_rate": 4.625640266776742e-05,
      "loss": 0.9434,
      "step": 143280
    },
    {
      "epoch": 0.5021957263070091,
      "grad_norm": 2.890625,
      "learning_rate": 4.625575363910372e-05,
      "loss": 0.9772,
      "step": 143290
    },
    {
      "epoch": 0.5022307738139048,
      "grad_norm": 2.671875,
      "learning_rate": 4.625510461044002e-05,
      "loss": 0.8624,
      "step": 143300
    },
    {
      "epoch": 0.5022658213208003,
      "grad_norm": 3.1875,
      "learning_rate": 4.625445558177632e-05,
      "loss": 0.9131,
      "step": 143310
    },
    {
      "epoch": 0.502300868827696,
      "grad_norm": 2.8125,
      "learning_rate": 4.6253806553112613e-05,
      "loss": 0.9534,
      "step": 143320
    },
    {
      "epoch": 0.5023359163345915,
      "grad_norm": 2.9375,
      "learning_rate": 4.6253157524448915e-05,
      "loss": 0.8437,
      "step": 143330
    },
    {
      "epoch": 0.5023709638414872,
      "grad_norm": 3.328125,
      "learning_rate": 4.625250849578521e-05,
      "loss": 0.8668,
      "step": 143340
    },
    {
      "epoch": 0.5024060113483827,
      "grad_norm": 2.984375,
      "learning_rate": 4.6251859467121504e-05,
      "loss": 0.8499,
      "step": 143350
    },
    {
      "epoch": 0.5024410588552783,
      "grad_norm": 2.796875,
      "learning_rate": 4.6251210438457805e-05,
      "loss": 0.9104,
      "step": 143360
    },
    {
      "epoch": 0.5024761063621739,
      "grad_norm": 2.953125,
      "learning_rate": 4.62505614097941e-05,
      "loss": 0.9743,
      "step": 143370
    },
    {
      "epoch": 0.5025111538690695,
      "grad_norm": 2.96875,
      "learning_rate": 4.62499123811304e-05,
      "loss": 0.9408,
      "step": 143380
    },
    {
      "epoch": 0.5025462013759652,
      "grad_norm": 3.0,
      "learning_rate": 4.6249263352466696e-05,
      "loss": 0.9578,
      "step": 143390
    },
    {
      "epoch": 0.5025812488828607,
      "grad_norm": 3.046875,
      "learning_rate": 4.6248614323803e-05,
      "loss": 0.8612,
      "step": 143400
    },
    {
      "epoch": 0.5026162963897564,
      "grad_norm": 2.8125,
      "learning_rate": 4.624796529513929e-05,
      "loss": 0.9349,
      "step": 143410
    },
    {
      "epoch": 0.5026513438966519,
      "grad_norm": 3.078125,
      "learning_rate": 4.6247316266475593e-05,
      "loss": 0.9419,
      "step": 143420
    },
    {
      "epoch": 0.5026863914035475,
      "grad_norm": 2.484375,
      "learning_rate": 4.6246667237811895e-05,
      "loss": 0.955,
      "step": 143430
    },
    {
      "epoch": 0.5027214389104431,
      "grad_norm": 2.71875,
      "learning_rate": 4.624601820914819e-05,
      "loss": 0.8146,
      "step": 143440
    },
    {
      "epoch": 0.5027564864173387,
      "grad_norm": 3.0,
      "learning_rate": 4.624536918048449e-05,
      "loss": 0.8617,
      "step": 143450
    },
    {
      "epoch": 0.5027915339242343,
      "grad_norm": 2.90625,
      "learning_rate": 4.6244720151820785e-05,
      "loss": 0.9317,
      "step": 143460
    },
    {
      "epoch": 0.5028265814311299,
      "grad_norm": 2.765625,
      "learning_rate": 4.624407112315709e-05,
      "loss": 0.9089,
      "step": 143470
    },
    {
      "epoch": 0.5028616289380254,
      "grad_norm": 3.390625,
      "learning_rate": 4.624342209449338e-05,
      "loss": 0.926,
      "step": 143480
    },
    {
      "epoch": 0.5028966764449211,
      "grad_norm": 2.9375,
      "learning_rate": 4.624277306582968e-05,
      "loss": 0.8836,
      "step": 143490
    },
    {
      "epoch": 0.5029317239518167,
      "grad_norm": 2.671875,
      "learning_rate": 4.624212403716598e-05,
      "loss": 0.9442,
      "step": 143500
    },
    {
      "epoch": 0.5029667714587123,
      "grad_norm": 2.65625,
      "learning_rate": 4.624147500850228e-05,
      "loss": 0.8875,
      "step": 143510
    },
    {
      "epoch": 0.5030018189656079,
      "grad_norm": 2.703125,
      "learning_rate": 4.6240825979838573e-05,
      "loss": 0.9455,
      "step": 143520
    },
    {
      "epoch": 0.5030368664725035,
      "grad_norm": 3.109375,
      "learning_rate": 4.6240176951174875e-05,
      "loss": 0.9768,
      "step": 143530
    },
    {
      "epoch": 0.5030719139793991,
      "grad_norm": 3.25,
      "learning_rate": 4.623952792251117e-05,
      "loss": 0.957,
      "step": 143540
    },
    {
      "epoch": 0.5031069614862946,
      "grad_norm": 3.078125,
      "learning_rate": 4.623887889384747e-05,
      "loss": 1.0466,
      "step": 143550
    },
    {
      "epoch": 0.5031420089931903,
      "grad_norm": 2.859375,
      "learning_rate": 4.6238229865183765e-05,
      "loss": 0.9202,
      "step": 143560
    },
    {
      "epoch": 0.5031770565000858,
      "grad_norm": 2.734375,
      "learning_rate": 4.623758083652007e-05,
      "loss": 0.8978,
      "step": 143570
    },
    {
      "epoch": 0.5032121040069815,
      "grad_norm": 3.203125,
      "learning_rate": 4.623693180785637e-05,
      "loss": 0.9589,
      "step": 143580
    },
    {
      "epoch": 0.5032471515138771,
      "grad_norm": 2.875,
      "learning_rate": 4.623628277919266e-05,
      "loss": 0.9953,
      "step": 143590
    },
    {
      "epoch": 0.5032821990207726,
      "grad_norm": 3.375,
      "learning_rate": 4.6235633750528964e-05,
      "loss": 0.9298,
      "step": 143600
    },
    {
      "epoch": 0.5033172465276683,
      "grad_norm": 2.65625,
      "learning_rate": 4.623498472186526e-05,
      "loss": 0.9039,
      "step": 143610
    },
    {
      "epoch": 0.5033522940345638,
      "grad_norm": 3.125,
      "learning_rate": 4.623433569320156e-05,
      "loss": 1.0002,
      "step": 143620
    },
    {
      "epoch": 0.5033873415414595,
      "grad_norm": 2.953125,
      "learning_rate": 4.6233686664537855e-05,
      "loss": 0.9077,
      "step": 143630
    },
    {
      "epoch": 0.503422389048355,
      "grad_norm": 3.09375,
      "learning_rate": 4.6233037635874156e-05,
      "loss": 0.962,
      "step": 143640
    },
    {
      "epoch": 0.5034574365552507,
      "grad_norm": 2.953125,
      "learning_rate": 4.623238860721045e-05,
      "loss": 0.9395,
      "step": 143650
    },
    {
      "epoch": 0.5034924840621462,
      "grad_norm": 3.0,
      "learning_rate": 4.623173957854675e-05,
      "loss": 0.9171,
      "step": 143660
    },
    {
      "epoch": 0.5035275315690418,
      "grad_norm": 2.671875,
      "learning_rate": 4.623109054988305e-05,
      "loss": 0.9675,
      "step": 143670
    },
    {
      "epoch": 0.5035625790759375,
      "grad_norm": 2.828125,
      "learning_rate": 4.623044152121935e-05,
      "loss": 0.9015,
      "step": 143680
    },
    {
      "epoch": 0.503597626582833,
      "grad_norm": 3.203125,
      "learning_rate": 4.622979249255564e-05,
      "loss": 0.896,
      "step": 143690
    },
    {
      "epoch": 0.5036326740897287,
      "grad_norm": 3.03125,
      "learning_rate": 4.6229143463891944e-05,
      "loss": 0.9529,
      "step": 143700
    },
    {
      "epoch": 0.5036677215966242,
      "grad_norm": 2.90625,
      "learning_rate": 4.6228494435228246e-05,
      "loss": 0.9458,
      "step": 143710
    },
    {
      "epoch": 0.5037027691035199,
      "grad_norm": 2.9375,
      "learning_rate": 4.6227845406564533e-05,
      "loss": 0.9555,
      "step": 143720
    },
    {
      "epoch": 0.5037378166104154,
      "grad_norm": 2.53125,
      "learning_rate": 4.6227196377900835e-05,
      "loss": 0.9879,
      "step": 143730
    },
    {
      "epoch": 0.503772864117311,
      "grad_norm": 3.09375,
      "learning_rate": 4.622654734923713e-05,
      "loss": 0.9219,
      "step": 143740
    },
    {
      "epoch": 0.5038079116242066,
      "grad_norm": 3.15625,
      "learning_rate": 4.622589832057343e-05,
      "loss": 0.9921,
      "step": 143750
    },
    {
      "epoch": 0.5038429591311022,
      "grad_norm": 2.984375,
      "learning_rate": 4.6225249291909725e-05,
      "loss": 0.9027,
      "step": 143760
    },
    {
      "epoch": 0.5038780066379978,
      "grad_norm": 2.90625,
      "learning_rate": 4.622460026324603e-05,
      "loss": 0.9113,
      "step": 143770
    },
    {
      "epoch": 0.5039130541448934,
      "grad_norm": 3.09375,
      "learning_rate": 4.622395123458232e-05,
      "loss": 0.9752,
      "step": 143780
    },
    {
      "epoch": 0.503948101651789,
      "grad_norm": 2.578125,
      "learning_rate": 4.622330220591862e-05,
      "loss": 0.9586,
      "step": 143790
    },
    {
      "epoch": 0.5039831491586846,
      "grad_norm": 3.0625,
      "learning_rate": 4.6222653177254924e-05,
      "loss": 0.9525,
      "step": 143800
    },
    {
      "epoch": 0.5040181966655802,
      "grad_norm": 2.859375,
      "learning_rate": 4.622200414859122e-05,
      "loss": 0.939,
      "step": 143810
    },
    {
      "epoch": 0.5040532441724758,
      "grad_norm": 2.84375,
      "learning_rate": 4.622135511992752e-05,
      "loss": 0.9254,
      "step": 143820
    },
    {
      "epoch": 0.5040882916793714,
      "grad_norm": 3.15625,
      "learning_rate": 4.6220706091263815e-05,
      "loss": 0.9497,
      "step": 143830
    },
    {
      "epoch": 0.504123339186267,
      "grad_norm": 4.0,
      "learning_rate": 4.6220057062600116e-05,
      "loss": 0.9002,
      "step": 143840
    },
    {
      "epoch": 0.5041583866931626,
      "grad_norm": 2.890625,
      "learning_rate": 4.621940803393641e-05,
      "loss": 0.9692,
      "step": 143850
    },
    {
      "epoch": 0.5041934342000581,
      "grad_norm": 3.046875,
      "learning_rate": 4.621875900527271e-05,
      "loss": 0.8942,
      "step": 143860
    },
    {
      "epoch": 0.5042284817069538,
      "grad_norm": 3.03125,
      "learning_rate": 4.621810997660901e-05,
      "loss": 0.9,
      "step": 143870
    },
    {
      "epoch": 0.5042635292138494,
      "grad_norm": 3.09375,
      "learning_rate": 4.621746094794531e-05,
      "loss": 1.0117,
      "step": 143880
    },
    {
      "epoch": 0.504298576720745,
      "grad_norm": 3.203125,
      "learning_rate": 4.62168119192816e-05,
      "loss": 0.973,
      "step": 143890
    },
    {
      "epoch": 0.5043336242276406,
      "grad_norm": 3.21875,
      "learning_rate": 4.6216162890617904e-05,
      "loss": 0.9817,
      "step": 143900
    },
    {
      "epoch": 0.5043686717345361,
      "grad_norm": 2.90625,
      "learning_rate": 4.62155138619542e-05,
      "loss": 0.8988,
      "step": 143910
    },
    {
      "epoch": 0.5044037192414318,
      "grad_norm": 2.90625,
      "learning_rate": 4.62148648332905e-05,
      "loss": 0.9457,
      "step": 143920
    },
    {
      "epoch": 0.5044387667483273,
      "grad_norm": 2.8125,
      "learning_rate": 4.6214215804626795e-05,
      "loss": 0.9429,
      "step": 143930
    },
    {
      "epoch": 0.504473814255223,
      "grad_norm": 3.296875,
      "learning_rate": 4.6213566775963096e-05,
      "loss": 0.8939,
      "step": 143940
    },
    {
      "epoch": 0.5045088617621185,
      "grad_norm": 3.03125,
      "learning_rate": 4.62129177472994e-05,
      "loss": 0.9133,
      "step": 143950
    },
    {
      "epoch": 0.5045439092690142,
      "grad_norm": 2.921875,
      "learning_rate": 4.621226871863569e-05,
      "loss": 0.9086,
      "step": 143960
    },
    {
      "epoch": 0.5045789567759097,
      "grad_norm": 2.703125,
      "learning_rate": 4.6211619689971994e-05,
      "loss": 0.9552,
      "step": 143970
    },
    {
      "epoch": 0.5046140042828053,
      "grad_norm": 3.15625,
      "learning_rate": 4.621097066130829e-05,
      "loss": 0.9815,
      "step": 143980
    },
    {
      "epoch": 0.504649051789701,
      "grad_norm": 3.046875,
      "learning_rate": 4.621032163264459e-05,
      "loss": 0.978,
      "step": 143990
    },
    {
      "epoch": 0.5046840992965965,
      "grad_norm": 2.90625,
      "learning_rate": 4.6209672603980884e-05,
      "loss": 0.9737,
      "step": 144000
    },
    {
      "epoch": 0.5047191468034922,
      "grad_norm": 2.65625,
      "learning_rate": 4.6209023575317186e-05,
      "loss": 0.979,
      "step": 144010
    },
    {
      "epoch": 0.5047541943103877,
      "grad_norm": 3.015625,
      "learning_rate": 4.620837454665348e-05,
      "loss": 0.9936,
      "step": 144020
    },
    {
      "epoch": 0.5047892418172834,
      "grad_norm": 3.09375,
      "learning_rate": 4.620772551798978e-05,
      "loss": 0.9035,
      "step": 144030
    },
    {
      "epoch": 0.5048242893241789,
      "grad_norm": 2.84375,
      "learning_rate": 4.6207076489326076e-05,
      "loss": 0.8715,
      "step": 144040
    },
    {
      "epoch": 0.5048593368310745,
      "grad_norm": 2.84375,
      "learning_rate": 4.620642746066238e-05,
      "loss": 0.9153,
      "step": 144050
    },
    {
      "epoch": 0.5048943843379701,
      "grad_norm": 3.328125,
      "learning_rate": 4.620577843199867e-05,
      "loss": 0.8944,
      "step": 144060
    },
    {
      "epoch": 0.5049294318448657,
      "grad_norm": 3.53125,
      "learning_rate": 4.6205129403334974e-05,
      "loss": 0.9625,
      "step": 144070
    },
    {
      "epoch": 0.5049644793517614,
      "grad_norm": 2.859375,
      "learning_rate": 4.6204480374671275e-05,
      "loss": 0.9897,
      "step": 144080
    },
    {
      "epoch": 0.5049995268586569,
      "grad_norm": 2.890625,
      "learning_rate": 4.620383134600757e-05,
      "loss": 0.9441,
      "step": 144090
    },
    {
      "epoch": 0.5050345743655525,
      "grad_norm": 3.125,
      "learning_rate": 4.6203182317343864e-05,
      "loss": 0.8677,
      "step": 144100
    },
    {
      "epoch": 0.5050696218724481,
      "grad_norm": 2.921875,
      "learning_rate": 4.620253328868016e-05,
      "loss": 0.9458,
      "step": 144110
    },
    {
      "epoch": 0.5051046693793437,
      "grad_norm": 3.34375,
      "learning_rate": 4.620188426001646e-05,
      "loss": 0.8752,
      "step": 144120
    },
    {
      "epoch": 0.5051397168862393,
      "grad_norm": 2.796875,
      "learning_rate": 4.6201235231352755e-05,
      "loss": 0.9097,
      "step": 144130
    },
    {
      "epoch": 0.5051747643931349,
      "grad_norm": 3.046875,
      "learning_rate": 4.6200586202689056e-05,
      "loss": 0.8482,
      "step": 144140
    },
    {
      "epoch": 0.5052098119000304,
      "grad_norm": 2.84375,
      "learning_rate": 4.619993717402535e-05,
      "loss": 0.9205,
      "step": 144150
    },
    {
      "epoch": 0.5052448594069261,
      "grad_norm": 3.171875,
      "learning_rate": 4.619928814536165e-05,
      "loss": 0.9092,
      "step": 144160
    },
    {
      "epoch": 0.5052799069138216,
      "grad_norm": 3.09375,
      "learning_rate": 4.6198639116697954e-05,
      "loss": 0.9251,
      "step": 144170
    },
    {
      "epoch": 0.5053149544207173,
      "grad_norm": 2.765625,
      "learning_rate": 4.619799008803425e-05,
      "loss": 0.8976,
      "step": 144180
    },
    {
      "epoch": 0.5053500019276129,
      "grad_norm": 3.15625,
      "learning_rate": 4.619734105937055e-05,
      "loss": 1.0028,
      "step": 144190
    },
    {
      "epoch": 0.5053850494345085,
      "grad_norm": 2.984375,
      "learning_rate": 4.6196692030706844e-05,
      "loss": 0.8496,
      "step": 144200
    },
    {
      "epoch": 0.5054200969414041,
      "grad_norm": 2.703125,
      "learning_rate": 4.6196043002043146e-05,
      "loss": 0.9017,
      "step": 144210
    },
    {
      "epoch": 0.5054551444482996,
      "grad_norm": 3.09375,
      "learning_rate": 4.619539397337944e-05,
      "loss": 0.8937,
      "step": 144220
    },
    {
      "epoch": 0.5054901919551953,
      "grad_norm": 3.234375,
      "learning_rate": 4.619474494471574e-05,
      "loss": 0.9072,
      "step": 144230
    },
    {
      "epoch": 0.5055252394620908,
      "grad_norm": 2.84375,
      "learning_rate": 4.6194095916052036e-05,
      "loss": 0.9948,
      "step": 144240
    },
    {
      "epoch": 0.5055602869689865,
      "grad_norm": 3.109375,
      "learning_rate": 4.619344688738834e-05,
      "loss": 0.9732,
      "step": 144250
    },
    {
      "epoch": 0.505595334475882,
      "grad_norm": 3.375,
      "learning_rate": 4.619279785872463e-05,
      "loss": 0.9025,
      "step": 144260
    },
    {
      "epoch": 0.5056303819827777,
      "grad_norm": 3.4375,
      "learning_rate": 4.6192148830060934e-05,
      "loss": 0.9391,
      "step": 144270
    },
    {
      "epoch": 0.5056654294896733,
      "grad_norm": 2.75,
      "learning_rate": 4.619149980139723e-05,
      "loss": 0.9387,
      "step": 144280
    },
    {
      "epoch": 0.5057004769965688,
      "grad_norm": 3.015625,
      "learning_rate": 4.619085077273353e-05,
      "loss": 0.9491,
      "step": 144290
    },
    {
      "epoch": 0.5057355245034645,
      "grad_norm": 2.875,
      "learning_rate": 4.6190201744069824e-05,
      "loss": 0.9536,
      "step": 144300
    },
    {
      "epoch": 0.50577057201036,
      "grad_norm": 3.09375,
      "learning_rate": 4.6189552715406126e-05,
      "loss": 0.8523,
      "step": 144310
    },
    {
      "epoch": 0.5058056195172557,
      "grad_norm": 3.078125,
      "learning_rate": 4.618890368674243e-05,
      "loss": 0.9716,
      "step": 144320
    },
    {
      "epoch": 0.5058406670241512,
      "grad_norm": 3.109375,
      "learning_rate": 4.618825465807872e-05,
      "loss": 0.9837,
      "step": 144330
    },
    {
      "epoch": 0.5058757145310468,
      "grad_norm": 3.125,
      "learning_rate": 4.618760562941502e-05,
      "loss": 0.9321,
      "step": 144340
    },
    {
      "epoch": 0.5059107620379424,
      "grad_norm": 2.9375,
      "learning_rate": 4.618695660075132e-05,
      "loss": 0.9419,
      "step": 144350
    },
    {
      "epoch": 0.505945809544838,
      "grad_norm": 3.296875,
      "learning_rate": 4.618630757208762e-05,
      "loss": 0.9258,
      "step": 144360
    },
    {
      "epoch": 0.5059808570517337,
      "grad_norm": 2.578125,
      "learning_rate": 4.6185658543423914e-05,
      "loss": 0.8896,
      "step": 144370
    },
    {
      "epoch": 0.5060159045586292,
      "grad_norm": 2.78125,
      "learning_rate": 4.6185009514760215e-05,
      "loss": 0.8708,
      "step": 144380
    },
    {
      "epoch": 0.5060509520655249,
      "grad_norm": 3.15625,
      "learning_rate": 4.618436048609651e-05,
      "loss": 0.9578,
      "step": 144390
    },
    {
      "epoch": 0.5060859995724204,
      "grad_norm": 3.0,
      "learning_rate": 4.618371145743281e-05,
      "loss": 0.8135,
      "step": 144400
    },
    {
      "epoch": 0.506121047079316,
      "grad_norm": 2.734375,
      "learning_rate": 4.6183062428769106e-05,
      "loss": 0.9067,
      "step": 144410
    },
    {
      "epoch": 0.5061560945862116,
      "grad_norm": 3.171875,
      "learning_rate": 4.618241340010541e-05,
      "loss": 0.9822,
      "step": 144420
    },
    {
      "epoch": 0.5061911420931072,
      "grad_norm": 3.09375,
      "learning_rate": 4.61817643714417e-05,
      "loss": 0.9363,
      "step": 144430
    },
    {
      "epoch": 0.5062261896000028,
      "grad_norm": 2.84375,
      "learning_rate": 4.6181115342778e-05,
      "loss": 0.8782,
      "step": 144440
    },
    {
      "epoch": 0.5062612371068984,
      "grad_norm": 2.953125,
      "learning_rate": 4.6180466314114304e-05,
      "loss": 0.9577,
      "step": 144450
    },
    {
      "epoch": 0.506296284613794,
      "grad_norm": 4.0,
      "learning_rate": 4.61798172854506e-05,
      "loss": 1.0519,
      "step": 144460
    },
    {
      "epoch": 0.5063313321206896,
      "grad_norm": 3.0,
      "learning_rate": 4.6179168256786894e-05,
      "loss": 0.8711,
      "step": 144470
    },
    {
      "epoch": 0.5063663796275852,
      "grad_norm": 2.96875,
      "learning_rate": 4.617851922812319e-05,
      "loss": 0.9149,
      "step": 144480
    },
    {
      "epoch": 0.5064014271344808,
      "grad_norm": 2.78125,
      "learning_rate": 4.617787019945949e-05,
      "loss": 0.884,
      "step": 144490
    },
    {
      "epoch": 0.5064364746413764,
      "grad_norm": 3.0,
      "learning_rate": 4.6177221170795784e-05,
      "loss": 0.9997,
      "step": 144500
    },
    {
      "epoch": 0.506471522148272,
      "grad_norm": 2.84375,
      "learning_rate": 4.6176572142132086e-05,
      "loss": 0.9266,
      "step": 144510
    },
    {
      "epoch": 0.5065065696551676,
      "grad_norm": 3.203125,
      "learning_rate": 4.617592311346838e-05,
      "loss": 0.8499,
      "step": 144520
    },
    {
      "epoch": 0.5065416171620631,
      "grad_norm": 2.765625,
      "learning_rate": 4.617527408480468e-05,
      "loss": 0.9155,
      "step": 144530
    },
    {
      "epoch": 0.5065766646689588,
      "grad_norm": 3.40625,
      "learning_rate": 4.617462505614098e-05,
      "loss": 0.9054,
      "step": 144540
    },
    {
      "epoch": 0.5066117121758543,
      "grad_norm": 3.375,
      "learning_rate": 4.617397602747728e-05,
      "loss": 0.9958,
      "step": 144550
    },
    {
      "epoch": 0.50664675968275,
      "grad_norm": 3.359375,
      "learning_rate": 4.617332699881358e-05,
      "loss": 0.9708,
      "step": 144560
    },
    {
      "epoch": 0.5066818071896456,
      "grad_norm": 3.28125,
      "learning_rate": 4.6172677970149874e-05,
      "loss": 0.9503,
      "step": 144570
    },
    {
      "epoch": 0.5067168546965412,
      "grad_norm": 2.78125,
      "learning_rate": 4.6172028941486175e-05,
      "loss": 0.9811,
      "step": 144580
    },
    {
      "epoch": 0.5067519022034368,
      "grad_norm": 3.046875,
      "learning_rate": 4.617137991282247e-05,
      "loss": 0.965,
      "step": 144590
    },
    {
      "epoch": 0.5067869497103323,
      "grad_norm": 2.5625,
      "learning_rate": 4.617073088415877e-05,
      "loss": 0.9737,
      "step": 144600
    },
    {
      "epoch": 0.506821997217228,
      "grad_norm": 3.3125,
      "learning_rate": 4.6170081855495066e-05,
      "loss": 0.9407,
      "step": 144610
    },
    {
      "epoch": 0.5068570447241235,
      "grad_norm": 3.046875,
      "learning_rate": 4.616943282683137e-05,
      "loss": 0.9105,
      "step": 144620
    },
    {
      "epoch": 0.5068920922310192,
      "grad_norm": 3.1875,
      "learning_rate": 4.616878379816766e-05,
      "loss": 0.9964,
      "step": 144630
    },
    {
      "epoch": 0.5069271397379147,
      "grad_norm": 3.015625,
      "learning_rate": 4.616813476950396e-05,
      "loss": 0.9844,
      "step": 144640
    },
    {
      "epoch": 0.5069621872448103,
      "grad_norm": 3.0,
      "learning_rate": 4.616748574084026e-05,
      "loss": 0.9474,
      "step": 144650
    },
    {
      "epoch": 0.5069972347517059,
      "grad_norm": 3.109375,
      "learning_rate": 4.616683671217656e-05,
      "loss": 1.0295,
      "step": 144660
    },
    {
      "epoch": 0.5070322822586015,
      "grad_norm": 3.28125,
      "learning_rate": 4.616618768351286e-05,
      "loss": 0.9603,
      "step": 144670
    },
    {
      "epoch": 0.5070673297654972,
      "grad_norm": 2.828125,
      "learning_rate": 4.6165538654849155e-05,
      "loss": 0.9738,
      "step": 144680
    },
    {
      "epoch": 0.5071023772723927,
      "grad_norm": 3.109375,
      "learning_rate": 4.6164889626185456e-05,
      "loss": 0.9401,
      "step": 144690
    },
    {
      "epoch": 0.5071374247792884,
      "grad_norm": 3.03125,
      "learning_rate": 4.616424059752175e-05,
      "loss": 0.9355,
      "step": 144700
    },
    {
      "epoch": 0.5071724722861839,
      "grad_norm": 3.484375,
      "learning_rate": 4.616359156885805e-05,
      "loss": 0.9449,
      "step": 144710
    },
    {
      "epoch": 0.5072075197930795,
      "grad_norm": 2.625,
      "learning_rate": 4.616294254019435e-05,
      "loss": 0.9094,
      "step": 144720
    },
    {
      "epoch": 0.5072425672999751,
      "grad_norm": 3.09375,
      "learning_rate": 4.616229351153065e-05,
      "loss": 0.9684,
      "step": 144730
    },
    {
      "epoch": 0.5072776148068707,
      "grad_norm": 3.109375,
      "learning_rate": 4.616164448286694e-05,
      "loss": 0.9477,
      "step": 144740
    },
    {
      "epoch": 0.5073126623137663,
      "grad_norm": 3.125,
      "learning_rate": 4.6160995454203244e-05,
      "loss": 0.9653,
      "step": 144750
    },
    {
      "epoch": 0.5073477098206619,
      "grad_norm": 2.65625,
      "learning_rate": 4.616034642553954e-05,
      "loss": 0.9085,
      "step": 144760
    },
    {
      "epoch": 0.5073827573275576,
      "grad_norm": 2.65625,
      "learning_rate": 4.615969739687584e-05,
      "loss": 0.9149,
      "step": 144770
    },
    {
      "epoch": 0.5074178048344531,
      "grad_norm": 2.875,
      "learning_rate": 4.6159048368212135e-05,
      "loss": 0.9327,
      "step": 144780
    },
    {
      "epoch": 0.5074528523413487,
      "grad_norm": 3.265625,
      "learning_rate": 4.6158399339548436e-05,
      "loss": 0.8759,
      "step": 144790
    },
    {
      "epoch": 0.5074878998482443,
      "grad_norm": 3.265625,
      "learning_rate": 4.615775031088473e-05,
      "loss": 0.9277,
      "step": 144800
    },
    {
      "epoch": 0.5075229473551399,
      "grad_norm": 2.59375,
      "learning_rate": 4.615710128222103e-05,
      "loss": 0.931,
      "step": 144810
    },
    {
      "epoch": 0.5075579948620355,
      "grad_norm": 2.84375,
      "learning_rate": 4.6156452253557334e-05,
      "loss": 0.8835,
      "step": 144820
    },
    {
      "epoch": 0.5075930423689311,
      "grad_norm": 3.078125,
      "learning_rate": 4.615580322489363e-05,
      "loss": 0.9638,
      "step": 144830
    },
    {
      "epoch": 0.5076280898758266,
      "grad_norm": 2.828125,
      "learning_rate": 4.615515419622993e-05,
      "loss": 0.932,
      "step": 144840
    },
    {
      "epoch": 0.5076631373827223,
      "grad_norm": 2.921875,
      "learning_rate": 4.615450516756622e-05,
      "loss": 0.896,
      "step": 144850
    },
    {
      "epoch": 0.5076981848896179,
      "grad_norm": 3.03125,
      "learning_rate": 4.615385613890252e-05,
      "loss": 0.9777,
      "step": 144860
    },
    {
      "epoch": 0.5077332323965135,
      "grad_norm": 2.9375,
      "learning_rate": 4.6153207110238814e-05,
      "loss": 0.9241,
      "step": 144870
    },
    {
      "epoch": 0.5077682799034091,
      "grad_norm": 3.125,
      "learning_rate": 4.6152558081575115e-05,
      "loss": 1.0296,
      "step": 144880
    },
    {
      "epoch": 0.5078033274103047,
      "grad_norm": 2.890625,
      "learning_rate": 4.615190905291141e-05,
      "loss": 0.9819,
      "step": 144890
    },
    {
      "epoch": 0.5078383749172003,
      "grad_norm": 3.125,
      "learning_rate": 4.615126002424771e-05,
      "loss": 0.9783,
      "step": 144900
    },
    {
      "epoch": 0.5078734224240958,
      "grad_norm": 3.359375,
      "learning_rate": 4.615061099558401e-05,
      "loss": 0.9837,
      "step": 144910
    },
    {
      "epoch": 0.5079084699309915,
      "grad_norm": 2.765625,
      "learning_rate": 4.614996196692031e-05,
      "loss": 0.9871,
      "step": 144920
    },
    {
      "epoch": 0.507943517437887,
      "grad_norm": 2.984375,
      "learning_rate": 4.614931293825661e-05,
      "loss": 0.8895,
      "step": 144930
    },
    {
      "epoch": 0.5079785649447827,
      "grad_norm": 3.953125,
      "learning_rate": 4.61486639095929e-05,
      "loss": 0.9862,
      "step": 144940
    },
    {
      "epoch": 0.5080136124516782,
      "grad_norm": 3.046875,
      "learning_rate": 4.6148014880929204e-05,
      "loss": 0.9876,
      "step": 144950
    },
    {
      "epoch": 0.5080486599585738,
      "grad_norm": 2.796875,
      "learning_rate": 4.61473658522655e-05,
      "loss": 0.8362,
      "step": 144960
    },
    {
      "epoch": 0.5080837074654695,
      "grad_norm": 2.875,
      "learning_rate": 4.61467168236018e-05,
      "loss": 0.9209,
      "step": 144970
    },
    {
      "epoch": 0.508118754972365,
      "grad_norm": 3.1875,
      "learning_rate": 4.6146067794938095e-05,
      "loss": 0.9866,
      "step": 144980
    },
    {
      "epoch": 0.5081538024792607,
      "grad_norm": 2.6875,
      "learning_rate": 4.6145418766274396e-05,
      "loss": 0.9244,
      "step": 144990
    },
    {
      "epoch": 0.5081888499861562,
      "grad_norm": 3.078125,
      "learning_rate": 4.614476973761069e-05,
      "loss": 0.9275,
      "step": 145000
    },
    {
      "epoch": 0.5081888499861562,
      "eval_loss": 0.8695665001869202,
      "eval_runtime": 562.7376,
      "eval_samples_per_second": 676.045,
      "eval_steps_per_second": 56.337,
      "step": 145000
    },
    {
      "epoch": 0.5082238974930519,
      "grad_norm": 3.0,
      "learning_rate": 4.614412070894699e-05,
      "loss": 0.9804,
      "step": 145010
    },
    {
      "epoch": 0.5082589449999474,
      "grad_norm": 3.421875,
      "learning_rate": 4.614347168028329e-05,
      "loss": 1.0356,
      "step": 145020
    },
    {
      "epoch": 0.508293992506843,
      "grad_norm": 2.40625,
      "learning_rate": 4.614282265161959e-05,
      "loss": 0.926,
      "step": 145030
    },
    {
      "epoch": 0.5083290400137386,
      "grad_norm": 3.59375,
      "learning_rate": 4.614217362295589e-05,
      "loss": 1.0368,
      "step": 145040
    },
    {
      "epoch": 0.5083640875206342,
      "grad_norm": 2.765625,
      "learning_rate": 4.6141524594292184e-05,
      "loss": 0.8842,
      "step": 145050
    },
    {
      "epoch": 0.5083991350275299,
      "grad_norm": 2.828125,
      "learning_rate": 4.6140875565628486e-05,
      "loss": 0.9805,
      "step": 145060
    },
    {
      "epoch": 0.5084341825344254,
      "grad_norm": 3.0,
      "learning_rate": 4.614022653696478e-05,
      "loss": 0.8876,
      "step": 145070
    },
    {
      "epoch": 0.508469230041321,
      "grad_norm": 3.3125,
      "learning_rate": 4.613957750830108e-05,
      "loss": 0.8985,
      "step": 145080
    },
    {
      "epoch": 0.5085042775482166,
      "grad_norm": 2.9375,
      "learning_rate": 4.6138928479637376e-05,
      "loss": 0.9147,
      "step": 145090
    },
    {
      "epoch": 0.5085393250551122,
      "grad_norm": 2.875,
      "learning_rate": 4.613827945097368e-05,
      "loss": 0.9887,
      "step": 145100
    },
    {
      "epoch": 0.5085743725620078,
      "grad_norm": 2.796875,
      "learning_rate": 4.613763042230997e-05,
      "loss": 0.9196,
      "step": 145110
    },
    {
      "epoch": 0.5086094200689034,
      "grad_norm": 3.046875,
      "learning_rate": 4.6136981393646274e-05,
      "loss": 0.9544,
      "step": 145120
    },
    {
      "epoch": 0.508644467575799,
      "grad_norm": 2.90625,
      "learning_rate": 4.613633236498257e-05,
      "loss": 0.9574,
      "step": 145130
    },
    {
      "epoch": 0.5086795150826946,
      "grad_norm": 2.734375,
      "learning_rate": 4.613568333631887e-05,
      "loss": 0.8825,
      "step": 145140
    },
    {
      "epoch": 0.5087145625895901,
      "grad_norm": 3.078125,
      "learning_rate": 4.6135034307655164e-05,
      "loss": 0.8959,
      "step": 145150
    },
    {
      "epoch": 0.5087496100964858,
      "grad_norm": 2.390625,
      "learning_rate": 4.6134385278991466e-05,
      "loss": 0.8973,
      "step": 145160
    },
    {
      "epoch": 0.5087846576033814,
      "grad_norm": 3.125,
      "learning_rate": 4.613373625032776e-05,
      "loss": 0.894,
      "step": 145170
    },
    {
      "epoch": 0.508819705110277,
      "grad_norm": 2.65625,
      "learning_rate": 4.613308722166406e-05,
      "loss": 0.8994,
      "step": 145180
    },
    {
      "epoch": 0.5088547526171726,
      "grad_norm": 3.015625,
      "learning_rate": 4.613243819300036e-05,
      "loss": 0.9186,
      "step": 145190
    },
    {
      "epoch": 0.5088898001240681,
      "grad_norm": 2.921875,
      "learning_rate": 4.613178916433666e-05,
      "loss": 0.8741,
      "step": 145200
    },
    {
      "epoch": 0.5089248476309638,
      "grad_norm": 2.875,
      "learning_rate": 4.613114013567296e-05,
      "loss": 0.9368,
      "step": 145210
    },
    {
      "epoch": 0.5089598951378593,
      "grad_norm": 3.0,
      "learning_rate": 4.6130491107009254e-05,
      "loss": 0.9789,
      "step": 145220
    },
    {
      "epoch": 0.508994942644755,
      "grad_norm": 2.625,
      "learning_rate": 4.612984207834555e-05,
      "loss": 0.9221,
      "step": 145230
    },
    {
      "epoch": 0.5090299901516505,
      "grad_norm": 2.609375,
      "learning_rate": 4.612919304968184e-05,
      "loss": 0.7951,
      "step": 145240
    },
    {
      "epoch": 0.5090650376585462,
      "grad_norm": 3.03125,
      "learning_rate": 4.6128544021018144e-05,
      "loss": 0.9804,
      "step": 145250
    },
    {
      "epoch": 0.5091000851654418,
      "grad_norm": 2.765625,
      "learning_rate": 4.612789499235444e-05,
      "loss": 0.9411,
      "step": 145260
    },
    {
      "epoch": 0.5091351326723373,
      "grad_norm": 3.0,
      "learning_rate": 4.612724596369074e-05,
      "loss": 0.9275,
      "step": 145270
    },
    {
      "epoch": 0.509170180179233,
      "grad_norm": 3.4375,
      "learning_rate": 4.612659693502704e-05,
      "loss": 0.9435,
      "step": 145280
    },
    {
      "epoch": 0.5092052276861285,
      "grad_norm": 3.09375,
      "learning_rate": 4.6125947906363336e-05,
      "loss": 0.9249,
      "step": 145290
    },
    {
      "epoch": 0.5092402751930242,
      "grad_norm": 3.5,
      "learning_rate": 4.612529887769964e-05,
      "loss": 0.8966,
      "step": 145300
    },
    {
      "epoch": 0.5092753226999197,
      "grad_norm": 2.90625,
      "learning_rate": 4.612464984903593e-05,
      "loss": 1.0291,
      "step": 145310
    },
    {
      "epoch": 0.5093103702068154,
      "grad_norm": 3.546875,
      "learning_rate": 4.6124000820372234e-05,
      "loss": 0.8745,
      "step": 145320
    },
    {
      "epoch": 0.5093454177137109,
      "grad_norm": 2.96875,
      "learning_rate": 4.612335179170853e-05,
      "loss": 1.0147,
      "step": 145330
    },
    {
      "epoch": 0.5093804652206065,
      "grad_norm": 3.1875,
      "learning_rate": 4.612270276304483e-05,
      "loss": 0.9748,
      "step": 145340
    },
    {
      "epoch": 0.5094155127275022,
      "grad_norm": 2.90625,
      "learning_rate": 4.6122053734381124e-05,
      "loss": 1.0029,
      "step": 145350
    },
    {
      "epoch": 0.5094505602343977,
      "grad_norm": 3.21875,
      "learning_rate": 4.6121404705717426e-05,
      "loss": 0.908,
      "step": 145360
    },
    {
      "epoch": 0.5094856077412934,
      "grad_norm": 3.0625,
      "learning_rate": 4.612075567705372e-05,
      "loss": 1.0036,
      "step": 145370
    },
    {
      "epoch": 0.5095206552481889,
      "grad_norm": 3.21875,
      "learning_rate": 4.612010664839002e-05,
      "loss": 0.985,
      "step": 145380
    },
    {
      "epoch": 0.5095557027550845,
      "grad_norm": 2.765625,
      "learning_rate": 4.6119457619726316e-05,
      "loss": 0.887,
      "step": 145390
    },
    {
      "epoch": 0.5095907502619801,
      "grad_norm": 2.9375,
      "learning_rate": 4.611880859106262e-05,
      "loss": 0.9078,
      "step": 145400
    },
    {
      "epoch": 0.5096257977688757,
      "grad_norm": 2.40625,
      "learning_rate": 4.611815956239892e-05,
      "loss": 0.8547,
      "step": 145410
    },
    {
      "epoch": 0.5096608452757713,
      "grad_norm": 3.125,
      "learning_rate": 4.6117510533735214e-05,
      "loss": 0.9389,
      "step": 145420
    },
    {
      "epoch": 0.5096958927826669,
      "grad_norm": 2.6875,
      "learning_rate": 4.6116861505071515e-05,
      "loss": 0.9169,
      "step": 145430
    },
    {
      "epoch": 0.5097309402895625,
      "grad_norm": 3.171875,
      "learning_rate": 4.611621247640781e-05,
      "loss": 0.936,
      "step": 145440
    },
    {
      "epoch": 0.5097659877964581,
      "grad_norm": 3.3125,
      "learning_rate": 4.611556344774411e-05,
      "loss": 0.9365,
      "step": 145450
    },
    {
      "epoch": 0.5098010353033537,
      "grad_norm": 2.90625,
      "learning_rate": 4.6114914419080406e-05,
      "loss": 0.9108,
      "step": 145460
    },
    {
      "epoch": 0.5098360828102493,
      "grad_norm": 3.15625,
      "learning_rate": 4.611426539041671e-05,
      "loss": 0.916,
      "step": 145470
    },
    {
      "epoch": 0.5098711303171449,
      "grad_norm": 2.84375,
      "learning_rate": 4.6113616361753e-05,
      "loss": 0.9289,
      "step": 145480
    },
    {
      "epoch": 0.5099061778240405,
      "grad_norm": 2.84375,
      "learning_rate": 4.61129673330893e-05,
      "loss": 0.8571,
      "step": 145490
    },
    {
      "epoch": 0.5099412253309361,
      "grad_norm": 2.78125,
      "learning_rate": 4.61123183044256e-05,
      "loss": 0.9834,
      "step": 145500
    },
    {
      "epoch": 0.5099762728378316,
      "grad_norm": 2.515625,
      "learning_rate": 4.61116692757619e-05,
      "loss": 0.9522,
      "step": 145510
    },
    {
      "epoch": 0.5100113203447273,
      "grad_norm": 2.828125,
      "learning_rate": 4.6111020247098194e-05,
      "loss": 0.9296,
      "step": 145520
    },
    {
      "epoch": 0.5100463678516228,
      "grad_norm": 3.046875,
      "learning_rate": 4.6110371218434495e-05,
      "loss": 0.9098,
      "step": 145530
    },
    {
      "epoch": 0.5100814153585185,
      "grad_norm": 3.171875,
      "learning_rate": 4.6109722189770797e-05,
      "loss": 0.9538,
      "step": 145540
    },
    {
      "epoch": 0.5101164628654141,
      "grad_norm": 3.25,
      "learning_rate": 4.610907316110709e-05,
      "loss": 0.9572,
      "step": 145550
    },
    {
      "epoch": 0.5101515103723097,
      "grad_norm": 3.0,
      "learning_rate": 4.610842413244339e-05,
      "loss": 0.9633,
      "step": 145560
    },
    {
      "epoch": 0.5101865578792053,
      "grad_norm": 2.84375,
      "learning_rate": 4.610777510377969e-05,
      "loss": 0.931,
      "step": 145570
    },
    {
      "epoch": 0.5102216053861008,
      "grad_norm": 2.828125,
      "learning_rate": 4.610712607511599e-05,
      "loss": 0.9233,
      "step": 145580
    },
    {
      "epoch": 0.5102566528929965,
      "grad_norm": 2.578125,
      "learning_rate": 4.610647704645228e-05,
      "loss": 0.8675,
      "step": 145590
    },
    {
      "epoch": 0.510291700399892,
      "grad_norm": 3.203125,
      "learning_rate": 4.610582801778858e-05,
      "loss": 0.8003,
      "step": 145600
    },
    {
      "epoch": 0.5103267479067877,
      "grad_norm": 3.359375,
      "learning_rate": 4.610517898912487e-05,
      "loss": 0.9216,
      "step": 145610
    },
    {
      "epoch": 0.5103617954136832,
      "grad_norm": 3.046875,
      "learning_rate": 4.6104529960461174e-05,
      "loss": 0.9159,
      "step": 145620
    },
    {
      "epoch": 0.5103968429205789,
      "grad_norm": 2.9375,
      "learning_rate": 4.6103880931797475e-05,
      "loss": 0.9534,
      "step": 145630
    },
    {
      "epoch": 0.5104318904274744,
      "grad_norm": 3.1875,
      "learning_rate": 4.610323190313377e-05,
      "loss": 0.9996,
      "step": 145640
    },
    {
      "epoch": 0.51046693793437,
      "grad_norm": 2.84375,
      "learning_rate": 4.610258287447007e-05,
      "loss": 0.9925,
      "step": 145650
    },
    {
      "epoch": 0.5105019854412657,
      "grad_norm": 3.28125,
      "learning_rate": 4.6101933845806366e-05,
      "loss": 0.9095,
      "step": 145660
    },
    {
      "epoch": 0.5105370329481612,
      "grad_norm": 3.171875,
      "learning_rate": 4.610128481714267e-05,
      "loss": 0.9523,
      "step": 145670
    },
    {
      "epoch": 0.5105720804550569,
      "grad_norm": 3.0,
      "learning_rate": 4.610063578847896e-05,
      "loss": 0.9512,
      "step": 145680
    },
    {
      "epoch": 0.5106071279619524,
      "grad_norm": 3.390625,
      "learning_rate": 4.609998675981526e-05,
      "loss": 0.9547,
      "step": 145690
    },
    {
      "epoch": 0.510642175468848,
      "grad_norm": 2.796875,
      "learning_rate": 4.609933773115156e-05,
      "loss": 0.8536,
      "step": 145700
    },
    {
      "epoch": 0.5106772229757436,
      "grad_norm": 2.890625,
      "learning_rate": 4.609868870248786e-05,
      "loss": 0.9478,
      "step": 145710
    },
    {
      "epoch": 0.5107122704826392,
      "grad_norm": 3.234375,
      "learning_rate": 4.6098039673824154e-05,
      "loss": 0.9513,
      "step": 145720
    },
    {
      "epoch": 0.5107473179895348,
      "grad_norm": 3.171875,
      "learning_rate": 4.6097390645160455e-05,
      "loss": 0.9348,
      "step": 145730
    },
    {
      "epoch": 0.5107823654964304,
      "grad_norm": 2.765625,
      "learning_rate": 4.609674161649675e-05,
      "loss": 0.927,
      "step": 145740
    },
    {
      "epoch": 0.5108174130033261,
      "grad_norm": 2.625,
      "learning_rate": 4.609609258783305e-05,
      "loss": 0.9232,
      "step": 145750
    },
    {
      "epoch": 0.5108524605102216,
      "grad_norm": 3.203125,
      "learning_rate": 4.6095443559169346e-05,
      "loss": 0.8411,
      "step": 145760
    },
    {
      "epoch": 0.5108875080171172,
      "grad_norm": 3.171875,
      "learning_rate": 4.609479453050565e-05,
      "loss": 0.9508,
      "step": 145770
    },
    {
      "epoch": 0.5109225555240128,
      "grad_norm": 3.25,
      "learning_rate": 4.609414550184195e-05,
      "loss": 0.9717,
      "step": 145780
    },
    {
      "epoch": 0.5109576030309084,
      "grad_norm": 3.390625,
      "learning_rate": 4.609349647317824e-05,
      "loss": 1.0359,
      "step": 145790
    },
    {
      "epoch": 0.510992650537804,
      "grad_norm": 2.859375,
      "learning_rate": 4.6092847444514545e-05,
      "loss": 0.8426,
      "step": 145800
    },
    {
      "epoch": 0.5110276980446996,
      "grad_norm": 3.078125,
      "learning_rate": 4.609219841585084e-05,
      "loss": 1.0003,
      "step": 145810
    },
    {
      "epoch": 0.5110627455515951,
      "grad_norm": 3.25,
      "learning_rate": 4.609154938718714e-05,
      "loss": 0.8955,
      "step": 145820
    },
    {
      "epoch": 0.5110977930584908,
      "grad_norm": 2.78125,
      "learning_rate": 4.6090900358523435e-05,
      "loss": 0.8687,
      "step": 145830
    },
    {
      "epoch": 0.5111328405653863,
      "grad_norm": 2.859375,
      "learning_rate": 4.6090251329859737e-05,
      "loss": 0.9248,
      "step": 145840
    },
    {
      "epoch": 0.511167888072282,
      "grad_norm": 3.28125,
      "learning_rate": 4.608960230119603e-05,
      "loss": 0.9133,
      "step": 145850
    },
    {
      "epoch": 0.5112029355791776,
      "grad_norm": 2.765625,
      "learning_rate": 4.608895327253233e-05,
      "loss": 0.8884,
      "step": 145860
    },
    {
      "epoch": 0.5112379830860732,
      "grad_norm": 3.140625,
      "learning_rate": 4.608830424386863e-05,
      "loss": 1.0149,
      "step": 145870
    },
    {
      "epoch": 0.5112730305929688,
      "grad_norm": 2.734375,
      "learning_rate": 4.608765521520493e-05,
      "loss": 0.8634,
      "step": 145880
    },
    {
      "epoch": 0.5113080780998643,
      "grad_norm": 3.0,
      "learning_rate": 4.608700618654122e-05,
      "loss": 0.9038,
      "step": 145890
    },
    {
      "epoch": 0.51134312560676,
      "grad_norm": 2.75,
      "learning_rate": 4.6086357157877525e-05,
      "loss": 0.8728,
      "step": 145900
    },
    {
      "epoch": 0.5113781731136555,
      "grad_norm": 2.765625,
      "learning_rate": 4.6085708129213826e-05,
      "loss": 0.8717,
      "step": 145910
    },
    {
      "epoch": 0.5114132206205512,
      "grad_norm": 3.09375,
      "learning_rate": 4.608505910055012e-05,
      "loss": 0.9311,
      "step": 145920
    },
    {
      "epoch": 0.5114482681274467,
      "grad_norm": 2.828125,
      "learning_rate": 4.608441007188642e-05,
      "loss": 0.974,
      "step": 145930
    },
    {
      "epoch": 0.5114833156343424,
      "grad_norm": 3.046875,
      "learning_rate": 4.6083761043222717e-05,
      "loss": 0.9242,
      "step": 145940
    },
    {
      "epoch": 0.511518363141238,
      "grad_norm": 2.8125,
      "learning_rate": 4.608311201455902e-05,
      "loss": 0.9009,
      "step": 145950
    },
    {
      "epoch": 0.5115534106481335,
      "grad_norm": 2.65625,
      "learning_rate": 4.608246298589531e-05,
      "loss": 0.9962,
      "step": 145960
    },
    {
      "epoch": 0.5115884581550292,
      "grad_norm": 2.796875,
      "learning_rate": 4.6081813957231614e-05,
      "loss": 0.9185,
      "step": 145970
    },
    {
      "epoch": 0.5116235056619247,
      "grad_norm": 2.71875,
      "learning_rate": 4.60811649285679e-05,
      "loss": 0.9995,
      "step": 145980
    },
    {
      "epoch": 0.5116585531688204,
      "grad_norm": 2.859375,
      "learning_rate": 4.60805158999042e-05,
      "loss": 0.9875,
      "step": 145990
    },
    {
      "epoch": 0.5116936006757159,
      "grad_norm": 2.515625,
      "learning_rate": 4.6079866871240505e-05,
      "loss": 0.9281,
      "step": 146000
    },
    {
      "epoch": 0.5117286481826115,
      "grad_norm": 3.109375,
      "learning_rate": 4.60792178425768e-05,
      "loss": 0.8353,
      "step": 146010
    },
    {
      "epoch": 0.5117636956895071,
      "grad_norm": 3.0625,
      "learning_rate": 4.60785688139131e-05,
      "loss": 0.9574,
      "step": 146020
    },
    {
      "epoch": 0.5117987431964027,
      "grad_norm": 3.15625,
      "learning_rate": 4.6077919785249395e-05,
      "loss": 0.9296,
      "step": 146030
    },
    {
      "epoch": 0.5118337907032984,
      "grad_norm": 2.75,
      "learning_rate": 4.6077270756585697e-05,
      "loss": 0.8709,
      "step": 146040
    },
    {
      "epoch": 0.5118688382101939,
      "grad_norm": 3.28125,
      "learning_rate": 4.607662172792199e-05,
      "loss": 0.9292,
      "step": 146050
    },
    {
      "epoch": 0.5119038857170896,
      "grad_norm": 2.9375,
      "learning_rate": 4.607597269925829e-05,
      "loss": 0.8493,
      "step": 146060
    },
    {
      "epoch": 0.5119389332239851,
      "grad_norm": 2.140625,
      "learning_rate": 4.607532367059459e-05,
      "loss": 0.9292,
      "step": 146070
    },
    {
      "epoch": 0.5119739807308807,
      "grad_norm": 2.5,
      "learning_rate": 4.607467464193089e-05,
      "loss": 0.907,
      "step": 146080
    },
    {
      "epoch": 0.5120090282377763,
      "grad_norm": 2.8125,
      "learning_rate": 4.607402561326718e-05,
      "loss": 1.0048,
      "step": 146090
    },
    {
      "epoch": 0.5120440757446719,
      "grad_norm": 3.125,
      "learning_rate": 4.6073376584603485e-05,
      "loss": 0.8849,
      "step": 146100
    },
    {
      "epoch": 0.5120791232515675,
      "grad_norm": 3.3125,
      "learning_rate": 4.607272755593978e-05,
      "loss": 0.9341,
      "step": 146110
    },
    {
      "epoch": 0.5121141707584631,
      "grad_norm": 3.109375,
      "learning_rate": 4.607207852727608e-05,
      "loss": 0.9993,
      "step": 146120
    },
    {
      "epoch": 0.5121492182653586,
      "grad_norm": 2.890625,
      "learning_rate": 4.6071429498612375e-05,
      "loss": 0.9777,
      "step": 146130
    },
    {
      "epoch": 0.5121842657722543,
      "grad_norm": 2.90625,
      "learning_rate": 4.6070780469948677e-05,
      "loss": 0.9765,
      "step": 146140
    },
    {
      "epoch": 0.5122193132791499,
      "grad_norm": 3.140625,
      "learning_rate": 4.607013144128498e-05,
      "loss": 0.8894,
      "step": 146150
    },
    {
      "epoch": 0.5122543607860455,
      "grad_norm": 3.046875,
      "learning_rate": 4.606948241262127e-05,
      "loss": 0.9226,
      "step": 146160
    },
    {
      "epoch": 0.5122894082929411,
      "grad_norm": 3.21875,
      "learning_rate": 4.6068833383957574e-05,
      "loss": 0.9638,
      "step": 146170
    },
    {
      "epoch": 0.5123244557998367,
      "grad_norm": 3.296875,
      "learning_rate": 4.606818435529387e-05,
      "loss": 1.0423,
      "step": 146180
    },
    {
      "epoch": 0.5123595033067323,
      "grad_norm": 3.078125,
      "learning_rate": 4.606753532663017e-05,
      "loss": 1.0123,
      "step": 146190
    },
    {
      "epoch": 0.5123945508136278,
      "grad_norm": 2.921875,
      "learning_rate": 4.6066886297966465e-05,
      "loss": 0.8831,
      "step": 146200
    },
    {
      "epoch": 0.5124295983205235,
      "grad_norm": 3.203125,
      "learning_rate": 4.6066237269302766e-05,
      "loss": 0.9506,
      "step": 146210
    },
    {
      "epoch": 0.512464645827419,
      "grad_norm": 3.1875,
      "learning_rate": 4.606558824063906e-05,
      "loss": 0.9431,
      "step": 146220
    },
    {
      "epoch": 0.5124996933343147,
      "grad_norm": 3.140625,
      "learning_rate": 4.606493921197536e-05,
      "loss": 0.9321,
      "step": 146230
    },
    {
      "epoch": 0.5125347408412103,
      "grad_norm": 2.78125,
      "learning_rate": 4.6064290183311657e-05,
      "loss": 0.8414,
      "step": 146240
    },
    {
      "epoch": 0.5125697883481058,
      "grad_norm": 3.0625,
      "learning_rate": 4.606364115464796e-05,
      "loss": 0.9816,
      "step": 146250
    },
    {
      "epoch": 0.5126048358550015,
      "grad_norm": 3.015625,
      "learning_rate": 4.606299212598425e-05,
      "loss": 0.8573,
      "step": 146260
    },
    {
      "epoch": 0.512639883361897,
      "grad_norm": 3.359375,
      "learning_rate": 4.6062343097320554e-05,
      "loss": 0.9347,
      "step": 146270
    },
    {
      "epoch": 0.5126749308687927,
      "grad_norm": 2.875,
      "learning_rate": 4.6061694068656855e-05,
      "loss": 0.9602,
      "step": 146280
    },
    {
      "epoch": 0.5127099783756882,
      "grad_norm": 2.65625,
      "learning_rate": 4.606104503999315e-05,
      "loss": 0.8637,
      "step": 146290
    },
    {
      "epoch": 0.5127450258825839,
      "grad_norm": 2.984375,
      "learning_rate": 4.606039601132945e-05,
      "loss": 0.9703,
      "step": 146300
    },
    {
      "epoch": 0.5127800733894794,
      "grad_norm": 2.796875,
      "learning_rate": 4.6059746982665746e-05,
      "loss": 0.9585,
      "step": 146310
    },
    {
      "epoch": 0.512815120896375,
      "grad_norm": 2.65625,
      "learning_rate": 4.605909795400205e-05,
      "loss": 0.9383,
      "step": 146320
    },
    {
      "epoch": 0.5128501684032706,
      "grad_norm": 3.1875,
      "learning_rate": 4.605844892533834e-05,
      "loss": 1.0307,
      "step": 146330
    },
    {
      "epoch": 0.5128852159101662,
      "grad_norm": 3.5,
      "learning_rate": 4.605779989667464e-05,
      "loss": 0.9543,
      "step": 146340
    },
    {
      "epoch": 0.5129202634170619,
      "grad_norm": 2.859375,
      "learning_rate": 4.605715086801094e-05,
      "loss": 0.8212,
      "step": 146350
    },
    {
      "epoch": 0.5129553109239574,
      "grad_norm": 3.375,
      "learning_rate": 4.605650183934723e-05,
      "loss": 0.9929,
      "step": 146360
    },
    {
      "epoch": 0.512990358430853,
      "grad_norm": 3.296875,
      "learning_rate": 4.6055852810683534e-05,
      "loss": 0.9157,
      "step": 146370
    },
    {
      "epoch": 0.5130254059377486,
      "grad_norm": 3.09375,
      "learning_rate": 4.605520378201983e-05,
      "loss": 1.0484,
      "step": 146380
    },
    {
      "epoch": 0.5130604534446442,
      "grad_norm": 3.453125,
      "learning_rate": 4.605455475335613e-05,
      "loss": 0.9147,
      "step": 146390
    },
    {
      "epoch": 0.5130955009515398,
      "grad_norm": 2.921875,
      "learning_rate": 4.6053905724692425e-05,
      "loss": 0.9595,
      "step": 146400
    },
    {
      "epoch": 0.5131305484584354,
      "grad_norm": 3.21875,
      "learning_rate": 4.6053256696028726e-05,
      "loss": 0.8901,
      "step": 146410
    },
    {
      "epoch": 0.513165595965331,
      "grad_norm": 2.703125,
      "learning_rate": 4.605260766736502e-05,
      "loss": 0.8869,
      "step": 146420
    },
    {
      "epoch": 0.5132006434722266,
      "grad_norm": 3.15625,
      "learning_rate": 4.605195863870132e-05,
      "loss": 0.9098,
      "step": 146430
    },
    {
      "epoch": 0.5132356909791222,
      "grad_norm": 3.40625,
      "learning_rate": 4.6051309610037617e-05,
      "loss": 0.9518,
      "step": 146440
    },
    {
      "epoch": 0.5132707384860178,
      "grad_norm": 3.125,
      "learning_rate": 4.605066058137392e-05,
      "loss": 0.888,
      "step": 146450
    },
    {
      "epoch": 0.5133057859929134,
      "grad_norm": 2.671875,
      "learning_rate": 4.605001155271021e-05,
      "loss": 0.885,
      "step": 146460
    },
    {
      "epoch": 0.513340833499809,
      "grad_norm": 2.921875,
      "learning_rate": 4.6049362524046514e-05,
      "loss": 0.952,
      "step": 146470
    },
    {
      "epoch": 0.5133758810067046,
      "grad_norm": 3.0,
      "learning_rate": 4.604871349538281e-05,
      "loss": 0.9435,
      "step": 146480
    },
    {
      "epoch": 0.5134109285136002,
      "grad_norm": 3.125,
      "learning_rate": 4.604806446671911e-05,
      "loss": 0.9625,
      "step": 146490
    },
    {
      "epoch": 0.5134459760204958,
      "grad_norm": 2.890625,
      "learning_rate": 4.604741543805541e-05,
      "loss": 0.9338,
      "step": 146500
    },
    {
      "epoch": 0.5134810235273913,
      "grad_norm": 2.78125,
      "learning_rate": 4.6046766409391706e-05,
      "loss": 0.9132,
      "step": 146510
    },
    {
      "epoch": 0.513516071034287,
      "grad_norm": 2.96875,
      "learning_rate": 4.604611738072801e-05,
      "loss": 0.9198,
      "step": 146520
    },
    {
      "epoch": 0.5135511185411826,
      "grad_norm": 3.484375,
      "learning_rate": 4.60454683520643e-05,
      "loss": 0.9979,
      "step": 146530
    },
    {
      "epoch": 0.5135861660480782,
      "grad_norm": 3.03125,
      "learning_rate": 4.60448193234006e-05,
      "loss": 0.9718,
      "step": 146540
    },
    {
      "epoch": 0.5136212135549738,
      "grad_norm": 3.140625,
      "learning_rate": 4.60441702947369e-05,
      "loss": 0.9321,
      "step": 146550
    },
    {
      "epoch": 0.5136562610618693,
      "grad_norm": 3.15625,
      "learning_rate": 4.60435212660732e-05,
      "loss": 0.9537,
      "step": 146560
    },
    {
      "epoch": 0.513691308568765,
      "grad_norm": 2.734375,
      "learning_rate": 4.6042872237409494e-05,
      "loss": 0.9327,
      "step": 146570
    },
    {
      "epoch": 0.5137263560756605,
      "grad_norm": 3.1875,
      "learning_rate": 4.6042223208745795e-05,
      "loss": 1.0839,
      "step": 146580
    },
    {
      "epoch": 0.5137614035825562,
      "grad_norm": 2.796875,
      "learning_rate": 4.604157418008209e-05,
      "loss": 0.9205,
      "step": 146590
    },
    {
      "epoch": 0.5137964510894517,
      "grad_norm": 3.734375,
      "learning_rate": 4.604092515141839e-05,
      "loss": 0.9132,
      "step": 146600
    },
    {
      "epoch": 0.5138314985963474,
      "grad_norm": 3.734375,
      "learning_rate": 4.6040276122754686e-05,
      "loss": 0.9371,
      "step": 146610
    },
    {
      "epoch": 0.5138665461032429,
      "grad_norm": 3.203125,
      "learning_rate": 4.603962709409099e-05,
      "loss": 0.8716,
      "step": 146620
    },
    {
      "epoch": 0.5139015936101385,
      "grad_norm": 2.984375,
      "learning_rate": 4.603897806542728e-05,
      "loss": 0.916,
      "step": 146630
    },
    {
      "epoch": 0.5139366411170342,
      "grad_norm": 3.15625,
      "learning_rate": 4.603832903676358e-05,
      "loss": 0.9131,
      "step": 146640
    },
    {
      "epoch": 0.5139716886239297,
      "grad_norm": 2.96875,
      "learning_rate": 4.6037680008099885e-05,
      "loss": 0.8696,
      "step": 146650
    },
    {
      "epoch": 0.5140067361308254,
      "grad_norm": 2.9375,
      "learning_rate": 4.603703097943618e-05,
      "loss": 0.9485,
      "step": 146660
    },
    {
      "epoch": 0.5140417836377209,
      "grad_norm": 3.265625,
      "learning_rate": 4.603638195077248e-05,
      "loss": 0.8949,
      "step": 146670
    },
    {
      "epoch": 0.5140768311446166,
      "grad_norm": 3.015625,
      "learning_rate": 4.6035732922108775e-05,
      "loss": 0.9346,
      "step": 146680
    },
    {
      "epoch": 0.5141118786515121,
      "grad_norm": 2.921875,
      "learning_rate": 4.603508389344508e-05,
      "loss": 0.9088,
      "step": 146690
    },
    {
      "epoch": 0.5141469261584077,
      "grad_norm": 3.28125,
      "learning_rate": 4.603443486478137e-05,
      "loss": 0.922,
      "step": 146700
    },
    {
      "epoch": 0.5141819736653033,
      "grad_norm": 3.078125,
      "learning_rate": 4.603378583611767e-05,
      "loss": 0.8906,
      "step": 146710
    },
    {
      "epoch": 0.5142170211721989,
      "grad_norm": 2.46875,
      "learning_rate": 4.603313680745397e-05,
      "loss": 0.8679,
      "step": 146720
    },
    {
      "epoch": 0.5142520686790946,
      "grad_norm": 3.15625,
      "learning_rate": 4.603248777879026e-05,
      "loss": 0.9347,
      "step": 146730
    },
    {
      "epoch": 0.5142871161859901,
      "grad_norm": 3.09375,
      "learning_rate": 4.603183875012656e-05,
      "loss": 0.9269,
      "step": 146740
    },
    {
      "epoch": 0.5143221636928857,
      "grad_norm": 3.234375,
      "learning_rate": 4.603118972146286e-05,
      "loss": 0.906,
      "step": 146750
    },
    {
      "epoch": 0.5143572111997813,
      "grad_norm": 3.28125,
      "learning_rate": 4.603054069279916e-05,
      "loss": 1.0328,
      "step": 146760
    },
    {
      "epoch": 0.5143922587066769,
      "grad_norm": 3.3125,
      "learning_rate": 4.6029891664135454e-05,
      "loss": 0.9463,
      "step": 146770
    },
    {
      "epoch": 0.5144273062135725,
      "grad_norm": 2.796875,
      "learning_rate": 4.6029242635471755e-05,
      "loss": 0.9556,
      "step": 146780
    },
    {
      "epoch": 0.5144623537204681,
      "grad_norm": 2.875,
      "learning_rate": 4.602859360680805e-05,
      "loss": 0.9681,
      "step": 146790
    },
    {
      "epoch": 0.5144974012273636,
      "grad_norm": 2.75,
      "learning_rate": 4.602794457814435e-05,
      "loss": 0.8779,
      "step": 146800
    },
    {
      "epoch": 0.5145324487342593,
      "grad_norm": 3.234375,
      "learning_rate": 4.6027295549480646e-05,
      "loss": 0.8881,
      "step": 146810
    },
    {
      "epoch": 0.5145674962411548,
      "grad_norm": 2.515625,
      "learning_rate": 4.602664652081695e-05,
      "loss": 0.9619,
      "step": 146820
    },
    {
      "epoch": 0.5146025437480505,
      "grad_norm": 3.203125,
      "learning_rate": 4.602599749215324e-05,
      "loss": 0.9164,
      "step": 146830
    },
    {
      "epoch": 0.5146375912549461,
      "grad_norm": 3.09375,
      "learning_rate": 4.602534846348954e-05,
      "loss": 0.8842,
      "step": 146840
    },
    {
      "epoch": 0.5146726387618417,
      "grad_norm": 3.296875,
      "learning_rate": 4.602469943482584e-05,
      "loss": 0.9694,
      "step": 146850
    },
    {
      "epoch": 0.5147076862687373,
      "grad_norm": 3.046875,
      "learning_rate": 4.602405040616214e-05,
      "loss": 0.9384,
      "step": 146860
    },
    {
      "epoch": 0.5147427337756328,
      "grad_norm": 2.734375,
      "learning_rate": 4.602340137749844e-05,
      "loss": 0.9723,
      "step": 146870
    },
    {
      "epoch": 0.5147777812825285,
      "grad_norm": 3.078125,
      "learning_rate": 4.6022752348834735e-05,
      "loss": 0.9176,
      "step": 146880
    },
    {
      "epoch": 0.514812828789424,
      "grad_norm": 2.84375,
      "learning_rate": 4.602210332017104e-05,
      "loss": 0.9679,
      "step": 146890
    },
    {
      "epoch": 0.5148478762963197,
      "grad_norm": 2.875,
      "learning_rate": 4.602145429150733e-05,
      "loss": 0.9208,
      "step": 146900
    },
    {
      "epoch": 0.5148829238032152,
      "grad_norm": 2.921875,
      "learning_rate": 4.602080526284363e-05,
      "loss": 1.0096,
      "step": 146910
    },
    {
      "epoch": 0.5149179713101109,
      "grad_norm": 2.6875,
      "learning_rate": 4.602015623417993e-05,
      "loss": 0.885,
      "step": 146920
    },
    {
      "epoch": 0.5149530188170065,
      "grad_norm": 3.125,
      "learning_rate": 4.601950720551623e-05,
      "loss": 0.8839,
      "step": 146930
    },
    {
      "epoch": 0.514988066323902,
      "grad_norm": 3.4375,
      "learning_rate": 4.601885817685252e-05,
      "loss": 0.9177,
      "step": 146940
    },
    {
      "epoch": 0.5150231138307977,
      "grad_norm": 3.046875,
      "learning_rate": 4.6018209148188825e-05,
      "loss": 0.8741,
      "step": 146950
    },
    {
      "epoch": 0.5150581613376932,
      "grad_norm": 3.078125,
      "learning_rate": 4.601756011952512e-05,
      "loss": 0.9612,
      "step": 146960
    },
    {
      "epoch": 0.5150932088445889,
      "grad_norm": 2.9375,
      "learning_rate": 4.601691109086142e-05,
      "loss": 0.918,
      "step": 146970
    },
    {
      "epoch": 0.5151282563514844,
      "grad_norm": 3.21875,
      "learning_rate": 4.6016262062197715e-05,
      "loss": 0.977,
      "step": 146980
    },
    {
      "epoch": 0.51516330385838,
      "grad_norm": 2.6875,
      "learning_rate": 4.601561303353402e-05,
      "loss": 0.9653,
      "step": 146990
    },
    {
      "epoch": 0.5151983513652756,
      "grad_norm": 2.546875,
      "learning_rate": 4.601496400487031e-05,
      "loss": 0.8617,
      "step": 147000
    },
    {
      "epoch": 0.5152333988721712,
      "grad_norm": 3.015625,
      "learning_rate": 4.601431497620661e-05,
      "loss": 0.9153,
      "step": 147010
    },
    {
      "epoch": 0.5152684463790669,
      "grad_norm": 3.046875,
      "learning_rate": 4.6013665947542914e-05,
      "loss": 0.9496,
      "step": 147020
    },
    {
      "epoch": 0.5153034938859624,
      "grad_norm": 3.328125,
      "learning_rate": 4.601301691887921e-05,
      "loss": 0.8657,
      "step": 147030
    },
    {
      "epoch": 0.5153385413928581,
      "grad_norm": 2.875,
      "learning_rate": 4.601236789021551e-05,
      "loss": 0.8971,
      "step": 147040
    },
    {
      "epoch": 0.5153735888997536,
      "grad_norm": 2.96875,
      "learning_rate": 4.6011718861551805e-05,
      "loss": 0.8651,
      "step": 147050
    },
    {
      "epoch": 0.5154086364066492,
      "grad_norm": 2.921875,
      "learning_rate": 4.6011069832888106e-05,
      "loss": 0.8908,
      "step": 147060
    },
    {
      "epoch": 0.5154436839135448,
      "grad_norm": 2.609375,
      "learning_rate": 4.60104208042244e-05,
      "loss": 0.9414,
      "step": 147070
    },
    {
      "epoch": 0.5154787314204404,
      "grad_norm": 2.734375,
      "learning_rate": 4.60097717755607e-05,
      "loss": 0.9689,
      "step": 147080
    },
    {
      "epoch": 0.515513778927336,
      "grad_norm": 2.953125,
      "learning_rate": 4.6009122746897e-05,
      "loss": 0.8924,
      "step": 147090
    },
    {
      "epoch": 0.5155488264342316,
      "grad_norm": 2.609375,
      "learning_rate": 4.60084737182333e-05,
      "loss": 0.9335,
      "step": 147100
    },
    {
      "epoch": 0.5155838739411271,
      "grad_norm": 3.09375,
      "learning_rate": 4.600782468956959e-05,
      "loss": 0.9164,
      "step": 147110
    },
    {
      "epoch": 0.5156189214480228,
      "grad_norm": 3.234375,
      "learning_rate": 4.600717566090589e-05,
      "loss": 0.8955,
      "step": 147120
    },
    {
      "epoch": 0.5156539689549184,
      "grad_norm": 3.890625,
      "learning_rate": 4.600652663224219e-05,
      "loss": 0.954,
      "step": 147130
    },
    {
      "epoch": 0.515689016461814,
      "grad_norm": 2.984375,
      "learning_rate": 4.600587760357848e-05,
      "loss": 0.9011,
      "step": 147140
    },
    {
      "epoch": 0.5157240639687096,
      "grad_norm": 2.921875,
      "learning_rate": 4.6005228574914785e-05,
      "loss": 0.8902,
      "step": 147150
    },
    {
      "epoch": 0.5157591114756052,
      "grad_norm": 3.0,
      "learning_rate": 4.600457954625108e-05,
      "loss": 0.9554,
      "step": 147160
    },
    {
      "epoch": 0.5157941589825008,
      "grad_norm": 2.9375,
      "learning_rate": 4.600393051758738e-05,
      "loss": 0.9673,
      "step": 147170
    },
    {
      "epoch": 0.5158292064893963,
      "grad_norm": 2.796875,
      "learning_rate": 4.6003281488923675e-05,
      "loss": 0.8429,
      "step": 147180
    },
    {
      "epoch": 0.515864253996292,
      "grad_norm": 2.65625,
      "learning_rate": 4.600263246025998e-05,
      "loss": 0.8762,
      "step": 147190
    },
    {
      "epoch": 0.5158993015031875,
      "grad_norm": 3.015625,
      "learning_rate": 4.600198343159627e-05,
      "loss": 0.9192,
      "step": 147200
    },
    {
      "epoch": 0.5159343490100832,
      "grad_norm": 2.96875,
      "learning_rate": 4.600133440293257e-05,
      "loss": 0.9117,
      "step": 147210
    },
    {
      "epoch": 0.5159693965169788,
      "grad_norm": 3.09375,
      "learning_rate": 4.600068537426887e-05,
      "loss": 0.9531,
      "step": 147220
    },
    {
      "epoch": 0.5160044440238744,
      "grad_norm": 3.03125,
      "learning_rate": 4.600003634560517e-05,
      "loss": 0.9543,
      "step": 147230
    },
    {
      "epoch": 0.51603949153077,
      "grad_norm": 2.609375,
      "learning_rate": 4.599938731694147e-05,
      "loss": 0.8579,
      "step": 147240
    },
    {
      "epoch": 0.5160745390376655,
      "grad_norm": 3.140625,
      "learning_rate": 4.5998738288277765e-05,
      "loss": 1.0181,
      "step": 147250
    },
    {
      "epoch": 0.5161095865445612,
      "grad_norm": 3.40625,
      "learning_rate": 4.5998089259614066e-05,
      "loss": 0.9285,
      "step": 147260
    },
    {
      "epoch": 0.5161446340514567,
      "grad_norm": 3.21875,
      "learning_rate": 4.599744023095036e-05,
      "loss": 0.9783,
      "step": 147270
    },
    {
      "epoch": 0.5161796815583524,
      "grad_norm": 3.03125,
      "learning_rate": 4.599679120228666e-05,
      "loss": 0.9606,
      "step": 147280
    },
    {
      "epoch": 0.5162147290652479,
      "grad_norm": 2.65625,
      "learning_rate": 4.599614217362296e-05,
      "loss": 0.893,
      "step": 147290
    },
    {
      "epoch": 0.5162497765721435,
      "grad_norm": 3.15625,
      "learning_rate": 4.599549314495926e-05,
      "loss": 0.9116,
      "step": 147300
    },
    {
      "epoch": 0.5162848240790391,
      "grad_norm": 3.09375,
      "learning_rate": 4.599484411629555e-05,
      "loss": 0.9461,
      "step": 147310
    },
    {
      "epoch": 0.5163198715859347,
      "grad_norm": 2.9375,
      "learning_rate": 4.5994195087631854e-05,
      "loss": 1.0079,
      "step": 147320
    },
    {
      "epoch": 0.5163549190928304,
      "grad_norm": 2.890625,
      "learning_rate": 4.599354605896815e-05,
      "loss": 0.9773,
      "step": 147330
    },
    {
      "epoch": 0.5163899665997259,
      "grad_norm": 2.875,
      "learning_rate": 4.599289703030445e-05,
      "loss": 0.8206,
      "step": 147340
    },
    {
      "epoch": 0.5164250141066216,
      "grad_norm": 2.78125,
      "learning_rate": 4.5992248001640745e-05,
      "loss": 0.9678,
      "step": 147350
    },
    {
      "epoch": 0.5164600616135171,
      "grad_norm": 3.375,
      "learning_rate": 4.5991598972977046e-05,
      "loss": 1.0075,
      "step": 147360
    },
    {
      "epoch": 0.5164951091204127,
      "grad_norm": 3.0625,
      "learning_rate": 4.599094994431334e-05,
      "loss": 0.9207,
      "step": 147370
    },
    {
      "epoch": 0.5165301566273083,
      "grad_norm": 2.75,
      "learning_rate": 4.599030091564964e-05,
      "loss": 0.9078,
      "step": 147380
    },
    {
      "epoch": 0.5165652041342039,
      "grad_norm": 3.046875,
      "learning_rate": 4.5989651886985943e-05,
      "loss": 0.9503,
      "step": 147390
    },
    {
      "epoch": 0.5166002516410995,
      "grad_norm": 2.671875,
      "learning_rate": 4.598900285832224e-05,
      "loss": 1.0204,
      "step": 147400
    },
    {
      "epoch": 0.5166352991479951,
      "grad_norm": 2.65625,
      "learning_rate": 4.598835382965854e-05,
      "loss": 0.9568,
      "step": 147410
    },
    {
      "epoch": 0.5166703466548908,
      "grad_norm": 3.453125,
      "learning_rate": 4.5987704800994834e-05,
      "loss": 0.9651,
      "step": 147420
    },
    {
      "epoch": 0.5167053941617863,
      "grad_norm": 3.28125,
      "learning_rate": 4.5987055772331135e-05,
      "loss": 0.8789,
      "step": 147430
    },
    {
      "epoch": 0.5167404416686819,
      "grad_norm": 3.28125,
      "learning_rate": 4.598640674366743e-05,
      "loss": 1.0144,
      "step": 147440
    },
    {
      "epoch": 0.5167754891755775,
      "grad_norm": 2.9375,
      "learning_rate": 4.598575771500373e-05,
      "loss": 0.9547,
      "step": 147450
    },
    {
      "epoch": 0.5168105366824731,
      "grad_norm": 2.71875,
      "learning_rate": 4.5985108686340026e-05,
      "loss": 0.935,
      "step": 147460
    },
    {
      "epoch": 0.5168455841893687,
      "grad_norm": 3.046875,
      "learning_rate": 4.598445965767633e-05,
      "loss": 0.9299,
      "step": 147470
    },
    {
      "epoch": 0.5168806316962643,
      "grad_norm": 3.3125,
      "learning_rate": 4.598381062901262e-05,
      "loss": 0.9922,
      "step": 147480
    },
    {
      "epoch": 0.5169156792031598,
      "grad_norm": 2.515625,
      "learning_rate": 4.598316160034892e-05,
      "loss": 0.9151,
      "step": 147490
    },
    {
      "epoch": 0.5169507267100555,
      "grad_norm": 3.578125,
      "learning_rate": 4.598251257168522e-05,
      "loss": 1.0925,
      "step": 147500
    },
    {
      "epoch": 0.516985774216951,
      "grad_norm": 3.234375,
      "learning_rate": 4.598186354302151e-05,
      "loss": 0.8846,
      "step": 147510
    },
    {
      "epoch": 0.5170208217238467,
      "grad_norm": 3.015625,
      "learning_rate": 4.5981214514357814e-05,
      "loss": 0.964,
      "step": 147520
    },
    {
      "epoch": 0.5170558692307423,
      "grad_norm": 2.921875,
      "learning_rate": 4.598056548569411e-05,
      "loss": 0.897,
      "step": 147530
    },
    {
      "epoch": 0.5170909167376379,
      "grad_norm": 3.28125,
      "learning_rate": 4.597991645703041e-05,
      "loss": 0.9533,
      "step": 147540
    },
    {
      "epoch": 0.5171259642445335,
      "grad_norm": 3.0,
      "learning_rate": 4.5979267428366705e-05,
      "loss": 0.9759,
      "step": 147550
    },
    {
      "epoch": 0.517161011751429,
      "grad_norm": 3.171875,
      "learning_rate": 4.5978618399703006e-05,
      "loss": 0.9225,
      "step": 147560
    },
    {
      "epoch": 0.5171960592583247,
      "grad_norm": 2.921875,
      "learning_rate": 4.59779693710393e-05,
      "loss": 0.9095,
      "step": 147570
    },
    {
      "epoch": 0.5172311067652202,
      "grad_norm": 2.859375,
      "learning_rate": 4.59773203423756e-05,
      "loss": 0.9805,
      "step": 147580
    },
    {
      "epoch": 0.5172661542721159,
      "grad_norm": 2.96875,
      "learning_rate": 4.59766713137119e-05,
      "loss": 0.8856,
      "step": 147590
    },
    {
      "epoch": 0.5173012017790114,
      "grad_norm": 2.796875,
      "learning_rate": 4.59760222850482e-05,
      "loss": 0.9701,
      "step": 147600
    },
    {
      "epoch": 0.517336249285907,
      "grad_norm": 3.046875,
      "learning_rate": 4.59753732563845e-05,
      "loss": 0.8734,
      "step": 147610
    },
    {
      "epoch": 0.5173712967928027,
      "grad_norm": 2.765625,
      "learning_rate": 4.5974724227720794e-05,
      "loss": 0.8973,
      "step": 147620
    },
    {
      "epoch": 0.5174063442996982,
      "grad_norm": 3.078125,
      "learning_rate": 4.5974075199057095e-05,
      "loss": 0.9751,
      "step": 147630
    },
    {
      "epoch": 0.5174413918065939,
      "grad_norm": 2.921875,
      "learning_rate": 4.597342617039339e-05,
      "loss": 0.9201,
      "step": 147640
    },
    {
      "epoch": 0.5174764393134894,
      "grad_norm": 3.078125,
      "learning_rate": 4.597277714172969e-05,
      "loss": 0.9679,
      "step": 147650
    },
    {
      "epoch": 0.5175114868203851,
      "grad_norm": 2.59375,
      "learning_rate": 4.5972128113065986e-05,
      "loss": 0.9027,
      "step": 147660
    },
    {
      "epoch": 0.5175465343272806,
      "grad_norm": 3.140625,
      "learning_rate": 4.597147908440229e-05,
      "loss": 0.9527,
      "step": 147670
    },
    {
      "epoch": 0.5175815818341762,
      "grad_norm": 3.25,
      "learning_rate": 4.597083005573858e-05,
      "loss": 0.9551,
      "step": 147680
    },
    {
      "epoch": 0.5176166293410718,
      "grad_norm": 2.828125,
      "learning_rate": 4.5970181027074883e-05,
      "loss": 0.9444,
      "step": 147690
    },
    {
      "epoch": 0.5176516768479674,
      "grad_norm": 3.46875,
      "learning_rate": 4.596953199841118e-05,
      "loss": 0.906,
      "step": 147700
    },
    {
      "epoch": 0.5176867243548631,
      "grad_norm": 3.15625,
      "learning_rate": 4.596888296974748e-05,
      "loss": 0.9663,
      "step": 147710
    },
    {
      "epoch": 0.5177217718617586,
      "grad_norm": 3.265625,
      "learning_rate": 4.5968233941083774e-05,
      "loss": 0.923,
      "step": 147720
    },
    {
      "epoch": 0.5177568193686543,
      "grad_norm": 2.71875,
      "learning_rate": 4.5967584912420075e-05,
      "loss": 0.9427,
      "step": 147730
    },
    {
      "epoch": 0.5177918668755498,
      "grad_norm": 3.328125,
      "learning_rate": 4.596693588375638e-05,
      "loss": 0.8873,
      "step": 147740
    },
    {
      "epoch": 0.5178269143824454,
      "grad_norm": 2.859375,
      "learning_rate": 4.596628685509267e-05,
      "loss": 0.9347,
      "step": 147750
    },
    {
      "epoch": 0.517861961889341,
      "grad_norm": 2.78125,
      "learning_rate": 4.596563782642897e-05,
      "loss": 0.9484,
      "step": 147760
    },
    {
      "epoch": 0.5178970093962366,
      "grad_norm": 3.1875,
      "learning_rate": 4.596498879776527e-05,
      "loss": 0.9602,
      "step": 147770
    },
    {
      "epoch": 0.5179320569031322,
      "grad_norm": 3.90625,
      "learning_rate": 4.596433976910157e-05,
      "loss": 0.8921,
      "step": 147780
    },
    {
      "epoch": 0.5179671044100278,
      "grad_norm": 2.984375,
      "learning_rate": 4.5963690740437863e-05,
      "loss": 0.9456,
      "step": 147790
    },
    {
      "epoch": 0.5180021519169233,
      "grad_norm": 3.3125,
      "learning_rate": 4.5963041711774165e-05,
      "loss": 0.913,
      "step": 147800
    },
    {
      "epoch": 0.518037199423819,
      "grad_norm": 3.140625,
      "learning_rate": 4.596239268311046e-05,
      "loss": 0.9665,
      "step": 147810
    },
    {
      "epoch": 0.5180722469307146,
      "grad_norm": 2.5625,
      "learning_rate": 4.596174365444676e-05,
      "loss": 0.9681,
      "step": 147820
    },
    {
      "epoch": 0.5181072944376102,
      "grad_norm": 3.15625,
      "learning_rate": 4.5961094625783055e-05,
      "loss": 0.8179,
      "step": 147830
    },
    {
      "epoch": 0.5181423419445058,
      "grad_norm": 3.125,
      "learning_rate": 4.596044559711936e-05,
      "loss": 0.9623,
      "step": 147840
    },
    {
      "epoch": 0.5181773894514013,
      "grad_norm": 2.96875,
      "learning_rate": 4.595979656845565e-05,
      "loss": 0.908,
      "step": 147850
    },
    {
      "epoch": 0.518212436958297,
      "grad_norm": 2.59375,
      "learning_rate": 4.5959147539791946e-05,
      "loss": 0.9609,
      "step": 147860
    },
    {
      "epoch": 0.5182474844651925,
      "grad_norm": 3.09375,
      "learning_rate": 4.595849851112825e-05,
      "loss": 0.8926,
      "step": 147870
    },
    {
      "epoch": 0.5182825319720882,
      "grad_norm": 3.046875,
      "learning_rate": 4.595784948246454e-05,
      "loss": 0.9758,
      "step": 147880
    },
    {
      "epoch": 0.5183175794789837,
      "grad_norm": 2.90625,
      "learning_rate": 4.5957200453800843e-05,
      "loss": 0.9157,
      "step": 147890
    },
    {
      "epoch": 0.5183526269858794,
      "grad_norm": 2.984375,
      "learning_rate": 4.595655142513714e-05,
      "loss": 0.9333,
      "step": 147900
    },
    {
      "epoch": 0.518387674492775,
      "grad_norm": 2.671875,
      "learning_rate": 4.595590239647344e-05,
      "loss": 0.8993,
      "step": 147910
    },
    {
      "epoch": 0.5184227219996705,
      "grad_norm": 3.734375,
      "learning_rate": 4.5955253367809734e-05,
      "loss": 0.873,
      "step": 147920
    },
    {
      "epoch": 0.5184577695065662,
      "grad_norm": 2.75,
      "learning_rate": 4.5954604339146035e-05,
      "loss": 0.9094,
      "step": 147930
    },
    {
      "epoch": 0.5184928170134617,
      "grad_norm": 2.984375,
      "learning_rate": 4.595395531048233e-05,
      "loss": 0.8924,
      "step": 147940
    },
    {
      "epoch": 0.5185278645203574,
      "grad_norm": 2.9375,
      "learning_rate": 4.595330628181863e-05,
      "loss": 0.899,
      "step": 147950
    },
    {
      "epoch": 0.5185629120272529,
      "grad_norm": 3.0625,
      "learning_rate": 4.5952657253154926e-05,
      "loss": 0.9008,
      "step": 147960
    },
    {
      "epoch": 0.5185979595341486,
      "grad_norm": 3.078125,
      "learning_rate": 4.595200822449123e-05,
      "loss": 0.9395,
      "step": 147970
    },
    {
      "epoch": 0.5186330070410441,
      "grad_norm": 3.328125,
      "learning_rate": 4.595135919582753e-05,
      "loss": 0.9069,
      "step": 147980
    },
    {
      "epoch": 0.5186680545479397,
      "grad_norm": 2.59375,
      "learning_rate": 4.5950710167163823e-05,
      "loss": 0.8751,
      "step": 147990
    },
    {
      "epoch": 0.5187031020548353,
      "grad_norm": 3.0625,
      "learning_rate": 4.5950061138500125e-05,
      "loss": 0.9821,
      "step": 148000
    },
    {
      "epoch": 0.5187381495617309,
      "grad_norm": 2.859375,
      "learning_rate": 4.594941210983642e-05,
      "loss": 0.8419,
      "step": 148010
    },
    {
      "epoch": 0.5187731970686266,
      "grad_norm": 3.015625,
      "learning_rate": 4.594876308117272e-05,
      "loss": 0.9692,
      "step": 148020
    },
    {
      "epoch": 0.5188082445755221,
      "grad_norm": 3.09375,
      "learning_rate": 4.5948114052509015e-05,
      "loss": 0.8853,
      "step": 148030
    },
    {
      "epoch": 0.5188432920824178,
      "grad_norm": 2.921875,
      "learning_rate": 4.594746502384532e-05,
      "loss": 1.0217,
      "step": 148040
    },
    {
      "epoch": 0.5188783395893133,
      "grad_norm": 3.140625,
      "learning_rate": 4.594681599518161e-05,
      "loss": 0.8978,
      "step": 148050
    },
    {
      "epoch": 0.5189133870962089,
      "grad_norm": 3.1875,
      "learning_rate": 4.594616696651791e-05,
      "loss": 0.8721,
      "step": 148060
    },
    {
      "epoch": 0.5189484346031045,
      "grad_norm": 3.1875,
      "learning_rate": 4.594551793785421e-05,
      "loss": 0.9021,
      "step": 148070
    },
    {
      "epoch": 0.5189834821100001,
      "grad_norm": 2.96875,
      "learning_rate": 4.594486890919051e-05,
      "loss": 0.9215,
      "step": 148080
    },
    {
      "epoch": 0.5190185296168957,
      "grad_norm": 2.953125,
      "learning_rate": 4.5944219880526803e-05,
      "loss": 0.997,
      "step": 148090
    },
    {
      "epoch": 0.5190535771237913,
      "grad_norm": 2.671875,
      "learning_rate": 4.5943570851863105e-05,
      "loss": 0.9296,
      "step": 148100
    },
    {
      "epoch": 0.519088624630687,
      "grad_norm": 2.59375,
      "learning_rate": 4.5942921823199406e-05,
      "loss": 0.9107,
      "step": 148110
    },
    {
      "epoch": 0.5191236721375825,
      "grad_norm": 2.875,
      "learning_rate": 4.59422727945357e-05,
      "loss": 0.9114,
      "step": 148120
    },
    {
      "epoch": 0.5191587196444781,
      "grad_norm": 2.9375,
      "learning_rate": 4.5941623765872e-05,
      "loss": 0.9743,
      "step": 148130
    },
    {
      "epoch": 0.5191937671513737,
      "grad_norm": 3.015625,
      "learning_rate": 4.59409747372083e-05,
      "loss": 0.9812,
      "step": 148140
    },
    {
      "epoch": 0.5192288146582693,
      "grad_norm": 2.828125,
      "learning_rate": 4.59403257085446e-05,
      "loss": 0.8927,
      "step": 148150
    },
    {
      "epoch": 0.5192638621651648,
      "grad_norm": 2.875,
      "learning_rate": 4.593967667988089e-05,
      "loss": 0.9361,
      "step": 148160
    },
    {
      "epoch": 0.5192989096720605,
      "grad_norm": 3.015625,
      "learning_rate": 4.5939027651217194e-05,
      "loss": 0.9236,
      "step": 148170
    },
    {
      "epoch": 0.519333957178956,
      "grad_norm": 3.171875,
      "learning_rate": 4.593837862255349e-05,
      "loss": 0.9933,
      "step": 148180
    },
    {
      "epoch": 0.5193690046858517,
      "grad_norm": 3.34375,
      "learning_rate": 4.593772959388979e-05,
      "loss": 0.9838,
      "step": 148190
    },
    {
      "epoch": 0.5194040521927473,
      "grad_norm": 2.84375,
      "learning_rate": 4.5937080565226085e-05,
      "loss": 0.9144,
      "step": 148200
    },
    {
      "epoch": 0.5194390996996429,
      "grad_norm": 2.625,
      "learning_rate": 4.5936431536562386e-05,
      "loss": 0.9438,
      "step": 148210
    },
    {
      "epoch": 0.5194741472065385,
      "grad_norm": 3.015625,
      "learning_rate": 4.593578250789868e-05,
      "loss": 0.9594,
      "step": 148220
    },
    {
      "epoch": 0.519509194713434,
      "grad_norm": 2.328125,
      "learning_rate": 4.593513347923498e-05,
      "loss": 0.8986,
      "step": 148230
    },
    {
      "epoch": 0.5195442422203297,
      "grad_norm": 3.125,
      "learning_rate": 4.593448445057128e-05,
      "loss": 0.8928,
      "step": 148240
    },
    {
      "epoch": 0.5195792897272252,
      "grad_norm": 3.25,
      "learning_rate": 4.593383542190757e-05,
      "loss": 0.9668,
      "step": 148250
    },
    {
      "epoch": 0.5196143372341209,
      "grad_norm": 3.125,
      "learning_rate": 4.593318639324387e-05,
      "loss": 0.925,
      "step": 148260
    },
    {
      "epoch": 0.5196493847410164,
      "grad_norm": 3.15625,
      "learning_rate": 4.593253736458017e-05,
      "loss": 0.974,
      "step": 148270
    },
    {
      "epoch": 0.519684432247912,
      "grad_norm": 2.796875,
      "learning_rate": 4.593188833591647e-05,
      "loss": 0.931,
      "step": 148280
    },
    {
      "epoch": 0.5197194797548076,
      "grad_norm": 3.140625,
      "learning_rate": 4.5931239307252763e-05,
      "loss": 0.865,
      "step": 148290
    },
    {
      "epoch": 0.5197545272617032,
      "grad_norm": 2.46875,
      "learning_rate": 4.5930590278589065e-05,
      "loss": 0.9313,
      "step": 148300
    },
    {
      "epoch": 0.5197895747685989,
      "grad_norm": 2.734375,
      "learning_rate": 4.592994124992536e-05,
      "loss": 0.9463,
      "step": 148310
    },
    {
      "epoch": 0.5198246222754944,
      "grad_norm": 2.859375,
      "learning_rate": 4.592929222126166e-05,
      "loss": 0.9012,
      "step": 148320
    },
    {
      "epoch": 0.5198596697823901,
      "grad_norm": 3.296875,
      "learning_rate": 4.5928643192597955e-05,
      "loss": 0.9201,
      "step": 148330
    },
    {
      "epoch": 0.5198947172892856,
      "grad_norm": 6.09375,
      "learning_rate": 4.592799416393426e-05,
      "loss": 0.8971,
      "step": 148340
    },
    {
      "epoch": 0.5199297647961812,
      "grad_norm": 3.1875,
      "learning_rate": 4.592734513527056e-05,
      "loss": 0.8327,
      "step": 148350
    },
    {
      "epoch": 0.5199648123030768,
      "grad_norm": 3.28125,
      "learning_rate": 4.592669610660685e-05,
      "loss": 0.8596,
      "step": 148360
    },
    {
      "epoch": 0.5199998598099724,
      "grad_norm": 3.0,
      "learning_rate": 4.5926047077943154e-05,
      "loss": 0.9201,
      "step": 148370
    },
    {
      "epoch": 0.520034907316868,
      "grad_norm": 2.734375,
      "learning_rate": 4.592539804927945e-05,
      "loss": 0.9397,
      "step": 148380
    },
    {
      "epoch": 0.5200699548237636,
      "grad_norm": 2.734375,
      "learning_rate": 4.592474902061575e-05,
      "loss": 0.9009,
      "step": 148390
    },
    {
      "epoch": 0.5201050023306593,
      "grad_norm": 3.0,
      "learning_rate": 4.5924099991952045e-05,
      "loss": 0.862,
      "step": 148400
    },
    {
      "epoch": 0.5201400498375548,
      "grad_norm": 3.171875,
      "learning_rate": 4.5923450963288346e-05,
      "loss": 0.8525,
      "step": 148410
    },
    {
      "epoch": 0.5201750973444504,
      "grad_norm": 2.953125,
      "learning_rate": 4.592280193462464e-05,
      "loss": 0.8732,
      "step": 148420
    },
    {
      "epoch": 0.520210144851346,
      "grad_norm": 2.890625,
      "learning_rate": 4.592215290596094e-05,
      "loss": 0.998,
      "step": 148430
    },
    {
      "epoch": 0.5202451923582416,
      "grad_norm": 2.890625,
      "learning_rate": 4.592150387729724e-05,
      "loss": 0.9929,
      "step": 148440
    },
    {
      "epoch": 0.5202802398651372,
      "grad_norm": 2.984375,
      "learning_rate": 4.592085484863354e-05,
      "loss": 0.8934,
      "step": 148450
    },
    {
      "epoch": 0.5203152873720328,
      "grad_norm": 2.46875,
      "learning_rate": 4.592020581996983e-05,
      "loss": 0.9347,
      "step": 148460
    },
    {
      "epoch": 0.5203503348789283,
      "grad_norm": 3.046875,
      "learning_rate": 4.5919556791306134e-05,
      "loss": 0.9595,
      "step": 148470
    },
    {
      "epoch": 0.520385382385824,
      "grad_norm": 3.21875,
      "learning_rate": 4.5918907762642436e-05,
      "loss": 1.0122,
      "step": 148480
    },
    {
      "epoch": 0.5204204298927195,
      "grad_norm": 2.75,
      "learning_rate": 4.591825873397873e-05,
      "loss": 0.8739,
      "step": 148490
    },
    {
      "epoch": 0.5204554773996152,
      "grad_norm": 2.90625,
      "learning_rate": 4.591760970531503e-05,
      "loss": 0.9664,
      "step": 148500
    },
    {
      "epoch": 0.5204905249065108,
      "grad_norm": 3.25,
      "learning_rate": 4.5916960676651326e-05,
      "loss": 0.9147,
      "step": 148510
    },
    {
      "epoch": 0.5205255724134064,
      "grad_norm": 2.984375,
      "learning_rate": 4.591631164798763e-05,
      "loss": 0.9202,
      "step": 148520
    },
    {
      "epoch": 0.520560619920302,
      "grad_norm": 3.03125,
      "learning_rate": 4.591566261932392e-05,
      "loss": 0.8304,
      "step": 148530
    },
    {
      "epoch": 0.5205956674271975,
      "grad_norm": 3.328125,
      "learning_rate": 4.5915013590660224e-05,
      "loss": 1.0167,
      "step": 148540
    },
    {
      "epoch": 0.5206307149340932,
      "grad_norm": 3.140625,
      "learning_rate": 4.591436456199652e-05,
      "loss": 0.8813,
      "step": 148550
    },
    {
      "epoch": 0.5206657624409887,
      "grad_norm": 2.734375,
      "learning_rate": 4.591371553333282e-05,
      "loss": 0.9484,
      "step": 148560
    },
    {
      "epoch": 0.5207008099478844,
      "grad_norm": 3.078125,
      "learning_rate": 4.5913066504669114e-05,
      "loss": 0.8054,
      "step": 148570
    },
    {
      "epoch": 0.5207358574547799,
      "grad_norm": 2.9375,
      "learning_rate": 4.5912417476005416e-05,
      "loss": 0.9133,
      "step": 148580
    },
    {
      "epoch": 0.5207709049616756,
      "grad_norm": 3.359375,
      "learning_rate": 4.591176844734171e-05,
      "loss": 0.9351,
      "step": 148590
    },
    {
      "epoch": 0.5208059524685712,
      "grad_norm": 2.75,
      "learning_rate": 4.591111941867801e-05,
      "loss": 0.9122,
      "step": 148600
    },
    {
      "epoch": 0.5208409999754667,
      "grad_norm": 2.796875,
      "learning_rate": 4.5910470390014306e-05,
      "loss": 0.9359,
      "step": 148610
    },
    {
      "epoch": 0.5208760474823624,
      "grad_norm": 3.234375,
      "learning_rate": 4.59098213613506e-05,
      "loss": 0.9391,
      "step": 148620
    },
    {
      "epoch": 0.5209110949892579,
      "grad_norm": 3.1875,
      "learning_rate": 4.59091723326869e-05,
      "loss": 0.9434,
      "step": 148630
    },
    {
      "epoch": 0.5209461424961536,
      "grad_norm": 3.1875,
      "learning_rate": 4.59085233040232e-05,
      "loss": 0.915,
      "step": 148640
    },
    {
      "epoch": 0.5209811900030491,
      "grad_norm": 3.125,
      "learning_rate": 4.59078742753595e-05,
      "loss": 0.9405,
      "step": 148650
    },
    {
      "epoch": 0.5210162375099447,
      "grad_norm": 2.90625,
      "learning_rate": 4.590722524669579e-05,
      "loss": 0.9421,
      "step": 148660
    },
    {
      "epoch": 0.5210512850168403,
      "grad_norm": 3.015625,
      "learning_rate": 4.5906576218032094e-05,
      "loss": 1.0298,
      "step": 148670
    },
    {
      "epoch": 0.5210863325237359,
      "grad_norm": 3.046875,
      "learning_rate": 4.590592718936839e-05,
      "loss": 0.8871,
      "step": 148680
    },
    {
      "epoch": 0.5211213800306316,
      "grad_norm": 3.0,
      "learning_rate": 4.590527816070469e-05,
      "loss": 0.8998,
      "step": 148690
    },
    {
      "epoch": 0.5211564275375271,
      "grad_norm": 2.609375,
      "learning_rate": 4.590462913204099e-05,
      "loss": 0.8609,
      "step": 148700
    },
    {
      "epoch": 0.5211914750444228,
      "grad_norm": 3.65625,
      "learning_rate": 4.5903980103377286e-05,
      "loss": 1.025,
      "step": 148710
    },
    {
      "epoch": 0.5212265225513183,
      "grad_norm": 3.59375,
      "learning_rate": 4.590333107471359e-05,
      "loss": 0.9723,
      "step": 148720
    },
    {
      "epoch": 0.5212615700582139,
      "grad_norm": 2.5625,
      "learning_rate": 4.590268204604988e-05,
      "loss": 0.9007,
      "step": 148730
    },
    {
      "epoch": 0.5212966175651095,
      "grad_norm": 2.9375,
      "learning_rate": 4.5902033017386184e-05,
      "loss": 0.938,
      "step": 148740
    },
    {
      "epoch": 0.5213316650720051,
      "grad_norm": 2.578125,
      "learning_rate": 4.590138398872248e-05,
      "loss": 0.9071,
      "step": 148750
    },
    {
      "epoch": 0.5213667125789007,
      "grad_norm": 2.71875,
      "learning_rate": 4.590073496005878e-05,
      "loss": 0.9503,
      "step": 148760
    },
    {
      "epoch": 0.5214017600857963,
      "grad_norm": 2.734375,
      "learning_rate": 4.5900085931395074e-05,
      "loss": 0.8977,
      "step": 148770
    },
    {
      "epoch": 0.5214368075926918,
      "grad_norm": 2.921875,
      "learning_rate": 4.5899436902731376e-05,
      "loss": 0.9332,
      "step": 148780
    },
    {
      "epoch": 0.5214718550995875,
      "grad_norm": 2.859375,
      "learning_rate": 4.589878787406767e-05,
      "loss": 0.9373,
      "step": 148790
    },
    {
      "epoch": 0.5215069026064831,
      "grad_norm": 2.9375,
      "learning_rate": 4.589813884540397e-05,
      "loss": 0.8987,
      "step": 148800
    },
    {
      "epoch": 0.5215419501133787,
      "grad_norm": 2.96875,
      "learning_rate": 4.5897489816740266e-05,
      "loss": 0.9781,
      "step": 148810
    },
    {
      "epoch": 0.5215769976202743,
      "grad_norm": 3.1875,
      "learning_rate": 4.589684078807657e-05,
      "loss": 0.894,
      "step": 148820
    },
    {
      "epoch": 0.5216120451271699,
      "grad_norm": 3.296875,
      "learning_rate": 4.589619175941286e-05,
      "loss": 0.922,
      "step": 148830
    },
    {
      "epoch": 0.5216470926340655,
      "grad_norm": 2.875,
      "learning_rate": 4.5895542730749164e-05,
      "loss": 0.9606,
      "step": 148840
    },
    {
      "epoch": 0.521682140140961,
      "grad_norm": 3.171875,
      "learning_rate": 4.5894893702085465e-05,
      "loss": 0.9795,
      "step": 148850
    },
    {
      "epoch": 0.5217171876478567,
      "grad_norm": 2.78125,
      "learning_rate": 4.589424467342176e-05,
      "loss": 0.8467,
      "step": 148860
    },
    {
      "epoch": 0.5217522351547522,
      "grad_norm": 3.21875,
      "learning_rate": 4.589359564475806e-05,
      "loss": 0.9978,
      "step": 148870
    },
    {
      "epoch": 0.5217872826616479,
      "grad_norm": 3.203125,
      "learning_rate": 4.5892946616094356e-05,
      "loss": 1.0285,
      "step": 148880
    },
    {
      "epoch": 0.5218223301685435,
      "grad_norm": 2.5,
      "learning_rate": 4.589229758743066e-05,
      "loss": 0.9047,
      "step": 148890
    },
    {
      "epoch": 0.521857377675439,
      "grad_norm": 3.4375,
      "learning_rate": 4.589164855876695e-05,
      "loss": 0.8399,
      "step": 148900
    },
    {
      "epoch": 0.5218924251823347,
      "grad_norm": 3.375,
      "learning_rate": 4.589099953010325e-05,
      "loss": 0.9622,
      "step": 148910
    },
    {
      "epoch": 0.5219274726892302,
      "grad_norm": 2.6875,
      "learning_rate": 4.589035050143955e-05,
      "loss": 0.9086,
      "step": 148920
    },
    {
      "epoch": 0.5219625201961259,
      "grad_norm": 2.5625,
      "learning_rate": 4.588970147277585e-05,
      "loss": 0.8855,
      "step": 148930
    },
    {
      "epoch": 0.5219975677030214,
      "grad_norm": 2.859375,
      "learning_rate": 4.5889052444112144e-05,
      "loss": 0.9186,
      "step": 148940
    },
    {
      "epoch": 0.5220326152099171,
      "grad_norm": 2.953125,
      "learning_rate": 4.5888403415448445e-05,
      "loss": 0.8951,
      "step": 148950
    },
    {
      "epoch": 0.5220676627168126,
      "grad_norm": 3.328125,
      "learning_rate": 4.588775438678474e-05,
      "loss": 0.9674,
      "step": 148960
    },
    {
      "epoch": 0.5221027102237082,
      "grad_norm": 2.6875,
      "learning_rate": 4.588710535812104e-05,
      "loss": 0.8603,
      "step": 148970
    },
    {
      "epoch": 0.5221377577306038,
      "grad_norm": 3.0625,
      "learning_rate": 4.588645632945734e-05,
      "loss": 0.9029,
      "step": 148980
    },
    {
      "epoch": 0.5221728052374994,
      "grad_norm": 2.96875,
      "learning_rate": 4.588580730079363e-05,
      "loss": 0.941,
      "step": 148990
    },
    {
      "epoch": 0.5222078527443951,
      "grad_norm": 3.359375,
      "learning_rate": 4.588515827212993e-05,
      "loss": 0.983,
      "step": 149000
    },
    {
      "epoch": 0.5222429002512906,
      "grad_norm": 3.125,
      "learning_rate": 4.5884509243466226e-05,
      "loss": 0.9557,
      "step": 149010
    },
    {
      "epoch": 0.5222779477581863,
      "grad_norm": 2.984375,
      "learning_rate": 4.588386021480253e-05,
      "loss": 0.8552,
      "step": 149020
    },
    {
      "epoch": 0.5223129952650818,
      "grad_norm": 2.84375,
      "learning_rate": 4.588321118613882e-05,
      "loss": 0.9761,
      "step": 149030
    },
    {
      "epoch": 0.5223480427719774,
      "grad_norm": 2.921875,
      "learning_rate": 4.5882562157475124e-05,
      "loss": 0.9371,
      "step": 149040
    },
    {
      "epoch": 0.522383090278873,
      "grad_norm": 2.984375,
      "learning_rate": 4.588191312881142e-05,
      "loss": 0.9751,
      "step": 149050
    },
    {
      "epoch": 0.5224181377857686,
      "grad_norm": 3.203125,
      "learning_rate": 4.588126410014772e-05,
      "loss": 0.9057,
      "step": 149060
    },
    {
      "epoch": 0.5224531852926642,
      "grad_norm": 2.671875,
      "learning_rate": 4.588061507148402e-05,
      "loss": 0.9151,
      "step": 149070
    },
    {
      "epoch": 0.5224882327995598,
      "grad_norm": 2.609375,
      "learning_rate": 4.5879966042820316e-05,
      "loss": 0.8883,
      "step": 149080
    },
    {
      "epoch": 0.5225232803064555,
      "grad_norm": 3.15625,
      "learning_rate": 4.587931701415662e-05,
      "loss": 0.8984,
      "step": 149090
    },
    {
      "epoch": 0.522558327813351,
      "grad_norm": 3.015625,
      "learning_rate": 4.587866798549291e-05,
      "loss": 0.9164,
      "step": 149100
    },
    {
      "epoch": 0.5225933753202466,
      "grad_norm": 2.734375,
      "learning_rate": 4.587801895682921e-05,
      "loss": 0.883,
      "step": 149110
    },
    {
      "epoch": 0.5226284228271422,
      "grad_norm": 2.34375,
      "learning_rate": 4.587736992816551e-05,
      "loss": 0.8035,
      "step": 149120
    },
    {
      "epoch": 0.5226634703340378,
      "grad_norm": 3.046875,
      "learning_rate": 4.587672089950181e-05,
      "loss": 0.9075,
      "step": 149130
    },
    {
      "epoch": 0.5226985178409334,
      "grad_norm": 2.96875,
      "learning_rate": 4.5876071870838104e-05,
      "loss": 0.8774,
      "step": 149140
    },
    {
      "epoch": 0.522733565347829,
      "grad_norm": 2.984375,
      "learning_rate": 4.5875422842174405e-05,
      "loss": 0.8701,
      "step": 149150
    },
    {
      "epoch": 0.5227686128547245,
      "grad_norm": 3.0625,
      "learning_rate": 4.58747738135107e-05,
      "loss": 0.8322,
      "step": 149160
    },
    {
      "epoch": 0.5228036603616202,
      "grad_norm": 2.828125,
      "learning_rate": 4.5874124784847e-05,
      "loss": 0.8752,
      "step": 149170
    },
    {
      "epoch": 0.5228387078685158,
      "grad_norm": 3.078125,
      "learning_rate": 4.5873475756183296e-05,
      "loss": 0.9851,
      "step": 149180
    },
    {
      "epoch": 0.5228737553754114,
      "grad_norm": 3.15625,
      "learning_rate": 4.58728267275196e-05,
      "loss": 1.0006,
      "step": 149190
    },
    {
      "epoch": 0.522908802882307,
      "grad_norm": 3.265625,
      "learning_rate": 4.587217769885589e-05,
      "loss": 0.9268,
      "step": 149200
    },
    {
      "epoch": 0.5229438503892025,
      "grad_norm": 2.96875,
      "learning_rate": 4.587152867019219e-05,
      "loss": 0.9978,
      "step": 149210
    },
    {
      "epoch": 0.5229788978960982,
      "grad_norm": 3.171875,
      "learning_rate": 4.5870879641528494e-05,
      "loss": 0.988,
      "step": 149220
    },
    {
      "epoch": 0.5230139454029937,
      "grad_norm": 3.140625,
      "learning_rate": 4.587023061286479e-05,
      "loss": 0.9711,
      "step": 149230
    },
    {
      "epoch": 0.5230489929098894,
      "grad_norm": 2.65625,
      "learning_rate": 4.586958158420109e-05,
      "loss": 0.912,
      "step": 149240
    },
    {
      "epoch": 0.5230840404167849,
      "grad_norm": 3.421875,
      "learning_rate": 4.5868932555537385e-05,
      "loss": 0.9435,
      "step": 149250
    },
    {
      "epoch": 0.5231190879236806,
      "grad_norm": 3.328125,
      "learning_rate": 4.5868283526873686e-05,
      "loss": 0.9888,
      "step": 149260
    },
    {
      "epoch": 0.5231541354305761,
      "grad_norm": 3.140625,
      "learning_rate": 4.586763449820998e-05,
      "loss": 0.866,
      "step": 149270
    },
    {
      "epoch": 0.5231891829374717,
      "grad_norm": 2.8125,
      "learning_rate": 4.586698546954628e-05,
      "loss": 0.8379,
      "step": 149280
    },
    {
      "epoch": 0.5232242304443674,
      "grad_norm": 3.078125,
      "learning_rate": 4.586633644088258e-05,
      "loss": 0.87,
      "step": 149290
    },
    {
      "epoch": 0.5232592779512629,
      "grad_norm": 2.984375,
      "learning_rate": 4.586568741221888e-05,
      "loss": 0.962,
      "step": 149300
    },
    {
      "epoch": 0.5232943254581586,
      "grad_norm": 3.09375,
      "learning_rate": 4.586503838355517e-05,
      "loss": 0.9654,
      "step": 149310
    },
    {
      "epoch": 0.5233293729650541,
      "grad_norm": 3.296875,
      "learning_rate": 4.5864389354891474e-05,
      "loss": 0.9527,
      "step": 149320
    },
    {
      "epoch": 0.5233644204719498,
      "grad_norm": 2.71875,
      "learning_rate": 4.586374032622777e-05,
      "loss": 0.9107,
      "step": 149330
    },
    {
      "epoch": 0.5233994679788453,
      "grad_norm": 3.171875,
      "learning_rate": 4.586309129756407e-05,
      "loss": 0.8565,
      "step": 149340
    },
    {
      "epoch": 0.5234345154857409,
      "grad_norm": 2.59375,
      "learning_rate": 4.586244226890037e-05,
      "loss": 0.9037,
      "step": 149350
    },
    {
      "epoch": 0.5234695629926365,
      "grad_norm": 2.5625,
      "learning_rate": 4.586179324023666e-05,
      "loss": 0.9456,
      "step": 149360
    },
    {
      "epoch": 0.5235046104995321,
      "grad_norm": 2.609375,
      "learning_rate": 4.586114421157296e-05,
      "loss": 0.9979,
      "step": 149370
    },
    {
      "epoch": 0.5235396580064278,
      "grad_norm": 3.25,
      "learning_rate": 4.5860495182909256e-05,
      "loss": 0.8752,
      "step": 149380
    },
    {
      "epoch": 0.5235747055133233,
      "grad_norm": 2.96875,
      "learning_rate": 4.585984615424556e-05,
      "loss": 0.9054,
      "step": 149390
    },
    {
      "epoch": 0.523609753020219,
      "grad_norm": 2.984375,
      "learning_rate": 4.585919712558185e-05,
      "loss": 0.8561,
      "step": 149400
    },
    {
      "epoch": 0.5236448005271145,
      "grad_norm": 2.890625,
      "learning_rate": 4.585854809691815e-05,
      "loss": 0.9492,
      "step": 149410
    },
    {
      "epoch": 0.5236798480340101,
      "grad_norm": 2.890625,
      "learning_rate": 4.585789906825445e-05,
      "loss": 0.9481,
      "step": 149420
    },
    {
      "epoch": 0.5237148955409057,
      "grad_norm": 3.21875,
      "learning_rate": 4.585725003959075e-05,
      "loss": 0.8847,
      "step": 149430
    },
    {
      "epoch": 0.5237499430478013,
      "grad_norm": 2.546875,
      "learning_rate": 4.585660101092705e-05,
      "loss": 0.8988,
      "step": 149440
    },
    {
      "epoch": 0.5237849905546968,
      "grad_norm": 2.21875,
      "learning_rate": 4.5855951982263345e-05,
      "loss": 0.9288,
      "step": 149450
    },
    {
      "epoch": 0.5238200380615925,
      "grad_norm": 2.703125,
      "learning_rate": 4.5855302953599646e-05,
      "loss": 0.8875,
      "step": 149460
    },
    {
      "epoch": 0.523855085568488,
      "grad_norm": 3.609375,
      "learning_rate": 4.585465392493594e-05,
      "loss": 1.0042,
      "step": 149470
    },
    {
      "epoch": 0.5238901330753837,
      "grad_norm": 2.765625,
      "learning_rate": 4.585400489627224e-05,
      "loss": 0.9127,
      "step": 149480
    },
    {
      "epoch": 0.5239251805822793,
      "grad_norm": 2.984375,
      "learning_rate": 4.585335586760854e-05,
      "loss": 0.9852,
      "step": 149490
    },
    {
      "epoch": 0.5239602280891749,
      "grad_norm": 2.703125,
      "learning_rate": 4.585270683894484e-05,
      "loss": 0.8728,
      "step": 149500
    },
    {
      "epoch": 0.5239952755960705,
      "grad_norm": 3.125,
      "learning_rate": 4.585205781028113e-05,
      "loss": 1.0062,
      "step": 149510
    },
    {
      "epoch": 0.524030323102966,
      "grad_norm": 2.765625,
      "learning_rate": 4.5851408781617434e-05,
      "loss": 0.9363,
      "step": 149520
    },
    {
      "epoch": 0.5240653706098617,
      "grad_norm": 2.84375,
      "learning_rate": 4.585075975295373e-05,
      "loss": 0.9207,
      "step": 149530
    },
    {
      "epoch": 0.5241004181167572,
      "grad_norm": 2.796875,
      "learning_rate": 4.585011072429003e-05,
      "loss": 0.8915,
      "step": 149540
    },
    {
      "epoch": 0.5241354656236529,
      "grad_norm": 3.53125,
      "learning_rate": 4.5849461695626325e-05,
      "loss": 0.8173,
      "step": 149550
    },
    {
      "epoch": 0.5241705131305484,
      "grad_norm": 2.484375,
      "learning_rate": 4.5848812666962626e-05,
      "loss": 0.8424,
      "step": 149560
    },
    {
      "epoch": 0.5242055606374441,
      "grad_norm": 3.234375,
      "learning_rate": 4.584816363829892e-05,
      "loss": 0.9495,
      "step": 149570
    },
    {
      "epoch": 0.5242406081443397,
      "grad_norm": 2.96875,
      "learning_rate": 4.584751460963522e-05,
      "loss": 0.9144,
      "step": 149580
    },
    {
      "epoch": 0.5242756556512352,
      "grad_norm": 3.234375,
      "learning_rate": 4.5846865580971524e-05,
      "loss": 0.9568,
      "step": 149590
    },
    {
      "epoch": 0.5243107031581309,
      "grad_norm": 2.890625,
      "learning_rate": 4.584621655230782e-05,
      "loss": 0.9595,
      "step": 149600
    },
    {
      "epoch": 0.5243457506650264,
      "grad_norm": 3.15625,
      "learning_rate": 4.584556752364412e-05,
      "loss": 0.9294,
      "step": 149610
    },
    {
      "epoch": 0.5243807981719221,
      "grad_norm": 3.09375,
      "learning_rate": 4.5844918494980414e-05,
      "loss": 0.9532,
      "step": 149620
    },
    {
      "epoch": 0.5244158456788176,
      "grad_norm": 2.71875,
      "learning_rate": 4.5844269466316716e-05,
      "loss": 0.8766,
      "step": 149630
    },
    {
      "epoch": 0.5244508931857133,
      "grad_norm": 2.984375,
      "learning_rate": 4.584362043765301e-05,
      "loss": 0.8745,
      "step": 149640
    },
    {
      "epoch": 0.5244859406926088,
      "grad_norm": 2.984375,
      "learning_rate": 4.584297140898931e-05,
      "loss": 0.9774,
      "step": 149650
    },
    {
      "epoch": 0.5245209881995044,
      "grad_norm": 3.46875,
      "learning_rate": 4.5842322380325606e-05,
      "loss": 0.9167,
      "step": 149660
    },
    {
      "epoch": 0.5245560357064,
      "grad_norm": 3.03125,
      "learning_rate": 4.584167335166191e-05,
      "loss": 0.9305,
      "step": 149670
    },
    {
      "epoch": 0.5245910832132956,
      "grad_norm": 3.203125,
      "learning_rate": 4.58410243229982e-05,
      "loss": 1.0408,
      "step": 149680
    },
    {
      "epoch": 0.5246261307201913,
      "grad_norm": 3.234375,
      "learning_rate": 4.5840375294334504e-05,
      "loss": 0.9243,
      "step": 149690
    },
    {
      "epoch": 0.5246611782270868,
      "grad_norm": 2.6875,
      "learning_rate": 4.58397262656708e-05,
      "loss": 0.9513,
      "step": 149700
    },
    {
      "epoch": 0.5246962257339824,
      "grad_norm": 2.875,
      "learning_rate": 4.58390772370071e-05,
      "loss": 0.8691,
      "step": 149710
    },
    {
      "epoch": 0.524731273240878,
      "grad_norm": 2.921875,
      "learning_rate": 4.58384282083434e-05,
      "loss": 0.9021,
      "step": 149720
    },
    {
      "epoch": 0.5247663207477736,
      "grad_norm": 3.125,
      "learning_rate": 4.5837779179679696e-05,
      "loss": 0.9562,
      "step": 149730
    },
    {
      "epoch": 0.5248013682546692,
      "grad_norm": 3.25,
      "learning_rate": 4.583713015101599e-05,
      "loss": 0.836,
      "step": 149740
    },
    {
      "epoch": 0.5248364157615648,
      "grad_norm": 3.203125,
      "learning_rate": 4.5836481122352285e-05,
      "loss": 0.9289,
      "step": 149750
    },
    {
      "epoch": 0.5248714632684603,
      "grad_norm": 3.09375,
      "learning_rate": 4.5835832093688586e-05,
      "loss": 0.9198,
      "step": 149760
    },
    {
      "epoch": 0.524906510775356,
      "grad_norm": 3.8125,
      "learning_rate": 4.583518306502488e-05,
      "loss": 1.0005,
      "step": 149770
    },
    {
      "epoch": 0.5249415582822516,
      "grad_norm": 3.078125,
      "learning_rate": 4.583453403636118e-05,
      "loss": 0.899,
      "step": 149780
    },
    {
      "epoch": 0.5249766057891472,
      "grad_norm": 2.875,
      "learning_rate": 4.583388500769748e-05,
      "loss": 0.9925,
      "step": 149790
    },
    {
      "epoch": 0.5250116532960428,
      "grad_norm": 2.625,
      "learning_rate": 4.583323597903378e-05,
      "loss": 0.9014,
      "step": 149800
    },
    {
      "epoch": 0.5250467008029384,
      "grad_norm": 3.25,
      "learning_rate": 4.583258695037008e-05,
      "loss": 0.932,
      "step": 149810
    },
    {
      "epoch": 0.525081748309834,
      "grad_norm": 2.734375,
      "learning_rate": 4.5831937921706374e-05,
      "loss": 0.9213,
      "step": 149820
    },
    {
      "epoch": 0.5251167958167295,
      "grad_norm": 2.828125,
      "learning_rate": 4.5831288893042676e-05,
      "loss": 0.9614,
      "step": 149830
    },
    {
      "epoch": 0.5251518433236252,
      "grad_norm": 3.03125,
      "learning_rate": 4.583063986437897e-05,
      "loss": 0.9,
      "step": 149840
    },
    {
      "epoch": 0.5251868908305207,
      "grad_norm": 2.734375,
      "learning_rate": 4.582999083571527e-05,
      "loss": 0.9242,
      "step": 149850
    },
    {
      "epoch": 0.5252219383374164,
      "grad_norm": 3.203125,
      "learning_rate": 4.5829341807051566e-05,
      "loss": 0.8957,
      "step": 149860
    },
    {
      "epoch": 0.525256985844312,
      "grad_norm": 3.140625,
      "learning_rate": 4.582869277838787e-05,
      "loss": 0.9697,
      "step": 149870
    },
    {
      "epoch": 0.5252920333512076,
      "grad_norm": 2.90625,
      "learning_rate": 4.582804374972416e-05,
      "loss": 0.9363,
      "step": 149880
    },
    {
      "epoch": 0.5253270808581032,
      "grad_norm": 3.421875,
      "learning_rate": 4.5827394721060464e-05,
      "loss": 0.8494,
      "step": 149890
    },
    {
      "epoch": 0.5253621283649987,
      "grad_norm": 3.015625,
      "learning_rate": 4.582674569239676e-05,
      "loss": 0.8736,
      "step": 149900
    },
    {
      "epoch": 0.5253971758718944,
      "grad_norm": 2.953125,
      "learning_rate": 4.582609666373306e-05,
      "loss": 0.9343,
      "step": 149910
    },
    {
      "epoch": 0.5254322233787899,
      "grad_norm": 2.796875,
      "learning_rate": 4.5825447635069354e-05,
      "loss": 0.8535,
      "step": 149920
    },
    {
      "epoch": 0.5254672708856856,
      "grad_norm": 3.59375,
      "learning_rate": 4.5824798606405656e-05,
      "loss": 0.9932,
      "step": 149930
    },
    {
      "epoch": 0.5255023183925811,
      "grad_norm": 2.828125,
      "learning_rate": 4.582414957774196e-05,
      "loss": 0.9118,
      "step": 149940
    },
    {
      "epoch": 0.5255373658994767,
      "grad_norm": 2.90625,
      "learning_rate": 4.582350054907825e-05,
      "loss": 0.9366,
      "step": 149950
    },
    {
      "epoch": 0.5255724134063723,
      "grad_norm": 3.015625,
      "learning_rate": 4.582285152041455e-05,
      "loss": 1.0039,
      "step": 149960
    },
    {
      "epoch": 0.5256074609132679,
      "grad_norm": 3.03125,
      "learning_rate": 4.582220249175085e-05,
      "loss": 0.9927,
      "step": 149970
    },
    {
      "epoch": 0.5256425084201636,
      "grad_norm": 2.875,
      "learning_rate": 4.582155346308715e-05,
      "loss": 0.8817,
      "step": 149980
    },
    {
      "epoch": 0.5256775559270591,
      "grad_norm": 2.734375,
      "learning_rate": 4.5820904434423444e-05,
      "loss": 0.9449,
      "step": 149990
    },
    {
      "epoch": 0.5257126034339548,
      "grad_norm": 2.921875,
      "learning_rate": 4.5820255405759745e-05,
      "loss": 0.9209,
      "step": 150000
    },
    {
      "epoch": 0.5257126034339548,
      "eval_loss": 0.8688340187072754,
      "eval_runtime": 561.061,
      "eval_samples_per_second": 678.065,
      "eval_steps_per_second": 56.505,
      "step": 150000
    },
    {
      "epoch": 0.5257476509408503,
      "grad_norm": 3.0,
      "learning_rate": 4.581960637709604e-05,
      "loss": 0.9467,
      "step": 150010
    },
    {
      "epoch": 0.5257826984477459,
      "grad_norm": 2.9375,
      "learning_rate": 4.581895734843234e-05,
      "loss": 0.9038,
      "step": 150020
    },
    {
      "epoch": 0.5258177459546415,
      "grad_norm": 3.171875,
      "learning_rate": 4.5818308319768636e-05,
      "loss": 0.8527,
      "step": 150030
    },
    {
      "epoch": 0.5258527934615371,
      "grad_norm": 3.265625,
      "learning_rate": 4.581765929110494e-05,
      "loss": 0.9908,
      "step": 150040
    },
    {
      "epoch": 0.5258878409684327,
      "grad_norm": 2.796875,
      "learning_rate": 4.581701026244123e-05,
      "loss": 0.9711,
      "step": 150050
    },
    {
      "epoch": 0.5259228884753283,
      "grad_norm": 3.03125,
      "learning_rate": 4.581636123377753e-05,
      "loss": 0.9492,
      "step": 150060
    },
    {
      "epoch": 0.525957935982224,
      "grad_norm": 2.890625,
      "learning_rate": 4.581571220511383e-05,
      "loss": 0.965,
      "step": 150070
    },
    {
      "epoch": 0.5259929834891195,
      "grad_norm": 2.578125,
      "learning_rate": 4.581506317645013e-05,
      "loss": 0.8484,
      "step": 150080
    },
    {
      "epoch": 0.5260280309960151,
      "grad_norm": 3.421875,
      "learning_rate": 4.581441414778643e-05,
      "loss": 0.9219,
      "step": 150090
    },
    {
      "epoch": 0.5260630785029107,
      "grad_norm": 2.84375,
      "learning_rate": 4.5813765119122725e-05,
      "loss": 0.9036,
      "step": 150100
    },
    {
      "epoch": 0.5260981260098063,
      "grad_norm": 2.75,
      "learning_rate": 4.5813116090459027e-05,
      "loss": 0.8469,
      "step": 150110
    },
    {
      "epoch": 0.5261331735167019,
      "grad_norm": 2.765625,
      "learning_rate": 4.5812467061795314e-05,
      "loss": 0.9102,
      "step": 150120
    },
    {
      "epoch": 0.5261682210235975,
      "grad_norm": 2.984375,
      "learning_rate": 4.5811818033131616e-05,
      "loss": 0.9826,
      "step": 150130
    },
    {
      "epoch": 0.526203268530493,
      "grad_norm": 2.78125,
      "learning_rate": 4.581116900446791e-05,
      "loss": 1.0008,
      "step": 150140
    },
    {
      "epoch": 0.5262383160373887,
      "grad_norm": 2.59375,
      "learning_rate": 4.581051997580421e-05,
      "loss": 0.9171,
      "step": 150150
    },
    {
      "epoch": 0.5262733635442842,
      "grad_norm": 2.734375,
      "learning_rate": 4.5809870947140506e-05,
      "loss": 0.9006,
      "step": 150160
    },
    {
      "epoch": 0.5263084110511799,
      "grad_norm": 2.640625,
      "learning_rate": 4.580922191847681e-05,
      "loss": 0.9155,
      "step": 150170
    },
    {
      "epoch": 0.5263434585580755,
      "grad_norm": 3.15625,
      "learning_rate": 4.580857288981311e-05,
      "loss": 0.8715,
      "step": 150180
    },
    {
      "epoch": 0.526378506064971,
      "grad_norm": 3.09375,
      "learning_rate": 4.5807923861149404e-05,
      "loss": 0.9349,
      "step": 150190
    },
    {
      "epoch": 0.5264135535718667,
      "grad_norm": 3.140625,
      "learning_rate": 4.5807274832485705e-05,
      "loss": 0.9434,
      "step": 150200
    },
    {
      "epoch": 0.5264486010787622,
      "grad_norm": 2.796875,
      "learning_rate": 4.5806625803822e-05,
      "loss": 0.9752,
      "step": 150210
    },
    {
      "epoch": 0.5264836485856579,
      "grad_norm": 2.609375,
      "learning_rate": 4.58059767751583e-05,
      "loss": 1.0145,
      "step": 150220
    },
    {
      "epoch": 0.5265186960925534,
      "grad_norm": 3.171875,
      "learning_rate": 4.5805327746494596e-05,
      "loss": 0.9786,
      "step": 150230
    },
    {
      "epoch": 0.5265537435994491,
      "grad_norm": 2.953125,
      "learning_rate": 4.58046787178309e-05,
      "loss": 0.9135,
      "step": 150240
    },
    {
      "epoch": 0.5265887911063446,
      "grad_norm": 3.28125,
      "learning_rate": 4.580402968916719e-05,
      "loss": 0.94,
      "step": 150250
    },
    {
      "epoch": 0.5266238386132402,
      "grad_norm": 3.015625,
      "learning_rate": 4.580338066050349e-05,
      "loss": 0.9242,
      "step": 150260
    },
    {
      "epoch": 0.5266588861201359,
      "grad_norm": 2.96875,
      "learning_rate": 4.580273163183979e-05,
      "loss": 0.8425,
      "step": 150270
    },
    {
      "epoch": 0.5266939336270314,
      "grad_norm": 3.328125,
      "learning_rate": 4.580208260317609e-05,
      "loss": 0.9489,
      "step": 150280
    },
    {
      "epoch": 0.5267289811339271,
      "grad_norm": 3.3125,
      "learning_rate": 4.5801433574512384e-05,
      "loss": 0.8615,
      "step": 150290
    },
    {
      "epoch": 0.5267640286408226,
      "grad_norm": 3.09375,
      "learning_rate": 4.5800784545848685e-05,
      "loss": 0.9153,
      "step": 150300
    },
    {
      "epoch": 0.5267990761477183,
      "grad_norm": 3.15625,
      "learning_rate": 4.5800135517184987e-05,
      "loss": 0.8416,
      "step": 150310
    },
    {
      "epoch": 0.5268341236546138,
      "grad_norm": 3.4375,
      "learning_rate": 4.579948648852128e-05,
      "loss": 0.9689,
      "step": 150320
    },
    {
      "epoch": 0.5268691711615094,
      "grad_norm": 3.046875,
      "learning_rate": 4.579883745985758e-05,
      "loss": 0.885,
      "step": 150330
    },
    {
      "epoch": 0.526904218668405,
      "grad_norm": 3.234375,
      "learning_rate": 4.579818843119388e-05,
      "loss": 0.9182,
      "step": 150340
    },
    {
      "epoch": 0.5269392661753006,
      "grad_norm": 2.78125,
      "learning_rate": 4.579753940253018e-05,
      "loss": 0.9256,
      "step": 150350
    },
    {
      "epoch": 0.5269743136821963,
      "grad_norm": 2.984375,
      "learning_rate": 4.579689037386647e-05,
      "loss": 0.9652,
      "step": 150360
    },
    {
      "epoch": 0.5270093611890918,
      "grad_norm": 2.953125,
      "learning_rate": 4.5796241345202775e-05,
      "loss": 0.952,
      "step": 150370
    },
    {
      "epoch": 0.5270444086959875,
      "grad_norm": 3.03125,
      "learning_rate": 4.579559231653907e-05,
      "loss": 0.9016,
      "step": 150380
    },
    {
      "epoch": 0.527079456202883,
      "grad_norm": 3.0625,
      "learning_rate": 4.579494328787537e-05,
      "loss": 0.9176,
      "step": 150390
    },
    {
      "epoch": 0.5271145037097786,
      "grad_norm": 3.015625,
      "learning_rate": 4.5794294259211665e-05,
      "loss": 0.9263,
      "step": 150400
    },
    {
      "epoch": 0.5271495512166742,
      "grad_norm": 2.75,
      "learning_rate": 4.5793645230547967e-05,
      "loss": 0.9813,
      "step": 150410
    },
    {
      "epoch": 0.5271845987235698,
      "grad_norm": 3.203125,
      "learning_rate": 4.579299620188426e-05,
      "loss": 0.945,
      "step": 150420
    },
    {
      "epoch": 0.5272196462304654,
      "grad_norm": 3.03125,
      "learning_rate": 4.579234717322056e-05,
      "loss": 0.8645,
      "step": 150430
    },
    {
      "epoch": 0.527254693737361,
      "grad_norm": 3.046875,
      "learning_rate": 4.579169814455686e-05,
      "loss": 0.9912,
      "step": 150440
    },
    {
      "epoch": 0.5272897412442565,
      "grad_norm": 3.890625,
      "learning_rate": 4.579104911589316e-05,
      "loss": 0.9966,
      "step": 150450
    },
    {
      "epoch": 0.5273247887511522,
      "grad_norm": 3.359375,
      "learning_rate": 4.579040008722946e-05,
      "loss": 0.8713,
      "step": 150460
    },
    {
      "epoch": 0.5273598362580478,
      "grad_norm": 3.0625,
      "learning_rate": 4.5789751058565755e-05,
      "loss": 0.8837,
      "step": 150470
    },
    {
      "epoch": 0.5273948837649434,
      "grad_norm": 3.265625,
      "learning_rate": 4.5789102029902056e-05,
      "loss": 0.9251,
      "step": 150480
    },
    {
      "epoch": 0.527429931271839,
      "grad_norm": 2.609375,
      "learning_rate": 4.5788453001238344e-05,
      "loss": 0.9008,
      "step": 150490
    },
    {
      "epoch": 0.5274649787787346,
      "grad_norm": 3.265625,
      "learning_rate": 4.5787803972574645e-05,
      "loss": 0.9091,
      "step": 150500
    },
    {
      "epoch": 0.5275000262856302,
      "grad_norm": 2.9375,
      "learning_rate": 4.578715494391094e-05,
      "loss": 0.9249,
      "step": 150510
    },
    {
      "epoch": 0.5275350737925257,
      "grad_norm": 3.21875,
      "learning_rate": 4.578650591524724e-05,
      "loss": 1.0042,
      "step": 150520
    },
    {
      "epoch": 0.5275701212994214,
      "grad_norm": 2.953125,
      "learning_rate": 4.5785856886583536e-05,
      "loss": 0.9464,
      "step": 150530
    },
    {
      "epoch": 0.5276051688063169,
      "grad_norm": 3.234375,
      "learning_rate": 4.578520785791984e-05,
      "loss": 0.9184,
      "step": 150540
    },
    {
      "epoch": 0.5276402163132126,
      "grad_norm": 4.21875,
      "learning_rate": 4.578455882925614e-05,
      "loss": 0.9718,
      "step": 150550
    },
    {
      "epoch": 0.5276752638201082,
      "grad_norm": 3.09375,
      "learning_rate": 4.578390980059243e-05,
      "loss": 1.0289,
      "step": 150560
    },
    {
      "epoch": 0.5277103113270037,
      "grad_norm": 2.65625,
      "learning_rate": 4.5783260771928735e-05,
      "loss": 0.9081,
      "step": 150570
    },
    {
      "epoch": 0.5277453588338994,
      "grad_norm": 3.359375,
      "learning_rate": 4.578261174326503e-05,
      "loss": 0.9792,
      "step": 150580
    },
    {
      "epoch": 0.5277804063407949,
      "grad_norm": 2.828125,
      "learning_rate": 4.578196271460133e-05,
      "loss": 0.9304,
      "step": 150590
    },
    {
      "epoch": 0.5278154538476906,
      "grad_norm": 3.0,
      "learning_rate": 4.5781313685937625e-05,
      "loss": 0.8218,
      "step": 150600
    },
    {
      "epoch": 0.5278505013545861,
      "grad_norm": 2.921875,
      "learning_rate": 4.5780664657273927e-05,
      "loss": 0.9196,
      "step": 150610
    },
    {
      "epoch": 0.5278855488614818,
      "grad_norm": 2.859375,
      "learning_rate": 4.578001562861022e-05,
      "loss": 0.9177,
      "step": 150620
    },
    {
      "epoch": 0.5279205963683773,
      "grad_norm": 2.59375,
      "learning_rate": 4.577936659994652e-05,
      "loss": 0.9297,
      "step": 150630
    },
    {
      "epoch": 0.5279556438752729,
      "grad_norm": 3.25,
      "learning_rate": 4.577871757128282e-05,
      "loss": 0.9085,
      "step": 150640
    },
    {
      "epoch": 0.5279906913821685,
      "grad_norm": 2.96875,
      "learning_rate": 4.577806854261912e-05,
      "loss": 0.9721,
      "step": 150650
    },
    {
      "epoch": 0.5280257388890641,
      "grad_norm": 2.609375,
      "learning_rate": 4.577741951395541e-05,
      "loss": 0.9301,
      "step": 150660
    },
    {
      "epoch": 0.5280607863959598,
      "grad_norm": 2.546875,
      "learning_rate": 4.5776770485291715e-05,
      "loss": 0.983,
      "step": 150670
    },
    {
      "epoch": 0.5280958339028553,
      "grad_norm": 3.15625,
      "learning_rate": 4.5776121456628016e-05,
      "loss": 0.8718,
      "step": 150680
    },
    {
      "epoch": 0.528130881409751,
      "grad_norm": 3.03125,
      "learning_rate": 4.577547242796431e-05,
      "loss": 0.9281,
      "step": 150690
    },
    {
      "epoch": 0.5281659289166465,
      "grad_norm": 3.40625,
      "learning_rate": 4.577482339930061e-05,
      "loss": 0.9754,
      "step": 150700
    },
    {
      "epoch": 0.5282009764235421,
      "grad_norm": 3.03125,
      "learning_rate": 4.5774174370636907e-05,
      "loss": 0.9316,
      "step": 150710
    },
    {
      "epoch": 0.5282360239304377,
      "grad_norm": 3.578125,
      "learning_rate": 4.577352534197321e-05,
      "loss": 0.9004,
      "step": 150720
    },
    {
      "epoch": 0.5282710714373333,
      "grad_norm": 2.828125,
      "learning_rate": 4.57728763133095e-05,
      "loss": 0.9455,
      "step": 150730
    },
    {
      "epoch": 0.5283061189442289,
      "grad_norm": 2.921875,
      "learning_rate": 4.5772227284645804e-05,
      "loss": 0.9867,
      "step": 150740
    },
    {
      "epoch": 0.5283411664511245,
      "grad_norm": 2.75,
      "learning_rate": 4.57715782559821e-05,
      "loss": 0.9157,
      "step": 150750
    },
    {
      "epoch": 0.5283762139580201,
      "grad_norm": 2.90625,
      "learning_rate": 4.57709292273184e-05,
      "loss": 0.8827,
      "step": 150760
    },
    {
      "epoch": 0.5284112614649157,
      "grad_norm": 2.890625,
      "learning_rate": 4.5770280198654695e-05,
      "loss": 0.9189,
      "step": 150770
    },
    {
      "epoch": 0.5284463089718113,
      "grad_norm": 3.21875,
      "learning_rate": 4.5769631169990996e-05,
      "loss": 0.9653,
      "step": 150780
    },
    {
      "epoch": 0.5284813564787069,
      "grad_norm": 3.015625,
      "learning_rate": 4.576898214132729e-05,
      "loss": 0.8828,
      "step": 150790
    },
    {
      "epoch": 0.5285164039856025,
      "grad_norm": 2.671875,
      "learning_rate": 4.576833311266359e-05,
      "loss": 0.9213,
      "step": 150800
    },
    {
      "epoch": 0.528551451492498,
      "grad_norm": 3.09375,
      "learning_rate": 4.576768408399989e-05,
      "loss": 0.9331,
      "step": 150810
    },
    {
      "epoch": 0.5285864989993937,
      "grad_norm": 3.375,
      "learning_rate": 4.576703505533619e-05,
      "loss": 0.9524,
      "step": 150820
    },
    {
      "epoch": 0.5286215465062892,
      "grad_norm": 3.140625,
      "learning_rate": 4.576638602667249e-05,
      "loss": 0.9569,
      "step": 150830
    },
    {
      "epoch": 0.5286565940131849,
      "grad_norm": 3.0,
      "learning_rate": 4.5765736998008784e-05,
      "loss": 0.9521,
      "step": 150840
    },
    {
      "epoch": 0.5286916415200805,
      "grad_norm": 2.953125,
      "learning_rate": 4.5765087969345085e-05,
      "loss": 0.8734,
      "step": 150850
    },
    {
      "epoch": 0.5287266890269761,
      "grad_norm": 2.921875,
      "learning_rate": 4.576443894068138e-05,
      "loss": 0.9785,
      "step": 150860
    },
    {
      "epoch": 0.5287617365338717,
      "grad_norm": 3.203125,
      "learning_rate": 4.5763789912017675e-05,
      "loss": 0.8994,
      "step": 150870
    },
    {
      "epoch": 0.5287967840407672,
      "grad_norm": 3.09375,
      "learning_rate": 4.576314088335397e-05,
      "loss": 0.9048,
      "step": 150880
    },
    {
      "epoch": 0.5288318315476629,
      "grad_norm": 2.921875,
      "learning_rate": 4.576249185469027e-05,
      "loss": 0.8439,
      "step": 150890
    },
    {
      "epoch": 0.5288668790545584,
      "grad_norm": 4.4375,
      "learning_rate": 4.576184282602657e-05,
      "loss": 0.9703,
      "step": 150900
    },
    {
      "epoch": 0.5289019265614541,
      "grad_norm": 2.796875,
      "learning_rate": 4.5761193797362867e-05,
      "loss": 0.9259,
      "step": 150910
    },
    {
      "epoch": 0.5289369740683496,
      "grad_norm": 2.921875,
      "learning_rate": 4.576054476869917e-05,
      "loss": 0.9459,
      "step": 150920
    },
    {
      "epoch": 0.5289720215752453,
      "grad_norm": 2.984375,
      "learning_rate": 4.575989574003546e-05,
      "loss": 0.9223,
      "step": 150930
    },
    {
      "epoch": 0.5290070690821408,
      "grad_norm": 3.4375,
      "learning_rate": 4.5759246711371764e-05,
      "loss": 1.0656,
      "step": 150940
    },
    {
      "epoch": 0.5290421165890364,
      "grad_norm": 2.90625,
      "learning_rate": 4.575859768270806e-05,
      "loss": 0.9023,
      "step": 150950
    },
    {
      "epoch": 0.5290771640959321,
      "grad_norm": 2.90625,
      "learning_rate": 4.575794865404436e-05,
      "loss": 0.9445,
      "step": 150960
    },
    {
      "epoch": 0.5291122116028276,
      "grad_norm": 2.90625,
      "learning_rate": 4.5757299625380655e-05,
      "loss": 0.852,
      "step": 150970
    },
    {
      "epoch": 0.5291472591097233,
      "grad_norm": 3.015625,
      "learning_rate": 4.5756650596716956e-05,
      "loss": 0.9507,
      "step": 150980
    },
    {
      "epoch": 0.5291823066166188,
      "grad_norm": 3.078125,
      "learning_rate": 4.575600156805325e-05,
      "loss": 0.9142,
      "step": 150990
    },
    {
      "epoch": 0.5292173541235144,
      "grad_norm": 2.8125,
      "learning_rate": 4.575535253938955e-05,
      "loss": 0.9063,
      "step": 151000
    },
    {
      "epoch": 0.52925240163041,
      "grad_norm": 2.984375,
      "learning_rate": 4.5754703510725847e-05,
      "loss": 0.9212,
      "step": 151010
    },
    {
      "epoch": 0.5292874491373056,
      "grad_norm": 3.546875,
      "learning_rate": 4.575405448206215e-05,
      "loss": 0.9715,
      "step": 151020
    },
    {
      "epoch": 0.5293224966442012,
      "grad_norm": 3.328125,
      "learning_rate": 4.575340545339844e-05,
      "loss": 0.9555,
      "step": 151030
    },
    {
      "epoch": 0.5293575441510968,
      "grad_norm": 2.921875,
      "learning_rate": 4.5752756424734744e-05,
      "loss": 0.8969,
      "step": 151040
    },
    {
      "epoch": 0.5293925916579925,
      "grad_norm": 3.0,
      "learning_rate": 4.5752107396071045e-05,
      "loss": 0.9172,
      "step": 151050
    },
    {
      "epoch": 0.529427639164888,
      "grad_norm": 3.40625,
      "learning_rate": 4.575145836740734e-05,
      "loss": 0.9522,
      "step": 151060
    },
    {
      "epoch": 0.5294626866717836,
      "grad_norm": 2.890625,
      "learning_rate": 4.575080933874364e-05,
      "loss": 0.9509,
      "step": 151070
    },
    {
      "epoch": 0.5294977341786792,
      "grad_norm": 2.796875,
      "learning_rate": 4.5750160310079936e-05,
      "loss": 0.8358,
      "step": 151080
    },
    {
      "epoch": 0.5295327816855748,
      "grad_norm": 3.515625,
      "learning_rate": 4.574951128141624e-05,
      "loss": 0.9989,
      "step": 151090
    },
    {
      "epoch": 0.5295678291924704,
      "grad_norm": 2.6875,
      "learning_rate": 4.574886225275253e-05,
      "loss": 0.9352,
      "step": 151100
    },
    {
      "epoch": 0.529602876699366,
      "grad_norm": 2.65625,
      "learning_rate": 4.574821322408883e-05,
      "loss": 0.9189,
      "step": 151110
    },
    {
      "epoch": 0.5296379242062615,
      "grad_norm": 3.34375,
      "learning_rate": 4.574756419542513e-05,
      "loss": 0.9274,
      "step": 151120
    },
    {
      "epoch": 0.5296729717131572,
      "grad_norm": 3.546875,
      "learning_rate": 4.574691516676143e-05,
      "loss": 0.9446,
      "step": 151130
    },
    {
      "epoch": 0.5297080192200527,
      "grad_norm": 2.75,
      "learning_rate": 4.5746266138097724e-05,
      "loss": 0.992,
      "step": 151140
    },
    {
      "epoch": 0.5297430667269484,
      "grad_norm": 2.9375,
      "learning_rate": 4.5745617109434025e-05,
      "loss": 0.9626,
      "step": 151150
    },
    {
      "epoch": 0.529778114233844,
      "grad_norm": 3.296875,
      "learning_rate": 4.574496808077032e-05,
      "loss": 0.8833,
      "step": 151160
    },
    {
      "epoch": 0.5298131617407396,
      "grad_norm": 2.71875,
      "learning_rate": 4.574431905210662e-05,
      "loss": 0.9713,
      "step": 151170
    },
    {
      "epoch": 0.5298482092476352,
      "grad_norm": 3.078125,
      "learning_rate": 4.574367002344292e-05,
      "loss": 0.9732,
      "step": 151180
    },
    {
      "epoch": 0.5298832567545307,
      "grad_norm": 3.1875,
      "learning_rate": 4.574302099477922e-05,
      "loss": 0.9406,
      "step": 151190
    },
    {
      "epoch": 0.5299183042614264,
      "grad_norm": 2.6875,
      "learning_rate": 4.574237196611552e-05,
      "loss": 0.8736,
      "step": 151200
    },
    {
      "epoch": 0.5299533517683219,
      "grad_norm": 3.125,
      "learning_rate": 4.574172293745181e-05,
      "loss": 0.9407,
      "step": 151210
    },
    {
      "epoch": 0.5299883992752176,
      "grad_norm": 3.09375,
      "learning_rate": 4.5741073908788115e-05,
      "loss": 0.8482,
      "step": 151220
    },
    {
      "epoch": 0.5300234467821131,
      "grad_norm": 3.453125,
      "learning_rate": 4.574042488012441e-05,
      "loss": 0.9549,
      "step": 151230
    },
    {
      "epoch": 0.5300584942890088,
      "grad_norm": 3.1875,
      "learning_rate": 4.573977585146071e-05,
      "loss": 0.9825,
      "step": 151240
    },
    {
      "epoch": 0.5300935417959044,
      "grad_norm": 2.96875,
      "learning_rate": 4.5739126822797e-05,
      "loss": 0.8794,
      "step": 151250
    },
    {
      "epoch": 0.5301285893027999,
      "grad_norm": 2.90625,
      "learning_rate": 4.57384777941333e-05,
      "loss": 0.933,
      "step": 151260
    },
    {
      "epoch": 0.5301636368096956,
      "grad_norm": 2.84375,
      "learning_rate": 4.57378287654696e-05,
      "loss": 0.9403,
      "step": 151270
    },
    {
      "epoch": 0.5301986843165911,
      "grad_norm": 2.828125,
      "learning_rate": 4.5737179736805896e-05,
      "loss": 1.0049,
      "step": 151280
    },
    {
      "epoch": 0.5302337318234868,
      "grad_norm": 3.078125,
      "learning_rate": 4.57365307081422e-05,
      "loss": 0.9625,
      "step": 151290
    },
    {
      "epoch": 0.5302687793303823,
      "grad_norm": 2.546875,
      "learning_rate": 4.573588167947849e-05,
      "loss": 0.9205,
      "step": 151300
    },
    {
      "epoch": 0.530303826837278,
      "grad_norm": 2.796875,
      "learning_rate": 4.573523265081479e-05,
      "loss": 0.933,
      "step": 151310
    },
    {
      "epoch": 0.5303388743441735,
      "grad_norm": 3.234375,
      "learning_rate": 4.573458362215109e-05,
      "loss": 0.9162,
      "step": 151320
    },
    {
      "epoch": 0.5303739218510691,
      "grad_norm": 3.171875,
      "learning_rate": 4.573393459348739e-05,
      "loss": 0.957,
      "step": 151330
    },
    {
      "epoch": 0.5304089693579647,
      "grad_norm": 2.90625,
      "learning_rate": 4.5733285564823684e-05,
      "loss": 0.9808,
      "step": 151340
    },
    {
      "epoch": 0.5304440168648603,
      "grad_norm": 2.796875,
      "learning_rate": 4.5732636536159985e-05,
      "loss": 0.8961,
      "step": 151350
    },
    {
      "epoch": 0.530479064371756,
      "grad_norm": 3.328125,
      "learning_rate": 4.573198750749628e-05,
      "loss": 0.9541,
      "step": 151360
    },
    {
      "epoch": 0.5305141118786515,
      "grad_norm": 3.34375,
      "learning_rate": 4.573133847883258e-05,
      "loss": 0.935,
      "step": 151370
    },
    {
      "epoch": 0.5305491593855471,
      "grad_norm": 3.03125,
      "learning_rate": 4.5730689450168876e-05,
      "loss": 0.8744,
      "step": 151380
    },
    {
      "epoch": 0.5305842068924427,
      "grad_norm": 2.640625,
      "learning_rate": 4.573004042150518e-05,
      "loss": 0.9017,
      "step": 151390
    },
    {
      "epoch": 0.5306192543993383,
      "grad_norm": 3.0625,
      "learning_rate": 4.572939139284147e-05,
      "loss": 0.9647,
      "step": 151400
    },
    {
      "epoch": 0.5306543019062339,
      "grad_norm": 3.109375,
      "learning_rate": 4.572874236417777e-05,
      "loss": 0.9365,
      "step": 151410
    },
    {
      "epoch": 0.5306893494131295,
      "grad_norm": 3.15625,
      "learning_rate": 4.5728093335514075e-05,
      "loss": 0.9061,
      "step": 151420
    },
    {
      "epoch": 0.530724396920025,
      "grad_norm": 3.15625,
      "learning_rate": 4.572744430685037e-05,
      "loss": 0.8832,
      "step": 151430
    },
    {
      "epoch": 0.5307594444269207,
      "grad_norm": 3.015625,
      "learning_rate": 4.572679527818667e-05,
      "loss": 0.9497,
      "step": 151440
    },
    {
      "epoch": 0.5307944919338163,
      "grad_norm": 2.828125,
      "learning_rate": 4.5726146249522965e-05,
      "loss": 0.9044,
      "step": 151450
    },
    {
      "epoch": 0.5308295394407119,
      "grad_norm": 2.984375,
      "learning_rate": 4.572549722085927e-05,
      "loss": 0.9314,
      "step": 151460
    },
    {
      "epoch": 0.5308645869476075,
      "grad_norm": 2.671875,
      "learning_rate": 4.572484819219556e-05,
      "loss": 0.9341,
      "step": 151470
    },
    {
      "epoch": 0.530899634454503,
      "grad_norm": 3.78125,
      "learning_rate": 4.572419916353186e-05,
      "loss": 0.9836,
      "step": 151480
    },
    {
      "epoch": 0.5309346819613987,
      "grad_norm": 3.0625,
      "learning_rate": 4.572355013486816e-05,
      "loss": 0.9179,
      "step": 151490
    },
    {
      "epoch": 0.5309697294682942,
      "grad_norm": 3.421875,
      "learning_rate": 4.572290110620446e-05,
      "loss": 0.9092,
      "step": 151500
    },
    {
      "epoch": 0.5310047769751899,
      "grad_norm": 2.53125,
      "learning_rate": 4.572225207754075e-05,
      "loss": 0.9662,
      "step": 151510
    },
    {
      "epoch": 0.5310398244820854,
      "grad_norm": 3.359375,
      "learning_rate": 4.5721603048877055e-05,
      "loss": 0.9828,
      "step": 151520
    },
    {
      "epoch": 0.5310748719889811,
      "grad_norm": 2.9375,
      "learning_rate": 4.572095402021335e-05,
      "loss": 0.8802,
      "step": 151530
    },
    {
      "epoch": 0.5311099194958767,
      "grad_norm": 4.03125,
      "learning_rate": 4.572030499154965e-05,
      "loss": 0.8697,
      "step": 151540
    },
    {
      "epoch": 0.5311449670027723,
      "grad_norm": 2.765625,
      "learning_rate": 4.571965596288595e-05,
      "loss": 0.8873,
      "step": 151550
    },
    {
      "epoch": 0.5311800145096679,
      "grad_norm": 2.859375,
      "learning_rate": 4.571900693422225e-05,
      "loss": 0.909,
      "step": 151560
    },
    {
      "epoch": 0.5312150620165634,
      "grad_norm": 3.171875,
      "learning_rate": 4.571835790555855e-05,
      "loss": 0.928,
      "step": 151570
    },
    {
      "epoch": 0.5312501095234591,
      "grad_norm": 3.125,
      "learning_rate": 4.571770887689484e-05,
      "loss": 0.9399,
      "step": 151580
    },
    {
      "epoch": 0.5312851570303546,
      "grad_norm": 2.96875,
      "learning_rate": 4.5717059848231144e-05,
      "loss": 0.9367,
      "step": 151590
    },
    {
      "epoch": 0.5313202045372503,
      "grad_norm": 2.609375,
      "learning_rate": 4.571641081956744e-05,
      "loss": 0.9246,
      "step": 151600
    },
    {
      "epoch": 0.5313552520441458,
      "grad_norm": 2.6875,
      "learning_rate": 4.571576179090374e-05,
      "loss": 0.8362,
      "step": 151610
    },
    {
      "epoch": 0.5313902995510414,
      "grad_norm": 3.3125,
      "learning_rate": 4.571511276224003e-05,
      "loss": 0.9428,
      "step": 151620
    },
    {
      "epoch": 0.531425347057937,
      "grad_norm": 2.890625,
      "learning_rate": 4.571446373357633e-05,
      "loss": 0.8545,
      "step": 151630
    },
    {
      "epoch": 0.5314603945648326,
      "grad_norm": 2.71875,
      "learning_rate": 4.571381470491263e-05,
      "loss": 0.9193,
      "step": 151640
    },
    {
      "epoch": 0.5314954420717283,
      "grad_norm": 2.84375,
      "learning_rate": 4.5713165676248925e-05,
      "loss": 0.9298,
      "step": 151650
    },
    {
      "epoch": 0.5315304895786238,
      "grad_norm": 3.0,
      "learning_rate": 4.571251664758523e-05,
      "loss": 0.9093,
      "step": 151660
    },
    {
      "epoch": 0.5315655370855195,
      "grad_norm": 5.875,
      "learning_rate": 4.571186761892152e-05,
      "loss": 1.0092,
      "step": 151670
    },
    {
      "epoch": 0.531600584592415,
      "grad_norm": 2.859375,
      "learning_rate": 4.571121859025782e-05,
      "loss": 0.923,
      "step": 151680
    },
    {
      "epoch": 0.5316356320993106,
      "grad_norm": 3.0,
      "learning_rate": 4.571056956159412e-05,
      "loss": 0.9016,
      "step": 151690
    },
    {
      "epoch": 0.5316706796062062,
      "grad_norm": 2.828125,
      "learning_rate": 4.570992053293042e-05,
      "loss": 1.0005,
      "step": 151700
    },
    {
      "epoch": 0.5317057271131018,
      "grad_norm": 3.09375,
      "learning_rate": 4.570927150426671e-05,
      "loss": 0.9572,
      "step": 151710
    },
    {
      "epoch": 0.5317407746199974,
      "grad_norm": 3.15625,
      "learning_rate": 4.5708622475603015e-05,
      "loss": 0.925,
      "step": 151720
    },
    {
      "epoch": 0.531775822126893,
      "grad_norm": 3.125,
      "learning_rate": 4.570797344693931e-05,
      "loss": 0.9083,
      "step": 151730
    },
    {
      "epoch": 0.5318108696337887,
      "grad_norm": 2.65625,
      "learning_rate": 4.570732441827561e-05,
      "loss": 0.9397,
      "step": 151740
    },
    {
      "epoch": 0.5318459171406842,
      "grad_norm": 2.578125,
      "learning_rate": 4.5706675389611905e-05,
      "loss": 0.9527,
      "step": 151750
    },
    {
      "epoch": 0.5318809646475798,
      "grad_norm": 2.890625,
      "learning_rate": 4.570602636094821e-05,
      "loss": 0.9611,
      "step": 151760
    },
    {
      "epoch": 0.5319160121544754,
      "grad_norm": 2.84375,
      "learning_rate": 4.570537733228451e-05,
      "loss": 0.8607,
      "step": 151770
    },
    {
      "epoch": 0.531951059661371,
      "grad_norm": 2.921875,
      "learning_rate": 4.57047283036208e-05,
      "loss": 0.8766,
      "step": 151780
    },
    {
      "epoch": 0.5319861071682666,
      "grad_norm": 3.265625,
      "learning_rate": 4.5704079274957104e-05,
      "loss": 1.0513,
      "step": 151790
    },
    {
      "epoch": 0.5320211546751622,
      "grad_norm": 3.28125,
      "learning_rate": 4.57034302462934e-05,
      "loss": 0.9221,
      "step": 151800
    },
    {
      "epoch": 0.5320562021820577,
      "grad_norm": 2.828125,
      "learning_rate": 4.57027812176297e-05,
      "loss": 0.9403,
      "step": 151810
    },
    {
      "epoch": 0.5320912496889534,
      "grad_norm": 2.96875,
      "learning_rate": 4.5702132188965995e-05,
      "loss": 0.9745,
      "step": 151820
    },
    {
      "epoch": 0.5321262971958489,
      "grad_norm": 2.90625,
      "learning_rate": 4.5701483160302296e-05,
      "loss": 0.9135,
      "step": 151830
    },
    {
      "epoch": 0.5321613447027446,
      "grad_norm": 3.234375,
      "learning_rate": 4.570083413163859e-05,
      "loss": 0.9404,
      "step": 151840
    },
    {
      "epoch": 0.5321963922096402,
      "grad_norm": 3.15625,
      "learning_rate": 4.570018510297489e-05,
      "loss": 0.8716,
      "step": 151850
    },
    {
      "epoch": 0.5322314397165357,
      "grad_norm": 5.40625,
      "learning_rate": 4.569953607431119e-05,
      "loss": 0.9567,
      "step": 151860
    },
    {
      "epoch": 0.5322664872234314,
      "grad_norm": 2.953125,
      "learning_rate": 4.569888704564749e-05,
      "loss": 0.8956,
      "step": 151870
    },
    {
      "epoch": 0.5323015347303269,
      "grad_norm": 3.125,
      "learning_rate": 4.569823801698378e-05,
      "loss": 0.9098,
      "step": 151880
    },
    {
      "epoch": 0.5323365822372226,
      "grad_norm": 2.859375,
      "learning_rate": 4.5697588988320084e-05,
      "loss": 0.9366,
      "step": 151890
    },
    {
      "epoch": 0.5323716297441181,
      "grad_norm": 3.125,
      "learning_rate": 4.569693995965638e-05,
      "loss": 0.9687,
      "step": 151900
    },
    {
      "epoch": 0.5324066772510138,
      "grad_norm": 3.09375,
      "learning_rate": 4.569629093099268e-05,
      "loss": 0.9507,
      "step": 151910
    },
    {
      "epoch": 0.5324417247579093,
      "grad_norm": 3.375,
      "learning_rate": 4.569564190232898e-05,
      "loss": 0.8704,
      "step": 151920
    },
    {
      "epoch": 0.5324767722648049,
      "grad_norm": 3.046875,
      "learning_rate": 4.5694992873665276e-05,
      "loss": 0.9164,
      "step": 151930
    },
    {
      "epoch": 0.5325118197717006,
      "grad_norm": 3.171875,
      "learning_rate": 4.569434384500158e-05,
      "loss": 0.8912,
      "step": 151940
    },
    {
      "epoch": 0.5325468672785961,
      "grad_norm": 3.203125,
      "learning_rate": 4.569369481633787e-05,
      "loss": 0.9145,
      "step": 151950
    },
    {
      "epoch": 0.5325819147854918,
      "grad_norm": 3.078125,
      "learning_rate": 4.5693045787674173e-05,
      "loss": 1.0072,
      "step": 151960
    },
    {
      "epoch": 0.5326169622923873,
      "grad_norm": 3.953125,
      "learning_rate": 4.569239675901047e-05,
      "loss": 1.0004,
      "step": 151970
    },
    {
      "epoch": 0.532652009799283,
      "grad_norm": 2.890625,
      "learning_rate": 4.569174773034677e-05,
      "loss": 0.9848,
      "step": 151980
    },
    {
      "epoch": 0.5326870573061785,
      "grad_norm": 3.0,
      "learning_rate": 4.5691098701683064e-05,
      "loss": 0.9394,
      "step": 151990
    },
    {
      "epoch": 0.5327221048130741,
      "grad_norm": 2.890625,
      "learning_rate": 4.569044967301936e-05,
      "loss": 1.0141,
      "step": 152000
    },
    {
      "epoch": 0.5327571523199697,
      "grad_norm": 3.015625,
      "learning_rate": 4.568980064435566e-05,
      "loss": 0.9143,
      "step": 152010
    },
    {
      "epoch": 0.5327921998268653,
      "grad_norm": 2.96875,
      "learning_rate": 4.5689151615691955e-05,
      "loss": 1.0249,
      "step": 152020
    },
    {
      "epoch": 0.532827247333761,
      "grad_norm": 2.890625,
      "learning_rate": 4.5688502587028256e-05,
      "loss": 0.8839,
      "step": 152030
    },
    {
      "epoch": 0.5328622948406565,
      "grad_norm": 3.03125,
      "learning_rate": 4.568785355836455e-05,
      "loss": 0.9242,
      "step": 152040
    },
    {
      "epoch": 0.5328973423475521,
      "grad_norm": 2.625,
      "learning_rate": 4.568720452970085e-05,
      "loss": 0.9211,
      "step": 152050
    },
    {
      "epoch": 0.5329323898544477,
      "grad_norm": 2.90625,
      "learning_rate": 4.568655550103715e-05,
      "loss": 0.9127,
      "step": 152060
    },
    {
      "epoch": 0.5329674373613433,
      "grad_norm": 2.828125,
      "learning_rate": 4.568590647237345e-05,
      "loss": 0.8576,
      "step": 152070
    },
    {
      "epoch": 0.5330024848682389,
      "grad_norm": 4.21875,
      "learning_rate": 4.568525744370974e-05,
      "loss": 0.8528,
      "step": 152080
    },
    {
      "epoch": 0.5330375323751345,
      "grad_norm": 2.859375,
      "learning_rate": 4.5684608415046044e-05,
      "loss": 0.8707,
      "step": 152090
    },
    {
      "epoch": 0.53307257988203,
      "grad_norm": 3.140625,
      "learning_rate": 4.568395938638234e-05,
      "loss": 0.8592,
      "step": 152100
    },
    {
      "epoch": 0.5331076273889257,
      "grad_norm": 3.09375,
      "learning_rate": 4.568331035771864e-05,
      "loss": 0.9257,
      "step": 152110
    },
    {
      "epoch": 0.5331426748958212,
      "grad_norm": 2.890625,
      "learning_rate": 4.5682661329054935e-05,
      "loss": 0.8745,
      "step": 152120
    },
    {
      "epoch": 0.5331777224027169,
      "grad_norm": 3.5625,
      "learning_rate": 4.5682012300391236e-05,
      "loss": 0.8955,
      "step": 152130
    },
    {
      "epoch": 0.5332127699096125,
      "grad_norm": 3.0625,
      "learning_rate": 4.568136327172754e-05,
      "loss": 1.078,
      "step": 152140
    },
    {
      "epoch": 0.5332478174165081,
      "grad_norm": 2.953125,
      "learning_rate": 4.568071424306383e-05,
      "loss": 0.8658,
      "step": 152150
    },
    {
      "epoch": 0.5332828649234037,
      "grad_norm": 2.671875,
      "learning_rate": 4.5680065214400133e-05,
      "loss": 0.8944,
      "step": 152160
    },
    {
      "epoch": 0.5333179124302992,
      "grad_norm": 3.171875,
      "learning_rate": 4.567941618573643e-05,
      "loss": 0.9717,
      "step": 152170
    },
    {
      "epoch": 0.5333529599371949,
      "grad_norm": 3.359375,
      "learning_rate": 4.567876715707273e-05,
      "loss": 0.9974,
      "step": 152180
    },
    {
      "epoch": 0.5333880074440904,
      "grad_norm": 2.765625,
      "learning_rate": 4.5678118128409024e-05,
      "loss": 0.897,
      "step": 152190
    },
    {
      "epoch": 0.5334230549509861,
      "grad_norm": 2.671875,
      "learning_rate": 4.5677469099745325e-05,
      "loss": 1.0064,
      "step": 152200
    },
    {
      "epoch": 0.5334581024578816,
      "grad_norm": 2.859375,
      "learning_rate": 4.567682007108162e-05,
      "loss": 0.8648,
      "step": 152210
    },
    {
      "epoch": 0.5334931499647773,
      "grad_norm": 2.703125,
      "learning_rate": 4.567617104241792e-05,
      "loss": 0.9376,
      "step": 152220
    },
    {
      "epoch": 0.5335281974716729,
      "grad_norm": 3.171875,
      "learning_rate": 4.5675522013754216e-05,
      "loss": 0.9608,
      "step": 152230
    },
    {
      "epoch": 0.5335632449785684,
      "grad_norm": 2.859375,
      "learning_rate": 4.567487298509052e-05,
      "loss": 0.9294,
      "step": 152240
    },
    {
      "epoch": 0.5335982924854641,
      "grad_norm": 2.984375,
      "learning_rate": 4.567422395642681e-05,
      "loss": 0.8971,
      "step": 152250
    },
    {
      "epoch": 0.5336333399923596,
      "grad_norm": 2.75,
      "learning_rate": 4.5673574927763113e-05,
      "loss": 0.9677,
      "step": 152260
    },
    {
      "epoch": 0.5336683874992553,
      "grad_norm": 3.09375,
      "learning_rate": 4.567292589909941e-05,
      "loss": 0.9117,
      "step": 152270
    },
    {
      "epoch": 0.5337034350061508,
      "grad_norm": 2.84375,
      "learning_rate": 4.567227687043571e-05,
      "loss": 0.9088,
      "step": 152280
    },
    {
      "epoch": 0.5337384825130465,
      "grad_norm": 2.75,
      "learning_rate": 4.567162784177201e-05,
      "loss": 0.847,
      "step": 152290
    },
    {
      "epoch": 0.533773530019942,
      "grad_norm": 3.015625,
      "learning_rate": 4.5670978813108305e-05,
      "loss": 0.912,
      "step": 152300
    },
    {
      "epoch": 0.5338085775268376,
      "grad_norm": 2.75,
      "learning_rate": 4.567032978444461e-05,
      "loss": 0.9243,
      "step": 152310
    },
    {
      "epoch": 0.5338436250337332,
      "grad_norm": 2.71875,
      "learning_rate": 4.56696807557809e-05,
      "loss": 0.9162,
      "step": 152320
    },
    {
      "epoch": 0.5338786725406288,
      "grad_norm": 2.78125,
      "learning_rate": 4.56690317271172e-05,
      "loss": 0.8446,
      "step": 152330
    },
    {
      "epoch": 0.5339137200475245,
      "grad_norm": 3.125,
      "learning_rate": 4.56683826984535e-05,
      "loss": 1.0518,
      "step": 152340
    },
    {
      "epoch": 0.53394876755442,
      "grad_norm": 3.3125,
      "learning_rate": 4.56677336697898e-05,
      "loss": 1.0598,
      "step": 152350
    },
    {
      "epoch": 0.5339838150613156,
      "grad_norm": 2.46875,
      "learning_rate": 4.5667084641126093e-05,
      "loss": 0.857,
      "step": 152360
    },
    {
      "epoch": 0.5340188625682112,
      "grad_norm": 3.078125,
      "learning_rate": 4.566643561246239e-05,
      "loss": 0.9773,
      "step": 152370
    },
    {
      "epoch": 0.5340539100751068,
      "grad_norm": 2.703125,
      "learning_rate": 4.566578658379869e-05,
      "loss": 0.935,
      "step": 152380
    },
    {
      "epoch": 0.5340889575820024,
      "grad_norm": 3.03125,
      "learning_rate": 4.5665137555134984e-05,
      "loss": 0.9657,
      "step": 152390
    },
    {
      "epoch": 0.534124005088898,
      "grad_norm": 3.109375,
      "learning_rate": 4.5664488526471285e-05,
      "loss": 0.9252,
      "step": 152400
    },
    {
      "epoch": 0.5341590525957935,
      "grad_norm": 2.8125,
      "learning_rate": 4.566383949780758e-05,
      "loss": 0.9131,
      "step": 152410
    },
    {
      "epoch": 0.5341941001026892,
      "grad_norm": 2.5625,
      "learning_rate": 4.566319046914388e-05,
      "loss": 0.9184,
      "step": 152420
    },
    {
      "epoch": 0.5342291476095848,
      "grad_norm": 2.46875,
      "learning_rate": 4.5662541440480176e-05,
      "loss": 0.9055,
      "step": 152430
    },
    {
      "epoch": 0.5342641951164804,
      "grad_norm": 2.875,
      "learning_rate": 4.566189241181648e-05,
      "loss": 1.0028,
      "step": 152440
    },
    {
      "epoch": 0.534299242623376,
      "grad_norm": 2.703125,
      "learning_rate": 4.566124338315277e-05,
      "loss": 0.9342,
      "step": 152450
    },
    {
      "epoch": 0.5343342901302716,
      "grad_norm": 3.21875,
      "learning_rate": 4.5660594354489073e-05,
      "loss": 0.8944,
      "step": 152460
    },
    {
      "epoch": 0.5343693376371672,
      "grad_norm": 3.671875,
      "learning_rate": 4.565994532582537e-05,
      "loss": 0.936,
      "step": 152470
    },
    {
      "epoch": 0.5344043851440627,
      "grad_norm": 3.328125,
      "learning_rate": 4.565929629716167e-05,
      "loss": 0.8835,
      "step": 152480
    },
    {
      "epoch": 0.5344394326509584,
      "grad_norm": 3.234375,
      "learning_rate": 4.5658647268497964e-05,
      "loss": 0.9327,
      "step": 152490
    },
    {
      "epoch": 0.5344744801578539,
      "grad_norm": 2.890625,
      "learning_rate": 4.5657998239834265e-05,
      "loss": 0.8801,
      "step": 152500
    },
    {
      "epoch": 0.5345095276647496,
      "grad_norm": 3.015625,
      "learning_rate": 4.565734921117057e-05,
      "loss": 0.9016,
      "step": 152510
    },
    {
      "epoch": 0.5345445751716452,
      "grad_norm": 2.828125,
      "learning_rate": 4.565670018250686e-05,
      "loss": 0.8975,
      "step": 152520
    },
    {
      "epoch": 0.5345796226785408,
      "grad_norm": 3.15625,
      "learning_rate": 4.565605115384316e-05,
      "loss": 0.9486,
      "step": 152530
    },
    {
      "epoch": 0.5346146701854364,
      "grad_norm": 2.625,
      "learning_rate": 4.565540212517946e-05,
      "loss": 0.9246,
      "step": 152540
    },
    {
      "epoch": 0.5346497176923319,
      "grad_norm": 2.953125,
      "learning_rate": 4.565475309651576e-05,
      "loss": 0.8703,
      "step": 152550
    },
    {
      "epoch": 0.5346847651992276,
      "grad_norm": 3.078125,
      "learning_rate": 4.5654104067852053e-05,
      "loss": 0.96,
      "step": 152560
    },
    {
      "epoch": 0.5347198127061231,
      "grad_norm": 3.5,
      "learning_rate": 4.5653455039188355e-05,
      "loss": 0.9357,
      "step": 152570
    },
    {
      "epoch": 0.5347548602130188,
      "grad_norm": 2.734375,
      "learning_rate": 4.565280601052465e-05,
      "loss": 0.9372,
      "step": 152580
    },
    {
      "epoch": 0.5347899077199143,
      "grad_norm": 3.09375,
      "learning_rate": 4.565215698186095e-05,
      "loss": 0.9454,
      "step": 152590
    },
    {
      "epoch": 0.53482495522681,
      "grad_norm": 3.125,
      "learning_rate": 4.5651507953197245e-05,
      "loss": 0.8606,
      "step": 152600
    },
    {
      "epoch": 0.5348600027337055,
      "grad_norm": 2.859375,
      "learning_rate": 4.565085892453355e-05,
      "loss": 0.9717,
      "step": 152610
    },
    {
      "epoch": 0.5348950502406011,
      "grad_norm": 3.1875,
      "learning_rate": 4.565020989586984e-05,
      "loss": 0.9196,
      "step": 152620
    },
    {
      "epoch": 0.5349300977474968,
      "grad_norm": 2.9375,
      "learning_rate": 4.564956086720614e-05,
      "loss": 0.9409,
      "step": 152630
    },
    {
      "epoch": 0.5349651452543923,
      "grad_norm": 2.890625,
      "learning_rate": 4.564891183854244e-05,
      "loss": 0.9669,
      "step": 152640
    },
    {
      "epoch": 0.535000192761288,
      "grad_norm": 3.09375,
      "learning_rate": 4.564826280987874e-05,
      "loss": 0.8798,
      "step": 152650
    },
    {
      "epoch": 0.5350352402681835,
      "grad_norm": 3.0,
      "learning_rate": 4.564761378121504e-05,
      "loss": 0.9032,
      "step": 152660
    },
    {
      "epoch": 0.5350702877750791,
      "grad_norm": 2.671875,
      "learning_rate": 4.5646964752551335e-05,
      "loss": 0.8873,
      "step": 152670
    },
    {
      "epoch": 0.5351053352819747,
      "grad_norm": 3.046875,
      "learning_rate": 4.5646315723887636e-05,
      "loss": 0.9188,
      "step": 152680
    },
    {
      "epoch": 0.5351403827888703,
      "grad_norm": 2.828125,
      "learning_rate": 4.564566669522393e-05,
      "loss": 0.9178,
      "step": 152690
    },
    {
      "epoch": 0.5351754302957659,
      "grad_norm": 3.125,
      "learning_rate": 4.564501766656023e-05,
      "loss": 0.9249,
      "step": 152700
    },
    {
      "epoch": 0.5352104778026615,
      "grad_norm": 2.75,
      "learning_rate": 4.564436863789653e-05,
      "loss": 0.881,
      "step": 152710
    },
    {
      "epoch": 0.5352455253095572,
      "grad_norm": 3.015625,
      "learning_rate": 4.564371960923283e-05,
      "loss": 0.8877,
      "step": 152720
    },
    {
      "epoch": 0.5352805728164527,
      "grad_norm": 3.0,
      "learning_rate": 4.564307058056912e-05,
      "loss": 0.84,
      "step": 152730
    },
    {
      "epoch": 0.5353156203233483,
      "grad_norm": 2.515625,
      "learning_rate": 4.5642421551905424e-05,
      "loss": 0.913,
      "step": 152740
    },
    {
      "epoch": 0.5353506678302439,
      "grad_norm": 3.25,
      "learning_rate": 4.564177252324172e-05,
      "loss": 0.9414,
      "step": 152750
    },
    {
      "epoch": 0.5353857153371395,
      "grad_norm": 3.046875,
      "learning_rate": 4.5641123494578013e-05,
      "loss": 0.9756,
      "step": 152760
    },
    {
      "epoch": 0.5354207628440351,
      "grad_norm": 2.421875,
      "learning_rate": 4.5640474465914315e-05,
      "loss": 1.0095,
      "step": 152770
    },
    {
      "epoch": 0.5354558103509307,
      "grad_norm": 3.0625,
      "learning_rate": 4.563982543725061e-05,
      "loss": 0.9224,
      "step": 152780
    },
    {
      "epoch": 0.5354908578578262,
      "grad_norm": 3.15625,
      "learning_rate": 4.563917640858691e-05,
      "loss": 0.889,
      "step": 152790
    },
    {
      "epoch": 0.5355259053647219,
      "grad_norm": 2.53125,
      "learning_rate": 4.5638527379923205e-05,
      "loss": 0.9813,
      "step": 152800
    },
    {
      "epoch": 0.5355609528716174,
      "grad_norm": 3.015625,
      "learning_rate": 4.563787835125951e-05,
      "loss": 0.9553,
      "step": 152810
    },
    {
      "epoch": 0.5355960003785131,
      "grad_norm": 3.109375,
      "learning_rate": 4.56372293225958e-05,
      "loss": 0.8684,
      "step": 152820
    },
    {
      "epoch": 0.5356310478854087,
      "grad_norm": 2.875,
      "learning_rate": 4.56365802939321e-05,
      "loss": 0.9471,
      "step": 152830
    },
    {
      "epoch": 0.5356660953923043,
      "grad_norm": 2.9375,
      "learning_rate": 4.56359312652684e-05,
      "loss": 0.8725,
      "step": 152840
    },
    {
      "epoch": 0.5357011428991999,
      "grad_norm": 3.4375,
      "learning_rate": 4.56352822366047e-05,
      "loss": 0.8995,
      "step": 152850
    },
    {
      "epoch": 0.5357361904060954,
      "grad_norm": 3.359375,
      "learning_rate": 4.5634633207940993e-05,
      "loss": 0.9324,
      "step": 152860
    },
    {
      "epoch": 0.5357712379129911,
      "grad_norm": 3.453125,
      "learning_rate": 4.5633984179277295e-05,
      "loss": 0.9742,
      "step": 152870
    },
    {
      "epoch": 0.5358062854198866,
      "grad_norm": 3.125,
      "learning_rate": 4.5633335150613596e-05,
      "loss": 0.9073,
      "step": 152880
    },
    {
      "epoch": 0.5358413329267823,
      "grad_norm": 3.140625,
      "learning_rate": 4.563268612194989e-05,
      "loss": 0.9274,
      "step": 152890
    },
    {
      "epoch": 0.5358763804336778,
      "grad_norm": 2.671875,
      "learning_rate": 4.563203709328619e-05,
      "loss": 0.9807,
      "step": 152900
    },
    {
      "epoch": 0.5359114279405734,
      "grad_norm": 2.484375,
      "learning_rate": 4.563138806462249e-05,
      "loss": 0.9481,
      "step": 152910
    },
    {
      "epoch": 0.5359464754474691,
      "grad_norm": 3.109375,
      "learning_rate": 4.563073903595879e-05,
      "loss": 0.9519,
      "step": 152920
    },
    {
      "epoch": 0.5359815229543646,
      "grad_norm": 3.3125,
      "learning_rate": 4.563009000729508e-05,
      "loss": 0.9113,
      "step": 152930
    },
    {
      "epoch": 0.5360165704612603,
      "grad_norm": 3.6875,
      "learning_rate": 4.5629440978631384e-05,
      "loss": 1.023,
      "step": 152940
    },
    {
      "epoch": 0.5360516179681558,
      "grad_norm": 2.984375,
      "learning_rate": 4.562879194996768e-05,
      "loss": 1.0106,
      "step": 152950
    },
    {
      "epoch": 0.5360866654750515,
      "grad_norm": 2.734375,
      "learning_rate": 4.562814292130398e-05,
      "loss": 0.9622,
      "step": 152960
    },
    {
      "epoch": 0.536121712981947,
      "grad_norm": 3.109375,
      "learning_rate": 4.5627493892640275e-05,
      "loss": 0.8838,
      "step": 152970
    },
    {
      "epoch": 0.5361567604888426,
      "grad_norm": 3.1875,
      "learning_rate": 4.5626844863976576e-05,
      "loss": 0.8518,
      "step": 152980
    },
    {
      "epoch": 0.5361918079957382,
      "grad_norm": 2.609375,
      "learning_rate": 4.562619583531287e-05,
      "loss": 1.0058,
      "step": 152990
    },
    {
      "epoch": 0.5362268555026338,
      "grad_norm": 3.046875,
      "learning_rate": 4.562554680664917e-05,
      "loss": 0.935,
      "step": 153000
    },
    {
      "epoch": 0.5362619030095295,
      "grad_norm": 3.0625,
      "learning_rate": 4.5624897777985474e-05,
      "loss": 0.9788,
      "step": 153010
    },
    {
      "epoch": 0.536296950516425,
      "grad_norm": 2.90625,
      "learning_rate": 4.562424874932177e-05,
      "loss": 0.9136,
      "step": 153020
    },
    {
      "epoch": 0.5363319980233207,
      "grad_norm": 2.828125,
      "learning_rate": 4.562359972065807e-05,
      "loss": 0.9255,
      "step": 153030
    },
    {
      "epoch": 0.5363670455302162,
      "grad_norm": 3.234375,
      "learning_rate": 4.5622950691994364e-05,
      "loss": 0.8939,
      "step": 153040
    },
    {
      "epoch": 0.5364020930371118,
      "grad_norm": 3.140625,
      "learning_rate": 4.5622301663330666e-05,
      "loss": 0.974,
      "step": 153050
    },
    {
      "epoch": 0.5364371405440074,
      "grad_norm": 3.1875,
      "learning_rate": 4.562165263466696e-05,
      "loss": 0.9508,
      "step": 153060
    },
    {
      "epoch": 0.536472188050903,
      "grad_norm": 2.796875,
      "learning_rate": 4.562100360600326e-05,
      "loss": 0.984,
      "step": 153070
    },
    {
      "epoch": 0.5365072355577986,
      "grad_norm": 2.9375,
      "learning_rate": 4.5620354577339556e-05,
      "loss": 0.8825,
      "step": 153080
    },
    {
      "epoch": 0.5365422830646942,
      "grad_norm": 2.8125,
      "learning_rate": 4.561970554867586e-05,
      "loss": 0.8982,
      "step": 153090
    },
    {
      "epoch": 0.5365773305715897,
      "grad_norm": 2.75,
      "learning_rate": 4.561905652001215e-05,
      "loss": 0.7859,
      "step": 153100
    },
    {
      "epoch": 0.5366123780784854,
      "grad_norm": 2.9375,
      "learning_rate": 4.5618407491348454e-05,
      "loss": 0.8817,
      "step": 153110
    },
    {
      "epoch": 0.536647425585381,
      "grad_norm": 2.828125,
      "learning_rate": 4.561775846268475e-05,
      "loss": 1.0152,
      "step": 153120
    },
    {
      "epoch": 0.5366824730922766,
      "grad_norm": 3.03125,
      "learning_rate": 4.561710943402104e-05,
      "loss": 0.8786,
      "step": 153130
    },
    {
      "epoch": 0.5367175205991722,
      "grad_norm": 3.109375,
      "learning_rate": 4.5616460405357344e-05,
      "loss": 0.879,
      "step": 153140
    },
    {
      "epoch": 0.5367525681060678,
      "grad_norm": 2.984375,
      "learning_rate": 4.561581137669364e-05,
      "loss": 0.8738,
      "step": 153150
    },
    {
      "epoch": 0.5367876156129634,
      "grad_norm": 3.390625,
      "learning_rate": 4.561516234802994e-05,
      "loss": 0.9885,
      "step": 153160
    },
    {
      "epoch": 0.5368226631198589,
      "grad_norm": 3.234375,
      "learning_rate": 4.5614513319366235e-05,
      "loss": 0.9963,
      "step": 153170
    },
    {
      "epoch": 0.5368577106267546,
      "grad_norm": 2.875,
      "learning_rate": 4.5613864290702536e-05,
      "loss": 0.9854,
      "step": 153180
    },
    {
      "epoch": 0.5368927581336501,
      "grad_norm": 3.515625,
      "learning_rate": 4.561321526203883e-05,
      "loss": 1.0069,
      "step": 153190
    },
    {
      "epoch": 0.5369278056405458,
      "grad_norm": 2.953125,
      "learning_rate": 4.561256623337513e-05,
      "loss": 0.9606,
      "step": 153200
    },
    {
      "epoch": 0.5369628531474414,
      "grad_norm": 2.90625,
      "learning_rate": 4.561191720471143e-05,
      "loss": 0.9372,
      "step": 153210
    },
    {
      "epoch": 0.536997900654337,
      "grad_norm": 2.59375,
      "learning_rate": 4.561126817604773e-05,
      "loss": 0.8643,
      "step": 153220
    },
    {
      "epoch": 0.5370329481612326,
      "grad_norm": 2.75,
      "learning_rate": 4.561061914738402e-05,
      "loss": 0.8947,
      "step": 153230
    },
    {
      "epoch": 0.5370679956681281,
      "grad_norm": 2.984375,
      "learning_rate": 4.5609970118720324e-05,
      "loss": 0.9244,
      "step": 153240
    },
    {
      "epoch": 0.5371030431750238,
      "grad_norm": 2.984375,
      "learning_rate": 4.5609321090056626e-05,
      "loss": 0.9623,
      "step": 153250
    },
    {
      "epoch": 0.5371380906819193,
      "grad_norm": 3.578125,
      "learning_rate": 4.560867206139292e-05,
      "loss": 1.0046,
      "step": 153260
    },
    {
      "epoch": 0.537173138188815,
      "grad_norm": 2.84375,
      "learning_rate": 4.560802303272922e-05,
      "loss": 0.8736,
      "step": 153270
    },
    {
      "epoch": 0.5372081856957105,
      "grad_norm": 2.71875,
      "learning_rate": 4.5607374004065516e-05,
      "loss": 0.9023,
      "step": 153280
    },
    {
      "epoch": 0.5372432332026061,
      "grad_norm": 2.90625,
      "learning_rate": 4.560672497540182e-05,
      "loss": 0.9314,
      "step": 153290
    },
    {
      "epoch": 0.5372782807095017,
      "grad_norm": 3.359375,
      "learning_rate": 4.560607594673811e-05,
      "loss": 0.9453,
      "step": 153300
    },
    {
      "epoch": 0.5373133282163973,
      "grad_norm": 2.828125,
      "learning_rate": 4.5605426918074414e-05,
      "loss": 0.8586,
      "step": 153310
    },
    {
      "epoch": 0.537348375723293,
      "grad_norm": 2.96875,
      "learning_rate": 4.560477788941071e-05,
      "loss": 0.8453,
      "step": 153320
    },
    {
      "epoch": 0.5373834232301885,
      "grad_norm": 3.203125,
      "learning_rate": 4.560412886074701e-05,
      "loss": 0.8593,
      "step": 153330
    },
    {
      "epoch": 0.5374184707370842,
      "grad_norm": 2.796875,
      "learning_rate": 4.5603479832083304e-05,
      "loss": 0.9127,
      "step": 153340
    },
    {
      "epoch": 0.5374535182439797,
      "grad_norm": 3.0,
      "learning_rate": 4.5602830803419606e-05,
      "loss": 0.9118,
      "step": 153350
    },
    {
      "epoch": 0.5374885657508753,
      "grad_norm": 3.015625,
      "learning_rate": 4.56021817747559e-05,
      "loss": 0.8802,
      "step": 153360
    },
    {
      "epoch": 0.5375236132577709,
      "grad_norm": 2.90625,
      "learning_rate": 4.56015327460922e-05,
      "loss": 0.9227,
      "step": 153370
    },
    {
      "epoch": 0.5375586607646665,
      "grad_norm": 3.125,
      "learning_rate": 4.56008837174285e-05,
      "loss": 0.8666,
      "step": 153380
    },
    {
      "epoch": 0.537593708271562,
      "grad_norm": 2.765625,
      "learning_rate": 4.56002346887648e-05,
      "loss": 0.8862,
      "step": 153390
    },
    {
      "epoch": 0.5376287557784577,
      "grad_norm": 2.875,
      "learning_rate": 4.55995856601011e-05,
      "loss": 0.9532,
      "step": 153400
    },
    {
      "epoch": 0.5376638032853533,
      "grad_norm": 2.671875,
      "learning_rate": 4.5598936631437394e-05,
      "loss": 0.8973,
      "step": 153410
    },
    {
      "epoch": 0.5376988507922489,
      "grad_norm": 3.140625,
      "learning_rate": 4.5598287602773695e-05,
      "loss": 0.8956,
      "step": 153420
    },
    {
      "epoch": 0.5377338982991445,
      "grad_norm": 2.984375,
      "learning_rate": 4.559763857410999e-05,
      "loss": 0.9214,
      "step": 153430
    },
    {
      "epoch": 0.5377689458060401,
      "grad_norm": 2.71875,
      "learning_rate": 4.559698954544629e-05,
      "loss": 0.9138,
      "step": 153440
    },
    {
      "epoch": 0.5378039933129357,
      "grad_norm": 2.828125,
      "learning_rate": 4.5596340516782586e-05,
      "loss": 0.9546,
      "step": 153450
    },
    {
      "epoch": 0.5378390408198312,
      "grad_norm": 3.375,
      "learning_rate": 4.559569148811889e-05,
      "loss": 0.9666,
      "step": 153460
    },
    {
      "epoch": 0.5378740883267269,
      "grad_norm": 3.3125,
      "learning_rate": 4.559504245945518e-05,
      "loss": 0.9481,
      "step": 153470
    },
    {
      "epoch": 0.5379091358336224,
      "grad_norm": 3.3125,
      "learning_rate": 4.559439343079148e-05,
      "loss": 0.9123,
      "step": 153480
    },
    {
      "epoch": 0.5379441833405181,
      "grad_norm": 3.109375,
      "learning_rate": 4.559374440212778e-05,
      "loss": 0.9332,
      "step": 153490
    },
    {
      "epoch": 0.5379792308474136,
      "grad_norm": 2.953125,
      "learning_rate": 4.559309537346407e-05,
      "loss": 0.9292,
      "step": 153500
    },
    {
      "epoch": 0.5380142783543093,
      "grad_norm": 2.75,
      "learning_rate": 4.5592446344800374e-05,
      "loss": 0.9847,
      "step": 153510
    },
    {
      "epoch": 0.5380493258612049,
      "grad_norm": 2.984375,
      "learning_rate": 4.559179731613667e-05,
      "loss": 0.921,
      "step": 153520
    },
    {
      "epoch": 0.5380843733681004,
      "grad_norm": 3.140625,
      "learning_rate": 4.559114828747297e-05,
      "loss": 0.9506,
      "step": 153530
    },
    {
      "epoch": 0.5381194208749961,
      "grad_norm": 2.75,
      "learning_rate": 4.5590499258809264e-05,
      "loss": 0.913,
      "step": 153540
    },
    {
      "epoch": 0.5381544683818916,
      "grad_norm": 3.03125,
      "learning_rate": 4.5589850230145566e-05,
      "loss": 0.9074,
      "step": 153550
    },
    {
      "epoch": 0.5381895158887873,
      "grad_norm": 3.21875,
      "learning_rate": 4.558920120148186e-05,
      "loss": 0.9416,
      "step": 153560
    },
    {
      "epoch": 0.5382245633956828,
      "grad_norm": 3.0625,
      "learning_rate": 4.558855217281816e-05,
      "loss": 0.9366,
      "step": 153570
    },
    {
      "epoch": 0.5382596109025785,
      "grad_norm": 2.8125,
      "learning_rate": 4.5587903144154456e-05,
      "loss": 0.8745,
      "step": 153580
    },
    {
      "epoch": 0.538294658409474,
      "grad_norm": 3.03125,
      "learning_rate": 4.558725411549076e-05,
      "loss": 0.869,
      "step": 153590
    },
    {
      "epoch": 0.5383297059163696,
      "grad_norm": 3.125,
      "learning_rate": 4.558660508682705e-05,
      "loss": 0.9364,
      "step": 153600
    },
    {
      "epoch": 0.5383647534232653,
      "grad_norm": 3.03125,
      "learning_rate": 4.5585956058163354e-05,
      "loss": 0.8997,
      "step": 153610
    },
    {
      "epoch": 0.5383998009301608,
      "grad_norm": 2.71875,
      "learning_rate": 4.5585307029499655e-05,
      "loss": 0.8678,
      "step": 153620
    },
    {
      "epoch": 0.5384348484370565,
      "grad_norm": 2.953125,
      "learning_rate": 4.558465800083595e-05,
      "loss": 0.9657,
      "step": 153630
    },
    {
      "epoch": 0.538469895943952,
      "grad_norm": 2.96875,
      "learning_rate": 4.558400897217225e-05,
      "loss": 0.9004,
      "step": 153640
    },
    {
      "epoch": 0.5385049434508477,
      "grad_norm": 3.328125,
      "learning_rate": 4.5583359943508546e-05,
      "loss": 0.9099,
      "step": 153650
    },
    {
      "epoch": 0.5385399909577432,
      "grad_norm": 2.78125,
      "learning_rate": 4.558271091484485e-05,
      "loss": 0.8751,
      "step": 153660
    },
    {
      "epoch": 0.5385750384646388,
      "grad_norm": 3.15625,
      "learning_rate": 4.558206188618114e-05,
      "loss": 0.9504,
      "step": 153670
    },
    {
      "epoch": 0.5386100859715344,
      "grad_norm": 2.859375,
      "learning_rate": 4.558141285751744e-05,
      "loss": 0.9373,
      "step": 153680
    },
    {
      "epoch": 0.53864513347843,
      "grad_norm": 2.734375,
      "learning_rate": 4.558076382885374e-05,
      "loss": 0.9694,
      "step": 153690
    },
    {
      "epoch": 0.5386801809853257,
      "grad_norm": 2.953125,
      "learning_rate": 4.558011480019004e-05,
      "loss": 1.0254,
      "step": 153700
    },
    {
      "epoch": 0.5387152284922212,
      "grad_norm": 2.90625,
      "learning_rate": 4.5579465771526334e-05,
      "loss": 0.9366,
      "step": 153710
    },
    {
      "epoch": 0.5387502759991168,
      "grad_norm": 2.671875,
      "learning_rate": 4.5578816742862635e-05,
      "loss": 0.8543,
      "step": 153720
    },
    {
      "epoch": 0.5387853235060124,
      "grad_norm": 2.96875,
      "learning_rate": 4.557816771419893e-05,
      "loss": 0.8913,
      "step": 153730
    },
    {
      "epoch": 0.538820371012908,
      "grad_norm": 3.171875,
      "learning_rate": 4.557751868553523e-05,
      "loss": 0.9467,
      "step": 153740
    },
    {
      "epoch": 0.5388554185198036,
      "grad_norm": 3.171875,
      "learning_rate": 4.557686965687153e-05,
      "loss": 0.936,
      "step": 153750
    },
    {
      "epoch": 0.5388904660266992,
      "grad_norm": 3.1875,
      "learning_rate": 4.557622062820783e-05,
      "loss": 0.8283,
      "step": 153760
    },
    {
      "epoch": 0.5389255135335947,
      "grad_norm": 3.25,
      "learning_rate": 4.557557159954413e-05,
      "loss": 0.9289,
      "step": 153770
    },
    {
      "epoch": 0.5389605610404904,
      "grad_norm": 2.875,
      "learning_rate": 4.557492257088042e-05,
      "loss": 0.9083,
      "step": 153780
    },
    {
      "epoch": 0.5389956085473859,
      "grad_norm": 3.765625,
      "learning_rate": 4.5574273542216724e-05,
      "loss": 0.9137,
      "step": 153790
    },
    {
      "epoch": 0.5390306560542816,
      "grad_norm": 2.875,
      "learning_rate": 4.557362451355302e-05,
      "loss": 0.8743,
      "step": 153800
    },
    {
      "epoch": 0.5390657035611772,
      "grad_norm": 2.84375,
      "learning_rate": 4.557297548488932e-05,
      "loss": 0.9855,
      "step": 153810
    },
    {
      "epoch": 0.5391007510680728,
      "grad_norm": 3.140625,
      "learning_rate": 4.5572326456225615e-05,
      "loss": 0.9626,
      "step": 153820
    },
    {
      "epoch": 0.5391357985749684,
      "grad_norm": 3.640625,
      "learning_rate": 4.5571677427561916e-05,
      "loss": 0.9546,
      "step": 153830
    },
    {
      "epoch": 0.5391708460818639,
      "grad_norm": 3.078125,
      "learning_rate": 4.557102839889821e-05,
      "loss": 1.0077,
      "step": 153840
    },
    {
      "epoch": 0.5392058935887596,
      "grad_norm": 2.765625,
      "learning_rate": 4.557037937023451e-05,
      "loss": 0.8838,
      "step": 153850
    },
    {
      "epoch": 0.5392409410956551,
      "grad_norm": 2.90625,
      "learning_rate": 4.556973034157081e-05,
      "loss": 0.9232,
      "step": 153860
    },
    {
      "epoch": 0.5392759886025508,
      "grad_norm": 3.125,
      "learning_rate": 4.556908131290711e-05,
      "loss": 0.8847,
      "step": 153870
    },
    {
      "epoch": 0.5393110361094463,
      "grad_norm": 2.6875,
      "learning_rate": 4.55684322842434e-05,
      "loss": 0.9018,
      "step": 153880
    },
    {
      "epoch": 0.539346083616342,
      "grad_norm": 2.921875,
      "learning_rate": 4.55677832555797e-05,
      "loss": 0.9458,
      "step": 153890
    },
    {
      "epoch": 0.5393811311232376,
      "grad_norm": 2.765625,
      "learning_rate": 4.5567134226916e-05,
      "loss": 0.9204,
      "step": 153900
    },
    {
      "epoch": 0.5394161786301331,
      "grad_norm": 2.640625,
      "learning_rate": 4.5566485198252294e-05,
      "loss": 0.9934,
      "step": 153910
    },
    {
      "epoch": 0.5394512261370288,
      "grad_norm": 3.171875,
      "learning_rate": 4.5565836169588595e-05,
      "loss": 0.8661,
      "step": 153920
    },
    {
      "epoch": 0.5394862736439243,
      "grad_norm": 2.875,
      "learning_rate": 4.556518714092489e-05,
      "loss": 0.9301,
      "step": 153930
    },
    {
      "epoch": 0.53952132115082,
      "grad_norm": 2.859375,
      "learning_rate": 4.556453811226119e-05,
      "loss": 0.8433,
      "step": 153940
    },
    {
      "epoch": 0.5395563686577155,
      "grad_norm": 3.1875,
      "learning_rate": 4.5563889083597486e-05,
      "loss": 0.9545,
      "step": 153950
    },
    {
      "epoch": 0.5395914161646111,
      "grad_norm": 2.875,
      "learning_rate": 4.556324005493379e-05,
      "loss": 0.9304,
      "step": 153960
    },
    {
      "epoch": 0.5396264636715067,
      "grad_norm": 2.859375,
      "learning_rate": 4.556259102627009e-05,
      "loss": 0.9014,
      "step": 153970
    },
    {
      "epoch": 0.5396615111784023,
      "grad_norm": 2.875,
      "learning_rate": 4.556194199760638e-05,
      "loss": 0.9906,
      "step": 153980
    },
    {
      "epoch": 0.5396965586852979,
      "grad_norm": 2.96875,
      "learning_rate": 4.5561292968942684e-05,
      "loss": 0.8941,
      "step": 153990
    },
    {
      "epoch": 0.5397316061921935,
      "grad_norm": 2.671875,
      "learning_rate": 4.556064394027898e-05,
      "loss": 0.9327,
      "step": 154000
    },
    {
      "epoch": 0.5397666536990892,
      "grad_norm": 2.984375,
      "learning_rate": 4.555999491161528e-05,
      "loss": 0.9317,
      "step": 154010
    },
    {
      "epoch": 0.5398017012059847,
      "grad_norm": 3.40625,
      "learning_rate": 4.5559345882951575e-05,
      "loss": 0.9007,
      "step": 154020
    },
    {
      "epoch": 0.5398367487128803,
      "grad_norm": 3.34375,
      "learning_rate": 4.5558696854287876e-05,
      "loss": 1.0291,
      "step": 154030
    },
    {
      "epoch": 0.5398717962197759,
      "grad_norm": 2.53125,
      "learning_rate": 4.555804782562417e-05,
      "loss": 0.8781,
      "step": 154040
    },
    {
      "epoch": 0.5399068437266715,
      "grad_norm": 3.046875,
      "learning_rate": 4.555739879696047e-05,
      "loss": 0.8823,
      "step": 154050
    },
    {
      "epoch": 0.5399418912335671,
      "grad_norm": 2.90625,
      "learning_rate": 4.555674976829677e-05,
      "loss": 0.8525,
      "step": 154060
    },
    {
      "epoch": 0.5399769387404627,
      "grad_norm": 2.828125,
      "learning_rate": 4.555610073963307e-05,
      "loss": 0.9757,
      "step": 154070
    },
    {
      "epoch": 0.5400119862473582,
      "grad_norm": 2.8125,
      "learning_rate": 4.555545171096936e-05,
      "loss": 0.8931,
      "step": 154080
    },
    {
      "epoch": 0.5400470337542539,
      "grad_norm": 3.0,
      "learning_rate": 4.5554802682305664e-05,
      "loss": 0.8923,
      "step": 154090
    },
    {
      "epoch": 0.5400820812611495,
      "grad_norm": 2.8125,
      "learning_rate": 4.555415365364196e-05,
      "loss": 0.8389,
      "step": 154100
    },
    {
      "epoch": 0.5401171287680451,
      "grad_norm": 3.0625,
      "learning_rate": 4.555350462497826e-05,
      "loss": 0.8962,
      "step": 154110
    },
    {
      "epoch": 0.5401521762749407,
      "grad_norm": 3.140625,
      "learning_rate": 4.555285559631456e-05,
      "loss": 0.9274,
      "step": 154120
    },
    {
      "epoch": 0.5401872237818363,
      "grad_norm": 2.859375,
      "learning_rate": 4.5552206567650856e-05,
      "loss": 0.9043,
      "step": 154130
    },
    {
      "epoch": 0.5402222712887319,
      "grad_norm": 3.015625,
      "learning_rate": 4.555155753898716e-05,
      "loss": 0.9418,
      "step": 154140
    },
    {
      "epoch": 0.5402573187956274,
      "grad_norm": 2.75,
      "learning_rate": 4.555090851032345e-05,
      "loss": 0.8899,
      "step": 154150
    },
    {
      "epoch": 0.5402923663025231,
      "grad_norm": 3.109375,
      "learning_rate": 4.5550259481659754e-05,
      "loss": 0.9701,
      "step": 154160
    },
    {
      "epoch": 0.5403274138094186,
      "grad_norm": 3.265625,
      "learning_rate": 4.554961045299605e-05,
      "loss": 1.0074,
      "step": 154170
    },
    {
      "epoch": 0.5403624613163143,
      "grad_norm": 2.84375,
      "learning_rate": 4.554896142433235e-05,
      "loss": 0.9236,
      "step": 154180
    },
    {
      "epoch": 0.5403975088232099,
      "grad_norm": 3.3125,
      "learning_rate": 4.5548312395668644e-05,
      "loss": 0.9825,
      "step": 154190
    },
    {
      "epoch": 0.5404325563301055,
      "grad_norm": 2.9375,
      "learning_rate": 4.5547663367004946e-05,
      "loss": 0.9359,
      "step": 154200
    },
    {
      "epoch": 0.5404676038370011,
      "grad_norm": 3.078125,
      "learning_rate": 4.554701433834124e-05,
      "loss": 0.9127,
      "step": 154210
    },
    {
      "epoch": 0.5405026513438966,
      "grad_norm": 2.9375,
      "learning_rate": 4.554636530967754e-05,
      "loss": 0.9448,
      "step": 154220
    },
    {
      "epoch": 0.5405376988507923,
      "grad_norm": 2.75,
      "learning_rate": 4.5545716281013836e-05,
      "loss": 0.9075,
      "step": 154230
    },
    {
      "epoch": 0.5405727463576878,
      "grad_norm": 3.015625,
      "learning_rate": 4.554506725235014e-05,
      "loss": 0.9761,
      "step": 154240
    },
    {
      "epoch": 0.5406077938645835,
      "grad_norm": 3.09375,
      "learning_rate": 4.554441822368643e-05,
      "loss": 0.9134,
      "step": 154250
    },
    {
      "epoch": 0.540642841371479,
      "grad_norm": 2.859375,
      "learning_rate": 4.554376919502273e-05,
      "loss": 0.8604,
      "step": 154260
    },
    {
      "epoch": 0.5406778888783746,
      "grad_norm": 3.296875,
      "learning_rate": 4.554312016635903e-05,
      "loss": 0.9388,
      "step": 154270
    },
    {
      "epoch": 0.5407129363852702,
      "grad_norm": 2.609375,
      "learning_rate": 4.554247113769532e-05,
      "loss": 0.9258,
      "step": 154280
    },
    {
      "epoch": 0.5407479838921658,
      "grad_norm": 2.984375,
      "learning_rate": 4.5541822109031624e-05,
      "loss": 0.921,
      "step": 154290
    },
    {
      "epoch": 0.5407830313990615,
      "grad_norm": 3.328125,
      "learning_rate": 4.554117308036792e-05,
      "loss": 0.9408,
      "step": 154300
    },
    {
      "epoch": 0.540818078905957,
      "grad_norm": 3.15625,
      "learning_rate": 4.554052405170422e-05,
      "loss": 0.9593,
      "step": 154310
    },
    {
      "epoch": 0.5408531264128527,
      "grad_norm": 2.765625,
      "learning_rate": 4.5539875023040515e-05,
      "loss": 0.9111,
      "step": 154320
    },
    {
      "epoch": 0.5408881739197482,
      "grad_norm": 3.578125,
      "learning_rate": 4.5539225994376816e-05,
      "loss": 0.9729,
      "step": 154330
    },
    {
      "epoch": 0.5409232214266438,
      "grad_norm": 3.1875,
      "learning_rate": 4.553857696571312e-05,
      "loss": 0.8522,
      "step": 154340
    },
    {
      "epoch": 0.5409582689335394,
      "grad_norm": 3.109375,
      "learning_rate": 4.553792793704941e-05,
      "loss": 0.8631,
      "step": 154350
    },
    {
      "epoch": 0.540993316440435,
      "grad_norm": 2.765625,
      "learning_rate": 4.5537278908385714e-05,
      "loss": 0.9497,
      "step": 154360
    },
    {
      "epoch": 0.5410283639473306,
      "grad_norm": 3.203125,
      "learning_rate": 4.553662987972201e-05,
      "loss": 0.8761,
      "step": 154370
    },
    {
      "epoch": 0.5410634114542262,
      "grad_norm": 2.84375,
      "learning_rate": 4.553598085105831e-05,
      "loss": 0.8098,
      "step": 154380
    },
    {
      "epoch": 0.5410984589611219,
      "grad_norm": 3.234375,
      "learning_rate": 4.5535331822394604e-05,
      "loss": 0.891,
      "step": 154390
    },
    {
      "epoch": 0.5411335064680174,
      "grad_norm": 3.296875,
      "learning_rate": 4.5534682793730906e-05,
      "loss": 0.9064,
      "step": 154400
    },
    {
      "epoch": 0.541168553974913,
      "grad_norm": 3.375,
      "learning_rate": 4.55340337650672e-05,
      "loss": 1.0192,
      "step": 154410
    },
    {
      "epoch": 0.5412036014818086,
      "grad_norm": 2.96875,
      "learning_rate": 4.55333847364035e-05,
      "loss": 0.9223,
      "step": 154420
    },
    {
      "epoch": 0.5412386489887042,
      "grad_norm": 3.125,
      "learning_rate": 4.5532735707739796e-05,
      "loss": 0.8144,
      "step": 154430
    },
    {
      "epoch": 0.5412736964955998,
      "grad_norm": 2.609375,
      "learning_rate": 4.55320866790761e-05,
      "loss": 0.9398,
      "step": 154440
    },
    {
      "epoch": 0.5413087440024954,
      "grad_norm": 3.890625,
      "learning_rate": 4.553143765041239e-05,
      "loss": 0.9557,
      "step": 154450
    },
    {
      "epoch": 0.5413437915093909,
      "grad_norm": 2.890625,
      "learning_rate": 4.5530788621748694e-05,
      "loss": 0.9985,
      "step": 154460
    },
    {
      "epoch": 0.5413788390162866,
      "grad_norm": 2.984375,
      "learning_rate": 4.553013959308499e-05,
      "loss": 0.9005,
      "step": 154470
    },
    {
      "epoch": 0.5414138865231821,
      "grad_norm": 2.53125,
      "learning_rate": 4.552949056442129e-05,
      "loss": 0.9227,
      "step": 154480
    },
    {
      "epoch": 0.5414489340300778,
      "grad_norm": 2.953125,
      "learning_rate": 4.552884153575759e-05,
      "loss": 0.9804,
      "step": 154490
    },
    {
      "epoch": 0.5414839815369734,
      "grad_norm": 3.09375,
      "learning_rate": 4.5528192507093886e-05,
      "loss": 0.9439,
      "step": 154500
    },
    {
      "epoch": 0.541519029043869,
      "grad_norm": 3.015625,
      "learning_rate": 4.552754347843019e-05,
      "loss": 0.9808,
      "step": 154510
    },
    {
      "epoch": 0.5415540765507646,
      "grad_norm": 2.671875,
      "learning_rate": 4.552689444976648e-05,
      "loss": 0.9346,
      "step": 154520
    },
    {
      "epoch": 0.5415891240576601,
      "grad_norm": 3.265625,
      "learning_rate": 4.552624542110278e-05,
      "loss": 0.8965,
      "step": 154530
    },
    {
      "epoch": 0.5416241715645558,
      "grad_norm": 3.796875,
      "learning_rate": 4.552559639243908e-05,
      "loss": 0.8476,
      "step": 154540
    },
    {
      "epoch": 0.5416592190714513,
      "grad_norm": 3.40625,
      "learning_rate": 4.552494736377538e-05,
      "loss": 0.9259,
      "step": 154550
    },
    {
      "epoch": 0.541694266578347,
      "grad_norm": 3.3125,
      "learning_rate": 4.5524298335111674e-05,
      "loss": 0.9291,
      "step": 154560
    },
    {
      "epoch": 0.5417293140852425,
      "grad_norm": 3.390625,
      "learning_rate": 4.5523649306447975e-05,
      "loss": 0.9514,
      "step": 154570
    },
    {
      "epoch": 0.5417643615921381,
      "grad_norm": 3.21875,
      "learning_rate": 4.552300027778427e-05,
      "loss": 0.935,
      "step": 154580
    },
    {
      "epoch": 0.5417994090990338,
      "grad_norm": 3.0625,
      "learning_rate": 4.552235124912057e-05,
      "loss": 0.9033,
      "step": 154590
    },
    {
      "epoch": 0.5418344566059293,
      "grad_norm": 4.875,
      "learning_rate": 4.5521702220456866e-05,
      "loss": 1.0008,
      "step": 154600
    },
    {
      "epoch": 0.541869504112825,
      "grad_norm": 2.640625,
      "learning_rate": 4.552105319179317e-05,
      "loss": 0.9646,
      "step": 154610
    },
    {
      "epoch": 0.5419045516197205,
      "grad_norm": 2.859375,
      "learning_rate": 4.552040416312947e-05,
      "loss": 0.8599,
      "step": 154620
    },
    {
      "epoch": 0.5419395991266162,
      "grad_norm": 3.203125,
      "learning_rate": 4.5519755134465756e-05,
      "loss": 0.9125,
      "step": 154630
    },
    {
      "epoch": 0.5419746466335117,
      "grad_norm": 3.109375,
      "learning_rate": 4.551910610580206e-05,
      "loss": 0.9053,
      "step": 154640
    },
    {
      "epoch": 0.5420096941404073,
      "grad_norm": 3.15625,
      "learning_rate": 4.551845707713835e-05,
      "loss": 0.9217,
      "step": 154650
    },
    {
      "epoch": 0.5420447416473029,
      "grad_norm": 3.265625,
      "learning_rate": 4.5517808048474654e-05,
      "loss": 0.8936,
      "step": 154660
    },
    {
      "epoch": 0.5420797891541985,
      "grad_norm": 2.984375,
      "learning_rate": 4.551715901981095e-05,
      "loss": 0.8895,
      "step": 154670
    },
    {
      "epoch": 0.5421148366610942,
      "grad_norm": 3.3125,
      "learning_rate": 4.551650999114725e-05,
      "loss": 0.9241,
      "step": 154680
    },
    {
      "epoch": 0.5421498841679897,
      "grad_norm": 2.828125,
      "learning_rate": 4.5515860962483544e-05,
      "loss": 0.9184,
      "step": 154690
    },
    {
      "epoch": 0.5421849316748854,
      "grad_norm": 2.796875,
      "learning_rate": 4.5515211933819846e-05,
      "loss": 0.8523,
      "step": 154700
    },
    {
      "epoch": 0.5422199791817809,
      "grad_norm": 2.625,
      "learning_rate": 4.551456290515615e-05,
      "loss": 0.9063,
      "step": 154710
    },
    {
      "epoch": 0.5422550266886765,
      "grad_norm": 2.875,
      "learning_rate": 4.551391387649244e-05,
      "loss": 0.9498,
      "step": 154720
    },
    {
      "epoch": 0.5422900741955721,
      "grad_norm": 3.0625,
      "learning_rate": 4.551326484782874e-05,
      "loss": 0.9258,
      "step": 154730
    },
    {
      "epoch": 0.5423251217024677,
      "grad_norm": 3.203125,
      "learning_rate": 4.551261581916504e-05,
      "loss": 0.9064,
      "step": 154740
    },
    {
      "epoch": 0.5423601692093633,
      "grad_norm": 2.984375,
      "learning_rate": 4.551196679050134e-05,
      "loss": 0.944,
      "step": 154750
    },
    {
      "epoch": 0.5423952167162589,
      "grad_norm": 3.234375,
      "learning_rate": 4.5511317761837634e-05,
      "loss": 0.9217,
      "step": 154760
    },
    {
      "epoch": 0.5424302642231544,
      "grad_norm": 2.828125,
      "learning_rate": 4.5510668733173935e-05,
      "loss": 0.9174,
      "step": 154770
    },
    {
      "epoch": 0.5424653117300501,
      "grad_norm": 3.03125,
      "learning_rate": 4.551001970451023e-05,
      "loss": 0.8994,
      "step": 154780
    },
    {
      "epoch": 0.5425003592369457,
      "grad_norm": 3.1875,
      "learning_rate": 4.550937067584653e-05,
      "loss": 0.9573,
      "step": 154790
    },
    {
      "epoch": 0.5425354067438413,
      "grad_norm": 3.125,
      "learning_rate": 4.5508721647182826e-05,
      "loss": 0.9928,
      "step": 154800
    },
    {
      "epoch": 0.5425704542507369,
      "grad_norm": 3.078125,
      "learning_rate": 4.550807261851913e-05,
      "loss": 0.9192,
      "step": 154810
    },
    {
      "epoch": 0.5426055017576324,
      "grad_norm": 2.96875,
      "learning_rate": 4.550742358985542e-05,
      "loss": 0.9451,
      "step": 154820
    },
    {
      "epoch": 0.5426405492645281,
      "grad_norm": 3.234375,
      "learning_rate": 4.550677456119172e-05,
      "loss": 0.8786,
      "step": 154830
    },
    {
      "epoch": 0.5426755967714236,
      "grad_norm": 2.90625,
      "learning_rate": 4.550612553252802e-05,
      "loss": 0.881,
      "step": 154840
    },
    {
      "epoch": 0.5427106442783193,
      "grad_norm": 2.8125,
      "learning_rate": 4.550547650386432e-05,
      "loss": 0.8852,
      "step": 154850
    },
    {
      "epoch": 0.5427456917852148,
      "grad_norm": 2.515625,
      "learning_rate": 4.550482747520062e-05,
      "loss": 0.914,
      "step": 154860
    },
    {
      "epoch": 0.5427807392921105,
      "grad_norm": 3.09375,
      "learning_rate": 4.5504178446536915e-05,
      "loss": 0.9118,
      "step": 154870
    },
    {
      "epoch": 0.5428157867990061,
      "grad_norm": 2.78125,
      "learning_rate": 4.5503529417873217e-05,
      "loss": 0.9115,
      "step": 154880
    },
    {
      "epoch": 0.5428508343059016,
      "grad_norm": 3.09375,
      "learning_rate": 4.550288038920951e-05,
      "loss": 0.9848,
      "step": 154890
    },
    {
      "epoch": 0.5428858818127973,
      "grad_norm": 2.9375,
      "learning_rate": 4.550223136054581e-05,
      "loss": 0.8883,
      "step": 154900
    },
    {
      "epoch": 0.5429209293196928,
      "grad_norm": 2.890625,
      "learning_rate": 4.550158233188211e-05,
      "loss": 0.919,
      "step": 154910
    },
    {
      "epoch": 0.5429559768265885,
      "grad_norm": 3.0,
      "learning_rate": 4.550093330321841e-05,
      "loss": 0.9649,
      "step": 154920
    },
    {
      "epoch": 0.542991024333484,
      "grad_norm": 3.078125,
      "learning_rate": 4.55002842745547e-05,
      "loss": 0.9996,
      "step": 154930
    },
    {
      "epoch": 0.5430260718403797,
      "grad_norm": 2.40625,
      "learning_rate": 4.5499635245891005e-05,
      "loss": 0.9032,
      "step": 154940
    },
    {
      "epoch": 0.5430611193472752,
      "grad_norm": 3.078125,
      "learning_rate": 4.54989862172273e-05,
      "loss": 0.9597,
      "step": 154950
    },
    {
      "epoch": 0.5430961668541708,
      "grad_norm": 3.265625,
      "learning_rate": 4.54983371885636e-05,
      "loss": 0.8846,
      "step": 154960
    },
    {
      "epoch": 0.5431312143610664,
      "grad_norm": 2.984375,
      "learning_rate": 4.5497688159899895e-05,
      "loss": 0.9766,
      "step": 154970
    },
    {
      "epoch": 0.543166261867962,
      "grad_norm": 2.84375,
      "learning_rate": 4.5497039131236197e-05,
      "loss": 0.9177,
      "step": 154980
    },
    {
      "epoch": 0.5432013093748577,
      "grad_norm": 3.171875,
      "learning_rate": 4.54963901025725e-05,
      "loss": 1.0278,
      "step": 154990
    },
    {
      "epoch": 0.5432363568817532,
      "grad_norm": 3.03125,
      "learning_rate": 4.549574107390879e-05,
      "loss": 0.8986,
      "step": 155000
    },
    {
      "epoch": 0.5432363568817532,
      "eval_loss": 0.8659214377403259,
      "eval_runtime": 553.1592,
      "eval_samples_per_second": 687.751,
      "eval_steps_per_second": 57.313,
      "step": 155000
    },
    {
      "epoch": 0.5432714043886488,
      "grad_norm": 3.3125,
      "learning_rate": 4.549509204524509e-05,
      "loss": 0.9806,
      "step": 155010
    },
    {
      "epoch": 0.5433064518955444,
      "grad_norm": 3.015625,
      "learning_rate": 4.549444301658138e-05,
      "loss": 0.9137,
      "step": 155020
    },
    {
      "epoch": 0.54334149940244,
      "grad_norm": 2.921875,
      "learning_rate": 4.549379398791768e-05,
      "loss": 0.9197,
      "step": 155030
    },
    {
      "epoch": 0.5433765469093356,
      "grad_norm": 2.9375,
      "learning_rate": 4.549314495925398e-05,
      "loss": 0.9443,
      "step": 155040
    },
    {
      "epoch": 0.5434115944162312,
      "grad_norm": 2.71875,
      "learning_rate": 4.549249593059028e-05,
      "loss": 0.8845,
      "step": 155050
    },
    {
      "epoch": 0.5434466419231267,
      "grad_norm": 3.0,
      "learning_rate": 4.5491846901926574e-05,
      "loss": 0.8673,
      "step": 155060
    },
    {
      "epoch": 0.5434816894300224,
      "grad_norm": 3.109375,
      "learning_rate": 4.5491197873262875e-05,
      "loss": 0.9997,
      "step": 155070
    },
    {
      "epoch": 0.543516736936918,
      "grad_norm": 3.015625,
      "learning_rate": 4.5490548844599177e-05,
      "loss": 0.9618,
      "step": 155080
    },
    {
      "epoch": 0.5435517844438136,
      "grad_norm": 2.84375,
      "learning_rate": 4.548989981593547e-05,
      "loss": 0.9044,
      "step": 155090
    },
    {
      "epoch": 0.5435868319507092,
      "grad_norm": 2.875,
      "learning_rate": 4.548925078727177e-05,
      "loss": 0.9555,
      "step": 155100
    },
    {
      "epoch": 0.5436218794576048,
      "grad_norm": 2.578125,
      "learning_rate": 4.548860175860807e-05,
      "loss": 0.9459,
      "step": 155110
    },
    {
      "epoch": 0.5436569269645004,
      "grad_norm": 3.40625,
      "learning_rate": 4.548795272994437e-05,
      "loss": 0.9433,
      "step": 155120
    },
    {
      "epoch": 0.543691974471396,
      "grad_norm": 3.59375,
      "learning_rate": 4.548730370128066e-05,
      "loss": 0.8943,
      "step": 155130
    },
    {
      "epoch": 0.5437270219782916,
      "grad_norm": 3.0,
      "learning_rate": 4.5486654672616965e-05,
      "loss": 0.8937,
      "step": 155140
    },
    {
      "epoch": 0.5437620694851871,
      "grad_norm": 2.796875,
      "learning_rate": 4.548600564395326e-05,
      "loss": 0.9063,
      "step": 155150
    },
    {
      "epoch": 0.5437971169920828,
      "grad_norm": 3.03125,
      "learning_rate": 4.548535661528956e-05,
      "loss": 0.8461,
      "step": 155160
    },
    {
      "epoch": 0.5438321644989783,
      "grad_norm": 3.171875,
      "learning_rate": 4.5484707586625855e-05,
      "loss": 0.9874,
      "step": 155170
    },
    {
      "epoch": 0.543867212005874,
      "grad_norm": 3.03125,
      "learning_rate": 4.5484058557962157e-05,
      "loss": 0.934,
      "step": 155180
    },
    {
      "epoch": 0.5439022595127696,
      "grad_norm": 3.265625,
      "learning_rate": 4.548340952929845e-05,
      "loss": 1.0109,
      "step": 155190
    },
    {
      "epoch": 0.5439373070196651,
      "grad_norm": 2.734375,
      "learning_rate": 4.548276050063475e-05,
      "loss": 0.903,
      "step": 155200
    },
    {
      "epoch": 0.5439723545265608,
      "grad_norm": 3.0,
      "learning_rate": 4.5482111471971054e-05,
      "loss": 0.9139,
      "step": 155210
    },
    {
      "epoch": 0.5440074020334563,
      "grad_norm": 2.71875,
      "learning_rate": 4.548146244330735e-05,
      "loss": 0.9058,
      "step": 155220
    },
    {
      "epoch": 0.544042449540352,
      "grad_norm": 3.15625,
      "learning_rate": 4.548081341464365e-05,
      "loss": 0.9393,
      "step": 155230
    },
    {
      "epoch": 0.5440774970472475,
      "grad_norm": 2.796875,
      "learning_rate": 4.5480164385979945e-05,
      "loss": 0.898,
      "step": 155240
    },
    {
      "epoch": 0.5441125445541432,
      "grad_norm": 2.546875,
      "learning_rate": 4.5479515357316246e-05,
      "loss": 0.8891,
      "step": 155250
    },
    {
      "epoch": 0.5441475920610387,
      "grad_norm": 3.078125,
      "learning_rate": 4.547886632865254e-05,
      "loss": 0.8537,
      "step": 155260
    },
    {
      "epoch": 0.5441826395679343,
      "grad_norm": 2.6875,
      "learning_rate": 4.547821729998884e-05,
      "loss": 0.9746,
      "step": 155270
    },
    {
      "epoch": 0.54421768707483,
      "grad_norm": 3.421875,
      "learning_rate": 4.5477568271325137e-05,
      "loss": 0.9073,
      "step": 155280
    },
    {
      "epoch": 0.5442527345817255,
      "grad_norm": 2.5625,
      "learning_rate": 4.547691924266144e-05,
      "loss": 0.8669,
      "step": 155290
    },
    {
      "epoch": 0.5442877820886212,
      "grad_norm": 2.859375,
      "learning_rate": 4.547627021399773e-05,
      "loss": 0.9311,
      "step": 155300
    },
    {
      "epoch": 0.5443228295955167,
      "grad_norm": 2.65625,
      "learning_rate": 4.5475621185334034e-05,
      "loss": 0.8497,
      "step": 155310
    },
    {
      "epoch": 0.5443578771024123,
      "grad_norm": 3.28125,
      "learning_rate": 4.547497215667033e-05,
      "loss": 0.96,
      "step": 155320
    },
    {
      "epoch": 0.5443929246093079,
      "grad_norm": 3.25,
      "learning_rate": 4.547432312800663e-05,
      "loss": 0.9527,
      "step": 155330
    },
    {
      "epoch": 0.5444279721162035,
      "grad_norm": 2.796875,
      "learning_rate": 4.5473674099342925e-05,
      "loss": 0.9891,
      "step": 155340
    },
    {
      "epoch": 0.5444630196230991,
      "grad_norm": 2.28125,
      "learning_rate": 4.5473025070679226e-05,
      "loss": 0.8514,
      "step": 155350
    },
    {
      "epoch": 0.5444980671299947,
      "grad_norm": 3.078125,
      "learning_rate": 4.547237604201553e-05,
      "loss": 0.981,
      "step": 155360
    },
    {
      "epoch": 0.5445331146368904,
      "grad_norm": 3.0625,
      "learning_rate": 4.547172701335182e-05,
      "loss": 0.9019,
      "step": 155370
    },
    {
      "epoch": 0.5445681621437859,
      "grad_norm": 2.9375,
      "learning_rate": 4.5471077984688117e-05,
      "loss": 0.9406,
      "step": 155380
    },
    {
      "epoch": 0.5446032096506815,
      "grad_norm": 2.90625,
      "learning_rate": 4.547042895602441e-05,
      "loss": 0.913,
      "step": 155390
    },
    {
      "epoch": 0.5446382571575771,
      "grad_norm": 3.015625,
      "learning_rate": 4.546977992736071e-05,
      "loss": 0.9635,
      "step": 155400
    },
    {
      "epoch": 0.5446733046644727,
      "grad_norm": 2.71875,
      "learning_rate": 4.546913089869701e-05,
      "loss": 0.9284,
      "step": 155410
    },
    {
      "epoch": 0.5447083521713683,
      "grad_norm": 2.6875,
      "learning_rate": 4.546848187003331e-05,
      "loss": 0.9173,
      "step": 155420
    },
    {
      "epoch": 0.5447433996782639,
      "grad_norm": 3.5625,
      "learning_rate": 4.54678328413696e-05,
      "loss": 0.9469,
      "step": 155430
    },
    {
      "epoch": 0.5447784471851594,
      "grad_norm": 2.75,
      "learning_rate": 4.5467183812705905e-05,
      "loss": 0.9529,
      "step": 155440
    },
    {
      "epoch": 0.5448134946920551,
      "grad_norm": 2.421875,
      "learning_rate": 4.5466534784042206e-05,
      "loss": 0.9299,
      "step": 155450
    },
    {
      "epoch": 0.5448485421989506,
      "grad_norm": 4.875,
      "learning_rate": 4.54658857553785e-05,
      "loss": 0.9548,
      "step": 155460
    },
    {
      "epoch": 0.5448835897058463,
      "grad_norm": 3.390625,
      "learning_rate": 4.54652367267148e-05,
      "loss": 0.9221,
      "step": 155470
    },
    {
      "epoch": 0.5449186372127419,
      "grad_norm": 3.1875,
      "learning_rate": 4.5464587698051097e-05,
      "loss": 0.8968,
      "step": 155480
    },
    {
      "epoch": 0.5449536847196375,
      "grad_norm": 3.015625,
      "learning_rate": 4.54639386693874e-05,
      "loss": 0.9407,
      "step": 155490
    },
    {
      "epoch": 0.5449887322265331,
      "grad_norm": 3.125,
      "learning_rate": 4.546328964072369e-05,
      "loss": 0.8438,
      "step": 155500
    },
    {
      "epoch": 0.5450237797334286,
      "grad_norm": 2.953125,
      "learning_rate": 4.5462640612059994e-05,
      "loss": 0.9028,
      "step": 155510
    },
    {
      "epoch": 0.5450588272403243,
      "grad_norm": 2.734375,
      "learning_rate": 4.546199158339629e-05,
      "loss": 0.8429,
      "step": 155520
    },
    {
      "epoch": 0.5450938747472198,
      "grad_norm": 2.921875,
      "learning_rate": 4.546134255473259e-05,
      "loss": 0.9467,
      "step": 155530
    },
    {
      "epoch": 0.5451289222541155,
      "grad_norm": 2.765625,
      "learning_rate": 4.5460693526068885e-05,
      "loss": 0.9149,
      "step": 155540
    },
    {
      "epoch": 0.545163969761011,
      "grad_norm": 2.78125,
      "learning_rate": 4.5460044497405186e-05,
      "loss": 0.9311,
      "step": 155550
    },
    {
      "epoch": 0.5451990172679066,
      "grad_norm": 2.765625,
      "learning_rate": 4.545939546874148e-05,
      "loss": 0.8729,
      "step": 155560
    },
    {
      "epoch": 0.5452340647748023,
      "grad_norm": 3.140625,
      "learning_rate": 4.545874644007778e-05,
      "loss": 0.9775,
      "step": 155570
    },
    {
      "epoch": 0.5452691122816978,
      "grad_norm": 2.96875,
      "learning_rate": 4.545809741141408e-05,
      "loss": 0.9329,
      "step": 155580
    },
    {
      "epoch": 0.5453041597885935,
      "grad_norm": 2.953125,
      "learning_rate": 4.545744838275038e-05,
      "loss": 0.8847,
      "step": 155590
    },
    {
      "epoch": 0.545339207295489,
      "grad_norm": 3.5625,
      "learning_rate": 4.545679935408668e-05,
      "loss": 0.9878,
      "step": 155600
    },
    {
      "epoch": 0.5453742548023847,
      "grad_norm": 3.09375,
      "learning_rate": 4.5456150325422974e-05,
      "loss": 0.8762,
      "step": 155610
    },
    {
      "epoch": 0.5454093023092802,
      "grad_norm": 3.109375,
      "learning_rate": 4.5455501296759275e-05,
      "loss": 0.8121,
      "step": 155620
    },
    {
      "epoch": 0.5454443498161758,
      "grad_norm": 2.390625,
      "learning_rate": 4.545485226809557e-05,
      "loss": 0.868,
      "step": 155630
    },
    {
      "epoch": 0.5454793973230714,
      "grad_norm": 2.8125,
      "learning_rate": 4.545420323943187e-05,
      "loss": 0.8723,
      "step": 155640
    },
    {
      "epoch": 0.545514444829967,
      "grad_norm": 3.234375,
      "learning_rate": 4.5453554210768166e-05,
      "loss": 0.9755,
      "step": 155650
    },
    {
      "epoch": 0.5455494923368626,
      "grad_norm": 2.90625,
      "learning_rate": 4.545290518210447e-05,
      "loss": 0.863,
      "step": 155660
    },
    {
      "epoch": 0.5455845398437582,
      "grad_norm": 2.71875,
      "learning_rate": 4.545225615344076e-05,
      "loss": 0.8866,
      "step": 155670
    },
    {
      "epoch": 0.5456195873506539,
      "grad_norm": 2.859375,
      "learning_rate": 4.545160712477706e-05,
      "loss": 0.8485,
      "step": 155680
    },
    {
      "epoch": 0.5456546348575494,
      "grad_norm": 3.0625,
      "learning_rate": 4.545095809611336e-05,
      "loss": 0.9265,
      "step": 155690
    },
    {
      "epoch": 0.545689682364445,
      "grad_norm": 3.140625,
      "learning_rate": 4.545030906744966e-05,
      "loss": 0.9678,
      "step": 155700
    },
    {
      "epoch": 0.5457247298713406,
      "grad_norm": 2.90625,
      "learning_rate": 4.5449660038785954e-05,
      "loss": 1.005,
      "step": 155710
    },
    {
      "epoch": 0.5457597773782362,
      "grad_norm": 3.125,
      "learning_rate": 4.5449011010122255e-05,
      "loss": 0.9653,
      "step": 155720
    },
    {
      "epoch": 0.5457948248851318,
      "grad_norm": 2.890625,
      "learning_rate": 4.544836198145856e-05,
      "loss": 0.9931,
      "step": 155730
    },
    {
      "epoch": 0.5458298723920274,
      "grad_norm": 2.40625,
      "learning_rate": 4.544771295279485e-05,
      "loss": 0.8786,
      "step": 155740
    },
    {
      "epoch": 0.5458649198989229,
      "grad_norm": 3.375,
      "learning_rate": 4.544706392413115e-05,
      "loss": 0.85,
      "step": 155750
    },
    {
      "epoch": 0.5458999674058186,
      "grad_norm": 2.9375,
      "learning_rate": 4.544641489546744e-05,
      "loss": 0.9432,
      "step": 155760
    },
    {
      "epoch": 0.5459350149127142,
      "grad_norm": 2.640625,
      "learning_rate": 4.544576586680374e-05,
      "loss": 0.8905,
      "step": 155770
    },
    {
      "epoch": 0.5459700624196098,
      "grad_norm": 3.46875,
      "learning_rate": 4.5445116838140037e-05,
      "loss": 0.952,
      "step": 155780
    },
    {
      "epoch": 0.5460051099265054,
      "grad_norm": 2.96875,
      "learning_rate": 4.544446780947634e-05,
      "loss": 0.8339,
      "step": 155790
    },
    {
      "epoch": 0.546040157433401,
      "grad_norm": 3.140625,
      "learning_rate": 4.544381878081263e-05,
      "loss": 0.8948,
      "step": 155800
    },
    {
      "epoch": 0.5460752049402966,
      "grad_norm": 2.75,
      "learning_rate": 4.5443169752148934e-05,
      "loss": 0.9655,
      "step": 155810
    },
    {
      "epoch": 0.5461102524471921,
      "grad_norm": 2.765625,
      "learning_rate": 4.5442520723485235e-05,
      "loss": 0.8154,
      "step": 155820
    },
    {
      "epoch": 0.5461452999540878,
      "grad_norm": 2.9375,
      "learning_rate": 4.544187169482153e-05,
      "loss": 0.9513,
      "step": 155830
    },
    {
      "epoch": 0.5461803474609833,
      "grad_norm": 3.203125,
      "learning_rate": 4.544122266615783e-05,
      "loss": 0.874,
      "step": 155840
    },
    {
      "epoch": 0.546215394967879,
      "grad_norm": 3.0625,
      "learning_rate": 4.5440573637494126e-05,
      "loss": 0.9457,
      "step": 155850
    },
    {
      "epoch": 0.5462504424747746,
      "grad_norm": 2.796875,
      "learning_rate": 4.543992460883043e-05,
      "loss": 0.9546,
      "step": 155860
    },
    {
      "epoch": 0.5462854899816701,
      "grad_norm": 3.03125,
      "learning_rate": 4.543927558016672e-05,
      "loss": 0.9742,
      "step": 155870
    },
    {
      "epoch": 0.5463205374885658,
      "grad_norm": 3.296875,
      "learning_rate": 4.543862655150302e-05,
      "loss": 0.9381,
      "step": 155880
    },
    {
      "epoch": 0.5463555849954613,
      "grad_norm": 3.234375,
      "learning_rate": 4.543797752283932e-05,
      "loss": 0.9673,
      "step": 155890
    },
    {
      "epoch": 0.546390632502357,
      "grad_norm": 3.296875,
      "learning_rate": 4.543732849417562e-05,
      "loss": 0.8284,
      "step": 155900
    },
    {
      "epoch": 0.5464256800092525,
      "grad_norm": 2.890625,
      "learning_rate": 4.5436679465511914e-05,
      "loss": 0.908,
      "step": 155910
    },
    {
      "epoch": 0.5464607275161482,
      "grad_norm": 3.28125,
      "learning_rate": 4.5436030436848215e-05,
      "loss": 0.8576,
      "step": 155920
    },
    {
      "epoch": 0.5464957750230437,
      "grad_norm": 2.796875,
      "learning_rate": 4.543538140818451e-05,
      "loss": 0.9138,
      "step": 155930
    },
    {
      "epoch": 0.5465308225299393,
      "grad_norm": 2.65625,
      "learning_rate": 4.543473237952081e-05,
      "loss": 0.9046,
      "step": 155940
    },
    {
      "epoch": 0.5465658700368349,
      "grad_norm": 2.703125,
      "learning_rate": 4.543408335085711e-05,
      "loss": 1.0585,
      "step": 155950
    },
    {
      "epoch": 0.5466009175437305,
      "grad_norm": 3.515625,
      "learning_rate": 4.543343432219341e-05,
      "loss": 0.9115,
      "step": 155960
    },
    {
      "epoch": 0.5466359650506262,
      "grad_norm": 3.0,
      "learning_rate": 4.543278529352971e-05,
      "loss": 0.9973,
      "step": 155970
    },
    {
      "epoch": 0.5466710125575217,
      "grad_norm": 3.46875,
      "learning_rate": 4.5432136264866e-05,
      "loss": 0.8734,
      "step": 155980
    },
    {
      "epoch": 0.5467060600644174,
      "grad_norm": 3.015625,
      "learning_rate": 4.5431487236202305e-05,
      "loss": 0.8995,
      "step": 155990
    },
    {
      "epoch": 0.5467411075713129,
      "grad_norm": 3.078125,
      "learning_rate": 4.54308382075386e-05,
      "loss": 0.8576,
      "step": 156000
    },
    {
      "epoch": 0.5467761550782085,
      "grad_norm": 3.1875,
      "learning_rate": 4.54301891788749e-05,
      "loss": 0.8379,
      "step": 156010
    },
    {
      "epoch": 0.5468112025851041,
      "grad_norm": 3.140625,
      "learning_rate": 4.5429540150211195e-05,
      "loss": 0.9814,
      "step": 156020
    },
    {
      "epoch": 0.5468462500919997,
      "grad_norm": 3.296875,
      "learning_rate": 4.54288911215475e-05,
      "loss": 0.9873,
      "step": 156030
    },
    {
      "epoch": 0.5468812975988953,
      "grad_norm": 2.875,
      "learning_rate": 4.542824209288379e-05,
      "loss": 0.9578,
      "step": 156040
    },
    {
      "epoch": 0.5469163451057909,
      "grad_norm": 3.140625,
      "learning_rate": 4.542759306422009e-05,
      "loss": 0.9307,
      "step": 156050
    },
    {
      "epoch": 0.5469513926126865,
      "grad_norm": 2.859375,
      "learning_rate": 4.542694403555639e-05,
      "loss": 0.8722,
      "step": 156060
    },
    {
      "epoch": 0.5469864401195821,
      "grad_norm": 3.0,
      "learning_rate": 4.542629500689269e-05,
      "loss": 0.941,
      "step": 156070
    },
    {
      "epoch": 0.5470214876264777,
      "grad_norm": 2.6875,
      "learning_rate": 4.542564597822899e-05,
      "loss": 1.0185,
      "step": 156080
    },
    {
      "epoch": 0.5470565351333733,
      "grad_norm": 2.6875,
      "learning_rate": 4.5424996949565285e-05,
      "loss": 0.9524,
      "step": 156090
    },
    {
      "epoch": 0.5470915826402689,
      "grad_norm": 2.75,
      "learning_rate": 4.5424347920901586e-05,
      "loss": 0.956,
      "step": 156100
    },
    {
      "epoch": 0.5471266301471644,
      "grad_norm": 3.453125,
      "learning_rate": 4.542369889223788e-05,
      "loss": 0.9237,
      "step": 156110
    },
    {
      "epoch": 0.5471616776540601,
      "grad_norm": 2.71875,
      "learning_rate": 4.542304986357418e-05,
      "loss": 0.9412,
      "step": 156120
    },
    {
      "epoch": 0.5471967251609556,
      "grad_norm": 2.890625,
      "learning_rate": 4.542240083491048e-05,
      "loss": 0.9012,
      "step": 156130
    },
    {
      "epoch": 0.5472317726678513,
      "grad_norm": 2.59375,
      "learning_rate": 4.542175180624677e-05,
      "loss": 0.7929,
      "step": 156140
    },
    {
      "epoch": 0.5472668201747468,
      "grad_norm": 2.703125,
      "learning_rate": 4.5421102777583066e-05,
      "loss": 0.8821,
      "step": 156150
    },
    {
      "epoch": 0.5473018676816425,
      "grad_norm": 3.25,
      "learning_rate": 4.542045374891937e-05,
      "loss": 0.9177,
      "step": 156160
    },
    {
      "epoch": 0.5473369151885381,
      "grad_norm": 3.21875,
      "learning_rate": 4.541980472025567e-05,
      "loss": 0.9422,
      "step": 156170
    },
    {
      "epoch": 0.5473719626954336,
      "grad_norm": 3.171875,
      "learning_rate": 4.541915569159196e-05,
      "loss": 0.9564,
      "step": 156180
    },
    {
      "epoch": 0.5474070102023293,
      "grad_norm": 3.203125,
      "learning_rate": 4.5418506662928265e-05,
      "loss": 0.9737,
      "step": 156190
    },
    {
      "epoch": 0.5474420577092248,
      "grad_norm": 2.859375,
      "learning_rate": 4.541785763426456e-05,
      "loss": 0.8286,
      "step": 156200
    },
    {
      "epoch": 0.5474771052161205,
      "grad_norm": 3.28125,
      "learning_rate": 4.541720860560086e-05,
      "loss": 0.8816,
      "step": 156210
    },
    {
      "epoch": 0.547512152723016,
      "grad_norm": 3.328125,
      "learning_rate": 4.5416559576937155e-05,
      "loss": 0.946,
      "step": 156220
    },
    {
      "epoch": 0.5475472002299117,
      "grad_norm": 2.875,
      "learning_rate": 4.541591054827346e-05,
      "loss": 0.9094,
      "step": 156230
    },
    {
      "epoch": 0.5475822477368072,
      "grad_norm": 3.140625,
      "learning_rate": 4.541526151960975e-05,
      "loss": 0.9802,
      "step": 156240
    },
    {
      "epoch": 0.5476172952437028,
      "grad_norm": 2.859375,
      "learning_rate": 4.541461249094605e-05,
      "loss": 0.9941,
      "step": 156250
    },
    {
      "epoch": 0.5476523427505985,
      "grad_norm": 2.984375,
      "learning_rate": 4.541396346228235e-05,
      "loss": 0.9018,
      "step": 156260
    },
    {
      "epoch": 0.547687390257494,
      "grad_norm": 3.09375,
      "learning_rate": 4.541331443361865e-05,
      "loss": 0.9743,
      "step": 156270
    },
    {
      "epoch": 0.5477224377643897,
      "grad_norm": 3.015625,
      "learning_rate": 4.541266540495494e-05,
      "loss": 0.9103,
      "step": 156280
    },
    {
      "epoch": 0.5477574852712852,
      "grad_norm": 3.078125,
      "learning_rate": 4.5412016376291245e-05,
      "loss": 1.0441,
      "step": 156290
    },
    {
      "epoch": 0.5477925327781809,
      "grad_norm": 3.109375,
      "learning_rate": 4.541136734762754e-05,
      "loss": 0.8986,
      "step": 156300
    },
    {
      "epoch": 0.5478275802850764,
      "grad_norm": 2.703125,
      "learning_rate": 4.541071831896384e-05,
      "loss": 0.9037,
      "step": 156310
    },
    {
      "epoch": 0.547862627791972,
      "grad_norm": 3.390625,
      "learning_rate": 4.541006929030014e-05,
      "loss": 0.9222,
      "step": 156320
    },
    {
      "epoch": 0.5478976752988676,
      "grad_norm": 2.328125,
      "learning_rate": 4.540942026163644e-05,
      "loss": 0.8758,
      "step": 156330
    },
    {
      "epoch": 0.5479327228057632,
      "grad_norm": 3.078125,
      "learning_rate": 4.540877123297274e-05,
      "loss": 1.0144,
      "step": 156340
    },
    {
      "epoch": 0.5479677703126589,
      "grad_norm": 2.8125,
      "learning_rate": 4.540812220430903e-05,
      "loss": 0.8746,
      "step": 156350
    },
    {
      "epoch": 0.5480028178195544,
      "grad_norm": 2.515625,
      "learning_rate": 4.5407473175645334e-05,
      "loss": 0.8383,
      "step": 156360
    },
    {
      "epoch": 0.54803786532645,
      "grad_norm": 3.09375,
      "learning_rate": 4.540682414698163e-05,
      "loss": 0.9262,
      "step": 156370
    },
    {
      "epoch": 0.5480729128333456,
      "grad_norm": 3.3125,
      "learning_rate": 4.540617511831793e-05,
      "loss": 0.8954,
      "step": 156380
    },
    {
      "epoch": 0.5481079603402412,
      "grad_norm": 3.0625,
      "learning_rate": 4.5405526089654225e-05,
      "loss": 0.9675,
      "step": 156390
    },
    {
      "epoch": 0.5481430078471368,
      "grad_norm": 3.265625,
      "learning_rate": 4.5404877060990526e-05,
      "loss": 0.8978,
      "step": 156400
    },
    {
      "epoch": 0.5481780553540324,
      "grad_norm": 3.078125,
      "learning_rate": 4.540422803232682e-05,
      "loss": 1.0043,
      "step": 156410
    },
    {
      "epoch": 0.548213102860928,
      "grad_norm": 2.8125,
      "learning_rate": 4.540357900366312e-05,
      "loss": 0.9881,
      "step": 156420
    },
    {
      "epoch": 0.5482481503678236,
      "grad_norm": 2.984375,
      "learning_rate": 4.540292997499942e-05,
      "loss": 0.9115,
      "step": 156430
    },
    {
      "epoch": 0.5482831978747191,
      "grad_norm": 3.078125,
      "learning_rate": 4.540228094633572e-05,
      "loss": 0.8667,
      "step": 156440
    },
    {
      "epoch": 0.5483182453816148,
      "grad_norm": 3.234375,
      "learning_rate": 4.540163191767202e-05,
      "loss": 0.9334,
      "step": 156450
    },
    {
      "epoch": 0.5483532928885104,
      "grad_norm": 2.71875,
      "learning_rate": 4.5400982889008314e-05,
      "loss": 0.9403,
      "step": 156460
    },
    {
      "epoch": 0.548388340395406,
      "grad_norm": 2.703125,
      "learning_rate": 4.5400333860344615e-05,
      "loss": 0.8983,
      "step": 156470
    },
    {
      "epoch": 0.5484233879023016,
      "grad_norm": 3.046875,
      "learning_rate": 4.539968483168091e-05,
      "loss": 0.886,
      "step": 156480
    },
    {
      "epoch": 0.5484584354091971,
      "grad_norm": 3.46875,
      "learning_rate": 4.539903580301721e-05,
      "loss": 0.9007,
      "step": 156490
    },
    {
      "epoch": 0.5484934829160928,
      "grad_norm": 3.15625,
      "learning_rate": 4.5398386774353506e-05,
      "loss": 0.9273,
      "step": 156500
    },
    {
      "epoch": 0.5485285304229883,
      "grad_norm": 3.0625,
      "learning_rate": 4.53977377456898e-05,
      "loss": 0.8234,
      "step": 156510
    },
    {
      "epoch": 0.548563577929884,
      "grad_norm": 3.21875,
      "learning_rate": 4.5397088717026095e-05,
      "loss": 0.9069,
      "step": 156520
    },
    {
      "epoch": 0.5485986254367795,
      "grad_norm": 3.390625,
      "learning_rate": 4.53964396883624e-05,
      "loss": 0.9875,
      "step": 156530
    },
    {
      "epoch": 0.5486336729436752,
      "grad_norm": 2.625,
      "learning_rate": 4.53957906596987e-05,
      "loss": 0.9188,
      "step": 156540
    },
    {
      "epoch": 0.5486687204505708,
      "grad_norm": 3.15625,
      "learning_rate": 4.539514163103499e-05,
      "loss": 0.932,
      "step": 156550
    },
    {
      "epoch": 0.5487037679574663,
      "grad_norm": 2.671875,
      "learning_rate": 4.5394492602371294e-05,
      "loss": 0.9052,
      "step": 156560
    },
    {
      "epoch": 0.548738815464362,
      "grad_norm": 2.8125,
      "learning_rate": 4.539384357370759e-05,
      "loss": 0.9691,
      "step": 156570
    },
    {
      "epoch": 0.5487738629712575,
      "grad_norm": 3.1875,
      "learning_rate": 4.539319454504389e-05,
      "loss": 0.9401,
      "step": 156580
    },
    {
      "epoch": 0.5488089104781532,
      "grad_norm": 3.328125,
      "learning_rate": 4.5392545516380185e-05,
      "loss": 0.9624,
      "step": 156590
    },
    {
      "epoch": 0.5488439579850487,
      "grad_norm": 2.8125,
      "learning_rate": 4.5391896487716486e-05,
      "loss": 0.9531,
      "step": 156600
    },
    {
      "epoch": 0.5488790054919443,
      "grad_norm": 2.90625,
      "learning_rate": 4.539124745905278e-05,
      "loss": 0.8907,
      "step": 156610
    },
    {
      "epoch": 0.5489140529988399,
      "grad_norm": 2.796875,
      "learning_rate": 4.539059843038908e-05,
      "loss": 0.9256,
      "step": 156620
    },
    {
      "epoch": 0.5489491005057355,
      "grad_norm": 2.890625,
      "learning_rate": 4.538994940172538e-05,
      "loss": 0.9218,
      "step": 156630
    },
    {
      "epoch": 0.5489841480126311,
      "grad_norm": 2.671875,
      "learning_rate": 4.538930037306168e-05,
      "loss": 0.9002,
      "step": 156640
    },
    {
      "epoch": 0.5490191955195267,
      "grad_norm": 2.890625,
      "learning_rate": 4.538865134439797e-05,
      "loss": 0.9232,
      "step": 156650
    },
    {
      "epoch": 0.5490542430264224,
      "grad_norm": 2.5,
      "learning_rate": 4.5388002315734274e-05,
      "loss": 0.9488,
      "step": 156660
    },
    {
      "epoch": 0.5490892905333179,
      "grad_norm": 3.359375,
      "learning_rate": 4.538735328707057e-05,
      "loss": 0.8751,
      "step": 156670
    },
    {
      "epoch": 0.5491243380402135,
      "grad_norm": 2.9375,
      "learning_rate": 4.538670425840687e-05,
      "loss": 0.9062,
      "step": 156680
    },
    {
      "epoch": 0.5491593855471091,
      "grad_norm": 3.140625,
      "learning_rate": 4.538605522974317e-05,
      "loss": 0.8883,
      "step": 156690
    },
    {
      "epoch": 0.5491944330540047,
      "grad_norm": 2.984375,
      "learning_rate": 4.5385406201079466e-05,
      "loss": 0.9671,
      "step": 156700
    },
    {
      "epoch": 0.5492294805609003,
      "grad_norm": 2.953125,
      "learning_rate": 4.538475717241577e-05,
      "loss": 1.0213,
      "step": 156710
    },
    {
      "epoch": 0.5492645280677959,
      "grad_norm": 2.984375,
      "learning_rate": 4.538410814375206e-05,
      "loss": 0.8913,
      "step": 156720
    },
    {
      "epoch": 0.5492995755746914,
      "grad_norm": 3.34375,
      "learning_rate": 4.5383459115088363e-05,
      "loss": 1.0281,
      "step": 156730
    },
    {
      "epoch": 0.5493346230815871,
      "grad_norm": 3.640625,
      "learning_rate": 4.538281008642466e-05,
      "loss": 0.9286,
      "step": 156740
    },
    {
      "epoch": 0.5493696705884827,
      "grad_norm": 2.875,
      "learning_rate": 4.538216105776096e-05,
      "loss": 0.9107,
      "step": 156750
    },
    {
      "epoch": 0.5494047180953783,
      "grad_norm": 2.8125,
      "learning_rate": 4.5381512029097254e-05,
      "loss": 0.8883,
      "step": 156760
    },
    {
      "epoch": 0.5494397656022739,
      "grad_norm": 3.046875,
      "learning_rate": 4.5380863000433555e-05,
      "loss": 0.8705,
      "step": 156770
    },
    {
      "epoch": 0.5494748131091695,
      "grad_norm": 3.03125,
      "learning_rate": 4.538021397176985e-05,
      "loss": 0.8363,
      "step": 156780
    },
    {
      "epoch": 0.5495098606160651,
      "grad_norm": 2.859375,
      "learning_rate": 4.537956494310615e-05,
      "loss": 0.856,
      "step": 156790
    },
    {
      "epoch": 0.5495449081229606,
      "grad_norm": 2.578125,
      "learning_rate": 4.5378915914442446e-05,
      "loss": 0.8408,
      "step": 156800
    },
    {
      "epoch": 0.5495799556298563,
      "grad_norm": 2.8125,
      "learning_rate": 4.537826688577875e-05,
      "loss": 0.8605,
      "step": 156810
    },
    {
      "epoch": 0.5496150031367518,
      "grad_norm": 3.078125,
      "learning_rate": 4.537761785711505e-05,
      "loss": 0.9438,
      "step": 156820
    },
    {
      "epoch": 0.5496500506436475,
      "grad_norm": 2.921875,
      "learning_rate": 4.5376968828451343e-05,
      "loss": 1.023,
      "step": 156830
    },
    {
      "epoch": 0.5496850981505431,
      "grad_norm": 2.921875,
      "learning_rate": 4.5376319799787645e-05,
      "loss": 0.9586,
      "step": 156840
    },
    {
      "epoch": 0.5497201456574387,
      "grad_norm": 4.28125,
      "learning_rate": 4.537567077112394e-05,
      "loss": 0.9062,
      "step": 156850
    },
    {
      "epoch": 0.5497551931643343,
      "grad_norm": 2.828125,
      "learning_rate": 4.537502174246024e-05,
      "loss": 0.8643,
      "step": 156860
    },
    {
      "epoch": 0.5497902406712298,
      "grad_norm": 3.21875,
      "learning_rate": 4.5374372713796535e-05,
      "loss": 0.9349,
      "step": 156870
    },
    {
      "epoch": 0.5498252881781255,
      "grad_norm": 2.765625,
      "learning_rate": 4.537372368513284e-05,
      "loss": 0.8757,
      "step": 156880
    },
    {
      "epoch": 0.549860335685021,
      "grad_norm": 3.3125,
      "learning_rate": 4.5373074656469125e-05,
      "loss": 0.9953,
      "step": 156890
    },
    {
      "epoch": 0.5498953831919167,
      "grad_norm": 2.875,
      "learning_rate": 4.5372425627805426e-05,
      "loss": 0.9537,
      "step": 156900
    },
    {
      "epoch": 0.5499304306988122,
      "grad_norm": 3.28125,
      "learning_rate": 4.537177659914173e-05,
      "loss": 0.9008,
      "step": 156910
    },
    {
      "epoch": 0.5499654782057078,
      "grad_norm": 3.328125,
      "learning_rate": 4.537112757047802e-05,
      "loss": 0.915,
      "step": 156920
    },
    {
      "epoch": 0.5500005257126034,
      "grad_norm": 3.140625,
      "learning_rate": 4.5370478541814323e-05,
      "loss": 0.9227,
      "step": 156930
    },
    {
      "epoch": 0.550035573219499,
      "grad_norm": 3.25,
      "learning_rate": 4.536982951315062e-05,
      "loss": 0.9062,
      "step": 156940
    },
    {
      "epoch": 0.5500706207263947,
      "grad_norm": 3.25,
      "learning_rate": 4.536918048448692e-05,
      "loss": 0.8968,
      "step": 156950
    },
    {
      "epoch": 0.5501056682332902,
      "grad_norm": 2.921875,
      "learning_rate": 4.5368531455823214e-05,
      "loss": 0.905,
      "step": 156960
    },
    {
      "epoch": 0.5501407157401859,
      "grad_norm": 3.015625,
      "learning_rate": 4.5367882427159515e-05,
      "loss": 0.93,
      "step": 156970
    },
    {
      "epoch": 0.5501757632470814,
      "grad_norm": 3.15625,
      "learning_rate": 4.536723339849581e-05,
      "loss": 0.983,
      "step": 156980
    },
    {
      "epoch": 0.550210810753977,
      "grad_norm": 3.0625,
      "learning_rate": 4.536658436983211e-05,
      "loss": 0.9067,
      "step": 156990
    },
    {
      "epoch": 0.5502458582608726,
      "grad_norm": 2.78125,
      "learning_rate": 4.5365935341168406e-05,
      "loss": 0.9317,
      "step": 157000
    },
    {
      "epoch": 0.5502809057677682,
      "grad_norm": 2.9375,
      "learning_rate": 4.536528631250471e-05,
      "loss": 0.8989,
      "step": 157010
    },
    {
      "epoch": 0.5503159532746638,
      "grad_norm": 2.59375,
      "learning_rate": 4.5364637283841e-05,
      "loss": 0.881,
      "step": 157020
    },
    {
      "epoch": 0.5503510007815594,
      "grad_norm": 3.0,
      "learning_rate": 4.5363988255177303e-05,
      "loss": 0.9493,
      "step": 157030
    },
    {
      "epoch": 0.550386048288455,
      "grad_norm": 3.234375,
      "learning_rate": 4.5363339226513605e-05,
      "loss": 1.0263,
      "step": 157040
    },
    {
      "epoch": 0.5504210957953506,
      "grad_norm": 3.296875,
      "learning_rate": 4.53626901978499e-05,
      "loss": 1.0685,
      "step": 157050
    },
    {
      "epoch": 0.5504561433022462,
      "grad_norm": 2.828125,
      "learning_rate": 4.53620411691862e-05,
      "loss": 0.9498,
      "step": 157060
    },
    {
      "epoch": 0.5504911908091418,
      "grad_norm": 3.078125,
      "learning_rate": 4.5361392140522495e-05,
      "loss": 0.9237,
      "step": 157070
    },
    {
      "epoch": 0.5505262383160374,
      "grad_norm": 3.25,
      "learning_rate": 4.53607431118588e-05,
      "loss": 0.9415,
      "step": 157080
    },
    {
      "epoch": 0.550561285822933,
      "grad_norm": 2.65625,
      "learning_rate": 4.536009408319509e-05,
      "loss": 0.9241,
      "step": 157090
    },
    {
      "epoch": 0.5505963333298286,
      "grad_norm": 3.21875,
      "learning_rate": 4.535944505453139e-05,
      "loss": 0.961,
      "step": 157100
    },
    {
      "epoch": 0.5506313808367241,
      "grad_norm": 2.671875,
      "learning_rate": 4.535879602586769e-05,
      "loss": 0.8845,
      "step": 157110
    },
    {
      "epoch": 0.5506664283436198,
      "grad_norm": 3.15625,
      "learning_rate": 4.535814699720399e-05,
      "loss": 0.898,
      "step": 157120
    },
    {
      "epoch": 0.5507014758505153,
      "grad_norm": 2.828125,
      "learning_rate": 4.5357497968540283e-05,
      "loss": 0.8792,
      "step": 157130
    },
    {
      "epoch": 0.550736523357411,
      "grad_norm": 3.078125,
      "learning_rate": 4.5356848939876585e-05,
      "loss": 0.9341,
      "step": 157140
    },
    {
      "epoch": 0.5507715708643066,
      "grad_norm": 2.8125,
      "learning_rate": 4.535619991121288e-05,
      "loss": 0.8962,
      "step": 157150
    },
    {
      "epoch": 0.5508066183712022,
      "grad_norm": 3.28125,
      "learning_rate": 4.535555088254918e-05,
      "loss": 0.8964,
      "step": 157160
    },
    {
      "epoch": 0.5508416658780978,
      "grad_norm": 2.90625,
      "learning_rate": 4.5354901853885475e-05,
      "loss": 0.9049,
      "step": 157170
    },
    {
      "epoch": 0.5508767133849933,
      "grad_norm": 2.65625,
      "learning_rate": 4.535425282522178e-05,
      "loss": 0.9157,
      "step": 157180
    },
    {
      "epoch": 0.550911760891889,
      "grad_norm": 2.8125,
      "learning_rate": 4.535360379655808e-05,
      "loss": 0.871,
      "step": 157190
    },
    {
      "epoch": 0.5509468083987845,
      "grad_norm": 3.328125,
      "learning_rate": 4.535295476789437e-05,
      "loss": 0.9795,
      "step": 157200
    },
    {
      "epoch": 0.5509818559056802,
      "grad_norm": 3.0,
      "learning_rate": 4.5352305739230674e-05,
      "loss": 0.9461,
      "step": 157210
    },
    {
      "epoch": 0.5510169034125757,
      "grad_norm": 2.984375,
      "learning_rate": 4.535165671056697e-05,
      "loss": 0.9227,
      "step": 157220
    },
    {
      "epoch": 0.5510519509194713,
      "grad_norm": 3.28125,
      "learning_rate": 4.535100768190327e-05,
      "loss": 0.9692,
      "step": 157230
    },
    {
      "epoch": 0.551086998426367,
      "grad_norm": 3.0,
      "learning_rate": 4.5350358653239565e-05,
      "loss": 0.9845,
      "step": 157240
    },
    {
      "epoch": 0.5511220459332625,
      "grad_norm": 3.265625,
      "learning_rate": 4.5349709624575866e-05,
      "loss": 0.9186,
      "step": 157250
    },
    {
      "epoch": 0.5511570934401582,
      "grad_norm": 6.28125,
      "learning_rate": 4.5349060595912154e-05,
      "loss": 0.9269,
      "step": 157260
    },
    {
      "epoch": 0.5511921409470537,
      "grad_norm": 3.28125,
      "learning_rate": 4.5348411567248455e-05,
      "loss": 1.0112,
      "step": 157270
    },
    {
      "epoch": 0.5512271884539494,
      "grad_norm": 3.171875,
      "learning_rate": 4.534776253858476e-05,
      "loss": 0.952,
      "step": 157280
    },
    {
      "epoch": 0.5512622359608449,
      "grad_norm": 2.859375,
      "learning_rate": 4.534711350992105e-05,
      "loss": 0.9132,
      "step": 157290
    },
    {
      "epoch": 0.5512972834677405,
      "grad_norm": 3.0625,
      "learning_rate": 4.534646448125735e-05,
      "loss": 0.8827,
      "step": 157300
    },
    {
      "epoch": 0.5513323309746361,
      "grad_norm": 2.703125,
      "learning_rate": 4.534581545259365e-05,
      "loss": 0.9642,
      "step": 157310
    },
    {
      "epoch": 0.5513673784815317,
      "grad_norm": 3.03125,
      "learning_rate": 4.534516642392995e-05,
      "loss": 1.0108,
      "step": 157320
    },
    {
      "epoch": 0.5514024259884273,
      "grad_norm": 3.265625,
      "learning_rate": 4.5344517395266243e-05,
      "loss": 0.8981,
      "step": 157330
    },
    {
      "epoch": 0.5514374734953229,
      "grad_norm": 3.0625,
      "learning_rate": 4.5343868366602545e-05,
      "loss": 0.8911,
      "step": 157340
    },
    {
      "epoch": 0.5514725210022186,
      "grad_norm": 2.953125,
      "learning_rate": 4.534321933793884e-05,
      "loss": 0.9535,
      "step": 157350
    },
    {
      "epoch": 0.5515075685091141,
      "grad_norm": 2.9375,
      "learning_rate": 4.534257030927514e-05,
      "loss": 0.9376,
      "step": 157360
    },
    {
      "epoch": 0.5515426160160097,
      "grad_norm": 2.96875,
      "learning_rate": 4.5341921280611435e-05,
      "loss": 0.9781,
      "step": 157370
    },
    {
      "epoch": 0.5515776635229053,
      "grad_norm": 2.828125,
      "learning_rate": 4.534127225194774e-05,
      "loss": 0.979,
      "step": 157380
    },
    {
      "epoch": 0.5516127110298009,
      "grad_norm": 2.546875,
      "learning_rate": 4.534062322328403e-05,
      "loss": 0.8411,
      "step": 157390
    },
    {
      "epoch": 0.5516477585366965,
      "grad_norm": 2.8125,
      "learning_rate": 4.533997419462033e-05,
      "loss": 0.893,
      "step": 157400
    },
    {
      "epoch": 0.5516828060435921,
      "grad_norm": 2.5625,
      "learning_rate": 4.5339325165956634e-05,
      "loss": 0.8347,
      "step": 157410
    },
    {
      "epoch": 0.5517178535504876,
      "grad_norm": 2.65625,
      "learning_rate": 4.533867613729293e-05,
      "loss": 0.8781,
      "step": 157420
    },
    {
      "epoch": 0.5517529010573833,
      "grad_norm": 2.921875,
      "learning_rate": 4.533802710862923e-05,
      "loss": 0.9114,
      "step": 157430
    },
    {
      "epoch": 0.5517879485642789,
      "grad_norm": 2.796875,
      "learning_rate": 4.5337378079965525e-05,
      "loss": 0.9011,
      "step": 157440
    },
    {
      "epoch": 0.5518229960711745,
      "grad_norm": 2.921875,
      "learning_rate": 4.5336729051301826e-05,
      "loss": 0.9271,
      "step": 157450
    },
    {
      "epoch": 0.5518580435780701,
      "grad_norm": 3.171875,
      "learning_rate": 4.533608002263812e-05,
      "loss": 0.9546,
      "step": 157460
    },
    {
      "epoch": 0.5518930910849656,
      "grad_norm": 3.109375,
      "learning_rate": 4.533543099397442e-05,
      "loss": 0.886,
      "step": 157470
    },
    {
      "epoch": 0.5519281385918613,
      "grad_norm": 2.71875,
      "learning_rate": 4.533478196531072e-05,
      "loss": 0.8517,
      "step": 157480
    },
    {
      "epoch": 0.5519631860987568,
      "grad_norm": 2.578125,
      "learning_rate": 4.533413293664702e-05,
      "loss": 0.9521,
      "step": 157490
    },
    {
      "epoch": 0.5519982336056525,
      "grad_norm": 2.8125,
      "learning_rate": 4.533348390798331e-05,
      "loss": 0.8473,
      "step": 157500
    },
    {
      "epoch": 0.552033281112548,
      "grad_norm": 3.390625,
      "learning_rate": 4.5332834879319614e-05,
      "loss": 0.9223,
      "step": 157510
    },
    {
      "epoch": 0.5520683286194437,
      "grad_norm": 2.890625,
      "learning_rate": 4.533218585065591e-05,
      "loss": 0.9902,
      "step": 157520
    },
    {
      "epoch": 0.5521033761263393,
      "grad_norm": 3.21875,
      "learning_rate": 4.533153682199221e-05,
      "loss": 0.9165,
      "step": 157530
    },
    {
      "epoch": 0.5521384236332348,
      "grad_norm": 3.265625,
      "learning_rate": 4.5330887793328505e-05,
      "loss": 0.914,
      "step": 157540
    },
    {
      "epoch": 0.5521734711401305,
      "grad_norm": 3.5,
      "learning_rate": 4.5330238764664806e-05,
      "loss": 0.9686,
      "step": 157550
    },
    {
      "epoch": 0.552208518647026,
      "grad_norm": 3.046875,
      "learning_rate": 4.532958973600111e-05,
      "loss": 0.9243,
      "step": 157560
    },
    {
      "epoch": 0.5522435661539217,
      "grad_norm": 3.15625,
      "learning_rate": 4.53289407073374e-05,
      "loss": 0.9817,
      "step": 157570
    },
    {
      "epoch": 0.5522786136608172,
      "grad_norm": 2.796875,
      "learning_rate": 4.5328291678673704e-05,
      "loss": 0.906,
      "step": 157580
    },
    {
      "epoch": 0.5523136611677129,
      "grad_norm": 3.03125,
      "learning_rate": 4.532764265001e-05,
      "loss": 0.9684,
      "step": 157590
    },
    {
      "epoch": 0.5523487086746084,
      "grad_norm": 3.046875,
      "learning_rate": 4.53269936213463e-05,
      "loss": 0.8989,
      "step": 157600
    },
    {
      "epoch": 0.552383756181504,
      "grad_norm": 3.09375,
      "learning_rate": 4.5326344592682594e-05,
      "loss": 0.9528,
      "step": 157610
    },
    {
      "epoch": 0.5524188036883996,
      "grad_norm": 3.125,
      "learning_rate": 4.5325695564018896e-05,
      "loss": 0.9471,
      "step": 157620
    },
    {
      "epoch": 0.5524538511952952,
      "grad_norm": 2.546875,
      "learning_rate": 4.532504653535519e-05,
      "loss": 0.8493,
      "step": 157630
    },
    {
      "epoch": 0.5524888987021909,
      "grad_norm": 2.890625,
      "learning_rate": 4.5324397506691485e-05,
      "loss": 0.923,
      "step": 157640
    },
    {
      "epoch": 0.5525239462090864,
      "grad_norm": 2.734375,
      "learning_rate": 4.5323748478027786e-05,
      "loss": 0.866,
      "step": 157650
    },
    {
      "epoch": 0.552558993715982,
      "grad_norm": 3.09375,
      "learning_rate": 4.532309944936408e-05,
      "loss": 0.9906,
      "step": 157660
    },
    {
      "epoch": 0.5525940412228776,
      "grad_norm": 3.21875,
      "learning_rate": 4.532245042070038e-05,
      "loss": 0.8867,
      "step": 157670
    },
    {
      "epoch": 0.5526290887297732,
      "grad_norm": 3.15625,
      "learning_rate": 4.532180139203668e-05,
      "loss": 0.8584,
      "step": 157680
    },
    {
      "epoch": 0.5526641362366688,
      "grad_norm": 3.046875,
      "learning_rate": 4.532115236337298e-05,
      "loss": 0.8645,
      "step": 157690
    },
    {
      "epoch": 0.5526991837435644,
      "grad_norm": 3.390625,
      "learning_rate": 4.532050333470927e-05,
      "loss": 0.9916,
      "step": 157700
    },
    {
      "epoch": 0.55273423125046,
      "grad_norm": 3.03125,
      "learning_rate": 4.5319854306045574e-05,
      "loss": 0.9027,
      "step": 157710
    },
    {
      "epoch": 0.5527692787573556,
      "grad_norm": 3.09375,
      "learning_rate": 4.531920527738187e-05,
      "loss": 0.9379,
      "step": 157720
    },
    {
      "epoch": 0.5528043262642512,
      "grad_norm": 4.03125,
      "learning_rate": 4.531855624871817e-05,
      "loss": 0.9418,
      "step": 157730
    },
    {
      "epoch": 0.5528393737711468,
      "grad_norm": 2.59375,
      "learning_rate": 4.5317907220054465e-05,
      "loss": 0.9051,
      "step": 157740
    },
    {
      "epoch": 0.5528744212780424,
      "grad_norm": 2.890625,
      "learning_rate": 4.5317258191390766e-05,
      "loss": 0.973,
      "step": 157750
    },
    {
      "epoch": 0.552909468784938,
      "grad_norm": 2.578125,
      "learning_rate": 4.531660916272706e-05,
      "loss": 0.9145,
      "step": 157760
    },
    {
      "epoch": 0.5529445162918336,
      "grad_norm": 2.890625,
      "learning_rate": 4.531596013406336e-05,
      "loss": 0.8948,
      "step": 157770
    },
    {
      "epoch": 0.5529795637987291,
      "grad_norm": 2.546875,
      "learning_rate": 4.5315311105399664e-05,
      "loss": 0.8098,
      "step": 157780
    },
    {
      "epoch": 0.5530146113056248,
      "grad_norm": 2.828125,
      "learning_rate": 4.531466207673596e-05,
      "loss": 0.8484,
      "step": 157790
    },
    {
      "epoch": 0.5530496588125203,
      "grad_norm": 3.484375,
      "learning_rate": 4.531401304807226e-05,
      "loss": 0.884,
      "step": 157800
    },
    {
      "epoch": 0.553084706319416,
      "grad_norm": 3.171875,
      "learning_rate": 4.5313364019408554e-05,
      "loss": 0.9271,
      "step": 157810
    },
    {
      "epoch": 0.5531197538263115,
      "grad_norm": 2.859375,
      "learning_rate": 4.5312714990744856e-05,
      "loss": 0.902,
      "step": 157820
    },
    {
      "epoch": 0.5531548013332072,
      "grad_norm": 2.921875,
      "learning_rate": 4.531206596208115e-05,
      "loss": 0.8413,
      "step": 157830
    },
    {
      "epoch": 0.5531898488401028,
      "grad_norm": 3.5,
      "learning_rate": 4.531141693341745e-05,
      "loss": 0.9608,
      "step": 157840
    },
    {
      "epoch": 0.5532248963469983,
      "grad_norm": 2.625,
      "learning_rate": 4.5310767904753746e-05,
      "loss": 0.8249,
      "step": 157850
    },
    {
      "epoch": 0.553259943853894,
      "grad_norm": 3.4375,
      "learning_rate": 4.531011887609005e-05,
      "loss": 0.9774,
      "step": 157860
    },
    {
      "epoch": 0.5532949913607895,
      "grad_norm": 5.78125,
      "learning_rate": 4.530946984742634e-05,
      "loss": 0.9779,
      "step": 157870
    },
    {
      "epoch": 0.5533300388676852,
      "grad_norm": 2.75,
      "learning_rate": 4.5308820818762644e-05,
      "loss": 0.9153,
      "step": 157880
    },
    {
      "epoch": 0.5533650863745807,
      "grad_norm": 2.671875,
      "learning_rate": 4.530817179009894e-05,
      "loss": 0.8581,
      "step": 157890
    },
    {
      "epoch": 0.5534001338814764,
      "grad_norm": 3.15625,
      "learning_rate": 4.530752276143524e-05,
      "loss": 0.8772,
      "step": 157900
    },
    {
      "epoch": 0.5534351813883719,
      "grad_norm": 3.078125,
      "learning_rate": 4.5306873732771534e-05,
      "loss": 1.0199,
      "step": 157910
    },
    {
      "epoch": 0.5534702288952675,
      "grad_norm": 2.78125,
      "learning_rate": 4.5306224704107836e-05,
      "loss": 0.9657,
      "step": 157920
    },
    {
      "epoch": 0.5535052764021632,
      "grad_norm": 2.796875,
      "learning_rate": 4.530557567544414e-05,
      "loss": 0.9552,
      "step": 157930
    },
    {
      "epoch": 0.5535403239090587,
      "grad_norm": 2.578125,
      "learning_rate": 4.530492664678043e-05,
      "loss": 0.9089,
      "step": 157940
    },
    {
      "epoch": 0.5535753714159544,
      "grad_norm": 3.140625,
      "learning_rate": 4.530427761811673e-05,
      "loss": 0.9118,
      "step": 157950
    },
    {
      "epoch": 0.5536104189228499,
      "grad_norm": 3.109375,
      "learning_rate": 4.530362858945303e-05,
      "loss": 0.8372,
      "step": 157960
    },
    {
      "epoch": 0.5536454664297455,
      "grad_norm": 3.03125,
      "learning_rate": 4.530297956078933e-05,
      "loss": 0.9185,
      "step": 157970
    },
    {
      "epoch": 0.5536805139366411,
      "grad_norm": 2.890625,
      "learning_rate": 4.5302330532125624e-05,
      "loss": 0.8631,
      "step": 157980
    },
    {
      "epoch": 0.5537155614435367,
      "grad_norm": 3.171875,
      "learning_rate": 4.5301681503461925e-05,
      "loss": 0.9615,
      "step": 157990
    },
    {
      "epoch": 0.5537506089504323,
      "grad_norm": 3.296875,
      "learning_rate": 4.530103247479822e-05,
      "loss": 0.9207,
      "step": 158000
    },
    {
      "epoch": 0.5537856564573279,
      "grad_norm": 2.734375,
      "learning_rate": 4.530038344613452e-05,
      "loss": 0.9906,
      "step": 158010
    },
    {
      "epoch": 0.5538207039642236,
      "grad_norm": 2.765625,
      "learning_rate": 4.5299734417470816e-05,
      "loss": 0.9554,
      "step": 158020
    },
    {
      "epoch": 0.5538557514711191,
      "grad_norm": 2.578125,
      "learning_rate": 4.529908538880711e-05,
      "loss": 0.9796,
      "step": 158030
    },
    {
      "epoch": 0.5538907989780147,
      "grad_norm": 2.703125,
      "learning_rate": 4.529843636014341e-05,
      "loss": 0.9704,
      "step": 158040
    },
    {
      "epoch": 0.5539258464849103,
      "grad_norm": 2.953125,
      "learning_rate": 4.5297787331479706e-05,
      "loss": 0.9533,
      "step": 158050
    },
    {
      "epoch": 0.5539608939918059,
      "grad_norm": 2.625,
      "learning_rate": 4.529713830281601e-05,
      "loss": 0.9822,
      "step": 158060
    },
    {
      "epoch": 0.5539959414987015,
      "grad_norm": 2.875,
      "learning_rate": 4.52964892741523e-05,
      "loss": 0.8949,
      "step": 158070
    },
    {
      "epoch": 0.5540309890055971,
      "grad_norm": 3.046875,
      "learning_rate": 4.5295840245488604e-05,
      "loss": 0.9734,
      "step": 158080
    },
    {
      "epoch": 0.5540660365124926,
      "grad_norm": 3.109375,
      "learning_rate": 4.52951912168249e-05,
      "loss": 0.9695,
      "step": 158090
    },
    {
      "epoch": 0.5541010840193883,
      "grad_norm": 2.875,
      "learning_rate": 4.52945421881612e-05,
      "loss": 0.9258,
      "step": 158100
    },
    {
      "epoch": 0.5541361315262838,
      "grad_norm": 3.09375,
      "learning_rate": 4.5293893159497494e-05,
      "loss": 0.8826,
      "step": 158110
    },
    {
      "epoch": 0.5541711790331795,
      "grad_norm": 3.15625,
      "learning_rate": 4.5293244130833796e-05,
      "loss": 0.9541,
      "step": 158120
    },
    {
      "epoch": 0.5542062265400751,
      "grad_norm": 2.859375,
      "learning_rate": 4.529259510217009e-05,
      "loss": 0.9193,
      "step": 158130
    },
    {
      "epoch": 0.5542412740469707,
      "grad_norm": 3.28125,
      "learning_rate": 4.529194607350639e-05,
      "loss": 0.9588,
      "step": 158140
    },
    {
      "epoch": 0.5542763215538663,
      "grad_norm": 2.875,
      "learning_rate": 4.529129704484269e-05,
      "loss": 0.9243,
      "step": 158150
    },
    {
      "epoch": 0.5543113690607618,
      "grad_norm": 3.40625,
      "learning_rate": 4.529064801617899e-05,
      "loss": 0.959,
      "step": 158160
    },
    {
      "epoch": 0.5543464165676575,
      "grad_norm": 3.046875,
      "learning_rate": 4.528999898751529e-05,
      "loss": 0.9783,
      "step": 158170
    },
    {
      "epoch": 0.554381464074553,
      "grad_norm": 3.046875,
      "learning_rate": 4.5289349958851584e-05,
      "loss": 0.9671,
      "step": 158180
    },
    {
      "epoch": 0.5544165115814487,
      "grad_norm": 2.875,
      "learning_rate": 4.5288700930187885e-05,
      "loss": 0.9004,
      "step": 158190
    },
    {
      "epoch": 0.5544515590883442,
      "grad_norm": 2.75,
      "learning_rate": 4.528805190152418e-05,
      "loss": 0.8634,
      "step": 158200
    },
    {
      "epoch": 0.5544866065952399,
      "grad_norm": 3.125,
      "learning_rate": 4.528740287286048e-05,
      "loss": 0.88,
      "step": 158210
    },
    {
      "epoch": 0.5545216541021355,
      "grad_norm": 2.78125,
      "learning_rate": 4.5286753844196776e-05,
      "loss": 0.8625,
      "step": 158220
    },
    {
      "epoch": 0.554556701609031,
      "grad_norm": 3.078125,
      "learning_rate": 4.528610481553308e-05,
      "loss": 0.9466,
      "step": 158230
    },
    {
      "epoch": 0.5545917491159267,
      "grad_norm": 2.71875,
      "learning_rate": 4.528545578686937e-05,
      "loss": 0.8978,
      "step": 158240
    },
    {
      "epoch": 0.5546267966228222,
      "grad_norm": 2.953125,
      "learning_rate": 4.528480675820567e-05,
      "loss": 0.9105,
      "step": 158250
    },
    {
      "epoch": 0.5546618441297179,
      "grad_norm": 3.078125,
      "learning_rate": 4.528415772954197e-05,
      "loss": 0.9287,
      "step": 158260
    },
    {
      "epoch": 0.5546968916366134,
      "grad_norm": 2.8125,
      "learning_rate": 4.528350870087827e-05,
      "loss": 0.8708,
      "step": 158270
    },
    {
      "epoch": 0.554731939143509,
      "grad_norm": 2.828125,
      "learning_rate": 4.528285967221457e-05,
      "loss": 0.9875,
      "step": 158280
    },
    {
      "epoch": 0.5547669866504046,
      "grad_norm": 3.375,
      "learning_rate": 4.5282210643550865e-05,
      "loss": 0.9355,
      "step": 158290
    },
    {
      "epoch": 0.5548020341573002,
      "grad_norm": 2.859375,
      "learning_rate": 4.5281561614887166e-05,
      "loss": 0.9497,
      "step": 158300
    },
    {
      "epoch": 0.5548370816641958,
      "grad_norm": 2.765625,
      "learning_rate": 4.528091258622346e-05,
      "loss": 0.8765,
      "step": 158310
    },
    {
      "epoch": 0.5548721291710914,
      "grad_norm": 2.9375,
      "learning_rate": 4.528026355755976e-05,
      "loss": 0.9367,
      "step": 158320
    },
    {
      "epoch": 0.5549071766779871,
      "grad_norm": 3.0,
      "learning_rate": 4.527961452889606e-05,
      "loss": 0.854,
      "step": 158330
    },
    {
      "epoch": 0.5549422241848826,
      "grad_norm": 3.125,
      "learning_rate": 4.527896550023236e-05,
      "loss": 0.9429,
      "step": 158340
    },
    {
      "epoch": 0.5549772716917782,
      "grad_norm": 2.796875,
      "learning_rate": 4.527831647156865e-05,
      "loss": 0.9744,
      "step": 158350
    },
    {
      "epoch": 0.5550123191986738,
      "grad_norm": 3.15625,
      "learning_rate": 4.5277667442904954e-05,
      "loss": 0.9044,
      "step": 158360
    },
    {
      "epoch": 0.5550473667055694,
      "grad_norm": 2.734375,
      "learning_rate": 4.527701841424125e-05,
      "loss": 0.8948,
      "step": 158370
    },
    {
      "epoch": 0.555082414212465,
      "grad_norm": 2.828125,
      "learning_rate": 4.527636938557755e-05,
      "loss": 0.8515,
      "step": 158380
    },
    {
      "epoch": 0.5551174617193606,
      "grad_norm": 2.828125,
      "learning_rate": 4.5275720356913845e-05,
      "loss": 0.9161,
      "step": 158390
    },
    {
      "epoch": 0.5551525092262561,
      "grad_norm": 2.578125,
      "learning_rate": 4.527507132825014e-05,
      "loss": 0.8391,
      "step": 158400
    },
    {
      "epoch": 0.5551875567331518,
      "grad_norm": 3.03125,
      "learning_rate": 4.527442229958644e-05,
      "loss": 0.8528,
      "step": 158410
    },
    {
      "epoch": 0.5552226042400474,
      "grad_norm": 2.953125,
      "learning_rate": 4.5273773270922736e-05,
      "loss": 0.8562,
      "step": 158420
    },
    {
      "epoch": 0.555257651746943,
      "grad_norm": 3.140625,
      "learning_rate": 4.527312424225904e-05,
      "loss": 0.9986,
      "step": 158430
    },
    {
      "epoch": 0.5552926992538386,
      "grad_norm": 3.125,
      "learning_rate": 4.527247521359533e-05,
      "loss": 0.9323,
      "step": 158440
    },
    {
      "epoch": 0.5553277467607342,
      "grad_norm": 2.9375,
      "learning_rate": 4.527182618493163e-05,
      "loss": 0.9211,
      "step": 158450
    },
    {
      "epoch": 0.5553627942676298,
      "grad_norm": 2.640625,
      "learning_rate": 4.527117715626793e-05,
      "loss": 0.8269,
      "step": 158460
    },
    {
      "epoch": 0.5553978417745253,
      "grad_norm": 2.5625,
      "learning_rate": 4.527052812760423e-05,
      "loss": 0.865,
      "step": 158470
    },
    {
      "epoch": 0.555432889281421,
      "grad_norm": 2.921875,
      "learning_rate": 4.5269879098940524e-05,
      "loss": 0.9105,
      "step": 158480
    },
    {
      "epoch": 0.5554679367883165,
      "grad_norm": 3.078125,
      "learning_rate": 4.5269230070276825e-05,
      "loss": 0.9205,
      "step": 158490
    },
    {
      "epoch": 0.5555029842952122,
      "grad_norm": 3.25,
      "learning_rate": 4.526858104161312e-05,
      "loss": 0.8922,
      "step": 158500
    },
    {
      "epoch": 0.5555380318021078,
      "grad_norm": 3.09375,
      "learning_rate": 4.526793201294942e-05,
      "loss": 0.8428,
      "step": 158510
    },
    {
      "epoch": 0.5555730793090033,
      "grad_norm": 3.015625,
      "learning_rate": 4.526728298428572e-05,
      "loss": 0.9823,
      "step": 158520
    },
    {
      "epoch": 0.555608126815899,
      "grad_norm": 3.125,
      "learning_rate": 4.526663395562202e-05,
      "loss": 0.8975,
      "step": 158530
    },
    {
      "epoch": 0.5556431743227945,
      "grad_norm": 3.453125,
      "learning_rate": 4.526598492695832e-05,
      "loss": 0.9081,
      "step": 158540
    },
    {
      "epoch": 0.5556782218296902,
      "grad_norm": 3.25,
      "learning_rate": 4.526533589829461e-05,
      "loss": 0.9366,
      "step": 158550
    },
    {
      "epoch": 0.5557132693365857,
      "grad_norm": 3.5,
      "learning_rate": 4.5264686869630914e-05,
      "loss": 0.9182,
      "step": 158560
    },
    {
      "epoch": 0.5557483168434814,
      "grad_norm": 3.203125,
      "learning_rate": 4.526403784096721e-05,
      "loss": 0.9766,
      "step": 158570
    },
    {
      "epoch": 0.5557833643503769,
      "grad_norm": 2.9375,
      "learning_rate": 4.526338881230351e-05,
      "loss": 0.9301,
      "step": 158580
    },
    {
      "epoch": 0.5558184118572725,
      "grad_norm": 2.8125,
      "learning_rate": 4.5262739783639805e-05,
      "loss": 0.9136,
      "step": 158590
    },
    {
      "epoch": 0.5558534593641681,
      "grad_norm": 2.609375,
      "learning_rate": 4.5262090754976106e-05,
      "loss": 0.8666,
      "step": 158600
    },
    {
      "epoch": 0.5558885068710637,
      "grad_norm": 2.65625,
      "learning_rate": 4.52614417263124e-05,
      "loss": 0.8956,
      "step": 158610
    },
    {
      "epoch": 0.5559235543779594,
      "grad_norm": 3.140625,
      "learning_rate": 4.52607926976487e-05,
      "loss": 0.9487,
      "step": 158620
    },
    {
      "epoch": 0.5559586018848549,
      "grad_norm": 3.15625,
      "learning_rate": 4.5260143668985e-05,
      "loss": 0.9524,
      "step": 158630
    },
    {
      "epoch": 0.5559936493917506,
      "grad_norm": 2.890625,
      "learning_rate": 4.52594946403213e-05,
      "loss": 0.9201,
      "step": 158640
    },
    {
      "epoch": 0.5560286968986461,
      "grad_norm": 3.140625,
      "learning_rate": 4.52588456116576e-05,
      "loss": 0.9466,
      "step": 158650
    },
    {
      "epoch": 0.5560637444055417,
      "grad_norm": 2.65625,
      "learning_rate": 4.5258196582993894e-05,
      "loss": 0.9235,
      "step": 158660
    },
    {
      "epoch": 0.5560987919124373,
      "grad_norm": 2.921875,
      "learning_rate": 4.5257547554330196e-05,
      "loss": 0.93,
      "step": 158670
    },
    {
      "epoch": 0.5561338394193329,
      "grad_norm": 3.21875,
      "learning_rate": 4.525689852566649e-05,
      "loss": 0.9683,
      "step": 158680
    },
    {
      "epoch": 0.5561688869262285,
      "grad_norm": 3.15625,
      "learning_rate": 4.525624949700279e-05,
      "loss": 0.857,
      "step": 158690
    },
    {
      "epoch": 0.5562039344331241,
      "grad_norm": 2.703125,
      "learning_rate": 4.5255600468339086e-05,
      "loss": 0.8044,
      "step": 158700
    },
    {
      "epoch": 0.5562389819400197,
      "grad_norm": 2.984375,
      "learning_rate": 4.525495143967539e-05,
      "loss": 1.0065,
      "step": 158710
    },
    {
      "epoch": 0.5562740294469153,
      "grad_norm": 2.59375,
      "learning_rate": 4.525430241101168e-05,
      "loss": 0.8201,
      "step": 158720
    },
    {
      "epoch": 0.5563090769538109,
      "grad_norm": 3.0625,
      "learning_rate": 4.5253653382347984e-05,
      "loss": 0.9632,
      "step": 158730
    },
    {
      "epoch": 0.5563441244607065,
      "grad_norm": 3.015625,
      "learning_rate": 4.525300435368428e-05,
      "loss": 0.8753,
      "step": 158740
    },
    {
      "epoch": 0.5563791719676021,
      "grad_norm": 2.828125,
      "learning_rate": 4.525235532502058e-05,
      "loss": 0.8575,
      "step": 158750
    },
    {
      "epoch": 0.5564142194744977,
      "grad_norm": 3.0,
      "learning_rate": 4.5251706296356874e-05,
      "loss": 0.9263,
      "step": 158760
    },
    {
      "epoch": 0.5564492669813933,
      "grad_norm": 3.0625,
      "learning_rate": 4.525105726769317e-05,
      "loss": 0.9258,
      "step": 158770
    },
    {
      "epoch": 0.5564843144882888,
      "grad_norm": 3.25,
      "learning_rate": 4.525040823902947e-05,
      "loss": 0.9087,
      "step": 158780
    },
    {
      "epoch": 0.5565193619951845,
      "grad_norm": 3.21875,
      "learning_rate": 4.5249759210365765e-05,
      "loss": 0.8847,
      "step": 158790
    },
    {
      "epoch": 0.55655440950208,
      "grad_norm": 3.265625,
      "learning_rate": 4.5249110181702066e-05,
      "loss": 0.9228,
      "step": 158800
    },
    {
      "epoch": 0.5565894570089757,
      "grad_norm": 2.96875,
      "learning_rate": 4.524846115303836e-05,
      "loss": 0.9208,
      "step": 158810
    },
    {
      "epoch": 0.5566245045158713,
      "grad_norm": 3.0625,
      "learning_rate": 4.524781212437466e-05,
      "loss": 0.9835,
      "step": 158820
    },
    {
      "epoch": 0.5566595520227668,
      "grad_norm": 3.0,
      "learning_rate": 4.524716309571096e-05,
      "loss": 0.9837,
      "step": 158830
    },
    {
      "epoch": 0.5566945995296625,
      "grad_norm": 2.671875,
      "learning_rate": 4.524651406704726e-05,
      "loss": 0.8516,
      "step": 158840
    },
    {
      "epoch": 0.556729647036558,
      "grad_norm": 3.0625,
      "learning_rate": 4.524586503838355e-05,
      "loss": 0.9214,
      "step": 158850
    },
    {
      "epoch": 0.5567646945434537,
      "grad_norm": 2.796875,
      "learning_rate": 4.5245216009719854e-05,
      "loss": 0.9417,
      "step": 158860
    },
    {
      "epoch": 0.5567997420503492,
      "grad_norm": 3.265625,
      "learning_rate": 4.524456698105615e-05,
      "loss": 0.9056,
      "step": 158870
    },
    {
      "epoch": 0.5568347895572449,
      "grad_norm": 2.859375,
      "learning_rate": 4.524391795239245e-05,
      "loss": 0.8686,
      "step": 158880
    },
    {
      "epoch": 0.5568698370641404,
      "grad_norm": 2.578125,
      "learning_rate": 4.524326892372875e-05,
      "loss": 0.8785,
      "step": 158890
    },
    {
      "epoch": 0.556904884571036,
      "grad_norm": 2.71875,
      "learning_rate": 4.5242619895065046e-05,
      "loss": 0.9178,
      "step": 158900
    },
    {
      "epoch": 0.5569399320779317,
      "grad_norm": 3.09375,
      "learning_rate": 4.524197086640135e-05,
      "loss": 0.9043,
      "step": 158910
    },
    {
      "epoch": 0.5569749795848272,
      "grad_norm": 3.109375,
      "learning_rate": 4.524132183773764e-05,
      "loss": 1.0245,
      "step": 158920
    },
    {
      "epoch": 0.5570100270917229,
      "grad_norm": 2.75,
      "learning_rate": 4.5240672809073944e-05,
      "loss": 0.8817,
      "step": 158930
    },
    {
      "epoch": 0.5570450745986184,
      "grad_norm": 3.34375,
      "learning_rate": 4.524002378041024e-05,
      "loss": 0.9276,
      "step": 158940
    },
    {
      "epoch": 0.557080122105514,
      "grad_norm": 2.8125,
      "learning_rate": 4.523937475174654e-05,
      "loss": 0.9675,
      "step": 158950
    },
    {
      "epoch": 0.5571151696124096,
      "grad_norm": 2.984375,
      "learning_rate": 4.5238725723082834e-05,
      "loss": 0.8863,
      "step": 158960
    },
    {
      "epoch": 0.5571502171193052,
      "grad_norm": 2.71875,
      "learning_rate": 4.5238076694419136e-05,
      "loss": 0.9239,
      "step": 158970
    },
    {
      "epoch": 0.5571852646262008,
      "grad_norm": 3.140625,
      "learning_rate": 4.523742766575543e-05,
      "loss": 0.9304,
      "step": 158980
    },
    {
      "epoch": 0.5572203121330964,
      "grad_norm": 3.015625,
      "learning_rate": 4.523677863709173e-05,
      "loss": 0.9108,
      "step": 158990
    },
    {
      "epoch": 0.557255359639992,
      "grad_norm": 3.015625,
      "learning_rate": 4.5236129608428026e-05,
      "loss": 0.951,
      "step": 159000
    },
    {
      "epoch": 0.5572904071468876,
      "grad_norm": 2.59375,
      "learning_rate": 4.523548057976433e-05,
      "loss": 0.9636,
      "step": 159010
    },
    {
      "epoch": 0.5573254546537832,
      "grad_norm": 2.828125,
      "learning_rate": 4.523483155110063e-05,
      "loss": 0.947,
      "step": 159020
    },
    {
      "epoch": 0.5573605021606788,
      "grad_norm": 3.15625,
      "learning_rate": 4.5234182522436924e-05,
      "loss": 0.9406,
      "step": 159030
    },
    {
      "epoch": 0.5573955496675744,
      "grad_norm": 2.96875,
      "learning_rate": 4.5233533493773225e-05,
      "loss": 0.9641,
      "step": 159040
    },
    {
      "epoch": 0.55743059717447,
      "grad_norm": 2.953125,
      "learning_rate": 4.523288446510952e-05,
      "loss": 0.859,
      "step": 159050
    },
    {
      "epoch": 0.5574656446813656,
      "grad_norm": 2.984375,
      "learning_rate": 4.523223543644582e-05,
      "loss": 0.9156,
      "step": 159060
    },
    {
      "epoch": 0.5575006921882611,
      "grad_norm": 3.4375,
      "learning_rate": 4.5231586407782116e-05,
      "loss": 0.9608,
      "step": 159070
    },
    {
      "epoch": 0.5575357396951568,
      "grad_norm": 3.125,
      "learning_rate": 4.523093737911842e-05,
      "loss": 1.0226,
      "step": 159080
    },
    {
      "epoch": 0.5575707872020523,
      "grad_norm": 3.28125,
      "learning_rate": 4.523028835045471e-05,
      "loss": 0.9152,
      "step": 159090
    },
    {
      "epoch": 0.557605834708948,
      "grad_norm": 2.515625,
      "learning_rate": 4.522963932179101e-05,
      "loss": 0.9003,
      "step": 159100
    },
    {
      "epoch": 0.5576408822158436,
      "grad_norm": 2.953125,
      "learning_rate": 4.522899029312731e-05,
      "loss": 1.0097,
      "step": 159110
    },
    {
      "epoch": 0.5576759297227392,
      "grad_norm": 2.9375,
      "learning_rate": 4.522834126446361e-05,
      "loss": 0.9535,
      "step": 159120
    },
    {
      "epoch": 0.5577109772296348,
      "grad_norm": 2.8125,
      "learning_rate": 4.5227692235799904e-05,
      "loss": 0.8897,
      "step": 159130
    },
    {
      "epoch": 0.5577460247365303,
      "grad_norm": 2.671875,
      "learning_rate": 4.52270432071362e-05,
      "loss": 0.9378,
      "step": 159140
    },
    {
      "epoch": 0.557781072243426,
      "grad_norm": 2.53125,
      "learning_rate": 4.52263941784725e-05,
      "loss": 0.8465,
      "step": 159150
    },
    {
      "epoch": 0.5578161197503215,
      "grad_norm": 2.9375,
      "learning_rate": 4.5225745149808794e-05,
      "loss": 0.9517,
      "step": 159160
    },
    {
      "epoch": 0.5578511672572172,
      "grad_norm": 3.09375,
      "learning_rate": 4.5225096121145096e-05,
      "loss": 0.8533,
      "step": 159170
    },
    {
      "epoch": 0.5578862147641127,
      "grad_norm": 2.953125,
      "learning_rate": 4.522444709248139e-05,
      "loss": 0.9019,
      "step": 159180
    },
    {
      "epoch": 0.5579212622710084,
      "grad_norm": 2.828125,
      "learning_rate": 4.522379806381769e-05,
      "loss": 0.9078,
      "step": 159190
    },
    {
      "epoch": 0.557956309777904,
      "grad_norm": 2.765625,
      "learning_rate": 4.5223149035153986e-05,
      "loss": 0.8586,
      "step": 159200
    },
    {
      "epoch": 0.5579913572847995,
      "grad_norm": 2.890625,
      "learning_rate": 4.522250000649029e-05,
      "loss": 0.8903,
      "step": 159210
    },
    {
      "epoch": 0.5580264047916952,
      "grad_norm": 2.484375,
      "learning_rate": 4.522185097782658e-05,
      "loss": 0.9429,
      "step": 159220
    },
    {
      "epoch": 0.5580614522985907,
      "grad_norm": 2.40625,
      "learning_rate": 4.5221201949162884e-05,
      "loss": 0.8702,
      "step": 159230
    },
    {
      "epoch": 0.5580964998054864,
      "grad_norm": 3.828125,
      "learning_rate": 4.5220552920499185e-05,
      "loss": 0.9735,
      "step": 159240
    },
    {
      "epoch": 0.5581315473123819,
      "grad_norm": 2.390625,
      "learning_rate": 4.521990389183548e-05,
      "loss": 0.8888,
      "step": 159250
    },
    {
      "epoch": 0.5581665948192776,
      "grad_norm": 2.78125,
      "learning_rate": 4.521925486317178e-05,
      "loss": 0.8801,
      "step": 159260
    },
    {
      "epoch": 0.5582016423261731,
      "grad_norm": 3.0625,
      "learning_rate": 4.5218605834508076e-05,
      "loss": 0.9644,
      "step": 159270
    },
    {
      "epoch": 0.5582366898330687,
      "grad_norm": 2.625,
      "learning_rate": 4.521795680584438e-05,
      "loss": 0.8686,
      "step": 159280
    },
    {
      "epoch": 0.5582717373399643,
      "grad_norm": 3.125,
      "learning_rate": 4.521730777718067e-05,
      "loss": 0.8586,
      "step": 159290
    },
    {
      "epoch": 0.5583067848468599,
      "grad_norm": 3.21875,
      "learning_rate": 4.521665874851697e-05,
      "loss": 0.9841,
      "step": 159300
    },
    {
      "epoch": 0.5583418323537556,
      "grad_norm": 2.609375,
      "learning_rate": 4.521600971985327e-05,
      "loss": 0.8346,
      "step": 159310
    },
    {
      "epoch": 0.5583768798606511,
      "grad_norm": 2.515625,
      "learning_rate": 4.521536069118957e-05,
      "loss": 0.8463,
      "step": 159320
    },
    {
      "epoch": 0.5584119273675467,
      "grad_norm": 3.328125,
      "learning_rate": 4.5214711662525864e-05,
      "loss": 0.9484,
      "step": 159330
    },
    {
      "epoch": 0.5584469748744423,
      "grad_norm": 2.59375,
      "learning_rate": 4.5214062633862165e-05,
      "loss": 0.9032,
      "step": 159340
    },
    {
      "epoch": 0.5584820223813379,
      "grad_norm": 2.640625,
      "learning_rate": 4.521341360519846e-05,
      "loss": 0.892,
      "step": 159350
    },
    {
      "epoch": 0.5585170698882335,
      "grad_norm": 2.703125,
      "learning_rate": 4.521276457653476e-05,
      "loss": 0.8975,
      "step": 159360
    },
    {
      "epoch": 0.5585521173951291,
      "grad_norm": 3.140625,
      "learning_rate": 4.5212115547871056e-05,
      "loss": 0.9038,
      "step": 159370
    },
    {
      "epoch": 0.5585871649020246,
      "grad_norm": 3.28125,
      "learning_rate": 4.521146651920736e-05,
      "loss": 0.9288,
      "step": 159380
    },
    {
      "epoch": 0.5586222124089203,
      "grad_norm": 2.484375,
      "learning_rate": 4.521081749054366e-05,
      "loss": 0.9301,
      "step": 159390
    },
    {
      "epoch": 0.5586572599158159,
      "grad_norm": 3.140625,
      "learning_rate": 4.521016846187995e-05,
      "loss": 0.975,
      "step": 159400
    },
    {
      "epoch": 0.5586923074227115,
      "grad_norm": 3.34375,
      "learning_rate": 4.5209519433216255e-05,
      "loss": 0.878,
      "step": 159410
    },
    {
      "epoch": 0.5587273549296071,
      "grad_norm": 2.75,
      "learning_rate": 4.520887040455255e-05,
      "loss": 0.9979,
      "step": 159420
    },
    {
      "epoch": 0.5587624024365027,
      "grad_norm": 3.109375,
      "learning_rate": 4.520822137588885e-05,
      "loss": 0.9171,
      "step": 159430
    },
    {
      "epoch": 0.5587974499433983,
      "grad_norm": 2.828125,
      "learning_rate": 4.5207572347225145e-05,
      "loss": 0.9393,
      "step": 159440
    },
    {
      "epoch": 0.5588324974502938,
      "grad_norm": 2.984375,
      "learning_rate": 4.5206923318561447e-05,
      "loss": 0.8871,
      "step": 159450
    },
    {
      "epoch": 0.5588675449571895,
      "grad_norm": 3.140625,
      "learning_rate": 4.520627428989774e-05,
      "loss": 0.9745,
      "step": 159460
    },
    {
      "epoch": 0.558902592464085,
      "grad_norm": 3.046875,
      "learning_rate": 4.520562526123404e-05,
      "loss": 0.9401,
      "step": 159470
    },
    {
      "epoch": 0.5589376399709807,
      "grad_norm": 3.21875,
      "learning_rate": 4.520497623257034e-05,
      "loss": 1.0254,
      "step": 159480
    },
    {
      "epoch": 0.5589726874778762,
      "grad_norm": 2.671875,
      "learning_rate": 4.520432720390664e-05,
      "loss": 0.9476,
      "step": 159490
    },
    {
      "epoch": 0.5590077349847719,
      "grad_norm": 3.234375,
      "learning_rate": 4.520367817524293e-05,
      "loss": 0.8511,
      "step": 159500
    },
    {
      "epoch": 0.5590427824916675,
      "grad_norm": 3.0625,
      "learning_rate": 4.5203029146579235e-05,
      "loss": 0.9257,
      "step": 159510
    },
    {
      "epoch": 0.559077829998563,
      "grad_norm": 3.15625,
      "learning_rate": 4.520238011791553e-05,
      "loss": 0.8799,
      "step": 159520
    },
    {
      "epoch": 0.5591128775054587,
      "grad_norm": 2.78125,
      "learning_rate": 4.5201731089251824e-05,
      "loss": 0.8849,
      "step": 159530
    },
    {
      "epoch": 0.5591479250123542,
      "grad_norm": 2.96875,
      "learning_rate": 4.5201082060588125e-05,
      "loss": 0.9288,
      "step": 159540
    },
    {
      "epoch": 0.5591829725192499,
      "grad_norm": 2.796875,
      "learning_rate": 4.520043303192442e-05,
      "loss": 0.9679,
      "step": 159550
    },
    {
      "epoch": 0.5592180200261454,
      "grad_norm": 2.875,
      "learning_rate": 4.519978400326072e-05,
      "loss": 0.9208,
      "step": 159560
    },
    {
      "epoch": 0.559253067533041,
      "grad_norm": 2.84375,
      "learning_rate": 4.5199134974597016e-05,
      "loss": 0.9689,
      "step": 159570
    },
    {
      "epoch": 0.5592881150399366,
      "grad_norm": 3.109375,
      "learning_rate": 4.519848594593332e-05,
      "loss": 0.8674,
      "step": 159580
    },
    {
      "epoch": 0.5593231625468322,
      "grad_norm": 3.046875,
      "learning_rate": 4.519783691726961e-05,
      "loss": 0.8711,
      "step": 159590
    },
    {
      "epoch": 0.5593582100537279,
      "grad_norm": 3.140625,
      "learning_rate": 4.519718788860591e-05,
      "loss": 0.9047,
      "step": 159600
    },
    {
      "epoch": 0.5593932575606234,
      "grad_norm": 2.921875,
      "learning_rate": 4.5196538859942215e-05,
      "loss": 0.9264,
      "step": 159610
    },
    {
      "epoch": 0.5594283050675191,
      "grad_norm": 3.15625,
      "learning_rate": 4.519588983127851e-05,
      "loss": 0.9196,
      "step": 159620
    },
    {
      "epoch": 0.5594633525744146,
      "grad_norm": 3.46875,
      "learning_rate": 4.519524080261481e-05,
      "loss": 0.9853,
      "step": 159630
    },
    {
      "epoch": 0.5594984000813102,
      "grad_norm": 2.625,
      "learning_rate": 4.5194591773951105e-05,
      "loss": 0.9263,
      "step": 159640
    },
    {
      "epoch": 0.5595334475882058,
      "grad_norm": 3.15625,
      "learning_rate": 4.5193942745287407e-05,
      "loss": 0.8931,
      "step": 159650
    },
    {
      "epoch": 0.5595684950951014,
      "grad_norm": 2.71875,
      "learning_rate": 4.51932937166237e-05,
      "loss": 0.8786,
      "step": 159660
    },
    {
      "epoch": 0.559603542601997,
      "grad_norm": 3.0625,
      "learning_rate": 4.519264468796e-05,
      "loss": 0.8824,
      "step": 159670
    },
    {
      "epoch": 0.5596385901088926,
      "grad_norm": 2.828125,
      "learning_rate": 4.51919956592963e-05,
      "loss": 0.8808,
      "step": 159680
    },
    {
      "epoch": 0.5596736376157883,
      "grad_norm": 3.15625,
      "learning_rate": 4.51913466306326e-05,
      "loss": 0.9806,
      "step": 159690
    },
    {
      "epoch": 0.5597086851226838,
      "grad_norm": 3.25,
      "learning_rate": 4.519069760196889e-05,
      "loss": 0.9515,
      "step": 159700
    },
    {
      "epoch": 0.5597437326295794,
      "grad_norm": 3.78125,
      "learning_rate": 4.5190048573305195e-05,
      "loss": 0.9624,
      "step": 159710
    },
    {
      "epoch": 0.559778780136475,
      "grad_norm": 3.15625,
      "learning_rate": 4.518939954464149e-05,
      "loss": 0.965,
      "step": 159720
    },
    {
      "epoch": 0.5598138276433706,
      "grad_norm": 3.21875,
      "learning_rate": 4.518875051597779e-05,
      "loss": 0.9365,
      "step": 159730
    },
    {
      "epoch": 0.5598488751502662,
      "grad_norm": 3.625,
      "learning_rate": 4.5188101487314085e-05,
      "loss": 0.9262,
      "step": 159740
    },
    {
      "epoch": 0.5598839226571618,
      "grad_norm": 2.921875,
      "learning_rate": 4.5187452458650387e-05,
      "loss": 1.0019,
      "step": 159750
    },
    {
      "epoch": 0.5599189701640573,
      "grad_norm": 2.96875,
      "learning_rate": 4.518680342998669e-05,
      "loss": 0.9614,
      "step": 159760
    },
    {
      "epoch": 0.559954017670953,
      "grad_norm": 3.078125,
      "learning_rate": 4.518615440132298e-05,
      "loss": 0.9813,
      "step": 159770
    },
    {
      "epoch": 0.5599890651778485,
      "grad_norm": 2.609375,
      "learning_rate": 4.5185505372659284e-05,
      "loss": 0.8848,
      "step": 159780
    },
    {
      "epoch": 0.5600241126847442,
      "grad_norm": 2.953125,
      "learning_rate": 4.518485634399558e-05,
      "loss": 1.0215,
      "step": 159790
    },
    {
      "epoch": 0.5600591601916398,
      "grad_norm": 3.3125,
      "learning_rate": 4.518420731533188e-05,
      "loss": 0.9065,
      "step": 159800
    },
    {
      "epoch": 0.5600942076985354,
      "grad_norm": 2.78125,
      "learning_rate": 4.5183558286668175e-05,
      "loss": 0.944,
      "step": 159810
    },
    {
      "epoch": 0.560129255205431,
      "grad_norm": 2.875,
      "learning_rate": 4.5182909258004476e-05,
      "loss": 0.9174,
      "step": 159820
    },
    {
      "epoch": 0.5601643027123265,
      "grad_norm": 3.0,
      "learning_rate": 4.518226022934077e-05,
      "loss": 0.9248,
      "step": 159830
    },
    {
      "epoch": 0.5601993502192222,
      "grad_norm": 3.15625,
      "learning_rate": 4.518161120067707e-05,
      "loss": 0.9268,
      "step": 159840
    },
    {
      "epoch": 0.5602343977261177,
      "grad_norm": 3.1875,
      "learning_rate": 4.5180962172013367e-05,
      "loss": 0.8624,
      "step": 159850
    },
    {
      "epoch": 0.5602694452330134,
      "grad_norm": 3.546875,
      "learning_rate": 4.518031314334967e-05,
      "loss": 0.9644,
      "step": 159860
    },
    {
      "epoch": 0.5603044927399089,
      "grad_norm": 3.328125,
      "learning_rate": 4.517966411468596e-05,
      "loss": 0.8627,
      "step": 159870
    },
    {
      "epoch": 0.5603395402468045,
      "grad_norm": 3.078125,
      "learning_rate": 4.5179015086022264e-05,
      "loss": 0.8668,
      "step": 159880
    },
    {
      "epoch": 0.5603745877537002,
      "grad_norm": 2.90625,
      "learning_rate": 4.5178366057358565e-05,
      "loss": 0.9864,
      "step": 159890
    },
    {
      "epoch": 0.5604096352605957,
      "grad_norm": 2.90625,
      "learning_rate": 4.517771702869485e-05,
      "loss": 0.863,
      "step": 159900
    },
    {
      "epoch": 0.5604446827674914,
      "grad_norm": 2.78125,
      "learning_rate": 4.5177068000031155e-05,
      "loss": 0.9628,
      "step": 159910
    },
    {
      "epoch": 0.5604797302743869,
      "grad_norm": 2.8125,
      "learning_rate": 4.517641897136745e-05,
      "loss": 0.9841,
      "step": 159920
    },
    {
      "epoch": 0.5605147777812826,
      "grad_norm": 2.9375,
      "learning_rate": 4.517576994270375e-05,
      "loss": 0.9984,
      "step": 159930
    },
    {
      "epoch": 0.5605498252881781,
      "grad_norm": 2.9375,
      "learning_rate": 4.5175120914040045e-05,
      "loss": 0.9333,
      "step": 159940
    },
    {
      "epoch": 0.5605848727950737,
      "grad_norm": 2.859375,
      "learning_rate": 4.5174471885376347e-05,
      "loss": 0.9631,
      "step": 159950
    },
    {
      "epoch": 0.5606199203019693,
      "grad_norm": 3.0,
      "learning_rate": 4.517382285671264e-05,
      "loss": 0.8549,
      "step": 159960
    },
    {
      "epoch": 0.5606549678088649,
      "grad_norm": 2.859375,
      "learning_rate": 4.517317382804894e-05,
      "loss": 0.8867,
      "step": 159970
    },
    {
      "epoch": 0.5606900153157605,
      "grad_norm": 2.890625,
      "learning_rate": 4.5172524799385244e-05,
      "loss": 0.971,
      "step": 159980
    },
    {
      "epoch": 0.5607250628226561,
      "grad_norm": 2.921875,
      "learning_rate": 4.517187577072154e-05,
      "loss": 0.9967,
      "step": 159990
    },
    {
      "epoch": 0.5607601103295518,
      "grad_norm": 2.765625,
      "learning_rate": 4.517122674205784e-05,
      "loss": 0.8998,
      "step": 160000
    },
    {
      "epoch": 0.5607601103295518,
      "eval_loss": 0.8621734380722046,
      "eval_runtime": 551.2093,
      "eval_samples_per_second": 690.184,
      "eval_steps_per_second": 57.515,
      "step": 160000
    },
    {
      "epoch": 0.5607951578364473,
      "grad_norm": 3.25,
      "learning_rate": 4.5170577713394135e-05,
      "loss": 1.0089,
      "step": 160010
    },
    {
      "epoch": 0.5608302053433429,
      "grad_norm": 2.8125,
      "learning_rate": 4.5169928684730436e-05,
      "loss": 0.9235,
      "step": 160020
    },
    {
      "epoch": 0.5608652528502385,
      "grad_norm": 2.578125,
      "learning_rate": 4.516927965606673e-05,
      "loss": 0.9711,
      "step": 160030
    },
    {
      "epoch": 0.5609003003571341,
      "grad_norm": 3.28125,
      "learning_rate": 4.516863062740303e-05,
      "loss": 0.9285,
      "step": 160040
    },
    {
      "epoch": 0.5609353478640297,
      "grad_norm": 3.0625,
      "learning_rate": 4.5167981598739327e-05,
      "loss": 0.9533,
      "step": 160050
    },
    {
      "epoch": 0.5609703953709253,
      "grad_norm": 3.078125,
      "learning_rate": 4.516733257007563e-05,
      "loss": 0.9567,
      "step": 160060
    },
    {
      "epoch": 0.5610054428778208,
      "grad_norm": 2.859375,
      "learning_rate": 4.516668354141192e-05,
      "loss": 0.8746,
      "step": 160070
    },
    {
      "epoch": 0.5610404903847165,
      "grad_norm": 2.609375,
      "learning_rate": 4.5166034512748224e-05,
      "loss": 0.9596,
      "step": 160080
    },
    {
      "epoch": 0.5610755378916121,
      "grad_norm": 3.0,
      "learning_rate": 4.516538548408452e-05,
      "loss": 0.9357,
      "step": 160090
    },
    {
      "epoch": 0.5611105853985077,
      "grad_norm": 3.25,
      "learning_rate": 4.516473645542082e-05,
      "loss": 0.9748,
      "step": 160100
    },
    {
      "epoch": 0.5611456329054033,
      "grad_norm": 2.4375,
      "learning_rate": 4.516408742675712e-05,
      "loss": 0.9402,
      "step": 160110
    },
    {
      "epoch": 0.5611806804122988,
      "grad_norm": 2.8125,
      "learning_rate": 4.5163438398093416e-05,
      "loss": 0.8583,
      "step": 160120
    },
    {
      "epoch": 0.5612157279191945,
      "grad_norm": 2.65625,
      "learning_rate": 4.516278936942972e-05,
      "loss": 0.8363,
      "step": 160130
    },
    {
      "epoch": 0.56125077542609,
      "grad_norm": 3.390625,
      "learning_rate": 4.516214034076601e-05,
      "loss": 0.9007,
      "step": 160140
    },
    {
      "epoch": 0.5612858229329857,
      "grad_norm": 2.875,
      "learning_rate": 4.516149131210231e-05,
      "loss": 0.9637,
      "step": 160150
    },
    {
      "epoch": 0.5613208704398812,
      "grad_norm": 3.375,
      "learning_rate": 4.516084228343861e-05,
      "loss": 0.9366,
      "step": 160160
    },
    {
      "epoch": 0.5613559179467769,
      "grad_norm": 2.796875,
      "learning_rate": 4.516019325477491e-05,
      "loss": 0.9562,
      "step": 160170
    },
    {
      "epoch": 0.5613909654536725,
      "grad_norm": 3.375,
      "learning_rate": 4.5159544226111204e-05,
      "loss": 1.0076,
      "step": 160180
    },
    {
      "epoch": 0.561426012960568,
      "grad_norm": 2.9375,
      "learning_rate": 4.5158895197447505e-05,
      "loss": 0.9167,
      "step": 160190
    },
    {
      "epoch": 0.5614610604674637,
      "grad_norm": 2.9375,
      "learning_rate": 4.51582461687838e-05,
      "loss": 0.9319,
      "step": 160200
    },
    {
      "epoch": 0.5614961079743592,
      "grad_norm": 2.984375,
      "learning_rate": 4.51575971401201e-05,
      "loss": 0.8593,
      "step": 160210
    },
    {
      "epoch": 0.5615311554812549,
      "grad_norm": 3.4375,
      "learning_rate": 4.5156948111456396e-05,
      "loss": 0.9683,
      "step": 160220
    },
    {
      "epoch": 0.5615662029881504,
      "grad_norm": 3.078125,
      "learning_rate": 4.51562990827927e-05,
      "loss": 0.9001,
      "step": 160230
    },
    {
      "epoch": 0.5616012504950461,
      "grad_norm": 3.296875,
      "learning_rate": 4.515565005412899e-05,
      "loss": 0.9371,
      "step": 160240
    },
    {
      "epoch": 0.5616362980019416,
      "grad_norm": 2.78125,
      "learning_rate": 4.515500102546529e-05,
      "loss": 0.9807,
      "step": 160250
    },
    {
      "epoch": 0.5616713455088372,
      "grad_norm": 3.5,
      "learning_rate": 4.5154351996801595e-05,
      "loss": 1.0201,
      "step": 160260
    },
    {
      "epoch": 0.5617063930157328,
      "grad_norm": 2.921875,
      "learning_rate": 4.515370296813788e-05,
      "loss": 0.9309,
      "step": 160270
    },
    {
      "epoch": 0.5617414405226284,
      "grad_norm": 2.84375,
      "learning_rate": 4.5153053939474184e-05,
      "loss": 0.9371,
      "step": 160280
    },
    {
      "epoch": 0.5617764880295241,
      "grad_norm": 2.640625,
      "learning_rate": 4.515240491081048e-05,
      "loss": 0.9175,
      "step": 160290
    },
    {
      "epoch": 0.5618115355364196,
      "grad_norm": 2.984375,
      "learning_rate": 4.515175588214678e-05,
      "loss": 0.9073,
      "step": 160300
    },
    {
      "epoch": 0.5618465830433153,
      "grad_norm": 2.765625,
      "learning_rate": 4.5151106853483075e-05,
      "loss": 0.8549,
      "step": 160310
    },
    {
      "epoch": 0.5618816305502108,
      "grad_norm": 3.15625,
      "learning_rate": 4.5150457824819376e-05,
      "loss": 1.0311,
      "step": 160320
    },
    {
      "epoch": 0.5619166780571064,
      "grad_norm": 2.75,
      "learning_rate": 4.514980879615567e-05,
      "loss": 0.914,
      "step": 160330
    },
    {
      "epoch": 0.561951725564002,
      "grad_norm": 3.359375,
      "learning_rate": 4.514915976749197e-05,
      "loss": 0.9156,
      "step": 160340
    },
    {
      "epoch": 0.5619867730708976,
      "grad_norm": 2.90625,
      "learning_rate": 4.514851073882827e-05,
      "loss": 0.9553,
      "step": 160350
    },
    {
      "epoch": 0.5620218205777932,
      "grad_norm": 2.796875,
      "learning_rate": 4.514786171016457e-05,
      "loss": 0.8462,
      "step": 160360
    },
    {
      "epoch": 0.5620568680846888,
      "grad_norm": 2.859375,
      "learning_rate": 4.514721268150087e-05,
      "loss": 0.9365,
      "step": 160370
    },
    {
      "epoch": 0.5620919155915844,
      "grad_norm": 3.015625,
      "learning_rate": 4.5146563652837164e-05,
      "loss": 0.9279,
      "step": 160380
    },
    {
      "epoch": 0.56212696309848,
      "grad_norm": 2.828125,
      "learning_rate": 4.5145914624173465e-05,
      "loss": 0.887,
      "step": 160390
    },
    {
      "epoch": 0.5621620106053756,
      "grad_norm": 2.765625,
      "learning_rate": 4.514526559550976e-05,
      "loss": 0.902,
      "step": 160400
    },
    {
      "epoch": 0.5621970581122712,
      "grad_norm": 2.921875,
      "learning_rate": 4.514461656684606e-05,
      "loss": 0.9199,
      "step": 160410
    },
    {
      "epoch": 0.5622321056191668,
      "grad_norm": 2.921875,
      "learning_rate": 4.5143967538182356e-05,
      "loss": 0.9875,
      "step": 160420
    },
    {
      "epoch": 0.5622671531260623,
      "grad_norm": 2.703125,
      "learning_rate": 4.514331850951866e-05,
      "loss": 0.9276,
      "step": 160430
    },
    {
      "epoch": 0.562302200632958,
      "grad_norm": 3.328125,
      "learning_rate": 4.514266948085495e-05,
      "loss": 0.8482,
      "step": 160440
    },
    {
      "epoch": 0.5623372481398535,
      "grad_norm": 2.6875,
      "learning_rate": 4.514202045219125e-05,
      "loss": 0.869,
      "step": 160450
    },
    {
      "epoch": 0.5623722956467492,
      "grad_norm": 3.265625,
      "learning_rate": 4.514137142352755e-05,
      "loss": 0.9309,
      "step": 160460
    },
    {
      "epoch": 0.5624073431536447,
      "grad_norm": 3.328125,
      "learning_rate": 4.514072239486385e-05,
      "loss": 0.8897,
      "step": 160470
    },
    {
      "epoch": 0.5624423906605404,
      "grad_norm": 3.65625,
      "learning_rate": 4.514007336620015e-05,
      "loss": 0.923,
      "step": 160480
    },
    {
      "epoch": 0.562477438167436,
      "grad_norm": 3.453125,
      "learning_rate": 4.5139424337536445e-05,
      "loss": 0.9104,
      "step": 160490
    },
    {
      "epoch": 0.5625124856743315,
      "grad_norm": 2.59375,
      "learning_rate": 4.513877530887275e-05,
      "loss": 0.8944,
      "step": 160500
    },
    {
      "epoch": 0.5625475331812272,
      "grad_norm": 2.734375,
      "learning_rate": 4.513812628020904e-05,
      "loss": 1.0372,
      "step": 160510
    },
    {
      "epoch": 0.5625825806881227,
      "grad_norm": 3.546875,
      "learning_rate": 4.513747725154534e-05,
      "loss": 0.9406,
      "step": 160520
    },
    {
      "epoch": 0.5626176281950184,
      "grad_norm": 2.9375,
      "learning_rate": 4.513682822288164e-05,
      "loss": 0.9566,
      "step": 160530
    },
    {
      "epoch": 0.5626526757019139,
      "grad_norm": 2.953125,
      "learning_rate": 4.513617919421794e-05,
      "loss": 0.9631,
      "step": 160540
    },
    {
      "epoch": 0.5626877232088096,
      "grad_norm": 2.984375,
      "learning_rate": 4.513553016555423e-05,
      "loss": 0.8887,
      "step": 160550
    },
    {
      "epoch": 0.5627227707157051,
      "grad_norm": 2.828125,
      "learning_rate": 4.5134881136890535e-05,
      "loss": 1.0529,
      "step": 160560
    },
    {
      "epoch": 0.5627578182226007,
      "grad_norm": 3.234375,
      "learning_rate": 4.513423210822683e-05,
      "loss": 0.9403,
      "step": 160570
    },
    {
      "epoch": 0.5627928657294964,
      "grad_norm": 3.15625,
      "learning_rate": 4.513358307956313e-05,
      "loss": 0.8834,
      "step": 160580
    },
    {
      "epoch": 0.5628279132363919,
      "grad_norm": 3.203125,
      "learning_rate": 4.5132934050899425e-05,
      "loss": 0.9588,
      "step": 160590
    },
    {
      "epoch": 0.5628629607432876,
      "grad_norm": 3.015625,
      "learning_rate": 4.513228502223573e-05,
      "loss": 0.849,
      "step": 160600
    },
    {
      "epoch": 0.5628980082501831,
      "grad_norm": 3.015625,
      "learning_rate": 4.513163599357202e-05,
      "loss": 0.9255,
      "step": 160610
    },
    {
      "epoch": 0.5629330557570787,
      "grad_norm": 2.5625,
      "learning_rate": 4.513098696490832e-05,
      "loss": 0.8991,
      "step": 160620
    },
    {
      "epoch": 0.5629681032639743,
      "grad_norm": 3.078125,
      "learning_rate": 4.5130337936244624e-05,
      "loss": 0.9248,
      "step": 160630
    },
    {
      "epoch": 0.5630031507708699,
      "grad_norm": 3.03125,
      "learning_rate": 4.512968890758092e-05,
      "loss": 0.883,
      "step": 160640
    },
    {
      "epoch": 0.5630381982777655,
      "grad_norm": 3.171875,
      "learning_rate": 4.512903987891721e-05,
      "loss": 0.8396,
      "step": 160650
    },
    {
      "epoch": 0.5630732457846611,
      "grad_norm": 2.765625,
      "learning_rate": 4.512839085025351e-05,
      "loss": 0.9479,
      "step": 160660
    },
    {
      "epoch": 0.5631082932915566,
      "grad_norm": 3.171875,
      "learning_rate": 4.512774182158981e-05,
      "loss": 0.8263,
      "step": 160670
    },
    {
      "epoch": 0.5631433407984523,
      "grad_norm": 3.421875,
      "learning_rate": 4.5127092792926104e-05,
      "loss": 0.9306,
      "step": 160680
    },
    {
      "epoch": 0.5631783883053479,
      "grad_norm": 3.15625,
      "learning_rate": 4.5126443764262405e-05,
      "loss": 0.9745,
      "step": 160690
    },
    {
      "epoch": 0.5632134358122435,
      "grad_norm": 2.953125,
      "learning_rate": 4.51257947355987e-05,
      "loss": 0.8704,
      "step": 160700
    },
    {
      "epoch": 0.5632484833191391,
      "grad_norm": 2.921875,
      "learning_rate": 4.5125145706935e-05,
      "loss": 0.8626,
      "step": 160710
    },
    {
      "epoch": 0.5632835308260347,
      "grad_norm": 3.1875,
      "learning_rate": 4.51244966782713e-05,
      "loss": 0.9293,
      "step": 160720
    },
    {
      "epoch": 0.5633185783329303,
      "grad_norm": 2.65625,
      "learning_rate": 4.51238476496076e-05,
      "loss": 0.9283,
      "step": 160730
    },
    {
      "epoch": 0.5633536258398258,
      "grad_norm": 3.015625,
      "learning_rate": 4.51231986209439e-05,
      "loss": 0.8708,
      "step": 160740
    },
    {
      "epoch": 0.5633886733467215,
      "grad_norm": 2.96875,
      "learning_rate": 4.512254959228019e-05,
      "loss": 0.9898,
      "step": 160750
    },
    {
      "epoch": 0.563423720853617,
      "grad_norm": 2.8125,
      "learning_rate": 4.5121900563616495e-05,
      "loss": 0.9179,
      "step": 160760
    },
    {
      "epoch": 0.5634587683605127,
      "grad_norm": 3.0,
      "learning_rate": 4.512125153495279e-05,
      "loss": 0.9144,
      "step": 160770
    },
    {
      "epoch": 0.5634938158674083,
      "grad_norm": 2.890625,
      "learning_rate": 4.512060250628909e-05,
      "loss": 0.9322,
      "step": 160780
    },
    {
      "epoch": 0.5635288633743039,
      "grad_norm": 3.0625,
      "learning_rate": 4.5119953477625385e-05,
      "loss": 0.9652,
      "step": 160790
    },
    {
      "epoch": 0.5635639108811995,
      "grad_norm": 2.890625,
      "learning_rate": 4.511930444896169e-05,
      "loss": 0.8913,
      "step": 160800
    },
    {
      "epoch": 0.563598958388095,
      "grad_norm": 3.0625,
      "learning_rate": 4.511865542029798e-05,
      "loss": 0.9633,
      "step": 160810
    },
    {
      "epoch": 0.5636340058949907,
      "grad_norm": 2.78125,
      "learning_rate": 4.511800639163428e-05,
      "loss": 0.881,
      "step": 160820
    },
    {
      "epoch": 0.5636690534018862,
      "grad_norm": 3.1875,
      "learning_rate": 4.511735736297058e-05,
      "loss": 0.9065,
      "step": 160830
    },
    {
      "epoch": 0.5637041009087819,
      "grad_norm": 3.03125,
      "learning_rate": 4.511670833430688e-05,
      "loss": 0.8892,
      "step": 160840
    },
    {
      "epoch": 0.5637391484156774,
      "grad_norm": 3.125,
      "learning_rate": 4.511605930564318e-05,
      "loss": 0.9936,
      "step": 160850
    },
    {
      "epoch": 0.563774195922573,
      "grad_norm": 3.0,
      "learning_rate": 4.5115410276979475e-05,
      "loss": 0.8199,
      "step": 160860
    },
    {
      "epoch": 0.5638092434294687,
      "grad_norm": 3.359375,
      "learning_rate": 4.5114761248315776e-05,
      "loss": 0.852,
      "step": 160870
    },
    {
      "epoch": 0.5638442909363642,
      "grad_norm": 3.203125,
      "learning_rate": 4.511411221965207e-05,
      "loss": 0.9119,
      "step": 160880
    },
    {
      "epoch": 0.5638793384432599,
      "grad_norm": 2.875,
      "learning_rate": 4.511346319098837e-05,
      "loss": 0.871,
      "step": 160890
    },
    {
      "epoch": 0.5639143859501554,
      "grad_norm": 2.5,
      "learning_rate": 4.511281416232467e-05,
      "loss": 0.8841,
      "step": 160900
    },
    {
      "epoch": 0.5639494334570511,
      "grad_norm": 3.3125,
      "learning_rate": 4.511216513366097e-05,
      "loss": 0.9613,
      "step": 160910
    },
    {
      "epoch": 0.5639844809639466,
      "grad_norm": 2.921875,
      "learning_rate": 4.511151610499726e-05,
      "loss": 0.9013,
      "step": 160920
    },
    {
      "epoch": 0.5640195284708422,
      "grad_norm": 3.125,
      "learning_rate": 4.5110867076333564e-05,
      "loss": 0.9474,
      "step": 160930
    },
    {
      "epoch": 0.5640545759777378,
      "grad_norm": 3.015625,
      "learning_rate": 4.511021804766986e-05,
      "loss": 1.0094,
      "step": 160940
    },
    {
      "epoch": 0.5640896234846334,
      "grad_norm": 3.171875,
      "learning_rate": 4.510956901900616e-05,
      "loss": 0.946,
      "step": 160950
    },
    {
      "epoch": 0.564124670991529,
      "grad_norm": 3.015625,
      "learning_rate": 4.5108919990342455e-05,
      "loss": 0.9328,
      "step": 160960
    },
    {
      "epoch": 0.5641597184984246,
      "grad_norm": 3.234375,
      "learning_rate": 4.5108270961678756e-05,
      "loss": 0.9738,
      "step": 160970
    },
    {
      "epoch": 0.5641947660053203,
      "grad_norm": 3.0625,
      "learning_rate": 4.510762193301505e-05,
      "loss": 0.9382,
      "step": 160980
    },
    {
      "epoch": 0.5642298135122158,
      "grad_norm": 3.140625,
      "learning_rate": 4.510697290435135e-05,
      "loss": 0.8658,
      "step": 160990
    },
    {
      "epoch": 0.5642648610191114,
      "grad_norm": 3.34375,
      "learning_rate": 4.5106323875687653e-05,
      "loss": 0.9014,
      "step": 161000
    },
    {
      "epoch": 0.564299908526007,
      "grad_norm": 3.3125,
      "learning_rate": 4.510567484702395e-05,
      "loss": 0.8851,
      "step": 161010
    },
    {
      "epoch": 0.5643349560329026,
      "grad_norm": 2.78125,
      "learning_rate": 4.510502581836025e-05,
      "loss": 0.9416,
      "step": 161020
    },
    {
      "epoch": 0.5643700035397982,
      "grad_norm": 2.875,
      "learning_rate": 4.510437678969654e-05,
      "loss": 0.9321,
      "step": 161030
    },
    {
      "epoch": 0.5644050510466938,
      "grad_norm": 2.8125,
      "learning_rate": 4.510372776103284e-05,
      "loss": 0.8867,
      "step": 161040
    },
    {
      "epoch": 0.5644400985535893,
      "grad_norm": 3.328125,
      "learning_rate": 4.510307873236913e-05,
      "loss": 0.9645,
      "step": 161050
    },
    {
      "epoch": 0.564475146060485,
      "grad_norm": 2.828125,
      "learning_rate": 4.5102429703705435e-05,
      "loss": 0.8757,
      "step": 161060
    },
    {
      "epoch": 0.5645101935673806,
      "grad_norm": 3.015625,
      "learning_rate": 4.510178067504173e-05,
      "loss": 0.8726,
      "step": 161070
    },
    {
      "epoch": 0.5645452410742762,
      "grad_norm": 2.46875,
      "learning_rate": 4.510113164637803e-05,
      "loss": 0.9382,
      "step": 161080
    },
    {
      "epoch": 0.5645802885811718,
      "grad_norm": 3.21875,
      "learning_rate": 4.510048261771433e-05,
      "loss": 0.9135,
      "step": 161090
    },
    {
      "epoch": 0.5646153360880674,
      "grad_norm": 2.78125,
      "learning_rate": 4.509983358905063e-05,
      "loss": 0.9188,
      "step": 161100
    },
    {
      "epoch": 0.564650383594963,
      "grad_norm": 2.765625,
      "learning_rate": 4.509918456038693e-05,
      "loss": 0.8299,
      "step": 161110
    },
    {
      "epoch": 0.5646854311018585,
      "grad_norm": 2.859375,
      "learning_rate": 4.509853553172322e-05,
      "loss": 0.9094,
      "step": 161120
    },
    {
      "epoch": 0.5647204786087542,
      "grad_norm": 2.984375,
      "learning_rate": 4.5097886503059524e-05,
      "loss": 0.9312,
      "step": 161130
    },
    {
      "epoch": 0.5647555261156497,
      "grad_norm": 3.28125,
      "learning_rate": 4.509723747439582e-05,
      "loss": 0.9522,
      "step": 161140
    },
    {
      "epoch": 0.5647905736225454,
      "grad_norm": 3.46875,
      "learning_rate": 4.509658844573212e-05,
      "loss": 0.8817,
      "step": 161150
    },
    {
      "epoch": 0.5648256211294409,
      "grad_norm": 3.140625,
      "learning_rate": 4.5095939417068415e-05,
      "loss": 0.9909,
      "step": 161160
    },
    {
      "epoch": 0.5648606686363365,
      "grad_norm": 2.765625,
      "learning_rate": 4.5095290388404716e-05,
      "loss": 0.9618,
      "step": 161170
    },
    {
      "epoch": 0.5648957161432322,
      "grad_norm": 2.515625,
      "learning_rate": 4.509464135974101e-05,
      "loss": 0.8773,
      "step": 161180
    },
    {
      "epoch": 0.5649307636501277,
      "grad_norm": 2.6875,
      "learning_rate": 4.509399233107731e-05,
      "loss": 0.9065,
      "step": 161190
    },
    {
      "epoch": 0.5649658111570234,
      "grad_norm": 2.96875,
      "learning_rate": 4.509334330241361e-05,
      "loss": 0.9844,
      "step": 161200
    },
    {
      "epoch": 0.5650008586639189,
      "grad_norm": 2.953125,
      "learning_rate": 4.509269427374991e-05,
      "loss": 0.9393,
      "step": 161210
    },
    {
      "epoch": 0.5650359061708146,
      "grad_norm": 2.84375,
      "learning_rate": 4.509204524508621e-05,
      "loss": 0.8911,
      "step": 161220
    },
    {
      "epoch": 0.5650709536777101,
      "grad_norm": 3.109375,
      "learning_rate": 4.5091396216422504e-05,
      "loss": 0.8775,
      "step": 161230
    },
    {
      "epoch": 0.5651060011846057,
      "grad_norm": 3.4375,
      "learning_rate": 4.5090747187758805e-05,
      "loss": 0.9534,
      "step": 161240
    },
    {
      "epoch": 0.5651410486915013,
      "grad_norm": 3.21875,
      "learning_rate": 4.50900981590951e-05,
      "loss": 0.8507,
      "step": 161250
    },
    {
      "epoch": 0.5651760961983969,
      "grad_norm": 2.625,
      "learning_rate": 4.50894491304314e-05,
      "loss": 0.9,
      "step": 161260
    },
    {
      "epoch": 0.5652111437052926,
      "grad_norm": 2.65625,
      "learning_rate": 4.5088800101767696e-05,
      "loss": 0.9545,
      "step": 161270
    },
    {
      "epoch": 0.5652461912121881,
      "grad_norm": 3.140625,
      "learning_rate": 4.5088151073104e-05,
      "loss": 0.8936,
      "step": 161280
    },
    {
      "epoch": 0.5652812387190838,
      "grad_norm": 2.984375,
      "learning_rate": 4.508750204444029e-05,
      "loss": 0.8622,
      "step": 161290
    },
    {
      "epoch": 0.5653162862259793,
      "grad_norm": 3.046875,
      "learning_rate": 4.5086853015776593e-05,
      "loss": 0.9472,
      "step": 161300
    },
    {
      "epoch": 0.5653513337328749,
      "grad_norm": 2.828125,
      "learning_rate": 4.508620398711289e-05,
      "loss": 0.9625,
      "step": 161310
    },
    {
      "epoch": 0.5653863812397705,
      "grad_norm": 2.953125,
      "learning_rate": 4.508555495844919e-05,
      "loss": 0.8609,
      "step": 161320
    },
    {
      "epoch": 0.5654214287466661,
      "grad_norm": 3.171875,
      "learning_rate": 4.5084905929785484e-05,
      "loss": 0.9952,
      "step": 161330
    },
    {
      "epoch": 0.5654564762535617,
      "grad_norm": 3.46875,
      "learning_rate": 4.5084256901121785e-05,
      "loss": 0.9349,
      "step": 161340
    },
    {
      "epoch": 0.5654915237604573,
      "grad_norm": 3.046875,
      "learning_rate": 4.508360787245809e-05,
      "loss": 0.87,
      "step": 161350
    },
    {
      "epoch": 0.565526571267353,
      "grad_norm": 3.125,
      "learning_rate": 4.508295884379438e-05,
      "loss": 0.9709,
      "step": 161360
    },
    {
      "epoch": 0.5655616187742485,
      "grad_norm": 3.28125,
      "learning_rate": 4.508230981513068e-05,
      "loss": 0.8828,
      "step": 161370
    },
    {
      "epoch": 0.5655966662811441,
      "grad_norm": 2.859375,
      "learning_rate": 4.508166078646698e-05,
      "loss": 0.7818,
      "step": 161380
    },
    {
      "epoch": 0.5656317137880397,
      "grad_norm": 2.34375,
      "learning_rate": 4.508101175780328e-05,
      "loss": 0.9049,
      "step": 161390
    },
    {
      "epoch": 0.5656667612949353,
      "grad_norm": 2.828125,
      "learning_rate": 4.508036272913957e-05,
      "loss": 0.9218,
      "step": 161400
    },
    {
      "epoch": 0.5657018088018309,
      "grad_norm": 2.734375,
      "learning_rate": 4.507971370047587e-05,
      "loss": 0.8747,
      "step": 161410
    },
    {
      "epoch": 0.5657368563087265,
      "grad_norm": 3.0,
      "learning_rate": 4.507906467181216e-05,
      "loss": 0.8618,
      "step": 161420
    },
    {
      "epoch": 0.565771903815622,
      "grad_norm": 2.859375,
      "learning_rate": 4.5078415643148464e-05,
      "loss": 0.8521,
      "step": 161430
    },
    {
      "epoch": 0.5658069513225177,
      "grad_norm": 2.46875,
      "learning_rate": 4.5077766614484765e-05,
      "loss": 0.8991,
      "step": 161440
    },
    {
      "epoch": 0.5658419988294132,
      "grad_norm": 2.96875,
      "learning_rate": 4.507711758582106e-05,
      "loss": 1.0053,
      "step": 161450
    },
    {
      "epoch": 0.5658770463363089,
      "grad_norm": 2.703125,
      "learning_rate": 4.507646855715736e-05,
      "loss": 0.8766,
      "step": 161460
    },
    {
      "epoch": 0.5659120938432045,
      "grad_norm": 2.671875,
      "learning_rate": 4.5075819528493656e-05,
      "loss": 0.9247,
      "step": 161470
    },
    {
      "epoch": 0.5659471413501,
      "grad_norm": 3.296875,
      "learning_rate": 4.507517049982996e-05,
      "loss": 0.8596,
      "step": 161480
    },
    {
      "epoch": 0.5659821888569957,
      "grad_norm": 3.0,
      "learning_rate": 4.507452147116625e-05,
      "loss": 0.9548,
      "step": 161490
    },
    {
      "epoch": 0.5660172363638912,
      "grad_norm": 3.09375,
      "learning_rate": 4.5073872442502553e-05,
      "loss": 0.9249,
      "step": 161500
    },
    {
      "epoch": 0.5660522838707869,
      "grad_norm": 3.078125,
      "learning_rate": 4.507322341383885e-05,
      "loss": 0.8479,
      "step": 161510
    },
    {
      "epoch": 0.5660873313776824,
      "grad_norm": 2.875,
      "learning_rate": 4.507257438517515e-05,
      "loss": 0.9526,
      "step": 161520
    },
    {
      "epoch": 0.5661223788845781,
      "grad_norm": 2.765625,
      "learning_rate": 4.5071925356511444e-05,
      "loss": 0.894,
      "step": 161530
    },
    {
      "epoch": 0.5661574263914736,
      "grad_norm": 2.84375,
      "learning_rate": 4.5071276327847745e-05,
      "loss": 0.8956,
      "step": 161540
    },
    {
      "epoch": 0.5661924738983692,
      "grad_norm": 2.859375,
      "learning_rate": 4.507062729918404e-05,
      "loss": 0.9531,
      "step": 161550
    },
    {
      "epoch": 0.5662275214052649,
      "grad_norm": 2.6875,
      "learning_rate": 4.506997827052034e-05,
      "loss": 0.8697,
      "step": 161560
    },
    {
      "epoch": 0.5662625689121604,
      "grad_norm": 3.09375,
      "learning_rate": 4.5069329241856636e-05,
      "loss": 0.9101,
      "step": 161570
    },
    {
      "epoch": 0.5662976164190561,
      "grad_norm": 2.6875,
      "learning_rate": 4.506868021319294e-05,
      "loss": 0.8404,
      "step": 161580
    },
    {
      "epoch": 0.5663326639259516,
      "grad_norm": 3.40625,
      "learning_rate": 4.506803118452924e-05,
      "loss": 0.9341,
      "step": 161590
    },
    {
      "epoch": 0.5663677114328473,
      "grad_norm": 2.921875,
      "learning_rate": 4.5067382155865533e-05,
      "loss": 0.9216,
      "step": 161600
    },
    {
      "epoch": 0.5664027589397428,
      "grad_norm": 2.828125,
      "learning_rate": 4.5066733127201835e-05,
      "loss": 0.8578,
      "step": 161610
    },
    {
      "epoch": 0.5664378064466384,
      "grad_norm": 2.75,
      "learning_rate": 4.506608409853813e-05,
      "loss": 0.8955,
      "step": 161620
    },
    {
      "epoch": 0.566472853953534,
      "grad_norm": 2.828125,
      "learning_rate": 4.506543506987443e-05,
      "loss": 0.945,
      "step": 161630
    },
    {
      "epoch": 0.5665079014604296,
      "grad_norm": 3.46875,
      "learning_rate": 4.5064786041210725e-05,
      "loss": 0.9766,
      "step": 161640
    },
    {
      "epoch": 0.5665429489673252,
      "grad_norm": 3.140625,
      "learning_rate": 4.506413701254703e-05,
      "loss": 0.9727,
      "step": 161650
    },
    {
      "epoch": 0.5665779964742208,
      "grad_norm": 3.171875,
      "learning_rate": 4.506348798388332e-05,
      "loss": 0.9587,
      "step": 161660
    },
    {
      "epoch": 0.5666130439811164,
      "grad_norm": 2.71875,
      "learning_rate": 4.506283895521962e-05,
      "loss": 0.8724,
      "step": 161670
    },
    {
      "epoch": 0.566648091488012,
      "grad_norm": 3.625,
      "learning_rate": 4.506218992655592e-05,
      "loss": 0.9041,
      "step": 161680
    },
    {
      "epoch": 0.5666831389949076,
      "grad_norm": 3.0,
      "learning_rate": 4.506154089789222e-05,
      "loss": 0.8937,
      "step": 161690
    },
    {
      "epoch": 0.5667181865018032,
      "grad_norm": 3.171875,
      "learning_rate": 4.5060891869228513e-05,
      "loss": 0.8824,
      "step": 161700
    },
    {
      "epoch": 0.5667532340086988,
      "grad_norm": 3.09375,
      "learning_rate": 4.5060242840564815e-05,
      "loss": 0.9306,
      "step": 161710
    },
    {
      "epoch": 0.5667882815155943,
      "grad_norm": 2.921875,
      "learning_rate": 4.5059593811901116e-05,
      "loss": 0.9385,
      "step": 161720
    },
    {
      "epoch": 0.56682332902249,
      "grad_norm": 3.0625,
      "learning_rate": 4.505894478323741e-05,
      "loss": 0.9328,
      "step": 161730
    },
    {
      "epoch": 0.5668583765293855,
      "grad_norm": 2.84375,
      "learning_rate": 4.505829575457371e-05,
      "loss": 0.8666,
      "step": 161740
    },
    {
      "epoch": 0.5668934240362812,
      "grad_norm": 2.625,
      "learning_rate": 4.505764672591001e-05,
      "loss": 0.8958,
      "step": 161750
    },
    {
      "epoch": 0.5669284715431768,
      "grad_norm": 2.75,
      "learning_rate": 4.505699769724631e-05,
      "loss": 0.8439,
      "step": 161760
    },
    {
      "epoch": 0.5669635190500724,
      "grad_norm": 2.671875,
      "learning_rate": 4.50563486685826e-05,
      "loss": 0.8562,
      "step": 161770
    },
    {
      "epoch": 0.566998566556968,
      "grad_norm": 2.734375,
      "learning_rate": 4.50556996399189e-05,
      "loss": 0.9545,
      "step": 161780
    },
    {
      "epoch": 0.5670336140638635,
      "grad_norm": 3.09375,
      "learning_rate": 4.505505061125519e-05,
      "loss": 0.9313,
      "step": 161790
    },
    {
      "epoch": 0.5670686615707592,
      "grad_norm": 3.0625,
      "learning_rate": 4.5054401582591493e-05,
      "loss": 1.0123,
      "step": 161800
    },
    {
      "epoch": 0.5671037090776547,
      "grad_norm": 2.890625,
      "learning_rate": 4.5053752553927795e-05,
      "loss": 0.9391,
      "step": 161810
    },
    {
      "epoch": 0.5671387565845504,
      "grad_norm": 2.890625,
      "learning_rate": 4.505310352526409e-05,
      "loss": 0.9609,
      "step": 161820
    },
    {
      "epoch": 0.5671738040914459,
      "grad_norm": 2.640625,
      "learning_rate": 4.505245449660039e-05,
      "loss": 0.9051,
      "step": 161830
    },
    {
      "epoch": 0.5672088515983416,
      "grad_norm": 2.703125,
      "learning_rate": 4.5051805467936685e-05,
      "loss": 0.9085,
      "step": 161840
    },
    {
      "epoch": 0.5672438991052372,
      "grad_norm": 2.90625,
      "learning_rate": 4.505115643927299e-05,
      "loss": 0.8806,
      "step": 161850
    },
    {
      "epoch": 0.5672789466121327,
      "grad_norm": 3.015625,
      "learning_rate": 4.505050741060928e-05,
      "loss": 0.8741,
      "step": 161860
    },
    {
      "epoch": 0.5673139941190284,
      "grad_norm": 3.0625,
      "learning_rate": 4.504985838194558e-05,
      "loss": 0.9746,
      "step": 161870
    },
    {
      "epoch": 0.5673490416259239,
      "grad_norm": 2.5625,
      "learning_rate": 4.504920935328188e-05,
      "loss": 0.9265,
      "step": 161880
    },
    {
      "epoch": 0.5673840891328196,
      "grad_norm": 3.484375,
      "learning_rate": 4.504856032461818e-05,
      "loss": 1.0155,
      "step": 161890
    },
    {
      "epoch": 0.5674191366397151,
      "grad_norm": 2.84375,
      "learning_rate": 4.5047911295954473e-05,
      "loss": 0.8313,
      "step": 161900
    },
    {
      "epoch": 0.5674541841466108,
      "grad_norm": 2.578125,
      "learning_rate": 4.5047262267290775e-05,
      "loss": 0.8651,
      "step": 161910
    },
    {
      "epoch": 0.5674892316535063,
      "grad_norm": 3.09375,
      "learning_rate": 4.504661323862707e-05,
      "loss": 0.9751,
      "step": 161920
    },
    {
      "epoch": 0.5675242791604019,
      "grad_norm": 3.015625,
      "learning_rate": 4.504596420996337e-05,
      "loss": 0.9721,
      "step": 161930
    },
    {
      "epoch": 0.5675593266672975,
      "grad_norm": 3.34375,
      "learning_rate": 4.5045315181299665e-05,
      "loss": 0.972,
      "step": 161940
    },
    {
      "epoch": 0.5675943741741931,
      "grad_norm": 3.03125,
      "learning_rate": 4.504466615263597e-05,
      "loss": 0.8892,
      "step": 161950
    },
    {
      "epoch": 0.5676294216810888,
      "grad_norm": 2.90625,
      "learning_rate": 4.504401712397227e-05,
      "loss": 0.8866,
      "step": 161960
    },
    {
      "epoch": 0.5676644691879843,
      "grad_norm": 3.28125,
      "learning_rate": 4.504336809530856e-05,
      "loss": 0.8941,
      "step": 161970
    },
    {
      "epoch": 0.56769951669488,
      "grad_norm": 2.96875,
      "learning_rate": 4.5042719066644864e-05,
      "loss": 0.8917,
      "step": 161980
    },
    {
      "epoch": 0.5677345642017755,
      "grad_norm": 3.1875,
      "learning_rate": 4.504207003798116e-05,
      "loss": 0.9253,
      "step": 161990
    },
    {
      "epoch": 0.5677696117086711,
      "grad_norm": 2.9375,
      "learning_rate": 4.504142100931746e-05,
      "loss": 0.8631,
      "step": 162000
    },
    {
      "epoch": 0.5678046592155667,
      "grad_norm": 2.78125,
      "learning_rate": 4.5040771980653755e-05,
      "loss": 0.9338,
      "step": 162010
    },
    {
      "epoch": 0.5678397067224623,
      "grad_norm": 2.609375,
      "learning_rate": 4.5040122951990056e-05,
      "loss": 0.9138,
      "step": 162020
    },
    {
      "epoch": 0.5678747542293578,
      "grad_norm": 2.71875,
      "learning_rate": 4.503947392332635e-05,
      "loss": 0.8995,
      "step": 162030
    },
    {
      "epoch": 0.5679098017362535,
      "grad_norm": 3.0,
      "learning_rate": 4.503882489466265e-05,
      "loss": 0.9075,
      "step": 162040
    },
    {
      "epoch": 0.5679448492431491,
      "grad_norm": 2.65625,
      "learning_rate": 4.503817586599895e-05,
      "loss": 0.9202,
      "step": 162050
    },
    {
      "epoch": 0.5679798967500447,
      "grad_norm": 3.265625,
      "learning_rate": 4.503752683733525e-05,
      "loss": 0.9319,
      "step": 162060
    },
    {
      "epoch": 0.5680149442569403,
      "grad_norm": 2.75,
      "learning_rate": 4.503687780867154e-05,
      "loss": 0.873,
      "step": 162070
    },
    {
      "epoch": 0.5680499917638359,
      "grad_norm": 3.4375,
      "learning_rate": 4.5036228780007844e-05,
      "loss": 0.9551,
      "step": 162080
    },
    {
      "epoch": 0.5680850392707315,
      "grad_norm": 2.6875,
      "learning_rate": 4.5035579751344146e-05,
      "loss": 0.8481,
      "step": 162090
    },
    {
      "epoch": 0.568120086777627,
      "grad_norm": 3.140625,
      "learning_rate": 4.503493072268044e-05,
      "loss": 0.9306,
      "step": 162100
    },
    {
      "epoch": 0.5681551342845227,
      "grad_norm": 2.9375,
      "learning_rate": 4.503428169401674e-05,
      "loss": 0.8882,
      "step": 162110
    },
    {
      "epoch": 0.5681901817914182,
      "grad_norm": 2.6875,
      "learning_rate": 4.5033632665353036e-05,
      "loss": 1.0101,
      "step": 162120
    },
    {
      "epoch": 0.5682252292983139,
      "grad_norm": 2.734375,
      "learning_rate": 4.503298363668934e-05,
      "loss": 0.9098,
      "step": 162130
    },
    {
      "epoch": 0.5682602768052094,
      "grad_norm": 3.046875,
      "learning_rate": 4.503233460802563e-05,
      "loss": 0.928,
      "step": 162140
    },
    {
      "epoch": 0.568295324312105,
      "grad_norm": 2.640625,
      "learning_rate": 4.503168557936193e-05,
      "loss": 0.9509,
      "step": 162150
    },
    {
      "epoch": 0.5683303718190007,
      "grad_norm": 2.96875,
      "learning_rate": 4.503103655069822e-05,
      "loss": 0.893,
      "step": 162160
    },
    {
      "epoch": 0.5683654193258962,
      "grad_norm": 2.25,
      "learning_rate": 4.503038752203452e-05,
      "loss": 0.8983,
      "step": 162170
    },
    {
      "epoch": 0.5684004668327919,
      "grad_norm": 3.140625,
      "learning_rate": 4.5029738493370824e-05,
      "loss": 0.9564,
      "step": 162180
    },
    {
      "epoch": 0.5684355143396874,
      "grad_norm": 2.671875,
      "learning_rate": 4.502908946470712e-05,
      "loss": 0.8959,
      "step": 162190
    },
    {
      "epoch": 0.5684705618465831,
      "grad_norm": 2.890625,
      "learning_rate": 4.502844043604342e-05,
      "loss": 1.0127,
      "step": 162200
    },
    {
      "epoch": 0.5685056093534786,
      "grad_norm": 3.3125,
      "learning_rate": 4.5027791407379715e-05,
      "loss": 0.9883,
      "step": 162210
    },
    {
      "epoch": 0.5685406568603742,
      "grad_norm": 3.0,
      "learning_rate": 4.5027142378716016e-05,
      "loss": 0.8975,
      "step": 162220
    },
    {
      "epoch": 0.5685757043672698,
      "grad_norm": 2.859375,
      "learning_rate": 4.502649335005231e-05,
      "loss": 0.9116,
      "step": 162230
    },
    {
      "epoch": 0.5686107518741654,
      "grad_norm": 3.3125,
      "learning_rate": 4.502584432138861e-05,
      "loss": 0.8703,
      "step": 162240
    },
    {
      "epoch": 0.5686457993810611,
      "grad_norm": 2.625,
      "learning_rate": 4.502519529272491e-05,
      "loss": 0.9073,
      "step": 162250
    },
    {
      "epoch": 0.5686808468879566,
      "grad_norm": 2.65625,
      "learning_rate": 4.502454626406121e-05,
      "loss": 0.8929,
      "step": 162260
    },
    {
      "epoch": 0.5687158943948523,
      "grad_norm": 3.34375,
      "learning_rate": 4.50238972353975e-05,
      "loss": 0.9553,
      "step": 162270
    },
    {
      "epoch": 0.5687509419017478,
      "grad_norm": 2.84375,
      "learning_rate": 4.5023248206733804e-05,
      "loss": 0.8876,
      "step": 162280
    },
    {
      "epoch": 0.5687859894086434,
      "grad_norm": 3.109375,
      "learning_rate": 4.50225991780701e-05,
      "loss": 0.9125,
      "step": 162290
    },
    {
      "epoch": 0.568821036915539,
      "grad_norm": 2.8125,
      "learning_rate": 4.50219501494064e-05,
      "loss": 0.8918,
      "step": 162300
    },
    {
      "epoch": 0.5688560844224346,
      "grad_norm": 2.890625,
      "learning_rate": 4.50213011207427e-05,
      "loss": 0.9808,
      "step": 162310
    },
    {
      "epoch": 0.5688911319293302,
      "grad_norm": 3.453125,
      "learning_rate": 4.5020652092078996e-05,
      "loss": 0.9846,
      "step": 162320
    },
    {
      "epoch": 0.5689261794362258,
      "grad_norm": 3.109375,
      "learning_rate": 4.50200030634153e-05,
      "loss": 0.876,
      "step": 162330
    },
    {
      "epoch": 0.5689612269431215,
      "grad_norm": 2.6875,
      "learning_rate": 4.501935403475159e-05,
      "loss": 0.9267,
      "step": 162340
    },
    {
      "epoch": 0.568996274450017,
      "grad_norm": 3.0625,
      "learning_rate": 4.5018705006087894e-05,
      "loss": 0.9728,
      "step": 162350
    },
    {
      "epoch": 0.5690313219569126,
      "grad_norm": 2.984375,
      "learning_rate": 4.501805597742419e-05,
      "loss": 0.913,
      "step": 162360
    },
    {
      "epoch": 0.5690663694638082,
      "grad_norm": 3.1875,
      "learning_rate": 4.501740694876049e-05,
      "loss": 0.9019,
      "step": 162370
    },
    {
      "epoch": 0.5691014169707038,
      "grad_norm": 2.6875,
      "learning_rate": 4.5016757920096784e-05,
      "loss": 0.7822,
      "step": 162380
    },
    {
      "epoch": 0.5691364644775994,
      "grad_norm": 2.84375,
      "learning_rate": 4.5016108891433086e-05,
      "loss": 0.9262,
      "step": 162390
    },
    {
      "epoch": 0.569171511984495,
      "grad_norm": 2.65625,
      "learning_rate": 4.501545986276938e-05,
      "loss": 0.8731,
      "step": 162400
    },
    {
      "epoch": 0.5692065594913905,
      "grad_norm": 2.609375,
      "learning_rate": 4.501481083410568e-05,
      "loss": 0.9214,
      "step": 162410
    },
    {
      "epoch": 0.5692416069982862,
      "grad_norm": 3.1875,
      "learning_rate": 4.5014161805441976e-05,
      "loss": 0.8717,
      "step": 162420
    },
    {
      "epoch": 0.5692766545051817,
      "grad_norm": 2.8125,
      "learning_rate": 4.501351277677828e-05,
      "loss": 0.9201,
      "step": 162430
    },
    {
      "epoch": 0.5693117020120774,
      "grad_norm": 3.28125,
      "learning_rate": 4.501286374811457e-05,
      "loss": 0.8493,
      "step": 162440
    },
    {
      "epoch": 0.569346749518973,
      "grad_norm": 3.078125,
      "learning_rate": 4.5012214719450874e-05,
      "loss": 0.8849,
      "step": 162450
    },
    {
      "epoch": 0.5693817970258686,
      "grad_norm": 3.3125,
      "learning_rate": 4.5011565690787175e-05,
      "loss": 0.8965,
      "step": 162460
    },
    {
      "epoch": 0.5694168445327642,
      "grad_norm": 3.109375,
      "learning_rate": 4.501091666212347e-05,
      "loss": 0.9453,
      "step": 162470
    },
    {
      "epoch": 0.5694518920396597,
      "grad_norm": 2.84375,
      "learning_rate": 4.501026763345977e-05,
      "loss": 0.9246,
      "step": 162480
    },
    {
      "epoch": 0.5694869395465554,
      "grad_norm": 2.90625,
      "learning_rate": 4.5009618604796066e-05,
      "loss": 0.9064,
      "step": 162490
    },
    {
      "epoch": 0.5695219870534509,
      "grad_norm": 3.125,
      "learning_rate": 4.500896957613237e-05,
      "loss": 0.9324,
      "step": 162500
    },
    {
      "epoch": 0.5695570345603466,
      "grad_norm": 2.9375,
      "learning_rate": 4.500832054746866e-05,
      "loss": 0.8876,
      "step": 162510
    },
    {
      "epoch": 0.5695920820672421,
      "grad_norm": 3.265625,
      "learning_rate": 4.500767151880496e-05,
      "loss": 0.9757,
      "step": 162520
    },
    {
      "epoch": 0.5696271295741377,
      "grad_norm": 3.15625,
      "learning_rate": 4.500702249014125e-05,
      "loss": 0.8653,
      "step": 162530
    },
    {
      "epoch": 0.5696621770810334,
      "grad_norm": 3.25,
      "learning_rate": 4.500637346147755e-05,
      "loss": 0.9406,
      "step": 162540
    },
    {
      "epoch": 0.5696972245879289,
      "grad_norm": 2.6875,
      "learning_rate": 4.5005724432813854e-05,
      "loss": 0.9191,
      "step": 162550
    },
    {
      "epoch": 0.5697322720948246,
      "grad_norm": 2.890625,
      "learning_rate": 4.500507540415015e-05,
      "loss": 0.9387,
      "step": 162560
    },
    {
      "epoch": 0.5697673196017201,
      "grad_norm": 3.0625,
      "learning_rate": 4.500442637548645e-05,
      "loss": 0.8886,
      "step": 162570
    },
    {
      "epoch": 0.5698023671086158,
      "grad_norm": 3.546875,
      "learning_rate": 4.5003777346822744e-05,
      "loss": 1.022,
      "step": 162580
    },
    {
      "epoch": 0.5698374146155113,
      "grad_norm": 3.59375,
      "learning_rate": 4.5003128318159046e-05,
      "loss": 0.9323,
      "step": 162590
    },
    {
      "epoch": 0.5698724621224069,
      "grad_norm": 3.09375,
      "learning_rate": 4.500247928949534e-05,
      "loss": 0.9218,
      "step": 162600
    },
    {
      "epoch": 0.5699075096293025,
      "grad_norm": 3.28125,
      "learning_rate": 4.500183026083164e-05,
      "loss": 0.8809,
      "step": 162610
    },
    {
      "epoch": 0.5699425571361981,
      "grad_norm": 2.75,
      "learning_rate": 4.5001181232167936e-05,
      "loss": 0.9393,
      "step": 162620
    },
    {
      "epoch": 0.5699776046430937,
      "grad_norm": 3.203125,
      "learning_rate": 4.500053220350424e-05,
      "loss": 0.9026,
      "step": 162630
    },
    {
      "epoch": 0.5700126521499893,
      "grad_norm": 3.359375,
      "learning_rate": 4.499988317484053e-05,
      "loss": 0.9545,
      "step": 162640
    },
    {
      "epoch": 0.570047699656885,
      "grad_norm": 2.796875,
      "learning_rate": 4.4999234146176834e-05,
      "loss": 0.9028,
      "step": 162650
    },
    {
      "epoch": 0.5700827471637805,
      "grad_norm": 2.921875,
      "learning_rate": 4.499858511751313e-05,
      "loss": 0.9003,
      "step": 162660
    },
    {
      "epoch": 0.5701177946706761,
      "grad_norm": 3.109375,
      "learning_rate": 4.499793608884943e-05,
      "loss": 0.8936,
      "step": 162670
    },
    {
      "epoch": 0.5701528421775717,
      "grad_norm": 2.6875,
      "learning_rate": 4.499728706018573e-05,
      "loss": 0.9272,
      "step": 162680
    },
    {
      "epoch": 0.5701878896844673,
      "grad_norm": 3.03125,
      "learning_rate": 4.4996638031522026e-05,
      "loss": 0.9249,
      "step": 162690
    },
    {
      "epoch": 0.5702229371913629,
      "grad_norm": 2.71875,
      "learning_rate": 4.499598900285833e-05,
      "loss": 0.8739,
      "step": 162700
    },
    {
      "epoch": 0.5702579846982585,
      "grad_norm": 2.984375,
      "learning_rate": 4.499533997419462e-05,
      "loss": 0.9462,
      "step": 162710
    },
    {
      "epoch": 0.570293032205154,
      "grad_norm": 3.09375,
      "learning_rate": 4.499469094553092e-05,
      "loss": 0.8778,
      "step": 162720
    },
    {
      "epoch": 0.5703280797120497,
      "grad_norm": 3.0,
      "learning_rate": 4.499404191686722e-05,
      "loss": 0.9456,
      "step": 162730
    },
    {
      "epoch": 0.5703631272189453,
      "grad_norm": 2.71875,
      "learning_rate": 4.499339288820352e-05,
      "loss": 0.9349,
      "step": 162740
    },
    {
      "epoch": 0.5703981747258409,
      "grad_norm": 3.3125,
      "learning_rate": 4.4992743859539814e-05,
      "loss": 0.8961,
      "step": 162750
    },
    {
      "epoch": 0.5704332222327365,
      "grad_norm": 3.4375,
      "learning_rate": 4.4992094830876115e-05,
      "loss": 0.9748,
      "step": 162760
    },
    {
      "epoch": 0.570468269739632,
      "grad_norm": 2.859375,
      "learning_rate": 4.499144580221241e-05,
      "loss": 0.826,
      "step": 162770
    },
    {
      "epoch": 0.5705033172465277,
      "grad_norm": 3.15625,
      "learning_rate": 4.499079677354871e-05,
      "loss": 0.9588,
      "step": 162780
    },
    {
      "epoch": 0.5705383647534232,
      "grad_norm": 3.375,
      "learning_rate": 4.4990147744885006e-05,
      "loss": 0.9722,
      "step": 162790
    },
    {
      "epoch": 0.5705734122603189,
      "grad_norm": 2.921875,
      "learning_rate": 4.498949871622131e-05,
      "loss": 0.9941,
      "step": 162800
    },
    {
      "epoch": 0.5706084597672144,
      "grad_norm": 3.1875,
      "learning_rate": 4.49888496875576e-05,
      "loss": 0.9532,
      "step": 162810
    },
    {
      "epoch": 0.5706435072741101,
      "grad_norm": 3.109375,
      "learning_rate": 4.49882006588939e-05,
      "loss": 0.9692,
      "step": 162820
    },
    {
      "epoch": 0.5706785547810056,
      "grad_norm": 3.265625,
      "learning_rate": 4.4987551630230204e-05,
      "loss": 0.9529,
      "step": 162830
    },
    {
      "epoch": 0.5707136022879012,
      "grad_norm": 3.3125,
      "learning_rate": 4.49869026015665e-05,
      "loss": 0.9341,
      "step": 162840
    },
    {
      "epoch": 0.5707486497947969,
      "grad_norm": 3.21875,
      "learning_rate": 4.49862535729028e-05,
      "loss": 0.9697,
      "step": 162850
    },
    {
      "epoch": 0.5707836973016924,
      "grad_norm": 3.03125,
      "learning_rate": 4.4985604544239095e-05,
      "loss": 0.9715,
      "step": 162860
    },
    {
      "epoch": 0.5708187448085881,
      "grad_norm": 3.21875,
      "learning_rate": 4.4984955515575396e-05,
      "loss": 0.9178,
      "step": 162870
    },
    {
      "epoch": 0.5708537923154836,
      "grad_norm": 2.828125,
      "learning_rate": 4.498430648691169e-05,
      "loss": 0.8393,
      "step": 162880
    },
    {
      "epoch": 0.5708888398223793,
      "grad_norm": 3.140625,
      "learning_rate": 4.498365745824799e-05,
      "loss": 0.9622,
      "step": 162890
    },
    {
      "epoch": 0.5709238873292748,
      "grad_norm": 2.953125,
      "learning_rate": 4.498300842958429e-05,
      "loss": 0.9006,
      "step": 162900
    },
    {
      "epoch": 0.5709589348361704,
      "grad_norm": 3.015625,
      "learning_rate": 4.498235940092058e-05,
      "loss": 0.9088,
      "step": 162910
    },
    {
      "epoch": 0.570993982343066,
      "grad_norm": 2.5,
      "learning_rate": 4.498171037225688e-05,
      "loss": 0.9429,
      "step": 162920
    },
    {
      "epoch": 0.5710290298499616,
      "grad_norm": 3.125,
      "learning_rate": 4.498106134359318e-05,
      "loss": 0.9001,
      "step": 162930
    },
    {
      "epoch": 0.5710640773568573,
      "grad_norm": 3.265625,
      "learning_rate": 4.498041231492948e-05,
      "loss": 0.885,
      "step": 162940
    },
    {
      "epoch": 0.5710991248637528,
      "grad_norm": 2.75,
      "learning_rate": 4.4979763286265774e-05,
      "loss": 0.8995,
      "step": 162950
    },
    {
      "epoch": 0.5711341723706485,
      "grad_norm": 3.265625,
      "learning_rate": 4.4979114257602075e-05,
      "loss": 0.9922,
      "step": 162960
    },
    {
      "epoch": 0.571169219877544,
      "grad_norm": 3.640625,
      "learning_rate": 4.497846522893837e-05,
      "loss": 0.956,
      "step": 162970
    },
    {
      "epoch": 0.5712042673844396,
      "grad_norm": 3.375,
      "learning_rate": 4.497781620027467e-05,
      "loss": 1.0262,
      "step": 162980
    },
    {
      "epoch": 0.5712393148913352,
      "grad_norm": 3.0,
      "learning_rate": 4.4977167171610966e-05,
      "loss": 0.9034,
      "step": 162990
    },
    {
      "epoch": 0.5712743623982308,
      "grad_norm": 2.8125,
      "learning_rate": 4.497651814294727e-05,
      "loss": 0.9364,
      "step": 163000
    },
    {
      "epoch": 0.5713094099051264,
      "grad_norm": 2.875,
      "learning_rate": 4.497586911428356e-05,
      "loss": 0.9059,
      "step": 163010
    },
    {
      "epoch": 0.571344457412022,
      "grad_norm": 2.984375,
      "learning_rate": 4.497522008561986e-05,
      "loss": 0.9841,
      "step": 163020
    },
    {
      "epoch": 0.5713795049189176,
      "grad_norm": 3.15625,
      "learning_rate": 4.497457105695616e-05,
      "loss": 0.8573,
      "step": 163030
    },
    {
      "epoch": 0.5714145524258132,
      "grad_norm": 3.265625,
      "learning_rate": 4.497392202829246e-05,
      "loss": 0.9769,
      "step": 163040
    },
    {
      "epoch": 0.5714495999327088,
      "grad_norm": 2.984375,
      "learning_rate": 4.497327299962876e-05,
      "loss": 0.931,
      "step": 163050
    },
    {
      "epoch": 0.5714846474396044,
      "grad_norm": 2.90625,
      "learning_rate": 4.4972623970965055e-05,
      "loss": 0.97,
      "step": 163060
    },
    {
      "epoch": 0.5715196949465,
      "grad_norm": 3.109375,
      "learning_rate": 4.4971974942301356e-05,
      "loss": 0.9924,
      "step": 163070
    },
    {
      "epoch": 0.5715547424533955,
      "grad_norm": 2.421875,
      "learning_rate": 4.497132591363765e-05,
      "loss": 0.9427,
      "step": 163080
    },
    {
      "epoch": 0.5715897899602912,
      "grad_norm": 2.75,
      "learning_rate": 4.497067688497395e-05,
      "loss": 0.8494,
      "step": 163090
    },
    {
      "epoch": 0.5716248374671867,
      "grad_norm": 3.015625,
      "learning_rate": 4.497002785631025e-05,
      "loss": 0.9226,
      "step": 163100
    },
    {
      "epoch": 0.5716598849740824,
      "grad_norm": 2.90625,
      "learning_rate": 4.496937882764655e-05,
      "loss": 0.8912,
      "step": 163110
    },
    {
      "epoch": 0.5716949324809779,
      "grad_norm": 2.9375,
      "learning_rate": 4.496872979898284e-05,
      "loss": 0.8899,
      "step": 163120
    },
    {
      "epoch": 0.5717299799878736,
      "grad_norm": 3.875,
      "learning_rate": 4.4968080770319144e-05,
      "loss": 0.9432,
      "step": 163130
    },
    {
      "epoch": 0.5717650274947692,
      "grad_norm": 3.0,
      "learning_rate": 4.496743174165544e-05,
      "loss": 0.8914,
      "step": 163140
    },
    {
      "epoch": 0.5718000750016647,
      "grad_norm": 2.578125,
      "learning_rate": 4.496678271299174e-05,
      "loss": 0.9076,
      "step": 163150
    },
    {
      "epoch": 0.5718351225085604,
      "grad_norm": 3.515625,
      "learning_rate": 4.4966133684328035e-05,
      "loss": 0.9551,
      "step": 163160
    },
    {
      "epoch": 0.5718701700154559,
      "grad_norm": 2.9375,
      "learning_rate": 4.4965484655664336e-05,
      "loss": 0.8531,
      "step": 163170
    },
    {
      "epoch": 0.5719052175223516,
      "grad_norm": 3.3125,
      "learning_rate": 4.496483562700063e-05,
      "loss": 0.8964,
      "step": 163180
    },
    {
      "epoch": 0.5719402650292471,
      "grad_norm": 2.5625,
      "learning_rate": 4.496418659833693e-05,
      "loss": 0.9249,
      "step": 163190
    },
    {
      "epoch": 0.5719753125361428,
      "grad_norm": 2.59375,
      "learning_rate": 4.4963537569673234e-05,
      "loss": 0.8684,
      "step": 163200
    },
    {
      "epoch": 0.5720103600430383,
      "grad_norm": 3.140625,
      "learning_rate": 4.496288854100953e-05,
      "loss": 0.8978,
      "step": 163210
    },
    {
      "epoch": 0.5720454075499339,
      "grad_norm": 3.0,
      "learning_rate": 4.496223951234583e-05,
      "loss": 0.8642,
      "step": 163220
    },
    {
      "epoch": 0.5720804550568296,
      "grad_norm": 2.5625,
      "learning_rate": 4.4961590483682124e-05,
      "loss": 0.9051,
      "step": 163230
    },
    {
      "epoch": 0.5721155025637251,
      "grad_norm": 2.8125,
      "learning_rate": 4.4960941455018426e-05,
      "loss": 0.9424,
      "step": 163240
    },
    {
      "epoch": 0.5721505500706208,
      "grad_norm": 3.203125,
      "learning_rate": 4.496029242635472e-05,
      "loss": 0.9506,
      "step": 163250
    },
    {
      "epoch": 0.5721855975775163,
      "grad_norm": 3.296875,
      "learning_rate": 4.495964339769102e-05,
      "loss": 0.9233,
      "step": 163260
    },
    {
      "epoch": 0.572220645084412,
      "grad_norm": 3.140625,
      "learning_rate": 4.4958994369027316e-05,
      "loss": 0.9588,
      "step": 163270
    },
    {
      "epoch": 0.5722556925913075,
      "grad_norm": 3.015625,
      "learning_rate": 4.495834534036361e-05,
      "loss": 0.9312,
      "step": 163280
    },
    {
      "epoch": 0.5722907400982031,
      "grad_norm": 2.953125,
      "learning_rate": 4.495769631169991e-05,
      "loss": 0.8723,
      "step": 163290
    },
    {
      "epoch": 0.5723257876050987,
      "grad_norm": 2.890625,
      "learning_rate": 4.495704728303621e-05,
      "loss": 0.9188,
      "step": 163300
    },
    {
      "epoch": 0.5723608351119943,
      "grad_norm": 2.859375,
      "learning_rate": 4.495639825437251e-05,
      "loss": 0.8709,
      "step": 163310
    },
    {
      "epoch": 0.5723958826188899,
      "grad_norm": 2.640625,
      "learning_rate": 4.49557492257088e-05,
      "loss": 0.892,
      "step": 163320
    },
    {
      "epoch": 0.5724309301257855,
      "grad_norm": 2.921875,
      "learning_rate": 4.4955100197045104e-05,
      "loss": 0.9176,
      "step": 163330
    },
    {
      "epoch": 0.5724659776326811,
      "grad_norm": 2.515625,
      "learning_rate": 4.49544511683814e-05,
      "loss": 0.9165,
      "step": 163340
    },
    {
      "epoch": 0.5725010251395767,
      "grad_norm": 2.953125,
      "learning_rate": 4.49538021397177e-05,
      "loss": 0.8969,
      "step": 163350
    },
    {
      "epoch": 0.5725360726464723,
      "grad_norm": 2.921875,
      "learning_rate": 4.4953153111053995e-05,
      "loss": 0.9081,
      "step": 163360
    },
    {
      "epoch": 0.5725711201533679,
      "grad_norm": 2.984375,
      "learning_rate": 4.4952504082390296e-05,
      "loss": 0.917,
      "step": 163370
    },
    {
      "epoch": 0.5726061676602635,
      "grad_norm": 2.875,
      "learning_rate": 4.495185505372659e-05,
      "loss": 0.9776,
      "step": 163380
    },
    {
      "epoch": 0.572641215167159,
      "grad_norm": 2.796875,
      "learning_rate": 4.495120602506289e-05,
      "loss": 0.8678,
      "step": 163390
    },
    {
      "epoch": 0.5726762626740547,
      "grad_norm": 2.578125,
      "learning_rate": 4.495055699639919e-05,
      "loss": 0.8808,
      "step": 163400
    },
    {
      "epoch": 0.5727113101809502,
      "grad_norm": 3.0,
      "learning_rate": 4.494990796773549e-05,
      "loss": 0.9066,
      "step": 163410
    },
    {
      "epoch": 0.5727463576878459,
      "grad_norm": 2.65625,
      "learning_rate": 4.494925893907179e-05,
      "loss": 0.8935,
      "step": 163420
    },
    {
      "epoch": 0.5727814051947415,
      "grad_norm": 2.890625,
      "learning_rate": 4.4948609910408084e-05,
      "loss": 0.9758,
      "step": 163430
    },
    {
      "epoch": 0.5728164527016371,
      "grad_norm": 2.796875,
      "learning_rate": 4.4947960881744386e-05,
      "loss": 0.878,
      "step": 163440
    },
    {
      "epoch": 0.5728515002085327,
      "grad_norm": 4.78125,
      "learning_rate": 4.494731185308068e-05,
      "loss": 0.8684,
      "step": 163450
    },
    {
      "epoch": 0.5728865477154282,
      "grad_norm": 3.46875,
      "learning_rate": 4.494666282441698e-05,
      "loss": 0.975,
      "step": 163460
    },
    {
      "epoch": 0.5729215952223239,
      "grad_norm": 2.96875,
      "learning_rate": 4.4946013795753276e-05,
      "loss": 0.8344,
      "step": 163470
    },
    {
      "epoch": 0.5729566427292194,
      "grad_norm": 3.296875,
      "learning_rate": 4.494536476708958e-05,
      "loss": 0.972,
      "step": 163480
    },
    {
      "epoch": 0.5729916902361151,
      "grad_norm": 3.15625,
      "learning_rate": 4.494471573842587e-05,
      "loss": 0.8784,
      "step": 163490
    },
    {
      "epoch": 0.5730267377430106,
      "grad_norm": 2.984375,
      "learning_rate": 4.4944066709762174e-05,
      "loss": 0.9123,
      "step": 163500
    },
    {
      "epoch": 0.5730617852499063,
      "grad_norm": 3.015625,
      "learning_rate": 4.494341768109847e-05,
      "loss": 0.9232,
      "step": 163510
    },
    {
      "epoch": 0.5730968327568019,
      "grad_norm": 3.0625,
      "learning_rate": 4.494276865243477e-05,
      "loss": 0.9656,
      "step": 163520
    },
    {
      "epoch": 0.5731318802636974,
      "grad_norm": 2.59375,
      "learning_rate": 4.4942119623771064e-05,
      "loss": 0.8961,
      "step": 163530
    },
    {
      "epoch": 0.5731669277705931,
      "grad_norm": 2.578125,
      "learning_rate": 4.4941470595107366e-05,
      "loss": 0.8624,
      "step": 163540
    },
    {
      "epoch": 0.5732019752774886,
      "grad_norm": 3.03125,
      "learning_rate": 4.494082156644367e-05,
      "loss": 0.946,
      "step": 163550
    },
    {
      "epoch": 0.5732370227843843,
      "grad_norm": 3.03125,
      "learning_rate": 4.494017253777996e-05,
      "loss": 0.9058,
      "step": 163560
    },
    {
      "epoch": 0.5732720702912798,
      "grad_norm": 2.59375,
      "learning_rate": 4.493952350911626e-05,
      "loss": 0.9568,
      "step": 163570
    },
    {
      "epoch": 0.5733071177981754,
      "grad_norm": 2.890625,
      "learning_rate": 4.493887448045256e-05,
      "loss": 0.9968,
      "step": 163580
    },
    {
      "epoch": 0.573342165305071,
      "grad_norm": 3.0,
      "learning_rate": 4.493822545178886e-05,
      "loss": 0.8459,
      "step": 163590
    },
    {
      "epoch": 0.5733772128119666,
      "grad_norm": 3.015625,
      "learning_rate": 4.4937576423125154e-05,
      "loss": 0.9362,
      "step": 163600
    },
    {
      "epoch": 0.5734122603188622,
      "grad_norm": 3.015625,
      "learning_rate": 4.4936927394461455e-05,
      "loss": 1.0109,
      "step": 163610
    },
    {
      "epoch": 0.5734473078257578,
      "grad_norm": 2.828125,
      "learning_rate": 4.493627836579775e-05,
      "loss": 0.8904,
      "step": 163620
    },
    {
      "epoch": 0.5734823553326535,
      "grad_norm": 2.546875,
      "learning_rate": 4.493562933713405e-05,
      "loss": 0.9608,
      "step": 163630
    },
    {
      "epoch": 0.573517402839549,
      "grad_norm": 2.671875,
      "learning_rate": 4.4934980308470346e-05,
      "loss": 0.8694,
      "step": 163640
    },
    {
      "epoch": 0.5735524503464446,
      "grad_norm": 2.65625,
      "learning_rate": 4.493433127980665e-05,
      "loss": 0.8652,
      "step": 163650
    },
    {
      "epoch": 0.5735874978533402,
      "grad_norm": 3.0625,
      "learning_rate": 4.493368225114294e-05,
      "loss": 1.0093,
      "step": 163660
    },
    {
      "epoch": 0.5736225453602358,
      "grad_norm": 2.875,
      "learning_rate": 4.4933033222479236e-05,
      "loss": 0.8949,
      "step": 163670
    },
    {
      "epoch": 0.5736575928671314,
      "grad_norm": 2.921875,
      "learning_rate": 4.493238419381554e-05,
      "loss": 0.8655,
      "step": 163680
    },
    {
      "epoch": 0.573692640374027,
      "grad_norm": 3.015625,
      "learning_rate": 4.493173516515183e-05,
      "loss": 1.0267,
      "step": 163690
    },
    {
      "epoch": 0.5737276878809225,
      "grad_norm": 3.265625,
      "learning_rate": 4.4931086136488134e-05,
      "loss": 0.9783,
      "step": 163700
    },
    {
      "epoch": 0.5737627353878182,
      "grad_norm": 2.78125,
      "learning_rate": 4.493043710782443e-05,
      "loss": 0.8226,
      "step": 163710
    },
    {
      "epoch": 0.5737977828947138,
      "grad_norm": 2.546875,
      "learning_rate": 4.492978807916073e-05,
      "loss": 0.8523,
      "step": 163720
    },
    {
      "epoch": 0.5738328304016094,
      "grad_norm": 3.046875,
      "learning_rate": 4.4929139050497024e-05,
      "loss": 0.9048,
      "step": 163730
    },
    {
      "epoch": 0.573867877908505,
      "grad_norm": 3.40625,
      "learning_rate": 4.4928490021833326e-05,
      "loss": 1.0061,
      "step": 163740
    },
    {
      "epoch": 0.5739029254154006,
      "grad_norm": 2.65625,
      "learning_rate": 4.492784099316962e-05,
      "loss": 0.8908,
      "step": 163750
    },
    {
      "epoch": 0.5739379729222962,
      "grad_norm": 3.125,
      "learning_rate": 4.492719196450592e-05,
      "loss": 0.902,
      "step": 163760
    },
    {
      "epoch": 0.5739730204291917,
      "grad_norm": 3.171875,
      "learning_rate": 4.4926542935842216e-05,
      "loss": 0.8813,
      "step": 163770
    },
    {
      "epoch": 0.5740080679360874,
      "grad_norm": 2.703125,
      "learning_rate": 4.492589390717852e-05,
      "loss": 0.9201,
      "step": 163780
    },
    {
      "epoch": 0.5740431154429829,
      "grad_norm": 2.390625,
      "learning_rate": 4.492524487851482e-05,
      "loss": 0.9255,
      "step": 163790
    },
    {
      "epoch": 0.5740781629498786,
      "grad_norm": 3.0,
      "learning_rate": 4.4924595849851114e-05,
      "loss": 0.9212,
      "step": 163800
    },
    {
      "epoch": 0.5741132104567741,
      "grad_norm": 2.75,
      "learning_rate": 4.4923946821187415e-05,
      "loss": 0.8382,
      "step": 163810
    },
    {
      "epoch": 0.5741482579636698,
      "grad_norm": 2.640625,
      "learning_rate": 4.492329779252371e-05,
      "loss": 0.9752,
      "step": 163820
    },
    {
      "epoch": 0.5741833054705654,
      "grad_norm": 2.78125,
      "learning_rate": 4.492264876386001e-05,
      "loss": 0.8732,
      "step": 163830
    },
    {
      "epoch": 0.5742183529774609,
      "grad_norm": 3.015625,
      "learning_rate": 4.4921999735196306e-05,
      "loss": 0.8745,
      "step": 163840
    },
    {
      "epoch": 0.5742534004843566,
      "grad_norm": 2.984375,
      "learning_rate": 4.492135070653261e-05,
      "loss": 0.9498,
      "step": 163850
    },
    {
      "epoch": 0.5742884479912521,
      "grad_norm": 2.625,
      "learning_rate": 4.49207016778689e-05,
      "loss": 0.8293,
      "step": 163860
    },
    {
      "epoch": 0.5743234954981478,
      "grad_norm": 3.25,
      "learning_rate": 4.49200526492052e-05,
      "loss": 0.9512,
      "step": 163870
    },
    {
      "epoch": 0.5743585430050433,
      "grad_norm": 3.609375,
      "learning_rate": 4.49194036205415e-05,
      "loss": 0.9632,
      "step": 163880
    },
    {
      "epoch": 0.574393590511939,
      "grad_norm": 2.859375,
      "learning_rate": 4.49187545918778e-05,
      "loss": 0.8894,
      "step": 163890
    },
    {
      "epoch": 0.5744286380188345,
      "grad_norm": 3.34375,
      "learning_rate": 4.4918105563214094e-05,
      "loss": 1.0046,
      "step": 163900
    },
    {
      "epoch": 0.5744636855257301,
      "grad_norm": 3.421875,
      "learning_rate": 4.4917456534550395e-05,
      "loss": 0.8219,
      "step": 163910
    },
    {
      "epoch": 0.5744987330326258,
      "grad_norm": 3.109375,
      "learning_rate": 4.4916807505886697e-05,
      "loss": 0.9022,
      "step": 163920
    },
    {
      "epoch": 0.5745337805395213,
      "grad_norm": 2.78125,
      "learning_rate": 4.491615847722299e-05,
      "loss": 0.8885,
      "step": 163930
    },
    {
      "epoch": 0.574568828046417,
      "grad_norm": 2.71875,
      "learning_rate": 4.491550944855929e-05,
      "loss": 0.8859,
      "step": 163940
    },
    {
      "epoch": 0.5746038755533125,
      "grad_norm": 3.71875,
      "learning_rate": 4.491486041989559e-05,
      "loss": 0.9246,
      "step": 163950
    },
    {
      "epoch": 0.5746389230602081,
      "grad_norm": 2.609375,
      "learning_rate": 4.491421139123189e-05,
      "loss": 0.8179,
      "step": 163960
    },
    {
      "epoch": 0.5746739705671037,
      "grad_norm": 2.65625,
      "learning_rate": 4.491356236256818e-05,
      "loss": 0.8892,
      "step": 163970
    },
    {
      "epoch": 0.5747090180739993,
      "grad_norm": 2.8125,
      "learning_rate": 4.4912913333904485e-05,
      "loss": 1.0025,
      "step": 163980
    },
    {
      "epoch": 0.5747440655808949,
      "grad_norm": 3.046875,
      "learning_rate": 4.491226430524078e-05,
      "loss": 0.9424,
      "step": 163990
    },
    {
      "epoch": 0.5747791130877905,
      "grad_norm": 2.84375,
      "learning_rate": 4.491161527657708e-05,
      "loss": 0.9321,
      "step": 164000
    },
    {
      "epoch": 0.5748141605946862,
      "grad_norm": 2.828125,
      "learning_rate": 4.4910966247913375e-05,
      "loss": 0.8623,
      "step": 164010
    },
    {
      "epoch": 0.5748492081015817,
      "grad_norm": 2.953125,
      "learning_rate": 4.4910317219249677e-05,
      "loss": 0.7746,
      "step": 164020
    },
    {
      "epoch": 0.5748842556084773,
      "grad_norm": 3.078125,
      "learning_rate": 4.490966819058597e-05,
      "loss": 0.8348,
      "step": 164030
    },
    {
      "epoch": 0.5749193031153729,
      "grad_norm": 3.0,
      "learning_rate": 4.4909019161922266e-05,
      "loss": 1.0059,
      "step": 164040
    },
    {
      "epoch": 0.5749543506222685,
      "grad_norm": 2.703125,
      "learning_rate": 4.490837013325857e-05,
      "loss": 0.9501,
      "step": 164050
    },
    {
      "epoch": 0.574989398129164,
      "grad_norm": 2.4375,
      "learning_rate": 4.490772110459486e-05,
      "loss": 0.7974,
      "step": 164060
    },
    {
      "epoch": 0.5750244456360597,
      "grad_norm": 2.859375,
      "learning_rate": 4.490707207593116e-05,
      "loss": 0.9203,
      "step": 164070
    },
    {
      "epoch": 0.5750594931429552,
      "grad_norm": 2.734375,
      "learning_rate": 4.490642304726746e-05,
      "loss": 0.8692,
      "step": 164080
    },
    {
      "epoch": 0.5750945406498509,
      "grad_norm": 2.703125,
      "learning_rate": 4.490577401860376e-05,
      "loss": 0.8926,
      "step": 164090
    },
    {
      "epoch": 0.5751295881567464,
      "grad_norm": 3.328125,
      "learning_rate": 4.4905124989940054e-05,
      "loss": 0.932,
      "step": 164100
    },
    {
      "epoch": 0.5751646356636421,
      "grad_norm": 2.78125,
      "learning_rate": 4.4904475961276355e-05,
      "loss": 0.9259,
      "step": 164110
    },
    {
      "epoch": 0.5751996831705377,
      "grad_norm": 3.390625,
      "learning_rate": 4.490382693261265e-05,
      "loss": 0.8545,
      "step": 164120
    },
    {
      "epoch": 0.5752347306774332,
      "grad_norm": 3.140625,
      "learning_rate": 4.490317790394895e-05,
      "loss": 0.8182,
      "step": 164130
    },
    {
      "epoch": 0.5752697781843289,
      "grad_norm": 2.59375,
      "learning_rate": 4.4902528875285246e-05,
      "loss": 0.8604,
      "step": 164140
    },
    {
      "epoch": 0.5753048256912244,
      "grad_norm": 2.96875,
      "learning_rate": 4.490187984662155e-05,
      "loss": 0.9007,
      "step": 164150
    },
    {
      "epoch": 0.5753398731981201,
      "grad_norm": 2.96875,
      "learning_rate": 4.490123081795785e-05,
      "loss": 0.9929,
      "step": 164160
    },
    {
      "epoch": 0.5753749207050156,
      "grad_norm": 3.515625,
      "learning_rate": 4.490058178929414e-05,
      "loss": 0.9498,
      "step": 164170
    },
    {
      "epoch": 0.5754099682119113,
      "grad_norm": 3.421875,
      "learning_rate": 4.4899932760630445e-05,
      "loss": 0.8589,
      "step": 164180
    },
    {
      "epoch": 0.5754450157188068,
      "grad_norm": 2.765625,
      "learning_rate": 4.489928373196674e-05,
      "loss": 0.9866,
      "step": 164190
    },
    {
      "epoch": 0.5754800632257024,
      "grad_norm": 2.921875,
      "learning_rate": 4.489863470330304e-05,
      "loss": 0.9421,
      "step": 164200
    },
    {
      "epoch": 0.5755151107325981,
      "grad_norm": 2.96875,
      "learning_rate": 4.4897985674639335e-05,
      "loss": 0.9137,
      "step": 164210
    },
    {
      "epoch": 0.5755501582394936,
      "grad_norm": 2.453125,
      "learning_rate": 4.4897336645975637e-05,
      "loss": 0.9036,
      "step": 164220
    },
    {
      "epoch": 0.5755852057463893,
      "grad_norm": 2.9375,
      "learning_rate": 4.489668761731193e-05,
      "loss": 0.928,
      "step": 164230
    },
    {
      "epoch": 0.5756202532532848,
      "grad_norm": 3.25,
      "learning_rate": 4.489603858864823e-05,
      "loss": 0.9737,
      "step": 164240
    },
    {
      "epoch": 0.5756553007601805,
      "grad_norm": 2.765625,
      "learning_rate": 4.489538955998453e-05,
      "loss": 0.9006,
      "step": 164250
    },
    {
      "epoch": 0.575690348267076,
      "grad_norm": 3.203125,
      "learning_rate": 4.489474053132083e-05,
      "loss": 0.834,
      "step": 164260
    },
    {
      "epoch": 0.5757253957739716,
      "grad_norm": 2.984375,
      "learning_rate": 4.489409150265712e-05,
      "loss": 1.0103,
      "step": 164270
    },
    {
      "epoch": 0.5757604432808672,
      "grad_norm": 2.78125,
      "learning_rate": 4.4893442473993425e-05,
      "loss": 0.9165,
      "step": 164280
    },
    {
      "epoch": 0.5757954907877628,
      "grad_norm": 2.796875,
      "learning_rate": 4.4892793445329726e-05,
      "loss": 0.9782,
      "step": 164290
    },
    {
      "epoch": 0.5758305382946584,
      "grad_norm": 2.859375,
      "learning_rate": 4.489214441666602e-05,
      "loss": 0.9406,
      "step": 164300
    },
    {
      "epoch": 0.575865585801554,
      "grad_norm": 2.890625,
      "learning_rate": 4.489149538800232e-05,
      "loss": 0.9446,
      "step": 164310
    },
    {
      "epoch": 0.5759006333084496,
      "grad_norm": 3.21875,
      "learning_rate": 4.4890846359338617e-05,
      "loss": 0.9567,
      "step": 164320
    },
    {
      "epoch": 0.5759356808153452,
      "grad_norm": 2.875,
      "learning_rate": 4.489019733067492e-05,
      "loss": 0.9114,
      "step": 164330
    },
    {
      "epoch": 0.5759707283222408,
      "grad_norm": 3.359375,
      "learning_rate": 4.488954830201121e-05,
      "loss": 0.8716,
      "step": 164340
    },
    {
      "epoch": 0.5760057758291364,
      "grad_norm": 3.015625,
      "learning_rate": 4.4888899273347514e-05,
      "loss": 0.9119,
      "step": 164350
    },
    {
      "epoch": 0.576040823336032,
      "grad_norm": 2.96875,
      "learning_rate": 4.488825024468381e-05,
      "loss": 0.944,
      "step": 164360
    },
    {
      "epoch": 0.5760758708429276,
      "grad_norm": 2.890625,
      "learning_rate": 4.488760121602011e-05,
      "loss": 0.9442,
      "step": 164370
    },
    {
      "epoch": 0.5761109183498232,
      "grad_norm": 3.59375,
      "learning_rate": 4.4886952187356405e-05,
      "loss": 0.9051,
      "step": 164380
    },
    {
      "epoch": 0.5761459658567187,
      "grad_norm": 3.03125,
      "learning_rate": 4.4886303158692706e-05,
      "loss": 0.8474,
      "step": 164390
    },
    {
      "epoch": 0.5761810133636144,
      "grad_norm": 3.015625,
      "learning_rate": 4.4885654130029e-05,
      "loss": 0.813,
      "step": 164400
    },
    {
      "epoch": 0.57621606087051,
      "grad_norm": 2.53125,
      "learning_rate": 4.4885005101365295e-05,
      "loss": 0.8858,
      "step": 164410
    },
    {
      "epoch": 0.5762511083774056,
      "grad_norm": 3.21875,
      "learning_rate": 4.4884356072701597e-05,
      "loss": 0.8693,
      "step": 164420
    },
    {
      "epoch": 0.5762861558843012,
      "grad_norm": 3.25,
      "learning_rate": 4.488370704403789e-05,
      "loss": 0.9905,
      "step": 164430
    },
    {
      "epoch": 0.5763212033911967,
      "grad_norm": 3.328125,
      "learning_rate": 4.488305801537419e-05,
      "loss": 1.0348,
      "step": 164440
    },
    {
      "epoch": 0.5763562508980924,
      "grad_norm": 3.296875,
      "learning_rate": 4.488240898671049e-05,
      "loss": 0.8981,
      "step": 164450
    },
    {
      "epoch": 0.5763912984049879,
      "grad_norm": 3.1875,
      "learning_rate": 4.488175995804679e-05,
      "loss": 0.9171,
      "step": 164460
    },
    {
      "epoch": 0.5764263459118836,
      "grad_norm": 2.78125,
      "learning_rate": 4.488111092938308e-05,
      "loss": 0.7977,
      "step": 164470
    },
    {
      "epoch": 0.5764613934187791,
      "grad_norm": 3.28125,
      "learning_rate": 4.4880461900719385e-05,
      "loss": 1.007,
      "step": 164480
    },
    {
      "epoch": 0.5764964409256748,
      "grad_norm": 2.609375,
      "learning_rate": 4.487981287205568e-05,
      "loss": 0.92,
      "step": 164490
    },
    {
      "epoch": 0.5765314884325703,
      "grad_norm": 2.609375,
      "learning_rate": 4.487916384339198e-05,
      "loss": 0.8832,
      "step": 164500
    },
    {
      "epoch": 0.5765665359394659,
      "grad_norm": 2.90625,
      "learning_rate": 4.487851481472828e-05,
      "loss": 0.9037,
      "step": 164510
    },
    {
      "epoch": 0.5766015834463616,
      "grad_norm": 3.03125,
      "learning_rate": 4.4877865786064577e-05,
      "loss": 0.8774,
      "step": 164520
    },
    {
      "epoch": 0.5766366309532571,
      "grad_norm": 3.59375,
      "learning_rate": 4.487721675740088e-05,
      "loss": 0.9312,
      "step": 164530
    },
    {
      "epoch": 0.5766716784601528,
      "grad_norm": 3.0625,
      "learning_rate": 4.487656772873717e-05,
      "loss": 0.9267,
      "step": 164540
    },
    {
      "epoch": 0.5767067259670483,
      "grad_norm": 2.90625,
      "learning_rate": 4.4875918700073474e-05,
      "loss": 0.8531,
      "step": 164550
    },
    {
      "epoch": 0.576741773473944,
      "grad_norm": 2.8125,
      "learning_rate": 4.487526967140977e-05,
      "loss": 0.8415,
      "step": 164560
    },
    {
      "epoch": 0.5767768209808395,
      "grad_norm": 3.4375,
      "learning_rate": 4.487462064274607e-05,
      "loss": 0.9976,
      "step": 164570
    },
    {
      "epoch": 0.5768118684877351,
      "grad_norm": 3.3125,
      "learning_rate": 4.4873971614082365e-05,
      "loss": 0.9503,
      "step": 164580
    },
    {
      "epoch": 0.5768469159946307,
      "grad_norm": 3.25,
      "learning_rate": 4.4873322585418666e-05,
      "loss": 0.9533,
      "step": 164590
    },
    {
      "epoch": 0.5768819635015263,
      "grad_norm": 3.25,
      "learning_rate": 4.487267355675496e-05,
      "loss": 0.8933,
      "step": 164600
    },
    {
      "epoch": 0.576917011008422,
      "grad_norm": 2.953125,
      "learning_rate": 4.487202452809126e-05,
      "loss": 0.9799,
      "step": 164610
    },
    {
      "epoch": 0.5769520585153175,
      "grad_norm": 3.0,
      "learning_rate": 4.4871375499427557e-05,
      "loss": 0.9271,
      "step": 164620
    },
    {
      "epoch": 0.5769871060222131,
      "grad_norm": 3.15625,
      "learning_rate": 4.487072647076386e-05,
      "loss": 0.8606,
      "step": 164630
    },
    {
      "epoch": 0.5770221535291087,
      "grad_norm": 3.0,
      "learning_rate": 4.487007744210015e-05,
      "loss": 0.969,
      "step": 164640
    },
    {
      "epoch": 0.5770572010360043,
      "grad_norm": 3.34375,
      "learning_rate": 4.4869428413436454e-05,
      "loss": 0.9465,
      "step": 164650
    },
    {
      "epoch": 0.5770922485428999,
      "grad_norm": 3.125,
      "learning_rate": 4.4868779384772755e-05,
      "loss": 0.9605,
      "step": 164660
    },
    {
      "epoch": 0.5771272960497955,
      "grad_norm": 2.671875,
      "learning_rate": 4.486813035610905e-05,
      "loss": 0.8677,
      "step": 164670
    },
    {
      "epoch": 0.577162343556691,
      "grad_norm": 3.09375,
      "learning_rate": 4.486748132744535e-05,
      "loss": 0.9375,
      "step": 164680
    },
    {
      "epoch": 0.5771973910635867,
      "grad_norm": 2.890625,
      "learning_rate": 4.4866832298781646e-05,
      "loss": 0.8824,
      "step": 164690
    },
    {
      "epoch": 0.5772324385704823,
      "grad_norm": 3.0625,
      "learning_rate": 4.486618327011795e-05,
      "loss": 0.9938,
      "step": 164700
    },
    {
      "epoch": 0.5772674860773779,
      "grad_norm": 3.109375,
      "learning_rate": 4.486553424145424e-05,
      "loss": 0.9692,
      "step": 164710
    },
    {
      "epoch": 0.5773025335842735,
      "grad_norm": 3.171875,
      "learning_rate": 4.486488521279054e-05,
      "loss": 0.9381,
      "step": 164720
    },
    {
      "epoch": 0.5773375810911691,
      "grad_norm": 2.703125,
      "learning_rate": 4.486423618412684e-05,
      "loss": 0.9041,
      "step": 164730
    },
    {
      "epoch": 0.5773726285980647,
      "grad_norm": 3.21875,
      "learning_rate": 4.486358715546314e-05,
      "loss": 0.9175,
      "step": 164740
    },
    {
      "epoch": 0.5774076761049602,
      "grad_norm": 3.109375,
      "learning_rate": 4.4862938126799434e-05,
      "loss": 0.9018,
      "step": 164750
    },
    {
      "epoch": 0.5774427236118559,
      "grad_norm": 3.328125,
      "learning_rate": 4.4862289098135735e-05,
      "loss": 0.9534,
      "step": 164760
    },
    {
      "epoch": 0.5774777711187514,
      "grad_norm": 3.15625,
      "learning_rate": 4.486164006947203e-05,
      "loss": 0.9709,
      "step": 164770
    },
    {
      "epoch": 0.5775128186256471,
      "grad_norm": 2.65625,
      "learning_rate": 4.486099104080833e-05,
      "loss": 0.883,
      "step": 164780
    },
    {
      "epoch": 0.5775478661325426,
      "grad_norm": 2.96875,
      "learning_rate": 4.4860342012144626e-05,
      "loss": 0.9068,
      "step": 164790
    },
    {
      "epoch": 0.5775829136394383,
      "grad_norm": 2.859375,
      "learning_rate": 4.485969298348092e-05,
      "loss": 0.9763,
      "step": 164800
    },
    {
      "epoch": 0.5776179611463339,
      "grad_norm": 3.171875,
      "learning_rate": 4.485904395481722e-05,
      "loss": 0.9749,
      "step": 164810
    },
    {
      "epoch": 0.5776530086532294,
      "grad_norm": 2.671875,
      "learning_rate": 4.4858394926153517e-05,
      "loss": 0.922,
      "step": 164820
    },
    {
      "epoch": 0.5776880561601251,
      "grad_norm": 2.875,
      "learning_rate": 4.485774589748982e-05,
      "loss": 0.8706,
      "step": 164830
    },
    {
      "epoch": 0.5777231036670206,
      "grad_norm": 2.578125,
      "learning_rate": 4.485709686882611e-05,
      "loss": 0.9657,
      "step": 164840
    },
    {
      "epoch": 0.5777581511739163,
      "grad_norm": 3.1875,
      "learning_rate": 4.4856447840162414e-05,
      "loss": 0.9568,
      "step": 164850
    },
    {
      "epoch": 0.5777931986808118,
      "grad_norm": 3.390625,
      "learning_rate": 4.485579881149871e-05,
      "loss": 0.8548,
      "step": 164860
    },
    {
      "epoch": 0.5778282461877075,
      "grad_norm": 3.03125,
      "learning_rate": 4.485514978283501e-05,
      "loss": 1.0098,
      "step": 164870
    },
    {
      "epoch": 0.577863293694603,
      "grad_norm": 3.09375,
      "learning_rate": 4.485450075417131e-05,
      "loss": 0.8609,
      "step": 164880
    },
    {
      "epoch": 0.5778983412014986,
      "grad_norm": 3.890625,
      "learning_rate": 4.4853851725507606e-05,
      "loss": 1.003,
      "step": 164890
    },
    {
      "epoch": 0.5779333887083943,
      "grad_norm": 2.9375,
      "learning_rate": 4.485320269684391e-05,
      "loss": 0.8618,
      "step": 164900
    },
    {
      "epoch": 0.5779684362152898,
      "grad_norm": 3.125,
      "learning_rate": 4.48525536681802e-05,
      "loss": 0.9675,
      "step": 164910
    },
    {
      "epoch": 0.5780034837221855,
      "grad_norm": 2.71875,
      "learning_rate": 4.48519046395165e-05,
      "loss": 0.9196,
      "step": 164920
    },
    {
      "epoch": 0.578038531229081,
      "grad_norm": 3.203125,
      "learning_rate": 4.48512556108528e-05,
      "loss": 0.8679,
      "step": 164930
    },
    {
      "epoch": 0.5780735787359766,
      "grad_norm": 3.25,
      "learning_rate": 4.48506065821891e-05,
      "loss": 0.8986,
      "step": 164940
    },
    {
      "epoch": 0.5781086262428722,
      "grad_norm": 2.640625,
      "learning_rate": 4.4849957553525394e-05,
      "loss": 0.9189,
      "step": 164950
    },
    {
      "epoch": 0.5781436737497678,
      "grad_norm": 3.015625,
      "learning_rate": 4.4849308524861695e-05,
      "loss": 0.9319,
      "step": 164960
    },
    {
      "epoch": 0.5781787212566634,
      "grad_norm": 2.828125,
      "learning_rate": 4.484865949619799e-05,
      "loss": 0.9575,
      "step": 164970
    },
    {
      "epoch": 0.578213768763559,
      "grad_norm": 2.90625,
      "learning_rate": 4.484801046753429e-05,
      "loss": 0.9776,
      "step": 164980
    },
    {
      "epoch": 0.5782488162704545,
      "grad_norm": 3.140625,
      "learning_rate": 4.4847361438870586e-05,
      "loss": 0.9332,
      "step": 164990
    },
    {
      "epoch": 0.5782838637773502,
      "grad_norm": 2.59375,
      "learning_rate": 4.484671241020689e-05,
      "loss": 0.8618,
      "step": 165000
    },
    {
      "epoch": 0.5782838637773502,
      "eval_loss": 0.8617709279060364,
      "eval_runtime": 564.601,
      "eval_samples_per_second": 673.814,
      "eval_steps_per_second": 56.151,
      "step": 165000
    },
    {
      "epoch": 0.5783189112842458,
      "grad_norm": 2.5625,
      "learning_rate": 4.484606338154318e-05,
      "loss": 0.8953,
      "step": 165010
    },
    {
      "epoch": 0.5783539587911414,
      "grad_norm": 2.53125,
      "learning_rate": 4.484541435287948e-05,
      "loss": 0.8971,
      "step": 165020
    },
    {
      "epoch": 0.578389006298037,
      "grad_norm": 2.703125,
      "learning_rate": 4.4844765324215785e-05,
      "loss": 0.861,
      "step": 165030
    },
    {
      "epoch": 0.5784240538049326,
      "grad_norm": 3.1875,
      "learning_rate": 4.484411629555208e-05,
      "loss": 0.9014,
      "step": 165040
    },
    {
      "epoch": 0.5784591013118282,
      "grad_norm": 3.140625,
      "learning_rate": 4.484346726688838e-05,
      "loss": 0.8909,
      "step": 165050
    },
    {
      "epoch": 0.5784941488187237,
      "grad_norm": 2.71875,
      "learning_rate": 4.4842818238224675e-05,
      "loss": 0.9273,
      "step": 165060
    },
    {
      "epoch": 0.5785291963256194,
      "grad_norm": 2.859375,
      "learning_rate": 4.484216920956098e-05,
      "loss": 0.8933,
      "step": 165070
    },
    {
      "epoch": 0.5785642438325149,
      "grad_norm": 3.09375,
      "learning_rate": 4.484152018089727e-05,
      "loss": 0.8798,
      "step": 165080
    },
    {
      "epoch": 0.5785992913394106,
      "grad_norm": 3.40625,
      "learning_rate": 4.484087115223357e-05,
      "loss": 0.9501,
      "step": 165090
    },
    {
      "epoch": 0.5786343388463062,
      "grad_norm": 2.625,
      "learning_rate": 4.484022212356987e-05,
      "loss": 0.884,
      "step": 165100
    },
    {
      "epoch": 0.5786693863532018,
      "grad_norm": 2.65625,
      "learning_rate": 4.483957309490617e-05,
      "loss": 0.8991,
      "step": 165110
    },
    {
      "epoch": 0.5787044338600974,
      "grad_norm": 3.375,
      "learning_rate": 4.483892406624246e-05,
      "loss": 0.9099,
      "step": 165120
    },
    {
      "epoch": 0.5787394813669929,
      "grad_norm": 3.3125,
      "learning_rate": 4.4838275037578765e-05,
      "loss": 0.8591,
      "step": 165130
    },
    {
      "epoch": 0.5787745288738886,
      "grad_norm": 3.046875,
      "learning_rate": 4.483762600891506e-05,
      "loss": 0.9489,
      "step": 165140
    },
    {
      "epoch": 0.5788095763807841,
      "grad_norm": 3.234375,
      "learning_rate": 4.483697698025136e-05,
      "loss": 0.8854,
      "step": 165150
    },
    {
      "epoch": 0.5788446238876798,
      "grad_norm": 2.90625,
      "learning_rate": 4.4836327951587655e-05,
      "loss": 0.9208,
      "step": 165160
    },
    {
      "epoch": 0.5788796713945753,
      "grad_norm": 2.78125,
      "learning_rate": 4.483567892292395e-05,
      "loss": 0.9329,
      "step": 165170
    },
    {
      "epoch": 0.578914718901471,
      "grad_norm": 2.734375,
      "learning_rate": 4.483502989426025e-05,
      "loss": 0.9521,
      "step": 165180
    },
    {
      "epoch": 0.5789497664083666,
      "grad_norm": 3.15625,
      "learning_rate": 4.4834380865596546e-05,
      "loss": 0.9196,
      "step": 165190
    },
    {
      "epoch": 0.5789848139152621,
      "grad_norm": 2.875,
      "learning_rate": 4.483373183693285e-05,
      "loss": 0.9523,
      "step": 165200
    },
    {
      "epoch": 0.5790198614221578,
      "grad_norm": 3.03125,
      "learning_rate": 4.483308280826914e-05,
      "loss": 0.9139,
      "step": 165210
    },
    {
      "epoch": 0.5790549089290533,
      "grad_norm": 3.34375,
      "learning_rate": 4.483243377960544e-05,
      "loss": 0.9224,
      "step": 165220
    },
    {
      "epoch": 0.579089956435949,
      "grad_norm": 3.234375,
      "learning_rate": 4.483178475094174e-05,
      "loss": 0.8589,
      "step": 165230
    },
    {
      "epoch": 0.5791250039428445,
      "grad_norm": 3.203125,
      "learning_rate": 4.483113572227804e-05,
      "loss": 0.9807,
      "step": 165240
    },
    {
      "epoch": 0.5791600514497401,
      "grad_norm": 3.40625,
      "learning_rate": 4.483048669361434e-05,
      "loss": 0.9744,
      "step": 165250
    },
    {
      "epoch": 0.5791950989566357,
      "grad_norm": 3.0,
      "learning_rate": 4.4829837664950635e-05,
      "loss": 0.9074,
      "step": 165260
    },
    {
      "epoch": 0.5792301464635313,
      "grad_norm": 2.921875,
      "learning_rate": 4.482918863628694e-05,
      "loss": 0.8809,
      "step": 165270
    },
    {
      "epoch": 0.5792651939704269,
      "grad_norm": 2.96875,
      "learning_rate": 4.482853960762323e-05,
      "loss": 0.9193,
      "step": 165280
    },
    {
      "epoch": 0.5793002414773225,
      "grad_norm": 2.828125,
      "learning_rate": 4.482789057895953e-05,
      "loss": 0.7929,
      "step": 165290
    },
    {
      "epoch": 0.5793352889842182,
      "grad_norm": 3.03125,
      "learning_rate": 4.482724155029583e-05,
      "loss": 0.8097,
      "step": 165300
    },
    {
      "epoch": 0.5793703364911137,
      "grad_norm": 2.390625,
      "learning_rate": 4.482659252163213e-05,
      "loss": 0.8983,
      "step": 165310
    },
    {
      "epoch": 0.5794053839980093,
      "grad_norm": 3.015625,
      "learning_rate": 4.482594349296842e-05,
      "loss": 0.9685,
      "step": 165320
    },
    {
      "epoch": 0.5794404315049049,
      "grad_norm": 2.53125,
      "learning_rate": 4.4825294464304725e-05,
      "loss": 0.9296,
      "step": 165330
    },
    {
      "epoch": 0.5794754790118005,
      "grad_norm": 3.546875,
      "learning_rate": 4.482464543564102e-05,
      "loss": 0.8916,
      "step": 165340
    },
    {
      "epoch": 0.5795105265186961,
      "grad_norm": 2.578125,
      "learning_rate": 4.482399640697732e-05,
      "loss": 0.9516,
      "step": 165350
    },
    {
      "epoch": 0.5795455740255917,
      "grad_norm": 2.640625,
      "learning_rate": 4.4823347378313615e-05,
      "loss": 0.8071,
      "step": 165360
    },
    {
      "epoch": 0.5795806215324872,
      "grad_norm": 3.34375,
      "learning_rate": 4.482269834964992e-05,
      "loss": 0.958,
      "step": 165370
    },
    {
      "epoch": 0.5796156690393829,
      "grad_norm": 3.1875,
      "learning_rate": 4.482204932098622e-05,
      "loss": 0.913,
      "step": 165380
    },
    {
      "epoch": 0.5796507165462785,
      "grad_norm": 3.671875,
      "learning_rate": 4.482140029232251e-05,
      "loss": 0.9982,
      "step": 165390
    },
    {
      "epoch": 0.5796857640531741,
      "grad_norm": 2.9375,
      "learning_rate": 4.4820751263658814e-05,
      "loss": 1.0073,
      "step": 165400
    },
    {
      "epoch": 0.5797208115600697,
      "grad_norm": 2.9375,
      "learning_rate": 4.482010223499511e-05,
      "loss": 0.8716,
      "step": 165410
    },
    {
      "epoch": 0.5797558590669653,
      "grad_norm": 2.828125,
      "learning_rate": 4.481945320633141e-05,
      "loss": 0.9259,
      "step": 165420
    },
    {
      "epoch": 0.5797909065738609,
      "grad_norm": 3.296875,
      "learning_rate": 4.4818804177667705e-05,
      "loss": 0.9128,
      "step": 165430
    },
    {
      "epoch": 0.5798259540807564,
      "grad_norm": 2.375,
      "learning_rate": 4.4818155149004006e-05,
      "loss": 1.0238,
      "step": 165440
    },
    {
      "epoch": 0.5798610015876521,
      "grad_norm": 3.125,
      "learning_rate": 4.48175061203403e-05,
      "loss": 0.9283,
      "step": 165450
    },
    {
      "epoch": 0.5798960490945476,
      "grad_norm": 2.8125,
      "learning_rate": 4.48168570916766e-05,
      "loss": 0.9428,
      "step": 165460
    },
    {
      "epoch": 0.5799310966014433,
      "grad_norm": 3.171875,
      "learning_rate": 4.48162080630129e-05,
      "loss": 0.9014,
      "step": 165470
    },
    {
      "epoch": 0.5799661441083388,
      "grad_norm": 2.84375,
      "learning_rate": 4.48155590343492e-05,
      "loss": 0.9706,
      "step": 165480
    },
    {
      "epoch": 0.5800011916152344,
      "grad_norm": 2.859375,
      "learning_rate": 4.481491000568549e-05,
      "loss": 0.9324,
      "step": 165490
    },
    {
      "epoch": 0.5800362391221301,
      "grad_norm": 3.0,
      "learning_rate": 4.4814260977021794e-05,
      "loss": 0.9724,
      "step": 165500
    },
    {
      "epoch": 0.5800712866290256,
      "grad_norm": 3.0625,
      "learning_rate": 4.481361194835809e-05,
      "loss": 0.9248,
      "step": 165510
    },
    {
      "epoch": 0.5801063341359213,
      "grad_norm": 3.171875,
      "learning_rate": 4.481296291969439e-05,
      "loss": 0.8585,
      "step": 165520
    },
    {
      "epoch": 0.5801413816428168,
      "grad_norm": 3.34375,
      "learning_rate": 4.481231389103069e-05,
      "loss": 1.0014,
      "step": 165530
    },
    {
      "epoch": 0.5801764291497125,
      "grad_norm": 3.109375,
      "learning_rate": 4.481166486236698e-05,
      "loss": 0.8989,
      "step": 165540
    },
    {
      "epoch": 0.580211476656608,
      "grad_norm": 2.78125,
      "learning_rate": 4.481101583370328e-05,
      "loss": 0.8863,
      "step": 165550
    },
    {
      "epoch": 0.5802465241635036,
      "grad_norm": 3.046875,
      "learning_rate": 4.4810366805039575e-05,
      "loss": 0.841,
      "step": 165560
    },
    {
      "epoch": 0.5802815716703992,
      "grad_norm": 3.09375,
      "learning_rate": 4.480971777637588e-05,
      "loss": 0.8982,
      "step": 165570
    },
    {
      "epoch": 0.5803166191772948,
      "grad_norm": 2.640625,
      "learning_rate": 4.480906874771217e-05,
      "loss": 0.9367,
      "step": 165580
    },
    {
      "epoch": 0.5803516666841905,
      "grad_norm": 3.265625,
      "learning_rate": 4.480841971904847e-05,
      "loss": 0.8858,
      "step": 165590
    },
    {
      "epoch": 0.580386714191086,
      "grad_norm": 3.328125,
      "learning_rate": 4.480777069038477e-05,
      "loss": 0.8703,
      "step": 165600
    },
    {
      "epoch": 0.5804217616979817,
      "grad_norm": 2.734375,
      "learning_rate": 4.480712166172107e-05,
      "loss": 0.9125,
      "step": 165610
    },
    {
      "epoch": 0.5804568092048772,
      "grad_norm": 3.125,
      "learning_rate": 4.480647263305737e-05,
      "loss": 0.9333,
      "step": 165620
    },
    {
      "epoch": 0.5804918567117728,
      "grad_norm": 3.109375,
      "learning_rate": 4.4805823604393665e-05,
      "loss": 0.8926,
      "step": 165630
    },
    {
      "epoch": 0.5805269042186684,
      "grad_norm": 2.84375,
      "learning_rate": 4.4805174575729966e-05,
      "loss": 1.009,
      "step": 165640
    },
    {
      "epoch": 0.580561951725564,
      "grad_norm": 2.71875,
      "learning_rate": 4.480452554706626e-05,
      "loss": 0.897,
      "step": 165650
    },
    {
      "epoch": 0.5805969992324596,
      "grad_norm": 3.09375,
      "learning_rate": 4.480387651840256e-05,
      "loss": 0.8298,
      "step": 165660
    },
    {
      "epoch": 0.5806320467393552,
      "grad_norm": 3.15625,
      "learning_rate": 4.480322748973886e-05,
      "loss": 0.9492,
      "step": 165670
    },
    {
      "epoch": 0.5806670942462508,
      "grad_norm": 2.953125,
      "learning_rate": 4.480257846107516e-05,
      "loss": 0.9576,
      "step": 165680
    },
    {
      "epoch": 0.5807021417531464,
      "grad_norm": 2.4375,
      "learning_rate": 4.480192943241145e-05,
      "loss": 1.0013,
      "step": 165690
    },
    {
      "epoch": 0.580737189260042,
      "grad_norm": 3.359375,
      "learning_rate": 4.4801280403747754e-05,
      "loss": 0.9059,
      "step": 165700
    },
    {
      "epoch": 0.5807722367669376,
      "grad_norm": 3.359375,
      "learning_rate": 4.480063137508405e-05,
      "loss": 0.9534,
      "step": 165710
    },
    {
      "epoch": 0.5808072842738332,
      "grad_norm": 2.5625,
      "learning_rate": 4.479998234642035e-05,
      "loss": 0.9194,
      "step": 165720
    },
    {
      "epoch": 0.5808423317807287,
      "grad_norm": 3.125,
      "learning_rate": 4.4799333317756645e-05,
      "loss": 0.8718,
      "step": 165730
    },
    {
      "epoch": 0.5808773792876244,
      "grad_norm": 2.890625,
      "learning_rate": 4.4798684289092946e-05,
      "loss": 0.9164,
      "step": 165740
    },
    {
      "epoch": 0.5809124267945199,
      "grad_norm": 3.09375,
      "learning_rate": 4.479803526042925e-05,
      "loss": 0.8417,
      "step": 165750
    },
    {
      "epoch": 0.5809474743014156,
      "grad_norm": 3.078125,
      "learning_rate": 4.479738623176554e-05,
      "loss": 0.8568,
      "step": 165760
    },
    {
      "epoch": 0.5809825218083111,
      "grad_norm": 3.046875,
      "learning_rate": 4.4796737203101843e-05,
      "loss": 0.9093,
      "step": 165770
    },
    {
      "epoch": 0.5810175693152068,
      "grad_norm": 3.046875,
      "learning_rate": 4.479608817443814e-05,
      "loss": 0.9915,
      "step": 165780
    },
    {
      "epoch": 0.5810526168221024,
      "grad_norm": 3.4375,
      "learning_rate": 4.479543914577444e-05,
      "loss": 0.9767,
      "step": 165790
    },
    {
      "epoch": 0.5810876643289979,
      "grad_norm": 3.078125,
      "learning_rate": 4.4794790117110734e-05,
      "loss": 0.8478,
      "step": 165800
    },
    {
      "epoch": 0.5811227118358936,
      "grad_norm": 2.96875,
      "learning_rate": 4.4794141088447035e-05,
      "loss": 0.897,
      "step": 165810
    },
    {
      "epoch": 0.5811577593427891,
      "grad_norm": 2.953125,
      "learning_rate": 4.479349205978333e-05,
      "loss": 0.8938,
      "step": 165820
    },
    {
      "epoch": 0.5811928068496848,
      "grad_norm": 3.25,
      "learning_rate": 4.479284303111963e-05,
      "loss": 0.8927,
      "step": 165830
    },
    {
      "epoch": 0.5812278543565803,
      "grad_norm": 3.3125,
      "learning_rate": 4.4792194002455926e-05,
      "loss": 0.9121,
      "step": 165840
    },
    {
      "epoch": 0.581262901863476,
      "grad_norm": 2.859375,
      "learning_rate": 4.479154497379223e-05,
      "loss": 0.8948,
      "step": 165850
    },
    {
      "epoch": 0.5812979493703715,
      "grad_norm": 3.46875,
      "learning_rate": 4.479089594512852e-05,
      "loss": 0.9126,
      "step": 165860
    },
    {
      "epoch": 0.5813329968772671,
      "grad_norm": 3.03125,
      "learning_rate": 4.4790246916464823e-05,
      "loss": 1.0007,
      "step": 165870
    },
    {
      "epoch": 0.5813680443841628,
      "grad_norm": 2.9375,
      "learning_rate": 4.478959788780112e-05,
      "loss": 0.9467,
      "step": 165880
    },
    {
      "epoch": 0.5814030918910583,
      "grad_norm": 3.140625,
      "learning_rate": 4.478894885913742e-05,
      "loss": 0.9467,
      "step": 165890
    },
    {
      "epoch": 0.581438139397954,
      "grad_norm": 2.78125,
      "learning_rate": 4.478829983047372e-05,
      "loss": 0.9069,
      "step": 165900
    },
    {
      "epoch": 0.5814731869048495,
      "grad_norm": 2.71875,
      "learning_rate": 4.4787650801810015e-05,
      "loss": 0.9349,
      "step": 165910
    },
    {
      "epoch": 0.5815082344117452,
      "grad_norm": 2.96875,
      "learning_rate": 4.478700177314631e-05,
      "loss": 0.9676,
      "step": 165920
    },
    {
      "epoch": 0.5815432819186407,
      "grad_norm": 3.296875,
      "learning_rate": 4.4786352744482605e-05,
      "loss": 0.8948,
      "step": 165930
    },
    {
      "epoch": 0.5815783294255363,
      "grad_norm": 3.140625,
      "learning_rate": 4.4785703715818906e-05,
      "loss": 0.9526,
      "step": 165940
    },
    {
      "epoch": 0.5816133769324319,
      "grad_norm": 2.9375,
      "learning_rate": 4.47850546871552e-05,
      "loss": 0.8641,
      "step": 165950
    },
    {
      "epoch": 0.5816484244393275,
      "grad_norm": 2.703125,
      "learning_rate": 4.47844056584915e-05,
      "loss": 0.934,
      "step": 165960
    },
    {
      "epoch": 0.581683471946223,
      "grad_norm": 3.078125,
      "learning_rate": 4.47837566298278e-05,
      "loss": 0.9244,
      "step": 165970
    },
    {
      "epoch": 0.5817185194531187,
      "grad_norm": 3.09375,
      "learning_rate": 4.47831076011641e-05,
      "loss": 0.8417,
      "step": 165980
    },
    {
      "epoch": 0.5817535669600143,
      "grad_norm": 3.28125,
      "learning_rate": 4.47824585725004e-05,
      "loss": 0.91,
      "step": 165990
    },
    {
      "epoch": 0.5817886144669099,
      "grad_norm": 3.171875,
      "learning_rate": 4.4781809543836694e-05,
      "loss": 0.9523,
      "step": 166000
    },
    {
      "epoch": 0.5818236619738055,
      "grad_norm": 3.3125,
      "learning_rate": 4.4781160515172995e-05,
      "loss": 0.9003,
      "step": 166010
    },
    {
      "epoch": 0.5818587094807011,
      "grad_norm": 2.984375,
      "learning_rate": 4.478051148650929e-05,
      "loss": 0.8359,
      "step": 166020
    },
    {
      "epoch": 0.5818937569875967,
      "grad_norm": 2.765625,
      "learning_rate": 4.477986245784559e-05,
      "loss": 0.9682,
      "step": 166030
    },
    {
      "epoch": 0.5819288044944922,
      "grad_norm": 3.03125,
      "learning_rate": 4.4779213429181886e-05,
      "loss": 0.8389,
      "step": 166040
    },
    {
      "epoch": 0.5819638520013879,
      "grad_norm": 2.890625,
      "learning_rate": 4.477856440051819e-05,
      "loss": 0.9098,
      "step": 166050
    },
    {
      "epoch": 0.5819988995082834,
      "grad_norm": 3.03125,
      "learning_rate": 4.477791537185448e-05,
      "loss": 0.999,
      "step": 166060
    },
    {
      "epoch": 0.5820339470151791,
      "grad_norm": 2.96875,
      "learning_rate": 4.4777266343190783e-05,
      "loss": 0.9526,
      "step": 166070
    },
    {
      "epoch": 0.5820689945220747,
      "grad_norm": 3.5,
      "learning_rate": 4.477661731452708e-05,
      "loss": 0.9636,
      "step": 166080
    },
    {
      "epoch": 0.5821040420289703,
      "grad_norm": 2.953125,
      "learning_rate": 4.477596828586338e-05,
      "loss": 0.924,
      "step": 166090
    },
    {
      "epoch": 0.5821390895358659,
      "grad_norm": 3.0,
      "learning_rate": 4.4775319257199674e-05,
      "loss": 0.8272,
      "step": 166100
    },
    {
      "epoch": 0.5821741370427614,
      "grad_norm": 3.109375,
      "learning_rate": 4.4774670228535975e-05,
      "loss": 0.9299,
      "step": 166110
    },
    {
      "epoch": 0.5822091845496571,
      "grad_norm": 3.0,
      "learning_rate": 4.477402119987228e-05,
      "loss": 0.9296,
      "step": 166120
    },
    {
      "epoch": 0.5822442320565526,
      "grad_norm": 3.0,
      "learning_rate": 4.477337217120857e-05,
      "loss": 0.9301,
      "step": 166130
    },
    {
      "epoch": 0.5822792795634483,
      "grad_norm": 3.0625,
      "learning_rate": 4.477272314254487e-05,
      "loss": 0.9542,
      "step": 166140
    },
    {
      "epoch": 0.5823143270703438,
      "grad_norm": 3.484375,
      "learning_rate": 4.477207411388117e-05,
      "loss": 0.9661,
      "step": 166150
    },
    {
      "epoch": 0.5823493745772395,
      "grad_norm": 2.625,
      "learning_rate": 4.477142508521747e-05,
      "loss": 0.9584,
      "step": 166160
    },
    {
      "epoch": 0.5823844220841351,
      "grad_norm": 3.0,
      "learning_rate": 4.4770776056553763e-05,
      "loss": 0.9334,
      "step": 166170
    },
    {
      "epoch": 0.5824194695910306,
      "grad_norm": 3.1875,
      "learning_rate": 4.4770127027890065e-05,
      "loss": 0.8373,
      "step": 166180
    },
    {
      "epoch": 0.5824545170979263,
      "grad_norm": 3.21875,
      "learning_rate": 4.476947799922636e-05,
      "loss": 0.8751,
      "step": 166190
    },
    {
      "epoch": 0.5824895646048218,
      "grad_norm": 3.3125,
      "learning_rate": 4.476882897056266e-05,
      "loss": 0.9552,
      "step": 166200
    },
    {
      "epoch": 0.5825246121117175,
      "grad_norm": 3.046875,
      "learning_rate": 4.4768179941898955e-05,
      "loss": 0.9797,
      "step": 166210
    },
    {
      "epoch": 0.582559659618613,
      "grad_norm": 3.015625,
      "learning_rate": 4.476753091323526e-05,
      "loss": 0.8807,
      "step": 166220
    },
    {
      "epoch": 0.5825947071255086,
      "grad_norm": 2.90625,
      "learning_rate": 4.476688188457155e-05,
      "loss": 0.9191,
      "step": 166230
    },
    {
      "epoch": 0.5826297546324042,
      "grad_norm": 3.25,
      "learning_rate": 4.476623285590785e-05,
      "loss": 0.8459,
      "step": 166240
    },
    {
      "epoch": 0.5826648021392998,
      "grad_norm": 2.875,
      "learning_rate": 4.476558382724415e-05,
      "loss": 0.8774,
      "step": 166250
    },
    {
      "epoch": 0.5826998496461954,
      "grad_norm": 3.0625,
      "learning_rate": 4.476493479858045e-05,
      "loss": 0.9391,
      "step": 166260
    },
    {
      "epoch": 0.582734897153091,
      "grad_norm": 3.296875,
      "learning_rate": 4.476428576991675e-05,
      "loss": 0.9071,
      "step": 166270
    },
    {
      "epoch": 0.5827699446599867,
      "grad_norm": 2.953125,
      "learning_rate": 4.4763636741253045e-05,
      "loss": 0.8775,
      "step": 166280
    },
    {
      "epoch": 0.5828049921668822,
      "grad_norm": 3.015625,
      "learning_rate": 4.476298771258934e-05,
      "loss": 0.8864,
      "step": 166290
    },
    {
      "epoch": 0.5828400396737778,
      "grad_norm": 2.640625,
      "learning_rate": 4.4762338683925634e-05,
      "loss": 0.9165,
      "step": 166300
    },
    {
      "epoch": 0.5828750871806734,
      "grad_norm": 2.828125,
      "learning_rate": 4.4761689655261935e-05,
      "loss": 0.873,
      "step": 166310
    },
    {
      "epoch": 0.582910134687569,
      "grad_norm": 2.578125,
      "learning_rate": 4.476104062659823e-05,
      "loss": 0.8485,
      "step": 166320
    },
    {
      "epoch": 0.5829451821944646,
      "grad_norm": 2.59375,
      "learning_rate": 4.476039159793453e-05,
      "loss": 0.8952,
      "step": 166330
    },
    {
      "epoch": 0.5829802297013602,
      "grad_norm": 2.71875,
      "learning_rate": 4.4759742569270826e-05,
      "loss": 0.8894,
      "step": 166340
    },
    {
      "epoch": 0.5830152772082557,
      "grad_norm": 2.96875,
      "learning_rate": 4.475909354060713e-05,
      "loss": 0.9184,
      "step": 166350
    },
    {
      "epoch": 0.5830503247151514,
      "grad_norm": 3.125,
      "learning_rate": 4.475844451194343e-05,
      "loss": 0.9953,
      "step": 166360
    },
    {
      "epoch": 0.583085372222047,
      "grad_norm": 2.671875,
      "learning_rate": 4.4757795483279723e-05,
      "loss": 0.9452,
      "step": 166370
    },
    {
      "epoch": 0.5831204197289426,
      "grad_norm": 2.78125,
      "learning_rate": 4.4757146454616025e-05,
      "loss": 0.8929,
      "step": 166380
    },
    {
      "epoch": 0.5831554672358382,
      "grad_norm": 3.078125,
      "learning_rate": 4.475649742595232e-05,
      "loss": 0.854,
      "step": 166390
    },
    {
      "epoch": 0.5831905147427338,
      "grad_norm": 2.78125,
      "learning_rate": 4.475584839728862e-05,
      "loss": 1.0216,
      "step": 166400
    },
    {
      "epoch": 0.5832255622496294,
      "grad_norm": 3.484375,
      "learning_rate": 4.4755199368624915e-05,
      "loss": 0.9619,
      "step": 166410
    },
    {
      "epoch": 0.5832606097565249,
      "grad_norm": 2.921875,
      "learning_rate": 4.475455033996122e-05,
      "loss": 0.851,
      "step": 166420
    },
    {
      "epoch": 0.5832956572634206,
      "grad_norm": 2.609375,
      "learning_rate": 4.475390131129751e-05,
      "loss": 0.8532,
      "step": 166430
    },
    {
      "epoch": 0.5833307047703161,
      "grad_norm": 3.125,
      "learning_rate": 4.475325228263381e-05,
      "loss": 0.9373,
      "step": 166440
    },
    {
      "epoch": 0.5833657522772118,
      "grad_norm": 3.4375,
      "learning_rate": 4.475260325397011e-05,
      "loss": 0.8796,
      "step": 166450
    },
    {
      "epoch": 0.5834007997841073,
      "grad_norm": 3.21875,
      "learning_rate": 4.475195422530641e-05,
      "loss": 0.8861,
      "step": 166460
    },
    {
      "epoch": 0.583435847291003,
      "grad_norm": 3.0,
      "learning_rate": 4.4751305196642703e-05,
      "loss": 0.9857,
      "step": 166470
    },
    {
      "epoch": 0.5834708947978986,
      "grad_norm": 2.5625,
      "learning_rate": 4.4750656167979005e-05,
      "loss": 0.9617,
      "step": 166480
    },
    {
      "epoch": 0.5835059423047941,
      "grad_norm": 2.40625,
      "learning_rate": 4.4750007139315306e-05,
      "loss": 0.8738,
      "step": 166490
    },
    {
      "epoch": 0.5835409898116898,
      "grad_norm": 2.96875,
      "learning_rate": 4.47493581106516e-05,
      "loss": 0.9428,
      "step": 166500
    },
    {
      "epoch": 0.5835760373185853,
      "grad_norm": 3.1875,
      "learning_rate": 4.47487090819879e-05,
      "loss": 0.8533,
      "step": 166510
    },
    {
      "epoch": 0.583611084825481,
      "grad_norm": 3.21875,
      "learning_rate": 4.47480600533242e-05,
      "loss": 0.9649,
      "step": 166520
    },
    {
      "epoch": 0.5836461323323765,
      "grad_norm": 3.09375,
      "learning_rate": 4.47474110246605e-05,
      "loss": 0.9488,
      "step": 166530
    },
    {
      "epoch": 0.5836811798392721,
      "grad_norm": 2.609375,
      "learning_rate": 4.474676199599679e-05,
      "loss": 0.9155,
      "step": 166540
    },
    {
      "epoch": 0.5837162273461677,
      "grad_norm": 2.703125,
      "learning_rate": 4.4746112967333094e-05,
      "loss": 0.9109,
      "step": 166550
    },
    {
      "epoch": 0.5837512748530633,
      "grad_norm": 2.53125,
      "learning_rate": 4.474546393866939e-05,
      "loss": 0.9392,
      "step": 166560
    },
    {
      "epoch": 0.583786322359959,
      "grad_norm": 3.203125,
      "learning_rate": 4.474481491000569e-05,
      "loss": 1.028,
      "step": 166570
    },
    {
      "epoch": 0.5838213698668545,
      "grad_norm": 2.625,
      "learning_rate": 4.4744165881341985e-05,
      "loss": 0.9277,
      "step": 166580
    },
    {
      "epoch": 0.5838564173737502,
      "grad_norm": 3.296875,
      "learning_rate": 4.4743516852678286e-05,
      "loss": 0.9602,
      "step": 166590
    },
    {
      "epoch": 0.5838914648806457,
      "grad_norm": 2.953125,
      "learning_rate": 4.474286782401458e-05,
      "loss": 0.9214,
      "step": 166600
    },
    {
      "epoch": 0.5839265123875413,
      "grad_norm": 3.0,
      "learning_rate": 4.474221879535088e-05,
      "loss": 0.916,
      "step": 166610
    },
    {
      "epoch": 0.5839615598944369,
      "grad_norm": 2.828125,
      "learning_rate": 4.4741569766687184e-05,
      "loss": 0.8809,
      "step": 166620
    },
    {
      "epoch": 0.5839966074013325,
      "grad_norm": 2.9375,
      "learning_rate": 4.474092073802348e-05,
      "loss": 0.9015,
      "step": 166630
    },
    {
      "epoch": 0.5840316549082281,
      "grad_norm": 3.109375,
      "learning_rate": 4.474027170935978e-05,
      "loss": 0.929,
      "step": 166640
    },
    {
      "epoch": 0.5840667024151237,
      "grad_norm": 2.828125,
      "learning_rate": 4.4739622680696074e-05,
      "loss": 0.887,
      "step": 166650
    },
    {
      "epoch": 0.5841017499220192,
      "grad_norm": 2.671875,
      "learning_rate": 4.4738973652032376e-05,
      "loss": 0.8848,
      "step": 166660
    },
    {
      "epoch": 0.5841367974289149,
      "grad_norm": 3.09375,
      "learning_rate": 4.4738324623368663e-05,
      "loss": 0.9164,
      "step": 166670
    },
    {
      "epoch": 0.5841718449358105,
      "grad_norm": 2.890625,
      "learning_rate": 4.4737675594704965e-05,
      "loss": 0.9304,
      "step": 166680
    },
    {
      "epoch": 0.5842068924427061,
      "grad_norm": 2.75,
      "learning_rate": 4.473702656604126e-05,
      "loss": 0.9001,
      "step": 166690
    },
    {
      "epoch": 0.5842419399496017,
      "grad_norm": 2.9375,
      "learning_rate": 4.473637753737756e-05,
      "loss": 0.9425,
      "step": 166700
    },
    {
      "epoch": 0.5842769874564973,
      "grad_norm": 3.375,
      "learning_rate": 4.473572850871386e-05,
      "loss": 0.9236,
      "step": 166710
    },
    {
      "epoch": 0.5843120349633929,
      "grad_norm": 2.625,
      "learning_rate": 4.473507948005016e-05,
      "loss": 0.8364,
      "step": 166720
    },
    {
      "epoch": 0.5843470824702884,
      "grad_norm": 3.09375,
      "learning_rate": 4.473443045138646e-05,
      "loss": 0.9479,
      "step": 166730
    },
    {
      "epoch": 0.5843821299771841,
      "grad_norm": 2.953125,
      "learning_rate": 4.473378142272275e-05,
      "loss": 0.8624,
      "step": 166740
    },
    {
      "epoch": 0.5844171774840796,
      "grad_norm": 2.90625,
      "learning_rate": 4.4733132394059054e-05,
      "loss": 0.9833,
      "step": 166750
    },
    {
      "epoch": 0.5844522249909753,
      "grad_norm": 3.25,
      "learning_rate": 4.473248336539535e-05,
      "loss": 0.9099,
      "step": 166760
    },
    {
      "epoch": 0.5844872724978709,
      "grad_norm": 3.125,
      "learning_rate": 4.473183433673165e-05,
      "loss": 1.0105,
      "step": 166770
    },
    {
      "epoch": 0.5845223200047664,
      "grad_norm": 2.734375,
      "learning_rate": 4.4731185308067945e-05,
      "loss": 0.9644,
      "step": 166780
    },
    {
      "epoch": 0.5845573675116621,
      "grad_norm": 2.734375,
      "learning_rate": 4.4730536279404246e-05,
      "loss": 0.8747,
      "step": 166790
    },
    {
      "epoch": 0.5845924150185576,
      "grad_norm": 2.625,
      "learning_rate": 4.472988725074054e-05,
      "loss": 0.9093,
      "step": 166800
    },
    {
      "epoch": 0.5846274625254533,
      "grad_norm": 3.453125,
      "learning_rate": 4.472923822207684e-05,
      "loss": 0.9346,
      "step": 166810
    },
    {
      "epoch": 0.5846625100323488,
      "grad_norm": 3.078125,
      "learning_rate": 4.472858919341314e-05,
      "loss": 0.9634,
      "step": 166820
    },
    {
      "epoch": 0.5846975575392445,
      "grad_norm": 3.203125,
      "learning_rate": 4.472794016474944e-05,
      "loss": 0.8728,
      "step": 166830
    },
    {
      "epoch": 0.58473260504614,
      "grad_norm": 2.53125,
      "learning_rate": 4.472729113608573e-05,
      "loss": 0.8371,
      "step": 166840
    },
    {
      "epoch": 0.5847676525530356,
      "grad_norm": 3.265625,
      "learning_rate": 4.4726642107422034e-05,
      "loss": 0.944,
      "step": 166850
    },
    {
      "epoch": 0.5848027000599313,
      "grad_norm": 3.03125,
      "learning_rate": 4.4725993078758336e-05,
      "loss": 0.9653,
      "step": 166860
    },
    {
      "epoch": 0.5848377475668268,
      "grad_norm": 3.140625,
      "learning_rate": 4.472534405009463e-05,
      "loss": 0.9204,
      "step": 166870
    },
    {
      "epoch": 0.5848727950737225,
      "grad_norm": 2.90625,
      "learning_rate": 4.472469502143093e-05,
      "loss": 0.8632,
      "step": 166880
    },
    {
      "epoch": 0.584907842580618,
      "grad_norm": 3.1875,
      "learning_rate": 4.4724045992767226e-05,
      "loss": 0.9001,
      "step": 166890
    },
    {
      "epoch": 0.5849428900875137,
      "grad_norm": 3.390625,
      "learning_rate": 4.472339696410353e-05,
      "loss": 0.9737,
      "step": 166900
    },
    {
      "epoch": 0.5849779375944092,
      "grad_norm": 2.71875,
      "learning_rate": 4.472274793543982e-05,
      "loss": 0.9558,
      "step": 166910
    },
    {
      "epoch": 0.5850129851013048,
      "grad_norm": 2.890625,
      "learning_rate": 4.4722098906776124e-05,
      "loss": 0.9214,
      "step": 166920
    },
    {
      "epoch": 0.5850480326082004,
      "grad_norm": 2.65625,
      "learning_rate": 4.472144987811242e-05,
      "loss": 0.8983,
      "step": 166930
    },
    {
      "epoch": 0.585083080115096,
      "grad_norm": 2.671875,
      "learning_rate": 4.472080084944872e-05,
      "loss": 0.843,
      "step": 166940
    },
    {
      "epoch": 0.5851181276219916,
      "grad_norm": 3.296875,
      "learning_rate": 4.4720151820785014e-05,
      "loss": 0.9542,
      "step": 166950
    },
    {
      "epoch": 0.5851531751288872,
      "grad_norm": 2.75,
      "learning_rate": 4.4719502792121316e-05,
      "loss": 0.9077,
      "step": 166960
    },
    {
      "epoch": 0.5851882226357829,
      "grad_norm": 2.953125,
      "learning_rate": 4.471885376345761e-05,
      "loss": 0.952,
      "step": 166970
    },
    {
      "epoch": 0.5852232701426784,
      "grad_norm": 2.859375,
      "learning_rate": 4.471820473479391e-05,
      "loss": 0.9307,
      "step": 166980
    },
    {
      "epoch": 0.585258317649574,
      "grad_norm": 3.296875,
      "learning_rate": 4.471755570613021e-05,
      "loss": 0.9211,
      "step": 166990
    },
    {
      "epoch": 0.5852933651564696,
      "grad_norm": 2.890625,
      "learning_rate": 4.471690667746651e-05,
      "loss": 0.9866,
      "step": 167000
    },
    {
      "epoch": 0.5853284126633652,
      "grad_norm": 3.828125,
      "learning_rate": 4.471625764880281e-05,
      "loss": 0.9764,
      "step": 167010
    },
    {
      "epoch": 0.5853634601702608,
      "grad_norm": 2.578125,
      "learning_rate": 4.4715608620139104e-05,
      "loss": 0.8558,
      "step": 167020
    },
    {
      "epoch": 0.5853985076771564,
      "grad_norm": 2.796875,
      "learning_rate": 4.4714959591475405e-05,
      "loss": 0.9492,
      "step": 167030
    },
    {
      "epoch": 0.5854335551840519,
      "grad_norm": 2.859375,
      "learning_rate": 4.471431056281169e-05,
      "loss": 0.8916,
      "step": 167040
    },
    {
      "epoch": 0.5854686026909476,
      "grad_norm": 3.015625,
      "learning_rate": 4.4713661534147994e-05,
      "loss": 0.9305,
      "step": 167050
    },
    {
      "epoch": 0.5855036501978432,
      "grad_norm": 2.890625,
      "learning_rate": 4.471301250548429e-05,
      "loss": 0.8924,
      "step": 167060
    },
    {
      "epoch": 0.5855386977047388,
      "grad_norm": 3.328125,
      "learning_rate": 4.471236347682059e-05,
      "loss": 0.9521,
      "step": 167070
    },
    {
      "epoch": 0.5855737452116344,
      "grad_norm": 2.609375,
      "learning_rate": 4.471171444815689e-05,
      "loss": 0.9092,
      "step": 167080
    },
    {
      "epoch": 0.58560879271853,
      "grad_norm": 2.890625,
      "learning_rate": 4.4711065419493186e-05,
      "loss": 0.9101,
      "step": 167090
    },
    {
      "epoch": 0.5856438402254256,
      "grad_norm": 3.40625,
      "learning_rate": 4.471041639082949e-05,
      "loss": 0.9675,
      "step": 167100
    },
    {
      "epoch": 0.5856788877323211,
      "grad_norm": 3.03125,
      "learning_rate": 4.470976736216578e-05,
      "loss": 0.9669,
      "step": 167110
    },
    {
      "epoch": 0.5857139352392168,
      "grad_norm": 2.9375,
      "learning_rate": 4.4709118333502084e-05,
      "loss": 0.8409,
      "step": 167120
    },
    {
      "epoch": 0.5857489827461123,
      "grad_norm": 3.078125,
      "learning_rate": 4.470846930483838e-05,
      "loss": 0.922,
      "step": 167130
    },
    {
      "epoch": 0.585784030253008,
      "grad_norm": 2.59375,
      "learning_rate": 4.470782027617468e-05,
      "loss": 0.9447,
      "step": 167140
    },
    {
      "epoch": 0.5858190777599035,
      "grad_norm": 2.5625,
      "learning_rate": 4.4707171247510974e-05,
      "loss": 0.9191,
      "step": 167150
    },
    {
      "epoch": 0.5858541252667991,
      "grad_norm": 2.390625,
      "learning_rate": 4.4706522218847276e-05,
      "loss": 0.882,
      "step": 167160
    },
    {
      "epoch": 0.5858891727736948,
      "grad_norm": 3.015625,
      "learning_rate": 4.470587319018357e-05,
      "loss": 0.9418,
      "step": 167170
    },
    {
      "epoch": 0.5859242202805903,
      "grad_norm": 2.6875,
      "learning_rate": 4.470522416151987e-05,
      "loss": 0.8887,
      "step": 167180
    },
    {
      "epoch": 0.585959267787486,
      "grad_norm": 2.9375,
      "learning_rate": 4.4704575132856166e-05,
      "loss": 0.9245,
      "step": 167190
    },
    {
      "epoch": 0.5859943152943815,
      "grad_norm": 2.78125,
      "learning_rate": 4.470392610419247e-05,
      "loss": 0.8658,
      "step": 167200
    },
    {
      "epoch": 0.5860293628012772,
      "grad_norm": 2.53125,
      "learning_rate": 4.470327707552876e-05,
      "loss": 0.8752,
      "step": 167210
    },
    {
      "epoch": 0.5860644103081727,
      "grad_norm": 3.46875,
      "learning_rate": 4.4702628046865064e-05,
      "loss": 1.0298,
      "step": 167220
    },
    {
      "epoch": 0.5860994578150683,
      "grad_norm": 2.8125,
      "learning_rate": 4.4701979018201365e-05,
      "loss": 0.9163,
      "step": 167230
    },
    {
      "epoch": 0.5861345053219639,
      "grad_norm": 2.734375,
      "learning_rate": 4.470132998953766e-05,
      "loss": 0.9594,
      "step": 167240
    },
    {
      "epoch": 0.5861695528288595,
      "grad_norm": 3.15625,
      "learning_rate": 4.470068096087396e-05,
      "loss": 0.9601,
      "step": 167250
    },
    {
      "epoch": 0.5862046003357552,
      "grad_norm": 3.5625,
      "learning_rate": 4.4700031932210256e-05,
      "loss": 0.9229,
      "step": 167260
    },
    {
      "epoch": 0.5862396478426507,
      "grad_norm": 3.546875,
      "learning_rate": 4.469938290354656e-05,
      "loss": 0.9583,
      "step": 167270
    },
    {
      "epoch": 0.5862746953495463,
      "grad_norm": 2.859375,
      "learning_rate": 4.469873387488285e-05,
      "loss": 0.8884,
      "step": 167280
    },
    {
      "epoch": 0.5863097428564419,
      "grad_norm": 2.75,
      "learning_rate": 4.469808484621915e-05,
      "loss": 0.9234,
      "step": 167290
    },
    {
      "epoch": 0.5863447903633375,
      "grad_norm": 3.21875,
      "learning_rate": 4.469743581755545e-05,
      "loss": 0.8939,
      "step": 167300
    },
    {
      "epoch": 0.5863798378702331,
      "grad_norm": 2.828125,
      "learning_rate": 4.469678678889175e-05,
      "loss": 0.9283,
      "step": 167310
    },
    {
      "epoch": 0.5864148853771287,
      "grad_norm": 3.34375,
      "learning_rate": 4.4696137760228044e-05,
      "loss": 0.9589,
      "step": 167320
    },
    {
      "epoch": 0.5864499328840242,
      "grad_norm": 3.390625,
      "learning_rate": 4.4695488731564345e-05,
      "loss": 0.9123,
      "step": 167330
    },
    {
      "epoch": 0.5864849803909199,
      "grad_norm": 2.734375,
      "learning_rate": 4.469483970290064e-05,
      "loss": 0.9294,
      "step": 167340
    },
    {
      "epoch": 0.5865200278978155,
      "grad_norm": 2.859375,
      "learning_rate": 4.469419067423694e-05,
      "loss": 0.8254,
      "step": 167350
    },
    {
      "epoch": 0.5865550754047111,
      "grad_norm": 3.3125,
      "learning_rate": 4.469354164557324e-05,
      "loss": 0.8936,
      "step": 167360
    },
    {
      "epoch": 0.5865901229116067,
      "grad_norm": 3.5,
      "learning_rate": 4.469289261690954e-05,
      "loss": 0.9604,
      "step": 167370
    },
    {
      "epoch": 0.5866251704185023,
      "grad_norm": 3.140625,
      "learning_rate": 4.469224358824584e-05,
      "loss": 0.9631,
      "step": 167380
    },
    {
      "epoch": 0.5866602179253979,
      "grad_norm": 2.875,
      "learning_rate": 4.469159455958213e-05,
      "loss": 0.9113,
      "step": 167390
    },
    {
      "epoch": 0.5866952654322934,
      "grad_norm": 2.859375,
      "learning_rate": 4.4690945530918434e-05,
      "loss": 0.9894,
      "step": 167400
    },
    {
      "epoch": 0.5867303129391891,
      "grad_norm": 2.703125,
      "learning_rate": 4.469029650225473e-05,
      "loss": 0.8893,
      "step": 167410
    },
    {
      "epoch": 0.5867653604460846,
      "grad_norm": 2.546875,
      "learning_rate": 4.4689647473591024e-05,
      "loss": 0.847,
      "step": 167420
    },
    {
      "epoch": 0.5868004079529803,
      "grad_norm": 2.78125,
      "learning_rate": 4.468899844492732e-05,
      "loss": 0.8684,
      "step": 167430
    },
    {
      "epoch": 0.5868354554598758,
      "grad_norm": 3.078125,
      "learning_rate": 4.468834941626362e-05,
      "loss": 0.9127,
      "step": 167440
    },
    {
      "epoch": 0.5868705029667715,
      "grad_norm": 3.125,
      "learning_rate": 4.468770038759992e-05,
      "loss": 1.0155,
      "step": 167450
    },
    {
      "epoch": 0.5869055504736671,
      "grad_norm": 2.40625,
      "learning_rate": 4.4687051358936216e-05,
      "loss": 0.9657,
      "step": 167460
    },
    {
      "epoch": 0.5869405979805626,
      "grad_norm": 2.921875,
      "learning_rate": 4.468640233027252e-05,
      "loss": 0.9086,
      "step": 167470
    },
    {
      "epoch": 0.5869756454874583,
      "grad_norm": 3.203125,
      "learning_rate": 4.468575330160881e-05,
      "loss": 0.8379,
      "step": 167480
    },
    {
      "epoch": 0.5870106929943538,
      "grad_norm": 3.140625,
      "learning_rate": 4.468510427294511e-05,
      "loss": 0.9091,
      "step": 167490
    },
    {
      "epoch": 0.5870457405012495,
      "grad_norm": 3.03125,
      "learning_rate": 4.468445524428141e-05,
      "loss": 0.9685,
      "step": 167500
    },
    {
      "epoch": 0.587080788008145,
      "grad_norm": 2.890625,
      "learning_rate": 4.468380621561771e-05,
      "loss": 0.9153,
      "step": 167510
    },
    {
      "epoch": 0.5871158355150407,
      "grad_norm": 3.078125,
      "learning_rate": 4.4683157186954004e-05,
      "loss": 0.874,
      "step": 167520
    },
    {
      "epoch": 0.5871508830219362,
      "grad_norm": 3.015625,
      "learning_rate": 4.4682508158290305e-05,
      "loss": 0.9332,
      "step": 167530
    },
    {
      "epoch": 0.5871859305288318,
      "grad_norm": 3.453125,
      "learning_rate": 4.46818591296266e-05,
      "loss": 0.8606,
      "step": 167540
    },
    {
      "epoch": 0.5872209780357275,
      "grad_norm": 3.140625,
      "learning_rate": 4.46812101009629e-05,
      "loss": 0.8909,
      "step": 167550
    },
    {
      "epoch": 0.587256025542623,
      "grad_norm": 3.453125,
      "learning_rate": 4.4680561072299196e-05,
      "loss": 0.8818,
      "step": 167560
    },
    {
      "epoch": 0.5872910730495187,
      "grad_norm": 3.390625,
      "learning_rate": 4.46799120436355e-05,
      "loss": 0.8392,
      "step": 167570
    },
    {
      "epoch": 0.5873261205564142,
      "grad_norm": 2.359375,
      "learning_rate": 4.46792630149718e-05,
      "loss": 0.8364,
      "step": 167580
    },
    {
      "epoch": 0.5873611680633098,
      "grad_norm": 3.21875,
      "learning_rate": 4.467861398630809e-05,
      "loss": 0.9605,
      "step": 167590
    },
    {
      "epoch": 0.5873962155702054,
      "grad_norm": 2.796875,
      "learning_rate": 4.4677964957644394e-05,
      "loss": 0.913,
      "step": 167600
    },
    {
      "epoch": 0.587431263077101,
      "grad_norm": 2.984375,
      "learning_rate": 4.467731592898069e-05,
      "loss": 0.9145,
      "step": 167610
    },
    {
      "epoch": 0.5874663105839966,
      "grad_norm": 2.875,
      "learning_rate": 4.467666690031699e-05,
      "loss": 0.9375,
      "step": 167620
    },
    {
      "epoch": 0.5875013580908922,
      "grad_norm": 3.53125,
      "learning_rate": 4.4676017871653285e-05,
      "loss": 0.9707,
      "step": 167630
    },
    {
      "epoch": 0.5875364055977877,
      "grad_norm": 3.1875,
      "learning_rate": 4.4675368842989586e-05,
      "loss": 0.9658,
      "step": 167640
    },
    {
      "epoch": 0.5875714531046834,
      "grad_norm": 2.984375,
      "learning_rate": 4.467471981432588e-05,
      "loss": 0.8616,
      "step": 167650
    },
    {
      "epoch": 0.587606500611579,
      "grad_norm": 2.828125,
      "learning_rate": 4.467407078566218e-05,
      "loss": 0.937,
      "step": 167660
    },
    {
      "epoch": 0.5876415481184746,
      "grad_norm": 3.15625,
      "learning_rate": 4.467342175699848e-05,
      "loss": 0.9248,
      "step": 167670
    },
    {
      "epoch": 0.5876765956253702,
      "grad_norm": 2.84375,
      "learning_rate": 4.467277272833478e-05,
      "loss": 0.9674,
      "step": 167680
    },
    {
      "epoch": 0.5877116431322658,
      "grad_norm": 3.109375,
      "learning_rate": 4.467212369967107e-05,
      "loss": 0.8723,
      "step": 167690
    },
    {
      "epoch": 0.5877466906391614,
      "grad_norm": 2.890625,
      "learning_rate": 4.4671474671007374e-05,
      "loss": 0.9877,
      "step": 167700
    },
    {
      "epoch": 0.5877817381460569,
      "grad_norm": 2.40625,
      "learning_rate": 4.467082564234367e-05,
      "loss": 0.9774,
      "step": 167710
    },
    {
      "epoch": 0.5878167856529526,
      "grad_norm": 2.703125,
      "learning_rate": 4.467017661367997e-05,
      "loss": 0.8299,
      "step": 167720
    },
    {
      "epoch": 0.5878518331598481,
      "grad_norm": 2.828125,
      "learning_rate": 4.466952758501627e-05,
      "loss": 0.9392,
      "step": 167730
    },
    {
      "epoch": 0.5878868806667438,
      "grad_norm": 3.046875,
      "learning_rate": 4.4668878556352566e-05,
      "loss": 0.9057,
      "step": 167740
    },
    {
      "epoch": 0.5879219281736394,
      "grad_norm": 3.015625,
      "learning_rate": 4.466822952768887e-05,
      "loss": 0.9791,
      "step": 167750
    },
    {
      "epoch": 0.587956975680535,
      "grad_norm": 2.453125,
      "learning_rate": 4.466758049902516e-05,
      "loss": 0.8985,
      "step": 167760
    },
    {
      "epoch": 0.5879920231874306,
      "grad_norm": 3.125,
      "learning_rate": 4.4666931470361464e-05,
      "loss": 1.0391,
      "step": 167770
    },
    {
      "epoch": 0.5880270706943261,
      "grad_norm": 2.46875,
      "learning_rate": 4.466628244169776e-05,
      "loss": 0.918,
      "step": 167780
    },
    {
      "epoch": 0.5880621182012218,
      "grad_norm": 2.484375,
      "learning_rate": 4.466563341303406e-05,
      "loss": 0.8444,
      "step": 167790
    },
    {
      "epoch": 0.5880971657081173,
      "grad_norm": 2.828125,
      "learning_rate": 4.466498438437035e-05,
      "loss": 0.9124,
      "step": 167800
    },
    {
      "epoch": 0.588132213215013,
      "grad_norm": 3.28125,
      "learning_rate": 4.466433535570665e-05,
      "loss": 0.9427,
      "step": 167810
    },
    {
      "epoch": 0.5881672607219085,
      "grad_norm": 2.34375,
      "learning_rate": 4.466368632704295e-05,
      "loss": 0.8891,
      "step": 167820
    },
    {
      "epoch": 0.5882023082288041,
      "grad_norm": 3.421875,
      "learning_rate": 4.4663037298379245e-05,
      "loss": 0.9745,
      "step": 167830
    },
    {
      "epoch": 0.5882373557356998,
      "grad_norm": 3.03125,
      "learning_rate": 4.4662388269715546e-05,
      "loss": 0.964,
      "step": 167840
    },
    {
      "epoch": 0.5882724032425953,
      "grad_norm": 2.828125,
      "learning_rate": 4.466173924105184e-05,
      "loss": 0.9558,
      "step": 167850
    },
    {
      "epoch": 0.588307450749491,
      "grad_norm": 3.140625,
      "learning_rate": 4.466109021238814e-05,
      "loss": 0.8683,
      "step": 167860
    },
    {
      "epoch": 0.5883424982563865,
      "grad_norm": 3.09375,
      "learning_rate": 4.466044118372444e-05,
      "loss": 0.8946,
      "step": 167870
    },
    {
      "epoch": 0.5883775457632822,
      "grad_norm": 3.109375,
      "learning_rate": 4.465979215506074e-05,
      "loss": 0.8434,
      "step": 167880
    },
    {
      "epoch": 0.5884125932701777,
      "grad_norm": 2.71875,
      "learning_rate": 4.465914312639703e-05,
      "loss": 0.8792,
      "step": 167890
    },
    {
      "epoch": 0.5884476407770733,
      "grad_norm": 3.296875,
      "learning_rate": 4.4658494097733334e-05,
      "loss": 0.918,
      "step": 167900
    },
    {
      "epoch": 0.5884826882839689,
      "grad_norm": 2.609375,
      "learning_rate": 4.465784506906963e-05,
      "loss": 0.8907,
      "step": 167910
    },
    {
      "epoch": 0.5885177357908645,
      "grad_norm": 3.25,
      "learning_rate": 4.465719604040593e-05,
      "loss": 0.9543,
      "step": 167920
    },
    {
      "epoch": 0.5885527832977601,
      "grad_norm": 2.953125,
      "learning_rate": 4.4656547011742225e-05,
      "loss": 0.8664,
      "step": 167930
    },
    {
      "epoch": 0.5885878308046557,
      "grad_norm": 2.796875,
      "learning_rate": 4.4655897983078526e-05,
      "loss": 0.926,
      "step": 167940
    },
    {
      "epoch": 0.5886228783115514,
      "grad_norm": 2.96875,
      "learning_rate": 4.465524895441483e-05,
      "loss": 0.8327,
      "step": 167950
    },
    {
      "epoch": 0.5886579258184469,
      "grad_norm": 3.359375,
      "learning_rate": 4.465459992575112e-05,
      "loss": 0.9262,
      "step": 167960
    },
    {
      "epoch": 0.5886929733253425,
      "grad_norm": 3.03125,
      "learning_rate": 4.4653950897087424e-05,
      "loss": 0.9273,
      "step": 167970
    },
    {
      "epoch": 0.5887280208322381,
      "grad_norm": 3.203125,
      "learning_rate": 4.465330186842372e-05,
      "loss": 0.9293,
      "step": 167980
    },
    {
      "epoch": 0.5887630683391337,
      "grad_norm": 2.984375,
      "learning_rate": 4.465265283976002e-05,
      "loss": 0.8894,
      "step": 167990
    },
    {
      "epoch": 0.5887981158460293,
      "grad_norm": 3.046875,
      "learning_rate": 4.4652003811096314e-05,
      "loss": 0.883,
      "step": 168000
    },
    {
      "epoch": 0.5888331633529249,
      "grad_norm": 2.75,
      "learning_rate": 4.4651354782432616e-05,
      "loss": 0.8941,
      "step": 168010
    },
    {
      "epoch": 0.5888682108598204,
      "grad_norm": 2.890625,
      "learning_rate": 4.465070575376891e-05,
      "loss": 0.9639,
      "step": 168020
    },
    {
      "epoch": 0.5889032583667161,
      "grad_norm": 2.921875,
      "learning_rate": 4.465005672510521e-05,
      "loss": 0.8531,
      "step": 168030
    },
    {
      "epoch": 0.5889383058736117,
      "grad_norm": 2.921875,
      "learning_rate": 4.4649407696441506e-05,
      "loss": 0.8971,
      "step": 168040
    },
    {
      "epoch": 0.5889733533805073,
      "grad_norm": 3.328125,
      "learning_rate": 4.464875866777781e-05,
      "loss": 1.0048,
      "step": 168050
    },
    {
      "epoch": 0.5890084008874029,
      "grad_norm": 3.15625,
      "learning_rate": 4.46481096391141e-05,
      "loss": 0.9342,
      "step": 168060
    },
    {
      "epoch": 0.5890434483942985,
      "grad_norm": 3.296875,
      "learning_rate": 4.4647460610450404e-05,
      "loss": 1.0053,
      "step": 168070
    },
    {
      "epoch": 0.5890784959011941,
      "grad_norm": 3.203125,
      "learning_rate": 4.46468115817867e-05,
      "loss": 0.9269,
      "step": 168080
    },
    {
      "epoch": 0.5891135434080896,
      "grad_norm": 2.5625,
      "learning_rate": 4.4646162553123e-05,
      "loss": 0.8596,
      "step": 168090
    },
    {
      "epoch": 0.5891485909149853,
      "grad_norm": 2.953125,
      "learning_rate": 4.46455135244593e-05,
      "loss": 0.8358,
      "step": 168100
    },
    {
      "epoch": 0.5891836384218808,
      "grad_norm": 2.984375,
      "learning_rate": 4.4644864495795596e-05,
      "loss": 0.9625,
      "step": 168110
    },
    {
      "epoch": 0.5892186859287765,
      "grad_norm": 2.765625,
      "learning_rate": 4.46442154671319e-05,
      "loss": 0.8461,
      "step": 168120
    },
    {
      "epoch": 0.589253733435672,
      "grad_norm": 3.078125,
      "learning_rate": 4.464356643846819e-05,
      "loss": 0.9844,
      "step": 168130
    },
    {
      "epoch": 0.5892887809425676,
      "grad_norm": 2.953125,
      "learning_rate": 4.464291740980449e-05,
      "loss": 1.023,
      "step": 168140
    },
    {
      "epoch": 0.5893238284494633,
      "grad_norm": 3.125,
      "learning_rate": 4.464226838114079e-05,
      "loss": 0.9125,
      "step": 168150
    },
    {
      "epoch": 0.5893588759563588,
      "grad_norm": 3.078125,
      "learning_rate": 4.464161935247709e-05,
      "loss": 0.8882,
      "step": 168160
    },
    {
      "epoch": 0.5893939234632545,
      "grad_norm": 3.15625,
      "learning_rate": 4.464097032381338e-05,
      "loss": 0.95,
      "step": 168170
    },
    {
      "epoch": 0.58942897097015,
      "grad_norm": 3.140625,
      "learning_rate": 4.464032129514968e-05,
      "loss": 0.8377,
      "step": 168180
    },
    {
      "epoch": 0.5894640184770457,
      "grad_norm": 3.265625,
      "learning_rate": 4.463967226648598e-05,
      "loss": 0.9629,
      "step": 168190
    },
    {
      "epoch": 0.5894990659839412,
      "grad_norm": 2.765625,
      "learning_rate": 4.4639023237822274e-05,
      "loss": 0.8855,
      "step": 168200
    },
    {
      "epoch": 0.5895341134908368,
      "grad_norm": 2.796875,
      "learning_rate": 4.4638374209158576e-05,
      "loss": 0.9506,
      "step": 168210
    },
    {
      "epoch": 0.5895691609977324,
      "grad_norm": 2.65625,
      "learning_rate": 4.463772518049487e-05,
      "loss": 0.8555,
      "step": 168220
    },
    {
      "epoch": 0.589604208504628,
      "grad_norm": 2.75,
      "learning_rate": 4.463707615183117e-05,
      "loss": 0.84,
      "step": 168230
    },
    {
      "epoch": 0.5896392560115237,
      "grad_norm": 2.875,
      "learning_rate": 4.4636427123167466e-05,
      "loss": 0.8835,
      "step": 168240
    },
    {
      "epoch": 0.5896743035184192,
      "grad_norm": 3.046875,
      "learning_rate": 4.463577809450377e-05,
      "loss": 0.8796,
      "step": 168250
    },
    {
      "epoch": 0.5897093510253149,
      "grad_norm": 2.5,
      "learning_rate": 4.463512906584006e-05,
      "loss": 0.8715,
      "step": 168260
    },
    {
      "epoch": 0.5897443985322104,
      "grad_norm": 2.859375,
      "learning_rate": 4.4634480037176364e-05,
      "loss": 0.8138,
      "step": 168270
    },
    {
      "epoch": 0.589779446039106,
      "grad_norm": 2.46875,
      "learning_rate": 4.463383100851266e-05,
      "loss": 0.8462,
      "step": 168280
    },
    {
      "epoch": 0.5898144935460016,
      "grad_norm": 2.78125,
      "learning_rate": 4.463318197984896e-05,
      "loss": 0.911,
      "step": 168290
    },
    {
      "epoch": 0.5898495410528972,
      "grad_norm": 3.203125,
      "learning_rate": 4.4632532951185254e-05,
      "loss": 0.9196,
      "step": 168300
    },
    {
      "epoch": 0.5898845885597928,
      "grad_norm": 2.5625,
      "learning_rate": 4.4631883922521556e-05,
      "loss": 0.8674,
      "step": 168310
    },
    {
      "epoch": 0.5899196360666884,
      "grad_norm": 4.0,
      "learning_rate": 4.463123489385786e-05,
      "loss": 0.9564,
      "step": 168320
    },
    {
      "epoch": 0.5899546835735839,
      "grad_norm": 2.46875,
      "learning_rate": 4.463058586519415e-05,
      "loss": 0.8487,
      "step": 168330
    },
    {
      "epoch": 0.5899897310804796,
      "grad_norm": 2.921875,
      "learning_rate": 4.462993683653045e-05,
      "loss": 0.9443,
      "step": 168340
    },
    {
      "epoch": 0.5900247785873752,
      "grad_norm": 3.3125,
      "learning_rate": 4.462928780786675e-05,
      "loss": 0.9988,
      "step": 168350
    },
    {
      "epoch": 0.5900598260942708,
      "grad_norm": 3.1875,
      "learning_rate": 4.462863877920305e-05,
      "loss": 1.0136,
      "step": 168360
    },
    {
      "epoch": 0.5900948736011664,
      "grad_norm": 3.109375,
      "learning_rate": 4.4627989750539344e-05,
      "loss": 0.8784,
      "step": 168370
    },
    {
      "epoch": 0.590129921108062,
      "grad_norm": 2.671875,
      "learning_rate": 4.4627340721875645e-05,
      "loss": 1.0104,
      "step": 168380
    },
    {
      "epoch": 0.5901649686149576,
      "grad_norm": 2.890625,
      "learning_rate": 4.462669169321194e-05,
      "loss": 0.9082,
      "step": 168390
    },
    {
      "epoch": 0.5902000161218531,
      "grad_norm": 2.5,
      "learning_rate": 4.462604266454824e-05,
      "loss": 0.866,
      "step": 168400
    },
    {
      "epoch": 0.5902350636287488,
      "grad_norm": 3.109375,
      "learning_rate": 4.4625393635884536e-05,
      "loss": 0.9185,
      "step": 168410
    },
    {
      "epoch": 0.5902701111356443,
      "grad_norm": 2.875,
      "learning_rate": 4.462474460722084e-05,
      "loss": 0.9845,
      "step": 168420
    },
    {
      "epoch": 0.59030515864254,
      "grad_norm": 3.484375,
      "learning_rate": 4.462409557855713e-05,
      "loss": 0.8985,
      "step": 168430
    },
    {
      "epoch": 0.5903402061494356,
      "grad_norm": 3.40625,
      "learning_rate": 4.462344654989343e-05,
      "loss": 0.9667,
      "step": 168440
    },
    {
      "epoch": 0.5903752536563311,
      "grad_norm": 2.890625,
      "learning_rate": 4.462279752122973e-05,
      "loss": 0.9821,
      "step": 168450
    },
    {
      "epoch": 0.5904103011632268,
      "grad_norm": 2.828125,
      "learning_rate": 4.462214849256603e-05,
      "loss": 0.9154,
      "step": 168460
    },
    {
      "epoch": 0.5904453486701223,
      "grad_norm": 3.265625,
      "learning_rate": 4.462149946390233e-05,
      "loss": 1.0273,
      "step": 168470
    },
    {
      "epoch": 0.590480396177018,
      "grad_norm": 2.53125,
      "learning_rate": 4.4620850435238625e-05,
      "loss": 0.9018,
      "step": 168480
    },
    {
      "epoch": 0.5905154436839135,
      "grad_norm": 2.734375,
      "learning_rate": 4.4620201406574927e-05,
      "loss": 0.9882,
      "step": 168490
    },
    {
      "epoch": 0.5905504911908092,
      "grad_norm": 2.609375,
      "learning_rate": 4.461955237791122e-05,
      "loss": 0.9163,
      "step": 168500
    },
    {
      "epoch": 0.5905855386977047,
      "grad_norm": 2.90625,
      "learning_rate": 4.461890334924752e-05,
      "loss": 0.9267,
      "step": 168510
    },
    {
      "epoch": 0.5906205862046003,
      "grad_norm": 3.140625,
      "learning_rate": 4.461825432058382e-05,
      "loss": 0.9668,
      "step": 168520
    },
    {
      "epoch": 0.590655633711496,
      "grad_norm": 3.109375,
      "learning_rate": 4.461760529192012e-05,
      "loss": 0.97,
      "step": 168530
    },
    {
      "epoch": 0.5906906812183915,
      "grad_norm": 2.921875,
      "learning_rate": 4.461695626325641e-05,
      "loss": 0.9074,
      "step": 168540
    },
    {
      "epoch": 0.5907257287252872,
      "grad_norm": 2.859375,
      "learning_rate": 4.461630723459271e-05,
      "loss": 0.897,
      "step": 168550
    },
    {
      "epoch": 0.5907607762321827,
      "grad_norm": 2.765625,
      "learning_rate": 4.461565820592901e-05,
      "loss": 0.8851,
      "step": 168560
    },
    {
      "epoch": 0.5907958237390784,
      "grad_norm": 3.25,
      "learning_rate": 4.4615009177265304e-05,
      "loss": 1.0024,
      "step": 168570
    },
    {
      "epoch": 0.5908308712459739,
      "grad_norm": 2.90625,
      "learning_rate": 4.4614360148601605e-05,
      "loss": 0.8684,
      "step": 168580
    },
    {
      "epoch": 0.5908659187528695,
      "grad_norm": 2.984375,
      "learning_rate": 4.46137111199379e-05,
      "loss": 0.8651,
      "step": 168590
    },
    {
      "epoch": 0.5909009662597651,
      "grad_norm": 2.875,
      "learning_rate": 4.46130620912742e-05,
      "loss": 0.9298,
      "step": 168600
    },
    {
      "epoch": 0.5909360137666607,
      "grad_norm": 2.859375,
      "learning_rate": 4.4612413062610496e-05,
      "loss": 1.0568,
      "step": 168610
    },
    {
      "epoch": 0.5909710612735563,
      "grad_norm": 3.109375,
      "learning_rate": 4.46117640339468e-05,
      "loss": 0.8357,
      "step": 168620
    },
    {
      "epoch": 0.5910061087804519,
      "grad_norm": 2.65625,
      "learning_rate": 4.461111500528309e-05,
      "loss": 0.8604,
      "step": 168630
    },
    {
      "epoch": 0.5910411562873475,
      "grad_norm": 3.203125,
      "learning_rate": 4.461046597661939e-05,
      "loss": 0.8718,
      "step": 168640
    },
    {
      "epoch": 0.5910762037942431,
      "grad_norm": 2.609375,
      "learning_rate": 4.460981694795569e-05,
      "loss": 0.9488,
      "step": 168650
    },
    {
      "epoch": 0.5911112513011387,
      "grad_norm": 2.59375,
      "learning_rate": 4.460916791929199e-05,
      "loss": 0.9548,
      "step": 168660
    },
    {
      "epoch": 0.5911462988080343,
      "grad_norm": 3.390625,
      "learning_rate": 4.4608518890628284e-05,
      "loss": 0.9035,
      "step": 168670
    },
    {
      "epoch": 0.5911813463149299,
      "grad_norm": 3.546875,
      "learning_rate": 4.4607869861964585e-05,
      "loss": 0.8886,
      "step": 168680
    },
    {
      "epoch": 0.5912163938218254,
      "grad_norm": 3.265625,
      "learning_rate": 4.4607220833300887e-05,
      "loss": 0.938,
      "step": 168690
    },
    {
      "epoch": 0.5912514413287211,
      "grad_norm": 3.390625,
      "learning_rate": 4.460657180463718e-05,
      "loss": 0.9653,
      "step": 168700
    },
    {
      "epoch": 0.5912864888356166,
      "grad_norm": 2.828125,
      "learning_rate": 4.460592277597348e-05,
      "loss": 0.9419,
      "step": 168710
    },
    {
      "epoch": 0.5913215363425123,
      "grad_norm": 2.75,
      "learning_rate": 4.460527374730978e-05,
      "loss": 0.954,
      "step": 168720
    },
    {
      "epoch": 0.5913565838494079,
      "grad_norm": 2.84375,
      "learning_rate": 4.460462471864608e-05,
      "loss": 0.9465,
      "step": 168730
    },
    {
      "epoch": 0.5913916313563035,
      "grad_norm": 2.78125,
      "learning_rate": 4.460397568998237e-05,
      "loss": 0.8879,
      "step": 168740
    },
    {
      "epoch": 0.5914266788631991,
      "grad_norm": 2.96875,
      "learning_rate": 4.4603326661318675e-05,
      "loss": 1.0093,
      "step": 168750
    },
    {
      "epoch": 0.5914617263700946,
      "grad_norm": 2.984375,
      "learning_rate": 4.460267763265497e-05,
      "loss": 0.9491,
      "step": 168760
    },
    {
      "epoch": 0.5914967738769903,
      "grad_norm": 2.703125,
      "learning_rate": 4.460202860399127e-05,
      "loss": 0.8849,
      "step": 168770
    },
    {
      "epoch": 0.5915318213838858,
      "grad_norm": 3.421875,
      "learning_rate": 4.4601379575327565e-05,
      "loss": 0.9175,
      "step": 168780
    },
    {
      "epoch": 0.5915668688907815,
      "grad_norm": 2.875,
      "learning_rate": 4.4600730546663867e-05,
      "loss": 0.8693,
      "step": 168790
    },
    {
      "epoch": 0.591601916397677,
      "grad_norm": 2.828125,
      "learning_rate": 4.460008151800016e-05,
      "loss": 0.8801,
      "step": 168800
    },
    {
      "epoch": 0.5916369639045727,
      "grad_norm": 3.203125,
      "learning_rate": 4.459943248933646e-05,
      "loss": 0.9192,
      "step": 168810
    },
    {
      "epoch": 0.5916720114114682,
      "grad_norm": 2.984375,
      "learning_rate": 4.4598783460672764e-05,
      "loss": 0.9757,
      "step": 168820
    },
    {
      "epoch": 0.5917070589183638,
      "grad_norm": 2.78125,
      "learning_rate": 4.459813443200906e-05,
      "loss": 0.9619,
      "step": 168830
    },
    {
      "epoch": 0.5917421064252595,
      "grad_norm": 3.171875,
      "learning_rate": 4.459748540334536e-05,
      "loss": 0.9407,
      "step": 168840
    },
    {
      "epoch": 0.591777153932155,
      "grad_norm": 2.796875,
      "learning_rate": 4.4596836374681655e-05,
      "loss": 0.886,
      "step": 168850
    },
    {
      "epoch": 0.5918122014390507,
      "grad_norm": 3.1875,
      "learning_rate": 4.4596187346017956e-05,
      "loss": 0.8463,
      "step": 168860
    },
    {
      "epoch": 0.5918472489459462,
      "grad_norm": 3.171875,
      "learning_rate": 4.459553831735425e-05,
      "loss": 0.9751,
      "step": 168870
    },
    {
      "epoch": 0.5918822964528418,
      "grad_norm": 2.859375,
      "learning_rate": 4.459488928869055e-05,
      "loss": 0.8573,
      "step": 168880
    },
    {
      "epoch": 0.5919173439597374,
      "grad_norm": 4.40625,
      "learning_rate": 4.4594240260026847e-05,
      "loss": 0.9584,
      "step": 168890
    },
    {
      "epoch": 0.591952391466633,
      "grad_norm": 3.21875,
      "learning_rate": 4.459359123136315e-05,
      "loss": 0.8926,
      "step": 168900
    },
    {
      "epoch": 0.5919874389735286,
      "grad_norm": 2.953125,
      "learning_rate": 4.459294220269944e-05,
      "loss": 0.8483,
      "step": 168910
    },
    {
      "epoch": 0.5920224864804242,
      "grad_norm": 3.25,
      "learning_rate": 4.4592293174035744e-05,
      "loss": 1.0057,
      "step": 168920
    },
    {
      "epoch": 0.5920575339873199,
      "grad_norm": 2.671875,
      "learning_rate": 4.459164414537204e-05,
      "loss": 0.9489,
      "step": 168930
    },
    {
      "epoch": 0.5920925814942154,
      "grad_norm": 2.90625,
      "learning_rate": 4.459099511670833e-05,
      "loss": 0.919,
      "step": 168940
    },
    {
      "epoch": 0.592127629001111,
      "grad_norm": 2.84375,
      "learning_rate": 4.4590346088044635e-05,
      "loss": 0.9487,
      "step": 168950
    },
    {
      "epoch": 0.5921626765080066,
      "grad_norm": 2.578125,
      "learning_rate": 4.458969705938093e-05,
      "loss": 1.0086,
      "step": 168960
    },
    {
      "epoch": 0.5921977240149022,
      "grad_norm": 3.046875,
      "learning_rate": 4.458904803071723e-05,
      "loss": 0.8603,
      "step": 168970
    },
    {
      "epoch": 0.5922327715217978,
      "grad_norm": 2.953125,
      "learning_rate": 4.4588399002053525e-05,
      "loss": 0.9477,
      "step": 168980
    },
    {
      "epoch": 0.5922678190286934,
      "grad_norm": 3.421875,
      "learning_rate": 4.4587749973389827e-05,
      "loss": 0.892,
      "step": 168990
    },
    {
      "epoch": 0.592302866535589,
      "grad_norm": 3.203125,
      "learning_rate": 4.458710094472612e-05,
      "loss": 0.7476,
      "step": 169000
    },
    {
      "epoch": 0.5923379140424846,
      "grad_norm": 3.203125,
      "learning_rate": 4.458645191606242e-05,
      "loss": 0.8715,
      "step": 169010
    },
    {
      "epoch": 0.5923729615493802,
      "grad_norm": 3.171875,
      "learning_rate": 4.458580288739872e-05,
      "loss": 0.9542,
      "step": 169020
    },
    {
      "epoch": 0.5924080090562758,
      "grad_norm": 2.671875,
      "learning_rate": 4.458515385873502e-05,
      "loss": 0.8881,
      "step": 169030
    },
    {
      "epoch": 0.5924430565631714,
      "grad_norm": 2.609375,
      "learning_rate": 4.458450483007131e-05,
      "loss": 0.903,
      "step": 169040
    },
    {
      "epoch": 0.592478104070067,
      "grad_norm": 3.296875,
      "learning_rate": 4.4583855801407615e-05,
      "loss": 0.9092,
      "step": 169050
    },
    {
      "epoch": 0.5925131515769626,
      "grad_norm": 2.53125,
      "learning_rate": 4.4583206772743916e-05,
      "loss": 0.8942,
      "step": 169060
    },
    {
      "epoch": 0.5925481990838581,
      "grad_norm": 3.09375,
      "learning_rate": 4.458255774408021e-05,
      "loss": 0.9465,
      "step": 169070
    },
    {
      "epoch": 0.5925832465907538,
      "grad_norm": 3.453125,
      "learning_rate": 4.458190871541651e-05,
      "loss": 0.8383,
      "step": 169080
    },
    {
      "epoch": 0.5926182940976493,
      "grad_norm": 3.375,
      "learning_rate": 4.4581259686752807e-05,
      "loss": 0.914,
      "step": 169090
    },
    {
      "epoch": 0.592653341604545,
      "grad_norm": 2.96875,
      "learning_rate": 4.458061065808911e-05,
      "loss": 0.8708,
      "step": 169100
    },
    {
      "epoch": 0.5926883891114405,
      "grad_norm": 3.3125,
      "learning_rate": 4.45799616294254e-05,
      "loss": 0.9259,
      "step": 169110
    },
    {
      "epoch": 0.5927234366183362,
      "grad_norm": 3.03125,
      "learning_rate": 4.4579312600761704e-05,
      "loss": 0.8749,
      "step": 169120
    },
    {
      "epoch": 0.5927584841252318,
      "grad_norm": 2.78125,
      "learning_rate": 4.4578663572098e-05,
      "loss": 0.8468,
      "step": 169130
    },
    {
      "epoch": 0.5927935316321273,
      "grad_norm": 3.09375,
      "learning_rate": 4.45780145434343e-05,
      "loss": 0.8798,
      "step": 169140
    },
    {
      "epoch": 0.592828579139023,
      "grad_norm": 3.59375,
      "learning_rate": 4.4577365514770595e-05,
      "loss": 0.8428,
      "step": 169150
    },
    {
      "epoch": 0.5928636266459185,
      "grad_norm": 3.25,
      "learning_rate": 4.4576716486106896e-05,
      "loss": 0.8488,
      "step": 169160
    },
    {
      "epoch": 0.5928986741528142,
      "grad_norm": 2.96875,
      "learning_rate": 4.457606745744319e-05,
      "loss": 0.9779,
      "step": 169170
    },
    {
      "epoch": 0.5929337216597097,
      "grad_norm": 2.796875,
      "learning_rate": 4.457541842877949e-05,
      "loss": 0.9112,
      "step": 169180
    },
    {
      "epoch": 0.5929687691666053,
      "grad_norm": 3.078125,
      "learning_rate": 4.457476940011579e-05,
      "loss": 1.0011,
      "step": 169190
    },
    {
      "epoch": 0.5930038166735009,
      "grad_norm": 3.015625,
      "learning_rate": 4.457412037145209e-05,
      "loss": 0.9373,
      "step": 169200
    },
    {
      "epoch": 0.5930388641803965,
      "grad_norm": 2.5625,
      "learning_rate": 4.457347134278839e-05,
      "loss": 0.893,
      "step": 169210
    },
    {
      "epoch": 0.5930739116872922,
      "grad_norm": 2.984375,
      "learning_rate": 4.4572822314124684e-05,
      "loss": 0.8422,
      "step": 169220
    },
    {
      "epoch": 0.5931089591941877,
      "grad_norm": 2.453125,
      "learning_rate": 4.4572173285460985e-05,
      "loss": 0.8714,
      "step": 169230
    },
    {
      "epoch": 0.5931440067010834,
      "grad_norm": 2.609375,
      "learning_rate": 4.457152425679728e-05,
      "loss": 0.9188,
      "step": 169240
    },
    {
      "epoch": 0.5931790542079789,
      "grad_norm": 2.953125,
      "learning_rate": 4.457087522813358e-05,
      "loss": 0.9211,
      "step": 169250
    },
    {
      "epoch": 0.5932141017148745,
      "grad_norm": 2.484375,
      "learning_rate": 4.4570226199469876e-05,
      "loss": 0.9584,
      "step": 169260
    },
    {
      "epoch": 0.5932491492217701,
      "grad_norm": 3.4375,
      "learning_rate": 4.456957717080618e-05,
      "loss": 0.8895,
      "step": 169270
    },
    {
      "epoch": 0.5932841967286657,
      "grad_norm": 3.0625,
      "learning_rate": 4.456892814214247e-05,
      "loss": 0.877,
      "step": 169280
    },
    {
      "epoch": 0.5933192442355613,
      "grad_norm": 3.53125,
      "learning_rate": 4.456827911347877e-05,
      "loss": 0.9124,
      "step": 169290
    },
    {
      "epoch": 0.5933542917424569,
      "grad_norm": 3.578125,
      "learning_rate": 4.456763008481507e-05,
      "loss": 0.9237,
      "step": 169300
    },
    {
      "epoch": 0.5933893392493524,
      "grad_norm": 3.1875,
      "learning_rate": 4.456698105615136e-05,
      "loss": 0.9036,
      "step": 169310
    },
    {
      "epoch": 0.5934243867562481,
      "grad_norm": 3.09375,
      "learning_rate": 4.4566332027487664e-05,
      "loss": 0.9563,
      "step": 169320
    },
    {
      "epoch": 0.5934594342631437,
      "grad_norm": 3.03125,
      "learning_rate": 4.456568299882396e-05,
      "loss": 0.967,
      "step": 169330
    },
    {
      "epoch": 0.5934944817700393,
      "grad_norm": 3.3125,
      "learning_rate": 4.456503397016026e-05,
      "loss": 0.9425,
      "step": 169340
    },
    {
      "epoch": 0.5935295292769349,
      "grad_norm": 3.1875,
      "learning_rate": 4.4564384941496555e-05,
      "loss": 0.8931,
      "step": 169350
    },
    {
      "epoch": 0.5935645767838305,
      "grad_norm": 3.0,
      "learning_rate": 4.4563735912832856e-05,
      "loss": 0.8831,
      "step": 169360
    },
    {
      "epoch": 0.5935996242907261,
      "grad_norm": 3.171875,
      "learning_rate": 4.456308688416915e-05,
      "loss": 0.8984,
      "step": 169370
    },
    {
      "epoch": 0.5936346717976216,
      "grad_norm": 3.046875,
      "learning_rate": 4.456243785550545e-05,
      "loss": 0.8765,
      "step": 169380
    },
    {
      "epoch": 0.5936697193045173,
      "grad_norm": 3.21875,
      "learning_rate": 4.4561788826841747e-05,
      "loss": 0.9946,
      "step": 169390
    },
    {
      "epoch": 0.5937047668114128,
      "grad_norm": 3.1875,
      "learning_rate": 4.456113979817805e-05,
      "loss": 0.9602,
      "step": 169400
    },
    {
      "epoch": 0.5937398143183085,
      "grad_norm": 3.28125,
      "learning_rate": 4.456049076951434e-05,
      "loss": 0.8319,
      "step": 169410
    },
    {
      "epoch": 0.5937748618252041,
      "grad_norm": 3.125,
      "learning_rate": 4.4559841740850644e-05,
      "loss": 0.908,
      "step": 169420
    },
    {
      "epoch": 0.5938099093320997,
      "grad_norm": 2.71875,
      "learning_rate": 4.4559192712186945e-05,
      "loss": 0.8829,
      "step": 169430
    },
    {
      "epoch": 0.5938449568389953,
      "grad_norm": 3.125,
      "learning_rate": 4.455854368352324e-05,
      "loss": 0.8872,
      "step": 169440
    },
    {
      "epoch": 0.5938800043458908,
      "grad_norm": 3.03125,
      "learning_rate": 4.455789465485954e-05,
      "loss": 0.925,
      "step": 169450
    },
    {
      "epoch": 0.5939150518527865,
      "grad_norm": 3.890625,
      "learning_rate": 4.4557245626195836e-05,
      "loss": 0.9329,
      "step": 169460
    },
    {
      "epoch": 0.593950099359682,
      "grad_norm": 2.96875,
      "learning_rate": 4.455659659753214e-05,
      "loss": 0.9313,
      "step": 169470
    },
    {
      "epoch": 0.5939851468665777,
      "grad_norm": 2.734375,
      "learning_rate": 4.455594756886843e-05,
      "loss": 0.826,
      "step": 169480
    },
    {
      "epoch": 0.5940201943734732,
      "grad_norm": 3.046875,
      "learning_rate": 4.455529854020473e-05,
      "loss": 0.903,
      "step": 169490
    },
    {
      "epoch": 0.5940552418803688,
      "grad_norm": 3.125,
      "learning_rate": 4.455464951154103e-05,
      "loss": 0.8197,
      "step": 169500
    },
    {
      "epoch": 0.5940902893872645,
      "grad_norm": 2.984375,
      "learning_rate": 4.455400048287733e-05,
      "loss": 0.9881,
      "step": 169510
    },
    {
      "epoch": 0.59412533689416,
      "grad_norm": 2.453125,
      "learning_rate": 4.4553351454213624e-05,
      "loss": 0.8734,
      "step": 169520
    },
    {
      "epoch": 0.5941603844010557,
      "grad_norm": 3.171875,
      "learning_rate": 4.4552702425549925e-05,
      "loss": 0.9717,
      "step": 169530
    },
    {
      "epoch": 0.5941954319079512,
      "grad_norm": 2.984375,
      "learning_rate": 4.455205339688622e-05,
      "loss": 0.8785,
      "step": 169540
    },
    {
      "epoch": 0.5942304794148469,
      "grad_norm": 2.703125,
      "learning_rate": 4.455140436822252e-05,
      "loss": 0.8693,
      "step": 169550
    },
    {
      "epoch": 0.5942655269217424,
      "grad_norm": 2.875,
      "learning_rate": 4.455075533955882e-05,
      "loss": 0.8288,
      "step": 169560
    },
    {
      "epoch": 0.594300574428638,
      "grad_norm": 2.734375,
      "learning_rate": 4.455010631089512e-05,
      "loss": 0.93,
      "step": 169570
    },
    {
      "epoch": 0.5943356219355336,
      "grad_norm": 2.578125,
      "learning_rate": 4.454945728223142e-05,
      "loss": 0.8816,
      "step": 169580
    },
    {
      "epoch": 0.5943706694424292,
      "grad_norm": 2.734375,
      "learning_rate": 4.454880825356771e-05,
      "loss": 1.0087,
      "step": 169590
    },
    {
      "epoch": 0.5944057169493248,
      "grad_norm": 3.015625,
      "learning_rate": 4.4548159224904015e-05,
      "loss": 0.9832,
      "step": 169600
    },
    {
      "epoch": 0.5944407644562204,
      "grad_norm": 3.15625,
      "learning_rate": 4.454751019624031e-05,
      "loss": 0.9217,
      "step": 169610
    },
    {
      "epoch": 0.594475811963116,
      "grad_norm": 3.046875,
      "learning_rate": 4.454686116757661e-05,
      "loss": 0.9223,
      "step": 169620
    },
    {
      "epoch": 0.5945108594700116,
      "grad_norm": 2.921875,
      "learning_rate": 4.4546212138912905e-05,
      "loss": 0.949,
      "step": 169630
    },
    {
      "epoch": 0.5945459069769072,
      "grad_norm": 3.0625,
      "learning_rate": 4.454556311024921e-05,
      "loss": 0.9494,
      "step": 169640
    },
    {
      "epoch": 0.5945809544838028,
      "grad_norm": 3.25,
      "learning_rate": 4.45449140815855e-05,
      "loss": 0.8799,
      "step": 169650
    },
    {
      "epoch": 0.5946160019906984,
      "grad_norm": 3.15625,
      "learning_rate": 4.45442650529218e-05,
      "loss": 0.9829,
      "step": 169660
    },
    {
      "epoch": 0.594651049497594,
      "grad_norm": 2.796875,
      "learning_rate": 4.45436160242581e-05,
      "loss": 0.8422,
      "step": 169670
    },
    {
      "epoch": 0.5946860970044896,
      "grad_norm": 2.671875,
      "learning_rate": 4.454296699559439e-05,
      "loss": 0.8506,
      "step": 169680
    },
    {
      "epoch": 0.5947211445113851,
      "grad_norm": 3.015625,
      "learning_rate": 4.454231796693069e-05,
      "loss": 0.8646,
      "step": 169690
    },
    {
      "epoch": 0.5947561920182808,
      "grad_norm": 3.203125,
      "learning_rate": 4.454166893826699e-05,
      "loss": 0.8885,
      "step": 169700
    },
    {
      "epoch": 0.5947912395251764,
      "grad_norm": 3.078125,
      "learning_rate": 4.454101990960329e-05,
      "loss": 0.9793,
      "step": 169710
    },
    {
      "epoch": 0.594826287032072,
      "grad_norm": 2.953125,
      "learning_rate": 4.4540370880939584e-05,
      "loss": 0.8694,
      "step": 169720
    },
    {
      "epoch": 0.5948613345389676,
      "grad_norm": 3.125,
      "learning_rate": 4.4539721852275885e-05,
      "loss": 0.9416,
      "step": 169730
    },
    {
      "epoch": 0.5948963820458631,
      "grad_norm": 2.734375,
      "learning_rate": 4.453907282361218e-05,
      "loss": 0.856,
      "step": 169740
    },
    {
      "epoch": 0.5949314295527588,
      "grad_norm": 3.140625,
      "learning_rate": 4.453842379494848e-05,
      "loss": 0.9692,
      "step": 169750
    },
    {
      "epoch": 0.5949664770596543,
      "grad_norm": 2.96875,
      "learning_rate": 4.4537774766284776e-05,
      "loss": 0.9408,
      "step": 169760
    },
    {
      "epoch": 0.59500152456655,
      "grad_norm": 2.96875,
      "learning_rate": 4.453712573762108e-05,
      "loss": 0.94,
      "step": 169770
    },
    {
      "epoch": 0.5950365720734455,
      "grad_norm": 3.578125,
      "learning_rate": 4.453647670895738e-05,
      "loss": 0.957,
      "step": 169780
    },
    {
      "epoch": 0.5950716195803412,
      "grad_norm": 2.765625,
      "learning_rate": 4.453582768029367e-05,
      "loss": 0.9209,
      "step": 169790
    },
    {
      "epoch": 0.5951066670872367,
      "grad_norm": 3.109375,
      "learning_rate": 4.4535178651629975e-05,
      "loss": 0.9401,
      "step": 169800
    },
    {
      "epoch": 0.5951417145941323,
      "grad_norm": 2.796875,
      "learning_rate": 4.453452962296627e-05,
      "loss": 0.9426,
      "step": 169810
    },
    {
      "epoch": 0.595176762101028,
      "grad_norm": 3.0625,
      "learning_rate": 4.453388059430257e-05,
      "loss": 0.9633,
      "step": 169820
    },
    {
      "epoch": 0.5952118096079235,
      "grad_norm": 3.328125,
      "learning_rate": 4.4533231565638865e-05,
      "loss": 0.912,
      "step": 169830
    },
    {
      "epoch": 0.5952468571148192,
      "grad_norm": 3.171875,
      "learning_rate": 4.453258253697517e-05,
      "loss": 0.9417,
      "step": 169840
    },
    {
      "epoch": 0.5952819046217147,
      "grad_norm": 3.109375,
      "learning_rate": 4.453193350831146e-05,
      "loss": 0.9675,
      "step": 169850
    },
    {
      "epoch": 0.5953169521286104,
      "grad_norm": 2.859375,
      "learning_rate": 4.453128447964776e-05,
      "loss": 0.8699,
      "step": 169860
    },
    {
      "epoch": 0.5953519996355059,
      "grad_norm": 2.890625,
      "learning_rate": 4.453063545098406e-05,
      "loss": 0.8948,
      "step": 169870
    },
    {
      "epoch": 0.5953870471424015,
      "grad_norm": 3.390625,
      "learning_rate": 4.452998642232036e-05,
      "loss": 0.9952,
      "step": 169880
    },
    {
      "epoch": 0.5954220946492971,
      "grad_norm": 3.125,
      "learning_rate": 4.452933739365665e-05,
      "loss": 0.8835,
      "step": 169890
    },
    {
      "epoch": 0.5954571421561927,
      "grad_norm": 2.734375,
      "learning_rate": 4.4528688364992955e-05,
      "loss": 0.854,
      "step": 169900
    },
    {
      "epoch": 0.5954921896630884,
      "grad_norm": 3.359375,
      "learning_rate": 4.452803933632925e-05,
      "loss": 0.9366,
      "step": 169910
    },
    {
      "epoch": 0.5955272371699839,
      "grad_norm": 2.578125,
      "learning_rate": 4.452739030766555e-05,
      "loss": 0.9991,
      "step": 169920
    },
    {
      "epoch": 0.5955622846768795,
      "grad_norm": 3.046875,
      "learning_rate": 4.452674127900185e-05,
      "loss": 0.8569,
      "step": 169930
    },
    {
      "epoch": 0.5955973321837751,
      "grad_norm": 3.046875,
      "learning_rate": 4.452609225033815e-05,
      "loss": 0.9085,
      "step": 169940
    },
    {
      "epoch": 0.5956323796906707,
      "grad_norm": 2.703125,
      "learning_rate": 4.452544322167445e-05,
      "loss": 0.951,
      "step": 169950
    },
    {
      "epoch": 0.5956674271975663,
      "grad_norm": 3.015625,
      "learning_rate": 4.452479419301074e-05,
      "loss": 0.9521,
      "step": 169960
    },
    {
      "epoch": 0.5957024747044619,
      "grad_norm": 2.796875,
      "learning_rate": 4.4524145164347044e-05,
      "loss": 0.9474,
      "step": 169970
    },
    {
      "epoch": 0.5957375222113575,
      "grad_norm": 2.859375,
      "learning_rate": 4.452349613568334e-05,
      "loss": 0.9086,
      "step": 169980
    },
    {
      "epoch": 0.5957725697182531,
      "grad_norm": 2.921875,
      "learning_rate": 4.452284710701964e-05,
      "loss": 0.8863,
      "step": 169990
    },
    {
      "epoch": 0.5958076172251486,
      "grad_norm": 3.1875,
      "learning_rate": 4.4522198078355935e-05,
      "loss": 0.9515,
      "step": 170000
    },
    {
      "epoch": 0.5958076172251486,
      "eval_loss": 0.8590320348739624,
      "eval_runtime": 553.3297,
      "eval_samples_per_second": 687.539,
      "eval_steps_per_second": 57.295,
      "step": 170000
    },
    {
      "epoch": 0.5958426647320443,
      "grad_norm": 3.265625,
      "learning_rate": 4.4521549049692236e-05,
      "loss": 0.8856,
      "step": 170010
    },
    {
      "epoch": 0.5958777122389399,
      "grad_norm": 3.0,
      "learning_rate": 4.452090002102853e-05,
      "loss": 0.9041,
      "step": 170020
    },
    {
      "epoch": 0.5959127597458355,
      "grad_norm": 2.703125,
      "learning_rate": 4.452025099236483e-05,
      "loss": 0.8464,
      "step": 170030
    },
    {
      "epoch": 0.5959478072527311,
      "grad_norm": 3.125,
      "learning_rate": 4.451960196370113e-05,
      "loss": 0.8989,
      "step": 170040
    },
    {
      "epoch": 0.5959828547596266,
      "grad_norm": 3.046875,
      "learning_rate": 4.451895293503742e-05,
      "loss": 0.9271,
      "step": 170050
    },
    {
      "epoch": 0.5960179022665223,
      "grad_norm": 2.78125,
      "learning_rate": 4.451830390637372e-05,
      "loss": 0.8767,
      "step": 170060
    },
    {
      "epoch": 0.5960529497734178,
      "grad_norm": 2.796875,
      "learning_rate": 4.451765487771002e-05,
      "loss": 0.8969,
      "step": 170070
    },
    {
      "epoch": 0.5960879972803135,
      "grad_norm": 3.25,
      "learning_rate": 4.451700584904632e-05,
      "loss": 0.9844,
      "step": 170080
    },
    {
      "epoch": 0.596123044787209,
      "grad_norm": 2.75,
      "learning_rate": 4.451635682038261e-05,
      "loss": 0.8615,
      "step": 170090
    },
    {
      "epoch": 0.5961580922941047,
      "grad_norm": 3.046875,
      "learning_rate": 4.4515707791718915e-05,
      "loss": 0.875,
      "step": 170100
    },
    {
      "epoch": 0.5961931398010003,
      "grad_norm": 3.0625,
      "learning_rate": 4.451505876305521e-05,
      "loss": 1.025,
      "step": 170110
    },
    {
      "epoch": 0.5962281873078958,
      "grad_norm": 2.6875,
      "learning_rate": 4.451440973439151e-05,
      "loss": 0.8571,
      "step": 170120
    },
    {
      "epoch": 0.5962632348147915,
      "grad_norm": 2.9375,
      "learning_rate": 4.4513760705727805e-05,
      "loss": 0.9484,
      "step": 170130
    },
    {
      "epoch": 0.596298282321687,
      "grad_norm": 3.1875,
      "learning_rate": 4.451311167706411e-05,
      "loss": 0.9082,
      "step": 170140
    },
    {
      "epoch": 0.5963333298285827,
      "grad_norm": 3.03125,
      "learning_rate": 4.451246264840041e-05,
      "loss": 0.8481,
      "step": 170150
    },
    {
      "epoch": 0.5963683773354782,
      "grad_norm": 2.890625,
      "learning_rate": 4.45118136197367e-05,
      "loss": 0.949,
      "step": 170160
    },
    {
      "epoch": 0.5964034248423739,
      "grad_norm": 2.6875,
      "learning_rate": 4.4511164591073004e-05,
      "loss": 0.9328,
      "step": 170170
    },
    {
      "epoch": 0.5964384723492694,
      "grad_norm": 2.53125,
      "learning_rate": 4.45105155624093e-05,
      "loss": 0.9011,
      "step": 170180
    },
    {
      "epoch": 0.596473519856165,
      "grad_norm": 2.828125,
      "learning_rate": 4.45098665337456e-05,
      "loss": 0.8436,
      "step": 170190
    },
    {
      "epoch": 0.5965085673630607,
      "grad_norm": 2.65625,
      "learning_rate": 4.4509217505081895e-05,
      "loss": 0.9093,
      "step": 170200
    },
    {
      "epoch": 0.5965436148699562,
      "grad_norm": 2.984375,
      "learning_rate": 4.4508568476418196e-05,
      "loss": 0.9175,
      "step": 170210
    },
    {
      "epoch": 0.5965786623768519,
      "grad_norm": 3.125,
      "learning_rate": 4.450791944775449e-05,
      "loss": 0.9463,
      "step": 170220
    },
    {
      "epoch": 0.5966137098837474,
      "grad_norm": 2.640625,
      "learning_rate": 4.450727041909079e-05,
      "loss": 0.985,
      "step": 170230
    },
    {
      "epoch": 0.596648757390643,
      "grad_norm": 3.203125,
      "learning_rate": 4.450662139042709e-05,
      "loss": 0.9695,
      "step": 170240
    },
    {
      "epoch": 0.5966838048975386,
      "grad_norm": 2.765625,
      "learning_rate": 4.450597236176339e-05,
      "loss": 0.8091,
      "step": 170250
    },
    {
      "epoch": 0.5967188524044342,
      "grad_norm": 2.5,
      "learning_rate": 4.450532333309968e-05,
      "loss": 0.8313,
      "step": 170260
    },
    {
      "epoch": 0.5967538999113298,
      "grad_norm": 2.78125,
      "learning_rate": 4.4504674304435984e-05,
      "loss": 0.9536,
      "step": 170270
    },
    {
      "epoch": 0.5967889474182254,
      "grad_norm": 3.34375,
      "learning_rate": 4.450402527577228e-05,
      "loss": 1.0061,
      "step": 170280
    },
    {
      "epoch": 0.596823994925121,
      "grad_norm": 3.359375,
      "learning_rate": 4.450337624710858e-05,
      "loss": 0.9253,
      "step": 170290
    },
    {
      "epoch": 0.5968590424320166,
      "grad_norm": 3.21875,
      "learning_rate": 4.450272721844488e-05,
      "loss": 0.8862,
      "step": 170300
    },
    {
      "epoch": 0.5968940899389122,
      "grad_norm": 2.78125,
      "learning_rate": 4.4502078189781176e-05,
      "loss": 0.8695,
      "step": 170310
    },
    {
      "epoch": 0.5969291374458078,
      "grad_norm": 3.578125,
      "learning_rate": 4.450142916111748e-05,
      "loss": 0.8956,
      "step": 170320
    },
    {
      "epoch": 0.5969641849527034,
      "grad_norm": 3.234375,
      "learning_rate": 4.450078013245377e-05,
      "loss": 0.9367,
      "step": 170330
    },
    {
      "epoch": 0.596999232459599,
      "grad_norm": 2.703125,
      "learning_rate": 4.4500131103790073e-05,
      "loss": 0.9452,
      "step": 170340
    },
    {
      "epoch": 0.5970342799664946,
      "grad_norm": 3.0,
      "learning_rate": 4.449948207512637e-05,
      "loss": 1.0219,
      "step": 170350
    },
    {
      "epoch": 0.5970693274733901,
      "grad_norm": 2.765625,
      "learning_rate": 4.449883304646267e-05,
      "loss": 0.9141,
      "step": 170360
    },
    {
      "epoch": 0.5971043749802858,
      "grad_norm": 3.09375,
      "learning_rate": 4.4498184017798964e-05,
      "loss": 0.9697,
      "step": 170370
    },
    {
      "epoch": 0.5971394224871813,
      "grad_norm": 3.21875,
      "learning_rate": 4.4497534989135265e-05,
      "loss": 0.9228,
      "step": 170380
    },
    {
      "epoch": 0.597174469994077,
      "grad_norm": 3.234375,
      "learning_rate": 4.449688596047156e-05,
      "loss": 0.8675,
      "step": 170390
    },
    {
      "epoch": 0.5972095175009726,
      "grad_norm": 3.125,
      "learning_rate": 4.449623693180786e-05,
      "loss": 0.9689,
      "step": 170400
    },
    {
      "epoch": 0.5972445650078682,
      "grad_norm": 3.234375,
      "learning_rate": 4.4495587903144156e-05,
      "loss": 0.9218,
      "step": 170410
    },
    {
      "epoch": 0.5972796125147638,
      "grad_norm": 2.84375,
      "learning_rate": 4.449493887448046e-05,
      "loss": 0.9874,
      "step": 170420
    },
    {
      "epoch": 0.5973146600216593,
      "grad_norm": 2.984375,
      "learning_rate": 4.449428984581675e-05,
      "loss": 0.8976,
      "step": 170430
    },
    {
      "epoch": 0.597349707528555,
      "grad_norm": 3.0625,
      "learning_rate": 4.449364081715305e-05,
      "loss": 0.9583,
      "step": 170440
    },
    {
      "epoch": 0.5973847550354505,
      "grad_norm": 3.109375,
      "learning_rate": 4.449299178848935e-05,
      "loss": 0.9188,
      "step": 170450
    },
    {
      "epoch": 0.5974198025423462,
      "grad_norm": 2.34375,
      "learning_rate": 4.449234275982564e-05,
      "loss": 0.8616,
      "step": 170460
    },
    {
      "epoch": 0.5974548500492417,
      "grad_norm": 2.53125,
      "learning_rate": 4.4491693731161944e-05,
      "loss": 0.8752,
      "step": 170470
    },
    {
      "epoch": 0.5974898975561374,
      "grad_norm": 3.171875,
      "learning_rate": 4.449104470249824e-05,
      "loss": 0.8903,
      "step": 170480
    },
    {
      "epoch": 0.5975249450630329,
      "grad_norm": 3.1875,
      "learning_rate": 4.449039567383454e-05,
      "loss": 0.9205,
      "step": 170490
    },
    {
      "epoch": 0.5975599925699285,
      "grad_norm": 2.796875,
      "learning_rate": 4.4489746645170835e-05,
      "loss": 0.9034,
      "step": 170500
    },
    {
      "epoch": 0.5975950400768242,
      "grad_norm": 3.03125,
      "learning_rate": 4.4489097616507136e-05,
      "loss": 0.9866,
      "step": 170510
    },
    {
      "epoch": 0.5976300875837197,
      "grad_norm": 3.125,
      "learning_rate": 4.448844858784344e-05,
      "loss": 0.8627,
      "step": 170520
    },
    {
      "epoch": 0.5976651350906154,
      "grad_norm": 2.890625,
      "learning_rate": 4.448779955917973e-05,
      "loss": 0.9858,
      "step": 170530
    },
    {
      "epoch": 0.5977001825975109,
      "grad_norm": 3.34375,
      "learning_rate": 4.4487150530516033e-05,
      "loss": 0.9058,
      "step": 170540
    },
    {
      "epoch": 0.5977352301044065,
      "grad_norm": 2.890625,
      "learning_rate": 4.448650150185233e-05,
      "loss": 0.937,
      "step": 170550
    },
    {
      "epoch": 0.5977702776113021,
      "grad_norm": 2.6875,
      "learning_rate": 4.448585247318863e-05,
      "loss": 0.901,
      "step": 170560
    },
    {
      "epoch": 0.5978053251181977,
      "grad_norm": 2.890625,
      "learning_rate": 4.4485203444524924e-05,
      "loss": 0.9431,
      "step": 170570
    },
    {
      "epoch": 0.5978403726250933,
      "grad_norm": 2.921875,
      "learning_rate": 4.4484554415861225e-05,
      "loss": 0.9811,
      "step": 170580
    },
    {
      "epoch": 0.5978754201319889,
      "grad_norm": 2.75,
      "learning_rate": 4.448390538719752e-05,
      "loss": 0.9327,
      "step": 170590
    },
    {
      "epoch": 0.5979104676388846,
      "grad_norm": 2.96875,
      "learning_rate": 4.448325635853382e-05,
      "loss": 1.0062,
      "step": 170600
    },
    {
      "epoch": 0.5979455151457801,
      "grad_norm": 3.0,
      "learning_rate": 4.4482607329870116e-05,
      "loss": 0.9194,
      "step": 170610
    },
    {
      "epoch": 0.5979805626526757,
      "grad_norm": 3.09375,
      "learning_rate": 4.448195830120642e-05,
      "loss": 0.9064,
      "step": 170620
    },
    {
      "epoch": 0.5980156101595713,
      "grad_norm": 3.015625,
      "learning_rate": 4.448130927254271e-05,
      "loss": 0.9664,
      "step": 170630
    },
    {
      "epoch": 0.5980506576664669,
      "grad_norm": 3.15625,
      "learning_rate": 4.4480660243879013e-05,
      "loss": 0.9265,
      "step": 170640
    },
    {
      "epoch": 0.5980857051733625,
      "grad_norm": 2.8125,
      "learning_rate": 4.4480011215215315e-05,
      "loss": 0.9256,
      "step": 170650
    },
    {
      "epoch": 0.5981207526802581,
      "grad_norm": 2.84375,
      "learning_rate": 4.447936218655161e-05,
      "loss": 0.7945,
      "step": 170660
    },
    {
      "epoch": 0.5981558001871536,
      "grad_norm": 3.109375,
      "learning_rate": 4.447871315788791e-05,
      "loss": 0.915,
      "step": 170670
    },
    {
      "epoch": 0.5981908476940493,
      "grad_norm": 3.046875,
      "learning_rate": 4.4478064129224205e-05,
      "loss": 0.921,
      "step": 170680
    },
    {
      "epoch": 0.5982258952009449,
      "grad_norm": 3.359375,
      "learning_rate": 4.447741510056051e-05,
      "loss": 0.9483,
      "step": 170690
    },
    {
      "epoch": 0.5982609427078405,
      "grad_norm": 3.515625,
      "learning_rate": 4.44767660718968e-05,
      "loss": 0.9308,
      "step": 170700
    },
    {
      "epoch": 0.5982959902147361,
      "grad_norm": 2.71875,
      "learning_rate": 4.44761170432331e-05,
      "loss": 0.8487,
      "step": 170710
    },
    {
      "epoch": 0.5983310377216317,
      "grad_norm": 3.453125,
      "learning_rate": 4.44754680145694e-05,
      "loss": 0.9017,
      "step": 170720
    },
    {
      "epoch": 0.5983660852285273,
      "grad_norm": 3.171875,
      "learning_rate": 4.44748189859057e-05,
      "loss": 0.9244,
      "step": 170730
    },
    {
      "epoch": 0.5984011327354228,
      "grad_norm": 2.875,
      "learning_rate": 4.4474169957241993e-05,
      "loss": 1.0269,
      "step": 170740
    },
    {
      "epoch": 0.5984361802423185,
      "grad_norm": 2.8125,
      "learning_rate": 4.4473520928578295e-05,
      "loss": 0.8451,
      "step": 170750
    },
    {
      "epoch": 0.598471227749214,
      "grad_norm": 2.453125,
      "learning_rate": 4.447287189991459e-05,
      "loss": 0.9048,
      "step": 170760
    },
    {
      "epoch": 0.5985062752561097,
      "grad_norm": 2.828125,
      "learning_rate": 4.447222287125089e-05,
      "loss": 0.8394,
      "step": 170770
    },
    {
      "epoch": 0.5985413227630052,
      "grad_norm": 2.671875,
      "learning_rate": 4.4471573842587185e-05,
      "loss": 0.9239,
      "step": 170780
    },
    {
      "epoch": 0.5985763702699008,
      "grad_norm": 3.1875,
      "learning_rate": 4.447092481392349e-05,
      "loss": 0.8875,
      "step": 170790
    },
    {
      "epoch": 0.5986114177767965,
      "grad_norm": 3.265625,
      "learning_rate": 4.447027578525979e-05,
      "loss": 0.9734,
      "step": 170800
    },
    {
      "epoch": 0.598646465283692,
      "grad_norm": 3.25,
      "learning_rate": 4.4469626756596076e-05,
      "loss": 0.9707,
      "step": 170810
    },
    {
      "epoch": 0.5986815127905877,
      "grad_norm": 3.171875,
      "learning_rate": 4.446897772793238e-05,
      "loss": 0.9933,
      "step": 170820
    },
    {
      "epoch": 0.5987165602974832,
      "grad_norm": 2.8125,
      "learning_rate": 4.446832869926867e-05,
      "loss": 0.9203,
      "step": 170830
    },
    {
      "epoch": 0.5987516078043789,
      "grad_norm": 3.109375,
      "learning_rate": 4.4467679670604973e-05,
      "loss": 0.875,
      "step": 170840
    },
    {
      "epoch": 0.5987866553112744,
      "grad_norm": 2.671875,
      "learning_rate": 4.446703064194127e-05,
      "loss": 0.8329,
      "step": 170850
    },
    {
      "epoch": 0.59882170281817,
      "grad_norm": 2.921875,
      "learning_rate": 4.446638161327757e-05,
      "loss": 0.8936,
      "step": 170860
    },
    {
      "epoch": 0.5988567503250656,
      "grad_norm": 2.484375,
      "learning_rate": 4.4465732584613864e-05,
      "loss": 0.94,
      "step": 170870
    },
    {
      "epoch": 0.5988917978319612,
      "grad_norm": 3.078125,
      "learning_rate": 4.4465083555950165e-05,
      "loss": 0.8609,
      "step": 170880
    },
    {
      "epoch": 0.5989268453388569,
      "grad_norm": 3.515625,
      "learning_rate": 4.446443452728647e-05,
      "loss": 0.9586,
      "step": 170890
    },
    {
      "epoch": 0.5989618928457524,
      "grad_norm": 2.859375,
      "learning_rate": 4.446378549862276e-05,
      "loss": 0.8766,
      "step": 170900
    },
    {
      "epoch": 0.598996940352648,
      "grad_norm": 3.0,
      "learning_rate": 4.446313646995906e-05,
      "loss": 0.9656,
      "step": 170910
    },
    {
      "epoch": 0.5990319878595436,
      "grad_norm": 3.359375,
      "learning_rate": 4.446248744129536e-05,
      "loss": 0.9417,
      "step": 170920
    },
    {
      "epoch": 0.5990670353664392,
      "grad_norm": 2.53125,
      "learning_rate": 4.446183841263166e-05,
      "loss": 0.8615,
      "step": 170930
    },
    {
      "epoch": 0.5991020828733348,
      "grad_norm": 2.6875,
      "learning_rate": 4.4461189383967953e-05,
      "loss": 0.9075,
      "step": 170940
    },
    {
      "epoch": 0.5991371303802304,
      "grad_norm": 3.15625,
      "learning_rate": 4.4460540355304255e-05,
      "loss": 0.9195,
      "step": 170950
    },
    {
      "epoch": 0.599172177887126,
      "grad_norm": 2.65625,
      "learning_rate": 4.445989132664055e-05,
      "loss": 0.9168,
      "step": 170960
    },
    {
      "epoch": 0.5992072253940216,
      "grad_norm": 2.9375,
      "learning_rate": 4.445924229797685e-05,
      "loss": 0.8663,
      "step": 170970
    },
    {
      "epoch": 0.5992422729009171,
      "grad_norm": 2.765625,
      "learning_rate": 4.4458593269313145e-05,
      "loss": 0.8462,
      "step": 170980
    },
    {
      "epoch": 0.5992773204078128,
      "grad_norm": 2.90625,
      "learning_rate": 4.445794424064945e-05,
      "loss": 0.842,
      "step": 170990
    },
    {
      "epoch": 0.5993123679147084,
      "grad_norm": 3.5625,
      "learning_rate": 4.445729521198574e-05,
      "loss": 0.8982,
      "step": 171000
    },
    {
      "epoch": 0.599347415421604,
      "grad_norm": 3.015625,
      "learning_rate": 4.445664618332204e-05,
      "loss": 0.9209,
      "step": 171010
    },
    {
      "epoch": 0.5993824629284996,
      "grad_norm": 3.171875,
      "learning_rate": 4.4455997154658344e-05,
      "loss": 0.9529,
      "step": 171020
    },
    {
      "epoch": 0.5994175104353952,
      "grad_norm": 3.140625,
      "learning_rate": 4.445534812599464e-05,
      "loss": 0.933,
      "step": 171030
    },
    {
      "epoch": 0.5994525579422908,
      "grad_norm": 2.8125,
      "learning_rate": 4.445469909733094e-05,
      "loss": 0.9693,
      "step": 171040
    },
    {
      "epoch": 0.5994876054491863,
      "grad_norm": 2.734375,
      "learning_rate": 4.4454050068667235e-05,
      "loss": 0.9025,
      "step": 171050
    },
    {
      "epoch": 0.599522652956082,
      "grad_norm": 2.984375,
      "learning_rate": 4.4453401040003536e-05,
      "loss": 0.859,
      "step": 171060
    },
    {
      "epoch": 0.5995577004629775,
      "grad_norm": 2.875,
      "learning_rate": 4.445275201133983e-05,
      "loss": 0.8874,
      "step": 171070
    },
    {
      "epoch": 0.5995927479698732,
      "grad_norm": 3.171875,
      "learning_rate": 4.445210298267613e-05,
      "loss": 0.9729,
      "step": 171080
    },
    {
      "epoch": 0.5996277954767688,
      "grad_norm": 2.6875,
      "learning_rate": 4.445145395401243e-05,
      "loss": 0.8228,
      "step": 171090
    },
    {
      "epoch": 0.5996628429836643,
      "grad_norm": 2.875,
      "learning_rate": 4.445080492534873e-05,
      "loss": 0.9173,
      "step": 171100
    },
    {
      "epoch": 0.59969789049056,
      "grad_norm": 2.546875,
      "learning_rate": 4.445015589668502e-05,
      "loss": 0.8374,
      "step": 171110
    },
    {
      "epoch": 0.5997329379974555,
      "grad_norm": 2.9375,
      "learning_rate": 4.4449506868021324e-05,
      "loss": 0.9511,
      "step": 171120
    },
    {
      "epoch": 0.5997679855043512,
      "grad_norm": 2.90625,
      "learning_rate": 4.444885783935762e-05,
      "loss": 0.8065,
      "step": 171130
    },
    {
      "epoch": 0.5998030330112467,
      "grad_norm": 3.109375,
      "learning_rate": 4.444820881069392e-05,
      "loss": 0.8602,
      "step": 171140
    },
    {
      "epoch": 0.5998380805181424,
      "grad_norm": 2.734375,
      "learning_rate": 4.4447559782030215e-05,
      "loss": 0.8928,
      "step": 171150
    },
    {
      "epoch": 0.5998731280250379,
      "grad_norm": 3.046875,
      "learning_rate": 4.4446910753366516e-05,
      "loss": 0.8821,
      "step": 171160
    },
    {
      "epoch": 0.5999081755319335,
      "grad_norm": 3.03125,
      "learning_rate": 4.444626172470282e-05,
      "loss": 0.9538,
      "step": 171170
    },
    {
      "epoch": 0.5999432230388292,
      "grad_norm": 2.96875,
      "learning_rate": 4.4445612696039105e-05,
      "loss": 0.986,
      "step": 171180
    },
    {
      "epoch": 0.5999782705457247,
      "grad_norm": 3.0,
      "learning_rate": 4.444496366737541e-05,
      "loss": 0.8972,
      "step": 171190
    },
    {
      "epoch": 0.6000133180526204,
      "grad_norm": 2.671875,
      "learning_rate": 4.44443146387117e-05,
      "loss": 0.8963,
      "step": 171200
    },
    {
      "epoch": 0.6000483655595159,
      "grad_norm": 2.953125,
      "learning_rate": 4.4443665610048e-05,
      "loss": 0.9297,
      "step": 171210
    },
    {
      "epoch": 0.6000834130664116,
      "grad_norm": 3.078125,
      "learning_rate": 4.44430165813843e-05,
      "loss": 0.8665,
      "step": 171220
    },
    {
      "epoch": 0.6001184605733071,
      "grad_norm": 3.234375,
      "learning_rate": 4.44423675527206e-05,
      "loss": 1.0006,
      "step": 171230
    },
    {
      "epoch": 0.6001535080802027,
      "grad_norm": 3.328125,
      "learning_rate": 4.4441718524056893e-05,
      "loss": 0.983,
      "step": 171240
    },
    {
      "epoch": 0.6001885555870983,
      "grad_norm": 2.984375,
      "learning_rate": 4.4441069495393195e-05,
      "loss": 0.9497,
      "step": 171250
    },
    {
      "epoch": 0.6002236030939939,
      "grad_norm": 2.921875,
      "learning_rate": 4.4440420466729496e-05,
      "loss": 0.8881,
      "step": 171260
    },
    {
      "epoch": 0.6002586506008895,
      "grad_norm": 3.0625,
      "learning_rate": 4.443977143806579e-05,
      "loss": 0.9029,
      "step": 171270
    },
    {
      "epoch": 0.6002936981077851,
      "grad_norm": 3.15625,
      "learning_rate": 4.443912240940209e-05,
      "loss": 0.8653,
      "step": 171280
    },
    {
      "epoch": 0.6003287456146807,
      "grad_norm": 3.03125,
      "learning_rate": 4.443847338073839e-05,
      "loss": 0.9254,
      "step": 171290
    },
    {
      "epoch": 0.6003637931215763,
      "grad_norm": 2.484375,
      "learning_rate": 4.443782435207469e-05,
      "loss": 0.9148,
      "step": 171300
    },
    {
      "epoch": 0.6003988406284719,
      "grad_norm": 2.921875,
      "learning_rate": 4.443717532341098e-05,
      "loss": 0.907,
      "step": 171310
    },
    {
      "epoch": 0.6004338881353675,
      "grad_norm": 2.640625,
      "learning_rate": 4.4436526294747284e-05,
      "loss": 0.8993,
      "step": 171320
    },
    {
      "epoch": 0.6004689356422631,
      "grad_norm": 2.546875,
      "learning_rate": 4.443587726608358e-05,
      "loss": 0.827,
      "step": 171330
    },
    {
      "epoch": 0.6005039831491586,
      "grad_norm": 3.109375,
      "learning_rate": 4.443522823741988e-05,
      "loss": 0.969,
      "step": 171340
    },
    {
      "epoch": 0.6005390306560543,
      "grad_norm": 2.828125,
      "learning_rate": 4.4434579208756175e-05,
      "loss": 0.9292,
      "step": 171350
    },
    {
      "epoch": 0.6005740781629498,
      "grad_norm": 3.140625,
      "learning_rate": 4.4433930180092476e-05,
      "loss": 0.8822,
      "step": 171360
    },
    {
      "epoch": 0.6006091256698455,
      "grad_norm": 2.84375,
      "learning_rate": 4.443328115142877e-05,
      "loss": 0.9328,
      "step": 171370
    },
    {
      "epoch": 0.6006441731767411,
      "grad_norm": 3.046875,
      "learning_rate": 4.443263212276507e-05,
      "loss": 0.9038,
      "step": 171380
    },
    {
      "epoch": 0.6006792206836367,
      "grad_norm": 2.828125,
      "learning_rate": 4.4431983094101374e-05,
      "loss": 0.9515,
      "step": 171390
    },
    {
      "epoch": 0.6007142681905323,
      "grad_norm": 3.171875,
      "learning_rate": 4.443133406543767e-05,
      "loss": 0.8665,
      "step": 171400
    },
    {
      "epoch": 0.6007493156974278,
      "grad_norm": 3.109375,
      "learning_rate": 4.443068503677397e-05,
      "loss": 0.8967,
      "step": 171410
    },
    {
      "epoch": 0.6007843632043235,
      "grad_norm": 2.84375,
      "learning_rate": 4.4430036008110264e-05,
      "loss": 0.8936,
      "step": 171420
    },
    {
      "epoch": 0.600819410711219,
      "grad_norm": 2.703125,
      "learning_rate": 4.4429386979446566e-05,
      "loss": 0.9845,
      "step": 171430
    },
    {
      "epoch": 0.6008544582181147,
      "grad_norm": 3.078125,
      "learning_rate": 4.442873795078286e-05,
      "loss": 0.924,
      "step": 171440
    },
    {
      "epoch": 0.6008895057250102,
      "grad_norm": 2.9375,
      "learning_rate": 4.442808892211916e-05,
      "loss": 0.9045,
      "step": 171450
    },
    {
      "epoch": 0.6009245532319059,
      "grad_norm": 2.71875,
      "learning_rate": 4.4427439893455456e-05,
      "loss": 0.845,
      "step": 171460
    },
    {
      "epoch": 0.6009596007388014,
      "grad_norm": 2.75,
      "learning_rate": 4.442679086479176e-05,
      "loss": 0.872,
      "step": 171470
    },
    {
      "epoch": 0.600994648245697,
      "grad_norm": 3.546875,
      "learning_rate": 4.442614183612805e-05,
      "loss": 0.873,
      "step": 171480
    },
    {
      "epoch": 0.6010296957525927,
      "grad_norm": 2.53125,
      "learning_rate": 4.4425492807464354e-05,
      "loss": 0.8576,
      "step": 171490
    },
    {
      "epoch": 0.6010647432594882,
      "grad_norm": 3.078125,
      "learning_rate": 4.442484377880065e-05,
      "loss": 0.8916,
      "step": 171500
    },
    {
      "epoch": 0.6010997907663839,
      "grad_norm": 3.21875,
      "learning_rate": 4.442419475013695e-05,
      "loss": 0.8847,
      "step": 171510
    },
    {
      "epoch": 0.6011348382732794,
      "grad_norm": 3.40625,
      "learning_rate": 4.4423545721473244e-05,
      "loss": 0.9562,
      "step": 171520
    },
    {
      "epoch": 0.601169885780175,
      "grad_norm": 2.921875,
      "learning_rate": 4.4422896692809546e-05,
      "loss": 0.9445,
      "step": 171530
    },
    {
      "epoch": 0.6012049332870706,
      "grad_norm": 2.734375,
      "learning_rate": 4.442224766414585e-05,
      "loss": 0.9262,
      "step": 171540
    },
    {
      "epoch": 0.6012399807939662,
      "grad_norm": 2.875,
      "learning_rate": 4.442159863548214e-05,
      "loss": 0.917,
      "step": 171550
    },
    {
      "epoch": 0.6012750283008618,
      "grad_norm": 2.90625,
      "learning_rate": 4.4420949606818436e-05,
      "loss": 0.861,
      "step": 171560
    },
    {
      "epoch": 0.6013100758077574,
      "grad_norm": 2.6875,
      "learning_rate": 4.442030057815473e-05,
      "loss": 0.9715,
      "step": 171570
    },
    {
      "epoch": 0.6013451233146531,
      "grad_norm": 3.015625,
      "learning_rate": 4.441965154949103e-05,
      "loss": 0.8145,
      "step": 171580
    },
    {
      "epoch": 0.6013801708215486,
      "grad_norm": 2.84375,
      "learning_rate": 4.441900252082733e-05,
      "loss": 0.9172,
      "step": 171590
    },
    {
      "epoch": 0.6014152183284442,
      "grad_norm": 2.96875,
      "learning_rate": 4.441835349216363e-05,
      "loss": 0.8939,
      "step": 171600
    },
    {
      "epoch": 0.6014502658353398,
      "grad_norm": 3.1875,
      "learning_rate": 4.441770446349992e-05,
      "loss": 0.9168,
      "step": 171610
    },
    {
      "epoch": 0.6014853133422354,
      "grad_norm": 2.34375,
      "learning_rate": 4.4417055434836224e-05,
      "loss": 0.9116,
      "step": 171620
    },
    {
      "epoch": 0.601520360849131,
      "grad_norm": 3.203125,
      "learning_rate": 4.4416406406172526e-05,
      "loss": 0.9116,
      "step": 171630
    },
    {
      "epoch": 0.6015554083560266,
      "grad_norm": 2.828125,
      "learning_rate": 4.441575737750882e-05,
      "loss": 0.874,
      "step": 171640
    },
    {
      "epoch": 0.6015904558629221,
      "grad_norm": 4.0625,
      "learning_rate": 4.441510834884512e-05,
      "loss": 0.8677,
      "step": 171650
    },
    {
      "epoch": 0.6016255033698178,
      "grad_norm": 3.1875,
      "learning_rate": 4.4414459320181416e-05,
      "loss": 0.8899,
      "step": 171660
    },
    {
      "epoch": 0.6016605508767134,
      "grad_norm": 3.078125,
      "learning_rate": 4.441381029151772e-05,
      "loss": 1.0545,
      "step": 171670
    },
    {
      "epoch": 0.601695598383609,
      "grad_norm": 2.765625,
      "learning_rate": 4.441316126285401e-05,
      "loss": 0.9202,
      "step": 171680
    },
    {
      "epoch": 0.6017306458905046,
      "grad_norm": 2.6875,
      "learning_rate": 4.4412512234190314e-05,
      "loss": 0.881,
      "step": 171690
    },
    {
      "epoch": 0.6017656933974002,
      "grad_norm": 3.296875,
      "learning_rate": 4.441186320552661e-05,
      "loss": 0.8428,
      "step": 171700
    },
    {
      "epoch": 0.6018007409042958,
      "grad_norm": 2.640625,
      "learning_rate": 4.441121417686291e-05,
      "loss": 0.8335,
      "step": 171710
    },
    {
      "epoch": 0.6018357884111913,
      "grad_norm": 5.625,
      "learning_rate": 4.4410565148199204e-05,
      "loss": 0.9224,
      "step": 171720
    },
    {
      "epoch": 0.601870835918087,
      "grad_norm": 3.15625,
      "learning_rate": 4.4409916119535506e-05,
      "loss": 0.9535,
      "step": 171730
    },
    {
      "epoch": 0.6019058834249825,
      "grad_norm": 2.578125,
      "learning_rate": 4.44092670908718e-05,
      "loss": 0.9043,
      "step": 171740
    },
    {
      "epoch": 0.6019409309318782,
      "grad_norm": 3.328125,
      "learning_rate": 4.44086180622081e-05,
      "loss": 0.8968,
      "step": 171750
    },
    {
      "epoch": 0.6019759784387737,
      "grad_norm": 2.859375,
      "learning_rate": 4.44079690335444e-05,
      "loss": 0.9488,
      "step": 171760
    },
    {
      "epoch": 0.6020110259456694,
      "grad_norm": 2.84375,
      "learning_rate": 4.44073200048807e-05,
      "loss": 0.893,
      "step": 171770
    },
    {
      "epoch": 0.602046073452565,
      "grad_norm": 2.984375,
      "learning_rate": 4.4406670976217e-05,
      "loss": 0.943,
      "step": 171780
    },
    {
      "epoch": 0.6020811209594605,
      "grad_norm": 2.84375,
      "learning_rate": 4.4406021947553294e-05,
      "loss": 0.9507,
      "step": 171790
    },
    {
      "epoch": 0.6021161684663562,
      "grad_norm": 2.546875,
      "learning_rate": 4.4405372918889595e-05,
      "loss": 0.8884,
      "step": 171800
    },
    {
      "epoch": 0.6021512159732517,
      "grad_norm": 2.78125,
      "learning_rate": 4.440472389022589e-05,
      "loss": 0.8683,
      "step": 171810
    },
    {
      "epoch": 0.6021862634801474,
      "grad_norm": 2.84375,
      "learning_rate": 4.440407486156219e-05,
      "loss": 0.9281,
      "step": 171820
    },
    {
      "epoch": 0.6022213109870429,
      "grad_norm": 3.078125,
      "learning_rate": 4.4403425832898486e-05,
      "loss": 1.0395,
      "step": 171830
    },
    {
      "epoch": 0.6022563584939385,
      "grad_norm": 2.96875,
      "learning_rate": 4.440277680423479e-05,
      "loss": 0.9104,
      "step": 171840
    },
    {
      "epoch": 0.6022914060008341,
      "grad_norm": 2.703125,
      "learning_rate": 4.440212777557108e-05,
      "loss": 0.9234,
      "step": 171850
    },
    {
      "epoch": 0.6023264535077297,
      "grad_norm": 3.234375,
      "learning_rate": 4.440147874690738e-05,
      "loss": 0.983,
      "step": 171860
    },
    {
      "epoch": 0.6023615010146254,
      "grad_norm": 2.484375,
      "learning_rate": 4.440082971824368e-05,
      "loss": 0.8697,
      "step": 171870
    },
    {
      "epoch": 0.6023965485215209,
      "grad_norm": 2.828125,
      "learning_rate": 4.440018068957998e-05,
      "loss": 0.9561,
      "step": 171880
    },
    {
      "epoch": 0.6024315960284166,
      "grad_norm": 3.8125,
      "learning_rate": 4.439953166091628e-05,
      "loss": 0.9304,
      "step": 171890
    },
    {
      "epoch": 0.6024666435353121,
      "grad_norm": 2.96875,
      "learning_rate": 4.4398882632252575e-05,
      "loss": 0.871,
      "step": 171900
    },
    {
      "epoch": 0.6025016910422077,
      "grad_norm": 2.9375,
      "learning_rate": 4.4398233603588876e-05,
      "loss": 0.9458,
      "step": 171910
    },
    {
      "epoch": 0.6025367385491033,
      "grad_norm": 2.890625,
      "learning_rate": 4.439758457492517e-05,
      "loss": 0.9533,
      "step": 171920
    },
    {
      "epoch": 0.6025717860559989,
      "grad_norm": 3.09375,
      "learning_rate": 4.4396935546261466e-05,
      "loss": 0.9114,
      "step": 171930
    },
    {
      "epoch": 0.6026068335628945,
      "grad_norm": 3.234375,
      "learning_rate": 4.439628651759776e-05,
      "loss": 0.887,
      "step": 171940
    },
    {
      "epoch": 0.6026418810697901,
      "grad_norm": 2.90625,
      "learning_rate": 4.439563748893406e-05,
      "loss": 0.8774,
      "step": 171950
    },
    {
      "epoch": 0.6026769285766856,
      "grad_norm": 3.0,
      "learning_rate": 4.4394988460270356e-05,
      "loss": 0.9038,
      "step": 171960
    },
    {
      "epoch": 0.6027119760835813,
      "grad_norm": 2.796875,
      "learning_rate": 4.439433943160666e-05,
      "loss": 0.87,
      "step": 171970
    },
    {
      "epoch": 0.6027470235904769,
      "grad_norm": 2.71875,
      "learning_rate": 4.439369040294296e-05,
      "loss": 0.837,
      "step": 171980
    },
    {
      "epoch": 0.6027820710973725,
      "grad_norm": 2.765625,
      "learning_rate": 4.4393041374279254e-05,
      "loss": 0.8989,
      "step": 171990
    },
    {
      "epoch": 0.6028171186042681,
      "grad_norm": 2.671875,
      "learning_rate": 4.4392392345615555e-05,
      "loss": 0.9066,
      "step": 172000
    },
    {
      "epoch": 0.6028521661111637,
      "grad_norm": 3.234375,
      "learning_rate": 4.439174331695185e-05,
      "loss": 0.8526,
      "step": 172010
    },
    {
      "epoch": 0.6028872136180593,
      "grad_norm": 3.34375,
      "learning_rate": 4.439109428828815e-05,
      "loss": 0.9283,
      "step": 172020
    },
    {
      "epoch": 0.6029222611249548,
      "grad_norm": 3.59375,
      "learning_rate": 4.4390445259624446e-05,
      "loss": 0.8839,
      "step": 172030
    },
    {
      "epoch": 0.6029573086318505,
      "grad_norm": 2.84375,
      "learning_rate": 4.438979623096075e-05,
      "loss": 0.9455,
      "step": 172040
    },
    {
      "epoch": 0.602992356138746,
      "grad_norm": 3.125,
      "learning_rate": 4.438914720229704e-05,
      "loss": 0.9701,
      "step": 172050
    },
    {
      "epoch": 0.6030274036456417,
      "grad_norm": 3.125,
      "learning_rate": 4.438849817363334e-05,
      "loss": 0.9311,
      "step": 172060
    },
    {
      "epoch": 0.6030624511525373,
      "grad_norm": 2.796875,
      "learning_rate": 4.438784914496964e-05,
      "loss": 0.8689,
      "step": 172070
    },
    {
      "epoch": 0.6030974986594329,
      "grad_norm": 2.9375,
      "learning_rate": 4.438720011630594e-05,
      "loss": 0.8315,
      "step": 172080
    },
    {
      "epoch": 0.6031325461663285,
      "grad_norm": 3.390625,
      "learning_rate": 4.4386551087642234e-05,
      "loss": 0.9532,
      "step": 172090
    },
    {
      "epoch": 0.603167593673224,
      "grad_norm": 3.125,
      "learning_rate": 4.4385902058978535e-05,
      "loss": 0.8817,
      "step": 172100
    },
    {
      "epoch": 0.6032026411801197,
      "grad_norm": 2.65625,
      "learning_rate": 4.438525303031483e-05,
      "loss": 0.9643,
      "step": 172110
    },
    {
      "epoch": 0.6032376886870152,
      "grad_norm": 2.921875,
      "learning_rate": 4.438460400165113e-05,
      "loss": 0.9076,
      "step": 172120
    },
    {
      "epoch": 0.6032727361939109,
      "grad_norm": 3.328125,
      "learning_rate": 4.438395497298743e-05,
      "loss": 0.8725,
      "step": 172130
    },
    {
      "epoch": 0.6033077837008064,
      "grad_norm": 3.3125,
      "learning_rate": 4.438330594432373e-05,
      "loss": 0.8528,
      "step": 172140
    },
    {
      "epoch": 0.603342831207702,
      "grad_norm": 2.90625,
      "learning_rate": 4.438265691566003e-05,
      "loss": 0.9253,
      "step": 172150
    },
    {
      "epoch": 0.6033778787145976,
      "grad_norm": 2.828125,
      "learning_rate": 4.438200788699632e-05,
      "loss": 0.9479,
      "step": 172160
    },
    {
      "epoch": 0.6034129262214932,
      "grad_norm": 3.4375,
      "learning_rate": 4.4381358858332624e-05,
      "loss": 0.9464,
      "step": 172170
    },
    {
      "epoch": 0.6034479737283889,
      "grad_norm": 2.90625,
      "learning_rate": 4.438070982966892e-05,
      "loss": 0.9282,
      "step": 172180
    },
    {
      "epoch": 0.6034830212352844,
      "grad_norm": 2.875,
      "learning_rate": 4.438006080100522e-05,
      "loss": 0.8506,
      "step": 172190
    },
    {
      "epoch": 0.6035180687421801,
      "grad_norm": 2.703125,
      "learning_rate": 4.4379411772341515e-05,
      "loss": 0.8586,
      "step": 172200
    },
    {
      "epoch": 0.6035531162490756,
      "grad_norm": 2.9375,
      "learning_rate": 4.4378762743677816e-05,
      "loss": 0.9217,
      "step": 172210
    },
    {
      "epoch": 0.6035881637559712,
      "grad_norm": 3.71875,
      "learning_rate": 4.437811371501411e-05,
      "loss": 0.8441,
      "step": 172220
    },
    {
      "epoch": 0.6036232112628668,
      "grad_norm": 2.984375,
      "learning_rate": 4.437746468635041e-05,
      "loss": 0.853,
      "step": 172230
    },
    {
      "epoch": 0.6036582587697624,
      "grad_norm": 3.0,
      "learning_rate": 4.437681565768671e-05,
      "loss": 0.8861,
      "step": 172240
    },
    {
      "epoch": 0.603693306276658,
      "grad_norm": 3.234375,
      "learning_rate": 4.437616662902301e-05,
      "loss": 0.9283,
      "step": 172250
    },
    {
      "epoch": 0.6037283537835536,
      "grad_norm": 3.21875,
      "learning_rate": 4.437551760035931e-05,
      "loss": 0.9772,
      "step": 172260
    },
    {
      "epoch": 0.6037634012904493,
      "grad_norm": 2.609375,
      "learning_rate": 4.4374868571695604e-05,
      "loss": 0.9536,
      "step": 172270
    },
    {
      "epoch": 0.6037984487973448,
      "grad_norm": 3.0,
      "learning_rate": 4.4374219543031906e-05,
      "loss": 0.9128,
      "step": 172280
    },
    {
      "epoch": 0.6038334963042404,
      "grad_norm": 2.984375,
      "learning_rate": 4.43735705143682e-05,
      "loss": 0.8722,
      "step": 172290
    },
    {
      "epoch": 0.603868543811136,
      "grad_norm": 2.609375,
      "learning_rate": 4.43729214857045e-05,
      "loss": 0.8873,
      "step": 172300
    },
    {
      "epoch": 0.6039035913180316,
      "grad_norm": 2.8125,
      "learning_rate": 4.437227245704079e-05,
      "loss": 0.97,
      "step": 172310
    },
    {
      "epoch": 0.6039386388249272,
      "grad_norm": 2.71875,
      "learning_rate": 4.437162342837709e-05,
      "loss": 0.7775,
      "step": 172320
    },
    {
      "epoch": 0.6039736863318228,
      "grad_norm": 2.953125,
      "learning_rate": 4.4370974399713386e-05,
      "loss": 0.9093,
      "step": 172330
    },
    {
      "epoch": 0.6040087338387183,
      "grad_norm": 3.0625,
      "learning_rate": 4.437032537104969e-05,
      "loss": 0.9144,
      "step": 172340
    },
    {
      "epoch": 0.604043781345614,
      "grad_norm": 3.125,
      "learning_rate": 4.436967634238599e-05,
      "loss": 0.9414,
      "step": 172350
    },
    {
      "epoch": 0.6040788288525096,
      "grad_norm": 2.6875,
      "learning_rate": 4.436902731372228e-05,
      "loss": 0.9404,
      "step": 172360
    },
    {
      "epoch": 0.6041138763594052,
      "grad_norm": 3.0625,
      "learning_rate": 4.4368378285058584e-05,
      "loss": 0.8835,
      "step": 172370
    },
    {
      "epoch": 0.6041489238663008,
      "grad_norm": 2.65625,
      "learning_rate": 4.436772925639488e-05,
      "loss": 0.939,
      "step": 172380
    },
    {
      "epoch": 0.6041839713731963,
      "grad_norm": 2.875,
      "learning_rate": 4.436708022773118e-05,
      "loss": 0.9157,
      "step": 172390
    },
    {
      "epoch": 0.604219018880092,
      "grad_norm": 2.9375,
      "learning_rate": 4.4366431199067475e-05,
      "loss": 0.9113,
      "step": 172400
    },
    {
      "epoch": 0.6042540663869875,
      "grad_norm": 3.171875,
      "learning_rate": 4.4365782170403776e-05,
      "loss": 0.8769,
      "step": 172410
    },
    {
      "epoch": 0.6042891138938832,
      "grad_norm": 3.09375,
      "learning_rate": 4.436513314174007e-05,
      "loss": 0.8195,
      "step": 172420
    },
    {
      "epoch": 0.6043241614007787,
      "grad_norm": 3.171875,
      "learning_rate": 4.436448411307637e-05,
      "loss": 0.9906,
      "step": 172430
    },
    {
      "epoch": 0.6043592089076744,
      "grad_norm": 2.671875,
      "learning_rate": 4.436383508441267e-05,
      "loss": 0.8856,
      "step": 172440
    },
    {
      "epoch": 0.6043942564145699,
      "grad_norm": 3.09375,
      "learning_rate": 4.436318605574897e-05,
      "loss": 0.8502,
      "step": 172450
    },
    {
      "epoch": 0.6044293039214655,
      "grad_norm": 2.734375,
      "learning_rate": 4.436253702708526e-05,
      "loss": 0.8309,
      "step": 172460
    },
    {
      "epoch": 0.6044643514283612,
      "grad_norm": 3.09375,
      "learning_rate": 4.4361887998421564e-05,
      "loss": 0.9017,
      "step": 172470
    },
    {
      "epoch": 0.6044993989352567,
      "grad_norm": 2.890625,
      "learning_rate": 4.436123896975786e-05,
      "loss": 0.8803,
      "step": 172480
    },
    {
      "epoch": 0.6045344464421524,
      "grad_norm": 2.890625,
      "learning_rate": 4.436058994109416e-05,
      "loss": 0.8874,
      "step": 172490
    },
    {
      "epoch": 0.6045694939490479,
      "grad_norm": 3.078125,
      "learning_rate": 4.435994091243046e-05,
      "loss": 0.8543,
      "step": 172500
    },
    {
      "epoch": 0.6046045414559436,
      "grad_norm": 2.953125,
      "learning_rate": 4.4359291883766756e-05,
      "loss": 0.9028,
      "step": 172510
    },
    {
      "epoch": 0.6046395889628391,
      "grad_norm": 2.765625,
      "learning_rate": 4.435864285510306e-05,
      "loss": 0.9243,
      "step": 172520
    },
    {
      "epoch": 0.6046746364697347,
      "grad_norm": 2.484375,
      "learning_rate": 4.435799382643935e-05,
      "loss": 0.8748,
      "step": 172530
    },
    {
      "epoch": 0.6047096839766303,
      "grad_norm": 2.640625,
      "learning_rate": 4.4357344797775654e-05,
      "loss": 0.9321,
      "step": 172540
    },
    {
      "epoch": 0.6047447314835259,
      "grad_norm": 2.6875,
      "learning_rate": 4.435669576911195e-05,
      "loss": 0.9484,
      "step": 172550
    },
    {
      "epoch": 0.6047797789904216,
      "grad_norm": 3.328125,
      "learning_rate": 4.435604674044825e-05,
      "loss": 0.9878,
      "step": 172560
    },
    {
      "epoch": 0.6048148264973171,
      "grad_norm": 2.953125,
      "learning_rate": 4.4355397711784544e-05,
      "loss": 0.8649,
      "step": 172570
    },
    {
      "epoch": 0.6048498740042128,
      "grad_norm": 2.609375,
      "learning_rate": 4.4354748683120846e-05,
      "loss": 0.8075,
      "step": 172580
    },
    {
      "epoch": 0.6048849215111083,
      "grad_norm": 3.21875,
      "learning_rate": 4.435409965445714e-05,
      "loss": 0.9665,
      "step": 172590
    },
    {
      "epoch": 0.6049199690180039,
      "grad_norm": 2.828125,
      "learning_rate": 4.435345062579344e-05,
      "loss": 0.8749,
      "step": 172600
    },
    {
      "epoch": 0.6049550165248995,
      "grad_norm": 3.15625,
      "learning_rate": 4.4352801597129736e-05,
      "loss": 0.9339,
      "step": 172610
    },
    {
      "epoch": 0.6049900640317951,
      "grad_norm": 2.84375,
      "learning_rate": 4.435215256846604e-05,
      "loss": 0.9341,
      "step": 172620
    },
    {
      "epoch": 0.6050251115386907,
      "grad_norm": 3.5625,
      "learning_rate": 4.435150353980234e-05,
      "loss": 0.892,
      "step": 172630
    },
    {
      "epoch": 0.6050601590455863,
      "grad_norm": 3.140625,
      "learning_rate": 4.4350854511138634e-05,
      "loss": 0.8509,
      "step": 172640
    },
    {
      "epoch": 0.6050952065524818,
      "grad_norm": 2.921875,
      "learning_rate": 4.4350205482474935e-05,
      "loss": 0.9521,
      "step": 172650
    },
    {
      "epoch": 0.6051302540593775,
      "grad_norm": 2.453125,
      "learning_rate": 4.434955645381123e-05,
      "loss": 0.9097,
      "step": 172660
    },
    {
      "epoch": 0.6051653015662731,
      "grad_norm": 2.71875,
      "learning_rate": 4.434890742514753e-05,
      "loss": 0.8791,
      "step": 172670
    },
    {
      "epoch": 0.6052003490731687,
      "grad_norm": 3.09375,
      "learning_rate": 4.4348258396483826e-05,
      "loss": 0.9031,
      "step": 172680
    },
    {
      "epoch": 0.6052353965800643,
      "grad_norm": 3.203125,
      "learning_rate": 4.434760936782012e-05,
      "loss": 0.9809,
      "step": 172690
    },
    {
      "epoch": 0.6052704440869598,
      "grad_norm": 2.671875,
      "learning_rate": 4.4346960339156415e-05,
      "loss": 0.9294,
      "step": 172700
    },
    {
      "epoch": 0.6053054915938555,
      "grad_norm": 3.125,
      "learning_rate": 4.4346311310492716e-05,
      "loss": 0.9619,
      "step": 172710
    },
    {
      "epoch": 0.605340539100751,
      "grad_norm": 3.53125,
      "learning_rate": 4.434566228182902e-05,
      "loss": 0.9266,
      "step": 172720
    },
    {
      "epoch": 0.6053755866076467,
      "grad_norm": 2.984375,
      "learning_rate": 4.434501325316531e-05,
      "loss": 0.9736,
      "step": 172730
    },
    {
      "epoch": 0.6054106341145422,
      "grad_norm": 2.71875,
      "learning_rate": 4.4344364224501614e-05,
      "loss": 0.8704,
      "step": 172740
    },
    {
      "epoch": 0.6054456816214379,
      "grad_norm": 3.234375,
      "learning_rate": 4.434371519583791e-05,
      "loss": 0.9884,
      "step": 172750
    },
    {
      "epoch": 0.6054807291283335,
      "grad_norm": 3.015625,
      "learning_rate": 4.434306616717421e-05,
      "loss": 0.9814,
      "step": 172760
    },
    {
      "epoch": 0.605515776635229,
      "grad_norm": 2.84375,
      "learning_rate": 4.4342417138510504e-05,
      "loss": 0.9543,
      "step": 172770
    },
    {
      "epoch": 0.6055508241421247,
      "grad_norm": 2.859375,
      "learning_rate": 4.4341768109846806e-05,
      "loss": 0.8847,
      "step": 172780
    },
    {
      "epoch": 0.6055858716490202,
      "grad_norm": 2.6875,
      "learning_rate": 4.43411190811831e-05,
      "loss": 0.8792,
      "step": 172790
    },
    {
      "epoch": 0.6056209191559159,
      "grad_norm": 2.890625,
      "learning_rate": 4.43404700525194e-05,
      "loss": 0.9173,
      "step": 172800
    },
    {
      "epoch": 0.6056559666628114,
      "grad_norm": 2.890625,
      "learning_rate": 4.4339821023855696e-05,
      "loss": 0.8672,
      "step": 172810
    },
    {
      "epoch": 0.605691014169707,
      "grad_norm": 2.5,
      "learning_rate": 4.4339171995192e-05,
      "loss": 0.985,
      "step": 172820
    },
    {
      "epoch": 0.6057260616766026,
      "grad_norm": 3.40625,
      "learning_rate": 4.433852296652829e-05,
      "loss": 0.9213,
      "step": 172830
    },
    {
      "epoch": 0.6057611091834982,
      "grad_norm": 3.421875,
      "learning_rate": 4.4337873937864594e-05,
      "loss": 0.9639,
      "step": 172840
    },
    {
      "epoch": 0.6057961566903939,
      "grad_norm": 3.0,
      "learning_rate": 4.4337224909200895e-05,
      "loss": 0.947,
      "step": 172850
    },
    {
      "epoch": 0.6058312041972894,
      "grad_norm": 2.75,
      "learning_rate": 4.433657588053719e-05,
      "loss": 0.8437,
      "step": 172860
    },
    {
      "epoch": 0.6058662517041851,
      "grad_norm": 3.15625,
      "learning_rate": 4.433592685187349e-05,
      "loss": 0.9568,
      "step": 172870
    },
    {
      "epoch": 0.6059012992110806,
      "grad_norm": 2.71875,
      "learning_rate": 4.4335277823209786e-05,
      "loss": 0.8434,
      "step": 172880
    },
    {
      "epoch": 0.6059363467179762,
      "grad_norm": 3.421875,
      "learning_rate": 4.433462879454609e-05,
      "loss": 0.9484,
      "step": 172890
    },
    {
      "epoch": 0.6059713942248718,
      "grad_norm": 2.953125,
      "learning_rate": 4.433397976588238e-05,
      "loss": 0.947,
      "step": 172900
    },
    {
      "epoch": 0.6060064417317674,
      "grad_norm": 3.03125,
      "learning_rate": 4.433333073721868e-05,
      "loss": 0.8866,
      "step": 172910
    },
    {
      "epoch": 0.606041489238663,
      "grad_norm": 3.5,
      "learning_rate": 4.433268170855498e-05,
      "loss": 0.9095,
      "step": 172920
    },
    {
      "epoch": 0.6060765367455586,
      "grad_norm": 2.84375,
      "learning_rate": 4.433203267989128e-05,
      "loss": 0.9163,
      "step": 172930
    },
    {
      "epoch": 0.6061115842524541,
      "grad_norm": 2.65625,
      "learning_rate": 4.4331383651227574e-05,
      "loss": 0.9372,
      "step": 172940
    },
    {
      "epoch": 0.6061466317593498,
      "grad_norm": 2.765625,
      "learning_rate": 4.4330734622563875e-05,
      "loss": 0.9762,
      "step": 172950
    },
    {
      "epoch": 0.6061816792662454,
      "grad_norm": 3.28125,
      "learning_rate": 4.433008559390017e-05,
      "loss": 0.8709,
      "step": 172960
    },
    {
      "epoch": 0.606216726773141,
      "grad_norm": 2.8125,
      "learning_rate": 4.432943656523647e-05,
      "loss": 0.9697,
      "step": 172970
    },
    {
      "epoch": 0.6062517742800366,
      "grad_norm": 3.140625,
      "learning_rate": 4.4328787536572766e-05,
      "loss": 0.9857,
      "step": 172980
    },
    {
      "epoch": 0.6062868217869322,
      "grad_norm": 3.375,
      "learning_rate": 4.432813850790907e-05,
      "loss": 0.8817,
      "step": 172990
    },
    {
      "epoch": 0.6063218692938278,
      "grad_norm": 3.078125,
      "learning_rate": 4.432748947924537e-05,
      "loss": 0.9009,
      "step": 173000
    },
    {
      "epoch": 0.6063569168007233,
      "grad_norm": 2.671875,
      "learning_rate": 4.432684045058166e-05,
      "loss": 0.8546,
      "step": 173010
    },
    {
      "epoch": 0.606391964307619,
      "grad_norm": 2.6875,
      "learning_rate": 4.4326191421917965e-05,
      "loss": 0.9269,
      "step": 173020
    },
    {
      "epoch": 0.6064270118145145,
      "grad_norm": 3.25,
      "learning_rate": 4.432554239325426e-05,
      "loss": 0.9085,
      "step": 173030
    },
    {
      "epoch": 0.6064620593214102,
      "grad_norm": 3.265625,
      "learning_rate": 4.432489336459056e-05,
      "loss": 0.9785,
      "step": 173040
    },
    {
      "epoch": 0.6064971068283058,
      "grad_norm": 2.921875,
      "learning_rate": 4.4324244335926855e-05,
      "loss": 0.8585,
      "step": 173050
    },
    {
      "epoch": 0.6065321543352014,
      "grad_norm": 2.5625,
      "learning_rate": 4.432359530726315e-05,
      "loss": 0.8843,
      "step": 173060
    },
    {
      "epoch": 0.606567201842097,
      "grad_norm": 2.6875,
      "learning_rate": 4.4322946278599444e-05,
      "loss": 0.9846,
      "step": 173070
    },
    {
      "epoch": 0.6066022493489925,
      "grad_norm": 2.9375,
      "learning_rate": 4.4322297249935746e-05,
      "loss": 0.9852,
      "step": 173080
    },
    {
      "epoch": 0.6066372968558882,
      "grad_norm": 3.046875,
      "learning_rate": 4.432164822127205e-05,
      "loss": 0.8483,
      "step": 173090
    },
    {
      "epoch": 0.6066723443627837,
      "grad_norm": 3.15625,
      "learning_rate": 4.432099919260834e-05,
      "loss": 0.9595,
      "step": 173100
    },
    {
      "epoch": 0.6067073918696794,
      "grad_norm": 3.296875,
      "learning_rate": 4.432035016394464e-05,
      "loss": 0.8935,
      "step": 173110
    },
    {
      "epoch": 0.6067424393765749,
      "grad_norm": 3.125,
      "learning_rate": 4.431970113528094e-05,
      "loss": 0.8593,
      "step": 173120
    },
    {
      "epoch": 0.6067774868834706,
      "grad_norm": 3.078125,
      "learning_rate": 4.431905210661724e-05,
      "loss": 0.94,
      "step": 173130
    },
    {
      "epoch": 0.6068125343903661,
      "grad_norm": 2.90625,
      "learning_rate": 4.4318403077953534e-05,
      "loss": 0.9564,
      "step": 173140
    },
    {
      "epoch": 0.6068475818972617,
      "grad_norm": 2.671875,
      "learning_rate": 4.4317754049289835e-05,
      "loss": 0.8471,
      "step": 173150
    },
    {
      "epoch": 0.6068826294041574,
      "grad_norm": 2.75,
      "learning_rate": 4.431710502062613e-05,
      "loss": 0.9814,
      "step": 173160
    },
    {
      "epoch": 0.6069176769110529,
      "grad_norm": 3.390625,
      "learning_rate": 4.431645599196243e-05,
      "loss": 0.9047,
      "step": 173170
    },
    {
      "epoch": 0.6069527244179486,
      "grad_norm": 2.875,
      "learning_rate": 4.4315806963298726e-05,
      "loss": 0.9575,
      "step": 173180
    },
    {
      "epoch": 0.6069877719248441,
      "grad_norm": 3.328125,
      "learning_rate": 4.431515793463503e-05,
      "loss": 0.971,
      "step": 173190
    },
    {
      "epoch": 0.6070228194317397,
      "grad_norm": 3.25,
      "learning_rate": 4.431450890597132e-05,
      "loss": 0.8934,
      "step": 173200
    },
    {
      "epoch": 0.6070578669386353,
      "grad_norm": 2.765625,
      "learning_rate": 4.431385987730762e-05,
      "loss": 0.8898,
      "step": 173210
    },
    {
      "epoch": 0.6070929144455309,
      "grad_norm": 2.96875,
      "learning_rate": 4.4313210848643925e-05,
      "loss": 0.9152,
      "step": 173220
    },
    {
      "epoch": 0.6071279619524265,
      "grad_norm": 3.3125,
      "learning_rate": 4.431256181998022e-05,
      "loss": 0.9611,
      "step": 173230
    },
    {
      "epoch": 0.6071630094593221,
      "grad_norm": 3.28125,
      "learning_rate": 4.431191279131652e-05,
      "loss": 0.9403,
      "step": 173240
    },
    {
      "epoch": 0.6071980569662178,
      "grad_norm": 3.46875,
      "learning_rate": 4.4311263762652815e-05,
      "loss": 0.9346,
      "step": 173250
    },
    {
      "epoch": 0.6072331044731133,
      "grad_norm": 2.984375,
      "learning_rate": 4.4310614733989117e-05,
      "loss": 0.9021,
      "step": 173260
    },
    {
      "epoch": 0.6072681519800089,
      "grad_norm": 3.109375,
      "learning_rate": 4.430996570532541e-05,
      "loss": 0.9327,
      "step": 173270
    },
    {
      "epoch": 0.6073031994869045,
      "grad_norm": 3.21875,
      "learning_rate": 4.430931667666171e-05,
      "loss": 0.9462,
      "step": 173280
    },
    {
      "epoch": 0.6073382469938001,
      "grad_norm": 3.15625,
      "learning_rate": 4.430866764799801e-05,
      "loss": 0.9397,
      "step": 173290
    },
    {
      "epoch": 0.6073732945006957,
      "grad_norm": 2.921875,
      "learning_rate": 4.430801861933431e-05,
      "loss": 0.9497,
      "step": 173300
    },
    {
      "epoch": 0.6074083420075913,
      "grad_norm": 2.859375,
      "learning_rate": 4.43073695906706e-05,
      "loss": 0.8543,
      "step": 173310
    },
    {
      "epoch": 0.6074433895144868,
      "grad_norm": 2.84375,
      "learning_rate": 4.4306720562006905e-05,
      "loss": 0.9316,
      "step": 173320
    },
    {
      "epoch": 0.6074784370213825,
      "grad_norm": 3.03125,
      "learning_rate": 4.43060715333432e-05,
      "loss": 0.9164,
      "step": 173330
    },
    {
      "epoch": 0.6075134845282781,
      "grad_norm": 2.875,
      "learning_rate": 4.43054225046795e-05,
      "loss": 0.948,
      "step": 173340
    },
    {
      "epoch": 0.6075485320351737,
      "grad_norm": 2.421875,
      "learning_rate": 4.4304773476015795e-05,
      "loss": 0.8386,
      "step": 173350
    },
    {
      "epoch": 0.6075835795420693,
      "grad_norm": 3.0625,
      "learning_rate": 4.4304124447352097e-05,
      "loss": 0.9263,
      "step": 173360
    },
    {
      "epoch": 0.6076186270489649,
      "grad_norm": 3.265625,
      "learning_rate": 4.43034754186884e-05,
      "loss": 0.8322,
      "step": 173370
    },
    {
      "epoch": 0.6076536745558605,
      "grad_norm": 2.59375,
      "learning_rate": 4.430282639002469e-05,
      "loss": 0.9467,
      "step": 173380
    },
    {
      "epoch": 0.607688722062756,
      "grad_norm": 2.828125,
      "learning_rate": 4.4302177361360994e-05,
      "loss": 0.8318,
      "step": 173390
    },
    {
      "epoch": 0.6077237695696517,
      "grad_norm": 2.765625,
      "learning_rate": 4.430152833269729e-05,
      "loss": 0.9094,
      "step": 173400
    },
    {
      "epoch": 0.6077588170765472,
      "grad_norm": 2.796875,
      "learning_rate": 4.430087930403359e-05,
      "loss": 0.9399,
      "step": 173410
    },
    {
      "epoch": 0.6077938645834429,
      "grad_norm": 3.125,
      "learning_rate": 4.4300230275369885e-05,
      "loss": 0.9043,
      "step": 173420
    },
    {
      "epoch": 0.6078289120903384,
      "grad_norm": 3.0625,
      "learning_rate": 4.4299581246706186e-05,
      "loss": 0.8448,
      "step": 173430
    },
    {
      "epoch": 0.607863959597234,
      "grad_norm": 2.8125,
      "learning_rate": 4.4298932218042474e-05,
      "loss": 0.8836,
      "step": 173440
    },
    {
      "epoch": 0.6078990071041297,
      "grad_norm": 3.28125,
      "learning_rate": 4.4298283189378775e-05,
      "loss": 0.9223,
      "step": 173450
    },
    {
      "epoch": 0.6079340546110252,
      "grad_norm": 2.78125,
      "learning_rate": 4.4297634160715077e-05,
      "loss": 0.9508,
      "step": 173460
    },
    {
      "epoch": 0.6079691021179209,
      "grad_norm": 3.125,
      "learning_rate": 4.429698513205137e-05,
      "loss": 0.8931,
      "step": 173470
    },
    {
      "epoch": 0.6080041496248164,
      "grad_norm": 2.90625,
      "learning_rate": 4.429633610338767e-05,
      "loss": 0.9359,
      "step": 173480
    },
    {
      "epoch": 0.6080391971317121,
      "grad_norm": 2.484375,
      "learning_rate": 4.429568707472397e-05,
      "loss": 0.9406,
      "step": 173490
    },
    {
      "epoch": 0.6080742446386076,
      "grad_norm": 3.34375,
      "learning_rate": 4.429503804606027e-05,
      "loss": 0.9615,
      "step": 173500
    },
    {
      "epoch": 0.6081092921455032,
      "grad_norm": 2.8125,
      "learning_rate": 4.429438901739656e-05,
      "loss": 0.84,
      "step": 173510
    },
    {
      "epoch": 0.6081443396523988,
      "grad_norm": 2.71875,
      "learning_rate": 4.4293739988732865e-05,
      "loss": 0.8345,
      "step": 173520
    },
    {
      "epoch": 0.6081793871592944,
      "grad_norm": 3.171875,
      "learning_rate": 4.429309096006916e-05,
      "loss": 0.9238,
      "step": 173530
    },
    {
      "epoch": 0.6082144346661901,
      "grad_norm": 3.140625,
      "learning_rate": 4.429244193140546e-05,
      "loss": 0.9148,
      "step": 173540
    },
    {
      "epoch": 0.6082494821730856,
      "grad_norm": 2.96875,
      "learning_rate": 4.4291792902741755e-05,
      "loss": 0.8852,
      "step": 173550
    },
    {
      "epoch": 0.6082845296799813,
      "grad_norm": 2.546875,
      "learning_rate": 4.4291143874078057e-05,
      "loss": 0.9713,
      "step": 173560
    },
    {
      "epoch": 0.6083195771868768,
      "grad_norm": 3.015625,
      "learning_rate": 4.429049484541435e-05,
      "loss": 0.9313,
      "step": 173570
    },
    {
      "epoch": 0.6083546246937724,
      "grad_norm": 2.59375,
      "learning_rate": 4.428984581675065e-05,
      "loss": 0.9117,
      "step": 173580
    },
    {
      "epoch": 0.608389672200668,
      "grad_norm": 3.046875,
      "learning_rate": 4.4289196788086954e-05,
      "loss": 0.8689,
      "step": 173590
    },
    {
      "epoch": 0.6084247197075636,
      "grad_norm": 2.859375,
      "learning_rate": 4.428854775942325e-05,
      "loss": 0.9204,
      "step": 173600
    },
    {
      "epoch": 0.6084597672144592,
      "grad_norm": 2.796875,
      "learning_rate": 4.428789873075955e-05,
      "loss": 0.884,
      "step": 173610
    },
    {
      "epoch": 0.6084948147213548,
      "grad_norm": 2.828125,
      "learning_rate": 4.4287249702095845e-05,
      "loss": 0.9144,
      "step": 173620
    },
    {
      "epoch": 0.6085298622282503,
      "grad_norm": 3.0,
      "learning_rate": 4.4286600673432146e-05,
      "loss": 0.8365,
      "step": 173630
    },
    {
      "epoch": 0.608564909735146,
      "grad_norm": 2.84375,
      "learning_rate": 4.428595164476844e-05,
      "loss": 0.955,
      "step": 173640
    },
    {
      "epoch": 0.6085999572420416,
      "grad_norm": 2.78125,
      "learning_rate": 4.428530261610474e-05,
      "loss": 0.9114,
      "step": 173650
    },
    {
      "epoch": 0.6086350047489372,
      "grad_norm": 3.015625,
      "learning_rate": 4.4284653587441037e-05,
      "loss": 0.9536,
      "step": 173660
    },
    {
      "epoch": 0.6086700522558328,
      "grad_norm": 2.953125,
      "learning_rate": 4.428400455877734e-05,
      "loss": 0.9001,
      "step": 173670
    },
    {
      "epoch": 0.6087050997627284,
      "grad_norm": 3.09375,
      "learning_rate": 4.428335553011363e-05,
      "loss": 0.9065,
      "step": 173680
    },
    {
      "epoch": 0.608740147269624,
      "grad_norm": 2.765625,
      "learning_rate": 4.4282706501449934e-05,
      "loss": 0.8707,
      "step": 173690
    },
    {
      "epoch": 0.6087751947765195,
      "grad_norm": 2.875,
      "learning_rate": 4.428205747278623e-05,
      "loss": 0.8478,
      "step": 173700
    },
    {
      "epoch": 0.6088102422834152,
      "grad_norm": 3.125,
      "learning_rate": 4.428140844412253e-05,
      "loss": 0.8372,
      "step": 173710
    },
    {
      "epoch": 0.6088452897903107,
      "grad_norm": 3.390625,
      "learning_rate": 4.4280759415458825e-05,
      "loss": 0.9076,
      "step": 173720
    },
    {
      "epoch": 0.6088803372972064,
      "grad_norm": 2.890625,
      "learning_rate": 4.4280110386795126e-05,
      "loss": 0.983,
      "step": 173730
    },
    {
      "epoch": 0.608915384804102,
      "grad_norm": 2.9375,
      "learning_rate": 4.427946135813143e-05,
      "loss": 0.8407,
      "step": 173740
    },
    {
      "epoch": 0.6089504323109975,
      "grad_norm": 2.984375,
      "learning_rate": 4.427881232946772e-05,
      "loss": 0.9732,
      "step": 173750
    },
    {
      "epoch": 0.6089854798178932,
      "grad_norm": 2.671875,
      "learning_rate": 4.427816330080402e-05,
      "loss": 0.9195,
      "step": 173760
    },
    {
      "epoch": 0.6090205273247887,
      "grad_norm": 3.03125,
      "learning_rate": 4.427751427214032e-05,
      "loss": 0.9043,
      "step": 173770
    },
    {
      "epoch": 0.6090555748316844,
      "grad_norm": 3.25,
      "learning_rate": 4.427686524347662e-05,
      "loss": 0.9241,
      "step": 173780
    },
    {
      "epoch": 0.6090906223385799,
      "grad_norm": 3.390625,
      "learning_rate": 4.4276216214812914e-05,
      "loss": 0.943,
      "step": 173790
    },
    {
      "epoch": 0.6091256698454756,
      "grad_norm": 3.15625,
      "learning_rate": 4.4275567186149215e-05,
      "loss": 0.9368,
      "step": 173800
    },
    {
      "epoch": 0.6091607173523711,
      "grad_norm": 2.78125,
      "learning_rate": 4.427491815748551e-05,
      "loss": 0.8933,
      "step": 173810
    },
    {
      "epoch": 0.6091957648592667,
      "grad_norm": 3.046875,
      "learning_rate": 4.4274269128821805e-05,
      "loss": 0.952,
      "step": 173820
    },
    {
      "epoch": 0.6092308123661623,
      "grad_norm": 3.046875,
      "learning_rate": 4.4273620100158106e-05,
      "loss": 0.9124,
      "step": 173830
    },
    {
      "epoch": 0.6092658598730579,
      "grad_norm": 3.015625,
      "learning_rate": 4.42729710714944e-05,
      "loss": 0.9539,
      "step": 173840
    },
    {
      "epoch": 0.6093009073799536,
      "grad_norm": 3.515625,
      "learning_rate": 4.42723220428307e-05,
      "loss": 0.9675,
      "step": 173850
    },
    {
      "epoch": 0.6093359548868491,
      "grad_norm": 2.921875,
      "learning_rate": 4.4271673014166997e-05,
      "loss": 0.8322,
      "step": 173860
    },
    {
      "epoch": 0.6093710023937448,
      "grad_norm": 2.875,
      "learning_rate": 4.42710239855033e-05,
      "loss": 0.9037,
      "step": 173870
    },
    {
      "epoch": 0.6094060499006403,
      "grad_norm": 2.703125,
      "learning_rate": 4.427037495683959e-05,
      "loss": 0.8716,
      "step": 173880
    },
    {
      "epoch": 0.6094410974075359,
      "grad_norm": 3.15625,
      "learning_rate": 4.4269725928175894e-05,
      "loss": 0.9624,
      "step": 173890
    },
    {
      "epoch": 0.6094761449144315,
      "grad_norm": 2.734375,
      "learning_rate": 4.426907689951219e-05,
      "loss": 0.9205,
      "step": 173900
    },
    {
      "epoch": 0.6095111924213271,
      "grad_norm": 3.703125,
      "learning_rate": 4.426842787084849e-05,
      "loss": 0.9112,
      "step": 173910
    },
    {
      "epoch": 0.6095462399282227,
      "grad_norm": 3.109375,
      "learning_rate": 4.4267778842184785e-05,
      "loss": 0.9204,
      "step": 173920
    },
    {
      "epoch": 0.6095812874351183,
      "grad_norm": 3.1875,
      "learning_rate": 4.4267129813521086e-05,
      "loss": 0.9337,
      "step": 173930
    },
    {
      "epoch": 0.609616334942014,
      "grad_norm": 2.65625,
      "learning_rate": 4.426648078485738e-05,
      "loss": 0.9151,
      "step": 173940
    },
    {
      "epoch": 0.6096513824489095,
      "grad_norm": 2.78125,
      "learning_rate": 4.426583175619368e-05,
      "loss": 0.9026,
      "step": 173950
    },
    {
      "epoch": 0.6096864299558051,
      "grad_norm": 3.078125,
      "learning_rate": 4.426518272752998e-05,
      "loss": 0.9448,
      "step": 173960
    },
    {
      "epoch": 0.6097214774627007,
      "grad_norm": 2.734375,
      "learning_rate": 4.426453369886628e-05,
      "loss": 0.9647,
      "step": 173970
    },
    {
      "epoch": 0.6097565249695963,
      "grad_norm": 3.15625,
      "learning_rate": 4.426388467020258e-05,
      "loss": 0.957,
      "step": 173980
    },
    {
      "epoch": 0.6097915724764918,
      "grad_norm": 3.15625,
      "learning_rate": 4.4263235641538874e-05,
      "loss": 0.9142,
      "step": 173990
    },
    {
      "epoch": 0.6098266199833875,
      "grad_norm": 2.953125,
      "learning_rate": 4.4262586612875175e-05,
      "loss": 0.8588,
      "step": 174000
    },
    {
      "epoch": 0.609861667490283,
      "grad_norm": 2.703125,
      "learning_rate": 4.426193758421147e-05,
      "loss": 0.8168,
      "step": 174010
    },
    {
      "epoch": 0.6098967149971787,
      "grad_norm": 3.421875,
      "learning_rate": 4.426128855554777e-05,
      "loss": 0.9418,
      "step": 174020
    },
    {
      "epoch": 0.6099317625040743,
      "grad_norm": 2.9375,
      "learning_rate": 4.4260639526884066e-05,
      "loss": 0.8219,
      "step": 174030
    },
    {
      "epoch": 0.6099668100109699,
      "grad_norm": 2.953125,
      "learning_rate": 4.425999049822037e-05,
      "loss": 0.9734,
      "step": 174040
    },
    {
      "epoch": 0.6100018575178655,
      "grad_norm": 2.953125,
      "learning_rate": 4.425934146955666e-05,
      "loss": 0.8677,
      "step": 174050
    },
    {
      "epoch": 0.610036905024761,
      "grad_norm": 3.328125,
      "learning_rate": 4.425869244089296e-05,
      "loss": 0.9335,
      "step": 174060
    },
    {
      "epoch": 0.6100719525316567,
      "grad_norm": 3.015625,
      "learning_rate": 4.425804341222926e-05,
      "loss": 0.8059,
      "step": 174070
    },
    {
      "epoch": 0.6101070000385522,
      "grad_norm": 2.984375,
      "learning_rate": 4.425739438356556e-05,
      "loss": 0.903,
      "step": 174080
    },
    {
      "epoch": 0.6101420475454479,
      "grad_norm": 2.921875,
      "learning_rate": 4.425674535490186e-05,
      "loss": 0.92,
      "step": 174090
    },
    {
      "epoch": 0.6101770950523434,
      "grad_norm": 3.46875,
      "learning_rate": 4.4256096326238155e-05,
      "loss": 0.9549,
      "step": 174100
    },
    {
      "epoch": 0.6102121425592391,
      "grad_norm": 2.96875,
      "learning_rate": 4.425544729757446e-05,
      "loss": 0.936,
      "step": 174110
    },
    {
      "epoch": 0.6102471900661346,
      "grad_norm": 2.890625,
      "learning_rate": 4.425479826891075e-05,
      "loss": 0.9129,
      "step": 174120
    },
    {
      "epoch": 0.6102822375730302,
      "grad_norm": 3.078125,
      "learning_rate": 4.425414924024705e-05,
      "loss": 0.9986,
      "step": 174130
    },
    {
      "epoch": 0.6103172850799259,
      "grad_norm": 2.765625,
      "learning_rate": 4.425350021158335e-05,
      "loss": 0.8776,
      "step": 174140
    },
    {
      "epoch": 0.6103523325868214,
      "grad_norm": 2.859375,
      "learning_rate": 4.425285118291965e-05,
      "loss": 0.8208,
      "step": 174150
    },
    {
      "epoch": 0.6103873800937171,
      "grad_norm": 2.890625,
      "learning_rate": 4.425220215425594e-05,
      "loss": 0.826,
      "step": 174160
    },
    {
      "epoch": 0.6104224276006126,
      "grad_norm": 2.671875,
      "learning_rate": 4.4251553125592245e-05,
      "loss": 0.9276,
      "step": 174170
    },
    {
      "epoch": 0.6104574751075083,
      "grad_norm": 3.140625,
      "learning_rate": 4.425090409692854e-05,
      "loss": 0.9322,
      "step": 174180
    },
    {
      "epoch": 0.6104925226144038,
      "grad_norm": 2.515625,
      "learning_rate": 4.4250255068264834e-05,
      "loss": 0.8712,
      "step": 174190
    },
    {
      "epoch": 0.6105275701212994,
      "grad_norm": 2.875,
      "learning_rate": 4.4249606039601135e-05,
      "loss": 0.8685,
      "step": 174200
    },
    {
      "epoch": 0.610562617628195,
      "grad_norm": 2.484375,
      "learning_rate": 4.424895701093743e-05,
      "loss": 0.8839,
      "step": 174210
    },
    {
      "epoch": 0.6105976651350906,
      "grad_norm": 3.015625,
      "learning_rate": 4.424830798227373e-05,
      "loss": 0.7686,
      "step": 174220
    },
    {
      "epoch": 0.6106327126419863,
      "grad_norm": 2.96875,
      "learning_rate": 4.4247658953610026e-05,
      "loss": 0.8992,
      "step": 174230
    },
    {
      "epoch": 0.6106677601488818,
      "grad_norm": 2.78125,
      "learning_rate": 4.424700992494633e-05,
      "loss": 0.9283,
      "step": 174240
    },
    {
      "epoch": 0.6107028076557774,
      "grad_norm": 2.953125,
      "learning_rate": 4.424636089628262e-05,
      "loss": 0.9508,
      "step": 174250
    },
    {
      "epoch": 0.610737855162673,
      "grad_norm": 3.1875,
      "learning_rate": 4.424571186761892e-05,
      "loss": 0.893,
      "step": 174260
    },
    {
      "epoch": 0.6107729026695686,
      "grad_norm": 2.71875,
      "learning_rate": 4.424506283895522e-05,
      "loss": 0.9155,
      "step": 174270
    },
    {
      "epoch": 0.6108079501764642,
      "grad_norm": 3.171875,
      "learning_rate": 4.424441381029152e-05,
      "loss": 1.0023,
      "step": 174280
    },
    {
      "epoch": 0.6108429976833598,
      "grad_norm": 2.765625,
      "learning_rate": 4.4243764781627814e-05,
      "loss": 0.8437,
      "step": 174290
    },
    {
      "epoch": 0.6108780451902553,
      "grad_norm": 2.703125,
      "learning_rate": 4.4243115752964115e-05,
      "loss": 0.934,
      "step": 174300
    },
    {
      "epoch": 0.610913092697151,
      "grad_norm": 3.1875,
      "learning_rate": 4.424246672430041e-05,
      "loss": 0.8287,
      "step": 174310
    },
    {
      "epoch": 0.6109481402040465,
      "grad_norm": 3.28125,
      "learning_rate": 4.424181769563671e-05,
      "loss": 0.8007,
      "step": 174320
    },
    {
      "epoch": 0.6109831877109422,
      "grad_norm": 3.078125,
      "learning_rate": 4.424116866697301e-05,
      "loss": 0.9132,
      "step": 174330
    },
    {
      "epoch": 0.6110182352178378,
      "grad_norm": 2.953125,
      "learning_rate": 4.424051963830931e-05,
      "loss": 0.9063,
      "step": 174340
    },
    {
      "epoch": 0.6110532827247334,
      "grad_norm": 2.640625,
      "learning_rate": 4.423987060964561e-05,
      "loss": 0.9361,
      "step": 174350
    },
    {
      "epoch": 0.611088330231629,
      "grad_norm": 3.0,
      "learning_rate": 4.42392215809819e-05,
      "loss": 0.9847,
      "step": 174360
    },
    {
      "epoch": 0.6111233777385245,
      "grad_norm": 3.453125,
      "learning_rate": 4.4238572552318205e-05,
      "loss": 0.9015,
      "step": 174370
    },
    {
      "epoch": 0.6111584252454202,
      "grad_norm": 3.265625,
      "learning_rate": 4.42379235236545e-05,
      "loss": 0.8384,
      "step": 174380
    },
    {
      "epoch": 0.6111934727523157,
      "grad_norm": 2.828125,
      "learning_rate": 4.42372744949908e-05,
      "loss": 0.9099,
      "step": 174390
    },
    {
      "epoch": 0.6112285202592114,
      "grad_norm": 3.46875,
      "learning_rate": 4.4236625466327095e-05,
      "loss": 0.9455,
      "step": 174400
    },
    {
      "epoch": 0.6112635677661069,
      "grad_norm": 3.59375,
      "learning_rate": 4.42359764376634e-05,
      "loss": 0.9428,
      "step": 174410
    },
    {
      "epoch": 0.6112986152730026,
      "grad_norm": 3.03125,
      "learning_rate": 4.423532740899969e-05,
      "loss": 0.9246,
      "step": 174420
    },
    {
      "epoch": 0.6113336627798982,
      "grad_norm": 3.296875,
      "learning_rate": 4.423467838033599e-05,
      "loss": 0.8598,
      "step": 174430
    },
    {
      "epoch": 0.6113687102867937,
      "grad_norm": 2.4375,
      "learning_rate": 4.423402935167229e-05,
      "loss": 0.9407,
      "step": 174440
    },
    {
      "epoch": 0.6114037577936894,
      "grad_norm": 2.96875,
      "learning_rate": 4.423338032300859e-05,
      "loss": 0.9746,
      "step": 174450
    },
    {
      "epoch": 0.6114388053005849,
      "grad_norm": 3.140625,
      "learning_rate": 4.423273129434489e-05,
      "loss": 1.0011,
      "step": 174460
    },
    {
      "epoch": 0.6114738528074806,
      "grad_norm": 2.765625,
      "learning_rate": 4.4232082265681185e-05,
      "loss": 0.8877,
      "step": 174470
    },
    {
      "epoch": 0.6115089003143761,
      "grad_norm": 2.9375,
      "learning_rate": 4.4231433237017486e-05,
      "loss": 0.9237,
      "step": 174480
    },
    {
      "epoch": 0.6115439478212717,
      "grad_norm": 3.046875,
      "learning_rate": 4.423078420835378e-05,
      "loss": 0.8999,
      "step": 174490
    },
    {
      "epoch": 0.6115789953281673,
      "grad_norm": 2.78125,
      "learning_rate": 4.423013517969008e-05,
      "loss": 0.9539,
      "step": 174500
    },
    {
      "epoch": 0.6116140428350629,
      "grad_norm": 3.515625,
      "learning_rate": 4.422948615102638e-05,
      "loss": 0.9906,
      "step": 174510
    },
    {
      "epoch": 0.6116490903419586,
      "grad_norm": 2.59375,
      "learning_rate": 4.422883712236268e-05,
      "loss": 0.9328,
      "step": 174520
    },
    {
      "epoch": 0.6116841378488541,
      "grad_norm": 3.171875,
      "learning_rate": 4.422818809369897e-05,
      "loss": 0.9152,
      "step": 174530
    },
    {
      "epoch": 0.6117191853557498,
      "grad_norm": 2.671875,
      "learning_rate": 4.4227539065035274e-05,
      "loss": 0.9306,
      "step": 174540
    },
    {
      "epoch": 0.6117542328626453,
      "grad_norm": 3.390625,
      "learning_rate": 4.422689003637157e-05,
      "loss": 0.8624,
      "step": 174550
    },
    {
      "epoch": 0.6117892803695409,
      "grad_norm": 3.421875,
      "learning_rate": 4.422624100770787e-05,
      "loss": 1.0184,
      "step": 174560
    },
    {
      "epoch": 0.6118243278764365,
      "grad_norm": 2.984375,
      "learning_rate": 4.4225591979044165e-05,
      "loss": 0.9358,
      "step": 174570
    },
    {
      "epoch": 0.6118593753833321,
      "grad_norm": 3.109375,
      "learning_rate": 4.422494295038046e-05,
      "loss": 0.9365,
      "step": 174580
    },
    {
      "epoch": 0.6118944228902277,
      "grad_norm": 3.03125,
      "learning_rate": 4.422429392171676e-05,
      "loss": 0.8509,
      "step": 174590
    },
    {
      "epoch": 0.6119294703971233,
      "grad_norm": 2.90625,
      "learning_rate": 4.4223644893053055e-05,
      "loss": 0.8726,
      "step": 174600
    },
    {
      "epoch": 0.6119645179040188,
      "grad_norm": 3.125,
      "learning_rate": 4.422299586438936e-05,
      "loss": 0.8392,
      "step": 174610
    },
    {
      "epoch": 0.6119995654109145,
      "grad_norm": 3.046875,
      "learning_rate": 4.422234683572565e-05,
      "loss": 0.9437,
      "step": 174620
    },
    {
      "epoch": 0.6120346129178101,
      "grad_norm": 2.765625,
      "learning_rate": 4.422169780706195e-05,
      "loss": 0.9682,
      "step": 174630
    },
    {
      "epoch": 0.6120696604247057,
      "grad_norm": 3.0625,
      "learning_rate": 4.422104877839825e-05,
      "loss": 0.9333,
      "step": 174640
    },
    {
      "epoch": 0.6121047079316013,
      "grad_norm": 3.171875,
      "learning_rate": 4.422039974973455e-05,
      "loss": 0.9467,
      "step": 174650
    },
    {
      "epoch": 0.6121397554384969,
      "grad_norm": 2.875,
      "learning_rate": 4.421975072107084e-05,
      "loss": 0.8895,
      "step": 174660
    },
    {
      "epoch": 0.6121748029453925,
      "grad_norm": 2.859375,
      "learning_rate": 4.4219101692407145e-05,
      "loss": 0.8729,
      "step": 174670
    },
    {
      "epoch": 0.612209850452288,
      "grad_norm": 4.53125,
      "learning_rate": 4.421845266374344e-05,
      "loss": 0.9447,
      "step": 174680
    },
    {
      "epoch": 0.6122448979591837,
      "grad_norm": 2.953125,
      "learning_rate": 4.421780363507974e-05,
      "loss": 0.9308,
      "step": 174690
    },
    {
      "epoch": 0.6122799454660792,
      "grad_norm": 3.359375,
      "learning_rate": 4.421715460641604e-05,
      "loss": 0.9747,
      "step": 174700
    },
    {
      "epoch": 0.6123149929729749,
      "grad_norm": 2.84375,
      "learning_rate": 4.421650557775234e-05,
      "loss": 0.959,
      "step": 174710
    },
    {
      "epoch": 0.6123500404798705,
      "grad_norm": 3.0,
      "learning_rate": 4.421585654908864e-05,
      "loss": 0.8535,
      "step": 174720
    },
    {
      "epoch": 0.612385087986766,
      "grad_norm": 2.96875,
      "learning_rate": 4.421520752042493e-05,
      "loss": 0.9143,
      "step": 174730
    },
    {
      "epoch": 0.6124201354936617,
      "grad_norm": 3.078125,
      "learning_rate": 4.4214558491761234e-05,
      "loss": 1.0034,
      "step": 174740
    },
    {
      "epoch": 0.6124551830005572,
      "grad_norm": 3.109375,
      "learning_rate": 4.421390946309753e-05,
      "loss": 0.9632,
      "step": 174750
    },
    {
      "epoch": 0.6124902305074529,
      "grad_norm": 2.859375,
      "learning_rate": 4.421326043443383e-05,
      "loss": 0.8924,
      "step": 174760
    },
    {
      "epoch": 0.6125252780143484,
      "grad_norm": 2.984375,
      "learning_rate": 4.4212611405770125e-05,
      "loss": 0.9463,
      "step": 174770
    },
    {
      "epoch": 0.6125603255212441,
      "grad_norm": 3.15625,
      "learning_rate": 4.4211962377106426e-05,
      "loss": 0.9321,
      "step": 174780
    },
    {
      "epoch": 0.6125953730281396,
      "grad_norm": 2.65625,
      "learning_rate": 4.421131334844272e-05,
      "loss": 0.9116,
      "step": 174790
    },
    {
      "epoch": 0.6126304205350352,
      "grad_norm": 3.09375,
      "learning_rate": 4.421066431977902e-05,
      "loss": 1.006,
      "step": 174800
    },
    {
      "epoch": 0.6126654680419308,
      "grad_norm": 2.578125,
      "learning_rate": 4.421001529111532e-05,
      "loss": 0.9075,
      "step": 174810
    },
    {
      "epoch": 0.6127005155488264,
      "grad_norm": 2.671875,
      "learning_rate": 4.420936626245162e-05,
      "loss": 0.8964,
      "step": 174820
    },
    {
      "epoch": 0.6127355630557221,
      "grad_norm": 2.9375,
      "learning_rate": 4.420871723378792e-05,
      "loss": 0.9351,
      "step": 174830
    },
    {
      "epoch": 0.6127706105626176,
      "grad_norm": 3.03125,
      "learning_rate": 4.4208068205124214e-05,
      "loss": 0.9629,
      "step": 174840
    },
    {
      "epoch": 0.6128056580695133,
      "grad_norm": 3.03125,
      "learning_rate": 4.4207419176460515e-05,
      "loss": 0.8874,
      "step": 174850
    },
    {
      "epoch": 0.6128407055764088,
      "grad_norm": 2.921875,
      "learning_rate": 4.420677014779681e-05,
      "loss": 0.9927,
      "step": 174860
    },
    {
      "epoch": 0.6128757530833044,
      "grad_norm": 2.890625,
      "learning_rate": 4.420612111913311e-05,
      "loss": 0.8267,
      "step": 174870
    },
    {
      "epoch": 0.6129108005902,
      "grad_norm": 2.703125,
      "learning_rate": 4.4205472090469406e-05,
      "loss": 0.8931,
      "step": 174880
    },
    {
      "epoch": 0.6129458480970956,
      "grad_norm": 2.828125,
      "learning_rate": 4.420482306180571e-05,
      "loss": 0.8404,
      "step": 174890
    },
    {
      "epoch": 0.6129808956039912,
      "grad_norm": 2.71875,
      "learning_rate": 4.4204174033142e-05,
      "loss": 0.8413,
      "step": 174900
    },
    {
      "epoch": 0.6130159431108868,
      "grad_norm": 2.921875,
      "learning_rate": 4.4203525004478303e-05,
      "loss": 0.899,
      "step": 174910
    },
    {
      "epoch": 0.6130509906177825,
      "grad_norm": 2.609375,
      "learning_rate": 4.42028759758146e-05,
      "loss": 0.8653,
      "step": 174920
    },
    {
      "epoch": 0.613086038124678,
      "grad_norm": 2.8125,
      "learning_rate": 4.42022269471509e-05,
      "loss": 0.8872,
      "step": 174930
    },
    {
      "epoch": 0.6131210856315736,
      "grad_norm": 2.90625,
      "learning_rate": 4.4201577918487194e-05,
      "loss": 0.9189,
      "step": 174940
    },
    {
      "epoch": 0.6131561331384692,
      "grad_norm": 2.890625,
      "learning_rate": 4.420092888982349e-05,
      "loss": 0.8032,
      "step": 174950
    },
    {
      "epoch": 0.6131911806453648,
      "grad_norm": 3.265625,
      "learning_rate": 4.420027986115979e-05,
      "loss": 0.9283,
      "step": 174960
    },
    {
      "epoch": 0.6132262281522604,
      "grad_norm": 2.5,
      "learning_rate": 4.4199630832496085e-05,
      "loss": 0.8851,
      "step": 174970
    },
    {
      "epoch": 0.613261275659156,
      "grad_norm": 2.75,
      "learning_rate": 4.4198981803832386e-05,
      "loss": 0.8611,
      "step": 174980
    },
    {
      "epoch": 0.6132963231660515,
      "grad_norm": 3.0,
      "learning_rate": 4.419833277516868e-05,
      "loss": 0.8967,
      "step": 174990
    },
    {
      "epoch": 0.6133313706729472,
      "grad_norm": 3.28125,
      "learning_rate": 4.419768374650498e-05,
      "loss": 0.8591,
      "step": 175000
    },
    {
      "epoch": 0.6133313706729472,
      "eval_loss": 0.8577172160148621,
      "eval_runtime": 555.031,
      "eval_samples_per_second": 685.432,
      "eval_steps_per_second": 57.119,
      "step": 175000
    },
    {
      "epoch": 0.6133664181798428,
      "grad_norm": 3.1875,
      "learning_rate": 4.419703471784128e-05,
      "loss": 0.9705,
      "step": 175010
    },
    {
      "epoch": 0.6134014656867384,
      "grad_norm": 2.625,
      "learning_rate": 4.419638568917758e-05,
      "loss": 0.9578,
      "step": 175020
    },
    {
      "epoch": 0.613436513193634,
      "grad_norm": 2.609375,
      "learning_rate": 4.419573666051387e-05,
      "loss": 0.8861,
      "step": 175030
    },
    {
      "epoch": 0.6134715607005296,
      "grad_norm": 2.796875,
      "learning_rate": 4.4195087631850174e-05,
      "loss": 0.9078,
      "step": 175040
    },
    {
      "epoch": 0.6135066082074252,
      "grad_norm": 3.265625,
      "learning_rate": 4.4194438603186475e-05,
      "loss": 0.8732,
      "step": 175050
    },
    {
      "epoch": 0.6135416557143207,
      "grad_norm": 2.96875,
      "learning_rate": 4.419378957452277e-05,
      "loss": 0.8729,
      "step": 175060
    },
    {
      "epoch": 0.6135767032212164,
      "grad_norm": 2.59375,
      "learning_rate": 4.419314054585907e-05,
      "loss": 0.8627,
      "step": 175070
    },
    {
      "epoch": 0.6136117507281119,
      "grad_norm": 2.875,
      "learning_rate": 4.4192491517195366e-05,
      "loss": 0.9137,
      "step": 175080
    },
    {
      "epoch": 0.6136467982350076,
      "grad_norm": 3.015625,
      "learning_rate": 4.419184248853167e-05,
      "loss": 0.8872,
      "step": 175090
    },
    {
      "epoch": 0.6136818457419031,
      "grad_norm": 2.9375,
      "learning_rate": 4.419119345986796e-05,
      "loss": 0.9247,
      "step": 175100
    },
    {
      "epoch": 0.6137168932487987,
      "grad_norm": 3.03125,
      "learning_rate": 4.4190544431204263e-05,
      "loss": 0.931,
      "step": 175110
    },
    {
      "epoch": 0.6137519407556944,
      "grad_norm": 2.8125,
      "learning_rate": 4.418989540254056e-05,
      "loss": 0.8678,
      "step": 175120
    },
    {
      "epoch": 0.6137869882625899,
      "grad_norm": 2.96875,
      "learning_rate": 4.418924637387686e-05,
      "loss": 0.8768,
      "step": 175130
    },
    {
      "epoch": 0.6138220357694856,
      "grad_norm": 3.046875,
      "learning_rate": 4.4188597345213154e-05,
      "loss": 0.9186,
      "step": 175140
    },
    {
      "epoch": 0.6138570832763811,
      "grad_norm": 2.75,
      "learning_rate": 4.4187948316549455e-05,
      "loss": 0.9265,
      "step": 175150
    },
    {
      "epoch": 0.6138921307832768,
      "grad_norm": 3.234375,
      "learning_rate": 4.418729928788575e-05,
      "loss": 0.8683,
      "step": 175160
    },
    {
      "epoch": 0.6139271782901723,
      "grad_norm": 2.65625,
      "learning_rate": 4.418665025922205e-05,
      "loss": 0.925,
      "step": 175170
    },
    {
      "epoch": 0.6139622257970679,
      "grad_norm": 2.78125,
      "learning_rate": 4.4186001230558346e-05,
      "loss": 0.8139,
      "step": 175180
    },
    {
      "epoch": 0.6139972733039635,
      "grad_norm": 2.96875,
      "learning_rate": 4.418535220189465e-05,
      "loss": 1.0066,
      "step": 175190
    },
    {
      "epoch": 0.6140323208108591,
      "grad_norm": 3.390625,
      "learning_rate": 4.418470317323095e-05,
      "loss": 0.8609,
      "step": 175200
    },
    {
      "epoch": 0.6140673683177548,
      "grad_norm": 2.78125,
      "learning_rate": 4.4184054144567243e-05,
      "loss": 0.9275,
      "step": 175210
    },
    {
      "epoch": 0.6141024158246503,
      "grad_norm": 2.703125,
      "learning_rate": 4.4183405115903545e-05,
      "loss": 0.9062,
      "step": 175220
    },
    {
      "epoch": 0.614137463331546,
      "grad_norm": 2.890625,
      "learning_rate": 4.418275608723984e-05,
      "loss": 0.8836,
      "step": 175230
    },
    {
      "epoch": 0.6141725108384415,
      "grad_norm": 2.703125,
      "learning_rate": 4.418210705857614e-05,
      "loss": 0.8441,
      "step": 175240
    },
    {
      "epoch": 0.6142075583453371,
      "grad_norm": 3.203125,
      "learning_rate": 4.4181458029912435e-05,
      "loss": 1.0471,
      "step": 175250
    },
    {
      "epoch": 0.6142426058522327,
      "grad_norm": 2.65625,
      "learning_rate": 4.418080900124874e-05,
      "loss": 0.8281,
      "step": 175260
    },
    {
      "epoch": 0.6142776533591283,
      "grad_norm": 2.75,
      "learning_rate": 4.418015997258503e-05,
      "loss": 0.9199,
      "step": 175270
    },
    {
      "epoch": 0.6143127008660239,
      "grad_norm": 3.03125,
      "learning_rate": 4.417951094392133e-05,
      "loss": 0.9722,
      "step": 175280
    },
    {
      "epoch": 0.6143477483729195,
      "grad_norm": 2.40625,
      "learning_rate": 4.417886191525763e-05,
      "loss": 0.8519,
      "step": 175290
    },
    {
      "epoch": 0.614382795879815,
      "grad_norm": 3.484375,
      "learning_rate": 4.417821288659393e-05,
      "loss": 0.9444,
      "step": 175300
    },
    {
      "epoch": 0.6144178433867107,
      "grad_norm": 2.8125,
      "learning_rate": 4.4177563857930223e-05,
      "loss": 0.9322,
      "step": 175310
    },
    {
      "epoch": 0.6144528908936063,
      "grad_norm": 2.65625,
      "learning_rate": 4.417691482926652e-05,
      "loss": 0.8321,
      "step": 175320
    },
    {
      "epoch": 0.6144879384005019,
      "grad_norm": 2.875,
      "learning_rate": 4.417626580060282e-05,
      "loss": 0.8525,
      "step": 175330
    },
    {
      "epoch": 0.6145229859073975,
      "grad_norm": 3.140625,
      "learning_rate": 4.4175616771939114e-05,
      "loss": 0.9032,
      "step": 175340
    },
    {
      "epoch": 0.614558033414293,
      "grad_norm": 3.1875,
      "learning_rate": 4.4174967743275415e-05,
      "loss": 0.8906,
      "step": 175350
    },
    {
      "epoch": 0.6145930809211887,
      "grad_norm": 3.0625,
      "learning_rate": 4.417431871461171e-05,
      "loss": 0.929,
      "step": 175360
    },
    {
      "epoch": 0.6146281284280842,
      "grad_norm": 3.1875,
      "learning_rate": 4.417366968594801e-05,
      "loss": 0.8952,
      "step": 175370
    },
    {
      "epoch": 0.6146631759349799,
      "grad_norm": 3.40625,
      "learning_rate": 4.4173020657284306e-05,
      "loss": 0.9706,
      "step": 175380
    },
    {
      "epoch": 0.6146982234418754,
      "grad_norm": 2.984375,
      "learning_rate": 4.417237162862061e-05,
      "loss": 0.9354,
      "step": 175390
    },
    {
      "epoch": 0.6147332709487711,
      "grad_norm": 2.46875,
      "learning_rate": 4.41717225999569e-05,
      "loss": 0.855,
      "step": 175400
    },
    {
      "epoch": 0.6147683184556667,
      "grad_norm": 3.125,
      "learning_rate": 4.4171073571293203e-05,
      "loss": 0.9148,
      "step": 175410
    },
    {
      "epoch": 0.6148033659625622,
      "grad_norm": 3.578125,
      "learning_rate": 4.4170424542629505e-05,
      "loss": 0.9085,
      "step": 175420
    },
    {
      "epoch": 0.6148384134694579,
      "grad_norm": 2.796875,
      "learning_rate": 4.41697755139658e-05,
      "loss": 0.9641,
      "step": 175430
    },
    {
      "epoch": 0.6148734609763534,
      "grad_norm": 3.0625,
      "learning_rate": 4.41691264853021e-05,
      "loss": 0.9371,
      "step": 175440
    },
    {
      "epoch": 0.6149085084832491,
      "grad_norm": 2.765625,
      "learning_rate": 4.4168477456638395e-05,
      "loss": 0.9084,
      "step": 175450
    },
    {
      "epoch": 0.6149435559901446,
      "grad_norm": 2.8125,
      "learning_rate": 4.41678284279747e-05,
      "loss": 0.9983,
      "step": 175460
    },
    {
      "epoch": 0.6149786034970403,
      "grad_norm": 2.953125,
      "learning_rate": 4.416717939931099e-05,
      "loss": 0.9319,
      "step": 175470
    },
    {
      "epoch": 0.6150136510039358,
      "grad_norm": 3.21875,
      "learning_rate": 4.416653037064729e-05,
      "loss": 0.985,
      "step": 175480
    },
    {
      "epoch": 0.6150486985108314,
      "grad_norm": 4.0,
      "learning_rate": 4.416588134198359e-05,
      "loss": 0.9487,
      "step": 175490
    },
    {
      "epoch": 0.6150837460177271,
      "grad_norm": 3.34375,
      "learning_rate": 4.416523231331989e-05,
      "loss": 0.998,
      "step": 175500
    },
    {
      "epoch": 0.6151187935246226,
      "grad_norm": 3.0625,
      "learning_rate": 4.4164583284656183e-05,
      "loss": 0.8983,
      "step": 175510
    },
    {
      "epoch": 0.6151538410315183,
      "grad_norm": 2.6875,
      "learning_rate": 4.4163934255992485e-05,
      "loss": 0.8645,
      "step": 175520
    },
    {
      "epoch": 0.6151888885384138,
      "grad_norm": 2.9375,
      "learning_rate": 4.416328522732878e-05,
      "loss": 0.9582,
      "step": 175530
    },
    {
      "epoch": 0.6152239360453094,
      "grad_norm": 3.265625,
      "learning_rate": 4.416263619866508e-05,
      "loss": 1.0007,
      "step": 175540
    },
    {
      "epoch": 0.615258983552205,
      "grad_norm": 2.625,
      "learning_rate": 4.4161987170001375e-05,
      "loss": 0.9411,
      "step": 175550
    },
    {
      "epoch": 0.6152940310591006,
      "grad_norm": 2.984375,
      "learning_rate": 4.416133814133768e-05,
      "loss": 0.9184,
      "step": 175560
    },
    {
      "epoch": 0.6153290785659962,
      "grad_norm": 2.71875,
      "learning_rate": 4.416068911267398e-05,
      "loss": 0.9788,
      "step": 175570
    },
    {
      "epoch": 0.6153641260728918,
      "grad_norm": 2.640625,
      "learning_rate": 4.416004008401027e-05,
      "loss": 0.8818,
      "step": 175580
    },
    {
      "epoch": 0.6153991735797874,
      "grad_norm": 3.921875,
      "learning_rate": 4.4159391055346574e-05,
      "loss": 0.955,
      "step": 175590
    },
    {
      "epoch": 0.615434221086683,
      "grad_norm": 2.359375,
      "learning_rate": 4.415874202668287e-05,
      "loss": 0.8958,
      "step": 175600
    },
    {
      "epoch": 0.6154692685935786,
      "grad_norm": 2.6875,
      "learning_rate": 4.415809299801917e-05,
      "loss": 0.8814,
      "step": 175610
    },
    {
      "epoch": 0.6155043161004742,
      "grad_norm": 2.90625,
      "learning_rate": 4.4157443969355465e-05,
      "loss": 0.9358,
      "step": 175620
    },
    {
      "epoch": 0.6155393636073698,
      "grad_norm": 3.34375,
      "learning_rate": 4.4156794940691766e-05,
      "loss": 0.8901,
      "step": 175630
    },
    {
      "epoch": 0.6155744111142654,
      "grad_norm": 3.5,
      "learning_rate": 4.415614591202806e-05,
      "loss": 0.8636,
      "step": 175640
    },
    {
      "epoch": 0.615609458621161,
      "grad_norm": 2.859375,
      "learning_rate": 4.415549688336436e-05,
      "loss": 0.9254,
      "step": 175650
    },
    {
      "epoch": 0.6156445061280565,
      "grad_norm": 3.171875,
      "learning_rate": 4.415484785470066e-05,
      "loss": 0.9922,
      "step": 175660
    },
    {
      "epoch": 0.6156795536349522,
      "grad_norm": 3.171875,
      "learning_rate": 4.415419882603696e-05,
      "loss": 0.9381,
      "step": 175670
    },
    {
      "epoch": 0.6157146011418477,
      "grad_norm": 2.734375,
      "learning_rate": 4.415354979737325e-05,
      "loss": 0.9089,
      "step": 175680
    },
    {
      "epoch": 0.6157496486487434,
      "grad_norm": 3.359375,
      "learning_rate": 4.4152900768709554e-05,
      "loss": 0.8937,
      "step": 175690
    },
    {
      "epoch": 0.615784696155639,
      "grad_norm": 3.578125,
      "learning_rate": 4.415225174004585e-05,
      "loss": 0.8825,
      "step": 175700
    },
    {
      "epoch": 0.6158197436625346,
      "grad_norm": 2.796875,
      "learning_rate": 4.4151602711382143e-05,
      "loss": 0.854,
      "step": 175710
    },
    {
      "epoch": 0.6158547911694302,
      "grad_norm": 3.3125,
      "learning_rate": 4.4150953682718445e-05,
      "loss": 0.9166,
      "step": 175720
    },
    {
      "epoch": 0.6158898386763257,
      "grad_norm": 3.171875,
      "learning_rate": 4.415030465405474e-05,
      "loss": 0.8748,
      "step": 175730
    },
    {
      "epoch": 0.6159248861832214,
      "grad_norm": 2.890625,
      "learning_rate": 4.414965562539104e-05,
      "loss": 0.9737,
      "step": 175740
    },
    {
      "epoch": 0.6159599336901169,
      "grad_norm": 3.015625,
      "learning_rate": 4.4149006596727335e-05,
      "loss": 0.8217,
      "step": 175750
    },
    {
      "epoch": 0.6159949811970126,
      "grad_norm": 2.890625,
      "learning_rate": 4.414835756806364e-05,
      "loss": 0.9307,
      "step": 175760
    },
    {
      "epoch": 0.6160300287039081,
      "grad_norm": 2.828125,
      "learning_rate": 4.414770853939993e-05,
      "loss": 0.9004,
      "step": 175770
    },
    {
      "epoch": 0.6160650762108038,
      "grad_norm": 3.25,
      "learning_rate": 4.414705951073623e-05,
      "loss": 0.8957,
      "step": 175780
    },
    {
      "epoch": 0.6161001237176993,
      "grad_norm": 3.375,
      "learning_rate": 4.4146410482072534e-05,
      "loss": 0.8837,
      "step": 175790
    },
    {
      "epoch": 0.6161351712245949,
      "grad_norm": 3.0,
      "learning_rate": 4.414576145340883e-05,
      "loss": 0.864,
      "step": 175800
    },
    {
      "epoch": 0.6161702187314906,
      "grad_norm": 3.265625,
      "learning_rate": 4.414511242474513e-05,
      "loss": 0.9412,
      "step": 175810
    },
    {
      "epoch": 0.6162052662383861,
      "grad_norm": 3.015625,
      "learning_rate": 4.4144463396081425e-05,
      "loss": 0.943,
      "step": 175820
    },
    {
      "epoch": 0.6162403137452818,
      "grad_norm": 3.046875,
      "learning_rate": 4.4143814367417726e-05,
      "loss": 0.7961,
      "step": 175830
    },
    {
      "epoch": 0.6162753612521773,
      "grad_norm": 2.875,
      "learning_rate": 4.414316533875402e-05,
      "loss": 0.975,
      "step": 175840
    },
    {
      "epoch": 0.616310408759073,
      "grad_norm": 3.34375,
      "learning_rate": 4.414251631009032e-05,
      "loss": 0.9556,
      "step": 175850
    },
    {
      "epoch": 0.6163454562659685,
      "grad_norm": 3.03125,
      "learning_rate": 4.414186728142662e-05,
      "loss": 0.8612,
      "step": 175860
    },
    {
      "epoch": 0.6163805037728641,
      "grad_norm": 2.78125,
      "learning_rate": 4.414121825276292e-05,
      "loss": 0.9494,
      "step": 175870
    },
    {
      "epoch": 0.6164155512797597,
      "grad_norm": 2.984375,
      "learning_rate": 4.414056922409921e-05,
      "loss": 0.973,
      "step": 175880
    },
    {
      "epoch": 0.6164505987866553,
      "grad_norm": 2.78125,
      "learning_rate": 4.4139920195435514e-05,
      "loss": 0.9096,
      "step": 175890
    },
    {
      "epoch": 0.616485646293551,
      "grad_norm": 2.625,
      "learning_rate": 4.413927116677181e-05,
      "loss": 0.9679,
      "step": 175900
    },
    {
      "epoch": 0.6165206938004465,
      "grad_norm": 3.21875,
      "learning_rate": 4.413862213810811e-05,
      "loss": 0.8831,
      "step": 175910
    },
    {
      "epoch": 0.6165557413073421,
      "grad_norm": 3.21875,
      "learning_rate": 4.413797310944441e-05,
      "loss": 0.9468,
      "step": 175920
    },
    {
      "epoch": 0.6165907888142377,
      "grad_norm": 2.734375,
      "learning_rate": 4.4137324080780706e-05,
      "loss": 0.8968,
      "step": 175930
    },
    {
      "epoch": 0.6166258363211333,
      "grad_norm": 3.046875,
      "learning_rate": 4.413667505211701e-05,
      "loss": 0.8743,
      "step": 175940
    },
    {
      "epoch": 0.6166608838280289,
      "grad_norm": 2.71875,
      "learning_rate": 4.41360260234533e-05,
      "loss": 0.9172,
      "step": 175950
    },
    {
      "epoch": 0.6166959313349245,
      "grad_norm": 2.8125,
      "learning_rate": 4.4135376994789604e-05,
      "loss": 0.8576,
      "step": 175960
    },
    {
      "epoch": 0.61673097884182,
      "grad_norm": 3.109375,
      "learning_rate": 4.41347279661259e-05,
      "loss": 1.0464,
      "step": 175970
    },
    {
      "epoch": 0.6167660263487157,
      "grad_norm": 2.953125,
      "learning_rate": 4.41340789374622e-05,
      "loss": 0.9312,
      "step": 175980
    },
    {
      "epoch": 0.6168010738556112,
      "grad_norm": 3.5,
      "learning_rate": 4.4133429908798494e-05,
      "loss": 0.9563,
      "step": 175990
    },
    {
      "epoch": 0.6168361213625069,
      "grad_norm": 3.140625,
      "learning_rate": 4.4132780880134796e-05,
      "loss": 0.8586,
      "step": 176000
    },
    {
      "epoch": 0.6168711688694025,
      "grad_norm": 3.109375,
      "learning_rate": 4.413213185147109e-05,
      "loss": 0.9223,
      "step": 176010
    },
    {
      "epoch": 0.6169062163762981,
      "grad_norm": 2.96875,
      "learning_rate": 4.413148282280739e-05,
      "loss": 0.9709,
      "step": 176020
    },
    {
      "epoch": 0.6169412638831937,
      "grad_norm": 3.28125,
      "learning_rate": 4.4130833794143686e-05,
      "loss": 0.9884,
      "step": 176030
    },
    {
      "epoch": 0.6169763113900892,
      "grad_norm": 2.59375,
      "learning_rate": 4.413018476547999e-05,
      "loss": 0.863,
      "step": 176040
    },
    {
      "epoch": 0.6170113588969849,
      "grad_norm": 2.71875,
      "learning_rate": 4.412953573681628e-05,
      "loss": 0.9214,
      "step": 176050
    },
    {
      "epoch": 0.6170464064038804,
      "grad_norm": 2.9375,
      "learning_rate": 4.4128886708152584e-05,
      "loss": 0.9511,
      "step": 176060
    },
    {
      "epoch": 0.6170814539107761,
      "grad_norm": 3.046875,
      "learning_rate": 4.412823767948888e-05,
      "loss": 0.8702,
      "step": 176070
    },
    {
      "epoch": 0.6171165014176716,
      "grad_norm": 3.15625,
      "learning_rate": 4.412758865082517e-05,
      "loss": 0.973,
      "step": 176080
    },
    {
      "epoch": 0.6171515489245673,
      "grad_norm": 2.90625,
      "learning_rate": 4.4126939622161474e-05,
      "loss": 0.8242,
      "step": 176090
    },
    {
      "epoch": 0.6171865964314629,
      "grad_norm": 2.890625,
      "learning_rate": 4.412629059349777e-05,
      "loss": 0.8975,
      "step": 176100
    },
    {
      "epoch": 0.6172216439383584,
      "grad_norm": 2.765625,
      "learning_rate": 4.412564156483407e-05,
      "loss": 0.8362,
      "step": 176110
    },
    {
      "epoch": 0.6172566914452541,
      "grad_norm": 3.484375,
      "learning_rate": 4.4124992536170365e-05,
      "loss": 0.9021,
      "step": 176120
    },
    {
      "epoch": 0.6172917389521496,
      "grad_norm": 2.8125,
      "learning_rate": 4.4124343507506666e-05,
      "loss": 0.9141,
      "step": 176130
    },
    {
      "epoch": 0.6173267864590453,
      "grad_norm": 2.890625,
      "learning_rate": 4.412369447884296e-05,
      "loss": 0.8488,
      "step": 176140
    },
    {
      "epoch": 0.6173618339659408,
      "grad_norm": 3.46875,
      "learning_rate": 4.412304545017926e-05,
      "loss": 0.9327,
      "step": 176150
    },
    {
      "epoch": 0.6173968814728364,
      "grad_norm": 2.828125,
      "learning_rate": 4.4122396421515564e-05,
      "loss": 0.8584,
      "step": 176160
    },
    {
      "epoch": 0.617431928979732,
      "grad_norm": 3.421875,
      "learning_rate": 4.412174739285186e-05,
      "loss": 0.9593,
      "step": 176170
    },
    {
      "epoch": 0.6174669764866276,
      "grad_norm": 2.703125,
      "learning_rate": 4.412109836418816e-05,
      "loss": 0.9128,
      "step": 176180
    },
    {
      "epoch": 0.6175020239935233,
      "grad_norm": 2.546875,
      "learning_rate": 4.4120449335524454e-05,
      "loss": 0.9215,
      "step": 176190
    },
    {
      "epoch": 0.6175370715004188,
      "grad_norm": 2.734375,
      "learning_rate": 4.4119800306860756e-05,
      "loss": 0.9246,
      "step": 176200
    },
    {
      "epoch": 0.6175721190073145,
      "grad_norm": 3.1875,
      "learning_rate": 4.411915127819705e-05,
      "loss": 0.878,
      "step": 176210
    },
    {
      "epoch": 0.61760716651421,
      "grad_norm": 2.953125,
      "learning_rate": 4.411850224953335e-05,
      "loss": 0.949,
      "step": 176220
    },
    {
      "epoch": 0.6176422140211056,
      "grad_norm": 3.390625,
      "learning_rate": 4.4117853220869646e-05,
      "loss": 0.9587,
      "step": 176230
    },
    {
      "epoch": 0.6176772615280012,
      "grad_norm": 3.296875,
      "learning_rate": 4.411720419220595e-05,
      "loss": 0.9644,
      "step": 176240
    },
    {
      "epoch": 0.6177123090348968,
      "grad_norm": 3.0,
      "learning_rate": 4.411655516354224e-05,
      "loss": 0.8854,
      "step": 176250
    },
    {
      "epoch": 0.6177473565417924,
      "grad_norm": 2.890625,
      "learning_rate": 4.4115906134878544e-05,
      "loss": 0.9204,
      "step": 176260
    },
    {
      "epoch": 0.617782404048688,
      "grad_norm": 2.71875,
      "learning_rate": 4.411525710621484e-05,
      "loss": 0.9012,
      "step": 176270
    },
    {
      "epoch": 0.6178174515555835,
      "grad_norm": 2.796875,
      "learning_rate": 4.411460807755114e-05,
      "loss": 0.8743,
      "step": 176280
    },
    {
      "epoch": 0.6178524990624792,
      "grad_norm": 3.03125,
      "learning_rate": 4.411395904888744e-05,
      "loss": 0.9609,
      "step": 176290
    },
    {
      "epoch": 0.6178875465693748,
      "grad_norm": 2.703125,
      "learning_rate": 4.4113310020223736e-05,
      "loss": 0.875,
      "step": 176300
    },
    {
      "epoch": 0.6179225940762704,
      "grad_norm": 2.890625,
      "learning_rate": 4.411266099156004e-05,
      "loss": 0.913,
      "step": 176310
    },
    {
      "epoch": 0.617957641583166,
      "grad_norm": 3.21875,
      "learning_rate": 4.411201196289633e-05,
      "loss": 0.9446,
      "step": 176320
    },
    {
      "epoch": 0.6179926890900616,
      "grad_norm": 2.703125,
      "learning_rate": 4.411136293423263e-05,
      "loss": 0.9089,
      "step": 176330
    },
    {
      "epoch": 0.6180277365969572,
      "grad_norm": 2.609375,
      "learning_rate": 4.411071390556893e-05,
      "loss": 0.9307,
      "step": 176340
    },
    {
      "epoch": 0.6180627841038527,
      "grad_norm": 3.109375,
      "learning_rate": 4.411006487690523e-05,
      "loss": 0.9571,
      "step": 176350
    },
    {
      "epoch": 0.6180978316107484,
      "grad_norm": 3.71875,
      "learning_rate": 4.4109415848241524e-05,
      "loss": 1.0048,
      "step": 176360
    },
    {
      "epoch": 0.6181328791176439,
      "grad_norm": 3.265625,
      "learning_rate": 4.4108766819577825e-05,
      "loss": 0.9109,
      "step": 176370
    },
    {
      "epoch": 0.6181679266245396,
      "grad_norm": 3.078125,
      "learning_rate": 4.410811779091412e-05,
      "loss": 0.9295,
      "step": 176380
    },
    {
      "epoch": 0.6182029741314352,
      "grad_norm": 3.125,
      "learning_rate": 4.410746876225042e-05,
      "loss": 0.8651,
      "step": 176390
    },
    {
      "epoch": 0.6182380216383307,
      "grad_norm": 2.90625,
      "learning_rate": 4.4106819733586716e-05,
      "loss": 0.9514,
      "step": 176400
    },
    {
      "epoch": 0.6182730691452264,
      "grad_norm": 2.921875,
      "learning_rate": 4.410617070492302e-05,
      "loss": 0.9176,
      "step": 176410
    },
    {
      "epoch": 0.6183081166521219,
      "grad_norm": 2.984375,
      "learning_rate": 4.410552167625931e-05,
      "loss": 0.9557,
      "step": 176420
    },
    {
      "epoch": 0.6183431641590176,
      "grad_norm": 3.109375,
      "learning_rate": 4.410487264759561e-05,
      "loss": 0.8968,
      "step": 176430
    },
    {
      "epoch": 0.6183782116659131,
      "grad_norm": 2.921875,
      "learning_rate": 4.4104223618931914e-05,
      "loss": 0.9963,
      "step": 176440
    },
    {
      "epoch": 0.6184132591728088,
      "grad_norm": 3.09375,
      "learning_rate": 4.41035745902682e-05,
      "loss": 0.9302,
      "step": 176450
    },
    {
      "epoch": 0.6184483066797043,
      "grad_norm": 3.203125,
      "learning_rate": 4.4102925561604504e-05,
      "loss": 0.9901,
      "step": 176460
    },
    {
      "epoch": 0.6184833541865999,
      "grad_norm": 2.921875,
      "learning_rate": 4.41022765329408e-05,
      "loss": 0.9904,
      "step": 176470
    },
    {
      "epoch": 0.6185184016934955,
      "grad_norm": 2.71875,
      "learning_rate": 4.41016275042771e-05,
      "loss": 0.9433,
      "step": 176480
    },
    {
      "epoch": 0.6185534492003911,
      "grad_norm": 2.4375,
      "learning_rate": 4.4100978475613394e-05,
      "loss": 0.8491,
      "step": 176490
    },
    {
      "epoch": 0.6185884967072868,
      "grad_norm": 2.734375,
      "learning_rate": 4.4100329446949696e-05,
      "loss": 0.9001,
      "step": 176500
    },
    {
      "epoch": 0.6186235442141823,
      "grad_norm": 2.828125,
      "learning_rate": 4.409968041828599e-05,
      "loss": 0.8965,
      "step": 176510
    },
    {
      "epoch": 0.618658591721078,
      "grad_norm": 2.796875,
      "learning_rate": 4.409903138962229e-05,
      "loss": 0.8332,
      "step": 176520
    },
    {
      "epoch": 0.6186936392279735,
      "grad_norm": 3.484375,
      "learning_rate": 4.409838236095859e-05,
      "loss": 1.0166,
      "step": 176530
    },
    {
      "epoch": 0.6187286867348691,
      "grad_norm": 2.734375,
      "learning_rate": 4.409773333229489e-05,
      "loss": 0.9435,
      "step": 176540
    },
    {
      "epoch": 0.6187637342417647,
      "grad_norm": 3.296875,
      "learning_rate": 4.409708430363119e-05,
      "loss": 0.8672,
      "step": 176550
    },
    {
      "epoch": 0.6187987817486603,
      "grad_norm": 3.125,
      "learning_rate": 4.4096435274967484e-05,
      "loss": 0.8779,
      "step": 176560
    },
    {
      "epoch": 0.6188338292555559,
      "grad_norm": 2.8125,
      "learning_rate": 4.4095786246303785e-05,
      "loss": 0.8882,
      "step": 176570
    },
    {
      "epoch": 0.6188688767624515,
      "grad_norm": 3.46875,
      "learning_rate": 4.409513721764008e-05,
      "loss": 0.9236,
      "step": 176580
    },
    {
      "epoch": 0.6189039242693471,
      "grad_norm": 2.953125,
      "learning_rate": 4.409448818897638e-05,
      "loss": 0.9167,
      "step": 176590
    },
    {
      "epoch": 0.6189389717762427,
      "grad_norm": 3.25,
      "learning_rate": 4.4093839160312676e-05,
      "loss": 0.853,
      "step": 176600
    },
    {
      "epoch": 0.6189740192831383,
      "grad_norm": 2.8125,
      "learning_rate": 4.409319013164898e-05,
      "loss": 0.9833,
      "step": 176610
    },
    {
      "epoch": 0.6190090667900339,
      "grad_norm": 2.84375,
      "learning_rate": 4.409254110298527e-05,
      "loss": 0.91,
      "step": 176620
    },
    {
      "epoch": 0.6190441142969295,
      "grad_norm": 2.546875,
      "learning_rate": 4.409189207432157e-05,
      "loss": 0.8418,
      "step": 176630
    },
    {
      "epoch": 0.619079161803825,
      "grad_norm": 3.65625,
      "learning_rate": 4.409124304565787e-05,
      "loss": 0.9651,
      "step": 176640
    },
    {
      "epoch": 0.6191142093107207,
      "grad_norm": 2.65625,
      "learning_rate": 4.409059401699417e-05,
      "loss": 0.8832,
      "step": 176650
    },
    {
      "epoch": 0.6191492568176162,
      "grad_norm": 3.15625,
      "learning_rate": 4.408994498833047e-05,
      "loss": 0.9334,
      "step": 176660
    },
    {
      "epoch": 0.6191843043245119,
      "grad_norm": 2.984375,
      "learning_rate": 4.4089295959666765e-05,
      "loss": 0.9514,
      "step": 176670
    },
    {
      "epoch": 0.6192193518314075,
      "grad_norm": 2.8125,
      "learning_rate": 4.4088646931003066e-05,
      "loss": 0.99,
      "step": 176680
    },
    {
      "epoch": 0.6192543993383031,
      "grad_norm": 3.40625,
      "learning_rate": 4.408799790233936e-05,
      "loss": 0.9688,
      "step": 176690
    },
    {
      "epoch": 0.6192894468451987,
      "grad_norm": 2.671875,
      "learning_rate": 4.408734887367566e-05,
      "loss": 0.9335,
      "step": 176700
    },
    {
      "epoch": 0.6193244943520942,
      "grad_norm": 3.125,
      "learning_rate": 4.408669984501196e-05,
      "loss": 0.9477,
      "step": 176710
    },
    {
      "epoch": 0.6193595418589899,
      "grad_norm": 2.875,
      "learning_rate": 4.408605081634826e-05,
      "loss": 0.9235,
      "step": 176720
    },
    {
      "epoch": 0.6193945893658854,
      "grad_norm": 3.125,
      "learning_rate": 4.408540178768455e-05,
      "loss": 0.9322,
      "step": 176730
    },
    {
      "epoch": 0.6194296368727811,
      "grad_norm": 2.796875,
      "learning_rate": 4.4084752759020854e-05,
      "loss": 0.871,
      "step": 176740
    },
    {
      "epoch": 0.6194646843796766,
      "grad_norm": 3.125,
      "learning_rate": 4.408410373035715e-05,
      "loss": 0.9808,
      "step": 176750
    },
    {
      "epoch": 0.6194997318865723,
      "grad_norm": 3.0625,
      "learning_rate": 4.408345470169345e-05,
      "loss": 0.8947,
      "step": 176760
    },
    {
      "epoch": 0.6195347793934678,
      "grad_norm": 3.265625,
      "learning_rate": 4.4082805673029745e-05,
      "loss": 0.8743,
      "step": 176770
    },
    {
      "epoch": 0.6195698269003634,
      "grad_norm": 3.0,
      "learning_rate": 4.4082156644366046e-05,
      "loss": 0.8928,
      "step": 176780
    },
    {
      "epoch": 0.6196048744072591,
      "grad_norm": 3.546875,
      "learning_rate": 4.408150761570234e-05,
      "loss": 0.8925,
      "step": 176790
    },
    {
      "epoch": 0.6196399219141546,
      "grad_norm": 2.34375,
      "learning_rate": 4.408085858703864e-05,
      "loss": 0.8782,
      "step": 176800
    },
    {
      "epoch": 0.6196749694210503,
      "grad_norm": 3.109375,
      "learning_rate": 4.4080209558374944e-05,
      "loss": 0.9276,
      "step": 176810
    },
    {
      "epoch": 0.6197100169279458,
      "grad_norm": 3.21875,
      "learning_rate": 4.407956052971123e-05,
      "loss": 0.9241,
      "step": 176820
    },
    {
      "epoch": 0.6197450644348415,
      "grad_norm": 2.578125,
      "learning_rate": 4.407891150104753e-05,
      "loss": 0.9501,
      "step": 176830
    },
    {
      "epoch": 0.619780111941737,
      "grad_norm": 2.828125,
      "learning_rate": 4.407826247238383e-05,
      "loss": 0.9154,
      "step": 176840
    },
    {
      "epoch": 0.6198151594486326,
      "grad_norm": 2.765625,
      "learning_rate": 4.407761344372013e-05,
      "loss": 0.9246,
      "step": 176850
    },
    {
      "epoch": 0.6198502069555282,
      "grad_norm": 3.09375,
      "learning_rate": 4.4076964415056424e-05,
      "loss": 0.9269,
      "step": 176860
    },
    {
      "epoch": 0.6198852544624238,
      "grad_norm": 3.015625,
      "learning_rate": 4.4076315386392725e-05,
      "loss": 0.9687,
      "step": 176870
    },
    {
      "epoch": 0.6199203019693195,
      "grad_norm": 3.015625,
      "learning_rate": 4.4075666357729026e-05,
      "loss": 0.9182,
      "step": 176880
    },
    {
      "epoch": 0.619955349476215,
      "grad_norm": 3.40625,
      "learning_rate": 4.407501732906532e-05,
      "loss": 0.8723,
      "step": 176890
    },
    {
      "epoch": 0.6199903969831106,
      "grad_norm": 2.796875,
      "learning_rate": 4.407436830040162e-05,
      "loss": 0.8817,
      "step": 176900
    },
    {
      "epoch": 0.6200254444900062,
      "grad_norm": 3.046875,
      "learning_rate": 4.407371927173792e-05,
      "loss": 0.9285,
      "step": 176910
    },
    {
      "epoch": 0.6200604919969018,
      "grad_norm": 3.109375,
      "learning_rate": 4.407307024307422e-05,
      "loss": 0.9714,
      "step": 176920
    },
    {
      "epoch": 0.6200955395037974,
      "grad_norm": 3.375,
      "learning_rate": 4.407242121441051e-05,
      "loss": 0.8538,
      "step": 176930
    },
    {
      "epoch": 0.620130587010693,
      "grad_norm": 4.03125,
      "learning_rate": 4.4071772185746814e-05,
      "loss": 1.0005,
      "step": 176940
    },
    {
      "epoch": 0.6201656345175885,
      "grad_norm": 3.125,
      "learning_rate": 4.407112315708311e-05,
      "loss": 0.9339,
      "step": 176950
    },
    {
      "epoch": 0.6202006820244842,
      "grad_norm": 3.09375,
      "learning_rate": 4.407047412841941e-05,
      "loss": 0.9315,
      "step": 176960
    },
    {
      "epoch": 0.6202357295313797,
      "grad_norm": 3.171875,
      "learning_rate": 4.4069825099755705e-05,
      "loss": 0.8534,
      "step": 176970
    },
    {
      "epoch": 0.6202707770382754,
      "grad_norm": 2.46875,
      "learning_rate": 4.4069176071092006e-05,
      "loss": 0.9763,
      "step": 176980
    },
    {
      "epoch": 0.620305824545171,
      "grad_norm": 2.984375,
      "learning_rate": 4.40685270424283e-05,
      "loss": 0.9048,
      "step": 176990
    },
    {
      "epoch": 0.6203408720520666,
      "grad_norm": 2.9375,
      "learning_rate": 4.40678780137646e-05,
      "loss": 0.9271,
      "step": 177000
    },
    {
      "epoch": 0.6203759195589622,
      "grad_norm": 3.265625,
      "learning_rate": 4.40672289851009e-05,
      "loss": 0.8789,
      "step": 177010
    },
    {
      "epoch": 0.6204109670658577,
      "grad_norm": 3.078125,
      "learning_rate": 4.40665799564372e-05,
      "loss": 0.8677,
      "step": 177020
    },
    {
      "epoch": 0.6204460145727534,
      "grad_norm": 2.703125,
      "learning_rate": 4.40659309277735e-05,
      "loss": 0.8657,
      "step": 177030
    },
    {
      "epoch": 0.6204810620796489,
      "grad_norm": 3.15625,
      "learning_rate": 4.4065281899109794e-05,
      "loss": 0.8696,
      "step": 177040
    },
    {
      "epoch": 0.6205161095865446,
      "grad_norm": 2.921875,
      "learning_rate": 4.4064632870446096e-05,
      "loss": 0.8287,
      "step": 177050
    },
    {
      "epoch": 0.6205511570934401,
      "grad_norm": 2.6875,
      "learning_rate": 4.406398384178239e-05,
      "loss": 0.8989,
      "step": 177060
    },
    {
      "epoch": 0.6205862046003358,
      "grad_norm": 3.09375,
      "learning_rate": 4.406333481311869e-05,
      "loss": 0.9327,
      "step": 177070
    },
    {
      "epoch": 0.6206212521072314,
      "grad_norm": 2.8125,
      "learning_rate": 4.4062685784454986e-05,
      "loss": 0.8613,
      "step": 177080
    },
    {
      "epoch": 0.6206562996141269,
      "grad_norm": 2.96875,
      "learning_rate": 4.406203675579129e-05,
      "loss": 0.9541,
      "step": 177090
    },
    {
      "epoch": 0.6206913471210226,
      "grad_norm": 2.859375,
      "learning_rate": 4.406138772712758e-05,
      "loss": 0.9224,
      "step": 177100
    },
    {
      "epoch": 0.6207263946279181,
      "grad_norm": 2.515625,
      "learning_rate": 4.4060738698463884e-05,
      "loss": 0.9092,
      "step": 177110
    },
    {
      "epoch": 0.6207614421348138,
      "grad_norm": 3.03125,
      "learning_rate": 4.406008966980018e-05,
      "loss": 0.975,
      "step": 177120
    },
    {
      "epoch": 0.6207964896417093,
      "grad_norm": 3.5,
      "learning_rate": 4.405944064113648e-05,
      "loss": 0.9199,
      "step": 177130
    },
    {
      "epoch": 0.620831537148605,
      "grad_norm": 3.015625,
      "learning_rate": 4.4058791612472774e-05,
      "loss": 0.864,
      "step": 177140
    },
    {
      "epoch": 0.6208665846555005,
      "grad_norm": 2.640625,
      "learning_rate": 4.4058142583809076e-05,
      "loss": 0.9665,
      "step": 177150
    },
    {
      "epoch": 0.6209016321623961,
      "grad_norm": 3.078125,
      "learning_rate": 4.405749355514538e-05,
      "loss": 0.8696,
      "step": 177160
    },
    {
      "epoch": 0.6209366796692918,
      "grad_norm": 3.296875,
      "learning_rate": 4.405684452648167e-05,
      "loss": 0.9704,
      "step": 177170
    },
    {
      "epoch": 0.6209717271761873,
      "grad_norm": 3.359375,
      "learning_rate": 4.405619549781797e-05,
      "loss": 1.015,
      "step": 177180
    },
    {
      "epoch": 0.621006774683083,
      "grad_norm": 3.28125,
      "learning_rate": 4.405554646915427e-05,
      "loss": 0.9738,
      "step": 177190
    },
    {
      "epoch": 0.6210418221899785,
      "grad_norm": 3.03125,
      "learning_rate": 4.405489744049056e-05,
      "loss": 0.9211,
      "step": 177200
    },
    {
      "epoch": 0.6210768696968741,
      "grad_norm": 3.03125,
      "learning_rate": 4.405424841182686e-05,
      "loss": 0.9949,
      "step": 177210
    },
    {
      "epoch": 0.6211119172037697,
      "grad_norm": 3.09375,
      "learning_rate": 4.405359938316316e-05,
      "loss": 0.9374,
      "step": 177220
    },
    {
      "epoch": 0.6211469647106653,
      "grad_norm": 3.34375,
      "learning_rate": 4.405295035449945e-05,
      "loss": 1.0036,
      "step": 177230
    },
    {
      "epoch": 0.6211820122175609,
      "grad_norm": 3.0625,
      "learning_rate": 4.4052301325835754e-05,
      "loss": 0.8938,
      "step": 177240
    },
    {
      "epoch": 0.6212170597244565,
      "grad_norm": 2.59375,
      "learning_rate": 4.4051652297172056e-05,
      "loss": 0.9072,
      "step": 177250
    },
    {
      "epoch": 0.621252107231352,
      "grad_norm": 3.28125,
      "learning_rate": 4.405100326850835e-05,
      "loss": 0.8912,
      "step": 177260
    },
    {
      "epoch": 0.6212871547382477,
      "grad_norm": 2.734375,
      "learning_rate": 4.405035423984465e-05,
      "loss": 0.918,
      "step": 177270
    },
    {
      "epoch": 0.6213222022451433,
      "grad_norm": 3.0625,
      "learning_rate": 4.4049705211180946e-05,
      "loss": 0.8737,
      "step": 177280
    },
    {
      "epoch": 0.6213572497520389,
      "grad_norm": 2.875,
      "learning_rate": 4.404905618251725e-05,
      "loss": 0.8869,
      "step": 177290
    },
    {
      "epoch": 0.6213922972589345,
      "grad_norm": 6.625,
      "learning_rate": 4.404840715385354e-05,
      "loss": 0.9047,
      "step": 177300
    },
    {
      "epoch": 0.6214273447658301,
      "grad_norm": 3.21875,
      "learning_rate": 4.4047758125189844e-05,
      "loss": 0.914,
      "step": 177310
    },
    {
      "epoch": 0.6214623922727257,
      "grad_norm": 2.859375,
      "learning_rate": 4.404710909652614e-05,
      "loss": 0.9137,
      "step": 177320
    },
    {
      "epoch": 0.6214974397796212,
      "grad_norm": 3.0,
      "learning_rate": 4.404646006786244e-05,
      "loss": 0.9166,
      "step": 177330
    },
    {
      "epoch": 0.6215324872865169,
      "grad_norm": 2.75,
      "learning_rate": 4.4045811039198734e-05,
      "loss": 0.9638,
      "step": 177340
    },
    {
      "epoch": 0.6215675347934124,
      "grad_norm": 3.296875,
      "learning_rate": 4.4045162010535036e-05,
      "loss": 0.9596,
      "step": 177350
    },
    {
      "epoch": 0.6216025823003081,
      "grad_norm": 2.609375,
      "learning_rate": 4.404451298187133e-05,
      "loss": 0.8127,
      "step": 177360
    },
    {
      "epoch": 0.6216376298072037,
      "grad_norm": 3.3125,
      "learning_rate": 4.404386395320763e-05,
      "loss": 0.9259,
      "step": 177370
    },
    {
      "epoch": 0.6216726773140993,
      "grad_norm": 3.234375,
      "learning_rate": 4.4043214924543926e-05,
      "loss": 0.8702,
      "step": 177380
    },
    {
      "epoch": 0.6217077248209949,
      "grad_norm": 2.953125,
      "learning_rate": 4.404256589588023e-05,
      "loss": 0.9687,
      "step": 177390
    },
    {
      "epoch": 0.6217427723278904,
      "grad_norm": 2.84375,
      "learning_rate": 4.404191686721653e-05,
      "loss": 0.904,
      "step": 177400
    },
    {
      "epoch": 0.6217778198347861,
      "grad_norm": 3.125,
      "learning_rate": 4.4041267838552824e-05,
      "loss": 0.9399,
      "step": 177410
    },
    {
      "epoch": 0.6218128673416816,
      "grad_norm": 2.734375,
      "learning_rate": 4.4040618809889125e-05,
      "loss": 0.904,
      "step": 177420
    },
    {
      "epoch": 0.6218479148485773,
      "grad_norm": 3.03125,
      "learning_rate": 4.403996978122542e-05,
      "loss": 0.9303,
      "step": 177430
    },
    {
      "epoch": 0.6218829623554728,
      "grad_norm": 2.96875,
      "learning_rate": 4.403932075256172e-05,
      "loss": 0.9604,
      "step": 177440
    },
    {
      "epoch": 0.6219180098623684,
      "grad_norm": 3.140625,
      "learning_rate": 4.4038671723898016e-05,
      "loss": 0.9303,
      "step": 177450
    },
    {
      "epoch": 0.621953057369264,
      "grad_norm": 3.5625,
      "learning_rate": 4.403802269523432e-05,
      "loss": 1.0557,
      "step": 177460
    },
    {
      "epoch": 0.6219881048761596,
      "grad_norm": 2.625,
      "learning_rate": 4.403737366657061e-05,
      "loss": 0.929,
      "step": 177470
    },
    {
      "epoch": 0.6220231523830553,
      "grad_norm": 2.859375,
      "learning_rate": 4.403672463790691e-05,
      "loss": 0.8954,
      "step": 177480
    },
    {
      "epoch": 0.6220581998899508,
      "grad_norm": 2.640625,
      "learning_rate": 4.403607560924321e-05,
      "loss": 0.8498,
      "step": 177490
    },
    {
      "epoch": 0.6220932473968465,
      "grad_norm": 3.03125,
      "learning_rate": 4.403542658057951e-05,
      "loss": 0.9709,
      "step": 177500
    },
    {
      "epoch": 0.622128294903742,
      "grad_norm": 2.75,
      "learning_rate": 4.4034777551915804e-05,
      "loss": 0.9497,
      "step": 177510
    },
    {
      "epoch": 0.6221633424106376,
      "grad_norm": 3.671875,
      "learning_rate": 4.4034128523252105e-05,
      "loss": 0.9023,
      "step": 177520
    },
    {
      "epoch": 0.6221983899175332,
      "grad_norm": 2.734375,
      "learning_rate": 4.4033479494588407e-05,
      "loss": 0.8902,
      "step": 177530
    },
    {
      "epoch": 0.6222334374244288,
      "grad_norm": 2.828125,
      "learning_rate": 4.40328304659247e-05,
      "loss": 0.8759,
      "step": 177540
    },
    {
      "epoch": 0.6222684849313244,
      "grad_norm": 2.890625,
      "learning_rate": 4.4032181437261e-05,
      "loss": 0.8709,
      "step": 177550
    },
    {
      "epoch": 0.62230353243822,
      "grad_norm": 2.765625,
      "learning_rate": 4.40315324085973e-05,
      "loss": 0.918,
      "step": 177560
    },
    {
      "epoch": 0.6223385799451157,
      "grad_norm": 2.828125,
      "learning_rate": 4.40308833799336e-05,
      "loss": 0.8855,
      "step": 177570
    },
    {
      "epoch": 0.6223736274520112,
      "grad_norm": 3.28125,
      "learning_rate": 4.4030234351269886e-05,
      "loss": 0.9438,
      "step": 177580
    },
    {
      "epoch": 0.6224086749589068,
      "grad_norm": 3.171875,
      "learning_rate": 4.402958532260619e-05,
      "loss": 0.9621,
      "step": 177590
    },
    {
      "epoch": 0.6224437224658024,
      "grad_norm": 3.15625,
      "learning_rate": 4.402893629394248e-05,
      "loss": 0.9497,
      "step": 177600
    },
    {
      "epoch": 0.622478769972698,
      "grad_norm": 2.8125,
      "learning_rate": 4.4028287265278784e-05,
      "loss": 0.9302,
      "step": 177610
    },
    {
      "epoch": 0.6225138174795936,
      "grad_norm": 3.28125,
      "learning_rate": 4.4027638236615085e-05,
      "loss": 0.9407,
      "step": 177620
    },
    {
      "epoch": 0.6225488649864892,
      "grad_norm": 3.203125,
      "learning_rate": 4.402698920795138e-05,
      "loss": 0.9363,
      "step": 177630
    },
    {
      "epoch": 0.6225839124933847,
      "grad_norm": 2.640625,
      "learning_rate": 4.402634017928768e-05,
      "loss": 0.943,
      "step": 177640
    },
    {
      "epoch": 0.6226189600002804,
      "grad_norm": 2.984375,
      "learning_rate": 4.4025691150623976e-05,
      "loss": 0.9305,
      "step": 177650
    },
    {
      "epoch": 0.6226540075071759,
      "grad_norm": 3.0625,
      "learning_rate": 4.402504212196028e-05,
      "loss": 0.9207,
      "step": 177660
    },
    {
      "epoch": 0.6226890550140716,
      "grad_norm": 2.84375,
      "learning_rate": 4.402439309329657e-05,
      "loss": 0.9368,
      "step": 177670
    },
    {
      "epoch": 0.6227241025209672,
      "grad_norm": 2.90625,
      "learning_rate": 4.402374406463287e-05,
      "loss": 0.9043,
      "step": 177680
    },
    {
      "epoch": 0.6227591500278628,
      "grad_norm": 3.0,
      "learning_rate": 4.402309503596917e-05,
      "loss": 0.8843,
      "step": 177690
    },
    {
      "epoch": 0.6227941975347584,
      "grad_norm": 2.796875,
      "learning_rate": 4.402244600730547e-05,
      "loss": 0.9227,
      "step": 177700
    },
    {
      "epoch": 0.6228292450416539,
      "grad_norm": 2.984375,
      "learning_rate": 4.4021796978641764e-05,
      "loss": 0.9298,
      "step": 177710
    },
    {
      "epoch": 0.6228642925485496,
      "grad_norm": 2.90625,
      "learning_rate": 4.4021147949978065e-05,
      "loss": 0.8855,
      "step": 177720
    },
    {
      "epoch": 0.6228993400554451,
      "grad_norm": 2.703125,
      "learning_rate": 4.402049892131436e-05,
      "loss": 0.9088,
      "step": 177730
    },
    {
      "epoch": 0.6229343875623408,
      "grad_norm": 2.953125,
      "learning_rate": 4.401984989265066e-05,
      "loss": 0.926,
      "step": 177740
    },
    {
      "epoch": 0.6229694350692363,
      "grad_norm": 2.9375,
      "learning_rate": 4.4019200863986956e-05,
      "loss": 0.8967,
      "step": 177750
    },
    {
      "epoch": 0.623004482576132,
      "grad_norm": 4.125,
      "learning_rate": 4.401855183532326e-05,
      "loss": 0.9211,
      "step": 177760
    },
    {
      "epoch": 0.6230395300830276,
      "grad_norm": 2.953125,
      "learning_rate": 4.401790280665956e-05,
      "loss": 0.8231,
      "step": 177770
    },
    {
      "epoch": 0.6230745775899231,
      "grad_norm": 2.5625,
      "learning_rate": 4.401725377799585e-05,
      "loss": 0.8333,
      "step": 177780
    },
    {
      "epoch": 0.6231096250968188,
      "grad_norm": 3.03125,
      "learning_rate": 4.4016604749332155e-05,
      "loss": 0.9209,
      "step": 177790
    },
    {
      "epoch": 0.6231446726037143,
      "grad_norm": 2.734375,
      "learning_rate": 4.401595572066845e-05,
      "loss": 0.8528,
      "step": 177800
    },
    {
      "epoch": 0.62317972011061,
      "grad_norm": 2.453125,
      "learning_rate": 4.401530669200475e-05,
      "loss": 0.8618,
      "step": 177810
    },
    {
      "epoch": 0.6232147676175055,
      "grad_norm": 2.96875,
      "learning_rate": 4.4014657663341045e-05,
      "loss": 0.8394,
      "step": 177820
    },
    {
      "epoch": 0.6232498151244011,
      "grad_norm": 2.875,
      "learning_rate": 4.4014008634677347e-05,
      "loss": 0.9705,
      "step": 177830
    },
    {
      "epoch": 0.6232848626312967,
      "grad_norm": 2.796875,
      "learning_rate": 4.401335960601364e-05,
      "loss": 0.9727,
      "step": 177840
    },
    {
      "epoch": 0.6233199101381923,
      "grad_norm": 3.265625,
      "learning_rate": 4.401271057734994e-05,
      "loss": 0.9154,
      "step": 177850
    },
    {
      "epoch": 0.623354957645088,
      "grad_norm": 2.765625,
      "learning_rate": 4.401206154868624e-05,
      "loss": 0.9385,
      "step": 177860
    },
    {
      "epoch": 0.6233900051519835,
      "grad_norm": 3.0625,
      "learning_rate": 4.401141252002254e-05,
      "loss": 0.9017,
      "step": 177870
    },
    {
      "epoch": 0.6234250526588792,
      "grad_norm": 3.03125,
      "learning_rate": 4.401076349135883e-05,
      "loss": 0.9574,
      "step": 177880
    },
    {
      "epoch": 0.6234601001657747,
      "grad_norm": 2.765625,
      "learning_rate": 4.4010114462695135e-05,
      "loss": 0.9457,
      "step": 177890
    },
    {
      "epoch": 0.6234951476726703,
      "grad_norm": 2.875,
      "learning_rate": 4.4009465434031436e-05,
      "loss": 0.933,
      "step": 177900
    },
    {
      "epoch": 0.6235301951795659,
      "grad_norm": 3.046875,
      "learning_rate": 4.400881640536773e-05,
      "loss": 0.995,
      "step": 177910
    },
    {
      "epoch": 0.6235652426864615,
      "grad_norm": 2.734375,
      "learning_rate": 4.400816737670403e-05,
      "loss": 0.8923,
      "step": 177920
    },
    {
      "epoch": 0.623600290193357,
      "grad_norm": 3.109375,
      "learning_rate": 4.4007518348040327e-05,
      "loss": 0.9751,
      "step": 177930
    },
    {
      "epoch": 0.6236353377002527,
      "grad_norm": 2.734375,
      "learning_rate": 4.400686931937663e-05,
      "loss": 0.8802,
      "step": 177940
    },
    {
      "epoch": 0.6236703852071482,
      "grad_norm": 3.265625,
      "learning_rate": 4.4006220290712916e-05,
      "loss": 0.9233,
      "step": 177950
    },
    {
      "epoch": 0.6237054327140439,
      "grad_norm": 3.078125,
      "learning_rate": 4.400557126204922e-05,
      "loss": 0.9655,
      "step": 177960
    },
    {
      "epoch": 0.6237404802209395,
      "grad_norm": 2.953125,
      "learning_rate": 4.400492223338551e-05,
      "loss": 0.9086,
      "step": 177970
    },
    {
      "epoch": 0.6237755277278351,
      "grad_norm": 2.84375,
      "learning_rate": 4.400427320472181e-05,
      "loss": 0.8694,
      "step": 177980
    },
    {
      "epoch": 0.6238105752347307,
      "grad_norm": 2.859375,
      "learning_rate": 4.4003624176058115e-05,
      "loss": 0.8888,
      "step": 177990
    },
    {
      "epoch": 0.6238456227416262,
      "grad_norm": 3.34375,
      "learning_rate": 4.400297514739441e-05,
      "loss": 0.9627,
      "step": 178000
    },
    {
      "epoch": 0.6238806702485219,
      "grad_norm": 2.875,
      "learning_rate": 4.400232611873071e-05,
      "loss": 0.8948,
      "step": 178010
    },
    {
      "epoch": 0.6239157177554174,
      "grad_norm": 3.328125,
      "learning_rate": 4.4001677090067005e-05,
      "loss": 0.9657,
      "step": 178020
    },
    {
      "epoch": 0.6239507652623131,
      "grad_norm": 2.78125,
      "learning_rate": 4.4001028061403307e-05,
      "loss": 0.8541,
      "step": 178030
    },
    {
      "epoch": 0.6239858127692086,
      "grad_norm": 3.0625,
      "learning_rate": 4.40003790327396e-05,
      "loss": 0.9763,
      "step": 178040
    },
    {
      "epoch": 0.6240208602761043,
      "grad_norm": 3.171875,
      "learning_rate": 4.39997300040759e-05,
      "loss": 0.9125,
      "step": 178050
    },
    {
      "epoch": 0.6240559077829999,
      "grad_norm": 2.71875,
      "learning_rate": 4.39990809754122e-05,
      "loss": 0.8908,
      "step": 178060
    },
    {
      "epoch": 0.6240909552898954,
      "grad_norm": 2.578125,
      "learning_rate": 4.39984319467485e-05,
      "loss": 0.8905,
      "step": 178070
    },
    {
      "epoch": 0.6241260027967911,
      "grad_norm": 2.953125,
      "learning_rate": 4.399778291808479e-05,
      "loss": 0.9511,
      "step": 178080
    },
    {
      "epoch": 0.6241610503036866,
      "grad_norm": 3.0,
      "learning_rate": 4.3997133889421095e-05,
      "loss": 0.8893,
      "step": 178090
    },
    {
      "epoch": 0.6241960978105823,
      "grad_norm": 3.1875,
      "learning_rate": 4.399648486075739e-05,
      "loss": 0.9409,
      "step": 178100
    },
    {
      "epoch": 0.6242311453174778,
      "grad_norm": 2.765625,
      "learning_rate": 4.399583583209369e-05,
      "loss": 0.851,
      "step": 178110
    },
    {
      "epoch": 0.6242661928243735,
      "grad_norm": 3.796875,
      "learning_rate": 4.399518680342999e-05,
      "loss": 0.9212,
      "step": 178120
    },
    {
      "epoch": 0.624301240331269,
      "grad_norm": 3.125,
      "learning_rate": 4.3994537774766287e-05,
      "loss": 0.8771,
      "step": 178130
    },
    {
      "epoch": 0.6243362878381646,
      "grad_norm": 2.78125,
      "learning_rate": 4.399388874610259e-05,
      "loss": 0.8702,
      "step": 178140
    },
    {
      "epoch": 0.6243713353450602,
      "grad_norm": 3.09375,
      "learning_rate": 4.399323971743888e-05,
      "loss": 0.9726,
      "step": 178150
    },
    {
      "epoch": 0.6244063828519558,
      "grad_norm": 2.828125,
      "learning_rate": 4.3992590688775184e-05,
      "loss": 0.9011,
      "step": 178160
    },
    {
      "epoch": 0.6244414303588515,
      "grad_norm": 2.546875,
      "learning_rate": 4.399194166011148e-05,
      "loss": 0.9537,
      "step": 178170
    },
    {
      "epoch": 0.624476477865747,
      "grad_norm": 3.015625,
      "learning_rate": 4.399129263144778e-05,
      "loss": 0.8545,
      "step": 178180
    },
    {
      "epoch": 0.6245115253726427,
      "grad_norm": 2.828125,
      "learning_rate": 4.3990643602784075e-05,
      "loss": 0.9505,
      "step": 178190
    },
    {
      "epoch": 0.6245465728795382,
      "grad_norm": 3.3125,
      "learning_rate": 4.3989994574120376e-05,
      "loss": 0.9643,
      "step": 178200
    },
    {
      "epoch": 0.6245816203864338,
      "grad_norm": 2.921875,
      "learning_rate": 4.398934554545667e-05,
      "loss": 0.8984,
      "step": 178210
    },
    {
      "epoch": 0.6246166678933294,
      "grad_norm": 3.140625,
      "learning_rate": 4.398869651679297e-05,
      "loss": 0.9232,
      "step": 178220
    },
    {
      "epoch": 0.624651715400225,
      "grad_norm": 2.96875,
      "learning_rate": 4.3988047488129267e-05,
      "loss": 0.8263,
      "step": 178230
    },
    {
      "epoch": 0.6246867629071206,
      "grad_norm": 3.03125,
      "learning_rate": 4.398739845946557e-05,
      "loss": 0.9213,
      "step": 178240
    },
    {
      "epoch": 0.6247218104140162,
      "grad_norm": 2.75,
      "learning_rate": 4.398674943080186e-05,
      "loss": 0.7976,
      "step": 178250
    },
    {
      "epoch": 0.6247568579209118,
      "grad_norm": 2.96875,
      "learning_rate": 4.3986100402138164e-05,
      "loss": 0.8372,
      "step": 178260
    },
    {
      "epoch": 0.6247919054278074,
      "grad_norm": 3.140625,
      "learning_rate": 4.3985451373474465e-05,
      "loss": 0.8862,
      "step": 178270
    },
    {
      "epoch": 0.624826952934703,
      "grad_norm": 3.21875,
      "learning_rate": 4.398480234481076e-05,
      "loss": 0.9697,
      "step": 178280
    },
    {
      "epoch": 0.6248620004415986,
      "grad_norm": 2.9375,
      "learning_rate": 4.398415331614706e-05,
      "loss": 0.9475,
      "step": 178290
    },
    {
      "epoch": 0.6248970479484942,
      "grad_norm": 2.921875,
      "learning_rate": 4.3983504287483356e-05,
      "loss": 0.8498,
      "step": 178300
    },
    {
      "epoch": 0.6249320954553897,
      "grad_norm": 3.1875,
      "learning_rate": 4.398285525881966e-05,
      "loss": 0.9136,
      "step": 178310
    },
    {
      "epoch": 0.6249671429622854,
      "grad_norm": 2.828125,
      "learning_rate": 4.398220623015595e-05,
      "loss": 0.8843,
      "step": 178320
    },
    {
      "epoch": 0.6250021904691809,
      "grad_norm": 3.109375,
      "learning_rate": 4.3981557201492247e-05,
      "loss": 0.9721,
      "step": 178330
    },
    {
      "epoch": 0.6250372379760766,
      "grad_norm": 3.0,
      "learning_rate": 4.398090817282854e-05,
      "loss": 0.8952,
      "step": 178340
    },
    {
      "epoch": 0.6250722854829722,
      "grad_norm": 3.03125,
      "learning_rate": 4.398025914416484e-05,
      "loss": 0.8768,
      "step": 178350
    },
    {
      "epoch": 0.6251073329898678,
      "grad_norm": 2.84375,
      "learning_rate": 4.3979610115501144e-05,
      "loss": 1.0009,
      "step": 178360
    },
    {
      "epoch": 0.6251423804967634,
      "grad_norm": 3.234375,
      "learning_rate": 4.397896108683744e-05,
      "loss": 0.93,
      "step": 178370
    },
    {
      "epoch": 0.6251774280036589,
      "grad_norm": 2.90625,
      "learning_rate": 4.397831205817374e-05,
      "loss": 0.9604,
      "step": 178380
    },
    {
      "epoch": 0.6252124755105546,
      "grad_norm": 2.9375,
      "learning_rate": 4.3977663029510035e-05,
      "loss": 0.9202,
      "step": 178390
    },
    {
      "epoch": 0.6252475230174501,
      "grad_norm": 2.765625,
      "learning_rate": 4.3977014000846336e-05,
      "loss": 0.937,
      "step": 178400
    },
    {
      "epoch": 0.6252825705243458,
      "grad_norm": 3.046875,
      "learning_rate": 4.397636497218263e-05,
      "loss": 0.8983,
      "step": 178410
    },
    {
      "epoch": 0.6253176180312413,
      "grad_norm": 2.71875,
      "learning_rate": 4.397571594351893e-05,
      "loss": 0.8882,
      "step": 178420
    },
    {
      "epoch": 0.625352665538137,
      "grad_norm": 3.015625,
      "learning_rate": 4.3975066914855227e-05,
      "loss": 0.8996,
      "step": 178430
    },
    {
      "epoch": 0.6253877130450325,
      "grad_norm": 2.484375,
      "learning_rate": 4.397441788619153e-05,
      "loss": 0.9602,
      "step": 178440
    },
    {
      "epoch": 0.6254227605519281,
      "grad_norm": 3.109375,
      "learning_rate": 4.397376885752782e-05,
      "loss": 0.8635,
      "step": 178450
    },
    {
      "epoch": 0.6254578080588238,
      "grad_norm": 3.015625,
      "learning_rate": 4.3973119828864124e-05,
      "loss": 0.842,
      "step": 178460
    },
    {
      "epoch": 0.6254928555657193,
      "grad_norm": 3.359375,
      "learning_rate": 4.397247080020042e-05,
      "loss": 0.9691,
      "step": 178470
    },
    {
      "epoch": 0.625527903072615,
      "grad_norm": 3.203125,
      "learning_rate": 4.397182177153672e-05,
      "loss": 0.9761,
      "step": 178480
    },
    {
      "epoch": 0.6255629505795105,
      "grad_norm": 2.78125,
      "learning_rate": 4.397117274287302e-05,
      "loss": 0.9281,
      "step": 178490
    },
    {
      "epoch": 0.6255979980864061,
      "grad_norm": 2.796875,
      "learning_rate": 4.3970523714209316e-05,
      "loss": 0.8961,
      "step": 178500
    },
    {
      "epoch": 0.6256330455933017,
      "grad_norm": 2.921875,
      "learning_rate": 4.396987468554562e-05,
      "loss": 0.9199,
      "step": 178510
    },
    {
      "epoch": 0.6256680931001973,
      "grad_norm": 3.3125,
      "learning_rate": 4.396922565688191e-05,
      "loss": 0.9196,
      "step": 178520
    },
    {
      "epoch": 0.6257031406070929,
      "grad_norm": 2.53125,
      "learning_rate": 4.396857662821821e-05,
      "loss": 0.8798,
      "step": 178530
    },
    {
      "epoch": 0.6257381881139885,
      "grad_norm": 2.546875,
      "learning_rate": 4.396792759955451e-05,
      "loss": 1.0498,
      "step": 178540
    },
    {
      "epoch": 0.6257732356208842,
      "grad_norm": 3.046875,
      "learning_rate": 4.396727857089081e-05,
      "loss": 0.9572,
      "step": 178550
    },
    {
      "epoch": 0.6258082831277797,
      "grad_norm": 2.75,
      "learning_rate": 4.3966629542227104e-05,
      "loss": 0.9875,
      "step": 178560
    },
    {
      "epoch": 0.6258433306346753,
      "grad_norm": 2.5,
      "learning_rate": 4.3965980513563405e-05,
      "loss": 0.8646,
      "step": 178570
    },
    {
      "epoch": 0.6258783781415709,
      "grad_norm": 3.1875,
      "learning_rate": 4.39653314848997e-05,
      "loss": 0.9904,
      "step": 178580
    },
    {
      "epoch": 0.6259134256484665,
      "grad_norm": 3.203125,
      "learning_rate": 4.3964682456236e-05,
      "loss": 0.951,
      "step": 178590
    },
    {
      "epoch": 0.6259484731553621,
      "grad_norm": 3.125,
      "learning_rate": 4.3964033427572296e-05,
      "loss": 0.9382,
      "step": 178600
    },
    {
      "epoch": 0.6259835206622577,
      "grad_norm": 2.484375,
      "learning_rate": 4.39633843989086e-05,
      "loss": 0.9371,
      "step": 178610
    },
    {
      "epoch": 0.6260185681691532,
      "grad_norm": 2.78125,
      "learning_rate": 4.396273537024489e-05,
      "loss": 0.8902,
      "step": 178620
    },
    {
      "epoch": 0.6260536156760489,
      "grad_norm": 2.984375,
      "learning_rate": 4.396208634158119e-05,
      "loss": 0.9449,
      "step": 178630
    },
    {
      "epoch": 0.6260886631829444,
      "grad_norm": 3.015625,
      "learning_rate": 4.3961437312917495e-05,
      "loss": 0.9549,
      "step": 178640
    },
    {
      "epoch": 0.6261237106898401,
      "grad_norm": 3.234375,
      "learning_rate": 4.396078828425379e-05,
      "loss": 0.8202,
      "step": 178650
    },
    {
      "epoch": 0.6261587581967357,
      "grad_norm": 3.125,
      "learning_rate": 4.396013925559009e-05,
      "loss": 0.903,
      "step": 178660
    },
    {
      "epoch": 0.6261938057036313,
      "grad_norm": 2.9375,
      "learning_rate": 4.3959490226926385e-05,
      "loss": 0.9321,
      "step": 178670
    },
    {
      "epoch": 0.6262288532105269,
      "grad_norm": 3.09375,
      "learning_rate": 4.395884119826269e-05,
      "loss": 0.8468,
      "step": 178680
    },
    {
      "epoch": 0.6262639007174224,
      "grad_norm": 3.46875,
      "learning_rate": 4.395819216959898e-05,
      "loss": 0.9306,
      "step": 178690
    },
    {
      "epoch": 0.6262989482243181,
      "grad_norm": 3.09375,
      "learning_rate": 4.395754314093528e-05,
      "loss": 0.8154,
      "step": 178700
    },
    {
      "epoch": 0.6263339957312136,
      "grad_norm": 3.15625,
      "learning_rate": 4.395689411227157e-05,
      "loss": 0.8291,
      "step": 178710
    },
    {
      "epoch": 0.6263690432381093,
      "grad_norm": 3.046875,
      "learning_rate": 4.395624508360787e-05,
      "loss": 0.8341,
      "step": 178720
    },
    {
      "epoch": 0.6264040907450048,
      "grad_norm": 3.125,
      "learning_rate": 4.395559605494417e-05,
      "loss": 0.8877,
      "step": 178730
    },
    {
      "epoch": 0.6264391382519005,
      "grad_norm": 2.59375,
      "learning_rate": 4.395494702628047e-05,
      "loss": 0.8462,
      "step": 178740
    },
    {
      "epoch": 0.6264741857587961,
      "grad_norm": 2.9375,
      "learning_rate": 4.395429799761677e-05,
      "loss": 0.8739,
      "step": 178750
    },
    {
      "epoch": 0.6265092332656916,
      "grad_norm": 2.96875,
      "learning_rate": 4.3953648968953064e-05,
      "loss": 0.9595,
      "step": 178760
    },
    {
      "epoch": 0.6265442807725873,
      "grad_norm": 3.09375,
      "learning_rate": 4.3952999940289365e-05,
      "loss": 0.938,
      "step": 178770
    },
    {
      "epoch": 0.6265793282794828,
      "grad_norm": 2.734375,
      "learning_rate": 4.395235091162566e-05,
      "loss": 0.8673,
      "step": 178780
    },
    {
      "epoch": 0.6266143757863785,
      "grad_norm": 3.109375,
      "learning_rate": 4.395170188296196e-05,
      "loss": 0.8441,
      "step": 178790
    },
    {
      "epoch": 0.626649423293274,
      "grad_norm": 2.75,
      "learning_rate": 4.3951052854298256e-05,
      "loss": 0.9139,
      "step": 178800
    },
    {
      "epoch": 0.6266844708001696,
      "grad_norm": 3.0625,
      "learning_rate": 4.395040382563456e-05,
      "loss": 0.9543,
      "step": 178810
    },
    {
      "epoch": 0.6267195183070652,
      "grad_norm": 3.09375,
      "learning_rate": 4.394975479697085e-05,
      "loss": 0.9576,
      "step": 178820
    },
    {
      "epoch": 0.6267545658139608,
      "grad_norm": 3.09375,
      "learning_rate": 4.394910576830715e-05,
      "loss": 0.9217,
      "step": 178830
    },
    {
      "epoch": 0.6267896133208565,
      "grad_norm": 2.921875,
      "learning_rate": 4.394845673964345e-05,
      "loss": 0.8617,
      "step": 178840
    },
    {
      "epoch": 0.626824660827752,
      "grad_norm": 2.671875,
      "learning_rate": 4.394780771097975e-05,
      "loss": 0.8368,
      "step": 178850
    },
    {
      "epoch": 0.6268597083346477,
      "grad_norm": 3.0625,
      "learning_rate": 4.394715868231605e-05,
      "loss": 0.9207,
      "step": 178860
    },
    {
      "epoch": 0.6268947558415432,
      "grad_norm": 2.796875,
      "learning_rate": 4.3946509653652345e-05,
      "loss": 0.9641,
      "step": 178870
    },
    {
      "epoch": 0.6269298033484388,
      "grad_norm": 2.8125,
      "learning_rate": 4.394586062498865e-05,
      "loss": 0.9491,
      "step": 178880
    },
    {
      "epoch": 0.6269648508553344,
      "grad_norm": 2.75,
      "learning_rate": 4.394521159632494e-05,
      "loss": 0.908,
      "step": 178890
    },
    {
      "epoch": 0.62699989836223,
      "grad_norm": 3.015625,
      "learning_rate": 4.394456256766124e-05,
      "loss": 0.8985,
      "step": 178900
    },
    {
      "epoch": 0.6270349458691256,
      "grad_norm": 2.6875,
      "learning_rate": 4.394391353899754e-05,
      "loss": 0.9609,
      "step": 178910
    },
    {
      "epoch": 0.6270699933760212,
      "grad_norm": 2.609375,
      "learning_rate": 4.394326451033384e-05,
      "loss": 0.9187,
      "step": 178920
    },
    {
      "epoch": 0.6271050408829167,
      "grad_norm": 3.515625,
      "learning_rate": 4.394261548167013e-05,
      "loss": 0.9372,
      "step": 178930
    },
    {
      "epoch": 0.6271400883898124,
      "grad_norm": 2.890625,
      "learning_rate": 4.3941966453006435e-05,
      "loss": 0.9337,
      "step": 178940
    },
    {
      "epoch": 0.627175135896708,
      "grad_norm": 3.078125,
      "learning_rate": 4.394131742434273e-05,
      "loss": 0.9266,
      "step": 178950
    },
    {
      "epoch": 0.6272101834036036,
      "grad_norm": 3.375,
      "learning_rate": 4.394066839567903e-05,
      "loss": 0.9691,
      "step": 178960
    },
    {
      "epoch": 0.6272452309104992,
      "grad_norm": 2.859375,
      "learning_rate": 4.3940019367015325e-05,
      "loss": 0.9609,
      "step": 178970
    },
    {
      "epoch": 0.6272802784173948,
      "grad_norm": 2.8125,
      "learning_rate": 4.393937033835163e-05,
      "loss": 0.8392,
      "step": 178980
    },
    {
      "epoch": 0.6273153259242904,
      "grad_norm": 3.234375,
      "learning_rate": 4.393872130968792e-05,
      "loss": 0.8718,
      "step": 178990
    },
    {
      "epoch": 0.6273503734311859,
      "grad_norm": 3.21875,
      "learning_rate": 4.393807228102422e-05,
      "loss": 0.9532,
      "step": 179000
    },
    {
      "epoch": 0.6273854209380816,
      "grad_norm": 2.421875,
      "learning_rate": 4.3937423252360524e-05,
      "loss": 0.9627,
      "step": 179010
    },
    {
      "epoch": 0.6274204684449771,
      "grad_norm": 2.953125,
      "learning_rate": 4.393677422369682e-05,
      "loss": 0.908,
      "step": 179020
    },
    {
      "epoch": 0.6274555159518728,
      "grad_norm": 2.75,
      "learning_rate": 4.393612519503312e-05,
      "loss": 0.8736,
      "step": 179030
    },
    {
      "epoch": 0.6274905634587684,
      "grad_norm": 3.5625,
      "learning_rate": 4.3935476166369415e-05,
      "loss": 0.9444,
      "step": 179040
    },
    {
      "epoch": 0.627525610965664,
      "grad_norm": 3.046875,
      "learning_rate": 4.3934827137705716e-05,
      "loss": 0.9562,
      "step": 179050
    },
    {
      "epoch": 0.6275606584725596,
      "grad_norm": 3.0,
      "learning_rate": 4.393417810904201e-05,
      "loss": 0.9535,
      "step": 179060
    },
    {
      "epoch": 0.6275957059794551,
      "grad_norm": 3.03125,
      "learning_rate": 4.393352908037831e-05,
      "loss": 0.8688,
      "step": 179070
    },
    {
      "epoch": 0.6276307534863508,
      "grad_norm": 2.53125,
      "learning_rate": 4.393288005171461e-05,
      "loss": 0.9289,
      "step": 179080
    },
    {
      "epoch": 0.6276658009932463,
      "grad_norm": 2.859375,
      "learning_rate": 4.39322310230509e-05,
      "loss": 0.9505,
      "step": 179090
    },
    {
      "epoch": 0.627700848500142,
      "grad_norm": 2.78125,
      "learning_rate": 4.39315819943872e-05,
      "loss": 0.8776,
      "step": 179100
    },
    {
      "epoch": 0.6277358960070375,
      "grad_norm": 2.65625,
      "learning_rate": 4.39309329657235e-05,
      "loss": 0.9093,
      "step": 179110
    },
    {
      "epoch": 0.6277709435139331,
      "grad_norm": 3.21875,
      "learning_rate": 4.39302839370598e-05,
      "loss": 0.9373,
      "step": 179120
    },
    {
      "epoch": 0.6278059910208287,
      "grad_norm": 2.921875,
      "learning_rate": 4.392963490839609e-05,
      "loss": 0.8899,
      "step": 179130
    },
    {
      "epoch": 0.6278410385277243,
      "grad_norm": 2.90625,
      "learning_rate": 4.3928985879732395e-05,
      "loss": 0.851,
      "step": 179140
    },
    {
      "epoch": 0.62787608603462,
      "grad_norm": 3.609375,
      "learning_rate": 4.392833685106869e-05,
      "loss": 0.9323,
      "step": 179150
    },
    {
      "epoch": 0.6279111335415155,
      "grad_norm": 3.015625,
      "learning_rate": 4.392768782240499e-05,
      "loss": 0.9652,
      "step": 179160
    },
    {
      "epoch": 0.6279461810484112,
      "grad_norm": 2.84375,
      "learning_rate": 4.3927038793741285e-05,
      "loss": 0.8748,
      "step": 179170
    },
    {
      "epoch": 0.6279812285553067,
      "grad_norm": 2.984375,
      "learning_rate": 4.392638976507759e-05,
      "loss": 0.8474,
      "step": 179180
    },
    {
      "epoch": 0.6280162760622023,
      "grad_norm": 2.75,
      "learning_rate": 4.392574073641388e-05,
      "loss": 0.9329,
      "step": 179190
    },
    {
      "epoch": 0.6280513235690979,
      "grad_norm": 3.140625,
      "learning_rate": 4.392509170775018e-05,
      "loss": 0.9453,
      "step": 179200
    },
    {
      "epoch": 0.6280863710759935,
      "grad_norm": 3.296875,
      "learning_rate": 4.392444267908648e-05,
      "loss": 0.93,
      "step": 179210
    },
    {
      "epoch": 0.6281214185828891,
      "grad_norm": 2.75,
      "learning_rate": 4.392379365042278e-05,
      "loss": 0.9037,
      "step": 179220
    },
    {
      "epoch": 0.6281564660897847,
      "grad_norm": 3.078125,
      "learning_rate": 4.392314462175908e-05,
      "loss": 0.9764,
      "step": 179230
    },
    {
      "epoch": 0.6281915135966804,
      "grad_norm": 3.5625,
      "learning_rate": 4.3922495593095375e-05,
      "loss": 0.9169,
      "step": 179240
    },
    {
      "epoch": 0.6282265611035759,
      "grad_norm": 2.609375,
      "learning_rate": 4.3921846564431676e-05,
      "loss": 0.8521,
      "step": 179250
    },
    {
      "epoch": 0.6282616086104715,
      "grad_norm": 3.34375,
      "learning_rate": 4.392119753576797e-05,
      "loss": 0.9438,
      "step": 179260
    },
    {
      "epoch": 0.6282966561173671,
      "grad_norm": 2.90625,
      "learning_rate": 4.392054850710427e-05,
      "loss": 0.9149,
      "step": 179270
    },
    {
      "epoch": 0.6283317036242627,
      "grad_norm": 3.046875,
      "learning_rate": 4.391989947844057e-05,
      "loss": 0.9327,
      "step": 179280
    },
    {
      "epoch": 0.6283667511311583,
      "grad_norm": 3.78125,
      "learning_rate": 4.391925044977687e-05,
      "loss": 0.9731,
      "step": 179290
    },
    {
      "epoch": 0.6284017986380539,
      "grad_norm": 2.90625,
      "learning_rate": 4.391860142111316e-05,
      "loss": 0.8614,
      "step": 179300
    },
    {
      "epoch": 0.6284368461449494,
      "grad_norm": 2.8125,
      "learning_rate": 4.3917952392449464e-05,
      "loss": 0.9397,
      "step": 179310
    },
    {
      "epoch": 0.6284718936518451,
      "grad_norm": 2.875,
      "learning_rate": 4.391730336378576e-05,
      "loss": 0.8894,
      "step": 179320
    },
    {
      "epoch": 0.6285069411587407,
      "grad_norm": 3.203125,
      "learning_rate": 4.391665433512206e-05,
      "loss": 0.9549,
      "step": 179330
    },
    {
      "epoch": 0.6285419886656363,
      "grad_norm": 3.3125,
      "learning_rate": 4.3916005306458355e-05,
      "loss": 0.9717,
      "step": 179340
    },
    {
      "epoch": 0.6285770361725319,
      "grad_norm": 2.921875,
      "learning_rate": 4.3915356277794656e-05,
      "loss": 0.8772,
      "step": 179350
    },
    {
      "epoch": 0.6286120836794274,
      "grad_norm": 2.78125,
      "learning_rate": 4.391470724913096e-05,
      "loss": 0.9321,
      "step": 179360
    },
    {
      "epoch": 0.6286471311863231,
      "grad_norm": 2.765625,
      "learning_rate": 4.391405822046725e-05,
      "loss": 0.8564,
      "step": 179370
    },
    {
      "epoch": 0.6286821786932186,
      "grad_norm": 3.03125,
      "learning_rate": 4.3913409191803553e-05,
      "loss": 0.8964,
      "step": 179380
    },
    {
      "epoch": 0.6287172262001143,
      "grad_norm": 3.09375,
      "learning_rate": 4.391276016313985e-05,
      "loss": 0.8788,
      "step": 179390
    },
    {
      "epoch": 0.6287522737070098,
      "grad_norm": 2.953125,
      "learning_rate": 4.391211113447615e-05,
      "loss": 0.9228,
      "step": 179400
    },
    {
      "epoch": 0.6287873212139055,
      "grad_norm": 3.046875,
      "learning_rate": 4.3911462105812444e-05,
      "loss": 0.941,
      "step": 179410
    },
    {
      "epoch": 0.628822368720801,
      "grad_norm": 2.671875,
      "learning_rate": 4.3910813077148745e-05,
      "loss": 0.9129,
      "step": 179420
    },
    {
      "epoch": 0.6288574162276966,
      "grad_norm": 2.8125,
      "learning_rate": 4.391016404848504e-05,
      "loss": 0.954,
      "step": 179430
    },
    {
      "epoch": 0.6288924637345923,
      "grad_norm": 3.09375,
      "learning_rate": 4.390951501982134e-05,
      "loss": 0.9181,
      "step": 179440
    },
    {
      "epoch": 0.6289275112414878,
      "grad_norm": 3.171875,
      "learning_rate": 4.3908865991157636e-05,
      "loss": 0.8849,
      "step": 179450
    },
    {
      "epoch": 0.6289625587483835,
      "grad_norm": 3.125,
      "learning_rate": 4.390821696249393e-05,
      "loss": 0.8645,
      "step": 179460
    },
    {
      "epoch": 0.628997606255279,
      "grad_norm": 2.921875,
      "learning_rate": 4.390756793383023e-05,
      "loss": 0.8976,
      "step": 179470
    },
    {
      "epoch": 0.6290326537621747,
      "grad_norm": 2.625,
      "learning_rate": 4.390691890516653e-05,
      "loss": 0.8128,
      "step": 179480
    },
    {
      "epoch": 0.6290677012690702,
      "grad_norm": 2.921875,
      "learning_rate": 4.390626987650283e-05,
      "loss": 0.8771,
      "step": 179490
    },
    {
      "epoch": 0.6291027487759658,
      "grad_norm": 3.046875,
      "learning_rate": 4.390562084783912e-05,
      "loss": 0.8677,
      "step": 179500
    },
    {
      "epoch": 0.6291377962828614,
      "grad_norm": 3.0,
      "learning_rate": 4.3904971819175424e-05,
      "loss": 0.912,
      "step": 179510
    },
    {
      "epoch": 0.629172843789757,
      "grad_norm": 2.421875,
      "learning_rate": 4.390432279051172e-05,
      "loss": 0.8683,
      "step": 179520
    },
    {
      "epoch": 0.6292078912966527,
      "grad_norm": 2.890625,
      "learning_rate": 4.390367376184802e-05,
      "loss": 0.9214,
      "step": 179530
    },
    {
      "epoch": 0.6292429388035482,
      "grad_norm": 2.890625,
      "learning_rate": 4.3903024733184315e-05,
      "loss": 0.8479,
      "step": 179540
    },
    {
      "epoch": 0.6292779863104438,
      "grad_norm": 2.953125,
      "learning_rate": 4.3902375704520616e-05,
      "loss": 1.0316,
      "step": 179550
    },
    {
      "epoch": 0.6293130338173394,
      "grad_norm": 2.640625,
      "learning_rate": 4.390172667585691e-05,
      "loss": 0.9519,
      "step": 179560
    },
    {
      "epoch": 0.629348081324235,
      "grad_norm": 2.625,
      "learning_rate": 4.390107764719321e-05,
      "loss": 0.9435,
      "step": 179570
    },
    {
      "epoch": 0.6293831288311306,
      "grad_norm": 2.625,
      "learning_rate": 4.390042861852951e-05,
      "loss": 0.8982,
      "step": 179580
    },
    {
      "epoch": 0.6294181763380262,
      "grad_norm": 2.953125,
      "learning_rate": 4.389977958986581e-05,
      "loss": 0.8421,
      "step": 179590
    },
    {
      "epoch": 0.6294532238449217,
      "grad_norm": 2.71875,
      "learning_rate": 4.389913056120211e-05,
      "loss": 0.8743,
      "step": 179600
    },
    {
      "epoch": 0.6294882713518174,
      "grad_norm": 2.53125,
      "learning_rate": 4.3898481532538404e-05,
      "loss": 0.886,
      "step": 179610
    },
    {
      "epoch": 0.6295233188587129,
      "grad_norm": 2.796875,
      "learning_rate": 4.3897832503874705e-05,
      "loss": 0.9308,
      "step": 179620
    },
    {
      "epoch": 0.6295583663656086,
      "grad_norm": 2.890625,
      "learning_rate": 4.3897183475211e-05,
      "loss": 0.8789,
      "step": 179630
    },
    {
      "epoch": 0.6295934138725042,
      "grad_norm": 2.53125,
      "learning_rate": 4.38965344465473e-05,
      "loss": 0.8578,
      "step": 179640
    },
    {
      "epoch": 0.6296284613793998,
      "grad_norm": 3.3125,
      "learning_rate": 4.3895885417883596e-05,
      "loss": 0.9366,
      "step": 179650
    },
    {
      "epoch": 0.6296635088862954,
      "grad_norm": 2.703125,
      "learning_rate": 4.38952363892199e-05,
      "loss": 0.8668,
      "step": 179660
    },
    {
      "epoch": 0.629698556393191,
      "grad_norm": 3.03125,
      "learning_rate": 4.389458736055619e-05,
      "loss": 0.8309,
      "step": 179670
    },
    {
      "epoch": 0.6297336039000866,
      "grad_norm": 3.34375,
      "learning_rate": 4.3893938331892493e-05,
      "loss": 0.9784,
      "step": 179680
    },
    {
      "epoch": 0.6297686514069821,
      "grad_norm": 2.890625,
      "learning_rate": 4.389328930322879e-05,
      "loss": 0.9407,
      "step": 179690
    },
    {
      "epoch": 0.6298036989138778,
      "grad_norm": 2.671875,
      "learning_rate": 4.389264027456509e-05,
      "loss": 0.9122,
      "step": 179700
    },
    {
      "epoch": 0.6298387464207733,
      "grad_norm": 2.90625,
      "learning_rate": 4.3891991245901384e-05,
      "loss": 0.9564,
      "step": 179710
    },
    {
      "epoch": 0.629873793927669,
      "grad_norm": 2.71875,
      "learning_rate": 4.3891342217237685e-05,
      "loss": 0.9404,
      "step": 179720
    },
    {
      "epoch": 0.6299088414345646,
      "grad_norm": 2.859375,
      "learning_rate": 4.389069318857399e-05,
      "loss": 0.9286,
      "step": 179730
    },
    {
      "epoch": 0.6299438889414601,
      "grad_norm": 2.65625,
      "learning_rate": 4.389004415991028e-05,
      "loss": 0.8875,
      "step": 179740
    },
    {
      "epoch": 0.6299789364483558,
      "grad_norm": 3.015625,
      "learning_rate": 4.388939513124658e-05,
      "loss": 0.8749,
      "step": 179750
    },
    {
      "epoch": 0.6300139839552513,
      "grad_norm": 3.078125,
      "learning_rate": 4.388874610258288e-05,
      "loss": 0.948,
      "step": 179760
    },
    {
      "epoch": 0.630049031462147,
      "grad_norm": 3.328125,
      "learning_rate": 4.388809707391918e-05,
      "loss": 0.9244,
      "step": 179770
    },
    {
      "epoch": 0.6300840789690425,
      "grad_norm": 3.046875,
      "learning_rate": 4.3887448045255473e-05,
      "loss": 0.9631,
      "step": 179780
    },
    {
      "epoch": 0.6301191264759382,
      "grad_norm": 2.890625,
      "learning_rate": 4.3886799016591775e-05,
      "loss": 0.9102,
      "step": 179790
    },
    {
      "epoch": 0.6301541739828337,
      "grad_norm": 3.015625,
      "learning_rate": 4.388614998792807e-05,
      "loss": 0.9214,
      "step": 179800
    },
    {
      "epoch": 0.6301892214897293,
      "grad_norm": 3.09375,
      "learning_rate": 4.388550095926437e-05,
      "loss": 0.9274,
      "step": 179810
    },
    {
      "epoch": 0.6302242689966249,
      "grad_norm": 3.140625,
      "learning_rate": 4.3884851930600665e-05,
      "loss": 0.9155,
      "step": 179820
    },
    {
      "epoch": 0.6302593165035205,
      "grad_norm": 2.90625,
      "learning_rate": 4.388420290193696e-05,
      "loss": 0.9295,
      "step": 179830
    },
    {
      "epoch": 0.6302943640104162,
      "grad_norm": 3.1875,
      "learning_rate": 4.388355387327326e-05,
      "loss": 0.9002,
      "step": 179840
    },
    {
      "epoch": 0.6303294115173117,
      "grad_norm": 2.984375,
      "learning_rate": 4.3882904844609556e-05,
      "loss": 0.9,
      "step": 179850
    },
    {
      "epoch": 0.6303644590242073,
      "grad_norm": 2.6875,
      "learning_rate": 4.388225581594586e-05,
      "loss": 0.8381,
      "step": 179860
    },
    {
      "epoch": 0.6303995065311029,
      "grad_norm": 2.625,
      "learning_rate": 4.388160678728215e-05,
      "loss": 0.9278,
      "step": 179870
    },
    {
      "epoch": 0.6304345540379985,
      "grad_norm": 3.25,
      "learning_rate": 4.3880957758618453e-05,
      "loss": 0.9193,
      "step": 179880
    },
    {
      "epoch": 0.6304696015448941,
      "grad_norm": 2.96875,
      "learning_rate": 4.388030872995475e-05,
      "loss": 0.9534,
      "step": 179890
    },
    {
      "epoch": 0.6305046490517897,
      "grad_norm": 3.0,
      "learning_rate": 4.387965970129105e-05,
      "loss": 0.904,
      "step": 179900
    },
    {
      "epoch": 0.6305396965586852,
      "grad_norm": 2.90625,
      "learning_rate": 4.3879010672627344e-05,
      "loss": 0.9889,
      "step": 179910
    },
    {
      "epoch": 0.6305747440655809,
      "grad_norm": 2.921875,
      "learning_rate": 4.3878361643963645e-05,
      "loss": 0.8367,
      "step": 179920
    },
    {
      "epoch": 0.6306097915724765,
      "grad_norm": 2.96875,
      "learning_rate": 4.387771261529994e-05,
      "loss": 0.908,
      "step": 179930
    },
    {
      "epoch": 0.6306448390793721,
      "grad_norm": 2.59375,
      "learning_rate": 4.387706358663624e-05,
      "loss": 0.9471,
      "step": 179940
    },
    {
      "epoch": 0.6306798865862677,
      "grad_norm": 3.046875,
      "learning_rate": 4.3876414557972536e-05,
      "loss": 0.8893,
      "step": 179950
    },
    {
      "epoch": 0.6307149340931633,
      "grad_norm": 3.328125,
      "learning_rate": 4.387576552930884e-05,
      "loss": 0.9202,
      "step": 179960
    },
    {
      "epoch": 0.6307499816000589,
      "grad_norm": 2.96875,
      "learning_rate": 4.387511650064514e-05,
      "loss": 1.0257,
      "step": 179970
    },
    {
      "epoch": 0.6307850291069544,
      "grad_norm": 2.796875,
      "learning_rate": 4.3874467471981433e-05,
      "loss": 0.9153,
      "step": 179980
    },
    {
      "epoch": 0.6308200766138501,
      "grad_norm": 3.359375,
      "learning_rate": 4.3873818443317735e-05,
      "loss": 0.9636,
      "step": 179990
    },
    {
      "epoch": 0.6308551241207456,
      "grad_norm": 2.953125,
      "learning_rate": 4.387316941465403e-05,
      "loss": 0.9836,
      "step": 180000
    },
    {
      "epoch": 0.6308551241207456,
      "eval_loss": 0.8543252944946289,
      "eval_runtime": 560.1307,
      "eval_samples_per_second": 679.192,
      "eval_steps_per_second": 56.599,
      "step": 180000
    },
    {
      "epoch": 0.6308901716276413,
      "grad_norm": 3.109375,
      "learning_rate": 4.387252038599033e-05,
      "loss": 0.9277,
      "step": 180010
    },
    {
      "epoch": 0.6309252191345369,
      "grad_norm": 2.46875,
      "learning_rate": 4.3871871357326625e-05,
      "loss": 0.8699,
      "step": 180020
    },
    {
      "epoch": 0.6309602666414325,
      "grad_norm": 3.140625,
      "learning_rate": 4.387122232866293e-05,
      "loss": 0.9145,
      "step": 180030
    },
    {
      "epoch": 0.6309953141483281,
      "grad_norm": 2.90625,
      "learning_rate": 4.387057329999922e-05,
      "loss": 0.924,
      "step": 180040
    },
    {
      "epoch": 0.6310303616552236,
      "grad_norm": 3.15625,
      "learning_rate": 4.386992427133552e-05,
      "loss": 0.9788,
      "step": 180050
    },
    {
      "epoch": 0.6310654091621193,
      "grad_norm": 2.78125,
      "learning_rate": 4.386927524267182e-05,
      "loss": 0.8623,
      "step": 180060
    },
    {
      "epoch": 0.6311004566690148,
      "grad_norm": 2.9375,
      "learning_rate": 4.386862621400812e-05,
      "loss": 0.8078,
      "step": 180070
    },
    {
      "epoch": 0.6311355041759105,
      "grad_norm": 3.203125,
      "learning_rate": 4.3867977185344413e-05,
      "loss": 0.8618,
      "step": 180080
    },
    {
      "epoch": 0.631170551682806,
      "grad_norm": 2.8125,
      "learning_rate": 4.3867328156680715e-05,
      "loss": 0.8791,
      "step": 180090
    },
    {
      "epoch": 0.6312055991897016,
      "grad_norm": 2.828125,
      "learning_rate": 4.3866679128017016e-05,
      "loss": 0.9654,
      "step": 180100
    },
    {
      "epoch": 0.6312406466965972,
      "grad_norm": 2.703125,
      "learning_rate": 4.386603009935331e-05,
      "loss": 0.8653,
      "step": 180110
    },
    {
      "epoch": 0.6312756942034928,
      "grad_norm": 3.125,
      "learning_rate": 4.386538107068961e-05,
      "loss": 0.9387,
      "step": 180120
    },
    {
      "epoch": 0.6313107417103885,
      "grad_norm": 2.765625,
      "learning_rate": 4.386473204202591e-05,
      "loss": 0.919,
      "step": 180130
    },
    {
      "epoch": 0.631345789217284,
      "grad_norm": 3.1875,
      "learning_rate": 4.386408301336221e-05,
      "loss": 0.8838,
      "step": 180140
    },
    {
      "epoch": 0.6313808367241797,
      "grad_norm": 3.328125,
      "learning_rate": 4.38634339846985e-05,
      "loss": 0.9879,
      "step": 180150
    },
    {
      "epoch": 0.6314158842310752,
      "grad_norm": 2.78125,
      "learning_rate": 4.3862784956034804e-05,
      "loss": 0.9035,
      "step": 180160
    },
    {
      "epoch": 0.6314509317379708,
      "grad_norm": 3.109375,
      "learning_rate": 4.38621359273711e-05,
      "loss": 0.8575,
      "step": 180170
    },
    {
      "epoch": 0.6314859792448664,
      "grad_norm": 2.65625,
      "learning_rate": 4.38614868987074e-05,
      "loss": 0.9392,
      "step": 180180
    },
    {
      "epoch": 0.631521026751762,
      "grad_norm": 2.484375,
      "learning_rate": 4.3860837870043695e-05,
      "loss": 0.8701,
      "step": 180190
    },
    {
      "epoch": 0.6315560742586576,
      "grad_norm": 2.609375,
      "learning_rate": 4.3860188841379996e-05,
      "loss": 0.9023,
      "step": 180200
    },
    {
      "epoch": 0.6315911217655532,
      "grad_norm": 3.109375,
      "learning_rate": 4.385953981271629e-05,
      "loss": 0.8827,
      "step": 180210
    },
    {
      "epoch": 0.6316261692724489,
      "grad_norm": 2.703125,
      "learning_rate": 4.3858890784052585e-05,
      "loss": 0.9277,
      "step": 180220
    },
    {
      "epoch": 0.6316612167793444,
      "grad_norm": 3.265625,
      "learning_rate": 4.385824175538889e-05,
      "loss": 0.9154,
      "step": 180230
    },
    {
      "epoch": 0.63169626428624,
      "grad_norm": 3.3125,
      "learning_rate": 4.385759272672518e-05,
      "loss": 0.8733,
      "step": 180240
    },
    {
      "epoch": 0.6317313117931356,
      "grad_norm": 2.828125,
      "learning_rate": 4.385694369806148e-05,
      "loss": 0.9658,
      "step": 180250
    },
    {
      "epoch": 0.6317663593000312,
      "grad_norm": 2.84375,
      "learning_rate": 4.385629466939778e-05,
      "loss": 0.9388,
      "step": 180260
    },
    {
      "epoch": 0.6318014068069268,
      "grad_norm": 2.796875,
      "learning_rate": 4.385564564073408e-05,
      "loss": 0.9936,
      "step": 180270
    },
    {
      "epoch": 0.6318364543138224,
      "grad_norm": 2.71875,
      "learning_rate": 4.3854996612070373e-05,
      "loss": 0.8575,
      "step": 180280
    },
    {
      "epoch": 0.6318715018207179,
      "grad_norm": 2.859375,
      "learning_rate": 4.3854347583406675e-05,
      "loss": 0.9355,
      "step": 180290
    },
    {
      "epoch": 0.6319065493276136,
      "grad_norm": 3.25,
      "learning_rate": 4.385369855474297e-05,
      "loss": 0.9983,
      "step": 180300
    },
    {
      "epoch": 0.6319415968345091,
      "grad_norm": 3.046875,
      "learning_rate": 4.385304952607927e-05,
      "loss": 0.8954,
      "step": 180310
    },
    {
      "epoch": 0.6319766443414048,
      "grad_norm": 2.984375,
      "learning_rate": 4.385240049741557e-05,
      "loss": 0.9856,
      "step": 180320
    },
    {
      "epoch": 0.6320116918483004,
      "grad_norm": 3.0625,
      "learning_rate": 4.385175146875187e-05,
      "loss": 0.8739,
      "step": 180330
    },
    {
      "epoch": 0.632046739355196,
      "grad_norm": 3.1875,
      "learning_rate": 4.385110244008817e-05,
      "loss": 0.8691,
      "step": 180340
    },
    {
      "epoch": 0.6320817868620916,
      "grad_norm": 3.140625,
      "learning_rate": 4.385045341142446e-05,
      "loss": 0.8586,
      "step": 180350
    },
    {
      "epoch": 0.6321168343689871,
      "grad_norm": 3.15625,
      "learning_rate": 4.3849804382760764e-05,
      "loss": 1.0444,
      "step": 180360
    },
    {
      "epoch": 0.6321518818758828,
      "grad_norm": 2.546875,
      "learning_rate": 4.384915535409706e-05,
      "loss": 0.8397,
      "step": 180370
    },
    {
      "epoch": 0.6321869293827783,
      "grad_norm": 3.625,
      "learning_rate": 4.384850632543336e-05,
      "loss": 0.8752,
      "step": 180380
    },
    {
      "epoch": 0.632221976889674,
      "grad_norm": 3.359375,
      "learning_rate": 4.3847857296769655e-05,
      "loss": 0.8689,
      "step": 180390
    },
    {
      "epoch": 0.6322570243965695,
      "grad_norm": 3.125,
      "learning_rate": 4.3847208268105956e-05,
      "loss": 0.8514,
      "step": 180400
    },
    {
      "epoch": 0.6322920719034651,
      "grad_norm": 2.921875,
      "learning_rate": 4.384655923944225e-05,
      "loss": 0.8938,
      "step": 180410
    },
    {
      "epoch": 0.6323271194103608,
      "grad_norm": 2.8125,
      "learning_rate": 4.384591021077855e-05,
      "loss": 0.8733,
      "step": 180420
    },
    {
      "epoch": 0.6323621669172563,
      "grad_norm": 3.21875,
      "learning_rate": 4.384526118211485e-05,
      "loss": 0.9312,
      "step": 180430
    },
    {
      "epoch": 0.632397214424152,
      "grad_norm": 2.875,
      "learning_rate": 4.384461215345115e-05,
      "loss": 0.9231,
      "step": 180440
    },
    {
      "epoch": 0.6324322619310475,
      "grad_norm": 2.53125,
      "learning_rate": 4.384396312478744e-05,
      "loss": 0.845,
      "step": 180450
    },
    {
      "epoch": 0.6324673094379432,
      "grad_norm": 2.875,
      "learning_rate": 4.3843314096123744e-05,
      "loss": 0.932,
      "step": 180460
    },
    {
      "epoch": 0.6325023569448387,
      "grad_norm": 3.203125,
      "learning_rate": 4.3842665067460046e-05,
      "loss": 0.8699,
      "step": 180470
    },
    {
      "epoch": 0.6325374044517343,
      "grad_norm": 3.625,
      "learning_rate": 4.384201603879634e-05,
      "loss": 0.9953,
      "step": 180480
    },
    {
      "epoch": 0.6325724519586299,
      "grad_norm": 3.0625,
      "learning_rate": 4.384136701013264e-05,
      "loss": 0.8609,
      "step": 180490
    },
    {
      "epoch": 0.6326074994655255,
      "grad_norm": 2.65625,
      "learning_rate": 4.3840717981468936e-05,
      "loss": 0.8382,
      "step": 180500
    },
    {
      "epoch": 0.6326425469724212,
      "grad_norm": 2.859375,
      "learning_rate": 4.384006895280524e-05,
      "loss": 0.9377,
      "step": 180510
    },
    {
      "epoch": 0.6326775944793167,
      "grad_norm": 2.71875,
      "learning_rate": 4.383941992414153e-05,
      "loss": 0.8868,
      "step": 180520
    },
    {
      "epoch": 0.6327126419862124,
      "grad_norm": 2.984375,
      "learning_rate": 4.3838770895477834e-05,
      "loss": 0.9683,
      "step": 180530
    },
    {
      "epoch": 0.6327476894931079,
      "grad_norm": 2.5625,
      "learning_rate": 4.383812186681413e-05,
      "loss": 0.856,
      "step": 180540
    },
    {
      "epoch": 0.6327827370000035,
      "grad_norm": 2.78125,
      "learning_rate": 4.383747283815043e-05,
      "loss": 0.9639,
      "step": 180550
    },
    {
      "epoch": 0.6328177845068991,
      "grad_norm": 3.0625,
      "learning_rate": 4.3836823809486724e-05,
      "loss": 0.9435,
      "step": 180560
    },
    {
      "epoch": 0.6328528320137947,
      "grad_norm": 3.390625,
      "learning_rate": 4.3836174780823026e-05,
      "loss": 0.9072,
      "step": 180570
    },
    {
      "epoch": 0.6328878795206903,
      "grad_norm": 2.796875,
      "learning_rate": 4.383552575215932e-05,
      "loss": 0.8714,
      "step": 180580
    },
    {
      "epoch": 0.6329229270275859,
      "grad_norm": 3.125,
      "learning_rate": 4.3834876723495615e-05,
      "loss": 0.9629,
      "step": 180590
    },
    {
      "epoch": 0.6329579745344814,
      "grad_norm": 2.796875,
      "learning_rate": 4.3834227694831916e-05,
      "loss": 0.8596,
      "step": 180600
    },
    {
      "epoch": 0.6329930220413771,
      "grad_norm": 2.75,
      "learning_rate": 4.383357866616821e-05,
      "loss": 0.9043,
      "step": 180610
    },
    {
      "epoch": 0.6330280695482727,
      "grad_norm": 2.921875,
      "learning_rate": 4.383292963750451e-05,
      "loss": 0.9883,
      "step": 180620
    },
    {
      "epoch": 0.6330631170551683,
      "grad_norm": 3.0,
      "learning_rate": 4.383228060884081e-05,
      "loss": 0.8474,
      "step": 180630
    },
    {
      "epoch": 0.6330981645620639,
      "grad_norm": 3.1875,
      "learning_rate": 4.383163158017711e-05,
      "loss": 0.9767,
      "step": 180640
    },
    {
      "epoch": 0.6331332120689595,
      "grad_norm": 3.1875,
      "learning_rate": 4.38309825515134e-05,
      "loss": 0.9909,
      "step": 180650
    },
    {
      "epoch": 0.6331682595758551,
      "grad_norm": 2.46875,
      "learning_rate": 4.3830333522849704e-05,
      "loss": 0.8905,
      "step": 180660
    },
    {
      "epoch": 0.6332033070827506,
      "grad_norm": 2.796875,
      "learning_rate": 4.3829684494186e-05,
      "loss": 0.8222,
      "step": 180670
    },
    {
      "epoch": 0.6332383545896463,
      "grad_norm": 2.640625,
      "learning_rate": 4.38290354655223e-05,
      "loss": 0.931,
      "step": 180680
    },
    {
      "epoch": 0.6332734020965418,
      "grad_norm": 2.875,
      "learning_rate": 4.38283864368586e-05,
      "loss": 0.8496,
      "step": 180690
    },
    {
      "epoch": 0.6333084496034375,
      "grad_norm": 2.84375,
      "learning_rate": 4.3827737408194896e-05,
      "loss": 0.9366,
      "step": 180700
    },
    {
      "epoch": 0.6333434971103331,
      "grad_norm": 3.03125,
      "learning_rate": 4.38270883795312e-05,
      "loss": 0.9257,
      "step": 180710
    },
    {
      "epoch": 0.6333785446172286,
      "grad_norm": 3.234375,
      "learning_rate": 4.382643935086749e-05,
      "loss": 0.8393,
      "step": 180720
    },
    {
      "epoch": 0.6334135921241243,
      "grad_norm": 3.625,
      "learning_rate": 4.3825790322203794e-05,
      "loss": 1.0049,
      "step": 180730
    },
    {
      "epoch": 0.6334486396310198,
      "grad_norm": 2.9375,
      "learning_rate": 4.382514129354009e-05,
      "loss": 0.8433,
      "step": 180740
    },
    {
      "epoch": 0.6334836871379155,
      "grad_norm": 2.78125,
      "learning_rate": 4.382449226487639e-05,
      "loss": 1.0375,
      "step": 180750
    },
    {
      "epoch": 0.633518734644811,
      "grad_norm": 3.421875,
      "learning_rate": 4.3823843236212684e-05,
      "loss": 0.9909,
      "step": 180760
    },
    {
      "epoch": 0.6335537821517067,
      "grad_norm": 3.015625,
      "learning_rate": 4.3823194207548986e-05,
      "loss": 0.9281,
      "step": 180770
    },
    {
      "epoch": 0.6335888296586022,
      "grad_norm": 2.78125,
      "learning_rate": 4.382254517888528e-05,
      "loss": 0.9081,
      "step": 180780
    },
    {
      "epoch": 0.6336238771654978,
      "grad_norm": 2.734375,
      "learning_rate": 4.382189615022158e-05,
      "loss": 0.8745,
      "step": 180790
    },
    {
      "epoch": 0.6336589246723934,
      "grad_norm": 3.296875,
      "learning_rate": 4.3821247121557876e-05,
      "loss": 0.9169,
      "step": 180800
    },
    {
      "epoch": 0.633693972179289,
      "grad_norm": 2.96875,
      "learning_rate": 4.382059809289418e-05,
      "loss": 0.9346,
      "step": 180810
    },
    {
      "epoch": 0.6337290196861847,
      "grad_norm": 3.046875,
      "learning_rate": 4.381994906423047e-05,
      "loss": 0.8459,
      "step": 180820
    },
    {
      "epoch": 0.6337640671930802,
      "grad_norm": 3.21875,
      "learning_rate": 4.3819300035566774e-05,
      "loss": 0.8124,
      "step": 180830
    },
    {
      "epoch": 0.6337991146999759,
      "grad_norm": 2.890625,
      "learning_rate": 4.3818651006903075e-05,
      "loss": 0.8335,
      "step": 180840
    },
    {
      "epoch": 0.6338341622068714,
      "grad_norm": 2.9375,
      "learning_rate": 4.381800197823937e-05,
      "loss": 0.9077,
      "step": 180850
    },
    {
      "epoch": 0.633869209713767,
      "grad_norm": 2.984375,
      "learning_rate": 4.381735294957567e-05,
      "loss": 0.9239,
      "step": 180860
    },
    {
      "epoch": 0.6339042572206626,
      "grad_norm": 2.953125,
      "learning_rate": 4.3816703920911966e-05,
      "loss": 0.8472,
      "step": 180870
    },
    {
      "epoch": 0.6339393047275582,
      "grad_norm": 2.78125,
      "learning_rate": 4.381605489224827e-05,
      "loss": 0.9926,
      "step": 180880
    },
    {
      "epoch": 0.6339743522344538,
      "grad_norm": 3.453125,
      "learning_rate": 4.381540586358456e-05,
      "loss": 0.893,
      "step": 180890
    },
    {
      "epoch": 0.6340093997413494,
      "grad_norm": 2.8125,
      "learning_rate": 4.381475683492086e-05,
      "loss": 0.9985,
      "step": 180900
    },
    {
      "epoch": 0.634044447248245,
      "grad_norm": 2.859375,
      "learning_rate": 4.381410780625716e-05,
      "loss": 0.9443,
      "step": 180910
    },
    {
      "epoch": 0.6340794947551406,
      "grad_norm": 3.359375,
      "learning_rate": 4.381345877759346e-05,
      "loss": 0.8012,
      "step": 180920
    },
    {
      "epoch": 0.6341145422620362,
      "grad_norm": 3.125,
      "learning_rate": 4.3812809748929754e-05,
      "loss": 0.9899,
      "step": 180930
    },
    {
      "epoch": 0.6341495897689318,
      "grad_norm": 2.671875,
      "learning_rate": 4.3812160720266055e-05,
      "loss": 0.9598,
      "step": 180940
    },
    {
      "epoch": 0.6341846372758274,
      "grad_norm": 2.8125,
      "learning_rate": 4.381151169160235e-05,
      "loss": 0.9868,
      "step": 180950
    },
    {
      "epoch": 0.634219684782723,
      "grad_norm": 3.21875,
      "learning_rate": 4.3810862662938644e-05,
      "loss": 0.9232,
      "step": 180960
    },
    {
      "epoch": 0.6342547322896186,
      "grad_norm": 2.921875,
      "learning_rate": 4.3810213634274946e-05,
      "loss": 0.9212,
      "step": 180970
    },
    {
      "epoch": 0.6342897797965141,
      "grad_norm": 2.9375,
      "learning_rate": 4.380956460561124e-05,
      "loss": 0.9356,
      "step": 180980
    },
    {
      "epoch": 0.6343248273034098,
      "grad_norm": 3.125,
      "learning_rate": 4.380891557694754e-05,
      "loss": 0.8764,
      "step": 180990
    },
    {
      "epoch": 0.6343598748103054,
      "grad_norm": 3.21875,
      "learning_rate": 4.3808266548283836e-05,
      "loss": 1.0043,
      "step": 181000
    },
    {
      "epoch": 0.634394922317201,
      "grad_norm": 3.046875,
      "learning_rate": 4.380761751962014e-05,
      "loss": 0.8342,
      "step": 181010
    },
    {
      "epoch": 0.6344299698240966,
      "grad_norm": 3.203125,
      "learning_rate": 4.380696849095643e-05,
      "loss": 0.9338,
      "step": 181020
    },
    {
      "epoch": 0.6344650173309921,
      "grad_norm": 2.921875,
      "learning_rate": 4.3806319462292734e-05,
      "loss": 0.8331,
      "step": 181030
    },
    {
      "epoch": 0.6345000648378878,
      "grad_norm": 2.640625,
      "learning_rate": 4.380567043362903e-05,
      "loss": 0.9006,
      "step": 181040
    },
    {
      "epoch": 0.6345351123447833,
      "grad_norm": 3.109375,
      "learning_rate": 4.380502140496533e-05,
      "loss": 0.9292,
      "step": 181050
    },
    {
      "epoch": 0.634570159851679,
      "grad_norm": 2.8125,
      "learning_rate": 4.380437237630163e-05,
      "loss": 0.8849,
      "step": 181060
    },
    {
      "epoch": 0.6346052073585745,
      "grad_norm": 3.078125,
      "learning_rate": 4.3803723347637926e-05,
      "loss": 0.8409,
      "step": 181070
    },
    {
      "epoch": 0.6346402548654702,
      "grad_norm": 3.25,
      "learning_rate": 4.380307431897423e-05,
      "loss": 0.907,
      "step": 181080
    },
    {
      "epoch": 0.6346753023723657,
      "grad_norm": 3.359375,
      "learning_rate": 4.380242529031052e-05,
      "loss": 0.901,
      "step": 181090
    },
    {
      "epoch": 0.6347103498792613,
      "grad_norm": 2.953125,
      "learning_rate": 4.380177626164682e-05,
      "loss": 0.9073,
      "step": 181100
    },
    {
      "epoch": 0.634745397386157,
      "grad_norm": 2.640625,
      "learning_rate": 4.380112723298312e-05,
      "loss": 0.8716,
      "step": 181110
    },
    {
      "epoch": 0.6347804448930525,
      "grad_norm": 3.296875,
      "learning_rate": 4.380047820431942e-05,
      "loss": 0.9316,
      "step": 181120
    },
    {
      "epoch": 0.6348154923999482,
      "grad_norm": 3.03125,
      "learning_rate": 4.3799829175655714e-05,
      "loss": 0.9333,
      "step": 181130
    },
    {
      "epoch": 0.6348505399068437,
      "grad_norm": 2.453125,
      "learning_rate": 4.3799180146992015e-05,
      "loss": 0.9426,
      "step": 181140
    },
    {
      "epoch": 0.6348855874137393,
      "grad_norm": 3.203125,
      "learning_rate": 4.379853111832831e-05,
      "loss": 0.8762,
      "step": 181150
    },
    {
      "epoch": 0.6349206349206349,
      "grad_norm": 2.90625,
      "learning_rate": 4.379788208966461e-05,
      "loss": 0.9407,
      "step": 181160
    },
    {
      "epoch": 0.6349556824275305,
      "grad_norm": 3.0,
      "learning_rate": 4.3797233061000906e-05,
      "loss": 0.9127,
      "step": 181170
    },
    {
      "epoch": 0.6349907299344261,
      "grad_norm": 2.890625,
      "learning_rate": 4.379658403233721e-05,
      "loss": 0.9192,
      "step": 181180
    },
    {
      "epoch": 0.6350257774413217,
      "grad_norm": 3.203125,
      "learning_rate": 4.379593500367351e-05,
      "loss": 0.9559,
      "step": 181190
    },
    {
      "epoch": 0.6350608249482174,
      "grad_norm": 3.171875,
      "learning_rate": 4.37952859750098e-05,
      "loss": 0.9104,
      "step": 181200
    },
    {
      "epoch": 0.6350958724551129,
      "grad_norm": 2.734375,
      "learning_rate": 4.3794636946346104e-05,
      "loss": 0.8583,
      "step": 181210
    },
    {
      "epoch": 0.6351309199620085,
      "grad_norm": 2.953125,
      "learning_rate": 4.37939879176824e-05,
      "loss": 0.9553,
      "step": 181220
    },
    {
      "epoch": 0.6351659674689041,
      "grad_norm": 3.078125,
      "learning_rate": 4.37933388890187e-05,
      "loss": 0.9548,
      "step": 181230
    },
    {
      "epoch": 0.6352010149757997,
      "grad_norm": 3.0,
      "learning_rate": 4.3792689860354995e-05,
      "loss": 0.9109,
      "step": 181240
    },
    {
      "epoch": 0.6352360624826953,
      "grad_norm": 3.1875,
      "learning_rate": 4.3792040831691296e-05,
      "loss": 0.9858,
      "step": 181250
    },
    {
      "epoch": 0.6352711099895909,
      "grad_norm": 3.03125,
      "learning_rate": 4.379139180302759e-05,
      "loss": 0.8934,
      "step": 181260
    },
    {
      "epoch": 0.6353061574964864,
      "grad_norm": 3.171875,
      "learning_rate": 4.379074277436389e-05,
      "loss": 0.9586,
      "step": 181270
    },
    {
      "epoch": 0.6353412050033821,
      "grad_norm": 3.109375,
      "learning_rate": 4.379009374570019e-05,
      "loss": 0.9723,
      "step": 181280
    },
    {
      "epoch": 0.6353762525102776,
      "grad_norm": 2.546875,
      "learning_rate": 4.378944471703649e-05,
      "loss": 0.8459,
      "step": 181290
    },
    {
      "epoch": 0.6354113000171733,
      "grad_norm": 3.125,
      "learning_rate": 4.378879568837278e-05,
      "loss": 0.8594,
      "step": 181300
    },
    {
      "epoch": 0.6354463475240689,
      "grad_norm": 3.125,
      "learning_rate": 4.3788146659709084e-05,
      "loss": 0.8823,
      "step": 181310
    },
    {
      "epoch": 0.6354813950309645,
      "grad_norm": 3.328125,
      "learning_rate": 4.378749763104538e-05,
      "loss": 0.8737,
      "step": 181320
    },
    {
      "epoch": 0.6355164425378601,
      "grad_norm": 2.734375,
      "learning_rate": 4.378684860238168e-05,
      "loss": 0.927,
      "step": 181330
    },
    {
      "epoch": 0.6355514900447556,
      "grad_norm": 3.0625,
      "learning_rate": 4.3786199573717975e-05,
      "loss": 0.9543,
      "step": 181340
    },
    {
      "epoch": 0.6355865375516513,
      "grad_norm": 2.578125,
      "learning_rate": 4.378555054505427e-05,
      "loss": 0.9345,
      "step": 181350
    },
    {
      "epoch": 0.6356215850585468,
      "grad_norm": 2.828125,
      "learning_rate": 4.378490151639057e-05,
      "loss": 0.8072,
      "step": 181360
    },
    {
      "epoch": 0.6356566325654425,
      "grad_norm": 3.234375,
      "learning_rate": 4.3784252487726866e-05,
      "loss": 0.9505,
      "step": 181370
    },
    {
      "epoch": 0.635691680072338,
      "grad_norm": 2.71875,
      "learning_rate": 4.378360345906317e-05,
      "loss": 0.9932,
      "step": 181380
    },
    {
      "epoch": 0.6357267275792337,
      "grad_norm": 2.921875,
      "learning_rate": 4.378295443039946e-05,
      "loss": 0.9378,
      "step": 181390
    },
    {
      "epoch": 0.6357617750861293,
      "grad_norm": 3.046875,
      "learning_rate": 4.378230540173576e-05,
      "loss": 0.8613,
      "step": 181400
    },
    {
      "epoch": 0.6357968225930248,
      "grad_norm": 3.5,
      "learning_rate": 4.378165637307206e-05,
      "loss": 0.9444,
      "step": 181410
    },
    {
      "epoch": 0.6358318700999205,
      "grad_norm": 3.078125,
      "learning_rate": 4.378100734440836e-05,
      "loss": 0.9308,
      "step": 181420
    },
    {
      "epoch": 0.635866917606816,
      "grad_norm": 2.984375,
      "learning_rate": 4.378035831574466e-05,
      "loss": 0.94,
      "step": 181430
    },
    {
      "epoch": 0.6359019651137117,
      "grad_norm": 2.921875,
      "learning_rate": 4.3779709287080955e-05,
      "loss": 1.0005,
      "step": 181440
    },
    {
      "epoch": 0.6359370126206072,
      "grad_norm": 3.140625,
      "learning_rate": 4.3779060258417256e-05,
      "loss": 0.9644,
      "step": 181450
    },
    {
      "epoch": 0.6359720601275028,
      "grad_norm": 2.6875,
      "learning_rate": 4.377841122975355e-05,
      "loss": 0.8312,
      "step": 181460
    },
    {
      "epoch": 0.6360071076343984,
      "grad_norm": 2.859375,
      "learning_rate": 4.377776220108985e-05,
      "loss": 0.9055,
      "step": 181470
    },
    {
      "epoch": 0.636042155141294,
      "grad_norm": 3.09375,
      "learning_rate": 4.377711317242615e-05,
      "loss": 0.9333,
      "step": 181480
    },
    {
      "epoch": 0.6360772026481896,
      "grad_norm": 2.609375,
      "learning_rate": 4.377646414376245e-05,
      "loss": 0.9177,
      "step": 181490
    },
    {
      "epoch": 0.6361122501550852,
      "grad_norm": 2.46875,
      "learning_rate": 4.377581511509874e-05,
      "loss": 0.8919,
      "step": 181500
    },
    {
      "epoch": 0.6361472976619809,
      "grad_norm": 3.5,
      "learning_rate": 4.3775166086435044e-05,
      "loss": 0.9631,
      "step": 181510
    },
    {
      "epoch": 0.6361823451688764,
      "grad_norm": 3.734375,
      "learning_rate": 4.377451705777134e-05,
      "loss": 0.9251,
      "step": 181520
    },
    {
      "epoch": 0.636217392675772,
      "grad_norm": 3.140625,
      "learning_rate": 4.377386802910764e-05,
      "loss": 0.9606,
      "step": 181530
    },
    {
      "epoch": 0.6362524401826676,
      "grad_norm": 2.90625,
      "learning_rate": 4.3773219000443935e-05,
      "loss": 0.9117,
      "step": 181540
    },
    {
      "epoch": 0.6362874876895632,
      "grad_norm": 2.953125,
      "learning_rate": 4.3772569971780236e-05,
      "loss": 0.9636,
      "step": 181550
    },
    {
      "epoch": 0.6363225351964588,
      "grad_norm": 2.734375,
      "learning_rate": 4.377192094311654e-05,
      "loss": 0.9208,
      "step": 181560
    },
    {
      "epoch": 0.6363575827033544,
      "grad_norm": 2.828125,
      "learning_rate": 4.377127191445283e-05,
      "loss": 0.9159,
      "step": 181570
    },
    {
      "epoch": 0.6363926302102499,
      "grad_norm": 2.921875,
      "learning_rate": 4.3770622885789134e-05,
      "loss": 0.9457,
      "step": 181580
    },
    {
      "epoch": 0.6364276777171456,
      "grad_norm": 3.03125,
      "learning_rate": 4.376997385712543e-05,
      "loss": 0.9546,
      "step": 181590
    },
    {
      "epoch": 0.6364627252240412,
      "grad_norm": 3.5625,
      "learning_rate": 4.376932482846173e-05,
      "loss": 0.9798,
      "step": 181600
    },
    {
      "epoch": 0.6364977727309368,
      "grad_norm": 2.84375,
      "learning_rate": 4.3768675799798024e-05,
      "loss": 0.8744,
      "step": 181610
    },
    {
      "epoch": 0.6365328202378324,
      "grad_norm": 3.046875,
      "learning_rate": 4.3768026771134326e-05,
      "loss": 0.8974,
      "step": 181620
    },
    {
      "epoch": 0.636567867744728,
      "grad_norm": 3.09375,
      "learning_rate": 4.376737774247062e-05,
      "loss": 1.0055,
      "step": 181630
    },
    {
      "epoch": 0.6366029152516236,
      "grad_norm": 3.328125,
      "learning_rate": 4.376672871380692e-05,
      "loss": 0.9302,
      "step": 181640
    },
    {
      "epoch": 0.6366379627585191,
      "grad_norm": 2.9375,
      "learning_rate": 4.3766079685143216e-05,
      "loss": 0.9357,
      "step": 181650
    },
    {
      "epoch": 0.6366730102654148,
      "grad_norm": 3.390625,
      "learning_rate": 4.376543065647952e-05,
      "loss": 0.8496,
      "step": 181660
    },
    {
      "epoch": 0.6367080577723103,
      "grad_norm": 2.65625,
      "learning_rate": 4.376478162781581e-05,
      "loss": 0.8923,
      "step": 181670
    },
    {
      "epoch": 0.636743105279206,
      "grad_norm": 3.21875,
      "learning_rate": 4.3764132599152114e-05,
      "loss": 1.0229,
      "step": 181680
    },
    {
      "epoch": 0.6367781527861016,
      "grad_norm": 3.1875,
      "learning_rate": 4.376348357048841e-05,
      "loss": 0.8655,
      "step": 181690
    },
    {
      "epoch": 0.6368132002929972,
      "grad_norm": 3.109375,
      "learning_rate": 4.376283454182471e-05,
      "loss": 0.8949,
      "step": 181700
    },
    {
      "epoch": 0.6368482477998928,
      "grad_norm": 3.09375,
      "learning_rate": 4.3762185513161004e-05,
      "loss": 0.903,
      "step": 181710
    },
    {
      "epoch": 0.6368832953067883,
      "grad_norm": 2.78125,
      "learning_rate": 4.37615364844973e-05,
      "loss": 1.0195,
      "step": 181720
    },
    {
      "epoch": 0.636918342813684,
      "grad_norm": 2.953125,
      "learning_rate": 4.37608874558336e-05,
      "loss": 0.8162,
      "step": 181730
    },
    {
      "epoch": 0.6369533903205795,
      "grad_norm": 3.203125,
      "learning_rate": 4.3760238427169895e-05,
      "loss": 0.8856,
      "step": 181740
    },
    {
      "epoch": 0.6369884378274752,
      "grad_norm": 3.15625,
      "learning_rate": 4.3759589398506196e-05,
      "loss": 0.9465,
      "step": 181750
    },
    {
      "epoch": 0.6370234853343707,
      "grad_norm": 2.890625,
      "learning_rate": 4.375894036984249e-05,
      "loss": 0.8719,
      "step": 181760
    },
    {
      "epoch": 0.6370585328412663,
      "grad_norm": 2.96875,
      "learning_rate": 4.375829134117879e-05,
      "loss": 0.9153,
      "step": 181770
    },
    {
      "epoch": 0.6370935803481619,
      "grad_norm": 2.71875,
      "learning_rate": 4.375764231251509e-05,
      "loss": 0.953,
      "step": 181780
    },
    {
      "epoch": 0.6371286278550575,
      "grad_norm": 2.65625,
      "learning_rate": 4.375699328385139e-05,
      "loss": 0.8885,
      "step": 181790
    },
    {
      "epoch": 0.6371636753619532,
      "grad_norm": 3.0,
      "learning_rate": 4.375634425518769e-05,
      "loss": 0.8176,
      "step": 181800
    },
    {
      "epoch": 0.6371987228688487,
      "grad_norm": 3.109375,
      "learning_rate": 4.3755695226523984e-05,
      "loss": 0.956,
      "step": 181810
    },
    {
      "epoch": 0.6372337703757444,
      "grad_norm": 2.484375,
      "learning_rate": 4.3755046197860286e-05,
      "loss": 0.943,
      "step": 181820
    },
    {
      "epoch": 0.6372688178826399,
      "grad_norm": 3.125,
      "learning_rate": 4.375439716919658e-05,
      "loss": 0.9297,
      "step": 181830
    },
    {
      "epoch": 0.6373038653895355,
      "grad_norm": 3.21875,
      "learning_rate": 4.375374814053288e-05,
      "loss": 0.8971,
      "step": 181840
    },
    {
      "epoch": 0.6373389128964311,
      "grad_norm": 3.28125,
      "learning_rate": 4.3753099111869176e-05,
      "loss": 0.9056,
      "step": 181850
    },
    {
      "epoch": 0.6373739604033267,
      "grad_norm": 2.765625,
      "learning_rate": 4.375245008320548e-05,
      "loss": 0.8275,
      "step": 181860
    },
    {
      "epoch": 0.6374090079102223,
      "grad_norm": 3.265625,
      "learning_rate": 4.375180105454177e-05,
      "loss": 0.8363,
      "step": 181870
    },
    {
      "epoch": 0.6374440554171179,
      "grad_norm": 3.09375,
      "learning_rate": 4.3751152025878074e-05,
      "loss": 0.9855,
      "step": 181880
    },
    {
      "epoch": 0.6374791029240136,
      "grad_norm": 2.984375,
      "learning_rate": 4.375050299721437e-05,
      "loss": 0.9678,
      "step": 181890
    },
    {
      "epoch": 0.6375141504309091,
      "grad_norm": 2.59375,
      "learning_rate": 4.374985396855067e-05,
      "loss": 0.8615,
      "step": 181900
    },
    {
      "epoch": 0.6375491979378047,
      "grad_norm": 2.703125,
      "learning_rate": 4.3749204939886964e-05,
      "loss": 0.8792,
      "step": 181910
    },
    {
      "epoch": 0.6375842454447003,
      "grad_norm": 2.84375,
      "learning_rate": 4.3748555911223266e-05,
      "loss": 0.9144,
      "step": 181920
    },
    {
      "epoch": 0.6376192929515959,
      "grad_norm": 3.46875,
      "learning_rate": 4.374790688255957e-05,
      "loss": 0.8871,
      "step": 181930
    },
    {
      "epoch": 0.6376543404584915,
      "grad_norm": 2.6875,
      "learning_rate": 4.374725785389586e-05,
      "loss": 0.8585,
      "step": 181940
    },
    {
      "epoch": 0.6376893879653871,
      "grad_norm": 2.84375,
      "learning_rate": 4.374660882523216e-05,
      "loss": 0.9001,
      "step": 181950
    },
    {
      "epoch": 0.6377244354722826,
      "grad_norm": 2.734375,
      "learning_rate": 4.374595979656846e-05,
      "loss": 0.792,
      "step": 181960
    },
    {
      "epoch": 0.6377594829791783,
      "grad_norm": 3.171875,
      "learning_rate": 4.374531076790476e-05,
      "loss": 0.9394,
      "step": 181970
    },
    {
      "epoch": 0.6377945304860738,
      "grad_norm": 3.140625,
      "learning_rate": 4.3744661739241054e-05,
      "loss": 0.8306,
      "step": 181980
    },
    {
      "epoch": 0.6378295779929695,
      "grad_norm": 3.46875,
      "learning_rate": 4.3744012710577355e-05,
      "loss": 0.9466,
      "step": 181990
    },
    {
      "epoch": 0.6378646254998651,
      "grad_norm": 2.890625,
      "learning_rate": 4.374336368191365e-05,
      "loss": 0.9307,
      "step": 182000
    },
    {
      "epoch": 0.6378996730067606,
      "grad_norm": 3.265625,
      "learning_rate": 4.374271465324995e-05,
      "loss": 0.9632,
      "step": 182010
    },
    {
      "epoch": 0.6379347205136563,
      "grad_norm": 3.265625,
      "learning_rate": 4.3742065624586246e-05,
      "loss": 0.8997,
      "step": 182020
    },
    {
      "epoch": 0.6379697680205518,
      "grad_norm": 3.46875,
      "learning_rate": 4.374141659592255e-05,
      "loss": 0.9472,
      "step": 182030
    },
    {
      "epoch": 0.6380048155274475,
      "grad_norm": 3.328125,
      "learning_rate": 4.374076756725884e-05,
      "loss": 0.9465,
      "step": 182040
    },
    {
      "epoch": 0.638039863034343,
      "grad_norm": 2.546875,
      "learning_rate": 4.374011853859514e-05,
      "loss": 0.8408,
      "step": 182050
    },
    {
      "epoch": 0.6380749105412387,
      "grad_norm": 3.09375,
      "learning_rate": 4.373946950993144e-05,
      "loss": 0.9053,
      "step": 182060
    },
    {
      "epoch": 0.6381099580481342,
      "grad_norm": 2.96875,
      "learning_rate": 4.373882048126774e-05,
      "loss": 0.9063,
      "step": 182070
    },
    {
      "epoch": 0.6381450055550298,
      "grad_norm": 2.953125,
      "learning_rate": 4.373817145260404e-05,
      "loss": 0.9722,
      "step": 182080
    },
    {
      "epoch": 0.6381800530619255,
      "grad_norm": 2.828125,
      "learning_rate": 4.373752242394033e-05,
      "loss": 0.9027,
      "step": 182090
    },
    {
      "epoch": 0.638215100568821,
      "grad_norm": 3.21875,
      "learning_rate": 4.373687339527663e-05,
      "loss": 0.9014,
      "step": 182100
    },
    {
      "epoch": 0.6382501480757167,
      "grad_norm": 3.265625,
      "learning_rate": 4.3736224366612924e-05,
      "loss": 0.9507,
      "step": 182110
    },
    {
      "epoch": 0.6382851955826122,
      "grad_norm": 3.53125,
      "learning_rate": 4.3735575337949226e-05,
      "loss": 0.9674,
      "step": 182120
    },
    {
      "epoch": 0.6383202430895079,
      "grad_norm": 3.359375,
      "learning_rate": 4.373492630928552e-05,
      "loss": 0.9074,
      "step": 182130
    },
    {
      "epoch": 0.6383552905964034,
      "grad_norm": 3.09375,
      "learning_rate": 4.373427728062182e-05,
      "loss": 0.9214,
      "step": 182140
    },
    {
      "epoch": 0.638390338103299,
      "grad_norm": 2.96875,
      "learning_rate": 4.373362825195812e-05,
      "loss": 0.9353,
      "step": 182150
    },
    {
      "epoch": 0.6384253856101946,
      "grad_norm": 2.640625,
      "learning_rate": 4.373297922329442e-05,
      "loss": 0.917,
      "step": 182160
    },
    {
      "epoch": 0.6384604331170902,
      "grad_norm": 3.109375,
      "learning_rate": 4.373233019463072e-05,
      "loss": 0.8263,
      "step": 182170
    },
    {
      "epoch": 0.6384954806239859,
      "grad_norm": 3.03125,
      "learning_rate": 4.3731681165967014e-05,
      "loss": 0.9327,
      "step": 182180
    },
    {
      "epoch": 0.6385305281308814,
      "grad_norm": 2.5625,
      "learning_rate": 4.3731032137303315e-05,
      "loss": 0.8774,
      "step": 182190
    },
    {
      "epoch": 0.638565575637777,
      "grad_norm": 2.890625,
      "learning_rate": 4.373038310863961e-05,
      "loss": 0.8946,
      "step": 182200
    },
    {
      "epoch": 0.6386006231446726,
      "grad_norm": 2.71875,
      "learning_rate": 4.372973407997591e-05,
      "loss": 0.9837,
      "step": 182210
    },
    {
      "epoch": 0.6386356706515682,
      "grad_norm": 2.984375,
      "learning_rate": 4.3729085051312206e-05,
      "loss": 0.9789,
      "step": 182220
    },
    {
      "epoch": 0.6386707181584638,
      "grad_norm": 2.578125,
      "learning_rate": 4.372843602264851e-05,
      "loss": 0.92,
      "step": 182230
    },
    {
      "epoch": 0.6387057656653594,
      "grad_norm": 3.421875,
      "learning_rate": 4.37277869939848e-05,
      "loss": 0.8988,
      "step": 182240
    },
    {
      "epoch": 0.638740813172255,
      "grad_norm": 3.359375,
      "learning_rate": 4.37271379653211e-05,
      "loss": 0.8422,
      "step": 182250
    },
    {
      "epoch": 0.6387758606791506,
      "grad_norm": 2.921875,
      "learning_rate": 4.37264889366574e-05,
      "loss": 0.9136,
      "step": 182260
    },
    {
      "epoch": 0.6388109081860461,
      "grad_norm": 2.8125,
      "learning_rate": 4.37258399079937e-05,
      "loss": 0.8717,
      "step": 182270
    },
    {
      "epoch": 0.6388459556929418,
      "grad_norm": 2.546875,
      "learning_rate": 4.3725190879329994e-05,
      "loss": 0.9305,
      "step": 182280
    },
    {
      "epoch": 0.6388810031998374,
      "grad_norm": 3.265625,
      "learning_rate": 4.3724541850666295e-05,
      "loss": 0.9791,
      "step": 182290
    },
    {
      "epoch": 0.638916050706733,
      "grad_norm": 2.984375,
      "learning_rate": 4.3723892822002597e-05,
      "loss": 0.9176,
      "step": 182300
    },
    {
      "epoch": 0.6389510982136286,
      "grad_norm": 3.03125,
      "learning_rate": 4.372324379333889e-05,
      "loss": 0.8771,
      "step": 182310
    },
    {
      "epoch": 0.6389861457205241,
      "grad_norm": 3.03125,
      "learning_rate": 4.372259476467519e-05,
      "loss": 0.9876,
      "step": 182320
    },
    {
      "epoch": 0.6390211932274198,
      "grad_norm": 2.71875,
      "learning_rate": 4.372194573601149e-05,
      "loss": 0.8834,
      "step": 182330
    },
    {
      "epoch": 0.6390562407343153,
      "grad_norm": 2.78125,
      "learning_rate": 4.372129670734779e-05,
      "loss": 0.8867,
      "step": 182340
    },
    {
      "epoch": 0.639091288241211,
      "grad_norm": 3.15625,
      "learning_rate": 4.372064767868408e-05,
      "loss": 0.9485,
      "step": 182350
    },
    {
      "epoch": 0.6391263357481065,
      "grad_norm": 2.953125,
      "learning_rate": 4.3719998650020385e-05,
      "loss": 0.8447,
      "step": 182360
    },
    {
      "epoch": 0.6391613832550022,
      "grad_norm": 3.734375,
      "learning_rate": 4.371934962135668e-05,
      "loss": 0.9024,
      "step": 182370
    },
    {
      "epoch": 0.6391964307618978,
      "grad_norm": 2.703125,
      "learning_rate": 4.371870059269298e-05,
      "loss": 0.9293,
      "step": 182380
    },
    {
      "epoch": 0.6392314782687933,
      "grad_norm": 2.96875,
      "learning_rate": 4.3718051564029275e-05,
      "loss": 0.8986,
      "step": 182390
    },
    {
      "epoch": 0.639266525775689,
      "grad_norm": 2.875,
      "learning_rate": 4.3717402535365577e-05,
      "loss": 0.9524,
      "step": 182400
    },
    {
      "epoch": 0.6393015732825845,
      "grad_norm": 2.828125,
      "learning_rate": 4.371675350670187e-05,
      "loss": 0.8821,
      "step": 182410
    },
    {
      "epoch": 0.6393366207894802,
      "grad_norm": 3.34375,
      "learning_rate": 4.371610447803817e-05,
      "loss": 0.8645,
      "step": 182420
    },
    {
      "epoch": 0.6393716682963757,
      "grad_norm": 2.9375,
      "learning_rate": 4.3715455449374474e-05,
      "loss": 0.9839,
      "step": 182430
    },
    {
      "epoch": 0.6394067158032714,
      "grad_norm": 3.03125,
      "learning_rate": 4.371480642071077e-05,
      "loss": 0.8827,
      "step": 182440
    },
    {
      "epoch": 0.6394417633101669,
      "grad_norm": 3.046875,
      "learning_rate": 4.371415739204707e-05,
      "loss": 0.9693,
      "step": 182450
    },
    {
      "epoch": 0.6394768108170625,
      "grad_norm": 2.671875,
      "learning_rate": 4.3713508363383365e-05,
      "loss": 0.9093,
      "step": 182460
    },
    {
      "epoch": 0.6395118583239581,
      "grad_norm": 2.6875,
      "learning_rate": 4.371285933471966e-05,
      "loss": 0.9264,
      "step": 182470
    },
    {
      "epoch": 0.6395469058308537,
      "grad_norm": 2.546875,
      "learning_rate": 4.3712210306055954e-05,
      "loss": 0.9173,
      "step": 182480
    },
    {
      "epoch": 0.6395819533377494,
      "grad_norm": 3.0625,
      "learning_rate": 4.3711561277392255e-05,
      "loss": 0.839,
      "step": 182490
    },
    {
      "epoch": 0.6396170008446449,
      "grad_norm": 2.90625,
      "learning_rate": 4.371091224872855e-05,
      "loss": 1.0175,
      "step": 182500
    },
    {
      "epoch": 0.6396520483515405,
      "grad_norm": 2.953125,
      "learning_rate": 4.371026322006485e-05,
      "loss": 0.9135,
      "step": 182510
    },
    {
      "epoch": 0.6396870958584361,
      "grad_norm": 3.203125,
      "learning_rate": 4.370961419140115e-05,
      "loss": 0.8898,
      "step": 182520
    },
    {
      "epoch": 0.6397221433653317,
      "grad_norm": 2.8125,
      "learning_rate": 4.370896516273745e-05,
      "loss": 0.8274,
      "step": 182530
    },
    {
      "epoch": 0.6397571908722273,
      "grad_norm": 2.40625,
      "learning_rate": 4.370831613407375e-05,
      "loss": 0.9129,
      "step": 182540
    },
    {
      "epoch": 0.6397922383791229,
      "grad_norm": 2.859375,
      "learning_rate": 4.370766710541004e-05,
      "loss": 0.9311,
      "step": 182550
    },
    {
      "epoch": 0.6398272858860184,
      "grad_norm": 3.171875,
      "learning_rate": 4.3707018076746345e-05,
      "loss": 0.8984,
      "step": 182560
    },
    {
      "epoch": 0.6398623333929141,
      "grad_norm": 2.53125,
      "learning_rate": 4.370636904808264e-05,
      "loss": 0.945,
      "step": 182570
    },
    {
      "epoch": 0.6398973808998097,
      "grad_norm": 3.03125,
      "learning_rate": 4.370572001941894e-05,
      "loss": 0.9181,
      "step": 182580
    },
    {
      "epoch": 0.6399324284067053,
      "grad_norm": 2.9375,
      "learning_rate": 4.3705070990755235e-05,
      "loss": 0.9295,
      "step": 182590
    },
    {
      "epoch": 0.6399674759136009,
      "grad_norm": 3.46875,
      "learning_rate": 4.3704421962091537e-05,
      "loss": 0.8904,
      "step": 182600
    },
    {
      "epoch": 0.6400025234204965,
      "grad_norm": 2.671875,
      "learning_rate": 4.370377293342783e-05,
      "loss": 0.891,
      "step": 182610
    },
    {
      "epoch": 0.6400375709273921,
      "grad_norm": 3.203125,
      "learning_rate": 4.370312390476413e-05,
      "loss": 0.9129,
      "step": 182620
    },
    {
      "epoch": 0.6400726184342876,
      "grad_norm": 2.671875,
      "learning_rate": 4.370247487610043e-05,
      "loss": 0.9354,
      "step": 182630
    },
    {
      "epoch": 0.6401076659411833,
      "grad_norm": 3.109375,
      "learning_rate": 4.370182584743673e-05,
      "loss": 0.9223,
      "step": 182640
    },
    {
      "epoch": 0.6401427134480788,
      "grad_norm": 2.59375,
      "learning_rate": 4.370117681877302e-05,
      "loss": 0.8139,
      "step": 182650
    },
    {
      "epoch": 0.6401777609549745,
      "grad_norm": 3.453125,
      "learning_rate": 4.3700527790109325e-05,
      "loss": 0.9019,
      "step": 182660
    },
    {
      "epoch": 0.6402128084618701,
      "grad_norm": 2.84375,
      "learning_rate": 4.3699878761445626e-05,
      "loss": 0.925,
      "step": 182670
    },
    {
      "epoch": 0.6402478559687657,
      "grad_norm": 3.21875,
      "learning_rate": 4.369922973278192e-05,
      "loss": 0.9498,
      "step": 182680
    },
    {
      "epoch": 0.6402829034756613,
      "grad_norm": 3.171875,
      "learning_rate": 4.369858070411822e-05,
      "loss": 0.8963,
      "step": 182690
    },
    {
      "epoch": 0.6403179509825568,
      "grad_norm": 2.765625,
      "learning_rate": 4.3697931675454517e-05,
      "loss": 0.8946,
      "step": 182700
    },
    {
      "epoch": 0.6403529984894525,
      "grad_norm": 2.890625,
      "learning_rate": 4.369728264679082e-05,
      "loss": 0.8359,
      "step": 182710
    },
    {
      "epoch": 0.640388045996348,
      "grad_norm": 2.671875,
      "learning_rate": 4.369663361812711e-05,
      "loss": 0.8802,
      "step": 182720
    },
    {
      "epoch": 0.6404230935032437,
      "grad_norm": 2.875,
      "learning_rate": 4.3695984589463414e-05,
      "loss": 0.9111,
      "step": 182730
    },
    {
      "epoch": 0.6404581410101392,
      "grad_norm": 3.109375,
      "learning_rate": 4.369533556079971e-05,
      "loss": 0.8152,
      "step": 182740
    },
    {
      "epoch": 0.6404931885170349,
      "grad_norm": 3.0,
      "learning_rate": 4.369468653213601e-05,
      "loss": 0.8663,
      "step": 182750
    },
    {
      "epoch": 0.6405282360239304,
      "grad_norm": 3.015625,
      "learning_rate": 4.3694037503472305e-05,
      "loss": 0.852,
      "step": 182760
    },
    {
      "epoch": 0.640563283530826,
      "grad_norm": 2.96875,
      "learning_rate": 4.3693388474808606e-05,
      "loss": 0.8902,
      "step": 182770
    },
    {
      "epoch": 0.6405983310377217,
      "grad_norm": 3.0625,
      "learning_rate": 4.36927394461449e-05,
      "loss": 0.956,
      "step": 182780
    },
    {
      "epoch": 0.6406333785446172,
      "grad_norm": 3.125,
      "learning_rate": 4.36920904174812e-05,
      "loss": 0.9339,
      "step": 182790
    },
    {
      "epoch": 0.6406684260515129,
      "grad_norm": 2.546875,
      "learning_rate": 4.36914413888175e-05,
      "loss": 0.8689,
      "step": 182800
    },
    {
      "epoch": 0.6407034735584084,
      "grad_norm": 2.46875,
      "learning_rate": 4.36907923601538e-05,
      "loss": 0.8587,
      "step": 182810
    },
    {
      "epoch": 0.640738521065304,
      "grad_norm": 2.859375,
      "learning_rate": 4.36901433314901e-05,
      "loss": 0.9293,
      "step": 182820
    },
    {
      "epoch": 0.6407735685721996,
      "grad_norm": 2.828125,
      "learning_rate": 4.3689494302826394e-05,
      "loss": 0.9069,
      "step": 182830
    },
    {
      "epoch": 0.6408086160790952,
      "grad_norm": 3.046875,
      "learning_rate": 4.368884527416269e-05,
      "loss": 0.8288,
      "step": 182840
    },
    {
      "epoch": 0.6408436635859908,
      "grad_norm": 2.859375,
      "learning_rate": 4.368819624549898e-05,
      "loss": 0.9298,
      "step": 182850
    },
    {
      "epoch": 0.6408787110928864,
      "grad_norm": 2.96875,
      "learning_rate": 4.3687547216835285e-05,
      "loss": 1.0197,
      "step": 182860
    },
    {
      "epoch": 0.6409137585997821,
      "grad_norm": 3.0,
      "learning_rate": 4.368689818817158e-05,
      "loss": 0.9278,
      "step": 182870
    },
    {
      "epoch": 0.6409488061066776,
      "grad_norm": 3.0,
      "learning_rate": 4.368624915950788e-05,
      "loss": 0.9688,
      "step": 182880
    },
    {
      "epoch": 0.6409838536135732,
      "grad_norm": 2.90625,
      "learning_rate": 4.368560013084418e-05,
      "loss": 0.9492,
      "step": 182890
    },
    {
      "epoch": 0.6410189011204688,
      "grad_norm": 2.90625,
      "learning_rate": 4.3684951102180477e-05,
      "loss": 0.8905,
      "step": 182900
    },
    {
      "epoch": 0.6410539486273644,
      "grad_norm": 3.03125,
      "learning_rate": 4.368430207351678e-05,
      "loss": 0.8867,
      "step": 182910
    },
    {
      "epoch": 0.64108899613426,
      "grad_norm": 3.46875,
      "learning_rate": 4.368365304485307e-05,
      "loss": 0.9545,
      "step": 182920
    },
    {
      "epoch": 0.6411240436411556,
      "grad_norm": 2.984375,
      "learning_rate": 4.3683004016189374e-05,
      "loss": 0.9866,
      "step": 182930
    },
    {
      "epoch": 0.6411590911480511,
      "grad_norm": 2.640625,
      "learning_rate": 4.368235498752567e-05,
      "loss": 0.9235,
      "step": 182940
    },
    {
      "epoch": 0.6411941386549468,
      "grad_norm": 2.765625,
      "learning_rate": 4.368170595886197e-05,
      "loss": 0.9536,
      "step": 182950
    },
    {
      "epoch": 0.6412291861618423,
      "grad_norm": 2.953125,
      "learning_rate": 4.3681056930198265e-05,
      "loss": 0.9267,
      "step": 182960
    },
    {
      "epoch": 0.641264233668738,
      "grad_norm": 2.421875,
      "learning_rate": 4.3680407901534566e-05,
      "loss": 0.8737,
      "step": 182970
    },
    {
      "epoch": 0.6412992811756336,
      "grad_norm": 2.84375,
      "learning_rate": 4.367975887287086e-05,
      "loss": 0.9052,
      "step": 182980
    },
    {
      "epoch": 0.6413343286825292,
      "grad_norm": 3.21875,
      "learning_rate": 4.367910984420716e-05,
      "loss": 0.8845,
      "step": 182990
    },
    {
      "epoch": 0.6413693761894248,
      "grad_norm": 2.828125,
      "learning_rate": 4.3678460815543457e-05,
      "loss": 0.9293,
      "step": 183000
    },
    {
      "epoch": 0.6414044236963203,
      "grad_norm": 3.421875,
      "learning_rate": 4.367781178687976e-05,
      "loss": 0.8311,
      "step": 183010
    },
    {
      "epoch": 0.641439471203216,
      "grad_norm": 3.0625,
      "learning_rate": 4.367716275821605e-05,
      "loss": 0.9646,
      "step": 183020
    },
    {
      "epoch": 0.6414745187101115,
      "grad_norm": 3.203125,
      "learning_rate": 4.3676513729552354e-05,
      "loss": 0.9466,
      "step": 183030
    },
    {
      "epoch": 0.6415095662170072,
      "grad_norm": 2.765625,
      "learning_rate": 4.3675864700888655e-05,
      "loss": 0.8772,
      "step": 183040
    },
    {
      "epoch": 0.6415446137239027,
      "grad_norm": 4.15625,
      "learning_rate": 4.367521567222495e-05,
      "loss": 0.8967,
      "step": 183050
    },
    {
      "epoch": 0.6415796612307983,
      "grad_norm": 2.734375,
      "learning_rate": 4.367456664356125e-05,
      "loss": 0.8958,
      "step": 183060
    },
    {
      "epoch": 0.641614708737694,
      "grad_norm": 2.84375,
      "learning_rate": 4.3673917614897546e-05,
      "loss": 0.9427,
      "step": 183070
    },
    {
      "epoch": 0.6416497562445895,
      "grad_norm": 2.75,
      "learning_rate": 4.367326858623385e-05,
      "loss": 0.8535,
      "step": 183080
    },
    {
      "epoch": 0.6416848037514852,
      "grad_norm": 3.34375,
      "learning_rate": 4.367261955757014e-05,
      "loss": 0.9623,
      "step": 183090
    },
    {
      "epoch": 0.6417198512583807,
      "grad_norm": 2.8125,
      "learning_rate": 4.367197052890644e-05,
      "loss": 0.8392,
      "step": 183100
    },
    {
      "epoch": 0.6417548987652764,
      "grad_norm": 2.921875,
      "learning_rate": 4.367132150024274e-05,
      "loss": 0.9775,
      "step": 183110
    },
    {
      "epoch": 0.6417899462721719,
      "grad_norm": 3.078125,
      "learning_rate": 4.367067247157904e-05,
      "loss": 0.9245,
      "step": 183120
    },
    {
      "epoch": 0.6418249937790675,
      "grad_norm": 3.171875,
      "learning_rate": 4.3670023442915334e-05,
      "loss": 0.9854,
      "step": 183130
    },
    {
      "epoch": 0.6418600412859631,
      "grad_norm": 2.875,
      "learning_rate": 4.3669374414251635e-05,
      "loss": 0.912,
      "step": 183140
    },
    {
      "epoch": 0.6418950887928587,
      "grad_norm": 3.28125,
      "learning_rate": 4.366872538558793e-05,
      "loss": 0.8865,
      "step": 183150
    },
    {
      "epoch": 0.6419301362997543,
      "grad_norm": 2.421875,
      "learning_rate": 4.366807635692423e-05,
      "loss": 0.8332,
      "step": 183160
    },
    {
      "epoch": 0.6419651838066499,
      "grad_norm": 2.9375,
      "learning_rate": 4.366742732826053e-05,
      "loss": 0.9277,
      "step": 183170
    },
    {
      "epoch": 0.6420002313135456,
      "grad_norm": 3.109375,
      "learning_rate": 4.366677829959683e-05,
      "loss": 1.0388,
      "step": 183180
    },
    {
      "epoch": 0.6420352788204411,
      "grad_norm": 2.71875,
      "learning_rate": 4.366612927093313e-05,
      "loss": 0.9249,
      "step": 183190
    },
    {
      "epoch": 0.6420703263273367,
      "grad_norm": 2.890625,
      "learning_rate": 4.366548024226942e-05,
      "loss": 0.899,
      "step": 183200
    },
    {
      "epoch": 0.6421053738342323,
      "grad_norm": 2.78125,
      "learning_rate": 4.3664831213605725e-05,
      "loss": 0.9254,
      "step": 183210
    },
    {
      "epoch": 0.6421404213411279,
      "grad_norm": 2.625,
      "learning_rate": 4.366418218494201e-05,
      "loss": 0.8464,
      "step": 183220
    },
    {
      "epoch": 0.6421754688480235,
      "grad_norm": 2.75,
      "learning_rate": 4.3663533156278314e-05,
      "loss": 0.9259,
      "step": 183230
    },
    {
      "epoch": 0.6422105163549191,
      "grad_norm": 2.84375,
      "learning_rate": 4.366288412761461e-05,
      "loss": 0.9102,
      "step": 183240
    },
    {
      "epoch": 0.6422455638618146,
      "grad_norm": 3.015625,
      "learning_rate": 4.366223509895091e-05,
      "loss": 0.901,
      "step": 183250
    },
    {
      "epoch": 0.6422806113687103,
      "grad_norm": 2.921875,
      "learning_rate": 4.366158607028721e-05,
      "loss": 0.9,
      "step": 183260
    },
    {
      "epoch": 0.6423156588756059,
      "grad_norm": 2.9375,
      "learning_rate": 4.3660937041623506e-05,
      "loss": 0.7881,
      "step": 183270
    },
    {
      "epoch": 0.6423507063825015,
      "grad_norm": 3.046875,
      "learning_rate": 4.366028801295981e-05,
      "loss": 0.8405,
      "step": 183280
    },
    {
      "epoch": 0.6423857538893971,
      "grad_norm": 2.921875,
      "learning_rate": 4.36596389842961e-05,
      "loss": 0.8592,
      "step": 183290
    },
    {
      "epoch": 0.6424208013962927,
      "grad_norm": 2.8125,
      "learning_rate": 4.36589899556324e-05,
      "loss": 0.9318,
      "step": 183300
    },
    {
      "epoch": 0.6424558489031883,
      "grad_norm": 3.046875,
      "learning_rate": 4.36583409269687e-05,
      "loss": 0.8528,
      "step": 183310
    },
    {
      "epoch": 0.6424908964100838,
      "grad_norm": 2.734375,
      "learning_rate": 4.3657691898305e-05,
      "loss": 0.8729,
      "step": 183320
    },
    {
      "epoch": 0.6425259439169795,
      "grad_norm": 2.765625,
      "learning_rate": 4.3657042869641294e-05,
      "loss": 0.9431,
      "step": 183330
    },
    {
      "epoch": 0.642560991423875,
      "grad_norm": 3.109375,
      "learning_rate": 4.3656393840977595e-05,
      "loss": 0.9281,
      "step": 183340
    },
    {
      "epoch": 0.6425960389307707,
      "grad_norm": 2.71875,
      "learning_rate": 4.365574481231389e-05,
      "loss": 0.9746,
      "step": 183350
    },
    {
      "epoch": 0.6426310864376663,
      "grad_norm": 3.046875,
      "learning_rate": 4.365509578365019e-05,
      "loss": 0.9484,
      "step": 183360
    },
    {
      "epoch": 0.6426661339445618,
      "grad_norm": 3.078125,
      "learning_rate": 4.3654446754986486e-05,
      "loss": 0.9235,
      "step": 183370
    },
    {
      "epoch": 0.6427011814514575,
      "grad_norm": 2.828125,
      "learning_rate": 4.365379772632279e-05,
      "loss": 0.8877,
      "step": 183380
    },
    {
      "epoch": 0.642736228958353,
      "grad_norm": 3.15625,
      "learning_rate": 4.365314869765909e-05,
      "loss": 0.9025,
      "step": 183390
    },
    {
      "epoch": 0.6427712764652487,
      "grad_norm": 2.84375,
      "learning_rate": 4.365249966899538e-05,
      "loss": 0.8835,
      "step": 183400
    },
    {
      "epoch": 0.6428063239721442,
      "grad_norm": 3.03125,
      "learning_rate": 4.3651850640331685e-05,
      "loss": 0.8795,
      "step": 183410
    },
    {
      "epoch": 0.6428413714790399,
      "grad_norm": 2.953125,
      "learning_rate": 4.365120161166798e-05,
      "loss": 0.9051,
      "step": 183420
    },
    {
      "epoch": 0.6428764189859354,
      "grad_norm": 2.78125,
      "learning_rate": 4.365055258300428e-05,
      "loss": 0.9328,
      "step": 183430
    },
    {
      "epoch": 0.642911466492831,
      "grad_norm": 2.96875,
      "learning_rate": 4.3649903554340575e-05,
      "loss": 0.9327,
      "step": 183440
    },
    {
      "epoch": 0.6429465139997266,
      "grad_norm": 2.984375,
      "learning_rate": 4.364925452567688e-05,
      "loss": 0.9685,
      "step": 183450
    },
    {
      "epoch": 0.6429815615066222,
      "grad_norm": 2.921875,
      "learning_rate": 4.364860549701317e-05,
      "loss": 0.8867,
      "step": 183460
    },
    {
      "epoch": 0.6430166090135179,
      "grad_norm": 3.34375,
      "learning_rate": 4.364795646834947e-05,
      "loss": 0.9199,
      "step": 183470
    },
    {
      "epoch": 0.6430516565204134,
      "grad_norm": 2.640625,
      "learning_rate": 4.364730743968577e-05,
      "loss": 0.9664,
      "step": 183480
    },
    {
      "epoch": 0.643086704027309,
      "grad_norm": 2.859375,
      "learning_rate": 4.364665841102207e-05,
      "loss": 0.9141,
      "step": 183490
    },
    {
      "epoch": 0.6431217515342046,
      "grad_norm": 2.96875,
      "learning_rate": 4.364600938235836e-05,
      "loss": 0.8841,
      "step": 183500
    },
    {
      "epoch": 0.6431567990411002,
      "grad_norm": 2.515625,
      "learning_rate": 4.3645360353694665e-05,
      "loss": 0.9832,
      "step": 183510
    },
    {
      "epoch": 0.6431918465479958,
      "grad_norm": 3.078125,
      "learning_rate": 4.364471132503096e-05,
      "loss": 1.0083,
      "step": 183520
    },
    {
      "epoch": 0.6432268940548914,
      "grad_norm": 3.4375,
      "learning_rate": 4.364406229636726e-05,
      "loss": 0.8655,
      "step": 183530
    },
    {
      "epoch": 0.643261941561787,
      "grad_norm": 3.03125,
      "learning_rate": 4.364341326770356e-05,
      "loss": 0.9433,
      "step": 183540
    },
    {
      "epoch": 0.6432969890686826,
      "grad_norm": 3.203125,
      "learning_rate": 4.364276423903986e-05,
      "loss": 0.9315,
      "step": 183550
    },
    {
      "epoch": 0.6433320365755782,
      "grad_norm": 3.703125,
      "learning_rate": 4.364211521037616e-05,
      "loss": 0.8816,
      "step": 183560
    },
    {
      "epoch": 0.6433670840824738,
      "grad_norm": 2.8125,
      "learning_rate": 4.364146618171245e-05,
      "loss": 0.8402,
      "step": 183570
    },
    {
      "epoch": 0.6434021315893694,
      "grad_norm": 2.890625,
      "learning_rate": 4.3640817153048754e-05,
      "loss": 0.9287,
      "step": 183580
    },
    {
      "epoch": 0.643437179096265,
      "grad_norm": 3.1875,
      "learning_rate": 4.364016812438505e-05,
      "loss": 0.8753,
      "step": 183590
    },
    {
      "epoch": 0.6434722266031606,
      "grad_norm": 3.109375,
      "learning_rate": 4.363951909572134e-05,
      "loss": 0.8468,
      "step": 183600
    },
    {
      "epoch": 0.6435072741100561,
      "grad_norm": 3.203125,
      "learning_rate": 4.363887006705764e-05,
      "loss": 0.9575,
      "step": 183610
    },
    {
      "epoch": 0.6435423216169518,
      "grad_norm": 2.9375,
      "learning_rate": 4.363822103839394e-05,
      "loss": 1.0191,
      "step": 183620
    },
    {
      "epoch": 0.6435773691238473,
      "grad_norm": 2.9375,
      "learning_rate": 4.363757200973024e-05,
      "loss": 0.8749,
      "step": 183630
    },
    {
      "epoch": 0.643612416630743,
      "grad_norm": 2.8125,
      "learning_rate": 4.3636922981066535e-05,
      "loss": 0.9182,
      "step": 183640
    },
    {
      "epoch": 0.6436474641376385,
      "grad_norm": 2.46875,
      "learning_rate": 4.363627395240284e-05,
      "loss": 0.8861,
      "step": 183650
    },
    {
      "epoch": 0.6436825116445342,
      "grad_norm": 2.40625,
      "learning_rate": 4.363562492373913e-05,
      "loss": 0.8184,
      "step": 183660
    },
    {
      "epoch": 0.6437175591514298,
      "grad_norm": 2.84375,
      "learning_rate": 4.363497589507543e-05,
      "loss": 0.894,
      "step": 183670
    },
    {
      "epoch": 0.6437526066583253,
      "grad_norm": 2.9375,
      "learning_rate": 4.363432686641173e-05,
      "loss": 0.9499,
      "step": 183680
    },
    {
      "epoch": 0.643787654165221,
      "grad_norm": 2.828125,
      "learning_rate": 4.363367783774803e-05,
      "loss": 0.8698,
      "step": 183690
    },
    {
      "epoch": 0.6438227016721165,
      "grad_norm": 2.890625,
      "learning_rate": 4.363302880908432e-05,
      "loss": 0.8085,
      "step": 183700
    },
    {
      "epoch": 0.6438577491790122,
      "grad_norm": 2.90625,
      "learning_rate": 4.3632379780420625e-05,
      "loss": 0.873,
      "step": 183710
    },
    {
      "epoch": 0.6438927966859077,
      "grad_norm": 2.421875,
      "learning_rate": 4.363173075175692e-05,
      "loss": 0.8767,
      "step": 183720
    },
    {
      "epoch": 0.6439278441928034,
      "grad_norm": 2.90625,
      "learning_rate": 4.363108172309322e-05,
      "loss": 0.9135,
      "step": 183730
    },
    {
      "epoch": 0.6439628916996989,
      "grad_norm": 3.0625,
      "learning_rate": 4.3630432694429515e-05,
      "loss": 0.9272,
      "step": 183740
    },
    {
      "epoch": 0.6439979392065945,
      "grad_norm": 2.828125,
      "learning_rate": 4.362978366576582e-05,
      "loss": 0.8856,
      "step": 183750
    },
    {
      "epoch": 0.6440329867134902,
      "grad_norm": 2.59375,
      "learning_rate": 4.362913463710212e-05,
      "loss": 0.9247,
      "step": 183760
    },
    {
      "epoch": 0.6440680342203857,
      "grad_norm": 2.78125,
      "learning_rate": 4.362848560843841e-05,
      "loss": 0.9335,
      "step": 183770
    },
    {
      "epoch": 0.6441030817272814,
      "grad_norm": 3.171875,
      "learning_rate": 4.3627836579774714e-05,
      "loss": 0.9853,
      "step": 183780
    },
    {
      "epoch": 0.6441381292341769,
      "grad_norm": 2.53125,
      "learning_rate": 4.362718755111101e-05,
      "loss": 0.8936,
      "step": 183790
    },
    {
      "epoch": 0.6441731767410726,
      "grad_norm": 3.21875,
      "learning_rate": 4.362653852244731e-05,
      "loss": 0.9561,
      "step": 183800
    },
    {
      "epoch": 0.6442082242479681,
      "grad_norm": 2.984375,
      "learning_rate": 4.3625889493783605e-05,
      "loss": 0.9031,
      "step": 183810
    },
    {
      "epoch": 0.6442432717548637,
      "grad_norm": 2.671875,
      "learning_rate": 4.3625240465119906e-05,
      "loss": 0.9422,
      "step": 183820
    },
    {
      "epoch": 0.6442783192617593,
      "grad_norm": 2.84375,
      "learning_rate": 4.36245914364562e-05,
      "loss": 0.9159,
      "step": 183830
    },
    {
      "epoch": 0.6443133667686549,
      "grad_norm": 3.109375,
      "learning_rate": 4.36239424077925e-05,
      "loss": 0.9692,
      "step": 183840
    },
    {
      "epoch": 0.6443484142755506,
      "grad_norm": 3.234375,
      "learning_rate": 4.36232933791288e-05,
      "loss": 0.9834,
      "step": 183850
    },
    {
      "epoch": 0.6443834617824461,
      "grad_norm": 3.109375,
      "learning_rate": 4.36226443504651e-05,
      "loss": 0.9024,
      "step": 183860
    },
    {
      "epoch": 0.6444185092893417,
      "grad_norm": 2.859375,
      "learning_rate": 4.362199532180139e-05,
      "loss": 0.919,
      "step": 183870
    },
    {
      "epoch": 0.6444535567962373,
      "grad_norm": 3.234375,
      "learning_rate": 4.3621346293137694e-05,
      "loss": 0.9071,
      "step": 183880
    },
    {
      "epoch": 0.6444886043031329,
      "grad_norm": 2.921875,
      "learning_rate": 4.362069726447399e-05,
      "loss": 0.8869,
      "step": 183890
    },
    {
      "epoch": 0.6445236518100285,
      "grad_norm": 2.796875,
      "learning_rate": 4.362004823581029e-05,
      "loss": 0.8807,
      "step": 183900
    },
    {
      "epoch": 0.6445586993169241,
      "grad_norm": 3.1875,
      "learning_rate": 4.361939920714659e-05,
      "loss": 0.9464,
      "step": 183910
    },
    {
      "epoch": 0.6445937468238196,
      "grad_norm": 2.734375,
      "learning_rate": 4.3618750178482886e-05,
      "loss": 0.9363,
      "step": 183920
    },
    {
      "epoch": 0.6446287943307153,
      "grad_norm": 3.09375,
      "learning_rate": 4.361810114981919e-05,
      "loss": 0.929,
      "step": 183930
    },
    {
      "epoch": 0.6446638418376108,
      "grad_norm": 3.0,
      "learning_rate": 4.361745212115548e-05,
      "loss": 0.8763,
      "step": 183940
    },
    {
      "epoch": 0.6446988893445065,
      "grad_norm": 3.0625,
      "learning_rate": 4.3616803092491783e-05,
      "loss": 0.862,
      "step": 183950
    },
    {
      "epoch": 0.6447339368514021,
      "grad_norm": 3.453125,
      "learning_rate": 4.361615406382808e-05,
      "loss": 0.8638,
      "step": 183960
    },
    {
      "epoch": 0.6447689843582977,
      "grad_norm": 2.84375,
      "learning_rate": 4.361550503516437e-05,
      "loss": 0.9389,
      "step": 183970
    },
    {
      "epoch": 0.6448040318651933,
      "grad_norm": 3.390625,
      "learning_rate": 4.361485600650067e-05,
      "loss": 0.8189,
      "step": 183980
    },
    {
      "epoch": 0.6448390793720888,
      "grad_norm": 3.203125,
      "learning_rate": 4.361420697783697e-05,
      "loss": 0.8957,
      "step": 183990
    },
    {
      "epoch": 0.6448741268789845,
      "grad_norm": 2.828125,
      "learning_rate": 4.361355794917327e-05,
      "loss": 0.9528,
      "step": 184000
    },
    {
      "epoch": 0.64490917438588,
      "grad_norm": 2.84375,
      "learning_rate": 4.3612908920509565e-05,
      "loss": 0.8502,
      "step": 184010
    },
    {
      "epoch": 0.6449442218927757,
      "grad_norm": 2.71875,
      "learning_rate": 4.3612259891845866e-05,
      "loss": 0.9824,
      "step": 184020
    },
    {
      "epoch": 0.6449792693996712,
      "grad_norm": 3.0625,
      "learning_rate": 4.361161086318216e-05,
      "loss": 0.8201,
      "step": 184030
    },
    {
      "epoch": 0.6450143169065669,
      "grad_norm": 2.859375,
      "learning_rate": 4.361096183451846e-05,
      "loss": 0.8516,
      "step": 184040
    },
    {
      "epoch": 0.6450493644134625,
      "grad_norm": 2.78125,
      "learning_rate": 4.361031280585476e-05,
      "loss": 0.8499,
      "step": 184050
    },
    {
      "epoch": 0.645084411920358,
      "grad_norm": 2.828125,
      "learning_rate": 4.360966377719106e-05,
      "loss": 0.8931,
      "step": 184060
    },
    {
      "epoch": 0.6451194594272537,
      "grad_norm": 2.828125,
      "learning_rate": 4.360901474852735e-05,
      "loss": 0.9827,
      "step": 184070
    },
    {
      "epoch": 0.6451545069341492,
      "grad_norm": 2.703125,
      "learning_rate": 4.3608365719863654e-05,
      "loss": 0.8648,
      "step": 184080
    },
    {
      "epoch": 0.6451895544410449,
      "grad_norm": 3.375,
      "learning_rate": 4.360771669119995e-05,
      "loss": 1.0347,
      "step": 184090
    },
    {
      "epoch": 0.6452246019479404,
      "grad_norm": 3.203125,
      "learning_rate": 4.360706766253625e-05,
      "loss": 0.8681,
      "step": 184100
    },
    {
      "epoch": 0.645259649454836,
      "grad_norm": 2.84375,
      "learning_rate": 4.3606418633872545e-05,
      "loss": 0.8995,
      "step": 184110
    },
    {
      "epoch": 0.6452946969617316,
      "grad_norm": 3.09375,
      "learning_rate": 4.3605769605208846e-05,
      "loss": 0.8996,
      "step": 184120
    },
    {
      "epoch": 0.6453297444686272,
      "grad_norm": 3.421875,
      "learning_rate": 4.360512057654515e-05,
      "loss": 0.9854,
      "step": 184130
    },
    {
      "epoch": 0.6453647919755228,
      "grad_norm": 2.8125,
      "learning_rate": 4.360447154788144e-05,
      "loss": 0.9544,
      "step": 184140
    },
    {
      "epoch": 0.6453998394824184,
      "grad_norm": 3.078125,
      "learning_rate": 4.3603822519217743e-05,
      "loss": 0.9021,
      "step": 184150
    },
    {
      "epoch": 0.6454348869893141,
      "grad_norm": 3.34375,
      "learning_rate": 4.360317349055404e-05,
      "loss": 0.8967,
      "step": 184160
    },
    {
      "epoch": 0.6454699344962096,
      "grad_norm": 2.609375,
      "learning_rate": 4.360252446189034e-05,
      "loss": 0.9216,
      "step": 184170
    },
    {
      "epoch": 0.6455049820031052,
      "grad_norm": 3.78125,
      "learning_rate": 4.3601875433226634e-05,
      "loss": 0.9469,
      "step": 184180
    },
    {
      "epoch": 0.6455400295100008,
      "grad_norm": 2.921875,
      "learning_rate": 4.3601226404562935e-05,
      "loss": 0.8848,
      "step": 184190
    },
    {
      "epoch": 0.6455750770168964,
      "grad_norm": 3.109375,
      "learning_rate": 4.360057737589923e-05,
      "loss": 0.8999,
      "step": 184200
    },
    {
      "epoch": 0.645610124523792,
      "grad_norm": 2.953125,
      "learning_rate": 4.359992834723553e-05,
      "loss": 0.8489,
      "step": 184210
    },
    {
      "epoch": 0.6456451720306876,
      "grad_norm": 2.84375,
      "learning_rate": 4.3599279318571826e-05,
      "loss": 0.9135,
      "step": 184220
    },
    {
      "epoch": 0.6456802195375831,
      "grad_norm": 3.046875,
      "learning_rate": 4.359863028990813e-05,
      "loss": 0.9067,
      "step": 184230
    },
    {
      "epoch": 0.6457152670444788,
      "grad_norm": 2.984375,
      "learning_rate": 4.359798126124442e-05,
      "loss": 0.9044,
      "step": 184240
    },
    {
      "epoch": 0.6457503145513744,
      "grad_norm": 2.890625,
      "learning_rate": 4.3597332232580723e-05,
      "loss": 0.9731,
      "step": 184250
    },
    {
      "epoch": 0.64578536205827,
      "grad_norm": 2.90625,
      "learning_rate": 4.3596683203917025e-05,
      "loss": 0.9882,
      "step": 184260
    },
    {
      "epoch": 0.6458204095651656,
      "grad_norm": 2.390625,
      "learning_rate": 4.359603417525332e-05,
      "loss": 0.9237,
      "step": 184270
    },
    {
      "epoch": 0.6458554570720612,
      "grad_norm": 2.9375,
      "learning_rate": 4.359538514658962e-05,
      "loss": 0.9716,
      "step": 184280
    },
    {
      "epoch": 0.6458905045789568,
      "grad_norm": 3.28125,
      "learning_rate": 4.3594736117925915e-05,
      "loss": 0.8301,
      "step": 184290
    },
    {
      "epoch": 0.6459255520858523,
      "grad_norm": 2.8125,
      "learning_rate": 4.359408708926222e-05,
      "loss": 0.9778,
      "step": 184300
    },
    {
      "epoch": 0.645960599592748,
      "grad_norm": 3.140625,
      "learning_rate": 4.359343806059851e-05,
      "loss": 0.9484,
      "step": 184310
    },
    {
      "epoch": 0.6459956470996435,
      "grad_norm": 2.90625,
      "learning_rate": 4.359278903193481e-05,
      "loss": 0.9792,
      "step": 184320
    },
    {
      "epoch": 0.6460306946065392,
      "grad_norm": 2.375,
      "learning_rate": 4.359214000327111e-05,
      "loss": 0.8178,
      "step": 184330
    },
    {
      "epoch": 0.6460657421134348,
      "grad_norm": 2.46875,
      "learning_rate": 4.359149097460741e-05,
      "loss": 0.7846,
      "step": 184340
    },
    {
      "epoch": 0.6461007896203304,
      "grad_norm": 2.953125,
      "learning_rate": 4.3590841945943703e-05,
      "loss": 0.8871,
      "step": 184350
    },
    {
      "epoch": 0.646135837127226,
      "grad_norm": 3.046875,
      "learning_rate": 4.359019291728e-05,
      "loss": 0.8878,
      "step": 184360
    },
    {
      "epoch": 0.6461708846341215,
      "grad_norm": 2.796875,
      "learning_rate": 4.35895438886163e-05,
      "loss": 0.8505,
      "step": 184370
    },
    {
      "epoch": 0.6462059321410172,
      "grad_norm": 2.953125,
      "learning_rate": 4.3588894859952594e-05,
      "loss": 0.9006,
      "step": 184380
    },
    {
      "epoch": 0.6462409796479127,
      "grad_norm": 2.640625,
      "learning_rate": 4.3588245831288895e-05,
      "loss": 0.9111,
      "step": 184390
    },
    {
      "epoch": 0.6462760271548084,
      "grad_norm": 2.796875,
      "learning_rate": 4.358759680262519e-05,
      "loss": 0.9136,
      "step": 184400
    },
    {
      "epoch": 0.6463110746617039,
      "grad_norm": 2.703125,
      "learning_rate": 4.358694777396149e-05,
      "loss": 0.9252,
      "step": 184410
    },
    {
      "epoch": 0.6463461221685995,
      "grad_norm": 2.90625,
      "learning_rate": 4.3586298745297786e-05,
      "loss": 0.8891,
      "step": 184420
    },
    {
      "epoch": 0.6463811696754951,
      "grad_norm": 3.125,
      "learning_rate": 4.358564971663409e-05,
      "loss": 0.9125,
      "step": 184430
    },
    {
      "epoch": 0.6464162171823907,
      "grad_norm": 2.953125,
      "learning_rate": 4.358500068797038e-05,
      "loss": 0.8998,
      "step": 184440
    },
    {
      "epoch": 0.6464512646892864,
      "grad_norm": 3.109375,
      "learning_rate": 4.3584351659306683e-05,
      "loss": 0.9502,
      "step": 184450
    },
    {
      "epoch": 0.6464863121961819,
      "grad_norm": 3.109375,
      "learning_rate": 4.358370263064298e-05,
      "loss": 0.9253,
      "step": 184460
    },
    {
      "epoch": 0.6465213597030776,
      "grad_norm": 2.59375,
      "learning_rate": 4.358305360197928e-05,
      "loss": 0.8615,
      "step": 184470
    },
    {
      "epoch": 0.6465564072099731,
      "grad_norm": 2.953125,
      "learning_rate": 4.3582404573315574e-05,
      "loss": 0.97,
      "step": 184480
    },
    {
      "epoch": 0.6465914547168687,
      "grad_norm": 3.625,
      "learning_rate": 4.3581755544651875e-05,
      "loss": 0.8722,
      "step": 184490
    },
    {
      "epoch": 0.6466265022237643,
      "grad_norm": 3.390625,
      "learning_rate": 4.358110651598818e-05,
      "loss": 0.9019,
      "step": 184500
    },
    {
      "epoch": 0.6466615497306599,
      "grad_norm": 3.015625,
      "learning_rate": 4.358045748732447e-05,
      "loss": 0.9494,
      "step": 184510
    },
    {
      "epoch": 0.6466965972375555,
      "grad_norm": 2.953125,
      "learning_rate": 4.357980845866077e-05,
      "loss": 0.9566,
      "step": 184520
    },
    {
      "epoch": 0.6467316447444511,
      "grad_norm": 3.34375,
      "learning_rate": 4.357915942999707e-05,
      "loss": 0.9595,
      "step": 184530
    },
    {
      "epoch": 0.6467666922513468,
      "grad_norm": 2.71875,
      "learning_rate": 4.357851040133337e-05,
      "loss": 0.8901,
      "step": 184540
    },
    {
      "epoch": 0.6468017397582423,
      "grad_norm": 3.03125,
      "learning_rate": 4.3577861372669663e-05,
      "loss": 0.8875,
      "step": 184550
    },
    {
      "epoch": 0.6468367872651379,
      "grad_norm": 3.09375,
      "learning_rate": 4.3577212344005965e-05,
      "loss": 0.8979,
      "step": 184560
    },
    {
      "epoch": 0.6468718347720335,
      "grad_norm": 2.75,
      "learning_rate": 4.357656331534226e-05,
      "loss": 0.957,
      "step": 184570
    },
    {
      "epoch": 0.6469068822789291,
      "grad_norm": 3.15625,
      "learning_rate": 4.357591428667856e-05,
      "loss": 0.8874,
      "step": 184580
    },
    {
      "epoch": 0.6469419297858247,
      "grad_norm": 2.640625,
      "learning_rate": 4.3575265258014855e-05,
      "loss": 0.7966,
      "step": 184590
    },
    {
      "epoch": 0.6469769772927203,
      "grad_norm": 3.15625,
      "learning_rate": 4.357461622935116e-05,
      "loss": 0.9438,
      "step": 184600
    },
    {
      "epoch": 0.6470120247996158,
      "grad_norm": 2.578125,
      "learning_rate": 4.357396720068745e-05,
      "loss": 0.9283,
      "step": 184610
    },
    {
      "epoch": 0.6470470723065115,
      "grad_norm": 2.984375,
      "learning_rate": 4.357331817202375e-05,
      "loss": 0.965,
      "step": 184620
    },
    {
      "epoch": 0.647082119813407,
      "grad_norm": 3.328125,
      "learning_rate": 4.3572669143360054e-05,
      "loss": 0.9631,
      "step": 184630
    },
    {
      "epoch": 0.6471171673203027,
      "grad_norm": 2.703125,
      "learning_rate": 4.357202011469635e-05,
      "loss": 0.9518,
      "step": 184640
    },
    {
      "epoch": 0.6471522148271983,
      "grad_norm": 3.390625,
      "learning_rate": 4.357137108603265e-05,
      "loss": 0.8812,
      "step": 184650
    },
    {
      "epoch": 0.6471872623340938,
      "grad_norm": 2.5,
      "learning_rate": 4.3570722057368945e-05,
      "loss": 0.9523,
      "step": 184660
    },
    {
      "epoch": 0.6472223098409895,
      "grad_norm": 2.859375,
      "learning_rate": 4.3570073028705246e-05,
      "loss": 0.9062,
      "step": 184670
    },
    {
      "epoch": 0.647257357347885,
      "grad_norm": 3.4375,
      "learning_rate": 4.356942400004154e-05,
      "loss": 0.9325,
      "step": 184680
    },
    {
      "epoch": 0.6472924048547807,
      "grad_norm": 2.984375,
      "learning_rate": 4.356877497137784e-05,
      "loss": 0.934,
      "step": 184690
    },
    {
      "epoch": 0.6473274523616762,
      "grad_norm": 2.328125,
      "learning_rate": 4.356812594271414e-05,
      "loss": 0.9293,
      "step": 184700
    },
    {
      "epoch": 0.6473624998685719,
      "grad_norm": 3.3125,
      "learning_rate": 4.356747691405044e-05,
      "loss": 1.0041,
      "step": 184710
    },
    {
      "epoch": 0.6473975473754674,
      "grad_norm": 2.796875,
      "learning_rate": 4.356682788538673e-05,
      "loss": 0.9243,
      "step": 184720
    },
    {
      "epoch": 0.647432594882363,
      "grad_norm": 2.84375,
      "learning_rate": 4.356617885672303e-05,
      "loss": 0.9362,
      "step": 184730
    },
    {
      "epoch": 0.6474676423892587,
      "grad_norm": 3.421875,
      "learning_rate": 4.356552982805933e-05,
      "loss": 0.8837,
      "step": 184740
    },
    {
      "epoch": 0.6475026898961542,
      "grad_norm": 3.109375,
      "learning_rate": 4.3564880799395623e-05,
      "loss": 0.9586,
      "step": 184750
    },
    {
      "epoch": 0.6475377374030499,
      "grad_norm": 3.046875,
      "learning_rate": 4.3564231770731925e-05,
      "loss": 0.968,
      "step": 184760
    },
    {
      "epoch": 0.6475727849099454,
      "grad_norm": 2.90625,
      "learning_rate": 4.356358274206822e-05,
      "loss": 0.8966,
      "step": 184770
    },
    {
      "epoch": 0.6476078324168411,
      "grad_norm": 3.203125,
      "learning_rate": 4.356293371340452e-05,
      "loss": 0.8833,
      "step": 184780
    },
    {
      "epoch": 0.6476428799237366,
      "grad_norm": 2.765625,
      "learning_rate": 4.3562284684740815e-05,
      "loss": 0.8731,
      "step": 184790
    },
    {
      "epoch": 0.6476779274306322,
      "grad_norm": 3.296875,
      "learning_rate": 4.356163565607712e-05,
      "loss": 0.9415,
      "step": 184800
    },
    {
      "epoch": 0.6477129749375278,
      "grad_norm": 3.40625,
      "learning_rate": 4.356098662741341e-05,
      "loss": 0.9697,
      "step": 184810
    },
    {
      "epoch": 0.6477480224444234,
      "grad_norm": 2.890625,
      "learning_rate": 4.356033759874971e-05,
      "loss": 0.9394,
      "step": 184820
    },
    {
      "epoch": 0.6477830699513191,
      "grad_norm": 3.34375,
      "learning_rate": 4.355968857008601e-05,
      "loss": 0.9297,
      "step": 184830
    },
    {
      "epoch": 0.6478181174582146,
      "grad_norm": 3.140625,
      "learning_rate": 4.355903954142231e-05,
      "loss": 0.9484,
      "step": 184840
    },
    {
      "epoch": 0.6478531649651103,
      "grad_norm": 3.0625,
      "learning_rate": 4.3558390512758603e-05,
      "loss": 0.8934,
      "step": 184850
    },
    {
      "epoch": 0.6478882124720058,
      "grad_norm": 3.078125,
      "learning_rate": 4.3557741484094905e-05,
      "loss": 0.9111,
      "step": 184860
    },
    {
      "epoch": 0.6479232599789014,
      "grad_norm": 2.5,
      "learning_rate": 4.3557092455431206e-05,
      "loss": 0.8651,
      "step": 184870
    },
    {
      "epoch": 0.647958307485797,
      "grad_norm": 2.765625,
      "learning_rate": 4.35564434267675e-05,
      "loss": 0.8759,
      "step": 184880
    },
    {
      "epoch": 0.6479933549926926,
      "grad_norm": 3.984375,
      "learning_rate": 4.35557943981038e-05,
      "loss": 0.9814,
      "step": 184890
    },
    {
      "epoch": 0.6480284024995882,
      "grad_norm": 2.546875,
      "learning_rate": 4.35551453694401e-05,
      "loss": 0.8797,
      "step": 184900
    },
    {
      "epoch": 0.6480634500064838,
      "grad_norm": 3.15625,
      "learning_rate": 4.35544963407764e-05,
      "loss": 0.9005,
      "step": 184910
    },
    {
      "epoch": 0.6480984975133793,
      "grad_norm": 2.875,
      "learning_rate": 4.355384731211269e-05,
      "loss": 0.9455,
      "step": 184920
    },
    {
      "epoch": 0.648133545020275,
      "grad_norm": 3.0625,
      "learning_rate": 4.3553198283448994e-05,
      "loss": 0.8918,
      "step": 184930
    },
    {
      "epoch": 0.6481685925271706,
      "grad_norm": 2.84375,
      "learning_rate": 4.355254925478529e-05,
      "loss": 0.7979,
      "step": 184940
    },
    {
      "epoch": 0.6482036400340662,
      "grad_norm": 3.15625,
      "learning_rate": 4.355190022612159e-05,
      "loss": 0.8817,
      "step": 184950
    },
    {
      "epoch": 0.6482386875409618,
      "grad_norm": 2.84375,
      "learning_rate": 4.3551251197457885e-05,
      "loss": 0.873,
      "step": 184960
    },
    {
      "epoch": 0.6482737350478573,
      "grad_norm": 2.84375,
      "learning_rate": 4.3550602168794186e-05,
      "loss": 0.8287,
      "step": 184970
    },
    {
      "epoch": 0.648308782554753,
      "grad_norm": 2.828125,
      "learning_rate": 4.354995314013048e-05,
      "loss": 0.943,
      "step": 184980
    },
    {
      "epoch": 0.6483438300616485,
      "grad_norm": 2.953125,
      "learning_rate": 4.354930411146678e-05,
      "loss": 0.9391,
      "step": 184990
    },
    {
      "epoch": 0.6483788775685442,
      "grad_norm": 2.953125,
      "learning_rate": 4.3548655082803084e-05,
      "loss": 0.9126,
      "step": 185000
    },
    {
      "epoch": 0.6483788775685442,
      "eval_loss": 0.8533912301063538,
      "eval_runtime": 560.5219,
      "eval_samples_per_second": 678.717,
      "eval_steps_per_second": 56.56,
      "step": 185000
    },
    {
      "epoch": 0.6484139250754397,
      "grad_norm": 2.640625,
      "learning_rate": 4.354800605413938e-05,
      "loss": 0.8407,
      "step": 185010
    },
    {
      "epoch": 0.6484489725823354,
      "grad_norm": 2.546875,
      "learning_rate": 4.354735702547568e-05,
      "loss": 0.9173,
      "step": 185020
    },
    {
      "epoch": 0.648484020089231,
      "grad_norm": 3.09375,
      "learning_rate": 4.3546707996811974e-05,
      "loss": 0.899,
      "step": 185030
    },
    {
      "epoch": 0.6485190675961265,
      "grad_norm": 2.84375,
      "learning_rate": 4.3546058968148276e-05,
      "loss": 0.9203,
      "step": 185040
    },
    {
      "epoch": 0.6485541151030222,
      "grad_norm": 2.890625,
      "learning_rate": 4.354540993948457e-05,
      "loss": 0.9582,
      "step": 185050
    },
    {
      "epoch": 0.6485891626099177,
      "grad_norm": 3.375,
      "learning_rate": 4.354476091082087e-05,
      "loss": 0.8855,
      "step": 185060
    },
    {
      "epoch": 0.6486242101168134,
      "grad_norm": 2.953125,
      "learning_rate": 4.3544111882157166e-05,
      "loss": 0.9734,
      "step": 185070
    },
    {
      "epoch": 0.6486592576237089,
      "grad_norm": 3.1875,
      "learning_rate": 4.354346285349347e-05,
      "loss": 0.8842,
      "step": 185080
    },
    {
      "epoch": 0.6486943051306046,
      "grad_norm": 2.90625,
      "learning_rate": 4.354281382482976e-05,
      "loss": 0.8595,
      "step": 185090
    },
    {
      "epoch": 0.6487293526375001,
      "grad_norm": 2.953125,
      "learning_rate": 4.354216479616606e-05,
      "loss": 0.9586,
      "step": 185100
    },
    {
      "epoch": 0.6487644001443957,
      "grad_norm": 2.921875,
      "learning_rate": 4.354151576750236e-05,
      "loss": 0.9122,
      "step": 185110
    },
    {
      "epoch": 0.6487994476512913,
      "grad_norm": 2.84375,
      "learning_rate": 4.354086673883865e-05,
      "loss": 0.8822,
      "step": 185120
    },
    {
      "epoch": 0.6488344951581869,
      "grad_norm": 3.140625,
      "learning_rate": 4.3540217710174954e-05,
      "loss": 0.9521,
      "step": 185130
    },
    {
      "epoch": 0.6488695426650826,
      "grad_norm": 3.28125,
      "learning_rate": 4.353956868151125e-05,
      "loss": 0.8616,
      "step": 185140
    },
    {
      "epoch": 0.6489045901719781,
      "grad_norm": 3.328125,
      "learning_rate": 4.353891965284755e-05,
      "loss": 0.9407,
      "step": 185150
    },
    {
      "epoch": 0.6489396376788737,
      "grad_norm": 3.125,
      "learning_rate": 4.3538270624183845e-05,
      "loss": 0.8977,
      "step": 185160
    },
    {
      "epoch": 0.6489746851857693,
      "grad_norm": 3.1875,
      "learning_rate": 4.3537621595520146e-05,
      "loss": 1.0227,
      "step": 185170
    },
    {
      "epoch": 0.6490097326926649,
      "grad_norm": 3.21875,
      "learning_rate": 4.353697256685644e-05,
      "loss": 0.9852,
      "step": 185180
    },
    {
      "epoch": 0.6490447801995605,
      "grad_norm": 3.1875,
      "learning_rate": 4.353632353819274e-05,
      "loss": 0.8787,
      "step": 185190
    },
    {
      "epoch": 0.6490798277064561,
      "grad_norm": 2.5625,
      "learning_rate": 4.353567450952904e-05,
      "loss": 0.8404,
      "step": 185200
    },
    {
      "epoch": 0.6491148752133516,
      "grad_norm": 3.09375,
      "learning_rate": 4.353502548086534e-05,
      "loss": 0.9474,
      "step": 185210
    },
    {
      "epoch": 0.6491499227202473,
      "grad_norm": 3.359375,
      "learning_rate": 4.353437645220163e-05,
      "loss": 0.904,
      "step": 185220
    },
    {
      "epoch": 0.6491849702271429,
      "grad_norm": 3.1875,
      "learning_rate": 4.3533727423537934e-05,
      "loss": 0.9021,
      "step": 185230
    },
    {
      "epoch": 0.6492200177340385,
      "grad_norm": 2.71875,
      "learning_rate": 4.3533078394874236e-05,
      "loss": 0.8542,
      "step": 185240
    },
    {
      "epoch": 0.6492550652409341,
      "grad_norm": 3.34375,
      "learning_rate": 4.353242936621053e-05,
      "loss": 0.9821,
      "step": 185250
    },
    {
      "epoch": 0.6492901127478297,
      "grad_norm": 2.671875,
      "learning_rate": 4.353178033754683e-05,
      "loss": 0.8657,
      "step": 185260
    },
    {
      "epoch": 0.6493251602547253,
      "grad_norm": 2.9375,
      "learning_rate": 4.3531131308883126e-05,
      "loss": 0.9774,
      "step": 185270
    },
    {
      "epoch": 0.6493602077616208,
      "grad_norm": 2.828125,
      "learning_rate": 4.353048228021943e-05,
      "loss": 0.903,
      "step": 185280
    },
    {
      "epoch": 0.6493952552685165,
      "grad_norm": 2.96875,
      "learning_rate": 4.352983325155572e-05,
      "loss": 0.8631,
      "step": 185290
    },
    {
      "epoch": 0.649430302775412,
      "grad_norm": 3.125,
      "learning_rate": 4.3529184222892024e-05,
      "loss": 0.8405,
      "step": 185300
    },
    {
      "epoch": 0.6494653502823077,
      "grad_norm": 2.828125,
      "learning_rate": 4.352853519422832e-05,
      "loss": 0.9449,
      "step": 185310
    },
    {
      "epoch": 0.6495003977892032,
      "grad_norm": 3.234375,
      "learning_rate": 4.352788616556462e-05,
      "loss": 0.9791,
      "step": 185320
    },
    {
      "epoch": 0.6495354452960989,
      "grad_norm": 2.59375,
      "learning_rate": 4.3527237136900914e-05,
      "loss": 0.8032,
      "step": 185330
    },
    {
      "epoch": 0.6495704928029945,
      "grad_norm": 2.875,
      "learning_rate": 4.3526588108237216e-05,
      "loss": 1.0022,
      "step": 185340
    },
    {
      "epoch": 0.64960554030989,
      "grad_norm": 2.6875,
      "learning_rate": 4.352593907957351e-05,
      "loss": 0.8582,
      "step": 185350
    },
    {
      "epoch": 0.6496405878167857,
      "grad_norm": 3.046875,
      "learning_rate": 4.352529005090981e-05,
      "loss": 0.8587,
      "step": 185360
    },
    {
      "epoch": 0.6496756353236812,
      "grad_norm": 2.65625,
      "learning_rate": 4.352464102224611e-05,
      "loss": 0.8876,
      "step": 185370
    },
    {
      "epoch": 0.6497106828305769,
      "grad_norm": 2.90625,
      "learning_rate": 4.352399199358241e-05,
      "loss": 0.7698,
      "step": 185380
    },
    {
      "epoch": 0.6497457303374724,
      "grad_norm": 3.0,
      "learning_rate": 4.352334296491871e-05,
      "loss": 0.89,
      "step": 185390
    },
    {
      "epoch": 0.649780777844368,
      "grad_norm": 2.515625,
      "learning_rate": 4.3522693936255004e-05,
      "loss": 0.9072,
      "step": 185400
    },
    {
      "epoch": 0.6498158253512636,
      "grad_norm": 3.015625,
      "learning_rate": 4.3522044907591305e-05,
      "loss": 0.9188,
      "step": 185410
    },
    {
      "epoch": 0.6498508728581592,
      "grad_norm": 3.234375,
      "learning_rate": 4.35213958789276e-05,
      "loss": 0.9344,
      "step": 185420
    },
    {
      "epoch": 0.6498859203650549,
      "grad_norm": 2.765625,
      "learning_rate": 4.35207468502639e-05,
      "loss": 0.8258,
      "step": 185430
    },
    {
      "epoch": 0.6499209678719504,
      "grad_norm": 3.03125,
      "learning_rate": 4.3520097821600196e-05,
      "loss": 0.8644,
      "step": 185440
    },
    {
      "epoch": 0.6499560153788461,
      "grad_norm": 2.921875,
      "learning_rate": 4.35194487929365e-05,
      "loss": 0.9497,
      "step": 185450
    },
    {
      "epoch": 0.6499910628857416,
      "grad_norm": 3.03125,
      "learning_rate": 4.351879976427279e-05,
      "loss": 0.9777,
      "step": 185460
    },
    {
      "epoch": 0.6500261103926372,
      "grad_norm": 3.328125,
      "learning_rate": 4.351815073560909e-05,
      "loss": 0.9872,
      "step": 185470
    },
    {
      "epoch": 0.6500611578995328,
      "grad_norm": 2.796875,
      "learning_rate": 4.351750170694539e-05,
      "loss": 0.8742,
      "step": 185480
    },
    {
      "epoch": 0.6500962054064284,
      "grad_norm": 2.671875,
      "learning_rate": 4.351685267828168e-05,
      "loss": 0.8467,
      "step": 185490
    },
    {
      "epoch": 0.650131252913324,
      "grad_norm": 2.96875,
      "learning_rate": 4.3516203649617984e-05,
      "loss": 0.9294,
      "step": 185500
    },
    {
      "epoch": 0.6501663004202196,
      "grad_norm": 3.5625,
      "learning_rate": 4.351555462095428e-05,
      "loss": 0.9698,
      "step": 185510
    },
    {
      "epoch": 0.6502013479271153,
      "grad_norm": 2.9375,
      "learning_rate": 4.351490559229058e-05,
      "loss": 0.8941,
      "step": 185520
    },
    {
      "epoch": 0.6502363954340108,
      "grad_norm": 2.90625,
      "learning_rate": 4.3514256563626874e-05,
      "loss": 0.8703,
      "step": 185530
    },
    {
      "epoch": 0.6502714429409064,
      "grad_norm": 3.328125,
      "learning_rate": 4.3513607534963176e-05,
      "loss": 0.8653,
      "step": 185540
    },
    {
      "epoch": 0.650306490447802,
      "grad_norm": 2.65625,
      "learning_rate": 4.351295850629947e-05,
      "loss": 0.8694,
      "step": 185550
    },
    {
      "epoch": 0.6503415379546976,
      "grad_norm": 2.796875,
      "learning_rate": 4.351230947763577e-05,
      "loss": 0.9403,
      "step": 185560
    },
    {
      "epoch": 0.6503765854615932,
      "grad_norm": 3.078125,
      "learning_rate": 4.3511660448972066e-05,
      "loss": 0.891,
      "step": 185570
    },
    {
      "epoch": 0.6504116329684888,
      "grad_norm": 2.875,
      "learning_rate": 4.351101142030837e-05,
      "loss": 0.8341,
      "step": 185580
    },
    {
      "epoch": 0.6504466804753843,
      "grad_norm": 3.515625,
      "learning_rate": 4.351036239164467e-05,
      "loss": 0.8801,
      "step": 185590
    },
    {
      "epoch": 0.65048172798228,
      "grad_norm": 2.953125,
      "learning_rate": 4.3509713362980964e-05,
      "loss": 0.7686,
      "step": 185600
    },
    {
      "epoch": 0.6505167754891755,
      "grad_norm": 2.578125,
      "learning_rate": 4.3509064334317265e-05,
      "loss": 0.8754,
      "step": 185610
    },
    {
      "epoch": 0.6505518229960712,
      "grad_norm": 3.390625,
      "learning_rate": 4.350841530565356e-05,
      "loss": 0.853,
      "step": 185620
    },
    {
      "epoch": 0.6505868705029668,
      "grad_norm": 2.984375,
      "learning_rate": 4.350776627698986e-05,
      "loss": 0.9096,
      "step": 185630
    },
    {
      "epoch": 0.6506219180098624,
      "grad_norm": 2.90625,
      "learning_rate": 4.3507117248326156e-05,
      "loss": 0.8867,
      "step": 185640
    },
    {
      "epoch": 0.650656965516758,
      "grad_norm": 2.890625,
      "learning_rate": 4.350646821966246e-05,
      "loss": 0.9134,
      "step": 185650
    },
    {
      "epoch": 0.6506920130236535,
      "grad_norm": 2.71875,
      "learning_rate": 4.350581919099875e-05,
      "loss": 0.9033,
      "step": 185660
    },
    {
      "epoch": 0.6507270605305492,
      "grad_norm": 3.296875,
      "learning_rate": 4.350517016233505e-05,
      "loss": 0.94,
      "step": 185670
    },
    {
      "epoch": 0.6507621080374447,
      "grad_norm": 3.03125,
      "learning_rate": 4.350452113367135e-05,
      "loss": 0.8991,
      "step": 185680
    },
    {
      "epoch": 0.6507971555443404,
      "grad_norm": 3.40625,
      "learning_rate": 4.350387210500765e-05,
      "loss": 0.8476,
      "step": 185690
    },
    {
      "epoch": 0.6508322030512359,
      "grad_norm": 3.109375,
      "learning_rate": 4.3503223076343944e-05,
      "loss": 0.9046,
      "step": 185700
    },
    {
      "epoch": 0.6508672505581315,
      "grad_norm": 2.859375,
      "learning_rate": 4.3502574047680245e-05,
      "loss": 0.9592,
      "step": 185710
    },
    {
      "epoch": 0.6509022980650272,
      "grad_norm": 3.15625,
      "learning_rate": 4.350192501901654e-05,
      "loss": 0.8236,
      "step": 185720
    },
    {
      "epoch": 0.6509373455719227,
      "grad_norm": 2.859375,
      "learning_rate": 4.350127599035284e-05,
      "loss": 0.8775,
      "step": 185730
    },
    {
      "epoch": 0.6509723930788184,
      "grad_norm": 3.03125,
      "learning_rate": 4.350062696168914e-05,
      "loss": 0.9184,
      "step": 185740
    },
    {
      "epoch": 0.6510074405857139,
      "grad_norm": 3.09375,
      "learning_rate": 4.349997793302544e-05,
      "loss": 0.892,
      "step": 185750
    },
    {
      "epoch": 0.6510424880926096,
      "grad_norm": 2.765625,
      "learning_rate": 4.349932890436174e-05,
      "loss": 0.9472,
      "step": 185760
    },
    {
      "epoch": 0.6510775355995051,
      "grad_norm": 2.609375,
      "learning_rate": 4.349867987569803e-05,
      "loss": 0.8435,
      "step": 185770
    },
    {
      "epoch": 0.6511125831064007,
      "grad_norm": 2.84375,
      "learning_rate": 4.3498030847034334e-05,
      "loss": 1.0142,
      "step": 185780
    },
    {
      "epoch": 0.6511476306132963,
      "grad_norm": 3.4375,
      "learning_rate": 4.349738181837063e-05,
      "loss": 0.9071,
      "step": 185790
    },
    {
      "epoch": 0.6511826781201919,
      "grad_norm": 3.140625,
      "learning_rate": 4.349673278970693e-05,
      "loss": 0.9476,
      "step": 185800
    },
    {
      "epoch": 0.6512177256270875,
      "grad_norm": 2.875,
      "learning_rate": 4.3496083761043225e-05,
      "loss": 0.946,
      "step": 185810
    },
    {
      "epoch": 0.6512527731339831,
      "grad_norm": 3.0625,
      "learning_rate": 4.3495434732379526e-05,
      "loss": 0.8398,
      "step": 185820
    },
    {
      "epoch": 0.6512878206408788,
      "grad_norm": 3.03125,
      "learning_rate": 4.349478570371582e-05,
      "loss": 0.9092,
      "step": 185830
    },
    {
      "epoch": 0.6513228681477743,
      "grad_norm": 2.515625,
      "learning_rate": 4.349413667505212e-05,
      "loss": 0.9158,
      "step": 185840
    },
    {
      "epoch": 0.6513579156546699,
      "grad_norm": 2.6875,
      "learning_rate": 4.349348764638842e-05,
      "loss": 0.9909,
      "step": 185850
    },
    {
      "epoch": 0.6513929631615655,
      "grad_norm": 3.015625,
      "learning_rate": 4.349283861772471e-05,
      "loss": 0.8909,
      "step": 185860
    },
    {
      "epoch": 0.6514280106684611,
      "grad_norm": 2.6875,
      "learning_rate": 4.349218958906101e-05,
      "loss": 0.8643,
      "step": 185870
    },
    {
      "epoch": 0.6514630581753567,
      "grad_norm": 2.390625,
      "learning_rate": 4.349154056039731e-05,
      "loss": 0.8948,
      "step": 185880
    },
    {
      "epoch": 0.6514981056822523,
      "grad_norm": 3.078125,
      "learning_rate": 4.349089153173361e-05,
      "loss": 0.9549,
      "step": 185890
    },
    {
      "epoch": 0.6515331531891478,
      "grad_norm": 2.84375,
      "learning_rate": 4.3490242503069904e-05,
      "loss": 0.8292,
      "step": 185900
    },
    {
      "epoch": 0.6515682006960435,
      "grad_norm": 3.125,
      "learning_rate": 4.3489593474406205e-05,
      "loss": 0.8687,
      "step": 185910
    },
    {
      "epoch": 0.6516032482029391,
      "grad_norm": 2.453125,
      "learning_rate": 4.34889444457425e-05,
      "loss": 0.8804,
      "step": 185920
    },
    {
      "epoch": 0.6516382957098347,
      "grad_norm": 3.0,
      "learning_rate": 4.34882954170788e-05,
      "loss": 0.9422,
      "step": 185930
    },
    {
      "epoch": 0.6516733432167303,
      "grad_norm": 2.84375,
      "learning_rate": 4.3487646388415096e-05,
      "loss": 1.031,
      "step": 185940
    },
    {
      "epoch": 0.6517083907236259,
      "grad_norm": 2.921875,
      "learning_rate": 4.34869973597514e-05,
      "loss": 0.9159,
      "step": 185950
    },
    {
      "epoch": 0.6517434382305215,
      "grad_norm": 2.9375,
      "learning_rate": 4.34863483310877e-05,
      "loss": 0.8642,
      "step": 185960
    },
    {
      "epoch": 0.651778485737417,
      "grad_norm": 2.578125,
      "learning_rate": 4.348569930242399e-05,
      "loss": 0.8812,
      "step": 185970
    },
    {
      "epoch": 0.6518135332443127,
      "grad_norm": 2.8125,
      "learning_rate": 4.3485050273760294e-05,
      "loss": 0.9102,
      "step": 185980
    },
    {
      "epoch": 0.6518485807512082,
      "grad_norm": 2.875,
      "learning_rate": 4.348440124509659e-05,
      "loss": 0.8578,
      "step": 185990
    },
    {
      "epoch": 0.6518836282581039,
      "grad_norm": 3.234375,
      "learning_rate": 4.348375221643289e-05,
      "loss": 0.8436,
      "step": 186000
    },
    {
      "epoch": 0.6519186757649995,
      "grad_norm": 3.0625,
      "learning_rate": 4.3483103187769185e-05,
      "loss": 0.8863,
      "step": 186010
    },
    {
      "epoch": 0.651953723271895,
      "grad_norm": 3.03125,
      "learning_rate": 4.3482454159105486e-05,
      "loss": 0.9766,
      "step": 186020
    },
    {
      "epoch": 0.6519887707787907,
      "grad_norm": 2.5625,
      "learning_rate": 4.348180513044178e-05,
      "loss": 0.8168,
      "step": 186030
    },
    {
      "epoch": 0.6520238182856862,
      "grad_norm": 3.34375,
      "learning_rate": 4.348115610177808e-05,
      "loss": 0.9054,
      "step": 186040
    },
    {
      "epoch": 0.6520588657925819,
      "grad_norm": 2.890625,
      "learning_rate": 4.348050707311438e-05,
      "loss": 0.8641,
      "step": 186050
    },
    {
      "epoch": 0.6520939132994774,
      "grad_norm": 2.8125,
      "learning_rate": 4.347985804445068e-05,
      "loss": 0.9255,
      "step": 186060
    },
    {
      "epoch": 0.6521289608063731,
      "grad_norm": 3.265625,
      "learning_rate": 4.347920901578697e-05,
      "loss": 0.9897,
      "step": 186070
    },
    {
      "epoch": 0.6521640083132686,
      "grad_norm": 2.796875,
      "learning_rate": 4.3478559987123274e-05,
      "loss": 0.8466,
      "step": 186080
    },
    {
      "epoch": 0.6521990558201642,
      "grad_norm": 3.171875,
      "learning_rate": 4.347791095845957e-05,
      "loss": 0.9942,
      "step": 186090
    },
    {
      "epoch": 0.6522341033270598,
      "grad_norm": 3.03125,
      "learning_rate": 4.347726192979587e-05,
      "loss": 0.8559,
      "step": 186100
    },
    {
      "epoch": 0.6522691508339554,
      "grad_norm": 3.03125,
      "learning_rate": 4.347661290113217e-05,
      "loss": 0.9284,
      "step": 186110
    },
    {
      "epoch": 0.6523041983408511,
      "grad_norm": 2.75,
      "learning_rate": 4.3475963872468466e-05,
      "loss": 0.8833,
      "step": 186120
    },
    {
      "epoch": 0.6523392458477466,
      "grad_norm": 2.84375,
      "learning_rate": 4.347531484380477e-05,
      "loss": 0.9418,
      "step": 186130
    },
    {
      "epoch": 0.6523742933546423,
      "grad_norm": 2.8125,
      "learning_rate": 4.347466581514106e-05,
      "loss": 0.9055,
      "step": 186140
    },
    {
      "epoch": 0.6524093408615378,
      "grad_norm": 3.359375,
      "learning_rate": 4.3474016786477364e-05,
      "loss": 0.9055,
      "step": 186150
    },
    {
      "epoch": 0.6524443883684334,
      "grad_norm": 2.9375,
      "learning_rate": 4.347336775781366e-05,
      "loss": 0.9438,
      "step": 186160
    },
    {
      "epoch": 0.652479435875329,
      "grad_norm": 2.78125,
      "learning_rate": 4.347271872914996e-05,
      "loss": 0.8858,
      "step": 186170
    },
    {
      "epoch": 0.6525144833822246,
      "grad_norm": 2.671875,
      "learning_rate": 4.3472069700486254e-05,
      "loss": 0.9738,
      "step": 186180
    },
    {
      "epoch": 0.6525495308891202,
      "grad_norm": 3.09375,
      "learning_rate": 4.3471420671822556e-05,
      "loss": 0.9042,
      "step": 186190
    },
    {
      "epoch": 0.6525845783960158,
      "grad_norm": 3.0,
      "learning_rate": 4.347077164315885e-05,
      "loss": 0.9064,
      "step": 186200
    },
    {
      "epoch": 0.6526196259029114,
      "grad_norm": 3.234375,
      "learning_rate": 4.347012261449515e-05,
      "loss": 0.8886,
      "step": 186210
    },
    {
      "epoch": 0.652654673409807,
      "grad_norm": 2.625,
      "learning_rate": 4.3469473585831446e-05,
      "loss": 0.9336,
      "step": 186220
    },
    {
      "epoch": 0.6526897209167026,
      "grad_norm": 2.78125,
      "learning_rate": 4.346882455716774e-05,
      "loss": 0.8613,
      "step": 186230
    },
    {
      "epoch": 0.6527247684235982,
      "grad_norm": 3.4375,
      "learning_rate": 4.346817552850404e-05,
      "loss": 1.0107,
      "step": 186240
    },
    {
      "epoch": 0.6527598159304938,
      "grad_norm": 2.515625,
      "learning_rate": 4.346752649984034e-05,
      "loss": 0.9577,
      "step": 186250
    },
    {
      "epoch": 0.6527948634373893,
      "grad_norm": 2.78125,
      "learning_rate": 4.346687747117664e-05,
      "loss": 0.9338,
      "step": 186260
    },
    {
      "epoch": 0.652829910944285,
      "grad_norm": 2.75,
      "learning_rate": 4.346622844251293e-05,
      "loss": 0.8533,
      "step": 186270
    },
    {
      "epoch": 0.6528649584511805,
      "grad_norm": 2.71875,
      "learning_rate": 4.3465579413849234e-05,
      "loss": 0.8798,
      "step": 186280
    },
    {
      "epoch": 0.6529000059580762,
      "grad_norm": 3.03125,
      "learning_rate": 4.346493038518553e-05,
      "loss": 0.9504,
      "step": 186290
    },
    {
      "epoch": 0.6529350534649717,
      "grad_norm": 2.0,
      "learning_rate": 4.346428135652183e-05,
      "loss": 0.8221,
      "step": 186300
    },
    {
      "epoch": 0.6529701009718674,
      "grad_norm": 3.0,
      "learning_rate": 4.3463632327858125e-05,
      "loss": 0.8527,
      "step": 186310
    },
    {
      "epoch": 0.653005148478763,
      "grad_norm": 2.90625,
      "learning_rate": 4.3462983299194426e-05,
      "loss": 0.9735,
      "step": 186320
    },
    {
      "epoch": 0.6530401959856585,
      "grad_norm": 3.140625,
      "learning_rate": 4.346233427053073e-05,
      "loss": 0.8773,
      "step": 186330
    },
    {
      "epoch": 0.6530752434925542,
      "grad_norm": 2.546875,
      "learning_rate": 4.346168524186702e-05,
      "loss": 0.8969,
      "step": 186340
    },
    {
      "epoch": 0.6531102909994497,
      "grad_norm": 2.921875,
      "learning_rate": 4.3461036213203324e-05,
      "loss": 0.904,
      "step": 186350
    },
    {
      "epoch": 0.6531453385063454,
      "grad_norm": 2.96875,
      "learning_rate": 4.346038718453962e-05,
      "loss": 0.871,
      "step": 186360
    },
    {
      "epoch": 0.6531803860132409,
      "grad_norm": 3.296875,
      "learning_rate": 4.345973815587592e-05,
      "loss": 0.8982,
      "step": 186370
    },
    {
      "epoch": 0.6532154335201366,
      "grad_norm": 2.703125,
      "learning_rate": 4.3459089127212214e-05,
      "loss": 0.8741,
      "step": 186380
    },
    {
      "epoch": 0.6532504810270321,
      "grad_norm": 3.28125,
      "learning_rate": 4.3458440098548516e-05,
      "loss": 0.9101,
      "step": 186390
    },
    {
      "epoch": 0.6532855285339277,
      "grad_norm": 3.6875,
      "learning_rate": 4.345779106988481e-05,
      "loss": 0.9348,
      "step": 186400
    },
    {
      "epoch": 0.6533205760408234,
      "grad_norm": 2.65625,
      "learning_rate": 4.345714204122111e-05,
      "loss": 0.8688,
      "step": 186410
    },
    {
      "epoch": 0.6533556235477189,
      "grad_norm": 2.734375,
      "learning_rate": 4.3456493012557406e-05,
      "loss": 0.8501,
      "step": 186420
    },
    {
      "epoch": 0.6533906710546146,
      "grad_norm": 2.84375,
      "learning_rate": 4.345584398389371e-05,
      "loss": 0.8766,
      "step": 186430
    },
    {
      "epoch": 0.6534257185615101,
      "grad_norm": 3.375,
      "learning_rate": 4.345519495523e-05,
      "loss": 0.9498,
      "step": 186440
    },
    {
      "epoch": 0.6534607660684058,
      "grad_norm": 2.4375,
      "learning_rate": 4.3454545926566304e-05,
      "loss": 0.9516,
      "step": 186450
    },
    {
      "epoch": 0.6534958135753013,
      "grad_norm": 3.453125,
      "learning_rate": 4.3453896897902605e-05,
      "loss": 0.9973,
      "step": 186460
    },
    {
      "epoch": 0.6535308610821969,
      "grad_norm": 3.3125,
      "learning_rate": 4.34532478692389e-05,
      "loss": 0.9253,
      "step": 186470
    },
    {
      "epoch": 0.6535659085890925,
      "grad_norm": 3.078125,
      "learning_rate": 4.34525988405752e-05,
      "loss": 0.8618,
      "step": 186480
    },
    {
      "epoch": 0.6536009560959881,
      "grad_norm": 2.953125,
      "learning_rate": 4.3451949811911496e-05,
      "loss": 0.9024,
      "step": 186490
    },
    {
      "epoch": 0.6536360036028838,
      "grad_norm": 3.015625,
      "learning_rate": 4.34513007832478e-05,
      "loss": 0.859,
      "step": 186500
    },
    {
      "epoch": 0.6536710511097793,
      "grad_norm": 2.890625,
      "learning_rate": 4.345065175458409e-05,
      "loss": 0.9566,
      "step": 186510
    },
    {
      "epoch": 0.653706098616675,
      "grad_norm": 3.109375,
      "learning_rate": 4.345000272592039e-05,
      "loss": 0.8593,
      "step": 186520
    },
    {
      "epoch": 0.6537411461235705,
      "grad_norm": 2.875,
      "learning_rate": 4.344935369725669e-05,
      "loss": 0.9075,
      "step": 186530
    },
    {
      "epoch": 0.6537761936304661,
      "grad_norm": 3.125,
      "learning_rate": 4.344870466859299e-05,
      "loss": 0.9176,
      "step": 186540
    },
    {
      "epoch": 0.6538112411373617,
      "grad_norm": 2.84375,
      "learning_rate": 4.3448055639929284e-05,
      "loss": 0.8799,
      "step": 186550
    },
    {
      "epoch": 0.6538462886442573,
      "grad_norm": 2.703125,
      "learning_rate": 4.3447406611265585e-05,
      "loss": 0.8991,
      "step": 186560
    },
    {
      "epoch": 0.6538813361511528,
      "grad_norm": 3.046875,
      "learning_rate": 4.344675758260188e-05,
      "loss": 0.8492,
      "step": 186570
    },
    {
      "epoch": 0.6539163836580485,
      "grad_norm": 3.484375,
      "learning_rate": 4.344610855393818e-05,
      "loss": 0.9491,
      "step": 186580
    },
    {
      "epoch": 0.653951431164944,
      "grad_norm": 2.8125,
      "learning_rate": 4.3445459525274476e-05,
      "loss": 0.9713,
      "step": 186590
    },
    {
      "epoch": 0.6539864786718397,
      "grad_norm": 2.609375,
      "learning_rate": 4.344481049661077e-05,
      "loss": 0.8615,
      "step": 186600
    },
    {
      "epoch": 0.6540215261787353,
      "grad_norm": 3.15625,
      "learning_rate": 4.344416146794707e-05,
      "loss": 1.0588,
      "step": 186610
    },
    {
      "epoch": 0.6540565736856309,
      "grad_norm": 3.046875,
      "learning_rate": 4.3443512439283366e-05,
      "loss": 1.0411,
      "step": 186620
    },
    {
      "epoch": 0.6540916211925265,
      "grad_norm": 3.1875,
      "learning_rate": 4.344286341061967e-05,
      "loss": 0.8519,
      "step": 186630
    },
    {
      "epoch": 0.654126668699422,
      "grad_norm": 3.046875,
      "learning_rate": 4.344221438195596e-05,
      "loss": 0.9577,
      "step": 186640
    },
    {
      "epoch": 0.6541617162063177,
      "grad_norm": 3.140625,
      "learning_rate": 4.3441565353292264e-05,
      "loss": 0.9267,
      "step": 186650
    },
    {
      "epoch": 0.6541967637132132,
      "grad_norm": 3.171875,
      "learning_rate": 4.344091632462856e-05,
      "loss": 0.8897,
      "step": 186660
    },
    {
      "epoch": 0.6542318112201089,
      "grad_norm": 2.9375,
      "learning_rate": 4.344026729596486e-05,
      "loss": 0.8953,
      "step": 186670
    },
    {
      "epoch": 0.6542668587270044,
      "grad_norm": 2.8125,
      "learning_rate": 4.3439618267301154e-05,
      "loss": 0.8393,
      "step": 186680
    },
    {
      "epoch": 0.6543019062339,
      "grad_norm": 2.609375,
      "learning_rate": 4.3438969238637456e-05,
      "loss": 0.9074,
      "step": 186690
    },
    {
      "epoch": 0.6543369537407957,
      "grad_norm": 3.203125,
      "learning_rate": 4.343832020997376e-05,
      "loss": 0.9617,
      "step": 186700
    },
    {
      "epoch": 0.6543720012476912,
      "grad_norm": 2.828125,
      "learning_rate": 4.343767118131005e-05,
      "loss": 0.9321,
      "step": 186710
    },
    {
      "epoch": 0.6544070487545869,
      "grad_norm": 3.03125,
      "learning_rate": 4.343702215264635e-05,
      "loss": 0.9148,
      "step": 186720
    },
    {
      "epoch": 0.6544420962614824,
      "grad_norm": 2.890625,
      "learning_rate": 4.343637312398265e-05,
      "loss": 0.853,
      "step": 186730
    },
    {
      "epoch": 0.6544771437683781,
      "grad_norm": 2.71875,
      "learning_rate": 4.343572409531895e-05,
      "loss": 0.9021,
      "step": 186740
    },
    {
      "epoch": 0.6545121912752736,
      "grad_norm": 3.03125,
      "learning_rate": 4.3435075066655244e-05,
      "loss": 0.8815,
      "step": 186750
    },
    {
      "epoch": 0.6545472387821692,
      "grad_norm": 2.890625,
      "learning_rate": 4.3434426037991545e-05,
      "loss": 0.8971,
      "step": 186760
    },
    {
      "epoch": 0.6545822862890648,
      "grad_norm": 2.71875,
      "learning_rate": 4.343377700932784e-05,
      "loss": 0.8318,
      "step": 186770
    },
    {
      "epoch": 0.6546173337959604,
      "grad_norm": 3.0625,
      "learning_rate": 4.343312798066414e-05,
      "loss": 0.9638,
      "step": 186780
    },
    {
      "epoch": 0.654652381302856,
      "grad_norm": 2.78125,
      "learning_rate": 4.3432478952000436e-05,
      "loss": 0.957,
      "step": 186790
    },
    {
      "epoch": 0.6546874288097516,
      "grad_norm": 2.53125,
      "learning_rate": 4.343182992333674e-05,
      "loss": 0.8948,
      "step": 186800
    },
    {
      "epoch": 0.6547224763166473,
      "grad_norm": 3.109375,
      "learning_rate": 4.343118089467303e-05,
      "loss": 0.8828,
      "step": 186810
    },
    {
      "epoch": 0.6547575238235428,
      "grad_norm": 2.859375,
      "learning_rate": 4.343053186600933e-05,
      "loss": 0.948,
      "step": 186820
    },
    {
      "epoch": 0.6547925713304384,
      "grad_norm": 2.84375,
      "learning_rate": 4.3429882837345635e-05,
      "loss": 0.8595,
      "step": 186830
    },
    {
      "epoch": 0.654827618837334,
      "grad_norm": 3.125,
      "learning_rate": 4.342923380868193e-05,
      "loss": 0.9289,
      "step": 186840
    },
    {
      "epoch": 0.6548626663442296,
      "grad_norm": 2.90625,
      "learning_rate": 4.342858478001823e-05,
      "loss": 0.921,
      "step": 186850
    },
    {
      "epoch": 0.6548977138511252,
      "grad_norm": 3.046875,
      "learning_rate": 4.3427935751354525e-05,
      "loss": 0.9854,
      "step": 186860
    },
    {
      "epoch": 0.6549327613580208,
      "grad_norm": 2.765625,
      "learning_rate": 4.3427286722690827e-05,
      "loss": 0.9297,
      "step": 186870
    },
    {
      "epoch": 0.6549678088649163,
      "grad_norm": 2.78125,
      "learning_rate": 4.342663769402712e-05,
      "loss": 0.9663,
      "step": 186880
    },
    {
      "epoch": 0.655002856371812,
      "grad_norm": 3.21875,
      "learning_rate": 4.342598866536342e-05,
      "loss": 0.9468,
      "step": 186890
    },
    {
      "epoch": 0.6550379038787076,
      "grad_norm": 2.828125,
      "learning_rate": 4.342533963669972e-05,
      "loss": 0.9531,
      "step": 186900
    },
    {
      "epoch": 0.6550729513856032,
      "grad_norm": 3.0,
      "learning_rate": 4.342469060803602e-05,
      "loss": 0.9127,
      "step": 186910
    },
    {
      "epoch": 0.6551079988924988,
      "grad_norm": 2.6875,
      "learning_rate": 4.342404157937231e-05,
      "loss": 0.8633,
      "step": 186920
    },
    {
      "epoch": 0.6551430463993944,
      "grad_norm": 3.4375,
      "learning_rate": 4.3423392550708615e-05,
      "loss": 0.8552,
      "step": 186930
    },
    {
      "epoch": 0.65517809390629,
      "grad_norm": 2.53125,
      "learning_rate": 4.342274352204491e-05,
      "loss": 0.8494,
      "step": 186940
    },
    {
      "epoch": 0.6552131414131855,
      "grad_norm": 3.203125,
      "learning_rate": 4.342209449338121e-05,
      "loss": 0.9121,
      "step": 186950
    },
    {
      "epoch": 0.6552481889200812,
      "grad_norm": 3.0,
      "learning_rate": 4.3421445464717505e-05,
      "loss": 0.8579,
      "step": 186960
    },
    {
      "epoch": 0.6552832364269767,
      "grad_norm": 3.25,
      "learning_rate": 4.3420796436053807e-05,
      "loss": 0.8719,
      "step": 186970
    },
    {
      "epoch": 0.6553182839338724,
      "grad_norm": 3.453125,
      "learning_rate": 4.34201474073901e-05,
      "loss": 0.9022,
      "step": 186980
    },
    {
      "epoch": 0.6553533314407679,
      "grad_norm": 3.15625,
      "learning_rate": 4.3419498378726396e-05,
      "loss": 0.8258,
      "step": 186990
    },
    {
      "epoch": 0.6553883789476636,
      "grad_norm": 2.875,
      "learning_rate": 4.34188493500627e-05,
      "loss": 0.8914,
      "step": 187000
    },
    {
      "epoch": 0.6554234264545592,
      "grad_norm": 2.84375,
      "learning_rate": 4.341820032139899e-05,
      "loss": 0.9557,
      "step": 187010
    },
    {
      "epoch": 0.6554584739614547,
      "grad_norm": 3.03125,
      "learning_rate": 4.341755129273529e-05,
      "loss": 0.9021,
      "step": 187020
    },
    {
      "epoch": 0.6554935214683504,
      "grad_norm": 2.796875,
      "learning_rate": 4.341690226407159e-05,
      "loss": 0.8569,
      "step": 187030
    },
    {
      "epoch": 0.6555285689752459,
      "grad_norm": 2.59375,
      "learning_rate": 4.341625323540789e-05,
      "loss": 0.9284,
      "step": 187040
    },
    {
      "epoch": 0.6555636164821416,
      "grad_norm": 2.78125,
      "learning_rate": 4.3415604206744184e-05,
      "loss": 0.8651,
      "step": 187050
    },
    {
      "epoch": 0.6555986639890371,
      "grad_norm": 2.46875,
      "learning_rate": 4.3414955178080485e-05,
      "loss": 0.8517,
      "step": 187060
    },
    {
      "epoch": 0.6556337114959327,
      "grad_norm": 2.859375,
      "learning_rate": 4.3414306149416787e-05,
      "loss": 0.8932,
      "step": 187070
    },
    {
      "epoch": 0.6556687590028283,
      "grad_norm": 2.90625,
      "learning_rate": 4.341365712075308e-05,
      "loss": 0.915,
      "step": 187080
    },
    {
      "epoch": 0.6557038065097239,
      "grad_norm": 3.203125,
      "learning_rate": 4.341300809208938e-05,
      "loss": 0.8833,
      "step": 187090
    },
    {
      "epoch": 0.6557388540166196,
      "grad_norm": 2.203125,
      "learning_rate": 4.341235906342568e-05,
      "loss": 0.8629,
      "step": 187100
    },
    {
      "epoch": 0.6557739015235151,
      "grad_norm": 2.859375,
      "learning_rate": 4.341171003476198e-05,
      "loss": 0.972,
      "step": 187110
    },
    {
      "epoch": 0.6558089490304108,
      "grad_norm": 2.65625,
      "learning_rate": 4.341106100609827e-05,
      "loss": 0.8809,
      "step": 187120
    },
    {
      "epoch": 0.6558439965373063,
      "grad_norm": 2.890625,
      "learning_rate": 4.3410411977434575e-05,
      "loss": 0.8989,
      "step": 187130
    },
    {
      "epoch": 0.6558790440442019,
      "grad_norm": 3.0625,
      "learning_rate": 4.340976294877087e-05,
      "loss": 0.9545,
      "step": 187140
    },
    {
      "epoch": 0.6559140915510975,
      "grad_norm": 2.84375,
      "learning_rate": 4.340911392010717e-05,
      "loss": 0.8693,
      "step": 187150
    },
    {
      "epoch": 0.6559491390579931,
      "grad_norm": 2.984375,
      "learning_rate": 4.3408464891443465e-05,
      "loss": 1.0375,
      "step": 187160
    },
    {
      "epoch": 0.6559841865648887,
      "grad_norm": 3.03125,
      "learning_rate": 4.3407815862779767e-05,
      "loss": 0.8206,
      "step": 187170
    },
    {
      "epoch": 0.6560192340717843,
      "grad_norm": 3.171875,
      "learning_rate": 4.340716683411606e-05,
      "loss": 0.8906,
      "step": 187180
    },
    {
      "epoch": 0.65605428157868,
      "grad_norm": 2.84375,
      "learning_rate": 4.340651780545236e-05,
      "loss": 0.8216,
      "step": 187190
    },
    {
      "epoch": 0.6560893290855755,
      "grad_norm": 3.125,
      "learning_rate": 4.3405868776788664e-05,
      "loss": 0.9919,
      "step": 187200
    },
    {
      "epoch": 0.6561243765924711,
      "grad_norm": 2.796875,
      "learning_rate": 4.340521974812496e-05,
      "loss": 0.862,
      "step": 187210
    },
    {
      "epoch": 0.6561594240993667,
      "grad_norm": 2.71875,
      "learning_rate": 4.340457071946126e-05,
      "loss": 0.8548,
      "step": 187220
    },
    {
      "epoch": 0.6561944716062623,
      "grad_norm": 3.125,
      "learning_rate": 4.3403921690797555e-05,
      "loss": 0.9361,
      "step": 187230
    },
    {
      "epoch": 0.6562295191131579,
      "grad_norm": 2.90625,
      "learning_rate": 4.3403272662133856e-05,
      "loss": 0.9036,
      "step": 187240
    },
    {
      "epoch": 0.6562645666200535,
      "grad_norm": 2.953125,
      "learning_rate": 4.340262363347015e-05,
      "loss": 0.9209,
      "step": 187250
    },
    {
      "epoch": 0.656299614126949,
      "grad_norm": 2.9375,
      "learning_rate": 4.340197460480645e-05,
      "loss": 0.913,
      "step": 187260
    },
    {
      "epoch": 0.6563346616338447,
      "grad_norm": 3.203125,
      "learning_rate": 4.3401325576142747e-05,
      "loss": 0.9362,
      "step": 187270
    },
    {
      "epoch": 0.6563697091407402,
      "grad_norm": 2.625,
      "learning_rate": 4.340067654747905e-05,
      "loss": 0.882,
      "step": 187280
    },
    {
      "epoch": 0.6564047566476359,
      "grad_norm": 3.015625,
      "learning_rate": 4.340002751881534e-05,
      "loss": 0.9247,
      "step": 187290
    },
    {
      "epoch": 0.6564398041545315,
      "grad_norm": 3.34375,
      "learning_rate": 4.3399378490151644e-05,
      "loss": 0.9247,
      "step": 187300
    },
    {
      "epoch": 0.656474851661427,
      "grad_norm": 2.734375,
      "learning_rate": 4.339872946148794e-05,
      "loss": 0.8701,
      "step": 187310
    },
    {
      "epoch": 0.6565098991683227,
      "grad_norm": 3.203125,
      "learning_rate": 4.339808043282424e-05,
      "loss": 0.9495,
      "step": 187320
    },
    {
      "epoch": 0.6565449466752182,
      "grad_norm": 3.109375,
      "learning_rate": 4.3397431404160535e-05,
      "loss": 0.9692,
      "step": 187330
    },
    {
      "epoch": 0.6565799941821139,
      "grad_norm": 2.828125,
      "learning_rate": 4.3396782375496836e-05,
      "loss": 0.8875,
      "step": 187340
    },
    {
      "epoch": 0.6566150416890094,
      "grad_norm": 2.71875,
      "learning_rate": 4.339613334683314e-05,
      "loss": 0.8934,
      "step": 187350
    },
    {
      "epoch": 0.6566500891959051,
      "grad_norm": 2.921875,
      "learning_rate": 4.3395484318169425e-05,
      "loss": 0.9117,
      "step": 187360
    },
    {
      "epoch": 0.6566851367028006,
      "grad_norm": 3.265625,
      "learning_rate": 4.3394835289505727e-05,
      "loss": 0.9403,
      "step": 187370
    },
    {
      "epoch": 0.6567201842096962,
      "grad_norm": 2.75,
      "learning_rate": 4.339418626084202e-05,
      "loss": 0.8697,
      "step": 187380
    },
    {
      "epoch": 0.6567552317165919,
      "grad_norm": 2.546875,
      "learning_rate": 4.339353723217832e-05,
      "loss": 0.8624,
      "step": 187390
    },
    {
      "epoch": 0.6567902792234874,
      "grad_norm": 2.828125,
      "learning_rate": 4.339288820351462e-05,
      "loss": 1.0204,
      "step": 187400
    },
    {
      "epoch": 0.6568253267303831,
      "grad_norm": 2.40625,
      "learning_rate": 4.339223917485092e-05,
      "loss": 0.8211,
      "step": 187410
    },
    {
      "epoch": 0.6568603742372786,
      "grad_norm": 3.09375,
      "learning_rate": 4.339159014618722e-05,
      "loss": 0.8405,
      "step": 187420
    },
    {
      "epoch": 0.6568954217441743,
      "grad_norm": 2.71875,
      "learning_rate": 4.3390941117523515e-05,
      "loss": 0.7925,
      "step": 187430
    },
    {
      "epoch": 0.6569304692510698,
      "grad_norm": 2.765625,
      "learning_rate": 4.3390292088859816e-05,
      "loss": 0.9811,
      "step": 187440
    },
    {
      "epoch": 0.6569655167579654,
      "grad_norm": 3.28125,
      "learning_rate": 4.338964306019611e-05,
      "loss": 0.9327,
      "step": 187450
    },
    {
      "epoch": 0.657000564264861,
      "grad_norm": 3.109375,
      "learning_rate": 4.338899403153241e-05,
      "loss": 0.9534,
      "step": 187460
    },
    {
      "epoch": 0.6570356117717566,
      "grad_norm": 2.71875,
      "learning_rate": 4.3388345002868707e-05,
      "loss": 0.9926,
      "step": 187470
    },
    {
      "epoch": 0.6570706592786522,
      "grad_norm": 2.765625,
      "learning_rate": 4.338769597420501e-05,
      "loss": 0.8463,
      "step": 187480
    },
    {
      "epoch": 0.6571057067855478,
      "grad_norm": 3.375,
      "learning_rate": 4.33870469455413e-05,
      "loss": 0.9444,
      "step": 187490
    },
    {
      "epoch": 0.6571407542924435,
      "grad_norm": 3.078125,
      "learning_rate": 4.3386397916877604e-05,
      "loss": 0.9311,
      "step": 187500
    },
    {
      "epoch": 0.657175801799339,
      "grad_norm": 2.8125,
      "learning_rate": 4.33857488882139e-05,
      "loss": 0.8697,
      "step": 187510
    },
    {
      "epoch": 0.6572108493062346,
      "grad_norm": 2.796875,
      "learning_rate": 4.33850998595502e-05,
      "loss": 0.8868,
      "step": 187520
    },
    {
      "epoch": 0.6572458968131302,
      "grad_norm": 2.78125,
      "learning_rate": 4.3384450830886495e-05,
      "loss": 0.9985,
      "step": 187530
    },
    {
      "epoch": 0.6572809443200258,
      "grad_norm": 3.09375,
      "learning_rate": 4.3383801802222796e-05,
      "loss": 0.94,
      "step": 187540
    },
    {
      "epoch": 0.6573159918269214,
      "grad_norm": 3.109375,
      "learning_rate": 4.338315277355909e-05,
      "loss": 0.9592,
      "step": 187550
    },
    {
      "epoch": 0.657351039333817,
      "grad_norm": 2.703125,
      "learning_rate": 4.338250374489539e-05,
      "loss": 0.9302,
      "step": 187560
    },
    {
      "epoch": 0.6573860868407125,
      "grad_norm": 2.609375,
      "learning_rate": 4.338185471623169e-05,
      "loss": 0.833,
      "step": 187570
    },
    {
      "epoch": 0.6574211343476082,
      "grad_norm": 2.640625,
      "learning_rate": 4.338120568756799e-05,
      "loss": 0.8762,
      "step": 187580
    },
    {
      "epoch": 0.6574561818545038,
      "grad_norm": 3.125,
      "learning_rate": 4.338055665890429e-05,
      "loss": 0.9897,
      "step": 187590
    },
    {
      "epoch": 0.6574912293613994,
      "grad_norm": 2.984375,
      "learning_rate": 4.3379907630240584e-05,
      "loss": 1.0012,
      "step": 187600
    },
    {
      "epoch": 0.657526276868295,
      "grad_norm": 2.921875,
      "learning_rate": 4.3379258601576885e-05,
      "loss": 0.9294,
      "step": 187610
    },
    {
      "epoch": 0.6575613243751905,
      "grad_norm": 2.8125,
      "learning_rate": 4.337860957291318e-05,
      "loss": 0.882,
      "step": 187620
    },
    {
      "epoch": 0.6575963718820862,
      "grad_norm": 2.40625,
      "learning_rate": 4.337796054424948e-05,
      "loss": 0.9481,
      "step": 187630
    },
    {
      "epoch": 0.6576314193889817,
      "grad_norm": 3.265625,
      "learning_rate": 4.3377311515585776e-05,
      "loss": 0.9049,
      "step": 187640
    },
    {
      "epoch": 0.6576664668958774,
      "grad_norm": 3.46875,
      "learning_rate": 4.337666248692208e-05,
      "loss": 0.8695,
      "step": 187650
    },
    {
      "epoch": 0.6577015144027729,
      "grad_norm": 3.046875,
      "learning_rate": 4.337601345825837e-05,
      "loss": 0.8572,
      "step": 187660
    },
    {
      "epoch": 0.6577365619096686,
      "grad_norm": 3.21875,
      "learning_rate": 4.337536442959467e-05,
      "loss": 0.9328,
      "step": 187670
    },
    {
      "epoch": 0.6577716094165642,
      "grad_norm": 3.25,
      "learning_rate": 4.337471540093097e-05,
      "loss": 0.8829,
      "step": 187680
    },
    {
      "epoch": 0.6578066569234597,
      "grad_norm": 2.703125,
      "learning_rate": 4.337406637226727e-05,
      "loss": 0.9165,
      "step": 187690
    },
    {
      "epoch": 0.6578417044303554,
      "grad_norm": 2.84375,
      "learning_rate": 4.337341734360357e-05,
      "loss": 0.8698,
      "step": 187700
    },
    {
      "epoch": 0.6578767519372509,
      "grad_norm": 2.84375,
      "learning_rate": 4.3372768314939865e-05,
      "loss": 0.9297,
      "step": 187710
    },
    {
      "epoch": 0.6579117994441466,
      "grad_norm": 3.515625,
      "learning_rate": 4.337211928627617e-05,
      "loss": 0.9536,
      "step": 187720
    },
    {
      "epoch": 0.6579468469510421,
      "grad_norm": 2.78125,
      "learning_rate": 4.3371470257612455e-05,
      "loss": 0.8622,
      "step": 187730
    },
    {
      "epoch": 0.6579818944579378,
      "grad_norm": 3.015625,
      "learning_rate": 4.3370821228948756e-05,
      "loss": 0.9315,
      "step": 187740
    },
    {
      "epoch": 0.6580169419648333,
      "grad_norm": 2.890625,
      "learning_rate": 4.337017220028505e-05,
      "loss": 0.938,
      "step": 187750
    },
    {
      "epoch": 0.6580519894717289,
      "grad_norm": 2.78125,
      "learning_rate": 4.336952317162135e-05,
      "loss": 0.8219,
      "step": 187760
    },
    {
      "epoch": 0.6580870369786245,
      "grad_norm": 2.71875,
      "learning_rate": 4.3368874142957647e-05,
      "loss": 0.9241,
      "step": 187770
    },
    {
      "epoch": 0.6581220844855201,
      "grad_norm": 3.65625,
      "learning_rate": 4.336822511429395e-05,
      "loss": 0.9946,
      "step": 187780
    },
    {
      "epoch": 0.6581571319924158,
      "grad_norm": 2.953125,
      "learning_rate": 4.336757608563025e-05,
      "loss": 0.9039,
      "step": 187790
    },
    {
      "epoch": 0.6581921794993113,
      "grad_norm": 2.84375,
      "learning_rate": 4.3366927056966544e-05,
      "loss": 0.8398,
      "step": 187800
    },
    {
      "epoch": 0.658227227006207,
      "grad_norm": 2.8125,
      "learning_rate": 4.3366278028302845e-05,
      "loss": 0.8763,
      "step": 187810
    },
    {
      "epoch": 0.6582622745131025,
      "grad_norm": 2.609375,
      "learning_rate": 4.336562899963914e-05,
      "loss": 0.944,
      "step": 187820
    },
    {
      "epoch": 0.6582973220199981,
      "grad_norm": 2.90625,
      "learning_rate": 4.336497997097544e-05,
      "loss": 0.954,
      "step": 187830
    },
    {
      "epoch": 0.6583323695268937,
      "grad_norm": 2.625,
      "learning_rate": 4.3364330942311736e-05,
      "loss": 0.9076,
      "step": 187840
    },
    {
      "epoch": 0.6583674170337893,
      "grad_norm": 3.109375,
      "learning_rate": 4.336368191364804e-05,
      "loss": 0.996,
      "step": 187850
    },
    {
      "epoch": 0.6584024645406849,
      "grad_norm": 2.921875,
      "learning_rate": 4.336303288498433e-05,
      "loss": 0.9839,
      "step": 187860
    },
    {
      "epoch": 0.6584375120475805,
      "grad_norm": 3.453125,
      "learning_rate": 4.336238385632063e-05,
      "loss": 0.9406,
      "step": 187870
    },
    {
      "epoch": 0.6584725595544761,
      "grad_norm": 3.265625,
      "learning_rate": 4.336173482765693e-05,
      "loss": 0.9365,
      "step": 187880
    },
    {
      "epoch": 0.6585076070613717,
      "grad_norm": 2.5,
      "learning_rate": 4.336108579899323e-05,
      "loss": 0.8977,
      "step": 187890
    },
    {
      "epoch": 0.6585426545682673,
      "grad_norm": 3.265625,
      "learning_rate": 4.3360436770329524e-05,
      "loss": 0.9245,
      "step": 187900
    },
    {
      "epoch": 0.6585777020751629,
      "grad_norm": 2.84375,
      "learning_rate": 4.3359787741665825e-05,
      "loss": 1.0455,
      "step": 187910
    },
    {
      "epoch": 0.6586127495820585,
      "grad_norm": 3.078125,
      "learning_rate": 4.335913871300212e-05,
      "loss": 0.8936,
      "step": 187920
    },
    {
      "epoch": 0.658647797088954,
      "grad_norm": 3.0625,
      "learning_rate": 4.335848968433842e-05,
      "loss": 0.9218,
      "step": 187930
    },
    {
      "epoch": 0.6586828445958497,
      "grad_norm": 2.953125,
      "learning_rate": 4.335784065567472e-05,
      "loss": 0.8801,
      "step": 187940
    },
    {
      "epoch": 0.6587178921027452,
      "grad_norm": 3.140625,
      "learning_rate": 4.335719162701102e-05,
      "loss": 0.8882,
      "step": 187950
    },
    {
      "epoch": 0.6587529396096409,
      "grad_norm": 3.171875,
      "learning_rate": 4.335654259834732e-05,
      "loss": 0.9689,
      "step": 187960
    },
    {
      "epoch": 0.6587879871165364,
      "grad_norm": 3.390625,
      "learning_rate": 4.335589356968361e-05,
      "loss": 1.0076,
      "step": 187970
    },
    {
      "epoch": 0.6588230346234321,
      "grad_norm": 2.796875,
      "learning_rate": 4.3355244541019915e-05,
      "loss": 0.8837,
      "step": 187980
    },
    {
      "epoch": 0.6588580821303277,
      "grad_norm": 3.59375,
      "learning_rate": 4.335459551235621e-05,
      "loss": 0.8652,
      "step": 187990
    },
    {
      "epoch": 0.6588931296372232,
      "grad_norm": 2.859375,
      "learning_rate": 4.335394648369251e-05,
      "loss": 0.8608,
      "step": 188000
    },
    {
      "epoch": 0.6589281771441189,
      "grad_norm": 3.578125,
      "learning_rate": 4.3353297455028805e-05,
      "loss": 0.9843,
      "step": 188010
    },
    {
      "epoch": 0.6589632246510144,
      "grad_norm": 2.90625,
      "learning_rate": 4.335264842636511e-05,
      "loss": 0.9716,
      "step": 188020
    },
    {
      "epoch": 0.6589982721579101,
      "grad_norm": 2.578125,
      "learning_rate": 4.33519993977014e-05,
      "loss": 0.8867,
      "step": 188030
    },
    {
      "epoch": 0.6590333196648056,
      "grad_norm": 2.84375,
      "learning_rate": 4.33513503690377e-05,
      "loss": 0.9298,
      "step": 188040
    },
    {
      "epoch": 0.6590683671717013,
      "grad_norm": 3.1875,
      "learning_rate": 4.3350701340374e-05,
      "loss": 0.8573,
      "step": 188050
    },
    {
      "epoch": 0.6591034146785968,
      "grad_norm": 2.75,
      "learning_rate": 4.33500523117103e-05,
      "loss": 0.8599,
      "step": 188060
    },
    {
      "epoch": 0.6591384621854924,
      "grad_norm": 2.953125,
      "learning_rate": 4.33494032830466e-05,
      "loss": 0.9126,
      "step": 188070
    },
    {
      "epoch": 0.6591735096923881,
      "grad_norm": 2.796875,
      "learning_rate": 4.3348754254382895e-05,
      "loss": 0.8964,
      "step": 188080
    },
    {
      "epoch": 0.6592085571992836,
      "grad_norm": 2.609375,
      "learning_rate": 4.3348105225719196e-05,
      "loss": 0.9531,
      "step": 188090
    },
    {
      "epoch": 0.6592436047061793,
      "grad_norm": 2.78125,
      "learning_rate": 4.334745619705549e-05,
      "loss": 0.8185,
      "step": 188100
    },
    {
      "epoch": 0.6592786522130748,
      "grad_norm": 3.09375,
      "learning_rate": 4.3346807168391785e-05,
      "loss": 0.9194,
      "step": 188110
    },
    {
      "epoch": 0.6593136997199704,
      "grad_norm": 2.65625,
      "learning_rate": 4.334615813972808e-05,
      "loss": 0.9074,
      "step": 188120
    },
    {
      "epoch": 0.659348747226866,
      "grad_norm": 2.390625,
      "learning_rate": 4.334550911106438e-05,
      "loss": 0.9256,
      "step": 188130
    },
    {
      "epoch": 0.6593837947337616,
      "grad_norm": 2.640625,
      "learning_rate": 4.3344860082400676e-05,
      "loss": 0.9534,
      "step": 188140
    },
    {
      "epoch": 0.6594188422406572,
      "grad_norm": 3.09375,
      "learning_rate": 4.334421105373698e-05,
      "loss": 0.8864,
      "step": 188150
    },
    {
      "epoch": 0.6594538897475528,
      "grad_norm": 3.078125,
      "learning_rate": 4.334356202507328e-05,
      "loss": 0.9021,
      "step": 188160
    },
    {
      "epoch": 0.6594889372544485,
      "grad_norm": 3.1875,
      "learning_rate": 4.334291299640957e-05,
      "loss": 0.8653,
      "step": 188170
    },
    {
      "epoch": 0.659523984761344,
      "grad_norm": 2.90625,
      "learning_rate": 4.3342263967745875e-05,
      "loss": 0.8699,
      "step": 188180
    },
    {
      "epoch": 0.6595590322682396,
      "grad_norm": 2.828125,
      "learning_rate": 4.334161493908217e-05,
      "loss": 0.8268,
      "step": 188190
    },
    {
      "epoch": 0.6595940797751352,
      "grad_norm": 3.125,
      "learning_rate": 4.334096591041847e-05,
      "loss": 0.9351,
      "step": 188200
    },
    {
      "epoch": 0.6596291272820308,
      "grad_norm": 2.953125,
      "learning_rate": 4.3340316881754765e-05,
      "loss": 0.9186,
      "step": 188210
    },
    {
      "epoch": 0.6596641747889264,
      "grad_norm": 2.859375,
      "learning_rate": 4.333966785309107e-05,
      "loss": 0.8798,
      "step": 188220
    },
    {
      "epoch": 0.659699222295822,
      "grad_norm": 2.71875,
      "learning_rate": 4.333901882442736e-05,
      "loss": 0.9042,
      "step": 188230
    },
    {
      "epoch": 0.6597342698027175,
      "grad_norm": 3.0,
      "learning_rate": 4.333836979576366e-05,
      "loss": 0.9108,
      "step": 188240
    },
    {
      "epoch": 0.6597693173096132,
      "grad_norm": 3.171875,
      "learning_rate": 4.333772076709996e-05,
      "loss": 0.8765,
      "step": 188250
    },
    {
      "epoch": 0.6598043648165087,
      "grad_norm": 2.515625,
      "learning_rate": 4.333707173843626e-05,
      "loss": 0.8472,
      "step": 188260
    },
    {
      "epoch": 0.6598394123234044,
      "grad_norm": 2.765625,
      "learning_rate": 4.333642270977255e-05,
      "loss": 0.9655,
      "step": 188270
    },
    {
      "epoch": 0.6598744598303,
      "grad_norm": 2.84375,
      "learning_rate": 4.3335773681108855e-05,
      "loss": 0.9763,
      "step": 188280
    },
    {
      "epoch": 0.6599095073371956,
      "grad_norm": 3.296875,
      "learning_rate": 4.333512465244515e-05,
      "loss": 0.9481,
      "step": 188290
    },
    {
      "epoch": 0.6599445548440912,
      "grad_norm": 2.578125,
      "learning_rate": 4.333447562378145e-05,
      "loss": 0.9361,
      "step": 188300
    },
    {
      "epoch": 0.6599796023509867,
      "grad_norm": 2.5,
      "learning_rate": 4.333382659511775e-05,
      "loss": 0.9437,
      "step": 188310
    },
    {
      "epoch": 0.6600146498578824,
      "grad_norm": 2.90625,
      "learning_rate": 4.333317756645405e-05,
      "loss": 0.9127,
      "step": 188320
    },
    {
      "epoch": 0.6600496973647779,
      "grad_norm": 2.71875,
      "learning_rate": 4.333252853779035e-05,
      "loss": 0.9428,
      "step": 188330
    },
    {
      "epoch": 0.6600847448716736,
      "grad_norm": 2.75,
      "learning_rate": 4.333187950912664e-05,
      "loss": 0.9117,
      "step": 188340
    },
    {
      "epoch": 0.6601197923785691,
      "grad_norm": 2.890625,
      "learning_rate": 4.3331230480462944e-05,
      "loss": 0.9367,
      "step": 188350
    },
    {
      "epoch": 0.6601548398854648,
      "grad_norm": 2.9375,
      "learning_rate": 4.333058145179924e-05,
      "loss": 0.9116,
      "step": 188360
    },
    {
      "epoch": 0.6601898873923604,
      "grad_norm": 3.1875,
      "learning_rate": 4.332993242313554e-05,
      "loss": 0.8204,
      "step": 188370
    },
    {
      "epoch": 0.6602249348992559,
      "grad_norm": 2.921875,
      "learning_rate": 4.3329283394471835e-05,
      "loss": 0.9647,
      "step": 188380
    },
    {
      "epoch": 0.6602599824061516,
      "grad_norm": 2.671875,
      "learning_rate": 4.3328634365808136e-05,
      "loss": 0.857,
      "step": 188390
    },
    {
      "epoch": 0.6602950299130471,
      "grad_norm": 2.890625,
      "learning_rate": 4.332798533714443e-05,
      "loss": 0.8322,
      "step": 188400
    },
    {
      "epoch": 0.6603300774199428,
      "grad_norm": 3.203125,
      "learning_rate": 4.332733630848073e-05,
      "loss": 0.838,
      "step": 188410
    },
    {
      "epoch": 0.6603651249268383,
      "grad_norm": 2.859375,
      "learning_rate": 4.332668727981703e-05,
      "loss": 0.9173,
      "step": 188420
    },
    {
      "epoch": 0.660400172433734,
      "grad_norm": 2.96875,
      "learning_rate": 4.332603825115333e-05,
      "loss": 0.9692,
      "step": 188430
    },
    {
      "epoch": 0.6604352199406295,
      "grad_norm": 3.578125,
      "learning_rate": 4.332538922248963e-05,
      "loss": 1.0335,
      "step": 188440
    },
    {
      "epoch": 0.6604702674475251,
      "grad_norm": 2.765625,
      "learning_rate": 4.3324740193825924e-05,
      "loss": 0.9759,
      "step": 188450
    },
    {
      "epoch": 0.6605053149544207,
      "grad_norm": 2.703125,
      "learning_rate": 4.3324091165162225e-05,
      "loss": 0.9078,
      "step": 188460
    },
    {
      "epoch": 0.6605403624613163,
      "grad_norm": 2.609375,
      "learning_rate": 4.332344213649852e-05,
      "loss": 0.8603,
      "step": 188470
    },
    {
      "epoch": 0.660575409968212,
      "grad_norm": 2.875,
      "learning_rate": 4.332279310783482e-05,
      "loss": 0.8325,
      "step": 188480
    },
    {
      "epoch": 0.6606104574751075,
      "grad_norm": 2.65625,
      "learning_rate": 4.332214407917111e-05,
      "loss": 1.0211,
      "step": 188490
    },
    {
      "epoch": 0.6606455049820031,
      "grad_norm": 3.234375,
      "learning_rate": 4.332149505050741e-05,
      "loss": 0.9389,
      "step": 188500
    },
    {
      "epoch": 0.6606805524888987,
      "grad_norm": 3.078125,
      "learning_rate": 4.3320846021843705e-05,
      "loss": 0.866,
      "step": 188510
    },
    {
      "epoch": 0.6607155999957943,
      "grad_norm": 2.9375,
      "learning_rate": 4.332019699318001e-05,
      "loss": 0.9384,
      "step": 188520
    },
    {
      "epoch": 0.6607506475026899,
      "grad_norm": 2.75,
      "learning_rate": 4.331954796451631e-05,
      "loss": 0.8485,
      "step": 188530
    },
    {
      "epoch": 0.6607856950095855,
      "grad_norm": 3.046875,
      "learning_rate": 4.33188989358526e-05,
      "loss": 0.9013,
      "step": 188540
    },
    {
      "epoch": 0.660820742516481,
      "grad_norm": 2.78125,
      "learning_rate": 4.3318249907188904e-05,
      "loss": 0.9298,
      "step": 188550
    },
    {
      "epoch": 0.6608557900233767,
      "grad_norm": 2.609375,
      "learning_rate": 4.33176008785252e-05,
      "loss": 0.8369,
      "step": 188560
    },
    {
      "epoch": 0.6608908375302723,
      "grad_norm": 2.515625,
      "learning_rate": 4.33169518498615e-05,
      "loss": 0.8894,
      "step": 188570
    },
    {
      "epoch": 0.6609258850371679,
      "grad_norm": 3.109375,
      "learning_rate": 4.3316302821197795e-05,
      "loss": 0.9051,
      "step": 188580
    },
    {
      "epoch": 0.6609609325440635,
      "grad_norm": 2.84375,
      "learning_rate": 4.3315653792534096e-05,
      "loss": 0.8175,
      "step": 188590
    },
    {
      "epoch": 0.660995980050959,
      "grad_norm": 3.046875,
      "learning_rate": 4.331500476387039e-05,
      "loss": 0.8738,
      "step": 188600
    },
    {
      "epoch": 0.6610310275578547,
      "grad_norm": 2.640625,
      "learning_rate": 4.331435573520669e-05,
      "loss": 0.8558,
      "step": 188610
    },
    {
      "epoch": 0.6610660750647502,
      "grad_norm": 2.953125,
      "learning_rate": 4.331370670654299e-05,
      "loss": 0.906,
      "step": 188620
    },
    {
      "epoch": 0.6611011225716459,
      "grad_norm": 3.171875,
      "learning_rate": 4.331305767787929e-05,
      "loss": 0.9852,
      "step": 188630
    },
    {
      "epoch": 0.6611361700785414,
      "grad_norm": 2.8125,
      "learning_rate": 4.331240864921558e-05,
      "loss": 0.9547,
      "step": 188640
    },
    {
      "epoch": 0.6611712175854371,
      "grad_norm": 3.25,
      "learning_rate": 4.3311759620551884e-05,
      "loss": 0.9242,
      "step": 188650
    },
    {
      "epoch": 0.6612062650923327,
      "grad_norm": 2.875,
      "learning_rate": 4.3311110591888185e-05,
      "loss": 0.9086,
      "step": 188660
    },
    {
      "epoch": 0.6612413125992282,
      "grad_norm": 2.84375,
      "learning_rate": 4.331046156322448e-05,
      "loss": 0.8628,
      "step": 188670
    },
    {
      "epoch": 0.6612763601061239,
      "grad_norm": 3.03125,
      "learning_rate": 4.330981253456078e-05,
      "loss": 0.928,
      "step": 188680
    },
    {
      "epoch": 0.6613114076130194,
      "grad_norm": 3.046875,
      "learning_rate": 4.3309163505897076e-05,
      "loss": 0.84,
      "step": 188690
    },
    {
      "epoch": 0.6613464551199151,
      "grad_norm": 3.625,
      "learning_rate": 4.330851447723338e-05,
      "loss": 0.903,
      "step": 188700
    },
    {
      "epoch": 0.6613815026268106,
      "grad_norm": 2.984375,
      "learning_rate": 4.330786544856967e-05,
      "loss": 0.9654,
      "step": 188710
    },
    {
      "epoch": 0.6614165501337063,
      "grad_norm": 3.09375,
      "learning_rate": 4.3307216419905973e-05,
      "loss": 0.8872,
      "step": 188720
    },
    {
      "epoch": 0.6614515976406018,
      "grad_norm": 2.4375,
      "learning_rate": 4.330656739124227e-05,
      "loss": 0.8831,
      "step": 188730
    },
    {
      "epoch": 0.6614866451474974,
      "grad_norm": 2.65625,
      "learning_rate": 4.330591836257857e-05,
      "loss": 0.8572,
      "step": 188740
    },
    {
      "epoch": 0.661521692654393,
      "grad_norm": 2.78125,
      "learning_rate": 4.3305269333914864e-05,
      "loss": 0.971,
      "step": 188750
    },
    {
      "epoch": 0.6615567401612886,
      "grad_norm": 2.78125,
      "learning_rate": 4.3304620305251165e-05,
      "loss": 0.9266,
      "step": 188760
    },
    {
      "epoch": 0.6615917876681843,
      "grad_norm": 2.8125,
      "learning_rate": 4.330397127658746e-05,
      "loss": 0.9027,
      "step": 188770
    },
    {
      "epoch": 0.6616268351750798,
      "grad_norm": 2.890625,
      "learning_rate": 4.330332224792376e-05,
      "loss": 0.926,
      "step": 188780
    },
    {
      "epoch": 0.6616618826819755,
      "grad_norm": 3.15625,
      "learning_rate": 4.3302673219260056e-05,
      "loss": 0.8861,
      "step": 188790
    },
    {
      "epoch": 0.661696930188871,
      "grad_norm": 3.015625,
      "learning_rate": 4.330202419059636e-05,
      "loss": 0.9254,
      "step": 188800
    },
    {
      "epoch": 0.6617319776957666,
      "grad_norm": 3.1875,
      "learning_rate": 4.330137516193266e-05,
      "loss": 0.947,
      "step": 188810
    },
    {
      "epoch": 0.6617670252026622,
      "grad_norm": 3.125,
      "learning_rate": 4.3300726133268953e-05,
      "loss": 0.9692,
      "step": 188820
    },
    {
      "epoch": 0.6618020727095578,
      "grad_norm": 2.65625,
      "learning_rate": 4.3300077104605255e-05,
      "loss": 0.8669,
      "step": 188830
    },
    {
      "epoch": 0.6618371202164534,
      "grad_norm": 2.734375,
      "learning_rate": 4.329942807594155e-05,
      "loss": 0.8835,
      "step": 188840
    },
    {
      "epoch": 0.661872167723349,
      "grad_norm": 2.90625,
      "learning_rate": 4.329877904727785e-05,
      "loss": 0.9094,
      "step": 188850
    },
    {
      "epoch": 0.6619072152302446,
      "grad_norm": 2.65625,
      "learning_rate": 4.329813001861414e-05,
      "loss": 0.9342,
      "step": 188860
    },
    {
      "epoch": 0.6619422627371402,
      "grad_norm": 2.984375,
      "learning_rate": 4.329748098995044e-05,
      "loss": 0.8788,
      "step": 188870
    },
    {
      "epoch": 0.6619773102440358,
      "grad_norm": 2.8125,
      "learning_rate": 4.3296831961286735e-05,
      "loss": 1.0157,
      "step": 188880
    },
    {
      "epoch": 0.6620123577509314,
      "grad_norm": 3.3125,
      "learning_rate": 4.3296182932623036e-05,
      "loss": 0.9733,
      "step": 188890
    },
    {
      "epoch": 0.662047405257827,
      "grad_norm": 2.375,
      "learning_rate": 4.329553390395934e-05,
      "loss": 0.858,
      "step": 188900
    },
    {
      "epoch": 0.6620824527647226,
      "grad_norm": 2.671875,
      "learning_rate": 4.329488487529563e-05,
      "loss": 0.8628,
      "step": 188910
    },
    {
      "epoch": 0.6621175002716182,
      "grad_norm": 2.71875,
      "learning_rate": 4.3294235846631933e-05,
      "loss": 0.9413,
      "step": 188920
    },
    {
      "epoch": 0.6621525477785137,
      "grad_norm": 2.671875,
      "learning_rate": 4.329358681796823e-05,
      "loss": 0.9035,
      "step": 188930
    },
    {
      "epoch": 0.6621875952854094,
      "grad_norm": 3.34375,
      "learning_rate": 4.329293778930453e-05,
      "loss": 0.9318,
      "step": 188940
    },
    {
      "epoch": 0.6622226427923049,
      "grad_norm": 2.96875,
      "learning_rate": 4.3292288760640824e-05,
      "loss": 0.871,
      "step": 188950
    },
    {
      "epoch": 0.6622576902992006,
      "grad_norm": 3.15625,
      "learning_rate": 4.3291639731977125e-05,
      "loss": 1.0198,
      "step": 188960
    },
    {
      "epoch": 0.6622927378060962,
      "grad_norm": 2.53125,
      "learning_rate": 4.329099070331342e-05,
      "loss": 0.9404,
      "step": 188970
    },
    {
      "epoch": 0.6623277853129917,
      "grad_norm": 2.765625,
      "learning_rate": 4.329034167464972e-05,
      "loss": 1.0011,
      "step": 188980
    },
    {
      "epoch": 0.6623628328198874,
      "grad_norm": 2.734375,
      "learning_rate": 4.3289692645986016e-05,
      "loss": 0.8904,
      "step": 188990
    },
    {
      "epoch": 0.6623978803267829,
      "grad_norm": 2.953125,
      "learning_rate": 4.328904361732232e-05,
      "loss": 0.877,
      "step": 189000
    },
    {
      "epoch": 0.6624329278336786,
      "grad_norm": 3.265625,
      "learning_rate": 4.328839458865861e-05,
      "loss": 0.922,
      "step": 189010
    },
    {
      "epoch": 0.6624679753405741,
      "grad_norm": 2.859375,
      "learning_rate": 4.3287745559994913e-05,
      "loss": 0.9199,
      "step": 189020
    },
    {
      "epoch": 0.6625030228474698,
      "grad_norm": 3.1875,
      "learning_rate": 4.3287096531331215e-05,
      "loss": 0.9834,
      "step": 189030
    },
    {
      "epoch": 0.6625380703543653,
      "grad_norm": 3.265625,
      "learning_rate": 4.328644750266751e-05,
      "loss": 0.9121,
      "step": 189040
    },
    {
      "epoch": 0.6625731178612609,
      "grad_norm": 2.890625,
      "learning_rate": 4.328579847400381e-05,
      "loss": 0.8876,
      "step": 189050
    },
    {
      "epoch": 0.6626081653681566,
      "grad_norm": 3.40625,
      "learning_rate": 4.3285149445340105e-05,
      "loss": 0.8916,
      "step": 189060
    },
    {
      "epoch": 0.6626432128750521,
      "grad_norm": 2.78125,
      "learning_rate": 4.328450041667641e-05,
      "loss": 0.898,
      "step": 189070
    },
    {
      "epoch": 0.6626782603819478,
      "grad_norm": 3.421875,
      "learning_rate": 4.32838513880127e-05,
      "loss": 0.9421,
      "step": 189080
    },
    {
      "epoch": 0.6627133078888433,
      "grad_norm": 3.203125,
      "learning_rate": 4.3283202359349e-05,
      "loss": 0.8916,
      "step": 189090
    },
    {
      "epoch": 0.662748355395739,
      "grad_norm": 2.71875,
      "learning_rate": 4.32825533306853e-05,
      "loss": 0.8587,
      "step": 189100
    },
    {
      "epoch": 0.6627834029026345,
      "grad_norm": 2.625,
      "learning_rate": 4.32819043020216e-05,
      "loss": 0.9611,
      "step": 189110
    },
    {
      "epoch": 0.6628184504095301,
      "grad_norm": 3.234375,
      "learning_rate": 4.3281255273357893e-05,
      "loss": 0.9439,
      "step": 189120
    },
    {
      "epoch": 0.6628534979164257,
      "grad_norm": 2.609375,
      "learning_rate": 4.3280606244694195e-05,
      "loss": 1.0013,
      "step": 189130
    },
    {
      "epoch": 0.6628885454233213,
      "grad_norm": 3.265625,
      "learning_rate": 4.327995721603049e-05,
      "loss": 0.812,
      "step": 189140
    },
    {
      "epoch": 0.6629235929302169,
      "grad_norm": 3.078125,
      "learning_rate": 4.327930818736679e-05,
      "loss": 0.9404,
      "step": 189150
    },
    {
      "epoch": 0.6629586404371125,
      "grad_norm": 2.6875,
      "learning_rate": 4.3278659158703085e-05,
      "loss": 0.8508,
      "step": 189160
    },
    {
      "epoch": 0.6629936879440081,
      "grad_norm": 2.59375,
      "learning_rate": 4.327801013003939e-05,
      "loss": 0.8976,
      "step": 189170
    },
    {
      "epoch": 0.6630287354509037,
      "grad_norm": 2.578125,
      "learning_rate": 4.327736110137569e-05,
      "loss": 0.9541,
      "step": 189180
    },
    {
      "epoch": 0.6630637829577993,
      "grad_norm": 3.125,
      "learning_rate": 4.327671207271198e-05,
      "loss": 0.8505,
      "step": 189190
    },
    {
      "epoch": 0.6630988304646949,
      "grad_norm": 2.6875,
      "learning_rate": 4.3276063044048284e-05,
      "loss": 0.9064,
      "step": 189200
    },
    {
      "epoch": 0.6631338779715905,
      "grad_norm": 2.78125,
      "learning_rate": 4.327541401538458e-05,
      "loss": 0.8783,
      "step": 189210
    },
    {
      "epoch": 0.663168925478486,
      "grad_norm": 2.40625,
      "learning_rate": 4.327476498672088e-05,
      "loss": 0.9637,
      "step": 189220
    },
    {
      "epoch": 0.6632039729853817,
      "grad_norm": 2.984375,
      "learning_rate": 4.3274115958057175e-05,
      "loss": 0.9633,
      "step": 189230
    },
    {
      "epoch": 0.6632390204922772,
      "grad_norm": 3.28125,
      "learning_rate": 4.327346692939347e-05,
      "loss": 1.0066,
      "step": 189240
    },
    {
      "epoch": 0.6632740679991729,
      "grad_norm": 2.859375,
      "learning_rate": 4.3272817900729764e-05,
      "loss": 0.8876,
      "step": 189250
    },
    {
      "epoch": 0.6633091155060685,
      "grad_norm": 2.96875,
      "learning_rate": 4.3272168872066065e-05,
      "loss": 0.9461,
      "step": 189260
    },
    {
      "epoch": 0.6633441630129641,
      "grad_norm": 3.09375,
      "learning_rate": 4.327151984340237e-05,
      "loss": 0.9248,
      "step": 189270
    },
    {
      "epoch": 0.6633792105198597,
      "grad_norm": 2.65625,
      "learning_rate": 4.327087081473866e-05,
      "loss": 0.8977,
      "step": 189280
    },
    {
      "epoch": 0.6634142580267552,
      "grad_norm": 2.765625,
      "learning_rate": 4.327022178607496e-05,
      "loss": 0.8978,
      "step": 189290
    },
    {
      "epoch": 0.6634493055336509,
      "grad_norm": 3.390625,
      "learning_rate": 4.326957275741126e-05,
      "loss": 0.9591,
      "step": 189300
    },
    {
      "epoch": 0.6634843530405464,
      "grad_norm": 3.1875,
      "learning_rate": 4.326892372874756e-05,
      "loss": 0.96,
      "step": 189310
    },
    {
      "epoch": 0.6635194005474421,
      "grad_norm": 3.078125,
      "learning_rate": 4.3268274700083853e-05,
      "loss": 0.9495,
      "step": 189320
    },
    {
      "epoch": 0.6635544480543376,
      "grad_norm": 2.90625,
      "learning_rate": 4.3267625671420155e-05,
      "loss": 0.9367,
      "step": 189330
    },
    {
      "epoch": 0.6635894955612333,
      "grad_norm": 2.65625,
      "learning_rate": 4.326697664275645e-05,
      "loss": 0.8359,
      "step": 189340
    },
    {
      "epoch": 0.6636245430681289,
      "grad_norm": 2.890625,
      "learning_rate": 4.326632761409275e-05,
      "loss": 0.9705,
      "step": 189350
    },
    {
      "epoch": 0.6636595905750244,
      "grad_norm": 2.78125,
      "learning_rate": 4.3265678585429045e-05,
      "loss": 0.9183,
      "step": 189360
    },
    {
      "epoch": 0.6636946380819201,
      "grad_norm": 2.953125,
      "learning_rate": 4.326502955676535e-05,
      "loss": 0.8456,
      "step": 189370
    },
    {
      "epoch": 0.6637296855888156,
      "grad_norm": 2.984375,
      "learning_rate": 4.326438052810164e-05,
      "loss": 0.8956,
      "step": 189380
    },
    {
      "epoch": 0.6637647330957113,
      "grad_norm": 3.21875,
      "learning_rate": 4.326373149943794e-05,
      "loss": 0.9387,
      "step": 189390
    },
    {
      "epoch": 0.6637997806026068,
      "grad_norm": 2.6875,
      "learning_rate": 4.3263082470774244e-05,
      "loss": 0.855,
      "step": 189400
    },
    {
      "epoch": 0.6638348281095025,
      "grad_norm": 2.734375,
      "learning_rate": 4.326243344211054e-05,
      "loss": 0.9561,
      "step": 189410
    },
    {
      "epoch": 0.663869875616398,
      "grad_norm": 3.5,
      "learning_rate": 4.326178441344684e-05,
      "loss": 0.9665,
      "step": 189420
    },
    {
      "epoch": 0.6639049231232936,
      "grad_norm": 3.375,
      "learning_rate": 4.3261135384783135e-05,
      "loss": 0.985,
      "step": 189430
    },
    {
      "epoch": 0.6639399706301892,
      "grad_norm": 2.84375,
      "learning_rate": 4.3260486356119436e-05,
      "loss": 0.8644,
      "step": 189440
    },
    {
      "epoch": 0.6639750181370848,
      "grad_norm": 2.8125,
      "learning_rate": 4.325983732745573e-05,
      "loss": 0.9294,
      "step": 189450
    },
    {
      "epoch": 0.6640100656439805,
      "grad_norm": 2.5625,
      "learning_rate": 4.325918829879203e-05,
      "loss": 0.8205,
      "step": 189460
    },
    {
      "epoch": 0.664045113150876,
      "grad_norm": 2.5625,
      "learning_rate": 4.325853927012833e-05,
      "loss": 0.9159,
      "step": 189470
    },
    {
      "epoch": 0.6640801606577716,
      "grad_norm": 2.796875,
      "learning_rate": 4.325789024146463e-05,
      "loss": 0.9286,
      "step": 189480
    },
    {
      "epoch": 0.6641152081646672,
      "grad_norm": 3.21875,
      "learning_rate": 4.325724121280092e-05,
      "loss": 0.866,
      "step": 189490
    },
    {
      "epoch": 0.6641502556715628,
      "grad_norm": 2.375,
      "learning_rate": 4.3256592184137224e-05,
      "loss": 0.8323,
      "step": 189500
    },
    {
      "epoch": 0.6641853031784584,
      "grad_norm": 2.5,
      "learning_rate": 4.325594315547352e-05,
      "loss": 0.7997,
      "step": 189510
    },
    {
      "epoch": 0.664220350685354,
      "grad_norm": 2.71875,
      "learning_rate": 4.325529412680982e-05,
      "loss": 0.8476,
      "step": 189520
    },
    {
      "epoch": 0.6642553981922495,
      "grad_norm": 2.734375,
      "learning_rate": 4.325464509814612e-05,
      "loss": 0.8839,
      "step": 189530
    },
    {
      "epoch": 0.6642904456991452,
      "grad_norm": 3.234375,
      "learning_rate": 4.3253996069482416e-05,
      "loss": 0.9282,
      "step": 189540
    },
    {
      "epoch": 0.6643254932060408,
      "grad_norm": 3.0,
      "learning_rate": 4.325334704081872e-05,
      "loss": 0.9683,
      "step": 189550
    },
    {
      "epoch": 0.6643605407129364,
      "grad_norm": 2.765625,
      "learning_rate": 4.325269801215501e-05,
      "loss": 0.8636,
      "step": 189560
    },
    {
      "epoch": 0.664395588219832,
      "grad_norm": 2.859375,
      "learning_rate": 4.3252048983491314e-05,
      "loss": 0.9228,
      "step": 189570
    },
    {
      "epoch": 0.6644306357267276,
      "grad_norm": 2.5625,
      "learning_rate": 4.325139995482761e-05,
      "loss": 0.959,
      "step": 189580
    },
    {
      "epoch": 0.6644656832336232,
      "grad_norm": 3.046875,
      "learning_rate": 4.325075092616391e-05,
      "loss": 0.9077,
      "step": 189590
    },
    {
      "epoch": 0.6645007307405187,
      "grad_norm": 3.0625,
      "learning_rate": 4.3250101897500204e-05,
      "loss": 0.9987,
      "step": 189600
    },
    {
      "epoch": 0.6645357782474144,
      "grad_norm": 2.859375,
      "learning_rate": 4.32494528688365e-05,
      "loss": 0.9617,
      "step": 189610
    },
    {
      "epoch": 0.6645708257543099,
      "grad_norm": 2.875,
      "learning_rate": 4.32488038401728e-05,
      "loss": 0.8913,
      "step": 189620
    },
    {
      "epoch": 0.6646058732612056,
      "grad_norm": 2.984375,
      "learning_rate": 4.3248154811509095e-05,
      "loss": 0.935,
      "step": 189630
    },
    {
      "epoch": 0.6646409207681011,
      "grad_norm": 2.921875,
      "learning_rate": 4.3247505782845396e-05,
      "loss": 0.9257,
      "step": 189640
    },
    {
      "epoch": 0.6646759682749968,
      "grad_norm": 2.796875,
      "learning_rate": 4.324685675418169e-05,
      "loss": 0.753,
      "step": 189650
    },
    {
      "epoch": 0.6647110157818924,
      "grad_norm": 2.921875,
      "learning_rate": 4.324620772551799e-05,
      "loss": 0.9507,
      "step": 189660
    },
    {
      "epoch": 0.6647460632887879,
      "grad_norm": 3.1875,
      "learning_rate": 4.324555869685429e-05,
      "loss": 0.8709,
      "step": 189670
    },
    {
      "epoch": 0.6647811107956836,
      "grad_norm": 3.03125,
      "learning_rate": 4.324490966819059e-05,
      "loss": 0.9429,
      "step": 189680
    },
    {
      "epoch": 0.6648161583025791,
      "grad_norm": 2.8125,
      "learning_rate": 4.324426063952688e-05,
      "loss": 0.9409,
      "step": 189690
    },
    {
      "epoch": 0.6648512058094748,
      "grad_norm": 2.953125,
      "learning_rate": 4.3243611610863184e-05,
      "loss": 0.9299,
      "step": 189700
    },
    {
      "epoch": 0.6648862533163703,
      "grad_norm": 2.46875,
      "learning_rate": 4.324296258219948e-05,
      "loss": 1.0275,
      "step": 189710
    },
    {
      "epoch": 0.664921300823266,
      "grad_norm": 3.03125,
      "learning_rate": 4.324231355353578e-05,
      "loss": 0.8461,
      "step": 189720
    },
    {
      "epoch": 0.6649563483301615,
      "grad_norm": 3.125,
      "learning_rate": 4.3241664524872075e-05,
      "loss": 0.9086,
      "step": 189730
    },
    {
      "epoch": 0.6649913958370571,
      "grad_norm": 2.734375,
      "learning_rate": 4.3241015496208376e-05,
      "loss": 0.836,
      "step": 189740
    },
    {
      "epoch": 0.6650264433439528,
      "grad_norm": 3.109375,
      "learning_rate": 4.324036646754467e-05,
      "loss": 1.0248,
      "step": 189750
    },
    {
      "epoch": 0.6650614908508483,
      "grad_norm": 3.140625,
      "learning_rate": 4.323971743888097e-05,
      "loss": 0.9401,
      "step": 189760
    },
    {
      "epoch": 0.665096538357744,
      "grad_norm": 3.234375,
      "learning_rate": 4.3239068410217274e-05,
      "loss": 0.835,
      "step": 189770
    },
    {
      "epoch": 0.6651315858646395,
      "grad_norm": 2.984375,
      "learning_rate": 4.323841938155357e-05,
      "loss": 0.834,
      "step": 189780
    },
    {
      "epoch": 0.6651666333715351,
      "grad_norm": 3.140625,
      "learning_rate": 4.323777035288987e-05,
      "loss": 1.0632,
      "step": 189790
    },
    {
      "epoch": 0.6652016808784307,
      "grad_norm": 2.28125,
      "learning_rate": 4.3237121324226164e-05,
      "loss": 0.8848,
      "step": 189800
    },
    {
      "epoch": 0.6652367283853263,
      "grad_norm": 3.046875,
      "learning_rate": 4.3236472295562466e-05,
      "loss": 0.8202,
      "step": 189810
    },
    {
      "epoch": 0.6652717758922219,
      "grad_norm": 3.09375,
      "learning_rate": 4.323582326689876e-05,
      "loss": 0.944,
      "step": 189820
    },
    {
      "epoch": 0.6653068233991175,
      "grad_norm": 2.46875,
      "learning_rate": 4.323517423823506e-05,
      "loss": 0.8856,
      "step": 189830
    },
    {
      "epoch": 0.6653418709060132,
      "grad_norm": 3.171875,
      "learning_rate": 4.3234525209571356e-05,
      "loss": 0.9079,
      "step": 189840
    },
    {
      "epoch": 0.6653769184129087,
      "grad_norm": 2.453125,
      "learning_rate": 4.323387618090766e-05,
      "loss": 0.9455,
      "step": 189850
    },
    {
      "epoch": 0.6654119659198043,
      "grad_norm": 2.71875,
      "learning_rate": 4.323322715224395e-05,
      "loss": 0.9471,
      "step": 189860
    },
    {
      "epoch": 0.6654470134266999,
      "grad_norm": 2.625,
      "learning_rate": 4.3232578123580254e-05,
      "loss": 0.9154,
      "step": 189870
    },
    {
      "epoch": 0.6654820609335955,
      "grad_norm": 3.28125,
      "learning_rate": 4.323192909491655e-05,
      "loss": 0.8708,
      "step": 189880
    },
    {
      "epoch": 0.6655171084404911,
      "grad_norm": 3.25,
      "learning_rate": 4.323128006625285e-05,
      "loss": 0.8962,
      "step": 189890
    },
    {
      "epoch": 0.6655521559473867,
      "grad_norm": 3.046875,
      "learning_rate": 4.323063103758915e-05,
      "loss": 0.8995,
      "step": 189900
    },
    {
      "epoch": 0.6655872034542822,
      "grad_norm": 2.765625,
      "learning_rate": 4.3229982008925446e-05,
      "loss": 0.9587,
      "step": 189910
    },
    {
      "epoch": 0.6656222509611779,
      "grad_norm": 3.09375,
      "learning_rate": 4.322933298026175e-05,
      "loss": 0.841,
      "step": 189920
    },
    {
      "epoch": 0.6656572984680734,
      "grad_norm": 3.15625,
      "learning_rate": 4.322868395159804e-05,
      "loss": 0.9782,
      "step": 189930
    },
    {
      "epoch": 0.6656923459749691,
      "grad_norm": 3.0625,
      "learning_rate": 4.322803492293434e-05,
      "loss": 0.8946,
      "step": 189940
    },
    {
      "epoch": 0.6657273934818647,
      "grad_norm": 3.015625,
      "learning_rate": 4.322738589427064e-05,
      "loss": 0.8559,
      "step": 189950
    },
    {
      "epoch": 0.6657624409887603,
      "grad_norm": 3.015625,
      "learning_rate": 4.322673686560694e-05,
      "loss": 0.8504,
      "step": 189960
    },
    {
      "epoch": 0.6657974884956559,
      "grad_norm": 2.609375,
      "learning_rate": 4.3226087836943234e-05,
      "loss": 0.9464,
      "step": 189970
    },
    {
      "epoch": 0.6658325360025514,
      "grad_norm": 3.09375,
      "learning_rate": 4.3225438808279535e-05,
      "loss": 0.8934,
      "step": 189980
    },
    {
      "epoch": 0.6658675835094471,
      "grad_norm": 2.890625,
      "learning_rate": 4.322478977961583e-05,
      "loss": 0.8842,
      "step": 189990
    },
    {
      "epoch": 0.6659026310163426,
      "grad_norm": 3.328125,
      "learning_rate": 4.3224140750952124e-05,
      "loss": 0.8613,
      "step": 190000
    },
    {
      "epoch": 0.6659026310163426,
      "eval_loss": 0.8532357215881348,
      "eval_runtime": 564.8684,
      "eval_samples_per_second": 673.495,
      "eval_steps_per_second": 56.125,
      "step": 190000
    },
    {
      "epoch": 0.6659376785232383,
      "grad_norm": 2.765625,
      "learning_rate": 4.3223491722288426e-05,
      "loss": 0.8822,
      "step": 190010
    },
    {
      "epoch": 0.6659727260301338,
      "grad_norm": 3.03125,
      "learning_rate": 4.322284269362472e-05,
      "loss": 0.918,
      "step": 190020
    },
    {
      "epoch": 0.6660077735370294,
      "grad_norm": 2.375,
      "learning_rate": 4.322219366496102e-05,
      "loss": 0.8617,
      "step": 190030
    },
    {
      "epoch": 0.6660428210439251,
      "grad_norm": 3.140625,
      "learning_rate": 4.3221544636297316e-05,
      "loss": 0.9042,
      "step": 190040
    },
    {
      "epoch": 0.6660778685508206,
      "grad_norm": 3.15625,
      "learning_rate": 4.322089560763362e-05,
      "loss": 0.901,
      "step": 190050
    },
    {
      "epoch": 0.6661129160577163,
      "grad_norm": 3.1875,
      "learning_rate": 4.322024657896991e-05,
      "loss": 0.9026,
      "step": 190060
    },
    {
      "epoch": 0.6661479635646118,
      "grad_norm": 3.25,
      "learning_rate": 4.3219597550306214e-05,
      "loss": 0.9151,
      "step": 190070
    },
    {
      "epoch": 0.6661830110715075,
      "grad_norm": 2.84375,
      "learning_rate": 4.321894852164251e-05,
      "loss": 0.8914,
      "step": 190080
    },
    {
      "epoch": 0.666218058578403,
      "grad_norm": 3.390625,
      "learning_rate": 4.321829949297881e-05,
      "loss": 0.9215,
      "step": 190090
    },
    {
      "epoch": 0.6662531060852986,
      "grad_norm": 2.9375,
      "learning_rate": 4.3217650464315104e-05,
      "loss": 0.9122,
      "step": 190100
    },
    {
      "epoch": 0.6662881535921942,
      "grad_norm": 2.921875,
      "learning_rate": 4.3217001435651406e-05,
      "loss": 0.7456,
      "step": 190110
    },
    {
      "epoch": 0.6663232010990898,
      "grad_norm": 3.296875,
      "learning_rate": 4.32163524069877e-05,
      "loss": 0.9914,
      "step": 190120
    },
    {
      "epoch": 0.6663582486059854,
      "grad_norm": 3.0625,
      "learning_rate": 4.3215703378324e-05,
      "loss": 0.8775,
      "step": 190130
    },
    {
      "epoch": 0.666393296112881,
      "grad_norm": 3.21875,
      "learning_rate": 4.32150543496603e-05,
      "loss": 0.8698,
      "step": 190140
    },
    {
      "epoch": 0.6664283436197767,
      "grad_norm": 2.9375,
      "learning_rate": 4.32144053209966e-05,
      "loss": 0.8953,
      "step": 190150
    },
    {
      "epoch": 0.6664633911266722,
      "grad_norm": 3.25,
      "learning_rate": 4.32137562923329e-05,
      "loss": 0.9378,
      "step": 190160
    },
    {
      "epoch": 0.6664984386335678,
      "grad_norm": 2.921875,
      "learning_rate": 4.3213107263669194e-05,
      "loss": 0.8194,
      "step": 190170
    },
    {
      "epoch": 0.6665334861404634,
      "grad_norm": 3.484375,
      "learning_rate": 4.3212458235005495e-05,
      "loss": 0.9441,
      "step": 190180
    },
    {
      "epoch": 0.666568533647359,
      "grad_norm": 2.890625,
      "learning_rate": 4.321180920634179e-05,
      "loss": 0.8538,
      "step": 190190
    },
    {
      "epoch": 0.6666035811542546,
      "grad_norm": 3.296875,
      "learning_rate": 4.321116017767809e-05,
      "loss": 0.888,
      "step": 190200
    },
    {
      "epoch": 0.6666386286611502,
      "grad_norm": 3.4375,
      "learning_rate": 4.3210511149014386e-05,
      "loss": 0.934,
      "step": 190210
    },
    {
      "epoch": 0.6666736761680457,
      "grad_norm": 3.40625,
      "learning_rate": 4.320986212035069e-05,
      "loss": 0.9225,
      "step": 190220
    },
    {
      "epoch": 0.6667087236749414,
      "grad_norm": 2.71875,
      "learning_rate": 4.320921309168698e-05,
      "loss": 0.8421,
      "step": 190230
    },
    {
      "epoch": 0.666743771181837,
      "grad_norm": 3.3125,
      "learning_rate": 4.320856406302328e-05,
      "loss": 0.9904,
      "step": 190240
    },
    {
      "epoch": 0.6667788186887326,
      "grad_norm": 3.109375,
      "learning_rate": 4.320791503435958e-05,
      "loss": 0.9612,
      "step": 190250
    },
    {
      "epoch": 0.6668138661956282,
      "grad_norm": 2.953125,
      "learning_rate": 4.320726600569588e-05,
      "loss": 0.9164,
      "step": 190260
    },
    {
      "epoch": 0.6668489137025237,
      "grad_norm": 3.109375,
      "learning_rate": 4.320661697703218e-05,
      "loss": 0.926,
      "step": 190270
    },
    {
      "epoch": 0.6668839612094194,
      "grad_norm": 3.390625,
      "learning_rate": 4.3205967948368475e-05,
      "loss": 0.9258,
      "step": 190280
    },
    {
      "epoch": 0.6669190087163149,
      "grad_norm": 2.9375,
      "learning_rate": 4.3205318919704776e-05,
      "loss": 0.9333,
      "step": 190290
    },
    {
      "epoch": 0.6669540562232106,
      "grad_norm": 2.8125,
      "learning_rate": 4.320466989104107e-05,
      "loss": 0.8706,
      "step": 190300
    },
    {
      "epoch": 0.6669891037301061,
      "grad_norm": 2.578125,
      "learning_rate": 4.320402086237737e-05,
      "loss": 0.9498,
      "step": 190310
    },
    {
      "epoch": 0.6670241512370018,
      "grad_norm": 2.90625,
      "learning_rate": 4.320337183371367e-05,
      "loss": 0.9425,
      "step": 190320
    },
    {
      "epoch": 0.6670591987438974,
      "grad_norm": 3.03125,
      "learning_rate": 4.320272280504997e-05,
      "loss": 0.8943,
      "step": 190330
    },
    {
      "epoch": 0.6670942462507929,
      "grad_norm": 3.171875,
      "learning_rate": 4.320207377638626e-05,
      "loss": 0.8692,
      "step": 190340
    },
    {
      "epoch": 0.6671292937576886,
      "grad_norm": 2.796875,
      "learning_rate": 4.3201424747722564e-05,
      "loss": 0.885,
      "step": 190350
    },
    {
      "epoch": 0.6671643412645841,
      "grad_norm": 2.765625,
      "learning_rate": 4.320077571905886e-05,
      "loss": 0.9094,
      "step": 190360
    },
    {
      "epoch": 0.6671993887714798,
      "grad_norm": 2.875,
      "learning_rate": 4.3200126690395154e-05,
      "loss": 0.8877,
      "step": 190370
    },
    {
      "epoch": 0.6672344362783753,
      "grad_norm": 2.828125,
      "learning_rate": 4.3199477661731455e-05,
      "loss": 0.8561,
      "step": 190380
    },
    {
      "epoch": 0.667269483785271,
      "grad_norm": 3.21875,
      "learning_rate": 4.319882863306775e-05,
      "loss": 0.7839,
      "step": 190390
    },
    {
      "epoch": 0.6673045312921665,
      "grad_norm": 2.609375,
      "learning_rate": 4.319817960440405e-05,
      "loss": 0.8391,
      "step": 190400
    },
    {
      "epoch": 0.6673395787990621,
      "grad_norm": 2.859375,
      "learning_rate": 4.3197530575740346e-05,
      "loss": 0.915,
      "step": 190410
    },
    {
      "epoch": 0.6673746263059577,
      "grad_norm": 2.546875,
      "learning_rate": 4.319688154707665e-05,
      "loss": 0.8459,
      "step": 190420
    },
    {
      "epoch": 0.6674096738128533,
      "grad_norm": 2.9375,
      "learning_rate": 4.319623251841294e-05,
      "loss": 0.9629,
      "step": 190430
    },
    {
      "epoch": 0.667444721319749,
      "grad_norm": 2.8125,
      "learning_rate": 4.319558348974924e-05,
      "loss": 0.98,
      "step": 190440
    },
    {
      "epoch": 0.6674797688266445,
      "grad_norm": 3.3125,
      "learning_rate": 4.319493446108554e-05,
      "loss": 0.8485,
      "step": 190450
    },
    {
      "epoch": 0.6675148163335402,
      "grad_norm": 3.15625,
      "learning_rate": 4.319428543242184e-05,
      "loss": 0.896,
      "step": 190460
    },
    {
      "epoch": 0.6675498638404357,
      "grad_norm": 3.09375,
      "learning_rate": 4.3193636403758134e-05,
      "loss": 0.8442,
      "step": 190470
    },
    {
      "epoch": 0.6675849113473313,
      "grad_norm": 2.890625,
      "learning_rate": 4.3192987375094435e-05,
      "loss": 0.8658,
      "step": 190480
    },
    {
      "epoch": 0.6676199588542269,
      "grad_norm": 2.765625,
      "learning_rate": 4.319233834643073e-05,
      "loss": 0.9881,
      "step": 190490
    },
    {
      "epoch": 0.6676550063611225,
      "grad_norm": 2.84375,
      "learning_rate": 4.319168931776703e-05,
      "loss": 0.9538,
      "step": 190500
    },
    {
      "epoch": 0.667690053868018,
      "grad_norm": 2.71875,
      "learning_rate": 4.319104028910333e-05,
      "loss": 0.8401,
      "step": 190510
    },
    {
      "epoch": 0.6677251013749137,
      "grad_norm": 2.96875,
      "learning_rate": 4.319039126043963e-05,
      "loss": 0.9249,
      "step": 190520
    },
    {
      "epoch": 0.6677601488818093,
      "grad_norm": 2.90625,
      "learning_rate": 4.318974223177593e-05,
      "loss": 0.8728,
      "step": 190530
    },
    {
      "epoch": 0.6677951963887049,
      "grad_norm": 2.421875,
      "learning_rate": 4.318909320311222e-05,
      "loss": 0.9335,
      "step": 190540
    },
    {
      "epoch": 0.6678302438956005,
      "grad_norm": 3.0625,
      "learning_rate": 4.3188444174448524e-05,
      "loss": 0.9563,
      "step": 190550
    },
    {
      "epoch": 0.6678652914024961,
      "grad_norm": 2.75,
      "learning_rate": 4.318779514578482e-05,
      "loss": 0.833,
      "step": 190560
    },
    {
      "epoch": 0.6679003389093917,
      "grad_norm": 2.765625,
      "learning_rate": 4.318714611712112e-05,
      "loss": 0.8646,
      "step": 190570
    },
    {
      "epoch": 0.6679353864162872,
      "grad_norm": 2.9375,
      "learning_rate": 4.3186497088457415e-05,
      "loss": 0.949,
      "step": 190580
    },
    {
      "epoch": 0.6679704339231829,
      "grad_norm": 2.96875,
      "learning_rate": 4.3185848059793716e-05,
      "loss": 0.8742,
      "step": 190590
    },
    {
      "epoch": 0.6680054814300784,
      "grad_norm": 2.78125,
      "learning_rate": 4.318519903113001e-05,
      "loss": 0.8435,
      "step": 190600
    },
    {
      "epoch": 0.6680405289369741,
      "grad_norm": 2.90625,
      "learning_rate": 4.318455000246631e-05,
      "loss": 0.8389,
      "step": 190610
    },
    {
      "epoch": 0.6680755764438696,
      "grad_norm": 2.9375,
      "learning_rate": 4.318390097380261e-05,
      "loss": 0.9044,
      "step": 190620
    },
    {
      "epoch": 0.6681106239507653,
      "grad_norm": 3.125,
      "learning_rate": 4.318325194513891e-05,
      "loss": 0.9061,
      "step": 190630
    },
    {
      "epoch": 0.6681456714576609,
      "grad_norm": 2.671875,
      "learning_rate": 4.318260291647521e-05,
      "loss": 0.9617,
      "step": 190640
    },
    {
      "epoch": 0.6681807189645564,
      "grad_norm": 4.03125,
      "learning_rate": 4.3181953887811504e-05,
      "loss": 0.9086,
      "step": 190650
    },
    {
      "epoch": 0.6682157664714521,
      "grad_norm": 3.109375,
      "learning_rate": 4.3181304859147806e-05,
      "loss": 0.9448,
      "step": 190660
    },
    {
      "epoch": 0.6682508139783476,
      "grad_norm": 2.640625,
      "learning_rate": 4.31806558304841e-05,
      "loss": 0.8746,
      "step": 190670
    },
    {
      "epoch": 0.6682858614852433,
      "grad_norm": 2.921875,
      "learning_rate": 4.31800068018204e-05,
      "loss": 0.9924,
      "step": 190680
    },
    {
      "epoch": 0.6683209089921388,
      "grad_norm": 2.453125,
      "learning_rate": 4.3179357773156696e-05,
      "loss": 0.9883,
      "step": 190690
    },
    {
      "epoch": 0.6683559564990345,
      "grad_norm": 3.0,
      "learning_rate": 4.3178708744493e-05,
      "loss": 0.8653,
      "step": 190700
    },
    {
      "epoch": 0.66839100400593,
      "grad_norm": 2.75,
      "learning_rate": 4.317805971582929e-05,
      "loss": 0.9359,
      "step": 190710
    },
    {
      "epoch": 0.6684260515128256,
      "grad_norm": 2.84375,
      "learning_rate": 4.3177410687165594e-05,
      "loss": 0.8718,
      "step": 190720
    },
    {
      "epoch": 0.6684610990197213,
      "grad_norm": 3.46875,
      "learning_rate": 4.317676165850189e-05,
      "loss": 0.9012,
      "step": 190730
    },
    {
      "epoch": 0.6684961465266168,
      "grad_norm": 3.296875,
      "learning_rate": 4.317611262983818e-05,
      "loss": 0.8762,
      "step": 190740
    },
    {
      "epoch": 0.6685311940335125,
      "grad_norm": 3.015625,
      "learning_rate": 4.3175463601174484e-05,
      "loss": 0.9022,
      "step": 190750
    },
    {
      "epoch": 0.668566241540408,
      "grad_norm": 2.71875,
      "learning_rate": 4.317481457251078e-05,
      "loss": 0.8797,
      "step": 190760
    },
    {
      "epoch": 0.6686012890473036,
      "grad_norm": 3.0625,
      "learning_rate": 4.317416554384708e-05,
      "loss": 0.9115,
      "step": 190770
    },
    {
      "epoch": 0.6686363365541992,
      "grad_norm": 2.921875,
      "learning_rate": 4.3173516515183375e-05,
      "loss": 0.9684,
      "step": 190780
    },
    {
      "epoch": 0.6686713840610948,
      "grad_norm": 3.140625,
      "learning_rate": 4.3172867486519676e-05,
      "loss": 0.9139,
      "step": 190790
    },
    {
      "epoch": 0.6687064315679904,
      "grad_norm": 2.8125,
      "learning_rate": 4.317221845785597e-05,
      "loss": 0.9311,
      "step": 190800
    },
    {
      "epoch": 0.668741479074886,
      "grad_norm": 2.890625,
      "learning_rate": 4.317156942919227e-05,
      "loss": 0.959,
      "step": 190810
    },
    {
      "epoch": 0.6687765265817815,
      "grad_norm": 2.875,
      "learning_rate": 4.317092040052857e-05,
      "loss": 0.9261,
      "step": 190820
    },
    {
      "epoch": 0.6688115740886772,
      "grad_norm": 3.1875,
      "learning_rate": 4.317027137186487e-05,
      "loss": 0.9432,
      "step": 190830
    },
    {
      "epoch": 0.6688466215955728,
      "grad_norm": 2.4375,
      "learning_rate": 4.316962234320116e-05,
      "loss": 0.8799,
      "step": 190840
    },
    {
      "epoch": 0.6688816691024684,
      "grad_norm": 3.15625,
      "learning_rate": 4.3168973314537464e-05,
      "loss": 0.9765,
      "step": 190850
    },
    {
      "epoch": 0.668916716609364,
      "grad_norm": 2.984375,
      "learning_rate": 4.3168324285873766e-05,
      "loss": 0.8872,
      "step": 190860
    },
    {
      "epoch": 0.6689517641162596,
      "grad_norm": 3.3125,
      "learning_rate": 4.316767525721006e-05,
      "loss": 0.939,
      "step": 190870
    },
    {
      "epoch": 0.6689868116231552,
      "grad_norm": 2.84375,
      "learning_rate": 4.316702622854636e-05,
      "loss": 0.8945,
      "step": 190880
    },
    {
      "epoch": 0.6690218591300507,
      "grad_norm": 2.78125,
      "learning_rate": 4.3166377199882656e-05,
      "loss": 0.8508,
      "step": 190890
    },
    {
      "epoch": 0.6690569066369464,
      "grad_norm": 2.84375,
      "learning_rate": 4.316572817121896e-05,
      "loss": 0.8837,
      "step": 190900
    },
    {
      "epoch": 0.6690919541438419,
      "grad_norm": 2.765625,
      "learning_rate": 4.316507914255525e-05,
      "loss": 0.8936,
      "step": 190910
    },
    {
      "epoch": 0.6691270016507376,
      "grad_norm": 2.8125,
      "learning_rate": 4.3164430113891554e-05,
      "loss": 0.9291,
      "step": 190920
    },
    {
      "epoch": 0.6691620491576332,
      "grad_norm": 2.78125,
      "learning_rate": 4.316378108522785e-05,
      "loss": 0.9145,
      "step": 190930
    },
    {
      "epoch": 0.6691970966645288,
      "grad_norm": 3.28125,
      "learning_rate": 4.316313205656415e-05,
      "loss": 0.9233,
      "step": 190940
    },
    {
      "epoch": 0.6692321441714244,
      "grad_norm": 2.609375,
      "learning_rate": 4.3162483027900444e-05,
      "loss": 0.9221,
      "step": 190950
    },
    {
      "epoch": 0.6692671916783199,
      "grad_norm": 2.875,
      "learning_rate": 4.3161833999236746e-05,
      "loss": 0.8974,
      "step": 190960
    },
    {
      "epoch": 0.6693022391852156,
      "grad_norm": 3.3125,
      "learning_rate": 4.316118497057304e-05,
      "loss": 0.9361,
      "step": 190970
    },
    {
      "epoch": 0.6693372866921111,
      "grad_norm": 3.25,
      "learning_rate": 4.316053594190934e-05,
      "loss": 0.8067,
      "step": 190980
    },
    {
      "epoch": 0.6693723341990068,
      "grad_norm": 2.96875,
      "learning_rate": 4.3159886913245636e-05,
      "loss": 0.9301,
      "step": 190990
    },
    {
      "epoch": 0.6694073817059023,
      "grad_norm": 3.21875,
      "learning_rate": 4.315923788458194e-05,
      "loss": 0.8964,
      "step": 191000
    },
    {
      "epoch": 0.669442429212798,
      "grad_norm": 2.984375,
      "learning_rate": 4.315858885591824e-05,
      "loss": 0.847,
      "step": 191010
    },
    {
      "epoch": 0.6694774767196936,
      "grad_norm": 2.640625,
      "learning_rate": 4.3157939827254534e-05,
      "loss": 0.8679,
      "step": 191020
    },
    {
      "epoch": 0.6695125242265891,
      "grad_norm": 3.203125,
      "learning_rate": 4.3157290798590835e-05,
      "loss": 0.9471,
      "step": 191030
    },
    {
      "epoch": 0.6695475717334848,
      "grad_norm": 3.109375,
      "learning_rate": 4.315664176992713e-05,
      "loss": 0.9162,
      "step": 191040
    },
    {
      "epoch": 0.6695826192403803,
      "grad_norm": 3.546875,
      "learning_rate": 4.315599274126343e-05,
      "loss": 0.8999,
      "step": 191050
    },
    {
      "epoch": 0.669617666747276,
      "grad_norm": 2.765625,
      "learning_rate": 4.3155343712599726e-05,
      "loss": 0.9495,
      "step": 191060
    },
    {
      "epoch": 0.6696527142541715,
      "grad_norm": 2.9375,
      "learning_rate": 4.315469468393603e-05,
      "loss": 0.8529,
      "step": 191070
    },
    {
      "epoch": 0.6696877617610671,
      "grad_norm": 3.40625,
      "learning_rate": 4.315404565527232e-05,
      "loss": 0.9837,
      "step": 191080
    },
    {
      "epoch": 0.6697228092679627,
      "grad_norm": 3.171875,
      "learning_rate": 4.315339662660862e-05,
      "loss": 0.8548,
      "step": 191090
    },
    {
      "epoch": 0.6697578567748583,
      "grad_norm": 2.96875,
      "learning_rate": 4.315274759794492e-05,
      "loss": 0.8744,
      "step": 191100
    },
    {
      "epoch": 0.6697929042817539,
      "grad_norm": 2.890625,
      "learning_rate": 4.315209856928122e-05,
      "loss": 0.9111,
      "step": 191110
    },
    {
      "epoch": 0.6698279517886495,
      "grad_norm": 2.953125,
      "learning_rate": 4.3151449540617514e-05,
      "loss": 0.9535,
      "step": 191120
    },
    {
      "epoch": 0.6698629992955452,
      "grad_norm": 3.0,
      "learning_rate": 4.315080051195381e-05,
      "loss": 0.9498,
      "step": 191130
    },
    {
      "epoch": 0.6698980468024407,
      "grad_norm": 3.25,
      "learning_rate": 4.315015148329011e-05,
      "loss": 0.9723,
      "step": 191140
    },
    {
      "epoch": 0.6699330943093363,
      "grad_norm": 2.671875,
      "learning_rate": 4.3149502454626404e-05,
      "loss": 0.8993,
      "step": 191150
    },
    {
      "epoch": 0.6699681418162319,
      "grad_norm": 2.703125,
      "learning_rate": 4.3148853425962706e-05,
      "loss": 0.9329,
      "step": 191160
    },
    {
      "epoch": 0.6700031893231275,
      "grad_norm": 3.140625,
      "learning_rate": 4.3148204397299e-05,
      "loss": 0.8268,
      "step": 191170
    },
    {
      "epoch": 0.6700382368300231,
      "grad_norm": 3.140625,
      "learning_rate": 4.31475553686353e-05,
      "loss": 0.9201,
      "step": 191180
    },
    {
      "epoch": 0.6700732843369187,
      "grad_norm": 2.796875,
      "learning_rate": 4.3146906339971596e-05,
      "loss": 0.9374,
      "step": 191190
    },
    {
      "epoch": 0.6701083318438142,
      "grad_norm": 3.046875,
      "learning_rate": 4.31462573113079e-05,
      "loss": 0.8679,
      "step": 191200
    },
    {
      "epoch": 0.6701433793507099,
      "grad_norm": 2.9375,
      "learning_rate": 4.314560828264419e-05,
      "loss": 0.9523,
      "step": 191210
    },
    {
      "epoch": 0.6701784268576055,
      "grad_norm": 3.078125,
      "learning_rate": 4.3144959253980494e-05,
      "loss": 0.9262,
      "step": 191220
    },
    {
      "epoch": 0.6702134743645011,
      "grad_norm": 2.9375,
      "learning_rate": 4.3144310225316795e-05,
      "loss": 0.9753,
      "step": 191230
    },
    {
      "epoch": 0.6702485218713967,
      "grad_norm": 2.90625,
      "learning_rate": 4.314366119665309e-05,
      "loss": 0.9019,
      "step": 191240
    },
    {
      "epoch": 0.6702835693782923,
      "grad_norm": 2.71875,
      "learning_rate": 4.314301216798939e-05,
      "loss": 0.8894,
      "step": 191250
    },
    {
      "epoch": 0.6703186168851879,
      "grad_norm": 3.078125,
      "learning_rate": 4.3142363139325686e-05,
      "loss": 0.93,
      "step": 191260
    },
    {
      "epoch": 0.6703536643920834,
      "grad_norm": 2.953125,
      "learning_rate": 4.314171411066199e-05,
      "loss": 0.9029,
      "step": 191270
    },
    {
      "epoch": 0.6703887118989791,
      "grad_norm": 2.921875,
      "learning_rate": 4.314106508199828e-05,
      "loss": 0.8094,
      "step": 191280
    },
    {
      "epoch": 0.6704237594058746,
      "grad_norm": 2.5625,
      "learning_rate": 4.314041605333458e-05,
      "loss": 0.8858,
      "step": 191290
    },
    {
      "epoch": 0.6704588069127703,
      "grad_norm": 2.578125,
      "learning_rate": 4.313976702467088e-05,
      "loss": 0.8978,
      "step": 191300
    },
    {
      "epoch": 0.6704938544196658,
      "grad_norm": 3.71875,
      "learning_rate": 4.313911799600718e-05,
      "loss": 0.9552,
      "step": 191310
    },
    {
      "epoch": 0.6705289019265614,
      "grad_norm": 3.484375,
      "learning_rate": 4.3138468967343474e-05,
      "loss": 0.8561,
      "step": 191320
    },
    {
      "epoch": 0.6705639494334571,
      "grad_norm": 3.0625,
      "learning_rate": 4.3137819938679775e-05,
      "loss": 0.9108,
      "step": 191330
    },
    {
      "epoch": 0.6705989969403526,
      "grad_norm": 3.03125,
      "learning_rate": 4.313717091001607e-05,
      "loss": 1.0197,
      "step": 191340
    },
    {
      "epoch": 0.6706340444472483,
      "grad_norm": 3.0,
      "learning_rate": 4.313652188135237e-05,
      "loss": 0.9321,
      "step": 191350
    },
    {
      "epoch": 0.6706690919541438,
      "grad_norm": 2.171875,
      "learning_rate": 4.3135872852688666e-05,
      "loss": 0.7948,
      "step": 191360
    },
    {
      "epoch": 0.6707041394610395,
      "grad_norm": 2.921875,
      "learning_rate": 4.313522382402497e-05,
      "loss": 0.8519,
      "step": 191370
    },
    {
      "epoch": 0.670739186967935,
      "grad_norm": 3.3125,
      "learning_rate": 4.313457479536127e-05,
      "loss": 0.7582,
      "step": 191380
    },
    {
      "epoch": 0.6707742344748306,
      "grad_norm": 2.828125,
      "learning_rate": 4.313392576669756e-05,
      "loss": 0.9182,
      "step": 191390
    },
    {
      "epoch": 0.6708092819817262,
      "grad_norm": 2.84375,
      "learning_rate": 4.3133276738033865e-05,
      "loss": 0.8764,
      "step": 191400
    },
    {
      "epoch": 0.6708443294886218,
      "grad_norm": 3.1875,
      "learning_rate": 4.313262770937016e-05,
      "loss": 0.9195,
      "step": 191410
    },
    {
      "epoch": 0.6708793769955175,
      "grad_norm": 2.96875,
      "learning_rate": 4.313197868070646e-05,
      "loss": 0.9259,
      "step": 191420
    },
    {
      "epoch": 0.670914424502413,
      "grad_norm": 2.59375,
      "learning_rate": 4.3131329652042755e-05,
      "loss": 0.8741,
      "step": 191430
    },
    {
      "epoch": 0.6709494720093087,
      "grad_norm": 3.0625,
      "learning_rate": 4.3130680623379057e-05,
      "loss": 0.8703,
      "step": 191440
    },
    {
      "epoch": 0.6709845195162042,
      "grad_norm": 3.015625,
      "learning_rate": 4.313003159471535e-05,
      "loss": 0.974,
      "step": 191450
    },
    {
      "epoch": 0.6710195670230998,
      "grad_norm": 2.875,
      "learning_rate": 4.312938256605165e-05,
      "loss": 0.8361,
      "step": 191460
    },
    {
      "epoch": 0.6710546145299954,
      "grad_norm": 2.859375,
      "learning_rate": 4.312873353738795e-05,
      "loss": 0.8979,
      "step": 191470
    },
    {
      "epoch": 0.671089662036891,
      "grad_norm": 2.6875,
      "learning_rate": 4.312808450872425e-05,
      "loss": 0.8687,
      "step": 191480
    },
    {
      "epoch": 0.6711247095437866,
      "grad_norm": 2.890625,
      "learning_rate": 4.312743548006054e-05,
      "loss": 0.9484,
      "step": 191490
    },
    {
      "epoch": 0.6711597570506822,
      "grad_norm": 3.234375,
      "learning_rate": 4.312678645139684e-05,
      "loss": 0.9451,
      "step": 191500
    },
    {
      "epoch": 0.6711948045575779,
      "grad_norm": 2.984375,
      "learning_rate": 4.312613742273314e-05,
      "loss": 0.8336,
      "step": 191510
    },
    {
      "epoch": 0.6712298520644734,
      "grad_norm": 3.03125,
      "learning_rate": 4.3125488394069434e-05,
      "loss": 0.8497,
      "step": 191520
    },
    {
      "epoch": 0.671264899571369,
      "grad_norm": 3.0625,
      "learning_rate": 4.3124839365405735e-05,
      "loss": 0.8679,
      "step": 191530
    },
    {
      "epoch": 0.6712999470782646,
      "grad_norm": 3.28125,
      "learning_rate": 4.312419033674203e-05,
      "loss": 0.9784,
      "step": 191540
    },
    {
      "epoch": 0.6713349945851602,
      "grad_norm": 2.859375,
      "learning_rate": 4.312354130807833e-05,
      "loss": 0.886,
      "step": 191550
    },
    {
      "epoch": 0.6713700420920558,
      "grad_norm": 2.984375,
      "learning_rate": 4.3122892279414626e-05,
      "loss": 0.9016,
      "step": 191560
    },
    {
      "epoch": 0.6714050895989514,
      "grad_norm": 2.796875,
      "learning_rate": 4.312224325075093e-05,
      "loss": 0.9547,
      "step": 191570
    },
    {
      "epoch": 0.6714401371058469,
      "grad_norm": 3.15625,
      "learning_rate": 4.312159422208722e-05,
      "loss": 0.8155,
      "step": 191580
    },
    {
      "epoch": 0.6714751846127426,
      "grad_norm": 2.96875,
      "learning_rate": 4.312094519342352e-05,
      "loss": 0.8994,
      "step": 191590
    },
    {
      "epoch": 0.6715102321196381,
      "grad_norm": 3.25,
      "learning_rate": 4.3120296164759825e-05,
      "loss": 0.9536,
      "step": 191600
    },
    {
      "epoch": 0.6715452796265338,
      "grad_norm": 3.046875,
      "learning_rate": 4.311964713609612e-05,
      "loss": 0.8531,
      "step": 191610
    },
    {
      "epoch": 0.6715803271334294,
      "grad_norm": 2.828125,
      "learning_rate": 4.311899810743242e-05,
      "loss": 0.8487,
      "step": 191620
    },
    {
      "epoch": 0.671615374640325,
      "grad_norm": 2.953125,
      "learning_rate": 4.3118349078768715e-05,
      "loss": 0.9987,
      "step": 191630
    },
    {
      "epoch": 0.6716504221472206,
      "grad_norm": 3.3125,
      "learning_rate": 4.3117700050105017e-05,
      "loss": 0.9219,
      "step": 191640
    },
    {
      "epoch": 0.6716854696541161,
      "grad_norm": 2.90625,
      "learning_rate": 4.311705102144131e-05,
      "loss": 0.9693,
      "step": 191650
    },
    {
      "epoch": 0.6717205171610118,
      "grad_norm": 3.125,
      "learning_rate": 4.311640199277761e-05,
      "loss": 0.9553,
      "step": 191660
    },
    {
      "epoch": 0.6717555646679073,
      "grad_norm": 3.515625,
      "learning_rate": 4.311575296411391e-05,
      "loss": 0.9043,
      "step": 191670
    },
    {
      "epoch": 0.671790612174803,
      "grad_norm": 2.78125,
      "learning_rate": 4.311510393545021e-05,
      "loss": 0.8306,
      "step": 191680
    },
    {
      "epoch": 0.6718256596816985,
      "grad_norm": 3.03125,
      "learning_rate": 4.31144549067865e-05,
      "loss": 0.9832,
      "step": 191690
    },
    {
      "epoch": 0.6718607071885941,
      "grad_norm": 2.53125,
      "learning_rate": 4.3113805878122805e-05,
      "loss": 0.7792,
      "step": 191700
    },
    {
      "epoch": 0.6718957546954898,
      "grad_norm": 2.890625,
      "learning_rate": 4.31131568494591e-05,
      "loss": 0.8807,
      "step": 191710
    },
    {
      "epoch": 0.6719308022023853,
      "grad_norm": 3.015625,
      "learning_rate": 4.31125078207954e-05,
      "loss": 0.8794,
      "step": 191720
    },
    {
      "epoch": 0.671965849709281,
      "grad_norm": 2.53125,
      "learning_rate": 4.31118587921317e-05,
      "loss": 0.9246,
      "step": 191730
    },
    {
      "epoch": 0.6720008972161765,
      "grad_norm": 3.0625,
      "learning_rate": 4.3111209763467997e-05,
      "loss": 0.8133,
      "step": 191740
    },
    {
      "epoch": 0.6720359447230722,
      "grad_norm": 2.921875,
      "learning_rate": 4.31105607348043e-05,
      "loss": 0.9186,
      "step": 191750
    },
    {
      "epoch": 0.6720709922299677,
      "grad_norm": 2.765625,
      "learning_rate": 4.310991170614059e-05,
      "loss": 0.8583,
      "step": 191760
    },
    {
      "epoch": 0.6721060397368633,
      "grad_norm": 2.765625,
      "learning_rate": 4.3109262677476894e-05,
      "loss": 0.9613,
      "step": 191770
    },
    {
      "epoch": 0.6721410872437589,
      "grad_norm": 2.953125,
      "learning_rate": 4.310861364881319e-05,
      "loss": 0.9192,
      "step": 191780
    },
    {
      "epoch": 0.6721761347506545,
      "grad_norm": 2.984375,
      "learning_rate": 4.310796462014949e-05,
      "loss": 0.8454,
      "step": 191790
    },
    {
      "epoch": 0.67221118225755,
      "grad_norm": 3.109375,
      "learning_rate": 4.3107315591485785e-05,
      "loss": 0.8836,
      "step": 191800
    },
    {
      "epoch": 0.6722462297644457,
      "grad_norm": 2.9375,
      "learning_rate": 4.3106666562822086e-05,
      "loss": 0.8954,
      "step": 191810
    },
    {
      "epoch": 0.6722812772713413,
      "grad_norm": 2.984375,
      "learning_rate": 4.310601753415838e-05,
      "loss": 0.9276,
      "step": 191820
    },
    {
      "epoch": 0.6723163247782369,
      "grad_norm": 2.796875,
      "learning_rate": 4.310536850549468e-05,
      "loss": 0.8608,
      "step": 191830
    },
    {
      "epoch": 0.6723513722851325,
      "grad_norm": 3.046875,
      "learning_rate": 4.3104719476830977e-05,
      "loss": 0.9545,
      "step": 191840
    },
    {
      "epoch": 0.6723864197920281,
      "grad_norm": 3.09375,
      "learning_rate": 4.310407044816728e-05,
      "loss": 0.9897,
      "step": 191850
    },
    {
      "epoch": 0.6724214672989237,
      "grad_norm": 3.3125,
      "learning_rate": 4.310342141950357e-05,
      "loss": 0.923,
      "step": 191860
    },
    {
      "epoch": 0.6724565148058192,
      "grad_norm": 2.90625,
      "learning_rate": 4.310277239083987e-05,
      "loss": 0.9075,
      "step": 191870
    },
    {
      "epoch": 0.6724915623127149,
      "grad_norm": 3.15625,
      "learning_rate": 4.310212336217617e-05,
      "loss": 1.0068,
      "step": 191880
    },
    {
      "epoch": 0.6725266098196104,
      "grad_norm": 3.015625,
      "learning_rate": 4.310147433351246e-05,
      "loss": 0.9732,
      "step": 191890
    },
    {
      "epoch": 0.6725616573265061,
      "grad_norm": 3.203125,
      "learning_rate": 4.3100825304848765e-05,
      "loss": 0.8405,
      "step": 191900
    },
    {
      "epoch": 0.6725967048334017,
      "grad_norm": 2.9375,
      "learning_rate": 4.310017627618506e-05,
      "loss": 0.8534,
      "step": 191910
    },
    {
      "epoch": 0.6726317523402973,
      "grad_norm": 3.1875,
      "learning_rate": 4.309952724752136e-05,
      "loss": 0.9213,
      "step": 191920
    },
    {
      "epoch": 0.6726667998471929,
      "grad_norm": 2.875,
      "learning_rate": 4.3098878218857655e-05,
      "loss": 0.7829,
      "step": 191930
    },
    {
      "epoch": 0.6727018473540884,
      "grad_norm": 2.890625,
      "learning_rate": 4.3098229190193957e-05,
      "loss": 0.8832,
      "step": 191940
    },
    {
      "epoch": 0.6727368948609841,
      "grad_norm": 2.84375,
      "learning_rate": 4.309758016153025e-05,
      "loss": 1.0046,
      "step": 191950
    },
    {
      "epoch": 0.6727719423678796,
      "grad_norm": 2.9375,
      "learning_rate": 4.309693113286655e-05,
      "loss": 0.82,
      "step": 191960
    },
    {
      "epoch": 0.6728069898747753,
      "grad_norm": 3.015625,
      "learning_rate": 4.3096282104202854e-05,
      "loss": 0.8015,
      "step": 191970
    },
    {
      "epoch": 0.6728420373816708,
      "grad_norm": 2.453125,
      "learning_rate": 4.309563307553915e-05,
      "loss": 0.8481,
      "step": 191980
    },
    {
      "epoch": 0.6728770848885665,
      "grad_norm": 2.546875,
      "learning_rate": 4.309498404687545e-05,
      "loss": 0.934,
      "step": 191990
    },
    {
      "epoch": 0.6729121323954621,
      "grad_norm": 2.9375,
      "learning_rate": 4.3094335018211745e-05,
      "loss": 0.8529,
      "step": 192000
    },
    {
      "epoch": 0.6729471799023576,
      "grad_norm": 3.109375,
      "learning_rate": 4.3093685989548046e-05,
      "loss": 0.9137,
      "step": 192010
    },
    {
      "epoch": 0.6729822274092533,
      "grad_norm": 2.71875,
      "learning_rate": 4.309303696088434e-05,
      "loss": 0.8707,
      "step": 192020
    },
    {
      "epoch": 0.6730172749161488,
      "grad_norm": 2.359375,
      "learning_rate": 4.309238793222064e-05,
      "loss": 0.9479,
      "step": 192030
    },
    {
      "epoch": 0.6730523224230445,
      "grad_norm": 3.078125,
      "learning_rate": 4.3091738903556937e-05,
      "loss": 0.9237,
      "step": 192040
    },
    {
      "epoch": 0.67308736992994,
      "grad_norm": 2.9375,
      "learning_rate": 4.309108987489324e-05,
      "loss": 0.9045,
      "step": 192050
    },
    {
      "epoch": 0.6731224174368357,
      "grad_norm": 2.9375,
      "learning_rate": 4.309044084622953e-05,
      "loss": 0.9656,
      "step": 192060
    },
    {
      "epoch": 0.6731574649437312,
      "grad_norm": 3.171875,
      "learning_rate": 4.3089791817565834e-05,
      "loss": 0.9445,
      "step": 192070
    },
    {
      "epoch": 0.6731925124506268,
      "grad_norm": 3.015625,
      "learning_rate": 4.308914278890213e-05,
      "loss": 0.8645,
      "step": 192080
    },
    {
      "epoch": 0.6732275599575224,
      "grad_norm": 3.03125,
      "learning_rate": 4.308849376023843e-05,
      "loss": 0.8497,
      "step": 192090
    },
    {
      "epoch": 0.673262607464418,
      "grad_norm": 3.015625,
      "learning_rate": 4.308784473157473e-05,
      "loss": 0.9805,
      "step": 192100
    },
    {
      "epoch": 0.6732976549713137,
      "grad_norm": 3.046875,
      "learning_rate": 4.3087195702911026e-05,
      "loss": 0.9981,
      "step": 192110
    },
    {
      "epoch": 0.6733327024782092,
      "grad_norm": 3.015625,
      "learning_rate": 4.308654667424733e-05,
      "loss": 0.9122,
      "step": 192120
    },
    {
      "epoch": 0.6733677499851048,
      "grad_norm": 3.0625,
      "learning_rate": 4.308589764558362e-05,
      "loss": 0.9277,
      "step": 192130
    },
    {
      "epoch": 0.6734027974920004,
      "grad_norm": 2.828125,
      "learning_rate": 4.308524861691992e-05,
      "loss": 0.9165,
      "step": 192140
    },
    {
      "epoch": 0.673437844998896,
      "grad_norm": 3.46875,
      "learning_rate": 4.308459958825622e-05,
      "loss": 0.9447,
      "step": 192150
    },
    {
      "epoch": 0.6734728925057916,
      "grad_norm": 3.0,
      "learning_rate": 4.308395055959252e-05,
      "loss": 0.9127,
      "step": 192160
    },
    {
      "epoch": 0.6735079400126872,
      "grad_norm": 3.328125,
      "learning_rate": 4.3083301530928814e-05,
      "loss": 0.9035,
      "step": 192170
    },
    {
      "epoch": 0.6735429875195827,
      "grad_norm": 3.203125,
      "learning_rate": 4.3082652502265115e-05,
      "loss": 1.034,
      "step": 192180
    },
    {
      "epoch": 0.6735780350264784,
      "grad_norm": 2.90625,
      "learning_rate": 4.308200347360141e-05,
      "loss": 0.8519,
      "step": 192190
    },
    {
      "epoch": 0.673613082533374,
      "grad_norm": 2.984375,
      "learning_rate": 4.308135444493771e-05,
      "loss": 0.8801,
      "step": 192200
    },
    {
      "epoch": 0.6736481300402696,
      "grad_norm": 3.734375,
      "learning_rate": 4.3080705416274006e-05,
      "loss": 0.8639,
      "step": 192210
    },
    {
      "epoch": 0.6736831775471652,
      "grad_norm": 2.84375,
      "learning_rate": 4.308005638761031e-05,
      "loss": 0.8998,
      "step": 192220
    },
    {
      "epoch": 0.6737182250540608,
      "grad_norm": 3.046875,
      "learning_rate": 4.30794073589466e-05,
      "loss": 0.8601,
      "step": 192230
    },
    {
      "epoch": 0.6737532725609564,
      "grad_norm": 2.796875,
      "learning_rate": 4.30787583302829e-05,
      "loss": 0.9091,
      "step": 192240
    },
    {
      "epoch": 0.6737883200678519,
      "grad_norm": 3.421875,
      "learning_rate": 4.30781093016192e-05,
      "loss": 0.9936,
      "step": 192250
    },
    {
      "epoch": 0.6738233675747476,
      "grad_norm": 2.8125,
      "learning_rate": 4.307746027295549e-05,
      "loss": 0.9103,
      "step": 192260
    },
    {
      "epoch": 0.6738584150816431,
      "grad_norm": 3.671875,
      "learning_rate": 4.3076811244291794e-05,
      "loss": 0.8744,
      "step": 192270
    },
    {
      "epoch": 0.6738934625885388,
      "grad_norm": 3.203125,
      "learning_rate": 4.307616221562809e-05,
      "loss": 0.9526,
      "step": 192280
    },
    {
      "epoch": 0.6739285100954343,
      "grad_norm": 3.375,
      "learning_rate": 4.307551318696439e-05,
      "loss": 0.8483,
      "step": 192290
    },
    {
      "epoch": 0.67396355760233,
      "grad_norm": 2.703125,
      "learning_rate": 4.3074864158300684e-05,
      "loss": 0.8795,
      "step": 192300
    },
    {
      "epoch": 0.6739986051092256,
      "grad_norm": 2.859375,
      "learning_rate": 4.3074215129636986e-05,
      "loss": 0.9281,
      "step": 192310
    },
    {
      "epoch": 0.6740336526161211,
      "grad_norm": 2.671875,
      "learning_rate": 4.307356610097328e-05,
      "loss": 0.9157,
      "step": 192320
    },
    {
      "epoch": 0.6740687001230168,
      "grad_norm": 2.890625,
      "learning_rate": 4.307291707230958e-05,
      "loss": 0.832,
      "step": 192330
    },
    {
      "epoch": 0.6741037476299123,
      "grad_norm": 3.09375,
      "learning_rate": 4.307226804364588e-05,
      "loss": 0.9611,
      "step": 192340
    },
    {
      "epoch": 0.674138795136808,
      "grad_norm": 3.4375,
      "learning_rate": 4.307161901498218e-05,
      "loss": 0.9523,
      "step": 192350
    },
    {
      "epoch": 0.6741738426437035,
      "grad_norm": 3.1875,
      "learning_rate": 4.307096998631848e-05,
      "loss": 0.9318,
      "step": 192360
    },
    {
      "epoch": 0.6742088901505991,
      "grad_norm": 2.796875,
      "learning_rate": 4.3070320957654774e-05,
      "loss": 0.8424,
      "step": 192370
    },
    {
      "epoch": 0.6742439376574947,
      "grad_norm": 2.71875,
      "learning_rate": 4.3069671928991075e-05,
      "loss": 0.8515,
      "step": 192380
    },
    {
      "epoch": 0.6742789851643903,
      "grad_norm": 2.84375,
      "learning_rate": 4.306902290032737e-05,
      "loss": 0.8889,
      "step": 192390
    },
    {
      "epoch": 0.674314032671286,
      "grad_norm": 2.984375,
      "learning_rate": 4.306837387166367e-05,
      "loss": 0.9252,
      "step": 192400
    },
    {
      "epoch": 0.6743490801781815,
      "grad_norm": 2.703125,
      "learning_rate": 4.3067724842999966e-05,
      "loss": 0.992,
      "step": 192410
    },
    {
      "epoch": 0.6743841276850772,
      "grad_norm": 2.96875,
      "learning_rate": 4.306707581433627e-05,
      "loss": 0.9261,
      "step": 192420
    },
    {
      "epoch": 0.6744191751919727,
      "grad_norm": 2.84375,
      "learning_rate": 4.306642678567256e-05,
      "loss": 0.9203,
      "step": 192430
    },
    {
      "epoch": 0.6744542226988683,
      "grad_norm": 2.78125,
      "learning_rate": 4.306577775700886e-05,
      "loss": 0.8644,
      "step": 192440
    },
    {
      "epoch": 0.6744892702057639,
      "grad_norm": 2.453125,
      "learning_rate": 4.306512872834516e-05,
      "loss": 0.8427,
      "step": 192450
    },
    {
      "epoch": 0.6745243177126595,
      "grad_norm": 2.609375,
      "learning_rate": 4.306447969968146e-05,
      "loss": 0.9363,
      "step": 192460
    },
    {
      "epoch": 0.6745593652195551,
      "grad_norm": 3.21875,
      "learning_rate": 4.306383067101776e-05,
      "loss": 0.9332,
      "step": 192470
    },
    {
      "epoch": 0.6745944127264507,
      "grad_norm": 3.40625,
      "learning_rate": 4.3063181642354055e-05,
      "loss": 0.8873,
      "step": 192480
    },
    {
      "epoch": 0.6746294602333464,
      "grad_norm": 2.6875,
      "learning_rate": 4.306253261369036e-05,
      "loss": 0.9086,
      "step": 192490
    },
    {
      "epoch": 0.6746645077402419,
      "grad_norm": 2.734375,
      "learning_rate": 4.306188358502665e-05,
      "loss": 0.9219,
      "step": 192500
    },
    {
      "epoch": 0.6746995552471375,
      "grad_norm": 3.09375,
      "learning_rate": 4.306123455636295e-05,
      "loss": 0.832,
      "step": 192510
    },
    {
      "epoch": 0.6747346027540331,
      "grad_norm": 2.90625,
      "learning_rate": 4.306058552769925e-05,
      "loss": 0.8878,
      "step": 192520
    },
    {
      "epoch": 0.6747696502609287,
      "grad_norm": 3.03125,
      "learning_rate": 4.305993649903555e-05,
      "loss": 0.914,
      "step": 192530
    },
    {
      "epoch": 0.6748046977678243,
      "grad_norm": 3.203125,
      "learning_rate": 4.305928747037184e-05,
      "loss": 0.9626,
      "step": 192540
    },
    {
      "epoch": 0.6748397452747199,
      "grad_norm": 3.3125,
      "learning_rate": 4.3058638441708145e-05,
      "loss": 0.933,
      "step": 192550
    },
    {
      "epoch": 0.6748747927816154,
      "grad_norm": 2.625,
      "learning_rate": 4.305798941304444e-05,
      "loss": 0.8509,
      "step": 192560
    },
    {
      "epoch": 0.6749098402885111,
      "grad_norm": 2.796875,
      "learning_rate": 4.305734038438074e-05,
      "loss": 0.9281,
      "step": 192570
    },
    {
      "epoch": 0.6749448877954066,
      "grad_norm": 3.0625,
      "learning_rate": 4.3056691355717035e-05,
      "loss": 0.9715,
      "step": 192580
    },
    {
      "epoch": 0.6749799353023023,
      "grad_norm": 2.6875,
      "learning_rate": 4.305604232705334e-05,
      "loss": 0.8678,
      "step": 192590
    },
    {
      "epoch": 0.6750149828091979,
      "grad_norm": 2.546875,
      "learning_rate": 4.305539329838963e-05,
      "loss": 0.8959,
      "step": 192600
    },
    {
      "epoch": 0.6750500303160935,
      "grad_norm": 2.71875,
      "learning_rate": 4.305474426972593e-05,
      "loss": 0.958,
      "step": 192610
    },
    {
      "epoch": 0.6750850778229891,
      "grad_norm": 3.15625,
      "learning_rate": 4.305409524106223e-05,
      "loss": 0.9521,
      "step": 192620
    },
    {
      "epoch": 0.6751201253298846,
      "grad_norm": 3.21875,
      "learning_rate": 4.305344621239852e-05,
      "loss": 0.9086,
      "step": 192630
    },
    {
      "epoch": 0.6751551728367803,
      "grad_norm": 2.875,
      "learning_rate": 4.305279718373482e-05,
      "loss": 0.841,
      "step": 192640
    },
    {
      "epoch": 0.6751902203436758,
      "grad_norm": 3.203125,
      "learning_rate": 4.305214815507112e-05,
      "loss": 0.9504,
      "step": 192650
    },
    {
      "epoch": 0.6752252678505715,
      "grad_norm": 2.953125,
      "learning_rate": 4.305149912640742e-05,
      "loss": 0.8953,
      "step": 192660
    },
    {
      "epoch": 0.675260315357467,
      "grad_norm": 2.734375,
      "learning_rate": 4.3050850097743714e-05,
      "loss": 0.866,
      "step": 192670
    },
    {
      "epoch": 0.6752953628643626,
      "grad_norm": 3.21875,
      "learning_rate": 4.3050201069080015e-05,
      "loss": 0.8828,
      "step": 192680
    },
    {
      "epoch": 0.6753304103712583,
      "grad_norm": 2.546875,
      "learning_rate": 4.304955204041632e-05,
      "loss": 0.8674,
      "step": 192690
    },
    {
      "epoch": 0.6753654578781538,
      "grad_norm": 2.9375,
      "learning_rate": 4.304890301175261e-05,
      "loss": 0.8843,
      "step": 192700
    },
    {
      "epoch": 0.6754005053850495,
      "grad_norm": 3.109375,
      "learning_rate": 4.304825398308891e-05,
      "loss": 0.9812,
      "step": 192710
    },
    {
      "epoch": 0.675435552891945,
      "grad_norm": 3.078125,
      "learning_rate": 4.304760495442521e-05,
      "loss": 0.8764,
      "step": 192720
    },
    {
      "epoch": 0.6754706003988407,
      "grad_norm": 2.65625,
      "learning_rate": 4.304695592576151e-05,
      "loss": 0.8631,
      "step": 192730
    },
    {
      "epoch": 0.6755056479057362,
      "grad_norm": 3.125,
      "learning_rate": 4.30463068970978e-05,
      "loss": 0.9197,
      "step": 192740
    },
    {
      "epoch": 0.6755406954126318,
      "grad_norm": 2.75,
      "learning_rate": 4.3045657868434105e-05,
      "loss": 0.9119,
      "step": 192750
    },
    {
      "epoch": 0.6755757429195274,
      "grad_norm": 2.953125,
      "learning_rate": 4.30450088397704e-05,
      "loss": 0.8212,
      "step": 192760
    },
    {
      "epoch": 0.675610790426423,
      "grad_norm": 2.78125,
      "learning_rate": 4.30443598111067e-05,
      "loss": 0.9034,
      "step": 192770
    },
    {
      "epoch": 0.6756458379333186,
      "grad_norm": 2.875,
      "learning_rate": 4.3043710782442995e-05,
      "loss": 0.8966,
      "step": 192780
    },
    {
      "epoch": 0.6756808854402142,
      "grad_norm": 2.9375,
      "learning_rate": 4.30430617537793e-05,
      "loss": 0.8169,
      "step": 192790
    },
    {
      "epoch": 0.6757159329471099,
      "grad_norm": 3.234375,
      "learning_rate": 4.304241272511559e-05,
      "loss": 0.926,
      "step": 192800
    },
    {
      "epoch": 0.6757509804540054,
      "grad_norm": 2.953125,
      "learning_rate": 4.304176369645189e-05,
      "loss": 0.9022,
      "step": 192810
    },
    {
      "epoch": 0.675786027960901,
      "grad_norm": 2.828125,
      "learning_rate": 4.304111466778819e-05,
      "loss": 0.792,
      "step": 192820
    },
    {
      "epoch": 0.6758210754677966,
      "grad_norm": 3.21875,
      "learning_rate": 4.304046563912449e-05,
      "loss": 0.8786,
      "step": 192830
    },
    {
      "epoch": 0.6758561229746922,
      "grad_norm": 2.796875,
      "learning_rate": 4.303981661046079e-05,
      "loss": 0.93,
      "step": 192840
    },
    {
      "epoch": 0.6758911704815878,
      "grad_norm": 3.21875,
      "learning_rate": 4.3039167581797085e-05,
      "loss": 0.8887,
      "step": 192850
    },
    {
      "epoch": 0.6759262179884834,
      "grad_norm": 2.84375,
      "learning_rate": 4.3038518553133386e-05,
      "loss": 0.9293,
      "step": 192860
    },
    {
      "epoch": 0.6759612654953789,
      "grad_norm": 2.828125,
      "learning_rate": 4.303786952446968e-05,
      "loss": 0.8945,
      "step": 192870
    },
    {
      "epoch": 0.6759963130022746,
      "grad_norm": 3.28125,
      "learning_rate": 4.303722049580598e-05,
      "loss": 0.8757,
      "step": 192880
    },
    {
      "epoch": 0.6760313605091702,
      "grad_norm": 2.953125,
      "learning_rate": 4.303657146714228e-05,
      "loss": 0.9185,
      "step": 192890
    },
    {
      "epoch": 0.6760664080160658,
      "grad_norm": 2.921875,
      "learning_rate": 4.303592243847858e-05,
      "loss": 0.8834,
      "step": 192900
    },
    {
      "epoch": 0.6761014555229614,
      "grad_norm": 2.921875,
      "learning_rate": 4.303527340981487e-05,
      "loss": 0.9015,
      "step": 192910
    },
    {
      "epoch": 0.676136503029857,
      "grad_norm": 2.78125,
      "learning_rate": 4.3034624381151174e-05,
      "loss": 0.9339,
      "step": 192920
    },
    {
      "epoch": 0.6761715505367526,
      "grad_norm": 2.921875,
      "learning_rate": 4.303397535248747e-05,
      "loss": 0.8929,
      "step": 192930
    },
    {
      "epoch": 0.6762065980436481,
      "grad_norm": 3.1875,
      "learning_rate": 4.303332632382377e-05,
      "loss": 0.9932,
      "step": 192940
    },
    {
      "epoch": 0.6762416455505438,
      "grad_norm": 2.96875,
      "learning_rate": 4.3032677295160065e-05,
      "loss": 0.8842,
      "step": 192950
    },
    {
      "epoch": 0.6762766930574393,
      "grad_norm": 2.921875,
      "learning_rate": 4.3032028266496366e-05,
      "loss": 0.8905,
      "step": 192960
    },
    {
      "epoch": 0.676311740564335,
      "grad_norm": 2.875,
      "learning_rate": 4.303137923783267e-05,
      "loss": 0.9269,
      "step": 192970
    },
    {
      "epoch": 0.6763467880712305,
      "grad_norm": 3.0,
      "learning_rate": 4.303073020916896e-05,
      "loss": 0.9466,
      "step": 192980
    },
    {
      "epoch": 0.6763818355781261,
      "grad_norm": 2.8125,
      "learning_rate": 4.3030081180505263e-05,
      "loss": 0.9183,
      "step": 192990
    },
    {
      "epoch": 0.6764168830850218,
      "grad_norm": 2.96875,
      "learning_rate": 4.302943215184155e-05,
      "loss": 0.8683,
      "step": 193000
    },
    {
      "epoch": 0.6764519305919173,
      "grad_norm": 2.75,
      "learning_rate": 4.302878312317785e-05,
      "loss": 0.8598,
      "step": 193010
    },
    {
      "epoch": 0.676486978098813,
      "grad_norm": 3.265625,
      "learning_rate": 4.302813409451415e-05,
      "loss": 0.8703,
      "step": 193020
    },
    {
      "epoch": 0.6765220256057085,
      "grad_norm": 3.234375,
      "learning_rate": 4.302748506585045e-05,
      "loss": 0.9923,
      "step": 193030
    },
    {
      "epoch": 0.6765570731126042,
      "grad_norm": 2.890625,
      "learning_rate": 4.302683603718674e-05,
      "loss": 0.9381,
      "step": 193040
    },
    {
      "epoch": 0.6765921206194997,
      "grad_norm": 3.125,
      "learning_rate": 4.3026187008523045e-05,
      "loss": 0.9811,
      "step": 193050
    },
    {
      "epoch": 0.6766271681263953,
      "grad_norm": 3.25,
      "learning_rate": 4.3025537979859346e-05,
      "loss": 0.9321,
      "step": 193060
    },
    {
      "epoch": 0.6766622156332909,
      "grad_norm": 2.8125,
      "learning_rate": 4.302488895119564e-05,
      "loss": 0.8295,
      "step": 193070
    },
    {
      "epoch": 0.6766972631401865,
      "grad_norm": 2.703125,
      "learning_rate": 4.302423992253194e-05,
      "loss": 0.8306,
      "step": 193080
    },
    {
      "epoch": 0.6767323106470822,
      "grad_norm": 2.859375,
      "learning_rate": 4.302359089386824e-05,
      "loss": 0.8927,
      "step": 193090
    },
    {
      "epoch": 0.6767673581539777,
      "grad_norm": 2.953125,
      "learning_rate": 4.302294186520454e-05,
      "loss": 0.9877,
      "step": 193100
    },
    {
      "epoch": 0.6768024056608734,
      "grad_norm": 3.0,
      "learning_rate": 4.302229283654083e-05,
      "loss": 0.8813,
      "step": 193110
    },
    {
      "epoch": 0.6768374531677689,
      "grad_norm": 2.578125,
      "learning_rate": 4.3021643807877134e-05,
      "loss": 0.9446,
      "step": 193120
    },
    {
      "epoch": 0.6768725006746645,
      "grad_norm": 3.1875,
      "learning_rate": 4.302099477921343e-05,
      "loss": 0.896,
      "step": 193130
    },
    {
      "epoch": 0.6769075481815601,
      "grad_norm": 2.859375,
      "learning_rate": 4.302034575054973e-05,
      "loss": 0.9177,
      "step": 193140
    },
    {
      "epoch": 0.6769425956884557,
      "grad_norm": 2.75,
      "learning_rate": 4.3019696721886025e-05,
      "loss": 0.9176,
      "step": 193150
    },
    {
      "epoch": 0.6769776431953513,
      "grad_norm": 2.8125,
      "learning_rate": 4.3019047693222326e-05,
      "loss": 0.878,
      "step": 193160
    },
    {
      "epoch": 0.6770126907022469,
      "grad_norm": 2.703125,
      "learning_rate": 4.301839866455862e-05,
      "loss": 0.8756,
      "step": 193170
    },
    {
      "epoch": 0.6770477382091425,
      "grad_norm": 2.5625,
      "learning_rate": 4.301774963589492e-05,
      "loss": 0.8632,
      "step": 193180
    },
    {
      "epoch": 0.6770827857160381,
      "grad_norm": 3.46875,
      "learning_rate": 4.301710060723122e-05,
      "loss": 0.9421,
      "step": 193190
    },
    {
      "epoch": 0.6771178332229337,
      "grad_norm": 3.03125,
      "learning_rate": 4.301645157856752e-05,
      "loss": 0.9208,
      "step": 193200
    },
    {
      "epoch": 0.6771528807298293,
      "grad_norm": 3.171875,
      "learning_rate": 4.301580254990382e-05,
      "loss": 0.8977,
      "step": 193210
    },
    {
      "epoch": 0.6771879282367249,
      "grad_norm": 2.796875,
      "learning_rate": 4.3015153521240114e-05,
      "loss": 0.8422,
      "step": 193220
    },
    {
      "epoch": 0.6772229757436204,
      "grad_norm": 3.0625,
      "learning_rate": 4.3014504492576415e-05,
      "loss": 0.8709,
      "step": 193230
    },
    {
      "epoch": 0.6772580232505161,
      "grad_norm": 2.765625,
      "learning_rate": 4.301385546391271e-05,
      "loss": 0.8811,
      "step": 193240
    },
    {
      "epoch": 0.6772930707574116,
      "grad_norm": 2.984375,
      "learning_rate": 4.301320643524901e-05,
      "loss": 0.8977,
      "step": 193250
    },
    {
      "epoch": 0.6773281182643073,
      "grad_norm": 3.109375,
      "learning_rate": 4.3012557406585306e-05,
      "loss": 0.8841,
      "step": 193260
    },
    {
      "epoch": 0.6773631657712028,
      "grad_norm": 3.234375,
      "learning_rate": 4.301190837792161e-05,
      "loss": 0.9436,
      "step": 193270
    },
    {
      "epoch": 0.6773982132780985,
      "grad_norm": 2.53125,
      "learning_rate": 4.30112593492579e-05,
      "loss": 0.9315,
      "step": 193280
    },
    {
      "epoch": 0.6774332607849941,
      "grad_norm": 2.625,
      "learning_rate": 4.3010610320594203e-05,
      "loss": 0.9135,
      "step": 193290
    },
    {
      "epoch": 0.6774683082918896,
      "grad_norm": 3.03125,
      "learning_rate": 4.30099612919305e-05,
      "loss": 0.874,
      "step": 193300
    },
    {
      "epoch": 0.6775033557987853,
      "grad_norm": 2.71875,
      "learning_rate": 4.30093122632668e-05,
      "loss": 0.884,
      "step": 193310
    },
    {
      "epoch": 0.6775384033056808,
      "grad_norm": 3.53125,
      "learning_rate": 4.3008663234603094e-05,
      "loss": 0.9113,
      "step": 193320
    },
    {
      "epoch": 0.6775734508125765,
      "grad_norm": 3.34375,
      "learning_rate": 4.3008014205939395e-05,
      "loss": 0.9489,
      "step": 193330
    },
    {
      "epoch": 0.677608498319472,
      "grad_norm": 3.15625,
      "learning_rate": 4.30073651772757e-05,
      "loss": 0.899,
      "step": 193340
    },
    {
      "epoch": 0.6776435458263677,
      "grad_norm": 2.765625,
      "learning_rate": 4.300671614861199e-05,
      "loss": 0.9078,
      "step": 193350
    },
    {
      "epoch": 0.6776785933332632,
      "grad_norm": 3.296875,
      "learning_rate": 4.300606711994829e-05,
      "loss": 0.8954,
      "step": 193360
    },
    {
      "epoch": 0.6777136408401588,
      "grad_norm": 3.125,
      "learning_rate": 4.300541809128459e-05,
      "loss": 0.8329,
      "step": 193370
    },
    {
      "epoch": 0.6777486883470545,
      "grad_norm": 2.671875,
      "learning_rate": 4.300476906262088e-05,
      "loss": 0.8662,
      "step": 193380
    },
    {
      "epoch": 0.67778373585395,
      "grad_norm": 2.984375,
      "learning_rate": 4.300412003395718e-05,
      "loss": 0.9541,
      "step": 193390
    },
    {
      "epoch": 0.6778187833608457,
      "grad_norm": 2.8125,
      "learning_rate": 4.300347100529348e-05,
      "loss": 0.8224,
      "step": 193400
    },
    {
      "epoch": 0.6778538308677412,
      "grad_norm": 2.734375,
      "learning_rate": 4.300282197662977e-05,
      "loss": 0.9407,
      "step": 193410
    },
    {
      "epoch": 0.6778888783746368,
      "grad_norm": 2.984375,
      "learning_rate": 4.3002172947966074e-05,
      "loss": 0.8964,
      "step": 193420
    },
    {
      "epoch": 0.6779239258815324,
      "grad_norm": 2.9375,
      "learning_rate": 4.3001523919302375e-05,
      "loss": 0.9546,
      "step": 193430
    },
    {
      "epoch": 0.677958973388428,
      "grad_norm": 2.921875,
      "learning_rate": 4.300087489063867e-05,
      "loss": 0.8567,
      "step": 193440
    },
    {
      "epoch": 0.6779940208953236,
      "grad_norm": 2.640625,
      "learning_rate": 4.300022586197497e-05,
      "loss": 0.9332,
      "step": 193450
    },
    {
      "epoch": 0.6780290684022192,
      "grad_norm": 3.125,
      "learning_rate": 4.2999576833311266e-05,
      "loss": 0.9206,
      "step": 193460
    },
    {
      "epoch": 0.6780641159091148,
      "grad_norm": 3.1875,
      "learning_rate": 4.299892780464757e-05,
      "loss": 1.0112,
      "step": 193470
    },
    {
      "epoch": 0.6780991634160104,
      "grad_norm": 3.15625,
      "learning_rate": 4.299827877598386e-05,
      "loss": 0.9708,
      "step": 193480
    },
    {
      "epoch": 0.678134210922906,
      "grad_norm": 2.703125,
      "learning_rate": 4.2997629747320163e-05,
      "loss": 0.8718,
      "step": 193490
    },
    {
      "epoch": 0.6781692584298016,
      "grad_norm": 3.515625,
      "learning_rate": 4.299698071865646e-05,
      "loss": 1.0017,
      "step": 193500
    },
    {
      "epoch": 0.6782043059366972,
      "grad_norm": 3.1875,
      "learning_rate": 4.299633168999276e-05,
      "loss": 0.9266,
      "step": 193510
    },
    {
      "epoch": 0.6782393534435928,
      "grad_norm": 3.40625,
      "learning_rate": 4.2995682661329054e-05,
      "loss": 0.9574,
      "step": 193520
    },
    {
      "epoch": 0.6782744009504884,
      "grad_norm": 2.96875,
      "learning_rate": 4.2995033632665355e-05,
      "loss": 0.9141,
      "step": 193530
    },
    {
      "epoch": 0.678309448457384,
      "grad_norm": 2.65625,
      "learning_rate": 4.299438460400165e-05,
      "loss": 0.8824,
      "step": 193540
    },
    {
      "epoch": 0.6783444959642796,
      "grad_norm": 2.890625,
      "learning_rate": 4.299373557533795e-05,
      "loss": 0.867,
      "step": 193550
    },
    {
      "epoch": 0.6783795434711751,
      "grad_norm": 3.03125,
      "learning_rate": 4.2993086546674246e-05,
      "loss": 0.768,
      "step": 193560
    },
    {
      "epoch": 0.6784145909780708,
      "grad_norm": 3.296875,
      "learning_rate": 4.299243751801055e-05,
      "loss": 0.8996,
      "step": 193570
    },
    {
      "epoch": 0.6784496384849664,
      "grad_norm": 3.046875,
      "learning_rate": 4.299178848934685e-05,
      "loss": 0.8253,
      "step": 193580
    },
    {
      "epoch": 0.678484685991862,
      "grad_norm": 3.125,
      "learning_rate": 4.2991139460683143e-05,
      "loss": 0.9214,
      "step": 193590
    },
    {
      "epoch": 0.6785197334987576,
      "grad_norm": 3.140625,
      "learning_rate": 4.2990490432019445e-05,
      "loss": 0.868,
      "step": 193600
    },
    {
      "epoch": 0.6785547810056531,
      "grad_norm": 3.0,
      "learning_rate": 4.298984140335574e-05,
      "loss": 0.9554,
      "step": 193610
    },
    {
      "epoch": 0.6785898285125488,
      "grad_norm": 2.6875,
      "learning_rate": 4.298919237469204e-05,
      "loss": 0.8357,
      "step": 193620
    },
    {
      "epoch": 0.6786248760194443,
      "grad_norm": 3.0,
      "learning_rate": 4.2988543346028335e-05,
      "loss": 0.9279,
      "step": 193630
    },
    {
      "epoch": 0.67865992352634,
      "grad_norm": 2.890625,
      "learning_rate": 4.298789431736464e-05,
      "loss": 0.8766,
      "step": 193640
    },
    {
      "epoch": 0.6786949710332355,
      "grad_norm": 2.65625,
      "learning_rate": 4.298724528870093e-05,
      "loss": 0.9129,
      "step": 193650
    },
    {
      "epoch": 0.6787300185401312,
      "grad_norm": 3.140625,
      "learning_rate": 4.298659626003723e-05,
      "loss": 0.9228,
      "step": 193660
    },
    {
      "epoch": 0.6787650660470268,
      "grad_norm": 2.703125,
      "learning_rate": 4.298594723137353e-05,
      "loss": 0.8914,
      "step": 193670
    },
    {
      "epoch": 0.6788001135539223,
      "grad_norm": 3.015625,
      "learning_rate": 4.298529820270983e-05,
      "loss": 0.9552,
      "step": 193680
    },
    {
      "epoch": 0.678835161060818,
      "grad_norm": 2.953125,
      "learning_rate": 4.2984649174046123e-05,
      "loss": 0.9191,
      "step": 193690
    },
    {
      "epoch": 0.6788702085677135,
      "grad_norm": 3.171875,
      "learning_rate": 4.2984000145382425e-05,
      "loss": 0.938,
      "step": 193700
    },
    {
      "epoch": 0.6789052560746092,
      "grad_norm": 3.28125,
      "learning_rate": 4.2983351116718726e-05,
      "loss": 0.894,
      "step": 193710
    },
    {
      "epoch": 0.6789403035815047,
      "grad_norm": 2.78125,
      "learning_rate": 4.298270208805502e-05,
      "loss": 0.9101,
      "step": 193720
    },
    {
      "epoch": 0.6789753510884003,
      "grad_norm": 3.171875,
      "learning_rate": 4.298205305939132e-05,
      "loss": 0.9392,
      "step": 193730
    },
    {
      "epoch": 0.6790103985952959,
      "grad_norm": 3.3125,
      "learning_rate": 4.298140403072762e-05,
      "loss": 0.8838,
      "step": 193740
    },
    {
      "epoch": 0.6790454461021915,
      "grad_norm": 2.921875,
      "learning_rate": 4.298075500206391e-05,
      "loss": 0.947,
      "step": 193750
    },
    {
      "epoch": 0.6790804936090871,
      "grad_norm": 2.9375,
      "learning_rate": 4.2980105973400206e-05,
      "loss": 0.9506,
      "step": 193760
    },
    {
      "epoch": 0.6791155411159827,
      "grad_norm": 2.46875,
      "learning_rate": 4.297945694473651e-05,
      "loss": 0.8804,
      "step": 193770
    },
    {
      "epoch": 0.6791505886228784,
      "grad_norm": 2.984375,
      "learning_rate": 4.29788079160728e-05,
      "loss": 0.8351,
      "step": 193780
    },
    {
      "epoch": 0.6791856361297739,
      "grad_norm": 2.8125,
      "learning_rate": 4.2978158887409103e-05,
      "loss": 0.9472,
      "step": 193790
    },
    {
      "epoch": 0.6792206836366695,
      "grad_norm": 2.625,
      "learning_rate": 4.2977509858745405e-05,
      "loss": 0.8754,
      "step": 193800
    },
    {
      "epoch": 0.6792557311435651,
      "grad_norm": 3.28125,
      "learning_rate": 4.29768608300817e-05,
      "loss": 0.9573,
      "step": 193810
    },
    {
      "epoch": 0.6792907786504607,
      "grad_norm": 3.15625,
      "learning_rate": 4.2976211801418e-05,
      "loss": 0.9129,
      "step": 193820
    },
    {
      "epoch": 0.6793258261573563,
      "grad_norm": 3.609375,
      "learning_rate": 4.2975562772754295e-05,
      "loss": 0.8151,
      "step": 193830
    },
    {
      "epoch": 0.6793608736642519,
      "grad_norm": 2.859375,
      "learning_rate": 4.29749137440906e-05,
      "loss": 0.9332,
      "step": 193840
    },
    {
      "epoch": 0.6793959211711474,
      "grad_norm": 3.03125,
      "learning_rate": 4.297426471542689e-05,
      "loss": 0.8763,
      "step": 193850
    },
    {
      "epoch": 0.6794309686780431,
      "grad_norm": 3.015625,
      "learning_rate": 4.297361568676319e-05,
      "loss": 0.8845,
      "step": 193860
    },
    {
      "epoch": 0.6794660161849387,
      "grad_norm": 2.78125,
      "learning_rate": 4.297296665809949e-05,
      "loss": 0.9438,
      "step": 193870
    },
    {
      "epoch": 0.6795010636918343,
      "grad_norm": 3.171875,
      "learning_rate": 4.297231762943579e-05,
      "loss": 0.9151,
      "step": 193880
    },
    {
      "epoch": 0.6795361111987299,
      "grad_norm": 2.953125,
      "learning_rate": 4.2971668600772083e-05,
      "loss": 0.8706,
      "step": 193890
    },
    {
      "epoch": 0.6795711587056255,
      "grad_norm": 2.953125,
      "learning_rate": 4.2971019572108385e-05,
      "loss": 1.0235,
      "step": 193900
    },
    {
      "epoch": 0.6796062062125211,
      "grad_norm": 2.796875,
      "learning_rate": 4.297037054344468e-05,
      "loss": 0.888,
      "step": 193910
    },
    {
      "epoch": 0.6796412537194166,
      "grad_norm": 2.796875,
      "learning_rate": 4.296972151478098e-05,
      "loss": 0.9036,
      "step": 193920
    },
    {
      "epoch": 0.6796763012263123,
      "grad_norm": 3.0625,
      "learning_rate": 4.296907248611728e-05,
      "loss": 0.8796,
      "step": 193930
    },
    {
      "epoch": 0.6797113487332078,
      "grad_norm": 3.96875,
      "learning_rate": 4.296842345745358e-05,
      "loss": 0.819,
      "step": 193940
    },
    {
      "epoch": 0.6797463962401035,
      "grad_norm": 3.203125,
      "learning_rate": 4.296777442878988e-05,
      "loss": 0.8513,
      "step": 193950
    },
    {
      "epoch": 0.679781443746999,
      "grad_norm": 3.15625,
      "learning_rate": 4.296712540012617e-05,
      "loss": 0.9507,
      "step": 193960
    },
    {
      "epoch": 0.6798164912538947,
      "grad_norm": 2.46875,
      "learning_rate": 4.2966476371462474e-05,
      "loss": 0.8037,
      "step": 193970
    },
    {
      "epoch": 0.6798515387607903,
      "grad_norm": 2.96875,
      "learning_rate": 4.296582734279877e-05,
      "loss": 0.903,
      "step": 193980
    },
    {
      "epoch": 0.6798865862676858,
      "grad_norm": 2.34375,
      "learning_rate": 4.296517831413507e-05,
      "loss": 0.8711,
      "step": 193990
    },
    {
      "epoch": 0.6799216337745815,
      "grad_norm": 2.96875,
      "learning_rate": 4.2964529285471365e-05,
      "loss": 0.8948,
      "step": 194000
    },
    {
      "epoch": 0.679956681281477,
      "grad_norm": 3.0625,
      "learning_rate": 4.2963880256807666e-05,
      "loss": 0.8184,
      "step": 194010
    },
    {
      "epoch": 0.6799917287883727,
      "grad_norm": 3.015625,
      "learning_rate": 4.296323122814396e-05,
      "loss": 0.9052,
      "step": 194020
    },
    {
      "epoch": 0.6800267762952682,
      "grad_norm": 3.1875,
      "learning_rate": 4.296258219948026e-05,
      "loss": 0.8618,
      "step": 194030
    },
    {
      "epoch": 0.6800618238021638,
      "grad_norm": 2.953125,
      "learning_rate": 4.296193317081656e-05,
      "loss": 0.8776,
      "step": 194040
    },
    {
      "epoch": 0.6800968713090594,
      "grad_norm": 2.96875,
      "learning_rate": 4.296128414215286e-05,
      "loss": 0.9706,
      "step": 194050
    },
    {
      "epoch": 0.680131918815955,
      "grad_norm": 3.03125,
      "learning_rate": 4.296063511348915e-05,
      "loss": 0.8813,
      "step": 194060
    },
    {
      "epoch": 0.6801669663228507,
      "grad_norm": 2.625,
      "learning_rate": 4.2959986084825454e-05,
      "loss": 0.8702,
      "step": 194070
    },
    {
      "epoch": 0.6802020138297462,
      "grad_norm": 2.921875,
      "learning_rate": 4.2959337056161756e-05,
      "loss": 0.9731,
      "step": 194080
    },
    {
      "epoch": 0.6802370613366419,
      "grad_norm": 3.078125,
      "learning_rate": 4.295868802749805e-05,
      "loss": 0.9982,
      "step": 194090
    },
    {
      "epoch": 0.6802721088435374,
      "grad_norm": 2.515625,
      "learning_rate": 4.295803899883435e-05,
      "loss": 0.9262,
      "step": 194100
    },
    {
      "epoch": 0.680307156350433,
      "grad_norm": 2.671875,
      "learning_rate": 4.2957389970170646e-05,
      "loss": 0.9258,
      "step": 194110
    },
    {
      "epoch": 0.6803422038573286,
      "grad_norm": 2.9375,
      "learning_rate": 4.295674094150695e-05,
      "loss": 0.8853,
      "step": 194120
    },
    {
      "epoch": 0.6803772513642242,
      "grad_norm": 3.1875,
      "learning_rate": 4.2956091912843235e-05,
      "loss": 0.8062,
      "step": 194130
    },
    {
      "epoch": 0.6804122988711198,
      "grad_norm": 2.4375,
      "learning_rate": 4.295544288417954e-05,
      "loss": 0.8315,
      "step": 194140
    },
    {
      "epoch": 0.6804473463780154,
      "grad_norm": 3.1875,
      "learning_rate": 4.295479385551583e-05,
      "loss": 0.8849,
      "step": 194150
    },
    {
      "epoch": 0.680482393884911,
      "grad_norm": 2.765625,
      "learning_rate": 4.295414482685213e-05,
      "loss": 0.8756,
      "step": 194160
    },
    {
      "epoch": 0.6805174413918066,
      "grad_norm": 3.1875,
      "learning_rate": 4.2953495798188434e-05,
      "loss": 0.8451,
      "step": 194170
    },
    {
      "epoch": 0.6805524888987022,
      "grad_norm": 2.796875,
      "learning_rate": 4.295284676952473e-05,
      "loss": 0.9134,
      "step": 194180
    },
    {
      "epoch": 0.6805875364055978,
      "grad_norm": 2.875,
      "learning_rate": 4.295219774086103e-05,
      "loss": 0.8769,
      "step": 194190
    },
    {
      "epoch": 0.6806225839124934,
      "grad_norm": 2.65625,
      "learning_rate": 4.2951548712197325e-05,
      "loss": 0.891,
      "step": 194200
    },
    {
      "epoch": 0.680657631419389,
      "grad_norm": 2.953125,
      "learning_rate": 4.2950899683533626e-05,
      "loss": 0.8807,
      "step": 194210
    },
    {
      "epoch": 0.6806926789262846,
      "grad_norm": 2.84375,
      "learning_rate": 4.295025065486992e-05,
      "loss": 0.8853,
      "step": 194220
    },
    {
      "epoch": 0.6807277264331801,
      "grad_norm": 2.640625,
      "learning_rate": 4.294960162620622e-05,
      "loss": 0.8945,
      "step": 194230
    },
    {
      "epoch": 0.6807627739400758,
      "grad_norm": 3.09375,
      "learning_rate": 4.294895259754252e-05,
      "loss": 0.9669,
      "step": 194240
    },
    {
      "epoch": 0.6807978214469713,
      "grad_norm": 2.96875,
      "learning_rate": 4.294830356887882e-05,
      "loss": 0.9416,
      "step": 194250
    },
    {
      "epoch": 0.680832868953867,
      "grad_norm": 3.015625,
      "learning_rate": 4.294765454021511e-05,
      "loss": 0.9634,
      "step": 194260
    },
    {
      "epoch": 0.6808679164607626,
      "grad_norm": 2.828125,
      "learning_rate": 4.2947005511551414e-05,
      "loss": 0.8979,
      "step": 194270
    },
    {
      "epoch": 0.6809029639676581,
      "grad_norm": 3.359375,
      "learning_rate": 4.294635648288771e-05,
      "loss": 0.9428,
      "step": 194280
    },
    {
      "epoch": 0.6809380114745538,
      "grad_norm": 2.515625,
      "learning_rate": 4.294570745422401e-05,
      "loss": 0.8424,
      "step": 194290
    },
    {
      "epoch": 0.6809730589814493,
      "grad_norm": 2.875,
      "learning_rate": 4.294505842556031e-05,
      "loss": 0.9236,
      "step": 194300
    },
    {
      "epoch": 0.681008106488345,
      "grad_norm": 3.375,
      "learning_rate": 4.2944409396896606e-05,
      "loss": 0.8628,
      "step": 194310
    },
    {
      "epoch": 0.6810431539952405,
      "grad_norm": 2.609375,
      "learning_rate": 4.294376036823291e-05,
      "loss": 0.8453,
      "step": 194320
    },
    {
      "epoch": 0.6810782015021362,
      "grad_norm": 2.71875,
      "learning_rate": 4.29431113395692e-05,
      "loss": 0.8327,
      "step": 194330
    },
    {
      "epoch": 0.6811132490090317,
      "grad_norm": 3.5625,
      "learning_rate": 4.2942462310905504e-05,
      "loss": 0.9213,
      "step": 194340
    },
    {
      "epoch": 0.6811482965159273,
      "grad_norm": 3.640625,
      "learning_rate": 4.29418132822418e-05,
      "loss": 0.9361,
      "step": 194350
    },
    {
      "epoch": 0.681183344022823,
      "grad_norm": 2.78125,
      "learning_rate": 4.29411642535781e-05,
      "loss": 0.9647,
      "step": 194360
    },
    {
      "epoch": 0.6812183915297185,
      "grad_norm": 2.859375,
      "learning_rate": 4.2940515224914394e-05,
      "loss": 0.874,
      "step": 194370
    },
    {
      "epoch": 0.6812534390366142,
      "grad_norm": 3.0625,
      "learning_rate": 4.2939866196250696e-05,
      "loss": 0.9104,
      "step": 194380
    },
    {
      "epoch": 0.6812884865435097,
      "grad_norm": 2.671875,
      "learning_rate": 4.293921716758699e-05,
      "loss": 0.8853,
      "step": 194390
    },
    {
      "epoch": 0.6813235340504054,
      "grad_norm": 2.9375,
      "learning_rate": 4.293856813892329e-05,
      "loss": 0.9362,
      "step": 194400
    },
    {
      "epoch": 0.6813585815573009,
      "grad_norm": 3.046875,
      "learning_rate": 4.2937919110259586e-05,
      "loss": 0.9205,
      "step": 194410
    },
    {
      "epoch": 0.6813936290641965,
      "grad_norm": 3.0,
      "learning_rate": 4.293727008159589e-05,
      "loss": 0.8808,
      "step": 194420
    },
    {
      "epoch": 0.6814286765710921,
      "grad_norm": 3.03125,
      "learning_rate": 4.293662105293218e-05,
      "loss": 0.8789,
      "step": 194430
    },
    {
      "epoch": 0.6814637240779877,
      "grad_norm": 3.390625,
      "learning_rate": 4.2935972024268484e-05,
      "loss": 0.8632,
      "step": 194440
    },
    {
      "epoch": 0.6814987715848833,
      "grad_norm": 2.9375,
      "learning_rate": 4.2935322995604785e-05,
      "loss": 0.9124,
      "step": 194450
    },
    {
      "epoch": 0.6815338190917789,
      "grad_norm": 2.875,
      "learning_rate": 4.293467396694108e-05,
      "loss": 0.9319,
      "step": 194460
    },
    {
      "epoch": 0.6815688665986745,
      "grad_norm": 3.203125,
      "learning_rate": 4.293402493827738e-05,
      "loss": 0.9094,
      "step": 194470
    },
    {
      "epoch": 0.6816039141055701,
      "grad_norm": 3.015625,
      "learning_rate": 4.2933375909613676e-05,
      "loss": 0.8051,
      "step": 194480
    },
    {
      "epoch": 0.6816389616124657,
      "grad_norm": 2.78125,
      "learning_rate": 4.293272688094998e-05,
      "loss": 0.8731,
      "step": 194490
    },
    {
      "epoch": 0.6816740091193613,
      "grad_norm": 2.703125,
      "learning_rate": 4.2932077852286265e-05,
      "loss": 1.0127,
      "step": 194500
    },
    {
      "epoch": 0.6817090566262569,
      "grad_norm": 3.53125,
      "learning_rate": 4.2931428823622566e-05,
      "loss": 0.919,
      "step": 194510
    },
    {
      "epoch": 0.6817441041331525,
      "grad_norm": 2.59375,
      "learning_rate": 4.293077979495886e-05,
      "loss": 0.849,
      "step": 194520
    },
    {
      "epoch": 0.6817791516400481,
      "grad_norm": 2.953125,
      "learning_rate": 4.293013076629516e-05,
      "loss": 0.8943,
      "step": 194530
    },
    {
      "epoch": 0.6818141991469436,
      "grad_norm": 3.015625,
      "learning_rate": 4.2929481737631464e-05,
      "loss": 0.863,
      "step": 194540
    },
    {
      "epoch": 0.6818492466538393,
      "grad_norm": 2.578125,
      "learning_rate": 4.292883270896776e-05,
      "loss": 0.902,
      "step": 194550
    },
    {
      "epoch": 0.6818842941607349,
      "grad_norm": 2.78125,
      "learning_rate": 4.292818368030406e-05,
      "loss": 0.882,
      "step": 194560
    },
    {
      "epoch": 0.6819193416676305,
      "grad_norm": 3.21875,
      "learning_rate": 4.2927534651640354e-05,
      "loss": 0.8327,
      "step": 194570
    },
    {
      "epoch": 0.6819543891745261,
      "grad_norm": 2.921875,
      "learning_rate": 4.2926885622976656e-05,
      "loss": 0.8866,
      "step": 194580
    },
    {
      "epoch": 0.6819894366814216,
      "grad_norm": 3.0625,
      "learning_rate": 4.292623659431295e-05,
      "loss": 0.8612,
      "step": 194590
    },
    {
      "epoch": 0.6820244841883173,
      "grad_norm": 3.21875,
      "learning_rate": 4.292558756564925e-05,
      "loss": 0.9525,
      "step": 194600
    },
    {
      "epoch": 0.6820595316952128,
      "grad_norm": 3.359375,
      "learning_rate": 4.2924938536985546e-05,
      "loss": 0.9567,
      "step": 194610
    },
    {
      "epoch": 0.6820945792021085,
      "grad_norm": 2.859375,
      "learning_rate": 4.292428950832185e-05,
      "loss": 0.8521,
      "step": 194620
    },
    {
      "epoch": 0.682129626709004,
      "grad_norm": 2.859375,
      "learning_rate": 4.292364047965814e-05,
      "loss": 0.9008,
      "step": 194630
    },
    {
      "epoch": 0.6821646742158997,
      "grad_norm": 3.0625,
      "learning_rate": 4.2922991450994444e-05,
      "loss": 0.8643,
      "step": 194640
    },
    {
      "epoch": 0.6821997217227952,
      "grad_norm": 2.890625,
      "learning_rate": 4.292234242233074e-05,
      "loss": 0.9755,
      "step": 194650
    },
    {
      "epoch": 0.6822347692296908,
      "grad_norm": 2.84375,
      "learning_rate": 4.292169339366704e-05,
      "loss": 0.9416,
      "step": 194660
    },
    {
      "epoch": 0.6822698167365865,
      "grad_norm": 2.828125,
      "learning_rate": 4.292104436500334e-05,
      "loss": 0.9206,
      "step": 194670
    },
    {
      "epoch": 0.682304864243482,
      "grad_norm": 2.78125,
      "learning_rate": 4.2920395336339636e-05,
      "loss": 0.848,
      "step": 194680
    },
    {
      "epoch": 0.6823399117503777,
      "grad_norm": 3.1875,
      "learning_rate": 4.291974630767594e-05,
      "loss": 1.0528,
      "step": 194690
    },
    {
      "epoch": 0.6823749592572732,
      "grad_norm": 3.09375,
      "learning_rate": 4.291909727901223e-05,
      "loss": 0.936,
      "step": 194700
    },
    {
      "epoch": 0.6824100067641689,
      "grad_norm": 3.328125,
      "learning_rate": 4.291844825034853e-05,
      "loss": 0.9042,
      "step": 194710
    },
    {
      "epoch": 0.6824450542710644,
      "grad_norm": 2.78125,
      "learning_rate": 4.291779922168483e-05,
      "loss": 0.9135,
      "step": 194720
    },
    {
      "epoch": 0.68248010177796,
      "grad_norm": 2.546875,
      "learning_rate": 4.291715019302113e-05,
      "loss": 0.8279,
      "step": 194730
    },
    {
      "epoch": 0.6825151492848556,
      "grad_norm": 3.09375,
      "learning_rate": 4.2916501164357424e-05,
      "loss": 0.8202,
      "step": 194740
    },
    {
      "epoch": 0.6825501967917512,
      "grad_norm": 3.296875,
      "learning_rate": 4.2915852135693725e-05,
      "loss": 0.8468,
      "step": 194750
    },
    {
      "epoch": 0.6825852442986469,
      "grad_norm": 2.859375,
      "learning_rate": 4.291520310703002e-05,
      "loss": 0.9185,
      "step": 194760
    },
    {
      "epoch": 0.6826202918055424,
      "grad_norm": 3.125,
      "learning_rate": 4.291455407836632e-05,
      "loss": 0.918,
      "step": 194770
    },
    {
      "epoch": 0.682655339312438,
      "grad_norm": 2.71875,
      "learning_rate": 4.2913905049702616e-05,
      "loss": 1.0327,
      "step": 194780
    },
    {
      "epoch": 0.6826903868193336,
      "grad_norm": 3.1875,
      "learning_rate": 4.291325602103892e-05,
      "loss": 0.8696,
      "step": 194790
    },
    {
      "epoch": 0.6827254343262292,
      "grad_norm": 2.703125,
      "learning_rate": 4.291260699237522e-05,
      "loss": 0.8447,
      "step": 194800
    },
    {
      "epoch": 0.6827604818331248,
      "grad_norm": 2.75,
      "learning_rate": 4.291195796371151e-05,
      "loss": 0.9271,
      "step": 194810
    },
    {
      "epoch": 0.6827955293400204,
      "grad_norm": 3.671875,
      "learning_rate": 4.2911308935047814e-05,
      "loss": 0.8989,
      "step": 194820
    },
    {
      "epoch": 0.682830576846916,
      "grad_norm": 3.0,
      "learning_rate": 4.291065990638411e-05,
      "loss": 0.883,
      "step": 194830
    },
    {
      "epoch": 0.6828656243538116,
      "grad_norm": 3.09375,
      "learning_rate": 4.291001087772041e-05,
      "loss": 0.9493,
      "step": 194840
    },
    {
      "epoch": 0.6829006718607072,
      "grad_norm": 2.921875,
      "learning_rate": 4.2909361849056705e-05,
      "loss": 0.9614,
      "step": 194850
    },
    {
      "epoch": 0.6829357193676028,
      "grad_norm": 2.40625,
      "learning_rate": 4.2908712820393006e-05,
      "loss": 0.8063,
      "step": 194860
    },
    {
      "epoch": 0.6829707668744984,
      "grad_norm": 2.453125,
      "learning_rate": 4.29080637917293e-05,
      "loss": 0.8161,
      "step": 194870
    },
    {
      "epoch": 0.683005814381394,
      "grad_norm": 2.90625,
      "learning_rate": 4.2907414763065596e-05,
      "loss": 0.8967,
      "step": 194880
    },
    {
      "epoch": 0.6830408618882896,
      "grad_norm": 3.140625,
      "learning_rate": 4.29067657344019e-05,
      "loss": 0.8416,
      "step": 194890
    },
    {
      "epoch": 0.6830759093951851,
      "grad_norm": 2.65625,
      "learning_rate": 4.290611670573819e-05,
      "loss": 0.9348,
      "step": 194900
    },
    {
      "epoch": 0.6831109569020808,
      "grad_norm": 2.796875,
      "learning_rate": 4.290546767707449e-05,
      "loss": 0.7882,
      "step": 194910
    },
    {
      "epoch": 0.6831460044089763,
      "grad_norm": 2.8125,
      "learning_rate": 4.290481864841079e-05,
      "loss": 0.9383,
      "step": 194920
    },
    {
      "epoch": 0.683181051915872,
      "grad_norm": 2.640625,
      "learning_rate": 4.290416961974709e-05,
      "loss": 0.8182,
      "step": 194930
    },
    {
      "epoch": 0.6832160994227675,
      "grad_norm": 3.0,
      "learning_rate": 4.2903520591083384e-05,
      "loss": 0.857,
      "step": 194940
    },
    {
      "epoch": 0.6832511469296632,
      "grad_norm": 3.1875,
      "learning_rate": 4.2902871562419685e-05,
      "loss": 0.9128,
      "step": 194950
    },
    {
      "epoch": 0.6832861944365588,
      "grad_norm": 3.25,
      "learning_rate": 4.290222253375598e-05,
      "loss": 0.9273,
      "step": 194960
    },
    {
      "epoch": 0.6833212419434543,
      "grad_norm": 3.265625,
      "learning_rate": 4.290157350509228e-05,
      "loss": 0.9459,
      "step": 194970
    },
    {
      "epoch": 0.68335628945035,
      "grad_norm": 2.828125,
      "learning_rate": 4.2900924476428576e-05,
      "loss": 0.9953,
      "step": 194980
    },
    {
      "epoch": 0.6833913369572455,
      "grad_norm": 2.828125,
      "learning_rate": 4.290027544776488e-05,
      "loss": 0.8419,
      "step": 194990
    },
    {
      "epoch": 0.6834263844641412,
      "grad_norm": 2.75,
      "learning_rate": 4.289962641910117e-05,
      "loss": 0.9247,
      "step": 195000
    },
    {
      "epoch": 0.6834263844641412,
      "eval_loss": 0.8473583459854126,
      "eval_runtime": 551.4795,
      "eval_samples_per_second": 689.846,
      "eval_steps_per_second": 57.487,
      "step": 195000
    },
    {
      "epoch": 0.6834614319710367,
      "grad_norm": 3.171875,
      "learning_rate": 4.289897739043747e-05,
      "loss": 0.8657,
      "step": 195010
    },
    {
      "epoch": 0.6834964794779324,
      "grad_norm": 3.125,
      "learning_rate": 4.289832836177377e-05,
      "loss": 0.9433,
      "step": 195020
    },
    {
      "epoch": 0.6835315269848279,
      "grad_norm": 3.03125,
      "learning_rate": 4.289767933311007e-05,
      "loss": 0.9234,
      "step": 195030
    },
    {
      "epoch": 0.6835665744917235,
      "grad_norm": 2.8125,
      "learning_rate": 4.289703030444637e-05,
      "loss": 0.8561,
      "step": 195040
    },
    {
      "epoch": 0.6836016219986192,
      "grad_norm": 2.5,
      "learning_rate": 4.2896381275782665e-05,
      "loss": 0.9185,
      "step": 195050
    },
    {
      "epoch": 0.6836366695055147,
      "grad_norm": 2.4375,
      "learning_rate": 4.2895732247118966e-05,
      "loss": 0.8436,
      "step": 195060
    },
    {
      "epoch": 0.6836717170124104,
      "grad_norm": 2.75,
      "learning_rate": 4.289508321845526e-05,
      "loss": 0.9388,
      "step": 195070
    },
    {
      "epoch": 0.6837067645193059,
      "grad_norm": 2.625,
      "learning_rate": 4.289443418979156e-05,
      "loss": 0.8989,
      "step": 195080
    },
    {
      "epoch": 0.6837418120262015,
      "grad_norm": 2.90625,
      "learning_rate": 4.289378516112786e-05,
      "loss": 0.9063,
      "step": 195090
    },
    {
      "epoch": 0.6837768595330971,
      "grad_norm": 2.609375,
      "learning_rate": 4.289313613246416e-05,
      "loss": 0.9522,
      "step": 195100
    },
    {
      "epoch": 0.6838119070399927,
      "grad_norm": 3.125,
      "learning_rate": 4.289248710380045e-05,
      "loss": 1.0237,
      "step": 195110
    },
    {
      "epoch": 0.6838469545468883,
      "grad_norm": 3.015625,
      "learning_rate": 4.2891838075136754e-05,
      "loss": 0.8697,
      "step": 195120
    },
    {
      "epoch": 0.6838820020537839,
      "grad_norm": 2.984375,
      "learning_rate": 4.289118904647305e-05,
      "loss": 0.9077,
      "step": 195130
    },
    {
      "epoch": 0.6839170495606794,
      "grad_norm": 3.078125,
      "learning_rate": 4.289054001780935e-05,
      "loss": 0.9624,
      "step": 195140
    },
    {
      "epoch": 0.6839520970675751,
      "grad_norm": 2.9375,
      "learning_rate": 4.2889890989145645e-05,
      "loss": 0.8934,
      "step": 195150
    },
    {
      "epoch": 0.6839871445744707,
      "grad_norm": 2.90625,
      "learning_rate": 4.2889241960481946e-05,
      "loss": 0.9355,
      "step": 195160
    },
    {
      "epoch": 0.6840221920813663,
      "grad_norm": 3.171875,
      "learning_rate": 4.288859293181825e-05,
      "loss": 0.9078,
      "step": 195170
    },
    {
      "epoch": 0.6840572395882619,
      "grad_norm": 3.125,
      "learning_rate": 4.288794390315454e-05,
      "loss": 0.8789,
      "step": 195180
    },
    {
      "epoch": 0.6840922870951575,
      "grad_norm": 2.96875,
      "learning_rate": 4.2887294874490844e-05,
      "loss": 0.997,
      "step": 195190
    },
    {
      "epoch": 0.6841273346020531,
      "grad_norm": 3.4375,
      "learning_rate": 4.288664584582714e-05,
      "loss": 0.9493,
      "step": 195200
    },
    {
      "epoch": 0.6841623821089486,
      "grad_norm": 3.203125,
      "learning_rate": 4.288599681716344e-05,
      "loss": 0.9406,
      "step": 195210
    },
    {
      "epoch": 0.6841974296158443,
      "grad_norm": 2.8125,
      "learning_rate": 4.2885347788499734e-05,
      "loss": 0.9666,
      "step": 195220
    },
    {
      "epoch": 0.6842324771227398,
      "grad_norm": 3.234375,
      "learning_rate": 4.2884698759836036e-05,
      "loss": 0.9386,
      "step": 195230
    },
    {
      "epoch": 0.6842675246296355,
      "grad_norm": 3.375,
      "learning_rate": 4.288404973117233e-05,
      "loss": 0.9126,
      "step": 195240
    },
    {
      "epoch": 0.6843025721365311,
      "grad_norm": 3.15625,
      "learning_rate": 4.288340070250863e-05,
      "loss": 0.8091,
      "step": 195250
    },
    {
      "epoch": 0.6843376196434267,
      "grad_norm": 2.671875,
      "learning_rate": 4.2882751673844926e-05,
      "loss": 0.8515,
      "step": 195260
    },
    {
      "epoch": 0.6843726671503223,
      "grad_norm": 2.734375,
      "learning_rate": 4.288210264518122e-05,
      "loss": 0.9248,
      "step": 195270
    },
    {
      "epoch": 0.6844077146572178,
      "grad_norm": 3.21875,
      "learning_rate": 4.288145361651752e-05,
      "loss": 0.903,
      "step": 195280
    },
    {
      "epoch": 0.6844427621641135,
      "grad_norm": 2.609375,
      "learning_rate": 4.288080458785382e-05,
      "loss": 0.8084,
      "step": 195290
    },
    {
      "epoch": 0.684477809671009,
      "grad_norm": 3.09375,
      "learning_rate": 4.288015555919012e-05,
      "loss": 0.9032,
      "step": 195300
    },
    {
      "epoch": 0.6845128571779047,
      "grad_norm": 2.671875,
      "learning_rate": 4.287950653052641e-05,
      "loss": 0.89,
      "step": 195310
    },
    {
      "epoch": 0.6845479046848002,
      "grad_norm": 2.28125,
      "learning_rate": 4.2878857501862714e-05,
      "loss": 0.9268,
      "step": 195320
    },
    {
      "epoch": 0.6845829521916958,
      "grad_norm": 3.34375,
      "learning_rate": 4.287820847319901e-05,
      "loss": 1.0092,
      "step": 195330
    },
    {
      "epoch": 0.6846179996985915,
      "grad_norm": 3.140625,
      "learning_rate": 4.287755944453531e-05,
      "loss": 0.8634,
      "step": 195340
    },
    {
      "epoch": 0.684653047205487,
      "grad_norm": 2.90625,
      "learning_rate": 4.2876910415871605e-05,
      "loss": 0.8471,
      "step": 195350
    },
    {
      "epoch": 0.6846880947123827,
      "grad_norm": 2.9375,
      "learning_rate": 4.2876261387207906e-05,
      "loss": 0.8807,
      "step": 195360
    },
    {
      "epoch": 0.6847231422192782,
      "grad_norm": 2.765625,
      "learning_rate": 4.28756123585442e-05,
      "loss": 0.8662,
      "step": 195370
    },
    {
      "epoch": 0.6847581897261739,
      "grad_norm": 2.921875,
      "learning_rate": 4.28749633298805e-05,
      "loss": 0.9558,
      "step": 195380
    },
    {
      "epoch": 0.6847932372330694,
      "grad_norm": 3.046875,
      "learning_rate": 4.28743143012168e-05,
      "loss": 0.8828,
      "step": 195390
    },
    {
      "epoch": 0.684828284739965,
      "grad_norm": 3.015625,
      "learning_rate": 4.28736652725531e-05,
      "loss": 0.921,
      "step": 195400
    },
    {
      "epoch": 0.6848633322468606,
      "grad_norm": 2.421875,
      "learning_rate": 4.28730162438894e-05,
      "loss": 0.8526,
      "step": 195410
    },
    {
      "epoch": 0.6848983797537562,
      "grad_norm": 2.921875,
      "learning_rate": 4.2872367215225694e-05,
      "loss": 0.8662,
      "step": 195420
    },
    {
      "epoch": 0.6849334272606518,
      "grad_norm": 3.28125,
      "learning_rate": 4.2871718186561996e-05,
      "loss": 0.986,
      "step": 195430
    },
    {
      "epoch": 0.6849684747675474,
      "grad_norm": 3.078125,
      "learning_rate": 4.287106915789829e-05,
      "loss": 0.8062,
      "step": 195440
    },
    {
      "epoch": 0.685003522274443,
      "grad_norm": 2.921875,
      "learning_rate": 4.287042012923459e-05,
      "loss": 0.8941,
      "step": 195450
    },
    {
      "epoch": 0.6850385697813386,
      "grad_norm": 3.0,
      "learning_rate": 4.2869771100570886e-05,
      "loss": 0.9429,
      "step": 195460
    },
    {
      "epoch": 0.6850736172882342,
      "grad_norm": 3.140625,
      "learning_rate": 4.286912207190719e-05,
      "loss": 0.9568,
      "step": 195470
    },
    {
      "epoch": 0.6851086647951298,
      "grad_norm": 2.734375,
      "learning_rate": 4.286847304324348e-05,
      "loss": 0.9522,
      "step": 195480
    },
    {
      "epoch": 0.6851437123020254,
      "grad_norm": 2.953125,
      "learning_rate": 4.2867824014579784e-05,
      "loss": 0.955,
      "step": 195490
    },
    {
      "epoch": 0.685178759808921,
      "grad_norm": 2.90625,
      "learning_rate": 4.286717498591608e-05,
      "loss": 0.8568,
      "step": 195500
    },
    {
      "epoch": 0.6852138073158166,
      "grad_norm": 2.859375,
      "learning_rate": 4.286652595725238e-05,
      "loss": 0.936,
      "step": 195510
    },
    {
      "epoch": 0.6852488548227121,
      "grad_norm": 3.234375,
      "learning_rate": 4.2865876928588674e-05,
      "loss": 0.8783,
      "step": 195520
    },
    {
      "epoch": 0.6852839023296078,
      "grad_norm": 2.796875,
      "learning_rate": 4.2865227899924976e-05,
      "loss": 0.9008,
      "step": 195530
    },
    {
      "epoch": 0.6853189498365034,
      "grad_norm": 2.84375,
      "learning_rate": 4.286457887126128e-05,
      "loss": 0.8725,
      "step": 195540
    },
    {
      "epoch": 0.685353997343399,
      "grad_norm": 2.921875,
      "learning_rate": 4.286392984259757e-05,
      "loss": 0.8543,
      "step": 195550
    },
    {
      "epoch": 0.6853890448502946,
      "grad_norm": 3.265625,
      "learning_rate": 4.286328081393387e-05,
      "loss": 0.8534,
      "step": 195560
    },
    {
      "epoch": 0.6854240923571902,
      "grad_norm": 2.640625,
      "learning_rate": 4.286263178527017e-05,
      "loss": 0.8434,
      "step": 195570
    },
    {
      "epoch": 0.6854591398640858,
      "grad_norm": 3.359375,
      "learning_rate": 4.286198275660647e-05,
      "loss": 0.9189,
      "step": 195580
    },
    {
      "epoch": 0.6854941873709813,
      "grad_norm": 2.90625,
      "learning_rate": 4.2861333727942764e-05,
      "loss": 0.8388,
      "step": 195590
    },
    {
      "epoch": 0.685529234877877,
      "grad_norm": 3.015625,
      "learning_rate": 4.2860684699279065e-05,
      "loss": 0.999,
      "step": 195600
    },
    {
      "epoch": 0.6855642823847725,
      "grad_norm": 2.625,
      "learning_rate": 4.286003567061536e-05,
      "loss": 0.9363,
      "step": 195610
    },
    {
      "epoch": 0.6855993298916682,
      "grad_norm": 3.40625,
      "learning_rate": 4.285938664195166e-05,
      "loss": 0.9102,
      "step": 195620
    },
    {
      "epoch": 0.6856343773985637,
      "grad_norm": 3.171875,
      "learning_rate": 4.2858737613287956e-05,
      "loss": 0.9333,
      "step": 195630
    },
    {
      "epoch": 0.6856694249054593,
      "grad_norm": 3.3125,
      "learning_rate": 4.285808858462425e-05,
      "loss": 0.948,
      "step": 195640
    },
    {
      "epoch": 0.685704472412355,
      "grad_norm": 2.859375,
      "learning_rate": 4.285743955596055e-05,
      "loss": 0.9622,
      "step": 195650
    },
    {
      "epoch": 0.6857395199192505,
      "grad_norm": 3.140625,
      "learning_rate": 4.2856790527296846e-05,
      "loss": 0.9151,
      "step": 195660
    },
    {
      "epoch": 0.6857745674261462,
      "grad_norm": 2.96875,
      "learning_rate": 4.285614149863315e-05,
      "loss": 0.8523,
      "step": 195670
    },
    {
      "epoch": 0.6858096149330417,
      "grad_norm": 2.984375,
      "learning_rate": 4.285549246996944e-05,
      "loss": 0.9512,
      "step": 195680
    },
    {
      "epoch": 0.6858446624399374,
      "grad_norm": 2.890625,
      "learning_rate": 4.2854843441305744e-05,
      "loss": 0.9871,
      "step": 195690
    },
    {
      "epoch": 0.6858797099468329,
      "grad_norm": 2.9375,
      "learning_rate": 4.285419441264204e-05,
      "loss": 0.8997,
      "step": 195700
    },
    {
      "epoch": 0.6859147574537285,
      "grad_norm": 3.15625,
      "learning_rate": 4.285354538397834e-05,
      "loss": 0.9236,
      "step": 195710
    },
    {
      "epoch": 0.6859498049606241,
      "grad_norm": 2.703125,
      "learning_rate": 4.2852896355314634e-05,
      "loss": 0.8824,
      "step": 195720
    },
    {
      "epoch": 0.6859848524675197,
      "grad_norm": 3.828125,
      "learning_rate": 4.2852247326650936e-05,
      "loss": 0.9014,
      "step": 195730
    },
    {
      "epoch": 0.6860198999744154,
      "grad_norm": 2.65625,
      "learning_rate": 4.285159829798723e-05,
      "loss": 0.8574,
      "step": 195740
    },
    {
      "epoch": 0.6860549474813109,
      "grad_norm": 2.96875,
      "learning_rate": 4.285094926932353e-05,
      "loss": 0.9559,
      "step": 195750
    },
    {
      "epoch": 0.6860899949882066,
      "grad_norm": 2.921875,
      "learning_rate": 4.2850300240659826e-05,
      "loss": 0.8513,
      "step": 195760
    },
    {
      "epoch": 0.6861250424951021,
      "grad_norm": 3.015625,
      "learning_rate": 4.284965121199613e-05,
      "loss": 0.8467,
      "step": 195770
    },
    {
      "epoch": 0.6861600900019977,
      "grad_norm": 3.34375,
      "learning_rate": 4.284900218333243e-05,
      "loss": 0.8454,
      "step": 195780
    },
    {
      "epoch": 0.6861951375088933,
      "grad_norm": 2.984375,
      "learning_rate": 4.2848353154668724e-05,
      "loss": 0.8937,
      "step": 195790
    },
    {
      "epoch": 0.6862301850157889,
      "grad_norm": 2.890625,
      "learning_rate": 4.2847704126005025e-05,
      "loss": 0.821,
      "step": 195800
    },
    {
      "epoch": 0.6862652325226845,
      "grad_norm": 2.890625,
      "learning_rate": 4.284705509734132e-05,
      "loss": 0.9425,
      "step": 195810
    },
    {
      "epoch": 0.6863002800295801,
      "grad_norm": 2.890625,
      "learning_rate": 4.284640606867762e-05,
      "loss": 0.9799,
      "step": 195820
    },
    {
      "epoch": 0.6863353275364757,
      "grad_norm": 3.1875,
      "learning_rate": 4.2845757040013916e-05,
      "loss": 0.9238,
      "step": 195830
    },
    {
      "epoch": 0.6863703750433713,
      "grad_norm": 2.78125,
      "learning_rate": 4.284510801135022e-05,
      "loss": 0.8583,
      "step": 195840
    },
    {
      "epoch": 0.6864054225502669,
      "grad_norm": 2.578125,
      "learning_rate": 4.284445898268651e-05,
      "loss": 0.8987,
      "step": 195850
    },
    {
      "epoch": 0.6864404700571625,
      "grad_norm": 3.015625,
      "learning_rate": 4.284380995402281e-05,
      "loss": 0.8708,
      "step": 195860
    },
    {
      "epoch": 0.6864755175640581,
      "grad_norm": 3.109375,
      "learning_rate": 4.284316092535911e-05,
      "loss": 0.9752,
      "step": 195870
    },
    {
      "epoch": 0.6865105650709536,
      "grad_norm": 2.671875,
      "learning_rate": 4.284251189669541e-05,
      "loss": 0.8735,
      "step": 195880
    },
    {
      "epoch": 0.6865456125778493,
      "grad_norm": 2.921875,
      "learning_rate": 4.2841862868031704e-05,
      "loss": 0.9308,
      "step": 195890
    },
    {
      "epoch": 0.6865806600847448,
      "grad_norm": 2.515625,
      "learning_rate": 4.2841213839368005e-05,
      "loss": 0.9678,
      "step": 195900
    },
    {
      "epoch": 0.6866157075916405,
      "grad_norm": 2.703125,
      "learning_rate": 4.2840564810704307e-05,
      "loss": 0.9378,
      "step": 195910
    },
    {
      "epoch": 0.686650755098536,
      "grad_norm": 2.828125,
      "learning_rate": 4.28399157820406e-05,
      "loss": 0.8606,
      "step": 195920
    },
    {
      "epoch": 0.6866858026054317,
      "grad_norm": 3.1875,
      "learning_rate": 4.28392667533769e-05,
      "loss": 0.8855,
      "step": 195930
    },
    {
      "epoch": 0.6867208501123273,
      "grad_norm": 3.125,
      "learning_rate": 4.28386177247132e-05,
      "loss": 0.9321,
      "step": 195940
    },
    {
      "epoch": 0.6867558976192228,
      "grad_norm": 2.921875,
      "learning_rate": 4.28379686960495e-05,
      "loss": 0.9302,
      "step": 195950
    },
    {
      "epoch": 0.6867909451261185,
      "grad_norm": 3.015625,
      "learning_rate": 4.283731966738579e-05,
      "loss": 0.9778,
      "step": 195960
    },
    {
      "epoch": 0.686825992633014,
      "grad_norm": 2.875,
      "learning_rate": 4.2836670638722095e-05,
      "loss": 0.8886,
      "step": 195970
    },
    {
      "epoch": 0.6868610401399097,
      "grad_norm": 2.75,
      "learning_rate": 4.283602161005839e-05,
      "loss": 0.8474,
      "step": 195980
    },
    {
      "epoch": 0.6868960876468052,
      "grad_norm": 3.390625,
      "learning_rate": 4.283537258139469e-05,
      "loss": 0.8841,
      "step": 195990
    },
    {
      "epoch": 0.6869311351537009,
      "grad_norm": 2.65625,
      "learning_rate": 4.2834723552730985e-05,
      "loss": 0.8541,
      "step": 196000
    },
    {
      "epoch": 0.6869661826605964,
      "grad_norm": 3.234375,
      "learning_rate": 4.283407452406728e-05,
      "loss": 0.9097,
      "step": 196010
    },
    {
      "epoch": 0.687001230167492,
      "grad_norm": 3.0,
      "learning_rate": 4.283342549540358e-05,
      "loss": 0.8692,
      "step": 196020
    },
    {
      "epoch": 0.6870362776743877,
      "grad_norm": 2.78125,
      "learning_rate": 4.2832776466739876e-05,
      "loss": 0.8771,
      "step": 196030
    },
    {
      "epoch": 0.6870713251812832,
      "grad_norm": 3.046875,
      "learning_rate": 4.283212743807618e-05,
      "loss": 0.9167,
      "step": 196040
    },
    {
      "epoch": 0.6871063726881789,
      "grad_norm": 3.296875,
      "learning_rate": 4.283147840941247e-05,
      "loss": 0.908,
      "step": 196050
    },
    {
      "epoch": 0.6871414201950744,
      "grad_norm": 2.875,
      "learning_rate": 4.283082938074877e-05,
      "loss": 0.8855,
      "step": 196060
    },
    {
      "epoch": 0.68717646770197,
      "grad_norm": 2.875,
      "learning_rate": 4.283018035208507e-05,
      "loss": 0.955,
      "step": 196070
    },
    {
      "epoch": 0.6872115152088656,
      "grad_norm": 2.984375,
      "learning_rate": 4.282953132342137e-05,
      "loss": 0.9209,
      "step": 196080
    },
    {
      "epoch": 0.6872465627157612,
      "grad_norm": 3.125,
      "learning_rate": 4.2828882294757664e-05,
      "loss": 0.8116,
      "step": 196090
    },
    {
      "epoch": 0.6872816102226568,
      "grad_norm": 2.796875,
      "learning_rate": 4.2828233266093965e-05,
      "loss": 0.8259,
      "step": 196100
    },
    {
      "epoch": 0.6873166577295524,
      "grad_norm": 3.0,
      "learning_rate": 4.282758423743026e-05,
      "loss": 0.9185,
      "step": 196110
    },
    {
      "epoch": 0.687351705236448,
      "grad_norm": 2.6875,
      "learning_rate": 4.282693520876656e-05,
      "loss": 0.8857,
      "step": 196120
    },
    {
      "epoch": 0.6873867527433436,
      "grad_norm": 2.90625,
      "learning_rate": 4.282628618010286e-05,
      "loss": 0.9517,
      "step": 196130
    },
    {
      "epoch": 0.6874218002502392,
      "grad_norm": 2.921875,
      "learning_rate": 4.282563715143916e-05,
      "loss": 0.9391,
      "step": 196140
    },
    {
      "epoch": 0.6874568477571348,
      "grad_norm": 3.171875,
      "learning_rate": 4.282498812277546e-05,
      "loss": 0.9755,
      "step": 196150
    },
    {
      "epoch": 0.6874918952640304,
      "grad_norm": 3.0,
      "learning_rate": 4.282433909411175e-05,
      "loss": 0.8832,
      "step": 196160
    },
    {
      "epoch": 0.687526942770926,
      "grad_norm": 3.53125,
      "learning_rate": 4.2823690065448055e-05,
      "loss": 0.9894,
      "step": 196170
    },
    {
      "epoch": 0.6875619902778216,
      "grad_norm": 2.8125,
      "learning_rate": 4.282304103678435e-05,
      "loss": 0.8994,
      "step": 196180
    },
    {
      "epoch": 0.6875970377847171,
      "grad_norm": 2.859375,
      "learning_rate": 4.282239200812065e-05,
      "loss": 0.8855,
      "step": 196190
    },
    {
      "epoch": 0.6876320852916128,
      "grad_norm": 3.125,
      "learning_rate": 4.2821742979456945e-05,
      "loss": 0.8871,
      "step": 196200
    },
    {
      "epoch": 0.6876671327985083,
      "grad_norm": 3.21875,
      "learning_rate": 4.2821093950793247e-05,
      "loss": 0.8381,
      "step": 196210
    },
    {
      "epoch": 0.687702180305404,
      "grad_norm": 3.3125,
      "learning_rate": 4.282044492212954e-05,
      "loss": 0.9619,
      "step": 196220
    },
    {
      "epoch": 0.6877372278122996,
      "grad_norm": 2.984375,
      "learning_rate": 4.281979589346584e-05,
      "loss": 0.905,
      "step": 196230
    },
    {
      "epoch": 0.6877722753191952,
      "grad_norm": 2.890625,
      "learning_rate": 4.281914686480214e-05,
      "loss": 0.8547,
      "step": 196240
    },
    {
      "epoch": 0.6878073228260908,
      "grad_norm": 3.046875,
      "learning_rate": 4.281849783613844e-05,
      "loss": 0.9259,
      "step": 196250
    },
    {
      "epoch": 0.6878423703329863,
      "grad_norm": 3.234375,
      "learning_rate": 4.281784880747473e-05,
      "loss": 0.905,
      "step": 196260
    },
    {
      "epoch": 0.687877417839882,
      "grad_norm": 3.25,
      "learning_rate": 4.2817199778811035e-05,
      "loss": 0.8902,
      "step": 196270
    },
    {
      "epoch": 0.6879124653467775,
      "grad_norm": 2.859375,
      "learning_rate": 4.2816550750147336e-05,
      "loss": 0.8656,
      "step": 196280
    },
    {
      "epoch": 0.6879475128536732,
      "grad_norm": 2.984375,
      "learning_rate": 4.281590172148363e-05,
      "loss": 0.8532,
      "step": 196290
    },
    {
      "epoch": 0.6879825603605687,
      "grad_norm": 2.703125,
      "learning_rate": 4.281525269281993e-05,
      "loss": 0.8367,
      "step": 196300
    },
    {
      "epoch": 0.6880176078674644,
      "grad_norm": 3.171875,
      "learning_rate": 4.2814603664156227e-05,
      "loss": 0.9129,
      "step": 196310
    },
    {
      "epoch": 0.6880526553743599,
      "grad_norm": 3.28125,
      "learning_rate": 4.281395463549253e-05,
      "loss": 0.9848,
      "step": 196320
    },
    {
      "epoch": 0.6880877028812555,
      "grad_norm": 2.5,
      "learning_rate": 4.281330560682882e-05,
      "loss": 0.8473,
      "step": 196330
    },
    {
      "epoch": 0.6881227503881512,
      "grad_norm": 3.5,
      "learning_rate": 4.2812656578165124e-05,
      "loss": 0.8722,
      "step": 196340
    },
    {
      "epoch": 0.6881577978950467,
      "grad_norm": 2.71875,
      "learning_rate": 4.281200754950142e-05,
      "loss": 0.9249,
      "step": 196350
    },
    {
      "epoch": 0.6881928454019424,
      "grad_norm": 3.171875,
      "learning_rate": 4.281135852083772e-05,
      "loss": 0.9448,
      "step": 196360
    },
    {
      "epoch": 0.6882278929088379,
      "grad_norm": 3.125,
      "learning_rate": 4.2810709492174015e-05,
      "loss": 0.9395,
      "step": 196370
    },
    {
      "epoch": 0.6882629404157335,
      "grad_norm": 2.640625,
      "learning_rate": 4.2810060463510316e-05,
      "loss": 0.8744,
      "step": 196380
    },
    {
      "epoch": 0.6882979879226291,
      "grad_norm": 2.53125,
      "learning_rate": 4.280941143484661e-05,
      "loss": 0.8952,
      "step": 196390
    },
    {
      "epoch": 0.6883330354295247,
      "grad_norm": 2.765625,
      "learning_rate": 4.2808762406182905e-05,
      "loss": 0.9589,
      "step": 196400
    },
    {
      "epoch": 0.6883680829364203,
      "grad_norm": 2.953125,
      "learning_rate": 4.2808113377519207e-05,
      "loss": 0.8657,
      "step": 196410
    },
    {
      "epoch": 0.6884031304433159,
      "grad_norm": 3.4375,
      "learning_rate": 4.28074643488555e-05,
      "loss": 0.9619,
      "step": 196420
    },
    {
      "epoch": 0.6884381779502116,
      "grad_norm": 2.9375,
      "learning_rate": 4.28068153201918e-05,
      "loss": 0.8759,
      "step": 196430
    },
    {
      "epoch": 0.6884732254571071,
      "grad_norm": 2.78125,
      "learning_rate": 4.28061662915281e-05,
      "loss": 0.8454,
      "step": 196440
    },
    {
      "epoch": 0.6885082729640027,
      "grad_norm": 2.671875,
      "learning_rate": 4.28055172628644e-05,
      "loss": 0.8635,
      "step": 196450
    },
    {
      "epoch": 0.6885433204708983,
      "grad_norm": 3.15625,
      "learning_rate": 4.280486823420069e-05,
      "loss": 0.856,
      "step": 196460
    },
    {
      "epoch": 0.6885783679777939,
      "grad_norm": 3.0,
      "learning_rate": 4.2804219205536995e-05,
      "loss": 0.964,
      "step": 196470
    },
    {
      "epoch": 0.6886134154846895,
      "grad_norm": 2.65625,
      "learning_rate": 4.280357017687329e-05,
      "loss": 0.8613,
      "step": 196480
    },
    {
      "epoch": 0.6886484629915851,
      "grad_norm": 3.09375,
      "learning_rate": 4.280292114820959e-05,
      "loss": 0.8844,
      "step": 196490
    },
    {
      "epoch": 0.6886835104984806,
      "grad_norm": 3.140625,
      "learning_rate": 4.280227211954589e-05,
      "loss": 0.8776,
      "step": 196500
    },
    {
      "epoch": 0.6887185580053763,
      "grad_norm": 3.203125,
      "learning_rate": 4.2801623090882187e-05,
      "loss": 0.8681,
      "step": 196510
    },
    {
      "epoch": 0.6887536055122719,
      "grad_norm": 2.9375,
      "learning_rate": 4.280097406221849e-05,
      "loss": 0.8834,
      "step": 196520
    },
    {
      "epoch": 0.6887886530191675,
      "grad_norm": 2.828125,
      "learning_rate": 4.280032503355478e-05,
      "loss": 0.8716,
      "step": 196530
    },
    {
      "epoch": 0.6888237005260631,
      "grad_norm": 2.828125,
      "learning_rate": 4.2799676004891084e-05,
      "loss": 0.9827,
      "step": 196540
    },
    {
      "epoch": 0.6888587480329587,
      "grad_norm": 2.921875,
      "learning_rate": 4.279902697622738e-05,
      "loss": 0.9278,
      "step": 196550
    },
    {
      "epoch": 0.6888937955398543,
      "grad_norm": 2.828125,
      "learning_rate": 4.279837794756368e-05,
      "loss": 0.8864,
      "step": 196560
    },
    {
      "epoch": 0.6889288430467498,
      "grad_norm": 2.765625,
      "learning_rate": 4.2797728918899974e-05,
      "loss": 0.9088,
      "step": 196570
    },
    {
      "epoch": 0.6889638905536455,
      "grad_norm": 2.90625,
      "learning_rate": 4.2797079890236276e-05,
      "loss": 0.8783,
      "step": 196580
    },
    {
      "epoch": 0.688998938060541,
      "grad_norm": 3.234375,
      "learning_rate": 4.279643086157257e-05,
      "loss": 0.8152,
      "step": 196590
    },
    {
      "epoch": 0.6890339855674367,
      "grad_norm": 2.828125,
      "learning_rate": 4.279578183290887e-05,
      "loss": 0.8698,
      "step": 196600
    },
    {
      "epoch": 0.6890690330743322,
      "grad_norm": 3.3125,
      "learning_rate": 4.2795132804245166e-05,
      "loss": 0.9492,
      "step": 196610
    },
    {
      "epoch": 0.6891040805812279,
      "grad_norm": 2.71875,
      "learning_rate": 4.279448377558147e-05,
      "loss": 0.8494,
      "step": 196620
    },
    {
      "epoch": 0.6891391280881235,
      "grad_norm": 2.6875,
      "learning_rate": 4.279383474691776e-05,
      "loss": 0.9155,
      "step": 196630
    },
    {
      "epoch": 0.689174175595019,
      "grad_norm": 2.6875,
      "learning_rate": 4.2793185718254064e-05,
      "loss": 0.8795,
      "step": 196640
    },
    {
      "epoch": 0.6892092231019147,
      "grad_norm": 2.859375,
      "learning_rate": 4.2792536689590365e-05,
      "loss": 0.9497,
      "step": 196650
    },
    {
      "epoch": 0.6892442706088102,
      "grad_norm": 3.0625,
      "learning_rate": 4.279188766092666e-05,
      "loss": 1.0068,
      "step": 196660
    },
    {
      "epoch": 0.6892793181157059,
      "grad_norm": 2.671875,
      "learning_rate": 4.279123863226296e-05,
      "loss": 0.8702,
      "step": 196670
    },
    {
      "epoch": 0.6893143656226014,
      "grad_norm": 2.640625,
      "learning_rate": 4.2790589603599256e-05,
      "loss": 0.9313,
      "step": 196680
    },
    {
      "epoch": 0.689349413129497,
      "grad_norm": 2.640625,
      "learning_rate": 4.278994057493556e-05,
      "loss": 0.9034,
      "step": 196690
    },
    {
      "epoch": 0.6893844606363926,
      "grad_norm": 3.25,
      "learning_rate": 4.278929154627185e-05,
      "loss": 0.9146,
      "step": 196700
    },
    {
      "epoch": 0.6894195081432882,
      "grad_norm": 2.953125,
      "learning_rate": 4.278864251760815e-05,
      "loss": 0.8816,
      "step": 196710
    },
    {
      "epoch": 0.6894545556501839,
      "grad_norm": 2.6875,
      "learning_rate": 4.278799348894445e-05,
      "loss": 0.9426,
      "step": 196720
    },
    {
      "epoch": 0.6894896031570794,
      "grad_norm": 3.4375,
      "learning_rate": 4.278734446028075e-05,
      "loss": 0.9899,
      "step": 196730
    },
    {
      "epoch": 0.6895246506639751,
      "grad_norm": 2.640625,
      "learning_rate": 4.2786695431617044e-05,
      "loss": 0.9123,
      "step": 196740
    },
    {
      "epoch": 0.6895596981708706,
      "grad_norm": 3.234375,
      "learning_rate": 4.2786046402953345e-05,
      "loss": 0.8709,
      "step": 196750
    },
    {
      "epoch": 0.6895947456777662,
      "grad_norm": 3.25,
      "learning_rate": 4.278539737428964e-05,
      "loss": 0.9681,
      "step": 196760
    },
    {
      "epoch": 0.6896297931846618,
      "grad_norm": 3.0625,
      "learning_rate": 4.2784748345625934e-05,
      "loss": 0.9348,
      "step": 196770
    },
    {
      "epoch": 0.6896648406915574,
      "grad_norm": 2.8125,
      "learning_rate": 4.2784099316962236e-05,
      "loss": 0.8592,
      "step": 196780
    },
    {
      "epoch": 0.689699888198453,
      "grad_norm": 3.015625,
      "learning_rate": 4.278345028829853e-05,
      "loss": 1.0021,
      "step": 196790
    },
    {
      "epoch": 0.6897349357053486,
      "grad_norm": 2.390625,
      "learning_rate": 4.278280125963483e-05,
      "loss": 0.894,
      "step": 196800
    },
    {
      "epoch": 0.6897699832122441,
      "grad_norm": 2.6875,
      "learning_rate": 4.2782152230971126e-05,
      "loss": 0.974,
      "step": 196810
    },
    {
      "epoch": 0.6898050307191398,
      "grad_norm": 3.59375,
      "learning_rate": 4.278150320230743e-05,
      "loss": 0.847,
      "step": 196820
    },
    {
      "epoch": 0.6898400782260354,
      "grad_norm": 2.890625,
      "learning_rate": 4.278085417364372e-05,
      "loss": 0.9149,
      "step": 196830
    },
    {
      "epoch": 0.689875125732931,
      "grad_norm": 3.296875,
      "learning_rate": 4.2780205144980024e-05,
      "loss": 0.9253,
      "step": 196840
    },
    {
      "epoch": 0.6899101732398266,
      "grad_norm": 2.921875,
      "learning_rate": 4.277955611631632e-05,
      "loss": 0.8958,
      "step": 196850
    },
    {
      "epoch": 0.6899452207467222,
      "grad_norm": 2.59375,
      "learning_rate": 4.277890708765262e-05,
      "loss": 0.8602,
      "step": 196860
    },
    {
      "epoch": 0.6899802682536178,
      "grad_norm": 2.953125,
      "learning_rate": 4.277825805898892e-05,
      "loss": 0.8386,
      "step": 196870
    },
    {
      "epoch": 0.6900153157605133,
      "grad_norm": 2.875,
      "learning_rate": 4.2777609030325216e-05,
      "loss": 0.909,
      "step": 196880
    },
    {
      "epoch": 0.690050363267409,
      "grad_norm": 2.578125,
      "learning_rate": 4.277696000166152e-05,
      "loss": 0.8717,
      "step": 196890
    },
    {
      "epoch": 0.6900854107743045,
      "grad_norm": 2.96875,
      "learning_rate": 4.277631097299781e-05,
      "loss": 0.9654,
      "step": 196900
    },
    {
      "epoch": 0.6901204582812002,
      "grad_norm": 3.0,
      "learning_rate": 4.277566194433411e-05,
      "loss": 0.8576,
      "step": 196910
    },
    {
      "epoch": 0.6901555057880958,
      "grad_norm": 3.046875,
      "learning_rate": 4.277501291567041e-05,
      "loss": 0.9287,
      "step": 196920
    },
    {
      "epoch": 0.6901905532949913,
      "grad_norm": 3.578125,
      "learning_rate": 4.277436388700671e-05,
      "loss": 0.9285,
      "step": 196930
    },
    {
      "epoch": 0.690225600801887,
      "grad_norm": 2.734375,
      "learning_rate": 4.2773714858343004e-05,
      "loss": 0.9077,
      "step": 196940
    },
    {
      "epoch": 0.6902606483087825,
      "grad_norm": 3.90625,
      "learning_rate": 4.2773065829679305e-05,
      "loss": 0.9054,
      "step": 196950
    },
    {
      "epoch": 0.6902956958156782,
      "grad_norm": 3.015625,
      "learning_rate": 4.27724168010156e-05,
      "loss": 0.9032,
      "step": 196960
    },
    {
      "epoch": 0.6903307433225737,
      "grad_norm": 2.859375,
      "learning_rate": 4.27717677723519e-05,
      "loss": 0.9033,
      "step": 196970
    },
    {
      "epoch": 0.6903657908294694,
      "grad_norm": 3.140625,
      "learning_rate": 4.2771118743688196e-05,
      "loss": 0.8567,
      "step": 196980
    },
    {
      "epoch": 0.6904008383363649,
      "grad_norm": 2.859375,
      "learning_rate": 4.27704697150245e-05,
      "loss": 0.9429,
      "step": 196990
    },
    {
      "epoch": 0.6904358858432605,
      "grad_norm": 2.640625,
      "learning_rate": 4.27698206863608e-05,
      "loss": 0.9,
      "step": 197000
    },
    {
      "epoch": 0.6904709333501562,
      "grad_norm": 3.109375,
      "learning_rate": 4.276917165769709e-05,
      "loss": 0.8937,
      "step": 197010
    },
    {
      "epoch": 0.6905059808570517,
      "grad_norm": 2.515625,
      "learning_rate": 4.2768522629033395e-05,
      "loss": 0.9993,
      "step": 197020
    },
    {
      "epoch": 0.6905410283639474,
      "grad_norm": 2.734375,
      "learning_rate": 4.276787360036969e-05,
      "loss": 0.9508,
      "step": 197030
    },
    {
      "epoch": 0.6905760758708429,
      "grad_norm": 2.96875,
      "learning_rate": 4.276722457170599e-05,
      "loss": 1.0055,
      "step": 197040
    },
    {
      "epoch": 0.6906111233777386,
      "grad_norm": 2.90625,
      "learning_rate": 4.2766575543042285e-05,
      "loss": 0.8966,
      "step": 197050
    },
    {
      "epoch": 0.6906461708846341,
      "grad_norm": 3.359375,
      "learning_rate": 4.276592651437859e-05,
      "loss": 0.9428,
      "step": 197060
    },
    {
      "epoch": 0.6906812183915297,
      "grad_norm": 2.984375,
      "learning_rate": 4.276527748571488e-05,
      "loss": 0.9574,
      "step": 197070
    },
    {
      "epoch": 0.6907162658984253,
      "grad_norm": 2.828125,
      "learning_rate": 4.276462845705118e-05,
      "loss": 0.8467,
      "step": 197080
    },
    {
      "epoch": 0.6907513134053209,
      "grad_norm": 3.09375,
      "learning_rate": 4.276397942838748e-05,
      "loss": 0.8758,
      "step": 197090
    },
    {
      "epoch": 0.6907863609122165,
      "grad_norm": 2.921875,
      "learning_rate": 4.276333039972378e-05,
      "loss": 0.8901,
      "step": 197100
    },
    {
      "epoch": 0.6908214084191121,
      "grad_norm": 2.765625,
      "learning_rate": 4.276268137106007e-05,
      "loss": 0.8121,
      "step": 197110
    },
    {
      "epoch": 0.6908564559260078,
      "grad_norm": 2.875,
      "learning_rate": 4.2762032342396375e-05,
      "loss": 0.7839,
      "step": 197120
    },
    {
      "epoch": 0.6908915034329033,
      "grad_norm": 2.84375,
      "learning_rate": 4.276138331373267e-05,
      "loss": 0.9081,
      "step": 197130
    },
    {
      "epoch": 0.6909265509397989,
      "grad_norm": 2.921875,
      "learning_rate": 4.2760734285068964e-05,
      "loss": 0.9308,
      "step": 197140
    },
    {
      "epoch": 0.6909615984466945,
      "grad_norm": 2.9375,
      "learning_rate": 4.2760085256405265e-05,
      "loss": 0.8276,
      "step": 197150
    },
    {
      "epoch": 0.6909966459535901,
      "grad_norm": 2.90625,
      "learning_rate": 4.275943622774156e-05,
      "loss": 0.937,
      "step": 197160
    },
    {
      "epoch": 0.6910316934604857,
      "grad_norm": 3.0,
      "learning_rate": 4.275878719907786e-05,
      "loss": 0.9214,
      "step": 197170
    },
    {
      "epoch": 0.6910667409673813,
      "grad_norm": 3.671875,
      "learning_rate": 4.2758138170414156e-05,
      "loss": 0.8382,
      "step": 197180
    },
    {
      "epoch": 0.6911017884742768,
      "grad_norm": 2.828125,
      "learning_rate": 4.275748914175046e-05,
      "loss": 0.9357,
      "step": 197190
    },
    {
      "epoch": 0.6911368359811725,
      "grad_norm": 2.859375,
      "learning_rate": 4.275684011308675e-05,
      "loss": 0.9059,
      "step": 197200
    },
    {
      "epoch": 0.6911718834880681,
      "grad_norm": 2.84375,
      "learning_rate": 4.275619108442305e-05,
      "loss": 0.9409,
      "step": 197210
    },
    {
      "epoch": 0.6912069309949637,
      "grad_norm": 2.8125,
      "learning_rate": 4.275554205575935e-05,
      "loss": 0.843,
      "step": 197220
    },
    {
      "epoch": 0.6912419785018593,
      "grad_norm": 3.09375,
      "learning_rate": 4.275489302709565e-05,
      "loss": 0.9437,
      "step": 197230
    },
    {
      "epoch": 0.6912770260087548,
      "grad_norm": 2.78125,
      "learning_rate": 4.275424399843195e-05,
      "loss": 0.8607,
      "step": 197240
    },
    {
      "epoch": 0.6913120735156505,
      "grad_norm": 2.8125,
      "learning_rate": 4.2753594969768245e-05,
      "loss": 0.9009,
      "step": 197250
    },
    {
      "epoch": 0.691347121022546,
      "grad_norm": 2.859375,
      "learning_rate": 4.275294594110455e-05,
      "loss": 0.8799,
      "step": 197260
    },
    {
      "epoch": 0.6913821685294417,
      "grad_norm": 2.84375,
      "learning_rate": 4.275229691244084e-05,
      "loss": 0.9053,
      "step": 197270
    },
    {
      "epoch": 0.6914172160363372,
      "grad_norm": 3.125,
      "learning_rate": 4.275164788377714e-05,
      "loss": 0.8427,
      "step": 197280
    },
    {
      "epoch": 0.6914522635432329,
      "grad_norm": 2.921875,
      "learning_rate": 4.275099885511344e-05,
      "loss": 0.9249,
      "step": 197290
    },
    {
      "epoch": 0.6914873110501284,
      "grad_norm": 3.203125,
      "learning_rate": 4.275034982644974e-05,
      "loss": 0.8986,
      "step": 197300
    },
    {
      "epoch": 0.691522358557024,
      "grad_norm": 3.046875,
      "learning_rate": 4.274970079778603e-05,
      "loss": 0.8732,
      "step": 197310
    },
    {
      "epoch": 0.6915574060639197,
      "grad_norm": 3.359375,
      "learning_rate": 4.2749051769122335e-05,
      "loss": 0.9328,
      "step": 197320
    },
    {
      "epoch": 0.6915924535708152,
      "grad_norm": 3.03125,
      "learning_rate": 4.274840274045863e-05,
      "loss": 0.9455,
      "step": 197330
    },
    {
      "epoch": 0.6916275010777109,
      "grad_norm": 3.0625,
      "learning_rate": 4.274775371179493e-05,
      "loss": 0.8695,
      "step": 197340
    },
    {
      "epoch": 0.6916625485846064,
      "grad_norm": 3.234375,
      "learning_rate": 4.2747104683131225e-05,
      "loss": 0.9072,
      "step": 197350
    },
    {
      "epoch": 0.691697596091502,
      "grad_norm": 2.6875,
      "learning_rate": 4.274645565446753e-05,
      "loss": 0.8389,
      "step": 197360
    },
    {
      "epoch": 0.6917326435983976,
      "grad_norm": 3.265625,
      "learning_rate": 4.274580662580383e-05,
      "loss": 0.9582,
      "step": 197370
    },
    {
      "epoch": 0.6917676911052932,
      "grad_norm": 3.03125,
      "learning_rate": 4.274515759714012e-05,
      "loss": 0.8908,
      "step": 197380
    },
    {
      "epoch": 0.6918027386121888,
      "grad_norm": 3.0,
      "learning_rate": 4.2744508568476424e-05,
      "loss": 0.8825,
      "step": 197390
    },
    {
      "epoch": 0.6918377861190844,
      "grad_norm": 3.09375,
      "learning_rate": 4.274385953981272e-05,
      "loss": 0.9237,
      "step": 197400
    },
    {
      "epoch": 0.6918728336259801,
      "grad_norm": 2.453125,
      "learning_rate": 4.274321051114902e-05,
      "loss": 0.8927,
      "step": 197410
    },
    {
      "epoch": 0.6919078811328756,
      "grad_norm": 2.546875,
      "learning_rate": 4.2742561482485315e-05,
      "loss": 0.8713,
      "step": 197420
    },
    {
      "epoch": 0.6919429286397712,
      "grad_norm": 2.78125,
      "learning_rate": 4.2741912453821616e-05,
      "loss": 0.9049,
      "step": 197430
    },
    {
      "epoch": 0.6919779761466668,
      "grad_norm": 3.1875,
      "learning_rate": 4.274126342515791e-05,
      "loss": 0.8834,
      "step": 197440
    },
    {
      "epoch": 0.6920130236535624,
      "grad_norm": 3.109375,
      "learning_rate": 4.274061439649421e-05,
      "loss": 0.9998,
      "step": 197450
    },
    {
      "epoch": 0.692048071160458,
      "grad_norm": 3.125,
      "learning_rate": 4.273996536783051e-05,
      "loss": 0.9307,
      "step": 197460
    },
    {
      "epoch": 0.6920831186673536,
      "grad_norm": 3.328125,
      "learning_rate": 4.273931633916681e-05,
      "loss": 0.9054,
      "step": 197470
    },
    {
      "epoch": 0.6921181661742491,
      "grad_norm": 2.65625,
      "learning_rate": 4.27386673105031e-05,
      "loss": 0.9071,
      "step": 197480
    },
    {
      "epoch": 0.6921532136811448,
      "grad_norm": 3.0625,
      "learning_rate": 4.2738018281839404e-05,
      "loss": 0.9498,
      "step": 197490
    },
    {
      "epoch": 0.6921882611880404,
      "grad_norm": 2.875,
      "learning_rate": 4.27373692531757e-05,
      "loss": 0.9482,
      "step": 197500
    },
    {
      "epoch": 0.692223308694936,
      "grad_norm": 3.1875,
      "learning_rate": 4.273672022451199e-05,
      "loss": 0.8994,
      "step": 197510
    },
    {
      "epoch": 0.6922583562018316,
      "grad_norm": 2.9375,
      "learning_rate": 4.2736071195848295e-05,
      "loss": 0.8893,
      "step": 197520
    },
    {
      "epoch": 0.6922934037087272,
      "grad_norm": 2.78125,
      "learning_rate": 4.273542216718459e-05,
      "loss": 0.8673,
      "step": 197530
    },
    {
      "epoch": 0.6923284512156228,
      "grad_norm": 2.953125,
      "learning_rate": 4.273477313852089e-05,
      "loss": 0.9182,
      "step": 197540
    },
    {
      "epoch": 0.6923634987225183,
      "grad_norm": 2.890625,
      "learning_rate": 4.2734124109857185e-05,
      "loss": 0.9082,
      "step": 197550
    },
    {
      "epoch": 0.692398546229414,
      "grad_norm": 3.015625,
      "learning_rate": 4.273347508119349e-05,
      "loss": 0.8948,
      "step": 197560
    },
    {
      "epoch": 0.6924335937363095,
      "grad_norm": 2.953125,
      "learning_rate": 4.273282605252978e-05,
      "loss": 0.8395,
      "step": 197570
    },
    {
      "epoch": 0.6924686412432052,
      "grad_norm": 2.921875,
      "learning_rate": 4.273217702386608e-05,
      "loss": 0.8788,
      "step": 197580
    },
    {
      "epoch": 0.6925036887501007,
      "grad_norm": 3.0625,
      "learning_rate": 4.273152799520238e-05,
      "loss": 0.914,
      "step": 197590
    },
    {
      "epoch": 0.6925387362569964,
      "grad_norm": 3.21875,
      "learning_rate": 4.273087896653868e-05,
      "loss": 0.9492,
      "step": 197600
    },
    {
      "epoch": 0.692573783763892,
      "grad_norm": 3.09375,
      "learning_rate": 4.273022993787498e-05,
      "loss": 0.9245,
      "step": 197610
    },
    {
      "epoch": 0.6926088312707875,
      "grad_norm": 2.84375,
      "learning_rate": 4.2729580909211275e-05,
      "loss": 0.864,
      "step": 197620
    },
    {
      "epoch": 0.6926438787776832,
      "grad_norm": 3.203125,
      "learning_rate": 4.2728931880547576e-05,
      "loss": 0.8862,
      "step": 197630
    },
    {
      "epoch": 0.6926789262845787,
      "grad_norm": 2.546875,
      "learning_rate": 4.272828285188387e-05,
      "loss": 0.879,
      "step": 197640
    },
    {
      "epoch": 0.6927139737914744,
      "grad_norm": 2.859375,
      "learning_rate": 4.272763382322017e-05,
      "loss": 0.9379,
      "step": 197650
    },
    {
      "epoch": 0.6927490212983699,
      "grad_norm": 3.34375,
      "learning_rate": 4.272698479455647e-05,
      "loss": 0.8931,
      "step": 197660
    },
    {
      "epoch": 0.6927840688052656,
      "grad_norm": 3.203125,
      "learning_rate": 4.272633576589277e-05,
      "loss": 0.9289,
      "step": 197670
    },
    {
      "epoch": 0.6928191163121611,
      "grad_norm": 3.03125,
      "learning_rate": 4.272568673722906e-05,
      "loss": 0.9412,
      "step": 197680
    },
    {
      "epoch": 0.6928541638190567,
      "grad_norm": 3.140625,
      "learning_rate": 4.2725037708565364e-05,
      "loss": 0.9205,
      "step": 197690
    },
    {
      "epoch": 0.6928892113259524,
      "grad_norm": 3.171875,
      "learning_rate": 4.272438867990166e-05,
      "loss": 0.8788,
      "step": 197700
    },
    {
      "epoch": 0.6929242588328479,
      "grad_norm": 2.859375,
      "learning_rate": 4.272373965123796e-05,
      "loss": 0.8673,
      "step": 197710
    },
    {
      "epoch": 0.6929593063397436,
      "grad_norm": 2.90625,
      "learning_rate": 4.2723090622574255e-05,
      "loss": 0.953,
      "step": 197720
    },
    {
      "epoch": 0.6929943538466391,
      "grad_norm": 3.21875,
      "learning_rate": 4.2722441593910556e-05,
      "loss": 0.8701,
      "step": 197730
    },
    {
      "epoch": 0.6930294013535347,
      "grad_norm": 3.015625,
      "learning_rate": 4.272179256524686e-05,
      "loss": 0.9114,
      "step": 197740
    },
    {
      "epoch": 0.6930644488604303,
      "grad_norm": 2.75,
      "learning_rate": 4.272114353658315e-05,
      "loss": 0.8814,
      "step": 197750
    },
    {
      "epoch": 0.6930994963673259,
      "grad_norm": 2.9375,
      "learning_rate": 4.2720494507919453e-05,
      "loss": 0.8719,
      "step": 197760
    },
    {
      "epoch": 0.6931345438742215,
      "grad_norm": 2.484375,
      "learning_rate": 4.271984547925575e-05,
      "loss": 0.864,
      "step": 197770
    },
    {
      "epoch": 0.6931695913811171,
      "grad_norm": 3.203125,
      "learning_rate": 4.271919645059205e-05,
      "loss": 0.9006,
      "step": 197780
    },
    {
      "epoch": 0.6932046388880126,
      "grad_norm": 3.375,
      "learning_rate": 4.2718547421928344e-05,
      "loss": 0.8943,
      "step": 197790
    },
    {
      "epoch": 0.6932396863949083,
      "grad_norm": 3.171875,
      "learning_rate": 4.2717898393264645e-05,
      "loss": 0.8507,
      "step": 197800
    },
    {
      "epoch": 0.6932747339018039,
      "grad_norm": 3.171875,
      "learning_rate": 4.271724936460094e-05,
      "loss": 0.841,
      "step": 197810
    },
    {
      "epoch": 0.6933097814086995,
      "grad_norm": 3.09375,
      "learning_rate": 4.271660033593724e-05,
      "loss": 0.962,
      "step": 197820
    },
    {
      "epoch": 0.6933448289155951,
      "grad_norm": 2.625,
      "learning_rate": 4.2715951307273536e-05,
      "loss": 0.9099,
      "step": 197830
    },
    {
      "epoch": 0.6933798764224907,
      "grad_norm": 2.875,
      "learning_rate": 4.271530227860984e-05,
      "loss": 0.815,
      "step": 197840
    },
    {
      "epoch": 0.6934149239293863,
      "grad_norm": 2.859375,
      "learning_rate": 4.271465324994613e-05,
      "loss": 0.8847,
      "step": 197850
    },
    {
      "epoch": 0.6934499714362818,
      "grad_norm": 3.03125,
      "learning_rate": 4.2714004221282433e-05,
      "loss": 1.007,
      "step": 197860
    },
    {
      "epoch": 0.6934850189431775,
      "grad_norm": 3.015625,
      "learning_rate": 4.271335519261873e-05,
      "loss": 0.9158,
      "step": 197870
    },
    {
      "epoch": 0.693520066450073,
      "grad_norm": 2.953125,
      "learning_rate": 4.271270616395503e-05,
      "loss": 0.8375,
      "step": 197880
    },
    {
      "epoch": 0.6935551139569687,
      "grad_norm": 2.765625,
      "learning_rate": 4.2712057135291324e-05,
      "loss": 0.9601,
      "step": 197890
    },
    {
      "epoch": 0.6935901614638643,
      "grad_norm": 3.015625,
      "learning_rate": 4.271140810662762e-05,
      "loss": 0.9291,
      "step": 197900
    },
    {
      "epoch": 0.6936252089707599,
      "grad_norm": 2.8125,
      "learning_rate": 4.271075907796392e-05,
      "loss": 0.8531,
      "step": 197910
    },
    {
      "epoch": 0.6936602564776555,
      "grad_norm": 3.65625,
      "learning_rate": 4.2710110049300215e-05,
      "loss": 0.9364,
      "step": 197920
    },
    {
      "epoch": 0.693695303984551,
      "grad_norm": 2.96875,
      "learning_rate": 4.2709461020636516e-05,
      "loss": 0.9157,
      "step": 197930
    },
    {
      "epoch": 0.6937303514914467,
      "grad_norm": 2.8125,
      "learning_rate": 4.270881199197281e-05,
      "loss": 0.8631,
      "step": 197940
    },
    {
      "epoch": 0.6937653989983422,
      "grad_norm": 2.546875,
      "learning_rate": 4.270816296330911e-05,
      "loss": 0.9871,
      "step": 197950
    },
    {
      "epoch": 0.6938004465052379,
      "grad_norm": 2.78125,
      "learning_rate": 4.2707513934645413e-05,
      "loss": 0.8214,
      "step": 197960
    },
    {
      "epoch": 0.6938354940121334,
      "grad_norm": 2.796875,
      "learning_rate": 4.270686490598171e-05,
      "loss": 0.8962,
      "step": 197970
    },
    {
      "epoch": 0.693870541519029,
      "grad_norm": 3.078125,
      "learning_rate": 4.270621587731801e-05,
      "loss": 0.9624,
      "step": 197980
    },
    {
      "epoch": 0.6939055890259247,
      "grad_norm": 3.359375,
      "learning_rate": 4.2705566848654304e-05,
      "loss": 0.9589,
      "step": 197990
    },
    {
      "epoch": 0.6939406365328202,
      "grad_norm": 2.859375,
      "learning_rate": 4.2704917819990605e-05,
      "loss": 0.9216,
      "step": 198000
    },
    {
      "epoch": 0.6939756840397159,
      "grad_norm": 2.53125,
      "learning_rate": 4.27042687913269e-05,
      "loss": 0.8724,
      "step": 198010
    },
    {
      "epoch": 0.6940107315466114,
      "grad_norm": 2.5625,
      "learning_rate": 4.27036197626632e-05,
      "loss": 0.8249,
      "step": 198020
    },
    {
      "epoch": 0.6940457790535071,
      "grad_norm": 3.90625,
      "learning_rate": 4.2702970733999496e-05,
      "loss": 0.8598,
      "step": 198030
    },
    {
      "epoch": 0.6940808265604026,
      "grad_norm": 3.015625,
      "learning_rate": 4.27023217053358e-05,
      "loss": 0.9016,
      "step": 198040
    },
    {
      "epoch": 0.6941158740672982,
      "grad_norm": 2.84375,
      "learning_rate": 4.270167267667209e-05,
      "loss": 0.8003,
      "step": 198050
    },
    {
      "epoch": 0.6941509215741938,
      "grad_norm": 2.5,
      "learning_rate": 4.2701023648008393e-05,
      "loss": 0.9246,
      "step": 198060
    },
    {
      "epoch": 0.6941859690810894,
      "grad_norm": 3.140625,
      "learning_rate": 4.270037461934469e-05,
      "loss": 0.855,
      "step": 198070
    },
    {
      "epoch": 0.694221016587985,
      "grad_norm": 2.515625,
      "learning_rate": 4.269972559068099e-05,
      "loss": 0.9361,
      "step": 198080
    },
    {
      "epoch": 0.6942560640948806,
      "grad_norm": 3.359375,
      "learning_rate": 4.2699076562017284e-05,
      "loss": 0.9417,
      "step": 198090
    },
    {
      "epoch": 0.6942911116017763,
      "grad_norm": 2.8125,
      "learning_rate": 4.2698427533353585e-05,
      "loss": 0.9468,
      "step": 198100
    },
    {
      "epoch": 0.6943261591086718,
      "grad_norm": 2.9375,
      "learning_rate": 4.269777850468989e-05,
      "loss": 0.8167,
      "step": 198110
    },
    {
      "epoch": 0.6943612066155674,
      "grad_norm": 2.859375,
      "learning_rate": 4.269712947602618e-05,
      "loss": 0.9933,
      "step": 198120
    },
    {
      "epoch": 0.694396254122463,
      "grad_norm": 2.6875,
      "learning_rate": 4.269648044736248e-05,
      "loss": 0.9353,
      "step": 198130
    },
    {
      "epoch": 0.6944313016293586,
      "grad_norm": 2.859375,
      "learning_rate": 4.269583141869878e-05,
      "loss": 0.9427,
      "step": 198140
    },
    {
      "epoch": 0.6944663491362542,
      "grad_norm": 3.21875,
      "learning_rate": 4.269518239003508e-05,
      "loss": 0.9057,
      "step": 198150
    },
    {
      "epoch": 0.6945013966431498,
      "grad_norm": 2.890625,
      "learning_rate": 4.2694533361371373e-05,
      "loss": 0.8682,
      "step": 198160
    },
    {
      "epoch": 0.6945364441500453,
      "grad_norm": 3.171875,
      "learning_rate": 4.2693884332707675e-05,
      "loss": 0.8889,
      "step": 198170
    },
    {
      "epoch": 0.694571491656941,
      "grad_norm": 3.1875,
      "learning_rate": 4.269323530404397e-05,
      "loss": 0.8933,
      "step": 198180
    },
    {
      "epoch": 0.6946065391638366,
      "grad_norm": 2.921875,
      "learning_rate": 4.269258627538027e-05,
      "loss": 0.927,
      "step": 198190
    },
    {
      "epoch": 0.6946415866707322,
      "grad_norm": 3.421875,
      "learning_rate": 4.2691937246716565e-05,
      "loss": 0.8941,
      "step": 198200
    },
    {
      "epoch": 0.6946766341776278,
      "grad_norm": 2.4375,
      "learning_rate": 4.269128821805287e-05,
      "loss": 0.8542,
      "step": 198210
    },
    {
      "epoch": 0.6947116816845234,
      "grad_norm": 3.640625,
      "learning_rate": 4.269063918938916e-05,
      "loss": 0.8664,
      "step": 198220
    },
    {
      "epoch": 0.694746729191419,
      "grad_norm": 2.828125,
      "learning_rate": 4.268999016072546e-05,
      "loss": 0.8864,
      "step": 198230
    },
    {
      "epoch": 0.6947817766983145,
      "grad_norm": 3.140625,
      "learning_rate": 4.2689341132061764e-05,
      "loss": 0.9251,
      "step": 198240
    },
    {
      "epoch": 0.6948168242052102,
      "grad_norm": 3.0,
      "learning_rate": 4.268869210339806e-05,
      "loss": 0.8617,
      "step": 198250
    },
    {
      "epoch": 0.6948518717121057,
      "grad_norm": 3.09375,
      "learning_rate": 4.268804307473436e-05,
      "loss": 0.9709,
      "step": 198260
    },
    {
      "epoch": 0.6948869192190014,
      "grad_norm": 2.953125,
      "learning_rate": 4.268739404607065e-05,
      "loss": 0.9661,
      "step": 198270
    },
    {
      "epoch": 0.6949219667258969,
      "grad_norm": 3.046875,
      "learning_rate": 4.268674501740695e-05,
      "loss": 0.9275,
      "step": 198280
    },
    {
      "epoch": 0.6949570142327925,
      "grad_norm": 2.90625,
      "learning_rate": 4.2686095988743244e-05,
      "loss": 0.9362,
      "step": 198290
    },
    {
      "epoch": 0.6949920617396882,
      "grad_norm": 2.96875,
      "learning_rate": 4.2685446960079545e-05,
      "loss": 0.8721,
      "step": 198300
    },
    {
      "epoch": 0.6950271092465837,
      "grad_norm": 3.125,
      "learning_rate": 4.268479793141584e-05,
      "loss": 0.9041,
      "step": 198310
    },
    {
      "epoch": 0.6950621567534794,
      "grad_norm": 3.203125,
      "learning_rate": 4.268414890275214e-05,
      "loss": 0.8895,
      "step": 198320
    },
    {
      "epoch": 0.6950972042603749,
      "grad_norm": 3.171875,
      "learning_rate": 4.268349987408844e-05,
      "loss": 0.888,
      "step": 198330
    },
    {
      "epoch": 0.6951322517672706,
      "grad_norm": 3.015625,
      "learning_rate": 4.268285084542474e-05,
      "loss": 0.8246,
      "step": 198340
    },
    {
      "epoch": 0.6951672992741661,
      "grad_norm": 2.5,
      "learning_rate": 4.268220181676104e-05,
      "loss": 0.7942,
      "step": 198350
    },
    {
      "epoch": 0.6952023467810617,
      "grad_norm": 2.921875,
      "learning_rate": 4.2681552788097333e-05,
      "loss": 0.8634,
      "step": 198360
    },
    {
      "epoch": 0.6952373942879573,
      "grad_norm": 3.234375,
      "learning_rate": 4.2680903759433635e-05,
      "loss": 0.9157,
      "step": 198370
    },
    {
      "epoch": 0.6952724417948529,
      "grad_norm": 2.765625,
      "learning_rate": 4.268025473076993e-05,
      "loss": 0.8325,
      "step": 198380
    },
    {
      "epoch": 0.6953074893017486,
      "grad_norm": 2.671875,
      "learning_rate": 4.267960570210623e-05,
      "loss": 0.9135,
      "step": 198390
    },
    {
      "epoch": 0.6953425368086441,
      "grad_norm": 2.5625,
      "learning_rate": 4.2678956673442525e-05,
      "loss": 0.9025,
      "step": 198400
    },
    {
      "epoch": 0.6953775843155398,
      "grad_norm": 2.828125,
      "learning_rate": 4.267830764477883e-05,
      "loss": 0.9634,
      "step": 198410
    },
    {
      "epoch": 0.6954126318224353,
      "grad_norm": 3.21875,
      "learning_rate": 4.267765861611512e-05,
      "loss": 0.9174,
      "step": 198420
    },
    {
      "epoch": 0.6954476793293309,
      "grad_norm": 2.546875,
      "learning_rate": 4.267700958745142e-05,
      "loss": 0.8569,
      "step": 198430
    },
    {
      "epoch": 0.6954827268362265,
      "grad_norm": 3.140625,
      "learning_rate": 4.267636055878772e-05,
      "loss": 0.8715,
      "step": 198440
    },
    {
      "epoch": 0.6955177743431221,
      "grad_norm": 3.28125,
      "learning_rate": 4.267571153012402e-05,
      "loss": 0.9157,
      "step": 198450
    },
    {
      "epoch": 0.6955528218500177,
      "grad_norm": 2.6875,
      "learning_rate": 4.2675062501460313e-05,
      "loss": 0.9019,
      "step": 198460
    },
    {
      "epoch": 0.6955878693569133,
      "grad_norm": 2.921875,
      "learning_rate": 4.2674413472796615e-05,
      "loss": 0.8455,
      "step": 198470
    },
    {
      "epoch": 0.6956229168638088,
      "grad_norm": 2.71875,
      "learning_rate": 4.2673764444132916e-05,
      "loss": 0.945,
      "step": 198480
    },
    {
      "epoch": 0.6956579643707045,
      "grad_norm": 3.109375,
      "learning_rate": 4.267311541546921e-05,
      "loss": 0.924,
      "step": 198490
    },
    {
      "epoch": 0.6956930118776001,
      "grad_norm": 2.96875,
      "learning_rate": 4.267246638680551e-05,
      "loss": 0.8513,
      "step": 198500
    },
    {
      "epoch": 0.6957280593844957,
      "grad_norm": 3.625,
      "learning_rate": 4.267181735814181e-05,
      "loss": 0.8583,
      "step": 198510
    },
    {
      "epoch": 0.6957631068913913,
      "grad_norm": 3.140625,
      "learning_rate": 4.267116832947811e-05,
      "loss": 0.9117,
      "step": 198520
    },
    {
      "epoch": 0.6957981543982868,
      "grad_norm": 3.21875,
      "learning_rate": 4.26705193008144e-05,
      "loss": 0.948,
      "step": 198530
    },
    {
      "epoch": 0.6958332019051825,
      "grad_norm": 3.140625,
      "learning_rate": 4.2669870272150704e-05,
      "loss": 0.9598,
      "step": 198540
    },
    {
      "epoch": 0.695868249412078,
      "grad_norm": 3.296875,
      "learning_rate": 4.2669221243487e-05,
      "loss": 0.9116,
      "step": 198550
    },
    {
      "epoch": 0.6959032969189737,
      "grad_norm": 2.671875,
      "learning_rate": 4.26685722148233e-05,
      "loss": 0.8739,
      "step": 198560
    },
    {
      "epoch": 0.6959383444258692,
      "grad_norm": 2.625,
      "learning_rate": 4.2667923186159595e-05,
      "loss": 0.7932,
      "step": 198570
    },
    {
      "epoch": 0.6959733919327649,
      "grad_norm": 3.390625,
      "learning_rate": 4.2667274157495896e-05,
      "loss": 0.9419,
      "step": 198580
    },
    {
      "epoch": 0.6960084394396605,
      "grad_norm": 3.53125,
      "learning_rate": 4.266662512883219e-05,
      "loss": 0.9339,
      "step": 198590
    },
    {
      "epoch": 0.696043486946556,
      "grad_norm": 2.8125,
      "learning_rate": 4.266597610016849e-05,
      "loss": 0.9226,
      "step": 198600
    },
    {
      "epoch": 0.6960785344534517,
      "grad_norm": 3.265625,
      "learning_rate": 4.2665327071504794e-05,
      "loss": 0.9527,
      "step": 198610
    },
    {
      "epoch": 0.6961135819603472,
      "grad_norm": 3.265625,
      "learning_rate": 4.266467804284109e-05,
      "loss": 0.8834,
      "step": 198620
    },
    {
      "epoch": 0.6961486294672429,
      "grad_norm": 2.734375,
      "learning_rate": 4.266402901417739e-05,
      "loss": 0.9395,
      "step": 198630
    },
    {
      "epoch": 0.6961836769741384,
      "grad_norm": 2.734375,
      "learning_rate": 4.266337998551368e-05,
      "loss": 0.9109,
      "step": 198640
    },
    {
      "epoch": 0.6962187244810341,
      "grad_norm": 2.90625,
      "learning_rate": 4.266273095684998e-05,
      "loss": 0.8393,
      "step": 198650
    },
    {
      "epoch": 0.6962537719879296,
      "grad_norm": 2.765625,
      "learning_rate": 4.2662081928186273e-05,
      "loss": 0.8775,
      "step": 198660
    },
    {
      "epoch": 0.6962888194948252,
      "grad_norm": 3.09375,
      "learning_rate": 4.2661432899522575e-05,
      "loss": 0.9127,
      "step": 198670
    },
    {
      "epoch": 0.6963238670017209,
      "grad_norm": 3.0,
      "learning_rate": 4.266078387085887e-05,
      "loss": 0.948,
      "step": 198680
    },
    {
      "epoch": 0.6963589145086164,
      "grad_norm": 2.8125,
      "learning_rate": 4.266013484219517e-05,
      "loss": 0.8701,
      "step": 198690
    },
    {
      "epoch": 0.6963939620155121,
      "grad_norm": 2.421875,
      "learning_rate": 4.265948581353147e-05,
      "loss": 0.9561,
      "step": 198700
    },
    {
      "epoch": 0.6964290095224076,
      "grad_norm": 3.40625,
      "learning_rate": 4.265883678486777e-05,
      "loss": 0.8639,
      "step": 198710
    },
    {
      "epoch": 0.6964640570293033,
      "grad_norm": 2.734375,
      "learning_rate": 4.265818775620407e-05,
      "loss": 0.9298,
      "step": 198720
    },
    {
      "epoch": 0.6964991045361988,
      "grad_norm": 2.78125,
      "learning_rate": 4.265753872754036e-05,
      "loss": 0.8248,
      "step": 198730
    },
    {
      "epoch": 0.6965341520430944,
      "grad_norm": 2.84375,
      "learning_rate": 4.2656889698876664e-05,
      "loss": 0.9503,
      "step": 198740
    },
    {
      "epoch": 0.69656919954999,
      "grad_norm": 3.40625,
      "learning_rate": 4.265624067021296e-05,
      "loss": 0.9775,
      "step": 198750
    },
    {
      "epoch": 0.6966042470568856,
      "grad_norm": 2.953125,
      "learning_rate": 4.265559164154926e-05,
      "loss": 0.9351,
      "step": 198760
    },
    {
      "epoch": 0.6966392945637812,
      "grad_norm": 2.75,
      "learning_rate": 4.2654942612885555e-05,
      "loss": 0.8998,
      "step": 198770
    },
    {
      "epoch": 0.6966743420706768,
      "grad_norm": 2.890625,
      "learning_rate": 4.2654293584221856e-05,
      "loss": 0.9016,
      "step": 198780
    },
    {
      "epoch": 0.6967093895775724,
      "grad_norm": 3.0,
      "learning_rate": 4.265364455555815e-05,
      "loss": 0.9494,
      "step": 198790
    },
    {
      "epoch": 0.696744437084468,
      "grad_norm": 2.9375,
      "learning_rate": 4.265299552689445e-05,
      "loss": 0.9489,
      "step": 198800
    },
    {
      "epoch": 0.6967794845913636,
      "grad_norm": 2.65625,
      "learning_rate": 4.265234649823075e-05,
      "loss": 0.877,
      "step": 198810
    },
    {
      "epoch": 0.6968145320982592,
      "grad_norm": 2.75,
      "learning_rate": 4.265169746956705e-05,
      "loss": 0.8713,
      "step": 198820
    },
    {
      "epoch": 0.6968495796051548,
      "grad_norm": 3.046875,
      "learning_rate": 4.265104844090334e-05,
      "loss": 0.9327,
      "step": 198830
    },
    {
      "epoch": 0.6968846271120503,
      "grad_norm": 2.96875,
      "learning_rate": 4.2650399412239644e-05,
      "loss": 0.9672,
      "step": 198840
    },
    {
      "epoch": 0.696919674618946,
      "grad_norm": 3.0625,
      "learning_rate": 4.2649750383575946e-05,
      "loss": 0.9744,
      "step": 198850
    },
    {
      "epoch": 0.6969547221258415,
      "grad_norm": 3.3125,
      "learning_rate": 4.264910135491224e-05,
      "loss": 0.9239,
      "step": 198860
    },
    {
      "epoch": 0.6969897696327372,
      "grad_norm": 2.828125,
      "learning_rate": 4.264845232624854e-05,
      "loss": 0.8958,
      "step": 198870
    },
    {
      "epoch": 0.6970248171396328,
      "grad_norm": 3.1875,
      "learning_rate": 4.2647803297584836e-05,
      "loss": 0.8746,
      "step": 198880
    },
    {
      "epoch": 0.6970598646465284,
      "grad_norm": 3.625,
      "learning_rate": 4.264715426892114e-05,
      "loss": 0.8928,
      "step": 198890
    },
    {
      "epoch": 0.697094912153424,
      "grad_norm": 2.953125,
      "learning_rate": 4.264650524025743e-05,
      "loss": 0.8472,
      "step": 198900
    },
    {
      "epoch": 0.6971299596603195,
      "grad_norm": 2.671875,
      "learning_rate": 4.2645856211593734e-05,
      "loss": 0.8647,
      "step": 198910
    },
    {
      "epoch": 0.6971650071672152,
      "grad_norm": 2.859375,
      "learning_rate": 4.264520718293003e-05,
      "loss": 0.8987,
      "step": 198920
    },
    {
      "epoch": 0.6972000546741107,
      "grad_norm": 3.21875,
      "learning_rate": 4.264455815426633e-05,
      "loss": 0.8729,
      "step": 198930
    },
    {
      "epoch": 0.6972351021810064,
      "grad_norm": 2.984375,
      "learning_rate": 4.2643909125602624e-05,
      "loss": 0.8845,
      "step": 198940
    },
    {
      "epoch": 0.6972701496879019,
      "grad_norm": 3.28125,
      "learning_rate": 4.2643260096938926e-05,
      "loss": 0.898,
      "step": 198950
    },
    {
      "epoch": 0.6973051971947976,
      "grad_norm": 2.875,
      "learning_rate": 4.264261106827522e-05,
      "loss": 0.9044,
      "step": 198960
    },
    {
      "epoch": 0.6973402447016931,
      "grad_norm": 2.859375,
      "learning_rate": 4.264196203961152e-05,
      "loss": 0.9412,
      "step": 198970
    },
    {
      "epoch": 0.6973752922085887,
      "grad_norm": 2.796875,
      "learning_rate": 4.264131301094782e-05,
      "loss": 0.8925,
      "step": 198980
    },
    {
      "epoch": 0.6974103397154844,
      "grad_norm": 3.484375,
      "learning_rate": 4.264066398228412e-05,
      "loss": 0.9419,
      "step": 198990
    },
    {
      "epoch": 0.6974453872223799,
      "grad_norm": 2.8125,
      "learning_rate": 4.264001495362042e-05,
      "loss": 0.8639,
      "step": 199000
    },
    {
      "epoch": 0.6974804347292756,
      "grad_norm": 3.375,
      "learning_rate": 4.2639365924956714e-05,
      "loss": 0.88,
      "step": 199010
    },
    {
      "epoch": 0.6975154822361711,
      "grad_norm": 2.8125,
      "learning_rate": 4.263871689629301e-05,
      "loss": 0.8427,
      "step": 199020
    },
    {
      "epoch": 0.6975505297430667,
      "grad_norm": 2.953125,
      "learning_rate": 4.26380678676293e-05,
      "loss": 0.9559,
      "step": 199030
    },
    {
      "epoch": 0.6975855772499623,
      "grad_norm": 2.703125,
      "learning_rate": 4.2637418838965604e-05,
      "loss": 0.9021,
      "step": 199040
    },
    {
      "epoch": 0.6976206247568579,
      "grad_norm": 2.828125,
      "learning_rate": 4.26367698103019e-05,
      "loss": 0.8869,
      "step": 199050
    },
    {
      "epoch": 0.6976556722637535,
      "grad_norm": 2.6875,
      "learning_rate": 4.26361207816382e-05,
      "loss": 0.8741,
      "step": 199060
    },
    {
      "epoch": 0.6976907197706491,
      "grad_norm": 2.734375,
      "learning_rate": 4.26354717529745e-05,
      "loss": 0.9491,
      "step": 199070
    },
    {
      "epoch": 0.6977257672775448,
      "grad_norm": 2.609375,
      "learning_rate": 4.2634822724310796e-05,
      "loss": 0.8745,
      "step": 199080
    },
    {
      "epoch": 0.6977608147844403,
      "grad_norm": 3.203125,
      "learning_rate": 4.26341736956471e-05,
      "loss": 0.9839,
      "step": 199090
    },
    {
      "epoch": 0.6977958622913359,
      "grad_norm": 3.109375,
      "learning_rate": 4.263352466698339e-05,
      "loss": 0.8744,
      "step": 199100
    },
    {
      "epoch": 0.6978309097982315,
      "grad_norm": 2.546875,
      "learning_rate": 4.2632875638319694e-05,
      "loss": 0.9176,
      "step": 199110
    },
    {
      "epoch": 0.6978659573051271,
      "grad_norm": 3.1875,
      "learning_rate": 4.263222660965599e-05,
      "loss": 0.9146,
      "step": 199120
    },
    {
      "epoch": 0.6979010048120227,
      "grad_norm": 3.15625,
      "learning_rate": 4.263157758099229e-05,
      "loss": 0.8551,
      "step": 199130
    },
    {
      "epoch": 0.6979360523189183,
      "grad_norm": 3.28125,
      "learning_rate": 4.2630928552328584e-05,
      "loss": 0.9631,
      "step": 199140
    },
    {
      "epoch": 0.6979710998258138,
      "grad_norm": 3.296875,
      "learning_rate": 4.2630279523664886e-05,
      "loss": 0.9037,
      "step": 199150
    },
    {
      "epoch": 0.6980061473327095,
      "grad_norm": 3.015625,
      "learning_rate": 4.262963049500118e-05,
      "loss": 0.8692,
      "step": 199160
    },
    {
      "epoch": 0.6980411948396051,
      "grad_norm": 2.46875,
      "learning_rate": 4.262898146633748e-05,
      "loss": 0.9296,
      "step": 199170
    },
    {
      "epoch": 0.6980762423465007,
      "grad_norm": 2.8125,
      "learning_rate": 4.2628332437673776e-05,
      "loss": 0.8535,
      "step": 199180
    },
    {
      "epoch": 0.6981112898533963,
      "grad_norm": 3.09375,
      "learning_rate": 4.262768340901008e-05,
      "loss": 0.896,
      "step": 199190
    },
    {
      "epoch": 0.6981463373602919,
      "grad_norm": 3.21875,
      "learning_rate": 4.262703438034638e-05,
      "loss": 0.9138,
      "step": 199200
    },
    {
      "epoch": 0.6981813848671875,
      "grad_norm": 3.09375,
      "learning_rate": 4.2626385351682674e-05,
      "loss": 0.8701,
      "step": 199210
    },
    {
      "epoch": 0.698216432374083,
      "grad_norm": 2.828125,
      "learning_rate": 4.2625736323018975e-05,
      "loss": 0.9111,
      "step": 199220
    },
    {
      "epoch": 0.6982514798809787,
      "grad_norm": 3.25,
      "learning_rate": 4.262508729435527e-05,
      "loss": 0.9634,
      "step": 199230
    },
    {
      "epoch": 0.6982865273878742,
      "grad_norm": 2.984375,
      "learning_rate": 4.262443826569157e-05,
      "loss": 0.8849,
      "step": 199240
    },
    {
      "epoch": 0.6983215748947699,
      "grad_norm": 3.015625,
      "learning_rate": 4.2623789237027866e-05,
      "loss": 0.8505,
      "step": 199250
    },
    {
      "epoch": 0.6983566224016654,
      "grad_norm": 2.890625,
      "learning_rate": 4.262314020836417e-05,
      "loss": 0.8626,
      "step": 199260
    },
    {
      "epoch": 0.698391669908561,
      "grad_norm": 3.0,
      "learning_rate": 4.262249117970046e-05,
      "loss": 0.9699,
      "step": 199270
    },
    {
      "epoch": 0.6984267174154567,
      "grad_norm": 2.828125,
      "learning_rate": 4.262184215103676e-05,
      "loss": 0.7683,
      "step": 199280
    },
    {
      "epoch": 0.6984617649223522,
      "grad_norm": 2.96875,
      "learning_rate": 4.262119312237306e-05,
      "loss": 1.0019,
      "step": 199290
    },
    {
      "epoch": 0.6984968124292479,
      "grad_norm": 3.0,
      "learning_rate": 4.262054409370936e-05,
      "loss": 0.8844,
      "step": 199300
    },
    {
      "epoch": 0.6985318599361434,
      "grad_norm": 3.125,
      "learning_rate": 4.2619895065045654e-05,
      "loss": 0.9023,
      "step": 199310
    },
    {
      "epoch": 0.6985669074430391,
      "grad_norm": 3.296875,
      "learning_rate": 4.2619246036381955e-05,
      "loss": 0.9063,
      "step": 199320
    },
    {
      "epoch": 0.6986019549499346,
      "grad_norm": 2.703125,
      "learning_rate": 4.261859700771825e-05,
      "loss": 0.9363,
      "step": 199330
    },
    {
      "epoch": 0.6986370024568302,
      "grad_norm": 2.453125,
      "learning_rate": 4.261794797905455e-05,
      "loss": 0.9113,
      "step": 199340
    },
    {
      "epoch": 0.6986720499637258,
      "grad_norm": 2.84375,
      "learning_rate": 4.261729895039085e-05,
      "loss": 0.9545,
      "step": 199350
    },
    {
      "epoch": 0.6987070974706214,
      "grad_norm": 2.875,
      "learning_rate": 4.261664992172715e-05,
      "loss": 1.0044,
      "step": 199360
    },
    {
      "epoch": 0.6987421449775171,
      "grad_norm": 3.109375,
      "learning_rate": 4.261600089306345e-05,
      "loss": 0.9074,
      "step": 199370
    },
    {
      "epoch": 0.6987771924844126,
      "grad_norm": 2.890625,
      "learning_rate": 4.261535186439974e-05,
      "loss": 0.8791,
      "step": 199380
    },
    {
      "epoch": 0.6988122399913083,
      "grad_norm": 3.015625,
      "learning_rate": 4.261470283573604e-05,
      "loss": 0.9743,
      "step": 199390
    },
    {
      "epoch": 0.6988472874982038,
      "grad_norm": 3.53125,
      "learning_rate": 4.261405380707233e-05,
      "loss": 0.9255,
      "step": 199400
    },
    {
      "epoch": 0.6988823350050994,
      "grad_norm": 3.34375,
      "learning_rate": 4.2613404778408634e-05,
      "loss": 0.9436,
      "step": 199410
    },
    {
      "epoch": 0.698917382511995,
      "grad_norm": 3.765625,
      "learning_rate": 4.261275574974493e-05,
      "loss": 0.8794,
      "step": 199420
    },
    {
      "epoch": 0.6989524300188906,
      "grad_norm": 3.453125,
      "learning_rate": 4.261210672108123e-05,
      "loss": 0.9547,
      "step": 199430
    },
    {
      "epoch": 0.6989874775257862,
      "grad_norm": 3.109375,
      "learning_rate": 4.261145769241753e-05,
      "loss": 0.8557,
      "step": 199440
    },
    {
      "epoch": 0.6990225250326818,
      "grad_norm": 2.625,
      "learning_rate": 4.2610808663753826e-05,
      "loss": 0.9195,
      "step": 199450
    },
    {
      "epoch": 0.6990575725395773,
      "grad_norm": 3.28125,
      "learning_rate": 4.261015963509013e-05,
      "loss": 0.915,
      "step": 199460
    },
    {
      "epoch": 0.699092620046473,
      "grad_norm": 2.9375,
      "learning_rate": 4.260951060642642e-05,
      "loss": 0.911,
      "step": 199470
    },
    {
      "epoch": 0.6991276675533686,
      "grad_norm": 2.6875,
      "learning_rate": 4.260886157776272e-05,
      "loss": 0.9085,
      "step": 199480
    },
    {
      "epoch": 0.6991627150602642,
      "grad_norm": 2.890625,
      "learning_rate": 4.260821254909902e-05,
      "loss": 0.8636,
      "step": 199490
    },
    {
      "epoch": 0.6991977625671598,
      "grad_norm": 2.5,
      "learning_rate": 4.260756352043532e-05,
      "loss": 0.8343,
      "step": 199500
    },
    {
      "epoch": 0.6992328100740554,
      "grad_norm": 3.046875,
      "learning_rate": 4.2606914491771614e-05,
      "loss": 0.9397,
      "step": 199510
    },
    {
      "epoch": 0.699267857580951,
      "grad_norm": 3.0,
      "learning_rate": 4.2606265463107915e-05,
      "loss": 0.9719,
      "step": 199520
    },
    {
      "epoch": 0.6993029050878465,
      "grad_norm": 2.78125,
      "learning_rate": 4.260561643444421e-05,
      "loss": 0.9286,
      "step": 199530
    },
    {
      "epoch": 0.6993379525947422,
      "grad_norm": 2.390625,
      "learning_rate": 4.260496740578051e-05,
      "loss": 0.8893,
      "step": 199540
    },
    {
      "epoch": 0.6993730001016377,
      "grad_norm": 3.6875,
      "learning_rate": 4.2604318377116806e-05,
      "loss": 0.9563,
      "step": 199550
    },
    {
      "epoch": 0.6994080476085334,
      "grad_norm": 3.203125,
      "learning_rate": 4.260366934845311e-05,
      "loss": 0.863,
      "step": 199560
    },
    {
      "epoch": 0.699443095115429,
      "grad_norm": 2.890625,
      "learning_rate": 4.260302031978941e-05,
      "loss": 0.8134,
      "step": 199570
    },
    {
      "epoch": 0.6994781426223246,
      "grad_norm": 3.40625,
      "learning_rate": 4.26023712911257e-05,
      "loss": 0.9747,
      "step": 199580
    },
    {
      "epoch": 0.6995131901292202,
      "grad_norm": 3.25,
      "learning_rate": 4.2601722262462004e-05,
      "loss": 0.9294,
      "step": 199590
    },
    {
      "epoch": 0.6995482376361157,
      "grad_norm": 2.9375,
      "learning_rate": 4.26010732337983e-05,
      "loss": 0.9769,
      "step": 199600
    },
    {
      "epoch": 0.6995832851430114,
      "grad_norm": 2.921875,
      "learning_rate": 4.26004242051346e-05,
      "loss": 0.8559,
      "step": 199610
    },
    {
      "epoch": 0.6996183326499069,
      "grad_norm": 3.046875,
      "learning_rate": 4.2599775176470895e-05,
      "loss": 0.8838,
      "step": 199620
    },
    {
      "epoch": 0.6996533801568026,
      "grad_norm": 2.9375,
      "learning_rate": 4.2599126147807196e-05,
      "loss": 0.8233,
      "step": 199630
    },
    {
      "epoch": 0.6996884276636981,
      "grad_norm": 3.1875,
      "learning_rate": 4.259847711914349e-05,
      "loss": 0.8955,
      "step": 199640
    },
    {
      "epoch": 0.6997234751705937,
      "grad_norm": 3.0625,
      "learning_rate": 4.259782809047979e-05,
      "loss": 0.9079,
      "step": 199650
    },
    {
      "epoch": 0.6997585226774894,
      "grad_norm": 2.90625,
      "learning_rate": 4.259717906181609e-05,
      "loss": 0.8968,
      "step": 199660
    },
    {
      "epoch": 0.6997935701843849,
      "grad_norm": 2.828125,
      "learning_rate": 4.259653003315239e-05,
      "loss": 0.8003,
      "step": 199670
    },
    {
      "epoch": 0.6998286176912806,
      "grad_norm": 2.890625,
      "learning_rate": 4.259588100448868e-05,
      "loss": 0.9004,
      "step": 199680
    },
    {
      "epoch": 0.6998636651981761,
      "grad_norm": 2.90625,
      "learning_rate": 4.2595231975824984e-05,
      "loss": 0.8524,
      "step": 199690
    },
    {
      "epoch": 0.6998987127050718,
      "grad_norm": 3.21875,
      "learning_rate": 4.259458294716128e-05,
      "loss": 0.9472,
      "step": 199700
    },
    {
      "epoch": 0.6999337602119673,
      "grad_norm": 3.171875,
      "learning_rate": 4.259393391849758e-05,
      "loss": 0.8199,
      "step": 199710
    },
    {
      "epoch": 0.6999688077188629,
      "grad_norm": 3.390625,
      "learning_rate": 4.259328488983388e-05,
      "loss": 0.9586,
      "step": 199720
    },
    {
      "epoch": 0.7000038552257585,
      "grad_norm": 3.0,
      "learning_rate": 4.2592635861170176e-05,
      "loss": 0.9371,
      "step": 199730
    },
    {
      "epoch": 0.7000389027326541,
      "grad_norm": 2.859375,
      "learning_rate": 4.259198683250648e-05,
      "loss": 0.8707,
      "step": 199740
    },
    {
      "epoch": 0.7000739502395497,
      "grad_norm": 3.03125,
      "learning_rate": 4.259133780384277e-05,
      "loss": 0.9133,
      "step": 199750
    },
    {
      "epoch": 0.7001089977464453,
      "grad_norm": 3.015625,
      "learning_rate": 4.2590688775179074e-05,
      "loss": 0.9928,
      "step": 199760
    },
    {
      "epoch": 0.700144045253341,
      "grad_norm": 3.359375,
      "learning_rate": 4.259003974651536e-05,
      "loss": 0.9387,
      "step": 199770
    },
    {
      "epoch": 0.7001790927602365,
      "grad_norm": 3.390625,
      "learning_rate": 4.258939071785166e-05,
      "loss": 0.8966,
      "step": 199780
    },
    {
      "epoch": 0.7002141402671321,
      "grad_norm": 3.125,
      "learning_rate": 4.258874168918796e-05,
      "loss": 0.8751,
      "step": 199790
    },
    {
      "epoch": 0.7002491877740277,
      "grad_norm": 3.5625,
      "learning_rate": 4.258809266052426e-05,
      "loss": 0.8775,
      "step": 199800
    },
    {
      "epoch": 0.7002842352809233,
      "grad_norm": 3.390625,
      "learning_rate": 4.258744363186056e-05,
      "loss": 0.9417,
      "step": 199810
    },
    {
      "epoch": 0.7003192827878189,
      "grad_norm": 2.671875,
      "learning_rate": 4.2586794603196855e-05,
      "loss": 0.8899,
      "step": 199820
    },
    {
      "epoch": 0.7003543302947145,
      "grad_norm": 3.015625,
      "learning_rate": 4.2586145574533156e-05,
      "loss": 0.9084,
      "step": 199830
    },
    {
      "epoch": 0.70038937780161,
      "grad_norm": 2.625,
      "learning_rate": 4.258549654586945e-05,
      "loss": 0.8713,
      "step": 199840
    },
    {
      "epoch": 0.7004244253085057,
      "grad_norm": 3.109375,
      "learning_rate": 4.258484751720575e-05,
      "loss": 0.8831,
      "step": 199850
    },
    {
      "epoch": 0.7004594728154013,
      "grad_norm": 2.796875,
      "learning_rate": 4.258419848854205e-05,
      "loss": 0.8777,
      "step": 199860
    },
    {
      "epoch": 0.7004945203222969,
      "grad_norm": 2.734375,
      "learning_rate": 4.258354945987835e-05,
      "loss": 0.8947,
      "step": 199870
    },
    {
      "epoch": 0.7005295678291925,
      "grad_norm": 3.15625,
      "learning_rate": 4.258290043121464e-05,
      "loss": 0.9762,
      "step": 199880
    },
    {
      "epoch": 0.700564615336088,
      "grad_norm": 2.796875,
      "learning_rate": 4.2582251402550944e-05,
      "loss": 0.8883,
      "step": 199890
    },
    {
      "epoch": 0.7005996628429837,
      "grad_norm": 2.984375,
      "learning_rate": 4.258160237388724e-05,
      "loss": 0.9413,
      "step": 199900
    },
    {
      "epoch": 0.7006347103498792,
      "grad_norm": 3.1875,
      "learning_rate": 4.258095334522354e-05,
      "loss": 0.8494,
      "step": 199910
    },
    {
      "epoch": 0.7006697578567749,
      "grad_norm": 2.953125,
      "learning_rate": 4.2580304316559835e-05,
      "loss": 0.8887,
      "step": 199920
    },
    {
      "epoch": 0.7007048053636704,
      "grad_norm": 3.171875,
      "learning_rate": 4.2579655287896136e-05,
      "loss": 0.9136,
      "step": 199930
    },
    {
      "epoch": 0.7007398528705661,
      "grad_norm": 3.28125,
      "learning_rate": 4.257900625923244e-05,
      "loss": 0.9686,
      "step": 199940
    },
    {
      "epoch": 0.7007749003774616,
      "grad_norm": 3.03125,
      "learning_rate": 4.257835723056873e-05,
      "loss": 0.8763,
      "step": 199950
    },
    {
      "epoch": 0.7008099478843572,
      "grad_norm": 2.515625,
      "learning_rate": 4.2577708201905034e-05,
      "loss": 0.9158,
      "step": 199960
    },
    {
      "epoch": 0.7008449953912529,
      "grad_norm": 2.640625,
      "learning_rate": 4.257705917324133e-05,
      "loss": 0.9066,
      "step": 199970
    },
    {
      "epoch": 0.7008800428981484,
      "grad_norm": 2.984375,
      "learning_rate": 4.257641014457763e-05,
      "loss": 0.897,
      "step": 199980
    },
    {
      "epoch": 0.7009150904050441,
      "grad_norm": 3.203125,
      "learning_rate": 4.2575761115913924e-05,
      "loss": 0.9316,
      "step": 199990
    },
    {
      "epoch": 0.7009501379119396,
      "grad_norm": 2.875,
      "learning_rate": 4.2575112087250226e-05,
      "loss": 0.926,
      "step": 200000
    },
    {
      "epoch": 0.7009501379119396,
      "eval_loss": 0.845213770866394,
      "eval_runtime": 553.6915,
      "eval_samples_per_second": 687.09,
      "eval_steps_per_second": 57.258,
      "step": 200000
    },
    {
      "epoch": 0.7009851854188353,
      "grad_norm": 2.8125,
      "learning_rate": 4.257446305858652e-05,
      "loss": 0.8942,
      "step": 200010
    },
    {
      "epoch": 0.7010202329257308,
      "grad_norm": 2.65625,
      "learning_rate": 4.257381402992282e-05,
      "loss": 0.9076,
      "step": 200020
    },
    {
      "epoch": 0.7010552804326264,
      "grad_norm": 2.984375,
      "learning_rate": 4.2573165001259116e-05,
      "loss": 0.954,
      "step": 200030
    },
    {
      "epoch": 0.701090327939522,
      "grad_norm": 2.8125,
      "learning_rate": 4.257251597259542e-05,
      "loss": 0.9506,
      "step": 200040
    },
    {
      "epoch": 0.7011253754464176,
      "grad_norm": 2.84375,
      "learning_rate": 4.257186694393171e-05,
      "loss": 0.8833,
      "step": 200050
    },
    {
      "epoch": 0.7011604229533133,
      "grad_norm": 2.953125,
      "learning_rate": 4.2571217915268014e-05,
      "loss": 0.8831,
      "step": 200060
    },
    {
      "epoch": 0.7011954704602088,
      "grad_norm": 2.71875,
      "learning_rate": 4.2570568886604315e-05,
      "loss": 0.8404,
      "step": 200070
    },
    {
      "epoch": 0.7012305179671044,
      "grad_norm": 2.8125,
      "learning_rate": 4.256991985794061e-05,
      "loss": 0.9915,
      "step": 200080
    },
    {
      "epoch": 0.701265565474,
      "grad_norm": 2.71875,
      "learning_rate": 4.256927082927691e-05,
      "loss": 0.8697,
      "step": 200090
    },
    {
      "epoch": 0.7013006129808956,
      "grad_norm": 2.5,
      "learning_rate": 4.2568621800613206e-05,
      "loss": 0.9006,
      "step": 200100
    },
    {
      "epoch": 0.7013356604877912,
      "grad_norm": 2.828125,
      "learning_rate": 4.256797277194951e-05,
      "loss": 0.8735,
      "step": 200110
    },
    {
      "epoch": 0.7013707079946868,
      "grad_norm": 2.625,
      "learning_rate": 4.25673237432858e-05,
      "loss": 0.9047,
      "step": 200120
    },
    {
      "epoch": 0.7014057555015824,
      "grad_norm": 2.46875,
      "learning_rate": 4.25666747146221e-05,
      "loss": 0.8273,
      "step": 200130
    },
    {
      "epoch": 0.701440803008478,
      "grad_norm": 3.28125,
      "learning_rate": 4.25660256859584e-05,
      "loss": 0.9867,
      "step": 200140
    },
    {
      "epoch": 0.7014758505153735,
      "grad_norm": 2.984375,
      "learning_rate": 4.256537665729469e-05,
      "loss": 0.8678,
      "step": 200150
    },
    {
      "epoch": 0.7015108980222692,
      "grad_norm": 2.828125,
      "learning_rate": 4.2564727628630994e-05,
      "loss": 0.871,
      "step": 200160
    },
    {
      "epoch": 0.7015459455291648,
      "grad_norm": 2.859375,
      "learning_rate": 4.256407859996729e-05,
      "loss": 0.8693,
      "step": 200170
    },
    {
      "epoch": 0.7015809930360604,
      "grad_norm": 2.875,
      "learning_rate": 4.256342957130359e-05,
      "loss": 1.0185,
      "step": 200180
    },
    {
      "epoch": 0.701616040542956,
      "grad_norm": 2.953125,
      "learning_rate": 4.2562780542639884e-05,
      "loss": 0.9744,
      "step": 200190
    },
    {
      "epoch": 0.7016510880498515,
      "grad_norm": 2.34375,
      "learning_rate": 4.2562131513976186e-05,
      "loss": 0.8863,
      "step": 200200
    },
    {
      "epoch": 0.7016861355567472,
      "grad_norm": 3.140625,
      "learning_rate": 4.256148248531248e-05,
      "loss": 0.9802,
      "step": 200210
    },
    {
      "epoch": 0.7017211830636427,
      "grad_norm": 3.09375,
      "learning_rate": 4.256083345664878e-05,
      "loss": 0.8755,
      "step": 200220
    },
    {
      "epoch": 0.7017562305705384,
      "grad_norm": 3.15625,
      "learning_rate": 4.2560184427985076e-05,
      "loss": 0.8936,
      "step": 200230
    },
    {
      "epoch": 0.7017912780774339,
      "grad_norm": 2.875,
      "learning_rate": 4.255953539932138e-05,
      "loss": 0.9896,
      "step": 200240
    },
    {
      "epoch": 0.7018263255843296,
      "grad_norm": 3.078125,
      "learning_rate": 4.255888637065767e-05,
      "loss": 0.9298,
      "step": 200250
    },
    {
      "epoch": 0.7018613730912252,
      "grad_norm": 2.578125,
      "learning_rate": 4.2558237341993974e-05,
      "loss": 0.8569,
      "step": 200260
    },
    {
      "epoch": 0.7018964205981207,
      "grad_norm": 2.96875,
      "learning_rate": 4.255758831333027e-05,
      "loss": 0.8772,
      "step": 200270
    },
    {
      "epoch": 0.7019314681050164,
      "grad_norm": 2.5625,
      "learning_rate": 4.255693928466657e-05,
      "loss": 0.8457,
      "step": 200280
    },
    {
      "epoch": 0.7019665156119119,
      "grad_norm": 3.28125,
      "learning_rate": 4.2556290256002864e-05,
      "loss": 0.968,
      "step": 200290
    },
    {
      "epoch": 0.7020015631188076,
      "grad_norm": 3.0625,
      "learning_rate": 4.2555641227339166e-05,
      "loss": 1.0186,
      "step": 200300
    },
    {
      "epoch": 0.7020366106257031,
      "grad_norm": 2.953125,
      "learning_rate": 4.255499219867547e-05,
      "loss": 0.8757,
      "step": 200310
    },
    {
      "epoch": 0.7020716581325988,
      "grad_norm": 3.015625,
      "learning_rate": 4.255434317001176e-05,
      "loss": 0.863,
      "step": 200320
    },
    {
      "epoch": 0.7021067056394943,
      "grad_norm": 3.0,
      "learning_rate": 4.255369414134806e-05,
      "loss": 0.9573,
      "step": 200330
    },
    {
      "epoch": 0.7021417531463899,
      "grad_norm": 3.03125,
      "learning_rate": 4.255304511268436e-05,
      "loss": 1.0348,
      "step": 200340
    },
    {
      "epoch": 0.7021768006532856,
      "grad_norm": 2.890625,
      "learning_rate": 4.255239608402066e-05,
      "loss": 0.9043,
      "step": 200350
    },
    {
      "epoch": 0.7022118481601811,
      "grad_norm": 2.65625,
      "learning_rate": 4.2551747055356954e-05,
      "loss": 0.9533,
      "step": 200360
    },
    {
      "epoch": 0.7022468956670768,
      "grad_norm": 3.03125,
      "learning_rate": 4.2551098026693255e-05,
      "loss": 0.9357,
      "step": 200370
    },
    {
      "epoch": 0.7022819431739723,
      "grad_norm": 2.78125,
      "learning_rate": 4.255044899802955e-05,
      "loss": 0.9264,
      "step": 200380
    },
    {
      "epoch": 0.702316990680868,
      "grad_norm": 3.171875,
      "learning_rate": 4.254979996936585e-05,
      "loss": 0.9081,
      "step": 200390
    },
    {
      "epoch": 0.7023520381877635,
      "grad_norm": 2.8125,
      "learning_rate": 4.2549150940702146e-05,
      "loss": 0.9175,
      "step": 200400
    },
    {
      "epoch": 0.7023870856946591,
      "grad_norm": 2.765625,
      "learning_rate": 4.254850191203845e-05,
      "loss": 0.8604,
      "step": 200410
    },
    {
      "epoch": 0.7024221332015547,
      "grad_norm": 3.0625,
      "learning_rate": 4.254785288337474e-05,
      "loss": 0.9058,
      "step": 200420
    },
    {
      "epoch": 0.7024571807084503,
      "grad_norm": 3.15625,
      "learning_rate": 4.254720385471104e-05,
      "loss": 0.8632,
      "step": 200430
    },
    {
      "epoch": 0.7024922282153458,
      "grad_norm": 2.515625,
      "learning_rate": 4.2546554826047345e-05,
      "loss": 0.8812,
      "step": 200440
    },
    {
      "epoch": 0.7025272757222415,
      "grad_norm": 3.0,
      "learning_rate": 4.254590579738364e-05,
      "loss": 0.9353,
      "step": 200450
    },
    {
      "epoch": 0.7025623232291371,
      "grad_norm": 2.578125,
      "learning_rate": 4.254525676871994e-05,
      "loss": 0.8618,
      "step": 200460
    },
    {
      "epoch": 0.7025973707360327,
      "grad_norm": 2.984375,
      "learning_rate": 4.2544607740056235e-05,
      "loss": 0.8591,
      "step": 200470
    },
    {
      "epoch": 0.7026324182429283,
      "grad_norm": 3.140625,
      "learning_rate": 4.2543958711392537e-05,
      "loss": 0.8833,
      "step": 200480
    },
    {
      "epoch": 0.7026674657498239,
      "grad_norm": 2.890625,
      "learning_rate": 4.254330968272883e-05,
      "loss": 0.9991,
      "step": 200490
    },
    {
      "epoch": 0.7027025132567195,
      "grad_norm": 2.859375,
      "learning_rate": 4.254266065406513e-05,
      "loss": 0.8615,
      "step": 200500
    },
    {
      "epoch": 0.702737560763615,
      "grad_norm": 2.859375,
      "learning_rate": 4.254201162540143e-05,
      "loss": 0.8937,
      "step": 200510
    },
    {
      "epoch": 0.7027726082705107,
      "grad_norm": 2.8125,
      "learning_rate": 4.254136259673772e-05,
      "loss": 0.9442,
      "step": 200520
    },
    {
      "epoch": 0.7028076557774062,
      "grad_norm": 2.859375,
      "learning_rate": 4.254071356807402e-05,
      "loss": 0.8778,
      "step": 200530
    },
    {
      "epoch": 0.7028427032843019,
      "grad_norm": 2.859375,
      "learning_rate": 4.254006453941032e-05,
      "loss": 0.9108,
      "step": 200540
    },
    {
      "epoch": 0.7028777507911975,
      "grad_norm": 3.109375,
      "learning_rate": 4.253941551074662e-05,
      "loss": 0.9696,
      "step": 200550
    },
    {
      "epoch": 0.7029127982980931,
      "grad_norm": 2.96875,
      "learning_rate": 4.2538766482082914e-05,
      "loss": 0.9304,
      "step": 200560
    },
    {
      "epoch": 0.7029478458049887,
      "grad_norm": 2.78125,
      "learning_rate": 4.2538117453419215e-05,
      "loss": 0.8698,
      "step": 200570
    },
    {
      "epoch": 0.7029828933118842,
      "grad_norm": 3.515625,
      "learning_rate": 4.253746842475551e-05,
      "loss": 0.934,
      "step": 200580
    },
    {
      "epoch": 0.7030179408187799,
      "grad_norm": 3.328125,
      "learning_rate": 4.253681939609181e-05,
      "loss": 0.9507,
      "step": 200590
    },
    {
      "epoch": 0.7030529883256754,
      "grad_norm": 3.671875,
      "learning_rate": 4.2536170367428106e-05,
      "loss": 0.9433,
      "step": 200600
    },
    {
      "epoch": 0.7030880358325711,
      "grad_norm": 3.03125,
      "learning_rate": 4.253552133876441e-05,
      "loss": 0.8855,
      "step": 200610
    },
    {
      "epoch": 0.7031230833394666,
      "grad_norm": 3.328125,
      "learning_rate": 4.25348723101007e-05,
      "loss": 0.9468,
      "step": 200620
    },
    {
      "epoch": 0.7031581308463623,
      "grad_norm": 2.609375,
      "learning_rate": 4.2534223281437e-05,
      "loss": 0.8475,
      "step": 200630
    },
    {
      "epoch": 0.7031931783532578,
      "grad_norm": 2.953125,
      "learning_rate": 4.25335742527733e-05,
      "loss": 0.9256,
      "step": 200640
    },
    {
      "epoch": 0.7032282258601534,
      "grad_norm": 3.03125,
      "learning_rate": 4.25329252241096e-05,
      "loss": 0.85,
      "step": 200650
    },
    {
      "epoch": 0.7032632733670491,
      "grad_norm": 2.796875,
      "learning_rate": 4.2532276195445894e-05,
      "loss": 0.844,
      "step": 200660
    },
    {
      "epoch": 0.7032983208739446,
      "grad_norm": 2.9375,
      "learning_rate": 4.2531627166782195e-05,
      "loss": 0.8969,
      "step": 200670
    },
    {
      "epoch": 0.7033333683808403,
      "grad_norm": 3.0,
      "learning_rate": 4.2530978138118497e-05,
      "loss": 0.9125,
      "step": 200680
    },
    {
      "epoch": 0.7033684158877358,
      "grad_norm": 3.03125,
      "learning_rate": 4.253032910945479e-05,
      "loss": 0.8454,
      "step": 200690
    },
    {
      "epoch": 0.7034034633946314,
      "grad_norm": 3.03125,
      "learning_rate": 4.252968008079109e-05,
      "loss": 0.8655,
      "step": 200700
    },
    {
      "epoch": 0.703438510901527,
      "grad_norm": 2.59375,
      "learning_rate": 4.252903105212739e-05,
      "loss": 0.8975,
      "step": 200710
    },
    {
      "epoch": 0.7034735584084226,
      "grad_norm": 2.515625,
      "learning_rate": 4.252838202346369e-05,
      "loss": 0.807,
      "step": 200720
    },
    {
      "epoch": 0.7035086059153182,
      "grad_norm": 2.84375,
      "learning_rate": 4.252773299479998e-05,
      "loss": 0.9058,
      "step": 200730
    },
    {
      "epoch": 0.7035436534222138,
      "grad_norm": 2.921875,
      "learning_rate": 4.2527083966136285e-05,
      "loss": 0.9121,
      "step": 200740
    },
    {
      "epoch": 0.7035787009291095,
      "grad_norm": 2.8125,
      "learning_rate": 4.252643493747258e-05,
      "loss": 0.8832,
      "step": 200750
    },
    {
      "epoch": 0.703613748436005,
      "grad_norm": 3.046875,
      "learning_rate": 4.252578590880888e-05,
      "loss": 0.8996,
      "step": 200760
    },
    {
      "epoch": 0.7036487959429006,
      "grad_norm": 3.234375,
      "learning_rate": 4.2525136880145175e-05,
      "loss": 0.9433,
      "step": 200770
    },
    {
      "epoch": 0.7036838434497962,
      "grad_norm": 3.109375,
      "learning_rate": 4.2524487851481477e-05,
      "loss": 0.9295,
      "step": 200780
    },
    {
      "epoch": 0.7037188909566918,
      "grad_norm": 2.75,
      "learning_rate": 4.252383882281777e-05,
      "loss": 0.9521,
      "step": 200790
    },
    {
      "epoch": 0.7037539384635874,
      "grad_norm": 2.765625,
      "learning_rate": 4.252318979415407e-05,
      "loss": 0.8255,
      "step": 200800
    },
    {
      "epoch": 0.703788985970483,
      "grad_norm": 2.640625,
      "learning_rate": 4.2522540765490374e-05,
      "loss": 0.8158,
      "step": 200810
    },
    {
      "epoch": 0.7038240334773785,
      "grad_norm": 2.828125,
      "learning_rate": 4.252189173682667e-05,
      "loss": 0.9428,
      "step": 200820
    },
    {
      "epoch": 0.7038590809842742,
      "grad_norm": 3.25,
      "learning_rate": 4.252124270816297e-05,
      "loss": 0.9144,
      "step": 200830
    },
    {
      "epoch": 0.7038941284911698,
      "grad_norm": 2.546875,
      "learning_rate": 4.2520593679499265e-05,
      "loss": 0.8743,
      "step": 200840
    },
    {
      "epoch": 0.7039291759980654,
      "grad_norm": 2.625,
      "learning_rate": 4.2519944650835566e-05,
      "loss": 0.7897,
      "step": 200850
    },
    {
      "epoch": 0.703964223504961,
      "grad_norm": 3.046875,
      "learning_rate": 4.251929562217186e-05,
      "loss": 0.9245,
      "step": 200860
    },
    {
      "epoch": 0.7039992710118566,
      "grad_norm": 2.671875,
      "learning_rate": 4.251864659350816e-05,
      "loss": 0.8779,
      "step": 200870
    },
    {
      "epoch": 0.7040343185187522,
      "grad_norm": 3.375,
      "learning_rate": 4.2517997564844457e-05,
      "loss": 0.8712,
      "step": 200880
    },
    {
      "epoch": 0.7040693660256477,
      "grad_norm": 2.921875,
      "learning_rate": 4.251734853618076e-05,
      "loss": 0.8741,
      "step": 200890
    },
    {
      "epoch": 0.7041044135325434,
      "grad_norm": 3.109375,
      "learning_rate": 4.251669950751705e-05,
      "loss": 0.9615,
      "step": 200900
    },
    {
      "epoch": 0.7041394610394389,
      "grad_norm": 2.75,
      "learning_rate": 4.251605047885335e-05,
      "loss": 0.8452,
      "step": 200910
    },
    {
      "epoch": 0.7041745085463346,
      "grad_norm": 2.578125,
      "learning_rate": 4.251540145018965e-05,
      "loss": 0.9277,
      "step": 200920
    },
    {
      "epoch": 0.7042095560532301,
      "grad_norm": 2.84375,
      "learning_rate": 4.251475242152594e-05,
      "loss": 0.9446,
      "step": 200930
    },
    {
      "epoch": 0.7042446035601257,
      "grad_norm": 3.265625,
      "learning_rate": 4.2514103392862244e-05,
      "loss": 0.993,
      "step": 200940
    },
    {
      "epoch": 0.7042796510670214,
      "grad_norm": 2.921875,
      "learning_rate": 4.251345436419854e-05,
      "loss": 0.9169,
      "step": 200950
    },
    {
      "epoch": 0.7043146985739169,
      "grad_norm": 3.015625,
      "learning_rate": 4.251280533553484e-05,
      "loss": 0.8823,
      "step": 200960
    },
    {
      "epoch": 0.7043497460808126,
      "grad_norm": 3.046875,
      "learning_rate": 4.2512156306871135e-05,
      "loss": 0.8597,
      "step": 200970
    },
    {
      "epoch": 0.7043847935877081,
      "grad_norm": 3.359375,
      "learning_rate": 4.2511507278207436e-05,
      "loss": 0.8607,
      "step": 200980
    },
    {
      "epoch": 0.7044198410946038,
      "grad_norm": 2.71875,
      "learning_rate": 4.251085824954373e-05,
      "loss": 0.9956,
      "step": 200990
    },
    {
      "epoch": 0.7044548886014993,
      "grad_norm": 2.640625,
      "learning_rate": 4.251020922088003e-05,
      "loss": 0.9079,
      "step": 201000
    },
    {
      "epoch": 0.7044899361083949,
      "grad_norm": 2.796875,
      "learning_rate": 4.250956019221633e-05,
      "loss": 0.9239,
      "step": 201010
    },
    {
      "epoch": 0.7045249836152905,
      "grad_norm": 3.015625,
      "learning_rate": 4.250891116355263e-05,
      "loss": 0.8999,
      "step": 201020
    },
    {
      "epoch": 0.7045600311221861,
      "grad_norm": 2.53125,
      "learning_rate": 4.250826213488893e-05,
      "loss": 0.849,
      "step": 201030
    },
    {
      "epoch": 0.7045950786290818,
      "grad_norm": 3.21875,
      "learning_rate": 4.2507613106225224e-05,
      "loss": 0.8397,
      "step": 201040
    },
    {
      "epoch": 0.7046301261359773,
      "grad_norm": 2.5625,
      "learning_rate": 4.2506964077561526e-05,
      "loss": 0.9496,
      "step": 201050
    },
    {
      "epoch": 0.704665173642873,
      "grad_norm": 3.09375,
      "learning_rate": 4.250631504889782e-05,
      "loss": 0.8498,
      "step": 201060
    },
    {
      "epoch": 0.7047002211497685,
      "grad_norm": 2.796875,
      "learning_rate": 4.250566602023412e-05,
      "loss": 0.9335,
      "step": 201070
    },
    {
      "epoch": 0.7047352686566641,
      "grad_norm": 2.703125,
      "learning_rate": 4.2505016991570416e-05,
      "loss": 0.9027,
      "step": 201080
    },
    {
      "epoch": 0.7047703161635597,
      "grad_norm": 2.796875,
      "learning_rate": 4.250436796290672e-05,
      "loss": 0.9078,
      "step": 201090
    },
    {
      "epoch": 0.7048053636704553,
      "grad_norm": 3.5,
      "learning_rate": 4.250371893424301e-05,
      "loss": 0.9585,
      "step": 201100
    },
    {
      "epoch": 0.7048404111773509,
      "grad_norm": 2.8125,
      "learning_rate": 4.2503069905579314e-05,
      "loss": 0.9029,
      "step": 201110
    },
    {
      "epoch": 0.7048754586842465,
      "grad_norm": 2.8125,
      "learning_rate": 4.250242087691561e-05,
      "loss": 0.8369,
      "step": 201120
    },
    {
      "epoch": 0.704910506191142,
      "grad_norm": 2.625,
      "learning_rate": 4.250177184825191e-05,
      "loss": 0.8751,
      "step": 201130
    },
    {
      "epoch": 0.7049455536980377,
      "grad_norm": 3.296875,
      "learning_rate": 4.2501122819588204e-05,
      "loss": 0.8938,
      "step": 201140
    },
    {
      "epoch": 0.7049806012049333,
      "grad_norm": 3.375,
      "learning_rate": 4.2500473790924506e-05,
      "loss": 0.9798,
      "step": 201150
    },
    {
      "epoch": 0.7050156487118289,
      "grad_norm": 3.15625,
      "learning_rate": 4.24998247622608e-05,
      "loss": 0.8854,
      "step": 201160
    },
    {
      "epoch": 0.7050506962187245,
      "grad_norm": 2.96875,
      "learning_rate": 4.24991757335971e-05,
      "loss": 0.8866,
      "step": 201170
    },
    {
      "epoch": 0.70508574372562,
      "grad_norm": 2.734375,
      "learning_rate": 4.24985267049334e-05,
      "loss": 0.8724,
      "step": 201180
    },
    {
      "epoch": 0.7051207912325157,
      "grad_norm": 3.296875,
      "learning_rate": 4.24978776762697e-05,
      "loss": 0.9164,
      "step": 201190
    },
    {
      "epoch": 0.7051558387394112,
      "grad_norm": 2.921875,
      "learning_rate": 4.2497228647606e-05,
      "loss": 0.9021,
      "step": 201200
    },
    {
      "epoch": 0.7051908862463069,
      "grad_norm": 3.09375,
      "learning_rate": 4.2496579618942294e-05,
      "loss": 0.8953,
      "step": 201210
    },
    {
      "epoch": 0.7052259337532024,
      "grad_norm": 2.71875,
      "learning_rate": 4.2495930590278595e-05,
      "loss": 0.9009,
      "step": 201220
    },
    {
      "epoch": 0.7052609812600981,
      "grad_norm": 2.4375,
      "learning_rate": 4.249528156161489e-05,
      "loss": 0.8235,
      "step": 201230
    },
    {
      "epoch": 0.7052960287669937,
      "grad_norm": 3.109375,
      "learning_rate": 4.249463253295119e-05,
      "loss": 0.8849,
      "step": 201240
    },
    {
      "epoch": 0.7053310762738892,
      "grad_norm": 2.9375,
      "learning_rate": 4.2493983504287486e-05,
      "loss": 0.962,
      "step": 201250
    },
    {
      "epoch": 0.7053661237807849,
      "grad_norm": 2.640625,
      "learning_rate": 4.249333447562379e-05,
      "loss": 0.9417,
      "step": 201260
    },
    {
      "epoch": 0.7054011712876804,
      "grad_norm": 3.25,
      "learning_rate": 4.249268544696008e-05,
      "loss": 0.947,
      "step": 201270
    },
    {
      "epoch": 0.7054362187945761,
      "grad_norm": 3.4375,
      "learning_rate": 4.2492036418296376e-05,
      "loss": 0.9231,
      "step": 201280
    },
    {
      "epoch": 0.7054712663014716,
      "grad_norm": 2.578125,
      "learning_rate": 4.249138738963268e-05,
      "loss": 0.8575,
      "step": 201290
    },
    {
      "epoch": 0.7055063138083673,
      "grad_norm": 2.46875,
      "learning_rate": 4.249073836096897e-05,
      "loss": 0.8468,
      "step": 201300
    },
    {
      "epoch": 0.7055413613152628,
      "grad_norm": 2.890625,
      "learning_rate": 4.2490089332305274e-05,
      "loss": 0.9183,
      "step": 201310
    },
    {
      "epoch": 0.7055764088221584,
      "grad_norm": 3.140625,
      "learning_rate": 4.248944030364157e-05,
      "loss": 0.9263,
      "step": 201320
    },
    {
      "epoch": 0.7056114563290541,
      "grad_norm": 3.796875,
      "learning_rate": 4.248879127497787e-05,
      "loss": 0.8731,
      "step": 201330
    },
    {
      "epoch": 0.7056465038359496,
      "grad_norm": 2.734375,
      "learning_rate": 4.2488142246314164e-05,
      "loss": 0.835,
      "step": 201340
    },
    {
      "epoch": 0.7056815513428453,
      "grad_norm": 3.078125,
      "learning_rate": 4.2487493217650466e-05,
      "loss": 0.9117,
      "step": 201350
    },
    {
      "epoch": 0.7057165988497408,
      "grad_norm": 3.078125,
      "learning_rate": 4.248684418898676e-05,
      "loss": 0.8977,
      "step": 201360
    },
    {
      "epoch": 0.7057516463566365,
      "grad_norm": 2.703125,
      "learning_rate": 4.248619516032306e-05,
      "loss": 0.9305,
      "step": 201370
    },
    {
      "epoch": 0.705786693863532,
      "grad_norm": 3.0,
      "learning_rate": 4.2485546131659356e-05,
      "loss": 0.8702,
      "step": 201380
    },
    {
      "epoch": 0.7058217413704276,
      "grad_norm": 2.625,
      "learning_rate": 4.248489710299566e-05,
      "loss": 0.8929,
      "step": 201390
    },
    {
      "epoch": 0.7058567888773232,
      "grad_norm": 3.234375,
      "learning_rate": 4.248424807433196e-05,
      "loss": 0.9538,
      "step": 201400
    },
    {
      "epoch": 0.7058918363842188,
      "grad_norm": 2.84375,
      "learning_rate": 4.2483599045668254e-05,
      "loss": 0.8668,
      "step": 201410
    },
    {
      "epoch": 0.7059268838911144,
      "grad_norm": 2.796875,
      "learning_rate": 4.2482950017004555e-05,
      "loss": 0.905,
      "step": 201420
    },
    {
      "epoch": 0.70596193139801,
      "grad_norm": 3.5625,
      "learning_rate": 4.248230098834085e-05,
      "loss": 0.912,
      "step": 201430
    },
    {
      "epoch": 0.7059969789049056,
      "grad_norm": 3.015625,
      "learning_rate": 4.248165195967715e-05,
      "loss": 0.9031,
      "step": 201440
    },
    {
      "epoch": 0.7060320264118012,
      "grad_norm": 2.984375,
      "learning_rate": 4.2481002931013446e-05,
      "loss": 0.8845,
      "step": 201450
    },
    {
      "epoch": 0.7060670739186968,
      "grad_norm": 3.046875,
      "learning_rate": 4.248035390234975e-05,
      "loss": 0.9772,
      "step": 201460
    },
    {
      "epoch": 0.7061021214255924,
      "grad_norm": 2.8125,
      "learning_rate": 4.247970487368604e-05,
      "loss": 0.9045,
      "step": 201470
    },
    {
      "epoch": 0.706137168932488,
      "grad_norm": 3.21875,
      "learning_rate": 4.247905584502234e-05,
      "loss": 0.8685,
      "step": 201480
    },
    {
      "epoch": 0.7061722164393835,
      "grad_norm": 3.046875,
      "learning_rate": 4.247840681635864e-05,
      "loss": 0.7995,
      "step": 201490
    },
    {
      "epoch": 0.7062072639462792,
      "grad_norm": 3.328125,
      "learning_rate": 4.247775778769494e-05,
      "loss": 0.8648,
      "step": 201500
    },
    {
      "epoch": 0.7062423114531747,
      "grad_norm": 2.859375,
      "learning_rate": 4.2477108759031234e-05,
      "loss": 0.8562,
      "step": 201510
    },
    {
      "epoch": 0.7062773589600704,
      "grad_norm": 2.75,
      "learning_rate": 4.2476459730367535e-05,
      "loss": 0.7753,
      "step": 201520
    },
    {
      "epoch": 0.706312406466966,
      "grad_norm": 3.390625,
      "learning_rate": 4.247581070170383e-05,
      "loss": 0.8803,
      "step": 201530
    },
    {
      "epoch": 0.7063474539738616,
      "grad_norm": 3.09375,
      "learning_rate": 4.247516167304013e-05,
      "loss": 0.9846,
      "step": 201540
    },
    {
      "epoch": 0.7063825014807572,
      "grad_norm": 2.609375,
      "learning_rate": 4.247451264437643e-05,
      "loss": 0.8659,
      "step": 201550
    },
    {
      "epoch": 0.7064175489876527,
      "grad_norm": 2.625,
      "learning_rate": 4.247386361571273e-05,
      "loss": 0.8636,
      "step": 201560
    },
    {
      "epoch": 0.7064525964945484,
      "grad_norm": 2.984375,
      "learning_rate": 4.247321458704903e-05,
      "loss": 0.8942,
      "step": 201570
    },
    {
      "epoch": 0.7064876440014439,
      "grad_norm": 3.65625,
      "learning_rate": 4.247256555838532e-05,
      "loss": 0.9585,
      "step": 201580
    },
    {
      "epoch": 0.7065226915083396,
      "grad_norm": 2.453125,
      "learning_rate": 4.2471916529721625e-05,
      "loss": 0.8572,
      "step": 201590
    },
    {
      "epoch": 0.7065577390152351,
      "grad_norm": 3.015625,
      "learning_rate": 4.247126750105792e-05,
      "loss": 0.8883,
      "step": 201600
    },
    {
      "epoch": 0.7065927865221308,
      "grad_norm": 3.078125,
      "learning_rate": 4.247061847239422e-05,
      "loss": 0.8701,
      "step": 201610
    },
    {
      "epoch": 0.7066278340290263,
      "grad_norm": 3.484375,
      "learning_rate": 4.2469969443730515e-05,
      "loss": 0.9732,
      "step": 201620
    },
    {
      "epoch": 0.7066628815359219,
      "grad_norm": 2.984375,
      "learning_rate": 4.246932041506682e-05,
      "loss": 1.0086,
      "step": 201630
    },
    {
      "epoch": 0.7066979290428176,
      "grad_norm": 2.578125,
      "learning_rate": 4.246867138640311e-05,
      "loss": 0.8576,
      "step": 201640
    },
    {
      "epoch": 0.7067329765497131,
      "grad_norm": 3.25,
      "learning_rate": 4.2468022357739406e-05,
      "loss": 0.9186,
      "step": 201650
    },
    {
      "epoch": 0.7067680240566088,
      "grad_norm": 3.046875,
      "learning_rate": 4.246737332907571e-05,
      "loss": 0.8911,
      "step": 201660
    },
    {
      "epoch": 0.7068030715635043,
      "grad_norm": 2.65625,
      "learning_rate": 4.2466724300412e-05,
      "loss": 0.9287,
      "step": 201670
    },
    {
      "epoch": 0.7068381190704,
      "grad_norm": 2.9375,
      "learning_rate": 4.24660752717483e-05,
      "loss": 1.0017,
      "step": 201680
    },
    {
      "epoch": 0.7068731665772955,
      "grad_norm": 2.734375,
      "learning_rate": 4.24654262430846e-05,
      "loss": 0.8892,
      "step": 201690
    },
    {
      "epoch": 0.7069082140841911,
      "grad_norm": 2.90625,
      "learning_rate": 4.24647772144209e-05,
      "loss": 0.9012,
      "step": 201700
    },
    {
      "epoch": 0.7069432615910867,
      "grad_norm": 2.265625,
      "learning_rate": 4.2464128185757194e-05,
      "loss": 0.9408,
      "step": 201710
    },
    {
      "epoch": 0.7069783090979823,
      "grad_norm": 3.140625,
      "learning_rate": 4.2463479157093495e-05,
      "loss": 0.9379,
      "step": 201720
    },
    {
      "epoch": 0.707013356604878,
      "grad_norm": 3.328125,
      "learning_rate": 4.246283012842979e-05,
      "loss": 0.9699,
      "step": 201730
    },
    {
      "epoch": 0.7070484041117735,
      "grad_norm": 2.859375,
      "learning_rate": 4.246218109976609e-05,
      "loss": 0.8656,
      "step": 201740
    },
    {
      "epoch": 0.7070834516186691,
      "grad_norm": 3.046875,
      "learning_rate": 4.2461532071102386e-05,
      "loss": 0.8729,
      "step": 201750
    },
    {
      "epoch": 0.7071184991255647,
      "grad_norm": 3.203125,
      "learning_rate": 4.246088304243869e-05,
      "loss": 0.9743,
      "step": 201760
    },
    {
      "epoch": 0.7071535466324603,
      "grad_norm": 2.765625,
      "learning_rate": 4.246023401377499e-05,
      "loss": 0.8997,
      "step": 201770
    },
    {
      "epoch": 0.7071885941393559,
      "grad_norm": 2.921875,
      "learning_rate": 4.245958498511128e-05,
      "loss": 0.8838,
      "step": 201780
    },
    {
      "epoch": 0.7072236416462515,
      "grad_norm": 2.6875,
      "learning_rate": 4.2458935956447585e-05,
      "loss": 0.8763,
      "step": 201790
    },
    {
      "epoch": 0.707258689153147,
      "grad_norm": 2.6875,
      "learning_rate": 4.245828692778388e-05,
      "loss": 0.9172,
      "step": 201800
    },
    {
      "epoch": 0.7072937366600427,
      "grad_norm": 3.546875,
      "learning_rate": 4.245763789912018e-05,
      "loss": 0.9292,
      "step": 201810
    },
    {
      "epoch": 0.7073287841669383,
      "grad_norm": 2.625,
      "learning_rate": 4.2456988870456475e-05,
      "loss": 0.8704,
      "step": 201820
    },
    {
      "epoch": 0.7073638316738339,
      "grad_norm": 2.734375,
      "learning_rate": 4.245633984179278e-05,
      "loss": 0.9298,
      "step": 201830
    },
    {
      "epoch": 0.7073988791807295,
      "grad_norm": 3.171875,
      "learning_rate": 4.245569081312907e-05,
      "loss": 0.846,
      "step": 201840
    },
    {
      "epoch": 0.7074339266876251,
      "grad_norm": 2.625,
      "learning_rate": 4.245504178446537e-05,
      "loss": 0.8605,
      "step": 201850
    },
    {
      "epoch": 0.7074689741945207,
      "grad_norm": 3.125,
      "learning_rate": 4.245439275580167e-05,
      "loss": 0.9473,
      "step": 201860
    },
    {
      "epoch": 0.7075040217014162,
      "grad_norm": 3.453125,
      "learning_rate": 4.245374372713797e-05,
      "loss": 0.9801,
      "step": 201870
    },
    {
      "epoch": 0.7075390692083119,
      "grad_norm": 3.4375,
      "learning_rate": 4.245309469847426e-05,
      "loss": 0.8452,
      "step": 201880
    },
    {
      "epoch": 0.7075741167152074,
      "grad_norm": 2.828125,
      "learning_rate": 4.2452445669810565e-05,
      "loss": 0.9221,
      "step": 201890
    },
    {
      "epoch": 0.7076091642221031,
      "grad_norm": 3.109375,
      "learning_rate": 4.245179664114686e-05,
      "loss": 0.8876,
      "step": 201900
    },
    {
      "epoch": 0.7076442117289986,
      "grad_norm": 2.8125,
      "learning_rate": 4.245114761248316e-05,
      "loss": 0.9331,
      "step": 201910
    },
    {
      "epoch": 0.7076792592358943,
      "grad_norm": 3.453125,
      "learning_rate": 4.245049858381946e-05,
      "loss": 0.9305,
      "step": 201920
    },
    {
      "epoch": 0.7077143067427899,
      "grad_norm": 2.953125,
      "learning_rate": 4.244984955515576e-05,
      "loss": 0.8596,
      "step": 201930
    },
    {
      "epoch": 0.7077493542496854,
      "grad_norm": 3.40625,
      "learning_rate": 4.244920052649206e-05,
      "loss": 0.8366,
      "step": 201940
    },
    {
      "epoch": 0.7077844017565811,
      "grad_norm": 2.84375,
      "learning_rate": 4.244855149782835e-05,
      "loss": 0.8385,
      "step": 201950
    },
    {
      "epoch": 0.7078194492634766,
      "grad_norm": 3.1875,
      "learning_rate": 4.2447902469164654e-05,
      "loss": 0.9691,
      "step": 201960
    },
    {
      "epoch": 0.7078544967703723,
      "grad_norm": 3.109375,
      "learning_rate": 4.244725344050095e-05,
      "loss": 0.9502,
      "step": 201970
    },
    {
      "epoch": 0.7078895442772678,
      "grad_norm": 2.78125,
      "learning_rate": 4.244660441183725e-05,
      "loss": 0.9469,
      "step": 201980
    },
    {
      "epoch": 0.7079245917841634,
      "grad_norm": 2.90625,
      "learning_rate": 4.2445955383173545e-05,
      "loss": 0.9272,
      "step": 201990
    },
    {
      "epoch": 0.707959639291059,
      "grad_norm": 3.140625,
      "learning_rate": 4.2445306354509846e-05,
      "loss": 0.8555,
      "step": 202000
    },
    {
      "epoch": 0.7079946867979546,
      "grad_norm": 3.3125,
      "learning_rate": 4.244465732584614e-05,
      "loss": 0.8722,
      "step": 202010
    },
    {
      "epoch": 0.7080297343048503,
      "grad_norm": 2.453125,
      "learning_rate": 4.244400829718244e-05,
      "loss": 0.8802,
      "step": 202020
    },
    {
      "epoch": 0.7080647818117458,
      "grad_norm": 2.90625,
      "learning_rate": 4.244335926851874e-05,
      "loss": 0.918,
      "step": 202030
    },
    {
      "epoch": 0.7080998293186415,
      "grad_norm": 2.828125,
      "learning_rate": 4.244271023985503e-05,
      "loss": 0.9125,
      "step": 202040
    },
    {
      "epoch": 0.708134876825537,
      "grad_norm": 3.359375,
      "learning_rate": 4.244206121119133e-05,
      "loss": 0.9323,
      "step": 202050
    },
    {
      "epoch": 0.7081699243324326,
      "grad_norm": 2.6875,
      "learning_rate": 4.244141218252763e-05,
      "loss": 0.8625,
      "step": 202060
    },
    {
      "epoch": 0.7082049718393282,
      "grad_norm": 2.84375,
      "learning_rate": 4.244076315386393e-05,
      "loss": 0.9122,
      "step": 202070
    },
    {
      "epoch": 0.7082400193462238,
      "grad_norm": 2.953125,
      "learning_rate": 4.244011412520022e-05,
      "loss": 0.8732,
      "step": 202080
    },
    {
      "epoch": 0.7082750668531194,
      "grad_norm": 2.703125,
      "learning_rate": 4.2439465096536525e-05,
      "loss": 0.8933,
      "step": 202090
    },
    {
      "epoch": 0.708310114360015,
      "grad_norm": 2.625,
      "learning_rate": 4.243881606787282e-05,
      "loss": 0.9129,
      "step": 202100
    },
    {
      "epoch": 0.7083451618669105,
      "grad_norm": 2.96875,
      "learning_rate": 4.243816703920912e-05,
      "loss": 0.8771,
      "step": 202110
    },
    {
      "epoch": 0.7083802093738062,
      "grad_norm": 2.84375,
      "learning_rate": 4.2437518010545415e-05,
      "loss": 0.8938,
      "step": 202120
    },
    {
      "epoch": 0.7084152568807018,
      "grad_norm": 3.09375,
      "learning_rate": 4.243686898188172e-05,
      "loss": 0.8757,
      "step": 202130
    },
    {
      "epoch": 0.7084503043875974,
      "grad_norm": 3.359375,
      "learning_rate": 4.243621995321802e-05,
      "loss": 0.8422,
      "step": 202140
    },
    {
      "epoch": 0.708485351894493,
      "grad_norm": 3.296875,
      "learning_rate": 4.243557092455431e-05,
      "loss": 0.9144,
      "step": 202150
    },
    {
      "epoch": 0.7085203994013886,
      "grad_norm": 3.15625,
      "learning_rate": 4.2434921895890614e-05,
      "loss": 0.8319,
      "step": 202160
    },
    {
      "epoch": 0.7085554469082842,
      "grad_norm": 3.03125,
      "learning_rate": 4.243427286722691e-05,
      "loss": 0.9334,
      "step": 202170
    },
    {
      "epoch": 0.7085904944151797,
      "grad_norm": 3.0625,
      "learning_rate": 4.243362383856321e-05,
      "loss": 0.8991,
      "step": 202180
    },
    {
      "epoch": 0.7086255419220754,
      "grad_norm": 2.734375,
      "learning_rate": 4.2432974809899505e-05,
      "loss": 0.9439,
      "step": 202190
    },
    {
      "epoch": 0.7086605894289709,
      "grad_norm": 2.875,
      "learning_rate": 4.2432325781235806e-05,
      "loss": 0.8894,
      "step": 202200
    },
    {
      "epoch": 0.7086956369358666,
      "grad_norm": 3.03125,
      "learning_rate": 4.24316767525721e-05,
      "loss": 0.9108,
      "step": 202210
    },
    {
      "epoch": 0.7087306844427622,
      "grad_norm": 3.15625,
      "learning_rate": 4.24310277239084e-05,
      "loss": 0.8422,
      "step": 202220
    },
    {
      "epoch": 0.7087657319496578,
      "grad_norm": 3.03125,
      "learning_rate": 4.24303786952447e-05,
      "loss": 0.8907,
      "step": 202230
    },
    {
      "epoch": 0.7088007794565534,
      "grad_norm": 3.140625,
      "learning_rate": 4.2429729666581e-05,
      "loss": 0.9637,
      "step": 202240
    },
    {
      "epoch": 0.7088358269634489,
      "grad_norm": 3.09375,
      "learning_rate": 4.242908063791729e-05,
      "loss": 0.9282,
      "step": 202250
    },
    {
      "epoch": 0.7088708744703446,
      "grad_norm": 2.46875,
      "learning_rate": 4.2428431609253594e-05,
      "loss": 0.9372,
      "step": 202260
    },
    {
      "epoch": 0.7089059219772401,
      "grad_norm": 3.125,
      "learning_rate": 4.2427782580589895e-05,
      "loss": 0.9032,
      "step": 202270
    },
    {
      "epoch": 0.7089409694841358,
      "grad_norm": 3.109375,
      "learning_rate": 4.242713355192619e-05,
      "loss": 0.9825,
      "step": 202280
    },
    {
      "epoch": 0.7089760169910313,
      "grad_norm": 3.25,
      "learning_rate": 4.242648452326249e-05,
      "loss": 0.9477,
      "step": 202290
    },
    {
      "epoch": 0.709011064497927,
      "grad_norm": 2.75,
      "learning_rate": 4.2425835494598786e-05,
      "loss": 0.9427,
      "step": 202300
    },
    {
      "epoch": 0.7090461120048225,
      "grad_norm": 2.546875,
      "learning_rate": 4.242518646593509e-05,
      "loss": 0.8667,
      "step": 202310
    },
    {
      "epoch": 0.7090811595117181,
      "grad_norm": 2.953125,
      "learning_rate": 4.242453743727138e-05,
      "loss": 0.8962,
      "step": 202320
    },
    {
      "epoch": 0.7091162070186138,
      "grad_norm": 3.640625,
      "learning_rate": 4.2423888408607683e-05,
      "loss": 1.0023,
      "step": 202330
    },
    {
      "epoch": 0.7091512545255093,
      "grad_norm": 2.78125,
      "learning_rate": 4.242323937994398e-05,
      "loss": 0.8739,
      "step": 202340
    },
    {
      "epoch": 0.709186302032405,
      "grad_norm": 2.765625,
      "learning_rate": 4.242259035128028e-05,
      "loss": 0.9327,
      "step": 202350
    },
    {
      "epoch": 0.7092213495393005,
      "grad_norm": 3.03125,
      "learning_rate": 4.2421941322616574e-05,
      "loss": 0.8423,
      "step": 202360
    },
    {
      "epoch": 0.7092563970461961,
      "grad_norm": 2.75,
      "learning_rate": 4.2421292293952875e-05,
      "loss": 0.8973,
      "step": 202370
    },
    {
      "epoch": 0.7092914445530917,
      "grad_norm": 3.0,
      "learning_rate": 4.242064326528917e-05,
      "loss": 0.882,
      "step": 202380
    },
    {
      "epoch": 0.7093264920599873,
      "grad_norm": 3.03125,
      "learning_rate": 4.241999423662547e-05,
      "loss": 0.9042,
      "step": 202390
    },
    {
      "epoch": 0.7093615395668829,
      "grad_norm": 2.953125,
      "learning_rate": 4.2419345207961766e-05,
      "loss": 0.9069,
      "step": 202400
    },
    {
      "epoch": 0.7093965870737785,
      "grad_norm": 3.0,
      "learning_rate": 4.241869617929806e-05,
      "loss": 0.8931,
      "step": 202410
    },
    {
      "epoch": 0.7094316345806742,
      "grad_norm": 3.015625,
      "learning_rate": 4.241804715063436e-05,
      "loss": 0.8612,
      "step": 202420
    },
    {
      "epoch": 0.7094666820875697,
      "grad_norm": 2.9375,
      "learning_rate": 4.241739812197066e-05,
      "loss": 0.9221,
      "step": 202430
    },
    {
      "epoch": 0.7095017295944653,
      "grad_norm": 3.109375,
      "learning_rate": 4.241674909330696e-05,
      "loss": 0.9245,
      "step": 202440
    },
    {
      "epoch": 0.7095367771013609,
      "grad_norm": 2.890625,
      "learning_rate": 4.241610006464325e-05,
      "loss": 0.8774,
      "step": 202450
    },
    {
      "epoch": 0.7095718246082565,
      "grad_norm": 2.5,
      "learning_rate": 4.2415451035979554e-05,
      "loss": 0.8296,
      "step": 202460
    },
    {
      "epoch": 0.709606872115152,
      "grad_norm": 3.390625,
      "learning_rate": 4.241480200731585e-05,
      "loss": 0.8328,
      "step": 202470
    },
    {
      "epoch": 0.7096419196220477,
      "grad_norm": 2.9375,
      "learning_rate": 4.241415297865215e-05,
      "loss": 0.8725,
      "step": 202480
    },
    {
      "epoch": 0.7096769671289432,
      "grad_norm": 3.46875,
      "learning_rate": 4.2413503949988445e-05,
      "loss": 0.961,
      "step": 202490
    },
    {
      "epoch": 0.7097120146358389,
      "grad_norm": 3.25,
      "learning_rate": 4.2412854921324746e-05,
      "loss": 0.8654,
      "step": 202500
    },
    {
      "epoch": 0.7097470621427345,
      "grad_norm": 2.546875,
      "learning_rate": 4.241220589266105e-05,
      "loss": 0.8379,
      "step": 202510
    },
    {
      "epoch": 0.7097821096496301,
      "grad_norm": 3.046875,
      "learning_rate": 4.241155686399734e-05,
      "loss": 0.916,
      "step": 202520
    },
    {
      "epoch": 0.7098171571565257,
      "grad_norm": 3.28125,
      "learning_rate": 4.2410907835333643e-05,
      "loss": 0.9194,
      "step": 202530
    },
    {
      "epoch": 0.7098522046634212,
      "grad_norm": 3.65625,
      "learning_rate": 4.241025880666994e-05,
      "loss": 1.0094,
      "step": 202540
    },
    {
      "epoch": 0.7098872521703169,
      "grad_norm": 3.140625,
      "learning_rate": 4.240960977800624e-05,
      "loss": 0.8457,
      "step": 202550
    },
    {
      "epoch": 0.7099222996772124,
      "grad_norm": 2.515625,
      "learning_rate": 4.2408960749342534e-05,
      "loss": 0.8657,
      "step": 202560
    },
    {
      "epoch": 0.7099573471841081,
      "grad_norm": 2.96875,
      "learning_rate": 4.2408311720678835e-05,
      "loss": 0.8615,
      "step": 202570
    },
    {
      "epoch": 0.7099923946910036,
      "grad_norm": 2.65625,
      "learning_rate": 4.240766269201513e-05,
      "loss": 0.9382,
      "step": 202580
    },
    {
      "epoch": 0.7100274421978993,
      "grad_norm": 2.484375,
      "learning_rate": 4.240701366335143e-05,
      "loss": 0.8338,
      "step": 202590
    },
    {
      "epoch": 0.7100624897047948,
      "grad_norm": 3.0625,
      "learning_rate": 4.2406364634687726e-05,
      "loss": 0.9419,
      "step": 202600
    },
    {
      "epoch": 0.7100975372116904,
      "grad_norm": 2.828125,
      "learning_rate": 4.240571560602403e-05,
      "loss": 0.8625,
      "step": 202610
    },
    {
      "epoch": 0.7101325847185861,
      "grad_norm": 3.078125,
      "learning_rate": 4.240506657736032e-05,
      "loss": 0.9337,
      "step": 202620
    },
    {
      "epoch": 0.7101676322254816,
      "grad_norm": 2.6875,
      "learning_rate": 4.2404417548696623e-05,
      "loss": 0.8753,
      "step": 202630
    },
    {
      "epoch": 0.7102026797323773,
      "grad_norm": 2.734375,
      "learning_rate": 4.2403768520032925e-05,
      "loss": 0.9356,
      "step": 202640
    },
    {
      "epoch": 0.7102377272392728,
      "grad_norm": 3.21875,
      "learning_rate": 4.240311949136922e-05,
      "loss": 0.885,
      "step": 202650
    },
    {
      "epoch": 0.7102727747461685,
      "grad_norm": 2.921875,
      "learning_rate": 4.240247046270552e-05,
      "loss": 0.9593,
      "step": 202660
    },
    {
      "epoch": 0.710307822253064,
      "grad_norm": 2.5625,
      "learning_rate": 4.2401821434041815e-05,
      "loss": 0.9247,
      "step": 202670
    },
    {
      "epoch": 0.7103428697599596,
      "grad_norm": 2.953125,
      "learning_rate": 4.240117240537812e-05,
      "loss": 0.9588,
      "step": 202680
    },
    {
      "epoch": 0.7103779172668552,
      "grad_norm": 2.96875,
      "learning_rate": 4.240052337671441e-05,
      "loss": 0.9072,
      "step": 202690
    },
    {
      "epoch": 0.7104129647737508,
      "grad_norm": 3.109375,
      "learning_rate": 4.239987434805071e-05,
      "loss": 0.8676,
      "step": 202700
    },
    {
      "epoch": 0.7104480122806465,
      "grad_norm": 2.90625,
      "learning_rate": 4.239922531938701e-05,
      "loss": 0.9325,
      "step": 202710
    },
    {
      "epoch": 0.710483059787542,
      "grad_norm": 2.796875,
      "learning_rate": 4.239857629072331e-05,
      "loss": 0.947,
      "step": 202720
    },
    {
      "epoch": 0.7105181072944377,
      "grad_norm": 2.90625,
      "learning_rate": 4.2397927262059603e-05,
      "loss": 0.9112,
      "step": 202730
    },
    {
      "epoch": 0.7105531548013332,
      "grad_norm": 2.953125,
      "learning_rate": 4.2397278233395905e-05,
      "loss": 0.8977,
      "step": 202740
    },
    {
      "epoch": 0.7105882023082288,
      "grad_norm": 3.09375,
      "learning_rate": 4.23966292047322e-05,
      "loss": 0.9703,
      "step": 202750
    },
    {
      "epoch": 0.7106232498151244,
      "grad_norm": 2.8125,
      "learning_rate": 4.23959801760685e-05,
      "loss": 0.9039,
      "step": 202760
    },
    {
      "epoch": 0.71065829732202,
      "grad_norm": 2.625,
      "learning_rate": 4.2395331147404795e-05,
      "loss": 0.8822,
      "step": 202770
    },
    {
      "epoch": 0.7106933448289156,
      "grad_norm": 2.65625,
      "learning_rate": 4.239468211874109e-05,
      "loss": 0.8771,
      "step": 202780
    },
    {
      "epoch": 0.7107283923358112,
      "grad_norm": 3.046875,
      "learning_rate": 4.239403309007739e-05,
      "loss": 0.9631,
      "step": 202790
    },
    {
      "epoch": 0.7107634398427067,
      "grad_norm": 2.796875,
      "learning_rate": 4.2393384061413686e-05,
      "loss": 0.9002,
      "step": 202800
    },
    {
      "epoch": 0.7107984873496024,
      "grad_norm": 2.65625,
      "learning_rate": 4.239273503274999e-05,
      "loss": 0.8212,
      "step": 202810
    },
    {
      "epoch": 0.710833534856498,
      "grad_norm": 2.890625,
      "learning_rate": 4.239208600408628e-05,
      "loss": 0.9643,
      "step": 202820
    },
    {
      "epoch": 0.7108685823633936,
      "grad_norm": 3.0625,
      "learning_rate": 4.2391436975422583e-05,
      "loss": 0.9373,
      "step": 202830
    },
    {
      "epoch": 0.7109036298702892,
      "grad_norm": 2.515625,
      "learning_rate": 4.239078794675888e-05,
      "loss": 0.8336,
      "step": 202840
    },
    {
      "epoch": 0.7109386773771847,
      "grad_norm": 2.75,
      "learning_rate": 4.239013891809518e-05,
      "loss": 0.9373,
      "step": 202850
    },
    {
      "epoch": 0.7109737248840804,
      "grad_norm": 2.640625,
      "learning_rate": 4.2389489889431474e-05,
      "loss": 0.9135,
      "step": 202860
    },
    {
      "epoch": 0.7110087723909759,
      "grad_norm": 2.90625,
      "learning_rate": 4.2388840860767775e-05,
      "loss": 0.8829,
      "step": 202870
    },
    {
      "epoch": 0.7110438198978716,
      "grad_norm": 3.234375,
      "learning_rate": 4.238819183210408e-05,
      "loss": 0.8975,
      "step": 202880
    },
    {
      "epoch": 0.7110788674047671,
      "grad_norm": 2.390625,
      "learning_rate": 4.238754280344037e-05,
      "loss": 0.8985,
      "step": 202890
    },
    {
      "epoch": 0.7111139149116628,
      "grad_norm": 3.125,
      "learning_rate": 4.238689377477667e-05,
      "loss": 0.9418,
      "step": 202900
    },
    {
      "epoch": 0.7111489624185584,
      "grad_norm": 2.734375,
      "learning_rate": 4.238624474611297e-05,
      "loss": 0.9033,
      "step": 202910
    },
    {
      "epoch": 0.7111840099254539,
      "grad_norm": 3.5625,
      "learning_rate": 4.238559571744927e-05,
      "loss": 0.8826,
      "step": 202920
    },
    {
      "epoch": 0.7112190574323496,
      "grad_norm": 2.78125,
      "learning_rate": 4.2384946688785563e-05,
      "loss": 0.8756,
      "step": 202930
    },
    {
      "epoch": 0.7112541049392451,
      "grad_norm": 3.125,
      "learning_rate": 4.2384297660121865e-05,
      "loss": 0.9288,
      "step": 202940
    },
    {
      "epoch": 0.7112891524461408,
      "grad_norm": 2.765625,
      "learning_rate": 4.238364863145816e-05,
      "loss": 0.8513,
      "step": 202950
    },
    {
      "epoch": 0.7113241999530363,
      "grad_norm": 2.921875,
      "learning_rate": 4.238299960279446e-05,
      "loss": 0.855,
      "step": 202960
    },
    {
      "epoch": 0.711359247459932,
      "grad_norm": 3.0,
      "learning_rate": 4.2382350574130755e-05,
      "loss": 0.8762,
      "step": 202970
    },
    {
      "epoch": 0.7113942949668275,
      "grad_norm": 2.875,
      "learning_rate": 4.238170154546706e-05,
      "loss": 0.8774,
      "step": 202980
    },
    {
      "epoch": 0.7114293424737231,
      "grad_norm": 2.796875,
      "learning_rate": 4.238105251680335e-05,
      "loss": 0.9397,
      "step": 202990
    },
    {
      "epoch": 0.7114643899806188,
      "grad_norm": 3.421875,
      "learning_rate": 4.238040348813965e-05,
      "loss": 0.9359,
      "step": 203000
    },
    {
      "epoch": 0.7114994374875143,
      "grad_norm": 2.546875,
      "learning_rate": 4.2379754459475954e-05,
      "loss": 0.9268,
      "step": 203010
    },
    {
      "epoch": 0.71153448499441,
      "grad_norm": 3.671875,
      "learning_rate": 4.237910543081225e-05,
      "loss": 0.864,
      "step": 203020
    },
    {
      "epoch": 0.7115695325013055,
      "grad_norm": 3.4375,
      "learning_rate": 4.237845640214855e-05,
      "loss": 0.8693,
      "step": 203030
    },
    {
      "epoch": 0.7116045800082011,
      "grad_norm": 2.8125,
      "learning_rate": 4.2377807373484845e-05,
      "loss": 0.8986,
      "step": 203040
    },
    {
      "epoch": 0.7116396275150967,
      "grad_norm": 3.328125,
      "learning_rate": 4.2377158344821146e-05,
      "loss": 0.9287,
      "step": 203050
    },
    {
      "epoch": 0.7116746750219923,
      "grad_norm": 2.78125,
      "learning_rate": 4.237650931615744e-05,
      "loss": 0.9051,
      "step": 203060
    },
    {
      "epoch": 0.7117097225288879,
      "grad_norm": 2.671875,
      "learning_rate": 4.237586028749374e-05,
      "loss": 0.8975,
      "step": 203070
    },
    {
      "epoch": 0.7117447700357835,
      "grad_norm": 2.953125,
      "learning_rate": 4.237521125883004e-05,
      "loss": 0.8559,
      "step": 203080
    },
    {
      "epoch": 0.711779817542679,
      "grad_norm": 2.890625,
      "learning_rate": 4.237456223016634e-05,
      "loss": 0.8868,
      "step": 203090
    },
    {
      "epoch": 0.7118148650495747,
      "grad_norm": 2.828125,
      "learning_rate": 4.237391320150263e-05,
      "loss": 0.9048,
      "step": 203100
    },
    {
      "epoch": 0.7118499125564703,
      "grad_norm": 2.515625,
      "learning_rate": 4.2373264172838934e-05,
      "loss": 0.8446,
      "step": 203110
    },
    {
      "epoch": 0.7118849600633659,
      "grad_norm": 3.015625,
      "learning_rate": 4.237261514417523e-05,
      "loss": 0.9036,
      "step": 203120
    },
    {
      "epoch": 0.7119200075702615,
      "grad_norm": 3.671875,
      "learning_rate": 4.237196611551153e-05,
      "loss": 0.9858,
      "step": 203130
    },
    {
      "epoch": 0.7119550550771571,
      "grad_norm": 2.984375,
      "learning_rate": 4.2371317086847825e-05,
      "loss": 0.8821,
      "step": 203140
    },
    {
      "epoch": 0.7119901025840527,
      "grad_norm": 2.765625,
      "learning_rate": 4.2370668058184126e-05,
      "loss": 0.8802,
      "step": 203150
    },
    {
      "epoch": 0.7120251500909482,
      "grad_norm": 2.71875,
      "learning_rate": 4.237001902952042e-05,
      "loss": 0.8304,
      "step": 203160
    },
    {
      "epoch": 0.7120601975978439,
      "grad_norm": 3.265625,
      "learning_rate": 4.2369370000856715e-05,
      "loss": 0.8034,
      "step": 203170
    },
    {
      "epoch": 0.7120952451047394,
      "grad_norm": 3.40625,
      "learning_rate": 4.236872097219302e-05,
      "loss": 0.9949,
      "step": 203180
    },
    {
      "epoch": 0.7121302926116351,
      "grad_norm": 3.0625,
      "learning_rate": 4.236807194352931e-05,
      "loss": 0.9125,
      "step": 203190
    },
    {
      "epoch": 0.7121653401185307,
      "grad_norm": 2.78125,
      "learning_rate": 4.236742291486561e-05,
      "loss": 0.8981,
      "step": 203200
    },
    {
      "epoch": 0.7122003876254263,
      "grad_norm": 2.765625,
      "learning_rate": 4.236677388620191e-05,
      "loss": 0.9054,
      "step": 203210
    },
    {
      "epoch": 0.7122354351323219,
      "grad_norm": 2.765625,
      "learning_rate": 4.236612485753821e-05,
      "loss": 0.7961,
      "step": 203220
    },
    {
      "epoch": 0.7122704826392174,
      "grad_norm": 2.921875,
      "learning_rate": 4.236547582887451e-05,
      "loss": 0.9203,
      "step": 203230
    },
    {
      "epoch": 0.7123055301461131,
      "grad_norm": 3.140625,
      "learning_rate": 4.2364826800210805e-05,
      "loss": 0.9151,
      "step": 203240
    },
    {
      "epoch": 0.7123405776530086,
      "grad_norm": 2.890625,
      "learning_rate": 4.2364177771547106e-05,
      "loss": 0.9227,
      "step": 203250
    },
    {
      "epoch": 0.7123756251599043,
      "grad_norm": 3.03125,
      "learning_rate": 4.23635287428834e-05,
      "loss": 0.9162,
      "step": 203260
    },
    {
      "epoch": 0.7124106726667998,
      "grad_norm": 2.9375,
      "learning_rate": 4.23628797142197e-05,
      "loss": 1.0091,
      "step": 203270
    },
    {
      "epoch": 0.7124457201736955,
      "grad_norm": 2.9375,
      "learning_rate": 4.2362230685556e-05,
      "loss": 0.8616,
      "step": 203280
    },
    {
      "epoch": 0.712480767680591,
      "grad_norm": 3.125,
      "learning_rate": 4.23615816568923e-05,
      "loss": 0.8168,
      "step": 203290
    },
    {
      "epoch": 0.7125158151874866,
      "grad_norm": 3.171875,
      "learning_rate": 4.236093262822859e-05,
      "loss": 0.8954,
      "step": 203300
    },
    {
      "epoch": 0.7125508626943823,
      "grad_norm": 3.015625,
      "learning_rate": 4.2360283599564894e-05,
      "loss": 0.9275,
      "step": 203310
    },
    {
      "epoch": 0.7125859102012778,
      "grad_norm": 2.765625,
      "learning_rate": 4.235963457090119e-05,
      "loss": 0.933,
      "step": 203320
    },
    {
      "epoch": 0.7126209577081735,
      "grad_norm": 3.390625,
      "learning_rate": 4.235898554223749e-05,
      "loss": 0.8828,
      "step": 203330
    },
    {
      "epoch": 0.712656005215069,
      "grad_norm": 3.203125,
      "learning_rate": 4.2358336513573785e-05,
      "loss": 0.9333,
      "step": 203340
    },
    {
      "epoch": 0.7126910527219646,
      "grad_norm": 2.734375,
      "learning_rate": 4.2357687484910086e-05,
      "loss": 0.8325,
      "step": 203350
    },
    {
      "epoch": 0.7127261002288602,
      "grad_norm": 3.203125,
      "learning_rate": 4.235703845624638e-05,
      "loss": 0.8845,
      "step": 203360
    },
    {
      "epoch": 0.7127611477357558,
      "grad_norm": 2.90625,
      "learning_rate": 4.235638942758268e-05,
      "loss": 0.8991,
      "step": 203370
    },
    {
      "epoch": 0.7127961952426514,
      "grad_norm": 3.59375,
      "learning_rate": 4.2355740398918984e-05,
      "loss": 0.833,
      "step": 203380
    },
    {
      "epoch": 0.712831242749547,
      "grad_norm": 2.890625,
      "learning_rate": 4.235509137025528e-05,
      "loss": 0.9426,
      "step": 203390
    },
    {
      "epoch": 0.7128662902564427,
      "grad_norm": 2.890625,
      "learning_rate": 4.235444234159158e-05,
      "loss": 0.8565,
      "step": 203400
    },
    {
      "epoch": 0.7129013377633382,
      "grad_norm": 2.984375,
      "learning_rate": 4.2353793312927874e-05,
      "loss": 0.9694,
      "step": 203410
    },
    {
      "epoch": 0.7129363852702338,
      "grad_norm": 3.171875,
      "learning_rate": 4.2353144284264176e-05,
      "loss": 0.8395,
      "step": 203420
    },
    {
      "epoch": 0.7129714327771294,
      "grad_norm": 2.625,
      "learning_rate": 4.235249525560047e-05,
      "loss": 0.9287,
      "step": 203430
    },
    {
      "epoch": 0.713006480284025,
      "grad_norm": 2.90625,
      "learning_rate": 4.235184622693677e-05,
      "loss": 0.9054,
      "step": 203440
    },
    {
      "epoch": 0.7130415277909206,
      "grad_norm": 2.734375,
      "learning_rate": 4.2351197198273066e-05,
      "loss": 0.8547,
      "step": 203450
    },
    {
      "epoch": 0.7130765752978162,
      "grad_norm": 2.90625,
      "learning_rate": 4.235054816960937e-05,
      "loss": 0.8966,
      "step": 203460
    },
    {
      "epoch": 0.7131116228047117,
      "grad_norm": 2.71875,
      "learning_rate": 4.234989914094566e-05,
      "loss": 0.8387,
      "step": 203470
    },
    {
      "epoch": 0.7131466703116074,
      "grad_norm": 3.515625,
      "learning_rate": 4.2349250112281964e-05,
      "loss": 0.9707,
      "step": 203480
    },
    {
      "epoch": 0.713181717818503,
      "grad_norm": 2.859375,
      "learning_rate": 4.234860108361826e-05,
      "loss": 0.921,
      "step": 203490
    },
    {
      "epoch": 0.7132167653253986,
      "grad_norm": 2.765625,
      "learning_rate": 4.234795205495456e-05,
      "loss": 0.8574,
      "step": 203500
    },
    {
      "epoch": 0.7132518128322942,
      "grad_norm": 3.046875,
      "learning_rate": 4.234730302629086e-05,
      "loss": 0.9575,
      "step": 203510
    },
    {
      "epoch": 0.7132868603391898,
      "grad_norm": 2.34375,
      "learning_rate": 4.2346653997627156e-05,
      "loss": 0.8319,
      "step": 203520
    },
    {
      "epoch": 0.7133219078460854,
      "grad_norm": 2.75,
      "learning_rate": 4.234600496896345e-05,
      "loss": 0.8503,
      "step": 203530
    },
    {
      "epoch": 0.7133569553529809,
      "grad_norm": 2.75,
      "learning_rate": 4.2345355940299745e-05,
      "loss": 0.8056,
      "step": 203540
    },
    {
      "epoch": 0.7133920028598766,
      "grad_norm": 2.5,
      "learning_rate": 4.2344706911636046e-05,
      "loss": 0.8594,
      "step": 203550
    },
    {
      "epoch": 0.7134270503667721,
      "grad_norm": 3.0,
      "learning_rate": 4.234405788297234e-05,
      "loss": 0.9159,
      "step": 203560
    },
    {
      "epoch": 0.7134620978736678,
      "grad_norm": 2.9375,
      "learning_rate": 4.234340885430864e-05,
      "loss": 0.9353,
      "step": 203570
    },
    {
      "epoch": 0.7134971453805633,
      "grad_norm": 2.984375,
      "learning_rate": 4.234275982564494e-05,
      "loss": 0.8895,
      "step": 203580
    },
    {
      "epoch": 0.713532192887459,
      "grad_norm": 3.234375,
      "learning_rate": 4.234211079698124e-05,
      "loss": 0.8806,
      "step": 203590
    },
    {
      "epoch": 0.7135672403943546,
      "grad_norm": 2.828125,
      "learning_rate": 4.234146176831754e-05,
      "loss": 0.8742,
      "step": 203600
    },
    {
      "epoch": 0.7136022879012501,
      "grad_norm": 2.8125,
      "learning_rate": 4.2340812739653834e-05,
      "loss": 0.9033,
      "step": 203610
    },
    {
      "epoch": 0.7136373354081458,
      "grad_norm": 3.0,
      "learning_rate": 4.2340163710990136e-05,
      "loss": 0.9652,
      "step": 203620
    },
    {
      "epoch": 0.7136723829150413,
      "grad_norm": 3.046875,
      "learning_rate": 4.233951468232643e-05,
      "loss": 0.8967,
      "step": 203630
    },
    {
      "epoch": 0.713707430421937,
      "grad_norm": 2.859375,
      "learning_rate": 4.233886565366273e-05,
      "loss": 0.9252,
      "step": 203640
    },
    {
      "epoch": 0.7137424779288325,
      "grad_norm": 2.640625,
      "learning_rate": 4.2338216624999026e-05,
      "loss": 0.9589,
      "step": 203650
    },
    {
      "epoch": 0.7137775254357281,
      "grad_norm": 3.3125,
      "learning_rate": 4.233756759633533e-05,
      "loss": 0.9025,
      "step": 203660
    },
    {
      "epoch": 0.7138125729426237,
      "grad_norm": 2.46875,
      "learning_rate": 4.233691856767162e-05,
      "loss": 0.8308,
      "step": 203670
    },
    {
      "epoch": 0.7138476204495193,
      "grad_norm": 2.796875,
      "learning_rate": 4.2336269539007924e-05,
      "loss": 0.8644,
      "step": 203680
    },
    {
      "epoch": 0.713882667956415,
      "grad_norm": 2.6875,
      "learning_rate": 4.233562051034422e-05,
      "loss": 0.889,
      "step": 203690
    },
    {
      "epoch": 0.7139177154633105,
      "grad_norm": 2.671875,
      "learning_rate": 4.233497148168052e-05,
      "loss": 0.9346,
      "step": 203700
    },
    {
      "epoch": 0.7139527629702062,
      "grad_norm": 2.609375,
      "learning_rate": 4.2334322453016814e-05,
      "loss": 0.8439,
      "step": 203710
    },
    {
      "epoch": 0.7139878104771017,
      "grad_norm": 2.828125,
      "learning_rate": 4.2333673424353116e-05,
      "loss": 0.7806,
      "step": 203720
    },
    {
      "epoch": 0.7140228579839973,
      "grad_norm": 2.5625,
      "learning_rate": 4.233302439568941e-05,
      "loss": 0.8624,
      "step": 203730
    },
    {
      "epoch": 0.7140579054908929,
      "grad_norm": 3.375,
      "learning_rate": 4.233237536702571e-05,
      "loss": 0.8932,
      "step": 203740
    },
    {
      "epoch": 0.7140929529977885,
      "grad_norm": 2.828125,
      "learning_rate": 4.233172633836201e-05,
      "loss": 0.9301,
      "step": 203750
    },
    {
      "epoch": 0.7141280005046841,
      "grad_norm": 3.03125,
      "learning_rate": 4.233107730969831e-05,
      "loss": 0.8794,
      "step": 203760
    },
    {
      "epoch": 0.7141630480115797,
      "grad_norm": 2.921875,
      "learning_rate": 4.233042828103461e-05,
      "loss": 0.9292,
      "step": 203770
    },
    {
      "epoch": 0.7141980955184752,
      "grad_norm": 2.78125,
      "learning_rate": 4.2329779252370904e-05,
      "loss": 0.8543,
      "step": 203780
    },
    {
      "epoch": 0.7142331430253709,
      "grad_norm": 2.984375,
      "learning_rate": 4.2329130223707205e-05,
      "loss": 0.8921,
      "step": 203790
    },
    {
      "epoch": 0.7142681905322665,
      "grad_norm": 2.765625,
      "learning_rate": 4.23284811950435e-05,
      "loss": 0.9347,
      "step": 203800
    },
    {
      "epoch": 0.7143032380391621,
      "grad_norm": 2.96875,
      "learning_rate": 4.23278321663798e-05,
      "loss": 0.8395,
      "step": 203810
    },
    {
      "epoch": 0.7143382855460577,
      "grad_norm": 3.125,
      "learning_rate": 4.2327183137716096e-05,
      "loss": 0.9162,
      "step": 203820
    },
    {
      "epoch": 0.7143733330529533,
      "grad_norm": 2.875,
      "learning_rate": 4.23265341090524e-05,
      "loss": 0.9341,
      "step": 203830
    },
    {
      "epoch": 0.7144083805598489,
      "grad_norm": 2.984375,
      "learning_rate": 4.232588508038869e-05,
      "loss": 0.922,
      "step": 203840
    },
    {
      "epoch": 0.7144434280667444,
      "grad_norm": 3.109375,
      "learning_rate": 4.232523605172499e-05,
      "loss": 0.8816,
      "step": 203850
    },
    {
      "epoch": 0.7144784755736401,
      "grad_norm": 3.359375,
      "learning_rate": 4.232458702306129e-05,
      "loss": 0.9734,
      "step": 203860
    },
    {
      "epoch": 0.7145135230805356,
      "grad_norm": 2.8125,
      "learning_rate": 4.232393799439759e-05,
      "loss": 0.9045,
      "step": 203870
    },
    {
      "epoch": 0.7145485705874313,
      "grad_norm": 2.65625,
      "learning_rate": 4.232328896573389e-05,
      "loss": 0.8724,
      "step": 203880
    },
    {
      "epoch": 0.7145836180943269,
      "grad_norm": 2.640625,
      "learning_rate": 4.2322639937070185e-05,
      "loss": 0.9296,
      "step": 203890
    },
    {
      "epoch": 0.7146186656012224,
      "grad_norm": 2.734375,
      "learning_rate": 4.2321990908406486e-05,
      "loss": 0.8443,
      "step": 203900
    },
    {
      "epoch": 0.7146537131081181,
      "grad_norm": 2.96875,
      "learning_rate": 4.2321341879742774e-05,
      "loss": 0.8842,
      "step": 203910
    },
    {
      "epoch": 0.7146887606150136,
      "grad_norm": 3.03125,
      "learning_rate": 4.2320692851079076e-05,
      "loss": 0.8278,
      "step": 203920
    },
    {
      "epoch": 0.7147238081219093,
      "grad_norm": 2.78125,
      "learning_rate": 4.232004382241537e-05,
      "loss": 0.8698,
      "step": 203930
    },
    {
      "epoch": 0.7147588556288048,
      "grad_norm": 2.90625,
      "learning_rate": 4.231939479375167e-05,
      "loss": 0.8451,
      "step": 203940
    },
    {
      "epoch": 0.7147939031357005,
      "grad_norm": 3.015625,
      "learning_rate": 4.2318745765087966e-05,
      "loss": 0.8848,
      "step": 203950
    },
    {
      "epoch": 0.714828950642596,
      "grad_norm": 2.875,
      "learning_rate": 4.231809673642427e-05,
      "loss": 0.9793,
      "step": 203960
    },
    {
      "epoch": 0.7148639981494916,
      "grad_norm": 2.703125,
      "learning_rate": 4.231744770776057e-05,
      "loss": 0.881,
      "step": 203970
    },
    {
      "epoch": 0.7148990456563872,
      "grad_norm": 2.8125,
      "learning_rate": 4.2316798679096864e-05,
      "loss": 0.8182,
      "step": 203980
    },
    {
      "epoch": 0.7149340931632828,
      "grad_norm": 2.625,
      "learning_rate": 4.2316149650433165e-05,
      "loss": 0.8798,
      "step": 203990
    },
    {
      "epoch": 0.7149691406701785,
      "grad_norm": 2.28125,
      "learning_rate": 4.231550062176946e-05,
      "loss": 0.9006,
      "step": 204000
    },
    {
      "epoch": 0.715004188177074,
      "grad_norm": 2.953125,
      "learning_rate": 4.231485159310576e-05,
      "loss": 0.8797,
      "step": 204010
    },
    {
      "epoch": 0.7150392356839697,
      "grad_norm": 2.65625,
      "learning_rate": 4.2314202564442056e-05,
      "loss": 0.8238,
      "step": 204020
    },
    {
      "epoch": 0.7150742831908652,
      "grad_norm": 3.015625,
      "learning_rate": 4.231355353577836e-05,
      "loss": 0.8511,
      "step": 204030
    },
    {
      "epoch": 0.7151093306977608,
      "grad_norm": 2.796875,
      "learning_rate": 4.231290450711465e-05,
      "loss": 0.848,
      "step": 204040
    },
    {
      "epoch": 0.7151443782046564,
      "grad_norm": 3.140625,
      "learning_rate": 4.231225547845095e-05,
      "loss": 0.9166,
      "step": 204050
    },
    {
      "epoch": 0.715179425711552,
      "grad_norm": 2.75,
      "learning_rate": 4.231160644978725e-05,
      "loss": 0.8432,
      "step": 204060
    },
    {
      "epoch": 0.7152144732184476,
      "grad_norm": 2.484375,
      "learning_rate": 4.231095742112355e-05,
      "loss": 0.8203,
      "step": 204070
    },
    {
      "epoch": 0.7152495207253432,
      "grad_norm": 2.4375,
      "learning_rate": 4.2310308392459844e-05,
      "loss": 0.953,
      "step": 204080
    },
    {
      "epoch": 0.7152845682322388,
      "grad_norm": 3.203125,
      "learning_rate": 4.2309659363796145e-05,
      "loss": 0.8705,
      "step": 204090
    },
    {
      "epoch": 0.7153196157391344,
      "grad_norm": 3.0,
      "learning_rate": 4.230901033513244e-05,
      "loss": 0.9028,
      "step": 204100
    },
    {
      "epoch": 0.71535466324603,
      "grad_norm": 3.125,
      "learning_rate": 4.230836130646874e-05,
      "loss": 0.8944,
      "step": 204110
    },
    {
      "epoch": 0.7153897107529256,
      "grad_norm": 2.75,
      "learning_rate": 4.230771227780504e-05,
      "loss": 0.8895,
      "step": 204120
    },
    {
      "epoch": 0.7154247582598212,
      "grad_norm": 2.96875,
      "learning_rate": 4.230706324914134e-05,
      "loss": 0.8897,
      "step": 204130
    },
    {
      "epoch": 0.7154598057667167,
      "grad_norm": 2.765625,
      "learning_rate": 4.230641422047764e-05,
      "loss": 0.9345,
      "step": 204140
    },
    {
      "epoch": 0.7154948532736124,
      "grad_norm": 2.9375,
      "learning_rate": 4.230576519181393e-05,
      "loss": 0.8009,
      "step": 204150
    },
    {
      "epoch": 0.7155299007805079,
      "grad_norm": 3.09375,
      "learning_rate": 4.2305116163150234e-05,
      "loss": 0.9667,
      "step": 204160
    },
    {
      "epoch": 0.7155649482874036,
      "grad_norm": 2.421875,
      "learning_rate": 4.230446713448653e-05,
      "loss": 0.8844,
      "step": 204170
    },
    {
      "epoch": 0.7155999957942992,
      "grad_norm": 2.953125,
      "learning_rate": 4.230381810582283e-05,
      "loss": 0.9024,
      "step": 204180
    },
    {
      "epoch": 0.7156350433011948,
      "grad_norm": 3.078125,
      "learning_rate": 4.2303169077159125e-05,
      "loss": 0.9674,
      "step": 204190
    },
    {
      "epoch": 0.7156700908080904,
      "grad_norm": 3.0,
      "learning_rate": 4.2302520048495426e-05,
      "loss": 0.9116,
      "step": 204200
    },
    {
      "epoch": 0.715705138314986,
      "grad_norm": 3.078125,
      "learning_rate": 4.230187101983172e-05,
      "loss": 0.8896,
      "step": 204210
    },
    {
      "epoch": 0.7157401858218816,
      "grad_norm": 2.6875,
      "learning_rate": 4.230122199116802e-05,
      "loss": 0.9312,
      "step": 204220
    },
    {
      "epoch": 0.7157752333287771,
      "grad_norm": 3.1875,
      "learning_rate": 4.230057296250432e-05,
      "loss": 0.8781,
      "step": 204230
    },
    {
      "epoch": 0.7158102808356728,
      "grad_norm": 2.5625,
      "learning_rate": 4.229992393384062e-05,
      "loss": 0.8979,
      "step": 204240
    },
    {
      "epoch": 0.7158453283425683,
      "grad_norm": 2.546875,
      "learning_rate": 4.229927490517692e-05,
      "loss": 0.8537,
      "step": 204250
    },
    {
      "epoch": 0.715880375849464,
      "grad_norm": 3.015625,
      "learning_rate": 4.2298625876513214e-05,
      "loss": 0.8792,
      "step": 204260
    },
    {
      "epoch": 0.7159154233563595,
      "grad_norm": 3.328125,
      "learning_rate": 4.2297976847849516e-05,
      "loss": 0.8906,
      "step": 204270
    },
    {
      "epoch": 0.7159504708632551,
      "grad_norm": 2.84375,
      "learning_rate": 4.2297327819185804e-05,
      "loss": 0.8379,
      "step": 204280
    },
    {
      "epoch": 0.7159855183701508,
      "grad_norm": 2.90625,
      "learning_rate": 4.2296678790522105e-05,
      "loss": 0.831,
      "step": 204290
    },
    {
      "epoch": 0.7160205658770463,
      "grad_norm": 2.96875,
      "learning_rate": 4.22960297618584e-05,
      "loss": 0.8929,
      "step": 204300
    },
    {
      "epoch": 0.716055613383942,
      "grad_norm": 2.9375,
      "learning_rate": 4.22953807331947e-05,
      "loss": 0.9341,
      "step": 204310
    },
    {
      "epoch": 0.7160906608908375,
      "grad_norm": 3.125,
      "learning_rate": 4.2294731704530996e-05,
      "loss": 0.8614,
      "step": 204320
    },
    {
      "epoch": 0.7161257083977332,
      "grad_norm": 3.359375,
      "learning_rate": 4.22940826758673e-05,
      "loss": 0.8679,
      "step": 204330
    },
    {
      "epoch": 0.7161607559046287,
      "grad_norm": 2.8125,
      "learning_rate": 4.22934336472036e-05,
      "loss": 0.8581,
      "step": 204340
    },
    {
      "epoch": 0.7161958034115243,
      "grad_norm": 2.921875,
      "learning_rate": 4.229278461853989e-05,
      "loss": 0.9985,
      "step": 204350
    },
    {
      "epoch": 0.7162308509184199,
      "grad_norm": 2.875,
      "learning_rate": 4.2292135589876194e-05,
      "loss": 0.8543,
      "step": 204360
    },
    {
      "epoch": 0.7162658984253155,
      "grad_norm": 3.046875,
      "learning_rate": 4.229148656121249e-05,
      "loss": 0.8628,
      "step": 204370
    },
    {
      "epoch": 0.7163009459322112,
      "grad_norm": 3.21875,
      "learning_rate": 4.229083753254879e-05,
      "loss": 0.9683,
      "step": 204380
    },
    {
      "epoch": 0.7163359934391067,
      "grad_norm": 2.921875,
      "learning_rate": 4.2290188503885085e-05,
      "loss": 0.9302,
      "step": 204390
    },
    {
      "epoch": 0.7163710409460023,
      "grad_norm": 2.640625,
      "learning_rate": 4.2289539475221386e-05,
      "loss": 0.9166,
      "step": 204400
    },
    {
      "epoch": 0.7164060884528979,
      "grad_norm": 2.65625,
      "learning_rate": 4.228889044655768e-05,
      "loss": 0.8327,
      "step": 204410
    },
    {
      "epoch": 0.7164411359597935,
      "grad_norm": 2.734375,
      "learning_rate": 4.228824141789398e-05,
      "loss": 0.9585,
      "step": 204420
    },
    {
      "epoch": 0.7164761834666891,
      "grad_norm": 3.109375,
      "learning_rate": 4.228759238923028e-05,
      "loss": 0.8528,
      "step": 204430
    },
    {
      "epoch": 0.7165112309735847,
      "grad_norm": 3.03125,
      "learning_rate": 4.228694336056658e-05,
      "loss": 0.9376,
      "step": 204440
    },
    {
      "epoch": 0.7165462784804802,
      "grad_norm": 3.1875,
      "learning_rate": 4.228629433190287e-05,
      "loss": 0.9423,
      "step": 204450
    },
    {
      "epoch": 0.7165813259873759,
      "grad_norm": 2.984375,
      "learning_rate": 4.2285645303239174e-05,
      "loss": 0.9268,
      "step": 204460
    },
    {
      "epoch": 0.7166163734942714,
      "grad_norm": 2.84375,
      "learning_rate": 4.2284996274575476e-05,
      "loss": 0.8431,
      "step": 204470
    },
    {
      "epoch": 0.7166514210011671,
      "grad_norm": 2.703125,
      "learning_rate": 4.228434724591177e-05,
      "loss": 0.8004,
      "step": 204480
    },
    {
      "epoch": 0.7166864685080627,
      "grad_norm": 2.46875,
      "learning_rate": 4.228369821724807e-05,
      "loss": 0.8542,
      "step": 204490
    },
    {
      "epoch": 0.7167215160149583,
      "grad_norm": 2.890625,
      "learning_rate": 4.2283049188584366e-05,
      "loss": 0.829,
      "step": 204500
    },
    {
      "epoch": 0.7167565635218539,
      "grad_norm": 2.515625,
      "learning_rate": 4.228240015992067e-05,
      "loss": 0.8867,
      "step": 204510
    },
    {
      "epoch": 0.7167916110287494,
      "grad_norm": 3.0625,
      "learning_rate": 4.228175113125696e-05,
      "loss": 0.856,
      "step": 204520
    },
    {
      "epoch": 0.7168266585356451,
      "grad_norm": 3.390625,
      "learning_rate": 4.2281102102593264e-05,
      "loss": 0.9747,
      "step": 204530
    },
    {
      "epoch": 0.7168617060425406,
      "grad_norm": 3.046875,
      "learning_rate": 4.228045307392956e-05,
      "loss": 0.9153,
      "step": 204540
    },
    {
      "epoch": 0.7168967535494363,
      "grad_norm": 2.96875,
      "learning_rate": 4.227980404526586e-05,
      "loss": 0.9674,
      "step": 204550
    },
    {
      "epoch": 0.7169318010563318,
      "grad_norm": 2.765625,
      "learning_rate": 4.2279155016602154e-05,
      "loss": 0.8487,
      "step": 204560
    },
    {
      "epoch": 0.7169668485632275,
      "grad_norm": 2.765625,
      "learning_rate": 4.2278505987938456e-05,
      "loss": 0.8961,
      "step": 204570
    },
    {
      "epoch": 0.7170018960701231,
      "grad_norm": 2.734375,
      "learning_rate": 4.227785695927475e-05,
      "loss": 0.8525,
      "step": 204580
    },
    {
      "epoch": 0.7170369435770186,
      "grad_norm": 3.171875,
      "learning_rate": 4.227720793061105e-05,
      "loss": 0.8671,
      "step": 204590
    },
    {
      "epoch": 0.7170719910839143,
      "grad_norm": 2.8125,
      "learning_rate": 4.2276558901947346e-05,
      "loss": 0.8851,
      "step": 204600
    },
    {
      "epoch": 0.7171070385908098,
      "grad_norm": 3.328125,
      "learning_rate": 4.227590987328365e-05,
      "loss": 0.8638,
      "step": 204610
    },
    {
      "epoch": 0.7171420860977055,
      "grad_norm": 3.15625,
      "learning_rate": 4.227526084461995e-05,
      "loss": 0.9205,
      "step": 204620
    },
    {
      "epoch": 0.717177133604601,
      "grad_norm": 2.921875,
      "learning_rate": 4.2274611815956244e-05,
      "loss": 0.8562,
      "step": 204630
    },
    {
      "epoch": 0.7172121811114966,
      "grad_norm": 2.78125,
      "learning_rate": 4.2273962787292545e-05,
      "loss": 0.923,
      "step": 204640
    },
    {
      "epoch": 0.7172472286183922,
      "grad_norm": 2.953125,
      "learning_rate": 4.227331375862884e-05,
      "loss": 0.8835,
      "step": 204650
    },
    {
      "epoch": 0.7172822761252878,
      "grad_norm": 3.078125,
      "learning_rate": 4.2272664729965134e-05,
      "loss": 0.9197,
      "step": 204660
    },
    {
      "epoch": 0.7173173236321835,
      "grad_norm": 3.0,
      "learning_rate": 4.227201570130143e-05,
      "loss": 0.8939,
      "step": 204670
    },
    {
      "epoch": 0.717352371139079,
      "grad_norm": 3.15625,
      "learning_rate": 4.227136667263773e-05,
      "loss": 0.927,
      "step": 204680
    },
    {
      "epoch": 0.7173874186459747,
      "grad_norm": 2.734375,
      "learning_rate": 4.2270717643974025e-05,
      "loss": 0.9386,
      "step": 204690
    },
    {
      "epoch": 0.7174224661528702,
      "grad_norm": 2.828125,
      "learning_rate": 4.2270068615310326e-05,
      "loss": 0.9382,
      "step": 204700
    },
    {
      "epoch": 0.7174575136597658,
      "grad_norm": 3.1875,
      "learning_rate": 4.226941958664663e-05,
      "loss": 0.9784,
      "step": 204710
    },
    {
      "epoch": 0.7174925611666614,
      "grad_norm": 2.8125,
      "learning_rate": 4.226877055798292e-05,
      "loss": 0.8544,
      "step": 204720
    },
    {
      "epoch": 0.717527608673557,
      "grad_norm": 2.84375,
      "learning_rate": 4.2268121529319224e-05,
      "loss": 0.9812,
      "step": 204730
    },
    {
      "epoch": 0.7175626561804526,
      "grad_norm": 2.6875,
      "learning_rate": 4.226747250065552e-05,
      "loss": 0.978,
      "step": 204740
    },
    {
      "epoch": 0.7175977036873482,
      "grad_norm": 2.703125,
      "learning_rate": 4.226682347199182e-05,
      "loss": 0.8858,
      "step": 204750
    },
    {
      "epoch": 0.7176327511942437,
      "grad_norm": 3.34375,
      "learning_rate": 4.2266174443328114e-05,
      "loss": 0.9377,
      "step": 204760
    },
    {
      "epoch": 0.7176677987011394,
      "grad_norm": 3.015625,
      "learning_rate": 4.2265525414664416e-05,
      "loss": 0.8925,
      "step": 204770
    },
    {
      "epoch": 0.717702846208035,
      "grad_norm": 3.28125,
      "learning_rate": 4.226487638600071e-05,
      "loss": 0.9647,
      "step": 204780
    },
    {
      "epoch": 0.7177378937149306,
      "grad_norm": 2.859375,
      "learning_rate": 4.226422735733701e-05,
      "loss": 0.8785,
      "step": 204790
    },
    {
      "epoch": 0.7177729412218262,
      "grad_norm": 3.5,
      "learning_rate": 4.2263578328673306e-05,
      "loss": 0.9027,
      "step": 204800
    },
    {
      "epoch": 0.7178079887287218,
      "grad_norm": 2.859375,
      "learning_rate": 4.226292930000961e-05,
      "loss": 0.971,
      "step": 204810
    },
    {
      "epoch": 0.7178430362356174,
      "grad_norm": 3.015625,
      "learning_rate": 4.22622802713459e-05,
      "loss": 0.8099,
      "step": 204820
    },
    {
      "epoch": 0.7178780837425129,
      "grad_norm": 2.78125,
      "learning_rate": 4.2261631242682204e-05,
      "loss": 0.9954,
      "step": 204830
    },
    {
      "epoch": 0.7179131312494086,
      "grad_norm": 2.90625,
      "learning_rate": 4.2260982214018505e-05,
      "loss": 0.8602,
      "step": 204840
    },
    {
      "epoch": 0.7179481787563041,
      "grad_norm": 3.0,
      "learning_rate": 4.22603331853548e-05,
      "loss": 0.7657,
      "step": 204850
    },
    {
      "epoch": 0.7179832262631998,
      "grad_norm": 2.8125,
      "learning_rate": 4.22596841566911e-05,
      "loss": 0.9147,
      "step": 204860
    },
    {
      "epoch": 0.7180182737700954,
      "grad_norm": 2.90625,
      "learning_rate": 4.2259035128027396e-05,
      "loss": 0.8091,
      "step": 204870
    },
    {
      "epoch": 0.718053321276991,
      "grad_norm": 3.15625,
      "learning_rate": 4.22583860993637e-05,
      "loss": 0.9076,
      "step": 204880
    },
    {
      "epoch": 0.7180883687838866,
      "grad_norm": 3.0625,
      "learning_rate": 4.225773707069999e-05,
      "loss": 0.9283,
      "step": 204890
    },
    {
      "epoch": 0.7181234162907821,
      "grad_norm": 3.171875,
      "learning_rate": 4.225708804203629e-05,
      "loss": 0.9031,
      "step": 204900
    },
    {
      "epoch": 0.7181584637976778,
      "grad_norm": 2.734375,
      "learning_rate": 4.225643901337259e-05,
      "loss": 0.906,
      "step": 204910
    },
    {
      "epoch": 0.7181935113045733,
      "grad_norm": 2.96875,
      "learning_rate": 4.225578998470889e-05,
      "loss": 0.8836,
      "step": 204920
    },
    {
      "epoch": 0.718228558811469,
      "grad_norm": 2.78125,
      "learning_rate": 4.2255140956045184e-05,
      "loss": 0.9096,
      "step": 204930
    },
    {
      "epoch": 0.7182636063183645,
      "grad_norm": 2.890625,
      "learning_rate": 4.2254491927381485e-05,
      "loss": 0.9008,
      "step": 204940
    },
    {
      "epoch": 0.7182986538252601,
      "grad_norm": 2.75,
      "learning_rate": 4.225384289871778e-05,
      "loss": 0.9084,
      "step": 204950
    },
    {
      "epoch": 0.7183337013321557,
      "grad_norm": 2.640625,
      "learning_rate": 4.225319387005408e-05,
      "loss": 0.9106,
      "step": 204960
    },
    {
      "epoch": 0.7183687488390513,
      "grad_norm": 2.921875,
      "learning_rate": 4.2252544841390376e-05,
      "loss": 0.9582,
      "step": 204970
    },
    {
      "epoch": 0.718403796345947,
      "grad_norm": 2.765625,
      "learning_rate": 4.225189581272668e-05,
      "loss": 0.8165,
      "step": 204980
    },
    {
      "epoch": 0.7184388438528425,
      "grad_norm": 2.8125,
      "learning_rate": 4.225124678406298e-05,
      "loss": 0.8843,
      "step": 204990
    },
    {
      "epoch": 0.7184738913597382,
      "grad_norm": 3.34375,
      "learning_rate": 4.225059775539927e-05,
      "loss": 0.962,
      "step": 205000
    },
    {
      "epoch": 0.7184738913597382,
      "eval_loss": 0.8447493314743042,
      "eval_runtime": 565.3449,
      "eval_samples_per_second": 672.927,
      "eval_steps_per_second": 56.077,
      "step": 205000
    },
    {
      "epoch": 0.7185089388666337,
      "grad_norm": 3.078125,
      "learning_rate": 4.2249948726735575e-05,
      "loss": 0.9205,
      "step": 205010
    },
    {
      "epoch": 0.7185439863735293,
      "grad_norm": 2.59375,
      "learning_rate": 4.224929969807187e-05,
      "loss": 0.8478,
      "step": 205020
    },
    {
      "epoch": 0.7185790338804249,
      "grad_norm": 3.09375,
      "learning_rate": 4.224865066940817e-05,
      "loss": 0.9155,
      "step": 205030
    },
    {
      "epoch": 0.7186140813873205,
      "grad_norm": 2.984375,
      "learning_rate": 4.224800164074446e-05,
      "loss": 0.9553,
      "step": 205040
    },
    {
      "epoch": 0.7186491288942161,
      "grad_norm": 2.875,
      "learning_rate": 4.224735261208076e-05,
      "loss": 0.9183,
      "step": 205050
    },
    {
      "epoch": 0.7186841764011117,
      "grad_norm": 3.171875,
      "learning_rate": 4.2246703583417054e-05,
      "loss": 0.9201,
      "step": 205060
    },
    {
      "epoch": 0.7187192239080074,
      "grad_norm": 3.046875,
      "learning_rate": 4.2246054554753356e-05,
      "loss": 0.8388,
      "step": 205070
    },
    {
      "epoch": 0.7187542714149029,
      "grad_norm": 2.578125,
      "learning_rate": 4.224540552608966e-05,
      "loss": 0.9881,
      "step": 205080
    },
    {
      "epoch": 0.7187893189217985,
      "grad_norm": 2.734375,
      "learning_rate": 4.224475649742595e-05,
      "loss": 0.9034,
      "step": 205090
    },
    {
      "epoch": 0.7188243664286941,
      "grad_norm": 3.03125,
      "learning_rate": 4.224410746876225e-05,
      "loss": 0.9512,
      "step": 205100
    },
    {
      "epoch": 0.7188594139355897,
      "grad_norm": 3.34375,
      "learning_rate": 4.224345844009855e-05,
      "loss": 0.9507,
      "step": 205110
    },
    {
      "epoch": 0.7188944614424853,
      "grad_norm": 2.984375,
      "learning_rate": 4.224280941143485e-05,
      "loss": 0.8366,
      "step": 205120
    },
    {
      "epoch": 0.7189295089493809,
      "grad_norm": 2.671875,
      "learning_rate": 4.2242160382771144e-05,
      "loss": 0.8803,
      "step": 205130
    },
    {
      "epoch": 0.7189645564562764,
      "grad_norm": 2.484375,
      "learning_rate": 4.2241511354107445e-05,
      "loss": 0.8274,
      "step": 205140
    },
    {
      "epoch": 0.7189996039631721,
      "grad_norm": 3.515625,
      "learning_rate": 4.224086232544374e-05,
      "loss": 1.0354,
      "step": 205150
    },
    {
      "epoch": 0.7190346514700677,
      "grad_norm": 2.6875,
      "learning_rate": 4.224021329678004e-05,
      "loss": 0.8774,
      "step": 205160
    },
    {
      "epoch": 0.7190696989769633,
      "grad_norm": 2.671875,
      "learning_rate": 4.2239564268116336e-05,
      "loss": 0.8844,
      "step": 205170
    },
    {
      "epoch": 0.7191047464838589,
      "grad_norm": 2.78125,
      "learning_rate": 4.223891523945264e-05,
      "loss": 0.9266,
      "step": 205180
    },
    {
      "epoch": 0.7191397939907545,
      "grad_norm": 2.703125,
      "learning_rate": 4.223826621078893e-05,
      "loss": 0.8224,
      "step": 205190
    },
    {
      "epoch": 0.7191748414976501,
      "grad_norm": 3.359375,
      "learning_rate": 4.223761718212523e-05,
      "loss": 0.9216,
      "step": 205200
    },
    {
      "epoch": 0.7192098890045456,
      "grad_norm": 2.984375,
      "learning_rate": 4.2236968153461535e-05,
      "loss": 0.8989,
      "step": 205210
    },
    {
      "epoch": 0.7192449365114413,
      "grad_norm": 2.4375,
      "learning_rate": 4.223631912479783e-05,
      "loss": 0.8873,
      "step": 205220
    },
    {
      "epoch": 0.7192799840183368,
      "grad_norm": 2.796875,
      "learning_rate": 4.223567009613413e-05,
      "loss": 0.8335,
      "step": 205230
    },
    {
      "epoch": 0.7193150315252325,
      "grad_norm": 3.109375,
      "learning_rate": 4.2235021067470425e-05,
      "loss": 0.9116,
      "step": 205240
    },
    {
      "epoch": 0.719350079032128,
      "grad_norm": 2.78125,
      "learning_rate": 4.2234372038806727e-05,
      "loss": 0.8704,
      "step": 205250
    },
    {
      "epoch": 0.7193851265390236,
      "grad_norm": 2.953125,
      "learning_rate": 4.223372301014302e-05,
      "loss": 0.8195,
      "step": 205260
    },
    {
      "epoch": 0.7194201740459193,
      "grad_norm": 2.9375,
      "learning_rate": 4.223307398147932e-05,
      "loss": 0.8227,
      "step": 205270
    },
    {
      "epoch": 0.7194552215528148,
      "grad_norm": 2.9375,
      "learning_rate": 4.223242495281562e-05,
      "loss": 0.8961,
      "step": 205280
    },
    {
      "epoch": 0.7194902690597105,
      "grad_norm": 3.078125,
      "learning_rate": 4.223177592415192e-05,
      "loss": 0.8755,
      "step": 205290
    },
    {
      "epoch": 0.719525316566606,
      "grad_norm": 2.828125,
      "learning_rate": 4.223112689548821e-05,
      "loss": 0.8515,
      "step": 205300
    },
    {
      "epoch": 0.7195603640735017,
      "grad_norm": 3.03125,
      "learning_rate": 4.2230477866824515e-05,
      "loss": 0.878,
      "step": 205310
    },
    {
      "epoch": 0.7195954115803972,
      "grad_norm": 3.28125,
      "learning_rate": 4.222982883816081e-05,
      "loss": 0.8547,
      "step": 205320
    },
    {
      "epoch": 0.7196304590872928,
      "grad_norm": 2.9375,
      "learning_rate": 4.222917980949711e-05,
      "loss": 0.8128,
      "step": 205330
    },
    {
      "epoch": 0.7196655065941884,
      "grad_norm": 2.96875,
      "learning_rate": 4.222853078083341e-05,
      "loss": 0.8755,
      "step": 205340
    },
    {
      "epoch": 0.719700554101084,
      "grad_norm": 2.859375,
      "learning_rate": 4.2227881752169706e-05,
      "loss": 0.8682,
      "step": 205350
    },
    {
      "epoch": 0.7197356016079797,
      "grad_norm": 3.390625,
      "learning_rate": 4.222723272350601e-05,
      "loss": 0.9273,
      "step": 205360
    },
    {
      "epoch": 0.7197706491148752,
      "grad_norm": 3.0625,
      "learning_rate": 4.22265836948423e-05,
      "loss": 0.8997,
      "step": 205370
    },
    {
      "epoch": 0.7198056966217709,
      "grad_norm": 2.84375,
      "learning_rate": 4.2225934666178604e-05,
      "loss": 0.8585,
      "step": 205380
    },
    {
      "epoch": 0.7198407441286664,
      "grad_norm": 3.1875,
      "learning_rate": 4.22252856375149e-05,
      "loss": 0.8977,
      "step": 205390
    },
    {
      "epoch": 0.719875791635562,
      "grad_norm": 2.890625,
      "learning_rate": 4.22246366088512e-05,
      "loss": 0.914,
      "step": 205400
    },
    {
      "epoch": 0.7199108391424576,
      "grad_norm": 2.703125,
      "learning_rate": 4.222398758018749e-05,
      "loss": 0.7872,
      "step": 205410
    },
    {
      "epoch": 0.7199458866493532,
      "grad_norm": 2.96875,
      "learning_rate": 4.222333855152379e-05,
      "loss": 0.978,
      "step": 205420
    },
    {
      "epoch": 0.7199809341562488,
      "grad_norm": 3.203125,
      "learning_rate": 4.222268952286009e-05,
      "loss": 0.9292,
      "step": 205430
    },
    {
      "epoch": 0.7200159816631444,
      "grad_norm": 2.625,
      "learning_rate": 4.2222040494196385e-05,
      "loss": 0.9044,
      "step": 205440
    },
    {
      "epoch": 0.7200510291700399,
      "grad_norm": 3.015625,
      "learning_rate": 4.2221391465532686e-05,
      "loss": 0.9384,
      "step": 205450
    },
    {
      "epoch": 0.7200860766769356,
      "grad_norm": 2.65625,
      "learning_rate": 4.222074243686898e-05,
      "loss": 0.9543,
      "step": 205460
    },
    {
      "epoch": 0.7201211241838312,
      "grad_norm": 3.0625,
      "learning_rate": 4.222009340820528e-05,
      "loss": 0.9207,
      "step": 205470
    },
    {
      "epoch": 0.7201561716907268,
      "grad_norm": 2.75,
      "learning_rate": 4.221944437954158e-05,
      "loss": 0.9676,
      "step": 205480
    },
    {
      "epoch": 0.7201912191976224,
      "grad_norm": 2.71875,
      "learning_rate": 4.221879535087788e-05,
      "loss": 0.8616,
      "step": 205490
    },
    {
      "epoch": 0.720226266704518,
      "grad_norm": 2.875,
      "learning_rate": 4.221814632221417e-05,
      "loss": 0.8515,
      "step": 205500
    },
    {
      "epoch": 0.7202613142114136,
      "grad_norm": 3.03125,
      "learning_rate": 4.2217497293550474e-05,
      "loss": 0.8911,
      "step": 205510
    },
    {
      "epoch": 0.7202963617183091,
      "grad_norm": 3.25,
      "learning_rate": 4.221684826488677e-05,
      "loss": 0.8997,
      "step": 205520
    },
    {
      "epoch": 0.7203314092252048,
      "grad_norm": 3.015625,
      "learning_rate": 4.221619923622307e-05,
      "loss": 0.8892,
      "step": 205530
    },
    {
      "epoch": 0.7203664567321003,
      "grad_norm": 3.1875,
      "learning_rate": 4.2215550207559365e-05,
      "loss": 0.8884,
      "step": 205540
    },
    {
      "epoch": 0.720401504238996,
      "grad_norm": 2.96875,
      "learning_rate": 4.2214901178895666e-05,
      "loss": 0.9319,
      "step": 205550
    },
    {
      "epoch": 0.7204365517458916,
      "grad_norm": 3.171875,
      "learning_rate": 4.221425215023196e-05,
      "loss": 0.9099,
      "step": 205560
    },
    {
      "epoch": 0.7204715992527871,
      "grad_norm": 3.0,
      "learning_rate": 4.221360312156826e-05,
      "loss": 0.8867,
      "step": 205570
    },
    {
      "epoch": 0.7205066467596828,
      "grad_norm": 2.828125,
      "learning_rate": 4.2212954092904564e-05,
      "loss": 0.9104,
      "step": 205580
    },
    {
      "epoch": 0.7205416942665783,
      "grad_norm": 2.390625,
      "learning_rate": 4.221230506424086e-05,
      "loss": 0.9266,
      "step": 205590
    },
    {
      "epoch": 0.720576741773474,
      "grad_norm": 2.796875,
      "learning_rate": 4.221165603557716e-05,
      "loss": 0.9537,
      "step": 205600
    },
    {
      "epoch": 0.7206117892803695,
      "grad_norm": 2.75,
      "learning_rate": 4.2211007006913454e-05,
      "loss": 0.9003,
      "step": 205610
    },
    {
      "epoch": 0.7206468367872652,
      "grad_norm": 3.515625,
      "learning_rate": 4.2210357978249756e-05,
      "loss": 0.8153,
      "step": 205620
    },
    {
      "epoch": 0.7206818842941607,
      "grad_norm": 3.125,
      "learning_rate": 4.220970894958605e-05,
      "loss": 0.9024,
      "step": 205630
    },
    {
      "epoch": 0.7207169318010563,
      "grad_norm": 3.625,
      "learning_rate": 4.220905992092235e-05,
      "loss": 0.8943,
      "step": 205640
    },
    {
      "epoch": 0.720751979307952,
      "grad_norm": 2.375,
      "learning_rate": 4.2208410892258646e-05,
      "loss": 0.8479,
      "step": 205650
    },
    {
      "epoch": 0.7207870268148475,
      "grad_norm": 3.140625,
      "learning_rate": 4.220776186359495e-05,
      "loss": 0.8299,
      "step": 205660
    },
    {
      "epoch": 0.7208220743217432,
      "grad_norm": 2.953125,
      "learning_rate": 4.220711283493124e-05,
      "loss": 0.8997,
      "step": 205670
    },
    {
      "epoch": 0.7208571218286387,
      "grad_norm": 2.65625,
      "learning_rate": 4.2206463806267544e-05,
      "loss": 0.9037,
      "step": 205680
    },
    {
      "epoch": 0.7208921693355343,
      "grad_norm": 2.671875,
      "learning_rate": 4.220581477760384e-05,
      "loss": 0.8914,
      "step": 205690
    },
    {
      "epoch": 0.7209272168424299,
      "grad_norm": 2.765625,
      "learning_rate": 4.220516574894014e-05,
      "loss": 0.8085,
      "step": 205700
    },
    {
      "epoch": 0.7209622643493255,
      "grad_norm": 3.453125,
      "learning_rate": 4.220451672027644e-05,
      "loss": 0.9741,
      "step": 205710
    },
    {
      "epoch": 0.7209973118562211,
      "grad_norm": 3.296875,
      "learning_rate": 4.2203867691612736e-05,
      "loss": 0.951,
      "step": 205720
    },
    {
      "epoch": 0.7210323593631167,
      "grad_norm": 3.03125,
      "learning_rate": 4.220321866294904e-05,
      "loss": 0.8674,
      "step": 205730
    },
    {
      "epoch": 0.7210674068700123,
      "grad_norm": 3.25,
      "learning_rate": 4.220256963428533e-05,
      "loss": 0.8652,
      "step": 205740
    },
    {
      "epoch": 0.7211024543769079,
      "grad_norm": 2.8125,
      "learning_rate": 4.220192060562163e-05,
      "loss": 0.8342,
      "step": 205750
    },
    {
      "epoch": 0.7211375018838035,
      "grad_norm": 3.109375,
      "learning_rate": 4.220127157695793e-05,
      "loss": 0.9389,
      "step": 205760
    },
    {
      "epoch": 0.7211725493906991,
      "grad_norm": 3.359375,
      "learning_rate": 4.220062254829423e-05,
      "loss": 0.9473,
      "step": 205770
    },
    {
      "epoch": 0.7212075968975947,
      "grad_norm": 2.9375,
      "learning_rate": 4.2199973519630524e-05,
      "loss": 0.9596,
      "step": 205780
    },
    {
      "epoch": 0.7212426444044903,
      "grad_norm": 3.59375,
      "learning_rate": 4.219932449096682e-05,
      "loss": 0.9984,
      "step": 205790
    },
    {
      "epoch": 0.7212776919113859,
      "grad_norm": 2.84375,
      "learning_rate": 4.219867546230312e-05,
      "loss": 0.8474,
      "step": 205800
    },
    {
      "epoch": 0.7213127394182814,
      "grad_norm": 3.046875,
      "learning_rate": 4.2198026433639414e-05,
      "loss": 0.8936,
      "step": 205810
    },
    {
      "epoch": 0.7213477869251771,
      "grad_norm": 2.984375,
      "learning_rate": 4.2197377404975716e-05,
      "loss": 0.8387,
      "step": 205820
    },
    {
      "epoch": 0.7213828344320726,
      "grad_norm": 3.203125,
      "learning_rate": 4.219672837631201e-05,
      "loss": 0.9127,
      "step": 205830
    },
    {
      "epoch": 0.7214178819389683,
      "grad_norm": 3.09375,
      "learning_rate": 4.219607934764831e-05,
      "loss": 0.9233,
      "step": 205840
    },
    {
      "epoch": 0.7214529294458639,
      "grad_norm": 3.328125,
      "learning_rate": 4.2195430318984606e-05,
      "loss": 0.8598,
      "step": 205850
    },
    {
      "epoch": 0.7214879769527595,
      "grad_norm": 2.859375,
      "learning_rate": 4.219478129032091e-05,
      "loss": 0.8372,
      "step": 205860
    },
    {
      "epoch": 0.7215230244596551,
      "grad_norm": 2.640625,
      "learning_rate": 4.21941322616572e-05,
      "loss": 0.8577,
      "step": 205870
    },
    {
      "epoch": 0.7215580719665506,
      "grad_norm": 2.921875,
      "learning_rate": 4.2193483232993504e-05,
      "loss": 0.9108,
      "step": 205880
    },
    {
      "epoch": 0.7215931194734463,
      "grad_norm": 3.34375,
      "learning_rate": 4.21928342043298e-05,
      "loss": 0.8863,
      "step": 205890
    },
    {
      "epoch": 0.7216281669803418,
      "grad_norm": 3.34375,
      "learning_rate": 4.21921851756661e-05,
      "loss": 0.9249,
      "step": 205900
    },
    {
      "epoch": 0.7216632144872375,
      "grad_norm": 2.609375,
      "learning_rate": 4.2191536147002394e-05,
      "loss": 0.8395,
      "step": 205910
    },
    {
      "epoch": 0.721698261994133,
      "grad_norm": 3.09375,
      "learning_rate": 4.2190887118338696e-05,
      "loss": 0.9236,
      "step": 205920
    },
    {
      "epoch": 0.7217333095010287,
      "grad_norm": 3.203125,
      "learning_rate": 4.219023808967499e-05,
      "loss": 0.8733,
      "step": 205930
    },
    {
      "epoch": 0.7217683570079242,
      "grad_norm": 3.109375,
      "learning_rate": 4.218958906101129e-05,
      "loss": 0.9474,
      "step": 205940
    },
    {
      "epoch": 0.7218034045148198,
      "grad_norm": 3.15625,
      "learning_rate": 4.218894003234759e-05,
      "loss": 0.9209,
      "step": 205950
    },
    {
      "epoch": 0.7218384520217155,
      "grad_norm": 2.5,
      "learning_rate": 4.218829100368389e-05,
      "loss": 0.8714,
      "step": 205960
    },
    {
      "epoch": 0.721873499528611,
      "grad_norm": 3.09375,
      "learning_rate": 4.218764197502019e-05,
      "loss": 0.895,
      "step": 205970
    },
    {
      "epoch": 0.7219085470355067,
      "grad_norm": 2.90625,
      "learning_rate": 4.2186992946356484e-05,
      "loss": 0.8122,
      "step": 205980
    },
    {
      "epoch": 0.7219435945424022,
      "grad_norm": 2.84375,
      "learning_rate": 4.2186343917692785e-05,
      "loss": 0.9286,
      "step": 205990
    },
    {
      "epoch": 0.7219786420492978,
      "grad_norm": 2.828125,
      "learning_rate": 4.218569488902908e-05,
      "loss": 0.8091,
      "step": 206000
    },
    {
      "epoch": 0.7220136895561934,
      "grad_norm": 2.9375,
      "learning_rate": 4.218504586036538e-05,
      "loss": 0.8572,
      "step": 206010
    },
    {
      "epoch": 0.722048737063089,
      "grad_norm": 3.28125,
      "learning_rate": 4.2184396831701676e-05,
      "loss": 0.9483,
      "step": 206020
    },
    {
      "epoch": 0.7220837845699846,
      "grad_norm": 2.703125,
      "learning_rate": 4.218374780303798e-05,
      "loss": 0.9275,
      "step": 206030
    },
    {
      "epoch": 0.7221188320768802,
      "grad_norm": 3.328125,
      "learning_rate": 4.218309877437427e-05,
      "loss": 0.8896,
      "step": 206040
    },
    {
      "epoch": 0.7221538795837759,
      "grad_norm": 2.984375,
      "learning_rate": 4.218244974571057e-05,
      "loss": 0.9977,
      "step": 206050
    },
    {
      "epoch": 0.7221889270906714,
      "grad_norm": 3.078125,
      "learning_rate": 4.218180071704687e-05,
      "loss": 0.8849,
      "step": 206060
    },
    {
      "epoch": 0.722223974597567,
      "grad_norm": 2.984375,
      "learning_rate": 4.218115168838317e-05,
      "loss": 0.9472,
      "step": 206070
    },
    {
      "epoch": 0.7222590221044626,
      "grad_norm": 2.890625,
      "learning_rate": 4.218050265971947e-05,
      "loss": 0.941,
      "step": 206080
    },
    {
      "epoch": 0.7222940696113582,
      "grad_norm": 2.875,
      "learning_rate": 4.2179853631055765e-05,
      "loss": 0.8873,
      "step": 206090
    },
    {
      "epoch": 0.7223291171182538,
      "grad_norm": 3.078125,
      "learning_rate": 4.217920460239207e-05,
      "loss": 0.9225,
      "step": 206100
    },
    {
      "epoch": 0.7223641646251494,
      "grad_norm": 2.96875,
      "learning_rate": 4.217855557372836e-05,
      "loss": 0.9692,
      "step": 206110
    },
    {
      "epoch": 0.7223992121320449,
      "grad_norm": 2.71875,
      "learning_rate": 4.217790654506466e-05,
      "loss": 0.9188,
      "step": 206120
    },
    {
      "epoch": 0.7224342596389406,
      "grad_norm": 3.0,
      "learning_rate": 4.217725751640096e-05,
      "loss": 0.9007,
      "step": 206130
    },
    {
      "epoch": 0.7224693071458361,
      "grad_norm": 3.109375,
      "learning_rate": 4.217660848773726e-05,
      "loss": 0.9429,
      "step": 206140
    },
    {
      "epoch": 0.7225043546527318,
      "grad_norm": 2.875,
      "learning_rate": 4.217595945907355e-05,
      "loss": 0.8897,
      "step": 206150
    },
    {
      "epoch": 0.7225394021596274,
      "grad_norm": 2.71875,
      "learning_rate": 4.2175310430409855e-05,
      "loss": 0.7987,
      "step": 206160
    },
    {
      "epoch": 0.722574449666523,
      "grad_norm": 3.09375,
      "learning_rate": 4.217466140174615e-05,
      "loss": 0.8799,
      "step": 206170
    },
    {
      "epoch": 0.7226094971734186,
      "grad_norm": 3.234375,
      "learning_rate": 4.2174012373082444e-05,
      "loss": 0.945,
      "step": 206180
    },
    {
      "epoch": 0.7226445446803141,
      "grad_norm": 3.03125,
      "learning_rate": 4.2173363344418745e-05,
      "loss": 0.8863,
      "step": 206190
    },
    {
      "epoch": 0.7226795921872098,
      "grad_norm": 2.828125,
      "learning_rate": 4.217271431575504e-05,
      "loss": 0.9501,
      "step": 206200
    },
    {
      "epoch": 0.7227146396941053,
      "grad_norm": 2.71875,
      "learning_rate": 4.217206528709134e-05,
      "loss": 0.8493,
      "step": 206210
    },
    {
      "epoch": 0.722749687201001,
      "grad_norm": 3.03125,
      "learning_rate": 4.2171416258427636e-05,
      "loss": 0.8852,
      "step": 206220
    },
    {
      "epoch": 0.7227847347078965,
      "grad_norm": 3.015625,
      "learning_rate": 4.217076722976394e-05,
      "loss": 0.9368,
      "step": 206230
    },
    {
      "epoch": 0.7228197822147922,
      "grad_norm": 3.0625,
      "learning_rate": 4.217011820110023e-05,
      "loss": 0.9231,
      "step": 206240
    },
    {
      "epoch": 0.7228548297216878,
      "grad_norm": 2.9375,
      "learning_rate": 4.216946917243653e-05,
      "loss": 0.8771,
      "step": 206250
    },
    {
      "epoch": 0.7228898772285833,
      "grad_norm": 3.171875,
      "learning_rate": 4.216882014377283e-05,
      "loss": 0.9279,
      "step": 206260
    },
    {
      "epoch": 0.722924924735479,
      "grad_norm": 2.90625,
      "learning_rate": 4.216817111510913e-05,
      "loss": 0.9233,
      "step": 206270
    },
    {
      "epoch": 0.7229599722423745,
      "grad_norm": 2.765625,
      "learning_rate": 4.2167522086445424e-05,
      "loss": 0.9647,
      "step": 206280
    },
    {
      "epoch": 0.7229950197492702,
      "grad_norm": 3.203125,
      "learning_rate": 4.2166873057781725e-05,
      "loss": 0.8975,
      "step": 206290
    },
    {
      "epoch": 0.7230300672561657,
      "grad_norm": 2.921875,
      "learning_rate": 4.216622402911803e-05,
      "loss": 0.9464,
      "step": 206300
    },
    {
      "epoch": 0.7230651147630613,
      "grad_norm": 2.828125,
      "learning_rate": 4.216557500045432e-05,
      "loss": 0.8441,
      "step": 206310
    },
    {
      "epoch": 0.7231001622699569,
      "grad_norm": 2.53125,
      "learning_rate": 4.216492597179062e-05,
      "loss": 0.9331,
      "step": 206320
    },
    {
      "epoch": 0.7231352097768525,
      "grad_norm": 2.984375,
      "learning_rate": 4.216427694312692e-05,
      "loss": 0.8683,
      "step": 206330
    },
    {
      "epoch": 0.7231702572837482,
      "grad_norm": 3.15625,
      "learning_rate": 4.216362791446322e-05,
      "loss": 0.9392,
      "step": 206340
    },
    {
      "epoch": 0.7232053047906437,
      "grad_norm": 3.046875,
      "learning_rate": 4.216297888579951e-05,
      "loss": 0.8897,
      "step": 206350
    },
    {
      "epoch": 0.7232403522975394,
      "grad_norm": 3.140625,
      "learning_rate": 4.2162329857135815e-05,
      "loss": 0.9891,
      "step": 206360
    },
    {
      "epoch": 0.7232753998044349,
      "grad_norm": 2.890625,
      "learning_rate": 4.216168082847211e-05,
      "loss": 0.8515,
      "step": 206370
    },
    {
      "epoch": 0.7233104473113305,
      "grad_norm": 2.78125,
      "learning_rate": 4.216103179980841e-05,
      "loss": 0.859,
      "step": 206380
    },
    {
      "epoch": 0.7233454948182261,
      "grad_norm": 2.859375,
      "learning_rate": 4.2160382771144705e-05,
      "loss": 0.907,
      "step": 206390
    },
    {
      "epoch": 0.7233805423251217,
      "grad_norm": 2.6875,
      "learning_rate": 4.215973374248101e-05,
      "loss": 0.9392,
      "step": 206400
    },
    {
      "epoch": 0.7234155898320173,
      "grad_norm": 3.328125,
      "learning_rate": 4.21590847138173e-05,
      "loss": 0.8616,
      "step": 206410
    },
    {
      "epoch": 0.7234506373389129,
      "grad_norm": 2.90625,
      "learning_rate": 4.21584356851536e-05,
      "loss": 0.8327,
      "step": 206420
    },
    {
      "epoch": 0.7234856848458084,
      "grad_norm": 3.46875,
      "learning_rate": 4.21577866564899e-05,
      "loss": 0.8871,
      "step": 206430
    },
    {
      "epoch": 0.7235207323527041,
      "grad_norm": 2.875,
      "learning_rate": 4.21571376278262e-05,
      "loss": 0.865,
      "step": 206440
    },
    {
      "epoch": 0.7235557798595997,
      "grad_norm": 3.0,
      "learning_rate": 4.21564885991625e-05,
      "loss": 0.9184,
      "step": 206450
    },
    {
      "epoch": 0.7235908273664953,
      "grad_norm": 3.03125,
      "learning_rate": 4.2155839570498795e-05,
      "loss": 0.878,
      "step": 206460
    },
    {
      "epoch": 0.7236258748733909,
      "grad_norm": 3.390625,
      "learning_rate": 4.2155190541835096e-05,
      "loss": 0.9408,
      "step": 206470
    },
    {
      "epoch": 0.7236609223802865,
      "grad_norm": 3.046875,
      "learning_rate": 4.215454151317139e-05,
      "loss": 0.8713,
      "step": 206480
    },
    {
      "epoch": 0.7236959698871821,
      "grad_norm": 2.921875,
      "learning_rate": 4.215389248450769e-05,
      "loss": 0.9539,
      "step": 206490
    },
    {
      "epoch": 0.7237310173940776,
      "grad_norm": 3.15625,
      "learning_rate": 4.215324345584399e-05,
      "loss": 0.9031,
      "step": 206500
    },
    {
      "epoch": 0.7237660649009733,
      "grad_norm": 2.765625,
      "learning_rate": 4.215259442718029e-05,
      "loss": 0.9881,
      "step": 206510
    },
    {
      "epoch": 0.7238011124078688,
      "grad_norm": 2.734375,
      "learning_rate": 4.215194539851658e-05,
      "loss": 0.8459,
      "step": 206520
    },
    {
      "epoch": 0.7238361599147645,
      "grad_norm": 2.796875,
      "learning_rate": 4.2151296369852884e-05,
      "loss": 0.9649,
      "step": 206530
    },
    {
      "epoch": 0.7238712074216601,
      "grad_norm": 2.484375,
      "learning_rate": 4.215064734118918e-05,
      "loss": 0.8428,
      "step": 206540
    },
    {
      "epoch": 0.7239062549285556,
      "grad_norm": 2.828125,
      "learning_rate": 4.214999831252547e-05,
      "loss": 0.8763,
      "step": 206550
    },
    {
      "epoch": 0.7239413024354513,
      "grad_norm": 3.203125,
      "learning_rate": 4.2149349283861775e-05,
      "loss": 0.8973,
      "step": 206560
    },
    {
      "epoch": 0.7239763499423468,
      "grad_norm": 2.9375,
      "learning_rate": 4.214870025519807e-05,
      "loss": 0.8092,
      "step": 206570
    },
    {
      "epoch": 0.7240113974492425,
      "grad_norm": 2.8125,
      "learning_rate": 4.214805122653437e-05,
      "loss": 0.9174,
      "step": 206580
    },
    {
      "epoch": 0.724046444956138,
      "grad_norm": 2.890625,
      "learning_rate": 4.2147402197870665e-05,
      "loss": 0.8886,
      "step": 206590
    },
    {
      "epoch": 0.7240814924630337,
      "grad_norm": 2.953125,
      "learning_rate": 4.214675316920697e-05,
      "loss": 0.8535,
      "step": 206600
    },
    {
      "epoch": 0.7241165399699292,
      "grad_norm": 3.046875,
      "learning_rate": 4.214610414054326e-05,
      "loss": 0.9414,
      "step": 206610
    },
    {
      "epoch": 0.7241515874768248,
      "grad_norm": 2.921875,
      "learning_rate": 4.214545511187956e-05,
      "loss": 0.8831,
      "step": 206620
    },
    {
      "epoch": 0.7241866349837204,
      "grad_norm": 2.796875,
      "learning_rate": 4.214480608321586e-05,
      "loss": 0.9114,
      "step": 206630
    },
    {
      "epoch": 0.724221682490616,
      "grad_norm": 3.0,
      "learning_rate": 4.214415705455216e-05,
      "loss": 0.819,
      "step": 206640
    },
    {
      "epoch": 0.7242567299975117,
      "grad_norm": 3.265625,
      "learning_rate": 4.214350802588845e-05,
      "loss": 0.9184,
      "step": 206650
    },
    {
      "epoch": 0.7242917775044072,
      "grad_norm": 3.046875,
      "learning_rate": 4.2142858997224755e-05,
      "loss": 0.8724,
      "step": 206660
    },
    {
      "epoch": 0.7243268250113029,
      "grad_norm": 3.28125,
      "learning_rate": 4.2142209968561056e-05,
      "loss": 1.0134,
      "step": 206670
    },
    {
      "epoch": 0.7243618725181984,
      "grad_norm": 3.03125,
      "learning_rate": 4.214156093989735e-05,
      "loss": 0.9349,
      "step": 206680
    },
    {
      "epoch": 0.724396920025094,
      "grad_norm": 2.859375,
      "learning_rate": 4.214091191123365e-05,
      "loss": 0.8413,
      "step": 206690
    },
    {
      "epoch": 0.7244319675319896,
      "grad_norm": 2.828125,
      "learning_rate": 4.214026288256995e-05,
      "loss": 0.8798,
      "step": 206700
    },
    {
      "epoch": 0.7244670150388852,
      "grad_norm": 3.015625,
      "learning_rate": 4.213961385390625e-05,
      "loss": 0.9427,
      "step": 206710
    },
    {
      "epoch": 0.7245020625457808,
      "grad_norm": 3.25,
      "learning_rate": 4.213896482524254e-05,
      "loss": 0.9145,
      "step": 206720
    },
    {
      "epoch": 0.7245371100526764,
      "grad_norm": 2.6875,
      "learning_rate": 4.2138315796578844e-05,
      "loss": 0.8423,
      "step": 206730
    },
    {
      "epoch": 0.724572157559572,
      "grad_norm": 3.09375,
      "learning_rate": 4.213766676791514e-05,
      "loss": 0.8832,
      "step": 206740
    },
    {
      "epoch": 0.7246072050664676,
      "grad_norm": 3.234375,
      "learning_rate": 4.213701773925144e-05,
      "loss": 0.8849,
      "step": 206750
    },
    {
      "epoch": 0.7246422525733632,
      "grad_norm": 3.234375,
      "learning_rate": 4.2136368710587735e-05,
      "loss": 0.9083,
      "step": 206760
    },
    {
      "epoch": 0.7246773000802588,
      "grad_norm": 3.03125,
      "learning_rate": 4.2135719681924036e-05,
      "loss": 0.8856,
      "step": 206770
    },
    {
      "epoch": 0.7247123475871544,
      "grad_norm": 3.5625,
      "learning_rate": 4.213507065326033e-05,
      "loss": 0.865,
      "step": 206780
    },
    {
      "epoch": 0.72474739509405,
      "grad_norm": 2.8125,
      "learning_rate": 4.213442162459663e-05,
      "loss": 0.9808,
      "step": 206790
    },
    {
      "epoch": 0.7247824426009456,
      "grad_norm": 3.3125,
      "learning_rate": 4.213377259593293e-05,
      "loss": 0.9323,
      "step": 206800
    },
    {
      "epoch": 0.7248174901078411,
      "grad_norm": 2.984375,
      "learning_rate": 4.213312356726923e-05,
      "loss": 0.9803,
      "step": 206810
    },
    {
      "epoch": 0.7248525376147368,
      "grad_norm": 2.765625,
      "learning_rate": 4.213247453860553e-05,
      "loss": 0.9458,
      "step": 206820
    },
    {
      "epoch": 0.7248875851216324,
      "grad_norm": 3.046875,
      "learning_rate": 4.2131825509941824e-05,
      "loss": 0.9095,
      "step": 206830
    },
    {
      "epoch": 0.724922632628528,
      "grad_norm": 2.546875,
      "learning_rate": 4.2131176481278125e-05,
      "loss": 0.793,
      "step": 206840
    },
    {
      "epoch": 0.7249576801354236,
      "grad_norm": 2.546875,
      "learning_rate": 4.213052745261442e-05,
      "loss": 0.9245,
      "step": 206850
    },
    {
      "epoch": 0.7249927276423191,
      "grad_norm": 2.890625,
      "learning_rate": 4.212987842395072e-05,
      "loss": 0.9998,
      "step": 206860
    },
    {
      "epoch": 0.7250277751492148,
      "grad_norm": 2.890625,
      "learning_rate": 4.2129229395287016e-05,
      "loss": 0.9141,
      "step": 206870
    },
    {
      "epoch": 0.7250628226561103,
      "grad_norm": 2.859375,
      "learning_rate": 4.212858036662332e-05,
      "loss": 0.8872,
      "step": 206880
    },
    {
      "epoch": 0.725097870163006,
      "grad_norm": 2.609375,
      "learning_rate": 4.212793133795961e-05,
      "loss": 0.8795,
      "step": 206890
    },
    {
      "epoch": 0.7251329176699015,
      "grad_norm": 3.046875,
      "learning_rate": 4.2127282309295913e-05,
      "loss": 0.8108,
      "step": 206900
    },
    {
      "epoch": 0.7251679651767972,
      "grad_norm": 2.90625,
      "learning_rate": 4.212663328063221e-05,
      "loss": 0.9118,
      "step": 206910
    },
    {
      "epoch": 0.7252030126836927,
      "grad_norm": 2.828125,
      "learning_rate": 4.21259842519685e-05,
      "loss": 0.9062,
      "step": 206920
    },
    {
      "epoch": 0.7252380601905883,
      "grad_norm": 2.84375,
      "learning_rate": 4.2125335223304804e-05,
      "loss": 0.8783,
      "step": 206930
    },
    {
      "epoch": 0.725273107697484,
      "grad_norm": 2.921875,
      "learning_rate": 4.21246861946411e-05,
      "loss": 0.834,
      "step": 206940
    },
    {
      "epoch": 0.7253081552043795,
      "grad_norm": 3.21875,
      "learning_rate": 4.21240371659774e-05,
      "loss": 0.9455,
      "step": 206950
    },
    {
      "epoch": 0.7253432027112752,
      "grad_norm": 2.90625,
      "learning_rate": 4.2123388137313695e-05,
      "loss": 0.9361,
      "step": 206960
    },
    {
      "epoch": 0.7253782502181707,
      "grad_norm": 3.1875,
      "learning_rate": 4.2122739108649996e-05,
      "loss": 0.9145,
      "step": 206970
    },
    {
      "epoch": 0.7254132977250664,
      "grad_norm": 2.796875,
      "learning_rate": 4.212209007998629e-05,
      "loss": 0.8199,
      "step": 206980
    },
    {
      "epoch": 0.7254483452319619,
      "grad_norm": 2.875,
      "learning_rate": 4.212144105132259e-05,
      "loss": 0.8829,
      "step": 206990
    },
    {
      "epoch": 0.7254833927388575,
      "grad_norm": 3.3125,
      "learning_rate": 4.212079202265889e-05,
      "loss": 0.8922,
      "step": 207000
    },
    {
      "epoch": 0.7255184402457531,
      "grad_norm": 2.625,
      "learning_rate": 4.212014299399519e-05,
      "loss": 0.8833,
      "step": 207010
    },
    {
      "epoch": 0.7255534877526487,
      "grad_norm": 2.84375,
      "learning_rate": 4.211949396533148e-05,
      "loss": 0.8789,
      "step": 207020
    },
    {
      "epoch": 0.7255885352595444,
      "grad_norm": 2.78125,
      "learning_rate": 4.2118844936667784e-05,
      "loss": 0.9007,
      "step": 207030
    },
    {
      "epoch": 0.7256235827664399,
      "grad_norm": 3.1875,
      "learning_rate": 4.2118195908004085e-05,
      "loss": 0.8794,
      "step": 207040
    },
    {
      "epoch": 0.7256586302733355,
      "grad_norm": 2.71875,
      "learning_rate": 4.211754687934038e-05,
      "loss": 0.8669,
      "step": 207050
    },
    {
      "epoch": 0.7256936777802311,
      "grad_norm": 3.03125,
      "learning_rate": 4.211689785067668e-05,
      "loss": 0.9437,
      "step": 207060
    },
    {
      "epoch": 0.7257287252871267,
      "grad_norm": 3.3125,
      "learning_rate": 4.2116248822012976e-05,
      "loss": 0.9492,
      "step": 207070
    },
    {
      "epoch": 0.7257637727940223,
      "grad_norm": 3.0,
      "learning_rate": 4.211559979334928e-05,
      "loss": 0.9711,
      "step": 207080
    },
    {
      "epoch": 0.7257988203009179,
      "grad_norm": 3.203125,
      "learning_rate": 4.211495076468557e-05,
      "loss": 0.9455,
      "step": 207090
    },
    {
      "epoch": 0.7258338678078134,
      "grad_norm": 2.859375,
      "learning_rate": 4.2114301736021873e-05,
      "loss": 0.9777,
      "step": 207100
    },
    {
      "epoch": 0.7258689153147091,
      "grad_norm": 2.96875,
      "learning_rate": 4.211365270735817e-05,
      "loss": 0.9358,
      "step": 207110
    },
    {
      "epoch": 0.7259039628216046,
      "grad_norm": 2.421875,
      "learning_rate": 4.211300367869447e-05,
      "loss": 0.828,
      "step": 207120
    },
    {
      "epoch": 0.7259390103285003,
      "grad_norm": 3.0,
      "learning_rate": 4.2112354650030764e-05,
      "loss": 0.882,
      "step": 207130
    },
    {
      "epoch": 0.7259740578353959,
      "grad_norm": 2.953125,
      "learning_rate": 4.2111705621367065e-05,
      "loss": 0.916,
      "step": 207140
    },
    {
      "epoch": 0.7260091053422915,
      "grad_norm": 3.375,
      "learning_rate": 4.211105659270336e-05,
      "loss": 0.9206,
      "step": 207150
    },
    {
      "epoch": 0.7260441528491871,
      "grad_norm": 2.609375,
      "learning_rate": 4.211040756403966e-05,
      "loss": 0.9013,
      "step": 207160
    },
    {
      "epoch": 0.7260792003560826,
      "grad_norm": 3.0,
      "learning_rate": 4.2109758535375956e-05,
      "loss": 0.9723,
      "step": 207170
    },
    {
      "epoch": 0.7261142478629783,
      "grad_norm": 3.53125,
      "learning_rate": 4.210910950671226e-05,
      "loss": 0.8729,
      "step": 207180
    },
    {
      "epoch": 0.7261492953698738,
      "grad_norm": 3.265625,
      "learning_rate": 4.210846047804856e-05,
      "loss": 0.9134,
      "step": 207190
    },
    {
      "epoch": 0.7261843428767695,
      "grad_norm": 2.796875,
      "learning_rate": 4.2107811449384853e-05,
      "loss": 0.8638,
      "step": 207200
    },
    {
      "epoch": 0.726219390383665,
      "grad_norm": 2.9375,
      "learning_rate": 4.2107162420721155e-05,
      "loss": 0.8429,
      "step": 207210
    },
    {
      "epoch": 0.7262544378905607,
      "grad_norm": 2.703125,
      "learning_rate": 4.210651339205745e-05,
      "loss": 0.8666,
      "step": 207220
    },
    {
      "epoch": 0.7262894853974563,
      "grad_norm": 2.828125,
      "learning_rate": 4.210586436339375e-05,
      "loss": 0.8902,
      "step": 207230
    },
    {
      "epoch": 0.7263245329043518,
      "grad_norm": 3.1875,
      "learning_rate": 4.2105215334730045e-05,
      "loss": 0.8703,
      "step": 207240
    },
    {
      "epoch": 0.7263595804112475,
      "grad_norm": 3.34375,
      "learning_rate": 4.210456630606635e-05,
      "loss": 0.8396,
      "step": 207250
    },
    {
      "epoch": 0.726394627918143,
      "grad_norm": 3.75,
      "learning_rate": 4.210391727740264e-05,
      "loss": 0.8692,
      "step": 207260
    },
    {
      "epoch": 0.7264296754250387,
      "grad_norm": 2.765625,
      "learning_rate": 4.210326824873894e-05,
      "loss": 0.8598,
      "step": 207270
    },
    {
      "epoch": 0.7264647229319342,
      "grad_norm": 2.6875,
      "learning_rate": 4.210261922007524e-05,
      "loss": 0.8928,
      "step": 207280
    },
    {
      "epoch": 0.7264997704388299,
      "grad_norm": 3.015625,
      "learning_rate": 4.210197019141153e-05,
      "loss": 0.9093,
      "step": 207290
    },
    {
      "epoch": 0.7265348179457254,
      "grad_norm": 3.03125,
      "learning_rate": 4.2101321162747833e-05,
      "loss": 0.9379,
      "step": 207300
    },
    {
      "epoch": 0.726569865452621,
      "grad_norm": 3.59375,
      "learning_rate": 4.210067213408413e-05,
      "loss": 0.9381,
      "step": 207310
    },
    {
      "epoch": 0.7266049129595167,
      "grad_norm": 2.390625,
      "learning_rate": 4.210002310542043e-05,
      "loss": 0.9048,
      "step": 207320
    },
    {
      "epoch": 0.7266399604664122,
      "grad_norm": 3.171875,
      "learning_rate": 4.2099374076756724e-05,
      "loss": 0.9384,
      "step": 207330
    },
    {
      "epoch": 0.7266750079733079,
      "grad_norm": 2.734375,
      "learning_rate": 4.2098725048093025e-05,
      "loss": 0.9102,
      "step": 207340
    },
    {
      "epoch": 0.7267100554802034,
      "grad_norm": 2.859375,
      "learning_rate": 4.209807601942932e-05,
      "loss": 0.8708,
      "step": 207350
    },
    {
      "epoch": 0.726745102987099,
      "grad_norm": 3.296875,
      "learning_rate": 4.209742699076562e-05,
      "loss": 0.9329,
      "step": 207360
    },
    {
      "epoch": 0.7267801504939946,
      "grad_norm": 3.359375,
      "learning_rate": 4.2096777962101916e-05,
      "loss": 0.8947,
      "step": 207370
    },
    {
      "epoch": 0.7268151980008902,
      "grad_norm": 2.921875,
      "learning_rate": 4.209612893343822e-05,
      "loss": 0.9061,
      "step": 207380
    },
    {
      "epoch": 0.7268502455077858,
      "grad_norm": 2.375,
      "learning_rate": 4.209547990477451e-05,
      "loss": 0.87,
      "step": 207390
    },
    {
      "epoch": 0.7268852930146814,
      "grad_norm": 2.9375,
      "learning_rate": 4.2094830876110813e-05,
      "loss": 0.982,
      "step": 207400
    },
    {
      "epoch": 0.726920340521577,
      "grad_norm": 3.078125,
      "learning_rate": 4.2094181847447115e-05,
      "loss": 0.8162,
      "step": 207410
    },
    {
      "epoch": 0.7269553880284726,
      "grad_norm": 3.25,
      "learning_rate": 4.209353281878341e-05,
      "loss": 0.9817,
      "step": 207420
    },
    {
      "epoch": 0.7269904355353682,
      "grad_norm": 2.953125,
      "learning_rate": 4.209288379011971e-05,
      "loss": 0.9229,
      "step": 207430
    },
    {
      "epoch": 0.7270254830422638,
      "grad_norm": 3.0,
      "learning_rate": 4.2092234761456005e-05,
      "loss": 0.9117,
      "step": 207440
    },
    {
      "epoch": 0.7270605305491594,
      "grad_norm": 3.171875,
      "learning_rate": 4.209158573279231e-05,
      "loss": 0.9096,
      "step": 207450
    },
    {
      "epoch": 0.727095578056055,
      "grad_norm": 2.953125,
      "learning_rate": 4.20909367041286e-05,
      "loss": 0.9344,
      "step": 207460
    },
    {
      "epoch": 0.7271306255629506,
      "grad_norm": 3.125,
      "learning_rate": 4.20902876754649e-05,
      "loss": 0.842,
      "step": 207470
    },
    {
      "epoch": 0.7271656730698461,
      "grad_norm": 3.296875,
      "learning_rate": 4.20896386468012e-05,
      "loss": 0.9215,
      "step": 207480
    },
    {
      "epoch": 0.7272007205767418,
      "grad_norm": 2.921875,
      "learning_rate": 4.20889896181375e-05,
      "loss": 0.833,
      "step": 207490
    },
    {
      "epoch": 0.7272357680836373,
      "grad_norm": 2.84375,
      "learning_rate": 4.2088340589473793e-05,
      "loss": 0.8659,
      "step": 207500
    },
    {
      "epoch": 0.727270815590533,
      "grad_norm": 2.921875,
      "learning_rate": 4.2087691560810095e-05,
      "loss": 0.834,
      "step": 207510
    },
    {
      "epoch": 0.7273058630974286,
      "grad_norm": 3.46875,
      "learning_rate": 4.208704253214639e-05,
      "loss": 0.88,
      "step": 207520
    },
    {
      "epoch": 0.7273409106043242,
      "grad_norm": 3.03125,
      "learning_rate": 4.208639350348269e-05,
      "loss": 0.8806,
      "step": 207530
    },
    {
      "epoch": 0.7273759581112198,
      "grad_norm": 3.015625,
      "learning_rate": 4.208574447481899e-05,
      "loss": 0.8796,
      "step": 207540
    },
    {
      "epoch": 0.7274110056181153,
      "grad_norm": 2.96875,
      "learning_rate": 4.208509544615529e-05,
      "loss": 0.882,
      "step": 207550
    },
    {
      "epoch": 0.727446053125011,
      "grad_norm": 3.140625,
      "learning_rate": 4.208444641749159e-05,
      "loss": 0.8489,
      "step": 207560
    },
    {
      "epoch": 0.7274811006319065,
      "grad_norm": 3.328125,
      "learning_rate": 4.208379738882788e-05,
      "loss": 0.9313,
      "step": 207570
    },
    {
      "epoch": 0.7275161481388022,
      "grad_norm": 3.0,
      "learning_rate": 4.2083148360164184e-05,
      "loss": 0.8614,
      "step": 207580
    },
    {
      "epoch": 0.7275511956456977,
      "grad_norm": 2.96875,
      "learning_rate": 4.208249933150048e-05,
      "loss": 0.8841,
      "step": 207590
    },
    {
      "epoch": 0.7275862431525933,
      "grad_norm": 3.125,
      "learning_rate": 4.208185030283678e-05,
      "loss": 0.8551,
      "step": 207600
    },
    {
      "epoch": 0.7276212906594889,
      "grad_norm": 2.78125,
      "learning_rate": 4.2081201274173075e-05,
      "loss": 0.8766,
      "step": 207610
    },
    {
      "epoch": 0.7276563381663845,
      "grad_norm": 3.4375,
      "learning_rate": 4.2080552245509376e-05,
      "loss": 0.8246,
      "step": 207620
    },
    {
      "epoch": 0.7276913856732802,
      "grad_norm": 2.8125,
      "learning_rate": 4.207990321684567e-05,
      "loss": 0.9395,
      "step": 207630
    },
    {
      "epoch": 0.7277264331801757,
      "grad_norm": 3.015625,
      "learning_rate": 4.207925418818197e-05,
      "loss": 0.8452,
      "step": 207640
    },
    {
      "epoch": 0.7277614806870714,
      "grad_norm": 2.9375,
      "learning_rate": 4.207860515951827e-05,
      "loss": 0.9366,
      "step": 207650
    },
    {
      "epoch": 0.7277965281939669,
      "grad_norm": 4.46875,
      "learning_rate": 4.207795613085457e-05,
      "loss": 0.9516,
      "step": 207660
    },
    {
      "epoch": 0.7278315757008625,
      "grad_norm": 2.640625,
      "learning_rate": 4.207730710219086e-05,
      "loss": 0.8509,
      "step": 207670
    },
    {
      "epoch": 0.7278666232077581,
      "grad_norm": 3.0625,
      "learning_rate": 4.207665807352716e-05,
      "loss": 0.9579,
      "step": 207680
    },
    {
      "epoch": 0.7279016707146537,
      "grad_norm": 3.234375,
      "learning_rate": 4.207600904486346e-05,
      "loss": 0.8928,
      "step": 207690
    },
    {
      "epoch": 0.7279367182215493,
      "grad_norm": 2.859375,
      "learning_rate": 4.2075360016199753e-05,
      "loss": 0.8675,
      "step": 207700
    },
    {
      "epoch": 0.7279717657284449,
      "grad_norm": 3.46875,
      "learning_rate": 4.2074710987536055e-05,
      "loss": 0.8923,
      "step": 207710
    },
    {
      "epoch": 0.7280068132353406,
      "grad_norm": 3.265625,
      "learning_rate": 4.207406195887235e-05,
      "loss": 0.8768,
      "step": 207720
    },
    {
      "epoch": 0.7280418607422361,
      "grad_norm": 2.65625,
      "learning_rate": 4.207341293020865e-05,
      "loss": 0.9081,
      "step": 207730
    },
    {
      "epoch": 0.7280769082491317,
      "grad_norm": 2.90625,
      "learning_rate": 4.2072763901544945e-05,
      "loss": 0.7127,
      "step": 207740
    },
    {
      "epoch": 0.7281119557560273,
      "grad_norm": 2.921875,
      "learning_rate": 4.207211487288125e-05,
      "loss": 0.9282,
      "step": 207750
    },
    {
      "epoch": 0.7281470032629229,
      "grad_norm": 3.125,
      "learning_rate": 4.207146584421754e-05,
      "loss": 0.868,
      "step": 207760
    },
    {
      "epoch": 0.7281820507698185,
      "grad_norm": 3.078125,
      "learning_rate": 4.207081681555384e-05,
      "loss": 0.8075,
      "step": 207770
    },
    {
      "epoch": 0.7282170982767141,
      "grad_norm": 2.765625,
      "learning_rate": 4.2070167786890144e-05,
      "loss": 0.8891,
      "step": 207780
    },
    {
      "epoch": 0.7282521457836096,
      "grad_norm": 2.546875,
      "learning_rate": 4.206951875822644e-05,
      "loss": 0.8841,
      "step": 207790
    },
    {
      "epoch": 0.7282871932905053,
      "grad_norm": 3.25,
      "learning_rate": 4.206886972956274e-05,
      "loss": 0.915,
      "step": 207800
    },
    {
      "epoch": 0.7283222407974008,
      "grad_norm": 2.8125,
      "learning_rate": 4.2068220700899035e-05,
      "loss": 0.9152,
      "step": 207810
    },
    {
      "epoch": 0.7283572883042965,
      "grad_norm": 2.78125,
      "learning_rate": 4.2067571672235336e-05,
      "loss": 0.9388,
      "step": 207820
    },
    {
      "epoch": 0.7283923358111921,
      "grad_norm": 2.5625,
      "learning_rate": 4.206692264357163e-05,
      "loss": 0.8827,
      "step": 207830
    },
    {
      "epoch": 0.7284273833180877,
      "grad_norm": 3.34375,
      "learning_rate": 4.206627361490793e-05,
      "loss": 0.9447,
      "step": 207840
    },
    {
      "epoch": 0.7284624308249833,
      "grad_norm": 2.890625,
      "learning_rate": 4.206562458624423e-05,
      "loss": 0.8935,
      "step": 207850
    },
    {
      "epoch": 0.7284974783318788,
      "grad_norm": 2.734375,
      "learning_rate": 4.206497555758053e-05,
      "loss": 0.8682,
      "step": 207860
    },
    {
      "epoch": 0.7285325258387745,
      "grad_norm": 2.6875,
      "learning_rate": 4.206432652891682e-05,
      "loss": 0.9475,
      "step": 207870
    },
    {
      "epoch": 0.72856757334567,
      "grad_norm": 3.265625,
      "learning_rate": 4.2063677500253124e-05,
      "loss": 1.0201,
      "step": 207880
    },
    {
      "epoch": 0.7286026208525657,
      "grad_norm": 3.0,
      "learning_rate": 4.206302847158942e-05,
      "loss": 0.9095,
      "step": 207890
    },
    {
      "epoch": 0.7286376683594612,
      "grad_norm": 2.890625,
      "learning_rate": 4.206237944292572e-05,
      "loss": 0.8859,
      "step": 207900
    },
    {
      "epoch": 0.7286727158663568,
      "grad_norm": 2.953125,
      "learning_rate": 4.206173041426202e-05,
      "loss": 0.9401,
      "step": 207910
    },
    {
      "epoch": 0.7287077633732525,
      "grad_norm": 3.171875,
      "learning_rate": 4.2061081385598316e-05,
      "loss": 0.8686,
      "step": 207920
    },
    {
      "epoch": 0.728742810880148,
      "grad_norm": 2.671875,
      "learning_rate": 4.206043235693462e-05,
      "loss": 0.8762,
      "step": 207930
    },
    {
      "epoch": 0.7287778583870437,
      "grad_norm": 3.5,
      "learning_rate": 4.205978332827091e-05,
      "loss": 0.9312,
      "step": 207940
    },
    {
      "epoch": 0.7288129058939392,
      "grad_norm": 3.46875,
      "learning_rate": 4.2059134299607214e-05,
      "loss": 0.8515,
      "step": 207950
    },
    {
      "epoch": 0.7288479534008349,
      "grad_norm": 2.90625,
      "learning_rate": 4.205848527094351e-05,
      "loss": 0.9224,
      "step": 207960
    },
    {
      "epoch": 0.7288830009077304,
      "grad_norm": 2.640625,
      "learning_rate": 4.205783624227981e-05,
      "loss": 0.8757,
      "step": 207970
    },
    {
      "epoch": 0.728918048414626,
      "grad_norm": 2.828125,
      "learning_rate": 4.2057187213616104e-05,
      "loss": 0.9037,
      "step": 207980
    },
    {
      "epoch": 0.7289530959215216,
      "grad_norm": 2.96875,
      "learning_rate": 4.2056538184952406e-05,
      "loss": 0.9287,
      "step": 207990
    },
    {
      "epoch": 0.7289881434284172,
      "grad_norm": 2.875,
      "learning_rate": 4.20558891562887e-05,
      "loss": 0.8497,
      "step": 208000
    },
    {
      "epoch": 0.7290231909353129,
      "grad_norm": 3.09375,
      "learning_rate": 4.2055240127625e-05,
      "loss": 0.9665,
      "step": 208010
    },
    {
      "epoch": 0.7290582384422084,
      "grad_norm": 2.859375,
      "learning_rate": 4.2054591098961296e-05,
      "loss": 0.8924,
      "step": 208020
    },
    {
      "epoch": 0.729093285949104,
      "grad_norm": 2.90625,
      "learning_rate": 4.20539420702976e-05,
      "loss": 0.8769,
      "step": 208030
    },
    {
      "epoch": 0.7291283334559996,
      "grad_norm": 2.6875,
      "learning_rate": 4.205329304163389e-05,
      "loss": 0.8919,
      "step": 208040
    },
    {
      "epoch": 0.7291633809628952,
      "grad_norm": 3.015625,
      "learning_rate": 4.205264401297019e-05,
      "loss": 0.8778,
      "step": 208050
    },
    {
      "epoch": 0.7291984284697908,
      "grad_norm": 3.015625,
      "learning_rate": 4.205199498430649e-05,
      "loss": 0.9534,
      "step": 208060
    },
    {
      "epoch": 0.7292334759766864,
      "grad_norm": 2.875,
      "learning_rate": 4.205134595564278e-05,
      "loss": 0.8581,
      "step": 208070
    },
    {
      "epoch": 0.729268523483582,
      "grad_norm": 2.96875,
      "learning_rate": 4.2050696926979084e-05,
      "loss": 0.8447,
      "step": 208080
    },
    {
      "epoch": 0.7293035709904776,
      "grad_norm": 2.9375,
      "learning_rate": 4.205004789831538e-05,
      "loss": 0.9359,
      "step": 208090
    },
    {
      "epoch": 0.7293386184973731,
      "grad_norm": 3.046875,
      "learning_rate": 4.204939886965168e-05,
      "loss": 0.8215,
      "step": 208100
    },
    {
      "epoch": 0.7293736660042688,
      "grad_norm": 2.90625,
      "learning_rate": 4.2048749840987975e-05,
      "loss": 0.8974,
      "step": 208110
    },
    {
      "epoch": 0.7294087135111644,
      "grad_norm": 2.78125,
      "learning_rate": 4.2048100812324276e-05,
      "loss": 0.9454,
      "step": 208120
    },
    {
      "epoch": 0.72944376101806,
      "grad_norm": 3.515625,
      "learning_rate": 4.204745178366057e-05,
      "loss": 0.8419,
      "step": 208130
    },
    {
      "epoch": 0.7294788085249556,
      "grad_norm": 2.5,
      "learning_rate": 4.204680275499687e-05,
      "loss": 0.8549,
      "step": 208140
    },
    {
      "epoch": 0.7295138560318511,
      "grad_norm": 2.75,
      "learning_rate": 4.2046153726333174e-05,
      "loss": 0.9167,
      "step": 208150
    },
    {
      "epoch": 0.7295489035387468,
      "grad_norm": 2.828125,
      "learning_rate": 4.204550469766947e-05,
      "loss": 0.9024,
      "step": 208160
    },
    {
      "epoch": 0.7295839510456423,
      "grad_norm": 3.21875,
      "learning_rate": 4.204485566900577e-05,
      "loss": 0.9189,
      "step": 208170
    },
    {
      "epoch": 0.729618998552538,
      "grad_norm": 3.125,
      "learning_rate": 4.2044206640342064e-05,
      "loss": 0.9449,
      "step": 208180
    },
    {
      "epoch": 0.7296540460594335,
      "grad_norm": 3.3125,
      "learning_rate": 4.2043557611678366e-05,
      "loss": 0.8369,
      "step": 208190
    },
    {
      "epoch": 0.7296890935663292,
      "grad_norm": 2.890625,
      "learning_rate": 4.204290858301466e-05,
      "loss": 0.8428,
      "step": 208200
    },
    {
      "epoch": 0.7297241410732248,
      "grad_norm": 2.890625,
      "learning_rate": 4.204225955435096e-05,
      "loss": 0.8681,
      "step": 208210
    },
    {
      "epoch": 0.7297591885801203,
      "grad_norm": 3.140625,
      "learning_rate": 4.2041610525687256e-05,
      "loss": 0.9133,
      "step": 208220
    },
    {
      "epoch": 0.729794236087016,
      "grad_norm": 3.0,
      "learning_rate": 4.204096149702356e-05,
      "loss": 0.905,
      "step": 208230
    },
    {
      "epoch": 0.7298292835939115,
      "grad_norm": 3.203125,
      "learning_rate": 4.204031246835985e-05,
      "loss": 0.9263,
      "step": 208240
    },
    {
      "epoch": 0.7298643311008072,
      "grad_norm": 3.390625,
      "learning_rate": 4.2039663439696154e-05,
      "loss": 0.8711,
      "step": 208250
    },
    {
      "epoch": 0.7298993786077027,
      "grad_norm": 2.8125,
      "learning_rate": 4.203901441103245e-05,
      "loss": 0.9513,
      "step": 208260
    },
    {
      "epoch": 0.7299344261145984,
      "grad_norm": 3.0625,
      "learning_rate": 4.203836538236875e-05,
      "loss": 0.9316,
      "step": 208270
    },
    {
      "epoch": 0.7299694736214939,
      "grad_norm": 2.859375,
      "learning_rate": 4.203771635370505e-05,
      "loss": 0.9176,
      "step": 208280
    },
    {
      "epoch": 0.7300045211283895,
      "grad_norm": 3.15625,
      "learning_rate": 4.2037067325041346e-05,
      "loss": 0.9038,
      "step": 208290
    },
    {
      "epoch": 0.7300395686352851,
      "grad_norm": 3.09375,
      "learning_rate": 4.203641829637765e-05,
      "loss": 0.818,
      "step": 208300
    },
    {
      "epoch": 0.7300746161421807,
      "grad_norm": 2.703125,
      "learning_rate": 4.203576926771394e-05,
      "loss": 0.9067,
      "step": 208310
    },
    {
      "epoch": 0.7301096636490764,
      "grad_norm": 2.84375,
      "learning_rate": 4.203512023905024e-05,
      "loss": 0.9154,
      "step": 208320
    },
    {
      "epoch": 0.7301447111559719,
      "grad_norm": 2.8125,
      "learning_rate": 4.203447121038654e-05,
      "loss": 0.9137,
      "step": 208330
    },
    {
      "epoch": 0.7301797586628676,
      "grad_norm": 2.828125,
      "learning_rate": 4.203382218172284e-05,
      "loss": 0.9246,
      "step": 208340
    },
    {
      "epoch": 0.7302148061697631,
      "grad_norm": 2.734375,
      "learning_rate": 4.2033173153059134e-05,
      "loss": 0.8315,
      "step": 208350
    },
    {
      "epoch": 0.7302498536766587,
      "grad_norm": 3.015625,
      "learning_rate": 4.2032524124395435e-05,
      "loss": 0.8767,
      "step": 208360
    },
    {
      "epoch": 0.7302849011835543,
      "grad_norm": 2.90625,
      "learning_rate": 4.203187509573173e-05,
      "loss": 0.8904,
      "step": 208370
    },
    {
      "epoch": 0.7303199486904499,
      "grad_norm": 2.734375,
      "learning_rate": 4.203122606706803e-05,
      "loss": 0.8862,
      "step": 208380
    },
    {
      "epoch": 0.7303549961973455,
      "grad_norm": 2.5625,
      "learning_rate": 4.2030577038404326e-05,
      "loss": 0.9298,
      "step": 208390
    },
    {
      "epoch": 0.7303900437042411,
      "grad_norm": 2.859375,
      "learning_rate": 4.202992800974063e-05,
      "loss": 0.8151,
      "step": 208400
    },
    {
      "epoch": 0.7304250912111367,
      "grad_norm": 2.78125,
      "learning_rate": 4.202927898107693e-05,
      "loss": 0.8585,
      "step": 208410
    },
    {
      "epoch": 0.7304601387180323,
      "grad_norm": 3.34375,
      "learning_rate": 4.2028629952413216e-05,
      "loss": 0.913,
      "step": 208420
    },
    {
      "epoch": 0.7304951862249279,
      "grad_norm": 3.21875,
      "learning_rate": 4.202798092374952e-05,
      "loss": 0.9802,
      "step": 208430
    },
    {
      "epoch": 0.7305302337318235,
      "grad_norm": 3.359375,
      "learning_rate": 4.202733189508581e-05,
      "loss": 0.92,
      "step": 208440
    },
    {
      "epoch": 0.7305652812387191,
      "grad_norm": 3.125,
      "learning_rate": 4.2026682866422114e-05,
      "loss": 0.9471,
      "step": 208450
    },
    {
      "epoch": 0.7306003287456146,
      "grad_norm": 3.1875,
      "learning_rate": 4.202603383775841e-05,
      "loss": 0.8259,
      "step": 208460
    },
    {
      "epoch": 0.7306353762525103,
      "grad_norm": 3.171875,
      "learning_rate": 4.202538480909471e-05,
      "loss": 0.9915,
      "step": 208470
    },
    {
      "epoch": 0.7306704237594058,
      "grad_norm": 2.609375,
      "learning_rate": 4.2024735780431004e-05,
      "loss": 0.9167,
      "step": 208480
    },
    {
      "epoch": 0.7307054712663015,
      "grad_norm": 2.828125,
      "learning_rate": 4.2024086751767306e-05,
      "loss": 0.9048,
      "step": 208490
    },
    {
      "epoch": 0.7307405187731971,
      "grad_norm": 2.890625,
      "learning_rate": 4.202343772310361e-05,
      "loss": 0.8542,
      "step": 208500
    },
    {
      "epoch": 0.7307755662800927,
      "grad_norm": 3.046875,
      "learning_rate": 4.20227886944399e-05,
      "loss": 0.8922,
      "step": 208510
    },
    {
      "epoch": 0.7308106137869883,
      "grad_norm": 3.140625,
      "learning_rate": 4.20221396657762e-05,
      "loss": 0.8666,
      "step": 208520
    },
    {
      "epoch": 0.7308456612938838,
      "grad_norm": 3.140625,
      "learning_rate": 4.20214906371125e-05,
      "loss": 0.8873,
      "step": 208530
    },
    {
      "epoch": 0.7308807088007795,
      "grad_norm": 3.15625,
      "learning_rate": 4.20208416084488e-05,
      "loss": 0.947,
      "step": 208540
    },
    {
      "epoch": 0.730915756307675,
      "grad_norm": 2.96875,
      "learning_rate": 4.2020192579785094e-05,
      "loss": 0.9364,
      "step": 208550
    },
    {
      "epoch": 0.7309508038145707,
      "grad_norm": 2.515625,
      "learning_rate": 4.2019543551121395e-05,
      "loss": 0.9075,
      "step": 208560
    },
    {
      "epoch": 0.7309858513214662,
      "grad_norm": 2.453125,
      "learning_rate": 4.201889452245769e-05,
      "loss": 0.9017,
      "step": 208570
    },
    {
      "epoch": 0.7310208988283619,
      "grad_norm": 3.515625,
      "learning_rate": 4.201824549379399e-05,
      "loss": 0.9697,
      "step": 208580
    },
    {
      "epoch": 0.7310559463352574,
      "grad_norm": 3.03125,
      "learning_rate": 4.2017596465130286e-05,
      "loss": 0.8893,
      "step": 208590
    },
    {
      "epoch": 0.731090993842153,
      "grad_norm": 2.578125,
      "learning_rate": 4.201694743646659e-05,
      "loss": 0.9093,
      "step": 208600
    },
    {
      "epoch": 0.7311260413490487,
      "grad_norm": 2.734375,
      "learning_rate": 4.201629840780288e-05,
      "loss": 0.8527,
      "step": 208610
    },
    {
      "epoch": 0.7311610888559442,
      "grad_norm": 3.140625,
      "learning_rate": 4.201564937913918e-05,
      "loss": 0.9444,
      "step": 208620
    },
    {
      "epoch": 0.7311961363628399,
      "grad_norm": 2.640625,
      "learning_rate": 4.201500035047548e-05,
      "loss": 0.845,
      "step": 208630
    },
    {
      "epoch": 0.7312311838697354,
      "grad_norm": 2.890625,
      "learning_rate": 4.201435132181178e-05,
      "loss": 0.8375,
      "step": 208640
    },
    {
      "epoch": 0.731266231376631,
      "grad_norm": 2.9375,
      "learning_rate": 4.201370229314808e-05,
      "loss": 0.8767,
      "step": 208650
    },
    {
      "epoch": 0.7313012788835266,
      "grad_norm": 2.953125,
      "learning_rate": 4.2013053264484375e-05,
      "loss": 0.9296,
      "step": 208660
    },
    {
      "epoch": 0.7313363263904222,
      "grad_norm": 2.765625,
      "learning_rate": 4.2012404235820676e-05,
      "loss": 0.8811,
      "step": 208670
    },
    {
      "epoch": 0.7313713738973178,
      "grad_norm": 2.65625,
      "learning_rate": 4.201175520715697e-05,
      "loss": 0.8678,
      "step": 208680
    },
    {
      "epoch": 0.7314064214042134,
      "grad_norm": 2.640625,
      "learning_rate": 4.201110617849327e-05,
      "loss": 0.8158,
      "step": 208690
    },
    {
      "epoch": 0.7314414689111091,
      "grad_norm": 3.15625,
      "learning_rate": 4.201045714982957e-05,
      "loss": 0.9262,
      "step": 208700
    },
    {
      "epoch": 0.7314765164180046,
      "grad_norm": 2.453125,
      "learning_rate": 4.200980812116587e-05,
      "loss": 0.9084,
      "step": 208710
    },
    {
      "epoch": 0.7315115639249002,
      "grad_norm": 3.375,
      "learning_rate": 4.200915909250216e-05,
      "loss": 0.9048,
      "step": 208720
    },
    {
      "epoch": 0.7315466114317958,
      "grad_norm": 3.03125,
      "learning_rate": 4.2008510063838464e-05,
      "loss": 0.9036,
      "step": 208730
    },
    {
      "epoch": 0.7315816589386914,
      "grad_norm": 3.609375,
      "learning_rate": 4.200786103517476e-05,
      "loss": 0.9284,
      "step": 208740
    },
    {
      "epoch": 0.731616706445587,
      "grad_norm": 3.4375,
      "learning_rate": 4.200721200651106e-05,
      "loss": 0.9328,
      "step": 208750
    },
    {
      "epoch": 0.7316517539524826,
      "grad_norm": 2.859375,
      "learning_rate": 4.2006562977847355e-05,
      "loss": 0.8703,
      "step": 208760
    },
    {
      "epoch": 0.7316868014593781,
      "grad_norm": 2.734375,
      "learning_rate": 4.2005913949183656e-05,
      "loss": 0.8667,
      "step": 208770
    },
    {
      "epoch": 0.7317218489662738,
      "grad_norm": 2.71875,
      "learning_rate": 4.200526492051996e-05,
      "loss": 0.8847,
      "step": 208780
    },
    {
      "epoch": 0.7317568964731693,
      "grad_norm": 2.984375,
      "learning_rate": 4.200461589185625e-05,
      "loss": 0.9806,
      "step": 208790
    },
    {
      "epoch": 0.731791943980065,
      "grad_norm": 3.265625,
      "learning_rate": 4.200396686319255e-05,
      "loss": 0.917,
      "step": 208800
    },
    {
      "epoch": 0.7318269914869606,
      "grad_norm": 2.609375,
      "learning_rate": 4.200331783452884e-05,
      "loss": 0.8863,
      "step": 208810
    },
    {
      "epoch": 0.7318620389938562,
      "grad_norm": 2.34375,
      "learning_rate": 4.200266880586514e-05,
      "loss": 0.9239,
      "step": 208820
    },
    {
      "epoch": 0.7318970865007518,
      "grad_norm": 3.171875,
      "learning_rate": 4.200201977720144e-05,
      "loss": 0.8741,
      "step": 208830
    },
    {
      "epoch": 0.7319321340076473,
      "grad_norm": 3.125,
      "learning_rate": 4.200137074853774e-05,
      "loss": 0.9206,
      "step": 208840
    },
    {
      "epoch": 0.731967181514543,
      "grad_norm": 3.34375,
      "learning_rate": 4.2000721719874034e-05,
      "loss": 0.9553,
      "step": 208850
    },
    {
      "epoch": 0.7320022290214385,
      "grad_norm": 3.390625,
      "learning_rate": 4.2000072691210335e-05,
      "loss": 0.9406,
      "step": 208860
    },
    {
      "epoch": 0.7320372765283342,
      "grad_norm": 2.828125,
      "learning_rate": 4.1999423662546636e-05,
      "loss": 0.8905,
      "step": 208870
    },
    {
      "epoch": 0.7320723240352297,
      "grad_norm": 3.203125,
      "learning_rate": 4.199877463388293e-05,
      "loss": 0.9136,
      "step": 208880
    },
    {
      "epoch": 0.7321073715421254,
      "grad_norm": 3.03125,
      "learning_rate": 4.199812560521923e-05,
      "loss": 0.9003,
      "step": 208890
    },
    {
      "epoch": 0.732142419049021,
      "grad_norm": 2.984375,
      "learning_rate": 4.199747657655553e-05,
      "loss": 0.8685,
      "step": 208900
    },
    {
      "epoch": 0.7321774665559165,
      "grad_norm": 3.1875,
      "learning_rate": 4.199682754789183e-05,
      "loss": 0.9024,
      "step": 208910
    },
    {
      "epoch": 0.7322125140628122,
      "grad_norm": 2.53125,
      "learning_rate": 4.199617851922812e-05,
      "loss": 0.9203,
      "step": 208920
    },
    {
      "epoch": 0.7322475615697077,
      "grad_norm": 2.5625,
      "learning_rate": 4.1995529490564424e-05,
      "loss": 0.8268,
      "step": 208930
    },
    {
      "epoch": 0.7322826090766034,
      "grad_norm": 2.625,
      "learning_rate": 4.199488046190072e-05,
      "loss": 0.8965,
      "step": 208940
    },
    {
      "epoch": 0.7323176565834989,
      "grad_norm": 2.640625,
      "learning_rate": 4.199423143323702e-05,
      "loss": 0.8913,
      "step": 208950
    },
    {
      "epoch": 0.7323527040903945,
      "grad_norm": 2.796875,
      "learning_rate": 4.1993582404573315e-05,
      "loss": 0.8866,
      "step": 208960
    },
    {
      "epoch": 0.7323877515972901,
      "grad_norm": 3.015625,
      "learning_rate": 4.1992933375909616e-05,
      "loss": 0.9744,
      "step": 208970
    },
    {
      "epoch": 0.7324227991041857,
      "grad_norm": 3.078125,
      "learning_rate": 4.199228434724591e-05,
      "loss": 0.897,
      "step": 208980
    },
    {
      "epoch": 0.7324578466110814,
      "grad_norm": 2.828125,
      "learning_rate": 4.199163531858221e-05,
      "loss": 0.8389,
      "step": 208990
    },
    {
      "epoch": 0.7324928941179769,
      "grad_norm": 2.90625,
      "learning_rate": 4.199098628991851e-05,
      "loss": 0.9175,
      "step": 209000
    },
    {
      "epoch": 0.7325279416248726,
      "grad_norm": 2.734375,
      "learning_rate": 4.199033726125481e-05,
      "loss": 0.8491,
      "step": 209010
    },
    {
      "epoch": 0.7325629891317681,
      "grad_norm": 2.953125,
      "learning_rate": 4.198968823259111e-05,
      "loss": 0.867,
      "step": 209020
    },
    {
      "epoch": 0.7325980366386637,
      "grad_norm": 2.828125,
      "learning_rate": 4.1989039203927404e-05,
      "loss": 0.9027,
      "step": 209030
    },
    {
      "epoch": 0.7326330841455593,
      "grad_norm": 3.0625,
      "learning_rate": 4.1988390175263706e-05,
      "loss": 0.8501,
      "step": 209040
    },
    {
      "epoch": 0.7326681316524549,
      "grad_norm": 3.015625,
      "learning_rate": 4.19877411466e-05,
      "loss": 0.8625,
      "step": 209050
    },
    {
      "epoch": 0.7327031791593505,
      "grad_norm": 3.109375,
      "learning_rate": 4.19870921179363e-05,
      "loss": 0.9003,
      "step": 209060
    },
    {
      "epoch": 0.7327382266662461,
      "grad_norm": 3.484375,
      "learning_rate": 4.1986443089272596e-05,
      "loss": 0.8446,
      "step": 209070
    },
    {
      "epoch": 0.7327732741731416,
      "grad_norm": 2.53125,
      "learning_rate": 4.19857940606089e-05,
      "loss": 0.9188,
      "step": 209080
    },
    {
      "epoch": 0.7328083216800373,
      "grad_norm": 2.875,
      "learning_rate": 4.198514503194519e-05,
      "loss": 0.9443,
      "step": 209090
    },
    {
      "epoch": 0.7328433691869329,
      "grad_norm": 3.203125,
      "learning_rate": 4.1984496003281494e-05,
      "loss": 0.9295,
      "step": 209100
    },
    {
      "epoch": 0.7328784166938285,
      "grad_norm": 3.015625,
      "learning_rate": 4.198384697461779e-05,
      "loss": 0.8138,
      "step": 209110
    },
    {
      "epoch": 0.7329134642007241,
      "grad_norm": 2.578125,
      "learning_rate": 4.198319794595409e-05,
      "loss": 0.9287,
      "step": 209120
    },
    {
      "epoch": 0.7329485117076197,
      "grad_norm": 2.875,
      "learning_rate": 4.1982548917290384e-05,
      "loss": 0.8271,
      "step": 209130
    },
    {
      "epoch": 0.7329835592145153,
      "grad_norm": 2.921875,
      "learning_rate": 4.1981899888626686e-05,
      "loss": 0.8595,
      "step": 209140
    },
    {
      "epoch": 0.7330186067214108,
      "grad_norm": 2.609375,
      "learning_rate": 4.198125085996299e-05,
      "loss": 0.906,
      "step": 209150
    },
    {
      "epoch": 0.7330536542283065,
      "grad_norm": 2.765625,
      "learning_rate": 4.198060183129928e-05,
      "loss": 0.8891,
      "step": 209160
    },
    {
      "epoch": 0.733088701735202,
      "grad_norm": 2.90625,
      "learning_rate": 4.1979952802635576e-05,
      "loss": 0.8677,
      "step": 209170
    },
    {
      "epoch": 0.7331237492420977,
      "grad_norm": 3.15625,
      "learning_rate": 4.197930377397187e-05,
      "loss": 0.9177,
      "step": 209180
    },
    {
      "epoch": 0.7331587967489933,
      "grad_norm": 3.234375,
      "learning_rate": 4.197865474530817e-05,
      "loss": 0.941,
      "step": 209190
    },
    {
      "epoch": 0.7331938442558888,
      "grad_norm": 3.296875,
      "learning_rate": 4.197800571664447e-05,
      "loss": 0.925,
      "step": 209200
    },
    {
      "epoch": 0.7332288917627845,
      "grad_norm": 2.96875,
      "learning_rate": 4.197735668798077e-05,
      "loss": 0.8959,
      "step": 209210
    },
    {
      "epoch": 0.73326393926968,
      "grad_norm": 2.984375,
      "learning_rate": 4.197670765931706e-05,
      "loss": 0.9519,
      "step": 209220
    },
    {
      "epoch": 0.7332989867765757,
      "grad_norm": 2.796875,
      "learning_rate": 4.1976058630653364e-05,
      "loss": 0.9253,
      "step": 209230
    },
    {
      "epoch": 0.7333340342834712,
      "grad_norm": 2.71875,
      "learning_rate": 4.1975409601989666e-05,
      "loss": 0.9401,
      "step": 209240
    },
    {
      "epoch": 0.7333690817903669,
      "grad_norm": 2.734375,
      "learning_rate": 4.197476057332596e-05,
      "loss": 0.9535,
      "step": 209250
    },
    {
      "epoch": 0.7334041292972624,
      "grad_norm": 2.546875,
      "learning_rate": 4.197411154466226e-05,
      "loss": 0.8773,
      "step": 209260
    },
    {
      "epoch": 0.733439176804158,
      "grad_norm": 3.296875,
      "learning_rate": 4.1973462515998556e-05,
      "loss": 0.878,
      "step": 209270
    },
    {
      "epoch": 0.7334742243110536,
      "grad_norm": 2.828125,
      "learning_rate": 4.197281348733486e-05,
      "loss": 0.963,
      "step": 209280
    },
    {
      "epoch": 0.7335092718179492,
      "grad_norm": 2.75,
      "learning_rate": 4.197216445867115e-05,
      "loss": 0.9216,
      "step": 209290
    },
    {
      "epoch": 0.7335443193248449,
      "grad_norm": 3.03125,
      "learning_rate": 4.1971515430007454e-05,
      "loss": 0.8161,
      "step": 209300
    },
    {
      "epoch": 0.7335793668317404,
      "grad_norm": 3.0625,
      "learning_rate": 4.197086640134375e-05,
      "loss": 0.955,
      "step": 209310
    },
    {
      "epoch": 0.7336144143386361,
      "grad_norm": 3.21875,
      "learning_rate": 4.197021737268005e-05,
      "loss": 0.8482,
      "step": 209320
    },
    {
      "epoch": 0.7336494618455316,
      "grad_norm": 2.796875,
      "learning_rate": 4.1969568344016344e-05,
      "loss": 0.8383,
      "step": 209330
    },
    {
      "epoch": 0.7336845093524272,
      "grad_norm": 2.609375,
      "learning_rate": 4.1968919315352646e-05,
      "loss": 0.9291,
      "step": 209340
    },
    {
      "epoch": 0.7337195568593228,
      "grad_norm": 3.0,
      "learning_rate": 4.196827028668894e-05,
      "loss": 0.9785,
      "step": 209350
    },
    {
      "epoch": 0.7337546043662184,
      "grad_norm": 3.046875,
      "learning_rate": 4.196762125802524e-05,
      "loss": 0.8726,
      "step": 209360
    },
    {
      "epoch": 0.733789651873114,
      "grad_norm": 2.875,
      "learning_rate": 4.1966972229361536e-05,
      "loss": 0.9306,
      "step": 209370
    },
    {
      "epoch": 0.7338246993800096,
      "grad_norm": 2.6875,
      "learning_rate": 4.196632320069784e-05,
      "loss": 0.8033,
      "step": 209380
    },
    {
      "epoch": 0.7338597468869053,
      "grad_norm": 3.09375,
      "learning_rate": 4.196567417203414e-05,
      "loss": 0.8963,
      "step": 209390
    },
    {
      "epoch": 0.7338947943938008,
      "grad_norm": 2.921875,
      "learning_rate": 4.1965025143370434e-05,
      "loss": 0.8969,
      "step": 209400
    },
    {
      "epoch": 0.7339298419006964,
      "grad_norm": 3.171875,
      "learning_rate": 4.1964376114706735e-05,
      "loss": 0.858,
      "step": 209410
    },
    {
      "epoch": 0.733964889407592,
      "grad_norm": 3.046875,
      "learning_rate": 4.196372708604303e-05,
      "loss": 0.9074,
      "step": 209420
    },
    {
      "epoch": 0.7339999369144876,
      "grad_norm": 2.6875,
      "learning_rate": 4.196307805737933e-05,
      "loss": 0.9342,
      "step": 209430
    },
    {
      "epoch": 0.7340349844213832,
      "grad_norm": 2.75,
      "learning_rate": 4.1962429028715626e-05,
      "loss": 0.8956,
      "step": 209440
    },
    {
      "epoch": 0.7340700319282788,
      "grad_norm": 2.71875,
      "learning_rate": 4.196178000005193e-05,
      "loss": 0.8479,
      "step": 209450
    },
    {
      "epoch": 0.7341050794351743,
      "grad_norm": 2.96875,
      "learning_rate": 4.196113097138822e-05,
      "loss": 0.8429,
      "step": 209460
    },
    {
      "epoch": 0.73414012694207,
      "grad_norm": 2.953125,
      "learning_rate": 4.196048194272452e-05,
      "loss": 0.8663,
      "step": 209470
    },
    {
      "epoch": 0.7341751744489655,
      "grad_norm": 2.5625,
      "learning_rate": 4.195983291406082e-05,
      "loss": 0.8386,
      "step": 209480
    },
    {
      "epoch": 0.7342102219558612,
      "grad_norm": 2.859375,
      "learning_rate": 4.195918388539712e-05,
      "loss": 0.9564,
      "step": 209490
    },
    {
      "epoch": 0.7342452694627568,
      "grad_norm": 2.75,
      "learning_rate": 4.1958534856733414e-05,
      "loss": 0.8672,
      "step": 209500
    },
    {
      "epoch": 0.7342803169696523,
      "grad_norm": 2.5,
      "learning_rate": 4.1957885828069715e-05,
      "loss": 0.8442,
      "step": 209510
    },
    {
      "epoch": 0.734315364476548,
      "grad_norm": 2.875,
      "learning_rate": 4.1957236799406017e-05,
      "loss": 0.9789,
      "step": 209520
    },
    {
      "epoch": 0.7343504119834435,
      "grad_norm": 2.5625,
      "learning_rate": 4.195658777074231e-05,
      "loss": 0.7866,
      "step": 209530
    },
    {
      "epoch": 0.7343854594903392,
      "grad_norm": 3.125,
      "learning_rate": 4.195593874207861e-05,
      "loss": 0.8182,
      "step": 209540
    },
    {
      "epoch": 0.7344205069972347,
      "grad_norm": 2.59375,
      "learning_rate": 4.19552897134149e-05,
      "loss": 0.8166,
      "step": 209550
    },
    {
      "epoch": 0.7344555545041304,
      "grad_norm": 3.25,
      "learning_rate": 4.19546406847512e-05,
      "loss": 0.9056,
      "step": 209560
    },
    {
      "epoch": 0.7344906020110259,
      "grad_norm": 2.921875,
      "learning_rate": 4.1953991656087496e-05,
      "loss": 0.9433,
      "step": 209570
    },
    {
      "epoch": 0.7345256495179215,
      "grad_norm": 3.03125,
      "learning_rate": 4.19533426274238e-05,
      "loss": 0.8253,
      "step": 209580
    },
    {
      "epoch": 0.7345606970248172,
      "grad_norm": 2.71875,
      "learning_rate": 4.195269359876009e-05,
      "loss": 0.8685,
      "step": 209590
    },
    {
      "epoch": 0.7345957445317127,
      "grad_norm": 3.28125,
      "learning_rate": 4.1952044570096394e-05,
      "loss": 0.9444,
      "step": 209600
    },
    {
      "epoch": 0.7346307920386084,
      "grad_norm": 3.171875,
      "learning_rate": 4.1951395541432695e-05,
      "loss": 0.855,
      "step": 209610
    },
    {
      "epoch": 0.7346658395455039,
      "grad_norm": 3.0,
      "learning_rate": 4.195074651276899e-05,
      "loss": 0.898,
      "step": 209620
    },
    {
      "epoch": 0.7347008870523996,
      "grad_norm": 2.921875,
      "learning_rate": 4.195009748410529e-05,
      "loss": 0.8217,
      "step": 209630
    },
    {
      "epoch": 0.7347359345592951,
      "grad_norm": 3.484375,
      "learning_rate": 4.1949448455441586e-05,
      "loss": 0.8796,
      "step": 209640
    },
    {
      "epoch": 0.7347709820661907,
      "grad_norm": 2.6875,
      "learning_rate": 4.194879942677789e-05,
      "loss": 0.9058,
      "step": 209650
    },
    {
      "epoch": 0.7348060295730863,
      "grad_norm": 3.09375,
      "learning_rate": 4.194815039811418e-05,
      "loss": 0.863,
      "step": 209660
    },
    {
      "epoch": 0.7348410770799819,
      "grad_norm": 2.6875,
      "learning_rate": 4.194750136945048e-05,
      "loss": 0.8787,
      "step": 209670
    },
    {
      "epoch": 0.7348761245868776,
      "grad_norm": 3.09375,
      "learning_rate": 4.194685234078678e-05,
      "loss": 0.9199,
      "step": 209680
    },
    {
      "epoch": 0.7349111720937731,
      "grad_norm": 3.328125,
      "learning_rate": 4.194620331212308e-05,
      "loss": 0.9221,
      "step": 209690
    },
    {
      "epoch": 0.7349462196006687,
      "grad_norm": 3.71875,
      "learning_rate": 4.1945554283459374e-05,
      "loss": 0.9632,
      "step": 209700
    },
    {
      "epoch": 0.7349812671075643,
      "grad_norm": 2.84375,
      "learning_rate": 4.1944905254795675e-05,
      "loss": 0.8575,
      "step": 209710
    },
    {
      "epoch": 0.7350163146144599,
      "grad_norm": 3.0,
      "learning_rate": 4.194425622613197e-05,
      "loss": 0.8871,
      "step": 209720
    },
    {
      "epoch": 0.7350513621213555,
      "grad_norm": 2.59375,
      "learning_rate": 4.194360719746827e-05,
      "loss": 0.9659,
      "step": 209730
    },
    {
      "epoch": 0.7350864096282511,
      "grad_norm": 3.015625,
      "learning_rate": 4.194295816880457e-05,
      "loss": 0.8601,
      "step": 209740
    },
    {
      "epoch": 0.7351214571351466,
      "grad_norm": 3.078125,
      "learning_rate": 4.194230914014087e-05,
      "loss": 1.0097,
      "step": 209750
    },
    {
      "epoch": 0.7351565046420423,
      "grad_norm": 2.765625,
      "learning_rate": 4.194166011147717e-05,
      "loss": 0.9847,
      "step": 209760
    },
    {
      "epoch": 0.7351915521489378,
      "grad_norm": 2.84375,
      "learning_rate": 4.194101108281346e-05,
      "loss": 0.8697,
      "step": 209770
    },
    {
      "epoch": 0.7352265996558335,
      "grad_norm": 2.71875,
      "learning_rate": 4.1940362054149764e-05,
      "loss": 0.8959,
      "step": 209780
    },
    {
      "epoch": 0.7352616471627291,
      "grad_norm": 2.90625,
      "learning_rate": 4.193971302548606e-05,
      "loss": 0.9281,
      "step": 209790
    },
    {
      "epoch": 0.7352966946696247,
      "grad_norm": 3.015625,
      "learning_rate": 4.193906399682236e-05,
      "loss": 0.8249,
      "step": 209800
    },
    {
      "epoch": 0.7353317421765203,
      "grad_norm": 2.765625,
      "learning_rate": 4.1938414968158655e-05,
      "loss": 0.8177,
      "step": 209810
    },
    {
      "epoch": 0.7353667896834158,
      "grad_norm": 2.796875,
      "learning_rate": 4.1937765939494956e-05,
      "loss": 0.8491,
      "step": 209820
    },
    {
      "epoch": 0.7354018371903115,
      "grad_norm": 2.9375,
      "learning_rate": 4.193711691083125e-05,
      "loss": 0.997,
      "step": 209830
    },
    {
      "epoch": 0.735436884697207,
      "grad_norm": 2.96875,
      "learning_rate": 4.193646788216755e-05,
      "loss": 0.8606,
      "step": 209840
    },
    {
      "epoch": 0.7354719322041027,
      "grad_norm": 2.96875,
      "learning_rate": 4.193581885350385e-05,
      "loss": 0.8404,
      "step": 209850
    },
    {
      "epoch": 0.7355069797109982,
      "grad_norm": 3.15625,
      "learning_rate": 4.193516982484015e-05,
      "loss": 0.8839,
      "step": 209860
    },
    {
      "epoch": 0.7355420272178939,
      "grad_norm": 3.0,
      "learning_rate": 4.193452079617644e-05,
      "loss": 0.9247,
      "step": 209870
    },
    {
      "epoch": 0.7355770747247895,
      "grad_norm": 2.78125,
      "learning_rate": 4.1933871767512744e-05,
      "loss": 0.9342,
      "step": 209880
    },
    {
      "epoch": 0.735612122231685,
      "grad_norm": 3.25,
      "learning_rate": 4.1933222738849046e-05,
      "loss": 0.8889,
      "step": 209890
    },
    {
      "epoch": 0.7356471697385807,
      "grad_norm": 3.015625,
      "learning_rate": 4.193257371018534e-05,
      "loss": 0.8565,
      "step": 209900
    },
    {
      "epoch": 0.7356822172454762,
      "grad_norm": 2.75,
      "learning_rate": 4.193192468152164e-05,
      "loss": 0.8971,
      "step": 209910
    },
    {
      "epoch": 0.7357172647523719,
      "grad_norm": 3.65625,
      "learning_rate": 4.1931275652857936e-05,
      "loss": 0.8833,
      "step": 209920
    },
    {
      "epoch": 0.7357523122592674,
      "grad_norm": 3.078125,
      "learning_rate": 4.193062662419423e-05,
      "loss": 0.8305,
      "step": 209930
    },
    {
      "epoch": 0.735787359766163,
      "grad_norm": 2.96875,
      "learning_rate": 4.1929977595530526e-05,
      "loss": 0.8697,
      "step": 209940
    },
    {
      "epoch": 0.7358224072730586,
      "grad_norm": 3.5,
      "learning_rate": 4.192932856686683e-05,
      "loss": 0.9933,
      "step": 209950
    },
    {
      "epoch": 0.7358574547799542,
      "grad_norm": 2.625,
      "learning_rate": 4.192867953820312e-05,
      "loss": 0.8978,
      "step": 209960
    },
    {
      "epoch": 0.7358925022868498,
      "grad_norm": 3.28125,
      "learning_rate": 4.192803050953942e-05,
      "loss": 0.8813,
      "step": 209970
    },
    {
      "epoch": 0.7359275497937454,
      "grad_norm": 2.71875,
      "learning_rate": 4.1927381480875724e-05,
      "loss": 0.8904,
      "step": 209980
    },
    {
      "epoch": 0.7359625973006411,
      "grad_norm": 3.234375,
      "learning_rate": 4.192673245221202e-05,
      "loss": 0.8315,
      "step": 209990
    },
    {
      "epoch": 0.7359976448075366,
      "grad_norm": 2.8125,
      "learning_rate": 4.192608342354832e-05,
      "loss": 0.8498,
      "step": 210000
    },
    {
      "epoch": 0.7359976448075366,
      "eval_loss": 0.8424704670906067,
      "eval_runtime": 560.4492,
      "eval_samples_per_second": 678.806,
      "eval_steps_per_second": 56.567,
      "step": 210000
    },
    {
      "epoch": 0.7360326923144322,
      "grad_norm": 2.6875,
      "learning_rate": 4.1925434394884615e-05,
      "loss": 0.867,
      "step": 210010
    },
    {
      "epoch": 0.7360677398213278,
      "grad_norm": 3.046875,
      "learning_rate": 4.1924785366220916e-05,
      "loss": 0.8759,
      "step": 210020
    },
    {
      "epoch": 0.7361027873282234,
      "grad_norm": 2.8125,
      "learning_rate": 4.192413633755721e-05,
      "loss": 0.8461,
      "step": 210030
    },
    {
      "epoch": 0.736137834835119,
      "grad_norm": 2.6875,
      "learning_rate": 4.192348730889351e-05,
      "loss": 0.884,
      "step": 210040
    },
    {
      "epoch": 0.7361728823420146,
      "grad_norm": 2.6875,
      "learning_rate": 4.192283828022981e-05,
      "loss": 0.8942,
      "step": 210050
    },
    {
      "epoch": 0.7362079298489101,
      "grad_norm": 3.46875,
      "learning_rate": 4.192218925156611e-05,
      "loss": 0.8754,
      "step": 210060
    },
    {
      "epoch": 0.7362429773558058,
      "grad_norm": 3.078125,
      "learning_rate": 4.19215402229024e-05,
      "loss": 0.9228,
      "step": 210070
    },
    {
      "epoch": 0.7362780248627014,
      "grad_norm": 2.53125,
      "learning_rate": 4.1920891194238704e-05,
      "loss": 0.8957,
      "step": 210080
    },
    {
      "epoch": 0.736313072369597,
      "grad_norm": 3.25,
      "learning_rate": 4.1920242165575e-05,
      "loss": 0.8852,
      "step": 210090
    },
    {
      "epoch": 0.7363481198764926,
      "grad_norm": 2.875,
      "learning_rate": 4.19195931369113e-05,
      "loss": 0.9492,
      "step": 210100
    },
    {
      "epoch": 0.7363831673833882,
      "grad_norm": 2.734375,
      "learning_rate": 4.19189441082476e-05,
      "loss": 0.9849,
      "step": 210110
    },
    {
      "epoch": 0.7364182148902838,
      "grad_norm": 2.78125,
      "learning_rate": 4.1918295079583896e-05,
      "loss": 0.9119,
      "step": 210120
    },
    {
      "epoch": 0.7364532623971793,
      "grad_norm": 3.265625,
      "learning_rate": 4.19176460509202e-05,
      "loss": 0.939,
      "step": 210130
    },
    {
      "epoch": 0.736488309904075,
      "grad_norm": 3.140625,
      "learning_rate": 4.191699702225649e-05,
      "loss": 0.9728,
      "step": 210140
    },
    {
      "epoch": 0.7365233574109705,
      "grad_norm": 2.953125,
      "learning_rate": 4.1916347993592794e-05,
      "loss": 0.9356,
      "step": 210150
    },
    {
      "epoch": 0.7365584049178662,
      "grad_norm": 2.765625,
      "learning_rate": 4.191569896492909e-05,
      "loss": 0.8889,
      "step": 210160
    },
    {
      "epoch": 0.7365934524247618,
      "grad_norm": 3.375,
      "learning_rate": 4.191504993626539e-05,
      "loss": 0.8534,
      "step": 210170
    },
    {
      "epoch": 0.7366284999316574,
      "grad_norm": 2.859375,
      "learning_rate": 4.1914400907601684e-05,
      "loss": 0.9183,
      "step": 210180
    },
    {
      "epoch": 0.736663547438553,
      "grad_norm": 2.90625,
      "learning_rate": 4.1913751878937986e-05,
      "loss": 0.9103,
      "step": 210190
    },
    {
      "epoch": 0.7366985949454485,
      "grad_norm": 2.75,
      "learning_rate": 4.191310285027428e-05,
      "loss": 0.8341,
      "step": 210200
    },
    {
      "epoch": 0.7367336424523442,
      "grad_norm": 2.265625,
      "learning_rate": 4.191245382161058e-05,
      "loss": 0.8879,
      "step": 210210
    },
    {
      "epoch": 0.7367686899592397,
      "grad_norm": 2.859375,
      "learning_rate": 4.1911804792946876e-05,
      "loss": 0.9187,
      "step": 210220
    },
    {
      "epoch": 0.7368037374661354,
      "grad_norm": 2.515625,
      "learning_rate": 4.191115576428318e-05,
      "loss": 0.8783,
      "step": 210230
    },
    {
      "epoch": 0.7368387849730309,
      "grad_norm": 2.59375,
      "learning_rate": 4.191050673561947e-05,
      "loss": 0.8522,
      "step": 210240
    },
    {
      "epoch": 0.7368738324799265,
      "grad_norm": 2.953125,
      "learning_rate": 4.1909857706955774e-05,
      "loss": 0.8941,
      "step": 210250
    },
    {
      "epoch": 0.7369088799868221,
      "grad_norm": 3.0625,
      "learning_rate": 4.1909208678292075e-05,
      "loss": 0.909,
      "step": 210260
    },
    {
      "epoch": 0.7369439274937177,
      "grad_norm": 3.15625,
      "learning_rate": 4.190855964962837e-05,
      "loss": 0.9143,
      "step": 210270
    },
    {
      "epoch": 0.7369789750006134,
      "grad_norm": 3.03125,
      "learning_rate": 4.190791062096467e-05,
      "loss": 0.8213,
      "step": 210280
    },
    {
      "epoch": 0.7370140225075089,
      "grad_norm": 2.953125,
      "learning_rate": 4.1907261592300966e-05,
      "loss": 0.8976,
      "step": 210290
    },
    {
      "epoch": 0.7370490700144046,
      "grad_norm": 2.78125,
      "learning_rate": 4.190661256363726e-05,
      "loss": 0.8542,
      "step": 210300
    },
    {
      "epoch": 0.7370841175213001,
      "grad_norm": 2.828125,
      "learning_rate": 4.1905963534973555e-05,
      "loss": 0.8738,
      "step": 210310
    },
    {
      "epoch": 0.7371191650281957,
      "grad_norm": 2.875,
      "learning_rate": 4.1905314506309856e-05,
      "loss": 0.8854,
      "step": 210320
    },
    {
      "epoch": 0.7371542125350913,
      "grad_norm": 2.96875,
      "learning_rate": 4.190466547764615e-05,
      "loss": 0.9114,
      "step": 210330
    },
    {
      "epoch": 0.7371892600419869,
      "grad_norm": 3.34375,
      "learning_rate": 4.190401644898245e-05,
      "loss": 0.9449,
      "step": 210340
    },
    {
      "epoch": 0.7372243075488825,
      "grad_norm": 2.828125,
      "learning_rate": 4.1903367420318754e-05,
      "loss": 0.8654,
      "step": 210350
    },
    {
      "epoch": 0.7372593550557781,
      "grad_norm": 3.109375,
      "learning_rate": 4.190271839165505e-05,
      "loss": 0.8369,
      "step": 210360
    },
    {
      "epoch": 0.7372944025626738,
      "grad_norm": 3.03125,
      "learning_rate": 4.190206936299135e-05,
      "loss": 0.8902,
      "step": 210370
    },
    {
      "epoch": 0.7373294500695693,
      "grad_norm": 2.8125,
      "learning_rate": 4.1901420334327644e-05,
      "loss": 0.8417,
      "step": 210380
    },
    {
      "epoch": 0.7373644975764649,
      "grad_norm": 3.0,
      "learning_rate": 4.1900771305663946e-05,
      "loss": 0.8633,
      "step": 210390
    },
    {
      "epoch": 0.7373995450833605,
      "grad_norm": 2.9375,
      "learning_rate": 4.190012227700024e-05,
      "loss": 0.939,
      "step": 210400
    },
    {
      "epoch": 0.7374345925902561,
      "grad_norm": 3.125,
      "learning_rate": 4.189947324833654e-05,
      "loss": 0.8884,
      "step": 210410
    },
    {
      "epoch": 0.7374696400971517,
      "grad_norm": 2.984375,
      "learning_rate": 4.1898824219672836e-05,
      "loss": 0.8715,
      "step": 210420
    },
    {
      "epoch": 0.7375046876040473,
      "grad_norm": 3.046875,
      "learning_rate": 4.189817519100914e-05,
      "loss": 0.8956,
      "step": 210430
    },
    {
      "epoch": 0.7375397351109428,
      "grad_norm": 3.09375,
      "learning_rate": 4.189752616234543e-05,
      "loss": 0.9269,
      "step": 210440
    },
    {
      "epoch": 0.7375747826178385,
      "grad_norm": 3.140625,
      "learning_rate": 4.1896877133681734e-05,
      "loss": 0.9717,
      "step": 210450
    },
    {
      "epoch": 0.737609830124734,
      "grad_norm": 3.015625,
      "learning_rate": 4.189622810501803e-05,
      "loss": 0.9288,
      "step": 210460
    },
    {
      "epoch": 0.7376448776316297,
      "grad_norm": 2.796875,
      "learning_rate": 4.189557907635433e-05,
      "loss": 0.8703,
      "step": 210470
    },
    {
      "epoch": 0.7376799251385253,
      "grad_norm": 3.078125,
      "learning_rate": 4.189493004769063e-05,
      "loss": 0.8962,
      "step": 210480
    },
    {
      "epoch": 0.7377149726454209,
      "grad_norm": 3.0625,
      "learning_rate": 4.1894281019026926e-05,
      "loss": 0.9652,
      "step": 210490
    },
    {
      "epoch": 0.7377500201523165,
      "grad_norm": 2.65625,
      "learning_rate": 4.189363199036323e-05,
      "loss": 0.9582,
      "step": 210500
    },
    {
      "epoch": 0.737785067659212,
      "grad_norm": 3.0625,
      "learning_rate": 4.189298296169952e-05,
      "loss": 0.9508,
      "step": 210510
    },
    {
      "epoch": 0.7378201151661077,
      "grad_norm": 3.78125,
      "learning_rate": 4.189233393303582e-05,
      "loss": 0.9475,
      "step": 210520
    },
    {
      "epoch": 0.7378551626730032,
      "grad_norm": 2.46875,
      "learning_rate": 4.189168490437212e-05,
      "loss": 0.8457,
      "step": 210530
    },
    {
      "epoch": 0.7378902101798989,
      "grad_norm": 3.375,
      "learning_rate": 4.189103587570842e-05,
      "loss": 0.9513,
      "step": 210540
    },
    {
      "epoch": 0.7379252576867944,
      "grad_norm": 2.9375,
      "learning_rate": 4.1890386847044714e-05,
      "loss": 0.8683,
      "step": 210550
    },
    {
      "epoch": 0.73796030519369,
      "grad_norm": 3.3125,
      "learning_rate": 4.1889737818381015e-05,
      "loss": 0.916,
      "step": 210560
    },
    {
      "epoch": 0.7379953527005857,
      "grad_norm": 3.015625,
      "learning_rate": 4.188908878971731e-05,
      "loss": 0.8713,
      "step": 210570
    },
    {
      "epoch": 0.7380304002074812,
      "grad_norm": 2.6875,
      "learning_rate": 4.188843976105361e-05,
      "loss": 0.8894,
      "step": 210580
    },
    {
      "epoch": 0.7380654477143769,
      "grad_norm": 2.71875,
      "learning_rate": 4.1887790732389906e-05,
      "loss": 0.87,
      "step": 210590
    },
    {
      "epoch": 0.7381004952212724,
      "grad_norm": 3.203125,
      "learning_rate": 4.188714170372621e-05,
      "loss": 0.8805,
      "step": 210600
    },
    {
      "epoch": 0.7381355427281681,
      "grad_norm": 2.625,
      "learning_rate": 4.188649267506251e-05,
      "loss": 0.8567,
      "step": 210610
    },
    {
      "epoch": 0.7381705902350636,
      "grad_norm": 3.109375,
      "learning_rate": 4.18858436463988e-05,
      "loss": 0.9296,
      "step": 210620
    },
    {
      "epoch": 0.7382056377419592,
      "grad_norm": 3.078125,
      "learning_rate": 4.1885194617735105e-05,
      "loss": 0.9507,
      "step": 210630
    },
    {
      "epoch": 0.7382406852488548,
      "grad_norm": 2.859375,
      "learning_rate": 4.18845455890714e-05,
      "loss": 0.8425,
      "step": 210640
    },
    {
      "epoch": 0.7382757327557504,
      "grad_norm": 2.625,
      "learning_rate": 4.18838965604077e-05,
      "loss": 0.9381,
      "step": 210650
    },
    {
      "epoch": 0.7383107802626461,
      "grad_norm": 2.90625,
      "learning_rate": 4.1883247531743995e-05,
      "loss": 0.8794,
      "step": 210660
    },
    {
      "epoch": 0.7383458277695416,
      "grad_norm": 3.453125,
      "learning_rate": 4.18825985030803e-05,
      "loss": 0.9468,
      "step": 210670
    },
    {
      "epoch": 0.7383808752764373,
      "grad_norm": 2.984375,
      "learning_rate": 4.1881949474416584e-05,
      "loss": 0.9019,
      "step": 210680
    },
    {
      "epoch": 0.7384159227833328,
      "grad_norm": 2.875,
      "learning_rate": 4.1881300445752886e-05,
      "loss": 0.8334,
      "step": 210690
    },
    {
      "epoch": 0.7384509702902284,
      "grad_norm": 2.9375,
      "learning_rate": 4.188065141708919e-05,
      "loss": 0.8625,
      "step": 210700
    },
    {
      "epoch": 0.738486017797124,
      "grad_norm": 3.015625,
      "learning_rate": 4.188000238842548e-05,
      "loss": 0.9056,
      "step": 210710
    },
    {
      "epoch": 0.7385210653040196,
      "grad_norm": 2.84375,
      "learning_rate": 4.187935335976178e-05,
      "loss": 0.8386,
      "step": 210720
    },
    {
      "epoch": 0.7385561128109152,
      "grad_norm": 2.59375,
      "learning_rate": 4.187870433109808e-05,
      "loss": 0.9039,
      "step": 210730
    },
    {
      "epoch": 0.7385911603178108,
      "grad_norm": 2.609375,
      "learning_rate": 4.187805530243438e-05,
      "loss": 0.9393,
      "step": 210740
    },
    {
      "epoch": 0.7386262078247063,
      "grad_norm": 2.96875,
      "learning_rate": 4.1877406273770674e-05,
      "loss": 0.8411,
      "step": 210750
    },
    {
      "epoch": 0.738661255331602,
      "grad_norm": 3.265625,
      "learning_rate": 4.1876757245106975e-05,
      "loss": 0.9888,
      "step": 210760
    },
    {
      "epoch": 0.7386963028384976,
      "grad_norm": 3.171875,
      "learning_rate": 4.187610821644327e-05,
      "loss": 0.9277,
      "step": 210770
    },
    {
      "epoch": 0.7387313503453932,
      "grad_norm": 2.75,
      "learning_rate": 4.187545918777957e-05,
      "loss": 0.896,
      "step": 210780
    },
    {
      "epoch": 0.7387663978522888,
      "grad_norm": 2.96875,
      "learning_rate": 4.1874810159115866e-05,
      "loss": 0.8623,
      "step": 210790
    },
    {
      "epoch": 0.7388014453591844,
      "grad_norm": 3.125,
      "learning_rate": 4.187416113045217e-05,
      "loss": 0.9347,
      "step": 210800
    },
    {
      "epoch": 0.73883649286608,
      "grad_norm": 2.765625,
      "learning_rate": 4.187351210178846e-05,
      "loss": 0.8745,
      "step": 210810
    },
    {
      "epoch": 0.7388715403729755,
      "grad_norm": 3.046875,
      "learning_rate": 4.187286307312476e-05,
      "loss": 0.9772,
      "step": 210820
    },
    {
      "epoch": 0.7389065878798712,
      "grad_norm": 2.671875,
      "learning_rate": 4.187221404446106e-05,
      "loss": 0.8886,
      "step": 210830
    },
    {
      "epoch": 0.7389416353867667,
      "grad_norm": 2.65625,
      "learning_rate": 4.187156501579736e-05,
      "loss": 0.8863,
      "step": 210840
    },
    {
      "epoch": 0.7389766828936624,
      "grad_norm": 3.09375,
      "learning_rate": 4.187091598713366e-05,
      "loss": 0.8468,
      "step": 210850
    },
    {
      "epoch": 0.739011730400558,
      "grad_norm": 2.953125,
      "learning_rate": 4.1870266958469955e-05,
      "loss": 0.9287,
      "step": 210860
    },
    {
      "epoch": 0.7390467779074535,
      "grad_norm": 3.171875,
      "learning_rate": 4.186961792980626e-05,
      "loss": 0.9779,
      "step": 210870
    },
    {
      "epoch": 0.7390818254143492,
      "grad_norm": 3.265625,
      "learning_rate": 4.186896890114255e-05,
      "loss": 0.904,
      "step": 210880
    },
    {
      "epoch": 0.7391168729212447,
      "grad_norm": 2.9375,
      "learning_rate": 4.186831987247885e-05,
      "loss": 0.8783,
      "step": 210890
    },
    {
      "epoch": 0.7391519204281404,
      "grad_norm": 3.375,
      "learning_rate": 4.186767084381515e-05,
      "loss": 0.9929,
      "step": 210900
    },
    {
      "epoch": 0.7391869679350359,
      "grad_norm": 3.421875,
      "learning_rate": 4.186702181515145e-05,
      "loss": 0.8569,
      "step": 210910
    },
    {
      "epoch": 0.7392220154419316,
      "grad_norm": 2.8125,
      "learning_rate": 4.186637278648774e-05,
      "loss": 0.8481,
      "step": 210920
    },
    {
      "epoch": 0.7392570629488271,
      "grad_norm": 2.4375,
      "learning_rate": 4.1865723757824045e-05,
      "loss": 0.8354,
      "step": 210930
    },
    {
      "epoch": 0.7392921104557227,
      "grad_norm": 2.921875,
      "learning_rate": 4.186507472916034e-05,
      "loss": 0.8538,
      "step": 210940
    },
    {
      "epoch": 0.7393271579626183,
      "grad_norm": 3.328125,
      "learning_rate": 4.186442570049664e-05,
      "loss": 0.9267,
      "step": 210950
    },
    {
      "epoch": 0.7393622054695139,
      "grad_norm": 2.734375,
      "learning_rate": 4.1863776671832935e-05,
      "loss": 0.9397,
      "step": 210960
    },
    {
      "epoch": 0.7393972529764096,
      "grad_norm": 2.984375,
      "learning_rate": 4.186312764316924e-05,
      "loss": 0.9073,
      "step": 210970
    },
    {
      "epoch": 0.7394323004833051,
      "grad_norm": 6.34375,
      "learning_rate": 4.186247861450554e-05,
      "loss": 0.9176,
      "step": 210980
    },
    {
      "epoch": 0.7394673479902008,
      "grad_norm": 3.125,
      "learning_rate": 4.186182958584183e-05,
      "loss": 0.8927,
      "step": 210990
    },
    {
      "epoch": 0.7395023954970963,
      "grad_norm": 2.65625,
      "learning_rate": 4.1861180557178134e-05,
      "loss": 0.8794,
      "step": 211000
    },
    {
      "epoch": 0.7395374430039919,
      "grad_norm": 2.84375,
      "learning_rate": 4.186053152851443e-05,
      "loss": 0.9917,
      "step": 211010
    },
    {
      "epoch": 0.7395724905108875,
      "grad_norm": 2.65625,
      "learning_rate": 4.185988249985073e-05,
      "loss": 0.8638,
      "step": 211020
    },
    {
      "epoch": 0.7396075380177831,
      "grad_norm": 3.015625,
      "learning_rate": 4.1859233471187025e-05,
      "loss": 0.8586,
      "step": 211030
    },
    {
      "epoch": 0.7396425855246787,
      "grad_norm": 2.578125,
      "learning_rate": 4.1858584442523326e-05,
      "loss": 0.8614,
      "step": 211040
    },
    {
      "epoch": 0.7396776330315743,
      "grad_norm": 3.1875,
      "learning_rate": 4.185793541385962e-05,
      "loss": 0.954,
      "step": 211050
    },
    {
      "epoch": 0.73971268053847,
      "grad_norm": 2.75,
      "learning_rate": 4.1857286385195915e-05,
      "loss": 0.888,
      "step": 211060
    },
    {
      "epoch": 0.7397477280453655,
      "grad_norm": 2.703125,
      "learning_rate": 4.185663735653222e-05,
      "loss": 0.9517,
      "step": 211070
    },
    {
      "epoch": 0.7397827755522611,
      "grad_norm": 2.734375,
      "learning_rate": 4.185598832786851e-05,
      "loss": 0.89,
      "step": 211080
    },
    {
      "epoch": 0.7398178230591567,
      "grad_norm": 2.515625,
      "learning_rate": 4.185533929920481e-05,
      "loss": 0.8975,
      "step": 211090
    },
    {
      "epoch": 0.7398528705660523,
      "grad_norm": 2.765625,
      "learning_rate": 4.185469027054111e-05,
      "loss": 0.8816,
      "step": 211100
    },
    {
      "epoch": 0.7398879180729478,
      "grad_norm": 2.984375,
      "learning_rate": 4.185404124187741e-05,
      "loss": 0.9026,
      "step": 211110
    },
    {
      "epoch": 0.7399229655798435,
      "grad_norm": 2.71875,
      "learning_rate": 4.18533922132137e-05,
      "loss": 0.8908,
      "step": 211120
    },
    {
      "epoch": 0.739958013086739,
      "grad_norm": 2.875,
      "learning_rate": 4.1852743184550005e-05,
      "loss": 0.8839,
      "step": 211130
    },
    {
      "epoch": 0.7399930605936347,
      "grad_norm": 3.09375,
      "learning_rate": 4.18520941558863e-05,
      "loss": 0.933,
      "step": 211140
    },
    {
      "epoch": 0.7400281081005303,
      "grad_norm": 2.90625,
      "learning_rate": 4.18514451272226e-05,
      "loss": 0.9157,
      "step": 211150
    },
    {
      "epoch": 0.7400631556074259,
      "grad_norm": 3.296875,
      "learning_rate": 4.1850796098558895e-05,
      "loss": 0.8934,
      "step": 211160
    },
    {
      "epoch": 0.7400982031143215,
      "grad_norm": 4.53125,
      "learning_rate": 4.18501470698952e-05,
      "loss": 0.9102,
      "step": 211170
    },
    {
      "epoch": 0.740133250621217,
      "grad_norm": 3.21875,
      "learning_rate": 4.184949804123149e-05,
      "loss": 0.938,
      "step": 211180
    },
    {
      "epoch": 0.7401682981281127,
      "grad_norm": 2.578125,
      "learning_rate": 4.184884901256779e-05,
      "loss": 0.9196,
      "step": 211190
    },
    {
      "epoch": 0.7402033456350082,
      "grad_norm": 2.640625,
      "learning_rate": 4.184819998390409e-05,
      "loss": 0.9506,
      "step": 211200
    },
    {
      "epoch": 0.7402383931419039,
      "grad_norm": 2.734375,
      "learning_rate": 4.184755095524039e-05,
      "loss": 0.8953,
      "step": 211210
    },
    {
      "epoch": 0.7402734406487994,
      "grad_norm": 2.953125,
      "learning_rate": 4.184690192657669e-05,
      "loss": 0.8294,
      "step": 211220
    },
    {
      "epoch": 0.740308488155695,
      "grad_norm": 3.28125,
      "learning_rate": 4.1846252897912985e-05,
      "loss": 0.9445,
      "step": 211230
    },
    {
      "epoch": 0.7403435356625906,
      "grad_norm": 2.53125,
      "learning_rate": 4.1845603869249286e-05,
      "loss": 0.9339,
      "step": 211240
    },
    {
      "epoch": 0.7403785831694862,
      "grad_norm": 2.765625,
      "learning_rate": 4.184495484058558e-05,
      "loss": 1.0823,
      "step": 211250
    },
    {
      "epoch": 0.7404136306763819,
      "grad_norm": 2.828125,
      "learning_rate": 4.184430581192188e-05,
      "loss": 0.9949,
      "step": 211260
    },
    {
      "epoch": 0.7404486781832774,
      "grad_norm": 2.65625,
      "learning_rate": 4.184365678325818e-05,
      "loss": 0.8542,
      "step": 211270
    },
    {
      "epoch": 0.7404837256901731,
      "grad_norm": 2.96875,
      "learning_rate": 4.184300775459448e-05,
      "loss": 0.8778,
      "step": 211280
    },
    {
      "epoch": 0.7405187731970686,
      "grad_norm": 2.609375,
      "learning_rate": 4.184235872593077e-05,
      "loss": 0.8406,
      "step": 211290
    },
    {
      "epoch": 0.7405538207039642,
      "grad_norm": 3.0,
      "learning_rate": 4.1841709697267074e-05,
      "loss": 0.961,
      "step": 211300
    },
    {
      "epoch": 0.7405888682108598,
      "grad_norm": 3.203125,
      "learning_rate": 4.184106066860337e-05,
      "loss": 0.9663,
      "step": 211310
    },
    {
      "epoch": 0.7406239157177554,
      "grad_norm": 3.09375,
      "learning_rate": 4.184041163993967e-05,
      "loss": 0.8458,
      "step": 211320
    },
    {
      "epoch": 0.740658963224651,
      "grad_norm": 2.9375,
      "learning_rate": 4.1839762611275965e-05,
      "loss": 0.8726,
      "step": 211330
    },
    {
      "epoch": 0.7406940107315466,
      "grad_norm": 3.1875,
      "learning_rate": 4.1839113582612266e-05,
      "loss": 0.9013,
      "step": 211340
    },
    {
      "epoch": 0.7407290582384423,
      "grad_norm": 3.078125,
      "learning_rate": 4.183846455394857e-05,
      "loss": 0.8812,
      "step": 211350
    },
    {
      "epoch": 0.7407641057453378,
      "grad_norm": 2.75,
      "learning_rate": 4.183781552528486e-05,
      "loss": 0.8303,
      "step": 211360
    },
    {
      "epoch": 0.7407991532522334,
      "grad_norm": 2.140625,
      "learning_rate": 4.1837166496621163e-05,
      "loss": 0.8954,
      "step": 211370
    },
    {
      "epoch": 0.740834200759129,
      "grad_norm": 2.828125,
      "learning_rate": 4.183651746795746e-05,
      "loss": 0.8865,
      "step": 211380
    },
    {
      "epoch": 0.7408692482660246,
      "grad_norm": 3.078125,
      "learning_rate": 4.183586843929376e-05,
      "loss": 0.9741,
      "step": 211390
    },
    {
      "epoch": 0.7409042957729202,
      "grad_norm": 3.3125,
      "learning_rate": 4.1835219410630054e-05,
      "loss": 0.9269,
      "step": 211400
    },
    {
      "epoch": 0.7409393432798158,
      "grad_norm": 2.96875,
      "learning_rate": 4.1834570381966355e-05,
      "loss": 0.9224,
      "step": 211410
    },
    {
      "epoch": 0.7409743907867113,
      "grad_norm": 2.828125,
      "learning_rate": 4.183392135330265e-05,
      "loss": 0.9495,
      "step": 211420
    },
    {
      "epoch": 0.741009438293607,
      "grad_norm": 3.109375,
      "learning_rate": 4.1833272324638945e-05,
      "loss": 0.8715,
      "step": 211430
    },
    {
      "epoch": 0.7410444858005025,
      "grad_norm": 3.265625,
      "learning_rate": 4.1832623295975246e-05,
      "loss": 0.8318,
      "step": 211440
    },
    {
      "epoch": 0.7410795333073982,
      "grad_norm": 2.921875,
      "learning_rate": 4.183197426731154e-05,
      "loss": 0.877,
      "step": 211450
    },
    {
      "epoch": 0.7411145808142938,
      "grad_norm": 3.015625,
      "learning_rate": 4.183132523864784e-05,
      "loss": 0.9591,
      "step": 211460
    },
    {
      "epoch": 0.7411496283211894,
      "grad_norm": 3.8125,
      "learning_rate": 4.183067620998414e-05,
      "loss": 0.9781,
      "step": 211470
    },
    {
      "epoch": 0.741184675828085,
      "grad_norm": 2.953125,
      "learning_rate": 4.183002718132044e-05,
      "loss": 0.8891,
      "step": 211480
    },
    {
      "epoch": 0.7412197233349805,
      "grad_norm": 2.734375,
      "learning_rate": 4.182937815265673e-05,
      "loss": 0.7939,
      "step": 211490
    },
    {
      "epoch": 0.7412547708418762,
      "grad_norm": 2.6875,
      "learning_rate": 4.1828729123993034e-05,
      "loss": 0.9264,
      "step": 211500
    },
    {
      "epoch": 0.7412898183487717,
      "grad_norm": 3.65625,
      "learning_rate": 4.182808009532933e-05,
      "loss": 0.9022,
      "step": 211510
    },
    {
      "epoch": 0.7413248658556674,
      "grad_norm": 3.234375,
      "learning_rate": 4.182743106666563e-05,
      "loss": 0.9554,
      "step": 211520
    },
    {
      "epoch": 0.7413599133625629,
      "grad_norm": 2.703125,
      "learning_rate": 4.1826782038001925e-05,
      "loss": 0.8718,
      "step": 211530
    },
    {
      "epoch": 0.7413949608694586,
      "grad_norm": 3.1875,
      "learning_rate": 4.1826133009338226e-05,
      "loss": 0.8951,
      "step": 211540
    },
    {
      "epoch": 0.7414300083763542,
      "grad_norm": 2.8125,
      "learning_rate": 4.182548398067452e-05,
      "loss": 0.9355,
      "step": 211550
    },
    {
      "epoch": 0.7414650558832497,
      "grad_norm": 2.796875,
      "learning_rate": 4.182483495201082e-05,
      "loss": 0.8556,
      "step": 211560
    },
    {
      "epoch": 0.7415001033901454,
      "grad_norm": 3.296875,
      "learning_rate": 4.1824185923347123e-05,
      "loss": 0.9694,
      "step": 211570
    },
    {
      "epoch": 0.7415351508970409,
      "grad_norm": 2.53125,
      "learning_rate": 4.182353689468342e-05,
      "loss": 0.9104,
      "step": 211580
    },
    {
      "epoch": 0.7415701984039366,
      "grad_norm": 2.609375,
      "learning_rate": 4.182288786601972e-05,
      "loss": 0.8449,
      "step": 211590
    },
    {
      "epoch": 0.7416052459108321,
      "grad_norm": 2.828125,
      "learning_rate": 4.1822238837356014e-05,
      "loss": 0.8823,
      "step": 211600
    },
    {
      "epoch": 0.7416402934177277,
      "grad_norm": 2.640625,
      "learning_rate": 4.1821589808692315e-05,
      "loss": 0.853,
      "step": 211610
    },
    {
      "epoch": 0.7416753409246233,
      "grad_norm": 2.71875,
      "learning_rate": 4.182094078002861e-05,
      "loss": 0.8753,
      "step": 211620
    },
    {
      "epoch": 0.7417103884315189,
      "grad_norm": 2.859375,
      "learning_rate": 4.182029175136491e-05,
      "loss": 0.986,
      "step": 211630
    },
    {
      "epoch": 0.7417454359384145,
      "grad_norm": 2.828125,
      "learning_rate": 4.1819642722701206e-05,
      "loss": 0.8089,
      "step": 211640
    },
    {
      "epoch": 0.7417804834453101,
      "grad_norm": 3.0,
      "learning_rate": 4.181899369403751e-05,
      "loss": 0.8985,
      "step": 211650
    },
    {
      "epoch": 0.7418155309522058,
      "grad_norm": 2.515625,
      "learning_rate": 4.18183446653738e-05,
      "loss": 0.8935,
      "step": 211660
    },
    {
      "epoch": 0.7418505784591013,
      "grad_norm": 2.65625,
      "learning_rate": 4.1817695636710103e-05,
      "loss": 0.8871,
      "step": 211670
    },
    {
      "epoch": 0.7418856259659969,
      "grad_norm": 2.71875,
      "learning_rate": 4.18170466080464e-05,
      "loss": 0.8673,
      "step": 211680
    },
    {
      "epoch": 0.7419206734728925,
      "grad_norm": 2.703125,
      "learning_rate": 4.18163975793827e-05,
      "loss": 0.8909,
      "step": 211690
    },
    {
      "epoch": 0.7419557209797881,
      "grad_norm": 3.03125,
      "learning_rate": 4.1815748550718994e-05,
      "loss": 0.8708,
      "step": 211700
    },
    {
      "epoch": 0.7419907684866837,
      "grad_norm": 3.140625,
      "learning_rate": 4.1815099522055295e-05,
      "loss": 0.9045,
      "step": 211710
    },
    {
      "epoch": 0.7420258159935793,
      "grad_norm": 3.484375,
      "learning_rate": 4.18144504933916e-05,
      "loss": 0.8659,
      "step": 211720
    },
    {
      "epoch": 0.7420608635004748,
      "grad_norm": 2.921875,
      "learning_rate": 4.181380146472789e-05,
      "loss": 0.9562,
      "step": 211730
    },
    {
      "epoch": 0.7420959110073705,
      "grad_norm": 2.671875,
      "learning_rate": 4.181315243606419e-05,
      "loss": 0.8634,
      "step": 211740
    },
    {
      "epoch": 0.7421309585142661,
      "grad_norm": 3.328125,
      "learning_rate": 4.181250340740049e-05,
      "loss": 0.9133,
      "step": 211750
    },
    {
      "epoch": 0.7421660060211617,
      "grad_norm": 2.90625,
      "learning_rate": 4.181185437873679e-05,
      "loss": 0.8894,
      "step": 211760
    },
    {
      "epoch": 0.7422010535280573,
      "grad_norm": 2.671875,
      "learning_rate": 4.1811205350073083e-05,
      "loss": 0.8669,
      "step": 211770
    },
    {
      "epoch": 0.7422361010349529,
      "grad_norm": 2.59375,
      "learning_rate": 4.1810556321409385e-05,
      "loss": 0.8124,
      "step": 211780
    },
    {
      "epoch": 0.7422711485418485,
      "grad_norm": 3.265625,
      "learning_rate": 4.180990729274568e-05,
      "loss": 0.9292,
      "step": 211790
    },
    {
      "epoch": 0.742306196048744,
      "grad_norm": 3.171875,
      "learning_rate": 4.180925826408198e-05,
      "loss": 0.9007,
      "step": 211800
    },
    {
      "epoch": 0.7423412435556397,
      "grad_norm": 2.625,
      "learning_rate": 4.1808609235418275e-05,
      "loss": 0.7964,
      "step": 211810
    },
    {
      "epoch": 0.7423762910625352,
      "grad_norm": 2.921875,
      "learning_rate": 4.180796020675457e-05,
      "loss": 0.8941,
      "step": 211820
    },
    {
      "epoch": 0.7424113385694309,
      "grad_norm": 2.515625,
      "learning_rate": 4.180731117809087e-05,
      "loss": 0.8634,
      "step": 211830
    },
    {
      "epoch": 0.7424463860763265,
      "grad_norm": 2.46875,
      "learning_rate": 4.1806662149427166e-05,
      "loss": 0.8679,
      "step": 211840
    },
    {
      "epoch": 0.742481433583222,
      "grad_norm": 2.609375,
      "learning_rate": 4.180601312076347e-05,
      "loss": 0.9022,
      "step": 211850
    },
    {
      "epoch": 0.7425164810901177,
      "grad_norm": 3.375,
      "learning_rate": 4.180536409209976e-05,
      "loss": 0.8361,
      "step": 211860
    },
    {
      "epoch": 0.7425515285970132,
      "grad_norm": 2.828125,
      "learning_rate": 4.1804715063436063e-05,
      "loss": 0.9285,
      "step": 211870
    },
    {
      "epoch": 0.7425865761039089,
      "grad_norm": 2.921875,
      "learning_rate": 4.180406603477236e-05,
      "loss": 0.9023,
      "step": 211880
    },
    {
      "epoch": 0.7426216236108044,
      "grad_norm": 3.234375,
      "learning_rate": 4.180341700610866e-05,
      "loss": 0.974,
      "step": 211890
    },
    {
      "epoch": 0.7426566711177001,
      "grad_norm": 3.0,
      "learning_rate": 4.1802767977444954e-05,
      "loss": 0.8781,
      "step": 211900
    },
    {
      "epoch": 0.7426917186245956,
      "grad_norm": 2.875,
      "learning_rate": 4.1802118948781255e-05,
      "loss": 0.9563,
      "step": 211910
    },
    {
      "epoch": 0.7427267661314912,
      "grad_norm": 2.9375,
      "learning_rate": 4.180146992011755e-05,
      "loss": 0.828,
      "step": 211920
    },
    {
      "epoch": 0.7427618136383868,
      "grad_norm": 3.109375,
      "learning_rate": 4.180082089145385e-05,
      "loss": 0.813,
      "step": 211930
    },
    {
      "epoch": 0.7427968611452824,
      "grad_norm": 2.8125,
      "learning_rate": 4.180017186279015e-05,
      "loss": 0.8498,
      "step": 211940
    },
    {
      "epoch": 0.7428319086521781,
      "grad_norm": 3.265625,
      "learning_rate": 4.179952283412645e-05,
      "loss": 0.8846,
      "step": 211950
    },
    {
      "epoch": 0.7428669561590736,
      "grad_norm": 3.5,
      "learning_rate": 4.179887380546275e-05,
      "loss": 0.882,
      "step": 211960
    },
    {
      "epoch": 0.7429020036659693,
      "grad_norm": 2.875,
      "learning_rate": 4.1798224776799043e-05,
      "loss": 0.9479,
      "step": 211970
    },
    {
      "epoch": 0.7429370511728648,
      "grad_norm": 2.9375,
      "learning_rate": 4.1797575748135345e-05,
      "loss": 0.891,
      "step": 211980
    },
    {
      "epoch": 0.7429720986797604,
      "grad_norm": 2.90625,
      "learning_rate": 4.179692671947164e-05,
      "loss": 0.9043,
      "step": 211990
    },
    {
      "epoch": 0.743007146186656,
      "grad_norm": 2.78125,
      "learning_rate": 4.179627769080794e-05,
      "loss": 0.9416,
      "step": 212000
    },
    {
      "epoch": 0.7430421936935516,
      "grad_norm": 2.828125,
      "learning_rate": 4.1795628662144235e-05,
      "loss": 0.8536,
      "step": 212010
    },
    {
      "epoch": 0.7430772412004472,
      "grad_norm": 2.9375,
      "learning_rate": 4.179497963348054e-05,
      "loss": 0.9731,
      "step": 212020
    },
    {
      "epoch": 0.7431122887073428,
      "grad_norm": 2.9375,
      "learning_rate": 4.179433060481683e-05,
      "loss": 0.8864,
      "step": 212030
    },
    {
      "epoch": 0.7431473362142385,
      "grad_norm": 2.421875,
      "learning_rate": 4.179368157615313e-05,
      "loss": 0.9019,
      "step": 212040
    },
    {
      "epoch": 0.743182383721134,
      "grad_norm": 3.03125,
      "learning_rate": 4.179303254748943e-05,
      "loss": 0.8766,
      "step": 212050
    },
    {
      "epoch": 0.7432174312280296,
      "grad_norm": 3.6875,
      "learning_rate": 4.179238351882573e-05,
      "loss": 0.9949,
      "step": 212060
    },
    {
      "epoch": 0.7432524787349252,
      "grad_norm": 2.515625,
      "learning_rate": 4.1791734490162023e-05,
      "loss": 0.9764,
      "step": 212070
    },
    {
      "epoch": 0.7432875262418208,
      "grad_norm": 3.21875,
      "learning_rate": 4.1791085461498325e-05,
      "loss": 0.9286,
      "step": 212080
    },
    {
      "epoch": 0.7433225737487164,
      "grad_norm": 3.109375,
      "learning_rate": 4.1790436432834626e-05,
      "loss": 0.9133,
      "step": 212090
    },
    {
      "epoch": 0.743357621255612,
      "grad_norm": 2.9375,
      "learning_rate": 4.178978740417092e-05,
      "loss": 0.926,
      "step": 212100
    },
    {
      "epoch": 0.7433926687625075,
      "grad_norm": 3.0625,
      "learning_rate": 4.178913837550722e-05,
      "loss": 0.902,
      "step": 212110
    },
    {
      "epoch": 0.7434277162694032,
      "grad_norm": 3.21875,
      "learning_rate": 4.178848934684352e-05,
      "loss": 0.9058,
      "step": 212120
    },
    {
      "epoch": 0.7434627637762987,
      "grad_norm": 3.171875,
      "learning_rate": 4.178784031817982e-05,
      "loss": 0.8109,
      "step": 212130
    },
    {
      "epoch": 0.7434978112831944,
      "grad_norm": 2.796875,
      "learning_rate": 4.178719128951611e-05,
      "loss": 0.8615,
      "step": 212140
    },
    {
      "epoch": 0.74353285879009,
      "grad_norm": 3.515625,
      "learning_rate": 4.1786542260852414e-05,
      "loss": 0.9522,
      "step": 212150
    },
    {
      "epoch": 0.7435679062969855,
      "grad_norm": 2.921875,
      "learning_rate": 4.178589323218871e-05,
      "loss": 0.9042,
      "step": 212160
    },
    {
      "epoch": 0.7436029538038812,
      "grad_norm": 3.28125,
      "learning_rate": 4.178524420352501e-05,
      "loss": 0.888,
      "step": 212170
    },
    {
      "epoch": 0.7436380013107767,
      "grad_norm": 2.015625,
      "learning_rate": 4.1784595174861305e-05,
      "loss": 0.9005,
      "step": 212180
    },
    {
      "epoch": 0.7436730488176724,
      "grad_norm": 3.109375,
      "learning_rate": 4.17839461461976e-05,
      "loss": 0.8847,
      "step": 212190
    },
    {
      "epoch": 0.7437080963245679,
      "grad_norm": 2.90625,
      "learning_rate": 4.17832971175339e-05,
      "loss": 0.904,
      "step": 212200
    },
    {
      "epoch": 0.7437431438314636,
      "grad_norm": 2.953125,
      "learning_rate": 4.1782648088870195e-05,
      "loss": 0.7979,
      "step": 212210
    },
    {
      "epoch": 0.7437781913383591,
      "grad_norm": 2.65625,
      "learning_rate": 4.17819990602065e-05,
      "loss": 0.9335,
      "step": 212220
    },
    {
      "epoch": 0.7438132388452547,
      "grad_norm": 2.71875,
      "learning_rate": 4.178135003154279e-05,
      "loss": 0.8574,
      "step": 212230
    },
    {
      "epoch": 0.7438482863521504,
      "grad_norm": 3.0625,
      "learning_rate": 4.178070100287909e-05,
      "loss": 0.8706,
      "step": 212240
    },
    {
      "epoch": 0.7438833338590459,
      "grad_norm": 3.109375,
      "learning_rate": 4.178005197421539e-05,
      "loss": 0.9182,
      "step": 212250
    },
    {
      "epoch": 0.7439183813659416,
      "grad_norm": 2.796875,
      "learning_rate": 4.177940294555169e-05,
      "loss": 0.9562,
      "step": 212260
    },
    {
      "epoch": 0.7439534288728371,
      "grad_norm": 3.03125,
      "learning_rate": 4.1778753916887983e-05,
      "loss": 0.8777,
      "step": 212270
    },
    {
      "epoch": 0.7439884763797328,
      "grad_norm": 2.8125,
      "learning_rate": 4.1778104888224285e-05,
      "loss": 0.9122,
      "step": 212280
    },
    {
      "epoch": 0.7440235238866283,
      "grad_norm": 2.875,
      "learning_rate": 4.177745585956058e-05,
      "loss": 0.8974,
      "step": 212290
    },
    {
      "epoch": 0.7440585713935239,
      "grad_norm": 2.921875,
      "learning_rate": 4.177680683089688e-05,
      "loss": 0.8172,
      "step": 212300
    },
    {
      "epoch": 0.7440936189004195,
      "grad_norm": 3.359375,
      "learning_rate": 4.177615780223318e-05,
      "loss": 0.8645,
      "step": 212310
    },
    {
      "epoch": 0.7441286664073151,
      "grad_norm": 3.234375,
      "learning_rate": 4.177550877356948e-05,
      "loss": 0.9062,
      "step": 212320
    },
    {
      "epoch": 0.7441637139142108,
      "grad_norm": 3.15625,
      "learning_rate": 4.177485974490578e-05,
      "loss": 0.8754,
      "step": 212330
    },
    {
      "epoch": 0.7441987614211063,
      "grad_norm": 2.859375,
      "learning_rate": 4.177421071624207e-05,
      "loss": 0.8891,
      "step": 212340
    },
    {
      "epoch": 0.744233808928002,
      "grad_norm": 3.25,
      "learning_rate": 4.1773561687578374e-05,
      "loss": 0.9309,
      "step": 212350
    },
    {
      "epoch": 0.7442688564348975,
      "grad_norm": 2.875,
      "learning_rate": 4.177291265891467e-05,
      "loss": 0.8433,
      "step": 212360
    },
    {
      "epoch": 0.7443039039417931,
      "grad_norm": 3.078125,
      "learning_rate": 4.177226363025097e-05,
      "loss": 0.9498,
      "step": 212370
    },
    {
      "epoch": 0.7443389514486887,
      "grad_norm": 3.1875,
      "learning_rate": 4.1771614601587265e-05,
      "loss": 0.9443,
      "step": 212380
    },
    {
      "epoch": 0.7443739989555843,
      "grad_norm": 2.96875,
      "learning_rate": 4.1770965572923566e-05,
      "loss": 0.8766,
      "step": 212390
    },
    {
      "epoch": 0.7444090464624799,
      "grad_norm": 2.953125,
      "learning_rate": 4.177031654425986e-05,
      "loss": 0.9268,
      "step": 212400
    },
    {
      "epoch": 0.7444440939693755,
      "grad_norm": 3.40625,
      "learning_rate": 4.176966751559616e-05,
      "loss": 0.9457,
      "step": 212410
    },
    {
      "epoch": 0.744479141476271,
      "grad_norm": 2.765625,
      "learning_rate": 4.176901848693246e-05,
      "loss": 0.876,
      "step": 212420
    },
    {
      "epoch": 0.7445141889831667,
      "grad_norm": 2.84375,
      "learning_rate": 4.176836945826876e-05,
      "loss": 0.958,
      "step": 212430
    },
    {
      "epoch": 0.7445492364900623,
      "grad_norm": 3.0,
      "learning_rate": 4.176772042960505e-05,
      "loss": 0.9123,
      "step": 212440
    },
    {
      "epoch": 0.7445842839969579,
      "grad_norm": 2.875,
      "learning_rate": 4.1767071400941354e-05,
      "loss": 0.8275,
      "step": 212450
    },
    {
      "epoch": 0.7446193315038535,
      "grad_norm": 2.640625,
      "learning_rate": 4.1766422372277656e-05,
      "loss": 0.8384,
      "step": 212460
    },
    {
      "epoch": 0.744654379010749,
      "grad_norm": 2.6875,
      "learning_rate": 4.176577334361395e-05,
      "loss": 0.9187,
      "step": 212470
    },
    {
      "epoch": 0.7446894265176447,
      "grad_norm": 3.171875,
      "learning_rate": 4.176512431495025e-05,
      "loss": 0.9686,
      "step": 212480
    },
    {
      "epoch": 0.7447244740245402,
      "grad_norm": 2.859375,
      "learning_rate": 4.1764475286286546e-05,
      "loss": 0.9425,
      "step": 212490
    },
    {
      "epoch": 0.7447595215314359,
      "grad_norm": 2.703125,
      "learning_rate": 4.176382625762285e-05,
      "loss": 0.9034,
      "step": 212500
    },
    {
      "epoch": 0.7447945690383314,
      "grad_norm": 3.40625,
      "learning_rate": 4.176317722895914e-05,
      "loss": 0.906,
      "step": 212510
    },
    {
      "epoch": 0.7448296165452271,
      "grad_norm": 2.5,
      "learning_rate": 4.1762528200295444e-05,
      "loss": 0.8588,
      "step": 212520
    },
    {
      "epoch": 0.7448646640521227,
      "grad_norm": 2.671875,
      "learning_rate": 4.176187917163174e-05,
      "loss": 0.821,
      "step": 212530
    },
    {
      "epoch": 0.7448997115590182,
      "grad_norm": 3.140625,
      "learning_rate": 4.176123014296804e-05,
      "loss": 0.9567,
      "step": 212540
    },
    {
      "epoch": 0.7449347590659139,
      "grad_norm": 2.75,
      "learning_rate": 4.1760581114304334e-05,
      "loss": 0.8797,
      "step": 212550
    },
    {
      "epoch": 0.7449698065728094,
      "grad_norm": 2.859375,
      "learning_rate": 4.175993208564063e-05,
      "loss": 0.9068,
      "step": 212560
    },
    {
      "epoch": 0.7450048540797051,
      "grad_norm": 3.046875,
      "learning_rate": 4.175928305697693e-05,
      "loss": 0.9429,
      "step": 212570
    },
    {
      "epoch": 0.7450399015866006,
      "grad_norm": 3.03125,
      "learning_rate": 4.1758634028313225e-05,
      "loss": 0.9234,
      "step": 212580
    },
    {
      "epoch": 0.7450749490934963,
      "grad_norm": 2.78125,
      "learning_rate": 4.1757984999649526e-05,
      "loss": 0.8777,
      "step": 212590
    },
    {
      "epoch": 0.7451099966003918,
      "grad_norm": 2.75,
      "learning_rate": 4.175733597098582e-05,
      "loss": 0.9016,
      "step": 212600
    },
    {
      "epoch": 0.7451450441072874,
      "grad_norm": 3.171875,
      "learning_rate": 4.175668694232212e-05,
      "loss": 0.9701,
      "step": 212610
    },
    {
      "epoch": 0.745180091614183,
      "grad_norm": 3.21875,
      "learning_rate": 4.175603791365842e-05,
      "loss": 0.9166,
      "step": 212620
    },
    {
      "epoch": 0.7452151391210786,
      "grad_norm": 2.984375,
      "learning_rate": 4.175538888499472e-05,
      "loss": 0.854,
      "step": 212630
    },
    {
      "epoch": 0.7452501866279743,
      "grad_norm": 2.890625,
      "learning_rate": 4.175473985633101e-05,
      "loss": 0.8739,
      "step": 212640
    },
    {
      "epoch": 0.7452852341348698,
      "grad_norm": 3.296875,
      "learning_rate": 4.1754090827667314e-05,
      "loss": 0.9001,
      "step": 212650
    },
    {
      "epoch": 0.7453202816417654,
      "grad_norm": 3.140625,
      "learning_rate": 4.175344179900361e-05,
      "loss": 0.9662,
      "step": 212660
    },
    {
      "epoch": 0.745355329148661,
      "grad_norm": 3.203125,
      "learning_rate": 4.175279277033991e-05,
      "loss": 0.8118,
      "step": 212670
    },
    {
      "epoch": 0.7453903766555566,
      "grad_norm": 3.953125,
      "learning_rate": 4.175214374167621e-05,
      "loss": 0.9275,
      "step": 212680
    },
    {
      "epoch": 0.7454254241624522,
      "grad_norm": 3.0625,
      "learning_rate": 4.1751494713012506e-05,
      "loss": 0.8827,
      "step": 212690
    },
    {
      "epoch": 0.7454604716693478,
      "grad_norm": 3.09375,
      "learning_rate": 4.175084568434881e-05,
      "loss": 0.8912,
      "step": 212700
    },
    {
      "epoch": 0.7454955191762433,
      "grad_norm": 2.890625,
      "learning_rate": 4.17501966556851e-05,
      "loss": 0.9593,
      "step": 212710
    },
    {
      "epoch": 0.745530566683139,
      "grad_norm": 2.703125,
      "learning_rate": 4.1749547627021404e-05,
      "loss": 0.8091,
      "step": 212720
    },
    {
      "epoch": 0.7455656141900346,
      "grad_norm": 3.046875,
      "learning_rate": 4.17488985983577e-05,
      "loss": 0.8303,
      "step": 212730
    },
    {
      "epoch": 0.7456006616969302,
      "grad_norm": 2.734375,
      "learning_rate": 4.1748249569694e-05,
      "loss": 0.9082,
      "step": 212740
    },
    {
      "epoch": 0.7456357092038258,
      "grad_norm": 3.234375,
      "learning_rate": 4.1747600541030294e-05,
      "loss": 0.8283,
      "step": 212750
    },
    {
      "epoch": 0.7456707567107214,
      "grad_norm": 3.015625,
      "learning_rate": 4.1746951512366596e-05,
      "loss": 0.8974,
      "step": 212760
    },
    {
      "epoch": 0.745705804217617,
      "grad_norm": 2.546875,
      "learning_rate": 4.174630248370289e-05,
      "loss": 0.9229,
      "step": 212770
    },
    {
      "epoch": 0.7457408517245125,
      "grad_norm": 2.828125,
      "learning_rate": 4.174565345503919e-05,
      "loss": 0.9027,
      "step": 212780
    },
    {
      "epoch": 0.7457758992314082,
      "grad_norm": 2.984375,
      "learning_rate": 4.1745004426375486e-05,
      "loss": 0.8883,
      "step": 212790
    },
    {
      "epoch": 0.7458109467383037,
      "grad_norm": 3.375,
      "learning_rate": 4.174435539771179e-05,
      "loss": 0.9337,
      "step": 212800
    },
    {
      "epoch": 0.7458459942451994,
      "grad_norm": 2.890625,
      "learning_rate": 4.174370636904809e-05,
      "loss": 0.8138,
      "step": 212810
    },
    {
      "epoch": 0.745881041752095,
      "grad_norm": 3.109375,
      "learning_rate": 4.1743057340384384e-05,
      "loss": 0.8713,
      "step": 212820
    },
    {
      "epoch": 0.7459160892589906,
      "grad_norm": 3.03125,
      "learning_rate": 4.1742408311720685e-05,
      "loss": 0.9014,
      "step": 212830
    },
    {
      "epoch": 0.7459511367658862,
      "grad_norm": 3.0,
      "learning_rate": 4.174175928305698e-05,
      "loss": 0.8545,
      "step": 212840
    },
    {
      "epoch": 0.7459861842727817,
      "grad_norm": 2.921875,
      "learning_rate": 4.174111025439328e-05,
      "loss": 0.9612,
      "step": 212850
    },
    {
      "epoch": 0.7460212317796774,
      "grad_norm": 3.125,
      "learning_rate": 4.1740461225729576e-05,
      "loss": 0.9925,
      "step": 212860
    },
    {
      "epoch": 0.7460562792865729,
      "grad_norm": 2.40625,
      "learning_rate": 4.173981219706588e-05,
      "loss": 0.8951,
      "step": 212870
    },
    {
      "epoch": 0.7460913267934686,
      "grad_norm": 3.0,
      "learning_rate": 4.173916316840217e-05,
      "loss": 0.9129,
      "step": 212880
    },
    {
      "epoch": 0.7461263743003641,
      "grad_norm": 2.59375,
      "learning_rate": 4.173851413973847e-05,
      "loss": 0.8387,
      "step": 212890
    },
    {
      "epoch": 0.7461614218072598,
      "grad_norm": 3.046875,
      "learning_rate": 4.173786511107477e-05,
      "loss": 0.8953,
      "step": 212900
    },
    {
      "epoch": 0.7461964693141553,
      "grad_norm": 2.796875,
      "learning_rate": 4.173721608241107e-05,
      "loss": 0.9109,
      "step": 212910
    },
    {
      "epoch": 0.7462315168210509,
      "grad_norm": 2.703125,
      "learning_rate": 4.1736567053747364e-05,
      "loss": 0.8849,
      "step": 212920
    },
    {
      "epoch": 0.7462665643279466,
      "grad_norm": 3.28125,
      "learning_rate": 4.1735918025083665e-05,
      "loss": 0.8846,
      "step": 212930
    },
    {
      "epoch": 0.7463016118348421,
      "grad_norm": 2.984375,
      "learning_rate": 4.173526899641996e-05,
      "loss": 0.9968,
      "step": 212940
    },
    {
      "epoch": 0.7463366593417378,
      "grad_norm": 2.4375,
      "learning_rate": 4.1734619967756254e-05,
      "loss": 0.879,
      "step": 212950
    },
    {
      "epoch": 0.7463717068486333,
      "grad_norm": 2.765625,
      "learning_rate": 4.1733970939092556e-05,
      "loss": 0.8638,
      "step": 212960
    },
    {
      "epoch": 0.746406754355529,
      "grad_norm": 3.234375,
      "learning_rate": 4.173332191042885e-05,
      "loss": 0.8976,
      "step": 212970
    },
    {
      "epoch": 0.7464418018624245,
      "grad_norm": 3.140625,
      "learning_rate": 4.173267288176515e-05,
      "loss": 0.8546,
      "step": 212980
    },
    {
      "epoch": 0.7464768493693201,
      "grad_norm": 3.234375,
      "learning_rate": 4.1732023853101446e-05,
      "loss": 0.8906,
      "step": 212990
    },
    {
      "epoch": 0.7465118968762157,
      "grad_norm": 3.109375,
      "learning_rate": 4.173137482443775e-05,
      "loss": 0.8554,
      "step": 213000
    },
    {
      "epoch": 0.7465469443831113,
      "grad_norm": 3.109375,
      "learning_rate": 4.173072579577404e-05,
      "loss": 0.8676,
      "step": 213010
    },
    {
      "epoch": 0.746581991890007,
      "grad_norm": 2.609375,
      "learning_rate": 4.1730076767110344e-05,
      "loss": 0.911,
      "step": 213020
    },
    {
      "epoch": 0.7466170393969025,
      "grad_norm": 3.09375,
      "learning_rate": 4.172942773844664e-05,
      "loss": 0.9396,
      "step": 213030
    },
    {
      "epoch": 0.7466520869037981,
      "grad_norm": 2.375,
      "learning_rate": 4.172877870978294e-05,
      "loss": 0.9236,
      "step": 213040
    },
    {
      "epoch": 0.7466871344106937,
      "grad_norm": 2.375,
      "learning_rate": 4.172812968111924e-05,
      "loss": 0.9274,
      "step": 213050
    },
    {
      "epoch": 0.7467221819175893,
      "grad_norm": 3.484375,
      "learning_rate": 4.1727480652455536e-05,
      "loss": 0.8707,
      "step": 213060
    },
    {
      "epoch": 0.7467572294244849,
      "grad_norm": 2.78125,
      "learning_rate": 4.172683162379184e-05,
      "loss": 0.8493,
      "step": 213070
    },
    {
      "epoch": 0.7467922769313805,
      "grad_norm": 2.96875,
      "learning_rate": 4.172618259512813e-05,
      "loss": 0.8818,
      "step": 213080
    },
    {
      "epoch": 0.746827324438276,
      "grad_norm": 2.96875,
      "learning_rate": 4.172553356646443e-05,
      "loss": 0.8751,
      "step": 213090
    },
    {
      "epoch": 0.7468623719451717,
      "grad_norm": 3.0,
      "learning_rate": 4.172488453780073e-05,
      "loss": 0.9233,
      "step": 213100
    },
    {
      "epoch": 0.7468974194520672,
      "grad_norm": 2.796875,
      "learning_rate": 4.172423550913703e-05,
      "loss": 0.8479,
      "step": 213110
    },
    {
      "epoch": 0.7469324669589629,
      "grad_norm": 2.734375,
      "learning_rate": 4.1723586480473324e-05,
      "loss": 0.8424,
      "step": 213120
    },
    {
      "epoch": 0.7469675144658585,
      "grad_norm": 2.609375,
      "learning_rate": 4.1722937451809625e-05,
      "loss": 0.8972,
      "step": 213130
    },
    {
      "epoch": 0.747002561972754,
      "grad_norm": 2.859375,
      "learning_rate": 4.172228842314592e-05,
      "loss": 0.8582,
      "step": 213140
    },
    {
      "epoch": 0.7470376094796497,
      "grad_norm": 3.390625,
      "learning_rate": 4.172163939448222e-05,
      "loss": 0.9058,
      "step": 213150
    },
    {
      "epoch": 0.7470726569865452,
      "grad_norm": 2.921875,
      "learning_rate": 4.1720990365818516e-05,
      "loss": 0.9051,
      "step": 213160
    },
    {
      "epoch": 0.7471077044934409,
      "grad_norm": 3.09375,
      "learning_rate": 4.172034133715482e-05,
      "loss": 0.8884,
      "step": 213170
    },
    {
      "epoch": 0.7471427520003364,
      "grad_norm": 2.546875,
      "learning_rate": 4.171969230849112e-05,
      "loss": 0.8367,
      "step": 213180
    },
    {
      "epoch": 0.7471777995072321,
      "grad_norm": 3.015625,
      "learning_rate": 4.171904327982741e-05,
      "loss": 0.8883,
      "step": 213190
    },
    {
      "epoch": 0.7472128470141276,
      "grad_norm": 3.21875,
      "learning_rate": 4.1718394251163714e-05,
      "loss": 0.8784,
      "step": 213200
    },
    {
      "epoch": 0.7472478945210232,
      "grad_norm": 2.71875,
      "learning_rate": 4.171774522250001e-05,
      "loss": 0.8939,
      "step": 213210
    },
    {
      "epoch": 0.7472829420279189,
      "grad_norm": 2.859375,
      "learning_rate": 4.171709619383631e-05,
      "loss": 0.9098,
      "step": 213220
    },
    {
      "epoch": 0.7473179895348144,
      "grad_norm": 3.046875,
      "learning_rate": 4.1716447165172605e-05,
      "loss": 0.9393,
      "step": 213230
    },
    {
      "epoch": 0.7473530370417101,
      "grad_norm": 3.671875,
      "learning_rate": 4.1715798136508906e-05,
      "loss": 0.8556,
      "step": 213240
    },
    {
      "epoch": 0.7473880845486056,
      "grad_norm": 2.5625,
      "learning_rate": 4.17151491078452e-05,
      "loss": 0.852,
      "step": 213250
    },
    {
      "epoch": 0.7474231320555013,
      "grad_norm": 3.234375,
      "learning_rate": 4.17145000791815e-05,
      "loss": 0.8019,
      "step": 213260
    },
    {
      "epoch": 0.7474581795623968,
      "grad_norm": 2.828125,
      "learning_rate": 4.17138510505178e-05,
      "loss": 0.869,
      "step": 213270
    },
    {
      "epoch": 0.7474932270692924,
      "grad_norm": 2.859375,
      "learning_rate": 4.17132020218541e-05,
      "loss": 0.8232,
      "step": 213280
    },
    {
      "epoch": 0.747528274576188,
      "grad_norm": 2.78125,
      "learning_rate": 4.171255299319039e-05,
      "loss": 0.8592,
      "step": 213290
    },
    {
      "epoch": 0.7475633220830836,
      "grad_norm": 2.75,
      "learning_rate": 4.1711903964526694e-05,
      "loss": 0.8535,
      "step": 213300
    },
    {
      "epoch": 0.7475983695899792,
      "grad_norm": 2.734375,
      "learning_rate": 4.171125493586299e-05,
      "loss": 0.9038,
      "step": 213310
    },
    {
      "epoch": 0.7476334170968748,
      "grad_norm": 2.90625,
      "learning_rate": 4.1710605907199284e-05,
      "loss": 0.9592,
      "step": 213320
    },
    {
      "epoch": 0.7476684646037705,
      "grad_norm": 2.640625,
      "learning_rate": 4.1709956878535585e-05,
      "loss": 0.9175,
      "step": 213330
    },
    {
      "epoch": 0.747703512110666,
      "grad_norm": 2.890625,
      "learning_rate": 4.170930784987188e-05,
      "loss": 0.8629,
      "step": 213340
    },
    {
      "epoch": 0.7477385596175616,
      "grad_norm": 3.15625,
      "learning_rate": 4.170865882120818e-05,
      "loss": 0.896,
      "step": 213350
    },
    {
      "epoch": 0.7477736071244572,
      "grad_norm": 2.984375,
      "learning_rate": 4.1708009792544476e-05,
      "loss": 0.8412,
      "step": 213360
    },
    {
      "epoch": 0.7478086546313528,
      "grad_norm": 3.453125,
      "learning_rate": 4.170736076388078e-05,
      "loss": 0.935,
      "step": 213370
    },
    {
      "epoch": 0.7478437021382484,
      "grad_norm": 2.828125,
      "learning_rate": 4.170671173521707e-05,
      "loss": 0.8802,
      "step": 213380
    },
    {
      "epoch": 0.747878749645144,
      "grad_norm": 2.78125,
      "learning_rate": 4.170606270655337e-05,
      "loss": 0.8542,
      "step": 213390
    },
    {
      "epoch": 0.7479137971520395,
      "grad_norm": 2.609375,
      "learning_rate": 4.170541367788967e-05,
      "loss": 0.9005,
      "step": 213400
    },
    {
      "epoch": 0.7479488446589352,
      "grad_norm": 2.890625,
      "learning_rate": 4.170476464922597e-05,
      "loss": 0.9361,
      "step": 213410
    },
    {
      "epoch": 0.7479838921658308,
      "grad_norm": 2.984375,
      "learning_rate": 4.170411562056227e-05,
      "loss": 0.8904,
      "step": 213420
    },
    {
      "epoch": 0.7480189396727264,
      "grad_norm": 2.890625,
      "learning_rate": 4.1703466591898565e-05,
      "loss": 0.9284,
      "step": 213430
    },
    {
      "epoch": 0.748053987179622,
      "grad_norm": 2.65625,
      "learning_rate": 4.1702817563234866e-05,
      "loss": 0.9215,
      "step": 213440
    },
    {
      "epoch": 0.7480890346865176,
      "grad_norm": 3.0625,
      "learning_rate": 4.170216853457116e-05,
      "loss": 0.8783,
      "step": 213450
    },
    {
      "epoch": 0.7481240821934132,
      "grad_norm": 3.015625,
      "learning_rate": 4.170151950590746e-05,
      "loss": 0.994,
      "step": 213460
    },
    {
      "epoch": 0.7481591297003087,
      "grad_norm": 2.953125,
      "learning_rate": 4.170087047724376e-05,
      "loss": 0.879,
      "step": 213470
    },
    {
      "epoch": 0.7481941772072044,
      "grad_norm": 2.609375,
      "learning_rate": 4.170022144858006e-05,
      "loss": 0.9148,
      "step": 213480
    },
    {
      "epoch": 0.7482292247140999,
      "grad_norm": 3.046875,
      "learning_rate": 4.169957241991635e-05,
      "loss": 0.8412,
      "step": 213490
    },
    {
      "epoch": 0.7482642722209956,
      "grad_norm": 3.109375,
      "learning_rate": 4.1698923391252654e-05,
      "loss": 0.8777,
      "step": 213500
    },
    {
      "epoch": 0.7482993197278912,
      "grad_norm": 3.0625,
      "learning_rate": 4.169827436258895e-05,
      "loss": 0.91,
      "step": 213510
    },
    {
      "epoch": 0.7483343672347867,
      "grad_norm": 2.578125,
      "learning_rate": 4.169762533392525e-05,
      "loss": 0.8856,
      "step": 213520
    },
    {
      "epoch": 0.7483694147416824,
      "grad_norm": 3.125,
      "learning_rate": 4.1696976305261545e-05,
      "loss": 0.8491,
      "step": 213530
    },
    {
      "epoch": 0.7484044622485779,
      "grad_norm": 2.796875,
      "learning_rate": 4.1696327276597846e-05,
      "loss": 0.8691,
      "step": 213540
    },
    {
      "epoch": 0.7484395097554736,
      "grad_norm": 2.859375,
      "learning_rate": 4.169567824793415e-05,
      "loss": 0.8599,
      "step": 213550
    },
    {
      "epoch": 0.7484745572623691,
      "grad_norm": 2.96875,
      "learning_rate": 4.169502921927044e-05,
      "loss": 0.8172,
      "step": 213560
    },
    {
      "epoch": 0.7485096047692648,
      "grad_norm": 2.90625,
      "learning_rate": 4.1694380190606744e-05,
      "loss": 0.8652,
      "step": 213570
    },
    {
      "epoch": 0.7485446522761603,
      "grad_norm": 3.390625,
      "learning_rate": 4.169373116194304e-05,
      "loss": 0.9106,
      "step": 213580
    },
    {
      "epoch": 0.7485796997830559,
      "grad_norm": 2.234375,
      "learning_rate": 4.169308213327934e-05,
      "loss": 0.8838,
      "step": 213590
    },
    {
      "epoch": 0.7486147472899515,
      "grad_norm": 2.953125,
      "learning_rate": 4.1692433104615634e-05,
      "loss": 0.867,
      "step": 213600
    },
    {
      "epoch": 0.7486497947968471,
      "grad_norm": 3.265625,
      "learning_rate": 4.1691784075951936e-05,
      "loss": 0.909,
      "step": 213610
    },
    {
      "epoch": 0.7486848423037428,
      "grad_norm": 2.875,
      "learning_rate": 4.169113504728823e-05,
      "loss": 0.8135,
      "step": 213620
    },
    {
      "epoch": 0.7487198898106383,
      "grad_norm": 2.96875,
      "learning_rate": 4.169048601862453e-05,
      "loss": 0.9012,
      "step": 213630
    },
    {
      "epoch": 0.748754937317534,
      "grad_norm": 3.40625,
      "learning_rate": 4.1689836989960826e-05,
      "loss": 0.8611,
      "step": 213640
    },
    {
      "epoch": 0.7487899848244295,
      "grad_norm": 2.78125,
      "learning_rate": 4.168918796129713e-05,
      "loss": 0.9224,
      "step": 213650
    },
    {
      "epoch": 0.7488250323313251,
      "grad_norm": 2.6875,
      "learning_rate": 4.168853893263342e-05,
      "loss": 0.8139,
      "step": 213660
    },
    {
      "epoch": 0.7488600798382207,
      "grad_norm": 2.953125,
      "learning_rate": 4.1687889903969724e-05,
      "loss": 0.8548,
      "step": 213670
    },
    {
      "epoch": 0.7488951273451163,
      "grad_norm": 2.890625,
      "learning_rate": 4.1687240875306025e-05,
      "loss": 0.9806,
      "step": 213680
    },
    {
      "epoch": 0.7489301748520119,
      "grad_norm": 2.59375,
      "learning_rate": 4.168659184664231e-05,
      "loss": 0.8733,
      "step": 213690
    },
    {
      "epoch": 0.7489652223589075,
      "grad_norm": 3.21875,
      "learning_rate": 4.1685942817978614e-05,
      "loss": 0.9452,
      "step": 213700
    },
    {
      "epoch": 0.7490002698658031,
      "grad_norm": 3.0625,
      "learning_rate": 4.168529378931491e-05,
      "loss": 0.825,
      "step": 213710
    },
    {
      "epoch": 0.7490353173726987,
      "grad_norm": 2.8125,
      "learning_rate": 4.168464476065121e-05,
      "loss": 0.9382,
      "step": 213720
    },
    {
      "epoch": 0.7490703648795943,
      "grad_norm": 2.71875,
      "learning_rate": 4.1683995731987505e-05,
      "loss": 0.825,
      "step": 213730
    },
    {
      "epoch": 0.7491054123864899,
      "grad_norm": 3.578125,
      "learning_rate": 4.1683346703323806e-05,
      "loss": 0.9293,
      "step": 213740
    },
    {
      "epoch": 0.7491404598933855,
      "grad_norm": 2.875,
      "learning_rate": 4.16826976746601e-05,
      "loss": 0.9293,
      "step": 213750
    },
    {
      "epoch": 0.749175507400281,
      "grad_norm": 2.625,
      "learning_rate": 4.16820486459964e-05,
      "loss": 0.8271,
      "step": 213760
    },
    {
      "epoch": 0.7492105549071767,
      "grad_norm": 2.609375,
      "learning_rate": 4.1681399617332704e-05,
      "loss": 0.8275,
      "step": 213770
    },
    {
      "epoch": 0.7492456024140722,
      "grad_norm": 3.265625,
      "learning_rate": 4.1680750588669e-05,
      "loss": 0.9053,
      "step": 213780
    },
    {
      "epoch": 0.7492806499209679,
      "grad_norm": 3.015625,
      "learning_rate": 4.16801015600053e-05,
      "loss": 0.8753,
      "step": 213790
    },
    {
      "epoch": 0.7493156974278634,
      "grad_norm": 3.4375,
      "learning_rate": 4.1679452531341594e-05,
      "loss": 0.9481,
      "step": 213800
    },
    {
      "epoch": 0.7493507449347591,
      "grad_norm": 2.71875,
      "learning_rate": 4.1678803502677896e-05,
      "loss": 0.9659,
      "step": 213810
    },
    {
      "epoch": 0.7493857924416547,
      "grad_norm": 2.703125,
      "learning_rate": 4.167815447401419e-05,
      "loss": 0.9474,
      "step": 213820
    },
    {
      "epoch": 0.7494208399485502,
      "grad_norm": 2.875,
      "learning_rate": 4.167750544535049e-05,
      "loss": 0.9712,
      "step": 213830
    },
    {
      "epoch": 0.7494558874554459,
      "grad_norm": 2.84375,
      "learning_rate": 4.1676856416686786e-05,
      "loss": 0.9052,
      "step": 213840
    },
    {
      "epoch": 0.7494909349623414,
      "grad_norm": 3.453125,
      "learning_rate": 4.167620738802309e-05,
      "loss": 0.9084,
      "step": 213850
    },
    {
      "epoch": 0.7495259824692371,
      "grad_norm": 3.34375,
      "learning_rate": 4.167555835935938e-05,
      "loss": 0.9167,
      "step": 213860
    },
    {
      "epoch": 0.7495610299761326,
      "grad_norm": 2.921875,
      "learning_rate": 4.1674909330695684e-05,
      "loss": 0.8747,
      "step": 213870
    },
    {
      "epoch": 0.7495960774830283,
      "grad_norm": 3.359375,
      "learning_rate": 4.167426030203198e-05,
      "loss": 0.9452,
      "step": 213880
    },
    {
      "epoch": 0.7496311249899238,
      "grad_norm": 2.84375,
      "learning_rate": 4.167361127336828e-05,
      "loss": 0.9568,
      "step": 213890
    },
    {
      "epoch": 0.7496661724968194,
      "grad_norm": 2.984375,
      "learning_rate": 4.1672962244704574e-05,
      "loss": 0.805,
      "step": 213900
    },
    {
      "epoch": 0.7497012200037151,
      "grad_norm": 3.015625,
      "learning_rate": 4.1672313216040876e-05,
      "loss": 0.9248,
      "step": 213910
    },
    {
      "epoch": 0.7497362675106106,
      "grad_norm": 3.078125,
      "learning_rate": 4.167166418737718e-05,
      "loss": 0.9048,
      "step": 213920
    },
    {
      "epoch": 0.7497713150175063,
      "grad_norm": 3.03125,
      "learning_rate": 4.167101515871347e-05,
      "loss": 0.9155,
      "step": 213930
    },
    {
      "epoch": 0.7498063625244018,
      "grad_norm": 2.9375,
      "learning_rate": 4.167036613004977e-05,
      "loss": 0.8961,
      "step": 213940
    },
    {
      "epoch": 0.7498414100312975,
      "grad_norm": 2.9375,
      "learning_rate": 4.166971710138607e-05,
      "loss": 0.9567,
      "step": 213950
    },
    {
      "epoch": 0.749876457538193,
      "grad_norm": 2.828125,
      "learning_rate": 4.166906807272237e-05,
      "loss": 0.8697,
      "step": 213960
    },
    {
      "epoch": 0.7499115050450886,
      "grad_norm": 3.078125,
      "learning_rate": 4.1668419044058664e-05,
      "loss": 0.8401,
      "step": 213970
    },
    {
      "epoch": 0.7499465525519842,
      "grad_norm": 3.328125,
      "learning_rate": 4.1667770015394965e-05,
      "loss": 0.9071,
      "step": 213980
    },
    {
      "epoch": 0.7499816000588798,
      "grad_norm": 2.4375,
      "learning_rate": 4.166712098673126e-05,
      "loss": 0.9026,
      "step": 213990
    },
    {
      "epoch": 0.7500166475657755,
      "grad_norm": 2.859375,
      "learning_rate": 4.166647195806756e-05,
      "loss": 0.9391,
      "step": 214000
    },
    {
      "epoch": 0.750051695072671,
      "grad_norm": 3.03125,
      "learning_rate": 4.1665822929403856e-05,
      "loss": 0.9081,
      "step": 214010
    },
    {
      "epoch": 0.7500867425795666,
      "grad_norm": 2.875,
      "learning_rate": 4.166517390074016e-05,
      "loss": 0.8753,
      "step": 214020
    },
    {
      "epoch": 0.7501217900864622,
      "grad_norm": 2.359375,
      "learning_rate": 4.166452487207645e-05,
      "loss": 0.8171,
      "step": 214030
    },
    {
      "epoch": 0.7501568375933578,
      "grad_norm": 2.890625,
      "learning_rate": 4.166387584341275e-05,
      "loss": 0.9238,
      "step": 214040
    },
    {
      "epoch": 0.7501918851002534,
      "grad_norm": 2.6875,
      "learning_rate": 4.1663226814749055e-05,
      "loss": 0.8902,
      "step": 214050
    },
    {
      "epoch": 0.750226932607149,
      "grad_norm": 3.140625,
      "learning_rate": 4.166257778608534e-05,
      "loss": 0.9503,
      "step": 214060
    },
    {
      "epoch": 0.7502619801140445,
      "grad_norm": 3.25,
      "learning_rate": 4.1661928757421644e-05,
      "loss": 0.9122,
      "step": 214070
    },
    {
      "epoch": 0.7502970276209402,
      "grad_norm": 2.921875,
      "learning_rate": 4.166127972875794e-05,
      "loss": 0.9646,
      "step": 214080
    },
    {
      "epoch": 0.7503320751278357,
      "grad_norm": 2.6875,
      "learning_rate": 4.166063070009424e-05,
      "loss": 0.8585,
      "step": 214090
    },
    {
      "epoch": 0.7503671226347314,
      "grad_norm": 3.1875,
      "learning_rate": 4.1659981671430534e-05,
      "loss": 0.8807,
      "step": 214100
    },
    {
      "epoch": 0.750402170141627,
      "grad_norm": 3.03125,
      "learning_rate": 4.1659332642766836e-05,
      "loss": 0.8683,
      "step": 214110
    },
    {
      "epoch": 0.7504372176485226,
      "grad_norm": 2.96875,
      "learning_rate": 4.165868361410313e-05,
      "loss": 0.8688,
      "step": 214120
    },
    {
      "epoch": 0.7504722651554182,
      "grad_norm": 2.796875,
      "learning_rate": 4.165803458543943e-05,
      "loss": 0.8472,
      "step": 214130
    },
    {
      "epoch": 0.7505073126623137,
      "grad_norm": 3.0,
      "learning_rate": 4.165738555677573e-05,
      "loss": 0.8324,
      "step": 214140
    },
    {
      "epoch": 0.7505423601692094,
      "grad_norm": 3.046875,
      "learning_rate": 4.165673652811203e-05,
      "loss": 0.9311,
      "step": 214150
    },
    {
      "epoch": 0.7505774076761049,
      "grad_norm": 3.140625,
      "learning_rate": 4.165608749944833e-05,
      "loss": 0.8706,
      "step": 214160
    },
    {
      "epoch": 0.7506124551830006,
      "grad_norm": 2.40625,
      "learning_rate": 4.1655438470784624e-05,
      "loss": 0.888,
      "step": 214170
    },
    {
      "epoch": 0.7506475026898961,
      "grad_norm": 2.484375,
      "learning_rate": 4.1654789442120925e-05,
      "loss": 0.8978,
      "step": 214180
    },
    {
      "epoch": 0.7506825501967918,
      "grad_norm": 3.03125,
      "learning_rate": 4.165414041345722e-05,
      "loss": 0.9021,
      "step": 214190
    },
    {
      "epoch": 0.7507175977036874,
      "grad_norm": 2.984375,
      "learning_rate": 4.165349138479352e-05,
      "loss": 0.9867,
      "step": 214200
    },
    {
      "epoch": 0.7507526452105829,
      "grad_norm": 3.34375,
      "learning_rate": 4.1652842356129816e-05,
      "loss": 0.9222,
      "step": 214210
    },
    {
      "epoch": 0.7507876927174786,
      "grad_norm": 2.703125,
      "learning_rate": 4.165219332746612e-05,
      "loss": 0.9206,
      "step": 214220
    },
    {
      "epoch": 0.7508227402243741,
      "grad_norm": 2.796875,
      "learning_rate": 4.165154429880241e-05,
      "loss": 0.9025,
      "step": 214230
    },
    {
      "epoch": 0.7508577877312698,
      "grad_norm": 2.640625,
      "learning_rate": 4.165089527013871e-05,
      "loss": 0.8864,
      "step": 214240
    },
    {
      "epoch": 0.7508928352381653,
      "grad_norm": 3.015625,
      "learning_rate": 4.165024624147501e-05,
      "loss": 0.8657,
      "step": 214250
    },
    {
      "epoch": 0.750927882745061,
      "grad_norm": 3.359375,
      "learning_rate": 4.164959721281131e-05,
      "loss": 0.8433,
      "step": 214260
    },
    {
      "epoch": 0.7509629302519565,
      "grad_norm": 3.015625,
      "learning_rate": 4.1648948184147604e-05,
      "loss": 0.8916,
      "step": 214270
    },
    {
      "epoch": 0.7509979777588521,
      "grad_norm": 3.09375,
      "learning_rate": 4.1648299155483905e-05,
      "loss": 0.8947,
      "step": 214280
    },
    {
      "epoch": 0.7510330252657477,
      "grad_norm": 3.078125,
      "learning_rate": 4.1647650126820206e-05,
      "loss": 0.8955,
      "step": 214290
    },
    {
      "epoch": 0.7510680727726433,
      "grad_norm": 3.1875,
      "learning_rate": 4.16470010981565e-05,
      "loss": 0.8646,
      "step": 214300
    },
    {
      "epoch": 0.751103120279539,
      "grad_norm": 3.21875,
      "learning_rate": 4.16463520694928e-05,
      "loss": 0.9657,
      "step": 214310
    },
    {
      "epoch": 0.7511381677864345,
      "grad_norm": 3.046875,
      "learning_rate": 4.16457030408291e-05,
      "loss": 0.9193,
      "step": 214320
    },
    {
      "epoch": 0.7511732152933301,
      "grad_norm": 2.90625,
      "learning_rate": 4.16450540121654e-05,
      "loss": 0.9184,
      "step": 214330
    },
    {
      "epoch": 0.7512082628002257,
      "grad_norm": 2.78125,
      "learning_rate": 4.164440498350169e-05,
      "loss": 0.8904,
      "step": 214340
    },
    {
      "epoch": 0.7512433103071213,
      "grad_norm": 2.5,
      "learning_rate": 4.1643755954837994e-05,
      "loss": 0.8448,
      "step": 214350
    },
    {
      "epoch": 0.7512783578140169,
      "grad_norm": 3.078125,
      "learning_rate": 4.164310692617429e-05,
      "loss": 0.8239,
      "step": 214360
    },
    {
      "epoch": 0.7513134053209125,
      "grad_norm": 3.078125,
      "learning_rate": 4.164245789751059e-05,
      "loss": 0.8876,
      "step": 214370
    },
    {
      "epoch": 0.751348452827808,
      "grad_norm": 2.546875,
      "learning_rate": 4.1641808868846885e-05,
      "loss": 0.8344,
      "step": 214380
    },
    {
      "epoch": 0.7513835003347037,
      "grad_norm": 3.171875,
      "learning_rate": 4.1641159840183186e-05,
      "loss": 0.8799,
      "step": 214390
    },
    {
      "epoch": 0.7514185478415993,
      "grad_norm": 3.265625,
      "learning_rate": 4.164051081151948e-05,
      "loss": 0.8392,
      "step": 214400
    },
    {
      "epoch": 0.7514535953484949,
      "grad_norm": 3.015625,
      "learning_rate": 4.163986178285578e-05,
      "loss": 0.8186,
      "step": 214410
    },
    {
      "epoch": 0.7514886428553905,
      "grad_norm": 2.625,
      "learning_rate": 4.1639212754192084e-05,
      "loss": 0.9615,
      "step": 214420
    },
    {
      "epoch": 0.7515236903622861,
      "grad_norm": 2.765625,
      "learning_rate": 4.163856372552838e-05,
      "loss": 0.8553,
      "step": 214430
    },
    {
      "epoch": 0.7515587378691817,
      "grad_norm": 3.15625,
      "learning_rate": 4.163791469686467e-05,
      "loss": 0.9023,
      "step": 214440
    },
    {
      "epoch": 0.7515937853760772,
      "grad_norm": 3.078125,
      "learning_rate": 4.163726566820097e-05,
      "loss": 0.9803,
      "step": 214450
    },
    {
      "epoch": 0.7516288328829729,
      "grad_norm": 2.765625,
      "learning_rate": 4.163661663953727e-05,
      "loss": 0.8319,
      "step": 214460
    },
    {
      "epoch": 0.7516638803898684,
      "grad_norm": 3.390625,
      "learning_rate": 4.1635967610873564e-05,
      "loss": 0.9388,
      "step": 214470
    },
    {
      "epoch": 0.7516989278967641,
      "grad_norm": 2.734375,
      "learning_rate": 4.1635318582209865e-05,
      "loss": 0.9461,
      "step": 214480
    },
    {
      "epoch": 0.7517339754036597,
      "grad_norm": 3.171875,
      "learning_rate": 4.163466955354616e-05,
      "loss": 0.9183,
      "step": 214490
    },
    {
      "epoch": 0.7517690229105553,
      "grad_norm": 2.109375,
      "learning_rate": 4.163402052488246e-05,
      "loss": 0.8102,
      "step": 214500
    },
    {
      "epoch": 0.7518040704174509,
      "grad_norm": 2.4375,
      "learning_rate": 4.163337149621876e-05,
      "loss": 0.8259,
      "step": 214510
    },
    {
      "epoch": 0.7518391179243464,
      "grad_norm": 2.9375,
      "learning_rate": 4.163272246755506e-05,
      "loss": 0.908,
      "step": 214520
    },
    {
      "epoch": 0.7518741654312421,
      "grad_norm": 2.859375,
      "learning_rate": 4.163207343889136e-05,
      "loss": 0.9391,
      "step": 214530
    },
    {
      "epoch": 0.7519092129381376,
      "grad_norm": 2.84375,
      "learning_rate": 4.163142441022765e-05,
      "loss": 0.8361,
      "step": 214540
    },
    {
      "epoch": 0.7519442604450333,
      "grad_norm": 3.15625,
      "learning_rate": 4.1630775381563954e-05,
      "loss": 0.9362,
      "step": 214550
    },
    {
      "epoch": 0.7519793079519288,
      "grad_norm": 3.59375,
      "learning_rate": 4.163012635290025e-05,
      "loss": 0.9304,
      "step": 214560
    },
    {
      "epoch": 0.7520143554588244,
      "grad_norm": 3.09375,
      "learning_rate": 4.162947732423655e-05,
      "loss": 0.8066,
      "step": 214570
    },
    {
      "epoch": 0.75204940296572,
      "grad_norm": 3.171875,
      "learning_rate": 4.1628828295572845e-05,
      "loss": 0.8723,
      "step": 214580
    },
    {
      "epoch": 0.7520844504726156,
      "grad_norm": 2.703125,
      "learning_rate": 4.1628179266909146e-05,
      "loss": 0.8535,
      "step": 214590
    },
    {
      "epoch": 0.7521194979795113,
      "grad_norm": 2.828125,
      "learning_rate": 4.162753023824544e-05,
      "loss": 0.9742,
      "step": 214600
    },
    {
      "epoch": 0.7521545454864068,
      "grad_norm": 3.046875,
      "learning_rate": 4.162688120958174e-05,
      "loss": 0.9437,
      "step": 214610
    },
    {
      "epoch": 0.7521895929933025,
      "grad_norm": 3.0625,
      "learning_rate": 4.162623218091804e-05,
      "loss": 0.909,
      "step": 214620
    },
    {
      "epoch": 0.752224640500198,
      "grad_norm": 3.015625,
      "learning_rate": 4.162558315225434e-05,
      "loss": 0.9242,
      "step": 214630
    },
    {
      "epoch": 0.7522596880070936,
      "grad_norm": 2.71875,
      "learning_rate": 4.162493412359063e-05,
      "loss": 0.8918,
      "step": 214640
    },
    {
      "epoch": 0.7522947355139892,
      "grad_norm": 2.890625,
      "learning_rate": 4.1624285094926934e-05,
      "loss": 0.869,
      "step": 214650
    },
    {
      "epoch": 0.7523297830208848,
      "grad_norm": 2.953125,
      "learning_rate": 4.1623636066263236e-05,
      "loss": 0.8512,
      "step": 214660
    },
    {
      "epoch": 0.7523648305277804,
      "grad_norm": 3.046875,
      "learning_rate": 4.162298703759953e-05,
      "loss": 0.9873,
      "step": 214670
    },
    {
      "epoch": 0.752399878034676,
      "grad_norm": 2.96875,
      "learning_rate": 4.162233800893583e-05,
      "loss": 0.965,
      "step": 214680
    },
    {
      "epoch": 0.7524349255415717,
      "grad_norm": 2.90625,
      "learning_rate": 4.1621688980272126e-05,
      "loss": 0.8962,
      "step": 214690
    },
    {
      "epoch": 0.7524699730484672,
      "grad_norm": 2.8125,
      "learning_rate": 4.162103995160843e-05,
      "loss": 0.951,
      "step": 214700
    },
    {
      "epoch": 0.7525050205553628,
      "grad_norm": 2.953125,
      "learning_rate": 4.162039092294472e-05,
      "loss": 0.8744,
      "step": 214710
    },
    {
      "epoch": 0.7525400680622584,
      "grad_norm": 2.90625,
      "learning_rate": 4.1619741894281024e-05,
      "loss": 0.8614,
      "step": 214720
    },
    {
      "epoch": 0.752575115569154,
      "grad_norm": 3.234375,
      "learning_rate": 4.161909286561732e-05,
      "loss": 0.8668,
      "step": 214730
    },
    {
      "epoch": 0.7526101630760496,
      "grad_norm": 2.9375,
      "learning_rate": 4.161844383695362e-05,
      "loss": 0.9629,
      "step": 214740
    },
    {
      "epoch": 0.7526452105829452,
      "grad_norm": 3.046875,
      "learning_rate": 4.1617794808289914e-05,
      "loss": 0.879,
      "step": 214750
    },
    {
      "epoch": 0.7526802580898407,
      "grad_norm": 3.53125,
      "learning_rate": 4.1617145779626216e-05,
      "loss": 0.9694,
      "step": 214760
    },
    {
      "epoch": 0.7527153055967364,
      "grad_norm": 3.015625,
      "learning_rate": 4.161649675096251e-05,
      "loss": 0.9488,
      "step": 214770
    },
    {
      "epoch": 0.7527503531036319,
      "grad_norm": 3.0625,
      "learning_rate": 4.161584772229881e-05,
      "loss": 0.9484,
      "step": 214780
    },
    {
      "epoch": 0.7527854006105276,
      "grad_norm": 2.609375,
      "learning_rate": 4.161519869363511e-05,
      "loss": 0.8995,
      "step": 214790
    },
    {
      "epoch": 0.7528204481174232,
      "grad_norm": 2.921875,
      "learning_rate": 4.161454966497141e-05,
      "loss": 0.899,
      "step": 214800
    },
    {
      "epoch": 0.7528554956243187,
      "grad_norm": 3.34375,
      "learning_rate": 4.161390063630771e-05,
      "loss": 0.9366,
      "step": 214810
    },
    {
      "epoch": 0.7528905431312144,
      "grad_norm": 2.484375,
      "learning_rate": 4.1613251607644e-05,
      "loss": 0.8457,
      "step": 214820
    },
    {
      "epoch": 0.7529255906381099,
      "grad_norm": 3.171875,
      "learning_rate": 4.16126025789803e-05,
      "loss": 0.9403,
      "step": 214830
    },
    {
      "epoch": 0.7529606381450056,
      "grad_norm": 3.34375,
      "learning_rate": 4.161195355031659e-05,
      "loss": 0.8844,
      "step": 214840
    },
    {
      "epoch": 0.7529956856519011,
      "grad_norm": 3.015625,
      "learning_rate": 4.1611304521652894e-05,
      "loss": 0.7637,
      "step": 214850
    },
    {
      "epoch": 0.7530307331587968,
      "grad_norm": 2.796875,
      "learning_rate": 4.161065549298919e-05,
      "loss": 0.862,
      "step": 214860
    },
    {
      "epoch": 0.7530657806656923,
      "grad_norm": 2.828125,
      "learning_rate": 4.161000646432549e-05,
      "loss": 0.9249,
      "step": 214870
    },
    {
      "epoch": 0.7531008281725879,
      "grad_norm": 2.828125,
      "learning_rate": 4.160935743566179e-05,
      "loss": 0.9904,
      "step": 214880
    },
    {
      "epoch": 0.7531358756794836,
      "grad_norm": 2.5625,
      "learning_rate": 4.1608708406998086e-05,
      "loss": 0.8749,
      "step": 214890
    },
    {
      "epoch": 0.7531709231863791,
      "grad_norm": 2.8125,
      "learning_rate": 4.160805937833439e-05,
      "loss": 0.9628,
      "step": 214900
    },
    {
      "epoch": 0.7532059706932748,
      "grad_norm": 3.15625,
      "learning_rate": 4.160741034967068e-05,
      "loss": 0.8597,
      "step": 214910
    },
    {
      "epoch": 0.7532410182001703,
      "grad_norm": 2.953125,
      "learning_rate": 4.1606761321006984e-05,
      "loss": 0.8733,
      "step": 214920
    },
    {
      "epoch": 0.753276065707066,
      "grad_norm": 3.140625,
      "learning_rate": 4.160611229234328e-05,
      "loss": 0.9086,
      "step": 214930
    },
    {
      "epoch": 0.7533111132139615,
      "grad_norm": 2.796875,
      "learning_rate": 4.160546326367958e-05,
      "loss": 0.8011,
      "step": 214940
    },
    {
      "epoch": 0.7533461607208571,
      "grad_norm": 2.546875,
      "learning_rate": 4.1604814235015874e-05,
      "loss": 0.8965,
      "step": 214950
    },
    {
      "epoch": 0.7533812082277527,
      "grad_norm": 2.71875,
      "learning_rate": 4.1604165206352176e-05,
      "loss": 0.9196,
      "step": 214960
    },
    {
      "epoch": 0.7534162557346483,
      "grad_norm": 3.234375,
      "learning_rate": 4.160351617768847e-05,
      "loss": 0.9252,
      "step": 214970
    },
    {
      "epoch": 0.753451303241544,
      "grad_norm": 3.0,
      "learning_rate": 4.160286714902477e-05,
      "loss": 0.8925,
      "step": 214980
    },
    {
      "epoch": 0.7534863507484395,
      "grad_norm": 3.078125,
      "learning_rate": 4.1602218120361066e-05,
      "loss": 0.9618,
      "step": 214990
    },
    {
      "epoch": 0.7535213982553352,
      "grad_norm": 2.84375,
      "learning_rate": 4.160156909169737e-05,
      "loss": 0.9212,
      "step": 215000
    },
    {
      "epoch": 0.7535213982553352,
      "eval_loss": 0.8424768447875977,
      "eval_runtime": 565.3267,
      "eval_samples_per_second": 672.949,
      "eval_steps_per_second": 56.079,
      "step": 215000
    },
    {
      "epoch": 0.7535564457622307,
      "grad_norm": 3.109375,
      "learning_rate": 4.160092006303367e-05,
      "loss": 0.8833,
      "step": 215010
    },
    {
      "epoch": 0.7535914932691263,
      "grad_norm": 3.15625,
      "learning_rate": 4.1600271034369964e-05,
      "loss": 0.8224,
      "step": 215020
    },
    {
      "epoch": 0.7536265407760219,
      "grad_norm": 3.0,
      "learning_rate": 4.1599622005706265e-05,
      "loss": 0.9365,
      "step": 215030
    },
    {
      "epoch": 0.7536615882829175,
      "grad_norm": 2.921875,
      "learning_rate": 4.159897297704256e-05,
      "loss": 0.9392,
      "step": 215040
    },
    {
      "epoch": 0.753696635789813,
      "grad_norm": 3.015625,
      "learning_rate": 4.159832394837886e-05,
      "loss": 0.9295,
      "step": 215050
    },
    {
      "epoch": 0.7537316832967087,
      "grad_norm": 2.953125,
      "learning_rate": 4.1597674919715156e-05,
      "loss": 0.8808,
      "step": 215060
    },
    {
      "epoch": 0.7537667308036042,
      "grad_norm": 3.515625,
      "learning_rate": 4.159702589105146e-05,
      "loss": 0.8113,
      "step": 215070
    },
    {
      "epoch": 0.7538017783104999,
      "grad_norm": 3.015625,
      "learning_rate": 4.159637686238775e-05,
      "loss": 0.8528,
      "step": 215080
    },
    {
      "epoch": 0.7538368258173955,
      "grad_norm": 2.8125,
      "learning_rate": 4.159572783372405e-05,
      "loss": 0.8414,
      "step": 215090
    },
    {
      "epoch": 0.7538718733242911,
      "grad_norm": 2.6875,
      "learning_rate": 4.159507880506035e-05,
      "loss": 0.8536,
      "step": 215100
    },
    {
      "epoch": 0.7539069208311867,
      "grad_norm": 3.296875,
      "learning_rate": 4.159442977639665e-05,
      "loss": 0.9061,
      "step": 215110
    },
    {
      "epoch": 0.7539419683380822,
      "grad_norm": 2.921875,
      "learning_rate": 4.1593780747732944e-05,
      "loss": 0.8959,
      "step": 215120
    },
    {
      "epoch": 0.7539770158449779,
      "grad_norm": 3.625,
      "learning_rate": 4.1593131719069245e-05,
      "loss": 0.9669,
      "step": 215130
    },
    {
      "epoch": 0.7540120633518734,
      "grad_norm": 2.765625,
      "learning_rate": 4.159248269040554e-05,
      "loss": 0.8705,
      "step": 215140
    },
    {
      "epoch": 0.7540471108587691,
      "grad_norm": 2.671875,
      "learning_rate": 4.159183366174184e-05,
      "loss": 0.899,
      "step": 215150
    },
    {
      "epoch": 0.7540821583656646,
      "grad_norm": 2.640625,
      "learning_rate": 4.159118463307814e-05,
      "loss": 0.8539,
      "step": 215160
    },
    {
      "epoch": 0.7541172058725603,
      "grad_norm": 3.15625,
      "learning_rate": 4.159053560441444e-05,
      "loss": 0.8972,
      "step": 215170
    },
    {
      "epoch": 0.7541522533794559,
      "grad_norm": 2.984375,
      "learning_rate": 4.158988657575074e-05,
      "loss": 0.8913,
      "step": 215180
    },
    {
      "epoch": 0.7541873008863514,
      "grad_norm": 3.015625,
      "learning_rate": 4.1589237547087026e-05,
      "loss": 0.9366,
      "step": 215190
    },
    {
      "epoch": 0.7542223483932471,
      "grad_norm": 2.921875,
      "learning_rate": 4.158858851842333e-05,
      "loss": 0.9122,
      "step": 215200
    },
    {
      "epoch": 0.7542573959001426,
      "grad_norm": 3.25,
      "learning_rate": 4.158793948975962e-05,
      "loss": 0.9176,
      "step": 215210
    },
    {
      "epoch": 0.7542924434070383,
      "grad_norm": 3.3125,
      "learning_rate": 4.1587290461095924e-05,
      "loss": 0.9434,
      "step": 215220
    },
    {
      "epoch": 0.7543274909139338,
      "grad_norm": 3.203125,
      "learning_rate": 4.158664143243222e-05,
      "loss": 0.8846,
      "step": 215230
    },
    {
      "epoch": 0.7543625384208295,
      "grad_norm": 2.671875,
      "learning_rate": 4.158599240376852e-05,
      "loss": 0.9557,
      "step": 215240
    },
    {
      "epoch": 0.754397585927725,
      "grad_norm": 3.03125,
      "learning_rate": 4.158534337510482e-05,
      "loss": 0.9454,
      "step": 215250
    },
    {
      "epoch": 0.7544326334346206,
      "grad_norm": 2.90625,
      "learning_rate": 4.1584694346441116e-05,
      "loss": 0.8412,
      "step": 215260
    },
    {
      "epoch": 0.7544676809415162,
      "grad_norm": 2.796875,
      "learning_rate": 4.158404531777742e-05,
      "loss": 0.8779,
      "step": 215270
    },
    {
      "epoch": 0.7545027284484118,
      "grad_norm": 2.71875,
      "learning_rate": 4.158339628911371e-05,
      "loss": 0.9133,
      "step": 215280
    },
    {
      "epoch": 0.7545377759553075,
      "grad_norm": 2.828125,
      "learning_rate": 4.158274726045001e-05,
      "loss": 0.8947,
      "step": 215290
    },
    {
      "epoch": 0.754572823462203,
      "grad_norm": 3.046875,
      "learning_rate": 4.158209823178631e-05,
      "loss": 0.9139,
      "step": 215300
    },
    {
      "epoch": 0.7546078709690986,
      "grad_norm": 2.5625,
      "learning_rate": 4.158144920312261e-05,
      "loss": 0.8375,
      "step": 215310
    },
    {
      "epoch": 0.7546429184759942,
      "grad_norm": 3.0,
      "learning_rate": 4.1580800174458904e-05,
      "loss": 0.9026,
      "step": 215320
    },
    {
      "epoch": 0.7546779659828898,
      "grad_norm": 3.359375,
      "learning_rate": 4.1580151145795205e-05,
      "loss": 0.8972,
      "step": 215330
    },
    {
      "epoch": 0.7547130134897854,
      "grad_norm": 4.59375,
      "learning_rate": 4.15795021171315e-05,
      "loss": 0.9099,
      "step": 215340
    },
    {
      "epoch": 0.754748060996681,
      "grad_norm": 2.859375,
      "learning_rate": 4.15788530884678e-05,
      "loss": 0.9054,
      "step": 215350
    },
    {
      "epoch": 0.7547831085035765,
      "grad_norm": 3.203125,
      "learning_rate": 4.1578204059804096e-05,
      "loss": 0.8816,
      "step": 215360
    },
    {
      "epoch": 0.7548181560104722,
      "grad_norm": 3.078125,
      "learning_rate": 4.15775550311404e-05,
      "loss": 0.9431,
      "step": 215370
    },
    {
      "epoch": 0.7548532035173678,
      "grad_norm": 3.03125,
      "learning_rate": 4.15769060024767e-05,
      "loss": 0.8772,
      "step": 215380
    },
    {
      "epoch": 0.7548882510242634,
      "grad_norm": 3.0,
      "learning_rate": 4.157625697381299e-05,
      "loss": 0.9346,
      "step": 215390
    },
    {
      "epoch": 0.754923298531159,
      "grad_norm": 2.765625,
      "learning_rate": 4.1575607945149295e-05,
      "loss": 0.8235,
      "step": 215400
    },
    {
      "epoch": 0.7549583460380546,
      "grad_norm": 2.671875,
      "learning_rate": 4.157495891648559e-05,
      "loss": 0.8707,
      "step": 215410
    },
    {
      "epoch": 0.7549933935449502,
      "grad_norm": 2.984375,
      "learning_rate": 4.157430988782189e-05,
      "loss": 0.901,
      "step": 215420
    },
    {
      "epoch": 0.7550284410518457,
      "grad_norm": 2.875,
      "learning_rate": 4.1573660859158185e-05,
      "loss": 0.8986,
      "step": 215430
    },
    {
      "epoch": 0.7550634885587414,
      "grad_norm": 2.8125,
      "learning_rate": 4.157301183049449e-05,
      "loss": 0.8303,
      "step": 215440
    },
    {
      "epoch": 0.7550985360656369,
      "grad_norm": 3.046875,
      "learning_rate": 4.157236280183078e-05,
      "loss": 0.8963,
      "step": 215450
    },
    {
      "epoch": 0.7551335835725326,
      "grad_norm": 3.140625,
      "learning_rate": 4.157171377316708e-05,
      "loss": 0.8921,
      "step": 215460
    },
    {
      "epoch": 0.7551686310794281,
      "grad_norm": 2.84375,
      "learning_rate": 4.157106474450338e-05,
      "loss": 0.9753,
      "step": 215470
    },
    {
      "epoch": 0.7552036785863238,
      "grad_norm": 3.0,
      "learning_rate": 4.157041571583968e-05,
      "loss": 0.8908,
      "step": 215480
    },
    {
      "epoch": 0.7552387260932194,
      "grad_norm": 3.0,
      "learning_rate": 4.156976668717597e-05,
      "loss": 0.8705,
      "step": 215490
    },
    {
      "epoch": 0.7552737736001149,
      "grad_norm": 2.984375,
      "learning_rate": 4.1569117658512275e-05,
      "loss": 0.8899,
      "step": 215500
    },
    {
      "epoch": 0.7553088211070106,
      "grad_norm": 2.265625,
      "learning_rate": 4.156846862984857e-05,
      "loss": 0.8939,
      "step": 215510
    },
    {
      "epoch": 0.7553438686139061,
      "grad_norm": 3.078125,
      "learning_rate": 4.156781960118487e-05,
      "loss": 0.8831,
      "step": 215520
    },
    {
      "epoch": 0.7553789161208018,
      "grad_norm": 2.65625,
      "learning_rate": 4.156717057252117e-05,
      "loss": 0.843,
      "step": 215530
    },
    {
      "epoch": 0.7554139636276973,
      "grad_norm": 2.859375,
      "learning_rate": 4.156652154385747e-05,
      "loss": 0.8944,
      "step": 215540
    },
    {
      "epoch": 0.755449011134593,
      "grad_norm": 3.171875,
      "learning_rate": 4.156587251519377e-05,
      "loss": 0.9377,
      "step": 215550
    },
    {
      "epoch": 0.7554840586414885,
      "grad_norm": 2.984375,
      "learning_rate": 4.156522348653006e-05,
      "loss": 0.9183,
      "step": 215560
    },
    {
      "epoch": 0.7555191061483841,
      "grad_norm": 2.734375,
      "learning_rate": 4.156457445786636e-05,
      "loss": 0.9041,
      "step": 215570
    },
    {
      "epoch": 0.7555541536552798,
      "grad_norm": 3.0625,
      "learning_rate": 4.156392542920265e-05,
      "loss": 0.9369,
      "step": 215580
    },
    {
      "epoch": 0.7555892011621753,
      "grad_norm": 2.421875,
      "learning_rate": 4.156327640053895e-05,
      "loss": 0.8538,
      "step": 215590
    },
    {
      "epoch": 0.755624248669071,
      "grad_norm": 3.140625,
      "learning_rate": 4.156262737187525e-05,
      "loss": 0.9141,
      "step": 215600
    },
    {
      "epoch": 0.7556592961759665,
      "grad_norm": 3.078125,
      "learning_rate": 4.156197834321155e-05,
      "loss": 0.8584,
      "step": 215610
    },
    {
      "epoch": 0.7556943436828621,
      "grad_norm": 3.0625,
      "learning_rate": 4.156132931454785e-05,
      "loss": 0.866,
      "step": 215620
    },
    {
      "epoch": 0.7557293911897577,
      "grad_norm": 3.125,
      "learning_rate": 4.1560680285884145e-05,
      "loss": 0.9095,
      "step": 215630
    },
    {
      "epoch": 0.7557644386966533,
      "grad_norm": 2.96875,
      "learning_rate": 4.156003125722045e-05,
      "loss": 0.926,
      "step": 215640
    },
    {
      "epoch": 0.7557994862035489,
      "grad_norm": 3.328125,
      "learning_rate": 4.155938222855674e-05,
      "loss": 0.9333,
      "step": 215650
    },
    {
      "epoch": 0.7558345337104445,
      "grad_norm": 2.6875,
      "learning_rate": 4.155873319989304e-05,
      "loss": 0.9531,
      "step": 215660
    },
    {
      "epoch": 0.7558695812173402,
      "grad_norm": 3.234375,
      "learning_rate": 4.155808417122934e-05,
      "loss": 0.9184,
      "step": 215670
    },
    {
      "epoch": 0.7559046287242357,
      "grad_norm": 3.203125,
      "learning_rate": 4.155743514256564e-05,
      "loss": 1.0342,
      "step": 215680
    },
    {
      "epoch": 0.7559396762311313,
      "grad_norm": 2.953125,
      "learning_rate": 4.155678611390193e-05,
      "loss": 0.9003,
      "step": 215690
    },
    {
      "epoch": 0.7559747237380269,
      "grad_norm": 2.46875,
      "learning_rate": 4.1556137085238235e-05,
      "loss": 0.7668,
      "step": 215700
    },
    {
      "epoch": 0.7560097712449225,
      "grad_norm": 3.15625,
      "learning_rate": 4.155548805657453e-05,
      "loss": 0.8888,
      "step": 215710
    },
    {
      "epoch": 0.7560448187518181,
      "grad_norm": 2.921875,
      "learning_rate": 4.155483902791083e-05,
      "loss": 0.8322,
      "step": 215720
    },
    {
      "epoch": 0.7560798662587137,
      "grad_norm": 3.484375,
      "learning_rate": 4.1554189999247125e-05,
      "loss": 0.9032,
      "step": 215730
    },
    {
      "epoch": 0.7561149137656092,
      "grad_norm": 3.046875,
      "learning_rate": 4.155354097058343e-05,
      "loss": 0.902,
      "step": 215740
    },
    {
      "epoch": 0.7561499612725049,
      "grad_norm": 3.140625,
      "learning_rate": 4.155289194191973e-05,
      "loss": 0.9321,
      "step": 215750
    },
    {
      "epoch": 0.7561850087794004,
      "grad_norm": 3.875,
      "learning_rate": 4.155224291325602e-05,
      "loss": 0.9322,
      "step": 215760
    },
    {
      "epoch": 0.7562200562862961,
      "grad_norm": 3.28125,
      "learning_rate": 4.1551593884592324e-05,
      "loss": 0.8783,
      "step": 215770
    },
    {
      "epoch": 0.7562551037931917,
      "grad_norm": 2.859375,
      "learning_rate": 4.155094485592862e-05,
      "loss": 0.8343,
      "step": 215780
    },
    {
      "epoch": 0.7562901513000873,
      "grad_norm": 3.078125,
      "learning_rate": 4.155029582726492e-05,
      "loss": 0.8823,
      "step": 215790
    },
    {
      "epoch": 0.7563251988069829,
      "grad_norm": 3.4375,
      "learning_rate": 4.1549646798601215e-05,
      "loss": 0.8739,
      "step": 215800
    },
    {
      "epoch": 0.7563602463138784,
      "grad_norm": 2.84375,
      "learning_rate": 4.1548997769937516e-05,
      "loss": 0.955,
      "step": 215810
    },
    {
      "epoch": 0.7563952938207741,
      "grad_norm": 2.8125,
      "learning_rate": 4.154834874127381e-05,
      "loss": 0.9495,
      "step": 215820
    },
    {
      "epoch": 0.7564303413276696,
      "grad_norm": 3.0,
      "learning_rate": 4.154769971261011e-05,
      "loss": 0.8956,
      "step": 215830
    },
    {
      "epoch": 0.7564653888345653,
      "grad_norm": 2.6875,
      "learning_rate": 4.154705068394641e-05,
      "loss": 0.901,
      "step": 215840
    },
    {
      "epoch": 0.7565004363414608,
      "grad_norm": 3.34375,
      "learning_rate": 4.154640165528271e-05,
      "loss": 0.9132,
      "step": 215850
    },
    {
      "epoch": 0.7565354838483564,
      "grad_norm": 2.453125,
      "learning_rate": 4.1545752626619e-05,
      "loss": 0.8039,
      "step": 215860
    },
    {
      "epoch": 0.7565705313552521,
      "grad_norm": 3.234375,
      "learning_rate": 4.1545103597955304e-05,
      "loss": 0.9987,
      "step": 215870
    },
    {
      "epoch": 0.7566055788621476,
      "grad_norm": 3.015625,
      "learning_rate": 4.1544454569291605e-05,
      "loss": 0.8455,
      "step": 215880
    },
    {
      "epoch": 0.7566406263690433,
      "grad_norm": 3.109375,
      "learning_rate": 4.15438055406279e-05,
      "loss": 0.9406,
      "step": 215890
    },
    {
      "epoch": 0.7566756738759388,
      "grad_norm": 2.78125,
      "learning_rate": 4.15431565119642e-05,
      "loss": 0.8652,
      "step": 215900
    },
    {
      "epoch": 0.7567107213828345,
      "grad_norm": 3.09375,
      "learning_rate": 4.1542507483300496e-05,
      "loss": 0.9001,
      "step": 215910
    },
    {
      "epoch": 0.75674576888973,
      "grad_norm": 2.640625,
      "learning_rate": 4.15418584546368e-05,
      "loss": 0.839,
      "step": 215920
    },
    {
      "epoch": 0.7567808163966256,
      "grad_norm": 3.203125,
      "learning_rate": 4.154120942597309e-05,
      "loss": 0.843,
      "step": 215930
    },
    {
      "epoch": 0.7568158639035212,
      "grad_norm": 2.6875,
      "learning_rate": 4.1540560397309393e-05,
      "loss": 0.8783,
      "step": 215940
    },
    {
      "epoch": 0.7568509114104168,
      "grad_norm": 2.5625,
      "learning_rate": 4.153991136864568e-05,
      "loss": 0.9047,
      "step": 215950
    },
    {
      "epoch": 0.7568859589173124,
      "grad_norm": 2.90625,
      "learning_rate": 4.153926233998198e-05,
      "loss": 0.8695,
      "step": 215960
    },
    {
      "epoch": 0.756921006424208,
      "grad_norm": 3.0,
      "learning_rate": 4.1538613311318284e-05,
      "loss": 0.9096,
      "step": 215970
    },
    {
      "epoch": 0.7569560539311037,
      "grad_norm": 2.640625,
      "learning_rate": 4.153796428265458e-05,
      "loss": 0.8892,
      "step": 215980
    },
    {
      "epoch": 0.7569911014379992,
      "grad_norm": 2.703125,
      "learning_rate": 4.153731525399088e-05,
      "loss": 0.904,
      "step": 215990
    },
    {
      "epoch": 0.7570261489448948,
      "grad_norm": 3.078125,
      "learning_rate": 4.1536666225327175e-05,
      "loss": 0.9239,
      "step": 216000
    },
    {
      "epoch": 0.7570611964517904,
      "grad_norm": 2.875,
      "learning_rate": 4.1536017196663476e-05,
      "loss": 0.8689,
      "step": 216010
    },
    {
      "epoch": 0.757096243958686,
      "grad_norm": 2.34375,
      "learning_rate": 4.153536816799977e-05,
      "loss": 0.7839,
      "step": 216020
    },
    {
      "epoch": 0.7571312914655816,
      "grad_norm": 2.921875,
      "learning_rate": 4.153471913933607e-05,
      "loss": 0.8922,
      "step": 216030
    },
    {
      "epoch": 0.7571663389724772,
      "grad_norm": 2.90625,
      "learning_rate": 4.153407011067237e-05,
      "loss": 0.9526,
      "step": 216040
    },
    {
      "epoch": 0.7572013864793727,
      "grad_norm": 2.8125,
      "learning_rate": 4.153342108200867e-05,
      "loss": 0.8207,
      "step": 216050
    },
    {
      "epoch": 0.7572364339862684,
      "grad_norm": 3.015625,
      "learning_rate": 4.153277205334496e-05,
      "loss": 0.9219,
      "step": 216060
    },
    {
      "epoch": 0.757271481493164,
      "grad_norm": 2.890625,
      "learning_rate": 4.1532123024681264e-05,
      "loss": 0.8576,
      "step": 216070
    },
    {
      "epoch": 0.7573065290000596,
      "grad_norm": 3.3125,
      "learning_rate": 4.153147399601756e-05,
      "loss": 0.9331,
      "step": 216080
    },
    {
      "epoch": 0.7573415765069552,
      "grad_norm": 2.765625,
      "learning_rate": 4.153082496735386e-05,
      "loss": 0.9273,
      "step": 216090
    },
    {
      "epoch": 0.7573766240138508,
      "grad_norm": 2.953125,
      "learning_rate": 4.1530175938690155e-05,
      "loss": 0.8522,
      "step": 216100
    },
    {
      "epoch": 0.7574116715207464,
      "grad_norm": 3.203125,
      "learning_rate": 4.1529526910026456e-05,
      "loss": 0.9142,
      "step": 216110
    },
    {
      "epoch": 0.7574467190276419,
      "grad_norm": 2.53125,
      "learning_rate": 4.152887788136276e-05,
      "loss": 0.8824,
      "step": 216120
    },
    {
      "epoch": 0.7574817665345376,
      "grad_norm": 4.375,
      "learning_rate": 4.152822885269905e-05,
      "loss": 0.9312,
      "step": 216130
    },
    {
      "epoch": 0.7575168140414331,
      "grad_norm": 2.96875,
      "learning_rate": 4.1527579824035353e-05,
      "loss": 0.8605,
      "step": 216140
    },
    {
      "epoch": 0.7575518615483288,
      "grad_norm": 2.765625,
      "learning_rate": 4.152693079537165e-05,
      "loss": 0.953,
      "step": 216150
    },
    {
      "epoch": 0.7575869090552244,
      "grad_norm": 3.296875,
      "learning_rate": 4.152628176670795e-05,
      "loss": 0.8588,
      "step": 216160
    },
    {
      "epoch": 0.75762195656212,
      "grad_norm": 2.703125,
      "learning_rate": 4.1525632738044244e-05,
      "loss": 0.8457,
      "step": 216170
    },
    {
      "epoch": 0.7576570040690156,
      "grad_norm": 2.703125,
      "learning_rate": 4.1524983709380545e-05,
      "loss": 0.9222,
      "step": 216180
    },
    {
      "epoch": 0.7576920515759111,
      "grad_norm": 2.84375,
      "learning_rate": 4.152433468071684e-05,
      "loss": 1.0056,
      "step": 216190
    },
    {
      "epoch": 0.7577270990828068,
      "grad_norm": 2.890625,
      "learning_rate": 4.152368565205314e-05,
      "loss": 0.9963,
      "step": 216200
    },
    {
      "epoch": 0.7577621465897023,
      "grad_norm": 2.609375,
      "learning_rate": 4.1523036623389436e-05,
      "loss": 0.8732,
      "step": 216210
    },
    {
      "epoch": 0.757797194096598,
      "grad_norm": 2.65625,
      "learning_rate": 4.152238759472574e-05,
      "loss": 0.8448,
      "step": 216220
    },
    {
      "epoch": 0.7578322416034935,
      "grad_norm": 3.140625,
      "learning_rate": 4.152173856606203e-05,
      "loss": 0.9305,
      "step": 216230
    },
    {
      "epoch": 0.7578672891103891,
      "grad_norm": 3.09375,
      "learning_rate": 4.1521089537398333e-05,
      "loss": 0.9542,
      "step": 216240
    },
    {
      "epoch": 0.7579023366172847,
      "grad_norm": 3.1875,
      "learning_rate": 4.1520440508734635e-05,
      "loss": 0.872,
      "step": 216250
    },
    {
      "epoch": 0.7579373841241803,
      "grad_norm": 3.09375,
      "learning_rate": 4.151979148007093e-05,
      "loss": 0.9402,
      "step": 216260
    },
    {
      "epoch": 0.757972431631076,
      "grad_norm": 2.875,
      "learning_rate": 4.151914245140723e-05,
      "loss": 0.8381,
      "step": 216270
    },
    {
      "epoch": 0.7580074791379715,
      "grad_norm": 3.0625,
      "learning_rate": 4.1518493422743525e-05,
      "loss": 0.9311,
      "step": 216280
    },
    {
      "epoch": 0.7580425266448672,
      "grad_norm": 2.65625,
      "learning_rate": 4.151784439407983e-05,
      "loss": 0.8824,
      "step": 216290
    },
    {
      "epoch": 0.7580775741517627,
      "grad_norm": 3.0625,
      "learning_rate": 4.151719536541612e-05,
      "loss": 0.8737,
      "step": 216300
    },
    {
      "epoch": 0.7581126216586583,
      "grad_norm": 2.875,
      "learning_rate": 4.151654633675242e-05,
      "loss": 0.8424,
      "step": 216310
    },
    {
      "epoch": 0.7581476691655539,
      "grad_norm": 3.078125,
      "learning_rate": 4.151589730808871e-05,
      "loss": 0.9126,
      "step": 216320
    },
    {
      "epoch": 0.7581827166724495,
      "grad_norm": 2.578125,
      "learning_rate": 4.151524827942501e-05,
      "loss": 0.9996,
      "step": 216330
    },
    {
      "epoch": 0.758217764179345,
      "grad_norm": 2.984375,
      "learning_rate": 4.1514599250761313e-05,
      "loss": 0.9242,
      "step": 216340
    },
    {
      "epoch": 0.7582528116862407,
      "grad_norm": 3.484375,
      "learning_rate": 4.151395022209761e-05,
      "loss": 0.8586,
      "step": 216350
    },
    {
      "epoch": 0.7582878591931363,
      "grad_norm": 3.125,
      "learning_rate": 4.151330119343391e-05,
      "loss": 0.883,
      "step": 216360
    },
    {
      "epoch": 0.7583229067000319,
      "grad_norm": 2.78125,
      "learning_rate": 4.1512652164770204e-05,
      "loss": 0.9043,
      "step": 216370
    },
    {
      "epoch": 0.7583579542069275,
      "grad_norm": 2.96875,
      "learning_rate": 4.1512003136106505e-05,
      "loss": 0.8254,
      "step": 216380
    },
    {
      "epoch": 0.7583930017138231,
      "grad_norm": 3.265625,
      "learning_rate": 4.15113541074428e-05,
      "loss": 1.0048,
      "step": 216390
    },
    {
      "epoch": 0.7584280492207187,
      "grad_norm": 3.171875,
      "learning_rate": 4.15107050787791e-05,
      "loss": 0.8376,
      "step": 216400
    },
    {
      "epoch": 0.7584630967276142,
      "grad_norm": 3.109375,
      "learning_rate": 4.1510056050115396e-05,
      "loss": 0.9162,
      "step": 216410
    },
    {
      "epoch": 0.7584981442345099,
      "grad_norm": 2.46875,
      "learning_rate": 4.15094070214517e-05,
      "loss": 0.8072,
      "step": 216420
    },
    {
      "epoch": 0.7585331917414054,
      "grad_norm": 3.15625,
      "learning_rate": 4.150875799278799e-05,
      "loss": 0.9552,
      "step": 216430
    },
    {
      "epoch": 0.7585682392483011,
      "grad_norm": 2.765625,
      "learning_rate": 4.1508108964124293e-05,
      "loss": 0.9232,
      "step": 216440
    },
    {
      "epoch": 0.7586032867551966,
      "grad_norm": 3.1875,
      "learning_rate": 4.150745993546059e-05,
      "loss": 0.9683,
      "step": 216450
    },
    {
      "epoch": 0.7586383342620923,
      "grad_norm": 3.40625,
      "learning_rate": 4.150681090679689e-05,
      "loss": 0.8708,
      "step": 216460
    },
    {
      "epoch": 0.7586733817689879,
      "grad_norm": 2.625,
      "learning_rate": 4.1506161878133184e-05,
      "loss": 0.95,
      "step": 216470
    },
    {
      "epoch": 0.7587084292758834,
      "grad_norm": 2.8125,
      "learning_rate": 4.1505512849469485e-05,
      "loss": 0.8691,
      "step": 216480
    },
    {
      "epoch": 0.7587434767827791,
      "grad_norm": 3.09375,
      "learning_rate": 4.150486382080579e-05,
      "loss": 0.8769,
      "step": 216490
    },
    {
      "epoch": 0.7587785242896746,
      "grad_norm": 2.75,
      "learning_rate": 4.150421479214208e-05,
      "loss": 0.8963,
      "step": 216500
    },
    {
      "epoch": 0.7588135717965703,
      "grad_norm": 2.953125,
      "learning_rate": 4.150356576347838e-05,
      "loss": 0.9002,
      "step": 216510
    },
    {
      "epoch": 0.7588486193034658,
      "grad_norm": 2.9375,
      "learning_rate": 4.150291673481468e-05,
      "loss": 0.9101,
      "step": 216520
    },
    {
      "epoch": 0.7588836668103615,
      "grad_norm": 2.640625,
      "learning_rate": 4.150226770615098e-05,
      "loss": 0.8981,
      "step": 216530
    },
    {
      "epoch": 0.758918714317257,
      "grad_norm": 3.25,
      "learning_rate": 4.1501618677487273e-05,
      "loss": 0.9496,
      "step": 216540
    },
    {
      "epoch": 0.7589537618241526,
      "grad_norm": 3.15625,
      "learning_rate": 4.1500969648823575e-05,
      "loss": 0.9718,
      "step": 216550
    },
    {
      "epoch": 0.7589888093310483,
      "grad_norm": 2.734375,
      "learning_rate": 4.150032062015987e-05,
      "loss": 0.8045,
      "step": 216560
    },
    {
      "epoch": 0.7590238568379438,
      "grad_norm": 2.84375,
      "learning_rate": 4.149967159149617e-05,
      "loss": 0.8925,
      "step": 216570
    },
    {
      "epoch": 0.7590589043448395,
      "grad_norm": 2.96875,
      "learning_rate": 4.1499022562832465e-05,
      "loss": 0.9009,
      "step": 216580
    },
    {
      "epoch": 0.759093951851735,
      "grad_norm": 2.984375,
      "learning_rate": 4.149837353416877e-05,
      "loss": 0.875,
      "step": 216590
    },
    {
      "epoch": 0.7591289993586307,
      "grad_norm": 2.890625,
      "learning_rate": 4.149772450550506e-05,
      "loss": 0.957,
      "step": 216600
    },
    {
      "epoch": 0.7591640468655262,
      "grad_norm": 3.25,
      "learning_rate": 4.149707547684136e-05,
      "loss": 0.8749,
      "step": 216610
    },
    {
      "epoch": 0.7591990943724218,
      "grad_norm": 2.796875,
      "learning_rate": 4.1496426448177664e-05,
      "loss": 0.9217,
      "step": 216620
    },
    {
      "epoch": 0.7592341418793174,
      "grad_norm": 2.671875,
      "learning_rate": 4.149577741951396e-05,
      "loss": 0.9269,
      "step": 216630
    },
    {
      "epoch": 0.759269189386213,
      "grad_norm": 2.78125,
      "learning_rate": 4.149512839085026e-05,
      "loss": 0.9356,
      "step": 216640
    },
    {
      "epoch": 0.7593042368931087,
      "grad_norm": 3.1875,
      "learning_rate": 4.1494479362186555e-05,
      "loss": 0.91,
      "step": 216650
    },
    {
      "epoch": 0.7593392844000042,
      "grad_norm": 3.203125,
      "learning_rate": 4.1493830333522856e-05,
      "loss": 0.9318,
      "step": 216660
    },
    {
      "epoch": 0.7593743319068998,
      "grad_norm": 2.921875,
      "learning_rate": 4.149318130485915e-05,
      "loss": 0.9073,
      "step": 216670
    },
    {
      "epoch": 0.7594093794137954,
      "grad_norm": 2.734375,
      "learning_rate": 4.149253227619545e-05,
      "loss": 0.9457,
      "step": 216680
    },
    {
      "epoch": 0.759444426920691,
      "grad_norm": 3.171875,
      "learning_rate": 4.149188324753175e-05,
      "loss": 0.9793,
      "step": 216690
    },
    {
      "epoch": 0.7594794744275866,
      "grad_norm": 3.40625,
      "learning_rate": 4.149123421886804e-05,
      "loss": 0.9544,
      "step": 216700
    },
    {
      "epoch": 0.7595145219344822,
      "grad_norm": 2.84375,
      "learning_rate": 4.149058519020434e-05,
      "loss": 0.9042,
      "step": 216710
    },
    {
      "epoch": 0.7595495694413777,
      "grad_norm": 3.25,
      "learning_rate": 4.148993616154064e-05,
      "loss": 0.9266,
      "step": 216720
    },
    {
      "epoch": 0.7595846169482734,
      "grad_norm": 3.15625,
      "learning_rate": 4.148928713287694e-05,
      "loss": 0.8978,
      "step": 216730
    },
    {
      "epoch": 0.7596196644551689,
      "grad_norm": 2.875,
      "learning_rate": 4.1488638104213233e-05,
      "loss": 0.9151,
      "step": 216740
    },
    {
      "epoch": 0.7596547119620646,
      "grad_norm": 3.03125,
      "learning_rate": 4.1487989075549535e-05,
      "loss": 0.8113,
      "step": 216750
    },
    {
      "epoch": 0.7596897594689602,
      "grad_norm": 2.90625,
      "learning_rate": 4.148734004688583e-05,
      "loss": 0.984,
      "step": 216760
    },
    {
      "epoch": 0.7597248069758558,
      "grad_norm": 3.640625,
      "learning_rate": 4.148669101822213e-05,
      "loss": 0.898,
      "step": 216770
    },
    {
      "epoch": 0.7597598544827514,
      "grad_norm": 2.765625,
      "learning_rate": 4.1486041989558425e-05,
      "loss": 0.8462,
      "step": 216780
    },
    {
      "epoch": 0.7597949019896469,
      "grad_norm": 2.78125,
      "learning_rate": 4.148539296089473e-05,
      "loss": 0.9239,
      "step": 216790
    },
    {
      "epoch": 0.7598299494965426,
      "grad_norm": 2.734375,
      "learning_rate": 4.148474393223102e-05,
      "loss": 0.9226,
      "step": 216800
    },
    {
      "epoch": 0.7598649970034381,
      "grad_norm": 2.84375,
      "learning_rate": 4.148409490356732e-05,
      "loss": 0.8966,
      "step": 216810
    },
    {
      "epoch": 0.7599000445103338,
      "grad_norm": 2.4375,
      "learning_rate": 4.148344587490362e-05,
      "loss": 0.8156,
      "step": 216820
    },
    {
      "epoch": 0.7599350920172293,
      "grad_norm": 2.640625,
      "learning_rate": 4.148279684623992e-05,
      "loss": 0.8685,
      "step": 216830
    },
    {
      "epoch": 0.759970139524125,
      "grad_norm": 2.875,
      "learning_rate": 4.148214781757622e-05,
      "loss": 0.9104,
      "step": 216840
    },
    {
      "epoch": 0.7600051870310206,
      "grad_norm": 3.1875,
      "learning_rate": 4.1481498788912515e-05,
      "loss": 0.8398,
      "step": 216850
    },
    {
      "epoch": 0.7600402345379161,
      "grad_norm": 2.796875,
      "learning_rate": 4.1480849760248816e-05,
      "loss": 0.9132,
      "step": 216860
    },
    {
      "epoch": 0.7600752820448118,
      "grad_norm": 2.984375,
      "learning_rate": 4.148020073158511e-05,
      "loss": 0.9209,
      "step": 216870
    },
    {
      "epoch": 0.7601103295517073,
      "grad_norm": 3.046875,
      "learning_rate": 4.147955170292141e-05,
      "loss": 0.8788,
      "step": 216880
    },
    {
      "epoch": 0.760145377058603,
      "grad_norm": 2.578125,
      "learning_rate": 4.147890267425771e-05,
      "loss": 0.8246,
      "step": 216890
    },
    {
      "epoch": 0.7601804245654985,
      "grad_norm": 3.171875,
      "learning_rate": 4.147825364559401e-05,
      "loss": 0.9015,
      "step": 216900
    },
    {
      "epoch": 0.7602154720723941,
      "grad_norm": 3.09375,
      "learning_rate": 4.14776046169303e-05,
      "loss": 0.9464,
      "step": 216910
    },
    {
      "epoch": 0.7602505195792897,
      "grad_norm": 3.046875,
      "learning_rate": 4.1476955588266604e-05,
      "loss": 0.939,
      "step": 216920
    },
    {
      "epoch": 0.7602855670861853,
      "grad_norm": 2.84375,
      "learning_rate": 4.14763065596029e-05,
      "loss": 0.9516,
      "step": 216930
    },
    {
      "epoch": 0.7603206145930809,
      "grad_norm": 2.984375,
      "learning_rate": 4.14756575309392e-05,
      "loss": 0.9224,
      "step": 216940
    },
    {
      "epoch": 0.7603556620999765,
      "grad_norm": 3.140625,
      "learning_rate": 4.1475008502275495e-05,
      "loss": 0.9382,
      "step": 216950
    },
    {
      "epoch": 0.7603907096068722,
      "grad_norm": 3.265625,
      "learning_rate": 4.1474359473611796e-05,
      "loss": 0.904,
      "step": 216960
    },
    {
      "epoch": 0.7604257571137677,
      "grad_norm": 2.75,
      "learning_rate": 4.147371044494809e-05,
      "loss": 0.9099,
      "step": 216970
    },
    {
      "epoch": 0.7604608046206633,
      "grad_norm": 2.953125,
      "learning_rate": 4.147306141628439e-05,
      "loss": 0.8422,
      "step": 216980
    },
    {
      "epoch": 0.7604958521275589,
      "grad_norm": 2.75,
      "learning_rate": 4.1472412387620694e-05,
      "loss": 0.9261,
      "step": 216990
    },
    {
      "epoch": 0.7605308996344545,
      "grad_norm": 2.8125,
      "learning_rate": 4.147176335895699e-05,
      "loss": 0.8227,
      "step": 217000
    },
    {
      "epoch": 0.7605659471413501,
      "grad_norm": 3.25,
      "learning_rate": 4.147111433029329e-05,
      "loss": 0.8933,
      "step": 217010
    },
    {
      "epoch": 0.7606009946482457,
      "grad_norm": 3.203125,
      "learning_rate": 4.1470465301629584e-05,
      "loss": 0.977,
      "step": 217020
    },
    {
      "epoch": 0.7606360421551412,
      "grad_norm": 3.0,
      "learning_rate": 4.1469816272965886e-05,
      "loss": 0.9137,
      "step": 217030
    },
    {
      "epoch": 0.7606710896620369,
      "grad_norm": 3.265625,
      "learning_rate": 4.146916724430218e-05,
      "loss": 0.8787,
      "step": 217040
    },
    {
      "epoch": 0.7607061371689325,
      "grad_norm": 2.578125,
      "learning_rate": 4.146851821563848e-05,
      "loss": 0.9052,
      "step": 217050
    },
    {
      "epoch": 0.7607411846758281,
      "grad_norm": 2.921875,
      "learning_rate": 4.1467869186974776e-05,
      "loss": 0.8772,
      "step": 217060
    },
    {
      "epoch": 0.7607762321827237,
      "grad_norm": 3.046875,
      "learning_rate": 4.146722015831107e-05,
      "loss": 0.909,
      "step": 217070
    },
    {
      "epoch": 0.7608112796896193,
      "grad_norm": 3.0625,
      "learning_rate": 4.146657112964737e-05,
      "loss": 0.9228,
      "step": 217080
    },
    {
      "epoch": 0.7608463271965149,
      "grad_norm": 2.890625,
      "learning_rate": 4.146592210098367e-05,
      "loss": 0.8506,
      "step": 217090
    },
    {
      "epoch": 0.7608813747034104,
      "grad_norm": 2.546875,
      "learning_rate": 4.146527307231997e-05,
      "loss": 0.9002,
      "step": 217100
    },
    {
      "epoch": 0.7609164222103061,
      "grad_norm": 2.734375,
      "learning_rate": 4.146462404365626e-05,
      "loss": 0.8237,
      "step": 217110
    },
    {
      "epoch": 0.7609514697172016,
      "grad_norm": 2.84375,
      "learning_rate": 4.1463975014992564e-05,
      "loss": 0.8931,
      "step": 217120
    },
    {
      "epoch": 0.7609865172240973,
      "grad_norm": 2.828125,
      "learning_rate": 4.146332598632886e-05,
      "loss": 0.9759,
      "step": 217130
    },
    {
      "epoch": 0.7610215647309928,
      "grad_norm": 3.03125,
      "learning_rate": 4.146267695766516e-05,
      "loss": 0.8378,
      "step": 217140
    },
    {
      "epoch": 0.7610566122378885,
      "grad_norm": 3.328125,
      "learning_rate": 4.1462027929001455e-05,
      "loss": 0.9333,
      "step": 217150
    },
    {
      "epoch": 0.7610916597447841,
      "grad_norm": 3.65625,
      "learning_rate": 4.1461378900337756e-05,
      "loss": 0.8372,
      "step": 217160
    },
    {
      "epoch": 0.7611267072516796,
      "grad_norm": 3.765625,
      "learning_rate": 4.146072987167405e-05,
      "loss": 0.902,
      "step": 217170
    },
    {
      "epoch": 0.7611617547585753,
      "grad_norm": 3.296875,
      "learning_rate": 4.146008084301035e-05,
      "loss": 0.9623,
      "step": 217180
    },
    {
      "epoch": 0.7611968022654708,
      "grad_norm": 2.953125,
      "learning_rate": 4.145943181434665e-05,
      "loss": 0.9013,
      "step": 217190
    },
    {
      "epoch": 0.7612318497723665,
      "grad_norm": 3.0625,
      "learning_rate": 4.145878278568295e-05,
      "loss": 0.8894,
      "step": 217200
    },
    {
      "epoch": 0.761266897279262,
      "grad_norm": 3.15625,
      "learning_rate": 4.145813375701925e-05,
      "loss": 0.8722,
      "step": 217210
    },
    {
      "epoch": 0.7613019447861576,
      "grad_norm": 3.203125,
      "learning_rate": 4.1457484728355544e-05,
      "loss": 0.9848,
      "step": 217220
    },
    {
      "epoch": 0.7613369922930532,
      "grad_norm": 2.890625,
      "learning_rate": 4.1456835699691846e-05,
      "loss": 0.9469,
      "step": 217230
    },
    {
      "epoch": 0.7613720397999488,
      "grad_norm": 3.0625,
      "learning_rate": 4.145618667102814e-05,
      "loss": 0.8978,
      "step": 217240
    },
    {
      "epoch": 0.7614070873068445,
      "grad_norm": 3.0,
      "learning_rate": 4.145553764236444e-05,
      "loss": 0.958,
      "step": 217250
    },
    {
      "epoch": 0.76144213481374,
      "grad_norm": 2.84375,
      "learning_rate": 4.1454888613700736e-05,
      "loss": 0.913,
      "step": 217260
    },
    {
      "epoch": 0.7614771823206357,
      "grad_norm": 2.921875,
      "learning_rate": 4.145423958503704e-05,
      "loss": 0.9216,
      "step": 217270
    },
    {
      "epoch": 0.7615122298275312,
      "grad_norm": 3.09375,
      "learning_rate": 4.145359055637333e-05,
      "loss": 0.9563,
      "step": 217280
    },
    {
      "epoch": 0.7615472773344268,
      "grad_norm": 2.703125,
      "learning_rate": 4.1452941527709634e-05,
      "loss": 0.9088,
      "step": 217290
    },
    {
      "epoch": 0.7615823248413224,
      "grad_norm": 2.734375,
      "learning_rate": 4.145229249904593e-05,
      "loss": 0.9406,
      "step": 217300
    },
    {
      "epoch": 0.761617372348218,
      "grad_norm": 3.171875,
      "learning_rate": 4.145164347038223e-05,
      "loss": 0.88,
      "step": 217310
    },
    {
      "epoch": 0.7616524198551136,
      "grad_norm": 3.390625,
      "learning_rate": 4.1450994441718524e-05,
      "loss": 0.9346,
      "step": 217320
    },
    {
      "epoch": 0.7616874673620092,
      "grad_norm": 3.171875,
      "learning_rate": 4.1450345413054826e-05,
      "loss": 0.8445,
      "step": 217330
    },
    {
      "epoch": 0.7617225148689049,
      "grad_norm": 3.015625,
      "learning_rate": 4.144969638439112e-05,
      "loss": 0.8739,
      "step": 217340
    },
    {
      "epoch": 0.7617575623758004,
      "grad_norm": 3.0,
      "learning_rate": 4.144904735572742e-05,
      "loss": 0.8227,
      "step": 217350
    },
    {
      "epoch": 0.761792609882696,
      "grad_norm": 3.859375,
      "learning_rate": 4.144839832706372e-05,
      "loss": 0.8815,
      "step": 217360
    },
    {
      "epoch": 0.7618276573895916,
      "grad_norm": 2.9375,
      "learning_rate": 4.144774929840002e-05,
      "loss": 0.8738,
      "step": 217370
    },
    {
      "epoch": 0.7618627048964872,
      "grad_norm": 2.734375,
      "learning_rate": 4.144710026973632e-05,
      "loss": 0.9063,
      "step": 217380
    },
    {
      "epoch": 0.7618977524033828,
      "grad_norm": 3.046875,
      "learning_rate": 4.1446451241072614e-05,
      "loss": 0.8729,
      "step": 217390
    },
    {
      "epoch": 0.7619327999102784,
      "grad_norm": 3.296875,
      "learning_rate": 4.1445802212408915e-05,
      "loss": 0.9177,
      "step": 217400
    },
    {
      "epoch": 0.7619678474171739,
      "grad_norm": 2.625,
      "learning_rate": 4.144515318374521e-05,
      "loss": 0.838,
      "step": 217410
    },
    {
      "epoch": 0.7620028949240696,
      "grad_norm": 2.90625,
      "learning_rate": 4.144450415508151e-05,
      "loss": 0.9247,
      "step": 217420
    },
    {
      "epoch": 0.7620379424309651,
      "grad_norm": 2.875,
      "learning_rate": 4.1443855126417806e-05,
      "loss": 0.8786,
      "step": 217430
    },
    {
      "epoch": 0.7620729899378608,
      "grad_norm": 3.03125,
      "learning_rate": 4.144320609775411e-05,
      "loss": 0.8598,
      "step": 217440
    },
    {
      "epoch": 0.7621080374447564,
      "grad_norm": 2.984375,
      "learning_rate": 4.14425570690904e-05,
      "loss": 0.8011,
      "step": 217450
    },
    {
      "epoch": 0.762143084951652,
      "grad_norm": 2.921875,
      "learning_rate": 4.1441908040426696e-05,
      "loss": 0.9079,
      "step": 217460
    },
    {
      "epoch": 0.7621781324585476,
      "grad_norm": 2.8125,
      "learning_rate": 4.1441259011763e-05,
      "loss": 0.9017,
      "step": 217470
    },
    {
      "epoch": 0.7622131799654431,
      "grad_norm": 2.765625,
      "learning_rate": 4.144060998309929e-05,
      "loss": 0.9002,
      "step": 217480
    },
    {
      "epoch": 0.7622482274723388,
      "grad_norm": 2.875,
      "learning_rate": 4.1439960954435594e-05,
      "loss": 0.9328,
      "step": 217490
    },
    {
      "epoch": 0.7622832749792343,
      "grad_norm": 2.34375,
      "learning_rate": 4.143931192577189e-05,
      "loss": 0.9197,
      "step": 217500
    },
    {
      "epoch": 0.76231832248613,
      "grad_norm": 2.46875,
      "learning_rate": 4.143866289710819e-05,
      "loss": 0.9344,
      "step": 217510
    },
    {
      "epoch": 0.7623533699930255,
      "grad_norm": 3.015625,
      "learning_rate": 4.1438013868444484e-05,
      "loss": 0.8772,
      "step": 217520
    },
    {
      "epoch": 0.7623884174999211,
      "grad_norm": 2.875,
      "learning_rate": 4.1437364839780786e-05,
      "loss": 0.938,
      "step": 217530
    },
    {
      "epoch": 0.7624234650068168,
      "grad_norm": 2.90625,
      "learning_rate": 4.143671581111708e-05,
      "loss": 0.881,
      "step": 217540
    },
    {
      "epoch": 0.7624585125137123,
      "grad_norm": 2.953125,
      "learning_rate": 4.143606678245338e-05,
      "loss": 0.9993,
      "step": 217550
    },
    {
      "epoch": 0.762493560020608,
      "grad_norm": 2.515625,
      "learning_rate": 4.1435417753789676e-05,
      "loss": 0.8799,
      "step": 217560
    },
    {
      "epoch": 0.7625286075275035,
      "grad_norm": 2.546875,
      "learning_rate": 4.143476872512598e-05,
      "loss": 0.8553,
      "step": 217570
    },
    {
      "epoch": 0.7625636550343992,
      "grad_norm": 2.234375,
      "learning_rate": 4.143411969646228e-05,
      "loss": 0.831,
      "step": 217580
    },
    {
      "epoch": 0.7625987025412947,
      "grad_norm": 3.21875,
      "learning_rate": 4.1433470667798574e-05,
      "loss": 0.8559,
      "step": 217590
    },
    {
      "epoch": 0.7626337500481903,
      "grad_norm": 3.078125,
      "learning_rate": 4.1432821639134875e-05,
      "loss": 0.8882,
      "step": 217600
    },
    {
      "epoch": 0.7626687975550859,
      "grad_norm": 2.859375,
      "learning_rate": 4.143217261047117e-05,
      "loss": 0.8366,
      "step": 217610
    },
    {
      "epoch": 0.7627038450619815,
      "grad_norm": 2.8125,
      "learning_rate": 4.143152358180747e-05,
      "loss": 0.8467,
      "step": 217620
    },
    {
      "epoch": 0.7627388925688771,
      "grad_norm": 3.28125,
      "learning_rate": 4.1430874553143766e-05,
      "loss": 0.8638,
      "step": 217630
    },
    {
      "epoch": 0.7627739400757727,
      "grad_norm": 3.109375,
      "learning_rate": 4.143022552448007e-05,
      "loss": 0.9332,
      "step": 217640
    },
    {
      "epoch": 0.7628089875826684,
      "grad_norm": 3.296875,
      "learning_rate": 4.142957649581636e-05,
      "loss": 0.9292,
      "step": 217650
    },
    {
      "epoch": 0.7628440350895639,
      "grad_norm": 2.515625,
      "learning_rate": 4.142892746715266e-05,
      "loss": 0.8779,
      "step": 217660
    },
    {
      "epoch": 0.7628790825964595,
      "grad_norm": 2.84375,
      "learning_rate": 4.142827843848896e-05,
      "loss": 0.8712,
      "step": 217670
    },
    {
      "epoch": 0.7629141301033551,
      "grad_norm": 3.046875,
      "learning_rate": 4.142762940982526e-05,
      "loss": 0.9283,
      "step": 217680
    },
    {
      "epoch": 0.7629491776102507,
      "grad_norm": 2.59375,
      "learning_rate": 4.1426980381161554e-05,
      "loss": 0.8676,
      "step": 217690
    },
    {
      "epoch": 0.7629842251171463,
      "grad_norm": 3.03125,
      "learning_rate": 4.1426331352497855e-05,
      "loss": 0.9516,
      "step": 217700
    },
    {
      "epoch": 0.7630192726240419,
      "grad_norm": 2.765625,
      "learning_rate": 4.142568232383415e-05,
      "loss": 0.9121,
      "step": 217710
    },
    {
      "epoch": 0.7630543201309374,
      "grad_norm": 3.28125,
      "learning_rate": 4.142503329517045e-05,
      "loss": 0.9131,
      "step": 217720
    },
    {
      "epoch": 0.7630893676378331,
      "grad_norm": 2.96875,
      "learning_rate": 4.142438426650675e-05,
      "loss": 0.8585,
      "step": 217730
    },
    {
      "epoch": 0.7631244151447287,
      "grad_norm": 2.8125,
      "learning_rate": 4.142373523784305e-05,
      "loss": 0.9178,
      "step": 217740
    },
    {
      "epoch": 0.7631594626516243,
      "grad_norm": 3.296875,
      "learning_rate": 4.142308620917935e-05,
      "loss": 0.8614,
      "step": 217750
    },
    {
      "epoch": 0.7631945101585199,
      "grad_norm": 2.546875,
      "learning_rate": 4.142243718051564e-05,
      "loss": 0.8668,
      "step": 217760
    },
    {
      "epoch": 0.7632295576654154,
      "grad_norm": 2.9375,
      "learning_rate": 4.1421788151851944e-05,
      "loss": 0.9199,
      "step": 217770
    },
    {
      "epoch": 0.7632646051723111,
      "grad_norm": 3.140625,
      "learning_rate": 4.142113912318824e-05,
      "loss": 0.8834,
      "step": 217780
    },
    {
      "epoch": 0.7632996526792066,
      "grad_norm": 2.609375,
      "learning_rate": 4.142049009452454e-05,
      "loss": 0.9027,
      "step": 217790
    },
    {
      "epoch": 0.7633347001861023,
      "grad_norm": 2.59375,
      "learning_rate": 4.1419841065860835e-05,
      "loss": 0.8545,
      "step": 217800
    },
    {
      "epoch": 0.7633697476929978,
      "grad_norm": 2.96875,
      "learning_rate": 4.1419192037197136e-05,
      "loss": 0.8686,
      "step": 217810
    },
    {
      "epoch": 0.7634047951998935,
      "grad_norm": 2.6875,
      "learning_rate": 4.141854300853343e-05,
      "loss": 0.9974,
      "step": 217820
    },
    {
      "epoch": 0.7634398427067891,
      "grad_norm": 2.96875,
      "learning_rate": 4.1417893979869726e-05,
      "loss": 0.882,
      "step": 217830
    },
    {
      "epoch": 0.7634748902136846,
      "grad_norm": 3.125,
      "learning_rate": 4.141724495120603e-05,
      "loss": 0.878,
      "step": 217840
    },
    {
      "epoch": 0.7635099377205803,
      "grad_norm": 2.8125,
      "learning_rate": 4.141659592254232e-05,
      "loss": 0.8606,
      "step": 217850
    },
    {
      "epoch": 0.7635449852274758,
      "grad_norm": 3.4375,
      "learning_rate": 4.141594689387862e-05,
      "loss": 0.889,
      "step": 217860
    },
    {
      "epoch": 0.7635800327343715,
      "grad_norm": 2.890625,
      "learning_rate": 4.141529786521492e-05,
      "loss": 0.8674,
      "step": 217870
    },
    {
      "epoch": 0.763615080241267,
      "grad_norm": 2.265625,
      "learning_rate": 4.141464883655122e-05,
      "loss": 0.8947,
      "step": 217880
    },
    {
      "epoch": 0.7636501277481627,
      "grad_norm": 2.90625,
      "learning_rate": 4.1413999807887514e-05,
      "loss": 0.9734,
      "step": 217890
    },
    {
      "epoch": 0.7636851752550582,
      "grad_norm": 2.765625,
      "learning_rate": 4.1413350779223815e-05,
      "loss": 0.9071,
      "step": 217900
    },
    {
      "epoch": 0.7637202227619538,
      "grad_norm": 3.21875,
      "learning_rate": 4.141270175056011e-05,
      "loss": 0.9197,
      "step": 217910
    },
    {
      "epoch": 0.7637552702688494,
      "grad_norm": 2.890625,
      "learning_rate": 4.141205272189641e-05,
      "loss": 0.9263,
      "step": 217920
    },
    {
      "epoch": 0.763790317775745,
      "grad_norm": 3.0625,
      "learning_rate": 4.1411403693232706e-05,
      "loss": 0.9358,
      "step": 217930
    },
    {
      "epoch": 0.7638253652826407,
      "grad_norm": 2.765625,
      "learning_rate": 4.141075466456901e-05,
      "loss": 0.8344,
      "step": 217940
    },
    {
      "epoch": 0.7638604127895362,
      "grad_norm": 3.78125,
      "learning_rate": 4.141010563590531e-05,
      "loss": 0.9054,
      "step": 217950
    },
    {
      "epoch": 0.7638954602964318,
      "grad_norm": 2.90625,
      "learning_rate": 4.14094566072416e-05,
      "loss": 0.9422,
      "step": 217960
    },
    {
      "epoch": 0.7639305078033274,
      "grad_norm": 3.078125,
      "learning_rate": 4.1408807578577904e-05,
      "loss": 0.829,
      "step": 217970
    },
    {
      "epoch": 0.763965555310223,
      "grad_norm": 3.25,
      "learning_rate": 4.14081585499142e-05,
      "loss": 0.9475,
      "step": 217980
    },
    {
      "epoch": 0.7640006028171186,
      "grad_norm": 2.546875,
      "learning_rate": 4.14075095212505e-05,
      "loss": 0.9225,
      "step": 217990
    },
    {
      "epoch": 0.7640356503240142,
      "grad_norm": 2.71875,
      "learning_rate": 4.1406860492586795e-05,
      "loss": 0.8809,
      "step": 218000
    },
    {
      "epoch": 0.7640706978309098,
      "grad_norm": 2.921875,
      "learning_rate": 4.1406211463923096e-05,
      "loss": 0.8671,
      "step": 218010
    },
    {
      "epoch": 0.7641057453378054,
      "grad_norm": 3.046875,
      "learning_rate": 4.140556243525939e-05,
      "loss": 0.859,
      "step": 218020
    },
    {
      "epoch": 0.764140792844701,
      "grad_norm": 3.1875,
      "learning_rate": 4.140491340659569e-05,
      "loss": 0.9064,
      "step": 218030
    },
    {
      "epoch": 0.7641758403515966,
      "grad_norm": 2.71875,
      "learning_rate": 4.140426437793199e-05,
      "loss": 0.9096,
      "step": 218040
    },
    {
      "epoch": 0.7642108878584922,
      "grad_norm": 2.984375,
      "learning_rate": 4.140361534926829e-05,
      "loss": 0.8026,
      "step": 218050
    },
    {
      "epoch": 0.7642459353653878,
      "grad_norm": 3.171875,
      "learning_rate": 4.140296632060458e-05,
      "loss": 0.8274,
      "step": 218060
    },
    {
      "epoch": 0.7642809828722834,
      "grad_norm": 3.28125,
      "learning_rate": 4.1402317291940884e-05,
      "loss": 0.852,
      "step": 218070
    },
    {
      "epoch": 0.764316030379179,
      "grad_norm": 2.75,
      "learning_rate": 4.1401668263277186e-05,
      "loss": 0.9169,
      "step": 218080
    },
    {
      "epoch": 0.7643510778860746,
      "grad_norm": 2.765625,
      "learning_rate": 4.140101923461348e-05,
      "loss": 0.907,
      "step": 218090
    },
    {
      "epoch": 0.7643861253929701,
      "grad_norm": 3.265625,
      "learning_rate": 4.140037020594978e-05,
      "loss": 0.8052,
      "step": 218100
    },
    {
      "epoch": 0.7644211728998658,
      "grad_norm": 3.34375,
      "learning_rate": 4.1399721177286076e-05,
      "loss": 0.99,
      "step": 218110
    },
    {
      "epoch": 0.7644562204067613,
      "grad_norm": 3.09375,
      "learning_rate": 4.139907214862238e-05,
      "loss": 0.864,
      "step": 218120
    },
    {
      "epoch": 0.764491267913657,
      "grad_norm": 2.484375,
      "learning_rate": 4.139842311995867e-05,
      "loss": 0.8997,
      "step": 218130
    },
    {
      "epoch": 0.7645263154205526,
      "grad_norm": 3.0625,
      "learning_rate": 4.1397774091294974e-05,
      "loss": 0.9139,
      "step": 218140
    },
    {
      "epoch": 0.7645613629274481,
      "grad_norm": 3.015625,
      "learning_rate": 4.139712506263127e-05,
      "loss": 0.9271,
      "step": 218150
    },
    {
      "epoch": 0.7645964104343438,
      "grad_norm": 2.625,
      "learning_rate": 4.139647603396757e-05,
      "loss": 0.8701,
      "step": 218160
    },
    {
      "epoch": 0.7646314579412393,
      "grad_norm": 2.5,
      "learning_rate": 4.1395827005303864e-05,
      "loss": 0.9262,
      "step": 218170
    },
    {
      "epoch": 0.764666505448135,
      "grad_norm": 2.890625,
      "learning_rate": 4.1395177976640166e-05,
      "loss": 0.8817,
      "step": 218180
    },
    {
      "epoch": 0.7647015529550305,
      "grad_norm": 3.1875,
      "learning_rate": 4.139452894797646e-05,
      "loss": 0.9614,
      "step": 218190
    },
    {
      "epoch": 0.7647366004619262,
      "grad_norm": 2.90625,
      "learning_rate": 4.1393879919312755e-05,
      "loss": 0.9138,
      "step": 218200
    },
    {
      "epoch": 0.7647716479688217,
      "grad_norm": 2.90625,
      "learning_rate": 4.1393230890649056e-05,
      "loss": 0.8358,
      "step": 218210
    },
    {
      "epoch": 0.7648066954757173,
      "grad_norm": 3.0,
      "learning_rate": 4.139258186198535e-05,
      "loss": 0.885,
      "step": 218220
    },
    {
      "epoch": 0.764841742982613,
      "grad_norm": 2.6875,
      "learning_rate": 4.139193283332165e-05,
      "loss": 0.8181,
      "step": 218230
    },
    {
      "epoch": 0.7648767904895085,
      "grad_norm": 2.765625,
      "learning_rate": 4.139128380465795e-05,
      "loss": 0.8935,
      "step": 218240
    },
    {
      "epoch": 0.7649118379964042,
      "grad_norm": 3.171875,
      "learning_rate": 4.139063477599425e-05,
      "loss": 0.9325,
      "step": 218250
    },
    {
      "epoch": 0.7649468855032997,
      "grad_norm": 2.890625,
      "learning_rate": 4.138998574733054e-05,
      "loss": 0.8692,
      "step": 218260
    },
    {
      "epoch": 0.7649819330101953,
      "grad_norm": 2.96875,
      "learning_rate": 4.1389336718666844e-05,
      "loss": 0.8633,
      "step": 218270
    },
    {
      "epoch": 0.7650169805170909,
      "grad_norm": 3.21875,
      "learning_rate": 4.138868769000314e-05,
      "loss": 0.9426,
      "step": 218280
    },
    {
      "epoch": 0.7650520280239865,
      "grad_norm": 2.484375,
      "learning_rate": 4.138803866133944e-05,
      "loss": 0.8665,
      "step": 218290
    },
    {
      "epoch": 0.7650870755308821,
      "grad_norm": 2.859375,
      "learning_rate": 4.1387389632675735e-05,
      "loss": 0.8584,
      "step": 218300
    },
    {
      "epoch": 0.7651221230377777,
      "grad_norm": 3.015625,
      "learning_rate": 4.1386740604012036e-05,
      "loss": 0.9066,
      "step": 218310
    },
    {
      "epoch": 0.7651571705446734,
      "grad_norm": 2.8125,
      "learning_rate": 4.138609157534834e-05,
      "loss": 0.8531,
      "step": 218320
    },
    {
      "epoch": 0.7651922180515689,
      "grad_norm": 2.71875,
      "learning_rate": 4.138544254668463e-05,
      "loss": 0.9227,
      "step": 218330
    },
    {
      "epoch": 0.7652272655584645,
      "grad_norm": 2.515625,
      "learning_rate": 4.1384793518020934e-05,
      "loss": 0.8694,
      "step": 218340
    },
    {
      "epoch": 0.7652623130653601,
      "grad_norm": 2.84375,
      "learning_rate": 4.138414448935723e-05,
      "loss": 0.8495,
      "step": 218350
    },
    {
      "epoch": 0.7652973605722557,
      "grad_norm": 2.84375,
      "learning_rate": 4.138349546069353e-05,
      "loss": 0.9896,
      "step": 218360
    },
    {
      "epoch": 0.7653324080791513,
      "grad_norm": 3.125,
      "learning_rate": 4.1382846432029824e-05,
      "loss": 0.8631,
      "step": 218370
    },
    {
      "epoch": 0.7653674555860469,
      "grad_norm": 2.828125,
      "learning_rate": 4.1382197403366126e-05,
      "loss": 0.8833,
      "step": 218380
    },
    {
      "epoch": 0.7654025030929424,
      "grad_norm": 3.140625,
      "learning_rate": 4.138154837470242e-05,
      "loss": 0.9271,
      "step": 218390
    },
    {
      "epoch": 0.7654375505998381,
      "grad_norm": 2.78125,
      "learning_rate": 4.138089934603872e-05,
      "loss": 0.9137,
      "step": 218400
    },
    {
      "epoch": 0.7654725981067336,
      "grad_norm": 2.96875,
      "learning_rate": 4.1380250317375016e-05,
      "loss": 0.8839,
      "step": 218410
    },
    {
      "epoch": 0.7655076456136293,
      "grad_norm": 3.0625,
      "learning_rate": 4.137960128871132e-05,
      "loss": 0.8209,
      "step": 218420
    },
    {
      "epoch": 0.7655426931205249,
      "grad_norm": 3.125,
      "learning_rate": 4.137895226004761e-05,
      "loss": 0.8697,
      "step": 218430
    },
    {
      "epoch": 0.7655777406274205,
      "grad_norm": 3.5,
      "learning_rate": 4.1378303231383914e-05,
      "loss": 1.0349,
      "step": 218440
    },
    {
      "epoch": 0.7656127881343161,
      "grad_norm": 2.984375,
      "learning_rate": 4.1377654202720215e-05,
      "loss": 0.8826,
      "step": 218450
    },
    {
      "epoch": 0.7656478356412116,
      "grad_norm": 3.21875,
      "learning_rate": 4.137700517405651e-05,
      "loss": 0.9651,
      "step": 218460
    },
    {
      "epoch": 0.7656828831481073,
      "grad_norm": 2.921875,
      "learning_rate": 4.137635614539281e-05,
      "loss": 0.9156,
      "step": 218470
    },
    {
      "epoch": 0.7657179306550028,
      "grad_norm": 3.03125,
      "learning_rate": 4.1375707116729106e-05,
      "loss": 0.8624,
      "step": 218480
    },
    {
      "epoch": 0.7657529781618985,
      "grad_norm": 2.875,
      "learning_rate": 4.137505808806541e-05,
      "loss": 0.9199,
      "step": 218490
    },
    {
      "epoch": 0.765788025668794,
      "grad_norm": 2.921875,
      "learning_rate": 4.13744090594017e-05,
      "loss": 0.9723,
      "step": 218500
    },
    {
      "epoch": 0.7658230731756897,
      "grad_norm": 2.9375,
      "learning_rate": 4.1373760030738e-05,
      "loss": 0.8484,
      "step": 218510
    },
    {
      "epoch": 0.7658581206825853,
      "grad_norm": 3.1875,
      "learning_rate": 4.13731110020743e-05,
      "loss": 0.9069,
      "step": 218520
    },
    {
      "epoch": 0.7658931681894808,
      "grad_norm": 2.515625,
      "learning_rate": 4.13724619734106e-05,
      "loss": 0.9033,
      "step": 218530
    },
    {
      "epoch": 0.7659282156963765,
      "grad_norm": 3.21875,
      "learning_rate": 4.1371812944746894e-05,
      "loss": 0.9289,
      "step": 218540
    },
    {
      "epoch": 0.765963263203272,
      "grad_norm": 3.1875,
      "learning_rate": 4.1371163916083195e-05,
      "loss": 0.9295,
      "step": 218550
    },
    {
      "epoch": 0.7659983107101677,
      "grad_norm": 3.359375,
      "learning_rate": 4.137051488741949e-05,
      "loss": 0.8834,
      "step": 218560
    },
    {
      "epoch": 0.7660333582170632,
      "grad_norm": 3.046875,
      "learning_rate": 4.136986585875579e-05,
      "loss": 0.7985,
      "step": 218570
    },
    {
      "epoch": 0.7660684057239588,
      "grad_norm": 2.984375,
      "learning_rate": 4.1369216830092086e-05,
      "loss": 0.9153,
      "step": 218580
    },
    {
      "epoch": 0.7661034532308544,
      "grad_norm": 3.09375,
      "learning_rate": 4.136856780142838e-05,
      "loss": 0.9063,
      "step": 218590
    },
    {
      "epoch": 0.76613850073775,
      "grad_norm": 2.421875,
      "learning_rate": 4.136791877276468e-05,
      "loss": 0.9271,
      "step": 218600
    },
    {
      "epoch": 0.7661735482446456,
      "grad_norm": 2.953125,
      "learning_rate": 4.1367269744100976e-05,
      "loss": 0.9587,
      "step": 218610
    },
    {
      "epoch": 0.7662085957515412,
      "grad_norm": 3.453125,
      "learning_rate": 4.136662071543728e-05,
      "loss": 0.8495,
      "step": 218620
    },
    {
      "epoch": 0.7662436432584369,
      "grad_norm": 2.671875,
      "learning_rate": 4.136597168677357e-05,
      "loss": 0.9258,
      "step": 218630
    },
    {
      "epoch": 0.7662786907653324,
      "grad_norm": 3.15625,
      "learning_rate": 4.1365322658109874e-05,
      "loss": 0.8976,
      "step": 218640
    },
    {
      "epoch": 0.766313738272228,
      "grad_norm": 2.875,
      "learning_rate": 4.136467362944617e-05,
      "loss": 0.8761,
      "step": 218650
    },
    {
      "epoch": 0.7663487857791236,
      "grad_norm": 2.515625,
      "learning_rate": 4.136402460078247e-05,
      "loss": 0.8374,
      "step": 218660
    },
    {
      "epoch": 0.7663838332860192,
      "grad_norm": 3.046875,
      "learning_rate": 4.1363375572118764e-05,
      "loss": 0.9318,
      "step": 218670
    },
    {
      "epoch": 0.7664188807929148,
      "grad_norm": 2.984375,
      "learning_rate": 4.1362726543455066e-05,
      "loss": 0.9216,
      "step": 218680
    },
    {
      "epoch": 0.7664539282998104,
      "grad_norm": 3.375,
      "learning_rate": 4.136207751479137e-05,
      "loss": 0.9321,
      "step": 218690
    },
    {
      "epoch": 0.7664889758067059,
      "grad_norm": 3.28125,
      "learning_rate": 4.136142848612766e-05,
      "loss": 1.0726,
      "step": 218700
    },
    {
      "epoch": 0.7665240233136016,
      "grad_norm": 3.078125,
      "learning_rate": 4.136077945746396e-05,
      "loss": 0.889,
      "step": 218710
    },
    {
      "epoch": 0.7665590708204972,
      "grad_norm": 2.703125,
      "learning_rate": 4.136013042880026e-05,
      "loss": 0.8002,
      "step": 218720
    },
    {
      "epoch": 0.7665941183273928,
      "grad_norm": 3.34375,
      "learning_rate": 4.135948140013656e-05,
      "loss": 0.8769,
      "step": 218730
    },
    {
      "epoch": 0.7666291658342884,
      "grad_norm": 2.5625,
      "learning_rate": 4.1358832371472854e-05,
      "loss": 0.8323,
      "step": 218740
    },
    {
      "epoch": 0.766664213341184,
      "grad_norm": 2.84375,
      "learning_rate": 4.1358183342809155e-05,
      "loss": 0.7728,
      "step": 218750
    },
    {
      "epoch": 0.7666992608480796,
      "grad_norm": 2.53125,
      "learning_rate": 4.135753431414545e-05,
      "loss": 0.8762,
      "step": 218760
    },
    {
      "epoch": 0.7667343083549751,
      "grad_norm": 3.390625,
      "learning_rate": 4.135688528548175e-05,
      "loss": 0.8948,
      "step": 218770
    },
    {
      "epoch": 0.7667693558618708,
      "grad_norm": 3.015625,
      "learning_rate": 4.1356236256818046e-05,
      "loss": 0.9075,
      "step": 218780
    },
    {
      "epoch": 0.7668044033687663,
      "grad_norm": 3.078125,
      "learning_rate": 4.135558722815435e-05,
      "loss": 0.8201,
      "step": 218790
    },
    {
      "epoch": 0.766839450875662,
      "grad_norm": 3.265625,
      "learning_rate": 4.135493819949064e-05,
      "loss": 0.9134,
      "step": 218800
    },
    {
      "epoch": 0.7668744983825576,
      "grad_norm": 3.453125,
      "learning_rate": 4.135428917082694e-05,
      "loss": 0.8242,
      "step": 218810
    },
    {
      "epoch": 0.7669095458894531,
      "grad_norm": 2.859375,
      "learning_rate": 4.1353640142163244e-05,
      "loss": 0.8423,
      "step": 218820
    },
    {
      "epoch": 0.7669445933963488,
      "grad_norm": 2.921875,
      "learning_rate": 4.135299111349954e-05,
      "loss": 0.9335,
      "step": 218830
    },
    {
      "epoch": 0.7669796409032443,
      "grad_norm": 3.390625,
      "learning_rate": 4.135234208483584e-05,
      "loss": 0.9617,
      "step": 218840
    },
    {
      "epoch": 0.76701468841014,
      "grad_norm": 3.234375,
      "learning_rate": 4.1351693056172135e-05,
      "loss": 0.9223,
      "step": 218850
    },
    {
      "epoch": 0.7670497359170355,
      "grad_norm": 3.03125,
      "learning_rate": 4.1351044027508436e-05,
      "loss": 0.8974,
      "step": 218860
    },
    {
      "epoch": 0.7670847834239312,
      "grad_norm": 2.828125,
      "learning_rate": 4.135039499884473e-05,
      "loss": 0.8987,
      "step": 218870
    },
    {
      "epoch": 0.7671198309308267,
      "grad_norm": 3.109375,
      "learning_rate": 4.134974597018103e-05,
      "loss": 0.9345,
      "step": 218880
    },
    {
      "epoch": 0.7671548784377223,
      "grad_norm": 3.171875,
      "learning_rate": 4.134909694151733e-05,
      "loss": 0.8188,
      "step": 218890
    },
    {
      "epoch": 0.7671899259446179,
      "grad_norm": 3.53125,
      "learning_rate": 4.134844791285363e-05,
      "loss": 0.9575,
      "step": 218900
    },
    {
      "epoch": 0.7672249734515135,
      "grad_norm": 3.15625,
      "learning_rate": 4.134779888418992e-05,
      "loss": 0.8943,
      "step": 218910
    },
    {
      "epoch": 0.7672600209584092,
      "grad_norm": 2.8125,
      "learning_rate": 4.1347149855526224e-05,
      "loss": 0.9193,
      "step": 218920
    },
    {
      "epoch": 0.7672950684653047,
      "grad_norm": 3.1875,
      "learning_rate": 4.134650082686252e-05,
      "loss": 0.8478,
      "step": 218930
    },
    {
      "epoch": 0.7673301159722004,
      "grad_norm": 2.734375,
      "learning_rate": 4.134585179819882e-05,
      "loss": 0.8775,
      "step": 218940
    },
    {
      "epoch": 0.7673651634790959,
      "grad_norm": 2.65625,
      "learning_rate": 4.1345202769535115e-05,
      "loss": 0.9547,
      "step": 218950
    },
    {
      "epoch": 0.7674002109859915,
      "grad_norm": 2.75,
      "learning_rate": 4.134455374087141e-05,
      "loss": 0.8748,
      "step": 218960
    },
    {
      "epoch": 0.7674352584928871,
      "grad_norm": 2.8125,
      "learning_rate": 4.134390471220771e-05,
      "loss": 0.8726,
      "step": 218970
    },
    {
      "epoch": 0.7674703059997827,
      "grad_norm": 2.90625,
      "learning_rate": 4.1343255683544006e-05,
      "loss": 0.9425,
      "step": 218980
    },
    {
      "epoch": 0.7675053535066783,
      "grad_norm": 2.5,
      "learning_rate": 4.134260665488031e-05,
      "loss": 0.8565,
      "step": 218990
    },
    {
      "epoch": 0.7675404010135739,
      "grad_norm": 2.8125,
      "learning_rate": 4.13419576262166e-05,
      "loss": 0.8401,
      "step": 219000
    },
    {
      "epoch": 0.7675754485204695,
      "grad_norm": 3.046875,
      "learning_rate": 4.13413085975529e-05,
      "loss": 0.9335,
      "step": 219010
    },
    {
      "epoch": 0.7676104960273651,
      "grad_norm": 3.265625,
      "learning_rate": 4.13406595688892e-05,
      "loss": 0.9735,
      "step": 219020
    },
    {
      "epoch": 0.7676455435342607,
      "grad_norm": 2.953125,
      "learning_rate": 4.13400105402255e-05,
      "loss": 0.8541,
      "step": 219030
    },
    {
      "epoch": 0.7676805910411563,
      "grad_norm": 2.578125,
      "learning_rate": 4.13393615115618e-05,
      "loss": 0.9183,
      "step": 219040
    },
    {
      "epoch": 0.7677156385480519,
      "grad_norm": 2.984375,
      "learning_rate": 4.1338712482898095e-05,
      "loss": 0.8891,
      "step": 219050
    },
    {
      "epoch": 0.7677506860549475,
      "grad_norm": 2.9375,
      "learning_rate": 4.1338063454234396e-05,
      "loss": 0.8014,
      "step": 219060
    },
    {
      "epoch": 0.7677857335618431,
      "grad_norm": 3.015625,
      "learning_rate": 4.133741442557069e-05,
      "loss": 0.8194,
      "step": 219070
    },
    {
      "epoch": 0.7678207810687386,
      "grad_norm": 3.234375,
      "learning_rate": 4.133676539690699e-05,
      "loss": 0.8273,
      "step": 219080
    },
    {
      "epoch": 0.7678558285756343,
      "grad_norm": 3.015625,
      "learning_rate": 4.133611636824329e-05,
      "loss": 0.8817,
      "step": 219090
    },
    {
      "epoch": 0.7678908760825298,
      "grad_norm": 3.1875,
      "learning_rate": 4.133546733957959e-05,
      "loss": 0.9163,
      "step": 219100
    },
    {
      "epoch": 0.7679259235894255,
      "grad_norm": 3.234375,
      "learning_rate": 4.133481831091588e-05,
      "loss": 0.9708,
      "step": 219110
    },
    {
      "epoch": 0.7679609710963211,
      "grad_norm": 2.53125,
      "learning_rate": 4.1334169282252184e-05,
      "loss": 0.9521,
      "step": 219120
    },
    {
      "epoch": 0.7679960186032166,
      "grad_norm": 3.125,
      "learning_rate": 4.133352025358848e-05,
      "loss": 0.9551,
      "step": 219130
    },
    {
      "epoch": 0.7680310661101123,
      "grad_norm": 3.03125,
      "learning_rate": 4.133287122492478e-05,
      "loss": 0.892,
      "step": 219140
    },
    {
      "epoch": 0.7680661136170078,
      "grad_norm": 3.21875,
      "learning_rate": 4.1332222196261075e-05,
      "loss": 0.8965,
      "step": 219150
    },
    {
      "epoch": 0.7681011611239035,
      "grad_norm": 2.859375,
      "learning_rate": 4.1331573167597376e-05,
      "loss": 0.8872,
      "step": 219160
    },
    {
      "epoch": 0.768136208630799,
      "grad_norm": 3.125,
      "learning_rate": 4.133092413893367e-05,
      "loss": 0.9086,
      "step": 219170
    },
    {
      "epoch": 0.7681712561376947,
      "grad_norm": 2.984375,
      "learning_rate": 4.133027511026997e-05,
      "loss": 0.8467,
      "step": 219180
    },
    {
      "epoch": 0.7682063036445902,
      "grad_norm": 2.609375,
      "learning_rate": 4.1329626081606274e-05,
      "loss": 0.8377,
      "step": 219190
    },
    {
      "epoch": 0.7682413511514858,
      "grad_norm": 3.078125,
      "learning_rate": 4.132897705294257e-05,
      "loss": 0.8599,
      "step": 219200
    },
    {
      "epoch": 0.7682763986583815,
      "grad_norm": 2.984375,
      "learning_rate": 4.132832802427887e-05,
      "loss": 0.8661,
      "step": 219210
    },
    {
      "epoch": 0.768311446165277,
      "grad_norm": 3.015625,
      "learning_rate": 4.1327678995615164e-05,
      "loss": 0.8741,
      "step": 219220
    },
    {
      "epoch": 0.7683464936721727,
      "grad_norm": 2.765625,
      "learning_rate": 4.1327029966951466e-05,
      "loss": 0.8607,
      "step": 219230
    },
    {
      "epoch": 0.7683815411790682,
      "grad_norm": 2.703125,
      "learning_rate": 4.132638093828776e-05,
      "loss": 0.9336,
      "step": 219240
    },
    {
      "epoch": 0.7684165886859639,
      "grad_norm": 3.109375,
      "learning_rate": 4.132573190962406e-05,
      "loss": 0.8513,
      "step": 219250
    },
    {
      "epoch": 0.7684516361928594,
      "grad_norm": 2.703125,
      "learning_rate": 4.1325082880960356e-05,
      "loss": 0.9324,
      "step": 219260
    },
    {
      "epoch": 0.768486683699755,
      "grad_norm": 2.765625,
      "learning_rate": 4.132443385229666e-05,
      "loss": 0.8852,
      "step": 219270
    },
    {
      "epoch": 0.7685217312066506,
      "grad_norm": 2.953125,
      "learning_rate": 4.132378482363295e-05,
      "loss": 0.8647,
      "step": 219280
    },
    {
      "epoch": 0.7685567787135462,
      "grad_norm": 2.640625,
      "learning_rate": 4.1323135794969254e-05,
      "loss": 0.8549,
      "step": 219290
    },
    {
      "epoch": 0.7685918262204418,
      "grad_norm": 2.890625,
      "learning_rate": 4.132248676630555e-05,
      "loss": 0.883,
      "step": 219300
    },
    {
      "epoch": 0.7686268737273374,
      "grad_norm": 2.71875,
      "learning_rate": 4.132183773764185e-05,
      "loss": 0.9099,
      "step": 219310
    },
    {
      "epoch": 0.768661921234233,
      "grad_norm": 2.703125,
      "learning_rate": 4.132118870897815e-05,
      "loss": 0.9755,
      "step": 219320
    },
    {
      "epoch": 0.7686969687411286,
      "grad_norm": 2.75,
      "learning_rate": 4.132053968031444e-05,
      "loss": 0.8838,
      "step": 219330
    },
    {
      "epoch": 0.7687320162480242,
      "grad_norm": 3.25,
      "learning_rate": 4.131989065165074e-05,
      "loss": 0.9287,
      "step": 219340
    },
    {
      "epoch": 0.7687670637549198,
      "grad_norm": 3.140625,
      "learning_rate": 4.1319241622987035e-05,
      "loss": 0.8637,
      "step": 219350
    },
    {
      "epoch": 0.7688021112618154,
      "grad_norm": 2.65625,
      "learning_rate": 4.1318592594323336e-05,
      "loss": 0.8897,
      "step": 219360
    },
    {
      "epoch": 0.768837158768711,
      "grad_norm": 2.703125,
      "learning_rate": 4.131794356565963e-05,
      "loss": 0.9469,
      "step": 219370
    },
    {
      "epoch": 0.7688722062756066,
      "grad_norm": 2.890625,
      "learning_rate": 4.131729453699593e-05,
      "loss": 0.8518,
      "step": 219380
    },
    {
      "epoch": 0.7689072537825021,
      "grad_norm": 3.28125,
      "learning_rate": 4.131664550833223e-05,
      "loss": 0.9117,
      "step": 219390
    },
    {
      "epoch": 0.7689423012893978,
      "grad_norm": 2.75,
      "learning_rate": 4.131599647966853e-05,
      "loss": 0.9591,
      "step": 219400
    },
    {
      "epoch": 0.7689773487962934,
      "grad_norm": 3.28125,
      "learning_rate": 4.131534745100483e-05,
      "loss": 0.819,
      "step": 219410
    },
    {
      "epoch": 0.769012396303189,
      "grad_norm": 3.0,
      "learning_rate": 4.1314698422341124e-05,
      "loss": 0.9078,
      "step": 219420
    },
    {
      "epoch": 0.7690474438100846,
      "grad_norm": 2.734375,
      "learning_rate": 4.1314049393677426e-05,
      "loss": 0.9046,
      "step": 219430
    },
    {
      "epoch": 0.7690824913169801,
      "grad_norm": 2.78125,
      "learning_rate": 4.131340036501372e-05,
      "loss": 0.9255,
      "step": 219440
    },
    {
      "epoch": 0.7691175388238758,
      "grad_norm": 2.421875,
      "learning_rate": 4.131275133635002e-05,
      "loss": 0.8444,
      "step": 219450
    },
    {
      "epoch": 0.7691525863307713,
      "grad_norm": 2.875,
      "learning_rate": 4.1312102307686316e-05,
      "loss": 0.9614,
      "step": 219460
    },
    {
      "epoch": 0.769187633837667,
      "grad_norm": 3.09375,
      "learning_rate": 4.131145327902262e-05,
      "loss": 0.916,
      "step": 219470
    },
    {
      "epoch": 0.7692226813445625,
      "grad_norm": 3.515625,
      "learning_rate": 4.131080425035891e-05,
      "loss": 0.9527,
      "step": 219480
    },
    {
      "epoch": 0.7692577288514582,
      "grad_norm": 2.859375,
      "learning_rate": 4.1310155221695214e-05,
      "loss": 0.8716,
      "step": 219490
    },
    {
      "epoch": 0.7692927763583538,
      "grad_norm": 2.984375,
      "learning_rate": 4.130950619303151e-05,
      "loss": 0.8139,
      "step": 219500
    },
    {
      "epoch": 0.7693278238652493,
      "grad_norm": 2.734375,
      "learning_rate": 4.130885716436781e-05,
      "loss": 0.901,
      "step": 219510
    },
    {
      "epoch": 0.769362871372145,
      "grad_norm": 2.96875,
      "learning_rate": 4.1308208135704104e-05,
      "loss": 0.9864,
      "step": 219520
    },
    {
      "epoch": 0.7693979188790405,
      "grad_norm": 3.1875,
      "learning_rate": 4.1307559107040406e-05,
      "loss": 0.8603,
      "step": 219530
    },
    {
      "epoch": 0.7694329663859362,
      "grad_norm": 2.921875,
      "learning_rate": 4.13069100783767e-05,
      "loss": 0.852,
      "step": 219540
    },
    {
      "epoch": 0.7694680138928317,
      "grad_norm": 3.09375,
      "learning_rate": 4.1306261049713e-05,
      "loss": 0.9774,
      "step": 219550
    },
    {
      "epoch": 0.7695030613997274,
      "grad_norm": 3.15625,
      "learning_rate": 4.13056120210493e-05,
      "loss": 0.8372,
      "step": 219560
    },
    {
      "epoch": 0.7695381089066229,
      "grad_norm": 3.078125,
      "learning_rate": 4.13049629923856e-05,
      "loss": 0.8839,
      "step": 219570
    },
    {
      "epoch": 0.7695731564135185,
      "grad_norm": 2.765625,
      "learning_rate": 4.13043139637219e-05,
      "loss": 0.8612,
      "step": 219580
    },
    {
      "epoch": 0.7696082039204141,
      "grad_norm": 3.546875,
      "learning_rate": 4.1303664935058194e-05,
      "loss": 0.931,
      "step": 219590
    },
    {
      "epoch": 0.7696432514273097,
      "grad_norm": 3.078125,
      "learning_rate": 4.1303015906394495e-05,
      "loss": 0.9343,
      "step": 219600
    },
    {
      "epoch": 0.7696782989342054,
      "grad_norm": 3.203125,
      "learning_rate": 4.130236687773079e-05,
      "loss": 0.9062,
      "step": 219610
    },
    {
      "epoch": 0.7697133464411009,
      "grad_norm": 2.984375,
      "learning_rate": 4.130171784906709e-05,
      "loss": 0.9656,
      "step": 219620
    },
    {
      "epoch": 0.7697483939479965,
      "grad_norm": 2.75,
      "learning_rate": 4.1301068820403386e-05,
      "loss": 0.8816,
      "step": 219630
    },
    {
      "epoch": 0.7697834414548921,
      "grad_norm": 3.4375,
      "learning_rate": 4.130041979173969e-05,
      "loss": 0.9598,
      "step": 219640
    },
    {
      "epoch": 0.7698184889617877,
      "grad_norm": 3.0,
      "learning_rate": 4.129977076307598e-05,
      "loss": 0.8656,
      "step": 219650
    },
    {
      "epoch": 0.7698535364686833,
      "grad_norm": 3.078125,
      "learning_rate": 4.129912173441228e-05,
      "loss": 0.9084,
      "step": 219660
    },
    {
      "epoch": 0.7698885839755789,
      "grad_norm": 2.984375,
      "learning_rate": 4.129847270574858e-05,
      "loss": 0.9238,
      "step": 219670
    },
    {
      "epoch": 0.7699236314824744,
      "grad_norm": 3.0,
      "learning_rate": 4.129782367708488e-05,
      "loss": 0.8105,
      "step": 219680
    },
    {
      "epoch": 0.7699586789893701,
      "grad_norm": 3.359375,
      "learning_rate": 4.129717464842118e-05,
      "loss": 0.9961,
      "step": 219690
    },
    {
      "epoch": 0.7699937264962657,
      "grad_norm": 2.6875,
      "learning_rate": 4.1296525619757475e-05,
      "loss": 0.9541,
      "step": 219700
    },
    {
      "epoch": 0.7700287740031613,
      "grad_norm": 2.75,
      "learning_rate": 4.129587659109377e-05,
      "loss": 0.8661,
      "step": 219710
    },
    {
      "epoch": 0.7700638215100569,
      "grad_norm": 3.125,
      "learning_rate": 4.1295227562430064e-05,
      "loss": 0.9011,
      "step": 219720
    },
    {
      "epoch": 0.7700988690169525,
      "grad_norm": 3.171875,
      "learning_rate": 4.1294578533766366e-05,
      "loss": 0.8519,
      "step": 219730
    },
    {
      "epoch": 0.7701339165238481,
      "grad_norm": 2.828125,
      "learning_rate": 4.129392950510266e-05,
      "loss": 0.8527,
      "step": 219740
    },
    {
      "epoch": 0.7701689640307436,
      "grad_norm": 2.609375,
      "learning_rate": 4.129328047643896e-05,
      "loss": 0.9035,
      "step": 219750
    },
    {
      "epoch": 0.7702040115376393,
      "grad_norm": 3.609375,
      "learning_rate": 4.1292631447775256e-05,
      "loss": 0.9279,
      "step": 219760
    },
    {
      "epoch": 0.7702390590445348,
      "grad_norm": 3.21875,
      "learning_rate": 4.129198241911156e-05,
      "loss": 0.8902,
      "step": 219770
    },
    {
      "epoch": 0.7702741065514305,
      "grad_norm": 2.703125,
      "learning_rate": 4.129133339044786e-05,
      "loss": 0.898,
      "step": 219780
    },
    {
      "epoch": 0.770309154058326,
      "grad_norm": 2.921875,
      "learning_rate": 4.1290684361784154e-05,
      "loss": 0.8937,
      "step": 219790
    },
    {
      "epoch": 0.7703442015652217,
      "grad_norm": 3.0625,
      "learning_rate": 4.1290035333120455e-05,
      "loss": 0.8747,
      "step": 219800
    },
    {
      "epoch": 0.7703792490721173,
      "grad_norm": 2.90625,
      "learning_rate": 4.128938630445675e-05,
      "loss": 0.929,
      "step": 219810
    },
    {
      "epoch": 0.7704142965790128,
      "grad_norm": 2.9375,
      "learning_rate": 4.128873727579305e-05,
      "loss": 0.8539,
      "step": 219820
    },
    {
      "epoch": 0.7704493440859085,
      "grad_norm": 3.1875,
      "learning_rate": 4.1288088247129346e-05,
      "loss": 0.9868,
      "step": 219830
    },
    {
      "epoch": 0.770484391592804,
      "grad_norm": 2.4375,
      "learning_rate": 4.128743921846565e-05,
      "loss": 0.8978,
      "step": 219840
    },
    {
      "epoch": 0.7705194390996997,
      "grad_norm": 2.734375,
      "learning_rate": 4.128679018980194e-05,
      "loss": 0.9272,
      "step": 219850
    },
    {
      "epoch": 0.7705544866065952,
      "grad_norm": 3.484375,
      "learning_rate": 4.128614116113824e-05,
      "loss": 0.8885,
      "step": 219860
    },
    {
      "epoch": 0.7705895341134908,
      "grad_norm": 2.828125,
      "learning_rate": 4.128549213247454e-05,
      "loss": 0.9058,
      "step": 219870
    },
    {
      "epoch": 0.7706245816203864,
      "grad_norm": 2.640625,
      "learning_rate": 4.128484310381084e-05,
      "loss": 0.9261,
      "step": 219880
    },
    {
      "epoch": 0.770659629127282,
      "grad_norm": 2.453125,
      "learning_rate": 4.1284194075147134e-05,
      "loss": 0.9592,
      "step": 219890
    },
    {
      "epoch": 0.7706946766341777,
      "grad_norm": 3.125,
      "learning_rate": 4.1283545046483435e-05,
      "loss": 0.8378,
      "step": 219900
    },
    {
      "epoch": 0.7707297241410732,
      "grad_norm": 3.328125,
      "learning_rate": 4.128289601781973e-05,
      "loss": 0.9553,
      "step": 219910
    },
    {
      "epoch": 0.7707647716479689,
      "grad_norm": 2.9375,
      "learning_rate": 4.128224698915603e-05,
      "loss": 0.883,
      "step": 219920
    },
    {
      "epoch": 0.7707998191548644,
      "grad_norm": 2.9375,
      "learning_rate": 4.128159796049233e-05,
      "loss": 0.9726,
      "step": 219930
    },
    {
      "epoch": 0.77083486666176,
      "grad_norm": 2.953125,
      "learning_rate": 4.128094893182863e-05,
      "loss": 0.8073,
      "step": 219940
    },
    {
      "epoch": 0.7708699141686556,
      "grad_norm": 3.015625,
      "learning_rate": 4.128029990316493e-05,
      "loss": 0.8734,
      "step": 219950
    },
    {
      "epoch": 0.7709049616755512,
      "grad_norm": 2.265625,
      "learning_rate": 4.127965087450122e-05,
      "loss": 0.9107,
      "step": 219960
    },
    {
      "epoch": 0.7709400091824468,
      "grad_norm": 3.234375,
      "learning_rate": 4.1279001845837525e-05,
      "loss": 0.8885,
      "step": 219970
    },
    {
      "epoch": 0.7709750566893424,
      "grad_norm": 2.578125,
      "learning_rate": 4.127835281717382e-05,
      "loss": 0.8388,
      "step": 219980
    },
    {
      "epoch": 0.771010104196238,
      "grad_norm": 2.828125,
      "learning_rate": 4.127770378851012e-05,
      "loss": 0.8551,
      "step": 219990
    },
    {
      "epoch": 0.7710451517031336,
      "grad_norm": 3.15625,
      "learning_rate": 4.1277054759846415e-05,
      "loss": 0.8976,
      "step": 220000
    },
    {
      "epoch": 0.7710451517031336,
      "eval_loss": 0.8406285047531128,
      "eval_runtime": 552.59,
      "eval_samples_per_second": 688.46,
      "eval_steps_per_second": 57.372,
      "step": 220000
    },
    {
      "epoch": 0.7710801992100292,
      "grad_norm": 2.828125,
      "learning_rate": 4.127640573118272e-05,
      "loss": 0.8789,
      "step": 220010
    },
    {
      "epoch": 0.7711152467169248,
      "grad_norm": 3.375,
      "learning_rate": 4.127575670251901e-05,
      "loss": 0.9325,
      "step": 220020
    },
    {
      "epoch": 0.7711502942238204,
      "grad_norm": 2.796875,
      "learning_rate": 4.127510767385531e-05,
      "loss": 0.8911,
      "step": 220030
    },
    {
      "epoch": 0.771185341730716,
      "grad_norm": 2.4375,
      "learning_rate": 4.127445864519161e-05,
      "loss": 0.8163,
      "step": 220040
    },
    {
      "epoch": 0.7712203892376116,
      "grad_norm": 2.75,
      "learning_rate": 4.127380961652791e-05,
      "loss": 0.9263,
      "step": 220050
    },
    {
      "epoch": 0.7712554367445071,
      "grad_norm": 3.1875,
      "learning_rate": 4.127316058786421e-05,
      "loss": 0.9386,
      "step": 220060
    },
    {
      "epoch": 0.7712904842514028,
      "grad_norm": 3.03125,
      "learning_rate": 4.1272511559200505e-05,
      "loss": 0.8943,
      "step": 220070
    },
    {
      "epoch": 0.7713255317582983,
      "grad_norm": 2.828125,
      "learning_rate": 4.12718625305368e-05,
      "loss": 0.9483,
      "step": 220080
    },
    {
      "epoch": 0.771360579265194,
      "grad_norm": 3.046875,
      "learning_rate": 4.1271213501873094e-05,
      "loss": 0.9295,
      "step": 220090
    },
    {
      "epoch": 0.7713956267720896,
      "grad_norm": 2.734375,
      "learning_rate": 4.1270564473209395e-05,
      "loss": 0.8962,
      "step": 220100
    },
    {
      "epoch": 0.7714306742789852,
      "grad_norm": 2.765625,
      "learning_rate": 4.126991544454569e-05,
      "loss": 0.8068,
      "step": 220110
    },
    {
      "epoch": 0.7714657217858808,
      "grad_norm": 2.9375,
      "learning_rate": 4.126926641588199e-05,
      "loss": 0.8824,
      "step": 220120
    },
    {
      "epoch": 0.7715007692927763,
      "grad_norm": 3.03125,
      "learning_rate": 4.1268617387218286e-05,
      "loss": 0.8839,
      "step": 220130
    },
    {
      "epoch": 0.771535816799672,
      "grad_norm": 3.015625,
      "learning_rate": 4.126796835855459e-05,
      "loss": 0.8502,
      "step": 220140
    },
    {
      "epoch": 0.7715708643065675,
      "grad_norm": 3.390625,
      "learning_rate": 4.126731932989089e-05,
      "loss": 0.902,
      "step": 220150
    },
    {
      "epoch": 0.7716059118134632,
      "grad_norm": 2.59375,
      "learning_rate": 4.126667030122718e-05,
      "loss": 0.8715,
      "step": 220160
    },
    {
      "epoch": 0.7716409593203587,
      "grad_norm": 2.890625,
      "learning_rate": 4.1266021272563485e-05,
      "loss": 0.8981,
      "step": 220170
    },
    {
      "epoch": 0.7716760068272543,
      "grad_norm": 2.859375,
      "learning_rate": 4.126537224389978e-05,
      "loss": 0.9296,
      "step": 220180
    },
    {
      "epoch": 0.77171105433415,
      "grad_norm": 2.578125,
      "learning_rate": 4.126472321523608e-05,
      "loss": 0.9298,
      "step": 220190
    },
    {
      "epoch": 0.7717461018410455,
      "grad_norm": 2.75,
      "learning_rate": 4.1264074186572375e-05,
      "loss": 0.9364,
      "step": 220200
    },
    {
      "epoch": 0.7717811493479412,
      "grad_norm": 3.25,
      "learning_rate": 4.126342515790868e-05,
      "loss": 0.8798,
      "step": 220210
    },
    {
      "epoch": 0.7718161968548367,
      "grad_norm": 3.1875,
      "learning_rate": 4.126277612924497e-05,
      "loss": 0.9089,
      "step": 220220
    },
    {
      "epoch": 0.7718512443617324,
      "grad_norm": 2.828125,
      "learning_rate": 4.126212710058127e-05,
      "loss": 0.8911,
      "step": 220230
    },
    {
      "epoch": 0.7718862918686279,
      "grad_norm": 2.6875,
      "learning_rate": 4.126147807191757e-05,
      "loss": 0.9544,
      "step": 220240
    },
    {
      "epoch": 0.7719213393755235,
      "grad_norm": 2.40625,
      "learning_rate": 4.126082904325387e-05,
      "loss": 0.81,
      "step": 220250
    },
    {
      "epoch": 0.7719563868824191,
      "grad_norm": 2.828125,
      "learning_rate": 4.126018001459016e-05,
      "loss": 0.9031,
      "step": 220260
    },
    {
      "epoch": 0.7719914343893147,
      "grad_norm": 3.296875,
      "learning_rate": 4.1259530985926465e-05,
      "loss": 0.9191,
      "step": 220270
    },
    {
      "epoch": 0.7720264818962103,
      "grad_norm": 2.875,
      "learning_rate": 4.1258881957262766e-05,
      "loss": 0.9503,
      "step": 220280
    },
    {
      "epoch": 0.7720615294031059,
      "grad_norm": 2.359375,
      "learning_rate": 4.125823292859906e-05,
      "loss": 0.9079,
      "step": 220290
    },
    {
      "epoch": 0.7720965769100016,
      "grad_norm": 2.984375,
      "learning_rate": 4.125758389993536e-05,
      "loss": 0.8648,
      "step": 220300
    },
    {
      "epoch": 0.7721316244168971,
      "grad_norm": 3.34375,
      "learning_rate": 4.125693487127166e-05,
      "loss": 0.846,
      "step": 220310
    },
    {
      "epoch": 0.7721666719237927,
      "grad_norm": 2.84375,
      "learning_rate": 4.125628584260796e-05,
      "loss": 0.8471,
      "step": 220320
    },
    {
      "epoch": 0.7722017194306883,
      "grad_norm": 2.734375,
      "learning_rate": 4.125563681394425e-05,
      "loss": 0.7965,
      "step": 220330
    },
    {
      "epoch": 0.7722367669375839,
      "grad_norm": 3.328125,
      "learning_rate": 4.1254987785280554e-05,
      "loss": 0.8645,
      "step": 220340
    },
    {
      "epoch": 0.7722718144444795,
      "grad_norm": 2.65625,
      "learning_rate": 4.125433875661685e-05,
      "loss": 0.8718,
      "step": 220350
    },
    {
      "epoch": 0.7723068619513751,
      "grad_norm": 2.28125,
      "learning_rate": 4.125368972795315e-05,
      "loss": 0.8257,
      "step": 220360
    },
    {
      "epoch": 0.7723419094582706,
      "grad_norm": 3.125,
      "learning_rate": 4.1253040699289445e-05,
      "loss": 0.906,
      "step": 220370
    },
    {
      "epoch": 0.7723769569651663,
      "grad_norm": 2.953125,
      "learning_rate": 4.1252391670625746e-05,
      "loss": 0.8354,
      "step": 220380
    },
    {
      "epoch": 0.7724120044720619,
      "grad_norm": 2.765625,
      "learning_rate": 4.125174264196204e-05,
      "loss": 0.9108,
      "step": 220390
    },
    {
      "epoch": 0.7724470519789575,
      "grad_norm": 2.828125,
      "learning_rate": 4.125109361329834e-05,
      "loss": 0.8059,
      "step": 220400
    },
    {
      "epoch": 0.7724820994858531,
      "grad_norm": 2.703125,
      "learning_rate": 4.125044458463464e-05,
      "loss": 0.858,
      "step": 220410
    },
    {
      "epoch": 0.7725171469927486,
      "grad_norm": 2.875,
      "learning_rate": 4.124979555597094e-05,
      "loss": 0.8948,
      "step": 220420
    },
    {
      "epoch": 0.7725521944996443,
      "grad_norm": 3.015625,
      "learning_rate": 4.124914652730724e-05,
      "loss": 0.7963,
      "step": 220430
    },
    {
      "epoch": 0.7725872420065398,
      "grad_norm": 3.0,
      "learning_rate": 4.1248497498643534e-05,
      "loss": 1.0266,
      "step": 220440
    },
    {
      "epoch": 0.7726222895134355,
      "grad_norm": 3.1875,
      "learning_rate": 4.1247848469979835e-05,
      "loss": 0.891,
      "step": 220450
    },
    {
      "epoch": 0.772657337020331,
      "grad_norm": 2.453125,
      "learning_rate": 4.124719944131612e-05,
      "loss": 0.9128,
      "step": 220460
    },
    {
      "epoch": 0.7726923845272267,
      "grad_norm": 2.46875,
      "learning_rate": 4.1246550412652425e-05,
      "loss": 0.8978,
      "step": 220470
    },
    {
      "epoch": 0.7727274320341223,
      "grad_norm": 2.8125,
      "learning_rate": 4.124590138398872e-05,
      "loss": 0.8236,
      "step": 220480
    },
    {
      "epoch": 0.7727624795410178,
      "grad_norm": 2.625,
      "learning_rate": 4.124525235532502e-05,
      "loss": 0.9664,
      "step": 220490
    },
    {
      "epoch": 0.7727975270479135,
      "grad_norm": 2.953125,
      "learning_rate": 4.1244603326661315e-05,
      "loss": 0.9149,
      "step": 220500
    },
    {
      "epoch": 0.772832574554809,
      "grad_norm": 2.59375,
      "learning_rate": 4.124395429799762e-05,
      "loss": 0.9749,
      "step": 220510
    },
    {
      "epoch": 0.7728676220617047,
      "grad_norm": 3.34375,
      "learning_rate": 4.124330526933392e-05,
      "loss": 0.8284,
      "step": 220520
    },
    {
      "epoch": 0.7729026695686002,
      "grad_norm": 3.0625,
      "learning_rate": 4.124265624067021e-05,
      "loss": 0.8695,
      "step": 220530
    },
    {
      "epoch": 0.7729377170754959,
      "grad_norm": 3.203125,
      "learning_rate": 4.1242007212006514e-05,
      "loss": 0.8489,
      "step": 220540
    },
    {
      "epoch": 0.7729727645823914,
      "grad_norm": 3.375,
      "learning_rate": 4.124135818334281e-05,
      "loss": 0.9544,
      "step": 220550
    },
    {
      "epoch": 0.773007812089287,
      "grad_norm": 3.484375,
      "learning_rate": 4.124070915467911e-05,
      "loss": 0.9926,
      "step": 220560
    },
    {
      "epoch": 0.7730428595961826,
      "grad_norm": 3.203125,
      "learning_rate": 4.1240060126015405e-05,
      "loss": 0.8789,
      "step": 220570
    },
    {
      "epoch": 0.7730779071030782,
      "grad_norm": 3.3125,
      "learning_rate": 4.1239411097351706e-05,
      "loss": 0.9327,
      "step": 220580
    },
    {
      "epoch": 0.7731129546099739,
      "grad_norm": 2.8125,
      "learning_rate": 4.1238762068688e-05,
      "loss": 0.7668,
      "step": 220590
    },
    {
      "epoch": 0.7731480021168694,
      "grad_norm": 2.734375,
      "learning_rate": 4.12381130400243e-05,
      "loss": 0.9016,
      "step": 220600
    },
    {
      "epoch": 0.773183049623765,
      "grad_norm": 2.9375,
      "learning_rate": 4.12374640113606e-05,
      "loss": 0.9635,
      "step": 220610
    },
    {
      "epoch": 0.7732180971306606,
      "grad_norm": 2.703125,
      "learning_rate": 4.12368149826969e-05,
      "loss": 0.8823,
      "step": 220620
    },
    {
      "epoch": 0.7732531446375562,
      "grad_norm": 2.828125,
      "learning_rate": 4.123616595403319e-05,
      "loss": 0.9177,
      "step": 220630
    },
    {
      "epoch": 0.7732881921444518,
      "grad_norm": 2.984375,
      "learning_rate": 4.1235516925369494e-05,
      "loss": 0.878,
      "step": 220640
    },
    {
      "epoch": 0.7733232396513474,
      "grad_norm": 2.796875,
      "learning_rate": 4.1234867896705795e-05,
      "loss": 0.9342,
      "step": 220650
    },
    {
      "epoch": 0.773358287158243,
      "grad_norm": 2.59375,
      "learning_rate": 4.123421886804209e-05,
      "loss": 0.9019,
      "step": 220660
    },
    {
      "epoch": 0.7733933346651386,
      "grad_norm": 2.96875,
      "learning_rate": 4.123356983937839e-05,
      "loss": 0.9738,
      "step": 220670
    },
    {
      "epoch": 0.7734283821720342,
      "grad_norm": 3.15625,
      "learning_rate": 4.1232920810714686e-05,
      "loss": 0.9579,
      "step": 220680
    },
    {
      "epoch": 0.7734634296789298,
      "grad_norm": 3.21875,
      "learning_rate": 4.123227178205099e-05,
      "loss": 0.8853,
      "step": 220690
    },
    {
      "epoch": 0.7734984771858254,
      "grad_norm": 2.765625,
      "learning_rate": 4.123162275338728e-05,
      "loss": 0.8484,
      "step": 220700
    },
    {
      "epoch": 0.773533524692721,
      "grad_norm": 2.734375,
      "learning_rate": 4.1230973724723583e-05,
      "loss": 0.8553,
      "step": 220710
    },
    {
      "epoch": 0.7735685721996166,
      "grad_norm": 2.703125,
      "learning_rate": 4.123032469605988e-05,
      "loss": 0.8804,
      "step": 220720
    },
    {
      "epoch": 0.7736036197065121,
      "grad_norm": 2.828125,
      "learning_rate": 4.122967566739618e-05,
      "loss": 0.9735,
      "step": 220730
    },
    {
      "epoch": 0.7736386672134078,
      "grad_norm": 2.671875,
      "learning_rate": 4.1229026638732474e-05,
      "loss": 0.8711,
      "step": 220740
    },
    {
      "epoch": 0.7736737147203033,
      "grad_norm": 2.984375,
      "learning_rate": 4.1228377610068775e-05,
      "loss": 0.8297,
      "step": 220750
    },
    {
      "epoch": 0.773708762227199,
      "grad_norm": 2.78125,
      "learning_rate": 4.122772858140507e-05,
      "loss": 0.7702,
      "step": 220760
    },
    {
      "epoch": 0.7737438097340945,
      "grad_norm": 3.03125,
      "learning_rate": 4.122707955274137e-05,
      "loss": 0.8155,
      "step": 220770
    },
    {
      "epoch": 0.7737788572409902,
      "grad_norm": 2.8125,
      "learning_rate": 4.1226430524077666e-05,
      "loss": 0.8923,
      "step": 220780
    },
    {
      "epoch": 0.7738139047478858,
      "grad_norm": 2.5625,
      "learning_rate": 4.122578149541397e-05,
      "loss": 0.8948,
      "step": 220790
    },
    {
      "epoch": 0.7738489522547813,
      "grad_norm": 2.96875,
      "learning_rate": 4.122513246675027e-05,
      "loss": 0.8993,
      "step": 220800
    },
    {
      "epoch": 0.773883999761677,
      "grad_norm": 3.203125,
      "learning_rate": 4.1224483438086563e-05,
      "loss": 0.954,
      "step": 220810
    },
    {
      "epoch": 0.7739190472685725,
      "grad_norm": 2.4375,
      "learning_rate": 4.1223834409422865e-05,
      "loss": 0.8734,
      "step": 220820
    },
    {
      "epoch": 0.7739540947754682,
      "grad_norm": 2.90625,
      "learning_rate": 4.122318538075916e-05,
      "loss": 0.8505,
      "step": 220830
    },
    {
      "epoch": 0.7739891422823637,
      "grad_norm": 2.828125,
      "learning_rate": 4.1222536352095454e-05,
      "loss": 0.9916,
      "step": 220840
    },
    {
      "epoch": 0.7740241897892594,
      "grad_norm": 2.890625,
      "learning_rate": 4.122188732343175e-05,
      "loss": 0.8634,
      "step": 220850
    },
    {
      "epoch": 0.7740592372961549,
      "grad_norm": 2.921875,
      "learning_rate": 4.122123829476805e-05,
      "loss": 0.9554,
      "step": 220860
    },
    {
      "epoch": 0.7740942848030505,
      "grad_norm": 2.625,
      "learning_rate": 4.1220589266104345e-05,
      "loss": 0.8373,
      "step": 220870
    },
    {
      "epoch": 0.7741293323099462,
      "grad_norm": 2.765625,
      "learning_rate": 4.1219940237440646e-05,
      "loss": 0.8953,
      "step": 220880
    },
    {
      "epoch": 0.7741643798168417,
      "grad_norm": 2.984375,
      "learning_rate": 4.121929120877695e-05,
      "loss": 0.8626,
      "step": 220890
    },
    {
      "epoch": 0.7741994273237374,
      "grad_norm": 2.734375,
      "learning_rate": 4.121864218011324e-05,
      "loss": 0.8658,
      "step": 220900
    },
    {
      "epoch": 0.7742344748306329,
      "grad_norm": 3.0625,
      "learning_rate": 4.1217993151449543e-05,
      "loss": 0.9358,
      "step": 220910
    },
    {
      "epoch": 0.7742695223375285,
      "grad_norm": 2.8125,
      "learning_rate": 4.121734412278584e-05,
      "loss": 0.8385,
      "step": 220920
    },
    {
      "epoch": 0.7743045698444241,
      "grad_norm": 3.140625,
      "learning_rate": 4.121669509412214e-05,
      "loss": 1.0309,
      "step": 220930
    },
    {
      "epoch": 0.7743396173513197,
      "grad_norm": 2.453125,
      "learning_rate": 4.1216046065458434e-05,
      "loss": 0.9052,
      "step": 220940
    },
    {
      "epoch": 0.7743746648582153,
      "grad_norm": 3.0625,
      "learning_rate": 4.1215397036794735e-05,
      "loss": 0.8022,
      "step": 220950
    },
    {
      "epoch": 0.7744097123651109,
      "grad_norm": 2.859375,
      "learning_rate": 4.121474800813103e-05,
      "loss": 0.9534,
      "step": 220960
    },
    {
      "epoch": 0.7744447598720064,
      "grad_norm": 3.109375,
      "learning_rate": 4.121409897946733e-05,
      "loss": 0.8584,
      "step": 220970
    },
    {
      "epoch": 0.7744798073789021,
      "grad_norm": 2.703125,
      "learning_rate": 4.1213449950803626e-05,
      "loss": 0.8984,
      "step": 220980
    },
    {
      "epoch": 0.7745148548857977,
      "grad_norm": 2.96875,
      "learning_rate": 4.121280092213993e-05,
      "loss": 0.8483,
      "step": 220990
    },
    {
      "epoch": 0.7745499023926933,
      "grad_norm": 2.875,
      "learning_rate": 4.121215189347622e-05,
      "loss": 0.9217,
      "step": 221000
    },
    {
      "epoch": 0.7745849498995889,
      "grad_norm": 2.984375,
      "learning_rate": 4.1211502864812523e-05,
      "loss": 0.9038,
      "step": 221010
    },
    {
      "epoch": 0.7746199974064845,
      "grad_norm": 2.703125,
      "learning_rate": 4.1210853836148825e-05,
      "loss": 0.8819,
      "step": 221020
    },
    {
      "epoch": 0.7746550449133801,
      "grad_norm": 2.84375,
      "learning_rate": 4.121020480748512e-05,
      "loss": 0.9194,
      "step": 221030
    },
    {
      "epoch": 0.7746900924202756,
      "grad_norm": 3.15625,
      "learning_rate": 4.120955577882142e-05,
      "loss": 0.8937,
      "step": 221040
    },
    {
      "epoch": 0.7747251399271713,
      "grad_norm": 3.171875,
      "learning_rate": 4.1208906750157715e-05,
      "loss": 0.8236,
      "step": 221050
    },
    {
      "epoch": 0.7747601874340668,
      "grad_norm": 3.09375,
      "learning_rate": 4.120825772149402e-05,
      "loss": 0.9456,
      "step": 221060
    },
    {
      "epoch": 0.7747952349409625,
      "grad_norm": 2.671875,
      "learning_rate": 4.120760869283031e-05,
      "loss": 0.8932,
      "step": 221070
    },
    {
      "epoch": 0.7748302824478581,
      "grad_norm": 3.265625,
      "learning_rate": 4.120695966416661e-05,
      "loss": 0.863,
      "step": 221080
    },
    {
      "epoch": 0.7748653299547537,
      "grad_norm": 3.21875,
      "learning_rate": 4.120631063550291e-05,
      "loss": 0.9241,
      "step": 221090
    },
    {
      "epoch": 0.7749003774616493,
      "grad_norm": 3.0625,
      "learning_rate": 4.120566160683921e-05,
      "loss": 0.9341,
      "step": 221100
    },
    {
      "epoch": 0.7749354249685448,
      "grad_norm": 3.15625,
      "learning_rate": 4.1205012578175503e-05,
      "loss": 0.9179,
      "step": 221110
    },
    {
      "epoch": 0.7749704724754405,
      "grad_norm": 2.859375,
      "learning_rate": 4.1204363549511805e-05,
      "loss": 0.899,
      "step": 221120
    },
    {
      "epoch": 0.775005519982336,
      "grad_norm": 3.109375,
      "learning_rate": 4.12037145208481e-05,
      "loss": 0.9922,
      "step": 221130
    },
    {
      "epoch": 0.7750405674892317,
      "grad_norm": 3.0625,
      "learning_rate": 4.12030654921844e-05,
      "loss": 0.9285,
      "step": 221140
    },
    {
      "epoch": 0.7750756149961272,
      "grad_norm": 3.078125,
      "learning_rate": 4.12024164635207e-05,
      "loss": 0.8867,
      "step": 221150
    },
    {
      "epoch": 0.7751106625030229,
      "grad_norm": 2.84375,
      "learning_rate": 4.1201767434857e-05,
      "loss": 0.9437,
      "step": 221160
    },
    {
      "epoch": 0.7751457100099185,
      "grad_norm": 3.0,
      "learning_rate": 4.12011184061933e-05,
      "loss": 0.9028,
      "step": 221170
    },
    {
      "epoch": 0.775180757516814,
      "grad_norm": 2.640625,
      "learning_rate": 4.120046937752959e-05,
      "loss": 0.8666,
      "step": 221180
    },
    {
      "epoch": 0.7752158050237097,
      "grad_norm": 3.296875,
      "learning_rate": 4.1199820348865894e-05,
      "loss": 0.9894,
      "step": 221190
    },
    {
      "epoch": 0.7752508525306052,
      "grad_norm": 2.984375,
      "learning_rate": 4.119917132020219e-05,
      "loss": 0.8633,
      "step": 221200
    },
    {
      "epoch": 0.7752859000375009,
      "grad_norm": 3.125,
      "learning_rate": 4.1198522291538483e-05,
      "loss": 0.8653,
      "step": 221210
    },
    {
      "epoch": 0.7753209475443964,
      "grad_norm": 2.765625,
      "learning_rate": 4.119787326287478e-05,
      "loss": 0.8397,
      "step": 221220
    },
    {
      "epoch": 0.775355995051292,
      "grad_norm": 2.421875,
      "learning_rate": 4.119722423421108e-05,
      "loss": 0.9189,
      "step": 221230
    },
    {
      "epoch": 0.7753910425581876,
      "grad_norm": 3.078125,
      "learning_rate": 4.119657520554738e-05,
      "loss": 0.8937,
      "step": 221240
    },
    {
      "epoch": 0.7754260900650832,
      "grad_norm": 2.890625,
      "learning_rate": 4.1195926176883675e-05,
      "loss": 0.8872,
      "step": 221250
    },
    {
      "epoch": 0.7754611375719788,
      "grad_norm": 2.953125,
      "learning_rate": 4.119527714821998e-05,
      "loss": 0.8851,
      "step": 221260
    },
    {
      "epoch": 0.7754961850788744,
      "grad_norm": 3.296875,
      "learning_rate": 4.119462811955627e-05,
      "loss": 0.9141,
      "step": 221270
    },
    {
      "epoch": 0.7755312325857701,
      "grad_norm": 3.25,
      "learning_rate": 4.119397909089257e-05,
      "loss": 0.9331,
      "step": 221280
    },
    {
      "epoch": 0.7755662800926656,
      "grad_norm": 3.125,
      "learning_rate": 4.119333006222887e-05,
      "loss": 0.8617,
      "step": 221290
    },
    {
      "epoch": 0.7756013275995612,
      "grad_norm": 3.0625,
      "learning_rate": 4.119268103356517e-05,
      "loss": 0.9564,
      "step": 221300
    },
    {
      "epoch": 0.7756363751064568,
      "grad_norm": 3.109375,
      "learning_rate": 4.1192032004901463e-05,
      "loss": 0.9125,
      "step": 221310
    },
    {
      "epoch": 0.7756714226133524,
      "grad_norm": 2.765625,
      "learning_rate": 4.1191382976237765e-05,
      "loss": 0.8979,
      "step": 221320
    },
    {
      "epoch": 0.775706470120248,
      "grad_norm": 2.890625,
      "learning_rate": 4.119073394757406e-05,
      "loss": 0.9029,
      "step": 221330
    },
    {
      "epoch": 0.7757415176271436,
      "grad_norm": 2.6875,
      "learning_rate": 4.119008491891036e-05,
      "loss": 0.8697,
      "step": 221340
    },
    {
      "epoch": 0.7757765651340391,
      "grad_norm": 2.734375,
      "learning_rate": 4.1189435890246655e-05,
      "loss": 0.8906,
      "step": 221350
    },
    {
      "epoch": 0.7758116126409348,
      "grad_norm": 2.96875,
      "learning_rate": 4.118878686158296e-05,
      "loss": 1.0022,
      "step": 221360
    },
    {
      "epoch": 0.7758466601478304,
      "grad_norm": 2.984375,
      "learning_rate": 4.118813783291925e-05,
      "loss": 0.8232,
      "step": 221370
    },
    {
      "epoch": 0.775881707654726,
      "grad_norm": 3.046875,
      "learning_rate": 4.118748880425555e-05,
      "loss": 0.8392,
      "step": 221380
    },
    {
      "epoch": 0.7759167551616216,
      "grad_norm": 2.859375,
      "learning_rate": 4.1186839775591854e-05,
      "loss": 0.8977,
      "step": 221390
    },
    {
      "epoch": 0.7759518026685172,
      "grad_norm": 2.953125,
      "learning_rate": 4.118619074692815e-05,
      "loss": 1.0189,
      "step": 221400
    },
    {
      "epoch": 0.7759868501754128,
      "grad_norm": 2.5625,
      "learning_rate": 4.118554171826445e-05,
      "loss": 0.8359,
      "step": 221410
    },
    {
      "epoch": 0.7760218976823083,
      "grad_norm": 3.390625,
      "learning_rate": 4.1184892689600745e-05,
      "loss": 0.8685,
      "step": 221420
    },
    {
      "epoch": 0.776056945189204,
      "grad_norm": 2.84375,
      "learning_rate": 4.1184243660937046e-05,
      "loss": 0.77,
      "step": 221430
    },
    {
      "epoch": 0.7760919926960995,
      "grad_norm": 3.1875,
      "learning_rate": 4.118359463227334e-05,
      "loss": 0.8895,
      "step": 221440
    },
    {
      "epoch": 0.7761270402029952,
      "grad_norm": 3.390625,
      "learning_rate": 4.118294560360964e-05,
      "loss": 0.985,
      "step": 221450
    },
    {
      "epoch": 0.7761620877098907,
      "grad_norm": 2.90625,
      "learning_rate": 4.118229657494594e-05,
      "loss": 0.9286,
      "step": 221460
    },
    {
      "epoch": 0.7761971352167863,
      "grad_norm": 2.859375,
      "learning_rate": 4.118164754628224e-05,
      "loss": 0.845,
      "step": 221470
    },
    {
      "epoch": 0.776232182723682,
      "grad_norm": 2.859375,
      "learning_rate": 4.118099851761853e-05,
      "loss": 0.8233,
      "step": 221480
    },
    {
      "epoch": 0.7762672302305775,
      "grad_norm": 3.203125,
      "learning_rate": 4.1180349488954834e-05,
      "loss": 0.9896,
      "step": 221490
    },
    {
      "epoch": 0.7763022777374732,
      "grad_norm": 3.03125,
      "learning_rate": 4.117970046029113e-05,
      "loss": 0.8863,
      "step": 221500
    },
    {
      "epoch": 0.7763373252443687,
      "grad_norm": 3.125,
      "learning_rate": 4.117905143162743e-05,
      "loss": 0.8932,
      "step": 221510
    },
    {
      "epoch": 0.7763723727512644,
      "grad_norm": 2.96875,
      "learning_rate": 4.117840240296373e-05,
      "loss": 0.911,
      "step": 221520
    },
    {
      "epoch": 0.7764074202581599,
      "grad_norm": 2.765625,
      "learning_rate": 4.1177753374300026e-05,
      "loss": 0.8806,
      "step": 221530
    },
    {
      "epoch": 0.7764424677650555,
      "grad_norm": 3.21875,
      "learning_rate": 4.117710434563633e-05,
      "loss": 0.8962,
      "step": 221540
    },
    {
      "epoch": 0.7764775152719511,
      "grad_norm": 2.578125,
      "learning_rate": 4.117645531697262e-05,
      "loss": 0.7753,
      "step": 221550
    },
    {
      "epoch": 0.7765125627788467,
      "grad_norm": 3.0,
      "learning_rate": 4.1175806288308924e-05,
      "loss": 0.9689,
      "step": 221560
    },
    {
      "epoch": 0.7765476102857424,
      "grad_norm": 2.609375,
      "learning_rate": 4.117515725964522e-05,
      "loss": 0.8236,
      "step": 221570
    },
    {
      "epoch": 0.7765826577926379,
      "grad_norm": 2.65625,
      "learning_rate": 4.117450823098152e-05,
      "loss": 0.9001,
      "step": 221580
    },
    {
      "epoch": 0.7766177052995336,
      "grad_norm": 3.1875,
      "learning_rate": 4.117385920231781e-05,
      "loss": 0.934,
      "step": 221590
    },
    {
      "epoch": 0.7766527528064291,
      "grad_norm": 2.625,
      "learning_rate": 4.117321017365411e-05,
      "loss": 0.799,
      "step": 221600
    },
    {
      "epoch": 0.7766878003133247,
      "grad_norm": 2.671875,
      "learning_rate": 4.117256114499041e-05,
      "loss": 0.8958,
      "step": 221610
    },
    {
      "epoch": 0.7767228478202203,
      "grad_norm": 2.765625,
      "learning_rate": 4.1171912116326705e-05,
      "loss": 0.9676,
      "step": 221620
    },
    {
      "epoch": 0.7767578953271159,
      "grad_norm": 3.109375,
      "learning_rate": 4.1171263087663006e-05,
      "loss": 0.8382,
      "step": 221630
    },
    {
      "epoch": 0.7767929428340115,
      "grad_norm": 2.78125,
      "learning_rate": 4.11706140589993e-05,
      "loss": 0.7927,
      "step": 221640
    },
    {
      "epoch": 0.7768279903409071,
      "grad_norm": 3.390625,
      "learning_rate": 4.11699650303356e-05,
      "loss": 0.9135,
      "step": 221650
    },
    {
      "epoch": 0.7768630378478028,
      "grad_norm": 3.328125,
      "learning_rate": 4.11693160016719e-05,
      "loss": 0.8533,
      "step": 221660
    },
    {
      "epoch": 0.7768980853546983,
      "grad_norm": 2.90625,
      "learning_rate": 4.11686669730082e-05,
      "loss": 0.9194,
      "step": 221670
    },
    {
      "epoch": 0.7769331328615939,
      "grad_norm": 2.796875,
      "learning_rate": 4.116801794434449e-05,
      "loss": 0.9831,
      "step": 221680
    },
    {
      "epoch": 0.7769681803684895,
      "grad_norm": 2.53125,
      "learning_rate": 4.1167368915680794e-05,
      "loss": 0.9104,
      "step": 221690
    },
    {
      "epoch": 0.7770032278753851,
      "grad_norm": 2.6875,
      "learning_rate": 4.116671988701709e-05,
      "loss": 0.8464,
      "step": 221700
    },
    {
      "epoch": 0.7770382753822807,
      "grad_norm": 2.90625,
      "learning_rate": 4.116607085835339e-05,
      "loss": 0.8525,
      "step": 221710
    },
    {
      "epoch": 0.7770733228891763,
      "grad_norm": 3.46875,
      "learning_rate": 4.1165421829689685e-05,
      "loss": 0.916,
      "step": 221720
    },
    {
      "epoch": 0.7771083703960718,
      "grad_norm": 3.046875,
      "learning_rate": 4.1164772801025986e-05,
      "loss": 0.8941,
      "step": 221730
    },
    {
      "epoch": 0.7771434179029675,
      "grad_norm": 2.875,
      "learning_rate": 4.116412377236228e-05,
      "loss": 0.9251,
      "step": 221740
    },
    {
      "epoch": 0.777178465409863,
      "grad_norm": 3.1875,
      "learning_rate": 4.116347474369858e-05,
      "loss": 0.8553,
      "step": 221750
    },
    {
      "epoch": 0.7772135129167587,
      "grad_norm": 3.078125,
      "learning_rate": 4.1162825715034884e-05,
      "loss": 0.7801,
      "step": 221760
    },
    {
      "epoch": 0.7772485604236543,
      "grad_norm": 2.921875,
      "learning_rate": 4.116217668637118e-05,
      "loss": 0.8748,
      "step": 221770
    },
    {
      "epoch": 0.7772836079305498,
      "grad_norm": 2.921875,
      "learning_rate": 4.116152765770748e-05,
      "loss": 0.8833,
      "step": 221780
    },
    {
      "epoch": 0.7773186554374455,
      "grad_norm": 3.140625,
      "learning_rate": 4.1160878629043774e-05,
      "loss": 0.8446,
      "step": 221790
    },
    {
      "epoch": 0.777353702944341,
      "grad_norm": 2.703125,
      "learning_rate": 4.1160229600380076e-05,
      "loss": 0.8331,
      "step": 221800
    },
    {
      "epoch": 0.7773887504512367,
      "grad_norm": 2.515625,
      "learning_rate": 4.115958057171637e-05,
      "loss": 0.8875,
      "step": 221810
    },
    {
      "epoch": 0.7774237979581322,
      "grad_norm": 2.890625,
      "learning_rate": 4.115893154305267e-05,
      "loss": 0.9381,
      "step": 221820
    },
    {
      "epoch": 0.7774588454650279,
      "grad_norm": 3.046875,
      "learning_rate": 4.1158282514388966e-05,
      "loss": 1.0718,
      "step": 221830
    },
    {
      "epoch": 0.7774938929719234,
      "grad_norm": 3.015625,
      "learning_rate": 4.115763348572527e-05,
      "loss": 0.9876,
      "step": 221840
    },
    {
      "epoch": 0.777528940478819,
      "grad_norm": 2.703125,
      "learning_rate": 4.115698445706156e-05,
      "loss": 0.8969,
      "step": 221850
    },
    {
      "epoch": 0.7775639879857147,
      "grad_norm": 2.703125,
      "learning_rate": 4.1156335428397864e-05,
      "loss": 0.8416,
      "step": 221860
    },
    {
      "epoch": 0.7775990354926102,
      "grad_norm": 2.96875,
      "learning_rate": 4.115568639973416e-05,
      "loss": 0.8954,
      "step": 221870
    },
    {
      "epoch": 0.7776340829995059,
      "grad_norm": 3.421875,
      "learning_rate": 4.115503737107046e-05,
      "loss": 0.9583,
      "step": 221880
    },
    {
      "epoch": 0.7776691305064014,
      "grad_norm": 2.734375,
      "learning_rate": 4.115438834240676e-05,
      "loss": 0.9069,
      "step": 221890
    },
    {
      "epoch": 0.777704178013297,
      "grad_norm": 3.046875,
      "learning_rate": 4.1153739313743056e-05,
      "loss": 0.8726,
      "step": 221900
    },
    {
      "epoch": 0.7777392255201926,
      "grad_norm": 3.03125,
      "learning_rate": 4.115309028507936e-05,
      "loss": 0.933,
      "step": 221910
    },
    {
      "epoch": 0.7777742730270882,
      "grad_norm": 3.984375,
      "learning_rate": 4.115244125641565e-05,
      "loss": 0.8947,
      "step": 221920
    },
    {
      "epoch": 0.7778093205339838,
      "grad_norm": 2.96875,
      "learning_rate": 4.115179222775195e-05,
      "loss": 0.9682,
      "step": 221930
    },
    {
      "epoch": 0.7778443680408794,
      "grad_norm": 3.40625,
      "learning_rate": 4.115114319908825e-05,
      "loss": 0.8378,
      "step": 221940
    },
    {
      "epoch": 0.777879415547775,
      "grad_norm": 3.078125,
      "learning_rate": 4.115049417042455e-05,
      "loss": 0.8733,
      "step": 221950
    },
    {
      "epoch": 0.7779144630546706,
      "grad_norm": 2.78125,
      "learning_rate": 4.114984514176084e-05,
      "loss": 0.8242,
      "step": 221960
    },
    {
      "epoch": 0.7779495105615662,
      "grad_norm": 2.78125,
      "learning_rate": 4.114919611309714e-05,
      "loss": 0.8972,
      "step": 221970
    },
    {
      "epoch": 0.7779845580684618,
      "grad_norm": 2.9375,
      "learning_rate": 4.114854708443344e-05,
      "loss": 0.9867,
      "step": 221980
    },
    {
      "epoch": 0.7780196055753574,
      "grad_norm": 3.125,
      "learning_rate": 4.1147898055769734e-05,
      "loss": 0.9171,
      "step": 221990
    },
    {
      "epoch": 0.778054653082253,
      "grad_norm": 2.5,
      "learning_rate": 4.1147249027106036e-05,
      "loss": 0.9523,
      "step": 222000
    },
    {
      "epoch": 0.7780897005891486,
      "grad_norm": 2.875,
      "learning_rate": 4.114659999844233e-05,
      "loss": 0.8864,
      "step": 222010
    },
    {
      "epoch": 0.7781247480960441,
      "grad_norm": 3.15625,
      "learning_rate": 4.114595096977863e-05,
      "loss": 0.9589,
      "step": 222020
    },
    {
      "epoch": 0.7781597956029398,
      "grad_norm": 2.875,
      "learning_rate": 4.1145301941114926e-05,
      "loss": 0.9802,
      "step": 222030
    },
    {
      "epoch": 0.7781948431098353,
      "grad_norm": 2.796875,
      "learning_rate": 4.114465291245123e-05,
      "loss": 1.0116,
      "step": 222040
    },
    {
      "epoch": 0.778229890616731,
      "grad_norm": 2.59375,
      "learning_rate": 4.114400388378752e-05,
      "loss": 0.8785,
      "step": 222050
    },
    {
      "epoch": 0.7782649381236266,
      "grad_norm": 3.140625,
      "learning_rate": 4.1143354855123824e-05,
      "loss": 0.861,
      "step": 222060
    },
    {
      "epoch": 0.7782999856305222,
      "grad_norm": 3.046875,
      "learning_rate": 4.114270582646012e-05,
      "loss": 0.8565,
      "step": 222070
    },
    {
      "epoch": 0.7783350331374178,
      "grad_norm": 2.828125,
      "learning_rate": 4.114205679779642e-05,
      "loss": 0.9046,
      "step": 222080
    },
    {
      "epoch": 0.7783700806443133,
      "grad_norm": 3.046875,
      "learning_rate": 4.1141407769132714e-05,
      "loss": 0.9199,
      "step": 222090
    },
    {
      "epoch": 0.778405128151209,
      "grad_norm": 3.28125,
      "learning_rate": 4.1140758740469016e-05,
      "loss": 0.9188,
      "step": 222100
    },
    {
      "epoch": 0.7784401756581045,
      "grad_norm": 3.0625,
      "learning_rate": 4.114010971180532e-05,
      "loss": 0.8671,
      "step": 222110
    },
    {
      "epoch": 0.7784752231650002,
      "grad_norm": 3.078125,
      "learning_rate": 4.113946068314161e-05,
      "loss": 0.9738,
      "step": 222120
    },
    {
      "epoch": 0.7785102706718957,
      "grad_norm": 2.71875,
      "learning_rate": 4.113881165447791e-05,
      "loss": 0.9512,
      "step": 222130
    },
    {
      "epoch": 0.7785453181787914,
      "grad_norm": 2.828125,
      "learning_rate": 4.113816262581421e-05,
      "loss": 0.9086,
      "step": 222140
    },
    {
      "epoch": 0.778580365685687,
      "grad_norm": 2.53125,
      "learning_rate": 4.113751359715051e-05,
      "loss": 0.9376,
      "step": 222150
    },
    {
      "epoch": 0.7786154131925825,
      "grad_norm": 2.78125,
      "learning_rate": 4.1136864568486804e-05,
      "loss": 0.8927,
      "step": 222160
    },
    {
      "epoch": 0.7786504606994782,
      "grad_norm": 3.109375,
      "learning_rate": 4.1136215539823105e-05,
      "loss": 0.892,
      "step": 222170
    },
    {
      "epoch": 0.7786855082063737,
      "grad_norm": 2.734375,
      "learning_rate": 4.11355665111594e-05,
      "loss": 0.8893,
      "step": 222180
    },
    {
      "epoch": 0.7787205557132694,
      "grad_norm": 3.171875,
      "learning_rate": 4.11349174824957e-05,
      "loss": 0.8618,
      "step": 222190
    },
    {
      "epoch": 0.7787556032201649,
      "grad_norm": 2.984375,
      "learning_rate": 4.1134268453831996e-05,
      "loss": 0.8649,
      "step": 222200
    },
    {
      "epoch": 0.7787906507270606,
      "grad_norm": 3.234375,
      "learning_rate": 4.11336194251683e-05,
      "loss": 0.939,
      "step": 222210
    },
    {
      "epoch": 0.7788256982339561,
      "grad_norm": 3.171875,
      "learning_rate": 4.113297039650459e-05,
      "loss": 1.0381,
      "step": 222220
    },
    {
      "epoch": 0.7788607457408517,
      "grad_norm": 3.296875,
      "learning_rate": 4.113232136784089e-05,
      "loss": 0.8841,
      "step": 222230
    },
    {
      "epoch": 0.7788957932477473,
      "grad_norm": 3.515625,
      "learning_rate": 4.113167233917719e-05,
      "loss": 0.9586,
      "step": 222240
    },
    {
      "epoch": 0.7789308407546429,
      "grad_norm": 3.140625,
      "learning_rate": 4.113102331051349e-05,
      "loss": 1.0007,
      "step": 222250
    },
    {
      "epoch": 0.7789658882615386,
      "grad_norm": 3.65625,
      "learning_rate": 4.113037428184979e-05,
      "loss": 0.8877,
      "step": 222260
    },
    {
      "epoch": 0.7790009357684341,
      "grad_norm": 2.796875,
      "learning_rate": 4.1129725253186085e-05,
      "loss": 0.9178,
      "step": 222270
    },
    {
      "epoch": 0.7790359832753297,
      "grad_norm": 3.3125,
      "learning_rate": 4.1129076224522386e-05,
      "loss": 0.8732,
      "step": 222280
    },
    {
      "epoch": 0.7790710307822253,
      "grad_norm": 3.140625,
      "learning_rate": 4.112842719585868e-05,
      "loss": 0.8889,
      "step": 222290
    },
    {
      "epoch": 0.7791060782891209,
      "grad_norm": 3.078125,
      "learning_rate": 4.112777816719498e-05,
      "loss": 0.9277,
      "step": 222300
    },
    {
      "epoch": 0.7791411257960165,
      "grad_norm": 2.90625,
      "learning_rate": 4.112712913853128e-05,
      "loss": 0.8746,
      "step": 222310
    },
    {
      "epoch": 0.7791761733029121,
      "grad_norm": 2.703125,
      "learning_rate": 4.112648010986758e-05,
      "loss": 0.8151,
      "step": 222320
    },
    {
      "epoch": 0.7792112208098076,
      "grad_norm": 2.578125,
      "learning_rate": 4.112583108120387e-05,
      "loss": 0.8867,
      "step": 222330
    },
    {
      "epoch": 0.7792462683167033,
      "grad_norm": 2.9375,
      "learning_rate": 4.112518205254017e-05,
      "loss": 0.962,
      "step": 222340
    },
    {
      "epoch": 0.7792813158235989,
      "grad_norm": 3.1875,
      "learning_rate": 4.112453302387647e-05,
      "loss": 0.8751,
      "step": 222350
    },
    {
      "epoch": 0.7793163633304945,
      "grad_norm": 2.875,
      "learning_rate": 4.1123883995212764e-05,
      "loss": 0.8409,
      "step": 222360
    },
    {
      "epoch": 0.7793514108373901,
      "grad_norm": 2.828125,
      "learning_rate": 4.1123234966549065e-05,
      "loss": 0.8142,
      "step": 222370
    },
    {
      "epoch": 0.7793864583442857,
      "grad_norm": 2.671875,
      "learning_rate": 4.112258593788536e-05,
      "loss": 0.8964,
      "step": 222380
    },
    {
      "epoch": 0.7794215058511813,
      "grad_norm": 3.078125,
      "learning_rate": 4.112193690922166e-05,
      "loss": 0.9512,
      "step": 222390
    },
    {
      "epoch": 0.7794565533580768,
      "grad_norm": 3.265625,
      "learning_rate": 4.1121287880557956e-05,
      "loss": 0.8622,
      "step": 222400
    },
    {
      "epoch": 0.7794916008649725,
      "grad_norm": 2.484375,
      "learning_rate": 4.112063885189426e-05,
      "loss": 0.8392,
      "step": 222410
    },
    {
      "epoch": 0.779526648371868,
      "grad_norm": 3.0625,
      "learning_rate": 4.111998982323055e-05,
      "loss": 0.9268,
      "step": 222420
    },
    {
      "epoch": 0.7795616958787637,
      "grad_norm": 2.796875,
      "learning_rate": 4.111934079456685e-05,
      "loss": 0.8467,
      "step": 222430
    },
    {
      "epoch": 0.7795967433856592,
      "grad_norm": 3.0,
      "learning_rate": 4.111869176590315e-05,
      "loss": 0.8281,
      "step": 222440
    },
    {
      "epoch": 0.7796317908925549,
      "grad_norm": 3.375,
      "learning_rate": 4.111804273723945e-05,
      "loss": 0.9142,
      "step": 222450
    },
    {
      "epoch": 0.7796668383994505,
      "grad_norm": 3.0,
      "learning_rate": 4.1117393708575744e-05,
      "loss": 0.9215,
      "step": 222460
    },
    {
      "epoch": 0.779701885906346,
      "grad_norm": 2.828125,
      "learning_rate": 4.1116744679912045e-05,
      "loss": 0.9693,
      "step": 222470
    },
    {
      "epoch": 0.7797369334132417,
      "grad_norm": 3.6875,
      "learning_rate": 4.1116095651248346e-05,
      "loss": 0.9241,
      "step": 222480
    },
    {
      "epoch": 0.7797719809201372,
      "grad_norm": 3.375,
      "learning_rate": 4.111544662258464e-05,
      "loss": 0.8837,
      "step": 222490
    },
    {
      "epoch": 0.7798070284270329,
      "grad_norm": 3.515625,
      "learning_rate": 4.111479759392094e-05,
      "loss": 0.977,
      "step": 222500
    },
    {
      "epoch": 0.7798420759339284,
      "grad_norm": 2.84375,
      "learning_rate": 4.111414856525724e-05,
      "loss": 0.879,
      "step": 222510
    },
    {
      "epoch": 0.779877123440824,
      "grad_norm": 2.9375,
      "learning_rate": 4.111349953659354e-05,
      "loss": 0.9411,
      "step": 222520
    },
    {
      "epoch": 0.7799121709477196,
      "grad_norm": 3.0,
      "learning_rate": 4.111285050792983e-05,
      "loss": 0.9072,
      "step": 222530
    },
    {
      "epoch": 0.7799472184546152,
      "grad_norm": 3.34375,
      "learning_rate": 4.1112201479266134e-05,
      "loss": 0.8422,
      "step": 222540
    },
    {
      "epoch": 0.7799822659615109,
      "grad_norm": 3.328125,
      "learning_rate": 4.111155245060243e-05,
      "loss": 0.9132,
      "step": 222550
    },
    {
      "epoch": 0.7800173134684064,
      "grad_norm": 3.390625,
      "learning_rate": 4.111090342193873e-05,
      "loss": 0.8664,
      "step": 222560
    },
    {
      "epoch": 0.7800523609753021,
      "grad_norm": 3.453125,
      "learning_rate": 4.1110254393275025e-05,
      "loss": 0.8705,
      "step": 222570
    },
    {
      "epoch": 0.7800874084821976,
      "grad_norm": 3.296875,
      "learning_rate": 4.1109605364611326e-05,
      "loss": 0.9114,
      "step": 222580
    },
    {
      "epoch": 0.7801224559890932,
      "grad_norm": 2.984375,
      "learning_rate": 4.110895633594762e-05,
      "loss": 0.8883,
      "step": 222590
    },
    {
      "epoch": 0.7801575034959888,
      "grad_norm": 2.90625,
      "learning_rate": 4.110830730728392e-05,
      "loss": 0.8411,
      "step": 222600
    },
    {
      "epoch": 0.7801925510028844,
      "grad_norm": 2.84375,
      "learning_rate": 4.110765827862022e-05,
      "loss": 0.849,
      "step": 222610
    },
    {
      "epoch": 0.78022759850978,
      "grad_norm": 2.953125,
      "learning_rate": 4.110700924995652e-05,
      "loss": 0.8724,
      "step": 222620
    },
    {
      "epoch": 0.7802626460166756,
      "grad_norm": 3.828125,
      "learning_rate": 4.110636022129282e-05,
      "loss": 0.8313,
      "step": 222630
    },
    {
      "epoch": 0.7802976935235711,
      "grad_norm": 2.90625,
      "learning_rate": 4.1105711192629114e-05,
      "loss": 0.8526,
      "step": 222640
    },
    {
      "epoch": 0.7803327410304668,
      "grad_norm": 3.125,
      "learning_rate": 4.1105062163965416e-05,
      "loss": 0.8913,
      "step": 222650
    },
    {
      "epoch": 0.7803677885373624,
      "grad_norm": 3.109375,
      "learning_rate": 4.110441313530171e-05,
      "loss": 0.8666,
      "step": 222660
    },
    {
      "epoch": 0.780402836044258,
      "grad_norm": 3.125,
      "learning_rate": 4.110376410663801e-05,
      "loss": 0.8469,
      "step": 222670
    },
    {
      "epoch": 0.7804378835511536,
      "grad_norm": 3.03125,
      "learning_rate": 4.1103115077974306e-05,
      "loss": 0.8902,
      "step": 222680
    },
    {
      "epoch": 0.7804729310580492,
      "grad_norm": 3.078125,
      "learning_rate": 4.110246604931061e-05,
      "loss": 0.956,
      "step": 222690
    },
    {
      "epoch": 0.7805079785649448,
      "grad_norm": 2.484375,
      "learning_rate": 4.11018170206469e-05,
      "loss": 0.9293,
      "step": 222700
    },
    {
      "epoch": 0.7805430260718403,
      "grad_norm": 3.15625,
      "learning_rate": 4.1101167991983204e-05,
      "loss": 0.9016,
      "step": 222710
    },
    {
      "epoch": 0.780578073578736,
      "grad_norm": 2.78125,
      "learning_rate": 4.11005189633195e-05,
      "loss": 0.9502,
      "step": 222720
    },
    {
      "epoch": 0.7806131210856315,
      "grad_norm": 2.75,
      "learning_rate": 4.109986993465579e-05,
      "loss": 0.9244,
      "step": 222730
    },
    {
      "epoch": 0.7806481685925272,
      "grad_norm": 2.8125,
      "learning_rate": 4.1099220905992094e-05,
      "loss": 0.8377,
      "step": 222740
    },
    {
      "epoch": 0.7806832160994228,
      "grad_norm": 2.828125,
      "learning_rate": 4.109857187732839e-05,
      "loss": 0.8914,
      "step": 222750
    },
    {
      "epoch": 0.7807182636063184,
      "grad_norm": 3.84375,
      "learning_rate": 4.109792284866469e-05,
      "loss": 0.8986,
      "step": 222760
    },
    {
      "epoch": 0.780753311113214,
      "grad_norm": 2.421875,
      "learning_rate": 4.1097273820000985e-05,
      "loss": 0.8296,
      "step": 222770
    },
    {
      "epoch": 0.7807883586201095,
      "grad_norm": 3.390625,
      "learning_rate": 4.1096624791337286e-05,
      "loss": 0.8822,
      "step": 222780
    },
    {
      "epoch": 0.7808234061270052,
      "grad_norm": 2.953125,
      "learning_rate": 4.109597576267358e-05,
      "loss": 0.888,
      "step": 222790
    },
    {
      "epoch": 0.7808584536339007,
      "grad_norm": 2.875,
      "learning_rate": 4.109532673400988e-05,
      "loss": 0.8865,
      "step": 222800
    },
    {
      "epoch": 0.7808935011407964,
      "grad_norm": 2.5625,
      "learning_rate": 4.109467770534618e-05,
      "loss": 0.855,
      "step": 222810
    },
    {
      "epoch": 0.7809285486476919,
      "grad_norm": 3.03125,
      "learning_rate": 4.109402867668248e-05,
      "loss": 1.0104,
      "step": 222820
    },
    {
      "epoch": 0.7809635961545875,
      "grad_norm": 3.25,
      "learning_rate": 4.109337964801877e-05,
      "loss": 0.8535,
      "step": 222830
    },
    {
      "epoch": 0.7809986436614832,
      "grad_norm": 2.84375,
      "learning_rate": 4.1092730619355074e-05,
      "loss": 0.9119,
      "step": 222840
    },
    {
      "epoch": 0.7810336911683787,
      "grad_norm": 3.25,
      "learning_rate": 4.1092081590691376e-05,
      "loss": 0.9722,
      "step": 222850
    },
    {
      "epoch": 0.7810687386752744,
      "grad_norm": 3.109375,
      "learning_rate": 4.109143256202767e-05,
      "loss": 0.9285,
      "step": 222860
    },
    {
      "epoch": 0.7811037861821699,
      "grad_norm": 2.796875,
      "learning_rate": 4.109078353336397e-05,
      "loss": 0.9768,
      "step": 222870
    },
    {
      "epoch": 0.7811388336890656,
      "grad_norm": 2.859375,
      "learning_rate": 4.1090134504700266e-05,
      "loss": 0.8057,
      "step": 222880
    },
    {
      "epoch": 0.7811738811959611,
      "grad_norm": 2.75,
      "learning_rate": 4.108948547603657e-05,
      "loss": 0.8966,
      "step": 222890
    },
    {
      "epoch": 0.7812089287028567,
      "grad_norm": 3.15625,
      "learning_rate": 4.108883644737286e-05,
      "loss": 0.8591,
      "step": 222900
    },
    {
      "epoch": 0.7812439762097523,
      "grad_norm": 3.375,
      "learning_rate": 4.1088187418709164e-05,
      "loss": 0.8941,
      "step": 222910
    },
    {
      "epoch": 0.7812790237166479,
      "grad_norm": 3.0,
      "learning_rate": 4.108753839004546e-05,
      "loss": 0.8895,
      "step": 222920
    },
    {
      "epoch": 0.7813140712235435,
      "grad_norm": 2.78125,
      "learning_rate": 4.108688936138176e-05,
      "loss": 0.8863,
      "step": 222930
    },
    {
      "epoch": 0.7813491187304391,
      "grad_norm": 2.90625,
      "learning_rate": 4.1086240332718054e-05,
      "loss": 0.8085,
      "step": 222940
    },
    {
      "epoch": 0.7813841662373348,
      "grad_norm": 3.21875,
      "learning_rate": 4.1085591304054356e-05,
      "loss": 0.8957,
      "step": 222950
    },
    {
      "epoch": 0.7814192137442303,
      "grad_norm": 3.015625,
      "learning_rate": 4.108494227539065e-05,
      "loss": 0.8909,
      "step": 222960
    },
    {
      "epoch": 0.7814542612511259,
      "grad_norm": 2.765625,
      "learning_rate": 4.108429324672695e-05,
      "loss": 0.8909,
      "step": 222970
    },
    {
      "epoch": 0.7814893087580215,
      "grad_norm": 2.40625,
      "learning_rate": 4.1083644218063246e-05,
      "loss": 0.9367,
      "step": 222980
    },
    {
      "epoch": 0.7815243562649171,
      "grad_norm": 2.84375,
      "learning_rate": 4.108299518939955e-05,
      "loss": 0.9317,
      "step": 222990
    },
    {
      "epoch": 0.7815594037718127,
      "grad_norm": 3.109375,
      "learning_rate": 4.108234616073585e-05,
      "loss": 0.7909,
      "step": 223000
    },
    {
      "epoch": 0.7815944512787083,
      "grad_norm": 3.484375,
      "learning_rate": 4.1081697132072144e-05,
      "loss": 0.8785,
      "step": 223010
    },
    {
      "epoch": 0.7816294987856038,
      "grad_norm": 3.25,
      "learning_rate": 4.1081048103408445e-05,
      "loss": 0.8708,
      "step": 223020
    },
    {
      "epoch": 0.7816645462924995,
      "grad_norm": 2.828125,
      "learning_rate": 4.108039907474474e-05,
      "loss": 0.8878,
      "step": 223030
    },
    {
      "epoch": 0.7816995937993951,
      "grad_norm": 3.15625,
      "learning_rate": 4.107975004608104e-05,
      "loss": 0.8911,
      "step": 223040
    },
    {
      "epoch": 0.7817346413062907,
      "grad_norm": 3.390625,
      "learning_rate": 4.1079101017417336e-05,
      "loss": 0.8607,
      "step": 223050
    },
    {
      "epoch": 0.7817696888131863,
      "grad_norm": 2.890625,
      "learning_rate": 4.107845198875364e-05,
      "loss": 1.0639,
      "step": 223060
    },
    {
      "epoch": 0.7818047363200819,
      "grad_norm": 3.078125,
      "learning_rate": 4.107780296008993e-05,
      "loss": 0.9397,
      "step": 223070
    },
    {
      "epoch": 0.7818397838269775,
      "grad_norm": 2.5625,
      "learning_rate": 4.107715393142623e-05,
      "loss": 0.9823,
      "step": 223080
    },
    {
      "epoch": 0.781874831333873,
      "grad_norm": 2.921875,
      "learning_rate": 4.107650490276253e-05,
      "loss": 0.9258,
      "step": 223090
    },
    {
      "epoch": 0.7819098788407687,
      "grad_norm": 3.203125,
      "learning_rate": 4.107585587409882e-05,
      "loss": 0.9064,
      "step": 223100
    },
    {
      "epoch": 0.7819449263476642,
      "grad_norm": 2.875,
      "learning_rate": 4.1075206845435124e-05,
      "loss": 0.8791,
      "step": 223110
    },
    {
      "epoch": 0.7819799738545599,
      "grad_norm": 2.3125,
      "learning_rate": 4.107455781677142e-05,
      "loss": 0.8383,
      "step": 223120
    },
    {
      "epoch": 0.7820150213614554,
      "grad_norm": 2.96875,
      "learning_rate": 4.107390878810772e-05,
      "loss": 0.9307,
      "step": 223130
    },
    {
      "epoch": 0.782050068868351,
      "grad_norm": 2.734375,
      "learning_rate": 4.1073259759444014e-05,
      "loss": 0.8841,
      "step": 223140
    },
    {
      "epoch": 0.7820851163752467,
      "grad_norm": 2.703125,
      "learning_rate": 4.1072610730780316e-05,
      "loss": 0.8514,
      "step": 223150
    },
    {
      "epoch": 0.7821201638821422,
      "grad_norm": 2.71875,
      "learning_rate": 4.107196170211661e-05,
      "loss": 0.9463,
      "step": 223160
    },
    {
      "epoch": 0.7821552113890379,
      "grad_norm": 2.796875,
      "learning_rate": 4.107131267345291e-05,
      "loss": 0.7569,
      "step": 223170
    },
    {
      "epoch": 0.7821902588959334,
      "grad_norm": 3.03125,
      "learning_rate": 4.1070663644789206e-05,
      "loss": 0.9218,
      "step": 223180
    },
    {
      "epoch": 0.7822253064028291,
      "grad_norm": 3.015625,
      "learning_rate": 4.107001461612551e-05,
      "loss": 0.9272,
      "step": 223190
    },
    {
      "epoch": 0.7822603539097246,
      "grad_norm": 3.0,
      "learning_rate": 4.10693655874618e-05,
      "loss": 0.941,
      "step": 223200
    },
    {
      "epoch": 0.7822954014166202,
      "grad_norm": 2.984375,
      "learning_rate": 4.1068716558798104e-05,
      "loss": 0.9169,
      "step": 223210
    },
    {
      "epoch": 0.7823304489235158,
      "grad_norm": 2.625,
      "learning_rate": 4.1068067530134405e-05,
      "loss": 0.9097,
      "step": 223220
    },
    {
      "epoch": 0.7823654964304114,
      "grad_norm": 2.984375,
      "learning_rate": 4.10674185014707e-05,
      "loss": 0.8815,
      "step": 223230
    },
    {
      "epoch": 0.7824005439373071,
      "grad_norm": 3.21875,
      "learning_rate": 4.1066769472807e-05,
      "loss": 0.9474,
      "step": 223240
    },
    {
      "epoch": 0.7824355914442026,
      "grad_norm": 2.765625,
      "learning_rate": 4.1066120444143296e-05,
      "loss": 0.924,
      "step": 223250
    },
    {
      "epoch": 0.7824706389510983,
      "grad_norm": 2.671875,
      "learning_rate": 4.10654714154796e-05,
      "loss": 0.8752,
      "step": 223260
    },
    {
      "epoch": 0.7825056864579938,
      "grad_norm": 3.0625,
      "learning_rate": 4.106482238681589e-05,
      "loss": 0.9216,
      "step": 223270
    },
    {
      "epoch": 0.7825407339648894,
      "grad_norm": 3.03125,
      "learning_rate": 4.106417335815219e-05,
      "loss": 0.9294,
      "step": 223280
    },
    {
      "epoch": 0.782575781471785,
      "grad_norm": 2.96875,
      "learning_rate": 4.106352432948849e-05,
      "loss": 0.8402,
      "step": 223290
    },
    {
      "epoch": 0.7826108289786806,
      "grad_norm": 3.046875,
      "learning_rate": 4.106287530082479e-05,
      "loss": 0.9144,
      "step": 223300
    },
    {
      "epoch": 0.7826458764855762,
      "grad_norm": 2.953125,
      "learning_rate": 4.1062226272161084e-05,
      "loss": 0.8915,
      "step": 223310
    },
    {
      "epoch": 0.7826809239924718,
      "grad_norm": 3.6875,
      "learning_rate": 4.1061577243497385e-05,
      "loss": 0.9405,
      "step": 223320
    },
    {
      "epoch": 0.7827159714993674,
      "grad_norm": 3.09375,
      "learning_rate": 4.106092821483368e-05,
      "loss": 0.9142,
      "step": 223330
    },
    {
      "epoch": 0.782751019006263,
      "grad_norm": 3.03125,
      "learning_rate": 4.106027918616998e-05,
      "loss": 0.9707,
      "step": 223340
    },
    {
      "epoch": 0.7827860665131586,
      "grad_norm": 2.34375,
      "learning_rate": 4.105963015750628e-05,
      "loss": 0.8051,
      "step": 223350
    },
    {
      "epoch": 0.7828211140200542,
      "grad_norm": 2.75,
      "learning_rate": 4.105898112884258e-05,
      "loss": 0.7556,
      "step": 223360
    },
    {
      "epoch": 0.7828561615269498,
      "grad_norm": 2.96875,
      "learning_rate": 4.105833210017888e-05,
      "loss": 0.8842,
      "step": 223370
    },
    {
      "epoch": 0.7828912090338453,
      "grad_norm": 2.90625,
      "learning_rate": 4.105768307151517e-05,
      "loss": 0.8988,
      "step": 223380
    },
    {
      "epoch": 0.782926256540741,
      "grad_norm": 3.140625,
      "learning_rate": 4.1057034042851474e-05,
      "loss": 0.9148,
      "step": 223390
    },
    {
      "epoch": 0.7829613040476365,
      "grad_norm": 3.015625,
      "learning_rate": 4.105638501418777e-05,
      "loss": 0.9062,
      "step": 223400
    },
    {
      "epoch": 0.7829963515545322,
      "grad_norm": 2.9375,
      "learning_rate": 4.105573598552407e-05,
      "loss": 0.958,
      "step": 223410
    },
    {
      "epoch": 0.7830313990614277,
      "grad_norm": 3.125,
      "learning_rate": 4.1055086956860365e-05,
      "loss": 0.9815,
      "step": 223420
    },
    {
      "epoch": 0.7830664465683234,
      "grad_norm": 2.8125,
      "learning_rate": 4.1054437928196666e-05,
      "loss": 0.8548,
      "step": 223430
    },
    {
      "epoch": 0.783101494075219,
      "grad_norm": 2.921875,
      "learning_rate": 4.105378889953296e-05,
      "loss": 0.9171,
      "step": 223440
    },
    {
      "epoch": 0.7831365415821145,
      "grad_norm": 3.15625,
      "learning_rate": 4.105313987086926e-05,
      "loss": 0.8686,
      "step": 223450
    },
    {
      "epoch": 0.7831715890890102,
      "grad_norm": 2.828125,
      "learning_rate": 4.105249084220556e-05,
      "loss": 0.8348,
      "step": 223460
    },
    {
      "epoch": 0.7832066365959057,
      "grad_norm": 2.765625,
      "learning_rate": 4.105184181354185e-05,
      "loss": 0.8451,
      "step": 223470
    },
    {
      "epoch": 0.7832416841028014,
      "grad_norm": 3.421875,
      "learning_rate": 4.105119278487815e-05,
      "loss": 0.9119,
      "step": 223480
    },
    {
      "epoch": 0.7832767316096969,
      "grad_norm": 3.015625,
      "learning_rate": 4.105054375621445e-05,
      "loss": 0.9163,
      "step": 223490
    },
    {
      "epoch": 0.7833117791165926,
      "grad_norm": 2.9375,
      "learning_rate": 4.104989472755075e-05,
      "loss": 0.8634,
      "step": 223500
    },
    {
      "epoch": 0.7833468266234881,
      "grad_norm": 2.921875,
      "learning_rate": 4.1049245698887044e-05,
      "loss": 0.978,
      "step": 223510
    },
    {
      "epoch": 0.7833818741303837,
      "grad_norm": 2.78125,
      "learning_rate": 4.1048596670223345e-05,
      "loss": 0.8579,
      "step": 223520
    },
    {
      "epoch": 0.7834169216372794,
      "grad_norm": 2.640625,
      "learning_rate": 4.104794764155964e-05,
      "loss": 0.9314,
      "step": 223530
    },
    {
      "epoch": 0.7834519691441749,
      "grad_norm": 2.921875,
      "learning_rate": 4.104729861289594e-05,
      "loss": 0.9629,
      "step": 223540
    },
    {
      "epoch": 0.7834870166510706,
      "grad_norm": 2.578125,
      "learning_rate": 4.1046649584232236e-05,
      "loss": 0.9228,
      "step": 223550
    },
    {
      "epoch": 0.7835220641579661,
      "grad_norm": 2.703125,
      "learning_rate": 4.104600055556854e-05,
      "loss": 0.9249,
      "step": 223560
    },
    {
      "epoch": 0.7835571116648617,
      "grad_norm": 3.046875,
      "learning_rate": 4.104535152690483e-05,
      "loss": 0.8618,
      "step": 223570
    },
    {
      "epoch": 0.7835921591717573,
      "grad_norm": 3.171875,
      "learning_rate": 4.104470249824113e-05,
      "loss": 0.905,
      "step": 223580
    },
    {
      "epoch": 0.7836272066786529,
      "grad_norm": 3.109375,
      "learning_rate": 4.1044053469577434e-05,
      "loss": 0.8105,
      "step": 223590
    },
    {
      "epoch": 0.7836622541855485,
      "grad_norm": 3.078125,
      "learning_rate": 4.104340444091373e-05,
      "loss": 0.8788,
      "step": 223600
    },
    {
      "epoch": 0.7836973016924441,
      "grad_norm": 3.109375,
      "learning_rate": 4.104275541225003e-05,
      "loss": 0.9543,
      "step": 223610
    },
    {
      "epoch": 0.7837323491993397,
      "grad_norm": 3.203125,
      "learning_rate": 4.1042106383586325e-05,
      "loss": 0.9427,
      "step": 223620
    },
    {
      "epoch": 0.7837673967062353,
      "grad_norm": 2.421875,
      "learning_rate": 4.1041457354922626e-05,
      "loss": 0.9089,
      "step": 223630
    },
    {
      "epoch": 0.7838024442131309,
      "grad_norm": 2.5,
      "learning_rate": 4.104080832625892e-05,
      "loss": 0.881,
      "step": 223640
    },
    {
      "epoch": 0.7838374917200265,
      "grad_norm": 3.140625,
      "learning_rate": 4.104015929759522e-05,
      "loss": 0.907,
      "step": 223650
    },
    {
      "epoch": 0.7838725392269221,
      "grad_norm": 2.953125,
      "learning_rate": 4.103951026893152e-05,
      "loss": 0.9763,
      "step": 223660
    },
    {
      "epoch": 0.7839075867338177,
      "grad_norm": 3.046875,
      "learning_rate": 4.103886124026782e-05,
      "loss": 0.8774,
      "step": 223670
    },
    {
      "epoch": 0.7839426342407133,
      "grad_norm": 3.109375,
      "learning_rate": 4.103821221160411e-05,
      "loss": 0.9697,
      "step": 223680
    },
    {
      "epoch": 0.7839776817476088,
      "grad_norm": 3.234375,
      "learning_rate": 4.1037563182940414e-05,
      "loss": 0.9705,
      "step": 223690
    },
    {
      "epoch": 0.7840127292545045,
      "grad_norm": 3.390625,
      "learning_rate": 4.103691415427671e-05,
      "loss": 0.8791,
      "step": 223700
    },
    {
      "epoch": 0.7840477767614,
      "grad_norm": 2.734375,
      "learning_rate": 4.103626512561301e-05,
      "loss": 0.852,
      "step": 223710
    },
    {
      "epoch": 0.7840828242682957,
      "grad_norm": 2.5,
      "learning_rate": 4.103561609694931e-05,
      "loss": 0.8427,
      "step": 223720
    },
    {
      "epoch": 0.7841178717751913,
      "grad_norm": 3.0625,
      "learning_rate": 4.1034967068285606e-05,
      "loss": 0.921,
      "step": 223730
    },
    {
      "epoch": 0.7841529192820869,
      "grad_norm": 3.0625,
      "learning_rate": 4.103431803962191e-05,
      "loss": 0.9351,
      "step": 223740
    },
    {
      "epoch": 0.7841879667889825,
      "grad_norm": 3.28125,
      "learning_rate": 4.10336690109582e-05,
      "loss": 1.0066,
      "step": 223750
    },
    {
      "epoch": 0.784223014295878,
      "grad_norm": 2.78125,
      "learning_rate": 4.1033019982294504e-05,
      "loss": 0.824,
      "step": 223760
    },
    {
      "epoch": 0.7842580618027737,
      "grad_norm": 3.0,
      "learning_rate": 4.10323709536308e-05,
      "loss": 0.8713,
      "step": 223770
    },
    {
      "epoch": 0.7842931093096692,
      "grad_norm": 3.0,
      "learning_rate": 4.10317219249671e-05,
      "loss": 0.8689,
      "step": 223780
    },
    {
      "epoch": 0.7843281568165649,
      "grad_norm": 2.796875,
      "learning_rate": 4.1031072896303394e-05,
      "loss": 0.8339,
      "step": 223790
    },
    {
      "epoch": 0.7843632043234604,
      "grad_norm": 3.109375,
      "learning_rate": 4.1030423867639696e-05,
      "loss": 0.8875,
      "step": 223800
    },
    {
      "epoch": 0.784398251830356,
      "grad_norm": 2.625,
      "learning_rate": 4.102977483897599e-05,
      "loss": 0.7922,
      "step": 223810
    },
    {
      "epoch": 0.7844332993372517,
      "grad_norm": 2.953125,
      "learning_rate": 4.102912581031229e-05,
      "loss": 0.8461,
      "step": 223820
    },
    {
      "epoch": 0.7844683468441472,
      "grad_norm": 2.59375,
      "learning_rate": 4.1028476781648586e-05,
      "loss": 0.8858,
      "step": 223830
    },
    {
      "epoch": 0.7845033943510429,
      "grad_norm": 3.4375,
      "learning_rate": 4.102782775298488e-05,
      "loss": 0.9306,
      "step": 223840
    },
    {
      "epoch": 0.7845384418579384,
      "grad_norm": 3.09375,
      "learning_rate": 4.102717872432118e-05,
      "loss": 0.7841,
      "step": 223850
    },
    {
      "epoch": 0.7845734893648341,
      "grad_norm": 2.640625,
      "learning_rate": 4.102652969565748e-05,
      "loss": 0.889,
      "step": 223860
    },
    {
      "epoch": 0.7846085368717296,
      "grad_norm": 3.3125,
      "learning_rate": 4.102588066699378e-05,
      "loss": 0.9811,
      "step": 223870
    },
    {
      "epoch": 0.7846435843786252,
      "grad_norm": 2.546875,
      "learning_rate": 4.102523163833007e-05,
      "loss": 0.8578,
      "step": 223880
    },
    {
      "epoch": 0.7846786318855208,
      "grad_norm": 3.015625,
      "learning_rate": 4.1024582609666374e-05,
      "loss": 0.8902,
      "step": 223890
    },
    {
      "epoch": 0.7847136793924164,
      "grad_norm": 3.421875,
      "learning_rate": 4.102393358100267e-05,
      "loss": 0.8793,
      "step": 223900
    },
    {
      "epoch": 0.784748726899312,
      "grad_norm": 3.140625,
      "learning_rate": 4.102328455233897e-05,
      "loss": 0.8895,
      "step": 223910
    },
    {
      "epoch": 0.7847837744062076,
      "grad_norm": 2.6875,
      "learning_rate": 4.1022635523675265e-05,
      "loss": 0.881,
      "step": 223920
    },
    {
      "epoch": 0.7848188219131033,
      "grad_norm": 3.359375,
      "learning_rate": 4.1021986495011566e-05,
      "loss": 0.8928,
      "step": 223930
    },
    {
      "epoch": 0.7848538694199988,
      "grad_norm": 2.921875,
      "learning_rate": 4.102133746634786e-05,
      "loss": 0.9124,
      "step": 223940
    },
    {
      "epoch": 0.7848889169268944,
      "grad_norm": 3.0,
      "learning_rate": 4.102068843768416e-05,
      "loss": 0.8779,
      "step": 223950
    },
    {
      "epoch": 0.78492396443379,
      "grad_norm": 2.953125,
      "learning_rate": 4.1020039409020464e-05,
      "loss": 0.9401,
      "step": 223960
    },
    {
      "epoch": 0.7849590119406856,
      "grad_norm": 3.578125,
      "learning_rate": 4.101939038035676e-05,
      "loss": 0.9489,
      "step": 223970
    },
    {
      "epoch": 0.7849940594475812,
      "grad_norm": 2.875,
      "learning_rate": 4.101874135169306e-05,
      "loss": 0.9371,
      "step": 223980
    },
    {
      "epoch": 0.7850291069544768,
      "grad_norm": 2.828125,
      "learning_rate": 4.1018092323029354e-05,
      "loss": 0.809,
      "step": 223990
    },
    {
      "epoch": 0.7850641544613723,
      "grad_norm": 2.53125,
      "learning_rate": 4.1017443294365656e-05,
      "loss": 0.8777,
      "step": 224000
    },
    {
      "epoch": 0.785099201968268,
      "grad_norm": 3.21875,
      "learning_rate": 4.101679426570195e-05,
      "loss": 0.8264,
      "step": 224010
    },
    {
      "epoch": 0.7851342494751636,
      "grad_norm": 3.09375,
      "learning_rate": 4.101614523703825e-05,
      "loss": 0.8424,
      "step": 224020
    },
    {
      "epoch": 0.7851692969820592,
      "grad_norm": 3.234375,
      "learning_rate": 4.1015496208374546e-05,
      "loss": 0.9555,
      "step": 224030
    },
    {
      "epoch": 0.7852043444889548,
      "grad_norm": 2.5625,
      "learning_rate": 4.101484717971085e-05,
      "loss": 0.8609,
      "step": 224040
    },
    {
      "epoch": 0.7852393919958504,
      "grad_norm": 2.40625,
      "learning_rate": 4.101419815104714e-05,
      "loss": 0.8707,
      "step": 224050
    },
    {
      "epoch": 0.785274439502746,
      "grad_norm": 3.15625,
      "learning_rate": 4.1013549122383444e-05,
      "loss": 0.9315,
      "step": 224060
    },
    {
      "epoch": 0.7853094870096415,
      "grad_norm": 2.625,
      "learning_rate": 4.101290009371974e-05,
      "loss": 0.8266,
      "step": 224070
    },
    {
      "epoch": 0.7853445345165372,
      "grad_norm": 2.8125,
      "learning_rate": 4.101225106505604e-05,
      "loss": 0.8731,
      "step": 224080
    },
    {
      "epoch": 0.7853795820234327,
      "grad_norm": 2.96875,
      "learning_rate": 4.101160203639234e-05,
      "loss": 0.9221,
      "step": 224090
    },
    {
      "epoch": 0.7854146295303284,
      "grad_norm": 2.859375,
      "learning_rate": 4.1010953007728636e-05,
      "loss": 0.9308,
      "step": 224100
    },
    {
      "epoch": 0.7854496770372239,
      "grad_norm": 3.0625,
      "learning_rate": 4.101030397906494e-05,
      "loss": 0.9184,
      "step": 224110
    },
    {
      "epoch": 0.7854847245441196,
      "grad_norm": 3.25,
      "learning_rate": 4.100965495040123e-05,
      "loss": 0.9104,
      "step": 224120
    },
    {
      "epoch": 0.7855197720510152,
      "grad_norm": 3.21875,
      "learning_rate": 4.100900592173753e-05,
      "loss": 0.8835,
      "step": 224130
    },
    {
      "epoch": 0.7855548195579107,
      "grad_norm": 3.046875,
      "learning_rate": 4.100835689307383e-05,
      "loss": 0.9491,
      "step": 224140
    },
    {
      "epoch": 0.7855898670648064,
      "grad_norm": 3.109375,
      "learning_rate": 4.100770786441013e-05,
      "loss": 0.8786,
      "step": 224150
    },
    {
      "epoch": 0.7856249145717019,
      "grad_norm": 3.046875,
      "learning_rate": 4.1007058835746424e-05,
      "loss": 0.9212,
      "step": 224160
    },
    {
      "epoch": 0.7856599620785976,
      "grad_norm": 2.890625,
      "learning_rate": 4.1006409807082725e-05,
      "loss": 0.8893,
      "step": 224170
    },
    {
      "epoch": 0.7856950095854931,
      "grad_norm": 2.90625,
      "learning_rate": 4.100576077841902e-05,
      "loss": 0.8633,
      "step": 224180
    },
    {
      "epoch": 0.7857300570923887,
      "grad_norm": 3.109375,
      "learning_rate": 4.100511174975532e-05,
      "loss": 0.8776,
      "step": 224190
    },
    {
      "epoch": 0.7857651045992843,
      "grad_norm": 2.484375,
      "learning_rate": 4.1004462721091616e-05,
      "loss": 0.8305,
      "step": 224200
    },
    {
      "epoch": 0.7858001521061799,
      "grad_norm": 3.109375,
      "learning_rate": 4.100381369242792e-05,
      "loss": 0.8526,
      "step": 224210
    },
    {
      "epoch": 0.7858351996130756,
      "grad_norm": 2.921875,
      "learning_rate": 4.100316466376421e-05,
      "loss": 0.953,
      "step": 224220
    },
    {
      "epoch": 0.7858702471199711,
      "grad_norm": 2.46875,
      "learning_rate": 4.1002515635100506e-05,
      "loss": 0.8624,
      "step": 224230
    },
    {
      "epoch": 0.7859052946268668,
      "grad_norm": 3.265625,
      "learning_rate": 4.100186660643681e-05,
      "loss": 0.9426,
      "step": 224240
    },
    {
      "epoch": 0.7859403421337623,
      "grad_norm": 2.84375,
      "learning_rate": 4.10012175777731e-05,
      "loss": 0.8061,
      "step": 224250
    },
    {
      "epoch": 0.7859753896406579,
      "grad_norm": 3.0,
      "learning_rate": 4.1000568549109404e-05,
      "loss": 0.9104,
      "step": 224260
    },
    {
      "epoch": 0.7860104371475535,
      "grad_norm": 3.0,
      "learning_rate": 4.09999195204457e-05,
      "loss": 0.8868,
      "step": 224270
    },
    {
      "epoch": 0.7860454846544491,
      "grad_norm": 2.9375,
      "learning_rate": 4.0999270491782e-05,
      "loss": 0.841,
      "step": 224280
    },
    {
      "epoch": 0.7860805321613447,
      "grad_norm": 2.734375,
      "learning_rate": 4.0998621463118294e-05,
      "loss": 0.9167,
      "step": 224290
    },
    {
      "epoch": 0.7861155796682403,
      "grad_norm": 2.796875,
      "learning_rate": 4.0997972434454596e-05,
      "loss": 0.8805,
      "step": 224300
    },
    {
      "epoch": 0.786150627175136,
      "grad_norm": 2.890625,
      "learning_rate": 4.09973234057909e-05,
      "loss": 0.8786,
      "step": 224310
    },
    {
      "epoch": 0.7861856746820315,
      "grad_norm": 2.9375,
      "learning_rate": 4.099667437712719e-05,
      "loss": 0.909,
      "step": 224320
    },
    {
      "epoch": 0.7862207221889271,
      "grad_norm": 3.046875,
      "learning_rate": 4.099602534846349e-05,
      "loss": 0.858,
      "step": 224330
    },
    {
      "epoch": 0.7862557696958227,
      "grad_norm": 3.453125,
      "learning_rate": 4.099537631979979e-05,
      "loss": 0.9963,
      "step": 224340
    },
    {
      "epoch": 0.7862908172027183,
      "grad_norm": 3.1875,
      "learning_rate": 4.099472729113609e-05,
      "loss": 0.9102,
      "step": 224350
    },
    {
      "epoch": 0.7863258647096139,
      "grad_norm": 2.90625,
      "learning_rate": 4.0994078262472384e-05,
      "loss": 0.8161,
      "step": 224360
    },
    {
      "epoch": 0.7863609122165095,
      "grad_norm": 3.171875,
      "learning_rate": 4.0993429233808685e-05,
      "loss": 0.9035,
      "step": 224370
    },
    {
      "epoch": 0.786395959723405,
      "grad_norm": 2.8125,
      "learning_rate": 4.099278020514498e-05,
      "loss": 0.927,
      "step": 224380
    },
    {
      "epoch": 0.7864310072303007,
      "grad_norm": 2.890625,
      "learning_rate": 4.099213117648128e-05,
      "loss": 0.9136,
      "step": 224390
    },
    {
      "epoch": 0.7864660547371962,
      "grad_norm": 2.90625,
      "learning_rate": 4.0991482147817576e-05,
      "loss": 0.88,
      "step": 224400
    },
    {
      "epoch": 0.7865011022440919,
      "grad_norm": 3.03125,
      "learning_rate": 4.099083311915388e-05,
      "loss": 0.8673,
      "step": 224410
    },
    {
      "epoch": 0.7865361497509875,
      "grad_norm": 2.828125,
      "learning_rate": 4.099018409049017e-05,
      "loss": 0.9272,
      "step": 224420
    },
    {
      "epoch": 0.786571197257883,
      "grad_norm": 3.0625,
      "learning_rate": 4.098953506182647e-05,
      "loss": 1.0263,
      "step": 224430
    },
    {
      "epoch": 0.7866062447647787,
      "grad_norm": 2.8125,
      "learning_rate": 4.098888603316277e-05,
      "loss": 0.9077,
      "step": 224440
    },
    {
      "epoch": 0.7866412922716742,
      "grad_norm": 3.34375,
      "learning_rate": 4.098823700449907e-05,
      "loss": 0.8707,
      "step": 224450
    },
    {
      "epoch": 0.7866763397785699,
      "grad_norm": 3.171875,
      "learning_rate": 4.098758797583537e-05,
      "loss": 0.8899,
      "step": 224460
    },
    {
      "epoch": 0.7867113872854654,
      "grad_norm": 3.375,
      "learning_rate": 4.0986938947171665e-05,
      "loss": 0.873,
      "step": 224470
    },
    {
      "epoch": 0.7867464347923611,
      "grad_norm": 2.734375,
      "learning_rate": 4.098628991850797e-05,
      "loss": 0.8631,
      "step": 224480
    },
    {
      "epoch": 0.7867814822992566,
      "grad_norm": 2.828125,
      "learning_rate": 4.098564088984426e-05,
      "loss": 0.8377,
      "step": 224490
    },
    {
      "epoch": 0.7868165298061522,
      "grad_norm": 2.875,
      "learning_rate": 4.098499186118056e-05,
      "loss": 0.926,
      "step": 224500
    },
    {
      "epoch": 0.7868515773130479,
      "grad_norm": 3.0625,
      "learning_rate": 4.098434283251686e-05,
      "loss": 0.9293,
      "step": 224510
    },
    {
      "epoch": 0.7868866248199434,
      "grad_norm": 2.9375,
      "learning_rate": 4.098369380385316e-05,
      "loss": 0.8903,
      "step": 224520
    },
    {
      "epoch": 0.7869216723268391,
      "grad_norm": 2.65625,
      "learning_rate": 4.098304477518945e-05,
      "loss": 0.883,
      "step": 224530
    },
    {
      "epoch": 0.7869567198337346,
      "grad_norm": 2.734375,
      "learning_rate": 4.0982395746525755e-05,
      "loss": 0.9091,
      "step": 224540
    },
    {
      "epoch": 0.7869917673406303,
      "grad_norm": 3.046875,
      "learning_rate": 4.098174671786205e-05,
      "loss": 0.8555,
      "step": 224550
    },
    {
      "epoch": 0.7870268148475258,
      "grad_norm": 3.515625,
      "learning_rate": 4.098109768919835e-05,
      "loss": 0.8773,
      "step": 224560
    },
    {
      "epoch": 0.7870618623544214,
      "grad_norm": 3.265625,
      "learning_rate": 4.0980448660534645e-05,
      "loss": 0.9139,
      "step": 224570
    },
    {
      "epoch": 0.787096909861317,
      "grad_norm": 2.796875,
      "learning_rate": 4.097979963187095e-05,
      "loss": 0.9435,
      "step": 224580
    },
    {
      "epoch": 0.7871319573682126,
      "grad_norm": 3.34375,
      "learning_rate": 4.097915060320725e-05,
      "loss": 0.9369,
      "step": 224590
    },
    {
      "epoch": 0.7871670048751082,
      "grad_norm": 2.75,
      "learning_rate": 4.0978501574543536e-05,
      "loss": 0.8701,
      "step": 224600
    },
    {
      "epoch": 0.7872020523820038,
      "grad_norm": 2.953125,
      "learning_rate": 4.097785254587984e-05,
      "loss": 0.9094,
      "step": 224610
    },
    {
      "epoch": 0.7872370998888994,
      "grad_norm": 2.625,
      "learning_rate": 4.097720351721613e-05,
      "loss": 0.9134,
      "step": 224620
    },
    {
      "epoch": 0.787272147395795,
      "grad_norm": 2.640625,
      "learning_rate": 4.097655448855243e-05,
      "loss": 0.9628,
      "step": 224630
    },
    {
      "epoch": 0.7873071949026906,
      "grad_norm": 2.75,
      "learning_rate": 4.097590545988873e-05,
      "loss": 0.8332,
      "step": 224640
    },
    {
      "epoch": 0.7873422424095862,
      "grad_norm": 3.03125,
      "learning_rate": 4.097525643122503e-05,
      "loss": 0.9268,
      "step": 224650
    },
    {
      "epoch": 0.7873772899164818,
      "grad_norm": 3.28125,
      "learning_rate": 4.0974607402561324e-05,
      "loss": 0.8533,
      "step": 224660
    },
    {
      "epoch": 0.7874123374233774,
      "grad_norm": 3.09375,
      "learning_rate": 4.0973958373897625e-05,
      "loss": 0.8689,
      "step": 224670
    },
    {
      "epoch": 0.787447384930273,
      "grad_norm": 2.578125,
      "learning_rate": 4.097330934523393e-05,
      "loss": 0.8324,
      "step": 224680
    },
    {
      "epoch": 0.7874824324371685,
      "grad_norm": 2.953125,
      "learning_rate": 4.097266031657022e-05,
      "loss": 0.9168,
      "step": 224690
    },
    {
      "epoch": 0.7875174799440642,
      "grad_norm": 3.078125,
      "learning_rate": 4.097201128790652e-05,
      "loss": 0.892,
      "step": 224700
    },
    {
      "epoch": 0.7875525274509598,
      "grad_norm": 3.203125,
      "learning_rate": 4.097136225924282e-05,
      "loss": 0.898,
      "step": 224710
    },
    {
      "epoch": 0.7875875749578554,
      "grad_norm": 2.796875,
      "learning_rate": 4.097071323057912e-05,
      "loss": 0.8469,
      "step": 224720
    },
    {
      "epoch": 0.787622622464751,
      "grad_norm": 2.21875,
      "learning_rate": 4.097006420191541e-05,
      "loss": 0.8857,
      "step": 224730
    },
    {
      "epoch": 0.7876576699716465,
      "grad_norm": 2.96875,
      "learning_rate": 4.0969415173251715e-05,
      "loss": 0.9234,
      "step": 224740
    },
    {
      "epoch": 0.7876927174785422,
      "grad_norm": 3.3125,
      "learning_rate": 4.096876614458801e-05,
      "loss": 0.8632,
      "step": 224750
    },
    {
      "epoch": 0.7877277649854377,
      "grad_norm": 2.890625,
      "learning_rate": 4.096811711592431e-05,
      "loss": 0.823,
      "step": 224760
    },
    {
      "epoch": 0.7877628124923334,
      "grad_norm": 2.96875,
      "learning_rate": 4.0967468087260605e-05,
      "loss": 0.9118,
      "step": 224770
    },
    {
      "epoch": 0.7877978599992289,
      "grad_norm": 3.203125,
      "learning_rate": 4.096681905859691e-05,
      "loss": 0.9124,
      "step": 224780
    },
    {
      "epoch": 0.7878329075061246,
      "grad_norm": 3.078125,
      "learning_rate": 4.09661700299332e-05,
      "loss": 0.844,
      "step": 224790
    },
    {
      "epoch": 0.7878679550130201,
      "grad_norm": 3.25,
      "learning_rate": 4.09655210012695e-05,
      "loss": 0.8895,
      "step": 224800
    },
    {
      "epoch": 0.7879030025199157,
      "grad_norm": 5.5625,
      "learning_rate": 4.09648719726058e-05,
      "loss": 0.9586,
      "step": 224810
    },
    {
      "epoch": 0.7879380500268114,
      "grad_norm": 2.6875,
      "learning_rate": 4.09642229439421e-05,
      "loss": 0.9124,
      "step": 224820
    },
    {
      "epoch": 0.7879730975337069,
      "grad_norm": 3.0,
      "learning_rate": 4.09635739152784e-05,
      "loss": 0.9325,
      "step": 224830
    },
    {
      "epoch": 0.7880081450406026,
      "grad_norm": 2.25,
      "learning_rate": 4.0962924886614695e-05,
      "loss": 0.7905,
      "step": 224840
    },
    {
      "epoch": 0.7880431925474981,
      "grad_norm": 3.03125,
      "learning_rate": 4.0962275857950996e-05,
      "loss": 0.9262,
      "step": 224850
    },
    {
      "epoch": 0.7880782400543938,
      "grad_norm": 2.796875,
      "learning_rate": 4.096162682928729e-05,
      "loss": 0.7972,
      "step": 224860
    },
    {
      "epoch": 0.7881132875612893,
      "grad_norm": 3.0,
      "learning_rate": 4.096097780062359e-05,
      "loss": 0.9111,
      "step": 224870
    },
    {
      "epoch": 0.7881483350681849,
      "grad_norm": 2.625,
      "learning_rate": 4.096032877195989e-05,
      "loss": 0.8977,
      "step": 224880
    },
    {
      "epoch": 0.7881833825750805,
      "grad_norm": 3.171875,
      "learning_rate": 4.095967974329619e-05,
      "loss": 0.8956,
      "step": 224890
    },
    {
      "epoch": 0.7882184300819761,
      "grad_norm": 2.8125,
      "learning_rate": 4.095903071463248e-05,
      "loss": 0.8184,
      "step": 224900
    },
    {
      "epoch": 0.7882534775888718,
      "grad_norm": 3.1875,
      "learning_rate": 4.0958381685968784e-05,
      "loss": 0.9525,
      "step": 224910
    },
    {
      "epoch": 0.7882885250957673,
      "grad_norm": 2.671875,
      "learning_rate": 4.095773265730508e-05,
      "loss": 0.8663,
      "step": 224920
    },
    {
      "epoch": 0.788323572602663,
      "grad_norm": 3.0,
      "learning_rate": 4.095708362864138e-05,
      "loss": 0.8974,
      "step": 224930
    },
    {
      "epoch": 0.7883586201095585,
      "grad_norm": 2.921875,
      "learning_rate": 4.0956434599977675e-05,
      "loss": 0.9694,
      "step": 224940
    },
    {
      "epoch": 0.7883936676164541,
      "grad_norm": 2.75,
      "learning_rate": 4.0955785571313976e-05,
      "loss": 0.8794,
      "step": 224950
    },
    {
      "epoch": 0.7884287151233497,
      "grad_norm": 3.078125,
      "learning_rate": 4.095513654265028e-05,
      "loss": 0.8286,
      "step": 224960
    },
    {
      "epoch": 0.7884637626302453,
      "grad_norm": 3.03125,
      "learning_rate": 4.0954487513986565e-05,
      "loss": 0.8927,
      "step": 224970
    },
    {
      "epoch": 0.7884988101371408,
      "grad_norm": 2.953125,
      "learning_rate": 4.095383848532287e-05,
      "loss": 0.8846,
      "step": 224980
    },
    {
      "epoch": 0.7885338576440365,
      "grad_norm": 2.484375,
      "learning_rate": 4.095318945665916e-05,
      "loss": 0.8501,
      "step": 224990
    },
    {
      "epoch": 0.7885689051509321,
      "grad_norm": 3.234375,
      "learning_rate": 4.095254042799546e-05,
      "loss": 0.8136,
      "step": 225000
    },
    {
      "epoch": 0.7885689051509321,
      "eval_loss": 0.8378106951713562,
      "eval_runtime": 558.3846,
      "eval_samples_per_second": 681.315,
      "eval_steps_per_second": 56.776,
      "step": 225000
    },
    {
      "epoch": 0.7886039526578277,
      "grad_norm": 2.921875,
      "learning_rate": 4.095189139933176e-05,
      "loss": 0.8911,
      "step": 225010
    },
    {
      "epoch": 0.7886390001647233,
      "grad_norm": 2.6875,
      "learning_rate": 4.095124237066806e-05,
      "loss": 0.8406,
      "step": 225020
    },
    {
      "epoch": 0.7886740476716189,
      "grad_norm": 3.0,
      "learning_rate": 4.095059334200435e-05,
      "loss": 0.9333,
      "step": 225030
    },
    {
      "epoch": 0.7887090951785145,
      "grad_norm": 3.03125,
      "learning_rate": 4.0949944313340655e-05,
      "loss": 0.9165,
      "step": 225040
    },
    {
      "epoch": 0.78874414268541,
      "grad_norm": 3.40625,
      "learning_rate": 4.0949295284676956e-05,
      "loss": 0.8824,
      "step": 225050
    },
    {
      "epoch": 0.7887791901923057,
      "grad_norm": 3.015625,
      "learning_rate": 4.094864625601325e-05,
      "loss": 0.8803,
      "step": 225060
    },
    {
      "epoch": 0.7888142376992012,
      "grad_norm": 3.21875,
      "learning_rate": 4.094799722734955e-05,
      "loss": 0.8683,
      "step": 225070
    },
    {
      "epoch": 0.7888492852060969,
      "grad_norm": 2.34375,
      "learning_rate": 4.094734819868585e-05,
      "loss": 0.8504,
      "step": 225080
    },
    {
      "epoch": 0.7888843327129924,
      "grad_norm": 2.703125,
      "learning_rate": 4.094669917002215e-05,
      "loss": 0.9095,
      "step": 225090
    },
    {
      "epoch": 0.7889193802198881,
      "grad_norm": 2.40625,
      "learning_rate": 4.094605014135844e-05,
      "loss": 0.9246,
      "step": 225100
    },
    {
      "epoch": 0.7889544277267837,
      "grad_norm": 2.9375,
      "learning_rate": 4.0945401112694744e-05,
      "loss": 0.8787,
      "step": 225110
    },
    {
      "epoch": 0.7889894752336792,
      "grad_norm": 2.515625,
      "learning_rate": 4.094475208403104e-05,
      "loss": 0.8177,
      "step": 225120
    },
    {
      "epoch": 0.7890245227405749,
      "grad_norm": 2.828125,
      "learning_rate": 4.094410305536734e-05,
      "loss": 0.8667,
      "step": 225130
    },
    {
      "epoch": 0.7890595702474704,
      "grad_norm": 2.921875,
      "learning_rate": 4.0943454026703635e-05,
      "loss": 0.9011,
      "step": 225140
    },
    {
      "epoch": 0.7890946177543661,
      "grad_norm": 2.875,
      "learning_rate": 4.0942804998039936e-05,
      "loss": 0.9403,
      "step": 225150
    },
    {
      "epoch": 0.7891296652612616,
      "grad_norm": 2.6875,
      "learning_rate": 4.094215596937623e-05,
      "loss": 0.911,
      "step": 225160
    },
    {
      "epoch": 0.7891647127681573,
      "grad_norm": 2.859375,
      "learning_rate": 4.094150694071253e-05,
      "loss": 0.9477,
      "step": 225170
    },
    {
      "epoch": 0.7891997602750528,
      "grad_norm": 3.1875,
      "learning_rate": 4.0940857912048833e-05,
      "loss": 0.8947,
      "step": 225180
    },
    {
      "epoch": 0.7892348077819484,
      "grad_norm": 3.140625,
      "learning_rate": 4.094020888338513e-05,
      "loss": 0.9453,
      "step": 225190
    },
    {
      "epoch": 0.7892698552888441,
      "grad_norm": 3.0,
      "learning_rate": 4.093955985472143e-05,
      "loss": 0.9207,
      "step": 225200
    },
    {
      "epoch": 0.7893049027957396,
      "grad_norm": 3.0,
      "learning_rate": 4.0938910826057724e-05,
      "loss": 0.872,
      "step": 225210
    },
    {
      "epoch": 0.7893399503026353,
      "grad_norm": 3.125,
      "learning_rate": 4.0938261797394025e-05,
      "loss": 0.9062,
      "step": 225220
    },
    {
      "epoch": 0.7893749978095308,
      "grad_norm": 2.484375,
      "learning_rate": 4.093761276873032e-05,
      "loss": 0.8716,
      "step": 225230
    },
    {
      "epoch": 0.7894100453164264,
      "grad_norm": 2.90625,
      "learning_rate": 4.093696374006662e-05,
      "loss": 1.0013,
      "step": 225240
    },
    {
      "epoch": 0.789445092823322,
      "grad_norm": 3.1875,
      "learning_rate": 4.0936314711402916e-05,
      "loss": 0.8256,
      "step": 225250
    },
    {
      "epoch": 0.7894801403302176,
      "grad_norm": 2.984375,
      "learning_rate": 4.093566568273922e-05,
      "loss": 0.798,
      "step": 225260
    },
    {
      "epoch": 0.7895151878371132,
      "grad_norm": 2.6875,
      "learning_rate": 4.093501665407551e-05,
      "loss": 0.9512,
      "step": 225270
    },
    {
      "epoch": 0.7895502353440088,
      "grad_norm": 2.890625,
      "learning_rate": 4.0934367625411813e-05,
      "loss": 0.8477,
      "step": 225280
    },
    {
      "epoch": 0.7895852828509043,
      "grad_norm": 2.765625,
      "learning_rate": 4.093371859674811e-05,
      "loss": 0.9384,
      "step": 225290
    },
    {
      "epoch": 0.7896203303578,
      "grad_norm": 2.8125,
      "learning_rate": 4.093306956808441e-05,
      "loss": 0.9352,
      "step": 225300
    },
    {
      "epoch": 0.7896553778646956,
      "grad_norm": 3.53125,
      "learning_rate": 4.0932420539420704e-05,
      "loss": 0.9337,
      "step": 225310
    },
    {
      "epoch": 0.7896904253715912,
      "grad_norm": 2.890625,
      "learning_rate": 4.0931771510757005e-05,
      "loss": 0.8779,
      "step": 225320
    },
    {
      "epoch": 0.7897254728784868,
      "grad_norm": 3.234375,
      "learning_rate": 4.093112248209331e-05,
      "loss": 0.8846,
      "step": 225330
    },
    {
      "epoch": 0.7897605203853824,
      "grad_norm": 3.1875,
      "learning_rate": 4.09304734534296e-05,
      "loss": 0.8587,
      "step": 225340
    },
    {
      "epoch": 0.789795567892278,
      "grad_norm": 2.8125,
      "learning_rate": 4.0929824424765896e-05,
      "loss": 0.9158,
      "step": 225350
    },
    {
      "epoch": 0.7898306153991735,
      "grad_norm": 2.765625,
      "learning_rate": 4.092917539610219e-05,
      "loss": 0.8115,
      "step": 225360
    },
    {
      "epoch": 0.7898656629060692,
      "grad_norm": 2.765625,
      "learning_rate": 4.092852636743849e-05,
      "loss": 0.8722,
      "step": 225370
    },
    {
      "epoch": 0.7899007104129647,
      "grad_norm": 2.796875,
      "learning_rate": 4.092787733877479e-05,
      "loss": 0.9152,
      "step": 225380
    },
    {
      "epoch": 0.7899357579198604,
      "grad_norm": 2.96875,
      "learning_rate": 4.092722831011109e-05,
      "loss": 0.9055,
      "step": 225390
    },
    {
      "epoch": 0.789970805426756,
      "grad_norm": 2.8125,
      "learning_rate": 4.092657928144738e-05,
      "loss": 0.883,
      "step": 225400
    },
    {
      "epoch": 0.7900058529336516,
      "grad_norm": 2.71875,
      "learning_rate": 4.0925930252783684e-05,
      "loss": 0.939,
      "step": 225410
    },
    {
      "epoch": 0.7900409004405472,
      "grad_norm": 2.734375,
      "learning_rate": 4.0925281224119985e-05,
      "loss": 0.7745,
      "step": 225420
    },
    {
      "epoch": 0.7900759479474427,
      "grad_norm": 2.828125,
      "learning_rate": 4.092463219545628e-05,
      "loss": 0.9539,
      "step": 225430
    },
    {
      "epoch": 0.7901109954543384,
      "grad_norm": 2.96875,
      "learning_rate": 4.092398316679258e-05,
      "loss": 0.9308,
      "step": 225440
    },
    {
      "epoch": 0.7901460429612339,
      "grad_norm": 2.84375,
      "learning_rate": 4.0923334138128876e-05,
      "loss": 0.8924,
      "step": 225450
    },
    {
      "epoch": 0.7901810904681296,
      "grad_norm": 3.1875,
      "learning_rate": 4.092268510946518e-05,
      "loss": 0.8822,
      "step": 225460
    },
    {
      "epoch": 0.7902161379750251,
      "grad_norm": 2.953125,
      "learning_rate": 4.092203608080147e-05,
      "loss": 0.8688,
      "step": 225470
    },
    {
      "epoch": 0.7902511854819207,
      "grad_norm": 3.25,
      "learning_rate": 4.0921387052137773e-05,
      "loss": 0.8671,
      "step": 225480
    },
    {
      "epoch": 0.7902862329888164,
      "grad_norm": 3.140625,
      "learning_rate": 4.092073802347407e-05,
      "loss": 0.8652,
      "step": 225490
    },
    {
      "epoch": 0.7903212804957119,
      "grad_norm": 2.671875,
      "learning_rate": 4.092008899481037e-05,
      "loss": 0.8058,
      "step": 225500
    },
    {
      "epoch": 0.7903563280026076,
      "grad_norm": 2.890625,
      "learning_rate": 4.0919439966146664e-05,
      "loss": 0.9377,
      "step": 225510
    },
    {
      "epoch": 0.7903913755095031,
      "grad_norm": 3.6875,
      "learning_rate": 4.0918790937482965e-05,
      "loss": 0.9234,
      "step": 225520
    },
    {
      "epoch": 0.7904264230163988,
      "grad_norm": 2.96875,
      "learning_rate": 4.091814190881926e-05,
      "loss": 0.8973,
      "step": 225530
    },
    {
      "epoch": 0.7904614705232943,
      "grad_norm": 2.65625,
      "learning_rate": 4.091749288015556e-05,
      "loss": 0.9563,
      "step": 225540
    },
    {
      "epoch": 0.7904965180301899,
      "grad_norm": 3.15625,
      "learning_rate": 4.091684385149186e-05,
      "loss": 0.9038,
      "step": 225550
    },
    {
      "epoch": 0.7905315655370855,
      "grad_norm": 3.03125,
      "learning_rate": 4.091619482282816e-05,
      "loss": 1.0035,
      "step": 225560
    },
    {
      "epoch": 0.7905666130439811,
      "grad_norm": 2.875,
      "learning_rate": 4.091554579416446e-05,
      "loss": 0.9084,
      "step": 225570
    },
    {
      "epoch": 0.7906016605508767,
      "grad_norm": 2.78125,
      "learning_rate": 4.0914896765500753e-05,
      "loss": 0.9049,
      "step": 225580
    },
    {
      "epoch": 0.7906367080577723,
      "grad_norm": 2.890625,
      "learning_rate": 4.0914247736837055e-05,
      "loss": 0.9016,
      "step": 225590
    },
    {
      "epoch": 0.790671755564668,
      "grad_norm": 3.65625,
      "learning_rate": 4.091359870817335e-05,
      "loss": 0.8492,
      "step": 225600
    },
    {
      "epoch": 0.7907068030715635,
      "grad_norm": 2.828125,
      "learning_rate": 4.091294967950965e-05,
      "loss": 0.9176,
      "step": 225610
    },
    {
      "epoch": 0.7907418505784591,
      "grad_norm": 3.03125,
      "learning_rate": 4.0912300650845945e-05,
      "loss": 0.8905,
      "step": 225620
    },
    {
      "epoch": 0.7907768980853547,
      "grad_norm": 2.96875,
      "learning_rate": 4.091165162218225e-05,
      "loss": 0.829,
      "step": 225630
    },
    {
      "epoch": 0.7908119455922503,
      "grad_norm": 2.9375,
      "learning_rate": 4.091100259351854e-05,
      "loss": 0.9324,
      "step": 225640
    },
    {
      "epoch": 0.7908469930991459,
      "grad_norm": 3.171875,
      "learning_rate": 4.091035356485484e-05,
      "loss": 0.8673,
      "step": 225650
    },
    {
      "epoch": 0.7908820406060415,
      "grad_norm": 3.25,
      "learning_rate": 4.090970453619114e-05,
      "loss": 0.8189,
      "step": 225660
    },
    {
      "epoch": 0.790917088112937,
      "grad_norm": 3.4375,
      "learning_rate": 4.090905550752744e-05,
      "loss": 0.952,
      "step": 225670
    },
    {
      "epoch": 0.7909521356198327,
      "grad_norm": 2.765625,
      "learning_rate": 4.0908406478863733e-05,
      "loss": 0.855,
      "step": 225680
    },
    {
      "epoch": 0.7909871831267283,
      "grad_norm": 2.75,
      "learning_rate": 4.0907757450200035e-05,
      "loss": 0.8783,
      "step": 225690
    },
    {
      "epoch": 0.7910222306336239,
      "grad_norm": 3.109375,
      "learning_rate": 4.0907108421536336e-05,
      "loss": 0.8749,
      "step": 225700
    },
    {
      "epoch": 0.7910572781405195,
      "grad_norm": 2.828125,
      "learning_rate": 4.090645939287263e-05,
      "loss": 0.8652,
      "step": 225710
    },
    {
      "epoch": 0.791092325647415,
      "grad_norm": 3.09375,
      "learning_rate": 4.090581036420893e-05,
      "loss": 0.9887,
      "step": 225720
    },
    {
      "epoch": 0.7911273731543107,
      "grad_norm": 2.578125,
      "learning_rate": 4.090516133554522e-05,
      "loss": 0.9339,
      "step": 225730
    },
    {
      "epoch": 0.7911624206612062,
      "grad_norm": 3.125,
      "learning_rate": 4.090451230688152e-05,
      "loss": 0.8828,
      "step": 225740
    },
    {
      "epoch": 0.7911974681681019,
      "grad_norm": 2.734375,
      "learning_rate": 4.0903863278217816e-05,
      "loss": 0.8249,
      "step": 225750
    },
    {
      "epoch": 0.7912325156749974,
      "grad_norm": 2.796875,
      "learning_rate": 4.090321424955412e-05,
      "loss": 0.8867,
      "step": 225760
    },
    {
      "epoch": 0.7912675631818931,
      "grad_norm": 3.125,
      "learning_rate": 4.090256522089041e-05,
      "loss": 0.9034,
      "step": 225770
    },
    {
      "epoch": 0.7913026106887886,
      "grad_norm": 3.078125,
      "learning_rate": 4.0901916192226713e-05,
      "loss": 0.9206,
      "step": 225780
    },
    {
      "epoch": 0.7913376581956842,
      "grad_norm": 3.0,
      "learning_rate": 4.0901267163563015e-05,
      "loss": 0.8864,
      "step": 225790
    },
    {
      "epoch": 0.7913727057025799,
      "grad_norm": 2.390625,
      "learning_rate": 4.090061813489931e-05,
      "loss": 0.8641,
      "step": 225800
    },
    {
      "epoch": 0.7914077532094754,
      "grad_norm": 3.140625,
      "learning_rate": 4.089996910623561e-05,
      "loss": 0.9175,
      "step": 225810
    },
    {
      "epoch": 0.7914428007163711,
      "grad_norm": 2.765625,
      "learning_rate": 4.0899320077571905e-05,
      "loss": 0.944,
      "step": 225820
    },
    {
      "epoch": 0.7914778482232666,
      "grad_norm": 2.765625,
      "learning_rate": 4.089867104890821e-05,
      "loss": 0.8928,
      "step": 225830
    },
    {
      "epoch": 0.7915128957301623,
      "grad_norm": 2.984375,
      "learning_rate": 4.08980220202445e-05,
      "loss": 0.8616,
      "step": 225840
    },
    {
      "epoch": 0.7915479432370578,
      "grad_norm": 3.1875,
      "learning_rate": 4.08973729915808e-05,
      "loss": 0.9148,
      "step": 225850
    },
    {
      "epoch": 0.7915829907439534,
      "grad_norm": 3.21875,
      "learning_rate": 4.08967239629171e-05,
      "loss": 0.9709,
      "step": 225860
    },
    {
      "epoch": 0.791618038250849,
      "grad_norm": 2.859375,
      "learning_rate": 4.08960749342534e-05,
      "loss": 0.994,
      "step": 225870
    },
    {
      "epoch": 0.7916530857577446,
      "grad_norm": 2.921875,
      "learning_rate": 4.0895425905589693e-05,
      "loss": 0.9046,
      "step": 225880
    },
    {
      "epoch": 0.7916881332646403,
      "grad_norm": 2.671875,
      "learning_rate": 4.0894776876925995e-05,
      "loss": 0.9667,
      "step": 225890
    },
    {
      "epoch": 0.7917231807715358,
      "grad_norm": 2.5,
      "learning_rate": 4.089412784826229e-05,
      "loss": 0.8748,
      "step": 225900
    },
    {
      "epoch": 0.7917582282784315,
      "grad_norm": 2.859375,
      "learning_rate": 4.089347881959859e-05,
      "loss": 0.9873,
      "step": 225910
    },
    {
      "epoch": 0.791793275785327,
      "grad_norm": 2.671875,
      "learning_rate": 4.089282979093489e-05,
      "loss": 0.8301,
      "step": 225920
    },
    {
      "epoch": 0.7918283232922226,
      "grad_norm": 2.75,
      "learning_rate": 4.089218076227119e-05,
      "loss": 0.8681,
      "step": 225930
    },
    {
      "epoch": 0.7918633707991182,
      "grad_norm": 2.96875,
      "learning_rate": 4.089153173360749e-05,
      "loss": 0.9079,
      "step": 225940
    },
    {
      "epoch": 0.7918984183060138,
      "grad_norm": 2.828125,
      "learning_rate": 4.089088270494378e-05,
      "loss": 0.8839,
      "step": 225950
    },
    {
      "epoch": 0.7919334658129094,
      "grad_norm": 2.828125,
      "learning_rate": 4.0890233676280084e-05,
      "loss": 0.8986,
      "step": 225960
    },
    {
      "epoch": 0.791968513319805,
      "grad_norm": 3.078125,
      "learning_rate": 4.088958464761638e-05,
      "loss": 0.884,
      "step": 225970
    },
    {
      "epoch": 0.7920035608267006,
      "grad_norm": 2.96875,
      "learning_rate": 4.088893561895268e-05,
      "loss": 0.9396,
      "step": 225980
    },
    {
      "epoch": 0.7920386083335962,
      "grad_norm": 3.03125,
      "learning_rate": 4.0888286590288975e-05,
      "loss": 0.9241,
      "step": 225990
    },
    {
      "epoch": 0.7920736558404918,
      "grad_norm": 2.90625,
      "learning_rate": 4.0887637561625276e-05,
      "loss": 0.93,
      "step": 226000
    },
    {
      "epoch": 0.7921087033473874,
      "grad_norm": 2.515625,
      "learning_rate": 4.088698853296157e-05,
      "loss": 0.8388,
      "step": 226010
    },
    {
      "epoch": 0.792143750854283,
      "grad_norm": 2.875,
      "learning_rate": 4.088633950429787e-05,
      "loss": 0.899,
      "step": 226020
    },
    {
      "epoch": 0.7921787983611785,
      "grad_norm": 2.953125,
      "learning_rate": 4.088569047563417e-05,
      "loss": 0.9513,
      "step": 226030
    },
    {
      "epoch": 0.7922138458680742,
      "grad_norm": 2.953125,
      "learning_rate": 4.088504144697047e-05,
      "loss": 0.901,
      "step": 226040
    },
    {
      "epoch": 0.7922488933749697,
      "grad_norm": 2.703125,
      "learning_rate": 4.088439241830676e-05,
      "loss": 0.9379,
      "step": 226050
    },
    {
      "epoch": 0.7922839408818654,
      "grad_norm": 2.9375,
      "learning_rate": 4.0883743389643064e-05,
      "loss": 0.9499,
      "step": 226060
    },
    {
      "epoch": 0.7923189883887609,
      "grad_norm": 2.421875,
      "learning_rate": 4.0883094360979366e-05,
      "loss": 0.9185,
      "step": 226070
    },
    {
      "epoch": 0.7923540358956566,
      "grad_norm": 3.203125,
      "learning_rate": 4.088244533231566e-05,
      "loss": 0.8578,
      "step": 226080
    },
    {
      "epoch": 0.7923890834025522,
      "grad_norm": 2.9375,
      "learning_rate": 4.088179630365196e-05,
      "loss": 0.7912,
      "step": 226090
    },
    {
      "epoch": 0.7924241309094477,
      "grad_norm": 2.796875,
      "learning_rate": 4.088114727498825e-05,
      "loss": 0.8613,
      "step": 226100
    },
    {
      "epoch": 0.7924591784163434,
      "grad_norm": 3.046875,
      "learning_rate": 4.088049824632455e-05,
      "loss": 0.8258,
      "step": 226110
    },
    {
      "epoch": 0.7924942259232389,
      "grad_norm": 3.015625,
      "learning_rate": 4.0879849217660845e-05,
      "loss": 0.8861,
      "step": 226120
    },
    {
      "epoch": 0.7925292734301346,
      "grad_norm": 2.921875,
      "learning_rate": 4.087920018899715e-05,
      "loss": 0.9157,
      "step": 226130
    },
    {
      "epoch": 0.7925643209370301,
      "grad_norm": 2.90625,
      "learning_rate": 4.087855116033344e-05,
      "loss": 0.9773,
      "step": 226140
    },
    {
      "epoch": 0.7925993684439258,
      "grad_norm": 2.640625,
      "learning_rate": 4.087790213166974e-05,
      "loss": 0.8788,
      "step": 226150
    },
    {
      "epoch": 0.7926344159508213,
      "grad_norm": 2.96875,
      "learning_rate": 4.0877253103006044e-05,
      "loss": 0.8694,
      "step": 226160
    },
    {
      "epoch": 0.7926694634577169,
      "grad_norm": 2.84375,
      "learning_rate": 4.087660407434234e-05,
      "loss": 0.8979,
      "step": 226170
    },
    {
      "epoch": 0.7927045109646126,
      "grad_norm": 3.1875,
      "learning_rate": 4.087595504567864e-05,
      "loss": 0.9572,
      "step": 226180
    },
    {
      "epoch": 0.7927395584715081,
      "grad_norm": 2.9375,
      "learning_rate": 4.0875306017014935e-05,
      "loss": 0.9754,
      "step": 226190
    },
    {
      "epoch": 0.7927746059784038,
      "grad_norm": 3.375,
      "learning_rate": 4.0874656988351236e-05,
      "loss": 0.8296,
      "step": 226200
    },
    {
      "epoch": 0.7928096534852993,
      "grad_norm": 2.984375,
      "learning_rate": 4.087400795968753e-05,
      "loss": 0.9303,
      "step": 226210
    },
    {
      "epoch": 0.792844700992195,
      "grad_norm": 2.4375,
      "learning_rate": 4.087335893102383e-05,
      "loss": 0.8806,
      "step": 226220
    },
    {
      "epoch": 0.7928797484990905,
      "grad_norm": 2.875,
      "learning_rate": 4.087270990236013e-05,
      "loss": 0.9193,
      "step": 226230
    },
    {
      "epoch": 0.7929147960059861,
      "grad_norm": 3.0,
      "learning_rate": 4.087206087369643e-05,
      "loss": 0.8235,
      "step": 226240
    },
    {
      "epoch": 0.7929498435128817,
      "grad_norm": 2.859375,
      "learning_rate": 4.087141184503272e-05,
      "loss": 0.9305,
      "step": 226250
    },
    {
      "epoch": 0.7929848910197773,
      "grad_norm": 2.6875,
      "learning_rate": 4.0870762816369024e-05,
      "loss": 0.8292,
      "step": 226260
    },
    {
      "epoch": 0.7930199385266729,
      "grad_norm": 3.0,
      "learning_rate": 4.087011378770532e-05,
      "loss": 0.8652,
      "step": 226270
    },
    {
      "epoch": 0.7930549860335685,
      "grad_norm": 3.296875,
      "learning_rate": 4.086946475904162e-05,
      "loss": 0.824,
      "step": 226280
    },
    {
      "epoch": 0.7930900335404641,
      "grad_norm": 2.859375,
      "learning_rate": 4.086881573037792e-05,
      "loss": 0.8434,
      "step": 226290
    },
    {
      "epoch": 0.7931250810473597,
      "grad_norm": 2.96875,
      "learning_rate": 4.0868166701714216e-05,
      "loss": 0.8479,
      "step": 226300
    },
    {
      "epoch": 0.7931601285542553,
      "grad_norm": 2.734375,
      "learning_rate": 4.086751767305052e-05,
      "loss": 0.8163,
      "step": 226310
    },
    {
      "epoch": 0.7931951760611509,
      "grad_norm": 2.515625,
      "learning_rate": 4.086686864438681e-05,
      "loss": 0.8885,
      "step": 226320
    },
    {
      "epoch": 0.7932302235680465,
      "grad_norm": 3.03125,
      "learning_rate": 4.0866219615723114e-05,
      "loss": 0.8607,
      "step": 226330
    },
    {
      "epoch": 0.793265271074942,
      "grad_norm": 2.984375,
      "learning_rate": 4.086557058705941e-05,
      "loss": 0.8727,
      "step": 226340
    },
    {
      "epoch": 0.7933003185818377,
      "grad_norm": 2.734375,
      "learning_rate": 4.086492155839571e-05,
      "loss": 0.8772,
      "step": 226350
    },
    {
      "epoch": 0.7933353660887332,
      "grad_norm": 2.890625,
      "learning_rate": 4.0864272529732004e-05,
      "loss": 0.8678,
      "step": 226360
    },
    {
      "epoch": 0.7933704135956289,
      "grad_norm": 2.71875,
      "learning_rate": 4.0863623501068306e-05,
      "loss": 0.8883,
      "step": 226370
    },
    {
      "epoch": 0.7934054611025245,
      "grad_norm": 2.875,
      "learning_rate": 4.08629744724046e-05,
      "loss": 0.9165,
      "step": 226380
    },
    {
      "epoch": 0.7934405086094201,
      "grad_norm": 2.875,
      "learning_rate": 4.08623254437409e-05,
      "loss": 0.8801,
      "step": 226390
    },
    {
      "epoch": 0.7934755561163157,
      "grad_norm": 3.40625,
      "learning_rate": 4.0861676415077196e-05,
      "loss": 0.9679,
      "step": 226400
    },
    {
      "epoch": 0.7935106036232112,
      "grad_norm": 2.796875,
      "learning_rate": 4.08610273864135e-05,
      "loss": 0.8931,
      "step": 226410
    },
    {
      "epoch": 0.7935456511301069,
      "grad_norm": 2.921875,
      "learning_rate": 4.08603783577498e-05,
      "loss": 0.8602,
      "step": 226420
    },
    {
      "epoch": 0.7935806986370024,
      "grad_norm": 2.9375,
      "learning_rate": 4.0859729329086094e-05,
      "loss": 0.7943,
      "step": 226430
    },
    {
      "epoch": 0.7936157461438981,
      "grad_norm": 3.109375,
      "learning_rate": 4.0859080300422395e-05,
      "loss": 0.8609,
      "step": 226440
    },
    {
      "epoch": 0.7936507936507936,
      "grad_norm": 2.75,
      "learning_rate": 4.085843127175869e-05,
      "loss": 0.8055,
      "step": 226450
    },
    {
      "epoch": 0.7936858411576893,
      "grad_norm": 2.828125,
      "learning_rate": 4.085778224309499e-05,
      "loss": 0.8975,
      "step": 226460
    },
    {
      "epoch": 0.7937208886645848,
      "grad_norm": 2.734375,
      "learning_rate": 4.0857133214431286e-05,
      "loss": 0.8717,
      "step": 226470
    },
    {
      "epoch": 0.7937559361714804,
      "grad_norm": 2.59375,
      "learning_rate": 4.085648418576758e-05,
      "loss": 0.857,
      "step": 226480
    },
    {
      "epoch": 0.7937909836783761,
      "grad_norm": 2.734375,
      "learning_rate": 4.0855835157103875e-05,
      "loss": 0.8997,
      "step": 226490
    },
    {
      "epoch": 0.7938260311852716,
      "grad_norm": 2.859375,
      "learning_rate": 4.0855186128440176e-05,
      "loss": 0.8917,
      "step": 226500
    },
    {
      "epoch": 0.7938610786921673,
      "grad_norm": 2.40625,
      "learning_rate": 4.085453709977648e-05,
      "loss": 0.8559,
      "step": 226510
    },
    {
      "epoch": 0.7938961261990628,
      "grad_norm": 3.078125,
      "learning_rate": 4.085388807111277e-05,
      "loss": 0.8965,
      "step": 226520
    },
    {
      "epoch": 0.7939311737059584,
      "grad_norm": 2.78125,
      "learning_rate": 4.0853239042449074e-05,
      "loss": 0.8129,
      "step": 226530
    },
    {
      "epoch": 0.793966221212854,
      "grad_norm": 3.015625,
      "learning_rate": 4.085259001378537e-05,
      "loss": 0.8731,
      "step": 226540
    },
    {
      "epoch": 0.7940012687197496,
      "grad_norm": 3.015625,
      "learning_rate": 4.085194098512167e-05,
      "loss": 0.8735,
      "step": 226550
    },
    {
      "epoch": 0.7940363162266452,
      "grad_norm": 2.75,
      "learning_rate": 4.0851291956457964e-05,
      "loss": 0.8971,
      "step": 226560
    },
    {
      "epoch": 0.7940713637335408,
      "grad_norm": 3.046875,
      "learning_rate": 4.0850642927794266e-05,
      "loss": 0.9335,
      "step": 226570
    },
    {
      "epoch": 0.7941064112404365,
      "grad_norm": 3.25,
      "learning_rate": 4.084999389913056e-05,
      "loss": 0.9355,
      "step": 226580
    },
    {
      "epoch": 0.794141458747332,
      "grad_norm": 3.078125,
      "learning_rate": 4.084934487046686e-05,
      "loss": 0.8602,
      "step": 226590
    },
    {
      "epoch": 0.7941765062542276,
      "grad_norm": 3.046875,
      "learning_rate": 4.0848695841803156e-05,
      "loss": 0.849,
      "step": 226600
    },
    {
      "epoch": 0.7942115537611232,
      "grad_norm": 3.078125,
      "learning_rate": 4.084804681313946e-05,
      "loss": 0.8689,
      "step": 226610
    },
    {
      "epoch": 0.7942466012680188,
      "grad_norm": 3.28125,
      "learning_rate": 4.084739778447575e-05,
      "loss": 0.8594,
      "step": 226620
    },
    {
      "epoch": 0.7942816487749144,
      "grad_norm": 3.140625,
      "learning_rate": 4.0846748755812054e-05,
      "loss": 0.8773,
      "step": 226630
    },
    {
      "epoch": 0.79431669628181,
      "grad_norm": 3.046875,
      "learning_rate": 4.084609972714835e-05,
      "loss": 0.9196,
      "step": 226640
    },
    {
      "epoch": 0.7943517437887055,
      "grad_norm": 3.421875,
      "learning_rate": 4.084545069848465e-05,
      "loss": 0.9363,
      "step": 226650
    },
    {
      "epoch": 0.7943867912956012,
      "grad_norm": 2.828125,
      "learning_rate": 4.084480166982095e-05,
      "loss": 0.8539,
      "step": 226660
    },
    {
      "epoch": 0.7944218388024968,
      "grad_norm": 3.25,
      "learning_rate": 4.0844152641157246e-05,
      "loss": 0.8913,
      "step": 226670
    },
    {
      "epoch": 0.7944568863093924,
      "grad_norm": 3.015625,
      "learning_rate": 4.084350361249355e-05,
      "loss": 0.8435,
      "step": 226680
    },
    {
      "epoch": 0.794491933816288,
      "grad_norm": 2.984375,
      "learning_rate": 4.084285458382984e-05,
      "loss": 0.7877,
      "step": 226690
    },
    {
      "epoch": 0.7945269813231836,
      "grad_norm": 3.421875,
      "learning_rate": 4.084220555516614e-05,
      "loss": 0.8917,
      "step": 226700
    },
    {
      "epoch": 0.7945620288300792,
      "grad_norm": 2.78125,
      "learning_rate": 4.084155652650244e-05,
      "loss": 0.8698,
      "step": 226710
    },
    {
      "epoch": 0.7945970763369747,
      "grad_norm": 2.953125,
      "learning_rate": 4.084090749783874e-05,
      "loss": 0.9633,
      "step": 226720
    },
    {
      "epoch": 0.7946321238438704,
      "grad_norm": 2.796875,
      "learning_rate": 4.0840258469175034e-05,
      "loss": 0.8655,
      "step": 226730
    },
    {
      "epoch": 0.7946671713507659,
      "grad_norm": 3.828125,
      "learning_rate": 4.0839609440511335e-05,
      "loss": 0.9017,
      "step": 226740
    },
    {
      "epoch": 0.7947022188576616,
      "grad_norm": 2.75,
      "learning_rate": 4.083896041184763e-05,
      "loss": 0.9524,
      "step": 226750
    },
    {
      "epoch": 0.7947372663645571,
      "grad_norm": 2.453125,
      "learning_rate": 4.083831138318393e-05,
      "loss": 0.9339,
      "step": 226760
    },
    {
      "epoch": 0.7947723138714528,
      "grad_norm": 3.109375,
      "learning_rate": 4.0837662354520226e-05,
      "loss": 0.8653,
      "step": 226770
    },
    {
      "epoch": 0.7948073613783484,
      "grad_norm": 3.109375,
      "learning_rate": 4.083701332585653e-05,
      "loss": 0.9161,
      "step": 226780
    },
    {
      "epoch": 0.7948424088852439,
      "grad_norm": 3.328125,
      "learning_rate": 4.083636429719283e-05,
      "loss": 0.8615,
      "step": 226790
    },
    {
      "epoch": 0.7948774563921396,
      "grad_norm": 2.484375,
      "learning_rate": 4.083571526852912e-05,
      "loss": 0.8313,
      "step": 226800
    },
    {
      "epoch": 0.7949125038990351,
      "grad_norm": 3.390625,
      "learning_rate": 4.0835066239865424e-05,
      "loss": 0.9164,
      "step": 226810
    },
    {
      "epoch": 0.7949475514059308,
      "grad_norm": 2.71875,
      "learning_rate": 4.083441721120172e-05,
      "loss": 0.8328,
      "step": 226820
    },
    {
      "epoch": 0.7949825989128263,
      "grad_norm": 3.078125,
      "learning_rate": 4.083376818253802e-05,
      "loss": 0.9987,
      "step": 226830
    },
    {
      "epoch": 0.795017646419722,
      "grad_norm": 2.859375,
      "learning_rate": 4.0833119153874315e-05,
      "loss": 0.852,
      "step": 226840
    },
    {
      "epoch": 0.7950526939266175,
      "grad_norm": 2.921875,
      "learning_rate": 4.083247012521061e-05,
      "loss": 0.9228,
      "step": 226850
    },
    {
      "epoch": 0.7950877414335131,
      "grad_norm": 3.21875,
      "learning_rate": 4.0831821096546904e-05,
      "loss": 0.9548,
      "step": 226860
    },
    {
      "epoch": 0.7951227889404088,
      "grad_norm": 2.6875,
      "learning_rate": 4.0831172067883206e-05,
      "loss": 0.829,
      "step": 226870
    },
    {
      "epoch": 0.7951578364473043,
      "grad_norm": 2.875,
      "learning_rate": 4.083052303921951e-05,
      "loss": 0.8747,
      "step": 226880
    },
    {
      "epoch": 0.7951928839542,
      "grad_norm": 2.859375,
      "learning_rate": 4.08298740105558e-05,
      "loss": 0.9266,
      "step": 226890
    },
    {
      "epoch": 0.7952279314610955,
      "grad_norm": 2.703125,
      "learning_rate": 4.08292249818921e-05,
      "loss": 0.9566,
      "step": 226900
    },
    {
      "epoch": 0.7952629789679911,
      "grad_norm": 3.234375,
      "learning_rate": 4.08285759532284e-05,
      "loss": 0.8951,
      "step": 226910
    },
    {
      "epoch": 0.7952980264748867,
      "grad_norm": 3.125,
      "learning_rate": 4.08279269245647e-05,
      "loss": 0.8574,
      "step": 226920
    },
    {
      "epoch": 0.7953330739817823,
      "grad_norm": 2.875,
      "learning_rate": 4.0827277895900994e-05,
      "loss": 1.0048,
      "step": 226930
    },
    {
      "epoch": 0.7953681214886779,
      "grad_norm": 2.796875,
      "learning_rate": 4.0826628867237295e-05,
      "loss": 0.8992,
      "step": 226940
    },
    {
      "epoch": 0.7954031689955735,
      "grad_norm": 3.03125,
      "learning_rate": 4.082597983857359e-05,
      "loss": 0.7708,
      "step": 226950
    },
    {
      "epoch": 0.795438216502469,
      "grad_norm": 2.484375,
      "learning_rate": 4.082533080990989e-05,
      "loss": 0.8871,
      "step": 226960
    },
    {
      "epoch": 0.7954732640093647,
      "grad_norm": 2.921875,
      "learning_rate": 4.0824681781246186e-05,
      "loss": 0.9195,
      "step": 226970
    },
    {
      "epoch": 0.7955083115162603,
      "grad_norm": 3.21875,
      "learning_rate": 4.082403275258249e-05,
      "loss": 0.9091,
      "step": 226980
    },
    {
      "epoch": 0.7955433590231559,
      "grad_norm": 2.71875,
      "learning_rate": 4.082338372391878e-05,
      "loss": 1.0129,
      "step": 226990
    },
    {
      "epoch": 0.7955784065300515,
      "grad_norm": 2.796875,
      "learning_rate": 4.082273469525508e-05,
      "loss": 0.8186,
      "step": 227000
    },
    {
      "epoch": 0.795613454036947,
      "grad_norm": 2.875,
      "learning_rate": 4.082208566659138e-05,
      "loss": 0.9133,
      "step": 227010
    },
    {
      "epoch": 0.7956485015438427,
      "grad_norm": 2.921875,
      "learning_rate": 4.082143663792768e-05,
      "loss": 0.9269,
      "step": 227020
    },
    {
      "epoch": 0.7956835490507382,
      "grad_norm": 2.828125,
      "learning_rate": 4.082078760926398e-05,
      "loss": 0.8593,
      "step": 227030
    },
    {
      "epoch": 0.7957185965576339,
      "grad_norm": 3.34375,
      "learning_rate": 4.0820138580600275e-05,
      "loss": 0.8992,
      "step": 227040
    },
    {
      "epoch": 0.7957536440645294,
      "grad_norm": 2.9375,
      "learning_rate": 4.0819489551936576e-05,
      "loss": 0.9098,
      "step": 227050
    },
    {
      "epoch": 0.7957886915714251,
      "grad_norm": 3.40625,
      "learning_rate": 4.081884052327287e-05,
      "loss": 0.926,
      "step": 227060
    },
    {
      "epoch": 0.7958237390783207,
      "grad_norm": 3.078125,
      "learning_rate": 4.081819149460917e-05,
      "loss": 0.9262,
      "step": 227070
    },
    {
      "epoch": 0.7958587865852162,
      "grad_norm": 3.1875,
      "learning_rate": 4.081754246594547e-05,
      "loss": 0.8891,
      "step": 227080
    },
    {
      "epoch": 0.7958938340921119,
      "grad_norm": 3.46875,
      "learning_rate": 4.081689343728177e-05,
      "loss": 0.9421,
      "step": 227090
    },
    {
      "epoch": 0.7959288815990074,
      "grad_norm": 3.109375,
      "learning_rate": 4.081624440861806e-05,
      "loss": 0.8765,
      "step": 227100
    },
    {
      "epoch": 0.7959639291059031,
      "grad_norm": 2.578125,
      "learning_rate": 4.0815595379954364e-05,
      "loss": 0.8485,
      "step": 227110
    },
    {
      "epoch": 0.7959989766127986,
      "grad_norm": 3.421875,
      "learning_rate": 4.081494635129066e-05,
      "loss": 1.0283,
      "step": 227120
    },
    {
      "epoch": 0.7960340241196943,
      "grad_norm": 3.21875,
      "learning_rate": 4.081429732262696e-05,
      "loss": 0.8752,
      "step": 227130
    },
    {
      "epoch": 0.7960690716265898,
      "grad_norm": 3.40625,
      "learning_rate": 4.0813648293963255e-05,
      "loss": 0.8579,
      "step": 227140
    },
    {
      "epoch": 0.7961041191334854,
      "grad_norm": 3.046875,
      "learning_rate": 4.0812999265299556e-05,
      "loss": 0.8477,
      "step": 227150
    },
    {
      "epoch": 0.7961391666403811,
      "grad_norm": 3.046875,
      "learning_rate": 4.081235023663586e-05,
      "loss": 0.8975,
      "step": 227160
    },
    {
      "epoch": 0.7961742141472766,
      "grad_norm": 2.796875,
      "learning_rate": 4.081170120797215e-05,
      "loss": 0.8817,
      "step": 227170
    },
    {
      "epoch": 0.7962092616541723,
      "grad_norm": 2.84375,
      "learning_rate": 4.0811052179308454e-05,
      "loss": 0.8508,
      "step": 227180
    },
    {
      "epoch": 0.7962443091610678,
      "grad_norm": 2.765625,
      "learning_rate": 4.081040315064475e-05,
      "loss": 0.7901,
      "step": 227190
    },
    {
      "epoch": 0.7962793566679635,
      "grad_norm": 2.90625,
      "learning_rate": 4.080975412198105e-05,
      "loss": 0.8502,
      "step": 227200
    },
    {
      "epoch": 0.796314404174859,
      "grad_norm": 2.890625,
      "learning_rate": 4.0809105093317344e-05,
      "loss": 0.8595,
      "step": 227210
    },
    {
      "epoch": 0.7963494516817546,
      "grad_norm": 2.921875,
      "learning_rate": 4.0808456064653646e-05,
      "loss": 0.8993,
      "step": 227220
    },
    {
      "epoch": 0.7963844991886502,
      "grad_norm": 2.640625,
      "learning_rate": 4.0807807035989934e-05,
      "loss": 0.8845,
      "step": 227230
    },
    {
      "epoch": 0.7964195466955458,
      "grad_norm": 2.671875,
      "learning_rate": 4.0807158007326235e-05,
      "loss": 0.8392,
      "step": 227240
    },
    {
      "epoch": 0.7964545942024414,
      "grad_norm": 3.484375,
      "learning_rate": 4.0806508978662536e-05,
      "loss": 0.8728,
      "step": 227250
    },
    {
      "epoch": 0.796489641709337,
      "grad_norm": 2.75,
      "learning_rate": 4.080585994999883e-05,
      "loss": 0.9285,
      "step": 227260
    },
    {
      "epoch": 0.7965246892162327,
      "grad_norm": 2.65625,
      "learning_rate": 4.080521092133513e-05,
      "loss": 0.8056,
      "step": 227270
    },
    {
      "epoch": 0.7965597367231282,
      "grad_norm": 3.03125,
      "learning_rate": 4.080456189267143e-05,
      "loss": 0.9176,
      "step": 227280
    },
    {
      "epoch": 0.7965947842300238,
      "grad_norm": 2.6875,
      "learning_rate": 4.080391286400773e-05,
      "loss": 0.8771,
      "step": 227290
    },
    {
      "epoch": 0.7966298317369194,
      "grad_norm": 2.6875,
      "learning_rate": 4.080326383534402e-05,
      "loss": 0.8995,
      "step": 227300
    },
    {
      "epoch": 0.796664879243815,
      "grad_norm": 2.734375,
      "learning_rate": 4.0802614806680324e-05,
      "loss": 0.9562,
      "step": 227310
    },
    {
      "epoch": 0.7966999267507106,
      "grad_norm": 2.734375,
      "learning_rate": 4.080196577801662e-05,
      "loss": 0.8838,
      "step": 227320
    },
    {
      "epoch": 0.7967349742576062,
      "grad_norm": 3.015625,
      "learning_rate": 4.080131674935292e-05,
      "loss": 0.9232,
      "step": 227330
    },
    {
      "epoch": 0.7967700217645017,
      "grad_norm": 2.875,
      "learning_rate": 4.0800667720689215e-05,
      "loss": 0.8791,
      "step": 227340
    },
    {
      "epoch": 0.7968050692713974,
      "grad_norm": 2.453125,
      "learning_rate": 4.0800018692025516e-05,
      "loss": 0.8213,
      "step": 227350
    },
    {
      "epoch": 0.796840116778293,
      "grad_norm": 3.328125,
      "learning_rate": 4.079936966336181e-05,
      "loss": 0.8758,
      "step": 227360
    },
    {
      "epoch": 0.7968751642851886,
      "grad_norm": 2.71875,
      "learning_rate": 4.079872063469811e-05,
      "loss": 0.9285,
      "step": 227370
    },
    {
      "epoch": 0.7969102117920842,
      "grad_norm": 2.734375,
      "learning_rate": 4.0798071606034414e-05,
      "loss": 0.9078,
      "step": 227380
    },
    {
      "epoch": 0.7969452592989797,
      "grad_norm": 2.765625,
      "learning_rate": 4.079742257737071e-05,
      "loss": 0.9077,
      "step": 227390
    },
    {
      "epoch": 0.7969803068058754,
      "grad_norm": 2.796875,
      "learning_rate": 4.079677354870701e-05,
      "loss": 0.8462,
      "step": 227400
    },
    {
      "epoch": 0.7970153543127709,
      "grad_norm": 2.90625,
      "learning_rate": 4.0796124520043304e-05,
      "loss": 0.8901,
      "step": 227410
    },
    {
      "epoch": 0.7970504018196666,
      "grad_norm": 2.9375,
      "learning_rate": 4.0795475491379606e-05,
      "loss": 0.8709,
      "step": 227420
    },
    {
      "epoch": 0.7970854493265621,
      "grad_norm": 3.234375,
      "learning_rate": 4.07948264627159e-05,
      "loss": 0.9067,
      "step": 227430
    },
    {
      "epoch": 0.7971204968334578,
      "grad_norm": 2.875,
      "learning_rate": 4.07941774340522e-05,
      "loss": 0.895,
      "step": 227440
    },
    {
      "epoch": 0.7971555443403533,
      "grad_norm": 2.96875,
      "learning_rate": 4.0793528405388496e-05,
      "loss": 0.9202,
      "step": 227450
    },
    {
      "epoch": 0.7971905918472489,
      "grad_norm": 3.109375,
      "learning_rate": 4.07928793767248e-05,
      "loss": 0.8957,
      "step": 227460
    },
    {
      "epoch": 0.7972256393541446,
      "grad_norm": 3.125,
      "learning_rate": 4.079223034806109e-05,
      "loss": 0.8945,
      "step": 227470
    },
    {
      "epoch": 0.7972606868610401,
      "grad_norm": 3.0,
      "learning_rate": 4.0791581319397394e-05,
      "loss": 0.9138,
      "step": 227480
    },
    {
      "epoch": 0.7972957343679358,
      "grad_norm": 3.0,
      "learning_rate": 4.079093229073369e-05,
      "loss": 0.9182,
      "step": 227490
    },
    {
      "epoch": 0.7973307818748313,
      "grad_norm": 3.125,
      "learning_rate": 4.079028326206999e-05,
      "loss": 0.9326,
      "step": 227500
    },
    {
      "epoch": 0.797365829381727,
      "grad_norm": 2.765625,
      "learning_rate": 4.0789634233406284e-05,
      "loss": 0.8898,
      "step": 227510
    },
    {
      "epoch": 0.7974008768886225,
      "grad_norm": 2.640625,
      "learning_rate": 4.0788985204742586e-05,
      "loss": 0.9828,
      "step": 227520
    },
    {
      "epoch": 0.7974359243955181,
      "grad_norm": 3.109375,
      "learning_rate": 4.078833617607889e-05,
      "loss": 0.9178,
      "step": 227530
    },
    {
      "epoch": 0.7974709719024137,
      "grad_norm": 2.84375,
      "learning_rate": 4.078768714741518e-05,
      "loss": 0.8503,
      "step": 227540
    },
    {
      "epoch": 0.7975060194093093,
      "grad_norm": 2.90625,
      "learning_rate": 4.078703811875148e-05,
      "loss": 0.9181,
      "step": 227550
    },
    {
      "epoch": 0.797541066916205,
      "grad_norm": 3.09375,
      "learning_rate": 4.078638909008778e-05,
      "loss": 0.9215,
      "step": 227560
    },
    {
      "epoch": 0.7975761144231005,
      "grad_norm": 2.859375,
      "learning_rate": 4.078574006142408e-05,
      "loss": 0.8596,
      "step": 227570
    },
    {
      "epoch": 0.7976111619299961,
      "grad_norm": 3.40625,
      "learning_rate": 4.0785091032760374e-05,
      "loss": 0.8444,
      "step": 227580
    },
    {
      "epoch": 0.7976462094368917,
      "grad_norm": 3.03125,
      "learning_rate": 4.0784442004096675e-05,
      "loss": 0.7815,
      "step": 227590
    },
    {
      "epoch": 0.7976812569437873,
      "grad_norm": 2.78125,
      "learning_rate": 4.078379297543297e-05,
      "loss": 0.8891,
      "step": 227600
    },
    {
      "epoch": 0.7977163044506829,
      "grad_norm": 2.984375,
      "learning_rate": 4.0783143946769264e-05,
      "loss": 0.8729,
      "step": 227610
    },
    {
      "epoch": 0.7977513519575785,
      "grad_norm": 2.640625,
      "learning_rate": 4.0782494918105566e-05,
      "loss": 0.8748,
      "step": 227620
    },
    {
      "epoch": 0.797786399464474,
      "grad_norm": 3.109375,
      "learning_rate": 4.078184588944186e-05,
      "loss": 0.8653,
      "step": 227630
    },
    {
      "epoch": 0.7978214469713697,
      "grad_norm": 2.828125,
      "learning_rate": 4.078119686077816e-05,
      "loss": 0.9205,
      "step": 227640
    },
    {
      "epoch": 0.7978564944782653,
      "grad_norm": 3.03125,
      "learning_rate": 4.0780547832114456e-05,
      "loss": 0.9192,
      "step": 227650
    },
    {
      "epoch": 0.7978915419851609,
      "grad_norm": 2.703125,
      "learning_rate": 4.077989880345076e-05,
      "loss": 0.9714,
      "step": 227660
    },
    {
      "epoch": 0.7979265894920565,
      "grad_norm": 3.09375,
      "learning_rate": 4.077924977478705e-05,
      "loss": 0.8657,
      "step": 227670
    },
    {
      "epoch": 0.7979616369989521,
      "grad_norm": 3.421875,
      "learning_rate": 4.0778600746123354e-05,
      "loss": 0.9072,
      "step": 227680
    },
    {
      "epoch": 0.7979966845058477,
      "grad_norm": 2.953125,
      "learning_rate": 4.077795171745965e-05,
      "loss": 0.864,
      "step": 227690
    },
    {
      "epoch": 0.7980317320127432,
      "grad_norm": 3.28125,
      "learning_rate": 4.077730268879595e-05,
      "loss": 0.9043,
      "step": 227700
    },
    {
      "epoch": 0.7980667795196389,
      "grad_norm": 3.140625,
      "learning_rate": 4.0776653660132244e-05,
      "loss": 0.8934,
      "step": 227710
    },
    {
      "epoch": 0.7981018270265344,
      "grad_norm": 2.953125,
      "learning_rate": 4.0776004631468546e-05,
      "loss": 0.8753,
      "step": 227720
    },
    {
      "epoch": 0.7981368745334301,
      "grad_norm": 2.671875,
      "learning_rate": 4.077535560280484e-05,
      "loss": 0.7828,
      "step": 227730
    },
    {
      "epoch": 0.7981719220403256,
      "grad_norm": 2.625,
      "learning_rate": 4.077470657414114e-05,
      "loss": 0.8844,
      "step": 227740
    },
    {
      "epoch": 0.7982069695472213,
      "grad_norm": 2.75,
      "learning_rate": 4.077405754547744e-05,
      "loss": 0.8704,
      "step": 227750
    },
    {
      "epoch": 0.7982420170541169,
      "grad_norm": 3.765625,
      "learning_rate": 4.077340851681374e-05,
      "loss": 0.8758,
      "step": 227760
    },
    {
      "epoch": 0.7982770645610124,
      "grad_norm": 2.9375,
      "learning_rate": 4.077275948815004e-05,
      "loss": 0.8809,
      "step": 227770
    },
    {
      "epoch": 0.7983121120679081,
      "grad_norm": 3.15625,
      "learning_rate": 4.0772110459486334e-05,
      "loss": 0.9291,
      "step": 227780
    },
    {
      "epoch": 0.7983471595748036,
      "grad_norm": 2.84375,
      "learning_rate": 4.0771461430822635e-05,
      "loss": 0.8832,
      "step": 227790
    },
    {
      "epoch": 0.7983822070816993,
      "grad_norm": 3.0,
      "learning_rate": 4.077081240215893e-05,
      "loss": 0.9085,
      "step": 227800
    },
    {
      "epoch": 0.7984172545885948,
      "grad_norm": 2.625,
      "learning_rate": 4.077016337349523e-05,
      "loss": 0.9054,
      "step": 227810
    },
    {
      "epoch": 0.7984523020954905,
      "grad_norm": 2.859375,
      "learning_rate": 4.0769514344831526e-05,
      "loss": 0.9154,
      "step": 227820
    },
    {
      "epoch": 0.798487349602386,
      "grad_norm": 3.28125,
      "learning_rate": 4.076886531616783e-05,
      "loss": 0.884,
      "step": 227830
    },
    {
      "epoch": 0.7985223971092816,
      "grad_norm": 3.125,
      "learning_rate": 4.076821628750412e-05,
      "loss": 0.8331,
      "step": 227840
    },
    {
      "epoch": 0.7985574446161773,
      "grad_norm": 2.59375,
      "learning_rate": 4.076756725884042e-05,
      "loss": 0.8166,
      "step": 227850
    },
    {
      "epoch": 0.7985924921230728,
      "grad_norm": 2.828125,
      "learning_rate": 4.076691823017672e-05,
      "loss": 0.8999,
      "step": 227860
    },
    {
      "epoch": 0.7986275396299685,
      "grad_norm": 2.46875,
      "learning_rate": 4.076626920151302e-05,
      "loss": 0.8266,
      "step": 227870
    },
    {
      "epoch": 0.798662587136864,
      "grad_norm": 2.75,
      "learning_rate": 4.0765620172849314e-05,
      "loss": 0.7996,
      "step": 227880
    },
    {
      "epoch": 0.7986976346437596,
      "grad_norm": 2.90625,
      "learning_rate": 4.0764971144185615e-05,
      "loss": 0.8173,
      "step": 227890
    },
    {
      "epoch": 0.7987326821506552,
      "grad_norm": 2.90625,
      "learning_rate": 4.0764322115521916e-05,
      "loss": 0.9312,
      "step": 227900
    },
    {
      "epoch": 0.7987677296575508,
      "grad_norm": 3.625,
      "learning_rate": 4.076367308685821e-05,
      "loss": 0.9227,
      "step": 227910
    },
    {
      "epoch": 0.7988027771644464,
      "grad_norm": 2.921875,
      "learning_rate": 4.076302405819451e-05,
      "loss": 0.8961,
      "step": 227920
    },
    {
      "epoch": 0.798837824671342,
      "grad_norm": 2.65625,
      "learning_rate": 4.076237502953081e-05,
      "loss": 0.868,
      "step": 227930
    },
    {
      "epoch": 0.7988728721782375,
      "grad_norm": 3.015625,
      "learning_rate": 4.076172600086711e-05,
      "loss": 0.7624,
      "step": 227940
    },
    {
      "epoch": 0.7989079196851332,
      "grad_norm": 2.90625,
      "learning_rate": 4.07610769722034e-05,
      "loss": 0.8524,
      "step": 227950
    },
    {
      "epoch": 0.7989429671920288,
      "grad_norm": 3.453125,
      "learning_rate": 4.0760427943539704e-05,
      "loss": 0.9791,
      "step": 227960
    },
    {
      "epoch": 0.7989780146989244,
      "grad_norm": 2.65625,
      "learning_rate": 4.0759778914876e-05,
      "loss": 0.8541,
      "step": 227970
    },
    {
      "epoch": 0.79901306220582,
      "grad_norm": 2.578125,
      "learning_rate": 4.0759129886212294e-05,
      "loss": 0.8652,
      "step": 227980
    },
    {
      "epoch": 0.7990481097127156,
      "grad_norm": 2.625,
      "learning_rate": 4.0758480857548595e-05,
      "loss": 0.8311,
      "step": 227990
    },
    {
      "epoch": 0.7990831572196112,
      "grad_norm": 2.734375,
      "learning_rate": 4.075783182888489e-05,
      "loss": 0.9272,
      "step": 228000
    },
    {
      "epoch": 0.7991182047265067,
      "grad_norm": 2.84375,
      "learning_rate": 4.075718280022119e-05,
      "loss": 0.8646,
      "step": 228010
    },
    {
      "epoch": 0.7991532522334024,
      "grad_norm": 3.171875,
      "learning_rate": 4.0756533771557486e-05,
      "loss": 0.8772,
      "step": 228020
    },
    {
      "epoch": 0.7991882997402979,
      "grad_norm": 3.046875,
      "learning_rate": 4.075588474289379e-05,
      "loss": 0.8728,
      "step": 228030
    },
    {
      "epoch": 0.7992233472471936,
      "grad_norm": 3.34375,
      "learning_rate": 4.075523571423008e-05,
      "loss": 0.9063,
      "step": 228040
    },
    {
      "epoch": 0.7992583947540892,
      "grad_norm": 3.078125,
      "learning_rate": 4.075458668556638e-05,
      "loss": 0.9353,
      "step": 228050
    },
    {
      "epoch": 0.7992934422609848,
      "grad_norm": 2.9375,
      "learning_rate": 4.075393765690268e-05,
      "loss": 0.9037,
      "step": 228060
    },
    {
      "epoch": 0.7993284897678804,
      "grad_norm": 2.796875,
      "learning_rate": 4.075328862823898e-05,
      "loss": 0.9574,
      "step": 228070
    },
    {
      "epoch": 0.7993635372747759,
      "grad_norm": 3.015625,
      "learning_rate": 4.0752639599575274e-05,
      "loss": 1.013,
      "step": 228080
    },
    {
      "epoch": 0.7993985847816716,
      "grad_norm": 2.96875,
      "learning_rate": 4.0751990570911575e-05,
      "loss": 0.8258,
      "step": 228090
    },
    {
      "epoch": 0.7994336322885671,
      "grad_norm": 2.9375,
      "learning_rate": 4.075134154224787e-05,
      "loss": 0.9219,
      "step": 228100
    },
    {
      "epoch": 0.7994686797954628,
      "grad_norm": 2.875,
      "learning_rate": 4.075069251358417e-05,
      "loss": 0.9389,
      "step": 228110
    },
    {
      "epoch": 0.7995037273023583,
      "grad_norm": 3.328125,
      "learning_rate": 4.075004348492047e-05,
      "loss": 0.8466,
      "step": 228120
    },
    {
      "epoch": 0.799538774809254,
      "grad_norm": 3.328125,
      "learning_rate": 4.074939445625677e-05,
      "loss": 0.9606,
      "step": 228130
    },
    {
      "epoch": 0.7995738223161496,
      "grad_norm": 3.125,
      "learning_rate": 4.074874542759307e-05,
      "loss": 0.9595,
      "step": 228140
    },
    {
      "epoch": 0.7996088698230451,
      "grad_norm": 2.984375,
      "learning_rate": 4.074809639892936e-05,
      "loss": 0.8023,
      "step": 228150
    },
    {
      "epoch": 0.7996439173299408,
      "grad_norm": 3.015625,
      "learning_rate": 4.0747447370265664e-05,
      "loss": 0.8983,
      "step": 228160
    },
    {
      "epoch": 0.7996789648368363,
      "grad_norm": 2.75,
      "learning_rate": 4.074679834160196e-05,
      "loss": 0.8737,
      "step": 228170
    },
    {
      "epoch": 0.799714012343732,
      "grad_norm": 2.96875,
      "learning_rate": 4.074614931293826e-05,
      "loss": 0.9473,
      "step": 228180
    },
    {
      "epoch": 0.7997490598506275,
      "grad_norm": 3.203125,
      "learning_rate": 4.0745500284274555e-05,
      "loss": 0.9253,
      "step": 228190
    },
    {
      "epoch": 0.7997841073575231,
      "grad_norm": 2.96875,
      "learning_rate": 4.0744851255610856e-05,
      "loss": 0.8292,
      "step": 228200
    },
    {
      "epoch": 0.7998191548644187,
      "grad_norm": 3.296875,
      "learning_rate": 4.074420222694715e-05,
      "loss": 0.8892,
      "step": 228210
    },
    {
      "epoch": 0.7998542023713143,
      "grad_norm": 3.15625,
      "learning_rate": 4.074355319828345e-05,
      "loss": 1.0056,
      "step": 228220
    },
    {
      "epoch": 0.7998892498782099,
      "grad_norm": 2.921875,
      "learning_rate": 4.074290416961975e-05,
      "loss": 0.8579,
      "step": 228230
    },
    {
      "epoch": 0.7999242973851055,
      "grad_norm": 2.78125,
      "learning_rate": 4.074225514095605e-05,
      "loss": 0.8211,
      "step": 228240
    },
    {
      "epoch": 0.7999593448920012,
      "grad_norm": 2.75,
      "learning_rate": 4.074160611229234e-05,
      "loss": 0.9247,
      "step": 228250
    },
    {
      "epoch": 0.7999943923988967,
      "grad_norm": 2.765625,
      "learning_rate": 4.0740957083628644e-05,
      "loss": 0.9121,
      "step": 228260
    },
    {
      "epoch": 0.8000294399057923,
      "grad_norm": 2.921875,
      "learning_rate": 4.0740308054964946e-05,
      "loss": 0.8808,
      "step": 228270
    },
    {
      "epoch": 0.8000644874126879,
      "grad_norm": 3.15625,
      "learning_rate": 4.073965902630124e-05,
      "loss": 0.9189,
      "step": 228280
    },
    {
      "epoch": 0.8000995349195835,
      "grad_norm": 2.515625,
      "learning_rate": 4.073900999763754e-05,
      "loss": 0.7725,
      "step": 228290
    },
    {
      "epoch": 0.8001345824264791,
      "grad_norm": 2.515625,
      "learning_rate": 4.0738360968973836e-05,
      "loss": 0.9075,
      "step": 228300
    },
    {
      "epoch": 0.8001696299333747,
      "grad_norm": 3.671875,
      "learning_rate": 4.073771194031014e-05,
      "loss": 0.921,
      "step": 228310
    },
    {
      "epoch": 0.8002046774402702,
      "grad_norm": 2.890625,
      "learning_rate": 4.073706291164643e-05,
      "loss": 0.9664,
      "step": 228320
    },
    {
      "epoch": 0.8002397249471659,
      "grad_norm": 3.140625,
      "learning_rate": 4.0736413882982734e-05,
      "loss": 0.8199,
      "step": 228330
    },
    {
      "epoch": 0.8002747724540615,
      "grad_norm": 3.0625,
      "learning_rate": 4.073576485431903e-05,
      "loss": 0.9307,
      "step": 228340
    },
    {
      "epoch": 0.8003098199609571,
      "grad_norm": 2.8125,
      "learning_rate": 4.073511582565533e-05,
      "loss": 0.8941,
      "step": 228350
    },
    {
      "epoch": 0.8003448674678527,
      "grad_norm": 2.953125,
      "learning_rate": 4.0734466796991624e-05,
      "loss": 0.9268,
      "step": 228360
    },
    {
      "epoch": 0.8003799149747483,
      "grad_norm": 2.9375,
      "learning_rate": 4.073381776832792e-05,
      "loss": 0.8376,
      "step": 228370
    },
    {
      "epoch": 0.8004149624816439,
      "grad_norm": 2.71875,
      "learning_rate": 4.073316873966422e-05,
      "loss": 0.9295,
      "step": 228380
    },
    {
      "epoch": 0.8004500099885394,
      "grad_norm": 2.859375,
      "learning_rate": 4.0732519711000515e-05,
      "loss": 0.9672,
      "step": 228390
    },
    {
      "epoch": 0.8004850574954351,
      "grad_norm": 3.265625,
      "learning_rate": 4.0731870682336816e-05,
      "loss": 0.8627,
      "step": 228400
    },
    {
      "epoch": 0.8005201050023306,
      "grad_norm": 3.25,
      "learning_rate": 4.073122165367311e-05,
      "loss": 0.8608,
      "step": 228410
    },
    {
      "epoch": 0.8005551525092263,
      "grad_norm": 2.515625,
      "learning_rate": 4.073057262500941e-05,
      "loss": 0.8832,
      "step": 228420
    },
    {
      "epoch": 0.8005902000161218,
      "grad_norm": 2.9375,
      "learning_rate": 4.072992359634571e-05,
      "loss": 0.8412,
      "step": 228430
    },
    {
      "epoch": 0.8006252475230174,
      "grad_norm": 3.328125,
      "learning_rate": 4.072927456768201e-05,
      "loss": 0.9245,
      "step": 228440
    },
    {
      "epoch": 0.8006602950299131,
      "grad_norm": 2.96875,
      "learning_rate": 4.07286255390183e-05,
      "loss": 0.9083,
      "step": 228450
    },
    {
      "epoch": 0.8006953425368086,
      "grad_norm": 2.625,
      "learning_rate": 4.0727976510354604e-05,
      "loss": 0.8711,
      "step": 228460
    },
    {
      "epoch": 0.8007303900437043,
      "grad_norm": 3.109375,
      "learning_rate": 4.07273274816909e-05,
      "loss": 0.9098,
      "step": 228470
    },
    {
      "epoch": 0.8007654375505998,
      "grad_norm": 2.71875,
      "learning_rate": 4.07266784530272e-05,
      "loss": 0.7392,
      "step": 228480
    },
    {
      "epoch": 0.8008004850574955,
      "grad_norm": 2.671875,
      "learning_rate": 4.07260294243635e-05,
      "loss": 0.8862,
      "step": 228490
    },
    {
      "epoch": 0.800835532564391,
      "grad_norm": 2.828125,
      "learning_rate": 4.0725380395699796e-05,
      "loss": 0.8902,
      "step": 228500
    },
    {
      "epoch": 0.8008705800712866,
      "grad_norm": 3.09375,
      "learning_rate": 4.07247313670361e-05,
      "loss": 0.953,
      "step": 228510
    },
    {
      "epoch": 0.8009056275781822,
      "grad_norm": 3.078125,
      "learning_rate": 4.072408233837239e-05,
      "loss": 0.863,
      "step": 228520
    },
    {
      "epoch": 0.8009406750850778,
      "grad_norm": 3.09375,
      "learning_rate": 4.0723433309708694e-05,
      "loss": 0.9503,
      "step": 228530
    },
    {
      "epoch": 0.8009757225919735,
      "grad_norm": 3.125,
      "learning_rate": 4.072278428104499e-05,
      "loss": 0.9854,
      "step": 228540
    },
    {
      "epoch": 0.801010770098869,
      "grad_norm": 2.890625,
      "learning_rate": 4.072213525238129e-05,
      "loss": 0.8462,
      "step": 228550
    },
    {
      "epoch": 0.8010458176057647,
      "grad_norm": 2.640625,
      "learning_rate": 4.0721486223717584e-05,
      "loss": 0.9007,
      "step": 228560
    },
    {
      "epoch": 0.8010808651126602,
      "grad_norm": 2.953125,
      "learning_rate": 4.0720837195053886e-05,
      "loss": 0.8867,
      "step": 228570
    },
    {
      "epoch": 0.8011159126195558,
      "grad_norm": 2.921875,
      "learning_rate": 4.072018816639018e-05,
      "loss": 0.8566,
      "step": 228580
    },
    {
      "epoch": 0.8011509601264514,
      "grad_norm": 2.890625,
      "learning_rate": 4.071953913772648e-05,
      "loss": 0.9058,
      "step": 228590
    },
    {
      "epoch": 0.801186007633347,
      "grad_norm": 3.21875,
      "learning_rate": 4.0718890109062776e-05,
      "loss": 0.951,
      "step": 228600
    },
    {
      "epoch": 0.8012210551402426,
      "grad_norm": 2.5625,
      "learning_rate": 4.071824108039908e-05,
      "loss": 0.8763,
      "step": 228610
    },
    {
      "epoch": 0.8012561026471382,
      "grad_norm": 2.921875,
      "learning_rate": 4.071759205173538e-05,
      "loss": 0.9303,
      "step": 228620
    },
    {
      "epoch": 0.8012911501540337,
      "grad_norm": 3.171875,
      "learning_rate": 4.0716943023071674e-05,
      "loss": 0.8485,
      "step": 228630
    },
    {
      "epoch": 0.8013261976609294,
      "grad_norm": 2.96875,
      "learning_rate": 4.0716293994407975e-05,
      "loss": 0.9519,
      "step": 228640
    },
    {
      "epoch": 0.801361245167825,
      "grad_norm": 2.875,
      "learning_rate": 4.071564496574427e-05,
      "loss": 0.9308,
      "step": 228650
    },
    {
      "epoch": 0.8013962926747206,
      "grad_norm": 3.359375,
      "learning_rate": 4.071499593708057e-05,
      "loss": 0.952,
      "step": 228660
    },
    {
      "epoch": 0.8014313401816162,
      "grad_norm": 2.828125,
      "learning_rate": 4.0714346908416866e-05,
      "loss": 0.9713,
      "step": 228670
    },
    {
      "epoch": 0.8014663876885117,
      "grad_norm": 3.09375,
      "learning_rate": 4.071369787975317e-05,
      "loss": 0.9682,
      "step": 228680
    },
    {
      "epoch": 0.8015014351954074,
      "grad_norm": 3.453125,
      "learning_rate": 4.071304885108946e-05,
      "loss": 0.9013,
      "step": 228690
    },
    {
      "epoch": 0.8015364827023029,
      "grad_norm": 2.625,
      "learning_rate": 4.071239982242576e-05,
      "loss": 0.8108,
      "step": 228700
    },
    {
      "epoch": 0.8015715302091986,
      "grad_norm": 2.328125,
      "learning_rate": 4.071175079376206e-05,
      "loss": 0.8374,
      "step": 228710
    },
    {
      "epoch": 0.8016065777160941,
      "grad_norm": 3.546875,
      "learning_rate": 4.071110176509836e-05,
      "loss": 0.8891,
      "step": 228720
    },
    {
      "epoch": 0.8016416252229898,
      "grad_norm": 2.96875,
      "learning_rate": 4.0710452736434654e-05,
      "loss": 0.8536,
      "step": 228730
    },
    {
      "epoch": 0.8016766727298854,
      "grad_norm": 3.140625,
      "learning_rate": 4.070980370777095e-05,
      "loss": 0.9665,
      "step": 228740
    },
    {
      "epoch": 0.801711720236781,
      "grad_norm": 2.703125,
      "learning_rate": 4.070915467910725e-05,
      "loss": 0.8778,
      "step": 228750
    },
    {
      "epoch": 0.8017467677436766,
      "grad_norm": 2.71875,
      "learning_rate": 4.0708505650443544e-05,
      "loss": 0.8583,
      "step": 228760
    },
    {
      "epoch": 0.8017818152505721,
      "grad_norm": 3.109375,
      "learning_rate": 4.0707856621779846e-05,
      "loss": 0.8889,
      "step": 228770
    },
    {
      "epoch": 0.8018168627574678,
      "grad_norm": 3.078125,
      "learning_rate": 4.070720759311614e-05,
      "loss": 0.813,
      "step": 228780
    },
    {
      "epoch": 0.8018519102643633,
      "grad_norm": 2.78125,
      "learning_rate": 4.070655856445244e-05,
      "loss": 0.9135,
      "step": 228790
    },
    {
      "epoch": 0.801886957771259,
      "grad_norm": 2.75,
      "learning_rate": 4.0705909535788736e-05,
      "loss": 0.8212,
      "step": 228800
    },
    {
      "epoch": 0.8019220052781545,
      "grad_norm": 2.859375,
      "learning_rate": 4.070526050712504e-05,
      "loss": 0.7884,
      "step": 228810
    },
    {
      "epoch": 0.8019570527850501,
      "grad_norm": 3.0625,
      "learning_rate": 4.070461147846133e-05,
      "loss": 0.8413,
      "step": 228820
    },
    {
      "epoch": 0.8019921002919458,
      "grad_norm": 2.65625,
      "learning_rate": 4.0703962449797634e-05,
      "loss": 0.9435,
      "step": 228830
    },
    {
      "epoch": 0.8020271477988413,
      "grad_norm": 3.171875,
      "learning_rate": 4.070331342113393e-05,
      "loss": 1.0264,
      "step": 228840
    },
    {
      "epoch": 0.802062195305737,
      "grad_norm": 2.921875,
      "learning_rate": 4.070266439247023e-05,
      "loss": 0.8954,
      "step": 228850
    },
    {
      "epoch": 0.8020972428126325,
      "grad_norm": 3.0625,
      "learning_rate": 4.070201536380653e-05,
      "loss": 0.8979,
      "step": 228860
    },
    {
      "epoch": 0.8021322903195282,
      "grad_norm": 3.546875,
      "learning_rate": 4.0701366335142826e-05,
      "loss": 0.9062,
      "step": 228870
    },
    {
      "epoch": 0.8021673378264237,
      "grad_norm": 3.140625,
      "learning_rate": 4.070071730647913e-05,
      "loss": 0.9017,
      "step": 228880
    },
    {
      "epoch": 0.8022023853333193,
      "grad_norm": 2.90625,
      "learning_rate": 4.070006827781542e-05,
      "loss": 0.916,
      "step": 228890
    },
    {
      "epoch": 0.8022374328402149,
      "grad_norm": 2.890625,
      "learning_rate": 4.069941924915172e-05,
      "loss": 0.9454,
      "step": 228900
    },
    {
      "epoch": 0.8022724803471105,
      "grad_norm": 2.828125,
      "learning_rate": 4.069877022048802e-05,
      "loss": 0.8218,
      "step": 228910
    },
    {
      "epoch": 0.802307527854006,
      "grad_norm": 3.21875,
      "learning_rate": 4.069812119182432e-05,
      "loss": 0.917,
      "step": 228920
    },
    {
      "epoch": 0.8023425753609017,
      "grad_norm": 2.828125,
      "learning_rate": 4.0697472163160614e-05,
      "loss": 0.9448,
      "step": 228930
    },
    {
      "epoch": 0.8023776228677973,
      "grad_norm": 2.9375,
      "learning_rate": 4.0696823134496915e-05,
      "loss": 0.8939,
      "step": 228940
    },
    {
      "epoch": 0.8024126703746929,
      "grad_norm": 3.046875,
      "learning_rate": 4.069617410583321e-05,
      "loss": 0.8482,
      "step": 228950
    },
    {
      "epoch": 0.8024477178815885,
      "grad_norm": 3.265625,
      "learning_rate": 4.069552507716951e-05,
      "loss": 0.824,
      "step": 228960
    },
    {
      "epoch": 0.8024827653884841,
      "grad_norm": 2.65625,
      "learning_rate": 4.0694876048505806e-05,
      "loss": 0.908,
      "step": 228970
    },
    {
      "epoch": 0.8025178128953797,
      "grad_norm": 2.796875,
      "learning_rate": 4.069422701984211e-05,
      "loss": 0.8428,
      "step": 228980
    },
    {
      "epoch": 0.8025528604022752,
      "grad_norm": 3.015625,
      "learning_rate": 4.069357799117841e-05,
      "loss": 0.9589,
      "step": 228990
    },
    {
      "epoch": 0.8025879079091709,
      "grad_norm": 3.15625,
      "learning_rate": 4.06929289625147e-05,
      "loss": 0.889,
      "step": 229000
    },
    {
      "epoch": 0.8026229554160664,
      "grad_norm": 3.046875,
      "learning_rate": 4.0692279933851005e-05,
      "loss": 0.9006,
      "step": 229010
    },
    {
      "epoch": 0.8026580029229621,
      "grad_norm": 2.890625,
      "learning_rate": 4.06916309051873e-05,
      "loss": 0.8822,
      "step": 229020
    },
    {
      "epoch": 0.8026930504298577,
      "grad_norm": 2.875,
      "learning_rate": 4.06909818765236e-05,
      "loss": 0.9351,
      "step": 229030
    },
    {
      "epoch": 0.8027280979367533,
      "grad_norm": 2.859375,
      "learning_rate": 4.0690332847859895e-05,
      "loss": 0.9461,
      "step": 229040
    },
    {
      "epoch": 0.8027631454436489,
      "grad_norm": 3.046875,
      "learning_rate": 4.06896838191962e-05,
      "loss": 0.9122,
      "step": 229050
    },
    {
      "epoch": 0.8027981929505444,
      "grad_norm": 2.75,
      "learning_rate": 4.068903479053249e-05,
      "loss": 0.8698,
      "step": 229060
    },
    {
      "epoch": 0.8028332404574401,
      "grad_norm": 2.46875,
      "learning_rate": 4.068838576186879e-05,
      "loss": 0.8411,
      "step": 229070
    },
    {
      "epoch": 0.8028682879643356,
      "grad_norm": 3.375,
      "learning_rate": 4.068773673320509e-05,
      "loss": 1.0072,
      "step": 229080
    },
    {
      "epoch": 0.8029033354712313,
      "grad_norm": 3.609375,
      "learning_rate": 4.068708770454139e-05,
      "loss": 1.0005,
      "step": 229090
    },
    {
      "epoch": 0.8029383829781268,
      "grad_norm": 2.875,
      "learning_rate": 4.068643867587768e-05,
      "loss": 1.0208,
      "step": 229100
    },
    {
      "epoch": 0.8029734304850225,
      "grad_norm": 2.640625,
      "learning_rate": 4.068578964721398e-05,
      "loss": 0.9574,
      "step": 229110
    },
    {
      "epoch": 0.803008477991918,
      "grad_norm": 2.609375,
      "learning_rate": 4.068514061855028e-05,
      "loss": 0.8923,
      "step": 229120
    },
    {
      "epoch": 0.8030435254988136,
      "grad_norm": 2.96875,
      "learning_rate": 4.0684491589886574e-05,
      "loss": 0.9109,
      "step": 229130
    },
    {
      "epoch": 0.8030785730057093,
      "grad_norm": 3.421875,
      "learning_rate": 4.0683842561222875e-05,
      "loss": 0.8481,
      "step": 229140
    },
    {
      "epoch": 0.8031136205126048,
      "grad_norm": 2.5625,
      "learning_rate": 4.068319353255917e-05,
      "loss": 0.8965,
      "step": 229150
    },
    {
      "epoch": 0.8031486680195005,
      "grad_norm": 3.0625,
      "learning_rate": 4.068254450389547e-05,
      "loss": 0.9165,
      "step": 229160
    },
    {
      "epoch": 0.803183715526396,
      "grad_norm": 2.640625,
      "learning_rate": 4.0681895475231766e-05,
      "loss": 0.8455,
      "step": 229170
    },
    {
      "epoch": 0.8032187630332916,
      "grad_norm": 3.46875,
      "learning_rate": 4.068124644656807e-05,
      "loss": 0.9704,
      "step": 229180
    },
    {
      "epoch": 0.8032538105401872,
      "grad_norm": 2.78125,
      "learning_rate": 4.068059741790436e-05,
      "loss": 0.9468,
      "step": 229190
    },
    {
      "epoch": 0.8032888580470828,
      "grad_norm": 2.9375,
      "learning_rate": 4.067994838924066e-05,
      "loss": 0.8907,
      "step": 229200
    },
    {
      "epoch": 0.8033239055539784,
      "grad_norm": 3.34375,
      "learning_rate": 4.067929936057696e-05,
      "loss": 0.9371,
      "step": 229210
    },
    {
      "epoch": 0.803358953060874,
      "grad_norm": 3.03125,
      "learning_rate": 4.067865033191326e-05,
      "loss": 0.8304,
      "step": 229220
    },
    {
      "epoch": 0.8033940005677697,
      "grad_norm": 2.921875,
      "learning_rate": 4.067800130324956e-05,
      "loss": 0.8751,
      "step": 229230
    },
    {
      "epoch": 0.8034290480746652,
      "grad_norm": 2.6875,
      "learning_rate": 4.0677352274585855e-05,
      "loss": 0.9095,
      "step": 229240
    },
    {
      "epoch": 0.8034640955815608,
      "grad_norm": 3.1875,
      "learning_rate": 4.067670324592216e-05,
      "loss": 0.8783,
      "step": 229250
    },
    {
      "epoch": 0.8034991430884564,
      "grad_norm": 3.046875,
      "learning_rate": 4.067605421725845e-05,
      "loss": 0.8998,
      "step": 229260
    },
    {
      "epoch": 0.803534190595352,
      "grad_norm": 2.78125,
      "learning_rate": 4.067540518859475e-05,
      "loss": 0.9362,
      "step": 229270
    },
    {
      "epoch": 0.8035692381022476,
      "grad_norm": 2.84375,
      "learning_rate": 4.067475615993105e-05,
      "loss": 0.8822,
      "step": 229280
    },
    {
      "epoch": 0.8036042856091432,
      "grad_norm": 2.953125,
      "learning_rate": 4.067410713126735e-05,
      "loss": 0.9298,
      "step": 229290
    },
    {
      "epoch": 0.8036393331160387,
      "grad_norm": 2.71875,
      "learning_rate": 4.067345810260364e-05,
      "loss": 0.9649,
      "step": 229300
    },
    {
      "epoch": 0.8036743806229344,
      "grad_norm": 2.859375,
      "learning_rate": 4.0672809073939945e-05,
      "loss": 0.8486,
      "step": 229310
    },
    {
      "epoch": 0.80370942812983,
      "grad_norm": 2.59375,
      "learning_rate": 4.067216004527624e-05,
      "loss": 0.8394,
      "step": 229320
    },
    {
      "epoch": 0.8037444756367256,
      "grad_norm": 2.96875,
      "learning_rate": 4.067151101661254e-05,
      "loss": 0.8914,
      "step": 229330
    },
    {
      "epoch": 0.8037795231436212,
      "grad_norm": 2.8125,
      "learning_rate": 4.0670861987948835e-05,
      "loss": 0.9707,
      "step": 229340
    },
    {
      "epoch": 0.8038145706505168,
      "grad_norm": 2.625,
      "learning_rate": 4.067021295928514e-05,
      "loss": 0.8086,
      "step": 229350
    },
    {
      "epoch": 0.8038496181574124,
      "grad_norm": 2.6875,
      "learning_rate": 4.066956393062144e-05,
      "loss": 0.9105,
      "step": 229360
    },
    {
      "epoch": 0.8038846656643079,
      "grad_norm": 2.625,
      "learning_rate": 4.066891490195773e-05,
      "loss": 0.9679,
      "step": 229370
    },
    {
      "epoch": 0.8039197131712036,
      "grad_norm": 3.046875,
      "learning_rate": 4.0668265873294034e-05,
      "loss": 0.8432,
      "step": 229380
    },
    {
      "epoch": 0.8039547606780991,
      "grad_norm": 2.9375,
      "learning_rate": 4.066761684463033e-05,
      "loss": 0.9276,
      "step": 229390
    },
    {
      "epoch": 0.8039898081849948,
      "grad_norm": 2.84375,
      "learning_rate": 4.066696781596663e-05,
      "loss": 0.9386,
      "step": 229400
    },
    {
      "epoch": 0.8040248556918903,
      "grad_norm": 2.890625,
      "learning_rate": 4.0666318787302925e-05,
      "loss": 0.8753,
      "step": 229410
    },
    {
      "epoch": 0.804059903198786,
      "grad_norm": 2.671875,
      "learning_rate": 4.0665669758639226e-05,
      "loss": 0.7985,
      "step": 229420
    },
    {
      "epoch": 0.8040949507056816,
      "grad_norm": 3.421875,
      "learning_rate": 4.066502072997552e-05,
      "loss": 0.972,
      "step": 229430
    },
    {
      "epoch": 0.8041299982125771,
      "grad_norm": 2.53125,
      "learning_rate": 4.066437170131182e-05,
      "loss": 0.9716,
      "step": 229440
    },
    {
      "epoch": 0.8041650457194728,
      "grad_norm": 2.8125,
      "learning_rate": 4.066372267264812e-05,
      "loss": 0.9489,
      "step": 229450
    },
    {
      "epoch": 0.8042000932263683,
      "grad_norm": 3.203125,
      "learning_rate": 4.066307364398442e-05,
      "loss": 0.9173,
      "step": 229460
    },
    {
      "epoch": 0.804235140733264,
      "grad_norm": 3.015625,
      "learning_rate": 4.066242461532071e-05,
      "loss": 0.8355,
      "step": 229470
    },
    {
      "epoch": 0.8042701882401595,
      "grad_norm": 3.109375,
      "learning_rate": 4.0661775586657014e-05,
      "loss": 0.9173,
      "step": 229480
    },
    {
      "epoch": 0.8043052357470551,
      "grad_norm": 2.8125,
      "learning_rate": 4.066112655799331e-05,
      "loss": 0.8477,
      "step": 229490
    },
    {
      "epoch": 0.8043402832539507,
      "grad_norm": 2.9375,
      "learning_rate": 4.06604775293296e-05,
      "loss": 0.9499,
      "step": 229500
    },
    {
      "epoch": 0.8043753307608463,
      "grad_norm": 2.71875,
      "learning_rate": 4.0659828500665905e-05,
      "loss": 0.9027,
      "step": 229510
    },
    {
      "epoch": 0.804410378267742,
      "grad_norm": 2.78125,
      "learning_rate": 4.06591794720022e-05,
      "loss": 0.9007,
      "step": 229520
    },
    {
      "epoch": 0.8044454257746375,
      "grad_norm": 2.90625,
      "learning_rate": 4.06585304433385e-05,
      "loss": 0.8829,
      "step": 229530
    },
    {
      "epoch": 0.8044804732815332,
      "grad_norm": 2.984375,
      "learning_rate": 4.0657881414674795e-05,
      "loss": 0.8691,
      "step": 229540
    },
    {
      "epoch": 0.8045155207884287,
      "grad_norm": 3.375,
      "learning_rate": 4.06572323860111e-05,
      "loss": 0.9692,
      "step": 229550
    },
    {
      "epoch": 0.8045505682953243,
      "grad_norm": 2.921875,
      "learning_rate": 4.065658335734739e-05,
      "loss": 0.8968,
      "step": 229560
    },
    {
      "epoch": 0.8045856158022199,
      "grad_norm": 2.75,
      "learning_rate": 4.065593432868369e-05,
      "loss": 0.8156,
      "step": 229570
    },
    {
      "epoch": 0.8046206633091155,
      "grad_norm": 2.953125,
      "learning_rate": 4.0655285300019994e-05,
      "loss": 0.9108,
      "step": 229580
    },
    {
      "epoch": 0.8046557108160111,
      "grad_norm": 3.0,
      "learning_rate": 4.065463627135629e-05,
      "loss": 0.8508,
      "step": 229590
    },
    {
      "epoch": 0.8046907583229067,
      "grad_norm": 2.9375,
      "learning_rate": 4.065398724269259e-05,
      "loss": 0.8706,
      "step": 229600
    },
    {
      "epoch": 0.8047258058298022,
      "grad_norm": 3.359375,
      "learning_rate": 4.0653338214028885e-05,
      "loss": 0.8679,
      "step": 229610
    },
    {
      "epoch": 0.8047608533366979,
      "grad_norm": 2.875,
      "learning_rate": 4.0652689185365186e-05,
      "loss": 0.9316,
      "step": 229620
    },
    {
      "epoch": 0.8047959008435935,
      "grad_norm": 2.828125,
      "learning_rate": 4.065204015670148e-05,
      "loss": 0.8697,
      "step": 229630
    },
    {
      "epoch": 0.8048309483504891,
      "grad_norm": 2.640625,
      "learning_rate": 4.065139112803778e-05,
      "loss": 0.9523,
      "step": 229640
    },
    {
      "epoch": 0.8048659958573847,
      "grad_norm": 3.078125,
      "learning_rate": 4.065074209937408e-05,
      "loss": 0.9624,
      "step": 229650
    },
    {
      "epoch": 0.8049010433642803,
      "grad_norm": 3.1875,
      "learning_rate": 4.065009307071038e-05,
      "loss": 0.9487,
      "step": 229660
    },
    {
      "epoch": 0.8049360908711759,
      "grad_norm": 2.75,
      "learning_rate": 4.064944404204667e-05,
      "loss": 0.8614,
      "step": 229670
    },
    {
      "epoch": 0.8049711383780714,
      "grad_norm": 2.90625,
      "learning_rate": 4.0648795013382974e-05,
      "loss": 0.8995,
      "step": 229680
    },
    {
      "epoch": 0.8050061858849671,
      "grad_norm": 3.09375,
      "learning_rate": 4.064814598471927e-05,
      "loss": 0.9453,
      "step": 229690
    },
    {
      "epoch": 0.8050412333918626,
      "grad_norm": 2.84375,
      "learning_rate": 4.064749695605557e-05,
      "loss": 0.9012,
      "step": 229700
    },
    {
      "epoch": 0.8050762808987583,
      "grad_norm": 2.421875,
      "learning_rate": 4.0646847927391865e-05,
      "loss": 0.9306,
      "step": 229710
    },
    {
      "epoch": 0.8051113284056539,
      "grad_norm": 3.125,
      "learning_rate": 4.0646198898728166e-05,
      "loss": 0.9369,
      "step": 229720
    },
    {
      "epoch": 0.8051463759125495,
      "grad_norm": 3.1875,
      "learning_rate": 4.064554987006447e-05,
      "loss": 0.9708,
      "step": 229730
    },
    {
      "epoch": 0.8051814234194451,
      "grad_norm": 3.046875,
      "learning_rate": 4.064490084140076e-05,
      "loss": 0.9238,
      "step": 229740
    },
    {
      "epoch": 0.8052164709263406,
      "grad_norm": 3.078125,
      "learning_rate": 4.0644251812737063e-05,
      "loss": 0.9043,
      "step": 229750
    },
    {
      "epoch": 0.8052515184332363,
      "grad_norm": 2.859375,
      "learning_rate": 4.064360278407336e-05,
      "loss": 0.8986,
      "step": 229760
    },
    {
      "epoch": 0.8052865659401318,
      "grad_norm": 3.171875,
      "learning_rate": 4.064295375540966e-05,
      "loss": 0.9356,
      "step": 229770
    },
    {
      "epoch": 0.8053216134470275,
      "grad_norm": 2.828125,
      "learning_rate": 4.0642304726745954e-05,
      "loss": 0.9205,
      "step": 229780
    },
    {
      "epoch": 0.805356660953923,
      "grad_norm": 3.046875,
      "learning_rate": 4.0641655698082255e-05,
      "loss": 0.8755,
      "step": 229790
    },
    {
      "epoch": 0.8053917084608186,
      "grad_norm": 2.796875,
      "learning_rate": 4.064100666941855e-05,
      "loss": 0.8603,
      "step": 229800
    },
    {
      "epoch": 0.8054267559677143,
      "grad_norm": 2.609375,
      "learning_rate": 4.064035764075485e-05,
      "loss": 0.8429,
      "step": 229810
    },
    {
      "epoch": 0.8054618034746098,
      "grad_norm": 3.203125,
      "learning_rate": 4.0639708612091146e-05,
      "loss": 0.8734,
      "step": 229820
    },
    {
      "epoch": 0.8054968509815055,
      "grad_norm": 3.078125,
      "learning_rate": 4.063905958342745e-05,
      "loss": 0.9318,
      "step": 229830
    },
    {
      "epoch": 0.805531898488401,
      "grad_norm": 2.84375,
      "learning_rate": 4.063841055476374e-05,
      "loss": 0.8627,
      "step": 229840
    },
    {
      "epoch": 0.8055669459952967,
      "grad_norm": 2.984375,
      "learning_rate": 4.0637761526100043e-05,
      "loss": 0.8923,
      "step": 229850
    },
    {
      "epoch": 0.8056019935021922,
      "grad_norm": 2.9375,
      "learning_rate": 4.063711249743634e-05,
      "loss": 0.8815,
      "step": 229860
    },
    {
      "epoch": 0.8056370410090878,
      "grad_norm": 3.1875,
      "learning_rate": 4.063646346877263e-05,
      "loss": 0.9858,
      "step": 229870
    },
    {
      "epoch": 0.8056720885159834,
      "grad_norm": 3.078125,
      "learning_rate": 4.0635814440108934e-05,
      "loss": 0.8621,
      "step": 229880
    },
    {
      "epoch": 0.805707136022879,
      "grad_norm": 2.765625,
      "learning_rate": 4.063516541144523e-05,
      "loss": 0.9171,
      "step": 229890
    },
    {
      "epoch": 0.8057421835297746,
      "grad_norm": 3.109375,
      "learning_rate": 4.063451638278153e-05,
      "loss": 0.9074,
      "step": 229900
    },
    {
      "epoch": 0.8057772310366702,
      "grad_norm": 3.03125,
      "learning_rate": 4.0633867354117825e-05,
      "loss": 0.9424,
      "step": 229910
    },
    {
      "epoch": 0.8058122785435659,
      "grad_norm": 2.984375,
      "learning_rate": 4.0633218325454126e-05,
      "loss": 0.8799,
      "step": 229920
    },
    {
      "epoch": 0.8058473260504614,
      "grad_norm": 3.140625,
      "learning_rate": 4.063256929679042e-05,
      "loss": 0.8268,
      "step": 229930
    },
    {
      "epoch": 0.805882373557357,
      "grad_norm": 3.21875,
      "learning_rate": 4.063192026812672e-05,
      "loss": 0.853,
      "step": 229940
    },
    {
      "epoch": 0.8059174210642526,
      "grad_norm": 3.4375,
      "learning_rate": 4.0631271239463023e-05,
      "loss": 0.9224,
      "step": 229950
    },
    {
      "epoch": 0.8059524685711482,
      "grad_norm": 3.734375,
      "learning_rate": 4.063062221079932e-05,
      "loss": 0.8275,
      "step": 229960
    },
    {
      "epoch": 0.8059875160780438,
      "grad_norm": 2.609375,
      "learning_rate": 4.062997318213562e-05,
      "loss": 0.8263,
      "step": 229970
    },
    {
      "epoch": 0.8060225635849394,
      "grad_norm": 3.0,
      "learning_rate": 4.0629324153471914e-05,
      "loss": 0.8766,
      "step": 229980
    },
    {
      "epoch": 0.8060576110918349,
      "grad_norm": 3.03125,
      "learning_rate": 4.0628675124808215e-05,
      "loss": 0.9574,
      "step": 229990
    },
    {
      "epoch": 0.8060926585987306,
      "grad_norm": 3.015625,
      "learning_rate": 4.062802609614451e-05,
      "loss": 0.8394,
      "step": 230000
    },
    {
      "epoch": 0.8060926585987306,
      "eval_loss": 0.835595965385437,
      "eval_runtime": 567.9503,
      "eval_samples_per_second": 669.84,
      "eval_steps_per_second": 55.82,
      "step": 230000
    },
    {
      "epoch": 0.8061277061056262,
      "grad_norm": 2.671875,
      "learning_rate": 4.062737706748081e-05,
      "loss": 0.8743,
      "step": 230010
    },
    {
      "epoch": 0.8061627536125218,
      "grad_norm": 2.8125,
      "learning_rate": 4.0626728038817106e-05,
      "loss": 0.8382,
      "step": 230020
    },
    {
      "epoch": 0.8061978011194174,
      "grad_norm": 3.21875,
      "learning_rate": 4.062607901015341e-05,
      "loss": 0.9184,
      "step": 230030
    },
    {
      "epoch": 0.806232848626313,
      "grad_norm": 2.71875,
      "learning_rate": 4.06254299814897e-05,
      "loss": 0.8994,
      "step": 230040
    },
    {
      "epoch": 0.8062678961332086,
      "grad_norm": 2.84375,
      "learning_rate": 4.0624780952826003e-05,
      "loss": 0.8674,
      "step": 230050
    },
    {
      "epoch": 0.8063029436401041,
      "grad_norm": 3.25,
      "learning_rate": 4.06241319241623e-05,
      "loss": 0.8392,
      "step": 230060
    },
    {
      "epoch": 0.8063379911469998,
      "grad_norm": 3.234375,
      "learning_rate": 4.06234828954986e-05,
      "loss": 0.969,
      "step": 230070
    },
    {
      "epoch": 0.8063730386538953,
      "grad_norm": 3.015625,
      "learning_rate": 4.0622833866834894e-05,
      "loss": 0.8069,
      "step": 230080
    },
    {
      "epoch": 0.806408086160791,
      "grad_norm": 2.90625,
      "learning_rate": 4.0622184838171195e-05,
      "loss": 0.8166,
      "step": 230090
    },
    {
      "epoch": 0.8064431336676865,
      "grad_norm": 2.703125,
      "learning_rate": 4.06215358095075e-05,
      "loss": 0.8447,
      "step": 230100
    },
    {
      "epoch": 0.8064781811745821,
      "grad_norm": 2.53125,
      "learning_rate": 4.062088678084379e-05,
      "loss": 0.7873,
      "step": 230110
    },
    {
      "epoch": 0.8065132286814778,
      "grad_norm": 2.8125,
      "learning_rate": 4.062023775218009e-05,
      "loss": 0.8854,
      "step": 230120
    },
    {
      "epoch": 0.8065482761883733,
      "grad_norm": 2.609375,
      "learning_rate": 4.061958872351639e-05,
      "loss": 0.8065,
      "step": 230130
    },
    {
      "epoch": 0.806583323695269,
      "grad_norm": 3.21875,
      "learning_rate": 4.061893969485269e-05,
      "loss": 0.8723,
      "step": 230140
    },
    {
      "epoch": 0.8066183712021645,
      "grad_norm": 2.8125,
      "learning_rate": 4.0618290666188983e-05,
      "loss": 0.8795,
      "step": 230150
    },
    {
      "epoch": 0.8066534187090602,
      "grad_norm": 2.765625,
      "learning_rate": 4.0617641637525285e-05,
      "loss": 0.8103,
      "step": 230160
    },
    {
      "epoch": 0.8066884662159557,
      "grad_norm": 2.25,
      "learning_rate": 4.061699260886158e-05,
      "loss": 0.8609,
      "step": 230170
    },
    {
      "epoch": 0.8067235137228513,
      "grad_norm": 2.984375,
      "learning_rate": 4.061634358019788e-05,
      "loss": 0.8952,
      "step": 230180
    },
    {
      "epoch": 0.8067585612297469,
      "grad_norm": 2.59375,
      "learning_rate": 4.0615694551534175e-05,
      "loss": 0.8334,
      "step": 230190
    },
    {
      "epoch": 0.8067936087366425,
      "grad_norm": 3.109375,
      "learning_rate": 4.061504552287048e-05,
      "loss": 0.933,
      "step": 230200
    },
    {
      "epoch": 0.8068286562435382,
      "grad_norm": 3.109375,
      "learning_rate": 4.061439649420677e-05,
      "loss": 0.8646,
      "step": 230210
    },
    {
      "epoch": 0.8068637037504337,
      "grad_norm": 3.359375,
      "learning_rate": 4.061374746554307e-05,
      "loss": 0.9055,
      "step": 230220
    },
    {
      "epoch": 0.8068987512573293,
      "grad_norm": 2.484375,
      "learning_rate": 4.0613098436879374e-05,
      "loss": 0.9698,
      "step": 230230
    },
    {
      "epoch": 0.8069337987642249,
      "grad_norm": 2.546875,
      "learning_rate": 4.061244940821566e-05,
      "loss": 0.8587,
      "step": 230240
    },
    {
      "epoch": 0.8069688462711205,
      "grad_norm": 3.0625,
      "learning_rate": 4.0611800379551963e-05,
      "loss": 0.8734,
      "step": 230250
    },
    {
      "epoch": 0.8070038937780161,
      "grad_norm": 4.46875,
      "learning_rate": 4.061115135088826e-05,
      "loss": 0.8835,
      "step": 230260
    },
    {
      "epoch": 0.8070389412849117,
      "grad_norm": 3.078125,
      "learning_rate": 4.061050232222456e-05,
      "loss": 0.8538,
      "step": 230270
    },
    {
      "epoch": 0.8070739887918073,
      "grad_norm": 2.734375,
      "learning_rate": 4.0609853293560854e-05,
      "loss": 0.8881,
      "step": 230280
    },
    {
      "epoch": 0.8071090362987029,
      "grad_norm": 3.0625,
      "learning_rate": 4.0609204264897155e-05,
      "loss": 0.8721,
      "step": 230290
    },
    {
      "epoch": 0.8071440838055984,
      "grad_norm": 3.1875,
      "learning_rate": 4.060855523623345e-05,
      "loss": 0.8963,
      "step": 230300
    },
    {
      "epoch": 0.8071791313124941,
      "grad_norm": 2.875,
      "learning_rate": 4.060790620756975e-05,
      "loss": 0.8508,
      "step": 230310
    },
    {
      "epoch": 0.8072141788193897,
      "grad_norm": 2.71875,
      "learning_rate": 4.060725717890605e-05,
      "loss": 0.8639,
      "step": 230320
    },
    {
      "epoch": 0.8072492263262853,
      "grad_norm": 2.765625,
      "learning_rate": 4.060660815024235e-05,
      "loss": 0.8974,
      "step": 230330
    },
    {
      "epoch": 0.8072842738331809,
      "grad_norm": 2.828125,
      "learning_rate": 4.060595912157865e-05,
      "loss": 0.8385,
      "step": 230340
    },
    {
      "epoch": 0.8073193213400764,
      "grad_norm": 2.5625,
      "learning_rate": 4.0605310092914943e-05,
      "loss": 0.8308,
      "step": 230350
    },
    {
      "epoch": 0.8073543688469721,
      "grad_norm": 2.875,
      "learning_rate": 4.0604661064251245e-05,
      "loss": 0.8785,
      "step": 230360
    },
    {
      "epoch": 0.8073894163538676,
      "grad_norm": 2.390625,
      "learning_rate": 4.060401203558754e-05,
      "loss": 0.9106,
      "step": 230370
    },
    {
      "epoch": 0.8074244638607633,
      "grad_norm": 2.96875,
      "learning_rate": 4.060336300692384e-05,
      "loss": 0.8775,
      "step": 230380
    },
    {
      "epoch": 0.8074595113676588,
      "grad_norm": 2.9375,
      "learning_rate": 4.0602713978260135e-05,
      "loss": 0.8645,
      "step": 230390
    },
    {
      "epoch": 0.8074945588745545,
      "grad_norm": 3.203125,
      "learning_rate": 4.060206494959644e-05,
      "loss": 0.9242,
      "step": 230400
    },
    {
      "epoch": 0.8075296063814501,
      "grad_norm": 2.984375,
      "learning_rate": 4.060141592093273e-05,
      "loss": 0.9104,
      "step": 230410
    },
    {
      "epoch": 0.8075646538883456,
      "grad_norm": 3.0625,
      "learning_rate": 4.060076689226903e-05,
      "loss": 0.891,
      "step": 230420
    },
    {
      "epoch": 0.8075997013952413,
      "grad_norm": 3.09375,
      "learning_rate": 4.060011786360533e-05,
      "loss": 0.8966,
      "step": 230430
    },
    {
      "epoch": 0.8076347489021368,
      "grad_norm": 3.015625,
      "learning_rate": 4.059946883494163e-05,
      "loss": 0.8817,
      "step": 230440
    },
    {
      "epoch": 0.8076697964090325,
      "grad_norm": 2.9375,
      "learning_rate": 4.059881980627793e-05,
      "loss": 0.8921,
      "step": 230450
    },
    {
      "epoch": 0.807704843915928,
      "grad_norm": 3.0,
      "learning_rate": 4.0598170777614225e-05,
      "loss": 0.9042,
      "step": 230460
    },
    {
      "epoch": 0.8077398914228237,
      "grad_norm": 2.75,
      "learning_rate": 4.0597521748950526e-05,
      "loss": 0.831,
      "step": 230470
    },
    {
      "epoch": 0.8077749389297192,
      "grad_norm": 3.015625,
      "learning_rate": 4.059687272028682e-05,
      "loss": 0.8106,
      "step": 230480
    },
    {
      "epoch": 0.8078099864366148,
      "grad_norm": 3.109375,
      "learning_rate": 4.059622369162312e-05,
      "loss": 0.9267,
      "step": 230490
    },
    {
      "epoch": 0.8078450339435105,
      "grad_norm": 2.984375,
      "learning_rate": 4.059557466295942e-05,
      "loss": 0.7868,
      "step": 230500
    },
    {
      "epoch": 0.807880081450406,
      "grad_norm": 3.40625,
      "learning_rate": 4.059492563429572e-05,
      "loss": 0.9424,
      "step": 230510
    },
    {
      "epoch": 0.8079151289573017,
      "grad_norm": 2.28125,
      "learning_rate": 4.059427660563201e-05,
      "loss": 0.862,
      "step": 230520
    },
    {
      "epoch": 0.8079501764641972,
      "grad_norm": 3.140625,
      "learning_rate": 4.0593627576968314e-05,
      "loss": 0.9194,
      "step": 230530
    },
    {
      "epoch": 0.8079852239710928,
      "grad_norm": 3.40625,
      "learning_rate": 4.059297854830461e-05,
      "loss": 0.8964,
      "step": 230540
    },
    {
      "epoch": 0.8080202714779884,
      "grad_norm": 2.796875,
      "learning_rate": 4.059232951964091e-05,
      "loss": 0.8613,
      "step": 230550
    },
    {
      "epoch": 0.808055318984884,
      "grad_norm": 2.984375,
      "learning_rate": 4.0591680490977205e-05,
      "loss": 0.8558,
      "step": 230560
    },
    {
      "epoch": 0.8080903664917796,
      "grad_norm": 2.21875,
      "learning_rate": 4.0591031462313506e-05,
      "loss": 0.9176,
      "step": 230570
    },
    {
      "epoch": 0.8081254139986752,
      "grad_norm": 2.9375,
      "learning_rate": 4.05903824336498e-05,
      "loss": 0.8584,
      "step": 230580
    },
    {
      "epoch": 0.8081604615055707,
      "grad_norm": 2.78125,
      "learning_rate": 4.05897334049861e-05,
      "loss": 0.9063,
      "step": 230590
    },
    {
      "epoch": 0.8081955090124664,
      "grad_norm": 3.0,
      "learning_rate": 4.0589084376322404e-05,
      "loss": 0.8153,
      "step": 230600
    },
    {
      "epoch": 0.808230556519362,
      "grad_norm": 3.375,
      "learning_rate": 4.05884353476587e-05,
      "loss": 0.8563,
      "step": 230610
    },
    {
      "epoch": 0.8082656040262576,
      "grad_norm": 2.65625,
      "learning_rate": 4.058778631899499e-05,
      "loss": 0.8872,
      "step": 230620
    },
    {
      "epoch": 0.8083006515331532,
      "grad_norm": 2.625,
      "learning_rate": 4.058713729033129e-05,
      "loss": 0.8476,
      "step": 230630
    },
    {
      "epoch": 0.8083356990400488,
      "grad_norm": 3.125,
      "learning_rate": 4.058648826166759e-05,
      "loss": 0.9433,
      "step": 230640
    },
    {
      "epoch": 0.8083707465469444,
      "grad_norm": 3.0,
      "learning_rate": 4.0585839233003883e-05,
      "loss": 0.9086,
      "step": 230650
    },
    {
      "epoch": 0.8084057940538399,
      "grad_norm": 3.125,
      "learning_rate": 4.0585190204340185e-05,
      "loss": 0.8495,
      "step": 230660
    },
    {
      "epoch": 0.8084408415607356,
      "grad_norm": 2.59375,
      "learning_rate": 4.058454117567648e-05,
      "loss": 0.8504,
      "step": 230670
    },
    {
      "epoch": 0.8084758890676311,
      "grad_norm": 2.546875,
      "learning_rate": 4.058389214701278e-05,
      "loss": 0.9341,
      "step": 230680
    },
    {
      "epoch": 0.8085109365745268,
      "grad_norm": 2.765625,
      "learning_rate": 4.058324311834908e-05,
      "loss": 0.9263,
      "step": 230690
    },
    {
      "epoch": 0.8085459840814224,
      "grad_norm": 2.6875,
      "learning_rate": 4.058259408968538e-05,
      "loss": 0.9124,
      "step": 230700
    },
    {
      "epoch": 0.808581031588318,
      "grad_norm": 2.953125,
      "learning_rate": 4.058194506102168e-05,
      "loss": 0.9234,
      "step": 230710
    },
    {
      "epoch": 0.8086160790952136,
      "grad_norm": 2.859375,
      "learning_rate": 4.058129603235797e-05,
      "loss": 0.8748,
      "step": 230720
    },
    {
      "epoch": 0.8086511266021091,
      "grad_norm": 2.828125,
      "learning_rate": 4.0580647003694274e-05,
      "loss": 0.9508,
      "step": 230730
    },
    {
      "epoch": 0.8086861741090048,
      "grad_norm": 2.65625,
      "learning_rate": 4.057999797503057e-05,
      "loss": 0.8306,
      "step": 230740
    },
    {
      "epoch": 0.8087212216159003,
      "grad_norm": 2.875,
      "learning_rate": 4.057934894636687e-05,
      "loss": 0.8322,
      "step": 230750
    },
    {
      "epoch": 0.808756269122796,
      "grad_norm": 3.5,
      "learning_rate": 4.0578699917703165e-05,
      "loss": 0.8891,
      "step": 230760
    },
    {
      "epoch": 0.8087913166296915,
      "grad_norm": 3.125,
      "learning_rate": 4.0578050889039466e-05,
      "loss": 0.9555,
      "step": 230770
    },
    {
      "epoch": 0.8088263641365872,
      "grad_norm": 2.8125,
      "learning_rate": 4.057740186037576e-05,
      "loss": 0.9624,
      "step": 230780
    },
    {
      "epoch": 0.8088614116434827,
      "grad_norm": 2.984375,
      "learning_rate": 4.057675283171206e-05,
      "loss": 0.9555,
      "step": 230790
    },
    {
      "epoch": 0.8088964591503783,
      "grad_norm": 3.296875,
      "learning_rate": 4.057610380304836e-05,
      "loss": 0.9371,
      "step": 230800
    },
    {
      "epoch": 0.808931506657274,
      "grad_norm": 2.640625,
      "learning_rate": 4.057545477438466e-05,
      "loss": 0.887,
      "step": 230810
    },
    {
      "epoch": 0.8089665541641695,
      "grad_norm": 2.578125,
      "learning_rate": 4.057480574572096e-05,
      "loss": 0.886,
      "step": 230820
    },
    {
      "epoch": 0.8090016016710652,
      "grad_norm": 2.671875,
      "learning_rate": 4.0574156717057254e-05,
      "loss": 0.8545,
      "step": 230830
    },
    {
      "epoch": 0.8090366491779607,
      "grad_norm": 2.609375,
      "learning_rate": 4.0573507688393556e-05,
      "loss": 0.8921,
      "step": 230840
    },
    {
      "epoch": 0.8090716966848563,
      "grad_norm": 3.046875,
      "learning_rate": 4.057285865972985e-05,
      "loss": 0.9695,
      "step": 230850
    },
    {
      "epoch": 0.8091067441917519,
      "grad_norm": 2.890625,
      "learning_rate": 4.057220963106615e-05,
      "loss": 0.9405,
      "step": 230860
    },
    {
      "epoch": 0.8091417916986475,
      "grad_norm": 3.375,
      "learning_rate": 4.0571560602402446e-05,
      "loss": 0.9449,
      "step": 230870
    },
    {
      "epoch": 0.8091768392055431,
      "grad_norm": 2.796875,
      "learning_rate": 4.057091157373875e-05,
      "loss": 0.8948,
      "step": 230880
    },
    {
      "epoch": 0.8092118867124387,
      "grad_norm": 2.890625,
      "learning_rate": 4.057026254507504e-05,
      "loss": 0.8672,
      "step": 230890
    },
    {
      "epoch": 0.8092469342193344,
      "grad_norm": 2.921875,
      "learning_rate": 4.0569613516411344e-05,
      "loss": 0.905,
      "step": 230900
    },
    {
      "epoch": 0.8092819817262299,
      "grad_norm": 2.859375,
      "learning_rate": 4.056896448774764e-05,
      "loss": 0.7998,
      "step": 230910
    },
    {
      "epoch": 0.8093170292331255,
      "grad_norm": 3.03125,
      "learning_rate": 4.056831545908394e-05,
      "loss": 0.8667,
      "step": 230920
    },
    {
      "epoch": 0.8093520767400211,
      "grad_norm": 3.375,
      "learning_rate": 4.0567666430420234e-05,
      "loss": 0.9084,
      "step": 230930
    },
    {
      "epoch": 0.8093871242469167,
      "grad_norm": 3.078125,
      "learning_rate": 4.0567017401756536e-05,
      "loss": 0.8989,
      "step": 230940
    },
    {
      "epoch": 0.8094221717538123,
      "grad_norm": 3.203125,
      "learning_rate": 4.056636837309283e-05,
      "loss": 0.8674,
      "step": 230950
    },
    {
      "epoch": 0.8094572192607079,
      "grad_norm": 3.0,
      "learning_rate": 4.056571934442913e-05,
      "loss": 0.921,
      "step": 230960
    },
    {
      "epoch": 0.8094922667676034,
      "grad_norm": 2.609375,
      "learning_rate": 4.056507031576543e-05,
      "loss": 0.8543,
      "step": 230970
    },
    {
      "epoch": 0.8095273142744991,
      "grad_norm": 3.078125,
      "learning_rate": 4.056442128710173e-05,
      "loss": 0.906,
      "step": 230980
    },
    {
      "epoch": 0.8095623617813947,
      "grad_norm": 3.21875,
      "learning_rate": 4.056377225843802e-05,
      "loss": 0.8438,
      "step": 230990
    },
    {
      "epoch": 0.8095974092882903,
      "grad_norm": 3.015625,
      "learning_rate": 4.056312322977432e-05,
      "loss": 0.9091,
      "step": 231000
    },
    {
      "epoch": 0.8096324567951859,
      "grad_norm": 2.96875,
      "learning_rate": 4.056247420111062e-05,
      "loss": 0.9313,
      "step": 231010
    },
    {
      "epoch": 0.8096675043020815,
      "grad_norm": 2.6875,
      "learning_rate": 4.056182517244691e-05,
      "loss": 0.8536,
      "step": 231020
    },
    {
      "epoch": 0.8097025518089771,
      "grad_norm": 2.34375,
      "learning_rate": 4.0561176143783214e-05,
      "loss": 0.892,
      "step": 231030
    },
    {
      "epoch": 0.8097375993158726,
      "grad_norm": 2.9375,
      "learning_rate": 4.056052711511951e-05,
      "loss": 0.864,
      "step": 231040
    },
    {
      "epoch": 0.8097726468227683,
      "grad_norm": 3.3125,
      "learning_rate": 4.055987808645581e-05,
      "loss": 0.8966,
      "step": 231050
    },
    {
      "epoch": 0.8098076943296638,
      "grad_norm": 3.421875,
      "learning_rate": 4.055922905779211e-05,
      "loss": 0.852,
      "step": 231060
    },
    {
      "epoch": 0.8098427418365595,
      "grad_norm": 3.0,
      "learning_rate": 4.0558580029128406e-05,
      "loss": 0.8861,
      "step": 231070
    },
    {
      "epoch": 0.809877789343455,
      "grad_norm": 3.078125,
      "learning_rate": 4.055793100046471e-05,
      "loss": 0.9032,
      "step": 231080
    },
    {
      "epoch": 0.8099128368503506,
      "grad_norm": 3.125,
      "learning_rate": 4.0557281971801e-05,
      "loss": 0.8927,
      "step": 231090
    },
    {
      "epoch": 0.8099478843572463,
      "grad_norm": 2.453125,
      "learning_rate": 4.0556632943137304e-05,
      "loss": 0.8381,
      "step": 231100
    },
    {
      "epoch": 0.8099829318641418,
      "grad_norm": 2.96875,
      "learning_rate": 4.05559839144736e-05,
      "loss": 0.8695,
      "step": 231110
    },
    {
      "epoch": 0.8100179793710375,
      "grad_norm": 2.65625,
      "learning_rate": 4.05553348858099e-05,
      "loss": 0.883,
      "step": 231120
    },
    {
      "epoch": 0.810053026877933,
      "grad_norm": 2.890625,
      "learning_rate": 4.0554685857146194e-05,
      "loss": 0.9508,
      "step": 231130
    },
    {
      "epoch": 0.8100880743848287,
      "grad_norm": 2.90625,
      "learning_rate": 4.0554036828482496e-05,
      "loss": 0.9387,
      "step": 231140
    },
    {
      "epoch": 0.8101231218917242,
      "grad_norm": 3.15625,
      "learning_rate": 4.055338779981879e-05,
      "loss": 0.8491,
      "step": 231150
    },
    {
      "epoch": 0.8101581693986198,
      "grad_norm": 2.484375,
      "learning_rate": 4.055273877115509e-05,
      "loss": 0.8427,
      "step": 231160
    },
    {
      "epoch": 0.8101932169055154,
      "grad_norm": 2.90625,
      "learning_rate": 4.0552089742491386e-05,
      "loss": 0.9862,
      "step": 231170
    },
    {
      "epoch": 0.810228264412411,
      "grad_norm": 2.65625,
      "learning_rate": 4.055144071382769e-05,
      "loss": 0.859,
      "step": 231180
    },
    {
      "epoch": 0.8102633119193067,
      "grad_norm": 2.828125,
      "learning_rate": 4.055079168516399e-05,
      "loss": 0.8783,
      "step": 231190
    },
    {
      "epoch": 0.8102983594262022,
      "grad_norm": 2.609375,
      "learning_rate": 4.0550142656500284e-05,
      "loss": 0.8501,
      "step": 231200
    },
    {
      "epoch": 0.8103334069330979,
      "grad_norm": 2.671875,
      "learning_rate": 4.0549493627836585e-05,
      "loss": 0.9393,
      "step": 231210
    },
    {
      "epoch": 0.8103684544399934,
      "grad_norm": 3.09375,
      "learning_rate": 4.054884459917288e-05,
      "loss": 0.8803,
      "step": 231220
    },
    {
      "epoch": 0.810403501946889,
      "grad_norm": 2.984375,
      "learning_rate": 4.054819557050918e-05,
      "loss": 0.9715,
      "step": 231230
    },
    {
      "epoch": 0.8104385494537846,
      "grad_norm": 3.203125,
      "learning_rate": 4.0547546541845476e-05,
      "loss": 0.9219,
      "step": 231240
    },
    {
      "epoch": 0.8104735969606802,
      "grad_norm": 2.703125,
      "learning_rate": 4.054689751318178e-05,
      "loss": 0.8197,
      "step": 231250
    },
    {
      "epoch": 0.8105086444675758,
      "grad_norm": 2.84375,
      "learning_rate": 4.054624848451807e-05,
      "loss": 0.8833,
      "step": 231260
    },
    {
      "epoch": 0.8105436919744714,
      "grad_norm": 3.015625,
      "learning_rate": 4.054559945585437e-05,
      "loss": 0.7878,
      "step": 231270
    },
    {
      "epoch": 0.8105787394813669,
      "grad_norm": 2.8125,
      "learning_rate": 4.054495042719067e-05,
      "loss": 0.9258,
      "step": 231280
    },
    {
      "epoch": 0.8106137869882626,
      "grad_norm": 2.8125,
      "learning_rate": 4.054430139852697e-05,
      "loss": 0.8988,
      "step": 231290
    },
    {
      "epoch": 0.8106488344951582,
      "grad_norm": 3.0,
      "learning_rate": 4.0543652369863264e-05,
      "loss": 0.8122,
      "step": 231300
    },
    {
      "epoch": 0.8106838820020538,
      "grad_norm": 3.390625,
      "learning_rate": 4.0543003341199565e-05,
      "loss": 0.9098,
      "step": 231310
    },
    {
      "epoch": 0.8107189295089494,
      "grad_norm": 2.921875,
      "learning_rate": 4.054235431253586e-05,
      "loss": 0.8318,
      "step": 231320
    },
    {
      "epoch": 0.810753977015845,
      "grad_norm": 2.984375,
      "learning_rate": 4.054170528387216e-05,
      "loss": 0.8924,
      "step": 231330
    },
    {
      "epoch": 0.8107890245227406,
      "grad_norm": 3.0,
      "learning_rate": 4.054105625520846e-05,
      "loss": 0.8957,
      "step": 231340
    },
    {
      "epoch": 0.8108240720296361,
      "grad_norm": 3.390625,
      "learning_rate": 4.054040722654476e-05,
      "loss": 0.9331,
      "step": 231350
    },
    {
      "epoch": 0.8108591195365318,
      "grad_norm": 3.421875,
      "learning_rate": 4.053975819788106e-05,
      "loss": 0.8873,
      "step": 231360
    },
    {
      "epoch": 0.8108941670434273,
      "grad_norm": 2.78125,
      "learning_rate": 4.0539109169217346e-05,
      "loss": 0.9054,
      "step": 231370
    },
    {
      "epoch": 0.810929214550323,
      "grad_norm": 2.609375,
      "learning_rate": 4.053846014055365e-05,
      "loss": 0.8894,
      "step": 231380
    },
    {
      "epoch": 0.8109642620572186,
      "grad_norm": 3.375,
      "learning_rate": 4.053781111188994e-05,
      "loss": 0.827,
      "step": 231390
    },
    {
      "epoch": 0.8109993095641141,
      "grad_norm": 2.59375,
      "learning_rate": 4.0537162083226244e-05,
      "loss": 0.9884,
      "step": 231400
    },
    {
      "epoch": 0.8110343570710098,
      "grad_norm": 3.078125,
      "learning_rate": 4.053651305456254e-05,
      "loss": 0.9527,
      "step": 231410
    },
    {
      "epoch": 0.8110694045779053,
      "grad_norm": 2.75,
      "learning_rate": 4.053586402589884e-05,
      "loss": 0.8563,
      "step": 231420
    },
    {
      "epoch": 0.811104452084801,
      "grad_norm": 3.0,
      "learning_rate": 4.053521499723514e-05,
      "loss": 0.8082,
      "step": 231430
    },
    {
      "epoch": 0.8111394995916965,
      "grad_norm": 2.71875,
      "learning_rate": 4.0534565968571436e-05,
      "loss": 0.8902,
      "step": 231440
    },
    {
      "epoch": 0.8111745470985922,
      "grad_norm": 3.109375,
      "learning_rate": 4.053391693990774e-05,
      "loss": 0.9343,
      "step": 231450
    },
    {
      "epoch": 0.8112095946054877,
      "grad_norm": 2.921875,
      "learning_rate": 4.053326791124403e-05,
      "loss": 0.9065,
      "step": 231460
    },
    {
      "epoch": 0.8112446421123833,
      "grad_norm": 2.796875,
      "learning_rate": 4.053261888258033e-05,
      "loss": 0.9532,
      "step": 231470
    },
    {
      "epoch": 0.811279689619279,
      "grad_norm": 3.65625,
      "learning_rate": 4.053196985391663e-05,
      "loss": 0.8528,
      "step": 231480
    },
    {
      "epoch": 0.8113147371261745,
      "grad_norm": 2.875,
      "learning_rate": 4.053132082525293e-05,
      "loss": 0.9435,
      "step": 231490
    },
    {
      "epoch": 0.8113497846330702,
      "grad_norm": 2.984375,
      "learning_rate": 4.0530671796589224e-05,
      "loss": 0.8898,
      "step": 231500
    },
    {
      "epoch": 0.8113848321399657,
      "grad_norm": 2.6875,
      "learning_rate": 4.0530022767925525e-05,
      "loss": 0.8943,
      "step": 231510
    },
    {
      "epoch": 0.8114198796468614,
      "grad_norm": 2.671875,
      "learning_rate": 4.052937373926182e-05,
      "loss": 0.8928,
      "step": 231520
    },
    {
      "epoch": 0.8114549271537569,
      "grad_norm": 2.46875,
      "learning_rate": 4.052872471059812e-05,
      "loss": 0.9291,
      "step": 231530
    },
    {
      "epoch": 0.8114899746606525,
      "grad_norm": 3.109375,
      "learning_rate": 4.0528075681934416e-05,
      "loss": 0.8606,
      "step": 231540
    },
    {
      "epoch": 0.8115250221675481,
      "grad_norm": 2.859375,
      "learning_rate": 4.052742665327072e-05,
      "loss": 0.8699,
      "step": 231550
    },
    {
      "epoch": 0.8115600696744437,
      "grad_norm": 2.703125,
      "learning_rate": 4.052677762460702e-05,
      "loss": 0.8495,
      "step": 231560
    },
    {
      "epoch": 0.8115951171813393,
      "grad_norm": 2.796875,
      "learning_rate": 4.052612859594331e-05,
      "loss": 0.9159,
      "step": 231570
    },
    {
      "epoch": 0.8116301646882349,
      "grad_norm": 3.140625,
      "learning_rate": 4.0525479567279614e-05,
      "loss": 0.9056,
      "step": 231580
    },
    {
      "epoch": 0.8116652121951305,
      "grad_norm": 2.53125,
      "learning_rate": 4.052483053861591e-05,
      "loss": 0.8462,
      "step": 231590
    },
    {
      "epoch": 0.8117002597020261,
      "grad_norm": 3.21875,
      "learning_rate": 4.052418150995221e-05,
      "loss": 0.8967,
      "step": 231600
    },
    {
      "epoch": 0.8117353072089217,
      "grad_norm": 2.8125,
      "learning_rate": 4.0523532481288505e-05,
      "loss": 0.8897,
      "step": 231610
    },
    {
      "epoch": 0.8117703547158173,
      "grad_norm": 3.203125,
      "learning_rate": 4.0522883452624806e-05,
      "loss": 0.97,
      "step": 231620
    },
    {
      "epoch": 0.8118054022227129,
      "grad_norm": 3.359375,
      "learning_rate": 4.05222344239611e-05,
      "loss": 0.9043,
      "step": 231630
    },
    {
      "epoch": 0.8118404497296084,
      "grad_norm": 2.953125,
      "learning_rate": 4.05215853952974e-05,
      "loss": 0.9638,
      "step": 231640
    },
    {
      "epoch": 0.8118754972365041,
      "grad_norm": 2.78125,
      "learning_rate": 4.05209363666337e-05,
      "loss": 0.883,
      "step": 231650
    },
    {
      "epoch": 0.8119105447433996,
      "grad_norm": 2.765625,
      "learning_rate": 4.052028733797e-05,
      "loss": 0.8758,
      "step": 231660
    },
    {
      "epoch": 0.8119455922502953,
      "grad_norm": 2.84375,
      "learning_rate": 4.051963830930629e-05,
      "loss": 0.8459,
      "step": 231670
    },
    {
      "epoch": 0.8119806397571909,
      "grad_norm": 2.984375,
      "learning_rate": 4.0518989280642594e-05,
      "loss": 0.9277,
      "step": 231680
    },
    {
      "epoch": 0.8120156872640865,
      "grad_norm": 3.0,
      "learning_rate": 4.0518340251978896e-05,
      "loss": 0.8308,
      "step": 231690
    },
    {
      "epoch": 0.8120507347709821,
      "grad_norm": 3.3125,
      "learning_rate": 4.051769122331519e-05,
      "loss": 0.8627,
      "step": 231700
    },
    {
      "epoch": 0.8120857822778776,
      "grad_norm": 3.0,
      "learning_rate": 4.051704219465149e-05,
      "loss": 0.8323,
      "step": 231710
    },
    {
      "epoch": 0.8121208297847733,
      "grad_norm": 2.671875,
      "learning_rate": 4.0516393165987786e-05,
      "loss": 0.8589,
      "step": 231720
    },
    {
      "epoch": 0.8121558772916688,
      "grad_norm": 2.984375,
      "learning_rate": 4.051574413732409e-05,
      "loss": 0.8447,
      "step": 231730
    },
    {
      "epoch": 0.8121909247985645,
      "grad_norm": 2.8125,
      "learning_rate": 4.0515095108660376e-05,
      "loss": 0.8371,
      "step": 231740
    },
    {
      "epoch": 0.81222597230546,
      "grad_norm": 2.640625,
      "learning_rate": 4.051444607999668e-05,
      "loss": 0.8692,
      "step": 231750
    },
    {
      "epoch": 0.8122610198123557,
      "grad_norm": 2.765625,
      "learning_rate": 4.051379705133297e-05,
      "loss": 0.9126,
      "step": 231760
    },
    {
      "epoch": 0.8122960673192512,
      "grad_norm": 2.484375,
      "learning_rate": 4.051314802266927e-05,
      "loss": 0.8976,
      "step": 231770
    },
    {
      "epoch": 0.8123311148261468,
      "grad_norm": 2.75,
      "learning_rate": 4.0512498994005574e-05,
      "loss": 0.9354,
      "step": 231780
    },
    {
      "epoch": 0.8123661623330425,
      "grad_norm": 3.328125,
      "learning_rate": 4.051184996534187e-05,
      "loss": 0.9227,
      "step": 231790
    },
    {
      "epoch": 0.812401209839938,
      "grad_norm": 2.6875,
      "learning_rate": 4.051120093667817e-05,
      "loss": 0.8432,
      "step": 231800
    },
    {
      "epoch": 0.8124362573468337,
      "grad_norm": 3.375,
      "learning_rate": 4.0510551908014465e-05,
      "loss": 0.9601,
      "step": 231810
    },
    {
      "epoch": 0.8124713048537292,
      "grad_norm": 2.90625,
      "learning_rate": 4.0509902879350766e-05,
      "loss": 0.9515,
      "step": 231820
    },
    {
      "epoch": 0.8125063523606249,
      "grad_norm": 3.015625,
      "learning_rate": 4.050925385068706e-05,
      "loss": 0.9044,
      "step": 231830
    },
    {
      "epoch": 0.8125413998675204,
      "grad_norm": 2.734375,
      "learning_rate": 4.050860482202336e-05,
      "loss": 0.8558,
      "step": 231840
    },
    {
      "epoch": 0.812576447374416,
      "grad_norm": 3.140625,
      "learning_rate": 4.050795579335966e-05,
      "loss": 0.8781,
      "step": 231850
    },
    {
      "epoch": 0.8126114948813116,
      "grad_norm": 3.109375,
      "learning_rate": 4.050730676469596e-05,
      "loss": 0.8122,
      "step": 231860
    },
    {
      "epoch": 0.8126465423882072,
      "grad_norm": 2.953125,
      "learning_rate": 4.050665773603225e-05,
      "loss": 0.8996,
      "step": 231870
    },
    {
      "epoch": 0.8126815898951029,
      "grad_norm": 2.984375,
      "learning_rate": 4.0506008707368554e-05,
      "loss": 0.9262,
      "step": 231880
    },
    {
      "epoch": 0.8127166374019984,
      "grad_norm": 2.796875,
      "learning_rate": 4.050535967870485e-05,
      "loss": 0.8817,
      "step": 231890
    },
    {
      "epoch": 0.812751684908894,
      "grad_norm": 2.375,
      "learning_rate": 4.050471065004115e-05,
      "loss": 0.8585,
      "step": 231900
    },
    {
      "epoch": 0.8127867324157896,
      "grad_norm": 2.984375,
      "learning_rate": 4.0504061621377445e-05,
      "loss": 0.8526,
      "step": 231910
    },
    {
      "epoch": 0.8128217799226852,
      "grad_norm": 2.53125,
      "learning_rate": 4.0503412592713746e-05,
      "loss": 0.8306,
      "step": 231920
    },
    {
      "epoch": 0.8128568274295808,
      "grad_norm": 2.71875,
      "learning_rate": 4.050276356405005e-05,
      "loss": 0.8938,
      "step": 231930
    },
    {
      "epoch": 0.8128918749364764,
      "grad_norm": 2.6875,
      "learning_rate": 4.050211453538634e-05,
      "loss": 0.9254,
      "step": 231940
    },
    {
      "epoch": 0.812926922443372,
      "grad_norm": 3.0625,
      "learning_rate": 4.0501465506722644e-05,
      "loss": 0.9365,
      "step": 231950
    },
    {
      "epoch": 0.8129619699502676,
      "grad_norm": 2.953125,
      "learning_rate": 4.050081647805894e-05,
      "loss": 0.9335,
      "step": 231960
    },
    {
      "epoch": 0.8129970174571632,
      "grad_norm": 2.875,
      "learning_rate": 4.050016744939524e-05,
      "loss": 1.024,
      "step": 231970
    },
    {
      "epoch": 0.8130320649640588,
      "grad_norm": 3.375,
      "learning_rate": 4.0499518420731534e-05,
      "loss": 0.8502,
      "step": 231980
    },
    {
      "epoch": 0.8130671124709544,
      "grad_norm": 3.515625,
      "learning_rate": 4.0498869392067836e-05,
      "loss": 0.9215,
      "step": 231990
    },
    {
      "epoch": 0.81310215997785,
      "grad_norm": 3.0,
      "learning_rate": 4.049822036340413e-05,
      "loss": 0.8465,
      "step": 232000
    },
    {
      "epoch": 0.8131372074847456,
      "grad_norm": 2.765625,
      "learning_rate": 4.049757133474043e-05,
      "loss": 0.8936,
      "step": 232010
    },
    {
      "epoch": 0.8131722549916411,
      "grad_norm": 2.84375,
      "learning_rate": 4.0496922306076726e-05,
      "loss": 0.8315,
      "step": 232020
    },
    {
      "epoch": 0.8132073024985368,
      "grad_norm": 2.796875,
      "learning_rate": 4.049627327741303e-05,
      "loss": 0.8673,
      "step": 232030
    },
    {
      "epoch": 0.8132423500054323,
      "grad_norm": 2.640625,
      "learning_rate": 4.049562424874932e-05,
      "loss": 0.8734,
      "step": 232040
    },
    {
      "epoch": 0.813277397512328,
      "grad_norm": 2.90625,
      "learning_rate": 4.0494975220085624e-05,
      "loss": 0.8517,
      "step": 232050
    },
    {
      "epoch": 0.8133124450192235,
      "grad_norm": 2.796875,
      "learning_rate": 4.0494326191421925e-05,
      "loss": 0.9475,
      "step": 232060
    },
    {
      "epoch": 0.8133474925261192,
      "grad_norm": 2.9375,
      "learning_rate": 4.049367716275822e-05,
      "loss": 0.8056,
      "step": 232070
    },
    {
      "epoch": 0.8133825400330148,
      "grad_norm": 3.046875,
      "learning_rate": 4.049302813409452e-05,
      "loss": 0.8656,
      "step": 232080
    },
    {
      "epoch": 0.8134175875399103,
      "grad_norm": 3.078125,
      "learning_rate": 4.0492379105430816e-05,
      "loss": 0.9583,
      "step": 232090
    },
    {
      "epoch": 0.813452635046806,
      "grad_norm": 3.078125,
      "learning_rate": 4.049173007676712e-05,
      "loss": 0.89,
      "step": 232100
    },
    {
      "epoch": 0.8134876825537015,
      "grad_norm": 3.046875,
      "learning_rate": 4.049108104810341e-05,
      "loss": 0.9504,
      "step": 232110
    },
    {
      "epoch": 0.8135227300605972,
      "grad_norm": 3.109375,
      "learning_rate": 4.0490432019439706e-05,
      "loss": 0.918,
      "step": 232120
    },
    {
      "epoch": 0.8135577775674927,
      "grad_norm": 2.765625,
      "learning_rate": 4.0489782990776e-05,
      "loss": 0.8731,
      "step": 232130
    },
    {
      "epoch": 0.8135928250743883,
      "grad_norm": 3.25,
      "learning_rate": 4.04891339621123e-05,
      "loss": 0.8861,
      "step": 232140
    },
    {
      "epoch": 0.8136278725812839,
      "grad_norm": 2.921875,
      "learning_rate": 4.0488484933448604e-05,
      "loss": 0.8577,
      "step": 232150
    },
    {
      "epoch": 0.8136629200881795,
      "grad_norm": 3.140625,
      "learning_rate": 4.04878359047849e-05,
      "loss": 0.9113,
      "step": 232160
    },
    {
      "epoch": 0.8136979675950752,
      "grad_norm": 2.828125,
      "learning_rate": 4.04871868761212e-05,
      "loss": 0.9641,
      "step": 232170
    },
    {
      "epoch": 0.8137330151019707,
      "grad_norm": 2.671875,
      "learning_rate": 4.0486537847457494e-05,
      "loss": 0.8354,
      "step": 232180
    },
    {
      "epoch": 0.8137680626088664,
      "grad_norm": 2.796875,
      "learning_rate": 4.0485888818793796e-05,
      "loss": 0.8197,
      "step": 232190
    },
    {
      "epoch": 0.8138031101157619,
      "grad_norm": 2.921875,
      "learning_rate": 4.048523979013009e-05,
      "loss": 0.8401,
      "step": 232200
    },
    {
      "epoch": 0.8138381576226575,
      "grad_norm": 2.6875,
      "learning_rate": 4.048459076146639e-05,
      "loss": 0.7998,
      "step": 232210
    },
    {
      "epoch": 0.8138732051295531,
      "grad_norm": 3.34375,
      "learning_rate": 4.0483941732802686e-05,
      "loss": 0.902,
      "step": 232220
    },
    {
      "epoch": 0.8139082526364487,
      "grad_norm": 2.453125,
      "learning_rate": 4.048329270413899e-05,
      "loss": 0.9066,
      "step": 232230
    },
    {
      "epoch": 0.8139433001433443,
      "grad_norm": 2.6875,
      "learning_rate": 4.048264367547528e-05,
      "loss": 0.8742,
      "step": 232240
    },
    {
      "epoch": 0.8139783476502399,
      "grad_norm": 2.921875,
      "learning_rate": 4.0481994646811584e-05,
      "loss": 0.8791,
      "step": 232250
    },
    {
      "epoch": 0.8140133951571354,
      "grad_norm": 2.890625,
      "learning_rate": 4.048134561814788e-05,
      "loss": 0.871,
      "step": 232260
    },
    {
      "epoch": 0.8140484426640311,
      "grad_norm": 3.140625,
      "learning_rate": 4.048069658948418e-05,
      "loss": 0.8446,
      "step": 232270
    },
    {
      "epoch": 0.8140834901709267,
      "grad_norm": 2.71875,
      "learning_rate": 4.0480047560820474e-05,
      "loss": 0.8385,
      "step": 232280
    },
    {
      "epoch": 0.8141185376778223,
      "grad_norm": 2.953125,
      "learning_rate": 4.0479398532156776e-05,
      "loss": 0.9834,
      "step": 232290
    },
    {
      "epoch": 0.8141535851847179,
      "grad_norm": 2.578125,
      "learning_rate": 4.047874950349308e-05,
      "loss": 0.804,
      "step": 232300
    },
    {
      "epoch": 0.8141886326916135,
      "grad_norm": 3.203125,
      "learning_rate": 4.047810047482937e-05,
      "loss": 0.9169,
      "step": 232310
    },
    {
      "epoch": 0.8142236801985091,
      "grad_norm": 3.046875,
      "learning_rate": 4.047745144616567e-05,
      "loss": 0.9109,
      "step": 232320
    },
    {
      "epoch": 0.8142587277054046,
      "grad_norm": 3.375,
      "learning_rate": 4.047680241750197e-05,
      "loss": 0.9442,
      "step": 232330
    },
    {
      "epoch": 0.8142937752123003,
      "grad_norm": 2.859375,
      "learning_rate": 4.047615338883827e-05,
      "loss": 0.8485,
      "step": 232340
    },
    {
      "epoch": 0.8143288227191958,
      "grad_norm": 2.875,
      "learning_rate": 4.0475504360174564e-05,
      "loss": 0.8206,
      "step": 232350
    },
    {
      "epoch": 0.8143638702260915,
      "grad_norm": 2.90625,
      "learning_rate": 4.0474855331510865e-05,
      "loss": 0.9137,
      "step": 232360
    },
    {
      "epoch": 0.8143989177329871,
      "grad_norm": 3.453125,
      "learning_rate": 4.047420630284716e-05,
      "loss": 0.8842,
      "step": 232370
    },
    {
      "epoch": 0.8144339652398827,
      "grad_norm": 2.453125,
      "learning_rate": 4.047355727418346e-05,
      "loss": 0.8844,
      "step": 232380
    },
    {
      "epoch": 0.8144690127467783,
      "grad_norm": 3.0625,
      "learning_rate": 4.0472908245519756e-05,
      "loss": 0.8601,
      "step": 232390
    },
    {
      "epoch": 0.8145040602536738,
      "grad_norm": 2.625,
      "learning_rate": 4.047225921685606e-05,
      "loss": 0.8015,
      "step": 232400
    },
    {
      "epoch": 0.8145391077605695,
      "grad_norm": 3.4375,
      "learning_rate": 4.047161018819235e-05,
      "loss": 0.896,
      "step": 232410
    },
    {
      "epoch": 0.814574155267465,
      "grad_norm": 2.890625,
      "learning_rate": 4.047096115952865e-05,
      "loss": 0.8252,
      "step": 232420
    },
    {
      "epoch": 0.8146092027743607,
      "grad_norm": 2.828125,
      "learning_rate": 4.0470312130864954e-05,
      "loss": 0.8683,
      "step": 232430
    },
    {
      "epoch": 0.8146442502812562,
      "grad_norm": 2.90625,
      "learning_rate": 4.046966310220125e-05,
      "loss": 0.8955,
      "step": 232440
    },
    {
      "epoch": 0.8146792977881518,
      "grad_norm": 3.0625,
      "learning_rate": 4.046901407353755e-05,
      "loss": 0.9414,
      "step": 232450
    },
    {
      "epoch": 0.8147143452950474,
      "grad_norm": 3.234375,
      "learning_rate": 4.0468365044873845e-05,
      "loss": 0.9674,
      "step": 232460
    },
    {
      "epoch": 0.814749392801943,
      "grad_norm": 3.3125,
      "learning_rate": 4.0467716016210146e-05,
      "loss": 0.9171,
      "step": 232470
    },
    {
      "epoch": 0.8147844403088387,
      "grad_norm": 2.765625,
      "learning_rate": 4.046706698754644e-05,
      "loss": 0.8683,
      "step": 232480
    },
    {
      "epoch": 0.8148194878157342,
      "grad_norm": 2.578125,
      "learning_rate": 4.046641795888274e-05,
      "loss": 0.8191,
      "step": 232490
    },
    {
      "epoch": 0.8148545353226299,
      "grad_norm": 2.90625,
      "learning_rate": 4.046576893021903e-05,
      "loss": 0.8568,
      "step": 232500
    },
    {
      "epoch": 0.8148895828295254,
      "grad_norm": 2.203125,
      "learning_rate": 4.046511990155533e-05,
      "loss": 0.866,
      "step": 232510
    },
    {
      "epoch": 0.814924630336421,
      "grad_norm": 2.859375,
      "learning_rate": 4.046447087289163e-05,
      "loss": 0.8936,
      "step": 232520
    },
    {
      "epoch": 0.8149596778433166,
      "grad_norm": 3.1875,
      "learning_rate": 4.046382184422793e-05,
      "loss": 0.9259,
      "step": 232530
    },
    {
      "epoch": 0.8149947253502122,
      "grad_norm": 2.9375,
      "learning_rate": 4.046317281556423e-05,
      "loss": 0.8541,
      "step": 232540
    },
    {
      "epoch": 0.8150297728571078,
      "grad_norm": 2.59375,
      "learning_rate": 4.0462523786900524e-05,
      "loss": 0.9335,
      "step": 232550
    },
    {
      "epoch": 0.8150648203640034,
      "grad_norm": 3.125,
      "learning_rate": 4.0461874758236825e-05,
      "loss": 0.9069,
      "step": 232560
    },
    {
      "epoch": 0.815099867870899,
      "grad_norm": 2.625,
      "learning_rate": 4.046122572957312e-05,
      "loss": 0.898,
      "step": 232570
    },
    {
      "epoch": 0.8151349153777946,
      "grad_norm": 2.9375,
      "learning_rate": 4.046057670090942e-05,
      "loss": 0.8274,
      "step": 232580
    },
    {
      "epoch": 0.8151699628846902,
      "grad_norm": 2.890625,
      "learning_rate": 4.0459927672245716e-05,
      "loss": 0.942,
      "step": 232590
    },
    {
      "epoch": 0.8152050103915858,
      "grad_norm": 2.578125,
      "learning_rate": 4.045927864358202e-05,
      "loss": 0.8805,
      "step": 232600
    },
    {
      "epoch": 0.8152400578984814,
      "grad_norm": 2.703125,
      "learning_rate": 4.045862961491831e-05,
      "loss": 0.8856,
      "step": 232610
    },
    {
      "epoch": 0.815275105405377,
      "grad_norm": 2.84375,
      "learning_rate": 4.045798058625461e-05,
      "loss": 0.8464,
      "step": 232620
    },
    {
      "epoch": 0.8153101529122726,
      "grad_norm": 2.96875,
      "learning_rate": 4.045733155759091e-05,
      "loss": 0.9846,
      "step": 232630
    },
    {
      "epoch": 0.8153452004191681,
      "grad_norm": 2.890625,
      "learning_rate": 4.045668252892721e-05,
      "loss": 0.8733,
      "step": 232640
    },
    {
      "epoch": 0.8153802479260638,
      "grad_norm": 2.71875,
      "learning_rate": 4.045603350026351e-05,
      "loss": 0.8327,
      "step": 232650
    },
    {
      "epoch": 0.8154152954329594,
      "grad_norm": 2.734375,
      "learning_rate": 4.0455384471599805e-05,
      "loss": 0.9553,
      "step": 232660
    },
    {
      "epoch": 0.815450342939855,
      "grad_norm": 2.859375,
      "learning_rate": 4.0454735442936106e-05,
      "loss": 0.8353,
      "step": 232670
    },
    {
      "epoch": 0.8154853904467506,
      "grad_norm": 3.046875,
      "learning_rate": 4.04540864142724e-05,
      "loss": 0.9856,
      "step": 232680
    },
    {
      "epoch": 0.8155204379536461,
      "grad_norm": 3.203125,
      "learning_rate": 4.04534373856087e-05,
      "loss": 0.8664,
      "step": 232690
    },
    {
      "epoch": 0.8155554854605418,
      "grad_norm": 3.21875,
      "learning_rate": 4.0452788356945e-05,
      "loss": 0.9115,
      "step": 232700
    },
    {
      "epoch": 0.8155905329674373,
      "grad_norm": 3.0,
      "learning_rate": 4.04521393282813e-05,
      "loss": 0.9163,
      "step": 232710
    },
    {
      "epoch": 0.815625580474333,
      "grad_norm": 3.109375,
      "learning_rate": 4.045149029961759e-05,
      "loss": 0.8107,
      "step": 232720
    },
    {
      "epoch": 0.8156606279812285,
      "grad_norm": 2.96875,
      "learning_rate": 4.0450841270953894e-05,
      "loss": 0.8926,
      "step": 232730
    },
    {
      "epoch": 0.8156956754881242,
      "grad_norm": 2.34375,
      "learning_rate": 4.045019224229019e-05,
      "loss": 0.907,
      "step": 232740
    },
    {
      "epoch": 0.8157307229950197,
      "grad_norm": 2.546875,
      "learning_rate": 4.044954321362649e-05,
      "loss": 0.8548,
      "step": 232750
    },
    {
      "epoch": 0.8157657705019153,
      "grad_norm": 2.65625,
      "learning_rate": 4.0448894184962785e-05,
      "loss": 0.8887,
      "step": 232760
    },
    {
      "epoch": 0.815800818008811,
      "grad_norm": 2.921875,
      "learning_rate": 4.0448245156299086e-05,
      "loss": 0.9435,
      "step": 232770
    },
    {
      "epoch": 0.8158358655157065,
      "grad_norm": 3.421875,
      "learning_rate": 4.044759612763538e-05,
      "loss": 0.9535,
      "step": 232780
    },
    {
      "epoch": 0.8158709130226022,
      "grad_norm": 2.6875,
      "learning_rate": 4.044694709897168e-05,
      "loss": 0.8427,
      "step": 232790
    },
    {
      "epoch": 0.8159059605294977,
      "grad_norm": 3.03125,
      "learning_rate": 4.0446298070307984e-05,
      "loss": 0.9128,
      "step": 232800
    },
    {
      "epoch": 0.8159410080363934,
      "grad_norm": 2.84375,
      "learning_rate": 4.044564904164428e-05,
      "loss": 0.8476,
      "step": 232810
    },
    {
      "epoch": 0.8159760555432889,
      "grad_norm": 3.25,
      "learning_rate": 4.044500001298058e-05,
      "loss": 0.9834,
      "step": 232820
    },
    {
      "epoch": 0.8160111030501845,
      "grad_norm": 2.453125,
      "learning_rate": 4.0444350984316874e-05,
      "loss": 0.8537,
      "step": 232830
    },
    {
      "epoch": 0.8160461505570801,
      "grad_norm": 2.78125,
      "learning_rate": 4.0443701955653176e-05,
      "loss": 0.9365,
      "step": 232840
    },
    {
      "epoch": 0.8160811980639757,
      "grad_norm": 2.578125,
      "learning_rate": 4.044305292698947e-05,
      "loss": 0.8324,
      "step": 232850
    },
    {
      "epoch": 0.8161162455708714,
      "grad_norm": 3.09375,
      "learning_rate": 4.044240389832577e-05,
      "loss": 0.8726,
      "step": 232860
    },
    {
      "epoch": 0.8161512930777669,
      "grad_norm": 2.671875,
      "learning_rate": 4.044175486966206e-05,
      "loss": 0.9414,
      "step": 232870
    },
    {
      "epoch": 0.8161863405846626,
      "grad_norm": 3.375,
      "learning_rate": 4.044110584099836e-05,
      "loss": 0.8344,
      "step": 232880
    },
    {
      "epoch": 0.8162213880915581,
      "grad_norm": 2.546875,
      "learning_rate": 4.044045681233466e-05,
      "loss": 0.8645,
      "step": 232890
    },
    {
      "epoch": 0.8162564355984537,
      "grad_norm": 2.609375,
      "learning_rate": 4.043980778367096e-05,
      "loss": 0.8645,
      "step": 232900
    },
    {
      "epoch": 0.8162914831053493,
      "grad_norm": 2.875,
      "learning_rate": 4.043915875500726e-05,
      "loss": 0.8881,
      "step": 232910
    },
    {
      "epoch": 0.8163265306122449,
      "grad_norm": 2.46875,
      "learning_rate": 4.043850972634355e-05,
      "loss": 0.9543,
      "step": 232920
    },
    {
      "epoch": 0.8163615781191405,
      "grad_norm": 2.828125,
      "learning_rate": 4.0437860697679854e-05,
      "loss": 0.8169,
      "step": 232930
    },
    {
      "epoch": 0.8163966256260361,
      "grad_norm": 3.09375,
      "learning_rate": 4.043721166901615e-05,
      "loss": 0.7887,
      "step": 232940
    },
    {
      "epoch": 0.8164316731329316,
      "grad_norm": 2.734375,
      "learning_rate": 4.043656264035245e-05,
      "loss": 0.9474,
      "step": 232950
    },
    {
      "epoch": 0.8164667206398273,
      "grad_norm": 2.859375,
      "learning_rate": 4.0435913611688745e-05,
      "loss": 0.9086,
      "step": 232960
    },
    {
      "epoch": 0.8165017681467229,
      "grad_norm": 2.8125,
      "learning_rate": 4.0435264583025046e-05,
      "loss": 0.9764,
      "step": 232970
    },
    {
      "epoch": 0.8165368156536185,
      "grad_norm": 2.578125,
      "learning_rate": 4.043461555436134e-05,
      "loss": 0.9003,
      "step": 232980
    },
    {
      "epoch": 0.8165718631605141,
      "grad_norm": 3.234375,
      "learning_rate": 4.043396652569764e-05,
      "loss": 0.962,
      "step": 232990
    },
    {
      "epoch": 0.8166069106674096,
      "grad_norm": 2.53125,
      "learning_rate": 4.043331749703394e-05,
      "loss": 0.9665,
      "step": 233000
    },
    {
      "epoch": 0.8166419581743053,
      "grad_norm": 3.125,
      "learning_rate": 4.043266846837024e-05,
      "loss": 0.9781,
      "step": 233010
    },
    {
      "epoch": 0.8166770056812008,
      "grad_norm": 3.34375,
      "learning_rate": 4.043201943970654e-05,
      "loss": 0.8466,
      "step": 233020
    },
    {
      "epoch": 0.8167120531880965,
      "grad_norm": 2.84375,
      "learning_rate": 4.0431370411042834e-05,
      "loss": 0.8638,
      "step": 233030
    },
    {
      "epoch": 0.816747100694992,
      "grad_norm": 3.015625,
      "learning_rate": 4.0430721382379136e-05,
      "loss": 0.8721,
      "step": 233040
    },
    {
      "epoch": 0.8167821482018877,
      "grad_norm": 2.828125,
      "learning_rate": 4.043007235371543e-05,
      "loss": 0.9044,
      "step": 233050
    },
    {
      "epoch": 0.8168171957087833,
      "grad_norm": 2.546875,
      "learning_rate": 4.042942332505173e-05,
      "loss": 0.8708,
      "step": 233060
    },
    {
      "epoch": 0.8168522432156788,
      "grad_norm": 2.5,
      "learning_rate": 4.0428774296388026e-05,
      "loss": 0.865,
      "step": 233070
    },
    {
      "epoch": 0.8168872907225745,
      "grad_norm": 2.984375,
      "learning_rate": 4.042812526772433e-05,
      "loss": 0.8434,
      "step": 233080
    },
    {
      "epoch": 0.81692233822947,
      "grad_norm": 3.171875,
      "learning_rate": 4.042747623906062e-05,
      "loss": 0.8443,
      "step": 233090
    },
    {
      "epoch": 0.8169573857363657,
      "grad_norm": 2.921875,
      "learning_rate": 4.0426827210396924e-05,
      "loss": 0.8841,
      "step": 233100
    },
    {
      "epoch": 0.8169924332432612,
      "grad_norm": 2.859375,
      "learning_rate": 4.042617818173322e-05,
      "loss": 0.8334,
      "step": 233110
    },
    {
      "epoch": 0.8170274807501569,
      "grad_norm": 2.9375,
      "learning_rate": 4.042552915306952e-05,
      "loss": 0.7985,
      "step": 233120
    },
    {
      "epoch": 0.8170625282570524,
      "grad_norm": 2.734375,
      "learning_rate": 4.0424880124405814e-05,
      "loss": 0.8887,
      "step": 233130
    },
    {
      "epoch": 0.817097575763948,
      "grad_norm": 2.5625,
      "learning_rate": 4.0424231095742116e-05,
      "loss": 0.8414,
      "step": 233140
    },
    {
      "epoch": 0.8171326232708437,
      "grad_norm": 2.9375,
      "learning_rate": 4.042358206707841e-05,
      "loss": 0.9458,
      "step": 233150
    },
    {
      "epoch": 0.8171676707777392,
      "grad_norm": 2.828125,
      "learning_rate": 4.042293303841471e-05,
      "loss": 0.9393,
      "step": 233160
    },
    {
      "epoch": 0.8172027182846349,
      "grad_norm": 2.578125,
      "learning_rate": 4.042228400975101e-05,
      "loss": 0.8116,
      "step": 233170
    },
    {
      "epoch": 0.8172377657915304,
      "grad_norm": 2.984375,
      "learning_rate": 4.042163498108731e-05,
      "loss": 0.893,
      "step": 233180
    },
    {
      "epoch": 0.817272813298426,
      "grad_norm": 3.390625,
      "learning_rate": 4.042098595242361e-05,
      "loss": 0.9095,
      "step": 233190
    },
    {
      "epoch": 0.8173078608053216,
      "grad_norm": 2.890625,
      "learning_rate": 4.0420336923759904e-05,
      "loss": 0.9492,
      "step": 233200
    },
    {
      "epoch": 0.8173429083122172,
      "grad_norm": 2.890625,
      "learning_rate": 4.0419687895096205e-05,
      "loss": 0.9418,
      "step": 233210
    },
    {
      "epoch": 0.8173779558191128,
      "grad_norm": 3.40625,
      "learning_rate": 4.04190388664325e-05,
      "loss": 0.9128,
      "step": 233220
    },
    {
      "epoch": 0.8174130033260084,
      "grad_norm": 2.578125,
      "learning_rate": 4.04183898377688e-05,
      "loss": 0.9333,
      "step": 233230
    },
    {
      "epoch": 0.817448050832904,
      "grad_norm": 3.125,
      "learning_rate": 4.0417740809105096e-05,
      "loss": 0.9131,
      "step": 233240
    },
    {
      "epoch": 0.8174830983397996,
      "grad_norm": 2.640625,
      "learning_rate": 4.041709178044139e-05,
      "loss": 0.8634,
      "step": 233250
    },
    {
      "epoch": 0.8175181458466952,
      "grad_norm": 2.875,
      "learning_rate": 4.041644275177769e-05,
      "loss": 0.8917,
      "step": 233260
    },
    {
      "epoch": 0.8175531933535908,
      "grad_norm": 3.0,
      "learning_rate": 4.0415793723113986e-05,
      "loss": 0.9299,
      "step": 233270
    },
    {
      "epoch": 0.8175882408604864,
      "grad_norm": 2.828125,
      "learning_rate": 4.041514469445029e-05,
      "loss": 0.9029,
      "step": 233280
    },
    {
      "epoch": 0.817623288367382,
      "grad_norm": 2.703125,
      "learning_rate": 4.041449566578658e-05,
      "loss": 0.8933,
      "step": 233290
    },
    {
      "epoch": 0.8176583358742776,
      "grad_norm": 2.71875,
      "learning_rate": 4.0413846637122884e-05,
      "loss": 0.879,
      "step": 233300
    },
    {
      "epoch": 0.8176933833811731,
      "grad_norm": 2.609375,
      "learning_rate": 4.041319760845918e-05,
      "loss": 0.8033,
      "step": 233310
    },
    {
      "epoch": 0.8177284308880688,
      "grad_norm": 2.984375,
      "learning_rate": 4.041254857979548e-05,
      "loss": 0.9538,
      "step": 233320
    },
    {
      "epoch": 0.8177634783949643,
      "grad_norm": 2.6875,
      "learning_rate": 4.0411899551131774e-05,
      "loss": 0.8637,
      "step": 233330
    },
    {
      "epoch": 0.81779852590186,
      "grad_norm": 2.890625,
      "learning_rate": 4.0411250522468076e-05,
      "loss": 0.8358,
      "step": 233340
    },
    {
      "epoch": 0.8178335734087556,
      "grad_norm": 2.625,
      "learning_rate": 4.041060149380437e-05,
      "loss": 0.9269,
      "step": 233350
    },
    {
      "epoch": 0.8178686209156512,
      "grad_norm": 3.0,
      "learning_rate": 4.040995246514067e-05,
      "loss": 0.8234,
      "step": 233360
    },
    {
      "epoch": 0.8179036684225468,
      "grad_norm": 3.046875,
      "learning_rate": 4.0409303436476966e-05,
      "loss": 0.8982,
      "step": 233370
    },
    {
      "epoch": 0.8179387159294423,
      "grad_norm": 2.9375,
      "learning_rate": 4.040865440781327e-05,
      "loss": 0.9272,
      "step": 233380
    },
    {
      "epoch": 0.817973763436338,
      "grad_norm": 2.578125,
      "learning_rate": 4.040800537914957e-05,
      "loss": 0.8301,
      "step": 233390
    },
    {
      "epoch": 0.8180088109432335,
      "grad_norm": 3.1875,
      "learning_rate": 4.0407356350485864e-05,
      "loss": 0.8634,
      "step": 233400
    },
    {
      "epoch": 0.8180438584501292,
      "grad_norm": 3.84375,
      "learning_rate": 4.0406707321822165e-05,
      "loss": 0.896,
      "step": 233410
    },
    {
      "epoch": 0.8180789059570247,
      "grad_norm": 2.90625,
      "learning_rate": 4.040605829315846e-05,
      "loss": 0.827,
      "step": 233420
    },
    {
      "epoch": 0.8181139534639204,
      "grad_norm": 2.75,
      "learning_rate": 4.040540926449476e-05,
      "loss": 0.88,
      "step": 233430
    },
    {
      "epoch": 0.8181490009708159,
      "grad_norm": 2.890625,
      "learning_rate": 4.0404760235831056e-05,
      "loss": 0.8703,
      "step": 233440
    },
    {
      "epoch": 0.8181840484777115,
      "grad_norm": 2.328125,
      "learning_rate": 4.040411120716736e-05,
      "loss": 0.8758,
      "step": 233450
    },
    {
      "epoch": 0.8182190959846072,
      "grad_norm": 3.203125,
      "learning_rate": 4.040346217850365e-05,
      "loss": 0.9806,
      "step": 233460
    },
    {
      "epoch": 0.8182541434915027,
      "grad_norm": 3.40625,
      "learning_rate": 4.040281314983995e-05,
      "loss": 0.8805,
      "step": 233470
    },
    {
      "epoch": 0.8182891909983984,
      "grad_norm": 2.6875,
      "learning_rate": 4.040216412117625e-05,
      "loss": 0.9306,
      "step": 233480
    },
    {
      "epoch": 0.8183242385052939,
      "grad_norm": 2.921875,
      "learning_rate": 4.040151509251255e-05,
      "loss": 0.8451,
      "step": 233490
    },
    {
      "epoch": 0.8183592860121895,
      "grad_norm": 2.296875,
      "learning_rate": 4.0400866063848844e-05,
      "loss": 0.9158,
      "step": 233500
    },
    {
      "epoch": 0.8183943335190851,
      "grad_norm": 2.84375,
      "learning_rate": 4.0400217035185145e-05,
      "loss": 0.8762,
      "step": 233510
    },
    {
      "epoch": 0.8184293810259807,
      "grad_norm": 2.578125,
      "learning_rate": 4.039956800652144e-05,
      "loss": 0.8913,
      "step": 233520
    },
    {
      "epoch": 0.8184644285328763,
      "grad_norm": 3.0,
      "learning_rate": 4.039891897785774e-05,
      "loss": 0.9457,
      "step": 233530
    },
    {
      "epoch": 0.8184994760397719,
      "grad_norm": 3.0625,
      "learning_rate": 4.039826994919404e-05,
      "loss": 0.9176,
      "step": 233540
    },
    {
      "epoch": 0.8185345235466676,
      "grad_norm": 3.015625,
      "learning_rate": 4.039762092053034e-05,
      "loss": 0.8829,
      "step": 233550
    },
    {
      "epoch": 0.8185695710535631,
      "grad_norm": 3.125,
      "learning_rate": 4.039697189186664e-05,
      "loss": 0.8889,
      "step": 233560
    },
    {
      "epoch": 0.8186046185604587,
      "grad_norm": 2.8125,
      "learning_rate": 4.039632286320293e-05,
      "loss": 0.8542,
      "step": 233570
    },
    {
      "epoch": 0.8186396660673543,
      "grad_norm": 2.359375,
      "learning_rate": 4.0395673834539235e-05,
      "loss": 0.9426,
      "step": 233580
    },
    {
      "epoch": 0.8186747135742499,
      "grad_norm": 2.828125,
      "learning_rate": 4.039502480587553e-05,
      "loss": 0.8801,
      "step": 233590
    },
    {
      "epoch": 0.8187097610811455,
      "grad_norm": 2.765625,
      "learning_rate": 4.039437577721183e-05,
      "loss": 0.9408,
      "step": 233600
    },
    {
      "epoch": 0.8187448085880411,
      "grad_norm": 2.796875,
      "learning_rate": 4.0393726748548125e-05,
      "loss": 0.8705,
      "step": 233610
    },
    {
      "epoch": 0.8187798560949366,
      "grad_norm": 3.171875,
      "learning_rate": 4.039307771988443e-05,
      "loss": 0.9075,
      "step": 233620
    },
    {
      "epoch": 0.8188149036018323,
      "grad_norm": 3.328125,
      "learning_rate": 4.039242869122072e-05,
      "loss": 0.9077,
      "step": 233630
    },
    {
      "epoch": 0.8188499511087279,
      "grad_norm": 2.953125,
      "learning_rate": 4.0391779662557016e-05,
      "loss": 0.8324,
      "step": 233640
    },
    {
      "epoch": 0.8188849986156235,
      "grad_norm": 3.078125,
      "learning_rate": 4.039113063389332e-05,
      "loss": 0.9392,
      "step": 233650
    },
    {
      "epoch": 0.8189200461225191,
      "grad_norm": 2.84375,
      "learning_rate": 4.039048160522961e-05,
      "loss": 0.8788,
      "step": 233660
    },
    {
      "epoch": 0.8189550936294147,
      "grad_norm": 2.6875,
      "learning_rate": 4.038983257656591e-05,
      "loss": 0.7965,
      "step": 233670
    },
    {
      "epoch": 0.8189901411363103,
      "grad_norm": 3.03125,
      "learning_rate": 4.038918354790221e-05,
      "loss": 0.9345,
      "step": 233680
    },
    {
      "epoch": 0.8190251886432058,
      "grad_norm": 3.265625,
      "learning_rate": 4.038853451923851e-05,
      "loss": 0.9048,
      "step": 233690
    },
    {
      "epoch": 0.8190602361501015,
      "grad_norm": 2.765625,
      "learning_rate": 4.0387885490574804e-05,
      "loss": 0.9452,
      "step": 233700
    },
    {
      "epoch": 0.819095283656997,
      "grad_norm": 3.109375,
      "learning_rate": 4.0387236461911105e-05,
      "loss": 0.8755,
      "step": 233710
    },
    {
      "epoch": 0.8191303311638927,
      "grad_norm": 3.4375,
      "learning_rate": 4.03865874332474e-05,
      "loss": 0.9117,
      "step": 233720
    },
    {
      "epoch": 0.8191653786707882,
      "grad_norm": 3.203125,
      "learning_rate": 4.03859384045837e-05,
      "loss": 0.9127,
      "step": 233730
    },
    {
      "epoch": 0.8192004261776838,
      "grad_norm": 3.15625,
      "learning_rate": 4.0385289375919996e-05,
      "loss": 0.8819,
      "step": 233740
    },
    {
      "epoch": 0.8192354736845795,
      "grad_norm": 3.09375,
      "learning_rate": 4.03846403472563e-05,
      "loss": 0.8828,
      "step": 233750
    },
    {
      "epoch": 0.819270521191475,
      "grad_norm": 3.0625,
      "learning_rate": 4.03839913185926e-05,
      "loss": 0.8518,
      "step": 233760
    },
    {
      "epoch": 0.8193055686983707,
      "grad_norm": 3.828125,
      "learning_rate": 4.038334228992889e-05,
      "loss": 0.9781,
      "step": 233770
    },
    {
      "epoch": 0.8193406162052662,
      "grad_norm": 2.578125,
      "learning_rate": 4.0382693261265195e-05,
      "loss": 0.9453,
      "step": 233780
    },
    {
      "epoch": 0.8193756637121619,
      "grad_norm": 2.78125,
      "learning_rate": 4.038204423260149e-05,
      "loss": 0.8869,
      "step": 233790
    },
    {
      "epoch": 0.8194107112190574,
      "grad_norm": 3.0625,
      "learning_rate": 4.038139520393779e-05,
      "loss": 0.8938,
      "step": 233800
    },
    {
      "epoch": 0.819445758725953,
      "grad_norm": 2.765625,
      "learning_rate": 4.0380746175274085e-05,
      "loss": 0.8694,
      "step": 233810
    },
    {
      "epoch": 0.8194808062328486,
      "grad_norm": 2.671875,
      "learning_rate": 4.038009714661039e-05,
      "loss": 0.9331,
      "step": 233820
    },
    {
      "epoch": 0.8195158537397442,
      "grad_norm": 3.4375,
      "learning_rate": 4.037944811794668e-05,
      "loss": 0.8713,
      "step": 233830
    },
    {
      "epoch": 0.8195509012466399,
      "grad_norm": 3.21875,
      "learning_rate": 4.037879908928298e-05,
      "loss": 0.8933,
      "step": 233840
    },
    {
      "epoch": 0.8195859487535354,
      "grad_norm": 3.046875,
      "learning_rate": 4.037815006061928e-05,
      "loss": 0.8495,
      "step": 233850
    },
    {
      "epoch": 0.8196209962604311,
      "grad_norm": 3.21875,
      "learning_rate": 4.037750103195558e-05,
      "loss": 1.0112,
      "step": 233860
    },
    {
      "epoch": 0.8196560437673266,
      "grad_norm": 2.9375,
      "learning_rate": 4.037685200329187e-05,
      "loss": 0.8309,
      "step": 233870
    },
    {
      "epoch": 0.8196910912742222,
      "grad_norm": 3.28125,
      "learning_rate": 4.0376202974628175e-05,
      "loss": 0.9856,
      "step": 233880
    },
    {
      "epoch": 0.8197261387811178,
      "grad_norm": 3.078125,
      "learning_rate": 4.0375553945964476e-05,
      "loss": 0.8776,
      "step": 233890
    },
    {
      "epoch": 0.8197611862880134,
      "grad_norm": 3.28125,
      "learning_rate": 4.037490491730077e-05,
      "loss": 0.9306,
      "step": 233900
    },
    {
      "epoch": 0.819796233794909,
      "grad_norm": 2.890625,
      "learning_rate": 4.037425588863707e-05,
      "loss": 0.8747,
      "step": 233910
    },
    {
      "epoch": 0.8198312813018046,
      "grad_norm": 2.8125,
      "learning_rate": 4.037360685997337e-05,
      "loss": 0.8486,
      "step": 233920
    },
    {
      "epoch": 0.8198663288087001,
      "grad_norm": 3.109375,
      "learning_rate": 4.037295783130967e-05,
      "loss": 0.9968,
      "step": 233930
    },
    {
      "epoch": 0.8199013763155958,
      "grad_norm": 3.21875,
      "learning_rate": 4.037230880264596e-05,
      "loss": 0.928,
      "step": 233940
    },
    {
      "epoch": 0.8199364238224914,
      "grad_norm": 3.765625,
      "learning_rate": 4.0371659773982264e-05,
      "loss": 1.0299,
      "step": 233950
    },
    {
      "epoch": 0.819971471329387,
      "grad_norm": 2.828125,
      "learning_rate": 4.037101074531856e-05,
      "loss": 0.9564,
      "step": 233960
    },
    {
      "epoch": 0.8200065188362826,
      "grad_norm": 2.984375,
      "learning_rate": 4.037036171665486e-05,
      "loss": 0.9711,
      "step": 233970
    },
    {
      "epoch": 0.8200415663431782,
      "grad_norm": 2.6875,
      "learning_rate": 4.0369712687991155e-05,
      "loss": 0.9231,
      "step": 233980
    },
    {
      "epoch": 0.8200766138500738,
      "grad_norm": 2.828125,
      "learning_rate": 4.0369063659327456e-05,
      "loss": 0.8903,
      "step": 233990
    },
    {
      "epoch": 0.8201116613569693,
      "grad_norm": 3.40625,
      "learning_rate": 4.036841463066375e-05,
      "loss": 0.9439,
      "step": 234000
    },
    {
      "epoch": 0.820146708863865,
      "grad_norm": 2.671875,
      "learning_rate": 4.0367765602000045e-05,
      "loss": 0.8771,
      "step": 234010
    },
    {
      "epoch": 0.8201817563707605,
      "grad_norm": 2.671875,
      "learning_rate": 4.036711657333635e-05,
      "loss": 0.9035,
      "step": 234020
    },
    {
      "epoch": 0.8202168038776562,
      "grad_norm": 2.8125,
      "learning_rate": 4.036646754467264e-05,
      "loss": 0.8451,
      "step": 234030
    },
    {
      "epoch": 0.8202518513845518,
      "grad_norm": 3.1875,
      "learning_rate": 4.036581851600894e-05,
      "loss": 0.9439,
      "step": 234040
    },
    {
      "epoch": 0.8202868988914473,
      "grad_norm": 2.765625,
      "learning_rate": 4.036516948734524e-05,
      "loss": 0.9286,
      "step": 234050
    },
    {
      "epoch": 0.820321946398343,
      "grad_norm": 2.8125,
      "learning_rate": 4.036452045868154e-05,
      "loss": 0.9011,
      "step": 234060
    },
    {
      "epoch": 0.8203569939052385,
      "grad_norm": 2.75,
      "learning_rate": 4.036387143001783e-05,
      "loss": 0.932,
      "step": 234070
    },
    {
      "epoch": 0.8203920414121342,
      "grad_norm": 3.453125,
      "learning_rate": 4.0363222401354135e-05,
      "loss": 0.9225,
      "step": 234080
    },
    {
      "epoch": 0.8204270889190297,
      "grad_norm": 2.875,
      "learning_rate": 4.036257337269043e-05,
      "loss": 0.8334,
      "step": 234090
    },
    {
      "epoch": 0.8204621364259254,
      "grad_norm": 3.296875,
      "learning_rate": 4.036192434402673e-05,
      "loss": 0.9095,
      "step": 234100
    },
    {
      "epoch": 0.8204971839328209,
      "grad_norm": 2.859375,
      "learning_rate": 4.0361275315363025e-05,
      "loss": 0.9163,
      "step": 234110
    },
    {
      "epoch": 0.8205322314397165,
      "grad_norm": 3.125,
      "learning_rate": 4.036062628669933e-05,
      "loss": 0.8594,
      "step": 234120
    },
    {
      "epoch": 0.8205672789466121,
      "grad_norm": 2.6875,
      "learning_rate": 4.035997725803563e-05,
      "loss": 0.9716,
      "step": 234130
    },
    {
      "epoch": 0.8206023264535077,
      "grad_norm": 2.984375,
      "learning_rate": 4.035932822937192e-05,
      "loss": 0.9048,
      "step": 234140
    },
    {
      "epoch": 0.8206373739604034,
      "grad_norm": 2.546875,
      "learning_rate": 4.0358679200708224e-05,
      "loss": 0.8438,
      "step": 234150
    },
    {
      "epoch": 0.8206724214672989,
      "grad_norm": 2.65625,
      "learning_rate": 4.035803017204452e-05,
      "loss": 0.8416,
      "step": 234160
    },
    {
      "epoch": 0.8207074689741946,
      "grad_norm": 3.21875,
      "learning_rate": 4.035738114338082e-05,
      "loss": 0.8948,
      "step": 234170
    },
    {
      "epoch": 0.8207425164810901,
      "grad_norm": 2.9375,
      "learning_rate": 4.0356732114717115e-05,
      "loss": 0.8135,
      "step": 234180
    },
    {
      "epoch": 0.8207775639879857,
      "grad_norm": 3.015625,
      "learning_rate": 4.0356083086053416e-05,
      "loss": 0.9058,
      "step": 234190
    },
    {
      "epoch": 0.8208126114948813,
      "grad_norm": 2.53125,
      "learning_rate": 4.035543405738971e-05,
      "loss": 0.7854,
      "step": 234200
    },
    {
      "epoch": 0.8208476590017769,
      "grad_norm": 3.078125,
      "learning_rate": 4.035478502872601e-05,
      "loss": 0.9307,
      "step": 234210
    },
    {
      "epoch": 0.8208827065086725,
      "grad_norm": 2.984375,
      "learning_rate": 4.035413600006231e-05,
      "loss": 0.7583,
      "step": 234220
    },
    {
      "epoch": 0.8209177540155681,
      "grad_norm": 2.71875,
      "learning_rate": 4.035348697139861e-05,
      "loss": 0.9322,
      "step": 234230
    },
    {
      "epoch": 0.8209528015224637,
      "grad_norm": 2.921875,
      "learning_rate": 4.03528379427349e-05,
      "loss": 1.0345,
      "step": 234240
    },
    {
      "epoch": 0.8209878490293593,
      "grad_norm": 3.015625,
      "learning_rate": 4.0352188914071204e-05,
      "loss": 0.8356,
      "step": 234250
    },
    {
      "epoch": 0.8210228965362549,
      "grad_norm": 2.734375,
      "learning_rate": 4.0351539885407505e-05,
      "loss": 0.8826,
      "step": 234260
    },
    {
      "epoch": 0.8210579440431505,
      "grad_norm": 2.6875,
      "learning_rate": 4.03508908567438e-05,
      "loss": 0.9102,
      "step": 234270
    },
    {
      "epoch": 0.8210929915500461,
      "grad_norm": 2.90625,
      "learning_rate": 4.03502418280801e-05,
      "loss": 0.9575,
      "step": 234280
    },
    {
      "epoch": 0.8211280390569416,
      "grad_norm": 2.875,
      "learning_rate": 4.0349592799416396e-05,
      "loss": 0.824,
      "step": 234290
    },
    {
      "epoch": 0.8211630865638373,
      "grad_norm": 2.6875,
      "learning_rate": 4.03489437707527e-05,
      "loss": 0.9028,
      "step": 234300
    },
    {
      "epoch": 0.8211981340707328,
      "grad_norm": 3.109375,
      "learning_rate": 4.034829474208899e-05,
      "loss": 0.8553,
      "step": 234310
    },
    {
      "epoch": 0.8212331815776285,
      "grad_norm": 3.03125,
      "learning_rate": 4.0347645713425293e-05,
      "loss": 0.8776,
      "step": 234320
    },
    {
      "epoch": 0.8212682290845241,
      "grad_norm": 3.15625,
      "learning_rate": 4.034699668476159e-05,
      "loss": 0.9393,
      "step": 234330
    },
    {
      "epoch": 0.8213032765914197,
      "grad_norm": 2.734375,
      "learning_rate": 4.034634765609789e-05,
      "loss": 0.9021,
      "step": 234340
    },
    {
      "epoch": 0.8213383240983153,
      "grad_norm": 2.78125,
      "learning_rate": 4.0345698627434184e-05,
      "loss": 0.8924,
      "step": 234350
    },
    {
      "epoch": 0.8213733716052108,
      "grad_norm": 3.03125,
      "learning_rate": 4.0345049598770485e-05,
      "loss": 0.971,
      "step": 234360
    },
    {
      "epoch": 0.8214084191121065,
      "grad_norm": 2.8125,
      "learning_rate": 4.034440057010678e-05,
      "loss": 0.9665,
      "step": 234370
    },
    {
      "epoch": 0.821443466619002,
      "grad_norm": 3.203125,
      "learning_rate": 4.0343751541443075e-05,
      "loss": 0.9288,
      "step": 234380
    },
    {
      "epoch": 0.8214785141258977,
      "grad_norm": 2.953125,
      "learning_rate": 4.0343102512779376e-05,
      "loss": 0.8688,
      "step": 234390
    },
    {
      "epoch": 0.8215135616327932,
      "grad_norm": 3.015625,
      "learning_rate": 4.034245348411567e-05,
      "loss": 0.9307,
      "step": 234400
    },
    {
      "epoch": 0.8215486091396889,
      "grad_norm": 2.890625,
      "learning_rate": 4.034180445545197e-05,
      "loss": 0.8284,
      "step": 234410
    },
    {
      "epoch": 0.8215836566465844,
      "grad_norm": 2.4375,
      "learning_rate": 4.034115542678827e-05,
      "loss": 0.864,
      "step": 234420
    },
    {
      "epoch": 0.82161870415348,
      "grad_norm": 2.96875,
      "learning_rate": 4.034050639812457e-05,
      "loss": 0.8267,
      "step": 234430
    },
    {
      "epoch": 0.8216537516603757,
      "grad_norm": 2.5625,
      "learning_rate": 4.033985736946086e-05,
      "loss": 0.8946,
      "step": 234440
    },
    {
      "epoch": 0.8216887991672712,
      "grad_norm": 3.203125,
      "learning_rate": 4.0339208340797164e-05,
      "loss": 0.8674,
      "step": 234450
    },
    {
      "epoch": 0.8217238466741669,
      "grad_norm": 3.171875,
      "learning_rate": 4.033855931213346e-05,
      "loss": 0.8249,
      "step": 234460
    },
    {
      "epoch": 0.8217588941810624,
      "grad_norm": 2.875,
      "learning_rate": 4.033791028346976e-05,
      "loss": 0.9223,
      "step": 234470
    },
    {
      "epoch": 0.821793941687958,
      "grad_norm": 3.140625,
      "learning_rate": 4.0337261254806055e-05,
      "loss": 0.9413,
      "step": 234480
    },
    {
      "epoch": 0.8218289891948536,
      "grad_norm": 2.890625,
      "learning_rate": 4.0336612226142356e-05,
      "loss": 0.8543,
      "step": 234490
    },
    {
      "epoch": 0.8218640367017492,
      "grad_norm": 2.671875,
      "learning_rate": 4.033596319747866e-05,
      "loss": 0.8553,
      "step": 234500
    },
    {
      "epoch": 0.8218990842086448,
      "grad_norm": 2.921875,
      "learning_rate": 4.033531416881495e-05,
      "loss": 0.8766,
      "step": 234510
    },
    {
      "epoch": 0.8219341317155404,
      "grad_norm": 3.078125,
      "learning_rate": 4.0334665140151253e-05,
      "loss": 0.9208,
      "step": 234520
    },
    {
      "epoch": 0.8219691792224361,
      "grad_norm": 2.953125,
      "learning_rate": 4.033401611148755e-05,
      "loss": 0.9072,
      "step": 234530
    },
    {
      "epoch": 0.8220042267293316,
      "grad_norm": 3.078125,
      "learning_rate": 4.033336708282385e-05,
      "loss": 0.8444,
      "step": 234540
    },
    {
      "epoch": 0.8220392742362272,
      "grad_norm": 2.75,
      "learning_rate": 4.0332718054160144e-05,
      "loss": 0.8404,
      "step": 234550
    },
    {
      "epoch": 0.8220743217431228,
      "grad_norm": 3.0625,
      "learning_rate": 4.0332069025496445e-05,
      "loss": 0.8402,
      "step": 234560
    },
    {
      "epoch": 0.8221093692500184,
      "grad_norm": 3.4375,
      "learning_rate": 4.033141999683274e-05,
      "loss": 0.8766,
      "step": 234570
    },
    {
      "epoch": 0.822144416756914,
      "grad_norm": 3.109375,
      "learning_rate": 4.033077096816904e-05,
      "loss": 0.9653,
      "step": 234580
    },
    {
      "epoch": 0.8221794642638096,
      "grad_norm": 2.59375,
      "learning_rate": 4.0330121939505336e-05,
      "loss": 0.8526,
      "step": 234590
    },
    {
      "epoch": 0.8222145117707051,
      "grad_norm": 3.15625,
      "learning_rate": 4.032947291084164e-05,
      "loss": 0.9221,
      "step": 234600
    },
    {
      "epoch": 0.8222495592776008,
      "grad_norm": 2.546875,
      "learning_rate": 4.032882388217793e-05,
      "loss": 0.8727,
      "step": 234610
    },
    {
      "epoch": 0.8222846067844963,
      "grad_norm": 2.796875,
      "learning_rate": 4.0328174853514233e-05,
      "loss": 0.8318,
      "step": 234620
    },
    {
      "epoch": 0.822319654291392,
      "grad_norm": 2.515625,
      "learning_rate": 4.0327525824850535e-05,
      "loss": 0.8808,
      "step": 234630
    },
    {
      "epoch": 0.8223547017982876,
      "grad_norm": 3.0,
      "learning_rate": 4.032687679618683e-05,
      "loss": 0.8589,
      "step": 234640
    },
    {
      "epoch": 0.8223897493051832,
      "grad_norm": 3.21875,
      "learning_rate": 4.032622776752313e-05,
      "loss": 0.9055,
      "step": 234650
    },
    {
      "epoch": 0.8224247968120788,
      "grad_norm": 2.71875,
      "learning_rate": 4.0325578738859425e-05,
      "loss": 0.9064,
      "step": 234660
    },
    {
      "epoch": 0.8224598443189743,
      "grad_norm": 2.75,
      "learning_rate": 4.032492971019573e-05,
      "loss": 0.7721,
      "step": 234670
    },
    {
      "epoch": 0.82249489182587,
      "grad_norm": 2.625,
      "learning_rate": 4.032428068153202e-05,
      "loss": 0.8871,
      "step": 234680
    },
    {
      "epoch": 0.8225299393327655,
      "grad_norm": 2.921875,
      "learning_rate": 4.032363165286832e-05,
      "loss": 0.9378,
      "step": 234690
    },
    {
      "epoch": 0.8225649868396612,
      "grad_norm": 3.046875,
      "learning_rate": 4.032298262420462e-05,
      "loss": 0.9735,
      "step": 234700
    },
    {
      "epoch": 0.8226000343465567,
      "grad_norm": 2.765625,
      "learning_rate": 4.032233359554092e-05,
      "loss": 0.9317,
      "step": 234710
    },
    {
      "epoch": 0.8226350818534524,
      "grad_norm": 2.90625,
      "learning_rate": 4.0321684566877213e-05,
      "loss": 0.8149,
      "step": 234720
    },
    {
      "epoch": 0.822670129360348,
      "grad_norm": 2.703125,
      "learning_rate": 4.0321035538213515e-05,
      "loss": 0.8261,
      "step": 234730
    },
    {
      "epoch": 0.8227051768672435,
      "grad_norm": 2.96875,
      "learning_rate": 4.032038650954981e-05,
      "loss": 0.9096,
      "step": 234740
    },
    {
      "epoch": 0.8227402243741392,
      "grad_norm": 2.765625,
      "learning_rate": 4.0319737480886104e-05,
      "loss": 0.9065,
      "step": 234750
    },
    {
      "epoch": 0.8227752718810347,
      "grad_norm": 3.296875,
      "learning_rate": 4.0319088452222405e-05,
      "loss": 0.9445,
      "step": 234760
    },
    {
      "epoch": 0.8228103193879304,
      "grad_norm": 2.828125,
      "learning_rate": 4.03184394235587e-05,
      "loss": 0.9335,
      "step": 234770
    },
    {
      "epoch": 0.8228453668948259,
      "grad_norm": 3.203125,
      "learning_rate": 4.0317790394895e-05,
      "loss": 0.938,
      "step": 234780
    },
    {
      "epoch": 0.8228804144017215,
      "grad_norm": 2.96875,
      "learning_rate": 4.0317141366231296e-05,
      "loss": 0.9207,
      "step": 234790
    },
    {
      "epoch": 0.8229154619086171,
      "grad_norm": 2.6875,
      "learning_rate": 4.03164923375676e-05,
      "loss": 0.8581,
      "step": 234800
    },
    {
      "epoch": 0.8229505094155127,
      "grad_norm": 2.390625,
      "learning_rate": 4.031584330890389e-05,
      "loss": 0.8608,
      "step": 234810
    },
    {
      "epoch": 0.8229855569224084,
      "grad_norm": 3.375,
      "learning_rate": 4.0315194280240193e-05,
      "loss": 0.9626,
      "step": 234820
    },
    {
      "epoch": 0.8230206044293039,
      "grad_norm": 2.6875,
      "learning_rate": 4.031454525157649e-05,
      "loss": 0.8821,
      "step": 234830
    },
    {
      "epoch": 0.8230556519361996,
      "grad_norm": 2.78125,
      "learning_rate": 4.031389622291279e-05,
      "loss": 0.9241,
      "step": 234840
    },
    {
      "epoch": 0.8230906994430951,
      "grad_norm": 2.8125,
      "learning_rate": 4.031324719424909e-05,
      "loss": 0.9004,
      "step": 234850
    },
    {
      "epoch": 0.8231257469499907,
      "grad_norm": 2.90625,
      "learning_rate": 4.0312598165585385e-05,
      "loss": 0.8518,
      "step": 234860
    },
    {
      "epoch": 0.8231607944568863,
      "grad_norm": 2.9375,
      "learning_rate": 4.031194913692169e-05,
      "loss": 0.9227,
      "step": 234870
    },
    {
      "epoch": 0.8231958419637819,
      "grad_norm": 2.921875,
      "learning_rate": 4.031130010825798e-05,
      "loss": 0.9988,
      "step": 234880
    },
    {
      "epoch": 0.8232308894706775,
      "grad_norm": 3.0625,
      "learning_rate": 4.031065107959428e-05,
      "loss": 0.9003,
      "step": 234890
    },
    {
      "epoch": 0.8232659369775731,
      "grad_norm": 2.8125,
      "learning_rate": 4.031000205093058e-05,
      "loss": 0.8582,
      "step": 234900
    },
    {
      "epoch": 0.8233009844844686,
      "grad_norm": 2.609375,
      "learning_rate": 4.030935302226688e-05,
      "loss": 0.8052,
      "step": 234910
    },
    {
      "epoch": 0.8233360319913643,
      "grad_norm": 3.015625,
      "learning_rate": 4.0308703993603173e-05,
      "loss": 0.9287,
      "step": 234920
    },
    {
      "epoch": 0.8233710794982599,
      "grad_norm": 3.140625,
      "learning_rate": 4.0308054964939475e-05,
      "loss": 0.9747,
      "step": 234930
    },
    {
      "epoch": 0.8234061270051555,
      "grad_norm": 2.46875,
      "learning_rate": 4.030740593627577e-05,
      "loss": 0.79,
      "step": 234940
    },
    {
      "epoch": 0.8234411745120511,
      "grad_norm": 3.15625,
      "learning_rate": 4.030675690761207e-05,
      "loss": 0.9325,
      "step": 234950
    },
    {
      "epoch": 0.8234762220189467,
      "grad_norm": 3.046875,
      "learning_rate": 4.0306107878948365e-05,
      "loss": 0.88,
      "step": 234960
    },
    {
      "epoch": 0.8235112695258423,
      "grad_norm": 3.09375,
      "learning_rate": 4.030545885028467e-05,
      "loss": 0.9505,
      "step": 234970
    },
    {
      "epoch": 0.8235463170327378,
      "grad_norm": 3.875,
      "learning_rate": 4.030480982162096e-05,
      "loss": 0.8819,
      "step": 234980
    },
    {
      "epoch": 0.8235813645396335,
      "grad_norm": 3.09375,
      "learning_rate": 4.030416079295726e-05,
      "loss": 0.8894,
      "step": 234990
    },
    {
      "epoch": 0.823616412046529,
      "grad_norm": 3.859375,
      "learning_rate": 4.0303511764293564e-05,
      "loss": 0.9809,
      "step": 235000
    },
    {
      "epoch": 0.823616412046529,
      "eval_loss": 0.834777295589447,
      "eval_runtime": 553.0207,
      "eval_samples_per_second": 687.924,
      "eval_steps_per_second": 57.327,
      "step": 235000
    },
    {
      "epoch": 0.8236514595534247,
      "grad_norm": 2.8125,
      "learning_rate": 4.030286273562986e-05,
      "loss": 0.8623,
      "step": 235010
    },
    {
      "epoch": 0.8236865070603203,
      "grad_norm": 2.625,
      "learning_rate": 4.030221370696616e-05,
      "loss": 0.8206,
      "step": 235020
    },
    {
      "epoch": 0.8237215545672159,
      "grad_norm": 3.359375,
      "learning_rate": 4.0301564678302455e-05,
      "loss": 0.9228,
      "step": 235030
    },
    {
      "epoch": 0.8237566020741115,
      "grad_norm": 2.90625,
      "learning_rate": 4.0300915649638756e-05,
      "loss": 0.8804,
      "step": 235040
    },
    {
      "epoch": 0.823791649581007,
      "grad_norm": 3.265625,
      "learning_rate": 4.030026662097505e-05,
      "loss": 0.8439,
      "step": 235050
    },
    {
      "epoch": 0.8238266970879027,
      "grad_norm": 3.09375,
      "learning_rate": 4.029961759231135e-05,
      "loss": 0.8637,
      "step": 235060
    },
    {
      "epoch": 0.8238617445947982,
      "grad_norm": 3.328125,
      "learning_rate": 4.029896856364765e-05,
      "loss": 0.8154,
      "step": 235070
    },
    {
      "epoch": 0.8238967921016939,
      "grad_norm": 3.265625,
      "learning_rate": 4.029831953498395e-05,
      "loss": 0.9006,
      "step": 235080
    },
    {
      "epoch": 0.8239318396085894,
      "grad_norm": 2.890625,
      "learning_rate": 4.029767050632024e-05,
      "loss": 0.8999,
      "step": 235090
    },
    {
      "epoch": 0.823966887115485,
      "grad_norm": 2.640625,
      "learning_rate": 4.0297021477656544e-05,
      "loss": 0.83,
      "step": 235100
    },
    {
      "epoch": 0.8240019346223806,
      "grad_norm": 2.875,
      "learning_rate": 4.029637244899284e-05,
      "loss": 0.8526,
      "step": 235110
    },
    {
      "epoch": 0.8240369821292762,
      "grad_norm": 3.03125,
      "learning_rate": 4.029572342032914e-05,
      "loss": 0.8801,
      "step": 235120
    },
    {
      "epoch": 0.8240720296361719,
      "grad_norm": 2.890625,
      "learning_rate": 4.0295074391665435e-05,
      "loss": 0.8606,
      "step": 235130
    },
    {
      "epoch": 0.8241070771430674,
      "grad_norm": 3.296875,
      "learning_rate": 4.029442536300173e-05,
      "loss": 0.8904,
      "step": 235140
    },
    {
      "epoch": 0.8241421246499631,
      "grad_norm": 2.515625,
      "learning_rate": 4.029377633433803e-05,
      "loss": 0.9601,
      "step": 235150
    },
    {
      "epoch": 0.8241771721568586,
      "grad_norm": 3.171875,
      "learning_rate": 4.0293127305674325e-05,
      "loss": 0.8904,
      "step": 235160
    },
    {
      "epoch": 0.8242122196637542,
      "grad_norm": 2.90625,
      "learning_rate": 4.029247827701063e-05,
      "loss": 0.8129,
      "step": 235170
    },
    {
      "epoch": 0.8242472671706498,
      "grad_norm": 3.296875,
      "learning_rate": 4.029182924834692e-05,
      "loss": 0.882,
      "step": 235180
    },
    {
      "epoch": 0.8242823146775454,
      "grad_norm": 3.0,
      "learning_rate": 4.029118021968322e-05,
      "loss": 0.8811,
      "step": 235190
    },
    {
      "epoch": 0.824317362184441,
      "grad_norm": 3.140625,
      "learning_rate": 4.029053119101952e-05,
      "loss": 0.8561,
      "step": 235200
    },
    {
      "epoch": 0.8243524096913366,
      "grad_norm": 3.796875,
      "learning_rate": 4.028988216235582e-05,
      "loss": 0.9037,
      "step": 235210
    },
    {
      "epoch": 0.8243874571982323,
      "grad_norm": 2.640625,
      "learning_rate": 4.028923313369212e-05,
      "loss": 0.9609,
      "step": 235220
    },
    {
      "epoch": 0.8244225047051278,
      "grad_norm": 2.984375,
      "learning_rate": 4.0288584105028415e-05,
      "loss": 0.8971,
      "step": 235230
    },
    {
      "epoch": 0.8244575522120234,
      "grad_norm": 2.671875,
      "learning_rate": 4.0287935076364716e-05,
      "loss": 0.9313,
      "step": 235240
    },
    {
      "epoch": 0.824492599718919,
      "grad_norm": 3.015625,
      "learning_rate": 4.028728604770101e-05,
      "loss": 0.963,
      "step": 235250
    },
    {
      "epoch": 0.8245276472258146,
      "grad_norm": 2.703125,
      "learning_rate": 4.028663701903731e-05,
      "loss": 0.8328,
      "step": 235260
    },
    {
      "epoch": 0.8245626947327102,
      "grad_norm": 2.796875,
      "learning_rate": 4.028598799037361e-05,
      "loss": 0.9542,
      "step": 235270
    },
    {
      "epoch": 0.8245977422396058,
      "grad_norm": 2.71875,
      "learning_rate": 4.028533896170991e-05,
      "loss": 0.8638,
      "step": 235280
    },
    {
      "epoch": 0.8246327897465013,
      "grad_norm": 2.828125,
      "learning_rate": 4.02846899330462e-05,
      "loss": 0.9184,
      "step": 235290
    },
    {
      "epoch": 0.824667837253397,
      "grad_norm": 2.90625,
      "learning_rate": 4.0284040904382504e-05,
      "loss": 0.8821,
      "step": 235300
    },
    {
      "epoch": 0.8247028847602926,
      "grad_norm": 3.0,
      "learning_rate": 4.02833918757188e-05,
      "loss": 0.9229,
      "step": 235310
    },
    {
      "epoch": 0.8247379322671882,
      "grad_norm": 2.96875,
      "learning_rate": 4.02827428470551e-05,
      "loss": 0.8662,
      "step": 235320
    },
    {
      "epoch": 0.8247729797740838,
      "grad_norm": 2.90625,
      "learning_rate": 4.0282093818391395e-05,
      "loss": 0.9028,
      "step": 235330
    },
    {
      "epoch": 0.8248080272809794,
      "grad_norm": 2.96875,
      "learning_rate": 4.0281444789727696e-05,
      "loss": 0.9064,
      "step": 235340
    },
    {
      "epoch": 0.824843074787875,
      "grad_norm": 2.90625,
      "learning_rate": 4.028079576106399e-05,
      "loss": 0.9638,
      "step": 235350
    },
    {
      "epoch": 0.8248781222947705,
      "grad_norm": 3.171875,
      "learning_rate": 4.028014673240029e-05,
      "loss": 0.9078,
      "step": 235360
    },
    {
      "epoch": 0.8249131698016662,
      "grad_norm": 3.359375,
      "learning_rate": 4.0279497703736594e-05,
      "loss": 0.8989,
      "step": 235370
    },
    {
      "epoch": 0.8249482173085617,
      "grad_norm": 3.15625,
      "learning_rate": 4.027884867507289e-05,
      "loss": 0.9287,
      "step": 235380
    },
    {
      "epoch": 0.8249832648154574,
      "grad_norm": 2.921875,
      "learning_rate": 4.027819964640919e-05,
      "loss": 0.9152,
      "step": 235390
    },
    {
      "epoch": 0.8250183123223529,
      "grad_norm": 2.78125,
      "learning_rate": 4.0277550617745484e-05,
      "loss": 0.8488,
      "step": 235400
    },
    {
      "epoch": 0.8250533598292485,
      "grad_norm": 2.75,
      "learning_rate": 4.0276901589081786e-05,
      "loss": 0.8789,
      "step": 235410
    },
    {
      "epoch": 0.8250884073361442,
      "grad_norm": 2.71875,
      "learning_rate": 4.027625256041808e-05,
      "loss": 0.8844,
      "step": 235420
    },
    {
      "epoch": 0.8251234548430397,
      "grad_norm": 2.4375,
      "learning_rate": 4.027560353175438e-05,
      "loss": 0.8632,
      "step": 235430
    },
    {
      "epoch": 0.8251585023499354,
      "grad_norm": 3.015625,
      "learning_rate": 4.0274954503090676e-05,
      "loss": 0.8831,
      "step": 235440
    },
    {
      "epoch": 0.8251935498568309,
      "grad_norm": 2.890625,
      "learning_rate": 4.027430547442698e-05,
      "loss": 0.8763,
      "step": 235450
    },
    {
      "epoch": 0.8252285973637266,
      "grad_norm": 2.921875,
      "learning_rate": 4.027365644576327e-05,
      "loss": 0.8602,
      "step": 235460
    },
    {
      "epoch": 0.8252636448706221,
      "grad_norm": 3.21875,
      "learning_rate": 4.0273007417099574e-05,
      "loss": 0.8305,
      "step": 235470
    },
    {
      "epoch": 0.8252986923775177,
      "grad_norm": 2.9375,
      "learning_rate": 4.027235838843587e-05,
      "loss": 0.923,
      "step": 235480
    },
    {
      "epoch": 0.8253337398844133,
      "grad_norm": 2.96875,
      "learning_rate": 4.027170935977217e-05,
      "loss": 0.9128,
      "step": 235490
    },
    {
      "epoch": 0.8253687873913089,
      "grad_norm": 2.703125,
      "learning_rate": 4.027106033110847e-05,
      "loss": 0.9239,
      "step": 235500
    },
    {
      "epoch": 0.8254038348982046,
      "grad_norm": 3.09375,
      "learning_rate": 4.027041130244476e-05,
      "loss": 0.8845,
      "step": 235510
    },
    {
      "epoch": 0.8254388824051001,
      "grad_norm": 3.0,
      "learning_rate": 4.026976227378106e-05,
      "loss": 0.8976,
      "step": 235520
    },
    {
      "epoch": 0.8254739299119958,
      "grad_norm": 2.625,
      "learning_rate": 4.0269113245117355e-05,
      "loss": 0.7913,
      "step": 235530
    },
    {
      "epoch": 0.8255089774188913,
      "grad_norm": 2.984375,
      "learning_rate": 4.0268464216453656e-05,
      "loss": 0.8568,
      "step": 235540
    },
    {
      "epoch": 0.8255440249257869,
      "grad_norm": 2.546875,
      "learning_rate": 4.026781518778995e-05,
      "loss": 0.852,
      "step": 235550
    },
    {
      "epoch": 0.8255790724326825,
      "grad_norm": 3.0625,
      "learning_rate": 4.026716615912625e-05,
      "loss": 0.9327,
      "step": 235560
    },
    {
      "epoch": 0.8256141199395781,
      "grad_norm": 2.890625,
      "learning_rate": 4.026651713046255e-05,
      "loss": 0.8963,
      "step": 235570
    },
    {
      "epoch": 0.8256491674464737,
      "grad_norm": 2.546875,
      "learning_rate": 4.026586810179885e-05,
      "loss": 1.0007,
      "step": 235580
    },
    {
      "epoch": 0.8256842149533693,
      "grad_norm": 2.59375,
      "learning_rate": 4.026521907313515e-05,
      "loss": 0.9102,
      "step": 235590
    },
    {
      "epoch": 0.8257192624602648,
      "grad_norm": 3.484375,
      "learning_rate": 4.0264570044471444e-05,
      "loss": 0.9774,
      "step": 235600
    },
    {
      "epoch": 0.8257543099671605,
      "grad_norm": 2.59375,
      "learning_rate": 4.0263921015807746e-05,
      "loss": 0.8318,
      "step": 235610
    },
    {
      "epoch": 0.8257893574740561,
      "grad_norm": 2.875,
      "learning_rate": 4.026327198714404e-05,
      "loss": 0.9384,
      "step": 235620
    },
    {
      "epoch": 0.8258244049809517,
      "grad_norm": 3.15625,
      "learning_rate": 4.026262295848034e-05,
      "loss": 0.8514,
      "step": 235630
    },
    {
      "epoch": 0.8258594524878473,
      "grad_norm": 2.8125,
      "learning_rate": 4.0261973929816636e-05,
      "loss": 0.9083,
      "step": 235640
    },
    {
      "epoch": 0.8258944999947428,
      "grad_norm": 3.203125,
      "learning_rate": 4.026132490115294e-05,
      "loss": 0.865,
      "step": 235650
    },
    {
      "epoch": 0.8259295475016385,
      "grad_norm": 3.546875,
      "learning_rate": 4.026067587248923e-05,
      "loss": 0.9576,
      "step": 235660
    },
    {
      "epoch": 0.825964595008534,
      "grad_norm": 3.171875,
      "learning_rate": 4.0260026843825534e-05,
      "loss": 0.8716,
      "step": 235670
    },
    {
      "epoch": 0.8259996425154297,
      "grad_norm": 2.796875,
      "learning_rate": 4.025937781516183e-05,
      "loss": 0.8992,
      "step": 235680
    },
    {
      "epoch": 0.8260346900223252,
      "grad_norm": 2.796875,
      "learning_rate": 4.025872878649813e-05,
      "loss": 0.8814,
      "step": 235690
    },
    {
      "epoch": 0.8260697375292209,
      "grad_norm": 3.171875,
      "learning_rate": 4.0258079757834424e-05,
      "loss": 0.9702,
      "step": 235700
    },
    {
      "epoch": 0.8261047850361165,
      "grad_norm": 2.9375,
      "learning_rate": 4.0257430729170726e-05,
      "loss": 0.8519,
      "step": 235710
    },
    {
      "epoch": 0.826139832543012,
      "grad_norm": 3.25,
      "learning_rate": 4.025678170050703e-05,
      "loss": 0.9319,
      "step": 235720
    },
    {
      "epoch": 0.8261748800499077,
      "grad_norm": 2.859375,
      "learning_rate": 4.025613267184332e-05,
      "loss": 0.9595,
      "step": 235730
    },
    {
      "epoch": 0.8262099275568032,
      "grad_norm": 2.796875,
      "learning_rate": 4.025548364317962e-05,
      "loss": 0.8551,
      "step": 235740
    },
    {
      "epoch": 0.8262449750636989,
      "grad_norm": 2.9375,
      "learning_rate": 4.025483461451592e-05,
      "loss": 0.8336,
      "step": 235750
    },
    {
      "epoch": 0.8262800225705944,
      "grad_norm": 2.75,
      "learning_rate": 4.025418558585222e-05,
      "loss": 0.8783,
      "step": 235760
    },
    {
      "epoch": 0.82631507007749,
      "grad_norm": 3.1875,
      "learning_rate": 4.0253536557188514e-05,
      "loss": 0.8884,
      "step": 235770
    },
    {
      "epoch": 0.8263501175843856,
      "grad_norm": 2.984375,
      "learning_rate": 4.0252887528524815e-05,
      "loss": 0.9439,
      "step": 235780
    },
    {
      "epoch": 0.8263851650912812,
      "grad_norm": 2.953125,
      "learning_rate": 4.025223849986111e-05,
      "loss": 0.9405,
      "step": 235790
    },
    {
      "epoch": 0.8264202125981768,
      "grad_norm": 2.671875,
      "learning_rate": 4.025158947119741e-05,
      "loss": 0.8791,
      "step": 235800
    },
    {
      "epoch": 0.8264552601050724,
      "grad_norm": 2.84375,
      "learning_rate": 4.0250940442533706e-05,
      "loss": 0.9007,
      "step": 235810
    },
    {
      "epoch": 0.8264903076119681,
      "grad_norm": 3.578125,
      "learning_rate": 4.025029141387001e-05,
      "loss": 0.9755,
      "step": 235820
    },
    {
      "epoch": 0.8265253551188636,
      "grad_norm": 2.703125,
      "learning_rate": 4.02496423852063e-05,
      "loss": 0.8564,
      "step": 235830
    },
    {
      "epoch": 0.8265604026257592,
      "grad_norm": 3.53125,
      "learning_rate": 4.02489933565426e-05,
      "loss": 0.89,
      "step": 235840
    },
    {
      "epoch": 0.8265954501326548,
      "grad_norm": 2.546875,
      "learning_rate": 4.02483443278789e-05,
      "loss": 0.8684,
      "step": 235850
    },
    {
      "epoch": 0.8266304976395504,
      "grad_norm": 2.734375,
      "learning_rate": 4.02476952992152e-05,
      "loss": 0.938,
      "step": 235860
    },
    {
      "epoch": 0.826665545146446,
      "grad_norm": 3.28125,
      "learning_rate": 4.02470462705515e-05,
      "loss": 0.879,
      "step": 235870
    },
    {
      "epoch": 0.8267005926533416,
      "grad_norm": 2.84375,
      "learning_rate": 4.024639724188779e-05,
      "loss": 0.9147,
      "step": 235880
    },
    {
      "epoch": 0.8267356401602372,
      "grad_norm": 3.03125,
      "learning_rate": 4.024574821322409e-05,
      "loss": 0.9074,
      "step": 235890
    },
    {
      "epoch": 0.8267706876671328,
      "grad_norm": 2.609375,
      "learning_rate": 4.0245099184560384e-05,
      "loss": 0.9575,
      "step": 235900
    },
    {
      "epoch": 0.8268057351740284,
      "grad_norm": 2.9375,
      "learning_rate": 4.0244450155896686e-05,
      "loss": 0.9331,
      "step": 235910
    },
    {
      "epoch": 0.826840782680924,
      "grad_norm": 3.109375,
      "learning_rate": 4.024380112723298e-05,
      "loss": 0.8147,
      "step": 235920
    },
    {
      "epoch": 0.8268758301878196,
      "grad_norm": 3.0,
      "learning_rate": 4.024315209856928e-05,
      "loss": 0.8459,
      "step": 235930
    },
    {
      "epoch": 0.8269108776947152,
      "grad_norm": 3.1875,
      "learning_rate": 4.0242503069905576e-05,
      "loss": 0.969,
      "step": 235940
    },
    {
      "epoch": 0.8269459252016108,
      "grad_norm": 3.203125,
      "learning_rate": 4.024185404124188e-05,
      "loss": 0.8757,
      "step": 235950
    },
    {
      "epoch": 0.8269809727085063,
      "grad_norm": 2.78125,
      "learning_rate": 4.024120501257818e-05,
      "loss": 0.9056,
      "step": 235960
    },
    {
      "epoch": 0.827016020215402,
      "grad_norm": 3.359375,
      "learning_rate": 4.0240555983914474e-05,
      "loss": 0.9729,
      "step": 235970
    },
    {
      "epoch": 0.8270510677222975,
      "grad_norm": 3.15625,
      "learning_rate": 4.0239906955250775e-05,
      "loss": 0.9348,
      "step": 235980
    },
    {
      "epoch": 0.8270861152291932,
      "grad_norm": 2.96875,
      "learning_rate": 4.023925792658707e-05,
      "loss": 0.9565,
      "step": 235990
    },
    {
      "epoch": 0.8271211627360888,
      "grad_norm": 3.21875,
      "learning_rate": 4.023860889792337e-05,
      "loss": 0.9161,
      "step": 236000
    },
    {
      "epoch": 0.8271562102429844,
      "grad_norm": 3.015625,
      "learning_rate": 4.0237959869259666e-05,
      "loss": 0.9044,
      "step": 236010
    },
    {
      "epoch": 0.82719125774988,
      "grad_norm": 3.21875,
      "learning_rate": 4.023731084059597e-05,
      "loss": 0.8643,
      "step": 236020
    },
    {
      "epoch": 0.8272263052567755,
      "grad_norm": 3.09375,
      "learning_rate": 4.023666181193226e-05,
      "loss": 0.955,
      "step": 236030
    },
    {
      "epoch": 0.8272613527636712,
      "grad_norm": 3.046875,
      "learning_rate": 4.023601278326856e-05,
      "loss": 0.9812,
      "step": 236040
    },
    {
      "epoch": 0.8272964002705667,
      "grad_norm": 3.015625,
      "learning_rate": 4.023536375460486e-05,
      "loss": 0.9701,
      "step": 236050
    },
    {
      "epoch": 0.8273314477774624,
      "grad_norm": 2.890625,
      "learning_rate": 4.023471472594116e-05,
      "loss": 0.8725,
      "step": 236060
    },
    {
      "epoch": 0.8273664952843579,
      "grad_norm": 3.0,
      "learning_rate": 4.0234065697277454e-05,
      "loss": 0.8849,
      "step": 236070
    },
    {
      "epoch": 0.8274015427912536,
      "grad_norm": 3.0625,
      "learning_rate": 4.0233416668613755e-05,
      "loss": 0.8729,
      "step": 236080
    },
    {
      "epoch": 0.8274365902981491,
      "grad_norm": 3.078125,
      "learning_rate": 4.0232767639950056e-05,
      "loss": 0.8611,
      "step": 236090
    },
    {
      "epoch": 0.8274716378050447,
      "grad_norm": 2.953125,
      "learning_rate": 4.023211861128635e-05,
      "loss": 0.8786,
      "step": 236100
    },
    {
      "epoch": 0.8275066853119404,
      "grad_norm": 3.0,
      "learning_rate": 4.023146958262265e-05,
      "loss": 0.8987,
      "step": 236110
    },
    {
      "epoch": 0.8275417328188359,
      "grad_norm": 2.921875,
      "learning_rate": 4.023082055395895e-05,
      "loss": 0.911,
      "step": 236120
    },
    {
      "epoch": 0.8275767803257316,
      "grad_norm": 3.453125,
      "learning_rate": 4.023017152529525e-05,
      "loss": 0.8626,
      "step": 236130
    },
    {
      "epoch": 0.8276118278326271,
      "grad_norm": 2.71875,
      "learning_rate": 4.022952249663154e-05,
      "loss": 0.8284,
      "step": 236140
    },
    {
      "epoch": 0.8276468753395227,
      "grad_norm": 3.0625,
      "learning_rate": 4.0228873467967844e-05,
      "loss": 0.8528,
      "step": 236150
    },
    {
      "epoch": 0.8276819228464183,
      "grad_norm": 2.578125,
      "learning_rate": 4.022822443930414e-05,
      "loss": 0.857,
      "step": 236160
    },
    {
      "epoch": 0.8277169703533139,
      "grad_norm": 2.671875,
      "learning_rate": 4.022757541064044e-05,
      "loss": 0.8835,
      "step": 236170
    },
    {
      "epoch": 0.8277520178602095,
      "grad_norm": 2.6875,
      "learning_rate": 4.0226926381976735e-05,
      "loss": 0.8487,
      "step": 236180
    },
    {
      "epoch": 0.8277870653671051,
      "grad_norm": 3.078125,
      "learning_rate": 4.0226277353313036e-05,
      "loss": 0.9491,
      "step": 236190
    },
    {
      "epoch": 0.8278221128740008,
      "grad_norm": 3.234375,
      "learning_rate": 4.022562832464933e-05,
      "loss": 0.8815,
      "step": 236200
    },
    {
      "epoch": 0.8278571603808963,
      "grad_norm": 2.796875,
      "learning_rate": 4.022497929598563e-05,
      "loss": 0.8669,
      "step": 236210
    },
    {
      "epoch": 0.8278922078877919,
      "grad_norm": 2.859375,
      "learning_rate": 4.022433026732193e-05,
      "loss": 0.8932,
      "step": 236220
    },
    {
      "epoch": 0.8279272553946875,
      "grad_norm": 3.21875,
      "learning_rate": 4.022368123865823e-05,
      "loss": 0.876,
      "step": 236230
    },
    {
      "epoch": 0.8279623029015831,
      "grad_norm": 3.015625,
      "learning_rate": 4.022303220999453e-05,
      "loss": 0.959,
      "step": 236240
    },
    {
      "epoch": 0.8279973504084787,
      "grad_norm": 3.265625,
      "learning_rate": 4.0222383181330824e-05,
      "loss": 0.8635,
      "step": 236250
    },
    {
      "epoch": 0.8280323979153743,
      "grad_norm": 2.84375,
      "learning_rate": 4.022173415266712e-05,
      "loss": 0.8804,
      "step": 236260
    },
    {
      "epoch": 0.8280674454222698,
      "grad_norm": 2.84375,
      "learning_rate": 4.0221085124003414e-05,
      "loss": 0.8984,
      "step": 236270
    },
    {
      "epoch": 0.8281024929291655,
      "grad_norm": 2.921875,
      "learning_rate": 4.0220436095339715e-05,
      "loss": 0.8503,
      "step": 236280
    },
    {
      "epoch": 0.828137540436061,
      "grad_norm": 2.6875,
      "learning_rate": 4.021978706667601e-05,
      "loss": 0.8739,
      "step": 236290
    },
    {
      "epoch": 0.8281725879429567,
      "grad_norm": 3.296875,
      "learning_rate": 4.021913803801231e-05,
      "loss": 1.0229,
      "step": 236300
    },
    {
      "epoch": 0.8282076354498523,
      "grad_norm": 2.828125,
      "learning_rate": 4.0218489009348606e-05,
      "loss": 0.9867,
      "step": 236310
    },
    {
      "epoch": 0.8282426829567479,
      "grad_norm": 2.78125,
      "learning_rate": 4.021783998068491e-05,
      "loss": 0.8974,
      "step": 236320
    },
    {
      "epoch": 0.8282777304636435,
      "grad_norm": 3.265625,
      "learning_rate": 4.021719095202121e-05,
      "loss": 0.9388,
      "step": 236330
    },
    {
      "epoch": 0.828312777970539,
      "grad_norm": 2.59375,
      "learning_rate": 4.02165419233575e-05,
      "loss": 0.9357,
      "step": 236340
    },
    {
      "epoch": 0.8283478254774347,
      "grad_norm": 3.046875,
      "learning_rate": 4.0215892894693804e-05,
      "loss": 0.855,
      "step": 236350
    },
    {
      "epoch": 0.8283828729843302,
      "grad_norm": 2.921875,
      "learning_rate": 4.02152438660301e-05,
      "loss": 0.9173,
      "step": 236360
    },
    {
      "epoch": 0.8284179204912259,
      "grad_norm": 2.9375,
      "learning_rate": 4.02145948373664e-05,
      "loss": 0.8964,
      "step": 236370
    },
    {
      "epoch": 0.8284529679981214,
      "grad_norm": 3.21875,
      "learning_rate": 4.0213945808702695e-05,
      "loss": 0.8578,
      "step": 236380
    },
    {
      "epoch": 0.828488015505017,
      "grad_norm": 2.828125,
      "learning_rate": 4.0213296780038996e-05,
      "loss": 0.8311,
      "step": 236390
    },
    {
      "epoch": 0.8285230630119127,
      "grad_norm": 2.53125,
      "learning_rate": 4.021264775137529e-05,
      "loss": 0.7928,
      "step": 236400
    },
    {
      "epoch": 0.8285581105188082,
      "grad_norm": 2.921875,
      "learning_rate": 4.021199872271159e-05,
      "loss": 0.9166,
      "step": 236410
    },
    {
      "epoch": 0.8285931580257039,
      "grad_norm": 2.515625,
      "learning_rate": 4.021134969404789e-05,
      "loss": 0.8656,
      "step": 236420
    },
    {
      "epoch": 0.8286282055325994,
      "grad_norm": 3.28125,
      "learning_rate": 4.021070066538419e-05,
      "loss": 0.9196,
      "step": 236430
    },
    {
      "epoch": 0.8286632530394951,
      "grad_norm": 2.953125,
      "learning_rate": 4.021005163672048e-05,
      "loss": 0.9415,
      "step": 236440
    },
    {
      "epoch": 0.8286983005463906,
      "grad_norm": 2.78125,
      "learning_rate": 4.0209402608056784e-05,
      "loss": 0.862,
      "step": 236450
    },
    {
      "epoch": 0.8287333480532862,
      "grad_norm": 3.359375,
      "learning_rate": 4.0208753579393086e-05,
      "loss": 0.8526,
      "step": 236460
    },
    {
      "epoch": 0.8287683955601818,
      "grad_norm": 2.515625,
      "learning_rate": 4.020810455072938e-05,
      "loss": 0.8885,
      "step": 236470
    },
    {
      "epoch": 0.8288034430670774,
      "grad_norm": 3.1875,
      "learning_rate": 4.020745552206568e-05,
      "loss": 0.9032,
      "step": 236480
    },
    {
      "epoch": 0.8288384905739731,
      "grad_norm": 3.078125,
      "learning_rate": 4.0206806493401976e-05,
      "loss": 0.8453,
      "step": 236490
    },
    {
      "epoch": 0.8288735380808686,
      "grad_norm": 3.140625,
      "learning_rate": 4.020615746473828e-05,
      "loss": 0.9377,
      "step": 236500
    },
    {
      "epoch": 0.8289085855877643,
      "grad_norm": 2.84375,
      "learning_rate": 4.020550843607457e-05,
      "loss": 0.8668,
      "step": 236510
    },
    {
      "epoch": 0.8289436330946598,
      "grad_norm": 3.125,
      "learning_rate": 4.0204859407410874e-05,
      "loss": 0.9306,
      "step": 236520
    },
    {
      "epoch": 0.8289786806015554,
      "grad_norm": 3.15625,
      "learning_rate": 4.020421037874717e-05,
      "loss": 0.8387,
      "step": 236530
    },
    {
      "epoch": 0.829013728108451,
      "grad_norm": 2.6875,
      "learning_rate": 4.020356135008347e-05,
      "loss": 0.8674,
      "step": 236540
    },
    {
      "epoch": 0.8290487756153466,
      "grad_norm": 3.53125,
      "learning_rate": 4.0202912321419764e-05,
      "loss": 0.9668,
      "step": 236550
    },
    {
      "epoch": 0.8290838231222422,
      "grad_norm": 2.546875,
      "learning_rate": 4.0202263292756066e-05,
      "loss": 0.8227,
      "step": 236560
    },
    {
      "epoch": 0.8291188706291378,
      "grad_norm": 3.25,
      "learning_rate": 4.020161426409236e-05,
      "loss": 0.9449,
      "step": 236570
    },
    {
      "epoch": 0.8291539181360333,
      "grad_norm": 3.203125,
      "learning_rate": 4.020096523542866e-05,
      "loss": 0.8974,
      "step": 236580
    },
    {
      "epoch": 0.829188965642929,
      "grad_norm": 2.96875,
      "learning_rate": 4.0200316206764956e-05,
      "loss": 0.9227,
      "step": 236590
    },
    {
      "epoch": 0.8292240131498246,
      "grad_norm": 2.75,
      "learning_rate": 4.019966717810126e-05,
      "loss": 0.9466,
      "step": 236600
    },
    {
      "epoch": 0.8292590606567202,
      "grad_norm": 2.546875,
      "learning_rate": 4.019901814943756e-05,
      "loss": 0.8673,
      "step": 236610
    },
    {
      "epoch": 0.8292941081636158,
      "grad_norm": 3.078125,
      "learning_rate": 4.0198369120773854e-05,
      "loss": 0.9154,
      "step": 236620
    },
    {
      "epoch": 0.8293291556705114,
      "grad_norm": 3.046875,
      "learning_rate": 4.019772009211015e-05,
      "loss": 0.8793,
      "step": 236630
    },
    {
      "epoch": 0.829364203177407,
      "grad_norm": 2.890625,
      "learning_rate": 4.019707106344644e-05,
      "loss": 0.8483,
      "step": 236640
    },
    {
      "epoch": 0.8293992506843025,
      "grad_norm": 2.953125,
      "learning_rate": 4.0196422034782744e-05,
      "loss": 0.992,
      "step": 236650
    },
    {
      "epoch": 0.8294342981911982,
      "grad_norm": 2.953125,
      "learning_rate": 4.019577300611904e-05,
      "loss": 0.8889,
      "step": 236660
    },
    {
      "epoch": 0.8294693456980937,
      "grad_norm": 2.421875,
      "learning_rate": 4.019512397745534e-05,
      "loss": 0.8116,
      "step": 236670
    },
    {
      "epoch": 0.8295043932049894,
      "grad_norm": 3.59375,
      "learning_rate": 4.019447494879164e-05,
      "loss": 0.8918,
      "step": 236680
    },
    {
      "epoch": 0.829539440711885,
      "grad_norm": 2.96875,
      "learning_rate": 4.0193825920127936e-05,
      "loss": 0.9052,
      "step": 236690
    },
    {
      "epoch": 0.8295744882187805,
      "grad_norm": 3.390625,
      "learning_rate": 4.019317689146424e-05,
      "loss": 0.9238,
      "step": 236700
    },
    {
      "epoch": 0.8296095357256762,
      "grad_norm": 2.703125,
      "learning_rate": 4.019252786280053e-05,
      "loss": 0.8357,
      "step": 236710
    },
    {
      "epoch": 0.8296445832325717,
      "grad_norm": 2.796875,
      "learning_rate": 4.0191878834136834e-05,
      "loss": 0.8756,
      "step": 236720
    },
    {
      "epoch": 0.8296796307394674,
      "grad_norm": 2.71875,
      "learning_rate": 4.019122980547313e-05,
      "loss": 0.8077,
      "step": 236730
    },
    {
      "epoch": 0.8297146782463629,
      "grad_norm": 2.671875,
      "learning_rate": 4.019058077680943e-05,
      "loss": 0.8601,
      "step": 236740
    },
    {
      "epoch": 0.8297497257532586,
      "grad_norm": 2.578125,
      "learning_rate": 4.0189931748145724e-05,
      "loss": 0.9875,
      "step": 236750
    },
    {
      "epoch": 0.8297847732601541,
      "grad_norm": 2.796875,
      "learning_rate": 4.0189282719482026e-05,
      "loss": 0.889,
      "step": 236760
    },
    {
      "epoch": 0.8298198207670497,
      "grad_norm": 3.515625,
      "learning_rate": 4.018863369081832e-05,
      "loss": 0.9282,
      "step": 236770
    },
    {
      "epoch": 0.8298548682739453,
      "grad_norm": 3.28125,
      "learning_rate": 4.018798466215462e-05,
      "loss": 1.0016,
      "step": 236780
    },
    {
      "epoch": 0.8298899157808409,
      "grad_norm": 2.5,
      "learning_rate": 4.0187335633490916e-05,
      "loss": 0.8327,
      "step": 236790
    },
    {
      "epoch": 0.8299249632877366,
      "grad_norm": 2.859375,
      "learning_rate": 4.018668660482722e-05,
      "loss": 0.7908,
      "step": 236800
    },
    {
      "epoch": 0.8299600107946321,
      "grad_norm": 2.828125,
      "learning_rate": 4.018603757616351e-05,
      "loss": 0.8961,
      "step": 236810
    },
    {
      "epoch": 0.8299950583015278,
      "grad_norm": 2.5625,
      "learning_rate": 4.0185388547499814e-05,
      "loss": 0.8344,
      "step": 236820
    },
    {
      "epoch": 0.8300301058084233,
      "grad_norm": 2.609375,
      "learning_rate": 4.0184739518836115e-05,
      "loss": 0.882,
      "step": 236830
    },
    {
      "epoch": 0.8300651533153189,
      "grad_norm": 3.015625,
      "learning_rate": 4.018409049017241e-05,
      "loss": 0.8411,
      "step": 236840
    },
    {
      "epoch": 0.8301002008222145,
      "grad_norm": 3.015625,
      "learning_rate": 4.018344146150871e-05,
      "loss": 0.8909,
      "step": 236850
    },
    {
      "epoch": 0.8301352483291101,
      "grad_norm": 2.609375,
      "learning_rate": 4.0182792432845006e-05,
      "loss": 0.9557,
      "step": 236860
    },
    {
      "epoch": 0.8301702958360057,
      "grad_norm": 3.046875,
      "learning_rate": 4.018214340418131e-05,
      "loss": 0.8622,
      "step": 236870
    },
    {
      "epoch": 0.8302053433429013,
      "grad_norm": 2.71875,
      "learning_rate": 4.01814943755176e-05,
      "loss": 0.9466,
      "step": 236880
    },
    {
      "epoch": 0.830240390849797,
      "grad_norm": 2.65625,
      "learning_rate": 4.01808453468539e-05,
      "loss": 0.8892,
      "step": 236890
    },
    {
      "epoch": 0.8302754383566925,
      "grad_norm": 2.234375,
      "learning_rate": 4.01801963181902e-05,
      "loss": 0.8478,
      "step": 236900
    },
    {
      "epoch": 0.8303104858635881,
      "grad_norm": 2.828125,
      "learning_rate": 4.01795472895265e-05,
      "loss": 0.9383,
      "step": 236910
    },
    {
      "epoch": 0.8303455333704837,
      "grad_norm": 2.90625,
      "learning_rate": 4.0178898260862794e-05,
      "loss": 0.9014,
      "step": 236920
    },
    {
      "epoch": 0.8303805808773793,
      "grad_norm": 2.859375,
      "learning_rate": 4.0178249232199095e-05,
      "loss": 1.0084,
      "step": 236930
    },
    {
      "epoch": 0.8304156283842749,
      "grad_norm": 3.125,
      "learning_rate": 4.017760020353539e-05,
      "loss": 0.9413,
      "step": 236940
    },
    {
      "epoch": 0.8304506758911705,
      "grad_norm": 3.21875,
      "learning_rate": 4.017695117487169e-05,
      "loss": 0.8866,
      "step": 236950
    },
    {
      "epoch": 0.830485723398066,
      "grad_norm": 2.984375,
      "learning_rate": 4.017630214620799e-05,
      "loss": 0.8479,
      "step": 236960
    },
    {
      "epoch": 0.8305207709049617,
      "grad_norm": 3.125,
      "learning_rate": 4.017565311754429e-05,
      "loss": 0.9629,
      "step": 236970
    },
    {
      "epoch": 0.8305558184118573,
      "grad_norm": 3.125,
      "learning_rate": 4.017500408888059e-05,
      "loss": 0.9513,
      "step": 236980
    },
    {
      "epoch": 0.8305908659187529,
      "grad_norm": 3.28125,
      "learning_rate": 4.017435506021688e-05,
      "loss": 0.8774,
      "step": 236990
    },
    {
      "epoch": 0.8306259134256485,
      "grad_norm": 3.09375,
      "learning_rate": 4.0173706031553184e-05,
      "loss": 0.9865,
      "step": 237000
    },
    {
      "epoch": 0.830660960932544,
      "grad_norm": 3.015625,
      "learning_rate": 4.017305700288947e-05,
      "loss": 0.8471,
      "step": 237010
    },
    {
      "epoch": 0.8306960084394397,
      "grad_norm": 2.828125,
      "learning_rate": 4.0172407974225774e-05,
      "loss": 0.903,
      "step": 237020
    },
    {
      "epoch": 0.8307310559463352,
      "grad_norm": 3.6875,
      "learning_rate": 4.017175894556207e-05,
      "loss": 0.9274,
      "step": 237030
    },
    {
      "epoch": 0.8307661034532309,
      "grad_norm": 3.015625,
      "learning_rate": 4.017110991689837e-05,
      "loss": 0.8334,
      "step": 237040
    },
    {
      "epoch": 0.8308011509601264,
      "grad_norm": 3.3125,
      "learning_rate": 4.017046088823467e-05,
      "loss": 0.9492,
      "step": 237050
    },
    {
      "epoch": 0.8308361984670221,
      "grad_norm": 2.890625,
      "learning_rate": 4.0169811859570966e-05,
      "loss": 0.892,
      "step": 237060
    },
    {
      "epoch": 0.8308712459739176,
      "grad_norm": 2.765625,
      "learning_rate": 4.016916283090727e-05,
      "loss": 0.9304,
      "step": 237070
    },
    {
      "epoch": 0.8309062934808132,
      "grad_norm": 3.0,
      "learning_rate": 4.016851380224356e-05,
      "loss": 0.8323,
      "step": 237080
    },
    {
      "epoch": 0.8309413409877089,
      "grad_norm": 2.8125,
      "learning_rate": 4.016786477357986e-05,
      "loss": 0.933,
      "step": 237090
    },
    {
      "epoch": 0.8309763884946044,
      "grad_norm": 2.953125,
      "learning_rate": 4.016721574491616e-05,
      "loss": 0.9195,
      "step": 237100
    },
    {
      "epoch": 0.8310114360015001,
      "grad_norm": 3.125,
      "learning_rate": 4.016656671625246e-05,
      "loss": 0.9028,
      "step": 237110
    },
    {
      "epoch": 0.8310464835083956,
      "grad_norm": 3.421875,
      "learning_rate": 4.0165917687588754e-05,
      "loss": 0.8766,
      "step": 237120
    },
    {
      "epoch": 0.8310815310152913,
      "grad_norm": 2.578125,
      "learning_rate": 4.0165268658925055e-05,
      "loss": 0.8334,
      "step": 237130
    },
    {
      "epoch": 0.8311165785221868,
      "grad_norm": 2.859375,
      "learning_rate": 4.016461963026135e-05,
      "loss": 0.8129,
      "step": 237140
    },
    {
      "epoch": 0.8311516260290824,
      "grad_norm": 2.78125,
      "learning_rate": 4.016397060159765e-05,
      "loss": 0.9159,
      "step": 237150
    },
    {
      "epoch": 0.831186673535978,
      "grad_norm": 3.09375,
      "learning_rate": 4.0163321572933946e-05,
      "loss": 0.8555,
      "step": 237160
    },
    {
      "epoch": 0.8312217210428736,
      "grad_norm": 2.96875,
      "learning_rate": 4.016267254427025e-05,
      "loss": 0.8534,
      "step": 237170
    },
    {
      "epoch": 0.8312567685497693,
      "grad_norm": 2.703125,
      "learning_rate": 4.016202351560654e-05,
      "loss": 0.8322,
      "step": 237180
    },
    {
      "epoch": 0.8312918160566648,
      "grad_norm": 2.78125,
      "learning_rate": 4.016137448694284e-05,
      "loss": 0.8927,
      "step": 237190
    },
    {
      "epoch": 0.8313268635635604,
      "grad_norm": 2.953125,
      "learning_rate": 4.0160725458279144e-05,
      "loss": 0.8797,
      "step": 237200
    },
    {
      "epoch": 0.831361911070456,
      "grad_norm": 2.96875,
      "learning_rate": 4.016007642961544e-05,
      "loss": 0.9152,
      "step": 237210
    },
    {
      "epoch": 0.8313969585773516,
      "grad_norm": 3.078125,
      "learning_rate": 4.015942740095174e-05,
      "loss": 0.8726,
      "step": 237220
    },
    {
      "epoch": 0.8314320060842472,
      "grad_norm": 2.453125,
      "learning_rate": 4.0158778372288035e-05,
      "loss": 0.9344,
      "step": 237230
    },
    {
      "epoch": 0.8314670535911428,
      "grad_norm": 2.921875,
      "learning_rate": 4.0158129343624336e-05,
      "loss": 0.9044,
      "step": 237240
    },
    {
      "epoch": 0.8315021010980383,
      "grad_norm": 2.578125,
      "learning_rate": 4.015748031496063e-05,
      "loss": 0.7962,
      "step": 237250
    },
    {
      "epoch": 0.831537148604934,
      "grad_norm": 2.8125,
      "learning_rate": 4.015683128629693e-05,
      "loss": 0.836,
      "step": 237260
    },
    {
      "epoch": 0.8315721961118295,
      "grad_norm": 3.25,
      "learning_rate": 4.015618225763323e-05,
      "loss": 0.9443,
      "step": 237270
    },
    {
      "epoch": 0.8316072436187252,
      "grad_norm": 2.890625,
      "learning_rate": 4.015553322896953e-05,
      "loss": 0.9192,
      "step": 237280
    },
    {
      "epoch": 0.8316422911256208,
      "grad_norm": 2.765625,
      "learning_rate": 4.015488420030582e-05,
      "loss": 0.8883,
      "step": 237290
    },
    {
      "epoch": 0.8316773386325164,
      "grad_norm": 3.03125,
      "learning_rate": 4.0154235171642124e-05,
      "loss": 0.9556,
      "step": 237300
    },
    {
      "epoch": 0.831712386139412,
      "grad_norm": 2.78125,
      "learning_rate": 4.015358614297842e-05,
      "loss": 0.8087,
      "step": 237310
    },
    {
      "epoch": 0.8317474336463075,
      "grad_norm": 3.234375,
      "learning_rate": 4.015293711431472e-05,
      "loss": 0.8896,
      "step": 237320
    },
    {
      "epoch": 0.8317824811532032,
      "grad_norm": 3.234375,
      "learning_rate": 4.015228808565102e-05,
      "loss": 0.8645,
      "step": 237330
    },
    {
      "epoch": 0.8318175286600987,
      "grad_norm": 3.40625,
      "learning_rate": 4.0151639056987316e-05,
      "loss": 0.9154,
      "step": 237340
    },
    {
      "epoch": 0.8318525761669944,
      "grad_norm": 3.15625,
      "learning_rate": 4.015099002832362e-05,
      "loss": 0.9136,
      "step": 237350
    },
    {
      "epoch": 0.8318876236738899,
      "grad_norm": 2.703125,
      "learning_rate": 4.015034099965991e-05,
      "loss": 0.9187,
      "step": 237360
    },
    {
      "epoch": 0.8319226711807856,
      "grad_norm": 3.046875,
      "learning_rate": 4.0149691970996214e-05,
      "loss": 0.871,
      "step": 237370
    },
    {
      "epoch": 0.8319577186876812,
      "grad_norm": 2.75,
      "learning_rate": 4.014904294233251e-05,
      "loss": 0.8942,
      "step": 237380
    },
    {
      "epoch": 0.8319927661945767,
      "grad_norm": 3.21875,
      "learning_rate": 4.01483939136688e-05,
      "loss": 0.8963,
      "step": 237390
    },
    {
      "epoch": 0.8320278137014724,
      "grad_norm": 2.828125,
      "learning_rate": 4.01477448850051e-05,
      "loss": 0.8692,
      "step": 237400
    },
    {
      "epoch": 0.8320628612083679,
      "grad_norm": 2.859375,
      "learning_rate": 4.01470958563414e-05,
      "loss": 0.9034,
      "step": 237410
    },
    {
      "epoch": 0.8320979087152636,
      "grad_norm": 2.640625,
      "learning_rate": 4.01464468276777e-05,
      "loss": 0.885,
      "step": 237420
    },
    {
      "epoch": 0.8321329562221591,
      "grad_norm": 3.015625,
      "learning_rate": 4.0145797799013995e-05,
      "loss": 0.8796,
      "step": 237430
    },
    {
      "epoch": 0.8321680037290548,
      "grad_norm": 2.875,
      "learning_rate": 4.0145148770350296e-05,
      "loss": 0.8746,
      "step": 237440
    },
    {
      "epoch": 0.8322030512359503,
      "grad_norm": 2.796875,
      "learning_rate": 4.014449974168659e-05,
      "loss": 0.9774,
      "step": 237450
    },
    {
      "epoch": 0.8322380987428459,
      "grad_norm": 2.921875,
      "learning_rate": 4.014385071302289e-05,
      "loss": 0.8208,
      "step": 237460
    },
    {
      "epoch": 0.8322731462497416,
      "grad_norm": 3.0,
      "learning_rate": 4.014320168435919e-05,
      "loss": 0.9284,
      "step": 237470
    },
    {
      "epoch": 0.8323081937566371,
      "grad_norm": 3.0,
      "learning_rate": 4.014255265569549e-05,
      "loss": 0.8894,
      "step": 237480
    },
    {
      "epoch": 0.8323432412635328,
      "grad_norm": 2.609375,
      "learning_rate": 4.014190362703178e-05,
      "loss": 0.9865,
      "step": 237490
    },
    {
      "epoch": 0.8323782887704283,
      "grad_norm": 2.859375,
      "learning_rate": 4.0141254598368084e-05,
      "loss": 0.993,
      "step": 237500
    },
    {
      "epoch": 0.832413336277324,
      "grad_norm": 2.578125,
      "learning_rate": 4.014060556970438e-05,
      "loss": 0.8911,
      "step": 237510
    },
    {
      "epoch": 0.8324483837842195,
      "grad_norm": 2.65625,
      "learning_rate": 4.013995654104068e-05,
      "loss": 0.9273,
      "step": 237520
    },
    {
      "epoch": 0.8324834312911151,
      "grad_norm": 2.859375,
      "learning_rate": 4.0139307512376975e-05,
      "loss": 0.9116,
      "step": 237530
    },
    {
      "epoch": 0.8325184787980107,
      "grad_norm": 2.9375,
      "learning_rate": 4.0138658483713276e-05,
      "loss": 0.9728,
      "step": 237540
    },
    {
      "epoch": 0.8325535263049063,
      "grad_norm": 3.171875,
      "learning_rate": 4.013800945504957e-05,
      "loss": 0.8845,
      "step": 237550
    },
    {
      "epoch": 0.8325885738118018,
      "grad_norm": 2.828125,
      "learning_rate": 4.013736042638587e-05,
      "loss": 0.9112,
      "step": 237560
    },
    {
      "epoch": 0.8326236213186975,
      "grad_norm": 2.546875,
      "learning_rate": 4.0136711397722174e-05,
      "loss": 0.8704,
      "step": 237570
    },
    {
      "epoch": 0.8326586688255931,
      "grad_norm": 2.578125,
      "learning_rate": 4.013606236905847e-05,
      "loss": 0.8031,
      "step": 237580
    },
    {
      "epoch": 0.8326937163324887,
      "grad_norm": 3.203125,
      "learning_rate": 4.013541334039477e-05,
      "loss": 0.9464,
      "step": 237590
    },
    {
      "epoch": 0.8327287638393843,
      "grad_norm": 3.03125,
      "learning_rate": 4.0134764311731064e-05,
      "loss": 0.8449,
      "step": 237600
    },
    {
      "epoch": 0.8327638113462799,
      "grad_norm": 3.078125,
      "learning_rate": 4.0134115283067366e-05,
      "loss": 0.8201,
      "step": 237610
    },
    {
      "epoch": 0.8327988588531755,
      "grad_norm": 2.03125,
      "learning_rate": 4.013346625440366e-05,
      "loss": 0.8981,
      "step": 237620
    },
    {
      "epoch": 0.832833906360071,
      "grad_norm": 3.359375,
      "learning_rate": 4.013281722573996e-05,
      "loss": 0.848,
      "step": 237630
    },
    {
      "epoch": 0.8328689538669667,
      "grad_norm": 2.6875,
      "learning_rate": 4.0132168197076256e-05,
      "loss": 0.8877,
      "step": 237640
    },
    {
      "epoch": 0.8329040013738622,
      "grad_norm": 2.640625,
      "learning_rate": 4.013151916841256e-05,
      "loss": 0.9452,
      "step": 237650
    },
    {
      "epoch": 0.8329390488807579,
      "grad_norm": 2.875,
      "learning_rate": 4.013087013974885e-05,
      "loss": 0.9457,
      "step": 237660
    },
    {
      "epoch": 0.8329740963876535,
      "grad_norm": 3.171875,
      "learning_rate": 4.0130221111085154e-05,
      "loss": 0.8988,
      "step": 237670
    },
    {
      "epoch": 0.833009143894549,
      "grad_norm": 2.9375,
      "learning_rate": 4.012957208242145e-05,
      "loss": 0.9138,
      "step": 237680
    },
    {
      "epoch": 0.8330441914014447,
      "grad_norm": 3.15625,
      "learning_rate": 4.012892305375775e-05,
      "loss": 0.9471,
      "step": 237690
    },
    {
      "epoch": 0.8330792389083402,
      "grad_norm": 2.84375,
      "learning_rate": 4.012827402509405e-05,
      "loss": 0.9077,
      "step": 237700
    },
    {
      "epoch": 0.8331142864152359,
      "grad_norm": 3.078125,
      "learning_rate": 4.0127624996430346e-05,
      "loss": 1.022,
      "step": 237710
    },
    {
      "epoch": 0.8331493339221314,
      "grad_norm": 2.765625,
      "learning_rate": 4.012697596776665e-05,
      "loss": 0.8521,
      "step": 237720
    },
    {
      "epoch": 0.8331843814290271,
      "grad_norm": 2.609375,
      "learning_rate": 4.012632693910294e-05,
      "loss": 0.9043,
      "step": 237730
    },
    {
      "epoch": 0.8332194289359226,
      "grad_norm": 3.03125,
      "learning_rate": 4.012567791043924e-05,
      "loss": 0.8883,
      "step": 237740
    },
    {
      "epoch": 0.8332544764428182,
      "grad_norm": 3.015625,
      "learning_rate": 4.012502888177554e-05,
      "loss": 0.8892,
      "step": 237750
    },
    {
      "epoch": 0.8332895239497138,
      "grad_norm": 2.703125,
      "learning_rate": 4.012437985311183e-05,
      "loss": 0.8921,
      "step": 237760
    },
    {
      "epoch": 0.8333245714566094,
      "grad_norm": 2.9375,
      "learning_rate": 4.012373082444813e-05,
      "loss": 0.9029,
      "step": 237770
    },
    {
      "epoch": 0.8333596189635051,
      "grad_norm": 2.78125,
      "learning_rate": 4.012308179578443e-05,
      "loss": 0.8337,
      "step": 237780
    },
    {
      "epoch": 0.8333946664704006,
      "grad_norm": 3.21875,
      "learning_rate": 4.012243276712073e-05,
      "loss": 0.8556,
      "step": 237790
    },
    {
      "epoch": 0.8334297139772963,
      "grad_norm": 2.703125,
      "learning_rate": 4.0121783738457024e-05,
      "loss": 0.8604,
      "step": 237800
    },
    {
      "epoch": 0.8334647614841918,
      "grad_norm": 2.796875,
      "learning_rate": 4.0121134709793326e-05,
      "loss": 0.9197,
      "step": 237810
    },
    {
      "epoch": 0.8334998089910874,
      "grad_norm": 2.8125,
      "learning_rate": 4.012048568112962e-05,
      "loss": 0.8831,
      "step": 237820
    },
    {
      "epoch": 0.833534856497983,
      "grad_norm": 3.078125,
      "learning_rate": 4.011983665246592e-05,
      "loss": 0.9239,
      "step": 237830
    },
    {
      "epoch": 0.8335699040048786,
      "grad_norm": 2.78125,
      "learning_rate": 4.0119187623802216e-05,
      "loss": 0.8851,
      "step": 237840
    },
    {
      "epoch": 0.8336049515117742,
      "grad_norm": 2.8125,
      "learning_rate": 4.011853859513852e-05,
      "loss": 0.9159,
      "step": 237850
    },
    {
      "epoch": 0.8336399990186698,
      "grad_norm": 2.9375,
      "learning_rate": 4.011788956647481e-05,
      "loss": 0.7492,
      "step": 237860
    },
    {
      "epoch": 0.8336750465255655,
      "grad_norm": 3.09375,
      "learning_rate": 4.0117240537811114e-05,
      "loss": 0.9496,
      "step": 237870
    },
    {
      "epoch": 0.833710094032461,
      "grad_norm": 2.84375,
      "learning_rate": 4.011659150914741e-05,
      "loss": 0.8882,
      "step": 237880
    },
    {
      "epoch": 0.8337451415393566,
      "grad_norm": 2.71875,
      "learning_rate": 4.011594248048371e-05,
      "loss": 0.8528,
      "step": 237890
    },
    {
      "epoch": 0.8337801890462522,
      "grad_norm": 3.046875,
      "learning_rate": 4.0115293451820004e-05,
      "loss": 0.9063,
      "step": 237900
    },
    {
      "epoch": 0.8338152365531478,
      "grad_norm": 3.21875,
      "learning_rate": 4.0114644423156306e-05,
      "loss": 0.9643,
      "step": 237910
    },
    {
      "epoch": 0.8338502840600434,
      "grad_norm": 2.8125,
      "learning_rate": 4.011399539449261e-05,
      "loss": 0.8485,
      "step": 237920
    },
    {
      "epoch": 0.833885331566939,
      "grad_norm": 2.6875,
      "learning_rate": 4.01133463658289e-05,
      "loss": 0.875,
      "step": 237930
    },
    {
      "epoch": 0.8339203790738345,
      "grad_norm": 2.671875,
      "learning_rate": 4.01126973371652e-05,
      "loss": 0.9074,
      "step": 237940
    },
    {
      "epoch": 0.8339554265807302,
      "grad_norm": 2.796875,
      "learning_rate": 4.01120483085015e-05,
      "loss": 0.8798,
      "step": 237950
    },
    {
      "epoch": 0.8339904740876257,
      "grad_norm": 3.109375,
      "learning_rate": 4.01113992798378e-05,
      "loss": 0.8314,
      "step": 237960
    },
    {
      "epoch": 0.8340255215945214,
      "grad_norm": 2.9375,
      "learning_rate": 4.0110750251174094e-05,
      "loss": 0.9323,
      "step": 237970
    },
    {
      "epoch": 0.834060569101417,
      "grad_norm": 3.03125,
      "learning_rate": 4.0110101222510395e-05,
      "loss": 0.9314,
      "step": 237980
    },
    {
      "epoch": 0.8340956166083126,
      "grad_norm": 2.96875,
      "learning_rate": 4.010945219384669e-05,
      "loss": 0.9237,
      "step": 237990
    },
    {
      "epoch": 0.8341306641152082,
      "grad_norm": 3.109375,
      "learning_rate": 4.010880316518299e-05,
      "loss": 0.8748,
      "step": 238000
    },
    {
      "epoch": 0.8341657116221037,
      "grad_norm": 3.0625,
      "learning_rate": 4.0108154136519286e-05,
      "loss": 0.9408,
      "step": 238010
    },
    {
      "epoch": 0.8342007591289994,
      "grad_norm": 2.71875,
      "learning_rate": 4.010750510785559e-05,
      "loss": 0.9287,
      "step": 238020
    },
    {
      "epoch": 0.8342358066358949,
      "grad_norm": 2.796875,
      "learning_rate": 4.010685607919188e-05,
      "loss": 0.8543,
      "step": 238030
    },
    {
      "epoch": 0.8342708541427906,
      "grad_norm": 2.96875,
      "learning_rate": 4.010620705052818e-05,
      "loss": 0.8351,
      "step": 238040
    },
    {
      "epoch": 0.8343059016496861,
      "grad_norm": 2.96875,
      "learning_rate": 4.010555802186448e-05,
      "loss": 0.8875,
      "step": 238050
    },
    {
      "epoch": 0.8343409491565817,
      "grad_norm": 2.703125,
      "learning_rate": 4.010490899320078e-05,
      "loss": 0.9685,
      "step": 238060
    },
    {
      "epoch": 0.8343759966634774,
      "grad_norm": 2.765625,
      "learning_rate": 4.010425996453708e-05,
      "loss": 0.828,
      "step": 238070
    },
    {
      "epoch": 0.8344110441703729,
      "grad_norm": 2.703125,
      "learning_rate": 4.0103610935873375e-05,
      "loss": 0.841,
      "step": 238080
    },
    {
      "epoch": 0.8344460916772686,
      "grad_norm": 2.984375,
      "learning_rate": 4.010296190720968e-05,
      "loss": 0.9421,
      "step": 238090
    },
    {
      "epoch": 0.8344811391841641,
      "grad_norm": 3.125,
      "learning_rate": 4.010231287854597e-05,
      "loss": 0.9597,
      "step": 238100
    },
    {
      "epoch": 0.8345161866910598,
      "grad_norm": 3.078125,
      "learning_rate": 4.010166384988227e-05,
      "loss": 0.8036,
      "step": 238110
    },
    {
      "epoch": 0.8345512341979553,
      "grad_norm": 2.828125,
      "learning_rate": 4.010101482121857e-05,
      "loss": 0.895,
      "step": 238120
    },
    {
      "epoch": 0.8345862817048509,
      "grad_norm": 2.9375,
      "learning_rate": 4.010036579255487e-05,
      "loss": 0.8891,
      "step": 238130
    },
    {
      "epoch": 0.8346213292117465,
      "grad_norm": 2.578125,
      "learning_rate": 4.0099716763891156e-05,
      "loss": 0.8845,
      "step": 238140
    },
    {
      "epoch": 0.8346563767186421,
      "grad_norm": 2.6875,
      "learning_rate": 4.009906773522746e-05,
      "loss": 0.8656,
      "step": 238150
    },
    {
      "epoch": 0.8346914242255378,
      "grad_norm": 3.109375,
      "learning_rate": 4.009841870656376e-05,
      "loss": 0.8316,
      "step": 238160
    },
    {
      "epoch": 0.8347264717324333,
      "grad_norm": 2.90625,
      "learning_rate": 4.0097769677900054e-05,
      "loss": 0.8304,
      "step": 238170
    },
    {
      "epoch": 0.834761519239329,
      "grad_norm": 3.046875,
      "learning_rate": 4.0097120649236355e-05,
      "loss": 0.8339,
      "step": 238180
    },
    {
      "epoch": 0.8347965667462245,
      "grad_norm": 2.75,
      "learning_rate": 4.009647162057265e-05,
      "loss": 0.8571,
      "step": 238190
    },
    {
      "epoch": 0.8348316142531201,
      "grad_norm": 2.984375,
      "learning_rate": 4.009582259190895e-05,
      "loss": 0.8517,
      "step": 238200
    },
    {
      "epoch": 0.8348666617600157,
      "grad_norm": 2.96875,
      "learning_rate": 4.0095173563245246e-05,
      "loss": 0.9547,
      "step": 238210
    },
    {
      "epoch": 0.8349017092669113,
      "grad_norm": 3.015625,
      "learning_rate": 4.009452453458155e-05,
      "loss": 0.8581,
      "step": 238220
    },
    {
      "epoch": 0.8349367567738069,
      "grad_norm": 2.796875,
      "learning_rate": 4.009387550591784e-05,
      "loss": 0.9088,
      "step": 238230
    },
    {
      "epoch": 0.8349718042807025,
      "grad_norm": 2.984375,
      "learning_rate": 4.009322647725414e-05,
      "loss": 0.9268,
      "step": 238240
    },
    {
      "epoch": 0.835006851787598,
      "grad_norm": 3.046875,
      "learning_rate": 4.009257744859044e-05,
      "loss": 0.8842,
      "step": 238250
    },
    {
      "epoch": 0.8350418992944937,
      "grad_norm": 2.71875,
      "learning_rate": 4.009192841992674e-05,
      "loss": 0.8779,
      "step": 238260
    },
    {
      "epoch": 0.8350769468013893,
      "grad_norm": 3.03125,
      "learning_rate": 4.0091279391263034e-05,
      "loss": 0.999,
      "step": 238270
    },
    {
      "epoch": 0.8351119943082849,
      "grad_norm": 3.0,
      "learning_rate": 4.0090630362599335e-05,
      "loss": 0.9037,
      "step": 238280
    },
    {
      "epoch": 0.8351470418151805,
      "grad_norm": 2.875,
      "learning_rate": 4.008998133393564e-05,
      "loss": 0.9664,
      "step": 238290
    },
    {
      "epoch": 0.835182089322076,
      "grad_norm": 2.875,
      "learning_rate": 4.008933230527193e-05,
      "loss": 0.8568,
      "step": 238300
    },
    {
      "epoch": 0.8352171368289717,
      "grad_norm": 2.75,
      "learning_rate": 4.008868327660823e-05,
      "loss": 0.879,
      "step": 238310
    },
    {
      "epoch": 0.8352521843358672,
      "grad_norm": 3.109375,
      "learning_rate": 4.008803424794453e-05,
      "loss": 0.8878,
      "step": 238320
    },
    {
      "epoch": 0.8352872318427629,
      "grad_norm": 3.03125,
      "learning_rate": 4.008738521928083e-05,
      "loss": 0.9355,
      "step": 238330
    },
    {
      "epoch": 0.8353222793496584,
      "grad_norm": 2.921875,
      "learning_rate": 4.008673619061712e-05,
      "loss": 0.889,
      "step": 238340
    },
    {
      "epoch": 0.8353573268565541,
      "grad_norm": 3.4375,
      "learning_rate": 4.0086087161953425e-05,
      "loss": 1.019,
      "step": 238350
    },
    {
      "epoch": 0.8353923743634497,
      "grad_norm": 2.90625,
      "learning_rate": 4.008543813328972e-05,
      "loss": 0.9487,
      "step": 238360
    },
    {
      "epoch": 0.8354274218703452,
      "grad_norm": 2.828125,
      "learning_rate": 4.008478910462602e-05,
      "loss": 0.845,
      "step": 238370
    },
    {
      "epoch": 0.8354624693772409,
      "grad_norm": 2.859375,
      "learning_rate": 4.0084140075962315e-05,
      "loss": 0.8343,
      "step": 238380
    },
    {
      "epoch": 0.8354975168841364,
      "grad_norm": 2.625,
      "learning_rate": 4.008349104729862e-05,
      "loss": 0.8702,
      "step": 238390
    },
    {
      "epoch": 0.8355325643910321,
      "grad_norm": 2.796875,
      "learning_rate": 4.008284201863491e-05,
      "loss": 0.8902,
      "step": 238400
    },
    {
      "epoch": 0.8355676118979276,
      "grad_norm": 2.9375,
      "learning_rate": 4.008219298997121e-05,
      "loss": 0.9769,
      "step": 238410
    },
    {
      "epoch": 0.8356026594048233,
      "grad_norm": 2.71875,
      "learning_rate": 4.008154396130751e-05,
      "loss": 0.8415,
      "step": 238420
    },
    {
      "epoch": 0.8356377069117188,
      "grad_norm": 2.6875,
      "learning_rate": 4.008089493264381e-05,
      "loss": 0.8617,
      "step": 238430
    },
    {
      "epoch": 0.8356727544186144,
      "grad_norm": 3.21875,
      "learning_rate": 4.008024590398011e-05,
      "loss": 0.9132,
      "step": 238440
    },
    {
      "epoch": 0.83570780192551,
      "grad_norm": 2.75,
      "learning_rate": 4.0079596875316405e-05,
      "loss": 0.8387,
      "step": 238450
    },
    {
      "epoch": 0.8357428494324056,
      "grad_norm": 2.90625,
      "learning_rate": 4.0078947846652706e-05,
      "loss": 0.8796,
      "step": 238460
    },
    {
      "epoch": 0.8357778969393013,
      "grad_norm": 3.046875,
      "learning_rate": 4.0078298817989e-05,
      "loss": 0.8927,
      "step": 238470
    },
    {
      "epoch": 0.8358129444461968,
      "grad_norm": 3.0,
      "learning_rate": 4.00776497893253e-05,
      "loss": 0.9135,
      "step": 238480
    },
    {
      "epoch": 0.8358479919530925,
      "grad_norm": 2.796875,
      "learning_rate": 4.00770007606616e-05,
      "loss": 0.8564,
      "step": 238490
    },
    {
      "epoch": 0.835883039459988,
      "grad_norm": 3.171875,
      "learning_rate": 4.00763517319979e-05,
      "loss": 0.8715,
      "step": 238500
    },
    {
      "epoch": 0.8359180869668836,
      "grad_norm": 2.71875,
      "learning_rate": 4.007570270333419e-05,
      "loss": 0.9248,
      "step": 238510
    },
    {
      "epoch": 0.8359531344737792,
      "grad_norm": 2.8125,
      "learning_rate": 4.007505367467049e-05,
      "loss": 0.8237,
      "step": 238520
    },
    {
      "epoch": 0.8359881819806748,
      "grad_norm": 3.0625,
      "learning_rate": 4.007440464600679e-05,
      "loss": 0.8801,
      "step": 238530
    },
    {
      "epoch": 0.8360232294875704,
      "grad_norm": 3.171875,
      "learning_rate": 4.007375561734308e-05,
      "loss": 0.8437,
      "step": 238540
    },
    {
      "epoch": 0.836058276994466,
      "grad_norm": 2.859375,
      "learning_rate": 4.0073106588679385e-05,
      "loss": 0.9184,
      "step": 238550
    },
    {
      "epoch": 0.8360933245013616,
      "grad_norm": 2.578125,
      "learning_rate": 4.007245756001568e-05,
      "loss": 0.9317,
      "step": 238560
    },
    {
      "epoch": 0.8361283720082572,
      "grad_norm": 3.25,
      "learning_rate": 4.007180853135198e-05,
      "loss": 0.9046,
      "step": 238570
    },
    {
      "epoch": 0.8361634195151528,
      "grad_norm": 2.953125,
      "learning_rate": 4.0071159502688275e-05,
      "loss": 0.842,
      "step": 238580
    },
    {
      "epoch": 0.8361984670220484,
      "grad_norm": 3.328125,
      "learning_rate": 4.007051047402458e-05,
      "loss": 1.0351,
      "step": 238590
    },
    {
      "epoch": 0.836233514528944,
      "grad_norm": 2.46875,
      "learning_rate": 4.006986144536087e-05,
      "loss": 0.8552,
      "step": 238600
    },
    {
      "epoch": 0.8362685620358395,
      "grad_norm": 2.859375,
      "learning_rate": 4.006921241669717e-05,
      "loss": 0.8502,
      "step": 238610
    },
    {
      "epoch": 0.8363036095427352,
      "grad_norm": 3.125,
      "learning_rate": 4.006856338803347e-05,
      "loss": 0.9028,
      "step": 238620
    },
    {
      "epoch": 0.8363386570496307,
      "grad_norm": 3.125,
      "learning_rate": 4.006791435936977e-05,
      "loss": 0.9942,
      "step": 238630
    },
    {
      "epoch": 0.8363737045565264,
      "grad_norm": 2.65625,
      "learning_rate": 4.006726533070606e-05,
      "loss": 0.8146,
      "step": 238640
    },
    {
      "epoch": 0.836408752063422,
      "grad_norm": 2.75,
      "learning_rate": 4.0066616302042365e-05,
      "loss": 0.9273,
      "step": 238650
    },
    {
      "epoch": 0.8364437995703176,
      "grad_norm": 3.203125,
      "learning_rate": 4.0065967273378666e-05,
      "loss": 0.8744,
      "step": 238660
    },
    {
      "epoch": 0.8364788470772132,
      "grad_norm": 2.71875,
      "learning_rate": 4.006531824471496e-05,
      "loss": 0.8824,
      "step": 238670
    },
    {
      "epoch": 0.8365138945841087,
      "grad_norm": 2.78125,
      "learning_rate": 4.006466921605126e-05,
      "loss": 0.8753,
      "step": 238680
    },
    {
      "epoch": 0.8365489420910044,
      "grad_norm": 2.65625,
      "learning_rate": 4.006402018738756e-05,
      "loss": 0.9015,
      "step": 238690
    },
    {
      "epoch": 0.8365839895978999,
      "grad_norm": 3.28125,
      "learning_rate": 4.006337115872386e-05,
      "loss": 0.9549,
      "step": 238700
    },
    {
      "epoch": 0.8366190371047956,
      "grad_norm": 2.90625,
      "learning_rate": 4.006272213006015e-05,
      "loss": 0.8368,
      "step": 238710
    },
    {
      "epoch": 0.8366540846116911,
      "grad_norm": 3.171875,
      "learning_rate": 4.0062073101396454e-05,
      "loss": 0.8141,
      "step": 238720
    },
    {
      "epoch": 0.8366891321185868,
      "grad_norm": 3.609375,
      "learning_rate": 4.006142407273275e-05,
      "loss": 0.8298,
      "step": 238730
    },
    {
      "epoch": 0.8367241796254823,
      "grad_norm": 3.296875,
      "learning_rate": 4.006077504406905e-05,
      "loss": 0.999,
      "step": 238740
    },
    {
      "epoch": 0.8367592271323779,
      "grad_norm": 3.0,
      "learning_rate": 4.0060126015405345e-05,
      "loss": 0.831,
      "step": 238750
    },
    {
      "epoch": 0.8367942746392736,
      "grad_norm": 2.8125,
      "learning_rate": 4.0059476986741646e-05,
      "loss": 0.8308,
      "step": 238760
    },
    {
      "epoch": 0.8368293221461691,
      "grad_norm": 2.453125,
      "learning_rate": 4.005882795807794e-05,
      "loss": 0.9392,
      "step": 238770
    },
    {
      "epoch": 0.8368643696530648,
      "grad_norm": 2.8125,
      "learning_rate": 4.005817892941424e-05,
      "loss": 0.8788,
      "step": 238780
    },
    {
      "epoch": 0.8368994171599603,
      "grad_norm": 2.75,
      "learning_rate": 4.005752990075054e-05,
      "loss": 0.8345,
      "step": 238790
    },
    {
      "epoch": 0.836934464666856,
      "grad_norm": 3.484375,
      "learning_rate": 4.005688087208684e-05,
      "loss": 0.8908,
      "step": 238800
    },
    {
      "epoch": 0.8369695121737515,
      "grad_norm": 2.96875,
      "learning_rate": 4.005623184342314e-05,
      "loss": 0.9305,
      "step": 238810
    },
    {
      "epoch": 0.8370045596806471,
      "grad_norm": 2.75,
      "learning_rate": 4.0055582814759434e-05,
      "loss": 0.8255,
      "step": 238820
    },
    {
      "epoch": 0.8370396071875427,
      "grad_norm": 2.5625,
      "learning_rate": 4.0054933786095735e-05,
      "loss": 0.8577,
      "step": 238830
    },
    {
      "epoch": 0.8370746546944383,
      "grad_norm": 3.125,
      "learning_rate": 4.005428475743203e-05,
      "loss": 0.9102,
      "step": 238840
    },
    {
      "epoch": 0.837109702201334,
      "grad_norm": 2.78125,
      "learning_rate": 4.005363572876833e-05,
      "loss": 0.8897,
      "step": 238850
    },
    {
      "epoch": 0.8371447497082295,
      "grad_norm": 3.390625,
      "learning_rate": 4.0052986700104626e-05,
      "loss": 0.9091,
      "step": 238860
    },
    {
      "epoch": 0.8371797972151251,
      "grad_norm": 3.015625,
      "learning_rate": 4.005233767144093e-05,
      "loss": 0.9448,
      "step": 238870
    },
    {
      "epoch": 0.8372148447220207,
      "grad_norm": 3.609375,
      "learning_rate": 4.005168864277722e-05,
      "loss": 0.8852,
      "step": 238880
    },
    {
      "epoch": 0.8372498922289163,
      "grad_norm": 3.03125,
      "learning_rate": 4.005103961411352e-05,
      "loss": 0.9077,
      "step": 238890
    },
    {
      "epoch": 0.8372849397358119,
      "grad_norm": 2.984375,
      "learning_rate": 4.005039058544982e-05,
      "loss": 0.8847,
      "step": 238900
    },
    {
      "epoch": 0.8373199872427075,
      "grad_norm": 2.9375,
      "learning_rate": 4.004974155678611e-05,
      "loss": 0.8398,
      "step": 238910
    },
    {
      "epoch": 0.837355034749603,
      "grad_norm": 2.78125,
      "learning_rate": 4.0049092528122414e-05,
      "loss": 0.9395,
      "step": 238920
    },
    {
      "epoch": 0.8373900822564987,
      "grad_norm": 3.109375,
      "learning_rate": 4.004844349945871e-05,
      "loss": 0.8612,
      "step": 238930
    },
    {
      "epoch": 0.8374251297633942,
      "grad_norm": 3.140625,
      "learning_rate": 4.004779447079501e-05,
      "loss": 0.9037,
      "step": 238940
    },
    {
      "epoch": 0.8374601772702899,
      "grad_norm": 2.984375,
      "learning_rate": 4.0047145442131305e-05,
      "loss": 0.9027,
      "step": 238950
    },
    {
      "epoch": 0.8374952247771855,
      "grad_norm": 2.59375,
      "learning_rate": 4.0046496413467606e-05,
      "loss": 0.8715,
      "step": 238960
    },
    {
      "epoch": 0.8375302722840811,
      "grad_norm": 3.0,
      "learning_rate": 4.00458473848039e-05,
      "loss": 0.8866,
      "step": 238970
    },
    {
      "epoch": 0.8375653197909767,
      "grad_norm": 3.0,
      "learning_rate": 4.00451983561402e-05,
      "loss": 0.9248,
      "step": 238980
    },
    {
      "epoch": 0.8376003672978722,
      "grad_norm": 3.0,
      "learning_rate": 4.00445493274765e-05,
      "loss": 0.8793,
      "step": 238990
    },
    {
      "epoch": 0.8376354148047679,
      "grad_norm": 2.6875,
      "learning_rate": 4.00439002988128e-05,
      "loss": 0.8637,
      "step": 239000
    },
    {
      "epoch": 0.8376704623116634,
      "grad_norm": 2.71875,
      "learning_rate": 4.004325127014909e-05,
      "loss": 0.8319,
      "step": 239010
    },
    {
      "epoch": 0.8377055098185591,
      "grad_norm": 2.953125,
      "learning_rate": 4.0042602241485394e-05,
      "loss": 0.9104,
      "step": 239020
    },
    {
      "epoch": 0.8377405573254546,
      "grad_norm": 2.96875,
      "learning_rate": 4.0041953212821695e-05,
      "loss": 0.8414,
      "step": 239030
    },
    {
      "epoch": 0.8377756048323503,
      "grad_norm": 2.828125,
      "learning_rate": 4.004130418415799e-05,
      "loss": 0.8945,
      "step": 239040
    },
    {
      "epoch": 0.8378106523392459,
      "grad_norm": 2.40625,
      "learning_rate": 4.004065515549429e-05,
      "loss": 0.8798,
      "step": 239050
    },
    {
      "epoch": 0.8378456998461414,
      "grad_norm": 3.1875,
      "learning_rate": 4.0040006126830586e-05,
      "loss": 0.8559,
      "step": 239060
    },
    {
      "epoch": 0.8378807473530371,
      "grad_norm": 2.59375,
      "learning_rate": 4.003935709816689e-05,
      "loss": 0.8368,
      "step": 239070
    },
    {
      "epoch": 0.8379157948599326,
      "grad_norm": 2.5,
      "learning_rate": 4.003870806950318e-05,
      "loss": 0.8286,
      "step": 239080
    },
    {
      "epoch": 0.8379508423668283,
      "grad_norm": 2.9375,
      "learning_rate": 4.0038059040839483e-05,
      "loss": 0.8686,
      "step": 239090
    },
    {
      "epoch": 0.8379858898737238,
      "grad_norm": 2.5625,
      "learning_rate": 4.003741001217578e-05,
      "loss": 0.9273,
      "step": 239100
    },
    {
      "epoch": 0.8380209373806194,
      "grad_norm": 3.71875,
      "learning_rate": 4.003676098351208e-05,
      "loss": 0.8807,
      "step": 239110
    },
    {
      "epoch": 0.838055984887515,
      "grad_norm": 2.453125,
      "learning_rate": 4.0036111954848374e-05,
      "loss": 0.9029,
      "step": 239120
    },
    {
      "epoch": 0.8380910323944106,
      "grad_norm": 2.453125,
      "learning_rate": 4.0035462926184675e-05,
      "loss": 0.8872,
      "step": 239130
    },
    {
      "epoch": 0.8381260799013063,
      "grad_norm": 2.609375,
      "learning_rate": 4.003481389752097e-05,
      "loss": 0.9247,
      "step": 239140
    },
    {
      "epoch": 0.8381611274082018,
      "grad_norm": 3.078125,
      "learning_rate": 4.003416486885727e-05,
      "loss": 0.9597,
      "step": 239150
    },
    {
      "epoch": 0.8381961749150975,
      "grad_norm": 2.28125,
      "learning_rate": 4.003351584019357e-05,
      "loss": 0.8368,
      "step": 239160
    },
    {
      "epoch": 0.838231222421993,
      "grad_norm": 3.390625,
      "learning_rate": 4.003286681152987e-05,
      "loss": 0.8216,
      "step": 239170
    },
    {
      "epoch": 0.8382662699288886,
      "grad_norm": 2.984375,
      "learning_rate": 4.003221778286617e-05,
      "loss": 0.8531,
      "step": 239180
    },
    {
      "epoch": 0.8383013174357842,
      "grad_norm": 2.546875,
      "learning_rate": 4.0031568754202463e-05,
      "loss": 0.8009,
      "step": 239190
    },
    {
      "epoch": 0.8383363649426798,
      "grad_norm": 2.859375,
      "learning_rate": 4.0030919725538765e-05,
      "loss": 0.9514,
      "step": 239200
    },
    {
      "epoch": 0.8383714124495754,
      "grad_norm": 3.0,
      "learning_rate": 4.003027069687506e-05,
      "loss": 0.8835,
      "step": 239210
    },
    {
      "epoch": 0.838406459956471,
      "grad_norm": 2.796875,
      "learning_rate": 4.002962166821136e-05,
      "loss": 0.881,
      "step": 239220
    },
    {
      "epoch": 0.8384415074633665,
      "grad_norm": 2.859375,
      "learning_rate": 4.0028972639547655e-05,
      "loss": 0.8859,
      "step": 239230
    },
    {
      "epoch": 0.8384765549702622,
      "grad_norm": 2.96875,
      "learning_rate": 4.002832361088396e-05,
      "loss": 0.9372,
      "step": 239240
    },
    {
      "epoch": 0.8385116024771578,
      "grad_norm": 2.828125,
      "learning_rate": 4.002767458222025e-05,
      "loss": 0.8561,
      "step": 239250
    },
    {
      "epoch": 0.8385466499840534,
      "grad_norm": 2.859375,
      "learning_rate": 4.002702555355655e-05,
      "loss": 0.813,
      "step": 239260
    },
    {
      "epoch": 0.838581697490949,
      "grad_norm": 2.75,
      "learning_rate": 4.002637652489285e-05,
      "loss": 0.9654,
      "step": 239270
    },
    {
      "epoch": 0.8386167449978446,
      "grad_norm": 2.78125,
      "learning_rate": 4.002572749622914e-05,
      "loss": 0.9293,
      "step": 239280
    },
    {
      "epoch": 0.8386517925047402,
      "grad_norm": 2.5,
      "learning_rate": 4.0025078467565443e-05,
      "loss": 0.9286,
      "step": 239290
    },
    {
      "epoch": 0.8386868400116357,
      "grad_norm": 2.875,
      "learning_rate": 4.002442943890174e-05,
      "loss": 0.9037,
      "step": 239300
    },
    {
      "epoch": 0.8387218875185314,
      "grad_norm": 3.4375,
      "learning_rate": 4.002378041023804e-05,
      "loss": 0.8096,
      "step": 239310
    },
    {
      "epoch": 0.8387569350254269,
      "grad_norm": 2.984375,
      "learning_rate": 4.0023131381574334e-05,
      "loss": 0.9672,
      "step": 239320
    },
    {
      "epoch": 0.8387919825323226,
      "grad_norm": 3.171875,
      "learning_rate": 4.0022482352910635e-05,
      "loss": 0.8893,
      "step": 239330
    },
    {
      "epoch": 0.8388270300392182,
      "grad_norm": 2.953125,
      "learning_rate": 4.002183332424693e-05,
      "loss": 0.8118,
      "step": 239340
    },
    {
      "epoch": 0.8388620775461137,
      "grad_norm": 3.21875,
      "learning_rate": 4.002118429558323e-05,
      "loss": 0.8554,
      "step": 239350
    },
    {
      "epoch": 0.8388971250530094,
      "grad_norm": 2.703125,
      "learning_rate": 4.0020535266919526e-05,
      "loss": 0.8359,
      "step": 239360
    },
    {
      "epoch": 0.8389321725599049,
      "grad_norm": 2.515625,
      "learning_rate": 4.001988623825583e-05,
      "loss": 0.9096,
      "step": 239370
    },
    {
      "epoch": 0.8389672200668006,
      "grad_norm": 3.171875,
      "learning_rate": 4.001923720959212e-05,
      "loss": 0.9305,
      "step": 239380
    },
    {
      "epoch": 0.8390022675736961,
      "grad_norm": 3.015625,
      "learning_rate": 4.0018588180928423e-05,
      "loss": 0.8252,
      "step": 239390
    },
    {
      "epoch": 0.8390373150805918,
      "grad_norm": 2.671875,
      "learning_rate": 4.0017939152264725e-05,
      "loss": 0.8444,
      "step": 239400
    },
    {
      "epoch": 0.8390723625874873,
      "grad_norm": 2.71875,
      "learning_rate": 4.001729012360102e-05,
      "loss": 0.9098,
      "step": 239410
    },
    {
      "epoch": 0.8391074100943829,
      "grad_norm": 3.0,
      "learning_rate": 4.001664109493732e-05,
      "loss": 0.8826,
      "step": 239420
    },
    {
      "epoch": 0.8391424576012785,
      "grad_norm": 3.1875,
      "learning_rate": 4.0015992066273615e-05,
      "loss": 0.8068,
      "step": 239430
    },
    {
      "epoch": 0.8391775051081741,
      "grad_norm": 2.890625,
      "learning_rate": 4.001534303760992e-05,
      "loss": 0.8604,
      "step": 239440
    },
    {
      "epoch": 0.8392125526150698,
      "grad_norm": 2.984375,
      "learning_rate": 4.001469400894621e-05,
      "loss": 0.8537,
      "step": 239450
    },
    {
      "epoch": 0.8392476001219653,
      "grad_norm": 2.65625,
      "learning_rate": 4.001404498028251e-05,
      "loss": 0.8303,
      "step": 239460
    },
    {
      "epoch": 0.839282647628861,
      "grad_norm": 2.9375,
      "learning_rate": 4.001339595161881e-05,
      "loss": 0.9119,
      "step": 239470
    },
    {
      "epoch": 0.8393176951357565,
      "grad_norm": 3.125,
      "learning_rate": 4.001274692295511e-05,
      "loss": 0.9404,
      "step": 239480
    },
    {
      "epoch": 0.8393527426426521,
      "grad_norm": 2.609375,
      "learning_rate": 4.0012097894291403e-05,
      "loss": 0.8808,
      "step": 239490
    },
    {
      "epoch": 0.8393877901495477,
      "grad_norm": 3.109375,
      "learning_rate": 4.0011448865627705e-05,
      "loss": 0.8158,
      "step": 239500
    },
    {
      "epoch": 0.8394228376564433,
      "grad_norm": 3.484375,
      "learning_rate": 4.0010799836964e-05,
      "loss": 0.9096,
      "step": 239510
    },
    {
      "epoch": 0.8394578851633389,
      "grad_norm": 3.40625,
      "learning_rate": 4.00101508083003e-05,
      "loss": 0.9052,
      "step": 239520
    },
    {
      "epoch": 0.8394929326702345,
      "grad_norm": 2.59375,
      "learning_rate": 4.00095017796366e-05,
      "loss": 0.8693,
      "step": 239530
    },
    {
      "epoch": 0.8395279801771302,
      "grad_norm": 2.984375,
      "learning_rate": 4.00088527509729e-05,
      "loss": 0.9416,
      "step": 239540
    },
    {
      "epoch": 0.8395630276840257,
      "grad_norm": 3.0,
      "learning_rate": 4.00082037223092e-05,
      "loss": 0.8093,
      "step": 239550
    },
    {
      "epoch": 0.8395980751909213,
      "grad_norm": 3.0625,
      "learning_rate": 4.000755469364549e-05,
      "loss": 0.9024,
      "step": 239560
    },
    {
      "epoch": 0.8396331226978169,
      "grad_norm": 2.890625,
      "learning_rate": 4.0006905664981794e-05,
      "loss": 0.8668,
      "step": 239570
    },
    {
      "epoch": 0.8396681702047125,
      "grad_norm": 2.828125,
      "learning_rate": 4.000625663631809e-05,
      "loss": 0.8692,
      "step": 239580
    },
    {
      "epoch": 0.839703217711608,
      "grad_norm": 2.796875,
      "learning_rate": 4.000560760765439e-05,
      "loss": 0.8694,
      "step": 239590
    },
    {
      "epoch": 0.8397382652185037,
      "grad_norm": 3.03125,
      "learning_rate": 4.0004958578990685e-05,
      "loss": 0.8793,
      "step": 239600
    },
    {
      "epoch": 0.8397733127253992,
      "grad_norm": 2.953125,
      "learning_rate": 4.0004309550326986e-05,
      "loss": 0.8537,
      "step": 239610
    },
    {
      "epoch": 0.8398083602322949,
      "grad_norm": 3.265625,
      "learning_rate": 4.000366052166328e-05,
      "loss": 0.9082,
      "step": 239620
    },
    {
      "epoch": 0.8398434077391904,
      "grad_norm": 3.21875,
      "learning_rate": 4.000301149299958e-05,
      "loss": 0.9719,
      "step": 239630
    },
    {
      "epoch": 0.8398784552460861,
      "grad_norm": 2.875,
      "learning_rate": 4.000236246433588e-05,
      "loss": 0.885,
      "step": 239640
    },
    {
      "epoch": 0.8399135027529817,
      "grad_norm": 2.9375,
      "learning_rate": 4.000171343567217e-05,
      "loss": 0.9011,
      "step": 239650
    },
    {
      "epoch": 0.8399485502598772,
      "grad_norm": 2.96875,
      "learning_rate": 4.000106440700847e-05,
      "loss": 0.8932,
      "step": 239660
    },
    {
      "epoch": 0.8399835977667729,
      "grad_norm": 3.171875,
      "learning_rate": 4.000041537834477e-05,
      "loss": 0.8783,
      "step": 239670
    },
    {
      "epoch": 0.8400186452736684,
      "grad_norm": 2.828125,
      "learning_rate": 3.999976634968107e-05,
      "loss": 0.8718,
      "step": 239680
    },
    {
      "epoch": 0.8400536927805641,
      "grad_norm": 2.65625,
      "learning_rate": 3.9999117321017363e-05,
      "loss": 0.8236,
      "step": 239690
    },
    {
      "epoch": 0.8400887402874596,
      "grad_norm": 3.03125,
      "learning_rate": 3.9998468292353665e-05,
      "loss": 0.8599,
      "step": 239700
    },
    {
      "epoch": 0.8401237877943553,
      "grad_norm": 2.671875,
      "learning_rate": 3.999781926368996e-05,
      "loss": 0.8666,
      "step": 239710
    },
    {
      "epoch": 0.8401588353012508,
      "grad_norm": 2.75,
      "learning_rate": 3.999717023502626e-05,
      "loss": 0.8557,
      "step": 239720
    },
    {
      "epoch": 0.8401938828081464,
      "grad_norm": 2.953125,
      "learning_rate": 3.9996521206362555e-05,
      "loss": 0.9381,
      "step": 239730
    },
    {
      "epoch": 0.8402289303150421,
      "grad_norm": 2.734375,
      "learning_rate": 3.999587217769886e-05,
      "loss": 0.9486,
      "step": 239740
    },
    {
      "epoch": 0.8402639778219376,
      "grad_norm": 3.078125,
      "learning_rate": 3.999522314903515e-05,
      "loss": 0.8711,
      "step": 239750
    },
    {
      "epoch": 0.8402990253288333,
      "grad_norm": 2.5625,
      "learning_rate": 3.999457412037145e-05,
      "loss": 0.8943,
      "step": 239760
    },
    {
      "epoch": 0.8403340728357288,
      "grad_norm": 3.03125,
      "learning_rate": 3.9993925091707754e-05,
      "loss": 0.8966,
      "step": 239770
    },
    {
      "epoch": 0.8403691203426245,
      "grad_norm": 2.8125,
      "learning_rate": 3.999327606304405e-05,
      "loss": 0.8818,
      "step": 239780
    },
    {
      "epoch": 0.84040416784952,
      "grad_norm": 3.1875,
      "learning_rate": 3.999262703438035e-05,
      "loss": 0.8971,
      "step": 239790
    },
    {
      "epoch": 0.8404392153564156,
      "grad_norm": 3.171875,
      "learning_rate": 3.9991978005716645e-05,
      "loss": 1.0083,
      "step": 239800
    },
    {
      "epoch": 0.8404742628633112,
      "grad_norm": 2.984375,
      "learning_rate": 3.9991328977052946e-05,
      "loss": 0.9359,
      "step": 239810
    },
    {
      "epoch": 0.8405093103702068,
      "grad_norm": 2.65625,
      "learning_rate": 3.999067994838924e-05,
      "loss": 0.8354,
      "step": 239820
    },
    {
      "epoch": 0.8405443578771025,
      "grad_norm": 3.140625,
      "learning_rate": 3.999003091972554e-05,
      "loss": 0.9163,
      "step": 239830
    },
    {
      "epoch": 0.840579405383998,
      "grad_norm": 2.75,
      "learning_rate": 3.998938189106184e-05,
      "loss": 0.9112,
      "step": 239840
    },
    {
      "epoch": 0.8406144528908936,
      "grad_norm": 2.96875,
      "learning_rate": 3.998873286239814e-05,
      "loss": 0.8428,
      "step": 239850
    },
    {
      "epoch": 0.8406495003977892,
      "grad_norm": 3.140625,
      "learning_rate": 3.998808383373443e-05,
      "loss": 0.8702,
      "step": 239860
    },
    {
      "epoch": 0.8406845479046848,
      "grad_norm": 2.671875,
      "learning_rate": 3.9987434805070734e-05,
      "loss": 0.8283,
      "step": 239870
    },
    {
      "epoch": 0.8407195954115804,
      "grad_norm": 3.375,
      "learning_rate": 3.998678577640703e-05,
      "loss": 1.0151,
      "step": 239880
    },
    {
      "epoch": 0.840754642918476,
      "grad_norm": 2.703125,
      "learning_rate": 3.998613674774333e-05,
      "loss": 0.8483,
      "step": 239890
    },
    {
      "epoch": 0.8407896904253715,
      "grad_norm": 2.90625,
      "learning_rate": 3.998548771907963e-05,
      "loss": 0.8784,
      "step": 239900
    },
    {
      "epoch": 0.8408247379322672,
      "grad_norm": 3.15625,
      "learning_rate": 3.9984838690415926e-05,
      "loss": 0.9278,
      "step": 239910
    },
    {
      "epoch": 0.8408597854391627,
      "grad_norm": 2.828125,
      "learning_rate": 3.998418966175223e-05,
      "loss": 0.92,
      "step": 239920
    },
    {
      "epoch": 0.8408948329460584,
      "grad_norm": 3.1875,
      "learning_rate": 3.998354063308852e-05,
      "loss": 0.9144,
      "step": 239930
    },
    {
      "epoch": 0.840929880452954,
      "grad_norm": 2.828125,
      "learning_rate": 3.9982891604424824e-05,
      "loss": 0.9191,
      "step": 239940
    },
    {
      "epoch": 0.8409649279598496,
      "grad_norm": 2.96875,
      "learning_rate": 3.998224257576112e-05,
      "loss": 0.9043,
      "step": 239950
    },
    {
      "epoch": 0.8409999754667452,
      "grad_norm": 2.796875,
      "learning_rate": 3.998159354709742e-05,
      "loss": 0.9154,
      "step": 239960
    },
    {
      "epoch": 0.8410350229736407,
      "grad_norm": 2.390625,
      "learning_rate": 3.9980944518433714e-05,
      "loss": 0.8203,
      "step": 239970
    },
    {
      "epoch": 0.8410700704805364,
      "grad_norm": 3.1875,
      "learning_rate": 3.9980295489770016e-05,
      "loss": 0.8344,
      "step": 239980
    },
    {
      "epoch": 0.8411051179874319,
      "grad_norm": 2.90625,
      "learning_rate": 3.997964646110631e-05,
      "loss": 0.8835,
      "step": 239990
    },
    {
      "epoch": 0.8411401654943276,
      "grad_norm": 3.28125,
      "learning_rate": 3.997899743244261e-05,
      "loss": 0.8806,
      "step": 240000
    },
    {
      "epoch": 0.8411401654943276,
      "eval_loss": 0.8330914974212646,
      "eval_runtime": 560.0407,
      "eval_samples_per_second": 679.301,
      "eval_steps_per_second": 56.608,
      "step": 240000
    },
    {
      "epoch": 0.8411752130012231,
      "grad_norm": 2.75,
      "learning_rate": 3.9978348403778906e-05,
      "loss": 0.883,
      "step": 240010
    },
    {
      "epoch": 0.8412102605081188,
      "grad_norm": 2.96875,
      "learning_rate": 3.99776993751152e-05,
      "loss": 0.9066,
      "step": 240020
    },
    {
      "epoch": 0.8412453080150144,
      "grad_norm": 2.609375,
      "learning_rate": 3.99770503464515e-05,
      "loss": 0.9238,
      "step": 240030
    },
    {
      "epoch": 0.8412803555219099,
      "grad_norm": 3.53125,
      "learning_rate": 3.99764013177878e-05,
      "loss": 0.9133,
      "step": 240040
    },
    {
      "epoch": 0.8413154030288056,
      "grad_norm": 3.015625,
      "learning_rate": 3.99757522891241e-05,
      "loss": 0.9555,
      "step": 240050
    },
    {
      "epoch": 0.8413504505357011,
      "grad_norm": 2.703125,
      "learning_rate": 3.997510326046039e-05,
      "loss": 0.9054,
      "step": 240060
    },
    {
      "epoch": 0.8413854980425968,
      "grad_norm": 2.453125,
      "learning_rate": 3.9974454231796694e-05,
      "loss": 0.8578,
      "step": 240070
    },
    {
      "epoch": 0.8414205455494923,
      "grad_norm": 2.6875,
      "learning_rate": 3.997380520313299e-05,
      "loss": 0.8977,
      "step": 240080
    },
    {
      "epoch": 0.841455593056388,
      "grad_norm": 3.359375,
      "learning_rate": 3.997315617446929e-05,
      "loss": 0.8694,
      "step": 240090
    },
    {
      "epoch": 0.8414906405632835,
      "grad_norm": 2.78125,
      "learning_rate": 3.9972507145805585e-05,
      "loss": 0.941,
      "step": 240100
    },
    {
      "epoch": 0.8415256880701791,
      "grad_norm": 2.53125,
      "learning_rate": 3.9971858117141886e-05,
      "loss": 0.8839,
      "step": 240110
    },
    {
      "epoch": 0.8415607355770747,
      "grad_norm": 3.28125,
      "learning_rate": 3.997120908847819e-05,
      "loss": 0.9512,
      "step": 240120
    },
    {
      "epoch": 0.8415957830839703,
      "grad_norm": 3.5,
      "learning_rate": 3.997056005981448e-05,
      "loss": 0.9138,
      "step": 240130
    },
    {
      "epoch": 0.841630830590866,
      "grad_norm": 3.015625,
      "learning_rate": 3.9969911031150784e-05,
      "loss": 0.9169,
      "step": 240140
    },
    {
      "epoch": 0.8416658780977615,
      "grad_norm": 3.0625,
      "learning_rate": 3.996926200248708e-05,
      "loss": 0.8703,
      "step": 240150
    },
    {
      "epoch": 0.8417009256046571,
      "grad_norm": 2.5,
      "learning_rate": 3.996861297382338e-05,
      "loss": 0.9,
      "step": 240160
    },
    {
      "epoch": 0.8417359731115527,
      "grad_norm": 2.84375,
      "learning_rate": 3.9967963945159674e-05,
      "loss": 0.7795,
      "step": 240170
    },
    {
      "epoch": 0.8417710206184483,
      "grad_norm": 2.375,
      "learning_rate": 3.9967314916495976e-05,
      "loss": 0.8552,
      "step": 240180
    },
    {
      "epoch": 0.8418060681253439,
      "grad_norm": 2.890625,
      "learning_rate": 3.996666588783227e-05,
      "loss": 0.9499,
      "step": 240190
    },
    {
      "epoch": 0.8418411156322395,
      "grad_norm": 3.078125,
      "learning_rate": 3.996601685916857e-05,
      "loss": 0.8766,
      "step": 240200
    },
    {
      "epoch": 0.841876163139135,
      "grad_norm": 2.890625,
      "learning_rate": 3.9965367830504866e-05,
      "loss": 0.8574,
      "step": 240210
    },
    {
      "epoch": 0.8419112106460307,
      "grad_norm": 2.796875,
      "learning_rate": 3.996471880184117e-05,
      "loss": 0.9134,
      "step": 240220
    },
    {
      "epoch": 0.8419462581529263,
      "grad_norm": 2.953125,
      "learning_rate": 3.996406977317746e-05,
      "loss": 0.889,
      "step": 240230
    },
    {
      "epoch": 0.8419813056598219,
      "grad_norm": 3.0,
      "learning_rate": 3.9963420744513764e-05,
      "loss": 0.9104,
      "step": 240240
    },
    {
      "epoch": 0.8420163531667175,
      "grad_norm": 2.75,
      "learning_rate": 3.996277171585006e-05,
      "loss": 0.8828,
      "step": 240250
    },
    {
      "epoch": 0.8420514006736131,
      "grad_norm": 3.0,
      "learning_rate": 3.996212268718636e-05,
      "loss": 0.8941,
      "step": 240260
    },
    {
      "epoch": 0.8420864481805087,
      "grad_norm": 2.703125,
      "learning_rate": 3.996147365852266e-05,
      "loss": 0.9364,
      "step": 240270
    },
    {
      "epoch": 0.8421214956874042,
      "grad_norm": 3.140625,
      "learning_rate": 3.9960824629858956e-05,
      "loss": 0.8766,
      "step": 240280
    },
    {
      "epoch": 0.8421565431942999,
      "grad_norm": 3.265625,
      "learning_rate": 3.996017560119526e-05,
      "loss": 0.9277,
      "step": 240290
    },
    {
      "epoch": 0.8421915907011954,
      "grad_norm": 2.90625,
      "learning_rate": 3.995952657253155e-05,
      "loss": 0.9107,
      "step": 240300
    },
    {
      "epoch": 0.8422266382080911,
      "grad_norm": 3.03125,
      "learning_rate": 3.995887754386785e-05,
      "loss": 0.9346,
      "step": 240310
    },
    {
      "epoch": 0.8422616857149867,
      "grad_norm": 2.953125,
      "learning_rate": 3.995822851520415e-05,
      "loss": 0.8928,
      "step": 240320
    },
    {
      "epoch": 0.8422967332218823,
      "grad_norm": 2.671875,
      "learning_rate": 3.995757948654045e-05,
      "loss": 0.8597,
      "step": 240330
    },
    {
      "epoch": 0.8423317807287779,
      "grad_norm": 3.125,
      "learning_rate": 3.9956930457876744e-05,
      "loss": 0.9036,
      "step": 240340
    },
    {
      "epoch": 0.8423668282356734,
      "grad_norm": 3.578125,
      "learning_rate": 3.9956281429213045e-05,
      "loss": 0.9299,
      "step": 240350
    },
    {
      "epoch": 0.8424018757425691,
      "grad_norm": 3.03125,
      "learning_rate": 3.995563240054934e-05,
      "loss": 0.815,
      "step": 240360
    },
    {
      "epoch": 0.8424369232494646,
      "grad_norm": 2.984375,
      "learning_rate": 3.995498337188564e-05,
      "loss": 0.9125,
      "step": 240370
    },
    {
      "epoch": 0.8424719707563603,
      "grad_norm": 3.0,
      "learning_rate": 3.9954334343221936e-05,
      "loss": 0.8604,
      "step": 240380
    },
    {
      "epoch": 0.8425070182632558,
      "grad_norm": 2.875,
      "learning_rate": 3.995368531455824e-05,
      "loss": 0.9019,
      "step": 240390
    },
    {
      "epoch": 0.8425420657701514,
      "grad_norm": 2.828125,
      "learning_rate": 3.995303628589453e-05,
      "loss": 0.8221,
      "step": 240400
    },
    {
      "epoch": 0.842577113277047,
      "grad_norm": 2.953125,
      "learning_rate": 3.9952387257230826e-05,
      "loss": 0.8657,
      "step": 240410
    },
    {
      "epoch": 0.8426121607839426,
      "grad_norm": 3.6875,
      "learning_rate": 3.995173822856713e-05,
      "loss": 0.8812,
      "step": 240420
    },
    {
      "epoch": 0.8426472082908383,
      "grad_norm": 2.609375,
      "learning_rate": 3.995108919990342e-05,
      "loss": 0.9346,
      "step": 240430
    },
    {
      "epoch": 0.8426822557977338,
      "grad_norm": 3.171875,
      "learning_rate": 3.9950440171239724e-05,
      "loss": 0.9176,
      "step": 240440
    },
    {
      "epoch": 0.8427173033046295,
      "grad_norm": 2.890625,
      "learning_rate": 3.994979114257602e-05,
      "loss": 0.8499,
      "step": 240450
    },
    {
      "epoch": 0.842752350811525,
      "grad_norm": 3.046875,
      "learning_rate": 3.994914211391232e-05,
      "loss": 0.8517,
      "step": 240460
    },
    {
      "epoch": 0.8427873983184206,
      "grad_norm": 3.203125,
      "learning_rate": 3.9948493085248614e-05,
      "loss": 0.8355,
      "step": 240470
    },
    {
      "epoch": 0.8428224458253162,
      "grad_norm": 2.84375,
      "learning_rate": 3.9947844056584916e-05,
      "loss": 0.8063,
      "step": 240480
    },
    {
      "epoch": 0.8428574933322118,
      "grad_norm": 3.0625,
      "learning_rate": 3.994719502792122e-05,
      "loss": 0.8659,
      "step": 240490
    },
    {
      "epoch": 0.8428925408391074,
      "grad_norm": 2.8125,
      "learning_rate": 3.994654599925751e-05,
      "loss": 0.8615,
      "step": 240500
    },
    {
      "epoch": 0.842927588346003,
      "grad_norm": 2.59375,
      "learning_rate": 3.994589697059381e-05,
      "loss": 0.8,
      "step": 240510
    },
    {
      "epoch": 0.8429626358528987,
      "grad_norm": 2.5,
      "learning_rate": 3.994524794193011e-05,
      "loss": 0.9129,
      "step": 240520
    },
    {
      "epoch": 0.8429976833597942,
      "grad_norm": 3.046875,
      "learning_rate": 3.994459891326641e-05,
      "loss": 0.8975,
      "step": 240530
    },
    {
      "epoch": 0.8430327308666898,
      "grad_norm": 2.5625,
      "learning_rate": 3.9943949884602704e-05,
      "loss": 0.8979,
      "step": 240540
    },
    {
      "epoch": 0.8430677783735854,
      "grad_norm": 3.234375,
      "learning_rate": 3.9943300855939005e-05,
      "loss": 0.9587,
      "step": 240550
    },
    {
      "epoch": 0.843102825880481,
      "grad_norm": 2.921875,
      "learning_rate": 3.99426518272753e-05,
      "loss": 0.8679,
      "step": 240560
    },
    {
      "epoch": 0.8431378733873766,
      "grad_norm": 2.921875,
      "learning_rate": 3.99420027986116e-05,
      "loss": 0.9794,
      "step": 240570
    },
    {
      "epoch": 0.8431729208942722,
      "grad_norm": 3.453125,
      "learning_rate": 3.9941353769947896e-05,
      "loss": 0.8664,
      "step": 240580
    },
    {
      "epoch": 0.8432079684011677,
      "grad_norm": 2.859375,
      "learning_rate": 3.99407047412842e-05,
      "loss": 0.8895,
      "step": 240590
    },
    {
      "epoch": 0.8432430159080634,
      "grad_norm": 3.234375,
      "learning_rate": 3.994005571262049e-05,
      "loss": 0.8715,
      "step": 240600
    },
    {
      "epoch": 0.8432780634149589,
      "grad_norm": 2.953125,
      "learning_rate": 3.993940668395679e-05,
      "loss": 0.8206,
      "step": 240610
    },
    {
      "epoch": 0.8433131109218546,
      "grad_norm": 3.109375,
      "learning_rate": 3.993875765529309e-05,
      "loss": 0.845,
      "step": 240620
    },
    {
      "epoch": 0.8433481584287502,
      "grad_norm": 2.90625,
      "learning_rate": 3.993810862662939e-05,
      "loss": 0.8972,
      "step": 240630
    },
    {
      "epoch": 0.8433832059356458,
      "grad_norm": 2.75,
      "learning_rate": 3.993745959796569e-05,
      "loss": 0.9352,
      "step": 240640
    },
    {
      "epoch": 0.8434182534425414,
      "grad_norm": 2.859375,
      "learning_rate": 3.9936810569301985e-05,
      "loss": 0.8469,
      "step": 240650
    },
    {
      "epoch": 0.8434533009494369,
      "grad_norm": 2.828125,
      "learning_rate": 3.9936161540638286e-05,
      "loss": 0.9234,
      "step": 240660
    },
    {
      "epoch": 0.8434883484563326,
      "grad_norm": 2.84375,
      "learning_rate": 3.993551251197458e-05,
      "loss": 0.8587,
      "step": 240670
    },
    {
      "epoch": 0.8435233959632281,
      "grad_norm": 3.046875,
      "learning_rate": 3.993486348331088e-05,
      "loss": 0.8245,
      "step": 240680
    },
    {
      "epoch": 0.8435584434701238,
      "grad_norm": 2.921875,
      "learning_rate": 3.993421445464718e-05,
      "loss": 0.8797,
      "step": 240690
    },
    {
      "epoch": 0.8435934909770193,
      "grad_norm": 3.0625,
      "learning_rate": 3.993356542598348e-05,
      "loss": 0.8706,
      "step": 240700
    },
    {
      "epoch": 0.843628538483915,
      "grad_norm": 3.34375,
      "learning_rate": 3.993291639731977e-05,
      "loss": 0.8753,
      "step": 240710
    },
    {
      "epoch": 0.8436635859908106,
      "grad_norm": 2.984375,
      "learning_rate": 3.9932267368656074e-05,
      "loss": 0.8894,
      "step": 240720
    },
    {
      "epoch": 0.8436986334977061,
      "grad_norm": 2.703125,
      "learning_rate": 3.993161833999237e-05,
      "loss": 0.9697,
      "step": 240730
    },
    {
      "epoch": 0.8437336810046018,
      "grad_norm": 2.8125,
      "learning_rate": 3.993096931132867e-05,
      "loss": 0.9226,
      "step": 240740
    },
    {
      "epoch": 0.8437687285114973,
      "grad_norm": 3.15625,
      "learning_rate": 3.9930320282664965e-05,
      "loss": 0.8213,
      "step": 240750
    },
    {
      "epoch": 0.843803776018393,
      "grad_norm": 2.953125,
      "learning_rate": 3.9929671254001266e-05,
      "loss": 0.8719,
      "step": 240760
    },
    {
      "epoch": 0.8438388235252885,
      "grad_norm": 2.96875,
      "learning_rate": 3.992902222533756e-05,
      "loss": 0.8917,
      "step": 240770
    },
    {
      "epoch": 0.8438738710321841,
      "grad_norm": 2.828125,
      "learning_rate": 3.9928373196673856e-05,
      "loss": 1.0068,
      "step": 240780
    },
    {
      "epoch": 0.8439089185390797,
      "grad_norm": 3.03125,
      "learning_rate": 3.992772416801016e-05,
      "loss": 0.8616,
      "step": 240790
    },
    {
      "epoch": 0.8439439660459753,
      "grad_norm": 3.25,
      "learning_rate": 3.992707513934645e-05,
      "loss": 0.8575,
      "step": 240800
    },
    {
      "epoch": 0.843979013552871,
      "grad_norm": 2.609375,
      "learning_rate": 3.992642611068275e-05,
      "loss": 0.8998,
      "step": 240810
    },
    {
      "epoch": 0.8440140610597665,
      "grad_norm": 3.046875,
      "learning_rate": 3.992577708201905e-05,
      "loss": 0.8836,
      "step": 240820
    },
    {
      "epoch": 0.8440491085666622,
      "grad_norm": 2.90625,
      "learning_rate": 3.992512805335535e-05,
      "loss": 0.8936,
      "step": 240830
    },
    {
      "epoch": 0.8440841560735577,
      "grad_norm": 2.578125,
      "learning_rate": 3.9924479024691644e-05,
      "loss": 0.8794,
      "step": 240840
    },
    {
      "epoch": 0.8441192035804533,
      "grad_norm": 3.203125,
      "learning_rate": 3.9923829996027945e-05,
      "loss": 0.8207,
      "step": 240850
    },
    {
      "epoch": 0.8441542510873489,
      "grad_norm": 3.109375,
      "learning_rate": 3.9923180967364246e-05,
      "loss": 0.9011,
      "step": 240860
    },
    {
      "epoch": 0.8441892985942445,
      "grad_norm": 2.765625,
      "learning_rate": 3.992253193870054e-05,
      "loss": 0.8858,
      "step": 240870
    },
    {
      "epoch": 0.84422434610114,
      "grad_norm": 2.890625,
      "learning_rate": 3.992188291003684e-05,
      "loss": 0.8703,
      "step": 240880
    },
    {
      "epoch": 0.8442593936080357,
      "grad_norm": 2.796875,
      "learning_rate": 3.992123388137314e-05,
      "loss": 0.8896,
      "step": 240890
    },
    {
      "epoch": 0.8442944411149312,
      "grad_norm": 2.59375,
      "learning_rate": 3.992058485270944e-05,
      "loss": 0.8375,
      "step": 240900
    },
    {
      "epoch": 0.8443294886218269,
      "grad_norm": 2.71875,
      "learning_rate": 3.991993582404573e-05,
      "loss": 0.8786,
      "step": 240910
    },
    {
      "epoch": 0.8443645361287225,
      "grad_norm": 3.0,
      "learning_rate": 3.9919286795382034e-05,
      "loss": 0.9902,
      "step": 240920
    },
    {
      "epoch": 0.8443995836356181,
      "grad_norm": 2.75,
      "learning_rate": 3.991863776671833e-05,
      "loss": 0.8744,
      "step": 240930
    },
    {
      "epoch": 0.8444346311425137,
      "grad_norm": 2.875,
      "learning_rate": 3.991798873805463e-05,
      "loss": 0.8401,
      "step": 240940
    },
    {
      "epoch": 0.8444696786494092,
      "grad_norm": 3.203125,
      "learning_rate": 3.9917339709390925e-05,
      "loss": 0.9027,
      "step": 240950
    },
    {
      "epoch": 0.8445047261563049,
      "grad_norm": 3.125,
      "learning_rate": 3.9916690680727226e-05,
      "loss": 0.8002,
      "step": 240960
    },
    {
      "epoch": 0.8445397736632004,
      "grad_norm": 2.65625,
      "learning_rate": 3.991604165206352e-05,
      "loss": 0.9198,
      "step": 240970
    },
    {
      "epoch": 0.8445748211700961,
      "grad_norm": 3.265625,
      "learning_rate": 3.991539262339982e-05,
      "loss": 0.926,
      "step": 240980
    },
    {
      "epoch": 0.8446098686769916,
      "grad_norm": 2.625,
      "learning_rate": 3.9914743594736124e-05,
      "loss": 0.8938,
      "step": 240990
    },
    {
      "epoch": 0.8446449161838873,
      "grad_norm": 2.515625,
      "learning_rate": 3.991409456607242e-05,
      "loss": 0.8172,
      "step": 241000
    },
    {
      "epoch": 0.8446799636907829,
      "grad_norm": 2.640625,
      "learning_rate": 3.991344553740872e-05,
      "loss": 0.8298,
      "step": 241010
    },
    {
      "epoch": 0.8447150111976784,
      "grad_norm": 2.8125,
      "learning_rate": 3.9912796508745014e-05,
      "loss": 0.9144,
      "step": 241020
    },
    {
      "epoch": 0.8447500587045741,
      "grad_norm": 2.828125,
      "learning_rate": 3.9912147480081316e-05,
      "loss": 0.8857,
      "step": 241030
    },
    {
      "epoch": 0.8447851062114696,
      "grad_norm": 3.0,
      "learning_rate": 3.991149845141761e-05,
      "loss": 0.8268,
      "step": 241040
    },
    {
      "epoch": 0.8448201537183653,
      "grad_norm": 3.0,
      "learning_rate": 3.991084942275391e-05,
      "loss": 0.9602,
      "step": 241050
    },
    {
      "epoch": 0.8448552012252608,
      "grad_norm": 2.828125,
      "learning_rate": 3.9910200394090206e-05,
      "loss": 0.9015,
      "step": 241060
    },
    {
      "epoch": 0.8448902487321565,
      "grad_norm": 2.703125,
      "learning_rate": 3.990955136542651e-05,
      "loss": 0.8957,
      "step": 241070
    },
    {
      "epoch": 0.844925296239052,
      "grad_norm": 2.6875,
      "learning_rate": 3.99089023367628e-05,
      "loss": 0.8564,
      "step": 241080
    },
    {
      "epoch": 0.8449603437459476,
      "grad_norm": 2.703125,
      "learning_rate": 3.9908253308099104e-05,
      "loss": 0.8917,
      "step": 241090
    },
    {
      "epoch": 0.8449953912528432,
      "grad_norm": 3.15625,
      "learning_rate": 3.99076042794354e-05,
      "loss": 0.9872,
      "step": 241100
    },
    {
      "epoch": 0.8450304387597388,
      "grad_norm": 3.09375,
      "learning_rate": 3.99069552507717e-05,
      "loss": 0.853,
      "step": 241110
    },
    {
      "epoch": 0.8450654862666345,
      "grad_norm": 2.609375,
      "learning_rate": 3.9906306222107994e-05,
      "loss": 0.8824,
      "step": 241120
    },
    {
      "epoch": 0.84510053377353,
      "grad_norm": 2.65625,
      "learning_rate": 3.9905657193444296e-05,
      "loss": 0.8646,
      "step": 241130
    },
    {
      "epoch": 0.8451355812804257,
      "grad_norm": 2.5625,
      "learning_rate": 3.99050081647806e-05,
      "loss": 0.8927,
      "step": 241140
    },
    {
      "epoch": 0.8451706287873212,
      "grad_norm": 3.125,
      "learning_rate": 3.9904359136116885e-05,
      "loss": 0.9337,
      "step": 241150
    },
    {
      "epoch": 0.8452056762942168,
      "grad_norm": 2.828125,
      "learning_rate": 3.9903710107453186e-05,
      "loss": 0.8672,
      "step": 241160
    },
    {
      "epoch": 0.8452407238011124,
      "grad_norm": 3.203125,
      "learning_rate": 3.990306107878948e-05,
      "loss": 0.8946,
      "step": 241170
    },
    {
      "epoch": 0.845275771308008,
      "grad_norm": 3.09375,
      "learning_rate": 3.990241205012578e-05,
      "loss": 0.8903,
      "step": 241180
    },
    {
      "epoch": 0.8453108188149036,
      "grad_norm": 2.640625,
      "learning_rate": 3.990176302146208e-05,
      "loss": 0.8806,
      "step": 241190
    },
    {
      "epoch": 0.8453458663217992,
      "grad_norm": 2.921875,
      "learning_rate": 3.990111399279838e-05,
      "loss": 0.9178,
      "step": 241200
    },
    {
      "epoch": 0.8453809138286948,
      "grad_norm": 2.953125,
      "learning_rate": 3.990046496413467e-05,
      "loss": 0.8977,
      "step": 241210
    },
    {
      "epoch": 0.8454159613355904,
      "grad_norm": 3.265625,
      "learning_rate": 3.9899815935470974e-05,
      "loss": 0.9338,
      "step": 241220
    },
    {
      "epoch": 0.845451008842486,
      "grad_norm": 2.625,
      "learning_rate": 3.9899166906807276e-05,
      "loss": 0.8526,
      "step": 241230
    },
    {
      "epoch": 0.8454860563493816,
      "grad_norm": 2.5625,
      "learning_rate": 3.989851787814357e-05,
      "loss": 0.922,
      "step": 241240
    },
    {
      "epoch": 0.8455211038562772,
      "grad_norm": 2.90625,
      "learning_rate": 3.989786884947987e-05,
      "loss": 0.9551,
      "step": 241250
    },
    {
      "epoch": 0.8455561513631727,
      "grad_norm": 2.984375,
      "learning_rate": 3.9897219820816166e-05,
      "loss": 0.9447,
      "step": 241260
    },
    {
      "epoch": 0.8455911988700684,
      "grad_norm": 3.453125,
      "learning_rate": 3.989657079215247e-05,
      "loss": 0.9802,
      "step": 241270
    },
    {
      "epoch": 0.8456262463769639,
      "grad_norm": 3.125,
      "learning_rate": 3.989592176348876e-05,
      "loss": 0.826,
      "step": 241280
    },
    {
      "epoch": 0.8456612938838596,
      "grad_norm": 2.9375,
      "learning_rate": 3.9895272734825064e-05,
      "loss": 0.8762,
      "step": 241290
    },
    {
      "epoch": 0.8456963413907552,
      "grad_norm": 2.4375,
      "learning_rate": 3.989462370616136e-05,
      "loss": 0.9112,
      "step": 241300
    },
    {
      "epoch": 0.8457313888976508,
      "grad_norm": 3.28125,
      "learning_rate": 3.989397467749766e-05,
      "loss": 0.8329,
      "step": 241310
    },
    {
      "epoch": 0.8457664364045464,
      "grad_norm": 3.640625,
      "learning_rate": 3.9893325648833954e-05,
      "loss": 0.8699,
      "step": 241320
    },
    {
      "epoch": 0.8458014839114419,
      "grad_norm": 2.828125,
      "learning_rate": 3.9892676620170256e-05,
      "loss": 0.8882,
      "step": 241330
    },
    {
      "epoch": 0.8458365314183376,
      "grad_norm": 3.34375,
      "learning_rate": 3.989202759150655e-05,
      "loss": 1.0181,
      "step": 241340
    },
    {
      "epoch": 0.8458715789252331,
      "grad_norm": 2.71875,
      "learning_rate": 3.989137856284285e-05,
      "loss": 0.7899,
      "step": 241350
    },
    {
      "epoch": 0.8459066264321288,
      "grad_norm": 2.96875,
      "learning_rate": 3.989072953417915e-05,
      "loss": 0.9479,
      "step": 241360
    },
    {
      "epoch": 0.8459416739390243,
      "grad_norm": 3.40625,
      "learning_rate": 3.989008050551545e-05,
      "loss": 1.116,
      "step": 241370
    },
    {
      "epoch": 0.84597672144592,
      "grad_norm": 2.765625,
      "learning_rate": 3.988943147685175e-05,
      "loss": 0.897,
      "step": 241380
    },
    {
      "epoch": 0.8460117689528155,
      "grad_norm": 2.984375,
      "learning_rate": 3.9888782448188044e-05,
      "loss": 0.8851,
      "step": 241390
    },
    {
      "epoch": 0.8460468164597111,
      "grad_norm": 2.84375,
      "learning_rate": 3.9888133419524345e-05,
      "loss": 0.8509,
      "step": 241400
    },
    {
      "epoch": 0.8460818639666068,
      "grad_norm": 3.015625,
      "learning_rate": 3.988748439086064e-05,
      "loss": 0.8396,
      "step": 241410
    },
    {
      "epoch": 0.8461169114735023,
      "grad_norm": 2.828125,
      "learning_rate": 3.988683536219694e-05,
      "loss": 0.8948,
      "step": 241420
    },
    {
      "epoch": 0.846151958980398,
      "grad_norm": 2.578125,
      "learning_rate": 3.9886186333533236e-05,
      "loss": 0.9142,
      "step": 241430
    },
    {
      "epoch": 0.8461870064872935,
      "grad_norm": 3.828125,
      "learning_rate": 3.988553730486954e-05,
      "loss": 0.8243,
      "step": 241440
    },
    {
      "epoch": 0.8462220539941891,
      "grad_norm": 2.921875,
      "learning_rate": 3.988488827620583e-05,
      "loss": 0.9218,
      "step": 241450
    },
    {
      "epoch": 0.8462571015010847,
      "grad_norm": 2.671875,
      "learning_rate": 3.988423924754213e-05,
      "loss": 0.9068,
      "step": 241460
    },
    {
      "epoch": 0.8462921490079803,
      "grad_norm": 2.78125,
      "learning_rate": 3.988359021887843e-05,
      "loss": 0.8831,
      "step": 241470
    },
    {
      "epoch": 0.8463271965148759,
      "grad_norm": 3.125,
      "learning_rate": 3.988294119021473e-05,
      "loss": 0.7872,
      "step": 241480
    },
    {
      "epoch": 0.8463622440217715,
      "grad_norm": 2.921875,
      "learning_rate": 3.9882292161551024e-05,
      "loss": 0.9021,
      "step": 241490
    },
    {
      "epoch": 0.8463972915286672,
      "grad_norm": 3.46875,
      "learning_rate": 3.9881643132887325e-05,
      "loss": 0.9261,
      "step": 241500
    },
    {
      "epoch": 0.8464323390355627,
      "grad_norm": 2.9375,
      "learning_rate": 3.9880994104223626e-05,
      "loss": 0.8407,
      "step": 241510
    },
    {
      "epoch": 0.8464673865424583,
      "grad_norm": 2.453125,
      "learning_rate": 3.9880345075559914e-05,
      "loss": 0.8351,
      "step": 241520
    },
    {
      "epoch": 0.8465024340493539,
      "grad_norm": 2.875,
      "learning_rate": 3.9879696046896216e-05,
      "loss": 0.9424,
      "step": 241530
    },
    {
      "epoch": 0.8465374815562495,
      "grad_norm": 2.890625,
      "learning_rate": 3.987904701823251e-05,
      "loss": 0.8764,
      "step": 241540
    },
    {
      "epoch": 0.8465725290631451,
      "grad_norm": 3.203125,
      "learning_rate": 3.987839798956881e-05,
      "loss": 0.9225,
      "step": 241550
    },
    {
      "epoch": 0.8466075765700407,
      "grad_norm": 3.625,
      "learning_rate": 3.9877748960905106e-05,
      "loss": 0.8128,
      "step": 241560
    },
    {
      "epoch": 0.8466426240769362,
      "grad_norm": 2.53125,
      "learning_rate": 3.987709993224141e-05,
      "loss": 0.9101,
      "step": 241570
    },
    {
      "epoch": 0.8466776715838319,
      "grad_norm": 2.5,
      "learning_rate": 3.98764509035777e-05,
      "loss": 0.8957,
      "step": 241580
    },
    {
      "epoch": 0.8467127190907274,
      "grad_norm": 2.859375,
      "learning_rate": 3.9875801874914004e-05,
      "loss": 1.0632,
      "step": 241590
    },
    {
      "epoch": 0.8467477665976231,
      "grad_norm": 2.53125,
      "learning_rate": 3.9875152846250305e-05,
      "loss": 0.8341,
      "step": 241600
    },
    {
      "epoch": 0.8467828141045187,
      "grad_norm": 2.96875,
      "learning_rate": 3.98745038175866e-05,
      "loss": 0.8919,
      "step": 241610
    },
    {
      "epoch": 0.8468178616114143,
      "grad_norm": 2.828125,
      "learning_rate": 3.98738547889229e-05,
      "loss": 0.8731,
      "step": 241620
    },
    {
      "epoch": 0.8468529091183099,
      "grad_norm": 3.28125,
      "learning_rate": 3.9873205760259196e-05,
      "loss": 0.9315,
      "step": 241630
    },
    {
      "epoch": 0.8468879566252054,
      "grad_norm": 2.625,
      "learning_rate": 3.98725567315955e-05,
      "loss": 0.8516,
      "step": 241640
    },
    {
      "epoch": 0.8469230041321011,
      "grad_norm": 3.3125,
      "learning_rate": 3.987190770293179e-05,
      "loss": 0.8849,
      "step": 241650
    },
    {
      "epoch": 0.8469580516389966,
      "grad_norm": 2.96875,
      "learning_rate": 3.987125867426809e-05,
      "loss": 0.8466,
      "step": 241660
    },
    {
      "epoch": 0.8469930991458923,
      "grad_norm": 3.421875,
      "learning_rate": 3.987060964560439e-05,
      "loss": 0.8662,
      "step": 241670
    },
    {
      "epoch": 0.8470281466527878,
      "grad_norm": 2.890625,
      "learning_rate": 3.986996061694069e-05,
      "loss": 0.9418,
      "step": 241680
    },
    {
      "epoch": 0.8470631941596835,
      "grad_norm": 3.046875,
      "learning_rate": 3.9869311588276984e-05,
      "loss": 0.87,
      "step": 241690
    },
    {
      "epoch": 0.8470982416665791,
      "grad_norm": 2.96875,
      "learning_rate": 3.9868662559613285e-05,
      "loss": 0.8709,
      "step": 241700
    },
    {
      "epoch": 0.8471332891734746,
      "grad_norm": 3.140625,
      "learning_rate": 3.986801353094958e-05,
      "loss": 0.8173,
      "step": 241710
    },
    {
      "epoch": 0.8471683366803703,
      "grad_norm": 3.0625,
      "learning_rate": 3.986736450228588e-05,
      "loss": 0.8974,
      "step": 241720
    },
    {
      "epoch": 0.8472033841872658,
      "grad_norm": 2.875,
      "learning_rate": 3.986671547362218e-05,
      "loss": 0.8382,
      "step": 241730
    },
    {
      "epoch": 0.8472384316941615,
      "grad_norm": 3.0625,
      "learning_rate": 3.986606644495848e-05,
      "loss": 0.9398,
      "step": 241740
    },
    {
      "epoch": 0.847273479201057,
      "grad_norm": 3.046875,
      "learning_rate": 3.986541741629478e-05,
      "loss": 0.9242,
      "step": 241750
    },
    {
      "epoch": 0.8473085267079526,
      "grad_norm": 3.015625,
      "learning_rate": 3.986476838763107e-05,
      "loss": 0.9906,
      "step": 241760
    },
    {
      "epoch": 0.8473435742148482,
      "grad_norm": 2.828125,
      "learning_rate": 3.9864119358967374e-05,
      "loss": 0.9147,
      "step": 241770
    },
    {
      "epoch": 0.8473786217217438,
      "grad_norm": 3.25,
      "learning_rate": 3.986347033030367e-05,
      "loss": 0.8727,
      "step": 241780
    },
    {
      "epoch": 0.8474136692286394,
      "grad_norm": 3.0625,
      "learning_rate": 3.986282130163997e-05,
      "loss": 0.8193,
      "step": 241790
    },
    {
      "epoch": 0.847448716735535,
      "grad_norm": 3.25,
      "learning_rate": 3.9862172272976265e-05,
      "loss": 0.8621,
      "step": 241800
    },
    {
      "epoch": 0.8474837642424307,
      "grad_norm": 2.859375,
      "learning_rate": 3.9861523244312566e-05,
      "loss": 0.879,
      "step": 241810
    },
    {
      "epoch": 0.8475188117493262,
      "grad_norm": 3.4375,
      "learning_rate": 3.986087421564886e-05,
      "loss": 0.9201,
      "step": 241820
    },
    {
      "epoch": 0.8475538592562218,
      "grad_norm": 3.09375,
      "learning_rate": 3.986022518698516e-05,
      "loss": 0.8684,
      "step": 241830
    },
    {
      "epoch": 0.8475889067631174,
      "grad_norm": 2.8125,
      "learning_rate": 3.985957615832146e-05,
      "loss": 0.8655,
      "step": 241840
    },
    {
      "epoch": 0.847623954270013,
      "grad_norm": 2.9375,
      "learning_rate": 3.985892712965776e-05,
      "loss": 0.8553,
      "step": 241850
    },
    {
      "epoch": 0.8476590017769086,
      "grad_norm": 2.640625,
      "learning_rate": 3.985827810099405e-05,
      "loss": 0.8842,
      "step": 241860
    },
    {
      "epoch": 0.8476940492838042,
      "grad_norm": 2.875,
      "learning_rate": 3.9857629072330354e-05,
      "loss": 0.8645,
      "step": 241870
    },
    {
      "epoch": 0.8477290967906997,
      "grad_norm": 3.1875,
      "learning_rate": 3.9856980043666656e-05,
      "loss": 0.9446,
      "step": 241880
    },
    {
      "epoch": 0.8477641442975954,
      "grad_norm": 3.859375,
      "learning_rate": 3.985633101500295e-05,
      "loss": 0.9405,
      "step": 241890
    },
    {
      "epoch": 0.847799191804491,
      "grad_norm": 3.25,
      "learning_rate": 3.9855681986339245e-05,
      "loss": 0.9439,
      "step": 241900
    },
    {
      "epoch": 0.8478342393113866,
      "grad_norm": 3.078125,
      "learning_rate": 3.985503295767554e-05,
      "loss": 0.838,
      "step": 241910
    },
    {
      "epoch": 0.8478692868182822,
      "grad_norm": 2.890625,
      "learning_rate": 3.985438392901184e-05,
      "loss": 0.8471,
      "step": 241920
    },
    {
      "epoch": 0.8479043343251778,
      "grad_norm": 3.125,
      "learning_rate": 3.9853734900348136e-05,
      "loss": 0.8624,
      "step": 241930
    },
    {
      "epoch": 0.8479393818320734,
      "grad_norm": 3.078125,
      "learning_rate": 3.985308587168444e-05,
      "loss": 0.9289,
      "step": 241940
    },
    {
      "epoch": 0.8479744293389689,
      "grad_norm": 3.265625,
      "learning_rate": 3.985243684302074e-05,
      "loss": 0.8966,
      "step": 241950
    },
    {
      "epoch": 0.8480094768458646,
      "grad_norm": 2.984375,
      "learning_rate": 3.985178781435703e-05,
      "loss": 0.9642,
      "step": 241960
    },
    {
      "epoch": 0.8480445243527601,
      "grad_norm": 2.984375,
      "learning_rate": 3.9851138785693334e-05,
      "loss": 0.8505,
      "step": 241970
    },
    {
      "epoch": 0.8480795718596558,
      "grad_norm": 2.59375,
      "learning_rate": 3.985048975702963e-05,
      "loss": 0.795,
      "step": 241980
    },
    {
      "epoch": 0.8481146193665514,
      "grad_norm": 3.203125,
      "learning_rate": 3.984984072836593e-05,
      "loss": 0.8879,
      "step": 241990
    },
    {
      "epoch": 0.848149666873447,
      "grad_norm": 3.203125,
      "learning_rate": 3.9849191699702225e-05,
      "loss": 0.9351,
      "step": 242000
    },
    {
      "epoch": 0.8481847143803426,
      "grad_norm": 2.96875,
      "learning_rate": 3.9848542671038526e-05,
      "loss": 0.8574,
      "step": 242010
    },
    {
      "epoch": 0.8482197618872381,
      "grad_norm": 2.859375,
      "learning_rate": 3.984789364237482e-05,
      "loss": 0.8974,
      "step": 242020
    },
    {
      "epoch": 0.8482548093941338,
      "grad_norm": 2.96875,
      "learning_rate": 3.984724461371112e-05,
      "loss": 0.8965,
      "step": 242030
    },
    {
      "epoch": 0.8482898569010293,
      "grad_norm": 3.1875,
      "learning_rate": 3.984659558504742e-05,
      "loss": 0.8705,
      "step": 242040
    },
    {
      "epoch": 0.848324904407925,
      "grad_norm": 2.640625,
      "learning_rate": 3.984594655638372e-05,
      "loss": 0.8487,
      "step": 242050
    },
    {
      "epoch": 0.8483599519148205,
      "grad_norm": 3.03125,
      "learning_rate": 3.984529752772001e-05,
      "loss": 0.8826,
      "step": 242060
    },
    {
      "epoch": 0.8483949994217161,
      "grad_norm": 2.609375,
      "learning_rate": 3.9844648499056314e-05,
      "loss": 0.9068,
      "step": 242070
    },
    {
      "epoch": 0.8484300469286117,
      "grad_norm": 3.171875,
      "learning_rate": 3.984399947039261e-05,
      "loss": 0.9122,
      "step": 242080
    },
    {
      "epoch": 0.8484650944355073,
      "grad_norm": 3.234375,
      "learning_rate": 3.984335044172891e-05,
      "loss": 0.8827,
      "step": 242090
    },
    {
      "epoch": 0.848500141942403,
      "grad_norm": 2.875,
      "learning_rate": 3.984270141306521e-05,
      "loss": 0.7755,
      "step": 242100
    },
    {
      "epoch": 0.8485351894492985,
      "grad_norm": 3.015625,
      "learning_rate": 3.9842052384401506e-05,
      "loss": 0.9208,
      "step": 242110
    },
    {
      "epoch": 0.8485702369561942,
      "grad_norm": 2.546875,
      "learning_rate": 3.984140335573781e-05,
      "loss": 0.8784,
      "step": 242120
    },
    {
      "epoch": 0.8486052844630897,
      "grad_norm": 3.125,
      "learning_rate": 3.98407543270741e-05,
      "loss": 0.9714,
      "step": 242130
    },
    {
      "epoch": 0.8486403319699853,
      "grad_norm": 3.203125,
      "learning_rate": 3.9840105298410404e-05,
      "loss": 0.8516,
      "step": 242140
    },
    {
      "epoch": 0.8486753794768809,
      "grad_norm": 3.21875,
      "learning_rate": 3.98394562697467e-05,
      "loss": 0.863,
      "step": 242150
    },
    {
      "epoch": 0.8487104269837765,
      "grad_norm": 3.109375,
      "learning_rate": 3.9838807241083e-05,
      "loss": 0.8468,
      "step": 242160
    },
    {
      "epoch": 0.8487454744906721,
      "grad_norm": 3.078125,
      "learning_rate": 3.9838158212419294e-05,
      "loss": 0.9236,
      "step": 242170
    },
    {
      "epoch": 0.8487805219975677,
      "grad_norm": 3.046875,
      "learning_rate": 3.9837509183755596e-05,
      "loss": 0.8044,
      "step": 242180
    },
    {
      "epoch": 0.8488155695044634,
      "grad_norm": 2.953125,
      "learning_rate": 3.983686015509189e-05,
      "loss": 0.9308,
      "step": 242190
    },
    {
      "epoch": 0.8488506170113589,
      "grad_norm": 2.640625,
      "learning_rate": 3.983621112642819e-05,
      "loss": 0.8633,
      "step": 242200
    },
    {
      "epoch": 0.8488856645182545,
      "grad_norm": 2.703125,
      "learning_rate": 3.9835562097764486e-05,
      "loss": 0.921,
      "step": 242210
    },
    {
      "epoch": 0.8489207120251501,
      "grad_norm": 2.375,
      "learning_rate": 3.983491306910079e-05,
      "loss": 0.8822,
      "step": 242220
    },
    {
      "epoch": 0.8489557595320457,
      "grad_norm": 2.796875,
      "learning_rate": 3.983426404043709e-05,
      "loss": 0.8235,
      "step": 242230
    },
    {
      "epoch": 0.8489908070389413,
      "grad_norm": 3.125,
      "learning_rate": 3.9833615011773384e-05,
      "loss": 0.7848,
      "step": 242240
    },
    {
      "epoch": 0.8490258545458369,
      "grad_norm": 3.03125,
      "learning_rate": 3.9832965983109685e-05,
      "loss": 0.8333,
      "step": 242250
    },
    {
      "epoch": 0.8490609020527324,
      "grad_norm": 2.9375,
      "learning_rate": 3.983231695444598e-05,
      "loss": 0.9227,
      "step": 242260
    },
    {
      "epoch": 0.8490959495596281,
      "grad_norm": 2.859375,
      "learning_rate": 3.983166792578228e-05,
      "loss": 0.9022,
      "step": 242270
    },
    {
      "epoch": 0.8491309970665236,
      "grad_norm": 3.484375,
      "learning_rate": 3.983101889711857e-05,
      "loss": 0.9545,
      "step": 242280
    },
    {
      "epoch": 0.8491660445734193,
      "grad_norm": 2.90625,
      "learning_rate": 3.983036986845487e-05,
      "loss": 0.8359,
      "step": 242290
    },
    {
      "epoch": 0.8492010920803149,
      "grad_norm": 2.75,
      "learning_rate": 3.9829720839791165e-05,
      "loss": 0.8478,
      "step": 242300
    },
    {
      "epoch": 0.8492361395872104,
      "grad_norm": 3.109375,
      "learning_rate": 3.9829071811127466e-05,
      "loss": 0.8987,
      "step": 242310
    },
    {
      "epoch": 0.8492711870941061,
      "grad_norm": 2.796875,
      "learning_rate": 3.982842278246377e-05,
      "loss": 0.8025,
      "step": 242320
    },
    {
      "epoch": 0.8493062346010016,
      "grad_norm": 2.75,
      "learning_rate": 3.982777375380006e-05,
      "loss": 0.9339,
      "step": 242330
    },
    {
      "epoch": 0.8493412821078973,
      "grad_norm": 3.203125,
      "learning_rate": 3.9827124725136364e-05,
      "loss": 0.9068,
      "step": 242340
    },
    {
      "epoch": 0.8493763296147928,
      "grad_norm": 3.0625,
      "learning_rate": 3.982647569647266e-05,
      "loss": 0.8567,
      "step": 242350
    },
    {
      "epoch": 0.8494113771216885,
      "grad_norm": 3.140625,
      "learning_rate": 3.982582666780896e-05,
      "loss": 0.9193,
      "step": 242360
    },
    {
      "epoch": 0.849446424628584,
      "grad_norm": 3.265625,
      "learning_rate": 3.9825177639145254e-05,
      "loss": 0.8395,
      "step": 242370
    },
    {
      "epoch": 0.8494814721354796,
      "grad_norm": 3.03125,
      "learning_rate": 3.9824528610481556e-05,
      "loss": 0.8748,
      "step": 242380
    },
    {
      "epoch": 0.8495165196423753,
      "grad_norm": 3.53125,
      "learning_rate": 3.982387958181785e-05,
      "loss": 0.9229,
      "step": 242390
    },
    {
      "epoch": 0.8495515671492708,
      "grad_norm": 3.09375,
      "learning_rate": 3.982323055315415e-05,
      "loss": 0.8363,
      "step": 242400
    },
    {
      "epoch": 0.8495866146561665,
      "grad_norm": 3.078125,
      "learning_rate": 3.9822581524490446e-05,
      "loss": 0.975,
      "step": 242410
    },
    {
      "epoch": 0.849621662163062,
      "grad_norm": 3.375,
      "learning_rate": 3.982193249582675e-05,
      "loss": 0.9851,
      "step": 242420
    },
    {
      "epoch": 0.8496567096699577,
      "grad_norm": 2.578125,
      "learning_rate": 3.982128346716304e-05,
      "loss": 0.9038,
      "step": 242430
    },
    {
      "epoch": 0.8496917571768532,
      "grad_norm": 2.625,
      "learning_rate": 3.9820634438499344e-05,
      "loss": 0.9202,
      "step": 242440
    },
    {
      "epoch": 0.8497268046837488,
      "grad_norm": 2.828125,
      "learning_rate": 3.981998540983564e-05,
      "loss": 0.8847,
      "step": 242450
    },
    {
      "epoch": 0.8497618521906444,
      "grad_norm": 2.796875,
      "learning_rate": 3.981933638117194e-05,
      "loss": 0.9162,
      "step": 242460
    },
    {
      "epoch": 0.84979689969754,
      "grad_norm": 3.078125,
      "learning_rate": 3.981868735250824e-05,
      "loss": 0.8654,
      "step": 242470
    },
    {
      "epoch": 0.8498319472044357,
      "grad_norm": 2.734375,
      "learning_rate": 3.9818038323844536e-05,
      "loss": 0.7603,
      "step": 242480
    },
    {
      "epoch": 0.8498669947113312,
      "grad_norm": 3.578125,
      "learning_rate": 3.981738929518084e-05,
      "loss": 0.9293,
      "step": 242490
    },
    {
      "epoch": 0.8499020422182268,
      "grad_norm": 2.90625,
      "learning_rate": 3.981674026651713e-05,
      "loss": 0.9601,
      "step": 242500
    },
    {
      "epoch": 0.8499370897251224,
      "grad_norm": 3.359375,
      "learning_rate": 3.981609123785343e-05,
      "loss": 0.9141,
      "step": 242510
    },
    {
      "epoch": 0.849972137232018,
      "grad_norm": 3.078125,
      "learning_rate": 3.981544220918973e-05,
      "loss": 0.9192,
      "step": 242520
    },
    {
      "epoch": 0.8500071847389136,
      "grad_norm": 2.59375,
      "learning_rate": 3.981479318052603e-05,
      "loss": 0.9242,
      "step": 242530
    },
    {
      "epoch": 0.8500422322458092,
      "grad_norm": 2.96875,
      "learning_rate": 3.9814144151862324e-05,
      "loss": 0.868,
      "step": 242540
    },
    {
      "epoch": 0.8500772797527048,
      "grad_norm": 3.0625,
      "learning_rate": 3.9813495123198625e-05,
      "loss": 0.8168,
      "step": 242550
    },
    {
      "epoch": 0.8501123272596004,
      "grad_norm": 2.921875,
      "learning_rate": 3.981284609453492e-05,
      "loss": 0.948,
      "step": 242560
    },
    {
      "epoch": 0.8501473747664959,
      "grad_norm": 3.6875,
      "learning_rate": 3.981219706587122e-05,
      "loss": 0.8657,
      "step": 242570
    },
    {
      "epoch": 0.8501824222733916,
      "grad_norm": 3.296875,
      "learning_rate": 3.9811548037207516e-05,
      "loss": 0.8658,
      "step": 242580
    },
    {
      "epoch": 0.8502174697802872,
      "grad_norm": 3.125,
      "learning_rate": 3.981089900854382e-05,
      "loss": 0.848,
      "step": 242590
    },
    {
      "epoch": 0.8502525172871828,
      "grad_norm": 3.03125,
      "learning_rate": 3.981024997988012e-05,
      "loss": 0.9049,
      "step": 242600
    },
    {
      "epoch": 0.8502875647940784,
      "grad_norm": 3.25,
      "learning_rate": 3.980960095121641e-05,
      "loss": 0.9378,
      "step": 242610
    },
    {
      "epoch": 0.850322612300974,
      "grad_norm": 2.984375,
      "learning_rate": 3.9808951922552715e-05,
      "loss": 0.8196,
      "step": 242620
    },
    {
      "epoch": 0.8503576598078696,
      "grad_norm": 3.0,
      "learning_rate": 3.980830289388901e-05,
      "loss": 0.8875,
      "step": 242630
    },
    {
      "epoch": 0.8503927073147651,
      "grad_norm": 3.09375,
      "learning_rate": 3.980765386522531e-05,
      "loss": 0.8577,
      "step": 242640
    },
    {
      "epoch": 0.8504277548216608,
      "grad_norm": 3.25,
      "learning_rate": 3.98070048365616e-05,
      "loss": 0.8622,
      "step": 242650
    },
    {
      "epoch": 0.8504628023285563,
      "grad_norm": 3.265625,
      "learning_rate": 3.98063558078979e-05,
      "loss": 0.8178,
      "step": 242660
    },
    {
      "epoch": 0.850497849835452,
      "grad_norm": 3.359375,
      "learning_rate": 3.9805706779234194e-05,
      "loss": 0.8945,
      "step": 242670
    },
    {
      "epoch": 0.8505328973423476,
      "grad_norm": 2.5,
      "learning_rate": 3.9805057750570496e-05,
      "loss": 0.8383,
      "step": 242680
    },
    {
      "epoch": 0.8505679448492431,
      "grad_norm": 3.0,
      "learning_rate": 3.98044087219068e-05,
      "loss": 0.9115,
      "step": 242690
    },
    {
      "epoch": 0.8506029923561388,
      "grad_norm": 2.578125,
      "learning_rate": 3.980375969324309e-05,
      "loss": 0.8326,
      "step": 242700
    },
    {
      "epoch": 0.8506380398630343,
      "grad_norm": 3.15625,
      "learning_rate": 3.980311066457939e-05,
      "loss": 0.9102,
      "step": 242710
    },
    {
      "epoch": 0.85067308736993,
      "grad_norm": 2.625,
      "learning_rate": 3.980246163591569e-05,
      "loss": 0.8294,
      "step": 242720
    },
    {
      "epoch": 0.8507081348768255,
      "grad_norm": 3.046875,
      "learning_rate": 3.980181260725199e-05,
      "loss": 0.9036,
      "step": 242730
    },
    {
      "epoch": 0.8507431823837212,
      "grad_norm": 2.703125,
      "learning_rate": 3.9801163578588284e-05,
      "loss": 0.8919,
      "step": 242740
    },
    {
      "epoch": 0.8507782298906167,
      "grad_norm": 2.78125,
      "learning_rate": 3.9800514549924585e-05,
      "loss": 0.8269,
      "step": 242750
    },
    {
      "epoch": 0.8508132773975123,
      "grad_norm": 2.765625,
      "learning_rate": 3.979986552126088e-05,
      "loss": 0.8807,
      "step": 242760
    },
    {
      "epoch": 0.8508483249044079,
      "grad_norm": 3.09375,
      "learning_rate": 3.979921649259718e-05,
      "loss": 0.849,
      "step": 242770
    },
    {
      "epoch": 0.8508833724113035,
      "grad_norm": 2.5625,
      "learning_rate": 3.9798567463933476e-05,
      "loss": 0.9429,
      "step": 242780
    },
    {
      "epoch": 0.8509184199181992,
      "grad_norm": 2.984375,
      "learning_rate": 3.979791843526978e-05,
      "loss": 0.9037,
      "step": 242790
    },
    {
      "epoch": 0.8509534674250947,
      "grad_norm": 3.3125,
      "learning_rate": 3.979726940660607e-05,
      "loss": 0.9311,
      "step": 242800
    },
    {
      "epoch": 0.8509885149319903,
      "grad_norm": 3.15625,
      "learning_rate": 3.979662037794237e-05,
      "loss": 0.8495,
      "step": 242810
    },
    {
      "epoch": 0.8510235624388859,
      "grad_norm": 3.03125,
      "learning_rate": 3.979597134927867e-05,
      "loss": 0.9374,
      "step": 242820
    },
    {
      "epoch": 0.8510586099457815,
      "grad_norm": 2.828125,
      "learning_rate": 3.979532232061497e-05,
      "loss": 0.8686,
      "step": 242830
    },
    {
      "epoch": 0.8510936574526771,
      "grad_norm": 2.921875,
      "learning_rate": 3.979467329195127e-05,
      "loss": 0.9563,
      "step": 242840
    },
    {
      "epoch": 0.8511287049595727,
      "grad_norm": 3.34375,
      "learning_rate": 3.9794024263287565e-05,
      "loss": 0.8977,
      "step": 242850
    },
    {
      "epoch": 0.8511637524664682,
      "grad_norm": 2.390625,
      "learning_rate": 3.979337523462387e-05,
      "loss": 0.8462,
      "step": 242860
    },
    {
      "epoch": 0.8511987999733639,
      "grad_norm": 2.90625,
      "learning_rate": 3.979272620596016e-05,
      "loss": 0.9413,
      "step": 242870
    },
    {
      "epoch": 0.8512338474802595,
      "grad_norm": 2.515625,
      "learning_rate": 3.979207717729646e-05,
      "loss": 0.8428,
      "step": 242880
    },
    {
      "epoch": 0.8512688949871551,
      "grad_norm": 2.796875,
      "learning_rate": 3.979142814863276e-05,
      "loss": 0.9535,
      "step": 242890
    },
    {
      "epoch": 0.8513039424940507,
      "grad_norm": 2.515625,
      "learning_rate": 3.979077911996906e-05,
      "loss": 0.8829,
      "step": 242900
    },
    {
      "epoch": 0.8513389900009463,
      "grad_norm": 3.4375,
      "learning_rate": 3.979013009130535e-05,
      "loss": 0.9381,
      "step": 242910
    },
    {
      "epoch": 0.8513740375078419,
      "grad_norm": 3.125,
      "learning_rate": 3.9789481062641655e-05,
      "loss": 0.9104,
      "step": 242920
    },
    {
      "epoch": 0.8514090850147374,
      "grad_norm": 3.1875,
      "learning_rate": 3.978883203397795e-05,
      "loss": 0.924,
      "step": 242930
    },
    {
      "epoch": 0.8514441325216331,
      "grad_norm": 2.875,
      "learning_rate": 3.978818300531425e-05,
      "loss": 0.8923,
      "step": 242940
    },
    {
      "epoch": 0.8514791800285286,
      "grad_norm": 2.78125,
      "learning_rate": 3.9787533976650545e-05,
      "loss": 0.9585,
      "step": 242950
    },
    {
      "epoch": 0.8515142275354243,
      "grad_norm": 3.171875,
      "learning_rate": 3.978688494798685e-05,
      "loss": 0.866,
      "step": 242960
    },
    {
      "epoch": 0.8515492750423199,
      "grad_norm": 2.96875,
      "learning_rate": 3.978623591932315e-05,
      "loss": 0.9129,
      "step": 242970
    },
    {
      "epoch": 0.8515843225492155,
      "grad_norm": 3.40625,
      "learning_rate": 3.978558689065944e-05,
      "loss": 0.9046,
      "step": 242980
    },
    {
      "epoch": 0.8516193700561111,
      "grad_norm": 3.21875,
      "learning_rate": 3.9784937861995744e-05,
      "loss": 0.8807,
      "step": 242990
    },
    {
      "epoch": 0.8516544175630066,
      "grad_norm": 3.546875,
      "learning_rate": 3.978428883333204e-05,
      "loss": 1.0253,
      "step": 243000
    },
    {
      "epoch": 0.8516894650699023,
      "grad_norm": 3.28125,
      "learning_rate": 3.978363980466834e-05,
      "loss": 0.8626,
      "step": 243010
    },
    {
      "epoch": 0.8517245125767978,
      "grad_norm": 2.359375,
      "learning_rate": 3.9782990776004635e-05,
      "loss": 0.9199,
      "step": 243020
    },
    {
      "epoch": 0.8517595600836935,
      "grad_norm": 3.140625,
      "learning_rate": 3.978234174734093e-05,
      "loss": 0.8485,
      "step": 243030
    },
    {
      "epoch": 0.851794607590589,
      "grad_norm": 2.546875,
      "learning_rate": 3.9781692718677224e-05,
      "loss": 0.8542,
      "step": 243040
    },
    {
      "epoch": 0.8518296550974847,
      "grad_norm": 3.0,
      "learning_rate": 3.9781043690013525e-05,
      "loss": 0.8711,
      "step": 243050
    },
    {
      "epoch": 0.8518647026043802,
      "grad_norm": 2.78125,
      "learning_rate": 3.978039466134983e-05,
      "loss": 0.908,
      "step": 243060
    },
    {
      "epoch": 0.8518997501112758,
      "grad_norm": 2.9375,
      "learning_rate": 3.977974563268612e-05,
      "loss": 0.8847,
      "step": 243070
    },
    {
      "epoch": 0.8519347976181715,
      "grad_norm": 3.1875,
      "learning_rate": 3.977909660402242e-05,
      "loss": 0.9174,
      "step": 243080
    },
    {
      "epoch": 0.851969845125067,
      "grad_norm": 2.671875,
      "learning_rate": 3.977844757535872e-05,
      "loss": 0.8536,
      "step": 243090
    },
    {
      "epoch": 0.8520048926319627,
      "grad_norm": 3.234375,
      "learning_rate": 3.977779854669502e-05,
      "loss": 0.8722,
      "step": 243100
    },
    {
      "epoch": 0.8520399401388582,
      "grad_norm": 3.40625,
      "learning_rate": 3.977714951803131e-05,
      "loss": 0.9288,
      "step": 243110
    },
    {
      "epoch": 0.8520749876457538,
      "grad_norm": 3.09375,
      "learning_rate": 3.9776500489367615e-05,
      "loss": 0.8793,
      "step": 243120
    },
    {
      "epoch": 0.8521100351526494,
      "grad_norm": 2.953125,
      "learning_rate": 3.977585146070391e-05,
      "loss": 0.8854,
      "step": 243130
    },
    {
      "epoch": 0.852145082659545,
      "grad_norm": 3.0625,
      "learning_rate": 3.977520243204021e-05,
      "loss": 0.9321,
      "step": 243140
    },
    {
      "epoch": 0.8521801301664406,
      "grad_norm": 2.703125,
      "learning_rate": 3.9774553403376505e-05,
      "loss": 0.9003,
      "step": 243150
    },
    {
      "epoch": 0.8522151776733362,
      "grad_norm": 3.328125,
      "learning_rate": 3.977390437471281e-05,
      "loss": 0.9901,
      "step": 243160
    },
    {
      "epoch": 0.8522502251802319,
      "grad_norm": 3.015625,
      "learning_rate": 3.97732553460491e-05,
      "loss": 0.8481,
      "step": 243170
    },
    {
      "epoch": 0.8522852726871274,
      "grad_norm": 2.75,
      "learning_rate": 3.97726063173854e-05,
      "loss": 0.7543,
      "step": 243180
    },
    {
      "epoch": 0.852320320194023,
      "grad_norm": 3.109375,
      "learning_rate": 3.9771957288721704e-05,
      "loss": 0.9198,
      "step": 243190
    },
    {
      "epoch": 0.8523553677009186,
      "grad_norm": 3.234375,
      "learning_rate": 3.9771308260058e-05,
      "loss": 0.9222,
      "step": 243200
    },
    {
      "epoch": 0.8523904152078142,
      "grad_norm": 2.5,
      "learning_rate": 3.97706592313943e-05,
      "loss": 0.8653,
      "step": 243210
    },
    {
      "epoch": 0.8524254627147098,
      "grad_norm": 3.296875,
      "learning_rate": 3.9770010202730595e-05,
      "loss": 0.8535,
      "step": 243220
    },
    {
      "epoch": 0.8524605102216054,
      "grad_norm": 3.3125,
      "learning_rate": 3.9769361174066896e-05,
      "loss": 0.8857,
      "step": 243230
    },
    {
      "epoch": 0.8524955577285009,
      "grad_norm": 2.84375,
      "learning_rate": 3.976871214540319e-05,
      "loss": 0.8263,
      "step": 243240
    },
    {
      "epoch": 0.8525306052353966,
      "grad_norm": 2.828125,
      "learning_rate": 3.976806311673949e-05,
      "loss": 0.8595,
      "step": 243250
    },
    {
      "epoch": 0.8525656527422921,
      "grad_norm": 2.578125,
      "learning_rate": 3.976741408807579e-05,
      "loss": 0.9571,
      "step": 243260
    },
    {
      "epoch": 0.8526007002491878,
      "grad_norm": 3.359375,
      "learning_rate": 3.976676505941209e-05,
      "loss": 0.8717,
      "step": 243270
    },
    {
      "epoch": 0.8526357477560834,
      "grad_norm": 2.734375,
      "learning_rate": 3.976611603074838e-05,
      "loss": 0.9252,
      "step": 243280
    },
    {
      "epoch": 0.852670795262979,
      "grad_norm": 2.671875,
      "learning_rate": 3.9765467002084684e-05,
      "loss": 0.8451,
      "step": 243290
    },
    {
      "epoch": 0.8527058427698746,
      "grad_norm": 3.203125,
      "learning_rate": 3.976481797342098e-05,
      "loss": 0.881,
      "step": 243300
    },
    {
      "epoch": 0.8527408902767701,
      "grad_norm": 3.09375,
      "learning_rate": 3.976416894475728e-05,
      "loss": 0.9042,
      "step": 243310
    },
    {
      "epoch": 0.8527759377836658,
      "grad_norm": 2.96875,
      "learning_rate": 3.9763519916093575e-05,
      "loss": 0.8569,
      "step": 243320
    },
    {
      "epoch": 0.8528109852905613,
      "grad_norm": 3.046875,
      "learning_rate": 3.9762870887429876e-05,
      "loss": 0.9127,
      "step": 243330
    },
    {
      "epoch": 0.852846032797457,
      "grad_norm": 2.546875,
      "learning_rate": 3.976222185876618e-05,
      "loss": 0.821,
      "step": 243340
    },
    {
      "epoch": 0.8528810803043525,
      "grad_norm": 2.328125,
      "learning_rate": 3.976157283010247e-05,
      "loss": 0.8894,
      "step": 243350
    },
    {
      "epoch": 0.8529161278112481,
      "grad_norm": 2.9375,
      "learning_rate": 3.9760923801438773e-05,
      "loss": 0.8238,
      "step": 243360
    },
    {
      "epoch": 0.8529511753181438,
      "grad_norm": 2.84375,
      "learning_rate": 3.976027477277507e-05,
      "loss": 0.8623,
      "step": 243370
    },
    {
      "epoch": 0.8529862228250393,
      "grad_norm": 2.9375,
      "learning_rate": 3.975962574411137e-05,
      "loss": 0.8618,
      "step": 243380
    },
    {
      "epoch": 0.853021270331935,
      "grad_norm": 2.734375,
      "learning_rate": 3.9758976715447664e-05,
      "loss": 0.8555,
      "step": 243390
    },
    {
      "epoch": 0.8530563178388305,
      "grad_norm": 3.03125,
      "learning_rate": 3.9758327686783965e-05,
      "loss": 0.9112,
      "step": 243400
    },
    {
      "epoch": 0.8530913653457262,
      "grad_norm": 2.796875,
      "learning_rate": 3.975767865812025e-05,
      "loss": 0.9313,
      "step": 243410
    },
    {
      "epoch": 0.8531264128526217,
      "grad_norm": 3.28125,
      "learning_rate": 3.9757029629456555e-05,
      "loss": 0.9229,
      "step": 243420
    },
    {
      "epoch": 0.8531614603595173,
      "grad_norm": 3.1875,
      "learning_rate": 3.9756380600792856e-05,
      "loss": 0.8717,
      "step": 243430
    },
    {
      "epoch": 0.8531965078664129,
      "grad_norm": 3.171875,
      "learning_rate": 3.975573157212915e-05,
      "loss": 0.8343,
      "step": 243440
    },
    {
      "epoch": 0.8532315553733085,
      "grad_norm": 3.015625,
      "learning_rate": 3.975508254346545e-05,
      "loss": 0.9571,
      "step": 243450
    },
    {
      "epoch": 0.8532666028802041,
      "grad_norm": 3.03125,
      "learning_rate": 3.975443351480175e-05,
      "loss": 0.9262,
      "step": 243460
    },
    {
      "epoch": 0.8533016503870997,
      "grad_norm": 2.84375,
      "learning_rate": 3.975378448613805e-05,
      "loss": 0.8411,
      "step": 243470
    },
    {
      "epoch": 0.8533366978939954,
      "grad_norm": 2.9375,
      "learning_rate": 3.975313545747434e-05,
      "loss": 0.876,
      "step": 243480
    },
    {
      "epoch": 0.8533717454008909,
      "grad_norm": 3.234375,
      "learning_rate": 3.9752486428810644e-05,
      "loss": 0.9394,
      "step": 243490
    },
    {
      "epoch": 0.8534067929077865,
      "grad_norm": 2.90625,
      "learning_rate": 3.975183740014694e-05,
      "loss": 0.9582,
      "step": 243500
    },
    {
      "epoch": 0.8534418404146821,
      "grad_norm": 2.59375,
      "learning_rate": 3.975118837148324e-05,
      "loss": 0.8948,
      "step": 243510
    },
    {
      "epoch": 0.8534768879215777,
      "grad_norm": 2.875,
      "learning_rate": 3.9750539342819535e-05,
      "loss": 0.8311,
      "step": 243520
    },
    {
      "epoch": 0.8535119354284733,
      "grad_norm": 3.21875,
      "learning_rate": 3.9749890314155836e-05,
      "loss": 0.8881,
      "step": 243530
    },
    {
      "epoch": 0.8535469829353689,
      "grad_norm": 3.375,
      "learning_rate": 3.974924128549213e-05,
      "loss": 0.9866,
      "step": 243540
    },
    {
      "epoch": 0.8535820304422644,
      "grad_norm": 2.765625,
      "learning_rate": 3.974859225682843e-05,
      "loss": 0.916,
      "step": 243550
    },
    {
      "epoch": 0.8536170779491601,
      "grad_norm": 3.078125,
      "learning_rate": 3.9747943228164733e-05,
      "loss": 0.8855,
      "step": 243560
    },
    {
      "epoch": 0.8536521254560557,
      "grad_norm": 2.984375,
      "learning_rate": 3.974729419950103e-05,
      "loss": 0.8963,
      "step": 243570
    },
    {
      "epoch": 0.8536871729629513,
      "grad_norm": 2.921875,
      "learning_rate": 3.974664517083733e-05,
      "loss": 0.8808,
      "step": 243580
    },
    {
      "epoch": 0.8537222204698469,
      "grad_norm": 2.890625,
      "learning_rate": 3.9745996142173624e-05,
      "loss": 0.8046,
      "step": 243590
    },
    {
      "epoch": 0.8537572679767425,
      "grad_norm": 3.03125,
      "learning_rate": 3.9745347113509925e-05,
      "loss": 0.842,
      "step": 243600
    },
    {
      "epoch": 0.8537923154836381,
      "grad_norm": 3.015625,
      "learning_rate": 3.974469808484622e-05,
      "loss": 0.9525,
      "step": 243610
    },
    {
      "epoch": 0.8538273629905336,
      "grad_norm": 3.015625,
      "learning_rate": 3.974404905618252e-05,
      "loss": 0.8138,
      "step": 243620
    },
    {
      "epoch": 0.8538624104974293,
      "grad_norm": 3.484375,
      "learning_rate": 3.9743400027518816e-05,
      "loss": 0.9215,
      "step": 243630
    },
    {
      "epoch": 0.8538974580043248,
      "grad_norm": 3.109375,
      "learning_rate": 3.974275099885512e-05,
      "loss": 0.8675,
      "step": 243640
    },
    {
      "epoch": 0.8539325055112205,
      "grad_norm": 3.140625,
      "learning_rate": 3.974210197019141e-05,
      "loss": 0.8571,
      "step": 243650
    },
    {
      "epoch": 0.8539675530181161,
      "grad_norm": 2.609375,
      "learning_rate": 3.9741452941527713e-05,
      "loss": 0.8798,
      "step": 243660
    },
    {
      "epoch": 0.8540026005250116,
      "grad_norm": 3.53125,
      "learning_rate": 3.974080391286401e-05,
      "loss": 0.9304,
      "step": 243670
    },
    {
      "epoch": 0.8540376480319073,
      "grad_norm": 3.09375,
      "learning_rate": 3.974015488420031e-05,
      "loss": 0.8459,
      "step": 243680
    },
    {
      "epoch": 0.8540726955388028,
      "grad_norm": 3.515625,
      "learning_rate": 3.9739505855536604e-05,
      "loss": 0.84,
      "step": 243690
    },
    {
      "epoch": 0.8541077430456985,
      "grad_norm": 2.84375,
      "learning_rate": 3.9738856826872905e-05,
      "loss": 0.8391,
      "step": 243700
    },
    {
      "epoch": 0.854142790552594,
      "grad_norm": 2.8125,
      "learning_rate": 3.973820779820921e-05,
      "loss": 0.788,
      "step": 243710
    },
    {
      "epoch": 0.8541778380594897,
      "grad_norm": 2.578125,
      "learning_rate": 3.97375587695455e-05,
      "loss": 0.8725,
      "step": 243720
    },
    {
      "epoch": 0.8542128855663852,
      "grad_norm": 2.875,
      "learning_rate": 3.97369097408818e-05,
      "loss": 0.8474,
      "step": 243730
    },
    {
      "epoch": 0.8542479330732808,
      "grad_norm": 2.984375,
      "learning_rate": 3.97362607122181e-05,
      "loss": 0.7943,
      "step": 243740
    },
    {
      "epoch": 0.8542829805801764,
      "grad_norm": 3.1875,
      "learning_rate": 3.97356116835544e-05,
      "loss": 0.8662,
      "step": 243750
    },
    {
      "epoch": 0.854318028087072,
      "grad_norm": 2.390625,
      "learning_rate": 3.9734962654890693e-05,
      "loss": 0.8987,
      "step": 243760
    },
    {
      "epoch": 0.8543530755939677,
      "grad_norm": 2.578125,
      "learning_rate": 3.9734313626226995e-05,
      "loss": 0.8362,
      "step": 243770
    },
    {
      "epoch": 0.8543881231008632,
      "grad_norm": 3.140625,
      "learning_rate": 3.973366459756328e-05,
      "loss": 0.8838,
      "step": 243780
    },
    {
      "epoch": 0.8544231706077589,
      "grad_norm": 2.84375,
      "learning_rate": 3.9733015568899584e-05,
      "loss": 0.8511,
      "step": 243790
    },
    {
      "epoch": 0.8544582181146544,
      "grad_norm": 2.46875,
      "learning_rate": 3.9732366540235885e-05,
      "loss": 0.8674,
      "step": 243800
    },
    {
      "epoch": 0.85449326562155,
      "grad_norm": 3.0625,
      "learning_rate": 3.973171751157218e-05,
      "loss": 0.9002,
      "step": 243810
    },
    {
      "epoch": 0.8545283131284456,
      "grad_norm": 3.03125,
      "learning_rate": 3.973106848290848e-05,
      "loss": 0.9386,
      "step": 243820
    },
    {
      "epoch": 0.8545633606353412,
      "grad_norm": 2.734375,
      "learning_rate": 3.9730419454244776e-05,
      "loss": 0.8288,
      "step": 243830
    },
    {
      "epoch": 0.8545984081422368,
      "grad_norm": 3.046875,
      "learning_rate": 3.972977042558108e-05,
      "loss": 0.8918,
      "step": 243840
    },
    {
      "epoch": 0.8546334556491324,
      "grad_norm": 3.03125,
      "learning_rate": 3.972912139691737e-05,
      "loss": 0.8931,
      "step": 243850
    },
    {
      "epoch": 0.854668503156028,
      "grad_norm": 3.078125,
      "learning_rate": 3.9728472368253673e-05,
      "loss": 0.8472,
      "step": 243860
    },
    {
      "epoch": 0.8547035506629236,
      "grad_norm": 3.46875,
      "learning_rate": 3.972782333958997e-05,
      "loss": 0.9234,
      "step": 243870
    },
    {
      "epoch": 0.8547385981698192,
      "grad_norm": 2.8125,
      "learning_rate": 3.972717431092627e-05,
      "loss": 0.877,
      "step": 243880
    },
    {
      "epoch": 0.8547736456767148,
      "grad_norm": 2.828125,
      "learning_rate": 3.9726525282262564e-05,
      "loss": 0.9486,
      "step": 243890
    },
    {
      "epoch": 0.8548086931836104,
      "grad_norm": 2.859375,
      "learning_rate": 3.9725876253598865e-05,
      "loss": 0.9142,
      "step": 243900
    },
    {
      "epoch": 0.854843740690506,
      "grad_norm": 2.890625,
      "learning_rate": 3.972522722493516e-05,
      "loss": 0.911,
      "step": 243910
    },
    {
      "epoch": 0.8548787881974016,
      "grad_norm": 3.125,
      "learning_rate": 3.972457819627146e-05,
      "loss": 0.9124,
      "step": 243920
    },
    {
      "epoch": 0.8549138357042971,
      "grad_norm": 2.828125,
      "learning_rate": 3.972392916760776e-05,
      "loss": 0.9633,
      "step": 243930
    },
    {
      "epoch": 0.8549488832111928,
      "grad_norm": 2.703125,
      "learning_rate": 3.972328013894406e-05,
      "loss": 0.9883,
      "step": 243940
    },
    {
      "epoch": 0.8549839307180883,
      "grad_norm": 2.734375,
      "learning_rate": 3.972263111028036e-05,
      "loss": 0.882,
      "step": 243950
    },
    {
      "epoch": 0.855018978224984,
      "grad_norm": 3.140625,
      "learning_rate": 3.9721982081616653e-05,
      "loss": 0.829,
      "step": 243960
    },
    {
      "epoch": 0.8550540257318796,
      "grad_norm": 2.953125,
      "learning_rate": 3.9721333052952955e-05,
      "loss": 0.9786,
      "step": 243970
    },
    {
      "epoch": 0.8550890732387751,
      "grad_norm": 2.5,
      "learning_rate": 3.972068402428925e-05,
      "loss": 0.9664,
      "step": 243980
    },
    {
      "epoch": 0.8551241207456708,
      "grad_norm": 2.453125,
      "learning_rate": 3.972003499562555e-05,
      "loss": 0.8589,
      "step": 243990
    },
    {
      "epoch": 0.8551591682525663,
      "grad_norm": 3.03125,
      "learning_rate": 3.9719385966961845e-05,
      "loss": 0.8748,
      "step": 244000
    },
    {
      "epoch": 0.855194215759462,
      "grad_norm": 2.609375,
      "learning_rate": 3.971873693829815e-05,
      "loss": 0.8306,
      "step": 244010
    },
    {
      "epoch": 0.8552292632663575,
      "grad_norm": 3.140625,
      "learning_rate": 3.971808790963444e-05,
      "loss": 0.9562,
      "step": 244020
    },
    {
      "epoch": 0.8552643107732532,
      "grad_norm": 3.234375,
      "learning_rate": 3.971743888097074e-05,
      "loss": 0.9177,
      "step": 244030
    },
    {
      "epoch": 0.8552993582801487,
      "grad_norm": 2.875,
      "learning_rate": 3.971678985230704e-05,
      "loss": 0.8914,
      "step": 244040
    },
    {
      "epoch": 0.8553344057870443,
      "grad_norm": 2.515625,
      "learning_rate": 3.971614082364334e-05,
      "loss": 0.7498,
      "step": 244050
    },
    {
      "epoch": 0.85536945329394,
      "grad_norm": 2.765625,
      "learning_rate": 3.971549179497964e-05,
      "loss": 0.8773,
      "step": 244060
    },
    {
      "epoch": 0.8554045008008355,
      "grad_norm": 2.796875,
      "learning_rate": 3.9714842766315935e-05,
      "loss": 0.8387,
      "step": 244070
    },
    {
      "epoch": 0.8554395483077312,
      "grad_norm": 2.90625,
      "learning_rate": 3.9714193737652236e-05,
      "loss": 0.9239,
      "step": 244080
    },
    {
      "epoch": 0.8554745958146267,
      "grad_norm": 2.65625,
      "learning_rate": 3.971354470898853e-05,
      "loss": 0.8844,
      "step": 244090
    },
    {
      "epoch": 0.8555096433215224,
      "grad_norm": 2.6875,
      "learning_rate": 3.971289568032483e-05,
      "loss": 0.8602,
      "step": 244100
    },
    {
      "epoch": 0.8555446908284179,
      "grad_norm": 3.0,
      "learning_rate": 3.971224665166113e-05,
      "loss": 0.9094,
      "step": 244110
    },
    {
      "epoch": 0.8555797383353135,
      "grad_norm": 2.828125,
      "learning_rate": 3.971159762299743e-05,
      "loss": 0.8029,
      "step": 244120
    },
    {
      "epoch": 0.8556147858422091,
      "grad_norm": 3.015625,
      "learning_rate": 3.971094859433372e-05,
      "loss": 0.9584,
      "step": 244130
    },
    {
      "epoch": 0.8556498333491047,
      "grad_norm": 2.671875,
      "learning_rate": 3.9710299565670024e-05,
      "loss": 0.8954,
      "step": 244140
    },
    {
      "epoch": 0.8556848808560004,
      "grad_norm": 3.09375,
      "learning_rate": 3.970965053700632e-05,
      "loss": 0.9108,
      "step": 244150
    },
    {
      "epoch": 0.8557199283628959,
      "grad_norm": 2.671875,
      "learning_rate": 3.9709001508342613e-05,
      "loss": 0.923,
      "step": 244160
    },
    {
      "epoch": 0.8557549758697915,
      "grad_norm": 3.34375,
      "learning_rate": 3.9708352479678915e-05,
      "loss": 0.9114,
      "step": 244170
    },
    {
      "epoch": 0.8557900233766871,
      "grad_norm": 2.84375,
      "learning_rate": 3.970770345101521e-05,
      "loss": 0.8728,
      "step": 244180
    },
    {
      "epoch": 0.8558250708835827,
      "grad_norm": 3.375,
      "learning_rate": 3.970705442235151e-05,
      "loss": 0.8788,
      "step": 244190
    },
    {
      "epoch": 0.8558601183904783,
      "grad_norm": 2.71875,
      "learning_rate": 3.9706405393687805e-05,
      "loss": 0.81,
      "step": 244200
    },
    {
      "epoch": 0.8558951658973739,
      "grad_norm": 3.0,
      "learning_rate": 3.970575636502411e-05,
      "loss": 0.9042,
      "step": 244210
    },
    {
      "epoch": 0.8559302134042694,
      "grad_norm": 3.234375,
      "learning_rate": 3.97051073363604e-05,
      "loss": 0.8931,
      "step": 244220
    },
    {
      "epoch": 0.8559652609111651,
      "grad_norm": 3.015625,
      "learning_rate": 3.97044583076967e-05,
      "loss": 0.9264,
      "step": 244230
    },
    {
      "epoch": 0.8560003084180606,
      "grad_norm": 2.734375,
      "learning_rate": 3.9703809279033e-05,
      "loss": 1.0038,
      "step": 244240
    },
    {
      "epoch": 0.8560353559249563,
      "grad_norm": 2.78125,
      "learning_rate": 3.97031602503693e-05,
      "loss": 0.9255,
      "step": 244250
    },
    {
      "epoch": 0.8560704034318519,
      "grad_norm": 3.078125,
      "learning_rate": 3.9702511221705593e-05,
      "loss": 0.9256,
      "step": 244260
    },
    {
      "epoch": 0.8561054509387475,
      "grad_norm": 2.890625,
      "learning_rate": 3.9701862193041895e-05,
      "loss": 0.8665,
      "step": 244270
    },
    {
      "epoch": 0.8561404984456431,
      "grad_norm": 2.96875,
      "learning_rate": 3.970121316437819e-05,
      "loss": 0.8836,
      "step": 244280
    },
    {
      "epoch": 0.8561755459525386,
      "grad_norm": 2.734375,
      "learning_rate": 3.970056413571449e-05,
      "loss": 0.8566,
      "step": 244290
    },
    {
      "epoch": 0.8562105934594343,
      "grad_norm": 3.15625,
      "learning_rate": 3.969991510705079e-05,
      "loss": 0.8957,
      "step": 244300
    },
    {
      "epoch": 0.8562456409663298,
      "grad_norm": 2.765625,
      "learning_rate": 3.969926607838709e-05,
      "loss": 0.769,
      "step": 244310
    },
    {
      "epoch": 0.8562806884732255,
      "grad_norm": 3.015625,
      "learning_rate": 3.969861704972339e-05,
      "loss": 0.8865,
      "step": 244320
    },
    {
      "epoch": 0.856315735980121,
      "grad_norm": 3.03125,
      "learning_rate": 3.969796802105968e-05,
      "loss": 0.8759,
      "step": 244330
    },
    {
      "epoch": 0.8563507834870167,
      "grad_norm": 3.171875,
      "learning_rate": 3.9697318992395984e-05,
      "loss": 0.8265,
      "step": 244340
    },
    {
      "epoch": 0.8563858309939123,
      "grad_norm": 3.03125,
      "learning_rate": 3.969666996373228e-05,
      "loss": 0.8834,
      "step": 244350
    },
    {
      "epoch": 0.8564208785008078,
      "grad_norm": 2.84375,
      "learning_rate": 3.969602093506858e-05,
      "loss": 0.9434,
      "step": 244360
    },
    {
      "epoch": 0.8564559260077035,
      "grad_norm": 3.140625,
      "learning_rate": 3.9695371906404875e-05,
      "loss": 0.881,
      "step": 244370
    },
    {
      "epoch": 0.856490973514599,
      "grad_norm": 2.953125,
      "learning_rate": 3.9694722877741176e-05,
      "loss": 0.8951,
      "step": 244380
    },
    {
      "epoch": 0.8565260210214947,
      "grad_norm": 3.046875,
      "learning_rate": 3.969407384907747e-05,
      "loss": 0.8495,
      "step": 244390
    },
    {
      "epoch": 0.8565610685283902,
      "grad_norm": 3.109375,
      "learning_rate": 3.969342482041377e-05,
      "loss": 0.9368,
      "step": 244400
    },
    {
      "epoch": 0.8565961160352858,
      "grad_norm": 2.90625,
      "learning_rate": 3.969277579175007e-05,
      "loss": 0.9859,
      "step": 244410
    },
    {
      "epoch": 0.8566311635421814,
      "grad_norm": 2.71875,
      "learning_rate": 3.969212676308637e-05,
      "loss": 0.9109,
      "step": 244420
    },
    {
      "epoch": 0.856666211049077,
      "grad_norm": 2.828125,
      "learning_rate": 3.969147773442267e-05,
      "loss": 0.8659,
      "step": 244430
    },
    {
      "epoch": 0.8567012585559726,
      "grad_norm": 2.828125,
      "learning_rate": 3.9690828705758964e-05,
      "loss": 0.9878,
      "step": 244440
    },
    {
      "epoch": 0.8567363060628682,
      "grad_norm": 3.109375,
      "learning_rate": 3.9690179677095266e-05,
      "loss": 0.9838,
      "step": 244450
    },
    {
      "epoch": 0.8567713535697639,
      "grad_norm": 2.75,
      "learning_rate": 3.968953064843156e-05,
      "loss": 0.8368,
      "step": 244460
    },
    {
      "epoch": 0.8568064010766594,
      "grad_norm": 2.921875,
      "learning_rate": 3.968888161976786e-05,
      "loss": 0.8255,
      "step": 244470
    },
    {
      "epoch": 0.856841448583555,
      "grad_norm": 2.609375,
      "learning_rate": 3.9688232591104156e-05,
      "loss": 0.9218,
      "step": 244480
    },
    {
      "epoch": 0.8568764960904506,
      "grad_norm": 2.984375,
      "learning_rate": 3.968758356244046e-05,
      "loss": 0.9749,
      "step": 244490
    },
    {
      "epoch": 0.8569115435973462,
      "grad_norm": 2.484375,
      "learning_rate": 3.968693453377675e-05,
      "loss": 0.8698,
      "step": 244500
    },
    {
      "epoch": 0.8569465911042418,
      "grad_norm": 3.109375,
      "learning_rate": 3.9686285505113054e-05,
      "loss": 0.9009,
      "step": 244510
    },
    {
      "epoch": 0.8569816386111374,
      "grad_norm": 3.203125,
      "learning_rate": 3.968563647644935e-05,
      "loss": 0.8743,
      "step": 244520
    },
    {
      "epoch": 0.8570166861180329,
      "grad_norm": 3.09375,
      "learning_rate": 3.968498744778564e-05,
      "loss": 0.948,
      "step": 244530
    },
    {
      "epoch": 0.8570517336249286,
      "grad_norm": 3.515625,
      "learning_rate": 3.9684338419121944e-05,
      "loss": 0.8879,
      "step": 244540
    },
    {
      "epoch": 0.8570867811318242,
      "grad_norm": 2.9375,
      "learning_rate": 3.968368939045824e-05,
      "loss": 0.8588,
      "step": 244550
    },
    {
      "epoch": 0.8571218286387198,
      "grad_norm": 3.40625,
      "learning_rate": 3.968304036179454e-05,
      "loss": 0.8651,
      "step": 244560
    },
    {
      "epoch": 0.8571568761456154,
      "grad_norm": 3.0625,
      "learning_rate": 3.9682391333130835e-05,
      "loss": 0.8503,
      "step": 244570
    },
    {
      "epoch": 0.857191923652511,
      "grad_norm": 2.75,
      "learning_rate": 3.9681742304467136e-05,
      "loss": 0.8222,
      "step": 244580
    },
    {
      "epoch": 0.8572269711594066,
      "grad_norm": 3.078125,
      "learning_rate": 3.968109327580343e-05,
      "loss": 0.9101,
      "step": 244590
    },
    {
      "epoch": 0.8572620186663021,
      "grad_norm": 2.953125,
      "learning_rate": 3.968044424713973e-05,
      "loss": 0.8809,
      "step": 244600
    },
    {
      "epoch": 0.8572970661731978,
      "grad_norm": 2.875,
      "learning_rate": 3.967979521847603e-05,
      "loss": 0.9034,
      "step": 244610
    },
    {
      "epoch": 0.8573321136800933,
      "grad_norm": 3.140625,
      "learning_rate": 3.967914618981233e-05,
      "loss": 0.9213,
      "step": 244620
    },
    {
      "epoch": 0.857367161186989,
      "grad_norm": 2.875,
      "learning_rate": 3.967849716114862e-05,
      "loss": 0.8588,
      "step": 244630
    },
    {
      "epoch": 0.8574022086938846,
      "grad_norm": 3.0625,
      "learning_rate": 3.9677848132484924e-05,
      "loss": 0.8833,
      "step": 244640
    },
    {
      "epoch": 0.8574372562007802,
      "grad_norm": 2.78125,
      "learning_rate": 3.967719910382122e-05,
      "loss": 0.9361,
      "step": 244650
    },
    {
      "epoch": 0.8574723037076758,
      "grad_norm": 2.453125,
      "learning_rate": 3.967655007515752e-05,
      "loss": 0.9455,
      "step": 244660
    },
    {
      "epoch": 0.8575073512145713,
      "grad_norm": 2.578125,
      "learning_rate": 3.967590104649382e-05,
      "loss": 0.8866,
      "step": 244670
    },
    {
      "epoch": 0.857542398721467,
      "grad_norm": 3.03125,
      "learning_rate": 3.9675252017830116e-05,
      "loss": 0.9311,
      "step": 244680
    },
    {
      "epoch": 0.8575774462283625,
      "grad_norm": 3.015625,
      "learning_rate": 3.967460298916642e-05,
      "loss": 0.918,
      "step": 244690
    },
    {
      "epoch": 0.8576124937352582,
      "grad_norm": 3.09375,
      "learning_rate": 3.967395396050271e-05,
      "loss": 0.9091,
      "step": 244700
    },
    {
      "epoch": 0.8576475412421537,
      "grad_norm": 3.03125,
      "learning_rate": 3.9673304931839014e-05,
      "loss": 0.8834,
      "step": 244710
    },
    {
      "epoch": 0.8576825887490493,
      "grad_norm": 3.03125,
      "learning_rate": 3.967265590317531e-05,
      "loss": 0.8207,
      "step": 244720
    },
    {
      "epoch": 0.8577176362559449,
      "grad_norm": 3.046875,
      "learning_rate": 3.967200687451161e-05,
      "loss": 0.8997,
      "step": 244730
    },
    {
      "epoch": 0.8577526837628405,
      "grad_norm": 2.875,
      "learning_rate": 3.9671357845847904e-05,
      "loss": 0.8722,
      "step": 244740
    },
    {
      "epoch": 0.8577877312697362,
      "grad_norm": 3.171875,
      "learning_rate": 3.9670708817184206e-05,
      "loss": 0.9036,
      "step": 244750
    },
    {
      "epoch": 0.8578227787766317,
      "grad_norm": 2.671875,
      "learning_rate": 3.96700597885205e-05,
      "loss": 0.8774,
      "step": 244760
    },
    {
      "epoch": 0.8578578262835274,
      "grad_norm": 2.90625,
      "learning_rate": 3.96694107598568e-05,
      "loss": 0.8533,
      "step": 244770
    },
    {
      "epoch": 0.8578928737904229,
      "grad_norm": 2.71875,
      "learning_rate": 3.9668761731193096e-05,
      "loss": 0.9139,
      "step": 244780
    },
    {
      "epoch": 0.8579279212973185,
      "grad_norm": 2.984375,
      "learning_rate": 3.96681127025294e-05,
      "loss": 0.8972,
      "step": 244790
    },
    {
      "epoch": 0.8579629688042141,
      "grad_norm": 2.53125,
      "learning_rate": 3.96674636738657e-05,
      "loss": 0.9321,
      "step": 244800
    },
    {
      "epoch": 0.8579980163111097,
      "grad_norm": 3.140625,
      "learning_rate": 3.9666814645201994e-05,
      "loss": 0.864,
      "step": 244810
    },
    {
      "epoch": 0.8580330638180053,
      "grad_norm": 2.828125,
      "learning_rate": 3.9666165616538295e-05,
      "loss": 0.9189,
      "step": 244820
    },
    {
      "epoch": 0.8580681113249009,
      "grad_norm": 2.96875,
      "learning_rate": 3.966551658787459e-05,
      "loss": 0.9587,
      "step": 244830
    },
    {
      "epoch": 0.8581031588317966,
      "grad_norm": 3.328125,
      "learning_rate": 3.966486755921089e-05,
      "loss": 0.8814,
      "step": 244840
    },
    {
      "epoch": 0.8581382063386921,
      "grad_norm": 2.875,
      "learning_rate": 3.9664218530547186e-05,
      "loss": 0.9043,
      "step": 244850
    },
    {
      "epoch": 0.8581732538455877,
      "grad_norm": 2.796875,
      "learning_rate": 3.966356950188349e-05,
      "loss": 0.9133,
      "step": 244860
    },
    {
      "epoch": 0.8582083013524833,
      "grad_norm": 2.859375,
      "learning_rate": 3.966292047321978e-05,
      "loss": 0.7716,
      "step": 244870
    },
    {
      "epoch": 0.8582433488593789,
      "grad_norm": 2.6875,
      "learning_rate": 3.966227144455608e-05,
      "loss": 0.898,
      "step": 244880
    },
    {
      "epoch": 0.8582783963662745,
      "grad_norm": 2.796875,
      "learning_rate": 3.966162241589238e-05,
      "loss": 0.901,
      "step": 244890
    },
    {
      "epoch": 0.8583134438731701,
      "grad_norm": 2.796875,
      "learning_rate": 3.966097338722868e-05,
      "loss": 0.806,
      "step": 244900
    },
    {
      "epoch": 0.8583484913800656,
      "grad_norm": 3.375,
      "learning_rate": 3.9660324358564974e-05,
      "loss": 0.7721,
      "step": 244910
    },
    {
      "epoch": 0.8583835388869613,
      "grad_norm": 3.09375,
      "learning_rate": 3.965967532990127e-05,
      "loss": 0.8792,
      "step": 244920
    },
    {
      "epoch": 0.8584185863938568,
      "grad_norm": 2.765625,
      "learning_rate": 3.965902630123757e-05,
      "loss": 0.8139,
      "step": 244930
    },
    {
      "epoch": 0.8584536339007525,
      "grad_norm": 2.734375,
      "learning_rate": 3.9658377272573864e-05,
      "loss": 0.8301,
      "step": 244940
    },
    {
      "epoch": 0.8584886814076481,
      "grad_norm": 3.21875,
      "learning_rate": 3.9657728243910166e-05,
      "loss": 1.0005,
      "step": 244950
    },
    {
      "epoch": 0.8585237289145436,
      "grad_norm": 2.9375,
      "learning_rate": 3.965707921524646e-05,
      "loss": 0.899,
      "step": 244960
    },
    {
      "epoch": 0.8585587764214393,
      "grad_norm": 2.828125,
      "learning_rate": 3.965643018658276e-05,
      "loss": 0.9564,
      "step": 244970
    },
    {
      "epoch": 0.8585938239283348,
      "grad_norm": 3.1875,
      "learning_rate": 3.9655781157919056e-05,
      "loss": 0.9897,
      "step": 244980
    },
    {
      "epoch": 0.8586288714352305,
      "grad_norm": 2.90625,
      "learning_rate": 3.965513212925536e-05,
      "loss": 0.8786,
      "step": 244990
    },
    {
      "epoch": 0.858663918942126,
      "grad_norm": 2.875,
      "learning_rate": 3.965448310059165e-05,
      "loss": 0.9304,
      "step": 245000
    },
    {
      "epoch": 0.858663918942126,
      "eval_loss": 0.8309920430183411,
      "eval_runtime": 558.1393,
      "eval_samples_per_second": 681.615,
      "eval_steps_per_second": 56.801,
      "step": 245000
    },
    {
      "epoch": 0.8586989664490217,
      "grad_norm": 3.125,
      "learning_rate": 3.9653834071927954e-05,
      "loss": 0.8936,
      "step": 245010
    },
    {
      "epoch": 0.8587340139559172,
      "grad_norm": 2.96875,
      "learning_rate": 3.965318504326425e-05,
      "loss": 0.8898,
      "step": 245020
    },
    {
      "epoch": 0.8587690614628128,
      "grad_norm": 3.28125,
      "learning_rate": 3.965253601460055e-05,
      "loss": 0.8383,
      "step": 245030
    },
    {
      "epoch": 0.8588041089697085,
      "grad_norm": 3.21875,
      "learning_rate": 3.965188698593685e-05,
      "loss": 0.8112,
      "step": 245040
    },
    {
      "epoch": 0.858839156476604,
      "grad_norm": 3.21875,
      "learning_rate": 3.9651237957273146e-05,
      "loss": 0.8962,
      "step": 245050
    },
    {
      "epoch": 0.8588742039834997,
      "grad_norm": 3.140625,
      "learning_rate": 3.965058892860945e-05,
      "loss": 0.944,
      "step": 245060
    },
    {
      "epoch": 0.8589092514903952,
      "grad_norm": 2.9375,
      "learning_rate": 3.964993989994574e-05,
      "loss": 0.8813,
      "step": 245070
    },
    {
      "epoch": 0.8589442989972909,
      "grad_norm": 3.28125,
      "learning_rate": 3.964929087128204e-05,
      "loss": 1.0183,
      "step": 245080
    },
    {
      "epoch": 0.8589793465041864,
      "grad_norm": 2.953125,
      "learning_rate": 3.964864184261834e-05,
      "loss": 0.9263,
      "step": 245090
    },
    {
      "epoch": 0.859014394011082,
      "grad_norm": 2.875,
      "learning_rate": 3.964799281395464e-05,
      "loss": 0.8072,
      "step": 245100
    },
    {
      "epoch": 0.8590494415179776,
      "grad_norm": 2.703125,
      "learning_rate": 3.9647343785290934e-05,
      "loss": 0.8902,
      "step": 245110
    },
    {
      "epoch": 0.8590844890248732,
      "grad_norm": 3.15625,
      "learning_rate": 3.9646694756627235e-05,
      "loss": 0.8892,
      "step": 245120
    },
    {
      "epoch": 0.8591195365317688,
      "grad_norm": 3.375,
      "learning_rate": 3.964604572796353e-05,
      "loss": 0.8925,
      "step": 245130
    },
    {
      "epoch": 0.8591545840386644,
      "grad_norm": 3.359375,
      "learning_rate": 3.964539669929983e-05,
      "loss": 0.9034,
      "step": 245140
    },
    {
      "epoch": 0.85918963154556,
      "grad_norm": 3.296875,
      "learning_rate": 3.9644747670636126e-05,
      "loss": 0.8877,
      "step": 245150
    },
    {
      "epoch": 0.8592246790524556,
      "grad_norm": 3.390625,
      "learning_rate": 3.964409864197243e-05,
      "loss": 0.9236,
      "step": 245160
    },
    {
      "epoch": 0.8592597265593512,
      "grad_norm": 2.984375,
      "learning_rate": 3.964344961330873e-05,
      "loss": 0.8991,
      "step": 245170
    },
    {
      "epoch": 0.8592947740662468,
      "grad_norm": 3.140625,
      "learning_rate": 3.964280058464502e-05,
      "loss": 0.8562,
      "step": 245180
    },
    {
      "epoch": 0.8593298215731424,
      "grad_norm": 2.859375,
      "learning_rate": 3.9642151555981324e-05,
      "loss": 0.9419,
      "step": 245190
    },
    {
      "epoch": 0.859364869080038,
      "grad_norm": 3.125,
      "learning_rate": 3.964150252731762e-05,
      "loss": 0.9319,
      "step": 245200
    },
    {
      "epoch": 0.8593999165869336,
      "grad_norm": 2.546875,
      "learning_rate": 3.964085349865392e-05,
      "loss": 0.8601,
      "step": 245210
    },
    {
      "epoch": 0.8594349640938291,
      "grad_norm": 2.859375,
      "learning_rate": 3.9640204469990215e-05,
      "loss": 0.8892,
      "step": 245220
    },
    {
      "epoch": 0.8594700116007248,
      "grad_norm": 2.921875,
      "learning_rate": 3.9639555441326516e-05,
      "loss": 0.8786,
      "step": 245230
    },
    {
      "epoch": 0.8595050591076204,
      "grad_norm": 3.203125,
      "learning_rate": 3.963890641266281e-05,
      "loss": 0.852,
      "step": 245240
    },
    {
      "epoch": 0.859540106614516,
      "grad_norm": 2.9375,
      "learning_rate": 3.963825738399911e-05,
      "loss": 0.8808,
      "step": 245250
    },
    {
      "epoch": 0.8595751541214116,
      "grad_norm": 3.046875,
      "learning_rate": 3.963760835533541e-05,
      "loss": 0.8787,
      "step": 245260
    },
    {
      "epoch": 0.8596102016283071,
      "grad_norm": 3.0625,
      "learning_rate": 3.963695932667171e-05,
      "loss": 0.8779,
      "step": 245270
    },
    {
      "epoch": 0.8596452491352028,
      "grad_norm": 2.734375,
      "learning_rate": 3.9636310298008e-05,
      "loss": 0.8168,
      "step": 245280
    },
    {
      "epoch": 0.8596802966420983,
      "grad_norm": 3.078125,
      "learning_rate": 3.96356612693443e-05,
      "loss": 0.8589,
      "step": 245290
    },
    {
      "epoch": 0.859715344148994,
      "grad_norm": 2.9375,
      "learning_rate": 3.96350122406806e-05,
      "loss": 0.8755,
      "step": 245300
    },
    {
      "epoch": 0.8597503916558895,
      "grad_norm": 2.84375,
      "learning_rate": 3.9634363212016894e-05,
      "loss": 0.8367,
      "step": 245310
    },
    {
      "epoch": 0.8597854391627852,
      "grad_norm": 2.484375,
      "learning_rate": 3.9633714183353195e-05,
      "loss": 0.8895,
      "step": 245320
    },
    {
      "epoch": 0.8598204866696808,
      "grad_norm": 3.140625,
      "learning_rate": 3.963306515468949e-05,
      "loss": 0.8904,
      "step": 245330
    },
    {
      "epoch": 0.8598555341765763,
      "grad_norm": 3.5,
      "learning_rate": 3.963241612602579e-05,
      "loss": 0.8964,
      "step": 245340
    },
    {
      "epoch": 0.859890581683472,
      "grad_norm": 2.84375,
      "learning_rate": 3.9631767097362086e-05,
      "loss": 0.9218,
      "step": 245350
    },
    {
      "epoch": 0.8599256291903675,
      "grad_norm": 2.65625,
      "learning_rate": 3.963111806869839e-05,
      "loss": 0.8246,
      "step": 245360
    },
    {
      "epoch": 0.8599606766972632,
      "grad_norm": 3.21875,
      "learning_rate": 3.963046904003468e-05,
      "loss": 0.8768,
      "step": 245370
    },
    {
      "epoch": 0.8599957242041587,
      "grad_norm": 2.96875,
      "learning_rate": 3.962982001137098e-05,
      "loss": 0.8732,
      "step": 245380
    },
    {
      "epoch": 0.8600307717110544,
      "grad_norm": 3.421875,
      "learning_rate": 3.9629170982707284e-05,
      "loss": 0.8913,
      "step": 245390
    },
    {
      "epoch": 0.8600658192179499,
      "grad_norm": 2.84375,
      "learning_rate": 3.962852195404358e-05,
      "loss": 0.8218,
      "step": 245400
    },
    {
      "epoch": 0.8601008667248455,
      "grad_norm": 3.0625,
      "learning_rate": 3.962787292537988e-05,
      "loss": 0.8474,
      "step": 245410
    },
    {
      "epoch": 0.8601359142317411,
      "grad_norm": 3.609375,
      "learning_rate": 3.9627223896716175e-05,
      "loss": 0.97,
      "step": 245420
    },
    {
      "epoch": 0.8601709617386367,
      "grad_norm": 3.109375,
      "learning_rate": 3.9626574868052476e-05,
      "loss": 0.8719,
      "step": 245430
    },
    {
      "epoch": 0.8602060092455324,
      "grad_norm": 3.453125,
      "learning_rate": 3.962592583938877e-05,
      "loss": 0.8858,
      "step": 245440
    },
    {
      "epoch": 0.8602410567524279,
      "grad_norm": 2.6875,
      "learning_rate": 3.962527681072507e-05,
      "loss": 0.8832,
      "step": 245450
    },
    {
      "epoch": 0.8602761042593235,
      "grad_norm": 2.734375,
      "learning_rate": 3.962462778206137e-05,
      "loss": 0.8565,
      "step": 245460
    },
    {
      "epoch": 0.8603111517662191,
      "grad_norm": 3.078125,
      "learning_rate": 3.962397875339767e-05,
      "loss": 0.9675,
      "step": 245470
    },
    {
      "epoch": 0.8603461992731147,
      "grad_norm": 2.953125,
      "learning_rate": 3.962332972473396e-05,
      "loss": 0.861,
      "step": 245480
    },
    {
      "epoch": 0.8603812467800103,
      "grad_norm": 2.484375,
      "learning_rate": 3.9622680696070264e-05,
      "loss": 0.8107,
      "step": 245490
    },
    {
      "epoch": 0.8604162942869059,
      "grad_norm": 2.921875,
      "learning_rate": 3.962203166740656e-05,
      "loss": 0.9418,
      "step": 245500
    },
    {
      "epoch": 0.8604513417938014,
      "grad_norm": 2.9375,
      "learning_rate": 3.962138263874286e-05,
      "loss": 0.8485,
      "step": 245510
    },
    {
      "epoch": 0.8604863893006971,
      "grad_norm": 2.40625,
      "learning_rate": 3.9620733610079155e-05,
      "loss": 0.8362,
      "step": 245520
    },
    {
      "epoch": 0.8605214368075927,
      "grad_norm": 2.546875,
      "learning_rate": 3.9620084581415456e-05,
      "loss": 0.9325,
      "step": 245530
    },
    {
      "epoch": 0.8605564843144883,
      "grad_norm": 3.03125,
      "learning_rate": 3.961943555275176e-05,
      "loss": 0.8595,
      "step": 245540
    },
    {
      "epoch": 0.8605915318213839,
      "grad_norm": 3.09375,
      "learning_rate": 3.961878652408805e-05,
      "loss": 0.9987,
      "step": 245550
    },
    {
      "epoch": 0.8606265793282795,
      "grad_norm": 3.265625,
      "learning_rate": 3.9618137495424354e-05,
      "loss": 0.9696,
      "step": 245560
    },
    {
      "epoch": 0.8606616268351751,
      "grad_norm": 3.1875,
      "learning_rate": 3.961748846676065e-05,
      "loss": 0.8627,
      "step": 245570
    },
    {
      "epoch": 0.8606966743420706,
      "grad_norm": 3.078125,
      "learning_rate": 3.961683943809695e-05,
      "loss": 0.9771,
      "step": 245580
    },
    {
      "epoch": 0.8607317218489663,
      "grad_norm": 2.875,
      "learning_rate": 3.9616190409433244e-05,
      "loss": 0.8095,
      "step": 245590
    },
    {
      "epoch": 0.8607667693558618,
      "grad_norm": 3.28125,
      "learning_rate": 3.9615541380769546e-05,
      "loss": 0.9951,
      "step": 245600
    },
    {
      "epoch": 0.8608018168627575,
      "grad_norm": 2.875,
      "learning_rate": 3.961489235210584e-05,
      "loss": 0.8105,
      "step": 245610
    },
    {
      "epoch": 0.860836864369653,
      "grad_norm": 2.65625,
      "learning_rate": 3.961424332344214e-05,
      "loss": 0.7388,
      "step": 245620
    },
    {
      "epoch": 0.8608719118765487,
      "grad_norm": 3.21875,
      "learning_rate": 3.9613594294778436e-05,
      "loss": 0.9312,
      "step": 245630
    },
    {
      "epoch": 0.8609069593834443,
      "grad_norm": 2.859375,
      "learning_rate": 3.961294526611474e-05,
      "loss": 0.8787,
      "step": 245640
    },
    {
      "epoch": 0.8609420068903398,
      "grad_norm": 2.328125,
      "learning_rate": 3.961229623745103e-05,
      "loss": 0.7999,
      "step": 245650
    },
    {
      "epoch": 0.8609770543972355,
      "grad_norm": 3.046875,
      "learning_rate": 3.961164720878733e-05,
      "loss": 0.9589,
      "step": 245660
    },
    {
      "epoch": 0.861012101904131,
      "grad_norm": 2.46875,
      "learning_rate": 3.961099818012363e-05,
      "loss": 0.8568,
      "step": 245670
    },
    {
      "epoch": 0.8610471494110267,
      "grad_norm": 2.90625,
      "learning_rate": 3.961034915145992e-05,
      "loss": 0.8924,
      "step": 245680
    },
    {
      "epoch": 0.8610821969179222,
      "grad_norm": 3.34375,
      "learning_rate": 3.9609700122796224e-05,
      "loss": 0.8144,
      "step": 245690
    },
    {
      "epoch": 0.8611172444248179,
      "grad_norm": 3.125,
      "learning_rate": 3.960905109413252e-05,
      "loss": 0.888,
      "step": 245700
    },
    {
      "epoch": 0.8611522919317134,
      "grad_norm": 3.015625,
      "learning_rate": 3.960840206546882e-05,
      "loss": 0.9432,
      "step": 245710
    },
    {
      "epoch": 0.861187339438609,
      "grad_norm": 3.0625,
      "learning_rate": 3.9607753036805115e-05,
      "loss": 0.8292,
      "step": 245720
    },
    {
      "epoch": 0.8612223869455047,
      "grad_norm": 3.03125,
      "learning_rate": 3.9607104008141416e-05,
      "loss": 0.8979,
      "step": 245730
    },
    {
      "epoch": 0.8612574344524002,
      "grad_norm": 2.78125,
      "learning_rate": 3.960645497947771e-05,
      "loss": 0.8575,
      "step": 245740
    },
    {
      "epoch": 0.8612924819592959,
      "grad_norm": 2.71875,
      "learning_rate": 3.960580595081401e-05,
      "loss": 0.8248,
      "step": 245750
    },
    {
      "epoch": 0.8613275294661914,
      "grad_norm": 3.21875,
      "learning_rate": 3.9605156922150314e-05,
      "loss": 0.9324,
      "step": 245760
    },
    {
      "epoch": 0.861362576973087,
      "grad_norm": 2.953125,
      "learning_rate": 3.960450789348661e-05,
      "loss": 0.9006,
      "step": 245770
    },
    {
      "epoch": 0.8613976244799826,
      "grad_norm": 2.78125,
      "learning_rate": 3.960385886482291e-05,
      "loss": 0.8338,
      "step": 245780
    },
    {
      "epoch": 0.8614326719868782,
      "grad_norm": 2.34375,
      "learning_rate": 3.9603209836159204e-05,
      "loss": 0.943,
      "step": 245790
    },
    {
      "epoch": 0.8614677194937738,
      "grad_norm": 2.78125,
      "learning_rate": 3.9602560807495506e-05,
      "loss": 0.93,
      "step": 245800
    },
    {
      "epoch": 0.8615027670006694,
      "grad_norm": 2.921875,
      "learning_rate": 3.96019117788318e-05,
      "loss": 0.8809,
      "step": 245810
    },
    {
      "epoch": 0.8615378145075651,
      "grad_norm": 2.25,
      "learning_rate": 3.96012627501681e-05,
      "loss": 0.8622,
      "step": 245820
    },
    {
      "epoch": 0.8615728620144606,
      "grad_norm": 2.859375,
      "learning_rate": 3.9600613721504396e-05,
      "loss": 0.8395,
      "step": 245830
    },
    {
      "epoch": 0.8616079095213562,
      "grad_norm": 2.40625,
      "learning_rate": 3.95999646928407e-05,
      "loss": 0.8144,
      "step": 245840
    },
    {
      "epoch": 0.8616429570282518,
      "grad_norm": 2.78125,
      "learning_rate": 3.959931566417699e-05,
      "loss": 0.8782,
      "step": 245850
    },
    {
      "epoch": 0.8616780045351474,
      "grad_norm": 3.328125,
      "learning_rate": 3.9598666635513294e-05,
      "loss": 0.8872,
      "step": 245860
    },
    {
      "epoch": 0.861713052042043,
      "grad_norm": 2.765625,
      "learning_rate": 3.959801760684959e-05,
      "loss": 0.8196,
      "step": 245870
    },
    {
      "epoch": 0.8617480995489386,
      "grad_norm": 3.0,
      "learning_rate": 3.959736857818589e-05,
      "loss": 0.8118,
      "step": 245880
    },
    {
      "epoch": 0.8617831470558341,
      "grad_norm": 2.734375,
      "learning_rate": 3.9596719549522184e-05,
      "loss": 0.8488,
      "step": 245890
    },
    {
      "epoch": 0.8618181945627298,
      "grad_norm": 2.984375,
      "learning_rate": 3.9596070520858486e-05,
      "loss": 0.9026,
      "step": 245900
    },
    {
      "epoch": 0.8618532420696253,
      "grad_norm": 3.09375,
      "learning_rate": 3.959542149219479e-05,
      "loss": 0.782,
      "step": 245910
    },
    {
      "epoch": 0.861888289576521,
      "grad_norm": 2.953125,
      "learning_rate": 3.959477246353108e-05,
      "loss": 0.8963,
      "step": 245920
    },
    {
      "epoch": 0.8619233370834166,
      "grad_norm": 2.546875,
      "learning_rate": 3.959412343486738e-05,
      "loss": 0.8408,
      "step": 245930
    },
    {
      "epoch": 0.8619583845903122,
      "grad_norm": 2.5,
      "learning_rate": 3.959347440620368e-05,
      "loss": 0.801,
      "step": 245940
    },
    {
      "epoch": 0.8619934320972078,
      "grad_norm": 2.796875,
      "learning_rate": 3.959282537753998e-05,
      "loss": 0.9014,
      "step": 245950
    },
    {
      "epoch": 0.8620284796041033,
      "grad_norm": 2.953125,
      "learning_rate": 3.9592176348876274e-05,
      "loss": 0.927,
      "step": 245960
    },
    {
      "epoch": 0.862063527110999,
      "grad_norm": 2.890625,
      "learning_rate": 3.9591527320212575e-05,
      "loss": 0.871,
      "step": 245970
    },
    {
      "epoch": 0.8620985746178945,
      "grad_norm": 3.234375,
      "learning_rate": 3.959087829154887e-05,
      "loss": 0.843,
      "step": 245980
    },
    {
      "epoch": 0.8621336221247902,
      "grad_norm": 2.859375,
      "learning_rate": 3.959022926288517e-05,
      "loss": 0.9183,
      "step": 245990
    },
    {
      "epoch": 0.8621686696316857,
      "grad_norm": 2.90625,
      "learning_rate": 3.9589580234221466e-05,
      "loss": 0.8972,
      "step": 246000
    },
    {
      "epoch": 0.8622037171385813,
      "grad_norm": 3.1875,
      "learning_rate": 3.958893120555777e-05,
      "loss": 0.8281,
      "step": 246010
    },
    {
      "epoch": 0.862238764645477,
      "grad_norm": 2.5625,
      "learning_rate": 3.958828217689406e-05,
      "loss": 0.8525,
      "step": 246020
    },
    {
      "epoch": 0.8622738121523725,
      "grad_norm": 2.703125,
      "learning_rate": 3.958763314823036e-05,
      "loss": 0.8335,
      "step": 246030
    },
    {
      "epoch": 0.8623088596592682,
      "grad_norm": 3.046875,
      "learning_rate": 3.958698411956666e-05,
      "loss": 0.8913,
      "step": 246040
    },
    {
      "epoch": 0.8623439071661637,
      "grad_norm": 2.671875,
      "learning_rate": 3.958633509090295e-05,
      "loss": 0.9428,
      "step": 246050
    },
    {
      "epoch": 0.8623789546730594,
      "grad_norm": 3.40625,
      "learning_rate": 3.9585686062239254e-05,
      "loss": 0.8334,
      "step": 246060
    },
    {
      "epoch": 0.8624140021799549,
      "grad_norm": 3.0,
      "learning_rate": 3.958503703357555e-05,
      "loss": 0.8901,
      "step": 246070
    },
    {
      "epoch": 0.8624490496868505,
      "grad_norm": 3.140625,
      "learning_rate": 3.958438800491185e-05,
      "loss": 0.8866,
      "step": 246080
    },
    {
      "epoch": 0.8624840971937461,
      "grad_norm": 3.109375,
      "learning_rate": 3.9583738976248144e-05,
      "loss": 0.8951,
      "step": 246090
    },
    {
      "epoch": 0.8625191447006417,
      "grad_norm": 3.140625,
      "learning_rate": 3.9583089947584446e-05,
      "loss": 0.8032,
      "step": 246100
    },
    {
      "epoch": 0.8625541922075373,
      "grad_norm": 2.84375,
      "learning_rate": 3.958244091892074e-05,
      "loss": 0.9186,
      "step": 246110
    },
    {
      "epoch": 0.8625892397144329,
      "grad_norm": 3.015625,
      "learning_rate": 3.958179189025704e-05,
      "loss": 0.8516,
      "step": 246120
    },
    {
      "epoch": 0.8626242872213286,
      "grad_norm": 2.890625,
      "learning_rate": 3.958114286159334e-05,
      "loss": 0.9192,
      "step": 246130
    },
    {
      "epoch": 0.8626593347282241,
      "grad_norm": 2.953125,
      "learning_rate": 3.958049383292964e-05,
      "loss": 0.8491,
      "step": 246140
    },
    {
      "epoch": 0.8626943822351197,
      "grad_norm": 2.96875,
      "learning_rate": 3.957984480426594e-05,
      "loss": 0.8945,
      "step": 246150
    },
    {
      "epoch": 0.8627294297420153,
      "grad_norm": 2.859375,
      "learning_rate": 3.9579195775602234e-05,
      "loss": 0.968,
      "step": 246160
    },
    {
      "epoch": 0.8627644772489109,
      "grad_norm": 3.0625,
      "learning_rate": 3.9578546746938535e-05,
      "loss": 0.8788,
      "step": 246170
    },
    {
      "epoch": 0.8627995247558065,
      "grad_norm": 3.25,
      "learning_rate": 3.957789771827483e-05,
      "loss": 0.9001,
      "step": 246180
    },
    {
      "epoch": 0.8628345722627021,
      "grad_norm": 3.203125,
      "learning_rate": 3.957724868961113e-05,
      "loss": 0.9701,
      "step": 246190
    },
    {
      "epoch": 0.8628696197695976,
      "grad_norm": 2.859375,
      "learning_rate": 3.9576599660947426e-05,
      "loss": 0.8856,
      "step": 246200
    },
    {
      "epoch": 0.8629046672764933,
      "grad_norm": 2.859375,
      "learning_rate": 3.957595063228373e-05,
      "loss": 0.877,
      "step": 246210
    },
    {
      "epoch": 0.8629397147833889,
      "grad_norm": 3.078125,
      "learning_rate": 3.957530160362002e-05,
      "loss": 0.7878,
      "step": 246220
    },
    {
      "epoch": 0.8629747622902845,
      "grad_norm": 2.96875,
      "learning_rate": 3.957465257495632e-05,
      "loss": 0.8619,
      "step": 246230
    },
    {
      "epoch": 0.8630098097971801,
      "grad_norm": 2.609375,
      "learning_rate": 3.957400354629262e-05,
      "loss": 0.8378,
      "step": 246240
    },
    {
      "epoch": 0.8630448573040757,
      "grad_norm": 2.921875,
      "learning_rate": 3.957335451762892e-05,
      "loss": 0.8373,
      "step": 246250
    },
    {
      "epoch": 0.8630799048109713,
      "grad_norm": 2.625,
      "learning_rate": 3.957270548896522e-05,
      "loss": 0.8504,
      "step": 246260
    },
    {
      "epoch": 0.8631149523178668,
      "grad_norm": 3.3125,
      "learning_rate": 3.9572056460301515e-05,
      "loss": 0.8947,
      "step": 246270
    },
    {
      "epoch": 0.8631499998247625,
      "grad_norm": 3.53125,
      "learning_rate": 3.9571407431637816e-05,
      "loss": 0.8739,
      "step": 246280
    },
    {
      "epoch": 0.863185047331658,
      "grad_norm": 3.21875,
      "learning_rate": 3.957075840297411e-05,
      "loss": 0.8453,
      "step": 246290
    },
    {
      "epoch": 0.8632200948385537,
      "grad_norm": 2.53125,
      "learning_rate": 3.957010937431041e-05,
      "loss": 0.9072,
      "step": 246300
    },
    {
      "epoch": 0.8632551423454493,
      "grad_norm": 2.59375,
      "learning_rate": 3.956946034564671e-05,
      "loss": 0.8471,
      "step": 246310
    },
    {
      "epoch": 0.8632901898523448,
      "grad_norm": 2.6875,
      "learning_rate": 3.956881131698301e-05,
      "loss": 0.8171,
      "step": 246320
    },
    {
      "epoch": 0.8633252373592405,
      "grad_norm": 2.921875,
      "learning_rate": 3.95681622883193e-05,
      "loss": 0.9209,
      "step": 246330
    },
    {
      "epoch": 0.863360284866136,
      "grad_norm": 3.015625,
      "learning_rate": 3.9567513259655604e-05,
      "loss": 0.814,
      "step": 246340
    },
    {
      "epoch": 0.8633953323730317,
      "grad_norm": 2.703125,
      "learning_rate": 3.95668642309919e-05,
      "loss": 0.9313,
      "step": 246350
    },
    {
      "epoch": 0.8634303798799272,
      "grad_norm": 2.875,
      "learning_rate": 3.95662152023282e-05,
      "loss": 0.8962,
      "step": 246360
    },
    {
      "epoch": 0.8634654273868229,
      "grad_norm": 2.84375,
      "learning_rate": 3.9565566173664495e-05,
      "loss": 0.8362,
      "step": 246370
    },
    {
      "epoch": 0.8635004748937184,
      "grad_norm": 2.609375,
      "learning_rate": 3.9564917145000796e-05,
      "loss": 0.8699,
      "step": 246380
    },
    {
      "epoch": 0.863535522400614,
      "grad_norm": 2.96875,
      "learning_rate": 3.956426811633709e-05,
      "loss": 0.8578,
      "step": 246390
    },
    {
      "epoch": 0.8635705699075096,
      "grad_norm": 2.921875,
      "learning_rate": 3.956361908767339e-05,
      "loss": 0.87,
      "step": 246400
    },
    {
      "epoch": 0.8636056174144052,
      "grad_norm": 2.875,
      "learning_rate": 3.956297005900969e-05,
      "loss": 0.8563,
      "step": 246410
    },
    {
      "epoch": 0.8636406649213009,
      "grad_norm": 3.015625,
      "learning_rate": 3.956232103034598e-05,
      "loss": 0.8725,
      "step": 246420
    },
    {
      "epoch": 0.8636757124281964,
      "grad_norm": 2.71875,
      "learning_rate": 3.956167200168228e-05,
      "loss": 0.892,
      "step": 246430
    },
    {
      "epoch": 0.863710759935092,
      "grad_norm": 2.984375,
      "learning_rate": 3.956102297301858e-05,
      "loss": 0.9558,
      "step": 246440
    },
    {
      "epoch": 0.8637458074419876,
      "grad_norm": 2.9375,
      "learning_rate": 3.956037394435488e-05,
      "loss": 0.8632,
      "step": 246450
    },
    {
      "epoch": 0.8637808549488832,
      "grad_norm": 2.609375,
      "learning_rate": 3.9559724915691174e-05,
      "loss": 0.89,
      "step": 246460
    },
    {
      "epoch": 0.8638159024557788,
      "grad_norm": 2.875,
      "learning_rate": 3.9559075887027475e-05,
      "loss": 0.8792,
      "step": 246470
    },
    {
      "epoch": 0.8638509499626744,
      "grad_norm": 2.953125,
      "learning_rate": 3.955842685836377e-05,
      "loss": 0.859,
      "step": 246480
    },
    {
      "epoch": 0.86388599746957,
      "grad_norm": 2.578125,
      "learning_rate": 3.955777782970007e-05,
      "loss": 0.8066,
      "step": 246490
    },
    {
      "epoch": 0.8639210449764656,
      "grad_norm": 3.15625,
      "learning_rate": 3.955712880103637e-05,
      "loss": 0.9298,
      "step": 246500
    },
    {
      "epoch": 0.8639560924833612,
      "grad_norm": 2.875,
      "learning_rate": 3.955647977237267e-05,
      "loss": 0.7875,
      "step": 246510
    },
    {
      "epoch": 0.8639911399902568,
      "grad_norm": 2.59375,
      "learning_rate": 3.955583074370897e-05,
      "loss": 0.874,
      "step": 246520
    },
    {
      "epoch": 0.8640261874971524,
      "grad_norm": 3.1875,
      "learning_rate": 3.955518171504526e-05,
      "loss": 0.8279,
      "step": 246530
    },
    {
      "epoch": 0.864061235004048,
      "grad_norm": 2.59375,
      "learning_rate": 3.9554532686381564e-05,
      "loss": 0.8451,
      "step": 246540
    },
    {
      "epoch": 0.8640962825109436,
      "grad_norm": 3.078125,
      "learning_rate": 3.955388365771786e-05,
      "loss": 0.9293,
      "step": 246550
    },
    {
      "epoch": 0.8641313300178391,
      "grad_norm": 3.1875,
      "learning_rate": 3.955323462905416e-05,
      "loss": 0.8686,
      "step": 246560
    },
    {
      "epoch": 0.8641663775247348,
      "grad_norm": 3.0625,
      "learning_rate": 3.9552585600390455e-05,
      "loss": 0.7908,
      "step": 246570
    },
    {
      "epoch": 0.8642014250316303,
      "grad_norm": 2.828125,
      "learning_rate": 3.9551936571726756e-05,
      "loss": 0.8448,
      "step": 246580
    },
    {
      "epoch": 0.864236472538526,
      "grad_norm": 4.1875,
      "learning_rate": 3.955128754306305e-05,
      "loss": 0.9185,
      "step": 246590
    },
    {
      "epoch": 0.8642715200454215,
      "grad_norm": 3.0625,
      "learning_rate": 3.955063851439935e-05,
      "loss": 0.8385,
      "step": 246600
    },
    {
      "epoch": 0.8643065675523172,
      "grad_norm": 2.78125,
      "learning_rate": 3.954998948573565e-05,
      "loss": 0.8541,
      "step": 246610
    },
    {
      "epoch": 0.8643416150592128,
      "grad_norm": 3.015625,
      "learning_rate": 3.954934045707195e-05,
      "loss": 0.8073,
      "step": 246620
    },
    {
      "epoch": 0.8643766625661083,
      "grad_norm": 3.140625,
      "learning_rate": 3.954869142840825e-05,
      "loss": 0.9176,
      "step": 246630
    },
    {
      "epoch": 0.864411710073004,
      "grad_norm": 4.0625,
      "learning_rate": 3.9548042399744544e-05,
      "loss": 0.9161,
      "step": 246640
    },
    {
      "epoch": 0.8644467575798995,
      "grad_norm": 2.734375,
      "learning_rate": 3.9547393371080846e-05,
      "loss": 0.9151,
      "step": 246650
    },
    {
      "epoch": 0.8644818050867952,
      "grad_norm": 2.953125,
      "learning_rate": 3.954674434241714e-05,
      "loss": 0.8705,
      "step": 246660
    },
    {
      "epoch": 0.8645168525936907,
      "grad_norm": 2.53125,
      "learning_rate": 3.954609531375344e-05,
      "loss": 0.8651,
      "step": 246670
    },
    {
      "epoch": 0.8645519001005864,
      "grad_norm": 3.203125,
      "learning_rate": 3.9545446285089736e-05,
      "loss": 0.8301,
      "step": 246680
    },
    {
      "epoch": 0.8645869476074819,
      "grad_norm": 3.3125,
      "learning_rate": 3.954479725642604e-05,
      "loss": 0.89,
      "step": 246690
    },
    {
      "epoch": 0.8646219951143775,
      "grad_norm": 3.109375,
      "learning_rate": 3.954414822776233e-05,
      "loss": 0.8353,
      "step": 246700
    },
    {
      "epoch": 0.8646570426212732,
      "grad_norm": 2.921875,
      "learning_rate": 3.9543499199098634e-05,
      "loss": 0.9142,
      "step": 246710
    },
    {
      "epoch": 0.8646920901281687,
      "grad_norm": 2.59375,
      "learning_rate": 3.954285017043493e-05,
      "loss": 0.8959,
      "step": 246720
    },
    {
      "epoch": 0.8647271376350644,
      "grad_norm": 3.109375,
      "learning_rate": 3.954220114177123e-05,
      "loss": 0.9598,
      "step": 246730
    },
    {
      "epoch": 0.8647621851419599,
      "grad_norm": 2.96875,
      "learning_rate": 3.9541552113107524e-05,
      "loss": 0.8719,
      "step": 246740
    },
    {
      "epoch": 0.8647972326488556,
      "grad_norm": 2.8125,
      "learning_rate": 3.9540903084443826e-05,
      "loss": 0.8809,
      "step": 246750
    },
    {
      "epoch": 0.8648322801557511,
      "grad_norm": 3.375,
      "learning_rate": 3.954025405578012e-05,
      "loss": 0.8989,
      "step": 246760
    },
    {
      "epoch": 0.8648673276626467,
      "grad_norm": 3.1875,
      "learning_rate": 3.953960502711642e-05,
      "loss": 0.8422,
      "step": 246770
    },
    {
      "epoch": 0.8649023751695423,
      "grad_norm": 3.078125,
      "learning_rate": 3.953895599845272e-05,
      "loss": 0.9121,
      "step": 246780
    },
    {
      "epoch": 0.8649374226764379,
      "grad_norm": 2.96875,
      "learning_rate": 3.953830696978901e-05,
      "loss": 0.8922,
      "step": 246790
    },
    {
      "epoch": 0.8649724701833336,
      "grad_norm": 3.078125,
      "learning_rate": 3.953765794112531e-05,
      "loss": 0.9163,
      "step": 246800
    },
    {
      "epoch": 0.8650075176902291,
      "grad_norm": 3.109375,
      "learning_rate": 3.953700891246161e-05,
      "loss": 0.9133,
      "step": 246810
    },
    {
      "epoch": 0.8650425651971247,
      "grad_norm": 3.03125,
      "learning_rate": 3.953635988379791e-05,
      "loss": 0.8636,
      "step": 246820
    },
    {
      "epoch": 0.8650776127040203,
      "grad_norm": 2.796875,
      "learning_rate": 3.95357108551342e-05,
      "loss": 0.8656,
      "step": 246830
    },
    {
      "epoch": 0.8651126602109159,
      "grad_norm": 2.890625,
      "learning_rate": 3.9535061826470504e-05,
      "loss": 0.9234,
      "step": 246840
    },
    {
      "epoch": 0.8651477077178115,
      "grad_norm": 2.5625,
      "learning_rate": 3.95344127978068e-05,
      "loss": 0.9084,
      "step": 246850
    },
    {
      "epoch": 0.8651827552247071,
      "grad_norm": 2.765625,
      "learning_rate": 3.95337637691431e-05,
      "loss": 0.8678,
      "step": 246860
    },
    {
      "epoch": 0.8652178027316026,
      "grad_norm": 2.40625,
      "learning_rate": 3.95331147404794e-05,
      "loss": 0.9158,
      "step": 246870
    },
    {
      "epoch": 0.8652528502384983,
      "grad_norm": 2.265625,
      "learning_rate": 3.9532465711815696e-05,
      "loss": 0.8064,
      "step": 246880
    },
    {
      "epoch": 0.8652878977453938,
      "grad_norm": 3.25,
      "learning_rate": 3.9531816683152e-05,
      "loss": 0.8722,
      "step": 246890
    },
    {
      "epoch": 0.8653229452522895,
      "grad_norm": 3.25,
      "learning_rate": 3.953116765448829e-05,
      "loss": 0.8946,
      "step": 246900
    },
    {
      "epoch": 0.8653579927591851,
      "grad_norm": 3.03125,
      "learning_rate": 3.9530518625824594e-05,
      "loss": 0.8834,
      "step": 246910
    },
    {
      "epoch": 0.8653930402660807,
      "grad_norm": 2.828125,
      "learning_rate": 3.952986959716089e-05,
      "loss": 0.8519,
      "step": 246920
    },
    {
      "epoch": 0.8654280877729763,
      "grad_norm": 3.140625,
      "learning_rate": 3.952922056849719e-05,
      "loss": 0.8494,
      "step": 246930
    },
    {
      "epoch": 0.8654631352798718,
      "grad_norm": 2.78125,
      "learning_rate": 3.9528571539833484e-05,
      "loss": 0.8164,
      "step": 246940
    },
    {
      "epoch": 0.8654981827867675,
      "grad_norm": 3.59375,
      "learning_rate": 3.9527922511169786e-05,
      "loss": 0.9225,
      "step": 246950
    },
    {
      "epoch": 0.865533230293663,
      "grad_norm": 2.578125,
      "learning_rate": 3.952727348250608e-05,
      "loss": 0.8565,
      "step": 246960
    },
    {
      "epoch": 0.8655682778005587,
      "grad_norm": 2.34375,
      "learning_rate": 3.952662445384238e-05,
      "loss": 0.8494,
      "step": 246970
    },
    {
      "epoch": 0.8656033253074542,
      "grad_norm": 2.65625,
      "learning_rate": 3.9525975425178676e-05,
      "loss": 0.8215,
      "step": 246980
    },
    {
      "epoch": 0.8656383728143499,
      "grad_norm": 3.71875,
      "learning_rate": 3.952532639651498e-05,
      "loss": 0.9828,
      "step": 246990
    },
    {
      "epoch": 0.8656734203212455,
      "grad_norm": 3.15625,
      "learning_rate": 3.952467736785128e-05,
      "loss": 0.8644,
      "step": 247000
    },
    {
      "epoch": 0.865708467828141,
      "grad_norm": 3.109375,
      "learning_rate": 3.9524028339187574e-05,
      "loss": 0.9241,
      "step": 247010
    },
    {
      "epoch": 0.8657435153350367,
      "grad_norm": 2.71875,
      "learning_rate": 3.9523379310523875e-05,
      "loss": 0.8903,
      "step": 247020
    },
    {
      "epoch": 0.8657785628419322,
      "grad_norm": 2.65625,
      "learning_rate": 3.952273028186017e-05,
      "loss": 0.7843,
      "step": 247030
    },
    {
      "epoch": 0.8658136103488279,
      "grad_norm": 3.28125,
      "learning_rate": 3.952208125319647e-05,
      "loss": 0.8425,
      "step": 247040
    },
    {
      "epoch": 0.8658486578557234,
      "grad_norm": 2.953125,
      "learning_rate": 3.9521432224532766e-05,
      "loss": 0.908,
      "step": 247050
    },
    {
      "epoch": 0.865883705362619,
      "grad_norm": 3.078125,
      "learning_rate": 3.952078319586907e-05,
      "loss": 0.9114,
      "step": 247060
    },
    {
      "epoch": 0.8659187528695146,
      "grad_norm": 2.53125,
      "learning_rate": 3.952013416720536e-05,
      "loss": 0.9383,
      "step": 247070
    },
    {
      "epoch": 0.8659538003764102,
      "grad_norm": 2.734375,
      "learning_rate": 3.951948513854166e-05,
      "loss": 0.9279,
      "step": 247080
    },
    {
      "epoch": 0.8659888478833058,
      "grad_norm": 3.71875,
      "learning_rate": 3.951883610987796e-05,
      "loss": 0.8137,
      "step": 247090
    },
    {
      "epoch": 0.8660238953902014,
      "grad_norm": 2.78125,
      "learning_rate": 3.951818708121426e-05,
      "loss": 0.9055,
      "step": 247100
    },
    {
      "epoch": 0.8660589428970971,
      "grad_norm": 2.6875,
      "learning_rate": 3.9517538052550554e-05,
      "loss": 0.8585,
      "step": 247110
    },
    {
      "epoch": 0.8660939904039926,
      "grad_norm": 2.921875,
      "learning_rate": 3.9516889023886855e-05,
      "loss": 0.8686,
      "step": 247120
    },
    {
      "epoch": 0.8661290379108882,
      "grad_norm": 2.5,
      "learning_rate": 3.951623999522315e-05,
      "loss": 0.795,
      "step": 247130
    },
    {
      "epoch": 0.8661640854177838,
      "grad_norm": 2.640625,
      "learning_rate": 3.951559096655945e-05,
      "loss": 0.8954,
      "step": 247140
    },
    {
      "epoch": 0.8661991329246794,
      "grad_norm": 2.875,
      "learning_rate": 3.951494193789575e-05,
      "loss": 0.9633,
      "step": 247150
    },
    {
      "epoch": 0.866234180431575,
      "grad_norm": 3.390625,
      "learning_rate": 3.951429290923205e-05,
      "loss": 0.9041,
      "step": 247160
    },
    {
      "epoch": 0.8662692279384706,
      "grad_norm": 3.359375,
      "learning_rate": 3.951364388056834e-05,
      "loss": 0.8944,
      "step": 247170
    },
    {
      "epoch": 0.8663042754453661,
      "grad_norm": 3.140625,
      "learning_rate": 3.9512994851904636e-05,
      "loss": 0.8574,
      "step": 247180
    },
    {
      "epoch": 0.8663393229522618,
      "grad_norm": 3.25,
      "learning_rate": 3.951234582324094e-05,
      "loss": 0.8696,
      "step": 247190
    },
    {
      "epoch": 0.8663743704591574,
      "grad_norm": 3.125,
      "learning_rate": 3.951169679457723e-05,
      "loss": 0.8716,
      "step": 247200
    },
    {
      "epoch": 0.866409417966053,
      "grad_norm": 3.015625,
      "learning_rate": 3.9511047765913534e-05,
      "loss": 0.8531,
      "step": 247210
    },
    {
      "epoch": 0.8664444654729486,
      "grad_norm": 3.0,
      "learning_rate": 3.9510398737249835e-05,
      "loss": 0.9518,
      "step": 247220
    },
    {
      "epoch": 0.8664795129798442,
      "grad_norm": 2.171875,
      "learning_rate": 3.950974970858613e-05,
      "loss": 0.9082,
      "step": 247230
    },
    {
      "epoch": 0.8665145604867398,
      "grad_norm": 2.671875,
      "learning_rate": 3.950910067992243e-05,
      "loss": 0.7737,
      "step": 247240
    },
    {
      "epoch": 0.8665496079936353,
      "grad_norm": 2.90625,
      "learning_rate": 3.9508451651258726e-05,
      "loss": 0.8752,
      "step": 247250
    },
    {
      "epoch": 0.866584655500531,
      "grad_norm": 3.34375,
      "learning_rate": 3.950780262259503e-05,
      "loss": 0.9129,
      "step": 247260
    },
    {
      "epoch": 0.8666197030074265,
      "grad_norm": 2.796875,
      "learning_rate": 3.950715359393132e-05,
      "loss": 0.8183,
      "step": 247270
    },
    {
      "epoch": 0.8666547505143222,
      "grad_norm": 3.328125,
      "learning_rate": 3.950650456526762e-05,
      "loss": 0.8404,
      "step": 247280
    },
    {
      "epoch": 0.8666897980212177,
      "grad_norm": 2.578125,
      "learning_rate": 3.950585553660392e-05,
      "loss": 0.8648,
      "step": 247290
    },
    {
      "epoch": 0.8667248455281134,
      "grad_norm": 2.890625,
      "learning_rate": 3.950520650794022e-05,
      "loss": 0.8595,
      "step": 247300
    },
    {
      "epoch": 0.866759893035009,
      "grad_norm": 3.03125,
      "learning_rate": 3.9504557479276514e-05,
      "loss": 0.8989,
      "step": 247310
    },
    {
      "epoch": 0.8667949405419045,
      "grad_norm": 2.9375,
      "learning_rate": 3.9503908450612815e-05,
      "loss": 0.9042,
      "step": 247320
    },
    {
      "epoch": 0.8668299880488002,
      "grad_norm": 2.75,
      "learning_rate": 3.950325942194911e-05,
      "loss": 0.9534,
      "step": 247330
    },
    {
      "epoch": 0.8668650355556957,
      "grad_norm": 3.015625,
      "learning_rate": 3.950261039328541e-05,
      "loss": 0.8443,
      "step": 247340
    },
    {
      "epoch": 0.8669000830625914,
      "grad_norm": 2.734375,
      "learning_rate": 3.9501961364621706e-05,
      "loss": 0.8603,
      "step": 247350
    },
    {
      "epoch": 0.8669351305694869,
      "grad_norm": 2.734375,
      "learning_rate": 3.950131233595801e-05,
      "loss": 0.8238,
      "step": 247360
    },
    {
      "epoch": 0.8669701780763825,
      "grad_norm": 2.734375,
      "learning_rate": 3.950066330729431e-05,
      "loss": 0.9037,
      "step": 247370
    },
    {
      "epoch": 0.8670052255832781,
      "grad_norm": 2.8125,
      "learning_rate": 3.95000142786306e-05,
      "loss": 0.8956,
      "step": 247380
    },
    {
      "epoch": 0.8670402730901737,
      "grad_norm": 2.796875,
      "learning_rate": 3.9499365249966905e-05,
      "loss": 0.8588,
      "step": 247390
    },
    {
      "epoch": 0.8670753205970694,
      "grad_norm": 2.71875,
      "learning_rate": 3.94987162213032e-05,
      "loss": 0.8517,
      "step": 247400
    },
    {
      "epoch": 0.8671103681039649,
      "grad_norm": 2.890625,
      "learning_rate": 3.94980671926395e-05,
      "loss": 0.8867,
      "step": 247410
    },
    {
      "epoch": 0.8671454156108606,
      "grad_norm": 2.359375,
      "learning_rate": 3.9497418163975795e-05,
      "loss": 0.8149,
      "step": 247420
    },
    {
      "epoch": 0.8671804631177561,
      "grad_norm": 3.53125,
      "learning_rate": 3.94967691353121e-05,
      "loss": 1.004,
      "step": 247430
    },
    {
      "epoch": 0.8672155106246517,
      "grad_norm": 3.296875,
      "learning_rate": 3.949612010664839e-05,
      "loss": 0.9628,
      "step": 247440
    },
    {
      "epoch": 0.8672505581315473,
      "grad_norm": 2.765625,
      "learning_rate": 3.949547107798469e-05,
      "loss": 0.8246,
      "step": 247450
    },
    {
      "epoch": 0.8672856056384429,
      "grad_norm": 2.859375,
      "learning_rate": 3.949482204932099e-05,
      "loss": 0.949,
      "step": 247460
    },
    {
      "epoch": 0.8673206531453385,
      "grad_norm": 2.890625,
      "learning_rate": 3.949417302065729e-05,
      "loss": 0.8212,
      "step": 247470
    },
    {
      "epoch": 0.8673557006522341,
      "grad_norm": 2.546875,
      "learning_rate": 3.949352399199358e-05,
      "loss": 0.8889,
      "step": 247480
    },
    {
      "epoch": 0.8673907481591298,
      "grad_norm": 2.921875,
      "learning_rate": 3.9492874963329885e-05,
      "loss": 0.9655,
      "step": 247490
    },
    {
      "epoch": 0.8674257956660253,
      "grad_norm": 3.171875,
      "learning_rate": 3.9492225934666186e-05,
      "loss": 0.8852,
      "step": 247500
    },
    {
      "epoch": 0.8674608431729209,
      "grad_norm": 2.828125,
      "learning_rate": 3.949157690600248e-05,
      "loss": 0.8772,
      "step": 247510
    },
    {
      "epoch": 0.8674958906798165,
      "grad_norm": 2.703125,
      "learning_rate": 3.949092787733878e-05,
      "loss": 0.81,
      "step": 247520
    },
    {
      "epoch": 0.8675309381867121,
      "grad_norm": 3.21875,
      "learning_rate": 3.949027884867508e-05,
      "loss": 0.9517,
      "step": 247530
    },
    {
      "epoch": 0.8675659856936077,
      "grad_norm": 3.0,
      "learning_rate": 3.948962982001137e-05,
      "loss": 0.8732,
      "step": 247540
    },
    {
      "epoch": 0.8676010332005033,
      "grad_norm": 3.0625,
      "learning_rate": 3.9488980791347666e-05,
      "loss": 0.8609,
      "step": 247550
    },
    {
      "epoch": 0.8676360807073988,
      "grad_norm": 2.453125,
      "learning_rate": 3.948833176268397e-05,
      "loss": 0.8628,
      "step": 247560
    },
    {
      "epoch": 0.8676711282142945,
      "grad_norm": 3.25,
      "learning_rate": 3.948768273402026e-05,
      "loss": 0.9231,
      "step": 247570
    },
    {
      "epoch": 0.86770617572119,
      "grad_norm": 2.875,
      "learning_rate": 3.948703370535656e-05,
      "loss": 0.9941,
      "step": 247580
    },
    {
      "epoch": 0.8677412232280857,
      "grad_norm": 2.828125,
      "learning_rate": 3.9486384676692865e-05,
      "loss": 0.9326,
      "step": 247590
    },
    {
      "epoch": 0.8677762707349813,
      "grad_norm": 2.890625,
      "learning_rate": 3.948573564802916e-05,
      "loss": 0.9369,
      "step": 247600
    },
    {
      "epoch": 0.8678113182418769,
      "grad_norm": 3.671875,
      "learning_rate": 3.948508661936546e-05,
      "loss": 0.9125,
      "step": 247610
    },
    {
      "epoch": 0.8678463657487725,
      "grad_norm": 3.46875,
      "learning_rate": 3.9484437590701755e-05,
      "loss": 0.9692,
      "step": 247620
    },
    {
      "epoch": 0.867881413255668,
      "grad_norm": 2.8125,
      "learning_rate": 3.948378856203806e-05,
      "loss": 0.8461,
      "step": 247630
    },
    {
      "epoch": 0.8679164607625637,
      "grad_norm": 3.015625,
      "learning_rate": 3.948313953337435e-05,
      "loss": 0.7853,
      "step": 247640
    },
    {
      "epoch": 0.8679515082694592,
      "grad_norm": 2.671875,
      "learning_rate": 3.948249050471065e-05,
      "loss": 0.8629,
      "step": 247650
    },
    {
      "epoch": 0.8679865557763549,
      "grad_norm": 3.03125,
      "learning_rate": 3.948184147604695e-05,
      "loss": 0.9727,
      "step": 247660
    },
    {
      "epoch": 0.8680216032832504,
      "grad_norm": 2.84375,
      "learning_rate": 3.948119244738325e-05,
      "loss": 0.8931,
      "step": 247670
    },
    {
      "epoch": 0.868056650790146,
      "grad_norm": 2.8125,
      "learning_rate": 3.948054341871954e-05,
      "loss": 0.9166,
      "step": 247680
    },
    {
      "epoch": 0.8680916982970417,
      "grad_norm": 2.875,
      "learning_rate": 3.9479894390055845e-05,
      "loss": 0.8983,
      "step": 247690
    },
    {
      "epoch": 0.8681267458039372,
      "grad_norm": 2.9375,
      "learning_rate": 3.947924536139214e-05,
      "loss": 0.9725,
      "step": 247700
    },
    {
      "epoch": 0.8681617933108329,
      "grad_norm": 3.0625,
      "learning_rate": 3.947859633272844e-05,
      "loss": 0.8081,
      "step": 247710
    },
    {
      "epoch": 0.8681968408177284,
      "grad_norm": 3.0625,
      "learning_rate": 3.9477947304064735e-05,
      "loss": 0.924,
      "step": 247720
    },
    {
      "epoch": 0.8682318883246241,
      "grad_norm": 2.578125,
      "learning_rate": 3.947729827540104e-05,
      "loss": 0.9548,
      "step": 247730
    },
    {
      "epoch": 0.8682669358315196,
      "grad_norm": 2.75,
      "learning_rate": 3.947664924673734e-05,
      "loss": 1.0075,
      "step": 247740
    },
    {
      "epoch": 0.8683019833384152,
      "grad_norm": 3.359375,
      "learning_rate": 3.947600021807363e-05,
      "loss": 0.8673,
      "step": 247750
    },
    {
      "epoch": 0.8683370308453108,
      "grad_norm": 3.03125,
      "learning_rate": 3.9475351189409934e-05,
      "loss": 0.8796,
      "step": 247760
    },
    {
      "epoch": 0.8683720783522064,
      "grad_norm": 2.78125,
      "learning_rate": 3.947470216074623e-05,
      "loss": 0.9231,
      "step": 247770
    },
    {
      "epoch": 0.868407125859102,
      "grad_norm": 3.046875,
      "learning_rate": 3.947405313208253e-05,
      "loss": 0.8629,
      "step": 247780
    },
    {
      "epoch": 0.8684421733659976,
      "grad_norm": 2.796875,
      "learning_rate": 3.9473404103418825e-05,
      "loss": 0.7977,
      "step": 247790
    },
    {
      "epoch": 0.8684772208728933,
      "grad_norm": 3.703125,
      "learning_rate": 3.9472755074755126e-05,
      "loss": 0.9403,
      "step": 247800
    },
    {
      "epoch": 0.8685122683797888,
      "grad_norm": 3.0625,
      "learning_rate": 3.947210604609142e-05,
      "loss": 0.9206,
      "step": 247810
    },
    {
      "epoch": 0.8685473158866844,
      "grad_norm": 3.046875,
      "learning_rate": 3.947145701742772e-05,
      "loss": 0.8532,
      "step": 247820
    },
    {
      "epoch": 0.86858236339358,
      "grad_norm": 2.984375,
      "learning_rate": 3.947080798876402e-05,
      "loss": 0.8953,
      "step": 247830
    },
    {
      "epoch": 0.8686174109004756,
      "grad_norm": 3.0625,
      "learning_rate": 3.947015896010032e-05,
      "loss": 0.8715,
      "step": 247840
    },
    {
      "epoch": 0.8686524584073712,
      "grad_norm": 2.640625,
      "learning_rate": 3.946950993143661e-05,
      "loss": 0.8956,
      "step": 247850
    },
    {
      "epoch": 0.8686875059142668,
      "grad_norm": 3.015625,
      "learning_rate": 3.9468860902772914e-05,
      "loss": 0.9144,
      "step": 247860
    },
    {
      "epoch": 0.8687225534211623,
      "grad_norm": 2.828125,
      "learning_rate": 3.9468211874109215e-05,
      "loss": 0.9148,
      "step": 247870
    },
    {
      "epoch": 0.868757600928058,
      "grad_norm": 2.96875,
      "learning_rate": 3.946756284544551e-05,
      "loss": 0.8373,
      "step": 247880
    },
    {
      "epoch": 0.8687926484349536,
      "grad_norm": 2.890625,
      "learning_rate": 3.946691381678181e-05,
      "loss": 0.8989,
      "step": 247890
    },
    {
      "epoch": 0.8688276959418492,
      "grad_norm": 2.984375,
      "learning_rate": 3.9466264788118106e-05,
      "loss": 0.8395,
      "step": 247900
    },
    {
      "epoch": 0.8688627434487448,
      "grad_norm": 2.734375,
      "learning_rate": 3.946561575945441e-05,
      "loss": 0.8676,
      "step": 247910
    },
    {
      "epoch": 0.8688977909556403,
      "grad_norm": 2.9375,
      "learning_rate": 3.9464966730790695e-05,
      "loss": 0.8519,
      "step": 247920
    },
    {
      "epoch": 0.868932838462536,
      "grad_norm": 2.984375,
      "learning_rate": 3.9464317702127e-05,
      "loss": 0.8408,
      "step": 247930
    },
    {
      "epoch": 0.8689678859694315,
      "grad_norm": 2.609375,
      "learning_rate": 3.946366867346329e-05,
      "loss": 0.8629,
      "step": 247940
    },
    {
      "epoch": 0.8690029334763272,
      "grad_norm": 3.03125,
      "learning_rate": 3.946301964479959e-05,
      "loss": 0.9057,
      "step": 247950
    },
    {
      "epoch": 0.8690379809832227,
      "grad_norm": 2.71875,
      "learning_rate": 3.9462370616135894e-05,
      "loss": 0.8445,
      "step": 247960
    },
    {
      "epoch": 0.8690730284901184,
      "grad_norm": 3.125,
      "learning_rate": 3.946172158747219e-05,
      "loss": 0.8823,
      "step": 247970
    },
    {
      "epoch": 0.869108075997014,
      "grad_norm": 3.140625,
      "learning_rate": 3.946107255880849e-05,
      "loss": 0.7978,
      "step": 247980
    },
    {
      "epoch": 0.8691431235039095,
      "grad_norm": 2.953125,
      "learning_rate": 3.9460423530144785e-05,
      "loss": 0.9549,
      "step": 247990
    },
    {
      "epoch": 0.8691781710108052,
      "grad_norm": 3.0625,
      "learning_rate": 3.9459774501481086e-05,
      "loss": 0.9268,
      "step": 248000
    },
    {
      "epoch": 0.8692132185177007,
      "grad_norm": 3.359375,
      "learning_rate": 3.945912547281738e-05,
      "loss": 0.9401,
      "step": 248010
    },
    {
      "epoch": 0.8692482660245964,
      "grad_norm": 2.609375,
      "learning_rate": 3.945847644415368e-05,
      "loss": 0.8466,
      "step": 248020
    },
    {
      "epoch": 0.8692833135314919,
      "grad_norm": 3.1875,
      "learning_rate": 3.945782741548998e-05,
      "loss": 0.8731,
      "step": 248030
    },
    {
      "epoch": 0.8693183610383876,
      "grad_norm": 2.9375,
      "learning_rate": 3.945717838682628e-05,
      "loss": 0.8763,
      "step": 248040
    },
    {
      "epoch": 0.8693534085452831,
      "grad_norm": 2.765625,
      "learning_rate": 3.945652935816257e-05,
      "loss": 0.9501,
      "step": 248050
    },
    {
      "epoch": 0.8693884560521787,
      "grad_norm": 2.953125,
      "learning_rate": 3.9455880329498874e-05,
      "loss": 0.8878,
      "step": 248060
    },
    {
      "epoch": 0.8694235035590743,
      "grad_norm": 2.75,
      "learning_rate": 3.945523130083517e-05,
      "loss": 0.8465,
      "step": 248070
    },
    {
      "epoch": 0.8694585510659699,
      "grad_norm": 2.734375,
      "learning_rate": 3.945458227217147e-05,
      "loss": 0.8837,
      "step": 248080
    },
    {
      "epoch": 0.8694935985728656,
      "grad_norm": 2.453125,
      "learning_rate": 3.9453933243507765e-05,
      "loss": 0.8327,
      "step": 248090
    },
    {
      "epoch": 0.8695286460797611,
      "grad_norm": 3.078125,
      "learning_rate": 3.9453284214844066e-05,
      "loss": 0.8374,
      "step": 248100
    },
    {
      "epoch": 0.8695636935866567,
      "grad_norm": 3.171875,
      "learning_rate": 3.945263518618037e-05,
      "loss": 0.9151,
      "step": 248110
    },
    {
      "epoch": 0.8695987410935523,
      "grad_norm": 2.921875,
      "learning_rate": 3.945198615751666e-05,
      "loss": 0.8327,
      "step": 248120
    },
    {
      "epoch": 0.8696337886004479,
      "grad_norm": 2.515625,
      "learning_rate": 3.9451337128852963e-05,
      "loss": 0.8749,
      "step": 248130
    },
    {
      "epoch": 0.8696688361073435,
      "grad_norm": 3.109375,
      "learning_rate": 3.945068810018926e-05,
      "loss": 0.8706,
      "step": 248140
    },
    {
      "epoch": 0.8697038836142391,
      "grad_norm": 3.25,
      "learning_rate": 3.945003907152556e-05,
      "loss": 0.9185,
      "step": 248150
    },
    {
      "epoch": 0.8697389311211347,
      "grad_norm": 2.96875,
      "learning_rate": 3.9449390042861854e-05,
      "loss": 0.9121,
      "step": 248160
    },
    {
      "epoch": 0.8697739786280303,
      "grad_norm": 2.6875,
      "learning_rate": 3.9448741014198155e-05,
      "loss": 0.8858,
      "step": 248170
    },
    {
      "epoch": 0.8698090261349259,
      "grad_norm": 3.125,
      "learning_rate": 3.944809198553445e-05,
      "loss": 0.9332,
      "step": 248180
    },
    {
      "epoch": 0.8698440736418215,
      "grad_norm": 2.734375,
      "learning_rate": 3.944744295687075e-05,
      "loss": 0.8122,
      "step": 248190
    },
    {
      "epoch": 0.8698791211487171,
      "grad_norm": 3.046875,
      "learning_rate": 3.9446793928207046e-05,
      "loss": 0.9309,
      "step": 248200
    },
    {
      "epoch": 0.8699141686556127,
      "grad_norm": 2.890625,
      "learning_rate": 3.944614489954335e-05,
      "loss": 0.9251,
      "step": 248210
    },
    {
      "epoch": 0.8699492161625083,
      "grad_norm": 2.46875,
      "learning_rate": 3.944549587087964e-05,
      "loss": 0.8358,
      "step": 248220
    },
    {
      "epoch": 0.8699842636694038,
      "grad_norm": 2.671875,
      "learning_rate": 3.9444846842215943e-05,
      "loss": 0.8562,
      "step": 248230
    },
    {
      "epoch": 0.8700193111762995,
      "grad_norm": 2.46875,
      "learning_rate": 3.9444197813552245e-05,
      "loss": 0.9503,
      "step": 248240
    },
    {
      "epoch": 0.870054358683195,
      "grad_norm": 2.421875,
      "learning_rate": 3.944354878488854e-05,
      "loss": 0.8689,
      "step": 248250
    },
    {
      "epoch": 0.8700894061900907,
      "grad_norm": 2.890625,
      "learning_rate": 3.944289975622484e-05,
      "loss": 0.763,
      "step": 248260
    },
    {
      "epoch": 0.8701244536969862,
      "grad_norm": 2.6875,
      "learning_rate": 3.9442250727561135e-05,
      "loss": 0.7993,
      "step": 248270
    },
    {
      "epoch": 0.8701595012038819,
      "grad_norm": 2.984375,
      "learning_rate": 3.944160169889744e-05,
      "loss": 1.0331,
      "step": 248280
    },
    {
      "epoch": 0.8701945487107775,
      "grad_norm": 2.921875,
      "learning_rate": 3.944095267023373e-05,
      "loss": 0.9496,
      "step": 248290
    },
    {
      "epoch": 0.870229596217673,
      "grad_norm": 3.421875,
      "learning_rate": 3.9440303641570026e-05,
      "loss": 0.9058,
      "step": 248300
    },
    {
      "epoch": 0.8702646437245687,
      "grad_norm": 2.703125,
      "learning_rate": 3.943965461290632e-05,
      "loss": 0.8927,
      "step": 248310
    },
    {
      "epoch": 0.8702996912314642,
      "grad_norm": 2.8125,
      "learning_rate": 3.943900558424262e-05,
      "loss": 0.8298,
      "step": 248320
    },
    {
      "epoch": 0.8703347387383599,
      "grad_norm": 2.78125,
      "learning_rate": 3.9438356555578923e-05,
      "loss": 0.8411,
      "step": 248330
    },
    {
      "epoch": 0.8703697862452554,
      "grad_norm": 3.109375,
      "learning_rate": 3.943770752691522e-05,
      "loss": 0.9026,
      "step": 248340
    },
    {
      "epoch": 0.870404833752151,
      "grad_norm": 3.0625,
      "learning_rate": 3.943705849825152e-05,
      "loss": 0.8572,
      "step": 248350
    },
    {
      "epoch": 0.8704398812590466,
      "grad_norm": 2.703125,
      "learning_rate": 3.9436409469587814e-05,
      "loss": 0.87,
      "step": 248360
    },
    {
      "epoch": 0.8704749287659422,
      "grad_norm": 2.921875,
      "learning_rate": 3.9435760440924115e-05,
      "loss": 0.8766,
      "step": 248370
    },
    {
      "epoch": 0.8705099762728379,
      "grad_norm": 2.828125,
      "learning_rate": 3.943511141226041e-05,
      "loss": 0.8723,
      "step": 248380
    },
    {
      "epoch": 0.8705450237797334,
      "grad_norm": 2.9375,
      "learning_rate": 3.943446238359671e-05,
      "loss": 0.9066,
      "step": 248390
    },
    {
      "epoch": 0.8705800712866291,
      "grad_norm": 3.375,
      "learning_rate": 3.9433813354933006e-05,
      "loss": 0.9004,
      "step": 248400
    },
    {
      "epoch": 0.8706151187935246,
      "grad_norm": 3.265625,
      "learning_rate": 3.943316432626931e-05,
      "loss": 0.8611,
      "step": 248410
    },
    {
      "epoch": 0.8706501663004202,
      "grad_norm": 3.078125,
      "learning_rate": 3.94325152976056e-05,
      "loss": 0.9985,
      "step": 248420
    },
    {
      "epoch": 0.8706852138073158,
      "grad_norm": 3.078125,
      "learning_rate": 3.9431866268941903e-05,
      "loss": 0.8367,
      "step": 248430
    },
    {
      "epoch": 0.8707202613142114,
      "grad_norm": 3.078125,
      "learning_rate": 3.94312172402782e-05,
      "loss": 0.8319,
      "step": 248440
    },
    {
      "epoch": 0.870755308821107,
      "grad_norm": 2.71875,
      "learning_rate": 3.94305682116145e-05,
      "loss": 0.9253,
      "step": 248450
    },
    {
      "epoch": 0.8707903563280026,
      "grad_norm": 2.9375,
      "learning_rate": 3.94299191829508e-05,
      "loss": 0.9161,
      "step": 248460
    },
    {
      "epoch": 0.8708254038348983,
      "grad_norm": 3.234375,
      "learning_rate": 3.9429270154287095e-05,
      "loss": 0.9314,
      "step": 248470
    },
    {
      "epoch": 0.8708604513417938,
      "grad_norm": 3.359375,
      "learning_rate": 3.94286211256234e-05,
      "loss": 0.9031,
      "step": 248480
    },
    {
      "epoch": 0.8708954988486894,
      "grad_norm": 2.953125,
      "learning_rate": 3.942797209695969e-05,
      "loss": 0.9176,
      "step": 248490
    },
    {
      "epoch": 0.870930546355585,
      "grad_norm": 2.546875,
      "learning_rate": 3.942732306829599e-05,
      "loss": 0.8196,
      "step": 248500
    },
    {
      "epoch": 0.8709655938624806,
      "grad_norm": 2.796875,
      "learning_rate": 3.942667403963229e-05,
      "loss": 0.9362,
      "step": 248510
    },
    {
      "epoch": 0.8710006413693762,
      "grad_norm": 2.625,
      "learning_rate": 3.942602501096859e-05,
      "loss": 0.881,
      "step": 248520
    },
    {
      "epoch": 0.8710356888762718,
      "grad_norm": 2.953125,
      "learning_rate": 3.9425375982304883e-05,
      "loss": 0.8653,
      "step": 248530
    },
    {
      "epoch": 0.8710707363831673,
      "grad_norm": 3.1875,
      "learning_rate": 3.9424726953641185e-05,
      "loss": 0.9497,
      "step": 248540
    },
    {
      "epoch": 0.871105783890063,
      "grad_norm": 3.234375,
      "learning_rate": 3.942407792497748e-05,
      "loss": 0.8683,
      "step": 248550
    },
    {
      "epoch": 0.8711408313969585,
      "grad_norm": 2.84375,
      "learning_rate": 3.942342889631378e-05,
      "loss": 0.9049,
      "step": 248560
    },
    {
      "epoch": 0.8711758789038542,
      "grad_norm": 3.078125,
      "learning_rate": 3.9422779867650075e-05,
      "loss": 0.8916,
      "step": 248570
    },
    {
      "epoch": 0.8712109264107498,
      "grad_norm": 2.453125,
      "learning_rate": 3.942213083898638e-05,
      "loss": 0.8565,
      "step": 248580
    },
    {
      "epoch": 0.8712459739176454,
      "grad_norm": 3.046875,
      "learning_rate": 3.942148181032267e-05,
      "loss": 0.8453,
      "step": 248590
    },
    {
      "epoch": 0.871281021424541,
      "grad_norm": 2.890625,
      "learning_rate": 3.942083278165897e-05,
      "loss": 0.888,
      "step": 248600
    },
    {
      "epoch": 0.8713160689314365,
      "grad_norm": 3.0,
      "learning_rate": 3.9420183752995274e-05,
      "loss": 0.9238,
      "step": 248610
    },
    {
      "epoch": 0.8713511164383322,
      "grad_norm": 2.828125,
      "learning_rate": 3.941953472433157e-05,
      "loss": 0.8311,
      "step": 248620
    },
    {
      "epoch": 0.8713861639452277,
      "grad_norm": 2.84375,
      "learning_rate": 3.941888569566787e-05,
      "loss": 0.953,
      "step": 248630
    },
    {
      "epoch": 0.8714212114521234,
      "grad_norm": 2.9375,
      "learning_rate": 3.9418236667004165e-05,
      "loss": 0.8879,
      "step": 248640
    },
    {
      "epoch": 0.8714562589590189,
      "grad_norm": 3.296875,
      "learning_rate": 3.9417587638340466e-05,
      "loss": 0.8112,
      "step": 248650
    },
    {
      "epoch": 0.8714913064659146,
      "grad_norm": 3.53125,
      "learning_rate": 3.941693860967676e-05,
      "loss": 0.8846,
      "step": 248660
    },
    {
      "epoch": 0.8715263539728102,
      "grad_norm": 3.296875,
      "learning_rate": 3.9416289581013055e-05,
      "loss": 0.9748,
      "step": 248670
    },
    {
      "epoch": 0.8715614014797057,
      "grad_norm": 2.9375,
      "learning_rate": 3.941564055234935e-05,
      "loss": 0.8492,
      "step": 248680
    },
    {
      "epoch": 0.8715964489866014,
      "grad_norm": 2.296875,
      "learning_rate": 3.941499152368565e-05,
      "loss": 0.8557,
      "step": 248690
    },
    {
      "epoch": 0.8716314964934969,
      "grad_norm": 2.984375,
      "learning_rate": 3.941434249502195e-05,
      "loss": 0.9541,
      "step": 248700
    },
    {
      "epoch": 0.8716665440003926,
      "grad_norm": 3.0,
      "learning_rate": 3.941369346635825e-05,
      "loss": 0.877,
      "step": 248710
    },
    {
      "epoch": 0.8717015915072881,
      "grad_norm": 3.09375,
      "learning_rate": 3.941304443769455e-05,
      "loss": 0.8714,
      "step": 248720
    },
    {
      "epoch": 0.8717366390141837,
      "grad_norm": 2.734375,
      "learning_rate": 3.9412395409030843e-05,
      "loss": 0.8911,
      "step": 248730
    },
    {
      "epoch": 0.8717716865210793,
      "grad_norm": 2.9375,
      "learning_rate": 3.9411746380367145e-05,
      "loss": 0.8797,
      "step": 248740
    },
    {
      "epoch": 0.8718067340279749,
      "grad_norm": 2.953125,
      "learning_rate": 3.941109735170344e-05,
      "loss": 0.8816,
      "step": 248750
    },
    {
      "epoch": 0.8718417815348705,
      "grad_norm": 2.75,
      "learning_rate": 3.941044832303974e-05,
      "loss": 0.9267,
      "step": 248760
    },
    {
      "epoch": 0.8718768290417661,
      "grad_norm": 3.015625,
      "learning_rate": 3.9409799294376035e-05,
      "loss": 0.9719,
      "step": 248770
    },
    {
      "epoch": 0.8719118765486618,
      "grad_norm": 3.03125,
      "learning_rate": 3.940915026571234e-05,
      "loss": 0.8912,
      "step": 248780
    },
    {
      "epoch": 0.8719469240555573,
      "grad_norm": 3.296875,
      "learning_rate": 3.940850123704863e-05,
      "loss": 0.9136,
      "step": 248790
    },
    {
      "epoch": 0.8719819715624529,
      "grad_norm": 2.90625,
      "learning_rate": 3.940785220838493e-05,
      "loss": 0.844,
      "step": 248800
    },
    {
      "epoch": 0.8720170190693485,
      "grad_norm": 3.1875,
      "learning_rate": 3.940720317972123e-05,
      "loss": 0.9054,
      "step": 248810
    },
    {
      "epoch": 0.8720520665762441,
      "grad_norm": 2.6875,
      "learning_rate": 3.940655415105753e-05,
      "loss": 0.9226,
      "step": 248820
    },
    {
      "epoch": 0.8720871140831397,
      "grad_norm": 3.09375,
      "learning_rate": 3.940590512239383e-05,
      "loss": 0.8164,
      "step": 248830
    },
    {
      "epoch": 0.8721221615900353,
      "grad_norm": 2.78125,
      "learning_rate": 3.9405256093730125e-05,
      "loss": 0.927,
      "step": 248840
    },
    {
      "epoch": 0.8721572090969308,
      "grad_norm": 3.046875,
      "learning_rate": 3.9404607065066426e-05,
      "loss": 0.9758,
      "step": 248850
    },
    {
      "epoch": 0.8721922566038265,
      "grad_norm": 2.515625,
      "learning_rate": 3.940395803640272e-05,
      "loss": 0.8443,
      "step": 248860
    },
    {
      "epoch": 0.8722273041107221,
      "grad_norm": 3.203125,
      "learning_rate": 3.940330900773902e-05,
      "loss": 0.913,
      "step": 248870
    },
    {
      "epoch": 0.8722623516176177,
      "grad_norm": 3.390625,
      "learning_rate": 3.940265997907532e-05,
      "loss": 0.91,
      "step": 248880
    },
    {
      "epoch": 0.8722973991245133,
      "grad_norm": 2.984375,
      "learning_rate": 3.940201095041162e-05,
      "loss": 0.9045,
      "step": 248890
    },
    {
      "epoch": 0.8723324466314089,
      "grad_norm": 3.234375,
      "learning_rate": 3.940136192174791e-05,
      "loss": 0.8544,
      "step": 248900
    },
    {
      "epoch": 0.8723674941383045,
      "grad_norm": 3.265625,
      "learning_rate": 3.9400712893084214e-05,
      "loss": 0.8929,
      "step": 248910
    },
    {
      "epoch": 0.8724025416452,
      "grad_norm": 2.640625,
      "learning_rate": 3.940006386442051e-05,
      "loss": 0.8349,
      "step": 248920
    },
    {
      "epoch": 0.8724375891520957,
      "grad_norm": 2.78125,
      "learning_rate": 3.939941483575681e-05,
      "loss": 0.8933,
      "step": 248930
    },
    {
      "epoch": 0.8724726366589912,
      "grad_norm": 3.234375,
      "learning_rate": 3.9398765807093105e-05,
      "loss": 0.9019,
      "step": 248940
    },
    {
      "epoch": 0.8725076841658869,
      "grad_norm": 3.015625,
      "learning_rate": 3.9398116778429406e-05,
      "loss": 0.9295,
      "step": 248950
    },
    {
      "epoch": 0.8725427316727824,
      "grad_norm": 3.171875,
      "learning_rate": 3.93974677497657e-05,
      "loss": 0.8373,
      "step": 248960
    },
    {
      "epoch": 0.872577779179678,
      "grad_norm": 2.859375,
      "learning_rate": 3.9396818721102e-05,
      "loss": 0.872,
      "step": 248970
    },
    {
      "epoch": 0.8726128266865737,
      "grad_norm": 3.109375,
      "learning_rate": 3.9396169692438304e-05,
      "loss": 0.8938,
      "step": 248980
    },
    {
      "epoch": 0.8726478741934692,
      "grad_norm": 2.84375,
      "learning_rate": 3.93955206637746e-05,
      "loss": 0.85,
      "step": 248990
    },
    {
      "epoch": 0.8726829217003649,
      "grad_norm": 3.34375,
      "learning_rate": 3.93948716351109e-05,
      "loss": 0.9255,
      "step": 249000
    },
    {
      "epoch": 0.8727179692072604,
      "grad_norm": 2.9375,
      "learning_rate": 3.9394222606447194e-05,
      "loss": 0.9228,
      "step": 249010
    },
    {
      "epoch": 0.8727530167141561,
      "grad_norm": 2.96875,
      "learning_rate": 3.9393573577783496e-05,
      "loss": 0.8627,
      "step": 249020
    },
    {
      "epoch": 0.8727880642210516,
      "grad_norm": 2.859375,
      "learning_rate": 3.939292454911979e-05,
      "loss": 0.8719,
      "step": 249030
    },
    {
      "epoch": 0.8728231117279472,
      "grad_norm": 3.078125,
      "learning_rate": 3.939227552045609e-05,
      "loss": 0.8976,
      "step": 249040
    },
    {
      "epoch": 0.8728581592348428,
      "grad_norm": 3.15625,
      "learning_rate": 3.939162649179238e-05,
      "loss": 0.9266,
      "step": 249050
    },
    {
      "epoch": 0.8728932067417384,
      "grad_norm": 2.84375,
      "learning_rate": 3.939097746312868e-05,
      "loss": 0.8739,
      "step": 249060
    },
    {
      "epoch": 0.8729282542486341,
      "grad_norm": 2.9375,
      "learning_rate": 3.939032843446498e-05,
      "loss": 0.8721,
      "step": 249070
    },
    {
      "epoch": 0.8729633017555296,
      "grad_norm": 2.484375,
      "learning_rate": 3.938967940580128e-05,
      "loss": 0.8184,
      "step": 249080
    },
    {
      "epoch": 0.8729983492624253,
      "grad_norm": 3.203125,
      "learning_rate": 3.938903037713758e-05,
      "loss": 0.8905,
      "step": 249090
    },
    {
      "epoch": 0.8730333967693208,
      "grad_norm": 3.359375,
      "learning_rate": 3.938838134847387e-05,
      "loss": 0.9274,
      "step": 249100
    },
    {
      "epoch": 0.8730684442762164,
      "grad_norm": 2.609375,
      "learning_rate": 3.9387732319810174e-05,
      "loss": 0.8961,
      "step": 249110
    },
    {
      "epoch": 0.873103491783112,
      "grad_norm": 3.3125,
      "learning_rate": 3.938708329114647e-05,
      "loss": 0.9331,
      "step": 249120
    },
    {
      "epoch": 0.8731385392900076,
      "grad_norm": 2.75,
      "learning_rate": 3.938643426248277e-05,
      "loss": 0.8799,
      "step": 249130
    },
    {
      "epoch": 0.8731735867969032,
      "grad_norm": 2.734375,
      "learning_rate": 3.9385785233819065e-05,
      "loss": 0.8753,
      "step": 249140
    },
    {
      "epoch": 0.8732086343037988,
      "grad_norm": 3.328125,
      "learning_rate": 3.9385136205155366e-05,
      "loss": 0.934,
      "step": 249150
    },
    {
      "epoch": 0.8732436818106944,
      "grad_norm": 3.171875,
      "learning_rate": 3.938448717649166e-05,
      "loss": 0.8215,
      "step": 249160
    },
    {
      "epoch": 0.87327872931759,
      "grad_norm": 3.078125,
      "learning_rate": 3.938383814782796e-05,
      "loss": 0.9046,
      "step": 249170
    },
    {
      "epoch": 0.8733137768244856,
      "grad_norm": 3.109375,
      "learning_rate": 3.938318911916426e-05,
      "loss": 0.919,
      "step": 249180
    },
    {
      "epoch": 0.8733488243313812,
      "grad_norm": 2.703125,
      "learning_rate": 3.938254009050056e-05,
      "loss": 0.8548,
      "step": 249190
    },
    {
      "epoch": 0.8733838718382768,
      "grad_norm": 2.765625,
      "learning_rate": 3.938189106183686e-05,
      "loss": 0.8917,
      "step": 249200
    },
    {
      "epoch": 0.8734189193451724,
      "grad_norm": 3.03125,
      "learning_rate": 3.9381242033173154e-05,
      "loss": 0.8915,
      "step": 249210
    },
    {
      "epoch": 0.873453966852068,
      "grad_norm": 3.265625,
      "learning_rate": 3.9380593004509456e-05,
      "loss": 0.8749,
      "step": 249220
    },
    {
      "epoch": 0.8734890143589635,
      "grad_norm": 2.671875,
      "learning_rate": 3.937994397584575e-05,
      "loss": 0.8838,
      "step": 249230
    },
    {
      "epoch": 0.8735240618658592,
      "grad_norm": 2.71875,
      "learning_rate": 3.937929494718205e-05,
      "loss": 0.9511,
      "step": 249240
    },
    {
      "epoch": 0.8735591093727547,
      "grad_norm": 3.171875,
      "learning_rate": 3.9378645918518346e-05,
      "loss": 0.9008,
      "step": 249250
    },
    {
      "epoch": 0.8735941568796504,
      "grad_norm": 2.71875,
      "learning_rate": 3.937799688985465e-05,
      "loss": 0.8829,
      "step": 249260
    },
    {
      "epoch": 0.873629204386546,
      "grad_norm": 3.328125,
      "learning_rate": 3.937734786119094e-05,
      "loss": 0.818,
      "step": 249270
    },
    {
      "epoch": 0.8736642518934415,
      "grad_norm": 2.90625,
      "learning_rate": 3.9376698832527244e-05,
      "loss": 0.9786,
      "step": 249280
    },
    {
      "epoch": 0.8736992994003372,
      "grad_norm": 2.828125,
      "learning_rate": 3.937604980386354e-05,
      "loss": 0.8974,
      "step": 249290
    },
    {
      "epoch": 0.8737343469072327,
      "grad_norm": 2.75,
      "learning_rate": 3.937540077519984e-05,
      "loss": 0.8976,
      "step": 249300
    },
    {
      "epoch": 0.8737693944141284,
      "grad_norm": 3.140625,
      "learning_rate": 3.9374751746536134e-05,
      "loss": 0.9054,
      "step": 249310
    },
    {
      "epoch": 0.8738044419210239,
      "grad_norm": 3.0,
      "learning_rate": 3.9374102717872436e-05,
      "loss": 0.9366,
      "step": 249320
    },
    {
      "epoch": 0.8738394894279196,
      "grad_norm": 3.21875,
      "learning_rate": 3.937345368920874e-05,
      "loss": 0.9296,
      "step": 249330
    },
    {
      "epoch": 0.8738745369348151,
      "grad_norm": 2.484375,
      "learning_rate": 3.937280466054503e-05,
      "loss": 0.8974,
      "step": 249340
    },
    {
      "epoch": 0.8739095844417107,
      "grad_norm": 3.046875,
      "learning_rate": 3.937215563188133e-05,
      "loss": 0.891,
      "step": 249350
    },
    {
      "epoch": 0.8739446319486064,
      "grad_norm": 2.734375,
      "learning_rate": 3.937150660321763e-05,
      "loss": 0.8784,
      "step": 249360
    },
    {
      "epoch": 0.8739796794555019,
      "grad_norm": 3.296875,
      "learning_rate": 3.937085757455393e-05,
      "loss": 0.912,
      "step": 249370
    },
    {
      "epoch": 0.8740147269623976,
      "grad_norm": 3.078125,
      "learning_rate": 3.9370208545890224e-05,
      "loss": 0.8762,
      "step": 249380
    },
    {
      "epoch": 0.8740497744692931,
      "grad_norm": 3.328125,
      "learning_rate": 3.9369559517226525e-05,
      "loss": 0.9536,
      "step": 249390
    },
    {
      "epoch": 0.8740848219761888,
      "grad_norm": 2.828125,
      "learning_rate": 3.936891048856282e-05,
      "loss": 0.8825,
      "step": 249400
    },
    {
      "epoch": 0.8741198694830843,
      "grad_norm": 2.84375,
      "learning_rate": 3.936826145989912e-05,
      "loss": 0.8352,
      "step": 249410
    },
    {
      "epoch": 0.8741549169899799,
      "grad_norm": 2.84375,
      "learning_rate": 3.9367612431235416e-05,
      "loss": 0.9301,
      "step": 249420
    },
    {
      "epoch": 0.8741899644968755,
      "grad_norm": 2.984375,
      "learning_rate": 3.936696340257171e-05,
      "loss": 0.9171,
      "step": 249430
    },
    {
      "epoch": 0.8742250120037711,
      "grad_norm": 3.359375,
      "learning_rate": 3.936631437390801e-05,
      "loss": 0.8131,
      "step": 249440
    },
    {
      "epoch": 0.8742600595106667,
      "grad_norm": 2.96875,
      "learning_rate": 3.9365665345244306e-05,
      "loss": 0.9158,
      "step": 249450
    },
    {
      "epoch": 0.8742951070175623,
      "grad_norm": 3.0,
      "learning_rate": 3.936501631658061e-05,
      "loss": 0.7991,
      "step": 249460
    },
    {
      "epoch": 0.874330154524458,
      "grad_norm": 2.890625,
      "learning_rate": 3.93643672879169e-05,
      "loss": 0.8487,
      "step": 249470
    },
    {
      "epoch": 0.8743652020313535,
      "grad_norm": 2.71875,
      "learning_rate": 3.9363718259253204e-05,
      "loss": 0.868,
      "step": 249480
    },
    {
      "epoch": 0.8744002495382491,
      "grad_norm": 3.171875,
      "learning_rate": 3.93630692305895e-05,
      "loss": 0.8594,
      "step": 249490
    },
    {
      "epoch": 0.8744352970451447,
      "grad_norm": 2.625,
      "learning_rate": 3.93624202019258e-05,
      "loss": 0.8845,
      "step": 249500
    },
    {
      "epoch": 0.8744703445520403,
      "grad_norm": 2.578125,
      "learning_rate": 3.9361771173262094e-05,
      "loss": 0.9239,
      "step": 249510
    },
    {
      "epoch": 0.8745053920589358,
      "grad_norm": 2.765625,
      "learning_rate": 3.9361122144598396e-05,
      "loss": 0.8615,
      "step": 249520
    },
    {
      "epoch": 0.8745404395658315,
      "grad_norm": 2.796875,
      "learning_rate": 3.936047311593469e-05,
      "loss": 0.8148,
      "step": 249530
    },
    {
      "epoch": 0.874575487072727,
      "grad_norm": 3.4375,
      "learning_rate": 3.935982408727099e-05,
      "loss": 0.821,
      "step": 249540
    },
    {
      "epoch": 0.8746105345796227,
      "grad_norm": 2.828125,
      "learning_rate": 3.9359175058607286e-05,
      "loss": 0.9299,
      "step": 249550
    },
    {
      "epoch": 0.8746455820865183,
      "grad_norm": 3.3125,
      "learning_rate": 3.935852602994359e-05,
      "loss": 0.8692,
      "step": 249560
    },
    {
      "epoch": 0.8746806295934139,
      "grad_norm": 3.09375,
      "learning_rate": 3.935787700127989e-05,
      "loss": 0.8932,
      "step": 249570
    },
    {
      "epoch": 0.8747156771003095,
      "grad_norm": 2.90625,
      "learning_rate": 3.9357227972616184e-05,
      "loss": 0.9059,
      "step": 249580
    },
    {
      "epoch": 0.874750724607205,
      "grad_norm": 2.9375,
      "learning_rate": 3.9356578943952485e-05,
      "loss": 0.765,
      "step": 249590
    },
    {
      "epoch": 0.8747857721141007,
      "grad_norm": 2.96875,
      "learning_rate": 3.935592991528878e-05,
      "loss": 0.8466,
      "step": 249600
    },
    {
      "epoch": 0.8748208196209962,
      "grad_norm": 3.3125,
      "learning_rate": 3.935528088662508e-05,
      "loss": 0.8456,
      "step": 249610
    },
    {
      "epoch": 0.8748558671278919,
      "grad_norm": 3.421875,
      "learning_rate": 3.9354631857961376e-05,
      "loss": 0.887,
      "step": 249620
    },
    {
      "epoch": 0.8748909146347874,
      "grad_norm": 2.546875,
      "learning_rate": 3.935398282929768e-05,
      "loss": 0.8199,
      "step": 249630
    },
    {
      "epoch": 0.8749259621416831,
      "grad_norm": 3.0625,
      "learning_rate": 3.935333380063397e-05,
      "loss": 0.8709,
      "step": 249640
    },
    {
      "epoch": 0.8749610096485787,
      "grad_norm": 2.671875,
      "learning_rate": 3.935268477197027e-05,
      "loss": 0.8959,
      "step": 249650
    },
    {
      "epoch": 0.8749960571554742,
      "grad_norm": 2.859375,
      "learning_rate": 3.935203574330657e-05,
      "loss": 0.8341,
      "step": 249660
    },
    {
      "epoch": 0.8750311046623699,
      "grad_norm": 2.890625,
      "learning_rate": 3.935138671464287e-05,
      "loss": 0.9011,
      "step": 249670
    },
    {
      "epoch": 0.8750661521692654,
      "grad_norm": 2.703125,
      "learning_rate": 3.9350737685979164e-05,
      "loss": 0.9034,
      "step": 249680
    },
    {
      "epoch": 0.8751011996761611,
      "grad_norm": 3.046875,
      "learning_rate": 3.9350088657315465e-05,
      "loss": 0.9114,
      "step": 249690
    },
    {
      "epoch": 0.8751362471830566,
      "grad_norm": 2.953125,
      "learning_rate": 3.9349439628651766e-05,
      "loss": 0.9868,
      "step": 249700
    },
    {
      "epoch": 0.8751712946899523,
      "grad_norm": 2.96875,
      "learning_rate": 3.934879059998806e-05,
      "loss": 0.8326,
      "step": 249710
    },
    {
      "epoch": 0.8752063421968478,
      "grad_norm": 2.71875,
      "learning_rate": 3.934814157132436e-05,
      "loss": 0.8848,
      "step": 249720
    },
    {
      "epoch": 0.8752413897037434,
      "grad_norm": 2.9375,
      "learning_rate": 3.934749254266066e-05,
      "loss": 0.889,
      "step": 249730
    },
    {
      "epoch": 0.875276437210639,
      "grad_norm": 2.96875,
      "learning_rate": 3.934684351399696e-05,
      "loss": 0.9096,
      "step": 249740
    },
    {
      "epoch": 0.8753114847175346,
      "grad_norm": 2.78125,
      "learning_rate": 3.934619448533325e-05,
      "loss": 0.9118,
      "step": 249750
    },
    {
      "epoch": 0.8753465322244303,
      "grad_norm": 2.46875,
      "learning_rate": 3.9345545456669554e-05,
      "loss": 0.8646,
      "step": 249760
    },
    {
      "epoch": 0.8753815797313258,
      "grad_norm": 2.609375,
      "learning_rate": 3.934489642800585e-05,
      "loss": 0.7998,
      "step": 249770
    },
    {
      "epoch": 0.8754166272382214,
      "grad_norm": 2.640625,
      "learning_rate": 3.934424739934215e-05,
      "loss": 0.8584,
      "step": 249780
    },
    {
      "epoch": 0.875451674745117,
      "grad_norm": 2.9375,
      "learning_rate": 3.9343598370678445e-05,
      "loss": 0.8212,
      "step": 249790
    },
    {
      "epoch": 0.8754867222520126,
      "grad_norm": 2.765625,
      "learning_rate": 3.934294934201474e-05,
      "loss": 0.8229,
      "step": 249800
    },
    {
      "epoch": 0.8755217697589082,
      "grad_norm": 2.921875,
      "learning_rate": 3.934230031335104e-05,
      "loss": 0.7535,
      "step": 249810
    },
    {
      "epoch": 0.8755568172658038,
      "grad_norm": 3.25,
      "learning_rate": 3.9341651284687336e-05,
      "loss": 0.9196,
      "step": 249820
    },
    {
      "epoch": 0.8755918647726993,
      "grad_norm": 2.828125,
      "learning_rate": 3.934100225602364e-05,
      "loss": 0.9155,
      "step": 249830
    },
    {
      "epoch": 0.875626912279595,
      "grad_norm": 2.5,
      "learning_rate": 3.934035322735993e-05,
      "loss": 0.9068,
      "step": 249840
    },
    {
      "epoch": 0.8756619597864906,
      "grad_norm": 3.328125,
      "learning_rate": 3.933970419869623e-05,
      "loss": 0.9326,
      "step": 249850
    },
    {
      "epoch": 0.8756970072933862,
      "grad_norm": 3.359375,
      "learning_rate": 3.933905517003253e-05,
      "loss": 0.8937,
      "step": 249860
    },
    {
      "epoch": 0.8757320548002818,
      "grad_norm": 2.984375,
      "learning_rate": 3.933840614136883e-05,
      "loss": 0.9067,
      "step": 249870
    },
    {
      "epoch": 0.8757671023071774,
      "grad_norm": 3.078125,
      "learning_rate": 3.9337757112705124e-05,
      "loss": 0.8285,
      "step": 249880
    },
    {
      "epoch": 0.875802149814073,
      "grad_norm": 3.25,
      "learning_rate": 3.9337108084041425e-05,
      "loss": 0.8703,
      "step": 249890
    },
    {
      "epoch": 0.8758371973209685,
      "grad_norm": 3.21875,
      "learning_rate": 3.933645905537772e-05,
      "loss": 0.824,
      "step": 249900
    },
    {
      "epoch": 0.8758722448278642,
      "grad_norm": 3.296875,
      "learning_rate": 3.933581002671402e-05,
      "loss": 0.9309,
      "step": 249910
    },
    {
      "epoch": 0.8759072923347597,
      "grad_norm": 3.109375,
      "learning_rate": 3.9335160998050316e-05,
      "loss": 0.9022,
      "step": 249920
    },
    {
      "epoch": 0.8759423398416554,
      "grad_norm": 2.515625,
      "learning_rate": 3.933451196938662e-05,
      "loss": 0.9421,
      "step": 249930
    },
    {
      "epoch": 0.8759773873485509,
      "grad_norm": 3.140625,
      "learning_rate": 3.933386294072292e-05,
      "loss": 0.9165,
      "step": 249940
    },
    {
      "epoch": 0.8760124348554466,
      "grad_norm": 2.734375,
      "learning_rate": 3.933321391205921e-05,
      "loss": 0.8862,
      "step": 249950
    },
    {
      "epoch": 0.8760474823623422,
      "grad_norm": 3.453125,
      "learning_rate": 3.9332564883395514e-05,
      "loss": 0.9066,
      "step": 249960
    },
    {
      "epoch": 0.8760825298692377,
      "grad_norm": 3.078125,
      "learning_rate": 3.933191585473181e-05,
      "loss": 0.918,
      "step": 249970
    },
    {
      "epoch": 0.8761175773761334,
      "grad_norm": 2.8125,
      "learning_rate": 3.933126682606811e-05,
      "loss": 0.8577,
      "step": 249980
    },
    {
      "epoch": 0.8761526248830289,
      "grad_norm": 2.84375,
      "learning_rate": 3.9330617797404405e-05,
      "loss": 0.8927,
      "step": 249990
    },
    {
      "epoch": 0.8761876723899246,
      "grad_norm": 3.171875,
      "learning_rate": 3.9329968768740706e-05,
      "loss": 0.7923,
      "step": 250000
    },
    {
      "epoch": 0.8761876723899246,
      "eval_loss": 0.8290725946426392,
      "eval_runtime": 563.4643,
      "eval_samples_per_second": 675.173,
      "eval_steps_per_second": 56.264,
      "step": 250000
    },
    {
      "epoch": 0.8762227198968201,
      "grad_norm": 3.0,
      "learning_rate": 3.9329319740077e-05,
      "loss": 0.9591,
      "step": 250010
    },
    {
      "epoch": 0.8762577674037157,
      "grad_norm": 2.765625,
      "learning_rate": 3.93286707114133e-05,
      "loss": 0.8929,
      "step": 250020
    },
    {
      "epoch": 0.8762928149106113,
      "grad_norm": 3.28125,
      "learning_rate": 3.93280216827496e-05,
      "loss": 0.8932,
      "step": 250030
    },
    {
      "epoch": 0.8763278624175069,
      "grad_norm": 2.65625,
      "learning_rate": 3.93273726540859e-05,
      "loss": 0.9276,
      "step": 250040
    },
    {
      "epoch": 0.8763629099244026,
      "grad_norm": 2.953125,
      "learning_rate": 3.932672362542219e-05,
      "loss": 0.9497,
      "step": 250050
    },
    {
      "epoch": 0.8763979574312981,
      "grad_norm": 3.15625,
      "learning_rate": 3.9326074596758494e-05,
      "loss": 0.8659,
      "step": 250060
    },
    {
      "epoch": 0.8764330049381938,
      "grad_norm": 3.25,
      "learning_rate": 3.9325425568094796e-05,
      "loss": 0.8874,
      "step": 250070
    },
    {
      "epoch": 0.8764680524450893,
      "grad_norm": 2.84375,
      "learning_rate": 3.932477653943109e-05,
      "loss": 0.8996,
      "step": 250080
    },
    {
      "epoch": 0.8765030999519849,
      "grad_norm": 3.125,
      "learning_rate": 3.932412751076739e-05,
      "loss": 0.868,
      "step": 250090
    },
    {
      "epoch": 0.8765381474588805,
      "grad_norm": 2.6875,
      "learning_rate": 3.9323478482103686e-05,
      "loss": 0.7841,
      "step": 250100
    },
    {
      "epoch": 0.8765731949657761,
      "grad_norm": 3.125,
      "learning_rate": 3.932282945343999e-05,
      "loss": 0.9807,
      "step": 250110
    },
    {
      "epoch": 0.8766082424726717,
      "grad_norm": 2.796875,
      "learning_rate": 3.932218042477628e-05,
      "loss": 0.8465,
      "step": 250120
    },
    {
      "epoch": 0.8766432899795673,
      "grad_norm": 2.71875,
      "learning_rate": 3.9321531396112584e-05,
      "loss": 0.8531,
      "step": 250130
    },
    {
      "epoch": 0.876678337486463,
      "grad_norm": 3.125,
      "learning_rate": 3.932088236744888e-05,
      "loss": 0.8599,
      "step": 250140
    },
    {
      "epoch": 0.8767133849933585,
      "grad_norm": 3.5,
      "learning_rate": 3.932023333878518e-05,
      "loss": 0.8657,
      "step": 250150
    },
    {
      "epoch": 0.8767484325002541,
      "grad_norm": 2.90625,
      "learning_rate": 3.9319584310121474e-05,
      "loss": 0.8914,
      "step": 250160
    },
    {
      "epoch": 0.8767834800071497,
      "grad_norm": 2.875,
      "learning_rate": 3.9318935281457776e-05,
      "loss": 0.7923,
      "step": 250170
    },
    {
      "epoch": 0.8768185275140453,
      "grad_norm": 2.953125,
      "learning_rate": 3.931828625279407e-05,
      "loss": 0.922,
      "step": 250180
    },
    {
      "epoch": 0.8768535750209409,
      "grad_norm": 2.828125,
      "learning_rate": 3.9317637224130365e-05,
      "loss": 0.9346,
      "step": 250190
    },
    {
      "epoch": 0.8768886225278365,
      "grad_norm": 2.8125,
      "learning_rate": 3.9316988195466666e-05,
      "loss": 0.8955,
      "step": 250200
    },
    {
      "epoch": 0.876923670034732,
      "grad_norm": 2.796875,
      "learning_rate": 3.931633916680296e-05,
      "loss": 0.8354,
      "step": 250210
    },
    {
      "epoch": 0.8769587175416277,
      "grad_norm": 2.65625,
      "learning_rate": 3.931569013813926e-05,
      "loss": 0.841,
      "step": 250220
    },
    {
      "epoch": 0.8769937650485232,
      "grad_norm": 2.921875,
      "learning_rate": 3.931504110947556e-05,
      "loss": 0.8679,
      "step": 250230
    },
    {
      "epoch": 0.8770288125554189,
      "grad_norm": 2.6875,
      "learning_rate": 3.931439208081186e-05,
      "loss": 0.8452,
      "step": 250240
    },
    {
      "epoch": 0.8770638600623145,
      "grad_norm": 2.78125,
      "learning_rate": 3.931374305214815e-05,
      "loss": 0.8711,
      "step": 250250
    },
    {
      "epoch": 0.87709890756921,
      "grad_norm": 2.921875,
      "learning_rate": 3.9313094023484454e-05,
      "loss": 0.8949,
      "step": 250260
    },
    {
      "epoch": 0.8771339550761057,
      "grad_norm": 3.578125,
      "learning_rate": 3.931244499482075e-05,
      "loss": 0.8807,
      "step": 250270
    },
    {
      "epoch": 0.8771690025830012,
      "grad_norm": 2.5625,
      "learning_rate": 3.931179596615705e-05,
      "loss": 0.8948,
      "step": 250280
    },
    {
      "epoch": 0.8772040500898969,
      "grad_norm": 3.140625,
      "learning_rate": 3.9311146937493345e-05,
      "loss": 0.8457,
      "step": 250290
    },
    {
      "epoch": 0.8772390975967924,
      "grad_norm": 3.484375,
      "learning_rate": 3.9310497908829646e-05,
      "loss": 0.8956,
      "step": 250300
    },
    {
      "epoch": 0.8772741451036881,
      "grad_norm": 3.03125,
      "learning_rate": 3.930984888016595e-05,
      "loss": 0.8061,
      "step": 250310
    },
    {
      "epoch": 0.8773091926105836,
      "grad_norm": 2.9375,
      "learning_rate": 3.930919985150224e-05,
      "loss": 0.8789,
      "step": 250320
    },
    {
      "epoch": 0.8773442401174792,
      "grad_norm": 2.515625,
      "learning_rate": 3.9308550822838544e-05,
      "loss": 0.8659,
      "step": 250330
    },
    {
      "epoch": 0.8773792876243749,
      "grad_norm": 2.859375,
      "learning_rate": 3.930790179417484e-05,
      "loss": 0.9536,
      "step": 250340
    },
    {
      "epoch": 0.8774143351312704,
      "grad_norm": 2.25,
      "learning_rate": 3.930725276551114e-05,
      "loss": 0.807,
      "step": 250350
    },
    {
      "epoch": 0.8774493826381661,
      "grad_norm": 3.1875,
      "learning_rate": 3.9306603736847434e-05,
      "loss": 0.8976,
      "step": 250360
    },
    {
      "epoch": 0.8774844301450616,
      "grad_norm": 3.1875,
      "learning_rate": 3.9305954708183736e-05,
      "loss": 0.8433,
      "step": 250370
    },
    {
      "epoch": 0.8775194776519573,
      "grad_norm": 3.71875,
      "learning_rate": 3.930530567952003e-05,
      "loss": 0.9105,
      "step": 250380
    },
    {
      "epoch": 0.8775545251588528,
      "grad_norm": 3.03125,
      "learning_rate": 3.930465665085633e-05,
      "loss": 0.7761,
      "step": 250390
    },
    {
      "epoch": 0.8775895726657484,
      "grad_norm": 2.9375,
      "learning_rate": 3.9304007622192626e-05,
      "loss": 0.941,
      "step": 250400
    },
    {
      "epoch": 0.877624620172644,
      "grad_norm": 3.125,
      "learning_rate": 3.930335859352893e-05,
      "loss": 0.8837,
      "step": 250410
    },
    {
      "epoch": 0.8776596676795396,
      "grad_norm": 2.828125,
      "learning_rate": 3.930270956486522e-05,
      "loss": 0.9323,
      "step": 250420
    },
    {
      "epoch": 0.8776947151864352,
      "grad_norm": 3.015625,
      "learning_rate": 3.9302060536201524e-05,
      "loss": 0.9012,
      "step": 250430
    },
    {
      "epoch": 0.8777297626933308,
      "grad_norm": 2.890625,
      "learning_rate": 3.9301411507537825e-05,
      "loss": 0.8899,
      "step": 250440
    },
    {
      "epoch": 0.8777648102002265,
      "grad_norm": 3.171875,
      "learning_rate": 3.930076247887412e-05,
      "loss": 0.9166,
      "step": 250450
    },
    {
      "epoch": 0.877799857707122,
      "grad_norm": 3.0,
      "learning_rate": 3.930011345021042e-05,
      "loss": 0.9117,
      "step": 250460
    },
    {
      "epoch": 0.8778349052140176,
      "grad_norm": 2.921875,
      "learning_rate": 3.9299464421546716e-05,
      "loss": 0.9002,
      "step": 250470
    },
    {
      "epoch": 0.8778699527209132,
      "grad_norm": 2.890625,
      "learning_rate": 3.929881539288302e-05,
      "loss": 0.8687,
      "step": 250480
    },
    {
      "epoch": 0.8779050002278088,
      "grad_norm": 3.265625,
      "learning_rate": 3.929816636421931e-05,
      "loss": 0.8184,
      "step": 250490
    },
    {
      "epoch": 0.8779400477347044,
      "grad_norm": 2.84375,
      "learning_rate": 3.929751733555561e-05,
      "loss": 0.8471,
      "step": 250500
    },
    {
      "epoch": 0.8779750952416,
      "grad_norm": 3.3125,
      "learning_rate": 3.929686830689191e-05,
      "loss": 0.9182,
      "step": 250510
    },
    {
      "epoch": 0.8780101427484955,
      "grad_norm": 3.0,
      "learning_rate": 3.929621927822821e-05,
      "loss": 0.8278,
      "step": 250520
    },
    {
      "epoch": 0.8780451902553912,
      "grad_norm": 2.875,
      "learning_rate": 3.9295570249564504e-05,
      "loss": 0.9319,
      "step": 250530
    },
    {
      "epoch": 0.8780802377622868,
      "grad_norm": 2.734375,
      "learning_rate": 3.9294921220900805e-05,
      "loss": 0.8993,
      "step": 250540
    },
    {
      "epoch": 0.8781152852691824,
      "grad_norm": 3.140625,
      "learning_rate": 3.92942721922371e-05,
      "loss": 1.0159,
      "step": 250550
    },
    {
      "epoch": 0.878150332776078,
      "grad_norm": 3.421875,
      "learning_rate": 3.9293623163573394e-05,
      "loss": 0.8542,
      "step": 250560
    },
    {
      "epoch": 0.8781853802829735,
      "grad_norm": 2.734375,
      "learning_rate": 3.9292974134909696e-05,
      "loss": 0.8439,
      "step": 250570
    },
    {
      "epoch": 0.8782204277898692,
      "grad_norm": 2.734375,
      "learning_rate": 3.929232510624599e-05,
      "loss": 0.9294,
      "step": 250580
    },
    {
      "epoch": 0.8782554752967647,
      "grad_norm": 3.328125,
      "learning_rate": 3.929167607758229e-05,
      "loss": 0.8881,
      "step": 250590
    },
    {
      "epoch": 0.8782905228036604,
      "grad_norm": 2.59375,
      "learning_rate": 3.9291027048918586e-05,
      "loss": 0.8786,
      "step": 250600
    },
    {
      "epoch": 0.8783255703105559,
      "grad_norm": 2.796875,
      "learning_rate": 3.929037802025489e-05,
      "loss": 0.9119,
      "step": 250610
    },
    {
      "epoch": 0.8783606178174516,
      "grad_norm": 2.84375,
      "learning_rate": 3.928972899159118e-05,
      "loss": 0.8762,
      "step": 250620
    },
    {
      "epoch": 0.8783956653243472,
      "grad_norm": 2.75,
      "learning_rate": 3.9289079962927484e-05,
      "loss": 0.9176,
      "step": 250630
    },
    {
      "epoch": 0.8784307128312427,
      "grad_norm": 3.140625,
      "learning_rate": 3.928843093426378e-05,
      "loss": 0.8933,
      "step": 250640
    },
    {
      "epoch": 0.8784657603381384,
      "grad_norm": 3.0625,
      "learning_rate": 3.928778190560008e-05,
      "loss": 0.8815,
      "step": 250650
    },
    {
      "epoch": 0.8785008078450339,
      "grad_norm": 3.203125,
      "learning_rate": 3.928713287693638e-05,
      "loss": 0.8551,
      "step": 250660
    },
    {
      "epoch": 0.8785358553519296,
      "grad_norm": 3.046875,
      "learning_rate": 3.9286483848272676e-05,
      "loss": 0.8675,
      "step": 250670
    },
    {
      "epoch": 0.8785709028588251,
      "grad_norm": 3.234375,
      "learning_rate": 3.928583481960898e-05,
      "loss": 0.8086,
      "step": 250680
    },
    {
      "epoch": 0.8786059503657208,
      "grad_norm": 2.96875,
      "learning_rate": 3.928518579094527e-05,
      "loss": 0.8924,
      "step": 250690
    },
    {
      "epoch": 0.8786409978726163,
      "grad_norm": 3.0625,
      "learning_rate": 3.928453676228157e-05,
      "loss": 0.8123,
      "step": 250700
    },
    {
      "epoch": 0.8786760453795119,
      "grad_norm": 3.1875,
      "learning_rate": 3.928388773361787e-05,
      "loss": 0.877,
      "step": 250710
    },
    {
      "epoch": 0.8787110928864075,
      "grad_norm": 2.953125,
      "learning_rate": 3.928323870495417e-05,
      "loss": 0.9151,
      "step": 250720
    },
    {
      "epoch": 0.8787461403933031,
      "grad_norm": 2.953125,
      "learning_rate": 3.9282589676290464e-05,
      "loss": 0.8955,
      "step": 250730
    },
    {
      "epoch": 0.8787811879001988,
      "grad_norm": 2.84375,
      "learning_rate": 3.9281940647626765e-05,
      "loss": 0.9734,
      "step": 250740
    },
    {
      "epoch": 0.8788162354070943,
      "grad_norm": 2.640625,
      "learning_rate": 3.928129161896306e-05,
      "loss": 0.8766,
      "step": 250750
    },
    {
      "epoch": 0.87885128291399,
      "grad_norm": 2.859375,
      "learning_rate": 3.928064259029936e-05,
      "loss": 0.9468,
      "step": 250760
    },
    {
      "epoch": 0.8788863304208855,
      "grad_norm": 2.5,
      "learning_rate": 3.9279993561635656e-05,
      "loss": 0.8574,
      "step": 250770
    },
    {
      "epoch": 0.8789213779277811,
      "grad_norm": 3.203125,
      "learning_rate": 3.927934453297196e-05,
      "loss": 0.8648,
      "step": 250780
    },
    {
      "epoch": 0.8789564254346767,
      "grad_norm": 2.4375,
      "learning_rate": 3.927869550430825e-05,
      "loss": 0.8315,
      "step": 250790
    },
    {
      "epoch": 0.8789914729415723,
      "grad_norm": 2.71875,
      "learning_rate": 3.927804647564455e-05,
      "loss": 0.9527,
      "step": 250800
    },
    {
      "epoch": 0.8790265204484679,
      "grad_norm": 3.1875,
      "learning_rate": 3.9277397446980854e-05,
      "loss": 0.8698,
      "step": 250810
    },
    {
      "epoch": 0.8790615679553635,
      "grad_norm": 3.171875,
      "learning_rate": 3.927674841831715e-05,
      "loss": 0.8874,
      "step": 250820
    },
    {
      "epoch": 0.8790966154622591,
      "grad_norm": 2.90625,
      "learning_rate": 3.927609938965345e-05,
      "loss": 0.8858,
      "step": 250830
    },
    {
      "epoch": 0.8791316629691547,
      "grad_norm": 3.671875,
      "learning_rate": 3.9275450360989745e-05,
      "loss": 0.8992,
      "step": 250840
    },
    {
      "epoch": 0.8791667104760503,
      "grad_norm": 2.9375,
      "learning_rate": 3.9274801332326046e-05,
      "loss": 0.8919,
      "step": 250850
    },
    {
      "epoch": 0.8792017579829459,
      "grad_norm": 3.109375,
      "learning_rate": 3.927415230366234e-05,
      "loss": 0.9481,
      "step": 250860
    },
    {
      "epoch": 0.8792368054898415,
      "grad_norm": 3.046875,
      "learning_rate": 3.927350327499864e-05,
      "loss": 0.9448,
      "step": 250870
    },
    {
      "epoch": 0.879271852996737,
      "grad_norm": 2.9375,
      "learning_rate": 3.927285424633494e-05,
      "loss": 0.8828,
      "step": 250880
    },
    {
      "epoch": 0.8793069005036327,
      "grad_norm": 2.609375,
      "learning_rate": 3.927220521767124e-05,
      "loss": 0.8743,
      "step": 250890
    },
    {
      "epoch": 0.8793419480105282,
      "grad_norm": 3.046875,
      "learning_rate": 3.927155618900753e-05,
      "loss": 0.9019,
      "step": 250900
    },
    {
      "epoch": 0.8793769955174239,
      "grad_norm": 3.171875,
      "learning_rate": 3.9270907160343834e-05,
      "loss": 0.959,
      "step": 250910
    },
    {
      "epoch": 0.8794120430243194,
      "grad_norm": 3.015625,
      "learning_rate": 3.927025813168013e-05,
      "loss": 0.8197,
      "step": 250920
    },
    {
      "epoch": 0.8794470905312151,
      "grad_norm": 3.21875,
      "learning_rate": 3.9269609103016424e-05,
      "loss": 0.9269,
      "step": 250930
    },
    {
      "epoch": 0.8794821380381107,
      "grad_norm": 3.265625,
      "learning_rate": 3.9268960074352725e-05,
      "loss": 0.9869,
      "step": 250940
    },
    {
      "epoch": 0.8795171855450062,
      "grad_norm": 3.0,
      "learning_rate": 3.926831104568902e-05,
      "loss": 0.9164,
      "step": 250950
    },
    {
      "epoch": 0.8795522330519019,
      "grad_norm": 2.78125,
      "learning_rate": 3.926766201702532e-05,
      "loss": 0.8548,
      "step": 250960
    },
    {
      "epoch": 0.8795872805587974,
      "grad_norm": 3.1875,
      "learning_rate": 3.9267012988361616e-05,
      "loss": 0.9015,
      "step": 250970
    },
    {
      "epoch": 0.8796223280656931,
      "grad_norm": 2.84375,
      "learning_rate": 3.926636395969792e-05,
      "loss": 0.8879,
      "step": 250980
    },
    {
      "epoch": 0.8796573755725886,
      "grad_norm": 2.625,
      "learning_rate": 3.926571493103421e-05,
      "loss": 0.8918,
      "step": 250990
    },
    {
      "epoch": 0.8796924230794843,
      "grad_norm": 2.96875,
      "learning_rate": 3.926506590237051e-05,
      "loss": 0.9642,
      "step": 251000
    },
    {
      "epoch": 0.8797274705863798,
      "grad_norm": 2.78125,
      "learning_rate": 3.926441687370681e-05,
      "loss": 0.8585,
      "step": 251010
    },
    {
      "epoch": 0.8797625180932754,
      "grad_norm": 2.671875,
      "learning_rate": 3.926376784504311e-05,
      "loss": 0.893,
      "step": 251020
    },
    {
      "epoch": 0.8797975656001711,
      "grad_norm": 3.21875,
      "learning_rate": 3.926311881637941e-05,
      "loss": 1.0066,
      "step": 251030
    },
    {
      "epoch": 0.8798326131070666,
      "grad_norm": 2.65625,
      "learning_rate": 3.9262469787715705e-05,
      "loss": 0.9337,
      "step": 251040
    },
    {
      "epoch": 0.8798676606139623,
      "grad_norm": 2.75,
      "learning_rate": 3.9261820759052006e-05,
      "loss": 0.8731,
      "step": 251050
    },
    {
      "epoch": 0.8799027081208578,
      "grad_norm": 3.015625,
      "learning_rate": 3.92611717303883e-05,
      "loss": 0.9882,
      "step": 251060
    },
    {
      "epoch": 0.8799377556277534,
      "grad_norm": 3.609375,
      "learning_rate": 3.92605227017246e-05,
      "loss": 0.9242,
      "step": 251070
    },
    {
      "epoch": 0.879972803134649,
      "grad_norm": 2.765625,
      "learning_rate": 3.92598736730609e-05,
      "loss": 0.857,
      "step": 251080
    },
    {
      "epoch": 0.8800078506415446,
      "grad_norm": 2.46875,
      "learning_rate": 3.92592246443972e-05,
      "loss": 0.9069,
      "step": 251090
    },
    {
      "epoch": 0.8800428981484402,
      "grad_norm": 3.265625,
      "learning_rate": 3.925857561573349e-05,
      "loss": 0.9001,
      "step": 251100
    },
    {
      "epoch": 0.8800779456553358,
      "grad_norm": 3.078125,
      "learning_rate": 3.9257926587069794e-05,
      "loss": 0.8871,
      "step": 251110
    },
    {
      "epoch": 0.8801129931622313,
      "grad_norm": 3.015625,
      "learning_rate": 3.925727755840609e-05,
      "loss": 0.7906,
      "step": 251120
    },
    {
      "epoch": 0.880148040669127,
      "grad_norm": 2.90625,
      "learning_rate": 3.925662852974239e-05,
      "loss": 0.8276,
      "step": 251130
    },
    {
      "epoch": 0.8801830881760226,
      "grad_norm": 2.96875,
      "learning_rate": 3.9255979501078685e-05,
      "loss": 0.7819,
      "step": 251140
    },
    {
      "epoch": 0.8802181356829182,
      "grad_norm": 2.734375,
      "learning_rate": 3.9255330472414986e-05,
      "loss": 0.9353,
      "step": 251150
    },
    {
      "epoch": 0.8802531831898138,
      "grad_norm": 2.796875,
      "learning_rate": 3.925468144375128e-05,
      "loss": 0.8709,
      "step": 251160
    },
    {
      "epoch": 0.8802882306967094,
      "grad_norm": 2.53125,
      "learning_rate": 3.925403241508758e-05,
      "loss": 0.8905,
      "step": 251170
    },
    {
      "epoch": 0.880323278203605,
      "grad_norm": 2.875,
      "learning_rate": 3.9253383386423884e-05,
      "loss": 0.9708,
      "step": 251180
    },
    {
      "epoch": 0.8803583257105005,
      "grad_norm": 3.046875,
      "learning_rate": 3.925273435776018e-05,
      "loss": 1.0108,
      "step": 251190
    },
    {
      "epoch": 0.8803933732173962,
      "grad_norm": 2.84375,
      "learning_rate": 3.925208532909648e-05,
      "loss": 0.8958,
      "step": 251200
    },
    {
      "epoch": 0.8804284207242917,
      "grad_norm": 3.125,
      "learning_rate": 3.9251436300432774e-05,
      "loss": 0.8277,
      "step": 251210
    },
    {
      "epoch": 0.8804634682311874,
      "grad_norm": 3.171875,
      "learning_rate": 3.9250787271769076e-05,
      "loss": 0.9536,
      "step": 251220
    },
    {
      "epoch": 0.880498515738083,
      "grad_norm": 3.09375,
      "learning_rate": 3.925013824310537e-05,
      "loss": 0.8937,
      "step": 251230
    },
    {
      "epoch": 0.8805335632449786,
      "grad_norm": 2.78125,
      "learning_rate": 3.924948921444167e-05,
      "loss": 0.8896,
      "step": 251240
    },
    {
      "epoch": 0.8805686107518742,
      "grad_norm": 3.359375,
      "learning_rate": 3.9248840185777966e-05,
      "loss": 0.8436,
      "step": 251250
    },
    {
      "epoch": 0.8806036582587697,
      "grad_norm": 2.75,
      "learning_rate": 3.924819115711427e-05,
      "loss": 0.8711,
      "step": 251260
    },
    {
      "epoch": 0.8806387057656654,
      "grad_norm": 2.875,
      "learning_rate": 3.924754212845056e-05,
      "loss": 0.8727,
      "step": 251270
    },
    {
      "epoch": 0.8806737532725609,
      "grad_norm": 2.9375,
      "learning_rate": 3.9246893099786864e-05,
      "loss": 0.8886,
      "step": 251280
    },
    {
      "epoch": 0.8807088007794566,
      "grad_norm": 3.015625,
      "learning_rate": 3.924624407112316e-05,
      "loss": 0.8399,
      "step": 251290
    },
    {
      "epoch": 0.8807438482863521,
      "grad_norm": 2.578125,
      "learning_rate": 3.924559504245945e-05,
      "loss": 0.8935,
      "step": 251300
    },
    {
      "epoch": 0.8807788957932478,
      "grad_norm": 2.875,
      "learning_rate": 3.9244946013795754e-05,
      "loss": 0.8556,
      "step": 251310
    },
    {
      "epoch": 0.8808139433001434,
      "grad_norm": 2.65625,
      "learning_rate": 3.924429698513205e-05,
      "loss": 0.932,
      "step": 251320
    },
    {
      "epoch": 0.8808489908070389,
      "grad_norm": 2.75,
      "learning_rate": 3.924364795646835e-05,
      "loss": 0.9263,
      "step": 251330
    },
    {
      "epoch": 0.8808840383139346,
      "grad_norm": 3.1875,
      "learning_rate": 3.9242998927804645e-05,
      "loss": 0.9192,
      "step": 251340
    },
    {
      "epoch": 0.8809190858208301,
      "grad_norm": 2.828125,
      "learning_rate": 3.9242349899140946e-05,
      "loss": 0.8778,
      "step": 251350
    },
    {
      "epoch": 0.8809541333277258,
      "grad_norm": 3.046875,
      "learning_rate": 3.924170087047724e-05,
      "loss": 0.8136,
      "step": 251360
    },
    {
      "epoch": 0.8809891808346213,
      "grad_norm": 2.75,
      "learning_rate": 3.924105184181354e-05,
      "loss": 0.8644,
      "step": 251370
    },
    {
      "epoch": 0.881024228341517,
      "grad_norm": 2.6875,
      "learning_rate": 3.924040281314984e-05,
      "loss": 0.8962,
      "step": 251380
    },
    {
      "epoch": 0.8810592758484125,
      "grad_norm": 2.84375,
      "learning_rate": 3.923975378448614e-05,
      "loss": 0.9292,
      "step": 251390
    },
    {
      "epoch": 0.8810943233553081,
      "grad_norm": 3.046875,
      "learning_rate": 3.923910475582244e-05,
      "loss": 0.9393,
      "step": 251400
    },
    {
      "epoch": 0.8811293708622037,
      "grad_norm": 3.5625,
      "learning_rate": 3.9238455727158734e-05,
      "loss": 0.9753,
      "step": 251410
    },
    {
      "epoch": 0.8811644183690993,
      "grad_norm": 3.0625,
      "learning_rate": 3.9237806698495036e-05,
      "loss": 0.9214,
      "step": 251420
    },
    {
      "epoch": 0.881199465875995,
      "grad_norm": 3.1875,
      "learning_rate": 3.923715766983133e-05,
      "loss": 0.8534,
      "step": 251430
    },
    {
      "epoch": 0.8812345133828905,
      "grad_norm": 2.96875,
      "learning_rate": 3.923650864116763e-05,
      "loss": 0.9384,
      "step": 251440
    },
    {
      "epoch": 0.8812695608897861,
      "grad_norm": 2.875,
      "learning_rate": 3.9235859612503926e-05,
      "loss": 0.8289,
      "step": 251450
    },
    {
      "epoch": 0.8813046083966817,
      "grad_norm": 3.28125,
      "learning_rate": 3.923521058384023e-05,
      "loss": 0.8746,
      "step": 251460
    },
    {
      "epoch": 0.8813396559035773,
      "grad_norm": 2.953125,
      "learning_rate": 3.923456155517652e-05,
      "loss": 0.8765,
      "step": 251470
    },
    {
      "epoch": 0.8813747034104729,
      "grad_norm": 2.890625,
      "learning_rate": 3.9233912526512824e-05,
      "loss": 0.925,
      "step": 251480
    },
    {
      "epoch": 0.8814097509173685,
      "grad_norm": 3.28125,
      "learning_rate": 3.923326349784912e-05,
      "loss": 0.9157,
      "step": 251490
    },
    {
      "epoch": 0.881444798424264,
      "grad_norm": 3.015625,
      "learning_rate": 3.923261446918542e-05,
      "loss": 0.8705,
      "step": 251500
    },
    {
      "epoch": 0.8814798459311597,
      "grad_norm": 3.015625,
      "learning_rate": 3.9231965440521714e-05,
      "loss": 0.9696,
      "step": 251510
    },
    {
      "epoch": 0.8815148934380553,
      "grad_norm": 2.828125,
      "learning_rate": 3.9231316411858016e-05,
      "loss": 0.8715,
      "step": 251520
    },
    {
      "epoch": 0.8815499409449509,
      "grad_norm": 3.15625,
      "learning_rate": 3.923066738319432e-05,
      "loss": 0.8508,
      "step": 251530
    },
    {
      "epoch": 0.8815849884518465,
      "grad_norm": 2.828125,
      "learning_rate": 3.923001835453061e-05,
      "loss": 0.8876,
      "step": 251540
    },
    {
      "epoch": 0.881620035958742,
      "grad_norm": 2.78125,
      "learning_rate": 3.922936932586691e-05,
      "loss": 0.883,
      "step": 251550
    },
    {
      "epoch": 0.8816550834656377,
      "grad_norm": 2.515625,
      "learning_rate": 3.922872029720321e-05,
      "loss": 0.8896,
      "step": 251560
    },
    {
      "epoch": 0.8816901309725332,
      "grad_norm": 3.125,
      "learning_rate": 3.922807126853951e-05,
      "loss": 0.8863,
      "step": 251570
    },
    {
      "epoch": 0.8817251784794289,
      "grad_norm": 3.3125,
      "learning_rate": 3.9227422239875804e-05,
      "loss": 0.9099,
      "step": 251580
    },
    {
      "epoch": 0.8817602259863244,
      "grad_norm": 3.21875,
      "learning_rate": 3.9226773211212105e-05,
      "loss": 0.9272,
      "step": 251590
    },
    {
      "epoch": 0.8817952734932201,
      "grad_norm": 3.125,
      "learning_rate": 3.92261241825484e-05,
      "loss": 0.9509,
      "step": 251600
    },
    {
      "epoch": 0.8818303210001156,
      "grad_norm": 2.859375,
      "learning_rate": 3.92254751538847e-05,
      "loss": 0.9636,
      "step": 251610
    },
    {
      "epoch": 0.8818653685070112,
      "grad_norm": 2.921875,
      "learning_rate": 3.9224826125220996e-05,
      "loss": 0.8707,
      "step": 251620
    },
    {
      "epoch": 0.8819004160139069,
      "grad_norm": 2.875,
      "learning_rate": 3.92241770965573e-05,
      "loss": 0.917,
      "step": 251630
    },
    {
      "epoch": 0.8819354635208024,
      "grad_norm": 2.921875,
      "learning_rate": 3.922352806789359e-05,
      "loss": 0.9042,
      "step": 251640
    },
    {
      "epoch": 0.8819705110276981,
      "grad_norm": 2.78125,
      "learning_rate": 3.922287903922989e-05,
      "loss": 0.8231,
      "step": 251650
    },
    {
      "epoch": 0.8820055585345936,
      "grad_norm": 2.875,
      "learning_rate": 3.922223001056619e-05,
      "loss": 0.8567,
      "step": 251660
    },
    {
      "epoch": 0.8820406060414893,
      "grad_norm": 2.640625,
      "learning_rate": 3.922158098190249e-05,
      "loss": 0.8675,
      "step": 251670
    },
    {
      "epoch": 0.8820756535483848,
      "grad_norm": 3.359375,
      "learning_rate": 3.9220931953238784e-05,
      "loss": 0.8704,
      "step": 251680
    },
    {
      "epoch": 0.8821107010552804,
      "grad_norm": 3.25,
      "learning_rate": 3.922028292457508e-05,
      "loss": 0.887,
      "step": 251690
    },
    {
      "epoch": 0.882145748562176,
      "grad_norm": 2.765625,
      "learning_rate": 3.921963389591138e-05,
      "loss": 0.8594,
      "step": 251700
    },
    {
      "epoch": 0.8821807960690716,
      "grad_norm": 3.03125,
      "learning_rate": 3.9218984867247674e-05,
      "loss": 0.923,
      "step": 251710
    },
    {
      "epoch": 0.8822158435759673,
      "grad_norm": 3.109375,
      "learning_rate": 3.9218335838583976e-05,
      "loss": 0.9131,
      "step": 251720
    },
    {
      "epoch": 0.8822508910828628,
      "grad_norm": 2.921875,
      "learning_rate": 3.921768680992027e-05,
      "loss": 0.9098,
      "step": 251730
    },
    {
      "epoch": 0.8822859385897585,
      "grad_norm": 2.859375,
      "learning_rate": 3.921703778125657e-05,
      "loss": 0.921,
      "step": 251740
    },
    {
      "epoch": 0.882320986096654,
      "grad_norm": 2.765625,
      "learning_rate": 3.9216388752592866e-05,
      "loss": 0.8606,
      "step": 251750
    },
    {
      "epoch": 0.8823560336035496,
      "grad_norm": 3.0625,
      "learning_rate": 3.921573972392917e-05,
      "loss": 0.922,
      "step": 251760
    },
    {
      "epoch": 0.8823910811104452,
      "grad_norm": 2.8125,
      "learning_rate": 3.921509069526547e-05,
      "loss": 0.7844,
      "step": 251770
    },
    {
      "epoch": 0.8824261286173408,
      "grad_norm": 2.890625,
      "learning_rate": 3.9214441666601764e-05,
      "loss": 0.9584,
      "step": 251780
    },
    {
      "epoch": 0.8824611761242364,
      "grad_norm": 2.765625,
      "learning_rate": 3.9213792637938065e-05,
      "loss": 0.9008,
      "step": 251790
    },
    {
      "epoch": 0.882496223631132,
      "grad_norm": 3.09375,
      "learning_rate": 3.921314360927436e-05,
      "loss": 0.9018,
      "step": 251800
    },
    {
      "epoch": 0.8825312711380277,
      "grad_norm": 2.921875,
      "learning_rate": 3.921249458061066e-05,
      "loss": 0.8854,
      "step": 251810
    },
    {
      "epoch": 0.8825663186449232,
      "grad_norm": 3.171875,
      "learning_rate": 3.9211845551946956e-05,
      "loss": 0.9789,
      "step": 251820
    },
    {
      "epoch": 0.8826013661518188,
      "grad_norm": 3.140625,
      "learning_rate": 3.921119652328326e-05,
      "loss": 0.9204,
      "step": 251830
    },
    {
      "epoch": 0.8826364136587144,
      "grad_norm": 2.9375,
      "learning_rate": 3.921054749461955e-05,
      "loss": 0.83,
      "step": 251840
    },
    {
      "epoch": 0.88267146116561,
      "grad_norm": 3.265625,
      "learning_rate": 3.920989846595585e-05,
      "loss": 0.9364,
      "step": 251850
    },
    {
      "epoch": 0.8827065086725056,
      "grad_norm": 3.0,
      "learning_rate": 3.920924943729215e-05,
      "loss": 0.8672,
      "step": 251860
    },
    {
      "epoch": 0.8827415561794012,
      "grad_norm": 3.078125,
      "learning_rate": 3.920860040862845e-05,
      "loss": 0.9044,
      "step": 251870
    },
    {
      "epoch": 0.8827766036862967,
      "grad_norm": 2.953125,
      "learning_rate": 3.9207951379964744e-05,
      "loss": 0.9369,
      "step": 251880
    },
    {
      "epoch": 0.8828116511931924,
      "grad_norm": 3.28125,
      "learning_rate": 3.9207302351301045e-05,
      "loss": 0.8702,
      "step": 251890
    },
    {
      "epoch": 0.8828466987000879,
      "grad_norm": 2.515625,
      "learning_rate": 3.920665332263735e-05,
      "loss": 0.8638,
      "step": 251900
    },
    {
      "epoch": 0.8828817462069836,
      "grad_norm": 2.90625,
      "learning_rate": 3.920600429397364e-05,
      "loss": 0.8886,
      "step": 251910
    },
    {
      "epoch": 0.8829167937138792,
      "grad_norm": 2.53125,
      "learning_rate": 3.920535526530994e-05,
      "loss": 0.8421,
      "step": 251920
    },
    {
      "epoch": 0.8829518412207747,
      "grad_norm": 2.75,
      "learning_rate": 3.920470623664624e-05,
      "loss": 0.8963,
      "step": 251930
    },
    {
      "epoch": 0.8829868887276704,
      "grad_norm": 3.171875,
      "learning_rate": 3.920405720798254e-05,
      "loss": 0.8069,
      "step": 251940
    },
    {
      "epoch": 0.8830219362345659,
      "grad_norm": 2.84375,
      "learning_rate": 3.920340817931883e-05,
      "loss": 0.8584,
      "step": 251950
    },
    {
      "epoch": 0.8830569837414616,
      "grad_norm": 3.09375,
      "learning_rate": 3.9202759150655135e-05,
      "loss": 0.9275,
      "step": 251960
    },
    {
      "epoch": 0.8830920312483571,
      "grad_norm": 2.609375,
      "learning_rate": 3.920211012199143e-05,
      "loss": 0.9069,
      "step": 251970
    },
    {
      "epoch": 0.8831270787552528,
      "grad_norm": 3.203125,
      "learning_rate": 3.920146109332773e-05,
      "loss": 0.8927,
      "step": 251980
    },
    {
      "epoch": 0.8831621262621483,
      "grad_norm": 2.671875,
      "learning_rate": 3.9200812064664025e-05,
      "loss": 0.8993,
      "step": 251990
    },
    {
      "epoch": 0.8831971737690439,
      "grad_norm": 2.921875,
      "learning_rate": 3.920016303600033e-05,
      "loss": 0.949,
      "step": 252000
    },
    {
      "epoch": 0.8832322212759396,
      "grad_norm": 2.96875,
      "learning_rate": 3.919951400733662e-05,
      "loss": 0.856,
      "step": 252010
    },
    {
      "epoch": 0.8832672687828351,
      "grad_norm": 2.765625,
      "learning_rate": 3.919886497867292e-05,
      "loss": 0.9149,
      "step": 252020
    },
    {
      "epoch": 0.8833023162897308,
      "grad_norm": 2.875,
      "learning_rate": 3.919821595000922e-05,
      "loss": 0.9591,
      "step": 252030
    },
    {
      "epoch": 0.8833373637966263,
      "grad_norm": 2.046875,
      "learning_rate": 3.919756692134552e-05,
      "loss": 0.8392,
      "step": 252040
    },
    {
      "epoch": 0.883372411303522,
      "grad_norm": 2.609375,
      "learning_rate": 3.919691789268182e-05,
      "loss": 0.8877,
      "step": 252050
    },
    {
      "epoch": 0.8834074588104175,
      "grad_norm": 3.640625,
      "learning_rate": 3.919626886401811e-05,
      "loss": 0.9438,
      "step": 252060
    },
    {
      "epoch": 0.8834425063173131,
      "grad_norm": 2.5625,
      "learning_rate": 3.919561983535441e-05,
      "loss": 0.823,
      "step": 252070
    },
    {
      "epoch": 0.8834775538242087,
      "grad_norm": 2.96875,
      "learning_rate": 3.9194970806690704e-05,
      "loss": 0.8549,
      "step": 252080
    },
    {
      "epoch": 0.8835126013311043,
      "grad_norm": 2.953125,
      "learning_rate": 3.9194321778027005e-05,
      "loss": 0.9056,
      "step": 252090
    },
    {
      "epoch": 0.8835476488379999,
      "grad_norm": 2.96875,
      "learning_rate": 3.91936727493633e-05,
      "loss": 0.9413,
      "step": 252100
    },
    {
      "epoch": 0.8835826963448955,
      "grad_norm": 2.734375,
      "learning_rate": 3.91930237206996e-05,
      "loss": 0.8549,
      "step": 252110
    },
    {
      "epoch": 0.8836177438517911,
      "grad_norm": 3.203125,
      "learning_rate": 3.9192374692035896e-05,
      "loss": 0.884,
      "step": 252120
    },
    {
      "epoch": 0.8836527913586867,
      "grad_norm": 2.640625,
      "learning_rate": 3.91917256633722e-05,
      "loss": 0.8575,
      "step": 252130
    },
    {
      "epoch": 0.8836878388655823,
      "grad_norm": 3.234375,
      "learning_rate": 3.91910766347085e-05,
      "loss": 0.8605,
      "step": 252140
    },
    {
      "epoch": 0.8837228863724779,
      "grad_norm": 3.265625,
      "learning_rate": 3.919042760604479e-05,
      "loss": 0.8614,
      "step": 252150
    },
    {
      "epoch": 0.8837579338793735,
      "grad_norm": 3.375,
      "learning_rate": 3.9189778577381095e-05,
      "loss": 0.7628,
      "step": 252160
    },
    {
      "epoch": 0.883792981386269,
      "grad_norm": 2.96875,
      "learning_rate": 3.918912954871739e-05,
      "loss": 0.9221,
      "step": 252170
    },
    {
      "epoch": 0.8838280288931647,
      "grad_norm": 2.71875,
      "learning_rate": 3.918848052005369e-05,
      "loss": 0.8727,
      "step": 252180
    },
    {
      "epoch": 0.8838630764000602,
      "grad_norm": 2.328125,
      "learning_rate": 3.9187831491389985e-05,
      "loss": 0.8742,
      "step": 252190
    },
    {
      "epoch": 0.8838981239069559,
      "grad_norm": 3.109375,
      "learning_rate": 3.918718246272629e-05,
      "loss": 0.9095,
      "step": 252200
    },
    {
      "epoch": 0.8839331714138515,
      "grad_norm": 2.6875,
      "learning_rate": 3.918653343406258e-05,
      "loss": 0.9439,
      "step": 252210
    },
    {
      "epoch": 0.8839682189207471,
      "grad_norm": 2.890625,
      "learning_rate": 3.918588440539888e-05,
      "loss": 0.874,
      "step": 252220
    },
    {
      "epoch": 0.8840032664276427,
      "grad_norm": 2.78125,
      "learning_rate": 3.918523537673518e-05,
      "loss": 0.8793,
      "step": 252230
    },
    {
      "epoch": 0.8840383139345382,
      "grad_norm": 3.03125,
      "learning_rate": 3.918458634807148e-05,
      "loss": 0.8438,
      "step": 252240
    },
    {
      "epoch": 0.8840733614414339,
      "grad_norm": 3.09375,
      "learning_rate": 3.918393731940777e-05,
      "loss": 0.7857,
      "step": 252250
    },
    {
      "epoch": 0.8841084089483294,
      "grad_norm": 2.671875,
      "learning_rate": 3.9183288290744075e-05,
      "loss": 0.9521,
      "step": 252260
    },
    {
      "epoch": 0.8841434564552251,
      "grad_norm": 2.890625,
      "learning_rate": 3.9182639262080376e-05,
      "loss": 0.8618,
      "step": 252270
    },
    {
      "epoch": 0.8841785039621206,
      "grad_norm": 2.484375,
      "learning_rate": 3.918199023341667e-05,
      "loss": 0.896,
      "step": 252280
    },
    {
      "epoch": 0.8842135514690163,
      "grad_norm": 2.65625,
      "learning_rate": 3.918134120475297e-05,
      "loss": 0.8896,
      "step": 252290
    },
    {
      "epoch": 0.8842485989759119,
      "grad_norm": 2.578125,
      "learning_rate": 3.918069217608927e-05,
      "loss": 0.8876,
      "step": 252300
    },
    {
      "epoch": 0.8842836464828074,
      "grad_norm": 2.96875,
      "learning_rate": 3.918004314742557e-05,
      "loss": 0.9563,
      "step": 252310
    },
    {
      "epoch": 0.8843186939897031,
      "grad_norm": 3.078125,
      "learning_rate": 3.917939411876186e-05,
      "loss": 0.8717,
      "step": 252320
    },
    {
      "epoch": 0.8843537414965986,
      "grad_norm": 2.75,
      "learning_rate": 3.9178745090098164e-05,
      "loss": 0.8417,
      "step": 252330
    },
    {
      "epoch": 0.8843887890034943,
      "grad_norm": 2.828125,
      "learning_rate": 3.917809606143446e-05,
      "loss": 0.859,
      "step": 252340
    },
    {
      "epoch": 0.8844238365103898,
      "grad_norm": 3.03125,
      "learning_rate": 3.917744703277076e-05,
      "loss": 0.8357,
      "step": 252350
    },
    {
      "epoch": 0.8844588840172855,
      "grad_norm": 2.53125,
      "learning_rate": 3.9176798004107055e-05,
      "loss": 0.9096,
      "step": 252360
    },
    {
      "epoch": 0.884493931524181,
      "grad_norm": 3.109375,
      "learning_rate": 3.9176148975443356e-05,
      "loss": 0.8726,
      "step": 252370
    },
    {
      "epoch": 0.8845289790310766,
      "grad_norm": 3.671875,
      "learning_rate": 3.917549994677965e-05,
      "loss": 0.8581,
      "step": 252380
    },
    {
      "epoch": 0.8845640265379722,
      "grad_norm": 3.234375,
      "learning_rate": 3.917485091811595e-05,
      "loss": 0.8997,
      "step": 252390
    },
    {
      "epoch": 0.8845990740448678,
      "grad_norm": 3.0625,
      "learning_rate": 3.917420188945225e-05,
      "loss": 0.983,
      "step": 252400
    },
    {
      "epoch": 0.8846341215517635,
      "grad_norm": 3.5,
      "learning_rate": 3.917355286078855e-05,
      "loss": 0.8415,
      "step": 252410
    },
    {
      "epoch": 0.884669169058659,
      "grad_norm": 2.828125,
      "learning_rate": 3.917290383212485e-05,
      "loss": 0.8512,
      "step": 252420
    },
    {
      "epoch": 0.8847042165655546,
      "grad_norm": 2.734375,
      "learning_rate": 3.917225480346114e-05,
      "loss": 0.9155,
      "step": 252430
    },
    {
      "epoch": 0.8847392640724502,
      "grad_norm": 3.078125,
      "learning_rate": 3.917160577479744e-05,
      "loss": 0.8736,
      "step": 252440
    },
    {
      "epoch": 0.8847743115793458,
      "grad_norm": 7.46875,
      "learning_rate": 3.917095674613373e-05,
      "loss": 0.9093,
      "step": 252450
    },
    {
      "epoch": 0.8848093590862414,
      "grad_norm": 2.96875,
      "learning_rate": 3.9170307717470035e-05,
      "loss": 0.8096,
      "step": 252460
    },
    {
      "epoch": 0.884844406593137,
      "grad_norm": 3.03125,
      "learning_rate": 3.916965868880633e-05,
      "loss": 0.9413,
      "step": 252470
    },
    {
      "epoch": 0.8848794541000325,
      "grad_norm": 2.40625,
      "learning_rate": 3.916900966014263e-05,
      "loss": 0.8383,
      "step": 252480
    },
    {
      "epoch": 0.8849145016069282,
      "grad_norm": 2.484375,
      "learning_rate": 3.916836063147893e-05,
      "loss": 0.8469,
      "step": 252490
    },
    {
      "epoch": 0.8849495491138238,
      "grad_norm": 2.765625,
      "learning_rate": 3.916771160281523e-05,
      "loss": 0.9284,
      "step": 252500
    },
    {
      "epoch": 0.8849845966207194,
      "grad_norm": 2.84375,
      "learning_rate": 3.916706257415153e-05,
      "loss": 0.8731,
      "step": 252510
    },
    {
      "epoch": 0.885019644127615,
      "grad_norm": 2.71875,
      "learning_rate": 3.916641354548782e-05,
      "loss": 0.8665,
      "step": 252520
    },
    {
      "epoch": 0.8850546916345106,
      "grad_norm": 2.546875,
      "learning_rate": 3.9165764516824124e-05,
      "loss": 0.8879,
      "step": 252530
    },
    {
      "epoch": 0.8850897391414062,
      "grad_norm": 2.75,
      "learning_rate": 3.916511548816042e-05,
      "loss": 0.8883,
      "step": 252540
    },
    {
      "epoch": 0.8851247866483017,
      "grad_norm": 2.859375,
      "learning_rate": 3.916446645949672e-05,
      "loss": 0.8316,
      "step": 252550
    },
    {
      "epoch": 0.8851598341551974,
      "grad_norm": 3.109375,
      "learning_rate": 3.9163817430833015e-05,
      "loss": 0.8533,
      "step": 252560
    },
    {
      "epoch": 0.8851948816620929,
      "grad_norm": 3.09375,
      "learning_rate": 3.9163168402169316e-05,
      "loss": 0.8734,
      "step": 252570
    },
    {
      "epoch": 0.8852299291689886,
      "grad_norm": 2.796875,
      "learning_rate": 3.916251937350561e-05,
      "loss": 0.9721,
      "step": 252580
    },
    {
      "epoch": 0.8852649766758841,
      "grad_norm": 2.65625,
      "learning_rate": 3.916187034484191e-05,
      "loss": 0.8472,
      "step": 252590
    },
    {
      "epoch": 0.8853000241827798,
      "grad_norm": 2.875,
      "learning_rate": 3.916122131617821e-05,
      "loss": 0.8393,
      "step": 252600
    },
    {
      "epoch": 0.8853350716896754,
      "grad_norm": 2.71875,
      "learning_rate": 3.916057228751451e-05,
      "loss": 0.9079,
      "step": 252610
    },
    {
      "epoch": 0.8853701191965709,
      "grad_norm": 3.25,
      "learning_rate": 3.91599232588508e-05,
      "loss": 0.8574,
      "step": 252620
    },
    {
      "epoch": 0.8854051667034666,
      "grad_norm": 2.828125,
      "learning_rate": 3.9159274230187104e-05,
      "loss": 0.8004,
      "step": 252630
    },
    {
      "epoch": 0.8854402142103621,
      "grad_norm": 3.140625,
      "learning_rate": 3.9158625201523405e-05,
      "loss": 0.9022,
      "step": 252640
    },
    {
      "epoch": 0.8854752617172578,
      "grad_norm": 3.078125,
      "learning_rate": 3.91579761728597e-05,
      "loss": 0.9221,
      "step": 252650
    },
    {
      "epoch": 0.8855103092241533,
      "grad_norm": 2.484375,
      "learning_rate": 3.9157327144196e-05,
      "loss": 0.8657,
      "step": 252660
    },
    {
      "epoch": 0.885545356731049,
      "grad_norm": 3.25,
      "learning_rate": 3.9156678115532296e-05,
      "loss": 0.9748,
      "step": 252670
    },
    {
      "epoch": 0.8855804042379445,
      "grad_norm": 3.09375,
      "learning_rate": 3.91560290868686e-05,
      "loss": 0.9759,
      "step": 252680
    },
    {
      "epoch": 0.8856154517448401,
      "grad_norm": 2.359375,
      "learning_rate": 3.915538005820489e-05,
      "loss": 0.889,
      "step": 252690
    },
    {
      "epoch": 0.8856504992517358,
      "grad_norm": 2.90625,
      "learning_rate": 3.9154731029541193e-05,
      "loss": 0.8891,
      "step": 252700
    },
    {
      "epoch": 0.8856855467586313,
      "grad_norm": 2.984375,
      "learning_rate": 3.915408200087749e-05,
      "loss": 0.9858,
      "step": 252710
    },
    {
      "epoch": 0.885720594265527,
      "grad_norm": 3.015625,
      "learning_rate": 3.915343297221379e-05,
      "loss": 0.9166,
      "step": 252720
    },
    {
      "epoch": 0.8857556417724225,
      "grad_norm": 2.71875,
      "learning_rate": 3.9152783943550084e-05,
      "loss": 1.0079,
      "step": 252730
    },
    {
      "epoch": 0.8857906892793181,
      "grad_norm": 3.3125,
      "learning_rate": 3.9152134914886385e-05,
      "loss": 0.9134,
      "step": 252740
    },
    {
      "epoch": 0.8858257367862137,
      "grad_norm": 2.84375,
      "learning_rate": 3.915148588622268e-05,
      "loss": 0.8407,
      "step": 252750
    },
    {
      "epoch": 0.8858607842931093,
      "grad_norm": 3.171875,
      "learning_rate": 3.915083685755898e-05,
      "loss": 0.9119,
      "step": 252760
    },
    {
      "epoch": 0.8858958318000049,
      "grad_norm": 3.0625,
      "learning_rate": 3.915018782889528e-05,
      "loss": 0.8653,
      "step": 252770
    },
    {
      "epoch": 0.8859308793069005,
      "grad_norm": 2.46875,
      "learning_rate": 3.914953880023158e-05,
      "loss": 0.9066,
      "step": 252780
    },
    {
      "epoch": 0.885965926813796,
      "grad_norm": 2.765625,
      "learning_rate": 3.914888977156788e-05,
      "loss": 0.8397,
      "step": 252790
    },
    {
      "epoch": 0.8860009743206917,
      "grad_norm": 2.234375,
      "learning_rate": 3.9148240742904173e-05,
      "loss": 0.8655,
      "step": 252800
    },
    {
      "epoch": 0.8860360218275873,
      "grad_norm": 2.90625,
      "learning_rate": 3.914759171424047e-05,
      "loss": 0.8273,
      "step": 252810
    },
    {
      "epoch": 0.8860710693344829,
      "grad_norm": 2.9375,
      "learning_rate": 3.914694268557676e-05,
      "loss": 0.8056,
      "step": 252820
    },
    {
      "epoch": 0.8861061168413785,
      "grad_norm": 2.875,
      "learning_rate": 3.9146293656913064e-05,
      "loss": 0.8937,
      "step": 252830
    },
    {
      "epoch": 0.8861411643482741,
      "grad_norm": 3.03125,
      "learning_rate": 3.914564462824936e-05,
      "loss": 0.7867,
      "step": 252840
    },
    {
      "epoch": 0.8861762118551697,
      "grad_norm": 3.234375,
      "learning_rate": 3.914499559958566e-05,
      "loss": 0.9034,
      "step": 252850
    },
    {
      "epoch": 0.8862112593620652,
      "grad_norm": 3.40625,
      "learning_rate": 3.914434657092196e-05,
      "loss": 0.8298,
      "step": 252860
    },
    {
      "epoch": 0.8862463068689609,
      "grad_norm": 2.4375,
      "learning_rate": 3.9143697542258256e-05,
      "loss": 0.9094,
      "step": 252870
    },
    {
      "epoch": 0.8862813543758564,
      "grad_norm": 2.859375,
      "learning_rate": 3.914304851359456e-05,
      "loss": 0.7738,
      "step": 252880
    },
    {
      "epoch": 0.8863164018827521,
      "grad_norm": 2.328125,
      "learning_rate": 3.914239948493085e-05,
      "loss": 0.7881,
      "step": 252890
    },
    {
      "epoch": 0.8863514493896477,
      "grad_norm": 2.921875,
      "learning_rate": 3.9141750456267153e-05,
      "loss": 0.8991,
      "step": 252900
    },
    {
      "epoch": 0.8863864968965433,
      "grad_norm": 2.4375,
      "learning_rate": 3.914110142760345e-05,
      "loss": 0.7494,
      "step": 252910
    },
    {
      "epoch": 0.8864215444034389,
      "grad_norm": 2.859375,
      "learning_rate": 3.914045239893975e-05,
      "loss": 0.8389,
      "step": 252920
    },
    {
      "epoch": 0.8864565919103344,
      "grad_norm": 3.15625,
      "learning_rate": 3.9139803370276044e-05,
      "loss": 0.8138,
      "step": 252930
    },
    {
      "epoch": 0.8864916394172301,
      "grad_norm": 3.15625,
      "learning_rate": 3.9139154341612345e-05,
      "loss": 0.8992,
      "step": 252940
    },
    {
      "epoch": 0.8865266869241256,
      "grad_norm": 3.0625,
      "learning_rate": 3.913850531294864e-05,
      "loss": 0.9036,
      "step": 252950
    },
    {
      "epoch": 0.8865617344310213,
      "grad_norm": 2.875,
      "learning_rate": 3.913785628428494e-05,
      "loss": 0.8643,
      "step": 252960
    },
    {
      "epoch": 0.8865967819379168,
      "grad_norm": 6.0625,
      "learning_rate": 3.9137207255621236e-05,
      "loss": 0.9019,
      "step": 252970
    },
    {
      "epoch": 0.8866318294448124,
      "grad_norm": 2.921875,
      "learning_rate": 3.913655822695754e-05,
      "loss": 0.9227,
      "step": 252980
    },
    {
      "epoch": 0.8866668769517081,
      "grad_norm": 3.203125,
      "learning_rate": 3.913590919829383e-05,
      "loss": 0.9071,
      "step": 252990
    },
    {
      "epoch": 0.8867019244586036,
      "grad_norm": 2.859375,
      "learning_rate": 3.9135260169630133e-05,
      "loss": 0.8839,
      "step": 253000
    },
    {
      "epoch": 0.8867369719654993,
      "grad_norm": 2.984375,
      "learning_rate": 3.9134611140966435e-05,
      "loss": 0.8755,
      "step": 253010
    },
    {
      "epoch": 0.8867720194723948,
      "grad_norm": 2.78125,
      "learning_rate": 3.913396211230273e-05,
      "loss": 0.808,
      "step": 253020
    },
    {
      "epoch": 0.8868070669792905,
      "grad_norm": 2.640625,
      "learning_rate": 3.913331308363903e-05,
      "loss": 0.9216,
      "step": 253030
    },
    {
      "epoch": 0.886842114486186,
      "grad_norm": 2.890625,
      "learning_rate": 3.9132664054975325e-05,
      "loss": 0.9239,
      "step": 253040
    },
    {
      "epoch": 0.8868771619930816,
      "grad_norm": 2.6875,
      "learning_rate": 3.913201502631163e-05,
      "loss": 0.8576,
      "step": 253050
    },
    {
      "epoch": 0.8869122094999772,
      "grad_norm": 3.21875,
      "learning_rate": 3.913136599764792e-05,
      "loss": 0.9312,
      "step": 253060
    },
    {
      "epoch": 0.8869472570068728,
      "grad_norm": 2.5625,
      "learning_rate": 3.913071696898422e-05,
      "loss": 0.9139,
      "step": 253070
    },
    {
      "epoch": 0.8869823045137684,
      "grad_norm": 3.03125,
      "learning_rate": 3.913006794032052e-05,
      "loss": 0.8731,
      "step": 253080
    },
    {
      "epoch": 0.887017352020664,
      "grad_norm": 2.921875,
      "learning_rate": 3.912941891165682e-05,
      "loss": 0.9202,
      "step": 253090
    },
    {
      "epoch": 0.8870523995275597,
      "grad_norm": 3.375,
      "learning_rate": 3.9128769882993113e-05,
      "loss": 0.9032,
      "step": 253100
    },
    {
      "epoch": 0.8870874470344552,
      "grad_norm": 2.984375,
      "learning_rate": 3.9128120854329415e-05,
      "loss": 0.8732,
      "step": 253110
    },
    {
      "epoch": 0.8871224945413508,
      "grad_norm": 3.3125,
      "learning_rate": 3.912747182566571e-05,
      "loss": 0.9267,
      "step": 253120
    },
    {
      "epoch": 0.8871575420482464,
      "grad_norm": 3.0625,
      "learning_rate": 3.912682279700201e-05,
      "loss": 0.7909,
      "step": 253130
    },
    {
      "epoch": 0.887192589555142,
      "grad_norm": 2.875,
      "learning_rate": 3.912617376833831e-05,
      "loss": 0.8191,
      "step": 253140
    },
    {
      "epoch": 0.8872276370620376,
      "grad_norm": 2.671875,
      "learning_rate": 3.912552473967461e-05,
      "loss": 0.8552,
      "step": 253150
    },
    {
      "epoch": 0.8872626845689332,
      "grad_norm": 3.203125,
      "learning_rate": 3.912487571101091e-05,
      "loss": 0.8659,
      "step": 253160
    },
    {
      "epoch": 0.8872977320758287,
      "grad_norm": 2.828125,
      "learning_rate": 3.91242266823472e-05,
      "loss": 0.9824,
      "step": 253170
    },
    {
      "epoch": 0.8873327795827244,
      "grad_norm": 3.328125,
      "learning_rate": 3.9123577653683504e-05,
      "loss": 0.8878,
      "step": 253180
    },
    {
      "epoch": 0.88736782708962,
      "grad_norm": 2.78125,
      "learning_rate": 3.912292862501979e-05,
      "loss": 0.7701,
      "step": 253190
    },
    {
      "epoch": 0.8874028745965156,
      "grad_norm": 3.09375,
      "learning_rate": 3.9122279596356093e-05,
      "loss": 0.8935,
      "step": 253200
    },
    {
      "epoch": 0.8874379221034112,
      "grad_norm": 3.0625,
      "learning_rate": 3.912163056769239e-05,
      "loss": 0.8681,
      "step": 253210
    },
    {
      "epoch": 0.8874729696103067,
      "grad_norm": 2.71875,
      "learning_rate": 3.912098153902869e-05,
      "loss": 0.8577,
      "step": 253220
    },
    {
      "epoch": 0.8875080171172024,
      "grad_norm": 2.890625,
      "learning_rate": 3.912033251036499e-05,
      "loss": 0.8457,
      "step": 253230
    },
    {
      "epoch": 0.8875430646240979,
      "grad_norm": 3.453125,
      "learning_rate": 3.9119683481701285e-05,
      "loss": 0.8177,
      "step": 253240
    },
    {
      "epoch": 0.8875781121309936,
      "grad_norm": 2.78125,
      "learning_rate": 3.911903445303759e-05,
      "loss": 0.8237,
      "step": 253250
    },
    {
      "epoch": 0.8876131596378891,
      "grad_norm": 2.90625,
      "learning_rate": 3.911838542437388e-05,
      "loss": 0.8621,
      "step": 253260
    },
    {
      "epoch": 0.8876482071447848,
      "grad_norm": 3.21875,
      "learning_rate": 3.911773639571018e-05,
      "loss": 0.917,
      "step": 253270
    },
    {
      "epoch": 0.8876832546516803,
      "grad_norm": 2.84375,
      "learning_rate": 3.911708736704648e-05,
      "loss": 0.926,
      "step": 253280
    },
    {
      "epoch": 0.887718302158576,
      "grad_norm": 2.953125,
      "learning_rate": 3.911643833838278e-05,
      "loss": 0.9121,
      "step": 253290
    },
    {
      "epoch": 0.8877533496654716,
      "grad_norm": 2.765625,
      "learning_rate": 3.9115789309719073e-05,
      "loss": 0.8527,
      "step": 253300
    },
    {
      "epoch": 0.8877883971723671,
      "grad_norm": 2.875,
      "learning_rate": 3.9115140281055375e-05,
      "loss": 0.7997,
      "step": 253310
    },
    {
      "epoch": 0.8878234446792628,
      "grad_norm": 2.953125,
      "learning_rate": 3.911449125239167e-05,
      "loss": 0.8598,
      "step": 253320
    },
    {
      "epoch": 0.8878584921861583,
      "grad_norm": 2.75,
      "learning_rate": 3.911384222372797e-05,
      "loss": 0.8331,
      "step": 253330
    },
    {
      "epoch": 0.887893539693054,
      "grad_norm": 2.609375,
      "learning_rate": 3.9113193195064265e-05,
      "loss": 0.9115,
      "step": 253340
    },
    {
      "epoch": 0.8879285871999495,
      "grad_norm": 2.828125,
      "learning_rate": 3.911254416640057e-05,
      "loss": 0.9351,
      "step": 253350
    },
    {
      "epoch": 0.8879636347068451,
      "grad_norm": 2.828125,
      "learning_rate": 3.911189513773686e-05,
      "loss": 0.9253,
      "step": 253360
    },
    {
      "epoch": 0.8879986822137407,
      "grad_norm": 2.296875,
      "learning_rate": 3.911124610907316e-05,
      "loss": 0.8175,
      "step": 253370
    },
    {
      "epoch": 0.8880337297206363,
      "grad_norm": 2.828125,
      "learning_rate": 3.9110597080409464e-05,
      "loss": 0.8612,
      "step": 253380
    },
    {
      "epoch": 0.888068777227532,
      "grad_norm": 3.203125,
      "learning_rate": 3.910994805174576e-05,
      "loss": 0.8842,
      "step": 253390
    },
    {
      "epoch": 0.8881038247344275,
      "grad_norm": 3.03125,
      "learning_rate": 3.910929902308206e-05,
      "loss": 0.8895,
      "step": 253400
    },
    {
      "epoch": 0.8881388722413232,
      "grad_norm": 2.734375,
      "learning_rate": 3.9108649994418355e-05,
      "loss": 0.9034,
      "step": 253410
    },
    {
      "epoch": 0.8881739197482187,
      "grad_norm": 2.890625,
      "learning_rate": 3.9108000965754656e-05,
      "loss": 0.9567,
      "step": 253420
    },
    {
      "epoch": 0.8882089672551143,
      "grad_norm": 2.875,
      "learning_rate": 3.910735193709095e-05,
      "loss": 0.9375,
      "step": 253430
    },
    {
      "epoch": 0.8882440147620099,
      "grad_norm": 3.109375,
      "learning_rate": 3.910670290842725e-05,
      "loss": 0.8905,
      "step": 253440
    },
    {
      "epoch": 0.8882790622689055,
      "grad_norm": 3.421875,
      "learning_rate": 3.910605387976355e-05,
      "loss": 0.8128,
      "step": 253450
    },
    {
      "epoch": 0.888314109775801,
      "grad_norm": 3.078125,
      "learning_rate": 3.910540485109985e-05,
      "loss": 0.8756,
      "step": 253460
    },
    {
      "epoch": 0.8883491572826967,
      "grad_norm": 2.703125,
      "learning_rate": 3.910475582243614e-05,
      "loss": 0.8417,
      "step": 253470
    },
    {
      "epoch": 0.8883842047895923,
      "grad_norm": 2.578125,
      "learning_rate": 3.9104106793772444e-05,
      "loss": 0.9153,
      "step": 253480
    },
    {
      "epoch": 0.8884192522964879,
      "grad_norm": 2.765625,
      "learning_rate": 3.910345776510874e-05,
      "loss": 0.9546,
      "step": 253490
    },
    {
      "epoch": 0.8884542998033835,
      "grad_norm": 2.84375,
      "learning_rate": 3.910280873644504e-05,
      "loss": 0.8493,
      "step": 253500
    },
    {
      "epoch": 0.8884893473102791,
      "grad_norm": 2.921875,
      "learning_rate": 3.910215970778134e-05,
      "loss": 0.9387,
      "step": 253510
    },
    {
      "epoch": 0.8885243948171747,
      "grad_norm": 3.15625,
      "learning_rate": 3.9101510679117636e-05,
      "loss": 0.8565,
      "step": 253520
    },
    {
      "epoch": 0.8885594423240702,
      "grad_norm": 2.828125,
      "learning_rate": 3.910086165045394e-05,
      "loss": 0.8951,
      "step": 253530
    },
    {
      "epoch": 0.8885944898309659,
      "grad_norm": 3.21875,
      "learning_rate": 3.910021262179023e-05,
      "loss": 0.8722,
      "step": 253540
    },
    {
      "epoch": 0.8886295373378614,
      "grad_norm": 3.625,
      "learning_rate": 3.9099563593126534e-05,
      "loss": 0.9139,
      "step": 253550
    },
    {
      "epoch": 0.8886645848447571,
      "grad_norm": 2.875,
      "learning_rate": 3.909891456446282e-05,
      "loss": 0.8418,
      "step": 253560
    },
    {
      "epoch": 0.8886996323516526,
      "grad_norm": 2.1875,
      "learning_rate": 3.909826553579912e-05,
      "loss": 0.8861,
      "step": 253570
    },
    {
      "epoch": 0.8887346798585483,
      "grad_norm": 3.0625,
      "learning_rate": 3.909761650713542e-05,
      "loss": 0.8606,
      "step": 253580
    },
    {
      "epoch": 0.8887697273654439,
      "grad_norm": 2.453125,
      "learning_rate": 3.909696747847172e-05,
      "loss": 0.8396,
      "step": 253590
    },
    {
      "epoch": 0.8888047748723394,
      "grad_norm": 3.15625,
      "learning_rate": 3.909631844980802e-05,
      "loss": 0.9003,
      "step": 253600
    },
    {
      "epoch": 0.8888398223792351,
      "grad_norm": 3.59375,
      "learning_rate": 3.9095669421144315e-05,
      "loss": 0.9067,
      "step": 253610
    },
    {
      "epoch": 0.8888748698861306,
      "grad_norm": 3.046875,
      "learning_rate": 3.9095020392480616e-05,
      "loss": 0.8032,
      "step": 253620
    },
    {
      "epoch": 0.8889099173930263,
      "grad_norm": 3.234375,
      "learning_rate": 3.909437136381691e-05,
      "loss": 0.867,
      "step": 253630
    },
    {
      "epoch": 0.8889449648999218,
      "grad_norm": 3.15625,
      "learning_rate": 3.909372233515321e-05,
      "loss": 0.9531,
      "step": 253640
    },
    {
      "epoch": 0.8889800124068175,
      "grad_norm": 3.03125,
      "learning_rate": 3.909307330648951e-05,
      "loss": 0.8324,
      "step": 253650
    },
    {
      "epoch": 0.889015059913713,
      "grad_norm": 3.078125,
      "learning_rate": 3.909242427782581e-05,
      "loss": 0.9022,
      "step": 253660
    },
    {
      "epoch": 0.8890501074206086,
      "grad_norm": 2.84375,
      "learning_rate": 3.90917752491621e-05,
      "loss": 0.8775,
      "step": 253670
    },
    {
      "epoch": 0.8890851549275043,
      "grad_norm": 2.796875,
      "learning_rate": 3.9091126220498404e-05,
      "loss": 0.8559,
      "step": 253680
    },
    {
      "epoch": 0.8891202024343998,
      "grad_norm": 2.828125,
      "learning_rate": 3.90904771918347e-05,
      "loss": 0.8898,
      "step": 253690
    },
    {
      "epoch": 0.8891552499412955,
      "grad_norm": 2.734375,
      "learning_rate": 3.9089828163171e-05,
      "loss": 0.8523,
      "step": 253700
    },
    {
      "epoch": 0.889190297448191,
      "grad_norm": 3.421875,
      "learning_rate": 3.9089179134507295e-05,
      "loss": 0.9273,
      "step": 253710
    },
    {
      "epoch": 0.8892253449550866,
      "grad_norm": 2.796875,
      "learning_rate": 3.9088530105843596e-05,
      "loss": 0.8386,
      "step": 253720
    },
    {
      "epoch": 0.8892603924619822,
      "grad_norm": 3.015625,
      "learning_rate": 3.90878810771799e-05,
      "loss": 0.8927,
      "step": 253730
    },
    {
      "epoch": 0.8892954399688778,
      "grad_norm": 3.015625,
      "learning_rate": 3.908723204851619e-05,
      "loss": 1.0228,
      "step": 253740
    },
    {
      "epoch": 0.8893304874757734,
      "grad_norm": 2.953125,
      "learning_rate": 3.9086583019852494e-05,
      "loss": 0.9246,
      "step": 253750
    },
    {
      "epoch": 0.889365534982669,
      "grad_norm": 2.625,
      "learning_rate": 3.908593399118879e-05,
      "loss": 0.822,
      "step": 253760
    },
    {
      "epoch": 0.8894005824895646,
      "grad_norm": 2.828125,
      "learning_rate": 3.908528496252509e-05,
      "loss": 0.8683,
      "step": 253770
    },
    {
      "epoch": 0.8894356299964602,
      "grad_norm": 3.09375,
      "learning_rate": 3.9084635933861384e-05,
      "loss": 0.8985,
      "step": 253780
    },
    {
      "epoch": 0.8894706775033558,
      "grad_norm": 2.84375,
      "learning_rate": 3.9083986905197686e-05,
      "loss": 0.8621,
      "step": 253790
    },
    {
      "epoch": 0.8895057250102514,
      "grad_norm": 2.796875,
      "learning_rate": 3.908333787653398e-05,
      "loss": 0.9032,
      "step": 253800
    },
    {
      "epoch": 0.889540772517147,
      "grad_norm": 3.0,
      "learning_rate": 3.908268884787028e-05,
      "loss": 0.8005,
      "step": 253810
    },
    {
      "epoch": 0.8895758200240426,
      "grad_norm": 2.828125,
      "learning_rate": 3.9082039819206576e-05,
      "loss": 0.8256,
      "step": 253820
    },
    {
      "epoch": 0.8896108675309382,
      "grad_norm": 3.21875,
      "learning_rate": 3.908139079054288e-05,
      "loss": 0.8608,
      "step": 253830
    },
    {
      "epoch": 0.8896459150378337,
      "grad_norm": 3.09375,
      "learning_rate": 3.908074176187917e-05,
      "loss": 0.9395,
      "step": 253840
    },
    {
      "epoch": 0.8896809625447294,
      "grad_norm": 3.03125,
      "learning_rate": 3.9080092733215474e-05,
      "loss": 0.8245,
      "step": 253850
    },
    {
      "epoch": 0.8897160100516249,
      "grad_norm": 2.84375,
      "learning_rate": 3.907944370455177e-05,
      "loss": 0.935,
      "step": 253860
    },
    {
      "epoch": 0.8897510575585206,
      "grad_norm": 3.1875,
      "learning_rate": 3.907879467588807e-05,
      "loss": 0.8792,
      "step": 253870
    },
    {
      "epoch": 0.8897861050654162,
      "grad_norm": 3.03125,
      "learning_rate": 3.907814564722437e-05,
      "loss": 0.8544,
      "step": 253880
    },
    {
      "epoch": 0.8898211525723118,
      "grad_norm": 2.859375,
      "learning_rate": 3.9077496618560666e-05,
      "loss": 0.8934,
      "step": 253890
    },
    {
      "epoch": 0.8898562000792074,
      "grad_norm": 2.96875,
      "learning_rate": 3.907684758989697e-05,
      "loss": 0.8251,
      "step": 253900
    },
    {
      "epoch": 0.8898912475861029,
      "grad_norm": 3.046875,
      "learning_rate": 3.907619856123326e-05,
      "loss": 0.8654,
      "step": 253910
    },
    {
      "epoch": 0.8899262950929986,
      "grad_norm": 3.0625,
      "learning_rate": 3.907554953256956e-05,
      "loss": 0.8954,
      "step": 253920
    },
    {
      "epoch": 0.8899613425998941,
      "grad_norm": 3.234375,
      "learning_rate": 3.907490050390586e-05,
      "loss": 0.9364,
      "step": 253930
    },
    {
      "epoch": 0.8899963901067898,
      "grad_norm": 2.984375,
      "learning_rate": 3.907425147524215e-05,
      "loss": 0.9429,
      "step": 253940
    },
    {
      "epoch": 0.8900314376136853,
      "grad_norm": 3.125,
      "learning_rate": 3.907360244657845e-05,
      "loss": 0.9752,
      "step": 253950
    },
    {
      "epoch": 0.890066485120581,
      "grad_norm": 2.78125,
      "learning_rate": 3.907295341791475e-05,
      "loss": 0.8889,
      "step": 253960
    },
    {
      "epoch": 0.8901015326274766,
      "grad_norm": 3.03125,
      "learning_rate": 3.907230438925105e-05,
      "loss": 0.8331,
      "step": 253970
    },
    {
      "epoch": 0.8901365801343721,
      "grad_norm": 2.890625,
      "learning_rate": 3.9071655360587344e-05,
      "loss": 0.9387,
      "step": 253980
    },
    {
      "epoch": 0.8901716276412678,
      "grad_norm": 2.546875,
      "learning_rate": 3.9071006331923646e-05,
      "loss": 0.9029,
      "step": 253990
    },
    {
      "epoch": 0.8902066751481633,
      "grad_norm": 2.890625,
      "learning_rate": 3.907035730325994e-05,
      "loss": 0.9245,
      "step": 254000
    },
    {
      "epoch": 0.890241722655059,
      "grad_norm": 3.078125,
      "learning_rate": 3.906970827459624e-05,
      "loss": 0.8451,
      "step": 254010
    },
    {
      "epoch": 0.8902767701619545,
      "grad_norm": 3.0,
      "learning_rate": 3.9069059245932536e-05,
      "loss": 0.9529,
      "step": 254020
    },
    {
      "epoch": 0.8903118176688501,
      "grad_norm": 3.015625,
      "learning_rate": 3.906841021726884e-05,
      "loss": 0.873,
      "step": 254030
    },
    {
      "epoch": 0.8903468651757457,
      "grad_norm": 3.03125,
      "learning_rate": 3.906776118860513e-05,
      "loss": 0.8084,
      "step": 254040
    },
    {
      "epoch": 0.8903819126826413,
      "grad_norm": 2.75,
      "learning_rate": 3.9067112159941434e-05,
      "loss": 0.8973,
      "step": 254050
    },
    {
      "epoch": 0.8904169601895369,
      "grad_norm": 2.78125,
      "learning_rate": 3.906646313127773e-05,
      "loss": 0.7955,
      "step": 254060
    },
    {
      "epoch": 0.8904520076964325,
      "grad_norm": 3.734375,
      "learning_rate": 3.906581410261403e-05,
      "loss": 0.9308,
      "step": 254070
    },
    {
      "epoch": 0.8904870552033282,
      "grad_norm": 3.03125,
      "learning_rate": 3.9065165073950324e-05,
      "loss": 0.9386,
      "step": 254080
    },
    {
      "epoch": 0.8905221027102237,
      "grad_norm": 2.953125,
      "learning_rate": 3.9064516045286626e-05,
      "loss": 0.9098,
      "step": 254090
    },
    {
      "epoch": 0.8905571502171193,
      "grad_norm": 2.75,
      "learning_rate": 3.906386701662293e-05,
      "loss": 0.903,
      "step": 254100
    },
    {
      "epoch": 0.8905921977240149,
      "grad_norm": 2.734375,
      "learning_rate": 3.906321798795922e-05,
      "loss": 0.8483,
      "step": 254110
    },
    {
      "epoch": 0.8906272452309105,
      "grad_norm": 2.84375,
      "learning_rate": 3.906256895929552e-05,
      "loss": 0.9038,
      "step": 254120
    },
    {
      "epoch": 0.8906622927378061,
      "grad_norm": 2.609375,
      "learning_rate": 3.906191993063182e-05,
      "loss": 0.8729,
      "step": 254130
    },
    {
      "epoch": 0.8906973402447017,
      "grad_norm": 2.734375,
      "learning_rate": 3.906127090196812e-05,
      "loss": 0.8522,
      "step": 254140
    },
    {
      "epoch": 0.8907323877515972,
      "grad_norm": 2.734375,
      "learning_rate": 3.9060621873304414e-05,
      "loss": 0.8529,
      "step": 254150
    },
    {
      "epoch": 0.8907674352584929,
      "grad_norm": 2.65625,
      "learning_rate": 3.9059972844640715e-05,
      "loss": 0.8421,
      "step": 254160
    },
    {
      "epoch": 0.8908024827653885,
      "grad_norm": 2.6875,
      "learning_rate": 3.905932381597701e-05,
      "loss": 0.8085,
      "step": 254170
    },
    {
      "epoch": 0.8908375302722841,
      "grad_norm": 2.265625,
      "learning_rate": 3.905867478731331e-05,
      "loss": 0.838,
      "step": 254180
    },
    {
      "epoch": 0.8908725777791797,
      "grad_norm": 3.015625,
      "learning_rate": 3.9058025758649606e-05,
      "loss": 0.9152,
      "step": 254190
    },
    {
      "epoch": 0.8909076252860753,
      "grad_norm": 3.09375,
      "learning_rate": 3.905737672998591e-05,
      "loss": 0.8624,
      "step": 254200
    },
    {
      "epoch": 0.8909426727929709,
      "grad_norm": 2.90625,
      "learning_rate": 3.90567277013222e-05,
      "loss": 0.8523,
      "step": 254210
    },
    {
      "epoch": 0.8909777202998664,
      "grad_norm": 2.8125,
      "learning_rate": 3.90560786726585e-05,
      "loss": 0.8256,
      "step": 254220
    },
    {
      "epoch": 0.8910127678067621,
      "grad_norm": 3.140625,
      "learning_rate": 3.90554296439948e-05,
      "loss": 0.9331,
      "step": 254230
    },
    {
      "epoch": 0.8910478153136576,
      "grad_norm": 2.90625,
      "learning_rate": 3.90547806153311e-05,
      "loss": 0.9864,
      "step": 254240
    },
    {
      "epoch": 0.8910828628205533,
      "grad_norm": 3.125,
      "learning_rate": 3.90541315866674e-05,
      "loss": 0.9939,
      "step": 254250
    },
    {
      "epoch": 0.8911179103274488,
      "grad_norm": 2.625,
      "learning_rate": 3.9053482558003695e-05,
      "loss": 0.9146,
      "step": 254260
    },
    {
      "epoch": 0.8911529578343445,
      "grad_norm": 2.921875,
      "learning_rate": 3.9052833529339996e-05,
      "loss": 0.8514,
      "step": 254270
    },
    {
      "epoch": 0.8911880053412401,
      "grad_norm": 2.890625,
      "learning_rate": 3.905218450067629e-05,
      "loss": 0.8922,
      "step": 254280
    },
    {
      "epoch": 0.8912230528481356,
      "grad_norm": 3.140625,
      "learning_rate": 3.905153547201259e-05,
      "loss": 0.8507,
      "step": 254290
    },
    {
      "epoch": 0.8912581003550313,
      "grad_norm": 2.671875,
      "learning_rate": 3.905088644334889e-05,
      "loss": 0.9262,
      "step": 254300
    },
    {
      "epoch": 0.8912931478619268,
      "grad_norm": 2.625,
      "learning_rate": 3.905023741468518e-05,
      "loss": 0.8596,
      "step": 254310
    },
    {
      "epoch": 0.8913281953688225,
      "grad_norm": 2.953125,
      "learning_rate": 3.9049588386021476e-05,
      "loss": 0.8403,
      "step": 254320
    },
    {
      "epoch": 0.891363242875718,
      "grad_norm": 3.09375,
      "learning_rate": 3.904893935735778e-05,
      "loss": 0.9142,
      "step": 254330
    },
    {
      "epoch": 0.8913982903826136,
      "grad_norm": 3.34375,
      "learning_rate": 3.904829032869408e-05,
      "loss": 0.9235,
      "step": 254340
    },
    {
      "epoch": 0.8914333378895092,
      "grad_norm": 3.71875,
      "learning_rate": 3.9047641300030374e-05,
      "loss": 0.8779,
      "step": 254350
    },
    {
      "epoch": 0.8914683853964048,
      "grad_norm": 3.046875,
      "learning_rate": 3.9046992271366675e-05,
      "loss": 0.8226,
      "step": 254360
    },
    {
      "epoch": 0.8915034329033005,
      "grad_norm": 2.609375,
      "learning_rate": 3.904634324270297e-05,
      "loss": 0.8652,
      "step": 254370
    },
    {
      "epoch": 0.891538480410196,
      "grad_norm": 2.921875,
      "learning_rate": 3.904569421403927e-05,
      "loss": 0.8365,
      "step": 254380
    },
    {
      "epoch": 0.8915735279170917,
      "grad_norm": 2.953125,
      "learning_rate": 3.9045045185375566e-05,
      "loss": 0.859,
      "step": 254390
    },
    {
      "epoch": 0.8916085754239872,
      "grad_norm": 2.734375,
      "learning_rate": 3.904439615671187e-05,
      "loss": 0.8411,
      "step": 254400
    },
    {
      "epoch": 0.8916436229308828,
      "grad_norm": 2.828125,
      "learning_rate": 3.904374712804816e-05,
      "loss": 0.8989,
      "step": 254410
    },
    {
      "epoch": 0.8916786704377784,
      "grad_norm": 2.859375,
      "learning_rate": 3.904309809938446e-05,
      "loss": 0.9278,
      "step": 254420
    },
    {
      "epoch": 0.891713717944674,
      "grad_norm": 2.875,
      "learning_rate": 3.904244907072076e-05,
      "loss": 0.9309,
      "step": 254430
    },
    {
      "epoch": 0.8917487654515696,
      "grad_norm": 3.0,
      "learning_rate": 3.904180004205706e-05,
      "loss": 0.9519,
      "step": 254440
    },
    {
      "epoch": 0.8917838129584652,
      "grad_norm": 2.796875,
      "learning_rate": 3.9041151013393354e-05,
      "loss": 0.9199,
      "step": 254450
    },
    {
      "epoch": 0.8918188604653609,
      "grad_norm": 2.765625,
      "learning_rate": 3.9040501984729655e-05,
      "loss": 0.8985,
      "step": 254460
    },
    {
      "epoch": 0.8918539079722564,
      "grad_norm": 2.421875,
      "learning_rate": 3.9039852956065956e-05,
      "loss": 0.8829,
      "step": 254470
    },
    {
      "epoch": 0.891888955479152,
      "grad_norm": 2.8125,
      "learning_rate": 3.903920392740225e-05,
      "loss": 0.9211,
      "step": 254480
    },
    {
      "epoch": 0.8919240029860476,
      "grad_norm": 3.296875,
      "learning_rate": 3.903855489873855e-05,
      "loss": 0.8529,
      "step": 254490
    },
    {
      "epoch": 0.8919590504929432,
      "grad_norm": 2.734375,
      "learning_rate": 3.903790587007485e-05,
      "loss": 0.9006,
      "step": 254500
    },
    {
      "epoch": 0.8919940979998388,
      "grad_norm": 2.765625,
      "learning_rate": 3.903725684141115e-05,
      "loss": 0.9018,
      "step": 254510
    },
    {
      "epoch": 0.8920291455067344,
      "grad_norm": 3.234375,
      "learning_rate": 3.903660781274744e-05,
      "loss": 0.9052,
      "step": 254520
    },
    {
      "epoch": 0.8920641930136299,
      "grad_norm": 2.890625,
      "learning_rate": 3.9035958784083744e-05,
      "loss": 0.8846,
      "step": 254530
    },
    {
      "epoch": 0.8920992405205256,
      "grad_norm": 3.359375,
      "learning_rate": 3.903530975542004e-05,
      "loss": 0.9007,
      "step": 254540
    },
    {
      "epoch": 0.8921342880274211,
      "grad_norm": 2.328125,
      "learning_rate": 3.903466072675634e-05,
      "loss": 0.8435,
      "step": 254550
    },
    {
      "epoch": 0.8921693355343168,
      "grad_norm": 3.375,
      "learning_rate": 3.9034011698092635e-05,
      "loss": 0.9049,
      "step": 254560
    },
    {
      "epoch": 0.8922043830412124,
      "grad_norm": 2.875,
      "learning_rate": 3.9033362669428936e-05,
      "loss": 0.9296,
      "step": 254570
    },
    {
      "epoch": 0.892239430548108,
      "grad_norm": 2.546875,
      "learning_rate": 3.903271364076523e-05,
      "loss": 0.807,
      "step": 254580
    },
    {
      "epoch": 0.8922744780550036,
      "grad_norm": 2.9375,
      "learning_rate": 3.903206461210153e-05,
      "loss": 0.9099,
      "step": 254590
    },
    {
      "epoch": 0.8923095255618991,
      "grad_norm": 3.15625,
      "learning_rate": 3.9031415583437834e-05,
      "loss": 0.9634,
      "step": 254600
    },
    {
      "epoch": 0.8923445730687948,
      "grad_norm": 3.328125,
      "learning_rate": 3.903076655477413e-05,
      "loss": 0.8688,
      "step": 254610
    },
    {
      "epoch": 0.8923796205756903,
      "grad_norm": 3.0625,
      "learning_rate": 3.903011752611043e-05,
      "loss": 0.9311,
      "step": 254620
    },
    {
      "epoch": 0.892414668082586,
      "grad_norm": 2.71875,
      "learning_rate": 3.9029468497446724e-05,
      "loss": 0.897,
      "step": 254630
    },
    {
      "epoch": 0.8924497155894815,
      "grad_norm": 2.609375,
      "learning_rate": 3.9028819468783026e-05,
      "loss": 1.0203,
      "step": 254640
    },
    {
      "epoch": 0.8924847630963771,
      "grad_norm": 2.984375,
      "learning_rate": 3.902817044011932e-05,
      "loss": 0.8693,
      "step": 254650
    },
    {
      "epoch": 0.8925198106032728,
      "grad_norm": 2.890625,
      "learning_rate": 3.902752141145562e-05,
      "loss": 1.0086,
      "step": 254660
    },
    {
      "epoch": 0.8925548581101683,
      "grad_norm": 2.6875,
      "learning_rate": 3.9026872382791916e-05,
      "loss": 0.8635,
      "step": 254670
    },
    {
      "epoch": 0.892589905617064,
      "grad_norm": 3.078125,
      "learning_rate": 3.902622335412822e-05,
      "loss": 0.8547,
      "step": 254680
    },
    {
      "epoch": 0.8926249531239595,
      "grad_norm": 3.0,
      "learning_rate": 3.902557432546451e-05,
      "loss": 0.8993,
      "step": 254690
    },
    {
      "epoch": 0.8926600006308552,
      "grad_norm": 3.203125,
      "learning_rate": 3.902492529680081e-05,
      "loss": 0.927,
      "step": 254700
    },
    {
      "epoch": 0.8926950481377507,
      "grad_norm": 2.75,
      "learning_rate": 3.902427626813711e-05,
      "loss": 0.9152,
      "step": 254710
    },
    {
      "epoch": 0.8927300956446463,
      "grad_norm": 3.09375,
      "learning_rate": 3.90236272394734e-05,
      "loss": 0.9311,
      "step": 254720
    },
    {
      "epoch": 0.8927651431515419,
      "grad_norm": 2.9375,
      "learning_rate": 3.9022978210809704e-05,
      "loss": 0.9557,
      "step": 254730
    },
    {
      "epoch": 0.8928001906584375,
      "grad_norm": 2.9375,
      "learning_rate": 3.9022329182146e-05,
      "loss": 0.8296,
      "step": 254740
    },
    {
      "epoch": 0.8928352381653331,
      "grad_norm": 3.3125,
      "learning_rate": 3.90216801534823e-05,
      "loss": 0.8946,
      "step": 254750
    },
    {
      "epoch": 0.8928702856722287,
      "grad_norm": 2.8125,
      "learning_rate": 3.9021031124818595e-05,
      "loss": 0.8513,
      "step": 254760
    },
    {
      "epoch": 0.8929053331791243,
      "grad_norm": 2.859375,
      "learning_rate": 3.9020382096154896e-05,
      "loss": 0.9152,
      "step": 254770
    },
    {
      "epoch": 0.8929403806860199,
      "grad_norm": 3.015625,
      "learning_rate": 3.901973306749119e-05,
      "loss": 0.8601,
      "step": 254780
    },
    {
      "epoch": 0.8929754281929155,
      "grad_norm": 2.921875,
      "learning_rate": 3.901908403882749e-05,
      "loss": 0.8853,
      "step": 254790
    },
    {
      "epoch": 0.8930104756998111,
      "grad_norm": 3.015625,
      "learning_rate": 3.901843501016379e-05,
      "loss": 0.8859,
      "step": 254800
    },
    {
      "epoch": 0.8930455232067067,
      "grad_norm": 3.34375,
      "learning_rate": 3.901778598150009e-05,
      "loss": 0.8663,
      "step": 254810
    },
    {
      "epoch": 0.8930805707136023,
      "grad_norm": 3.109375,
      "learning_rate": 3.901713695283638e-05,
      "loss": 0.9569,
      "step": 254820
    },
    {
      "epoch": 0.8931156182204979,
      "grad_norm": 2.71875,
      "learning_rate": 3.9016487924172684e-05,
      "loss": 0.9624,
      "step": 254830
    },
    {
      "epoch": 0.8931506657273934,
      "grad_norm": 3.28125,
      "learning_rate": 3.9015838895508986e-05,
      "loss": 0.9164,
      "step": 254840
    },
    {
      "epoch": 0.8931857132342891,
      "grad_norm": 3.25,
      "learning_rate": 3.901518986684528e-05,
      "loss": 0.8288,
      "step": 254850
    },
    {
      "epoch": 0.8932207607411847,
      "grad_norm": 2.8125,
      "learning_rate": 3.901454083818158e-05,
      "loss": 0.8813,
      "step": 254860
    },
    {
      "epoch": 0.8932558082480803,
      "grad_norm": 3.0625,
      "learning_rate": 3.9013891809517876e-05,
      "loss": 0.9198,
      "step": 254870
    },
    {
      "epoch": 0.8932908557549759,
      "grad_norm": 3.25,
      "learning_rate": 3.901324278085418e-05,
      "loss": 0.8775,
      "step": 254880
    },
    {
      "epoch": 0.8933259032618714,
      "grad_norm": 3.453125,
      "learning_rate": 3.901259375219047e-05,
      "loss": 0.8626,
      "step": 254890
    },
    {
      "epoch": 0.8933609507687671,
      "grad_norm": 3.09375,
      "learning_rate": 3.9011944723526774e-05,
      "loss": 0.8577,
      "step": 254900
    },
    {
      "epoch": 0.8933959982756626,
      "grad_norm": 2.984375,
      "learning_rate": 3.901129569486307e-05,
      "loss": 0.9135,
      "step": 254910
    },
    {
      "epoch": 0.8934310457825583,
      "grad_norm": 2.765625,
      "learning_rate": 3.901064666619937e-05,
      "loss": 0.9059,
      "step": 254920
    },
    {
      "epoch": 0.8934660932894538,
      "grad_norm": 2.75,
      "learning_rate": 3.9009997637535664e-05,
      "loss": 0.8166,
      "step": 254930
    },
    {
      "epoch": 0.8935011407963495,
      "grad_norm": 3.0625,
      "learning_rate": 3.9009348608871966e-05,
      "loss": 0.8996,
      "step": 254940
    },
    {
      "epoch": 0.893536188303245,
      "grad_norm": 2.84375,
      "learning_rate": 3.900869958020826e-05,
      "loss": 0.9636,
      "step": 254950
    },
    {
      "epoch": 0.8935712358101406,
      "grad_norm": 2.984375,
      "learning_rate": 3.900805055154456e-05,
      "loss": 0.906,
      "step": 254960
    },
    {
      "epoch": 0.8936062833170363,
      "grad_norm": 2.5625,
      "learning_rate": 3.900740152288086e-05,
      "loss": 0.8194,
      "step": 254970
    },
    {
      "epoch": 0.8936413308239318,
      "grad_norm": 2.34375,
      "learning_rate": 3.900675249421716e-05,
      "loss": 0.8824,
      "step": 254980
    },
    {
      "epoch": 0.8936763783308275,
      "grad_norm": 3.96875,
      "learning_rate": 3.900610346555346e-05,
      "loss": 0.9506,
      "step": 254990
    },
    {
      "epoch": 0.893711425837723,
      "grad_norm": 3.140625,
      "learning_rate": 3.9005454436889754e-05,
      "loss": 0.8414,
      "step": 255000
    },
    {
      "epoch": 0.893711425837723,
      "eval_loss": 0.8310039043426514,
      "eval_runtime": 553.5101,
      "eval_samples_per_second": 687.315,
      "eval_steps_per_second": 57.276,
      "step": 255000
    },
    {
      "epoch": 0.8937464733446187,
      "grad_norm": 2.84375,
      "learning_rate": 3.9004805408226055e-05,
      "loss": 0.8811,
      "step": 255010
    },
    {
      "epoch": 0.8937815208515142,
      "grad_norm": 2.796875,
      "learning_rate": 3.900415637956235e-05,
      "loss": 0.8211,
      "step": 255020
    },
    {
      "epoch": 0.8938165683584098,
      "grad_norm": 3.234375,
      "learning_rate": 3.900350735089865e-05,
      "loss": 0.8809,
      "step": 255030
    },
    {
      "epoch": 0.8938516158653054,
      "grad_norm": 3.0,
      "learning_rate": 3.9002858322234946e-05,
      "loss": 0.9321,
      "step": 255040
    },
    {
      "epoch": 0.893886663372201,
      "grad_norm": 2.640625,
      "learning_rate": 3.900220929357125e-05,
      "loss": 0.8854,
      "step": 255050
    },
    {
      "epoch": 0.8939217108790967,
      "grad_norm": 3.234375,
      "learning_rate": 3.900156026490754e-05,
      "loss": 0.8971,
      "step": 255060
    },
    {
      "epoch": 0.8939567583859922,
      "grad_norm": 2.984375,
      "learning_rate": 3.9000911236243836e-05,
      "loss": 0.9883,
      "step": 255070
    },
    {
      "epoch": 0.8939918058928878,
      "grad_norm": 2.546875,
      "learning_rate": 3.900026220758014e-05,
      "loss": 0.7919,
      "step": 255080
    },
    {
      "epoch": 0.8940268533997834,
      "grad_norm": 2.640625,
      "learning_rate": 3.899961317891643e-05,
      "loss": 0.8047,
      "step": 255090
    },
    {
      "epoch": 0.894061900906679,
      "grad_norm": 2.984375,
      "learning_rate": 3.8998964150252734e-05,
      "loss": 0.8864,
      "step": 255100
    },
    {
      "epoch": 0.8940969484135746,
      "grad_norm": 3.09375,
      "learning_rate": 3.899831512158903e-05,
      "loss": 0.8633,
      "step": 255110
    },
    {
      "epoch": 0.8941319959204702,
      "grad_norm": 3.09375,
      "learning_rate": 3.899766609292533e-05,
      "loss": 0.8644,
      "step": 255120
    },
    {
      "epoch": 0.8941670434273657,
      "grad_norm": 3.265625,
      "learning_rate": 3.8997017064261624e-05,
      "loss": 0.9325,
      "step": 255130
    },
    {
      "epoch": 0.8942020909342614,
      "grad_norm": 2.984375,
      "learning_rate": 3.8996368035597926e-05,
      "loss": 0.8538,
      "step": 255140
    },
    {
      "epoch": 0.894237138441157,
      "grad_norm": 2.9375,
      "learning_rate": 3.899571900693422e-05,
      "loss": 0.7338,
      "step": 255150
    },
    {
      "epoch": 0.8942721859480526,
      "grad_norm": 3.390625,
      "learning_rate": 3.899506997827052e-05,
      "loss": 0.9269,
      "step": 255160
    },
    {
      "epoch": 0.8943072334549482,
      "grad_norm": 2.8125,
      "learning_rate": 3.8994420949606816e-05,
      "loss": 0.7531,
      "step": 255170
    },
    {
      "epoch": 0.8943422809618438,
      "grad_norm": 2.953125,
      "learning_rate": 3.899377192094312e-05,
      "loss": 0.8956,
      "step": 255180
    },
    {
      "epoch": 0.8943773284687394,
      "grad_norm": 2.859375,
      "learning_rate": 3.899312289227941e-05,
      "loss": 0.9127,
      "step": 255190
    },
    {
      "epoch": 0.8944123759756349,
      "grad_norm": 3.15625,
      "learning_rate": 3.8992473863615714e-05,
      "loss": 0.8688,
      "step": 255200
    },
    {
      "epoch": 0.8944474234825306,
      "grad_norm": 2.671875,
      "learning_rate": 3.8991824834952015e-05,
      "loss": 0.8518,
      "step": 255210
    },
    {
      "epoch": 0.8944824709894261,
      "grad_norm": 2.875,
      "learning_rate": 3.899117580628831e-05,
      "loss": 0.7939,
      "step": 255220
    },
    {
      "epoch": 0.8945175184963218,
      "grad_norm": 3.015625,
      "learning_rate": 3.899052677762461e-05,
      "loss": 0.8835,
      "step": 255230
    },
    {
      "epoch": 0.8945525660032173,
      "grad_norm": 3.09375,
      "learning_rate": 3.8989877748960906e-05,
      "loss": 0.8712,
      "step": 255240
    },
    {
      "epoch": 0.894587613510113,
      "grad_norm": 2.78125,
      "learning_rate": 3.898922872029721e-05,
      "loss": 0.8913,
      "step": 255250
    },
    {
      "epoch": 0.8946226610170086,
      "grad_norm": 3.0,
      "learning_rate": 3.89885796916335e-05,
      "loss": 0.9563,
      "step": 255260
    },
    {
      "epoch": 0.8946577085239041,
      "grad_norm": 3.09375,
      "learning_rate": 3.89879306629698e-05,
      "loss": 0.9674,
      "step": 255270
    },
    {
      "epoch": 0.8946927560307998,
      "grad_norm": 2.953125,
      "learning_rate": 3.89872816343061e-05,
      "loss": 0.8249,
      "step": 255280
    },
    {
      "epoch": 0.8947278035376953,
      "grad_norm": 3.015625,
      "learning_rate": 3.89866326056424e-05,
      "loss": 0.8185,
      "step": 255290
    },
    {
      "epoch": 0.894762851044591,
      "grad_norm": 2.9375,
      "learning_rate": 3.8985983576978694e-05,
      "loss": 0.9032,
      "step": 255300
    },
    {
      "epoch": 0.8947978985514865,
      "grad_norm": 3.390625,
      "learning_rate": 3.8985334548314995e-05,
      "loss": 0.9282,
      "step": 255310
    },
    {
      "epoch": 0.8948329460583822,
      "grad_norm": 3.03125,
      "learning_rate": 3.898468551965129e-05,
      "loss": 0.807,
      "step": 255320
    },
    {
      "epoch": 0.8948679935652777,
      "grad_norm": 2.984375,
      "learning_rate": 3.898403649098759e-05,
      "loss": 0.8421,
      "step": 255330
    },
    {
      "epoch": 0.8949030410721733,
      "grad_norm": 2.875,
      "learning_rate": 3.898338746232389e-05,
      "loss": 0.8782,
      "step": 255340
    },
    {
      "epoch": 0.894938088579069,
      "grad_norm": 2.890625,
      "learning_rate": 3.898273843366019e-05,
      "loss": 0.9268,
      "step": 255350
    },
    {
      "epoch": 0.8949731360859645,
      "grad_norm": 3.0,
      "learning_rate": 3.898208940499649e-05,
      "loss": 0.9809,
      "step": 255360
    },
    {
      "epoch": 0.8950081835928602,
      "grad_norm": 2.59375,
      "learning_rate": 3.898144037633278e-05,
      "loss": 0.8733,
      "step": 255370
    },
    {
      "epoch": 0.8950432310997557,
      "grad_norm": 2.75,
      "learning_rate": 3.8980791347669084e-05,
      "loss": 0.8736,
      "step": 255380
    },
    {
      "epoch": 0.8950782786066513,
      "grad_norm": 2.984375,
      "learning_rate": 3.898014231900538e-05,
      "loss": 0.9054,
      "step": 255390
    },
    {
      "epoch": 0.8951133261135469,
      "grad_norm": 3.015625,
      "learning_rate": 3.897949329034168e-05,
      "loss": 0.9002,
      "step": 255400
    },
    {
      "epoch": 0.8951483736204425,
      "grad_norm": 3.390625,
      "learning_rate": 3.8978844261677975e-05,
      "loss": 0.9653,
      "step": 255410
    },
    {
      "epoch": 0.8951834211273381,
      "grad_norm": 3.15625,
      "learning_rate": 3.8978195233014276e-05,
      "loss": 0.8484,
      "step": 255420
    },
    {
      "epoch": 0.8952184686342337,
      "grad_norm": 2.796875,
      "learning_rate": 3.897754620435057e-05,
      "loss": 0.8282,
      "step": 255430
    },
    {
      "epoch": 0.8952535161411292,
      "grad_norm": 3.03125,
      "learning_rate": 3.8976897175686866e-05,
      "loss": 0.8968,
      "step": 255440
    },
    {
      "epoch": 0.8952885636480249,
      "grad_norm": 2.953125,
      "learning_rate": 3.897624814702317e-05,
      "loss": 0.8353,
      "step": 255450
    },
    {
      "epoch": 0.8953236111549205,
      "grad_norm": 3.140625,
      "learning_rate": 3.897559911835946e-05,
      "loss": 0.8235,
      "step": 255460
    },
    {
      "epoch": 0.8953586586618161,
      "grad_norm": 2.890625,
      "learning_rate": 3.897495008969576e-05,
      "loss": 0.8564,
      "step": 255470
    },
    {
      "epoch": 0.8953937061687117,
      "grad_norm": 2.421875,
      "learning_rate": 3.897430106103206e-05,
      "loss": 0.9258,
      "step": 255480
    },
    {
      "epoch": 0.8954287536756073,
      "grad_norm": 2.5625,
      "learning_rate": 3.897365203236836e-05,
      "loss": 0.9169,
      "step": 255490
    },
    {
      "epoch": 0.8954638011825029,
      "grad_norm": 3.296875,
      "learning_rate": 3.8973003003704654e-05,
      "loss": 0.901,
      "step": 255500
    },
    {
      "epoch": 0.8954988486893984,
      "grad_norm": 3.0625,
      "learning_rate": 3.8972353975040955e-05,
      "loss": 0.8268,
      "step": 255510
    },
    {
      "epoch": 0.8955338961962941,
      "grad_norm": 2.921875,
      "learning_rate": 3.897170494637725e-05,
      "loss": 0.9079,
      "step": 255520
    },
    {
      "epoch": 0.8955689437031896,
      "grad_norm": 2.390625,
      "learning_rate": 3.897105591771355e-05,
      "loss": 0.898,
      "step": 255530
    },
    {
      "epoch": 0.8956039912100853,
      "grad_norm": 2.890625,
      "learning_rate": 3.8970406889049846e-05,
      "loss": 0.8948,
      "step": 255540
    },
    {
      "epoch": 0.8956390387169809,
      "grad_norm": 2.890625,
      "learning_rate": 3.896975786038615e-05,
      "loss": 0.8933,
      "step": 255550
    },
    {
      "epoch": 0.8956740862238765,
      "grad_norm": 2.59375,
      "learning_rate": 3.896910883172244e-05,
      "loss": 0.8381,
      "step": 255560
    },
    {
      "epoch": 0.8957091337307721,
      "grad_norm": 2.515625,
      "learning_rate": 3.896845980305874e-05,
      "loss": 0.9015,
      "step": 255570
    },
    {
      "epoch": 0.8957441812376676,
      "grad_norm": 2.359375,
      "learning_rate": 3.8967810774395044e-05,
      "loss": 0.8802,
      "step": 255580
    },
    {
      "epoch": 0.8957792287445633,
      "grad_norm": 2.4375,
      "learning_rate": 3.896716174573134e-05,
      "loss": 0.9499,
      "step": 255590
    },
    {
      "epoch": 0.8958142762514588,
      "grad_norm": 2.703125,
      "learning_rate": 3.896651271706764e-05,
      "loss": 0.927,
      "step": 255600
    },
    {
      "epoch": 0.8958493237583545,
      "grad_norm": 3.203125,
      "learning_rate": 3.8965863688403935e-05,
      "loss": 0.8183,
      "step": 255610
    },
    {
      "epoch": 0.89588437126525,
      "grad_norm": 3.546875,
      "learning_rate": 3.8965214659740236e-05,
      "loss": 0.9306,
      "step": 255620
    },
    {
      "epoch": 0.8959194187721456,
      "grad_norm": 2.703125,
      "learning_rate": 3.896456563107653e-05,
      "loss": 0.9084,
      "step": 255630
    },
    {
      "epoch": 0.8959544662790413,
      "grad_norm": 3.015625,
      "learning_rate": 3.896391660241283e-05,
      "loss": 0.9312,
      "step": 255640
    },
    {
      "epoch": 0.8959895137859368,
      "grad_norm": 2.5,
      "learning_rate": 3.896326757374913e-05,
      "loss": 0.8492,
      "step": 255650
    },
    {
      "epoch": 0.8960245612928325,
      "grad_norm": 3.03125,
      "learning_rate": 3.896261854508543e-05,
      "loss": 0.8243,
      "step": 255660
    },
    {
      "epoch": 0.896059608799728,
      "grad_norm": 3.09375,
      "learning_rate": 3.896196951642172e-05,
      "loss": 0.8933,
      "step": 255670
    },
    {
      "epoch": 0.8960946563066237,
      "grad_norm": 2.0,
      "learning_rate": 3.8961320487758024e-05,
      "loss": 0.8569,
      "step": 255680
    },
    {
      "epoch": 0.8961297038135192,
      "grad_norm": 3.15625,
      "learning_rate": 3.896067145909432e-05,
      "loss": 0.9072,
      "step": 255690
    },
    {
      "epoch": 0.8961647513204148,
      "grad_norm": 2.53125,
      "learning_rate": 3.896002243043062e-05,
      "loss": 0.8011,
      "step": 255700
    },
    {
      "epoch": 0.8961997988273104,
      "grad_norm": 2.453125,
      "learning_rate": 3.895937340176692e-05,
      "loss": 0.902,
      "step": 255710
    },
    {
      "epoch": 0.896234846334206,
      "grad_norm": 2.78125,
      "learning_rate": 3.8958724373103216e-05,
      "loss": 0.9556,
      "step": 255720
    },
    {
      "epoch": 0.8962698938411016,
      "grad_norm": 3.109375,
      "learning_rate": 3.895807534443952e-05,
      "loss": 0.8464,
      "step": 255730
    },
    {
      "epoch": 0.8963049413479972,
      "grad_norm": 3.21875,
      "learning_rate": 3.895742631577581e-05,
      "loss": 0.859,
      "step": 255740
    },
    {
      "epoch": 0.8963399888548929,
      "grad_norm": 2.859375,
      "learning_rate": 3.8956777287112114e-05,
      "loss": 0.8715,
      "step": 255750
    },
    {
      "epoch": 0.8963750363617884,
      "grad_norm": 2.84375,
      "learning_rate": 3.895612825844841e-05,
      "loss": 0.9003,
      "step": 255760
    },
    {
      "epoch": 0.896410083868684,
      "grad_norm": 3.171875,
      "learning_rate": 3.895547922978471e-05,
      "loss": 0.9102,
      "step": 255770
    },
    {
      "epoch": 0.8964451313755796,
      "grad_norm": 3.546875,
      "learning_rate": 3.8954830201121004e-05,
      "loss": 0.8504,
      "step": 255780
    },
    {
      "epoch": 0.8964801788824752,
      "grad_norm": 3.15625,
      "learning_rate": 3.8954181172457306e-05,
      "loss": 0.9067,
      "step": 255790
    },
    {
      "epoch": 0.8965152263893708,
      "grad_norm": 2.984375,
      "learning_rate": 3.89535321437936e-05,
      "loss": 0.8755,
      "step": 255800
    },
    {
      "epoch": 0.8965502738962664,
      "grad_norm": 3.03125,
      "learning_rate": 3.89528831151299e-05,
      "loss": 0.8508,
      "step": 255810
    },
    {
      "epoch": 0.8965853214031619,
      "grad_norm": 2.71875,
      "learning_rate": 3.8952234086466196e-05,
      "loss": 0.8145,
      "step": 255820
    },
    {
      "epoch": 0.8966203689100576,
      "grad_norm": 3.0,
      "learning_rate": 3.895158505780249e-05,
      "loss": 0.88,
      "step": 255830
    },
    {
      "epoch": 0.8966554164169532,
      "grad_norm": 2.921875,
      "learning_rate": 3.895093602913879e-05,
      "loss": 0.8632,
      "step": 255840
    },
    {
      "epoch": 0.8966904639238488,
      "grad_norm": 2.96875,
      "learning_rate": 3.895028700047509e-05,
      "loss": 0.9225,
      "step": 255850
    },
    {
      "epoch": 0.8967255114307444,
      "grad_norm": 2.84375,
      "learning_rate": 3.894963797181139e-05,
      "loss": 0.8979,
      "step": 255860
    },
    {
      "epoch": 0.89676055893764,
      "grad_norm": 3.09375,
      "learning_rate": 3.894898894314768e-05,
      "loss": 0.844,
      "step": 255870
    },
    {
      "epoch": 0.8967956064445356,
      "grad_norm": 2.84375,
      "learning_rate": 3.8948339914483984e-05,
      "loss": 0.9402,
      "step": 255880
    },
    {
      "epoch": 0.8968306539514311,
      "grad_norm": 3.0625,
      "learning_rate": 3.894769088582028e-05,
      "loss": 0.8775,
      "step": 255890
    },
    {
      "epoch": 0.8968657014583268,
      "grad_norm": 2.96875,
      "learning_rate": 3.894704185715658e-05,
      "loss": 0.8964,
      "step": 255900
    },
    {
      "epoch": 0.8969007489652223,
      "grad_norm": 3.171875,
      "learning_rate": 3.8946392828492875e-05,
      "loss": 0.8772,
      "step": 255910
    },
    {
      "epoch": 0.896935796472118,
      "grad_norm": 3.078125,
      "learning_rate": 3.8945743799829176e-05,
      "loss": 0.9113,
      "step": 255920
    },
    {
      "epoch": 0.8969708439790135,
      "grad_norm": 2.875,
      "learning_rate": 3.894509477116548e-05,
      "loss": 0.9448,
      "step": 255930
    },
    {
      "epoch": 0.8970058914859091,
      "grad_norm": 2.8125,
      "learning_rate": 3.894444574250177e-05,
      "loss": 0.9127,
      "step": 255940
    },
    {
      "epoch": 0.8970409389928048,
      "grad_norm": 3.109375,
      "learning_rate": 3.8943796713838074e-05,
      "loss": 0.935,
      "step": 255950
    },
    {
      "epoch": 0.8970759864997003,
      "grad_norm": 2.78125,
      "learning_rate": 3.894314768517437e-05,
      "loss": 0.8848,
      "step": 255960
    },
    {
      "epoch": 0.897111034006596,
      "grad_norm": 2.9375,
      "learning_rate": 3.894249865651067e-05,
      "loss": 0.8452,
      "step": 255970
    },
    {
      "epoch": 0.8971460815134915,
      "grad_norm": 2.8125,
      "learning_rate": 3.8941849627846964e-05,
      "loss": 0.9295,
      "step": 255980
    },
    {
      "epoch": 0.8971811290203872,
      "grad_norm": 2.859375,
      "learning_rate": 3.8941200599183266e-05,
      "loss": 0.848,
      "step": 255990
    },
    {
      "epoch": 0.8972161765272827,
      "grad_norm": 3.3125,
      "learning_rate": 3.894055157051956e-05,
      "loss": 0.8884,
      "step": 256000
    },
    {
      "epoch": 0.8972512240341783,
      "grad_norm": 2.859375,
      "learning_rate": 3.893990254185586e-05,
      "loss": 0.8771,
      "step": 256010
    },
    {
      "epoch": 0.8972862715410739,
      "grad_norm": 2.75,
      "learning_rate": 3.8939253513192156e-05,
      "loss": 0.799,
      "step": 256020
    },
    {
      "epoch": 0.8973213190479695,
      "grad_norm": 3.109375,
      "learning_rate": 3.893860448452846e-05,
      "loss": 0.959,
      "step": 256030
    },
    {
      "epoch": 0.8973563665548652,
      "grad_norm": 3.140625,
      "learning_rate": 3.893795545586475e-05,
      "loss": 0.8087,
      "step": 256040
    },
    {
      "epoch": 0.8973914140617607,
      "grad_norm": 3.1875,
      "learning_rate": 3.8937306427201054e-05,
      "loss": 0.8576,
      "step": 256050
    },
    {
      "epoch": 0.8974264615686564,
      "grad_norm": 2.59375,
      "learning_rate": 3.893665739853735e-05,
      "loss": 0.7938,
      "step": 256060
    },
    {
      "epoch": 0.8974615090755519,
      "grad_norm": 2.953125,
      "learning_rate": 3.893600836987365e-05,
      "loss": 0.9344,
      "step": 256070
    },
    {
      "epoch": 0.8974965565824475,
      "grad_norm": 2.75,
      "learning_rate": 3.893535934120995e-05,
      "loss": 0.8199,
      "step": 256080
    },
    {
      "epoch": 0.8975316040893431,
      "grad_norm": 2.75,
      "learning_rate": 3.8934710312546246e-05,
      "loss": 0.866,
      "step": 256090
    },
    {
      "epoch": 0.8975666515962387,
      "grad_norm": 3.125,
      "learning_rate": 3.893406128388255e-05,
      "loss": 0.8837,
      "step": 256100
    },
    {
      "epoch": 0.8976016991031343,
      "grad_norm": 3.328125,
      "learning_rate": 3.893341225521884e-05,
      "loss": 0.8935,
      "step": 256110
    },
    {
      "epoch": 0.8976367466100299,
      "grad_norm": 2.9375,
      "learning_rate": 3.893276322655514e-05,
      "loss": 0.8392,
      "step": 256120
    },
    {
      "epoch": 0.8976717941169255,
      "grad_norm": 2.671875,
      "learning_rate": 3.893211419789144e-05,
      "loss": 0.8175,
      "step": 256130
    },
    {
      "epoch": 0.8977068416238211,
      "grad_norm": 3.03125,
      "learning_rate": 3.893146516922774e-05,
      "loss": 0.9196,
      "step": 256140
    },
    {
      "epoch": 0.8977418891307167,
      "grad_norm": 2.875,
      "learning_rate": 3.8930816140564034e-05,
      "loss": 0.9144,
      "step": 256150
    },
    {
      "epoch": 0.8977769366376123,
      "grad_norm": 2.78125,
      "learning_rate": 3.8930167111900335e-05,
      "loss": 0.9159,
      "step": 256160
    },
    {
      "epoch": 0.8978119841445079,
      "grad_norm": 2.84375,
      "learning_rate": 3.892951808323663e-05,
      "loss": 0.9286,
      "step": 256170
    },
    {
      "epoch": 0.8978470316514034,
      "grad_norm": 3.046875,
      "learning_rate": 3.892886905457293e-05,
      "loss": 0.893,
      "step": 256180
    },
    {
      "epoch": 0.8978820791582991,
      "grad_norm": 3.296875,
      "learning_rate": 3.8928220025909226e-05,
      "loss": 0.8573,
      "step": 256190
    },
    {
      "epoch": 0.8979171266651946,
      "grad_norm": 2.96875,
      "learning_rate": 3.892757099724552e-05,
      "loss": 0.9082,
      "step": 256200
    },
    {
      "epoch": 0.8979521741720903,
      "grad_norm": 2.703125,
      "learning_rate": 3.892692196858182e-05,
      "loss": 0.8966,
      "step": 256210
    },
    {
      "epoch": 0.8979872216789858,
      "grad_norm": 2.796875,
      "learning_rate": 3.8926272939918116e-05,
      "loss": 0.8385,
      "step": 256220
    },
    {
      "epoch": 0.8980222691858815,
      "grad_norm": 3.234375,
      "learning_rate": 3.892562391125442e-05,
      "loss": 0.8811,
      "step": 256230
    },
    {
      "epoch": 0.8980573166927771,
      "grad_norm": 3.046875,
      "learning_rate": 3.892497488259071e-05,
      "loss": 0.9195,
      "step": 256240
    },
    {
      "epoch": 0.8980923641996726,
      "grad_norm": 2.765625,
      "learning_rate": 3.8924325853927014e-05,
      "loss": 0.836,
      "step": 256250
    },
    {
      "epoch": 0.8981274117065683,
      "grad_norm": 2.546875,
      "learning_rate": 3.892367682526331e-05,
      "loss": 0.8571,
      "step": 256260
    },
    {
      "epoch": 0.8981624592134638,
      "grad_norm": 2.84375,
      "learning_rate": 3.892302779659961e-05,
      "loss": 0.7862,
      "step": 256270
    },
    {
      "epoch": 0.8981975067203595,
      "grad_norm": 2.78125,
      "learning_rate": 3.8922378767935904e-05,
      "loss": 0.9333,
      "step": 256280
    },
    {
      "epoch": 0.898232554227255,
      "grad_norm": 2.828125,
      "learning_rate": 3.8921729739272206e-05,
      "loss": 0.9429,
      "step": 256290
    },
    {
      "epoch": 0.8982676017341507,
      "grad_norm": 2.78125,
      "learning_rate": 3.892108071060851e-05,
      "loss": 0.9184,
      "step": 256300
    },
    {
      "epoch": 0.8983026492410462,
      "grad_norm": 2.890625,
      "learning_rate": 3.89204316819448e-05,
      "loss": 0.9129,
      "step": 256310
    },
    {
      "epoch": 0.8983376967479418,
      "grad_norm": 3.140625,
      "learning_rate": 3.89197826532811e-05,
      "loss": 0.9723,
      "step": 256320
    },
    {
      "epoch": 0.8983727442548375,
      "grad_norm": 3.046875,
      "learning_rate": 3.89191336246174e-05,
      "loss": 0.871,
      "step": 256330
    },
    {
      "epoch": 0.898407791761733,
      "grad_norm": 2.90625,
      "learning_rate": 3.89184845959537e-05,
      "loss": 0.9066,
      "step": 256340
    },
    {
      "epoch": 0.8984428392686287,
      "grad_norm": 2.5625,
      "learning_rate": 3.8917835567289994e-05,
      "loss": 0.8381,
      "step": 256350
    },
    {
      "epoch": 0.8984778867755242,
      "grad_norm": 3.0,
      "learning_rate": 3.8917186538626295e-05,
      "loss": 0.845,
      "step": 256360
    },
    {
      "epoch": 0.8985129342824199,
      "grad_norm": 2.8125,
      "learning_rate": 3.891653750996259e-05,
      "loss": 0.8095,
      "step": 256370
    },
    {
      "epoch": 0.8985479817893154,
      "grad_norm": 2.921875,
      "learning_rate": 3.891588848129889e-05,
      "loss": 0.7656,
      "step": 256380
    },
    {
      "epoch": 0.898583029296211,
      "grad_norm": 2.953125,
      "learning_rate": 3.8915239452635186e-05,
      "loss": 0.9085,
      "step": 256390
    },
    {
      "epoch": 0.8986180768031066,
      "grad_norm": 2.734375,
      "learning_rate": 3.891459042397149e-05,
      "loss": 0.8803,
      "step": 256400
    },
    {
      "epoch": 0.8986531243100022,
      "grad_norm": 2.640625,
      "learning_rate": 3.891394139530778e-05,
      "loss": 0.8597,
      "step": 256410
    },
    {
      "epoch": 0.8986881718168978,
      "grad_norm": 3.4375,
      "learning_rate": 3.891329236664408e-05,
      "loss": 0.9327,
      "step": 256420
    },
    {
      "epoch": 0.8987232193237934,
      "grad_norm": 2.453125,
      "learning_rate": 3.891264333798038e-05,
      "loss": 0.9062,
      "step": 256430
    },
    {
      "epoch": 0.898758266830689,
      "grad_norm": 3.03125,
      "learning_rate": 3.891199430931668e-05,
      "loss": 0.9076,
      "step": 256440
    },
    {
      "epoch": 0.8987933143375846,
      "grad_norm": 2.6875,
      "learning_rate": 3.891134528065298e-05,
      "loss": 0.8863,
      "step": 256450
    },
    {
      "epoch": 0.8988283618444802,
      "grad_norm": 3.21875,
      "learning_rate": 3.8910696251989275e-05,
      "loss": 0.9324,
      "step": 256460
    },
    {
      "epoch": 0.8988634093513758,
      "grad_norm": 3.65625,
      "learning_rate": 3.891004722332558e-05,
      "loss": 0.8876,
      "step": 256470
    },
    {
      "epoch": 0.8988984568582714,
      "grad_norm": 2.953125,
      "learning_rate": 3.890939819466187e-05,
      "loss": 0.8536,
      "step": 256480
    },
    {
      "epoch": 0.898933504365167,
      "grad_norm": 2.453125,
      "learning_rate": 3.890874916599817e-05,
      "loss": 0.8186,
      "step": 256490
    },
    {
      "epoch": 0.8989685518720626,
      "grad_norm": 3.296875,
      "learning_rate": 3.890810013733447e-05,
      "loss": 0.908,
      "step": 256500
    },
    {
      "epoch": 0.8990035993789581,
      "grad_norm": 2.765625,
      "learning_rate": 3.890745110867077e-05,
      "loss": 0.8802,
      "step": 256510
    },
    {
      "epoch": 0.8990386468858538,
      "grad_norm": 2.984375,
      "learning_rate": 3.890680208000706e-05,
      "loss": 0.8944,
      "step": 256520
    },
    {
      "epoch": 0.8990736943927494,
      "grad_norm": 2.859375,
      "learning_rate": 3.8906153051343365e-05,
      "loss": 0.8351,
      "step": 256530
    },
    {
      "epoch": 0.899108741899645,
      "grad_norm": 3.046875,
      "learning_rate": 3.890550402267966e-05,
      "loss": 0.9591,
      "step": 256540
    },
    {
      "epoch": 0.8991437894065406,
      "grad_norm": 3.0,
      "learning_rate": 3.890485499401596e-05,
      "loss": 0.9109,
      "step": 256550
    },
    {
      "epoch": 0.8991788369134361,
      "grad_norm": 3.484375,
      "learning_rate": 3.8904205965352255e-05,
      "loss": 0.8043,
      "step": 256560
    },
    {
      "epoch": 0.8992138844203318,
      "grad_norm": 2.828125,
      "learning_rate": 3.890355693668855e-05,
      "loss": 0.8336,
      "step": 256570
    },
    {
      "epoch": 0.8992489319272273,
      "grad_norm": 2.921875,
      "learning_rate": 3.890290790802485e-05,
      "loss": 0.9293,
      "step": 256580
    },
    {
      "epoch": 0.899283979434123,
      "grad_norm": 4.78125,
      "learning_rate": 3.8902258879361146e-05,
      "loss": 0.8126,
      "step": 256590
    },
    {
      "epoch": 0.8993190269410185,
      "grad_norm": 4.625,
      "learning_rate": 3.890160985069745e-05,
      "loss": 0.9979,
      "step": 256600
    },
    {
      "epoch": 0.8993540744479142,
      "grad_norm": 3.0625,
      "learning_rate": 3.890096082203374e-05,
      "loss": 0.8819,
      "step": 256610
    },
    {
      "epoch": 0.8993891219548097,
      "grad_norm": 2.984375,
      "learning_rate": 3.890031179337004e-05,
      "loss": 0.9577,
      "step": 256620
    },
    {
      "epoch": 0.8994241694617053,
      "grad_norm": 3.265625,
      "learning_rate": 3.889966276470634e-05,
      "loss": 0.9049,
      "step": 256630
    },
    {
      "epoch": 0.899459216968601,
      "grad_norm": 2.796875,
      "learning_rate": 3.889901373604264e-05,
      "loss": 0.8845,
      "step": 256640
    },
    {
      "epoch": 0.8994942644754965,
      "grad_norm": 3.0625,
      "learning_rate": 3.8898364707378934e-05,
      "loss": 0.8632,
      "step": 256650
    },
    {
      "epoch": 0.8995293119823922,
      "grad_norm": 3.4375,
      "learning_rate": 3.8897715678715235e-05,
      "loss": 0.8409,
      "step": 256660
    },
    {
      "epoch": 0.8995643594892877,
      "grad_norm": 3.25,
      "learning_rate": 3.889706665005154e-05,
      "loss": 0.8906,
      "step": 256670
    },
    {
      "epoch": 0.8995994069961833,
      "grad_norm": 2.90625,
      "learning_rate": 3.889641762138783e-05,
      "loss": 0.8003,
      "step": 256680
    },
    {
      "epoch": 0.8996344545030789,
      "grad_norm": 2.875,
      "learning_rate": 3.889576859272413e-05,
      "loss": 0.9183,
      "step": 256690
    },
    {
      "epoch": 0.8996695020099745,
      "grad_norm": 2.671875,
      "learning_rate": 3.889511956406043e-05,
      "loss": 0.8829,
      "step": 256700
    },
    {
      "epoch": 0.8997045495168701,
      "grad_norm": 2.765625,
      "learning_rate": 3.889447053539673e-05,
      "loss": 0.9012,
      "step": 256710
    },
    {
      "epoch": 0.8997395970237657,
      "grad_norm": 3.3125,
      "learning_rate": 3.889382150673302e-05,
      "loss": 0.9317,
      "step": 256720
    },
    {
      "epoch": 0.8997746445306614,
      "grad_norm": 2.84375,
      "learning_rate": 3.8893172478069325e-05,
      "loss": 0.9466,
      "step": 256730
    },
    {
      "epoch": 0.8998096920375569,
      "grad_norm": 2.5625,
      "learning_rate": 3.889252344940562e-05,
      "loss": 0.8798,
      "step": 256740
    },
    {
      "epoch": 0.8998447395444525,
      "grad_norm": 2.4375,
      "learning_rate": 3.889187442074192e-05,
      "loss": 0.7933,
      "step": 256750
    },
    {
      "epoch": 0.8998797870513481,
      "grad_norm": 3.03125,
      "learning_rate": 3.8891225392078215e-05,
      "loss": 0.9115,
      "step": 256760
    },
    {
      "epoch": 0.8999148345582437,
      "grad_norm": 3.234375,
      "learning_rate": 3.889057636341452e-05,
      "loss": 0.9292,
      "step": 256770
    },
    {
      "epoch": 0.8999498820651393,
      "grad_norm": 2.390625,
      "learning_rate": 3.888992733475081e-05,
      "loss": 0.9022,
      "step": 256780
    },
    {
      "epoch": 0.8999849295720349,
      "grad_norm": 2.734375,
      "learning_rate": 3.888927830608711e-05,
      "loss": 0.8935,
      "step": 256790
    },
    {
      "epoch": 0.9000199770789304,
      "grad_norm": 3.234375,
      "learning_rate": 3.8888629277423414e-05,
      "loss": 1.0204,
      "step": 256800
    },
    {
      "epoch": 0.9000550245858261,
      "grad_norm": 3.171875,
      "learning_rate": 3.888798024875971e-05,
      "loss": 1.0137,
      "step": 256810
    },
    {
      "epoch": 0.9000900720927217,
      "grad_norm": 2.96875,
      "learning_rate": 3.888733122009601e-05,
      "loss": 0.8698,
      "step": 256820
    },
    {
      "epoch": 0.9001251195996173,
      "grad_norm": 2.796875,
      "learning_rate": 3.8886682191432305e-05,
      "loss": 0.8462,
      "step": 256830
    },
    {
      "epoch": 0.9001601671065129,
      "grad_norm": 3.28125,
      "learning_rate": 3.8886033162768606e-05,
      "loss": 0.8909,
      "step": 256840
    },
    {
      "epoch": 0.9001952146134085,
      "grad_norm": 2.921875,
      "learning_rate": 3.88853841341049e-05,
      "loss": 0.8927,
      "step": 256850
    },
    {
      "epoch": 0.9002302621203041,
      "grad_norm": 2.65625,
      "learning_rate": 3.88847351054412e-05,
      "loss": 0.8648,
      "step": 256860
    },
    {
      "epoch": 0.9002653096271996,
      "grad_norm": 2.703125,
      "learning_rate": 3.88840860767775e-05,
      "loss": 0.8905,
      "step": 256870
    },
    {
      "epoch": 0.9003003571340953,
      "grad_norm": 2.828125,
      "learning_rate": 3.88834370481138e-05,
      "loss": 0.8266,
      "step": 256880
    },
    {
      "epoch": 0.9003354046409908,
      "grad_norm": 3.203125,
      "learning_rate": 3.888278801945009e-05,
      "loss": 0.8904,
      "step": 256890
    },
    {
      "epoch": 0.9003704521478865,
      "grad_norm": 3.453125,
      "learning_rate": 3.8882138990786394e-05,
      "loss": 0.855,
      "step": 256900
    },
    {
      "epoch": 0.900405499654782,
      "grad_norm": 3.1875,
      "learning_rate": 3.888148996212269e-05,
      "loss": 0.9006,
      "step": 256910
    },
    {
      "epoch": 0.9004405471616777,
      "grad_norm": 2.78125,
      "learning_rate": 3.888084093345899e-05,
      "loss": 0.9111,
      "step": 256920
    },
    {
      "epoch": 0.9004755946685733,
      "grad_norm": 3.0625,
      "learning_rate": 3.8880191904795285e-05,
      "loss": 0.8797,
      "step": 256930
    },
    {
      "epoch": 0.9005106421754688,
      "grad_norm": 2.5625,
      "learning_rate": 3.8879542876131586e-05,
      "loss": 0.8022,
      "step": 256940
    },
    {
      "epoch": 0.9005456896823645,
      "grad_norm": 3.15625,
      "learning_rate": 3.887889384746788e-05,
      "loss": 0.878,
      "step": 256950
    },
    {
      "epoch": 0.90058073718926,
      "grad_norm": 3.109375,
      "learning_rate": 3.8878244818804175e-05,
      "loss": 0.8864,
      "step": 256960
    },
    {
      "epoch": 0.9006157846961557,
      "grad_norm": 2.609375,
      "learning_rate": 3.887759579014048e-05,
      "loss": 0.9201,
      "step": 256970
    },
    {
      "epoch": 0.9006508322030512,
      "grad_norm": 2.984375,
      "learning_rate": 3.887694676147677e-05,
      "loss": 0.824,
      "step": 256980
    },
    {
      "epoch": 0.9006858797099468,
      "grad_norm": 3.46875,
      "learning_rate": 3.887629773281307e-05,
      "loss": 0.8623,
      "step": 256990
    },
    {
      "epoch": 0.9007209272168424,
      "grad_norm": 2.609375,
      "learning_rate": 3.887564870414937e-05,
      "loss": 0.8822,
      "step": 257000
    },
    {
      "epoch": 0.900755974723738,
      "grad_norm": 2.78125,
      "learning_rate": 3.887499967548567e-05,
      "loss": 0.9349,
      "step": 257010
    },
    {
      "epoch": 0.9007910222306337,
      "grad_norm": 2.765625,
      "learning_rate": 3.887435064682196e-05,
      "loss": 0.8764,
      "step": 257020
    },
    {
      "epoch": 0.9008260697375292,
      "grad_norm": 3.015625,
      "learning_rate": 3.8873701618158265e-05,
      "loss": 0.8485,
      "step": 257030
    },
    {
      "epoch": 0.9008611172444249,
      "grad_norm": 2.6875,
      "learning_rate": 3.8873052589494566e-05,
      "loss": 0.8658,
      "step": 257040
    },
    {
      "epoch": 0.9008961647513204,
      "grad_norm": 2.953125,
      "learning_rate": 3.887240356083086e-05,
      "loss": 0.9321,
      "step": 257050
    },
    {
      "epoch": 0.900931212258216,
      "grad_norm": 3.34375,
      "learning_rate": 3.887175453216716e-05,
      "loss": 0.9342,
      "step": 257060
    },
    {
      "epoch": 0.9009662597651116,
      "grad_norm": 2.703125,
      "learning_rate": 3.887110550350346e-05,
      "loss": 0.842,
      "step": 257070
    },
    {
      "epoch": 0.9010013072720072,
      "grad_norm": 2.71875,
      "learning_rate": 3.887045647483976e-05,
      "loss": 0.8363,
      "step": 257080
    },
    {
      "epoch": 0.9010363547789028,
      "grad_norm": 2.9375,
      "learning_rate": 3.886980744617605e-05,
      "loss": 0.8564,
      "step": 257090
    },
    {
      "epoch": 0.9010714022857984,
      "grad_norm": 3.03125,
      "learning_rate": 3.8869158417512354e-05,
      "loss": 0.9069,
      "step": 257100
    },
    {
      "epoch": 0.9011064497926939,
      "grad_norm": 2.421875,
      "learning_rate": 3.886850938884865e-05,
      "loss": 0.8522,
      "step": 257110
    },
    {
      "epoch": 0.9011414972995896,
      "grad_norm": 2.6875,
      "learning_rate": 3.886786036018495e-05,
      "loss": 0.885,
      "step": 257120
    },
    {
      "epoch": 0.9011765448064852,
      "grad_norm": 2.96875,
      "learning_rate": 3.8867211331521245e-05,
      "loss": 0.9295,
      "step": 257130
    },
    {
      "epoch": 0.9012115923133808,
      "grad_norm": 3.109375,
      "learning_rate": 3.8866562302857546e-05,
      "loss": 1.0274,
      "step": 257140
    },
    {
      "epoch": 0.9012466398202764,
      "grad_norm": 2.734375,
      "learning_rate": 3.886591327419384e-05,
      "loss": 0.89,
      "step": 257150
    },
    {
      "epoch": 0.901281687327172,
      "grad_norm": 3.125,
      "learning_rate": 3.886526424553014e-05,
      "loss": 0.9225,
      "step": 257160
    },
    {
      "epoch": 0.9013167348340676,
      "grad_norm": 3.0625,
      "learning_rate": 3.8864615216866443e-05,
      "loss": 0.9293,
      "step": 257170
    },
    {
      "epoch": 0.9013517823409631,
      "grad_norm": 2.890625,
      "learning_rate": 3.886396618820274e-05,
      "loss": 0.9539,
      "step": 257180
    },
    {
      "epoch": 0.9013868298478588,
      "grad_norm": 2.609375,
      "learning_rate": 3.886331715953904e-05,
      "loss": 0.8713,
      "step": 257190
    },
    {
      "epoch": 0.9014218773547543,
      "grad_norm": 3.15625,
      "learning_rate": 3.8862668130875334e-05,
      "loss": 0.9415,
      "step": 257200
    },
    {
      "epoch": 0.90145692486165,
      "grad_norm": 2.765625,
      "learning_rate": 3.8862019102211635e-05,
      "loss": 0.8856,
      "step": 257210
    },
    {
      "epoch": 0.9014919723685456,
      "grad_norm": 3.078125,
      "learning_rate": 3.886137007354793e-05,
      "loss": 0.9574,
      "step": 257220
    },
    {
      "epoch": 0.9015270198754411,
      "grad_norm": 3.296875,
      "learning_rate": 3.886072104488423e-05,
      "loss": 0.8784,
      "step": 257230
    },
    {
      "epoch": 0.9015620673823368,
      "grad_norm": 2.84375,
      "learning_rate": 3.8860072016220526e-05,
      "loss": 0.7906,
      "step": 257240
    },
    {
      "epoch": 0.9015971148892323,
      "grad_norm": 3.328125,
      "learning_rate": 3.885942298755683e-05,
      "loss": 0.9577,
      "step": 257250
    },
    {
      "epoch": 0.901632162396128,
      "grad_norm": 2.765625,
      "learning_rate": 3.885877395889312e-05,
      "loss": 0.9061,
      "step": 257260
    },
    {
      "epoch": 0.9016672099030235,
      "grad_norm": 2.90625,
      "learning_rate": 3.8858124930229423e-05,
      "loss": 0.8547,
      "step": 257270
    },
    {
      "epoch": 0.9017022574099192,
      "grad_norm": 2.65625,
      "learning_rate": 3.885747590156572e-05,
      "loss": 0.8903,
      "step": 257280
    },
    {
      "epoch": 0.9017373049168147,
      "grad_norm": 2.734375,
      "learning_rate": 3.885682687290202e-05,
      "loss": 0.8815,
      "step": 257290
    },
    {
      "epoch": 0.9017723524237103,
      "grad_norm": 2.859375,
      "learning_rate": 3.8856177844238314e-05,
      "loss": 0.9624,
      "step": 257300
    },
    {
      "epoch": 0.901807399930606,
      "grad_norm": 3.390625,
      "learning_rate": 3.8855528815574615e-05,
      "loss": 0.8129,
      "step": 257310
    },
    {
      "epoch": 0.9018424474375015,
      "grad_norm": 2.96875,
      "learning_rate": 3.885487978691091e-05,
      "loss": 0.9509,
      "step": 257320
    },
    {
      "epoch": 0.9018774949443972,
      "grad_norm": 2.625,
      "learning_rate": 3.8854230758247205e-05,
      "loss": 0.8038,
      "step": 257330
    },
    {
      "epoch": 0.9019125424512927,
      "grad_norm": 2.953125,
      "learning_rate": 3.8853581729583506e-05,
      "loss": 0.8072,
      "step": 257340
    },
    {
      "epoch": 0.9019475899581884,
      "grad_norm": 2.359375,
      "learning_rate": 3.88529327009198e-05,
      "loss": 0.8471,
      "step": 257350
    },
    {
      "epoch": 0.9019826374650839,
      "grad_norm": 2.921875,
      "learning_rate": 3.88522836722561e-05,
      "loss": 0.8255,
      "step": 257360
    },
    {
      "epoch": 0.9020176849719795,
      "grad_norm": 2.90625,
      "learning_rate": 3.88516346435924e-05,
      "loss": 0.8103,
      "step": 257370
    },
    {
      "epoch": 0.9020527324788751,
      "grad_norm": 3.0625,
      "learning_rate": 3.88509856149287e-05,
      "loss": 0.8694,
      "step": 257380
    },
    {
      "epoch": 0.9020877799857707,
      "grad_norm": 2.65625,
      "learning_rate": 3.885033658626499e-05,
      "loss": 0.8768,
      "step": 257390
    },
    {
      "epoch": 0.9021228274926663,
      "grad_norm": 2.625,
      "learning_rate": 3.8849687557601294e-05,
      "loss": 0.9054,
      "step": 257400
    },
    {
      "epoch": 0.9021578749995619,
      "grad_norm": 2.375,
      "learning_rate": 3.8849038528937595e-05,
      "loss": 0.8359,
      "step": 257410
    },
    {
      "epoch": 0.9021929225064576,
      "grad_norm": 2.765625,
      "learning_rate": 3.884838950027389e-05,
      "loss": 0.8447,
      "step": 257420
    },
    {
      "epoch": 0.9022279700133531,
      "grad_norm": 2.828125,
      "learning_rate": 3.884774047161019e-05,
      "loss": 0.8875,
      "step": 257430
    },
    {
      "epoch": 0.9022630175202487,
      "grad_norm": 3.015625,
      "learning_rate": 3.8847091442946486e-05,
      "loss": 0.8619,
      "step": 257440
    },
    {
      "epoch": 0.9022980650271443,
      "grad_norm": 2.703125,
      "learning_rate": 3.884644241428279e-05,
      "loss": 0.7929,
      "step": 257450
    },
    {
      "epoch": 0.9023331125340399,
      "grad_norm": 3.015625,
      "learning_rate": 3.884579338561908e-05,
      "loss": 0.9773,
      "step": 257460
    },
    {
      "epoch": 0.9023681600409355,
      "grad_norm": 2.546875,
      "learning_rate": 3.8845144356955383e-05,
      "loss": 0.9392,
      "step": 257470
    },
    {
      "epoch": 0.9024032075478311,
      "grad_norm": 3.359375,
      "learning_rate": 3.884449532829168e-05,
      "loss": 0.8598,
      "step": 257480
    },
    {
      "epoch": 0.9024382550547266,
      "grad_norm": 2.671875,
      "learning_rate": 3.884384629962798e-05,
      "loss": 0.9067,
      "step": 257490
    },
    {
      "epoch": 0.9024733025616223,
      "grad_norm": 2.921875,
      "learning_rate": 3.8843197270964274e-05,
      "loss": 0.9011,
      "step": 257500
    },
    {
      "epoch": 0.9025083500685179,
      "grad_norm": 3.0,
      "learning_rate": 3.8842548242300575e-05,
      "loss": 0.883,
      "step": 257510
    },
    {
      "epoch": 0.9025433975754135,
      "grad_norm": 2.875,
      "learning_rate": 3.884189921363687e-05,
      "loss": 0.888,
      "step": 257520
    },
    {
      "epoch": 0.9025784450823091,
      "grad_norm": 3.0,
      "learning_rate": 3.884125018497317e-05,
      "loss": 0.8768,
      "step": 257530
    },
    {
      "epoch": 0.9026134925892046,
      "grad_norm": 2.9375,
      "learning_rate": 3.884060115630947e-05,
      "loss": 0.7847,
      "step": 257540
    },
    {
      "epoch": 0.9026485400961003,
      "grad_norm": 3.0625,
      "learning_rate": 3.883995212764577e-05,
      "loss": 0.8664,
      "step": 257550
    },
    {
      "epoch": 0.9026835876029958,
      "grad_norm": 2.828125,
      "learning_rate": 3.883930309898207e-05,
      "loss": 0.8686,
      "step": 257560
    },
    {
      "epoch": 0.9027186351098915,
      "grad_norm": 2.75,
      "learning_rate": 3.8838654070318363e-05,
      "loss": 0.873,
      "step": 257570
    },
    {
      "epoch": 0.902753682616787,
      "grad_norm": 3.09375,
      "learning_rate": 3.8838005041654665e-05,
      "loss": 0.8537,
      "step": 257580
    },
    {
      "epoch": 0.9027887301236827,
      "grad_norm": 3.5625,
      "learning_rate": 3.883735601299096e-05,
      "loss": 0.92,
      "step": 257590
    },
    {
      "epoch": 0.9028237776305782,
      "grad_norm": 3.09375,
      "learning_rate": 3.883670698432726e-05,
      "loss": 0.9586,
      "step": 257600
    },
    {
      "epoch": 0.9028588251374738,
      "grad_norm": 2.515625,
      "learning_rate": 3.8836057955663555e-05,
      "loss": 0.9357,
      "step": 257610
    },
    {
      "epoch": 0.9028938726443695,
      "grad_norm": 3.203125,
      "learning_rate": 3.883540892699986e-05,
      "loss": 0.8819,
      "step": 257620
    },
    {
      "epoch": 0.902928920151265,
      "grad_norm": 2.890625,
      "learning_rate": 3.883475989833615e-05,
      "loss": 0.8693,
      "step": 257630
    },
    {
      "epoch": 0.9029639676581607,
      "grad_norm": 2.609375,
      "learning_rate": 3.883411086967245e-05,
      "loss": 0.8311,
      "step": 257640
    },
    {
      "epoch": 0.9029990151650562,
      "grad_norm": 2.734375,
      "learning_rate": 3.883346184100875e-05,
      "loss": 0.8843,
      "step": 257650
    },
    {
      "epoch": 0.9030340626719519,
      "grad_norm": 2.84375,
      "learning_rate": 3.883281281234505e-05,
      "loss": 0.8516,
      "step": 257660
    },
    {
      "epoch": 0.9030691101788474,
      "grad_norm": 2.96875,
      "learning_rate": 3.8832163783681343e-05,
      "loss": 0.8809,
      "step": 257670
    },
    {
      "epoch": 0.903104157685743,
      "grad_norm": 3.6875,
      "learning_rate": 3.8831514755017645e-05,
      "loss": 0.9808,
      "step": 257680
    },
    {
      "epoch": 0.9031392051926386,
      "grad_norm": 3.25,
      "learning_rate": 3.8830865726353946e-05,
      "loss": 0.8577,
      "step": 257690
    },
    {
      "epoch": 0.9031742526995342,
      "grad_norm": 3.34375,
      "learning_rate": 3.8830216697690234e-05,
      "loss": 0.9379,
      "step": 257700
    },
    {
      "epoch": 0.9032093002064299,
      "grad_norm": 3.125,
      "learning_rate": 3.8829567669026535e-05,
      "loss": 0.9589,
      "step": 257710
    },
    {
      "epoch": 0.9032443477133254,
      "grad_norm": 2.828125,
      "learning_rate": 3.882891864036283e-05,
      "loss": 0.8298,
      "step": 257720
    },
    {
      "epoch": 0.903279395220221,
      "grad_norm": 2.78125,
      "learning_rate": 3.882826961169913e-05,
      "loss": 0.8569,
      "step": 257730
    },
    {
      "epoch": 0.9033144427271166,
      "grad_norm": 2.609375,
      "learning_rate": 3.8827620583035426e-05,
      "loss": 0.7821,
      "step": 257740
    },
    {
      "epoch": 0.9033494902340122,
      "grad_norm": 3.109375,
      "learning_rate": 3.882697155437173e-05,
      "loss": 0.8866,
      "step": 257750
    },
    {
      "epoch": 0.9033845377409078,
      "grad_norm": 2.921875,
      "learning_rate": 3.882632252570803e-05,
      "loss": 0.8159,
      "step": 257760
    },
    {
      "epoch": 0.9034195852478034,
      "grad_norm": 3.390625,
      "learning_rate": 3.8825673497044323e-05,
      "loss": 0.9398,
      "step": 257770
    },
    {
      "epoch": 0.903454632754699,
      "grad_norm": 2.984375,
      "learning_rate": 3.8825024468380625e-05,
      "loss": 0.9186,
      "step": 257780
    },
    {
      "epoch": 0.9034896802615946,
      "grad_norm": 2.75,
      "learning_rate": 3.882437543971692e-05,
      "loss": 0.8066,
      "step": 257790
    },
    {
      "epoch": 0.9035247277684902,
      "grad_norm": 3.203125,
      "learning_rate": 3.882372641105322e-05,
      "loss": 0.8932,
      "step": 257800
    },
    {
      "epoch": 0.9035597752753858,
      "grad_norm": 3.171875,
      "learning_rate": 3.8823077382389515e-05,
      "loss": 0.8583,
      "step": 257810
    },
    {
      "epoch": 0.9035948227822814,
      "grad_norm": 2.625,
      "learning_rate": 3.882242835372582e-05,
      "loss": 0.8614,
      "step": 257820
    },
    {
      "epoch": 0.903629870289177,
      "grad_norm": 2.6875,
      "learning_rate": 3.882177932506211e-05,
      "loss": 0.871,
      "step": 257830
    },
    {
      "epoch": 0.9036649177960726,
      "grad_norm": 2.765625,
      "learning_rate": 3.882113029639841e-05,
      "loss": 0.8446,
      "step": 257840
    },
    {
      "epoch": 0.9036999653029681,
      "grad_norm": 2.84375,
      "learning_rate": 3.882048126773471e-05,
      "loss": 0.9088,
      "step": 257850
    },
    {
      "epoch": 0.9037350128098638,
      "grad_norm": 3.1875,
      "learning_rate": 3.881983223907101e-05,
      "loss": 0.9143,
      "step": 257860
    },
    {
      "epoch": 0.9037700603167593,
      "grad_norm": 2.75,
      "learning_rate": 3.8819183210407303e-05,
      "loss": 0.9095,
      "step": 257870
    },
    {
      "epoch": 0.903805107823655,
      "grad_norm": 2.5,
      "learning_rate": 3.8818534181743605e-05,
      "loss": 0.8872,
      "step": 257880
    },
    {
      "epoch": 0.9038401553305505,
      "grad_norm": 3.0625,
      "learning_rate": 3.88178851530799e-05,
      "loss": 0.9296,
      "step": 257890
    },
    {
      "epoch": 0.9038752028374462,
      "grad_norm": 2.53125,
      "learning_rate": 3.88172361244162e-05,
      "loss": 0.8192,
      "step": 257900
    },
    {
      "epoch": 0.9039102503443418,
      "grad_norm": 3.015625,
      "learning_rate": 3.88165870957525e-05,
      "loss": 0.9688,
      "step": 257910
    },
    {
      "epoch": 0.9039452978512373,
      "grad_norm": 3.15625,
      "learning_rate": 3.88159380670888e-05,
      "loss": 0.8753,
      "step": 257920
    },
    {
      "epoch": 0.903980345358133,
      "grad_norm": 2.921875,
      "learning_rate": 3.88152890384251e-05,
      "loss": 0.9501,
      "step": 257930
    },
    {
      "epoch": 0.9040153928650285,
      "grad_norm": 2.8125,
      "learning_rate": 3.881464000976139e-05,
      "loss": 0.8842,
      "step": 257940
    },
    {
      "epoch": 0.9040504403719242,
      "grad_norm": 2.859375,
      "learning_rate": 3.8813990981097694e-05,
      "loss": 0.8869,
      "step": 257950
    },
    {
      "epoch": 0.9040854878788197,
      "grad_norm": 3.09375,
      "learning_rate": 3.881334195243399e-05,
      "loss": 0.9224,
      "step": 257960
    },
    {
      "epoch": 0.9041205353857154,
      "grad_norm": 2.796875,
      "learning_rate": 3.881269292377029e-05,
      "loss": 0.8565,
      "step": 257970
    },
    {
      "epoch": 0.9041555828926109,
      "grad_norm": 3.140625,
      "learning_rate": 3.8812043895106585e-05,
      "loss": 0.9495,
      "step": 257980
    },
    {
      "epoch": 0.9041906303995065,
      "grad_norm": 3.015625,
      "learning_rate": 3.8811394866442886e-05,
      "loss": 0.9028,
      "step": 257990
    },
    {
      "epoch": 0.9042256779064022,
      "grad_norm": 2.65625,
      "learning_rate": 3.881074583777918e-05,
      "loss": 0.8839,
      "step": 258000
    },
    {
      "epoch": 0.9042607254132977,
      "grad_norm": 3.234375,
      "learning_rate": 3.881009680911548e-05,
      "loss": 0.8718,
      "step": 258010
    },
    {
      "epoch": 0.9042957729201934,
      "grad_norm": 2.953125,
      "learning_rate": 3.880944778045178e-05,
      "loss": 0.9996,
      "step": 258020
    },
    {
      "epoch": 0.9043308204270889,
      "grad_norm": 3.234375,
      "learning_rate": 3.880879875178808e-05,
      "loss": 0.9092,
      "step": 258030
    },
    {
      "epoch": 0.9043658679339845,
      "grad_norm": 3.078125,
      "learning_rate": 3.880814972312438e-05,
      "loss": 0.7671,
      "step": 258040
    },
    {
      "epoch": 0.9044009154408801,
      "grad_norm": 2.625,
      "learning_rate": 3.8807500694460674e-05,
      "loss": 0.8429,
      "step": 258050
    },
    {
      "epoch": 0.9044359629477757,
      "grad_norm": 2.578125,
      "learning_rate": 3.8806851665796976e-05,
      "loss": 0.9124,
      "step": 258060
    },
    {
      "epoch": 0.9044710104546713,
      "grad_norm": 3.09375,
      "learning_rate": 3.880620263713327e-05,
      "loss": 0.8859,
      "step": 258070
    },
    {
      "epoch": 0.9045060579615669,
      "grad_norm": 2.75,
      "learning_rate": 3.8805553608469565e-05,
      "loss": 0.8773,
      "step": 258080
    },
    {
      "epoch": 0.9045411054684624,
      "grad_norm": 3.09375,
      "learning_rate": 3.880490457980586e-05,
      "loss": 0.8623,
      "step": 258090
    },
    {
      "epoch": 0.9045761529753581,
      "grad_norm": 2.90625,
      "learning_rate": 3.880425555114216e-05,
      "loss": 0.861,
      "step": 258100
    },
    {
      "epoch": 0.9046112004822537,
      "grad_norm": 2.890625,
      "learning_rate": 3.8803606522478455e-05,
      "loss": 0.9243,
      "step": 258110
    },
    {
      "epoch": 0.9046462479891493,
      "grad_norm": 2.875,
      "learning_rate": 3.880295749381476e-05,
      "loss": 0.8241,
      "step": 258120
    },
    {
      "epoch": 0.9046812954960449,
      "grad_norm": 2.65625,
      "learning_rate": 3.880230846515106e-05,
      "loss": 0.817,
      "step": 258130
    },
    {
      "epoch": 0.9047163430029405,
      "grad_norm": 2.578125,
      "learning_rate": 3.880165943648735e-05,
      "loss": 0.8737,
      "step": 258140
    },
    {
      "epoch": 0.9047513905098361,
      "grad_norm": 2.984375,
      "learning_rate": 3.8801010407823654e-05,
      "loss": 0.8941,
      "step": 258150
    },
    {
      "epoch": 0.9047864380167316,
      "grad_norm": 2.59375,
      "learning_rate": 3.880036137915995e-05,
      "loss": 0.9,
      "step": 258160
    },
    {
      "epoch": 0.9048214855236273,
      "grad_norm": 2.921875,
      "learning_rate": 3.879971235049625e-05,
      "loss": 0.9101,
      "step": 258170
    },
    {
      "epoch": 0.9048565330305228,
      "grad_norm": 2.65625,
      "learning_rate": 3.8799063321832545e-05,
      "loss": 0.8121,
      "step": 258180
    },
    {
      "epoch": 0.9048915805374185,
      "grad_norm": 3.109375,
      "learning_rate": 3.8798414293168846e-05,
      "loss": 0.9361,
      "step": 258190
    },
    {
      "epoch": 0.9049266280443141,
      "grad_norm": 3.046875,
      "learning_rate": 3.879776526450514e-05,
      "loss": 0.9037,
      "step": 258200
    },
    {
      "epoch": 0.9049616755512097,
      "grad_norm": 2.734375,
      "learning_rate": 3.879711623584144e-05,
      "loss": 0.9159,
      "step": 258210
    },
    {
      "epoch": 0.9049967230581053,
      "grad_norm": 2.90625,
      "learning_rate": 3.879646720717774e-05,
      "loss": 0.8657,
      "step": 258220
    },
    {
      "epoch": 0.9050317705650008,
      "grad_norm": 2.859375,
      "learning_rate": 3.879581817851404e-05,
      "loss": 0.8353,
      "step": 258230
    },
    {
      "epoch": 0.9050668180718965,
      "grad_norm": 2.875,
      "learning_rate": 3.879516914985033e-05,
      "loss": 0.8503,
      "step": 258240
    },
    {
      "epoch": 0.905101865578792,
      "grad_norm": 2.796875,
      "learning_rate": 3.8794520121186634e-05,
      "loss": 0.8179,
      "step": 258250
    },
    {
      "epoch": 0.9051369130856877,
      "grad_norm": 2.96875,
      "learning_rate": 3.879387109252293e-05,
      "loss": 0.9233,
      "step": 258260
    },
    {
      "epoch": 0.9051719605925832,
      "grad_norm": 2.46875,
      "learning_rate": 3.879322206385923e-05,
      "loss": 0.826,
      "step": 258270
    },
    {
      "epoch": 0.9052070080994788,
      "grad_norm": 2.484375,
      "learning_rate": 3.879257303519553e-05,
      "loss": 0.8718,
      "step": 258280
    },
    {
      "epoch": 0.9052420556063744,
      "grad_norm": 3.125,
      "learning_rate": 3.8791924006531826e-05,
      "loss": 0.901,
      "step": 258290
    },
    {
      "epoch": 0.90527710311327,
      "grad_norm": 2.75,
      "learning_rate": 3.879127497786813e-05,
      "loss": 0.8803,
      "step": 258300
    },
    {
      "epoch": 0.9053121506201657,
      "grad_norm": 3.140625,
      "learning_rate": 3.879062594920442e-05,
      "loss": 0.9075,
      "step": 258310
    },
    {
      "epoch": 0.9053471981270612,
      "grad_norm": 3.109375,
      "learning_rate": 3.8789976920540724e-05,
      "loss": 0.8663,
      "step": 258320
    },
    {
      "epoch": 0.9053822456339569,
      "grad_norm": 2.671875,
      "learning_rate": 3.878932789187702e-05,
      "loss": 0.8804,
      "step": 258330
    },
    {
      "epoch": 0.9054172931408524,
      "grad_norm": 2.78125,
      "learning_rate": 3.878867886321332e-05,
      "loss": 0.8491,
      "step": 258340
    },
    {
      "epoch": 0.905452340647748,
      "grad_norm": 2.796875,
      "learning_rate": 3.8788029834549614e-05,
      "loss": 0.901,
      "step": 258350
    },
    {
      "epoch": 0.9054873881546436,
      "grad_norm": 2.84375,
      "learning_rate": 3.8787380805885916e-05,
      "loss": 0.8115,
      "step": 258360
    },
    {
      "epoch": 0.9055224356615392,
      "grad_norm": 2.90625,
      "learning_rate": 3.878673177722221e-05,
      "loss": 0.845,
      "step": 258370
    },
    {
      "epoch": 0.9055574831684348,
      "grad_norm": 2.828125,
      "learning_rate": 3.878608274855851e-05,
      "loss": 0.8601,
      "step": 258380
    },
    {
      "epoch": 0.9055925306753304,
      "grad_norm": 3.078125,
      "learning_rate": 3.8785433719894806e-05,
      "loss": 0.8911,
      "step": 258390
    },
    {
      "epoch": 0.9056275781822261,
      "grad_norm": 3.171875,
      "learning_rate": 3.878478469123111e-05,
      "loss": 0.9262,
      "step": 258400
    },
    {
      "epoch": 0.9056626256891216,
      "grad_norm": 3.09375,
      "learning_rate": 3.878413566256741e-05,
      "loss": 0.9121,
      "step": 258410
    },
    {
      "epoch": 0.9056976731960172,
      "grad_norm": 2.703125,
      "learning_rate": 3.8783486633903704e-05,
      "loss": 0.885,
      "step": 258420
    },
    {
      "epoch": 0.9057327207029128,
      "grad_norm": 3.328125,
      "learning_rate": 3.8782837605240005e-05,
      "loss": 0.894,
      "step": 258430
    },
    {
      "epoch": 0.9057677682098084,
      "grad_norm": 2.65625,
      "learning_rate": 3.87821885765763e-05,
      "loss": 0.8745,
      "step": 258440
    },
    {
      "epoch": 0.905802815716704,
      "grad_norm": 3.0625,
      "learning_rate": 3.8781539547912594e-05,
      "loss": 0.8971,
      "step": 258450
    },
    {
      "epoch": 0.9058378632235996,
      "grad_norm": 2.71875,
      "learning_rate": 3.878089051924889e-05,
      "loss": 0.7909,
      "step": 258460
    },
    {
      "epoch": 0.9058729107304951,
      "grad_norm": 2.6875,
      "learning_rate": 3.878024149058519e-05,
      "loss": 0.8055,
      "step": 258470
    },
    {
      "epoch": 0.9059079582373908,
      "grad_norm": 3.0625,
      "learning_rate": 3.8779592461921485e-05,
      "loss": 0.9338,
      "step": 258480
    },
    {
      "epoch": 0.9059430057442864,
      "grad_norm": 2.734375,
      "learning_rate": 3.8778943433257786e-05,
      "loss": 0.8163,
      "step": 258490
    },
    {
      "epoch": 0.905978053251182,
      "grad_norm": 2.59375,
      "learning_rate": 3.877829440459409e-05,
      "loss": 0.8161,
      "step": 258500
    },
    {
      "epoch": 0.9060131007580776,
      "grad_norm": 2.578125,
      "learning_rate": 3.877764537593038e-05,
      "loss": 0.8336,
      "step": 258510
    },
    {
      "epoch": 0.9060481482649732,
      "grad_norm": 2.828125,
      "learning_rate": 3.8776996347266684e-05,
      "loss": 0.8543,
      "step": 258520
    },
    {
      "epoch": 0.9060831957718688,
      "grad_norm": 3.078125,
      "learning_rate": 3.877634731860298e-05,
      "loss": 0.978,
      "step": 258530
    },
    {
      "epoch": 0.9061182432787643,
      "grad_norm": 3.21875,
      "learning_rate": 3.877569828993928e-05,
      "loss": 0.9194,
      "step": 258540
    },
    {
      "epoch": 0.90615329078566,
      "grad_norm": 3.4375,
      "learning_rate": 3.8775049261275574e-05,
      "loss": 0.8827,
      "step": 258550
    },
    {
      "epoch": 0.9061883382925555,
      "grad_norm": 3.1875,
      "learning_rate": 3.8774400232611876e-05,
      "loss": 0.8392,
      "step": 258560
    },
    {
      "epoch": 0.9062233857994512,
      "grad_norm": 2.734375,
      "learning_rate": 3.877375120394817e-05,
      "loss": 0.829,
      "step": 258570
    },
    {
      "epoch": 0.9062584333063467,
      "grad_norm": 2.703125,
      "learning_rate": 3.877310217528447e-05,
      "loss": 0.8586,
      "step": 258580
    },
    {
      "epoch": 0.9062934808132423,
      "grad_norm": 2.9375,
      "learning_rate": 3.8772453146620766e-05,
      "loss": 0.8928,
      "step": 258590
    },
    {
      "epoch": 0.906328528320138,
      "grad_norm": 2.9375,
      "learning_rate": 3.877180411795707e-05,
      "loss": 0.9615,
      "step": 258600
    },
    {
      "epoch": 0.9063635758270335,
      "grad_norm": 2.859375,
      "learning_rate": 3.877115508929336e-05,
      "loss": 0.8327,
      "step": 258610
    },
    {
      "epoch": 0.9063986233339292,
      "grad_norm": 2.84375,
      "learning_rate": 3.8770506060629664e-05,
      "loss": 0.9197,
      "step": 258620
    },
    {
      "epoch": 0.9064336708408247,
      "grad_norm": 3.125,
      "learning_rate": 3.876985703196596e-05,
      "loss": 0.9012,
      "step": 258630
    },
    {
      "epoch": 0.9064687183477204,
      "grad_norm": 3.171875,
      "learning_rate": 3.876920800330226e-05,
      "loss": 0.8379,
      "step": 258640
    },
    {
      "epoch": 0.9065037658546159,
      "grad_norm": 3.0,
      "learning_rate": 3.876855897463856e-05,
      "loss": 1.0374,
      "step": 258650
    },
    {
      "epoch": 0.9065388133615115,
      "grad_norm": 2.515625,
      "learning_rate": 3.8767909945974856e-05,
      "loss": 0.8848,
      "step": 258660
    },
    {
      "epoch": 0.9065738608684071,
      "grad_norm": 2.984375,
      "learning_rate": 3.876726091731116e-05,
      "loss": 0.8832,
      "step": 258670
    },
    {
      "epoch": 0.9066089083753027,
      "grad_norm": 3.03125,
      "learning_rate": 3.876661188864745e-05,
      "loss": 0.8358,
      "step": 258680
    },
    {
      "epoch": 0.9066439558821984,
      "grad_norm": 2.65625,
      "learning_rate": 3.876596285998375e-05,
      "loss": 0.7947,
      "step": 258690
    },
    {
      "epoch": 0.9066790033890939,
      "grad_norm": 3.03125,
      "learning_rate": 3.876531383132005e-05,
      "loss": 0.9572,
      "step": 258700
    },
    {
      "epoch": 0.9067140508959896,
      "grad_norm": 2.859375,
      "learning_rate": 3.876466480265635e-05,
      "loss": 0.7837,
      "step": 258710
    },
    {
      "epoch": 0.9067490984028851,
      "grad_norm": 3.046875,
      "learning_rate": 3.8764015773992644e-05,
      "loss": 0.8634,
      "step": 258720
    },
    {
      "epoch": 0.9067841459097807,
      "grad_norm": 3.046875,
      "learning_rate": 3.8763366745328945e-05,
      "loss": 0.9505,
      "step": 258730
    },
    {
      "epoch": 0.9068191934166763,
      "grad_norm": 3.1875,
      "learning_rate": 3.876271771666524e-05,
      "loss": 0.8691,
      "step": 258740
    },
    {
      "epoch": 0.9068542409235719,
      "grad_norm": 2.6875,
      "learning_rate": 3.876206868800154e-05,
      "loss": 0.8662,
      "step": 258750
    },
    {
      "epoch": 0.9068892884304675,
      "grad_norm": 2.921875,
      "learning_rate": 3.8761419659337836e-05,
      "loss": 0.8751,
      "step": 258760
    },
    {
      "epoch": 0.9069243359373631,
      "grad_norm": 3.234375,
      "learning_rate": 3.876077063067414e-05,
      "loss": 0.8969,
      "step": 258770
    },
    {
      "epoch": 0.9069593834442586,
      "grad_norm": 3.25,
      "learning_rate": 3.876012160201044e-05,
      "loss": 0.8786,
      "step": 258780
    },
    {
      "epoch": 0.9069944309511543,
      "grad_norm": 2.734375,
      "learning_rate": 3.875947257334673e-05,
      "loss": 0.8376,
      "step": 258790
    },
    {
      "epoch": 0.9070294784580499,
      "grad_norm": 2.484375,
      "learning_rate": 3.8758823544683034e-05,
      "loss": 0.8531,
      "step": 258800
    },
    {
      "epoch": 0.9070645259649455,
      "grad_norm": 2.671875,
      "learning_rate": 3.875817451601933e-05,
      "loss": 0.8279,
      "step": 258810
    },
    {
      "epoch": 0.9070995734718411,
      "grad_norm": 2.796875,
      "learning_rate": 3.875752548735563e-05,
      "loss": 0.8878,
      "step": 258820
    },
    {
      "epoch": 0.9071346209787366,
      "grad_norm": 3.296875,
      "learning_rate": 3.875687645869192e-05,
      "loss": 0.8361,
      "step": 258830
    },
    {
      "epoch": 0.9071696684856323,
      "grad_norm": 2.703125,
      "learning_rate": 3.875622743002822e-05,
      "loss": 0.8361,
      "step": 258840
    },
    {
      "epoch": 0.9072047159925278,
      "grad_norm": 3.03125,
      "learning_rate": 3.8755578401364514e-05,
      "loss": 0.8282,
      "step": 258850
    },
    {
      "epoch": 0.9072397634994235,
      "grad_norm": 2.765625,
      "learning_rate": 3.8754929372700816e-05,
      "loss": 0.972,
      "step": 258860
    },
    {
      "epoch": 0.907274811006319,
      "grad_norm": 2.859375,
      "learning_rate": 3.875428034403712e-05,
      "loss": 0.9037,
      "step": 258870
    },
    {
      "epoch": 0.9073098585132147,
      "grad_norm": 2.671875,
      "learning_rate": 3.875363131537341e-05,
      "loss": 0.8659,
      "step": 258880
    },
    {
      "epoch": 0.9073449060201103,
      "grad_norm": 2.59375,
      "learning_rate": 3.875298228670971e-05,
      "loss": 0.8461,
      "step": 258890
    },
    {
      "epoch": 0.9073799535270058,
      "grad_norm": 3.125,
      "learning_rate": 3.875233325804601e-05,
      "loss": 0.8304,
      "step": 258900
    },
    {
      "epoch": 0.9074150010339015,
      "grad_norm": 2.921875,
      "learning_rate": 3.875168422938231e-05,
      "loss": 0.9128,
      "step": 258910
    },
    {
      "epoch": 0.907450048540797,
      "grad_norm": 2.78125,
      "learning_rate": 3.8751035200718604e-05,
      "loss": 0.8538,
      "step": 258920
    },
    {
      "epoch": 0.9074850960476927,
      "grad_norm": 2.765625,
      "learning_rate": 3.8750386172054905e-05,
      "loss": 0.8276,
      "step": 258930
    },
    {
      "epoch": 0.9075201435545882,
      "grad_norm": 2.5625,
      "learning_rate": 3.87497371433912e-05,
      "loss": 0.8643,
      "step": 258940
    },
    {
      "epoch": 0.9075551910614839,
      "grad_norm": 2.75,
      "learning_rate": 3.87490881147275e-05,
      "loss": 0.844,
      "step": 258950
    },
    {
      "epoch": 0.9075902385683794,
      "grad_norm": 3.28125,
      "learning_rate": 3.8748439086063796e-05,
      "loss": 0.8763,
      "step": 258960
    },
    {
      "epoch": 0.907625286075275,
      "grad_norm": 2.765625,
      "learning_rate": 3.87477900574001e-05,
      "loss": 0.8642,
      "step": 258970
    },
    {
      "epoch": 0.9076603335821707,
      "grad_norm": 3.03125,
      "learning_rate": 3.874714102873639e-05,
      "loss": 0.8762,
      "step": 258980
    },
    {
      "epoch": 0.9076953810890662,
      "grad_norm": 2.96875,
      "learning_rate": 3.874649200007269e-05,
      "loss": 0.8894,
      "step": 258990
    },
    {
      "epoch": 0.9077304285959619,
      "grad_norm": 2.546875,
      "learning_rate": 3.8745842971408994e-05,
      "loss": 0.8339,
      "step": 259000
    },
    {
      "epoch": 0.9077654761028574,
      "grad_norm": 3.140625,
      "learning_rate": 3.874519394274529e-05,
      "loss": 0.8622,
      "step": 259010
    },
    {
      "epoch": 0.907800523609753,
      "grad_norm": 3.109375,
      "learning_rate": 3.874454491408159e-05,
      "loss": 0.8787,
      "step": 259020
    },
    {
      "epoch": 0.9078355711166486,
      "grad_norm": 2.796875,
      "learning_rate": 3.8743895885417885e-05,
      "loss": 0.9881,
      "step": 259030
    },
    {
      "epoch": 0.9078706186235442,
      "grad_norm": 2.921875,
      "learning_rate": 3.8743246856754186e-05,
      "loss": 0.9104,
      "step": 259040
    },
    {
      "epoch": 0.9079056661304398,
      "grad_norm": 2.765625,
      "learning_rate": 3.874259782809048e-05,
      "loss": 0.8865,
      "step": 259050
    },
    {
      "epoch": 0.9079407136373354,
      "grad_norm": 3.359375,
      "learning_rate": 3.874194879942678e-05,
      "loss": 0.8648,
      "step": 259060
    },
    {
      "epoch": 0.907975761144231,
      "grad_norm": 2.96875,
      "learning_rate": 3.874129977076308e-05,
      "loss": 0.8626,
      "step": 259070
    },
    {
      "epoch": 0.9080108086511266,
      "grad_norm": 2.828125,
      "learning_rate": 3.874065074209938e-05,
      "loss": 0.8617,
      "step": 259080
    },
    {
      "epoch": 0.9080458561580222,
      "grad_norm": 3.0625,
      "learning_rate": 3.874000171343567e-05,
      "loss": 0.9064,
      "step": 259090
    },
    {
      "epoch": 0.9080809036649178,
      "grad_norm": 3.171875,
      "learning_rate": 3.8739352684771974e-05,
      "loss": 0.9159,
      "step": 259100
    },
    {
      "epoch": 0.9081159511718134,
      "grad_norm": 2.515625,
      "learning_rate": 3.873870365610827e-05,
      "loss": 0.8661,
      "step": 259110
    },
    {
      "epoch": 0.908150998678709,
      "grad_norm": 2.609375,
      "learning_rate": 3.873805462744457e-05,
      "loss": 0.8266,
      "step": 259120
    },
    {
      "epoch": 0.9081860461856046,
      "grad_norm": 2.859375,
      "learning_rate": 3.8737405598780865e-05,
      "loss": 0.8771,
      "step": 259130
    },
    {
      "epoch": 0.9082210936925001,
      "grad_norm": 2.671875,
      "learning_rate": 3.8736756570117166e-05,
      "loss": 0.833,
      "step": 259140
    },
    {
      "epoch": 0.9082561411993958,
      "grad_norm": 2.515625,
      "learning_rate": 3.873610754145347e-05,
      "loss": 0.8578,
      "step": 259150
    },
    {
      "epoch": 0.9082911887062913,
      "grad_norm": 3.0,
      "learning_rate": 3.873545851278976e-05,
      "loss": 0.9614,
      "step": 259160
    },
    {
      "epoch": 0.908326236213187,
      "grad_norm": 3.0,
      "learning_rate": 3.8734809484126064e-05,
      "loss": 0.9117,
      "step": 259170
    },
    {
      "epoch": 0.9083612837200826,
      "grad_norm": 3.15625,
      "learning_rate": 3.873416045546236e-05,
      "loss": 0.8719,
      "step": 259180
    },
    {
      "epoch": 0.9083963312269782,
      "grad_norm": 2.578125,
      "learning_rate": 3.873351142679866e-05,
      "loss": 0.8282,
      "step": 259190
    },
    {
      "epoch": 0.9084313787338738,
      "grad_norm": 2.625,
      "learning_rate": 3.873286239813495e-05,
      "loss": 0.9173,
      "step": 259200
    },
    {
      "epoch": 0.9084664262407693,
      "grad_norm": 2.515625,
      "learning_rate": 3.873221336947125e-05,
      "loss": 0.7763,
      "step": 259210
    },
    {
      "epoch": 0.908501473747665,
      "grad_norm": 3.234375,
      "learning_rate": 3.8731564340807544e-05,
      "loss": 0.9611,
      "step": 259220
    },
    {
      "epoch": 0.9085365212545605,
      "grad_norm": 3.1875,
      "learning_rate": 3.8730915312143845e-05,
      "loss": 0.8024,
      "step": 259230
    },
    {
      "epoch": 0.9085715687614562,
      "grad_norm": 2.765625,
      "learning_rate": 3.8730266283480146e-05,
      "loss": 0.8015,
      "step": 259240
    },
    {
      "epoch": 0.9086066162683517,
      "grad_norm": 2.515625,
      "learning_rate": 3.872961725481644e-05,
      "loss": 0.8148,
      "step": 259250
    },
    {
      "epoch": 0.9086416637752474,
      "grad_norm": 3.109375,
      "learning_rate": 3.872896822615274e-05,
      "loss": 0.8862,
      "step": 259260
    },
    {
      "epoch": 0.9086767112821429,
      "grad_norm": 3.578125,
      "learning_rate": 3.872831919748904e-05,
      "loss": 0.9558,
      "step": 259270
    },
    {
      "epoch": 0.9087117587890385,
      "grad_norm": 3.15625,
      "learning_rate": 3.872767016882534e-05,
      "loss": 0.824,
      "step": 259280
    },
    {
      "epoch": 0.9087468062959342,
      "grad_norm": 3.234375,
      "learning_rate": 3.872702114016163e-05,
      "loss": 0.8808,
      "step": 259290
    },
    {
      "epoch": 0.9087818538028297,
      "grad_norm": 2.546875,
      "learning_rate": 3.8726372111497934e-05,
      "loss": 0.9031,
      "step": 259300
    },
    {
      "epoch": 0.9088169013097254,
      "grad_norm": 2.796875,
      "learning_rate": 3.872572308283423e-05,
      "loss": 0.8242,
      "step": 259310
    },
    {
      "epoch": 0.9088519488166209,
      "grad_norm": 3.265625,
      "learning_rate": 3.872507405417053e-05,
      "loss": 0.8396,
      "step": 259320
    },
    {
      "epoch": 0.9088869963235165,
      "grad_norm": 2.453125,
      "learning_rate": 3.8724425025506825e-05,
      "loss": 0.923,
      "step": 259330
    },
    {
      "epoch": 0.9089220438304121,
      "grad_norm": 3.171875,
      "learning_rate": 3.8723775996843126e-05,
      "loss": 0.832,
      "step": 259340
    },
    {
      "epoch": 0.9089570913373077,
      "grad_norm": 3.078125,
      "learning_rate": 3.872312696817942e-05,
      "loss": 0.8342,
      "step": 259350
    },
    {
      "epoch": 0.9089921388442033,
      "grad_norm": 3.0,
      "learning_rate": 3.872247793951572e-05,
      "loss": 0.9506,
      "step": 259360
    },
    {
      "epoch": 0.9090271863510989,
      "grad_norm": 3.0625,
      "learning_rate": 3.8721828910852024e-05,
      "loss": 0.8767,
      "step": 259370
    },
    {
      "epoch": 0.9090622338579946,
      "grad_norm": 2.65625,
      "learning_rate": 3.872117988218832e-05,
      "loss": 0.8438,
      "step": 259380
    },
    {
      "epoch": 0.9090972813648901,
      "grad_norm": 3.046875,
      "learning_rate": 3.872053085352462e-05,
      "loss": 0.9279,
      "step": 259390
    },
    {
      "epoch": 0.9091323288717857,
      "grad_norm": 3.15625,
      "learning_rate": 3.8719881824860914e-05,
      "loss": 0.9113,
      "step": 259400
    },
    {
      "epoch": 0.9091673763786813,
      "grad_norm": 3.1875,
      "learning_rate": 3.8719232796197216e-05,
      "loss": 0.9856,
      "step": 259410
    },
    {
      "epoch": 0.9092024238855769,
      "grad_norm": 3.40625,
      "learning_rate": 3.871858376753351e-05,
      "loss": 0.9194,
      "step": 259420
    },
    {
      "epoch": 0.9092374713924725,
      "grad_norm": 2.75,
      "learning_rate": 3.871793473886981e-05,
      "loss": 0.8547,
      "step": 259430
    },
    {
      "epoch": 0.9092725188993681,
      "grad_norm": 3.0,
      "learning_rate": 3.8717285710206106e-05,
      "loss": 0.9414,
      "step": 259440
    },
    {
      "epoch": 0.9093075664062636,
      "grad_norm": 2.984375,
      "learning_rate": 3.871663668154241e-05,
      "loss": 0.8943,
      "step": 259450
    },
    {
      "epoch": 0.9093426139131593,
      "grad_norm": 3.125,
      "learning_rate": 3.87159876528787e-05,
      "loss": 0.9186,
      "step": 259460
    },
    {
      "epoch": 0.9093776614200549,
      "grad_norm": 3.4375,
      "learning_rate": 3.8715338624215004e-05,
      "loss": 0.8465,
      "step": 259470
    },
    {
      "epoch": 0.9094127089269505,
      "grad_norm": 2.765625,
      "learning_rate": 3.87146895955513e-05,
      "loss": 0.8794,
      "step": 259480
    },
    {
      "epoch": 0.9094477564338461,
      "grad_norm": 2.875,
      "learning_rate": 3.87140405668876e-05,
      "loss": 0.8667,
      "step": 259490
    },
    {
      "epoch": 0.9094828039407417,
      "grad_norm": 2.53125,
      "learning_rate": 3.8713391538223894e-05,
      "loss": 0.8554,
      "step": 259500
    },
    {
      "epoch": 0.9095178514476373,
      "grad_norm": 2.9375,
      "learning_rate": 3.8712742509560196e-05,
      "loss": 0.8485,
      "step": 259510
    },
    {
      "epoch": 0.9095528989545328,
      "grad_norm": 2.828125,
      "learning_rate": 3.87120934808965e-05,
      "loss": 0.9104,
      "step": 259520
    },
    {
      "epoch": 0.9095879464614285,
      "grad_norm": 2.78125,
      "learning_rate": 3.871144445223279e-05,
      "loss": 0.8673,
      "step": 259530
    },
    {
      "epoch": 0.909622993968324,
      "grad_norm": 2.59375,
      "learning_rate": 3.871079542356909e-05,
      "loss": 0.8884,
      "step": 259540
    },
    {
      "epoch": 0.9096580414752197,
      "grad_norm": 3.34375,
      "learning_rate": 3.871014639490539e-05,
      "loss": 0.8948,
      "step": 259550
    },
    {
      "epoch": 0.9096930889821152,
      "grad_norm": 2.671875,
      "learning_rate": 3.870949736624169e-05,
      "loss": 0.8327,
      "step": 259560
    },
    {
      "epoch": 0.9097281364890109,
      "grad_norm": 2.96875,
      "learning_rate": 3.8708848337577984e-05,
      "loss": 0.8425,
      "step": 259570
    },
    {
      "epoch": 0.9097631839959065,
      "grad_norm": 3.265625,
      "learning_rate": 3.870819930891428e-05,
      "loss": 0.8796,
      "step": 259580
    },
    {
      "epoch": 0.909798231502802,
      "grad_norm": 2.859375,
      "learning_rate": 3.870755028025057e-05,
      "loss": 0.9531,
      "step": 259590
    },
    {
      "epoch": 0.9098332790096977,
      "grad_norm": 2.734375,
      "learning_rate": 3.8706901251586874e-05,
      "loss": 0.8025,
      "step": 259600
    },
    {
      "epoch": 0.9098683265165932,
      "grad_norm": 2.90625,
      "learning_rate": 3.8706252222923176e-05,
      "loss": 0.8555,
      "step": 259610
    },
    {
      "epoch": 0.9099033740234889,
      "grad_norm": 3.21875,
      "learning_rate": 3.870560319425947e-05,
      "loss": 0.8893,
      "step": 259620
    },
    {
      "epoch": 0.9099384215303844,
      "grad_norm": 3.0625,
      "learning_rate": 3.870495416559577e-05,
      "loss": 0.9329,
      "step": 259630
    },
    {
      "epoch": 0.90997346903728,
      "grad_norm": 3.0625,
      "learning_rate": 3.8704305136932066e-05,
      "loss": 0.9492,
      "step": 259640
    },
    {
      "epoch": 0.9100085165441756,
      "grad_norm": 2.828125,
      "learning_rate": 3.870365610826837e-05,
      "loss": 0.9096,
      "step": 259650
    },
    {
      "epoch": 0.9100435640510712,
      "grad_norm": 2.71875,
      "learning_rate": 3.870300707960466e-05,
      "loss": 0.858,
      "step": 259660
    },
    {
      "epoch": 0.9100786115579669,
      "grad_norm": 2.96875,
      "learning_rate": 3.8702358050940964e-05,
      "loss": 0.9041,
      "step": 259670
    },
    {
      "epoch": 0.9101136590648624,
      "grad_norm": 3.734375,
      "learning_rate": 3.870170902227726e-05,
      "loss": 0.8425,
      "step": 259680
    },
    {
      "epoch": 0.9101487065717581,
      "grad_norm": 3.171875,
      "learning_rate": 3.870105999361356e-05,
      "loss": 0.8448,
      "step": 259690
    },
    {
      "epoch": 0.9101837540786536,
      "grad_norm": 3.03125,
      "learning_rate": 3.8700410964949854e-05,
      "loss": 0.8879,
      "step": 259700
    },
    {
      "epoch": 0.9102188015855492,
      "grad_norm": 2.953125,
      "learning_rate": 3.8699761936286156e-05,
      "loss": 0.8117,
      "step": 259710
    },
    {
      "epoch": 0.9102538490924448,
      "grad_norm": 3.453125,
      "learning_rate": 3.869911290762245e-05,
      "loss": 0.8976,
      "step": 259720
    },
    {
      "epoch": 0.9102888965993404,
      "grad_norm": 2.71875,
      "learning_rate": 3.869846387895875e-05,
      "loss": 0.9215,
      "step": 259730
    },
    {
      "epoch": 0.910323944106236,
      "grad_norm": 2.8125,
      "learning_rate": 3.869781485029505e-05,
      "loss": 0.8693,
      "step": 259740
    },
    {
      "epoch": 0.9103589916131316,
      "grad_norm": 2.8125,
      "learning_rate": 3.869716582163135e-05,
      "loss": 0.8878,
      "step": 259750
    },
    {
      "epoch": 0.9103940391200271,
      "grad_norm": 2.734375,
      "learning_rate": 3.869651679296765e-05,
      "loss": 0.8874,
      "step": 259760
    },
    {
      "epoch": 0.9104290866269228,
      "grad_norm": 2.8125,
      "learning_rate": 3.8695867764303944e-05,
      "loss": 0.8828,
      "step": 259770
    },
    {
      "epoch": 0.9104641341338184,
      "grad_norm": 3.078125,
      "learning_rate": 3.8695218735640245e-05,
      "loss": 0.8679,
      "step": 259780
    },
    {
      "epoch": 0.910499181640714,
      "grad_norm": 3.046875,
      "learning_rate": 3.869456970697654e-05,
      "loss": 0.8849,
      "step": 259790
    },
    {
      "epoch": 0.9105342291476096,
      "grad_norm": 2.8125,
      "learning_rate": 3.869392067831284e-05,
      "loss": 0.7539,
      "step": 259800
    },
    {
      "epoch": 0.9105692766545052,
      "grad_norm": 2.859375,
      "learning_rate": 3.8693271649649136e-05,
      "loss": 0.929,
      "step": 259810
    },
    {
      "epoch": 0.9106043241614008,
      "grad_norm": 2.59375,
      "learning_rate": 3.869262262098544e-05,
      "loss": 0.9132,
      "step": 259820
    },
    {
      "epoch": 0.9106393716682963,
      "grad_norm": 2.828125,
      "learning_rate": 3.869197359232173e-05,
      "loss": 0.7992,
      "step": 259830
    },
    {
      "epoch": 0.910674419175192,
      "grad_norm": 3.3125,
      "learning_rate": 3.869132456365803e-05,
      "loss": 0.9576,
      "step": 259840
    },
    {
      "epoch": 0.9107094666820875,
      "grad_norm": 3.046875,
      "learning_rate": 3.869067553499433e-05,
      "loss": 0.9396,
      "step": 259850
    },
    {
      "epoch": 0.9107445141889832,
      "grad_norm": 3.171875,
      "learning_rate": 3.869002650633063e-05,
      "loss": 0.8839,
      "step": 259860
    },
    {
      "epoch": 0.9107795616958788,
      "grad_norm": 3.09375,
      "learning_rate": 3.868937747766693e-05,
      "loss": 0.798,
      "step": 259870
    },
    {
      "epoch": 0.9108146092027744,
      "grad_norm": 2.6875,
      "learning_rate": 3.8688728449003225e-05,
      "loss": 0.8951,
      "step": 259880
    },
    {
      "epoch": 0.91084965670967,
      "grad_norm": 3.09375,
      "learning_rate": 3.8688079420339526e-05,
      "loss": 0.9019,
      "step": 259890
    },
    {
      "epoch": 0.9108847042165655,
      "grad_norm": 2.65625,
      "learning_rate": 3.868743039167582e-05,
      "loss": 0.8815,
      "step": 259900
    },
    {
      "epoch": 0.9109197517234612,
      "grad_norm": 3.1875,
      "learning_rate": 3.868678136301212e-05,
      "loss": 0.8678,
      "step": 259910
    },
    {
      "epoch": 0.9109547992303567,
      "grad_norm": 2.84375,
      "learning_rate": 3.868613233434842e-05,
      "loss": 0.8577,
      "step": 259920
    },
    {
      "epoch": 0.9109898467372524,
      "grad_norm": 2.6875,
      "learning_rate": 3.868548330568472e-05,
      "loss": 0.9579,
      "step": 259930
    },
    {
      "epoch": 0.9110248942441479,
      "grad_norm": 2.828125,
      "learning_rate": 3.868483427702101e-05,
      "loss": 0.8282,
      "step": 259940
    },
    {
      "epoch": 0.9110599417510435,
      "grad_norm": 2.78125,
      "learning_rate": 3.8684185248357314e-05,
      "loss": 0.9092,
      "step": 259950
    },
    {
      "epoch": 0.9110949892579392,
      "grad_norm": 2.71875,
      "learning_rate": 3.868353621969361e-05,
      "loss": 0.803,
      "step": 259960
    },
    {
      "epoch": 0.9111300367648347,
      "grad_norm": 3.015625,
      "learning_rate": 3.8682887191029904e-05,
      "loss": 0.8867,
      "step": 259970
    },
    {
      "epoch": 0.9111650842717304,
      "grad_norm": 3.015625,
      "learning_rate": 3.8682238162366205e-05,
      "loss": 0.8981,
      "step": 259980
    },
    {
      "epoch": 0.9112001317786259,
      "grad_norm": 2.4375,
      "learning_rate": 3.86815891337025e-05,
      "loss": 0.8153,
      "step": 259990
    },
    {
      "epoch": 0.9112351792855216,
      "grad_norm": 2.953125,
      "learning_rate": 3.86809401050388e-05,
      "loss": 0.8124,
      "step": 260000
    },
    {
      "epoch": 0.9112351792855216,
      "eval_loss": 0.825437068939209,
      "eval_runtime": 556.4172,
      "eval_samples_per_second": 683.724,
      "eval_steps_per_second": 56.977,
      "step": 260000
    },
    {
      "epoch": 0.9112702267924171,
      "grad_norm": 3.265625,
      "learning_rate": 3.8680291076375096e-05,
      "loss": 0.8932,
      "step": 260010
    },
    {
      "epoch": 0.9113052742993127,
      "grad_norm": 2.859375,
      "learning_rate": 3.86796420477114e-05,
      "loss": 0.7822,
      "step": 260020
    },
    {
      "epoch": 0.9113403218062083,
      "grad_norm": 3.15625,
      "learning_rate": 3.867899301904769e-05,
      "loss": 0.8953,
      "step": 260030
    },
    {
      "epoch": 0.9113753693131039,
      "grad_norm": 2.84375,
      "learning_rate": 3.867834399038399e-05,
      "loss": 0.8921,
      "step": 260040
    },
    {
      "epoch": 0.9114104168199995,
      "grad_norm": 2.390625,
      "learning_rate": 3.867769496172029e-05,
      "loss": 0.8934,
      "step": 260050
    },
    {
      "epoch": 0.9114454643268951,
      "grad_norm": 3.109375,
      "learning_rate": 3.867704593305659e-05,
      "loss": 0.8393,
      "step": 260060
    },
    {
      "epoch": 0.9114805118337908,
      "grad_norm": 3.046875,
      "learning_rate": 3.8676396904392884e-05,
      "loss": 0.8863,
      "step": 260070
    },
    {
      "epoch": 0.9115155593406863,
      "grad_norm": 2.828125,
      "learning_rate": 3.8675747875729185e-05,
      "loss": 0.8639,
      "step": 260080
    },
    {
      "epoch": 0.9115506068475819,
      "grad_norm": 2.859375,
      "learning_rate": 3.867509884706548e-05,
      "loss": 0.8237,
      "step": 260090
    },
    {
      "epoch": 0.9115856543544775,
      "grad_norm": 2.828125,
      "learning_rate": 3.867444981840178e-05,
      "loss": 0.8531,
      "step": 260100
    },
    {
      "epoch": 0.9116207018613731,
      "grad_norm": 3.25,
      "learning_rate": 3.867380078973808e-05,
      "loss": 0.8681,
      "step": 260110
    },
    {
      "epoch": 0.9116557493682687,
      "grad_norm": 3.125,
      "learning_rate": 3.867315176107438e-05,
      "loss": 0.8662,
      "step": 260120
    },
    {
      "epoch": 0.9116907968751643,
      "grad_norm": 2.78125,
      "learning_rate": 3.867250273241068e-05,
      "loss": 0.8448,
      "step": 260130
    },
    {
      "epoch": 0.9117258443820598,
      "grad_norm": 2.609375,
      "learning_rate": 3.867185370374697e-05,
      "loss": 0.8314,
      "step": 260140
    },
    {
      "epoch": 0.9117608918889555,
      "grad_norm": 2.90625,
      "learning_rate": 3.8671204675083274e-05,
      "loss": 0.905,
      "step": 260150
    },
    {
      "epoch": 0.9117959393958511,
      "grad_norm": 3.046875,
      "learning_rate": 3.867055564641957e-05,
      "loss": 0.941,
      "step": 260160
    },
    {
      "epoch": 0.9118309869027467,
      "grad_norm": 3.015625,
      "learning_rate": 3.866990661775587e-05,
      "loss": 0.9381,
      "step": 260170
    },
    {
      "epoch": 0.9118660344096423,
      "grad_norm": 2.671875,
      "learning_rate": 3.8669257589092165e-05,
      "loss": 0.9099,
      "step": 260180
    },
    {
      "epoch": 0.9119010819165378,
      "grad_norm": 2.78125,
      "learning_rate": 3.8668608560428466e-05,
      "loss": 0.9321,
      "step": 260190
    },
    {
      "epoch": 0.9119361294234335,
      "grad_norm": 2.921875,
      "learning_rate": 3.866795953176476e-05,
      "loss": 0.8843,
      "step": 260200
    },
    {
      "epoch": 0.911971176930329,
      "grad_norm": 2.9375,
      "learning_rate": 3.866731050310106e-05,
      "loss": 0.9458,
      "step": 260210
    },
    {
      "epoch": 0.9120062244372247,
      "grad_norm": 2.609375,
      "learning_rate": 3.866666147443736e-05,
      "loss": 0.8624,
      "step": 260220
    },
    {
      "epoch": 0.9120412719441202,
      "grad_norm": 3.03125,
      "learning_rate": 3.866601244577366e-05,
      "loss": 0.8782,
      "step": 260230
    },
    {
      "epoch": 0.9120763194510159,
      "grad_norm": 3.0,
      "learning_rate": 3.866536341710996e-05,
      "loss": 0.8527,
      "step": 260240
    },
    {
      "epoch": 0.9121113669579114,
      "grad_norm": 3.03125,
      "learning_rate": 3.8664714388446254e-05,
      "loss": 0.9008,
      "step": 260250
    },
    {
      "epoch": 0.912146414464807,
      "grad_norm": 2.9375,
      "learning_rate": 3.8664065359782556e-05,
      "loss": 0.8663,
      "step": 260260
    },
    {
      "epoch": 0.9121814619717027,
      "grad_norm": 2.78125,
      "learning_rate": 3.866341633111885e-05,
      "loss": 0.8193,
      "step": 260270
    },
    {
      "epoch": 0.9122165094785982,
      "grad_norm": 3.125,
      "learning_rate": 3.866276730245515e-05,
      "loss": 0.8305,
      "step": 260280
    },
    {
      "epoch": 0.9122515569854939,
      "grad_norm": 2.90625,
      "learning_rate": 3.8662118273791446e-05,
      "loss": 0.8703,
      "step": 260290
    },
    {
      "epoch": 0.9122866044923894,
      "grad_norm": 2.8125,
      "learning_rate": 3.866146924512775e-05,
      "loss": 0.9326,
      "step": 260300
    },
    {
      "epoch": 0.912321651999285,
      "grad_norm": 2.625,
      "learning_rate": 3.866082021646404e-05,
      "loss": 0.927,
      "step": 260310
    },
    {
      "epoch": 0.9123566995061806,
      "grad_norm": 2.9375,
      "learning_rate": 3.8660171187800344e-05,
      "loss": 0.9797,
      "step": 260320
    },
    {
      "epoch": 0.9123917470130762,
      "grad_norm": 2.359375,
      "learning_rate": 3.865952215913664e-05,
      "loss": 0.8378,
      "step": 260330
    },
    {
      "epoch": 0.9124267945199718,
      "grad_norm": 2.890625,
      "learning_rate": 3.865887313047293e-05,
      "loss": 0.8919,
      "step": 260340
    },
    {
      "epoch": 0.9124618420268674,
      "grad_norm": 3.0625,
      "learning_rate": 3.8658224101809234e-05,
      "loss": 0.8411,
      "step": 260350
    },
    {
      "epoch": 0.9124968895337631,
      "grad_norm": 2.71875,
      "learning_rate": 3.865757507314553e-05,
      "loss": 0.8674,
      "step": 260360
    },
    {
      "epoch": 0.9125319370406586,
      "grad_norm": 3.109375,
      "learning_rate": 3.865692604448183e-05,
      "loss": 0.9179,
      "step": 260370
    },
    {
      "epoch": 0.9125669845475542,
      "grad_norm": 3.140625,
      "learning_rate": 3.8656277015818125e-05,
      "loss": 0.8563,
      "step": 260380
    },
    {
      "epoch": 0.9126020320544498,
      "grad_norm": 2.8125,
      "learning_rate": 3.8655627987154426e-05,
      "loss": 0.8894,
      "step": 260390
    },
    {
      "epoch": 0.9126370795613454,
      "grad_norm": 3.484375,
      "learning_rate": 3.865497895849072e-05,
      "loss": 0.9435,
      "step": 260400
    },
    {
      "epoch": 0.912672127068241,
      "grad_norm": 3.21875,
      "learning_rate": 3.865432992982702e-05,
      "loss": 0.9046,
      "step": 260410
    },
    {
      "epoch": 0.9127071745751366,
      "grad_norm": 2.984375,
      "learning_rate": 3.865368090116332e-05,
      "loss": 0.9229,
      "step": 260420
    },
    {
      "epoch": 0.9127422220820322,
      "grad_norm": 3.09375,
      "learning_rate": 3.865303187249962e-05,
      "loss": 0.8158,
      "step": 260430
    },
    {
      "epoch": 0.9127772695889278,
      "grad_norm": 2.859375,
      "learning_rate": 3.865238284383591e-05,
      "loss": 0.9624,
      "step": 260440
    },
    {
      "epoch": 0.9128123170958233,
      "grad_norm": 2.75,
      "learning_rate": 3.8651733815172214e-05,
      "loss": 0.8425,
      "step": 260450
    },
    {
      "epoch": 0.912847364602719,
      "grad_norm": 3.265625,
      "learning_rate": 3.865108478650851e-05,
      "loss": 0.9335,
      "step": 260460
    },
    {
      "epoch": 0.9128824121096146,
      "grad_norm": 3.03125,
      "learning_rate": 3.865043575784481e-05,
      "loss": 0.8833,
      "step": 260470
    },
    {
      "epoch": 0.9129174596165102,
      "grad_norm": 3.015625,
      "learning_rate": 3.864978672918111e-05,
      "loss": 0.8215,
      "step": 260480
    },
    {
      "epoch": 0.9129525071234058,
      "grad_norm": 2.984375,
      "learning_rate": 3.8649137700517406e-05,
      "loss": 0.9424,
      "step": 260490
    },
    {
      "epoch": 0.9129875546303013,
      "grad_norm": 3.015625,
      "learning_rate": 3.864848867185371e-05,
      "loss": 0.9236,
      "step": 260500
    },
    {
      "epoch": 0.913022602137197,
      "grad_norm": 2.796875,
      "learning_rate": 3.864783964319e-05,
      "loss": 0.9811,
      "step": 260510
    },
    {
      "epoch": 0.9130576496440925,
      "grad_norm": 3.234375,
      "learning_rate": 3.8647190614526304e-05,
      "loss": 0.7972,
      "step": 260520
    },
    {
      "epoch": 0.9130926971509882,
      "grad_norm": 3.1875,
      "learning_rate": 3.86465415858626e-05,
      "loss": 0.9352,
      "step": 260530
    },
    {
      "epoch": 0.9131277446578837,
      "grad_norm": 2.75,
      "learning_rate": 3.86458925571989e-05,
      "loss": 0.8937,
      "step": 260540
    },
    {
      "epoch": 0.9131627921647794,
      "grad_norm": 3.09375,
      "learning_rate": 3.8645243528535194e-05,
      "loss": 0.8274,
      "step": 260550
    },
    {
      "epoch": 0.913197839671675,
      "grad_norm": 3.15625,
      "learning_rate": 3.8644594499871496e-05,
      "loss": 0.8174,
      "step": 260560
    },
    {
      "epoch": 0.9132328871785705,
      "grad_norm": 2.671875,
      "learning_rate": 3.864394547120779e-05,
      "loss": 0.8787,
      "step": 260570
    },
    {
      "epoch": 0.9132679346854662,
      "grad_norm": 2.578125,
      "learning_rate": 3.864329644254409e-05,
      "loss": 0.9333,
      "step": 260580
    },
    {
      "epoch": 0.9133029821923617,
      "grad_norm": 3.0,
      "learning_rate": 3.8642647413880386e-05,
      "loss": 0.8016,
      "step": 260590
    },
    {
      "epoch": 0.9133380296992574,
      "grad_norm": 2.546875,
      "learning_rate": 3.864199838521669e-05,
      "loss": 0.8782,
      "step": 260600
    },
    {
      "epoch": 0.9133730772061529,
      "grad_norm": 2.34375,
      "learning_rate": 3.864134935655299e-05,
      "loss": 0.882,
      "step": 260610
    },
    {
      "epoch": 0.9134081247130486,
      "grad_norm": 2.921875,
      "learning_rate": 3.8640700327889284e-05,
      "loss": 0.8718,
      "step": 260620
    },
    {
      "epoch": 0.9134431722199441,
      "grad_norm": 2.78125,
      "learning_rate": 3.8640051299225585e-05,
      "loss": 0.9146,
      "step": 260630
    },
    {
      "epoch": 0.9134782197268397,
      "grad_norm": 2.90625,
      "learning_rate": 3.863940227056188e-05,
      "loss": 0.8635,
      "step": 260640
    },
    {
      "epoch": 0.9135132672337354,
      "grad_norm": 3.0,
      "learning_rate": 3.863875324189818e-05,
      "loss": 0.8701,
      "step": 260650
    },
    {
      "epoch": 0.9135483147406309,
      "grad_norm": 2.546875,
      "learning_rate": 3.8638104213234476e-05,
      "loss": 0.7746,
      "step": 260660
    },
    {
      "epoch": 0.9135833622475266,
      "grad_norm": 13.9375,
      "learning_rate": 3.863745518457078e-05,
      "loss": 0.8842,
      "step": 260670
    },
    {
      "epoch": 0.9136184097544221,
      "grad_norm": 3.6875,
      "learning_rate": 3.863680615590707e-05,
      "loss": 0.9644,
      "step": 260680
    },
    {
      "epoch": 0.9136534572613177,
      "grad_norm": 2.734375,
      "learning_rate": 3.863615712724337e-05,
      "loss": 0.8749,
      "step": 260690
    },
    {
      "epoch": 0.9136885047682133,
      "grad_norm": 3.296875,
      "learning_rate": 3.863550809857967e-05,
      "loss": 0.933,
      "step": 260700
    },
    {
      "epoch": 0.9137235522751089,
      "grad_norm": 2.984375,
      "learning_rate": 3.863485906991596e-05,
      "loss": 0.8584,
      "step": 260710
    },
    {
      "epoch": 0.9137585997820045,
      "grad_norm": 3.359375,
      "learning_rate": 3.8634210041252264e-05,
      "loss": 0.8906,
      "step": 260720
    },
    {
      "epoch": 0.9137936472889001,
      "grad_norm": 3.140625,
      "learning_rate": 3.863356101258856e-05,
      "loss": 0.943,
      "step": 260730
    },
    {
      "epoch": 0.9138286947957956,
      "grad_norm": 3.109375,
      "learning_rate": 3.863291198392486e-05,
      "loss": 0.8967,
      "step": 260740
    },
    {
      "epoch": 0.9138637423026913,
      "grad_norm": 2.625,
      "learning_rate": 3.8632262955261154e-05,
      "loss": 0.8577,
      "step": 260750
    },
    {
      "epoch": 0.9138987898095869,
      "grad_norm": 2.671875,
      "learning_rate": 3.8631613926597456e-05,
      "loss": 0.9089,
      "step": 260760
    },
    {
      "epoch": 0.9139338373164825,
      "grad_norm": 2.921875,
      "learning_rate": 3.863096489793375e-05,
      "loss": 0.9003,
      "step": 260770
    },
    {
      "epoch": 0.9139688848233781,
      "grad_norm": 2.890625,
      "learning_rate": 3.863031586927005e-05,
      "loss": 0.8425,
      "step": 260780
    },
    {
      "epoch": 0.9140039323302737,
      "grad_norm": 2.9375,
      "learning_rate": 3.8629666840606346e-05,
      "loss": 0.9119,
      "step": 260790
    },
    {
      "epoch": 0.9140389798371693,
      "grad_norm": 2.578125,
      "learning_rate": 3.862901781194265e-05,
      "loss": 0.8421,
      "step": 260800
    },
    {
      "epoch": 0.9140740273440648,
      "grad_norm": 2.859375,
      "learning_rate": 3.862836878327894e-05,
      "loss": 0.8312,
      "step": 260810
    },
    {
      "epoch": 0.9141090748509605,
      "grad_norm": 2.90625,
      "learning_rate": 3.8627719754615244e-05,
      "loss": 0.8647,
      "step": 260820
    },
    {
      "epoch": 0.914144122357856,
      "grad_norm": 3.125,
      "learning_rate": 3.8627070725951545e-05,
      "loss": 0.83,
      "step": 260830
    },
    {
      "epoch": 0.9141791698647517,
      "grad_norm": 2.6875,
      "learning_rate": 3.862642169728784e-05,
      "loss": 0.7786,
      "step": 260840
    },
    {
      "epoch": 0.9142142173716473,
      "grad_norm": 3.328125,
      "learning_rate": 3.862577266862414e-05,
      "loss": 0.9335,
      "step": 260850
    },
    {
      "epoch": 0.9142492648785429,
      "grad_norm": 2.921875,
      "learning_rate": 3.8625123639960436e-05,
      "loss": 0.7902,
      "step": 260860
    },
    {
      "epoch": 0.9142843123854385,
      "grad_norm": 2.859375,
      "learning_rate": 3.862447461129674e-05,
      "loss": 0.861,
      "step": 260870
    },
    {
      "epoch": 0.914319359892334,
      "grad_norm": 3.234375,
      "learning_rate": 3.862382558263303e-05,
      "loss": 0.8748,
      "step": 260880
    },
    {
      "epoch": 0.9143544073992297,
      "grad_norm": 2.578125,
      "learning_rate": 3.862317655396933e-05,
      "loss": 0.8866,
      "step": 260890
    },
    {
      "epoch": 0.9143894549061252,
      "grad_norm": 3.203125,
      "learning_rate": 3.862252752530563e-05,
      "loss": 0.8897,
      "step": 260900
    },
    {
      "epoch": 0.9144245024130209,
      "grad_norm": 3.515625,
      "learning_rate": 3.862187849664193e-05,
      "loss": 0.8173,
      "step": 260910
    },
    {
      "epoch": 0.9144595499199164,
      "grad_norm": 3.328125,
      "learning_rate": 3.8621229467978224e-05,
      "loss": 0.9375,
      "step": 260920
    },
    {
      "epoch": 0.914494597426812,
      "grad_norm": 2.640625,
      "learning_rate": 3.8620580439314525e-05,
      "loss": 0.7935,
      "step": 260930
    },
    {
      "epoch": 0.9145296449337076,
      "grad_norm": 3.046875,
      "learning_rate": 3.861993141065082e-05,
      "loss": 0.8855,
      "step": 260940
    },
    {
      "epoch": 0.9145646924406032,
      "grad_norm": 3.3125,
      "learning_rate": 3.861928238198712e-05,
      "loss": 0.8535,
      "step": 260950
    },
    {
      "epoch": 0.9145997399474989,
      "grad_norm": 3.203125,
      "learning_rate": 3.8618633353323416e-05,
      "loss": 0.9492,
      "step": 260960
    },
    {
      "epoch": 0.9146347874543944,
      "grad_norm": 2.9375,
      "learning_rate": 3.861798432465972e-05,
      "loss": 0.9036,
      "step": 260970
    },
    {
      "epoch": 0.9146698349612901,
      "grad_norm": 3.078125,
      "learning_rate": 3.861733529599602e-05,
      "loss": 0.8338,
      "step": 260980
    },
    {
      "epoch": 0.9147048824681856,
      "grad_norm": 3.109375,
      "learning_rate": 3.861668626733231e-05,
      "loss": 0.9568,
      "step": 260990
    },
    {
      "epoch": 0.9147399299750812,
      "grad_norm": 3.015625,
      "learning_rate": 3.8616037238668615e-05,
      "loss": 0.9101,
      "step": 261000
    },
    {
      "epoch": 0.9147749774819768,
      "grad_norm": 2.8125,
      "learning_rate": 3.861538821000491e-05,
      "loss": 0.8238,
      "step": 261010
    },
    {
      "epoch": 0.9148100249888724,
      "grad_norm": 3.375,
      "learning_rate": 3.861473918134121e-05,
      "loss": 0.8868,
      "step": 261020
    },
    {
      "epoch": 0.914845072495768,
      "grad_norm": 3.484375,
      "learning_rate": 3.8614090152677505e-05,
      "loss": 0.9817,
      "step": 261030
    },
    {
      "epoch": 0.9148801200026636,
      "grad_norm": 2.828125,
      "learning_rate": 3.861344112401381e-05,
      "loss": 0.9105,
      "step": 261040
    },
    {
      "epoch": 0.9149151675095593,
      "grad_norm": 2.5625,
      "learning_rate": 3.86127920953501e-05,
      "loss": 0.9199,
      "step": 261050
    },
    {
      "epoch": 0.9149502150164548,
      "grad_norm": 2.609375,
      "learning_rate": 3.86121430666864e-05,
      "loss": 0.8454,
      "step": 261060
    },
    {
      "epoch": 0.9149852625233504,
      "grad_norm": 2.890625,
      "learning_rate": 3.86114940380227e-05,
      "loss": 0.8846,
      "step": 261070
    },
    {
      "epoch": 0.915020310030246,
      "grad_norm": 3.03125,
      "learning_rate": 3.8610845009359e-05,
      "loss": 0.9353,
      "step": 261080
    },
    {
      "epoch": 0.9150553575371416,
      "grad_norm": 2.84375,
      "learning_rate": 3.861019598069529e-05,
      "loss": 0.8707,
      "step": 261090
    },
    {
      "epoch": 0.9150904050440372,
      "grad_norm": 3.140625,
      "learning_rate": 3.860954695203159e-05,
      "loss": 0.8119,
      "step": 261100
    },
    {
      "epoch": 0.9151254525509328,
      "grad_norm": 3.0625,
      "learning_rate": 3.860889792336789e-05,
      "loss": 0.8792,
      "step": 261110
    },
    {
      "epoch": 0.9151605000578283,
      "grad_norm": 3.078125,
      "learning_rate": 3.8608248894704184e-05,
      "loss": 0.847,
      "step": 261120
    },
    {
      "epoch": 0.915195547564724,
      "grad_norm": 3.203125,
      "learning_rate": 3.8607599866040485e-05,
      "loss": 0.9159,
      "step": 261130
    },
    {
      "epoch": 0.9152305950716196,
      "grad_norm": 2.828125,
      "learning_rate": 3.860695083737678e-05,
      "loss": 0.8299,
      "step": 261140
    },
    {
      "epoch": 0.9152656425785152,
      "grad_norm": 2.953125,
      "learning_rate": 3.860630180871308e-05,
      "loss": 0.8061,
      "step": 261150
    },
    {
      "epoch": 0.9153006900854108,
      "grad_norm": 2.75,
      "learning_rate": 3.8605652780049376e-05,
      "loss": 0.8389,
      "step": 261160
    },
    {
      "epoch": 0.9153357375923064,
      "grad_norm": 3.328125,
      "learning_rate": 3.860500375138568e-05,
      "loss": 0.8768,
      "step": 261170
    },
    {
      "epoch": 0.915370785099202,
      "grad_norm": 2.546875,
      "learning_rate": 3.860435472272197e-05,
      "loss": 0.8815,
      "step": 261180
    },
    {
      "epoch": 0.9154058326060975,
      "grad_norm": 2.625,
      "learning_rate": 3.860370569405827e-05,
      "loss": 0.8453,
      "step": 261190
    },
    {
      "epoch": 0.9154408801129932,
      "grad_norm": 3.046875,
      "learning_rate": 3.8603056665394575e-05,
      "loss": 0.9216,
      "step": 261200
    },
    {
      "epoch": 0.9154759276198887,
      "grad_norm": 3.09375,
      "learning_rate": 3.860240763673087e-05,
      "loss": 0.8287,
      "step": 261210
    },
    {
      "epoch": 0.9155109751267844,
      "grad_norm": 2.765625,
      "learning_rate": 3.860175860806717e-05,
      "loss": 0.904,
      "step": 261220
    },
    {
      "epoch": 0.9155460226336799,
      "grad_norm": 2.78125,
      "learning_rate": 3.8601109579403465e-05,
      "loss": 0.9052,
      "step": 261230
    },
    {
      "epoch": 0.9155810701405755,
      "grad_norm": 2.59375,
      "learning_rate": 3.860046055073977e-05,
      "loss": 0.8833,
      "step": 261240
    },
    {
      "epoch": 0.9156161176474712,
      "grad_norm": 2.6875,
      "learning_rate": 3.859981152207606e-05,
      "loss": 0.8857,
      "step": 261250
    },
    {
      "epoch": 0.9156511651543667,
      "grad_norm": 3.03125,
      "learning_rate": 3.859916249341236e-05,
      "loss": 0.8539,
      "step": 261260
    },
    {
      "epoch": 0.9156862126612624,
      "grad_norm": 3.34375,
      "learning_rate": 3.859851346474866e-05,
      "loss": 0.9346,
      "step": 261270
    },
    {
      "epoch": 0.9157212601681579,
      "grad_norm": 2.421875,
      "learning_rate": 3.859786443608496e-05,
      "loss": 0.7997,
      "step": 261280
    },
    {
      "epoch": 0.9157563076750536,
      "grad_norm": 3.234375,
      "learning_rate": 3.859721540742125e-05,
      "loss": 0.9032,
      "step": 261290
    },
    {
      "epoch": 0.9157913551819491,
      "grad_norm": 3.359375,
      "learning_rate": 3.8596566378757555e-05,
      "loss": 0.8911,
      "step": 261300
    },
    {
      "epoch": 0.9158264026888447,
      "grad_norm": 2.4375,
      "learning_rate": 3.859591735009385e-05,
      "loss": 0.8537,
      "step": 261310
    },
    {
      "epoch": 0.9158614501957403,
      "grad_norm": 2.65625,
      "learning_rate": 3.859526832143015e-05,
      "loss": 0.8789,
      "step": 261320
    },
    {
      "epoch": 0.9158964977026359,
      "grad_norm": 3.0625,
      "learning_rate": 3.8594619292766445e-05,
      "loss": 0.9094,
      "step": 261330
    },
    {
      "epoch": 0.9159315452095316,
      "grad_norm": 3.125,
      "learning_rate": 3.859397026410275e-05,
      "loss": 0.8923,
      "step": 261340
    },
    {
      "epoch": 0.9159665927164271,
      "grad_norm": 2.921875,
      "learning_rate": 3.859332123543905e-05,
      "loss": 0.8486,
      "step": 261350
    },
    {
      "epoch": 0.9160016402233228,
      "grad_norm": 2.90625,
      "learning_rate": 3.859267220677534e-05,
      "loss": 0.8657,
      "step": 261360
    },
    {
      "epoch": 0.9160366877302183,
      "grad_norm": 3.09375,
      "learning_rate": 3.8592023178111644e-05,
      "loss": 0.9138,
      "step": 261370
    },
    {
      "epoch": 0.9160717352371139,
      "grad_norm": 2.9375,
      "learning_rate": 3.859137414944794e-05,
      "loss": 0.9284,
      "step": 261380
    },
    {
      "epoch": 0.9161067827440095,
      "grad_norm": 2.734375,
      "learning_rate": 3.859072512078424e-05,
      "loss": 0.9601,
      "step": 261390
    },
    {
      "epoch": 0.9161418302509051,
      "grad_norm": 2.65625,
      "learning_rate": 3.8590076092120535e-05,
      "loss": 0.7985,
      "step": 261400
    },
    {
      "epoch": 0.9161768777578007,
      "grad_norm": 2.828125,
      "learning_rate": 3.8589427063456836e-05,
      "loss": 0.9503,
      "step": 261410
    },
    {
      "epoch": 0.9162119252646963,
      "grad_norm": 3.25,
      "learning_rate": 3.858877803479313e-05,
      "loss": 0.9414,
      "step": 261420
    },
    {
      "epoch": 0.9162469727715918,
      "grad_norm": 2.671875,
      "learning_rate": 3.858812900612943e-05,
      "loss": 0.9361,
      "step": 261430
    },
    {
      "epoch": 0.9162820202784875,
      "grad_norm": 3.171875,
      "learning_rate": 3.858747997746573e-05,
      "loss": 0.8984,
      "step": 261440
    },
    {
      "epoch": 0.9163170677853831,
      "grad_norm": 2.96875,
      "learning_rate": 3.858683094880203e-05,
      "loss": 0.8328,
      "step": 261450
    },
    {
      "epoch": 0.9163521152922787,
      "grad_norm": 3.015625,
      "learning_rate": 3.858618192013832e-05,
      "loss": 0.9103,
      "step": 261460
    },
    {
      "epoch": 0.9163871627991743,
      "grad_norm": 3.078125,
      "learning_rate": 3.858553289147462e-05,
      "loss": 0.972,
      "step": 261470
    },
    {
      "epoch": 0.9164222103060699,
      "grad_norm": 3.015625,
      "learning_rate": 3.858488386281092e-05,
      "loss": 0.8094,
      "step": 261480
    },
    {
      "epoch": 0.9164572578129655,
      "grad_norm": 3.078125,
      "learning_rate": 3.858423483414721e-05,
      "loss": 0.8623,
      "step": 261490
    },
    {
      "epoch": 0.916492305319861,
      "grad_norm": 3.0,
      "learning_rate": 3.8583585805483515e-05,
      "loss": 0.8897,
      "step": 261500
    },
    {
      "epoch": 0.9165273528267567,
      "grad_norm": 2.8125,
      "learning_rate": 3.858293677681981e-05,
      "loss": 0.8377,
      "step": 261510
    },
    {
      "epoch": 0.9165624003336522,
      "grad_norm": 2.90625,
      "learning_rate": 3.858228774815611e-05,
      "loss": 0.9355,
      "step": 261520
    },
    {
      "epoch": 0.9165974478405479,
      "grad_norm": 2.984375,
      "learning_rate": 3.8581638719492405e-05,
      "loss": 0.8734,
      "step": 261530
    },
    {
      "epoch": 0.9166324953474435,
      "grad_norm": 3.046875,
      "learning_rate": 3.858098969082871e-05,
      "loss": 0.8701,
      "step": 261540
    },
    {
      "epoch": 0.916667542854339,
      "grad_norm": 3.21875,
      "learning_rate": 3.8580340662165e-05,
      "loss": 0.8632,
      "step": 261550
    },
    {
      "epoch": 0.9167025903612347,
      "grad_norm": 2.46875,
      "learning_rate": 3.85796916335013e-05,
      "loss": 0.9772,
      "step": 261560
    },
    {
      "epoch": 0.9167376378681302,
      "grad_norm": 2.71875,
      "learning_rate": 3.8579042604837604e-05,
      "loss": 0.9209,
      "step": 261570
    },
    {
      "epoch": 0.9167726853750259,
      "grad_norm": 2.859375,
      "learning_rate": 3.85783935761739e-05,
      "loss": 0.8496,
      "step": 261580
    },
    {
      "epoch": 0.9168077328819214,
      "grad_norm": 2.703125,
      "learning_rate": 3.85777445475102e-05,
      "loss": 0.864,
      "step": 261590
    },
    {
      "epoch": 0.9168427803888171,
      "grad_norm": 3.03125,
      "learning_rate": 3.8577095518846495e-05,
      "loss": 0.8801,
      "step": 261600
    },
    {
      "epoch": 0.9168778278957126,
      "grad_norm": 3.09375,
      "learning_rate": 3.8576446490182796e-05,
      "loss": 0.9297,
      "step": 261610
    },
    {
      "epoch": 0.9169128754026082,
      "grad_norm": 2.6875,
      "learning_rate": 3.857579746151909e-05,
      "loss": 0.8327,
      "step": 261620
    },
    {
      "epoch": 0.9169479229095039,
      "grad_norm": 2.5625,
      "learning_rate": 3.857514843285539e-05,
      "loss": 0.8925,
      "step": 261630
    },
    {
      "epoch": 0.9169829704163994,
      "grad_norm": 3.109375,
      "learning_rate": 3.857449940419169e-05,
      "loss": 0.8905,
      "step": 261640
    },
    {
      "epoch": 0.9170180179232951,
      "grad_norm": 2.953125,
      "learning_rate": 3.857385037552799e-05,
      "loss": 0.8574,
      "step": 261650
    },
    {
      "epoch": 0.9170530654301906,
      "grad_norm": 2.578125,
      "learning_rate": 3.857320134686428e-05,
      "loss": 0.8664,
      "step": 261660
    },
    {
      "epoch": 0.9170881129370863,
      "grad_norm": 2.609375,
      "learning_rate": 3.8572552318200584e-05,
      "loss": 0.8438,
      "step": 261670
    },
    {
      "epoch": 0.9171231604439818,
      "grad_norm": 3.046875,
      "learning_rate": 3.857190328953688e-05,
      "loss": 0.9131,
      "step": 261680
    },
    {
      "epoch": 0.9171582079508774,
      "grad_norm": 2.8125,
      "learning_rate": 3.857125426087318e-05,
      "loss": 0.9175,
      "step": 261690
    },
    {
      "epoch": 0.917193255457773,
      "grad_norm": 3.140625,
      "learning_rate": 3.8570605232209475e-05,
      "loss": 0.9338,
      "step": 261700
    },
    {
      "epoch": 0.9172283029646686,
      "grad_norm": 3.0,
      "learning_rate": 3.8569956203545776e-05,
      "loss": 0.8586,
      "step": 261710
    },
    {
      "epoch": 0.9172633504715642,
      "grad_norm": 3.171875,
      "learning_rate": 3.856930717488208e-05,
      "loss": 0.945,
      "step": 261720
    },
    {
      "epoch": 0.9172983979784598,
      "grad_norm": 3.046875,
      "learning_rate": 3.856865814621837e-05,
      "loss": 0.9408,
      "step": 261730
    },
    {
      "epoch": 0.9173334454853554,
      "grad_norm": 3.53125,
      "learning_rate": 3.8568009117554673e-05,
      "loss": 0.8506,
      "step": 261740
    },
    {
      "epoch": 0.917368492992251,
      "grad_norm": 2.890625,
      "learning_rate": 3.856736008889097e-05,
      "loss": 0.8686,
      "step": 261750
    },
    {
      "epoch": 0.9174035404991466,
      "grad_norm": 2.890625,
      "learning_rate": 3.856671106022727e-05,
      "loss": 0.869,
      "step": 261760
    },
    {
      "epoch": 0.9174385880060422,
      "grad_norm": 2.578125,
      "learning_rate": 3.8566062031563564e-05,
      "loss": 0.932,
      "step": 261770
    },
    {
      "epoch": 0.9174736355129378,
      "grad_norm": 2.8125,
      "learning_rate": 3.8565413002899865e-05,
      "loss": 0.8207,
      "step": 261780
    },
    {
      "epoch": 0.9175086830198333,
      "grad_norm": 3.234375,
      "learning_rate": 3.856476397423616e-05,
      "loss": 0.9092,
      "step": 261790
    },
    {
      "epoch": 0.917543730526729,
      "grad_norm": 2.75,
      "learning_rate": 3.856411494557246e-05,
      "loss": 0.8338,
      "step": 261800
    },
    {
      "epoch": 0.9175787780336245,
      "grad_norm": 2.796875,
      "learning_rate": 3.8563465916908756e-05,
      "loss": 0.9306,
      "step": 261810
    },
    {
      "epoch": 0.9176138255405202,
      "grad_norm": 3.265625,
      "learning_rate": 3.856281688824506e-05,
      "loss": 0.9736,
      "step": 261820
    },
    {
      "epoch": 0.9176488730474158,
      "grad_norm": 2.90625,
      "learning_rate": 3.856216785958135e-05,
      "loss": 0.8873,
      "step": 261830
    },
    {
      "epoch": 0.9176839205543114,
      "grad_norm": 2.890625,
      "learning_rate": 3.856151883091765e-05,
      "loss": 0.901,
      "step": 261840
    },
    {
      "epoch": 0.917718968061207,
      "grad_norm": 3.265625,
      "learning_rate": 3.856086980225395e-05,
      "loss": 0.9797,
      "step": 261850
    },
    {
      "epoch": 0.9177540155681025,
      "grad_norm": 2.671875,
      "learning_rate": 3.856022077359024e-05,
      "loss": 0.9848,
      "step": 261860
    },
    {
      "epoch": 0.9177890630749982,
      "grad_norm": 3.3125,
      "learning_rate": 3.8559571744926544e-05,
      "loss": 0.868,
      "step": 261870
    },
    {
      "epoch": 0.9178241105818937,
      "grad_norm": 2.625,
      "learning_rate": 3.855892271626284e-05,
      "loss": 0.8845,
      "step": 261880
    },
    {
      "epoch": 0.9178591580887894,
      "grad_norm": 3.265625,
      "learning_rate": 3.855827368759914e-05,
      "loss": 0.9039,
      "step": 261890
    },
    {
      "epoch": 0.9178942055956849,
      "grad_norm": 2.34375,
      "learning_rate": 3.8557624658935435e-05,
      "loss": 0.7611,
      "step": 261900
    },
    {
      "epoch": 0.9179292531025806,
      "grad_norm": 2.6875,
      "learning_rate": 3.8556975630271736e-05,
      "loss": 0.8719,
      "step": 261910
    },
    {
      "epoch": 0.9179643006094761,
      "grad_norm": 2.84375,
      "learning_rate": 3.855632660160803e-05,
      "loss": 0.9041,
      "step": 261920
    },
    {
      "epoch": 0.9179993481163717,
      "grad_norm": 3.15625,
      "learning_rate": 3.855567757294433e-05,
      "loss": 0.7779,
      "step": 261930
    },
    {
      "epoch": 0.9180343956232674,
      "grad_norm": 2.96875,
      "learning_rate": 3.8555028544280633e-05,
      "loss": 0.8959,
      "step": 261940
    },
    {
      "epoch": 0.9180694431301629,
      "grad_norm": 2.984375,
      "learning_rate": 3.855437951561693e-05,
      "loss": 0.9372,
      "step": 261950
    },
    {
      "epoch": 0.9181044906370586,
      "grad_norm": 2.734375,
      "learning_rate": 3.855373048695323e-05,
      "loss": 0.8988,
      "step": 261960
    },
    {
      "epoch": 0.9181395381439541,
      "grad_norm": 2.890625,
      "learning_rate": 3.8553081458289524e-05,
      "loss": 0.9259,
      "step": 261970
    },
    {
      "epoch": 0.9181745856508498,
      "grad_norm": 3.390625,
      "learning_rate": 3.8552432429625825e-05,
      "loss": 0.9089,
      "step": 261980
    },
    {
      "epoch": 0.9182096331577453,
      "grad_norm": 2.609375,
      "learning_rate": 3.855178340096212e-05,
      "loss": 0.8925,
      "step": 261990
    },
    {
      "epoch": 0.9182446806646409,
      "grad_norm": 2.46875,
      "learning_rate": 3.855113437229842e-05,
      "loss": 0.862,
      "step": 262000
    },
    {
      "epoch": 0.9182797281715365,
      "grad_norm": 2.921875,
      "learning_rate": 3.8550485343634716e-05,
      "loss": 0.8508,
      "step": 262010
    },
    {
      "epoch": 0.9183147756784321,
      "grad_norm": 3.109375,
      "learning_rate": 3.854983631497102e-05,
      "loss": 0.8184,
      "step": 262020
    },
    {
      "epoch": 0.9183498231853278,
      "grad_norm": 2.78125,
      "learning_rate": 3.854918728630731e-05,
      "loss": 0.905,
      "step": 262030
    },
    {
      "epoch": 0.9183848706922233,
      "grad_norm": 2.921875,
      "learning_rate": 3.8548538257643613e-05,
      "loss": 0.9477,
      "step": 262040
    },
    {
      "epoch": 0.918419918199119,
      "grad_norm": 3.234375,
      "learning_rate": 3.854788922897991e-05,
      "loss": 0.89,
      "step": 262050
    },
    {
      "epoch": 0.9184549657060145,
      "grad_norm": 2.765625,
      "learning_rate": 3.854724020031621e-05,
      "loss": 0.8946,
      "step": 262060
    },
    {
      "epoch": 0.9184900132129101,
      "grad_norm": 2.828125,
      "learning_rate": 3.854659117165251e-05,
      "loss": 0.8059,
      "step": 262070
    },
    {
      "epoch": 0.9185250607198057,
      "grad_norm": 3.328125,
      "learning_rate": 3.8545942142988805e-05,
      "loss": 0.9149,
      "step": 262080
    },
    {
      "epoch": 0.9185601082267013,
      "grad_norm": 2.90625,
      "learning_rate": 3.854529311432511e-05,
      "loss": 0.802,
      "step": 262090
    },
    {
      "epoch": 0.9185951557335968,
      "grad_norm": 3.015625,
      "learning_rate": 3.85446440856614e-05,
      "loss": 0.9158,
      "step": 262100
    },
    {
      "epoch": 0.9186302032404925,
      "grad_norm": 3.109375,
      "learning_rate": 3.85439950569977e-05,
      "loss": 0.8771,
      "step": 262110
    },
    {
      "epoch": 0.918665250747388,
      "grad_norm": 2.796875,
      "learning_rate": 3.8543346028334e-05,
      "loss": 0.8966,
      "step": 262120
    },
    {
      "epoch": 0.9187002982542837,
      "grad_norm": 3.125,
      "learning_rate": 3.85426969996703e-05,
      "loss": 0.9203,
      "step": 262130
    },
    {
      "epoch": 0.9187353457611793,
      "grad_norm": 2.765625,
      "learning_rate": 3.8542047971006593e-05,
      "loss": 0.8393,
      "step": 262140
    },
    {
      "epoch": 0.9187703932680749,
      "grad_norm": 2.890625,
      "learning_rate": 3.8541398942342895e-05,
      "loss": 0.893,
      "step": 262150
    },
    {
      "epoch": 0.9188054407749705,
      "grad_norm": 2.921875,
      "learning_rate": 3.854074991367919e-05,
      "loss": 0.8872,
      "step": 262160
    },
    {
      "epoch": 0.918840488281866,
      "grad_norm": 2.671875,
      "learning_rate": 3.854010088501549e-05,
      "loss": 0.7944,
      "step": 262170
    },
    {
      "epoch": 0.9188755357887617,
      "grad_norm": 3.40625,
      "learning_rate": 3.8539451856351785e-05,
      "loss": 0.8989,
      "step": 262180
    },
    {
      "epoch": 0.9189105832956572,
      "grad_norm": 3.1875,
      "learning_rate": 3.853880282768809e-05,
      "loss": 0.8661,
      "step": 262190
    },
    {
      "epoch": 0.9189456308025529,
      "grad_norm": 3.703125,
      "learning_rate": 3.853815379902438e-05,
      "loss": 0.8438,
      "step": 262200
    },
    {
      "epoch": 0.9189806783094484,
      "grad_norm": 3.0,
      "learning_rate": 3.8537504770360676e-05,
      "loss": 0.8554,
      "step": 262210
    },
    {
      "epoch": 0.919015725816344,
      "grad_norm": 3.578125,
      "learning_rate": 3.853685574169698e-05,
      "loss": 0.8836,
      "step": 262220
    },
    {
      "epoch": 0.9190507733232397,
      "grad_norm": 2.46875,
      "learning_rate": 3.853620671303327e-05,
      "loss": 0.8534,
      "step": 262230
    },
    {
      "epoch": 0.9190858208301352,
      "grad_norm": 3.09375,
      "learning_rate": 3.8535557684369573e-05,
      "loss": 0.8323,
      "step": 262240
    },
    {
      "epoch": 0.9191208683370309,
      "grad_norm": 2.625,
      "learning_rate": 3.853490865570587e-05,
      "loss": 0.9035,
      "step": 262250
    },
    {
      "epoch": 0.9191559158439264,
      "grad_norm": 3.0,
      "learning_rate": 3.853425962704217e-05,
      "loss": 0.9035,
      "step": 262260
    },
    {
      "epoch": 0.9191909633508221,
      "grad_norm": 2.90625,
      "learning_rate": 3.8533610598378464e-05,
      "loss": 0.791,
      "step": 262270
    },
    {
      "epoch": 0.9192260108577176,
      "grad_norm": 3.203125,
      "learning_rate": 3.8532961569714765e-05,
      "loss": 0.8122,
      "step": 262280
    },
    {
      "epoch": 0.9192610583646132,
      "grad_norm": 2.9375,
      "learning_rate": 3.853231254105106e-05,
      "loss": 0.876,
      "step": 262290
    },
    {
      "epoch": 0.9192961058715088,
      "grad_norm": 2.78125,
      "learning_rate": 3.853166351238736e-05,
      "loss": 0.9123,
      "step": 262300
    },
    {
      "epoch": 0.9193311533784044,
      "grad_norm": 3.21875,
      "learning_rate": 3.853101448372366e-05,
      "loss": 0.8995,
      "step": 262310
    },
    {
      "epoch": 0.9193662008853001,
      "grad_norm": 2.828125,
      "learning_rate": 3.853036545505996e-05,
      "loss": 0.8481,
      "step": 262320
    },
    {
      "epoch": 0.9194012483921956,
      "grad_norm": 2.65625,
      "learning_rate": 3.852971642639626e-05,
      "loss": 0.7778,
      "step": 262330
    },
    {
      "epoch": 0.9194362958990913,
      "grad_norm": 3.25,
      "learning_rate": 3.8529067397732553e-05,
      "loss": 0.9302,
      "step": 262340
    },
    {
      "epoch": 0.9194713434059868,
      "grad_norm": 2.671875,
      "learning_rate": 3.8528418369068855e-05,
      "loss": 0.9176,
      "step": 262350
    },
    {
      "epoch": 0.9195063909128824,
      "grad_norm": 2.296875,
      "learning_rate": 3.852776934040515e-05,
      "loss": 0.8509,
      "step": 262360
    },
    {
      "epoch": 0.919541438419778,
      "grad_norm": 2.78125,
      "learning_rate": 3.852712031174145e-05,
      "loss": 0.8714,
      "step": 262370
    },
    {
      "epoch": 0.9195764859266736,
      "grad_norm": 3.296875,
      "learning_rate": 3.8526471283077745e-05,
      "loss": 0.9733,
      "step": 262380
    },
    {
      "epoch": 0.9196115334335692,
      "grad_norm": 2.546875,
      "learning_rate": 3.852582225441405e-05,
      "loss": 0.8765,
      "step": 262390
    },
    {
      "epoch": 0.9196465809404648,
      "grad_norm": 3.359375,
      "learning_rate": 3.852517322575034e-05,
      "loss": 0.8807,
      "step": 262400
    },
    {
      "epoch": 0.9196816284473603,
      "grad_norm": 2.6875,
      "learning_rate": 3.852452419708664e-05,
      "loss": 0.8466,
      "step": 262410
    },
    {
      "epoch": 0.919716675954256,
      "grad_norm": 2.796875,
      "learning_rate": 3.852387516842294e-05,
      "loss": 0.8378,
      "step": 262420
    },
    {
      "epoch": 0.9197517234611516,
      "grad_norm": 2.765625,
      "learning_rate": 3.852322613975924e-05,
      "loss": 0.8863,
      "step": 262430
    },
    {
      "epoch": 0.9197867709680472,
      "grad_norm": 2.5,
      "learning_rate": 3.852257711109554e-05,
      "loss": 0.8559,
      "step": 262440
    },
    {
      "epoch": 0.9198218184749428,
      "grad_norm": 3.03125,
      "learning_rate": 3.8521928082431835e-05,
      "loss": 0.9188,
      "step": 262450
    },
    {
      "epoch": 0.9198568659818384,
      "grad_norm": 2.9375,
      "learning_rate": 3.8521279053768136e-05,
      "loss": 0.9469,
      "step": 262460
    },
    {
      "epoch": 0.919891913488734,
      "grad_norm": 2.921875,
      "learning_rate": 3.852063002510443e-05,
      "loss": 0.8091,
      "step": 262470
    },
    {
      "epoch": 0.9199269609956295,
      "grad_norm": 3.078125,
      "learning_rate": 3.851998099644073e-05,
      "loss": 0.8859,
      "step": 262480
    },
    {
      "epoch": 0.9199620085025252,
      "grad_norm": 3.328125,
      "learning_rate": 3.851933196777703e-05,
      "loss": 0.901,
      "step": 262490
    },
    {
      "epoch": 0.9199970560094207,
      "grad_norm": 2.8125,
      "learning_rate": 3.851868293911333e-05,
      "loss": 0.9405,
      "step": 262500
    },
    {
      "epoch": 0.9200321035163164,
      "grad_norm": 2.75,
      "learning_rate": 3.851803391044962e-05,
      "loss": 0.8551,
      "step": 262510
    },
    {
      "epoch": 0.920067151023212,
      "grad_norm": 2.921875,
      "learning_rate": 3.8517384881785924e-05,
      "loss": 1.0277,
      "step": 262520
    },
    {
      "epoch": 0.9201021985301076,
      "grad_norm": 2.65625,
      "learning_rate": 3.851673585312222e-05,
      "loss": 0.8726,
      "step": 262530
    },
    {
      "epoch": 0.9201372460370032,
      "grad_norm": 2.921875,
      "learning_rate": 3.851608682445852e-05,
      "loss": 0.8736,
      "step": 262540
    },
    {
      "epoch": 0.9201722935438987,
      "grad_norm": 2.90625,
      "learning_rate": 3.8515437795794815e-05,
      "loss": 0.9166,
      "step": 262550
    },
    {
      "epoch": 0.9202073410507944,
      "grad_norm": 3.03125,
      "learning_rate": 3.8514788767131116e-05,
      "loss": 0.8658,
      "step": 262560
    },
    {
      "epoch": 0.9202423885576899,
      "grad_norm": 3.359375,
      "learning_rate": 3.851413973846741e-05,
      "loss": 0.9042,
      "step": 262570
    },
    {
      "epoch": 0.9202774360645856,
      "grad_norm": 2.984375,
      "learning_rate": 3.851349070980371e-05,
      "loss": 0.9057,
      "step": 262580
    },
    {
      "epoch": 0.9203124835714811,
      "grad_norm": 3.53125,
      "learning_rate": 3.851284168114001e-05,
      "loss": 0.9402,
      "step": 262590
    },
    {
      "epoch": 0.9203475310783767,
      "grad_norm": 3.046875,
      "learning_rate": 3.85121926524763e-05,
      "loss": 0.9563,
      "step": 262600
    },
    {
      "epoch": 0.9203825785852723,
      "grad_norm": 2.984375,
      "learning_rate": 3.85115436238126e-05,
      "loss": 0.9265,
      "step": 262610
    },
    {
      "epoch": 0.9204176260921679,
      "grad_norm": 3.421875,
      "learning_rate": 3.85108945951489e-05,
      "loss": 0.9545,
      "step": 262620
    },
    {
      "epoch": 0.9204526735990636,
      "grad_norm": 2.84375,
      "learning_rate": 3.85102455664852e-05,
      "loss": 0.9129,
      "step": 262630
    },
    {
      "epoch": 0.9204877211059591,
      "grad_norm": 2.640625,
      "learning_rate": 3.8509596537821493e-05,
      "loss": 0.91,
      "step": 262640
    },
    {
      "epoch": 0.9205227686128548,
      "grad_norm": 2.78125,
      "learning_rate": 3.8508947509157795e-05,
      "loss": 0.9731,
      "step": 262650
    },
    {
      "epoch": 0.9205578161197503,
      "grad_norm": 2.859375,
      "learning_rate": 3.850829848049409e-05,
      "loss": 0.9218,
      "step": 262660
    },
    {
      "epoch": 0.9205928636266459,
      "grad_norm": 3.15625,
      "learning_rate": 3.850764945183039e-05,
      "loss": 0.8816,
      "step": 262670
    },
    {
      "epoch": 0.9206279111335415,
      "grad_norm": 2.90625,
      "learning_rate": 3.850700042316669e-05,
      "loss": 0.8378,
      "step": 262680
    },
    {
      "epoch": 0.9206629586404371,
      "grad_norm": 2.484375,
      "learning_rate": 3.850635139450299e-05,
      "loss": 0.9763,
      "step": 262690
    },
    {
      "epoch": 0.9206980061473327,
      "grad_norm": 2.96875,
      "learning_rate": 3.850570236583929e-05,
      "loss": 0.9013,
      "step": 262700
    },
    {
      "epoch": 0.9207330536542283,
      "grad_norm": 2.734375,
      "learning_rate": 3.850505333717558e-05,
      "loss": 0.8403,
      "step": 262710
    },
    {
      "epoch": 0.920768101161124,
      "grad_norm": 3.203125,
      "learning_rate": 3.8504404308511884e-05,
      "loss": 0.895,
      "step": 262720
    },
    {
      "epoch": 0.9208031486680195,
      "grad_norm": 2.734375,
      "learning_rate": 3.850375527984818e-05,
      "loss": 0.8736,
      "step": 262730
    },
    {
      "epoch": 0.9208381961749151,
      "grad_norm": 2.421875,
      "learning_rate": 3.850310625118448e-05,
      "loss": 0.8159,
      "step": 262740
    },
    {
      "epoch": 0.9208732436818107,
      "grad_norm": 3.03125,
      "learning_rate": 3.8502457222520775e-05,
      "loss": 0.8289,
      "step": 262750
    },
    {
      "epoch": 0.9209082911887063,
      "grad_norm": 2.78125,
      "learning_rate": 3.8501808193857076e-05,
      "loss": 0.926,
      "step": 262760
    },
    {
      "epoch": 0.9209433386956019,
      "grad_norm": 2.9375,
      "learning_rate": 3.850115916519337e-05,
      "loss": 0.8658,
      "step": 262770
    },
    {
      "epoch": 0.9209783862024975,
      "grad_norm": 2.765625,
      "learning_rate": 3.850051013652967e-05,
      "loss": 0.8638,
      "step": 262780
    },
    {
      "epoch": 0.921013433709393,
      "grad_norm": 3.0,
      "learning_rate": 3.849986110786597e-05,
      "loss": 0.8104,
      "step": 262790
    },
    {
      "epoch": 0.9210484812162887,
      "grad_norm": 3.28125,
      "learning_rate": 3.849921207920227e-05,
      "loss": 0.8741,
      "step": 262800
    },
    {
      "epoch": 0.9210835287231843,
      "grad_norm": 2.890625,
      "learning_rate": 3.849856305053857e-05,
      "loss": 0.916,
      "step": 262810
    },
    {
      "epoch": 0.9211185762300799,
      "grad_norm": 3.203125,
      "learning_rate": 3.8497914021874864e-05,
      "loss": 0.8182,
      "step": 262820
    },
    {
      "epoch": 0.9211536237369755,
      "grad_norm": 2.578125,
      "learning_rate": 3.8497264993211166e-05,
      "loss": 0.9119,
      "step": 262830
    },
    {
      "epoch": 0.921188671243871,
      "grad_norm": 3.0625,
      "learning_rate": 3.849661596454746e-05,
      "loss": 0.8024,
      "step": 262840
    },
    {
      "epoch": 0.9212237187507667,
      "grad_norm": 2.78125,
      "learning_rate": 3.849596693588376e-05,
      "loss": 0.9437,
      "step": 262850
    },
    {
      "epoch": 0.9212587662576622,
      "grad_norm": 2.75,
      "learning_rate": 3.8495317907220056e-05,
      "loss": 0.8973,
      "step": 262860
    },
    {
      "epoch": 0.9212938137645579,
      "grad_norm": 2.78125,
      "learning_rate": 3.849466887855636e-05,
      "loss": 0.8105,
      "step": 262870
    },
    {
      "epoch": 0.9213288612714534,
      "grad_norm": 2.90625,
      "learning_rate": 3.849401984989265e-05,
      "loss": 0.9178,
      "step": 262880
    },
    {
      "epoch": 0.9213639087783491,
      "grad_norm": 3.015625,
      "learning_rate": 3.8493370821228954e-05,
      "loss": 0.8536,
      "step": 262890
    },
    {
      "epoch": 0.9213989562852446,
      "grad_norm": 3.03125,
      "learning_rate": 3.849272179256525e-05,
      "loss": 0.8424,
      "step": 262900
    },
    {
      "epoch": 0.9214340037921402,
      "grad_norm": 3.296875,
      "learning_rate": 3.849207276390155e-05,
      "loss": 0.8486,
      "step": 262910
    },
    {
      "epoch": 0.9214690512990359,
      "grad_norm": 2.984375,
      "learning_rate": 3.8491423735237844e-05,
      "loss": 0.8931,
      "step": 262920
    },
    {
      "epoch": 0.9215040988059314,
      "grad_norm": 2.8125,
      "learning_rate": 3.8490774706574146e-05,
      "loss": 0.8704,
      "step": 262930
    },
    {
      "epoch": 0.9215391463128271,
      "grad_norm": 2.796875,
      "learning_rate": 3.849012567791044e-05,
      "loss": 0.901,
      "step": 262940
    },
    {
      "epoch": 0.9215741938197226,
      "grad_norm": 2.71875,
      "learning_rate": 3.848947664924674e-05,
      "loss": 0.8491,
      "step": 262950
    },
    {
      "epoch": 0.9216092413266183,
      "grad_norm": 2.78125,
      "learning_rate": 3.848882762058304e-05,
      "loss": 0.8185,
      "step": 262960
    },
    {
      "epoch": 0.9216442888335138,
      "grad_norm": 2.484375,
      "learning_rate": 3.848817859191933e-05,
      "loss": 0.8222,
      "step": 262970
    },
    {
      "epoch": 0.9216793363404094,
      "grad_norm": 2.65625,
      "learning_rate": 3.848752956325563e-05,
      "loss": 0.8041,
      "step": 262980
    },
    {
      "epoch": 0.921714383847305,
      "grad_norm": 3.046875,
      "learning_rate": 3.848688053459193e-05,
      "loss": 0.9235,
      "step": 262990
    },
    {
      "epoch": 0.9217494313542006,
      "grad_norm": 2.625,
      "learning_rate": 3.848623150592823e-05,
      "loss": 0.8642,
      "step": 263000
    },
    {
      "epoch": 0.9217844788610963,
      "grad_norm": 2.765625,
      "learning_rate": 3.848558247726452e-05,
      "loss": 0.9522,
      "step": 263010
    },
    {
      "epoch": 0.9218195263679918,
      "grad_norm": 3.1875,
      "learning_rate": 3.8484933448600824e-05,
      "loss": 0.8927,
      "step": 263020
    },
    {
      "epoch": 0.9218545738748875,
      "grad_norm": 3.109375,
      "learning_rate": 3.8484284419937126e-05,
      "loss": 0.8244,
      "step": 263030
    },
    {
      "epoch": 0.921889621381783,
      "grad_norm": 3.21875,
      "learning_rate": 3.848363539127342e-05,
      "loss": 0.8807,
      "step": 263040
    },
    {
      "epoch": 0.9219246688886786,
      "grad_norm": 2.59375,
      "learning_rate": 3.848298636260972e-05,
      "loss": 0.8463,
      "step": 263050
    },
    {
      "epoch": 0.9219597163955742,
      "grad_norm": 2.9375,
      "learning_rate": 3.8482337333946016e-05,
      "loss": 0.928,
      "step": 263060
    },
    {
      "epoch": 0.9219947639024698,
      "grad_norm": 3.046875,
      "learning_rate": 3.848168830528232e-05,
      "loss": 0.862,
      "step": 263070
    },
    {
      "epoch": 0.9220298114093654,
      "grad_norm": 3.28125,
      "learning_rate": 3.848103927661861e-05,
      "loss": 0.9251,
      "step": 263080
    },
    {
      "epoch": 0.922064858916261,
      "grad_norm": 2.828125,
      "learning_rate": 3.8480390247954914e-05,
      "loss": 0.8101,
      "step": 263090
    },
    {
      "epoch": 0.9220999064231565,
      "grad_norm": 2.515625,
      "learning_rate": 3.847974121929121e-05,
      "loss": 0.8403,
      "step": 263100
    },
    {
      "epoch": 0.9221349539300522,
      "grad_norm": 2.75,
      "learning_rate": 3.847909219062751e-05,
      "loss": 0.8353,
      "step": 263110
    },
    {
      "epoch": 0.9221700014369478,
      "grad_norm": 2.984375,
      "learning_rate": 3.8478443161963804e-05,
      "loss": 1.0221,
      "step": 263120
    },
    {
      "epoch": 0.9222050489438434,
      "grad_norm": 2.953125,
      "learning_rate": 3.8477794133300106e-05,
      "loss": 0.8409,
      "step": 263130
    },
    {
      "epoch": 0.922240096450739,
      "grad_norm": 3.0,
      "learning_rate": 3.84771451046364e-05,
      "loss": 0.8661,
      "step": 263140
    },
    {
      "epoch": 0.9222751439576345,
      "grad_norm": 3.0,
      "learning_rate": 3.84764960759727e-05,
      "loss": 0.9007,
      "step": 263150
    },
    {
      "epoch": 0.9223101914645302,
      "grad_norm": 3.0625,
      "learning_rate": 3.8475847047308996e-05,
      "loss": 0.8942,
      "step": 263160
    },
    {
      "epoch": 0.9223452389714257,
      "grad_norm": 2.625,
      "learning_rate": 3.84751980186453e-05,
      "loss": 0.9543,
      "step": 263170
    },
    {
      "epoch": 0.9223802864783214,
      "grad_norm": 2.625,
      "learning_rate": 3.84745489899816e-05,
      "loss": 0.8828,
      "step": 263180
    },
    {
      "epoch": 0.9224153339852169,
      "grad_norm": 3.03125,
      "learning_rate": 3.8473899961317894e-05,
      "loss": 0.9179,
      "step": 263190
    },
    {
      "epoch": 0.9224503814921126,
      "grad_norm": 2.484375,
      "learning_rate": 3.8473250932654195e-05,
      "loss": 0.8809,
      "step": 263200
    },
    {
      "epoch": 0.9224854289990082,
      "grad_norm": 3.375,
      "learning_rate": 3.847260190399049e-05,
      "loss": 0.8984,
      "step": 263210
    },
    {
      "epoch": 0.9225204765059037,
      "grad_norm": 3.109375,
      "learning_rate": 3.847195287532679e-05,
      "loss": 0.8648,
      "step": 263220
    },
    {
      "epoch": 0.9225555240127994,
      "grad_norm": 3.09375,
      "learning_rate": 3.8471303846663086e-05,
      "loss": 0.874,
      "step": 263230
    },
    {
      "epoch": 0.9225905715196949,
      "grad_norm": 2.921875,
      "learning_rate": 3.847065481799939e-05,
      "loss": 0.7997,
      "step": 263240
    },
    {
      "epoch": 0.9226256190265906,
      "grad_norm": 2.59375,
      "learning_rate": 3.847000578933568e-05,
      "loss": 0.8363,
      "step": 263250
    },
    {
      "epoch": 0.9226606665334861,
      "grad_norm": 2.6875,
      "learning_rate": 3.846935676067198e-05,
      "loss": 0.8337,
      "step": 263260
    },
    {
      "epoch": 0.9226957140403818,
      "grad_norm": 2.9375,
      "learning_rate": 3.846870773200828e-05,
      "loss": 0.841,
      "step": 263270
    },
    {
      "epoch": 0.9227307615472773,
      "grad_norm": 2.984375,
      "learning_rate": 3.846805870334458e-05,
      "loss": 0.8491,
      "step": 263280
    },
    {
      "epoch": 0.9227658090541729,
      "grad_norm": 2.59375,
      "learning_rate": 3.8467409674680874e-05,
      "loss": 0.782,
      "step": 263290
    },
    {
      "epoch": 0.9228008565610686,
      "grad_norm": 2.875,
      "learning_rate": 3.8466760646017175e-05,
      "loss": 0.9251,
      "step": 263300
    },
    {
      "epoch": 0.9228359040679641,
      "grad_norm": 2.859375,
      "learning_rate": 3.8466111617353476e-05,
      "loss": 0.8657,
      "step": 263310
    },
    {
      "epoch": 0.9228709515748598,
      "grad_norm": 3.109375,
      "learning_rate": 3.846546258868977e-05,
      "loss": 0.8565,
      "step": 263320
    },
    {
      "epoch": 0.9229059990817553,
      "grad_norm": 2.703125,
      "learning_rate": 3.846481356002607e-05,
      "loss": 0.8761,
      "step": 263330
    },
    {
      "epoch": 0.922941046588651,
      "grad_norm": 2.5625,
      "learning_rate": 3.846416453136236e-05,
      "loss": 0.8369,
      "step": 263340
    },
    {
      "epoch": 0.9229760940955465,
      "grad_norm": 2.96875,
      "learning_rate": 3.846351550269866e-05,
      "loss": 0.9577,
      "step": 263350
    },
    {
      "epoch": 0.9230111416024421,
      "grad_norm": 2.296875,
      "learning_rate": 3.8462866474034956e-05,
      "loss": 0.8813,
      "step": 263360
    },
    {
      "epoch": 0.9230461891093377,
      "grad_norm": 3.125,
      "learning_rate": 3.846221744537126e-05,
      "loss": 0.8887,
      "step": 263370
    },
    {
      "epoch": 0.9230812366162333,
      "grad_norm": 2.9375,
      "learning_rate": 3.846156841670755e-05,
      "loss": 0.7559,
      "step": 263380
    },
    {
      "epoch": 0.9231162841231288,
      "grad_norm": 2.953125,
      "learning_rate": 3.8460919388043854e-05,
      "loss": 0.9159,
      "step": 263390
    },
    {
      "epoch": 0.9231513316300245,
      "grad_norm": 3.296875,
      "learning_rate": 3.8460270359380155e-05,
      "loss": 0.9191,
      "step": 263400
    },
    {
      "epoch": 0.9231863791369201,
      "grad_norm": 2.71875,
      "learning_rate": 3.845962133071645e-05,
      "loss": 0.8888,
      "step": 263410
    },
    {
      "epoch": 0.9232214266438157,
      "grad_norm": 2.90625,
      "learning_rate": 3.845897230205275e-05,
      "loss": 0.8665,
      "step": 263420
    },
    {
      "epoch": 0.9232564741507113,
      "grad_norm": 2.875,
      "learning_rate": 3.8458323273389046e-05,
      "loss": 0.816,
      "step": 263430
    },
    {
      "epoch": 0.9232915216576069,
      "grad_norm": 2.640625,
      "learning_rate": 3.845767424472535e-05,
      "loss": 0.8435,
      "step": 263440
    },
    {
      "epoch": 0.9233265691645025,
      "grad_norm": 3.109375,
      "learning_rate": 3.845702521606164e-05,
      "loss": 0.8762,
      "step": 263450
    },
    {
      "epoch": 0.923361616671398,
      "grad_norm": 2.6875,
      "learning_rate": 3.845637618739794e-05,
      "loss": 0.8587,
      "step": 263460
    },
    {
      "epoch": 0.9233966641782937,
      "grad_norm": 2.96875,
      "learning_rate": 3.845572715873424e-05,
      "loss": 0.8927,
      "step": 263470
    },
    {
      "epoch": 0.9234317116851892,
      "grad_norm": 3.234375,
      "learning_rate": 3.845507813007054e-05,
      "loss": 0.814,
      "step": 263480
    },
    {
      "epoch": 0.9234667591920849,
      "grad_norm": 2.875,
      "learning_rate": 3.8454429101406834e-05,
      "loss": 0.8434,
      "step": 263490
    },
    {
      "epoch": 0.9235018066989805,
      "grad_norm": 3.3125,
      "learning_rate": 3.8453780072743135e-05,
      "loss": 0.9197,
      "step": 263500
    },
    {
      "epoch": 0.9235368542058761,
      "grad_norm": 2.703125,
      "learning_rate": 3.845313104407943e-05,
      "loss": 0.9191,
      "step": 263510
    },
    {
      "epoch": 0.9235719017127717,
      "grad_norm": 2.53125,
      "learning_rate": 3.845248201541573e-05,
      "loss": 0.8295,
      "step": 263520
    },
    {
      "epoch": 0.9236069492196672,
      "grad_norm": 3.1875,
      "learning_rate": 3.8451832986752026e-05,
      "loss": 0.8976,
      "step": 263530
    },
    {
      "epoch": 0.9236419967265629,
      "grad_norm": 3.28125,
      "learning_rate": 3.845118395808833e-05,
      "loss": 0.8788,
      "step": 263540
    },
    {
      "epoch": 0.9236770442334584,
      "grad_norm": 2.875,
      "learning_rate": 3.845053492942463e-05,
      "loss": 0.9294,
      "step": 263550
    },
    {
      "epoch": 0.9237120917403541,
      "grad_norm": 2.765625,
      "learning_rate": 3.844988590076092e-05,
      "loss": 0.9398,
      "step": 263560
    },
    {
      "epoch": 0.9237471392472496,
      "grad_norm": 3.078125,
      "learning_rate": 3.8449236872097224e-05,
      "loss": 0.8484,
      "step": 263570
    },
    {
      "epoch": 0.9237821867541453,
      "grad_norm": 3.390625,
      "learning_rate": 3.844858784343352e-05,
      "loss": 0.8306,
      "step": 263580
    },
    {
      "epoch": 0.9238172342610408,
      "grad_norm": 2.65625,
      "learning_rate": 3.844793881476982e-05,
      "loss": 0.9607,
      "step": 263590
    },
    {
      "epoch": 0.9238522817679364,
      "grad_norm": 2.78125,
      "learning_rate": 3.8447289786106115e-05,
      "loss": 0.8689,
      "step": 263600
    },
    {
      "epoch": 0.9238873292748321,
      "grad_norm": 2.53125,
      "learning_rate": 3.8446640757442416e-05,
      "loss": 0.8873,
      "step": 263610
    },
    {
      "epoch": 0.9239223767817276,
      "grad_norm": 3.15625,
      "learning_rate": 3.844599172877871e-05,
      "loss": 0.8638,
      "step": 263620
    },
    {
      "epoch": 0.9239574242886233,
      "grad_norm": 2.6875,
      "learning_rate": 3.844534270011501e-05,
      "loss": 0.9417,
      "step": 263630
    },
    {
      "epoch": 0.9239924717955188,
      "grad_norm": 2.78125,
      "learning_rate": 3.844469367145131e-05,
      "loss": 0.8802,
      "step": 263640
    },
    {
      "epoch": 0.9240275193024144,
      "grad_norm": 3.078125,
      "learning_rate": 3.844404464278761e-05,
      "loss": 0.9106,
      "step": 263650
    },
    {
      "epoch": 0.92406256680931,
      "grad_norm": 2.578125,
      "learning_rate": 3.84433956141239e-05,
      "loss": 0.9306,
      "step": 263660
    },
    {
      "epoch": 0.9240976143162056,
      "grad_norm": 2.609375,
      "learning_rate": 3.8442746585460204e-05,
      "loss": 0.8383,
      "step": 263670
    },
    {
      "epoch": 0.9241326618231012,
      "grad_norm": 2.859375,
      "learning_rate": 3.8442097556796506e-05,
      "loss": 0.8811,
      "step": 263680
    },
    {
      "epoch": 0.9241677093299968,
      "grad_norm": 2.984375,
      "learning_rate": 3.84414485281328e-05,
      "loss": 0.8895,
      "step": 263690
    },
    {
      "epoch": 0.9242027568368925,
      "grad_norm": 2.796875,
      "learning_rate": 3.84407994994691e-05,
      "loss": 0.8285,
      "step": 263700
    },
    {
      "epoch": 0.924237804343788,
      "grad_norm": 3.109375,
      "learning_rate": 3.8440150470805396e-05,
      "loss": 0.8711,
      "step": 263710
    },
    {
      "epoch": 0.9242728518506836,
      "grad_norm": 3.015625,
      "learning_rate": 3.843950144214169e-05,
      "loss": 0.8466,
      "step": 263720
    },
    {
      "epoch": 0.9243078993575792,
      "grad_norm": 3.09375,
      "learning_rate": 3.8438852413477986e-05,
      "loss": 0.8617,
      "step": 263730
    },
    {
      "epoch": 0.9243429468644748,
      "grad_norm": 2.796875,
      "learning_rate": 3.843820338481429e-05,
      "loss": 0.9061,
      "step": 263740
    },
    {
      "epoch": 0.9243779943713704,
      "grad_norm": 3.03125,
      "learning_rate": 3.843755435615058e-05,
      "loss": 0.9237,
      "step": 263750
    },
    {
      "epoch": 0.924413041878266,
      "grad_norm": 2.84375,
      "learning_rate": 3.843690532748688e-05,
      "loss": 0.9174,
      "step": 263760
    },
    {
      "epoch": 0.9244480893851615,
      "grad_norm": 3.0625,
      "learning_rate": 3.8436256298823184e-05,
      "loss": 0.8734,
      "step": 263770
    },
    {
      "epoch": 0.9244831368920572,
      "grad_norm": 2.625,
      "learning_rate": 3.843560727015948e-05,
      "loss": 0.899,
      "step": 263780
    },
    {
      "epoch": 0.9245181843989528,
      "grad_norm": 2.859375,
      "learning_rate": 3.843495824149578e-05,
      "loss": 0.8882,
      "step": 263790
    },
    {
      "epoch": 0.9245532319058484,
      "grad_norm": 2.84375,
      "learning_rate": 3.8434309212832075e-05,
      "loss": 0.8796,
      "step": 263800
    },
    {
      "epoch": 0.924588279412744,
      "grad_norm": 2.9375,
      "learning_rate": 3.8433660184168376e-05,
      "loss": 0.9245,
      "step": 263810
    },
    {
      "epoch": 0.9246233269196396,
      "grad_norm": 2.65625,
      "learning_rate": 3.843301115550467e-05,
      "loss": 0.8018,
      "step": 263820
    },
    {
      "epoch": 0.9246583744265352,
      "grad_norm": 2.84375,
      "learning_rate": 3.843236212684097e-05,
      "loss": 0.8357,
      "step": 263830
    },
    {
      "epoch": 0.9246934219334307,
      "grad_norm": 3.046875,
      "learning_rate": 3.843171309817727e-05,
      "loss": 0.941,
      "step": 263840
    },
    {
      "epoch": 0.9247284694403264,
      "grad_norm": 3.359375,
      "learning_rate": 3.843106406951357e-05,
      "loss": 0.9737,
      "step": 263850
    },
    {
      "epoch": 0.9247635169472219,
      "grad_norm": 3.0,
      "learning_rate": 3.843041504084986e-05,
      "loss": 0.8189,
      "step": 263860
    },
    {
      "epoch": 0.9247985644541176,
      "grad_norm": 2.734375,
      "learning_rate": 3.8429766012186164e-05,
      "loss": 0.8483,
      "step": 263870
    },
    {
      "epoch": 0.9248336119610131,
      "grad_norm": 2.8125,
      "learning_rate": 3.842911698352246e-05,
      "loss": 0.8265,
      "step": 263880
    },
    {
      "epoch": 0.9248686594679087,
      "grad_norm": 2.984375,
      "learning_rate": 3.842846795485876e-05,
      "loss": 0.9224,
      "step": 263890
    },
    {
      "epoch": 0.9249037069748044,
      "grad_norm": 3.109375,
      "learning_rate": 3.8427818926195055e-05,
      "loss": 0.8863,
      "step": 263900
    },
    {
      "epoch": 0.9249387544816999,
      "grad_norm": 2.96875,
      "learning_rate": 3.8427169897531356e-05,
      "loss": 0.8893,
      "step": 263910
    },
    {
      "epoch": 0.9249738019885956,
      "grad_norm": 2.984375,
      "learning_rate": 3.842652086886766e-05,
      "loss": 0.8077,
      "step": 263920
    },
    {
      "epoch": 0.9250088494954911,
      "grad_norm": 2.796875,
      "learning_rate": 3.842587184020395e-05,
      "loss": 0.9358,
      "step": 263930
    },
    {
      "epoch": 0.9250438970023868,
      "grad_norm": 3.15625,
      "learning_rate": 3.8425222811540254e-05,
      "loss": 0.8889,
      "step": 263940
    },
    {
      "epoch": 0.9250789445092823,
      "grad_norm": 2.625,
      "learning_rate": 3.842457378287655e-05,
      "loss": 0.8805,
      "step": 263950
    },
    {
      "epoch": 0.9251139920161779,
      "grad_norm": 3.34375,
      "learning_rate": 3.842392475421285e-05,
      "loss": 0.9115,
      "step": 263960
    },
    {
      "epoch": 0.9251490395230735,
      "grad_norm": 3.265625,
      "learning_rate": 3.8423275725549144e-05,
      "loss": 0.8524,
      "step": 263970
    },
    {
      "epoch": 0.9251840870299691,
      "grad_norm": 2.9375,
      "learning_rate": 3.8422626696885446e-05,
      "loss": 0.8647,
      "step": 263980
    },
    {
      "epoch": 0.9252191345368648,
      "grad_norm": 3.15625,
      "learning_rate": 3.842197766822174e-05,
      "loss": 0.8668,
      "step": 263990
    },
    {
      "epoch": 0.9252541820437603,
      "grad_norm": 3.109375,
      "learning_rate": 3.842132863955804e-05,
      "loss": 0.8908,
      "step": 264000
    },
    {
      "epoch": 0.925289229550656,
      "grad_norm": 3.3125,
      "learning_rate": 3.8420679610894336e-05,
      "loss": 0.8641,
      "step": 264010
    },
    {
      "epoch": 0.9253242770575515,
      "grad_norm": 2.9375,
      "learning_rate": 3.842003058223064e-05,
      "loss": 0.9283,
      "step": 264020
    },
    {
      "epoch": 0.9253593245644471,
      "grad_norm": 2.609375,
      "learning_rate": 3.841938155356693e-05,
      "loss": 0.8656,
      "step": 264030
    },
    {
      "epoch": 0.9253943720713427,
      "grad_norm": 3.203125,
      "learning_rate": 3.8418732524903234e-05,
      "loss": 0.8872,
      "step": 264040
    },
    {
      "epoch": 0.9254294195782383,
      "grad_norm": 2.84375,
      "learning_rate": 3.8418083496239535e-05,
      "loss": 0.8761,
      "step": 264050
    },
    {
      "epoch": 0.9254644670851339,
      "grad_norm": 2.578125,
      "learning_rate": 3.841743446757583e-05,
      "loss": 0.8979,
      "step": 264060
    },
    {
      "epoch": 0.9254995145920295,
      "grad_norm": 3.09375,
      "learning_rate": 3.841678543891213e-05,
      "loss": 0.9236,
      "step": 264070
    },
    {
      "epoch": 0.925534562098925,
      "grad_norm": 2.90625,
      "learning_rate": 3.8416136410248426e-05,
      "loss": 0.9007,
      "step": 264080
    },
    {
      "epoch": 0.9255696096058207,
      "grad_norm": 3.25,
      "learning_rate": 3.841548738158472e-05,
      "loss": 0.8347,
      "step": 264090
    },
    {
      "epoch": 0.9256046571127163,
      "grad_norm": 3.296875,
      "learning_rate": 3.8414838352921015e-05,
      "loss": 0.9574,
      "step": 264100
    },
    {
      "epoch": 0.9256397046196119,
      "grad_norm": 2.859375,
      "learning_rate": 3.8414189324257316e-05,
      "loss": 0.8721,
      "step": 264110
    },
    {
      "epoch": 0.9256747521265075,
      "grad_norm": 3.03125,
      "learning_rate": 3.841354029559361e-05,
      "loss": 0.876,
      "step": 264120
    },
    {
      "epoch": 0.925709799633403,
      "grad_norm": 3.109375,
      "learning_rate": 3.841289126692991e-05,
      "loss": 0.9447,
      "step": 264130
    },
    {
      "epoch": 0.9257448471402987,
      "grad_norm": 3.125,
      "learning_rate": 3.8412242238266214e-05,
      "loss": 0.9685,
      "step": 264140
    },
    {
      "epoch": 0.9257798946471942,
      "grad_norm": 2.75,
      "learning_rate": 3.841159320960251e-05,
      "loss": 0.8494,
      "step": 264150
    },
    {
      "epoch": 0.9258149421540899,
      "grad_norm": 3.171875,
      "learning_rate": 3.841094418093881e-05,
      "loss": 0.8953,
      "step": 264160
    },
    {
      "epoch": 0.9258499896609854,
      "grad_norm": 3.546875,
      "learning_rate": 3.8410295152275104e-05,
      "loss": 0.8357,
      "step": 264170
    },
    {
      "epoch": 0.9258850371678811,
      "grad_norm": 2.84375,
      "learning_rate": 3.8409646123611406e-05,
      "loss": 0.8526,
      "step": 264180
    },
    {
      "epoch": 0.9259200846747767,
      "grad_norm": 2.734375,
      "learning_rate": 3.84089970949477e-05,
      "loss": 0.8015,
      "step": 264190
    },
    {
      "epoch": 0.9259551321816722,
      "grad_norm": 2.875,
      "learning_rate": 3.8408348066284e-05,
      "loss": 0.8785,
      "step": 264200
    },
    {
      "epoch": 0.9259901796885679,
      "grad_norm": 3.09375,
      "learning_rate": 3.8407699037620296e-05,
      "loss": 0.9634,
      "step": 264210
    },
    {
      "epoch": 0.9260252271954634,
      "grad_norm": 3.09375,
      "learning_rate": 3.84070500089566e-05,
      "loss": 0.9551,
      "step": 264220
    },
    {
      "epoch": 0.9260602747023591,
      "grad_norm": 3.1875,
      "learning_rate": 3.840640098029289e-05,
      "loss": 0.9165,
      "step": 264230
    },
    {
      "epoch": 0.9260953222092546,
      "grad_norm": 2.8125,
      "learning_rate": 3.8405751951629194e-05,
      "loss": 0.829,
      "step": 264240
    },
    {
      "epoch": 0.9261303697161503,
      "grad_norm": 3.078125,
      "learning_rate": 3.840510292296549e-05,
      "loss": 0.8063,
      "step": 264250
    },
    {
      "epoch": 0.9261654172230458,
      "grad_norm": 3.15625,
      "learning_rate": 3.840445389430179e-05,
      "loss": 0.9789,
      "step": 264260
    },
    {
      "epoch": 0.9262004647299414,
      "grad_norm": 2.9375,
      "learning_rate": 3.840380486563809e-05,
      "loss": 0.9054,
      "step": 264270
    },
    {
      "epoch": 0.926235512236837,
      "grad_norm": 2.34375,
      "learning_rate": 3.8403155836974386e-05,
      "loss": 0.8798,
      "step": 264280
    },
    {
      "epoch": 0.9262705597437326,
      "grad_norm": 3.203125,
      "learning_rate": 3.840250680831069e-05,
      "loss": 0.9178,
      "step": 264290
    },
    {
      "epoch": 0.9263056072506283,
      "grad_norm": 2.578125,
      "learning_rate": 3.840185777964698e-05,
      "loss": 0.8718,
      "step": 264300
    },
    {
      "epoch": 0.9263406547575238,
      "grad_norm": 2.84375,
      "learning_rate": 3.840120875098328e-05,
      "loss": 0.9551,
      "step": 264310
    },
    {
      "epoch": 0.9263757022644195,
      "grad_norm": 3.078125,
      "learning_rate": 3.840055972231958e-05,
      "loss": 0.8913,
      "step": 264320
    },
    {
      "epoch": 0.926410749771315,
      "grad_norm": 2.921875,
      "learning_rate": 3.839991069365588e-05,
      "loss": 0.8639,
      "step": 264330
    },
    {
      "epoch": 0.9264457972782106,
      "grad_norm": 3.015625,
      "learning_rate": 3.8399261664992174e-05,
      "loss": 0.8873,
      "step": 264340
    },
    {
      "epoch": 0.9264808447851062,
      "grad_norm": 2.984375,
      "learning_rate": 3.8398612636328475e-05,
      "loss": 0.8672,
      "step": 264350
    },
    {
      "epoch": 0.9265158922920018,
      "grad_norm": 2.96875,
      "learning_rate": 3.839796360766477e-05,
      "loss": 0.9201,
      "step": 264360
    },
    {
      "epoch": 0.9265509397988974,
      "grad_norm": 2.984375,
      "learning_rate": 3.839731457900107e-05,
      "loss": 0.8587,
      "step": 264370
    },
    {
      "epoch": 0.926585987305793,
      "grad_norm": 2.84375,
      "learning_rate": 3.8396665550337366e-05,
      "loss": 0.8305,
      "step": 264380
    },
    {
      "epoch": 0.9266210348126886,
      "grad_norm": 3.25,
      "learning_rate": 3.839601652167367e-05,
      "loss": 0.8289,
      "step": 264390
    },
    {
      "epoch": 0.9266560823195842,
      "grad_norm": 2.875,
      "learning_rate": 3.839536749300996e-05,
      "loss": 0.8417,
      "step": 264400
    },
    {
      "epoch": 0.9266911298264798,
      "grad_norm": 2.765625,
      "learning_rate": 3.839471846434626e-05,
      "loss": 0.8228,
      "step": 264410
    },
    {
      "epoch": 0.9267261773333754,
      "grad_norm": 2.984375,
      "learning_rate": 3.8394069435682564e-05,
      "loss": 0.9223,
      "step": 264420
    },
    {
      "epoch": 0.926761224840271,
      "grad_norm": 2.859375,
      "learning_rate": 3.839342040701886e-05,
      "loss": 0.8371,
      "step": 264430
    },
    {
      "epoch": 0.9267962723471665,
      "grad_norm": 2.53125,
      "learning_rate": 3.839277137835516e-05,
      "loss": 0.8632,
      "step": 264440
    },
    {
      "epoch": 0.9268313198540622,
      "grad_norm": 2.96875,
      "learning_rate": 3.8392122349691455e-05,
      "loss": 0.8732,
      "step": 264450
    },
    {
      "epoch": 0.9268663673609577,
      "grad_norm": 3.03125,
      "learning_rate": 3.8391473321027756e-05,
      "loss": 0.8997,
      "step": 264460
    },
    {
      "epoch": 0.9269014148678534,
      "grad_norm": 2.421875,
      "learning_rate": 3.8390824292364044e-05,
      "loss": 0.9127,
      "step": 264470
    },
    {
      "epoch": 0.926936462374749,
      "grad_norm": 2.84375,
      "learning_rate": 3.8390175263700346e-05,
      "loss": 0.8714,
      "step": 264480
    },
    {
      "epoch": 0.9269715098816446,
      "grad_norm": 2.734375,
      "learning_rate": 3.838952623503664e-05,
      "loss": 0.8269,
      "step": 264490
    },
    {
      "epoch": 0.9270065573885402,
      "grad_norm": 2.6875,
      "learning_rate": 3.838887720637294e-05,
      "loss": 0.8378,
      "step": 264500
    },
    {
      "epoch": 0.9270416048954357,
      "grad_norm": 2.671875,
      "learning_rate": 3.838822817770924e-05,
      "loss": 0.862,
      "step": 264510
    },
    {
      "epoch": 0.9270766524023314,
      "grad_norm": 2.96875,
      "learning_rate": 3.838757914904554e-05,
      "loss": 0.8652,
      "step": 264520
    },
    {
      "epoch": 0.9271116999092269,
      "grad_norm": 3.0,
      "learning_rate": 3.838693012038184e-05,
      "loss": 0.8452,
      "step": 264530
    },
    {
      "epoch": 0.9271467474161226,
      "grad_norm": 3.0,
      "learning_rate": 3.8386281091718134e-05,
      "loss": 0.8885,
      "step": 264540
    },
    {
      "epoch": 0.9271817949230181,
      "grad_norm": 2.796875,
      "learning_rate": 3.8385632063054435e-05,
      "loss": 0.8229,
      "step": 264550
    },
    {
      "epoch": 0.9272168424299138,
      "grad_norm": 3.125,
      "learning_rate": 3.838498303439073e-05,
      "loss": 0.9456,
      "step": 264560
    },
    {
      "epoch": 0.9272518899368093,
      "grad_norm": 3.234375,
      "learning_rate": 3.838433400572703e-05,
      "loss": 0.9207,
      "step": 264570
    },
    {
      "epoch": 0.9272869374437049,
      "grad_norm": 3.171875,
      "learning_rate": 3.8383684977063326e-05,
      "loss": 0.8944,
      "step": 264580
    },
    {
      "epoch": 0.9273219849506006,
      "grad_norm": 2.9375,
      "learning_rate": 3.838303594839963e-05,
      "loss": 0.9406,
      "step": 264590
    },
    {
      "epoch": 0.9273570324574961,
      "grad_norm": 3.09375,
      "learning_rate": 3.838238691973592e-05,
      "loss": 0.8646,
      "step": 264600
    },
    {
      "epoch": 0.9273920799643918,
      "grad_norm": 2.515625,
      "learning_rate": 3.838173789107222e-05,
      "loss": 0.8184,
      "step": 264610
    },
    {
      "epoch": 0.9274271274712873,
      "grad_norm": 2.875,
      "learning_rate": 3.838108886240852e-05,
      "loss": 0.9761,
      "step": 264620
    },
    {
      "epoch": 0.927462174978183,
      "grad_norm": 3.078125,
      "learning_rate": 3.838043983374482e-05,
      "loss": 0.9023,
      "step": 264630
    },
    {
      "epoch": 0.9274972224850785,
      "grad_norm": 3.3125,
      "learning_rate": 3.837979080508112e-05,
      "loss": 0.9226,
      "step": 264640
    },
    {
      "epoch": 0.9275322699919741,
      "grad_norm": 2.65625,
      "learning_rate": 3.8379141776417415e-05,
      "loss": 0.8474,
      "step": 264650
    },
    {
      "epoch": 0.9275673174988697,
      "grad_norm": 2.578125,
      "learning_rate": 3.8378492747753716e-05,
      "loss": 0.9247,
      "step": 264660
    },
    {
      "epoch": 0.9276023650057653,
      "grad_norm": 2.953125,
      "learning_rate": 3.837784371909001e-05,
      "loss": 0.9215,
      "step": 264670
    },
    {
      "epoch": 0.927637412512661,
      "grad_norm": 3.3125,
      "learning_rate": 3.837719469042631e-05,
      "loss": 0.9218,
      "step": 264680
    },
    {
      "epoch": 0.9276724600195565,
      "grad_norm": 3.171875,
      "learning_rate": 3.837654566176261e-05,
      "loss": 0.8718,
      "step": 264690
    },
    {
      "epoch": 0.9277075075264521,
      "grad_norm": 3.21875,
      "learning_rate": 3.837589663309891e-05,
      "loss": 0.9366,
      "step": 264700
    },
    {
      "epoch": 0.9277425550333477,
      "grad_norm": 3.171875,
      "learning_rate": 3.83752476044352e-05,
      "loss": 0.8577,
      "step": 264710
    },
    {
      "epoch": 0.9277776025402433,
      "grad_norm": 2.953125,
      "learning_rate": 3.8374598575771504e-05,
      "loss": 0.9273,
      "step": 264720
    },
    {
      "epoch": 0.9278126500471389,
      "grad_norm": 2.765625,
      "learning_rate": 3.83739495471078e-05,
      "loss": 0.8479,
      "step": 264730
    },
    {
      "epoch": 0.9278476975540345,
      "grad_norm": 3.265625,
      "learning_rate": 3.83733005184441e-05,
      "loss": 0.8451,
      "step": 264740
    },
    {
      "epoch": 0.92788274506093,
      "grad_norm": 3.34375,
      "learning_rate": 3.8372651489780395e-05,
      "loss": 0.8719,
      "step": 264750
    },
    {
      "epoch": 0.9279177925678257,
      "grad_norm": 2.453125,
      "learning_rate": 3.8372002461116696e-05,
      "loss": 0.8926,
      "step": 264760
    },
    {
      "epoch": 0.9279528400747212,
      "grad_norm": 3.0,
      "learning_rate": 3.837135343245299e-05,
      "loss": 0.835,
      "step": 264770
    },
    {
      "epoch": 0.9279878875816169,
      "grad_norm": 3.0,
      "learning_rate": 3.837070440378929e-05,
      "loss": 0.8053,
      "step": 264780
    },
    {
      "epoch": 0.9280229350885125,
      "grad_norm": 3.21875,
      "learning_rate": 3.8370055375125594e-05,
      "loss": 0.9497,
      "step": 264790
    },
    {
      "epoch": 0.9280579825954081,
      "grad_norm": 2.9375,
      "learning_rate": 3.836940634646189e-05,
      "loss": 0.8595,
      "step": 264800
    },
    {
      "epoch": 0.9280930301023037,
      "grad_norm": 2.640625,
      "learning_rate": 3.836875731779819e-05,
      "loss": 0.8942,
      "step": 264810
    },
    {
      "epoch": 0.9281280776091992,
      "grad_norm": 2.453125,
      "learning_rate": 3.8368108289134484e-05,
      "loss": 0.8269,
      "step": 264820
    },
    {
      "epoch": 0.9281631251160949,
      "grad_norm": 2.640625,
      "learning_rate": 3.8367459260470786e-05,
      "loss": 0.8021,
      "step": 264830
    },
    {
      "epoch": 0.9281981726229904,
      "grad_norm": 2.59375,
      "learning_rate": 3.836681023180708e-05,
      "loss": 0.9019,
      "step": 264840
    },
    {
      "epoch": 0.9282332201298861,
      "grad_norm": 2.703125,
      "learning_rate": 3.8366161203143375e-05,
      "loss": 0.8402,
      "step": 264850
    },
    {
      "epoch": 0.9282682676367816,
      "grad_norm": 2.84375,
      "learning_rate": 3.836551217447967e-05,
      "loss": 0.8577,
      "step": 264860
    },
    {
      "epoch": 0.9283033151436773,
      "grad_norm": 3.625,
      "learning_rate": 3.836486314581597e-05,
      "loss": 0.9011,
      "step": 264870
    },
    {
      "epoch": 0.9283383626505729,
      "grad_norm": 2.828125,
      "learning_rate": 3.836421411715227e-05,
      "loss": 0.9376,
      "step": 264880
    },
    {
      "epoch": 0.9283734101574684,
      "grad_norm": 2.75,
      "learning_rate": 3.836356508848857e-05,
      "loss": 0.9019,
      "step": 264890
    },
    {
      "epoch": 0.9284084576643641,
      "grad_norm": 2.578125,
      "learning_rate": 3.836291605982487e-05,
      "loss": 0.8916,
      "step": 264900
    },
    {
      "epoch": 0.9284435051712596,
      "grad_norm": 2.859375,
      "learning_rate": 3.836226703116116e-05,
      "loss": 0.9054,
      "step": 264910
    },
    {
      "epoch": 0.9284785526781553,
      "grad_norm": 3.046875,
      "learning_rate": 3.8361618002497464e-05,
      "loss": 0.7837,
      "step": 264920
    },
    {
      "epoch": 0.9285136001850508,
      "grad_norm": 2.90625,
      "learning_rate": 3.836096897383376e-05,
      "loss": 0.9308,
      "step": 264930
    },
    {
      "epoch": 0.9285486476919464,
      "grad_norm": 2.578125,
      "learning_rate": 3.836031994517006e-05,
      "loss": 0.9057,
      "step": 264940
    },
    {
      "epoch": 0.928583695198842,
      "grad_norm": 3.15625,
      "learning_rate": 3.8359670916506355e-05,
      "loss": 0.8705,
      "step": 264950
    },
    {
      "epoch": 0.9286187427057376,
      "grad_norm": 3.15625,
      "learning_rate": 3.8359021887842656e-05,
      "loss": 0.8651,
      "step": 264960
    },
    {
      "epoch": 0.9286537902126333,
      "grad_norm": 111.0,
      "learning_rate": 3.835837285917895e-05,
      "loss": 0.8751,
      "step": 264970
    },
    {
      "epoch": 0.9286888377195288,
      "grad_norm": 2.65625,
      "learning_rate": 3.835772383051525e-05,
      "loss": 0.9048,
      "step": 264980
    },
    {
      "epoch": 0.9287238852264245,
      "grad_norm": 2.796875,
      "learning_rate": 3.835707480185155e-05,
      "loss": 0.8328,
      "step": 264990
    },
    {
      "epoch": 0.92875893273332,
      "grad_norm": 2.859375,
      "learning_rate": 3.835642577318785e-05,
      "loss": 0.903,
      "step": 265000
    },
    {
      "epoch": 0.92875893273332,
      "eval_loss": 0.8255337476730347,
      "eval_runtime": 566.9546,
      "eval_samples_per_second": 671.017,
      "eval_steps_per_second": 55.918,
      "step": 265000
    },
    {
      "epoch": 0.9287939802402156,
      "grad_norm": 2.921875,
      "learning_rate": 3.835577674452415e-05,
      "loss": 0.8902,
      "step": 265010
    },
    {
      "epoch": 0.9288290277471112,
      "grad_norm": 2.90625,
      "learning_rate": 3.8355127715860444e-05,
      "loss": 0.9216,
      "step": 265020
    },
    {
      "epoch": 0.9288640752540068,
      "grad_norm": 3.125,
      "learning_rate": 3.8354478687196746e-05,
      "loss": 0.9648,
      "step": 265030
    },
    {
      "epoch": 0.9288991227609024,
      "grad_norm": 2.8125,
      "learning_rate": 3.835382965853304e-05,
      "loss": 0.7976,
      "step": 265040
    },
    {
      "epoch": 0.928934170267798,
      "grad_norm": 3.109375,
      "learning_rate": 3.835318062986934e-05,
      "loss": 0.9198,
      "step": 265050
    },
    {
      "epoch": 0.9289692177746935,
      "grad_norm": 2.671875,
      "learning_rate": 3.8352531601205636e-05,
      "loss": 0.9314,
      "step": 265060
    },
    {
      "epoch": 0.9290042652815892,
      "grad_norm": 2.640625,
      "learning_rate": 3.835188257254194e-05,
      "loss": 0.8637,
      "step": 265070
    },
    {
      "epoch": 0.9290393127884848,
      "grad_norm": 3.171875,
      "learning_rate": 3.835123354387823e-05,
      "loss": 0.8548,
      "step": 265080
    },
    {
      "epoch": 0.9290743602953804,
      "grad_norm": 3.296875,
      "learning_rate": 3.8350584515214534e-05,
      "loss": 0.8685,
      "step": 265090
    },
    {
      "epoch": 0.929109407802276,
      "grad_norm": 2.984375,
      "learning_rate": 3.834993548655083e-05,
      "loss": 0.8893,
      "step": 265100
    },
    {
      "epoch": 0.9291444553091716,
      "grad_norm": 2.671875,
      "learning_rate": 3.834928645788713e-05,
      "loss": 0.9156,
      "step": 265110
    },
    {
      "epoch": 0.9291795028160672,
      "grad_norm": 3.328125,
      "learning_rate": 3.8348637429223424e-05,
      "loss": 0.8307,
      "step": 265120
    },
    {
      "epoch": 0.9292145503229627,
      "grad_norm": 2.75,
      "learning_rate": 3.8347988400559726e-05,
      "loss": 0.8956,
      "step": 265130
    },
    {
      "epoch": 0.9292495978298584,
      "grad_norm": 3.0,
      "learning_rate": 3.834733937189603e-05,
      "loss": 0.8866,
      "step": 265140
    },
    {
      "epoch": 0.9292846453367539,
      "grad_norm": 2.859375,
      "learning_rate": 3.834669034323232e-05,
      "loss": 0.9072,
      "step": 265150
    },
    {
      "epoch": 0.9293196928436496,
      "grad_norm": 2.890625,
      "learning_rate": 3.834604131456862e-05,
      "loss": 0.866,
      "step": 265160
    },
    {
      "epoch": 0.9293547403505452,
      "grad_norm": 2.65625,
      "learning_rate": 3.834539228590492e-05,
      "loss": 0.9008,
      "step": 265170
    },
    {
      "epoch": 0.9293897878574408,
      "grad_norm": 2.640625,
      "learning_rate": 3.834474325724122e-05,
      "loss": 0.9271,
      "step": 265180
    },
    {
      "epoch": 0.9294248353643364,
      "grad_norm": 3.515625,
      "learning_rate": 3.8344094228577514e-05,
      "loss": 0.894,
      "step": 265190
    },
    {
      "epoch": 0.9294598828712319,
      "grad_norm": 3.84375,
      "learning_rate": 3.8343445199913815e-05,
      "loss": 0.8774,
      "step": 265200
    },
    {
      "epoch": 0.9294949303781276,
      "grad_norm": 3.0625,
      "learning_rate": 3.834279617125011e-05,
      "loss": 0.9448,
      "step": 265210
    },
    {
      "epoch": 0.9295299778850231,
      "grad_norm": 3.125,
      "learning_rate": 3.8342147142586404e-05,
      "loss": 0.8991,
      "step": 265220
    },
    {
      "epoch": 0.9295650253919188,
      "grad_norm": 2.65625,
      "learning_rate": 3.8341498113922706e-05,
      "loss": 0.8386,
      "step": 265230
    },
    {
      "epoch": 0.9296000728988143,
      "grad_norm": 2.609375,
      "learning_rate": 3.8340849085259e-05,
      "loss": 0.9142,
      "step": 265240
    },
    {
      "epoch": 0.92963512040571,
      "grad_norm": 2.734375,
      "learning_rate": 3.83402000565953e-05,
      "loss": 0.8935,
      "step": 265250
    },
    {
      "epoch": 0.9296701679126055,
      "grad_norm": 3.03125,
      "learning_rate": 3.8339551027931596e-05,
      "loss": 0.8483,
      "step": 265260
    },
    {
      "epoch": 0.9297052154195011,
      "grad_norm": 2.90625,
      "learning_rate": 3.83389019992679e-05,
      "loss": 0.8553,
      "step": 265270
    },
    {
      "epoch": 0.9297402629263968,
      "grad_norm": 3.0,
      "learning_rate": 3.833825297060419e-05,
      "loss": 0.8255,
      "step": 265280
    },
    {
      "epoch": 0.9297753104332923,
      "grad_norm": 3.453125,
      "learning_rate": 3.8337603941940494e-05,
      "loss": 0.9591,
      "step": 265290
    },
    {
      "epoch": 0.929810357940188,
      "grad_norm": 3.078125,
      "learning_rate": 3.833695491327679e-05,
      "loss": 0.8976,
      "step": 265300
    },
    {
      "epoch": 0.9298454054470835,
      "grad_norm": 2.859375,
      "learning_rate": 3.833630588461309e-05,
      "loss": 0.8582,
      "step": 265310
    },
    {
      "epoch": 0.9298804529539791,
      "grad_norm": 2.8125,
      "learning_rate": 3.8335656855949384e-05,
      "loss": 0.9329,
      "step": 265320
    },
    {
      "epoch": 0.9299155004608747,
      "grad_norm": 2.953125,
      "learning_rate": 3.8335007827285686e-05,
      "loss": 0.7975,
      "step": 265330
    },
    {
      "epoch": 0.9299505479677703,
      "grad_norm": 2.546875,
      "learning_rate": 3.833435879862198e-05,
      "loss": 0.8704,
      "step": 265340
    },
    {
      "epoch": 0.9299855954746659,
      "grad_norm": 3.296875,
      "learning_rate": 3.833370976995828e-05,
      "loss": 0.9094,
      "step": 265350
    },
    {
      "epoch": 0.9300206429815615,
      "grad_norm": 3.71875,
      "learning_rate": 3.8333060741294576e-05,
      "loss": 0.9229,
      "step": 265360
    },
    {
      "epoch": 0.9300556904884572,
      "grad_norm": 2.859375,
      "learning_rate": 3.833241171263088e-05,
      "loss": 0.8811,
      "step": 265370
    },
    {
      "epoch": 0.9300907379953527,
      "grad_norm": 2.8125,
      "learning_rate": 3.833176268396718e-05,
      "loss": 0.8589,
      "step": 265380
    },
    {
      "epoch": 0.9301257855022483,
      "grad_norm": 3.234375,
      "learning_rate": 3.8331113655303474e-05,
      "loss": 0.9034,
      "step": 265390
    },
    {
      "epoch": 0.9301608330091439,
      "grad_norm": 3.0625,
      "learning_rate": 3.8330464626639775e-05,
      "loss": 0.9016,
      "step": 265400
    },
    {
      "epoch": 0.9301958805160395,
      "grad_norm": 2.96875,
      "learning_rate": 3.832981559797607e-05,
      "loss": 0.9318,
      "step": 265410
    },
    {
      "epoch": 0.930230928022935,
      "grad_norm": 3.0,
      "learning_rate": 3.832916656931237e-05,
      "loss": 0.7936,
      "step": 265420
    },
    {
      "epoch": 0.9302659755298307,
      "grad_norm": 2.9375,
      "learning_rate": 3.8328517540648666e-05,
      "loss": 0.9343,
      "step": 265430
    },
    {
      "epoch": 0.9303010230367262,
      "grad_norm": 3.03125,
      "learning_rate": 3.832786851198497e-05,
      "loss": 0.819,
      "step": 265440
    },
    {
      "epoch": 0.9303360705436219,
      "grad_norm": 2.90625,
      "learning_rate": 3.832721948332126e-05,
      "loss": 0.8408,
      "step": 265450
    },
    {
      "epoch": 0.9303711180505175,
      "grad_norm": 3.109375,
      "learning_rate": 3.832657045465756e-05,
      "loss": 0.8235,
      "step": 265460
    },
    {
      "epoch": 0.9304061655574131,
      "grad_norm": 3.109375,
      "learning_rate": 3.832592142599386e-05,
      "loss": 0.813,
      "step": 265470
    },
    {
      "epoch": 0.9304412130643087,
      "grad_norm": 2.84375,
      "learning_rate": 3.832527239733016e-05,
      "loss": 0.8887,
      "step": 265480
    },
    {
      "epoch": 0.9304762605712043,
      "grad_norm": 2.875,
      "learning_rate": 3.8324623368666454e-05,
      "loss": 0.8209,
      "step": 265490
    },
    {
      "epoch": 0.9305113080780999,
      "grad_norm": 2.90625,
      "learning_rate": 3.8323974340002755e-05,
      "loss": 0.8819,
      "step": 265500
    },
    {
      "epoch": 0.9305463555849954,
      "grad_norm": 2.65625,
      "learning_rate": 3.832332531133906e-05,
      "loss": 0.8754,
      "step": 265510
    },
    {
      "epoch": 0.9305814030918911,
      "grad_norm": 2.59375,
      "learning_rate": 3.832267628267535e-05,
      "loss": 0.8854,
      "step": 265520
    },
    {
      "epoch": 0.9306164505987866,
      "grad_norm": 3.25,
      "learning_rate": 3.832202725401165e-05,
      "loss": 0.8594,
      "step": 265530
    },
    {
      "epoch": 0.9306514981056823,
      "grad_norm": 2.546875,
      "learning_rate": 3.832137822534795e-05,
      "loss": 0.8488,
      "step": 265540
    },
    {
      "epoch": 0.9306865456125778,
      "grad_norm": 2.90625,
      "learning_rate": 3.832072919668425e-05,
      "loss": 0.8435,
      "step": 265550
    },
    {
      "epoch": 0.9307215931194734,
      "grad_norm": 2.765625,
      "learning_rate": 3.832008016802054e-05,
      "loss": 0.9225,
      "step": 265560
    },
    {
      "epoch": 0.9307566406263691,
      "grad_norm": 3.03125,
      "learning_rate": 3.8319431139356845e-05,
      "loss": 0.8406,
      "step": 265570
    },
    {
      "epoch": 0.9307916881332646,
      "grad_norm": 2.78125,
      "learning_rate": 3.831878211069314e-05,
      "loss": 0.894,
      "step": 265580
    },
    {
      "epoch": 0.9308267356401603,
      "grad_norm": 3.03125,
      "learning_rate": 3.831813308202944e-05,
      "loss": 0.9498,
      "step": 265590
    },
    {
      "epoch": 0.9308617831470558,
      "grad_norm": 3.125,
      "learning_rate": 3.8317484053365735e-05,
      "loss": 0.9134,
      "step": 265600
    },
    {
      "epoch": 0.9308968306539515,
      "grad_norm": 2.875,
      "learning_rate": 3.831683502470203e-05,
      "loss": 0.8957,
      "step": 265610
    },
    {
      "epoch": 0.930931878160847,
      "grad_norm": 2.96875,
      "learning_rate": 3.831618599603833e-05,
      "loss": 0.9025,
      "step": 265620
    },
    {
      "epoch": 0.9309669256677426,
      "grad_norm": 2.671875,
      "learning_rate": 3.8315536967374626e-05,
      "loss": 0.8435,
      "step": 265630
    },
    {
      "epoch": 0.9310019731746382,
      "grad_norm": 2.53125,
      "learning_rate": 3.831488793871093e-05,
      "loss": 0.8115,
      "step": 265640
    },
    {
      "epoch": 0.9310370206815338,
      "grad_norm": 3.203125,
      "learning_rate": 3.831423891004722e-05,
      "loss": 0.9337,
      "step": 265650
    },
    {
      "epoch": 0.9310720681884295,
      "grad_norm": 2.90625,
      "learning_rate": 3.831358988138352e-05,
      "loss": 0.9207,
      "step": 265660
    },
    {
      "epoch": 0.931107115695325,
      "grad_norm": 2.65625,
      "learning_rate": 3.831294085271982e-05,
      "loss": 0.9424,
      "step": 265670
    },
    {
      "epoch": 0.9311421632022207,
      "grad_norm": 2.953125,
      "learning_rate": 3.831229182405612e-05,
      "loss": 0.9045,
      "step": 265680
    },
    {
      "epoch": 0.9311772107091162,
      "grad_norm": 2.9375,
      "learning_rate": 3.8311642795392414e-05,
      "loss": 0.9124,
      "step": 265690
    },
    {
      "epoch": 0.9312122582160118,
      "grad_norm": 2.5,
      "learning_rate": 3.8310993766728715e-05,
      "loss": 0.785,
      "step": 265700
    },
    {
      "epoch": 0.9312473057229074,
      "grad_norm": 2.84375,
      "learning_rate": 3.831034473806501e-05,
      "loss": 0.9335,
      "step": 265710
    },
    {
      "epoch": 0.931282353229803,
      "grad_norm": 2.703125,
      "learning_rate": 3.830969570940131e-05,
      "loss": 0.8559,
      "step": 265720
    },
    {
      "epoch": 0.9313174007366986,
      "grad_norm": 2.9375,
      "learning_rate": 3.8309046680737606e-05,
      "loss": 0.861,
      "step": 265730
    },
    {
      "epoch": 0.9313524482435942,
      "grad_norm": 2.921875,
      "learning_rate": 3.830839765207391e-05,
      "loss": 0.8981,
      "step": 265740
    },
    {
      "epoch": 0.9313874957504897,
      "grad_norm": 2.96875,
      "learning_rate": 3.830774862341021e-05,
      "loss": 0.833,
      "step": 265750
    },
    {
      "epoch": 0.9314225432573854,
      "grad_norm": 2.796875,
      "learning_rate": 3.83070995947465e-05,
      "loss": 1.001,
      "step": 265760
    },
    {
      "epoch": 0.931457590764281,
      "grad_norm": 2.578125,
      "learning_rate": 3.8306450566082805e-05,
      "loss": 0.8224,
      "step": 265770
    },
    {
      "epoch": 0.9314926382711766,
      "grad_norm": 3.125,
      "learning_rate": 3.83058015374191e-05,
      "loss": 0.909,
      "step": 265780
    },
    {
      "epoch": 0.9315276857780722,
      "grad_norm": 3.296875,
      "learning_rate": 3.83051525087554e-05,
      "loss": 0.8923,
      "step": 265790
    },
    {
      "epoch": 0.9315627332849677,
      "grad_norm": 3.140625,
      "learning_rate": 3.8304503480091695e-05,
      "loss": 0.9125,
      "step": 265800
    },
    {
      "epoch": 0.9315977807918634,
      "grad_norm": 3.125,
      "learning_rate": 3.8303854451428e-05,
      "loss": 0.831,
      "step": 265810
    },
    {
      "epoch": 0.9316328282987589,
      "grad_norm": 2.984375,
      "learning_rate": 3.830320542276429e-05,
      "loss": 0.9575,
      "step": 265820
    },
    {
      "epoch": 0.9316678758056546,
      "grad_norm": 3.09375,
      "learning_rate": 3.830255639410059e-05,
      "loss": 0.9118,
      "step": 265830
    },
    {
      "epoch": 0.9317029233125501,
      "grad_norm": 2.546875,
      "learning_rate": 3.830190736543689e-05,
      "loss": 0.8677,
      "step": 265840
    },
    {
      "epoch": 0.9317379708194458,
      "grad_norm": 2.8125,
      "learning_rate": 3.830125833677319e-05,
      "loss": 0.894,
      "step": 265850
    },
    {
      "epoch": 0.9317730183263414,
      "grad_norm": 2.859375,
      "learning_rate": 3.830060930810948e-05,
      "loss": 0.84,
      "step": 265860
    },
    {
      "epoch": 0.9318080658332369,
      "grad_norm": 2.8125,
      "learning_rate": 3.8299960279445785e-05,
      "loss": 0.8859,
      "step": 265870
    },
    {
      "epoch": 0.9318431133401326,
      "grad_norm": 2.96875,
      "learning_rate": 3.8299311250782086e-05,
      "loss": 0.9664,
      "step": 265880
    },
    {
      "epoch": 0.9318781608470281,
      "grad_norm": 2.578125,
      "learning_rate": 3.829866222211838e-05,
      "loss": 0.8554,
      "step": 265890
    },
    {
      "epoch": 0.9319132083539238,
      "grad_norm": 3.265625,
      "learning_rate": 3.829801319345468e-05,
      "loss": 0.8851,
      "step": 265900
    },
    {
      "epoch": 0.9319482558608193,
      "grad_norm": 3.109375,
      "learning_rate": 3.829736416479098e-05,
      "loss": 0.9066,
      "step": 265910
    },
    {
      "epoch": 0.931983303367715,
      "grad_norm": 2.90625,
      "learning_rate": 3.829671513612728e-05,
      "loss": 0.9382,
      "step": 265920
    },
    {
      "epoch": 0.9320183508746105,
      "grad_norm": 2.359375,
      "learning_rate": 3.829606610746357e-05,
      "loss": 0.7849,
      "step": 265930
    },
    {
      "epoch": 0.9320533983815061,
      "grad_norm": 2.59375,
      "learning_rate": 3.8295417078799874e-05,
      "loss": 0.882,
      "step": 265940
    },
    {
      "epoch": 0.9320884458884017,
      "grad_norm": 2.9375,
      "learning_rate": 3.829476805013617e-05,
      "loss": 0.8478,
      "step": 265950
    },
    {
      "epoch": 0.9321234933952973,
      "grad_norm": 3.28125,
      "learning_rate": 3.829411902147247e-05,
      "loss": 0.9434,
      "step": 265960
    },
    {
      "epoch": 0.932158540902193,
      "grad_norm": 2.78125,
      "learning_rate": 3.8293469992808765e-05,
      "loss": 0.8985,
      "step": 265970
    },
    {
      "epoch": 0.9321935884090885,
      "grad_norm": 2.625,
      "learning_rate": 3.829282096414506e-05,
      "loss": 0.9017,
      "step": 265980
    },
    {
      "epoch": 0.9322286359159841,
      "grad_norm": 2.84375,
      "learning_rate": 3.829217193548136e-05,
      "loss": 0.8276,
      "step": 265990
    },
    {
      "epoch": 0.9322636834228797,
      "grad_norm": 2.890625,
      "learning_rate": 3.8291522906817655e-05,
      "loss": 0.8204,
      "step": 266000
    },
    {
      "epoch": 0.9322987309297753,
      "grad_norm": 2.703125,
      "learning_rate": 3.829087387815396e-05,
      "loss": 0.891,
      "step": 266010
    },
    {
      "epoch": 0.9323337784366709,
      "grad_norm": 2.8125,
      "learning_rate": 3.829022484949025e-05,
      "loss": 0.7925,
      "step": 266020
    },
    {
      "epoch": 0.9323688259435665,
      "grad_norm": 3.265625,
      "learning_rate": 3.828957582082655e-05,
      "loss": 0.9217,
      "step": 266030
    },
    {
      "epoch": 0.932403873450462,
      "grad_norm": 3.109375,
      "learning_rate": 3.828892679216285e-05,
      "loss": 0.9512,
      "step": 266040
    },
    {
      "epoch": 0.9324389209573577,
      "grad_norm": 3.234375,
      "learning_rate": 3.828827776349915e-05,
      "loss": 0.9167,
      "step": 266050
    },
    {
      "epoch": 0.9324739684642533,
      "grad_norm": 3.078125,
      "learning_rate": 3.828762873483544e-05,
      "loss": 0.9142,
      "step": 266060
    },
    {
      "epoch": 0.9325090159711489,
      "grad_norm": 3.546875,
      "learning_rate": 3.8286979706171745e-05,
      "loss": 0.8387,
      "step": 266070
    },
    {
      "epoch": 0.9325440634780445,
      "grad_norm": 2.65625,
      "learning_rate": 3.828633067750804e-05,
      "loss": 0.8976,
      "step": 266080
    },
    {
      "epoch": 0.9325791109849401,
      "grad_norm": 3.078125,
      "learning_rate": 3.828568164884434e-05,
      "loss": 0.8544,
      "step": 266090
    },
    {
      "epoch": 0.9326141584918357,
      "grad_norm": 2.734375,
      "learning_rate": 3.828503262018064e-05,
      "loss": 0.9072,
      "step": 266100
    },
    {
      "epoch": 0.9326492059987312,
      "grad_norm": 3.46875,
      "learning_rate": 3.828438359151694e-05,
      "loss": 0.9313,
      "step": 266110
    },
    {
      "epoch": 0.9326842535056269,
      "grad_norm": 3.21875,
      "learning_rate": 3.828373456285324e-05,
      "loss": 0.9248,
      "step": 266120
    },
    {
      "epoch": 0.9327193010125224,
      "grad_norm": 2.71875,
      "learning_rate": 3.828308553418953e-05,
      "loss": 0.8133,
      "step": 266130
    },
    {
      "epoch": 0.9327543485194181,
      "grad_norm": 2.765625,
      "learning_rate": 3.8282436505525834e-05,
      "loss": 0.9614,
      "step": 266140
    },
    {
      "epoch": 0.9327893960263137,
      "grad_norm": 2.90625,
      "learning_rate": 3.828178747686213e-05,
      "loss": 0.8838,
      "step": 266150
    },
    {
      "epoch": 0.9328244435332093,
      "grad_norm": 2.875,
      "learning_rate": 3.828113844819843e-05,
      "loss": 0.8465,
      "step": 266160
    },
    {
      "epoch": 0.9328594910401049,
      "grad_norm": 3.0625,
      "learning_rate": 3.8280489419534725e-05,
      "loss": 0.8665,
      "step": 266170
    },
    {
      "epoch": 0.9328945385470004,
      "grad_norm": 3.25,
      "learning_rate": 3.8279840390871026e-05,
      "loss": 0.9116,
      "step": 266180
    },
    {
      "epoch": 0.9329295860538961,
      "grad_norm": 3.40625,
      "learning_rate": 3.827919136220732e-05,
      "loss": 0.9027,
      "step": 266190
    },
    {
      "epoch": 0.9329646335607916,
      "grad_norm": 3.0625,
      "learning_rate": 3.827854233354362e-05,
      "loss": 0.8314,
      "step": 266200
    },
    {
      "epoch": 0.9329996810676873,
      "grad_norm": 2.6875,
      "learning_rate": 3.827789330487992e-05,
      "loss": 0.8114,
      "step": 266210
    },
    {
      "epoch": 0.9330347285745828,
      "grad_norm": 2.59375,
      "learning_rate": 3.827724427621622e-05,
      "loss": 0.8446,
      "step": 266220
    },
    {
      "epoch": 0.9330697760814785,
      "grad_norm": 2.84375,
      "learning_rate": 3.827659524755251e-05,
      "loss": 0.9959,
      "step": 266230
    },
    {
      "epoch": 0.933104823588374,
      "grad_norm": 2.953125,
      "learning_rate": 3.8275946218888814e-05,
      "loss": 0.8817,
      "step": 266240
    },
    {
      "epoch": 0.9331398710952696,
      "grad_norm": 2.59375,
      "learning_rate": 3.8275297190225115e-05,
      "loss": 0.9055,
      "step": 266250
    },
    {
      "epoch": 0.9331749186021653,
      "grad_norm": 2.734375,
      "learning_rate": 3.827464816156141e-05,
      "loss": 0.9535,
      "step": 266260
    },
    {
      "epoch": 0.9332099661090608,
      "grad_norm": 2.90625,
      "learning_rate": 3.827399913289771e-05,
      "loss": 0.9595,
      "step": 266270
    },
    {
      "epoch": 0.9332450136159565,
      "grad_norm": 3.078125,
      "learning_rate": 3.8273350104234006e-05,
      "loss": 0.7906,
      "step": 266280
    },
    {
      "epoch": 0.933280061122852,
      "grad_norm": 3.078125,
      "learning_rate": 3.827270107557031e-05,
      "loss": 0.8625,
      "step": 266290
    },
    {
      "epoch": 0.9333151086297476,
      "grad_norm": 2.84375,
      "learning_rate": 3.82720520469066e-05,
      "loss": 0.8181,
      "step": 266300
    },
    {
      "epoch": 0.9333501561366432,
      "grad_norm": 2.515625,
      "learning_rate": 3.8271403018242903e-05,
      "loss": 0.8186,
      "step": 266310
    },
    {
      "epoch": 0.9333852036435388,
      "grad_norm": 2.984375,
      "learning_rate": 3.82707539895792e-05,
      "loss": 0.9139,
      "step": 266320
    },
    {
      "epoch": 0.9334202511504344,
      "grad_norm": 2.84375,
      "learning_rate": 3.82701049609155e-05,
      "loss": 0.9802,
      "step": 266330
    },
    {
      "epoch": 0.93345529865733,
      "grad_norm": 3.03125,
      "learning_rate": 3.8269455932251794e-05,
      "loss": 0.8804,
      "step": 266340
    },
    {
      "epoch": 0.9334903461642257,
      "grad_norm": 2.875,
      "learning_rate": 3.826880690358809e-05,
      "loss": 0.8966,
      "step": 266350
    },
    {
      "epoch": 0.9335253936711212,
      "grad_norm": 2.671875,
      "learning_rate": 3.826815787492439e-05,
      "loss": 0.8484,
      "step": 266360
    },
    {
      "epoch": 0.9335604411780168,
      "grad_norm": 3.421875,
      "learning_rate": 3.8267508846260685e-05,
      "loss": 0.9455,
      "step": 266370
    },
    {
      "epoch": 0.9335954886849124,
      "grad_norm": 3.09375,
      "learning_rate": 3.8266859817596986e-05,
      "loss": 0.8716,
      "step": 266380
    },
    {
      "epoch": 0.933630536191808,
      "grad_norm": 3.71875,
      "learning_rate": 3.826621078893328e-05,
      "loss": 0.8636,
      "step": 266390
    },
    {
      "epoch": 0.9336655836987036,
      "grad_norm": 2.96875,
      "learning_rate": 3.826556176026958e-05,
      "loss": 0.8607,
      "step": 266400
    },
    {
      "epoch": 0.9337006312055992,
      "grad_norm": 2.96875,
      "learning_rate": 3.826491273160588e-05,
      "loss": 0.8777,
      "step": 266410
    },
    {
      "epoch": 0.9337356787124947,
      "grad_norm": 2.984375,
      "learning_rate": 3.826426370294218e-05,
      "loss": 0.9426,
      "step": 266420
    },
    {
      "epoch": 0.9337707262193904,
      "grad_norm": 3.1875,
      "learning_rate": 3.826361467427847e-05,
      "loss": 0.8548,
      "step": 266430
    },
    {
      "epoch": 0.9338057737262859,
      "grad_norm": 2.609375,
      "learning_rate": 3.8262965645614774e-05,
      "loss": 0.861,
      "step": 266440
    },
    {
      "epoch": 0.9338408212331816,
      "grad_norm": 2.984375,
      "learning_rate": 3.826231661695107e-05,
      "loss": 0.9236,
      "step": 266450
    },
    {
      "epoch": 0.9338758687400772,
      "grad_norm": 2.84375,
      "learning_rate": 3.826166758828737e-05,
      "loss": 0.83,
      "step": 266460
    },
    {
      "epoch": 0.9339109162469728,
      "grad_norm": 2.734375,
      "learning_rate": 3.826101855962367e-05,
      "loss": 0.8916,
      "step": 266470
    },
    {
      "epoch": 0.9339459637538684,
      "grad_norm": 2.890625,
      "learning_rate": 3.8260369530959966e-05,
      "loss": 0.9141,
      "step": 266480
    },
    {
      "epoch": 0.9339810112607639,
      "grad_norm": 2.6875,
      "learning_rate": 3.825972050229627e-05,
      "loss": 0.888,
      "step": 266490
    },
    {
      "epoch": 0.9340160587676596,
      "grad_norm": 2.640625,
      "learning_rate": 3.825907147363256e-05,
      "loss": 0.8258,
      "step": 266500
    },
    {
      "epoch": 0.9340511062745551,
      "grad_norm": 2.953125,
      "learning_rate": 3.8258422444968863e-05,
      "loss": 0.8749,
      "step": 266510
    },
    {
      "epoch": 0.9340861537814508,
      "grad_norm": 3.234375,
      "learning_rate": 3.825777341630516e-05,
      "loss": 0.9035,
      "step": 266520
    },
    {
      "epoch": 0.9341212012883463,
      "grad_norm": 2.96875,
      "learning_rate": 3.825712438764146e-05,
      "loss": 0.8519,
      "step": 266530
    },
    {
      "epoch": 0.934156248795242,
      "grad_norm": 2.53125,
      "learning_rate": 3.8256475358977754e-05,
      "loss": 0.892,
      "step": 266540
    },
    {
      "epoch": 0.9341912963021376,
      "grad_norm": 2.90625,
      "learning_rate": 3.8255826330314055e-05,
      "loss": 0.9173,
      "step": 266550
    },
    {
      "epoch": 0.9342263438090331,
      "grad_norm": 3.078125,
      "learning_rate": 3.825517730165035e-05,
      "loss": 0.9187,
      "step": 266560
    },
    {
      "epoch": 0.9342613913159288,
      "grad_norm": 3.078125,
      "learning_rate": 3.825452827298665e-05,
      "loss": 0.8027,
      "step": 266570
    },
    {
      "epoch": 0.9342964388228243,
      "grad_norm": 3.125,
      "learning_rate": 3.8253879244322946e-05,
      "loss": 0.9175,
      "step": 266580
    },
    {
      "epoch": 0.93433148632972,
      "grad_norm": 2.78125,
      "learning_rate": 3.825323021565925e-05,
      "loss": 0.8718,
      "step": 266590
    },
    {
      "epoch": 0.9343665338366155,
      "grad_norm": 2.796875,
      "learning_rate": 3.825258118699554e-05,
      "loss": 0.8389,
      "step": 266600
    },
    {
      "epoch": 0.9344015813435111,
      "grad_norm": 2.828125,
      "learning_rate": 3.8251932158331843e-05,
      "loss": 0.8509,
      "step": 266610
    },
    {
      "epoch": 0.9344366288504067,
      "grad_norm": 3.203125,
      "learning_rate": 3.8251283129668145e-05,
      "loss": 0.8626,
      "step": 266620
    },
    {
      "epoch": 0.9344716763573023,
      "grad_norm": 3.359375,
      "learning_rate": 3.825063410100444e-05,
      "loss": 0.8742,
      "step": 266630
    },
    {
      "epoch": 0.934506723864198,
      "grad_norm": 3.078125,
      "learning_rate": 3.824998507234074e-05,
      "loss": 0.925,
      "step": 266640
    },
    {
      "epoch": 0.9345417713710935,
      "grad_norm": 3.125,
      "learning_rate": 3.8249336043677035e-05,
      "loss": 0.849,
      "step": 266650
    },
    {
      "epoch": 0.9345768188779892,
      "grad_norm": 3.5625,
      "learning_rate": 3.824868701501334e-05,
      "loss": 0.8327,
      "step": 266660
    },
    {
      "epoch": 0.9346118663848847,
      "grad_norm": 2.9375,
      "learning_rate": 3.824803798634963e-05,
      "loss": 0.8953,
      "step": 266670
    },
    {
      "epoch": 0.9346469138917803,
      "grad_norm": 2.890625,
      "learning_rate": 3.824738895768593e-05,
      "loss": 0.8653,
      "step": 266680
    },
    {
      "epoch": 0.9346819613986759,
      "grad_norm": 2.8125,
      "learning_rate": 3.824673992902223e-05,
      "loss": 0.8243,
      "step": 266690
    },
    {
      "epoch": 0.9347170089055715,
      "grad_norm": 2.875,
      "learning_rate": 3.824609090035853e-05,
      "loss": 1.0124,
      "step": 266700
    },
    {
      "epoch": 0.9347520564124671,
      "grad_norm": 2.875,
      "learning_rate": 3.8245441871694823e-05,
      "loss": 0.9055,
      "step": 266710
    },
    {
      "epoch": 0.9347871039193627,
      "grad_norm": 3.21875,
      "learning_rate": 3.8244792843031125e-05,
      "loss": 0.8339,
      "step": 266720
    },
    {
      "epoch": 0.9348221514262582,
      "grad_norm": 2.96875,
      "learning_rate": 3.824414381436742e-05,
      "loss": 0.9642,
      "step": 266730
    },
    {
      "epoch": 0.9348571989331539,
      "grad_norm": 2.421875,
      "learning_rate": 3.8243494785703714e-05,
      "loss": 0.798,
      "step": 266740
    },
    {
      "epoch": 0.9348922464400495,
      "grad_norm": 2.921875,
      "learning_rate": 3.8242845757040015e-05,
      "loss": 0.9373,
      "step": 266750
    },
    {
      "epoch": 0.9349272939469451,
      "grad_norm": 2.953125,
      "learning_rate": 3.824219672837631e-05,
      "loss": 0.8647,
      "step": 266760
    },
    {
      "epoch": 0.9349623414538407,
      "grad_norm": 2.65625,
      "learning_rate": 3.824154769971261e-05,
      "loss": 0.8349,
      "step": 266770
    },
    {
      "epoch": 0.9349973889607363,
      "grad_norm": 2.421875,
      "learning_rate": 3.8240898671048906e-05,
      "loss": 0.9346,
      "step": 266780
    },
    {
      "epoch": 0.9350324364676319,
      "grad_norm": 2.859375,
      "learning_rate": 3.824024964238521e-05,
      "loss": 0.8768,
      "step": 266790
    },
    {
      "epoch": 0.9350674839745274,
      "grad_norm": 3.4375,
      "learning_rate": 3.82396006137215e-05,
      "loss": 0.8345,
      "step": 266800
    },
    {
      "epoch": 0.9351025314814231,
      "grad_norm": 2.59375,
      "learning_rate": 3.8238951585057803e-05,
      "loss": 0.9136,
      "step": 266810
    },
    {
      "epoch": 0.9351375789883186,
      "grad_norm": 2.625,
      "learning_rate": 3.82383025563941e-05,
      "loss": 0.8818,
      "step": 266820
    },
    {
      "epoch": 0.9351726264952143,
      "grad_norm": 2.703125,
      "learning_rate": 3.82376535277304e-05,
      "loss": 0.8815,
      "step": 266830
    },
    {
      "epoch": 0.9352076740021099,
      "grad_norm": 2.65625,
      "learning_rate": 3.82370044990667e-05,
      "loss": 0.834,
      "step": 266840
    },
    {
      "epoch": 0.9352427215090054,
      "grad_norm": 2.8125,
      "learning_rate": 3.8236355470402995e-05,
      "loss": 0.9126,
      "step": 266850
    },
    {
      "epoch": 0.9352777690159011,
      "grad_norm": 2.734375,
      "learning_rate": 3.82357064417393e-05,
      "loss": 0.8355,
      "step": 266860
    },
    {
      "epoch": 0.9353128165227966,
      "grad_norm": 2.96875,
      "learning_rate": 3.823505741307559e-05,
      "loss": 0.8459,
      "step": 266870
    },
    {
      "epoch": 0.9353478640296923,
      "grad_norm": 3.0625,
      "learning_rate": 3.823440838441189e-05,
      "loss": 0.8294,
      "step": 266880
    },
    {
      "epoch": 0.9353829115365878,
      "grad_norm": 2.84375,
      "learning_rate": 3.823375935574819e-05,
      "loss": 0.8558,
      "step": 266890
    },
    {
      "epoch": 0.9354179590434835,
      "grad_norm": 3.09375,
      "learning_rate": 3.823311032708449e-05,
      "loss": 0.8235,
      "step": 266900
    },
    {
      "epoch": 0.935453006550379,
      "grad_norm": 2.59375,
      "learning_rate": 3.8232461298420783e-05,
      "loss": 0.9019,
      "step": 266910
    },
    {
      "epoch": 0.9354880540572746,
      "grad_norm": 2.78125,
      "learning_rate": 3.8231812269757085e-05,
      "loss": 0.8628,
      "step": 266920
    },
    {
      "epoch": 0.9355231015641702,
      "grad_norm": 3.25,
      "learning_rate": 3.823116324109338e-05,
      "loss": 0.9332,
      "step": 266930
    },
    {
      "epoch": 0.9355581490710658,
      "grad_norm": 2.953125,
      "learning_rate": 3.823051421242968e-05,
      "loss": 0.8352,
      "step": 266940
    },
    {
      "epoch": 0.9355931965779615,
      "grad_norm": 2.734375,
      "learning_rate": 3.8229865183765975e-05,
      "loss": 0.8408,
      "step": 266950
    },
    {
      "epoch": 0.935628244084857,
      "grad_norm": 2.90625,
      "learning_rate": 3.822921615510228e-05,
      "loss": 0.954,
      "step": 266960
    },
    {
      "epoch": 0.9356632915917527,
      "grad_norm": 3.25,
      "learning_rate": 3.822856712643857e-05,
      "loss": 0.8957,
      "step": 266970
    },
    {
      "epoch": 0.9356983390986482,
      "grad_norm": 2.8125,
      "learning_rate": 3.822791809777487e-05,
      "loss": 0.8627,
      "step": 266980
    },
    {
      "epoch": 0.9357333866055438,
      "grad_norm": 2.90625,
      "learning_rate": 3.8227269069111174e-05,
      "loss": 0.9039,
      "step": 266990
    },
    {
      "epoch": 0.9357684341124394,
      "grad_norm": 3.234375,
      "learning_rate": 3.822662004044747e-05,
      "loss": 0.9026,
      "step": 267000
    },
    {
      "epoch": 0.935803481619335,
      "grad_norm": 2.71875,
      "learning_rate": 3.822597101178377e-05,
      "loss": 0.8112,
      "step": 267010
    },
    {
      "epoch": 0.9358385291262306,
      "grad_norm": 3.328125,
      "learning_rate": 3.8225321983120065e-05,
      "loss": 0.9077,
      "step": 267020
    },
    {
      "epoch": 0.9358735766331262,
      "grad_norm": 2.78125,
      "learning_rate": 3.8224672954456366e-05,
      "loss": 0.8284,
      "step": 267030
    },
    {
      "epoch": 0.9359086241400218,
      "grad_norm": 2.796875,
      "learning_rate": 3.822402392579266e-05,
      "loss": 0.8602,
      "step": 267040
    },
    {
      "epoch": 0.9359436716469174,
      "grad_norm": 3.390625,
      "learning_rate": 3.822337489712896e-05,
      "loss": 0.8796,
      "step": 267050
    },
    {
      "epoch": 0.935978719153813,
      "grad_norm": 2.671875,
      "learning_rate": 3.822272586846526e-05,
      "loss": 0.8419,
      "step": 267060
    },
    {
      "epoch": 0.9360137666607086,
      "grad_norm": 3.34375,
      "learning_rate": 3.822207683980156e-05,
      "loss": 0.8208,
      "step": 267070
    },
    {
      "epoch": 0.9360488141676042,
      "grad_norm": 2.9375,
      "learning_rate": 3.822142781113785e-05,
      "loss": 0.9007,
      "step": 267080
    },
    {
      "epoch": 0.9360838616744998,
      "grad_norm": 2.546875,
      "learning_rate": 3.8220778782474154e-05,
      "loss": 0.841,
      "step": 267090
    },
    {
      "epoch": 0.9361189091813954,
      "grad_norm": 2.90625,
      "learning_rate": 3.822012975381045e-05,
      "loss": 0.9275,
      "step": 267100
    },
    {
      "epoch": 0.9361539566882909,
      "grad_norm": 2.921875,
      "learning_rate": 3.8219480725146743e-05,
      "loss": 0.9439,
      "step": 267110
    },
    {
      "epoch": 0.9361890041951866,
      "grad_norm": 3.109375,
      "learning_rate": 3.8218831696483045e-05,
      "loss": 0.8724,
      "step": 267120
    },
    {
      "epoch": 0.9362240517020822,
      "grad_norm": 3.09375,
      "learning_rate": 3.821818266781934e-05,
      "loss": 0.7892,
      "step": 267130
    },
    {
      "epoch": 0.9362590992089778,
      "grad_norm": 2.84375,
      "learning_rate": 3.821753363915564e-05,
      "loss": 0.9111,
      "step": 267140
    },
    {
      "epoch": 0.9362941467158734,
      "grad_norm": 2.671875,
      "learning_rate": 3.8216884610491935e-05,
      "loss": 0.8803,
      "step": 267150
    },
    {
      "epoch": 0.936329194222769,
      "grad_norm": 2.8125,
      "learning_rate": 3.821623558182824e-05,
      "loss": 0.837,
      "step": 267160
    },
    {
      "epoch": 0.9363642417296646,
      "grad_norm": 2.5625,
      "learning_rate": 3.821558655316453e-05,
      "loss": 0.9514,
      "step": 267170
    },
    {
      "epoch": 0.9363992892365601,
      "grad_norm": 3.09375,
      "learning_rate": 3.821493752450083e-05,
      "loss": 0.8597,
      "step": 267180
    },
    {
      "epoch": 0.9364343367434558,
      "grad_norm": 2.71875,
      "learning_rate": 3.821428849583713e-05,
      "loss": 0.8689,
      "step": 267190
    },
    {
      "epoch": 0.9364693842503513,
      "grad_norm": 3.484375,
      "learning_rate": 3.821363946717343e-05,
      "loss": 0.9509,
      "step": 267200
    },
    {
      "epoch": 0.936504431757247,
      "grad_norm": 3.203125,
      "learning_rate": 3.821299043850973e-05,
      "loss": 0.9415,
      "step": 267210
    },
    {
      "epoch": 0.9365394792641425,
      "grad_norm": 2.453125,
      "learning_rate": 3.8212341409846025e-05,
      "loss": 0.9052,
      "step": 267220
    },
    {
      "epoch": 0.9365745267710381,
      "grad_norm": 3.0625,
      "learning_rate": 3.8211692381182326e-05,
      "loss": 0.8759,
      "step": 267230
    },
    {
      "epoch": 0.9366095742779338,
      "grad_norm": 2.890625,
      "learning_rate": 3.821104335251862e-05,
      "loss": 0.8993,
      "step": 267240
    },
    {
      "epoch": 0.9366446217848293,
      "grad_norm": 2.5,
      "learning_rate": 3.821039432385492e-05,
      "loss": 0.8697,
      "step": 267250
    },
    {
      "epoch": 0.936679669291725,
      "grad_norm": 2.953125,
      "learning_rate": 3.820974529519122e-05,
      "loss": 0.8399,
      "step": 267260
    },
    {
      "epoch": 0.9367147167986205,
      "grad_norm": 3.0,
      "learning_rate": 3.820909626652752e-05,
      "loss": 0.8866,
      "step": 267270
    },
    {
      "epoch": 0.9367497643055162,
      "grad_norm": 2.296875,
      "learning_rate": 3.820844723786381e-05,
      "loss": 0.7289,
      "step": 267280
    },
    {
      "epoch": 0.9367848118124117,
      "grad_norm": 2.765625,
      "learning_rate": 3.8207798209200114e-05,
      "loss": 0.9322,
      "step": 267290
    },
    {
      "epoch": 0.9368198593193073,
      "grad_norm": 3.078125,
      "learning_rate": 3.820714918053641e-05,
      "loss": 0.9707,
      "step": 267300
    },
    {
      "epoch": 0.9368549068262029,
      "grad_norm": 3.234375,
      "learning_rate": 3.820650015187271e-05,
      "loss": 0.873,
      "step": 267310
    },
    {
      "epoch": 0.9368899543330985,
      "grad_norm": 2.671875,
      "learning_rate": 3.8205851123209005e-05,
      "loss": 0.8512,
      "step": 267320
    },
    {
      "epoch": 0.9369250018399942,
      "grad_norm": 2.671875,
      "learning_rate": 3.8205202094545306e-05,
      "loss": 0.9333,
      "step": 267330
    },
    {
      "epoch": 0.9369600493468897,
      "grad_norm": 2.9375,
      "learning_rate": 3.820455306588161e-05,
      "loss": 0.8471,
      "step": 267340
    },
    {
      "epoch": 0.9369950968537853,
      "grad_norm": 2.890625,
      "learning_rate": 3.82039040372179e-05,
      "loss": 0.8543,
      "step": 267350
    },
    {
      "epoch": 0.9370301443606809,
      "grad_norm": 2.8125,
      "learning_rate": 3.8203255008554204e-05,
      "loss": 0.9325,
      "step": 267360
    },
    {
      "epoch": 0.9370651918675765,
      "grad_norm": 2.703125,
      "learning_rate": 3.82026059798905e-05,
      "loss": 0.8373,
      "step": 267370
    },
    {
      "epoch": 0.9371002393744721,
      "grad_norm": 3.015625,
      "learning_rate": 3.82019569512268e-05,
      "loss": 0.8676,
      "step": 267380
    },
    {
      "epoch": 0.9371352868813677,
      "grad_norm": 3.25,
      "learning_rate": 3.8201307922563094e-05,
      "loss": 0.9022,
      "step": 267390
    },
    {
      "epoch": 0.9371703343882632,
      "grad_norm": 2.625,
      "learning_rate": 3.8200658893899396e-05,
      "loss": 0.7643,
      "step": 267400
    },
    {
      "epoch": 0.9372053818951589,
      "grad_norm": 3.1875,
      "learning_rate": 3.820000986523569e-05,
      "loss": 0.8979,
      "step": 267410
    },
    {
      "epoch": 0.9372404294020544,
      "grad_norm": 2.765625,
      "learning_rate": 3.819936083657199e-05,
      "loss": 0.9173,
      "step": 267420
    },
    {
      "epoch": 0.9372754769089501,
      "grad_norm": 2.90625,
      "learning_rate": 3.8198711807908286e-05,
      "loss": 0.8394,
      "step": 267430
    },
    {
      "epoch": 0.9373105244158457,
      "grad_norm": 3.40625,
      "learning_rate": 3.819806277924459e-05,
      "loss": 0.8901,
      "step": 267440
    },
    {
      "epoch": 0.9373455719227413,
      "grad_norm": 3.234375,
      "learning_rate": 3.819741375058088e-05,
      "loss": 0.8485,
      "step": 267450
    },
    {
      "epoch": 0.9373806194296369,
      "grad_norm": 2.859375,
      "learning_rate": 3.8196764721917184e-05,
      "loss": 0.8429,
      "step": 267460
    },
    {
      "epoch": 0.9374156669365324,
      "grad_norm": 3.546875,
      "learning_rate": 3.819611569325348e-05,
      "loss": 0.827,
      "step": 267470
    },
    {
      "epoch": 0.9374507144434281,
      "grad_norm": 2.796875,
      "learning_rate": 3.819546666458977e-05,
      "loss": 0.8367,
      "step": 267480
    },
    {
      "epoch": 0.9374857619503236,
      "grad_norm": 2.921875,
      "learning_rate": 3.8194817635926074e-05,
      "loss": 0.9306,
      "step": 267490
    },
    {
      "epoch": 0.9375208094572193,
      "grad_norm": 2.828125,
      "learning_rate": 3.819416860726237e-05,
      "loss": 0.8939,
      "step": 267500
    },
    {
      "epoch": 0.9375558569641148,
      "grad_norm": 2.640625,
      "learning_rate": 3.819351957859867e-05,
      "loss": 0.8929,
      "step": 267510
    },
    {
      "epoch": 0.9375909044710105,
      "grad_norm": 3.171875,
      "learning_rate": 3.8192870549934965e-05,
      "loss": 0.9629,
      "step": 267520
    },
    {
      "epoch": 0.9376259519779061,
      "grad_norm": 3.046875,
      "learning_rate": 3.8192221521271266e-05,
      "loss": 0.838,
      "step": 267530
    },
    {
      "epoch": 0.9376609994848016,
      "grad_norm": 2.78125,
      "learning_rate": 3.819157249260756e-05,
      "loss": 0.8746,
      "step": 267540
    },
    {
      "epoch": 0.9376960469916973,
      "grad_norm": 2.640625,
      "learning_rate": 3.819092346394386e-05,
      "loss": 0.7836,
      "step": 267550
    },
    {
      "epoch": 0.9377310944985928,
      "grad_norm": 2.828125,
      "learning_rate": 3.819027443528016e-05,
      "loss": 0.8232,
      "step": 267560
    },
    {
      "epoch": 0.9377661420054885,
      "grad_norm": 3.109375,
      "learning_rate": 3.818962540661646e-05,
      "loss": 0.9195,
      "step": 267570
    },
    {
      "epoch": 0.937801189512384,
      "grad_norm": 2.953125,
      "learning_rate": 3.818897637795276e-05,
      "loss": 0.857,
      "step": 267580
    },
    {
      "epoch": 0.9378362370192797,
      "grad_norm": 2.90625,
      "learning_rate": 3.8188327349289054e-05,
      "loss": 0.8898,
      "step": 267590
    },
    {
      "epoch": 0.9378712845261752,
      "grad_norm": 2.6875,
      "learning_rate": 3.8187678320625356e-05,
      "loss": 0.8898,
      "step": 267600
    },
    {
      "epoch": 0.9379063320330708,
      "grad_norm": 2.625,
      "learning_rate": 3.818702929196165e-05,
      "loss": 0.8162,
      "step": 267610
    },
    {
      "epoch": 0.9379413795399665,
      "grad_norm": 3.28125,
      "learning_rate": 3.818638026329795e-05,
      "loss": 0.9361,
      "step": 267620
    },
    {
      "epoch": 0.937976427046862,
      "grad_norm": 2.734375,
      "learning_rate": 3.8185731234634246e-05,
      "loss": 0.8685,
      "step": 267630
    },
    {
      "epoch": 0.9380114745537577,
      "grad_norm": 2.703125,
      "learning_rate": 3.818508220597055e-05,
      "loss": 0.8634,
      "step": 267640
    },
    {
      "epoch": 0.9380465220606532,
      "grad_norm": 2.71875,
      "learning_rate": 3.818443317730684e-05,
      "loss": 0.8095,
      "step": 267650
    },
    {
      "epoch": 0.9380815695675488,
      "grad_norm": 2.609375,
      "learning_rate": 3.8183784148643144e-05,
      "loss": 0.8947,
      "step": 267660
    },
    {
      "epoch": 0.9381166170744444,
      "grad_norm": 2.65625,
      "learning_rate": 3.818313511997944e-05,
      "loss": 0.89,
      "step": 267670
    },
    {
      "epoch": 0.93815166458134,
      "grad_norm": 3.171875,
      "learning_rate": 3.818248609131574e-05,
      "loss": 0.8731,
      "step": 267680
    },
    {
      "epoch": 0.9381867120882356,
      "grad_norm": 2.71875,
      "learning_rate": 3.8181837062652034e-05,
      "loss": 0.873,
      "step": 267690
    },
    {
      "epoch": 0.9382217595951312,
      "grad_norm": 3.0625,
      "learning_rate": 3.8181188033988336e-05,
      "loss": 0.8249,
      "step": 267700
    },
    {
      "epoch": 0.9382568071020267,
      "grad_norm": 3.703125,
      "learning_rate": 3.818053900532464e-05,
      "loss": 0.9086,
      "step": 267710
    },
    {
      "epoch": 0.9382918546089224,
      "grad_norm": 2.921875,
      "learning_rate": 3.817988997666093e-05,
      "loss": 0.7541,
      "step": 267720
    },
    {
      "epoch": 0.938326902115818,
      "grad_norm": 2.921875,
      "learning_rate": 3.817924094799723e-05,
      "loss": 0.8551,
      "step": 267730
    },
    {
      "epoch": 0.9383619496227136,
      "grad_norm": 2.984375,
      "learning_rate": 3.817859191933353e-05,
      "loss": 0.914,
      "step": 267740
    },
    {
      "epoch": 0.9383969971296092,
      "grad_norm": 2.734375,
      "learning_rate": 3.817794289066983e-05,
      "loss": 0.9088,
      "step": 267750
    },
    {
      "epoch": 0.9384320446365048,
      "grad_norm": 2.859375,
      "learning_rate": 3.8177293862006124e-05,
      "loss": 0.976,
      "step": 267760
    },
    {
      "epoch": 0.9384670921434004,
      "grad_norm": 3.171875,
      "learning_rate": 3.8176644833342425e-05,
      "loss": 0.9309,
      "step": 267770
    },
    {
      "epoch": 0.9385021396502959,
      "grad_norm": 2.984375,
      "learning_rate": 3.817599580467872e-05,
      "loss": 0.946,
      "step": 267780
    },
    {
      "epoch": 0.9385371871571916,
      "grad_norm": 2.484375,
      "learning_rate": 3.817534677601502e-05,
      "loss": 0.8894,
      "step": 267790
    },
    {
      "epoch": 0.9385722346640871,
      "grad_norm": 2.296875,
      "learning_rate": 3.8174697747351316e-05,
      "loss": 0.9757,
      "step": 267800
    },
    {
      "epoch": 0.9386072821709828,
      "grad_norm": 3.46875,
      "learning_rate": 3.817404871868762e-05,
      "loss": 0.9555,
      "step": 267810
    },
    {
      "epoch": 0.9386423296778784,
      "grad_norm": 2.9375,
      "learning_rate": 3.817339969002391e-05,
      "loss": 0.9423,
      "step": 267820
    },
    {
      "epoch": 0.938677377184774,
      "grad_norm": 3.234375,
      "learning_rate": 3.817275066136021e-05,
      "loss": 0.8546,
      "step": 267830
    },
    {
      "epoch": 0.9387124246916696,
      "grad_norm": 2.5625,
      "learning_rate": 3.817210163269651e-05,
      "loss": 0.8872,
      "step": 267840
    },
    {
      "epoch": 0.9387474721985651,
      "grad_norm": 2.6875,
      "learning_rate": 3.817145260403281e-05,
      "loss": 0.8187,
      "step": 267850
    },
    {
      "epoch": 0.9387825197054608,
      "grad_norm": 3.125,
      "learning_rate": 3.8170803575369104e-05,
      "loss": 0.9071,
      "step": 267860
    },
    {
      "epoch": 0.9388175672123563,
      "grad_norm": 2.765625,
      "learning_rate": 3.81701545467054e-05,
      "loss": 0.8798,
      "step": 267870
    },
    {
      "epoch": 0.938852614719252,
      "grad_norm": 2.96875,
      "learning_rate": 3.81695055180417e-05,
      "loss": 0.85,
      "step": 267880
    },
    {
      "epoch": 0.9388876622261475,
      "grad_norm": 3.421875,
      "learning_rate": 3.8168856489377994e-05,
      "loss": 0.9254,
      "step": 267890
    },
    {
      "epoch": 0.9389227097330431,
      "grad_norm": 2.890625,
      "learning_rate": 3.8168207460714296e-05,
      "loss": 0.9025,
      "step": 267900
    },
    {
      "epoch": 0.9389577572399387,
      "grad_norm": 2.78125,
      "learning_rate": 3.816755843205059e-05,
      "loss": 0.8252,
      "step": 267910
    },
    {
      "epoch": 0.9389928047468343,
      "grad_norm": 3.25,
      "learning_rate": 3.816690940338689e-05,
      "loss": 0.8716,
      "step": 267920
    },
    {
      "epoch": 0.93902785225373,
      "grad_norm": 3.03125,
      "learning_rate": 3.8166260374723186e-05,
      "loss": 0.8911,
      "step": 267930
    },
    {
      "epoch": 0.9390628997606255,
      "grad_norm": 2.875,
      "learning_rate": 3.816561134605949e-05,
      "loss": 0.8898,
      "step": 267940
    },
    {
      "epoch": 0.9390979472675212,
      "grad_norm": 2.890625,
      "learning_rate": 3.816496231739579e-05,
      "loss": 0.9054,
      "step": 267950
    },
    {
      "epoch": 0.9391329947744167,
      "grad_norm": 2.71875,
      "learning_rate": 3.8164313288732084e-05,
      "loss": 0.8941,
      "step": 267960
    },
    {
      "epoch": 0.9391680422813123,
      "grad_norm": 3.28125,
      "learning_rate": 3.8163664260068385e-05,
      "loss": 0.9857,
      "step": 267970
    },
    {
      "epoch": 0.9392030897882079,
      "grad_norm": 3.34375,
      "learning_rate": 3.816301523140468e-05,
      "loss": 0.9164,
      "step": 267980
    },
    {
      "epoch": 0.9392381372951035,
      "grad_norm": 2.953125,
      "learning_rate": 3.816236620274098e-05,
      "loss": 0.8894,
      "step": 267990
    },
    {
      "epoch": 0.9392731848019991,
      "grad_norm": 3.0625,
      "learning_rate": 3.8161717174077276e-05,
      "loss": 0.9181,
      "step": 268000
    },
    {
      "epoch": 0.9393082323088947,
      "grad_norm": 3.21875,
      "learning_rate": 3.816106814541358e-05,
      "loss": 0.9197,
      "step": 268010
    },
    {
      "epoch": 0.9393432798157904,
      "grad_norm": 3.109375,
      "learning_rate": 3.816041911674987e-05,
      "loss": 0.8947,
      "step": 268020
    },
    {
      "epoch": 0.9393783273226859,
      "grad_norm": 2.921875,
      "learning_rate": 3.815977008808617e-05,
      "loss": 0.8338,
      "step": 268030
    },
    {
      "epoch": 0.9394133748295815,
      "grad_norm": 3.28125,
      "learning_rate": 3.815912105942247e-05,
      "loss": 0.8595,
      "step": 268040
    },
    {
      "epoch": 0.9394484223364771,
      "grad_norm": 3.21875,
      "learning_rate": 3.815847203075877e-05,
      "loss": 0.8488,
      "step": 268050
    },
    {
      "epoch": 0.9394834698433727,
      "grad_norm": 2.78125,
      "learning_rate": 3.8157823002095064e-05,
      "loss": 0.9155,
      "step": 268060
    },
    {
      "epoch": 0.9395185173502683,
      "grad_norm": 3.03125,
      "learning_rate": 3.8157173973431365e-05,
      "loss": 0.885,
      "step": 268070
    },
    {
      "epoch": 0.9395535648571639,
      "grad_norm": 2.9375,
      "learning_rate": 3.8156524944767666e-05,
      "loss": 0.7948,
      "step": 268080
    },
    {
      "epoch": 0.9395886123640594,
      "grad_norm": 3.09375,
      "learning_rate": 3.815587591610396e-05,
      "loss": 0.8519,
      "step": 268090
    },
    {
      "epoch": 0.9396236598709551,
      "grad_norm": 3.03125,
      "learning_rate": 3.815522688744026e-05,
      "loss": 0.935,
      "step": 268100
    },
    {
      "epoch": 0.9396587073778506,
      "grad_norm": 2.734375,
      "learning_rate": 3.815457785877656e-05,
      "loss": 0.8364,
      "step": 268110
    },
    {
      "epoch": 0.9396937548847463,
      "grad_norm": 2.625,
      "learning_rate": 3.815392883011286e-05,
      "loss": 0.9213,
      "step": 268120
    },
    {
      "epoch": 0.9397288023916419,
      "grad_norm": 2.90625,
      "learning_rate": 3.815327980144915e-05,
      "loss": 0.8867,
      "step": 268130
    },
    {
      "epoch": 0.9397638498985375,
      "grad_norm": 2.84375,
      "learning_rate": 3.8152630772785454e-05,
      "loss": 0.9212,
      "step": 268140
    },
    {
      "epoch": 0.9397988974054331,
      "grad_norm": 2.609375,
      "learning_rate": 3.815198174412175e-05,
      "loss": 0.8793,
      "step": 268150
    },
    {
      "epoch": 0.9398339449123286,
      "grad_norm": 2.984375,
      "learning_rate": 3.815133271545805e-05,
      "loss": 0.9227,
      "step": 268160
    },
    {
      "epoch": 0.9398689924192243,
      "grad_norm": 3.171875,
      "learning_rate": 3.8150683686794345e-05,
      "loss": 0.8209,
      "step": 268170
    },
    {
      "epoch": 0.9399040399261198,
      "grad_norm": 2.765625,
      "learning_rate": 3.8150034658130646e-05,
      "loss": 0.9282,
      "step": 268180
    },
    {
      "epoch": 0.9399390874330155,
      "grad_norm": 3.140625,
      "learning_rate": 3.814938562946694e-05,
      "loss": 0.807,
      "step": 268190
    },
    {
      "epoch": 0.939974134939911,
      "grad_norm": 3.28125,
      "learning_rate": 3.814873660080324e-05,
      "loss": 0.8665,
      "step": 268200
    },
    {
      "epoch": 0.9400091824468066,
      "grad_norm": 2.734375,
      "learning_rate": 3.8148087572139544e-05,
      "loss": 0.7926,
      "step": 268210
    },
    {
      "epoch": 0.9400442299537023,
      "grad_norm": 3.109375,
      "learning_rate": 3.814743854347584e-05,
      "loss": 0.9645,
      "step": 268220
    },
    {
      "epoch": 0.9400792774605978,
      "grad_norm": 2.8125,
      "learning_rate": 3.814678951481213e-05,
      "loss": 0.8894,
      "step": 268230
    },
    {
      "epoch": 0.9401143249674935,
      "grad_norm": 2.953125,
      "learning_rate": 3.814614048614843e-05,
      "loss": 0.952,
      "step": 268240
    },
    {
      "epoch": 0.940149372474389,
      "grad_norm": 2.9375,
      "learning_rate": 3.814549145748473e-05,
      "loss": 0.8489,
      "step": 268250
    },
    {
      "epoch": 0.9401844199812847,
      "grad_norm": 2.703125,
      "learning_rate": 3.8144842428821024e-05,
      "loss": 0.8262,
      "step": 268260
    },
    {
      "epoch": 0.9402194674881802,
      "grad_norm": 2.578125,
      "learning_rate": 3.8144193400157325e-05,
      "loss": 0.8513,
      "step": 268270
    },
    {
      "epoch": 0.9402545149950758,
      "grad_norm": 2.875,
      "learning_rate": 3.814354437149362e-05,
      "loss": 0.9167,
      "step": 268280
    },
    {
      "epoch": 0.9402895625019714,
      "grad_norm": 3.0625,
      "learning_rate": 3.814289534282992e-05,
      "loss": 0.8217,
      "step": 268290
    },
    {
      "epoch": 0.940324610008867,
      "grad_norm": 2.6875,
      "learning_rate": 3.814224631416622e-05,
      "loss": 0.9179,
      "step": 268300
    },
    {
      "epoch": 0.9403596575157627,
      "grad_norm": 2.875,
      "learning_rate": 3.814159728550252e-05,
      "loss": 0.8556,
      "step": 268310
    },
    {
      "epoch": 0.9403947050226582,
      "grad_norm": 2.9375,
      "learning_rate": 3.814094825683882e-05,
      "loss": 0.888,
      "step": 268320
    },
    {
      "epoch": 0.9404297525295539,
      "grad_norm": 3.28125,
      "learning_rate": 3.814029922817511e-05,
      "loss": 0.8809,
      "step": 268330
    },
    {
      "epoch": 0.9404648000364494,
      "grad_norm": 2.75,
      "learning_rate": 3.8139650199511414e-05,
      "loss": 0.7984,
      "step": 268340
    },
    {
      "epoch": 0.940499847543345,
      "grad_norm": 3.015625,
      "learning_rate": 3.813900117084771e-05,
      "loss": 0.9059,
      "step": 268350
    },
    {
      "epoch": 0.9405348950502406,
      "grad_norm": 2.75,
      "learning_rate": 3.813835214218401e-05,
      "loss": 0.8529,
      "step": 268360
    },
    {
      "epoch": 0.9405699425571362,
      "grad_norm": 2.90625,
      "learning_rate": 3.8137703113520305e-05,
      "loss": 0.9008,
      "step": 268370
    },
    {
      "epoch": 0.9406049900640318,
      "grad_norm": 2.921875,
      "learning_rate": 3.8137054084856606e-05,
      "loss": 0.8697,
      "step": 268380
    },
    {
      "epoch": 0.9406400375709274,
      "grad_norm": 3.296875,
      "learning_rate": 3.81364050561929e-05,
      "loss": 0.8858,
      "step": 268390
    },
    {
      "epoch": 0.9406750850778229,
      "grad_norm": 2.703125,
      "learning_rate": 3.81357560275292e-05,
      "loss": 0.8575,
      "step": 268400
    },
    {
      "epoch": 0.9407101325847186,
      "grad_norm": 2.5,
      "learning_rate": 3.81351069988655e-05,
      "loss": 0.8378,
      "step": 268410
    },
    {
      "epoch": 0.9407451800916142,
      "grad_norm": 2.796875,
      "learning_rate": 3.81344579702018e-05,
      "loss": 0.9532,
      "step": 268420
    },
    {
      "epoch": 0.9407802275985098,
      "grad_norm": 2.984375,
      "learning_rate": 3.813380894153809e-05,
      "loss": 0.8353,
      "step": 268430
    },
    {
      "epoch": 0.9408152751054054,
      "grad_norm": 2.890625,
      "learning_rate": 3.8133159912874394e-05,
      "loss": 0.9237,
      "step": 268440
    },
    {
      "epoch": 0.940850322612301,
      "grad_norm": 2.859375,
      "learning_rate": 3.8132510884210696e-05,
      "loss": 0.9641,
      "step": 268450
    },
    {
      "epoch": 0.9408853701191966,
      "grad_norm": 2.921875,
      "learning_rate": 3.813186185554699e-05,
      "loss": 0.7783,
      "step": 268460
    },
    {
      "epoch": 0.9409204176260921,
      "grad_norm": 3.078125,
      "learning_rate": 3.813121282688329e-05,
      "loss": 0.8933,
      "step": 268470
    },
    {
      "epoch": 0.9409554651329878,
      "grad_norm": 2.75,
      "learning_rate": 3.8130563798219586e-05,
      "loss": 0.8799,
      "step": 268480
    },
    {
      "epoch": 0.9409905126398833,
      "grad_norm": 2.46875,
      "learning_rate": 3.812991476955589e-05,
      "loss": 0.8294,
      "step": 268490
    },
    {
      "epoch": 0.941025560146779,
      "grad_norm": 3.234375,
      "learning_rate": 3.812926574089218e-05,
      "loss": 0.8885,
      "step": 268500
    },
    {
      "epoch": 0.9410606076536746,
      "grad_norm": 2.78125,
      "learning_rate": 3.8128616712228484e-05,
      "loss": 0.9369,
      "step": 268510
    },
    {
      "epoch": 0.9410956551605701,
      "grad_norm": 2.671875,
      "learning_rate": 3.812796768356478e-05,
      "loss": 0.8674,
      "step": 268520
    },
    {
      "epoch": 0.9411307026674658,
      "grad_norm": 2.75,
      "learning_rate": 3.812731865490108e-05,
      "loss": 0.7817,
      "step": 268530
    },
    {
      "epoch": 0.9411657501743613,
      "grad_norm": 3.0625,
      "learning_rate": 3.8126669626237374e-05,
      "loss": 0.9716,
      "step": 268540
    },
    {
      "epoch": 0.941200797681257,
      "grad_norm": 2.09375,
      "learning_rate": 3.8126020597573676e-05,
      "loss": 0.8636,
      "step": 268550
    },
    {
      "epoch": 0.9412358451881525,
      "grad_norm": 2.515625,
      "learning_rate": 3.812537156890997e-05,
      "loss": 0.8467,
      "step": 268560
    },
    {
      "epoch": 0.9412708926950482,
      "grad_norm": 3.421875,
      "learning_rate": 3.812472254024627e-05,
      "loss": 0.9004,
      "step": 268570
    },
    {
      "epoch": 0.9413059402019437,
      "grad_norm": 2.6875,
      "learning_rate": 3.812407351158257e-05,
      "loss": 0.8872,
      "step": 268580
    },
    {
      "epoch": 0.9413409877088393,
      "grad_norm": 3.234375,
      "learning_rate": 3.812342448291887e-05,
      "loss": 0.9469,
      "step": 268590
    },
    {
      "epoch": 0.9413760352157349,
      "grad_norm": 2.609375,
      "learning_rate": 3.812277545425517e-05,
      "loss": 0.8141,
      "step": 268600
    },
    {
      "epoch": 0.9414110827226305,
      "grad_norm": 2.90625,
      "learning_rate": 3.812212642559146e-05,
      "loss": 0.8277,
      "step": 268610
    },
    {
      "epoch": 0.9414461302295262,
      "grad_norm": 2.859375,
      "learning_rate": 3.812147739692776e-05,
      "loss": 0.8941,
      "step": 268620
    },
    {
      "epoch": 0.9414811777364217,
      "grad_norm": 2.90625,
      "learning_rate": 3.812082836826405e-05,
      "loss": 0.8705,
      "step": 268630
    },
    {
      "epoch": 0.9415162252433174,
      "grad_norm": 2.671875,
      "learning_rate": 3.8120179339600354e-05,
      "loss": 0.8938,
      "step": 268640
    },
    {
      "epoch": 0.9415512727502129,
      "grad_norm": 3.1875,
      "learning_rate": 3.811953031093665e-05,
      "loss": 0.8117,
      "step": 268650
    },
    {
      "epoch": 0.9415863202571085,
      "grad_norm": 2.84375,
      "learning_rate": 3.811888128227295e-05,
      "loss": 0.8887,
      "step": 268660
    },
    {
      "epoch": 0.9416213677640041,
      "grad_norm": 2.765625,
      "learning_rate": 3.811823225360925e-05,
      "loss": 0.8894,
      "step": 268670
    },
    {
      "epoch": 0.9416564152708997,
      "grad_norm": 2.390625,
      "learning_rate": 3.8117583224945546e-05,
      "loss": 0.8564,
      "step": 268680
    },
    {
      "epoch": 0.9416914627777953,
      "grad_norm": 3.15625,
      "learning_rate": 3.811693419628185e-05,
      "loss": 0.8591,
      "step": 268690
    },
    {
      "epoch": 0.9417265102846909,
      "grad_norm": 2.6875,
      "learning_rate": 3.811628516761814e-05,
      "loss": 0.8873,
      "step": 268700
    },
    {
      "epoch": 0.9417615577915865,
      "grad_norm": 3.203125,
      "learning_rate": 3.8115636138954444e-05,
      "loss": 0.8431,
      "step": 268710
    },
    {
      "epoch": 0.9417966052984821,
      "grad_norm": 2.75,
      "learning_rate": 3.811498711029074e-05,
      "loss": 0.8299,
      "step": 268720
    },
    {
      "epoch": 0.9418316528053777,
      "grad_norm": 2.6875,
      "learning_rate": 3.811433808162704e-05,
      "loss": 0.7868,
      "step": 268730
    },
    {
      "epoch": 0.9418667003122733,
      "grad_norm": 2.953125,
      "learning_rate": 3.8113689052963334e-05,
      "loss": 0.8527,
      "step": 268740
    },
    {
      "epoch": 0.9419017478191689,
      "grad_norm": 2.90625,
      "learning_rate": 3.8113040024299636e-05,
      "loss": 0.8498,
      "step": 268750
    },
    {
      "epoch": 0.9419367953260644,
      "grad_norm": 2.984375,
      "learning_rate": 3.811239099563593e-05,
      "loss": 0.8845,
      "step": 268760
    },
    {
      "epoch": 0.9419718428329601,
      "grad_norm": 2.953125,
      "learning_rate": 3.811174196697223e-05,
      "loss": 0.9272,
      "step": 268770
    },
    {
      "epoch": 0.9420068903398556,
      "grad_norm": 2.90625,
      "learning_rate": 3.8111092938308526e-05,
      "loss": 0.856,
      "step": 268780
    },
    {
      "epoch": 0.9420419378467513,
      "grad_norm": 3.203125,
      "learning_rate": 3.811044390964483e-05,
      "loss": 0.9441,
      "step": 268790
    },
    {
      "epoch": 0.9420769853536469,
      "grad_norm": 2.6875,
      "learning_rate": 3.810979488098112e-05,
      "loss": 0.8567,
      "step": 268800
    },
    {
      "epoch": 0.9421120328605425,
      "grad_norm": 2.953125,
      "learning_rate": 3.8109145852317424e-05,
      "loss": 0.9451,
      "step": 268810
    },
    {
      "epoch": 0.9421470803674381,
      "grad_norm": 3.4375,
      "learning_rate": 3.8108496823653725e-05,
      "loss": 0.8698,
      "step": 268820
    },
    {
      "epoch": 0.9421821278743336,
      "grad_norm": 3.15625,
      "learning_rate": 3.810784779499002e-05,
      "loss": 0.9263,
      "step": 268830
    },
    {
      "epoch": 0.9422171753812293,
      "grad_norm": 2.453125,
      "learning_rate": 3.810719876632632e-05,
      "loss": 0.9423,
      "step": 268840
    },
    {
      "epoch": 0.9422522228881248,
      "grad_norm": 2.84375,
      "learning_rate": 3.8106549737662616e-05,
      "loss": 0.9265,
      "step": 268850
    },
    {
      "epoch": 0.9422872703950205,
      "grad_norm": 2.875,
      "learning_rate": 3.810590070899892e-05,
      "loss": 0.9397,
      "step": 268860
    },
    {
      "epoch": 0.942322317901916,
      "grad_norm": 3.109375,
      "learning_rate": 3.810525168033521e-05,
      "loss": 0.832,
      "step": 268870
    },
    {
      "epoch": 0.9423573654088117,
      "grad_norm": 2.875,
      "learning_rate": 3.810460265167151e-05,
      "loss": 0.8583,
      "step": 268880
    },
    {
      "epoch": 0.9423924129157072,
      "grad_norm": 3.125,
      "learning_rate": 3.810395362300781e-05,
      "loss": 0.8983,
      "step": 268890
    },
    {
      "epoch": 0.9424274604226028,
      "grad_norm": 2.765625,
      "learning_rate": 3.810330459434411e-05,
      "loss": 0.9807,
      "step": 268900
    },
    {
      "epoch": 0.9424625079294985,
      "grad_norm": 2.96875,
      "learning_rate": 3.8102655565680404e-05,
      "loss": 0.8221,
      "step": 268910
    },
    {
      "epoch": 0.942497555436394,
      "grad_norm": 2.875,
      "learning_rate": 3.8102006537016705e-05,
      "loss": 0.8034,
      "step": 268920
    },
    {
      "epoch": 0.9425326029432897,
      "grad_norm": 2.828125,
      "learning_rate": 3.8101357508353e-05,
      "loss": 0.8673,
      "step": 268930
    },
    {
      "epoch": 0.9425676504501852,
      "grad_norm": 2.859375,
      "learning_rate": 3.81007084796893e-05,
      "loss": 0.9048,
      "step": 268940
    },
    {
      "epoch": 0.9426026979570808,
      "grad_norm": 2.578125,
      "learning_rate": 3.81000594510256e-05,
      "loss": 0.934,
      "step": 268950
    },
    {
      "epoch": 0.9426377454639764,
      "grad_norm": 2.921875,
      "learning_rate": 3.80994104223619e-05,
      "loss": 0.8211,
      "step": 268960
    },
    {
      "epoch": 0.942672792970872,
      "grad_norm": 3.34375,
      "learning_rate": 3.80987613936982e-05,
      "loss": 0.9184,
      "step": 268970
    },
    {
      "epoch": 0.9427078404777676,
      "grad_norm": 2.5,
      "learning_rate": 3.8098112365034486e-05,
      "loss": 0.916,
      "step": 268980
    },
    {
      "epoch": 0.9427428879846632,
      "grad_norm": 2.765625,
      "learning_rate": 3.809746333637079e-05,
      "loss": 0.8727,
      "step": 268990
    },
    {
      "epoch": 0.9427779354915589,
      "grad_norm": 2.703125,
      "learning_rate": 3.809681430770708e-05,
      "loss": 0.8651,
      "step": 269000
    },
    {
      "epoch": 0.9428129829984544,
      "grad_norm": 3.15625,
      "learning_rate": 3.8096165279043384e-05,
      "loss": 0.8732,
      "step": 269010
    },
    {
      "epoch": 0.94284803050535,
      "grad_norm": 3.03125,
      "learning_rate": 3.809551625037968e-05,
      "loss": 0.8568,
      "step": 269020
    },
    {
      "epoch": 0.9428830780122456,
      "grad_norm": 3.25,
      "learning_rate": 3.809486722171598e-05,
      "loss": 0.9331,
      "step": 269030
    },
    {
      "epoch": 0.9429181255191412,
      "grad_norm": 3.046875,
      "learning_rate": 3.809421819305228e-05,
      "loss": 0.8116,
      "step": 269040
    },
    {
      "epoch": 0.9429531730260368,
      "grad_norm": 2.578125,
      "learning_rate": 3.8093569164388576e-05,
      "loss": 0.8469,
      "step": 269050
    },
    {
      "epoch": 0.9429882205329324,
      "grad_norm": 2.796875,
      "learning_rate": 3.809292013572488e-05,
      "loss": 0.8329,
      "step": 269060
    },
    {
      "epoch": 0.9430232680398279,
      "grad_norm": 2.546875,
      "learning_rate": 3.809227110706117e-05,
      "loss": 0.8864,
      "step": 269070
    },
    {
      "epoch": 0.9430583155467236,
      "grad_norm": 2.78125,
      "learning_rate": 3.809162207839747e-05,
      "loss": 0.8092,
      "step": 269080
    },
    {
      "epoch": 0.9430933630536191,
      "grad_norm": 3.125,
      "learning_rate": 3.809097304973377e-05,
      "loss": 0.9183,
      "step": 269090
    },
    {
      "epoch": 0.9431284105605148,
      "grad_norm": 2.515625,
      "learning_rate": 3.809032402107007e-05,
      "loss": 0.8512,
      "step": 269100
    },
    {
      "epoch": 0.9431634580674104,
      "grad_norm": 2.65625,
      "learning_rate": 3.8089674992406364e-05,
      "loss": 0.8413,
      "step": 269110
    },
    {
      "epoch": 0.943198505574306,
      "grad_norm": 2.9375,
      "learning_rate": 3.8089025963742665e-05,
      "loss": 0.9314,
      "step": 269120
    },
    {
      "epoch": 0.9432335530812016,
      "grad_norm": 2.84375,
      "learning_rate": 3.808837693507896e-05,
      "loss": 0.9393,
      "step": 269130
    },
    {
      "epoch": 0.9432686005880971,
      "grad_norm": 3.28125,
      "learning_rate": 3.808772790641526e-05,
      "loss": 0.8796,
      "step": 269140
    },
    {
      "epoch": 0.9433036480949928,
      "grad_norm": 2.84375,
      "learning_rate": 3.8087078877751556e-05,
      "loss": 0.8377,
      "step": 269150
    },
    {
      "epoch": 0.9433386956018883,
      "grad_norm": 3.328125,
      "learning_rate": 3.808642984908786e-05,
      "loss": 0.8942,
      "step": 269160
    },
    {
      "epoch": 0.943373743108784,
      "grad_norm": 3.125,
      "learning_rate": 3.808578082042415e-05,
      "loss": 0.8688,
      "step": 269170
    },
    {
      "epoch": 0.9434087906156795,
      "grad_norm": 3.09375,
      "learning_rate": 3.808513179176045e-05,
      "loss": 0.8489,
      "step": 269180
    },
    {
      "epoch": 0.9434438381225752,
      "grad_norm": 3.140625,
      "learning_rate": 3.8084482763096754e-05,
      "loss": 0.9682,
      "step": 269190
    },
    {
      "epoch": 0.9434788856294708,
      "grad_norm": 2.625,
      "learning_rate": 3.808383373443305e-05,
      "loss": 0.9081,
      "step": 269200
    },
    {
      "epoch": 0.9435139331363663,
      "grad_norm": 2.96875,
      "learning_rate": 3.808318470576935e-05,
      "loss": 0.8958,
      "step": 269210
    },
    {
      "epoch": 0.943548980643262,
      "grad_norm": 2.828125,
      "learning_rate": 3.8082535677105645e-05,
      "loss": 0.8848,
      "step": 269220
    },
    {
      "epoch": 0.9435840281501575,
      "grad_norm": 2.703125,
      "learning_rate": 3.8081886648441946e-05,
      "loss": 0.8627,
      "step": 269230
    },
    {
      "epoch": 0.9436190756570532,
      "grad_norm": 3.390625,
      "learning_rate": 3.808123761977824e-05,
      "loss": 0.8581,
      "step": 269240
    },
    {
      "epoch": 0.9436541231639487,
      "grad_norm": 3.0625,
      "learning_rate": 3.808058859111454e-05,
      "loss": 0.9467,
      "step": 269250
    },
    {
      "epoch": 0.9436891706708443,
      "grad_norm": 2.953125,
      "learning_rate": 3.807993956245084e-05,
      "loss": 0.8911,
      "step": 269260
    },
    {
      "epoch": 0.9437242181777399,
      "grad_norm": 3.140625,
      "learning_rate": 3.807929053378714e-05,
      "loss": 0.9138,
      "step": 269270
    },
    {
      "epoch": 0.9437592656846355,
      "grad_norm": 2.9375,
      "learning_rate": 3.807864150512343e-05,
      "loss": 0.8926,
      "step": 269280
    },
    {
      "epoch": 0.9437943131915312,
      "grad_norm": 3.078125,
      "learning_rate": 3.8077992476459734e-05,
      "loss": 0.9521,
      "step": 269290
    },
    {
      "epoch": 0.9438293606984267,
      "grad_norm": 3.390625,
      "learning_rate": 3.807734344779603e-05,
      "loss": 0.8951,
      "step": 269300
    },
    {
      "epoch": 0.9438644082053224,
      "grad_norm": 3.140625,
      "learning_rate": 3.807669441913233e-05,
      "loss": 0.9924,
      "step": 269310
    },
    {
      "epoch": 0.9438994557122179,
      "grad_norm": 3.265625,
      "learning_rate": 3.807604539046863e-05,
      "loss": 0.9318,
      "step": 269320
    },
    {
      "epoch": 0.9439345032191135,
      "grad_norm": 3.40625,
      "learning_rate": 3.8075396361804926e-05,
      "loss": 0.9142,
      "step": 269330
    },
    {
      "epoch": 0.9439695507260091,
      "grad_norm": 2.828125,
      "learning_rate": 3.807474733314123e-05,
      "loss": 0.9139,
      "step": 269340
    },
    {
      "epoch": 0.9440045982329047,
      "grad_norm": 2.71875,
      "learning_rate": 3.807409830447752e-05,
      "loss": 0.9719,
      "step": 269350
    },
    {
      "epoch": 0.9440396457398003,
      "grad_norm": 2.828125,
      "learning_rate": 3.807344927581382e-05,
      "loss": 0.9147,
      "step": 269360
    },
    {
      "epoch": 0.9440746932466959,
      "grad_norm": 3.171875,
      "learning_rate": 3.807280024715011e-05,
      "loss": 0.9614,
      "step": 269370
    },
    {
      "epoch": 0.9441097407535914,
      "grad_norm": 2.828125,
      "learning_rate": 3.807215121848641e-05,
      "loss": 0.929,
      "step": 269380
    },
    {
      "epoch": 0.9441447882604871,
      "grad_norm": 3.046875,
      "learning_rate": 3.807150218982271e-05,
      "loss": 0.997,
      "step": 269390
    },
    {
      "epoch": 0.9441798357673827,
      "grad_norm": 3.328125,
      "learning_rate": 3.807085316115901e-05,
      "loss": 0.8302,
      "step": 269400
    },
    {
      "epoch": 0.9442148832742783,
      "grad_norm": 2.5,
      "learning_rate": 3.807020413249531e-05,
      "loss": 0.8905,
      "step": 269410
    },
    {
      "epoch": 0.9442499307811739,
      "grad_norm": 2.734375,
      "learning_rate": 3.8069555103831605e-05,
      "loss": 0.8761,
      "step": 269420
    },
    {
      "epoch": 0.9442849782880695,
      "grad_norm": 3.203125,
      "learning_rate": 3.8068906075167906e-05,
      "loss": 0.9287,
      "step": 269430
    },
    {
      "epoch": 0.9443200257949651,
      "grad_norm": 3.140625,
      "learning_rate": 3.80682570465042e-05,
      "loss": 0.9178,
      "step": 269440
    },
    {
      "epoch": 0.9443550733018606,
      "grad_norm": 2.59375,
      "learning_rate": 3.80676080178405e-05,
      "loss": 0.8682,
      "step": 269450
    },
    {
      "epoch": 0.9443901208087563,
      "grad_norm": 2.71875,
      "learning_rate": 3.80669589891768e-05,
      "loss": 0.8722,
      "step": 269460
    },
    {
      "epoch": 0.9444251683156518,
      "grad_norm": 2.953125,
      "learning_rate": 3.80663099605131e-05,
      "loss": 0.9195,
      "step": 269470
    },
    {
      "epoch": 0.9444602158225475,
      "grad_norm": 3.078125,
      "learning_rate": 3.806566093184939e-05,
      "loss": 0.8641,
      "step": 269480
    },
    {
      "epoch": 0.9444952633294431,
      "grad_norm": 3.34375,
      "learning_rate": 3.8065011903185694e-05,
      "loss": 0.9547,
      "step": 269490
    },
    {
      "epoch": 0.9445303108363386,
      "grad_norm": 3.28125,
      "learning_rate": 3.806436287452199e-05,
      "loss": 0.8251,
      "step": 269500
    },
    {
      "epoch": 0.9445653583432343,
      "grad_norm": 3.078125,
      "learning_rate": 3.806371384585829e-05,
      "loss": 0.9252,
      "step": 269510
    },
    {
      "epoch": 0.9446004058501298,
      "grad_norm": 2.765625,
      "learning_rate": 3.8063064817194585e-05,
      "loss": 0.8175,
      "step": 269520
    },
    {
      "epoch": 0.9446354533570255,
      "grad_norm": 2.875,
      "learning_rate": 3.8062415788530886e-05,
      "loss": 0.8714,
      "step": 269530
    },
    {
      "epoch": 0.944670500863921,
      "grad_norm": 2.796875,
      "learning_rate": 3.806176675986719e-05,
      "loss": 0.8215,
      "step": 269540
    },
    {
      "epoch": 0.9447055483708167,
      "grad_norm": 2.6875,
      "learning_rate": 3.806111773120348e-05,
      "loss": 0.8239,
      "step": 269550
    },
    {
      "epoch": 0.9447405958777122,
      "grad_norm": 3.28125,
      "learning_rate": 3.8060468702539784e-05,
      "loss": 0.8556,
      "step": 269560
    },
    {
      "epoch": 0.9447756433846078,
      "grad_norm": 2.796875,
      "learning_rate": 3.805981967387608e-05,
      "loss": 0.8932,
      "step": 269570
    },
    {
      "epoch": 0.9448106908915034,
      "grad_norm": 3.140625,
      "learning_rate": 3.805917064521238e-05,
      "loss": 0.8654,
      "step": 269580
    },
    {
      "epoch": 0.944845738398399,
      "grad_norm": 2.859375,
      "learning_rate": 3.8058521616548674e-05,
      "loss": 0.9073,
      "step": 269590
    },
    {
      "epoch": 0.9448807859052947,
      "grad_norm": 2.734375,
      "learning_rate": 3.8057872587884976e-05,
      "loss": 0.853,
      "step": 269600
    },
    {
      "epoch": 0.9449158334121902,
      "grad_norm": 2.71875,
      "learning_rate": 3.805722355922127e-05,
      "loss": 0.7719,
      "step": 269610
    },
    {
      "epoch": 0.9449508809190859,
      "grad_norm": 3.09375,
      "learning_rate": 3.805657453055757e-05,
      "loss": 0.9287,
      "step": 269620
    },
    {
      "epoch": 0.9449859284259814,
      "grad_norm": 2.8125,
      "learning_rate": 3.8055925501893866e-05,
      "loss": 0.8152,
      "step": 269630
    },
    {
      "epoch": 0.945020975932877,
      "grad_norm": 2.8125,
      "learning_rate": 3.805527647323017e-05,
      "loss": 0.812,
      "step": 269640
    },
    {
      "epoch": 0.9450560234397726,
      "grad_norm": 2.796875,
      "learning_rate": 3.805462744456646e-05,
      "loss": 0.8498,
      "step": 269650
    },
    {
      "epoch": 0.9450910709466682,
      "grad_norm": 2.96875,
      "learning_rate": 3.8053978415902764e-05,
      "loss": 0.882,
      "step": 269660
    },
    {
      "epoch": 0.9451261184535638,
      "grad_norm": 2.875,
      "learning_rate": 3.805332938723906e-05,
      "loss": 0.9248,
      "step": 269670
    },
    {
      "epoch": 0.9451611659604594,
      "grad_norm": 2.578125,
      "learning_rate": 3.805268035857536e-05,
      "loss": 0.9153,
      "step": 269680
    },
    {
      "epoch": 0.945196213467355,
      "grad_norm": 3.28125,
      "learning_rate": 3.805203132991166e-05,
      "loss": 0.7847,
      "step": 269690
    },
    {
      "epoch": 0.9452312609742506,
      "grad_norm": 2.859375,
      "learning_rate": 3.8051382301247956e-05,
      "loss": 0.8711,
      "step": 269700
    },
    {
      "epoch": 0.9452663084811462,
      "grad_norm": 2.734375,
      "learning_rate": 3.805073327258426e-05,
      "loss": 0.9087,
      "step": 269710
    },
    {
      "epoch": 0.9453013559880418,
      "grad_norm": 2.75,
      "learning_rate": 3.805008424392055e-05,
      "loss": 0.8692,
      "step": 269720
    },
    {
      "epoch": 0.9453364034949374,
      "grad_norm": 2.875,
      "learning_rate": 3.804943521525685e-05,
      "loss": 0.8981,
      "step": 269730
    },
    {
      "epoch": 0.945371451001833,
      "grad_norm": 2.96875,
      "learning_rate": 3.804878618659314e-05,
      "loss": 0.744,
      "step": 269740
    },
    {
      "epoch": 0.9454064985087286,
      "grad_norm": 3.40625,
      "learning_rate": 3.804813715792944e-05,
      "loss": 1.0082,
      "step": 269750
    },
    {
      "epoch": 0.9454415460156241,
      "grad_norm": 2.734375,
      "learning_rate": 3.804748812926574e-05,
      "loss": 0.9045,
      "step": 269760
    },
    {
      "epoch": 0.9454765935225198,
      "grad_norm": 3.375,
      "learning_rate": 3.804683910060204e-05,
      "loss": 0.8699,
      "step": 269770
    },
    {
      "epoch": 0.9455116410294153,
      "grad_norm": 2.953125,
      "learning_rate": 3.804619007193834e-05,
      "loss": 0.8375,
      "step": 269780
    },
    {
      "epoch": 0.945546688536311,
      "grad_norm": 2.953125,
      "learning_rate": 3.8045541043274634e-05,
      "loss": 0.8184,
      "step": 269790
    },
    {
      "epoch": 0.9455817360432066,
      "grad_norm": 2.640625,
      "learning_rate": 3.8044892014610936e-05,
      "loss": 0.8605,
      "step": 269800
    },
    {
      "epoch": 0.9456167835501021,
      "grad_norm": 3.328125,
      "learning_rate": 3.804424298594723e-05,
      "loss": 0.8922,
      "step": 269810
    },
    {
      "epoch": 0.9456518310569978,
      "grad_norm": 2.21875,
      "learning_rate": 3.804359395728353e-05,
      "loss": 0.9518,
      "step": 269820
    },
    {
      "epoch": 0.9456868785638933,
      "grad_norm": 3.0,
      "learning_rate": 3.8042944928619826e-05,
      "loss": 0.889,
      "step": 269830
    },
    {
      "epoch": 0.945721926070789,
      "grad_norm": 2.6875,
      "learning_rate": 3.804229589995613e-05,
      "loss": 0.782,
      "step": 269840
    },
    {
      "epoch": 0.9457569735776845,
      "grad_norm": 3.328125,
      "learning_rate": 3.804164687129242e-05,
      "loss": 0.832,
      "step": 269850
    },
    {
      "epoch": 0.9457920210845802,
      "grad_norm": 3.046875,
      "learning_rate": 3.8040997842628724e-05,
      "loss": 0.9199,
      "step": 269860
    },
    {
      "epoch": 0.9458270685914757,
      "grad_norm": 3.015625,
      "learning_rate": 3.804034881396502e-05,
      "loss": 0.9139,
      "step": 269870
    },
    {
      "epoch": 0.9458621160983713,
      "grad_norm": 2.796875,
      "learning_rate": 3.803969978530132e-05,
      "loss": 0.9078,
      "step": 269880
    },
    {
      "epoch": 0.945897163605267,
      "grad_norm": 2.765625,
      "learning_rate": 3.8039050756637614e-05,
      "loss": 0.9116,
      "step": 269890
    },
    {
      "epoch": 0.9459322111121625,
      "grad_norm": 3.0,
      "learning_rate": 3.8038401727973916e-05,
      "loss": 0.832,
      "step": 269900
    },
    {
      "epoch": 0.9459672586190582,
      "grad_norm": 3.03125,
      "learning_rate": 3.803775269931022e-05,
      "loss": 0.8587,
      "step": 269910
    },
    {
      "epoch": 0.9460023061259537,
      "grad_norm": 3.203125,
      "learning_rate": 3.803710367064651e-05,
      "loss": 0.8585,
      "step": 269920
    },
    {
      "epoch": 0.9460373536328494,
      "grad_norm": 3.0,
      "learning_rate": 3.803645464198281e-05,
      "loss": 0.7781,
      "step": 269930
    },
    {
      "epoch": 0.9460724011397449,
      "grad_norm": 3.21875,
      "learning_rate": 3.803580561331911e-05,
      "loss": 0.9025,
      "step": 269940
    },
    {
      "epoch": 0.9461074486466405,
      "grad_norm": 2.9375,
      "learning_rate": 3.803515658465541e-05,
      "loss": 0.8344,
      "step": 269950
    },
    {
      "epoch": 0.9461424961535361,
      "grad_norm": 3.09375,
      "learning_rate": 3.8034507555991704e-05,
      "loss": 0.8778,
      "step": 269960
    },
    {
      "epoch": 0.9461775436604317,
      "grad_norm": 3.03125,
      "learning_rate": 3.8033858527328005e-05,
      "loss": 0.8684,
      "step": 269970
    },
    {
      "epoch": 0.9462125911673274,
      "grad_norm": 3.359375,
      "learning_rate": 3.80332094986643e-05,
      "loss": 0.839,
      "step": 269980
    },
    {
      "epoch": 0.9462476386742229,
      "grad_norm": 2.625,
      "learning_rate": 3.80325604700006e-05,
      "loss": 0.8511,
      "step": 269990
    },
    {
      "epoch": 0.9462826861811185,
      "grad_norm": 2.828125,
      "learning_rate": 3.8031911441336896e-05,
      "loss": 0.8729,
      "step": 270000
    },
    {
      "epoch": 0.9462826861811185,
      "eval_loss": 0.8239274024963379,
      "eval_runtime": 553.0534,
      "eval_samples_per_second": 687.883,
      "eval_steps_per_second": 57.324,
      "step": 270000
    },
    {
      "epoch": 0.9463177336880141,
      "grad_norm": 3.0625,
      "learning_rate": 3.80312624126732e-05,
      "loss": 0.8857,
      "step": 270010
    },
    {
      "epoch": 0.9463527811949097,
      "grad_norm": 2.90625,
      "learning_rate": 3.803061338400949e-05,
      "loss": 0.9223,
      "step": 270020
    },
    {
      "epoch": 0.9463878287018053,
      "grad_norm": 3.453125,
      "learning_rate": 3.802996435534579e-05,
      "loss": 0.8899,
      "step": 270030
    },
    {
      "epoch": 0.9464228762087009,
      "grad_norm": 2.1875,
      "learning_rate": 3.802931532668209e-05,
      "loss": 0.8,
      "step": 270040
    },
    {
      "epoch": 0.9464579237155964,
      "grad_norm": 2.734375,
      "learning_rate": 3.802866629801839e-05,
      "loss": 0.8045,
      "step": 270050
    },
    {
      "epoch": 0.9464929712224921,
      "grad_norm": 3.0625,
      "learning_rate": 3.802801726935469e-05,
      "loss": 0.8356,
      "step": 270060
    },
    {
      "epoch": 0.9465280187293876,
      "grad_norm": 3.046875,
      "learning_rate": 3.8027368240690985e-05,
      "loss": 0.9909,
      "step": 270070
    },
    {
      "epoch": 0.9465630662362833,
      "grad_norm": 2.578125,
      "learning_rate": 3.802671921202729e-05,
      "loss": 0.8105,
      "step": 270080
    },
    {
      "epoch": 0.9465981137431789,
      "grad_norm": 3.09375,
      "learning_rate": 3.802607018336358e-05,
      "loss": 0.9176,
      "step": 270090
    },
    {
      "epoch": 0.9466331612500745,
      "grad_norm": 2.9375,
      "learning_rate": 3.802542115469988e-05,
      "loss": 0.896,
      "step": 270100
    },
    {
      "epoch": 0.9466682087569701,
      "grad_norm": 2.96875,
      "learning_rate": 3.802477212603617e-05,
      "loss": 0.8886,
      "step": 270110
    },
    {
      "epoch": 0.9467032562638656,
      "grad_norm": 3.109375,
      "learning_rate": 3.802412309737247e-05,
      "loss": 0.8906,
      "step": 270120
    },
    {
      "epoch": 0.9467383037707613,
      "grad_norm": 2.75,
      "learning_rate": 3.8023474068708766e-05,
      "loss": 0.92,
      "step": 270130
    },
    {
      "epoch": 0.9467733512776568,
      "grad_norm": 2.5625,
      "learning_rate": 3.802282504004507e-05,
      "loss": 0.9538,
      "step": 270140
    },
    {
      "epoch": 0.9468083987845525,
      "grad_norm": 2.53125,
      "learning_rate": 3.802217601138137e-05,
      "loss": 0.8436,
      "step": 270150
    },
    {
      "epoch": 0.946843446291448,
      "grad_norm": 2.625,
      "learning_rate": 3.8021526982717664e-05,
      "loss": 0.8622,
      "step": 270160
    },
    {
      "epoch": 0.9468784937983437,
      "grad_norm": 3.0,
      "learning_rate": 3.8020877954053965e-05,
      "loss": 0.8628,
      "step": 270170
    },
    {
      "epoch": 0.9469135413052393,
      "grad_norm": 2.78125,
      "learning_rate": 3.802022892539026e-05,
      "loss": 0.8372,
      "step": 270180
    },
    {
      "epoch": 0.9469485888121348,
      "grad_norm": 2.859375,
      "learning_rate": 3.801957989672656e-05,
      "loss": 0.9354,
      "step": 270190
    },
    {
      "epoch": 0.9469836363190305,
      "grad_norm": 2.65625,
      "learning_rate": 3.8018930868062856e-05,
      "loss": 0.8917,
      "step": 270200
    },
    {
      "epoch": 0.947018683825926,
      "grad_norm": 3.140625,
      "learning_rate": 3.801828183939916e-05,
      "loss": 0.8713,
      "step": 270210
    },
    {
      "epoch": 0.9470537313328217,
      "grad_norm": 3.046875,
      "learning_rate": 3.801763281073545e-05,
      "loss": 0.8033,
      "step": 270220
    },
    {
      "epoch": 0.9470887788397172,
      "grad_norm": 2.703125,
      "learning_rate": 3.801698378207175e-05,
      "loss": 0.843,
      "step": 270230
    },
    {
      "epoch": 0.9471238263466129,
      "grad_norm": 2.96875,
      "learning_rate": 3.801633475340805e-05,
      "loss": 0.9054,
      "step": 270240
    },
    {
      "epoch": 0.9471588738535084,
      "grad_norm": 3.125,
      "learning_rate": 3.801568572474435e-05,
      "loss": 0.9498,
      "step": 270250
    },
    {
      "epoch": 0.947193921360404,
      "grad_norm": 2.5625,
      "learning_rate": 3.8015036696080644e-05,
      "loss": 0.8503,
      "step": 270260
    },
    {
      "epoch": 0.9472289688672996,
      "grad_norm": 2.984375,
      "learning_rate": 3.8014387667416945e-05,
      "loss": 0.9532,
      "step": 270270
    },
    {
      "epoch": 0.9472640163741952,
      "grad_norm": 2.65625,
      "learning_rate": 3.801373863875325e-05,
      "loss": 0.809,
      "step": 270280
    },
    {
      "epoch": 0.9472990638810909,
      "grad_norm": 3.171875,
      "learning_rate": 3.801308961008954e-05,
      "loss": 0.8774,
      "step": 270290
    },
    {
      "epoch": 0.9473341113879864,
      "grad_norm": 2.71875,
      "learning_rate": 3.801244058142584e-05,
      "loss": 0.894,
      "step": 270300
    },
    {
      "epoch": 0.947369158894882,
      "grad_norm": 2.78125,
      "learning_rate": 3.801179155276214e-05,
      "loss": 0.9123,
      "step": 270310
    },
    {
      "epoch": 0.9474042064017776,
      "grad_norm": 3.390625,
      "learning_rate": 3.801114252409844e-05,
      "loss": 0.8987,
      "step": 270320
    },
    {
      "epoch": 0.9474392539086732,
      "grad_norm": 2.890625,
      "learning_rate": 3.801049349543473e-05,
      "loss": 0.8716,
      "step": 270330
    },
    {
      "epoch": 0.9474743014155688,
      "grad_norm": 3.171875,
      "learning_rate": 3.8009844466771035e-05,
      "loss": 0.8572,
      "step": 270340
    },
    {
      "epoch": 0.9475093489224644,
      "grad_norm": 2.84375,
      "learning_rate": 3.800919543810733e-05,
      "loss": 0.8506,
      "step": 270350
    },
    {
      "epoch": 0.94754439642936,
      "grad_norm": 2.828125,
      "learning_rate": 3.800854640944363e-05,
      "loss": 0.8512,
      "step": 270360
    },
    {
      "epoch": 0.9475794439362556,
      "grad_norm": 2.96875,
      "learning_rate": 3.8007897380779925e-05,
      "loss": 0.8758,
      "step": 270370
    },
    {
      "epoch": 0.9476144914431512,
      "grad_norm": 3.046875,
      "learning_rate": 3.800724835211623e-05,
      "loss": 0.8472,
      "step": 270380
    },
    {
      "epoch": 0.9476495389500468,
      "grad_norm": 3.03125,
      "learning_rate": 3.800659932345252e-05,
      "loss": 0.8865,
      "step": 270390
    },
    {
      "epoch": 0.9476845864569424,
      "grad_norm": 2.46875,
      "learning_rate": 3.800595029478882e-05,
      "loss": 0.8075,
      "step": 270400
    },
    {
      "epoch": 0.947719633963838,
      "grad_norm": 3.09375,
      "learning_rate": 3.8005301266125124e-05,
      "loss": 0.9623,
      "step": 270410
    },
    {
      "epoch": 0.9477546814707336,
      "grad_norm": 2.375,
      "learning_rate": 3.800465223746142e-05,
      "loss": 0.9014,
      "step": 270420
    },
    {
      "epoch": 0.9477897289776291,
      "grad_norm": 3.296875,
      "learning_rate": 3.800400320879772e-05,
      "loss": 0.9207,
      "step": 270430
    },
    {
      "epoch": 0.9478247764845248,
      "grad_norm": 2.71875,
      "learning_rate": 3.8003354180134015e-05,
      "loss": 0.8545,
      "step": 270440
    },
    {
      "epoch": 0.9478598239914203,
      "grad_norm": 2.65625,
      "learning_rate": 3.8002705151470316e-05,
      "loss": 0.8625,
      "step": 270450
    },
    {
      "epoch": 0.947894871498316,
      "grad_norm": 2.5625,
      "learning_rate": 3.800205612280661e-05,
      "loss": 0.8744,
      "step": 270460
    },
    {
      "epoch": 0.9479299190052116,
      "grad_norm": 3.15625,
      "learning_rate": 3.800140709414291e-05,
      "loss": 0.8513,
      "step": 270470
    },
    {
      "epoch": 0.9479649665121072,
      "grad_norm": 2.671875,
      "learning_rate": 3.800075806547921e-05,
      "loss": 0.9149,
      "step": 270480
    },
    {
      "epoch": 0.9480000140190028,
      "grad_norm": 3.03125,
      "learning_rate": 3.80001090368155e-05,
      "loss": 0.9701,
      "step": 270490
    },
    {
      "epoch": 0.9480350615258983,
      "grad_norm": 3.09375,
      "learning_rate": 3.79994600081518e-05,
      "loss": 0.8172,
      "step": 270500
    },
    {
      "epoch": 0.948070109032794,
      "grad_norm": 2.359375,
      "learning_rate": 3.79988109794881e-05,
      "loss": 0.8769,
      "step": 270510
    },
    {
      "epoch": 0.9481051565396895,
      "grad_norm": 2.78125,
      "learning_rate": 3.79981619508244e-05,
      "loss": 0.9485,
      "step": 270520
    },
    {
      "epoch": 0.9481402040465852,
      "grad_norm": 3.265625,
      "learning_rate": 3.799751292216069e-05,
      "loss": 0.9058,
      "step": 270530
    },
    {
      "epoch": 0.9481752515534807,
      "grad_norm": 3.328125,
      "learning_rate": 3.7996863893496995e-05,
      "loss": 0.9565,
      "step": 270540
    },
    {
      "epoch": 0.9482102990603763,
      "grad_norm": 2.890625,
      "learning_rate": 3.799621486483329e-05,
      "loss": 0.9484,
      "step": 270550
    },
    {
      "epoch": 0.9482453465672719,
      "grad_norm": 2.78125,
      "learning_rate": 3.799556583616959e-05,
      "loss": 0.918,
      "step": 270560
    },
    {
      "epoch": 0.9482803940741675,
      "grad_norm": 2.609375,
      "learning_rate": 3.7994916807505885e-05,
      "loss": 0.7994,
      "step": 270570
    },
    {
      "epoch": 0.9483154415810632,
      "grad_norm": 2.828125,
      "learning_rate": 3.799426777884219e-05,
      "loss": 0.859,
      "step": 270580
    },
    {
      "epoch": 0.9483504890879587,
      "grad_norm": 3.0,
      "learning_rate": 3.799361875017848e-05,
      "loss": 0.8672,
      "step": 270590
    },
    {
      "epoch": 0.9483855365948544,
      "grad_norm": 2.8125,
      "learning_rate": 3.799296972151478e-05,
      "loss": 0.889,
      "step": 270600
    },
    {
      "epoch": 0.9484205841017499,
      "grad_norm": 2.703125,
      "learning_rate": 3.799232069285108e-05,
      "loss": 0.9221,
      "step": 270610
    },
    {
      "epoch": 0.9484556316086455,
      "grad_norm": 2.96875,
      "learning_rate": 3.799167166418738e-05,
      "loss": 0.8663,
      "step": 270620
    },
    {
      "epoch": 0.9484906791155411,
      "grad_norm": 2.890625,
      "learning_rate": 3.799102263552367e-05,
      "loss": 0.8847,
      "step": 270630
    },
    {
      "epoch": 0.9485257266224367,
      "grad_norm": 2.71875,
      "learning_rate": 3.7990373606859975e-05,
      "loss": 0.823,
      "step": 270640
    },
    {
      "epoch": 0.9485607741293323,
      "grad_norm": 2.984375,
      "learning_rate": 3.7989724578196276e-05,
      "loss": 0.9862,
      "step": 270650
    },
    {
      "epoch": 0.9485958216362279,
      "grad_norm": 2.90625,
      "learning_rate": 3.798907554953257e-05,
      "loss": 0.9127,
      "step": 270660
    },
    {
      "epoch": 0.9486308691431236,
      "grad_norm": 3.09375,
      "learning_rate": 3.798842652086887e-05,
      "loss": 0.8312,
      "step": 270670
    },
    {
      "epoch": 0.9486659166500191,
      "grad_norm": 2.765625,
      "learning_rate": 3.798777749220517e-05,
      "loss": 0.9199,
      "step": 270680
    },
    {
      "epoch": 0.9487009641569147,
      "grad_norm": 2.71875,
      "learning_rate": 3.798712846354147e-05,
      "loss": 0.8746,
      "step": 270690
    },
    {
      "epoch": 0.9487360116638103,
      "grad_norm": 2.21875,
      "learning_rate": 3.798647943487776e-05,
      "loss": 0.8163,
      "step": 270700
    },
    {
      "epoch": 0.9487710591707059,
      "grad_norm": 2.9375,
      "learning_rate": 3.7985830406214064e-05,
      "loss": 0.853,
      "step": 270710
    },
    {
      "epoch": 0.9488061066776015,
      "grad_norm": 3.0,
      "learning_rate": 3.798518137755036e-05,
      "loss": 0.9437,
      "step": 270720
    },
    {
      "epoch": 0.9488411541844971,
      "grad_norm": 3.75,
      "learning_rate": 3.798453234888666e-05,
      "loss": 0.8579,
      "step": 270730
    },
    {
      "epoch": 0.9488762016913926,
      "grad_norm": 3.109375,
      "learning_rate": 3.7983883320222955e-05,
      "loss": 0.9296,
      "step": 270740
    },
    {
      "epoch": 0.9489112491982883,
      "grad_norm": 2.71875,
      "learning_rate": 3.7983234291559256e-05,
      "loss": 0.8542,
      "step": 270750
    },
    {
      "epoch": 0.9489462967051838,
      "grad_norm": 2.796875,
      "learning_rate": 3.798258526289555e-05,
      "loss": 0.8452,
      "step": 270760
    },
    {
      "epoch": 0.9489813442120795,
      "grad_norm": 3.234375,
      "learning_rate": 3.798193623423185e-05,
      "loss": 0.9381,
      "step": 270770
    },
    {
      "epoch": 0.9490163917189751,
      "grad_norm": 3.515625,
      "learning_rate": 3.7981287205568153e-05,
      "loss": 0.81,
      "step": 270780
    },
    {
      "epoch": 0.9490514392258707,
      "grad_norm": 2.515625,
      "learning_rate": 3.798063817690445e-05,
      "loss": 0.827,
      "step": 270790
    },
    {
      "epoch": 0.9490864867327663,
      "grad_norm": 2.71875,
      "learning_rate": 3.797998914824075e-05,
      "loss": 0.9105,
      "step": 270800
    },
    {
      "epoch": 0.9491215342396618,
      "grad_norm": 2.828125,
      "learning_rate": 3.7979340119577044e-05,
      "loss": 0.8239,
      "step": 270810
    },
    {
      "epoch": 0.9491565817465575,
      "grad_norm": 2.6875,
      "learning_rate": 3.7978691090913345e-05,
      "loss": 0.9498,
      "step": 270820
    },
    {
      "epoch": 0.949191629253453,
      "grad_norm": 2.640625,
      "learning_rate": 3.797804206224964e-05,
      "loss": 0.86,
      "step": 270830
    },
    {
      "epoch": 0.9492266767603487,
      "grad_norm": 2.796875,
      "learning_rate": 3.797739303358594e-05,
      "loss": 0.8656,
      "step": 270840
    },
    {
      "epoch": 0.9492617242672442,
      "grad_norm": 3.5,
      "learning_rate": 3.7976744004922236e-05,
      "loss": 0.8596,
      "step": 270850
    },
    {
      "epoch": 0.9492967717741398,
      "grad_norm": 2.59375,
      "learning_rate": 3.797609497625854e-05,
      "loss": 0.843,
      "step": 270860
    },
    {
      "epoch": 0.9493318192810355,
      "grad_norm": 2.5,
      "learning_rate": 3.797544594759483e-05,
      "loss": 0.8503,
      "step": 270870
    },
    {
      "epoch": 0.949366866787931,
      "grad_norm": 2.640625,
      "learning_rate": 3.797479691893113e-05,
      "loss": 0.8424,
      "step": 270880
    },
    {
      "epoch": 0.9494019142948267,
      "grad_norm": 2.828125,
      "learning_rate": 3.797414789026743e-05,
      "loss": 0.9684,
      "step": 270890
    },
    {
      "epoch": 0.9494369618017222,
      "grad_norm": 3.0,
      "learning_rate": 3.797349886160372e-05,
      "loss": 0.91,
      "step": 270900
    },
    {
      "epoch": 0.9494720093086179,
      "grad_norm": 2.78125,
      "learning_rate": 3.7972849832940024e-05,
      "loss": 0.8732,
      "step": 270910
    },
    {
      "epoch": 0.9495070568155134,
      "grad_norm": 3.125,
      "learning_rate": 3.797220080427632e-05,
      "loss": 0.8781,
      "step": 270920
    },
    {
      "epoch": 0.949542104322409,
      "grad_norm": 3.21875,
      "learning_rate": 3.797155177561262e-05,
      "loss": 0.8508,
      "step": 270930
    },
    {
      "epoch": 0.9495771518293046,
      "grad_norm": 2.9375,
      "learning_rate": 3.7970902746948915e-05,
      "loss": 0.8548,
      "step": 270940
    },
    {
      "epoch": 0.9496121993362002,
      "grad_norm": 3.046875,
      "learning_rate": 3.7970253718285216e-05,
      "loss": 0.9067,
      "step": 270950
    },
    {
      "epoch": 0.9496472468430959,
      "grad_norm": 2.9375,
      "learning_rate": 3.796960468962151e-05,
      "loss": 0.8977,
      "step": 270960
    },
    {
      "epoch": 0.9496822943499914,
      "grad_norm": 2.765625,
      "learning_rate": 3.796895566095781e-05,
      "loss": 0.8799,
      "step": 270970
    },
    {
      "epoch": 0.949717341856887,
      "grad_norm": 2.984375,
      "learning_rate": 3.796830663229411e-05,
      "loss": 0.9773,
      "step": 270980
    },
    {
      "epoch": 0.9497523893637826,
      "grad_norm": 2.125,
      "learning_rate": 3.796765760363041e-05,
      "loss": 0.7812,
      "step": 270990
    },
    {
      "epoch": 0.9497874368706782,
      "grad_norm": 3.109375,
      "learning_rate": 3.79670085749667e-05,
      "loss": 0.8481,
      "step": 271000
    },
    {
      "epoch": 0.9498224843775738,
      "grad_norm": 2.859375,
      "learning_rate": 3.7966359546303004e-05,
      "loss": 0.8706,
      "step": 271010
    },
    {
      "epoch": 0.9498575318844694,
      "grad_norm": 2.9375,
      "learning_rate": 3.7965710517639305e-05,
      "loss": 0.9371,
      "step": 271020
    },
    {
      "epoch": 0.949892579391365,
      "grad_norm": 3.09375,
      "learning_rate": 3.79650614889756e-05,
      "loss": 0.8454,
      "step": 271030
    },
    {
      "epoch": 0.9499276268982606,
      "grad_norm": 2.90625,
      "learning_rate": 3.79644124603119e-05,
      "loss": 0.8213,
      "step": 271040
    },
    {
      "epoch": 0.9499626744051561,
      "grad_norm": 2.8125,
      "learning_rate": 3.7963763431648196e-05,
      "loss": 0.8862,
      "step": 271050
    },
    {
      "epoch": 0.9499977219120518,
      "grad_norm": 3.171875,
      "learning_rate": 3.79631144029845e-05,
      "loss": 0.8951,
      "step": 271060
    },
    {
      "epoch": 0.9500327694189474,
      "grad_norm": 2.65625,
      "learning_rate": 3.796246537432079e-05,
      "loss": 0.8637,
      "step": 271070
    },
    {
      "epoch": 0.950067816925843,
      "grad_norm": 2.8125,
      "learning_rate": 3.7961816345657093e-05,
      "loss": 0.9188,
      "step": 271080
    },
    {
      "epoch": 0.9501028644327386,
      "grad_norm": 3.28125,
      "learning_rate": 3.796116731699339e-05,
      "loss": 0.9141,
      "step": 271090
    },
    {
      "epoch": 0.9501379119396341,
      "grad_norm": 2.921875,
      "learning_rate": 3.796051828832969e-05,
      "loss": 0.9188,
      "step": 271100
    },
    {
      "epoch": 0.9501729594465298,
      "grad_norm": 3.046875,
      "learning_rate": 3.7959869259665984e-05,
      "loss": 0.8856,
      "step": 271110
    },
    {
      "epoch": 0.9502080069534253,
      "grad_norm": 3.125,
      "learning_rate": 3.7959220231002285e-05,
      "loss": 0.9365,
      "step": 271120
    },
    {
      "epoch": 0.950243054460321,
      "grad_norm": 2.765625,
      "learning_rate": 3.795857120233858e-05,
      "loss": 0.8546,
      "step": 271130
    },
    {
      "epoch": 0.9502781019672165,
      "grad_norm": 3.046875,
      "learning_rate": 3.795792217367488e-05,
      "loss": 0.8096,
      "step": 271140
    },
    {
      "epoch": 0.9503131494741122,
      "grad_norm": 2.828125,
      "learning_rate": 3.795727314501118e-05,
      "loss": 0.8763,
      "step": 271150
    },
    {
      "epoch": 0.9503481969810078,
      "grad_norm": 2.859375,
      "learning_rate": 3.795662411634748e-05,
      "loss": 0.8042,
      "step": 271160
    },
    {
      "epoch": 0.9503832444879033,
      "grad_norm": 2.734375,
      "learning_rate": 3.795597508768378e-05,
      "loss": 0.8129,
      "step": 271170
    },
    {
      "epoch": 0.950418291994799,
      "grad_norm": 3.046875,
      "learning_rate": 3.7955326059020073e-05,
      "loss": 0.8238,
      "step": 271180
    },
    {
      "epoch": 0.9504533395016945,
      "grad_norm": 2.90625,
      "learning_rate": 3.7954677030356375e-05,
      "loss": 0.8944,
      "step": 271190
    },
    {
      "epoch": 0.9504883870085902,
      "grad_norm": 2.703125,
      "learning_rate": 3.795402800169267e-05,
      "loss": 0.8953,
      "step": 271200
    },
    {
      "epoch": 0.9505234345154857,
      "grad_norm": 2.515625,
      "learning_rate": 3.795337897302897e-05,
      "loss": 0.7996,
      "step": 271210
    },
    {
      "epoch": 0.9505584820223814,
      "grad_norm": 3.1875,
      "learning_rate": 3.7952729944365265e-05,
      "loss": 0.9055,
      "step": 271220
    },
    {
      "epoch": 0.9505935295292769,
      "grad_norm": 3.109375,
      "learning_rate": 3.795208091570157e-05,
      "loss": 0.8649,
      "step": 271230
    },
    {
      "epoch": 0.9506285770361725,
      "grad_norm": 2.96875,
      "learning_rate": 3.795143188703786e-05,
      "loss": 0.9055,
      "step": 271240
    },
    {
      "epoch": 0.9506636245430681,
      "grad_norm": 3.28125,
      "learning_rate": 3.7950782858374156e-05,
      "loss": 0.9459,
      "step": 271250
    },
    {
      "epoch": 0.9506986720499637,
      "grad_norm": 2.890625,
      "learning_rate": 3.795013382971046e-05,
      "loss": 0.8587,
      "step": 271260
    },
    {
      "epoch": 0.9507337195568594,
      "grad_norm": 3.34375,
      "learning_rate": 3.794948480104675e-05,
      "loss": 0.9002,
      "step": 271270
    },
    {
      "epoch": 0.9507687670637549,
      "grad_norm": 3.40625,
      "learning_rate": 3.7948835772383053e-05,
      "loss": 0.8809,
      "step": 271280
    },
    {
      "epoch": 0.9508038145706506,
      "grad_norm": 2.9375,
      "learning_rate": 3.794818674371935e-05,
      "loss": 0.8527,
      "step": 271290
    },
    {
      "epoch": 0.9508388620775461,
      "grad_norm": 3.09375,
      "learning_rate": 3.794753771505565e-05,
      "loss": 0.9264,
      "step": 271300
    },
    {
      "epoch": 0.9508739095844417,
      "grad_norm": 3.015625,
      "learning_rate": 3.7946888686391944e-05,
      "loss": 0.8885,
      "step": 271310
    },
    {
      "epoch": 0.9509089570913373,
      "grad_norm": 3.1875,
      "learning_rate": 3.7946239657728245e-05,
      "loss": 0.871,
      "step": 271320
    },
    {
      "epoch": 0.9509440045982329,
      "grad_norm": 3.109375,
      "learning_rate": 3.794559062906454e-05,
      "loss": 0.8766,
      "step": 271330
    },
    {
      "epoch": 0.9509790521051285,
      "grad_norm": 3.171875,
      "learning_rate": 3.794494160040084e-05,
      "loss": 0.8422,
      "step": 271340
    },
    {
      "epoch": 0.9510140996120241,
      "grad_norm": 2.578125,
      "learning_rate": 3.7944292571737136e-05,
      "loss": 0.8612,
      "step": 271350
    },
    {
      "epoch": 0.9510491471189197,
      "grad_norm": 3.15625,
      "learning_rate": 3.794364354307344e-05,
      "loss": 0.8002,
      "step": 271360
    },
    {
      "epoch": 0.9510841946258153,
      "grad_norm": 3.078125,
      "learning_rate": 3.794299451440974e-05,
      "loss": 0.882,
      "step": 271370
    },
    {
      "epoch": 0.9511192421327109,
      "grad_norm": 2.796875,
      "learning_rate": 3.7942345485746033e-05,
      "loss": 0.8782,
      "step": 271380
    },
    {
      "epoch": 0.9511542896396065,
      "grad_norm": 2.625,
      "learning_rate": 3.7941696457082335e-05,
      "loss": 0.9657,
      "step": 271390
    },
    {
      "epoch": 0.9511893371465021,
      "grad_norm": 2.90625,
      "learning_rate": 3.794104742841863e-05,
      "loss": 0.8613,
      "step": 271400
    },
    {
      "epoch": 0.9512243846533976,
      "grad_norm": 3.28125,
      "learning_rate": 3.794039839975493e-05,
      "loss": 1.0161,
      "step": 271410
    },
    {
      "epoch": 0.9512594321602933,
      "grad_norm": 3.375,
      "learning_rate": 3.7939749371091225e-05,
      "loss": 0.9588,
      "step": 271420
    },
    {
      "epoch": 0.9512944796671888,
      "grad_norm": 2.953125,
      "learning_rate": 3.793910034242753e-05,
      "loss": 0.892,
      "step": 271430
    },
    {
      "epoch": 0.9513295271740845,
      "grad_norm": 2.8125,
      "learning_rate": 3.793845131376382e-05,
      "loss": 0.8991,
      "step": 271440
    },
    {
      "epoch": 0.95136457468098,
      "grad_norm": 2.71875,
      "learning_rate": 3.793780228510012e-05,
      "loss": 0.79,
      "step": 271450
    },
    {
      "epoch": 0.9513996221878757,
      "grad_norm": 2.859375,
      "learning_rate": 3.793715325643642e-05,
      "loss": 0.8908,
      "step": 271460
    },
    {
      "epoch": 0.9514346696947713,
      "grad_norm": 3.3125,
      "learning_rate": 3.793650422777272e-05,
      "loss": 0.8874,
      "step": 271470
    },
    {
      "epoch": 0.9514697172016668,
      "grad_norm": 2.71875,
      "learning_rate": 3.7935855199109013e-05,
      "loss": 0.8505,
      "step": 271480
    },
    {
      "epoch": 0.9515047647085625,
      "grad_norm": 2.90625,
      "learning_rate": 3.7935206170445315e-05,
      "loss": 0.8526,
      "step": 271490
    },
    {
      "epoch": 0.951539812215458,
      "grad_norm": 2.671875,
      "learning_rate": 3.793455714178161e-05,
      "loss": 0.8468,
      "step": 271500
    },
    {
      "epoch": 0.9515748597223537,
      "grad_norm": 2.484375,
      "learning_rate": 3.793390811311791e-05,
      "loss": 0.8973,
      "step": 271510
    },
    {
      "epoch": 0.9516099072292492,
      "grad_norm": 2.765625,
      "learning_rate": 3.793325908445421e-05,
      "loss": 0.8615,
      "step": 271520
    },
    {
      "epoch": 0.9516449547361449,
      "grad_norm": 2.90625,
      "learning_rate": 3.793261005579051e-05,
      "loss": 0.8071,
      "step": 271530
    },
    {
      "epoch": 0.9516800022430404,
      "grad_norm": 3.21875,
      "learning_rate": 3.793196102712681e-05,
      "loss": 0.8595,
      "step": 271540
    },
    {
      "epoch": 0.951715049749936,
      "grad_norm": 3.09375,
      "learning_rate": 3.79313119984631e-05,
      "loss": 0.8805,
      "step": 271550
    },
    {
      "epoch": 0.9517500972568317,
      "grad_norm": 2.5625,
      "learning_rate": 3.7930662969799404e-05,
      "loss": 0.7665,
      "step": 271560
    },
    {
      "epoch": 0.9517851447637272,
      "grad_norm": 2.828125,
      "learning_rate": 3.79300139411357e-05,
      "loss": 0.8152,
      "step": 271570
    },
    {
      "epoch": 0.9518201922706229,
      "grad_norm": 2.578125,
      "learning_rate": 3.7929364912472e-05,
      "loss": 0.948,
      "step": 271580
    },
    {
      "epoch": 0.9518552397775184,
      "grad_norm": 2.84375,
      "learning_rate": 3.7928715883808295e-05,
      "loss": 0.9087,
      "step": 271590
    },
    {
      "epoch": 0.951890287284414,
      "grad_norm": 2.578125,
      "learning_rate": 3.7928066855144596e-05,
      "loss": 0.833,
      "step": 271600
    },
    {
      "epoch": 0.9519253347913096,
      "grad_norm": 2.75,
      "learning_rate": 3.792741782648089e-05,
      "loss": 0.8092,
      "step": 271610
    },
    {
      "epoch": 0.9519603822982052,
      "grad_norm": 3.125,
      "learning_rate": 3.7926768797817185e-05,
      "loss": 0.9204,
      "step": 271620
    },
    {
      "epoch": 0.9519954298051008,
      "grad_norm": 3.046875,
      "learning_rate": 3.792611976915349e-05,
      "loss": 0.9238,
      "step": 271630
    },
    {
      "epoch": 0.9520304773119964,
      "grad_norm": 2.84375,
      "learning_rate": 3.792547074048978e-05,
      "loss": 0.8801,
      "step": 271640
    },
    {
      "epoch": 0.9520655248188921,
      "grad_norm": 3.3125,
      "learning_rate": 3.792482171182608e-05,
      "loss": 0.844,
      "step": 271650
    },
    {
      "epoch": 0.9521005723257876,
      "grad_norm": 3.015625,
      "learning_rate": 3.792417268316238e-05,
      "loss": 0.8948,
      "step": 271660
    },
    {
      "epoch": 0.9521356198326832,
      "grad_norm": 2.953125,
      "learning_rate": 3.792352365449868e-05,
      "loss": 0.9253,
      "step": 271670
    },
    {
      "epoch": 0.9521706673395788,
      "grad_norm": 2.46875,
      "learning_rate": 3.7922874625834973e-05,
      "loss": 0.9815,
      "step": 271680
    },
    {
      "epoch": 0.9522057148464744,
      "grad_norm": 3.15625,
      "learning_rate": 3.7922225597171275e-05,
      "loss": 0.8738,
      "step": 271690
    },
    {
      "epoch": 0.95224076235337,
      "grad_norm": 2.953125,
      "learning_rate": 3.792157656850757e-05,
      "loss": 0.9321,
      "step": 271700
    },
    {
      "epoch": 0.9522758098602656,
      "grad_norm": 3.03125,
      "learning_rate": 3.792092753984387e-05,
      "loss": 0.8702,
      "step": 271710
    },
    {
      "epoch": 0.9523108573671611,
      "grad_norm": 2.859375,
      "learning_rate": 3.7920278511180165e-05,
      "loss": 0.9183,
      "step": 271720
    },
    {
      "epoch": 0.9523459048740568,
      "grad_norm": 3.28125,
      "learning_rate": 3.791962948251647e-05,
      "loss": 0.8948,
      "step": 271730
    },
    {
      "epoch": 0.9523809523809523,
      "grad_norm": 2.9375,
      "learning_rate": 3.791898045385277e-05,
      "loss": 0.925,
      "step": 271740
    },
    {
      "epoch": 0.952415999887848,
      "grad_norm": 3.125,
      "learning_rate": 3.791833142518906e-05,
      "loss": 0.8666,
      "step": 271750
    },
    {
      "epoch": 0.9524510473947436,
      "grad_norm": 2.53125,
      "learning_rate": 3.7917682396525364e-05,
      "loss": 0.9459,
      "step": 271760
    },
    {
      "epoch": 0.9524860949016392,
      "grad_norm": 2.984375,
      "learning_rate": 3.791703336786166e-05,
      "loss": 0.9003,
      "step": 271770
    },
    {
      "epoch": 0.9525211424085348,
      "grad_norm": 2.921875,
      "learning_rate": 3.791638433919796e-05,
      "loss": 0.9159,
      "step": 271780
    },
    {
      "epoch": 0.9525561899154303,
      "grad_norm": 3.265625,
      "learning_rate": 3.7915735310534255e-05,
      "loss": 0.8793,
      "step": 271790
    },
    {
      "epoch": 0.952591237422326,
      "grad_norm": 3.15625,
      "learning_rate": 3.7915086281870556e-05,
      "loss": 0.9066,
      "step": 271800
    },
    {
      "epoch": 0.9526262849292215,
      "grad_norm": 2.90625,
      "learning_rate": 3.791443725320685e-05,
      "loss": 0.9042,
      "step": 271810
    },
    {
      "epoch": 0.9526613324361172,
      "grad_norm": 2.859375,
      "learning_rate": 3.791378822454315e-05,
      "loss": 0.7743,
      "step": 271820
    },
    {
      "epoch": 0.9526963799430127,
      "grad_norm": 2.875,
      "learning_rate": 3.791313919587945e-05,
      "loss": 0.8806,
      "step": 271830
    },
    {
      "epoch": 0.9527314274499084,
      "grad_norm": 2.65625,
      "learning_rate": 3.791249016721575e-05,
      "loss": 0.8219,
      "step": 271840
    },
    {
      "epoch": 0.952766474956804,
      "grad_norm": 3.453125,
      "learning_rate": 3.791184113855204e-05,
      "loss": 0.936,
      "step": 271850
    },
    {
      "epoch": 0.9528015224636995,
      "grad_norm": 2.265625,
      "learning_rate": 3.7911192109888344e-05,
      "loss": 0.8359,
      "step": 271860
    },
    {
      "epoch": 0.9528365699705952,
      "grad_norm": 2.765625,
      "learning_rate": 3.791054308122464e-05,
      "loss": 0.8605,
      "step": 271870
    },
    {
      "epoch": 0.9528716174774907,
      "grad_norm": 2.59375,
      "learning_rate": 3.790989405256094e-05,
      "loss": 0.9696,
      "step": 271880
    },
    {
      "epoch": 0.9529066649843864,
      "grad_norm": 2.734375,
      "learning_rate": 3.790924502389724e-05,
      "loss": 0.9571,
      "step": 271890
    },
    {
      "epoch": 0.9529417124912819,
      "grad_norm": 2.796875,
      "learning_rate": 3.7908595995233536e-05,
      "loss": 0.9045,
      "step": 271900
    },
    {
      "epoch": 0.9529767599981775,
      "grad_norm": 3.34375,
      "learning_rate": 3.790794696656984e-05,
      "loss": 0.8518,
      "step": 271910
    },
    {
      "epoch": 0.9530118075050731,
      "grad_norm": 2.859375,
      "learning_rate": 3.790729793790613e-05,
      "loss": 0.8601,
      "step": 271920
    },
    {
      "epoch": 0.9530468550119687,
      "grad_norm": 2.9375,
      "learning_rate": 3.7906648909242434e-05,
      "loss": 0.9243,
      "step": 271930
    },
    {
      "epoch": 0.9530819025188643,
      "grad_norm": 3.078125,
      "learning_rate": 3.790599988057873e-05,
      "loss": 0.9011,
      "step": 271940
    },
    {
      "epoch": 0.9531169500257599,
      "grad_norm": 2.71875,
      "learning_rate": 3.790535085191503e-05,
      "loss": 0.8548,
      "step": 271950
    },
    {
      "epoch": 0.9531519975326556,
      "grad_norm": 2.859375,
      "learning_rate": 3.7904701823251324e-05,
      "loss": 0.8604,
      "step": 271960
    },
    {
      "epoch": 0.9531870450395511,
      "grad_norm": 3.03125,
      "learning_rate": 3.7904052794587626e-05,
      "loss": 0.9323,
      "step": 271970
    },
    {
      "epoch": 0.9532220925464467,
      "grad_norm": 3.109375,
      "learning_rate": 3.790340376592392e-05,
      "loss": 0.94,
      "step": 271980
    },
    {
      "epoch": 0.9532571400533423,
      "grad_norm": 3.0,
      "learning_rate": 3.7902754737260215e-05,
      "loss": 0.9083,
      "step": 271990
    },
    {
      "epoch": 0.9532921875602379,
      "grad_norm": 2.5,
      "learning_rate": 3.7902105708596516e-05,
      "loss": 0.8492,
      "step": 272000
    },
    {
      "epoch": 0.9533272350671335,
      "grad_norm": 3.203125,
      "learning_rate": 3.790145667993281e-05,
      "loss": 0.9167,
      "step": 272010
    },
    {
      "epoch": 0.9533622825740291,
      "grad_norm": 3.375,
      "learning_rate": 3.790080765126911e-05,
      "loss": 0.8367,
      "step": 272020
    },
    {
      "epoch": 0.9533973300809246,
      "grad_norm": 3.0,
      "learning_rate": 3.790015862260541e-05,
      "loss": 0.7604,
      "step": 272030
    },
    {
      "epoch": 0.9534323775878203,
      "grad_norm": 3.234375,
      "learning_rate": 3.789950959394171e-05,
      "loss": 0.886,
      "step": 272040
    },
    {
      "epoch": 0.9534674250947159,
      "grad_norm": 2.890625,
      "learning_rate": 3.7898860565278e-05,
      "loss": 0.948,
      "step": 272050
    },
    {
      "epoch": 0.9535024726016115,
      "grad_norm": 2.84375,
      "learning_rate": 3.7898211536614304e-05,
      "loss": 0.8723,
      "step": 272060
    },
    {
      "epoch": 0.9535375201085071,
      "grad_norm": 2.609375,
      "learning_rate": 3.78975625079506e-05,
      "loss": 0.7981,
      "step": 272070
    },
    {
      "epoch": 0.9535725676154027,
      "grad_norm": 2.921875,
      "learning_rate": 3.78969134792869e-05,
      "loss": 0.8151,
      "step": 272080
    },
    {
      "epoch": 0.9536076151222983,
      "grad_norm": 3.3125,
      "learning_rate": 3.7896264450623195e-05,
      "loss": 1.0503,
      "step": 272090
    },
    {
      "epoch": 0.9536426626291938,
      "grad_norm": 2.578125,
      "learning_rate": 3.7895615421959496e-05,
      "loss": 0.8463,
      "step": 272100
    },
    {
      "epoch": 0.9536777101360895,
      "grad_norm": 2.796875,
      "learning_rate": 3.78949663932958e-05,
      "loss": 0.83,
      "step": 272110
    },
    {
      "epoch": 0.953712757642985,
      "grad_norm": 2.671875,
      "learning_rate": 3.789431736463209e-05,
      "loss": 0.8544,
      "step": 272120
    },
    {
      "epoch": 0.9537478051498807,
      "grad_norm": 2.890625,
      "learning_rate": 3.7893668335968394e-05,
      "loss": 0.9304,
      "step": 272130
    },
    {
      "epoch": 0.9537828526567763,
      "grad_norm": 2.8125,
      "learning_rate": 3.789301930730469e-05,
      "loss": 0.7734,
      "step": 272140
    },
    {
      "epoch": 0.9538179001636719,
      "grad_norm": 3.046875,
      "learning_rate": 3.789237027864099e-05,
      "loss": 0.8072,
      "step": 272150
    },
    {
      "epoch": 0.9538529476705675,
      "grad_norm": 3.1875,
      "learning_rate": 3.7891721249977284e-05,
      "loss": 0.9615,
      "step": 272160
    },
    {
      "epoch": 0.953887995177463,
      "grad_norm": 2.515625,
      "learning_rate": 3.7891072221313586e-05,
      "loss": 0.8478,
      "step": 272170
    },
    {
      "epoch": 0.9539230426843587,
      "grad_norm": 2.953125,
      "learning_rate": 3.789042319264988e-05,
      "loss": 0.8989,
      "step": 272180
    },
    {
      "epoch": 0.9539580901912542,
      "grad_norm": 2.703125,
      "learning_rate": 3.788977416398618e-05,
      "loss": 0.8757,
      "step": 272190
    },
    {
      "epoch": 0.9539931376981499,
      "grad_norm": 3.0,
      "learning_rate": 3.7889125135322476e-05,
      "loss": 0.9377,
      "step": 272200
    },
    {
      "epoch": 0.9540281852050454,
      "grad_norm": 2.71875,
      "learning_rate": 3.788847610665878e-05,
      "loss": 0.8331,
      "step": 272210
    },
    {
      "epoch": 0.954063232711941,
      "grad_norm": 3.203125,
      "learning_rate": 3.788782707799507e-05,
      "loss": 0.9261,
      "step": 272220
    },
    {
      "epoch": 0.9540982802188366,
      "grad_norm": 2.6875,
      "learning_rate": 3.7887178049331374e-05,
      "loss": 0.8817,
      "step": 272230
    },
    {
      "epoch": 0.9541333277257322,
      "grad_norm": 3.015625,
      "learning_rate": 3.788652902066767e-05,
      "loss": 0.9048,
      "step": 272240
    },
    {
      "epoch": 0.9541683752326279,
      "grad_norm": 2.984375,
      "learning_rate": 3.788587999200397e-05,
      "loss": 0.855,
      "step": 272250
    },
    {
      "epoch": 0.9542034227395234,
      "grad_norm": 3.0,
      "learning_rate": 3.788523096334027e-05,
      "loss": 0.8776,
      "step": 272260
    },
    {
      "epoch": 0.9542384702464191,
      "grad_norm": 3.03125,
      "learning_rate": 3.7884581934676566e-05,
      "loss": 0.9554,
      "step": 272270
    },
    {
      "epoch": 0.9542735177533146,
      "grad_norm": 2.5625,
      "learning_rate": 3.788393290601287e-05,
      "loss": 0.8737,
      "step": 272280
    },
    {
      "epoch": 0.9543085652602102,
      "grad_norm": 3.0625,
      "learning_rate": 3.788328387734916e-05,
      "loss": 0.908,
      "step": 272290
    },
    {
      "epoch": 0.9543436127671058,
      "grad_norm": 2.390625,
      "learning_rate": 3.788263484868546e-05,
      "loss": 0.7263,
      "step": 272300
    },
    {
      "epoch": 0.9543786602740014,
      "grad_norm": 3.203125,
      "learning_rate": 3.788198582002176e-05,
      "loss": 0.8782,
      "step": 272310
    },
    {
      "epoch": 0.954413707780897,
      "grad_norm": 2.609375,
      "learning_rate": 3.788133679135806e-05,
      "loss": 0.873,
      "step": 272320
    },
    {
      "epoch": 0.9544487552877926,
      "grad_norm": 2.71875,
      "learning_rate": 3.7880687762694354e-05,
      "loss": 0.9149,
      "step": 272330
    },
    {
      "epoch": 0.9544838027946883,
      "grad_norm": 3.03125,
      "learning_rate": 3.7880038734030655e-05,
      "loss": 0.8116,
      "step": 272340
    },
    {
      "epoch": 0.9545188503015838,
      "grad_norm": 2.84375,
      "learning_rate": 3.787938970536695e-05,
      "loss": 0.8279,
      "step": 272350
    },
    {
      "epoch": 0.9545538978084794,
      "grad_norm": 3.28125,
      "learning_rate": 3.787874067670325e-05,
      "loss": 0.9226,
      "step": 272360
    },
    {
      "epoch": 0.954588945315375,
      "grad_norm": 3.21875,
      "learning_rate": 3.7878091648039546e-05,
      "loss": 0.8174,
      "step": 272370
    },
    {
      "epoch": 0.9546239928222706,
      "grad_norm": 3.5625,
      "learning_rate": 3.787744261937584e-05,
      "loss": 0.8715,
      "step": 272380
    },
    {
      "epoch": 0.9546590403291662,
      "grad_norm": 3.109375,
      "learning_rate": 3.787679359071214e-05,
      "loss": 0.9835,
      "step": 272390
    },
    {
      "epoch": 0.9546940878360618,
      "grad_norm": 2.609375,
      "learning_rate": 3.7876144562048436e-05,
      "loss": 0.8579,
      "step": 272400
    },
    {
      "epoch": 0.9547291353429573,
      "grad_norm": 2.90625,
      "learning_rate": 3.787549553338474e-05,
      "loss": 0.939,
      "step": 272410
    },
    {
      "epoch": 0.954764182849853,
      "grad_norm": 3.203125,
      "learning_rate": 3.787484650472103e-05,
      "loss": 0.9231,
      "step": 272420
    },
    {
      "epoch": 0.9547992303567485,
      "grad_norm": 2.875,
      "learning_rate": 3.7874197476057334e-05,
      "loss": 0.9252,
      "step": 272430
    },
    {
      "epoch": 0.9548342778636442,
      "grad_norm": 3.484375,
      "learning_rate": 3.787354844739363e-05,
      "loss": 0.8009,
      "step": 272440
    },
    {
      "epoch": 0.9548693253705398,
      "grad_norm": 3.21875,
      "learning_rate": 3.787289941872993e-05,
      "loss": 0.9298,
      "step": 272450
    },
    {
      "epoch": 0.9549043728774353,
      "grad_norm": 2.546875,
      "learning_rate": 3.7872250390066224e-05,
      "loss": 0.8566,
      "step": 272460
    },
    {
      "epoch": 0.954939420384331,
      "grad_norm": 2.9375,
      "learning_rate": 3.7871601361402526e-05,
      "loss": 0.8534,
      "step": 272470
    },
    {
      "epoch": 0.9549744678912265,
      "grad_norm": 2.859375,
      "learning_rate": 3.787095233273883e-05,
      "loss": 0.8398,
      "step": 272480
    },
    {
      "epoch": 0.9550095153981222,
      "grad_norm": 2.8125,
      "learning_rate": 3.787030330407512e-05,
      "loss": 0.8624,
      "step": 272490
    },
    {
      "epoch": 0.9550445629050177,
      "grad_norm": 3.0,
      "learning_rate": 3.786965427541142e-05,
      "loss": 0.8794,
      "step": 272500
    },
    {
      "epoch": 0.9550796104119134,
      "grad_norm": 2.921875,
      "learning_rate": 3.786900524674772e-05,
      "loss": 0.9105,
      "step": 272510
    },
    {
      "epoch": 0.9551146579188089,
      "grad_norm": 2.859375,
      "learning_rate": 3.786835621808402e-05,
      "loss": 0.8529,
      "step": 272520
    },
    {
      "epoch": 0.9551497054257045,
      "grad_norm": 2.546875,
      "learning_rate": 3.7867707189420314e-05,
      "loss": 0.8623,
      "step": 272530
    },
    {
      "epoch": 0.9551847529326002,
      "grad_norm": 3.0625,
      "learning_rate": 3.7867058160756615e-05,
      "loss": 0.853,
      "step": 272540
    },
    {
      "epoch": 0.9552198004394957,
      "grad_norm": 2.859375,
      "learning_rate": 3.786640913209291e-05,
      "loss": 0.8264,
      "step": 272550
    },
    {
      "epoch": 0.9552548479463914,
      "grad_norm": 2.90625,
      "learning_rate": 3.786576010342921e-05,
      "loss": 0.9013,
      "step": 272560
    },
    {
      "epoch": 0.9552898954532869,
      "grad_norm": 3.390625,
      "learning_rate": 3.7865111074765506e-05,
      "loss": 0.9242,
      "step": 272570
    },
    {
      "epoch": 0.9553249429601826,
      "grad_norm": 2.65625,
      "learning_rate": 3.786446204610181e-05,
      "loss": 0.8634,
      "step": 272580
    },
    {
      "epoch": 0.9553599904670781,
      "grad_norm": 2.703125,
      "learning_rate": 3.78638130174381e-05,
      "loss": 0.9174,
      "step": 272590
    },
    {
      "epoch": 0.9553950379739737,
      "grad_norm": 2.53125,
      "learning_rate": 3.78631639887744e-05,
      "loss": 0.8082,
      "step": 272600
    },
    {
      "epoch": 0.9554300854808693,
      "grad_norm": 2.875,
      "learning_rate": 3.7862514960110704e-05,
      "loss": 0.812,
      "step": 272610
    },
    {
      "epoch": 0.9554651329877649,
      "grad_norm": 2.671875,
      "learning_rate": 3.7861865931447e-05,
      "loss": 0.8811,
      "step": 272620
    },
    {
      "epoch": 0.9555001804946606,
      "grad_norm": 2.984375,
      "learning_rate": 3.78612169027833e-05,
      "loss": 0.9608,
      "step": 272630
    },
    {
      "epoch": 0.9555352280015561,
      "grad_norm": 2.71875,
      "learning_rate": 3.7860567874119595e-05,
      "loss": 0.902,
      "step": 272640
    },
    {
      "epoch": 0.9555702755084517,
      "grad_norm": 2.734375,
      "learning_rate": 3.7859918845455896e-05,
      "loss": 0.8412,
      "step": 272650
    },
    {
      "epoch": 0.9556053230153473,
      "grad_norm": 3.0,
      "learning_rate": 3.785926981679219e-05,
      "loss": 0.8611,
      "step": 272660
    },
    {
      "epoch": 0.9556403705222429,
      "grad_norm": 2.671875,
      "learning_rate": 3.785862078812849e-05,
      "loss": 0.8491,
      "step": 272670
    },
    {
      "epoch": 0.9556754180291385,
      "grad_norm": 3.296875,
      "learning_rate": 3.785797175946479e-05,
      "loss": 0.915,
      "step": 272680
    },
    {
      "epoch": 0.9557104655360341,
      "grad_norm": 2.5625,
      "learning_rate": 3.785732273080109e-05,
      "loss": 0.8586,
      "step": 272690
    },
    {
      "epoch": 0.9557455130429297,
      "grad_norm": 3.140625,
      "learning_rate": 3.785667370213738e-05,
      "loss": 0.7965,
      "step": 272700
    },
    {
      "epoch": 0.9557805605498253,
      "grad_norm": 2.84375,
      "learning_rate": 3.7856024673473684e-05,
      "loss": 0.8792,
      "step": 272710
    },
    {
      "epoch": 0.9558156080567208,
      "grad_norm": 2.578125,
      "learning_rate": 3.785537564480998e-05,
      "loss": 0.9048,
      "step": 272720
    },
    {
      "epoch": 0.9558506555636165,
      "grad_norm": 2.796875,
      "learning_rate": 3.785472661614628e-05,
      "loss": 0.7735,
      "step": 272730
    },
    {
      "epoch": 0.9558857030705121,
      "grad_norm": 2.703125,
      "learning_rate": 3.7854077587482575e-05,
      "loss": 0.8679,
      "step": 272740
    },
    {
      "epoch": 0.9559207505774077,
      "grad_norm": 3.125,
      "learning_rate": 3.785342855881887e-05,
      "loss": 0.8993,
      "step": 272750
    },
    {
      "epoch": 0.9559557980843033,
      "grad_norm": 2.46875,
      "learning_rate": 3.785277953015517e-05,
      "loss": 0.8283,
      "step": 272760
    },
    {
      "epoch": 0.9559908455911988,
      "grad_norm": 2.859375,
      "learning_rate": 3.7852130501491466e-05,
      "loss": 0.8944,
      "step": 272770
    },
    {
      "epoch": 0.9560258930980945,
      "grad_norm": 3.3125,
      "learning_rate": 3.785148147282777e-05,
      "loss": 0.9514,
      "step": 272780
    },
    {
      "epoch": 0.95606094060499,
      "grad_norm": 2.90625,
      "learning_rate": 3.785083244416406e-05,
      "loss": 0.8893,
      "step": 272790
    },
    {
      "epoch": 0.9560959881118857,
      "grad_norm": 2.890625,
      "learning_rate": 3.785018341550036e-05,
      "loss": 0.862,
      "step": 272800
    },
    {
      "epoch": 0.9561310356187812,
      "grad_norm": 2.875,
      "learning_rate": 3.784953438683666e-05,
      "loss": 0.8586,
      "step": 272810
    },
    {
      "epoch": 0.9561660831256769,
      "grad_norm": 3.25,
      "learning_rate": 3.784888535817296e-05,
      "loss": 0.9005,
      "step": 272820
    },
    {
      "epoch": 0.9562011306325725,
      "grad_norm": 2.859375,
      "learning_rate": 3.7848236329509254e-05,
      "loss": 0.8831,
      "step": 272830
    },
    {
      "epoch": 0.956236178139468,
      "grad_norm": 3.1875,
      "learning_rate": 3.7847587300845555e-05,
      "loss": 0.9348,
      "step": 272840
    },
    {
      "epoch": 0.9562712256463637,
      "grad_norm": 3.0625,
      "learning_rate": 3.7846938272181856e-05,
      "loss": 0.8922,
      "step": 272850
    },
    {
      "epoch": 0.9563062731532592,
      "grad_norm": 2.734375,
      "learning_rate": 3.784628924351815e-05,
      "loss": 0.82,
      "step": 272860
    },
    {
      "epoch": 0.9563413206601549,
      "grad_norm": 3.0625,
      "learning_rate": 3.784564021485445e-05,
      "loss": 0.9493,
      "step": 272870
    },
    {
      "epoch": 0.9563763681670504,
      "grad_norm": 2.875,
      "learning_rate": 3.784499118619075e-05,
      "loss": 0.9331,
      "step": 272880
    },
    {
      "epoch": 0.956411415673946,
      "grad_norm": 3.15625,
      "learning_rate": 3.784434215752705e-05,
      "loss": 0.8316,
      "step": 272890
    },
    {
      "epoch": 0.9564464631808416,
      "grad_norm": 2.34375,
      "learning_rate": 3.784369312886334e-05,
      "loss": 0.869,
      "step": 272900
    },
    {
      "epoch": 0.9564815106877372,
      "grad_norm": 2.828125,
      "learning_rate": 3.7843044100199644e-05,
      "loss": 0.8471,
      "step": 272910
    },
    {
      "epoch": 0.9565165581946328,
      "grad_norm": 2.65625,
      "learning_rate": 3.784239507153594e-05,
      "loss": 0.9394,
      "step": 272920
    },
    {
      "epoch": 0.9565516057015284,
      "grad_norm": 3.125,
      "learning_rate": 3.784174604287224e-05,
      "loss": 0.9453,
      "step": 272930
    },
    {
      "epoch": 0.9565866532084241,
      "grad_norm": 2.921875,
      "learning_rate": 3.7841097014208535e-05,
      "loss": 0.9268,
      "step": 272940
    },
    {
      "epoch": 0.9566217007153196,
      "grad_norm": 2.5,
      "learning_rate": 3.7840447985544836e-05,
      "loss": 0.942,
      "step": 272950
    },
    {
      "epoch": 0.9566567482222152,
      "grad_norm": 3.3125,
      "learning_rate": 3.783979895688113e-05,
      "loss": 0.9393,
      "step": 272960
    },
    {
      "epoch": 0.9566917957291108,
      "grad_norm": 2.890625,
      "learning_rate": 3.783914992821743e-05,
      "loss": 0.9413,
      "step": 272970
    },
    {
      "epoch": 0.9567268432360064,
      "grad_norm": 2.96875,
      "learning_rate": 3.7838500899553734e-05,
      "loss": 0.8838,
      "step": 272980
    },
    {
      "epoch": 0.956761890742902,
      "grad_norm": 2.78125,
      "learning_rate": 3.783785187089003e-05,
      "loss": 0.8451,
      "step": 272990
    },
    {
      "epoch": 0.9567969382497976,
      "grad_norm": 2.90625,
      "learning_rate": 3.783720284222633e-05,
      "loss": 0.8219,
      "step": 273000
    },
    {
      "epoch": 0.9568319857566931,
      "grad_norm": 3.390625,
      "learning_rate": 3.7836553813562624e-05,
      "loss": 0.8991,
      "step": 273010
    },
    {
      "epoch": 0.9568670332635888,
      "grad_norm": 3.1875,
      "learning_rate": 3.7835904784898926e-05,
      "loss": 0.9494,
      "step": 273020
    },
    {
      "epoch": 0.9569020807704844,
      "grad_norm": 2.8125,
      "learning_rate": 3.783525575623522e-05,
      "loss": 0.8654,
      "step": 273030
    },
    {
      "epoch": 0.95693712827738,
      "grad_norm": 2.96875,
      "learning_rate": 3.783460672757152e-05,
      "loss": 0.967,
      "step": 273040
    },
    {
      "epoch": 0.9569721757842756,
      "grad_norm": 2.9375,
      "learning_rate": 3.7833957698907816e-05,
      "loss": 0.8234,
      "step": 273050
    },
    {
      "epoch": 0.9570072232911712,
      "grad_norm": 3.0625,
      "learning_rate": 3.783330867024412e-05,
      "loss": 0.9166,
      "step": 273060
    },
    {
      "epoch": 0.9570422707980668,
      "grad_norm": 3.359375,
      "learning_rate": 3.783265964158041e-05,
      "loss": 0.908,
      "step": 273070
    },
    {
      "epoch": 0.9570773183049623,
      "grad_norm": 3.1875,
      "learning_rate": 3.7832010612916714e-05,
      "loss": 0.8868,
      "step": 273080
    },
    {
      "epoch": 0.957112365811858,
      "grad_norm": 2.8125,
      "learning_rate": 3.783136158425301e-05,
      "loss": 0.9109,
      "step": 273090
    },
    {
      "epoch": 0.9571474133187535,
      "grad_norm": 3.53125,
      "learning_rate": 3.783071255558931e-05,
      "loss": 0.915,
      "step": 273100
    },
    {
      "epoch": 0.9571824608256492,
      "grad_norm": 3.109375,
      "learning_rate": 3.7830063526925604e-05,
      "loss": 0.862,
      "step": 273110
    },
    {
      "epoch": 0.9572175083325448,
      "grad_norm": 3.4375,
      "learning_rate": 3.78294144982619e-05,
      "loss": 0.7822,
      "step": 273120
    },
    {
      "epoch": 0.9572525558394404,
      "grad_norm": 2.8125,
      "learning_rate": 3.78287654695982e-05,
      "loss": 0.8908,
      "step": 273130
    },
    {
      "epoch": 0.957287603346336,
      "grad_norm": 3.234375,
      "learning_rate": 3.7828116440934495e-05,
      "loss": 0.9783,
      "step": 273140
    },
    {
      "epoch": 0.9573226508532315,
      "grad_norm": 2.953125,
      "learning_rate": 3.7827467412270796e-05,
      "loss": 0.8893,
      "step": 273150
    },
    {
      "epoch": 0.9573576983601272,
      "grad_norm": 3.3125,
      "learning_rate": 3.782681838360709e-05,
      "loss": 0.8765,
      "step": 273160
    },
    {
      "epoch": 0.9573927458670227,
      "grad_norm": 2.515625,
      "learning_rate": 3.782616935494339e-05,
      "loss": 0.854,
      "step": 273170
    },
    {
      "epoch": 0.9574277933739184,
      "grad_norm": 3.203125,
      "learning_rate": 3.782552032627969e-05,
      "loss": 0.9633,
      "step": 273180
    },
    {
      "epoch": 0.9574628408808139,
      "grad_norm": 3.265625,
      "learning_rate": 3.782487129761599e-05,
      "loss": 0.9182,
      "step": 273190
    },
    {
      "epoch": 0.9574978883877096,
      "grad_norm": 2.8125,
      "learning_rate": 3.782422226895228e-05,
      "loss": 0.8865,
      "step": 273200
    },
    {
      "epoch": 0.9575329358946051,
      "grad_norm": 3.171875,
      "learning_rate": 3.7823573240288584e-05,
      "loss": 0.909,
      "step": 273210
    },
    {
      "epoch": 0.9575679834015007,
      "grad_norm": 2.734375,
      "learning_rate": 3.7822924211624886e-05,
      "loss": 0.886,
      "step": 273220
    },
    {
      "epoch": 0.9576030309083964,
      "grad_norm": 3.96875,
      "learning_rate": 3.782227518296118e-05,
      "loss": 0.9211,
      "step": 273230
    },
    {
      "epoch": 0.9576380784152919,
      "grad_norm": 2.53125,
      "learning_rate": 3.782162615429748e-05,
      "loss": 0.9133,
      "step": 273240
    },
    {
      "epoch": 0.9576731259221876,
      "grad_norm": 2.796875,
      "learning_rate": 3.7820977125633776e-05,
      "loss": 0.9525,
      "step": 273250
    },
    {
      "epoch": 0.9577081734290831,
      "grad_norm": 2.421875,
      "learning_rate": 3.782032809697008e-05,
      "loss": 0.8811,
      "step": 273260
    },
    {
      "epoch": 0.9577432209359787,
      "grad_norm": 3.109375,
      "learning_rate": 3.781967906830637e-05,
      "loss": 0.8791,
      "step": 273270
    },
    {
      "epoch": 0.9577782684428743,
      "grad_norm": 3.109375,
      "learning_rate": 3.7819030039642674e-05,
      "loss": 0.9296,
      "step": 273280
    },
    {
      "epoch": 0.9578133159497699,
      "grad_norm": 2.8125,
      "learning_rate": 3.781838101097897e-05,
      "loss": 0.9024,
      "step": 273290
    },
    {
      "epoch": 0.9578483634566655,
      "grad_norm": 3.03125,
      "learning_rate": 3.781773198231527e-05,
      "loss": 0.8769,
      "step": 273300
    },
    {
      "epoch": 0.9578834109635611,
      "grad_norm": 3.109375,
      "learning_rate": 3.7817082953651564e-05,
      "loss": 0.9045,
      "step": 273310
    },
    {
      "epoch": 0.9579184584704568,
      "grad_norm": 3.109375,
      "learning_rate": 3.7816433924987866e-05,
      "loss": 0.8523,
      "step": 273320
    },
    {
      "epoch": 0.9579535059773523,
      "grad_norm": 2.8125,
      "learning_rate": 3.781578489632416e-05,
      "loss": 0.9499,
      "step": 273330
    },
    {
      "epoch": 0.9579885534842479,
      "grad_norm": 2.890625,
      "learning_rate": 3.781513586766046e-05,
      "loss": 0.9107,
      "step": 273340
    },
    {
      "epoch": 0.9580236009911435,
      "grad_norm": 3.125,
      "learning_rate": 3.781448683899676e-05,
      "loss": 0.974,
      "step": 273350
    },
    {
      "epoch": 0.9580586484980391,
      "grad_norm": 2.65625,
      "learning_rate": 3.781383781033306e-05,
      "loss": 0.9243,
      "step": 273360
    },
    {
      "epoch": 0.9580936960049347,
      "grad_norm": 2.765625,
      "learning_rate": 3.781318878166936e-05,
      "loss": 0.9005,
      "step": 273370
    },
    {
      "epoch": 0.9581287435118303,
      "grad_norm": 2.75,
      "learning_rate": 3.7812539753005654e-05,
      "loss": 0.8717,
      "step": 273380
    },
    {
      "epoch": 0.9581637910187258,
      "grad_norm": 3.28125,
      "learning_rate": 3.7811890724341955e-05,
      "loss": 0.8333,
      "step": 273390
    },
    {
      "epoch": 0.9581988385256215,
      "grad_norm": 2.875,
      "learning_rate": 3.781124169567825e-05,
      "loss": 0.8583,
      "step": 273400
    },
    {
      "epoch": 0.958233886032517,
      "grad_norm": 3.34375,
      "learning_rate": 3.781059266701455e-05,
      "loss": 0.8402,
      "step": 273410
    },
    {
      "epoch": 0.9582689335394127,
      "grad_norm": 2.296875,
      "learning_rate": 3.7809943638350846e-05,
      "loss": 0.8634,
      "step": 273420
    },
    {
      "epoch": 0.9583039810463083,
      "grad_norm": 2.9375,
      "learning_rate": 3.780929460968715e-05,
      "loss": 0.9097,
      "step": 273430
    },
    {
      "epoch": 0.9583390285532039,
      "grad_norm": 3.09375,
      "learning_rate": 3.780864558102344e-05,
      "loss": 0.9559,
      "step": 273440
    },
    {
      "epoch": 0.9583740760600995,
      "grad_norm": 2.859375,
      "learning_rate": 3.780799655235974e-05,
      "loss": 0.8037,
      "step": 273450
    },
    {
      "epoch": 0.958409123566995,
      "grad_norm": 3.65625,
      "learning_rate": 3.780734752369604e-05,
      "loss": 0.905,
      "step": 273460
    },
    {
      "epoch": 0.9584441710738907,
      "grad_norm": 2.875,
      "learning_rate": 3.780669849503234e-05,
      "loss": 0.8338,
      "step": 273470
    },
    {
      "epoch": 0.9584792185807862,
      "grad_norm": 2.5,
      "learning_rate": 3.780604946636864e-05,
      "loss": 0.787,
      "step": 273480
    },
    {
      "epoch": 0.9585142660876819,
      "grad_norm": 3.0,
      "learning_rate": 3.7805400437704935e-05,
      "loss": 0.8699,
      "step": 273490
    },
    {
      "epoch": 0.9585493135945774,
      "grad_norm": 3.015625,
      "learning_rate": 3.780475140904123e-05,
      "loss": 0.9342,
      "step": 273500
    },
    {
      "epoch": 0.958584361101473,
      "grad_norm": 2.96875,
      "learning_rate": 3.7804102380377524e-05,
      "loss": 0.9243,
      "step": 273510
    },
    {
      "epoch": 0.9586194086083687,
      "grad_norm": 3.171875,
      "learning_rate": 3.7803453351713826e-05,
      "loss": 0.9154,
      "step": 273520
    },
    {
      "epoch": 0.9586544561152642,
      "grad_norm": 3.578125,
      "learning_rate": 3.780280432305012e-05,
      "loss": 0.9645,
      "step": 273530
    },
    {
      "epoch": 0.9586895036221599,
      "grad_norm": 3.296875,
      "learning_rate": 3.780215529438642e-05,
      "loss": 0.8877,
      "step": 273540
    },
    {
      "epoch": 0.9587245511290554,
      "grad_norm": 3.0,
      "learning_rate": 3.7801506265722716e-05,
      "loss": 0.8314,
      "step": 273550
    },
    {
      "epoch": 0.9587595986359511,
      "grad_norm": 2.625,
      "learning_rate": 3.780085723705902e-05,
      "loss": 0.816,
      "step": 273560
    },
    {
      "epoch": 0.9587946461428466,
      "grad_norm": 3.171875,
      "learning_rate": 3.780020820839532e-05,
      "loss": 0.9282,
      "step": 273570
    },
    {
      "epoch": 0.9588296936497422,
      "grad_norm": 2.65625,
      "learning_rate": 3.7799559179731614e-05,
      "loss": 0.834,
      "step": 273580
    },
    {
      "epoch": 0.9588647411566378,
      "grad_norm": 2.71875,
      "learning_rate": 3.7798910151067915e-05,
      "loss": 0.8909,
      "step": 273590
    },
    {
      "epoch": 0.9588997886635334,
      "grad_norm": 2.828125,
      "learning_rate": 3.779826112240421e-05,
      "loss": 0.8646,
      "step": 273600
    },
    {
      "epoch": 0.958934836170429,
      "grad_norm": 2.359375,
      "learning_rate": 3.779761209374051e-05,
      "loss": 0.9175,
      "step": 273610
    },
    {
      "epoch": 0.9589698836773246,
      "grad_norm": 2.984375,
      "learning_rate": 3.7796963065076806e-05,
      "loss": 0.8818,
      "step": 273620
    },
    {
      "epoch": 0.9590049311842203,
      "grad_norm": 3.015625,
      "learning_rate": 3.779631403641311e-05,
      "loss": 0.8507,
      "step": 273630
    },
    {
      "epoch": 0.9590399786911158,
      "grad_norm": 2.96875,
      "learning_rate": 3.77956650077494e-05,
      "loss": 0.8937,
      "step": 273640
    },
    {
      "epoch": 0.9590750261980114,
      "grad_norm": 2.984375,
      "learning_rate": 3.77950159790857e-05,
      "loss": 0.8082,
      "step": 273650
    },
    {
      "epoch": 0.959110073704907,
      "grad_norm": 2.59375,
      "learning_rate": 3.7794366950422e-05,
      "loss": 0.8725,
      "step": 273660
    },
    {
      "epoch": 0.9591451212118026,
      "grad_norm": 2.578125,
      "learning_rate": 3.77937179217583e-05,
      "loss": 0.909,
      "step": 273670
    },
    {
      "epoch": 0.9591801687186982,
      "grad_norm": 2.96875,
      "learning_rate": 3.7793068893094594e-05,
      "loss": 0.8054,
      "step": 273680
    },
    {
      "epoch": 0.9592152162255938,
      "grad_norm": 2.921875,
      "learning_rate": 3.7792419864430895e-05,
      "loss": 0.9286,
      "step": 273690
    },
    {
      "epoch": 0.9592502637324893,
      "grad_norm": 2.46875,
      "learning_rate": 3.779177083576719e-05,
      "loss": 0.8548,
      "step": 273700
    },
    {
      "epoch": 0.959285311239385,
      "grad_norm": 2.6875,
      "learning_rate": 3.779112180710349e-05,
      "loss": 0.8004,
      "step": 273710
    },
    {
      "epoch": 0.9593203587462806,
      "grad_norm": 3.203125,
      "learning_rate": 3.779047277843979e-05,
      "loss": 0.8892,
      "step": 273720
    },
    {
      "epoch": 0.9593554062531762,
      "grad_norm": 2.796875,
      "learning_rate": 3.778982374977609e-05,
      "loss": 0.8891,
      "step": 273730
    },
    {
      "epoch": 0.9593904537600718,
      "grad_norm": 2.6875,
      "learning_rate": 3.778917472111239e-05,
      "loss": 0.8305,
      "step": 273740
    },
    {
      "epoch": 0.9594255012669674,
      "grad_norm": 3.359375,
      "learning_rate": 3.778852569244868e-05,
      "loss": 0.9489,
      "step": 273750
    },
    {
      "epoch": 0.959460548773863,
      "grad_norm": 2.734375,
      "learning_rate": 3.7787876663784984e-05,
      "loss": 0.8952,
      "step": 273760
    },
    {
      "epoch": 0.9594955962807585,
      "grad_norm": 3.140625,
      "learning_rate": 3.778722763512128e-05,
      "loss": 0.8329,
      "step": 273770
    },
    {
      "epoch": 0.9595306437876542,
      "grad_norm": 3.140625,
      "learning_rate": 3.778657860645758e-05,
      "loss": 0.9152,
      "step": 273780
    },
    {
      "epoch": 0.9595656912945497,
      "grad_norm": 3.046875,
      "learning_rate": 3.7785929577793875e-05,
      "loss": 0.9239,
      "step": 273790
    },
    {
      "epoch": 0.9596007388014454,
      "grad_norm": 2.8125,
      "learning_rate": 3.7785280549130176e-05,
      "loss": 0.8378,
      "step": 273800
    },
    {
      "epoch": 0.959635786308341,
      "grad_norm": 2.59375,
      "learning_rate": 3.778463152046647e-05,
      "loss": 0.8128,
      "step": 273810
    },
    {
      "epoch": 0.9596708338152365,
      "grad_norm": 2.640625,
      "learning_rate": 3.778398249180277e-05,
      "loss": 0.8314,
      "step": 273820
    },
    {
      "epoch": 0.9597058813221322,
      "grad_norm": 2.734375,
      "learning_rate": 3.778333346313907e-05,
      "loss": 0.8268,
      "step": 273830
    },
    {
      "epoch": 0.9597409288290277,
      "grad_norm": 2.609375,
      "learning_rate": 3.778268443447537e-05,
      "loss": 0.8398,
      "step": 273840
    },
    {
      "epoch": 0.9597759763359234,
      "grad_norm": 2.8125,
      "learning_rate": 3.778203540581167e-05,
      "loss": 0.8641,
      "step": 273850
    },
    {
      "epoch": 0.9598110238428189,
      "grad_norm": 3.09375,
      "learning_rate": 3.7781386377147964e-05,
      "loss": 0.8597,
      "step": 273860
    },
    {
      "epoch": 0.9598460713497146,
      "grad_norm": 2.890625,
      "learning_rate": 3.778073734848426e-05,
      "loss": 0.9165,
      "step": 273870
    },
    {
      "epoch": 0.9598811188566101,
      "grad_norm": 2.765625,
      "learning_rate": 3.7780088319820554e-05,
      "loss": 0.8221,
      "step": 273880
    },
    {
      "epoch": 0.9599161663635057,
      "grad_norm": 2.796875,
      "learning_rate": 3.7779439291156855e-05,
      "loss": 0.8482,
      "step": 273890
    },
    {
      "epoch": 0.9599512138704013,
      "grad_norm": 3.140625,
      "learning_rate": 3.777879026249315e-05,
      "loss": 0.9455,
      "step": 273900
    },
    {
      "epoch": 0.9599862613772969,
      "grad_norm": 3.0,
      "learning_rate": 3.777814123382945e-05,
      "loss": 0.911,
      "step": 273910
    },
    {
      "epoch": 0.9600213088841926,
      "grad_norm": 3.125,
      "learning_rate": 3.7777492205165746e-05,
      "loss": 0.8227,
      "step": 273920
    },
    {
      "epoch": 0.9600563563910881,
      "grad_norm": 2.6875,
      "learning_rate": 3.777684317650205e-05,
      "loss": 0.8725,
      "step": 273930
    },
    {
      "epoch": 0.9600914038979838,
      "grad_norm": 2.875,
      "learning_rate": 3.777619414783835e-05,
      "loss": 0.8455,
      "step": 273940
    },
    {
      "epoch": 0.9601264514048793,
      "grad_norm": 2.484375,
      "learning_rate": 3.777554511917464e-05,
      "loss": 0.861,
      "step": 273950
    },
    {
      "epoch": 0.9601614989117749,
      "grad_norm": 2.765625,
      "learning_rate": 3.7774896090510944e-05,
      "loss": 0.8419,
      "step": 273960
    },
    {
      "epoch": 0.9601965464186705,
      "grad_norm": 2.546875,
      "learning_rate": 3.777424706184724e-05,
      "loss": 0.776,
      "step": 273970
    },
    {
      "epoch": 0.9602315939255661,
      "grad_norm": 3.515625,
      "learning_rate": 3.777359803318354e-05,
      "loss": 0.9306,
      "step": 273980
    },
    {
      "epoch": 0.9602666414324617,
      "grad_norm": 2.5625,
      "learning_rate": 3.7772949004519835e-05,
      "loss": 0.9365,
      "step": 273990
    },
    {
      "epoch": 0.9603016889393573,
      "grad_norm": 3.34375,
      "learning_rate": 3.7772299975856136e-05,
      "loss": 0.886,
      "step": 274000
    },
    {
      "epoch": 0.960336736446253,
      "grad_norm": 3.234375,
      "learning_rate": 3.777165094719243e-05,
      "loss": 0.9519,
      "step": 274010
    },
    {
      "epoch": 0.9603717839531485,
      "grad_norm": 2.640625,
      "learning_rate": 3.777100191852873e-05,
      "loss": 0.9012,
      "step": 274020
    },
    {
      "epoch": 0.9604068314600441,
      "grad_norm": 3.09375,
      "learning_rate": 3.777035288986503e-05,
      "loss": 0.9358,
      "step": 274030
    },
    {
      "epoch": 0.9604418789669397,
      "grad_norm": 2.96875,
      "learning_rate": 3.776970386120133e-05,
      "loss": 0.8817,
      "step": 274040
    },
    {
      "epoch": 0.9604769264738353,
      "grad_norm": 3.015625,
      "learning_rate": 3.776905483253762e-05,
      "loss": 0.917,
      "step": 274050
    },
    {
      "epoch": 0.9605119739807308,
      "grad_norm": 2.6875,
      "learning_rate": 3.7768405803873924e-05,
      "loss": 0.8494,
      "step": 274060
    },
    {
      "epoch": 0.9605470214876265,
      "grad_norm": 2.546875,
      "learning_rate": 3.776775677521022e-05,
      "loss": 0.8996,
      "step": 274070
    },
    {
      "epoch": 0.960582068994522,
      "grad_norm": 2.8125,
      "learning_rate": 3.776710774654652e-05,
      "loss": 0.7693,
      "step": 274080
    },
    {
      "epoch": 0.9606171165014177,
      "grad_norm": 3.03125,
      "learning_rate": 3.776645871788282e-05,
      "loss": 0.9259,
      "step": 274090
    },
    {
      "epoch": 0.9606521640083132,
      "grad_norm": 3.15625,
      "learning_rate": 3.7765809689219116e-05,
      "loss": 0.8786,
      "step": 274100
    },
    {
      "epoch": 0.9606872115152089,
      "grad_norm": 3.21875,
      "learning_rate": 3.776516066055542e-05,
      "loss": 0.9019,
      "step": 274110
    },
    {
      "epoch": 0.9607222590221045,
      "grad_norm": 2.84375,
      "learning_rate": 3.776451163189171e-05,
      "loss": 0.8556,
      "step": 274120
    },
    {
      "epoch": 0.960757306529,
      "grad_norm": 2.515625,
      "learning_rate": 3.7763862603228014e-05,
      "loss": 0.961,
      "step": 274130
    },
    {
      "epoch": 0.9607923540358957,
      "grad_norm": 3.140625,
      "learning_rate": 3.776321357456431e-05,
      "loss": 0.9174,
      "step": 274140
    },
    {
      "epoch": 0.9608274015427912,
      "grad_norm": 2.734375,
      "learning_rate": 3.776256454590061e-05,
      "loss": 0.901,
      "step": 274150
    },
    {
      "epoch": 0.9608624490496869,
      "grad_norm": 3.125,
      "learning_rate": 3.7761915517236904e-05,
      "loss": 0.8695,
      "step": 274160
    },
    {
      "epoch": 0.9608974965565824,
      "grad_norm": 2.953125,
      "learning_rate": 3.7761266488573206e-05,
      "loss": 0.9207,
      "step": 274170
    },
    {
      "epoch": 0.9609325440634781,
      "grad_norm": 3.03125,
      "learning_rate": 3.77606174599095e-05,
      "loss": 0.8949,
      "step": 274180
    },
    {
      "epoch": 0.9609675915703736,
      "grad_norm": 2.359375,
      "learning_rate": 3.77599684312458e-05,
      "loss": 0.8676,
      "step": 274190
    },
    {
      "epoch": 0.9610026390772692,
      "grad_norm": 2.71875,
      "learning_rate": 3.7759319402582096e-05,
      "loss": 0.9202,
      "step": 274200
    },
    {
      "epoch": 0.9610376865841649,
      "grad_norm": 2.6875,
      "learning_rate": 3.77586703739184e-05,
      "loss": 0.9028,
      "step": 274210
    },
    {
      "epoch": 0.9610727340910604,
      "grad_norm": 2.9375,
      "learning_rate": 3.77580213452547e-05,
      "loss": 0.9028,
      "step": 274220
    },
    {
      "epoch": 0.9611077815979561,
      "grad_norm": 3.171875,
      "learning_rate": 3.7757372316590994e-05,
      "loss": 0.8953,
      "step": 274230
    },
    {
      "epoch": 0.9611428291048516,
      "grad_norm": 2.90625,
      "learning_rate": 3.7756723287927295e-05,
      "loss": 0.9519,
      "step": 274240
    },
    {
      "epoch": 0.9611778766117473,
      "grad_norm": 3.015625,
      "learning_rate": 3.775607425926358e-05,
      "loss": 0.9338,
      "step": 274250
    },
    {
      "epoch": 0.9612129241186428,
      "grad_norm": 3.046875,
      "learning_rate": 3.7755425230599884e-05,
      "loss": 0.9084,
      "step": 274260
    },
    {
      "epoch": 0.9612479716255384,
      "grad_norm": 2.828125,
      "learning_rate": 3.775477620193618e-05,
      "loss": 0.9283,
      "step": 274270
    },
    {
      "epoch": 0.961283019132434,
      "grad_norm": 3.171875,
      "learning_rate": 3.775412717327248e-05,
      "loss": 0.857,
      "step": 274280
    },
    {
      "epoch": 0.9613180666393296,
      "grad_norm": 2.71875,
      "learning_rate": 3.7753478144608775e-05,
      "loss": 0.7994,
      "step": 274290
    },
    {
      "epoch": 0.9613531141462253,
      "grad_norm": 3.15625,
      "learning_rate": 3.7752829115945076e-05,
      "loss": 0.8433,
      "step": 274300
    },
    {
      "epoch": 0.9613881616531208,
      "grad_norm": 2.734375,
      "learning_rate": 3.775218008728138e-05,
      "loss": 0.8387,
      "step": 274310
    },
    {
      "epoch": 0.9614232091600164,
      "grad_norm": 2.421875,
      "learning_rate": 3.775153105861767e-05,
      "loss": 0.8703,
      "step": 274320
    },
    {
      "epoch": 0.961458256666912,
      "grad_norm": 2.703125,
      "learning_rate": 3.7750882029953974e-05,
      "loss": 0.8182,
      "step": 274330
    },
    {
      "epoch": 0.9614933041738076,
      "grad_norm": 3.078125,
      "learning_rate": 3.775023300129027e-05,
      "loss": 0.8379,
      "step": 274340
    },
    {
      "epoch": 0.9615283516807032,
      "grad_norm": 2.53125,
      "learning_rate": 3.774958397262657e-05,
      "loss": 0.8207,
      "step": 274350
    },
    {
      "epoch": 0.9615633991875988,
      "grad_norm": 2.796875,
      "learning_rate": 3.7748934943962864e-05,
      "loss": 0.8388,
      "step": 274360
    },
    {
      "epoch": 0.9615984466944943,
      "grad_norm": 2.859375,
      "learning_rate": 3.7748285915299166e-05,
      "loss": 0.88,
      "step": 274370
    },
    {
      "epoch": 0.96163349420139,
      "grad_norm": 3.078125,
      "learning_rate": 3.774763688663546e-05,
      "loss": 0.8826,
      "step": 274380
    },
    {
      "epoch": 0.9616685417082855,
      "grad_norm": 3.015625,
      "learning_rate": 3.774698785797176e-05,
      "loss": 0.8903,
      "step": 274390
    },
    {
      "epoch": 0.9617035892151812,
      "grad_norm": 2.703125,
      "learning_rate": 3.7746338829308056e-05,
      "loss": 0.8713,
      "step": 274400
    },
    {
      "epoch": 0.9617386367220768,
      "grad_norm": 2.96875,
      "learning_rate": 3.774568980064436e-05,
      "loss": 0.9456,
      "step": 274410
    },
    {
      "epoch": 0.9617736842289724,
      "grad_norm": 2.984375,
      "learning_rate": 3.774504077198065e-05,
      "loss": 0.8212,
      "step": 274420
    },
    {
      "epoch": 0.961808731735868,
      "grad_norm": 2.953125,
      "learning_rate": 3.7744391743316954e-05,
      "loss": 0.7733,
      "step": 274430
    },
    {
      "epoch": 0.9618437792427635,
      "grad_norm": 2.859375,
      "learning_rate": 3.774374271465325e-05,
      "loss": 0.8902,
      "step": 274440
    },
    {
      "epoch": 0.9618788267496592,
      "grad_norm": 3.078125,
      "learning_rate": 3.774309368598955e-05,
      "loss": 0.8745,
      "step": 274450
    },
    {
      "epoch": 0.9619138742565547,
      "grad_norm": 2.40625,
      "learning_rate": 3.774244465732585e-05,
      "loss": 0.8815,
      "step": 274460
    },
    {
      "epoch": 0.9619489217634504,
      "grad_norm": 2.609375,
      "learning_rate": 3.7741795628662146e-05,
      "loss": 0.8978,
      "step": 274470
    },
    {
      "epoch": 0.9619839692703459,
      "grad_norm": 2.796875,
      "learning_rate": 3.774114659999845e-05,
      "loss": 0.9413,
      "step": 274480
    },
    {
      "epoch": 0.9620190167772416,
      "grad_norm": 3.15625,
      "learning_rate": 3.774049757133474e-05,
      "loss": 0.8907,
      "step": 274490
    },
    {
      "epoch": 0.9620540642841372,
      "grad_norm": 2.734375,
      "learning_rate": 3.773984854267104e-05,
      "loss": 0.8822,
      "step": 274500
    },
    {
      "epoch": 0.9620891117910327,
      "grad_norm": 2.671875,
      "learning_rate": 3.773919951400734e-05,
      "loss": 0.8032,
      "step": 274510
    },
    {
      "epoch": 0.9621241592979284,
      "grad_norm": 2.703125,
      "learning_rate": 3.773855048534364e-05,
      "loss": 0.8322,
      "step": 274520
    },
    {
      "epoch": 0.9621592068048239,
      "grad_norm": 3.515625,
      "learning_rate": 3.7737901456679934e-05,
      "loss": 0.8899,
      "step": 274530
    },
    {
      "epoch": 0.9621942543117196,
      "grad_norm": 2.46875,
      "learning_rate": 3.7737252428016235e-05,
      "loss": 0.8126,
      "step": 274540
    },
    {
      "epoch": 0.9622293018186151,
      "grad_norm": 2.859375,
      "learning_rate": 3.773660339935253e-05,
      "loss": 0.9354,
      "step": 274550
    },
    {
      "epoch": 0.9622643493255107,
      "grad_norm": 2.828125,
      "learning_rate": 3.773595437068883e-05,
      "loss": 0.8566,
      "step": 274560
    },
    {
      "epoch": 0.9622993968324063,
      "grad_norm": 2.859375,
      "learning_rate": 3.7735305342025126e-05,
      "loss": 0.8896,
      "step": 274570
    },
    {
      "epoch": 0.9623344443393019,
      "grad_norm": 2.65625,
      "learning_rate": 3.773465631336143e-05,
      "loss": 0.8962,
      "step": 274580
    },
    {
      "epoch": 0.9623694918461975,
      "grad_norm": 2.5,
      "learning_rate": 3.773400728469773e-05,
      "loss": 0.8615,
      "step": 274590
    },
    {
      "epoch": 0.9624045393530931,
      "grad_norm": 3.09375,
      "learning_rate": 3.773335825603402e-05,
      "loss": 0.9243,
      "step": 274600
    },
    {
      "epoch": 0.9624395868599888,
      "grad_norm": 3.34375,
      "learning_rate": 3.7732709227370325e-05,
      "loss": 0.9469,
      "step": 274610
    },
    {
      "epoch": 0.9624746343668843,
      "grad_norm": 2.53125,
      "learning_rate": 3.773206019870662e-05,
      "loss": 0.8775,
      "step": 274620
    },
    {
      "epoch": 0.9625096818737799,
      "grad_norm": 2.53125,
      "learning_rate": 3.7731411170042914e-05,
      "loss": 0.7844,
      "step": 274630
    },
    {
      "epoch": 0.9625447293806755,
      "grad_norm": 2.625,
      "learning_rate": 3.773076214137921e-05,
      "loss": 0.8389,
      "step": 274640
    },
    {
      "epoch": 0.9625797768875711,
      "grad_norm": 2.765625,
      "learning_rate": 3.773011311271551e-05,
      "loss": 0.8417,
      "step": 274650
    },
    {
      "epoch": 0.9626148243944667,
      "grad_norm": 3.046875,
      "learning_rate": 3.7729464084051804e-05,
      "loss": 0.8844,
      "step": 274660
    },
    {
      "epoch": 0.9626498719013623,
      "grad_norm": 3.09375,
      "learning_rate": 3.7728815055388106e-05,
      "loss": 0.8623,
      "step": 274670
    },
    {
      "epoch": 0.9626849194082578,
      "grad_norm": 2.875,
      "learning_rate": 3.772816602672441e-05,
      "loss": 0.9301,
      "step": 274680
    },
    {
      "epoch": 0.9627199669151535,
      "grad_norm": 2.765625,
      "learning_rate": 3.77275169980607e-05,
      "loss": 0.8799,
      "step": 274690
    },
    {
      "epoch": 0.9627550144220491,
      "grad_norm": 3.4375,
      "learning_rate": 3.7726867969397e-05,
      "loss": 0.9497,
      "step": 274700
    },
    {
      "epoch": 0.9627900619289447,
      "grad_norm": 3.296875,
      "learning_rate": 3.77262189407333e-05,
      "loss": 0.8316,
      "step": 274710
    },
    {
      "epoch": 0.9628251094358403,
      "grad_norm": 2.875,
      "learning_rate": 3.77255699120696e-05,
      "loss": 0.9155,
      "step": 274720
    },
    {
      "epoch": 0.9628601569427359,
      "grad_norm": 2.71875,
      "learning_rate": 3.7724920883405894e-05,
      "loss": 0.8815,
      "step": 274730
    },
    {
      "epoch": 0.9628952044496315,
      "grad_norm": 3.03125,
      "learning_rate": 3.7724271854742195e-05,
      "loss": 0.8325,
      "step": 274740
    },
    {
      "epoch": 0.962930251956527,
      "grad_norm": 3.015625,
      "learning_rate": 3.772362282607849e-05,
      "loss": 0.9401,
      "step": 274750
    },
    {
      "epoch": 0.9629652994634227,
      "grad_norm": 2.515625,
      "learning_rate": 3.772297379741479e-05,
      "loss": 0.8855,
      "step": 274760
    },
    {
      "epoch": 0.9630003469703182,
      "grad_norm": 2.34375,
      "learning_rate": 3.7722324768751086e-05,
      "loss": 0.9477,
      "step": 274770
    },
    {
      "epoch": 0.9630353944772139,
      "grad_norm": 3.03125,
      "learning_rate": 3.772167574008739e-05,
      "loss": 0.908,
      "step": 274780
    },
    {
      "epoch": 0.9630704419841095,
      "grad_norm": 2.625,
      "learning_rate": 3.772102671142368e-05,
      "loss": 0.7863,
      "step": 274790
    },
    {
      "epoch": 0.963105489491005,
      "grad_norm": 2.375,
      "learning_rate": 3.772037768275998e-05,
      "loss": 0.8498,
      "step": 274800
    },
    {
      "epoch": 0.9631405369979007,
      "grad_norm": 2.515625,
      "learning_rate": 3.7719728654096285e-05,
      "loss": 0.8763,
      "step": 274810
    },
    {
      "epoch": 0.9631755845047962,
      "grad_norm": 3.46875,
      "learning_rate": 3.771907962543258e-05,
      "loss": 0.9807,
      "step": 274820
    },
    {
      "epoch": 0.9632106320116919,
      "grad_norm": 3.203125,
      "learning_rate": 3.771843059676888e-05,
      "loss": 0.8485,
      "step": 274830
    },
    {
      "epoch": 0.9632456795185874,
      "grad_norm": 3.140625,
      "learning_rate": 3.7717781568105175e-05,
      "loss": 0.8978,
      "step": 274840
    },
    {
      "epoch": 0.9632807270254831,
      "grad_norm": 2.90625,
      "learning_rate": 3.771713253944148e-05,
      "loss": 0.9257,
      "step": 274850
    },
    {
      "epoch": 0.9633157745323786,
      "grad_norm": 3.046875,
      "learning_rate": 3.771648351077777e-05,
      "loss": 0.9896,
      "step": 274860
    },
    {
      "epoch": 0.9633508220392742,
      "grad_norm": 2.96875,
      "learning_rate": 3.771583448211407e-05,
      "loss": 0.8208,
      "step": 274870
    },
    {
      "epoch": 0.9633858695461698,
      "grad_norm": 3.125,
      "learning_rate": 3.771518545345037e-05,
      "loss": 0.8739,
      "step": 274880
    },
    {
      "epoch": 0.9634209170530654,
      "grad_norm": 2.4375,
      "learning_rate": 3.771453642478667e-05,
      "loss": 0.8204,
      "step": 274890
    },
    {
      "epoch": 0.9634559645599611,
      "grad_norm": 2.96875,
      "learning_rate": 3.771388739612296e-05,
      "loss": 0.8909,
      "step": 274900
    },
    {
      "epoch": 0.9634910120668566,
      "grad_norm": 2.671875,
      "learning_rate": 3.7713238367459265e-05,
      "loss": 1.0046,
      "step": 274910
    },
    {
      "epoch": 0.9635260595737523,
      "grad_norm": 2.875,
      "learning_rate": 3.771258933879556e-05,
      "loss": 0.8969,
      "step": 274920
    },
    {
      "epoch": 0.9635611070806478,
      "grad_norm": 3.015625,
      "learning_rate": 3.771194031013186e-05,
      "loss": 0.8952,
      "step": 274930
    },
    {
      "epoch": 0.9635961545875434,
      "grad_norm": 3.234375,
      "learning_rate": 3.7711291281468155e-05,
      "loss": 0.9181,
      "step": 274940
    },
    {
      "epoch": 0.963631202094439,
      "grad_norm": 2.671875,
      "learning_rate": 3.771064225280446e-05,
      "loss": 0.8273,
      "step": 274950
    },
    {
      "epoch": 0.9636662496013346,
      "grad_norm": 2.609375,
      "learning_rate": 3.770999322414076e-05,
      "loss": 0.8667,
      "step": 274960
    },
    {
      "epoch": 0.9637012971082302,
      "grad_norm": 3.03125,
      "learning_rate": 3.770934419547705e-05,
      "loss": 0.8643,
      "step": 274970
    },
    {
      "epoch": 0.9637363446151258,
      "grad_norm": 2.625,
      "learning_rate": 3.7708695166813354e-05,
      "loss": 0.8121,
      "step": 274980
    },
    {
      "epoch": 0.9637713921220215,
      "grad_norm": 2.90625,
      "learning_rate": 3.770804613814965e-05,
      "loss": 0.807,
      "step": 274990
    },
    {
      "epoch": 0.963806439628917,
      "grad_norm": 2.828125,
      "learning_rate": 3.770739710948594e-05,
      "loss": 0.927,
      "step": 275000
    },
    {
      "epoch": 0.963806439628917,
      "eval_loss": 0.8225326538085938,
      "eval_runtime": 557.417,
      "eval_samples_per_second": 682.498,
      "eval_steps_per_second": 56.875,
      "step": 275000
    },
    {
      "epoch": 0.9638414871358126,
      "grad_norm": 2.921875,
      "learning_rate": 3.770674808082224e-05,
      "loss": 0.8932,
      "step": 275010
    },
    {
      "epoch": 0.9638765346427082,
      "grad_norm": 2.765625,
      "learning_rate": 3.770609905215854e-05,
      "loss": 0.902,
      "step": 275020
    },
    {
      "epoch": 0.9639115821496038,
      "grad_norm": 3.171875,
      "learning_rate": 3.7705450023494834e-05,
      "loss": 0.8414,
      "step": 275030
    },
    {
      "epoch": 0.9639466296564994,
      "grad_norm": 3.03125,
      "learning_rate": 3.7704800994831135e-05,
      "loss": 0.8947,
      "step": 275040
    },
    {
      "epoch": 0.963981677163395,
      "grad_norm": 3.078125,
      "learning_rate": 3.770415196616744e-05,
      "loss": 0.8787,
      "step": 275050
    },
    {
      "epoch": 0.9640167246702905,
      "grad_norm": 3.15625,
      "learning_rate": 3.770350293750373e-05,
      "loss": 0.8622,
      "step": 275060
    },
    {
      "epoch": 0.9640517721771862,
      "grad_norm": 2.8125,
      "learning_rate": 3.770285390884003e-05,
      "loss": 0.8809,
      "step": 275070
    },
    {
      "epoch": 0.9640868196840817,
      "grad_norm": 3.265625,
      "learning_rate": 3.770220488017633e-05,
      "loss": 0.8374,
      "step": 275080
    },
    {
      "epoch": 0.9641218671909774,
      "grad_norm": 2.84375,
      "learning_rate": 3.770155585151263e-05,
      "loss": 0.8029,
      "step": 275090
    },
    {
      "epoch": 0.964156914697873,
      "grad_norm": 2.828125,
      "learning_rate": 3.770090682284892e-05,
      "loss": 0.9498,
      "step": 275100
    },
    {
      "epoch": 0.9641919622047685,
      "grad_norm": 2.671875,
      "learning_rate": 3.7700257794185225e-05,
      "loss": 0.863,
      "step": 275110
    },
    {
      "epoch": 0.9642270097116642,
      "grad_norm": 3.15625,
      "learning_rate": 3.769960876552152e-05,
      "loss": 0.8415,
      "step": 275120
    },
    {
      "epoch": 0.9642620572185597,
      "grad_norm": 2.640625,
      "learning_rate": 3.769895973685782e-05,
      "loss": 0.8779,
      "step": 275130
    },
    {
      "epoch": 0.9642971047254554,
      "grad_norm": 2.328125,
      "learning_rate": 3.7698310708194115e-05,
      "loss": 0.8598,
      "step": 275140
    },
    {
      "epoch": 0.9643321522323509,
      "grad_norm": 3.09375,
      "learning_rate": 3.769766167953042e-05,
      "loss": 0.8692,
      "step": 275150
    },
    {
      "epoch": 0.9643671997392466,
      "grad_norm": 3.015625,
      "learning_rate": 3.769701265086671e-05,
      "loss": 0.9586,
      "step": 275160
    },
    {
      "epoch": 0.9644022472461421,
      "grad_norm": 2.96875,
      "learning_rate": 3.769636362220301e-05,
      "loss": 0.9006,
      "step": 275170
    },
    {
      "epoch": 0.9644372947530377,
      "grad_norm": 2.671875,
      "learning_rate": 3.7695714593539314e-05,
      "loss": 0.7823,
      "step": 275180
    },
    {
      "epoch": 0.9644723422599334,
      "grad_norm": 2.65625,
      "learning_rate": 3.769506556487561e-05,
      "loss": 0.8143,
      "step": 275190
    },
    {
      "epoch": 0.9645073897668289,
      "grad_norm": 2.78125,
      "learning_rate": 3.769441653621191e-05,
      "loss": 0.8655,
      "step": 275200
    },
    {
      "epoch": 0.9645424372737246,
      "grad_norm": 2.515625,
      "learning_rate": 3.7693767507548205e-05,
      "loss": 0.8939,
      "step": 275210
    },
    {
      "epoch": 0.9645774847806201,
      "grad_norm": 2.859375,
      "learning_rate": 3.7693118478884506e-05,
      "loss": 0.7951,
      "step": 275220
    },
    {
      "epoch": 0.9646125322875158,
      "grad_norm": 3.140625,
      "learning_rate": 3.76924694502208e-05,
      "loss": 0.8917,
      "step": 275230
    },
    {
      "epoch": 0.9646475797944113,
      "grad_norm": 2.59375,
      "learning_rate": 3.76918204215571e-05,
      "loss": 0.9827,
      "step": 275240
    },
    {
      "epoch": 0.9646826273013069,
      "grad_norm": 3.046875,
      "learning_rate": 3.76911713928934e-05,
      "loss": 0.8665,
      "step": 275250
    },
    {
      "epoch": 0.9647176748082025,
      "grad_norm": 2.78125,
      "learning_rate": 3.76905223642297e-05,
      "loss": 0.9081,
      "step": 275260
    },
    {
      "epoch": 0.9647527223150981,
      "grad_norm": 3.25,
      "learning_rate": 3.768987333556599e-05,
      "loss": 0.8054,
      "step": 275270
    },
    {
      "epoch": 0.9647877698219937,
      "grad_norm": 2.984375,
      "learning_rate": 3.7689224306902294e-05,
      "loss": 0.8918,
      "step": 275280
    },
    {
      "epoch": 0.9648228173288893,
      "grad_norm": 3.03125,
      "learning_rate": 3.768857527823859e-05,
      "loss": 0.8935,
      "step": 275290
    },
    {
      "epoch": 0.964857864835785,
      "grad_norm": 2.796875,
      "learning_rate": 3.768792624957489e-05,
      "loss": 0.8308,
      "step": 275300
    },
    {
      "epoch": 0.9648929123426805,
      "grad_norm": 2.9375,
      "learning_rate": 3.7687277220911185e-05,
      "loss": 0.8356,
      "step": 275310
    },
    {
      "epoch": 0.9649279598495761,
      "grad_norm": 3.125,
      "learning_rate": 3.7686628192247486e-05,
      "loss": 0.9193,
      "step": 275320
    },
    {
      "epoch": 0.9649630073564717,
      "grad_norm": 2.734375,
      "learning_rate": 3.768597916358379e-05,
      "loss": 0.8184,
      "step": 275330
    },
    {
      "epoch": 0.9649980548633673,
      "grad_norm": 2.828125,
      "learning_rate": 3.768533013492008e-05,
      "loss": 0.8006,
      "step": 275340
    },
    {
      "epoch": 0.9650331023702629,
      "grad_norm": 3.34375,
      "learning_rate": 3.7684681106256383e-05,
      "loss": 0.9448,
      "step": 275350
    },
    {
      "epoch": 0.9650681498771585,
      "grad_norm": 3.171875,
      "learning_rate": 3.768403207759268e-05,
      "loss": 0.8449,
      "step": 275360
    },
    {
      "epoch": 0.965103197384054,
      "grad_norm": 2.875,
      "learning_rate": 3.768338304892898e-05,
      "loss": 0.9401,
      "step": 275370
    },
    {
      "epoch": 0.9651382448909497,
      "grad_norm": 2.828125,
      "learning_rate": 3.768273402026527e-05,
      "loss": 0.773,
      "step": 275380
    },
    {
      "epoch": 0.9651732923978453,
      "grad_norm": 2.859375,
      "learning_rate": 3.768208499160157e-05,
      "loss": 0.9225,
      "step": 275390
    },
    {
      "epoch": 0.9652083399047409,
      "grad_norm": 2.796875,
      "learning_rate": 3.768143596293786e-05,
      "loss": 0.8503,
      "step": 275400
    },
    {
      "epoch": 0.9652433874116365,
      "grad_norm": 2.75,
      "learning_rate": 3.7680786934274165e-05,
      "loss": 0.8874,
      "step": 275410
    },
    {
      "epoch": 0.965278434918532,
      "grad_norm": 2.78125,
      "learning_rate": 3.7680137905610466e-05,
      "loss": 0.9205,
      "step": 275420
    },
    {
      "epoch": 0.9653134824254277,
      "grad_norm": 2.75,
      "learning_rate": 3.767948887694676e-05,
      "loss": 0.8409,
      "step": 275430
    },
    {
      "epoch": 0.9653485299323232,
      "grad_norm": 2.8125,
      "learning_rate": 3.767883984828306e-05,
      "loss": 0.8121,
      "step": 275440
    },
    {
      "epoch": 0.9653835774392189,
      "grad_norm": 2.96875,
      "learning_rate": 3.767819081961936e-05,
      "loss": 0.8681,
      "step": 275450
    },
    {
      "epoch": 0.9654186249461144,
      "grad_norm": 3.03125,
      "learning_rate": 3.767754179095566e-05,
      "loss": 0.8634,
      "step": 275460
    },
    {
      "epoch": 0.9654536724530101,
      "grad_norm": 2.65625,
      "learning_rate": 3.767689276229195e-05,
      "loss": 0.8087,
      "step": 275470
    },
    {
      "epoch": 0.9654887199599057,
      "grad_norm": 2.6875,
      "learning_rate": 3.7676243733628254e-05,
      "loss": 1.0095,
      "step": 275480
    },
    {
      "epoch": 0.9655237674668012,
      "grad_norm": 2.90625,
      "learning_rate": 3.767559470496455e-05,
      "loss": 0.9011,
      "step": 275490
    },
    {
      "epoch": 0.9655588149736969,
      "grad_norm": 3.140625,
      "learning_rate": 3.767494567630085e-05,
      "loss": 0.886,
      "step": 275500
    },
    {
      "epoch": 0.9655938624805924,
      "grad_norm": 2.828125,
      "learning_rate": 3.7674296647637145e-05,
      "loss": 0.8981,
      "step": 275510
    },
    {
      "epoch": 0.9656289099874881,
      "grad_norm": 2.90625,
      "learning_rate": 3.7673647618973446e-05,
      "loss": 0.9343,
      "step": 275520
    },
    {
      "epoch": 0.9656639574943836,
      "grad_norm": 2.625,
      "learning_rate": 3.767299859030974e-05,
      "loss": 0.8812,
      "step": 275530
    },
    {
      "epoch": 0.9656990050012793,
      "grad_norm": 2.828125,
      "learning_rate": 3.767234956164604e-05,
      "loss": 0.914,
      "step": 275540
    },
    {
      "epoch": 0.9657340525081748,
      "grad_norm": 2.9375,
      "learning_rate": 3.7671700532982343e-05,
      "loss": 0.8744,
      "step": 275550
    },
    {
      "epoch": 0.9657691000150704,
      "grad_norm": 3.03125,
      "learning_rate": 3.767105150431864e-05,
      "loss": 0.9413,
      "step": 275560
    },
    {
      "epoch": 0.965804147521966,
      "grad_norm": 3.265625,
      "learning_rate": 3.767040247565494e-05,
      "loss": 0.9328,
      "step": 275570
    },
    {
      "epoch": 0.9658391950288616,
      "grad_norm": 3.171875,
      "learning_rate": 3.7669753446991234e-05,
      "loss": 0.9697,
      "step": 275580
    },
    {
      "epoch": 0.9658742425357573,
      "grad_norm": 2.421875,
      "learning_rate": 3.7669104418327535e-05,
      "loss": 0.9277,
      "step": 275590
    },
    {
      "epoch": 0.9659092900426528,
      "grad_norm": 3.15625,
      "learning_rate": 3.766845538966383e-05,
      "loss": 0.7977,
      "step": 275600
    },
    {
      "epoch": 0.9659443375495484,
      "grad_norm": 2.890625,
      "learning_rate": 3.766780636100013e-05,
      "loss": 0.8246,
      "step": 275610
    },
    {
      "epoch": 0.965979385056444,
      "grad_norm": 2.921875,
      "learning_rate": 3.7667157332336426e-05,
      "loss": 0.9489,
      "step": 275620
    },
    {
      "epoch": 0.9660144325633396,
      "grad_norm": 2.890625,
      "learning_rate": 3.766650830367273e-05,
      "loss": 0.9087,
      "step": 275630
    },
    {
      "epoch": 0.9660494800702352,
      "grad_norm": 2.953125,
      "learning_rate": 3.766585927500902e-05,
      "loss": 0.8735,
      "step": 275640
    },
    {
      "epoch": 0.9660845275771308,
      "grad_norm": 2.71875,
      "learning_rate": 3.7665210246345323e-05,
      "loss": 0.9275,
      "step": 275650
    },
    {
      "epoch": 0.9661195750840263,
      "grad_norm": 3.125,
      "learning_rate": 3.766456121768162e-05,
      "loss": 0.9035,
      "step": 275660
    },
    {
      "epoch": 0.966154622590922,
      "grad_norm": 3.125,
      "learning_rate": 3.766391218901792e-05,
      "loss": 0.9289,
      "step": 275670
    },
    {
      "epoch": 0.9661896700978176,
      "grad_norm": 2.796875,
      "learning_rate": 3.766326316035422e-05,
      "loss": 0.812,
      "step": 275680
    },
    {
      "epoch": 0.9662247176047132,
      "grad_norm": 2.6875,
      "learning_rate": 3.7662614131690515e-05,
      "loss": 0.805,
      "step": 275690
    },
    {
      "epoch": 0.9662597651116088,
      "grad_norm": 2.84375,
      "learning_rate": 3.766196510302682e-05,
      "loss": 0.8355,
      "step": 275700
    },
    {
      "epoch": 0.9662948126185044,
      "grad_norm": 2.65625,
      "learning_rate": 3.766131607436311e-05,
      "loss": 0.8526,
      "step": 275710
    },
    {
      "epoch": 0.9663298601254,
      "grad_norm": 2.96875,
      "learning_rate": 3.766066704569941e-05,
      "loss": 0.8921,
      "step": 275720
    },
    {
      "epoch": 0.9663649076322955,
      "grad_norm": 3.078125,
      "learning_rate": 3.766001801703571e-05,
      "loss": 0.9178,
      "step": 275730
    },
    {
      "epoch": 0.9663999551391912,
      "grad_norm": 2.859375,
      "learning_rate": 3.765936898837201e-05,
      "loss": 0.9088,
      "step": 275740
    },
    {
      "epoch": 0.9664350026460867,
      "grad_norm": 3.25,
      "learning_rate": 3.7658719959708303e-05,
      "loss": 0.9241,
      "step": 275750
    },
    {
      "epoch": 0.9664700501529824,
      "grad_norm": 3.21875,
      "learning_rate": 3.76580709310446e-05,
      "loss": 0.9055,
      "step": 275760
    },
    {
      "epoch": 0.9665050976598779,
      "grad_norm": 2.65625,
      "learning_rate": 3.76574219023809e-05,
      "loss": 0.8059,
      "step": 275770
    },
    {
      "epoch": 0.9665401451667736,
      "grad_norm": 2.5,
      "learning_rate": 3.7656772873717194e-05,
      "loss": 0.8359,
      "step": 275780
    },
    {
      "epoch": 0.9665751926736692,
      "grad_norm": 2.90625,
      "learning_rate": 3.7656123845053495e-05,
      "loss": 0.8832,
      "step": 275790
    },
    {
      "epoch": 0.9666102401805647,
      "grad_norm": 3.1875,
      "learning_rate": 3.765547481638979e-05,
      "loss": 0.8115,
      "step": 275800
    },
    {
      "epoch": 0.9666452876874604,
      "grad_norm": 3.171875,
      "learning_rate": 3.765482578772609e-05,
      "loss": 0.8845,
      "step": 275810
    },
    {
      "epoch": 0.9666803351943559,
      "grad_norm": 3.0625,
      "learning_rate": 3.7654176759062386e-05,
      "loss": 0.8415,
      "step": 275820
    },
    {
      "epoch": 0.9667153827012516,
      "grad_norm": 3.09375,
      "learning_rate": 3.765352773039869e-05,
      "loss": 0.8888,
      "step": 275830
    },
    {
      "epoch": 0.9667504302081471,
      "grad_norm": 3.046875,
      "learning_rate": 3.765287870173498e-05,
      "loss": 0.8467,
      "step": 275840
    },
    {
      "epoch": 0.9667854777150428,
      "grad_norm": 2.734375,
      "learning_rate": 3.7652229673071283e-05,
      "loss": 0.9258,
      "step": 275850
    },
    {
      "epoch": 0.9668205252219383,
      "grad_norm": 3.078125,
      "learning_rate": 3.765158064440758e-05,
      "loss": 0.8845,
      "step": 275860
    },
    {
      "epoch": 0.9668555727288339,
      "grad_norm": 2.703125,
      "learning_rate": 3.765093161574388e-05,
      "loss": 0.8632,
      "step": 275870
    },
    {
      "epoch": 0.9668906202357296,
      "grad_norm": 2.859375,
      "learning_rate": 3.7650282587080174e-05,
      "loss": 0.8806,
      "step": 275880
    },
    {
      "epoch": 0.9669256677426251,
      "grad_norm": 3.0625,
      "learning_rate": 3.7649633558416475e-05,
      "loss": 0.8197,
      "step": 275890
    },
    {
      "epoch": 0.9669607152495208,
      "grad_norm": 2.75,
      "learning_rate": 3.764898452975277e-05,
      "loss": 0.8343,
      "step": 275900
    },
    {
      "epoch": 0.9669957627564163,
      "grad_norm": 3.046875,
      "learning_rate": 3.764833550108907e-05,
      "loss": 0.851,
      "step": 275910
    },
    {
      "epoch": 0.967030810263312,
      "grad_norm": 2.8125,
      "learning_rate": 3.764768647242537e-05,
      "loss": 0.9494,
      "step": 275920
    },
    {
      "epoch": 0.9670658577702075,
      "grad_norm": 3.140625,
      "learning_rate": 3.764703744376167e-05,
      "loss": 0.9264,
      "step": 275930
    },
    {
      "epoch": 0.9671009052771031,
      "grad_norm": 3.21875,
      "learning_rate": 3.764638841509797e-05,
      "loss": 0.8966,
      "step": 275940
    },
    {
      "epoch": 0.9671359527839987,
      "grad_norm": 2.796875,
      "learning_rate": 3.7645739386434263e-05,
      "loss": 0.9188,
      "step": 275950
    },
    {
      "epoch": 0.9671710002908943,
      "grad_norm": 2.40625,
      "learning_rate": 3.7645090357770565e-05,
      "loss": 0.908,
      "step": 275960
    },
    {
      "epoch": 0.96720604779779,
      "grad_norm": 3.0,
      "learning_rate": 3.764444132910686e-05,
      "loss": 0.8674,
      "step": 275970
    },
    {
      "epoch": 0.9672410953046855,
      "grad_norm": 3.015625,
      "learning_rate": 3.764379230044316e-05,
      "loss": 0.9117,
      "step": 275980
    },
    {
      "epoch": 0.9672761428115811,
      "grad_norm": 3.53125,
      "learning_rate": 3.7643143271779455e-05,
      "loss": 0.966,
      "step": 275990
    },
    {
      "epoch": 0.9673111903184767,
      "grad_norm": 2.984375,
      "learning_rate": 3.764249424311576e-05,
      "loss": 0.9106,
      "step": 276000
    },
    {
      "epoch": 0.9673462378253723,
      "grad_norm": 2.625,
      "learning_rate": 3.764184521445205e-05,
      "loss": 0.8491,
      "step": 276010
    },
    {
      "epoch": 0.9673812853322679,
      "grad_norm": 3.078125,
      "learning_rate": 3.764119618578835e-05,
      "loss": 0.8173,
      "step": 276020
    },
    {
      "epoch": 0.9674163328391635,
      "grad_norm": 2.625,
      "learning_rate": 3.764054715712465e-05,
      "loss": 0.7987,
      "step": 276030
    },
    {
      "epoch": 0.967451380346059,
      "grad_norm": 3.171875,
      "learning_rate": 3.763989812846095e-05,
      "loss": 0.8839,
      "step": 276040
    },
    {
      "epoch": 0.9674864278529547,
      "grad_norm": 3.203125,
      "learning_rate": 3.763924909979725e-05,
      "loss": 0.8484,
      "step": 276050
    },
    {
      "epoch": 0.9675214753598502,
      "grad_norm": 3.1875,
      "learning_rate": 3.7638600071133545e-05,
      "loss": 0.9256,
      "step": 276060
    },
    {
      "epoch": 0.9675565228667459,
      "grad_norm": 2.859375,
      "learning_rate": 3.7637951042469846e-05,
      "loss": 0.8925,
      "step": 276070
    },
    {
      "epoch": 0.9675915703736415,
      "grad_norm": 3.390625,
      "learning_rate": 3.763730201380614e-05,
      "loss": 0.8912,
      "step": 276080
    },
    {
      "epoch": 0.967626617880537,
      "grad_norm": 2.96875,
      "learning_rate": 3.763665298514244e-05,
      "loss": 0.9264,
      "step": 276090
    },
    {
      "epoch": 0.9676616653874327,
      "grad_norm": 3.109375,
      "learning_rate": 3.763600395647874e-05,
      "loss": 0.8205,
      "step": 276100
    },
    {
      "epoch": 0.9676967128943282,
      "grad_norm": 4.5,
      "learning_rate": 3.763535492781504e-05,
      "loss": 0.8798,
      "step": 276110
    },
    {
      "epoch": 0.9677317604012239,
      "grad_norm": 2.78125,
      "learning_rate": 3.763470589915133e-05,
      "loss": 0.8137,
      "step": 276120
    },
    {
      "epoch": 0.9677668079081194,
      "grad_norm": 3.1875,
      "learning_rate": 3.763405687048763e-05,
      "loss": 0.9436,
      "step": 276130
    },
    {
      "epoch": 0.9678018554150151,
      "grad_norm": 2.921875,
      "learning_rate": 3.763340784182393e-05,
      "loss": 0.8409,
      "step": 276140
    },
    {
      "epoch": 0.9678369029219106,
      "grad_norm": 3.03125,
      "learning_rate": 3.763275881316022e-05,
      "loss": 0.7912,
      "step": 276150
    },
    {
      "epoch": 0.9678719504288062,
      "grad_norm": 3.15625,
      "learning_rate": 3.7632109784496525e-05,
      "loss": 0.9361,
      "step": 276160
    },
    {
      "epoch": 0.9679069979357019,
      "grad_norm": 2.640625,
      "learning_rate": 3.763146075583282e-05,
      "loss": 0.8568,
      "step": 276170
    },
    {
      "epoch": 0.9679420454425974,
      "grad_norm": 3.0,
      "learning_rate": 3.763081172716912e-05,
      "loss": 0.9495,
      "step": 276180
    },
    {
      "epoch": 0.9679770929494931,
      "grad_norm": 2.84375,
      "learning_rate": 3.7630162698505415e-05,
      "loss": 0.8444,
      "step": 276190
    },
    {
      "epoch": 0.9680121404563886,
      "grad_norm": 2.9375,
      "learning_rate": 3.762951366984172e-05,
      "loss": 0.7699,
      "step": 276200
    },
    {
      "epoch": 0.9680471879632843,
      "grad_norm": 3.078125,
      "learning_rate": 3.762886464117801e-05,
      "loss": 0.9309,
      "step": 276210
    },
    {
      "epoch": 0.9680822354701798,
      "grad_norm": 2.5625,
      "learning_rate": 3.762821561251431e-05,
      "loss": 0.8818,
      "step": 276220
    },
    {
      "epoch": 0.9681172829770754,
      "grad_norm": 2.859375,
      "learning_rate": 3.762756658385061e-05,
      "loss": 0.8622,
      "step": 276230
    },
    {
      "epoch": 0.968152330483971,
      "grad_norm": 2.875,
      "learning_rate": 3.762691755518691e-05,
      "loss": 0.9671,
      "step": 276240
    },
    {
      "epoch": 0.9681873779908666,
      "grad_norm": 2.890625,
      "learning_rate": 3.76262685265232e-05,
      "loss": 0.7792,
      "step": 276250
    },
    {
      "epoch": 0.9682224254977622,
      "grad_norm": 4.03125,
      "learning_rate": 3.7625619497859505e-05,
      "loss": 0.8822,
      "step": 276260
    },
    {
      "epoch": 0.9682574730046578,
      "grad_norm": 3.03125,
      "learning_rate": 3.76249704691958e-05,
      "loss": 0.9469,
      "step": 276270
    },
    {
      "epoch": 0.9682925205115535,
      "grad_norm": 3.046875,
      "learning_rate": 3.76243214405321e-05,
      "loss": 0.9194,
      "step": 276280
    },
    {
      "epoch": 0.968327568018449,
      "grad_norm": 2.640625,
      "learning_rate": 3.76236724118684e-05,
      "loss": 0.8206,
      "step": 276290
    },
    {
      "epoch": 0.9683626155253446,
      "grad_norm": 3.21875,
      "learning_rate": 3.76230233832047e-05,
      "loss": 0.9225,
      "step": 276300
    },
    {
      "epoch": 0.9683976630322402,
      "grad_norm": 2.953125,
      "learning_rate": 3.7622374354541e-05,
      "loss": 0.855,
      "step": 276310
    },
    {
      "epoch": 0.9684327105391358,
      "grad_norm": 3.171875,
      "learning_rate": 3.762172532587729e-05,
      "loss": 0.9054,
      "step": 276320
    },
    {
      "epoch": 0.9684677580460314,
      "grad_norm": 2.75,
      "learning_rate": 3.7621076297213594e-05,
      "loss": 0.8338,
      "step": 276330
    },
    {
      "epoch": 0.968502805552927,
      "grad_norm": 2.578125,
      "learning_rate": 3.762042726854989e-05,
      "loss": 0.8622,
      "step": 276340
    },
    {
      "epoch": 0.9685378530598225,
      "grad_norm": 3.203125,
      "learning_rate": 3.761977823988619e-05,
      "loss": 0.8356,
      "step": 276350
    },
    {
      "epoch": 0.9685729005667182,
      "grad_norm": 2.84375,
      "learning_rate": 3.7619129211222485e-05,
      "loss": 0.9756,
      "step": 276360
    },
    {
      "epoch": 0.9686079480736138,
      "grad_norm": 3.8125,
      "learning_rate": 3.7618480182558786e-05,
      "loss": 0.8872,
      "step": 276370
    },
    {
      "epoch": 0.9686429955805094,
      "grad_norm": 3.0,
      "learning_rate": 3.761783115389508e-05,
      "loss": 0.8849,
      "step": 276380
    },
    {
      "epoch": 0.968678043087405,
      "grad_norm": 2.84375,
      "learning_rate": 3.761718212523138e-05,
      "loss": 0.9285,
      "step": 276390
    },
    {
      "epoch": 0.9687130905943006,
      "grad_norm": 2.78125,
      "learning_rate": 3.761653309656768e-05,
      "loss": 0.8753,
      "step": 276400
    },
    {
      "epoch": 0.9687481381011962,
      "grad_norm": 2.875,
      "learning_rate": 3.761588406790398e-05,
      "loss": 0.8741,
      "step": 276410
    },
    {
      "epoch": 0.9687831856080917,
      "grad_norm": 2.890625,
      "learning_rate": 3.761523503924028e-05,
      "loss": 0.9093,
      "step": 276420
    },
    {
      "epoch": 0.9688182331149874,
      "grad_norm": 2.84375,
      "learning_rate": 3.7614586010576574e-05,
      "loss": 0.8384,
      "step": 276430
    },
    {
      "epoch": 0.9688532806218829,
      "grad_norm": 2.6875,
      "learning_rate": 3.7613936981912876e-05,
      "loss": 0.845,
      "step": 276440
    },
    {
      "epoch": 0.9688883281287786,
      "grad_norm": 3.296875,
      "learning_rate": 3.761328795324917e-05,
      "loss": 0.8585,
      "step": 276450
    },
    {
      "epoch": 0.9689233756356742,
      "grad_norm": 3.1875,
      "learning_rate": 3.761263892458547e-05,
      "loss": 0.8526,
      "step": 276460
    },
    {
      "epoch": 0.9689584231425697,
      "grad_norm": 2.9375,
      "learning_rate": 3.7611989895921766e-05,
      "loss": 0.8743,
      "step": 276470
    },
    {
      "epoch": 0.9689934706494654,
      "grad_norm": 2.5,
      "learning_rate": 3.761134086725807e-05,
      "loss": 0.8216,
      "step": 276480
    },
    {
      "epoch": 0.9690285181563609,
      "grad_norm": 3.125,
      "learning_rate": 3.761069183859436e-05,
      "loss": 0.8935,
      "step": 276490
    },
    {
      "epoch": 0.9690635656632566,
      "grad_norm": 2.859375,
      "learning_rate": 3.7610042809930664e-05,
      "loss": 0.8341,
      "step": 276500
    },
    {
      "epoch": 0.9690986131701521,
      "grad_norm": 2.765625,
      "learning_rate": 3.760939378126696e-05,
      "loss": 0.9118,
      "step": 276510
    },
    {
      "epoch": 0.9691336606770478,
      "grad_norm": 3.421875,
      "learning_rate": 3.760874475260325e-05,
      "loss": 0.872,
      "step": 276520
    },
    {
      "epoch": 0.9691687081839433,
      "grad_norm": 2.65625,
      "learning_rate": 3.7608095723939554e-05,
      "loss": 0.7801,
      "step": 276530
    },
    {
      "epoch": 0.9692037556908389,
      "grad_norm": 3.125,
      "learning_rate": 3.760744669527585e-05,
      "loss": 0.8134,
      "step": 276540
    },
    {
      "epoch": 0.9692388031977345,
      "grad_norm": 3.15625,
      "learning_rate": 3.760679766661215e-05,
      "loss": 0.7467,
      "step": 276550
    },
    {
      "epoch": 0.9692738507046301,
      "grad_norm": 3.21875,
      "learning_rate": 3.7606148637948445e-05,
      "loss": 0.9136,
      "step": 276560
    },
    {
      "epoch": 0.9693088982115258,
      "grad_norm": 2.90625,
      "learning_rate": 3.7605499609284746e-05,
      "loss": 0.8661,
      "step": 276570
    },
    {
      "epoch": 0.9693439457184213,
      "grad_norm": 3.203125,
      "learning_rate": 3.760485058062104e-05,
      "loss": 0.8173,
      "step": 276580
    },
    {
      "epoch": 0.969378993225317,
      "grad_norm": 3.28125,
      "learning_rate": 3.760420155195734e-05,
      "loss": 0.9131,
      "step": 276590
    },
    {
      "epoch": 0.9694140407322125,
      "grad_norm": 2.921875,
      "learning_rate": 3.760355252329364e-05,
      "loss": 0.8447,
      "step": 276600
    },
    {
      "epoch": 0.9694490882391081,
      "grad_norm": 3.015625,
      "learning_rate": 3.760290349462994e-05,
      "loss": 0.8154,
      "step": 276610
    },
    {
      "epoch": 0.9694841357460037,
      "grad_norm": 2.78125,
      "learning_rate": 3.760225446596623e-05,
      "loss": 0.828,
      "step": 276620
    },
    {
      "epoch": 0.9695191832528993,
      "grad_norm": 2.765625,
      "learning_rate": 3.7601605437302534e-05,
      "loss": 0.8575,
      "step": 276630
    },
    {
      "epoch": 0.9695542307597949,
      "grad_norm": 2.390625,
      "learning_rate": 3.7600956408638836e-05,
      "loss": 0.8512,
      "step": 276640
    },
    {
      "epoch": 0.9695892782666905,
      "grad_norm": 3.21875,
      "learning_rate": 3.760030737997513e-05,
      "loss": 0.9679,
      "step": 276650
    },
    {
      "epoch": 0.9696243257735861,
      "grad_norm": 2.96875,
      "learning_rate": 3.759965835131143e-05,
      "loss": 0.9101,
      "step": 276660
    },
    {
      "epoch": 0.9696593732804817,
      "grad_norm": 2.953125,
      "learning_rate": 3.7599009322647726e-05,
      "loss": 0.9411,
      "step": 276670
    },
    {
      "epoch": 0.9696944207873773,
      "grad_norm": 3.5,
      "learning_rate": 3.759836029398403e-05,
      "loss": 0.9056,
      "step": 276680
    },
    {
      "epoch": 0.9697294682942729,
      "grad_norm": 3.359375,
      "learning_rate": 3.759771126532032e-05,
      "loss": 0.921,
      "step": 276690
    },
    {
      "epoch": 0.9697645158011685,
      "grad_norm": 2.8125,
      "learning_rate": 3.7597062236656624e-05,
      "loss": 0.8105,
      "step": 276700
    },
    {
      "epoch": 0.969799563308064,
      "grad_norm": 2.71875,
      "learning_rate": 3.759641320799292e-05,
      "loss": 0.8342,
      "step": 276710
    },
    {
      "epoch": 0.9698346108149597,
      "grad_norm": 2.8125,
      "learning_rate": 3.759576417932922e-05,
      "loss": 0.8986,
      "step": 276720
    },
    {
      "epoch": 0.9698696583218552,
      "grad_norm": 2.703125,
      "learning_rate": 3.7595115150665514e-05,
      "loss": 0.8431,
      "step": 276730
    },
    {
      "epoch": 0.9699047058287509,
      "grad_norm": 2.734375,
      "learning_rate": 3.7594466122001816e-05,
      "loss": 0.9077,
      "step": 276740
    },
    {
      "epoch": 0.9699397533356464,
      "grad_norm": 3.25,
      "learning_rate": 3.759381709333811e-05,
      "loss": 0.9572,
      "step": 276750
    },
    {
      "epoch": 0.9699748008425421,
      "grad_norm": 3.109375,
      "learning_rate": 3.759316806467441e-05,
      "loss": 0.8292,
      "step": 276760
    },
    {
      "epoch": 0.9700098483494377,
      "grad_norm": 3.0,
      "learning_rate": 3.7592519036010706e-05,
      "loss": 0.7982,
      "step": 276770
    },
    {
      "epoch": 0.9700448958563332,
      "grad_norm": 3.015625,
      "learning_rate": 3.759187000734701e-05,
      "loss": 0.8835,
      "step": 276780
    },
    {
      "epoch": 0.9700799433632289,
      "grad_norm": 3.125,
      "learning_rate": 3.759122097868331e-05,
      "loss": 0.8944,
      "step": 276790
    },
    {
      "epoch": 0.9701149908701244,
      "grad_norm": 3.15625,
      "learning_rate": 3.7590571950019604e-05,
      "loss": 0.8038,
      "step": 276800
    },
    {
      "epoch": 0.9701500383770201,
      "grad_norm": 2.859375,
      "learning_rate": 3.7589922921355905e-05,
      "loss": 0.8891,
      "step": 276810
    },
    {
      "epoch": 0.9701850858839156,
      "grad_norm": 3.0,
      "learning_rate": 3.75892738926922e-05,
      "loss": 0.8222,
      "step": 276820
    },
    {
      "epoch": 0.9702201333908113,
      "grad_norm": 2.25,
      "learning_rate": 3.75886248640285e-05,
      "loss": 0.8691,
      "step": 276830
    },
    {
      "epoch": 0.9702551808977068,
      "grad_norm": 2.796875,
      "learning_rate": 3.7587975835364796e-05,
      "loss": 0.9416,
      "step": 276840
    },
    {
      "epoch": 0.9702902284046024,
      "grad_norm": 3.15625,
      "learning_rate": 3.75873268067011e-05,
      "loss": 0.9174,
      "step": 276850
    },
    {
      "epoch": 0.9703252759114981,
      "grad_norm": 2.90625,
      "learning_rate": 3.758667777803739e-05,
      "loss": 0.9668,
      "step": 276860
    },
    {
      "epoch": 0.9703603234183936,
      "grad_norm": 2.890625,
      "learning_rate": 3.758602874937369e-05,
      "loss": 0.8277,
      "step": 276870
    },
    {
      "epoch": 0.9703953709252893,
      "grad_norm": 2.859375,
      "learning_rate": 3.758537972070999e-05,
      "loss": 0.9116,
      "step": 276880
    },
    {
      "epoch": 0.9704304184321848,
      "grad_norm": 2.765625,
      "learning_rate": 3.758473069204628e-05,
      "loss": 0.8579,
      "step": 276890
    },
    {
      "epoch": 0.9704654659390805,
      "grad_norm": 2.890625,
      "learning_rate": 3.7584081663382584e-05,
      "loss": 0.8675,
      "step": 276900
    },
    {
      "epoch": 0.970500513445976,
      "grad_norm": 3.484375,
      "learning_rate": 3.758343263471888e-05,
      "loss": 0.8681,
      "step": 276910
    },
    {
      "epoch": 0.9705355609528716,
      "grad_norm": 3.109375,
      "learning_rate": 3.758278360605518e-05,
      "loss": 0.9334,
      "step": 276920
    },
    {
      "epoch": 0.9705706084597672,
      "grad_norm": 3.109375,
      "learning_rate": 3.7582134577391474e-05,
      "loss": 0.9526,
      "step": 276930
    },
    {
      "epoch": 0.9706056559666628,
      "grad_norm": 2.625,
      "learning_rate": 3.7581485548727776e-05,
      "loss": 0.8292,
      "step": 276940
    },
    {
      "epoch": 0.9706407034735585,
      "grad_norm": 3.0,
      "learning_rate": 3.758083652006407e-05,
      "loss": 0.9017,
      "step": 276950
    },
    {
      "epoch": 0.970675750980454,
      "grad_norm": 2.90625,
      "learning_rate": 3.758018749140037e-05,
      "loss": 0.9209,
      "step": 276960
    },
    {
      "epoch": 0.9707107984873496,
      "grad_norm": 2.96875,
      "learning_rate": 3.7579538462736666e-05,
      "loss": 0.8237,
      "step": 276970
    },
    {
      "epoch": 0.9707458459942452,
      "grad_norm": 2.953125,
      "learning_rate": 3.757888943407297e-05,
      "loss": 0.9072,
      "step": 276980
    },
    {
      "epoch": 0.9707808935011408,
      "grad_norm": 2.265625,
      "learning_rate": 3.757824040540926e-05,
      "loss": 0.8987,
      "step": 276990
    },
    {
      "epoch": 0.9708159410080364,
      "grad_norm": 2.96875,
      "learning_rate": 3.7577591376745564e-05,
      "loss": 0.9248,
      "step": 277000
    },
    {
      "epoch": 0.970850988514932,
      "grad_norm": 2.625,
      "learning_rate": 3.7576942348081865e-05,
      "loss": 0.8975,
      "step": 277010
    },
    {
      "epoch": 0.9708860360218275,
      "grad_norm": 3.0625,
      "learning_rate": 3.757629331941816e-05,
      "loss": 0.8398,
      "step": 277020
    },
    {
      "epoch": 0.9709210835287232,
      "grad_norm": 2.515625,
      "learning_rate": 3.757564429075446e-05,
      "loss": 0.8163,
      "step": 277030
    },
    {
      "epoch": 0.9709561310356187,
      "grad_norm": 2.640625,
      "learning_rate": 3.7574995262090756e-05,
      "loss": 0.8829,
      "step": 277040
    },
    {
      "epoch": 0.9709911785425144,
      "grad_norm": 2.890625,
      "learning_rate": 3.757434623342706e-05,
      "loss": 0.792,
      "step": 277050
    },
    {
      "epoch": 0.97102622604941,
      "grad_norm": 2.953125,
      "learning_rate": 3.757369720476335e-05,
      "loss": 0.8553,
      "step": 277060
    },
    {
      "epoch": 0.9710612735563056,
      "grad_norm": 3.046875,
      "learning_rate": 3.757304817609965e-05,
      "loss": 0.844,
      "step": 277070
    },
    {
      "epoch": 0.9710963210632012,
      "grad_norm": 3.53125,
      "learning_rate": 3.757239914743595e-05,
      "loss": 0.9931,
      "step": 277080
    },
    {
      "epoch": 0.9711313685700967,
      "grad_norm": 2.890625,
      "learning_rate": 3.757175011877225e-05,
      "loss": 0.9338,
      "step": 277090
    },
    {
      "epoch": 0.9711664160769924,
      "grad_norm": 3.28125,
      "learning_rate": 3.7571101090108544e-05,
      "loss": 0.8533,
      "step": 277100
    },
    {
      "epoch": 0.9712014635838879,
      "grad_norm": 2.5,
      "learning_rate": 3.7570452061444845e-05,
      "loss": 0.8535,
      "step": 277110
    },
    {
      "epoch": 0.9712365110907836,
      "grad_norm": 3.21875,
      "learning_rate": 3.756980303278114e-05,
      "loss": 0.8716,
      "step": 277120
    },
    {
      "epoch": 0.9712715585976791,
      "grad_norm": 3.078125,
      "learning_rate": 3.756915400411744e-05,
      "loss": 0.8493,
      "step": 277130
    },
    {
      "epoch": 0.9713066061045748,
      "grad_norm": 3.09375,
      "learning_rate": 3.7568504975453736e-05,
      "loss": 0.9118,
      "step": 277140
    },
    {
      "epoch": 0.9713416536114704,
      "grad_norm": 2.765625,
      "learning_rate": 3.756785594679004e-05,
      "loss": 0.8787,
      "step": 277150
    },
    {
      "epoch": 0.9713767011183659,
      "grad_norm": 3.453125,
      "learning_rate": 3.756720691812634e-05,
      "loss": 0.9645,
      "step": 277160
    },
    {
      "epoch": 0.9714117486252616,
      "grad_norm": 3.3125,
      "learning_rate": 3.756655788946263e-05,
      "loss": 0.8635,
      "step": 277170
    },
    {
      "epoch": 0.9714467961321571,
      "grad_norm": 2.90625,
      "learning_rate": 3.7565908860798934e-05,
      "loss": 0.8671,
      "step": 277180
    },
    {
      "epoch": 0.9714818436390528,
      "grad_norm": 2.78125,
      "learning_rate": 3.756525983213523e-05,
      "loss": 0.8,
      "step": 277190
    },
    {
      "epoch": 0.9715168911459483,
      "grad_norm": 2.78125,
      "learning_rate": 3.756461080347153e-05,
      "loss": 0.8439,
      "step": 277200
    },
    {
      "epoch": 0.971551938652844,
      "grad_norm": 2.484375,
      "learning_rate": 3.7563961774807825e-05,
      "loss": 0.8892,
      "step": 277210
    },
    {
      "epoch": 0.9715869861597395,
      "grad_norm": 2.640625,
      "learning_rate": 3.7563312746144126e-05,
      "loss": 0.8655,
      "step": 277220
    },
    {
      "epoch": 0.9716220336666351,
      "grad_norm": 2.515625,
      "learning_rate": 3.756266371748042e-05,
      "loss": 0.853,
      "step": 277230
    },
    {
      "epoch": 0.9716570811735307,
      "grad_norm": 2.921875,
      "learning_rate": 3.756201468881672e-05,
      "loss": 0.8763,
      "step": 277240
    },
    {
      "epoch": 0.9716921286804263,
      "grad_norm": 2.84375,
      "learning_rate": 3.756136566015302e-05,
      "loss": 0.8232,
      "step": 277250
    },
    {
      "epoch": 0.971727176187322,
      "grad_norm": 3.25,
      "learning_rate": 3.756071663148931e-05,
      "loss": 0.8599,
      "step": 277260
    },
    {
      "epoch": 0.9717622236942175,
      "grad_norm": 2.671875,
      "learning_rate": 3.756006760282561e-05,
      "loss": 0.8307,
      "step": 277270
    },
    {
      "epoch": 0.9717972712011131,
      "grad_norm": 2.59375,
      "learning_rate": 3.755941857416191e-05,
      "loss": 0.8173,
      "step": 277280
    },
    {
      "epoch": 0.9718323187080087,
      "grad_norm": 2.984375,
      "learning_rate": 3.755876954549821e-05,
      "loss": 0.8676,
      "step": 277290
    },
    {
      "epoch": 0.9718673662149043,
      "grad_norm": 2.84375,
      "learning_rate": 3.7558120516834504e-05,
      "loss": 0.8459,
      "step": 277300
    },
    {
      "epoch": 0.9719024137217999,
      "grad_norm": 2.734375,
      "learning_rate": 3.7557471488170805e-05,
      "loss": 0.836,
      "step": 277310
    },
    {
      "epoch": 0.9719374612286955,
      "grad_norm": 3.015625,
      "learning_rate": 3.75568224595071e-05,
      "loss": 0.9928,
      "step": 277320
    },
    {
      "epoch": 0.971972508735591,
      "grad_norm": 2.890625,
      "learning_rate": 3.75561734308434e-05,
      "loss": 0.8256,
      "step": 277330
    },
    {
      "epoch": 0.9720075562424867,
      "grad_norm": 2.859375,
      "learning_rate": 3.7555524402179696e-05,
      "loss": 0.9557,
      "step": 277340
    },
    {
      "epoch": 0.9720426037493823,
      "grad_norm": 3.046875,
      "learning_rate": 3.7554875373516e-05,
      "loss": 0.8564,
      "step": 277350
    },
    {
      "epoch": 0.9720776512562779,
      "grad_norm": 2.921875,
      "learning_rate": 3.755422634485229e-05,
      "loss": 0.8607,
      "step": 277360
    },
    {
      "epoch": 0.9721126987631735,
      "grad_norm": 2.75,
      "learning_rate": 3.755357731618859e-05,
      "loss": 0.8313,
      "step": 277370
    },
    {
      "epoch": 0.9721477462700691,
      "grad_norm": 2.90625,
      "learning_rate": 3.7552928287524894e-05,
      "loss": 0.9148,
      "step": 277380
    },
    {
      "epoch": 0.9721827937769647,
      "grad_norm": 2.875,
      "learning_rate": 3.755227925886119e-05,
      "loss": 0.7524,
      "step": 277390
    },
    {
      "epoch": 0.9722178412838602,
      "grad_norm": 2.90625,
      "learning_rate": 3.755163023019749e-05,
      "loss": 0.8441,
      "step": 277400
    },
    {
      "epoch": 0.9722528887907559,
      "grad_norm": 2.859375,
      "learning_rate": 3.7550981201533785e-05,
      "loss": 0.8407,
      "step": 277410
    },
    {
      "epoch": 0.9722879362976514,
      "grad_norm": 2.890625,
      "learning_rate": 3.7550332172870086e-05,
      "loss": 0.9534,
      "step": 277420
    },
    {
      "epoch": 0.9723229838045471,
      "grad_norm": 3.09375,
      "learning_rate": 3.754968314420638e-05,
      "loss": 0.8232,
      "step": 277430
    },
    {
      "epoch": 0.9723580313114426,
      "grad_norm": 2.484375,
      "learning_rate": 3.754903411554268e-05,
      "loss": 0.933,
      "step": 277440
    },
    {
      "epoch": 0.9723930788183383,
      "grad_norm": 3.140625,
      "learning_rate": 3.754838508687898e-05,
      "loss": 0.9048,
      "step": 277450
    },
    {
      "epoch": 0.9724281263252339,
      "grad_norm": 2.40625,
      "learning_rate": 3.754773605821528e-05,
      "loss": 0.9168,
      "step": 277460
    },
    {
      "epoch": 0.9724631738321294,
      "grad_norm": 3.421875,
      "learning_rate": 3.754708702955157e-05,
      "loss": 0.8383,
      "step": 277470
    },
    {
      "epoch": 0.9724982213390251,
      "grad_norm": 2.921875,
      "learning_rate": 3.7546438000887874e-05,
      "loss": 0.9221,
      "step": 277480
    },
    {
      "epoch": 0.9725332688459206,
      "grad_norm": 2.8125,
      "learning_rate": 3.754578897222417e-05,
      "loss": 0.8108,
      "step": 277490
    },
    {
      "epoch": 0.9725683163528163,
      "grad_norm": 3.125,
      "learning_rate": 3.754513994356047e-05,
      "loss": 0.8721,
      "step": 277500
    },
    {
      "epoch": 0.9726033638597118,
      "grad_norm": 2.921875,
      "learning_rate": 3.7544490914896765e-05,
      "loss": 0.8472,
      "step": 277510
    },
    {
      "epoch": 0.9726384113666074,
      "grad_norm": 3.140625,
      "learning_rate": 3.7543841886233066e-05,
      "loss": 0.8816,
      "step": 277520
    },
    {
      "epoch": 0.972673458873503,
      "grad_norm": 2.734375,
      "learning_rate": 3.754319285756937e-05,
      "loss": 0.861,
      "step": 277530
    },
    {
      "epoch": 0.9727085063803986,
      "grad_norm": 3.0625,
      "learning_rate": 3.754254382890566e-05,
      "loss": 0.8993,
      "step": 277540
    },
    {
      "epoch": 0.9727435538872943,
      "grad_norm": 2.890625,
      "learning_rate": 3.7541894800241964e-05,
      "loss": 0.7959,
      "step": 277550
    },
    {
      "epoch": 0.9727786013941898,
      "grad_norm": 2.734375,
      "learning_rate": 3.754124577157826e-05,
      "loss": 0.866,
      "step": 277560
    },
    {
      "epoch": 0.9728136489010855,
      "grad_norm": 2.5625,
      "learning_rate": 3.754059674291456e-05,
      "loss": 0.8586,
      "step": 277570
    },
    {
      "epoch": 0.972848696407981,
      "grad_norm": 2.953125,
      "learning_rate": 3.7539947714250854e-05,
      "loss": 0.8116,
      "step": 277580
    },
    {
      "epoch": 0.9728837439148766,
      "grad_norm": 3.15625,
      "learning_rate": 3.7539298685587156e-05,
      "loss": 0.9108,
      "step": 277590
    },
    {
      "epoch": 0.9729187914217722,
      "grad_norm": 2.828125,
      "learning_rate": 3.753864965692345e-05,
      "loss": 0.9543,
      "step": 277600
    },
    {
      "epoch": 0.9729538389286678,
      "grad_norm": 3.234375,
      "learning_rate": 3.753800062825975e-05,
      "loss": 0.8075,
      "step": 277610
    },
    {
      "epoch": 0.9729888864355634,
      "grad_norm": 3.171875,
      "learning_rate": 3.7537351599596046e-05,
      "loss": 0.8865,
      "step": 277620
    },
    {
      "epoch": 0.973023933942459,
      "grad_norm": 2.96875,
      "learning_rate": 3.753670257093235e-05,
      "loss": 0.8642,
      "step": 277630
    },
    {
      "epoch": 0.9730589814493547,
      "grad_norm": 3.28125,
      "learning_rate": 3.753605354226864e-05,
      "loss": 0.9664,
      "step": 277640
    },
    {
      "epoch": 0.9730940289562502,
      "grad_norm": 2.84375,
      "learning_rate": 3.753540451360494e-05,
      "loss": 0.8652,
      "step": 277650
    },
    {
      "epoch": 0.9731290764631458,
      "grad_norm": 3.34375,
      "learning_rate": 3.753475548494124e-05,
      "loss": 0.9028,
      "step": 277660
    },
    {
      "epoch": 0.9731641239700414,
      "grad_norm": 2.984375,
      "learning_rate": 3.753410645627753e-05,
      "loss": 0.7964,
      "step": 277670
    },
    {
      "epoch": 0.973199171476937,
      "grad_norm": 2.984375,
      "learning_rate": 3.7533457427613834e-05,
      "loss": 0.8727,
      "step": 277680
    },
    {
      "epoch": 0.9732342189838326,
      "grad_norm": 2.859375,
      "learning_rate": 3.753280839895013e-05,
      "loss": 0.8265,
      "step": 277690
    },
    {
      "epoch": 0.9732692664907282,
      "grad_norm": 2.6875,
      "learning_rate": 3.753215937028643e-05,
      "loss": 0.8873,
      "step": 277700
    },
    {
      "epoch": 0.9733043139976237,
      "grad_norm": 2.609375,
      "learning_rate": 3.7531510341622725e-05,
      "loss": 0.8291,
      "step": 277710
    },
    {
      "epoch": 0.9733393615045194,
      "grad_norm": 2.734375,
      "learning_rate": 3.7530861312959026e-05,
      "loss": 0.8521,
      "step": 277720
    },
    {
      "epoch": 0.9733744090114149,
      "grad_norm": 3.109375,
      "learning_rate": 3.753021228429532e-05,
      "loss": 0.8714,
      "step": 277730
    },
    {
      "epoch": 0.9734094565183106,
      "grad_norm": 3.125,
      "learning_rate": 3.752956325563162e-05,
      "loss": 0.9018,
      "step": 277740
    },
    {
      "epoch": 0.9734445040252062,
      "grad_norm": 3.109375,
      "learning_rate": 3.7528914226967924e-05,
      "loss": 0.8329,
      "step": 277750
    },
    {
      "epoch": 0.9734795515321018,
      "grad_norm": 2.46875,
      "learning_rate": 3.752826519830422e-05,
      "loss": 0.8991,
      "step": 277760
    },
    {
      "epoch": 0.9735145990389974,
      "grad_norm": 2.890625,
      "learning_rate": 3.752761616964052e-05,
      "loss": 0.9102,
      "step": 277770
    },
    {
      "epoch": 0.9735496465458929,
      "grad_norm": 2.609375,
      "learning_rate": 3.7526967140976814e-05,
      "loss": 0.9252,
      "step": 277780
    },
    {
      "epoch": 0.9735846940527886,
      "grad_norm": 3.265625,
      "learning_rate": 3.7526318112313116e-05,
      "loss": 0.9448,
      "step": 277790
    },
    {
      "epoch": 0.9736197415596841,
      "grad_norm": 2.90625,
      "learning_rate": 3.752566908364941e-05,
      "loss": 0.8384,
      "step": 277800
    },
    {
      "epoch": 0.9736547890665798,
      "grad_norm": 2.921875,
      "learning_rate": 3.752502005498571e-05,
      "loss": 0.8325,
      "step": 277810
    },
    {
      "epoch": 0.9736898365734753,
      "grad_norm": 2.984375,
      "learning_rate": 3.7524371026322006e-05,
      "loss": 0.9103,
      "step": 277820
    },
    {
      "epoch": 0.973724884080371,
      "grad_norm": 2.953125,
      "learning_rate": 3.752372199765831e-05,
      "loss": 0.8668,
      "step": 277830
    },
    {
      "epoch": 0.9737599315872666,
      "grad_norm": 2.8125,
      "learning_rate": 3.75230729689946e-05,
      "loss": 0.8684,
      "step": 277840
    },
    {
      "epoch": 0.9737949790941621,
      "grad_norm": 2.890625,
      "learning_rate": 3.7522423940330904e-05,
      "loss": 0.8156,
      "step": 277850
    },
    {
      "epoch": 0.9738300266010578,
      "grad_norm": 2.46875,
      "learning_rate": 3.75217749116672e-05,
      "loss": 0.8515,
      "step": 277860
    },
    {
      "epoch": 0.9738650741079533,
      "grad_norm": 2.9375,
      "learning_rate": 3.75211258830035e-05,
      "loss": 0.858,
      "step": 277870
    },
    {
      "epoch": 0.973900121614849,
      "grad_norm": 2.859375,
      "learning_rate": 3.75204768543398e-05,
      "loss": 0.8918,
      "step": 277880
    },
    {
      "epoch": 0.9739351691217445,
      "grad_norm": 3.09375,
      "learning_rate": 3.7519827825676096e-05,
      "loss": 0.9083,
      "step": 277890
    },
    {
      "epoch": 0.9739702166286401,
      "grad_norm": 3.125,
      "learning_rate": 3.75191787970124e-05,
      "loss": 0.8311,
      "step": 277900
    },
    {
      "epoch": 0.9740052641355357,
      "grad_norm": 2.734375,
      "learning_rate": 3.751852976834869e-05,
      "loss": 0.826,
      "step": 277910
    },
    {
      "epoch": 0.9740403116424313,
      "grad_norm": 2.671875,
      "learning_rate": 3.751788073968499e-05,
      "loss": 0.8869,
      "step": 277920
    },
    {
      "epoch": 0.9740753591493269,
      "grad_norm": 2.796875,
      "learning_rate": 3.751723171102129e-05,
      "loss": 0.8968,
      "step": 277930
    },
    {
      "epoch": 0.9741104066562225,
      "grad_norm": 2.578125,
      "learning_rate": 3.751658268235759e-05,
      "loss": 0.8361,
      "step": 277940
    },
    {
      "epoch": 0.9741454541631182,
      "grad_norm": 2.390625,
      "learning_rate": 3.7515933653693884e-05,
      "loss": 0.8228,
      "step": 277950
    },
    {
      "epoch": 0.9741805016700137,
      "grad_norm": 3.359375,
      "learning_rate": 3.7515284625030185e-05,
      "loss": 0.9025,
      "step": 277960
    },
    {
      "epoch": 0.9742155491769093,
      "grad_norm": 2.6875,
      "learning_rate": 3.751463559636648e-05,
      "loss": 0.921,
      "step": 277970
    },
    {
      "epoch": 0.9742505966838049,
      "grad_norm": 3.09375,
      "learning_rate": 3.751398656770278e-05,
      "loss": 0.9016,
      "step": 277980
    },
    {
      "epoch": 0.9742856441907005,
      "grad_norm": 2.640625,
      "learning_rate": 3.7513337539039076e-05,
      "loss": 0.8337,
      "step": 277990
    },
    {
      "epoch": 0.974320691697596,
      "grad_norm": 2.59375,
      "learning_rate": 3.751268851037538e-05,
      "loss": 0.8809,
      "step": 278000
    },
    {
      "epoch": 0.9743557392044917,
      "grad_norm": 2.703125,
      "learning_rate": 3.751203948171167e-05,
      "loss": 0.967,
      "step": 278010
    },
    {
      "epoch": 0.9743907867113872,
      "grad_norm": 3.4375,
      "learning_rate": 3.7511390453047966e-05,
      "loss": 0.8332,
      "step": 278020
    },
    {
      "epoch": 0.9744258342182829,
      "grad_norm": 2.671875,
      "learning_rate": 3.751074142438427e-05,
      "loss": 0.9087,
      "step": 278030
    },
    {
      "epoch": 0.9744608817251785,
      "grad_norm": 2.984375,
      "learning_rate": 3.751009239572056e-05,
      "loss": 0.9205,
      "step": 278040
    },
    {
      "epoch": 0.9744959292320741,
      "grad_norm": 2.984375,
      "learning_rate": 3.7509443367056864e-05,
      "loss": 0.8549,
      "step": 278050
    },
    {
      "epoch": 0.9745309767389697,
      "grad_norm": 2.96875,
      "learning_rate": 3.750879433839316e-05,
      "loss": 0.8693,
      "step": 278060
    },
    {
      "epoch": 0.9745660242458652,
      "grad_norm": 3.0,
      "learning_rate": 3.750814530972946e-05,
      "loss": 0.8543,
      "step": 278070
    },
    {
      "epoch": 0.9746010717527609,
      "grad_norm": 2.984375,
      "learning_rate": 3.7507496281065754e-05,
      "loss": 0.8929,
      "step": 278080
    },
    {
      "epoch": 0.9746361192596564,
      "grad_norm": 2.265625,
      "learning_rate": 3.7506847252402056e-05,
      "loss": 0.7789,
      "step": 278090
    },
    {
      "epoch": 0.9746711667665521,
      "grad_norm": 3.234375,
      "learning_rate": 3.750619822373835e-05,
      "loss": 0.8069,
      "step": 278100
    },
    {
      "epoch": 0.9747062142734476,
      "grad_norm": 3.078125,
      "learning_rate": 3.750554919507465e-05,
      "loss": 0.843,
      "step": 278110
    },
    {
      "epoch": 0.9747412617803433,
      "grad_norm": 2.96875,
      "learning_rate": 3.750490016641095e-05,
      "loss": 0.9255,
      "step": 278120
    },
    {
      "epoch": 0.9747763092872389,
      "grad_norm": 3.03125,
      "learning_rate": 3.750425113774725e-05,
      "loss": 0.8904,
      "step": 278130
    },
    {
      "epoch": 0.9748113567941344,
      "grad_norm": 2.9375,
      "learning_rate": 3.750360210908355e-05,
      "loss": 0.8296,
      "step": 278140
    },
    {
      "epoch": 0.9748464043010301,
      "grad_norm": 2.875,
      "learning_rate": 3.7502953080419844e-05,
      "loss": 0.8149,
      "step": 278150
    },
    {
      "epoch": 0.9748814518079256,
      "grad_norm": 2.90625,
      "learning_rate": 3.7502304051756145e-05,
      "loss": 0.8422,
      "step": 278160
    },
    {
      "epoch": 0.9749164993148213,
      "grad_norm": 3.15625,
      "learning_rate": 3.750165502309244e-05,
      "loss": 0.9034,
      "step": 278170
    },
    {
      "epoch": 0.9749515468217168,
      "grad_norm": 2.8125,
      "learning_rate": 3.750100599442874e-05,
      "loss": 0.8387,
      "step": 278180
    },
    {
      "epoch": 0.9749865943286125,
      "grad_norm": 3.015625,
      "learning_rate": 3.7500356965765036e-05,
      "loss": 0.8349,
      "step": 278190
    },
    {
      "epoch": 0.975021641835508,
      "grad_norm": 2.96875,
      "learning_rate": 3.749970793710134e-05,
      "loss": 0.8598,
      "step": 278200
    },
    {
      "epoch": 0.9750566893424036,
      "grad_norm": 2.5,
      "learning_rate": 3.749905890843763e-05,
      "loss": 0.8419,
      "step": 278210
    },
    {
      "epoch": 0.9750917368492992,
      "grad_norm": 2.9375,
      "learning_rate": 3.749840987977393e-05,
      "loss": 0.8904,
      "step": 278220
    },
    {
      "epoch": 0.9751267843561948,
      "grad_norm": 3.375,
      "learning_rate": 3.749776085111023e-05,
      "loss": 0.8618,
      "step": 278230
    },
    {
      "epoch": 0.9751618318630905,
      "grad_norm": 2.4375,
      "learning_rate": 3.749711182244653e-05,
      "loss": 0.866,
      "step": 278240
    },
    {
      "epoch": 0.975196879369986,
      "grad_norm": 2.4375,
      "learning_rate": 3.749646279378283e-05,
      "loss": 0.8017,
      "step": 278250
    },
    {
      "epoch": 0.9752319268768816,
      "grad_norm": 2.640625,
      "learning_rate": 3.7495813765119125e-05,
      "loss": 0.8592,
      "step": 278260
    },
    {
      "epoch": 0.9752669743837772,
      "grad_norm": 3.703125,
      "learning_rate": 3.7495164736455426e-05,
      "loss": 0.8888,
      "step": 278270
    },
    {
      "epoch": 0.9753020218906728,
      "grad_norm": 3.140625,
      "learning_rate": 3.749451570779172e-05,
      "loss": 0.8839,
      "step": 278280
    },
    {
      "epoch": 0.9753370693975684,
      "grad_norm": 2.859375,
      "learning_rate": 3.749386667912802e-05,
      "loss": 0.8015,
      "step": 278290
    },
    {
      "epoch": 0.975372116904464,
      "grad_norm": 2.609375,
      "learning_rate": 3.749321765046432e-05,
      "loss": 0.8785,
      "step": 278300
    },
    {
      "epoch": 0.9754071644113596,
      "grad_norm": 2.953125,
      "learning_rate": 3.749256862180062e-05,
      "loss": 0.842,
      "step": 278310
    },
    {
      "epoch": 0.9754422119182552,
      "grad_norm": 2.6875,
      "learning_rate": 3.749191959313691e-05,
      "loss": 0.8327,
      "step": 278320
    },
    {
      "epoch": 0.9754772594251508,
      "grad_norm": 2.59375,
      "learning_rate": 3.7491270564473214e-05,
      "loss": 0.9986,
      "step": 278330
    },
    {
      "epoch": 0.9755123069320464,
      "grad_norm": 2.625,
      "learning_rate": 3.749062153580951e-05,
      "loss": 0.9184,
      "step": 278340
    },
    {
      "epoch": 0.975547354438942,
      "grad_norm": 2.75,
      "learning_rate": 3.748997250714581e-05,
      "loss": 0.9167,
      "step": 278350
    },
    {
      "epoch": 0.9755824019458376,
      "grad_norm": 3.078125,
      "learning_rate": 3.7489323478482105e-05,
      "loss": 0.9065,
      "step": 278360
    },
    {
      "epoch": 0.9756174494527332,
      "grad_norm": 3.0625,
      "learning_rate": 3.7488674449818406e-05,
      "loss": 0.8488,
      "step": 278370
    },
    {
      "epoch": 0.9756524969596287,
      "grad_norm": 3.171875,
      "learning_rate": 3.74880254211547e-05,
      "loss": 0.9649,
      "step": 278380
    },
    {
      "epoch": 0.9756875444665244,
      "grad_norm": 2.625,
      "learning_rate": 3.7487376392490996e-05,
      "loss": 0.8647,
      "step": 278390
    },
    {
      "epoch": 0.9757225919734199,
      "grad_norm": 3.078125,
      "learning_rate": 3.74867273638273e-05,
      "loss": 0.9,
      "step": 278400
    },
    {
      "epoch": 0.9757576394803156,
      "grad_norm": 2.609375,
      "learning_rate": 3.748607833516359e-05,
      "loss": 0.7458,
      "step": 278410
    },
    {
      "epoch": 0.9757926869872111,
      "grad_norm": 4.53125,
      "learning_rate": 3.748542930649989e-05,
      "loss": 0.8865,
      "step": 278420
    },
    {
      "epoch": 0.9758277344941068,
      "grad_norm": 2.875,
      "learning_rate": 3.748478027783619e-05,
      "loss": 0.9018,
      "step": 278430
    },
    {
      "epoch": 0.9758627820010024,
      "grad_norm": 3.5625,
      "learning_rate": 3.748413124917249e-05,
      "loss": 0.8327,
      "step": 278440
    },
    {
      "epoch": 0.9758978295078979,
      "grad_norm": 3.25,
      "learning_rate": 3.7483482220508784e-05,
      "loss": 0.8624,
      "step": 278450
    },
    {
      "epoch": 0.9759328770147936,
      "grad_norm": 3.171875,
      "learning_rate": 3.7482833191845085e-05,
      "loss": 0.9142,
      "step": 278460
    },
    {
      "epoch": 0.9759679245216891,
      "grad_norm": 2.6875,
      "learning_rate": 3.748218416318138e-05,
      "loss": 0.8138,
      "step": 278470
    },
    {
      "epoch": 0.9760029720285848,
      "grad_norm": 2.96875,
      "learning_rate": 3.748153513451768e-05,
      "loss": 0.8446,
      "step": 278480
    },
    {
      "epoch": 0.9760380195354803,
      "grad_norm": 3.15625,
      "learning_rate": 3.748088610585398e-05,
      "loss": 0.8806,
      "step": 278490
    },
    {
      "epoch": 0.976073067042376,
      "grad_norm": 2.640625,
      "learning_rate": 3.748023707719028e-05,
      "loss": 0.839,
      "step": 278500
    },
    {
      "epoch": 0.9761081145492715,
      "grad_norm": 3.203125,
      "learning_rate": 3.747958804852658e-05,
      "loss": 0.9738,
      "step": 278510
    },
    {
      "epoch": 0.9761431620561671,
      "grad_norm": 3.296875,
      "learning_rate": 3.747893901986287e-05,
      "loss": 0.859,
      "step": 278520
    },
    {
      "epoch": 0.9761782095630628,
      "grad_norm": 3.046875,
      "learning_rate": 3.7478289991199174e-05,
      "loss": 0.8773,
      "step": 278530
    },
    {
      "epoch": 0.9762132570699583,
      "grad_norm": 2.796875,
      "learning_rate": 3.747764096253547e-05,
      "loss": 0.906,
      "step": 278540
    },
    {
      "epoch": 0.976248304576854,
      "grad_norm": 2.703125,
      "learning_rate": 3.747699193387177e-05,
      "loss": 0.8206,
      "step": 278550
    },
    {
      "epoch": 0.9762833520837495,
      "grad_norm": 2.703125,
      "learning_rate": 3.7476342905208065e-05,
      "loss": 0.9046,
      "step": 278560
    },
    {
      "epoch": 0.9763183995906451,
      "grad_norm": 2.65625,
      "learning_rate": 3.7475693876544366e-05,
      "loss": 0.8748,
      "step": 278570
    },
    {
      "epoch": 0.9763534470975407,
      "grad_norm": 3.1875,
      "learning_rate": 3.747504484788066e-05,
      "loss": 0.8688,
      "step": 278580
    },
    {
      "epoch": 0.9763884946044363,
      "grad_norm": 3.0,
      "learning_rate": 3.747439581921696e-05,
      "loss": 0.8445,
      "step": 278590
    },
    {
      "epoch": 0.9764235421113319,
      "grad_norm": 3.0,
      "learning_rate": 3.747374679055326e-05,
      "loss": 0.7671,
      "step": 278600
    },
    {
      "epoch": 0.9764585896182275,
      "grad_norm": 3.03125,
      "learning_rate": 3.747309776188956e-05,
      "loss": 0.8345,
      "step": 278610
    },
    {
      "epoch": 0.9764936371251232,
      "grad_norm": 2.921875,
      "learning_rate": 3.747244873322586e-05,
      "loss": 0.8913,
      "step": 278620
    },
    {
      "epoch": 0.9765286846320187,
      "grad_norm": 2.859375,
      "learning_rate": 3.7471799704562154e-05,
      "loss": 0.8239,
      "step": 278630
    },
    {
      "epoch": 0.9765637321389143,
      "grad_norm": 2.890625,
      "learning_rate": 3.7471150675898456e-05,
      "loss": 0.8859,
      "step": 278640
    },
    {
      "epoch": 0.9765987796458099,
      "grad_norm": 3.1875,
      "learning_rate": 3.747050164723475e-05,
      "loss": 0.8033,
      "step": 278650
    },
    {
      "epoch": 0.9766338271527055,
      "grad_norm": 3.3125,
      "learning_rate": 3.746985261857105e-05,
      "loss": 0.9239,
      "step": 278660
    },
    {
      "epoch": 0.9766688746596011,
      "grad_norm": 3.0625,
      "learning_rate": 3.7469203589907346e-05,
      "loss": 0.8892,
      "step": 278670
    },
    {
      "epoch": 0.9767039221664967,
      "grad_norm": 3.484375,
      "learning_rate": 3.746855456124365e-05,
      "loss": 0.969,
      "step": 278680
    },
    {
      "epoch": 0.9767389696733922,
      "grad_norm": 2.859375,
      "learning_rate": 3.746790553257994e-05,
      "loss": 0.9088,
      "step": 278690
    },
    {
      "epoch": 0.9767740171802879,
      "grad_norm": 2.78125,
      "learning_rate": 3.7467256503916244e-05,
      "loss": 0.9004,
      "step": 278700
    },
    {
      "epoch": 0.9768090646871834,
      "grad_norm": 3.1875,
      "learning_rate": 3.746660747525254e-05,
      "loss": 0.8854,
      "step": 278710
    },
    {
      "epoch": 0.9768441121940791,
      "grad_norm": 2.859375,
      "learning_rate": 3.746595844658884e-05,
      "loss": 0.832,
      "step": 278720
    },
    {
      "epoch": 0.9768791597009747,
      "grad_norm": 2.796875,
      "learning_rate": 3.7465309417925134e-05,
      "loss": 0.8955,
      "step": 278730
    },
    {
      "epoch": 0.9769142072078703,
      "grad_norm": 2.8125,
      "learning_rate": 3.7464660389261436e-05,
      "loss": 0.887,
      "step": 278740
    },
    {
      "epoch": 0.9769492547147659,
      "grad_norm": 2.984375,
      "learning_rate": 3.746401136059774e-05,
      "loss": 0.8635,
      "step": 278750
    },
    {
      "epoch": 0.9769843022216614,
      "grad_norm": 3.125,
      "learning_rate": 3.7463362331934025e-05,
      "loss": 0.8622,
      "step": 278760
    },
    {
      "epoch": 0.9770193497285571,
      "grad_norm": 3.1875,
      "learning_rate": 3.7462713303270326e-05,
      "loss": 0.8858,
      "step": 278770
    },
    {
      "epoch": 0.9770543972354526,
      "grad_norm": 2.5,
      "learning_rate": 3.746206427460662e-05,
      "loss": 0.859,
      "step": 278780
    },
    {
      "epoch": 0.9770894447423483,
      "grad_norm": 2.8125,
      "learning_rate": 3.746141524594292e-05,
      "loss": 0.8885,
      "step": 278790
    },
    {
      "epoch": 0.9771244922492438,
      "grad_norm": 3.171875,
      "learning_rate": 3.746076621727922e-05,
      "loss": 0.7917,
      "step": 278800
    },
    {
      "epoch": 0.9771595397561395,
      "grad_norm": 2.859375,
      "learning_rate": 3.746011718861552e-05,
      "loss": 0.8574,
      "step": 278810
    },
    {
      "epoch": 0.9771945872630351,
      "grad_norm": 2.765625,
      "learning_rate": 3.745946815995181e-05,
      "loss": 0.9202,
      "step": 278820
    },
    {
      "epoch": 0.9772296347699306,
      "grad_norm": 3.046875,
      "learning_rate": 3.7458819131288114e-05,
      "loss": 0.8989,
      "step": 278830
    },
    {
      "epoch": 0.9772646822768263,
      "grad_norm": 2.984375,
      "learning_rate": 3.7458170102624416e-05,
      "loss": 0.8666,
      "step": 278840
    },
    {
      "epoch": 0.9772997297837218,
      "grad_norm": 2.640625,
      "learning_rate": 3.745752107396071e-05,
      "loss": 0.8966,
      "step": 278850
    },
    {
      "epoch": 0.9773347772906175,
      "grad_norm": 3.25,
      "learning_rate": 3.745687204529701e-05,
      "loss": 0.9022,
      "step": 278860
    },
    {
      "epoch": 0.977369824797513,
      "grad_norm": 2.9375,
      "learning_rate": 3.7456223016633306e-05,
      "loss": 0.8473,
      "step": 278870
    },
    {
      "epoch": 0.9774048723044086,
      "grad_norm": 2.859375,
      "learning_rate": 3.745557398796961e-05,
      "loss": 0.8894,
      "step": 278880
    },
    {
      "epoch": 0.9774399198113042,
      "grad_norm": 2.84375,
      "learning_rate": 3.74549249593059e-05,
      "loss": 0.8055,
      "step": 278890
    },
    {
      "epoch": 0.9774749673181998,
      "grad_norm": 2.640625,
      "learning_rate": 3.7454275930642204e-05,
      "loss": 0.828,
      "step": 278900
    },
    {
      "epoch": 0.9775100148250954,
      "grad_norm": 2.890625,
      "learning_rate": 3.74536269019785e-05,
      "loss": 0.84,
      "step": 278910
    },
    {
      "epoch": 0.977545062331991,
      "grad_norm": 2.921875,
      "learning_rate": 3.74529778733148e-05,
      "loss": 0.8752,
      "step": 278920
    },
    {
      "epoch": 0.9775801098388867,
      "grad_norm": 2.6875,
      "learning_rate": 3.7452328844651094e-05,
      "loss": 0.8479,
      "step": 278930
    },
    {
      "epoch": 0.9776151573457822,
      "grad_norm": 3.046875,
      "learning_rate": 3.7451679815987396e-05,
      "loss": 0.8966,
      "step": 278940
    },
    {
      "epoch": 0.9776502048526778,
      "grad_norm": 2.78125,
      "learning_rate": 3.745103078732369e-05,
      "loss": 0.9775,
      "step": 278950
    },
    {
      "epoch": 0.9776852523595734,
      "grad_norm": 2.8125,
      "learning_rate": 3.745038175865999e-05,
      "loss": 0.8727,
      "step": 278960
    },
    {
      "epoch": 0.977720299866469,
      "grad_norm": 3.015625,
      "learning_rate": 3.7449732729996286e-05,
      "loss": 0.8592,
      "step": 278970
    },
    {
      "epoch": 0.9777553473733646,
      "grad_norm": 3.078125,
      "learning_rate": 3.744908370133259e-05,
      "loss": 0.9041,
      "step": 278980
    },
    {
      "epoch": 0.9777903948802602,
      "grad_norm": 2.296875,
      "learning_rate": 3.744843467266889e-05,
      "loss": 0.8022,
      "step": 278990
    },
    {
      "epoch": 0.9778254423871557,
      "grad_norm": 2.625,
      "learning_rate": 3.7447785644005184e-05,
      "loss": 0.8335,
      "step": 279000
    },
    {
      "epoch": 0.9778604898940514,
      "grad_norm": 2.6875,
      "learning_rate": 3.7447136615341485e-05,
      "loss": 0.9228,
      "step": 279010
    },
    {
      "epoch": 0.977895537400947,
      "grad_norm": 2.703125,
      "learning_rate": 3.744648758667778e-05,
      "loss": 0.8898,
      "step": 279020
    },
    {
      "epoch": 0.9779305849078426,
      "grad_norm": 3.09375,
      "learning_rate": 3.744583855801408e-05,
      "loss": 0.872,
      "step": 279030
    },
    {
      "epoch": 0.9779656324147382,
      "grad_norm": 3.03125,
      "learning_rate": 3.7445189529350376e-05,
      "loss": 0.8876,
      "step": 279040
    },
    {
      "epoch": 0.9780006799216338,
      "grad_norm": 2.796875,
      "learning_rate": 3.744454050068668e-05,
      "loss": 0.9101,
      "step": 279050
    },
    {
      "epoch": 0.9780357274285294,
      "grad_norm": 3.34375,
      "learning_rate": 3.744389147202297e-05,
      "loss": 0.9024,
      "step": 279060
    },
    {
      "epoch": 0.9780707749354249,
      "grad_norm": 3.03125,
      "learning_rate": 3.744324244335927e-05,
      "loss": 0.8342,
      "step": 279070
    },
    {
      "epoch": 0.9781058224423206,
      "grad_norm": 2.984375,
      "learning_rate": 3.744259341469557e-05,
      "loss": 0.8554,
      "step": 279080
    },
    {
      "epoch": 0.9781408699492161,
      "grad_norm": 3.234375,
      "learning_rate": 3.744194438603187e-05,
      "loss": 0.9278,
      "step": 279090
    },
    {
      "epoch": 0.9781759174561118,
      "grad_norm": 2.890625,
      "learning_rate": 3.7441295357368164e-05,
      "loss": 0.9677,
      "step": 279100
    },
    {
      "epoch": 0.9782109649630073,
      "grad_norm": 3.359375,
      "learning_rate": 3.7440646328704465e-05,
      "loss": 0.8727,
      "step": 279110
    },
    {
      "epoch": 0.978246012469903,
      "grad_norm": 3.65625,
      "learning_rate": 3.743999730004077e-05,
      "loss": 0.8957,
      "step": 279120
    },
    {
      "epoch": 0.9782810599767986,
      "grad_norm": 2.90625,
      "learning_rate": 3.743934827137706e-05,
      "loss": 0.8309,
      "step": 279130
    },
    {
      "epoch": 0.9783161074836941,
      "grad_norm": 2.8125,
      "learning_rate": 3.7438699242713356e-05,
      "loss": 0.8857,
      "step": 279140
    },
    {
      "epoch": 0.9783511549905898,
      "grad_norm": 2.46875,
      "learning_rate": 3.743805021404965e-05,
      "loss": 0.8199,
      "step": 279150
    },
    {
      "epoch": 0.9783862024974853,
      "grad_norm": 3.09375,
      "learning_rate": 3.743740118538595e-05,
      "loss": 0.8655,
      "step": 279160
    },
    {
      "epoch": 0.978421250004381,
      "grad_norm": 3.265625,
      "learning_rate": 3.7436752156722246e-05,
      "loss": 0.9007,
      "step": 279170
    },
    {
      "epoch": 0.9784562975112765,
      "grad_norm": 2.71875,
      "learning_rate": 3.743610312805855e-05,
      "loss": 0.8519,
      "step": 279180
    },
    {
      "epoch": 0.9784913450181721,
      "grad_norm": 3.046875,
      "learning_rate": 3.743545409939484e-05,
      "loss": 0.8953,
      "step": 279190
    },
    {
      "epoch": 0.9785263925250677,
      "grad_norm": 2.890625,
      "learning_rate": 3.7434805070731144e-05,
      "loss": 0.8059,
      "step": 279200
    },
    {
      "epoch": 0.9785614400319633,
      "grad_norm": 2.875,
      "learning_rate": 3.7434156042067445e-05,
      "loss": 0.8885,
      "step": 279210
    },
    {
      "epoch": 0.978596487538859,
      "grad_norm": 3.25,
      "learning_rate": 3.743350701340374e-05,
      "loss": 0.8899,
      "step": 279220
    },
    {
      "epoch": 0.9786315350457545,
      "grad_norm": 2.5625,
      "learning_rate": 3.743285798474004e-05,
      "loss": 0.8499,
      "step": 279230
    },
    {
      "epoch": 0.9786665825526502,
      "grad_norm": 2.734375,
      "learning_rate": 3.7432208956076336e-05,
      "loss": 0.8716,
      "step": 279240
    },
    {
      "epoch": 0.9787016300595457,
      "grad_norm": 2.921875,
      "learning_rate": 3.743155992741264e-05,
      "loss": 0.8746,
      "step": 279250
    },
    {
      "epoch": 0.9787366775664413,
      "grad_norm": 2.90625,
      "learning_rate": 3.743091089874893e-05,
      "loss": 0.8042,
      "step": 279260
    },
    {
      "epoch": 0.9787717250733369,
      "grad_norm": 3.046875,
      "learning_rate": 3.743026187008523e-05,
      "loss": 0.9138,
      "step": 279270
    },
    {
      "epoch": 0.9788067725802325,
      "grad_norm": 3.109375,
      "learning_rate": 3.742961284142153e-05,
      "loss": 0.7806,
      "step": 279280
    },
    {
      "epoch": 0.9788418200871281,
      "grad_norm": 2.53125,
      "learning_rate": 3.742896381275783e-05,
      "loss": 0.8918,
      "step": 279290
    },
    {
      "epoch": 0.9788768675940237,
      "grad_norm": 3.0,
      "learning_rate": 3.7428314784094124e-05,
      "loss": 0.9064,
      "step": 279300
    },
    {
      "epoch": 0.9789119151009193,
      "grad_norm": 3.140625,
      "learning_rate": 3.7427665755430425e-05,
      "loss": 0.9243,
      "step": 279310
    },
    {
      "epoch": 0.9789469626078149,
      "grad_norm": 2.6875,
      "learning_rate": 3.742701672676672e-05,
      "loss": 0.8996,
      "step": 279320
    },
    {
      "epoch": 0.9789820101147105,
      "grad_norm": 2.8125,
      "learning_rate": 3.742636769810302e-05,
      "loss": 0.8884,
      "step": 279330
    },
    {
      "epoch": 0.9790170576216061,
      "grad_norm": 3.28125,
      "learning_rate": 3.7425718669439316e-05,
      "loss": 0.8522,
      "step": 279340
    },
    {
      "epoch": 0.9790521051285017,
      "grad_norm": 2.765625,
      "learning_rate": 3.742506964077562e-05,
      "loss": 0.8813,
      "step": 279350
    },
    {
      "epoch": 0.9790871526353973,
      "grad_norm": 2.96875,
      "learning_rate": 3.742442061211192e-05,
      "loss": 0.8095,
      "step": 279360
    },
    {
      "epoch": 0.9791222001422929,
      "grad_norm": 3.359375,
      "learning_rate": 3.742377158344821e-05,
      "loss": 0.9452,
      "step": 279370
    },
    {
      "epoch": 0.9791572476491884,
      "grad_norm": 2.640625,
      "learning_rate": 3.7423122554784515e-05,
      "loss": 0.8437,
      "step": 279380
    },
    {
      "epoch": 0.9791922951560841,
      "grad_norm": 2.890625,
      "learning_rate": 3.742247352612081e-05,
      "loss": 0.8398,
      "step": 279390
    },
    {
      "epoch": 0.9792273426629796,
      "grad_norm": 3.109375,
      "learning_rate": 3.742182449745711e-05,
      "loss": 0.9114,
      "step": 279400
    },
    {
      "epoch": 0.9792623901698753,
      "grad_norm": 2.984375,
      "learning_rate": 3.7421175468793405e-05,
      "loss": 0.9071,
      "step": 279410
    },
    {
      "epoch": 0.9792974376767709,
      "grad_norm": 2.53125,
      "learning_rate": 3.742052644012971e-05,
      "loss": 0.8448,
      "step": 279420
    },
    {
      "epoch": 0.9793324851836664,
      "grad_norm": 3.0,
      "learning_rate": 3.7419877411466e-05,
      "loss": 0.8233,
      "step": 279430
    },
    {
      "epoch": 0.9793675326905621,
      "grad_norm": 2.984375,
      "learning_rate": 3.74192283828023e-05,
      "loss": 0.8855,
      "step": 279440
    },
    {
      "epoch": 0.9794025801974576,
      "grad_norm": 3.25,
      "learning_rate": 3.74185793541386e-05,
      "loss": 0.9001,
      "step": 279450
    },
    {
      "epoch": 0.9794376277043533,
      "grad_norm": 3.046875,
      "learning_rate": 3.74179303254749e-05,
      "loss": 0.9089,
      "step": 279460
    },
    {
      "epoch": 0.9794726752112488,
      "grad_norm": 3.078125,
      "learning_rate": 3.741728129681119e-05,
      "loss": 0.9327,
      "step": 279470
    },
    {
      "epoch": 0.9795077227181445,
      "grad_norm": 3.265625,
      "learning_rate": 3.7416632268147495e-05,
      "loss": 0.9041,
      "step": 279480
    },
    {
      "epoch": 0.97954277022504,
      "grad_norm": 2.90625,
      "learning_rate": 3.7415983239483796e-05,
      "loss": 0.9208,
      "step": 279490
    },
    {
      "epoch": 0.9795778177319356,
      "grad_norm": 2.640625,
      "learning_rate": 3.741533421082009e-05,
      "loss": 0.8858,
      "step": 279500
    },
    {
      "epoch": 0.9796128652388313,
      "grad_norm": 3.078125,
      "learning_rate": 3.741468518215639e-05,
      "loss": 0.8476,
      "step": 279510
    },
    {
      "epoch": 0.9796479127457268,
      "grad_norm": 3.125,
      "learning_rate": 3.741403615349268e-05,
      "loss": 0.8197,
      "step": 279520
    },
    {
      "epoch": 0.9796829602526225,
      "grad_norm": 2.875,
      "learning_rate": 3.741338712482898e-05,
      "loss": 0.9103,
      "step": 279530
    },
    {
      "epoch": 0.979718007759518,
      "grad_norm": 3.28125,
      "learning_rate": 3.7412738096165276e-05,
      "loss": 0.8518,
      "step": 279540
    },
    {
      "epoch": 0.9797530552664137,
      "grad_norm": 3.03125,
      "learning_rate": 3.741208906750158e-05,
      "loss": 0.9339,
      "step": 279550
    },
    {
      "epoch": 0.9797881027733092,
      "grad_norm": 2.90625,
      "learning_rate": 3.741144003883787e-05,
      "loss": 0.833,
      "step": 279560
    },
    {
      "epoch": 0.9798231502802048,
      "grad_norm": 2.953125,
      "learning_rate": 3.741079101017417e-05,
      "loss": 0.8058,
      "step": 279570
    },
    {
      "epoch": 0.9798581977871004,
      "grad_norm": 2.875,
      "learning_rate": 3.7410141981510475e-05,
      "loss": 0.909,
      "step": 279580
    },
    {
      "epoch": 0.979893245293996,
      "grad_norm": 2.421875,
      "learning_rate": 3.740949295284677e-05,
      "loss": 0.8884,
      "step": 279590
    },
    {
      "epoch": 0.9799282928008916,
      "grad_norm": 2.984375,
      "learning_rate": 3.740884392418307e-05,
      "loss": 0.9116,
      "step": 279600
    },
    {
      "epoch": 0.9799633403077872,
      "grad_norm": 3.359375,
      "learning_rate": 3.7408194895519365e-05,
      "loss": 0.9466,
      "step": 279610
    },
    {
      "epoch": 0.9799983878146828,
      "grad_norm": 2.984375,
      "learning_rate": 3.740754586685567e-05,
      "loss": 0.8892,
      "step": 279620
    },
    {
      "epoch": 0.9800334353215784,
      "grad_norm": 3.328125,
      "learning_rate": 3.740689683819196e-05,
      "loss": 0.9095,
      "step": 279630
    },
    {
      "epoch": 0.980068482828474,
      "grad_norm": 2.90625,
      "learning_rate": 3.740624780952826e-05,
      "loss": 0.8072,
      "step": 279640
    },
    {
      "epoch": 0.9801035303353696,
      "grad_norm": 3.515625,
      "learning_rate": 3.740559878086456e-05,
      "loss": 0.9168,
      "step": 279650
    },
    {
      "epoch": 0.9801385778422652,
      "grad_norm": 3.15625,
      "learning_rate": 3.740494975220086e-05,
      "loss": 0.8886,
      "step": 279660
    },
    {
      "epoch": 0.9801736253491607,
      "grad_norm": 2.96875,
      "learning_rate": 3.740430072353715e-05,
      "loss": 0.8871,
      "step": 279670
    },
    {
      "epoch": 0.9802086728560564,
      "grad_norm": 2.796875,
      "learning_rate": 3.7403651694873455e-05,
      "loss": 0.868,
      "step": 279680
    },
    {
      "epoch": 0.9802437203629519,
      "grad_norm": 2.875,
      "learning_rate": 3.740300266620975e-05,
      "loss": 0.8059,
      "step": 279690
    },
    {
      "epoch": 0.9802787678698476,
      "grad_norm": 3.046875,
      "learning_rate": 3.740235363754605e-05,
      "loss": 0.8852,
      "step": 279700
    },
    {
      "epoch": 0.9803138153767432,
      "grad_norm": 3.078125,
      "learning_rate": 3.7401704608882345e-05,
      "loss": 0.8859,
      "step": 279710
    },
    {
      "epoch": 0.9803488628836388,
      "grad_norm": 2.625,
      "learning_rate": 3.740105558021865e-05,
      "loss": 0.8183,
      "step": 279720
    },
    {
      "epoch": 0.9803839103905344,
      "grad_norm": 2.75,
      "learning_rate": 3.740040655155495e-05,
      "loss": 0.7917,
      "step": 279730
    },
    {
      "epoch": 0.9804189578974299,
      "grad_norm": 2.71875,
      "learning_rate": 3.739975752289124e-05,
      "loss": 0.88,
      "step": 279740
    },
    {
      "epoch": 0.9804540054043256,
      "grad_norm": 3.015625,
      "learning_rate": 3.7399108494227544e-05,
      "loss": 0.9252,
      "step": 279750
    },
    {
      "epoch": 0.9804890529112211,
      "grad_norm": 2.84375,
      "learning_rate": 3.739845946556384e-05,
      "loss": 0.8317,
      "step": 279760
    },
    {
      "epoch": 0.9805241004181168,
      "grad_norm": 3.34375,
      "learning_rate": 3.739781043690014e-05,
      "loss": 0.9217,
      "step": 279770
    },
    {
      "epoch": 0.9805591479250123,
      "grad_norm": 2.890625,
      "learning_rate": 3.7397161408236435e-05,
      "loss": 0.8115,
      "step": 279780
    },
    {
      "epoch": 0.980594195431908,
      "grad_norm": 3.015625,
      "learning_rate": 3.7396512379572736e-05,
      "loss": 0.8809,
      "step": 279790
    },
    {
      "epoch": 0.9806292429388036,
      "grad_norm": 2.53125,
      "learning_rate": 3.739586335090903e-05,
      "loss": 0.7785,
      "step": 279800
    },
    {
      "epoch": 0.9806642904456991,
      "grad_norm": 2.78125,
      "learning_rate": 3.739521432224533e-05,
      "loss": 0.8897,
      "step": 279810
    },
    {
      "epoch": 0.9806993379525948,
      "grad_norm": 3.234375,
      "learning_rate": 3.739456529358163e-05,
      "loss": 0.8489,
      "step": 279820
    },
    {
      "epoch": 0.9807343854594903,
      "grad_norm": 2.90625,
      "learning_rate": 3.739391626491793e-05,
      "loss": 0.8683,
      "step": 279830
    },
    {
      "epoch": 0.980769432966386,
      "grad_norm": 3.140625,
      "learning_rate": 3.739326723625422e-05,
      "loss": 0.9815,
      "step": 279840
    },
    {
      "epoch": 0.9808044804732815,
      "grad_norm": 3.28125,
      "learning_rate": 3.7392618207590524e-05,
      "loss": 0.9521,
      "step": 279850
    },
    {
      "epoch": 0.9808395279801772,
      "grad_norm": 3.328125,
      "learning_rate": 3.7391969178926825e-05,
      "loss": 0.8002,
      "step": 279860
    },
    {
      "epoch": 0.9808745754870727,
      "grad_norm": 3.1875,
      "learning_rate": 3.739132015026312e-05,
      "loss": 0.887,
      "step": 279870
    },
    {
      "epoch": 0.9809096229939683,
      "grad_norm": 2.9375,
      "learning_rate": 3.739067112159942e-05,
      "loss": 0.8736,
      "step": 279880
    },
    {
      "epoch": 0.9809446705008639,
      "grad_norm": 2.953125,
      "learning_rate": 3.739002209293571e-05,
      "loss": 0.8224,
      "step": 279890
    },
    {
      "epoch": 0.9809797180077595,
      "grad_norm": 3.28125,
      "learning_rate": 3.738937306427201e-05,
      "loss": 0.8943,
      "step": 279900
    },
    {
      "epoch": 0.9810147655146552,
      "grad_norm": 2.828125,
      "learning_rate": 3.7388724035608305e-05,
      "loss": 0.9028,
      "step": 279910
    },
    {
      "epoch": 0.9810498130215507,
      "grad_norm": 2.828125,
      "learning_rate": 3.738807500694461e-05,
      "loss": 0.9991,
      "step": 279920
    },
    {
      "epoch": 0.9810848605284463,
      "grad_norm": 3.078125,
      "learning_rate": 3.73874259782809e-05,
      "loss": 0.9211,
      "step": 279930
    },
    {
      "epoch": 0.9811199080353419,
      "grad_norm": 2.78125,
      "learning_rate": 3.73867769496172e-05,
      "loss": 0.8859,
      "step": 279940
    },
    {
      "epoch": 0.9811549555422375,
      "grad_norm": 3.046875,
      "learning_rate": 3.7386127920953504e-05,
      "loss": 0.871,
      "step": 279950
    },
    {
      "epoch": 0.9811900030491331,
      "grad_norm": 3.125,
      "learning_rate": 3.73854788922898e-05,
      "loss": 0.8865,
      "step": 279960
    },
    {
      "epoch": 0.9812250505560287,
      "grad_norm": 2.9375,
      "learning_rate": 3.73848298636261e-05,
      "loss": 0.8303,
      "step": 279970
    },
    {
      "epoch": 0.9812600980629242,
      "grad_norm": 2.75,
      "learning_rate": 3.7384180834962395e-05,
      "loss": 0.8717,
      "step": 279980
    },
    {
      "epoch": 0.9812951455698199,
      "grad_norm": 2.75,
      "learning_rate": 3.7383531806298696e-05,
      "loss": 0.8779,
      "step": 279990
    },
    {
      "epoch": 0.9813301930767155,
      "grad_norm": 2.984375,
      "learning_rate": 3.738288277763499e-05,
      "loss": 0.929,
      "step": 280000
    },
    {
      "epoch": 0.9813301930767155,
      "eval_loss": 0.8207725882530212,
      "eval_runtime": 564.3433,
      "eval_samples_per_second": 674.122,
      "eval_steps_per_second": 56.177,
      "step": 280000
    },
    {
      "epoch": 0.9813652405836111,
      "grad_norm": 3.234375,
      "learning_rate": 3.738223374897129e-05,
      "loss": 0.8369,
      "step": 280010
    },
    {
      "epoch": 0.9814002880905067,
      "grad_norm": 3.375,
      "learning_rate": 3.738158472030759e-05,
      "loss": 0.8828,
      "step": 280020
    },
    {
      "epoch": 0.9814353355974023,
      "grad_norm": 3.109375,
      "learning_rate": 3.738093569164389e-05,
      "loss": 0.8794,
      "step": 280030
    },
    {
      "epoch": 0.9814703831042979,
      "grad_norm": 2.78125,
      "learning_rate": 3.738028666298018e-05,
      "loss": 0.8478,
      "step": 280040
    },
    {
      "epoch": 0.9815054306111934,
      "grad_norm": 3.25,
      "learning_rate": 3.7379637634316484e-05,
      "loss": 0.8941,
      "step": 280050
    },
    {
      "epoch": 0.9815404781180891,
      "grad_norm": 2.6875,
      "learning_rate": 3.737898860565278e-05,
      "loss": 0.874,
      "step": 280060
    },
    {
      "epoch": 0.9815755256249846,
      "grad_norm": 2.59375,
      "learning_rate": 3.737833957698908e-05,
      "loss": 0.8193,
      "step": 280070
    },
    {
      "epoch": 0.9816105731318803,
      "grad_norm": 2.796875,
      "learning_rate": 3.737769054832538e-05,
      "loss": 0.8415,
      "step": 280080
    },
    {
      "epoch": 0.9816456206387758,
      "grad_norm": 2.640625,
      "learning_rate": 3.7377041519661676e-05,
      "loss": 0.8706,
      "step": 280090
    },
    {
      "epoch": 0.9816806681456715,
      "grad_norm": 3.46875,
      "learning_rate": 3.737639249099798e-05,
      "loss": 0.8957,
      "step": 280100
    },
    {
      "epoch": 0.9817157156525671,
      "grad_norm": 2.84375,
      "learning_rate": 3.737574346233427e-05,
      "loss": 0.8013,
      "step": 280110
    },
    {
      "epoch": 0.9817507631594626,
      "grad_norm": 2.796875,
      "learning_rate": 3.7375094433670573e-05,
      "loss": 0.8808,
      "step": 280120
    },
    {
      "epoch": 0.9817858106663583,
      "grad_norm": 2.734375,
      "learning_rate": 3.737444540500687e-05,
      "loss": 0.8428,
      "step": 280130
    },
    {
      "epoch": 0.9818208581732538,
      "grad_norm": 3.109375,
      "learning_rate": 3.737379637634317e-05,
      "loss": 0.8753,
      "step": 280140
    },
    {
      "epoch": 0.9818559056801495,
      "grad_norm": 2.796875,
      "learning_rate": 3.7373147347679464e-05,
      "loss": 0.9402,
      "step": 280150
    },
    {
      "epoch": 0.981890953187045,
      "grad_norm": 2.75,
      "learning_rate": 3.7372498319015765e-05,
      "loss": 0.8427,
      "step": 280160
    },
    {
      "epoch": 0.9819260006939406,
      "grad_norm": 2.578125,
      "learning_rate": 3.737184929035206e-05,
      "loss": 0.8807,
      "step": 280170
    },
    {
      "epoch": 0.9819610482008362,
      "grad_norm": 2.65625,
      "learning_rate": 3.737120026168836e-05,
      "loss": 0.9269,
      "step": 280180
    },
    {
      "epoch": 0.9819960957077318,
      "grad_norm": 3.390625,
      "learning_rate": 3.7370551233024656e-05,
      "loss": 1.0111,
      "step": 280190
    },
    {
      "epoch": 0.9820311432146275,
      "grad_norm": 2.859375,
      "learning_rate": 3.736990220436096e-05,
      "loss": 0.8668,
      "step": 280200
    },
    {
      "epoch": 0.982066190721523,
      "grad_norm": 3.296875,
      "learning_rate": 3.736925317569725e-05,
      "loss": 0.876,
      "step": 280210
    },
    {
      "epoch": 0.9821012382284187,
      "grad_norm": 2.84375,
      "learning_rate": 3.7368604147033553e-05,
      "loss": 0.9098,
      "step": 280220
    },
    {
      "epoch": 0.9821362857353142,
      "grad_norm": 2.921875,
      "learning_rate": 3.7367955118369855e-05,
      "loss": 0.916,
      "step": 280230
    },
    {
      "epoch": 0.9821713332422098,
      "grad_norm": 3.21875,
      "learning_rate": 3.736730608970615e-05,
      "loss": 0.8832,
      "step": 280240
    },
    {
      "epoch": 0.9822063807491054,
      "grad_norm": 3.328125,
      "learning_rate": 3.736665706104245e-05,
      "loss": 0.8932,
      "step": 280250
    },
    {
      "epoch": 0.982241428256001,
      "grad_norm": 3.21875,
      "learning_rate": 3.7366008032378745e-05,
      "loss": 0.9248,
      "step": 280260
    },
    {
      "epoch": 0.9822764757628966,
      "grad_norm": 3.109375,
      "learning_rate": 3.736535900371504e-05,
      "loss": 0.9258,
      "step": 280270
    },
    {
      "epoch": 0.9823115232697922,
      "grad_norm": 2.828125,
      "learning_rate": 3.7364709975051335e-05,
      "loss": 0.8261,
      "step": 280280
    },
    {
      "epoch": 0.9823465707766879,
      "grad_norm": 2.71875,
      "learning_rate": 3.7364060946387636e-05,
      "loss": 0.888,
      "step": 280290
    },
    {
      "epoch": 0.9823816182835834,
      "grad_norm": 2.9375,
      "learning_rate": 3.736341191772393e-05,
      "loss": 0.9123,
      "step": 280300
    },
    {
      "epoch": 0.982416665790479,
      "grad_norm": 2.765625,
      "learning_rate": 3.736276288906023e-05,
      "loss": 0.8778,
      "step": 280310
    },
    {
      "epoch": 0.9824517132973746,
      "grad_norm": 3.328125,
      "learning_rate": 3.7362113860396533e-05,
      "loss": 0.8437,
      "step": 280320
    },
    {
      "epoch": 0.9824867608042702,
      "grad_norm": 2.9375,
      "learning_rate": 3.736146483173283e-05,
      "loss": 0.8032,
      "step": 280330
    },
    {
      "epoch": 0.9825218083111658,
      "grad_norm": 2.984375,
      "learning_rate": 3.736081580306913e-05,
      "loss": 0.904,
      "step": 280340
    },
    {
      "epoch": 0.9825568558180614,
      "grad_norm": 3.0625,
      "learning_rate": 3.7360166774405424e-05,
      "loss": 0.8215,
      "step": 280350
    },
    {
      "epoch": 0.9825919033249569,
      "grad_norm": 2.609375,
      "learning_rate": 3.7359517745741725e-05,
      "loss": 0.8718,
      "step": 280360
    },
    {
      "epoch": 0.9826269508318526,
      "grad_norm": 2.671875,
      "learning_rate": 3.735886871707802e-05,
      "loss": 0.835,
      "step": 280370
    },
    {
      "epoch": 0.9826619983387481,
      "grad_norm": 2.53125,
      "learning_rate": 3.735821968841432e-05,
      "loss": 0.9443,
      "step": 280380
    },
    {
      "epoch": 0.9826970458456438,
      "grad_norm": 3.1875,
      "learning_rate": 3.7357570659750616e-05,
      "loss": 0.9141,
      "step": 280390
    },
    {
      "epoch": 0.9827320933525394,
      "grad_norm": 2.46875,
      "learning_rate": 3.735692163108692e-05,
      "loss": 0.8231,
      "step": 280400
    },
    {
      "epoch": 0.982767140859435,
      "grad_norm": 2.84375,
      "learning_rate": 3.735627260242321e-05,
      "loss": 0.9005,
      "step": 280410
    },
    {
      "epoch": 0.9828021883663306,
      "grad_norm": 2.90625,
      "learning_rate": 3.7355623573759513e-05,
      "loss": 0.8884,
      "step": 280420
    },
    {
      "epoch": 0.9828372358732261,
      "grad_norm": 2.59375,
      "learning_rate": 3.735497454509581e-05,
      "loss": 0.8594,
      "step": 280430
    },
    {
      "epoch": 0.9828722833801218,
      "grad_norm": 2.390625,
      "learning_rate": 3.735432551643211e-05,
      "loss": 0.8945,
      "step": 280440
    },
    {
      "epoch": 0.9829073308870173,
      "grad_norm": 3.15625,
      "learning_rate": 3.735367648776841e-05,
      "loss": 0.9359,
      "step": 280450
    },
    {
      "epoch": 0.982942378393913,
      "grad_norm": 3.1875,
      "learning_rate": 3.7353027459104705e-05,
      "loss": 0.9005,
      "step": 280460
    },
    {
      "epoch": 0.9829774259008085,
      "grad_norm": 2.953125,
      "learning_rate": 3.735237843044101e-05,
      "loss": 0.8379,
      "step": 280470
    },
    {
      "epoch": 0.9830124734077041,
      "grad_norm": 2.5625,
      "learning_rate": 3.73517294017773e-05,
      "loss": 0.8121,
      "step": 280480
    },
    {
      "epoch": 0.9830475209145998,
      "grad_norm": 2.78125,
      "learning_rate": 3.73510803731136e-05,
      "loss": 0.8739,
      "step": 280490
    },
    {
      "epoch": 0.9830825684214953,
      "grad_norm": 3.046875,
      "learning_rate": 3.73504313444499e-05,
      "loss": 0.834,
      "step": 280500
    },
    {
      "epoch": 0.983117615928391,
      "grad_norm": 3.0,
      "learning_rate": 3.73497823157862e-05,
      "loss": 0.8015,
      "step": 280510
    },
    {
      "epoch": 0.9831526634352865,
      "grad_norm": 2.875,
      "learning_rate": 3.734913328712249e-05,
      "loss": 0.968,
      "step": 280520
    },
    {
      "epoch": 0.9831877109421822,
      "grad_norm": 2.671875,
      "learning_rate": 3.7348484258458795e-05,
      "loss": 0.9082,
      "step": 280530
    },
    {
      "epoch": 0.9832227584490777,
      "grad_norm": 3.140625,
      "learning_rate": 3.734783522979509e-05,
      "loss": 0.875,
      "step": 280540
    },
    {
      "epoch": 0.9832578059559733,
      "grad_norm": 3.09375,
      "learning_rate": 3.734718620113139e-05,
      "loss": 0.9207,
      "step": 280550
    },
    {
      "epoch": 0.9832928534628689,
      "grad_norm": 2.96875,
      "learning_rate": 3.7346537172467685e-05,
      "loss": 0.8879,
      "step": 280560
    },
    {
      "epoch": 0.9833279009697645,
      "grad_norm": 3.03125,
      "learning_rate": 3.734588814380399e-05,
      "loss": 0.8237,
      "step": 280570
    },
    {
      "epoch": 0.9833629484766601,
      "grad_norm": 3.046875,
      "learning_rate": 3.734523911514028e-05,
      "loss": 0.9013,
      "step": 280580
    },
    {
      "epoch": 0.9833979959835557,
      "grad_norm": 3.0,
      "learning_rate": 3.734459008647658e-05,
      "loss": 0.8458,
      "step": 280590
    },
    {
      "epoch": 0.9834330434904514,
      "grad_norm": 2.890625,
      "learning_rate": 3.7343941057812884e-05,
      "loss": 0.8571,
      "step": 280600
    },
    {
      "epoch": 0.9834680909973469,
      "grad_norm": 2.984375,
      "learning_rate": 3.734329202914918e-05,
      "loss": 0.8997,
      "step": 280610
    },
    {
      "epoch": 0.9835031385042425,
      "grad_norm": 3.09375,
      "learning_rate": 3.734264300048548e-05,
      "loss": 0.9196,
      "step": 280620
    },
    {
      "epoch": 0.9835381860111381,
      "grad_norm": 2.640625,
      "learning_rate": 3.7341993971821775e-05,
      "loss": 0.8877,
      "step": 280630
    },
    {
      "epoch": 0.9835732335180337,
      "grad_norm": 2.625,
      "learning_rate": 3.7341344943158076e-05,
      "loss": 0.8591,
      "step": 280640
    },
    {
      "epoch": 0.9836082810249293,
      "grad_norm": 2.671875,
      "learning_rate": 3.7340695914494364e-05,
      "loss": 0.8139,
      "step": 280650
    },
    {
      "epoch": 0.9836433285318249,
      "grad_norm": 2.921875,
      "learning_rate": 3.7340046885830665e-05,
      "loss": 0.898,
      "step": 280660
    },
    {
      "epoch": 0.9836783760387204,
      "grad_norm": 2.703125,
      "learning_rate": 3.733939785716696e-05,
      "loss": 0.8727,
      "step": 280670
    },
    {
      "epoch": 0.9837134235456161,
      "grad_norm": 2.78125,
      "learning_rate": 3.733874882850326e-05,
      "loss": 0.8932,
      "step": 280680
    },
    {
      "epoch": 0.9837484710525117,
      "grad_norm": 2.921875,
      "learning_rate": 3.733809979983956e-05,
      "loss": 0.7753,
      "step": 280690
    },
    {
      "epoch": 0.9837835185594073,
      "grad_norm": 2.671875,
      "learning_rate": 3.733745077117586e-05,
      "loss": 0.8782,
      "step": 280700
    },
    {
      "epoch": 0.9838185660663029,
      "grad_norm": 2.984375,
      "learning_rate": 3.733680174251216e-05,
      "loss": 0.7845,
      "step": 280710
    },
    {
      "epoch": 0.9838536135731984,
      "grad_norm": 2.8125,
      "learning_rate": 3.733615271384845e-05,
      "loss": 0.8157,
      "step": 280720
    },
    {
      "epoch": 0.9838886610800941,
      "grad_norm": 3.25,
      "learning_rate": 3.7335503685184755e-05,
      "loss": 0.9242,
      "step": 280730
    },
    {
      "epoch": 0.9839237085869896,
      "grad_norm": 2.875,
      "learning_rate": 3.733485465652105e-05,
      "loss": 0.82,
      "step": 280740
    },
    {
      "epoch": 0.9839587560938853,
      "grad_norm": 2.578125,
      "learning_rate": 3.733420562785735e-05,
      "loss": 0.8799,
      "step": 280750
    },
    {
      "epoch": 0.9839938036007808,
      "grad_norm": 3.0,
      "learning_rate": 3.7333556599193645e-05,
      "loss": 0.8535,
      "step": 280760
    },
    {
      "epoch": 0.9840288511076765,
      "grad_norm": 2.4375,
      "learning_rate": 3.733290757052995e-05,
      "loss": 0.8277,
      "step": 280770
    },
    {
      "epoch": 0.9840638986145721,
      "grad_norm": 2.875,
      "learning_rate": 3.733225854186624e-05,
      "loss": 0.8834,
      "step": 280780
    },
    {
      "epoch": 0.9840989461214676,
      "grad_norm": 2.875,
      "learning_rate": 3.733160951320254e-05,
      "loss": 0.9221,
      "step": 280790
    },
    {
      "epoch": 0.9841339936283633,
      "grad_norm": 2.953125,
      "learning_rate": 3.733096048453884e-05,
      "loss": 0.8844,
      "step": 280800
    },
    {
      "epoch": 0.9841690411352588,
      "grad_norm": 2.8125,
      "learning_rate": 3.733031145587514e-05,
      "loss": 0.8433,
      "step": 280810
    },
    {
      "epoch": 0.9842040886421545,
      "grad_norm": 2.984375,
      "learning_rate": 3.732966242721144e-05,
      "loss": 0.9177,
      "step": 280820
    },
    {
      "epoch": 0.98423913614905,
      "grad_norm": 3.359375,
      "learning_rate": 3.7329013398547735e-05,
      "loss": 0.9382,
      "step": 280830
    },
    {
      "epoch": 0.9842741836559457,
      "grad_norm": 3.140625,
      "learning_rate": 3.7328364369884036e-05,
      "loss": 0.9002,
      "step": 280840
    },
    {
      "epoch": 0.9843092311628412,
      "grad_norm": 2.921875,
      "learning_rate": 3.732771534122033e-05,
      "loss": 0.8689,
      "step": 280850
    },
    {
      "epoch": 0.9843442786697368,
      "grad_norm": 3.25,
      "learning_rate": 3.732706631255663e-05,
      "loss": 0.8398,
      "step": 280860
    },
    {
      "epoch": 0.9843793261766324,
      "grad_norm": 2.734375,
      "learning_rate": 3.732641728389293e-05,
      "loss": 0.8828,
      "step": 280870
    },
    {
      "epoch": 0.984414373683528,
      "grad_norm": 2.875,
      "learning_rate": 3.732576825522923e-05,
      "loss": 0.8526,
      "step": 280880
    },
    {
      "epoch": 0.9844494211904237,
      "grad_norm": 3.0,
      "learning_rate": 3.732511922656552e-05,
      "loss": 0.8969,
      "step": 280890
    },
    {
      "epoch": 0.9844844686973192,
      "grad_norm": 2.890625,
      "learning_rate": 3.7324470197901824e-05,
      "loss": 0.8412,
      "step": 280900
    },
    {
      "epoch": 0.9845195162042149,
      "grad_norm": 2.859375,
      "learning_rate": 3.732382116923812e-05,
      "loss": 0.9108,
      "step": 280910
    },
    {
      "epoch": 0.9845545637111104,
      "grad_norm": 2.71875,
      "learning_rate": 3.732317214057442e-05,
      "loss": 0.8424,
      "step": 280920
    },
    {
      "epoch": 0.984589611218006,
      "grad_norm": 2.75,
      "learning_rate": 3.7322523111910715e-05,
      "loss": 0.8088,
      "step": 280930
    },
    {
      "epoch": 0.9846246587249016,
      "grad_norm": 2.640625,
      "learning_rate": 3.7321874083247016e-05,
      "loss": 0.844,
      "step": 280940
    },
    {
      "epoch": 0.9846597062317972,
      "grad_norm": 3.140625,
      "learning_rate": 3.732122505458332e-05,
      "loss": 0.9617,
      "step": 280950
    },
    {
      "epoch": 0.9846947537386928,
      "grad_norm": 2.859375,
      "learning_rate": 3.732057602591961e-05,
      "loss": 0.9587,
      "step": 280960
    },
    {
      "epoch": 0.9847298012455884,
      "grad_norm": 2.8125,
      "learning_rate": 3.7319926997255914e-05,
      "loss": 0.8955,
      "step": 280970
    },
    {
      "epoch": 0.984764848752484,
      "grad_norm": 2.78125,
      "learning_rate": 3.731927796859221e-05,
      "loss": 0.8525,
      "step": 280980
    },
    {
      "epoch": 0.9847998962593796,
      "grad_norm": 2.921875,
      "learning_rate": 3.731862893992851e-05,
      "loss": 0.8684,
      "step": 280990
    },
    {
      "epoch": 0.9848349437662752,
      "grad_norm": 2.75,
      "learning_rate": 3.7317979911264804e-05,
      "loss": 0.9725,
      "step": 281000
    },
    {
      "epoch": 0.9848699912731708,
      "grad_norm": 3.1875,
      "learning_rate": 3.7317330882601106e-05,
      "loss": 0.876,
      "step": 281010
    },
    {
      "epoch": 0.9849050387800664,
      "grad_norm": 2.84375,
      "learning_rate": 3.731668185393739e-05,
      "loss": 0.8597,
      "step": 281020
    },
    {
      "epoch": 0.984940086286962,
      "grad_norm": 2.96875,
      "learning_rate": 3.7316032825273695e-05,
      "loss": 0.9127,
      "step": 281030
    },
    {
      "epoch": 0.9849751337938576,
      "grad_norm": 2.46875,
      "learning_rate": 3.7315383796609996e-05,
      "loss": 0.8559,
      "step": 281040
    },
    {
      "epoch": 0.9850101813007531,
      "grad_norm": 3.0625,
      "learning_rate": 3.731473476794629e-05,
      "loss": 0.8474,
      "step": 281050
    },
    {
      "epoch": 0.9850452288076488,
      "grad_norm": 2.671875,
      "learning_rate": 3.731408573928259e-05,
      "loss": 0.9173,
      "step": 281060
    },
    {
      "epoch": 0.9850802763145443,
      "grad_norm": 2.703125,
      "learning_rate": 3.731343671061889e-05,
      "loss": 0.9488,
      "step": 281070
    },
    {
      "epoch": 0.98511532382144,
      "grad_norm": 3.375,
      "learning_rate": 3.731278768195519e-05,
      "loss": 0.8878,
      "step": 281080
    },
    {
      "epoch": 0.9851503713283356,
      "grad_norm": 3.296875,
      "learning_rate": 3.731213865329148e-05,
      "loss": 0.8591,
      "step": 281090
    },
    {
      "epoch": 0.9851854188352311,
      "grad_norm": 3.0625,
      "learning_rate": 3.7311489624627784e-05,
      "loss": 1.0068,
      "step": 281100
    },
    {
      "epoch": 0.9852204663421268,
      "grad_norm": 3.28125,
      "learning_rate": 3.731084059596408e-05,
      "loss": 0.9187,
      "step": 281110
    },
    {
      "epoch": 0.9852555138490223,
      "grad_norm": 2.921875,
      "learning_rate": 3.731019156730038e-05,
      "loss": 0.8694,
      "step": 281120
    },
    {
      "epoch": 0.985290561355918,
      "grad_norm": 2.8125,
      "learning_rate": 3.7309542538636675e-05,
      "loss": 0.848,
      "step": 281130
    },
    {
      "epoch": 0.9853256088628135,
      "grad_norm": 2.953125,
      "learning_rate": 3.7308893509972976e-05,
      "loss": 0.9395,
      "step": 281140
    },
    {
      "epoch": 0.9853606563697092,
      "grad_norm": 2.96875,
      "learning_rate": 3.730824448130927e-05,
      "loss": 0.8724,
      "step": 281150
    },
    {
      "epoch": 0.9853957038766047,
      "grad_norm": 2.828125,
      "learning_rate": 3.730759545264557e-05,
      "loss": 0.8776,
      "step": 281160
    },
    {
      "epoch": 0.9854307513835003,
      "grad_norm": 2.984375,
      "learning_rate": 3.730694642398187e-05,
      "loss": 0.822,
      "step": 281170
    },
    {
      "epoch": 0.985465798890396,
      "grad_norm": 2.578125,
      "learning_rate": 3.730629739531817e-05,
      "loss": 0.8949,
      "step": 281180
    },
    {
      "epoch": 0.9855008463972915,
      "grad_norm": 2.703125,
      "learning_rate": 3.730564836665447e-05,
      "loss": 0.8101,
      "step": 281190
    },
    {
      "epoch": 0.9855358939041872,
      "grad_norm": 2.421875,
      "learning_rate": 3.7304999337990764e-05,
      "loss": 0.8656,
      "step": 281200
    },
    {
      "epoch": 0.9855709414110827,
      "grad_norm": 2.859375,
      "learning_rate": 3.7304350309327066e-05,
      "loss": 0.8785,
      "step": 281210
    },
    {
      "epoch": 0.9856059889179783,
      "grad_norm": 3.046875,
      "learning_rate": 3.730370128066336e-05,
      "loss": 0.8942,
      "step": 281220
    },
    {
      "epoch": 0.9856410364248739,
      "grad_norm": 2.8125,
      "learning_rate": 3.730305225199966e-05,
      "loss": 0.9494,
      "step": 281230
    },
    {
      "epoch": 0.9856760839317695,
      "grad_norm": 2.40625,
      "learning_rate": 3.7302403223335956e-05,
      "loss": 0.8922,
      "step": 281240
    },
    {
      "epoch": 0.9857111314386651,
      "grad_norm": 2.859375,
      "learning_rate": 3.730175419467226e-05,
      "loss": 0.8743,
      "step": 281250
    },
    {
      "epoch": 0.9857461789455607,
      "grad_norm": 2.59375,
      "learning_rate": 3.730110516600855e-05,
      "loss": 0.8814,
      "step": 281260
    },
    {
      "epoch": 0.9857812264524562,
      "grad_norm": 2.6875,
      "learning_rate": 3.7300456137344854e-05,
      "loss": 0.9162,
      "step": 281270
    },
    {
      "epoch": 0.9858162739593519,
      "grad_norm": 2.90625,
      "learning_rate": 3.729980710868115e-05,
      "loss": 0.8296,
      "step": 281280
    },
    {
      "epoch": 0.9858513214662475,
      "grad_norm": 3.1875,
      "learning_rate": 3.729915808001745e-05,
      "loss": 0.8811,
      "step": 281290
    },
    {
      "epoch": 0.9858863689731431,
      "grad_norm": 2.71875,
      "learning_rate": 3.7298509051353744e-05,
      "loss": 0.8252,
      "step": 281300
    },
    {
      "epoch": 0.9859214164800387,
      "grad_norm": 3.109375,
      "learning_rate": 3.7297860022690046e-05,
      "loss": 0.8416,
      "step": 281310
    },
    {
      "epoch": 0.9859564639869343,
      "grad_norm": 2.859375,
      "learning_rate": 3.729721099402635e-05,
      "loss": 0.8604,
      "step": 281320
    },
    {
      "epoch": 0.9859915114938299,
      "grad_norm": 2.75,
      "learning_rate": 3.729656196536264e-05,
      "loss": 0.883,
      "step": 281330
    },
    {
      "epoch": 0.9860265590007254,
      "grad_norm": 3.609375,
      "learning_rate": 3.729591293669894e-05,
      "loss": 0.922,
      "step": 281340
    },
    {
      "epoch": 0.9860616065076211,
      "grad_norm": 3.234375,
      "learning_rate": 3.729526390803524e-05,
      "loss": 0.9053,
      "step": 281350
    },
    {
      "epoch": 0.9860966540145166,
      "grad_norm": 2.9375,
      "learning_rate": 3.729461487937154e-05,
      "loss": 0.898,
      "step": 281360
    },
    {
      "epoch": 0.9861317015214123,
      "grad_norm": 2.890625,
      "learning_rate": 3.7293965850707834e-05,
      "loss": 0.8282,
      "step": 281370
    },
    {
      "epoch": 0.9861667490283079,
      "grad_norm": 2.578125,
      "learning_rate": 3.7293316822044135e-05,
      "loss": 0.8333,
      "step": 281380
    },
    {
      "epoch": 0.9862017965352035,
      "grad_norm": 3.015625,
      "learning_rate": 3.729266779338043e-05,
      "loss": 0.828,
      "step": 281390
    },
    {
      "epoch": 0.9862368440420991,
      "grad_norm": 3.109375,
      "learning_rate": 3.7292018764716724e-05,
      "loss": 0.8204,
      "step": 281400
    },
    {
      "epoch": 0.9862718915489946,
      "grad_norm": 2.890625,
      "learning_rate": 3.7291369736053026e-05,
      "loss": 0.8954,
      "step": 281410
    },
    {
      "epoch": 0.9863069390558903,
      "grad_norm": 2.734375,
      "learning_rate": 3.729072070738932e-05,
      "loss": 0.8837,
      "step": 281420
    },
    {
      "epoch": 0.9863419865627858,
      "grad_norm": 2.890625,
      "learning_rate": 3.729007167872562e-05,
      "loss": 0.8051,
      "step": 281430
    },
    {
      "epoch": 0.9863770340696815,
      "grad_norm": 2.921875,
      "learning_rate": 3.7289422650061916e-05,
      "loss": 0.8218,
      "step": 281440
    },
    {
      "epoch": 0.986412081576577,
      "grad_norm": 3.15625,
      "learning_rate": 3.728877362139822e-05,
      "loss": 0.8146,
      "step": 281450
    },
    {
      "epoch": 0.9864471290834727,
      "grad_norm": 3.203125,
      "learning_rate": 3.728812459273451e-05,
      "loss": 0.8403,
      "step": 281460
    },
    {
      "epoch": 0.9864821765903683,
      "grad_norm": 2.53125,
      "learning_rate": 3.7287475564070814e-05,
      "loss": 0.8248,
      "step": 281470
    },
    {
      "epoch": 0.9865172240972638,
      "grad_norm": 3.890625,
      "learning_rate": 3.728682653540711e-05,
      "loss": 0.8354,
      "step": 281480
    },
    {
      "epoch": 0.9865522716041595,
      "grad_norm": 3.125,
      "learning_rate": 3.728617750674341e-05,
      "loss": 0.8245,
      "step": 281490
    },
    {
      "epoch": 0.986587319111055,
      "grad_norm": 2.875,
      "learning_rate": 3.7285528478079704e-05,
      "loss": 0.8552,
      "step": 281500
    },
    {
      "epoch": 0.9866223666179507,
      "grad_norm": 2.953125,
      "learning_rate": 3.7284879449416006e-05,
      "loss": 0.9134,
      "step": 281510
    },
    {
      "epoch": 0.9866574141248462,
      "grad_norm": 2.921875,
      "learning_rate": 3.72842304207523e-05,
      "loss": 0.918,
      "step": 281520
    },
    {
      "epoch": 0.9866924616317418,
      "grad_norm": 3.15625,
      "learning_rate": 3.72835813920886e-05,
      "loss": 0.8845,
      "step": 281530
    },
    {
      "epoch": 0.9867275091386374,
      "grad_norm": 3.046875,
      "learning_rate": 3.7282932363424896e-05,
      "loss": 0.9092,
      "step": 281540
    },
    {
      "epoch": 0.986762556645533,
      "grad_norm": 3.359375,
      "learning_rate": 3.72822833347612e-05,
      "loss": 0.8247,
      "step": 281550
    },
    {
      "epoch": 0.9867976041524286,
      "grad_norm": 2.78125,
      "learning_rate": 3.72816343060975e-05,
      "loss": 0.9226,
      "step": 281560
    },
    {
      "epoch": 0.9868326516593242,
      "grad_norm": 3.015625,
      "learning_rate": 3.7280985277433794e-05,
      "loss": 0.8402,
      "step": 281570
    },
    {
      "epoch": 0.9868676991662199,
      "grad_norm": 2.703125,
      "learning_rate": 3.7280336248770095e-05,
      "loss": 0.8902,
      "step": 281580
    },
    {
      "epoch": 0.9869027466731154,
      "grad_norm": 3.21875,
      "learning_rate": 3.727968722010639e-05,
      "loss": 0.7895,
      "step": 281590
    },
    {
      "epoch": 0.986937794180011,
      "grad_norm": 2.796875,
      "learning_rate": 3.727903819144269e-05,
      "loss": 0.8932,
      "step": 281600
    },
    {
      "epoch": 0.9869728416869066,
      "grad_norm": 3.15625,
      "learning_rate": 3.7278389162778986e-05,
      "loss": 1.0038,
      "step": 281610
    },
    {
      "epoch": 0.9870078891938022,
      "grad_norm": 2.375,
      "learning_rate": 3.727774013411529e-05,
      "loss": 0.8738,
      "step": 281620
    },
    {
      "epoch": 0.9870429367006978,
      "grad_norm": 2.5625,
      "learning_rate": 3.727709110545158e-05,
      "loss": 0.7463,
      "step": 281630
    },
    {
      "epoch": 0.9870779842075934,
      "grad_norm": 2.875,
      "learning_rate": 3.727644207678788e-05,
      "loss": 0.8421,
      "step": 281640
    },
    {
      "epoch": 0.9871130317144889,
      "grad_norm": 3.140625,
      "learning_rate": 3.727579304812418e-05,
      "loss": 0.9111,
      "step": 281650
    },
    {
      "epoch": 0.9871480792213846,
      "grad_norm": 3.0625,
      "learning_rate": 3.727514401946048e-05,
      "loss": 0.9305,
      "step": 281660
    },
    {
      "epoch": 0.9871831267282802,
      "grad_norm": 2.859375,
      "learning_rate": 3.7274494990796774e-05,
      "loss": 0.9826,
      "step": 281670
    },
    {
      "epoch": 0.9872181742351758,
      "grad_norm": 3.234375,
      "learning_rate": 3.7273845962133075e-05,
      "loss": 0.9065,
      "step": 281680
    },
    {
      "epoch": 0.9872532217420714,
      "grad_norm": 2.578125,
      "learning_rate": 3.7273196933469376e-05,
      "loss": 0.8516,
      "step": 281690
    },
    {
      "epoch": 0.987288269248967,
      "grad_norm": 3.359375,
      "learning_rate": 3.727254790480567e-05,
      "loss": 0.8071,
      "step": 281700
    },
    {
      "epoch": 0.9873233167558626,
      "grad_norm": 2.828125,
      "learning_rate": 3.727189887614197e-05,
      "loss": 0.8641,
      "step": 281710
    },
    {
      "epoch": 0.9873583642627581,
      "grad_norm": 2.796875,
      "learning_rate": 3.727124984747827e-05,
      "loss": 0.9024,
      "step": 281720
    },
    {
      "epoch": 0.9873934117696538,
      "grad_norm": 3.09375,
      "learning_rate": 3.727060081881457e-05,
      "loss": 0.8653,
      "step": 281730
    },
    {
      "epoch": 0.9874284592765493,
      "grad_norm": 3.125,
      "learning_rate": 3.726995179015086e-05,
      "loss": 0.8952,
      "step": 281740
    },
    {
      "epoch": 0.987463506783445,
      "grad_norm": 2.84375,
      "learning_rate": 3.7269302761487164e-05,
      "loss": 0.8549,
      "step": 281750
    },
    {
      "epoch": 0.9874985542903405,
      "grad_norm": 2.515625,
      "learning_rate": 3.726865373282346e-05,
      "loss": 0.8827,
      "step": 281760
    },
    {
      "epoch": 0.9875336017972361,
      "grad_norm": 3.203125,
      "learning_rate": 3.7268004704159754e-05,
      "loss": 0.8232,
      "step": 281770
    },
    {
      "epoch": 0.9875686493041318,
      "grad_norm": 3.59375,
      "learning_rate": 3.7267355675496055e-05,
      "loss": 0.8924,
      "step": 281780
    },
    {
      "epoch": 0.9876036968110273,
      "grad_norm": 3.09375,
      "learning_rate": 3.726670664683235e-05,
      "loss": 0.8769,
      "step": 281790
    },
    {
      "epoch": 0.987638744317923,
      "grad_norm": 2.484375,
      "learning_rate": 3.726605761816865e-05,
      "loss": 0.8638,
      "step": 281800
    },
    {
      "epoch": 0.9876737918248185,
      "grad_norm": 2.75,
      "learning_rate": 3.7265408589504946e-05,
      "loss": 0.8839,
      "step": 281810
    },
    {
      "epoch": 0.9877088393317142,
      "grad_norm": 2.96875,
      "learning_rate": 3.726475956084125e-05,
      "loss": 0.8325,
      "step": 281820
    },
    {
      "epoch": 0.9877438868386097,
      "grad_norm": 2.71875,
      "learning_rate": 3.726411053217754e-05,
      "loss": 0.8544,
      "step": 281830
    },
    {
      "epoch": 0.9877789343455053,
      "grad_norm": 3.078125,
      "learning_rate": 3.726346150351384e-05,
      "loss": 0.8035,
      "step": 281840
    },
    {
      "epoch": 0.9878139818524009,
      "grad_norm": 3.09375,
      "learning_rate": 3.726281247485014e-05,
      "loss": 0.9439,
      "step": 281850
    },
    {
      "epoch": 0.9878490293592965,
      "grad_norm": 3.109375,
      "learning_rate": 3.726216344618644e-05,
      "loss": 0.8615,
      "step": 281860
    },
    {
      "epoch": 0.9878840768661922,
      "grad_norm": 3.109375,
      "learning_rate": 3.7261514417522734e-05,
      "loss": 0.962,
      "step": 281870
    },
    {
      "epoch": 0.9879191243730877,
      "grad_norm": 2.96875,
      "learning_rate": 3.7260865388859035e-05,
      "loss": 0.9114,
      "step": 281880
    },
    {
      "epoch": 0.9879541718799834,
      "grad_norm": 2.890625,
      "learning_rate": 3.726021636019533e-05,
      "loss": 0.922,
      "step": 281890
    },
    {
      "epoch": 0.9879892193868789,
      "grad_norm": 2.84375,
      "learning_rate": 3.725956733153163e-05,
      "loss": 0.8902,
      "step": 281900
    },
    {
      "epoch": 0.9880242668937745,
      "grad_norm": 3.015625,
      "learning_rate": 3.725891830286793e-05,
      "loss": 0.9528,
      "step": 281910
    },
    {
      "epoch": 0.9880593144006701,
      "grad_norm": 3.0,
      "learning_rate": 3.725826927420423e-05,
      "loss": 0.865,
      "step": 281920
    },
    {
      "epoch": 0.9880943619075657,
      "grad_norm": 3.0625,
      "learning_rate": 3.725762024554053e-05,
      "loss": 0.917,
      "step": 281930
    },
    {
      "epoch": 0.9881294094144613,
      "grad_norm": 2.65625,
      "learning_rate": 3.725697121687682e-05,
      "loss": 0.8594,
      "step": 281940
    },
    {
      "epoch": 0.9881644569213569,
      "grad_norm": 3.0625,
      "learning_rate": 3.7256322188213124e-05,
      "loss": 0.9035,
      "step": 281950
    },
    {
      "epoch": 0.9881995044282526,
      "grad_norm": 3.6875,
      "learning_rate": 3.725567315954942e-05,
      "loss": 0.9418,
      "step": 281960
    },
    {
      "epoch": 0.9882345519351481,
      "grad_norm": 2.78125,
      "learning_rate": 3.725502413088572e-05,
      "loss": 0.8566,
      "step": 281970
    },
    {
      "epoch": 0.9882695994420437,
      "grad_norm": 3.140625,
      "learning_rate": 3.7254375102222015e-05,
      "loss": 0.9609,
      "step": 281980
    },
    {
      "epoch": 0.9883046469489393,
      "grad_norm": 2.734375,
      "learning_rate": 3.7253726073558316e-05,
      "loss": 0.8841,
      "step": 281990
    },
    {
      "epoch": 0.9883396944558349,
      "grad_norm": 3.15625,
      "learning_rate": 3.725307704489461e-05,
      "loss": 0.9118,
      "step": 282000
    },
    {
      "epoch": 0.9883747419627305,
      "grad_norm": 3.0625,
      "learning_rate": 3.725242801623091e-05,
      "loss": 0.8002,
      "step": 282010
    },
    {
      "epoch": 0.9884097894696261,
      "grad_norm": 3.046875,
      "learning_rate": 3.725177898756721e-05,
      "loss": 0.8866,
      "step": 282020
    },
    {
      "epoch": 0.9884448369765216,
      "grad_norm": 2.6875,
      "learning_rate": 3.725112995890351e-05,
      "loss": 0.8328,
      "step": 282030
    },
    {
      "epoch": 0.9884798844834173,
      "grad_norm": 3.796875,
      "learning_rate": 3.72504809302398e-05,
      "loss": 0.9324,
      "step": 282040
    },
    {
      "epoch": 0.9885149319903128,
      "grad_norm": 3.546875,
      "learning_rate": 3.7249831901576104e-05,
      "loss": 0.9035,
      "step": 282050
    },
    {
      "epoch": 0.9885499794972085,
      "grad_norm": 2.78125,
      "learning_rate": 3.7249182872912406e-05,
      "loss": 0.817,
      "step": 282060
    },
    {
      "epoch": 0.9885850270041041,
      "grad_norm": 2.484375,
      "learning_rate": 3.72485338442487e-05,
      "loss": 0.7858,
      "step": 282070
    },
    {
      "epoch": 0.9886200745109996,
      "grad_norm": 3.03125,
      "learning_rate": 3.7247884815585e-05,
      "loss": 0.8347,
      "step": 282080
    },
    {
      "epoch": 0.9886551220178953,
      "grad_norm": 2.78125,
      "learning_rate": 3.7247235786921296e-05,
      "loss": 0.8841,
      "step": 282090
    },
    {
      "epoch": 0.9886901695247908,
      "grad_norm": 2.859375,
      "learning_rate": 3.72465867582576e-05,
      "loss": 0.9388,
      "step": 282100
    },
    {
      "epoch": 0.9887252170316865,
      "grad_norm": 2.8125,
      "learning_rate": 3.724593772959389e-05,
      "loss": 0.9353,
      "step": 282110
    },
    {
      "epoch": 0.988760264538582,
      "grad_norm": 3.5625,
      "learning_rate": 3.7245288700930194e-05,
      "loss": 0.8534,
      "step": 282120
    },
    {
      "epoch": 0.9887953120454777,
      "grad_norm": 2.875,
      "learning_rate": 3.724463967226649e-05,
      "loss": 0.8428,
      "step": 282130
    },
    {
      "epoch": 0.9888303595523732,
      "grad_norm": 3.59375,
      "learning_rate": 3.724399064360279e-05,
      "loss": 0.8274,
      "step": 282140
    },
    {
      "epoch": 0.9888654070592688,
      "grad_norm": 2.984375,
      "learning_rate": 3.7243341614939084e-05,
      "loss": 0.861,
      "step": 282150
    },
    {
      "epoch": 0.9889004545661645,
      "grad_norm": 2.765625,
      "learning_rate": 3.724269258627538e-05,
      "loss": 0.8007,
      "step": 282160
    },
    {
      "epoch": 0.98893550207306,
      "grad_norm": 3.40625,
      "learning_rate": 3.724204355761168e-05,
      "loss": 0.9175,
      "step": 282170
    },
    {
      "epoch": 0.9889705495799557,
      "grad_norm": 2.890625,
      "learning_rate": 3.7241394528947975e-05,
      "loss": 0.8927,
      "step": 282180
    },
    {
      "epoch": 0.9890055970868512,
      "grad_norm": 3.0625,
      "learning_rate": 3.7240745500284276e-05,
      "loss": 0.8465,
      "step": 282190
    },
    {
      "epoch": 0.9890406445937469,
      "grad_norm": 3.03125,
      "learning_rate": 3.724009647162057e-05,
      "loss": 0.8543,
      "step": 282200
    },
    {
      "epoch": 0.9890756921006424,
      "grad_norm": 2.8125,
      "learning_rate": 3.723944744295687e-05,
      "loss": 0.8997,
      "step": 282210
    },
    {
      "epoch": 0.989110739607538,
      "grad_norm": 3.046875,
      "learning_rate": 3.723879841429317e-05,
      "loss": 0.8784,
      "step": 282220
    },
    {
      "epoch": 0.9891457871144336,
      "grad_norm": 2.40625,
      "learning_rate": 3.723814938562947e-05,
      "loss": 0.7959,
      "step": 282230
    },
    {
      "epoch": 0.9891808346213292,
      "grad_norm": 2.796875,
      "learning_rate": 3.723750035696576e-05,
      "loss": 0.8615,
      "step": 282240
    },
    {
      "epoch": 0.9892158821282248,
      "grad_norm": 2.8125,
      "learning_rate": 3.7236851328302064e-05,
      "loss": 0.8152,
      "step": 282250
    },
    {
      "epoch": 0.9892509296351204,
      "grad_norm": 3.078125,
      "learning_rate": 3.723620229963836e-05,
      "loss": 0.8529,
      "step": 282260
    },
    {
      "epoch": 0.989285977142016,
      "grad_norm": 2.609375,
      "learning_rate": 3.723555327097466e-05,
      "loss": 0.8817,
      "step": 282270
    },
    {
      "epoch": 0.9893210246489116,
      "grad_norm": 2.8125,
      "learning_rate": 3.723490424231096e-05,
      "loss": 0.8638,
      "step": 282280
    },
    {
      "epoch": 0.9893560721558072,
      "grad_norm": 3.078125,
      "learning_rate": 3.7234255213647256e-05,
      "loss": 0.9252,
      "step": 282290
    },
    {
      "epoch": 0.9893911196627028,
      "grad_norm": 2.8125,
      "learning_rate": 3.723360618498356e-05,
      "loss": 0.9019,
      "step": 282300
    },
    {
      "epoch": 0.9894261671695984,
      "grad_norm": 2.59375,
      "learning_rate": 3.723295715631985e-05,
      "loss": 0.804,
      "step": 282310
    },
    {
      "epoch": 0.989461214676494,
      "grad_norm": 3.0625,
      "learning_rate": 3.7232308127656154e-05,
      "loss": 0.9535,
      "step": 282320
    },
    {
      "epoch": 0.9894962621833896,
      "grad_norm": 2.828125,
      "learning_rate": 3.723165909899245e-05,
      "loss": 0.9066,
      "step": 282330
    },
    {
      "epoch": 0.9895313096902851,
      "grad_norm": 2.890625,
      "learning_rate": 3.723101007032875e-05,
      "loss": 0.7228,
      "step": 282340
    },
    {
      "epoch": 0.9895663571971808,
      "grad_norm": 2.71875,
      "learning_rate": 3.7230361041665044e-05,
      "loss": 0.863,
      "step": 282350
    },
    {
      "epoch": 0.9896014047040764,
      "grad_norm": 3.234375,
      "learning_rate": 3.7229712013001346e-05,
      "loss": 0.7871,
      "step": 282360
    },
    {
      "epoch": 0.989636452210972,
      "grad_norm": 2.859375,
      "learning_rate": 3.722906298433764e-05,
      "loss": 0.8052,
      "step": 282370
    },
    {
      "epoch": 0.9896714997178676,
      "grad_norm": 3.328125,
      "learning_rate": 3.722841395567394e-05,
      "loss": 0.8589,
      "step": 282380
    },
    {
      "epoch": 0.9897065472247631,
      "grad_norm": 2.578125,
      "learning_rate": 3.7227764927010236e-05,
      "loss": 0.8587,
      "step": 282390
    },
    {
      "epoch": 0.9897415947316588,
      "grad_norm": 2.59375,
      "learning_rate": 3.722711589834654e-05,
      "loss": 0.8585,
      "step": 282400
    },
    {
      "epoch": 0.9897766422385543,
      "grad_norm": 3.0625,
      "learning_rate": 3.722646686968283e-05,
      "loss": 0.8803,
      "step": 282410
    },
    {
      "epoch": 0.98981168974545,
      "grad_norm": 2.578125,
      "learning_rate": 3.7225817841019134e-05,
      "loss": 0.8798,
      "step": 282420
    },
    {
      "epoch": 0.9898467372523455,
      "grad_norm": 2.953125,
      "learning_rate": 3.7225168812355435e-05,
      "loss": 0.8843,
      "step": 282430
    },
    {
      "epoch": 0.9898817847592412,
      "grad_norm": 2.796875,
      "learning_rate": 3.722451978369173e-05,
      "loss": 0.8351,
      "step": 282440
    },
    {
      "epoch": 0.9899168322661368,
      "grad_norm": 2.921875,
      "learning_rate": 3.722387075502803e-05,
      "loss": 0.8595,
      "step": 282450
    },
    {
      "epoch": 0.9899518797730323,
      "grad_norm": 3.109375,
      "learning_rate": 3.7223221726364326e-05,
      "loss": 0.7991,
      "step": 282460
    },
    {
      "epoch": 0.989986927279928,
      "grad_norm": 2.515625,
      "learning_rate": 3.722257269770063e-05,
      "loss": 0.8519,
      "step": 282470
    },
    {
      "epoch": 0.9900219747868235,
      "grad_norm": 2.515625,
      "learning_rate": 3.722192366903692e-05,
      "loss": 0.8988,
      "step": 282480
    },
    {
      "epoch": 0.9900570222937192,
      "grad_norm": 3.09375,
      "learning_rate": 3.722127464037322e-05,
      "loss": 0.8358,
      "step": 282490
    },
    {
      "epoch": 0.9900920698006147,
      "grad_norm": 3.03125,
      "learning_rate": 3.722062561170952e-05,
      "loss": 0.811,
      "step": 282500
    },
    {
      "epoch": 0.9901271173075104,
      "grad_norm": 3.1875,
      "learning_rate": 3.721997658304582e-05,
      "loss": 0.9586,
      "step": 282510
    },
    {
      "epoch": 0.9901621648144059,
      "grad_norm": 2.953125,
      "learning_rate": 3.7219327554382114e-05,
      "loss": 0.8027,
      "step": 282520
    },
    {
      "epoch": 0.9901972123213015,
      "grad_norm": 3.375,
      "learning_rate": 3.721867852571841e-05,
      "loss": 0.9569,
      "step": 282530
    },
    {
      "epoch": 0.9902322598281971,
      "grad_norm": 2.921875,
      "learning_rate": 3.721802949705471e-05,
      "loss": 0.9041,
      "step": 282540
    },
    {
      "epoch": 0.9902673073350927,
      "grad_norm": 3.203125,
      "learning_rate": 3.7217380468391004e-05,
      "loss": 0.9459,
      "step": 282550
    },
    {
      "epoch": 0.9903023548419884,
      "grad_norm": 3.265625,
      "learning_rate": 3.7216731439727306e-05,
      "loss": 0.8836,
      "step": 282560
    },
    {
      "epoch": 0.9903374023488839,
      "grad_norm": 3.296875,
      "learning_rate": 3.72160824110636e-05,
      "loss": 0.8749,
      "step": 282570
    },
    {
      "epoch": 0.9903724498557795,
      "grad_norm": 2.5625,
      "learning_rate": 3.72154333823999e-05,
      "loss": 0.9036,
      "step": 282580
    },
    {
      "epoch": 0.9904074973626751,
      "grad_norm": 3.09375,
      "learning_rate": 3.7214784353736196e-05,
      "loss": 0.9335,
      "step": 282590
    },
    {
      "epoch": 0.9904425448695707,
      "grad_norm": 2.796875,
      "learning_rate": 3.72141353250725e-05,
      "loss": 0.8628,
      "step": 282600
    },
    {
      "epoch": 0.9904775923764663,
      "grad_norm": 2.84375,
      "learning_rate": 3.721348629640879e-05,
      "loss": 0.8611,
      "step": 282610
    },
    {
      "epoch": 0.9905126398833619,
      "grad_norm": 3.03125,
      "learning_rate": 3.7212837267745094e-05,
      "loss": 0.9413,
      "step": 282620
    },
    {
      "epoch": 0.9905476873902574,
      "grad_norm": 3.09375,
      "learning_rate": 3.721218823908139e-05,
      "loss": 0.8642,
      "step": 282630
    },
    {
      "epoch": 0.9905827348971531,
      "grad_norm": 3.203125,
      "learning_rate": 3.721153921041769e-05,
      "loss": 0.8241,
      "step": 282640
    },
    {
      "epoch": 0.9906177824040487,
      "grad_norm": 2.859375,
      "learning_rate": 3.721089018175399e-05,
      "loss": 0.9003,
      "step": 282650
    },
    {
      "epoch": 0.9906528299109443,
      "grad_norm": 2.640625,
      "learning_rate": 3.7210241153090286e-05,
      "loss": 0.9052,
      "step": 282660
    },
    {
      "epoch": 0.9906878774178399,
      "grad_norm": 3.15625,
      "learning_rate": 3.720959212442659e-05,
      "loss": 0.9478,
      "step": 282670
    },
    {
      "epoch": 0.9907229249247355,
      "grad_norm": 2.8125,
      "learning_rate": 3.720894309576288e-05,
      "loss": 0.8254,
      "step": 282680
    },
    {
      "epoch": 0.9907579724316311,
      "grad_norm": 3.03125,
      "learning_rate": 3.720829406709918e-05,
      "loss": 0.8575,
      "step": 282690
    },
    {
      "epoch": 0.9907930199385266,
      "grad_norm": 2.828125,
      "learning_rate": 3.720764503843548e-05,
      "loss": 0.8969,
      "step": 282700
    },
    {
      "epoch": 0.9908280674454223,
      "grad_norm": 2.859375,
      "learning_rate": 3.720699600977178e-05,
      "loss": 0.9065,
      "step": 282710
    },
    {
      "epoch": 0.9908631149523178,
      "grad_norm": 3.046875,
      "learning_rate": 3.7206346981108074e-05,
      "loss": 0.8673,
      "step": 282720
    },
    {
      "epoch": 0.9908981624592135,
      "grad_norm": 2.875,
      "learning_rate": 3.7205697952444375e-05,
      "loss": 0.9376,
      "step": 282730
    },
    {
      "epoch": 0.990933209966109,
      "grad_norm": 2.609375,
      "learning_rate": 3.720504892378067e-05,
      "loss": 0.7997,
      "step": 282740
    },
    {
      "epoch": 0.9909682574730047,
      "grad_norm": 3.1875,
      "learning_rate": 3.720439989511697e-05,
      "loss": 0.8591,
      "step": 282750
    },
    {
      "epoch": 0.9910033049799003,
      "grad_norm": 2.8125,
      "learning_rate": 3.7203750866453266e-05,
      "loss": 0.8282,
      "step": 282760
    },
    {
      "epoch": 0.9910383524867958,
      "grad_norm": 3.078125,
      "learning_rate": 3.720310183778957e-05,
      "loss": 0.8137,
      "step": 282770
    },
    {
      "epoch": 0.9910733999936915,
      "grad_norm": 2.875,
      "learning_rate": 3.720245280912586e-05,
      "loss": 0.8453,
      "step": 282780
    },
    {
      "epoch": 0.991108447500587,
      "grad_norm": 2.609375,
      "learning_rate": 3.720180378046216e-05,
      "loss": 0.8485,
      "step": 282790
    },
    {
      "epoch": 0.9911434950074827,
      "grad_norm": 2.75,
      "learning_rate": 3.7201154751798464e-05,
      "loss": 0.8744,
      "step": 282800
    },
    {
      "epoch": 0.9911785425143782,
      "grad_norm": 3.234375,
      "learning_rate": 3.720050572313476e-05,
      "loss": 0.9233,
      "step": 282810
    },
    {
      "epoch": 0.9912135900212738,
      "grad_norm": 3.1875,
      "learning_rate": 3.719985669447106e-05,
      "loss": 0.8937,
      "step": 282820
    },
    {
      "epoch": 0.9912486375281694,
      "grad_norm": 2.65625,
      "learning_rate": 3.7199207665807355e-05,
      "loss": 0.9364,
      "step": 282830
    },
    {
      "epoch": 0.991283685035065,
      "grad_norm": 2.984375,
      "learning_rate": 3.7198558637143656e-05,
      "loss": 0.8264,
      "step": 282840
    },
    {
      "epoch": 0.9913187325419607,
      "grad_norm": 3.109375,
      "learning_rate": 3.719790960847995e-05,
      "loss": 0.8327,
      "step": 282850
    },
    {
      "epoch": 0.9913537800488562,
      "grad_norm": 2.53125,
      "learning_rate": 3.719726057981625e-05,
      "loss": 0.8822,
      "step": 282860
    },
    {
      "epoch": 0.9913888275557519,
      "grad_norm": 2.6875,
      "learning_rate": 3.719661155115255e-05,
      "loss": 0.9098,
      "step": 282870
    },
    {
      "epoch": 0.9914238750626474,
      "grad_norm": 2.84375,
      "learning_rate": 3.719596252248885e-05,
      "loss": 0.855,
      "step": 282880
    },
    {
      "epoch": 0.991458922569543,
      "grad_norm": 2.890625,
      "learning_rate": 3.719531349382514e-05,
      "loss": 0.8711,
      "step": 282890
    },
    {
      "epoch": 0.9914939700764386,
      "grad_norm": 3.0625,
      "learning_rate": 3.719466446516144e-05,
      "loss": 0.9138,
      "step": 282900
    },
    {
      "epoch": 0.9915290175833342,
      "grad_norm": 3.0625,
      "learning_rate": 3.719401543649774e-05,
      "loss": 0.8548,
      "step": 282910
    },
    {
      "epoch": 0.9915640650902298,
      "grad_norm": 2.859375,
      "learning_rate": 3.7193366407834034e-05,
      "loss": 0.952,
      "step": 282920
    },
    {
      "epoch": 0.9915991125971254,
      "grad_norm": 2.953125,
      "learning_rate": 3.7192717379170335e-05,
      "loss": 0.9071,
      "step": 282930
    },
    {
      "epoch": 0.991634160104021,
      "grad_norm": 2.734375,
      "learning_rate": 3.719206835050663e-05,
      "loss": 0.9227,
      "step": 282940
    },
    {
      "epoch": 0.9916692076109166,
      "grad_norm": 2.953125,
      "learning_rate": 3.719141932184293e-05,
      "loss": 0.9092,
      "step": 282950
    },
    {
      "epoch": 0.9917042551178122,
      "grad_norm": 2.671875,
      "learning_rate": 3.7190770293179226e-05,
      "loss": 0.8891,
      "step": 282960
    },
    {
      "epoch": 0.9917393026247078,
      "grad_norm": 3.015625,
      "learning_rate": 3.719012126451553e-05,
      "loss": 0.8624,
      "step": 282970
    },
    {
      "epoch": 0.9917743501316034,
      "grad_norm": 2.796875,
      "learning_rate": 3.718947223585182e-05,
      "loss": 0.8421,
      "step": 282980
    },
    {
      "epoch": 0.991809397638499,
      "grad_norm": 3.078125,
      "learning_rate": 3.718882320718812e-05,
      "loss": 0.8998,
      "step": 282990
    },
    {
      "epoch": 0.9918444451453946,
      "grad_norm": 2.84375,
      "learning_rate": 3.718817417852442e-05,
      "loss": 0.8595,
      "step": 283000
    },
    {
      "epoch": 0.9918794926522901,
      "grad_norm": 2.8125,
      "learning_rate": 3.718752514986072e-05,
      "loss": 0.9018,
      "step": 283010
    },
    {
      "epoch": 0.9919145401591858,
      "grad_norm": 3.15625,
      "learning_rate": 3.718687612119702e-05,
      "loss": 0.8893,
      "step": 283020
    },
    {
      "epoch": 0.9919495876660813,
      "grad_norm": 3.140625,
      "learning_rate": 3.7186227092533315e-05,
      "loss": 0.8283,
      "step": 283030
    },
    {
      "epoch": 0.991984635172977,
      "grad_norm": 2.875,
      "learning_rate": 3.7185578063869616e-05,
      "loss": 0.7672,
      "step": 283040
    },
    {
      "epoch": 0.9920196826798726,
      "grad_norm": 2.90625,
      "learning_rate": 3.718492903520591e-05,
      "loss": 0.9054,
      "step": 283050
    },
    {
      "epoch": 0.9920547301867682,
      "grad_norm": 3.109375,
      "learning_rate": 3.718428000654221e-05,
      "loss": 0.8793,
      "step": 283060
    },
    {
      "epoch": 0.9920897776936638,
      "grad_norm": 2.96875,
      "learning_rate": 3.718363097787851e-05,
      "loss": 0.8546,
      "step": 283070
    },
    {
      "epoch": 0.9921248252005593,
      "grad_norm": 2.671875,
      "learning_rate": 3.718298194921481e-05,
      "loss": 0.8815,
      "step": 283080
    },
    {
      "epoch": 0.992159872707455,
      "grad_norm": 3.078125,
      "learning_rate": 3.71823329205511e-05,
      "loss": 0.8064,
      "step": 283090
    },
    {
      "epoch": 0.9921949202143505,
      "grad_norm": 2.734375,
      "learning_rate": 3.7181683891887404e-05,
      "loss": 0.9587,
      "step": 283100
    },
    {
      "epoch": 0.9922299677212462,
      "grad_norm": 2.921875,
      "learning_rate": 3.71810348632237e-05,
      "loss": 0.8504,
      "step": 283110
    },
    {
      "epoch": 0.9922650152281417,
      "grad_norm": 3.1875,
      "learning_rate": 3.718038583456e-05,
      "loss": 0.8694,
      "step": 283120
    },
    {
      "epoch": 0.9923000627350373,
      "grad_norm": 2.921875,
      "learning_rate": 3.7179736805896295e-05,
      "loss": 0.8429,
      "step": 283130
    },
    {
      "epoch": 0.992335110241933,
      "grad_norm": 3.234375,
      "learning_rate": 3.7179087777232596e-05,
      "loss": 0.8364,
      "step": 283140
    },
    {
      "epoch": 0.9923701577488285,
      "grad_norm": 3.109375,
      "learning_rate": 3.71784387485689e-05,
      "loss": 0.8585,
      "step": 283150
    },
    {
      "epoch": 0.9924052052557242,
      "grad_norm": 3.4375,
      "learning_rate": 3.717778971990519e-05,
      "loss": 0.9734,
      "step": 283160
    },
    {
      "epoch": 0.9924402527626197,
      "grad_norm": 2.75,
      "learning_rate": 3.7177140691241494e-05,
      "loss": 0.8578,
      "step": 283170
    },
    {
      "epoch": 0.9924753002695154,
      "grad_norm": 2.984375,
      "learning_rate": 3.717649166257779e-05,
      "loss": 0.8167,
      "step": 283180
    },
    {
      "epoch": 0.9925103477764109,
      "grad_norm": 2.359375,
      "learning_rate": 3.717584263391409e-05,
      "loss": 0.8247,
      "step": 283190
    },
    {
      "epoch": 0.9925453952833065,
      "grad_norm": 2.671875,
      "learning_rate": 3.7175193605250384e-05,
      "loss": 0.8889,
      "step": 283200
    },
    {
      "epoch": 0.9925804427902021,
      "grad_norm": 2.8125,
      "learning_rate": 3.7174544576586686e-05,
      "loss": 0.9297,
      "step": 283210
    },
    {
      "epoch": 0.9926154902970977,
      "grad_norm": 2.84375,
      "learning_rate": 3.717389554792298e-05,
      "loss": 0.8326,
      "step": 283220
    },
    {
      "epoch": 0.9926505378039933,
      "grad_norm": 3.21875,
      "learning_rate": 3.717324651925928e-05,
      "loss": 0.8322,
      "step": 283230
    },
    {
      "epoch": 0.9926855853108889,
      "grad_norm": 2.859375,
      "learning_rate": 3.7172597490595576e-05,
      "loss": 0.8782,
      "step": 283240
    },
    {
      "epoch": 0.9927206328177846,
      "grad_norm": 2.640625,
      "learning_rate": 3.717194846193188e-05,
      "loss": 0.7856,
      "step": 283250
    },
    {
      "epoch": 0.9927556803246801,
      "grad_norm": 3.46875,
      "learning_rate": 3.717129943326817e-05,
      "loss": 0.9216,
      "step": 283260
    },
    {
      "epoch": 0.9927907278315757,
      "grad_norm": 3.0625,
      "learning_rate": 3.7170650404604474e-05,
      "loss": 0.8695,
      "step": 283270
    },
    {
      "epoch": 0.9928257753384713,
      "grad_norm": 2.953125,
      "learning_rate": 3.717000137594077e-05,
      "loss": 0.9186,
      "step": 283280
    },
    {
      "epoch": 0.9928608228453669,
      "grad_norm": 3.0,
      "learning_rate": 3.716935234727706e-05,
      "loss": 0.8664,
      "step": 283290
    },
    {
      "epoch": 0.9928958703522625,
      "grad_norm": 3.21875,
      "learning_rate": 3.7168703318613364e-05,
      "loss": 0.8938,
      "step": 283300
    },
    {
      "epoch": 0.9929309178591581,
      "grad_norm": 3.109375,
      "learning_rate": 3.716805428994966e-05,
      "loss": 0.9609,
      "step": 283310
    },
    {
      "epoch": 0.9929659653660536,
      "grad_norm": 2.703125,
      "learning_rate": 3.716740526128596e-05,
      "loss": 0.9301,
      "step": 283320
    },
    {
      "epoch": 0.9930010128729493,
      "grad_norm": 2.796875,
      "learning_rate": 3.7166756232622255e-05,
      "loss": 0.85,
      "step": 283330
    },
    {
      "epoch": 0.9930360603798449,
      "grad_norm": 3.265625,
      "learning_rate": 3.7166107203958556e-05,
      "loss": 0.8878,
      "step": 283340
    },
    {
      "epoch": 0.9930711078867405,
      "grad_norm": 3.078125,
      "learning_rate": 3.716545817529485e-05,
      "loss": 0.9766,
      "step": 283350
    },
    {
      "epoch": 0.9931061553936361,
      "grad_norm": 2.765625,
      "learning_rate": 3.716480914663115e-05,
      "loss": 0.8855,
      "step": 283360
    },
    {
      "epoch": 0.9931412029005316,
      "grad_norm": 2.703125,
      "learning_rate": 3.716416011796745e-05,
      "loss": 0.8801,
      "step": 283370
    },
    {
      "epoch": 0.9931762504074273,
      "grad_norm": 2.796875,
      "learning_rate": 3.716351108930375e-05,
      "loss": 0.8386,
      "step": 283380
    },
    {
      "epoch": 0.9932112979143228,
      "grad_norm": 2.96875,
      "learning_rate": 3.716286206064005e-05,
      "loss": 0.9482,
      "step": 283390
    },
    {
      "epoch": 0.9932463454212185,
      "grad_norm": 2.78125,
      "learning_rate": 3.7162213031976344e-05,
      "loss": 0.8875,
      "step": 283400
    },
    {
      "epoch": 0.993281392928114,
      "grad_norm": 2.625,
      "learning_rate": 3.7161564003312646e-05,
      "loss": 0.909,
      "step": 283410
    },
    {
      "epoch": 0.9933164404350097,
      "grad_norm": 3.0625,
      "learning_rate": 3.716091497464894e-05,
      "loss": 0.8906,
      "step": 283420
    },
    {
      "epoch": 0.9933514879419052,
      "grad_norm": 2.390625,
      "learning_rate": 3.716026594598524e-05,
      "loss": 0.8703,
      "step": 283430
    },
    {
      "epoch": 0.9933865354488008,
      "grad_norm": 3.0,
      "learning_rate": 3.7159616917321536e-05,
      "loss": 1.0051,
      "step": 283440
    },
    {
      "epoch": 0.9934215829556965,
      "grad_norm": 3.0,
      "learning_rate": 3.715896788865784e-05,
      "loss": 0.8958,
      "step": 283450
    },
    {
      "epoch": 0.993456630462592,
      "grad_norm": 2.96875,
      "learning_rate": 3.715831885999413e-05,
      "loss": 0.9002,
      "step": 283460
    },
    {
      "epoch": 0.9934916779694877,
      "grad_norm": 2.890625,
      "learning_rate": 3.7157669831330434e-05,
      "loss": 0.9438,
      "step": 283470
    },
    {
      "epoch": 0.9935267254763832,
      "grad_norm": 2.46875,
      "learning_rate": 3.715702080266673e-05,
      "loss": 0.9052,
      "step": 283480
    },
    {
      "epoch": 0.9935617729832789,
      "grad_norm": 3.171875,
      "learning_rate": 3.715637177400303e-05,
      "loss": 0.8401,
      "step": 283490
    },
    {
      "epoch": 0.9935968204901744,
      "grad_norm": 2.6875,
      "learning_rate": 3.7155722745339324e-05,
      "loss": 0.8802,
      "step": 283500
    },
    {
      "epoch": 0.99363186799707,
      "grad_norm": 2.3125,
      "learning_rate": 3.7155073716675626e-05,
      "loss": 0.8172,
      "step": 283510
    },
    {
      "epoch": 0.9936669155039656,
      "grad_norm": 2.65625,
      "learning_rate": 3.715442468801193e-05,
      "loss": 0.9137,
      "step": 283520
    },
    {
      "epoch": 0.9937019630108612,
      "grad_norm": 3.0,
      "learning_rate": 3.715377565934822e-05,
      "loss": 0.9044,
      "step": 283530
    },
    {
      "epoch": 0.9937370105177569,
      "grad_norm": 3.125,
      "learning_rate": 3.715312663068452e-05,
      "loss": 0.8554,
      "step": 283540
    },
    {
      "epoch": 0.9937720580246524,
      "grad_norm": 2.71875,
      "learning_rate": 3.715247760202082e-05,
      "loss": 0.9394,
      "step": 283550
    },
    {
      "epoch": 0.993807105531548,
      "grad_norm": 2.875,
      "learning_rate": 3.715182857335712e-05,
      "loss": 0.921,
      "step": 283560
    },
    {
      "epoch": 0.9938421530384436,
      "grad_norm": 3.25,
      "learning_rate": 3.7151179544693414e-05,
      "loss": 0.93,
      "step": 283570
    },
    {
      "epoch": 0.9938772005453392,
      "grad_norm": 2.65625,
      "learning_rate": 3.7150530516029715e-05,
      "loss": 0.8452,
      "step": 283580
    },
    {
      "epoch": 0.9939122480522348,
      "grad_norm": 2.90625,
      "learning_rate": 3.714988148736601e-05,
      "loss": 0.9017,
      "step": 283590
    },
    {
      "epoch": 0.9939472955591304,
      "grad_norm": 3.171875,
      "learning_rate": 3.714923245870231e-05,
      "loss": 0.8985,
      "step": 283600
    },
    {
      "epoch": 0.993982343066026,
      "grad_norm": 2.828125,
      "learning_rate": 3.7148583430038606e-05,
      "loss": 0.8134,
      "step": 283610
    },
    {
      "epoch": 0.9940173905729216,
      "grad_norm": 3.390625,
      "learning_rate": 3.714793440137491e-05,
      "loss": 0.9025,
      "step": 283620
    },
    {
      "epoch": 0.9940524380798172,
      "grad_norm": 2.84375,
      "learning_rate": 3.71472853727112e-05,
      "loss": 0.8628,
      "step": 283630
    },
    {
      "epoch": 0.9940874855867128,
      "grad_norm": 3.0,
      "learning_rate": 3.71466363440475e-05,
      "loss": 0.8706,
      "step": 283640
    },
    {
      "epoch": 0.9941225330936084,
      "grad_norm": 2.734375,
      "learning_rate": 3.71459873153838e-05,
      "loss": 0.8097,
      "step": 283650
    },
    {
      "epoch": 0.994157580600504,
      "grad_norm": 2.84375,
      "learning_rate": 3.714533828672009e-05,
      "loss": 0.8402,
      "step": 283660
    },
    {
      "epoch": 0.9941926281073996,
      "grad_norm": 2.796875,
      "learning_rate": 3.7144689258056394e-05,
      "loss": 0.9141,
      "step": 283670
    },
    {
      "epoch": 0.9942276756142951,
      "grad_norm": 3.09375,
      "learning_rate": 3.714404022939269e-05,
      "loss": 0.876,
      "step": 283680
    },
    {
      "epoch": 0.9942627231211908,
      "grad_norm": 2.75,
      "learning_rate": 3.714339120072899e-05,
      "loss": 0.8762,
      "step": 283690
    },
    {
      "epoch": 0.9942977706280863,
      "grad_norm": 3.296875,
      "learning_rate": 3.7142742172065284e-05,
      "loss": 0.8463,
      "step": 283700
    },
    {
      "epoch": 0.994332818134982,
      "grad_norm": 3.046875,
      "learning_rate": 3.7142093143401586e-05,
      "loss": 0.9208,
      "step": 283710
    },
    {
      "epoch": 0.9943678656418775,
      "grad_norm": 3.640625,
      "learning_rate": 3.714144411473788e-05,
      "loss": 0.9165,
      "step": 283720
    },
    {
      "epoch": 0.9944029131487732,
      "grad_norm": 3.15625,
      "learning_rate": 3.714079508607418e-05,
      "loss": 0.9249,
      "step": 283730
    },
    {
      "epoch": 0.9944379606556688,
      "grad_norm": 2.78125,
      "learning_rate": 3.7140146057410476e-05,
      "loss": 0.8688,
      "step": 283740
    },
    {
      "epoch": 0.9944730081625643,
      "grad_norm": 2.828125,
      "learning_rate": 3.713949702874678e-05,
      "loss": 0.8667,
      "step": 283750
    },
    {
      "epoch": 0.99450805566946,
      "grad_norm": 2.953125,
      "learning_rate": 3.713884800008308e-05,
      "loss": 0.8905,
      "step": 283760
    },
    {
      "epoch": 0.9945431031763555,
      "grad_norm": 3.0625,
      "learning_rate": 3.7138198971419374e-05,
      "loss": 0.9321,
      "step": 283770
    },
    {
      "epoch": 0.9945781506832512,
      "grad_norm": 3.3125,
      "learning_rate": 3.7137549942755675e-05,
      "loss": 0.8981,
      "step": 283780
    },
    {
      "epoch": 0.9946131981901467,
      "grad_norm": 2.625,
      "learning_rate": 3.713690091409197e-05,
      "loss": 0.8767,
      "step": 283790
    },
    {
      "epoch": 0.9946482456970424,
      "grad_norm": 3.046875,
      "learning_rate": 3.713625188542827e-05,
      "loss": 0.9271,
      "step": 283800
    },
    {
      "epoch": 0.9946832932039379,
      "grad_norm": 2.9375,
      "learning_rate": 3.7135602856764566e-05,
      "loss": 0.8629,
      "step": 283810
    },
    {
      "epoch": 0.9947183407108335,
      "grad_norm": 2.75,
      "learning_rate": 3.713495382810087e-05,
      "loss": 0.9029,
      "step": 283820
    },
    {
      "epoch": 0.9947533882177292,
      "grad_norm": 2.90625,
      "learning_rate": 3.713430479943716e-05,
      "loss": 0.8162,
      "step": 283830
    },
    {
      "epoch": 0.9947884357246247,
      "grad_norm": 3.015625,
      "learning_rate": 3.713365577077346e-05,
      "loss": 0.852,
      "step": 283840
    },
    {
      "epoch": 0.9948234832315204,
      "grad_norm": 2.671875,
      "learning_rate": 3.713300674210976e-05,
      "loss": 0.8523,
      "step": 283850
    },
    {
      "epoch": 0.9948585307384159,
      "grad_norm": 3.015625,
      "learning_rate": 3.713235771344606e-05,
      "loss": 0.9923,
      "step": 283860
    },
    {
      "epoch": 0.9948935782453115,
      "grad_norm": 3.03125,
      "learning_rate": 3.7131708684782354e-05,
      "loss": 0.8728,
      "step": 283870
    },
    {
      "epoch": 0.9949286257522071,
      "grad_norm": 2.796875,
      "learning_rate": 3.7131059656118655e-05,
      "loss": 0.8678,
      "step": 283880
    },
    {
      "epoch": 0.9949636732591027,
      "grad_norm": 2.90625,
      "learning_rate": 3.713041062745496e-05,
      "loss": 0.9065,
      "step": 283890
    },
    {
      "epoch": 0.9949987207659983,
      "grad_norm": 2.921875,
      "learning_rate": 3.712976159879125e-05,
      "loss": 0.8122,
      "step": 283900
    },
    {
      "epoch": 0.9950337682728939,
      "grad_norm": 3.21875,
      "learning_rate": 3.712911257012755e-05,
      "loss": 0.8344,
      "step": 283910
    },
    {
      "epoch": 0.9950688157797895,
      "grad_norm": 3.34375,
      "learning_rate": 3.712846354146385e-05,
      "loss": 0.8704,
      "step": 283920
    },
    {
      "epoch": 0.9951038632866851,
      "grad_norm": 2.734375,
      "learning_rate": 3.712781451280015e-05,
      "loss": 0.8341,
      "step": 283930
    },
    {
      "epoch": 0.9951389107935807,
      "grad_norm": 3.171875,
      "learning_rate": 3.712716548413644e-05,
      "loss": 0.9119,
      "step": 283940
    },
    {
      "epoch": 0.9951739583004763,
      "grad_norm": 2.6875,
      "learning_rate": 3.7126516455472745e-05,
      "loss": 0.8841,
      "step": 283950
    },
    {
      "epoch": 0.9952090058073719,
      "grad_norm": 2.796875,
      "learning_rate": 3.712586742680904e-05,
      "loss": 0.8336,
      "step": 283960
    },
    {
      "epoch": 0.9952440533142675,
      "grad_norm": 3.203125,
      "learning_rate": 3.712521839814534e-05,
      "loss": 0.9897,
      "step": 283970
    },
    {
      "epoch": 0.9952791008211631,
      "grad_norm": 3.25,
      "learning_rate": 3.7124569369481635e-05,
      "loss": 1.0136,
      "step": 283980
    },
    {
      "epoch": 0.9953141483280586,
      "grad_norm": 2.375,
      "learning_rate": 3.712392034081794e-05,
      "loss": 0.8069,
      "step": 283990
    },
    {
      "epoch": 0.9953491958349543,
      "grad_norm": 2.65625,
      "learning_rate": 3.712327131215423e-05,
      "loss": 0.7459,
      "step": 284000
    },
    {
      "epoch": 0.9953842433418498,
      "grad_norm": 3.140625,
      "learning_rate": 3.712262228349053e-05,
      "loss": 0.8967,
      "step": 284010
    },
    {
      "epoch": 0.9954192908487455,
      "grad_norm": 2.796875,
      "learning_rate": 3.7121973254826834e-05,
      "loss": 0.8049,
      "step": 284020
    },
    {
      "epoch": 0.9954543383556411,
      "grad_norm": 2.984375,
      "learning_rate": 3.712132422616312e-05,
      "loss": 0.8256,
      "step": 284030
    },
    {
      "epoch": 0.9954893858625367,
      "grad_norm": 2.84375,
      "learning_rate": 3.712067519749942e-05,
      "loss": 0.8411,
      "step": 284040
    },
    {
      "epoch": 0.9955244333694323,
      "grad_norm": 2.8125,
      "learning_rate": 3.712002616883572e-05,
      "loss": 0.8509,
      "step": 284050
    },
    {
      "epoch": 0.9955594808763278,
      "grad_norm": 2.640625,
      "learning_rate": 3.711937714017202e-05,
      "loss": 0.8767,
      "step": 284060
    },
    {
      "epoch": 0.9955945283832235,
      "grad_norm": 2.84375,
      "learning_rate": 3.7118728111508314e-05,
      "loss": 0.8871,
      "step": 284070
    },
    {
      "epoch": 0.995629575890119,
      "grad_norm": 2.75,
      "learning_rate": 3.7118079082844615e-05,
      "loss": 0.84,
      "step": 284080
    },
    {
      "epoch": 0.9956646233970147,
      "grad_norm": 3.109375,
      "learning_rate": 3.711743005418091e-05,
      "loss": 0.9728,
      "step": 284090
    },
    {
      "epoch": 0.9956996709039102,
      "grad_norm": 3.421875,
      "learning_rate": 3.711678102551721e-05,
      "loss": 0.8878,
      "step": 284100
    },
    {
      "epoch": 0.9957347184108059,
      "grad_norm": 3.0,
      "learning_rate": 3.711613199685351e-05,
      "loss": 0.9753,
      "step": 284110
    },
    {
      "epoch": 0.9957697659177015,
      "grad_norm": 2.65625,
      "learning_rate": 3.711548296818981e-05,
      "loss": 0.8537,
      "step": 284120
    },
    {
      "epoch": 0.995804813424597,
      "grad_norm": 3.421875,
      "learning_rate": 3.711483393952611e-05,
      "loss": 0.9312,
      "step": 284130
    },
    {
      "epoch": 0.9958398609314927,
      "grad_norm": 3.03125,
      "learning_rate": 3.71141849108624e-05,
      "loss": 0.9056,
      "step": 284140
    },
    {
      "epoch": 0.9958749084383882,
      "grad_norm": 2.9375,
      "learning_rate": 3.7113535882198705e-05,
      "loss": 0.872,
      "step": 284150
    },
    {
      "epoch": 0.9959099559452839,
      "grad_norm": 3.171875,
      "learning_rate": 3.7112886853535e-05,
      "loss": 0.9089,
      "step": 284160
    },
    {
      "epoch": 0.9959450034521794,
      "grad_norm": 2.96875,
      "learning_rate": 3.71122378248713e-05,
      "loss": 0.8549,
      "step": 284170
    },
    {
      "epoch": 0.995980050959075,
      "grad_norm": 2.96875,
      "learning_rate": 3.7111588796207595e-05,
      "loss": 0.8829,
      "step": 284180
    },
    {
      "epoch": 0.9960150984659706,
      "grad_norm": 3.171875,
      "learning_rate": 3.71109397675439e-05,
      "loss": 0.9045,
      "step": 284190
    },
    {
      "epoch": 0.9960501459728662,
      "grad_norm": 2.828125,
      "learning_rate": 3.711029073888019e-05,
      "loss": 0.8942,
      "step": 284200
    },
    {
      "epoch": 0.9960851934797618,
      "grad_norm": 3.484375,
      "learning_rate": 3.710964171021649e-05,
      "loss": 0.927,
      "step": 284210
    },
    {
      "epoch": 0.9961202409866574,
      "grad_norm": 2.90625,
      "learning_rate": 3.710899268155279e-05,
      "loss": 0.8051,
      "step": 284220
    },
    {
      "epoch": 0.9961552884935531,
      "grad_norm": 3.328125,
      "learning_rate": 3.710834365288909e-05,
      "loss": 0.8876,
      "step": 284230
    },
    {
      "epoch": 0.9961903360004486,
      "grad_norm": 2.828125,
      "learning_rate": 3.710769462422538e-05,
      "loss": 0.8745,
      "step": 284240
    },
    {
      "epoch": 0.9962253835073442,
      "grad_norm": 2.5,
      "learning_rate": 3.7107045595561685e-05,
      "loss": 0.8995,
      "step": 284250
    },
    {
      "epoch": 0.9962604310142398,
      "grad_norm": 3.0625,
      "learning_rate": 3.7106396566897986e-05,
      "loss": 0.824,
      "step": 284260
    },
    {
      "epoch": 0.9962954785211354,
      "grad_norm": 2.9375,
      "learning_rate": 3.710574753823428e-05,
      "loss": 0.8419,
      "step": 284270
    },
    {
      "epoch": 0.996330526028031,
      "grad_norm": 3.140625,
      "learning_rate": 3.710509850957058e-05,
      "loss": 0.988,
      "step": 284280
    },
    {
      "epoch": 0.9963655735349266,
      "grad_norm": 2.890625,
      "learning_rate": 3.710444948090688e-05,
      "loss": 0.9118,
      "step": 284290
    },
    {
      "epoch": 0.9964006210418221,
      "grad_norm": 2.703125,
      "learning_rate": 3.710380045224318e-05,
      "loss": 0.9064,
      "step": 284300
    },
    {
      "epoch": 0.9964356685487178,
      "grad_norm": 2.890625,
      "learning_rate": 3.710315142357947e-05,
      "loss": 0.9196,
      "step": 284310
    },
    {
      "epoch": 0.9964707160556134,
      "grad_norm": 2.921875,
      "learning_rate": 3.7102502394915774e-05,
      "loss": 0.8022,
      "step": 284320
    },
    {
      "epoch": 0.996505763562509,
      "grad_norm": 2.625,
      "learning_rate": 3.710185336625207e-05,
      "loss": 0.8119,
      "step": 284330
    },
    {
      "epoch": 0.9965408110694046,
      "grad_norm": 2.703125,
      "learning_rate": 3.710120433758837e-05,
      "loss": 0.8468,
      "step": 284340
    },
    {
      "epoch": 0.9965758585763002,
      "grad_norm": 2.796875,
      "learning_rate": 3.7100555308924665e-05,
      "loss": 0.9036,
      "step": 284350
    },
    {
      "epoch": 0.9966109060831958,
      "grad_norm": 3.5,
      "learning_rate": 3.7099906280260966e-05,
      "loss": 0.9366,
      "step": 284360
    },
    {
      "epoch": 0.9966459535900913,
      "grad_norm": 2.71875,
      "learning_rate": 3.709925725159726e-05,
      "loss": 0.9462,
      "step": 284370
    },
    {
      "epoch": 0.996681001096987,
      "grad_norm": 2.921875,
      "learning_rate": 3.709860822293356e-05,
      "loss": 0.9042,
      "step": 284380
    },
    {
      "epoch": 0.9967160486038825,
      "grad_norm": 2.984375,
      "learning_rate": 3.7097959194269863e-05,
      "loss": 0.8793,
      "step": 284390
    },
    {
      "epoch": 0.9967510961107782,
      "grad_norm": 3.03125,
      "learning_rate": 3.709731016560616e-05,
      "loss": 0.8594,
      "step": 284400
    },
    {
      "epoch": 0.9967861436176737,
      "grad_norm": 2.609375,
      "learning_rate": 3.709666113694245e-05,
      "loss": 0.8046,
      "step": 284410
    },
    {
      "epoch": 0.9968211911245694,
      "grad_norm": 2.84375,
      "learning_rate": 3.709601210827875e-05,
      "loss": 0.9193,
      "step": 284420
    },
    {
      "epoch": 0.996856238631465,
      "grad_norm": 2.78125,
      "learning_rate": 3.709536307961505e-05,
      "loss": 0.8124,
      "step": 284430
    },
    {
      "epoch": 0.9968912861383605,
      "grad_norm": 3.265625,
      "learning_rate": 3.709471405095134e-05,
      "loss": 0.9002,
      "step": 284440
    },
    {
      "epoch": 0.9969263336452562,
      "grad_norm": 2.65625,
      "learning_rate": 3.7094065022287645e-05,
      "loss": 0.7843,
      "step": 284450
    },
    {
      "epoch": 0.9969613811521517,
      "grad_norm": 2.578125,
      "learning_rate": 3.709341599362394e-05,
      "loss": 0.8875,
      "step": 284460
    },
    {
      "epoch": 0.9969964286590474,
      "grad_norm": 3.125,
      "learning_rate": 3.709276696496024e-05,
      "loss": 0.8824,
      "step": 284470
    },
    {
      "epoch": 0.9970314761659429,
      "grad_norm": 2.765625,
      "learning_rate": 3.709211793629654e-05,
      "loss": 0.9712,
      "step": 284480
    },
    {
      "epoch": 0.9970665236728385,
      "grad_norm": 3.03125,
      "learning_rate": 3.709146890763284e-05,
      "loss": 0.9013,
      "step": 284490
    },
    {
      "epoch": 0.9971015711797341,
      "grad_norm": 2.953125,
      "learning_rate": 3.709081987896914e-05,
      "loss": 0.8326,
      "step": 284500
    },
    {
      "epoch": 0.9971366186866297,
      "grad_norm": 2.921875,
      "learning_rate": 3.709017085030543e-05,
      "loss": 0.8368,
      "step": 284510
    },
    {
      "epoch": 0.9971716661935254,
      "grad_norm": 2.703125,
      "learning_rate": 3.7089521821641734e-05,
      "loss": 0.9081,
      "step": 284520
    },
    {
      "epoch": 0.9972067137004209,
      "grad_norm": 2.984375,
      "learning_rate": 3.708887279297803e-05,
      "loss": 0.9104,
      "step": 284530
    },
    {
      "epoch": 0.9972417612073166,
      "grad_norm": 2.734375,
      "learning_rate": 3.708822376431433e-05,
      "loss": 0.8056,
      "step": 284540
    },
    {
      "epoch": 0.9972768087142121,
      "grad_norm": 2.9375,
      "learning_rate": 3.7087574735650625e-05,
      "loss": 0.8403,
      "step": 284550
    },
    {
      "epoch": 0.9973118562211077,
      "grad_norm": 2.84375,
      "learning_rate": 3.7086925706986926e-05,
      "loss": 0.8711,
      "step": 284560
    },
    {
      "epoch": 0.9973469037280033,
      "grad_norm": 2.4375,
      "learning_rate": 3.708627667832322e-05,
      "loss": 0.8532,
      "step": 284570
    },
    {
      "epoch": 0.9973819512348989,
      "grad_norm": 3.265625,
      "learning_rate": 3.708562764965952e-05,
      "loss": 0.8899,
      "step": 284580
    },
    {
      "epoch": 0.9974169987417945,
      "grad_norm": 2.890625,
      "learning_rate": 3.708497862099582e-05,
      "loss": 0.8505,
      "step": 284590
    },
    {
      "epoch": 0.9974520462486901,
      "grad_norm": 3.1875,
      "learning_rate": 3.708432959233212e-05,
      "loss": 0.8743,
      "step": 284600
    },
    {
      "epoch": 0.9974870937555856,
      "grad_norm": 3.0625,
      "learning_rate": 3.708368056366841e-05,
      "loss": 0.8582,
      "step": 284610
    },
    {
      "epoch": 0.9975221412624813,
      "grad_norm": 3.28125,
      "learning_rate": 3.7083031535004714e-05,
      "loss": 0.8024,
      "step": 284620
    },
    {
      "epoch": 0.9975571887693769,
      "grad_norm": 3.140625,
      "learning_rate": 3.7082382506341015e-05,
      "loss": 0.9466,
      "step": 284630
    },
    {
      "epoch": 0.9975922362762725,
      "grad_norm": 3.171875,
      "learning_rate": 3.708173347767731e-05,
      "loss": 0.8982,
      "step": 284640
    },
    {
      "epoch": 0.9976272837831681,
      "grad_norm": 3.15625,
      "learning_rate": 3.708108444901361e-05,
      "loss": 0.8415,
      "step": 284650
    },
    {
      "epoch": 0.9976623312900637,
      "grad_norm": 2.96875,
      "learning_rate": 3.7080435420349906e-05,
      "loss": 0.9378,
      "step": 284660
    },
    {
      "epoch": 0.9976973787969593,
      "grad_norm": 2.984375,
      "learning_rate": 3.707978639168621e-05,
      "loss": 0.8741,
      "step": 284670
    },
    {
      "epoch": 0.9977324263038548,
      "grad_norm": 2.78125,
      "learning_rate": 3.70791373630225e-05,
      "loss": 0.8222,
      "step": 284680
    },
    {
      "epoch": 0.9977674738107505,
      "grad_norm": 2.765625,
      "learning_rate": 3.7078488334358803e-05,
      "loss": 0.873,
      "step": 284690
    },
    {
      "epoch": 0.997802521317646,
      "grad_norm": 2.796875,
      "learning_rate": 3.70778393056951e-05,
      "loss": 0.7964,
      "step": 284700
    },
    {
      "epoch": 0.9978375688245417,
      "grad_norm": 3.015625,
      "learning_rate": 3.70771902770314e-05,
      "loss": 0.9212,
      "step": 284710
    },
    {
      "epoch": 0.9978726163314373,
      "grad_norm": 2.8125,
      "learning_rate": 3.7076541248367694e-05,
      "loss": 0.7931,
      "step": 284720
    },
    {
      "epoch": 0.9979076638383328,
      "grad_norm": 2.859375,
      "learning_rate": 3.7075892219703995e-05,
      "loss": 0.8563,
      "step": 284730
    },
    {
      "epoch": 0.9979427113452285,
      "grad_norm": 2.984375,
      "learning_rate": 3.707524319104029e-05,
      "loss": 0.8727,
      "step": 284740
    },
    {
      "epoch": 0.997977758852124,
      "grad_norm": 3.078125,
      "learning_rate": 3.707459416237659e-05,
      "loss": 0.901,
      "step": 284750
    },
    {
      "epoch": 0.9980128063590197,
      "grad_norm": 3.03125,
      "learning_rate": 3.707394513371289e-05,
      "loss": 0.8785,
      "step": 284760
    },
    {
      "epoch": 0.9980478538659152,
      "grad_norm": 3.21875,
      "learning_rate": 3.707329610504919e-05,
      "loss": 0.8478,
      "step": 284770
    },
    {
      "epoch": 0.9980829013728109,
      "grad_norm": 2.953125,
      "learning_rate": 3.707264707638548e-05,
      "loss": 0.886,
      "step": 284780
    },
    {
      "epoch": 0.9981179488797064,
      "grad_norm": 2.921875,
      "learning_rate": 3.707199804772178e-05,
      "loss": 0.8382,
      "step": 284790
    },
    {
      "epoch": 0.998152996386602,
      "grad_norm": 2.953125,
      "learning_rate": 3.707134901905808e-05,
      "loss": 0.899,
      "step": 284800
    },
    {
      "epoch": 0.9981880438934977,
      "grad_norm": 3.3125,
      "learning_rate": 3.707069999039437e-05,
      "loss": 0.9281,
      "step": 284810
    },
    {
      "epoch": 0.9982230914003932,
      "grad_norm": 2.828125,
      "learning_rate": 3.7070050961730674e-05,
      "loss": 0.8368,
      "step": 284820
    },
    {
      "epoch": 0.9982581389072889,
      "grad_norm": 3.109375,
      "learning_rate": 3.706940193306697e-05,
      "loss": 0.7615,
      "step": 284830
    },
    {
      "epoch": 0.9982931864141844,
      "grad_norm": 2.890625,
      "learning_rate": 3.706875290440327e-05,
      "loss": 0.8939,
      "step": 284840
    },
    {
      "epoch": 0.99832823392108,
      "grad_norm": 2.859375,
      "learning_rate": 3.706810387573957e-05,
      "loss": 0.8874,
      "step": 284850
    },
    {
      "epoch": 0.9983632814279756,
      "grad_norm": 2.59375,
      "learning_rate": 3.7067454847075866e-05,
      "loss": 0.8333,
      "step": 284860
    },
    {
      "epoch": 0.9983983289348712,
      "grad_norm": 2.984375,
      "learning_rate": 3.706680581841217e-05,
      "loss": 0.9136,
      "step": 284870
    },
    {
      "epoch": 0.9984333764417668,
      "grad_norm": 2.890625,
      "learning_rate": 3.706615678974846e-05,
      "loss": 0.8804,
      "step": 284880
    },
    {
      "epoch": 0.9984684239486624,
      "grad_norm": 2.640625,
      "learning_rate": 3.706550776108476e-05,
      "loss": 0.8136,
      "step": 284890
    },
    {
      "epoch": 0.998503471455558,
      "grad_norm": 2.875,
      "learning_rate": 3.706485873242106e-05,
      "loss": 0.9023,
      "step": 284900
    },
    {
      "epoch": 0.9985385189624536,
      "grad_norm": 2.765625,
      "learning_rate": 3.706420970375736e-05,
      "loss": 0.9144,
      "step": 284910
    },
    {
      "epoch": 0.9985735664693492,
      "grad_norm": 2.953125,
      "learning_rate": 3.7063560675093654e-05,
      "loss": 0.8103,
      "step": 284920
    },
    {
      "epoch": 0.9986086139762448,
      "grad_norm": 2.875,
      "learning_rate": 3.7062911646429955e-05,
      "loss": 0.8707,
      "step": 284930
    },
    {
      "epoch": 0.9986436614831404,
      "grad_norm": 2.9375,
      "learning_rate": 3.706226261776625e-05,
      "loss": 0.9189,
      "step": 284940
    },
    {
      "epoch": 0.998678708990036,
      "grad_norm": 2.953125,
      "learning_rate": 3.706161358910255e-05,
      "loss": 0.848,
      "step": 284950
    },
    {
      "epoch": 0.9987137564969316,
      "grad_norm": 3.265625,
      "learning_rate": 3.7060964560438846e-05,
      "loss": 0.8388,
      "step": 284960
    },
    {
      "epoch": 0.9987488040038272,
      "grad_norm": 2.96875,
      "learning_rate": 3.706031553177515e-05,
      "loss": 0.9069,
      "step": 284970
    },
    {
      "epoch": 0.9987838515107228,
      "grad_norm": 2.734375,
      "learning_rate": 3.705966650311145e-05,
      "loss": 0.9575,
      "step": 284980
    },
    {
      "epoch": 0.9988188990176183,
      "grad_norm": 2.453125,
      "learning_rate": 3.705901747444774e-05,
      "loss": 0.8243,
      "step": 284990
    },
    {
      "epoch": 0.998853946524514,
      "grad_norm": 2.9375,
      "learning_rate": 3.7058368445784045e-05,
      "loss": 0.8164,
      "step": 285000
    },
    {
      "epoch": 0.998853946524514,
      "eval_loss": 0.8206360340118408,
      "eval_runtime": 556.318,
      "eval_samples_per_second": 683.846,
      "eval_steps_per_second": 56.987,
      "step": 285000
    },
    {
      "epoch": 0.9988889940314096,
      "grad_norm": 3.359375,
      "learning_rate": 3.705771941712034e-05,
      "loss": 0.9145,
      "step": 285010
    },
    {
      "epoch": 0.9989240415383052,
      "grad_norm": 2.78125,
      "learning_rate": 3.705707038845664e-05,
      "loss": 0.8915,
      "step": 285020
    },
    {
      "epoch": 0.9989590890452008,
      "grad_norm": 2.953125,
      "learning_rate": 3.7056421359792935e-05,
      "loss": 0.96,
      "step": 285030
    },
    {
      "epoch": 0.9989941365520963,
      "grad_norm": 2.953125,
      "learning_rate": 3.705577233112924e-05,
      "loss": 0.8931,
      "step": 285040
    },
    {
      "epoch": 0.999029184058992,
      "grad_norm": 2.78125,
      "learning_rate": 3.705512330246553e-05,
      "loss": 0.8975,
      "step": 285050
    },
    {
      "epoch": 0.9990642315658875,
      "grad_norm": 3.484375,
      "learning_rate": 3.705447427380183e-05,
      "loss": 0.8157,
      "step": 285060
    },
    {
      "epoch": 0.9990992790727832,
      "grad_norm": 2.671875,
      "learning_rate": 3.705382524513813e-05,
      "loss": 0.7838,
      "step": 285070
    },
    {
      "epoch": 0.9991343265796787,
      "grad_norm": 2.953125,
      "learning_rate": 3.705317621647443e-05,
      "loss": 0.8707,
      "step": 285080
    },
    {
      "epoch": 0.9991693740865744,
      "grad_norm": 3.125,
      "learning_rate": 3.705252718781072e-05,
      "loss": 0.8576,
      "step": 285090
    },
    {
      "epoch": 0.9992044215934699,
      "grad_norm": 2.5625,
      "learning_rate": 3.7051878159147025e-05,
      "loss": 0.9124,
      "step": 285100
    },
    {
      "epoch": 0.9992394691003655,
      "grad_norm": 2.921875,
      "learning_rate": 3.705122913048332e-05,
      "loss": 0.8752,
      "step": 285110
    },
    {
      "epoch": 0.9992745166072612,
      "grad_norm": 2.890625,
      "learning_rate": 3.705058010181962e-05,
      "loss": 0.8532,
      "step": 285120
    },
    {
      "epoch": 0.9993095641141567,
      "grad_norm": 2.859375,
      "learning_rate": 3.704993107315592e-05,
      "loss": 0.8597,
      "step": 285130
    },
    {
      "epoch": 0.9993446116210524,
      "grad_norm": 3.21875,
      "learning_rate": 3.704928204449222e-05,
      "loss": 0.8997,
      "step": 285140
    },
    {
      "epoch": 0.9993796591279479,
      "grad_norm": 3.421875,
      "learning_rate": 3.704863301582852e-05,
      "loss": 0.9432,
      "step": 285150
    },
    {
      "epoch": 0.9994147066348436,
      "grad_norm": 2.734375,
      "learning_rate": 3.7047983987164806e-05,
      "loss": 0.8838,
      "step": 285160
    },
    {
      "epoch": 0.9994497541417391,
      "grad_norm": 2.734375,
      "learning_rate": 3.704733495850111e-05,
      "loss": 0.867,
      "step": 285170
    },
    {
      "epoch": 0.9994848016486347,
      "grad_norm": 2.671875,
      "learning_rate": 3.70466859298374e-05,
      "loss": 0.9072,
      "step": 285180
    },
    {
      "epoch": 0.9995198491555303,
      "grad_norm": 2.9375,
      "learning_rate": 3.70460369011737e-05,
      "loss": 0.8545,
      "step": 285190
    },
    {
      "epoch": 0.9995548966624259,
      "grad_norm": 2.828125,
      "learning_rate": 3.704538787251e-05,
      "loss": 0.8204,
      "step": 285200
    },
    {
      "epoch": 0.9995899441693216,
      "grad_norm": 2.859375,
      "learning_rate": 3.70447388438463e-05,
      "loss": 0.8112,
      "step": 285210
    },
    {
      "epoch": 0.9996249916762171,
      "grad_norm": 3.0,
      "learning_rate": 3.70440898151826e-05,
      "loss": 0.9139,
      "step": 285220
    },
    {
      "epoch": 0.9996600391831127,
      "grad_norm": 2.703125,
      "learning_rate": 3.7043440786518895e-05,
      "loss": 0.8399,
      "step": 285230
    },
    {
      "epoch": 0.9996950866900083,
      "grad_norm": 3.09375,
      "learning_rate": 3.70427917578552e-05,
      "loss": 0.8112,
      "step": 285240
    },
    {
      "epoch": 0.9997301341969039,
      "grad_norm": 2.265625,
      "learning_rate": 3.704214272919149e-05,
      "loss": 0.8673,
      "step": 285250
    },
    {
      "epoch": 0.9997651817037995,
      "grad_norm": 2.71875,
      "learning_rate": 3.704149370052779e-05,
      "loss": 0.8743,
      "step": 285260
    },
    {
      "epoch": 0.9998002292106951,
      "grad_norm": 3.046875,
      "learning_rate": 3.704084467186409e-05,
      "loss": 0.8908,
      "step": 285270
    },
    {
      "epoch": 0.9998352767175906,
      "grad_norm": 2.75,
      "learning_rate": 3.704019564320039e-05,
      "loss": 0.8673,
      "step": 285280
    },
    {
      "epoch": 0.9998703242244863,
      "grad_norm": 2.890625,
      "learning_rate": 3.703954661453668e-05,
      "loss": 0.8219,
      "step": 285290
    },
    {
      "epoch": 0.9999053717313819,
      "grad_norm": 2.703125,
      "learning_rate": 3.7038897585872985e-05,
      "loss": 0.8732,
      "step": 285300
    },
    {
      "epoch": 0.9999404192382775,
      "grad_norm": 2.546875,
      "learning_rate": 3.703824855720928e-05,
      "loss": 0.8863,
      "step": 285310
    },
    {
      "epoch": 0.9999754667451731,
      "grad_norm": 3.015625,
      "learning_rate": 3.703759952854558e-05,
      "loss": 0.9328,
      "step": 285320
    },
    {
      "epoch": 1.0000105142520688,
      "grad_norm": 2.796875,
      "learning_rate": 3.7036950499881875e-05,
      "loss": 0.8301,
      "step": 285330
    },
    {
      "epoch": 1.0000455617589643,
      "grad_norm": 2.84375,
      "learning_rate": 3.703630147121818e-05,
      "loss": 0.833,
      "step": 285340
    },
    {
      "epoch": 1.0000806092658598,
      "grad_norm": 2.796875,
      "learning_rate": 3.703565244255448e-05,
      "loss": 0.7801,
      "step": 285350
    },
    {
      "epoch": 1.0001156567727554,
      "grad_norm": 3.046875,
      "learning_rate": 3.703500341389077e-05,
      "loss": 0.8374,
      "step": 285360
    },
    {
      "epoch": 1.0001507042796511,
      "grad_norm": 3.0,
      "learning_rate": 3.7034354385227074e-05,
      "loss": 0.9012,
      "step": 285370
    },
    {
      "epoch": 1.0001857517865467,
      "grad_norm": 2.65625,
      "learning_rate": 3.703370535656337e-05,
      "loss": 0.8233,
      "step": 285380
    },
    {
      "epoch": 1.0002207992934422,
      "grad_norm": 2.6875,
      "learning_rate": 3.703305632789967e-05,
      "loss": 0.914,
      "step": 285390
    },
    {
      "epoch": 1.000255846800338,
      "grad_norm": 2.84375,
      "learning_rate": 3.7032407299235965e-05,
      "loss": 0.8734,
      "step": 285400
    },
    {
      "epoch": 1.0002908943072335,
      "grad_norm": 3.125,
      "learning_rate": 3.7031758270572266e-05,
      "loss": 0.9092,
      "step": 285410
    },
    {
      "epoch": 1.000325941814129,
      "grad_norm": 3.421875,
      "learning_rate": 3.703110924190856e-05,
      "loss": 0.8568,
      "step": 285420
    },
    {
      "epoch": 1.0003609893210246,
      "grad_norm": 2.703125,
      "learning_rate": 3.703046021324486e-05,
      "loss": 0.9347,
      "step": 285430
    },
    {
      "epoch": 1.0003960368279203,
      "grad_norm": 2.84375,
      "learning_rate": 3.702981118458116e-05,
      "loss": 0.9042,
      "step": 285440
    },
    {
      "epoch": 1.0004310843348159,
      "grad_norm": 2.90625,
      "learning_rate": 3.702916215591746e-05,
      "loss": 0.8931,
      "step": 285450
    },
    {
      "epoch": 1.0004661318417114,
      "grad_norm": 2.75,
      "learning_rate": 3.702851312725375e-05,
      "loss": 0.9243,
      "step": 285460
    },
    {
      "epoch": 1.000501179348607,
      "grad_norm": 2.71875,
      "learning_rate": 3.7027864098590054e-05,
      "loss": 0.9506,
      "step": 285470
    },
    {
      "epoch": 1.0005362268555027,
      "grad_norm": 3.265625,
      "learning_rate": 3.702721506992635e-05,
      "loss": 0.8377,
      "step": 285480
    },
    {
      "epoch": 1.0005712743623982,
      "grad_norm": 2.84375,
      "learning_rate": 3.702656604126265e-05,
      "loss": 0.9303,
      "step": 285490
    },
    {
      "epoch": 1.0006063218692938,
      "grad_norm": 2.75,
      "learning_rate": 3.702591701259895e-05,
      "loss": 0.8881,
      "step": 285500
    },
    {
      "epoch": 1.0006413693761895,
      "grad_norm": 3.109375,
      "learning_rate": 3.7025267983935246e-05,
      "loss": 0.9207,
      "step": 285510
    },
    {
      "epoch": 1.000676416883085,
      "grad_norm": 3.40625,
      "learning_rate": 3.702461895527155e-05,
      "loss": 0.9439,
      "step": 285520
    },
    {
      "epoch": 1.0007114643899806,
      "grad_norm": 2.828125,
      "learning_rate": 3.702396992660784e-05,
      "loss": 0.8421,
      "step": 285530
    },
    {
      "epoch": 1.0007465118968761,
      "grad_norm": 2.4375,
      "learning_rate": 3.702332089794414e-05,
      "loss": 0.8183,
      "step": 285540
    },
    {
      "epoch": 1.000781559403772,
      "grad_norm": 3.046875,
      "learning_rate": 3.702267186928043e-05,
      "loss": 0.9531,
      "step": 285550
    },
    {
      "epoch": 1.0008166069106674,
      "grad_norm": 2.90625,
      "learning_rate": 3.702202284061673e-05,
      "loss": 0.8565,
      "step": 285560
    },
    {
      "epoch": 1.000851654417563,
      "grad_norm": 3.046875,
      "learning_rate": 3.702137381195303e-05,
      "loss": 0.9243,
      "step": 285570
    },
    {
      "epoch": 1.0008867019244585,
      "grad_norm": 2.84375,
      "learning_rate": 3.702072478328933e-05,
      "loss": 0.8386,
      "step": 285580
    },
    {
      "epoch": 1.0009217494313543,
      "grad_norm": 2.984375,
      "learning_rate": 3.702007575462563e-05,
      "loss": 0.8217,
      "step": 285590
    },
    {
      "epoch": 1.0009567969382498,
      "grad_norm": 2.953125,
      "learning_rate": 3.7019426725961925e-05,
      "loss": 0.8919,
      "step": 285600
    },
    {
      "epoch": 1.0009918444451453,
      "grad_norm": 3.1875,
      "learning_rate": 3.7018777697298226e-05,
      "loss": 0.8328,
      "step": 285610
    },
    {
      "epoch": 1.001026891952041,
      "grad_norm": 3.109375,
      "learning_rate": 3.701812866863452e-05,
      "loss": 0.8018,
      "step": 285620
    },
    {
      "epoch": 1.0010619394589366,
      "grad_norm": 2.640625,
      "learning_rate": 3.701747963997082e-05,
      "loss": 0.8829,
      "step": 285630
    },
    {
      "epoch": 1.0010969869658322,
      "grad_norm": 3.046875,
      "learning_rate": 3.701683061130712e-05,
      "loss": 0.8789,
      "step": 285640
    },
    {
      "epoch": 1.0011320344727277,
      "grad_norm": 2.953125,
      "learning_rate": 3.701618158264342e-05,
      "loss": 0.9035,
      "step": 285650
    },
    {
      "epoch": 1.0011670819796235,
      "grad_norm": 3.453125,
      "learning_rate": 3.701553255397971e-05,
      "loss": 0.9135,
      "step": 285660
    },
    {
      "epoch": 1.001202129486519,
      "grad_norm": 3.015625,
      "learning_rate": 3.7014883525316014e-05,
      "loss": 0.9161,
      "step": 285670
    },
    {
      "epoch": 1.0012371769934145,
      "grad_norm": 2.828125,
      "learning_rate": 3.701423449665231e-05,
      "loss": 0.9164,
      "step": 285680
    },
    {
      "epoch": 1.00127222450031,
      "grad_norm": 3.015625,
      "learning_rate": 3.701358546798861e-05,
      "loss": 0.837,
      "step": 285690
    },
    {
      "epoch": 1.0013072720072058,
      "grad_norm": 3.125,
      "learning_rate": 3.7012936439324905e-05,
      "loss": 0.9077,
      "step": 285700
    },
    {
      "epoch": 1.0013423195141014,
      "grad_norm": 2.984375,
      "learning_rate": 3.7012287410661206e-05,
      "loss": 0.8733,
      "step": 285710
    },
    {
      "epoch": 1.001377367020997,
      "grad_norm": 2.6875,
      "learning_rate": 3.701163838199751e-05,
      "loss": 0.7453,
      "step": 285720
    },
    {
      "epoch": 1.0014124145278926,
      "grad_norm": 3.0,
      "learning_rate": 3.70109893533338e-05,
      "loss": 0.967,
      "step": 285730
    },
    {
      "epoch": 1.0014474620347882,
      "grad_norm": 3.3125,
      "learning_rate": 3.7010340324670104e-05,
      "loss": 0.833,
      "step": 285740
    },
    {
      "epoch": 1.0014825095416837,
      "grad_norm": 3.171875,
      "learning_rate": 3.70096912960064e-05,
      "loss": 0.8931,
      "step": 285750
    },
    {
      "epoch": 1.0015175570485793,
      "grad_norm": 2.859375,
      "learning_rate": 3.70090422673427e-05,
      "loss": 0.8036,
      "step": 285760
    },
    {
      "epoch": 1.001552604555475,
      "grad_norm": 3.03125,
      "learning_rate": 3.7008393238678994e-05,
      "loss": 0.8687,
      "step": 285770
    },
    {
      "epoch": 1.0015876520623705,
      "grad_norm": 3.046875,
      "learning_rate": 3.7007744210015296e-05,
      "loss": 0.9572,
      "step": 285780
    },
    {
      "epoch": 1.001622699569266,
      "grad_norm": 3.109375,
      "learning_rate": 3.700709518135159e-05,
      "loss": 0.9305,
      "step": 285790
    },
    {
      "epoch": 1.0016577470761618,
      "grad_norm": 3.078125,
      "learning_rate": 3.700644615268789e-05,
      "loss": 0.7835,
      "step": 285800
    },
    {
      "epoch": 1.0016927945830574,
      "grad_norm": 2.9375,
      "learning_rate": 3.7005797124024186e-05,
      "loss": 0.9147,
      "step": 285810
    },
    {
      "epoch": 1.001727842089953,
      "grad_norm": 2.9375,
      "learning_rate": 3.700514809536049e-05,
      "loss": 0.8573,
      "step": 285820
    },
    {
      "epoch": 1.0017628895968484,
      "grad_norm": 2.84375,
      "learning_rate": 3.700449906669678e-05,
      "loss": 0.8908,
      "step": 285830
    },
    {
      "epoch": 1.0017979371037442,
      "grad_norm": 3.109375,
      "learning_rate": 3.7003850038033084e-05,
      "loss": 0.9123,
      "step": 285840
    },
    {
      "epoch": 1.0018329846106397,
      "grad_norm": 2.90625,
      "learning_rate": 3.700320100936938e-05,
      "loss": 0.865,
      "step": 285850
    },
    {
      "epoch": 1.0018680321175353,
      "grad_norm": 3.875,
      "learning_rate": 3.700255198070568e-05,
      "loss": 0.9205,
      "step": 285860
    },
    {
      "epoch": 1.0019030796244308,
      "grad_norm": 3.0625,
      "learning_rate": 3.700190295204198e-05,
      "loss": 0.96,
      "step": 285870
    },
    {
      "epoch": 1.0019381271313266,
      "grad_norm": 2.828125,
      "learning_rate": 3.7001253923378276e-05,
      "loss": 0.8456,
      "step": 285880
    },
    {
      "epoch": 1.001973174638222,
      "grad_norm": 2.53125,
      "learning_rate": 3.700060489471458e-05,
      "loss": 0.8013,
      "step": 285890
    },
    {
      "epoch": 1.0020082221451176,
      "grad_norm": 2.875,
      "learning_rate": 3.699995586605087e-05,
      "loss": 0.8459,
      "step": 285900
    },
    {
      "epoch": 1.0020432696520134,
      "grad_norm": 3.203125,
      "learning_rate": 3.6999306837387166e-05,
      "loss": 0.8916,
      "step": 285910
    },
    {
      "epoch": 1.002078317158909,
      "grad_norm": 2.984375,
      "learning_rate": 3.699865780872346e-05,
      "loss": 0.892,
      "step": 285920
    },
    {
      "epoch": 1.0021133646658045,
      "grad_norm": 2.734375,
      "learning_rate": 3.699800878005976e-05,
      "loss": 0.8868,
      "step": 285930
    },
    {
      "epoch": 1.0021484121727,
      "grad_norm": 2.84375,
      "learning_rate": 3.699735975139606e-05,
      "loss": 0.8637,
      "step": 285940
    },
    {
      "epoch": 1.0021834596795958,
      "grad_norm": 3.1875,
      "learning_rate": 3.699671072273236e-05,
      "loss": 0.8748,
      "step": 285950
    },
    {
      "epoch": 1.0022185071864913,
      "grad_norm": 2.53125,
      "learning_rate": 3.699606169406866e-05,
      "loss": 0.9197,
      "step": 285960
    },
    {
      "epoch": 1.0022535546933868,
      "grad_norm": 3.484375,
      "learning_rate": 3.6995412665404954e-05,
      "loss": 0.8776,
      "step": 285970
    },
    {
      "epoch": 1.0022886022002824,
      "grad_norm": 2.671875,
      "learning_rate": 3.6994763636741256e-05,
      "loss": 0.8302,
      "step": 285980
    },
    {
      "epoch": 1.0023236497071781,
      "grad_norm": 2.984375,
      "learning_rate": 3.699411460807755e-05,
      "loss": 0.9095,
      "step": 285990
    },
    {
      "epoch": 1.0023586972140737,
      "grad_norm": 2.953125,
      "learning_rate": 3.699346557941385e-05,
      "loss": 0.9247,
      "step": 286000
    },
    {
      "epoch": 1.0023937447209692,
      "grad_norm": 3.109375,
      "learning_rate": 3.6992816550750146e-05,
      "loss": 0.9303,
      "step": 286010
    },
    {
      "epoch": 1.002428792227865,
      "grad_norm": 2.40625,
      "learning_rate": 3.699216752208645e-05,
      "loss": 0.8265,
      "step": 286020
    },
    {
      "epoch": 1.0024638397347605,
      "grad_norm": 3.03125,
      "learning_rate": 3.699151849342274e-05,
      "loss": 0.8351,
      "step": 286030
    },
    {
      "epoch": 1.002498887241656,
      "grad_norm": 3.0,
      "learning_rate": 3.6990869464759044e-05,
      "loss": 0.8312,
      "step": 286040
    },
    {
      "epoch": 1.0025339347485516,
      "grad_norm": 3.3125,
      "learning_rate": 3.699022043609534e-05,
      "loss": 0.8042,
      "step": 286050
    },
    {
      "epoch": 1.0025689822554473,
      "grad_norm": 3.109375,
      "learning_rate": 3.698957140743164e-05,
      "loss": 0.8264,
      "step": 286060
    },
    {
      "epoch": 1.0026040297623429,
      "grad_norm": 2.796875,
      "learning_rate": 3.6988922378767934e-05,
      "loss": 0.8073,
      "step": 286070
    },
    {
      "epoch": 1.0026390772692384,
      "grad_norm": 2.578125,
      "learning_rate": 3.6988273350104236e-05,
      "loss": 0.8241,
      "step": 286080
    },
    {
      "epoch": 1.0026741247761342,
      "grad_norm": 2.875,
      "learning_rate": 3.698762432144054e-05,
      "loss": 0.8602,
      "step": 286090
    },
    {
      "epoch": 1.0027091722830297,
      "grad_norm": 2.59375,
      "learning_rate": 3.698697529277683e-05,
      "loss": 0.9149,
      "step": 286100
    },
    {
      "epoch": 1.0027442197899252,
      "grad_norm": 2.8125,
      "learning_rate": 3.698632626411313e-05,
      "loss": 0.9299,
      "step": 286110
    },
    {
      "epoch": 1.0027792672968208,
      "grad_norm": 2.734375,
      "learning_rate": 3.698567723544943e-05,
      "loss": 0.8543,
      "step": 286120
    },
    {
      "epoch": 1.0028143148037165,
      "grad_norm": 3.234375,
      "learning_rate": 3.698502820678573e-05,
      "loss": 0.9498,
      "step": 286130
    },
    {
      "epoch": 1.002849362310612,
      "grad_norm": 3.171875,
      "learning_rate": 3.6984379178122024e-05,
      "loss": 0.8917,
      "step": 286140
    },
    {
      "epoch": 1.0028844098175076,
      "grad_norm": 2.359375,
      "learning_rate": 3.6983730149458325e-05,
      "loss": 0.8167,
      "step": 286150
    },
    {
      "epoch": 1.0029194573244031,
      "grad_norm": 2.578125,
      "learning_rate": 3.698308112079462e-05,
      "loss": 0.841,
      "step": 286160
    },
    {
      "epoch": 1.0029545048312989,
      "grad_norm": 2.984375,
      "learning_rate": 3.698243209213092e-05,
      "loss": 0.8058,
      "step": 286170
    },
    {
      "epoch": 1.0029895523381944,
      "grad_norm": 3.0,
      "learning_rate": 3.6981783063467216e-05,
      "loss": 0.9233,
      "step": 286180
    },
    {
      "epoch": 1.00302459984509,
      "grad_norm": 3.078125,
      "learning_rate": 3.698113403480352e-05,
      "loss": 0.8136,
      "step": 286190
    },
    {
      "epoch": 1.0030596473519857,
      "grad_norm": 2.921875,
      "learning_rate": 3.698048500613981e-05,
      "loss": 0.8794,
      "step": 286200
    },
    {
      "epoch": 1.0030946948588813,
      "grad_norm": 3.296875,
      "learning_rate": 3.697983597747611e-05,
      "loss": 0.8987,
      "step": 286210
    },
    {
      "epoch": 1.0031297423657768,
      "grad_norm": 3.21875,
      "learning_rate": 3.6979186948812414e-05,
      "loss": 0.8991,
      "step": 286220
    },
    {
      "epoch": 1.0031647898726723,
      "grad_norm": 3.390625,
      "learning_rate": 3.697853792014871e-05,
      "loss": 0.8477,
      "step": 286230
    },
    {
      "epoch": 1.003199837379568,
      "grad_norm": 2.5,
      "learning_rate": 3.697788889148501e-05,
      "loss": 0.852,
      "step": 286240
    },
    {
      "epoch": 1.0032348848864636,
      "grad_norm": 2.890625,
      "learning_rate": 3.6977239862821305e-05,
      "loss": 0.7586,
      "step": 286250
    },
    {
      "epoch": 1.0032699323933592,
      "grad_norm": 2.78125,
      "learning_rate": 3.6976590834157606e-05,
      "loss": 0.8844,
      "step": 286260
    },
    {
      "epoch": 1.0033049799002547,
      "grad_norm": 3.15625,
      "learning_rate": 3.69759418054939e-05,
      "loss": 0.8002,
      "step": 286270
    },
    {
      "epoch": 1.0033400274071504,
      "grad_norm": 2.75,
      "learning_rate": 3.69752927768302e-05,
      "loss": 0.8046,
      "step": 286280
    },
    {
      "epoch": 1.003375074914046,
      "grad_norm": 2.640625,
      "learning_rate": 3.697464374816649e-05,
      "loss": 0.8067,
      "step": 286290
    },
    {
      "epoch": 1.0034101224209415,
      "grad_norm": 3.0,
      "learning_rate": 3.697399471950279e-05,
      "loss": 0.9393,
      "step": 286300
    },
    {
      "epoch": 1.0034451699278373,
      "grad_norm": 2.953125,
      "learning_rate": 3.697334569083909e-05,
      "loss": 0.8522,
      "step": 286310
    },
    {
      "epoch": 1.0034802174347328,
      "grad_norm": 3.109375,
      "learning_rate": 3.697269666217539e-05,
      "loss": 0.852,
      "step": 286320
    },
    {
      "epoch": 1.0035152649416283,
      "grad_norm": 2.65625,
      "learning_rate": 3.697204763351169e-05,
      "loss": 0.9282,
      "step": 286330
    },
    {
      "epoch": 1.0035503124485239,
      "grad_norm": 2.4375,
      "learning_rate": 3.6971398604847984e-05,
      "loss": 0.7947,
      "step": 286340
    },
    {
      "epoch": 1.0035853599554196,
      "grad_norm": 2.953125,
      "learning_rate": 3.6970749576184285e-05,
      "loss": 0.9234,
      "step": 286350
    },
    {
      "epoch": 1.0036204074623152,
      "grad_norm": 2.90625,
      "learning_rate": 3.697010054752058e-05,
      "loss": 0.8918,
      "step": 286360
    },
    {
      "epoch": 1.0036554549692107,
      "grad_norm": 2.96875,
      "learning_rate": 3.696945151885688e-05,
      "loss": 0.872,
      "step": 286370
    },
    {
      "epoch": 1.0036905024761065,
      "grad_norm": 3.640625,
      "learning_rate": 3.6968802490193176e-05,
      "loss": 0.9879,
      "step": 286380
    },
    {
      "epoch": 1.003725549983002,
      "grad_norm": 3.15625,
      "learning_rate": 3.696815346152948e-05,
      "loss": 0.8149,
      "step": 286390
    },
    {
      "epoch": 1.0037605974898975,
      "grad_norm": 3.203125,
      "learning_rate": 3.696750443286577e-05,
      "loss": 0.881,
      "step": 286400
    },
    {
      "epoch": 1.003795644996793,
      "grad_norm": 3.421875,
      "learning_rate": 3.696685540420207e-05,
      "loss": 0.8762,
      "step": 286410
    },
    {
      "epoch": 1.0038306925036888,
      "grad_norm": 2.859375,
      "learning_rate": 3.696620637553837e-05,
      "loss": 0.8573,
      "step": 286420
    },
    {
      "epoch": 1.0038657400105844,
      "grad_norm": 2.984375,
      "learning_rate": 3.696555734687467e-05,
      "loss": 0.8846,
      "step": 286430
    },
    {
      "epoch": 1.00390078751748,
      "grad_norm": 2.984375,
      "learning_rate": 3.6964908318210964e-05,
      "loss": 0.8977,
      "step": 286440
    },
    {
      "epoch": 1.0039358350243754,
      "grad_norm": 2.71875,
      "learning_rate": 3.6964259289547265e-05,
      "loss": 0.8995,
      "step": 286450
    },
    {
      "epoch": 1.0039708825312712,
      "grad_norm": 3.125,
      "learning_rate": 3.6963610260883566e-05,
      "loss": 0.918,
      "step": 286460
    },
    {
      "epoch": 1.0040059300381667,
      "grad_norm": 2.796875,
      "learning_rate": 3.696296123221986e-05,
      "loss": 0.8176,
      "step": 286470
    },
    {
      "epoch": 1.0040409775450623,
      "grad_norm": 2.921875,
      "learning_rate": 3.696231220355616e-05,
      "loss": 0.8367,
      "step": 286480
    },
    {
      "epoch": 1.004076025051958,
      "grad_norm": 2.828125,
      "learning_rate": 3.696166317489246e-05,
      "loss": 0.8376,
      "step": 286490
    },
    {
      "epoch": 1.0041110725588536,
      "grad_norm": 3.140625,
      "learning_rate": 3.696101414622876e-05,
      "loss": 0.8433,
      "step": 286500
    },
    {
      "epoch": 1.004146120065749,
      "grad_norm": 2.796875,
      "learning_rate": 3.696036511756505e-05,
      "loss": 0.8563,
      "step": 286510
    },
    {
      "epoch": 1.0041811675726446,
      "grad_norm": 3.765625,
      "learning_rate": 3.6959716088901354e-05,
      "loss": 0.7957,
      "step": 286520
    },
    {
      "epoch": 1.0042162150795404,
      "grad_norm": 3.1875,
      "learning_rate": 3.695906706023765e-05,
      "loss": 0.8024,
      "step": 286530
    },
    {
      "epoch": 1.004251262586436,
      "grad_norm": 3.25,
      "learning_rate": 3.695841803157395e-05,
      "loss": 0.9346,
      "step": 286540
    },
    {
      "epoch": 1.0042863100933315,
      "grad_norm": 2.75,
      "learning_rate": 3.6957769002910245e-05,
      "loss": 0.8597,
      "step": 286550
    },
    {
      "epoch": 1.004321357600227,
      "grad_norm": 2.953125,
      "learning_rate": 3.6957119974246546e-05,
      "loss": 1.0293,
      "step": 286560
    },
    {
      "epoch": 1.0043564051071228,
      "grad_norm": 2.703125,
      "learning_rate": 3.695647094558284e-05,
      "loss": 0.8516,
      "step": 286570
    },
    {
      "epoch": 1.0043914526140183,
      "grad_norm": 3.09375,
      "learning_rate": 3.695582191691914e-05,
      "loss": 0.9061,
      "step": 286580
    },
    {
      "epoch": 1.0044265001209138,
      "grad_norm": 2.546875,
      "learning_rate": 3.6955172888255444e-05,
      "loss": 0.8242,
      "step": 286590
    },
    {
      "epoch": 1.0044615476278096,
      "grad_norm": 3.375,
      "learning_rate": 3.695452385959174e-05,
      "loss": 0.901,
      "step": 286600
    },
    {
      "epoch": 1.0044965951347051,
      "grad_norm": 2.609375,
      "learning_rate": 3.695387483092804e-05,
      "loss": 0.851,
      "step": 286610
    },
    {
      "epoch": 1.0045316426416007,
      "grad_norm": 2.984375,
      "learning_rate": 3.6953225802264334e-05,
      "loss": 0.8678,
      "step": 286620
    },
    {
      "epoch": 1.0045666901484962,
      "grad_norm": 3.140625,
      "learning_rate": 3.6952576773600636e-05,
      "loss": 0.8555,
      "step": 286630
    },
    {
      "epoch": 1.004601737655392,
      "grad_norm": 2.84375,
      "learning_rate": 3.695192774493693e-05,
      "loss": 0.8305,
      "step": 286640
    },
    {
      "epoch": 1.0046367851622875,
      "grad_norm": 3.421875,
      "learning_rate": 3.695127871627323e-05,
      "loss": 0.8047,
      "step": 286650
    },
    {
      "epoch": 1.004671832669183,
      "grad_norm": 2.734375,
      "learning_rate": 3.695062968760952e-05,
      "loss": 0.8877,
      "step": 286660
    },
    {
      "epoch": 1.0047068801760786,
      "grad_norm": 3.078125,
      "learning_rate": 3.694998065894582e-05,
      "loss": 0.8544,
      "step": 286670
    },
    {
      "epoch": 1.0047419276829743,
      "grad_norm": 2.859375,
      "learning_rate": 3.694933163028212e-05,
      "loss": 0.93,
      "step": 286680
    },
    {
      "epoch": 1.0047769751898699,
      "grad_norm": 2.890625,
      "learning_rate": 3.694868260161842e-05,
      "loss": 0.819,
      "step": 286690
    },
    {
      "epoch": 1.0048120226967654,
      "grad_norm": 2.609375,
      "learning_rate": 3.694803357295472e-05,
      "loss": 0.8727,
      "step": 286700
    },
    {
      "epoch": 1.0048470702036612,
      "grad_norm": 2.890625,
      "learning_rate": 3.694738454429101e-05,
      "loss": 0.8753,
      "step": 286710
    },
    {
      "epoch": 1.0048821177105567,
      "grad_norm": 3.015625,
      "learning_rate": 3.6946735515627314e-05,
      "loss": 0.9001,
      "step": 286720
    },
    {
      "epoch": 1.0049171652174522,
      "grad_norm": 2.5625,
      "learning_rate": 3.694608648696361e-05,
      "loss": 0.8461,
      "step": 286730
    },
    {
      "epoch": 1.0049522127243478,
      "grad_norm": 2.796875,
      "learning_rate": 3.694543745829991e-05,
      "loss": 0.8692,
      "step": 286740
    },
    {
      "epoch": 1.0049872602312435,
      "grad_norm": 2.890625,
      "learning_rate": 3.6944788429636205e-05,
      "loss": 0.8782,
      "step": 286750
    },
    {
      "epoch": 1.005022307738139,
      "grad_norm": 2.640625,
      "learning_rate": 3.6944139400972506e-05,
      "loss": 0.8445,
      "step": 286760
    },
    {
      "epoch": 1.0050573552450346,
      "grad_norm": 3.140625,
      "learning_rate": 3.69434903723088e-05,
      "loss": 0.9089,
      "step": 286770
    },
    {
      "epoch": 1.0050924027519303,
      "grad_norm": 2.265625,
      "learning_rate": 3.69428413436451e-05,
      "loss": 0.8611,
      "step": 286780
    },
    {
      "epoch": 1.0051274502588259,
      "grad_norm": 2.984375,
      "learning_rate": 3.69421923149814e-05,
      "loss": 0.8419,
      "step": 286790
    },
    {
      "epoch": 1.0051624977657214,
      "grad_norm": 2.859375,
      "learning_rate": 3.69415432863177e-05,
      "loss": 0.8421,
      "step": 286800
    },
    {
      "epoch": 1.005197545272617,
      "grad_norm": 2.59375,
      "learning_rate": 3.694089425765399e-05,
      "loss": 0.9162,
      "step": 286810
    },
    {
      "epoch": 1.0052325927795127,
      "grad_norm": 2.40625,
      "learning_rate": 3.6940245228990294e-05,
      "loss": 0.8544,
      "step": 286820
    },
    {
      "epoch": 1.0052676402864082,
      "grad_norm": 3.03125,
      "learning_rate": 3.6939596200326596e-05,
      "loss": 0.8472,
      "step": 286830
    },
    {
      "epoch": 1.0053026877933038,
      "grad_norm": 2.71875,
      "learning_rate": 3.693894717166289e-05,
      "loss": 0.8345,
      "step": 286840
    },
    {
      "epoch": 1.0053377353001993,
      "grad_norm": 2.453125,
      "learning_rate": 3.693829814299919e-05,
      "loss": 0.932,
      "step": 286850
    },
    {
      "epoch": 1.005372782807095,
      "grad_norm": 3.046875,
      "learning_rate": 3.6937649114335486e-05,
      "loss": 0.8925,
      "step": 286860
    },
    {
      "epoch": 1.0054078303139906,
      "grad_norm": 2.71875,
      "learning_rate": 3.693700008567179e-05,
      "loss": 0.8462,
      "step": 286870
    },
    {
      "epoch": 1.0054428778208861,
      "grad_norm": 2.671875,
      "learning_rate": 3.693635105700808e-05,
      "loss": 0.9189,
      "step": 286880
    },
    {
      "epoch": 1.005477925327782,
      "grad_norm": 2.703125,
      "learning_rate": 3.6935702028344384e-05,
      "loss": 0.8272,
      "step": 286890
    },
    {
      "epoch": 1.0055129728346774,
      "grad_norm": 2.640625,
      "learning_rate": 3.693505299968068e-05,
      "loss": 0.879,
      "step": 286900
    },
    {
      "epoch": 1.005548020341573,
      "grad_norm": 2.734375,
      "learning_rate": 3.693440397101698e-05,
      "loss": 0.9207,
      "step": 286910
    },
    {
      "epoch": 1.0055830678484685,
      "grad_norm": 2.765625,
      "learning_rate": 3.6933754942353274e-05,
      "loss": 0.8204,
      "step": 286920
    },
    {
      "epoch": 1.0056181153553643,
      "grad_norm": 3.578125,
      "learning_rate": 3.6933105913689576e-05,
      "loss": 0.984,
      "step": 286930
    },
    {
      "epoch": 1.0056531628622598,
      "grad_norm": 2.453125,
      "learning_rate": 3.693245688502587e-05,
      "loss": 0.9505,
      "step": 286940
    },
    {
      "epoch": 1.0056882103691553,
      "grad_norm": 2.828125,
      "learning_rate": 3.693180785636217e-05,
      "loss": 0.907,
      "step": 286950
    },
    {
      "epoch": 1.0057232578760509,
      "grad_norm": 2.671875,
      "learning_rate": 3.693115882769847e-05,
      "loss": 0.7978,
      "step": 286960
    },
    {
      "epoch": 1.0057583053829466,
      "grad_norm": 2.8125,
      "learning_rate": 3.693050979903477e-05,
      "loss": 0.8343,
      "step": 286970
    },
    {
      "epoch": 1.0057933528898422,
      "grad_norm": 2.984375,
      "learning_rate": 3.692986077037107e-05,
      "loss": 0.8533,
      "step": 286980
    },
    {
      "epoch": 1.0058284003967377,
      "grad_norm": 3.234375,
      "learning_rate": 3.6929211741707364e-05,
      "loss": 0.8921,
      "step": 286990
    },
    {
      "epoch": 1.0058634479036335,
      "grad_norm": 3.078125,
      "learning_rate": 3.6928562713043665e-05,
      "loss": 0.9618,
      "step": 287000
    },
    {
      "epoch": 1.005898495410529,
      "grad_norm": 2.421875,
      "learning_rate": 3.692791368437996e-05,
      "loss": 0.8246,
      "step": 287010
    },
    {
      "epoch": 1.0059335429174245,
      "grad_norm": 3.0,
      "learning_rate": 3.692726465571626e-05,
      "loss": 0.9074,
      "step": 287020
    },
    {
      "epoch": 1.00596859042432,
      "grad_norm": 2.5625,
      "learning_rate": 3.6926615627052556e-05,
      "loss": 0.8967,
      "step": 287030
    },
    {
      "epoch": 1.0060036379312158,
      "grad_norm": 3.296875,
      "learning_rate": 3.692596659838885e-05,
      "loss": 0.8455,
      "step": 287040
    },
    {
      "epoch": 1.0060386854381114,
      "grad_norm": 3.6875,
      "learning_rate": 3.692531756972515e-05,
      "loss": 0.9909,
      "step": 287050
    },
    {
      "epoch": 1.006073732945007,
      "grad_norm": 3.078125,
      "learning_rate": 3.6924668541061446e-05,
      "loss": 0.7747,
      "step": 287060
    },
    {
      "epoch": 1.0061087804519027,
      "grad_norm": 3.03125,
      "learning_rate": 3.692401951239775e-05,
      "loss": 0.9188,
      "step": 287070
    },
    {
      "epoch": 1.0061438279587982,
      "grad_norm": 2.96875,
      "learning_rate": 3.692337048373404e-05,
      "loss": 1.0115,
      "step": 287080
    },
    {
      "epoch": 1.0061788754656937,
      "grad_norm": 2.703125,
      "learning_rate": 3.6922721455070344e-05,
      "loss": 0.8167,
      "step": 287090
    },
    {
      "epoch": 1.0062139229725893,
      "grad_norm": 3.546875,
      "learning_rate": 3.692207242640664e-05,
      "loss": 0.9174,
      "step": 287100
    },
    {
      "epoch": 1.006248970479485,
      "grad_norm": 2.859375,
      "learning_rate": 3.692142339774294e-05,
      "loss": 0.9123,
      "step": 287110
    },
    {
      "epoch": 1.0062840179863806,
      "grad_norm": 3.015625,
      "learning_rate": 3.6920774369079234e-05,
      "loss": 0.8805,
      "step": 287120
    },
    {
      "epoch": 1.006319065493276,
      "grad_norm": 3.109375,
      "learning_rate": 3.6920125340415536e-05,
      "loss": 0.9378,
      "step": 287130
    },
    {
      "epoch": 1.0063541130001716,
      "grad_norm": 3.34375,
      "learning_rate": 3.691947631175183e-05,
      "loss": 0.8644,
      "step": 287140
    },
    {
      "epoch": 1.0063891605070674,
      "grad_norm": 3.09375,
      "learning_rate": 3.691882728308813e-05,
      "loss": 0.8997,
      "step": 287150
    },
    {
      "epoch": 1.006424208013963,
      "grad_norm": 2.859375,
      "learning_rate": 3.6918178254424426e-05,
      "loss": 0.926,
      "step": 287160
    },
    {
      "epoch": 1.0064592555208585,
      "grad_norm": 2.625,
      "learning_rate": 3.691752922576073e-05,
      "loss": 0.9008,
      "step": 287170
    },
    {
      "epoch": 1.0064943030277542,
      "grad_norm": 2.984375,
      "learning_rate": 3.691688019709703e-05,
      "loss": 0.9827,
      "step": 287180
    },
    {
      "epoch": 1.0065293505346498,
      "grad_norm": 3.203125,
      "learning_rate": 3.6916231168433324e-05,
      "loss": 0.982,
      "step": 287190
    },
    {
      "epoch": 1.0065643980415453,
      "grad_norm": 2.671875,
      "learning_rate": 3.6915582139769625e-05,
      "loss": 0.8402,
      "step": 287200
    },
    {
      "epoch": 1.0065994455484408,
      "grad_norm": 2.71875,
      "learning_rate": 3.691493311110592e-05,
      "loss": 0.8155,
      "step": 287210
    },
    {
      "epoch": 1.0066344930553366,
      "grad_norm": 3.15625,
      "learning_rate": 3.691428408244222e-05,
      "loss": 0.9207,
      "step": 287220
    },
    {
      "epoch": 1.0066695405622321,
      "grad_norm": 3.421875,
      "learning_rate": 3.6913635053778516e-05,
      "loss": 0.9382,
      "step": 287230
    },
    {
      "epoch": 1.0067045880691277,
      "grad_norm": 2.421875,
      "learning_rate": 3.691298602511482e-05,
      "loss": 0.8008,
      "step": 287240
    },
    {
      "epoch": 1.0067396355760232,
      "grad_norm": 3.46875,
      "learning_rate": 3.691233699645111e-05,
      "loss": 0.9064,
      "step": 287250
    },
    {
      "epoch": 1.006774683082919,
      "grad_norm": 2.75,
      "learning_rate": 3.691168796778741e-05,
      "loss": 0.7967,
      "step": 287260
    },
    {
      "epoch": 1.0068097305898145,
      "grad_norm": 2.6875,
      "learning_rate": 3.691103893912371e-05,
      "loss": 0.8284,
      "step": 287270
    },
    {
      "epoch": 1.00684477809671,
      "grad_norm": 2.921875,
      "learning_rate": 3.691038991046001e-05,
      "loss": 0.8465,
      "step": 287280
    },
    {
      "epoch": 1.0068798256036058,
      "grad_norm": 3.921875,
      "learning_rate": 3.6909740881796304e-05,
      "loss": 0.9678,
      "step": 287290
    },
    {
      "epoch": 1.0069148731105013,
      "grad_norm": 2.546875,
      "learning_rate": 3.6909091853132605e-05,
      "loss": 0.9289,
      "step": 287300
    },
    {
      "epoch": 1.0069499206173969,
      "grad_norm": 2.890625,
      "learning_rate": 3.69084428244689e-05,
      "loss": 0.8058,
      "step": 287310
    },
    {
      "epoch": 1.0069849681242924,
      "grad_norm": 2.8125,
      "learning_rate": 3.69077937958052e-05,
      "loss": 0.9677,
      "step": 287320
    },
    {
      "epoch": 1.0070200156311881,
      "grad_norm": 3.15625,
      "learning_rate": 3.69071447671415e-05,
      "loss": 0.8523,
      "step": 287330
    },
    {
      "epoch": 1.0070550631380837,
      "grad_norm": 2.6875,
      "learning_rate": 3.69064957384778e-05,
      "loss": 0.8739,
      "step": 287340
    },
    {
      "epoch": 1.0070901106449792,
      "grad_norm": 2.828125,
      "learning_rate": 3.69058467098141e-05,
      "loss": 0.8461,
      "step": 287350
    },
    {
      "epoch": 1.007125158151875,
      "grad_norm": 2.59375,
      "learning_rate": 3.690519768115039e-05,
      "loss": 0.8459,
      "step": 287360
    },
    {
      "epoch": 1.0071602056587705,
      "grad_norm": 2.84375,
      "learning_rate": 3.6904548652486694e-05,
      "loss": 0.8455,
      "step": 287370
    },
    {
      "epoch": 1.007195253165666,
      "grad_norm": 3.265625,
      "learning_rate": 3.690389962382299e-05,
      "loss": 0.9008,
      "step": 287380
    },
    {
      "epoch": 1.0072303006725616,
      "grad_norm": 2.703125,
      "learning_rate": 3.690325059515929e-05,
      "loss": 0.7994,
      "step": 287390
    },
    {
      "epoch": 1.0072653481794573,
      "grad_norm": 2.9375,
      "learning_rate": 3.6902601566495585e-05,
      "loss": 0.9238,
      "step": 287400
    },
    {
      "epoch": 1.0073003956863529,
      "grad_norm": 3.078125,
      "learning_rate": 3.6901952537831886e-05,
      "loss": 0.9012,
      "step": 287410
    },
    {
      "epoch": 1.0073354431932484,
      "grad_norm": 2.6875,
      "learning_rate": 3.690130350916818e-05,
      "loss": 0.8057,
      "step": 287420
    },
    {
      "epoch": 1.007370490700144,
      "grad_norm": 2.734375,
      "learning_rate": 3.6900654480504476e-05,
      "loss": 0.854,
      "step": 287430
    },
    {
      "epoch": 1.0074055382070397,
      "grad_norm": 3.15625,
      "learning_rate": 3.690000545184078e-05,
      "loss": 0.9123,
      "step": 287440
    },
    {
      "epoch": 1.0074405857139352,
      "grad_norm": 2.9375,
      "learning_rate": 3.689935642317707e-05,
      "loss": 0.9006,
      "step": 287450
    },
    {
      "epoch": 1.0074756332208308,
      "grad_norm": 3.484375,
      "learning_rate": 3.689870739451337e-05,
      "loss": 0.9353,
      "step": 287460
    },
    {
      "epoch": 1.0075106807277265,
      "grad_norm": 3.125,
      "learning_rate": 3.689805836584967e-05,
      "loss": 0.848,
      "step": 287470
    },
    {
      "epoch": 1.007545728234622,
      "grad_norm": 3.125,
      "learning_rate": 3.689740933718597e-05,
      "loss": 0.8099,
      "step": 287480
    },
    {
      "epoch": 1.0075807757415176,
      "grad_norm": 3.0625,
      "learning_rate": 3.6896760308522264e-05,
      "loss": 0.8946,
      "step": 287490
    },
    {
      "epoch": 1.0076158232484131,
      "grad_norm": 3.015625,
      "learning_rate": 3.6896111279858565e-05,
      "loss": 0.9184,
      "step": 287500
    },
    {
      "epoch": 1.007650870755309,
      "grad_norm": 3.109375,
      "learning_rate": 3.689546225119486e-05,
      "loss": 0.8564,
      "step": 287510
    },
    {
      "epoch": 1.0076859182622044,
      "grad_norm": 2.671875,
      "learning_rate": 3.689481322253116e-05,
      "loss": 0.8688,
      "step": 287520
    },
    {
      "epoch": 1.0077209657691,
      "grad_norm": 2.984375,
      "learning_rate": 3.6894164193867456e-05,
      "loss": 0.8423,
      "step": 287530
    },
    {
      "epoch": 1.0077560132759955,
      "grad_norm": 2.6875,
      "learning_rate": 3.689351516520376e-05,
      "loss": 0.8501,
      "step": 287540
    },
    {
      "epoch": 1.0077910607828913,
      "grad_norm": 2.703125,
      "learning_rate": 3.689286613654006e-05,
      "loss": 0.8322,
      "step": 287550
    },
    {
      "epoch": 1.0078261082897868,
      "grad_norm": 3.125,
      "learning_rate": 3.689221710787635e-05,
      "loss": 0.8389,
      "step": 287560
    },
    {
      "epoch": 1.0078611557966823,
      "grad_norm": 2.65625,
      "learning_rate": 3.6891568079212654e-05,
      "loss": 0.8154,
      "step": 287570
    },
    {
      "epoch": 1.007896203303578,
      "grad_norm": 2.796875,
      "learning_rate": 3.689091905054895e-05,
      "loss": 0.8636,
      "step": 287580
    },
    {
      "epoch": 1.0079312508104736,
      "grad_norm": 3.109375,
      "learning_rate": 3.689027002188525e-05,
      "loss": 0.9406,
      "step": 287590
    },
    {
      "epoch": 1.0079662983173692,
      "grad_norm": 2.8125,
      "learning_rate": 3.6889620993221545e-05,
      "loss": 0.924,
      "step": 287600
    },
    {
      "epoch": 1.0080013458242647,
      "grad_norm": 2.640625,
      "learning_rate": 3.6888971964557846e-05,
      "loss": 0.7893,
      "step": 287610
    },
    {
      "epoch": 1.0080363933311605,
      "grad_norm": 2.921875,
      "learning_rate": 3.688832293589414e-05,
      "loss": 0.8494,
      "step": 287620
    },
    {
      "epoch": 1.008071440838056,
      "grad_norm": 2.90625,
      "learning_rate": 3.688767390723044e-05,
      "loss": 0.8601,
      "step": 287630
    },
    {
      "epoch": 1.0081064883449515,
      "grad_norm": 2.796875,
      "learning_rate": 3.688702487856674e-05,
      "loss": 0.8885,
      "step": 287640
    },
    {
      "epoch": 1.008141535851847,
      "grad_norm": 3.140625,
      "learning_rate": 3.688637584990304e-05,
      "loss": 0.8216,
      "step": 287650
    },
    {
      "epoch": 1.0081765833587428,
      "grad_norm": 2.875,
      "learning_rate": 3.688572682123933e-05,
      "loss": 0.8157,
      "step": 287660
    },
    {
      "epoch": 1.0082116308656384,
      "grad_norm": 2.921875,
      "learning_rate": 3.6885077792575634e-05,
      "loss": 0.9519,
      "step": 287670
    },
    {
      "epoch": 1.008246678372534,
      "grad_norm": 2.890625,
      "learning_rate": 3.688442876391193e-05,
      "loss": 0.8428,
      "step": 287680
    },
    {
      "epoch": 1.0082817258794297,
      "grad_norm": 2.890625,
      "learning_rate": 3.688377973524823e-05,
      "loss": 0.8967,
      "step": 287690
    },
    {
      "epoch": 1.0083167733863252,
      "grad_norm": 2.90625,
      "learning_rate": 3.688313070658453e-05,
      "loss": 0.8825,
      "step": 287700
    },
    {
      "epoch": 1.0083518208932207,
      "grad_norm": 2.84375,
      "learning_rate": 3.6882481677920826e-05,
      "loss": 0.8372,
      "step": 287710
    },
    {
      "epoch": 1.0083868684001163,
      "grad_norm": 3.09375,
      "learning_rate": 3.688183264925713e-05,
      "loss": 0.8888,
      "step": 287720
    },
    {
      "epoch": 1.008421915907012,
      "grad_norm": 3.21875,
      "learning_rate": 3.688118362059342e-05,
      "loss": 0.8333,
      "step": 287730
    },
    {
      "epoch": 1.0084569634139076,
      "grad_norm": 2.796875,
      "learning_rate": 3.6880534591929724e-05,
      "loss": 0.8278,
      "step": 287740
    },
    {
      "epoch": 1.008492010920803,
      "grad_norm": 2.515625,
      "learning_rate": 3.687988556326602e-05,
      "loss": 0.8757,
      "step": 287750
    },
    {
      "epoch": 1.0085270584276989,
      "grad_norm": 2.6875,
      "learning_rate": 3.687923653460232e-05,
      "loss": 0.9416,
      "step": 287760
    },
    {
      "epoch": 1.0085621059345944,
      "grad_norm": 3.046875,
      "learning_rate": 3.6878587505938614e-05,
      "loss": 0.9105,
      "step": 287770
    },
    {
      "epoch": 1.00859715344149,
      "grad_norm": 2.9375,
      "learning_rate": 3.6877938477274916e-05,
      "loss": 0.8993,
      "step": 287780
    },
    {
      "epoch": 1.0086322009483855,
      "grad_norm": 3.015625,
      "learning_rate": 3.687728944861121e-05,
      "loss": 0.8995,
      "step": 287790
    },
    {
      "epoch": 1.0086672484552812,
      "grad_norm": 2.625,
      "learning_rate": 3.6876640419947505e-05,
      "loss": 0.8805,
      "step": 287800
    },
    {
      "epoch": 1.0087022959621768,
      "grad_norm": 2.796875,
      "learning_rate": 3.6875991391283806e-05,
      "loss": 0.8167,
      "step": 287810
    },
    {
      "epoch": 1.0087373434690723,
      "grad_norm": 3.125,
      "learning_rate": 3.68753423626201e-05,
      "loss": 1.0229,
      "step": 287820
    },
    {
      "epoch": 1.0087723909759678,
      "grad_norm": 2.953125,
      "learning_rate": 3.68746933339564e-05,
      "loss": 0.9203,
      "step": 287830
    },
    {
      "epoch": 1.0088074384828636,
      "grad_norm": 3.484375,
      "learning_rate": 3.68740443052927e-05,
      "loss": 0.9118,
      "step": 287840
    },
    {
      "epoch": 1.0088424859897591,
      "grad_norm": 3.109375,
      "learning_rate": 3.6873395276629e-05,
      "loss": 0.8399,
      "step": 287850
    },
    {
      "epoch": 1.0088775334966547,
      "grad_norm": 2.75,
      "learning_rate": 3.687274624796529e-05,
      "loss": 0.8978,
      "step": 287860
    },
    {
      "epoch": 1.0089125810035504,
      "grad_norm": 2.75,
      "learning_rate": 3.6872097219301594e-05,
      "loss": 0.8142,
      "step": 287870
    },
    {
      "epoch": 1.008947628510446,
      "grad_norm": 3.234375,
      "learning_rate": 3.687144819063789e-05,
      "loss": 0.8782,
      "step": 287880
    },
    {
      "epoch": 1.0089826760173415,
      "grad_norm": 2.921875,
      "learning_rate": 3.687079916197419e-05,
      "loss": 0.8252,
      "step": 287890
    },
    {
      "epoch": 1.009017723524237,
      "grad_norm": 3.15625,
      "learning_rate": 3.6870150133310485e-05,
      "loss": 0.9089,
      "step": 287900
    },
    {
      "epoch": 1.0090527710311328,
      "grad_norm": 3.046875,
      "learning_rate": 3.6869501104646786e-05,
      "loss": 0.8957,
      "step": 287910
    },
    {
      "epoch": 1.0090878185380283,
      "grad_norm": 2.546875,
      "learning_rate": 3.686885207598309e-05,
      "loss": 0.765,
      "step": 287920
    },
    {
      "epoch": 1.0091228660449238,
      "grad_norm": 3.3125,
      "learning_rate": 3.686820304731938e-05,
      "loss": 0.8626,
      "step": 287930
    },
    {
      "epoch": 1.0091579135518194,
      "grad_norm": 3.203125,
      "learning_rate": 3.6867554018655684e-05,
      "loss": 0.9097,
      "step": 287940
    },
    {
      "epoch": 1.0091929610587151,
      "grad_norm": 2.640625,
      "learning_rate": 3.686690498999198e-05,
      "loss": 0.8605,
      "step": 287950
    },
    {
      "epoch": 1.0092280085656107,
      "grad_norm": 2.515625,
      "learning_rate": 3.686625596132828e-05,
      "loss": 0.8375,
      "step": 287960
    },
    {
      "epoch": 1.0092630560725062,
      "grad_norm": 2.515625,
      "learning_rate": 3.6865606932664574e-05,
      "loss": 0.8772,
      "step": 287970
    },
    {
      "epoch": 1.009298103579402,
      "grad_norm": 3.453125,
      "learning_rate": 3.6864957904000876e-05,
      "loss": 0.83,
      "step": 287980
    },
    {
      "epoch": 1.0093331510862975,
      "grad_norm": 3.234375,
      "learning_rate": 3.686430887533717e-05,
      "loss": 0.9532,
      "step": 287990
    },
    {
      "epoch": 1.009368198593193,
      "grad_norm": 2.875,
      "learning_rate": 3.686365984667347e-05,
      "loss": 0.9295,
      "step": 288000
    },
    {
      "epoch": 1.0094032461000886,
      "grad_norm": 3.03125,
      "learning_rate": 3.6863010818009766e-05,
      "loss": 0.8685,
      "step": 288010
    },
    {
      "epoch": 1.0094382936069843,
      "grad_norm": 2.59375,
      "learning_rate": 3.686236178934607e-05,
      "loss": 0.8638,
      "step": 288020
    },
    {
      "epoch": 1.0094733411138799,
      "grad_norm": 2.890625,
      "learning_rate": 3.686171276068236e-05,
      "loss": 0.7647,
      "step": 288030
    },
    {
      "epoch": 1.0095083886207754,
      "grad_norm": 2.875,
      "learning_rate": 3.6861063732018664e-05,
      "loss": 0.9239,
      "step": 288040
    },
    {
      "epoch": 1.0095434361276712,
      "grad_norm": 3.5625,
      "learning_rate": 3.686041470335496e-05,
      "loss": 0.8725,
      "step": 288050
    },
    {
      "epoch": 1.0095784836345667,
      "grad_norm": 2.8125,
      "learning_rate": 3.685976567469126e-05,
      "loss": 0.7597,
      "step": 288060
    },
    {
      "epoch": 1.0096135311414622,
      "grad_norm": 3.015625,
      "learning_rate": 3.685911664602756e-05,
      "loss": 0.9023,
      "step": 288070
    },
    {
      "epoch": 1.0096485786483578,
      "grad_norm": 3.734375,
      "learning_rate": 3.6858467617363856e-05,
      "loss": 0.8614,
      "step": 288080
    },
    {
      "epoch": 1.0096836261552535,
      "grad_norm": 2.875,
      "learning_rate": 3.685781858870016e-05,
      "loss": 0.8523,
      "step": 288090
    },
    {
      "epoch": 1.009718673662149,
      "grad_norm": 3.171875,
      "learning_rate": 3.685716956003645e-05,
      "loss": 0.8129,
      "step": 288100
    },
    {
      "epoch": 1.0097537211690446,
      "grad_norm": 2.953125,
      "learning_rate": 3.685652053137275e-05,
      "loss": 0.8564,
      "step": 288110
    },
    {
      "epoch": 1.0097887686759401,
      "grad_norm": 3.203125,
      "learning_rate": 3.685587150270905e-05,
      "loss": 0.8805,
      "step": 288120
    },
    {
      "epoch": 1.009823816182836,
      "grad_norm": 2.734375,
      "learning_rate": 3.685522247404535e-05,
      "loss": 0.8193,
      "step": 288130
    },
    {
      "epoch": 1.0098588636897314,
      "grad_norm": 3.171875,
      "learning_rate": 3.6854573445381644e-05,
      "loss": 0.7615,
      "step": 288140
    },
    {
      "epoch": 1.009893911196627,
      "grad_norm": 2.859375,
      "learning_rate": 3.6853924416717945e-05,
      "loss": 0.7958,
      "step": 288150
    },
    {
      "epoch": 1.0099289587035227,
      "grad_norm": 2.703125,
      "learning_rate": 3.685327538805424e-05,
      "loss": 0.7262,
      "step": 288160
    },
    {
      "epoch": 1.0099640062104183,
      "grad_norm": 3.03125,
      "learning_rate": 3.6852626359390534e-05,
      "loss": 0.7964,
      "step": 288170
    },
    {
      "epoch": 1.0099990537173138,
      "grad_norm": 3.0625,
      "learning_rate": 3.6851977330726836e-05,
      "loss": 0.8514,
      "step": 288180
    },
    {
      "epoch": 1.0100341012242093,
      "grad_norm": 3.09375,
      "learning_rate": 3.685132830206313e-05,
      "loss": 0.8635,
      "step": 288190
    },
    {
      "epoch": 1.010069148731105,
      "grad_norm": 2.765625,
      "learning_rate": 3.685067927339943e-05,
      "loss": 0.8092,
      "step": 288200
    },
    {
      "epoch": 1.0101041962380006,
      "grad_norm": 3.375,
      "learning_rate": 3.6850030244735726e-05,
      "loss": 0.8742,
      "step": 288210
    },
    {
      "epoch": 1.0101392437448962,
      "grad_norm": 2.921875,
      "learning_rate": 3.684938121607203e-05,
      "loss": 0.9,
      "step": 288220
    },
    {
      "epoch": 1.0101742912517917,
      "grad_norm": 3.140625,
      "learning_rate": 3.684873218740832e-05,
      "loss": 0.8577,
      "step": 288230
    },
    {
      "epoch": 1.0102093387586875,
      "grad_norm": 2.78125,
      "learning_rate": 3.6848083158744624e-05,
      "loss": 0.8247,
      "step": 288240
    },
    {
      "epoch": 1.010244386265583,
      "grad_norm": 2.390625,
      "learning_rate": 3.684743413008092e-05,
      "loss": 0.8843,
      "step": 288250
    },
    {
      "epoch": 1.0102794337724785,
      "grad_norm": 2.734375,
      "learning_rate": 3.684678510141722e-05,
      "loss": 0.8879,
      "step": 288260
    },
    {
      "epoch": 1.0103144812793743,
      "grad_norm": 2.84375,
      "learning_rate": 3.6846136072753514e-05,
      "loss": 0.8575,
      "step": 288270
    },
    {
      "epoch": 1.0103495287862698,
      "grad_norm": 3.265625,
      "learning_rate": 3.6845487044089816e-05,
      "loss": 0.8032,
      "step": 288280
    },
    {
      "epoch": 1.0103845762931654,
      "grad_norm": 2.640625,
      "learning_rate": 3.684483801542612e-05,
      "loss": 0.8953,
      "step": 288290
    },
    {
      "epoch": 1.010419623800061,
      "grad_norm": 3.0625,
      "learning_rate": 3.684418898676241e-05,
      "loss": 0.8964,
      "step": 288300
    },
    {
      "epoch": 1.0104546713069567,
      "grad_norm": 3.078125,
      "learning_rate": 3.684353995809871e-05,
      "loss": 0.8683,
      "step": 288310
    },
    {
      "epoch": 1.0104897188138522,
      "grad_norm": 3.078125,
      "learning_rate": 3.684289092943501e-05,
      "loss": 0.8873,
      "step": 288320
    },
    {
      "epoch": 1.0105247663207477,
      "grad_norm": 3.09375,
      "learning_rate": 3.684224190077131e-05,
      "loss": 0.8732,
      "step": 288330
    },
    {
      "epoch": 1.0105598138276433,
      "grad_norm": 2.6875,
      "learning_rate": 3.6841592872107604e-05,
      "loss": 0.8115,
      "step": 288340
    },
    {
      "epoch": 1.010594861334539,
      "grad_norm": 2.921875,
      "learning_rate": 3.6840943843443905e-05,
      "loss": 0.8448,
      "step": 288350
    },
    {
      "epoch": 1.0106299088414346,
      "grad_norm": 3.203125,
      "learning_rate": 3.68402948147802e-05,
      "loss": 0.9421,
      "step": 288360
    },
    {
      "epoch": 1.01066495634833,
      "grad_norm": 2.921875,
      "learning_rate": 3.68396457861165e-05,
      "loss": 0.9164,
      "step": 288370
    },
    {
      "epoch": 1.0107000038552258,
      "grad_norm": 3.328125,
      "learning_rate": 3.6838996757452796e-05,
      "loss": 0.8907,
      "step": 288380
    },
    {
      "epoch": 1.0107350513621214,
      "grad_norm": 2.921875,
      "learning_rate": 3.68383477287891e-05,
      "loss": 0.9116,
      "step": 288390
    },
    {
      "epoch": 1.010770098869017,
      "grad_norm": 3.015625,
      "learning_rate": 3.683769870012539e-05,
      "loss": 0.8864,
      "step": 288400
    },
    {
      "epoch": 1.0108051463759125,
      "grad_norm": 2.5,
      "learning_rate": 3.683704967146169e-05,
      "loss": 0.8865,
      "step": 288410
    },
    {
      "epoch": 1.0108401938828082,
      "grad_norm": 2.96875,
      "learning_rate": 3.6836400642797995e-05,
      "loss": 0.7706,
      "step": 288420
    },
    {
      "epoch": 1.0108752413897037,
      "grad_norm": 3.125,
      "learning_rate": 3.683575161413429e-05,
      "loss": 0.8471,
      "step": 288430
    },
    {
      "epoch": 1.0109102888965993,
      "grad_norm": 3.078125,
      "learning_rate": 3.683510258547059e-05,
      "loss": 0.8785,
      "step": 288440
    },
    {
      "epoch": 1.010945336403495,
      "grad_norm": 2.59375,
      "learning_rate": 3.6834453556806885e-05,
      "loss": 0.8926,
      "step": 288450
    },
    {
      "epoch": 1.0109803839103906,
      "grad_norm": 3.34375,
      "learning_rate": 3.683380452814319e-05,
      "loss": 0.9042,
      "step": 288460
    },
    {
      "epoch": 1.0110154314172861,
      "grad_norm": 2.65625,
      "learning_rate": 3.683315549947948e-05,
      "loss": 0.8657,
      "step": 288470
    },
    {
      "epoch": 1.0110504789241817,
      "grad_norm": 3.015625,
      "learning_rate": 3.683250647081578e-05,
      "loss": 0.8324,
      "step": 288480
    },
    {
      "epoch": 1.0110855264310774,
      "grad_norm": 2.671875,
      "learning_rate": 3.683185744215208e-05,
      "loss": 0.8868,
      "step": 288490
    },
    {
      "epoch": 1.011120573937973,
      "grad_norm": 2.390625,
      "learning_rate": 3.683120841348838e-05,
      "loss": 0.9485,
      "step": 288500
    },
    {
      "epoch": 1.0111556214448685,
      "grad_norm": 2.890625,
      "learning_rate": 3.683055938482467e-05,
      "loss": 0.8727,
      "step": 288510
    },
    {
      "epoch": 1.011190668951764,
      "grad_norm": 3.328125,
      "learning_rate": 3.6829910356160975e-05,
      "loss": 0.9334,
      "step": 288520
    },
    {
      "epoch": 1.0112257164586598,
      "grad_norm": 3.265625,
      "learning_rate": 3.682926132749727e-05,
      "loss": 0.9304,
      "step": 288530
    },
    {
      "epoch": 1.0112607639655553,
      "grad_norm": 2.8125,
      "learning_rate": 3.6828612298833564e-05,
      "loss": 0.9057,
      "step": 288540
    },
    {
      "epoch": 1.0112958114724508,
      "grad_norm": 2.75,
      "learning_rate": 3.6827963270169865e-05,
      "loss": 0.948,
      "step": 288550
    },
    {
      "epoch": 1.0113308589793466,
      "grad_norm": 2.984375,
      "learning_rate": 3.682731424150616e-05,
      "loss": 0.8505,
      "step": 288560
    },
    {
      "epoch": 1.0113659064862421,
      "grad_norm": 2.78125,
      "learning_rate": 3.682666521284246e-05,
      "loss": 0.9091,
      "step": 288570
    },
    {
      "epoch": 1.0114009539931377,
      "grad_norm": 2.90625,
      "learning_rate": 3.6826016184178756e-05,
      "loss": 0.8946,
      "step": 288580
    },
    {
      "epoch": 1.0114360015000332,
      "grad_norm": 3.234375,
      "learning_rate": 3.682536715551506e-05,
      "loss": 0.9285,
      "step": 288590
    },
    {
      "epoch": 1.011471049006929,
      "grad_norm": 2.703125,
      "learning_rate": 3.682471812685135e-05,
      "loss": 0.9191,
      "step": 288600
    },
    {
      "epoch": 1.0115060965138245,
      "grad_norm": 3.03125,
      "learning_rate": 3.682406909818765e-05,
      "loss": 0.8563,
      "step": 288610
    },
    {
      "epoch": 1.01154114402072,
      "grad_norm": 3.171875,
      "learning_rate": 3.682342006952395e-05,
      "loss": 0.8668,
      "step": 288620
    },
    {
      "epoch": 1.0115761915276156,
      "grad_norm": 3.015625,
      "learning_rate": 3.682277104086025e-05,
      "loss": 0.9317,
      "step": 288630
    },
    {
      "epoch": 1.0116112390345113,
      "grad_norm": 2.828125,
      "learning_rate": 3.6822122012196544e-05,
      "loss": 0.8144,
      "step": 288640
    },
    {
      "epoch": 1.0116462865414069,
      "grad_norm": 3.171875,
      "learning_rate": 3.6821472983532845e-05,
      "loss": 0.8866,
      "step": 288650
    },
    {
      "epoch": 1.0116813340483024,
      "grad_norm": 2.84375,
      "learning_rate": 3.682082395486915e-05,
      "loss": 0.8223,
      "step": 288660
    },
    {
      "epoch": 1.0117163815551982,
      "grad_norm": 3.09375,
      "learning_rate": 3.682017492620544e-05,
      "loss": 0.8928,
      "step": 288670
    },
    {
      "epoch": 1.0117514290620937,
      "grad_norm": 2.921875,
      "learning_rate": 3.681952589754174e-05,
      "loss": 0.8924,
      "step": 288680
    },
    {
      "epoch": 1.0117864765689892,
      "grad_norm": 3.25,
      "learning_rate": 3.681887686887804e-05,
      "loss": 0.9188,
      "step": 288690
    },
    {
      "epoch": 1.0118215240758848,
      "grad_norm": 3.28125,
      "learning_rate": 3.681822784021434e-05,
      "loss": 0.8209,
      "step": 288700
    },
    {
      "epoch": 1.0118565715827805,
      "grad_norm": 2.828125,
      "learning_rate": 3.681757881155063e-05,
      "loss": 0.7829,
      "step": 288710
    },
    {
      "epoch": 1.011891619089676,
      "grad_norm": 2.671875,
      "learning_rate": 3.6816929782886935e-05,
      "loss": 0.8438,
      "step": 288720
    },
    {
      "epoch": 1.0119266665965716,
      "grad_norm": 2.765625,
      "learning_rate": 3.681628075422323e-05,
      "loss": 0.7971,
      "step": 288730
    },
    {
      "epoch": 1.0119617141034674,
      "grad_norm": 2.828125,
      "learning_rate": 3.681563172555953e-05,
      "loss": 0.7798,
      "step": 288740
    },
    {
      "epoch": 1.011996761610363,
      "grad_norm": 2.734375,
      "learning_rate": 3.6814982696895825e-05,
      "loss": 0.9131,
      "step": 288750
    },
    {
      "epoch": 1.0120318091172584,
      "grad_norm": 2.609375,
      "learning_rate": 3.681433366823213e-05,
      "loss": 0.9299,
      "step": 288760
    },
    {
      "epoch": 1.012066856624154,
      "grad_norm": 2.78125,
      "learning_rate": 3.681368463956842e-05,
      "loss": 0.8567,
      "step": 288770
    },
    {
      "epoch": 1.0121019041310497,
      "grad_norm": 3.125,
      "learning_rate": 3.681303561090472e-05,
      "loss": 0.8811,
      "step": 288780
    },
    {
      "epoch": 1.0121369516379453,
      "grad_norm": 2.90625,
      "learning_rate": 3.6812386582241024e-05,
      "loss": 0.8472,
      "step": 288790
    },
    {
      "epoch": 1.0121719991448408,
      "grad_norm": 3.28125,
      "learning_rate": 3.681173755357732e-05,
      "loss": 0.8825,
      "step": 288800
    },
    {
      "epoch": 1.0122070466517363,
      "grad_norm": 3.015625,
      "learning_rate": 3.681108852491362e-05,
      "loss": 0.8624,
      "step": 288810
    },
    {
      "epoch": 1.012242094158632,
      "grad_norm": 2.734375,
      "learning_rate": 3.6810439496249915e-05,
      "loss": 0.9319,
      "step": 288820
    },
    {
      "epoch": 1.0122771416655276,
      "grad_norm": 2.515625,
      "learning_rate": 3.6809790467586216e-05,
      "loss": 0.8864,
      "step": 288830
    },
    {
      "epoch": 1.0123121891724232,
      "grad_norm": 2.703125,
      "learning_rate": 3.680914143892251e-05,
      "loss": 0.8542,
      "step": 288840
    },
    {
      "epoch": 1.012347236679319,
      "grad_norm": 2.546875,
      "learning_rate": 3.680849241025881e-05,
      "loss": 0.8001,
      "step": 288850
    },
    {
      "epoch": 1.0123822841862145,
      "grad_norm": 3.078125,
      "learning_rate": 3.680784338159511e-05,
      "loss": 0.9633,
      "step": 288860
    },
    {
      "epoch": 1.01241733169311,
      "grad_norm": 3.421875,
      "learning_rate": 3.680719435293141e-05,
      "loss": 0.8957,
      "step": 288870
    },
    {
      "epoch": 1.0124523792000055,
      "grad_norm": 2.734375,
      "learning_rate": 3.68065453242677e-05,
      "loss": 0.8132,
      "step": 288880
    },
    {
      "epoch": 1.0124874267069013,
      "grad_norm": 2.84375,
      "learning_rate": 3.6805896295604004e-05,
      "loss": 0.8364,
      "step": 288890
    },
    {
      "epoch": 1.0125224742137968,
      "grad_norm": 2.890625,
      "learning_rate": 3.68052472669403e-05,
      "loss": 0.9408,
      "step": 288900
    },
    {
      "epoch": 1.0125575217206924,
      "grad_norm": 3.296875,
      "learning_rate": 3.68045982382766e-05,
      "loss": 0.8977,
      "step": 288910
    },
    {
      "epoch": 1.012592569227588,
      "grad_norm": 3.171875,
      "learning_rate": 3.6803949209612895e-05,
      "loss": 0.9057,
      "step": 288920
    },
    {
      "epoch": 1.0126276167344836,
      "grad_norm": 2.75,
      "learning_rate": 3.680330018094919e-05,
      "loss": 0.7781,
      "step": 288930
    },
    {
      "epoch": 1.0126626642413792,
      "grad_norm": 2.75,
      "learning_rate": 3.680265115228549e-05,
      "loss": 0.8426,
      "step": 288940
    },
    {
      "epoch": 1.0126977117482747,
      "grad_norm": 2.5,
      "learning_rate": 3.6802002123621785e-05,
      "loss": 0.8733,
      "step": 288950
    },
    {
      "epoch": 1.0127327592551705,
      "grad_norm": 3.03125,
      "learning_rate": 3.680135309495809e-05,
      "loss": 0.822,
      "step": 288960
    },
    {
      "epoch": 1.012767806762066,
      "grad_norm": 3.03125,
      "learning_rate": 3.680070406629438e-05,
      "loss": 0.8914,
      "step": 288970
    },
    {
      "epoch": 1.0128028542689615,
      "grad_norm": 2.828125,
      "learning_rate": 3.680005503763068e-05,
      "loss": 0.8915,
      "step": 288980
    },
    {
      "epoch": 1.012837901775857,
      "grad_norm": 2.921875,
      "learning_rate": 3.679940600896698e-05,
      "loss": 0.7967,
      "step": 288990
    },
    {
      "epoch": 1.0128729492827528,
      "grad_norm": 3.21875,
      "learning_rate": 3.679875698030328e-05,
      "loss": 0.8526,
      "step": 289000
    },
    {
      "epoch": 1.0129079967896484,
      "grad_norm": 3.046875,
      "learning_rate": 3.679810795163957e-05,
      "loss": 0.8647,
      "step": 289010
    },
    {
      "epoch": 1.012943044296544,
      "grad_norm": 2.703125,
      "learning_rate": 3.6797458922975875e-05,
      "loss": 0.866,
      "step": 289020
    },
    {
      "epoch": 1.0129780918034395,
      "grad_norm": 2.71875,
      "learning_rate": 3.6796809894312176e-05,
      "loss": 0.7893,
      "step": 289030
    },
    {
      "epoch": 1.0130131393103352,
      "grad_norm": 3.171875,
      "learning_rate": 3.679616086564847e-05,
      "loss": 0.9054,
      "step": 289040
    },
    {
      "epoch": 1.0130481868172307,
      "grad_norm": 2.859375,
      "learning_rate": 3.679551183698477e-05,
      "loss": 0.8697,
      "step": 289050
    },
    {
      "epoch": 1.0130832343241263,
      "grad_norm": 3.484375,
      "learning_rate": 3.679486280832107e-05,
      "loss": 0.9492,
      "step": 289060
    },
    {
      "epoch": 1.013118281831022,
      "grad_norm": 2.75,
      "learning_rate": 3.679421377965737e-05,
      "loss": 0.8998,
      "step": 289070
    },
    {
      "epoch": 1.0131533293379176,
      "grad_norm": 3.234375,
      "learning_rate": 3.679356475099366e-05,
      "loss": 1.0429,
      "step": 289080
    },
    {
      "epoch": 1.013188376844813,
      "grad_norm": 2.84375,
      "learning_rate": 3.6792915722329964e-05,
      "loss": 0.8849,
      "step": 289090
    },
    {
      "epoch": 1.0132234243517086,
      "grad_norm": 2.671875,
      "learning_rate": 3.679226669366626e-05,
      "loss": 0.8353,
      "step": 289100
    },
    {
      "epoch": 1.0132584718586044,
      "grad_norm": 3.140625,
      "learning_rate": 3.679161766500256e-05,
      "loss": 0.8374,
      "step": 289110
    },
    {
      "epoch": 1.0132935193655,
      "grad_norm": 2.828125,
      "learning_rate": 3.6790968636338855e-05,
      "loss": 0.9313,
      "step": 289120
    },
    {
      "epoch": 1.0133285668723955,
      "grad_norm": 3.171875,
      "learning_rate": 3.6790319607675156e-05,
      "loss": 0.8495,
      "step": 289130
    },
    {
      "epoch": 1.0133636143792912,
      "grad_norm": 3.390625,
      "learning_rate": 3.678967057901145e-05,
      "loss": 0.8847,
      "step": 289140
    },
    {
      "epoch": 1.0133986618861868,
      "grad_norm": 3.015625,
      "learning_rate": 3.678902155034775e-05,
      "loss": 0.8401,
      "step": 289150
    },
    {
      "epoch": 1.0134337093930823,
      "grad_norm": 3.03125,
      "learning_rate": 3.6788372521684053e-05,
      "loss": 0.8693,
      "step": 289160
    },
    {
      "epoch": 1.0134687568999778,
      "grad_norm": 3.328125,
      "learning_rate": 3.678772349302035e-05,
      "loss": 0.8394,
      "step": 289170
    },
    {
      "epoch": 1.0135038044068736,
      "grad_norm": 2.765625,
      "learning_rate": 3.678707446435665e-05,
      "loss": 0.8502,
      "step": 289180
    },
    {
      "epoch": 1.0135388519137691,
      "grad_norm": 2.765625,
      "learning_rate": 3.6786425435692944e-05,
      "loss": 0.8592,
      "step": 289190
    },
    {
      "epoch": 1.0135738994206647,
      "grad_norm": 3.3125,
      "learning_rate": 3.6785776407029245e-05,
      "loss": 0.8777,
      "step": 289200
    },
    {
      "epoch": 1.0136089469275602,
      "grad_norm": 2.890625,
      "learning_rate": 3.678512737836554e-05,
      "loss": 0.8794,
      "step": 289210
    },
    {
      "epoch": 1.013643994434456,
      "grad_norm": 3.171875,
      "learning_rate": 3.678447834970184e-05,
      "loss": 0.8287,
      "step": 289220
    },
    {
      "epoch": 1.0136790419413515,
      "grad_norm": 3.015625,
      "learning_rate": 3.6783829321038136e-05,
      "loss": 0.8119,
      "step": 289230
    },
    {
      "epoch": 1.013714089448247,
      "grad_norm": 2.6875,
      "learning_rate": 3.678318029237444e-05,
      "loss": 0.8714,
      "step": 289240
    },
    {
      "epoch": 1.0137491369551428,
      "grad_norm": 2.765625,
      "learning_rate": 3.678253126371073e-05,
      "loss": 0.8699,
      "step": 289250
    },
    {
      "epoch": 1.0137841844620383,
      "grad_norm": 2.859375,
      "learning_rate": 3.678188223504703e-05,
      "loss": 0.8654,
      "step": 289260
    },
    {
      "epoch": 1.0138192319689339,
      "grad_norm": 3.34375,
      "learning_rate": 3.678123320638333e-05,
      "loss": 0.8553,
      "step": 289270
    },
    {
      "epoch": 1.0138542794758294,
      "grad_norm": 3.0625,
      "learning_rate": 3.678058417771963e-05,
      "loss": 0.8578,
      "step": 289280
    },
    {
      "epoch": 1.0138893269827252,
      "grad_norm": 3.1875,
      "learning_rate": 3.677993514905593e-05,
      "loss": 0.834,
      "step": 289290
    },
    {
      "epoch": 1.0139243744896207,
      "grad_norm": 3.109375,
      "learning_rate": 3.677928612039222e-05,
      "loss": 0.8833,
      "step": 289300
    },
    {
      "epoch": 1.0139594219965162,
      "grad_norm": 3.359375,
      "learning_rate": 3.677863709172852e-05,
      "loss": 0.8615,
      "step": 289310
    },
    {
      "epoch": 1.0139944695034118,
      "grad_norm": 2.765625,
      "learning_rate": 3.6777988063064815e-05,
      "loss": 0.8967,
      "step": 289320
    },
    {
      "epoch": 1.0140295170103075,
      "grad_norm": 2.859375,
      "learning_rate": 3.6777339034401116e-05,
      "loss": 0.8297,
      "step": 289330
    },
    {
      "epoch": 1.014064564517203,
      "grad_norm": 3.140625,
      "learning_rate": 3.677669000573741e-05,
      "loss": 0.9147,
      "step": 289340
    },
    {
      "epoch": 1.0140996120240986,
      "grad_norm": 2.921875,
      "learning_rate": 3.677604097707371e-05,
      "loss": 0.8142,
      "step": 289350
    },
    {
      "epoch": 1.0141346595309944,
      "grad_norm": 2.90625,
      "learning_rate": 3.6775391948410007e-05,
      "loss": 0.8711,
      "step": 289360
    },
    {
      "epoch": 1.01416970703789,
      "grad_norm": 3.1875,
      "learning_rate": 3.677474291974631e-05,
      "loss": 0.8739,
      "step": 289370
    },
    {
      "epoch": 1.0142047545447854,
      "grad_norm": 3.125,
      "learning_rate": 3.677409389108261e-05,
      "loss": 0.9084,
      "step": 289380
    },
    {
      "epoch": 1.014239802051681,
      "grad_norm": 3.0625,
      "learning_rate": 3.6773444862418904e-05,
      "loss": 0.8872,
      "step": 289390
    },
    {
      "epoch": 1.0142748495585767,
      "grad_norm": 2.828125,
      "learning_rate": 3.6772795833755205e-05,
      "loss": 0.8203,
      "step": 289400
    },
    {
      "epoch": 1.0143098970654723,
      "grad_norm": 2.796875,
      "learning_rate": 3.67721468050915e-05,
      "loss": 0.8018,
      "step": 289410
    },
    {
      "epoch": 1.0143449445723678,
      "grad_norm": 3.046875,
      "learning_rate": 3.67714977764278e-05,
      "loss": 0.917,
      "step": 289420
    },
    {
      "epoch": 1.0143799920792635,
      "grad_norm": 3.375,
      "learning_rate": 3.6770848747764096e-05,
      "loss": 0.9069,
      "step": 289430
    },
    {
      "epoch": 1.014415039586159,
      "grad_norm": 2.515625,
      "learning_rate": 3.67701997191004e-05,
      "loss": 0.8416,
      "step": 289440
    },
    {
      "epoch": 1.0144500870930546,
      "grad_norm": 2.4375,
      "learning_rate": 3.676955069043669e-05,
      "loss": 0.7834,
      "step": 289450
    },
    {
      "epoch": 1.0144851345999502,
      "grad_norm": 2.84375,
      "learning_rate": 3.676890166177299e-05,
      "loss": 0.928,
      "step": 289460
    },
    {
      "epoch": 1.014520182106846,
      "grad_norm": 3.171875,
      "learning_rate": 3.676825263310929e-05,
      "loss": 1.0278,
      "step": 289470
    },
    {
      "epoch": 1.0145552296137414,
      "grad_norm": 3.4375,
      "learning_rate": 3.676760360444559e-05,
      "loss": 0.9001,
      "step": 289480
    },
    {
      "epoch": 1.014590277120637,
      "grad_norm": 2.953125,
      "learning_rate": 3.6766954575781884e-05,
      "loss": 0.8556,
      "step": 289490
    },
    {
      "epoch": 1.0146253246275325,
      "grad_norm": 2.609375,
      "learning_rate": 3.6766305547118185e-05,
      "loss": 0.7801,
      "step": 289500
    },
    {
      "epoch": 1.0146603721344283,
      "grad_norm": 3.03125,
      "learning_rate": 3.676565651845448e-05,
      "loss": 0.8614,
      "step": 289510
    },
    {
      "epoch": 1.0146954196413238,
      "grad_norm": 3.21875,
      "learning_rate": 3.676500748979078e-05,
      "loss": 0.9344,
      "step": 289520
    },
    {
      "epoch": 1.0147304671482194,
      "grad_norm": 2.953125,
      "learning_rate": 3.676435846112708e-05,
      "loss": 0.8653,
      "step": 289530
    },
    {
      "epoch": 1.014765514655115,
      "grad_norm": 3.203125,
      "learning_rate": 3.676370943246338e-05,
      "loss": 0.9208,
      "step": 289540
    },
    {
      "epoch": 1.0148005621620106,
      "grad_norm": 3.140625,
      "learning_rate": 3.676306040379968e-05,
      "loss": 0.9168,
      "step": 289550
    },
    {
      "epoch": 1.0148356096689062,
      "grad_norm": 2.5,
      "learning_rate": 3.676241137513597e-05,
      "loss": 0.8344,
      "step": 289560
    },
    {
      "epoch": 1.0148706571758017,
      "grad_norm": 2.890625,
      "learning_rate": 3.6761762346472275e-05,
      "loss": 0.8665,
      "step": 289570
    },
    {
      "epoch": 1.0149057046826975,
      "grad_norm": 2.53125,
      "learning_rate": 3.676111331780857e-05,
      "loss": 0.805,
      "step": 289580
    },
    {
      "epoch": 1.014940752189593,
      "grad_norm": 2.953125,
      "learning_rate": 3.676046428914487e-05,
      "loss": 0.8465,
      "step": 289590
    },
    {
      "epoch": 1.0149757996964885,
      "grad_norm": 3.109375,
      "learning_rate": 3.6759815260481165e-05,
      "loss": 0.9489,
      "step": 289600
    },
    {
      "epoch": 1.015010847203384,
      "grad_norm": 3.53125,
      "learning_rate": 3.675916623181747e-05,
      "loss": 0.8984,
      "step": 289610
    },
    {
      "epoch": 1.0150458947102798,
      "grad_norm": 3.078125,
      "learning_rate": 3.675851720315376e-05,
      "loss": 0.8499,
      "step": 289620
    },
    {
      "epoch": 1.0150809422171754,
      "grad_norm": 2.78125,
      "learning_rate": 3.675786817449006e-05,
      "loss": 0.8421,
      "step": 289630
    },
    {
      "epoch": 1.015115989724071,
      "grad_norm": 2.859375,
      "learning_rate": 3.675721914582636e-05,
      "loss": 0.9906,
      "step": 289640
    },
    {
      "epoch": 1.0151510372309667,
      "grad_norm": 2.671875,
      "learning_rate": 3.675657011716266e-05,
      "loss": 0.8764,
      "step": 289650
    },
    {
      "epoch": 1.0151860847378622,
      "grad_norm": 3.09375,
      "learning_rate": 3.675592108849896e-05,
      "loss": 0.7632,
      "step": 289660
    },
    {
      "epoch": 1.0152211322447577,
      "grad_norm": 2.703125,
      "learning_rate": 3.675527205983525e-05,
      "loss": 0.9287,
      "step": 289670
    },
    {
      "epoch": 1.0152561797516533,
      "grad_norm": 3.1875,
      "learning_rate": 3.675462303117155e-05,
      "loss": 0.8652,
      "step": 289680
    },
    {
      "epoch": 1.015291227258549,
      "grad_norm": 3.109375,
      "learning_rate": 3.6753974002507844e-05,
      "loss": 0.8433,
      "step": 289690
    },
    {
      "epoch": 1.0153262747654446,
      "grad_norm": 2.671875,
      "learning_rate": 3.6753324973844145e-05,
      "loss": 0.9134,
      "step": 289700
    },
    {
      "epoch": 1.01536132227234,
      "grad_norm": 2.75,
      "learning_rate": 3.675267594518044e-05,
      "loss": 0.9006,
      "step": 289710
    },
    {
      "epoch": 1.0153963697792359,
      "grad_norm": 3.03125,
      "learning_rate": 3.675202691651674e-05,
      "loss": 0.8201,
      "step": 289720
    },
    {
      "epoch": 1.0154314172861314,
      "grad_norm": 3.21875,
      "learning_rate": 3.6751377887853036e-05,
      "loss": 0.9043,
      "step": 289730
    },
    {
      "epoch": 1.015466464793027,
      "grad_norm": 3.03125,
      "learning_rate": 3.675072885918934e-05,
      "loss": 0.8329,
      "step": 289740
    },
    {
      "epoch": 1.0155015122999225,
      "grad_norm": 2.875,
      "learning_rate": 3.675007983052564e-05,
      "loss": 0.8208,
      "step": 289750
    },
    {
      "epoch": 1.0155365598068182,
      "grad_norm": 3.234375,
      "learning_rate": 3.674943080186193e-05,
      "loss": 0.9068,
      "step": 289760
    },
    {
      "epoch": 1.0155716073137138,
      "grad_norm": 3.140625,
      "learning_rate": 3.6748781773198235e-05,
      "loss": 0.9154,
      "step": 289770
    },
    {
      "epoch": 1.0156066548206093,
      "grad_norm": 2.8125,
      "learning_rate": 3.674813274453453e-05,
      "loss": 0.8776,
      "step": 289780
    },
    {
      "epoch": 1.0156417023275048,
      "grad_norm": 3.3125,
      "learning_rate": 3.674748371587083e-05,
      "loss": 0.9018,
      "step": 289790
    },
    {
      "epoch": 1.0156767498344006,
      "grad_norm": 3.3125,
      "learning_rate": 3.6746834687207125e-05,
      "loss": 0.8151,
      "step": 289800
    },
    {
      "epoch": 1.0157117973412961,
      "grad_norm": 3.015625,
      "learning_rate": 3.674618565854343e-05,
      "loss": 0.8757,
      "step": 289810
    },
    {
      "epoch": 1.0157468448481917,
      "grad_norm": 2.53125,
      "learning_rate": 3.674553662987972e-05,
      "loss": 0.8993,
      "step": 289820
    },
    {
      "epoch": 1.0157818923550874,
      "grad_norm": 3.03125,
      "learning_rate": 3.674488760121602e-05,
      "loss": 0.8196,
      "step": 289830
    },
    {
      "epoch": 1.015816939861983,
      "grad_norm": 2.78125,
      "learning_rate": 3.674423857255232e-05,
      "loss": 0.8769,
      "step": 289840
    },
    {
      "epoch": 1.0158519873688785,
      "grad_norm": 2.703125,
      "learning_rate": 3.674358954388862e-05,
      "loss": 0.7273,
      "step": 289850
    },
    {
      "epoch": 1.015887034875774,
      "grad_norm": 2.796875,
      "learning_rate": 3.674294051522491e-05,
      "loss": 0.8641,
      "step": 289860
    },
    {
      "epoch": 1.0159220823826698,
      "grad_norm": 2.859375,
      "learning_rate": 3.6742291486561215e-05,
      "loss": 0.8539,
      "step": 289870
    },
    {
      "epoch": 1.0159571298895653,
      "grad_norm": 2.84375,
      "learning_rate": 3.674164245789751e-05,
      "loss": 0.8583,
      "step": 289880
    },
    {
      "epoch": 1.0159921773964609,
      "grad_norm": 2.734375,
      "learning_rate": 3.674099342923381e-05,
      "loss": 0.8077,
      "step": 289890
    },
    {
      "epoch": 1.0160272249033564,
      "grad_norm": 2.46875,
      "learning_rate": 3.674034440057011e-05,
      "loss": 0.8245,
      "step": 289900
    },
    {
      "epoch": 1.0160622724102522,
      "grad_norm": 3.125,
      "learning_rate": 3.673969537190641e-05,
      "loss": 0.8928,
      "step": 289910
    },
    {
      "epoch": 1.0160973199171477,
      "grad_norm": 3.5,
      "learning_rate": 3.673904634324271e-05,
      "loss": 0.8591,
      "step": 289920
    },
    {
      "epoch": 1.0161323674240432,
      "grad_norm": 2.921875,
      "learning_rate": 3.6738397314579e-05,
      "loss": 0.8467,
      "step": 289930
    },
    {
      "epoch": 1.016167414930939,
      "grad_norm": 2.546875,
      "learning_rate": 3.6737748285915304e-05,
      "loss": 0.8502,
      "step": 289940
    },
    {
      "epoch": 1.0162024624378345,
      "grad_norm": 2.796875,
      "learning_rate": 3.67370992572516e-05,
      "loss": 0.9131,
      "step": 289950
    },
    {
      "epoch": 1.01623750994473,
      "grad_norm": 3.0,
      "learning_rate": 3.67364502285879e-05,
      "loss": 0.8885,
      "step": 289960
    },
    {
      "epoch": 1.0162725574516256,
      "grad_norm": 3.171875,
      "learning_rate": 3.6735801199924195e-05,
      "loss": 0.863,
      "step": 289970
    },
    {
      "epoch": 1.0163076049585213,
      "grad_norm": 2.59375,
      "learning_rate": 3.6735152171260496e-05,
      "loss": 0.7771,
      "step": 289980
    },
    {
      "epoch": 1.0163426524654169,
      "grad_norm": 3.015625,
      "learning_rate": 3.673450314259679e-05,
      "loss": 0.9228,
      "step": 289990
    },
    {
      "epoch": 1.0163776999723124,
      "grad_norm": 2.59375,
      "learning_rate": 3.673385411393309e-05,
      "loss": 0.8163,
      "step": 290000
    },
    {
      "epoch": 1.0163776999723124,
      "eval_loss": 0.818240761756897,
      "eval_runtime": 554.1695,
      "eval_samples_per_second": 686.498,
      "eval_steps_per_second": 57.208,
      "step": 290000
    },
    {
      "epoch": 1.0164127474792082,
      "grad_norm": 2.796875,
      "learning_rate": 3.673320508526939e-05,
      "loss": 0.8559,
      "step": 290010
    },
    {
      "epoch": 1.0164477949861037,
      "grad_norm": 3.265625,
      "learning_rate": 3.673255605660569e-05,
      "loss": 0.8501,
      "step": 290020
    },
    {
      "epoch": 1.0164828424929993,
      "grad_norm": 3.0,
      "learning_rate": 3.673190702794199e-05,
      "loss": 0.9229,
      "step": 290030
    },
    {
      "epoch": 1.0165178899998948,
      "grad_norm": 2.96875,
      "learning_rate": 3.6731257999278284e-05,
      "loss": 0.8318,
      "step": 290040
    },
    {
      "epoch": 1.0165529375067905,
      "grad_norm": 3.0,
      "learning_rate": 3.673060897061458e-05,
      "loss": 0.8417,
      "step": 290050
    },
    {
      "epoch": 1.016587985013686,
      "grad_norm": 2.4375,
      "learning_rate": 3.672995994195087e-05,
      "loss": 0.8475,
      "step": 290060
    },
    {
      "epoch": 1.0166230325205816,
      "grad_norm": 2.9375,
      "learning_rate": 3.6729310913287175e-05,
      "loss": 0.8369,
      "step": 290070
    },
    {
      "epoch": 1.0166580800274772,
      "grad_norm": 2.609375,
      "learning_rate": 3.672866188462347e-05,
      "loss": 0.9305,
      "step": 290080
    },
    {
      "epoch": 1.016693127534373,
      "grad_norm": 3.0625,
      "learning_rate": 3.672801285595977e-05,
      "loss": 0.8598,
      "step": 290090
    },
    {
      "epoch": 1.0167281750412684,
      "grad_norm": 2.875,
      "learning_rate": 3.6727363827296065e-05,
      "loss": 0.8057,
      "step": 290100
    },
    {
      "epoch": 1.016763222548164,
      "grad_norm": 2.375,
      "learning_rate": 3.672671479863237e-05,
      "loss": 0.753,
      "step": 290110
    },
    {
      "epoch": 1.0167982700550597,
      "grad_norm": 2.625,
      "learning_rate": 3.672606576996867e-05,
      "loss": 0.89,
      "step": 290120
    },
    {
      "epoch": 1.0168333175619553,
      "grad_norm": 3.015625,
      "learning_rate": 3.672541674130496e-05,
      "loss": 0.8414,
      "step": 290130
    },
    {
      "epoch": 1.0168683650688508,
      "grad_norm": 2.90625,
      "learning_rate": 3.6724767712641264e-05,
      "loss": 0.8511,
      "step": 290140
    },
    {
      "epoch": 1.0169034125757463,
      "grad_norm": 2.96875,
      "learning_rate": 3.672411868397756e-05,
      "loss": 0.9155,
      "step": 290150
    },
    {
      "epoch": 1.016938460082642,
      "grad_norm": 2.546875,
      "learning_rate": 3.672346965531386e-05,
      "loss": 0.8685,
      "step": 290160
    },
    {
      "epoch": 1.0169735075895376,
      "grad_norm": 3.0625,
      "learning_rate": 3.6722820626650155e-05,
      "loss": 0.9043,
      "step": 290170
    },
    {
      "epoch": 1.0170085550964332,
      "grad_norm": 3.265625,
      "learning_rate": 3.6722171597986456e-05,
      "loss": 0.9084,
      "step": 290180
    },
    {
      "epoch": 1.0170436026033287,
      "grad_norm": 2.78125,
      "learning_rate": 3.672152256932275e-05,
      "loss": 0.8396,
      "step": 290190
    },
    {
      "epoch": 1.0170786501102245,
      "grad_norm": 2.921875,
      "learning_rate": 3.672087354065905e-05,
      "loss": 0.9421,
      "step": 290200
    },
    {
      "epoch": 1.01711369761712,
      "grad_norm": 2.828125,
      "learning_rate": 3.672022451199535e-05,
      "loss": 0.843,
      "step": 290210
    },
    {
      "epoch": 1.0171487451240155,
      "grad_norm": 2.71875,
      "learning_rate": 3.671957548333165e-05,
      "loss": 0.8068,
      "step": 290220
    },
    {
      "epoch": 1.0171837926309113,
      "grad_norm": 2.625,
      "learning_rate": 3.671892645466794e-05,
      "loss": 0.8661,
      "step": 290230
    },
    {
      "epoch": 1.0172188401378068,
      "grad_norm": 2.515625,
      "learning_rate": 3.6718277426004244e-05,
      "loss": 0.7989,
      "step": 290240
    },
    {
      "epoch": 1.0172538876447024,
      "grad_norm": 3.234375,
      "learning_rate": 3.6717628397340546e-05,
      "loss": 0.8484,
      "step": 290250
    },
    {
      "epoch": 1.017288935151598,
      "grad_norm": 2.828125,
      "learning_rate": 3.671697936867684e-05,
      "loss": 0.845,
      "step": 290260
    },
    {
      "epoch": 1.0173239826584937,
      "grad_norm": 3.28125,
      "learning_rate": 3.671633034001314e-05,
      "loss": 0.9365,
      "step": 290270
    },
    {
      "epoch": 1.0173590301653892,
      "grad_norm": 3.0,
      "learning_rate": 3.6715681311349436e-05,
      "loss": 0.8629,
      "step": 290280
    },
    {
      "epoch": 1.0173940776722847,
      "grad_norm": 3.15625,
      "learning_rate": 3.671503228268574e-05,
      "loss": 0.8703,
      "step": 290290
    },
    {
      "epoch": 1.0174291251791803,
      "grad_norm": 2.875,
      "learning_rate": 3.671438325402203e-05,
      "loss": 0.7979,
      "step": 290300
    },
    {
      "epoch": 1.017464172686076,
      "grad_norm": 3.03125,
      "learning_rate": 3.6713734225358334e-05,
      "loss": 0.8735,
      "step": 290310
    },
    {
      "epoch": 1.0174992201929716,
      "grad_norm": 3.234375,
      "learning_rate": 3.671308519669463e-05,
      "loss": 0.9243,
      "step": 290320
    },
    {
      "epoch": 1.017534267699867,
      "grad_norm": 3.421875,
      "learning_rate": 3.671243616803093e-05,
      "loss": 0.8378,
      "step": 290330
    },
    {
      "epoch": 1.0175693152067629,
      "grad_norm": 3.109375,
      "learning_rate": 3.6711787139367224e-05,
      "loss": 0.8469,
      "step": 290340
    },
    {
      "epoch": 1.0176043627136584,
      "grad_norm": 2.921875,
      "learning_rate": 3.6711138110703526e-05,
      "loss": 0.8245,
      "step": 290350
    },
    {
      "epoch": 1.017639410220554,
      "grad_norm": 3.0,
      "learning_rate": 3.671048908203982e-05,
      "loss": 0.8993,
      "step": 290360
    },
    {
      "epoch": 1.0176744577274495,
      "grad_norm": 3.703125,
      "learning_rate": 3.670984005337612e-05,
      "loss": 0.977,
      "step": 290370
    },
    {
      "epoch": 1.0177095052343452,
      "grad_norm": 2.71875,
      "learning_rate": 3.6709191024712416e-05,
      "loss": 0.8086,
      "step": 290380
    },
    {
      "epoch": 1.0177445527412408,
      "grad_norm": 3.140625,
      "learning_rate": 3.670854199604872e-05,
      "loss": 0.8752,
      "step": 290390
    },
    {
      "epoch": 1.0177796002481363,
      "grad_norm": 3.03125,
      "learning_rate": 3.670789296738502e-05,
      "loss": 0.9669,
      "step": 290400
    },
    {
      "epoch": 1.017814647755032,
      "grad_norm": 2.640625,
      "learning_rate": 3.6707243938721314e-05,
      "loss": 0.855,
      "step": 290410
    },
    {
      "epoch": 1.0178496952619276,
      "grad_norm": 2.46875,
      "learning_rate": 3.6706594910057615e-05,
      "loss": 0.909,
      "step": 290420
    },
    {
      "epoch": 1.0178847427688231,
      "grad_norm": 2.640625,
      "learning_rate": 3.67059458813939e-05,
      "loss": 0.9158,
      "step": 290430
    },
    {
      "epoch": 1.0179197902757187,
      "grad_norm": 3.109375,
      "learning_rate": 3.6705296852730204e-05,
      "loss": 0.9546,
      "step": 290440
    },
    {
      "epoch": 1.0179548377826144,
      "grad_norm": 3.28125,
      "learning_rate": 3.67046478240665e-05,
      "loss": 0.9187,
      "step": 290450
    },
    {
      "epoch": 1.01798988528951,
      "grad_norm": 3.046875,
      "learning_rate": 3.67039987954028e-05,
      "loss": 0.8251,
      "step": 290460
    },
    {
      "epoch": 1.0180249327964055,
      "grad_norm": 2.9375,
      "learning_rate": 3.6703349766739095e-05,
      "loss": 0.815,
      "step": 290470
    },
    {
      "epoch": 1.018059980303301,
      "grad_norm": 3.25,
      "learning_rate": 3.6702700738075396e-05,
      "loss": 0.8579,
      "step": 290480
    },
    {
      "epoch": 1.0180950278101968,
      "grad_norm": 3.0,
      "learning_rate": 3.67020517094117e-05,
      "loss": 0.9203,
      "step": 290490
    },
    {
      "epoch": 1.0181300753170923,
      "grad_norm": 3.171875,
      "learning_rate": 3.670140268074799e-05,
      "loss": 0.838,
      "step": 290500
    },
    {
      "epoch": 1.0181651228239879,
      "grad_norm": 2.734375,
      "learning_rate": 3.6700753652084294e-05,
      "loss": 0.8117,
      "step": 290510
    },
    {
      "epoch": 1.0182001703308836,
      "grad_norm": 3.09375,
      "learning_rate": 3.670010462342059e-05,
      "loss": 0.8905,
      "step": 290520
    },
    {
      "epoch": 1.0182352178377791,
      "grad_norm": 3.21875,
      "learning_rate": 3.669945559475689e-05,
      "loss": 0.8829,
      "step": 290530
    },
    {
      "epoch": 1.0182702653446747,
      "grad_norm": 2.9375,
      "learning_rate": 3.6698806566093184e-05,
      "loss": 0.9707,
      "step": 290540
    },
    {
      "epoch": 1.0183053128515702,
      "grad_norm": 3.1875,
      "learning_rate": 3.6698157537429486e-05,
      "loss": 0.896,
      "step": 290550
    },
    {
      "epoch": 1.018340360358466,
      "grad_norm": 3.109375,
      "learning_rate": 3.669750850876578e-05,
      "loss": 0.8159,
      "step": 290560
    },
    {
      "epoch": 1.0183754078653615,
      "grad_norm": 3.0,
      "learning_rate": 3.669685948010208e-05,
      "loss": 0.821,
      "step": 290570
    },
    {
      "epoch": 1.018410455372257,
      "grad_norm": 3.203125,
      "learning_rate": 3.6696210451438376e-05,
      "loss": 0.9244,
      "step": 290580
    },
    {
      "epoch": 1.0184455028791526,
      "grad_norm": 2.828125,
      "learning_rate": 3.669556142277468e-05,
      "loss": 0.9373,
      "step": 290590
    },
    {
      "epoch": 1.0184805503860483,
      "grad_norm": 3.125,
      "learning_rate": 3.669491239411097e-05,
      "loss": 0.882,
      "step": 290600
    },
    {
      "epoch": 1.0185155978929439,
      "grad_norm": 3.0,
      "learning_rate": 3.6694263365447274e-05,
      "loss": 0.8728,
      "step": 290610
    },
    {
      "epoch": 1.0185506453998394,
      "grad_norm": 2.6875,
      "learning_rate": 3.6693614336783575e-05,
      "loss": 0.7808,
      "step": 290620
    },
    {
      "epoch": 1.0185856929067352,
      "grad_norm": 3.15625,
      "learning_rate": 3.669296530811987e-05,
      "loss": 0.8695,
      "step": 290630
    },
    {
      "epoch": 1.0186207404136307,
      "grad_norm": 2.921875,
      "learning_rate": 3.669231627945617e-05,
      "loss": 0.9034,
      "step": 290640
    },
    {
      "epoch": 1.0186557879205262,
      "grad_norm": 3.15625,
      "learning_rate": 3.6691667250792466e-05,
      "loss": 0.8816,
      "step": 290650
    },
    {
      "epoch": 1.0186908354274218,
      "grad_norm": 3.078125,
      "learning_rate": 3.669101822212877e-05,
      "loss": 0.8644,
      "step": 290660
    },
    {
      "epoch": 1.0187258829343175,
      "grad_norm": 2.578125,
      "learning_rate": 3.669036919346506e-05,
      "loss": 0.9292,
      "step": 290670
    },
    {
      "epoch": 1.018760930441213,
      "grad_norm": 2.71875,
      "learning_rate": 3.668972016480136e-05,
      "loss": 0.9033,
      "step": 290680
    },
    {
      "epoch": 1.0187959779481086,
      "grad_norm": 2.75,
      "learning_rate": 3.668907113613766e-05,
      "loss": 0.864,
      "step": 290690
    },
    {
      "epoch": 1.0188310254550044,
      "grad_norm": 2.875,
      "learning_rate": 3.668842210747396e-05,
      "loss": 0.8432,
      "step": 290700
    },
    {
      "epoch": 1.0188660729619,
      "grad_norm": 2.75,
      "learning_rate": 3.6687773078810254e-05,
      "loss": 0.8472,
      "step": 290710
    },
    {
      "epoch": 1.0189011204687954,
      "grad_norm": 2.796875,
      "learning_rate": 3.6687124050146555e-05,
      "loss": 0.803,
      "step": 290720
    },
    {
      "epoch": 1.018936167975691,
      "grad_norm": 2.890625,
      "learning_rate": 3.668647502148285e-05,
      "loss": 0.8381,
      "step": 290730
    },
    {
      "epoch": 1.0189712154825867,
      "grad_norm": 3.015625,
      "learning_rate": 3.668582599281915e-05,
      "loss": 0.8224,
      "step": 290740
    },
    {
      "epoch": 1.0190062629894823,
      "grad_norm": 2.96875,
      "learning_rate": 3.6685176964155446e-05,
      "loss": 0.8094,
      "step": 290750
    },
    {
      "epoch": 1.0190413104963778,
      "grad_norm": 3.09375,
      "learning_rate": 3.668452793549175e-05,
      "loss": 0.8439,
      "step": 290760
    },
    {
      "epoch": 1.0190763580032733,
      "grad_norm": 2.9375,
      "learning_rate": 3.668387890682805e-05,
      "loss": 0.8465,
      "step": 290770
    },
    {
      "epoch": 1.019111405510169,
      "grad_norm": 3.0,
      "learning_rate": 3.668322987816434e-05,
      "loss": 0.7857,
      "step": 290780
    },
    {
      "epoch": 1.0191464530170646,
      "grad_norm": 2.671875,
      "learning_rate": 3.6682580849500644e-05,
      "loss": 0.8257,
      "step": 290790
    },
    {
      "epoch": 1.0191815005239602,
      "grad_norm": 3.359375,
      "learning_rate": 3.668193182083693e-05,
      "loss": 0.9629,
      "step": 290800
    },
    {
      "epoch": 1.019216548030856,
      "grad_norm": 2.984375,
      "learning_rate": 3.6681282792173234e-05,
      "loss": 0.9626,
      "step": 290810
    },
    {
      "epoch": 1.0192515955377515,
      "grad_norm": 2.984375,
      "learning_rate": 3.668063376350953e-05,
      "loss": 0.8887,
      "step": 290820
    },
    {
      "epoch": 1.019286643044647,
      "grad_norm": 2.9375,
      "learning_rate": 3.667998473484583e-05,
      "loss": 0.8015,
      "step": 290830
    },
    {
      "epoch": 1.0193216905515425,
      "grad_norm": 2.59375,
      "learning_rate": 3.6679335706182124e-05,
      "loss": 0.8964,
      "step": 290840
    },
    {
      "epoch": 1.0193567380584383,
      "grad_norm": 2.875,
      "learning_rate": 3.6678686677518426e-05,
      "loss": 0.8757,
      "step": 290850
    },
    {
      "epoch": 1.0193917855653338,
      "grad_norm": 2.796875,
      "learning_rate": 3.667803764885473e-05,
      "loss": 0.9927,
      "step": 290860
    },
    {
      "epoch": 1.0194268330722294,
      "grad_norm": 3.046875,
      "learning_rate": 3.667738862019102e-05,
      "loss": 0.8433,
      "step": 290870
    },
    {
      "epoch": 1.019461880579125,
      "grad_norm": 3.15625,
      "learning_rate": 3.667673959152732e-05,
      "loss": 0.8744,
      "step": 290880
    },
    {
      "epoch": 1.0194969280860207,
      "grad_norm": 3.109375,
      "learning_rate": 3.667609056286362e-05,
      "loss": 0.9629,
      "step": 290890
    },
    {
      "epoch": 1.0195319755929162,
      "grad_norm": 2.859375,
      "learning_rate": 3.667544153419992e-05,
      "loss": 0.7865,
      "step": 290900
    },
    {
      "epoch": 1.0195670230998117,
      "grad_norm": 3.15625,
      "learning_rate": 3.6674792505536214e-05,
      "loss": 0.9159,
      "step": 290910
    },
    {
      "epoch": 1.0196020706067075,
      "grad_norm": 3.125,
      "learning_rate": 3.6674143476872515e-05,
      "loss": 0.816,
      "step": 290920
    },
    {
      "epoch": 1.019637118113603,
      "grad_norm": 2.984375,
      "learning_rate": 3.667349444820881e-05,
      "loss": 0.8531,
      "step": 290930
    },
    {
      "epoch": 1.0196721656204986,
      "grad_norm": 3.328125,
      "learning_rate": 3.667284541954511e-05,
      "loss": 0.9023,
      "step": 290940
    },
    {
      "epoch": 1.019707213127394,
      "grad_norm": 2.71875,
      "learning_rate": 3.6672196390881406e-05,
      "loss": 0.8113,
      "step": 290950
    },
    {
      "epoch": 1.0197422606342899,
      "grad_norm": 3.046875,
      "learning_rate": 3.667154736221771e-05,
      "loss": 0.9805,
      "step": 290960
    },
    {
      "epoch": 1.0197773081411854,
      "grad_norm": 2.421875,
      "learning_rate": 3.6670898333554e-05,
      "loss": 0.8919,
      "step": 290970
    },
    {
      "epoch": 1.019812355648081,
      "grad_norm": 3.3125,
      "learning_rate": 3.66702493048903e-05,
      "loss": 0.8373,
      "step": 290980
    },
    {
      "epoch": 1.0198474031549765,
      "grad_norm": 2.609375,
      "learning_rate": 3.6669600276226604e-05,
      "loss": 0.8178,
      "step": 290990
    },
    {
      "epoch": 1.0198824506618722,
      "grad_norm": 2.859375,
      "learning_rate": 3.66689512475629e-05,
      "loss": 0.9287,
      "step": 291000
    },
    {
      "epoch": 1.0199174981687678,
      "grad_norm": 2.8125,
      "learning_rate": 3.66683022188992e-05,
      "loss": 0.8424,
      "step": 291010
    },
    {
      "epoch": 1.0199525456756633,
      "grad_norm": 3.296875,
      "learning_rate": 3.6667653190235495e-05,
      "loss": 0.8867,
      "step": 291020
    },
    {
      "epoch": 1.019987593182559,
      "grad_norm": 2.984375,
      "learning_rate": 3.6667004161571796e-05,
      "loss": 0.9213,
      "step": 291030
    },
    {
      "epoch": 1.0200226406894546,
      "grad_norm": 2.890625,
      "learning_rate": 3.666635513290809e-05,
      "loss": 0.8153,
      "step": 291040
    },
    {
      "epoch": 1.0200576881963501,
      "grad_norm": 3.28125,
      "learning_rate": 3.666570610424439e-05,
      "loss": 0.9293,
      "step": 291050
    },
    {
      "epoch": 1.0200927357032457,
      "grad_norm": 2.984375,
      "learning_rate": 3.666505707558069e-05,
      "loss": 0.8891,
      "step": 291060
    },
    {
      "epoch": 1.0201277832101414,
      "grad_norm": 2.78125,
      "learning_rate": 3.666440804691699e-05,
      "loss": 0.8903,
      "step": 291070
    },
    {
      "epoch": 1.020162830717037,
      "grad_norm": 3.3125,
      "learning_rate": 3.666375901825328e-05,
      "loss": 0.8661,
      "step": 291080
    },
    {
      "epoch": 1.0201978782239325,
      "grad_norm": 2.78125,
      "learning_rate": 3.6663109989589584e-05,
      "loss": 0.8842,
      "step": 291090
    },
    {
      "epoch": 1.0202329257308282,
      "grad_norm": 3.109375,
      "learning_rate": 3.666246096092588e-05,
      "loss": 0.8088,
      "step": 291100
    },
    {
      "epoch": 1.0202679732377238,
      "grad_norm": 3.0,
      "learning_rate": 3.666181193226218e-05,
      "loss": 0.8605,
      "step": 291110
    },
    {
      "epoch": 1.0203030207446193,
      "grad_norm": 2.921875,
      "learning_rate": 3.6661162903598475e-05,
      "loss": 0.9124,
      "step": 291120
    },
    {
      "epoch": 1.0203380682515149,
      "grad_norm": 2.734375,
      "learning_rate": 3.6660513874934776e-05,
      "loss": 0.8682,
      "step": 291130
    },
    {
      "epoch": 1.0203731157584106,
      "grad_norm": 2.96875,
      "learning_rate": 3.665986484627108e-05,
      "loss": 0.8408,
      "step": 291140
    },
    {
      "epoch": 1.0204081632653061,
      "grad_norm": 3.109375,
      "learning_rate": 3.665921581760737e-05,
      "loss": 0.9336,
      "step": 291150
    },
    {
      "epoch": 1.0204432107722017,
      "grad_norm": 2.875,
      "learning_rate": 3.6658566788943674e-05,
      "loss": 0.8818,
      "step": 291160
    },
    {
      "epoch": 1.0204782582790972,
      "grad_norm": 2.96875,
      "learning_rate": 3.665791776027997e-05,
      "loss": 0.8304,
      "step": 291170
    },
    {
      "epoch": 1.020513305785993,
      "grad_norm": 2.96875,
      "learning_rate": 3.665726873161626e-05,
      "loss": 0.9023,
      "step": 291180
    },
    {
      "epoch": 1.0205483532928885,
      "grad_norm": 3.328125,
      "learning_rate": 3.665661970295256e-05,
      "loss": 0.8607,
      "step": 291190
    },
    {
      "epoch": 1.020583400799784,
      "grad_norm": 3.125,
      "learning_rate": 3.665597067428886e-05,
      "loss": 0.8639,
      "step": 291200
    },
    {
      "epoch": 1.0206184483066798,
      "grad_norm": 2.765625,
      "learning_rate": 3.6655321645625154e-05,
      "loss": 0.8483,
      "step": 291210
    },
    {
      "epoch": 1.0206534958135753,
      "grad_norm": 2.921875,
      "learning_rate": 3.6654672616961455e-05,
      "loss": 0.7915,
      "step": 291220
    },
    {
      "epoch": 1.0206885433204709,
      "grad_norm": 3.390625,
      "learning_rate": 3.6654023588297756e-05,
      "loss": 0.9156,
      "step": 291230
    },
    {
      "epoch": 1.0207235908273664,
      "grad_norm": 2.625,
      "learning_rate": 3.665337455963405e-05,
      "loss": 0.8716,
      "step": 291240
    },
    {
      "epoch": 1.0207586383342622,
      "grad_norm": 2.578125,
      "learning_rate": 3.665272553097035e-05,
      "loss": 0.9101,
      "step": 291250
    },
    {
      "epoch": 1.0207936858411577,
      "grad_norm": 2.546875,
      "learning_rate": 3.665207650230665e-05,
      "loss": 0.8606,
      "step": 291260
    },
    {
      "epoch": 1.0208287333480532,
      "grad_norm": 2.671875,
      "learning_rate": 3.665142747364295e-05,
      "loss": 0.8252,
      "step": 291270
    },
    {
      "epoch": 1.0208637808549488,
      "grad_norm": 2.984375,
      "learning_rate": 3.665077844497924e-05,
      "loss": 0.9181,
      "step": 291280
    },
    {
      "epoch": 1.0208988283618445,
      "grad_norm": 3.0625,
      "learning_rate": 3.6650129416315544e-05,
      "loss": 0.8807,
      "step": 291290
    },
    {
      "epoch": 1.02093387586874,
      "grad_norm": 2.828125,
      "learning_rate": 3.664948038765184e-05,
      "loss": 0.8515,
      "step": 291300
    },
    {
      "epoch": 1.0209689233756356,
      "grad_norm": 2.859375,
      "learning_rate": 3.664883135898814e-05,
      "loss": 0.8673,
      "step": 291310
    },
    {
      "epoch": 1.0210039708825314,
      "grad_norm": 3.140625,
      "learning_rate": 3.6648182330324435e-05,
      "loss": 0.9115,
      "step": 291320
    },
    {
      "epoch": 1.021039018389427,
      "grad_norm": 2.703125,
      "learning_rate": 3.6647533301660736e-05,
      "loss": 0.9146,
      "step": 291330
    },
    {
      "epoch": 1.0210740658963224,
      "grad_norm": 2.734375,
      "learning_rate": 3.664688427299703e-05,
      "loss": 0.8161,
      "step": 291340
    },
    {
      "epoch": 1.021109113403218,
      "grad_norm": 2.6875,
      "learning_rate": 3.664623524433333e-05,
      "loss": 0.9035,
      "step": 291350
    },
    {
      "epoch": 1.0211441609101137,
      "grad_norm": 3.859375,
      "learning_rate": 3.6645586215669634e-05,
      "loss": 0.9434,
      "step": 291360
    },
    {
      "epoch": 1.0211792084170093,
      "grad_norm": 2.90625,
      "learning_rate": 3.664493718700593e-05,
      "loss": 0.8993,
      "step": 291370
    },
    {
      "epoch": 1.0212142559239048,
      "grad_norm": 2.5,
      "learning_rate": 3.664428815834223e-05,
      "loss": 0.869,
      "step": 291380
    },
    {
      "epoch": 1.0212493034308006,
      "grad_norm": 2.703125,
      "learning_rate": 3.6643639129678524e-05,
      "loss": 0.8586,
      "step": 291390
    },
    {
      "epoch": 1.021284350937696,
      "grad_norm": 2.90625,
      "learning_rate": 3.6642990101014826e-05,
      "loss": 0.8604,
      "step": 291400
    },
    {
      "epoch": 1.0213193984445916,
      "grad_norm": 2.71875,
      "learning_rate": 3.664234107235112e-05,
      "loss": 0.8439,
      "step": 291410
    },
    {
      "epoch": 1.0213544459514872,
      "grad_norm": 3.0625,
      "learning_rate": 3.664169204368742e-05,
      "loss": 0.8869,
      "step": 291420
    },
    {
      "epoch": 1.021389493458383,
      "grad_norm": 2.703125,
      "learning_rate": 3.6641043015023716e-05,
      "loss": 0.8728,
      "step": 291430
    },
    {
      "epoch": 1.0214245409652785,
      "grad_norm": 2.71875,
      "learning_rate": 3.664039398636002e-05,
      "loss": 0.8597,
      "step": 291440
    },
    {
      "epoch": 1.021459588472174,
      "grad_norm": 3.0,
      "learning_rate": 3.663974495769631e-05,
      "loss": 0.8585,
      "step": 291450
    },
    {
      "epoch": 1.0214946359790695,
      "grad_norm": 2.96875,
      "learning_rate": 3.6639095929032614e-05,
      "loss": 0.8986,
      "step": 291460
    },
    {
      "epoch": 1.0215296834859653,
      "grad_norm": 2.859375,
      "learning_rate": 3.663844690036891e-05,
      "loss": 0.9283,
      "step": 291470
    },
    {
      "epoch": 1.0215647309928608,
      "grad_norm": 2.453125,
      "learning_rate": 3.663779787170521e-05,
      "loss": 0.8461,
      "step": 291480
    },
    {
      "epoch": 1.0215997784997564,
      "grad_norm": 2.953125,
      "learning_rate": 3.663714884304151e-05,
      "loss": 0.8866,
      "step": 291490
    },
    {
      "epoch": 1.0216348260066521,
      "grad_norm": 3.140625,
      "learning_rate": 3.6636499814377806e-05,
      "loss": 0.976,
      "step": 291500
    },
    {
      "epoch": 1.0216698735135477,
      "grad_norm": 3.078125,
      "learning_rate": 3.663585078571411e-05,
      "loss": 0.8727,
      "step": 291510
    },
    {
      "epoch": 1.0217049210204432,
      "grad_norm": 3.046875,
      "learning_rate": 3.66352017570504e-05,
      "loss": 0.834,
      "step": 291520
    },
    {
      "epoch": 1.0217399685273387,
      "grad_norm": 2.828125,
      "learning_rate": 3.66345527283867e-05,
      "loss": 0.8647,
      "step": 291530
    },
    {
      "epoch": 1.0217750160342345,
      "grad_norm": 3.046875,
      "learning_rate": 3.6633903699723e-05,
      "loss": 0.8428,
      "step": 291540
    },
    {
      "epoch": 1.02181006354113,
      "grad_norm": 3.0,
      "learning_rate": 3.663325467105929e-05,
      "loss": 0.8609,
      "step": 291550
    },
    {
      "epoch": 1.0218451110480256,
      "grad_norm": 2.96875,
      "learning_rate": 3.663260564239559e-05,
      "loss": 0.9242,
      "step": 291560
    },
    {
      "epoch": 1.021880158554921,
      "grad_norm": 3.09375,
      "learning_rate": 3.663195661373189e-05,
      "loss": 0.8597,
      "step": 291570
    },
    {
      "epoch": 1.0219152060618168,
      "grad_norm": 2.734375,
      "learning_rate": 3.663130758506819e-05,
      "loss": 0.8508,
      "step": 291580
    },
    {
      "epoch": 1.0219502535687124,
      "grad_norm": 2.8125,
      "learning_rate": 3.6630658556404484e-05,
      "loss": 0.8296,
      "step": 291590
    },
    {
      "epoch": 1.021985301075608,
      "grad_norm": 2.84375,
      "learning_rate": 3.6630009527740786e-05,
      "loss": 0.8477,
      "step": 291600
    },
    {
      "epoch": 1.0220203485825037,
      "grad_norm": 3.359375,
      "learning_rate": 3.662936049907708e-05,
      "loss": 0.8667,
      "step": 291610
    },
    {
      "epoch": 1.0220553960893992,
      "grad_norm": 2.875,
      "learning_rate": 3.662871147041338e-05,
      "loss": 0.8984,
      "step": 291620
    },
    {
      "epoch": 1.0220904435962948,
      "grad_norm": 3.578125,
      "learning_rate": 3.6628062441749676e-05,
      "loss": 0.902,
      "step": 291630
    },
    {
      "epoch": 1.0221254911031903,
      "grad_norm": 2.828125,
      "learning_rate": 3.662741341308598e-05,
      "loss": 0.876,
      "step": 291640
    },
    {
      "epoch": 1.022160538610086,
      "grad_norm": 3.046875,
      "learning_rate": 3.662676438442227e-05,
      "loss": 0.8966,
      "step": 291650
    },
    {
      "epoch": 1.0221955861169816,
      "grad_norm": 2.96875,
      "learning_rate": 3.6626115355758574e-05,
      "loss": 0.8043,
      "step": 291660
    },
    {
      "epoch": 1.0222306336238771,
      "grad_norm": 2.65625,
      "learning_rate": 3.662546632709487e-05,
      "loss": 0.867,
      "step": 291670
    },
    {
      "epoch": 1.0222656811307727,
      "grad_norm": 3.140625,
      "learning_rate": 3.662481729843117e-05,
      "loss": 0.8666,
      "step": 291680
    },
    {
      "epoch": 1.0223007286376684,
      "grad_norm": 3.234375,
      "learning_rate": 3.6624168269767464e-05,
      "loss": 0.8892,
      "step": 291690
    },
    {
      "epoch": 1.022335776144564,
      "grad_norm": 3.265625,
      "learning_rate": 3.6623519241103766e-05,
      "loss": 0.8195,
      "step": 291700
    },
    {
      "epoch": 1.0223708236514595,
      "grad_norm": 2.9375,
      "learning_rate": 3.662287021244006e-05,
      "loss": 0.8955,
      "step": 291710
    },
    {
      "epoch": 1.0224058711583552,
      "grad_norm": 2.953125,
      "learning_rate": 3.662222118377636e-05,
      "loss": 0.8232,
      "step": 291720
    },
    {
      "epoch": 1.0224409186652508,
      "grad_norm": 2.75,
      "learning_rate": 3.662157215511266e-05,
      "loss": 0.8933,
      "step": 291730
    },
    {
      "epoch": 1.0224759661721463,
      "grad_norm": 3.046875,
      "learning_rate": 3.662092312644896e-05,
      "loss": 0.8674,
      "step": 291740
    },
    {
      "epoch": 1.0225110136790418,
      "grad_norm": 2.765625,
      "learning_rate": 3.662027409778526e-05,
      "loss": 0.8662,
      "step": 291750
    },
    {
      "epoch": 1.0225460611859376,
      "grad_norm": 2.53125,
      "learning_rate": 3.6619625069121554e-05,
      "loss": 0.8519,
      "step": 291760
    },
    {
      "epoch": 1.0225811086928331,
      "grad_norm": 3.0,
      "learning_rate": 3.6618976040457855e-05,
      "loss": 0.9062,
      "step": 291770
    },
    {
      "epoch": 1.0226161561997287,
      "grad_norm": 3.234375,
      "learning_rate": 3.661832701179415e-05,
      "loss": 0.9237,
      "step": 291780
    },
    {
      "epoch": 1.0226512037066244,
      "grad_norm": 2.453125,
      "learning_rate": 3.661767798313045e-05,
      "loss": 0.838,
      "step": 291790
    },
    {
      "epoch": 1.02268625121352,
      "grad_norm": 3.3125,
      "learning_rate": 3.6617028954466746e-05,
      "loss": 0.8583,
      "step": 291800
    },
    {
      "epoch": 1.0227212987204155,
      "grad_norm": 2.515625,
      "learning_rate": 3.661637992580305e-05,
      "loss": 0.9187,
      "step": 291810
    },
    {
      "epoch": 1.022756346227311,
      "grad_norm": 2.859375,
      "learning_rate": 3.661573089713934e-05,
      "loss": 0.8427,
      "step": 291820
    },
    {
      "epoch": 1.0227913937342068,
      "grad_norm": 3.0,
      "learning_rate": 3.661508186847564e-05,
      "loss": 0.8845,
      "step": 291830
    },
    {
      "epoch": 1.0228264412411023,
      "grad_norm": 3.171875,
      "learning_rate": 3.661443283981194e-05,
      "loss": 0.8576,
      "step": 291840
    },
    {
      "epoch": 1.0228614887479979,
      "grad_norm": 2.984375,
      "learning_rate": 3.661378381114824e-05,
      "loss": 0.8236,
      "step": 291850
    },
    {
      "epoch": 1.0228965362548934,
      "grad_norm": 3.609375,
      "learning_rate": 3.661313478248454e-05,
      "loss": 0.8765,
      "step": 291860
    },
    {
      "epoch": 1.0229315837617892,
      "grad_norm": 3.21875,
      "learning_rate": 3.6612485753820835e-05,
      "loss": 0.8513,
      "step": 291870
    },
    {
      "epoch": 1.0229666312686847,
      "grad_norm": 2.796875,
      "learning_rate": 3.6611836725157136e-05,
      "loss": 0.8371,
      "step": 291880
    },
    {
      "epoch": 1.0230016787755802,
      "grad_norm": 2.96875,
      "learning_rate": 3.661118769649343e-05,
      "loss": 0.9552,
      "step": 291890
    },
    {
      "epoch": 1.023036726282476,
      "grad_norm": 2.890625,
      "learning_rate": 3.661053866782973e-05,
      "loss": 0.8592,
      "step": 291900
    },
    {
      "epoch": 1.0230717737893715,
      "grad_norm": 3.140625,
      "learning_rate": 3.660988963916603e-05,
      "loss": 0.8708,
      "step": 291910
    },
    {
      "epoch": 1.023106821296267,
      "grad_norm": 2.84375,
      "learning_rate": 3.660924061050233e-05,
      "loss": 0.8486,
      "step": 291920
    },
    {
      "epoch": 1.0231418688031626,
      "grad_norm": 2.96875,
      "learning_rate": 3.6608591581838616e-05,
      "loss": 0.8709,
      "step": 291930
    },
    {
      "epoch": 1.0231769163100584,
      "grad_norm": 2.953125,
      "learning_rate": 3.660794255317492e-05,
      "loss": 0.8263,
      "step": 291940
    },
    {
      "epoch": 1.023211963816954,
      "grad_norm": 3.109375,
      "learning_rate": 3.660729352451122e-05,
      "loss": 0.881,
      "step": 291950
    },
    {
      "epoch": 1.0232470113238494,
      "grad_norm": 2.453125,
      "learning_rate": 3.6606644495847514e-05,
      "loss": 0.8139,
      "step": 291960
    },
    {
      "epoch": 1.023282058830745,
      "grad_norm": 3.046875,
      "learning_rate": 3.6605995467183815e-05,
      "loss": 0.8474,
      "step": 291970
    },
    {
      "epoch": 1.0233171063376407,
      "grad_norm": 2.65625,
      "learning_rate": 3.660534643852011e-05,
      "loss": 0.7997,
      "step": 291980
    },
    {
      "epoch": 1.0233521538445363,
      "grad_norm": 3.296875,
      "learning_rate": 3.660469740985641e-05,
      "loss": 0.7964,
      "step": 291990
    },
    {
      "epoch": 1.0233872013514318,
      "grad_norm": 3.296875,
      "learning_rate": 3.6604048381192706e-05,
      "loss": 0.8572,
      "step": 292000
    },
    {
      "epoch": 1.0234222488583276,
      "grad_norm": 2.828125,
      "learning_rate": 3.660339935252901e-05,
      "loss": 0.9668,
      "step": 292010
    },
    {
      "epoch": 1.023457296365223,
      "grad_norm": 2.796875,
      "learning_rate": 3.66027503238653e-05,
      "loss": 0.8174,
      "step": 292020
    },
    {
      "epoch": 1.0234923438721186,
      "grad_norm": 3.3125,
      "learning_rate": 3.66021012952016e-05,
      "loss": 0.8666,
      "step": 292030
    },
    {
      "epoch": 1.0235273913790142,
      "grad_norm": 2.90625,
      "learning_rate": 3.66014522665379e-05,
      "loss": 0.9469,
      "step": 292040
    },
    {
      "epoch": 1.02356243888591,
      "grad_norm": 2.6875,
      "learning_rate": 3.66008032378742e-05,
      "loss": 0.8611,
      "step": 292050
    },
    {
      "epoch": 1.0235974863928055,
      "grad_norm": 2.796875,
      "learning_rate": 3.6600154209210494e-05,
      "loss": 0.8694,
      "step": 292060
    },
    {
      "epoch": 1.023632533899701,
      "grad_norm": 2.671875,
      "learning_rate": 3.6599505180546795e-05,
      "loss": 0.7941,
      "step": 292070
    },
    {
      "epoch": 1.0236675814065967,
      "grad_norm": 2.59375,
      "learning_rate": 3.659885615188309e-05,
      "loss": 0.8529,
      "step": 292080
    },
    {
      "epoch": 1.0237026289134923,
      "grad_norm": 3.078125,
      "learning_rate": 3.659820712321939e-05,
      "loss": 0.857,
      "step": 292090
    },
    {
      "epoch": 1.0237376764203878,
      "grad_norm": 2.890625,
      "learning_rate": 3.659755809455569e-05,
      "loss": 0.9243,
      "step": 292100
    },
    {
      "epoch": 1.0237727239272834,
      "grad_norm": 2.765625,
      "learning_rate": 3.659690906589199e-05,
      "loss": 0.8298,
      "step": 292110
    },
    {
      "epoch": 1.0238077714341791,
      "grad_norm": 3.03125,
      "learning_rate": 3.659626003722829e-05,
      "loss": 0.8889,
      "step": 292120
    },
    {
      "epoch": 1.0238428189410747,
      "grad_norm": 2.859375,
      "learning_rate": 3.659561100856458e-05,
      "loss": 0.8673,
      "step": 292130
    },
    {
      "epoch": 1.0238778664479702,
      "grad_norm": 2.84375,
      "learning_rate": 3.6594961979900884e-05,
      "loss": 0.8229,
      "step": 292140
    },
    {
      "epoch": 1.0239129139548657,
      "grad_norm": 2.328125,
      "learning_rate": 3.659431295123718e-05,
      "loss": 0.795,
      "step": 292150
    },
    {
      "epoch": 1.0239479614617615,
      "grad_norm": 3.0625,
      "learning_rate": 3.659366392257348e-05,
      "loss": 0.9805,
      "step": 292160
    },
    {
      "epoch": 1.023983008968657,
      "grad_norm": 3.09375,
      "learning_rate": 3.6593014893909775e-05,
      "loss": 0.8988,
      "step": 292170
    },
    {
      "epoch": 1.0240180564755526,
      "grad_norm": 3.265625,
      "learning_rate": 3.6592365865246076e-05,
      "loss": 0.9227,
      "step": 292180
    },
    {
      "epoch": 1.0240531039824483,
      "grad_norm": 3.03125,
      "learning_rate": 3.659171683658237e-05,
      "loss": 0.8039,
      "step": 292190
    },
    {
      "epoch": 1.0240881514893438,
      "grad_norm": 3.078125,
      "learning_rate": 3.659106780791867e-05,
      "loss": 0.8101,
      "step": 292200
    },
    {
      "epoch": 1.0241231989962394,
      "grad_norm": 2.703125,
      "learning_rate": 3.659041877925497e-05,
      "loss": 0.8552,
      "step": 292210
    },
    {
      "epoch": 1.024158246503135,
      "grad_norm": 3.265625,
      "learning_rate": 3.658976975059127e-05,
      "loss": 0.8866,
      "step": 292220
    },
    {
      "epoch": 1.0241932940100307,
      "grad_norm": 2.921875,
      "learning_rate": 3.658912072192757e-05,
      "loss": 0.8892,
      "step": 292230
    },
    {
      "epoch": 1.0242283415169262,
      "grad_norm": 3.390625,
      "learning_rate": 3.6588471693263864e-05,
      "loss": 0.8951,
      "step": 292240
    },
    {
      "epoch": 1.0242633890238217,
      "grad_norm": 2.65625,
      "learning_rate": 3.6587822664600166e-05,
      "loss": 0.8059,
      "step": 292250
    },
    {
      "epoch": 1.0242984365307173,
      "grad_norm": 2.484375,
      "learning_rate": 3.658717363593646e-05,
      "loss": 0.9337,
      "step": 292260
    },
    {
      "epoch": 1.024333484037613,
      "grad_norm": 2.578125,
      "learning_rate": 3.658652460727276e-05,
      "loss": 0.87,
      "step": 292270
    },
    {
      "epoch": 1.0243685315445086,
      "grad_norm": 2.546875,
      "learning_rate": 3.6585875578609056e-05,
      "loss": 0.8188,
      "step": 292280
    },
    {
      "epoch": 1.0244035790514041,
      "grad_norm": 3.203125,
      "learning_rate": 3.658522654994536e-05,
      "loss": 0.8705,
      "step": 292290
    },
    {
      "epoch": 1.0244386265582999,
      "grad_norm": 3.015625,
      "learning_rate": 3.658457752128165e-05,
      "loss": 0.8743,
      "step": 292300
    },
    {
      "epoch": 1.0244736740651954,
      "grad_norm": 2.65625,
      "learning_rate": 3.658392849261795e-05,
      "loss": 0.8687,
      "step": 292310
    },
    {
      "epoch": 1.024508721572091,
      "grad_norm": 2.671875,
      "learning_rate": 3.658327946395425e-05,
      "loss": 0.8211,
      "step": 292320
    },
    {
      "epoch": 1.0245437690789865,
      "grad_norm": 2.84375,
      "learning_rate": 3.658263043529054e-05,
      "loss": 0.8826,
      "step": 292330
    },
    {
      "epoch": 1.0245788165858822,
      "grad_norm": 2.859375,
      "learning_rate": 3.6581981406626844e-05,
      "loss": 0.8187,
      "step": 292340
    },
    {
      "epoch": 1.0246138640927778,
      "grad_norm": 3.109375,
      "learning_rate": 3.658133237796314e-05,
      "loss": 0.8953,
      "step": 292350
    },
    {
      "epoch": 1.0246489115996733,
      "grad_norm": 2.71875,
      "learning_rate": 3.658068334929944e-05,
      "loss": 0.8679,
      "step": 292360
    },
    {
      "epoch": 1.0246839591065688,
      "grad_norm": 2.828125,
      "learning_rate": 3.6580034320635735e-05,
      "loss": 0.8668,
      "step": 292370
    },
    {
      "epoch": 1.0247190066134646,
      "grad_norm": 3.125,
      "learning_rate": 3.6579385291972036e-05,
      "loss": 0.9919,
      "step": 292380
    },
    {
      "epoch": 1.0247540541203601,
      "grad_norm": 2.796875,
      "learning_rate": 3.657873626330833e-05,
      "loss": 0.8009,
      "step": 292390
    },
    {
      "epoch": 1.0247891016272557,
      "grad_norm": 2.828125,
      "learning_rate": 3.657808723464463e-05,
      "loss": 0.8479,
      "step": 292400
    },
    {
      "epoch": 1.0248241491341514,
      "grad_norm": 3.125,
      "learning_rate": 3.657743820598093e-05,
      "loss": 0.8972,
      "step": 292410
    },
    {
      "epoch": 1.024859196641047,
      "grad_norm": 2.890625,
      "learning_rate": 3.657678917731723e-05,
      "loss": 0.8301,
      "step": 292420
    },
    {
      "epoch": 1.0248942441479425,
      "grad_norm": 2.65625,
      "learning_rate": 3.657614014865352e-05,
      "loss": 0.8856,
      "step": 292430
    },
    {
      "epoch": 1.024929291654838,
      "grad_norm": 2.984375,
      "learning_rate": 3.6575491119989824e-05,
      "loss": 0.8761,
      "step": 292440
    },
    {
      "epoch": 1.0249643391617338,
      "grad_norm": 2.78125,
      "learning_rate": 3.6574842091326126e-05,
      "loss": 0.8691,
      "step": 292450
    },
    {
      "epoch": 1.0249993866686293,
      "grad_norm": 2.921875,
      "learning_rate": 3.657419306266242e-05,
      "loss": 0.9472,
      "step": 292460
    },
    {
      "epoch": 1.0250344341755249,
      "grad_norm": 3.265625,
      "learning_rate": 3.657354403399872e-05,
      "loss": 0.8332,
      "step": 292470
    },
    {
      "epoch": 1.0250694816824206,
      "grad_norm": 3.1875,
      "learning_rate": 3.6572895005335016e-05,
      "loss": 0.9011,
      "step": 292480
    },
    {
      "epoch": 1.0251045291893162,
      "grad_norm": 2.921875,
      "learning_rate": 3.657224597667132e-05,
      "loss": 0.8417,
      "step": 292490
    },
    {
      "epoch": 1.0251395766962117,
      "grad_norm": 3.0625,
      "learning_rate": 3.657159694800761e-05,
      "loss": 0.9192,
      "step": 292500
    },
    {
      "epoch": 1.0251746242031072,
      "grad_norm": 3.375,
      "learning_rate": 3.6570947919343914e-05,
      "loss": 0.7927,
      "step": 292510
    },
    {
      "epoch": 1.025209671710003,
      "grad_norm": 3.3125,
      "learning_rate": 3.657029889068021e-05,
      "loss": 0.8658,
      "step": 292520
    },
    {
      "epoch": 1.0252447192168985,
      "grad_norm": 3.015625,
      "learning_rate": 3.656964986201651e-05,
      "loss": 0.8085,
      "step": 292530
    },
    {
      "epoch": 1.025279766723794,
      "grad_norm": 2.640625,
      "learning_rate": 3.6569000833352804e-05,
      "loss": 0.832,
      "step": 292540
    },
    {
      "epoch": 1.0253148142306896,
      "grad_norm": 3.1875,
      "learning_rate": 3.6568351804689106e-05,
      "loss": 0.8317,
      "step": 292550
    },
    {
      "epoch": 1.0253498617375854,
      "grad_norm": 2.9375,
      "learning_rate": 3.65677027760254e-05,
      "loss": 0.8649,
      "step": 292560
    },
    {
      "epoch": 1.025384909244481,
      "grad_norm": 2.6875,
      "learning_rate": 3.65670537473617e-05,
      "loss": 0.9041,
      "step": 292570
    },
    {
      "epoch": 1.0254199567513764,
      "grad_norm": 3.1875,
      "learning_rate": 3.6566404718697996e-05,
      "loss": 0.8375,
      "step": 292580
    },
    {
      "epoch": 1.0254550042582722,
      "grad_norm": 3.21875,
      "learning_rate": 3.65657556900343e-05,
      "loss": 0.9045,
      "step": 292590
    },
    {
      "epoch": 1.0254900517651677,
      "grad_norm": 2.765625,
      "learning_rate": 3.65651066613706e-05,
      "loss": 0.9213,
      "step": 292600
    },
    {
      "epoch": 1.0255250992720633,
      "grad_norm": 3.046875,
      "learning_rate": 3.6564457632706894e-05,
      "loss": 0.8779,
      "step": 292610
    },
    {
      "epoch": 1.0255601467789588,
      "grad_norm": 2.765625,
      "learning_rate": 3.6563808604043195e-05,
      "loss": 0.8749,
      "step": 292620
    },
    {
      "epoch": 1.0255951942858546,
      "grad_norm": 2.5,
      "learning_rate": 3.656315957537949e-05,
      "loss": 0.7763,
      "step": 292630
    },
    {
      "epoch": 1.02563024179275,
      "grad_norm": 3.125,
      "learning_rate": 3.656251054671579e-05,
      "loss": 0.9135,
      "step": 292640
    },
    {
      "epoch": 1.0256652892996456,
      "grad_norm": 2.6875,
      "learning_rate": 3.6561861518052086e-05,
      "loss": 0.8515,
      "step": 292650
    },
    {
      "epoch": 1.0257003368065412,
      "grad_norm": 3.0,
      "learning_rate": 3.656121248938839e-05,
      "loss": 0.9042,
      "step": 292660
    },
    {
      "epoch": 1.025735384313437,
      "grad_norm": 2.859375,
      "learning_rate": 3.656056346072468e-05,
      "loss": 0.9113,
      "step": 292670
    },
    {
      "epoch": 1.0257704318203325,
      "grad_norm": 3.328125,
      "learning_rate": 3.6559914432060976e-05,
      "loss": 0.8901,
      "step": 292680
    },
    {
      "epoch": 1.025805479327228,
      "grad_norm": 2.765625,
      "learning_rate": 3.655926540339728e-05,
      "loss": 0.8393,
      "step": 292690
    },
    {
      "epoch": 1.0258405268341237,
      "grad_norm": 3.25,
      "learning_rate": 3.655861637473357e-05,
      "loss": 0.9125,
      "step": 292700
    },
    {
      "epoch": 1.0258755743410193,
      "grad_norm": 2.625,
      "learning_rate": 3.6557967346069874e-05,
      "loss": 0.8339,
      "step": 292710
    },
    {
      "epoch": 1.0259106218479148,
      "grad_norm": 2.71875,
      "learning_rate": 3.655731831740617e-05,
      "loss": 0.886,
      "step": 292720
    },
    {
      "epoch": 1.0259456693548104,
      "grad_norm": 2.765625,
      "learning_rate": 3.655666928874247e-05,
      "loss": 0.8531,
      "step": 292730
    },
    {
      "epoch": 1.025980716861706,
      "grad_norm": 3.25,
      "learning_rate": 3.6556020260078764e-05,
      "loss": 0.8246,
      "step": 292740
    },
    {
      "epoch": 1.0260157643686016,
      "grad_norm": 2.96875,
      "learning_rate": 3.6555371231415066e-05,
      "loss": 0.8653,
      "step": 292750
    },
    {
      "epoch": 1.0260508118754972,
      "grad_norm": 2.875,
      "learning_rate": 3.655472220275136e-05,
      "loss": 0.8741,
      "step": 292760
    },
    {
      "epoch": 1.026085859382393,
      "grad_norm": 2.578125,
      "learning_rate": 3.655407317408766e-05,
      "loss": 0.9088,
      "step": 292770
    },
    {
      "epoch": 1.0261209068892885,
      "grad_norm": 2.46875,
      "learning_rate": 3.6553424145423956e-05,
      "loss": 0.7995,
      "step": 292780
    },
    {
      "epoch": 1.026155954396184,
      "grad_norm": 2.84375,
      "learning_rate": 3.655277511676026e-05,
      "loss": 0.8405,
      "step": 292790
    },
    {
      "epoch": 1.0261910019030795,
      "grad_norm": 3.4375,
      "learning_rate": 3.655212608809655e-05,
      "loss": 0.9015,
      "step": 292800
    },
    {
      "epoch": 1.0262260494099753,
      "grad_norm": 2.453125,
      "learning_rate": 3.6551477059432854e-05,
      "loss": 0.7518,
      "step": 292810
    },
    {
      "epoch": 1.0262610969168708,
      "grad_norm": 2.78125,
      "learning_rate": 3.6550828030769155e-05,
      "loss": 0.7727,
      "step": 292820
    },
    {
      "epoch": 1.0262961444237664,
      "grad_norm": 2.625,
      "learning_rate": 3.655017900210545e-05,
      "loss": 0.8126,
      "step": 292830
    },
    {
      "epoch": 1.026331191930662,
      "grad_norm": 2.4375,
      "learning_rate": 3.654952997344175e-05,
      "loss": 0.872,
      "step": 292840
    },
    {
      "epoch": 1.0263662394375577,
      "grad_norm": 3.109375,
      "learning_rate": 3.6548880944778046e-05,
      "loss": 0.8608,
      "step": 292850
    },
    {
      "epoch": 1.0264012869444532,
      "grad_norm": 2.671875,
      "learning_rate": 3.654823191611435e-05,
      "loss": 0.7987,
      "step": 292860
    },
    {
      "epoch": 1.0264363344513487,
      "grad_norm": 2.859375,
      "learning_rate": 3.654758288745064e-05,
      "loss": 0.8666,
      "step": 292870
    },
    {
      "epoch": 1.0264713819582445,
      "grad_norm": 3.6875,
      "learning_rate": 3.654693385878694e-05,
      "loss": 0.9373,
      "step": 292880
    },
    {
      "epoch": 1.02650642946514,
      "grad_norm": 2.4375,
      "learning_rate": 3.654628483012324e-05,
      "loss": 0.8475,
      "step": 292890
    },
    {
      "epoch": 1.0265414769720356,
      "grad_norm": 2.796875,
      "learning_rate": 3.654563580145954e-05,
      "loss": 0.7677,
      "step": 292900
    },
    {
      "epoch": 1.026576524478931,
      "grad_norm": 2.765625,
      "learning_rate": 3.6544986772795834e-05,
      "loss": 0.8399,
      "step": 292910
    },
    {
      "epoch": 1.0266115719858269,
      "grad_norm": 2.984375,
      "learning_rate": 3.6544337744132135e-05,
      "loss": 0.8701,
      "step": 292920
    },
    {
      "epoch": 1.0266466194927224,
      "grad_norm": 3.09375,
      "learning_rate": 3.654368871546843e-05,
      "loss": 0.8122,
      "step": 292930
    },
    {
      "epoch": 1.026681666999618,
      "grad_norm": 3.046875,
      "learning_rate": 3.654303968680473e-05,
      "loss": 0.926,
      "step": 292940
    },
    {
      "epoch": 1.0267167145065135,
      "grad_norm": 3.40625,
      "learning_rate": 3.6542390658141026e-05,
      "loss": 1.0587,
      "step": 292950
    },
    {
      "epoch": 1.0267517620134092,
      "grad_norm": 2.78125,
      "learning_rate": 3.654174162947733e-05,
      "loss": 0.8898,
      "step": 292960
    },
    {
      "epoch": 1.0267868095203048,
      "grad_norm": 2.96875,
      "learning_rate": 3.654109260081363e-05,
      "loss": 0.891,
      "step": 292970
    },
    {
      "epoch": 1.0268218570272003,
      "grad_norm": 2.6875,
      "learning_rate": 3.654044357214992e-05,
      "loss": 0.8383,
      "step": 292980
    },
    {
      "epoch": 1.026856904534096,
      "grad_norm": 3.03125,
      "learning_rate": 3.6539794543486225e-05,
      "loss": 0.9642,
      "step": 292990
    },
    {
      "epoch": 1.0268919520409916,
      "grad_norm": 2.875,
      "learning_rate": 3.653914551482252e-05,
      "loss": 0.9143,
      "step": 293000
    },
    {
      "epoch": 1.0269269995478871,
      "grad_norm": 2.78125,
      "learning_rate": 3.653849648615882e-05,
      "loss": 0.8412,
      "step": 293010
    },
    {
      "epoch": 1.0269620470547827,
      "grad_norm": 2.6875,
      "learning_rate": 3.6537847457495115e-05,
      "loss": 0.9262,
      "step": 293020
    },
    {
      "epoch": 1.0269970945616784,
      "grad_norm": 2.875,
      "learning_rate": 3.653719842883142e-05,
      "loss": 0.7988,
      "step": 293030
    },
    {
      "epoch": 1.027032142068574,
      "grad_norm": 2.59375,
      "learning_rate": 3.653654940016771e-05,
      "loss": 0.8262,
      "step": 293040
    },
    {
      "epoch": 1.0270671895754695,
      "grad_norm": 2.921875,
      "learning_rate": 3.653590037150401e-05,
      "loss": 0.837,
      "step": 293050
    },
    {
      "epoch": 1.0271022370823653,
      "grad_norm": 3.140625,
      "learning_rate": 3.653525134284031e-05,
      "loss": 0.8248,
      "step": 293060
    },
    {
      "epoch": 1.0271372845892608,
      "grad_norm": 3.046875,
      "learning_rate": 3.65346023141766e-05,
      "loss": 0.9044,
      "step": 293070
    },
    {
      "epoch": 1.0271723320961563,
      "grad_norm": 3.015625,
      "learning_rate": 3.65339532855129e-05,
      "loss": 0.8999,
      "step": 293080
    },
    {
      "epoch": 1.0272073796030519,
      "grad_norm": 2.921875,
      "learning_rate": 3.65333042568492e-05,
      "loss": 0.8341,
      "step": 293090
    },
    {
      "epoch": 1.0272424271099476,
      "grad_norm": 2.875,
      "learning_rate": 3.65326552281855e-05,
      "loss": 0.845,
      "step": 293100
    },
    {
      "epoch": 1.0272774746168432,
      "grad_norm": 2.640625,
      "learning_rate": 3.6532006199521794e-05,
      "loss": 0.9317,
      "step": 293110
    },
    {
      "epoch": 1.0273125221237387,
      "grad_norm": 2.703125,
      "learning_rate": 3.6531357170858095e-05,
      "loss": 0.8812,
      "step": 293120
    },
    {
      "epoch": 1.0273475696306342,
      "grad_norm": 3.125,
      "learning_rate": 3.653070814219439e-05,
      "loss": 0.8711,
      "step": 293130
    },
    {
      "epoch": 1.02738261713753,
      "grad_norm": 2.96875,
      "learning_rate": 3.653005911353069e-05,
      "loss": 0.9106,
      "step": 293140
    },
    {
      "epoch": 1.0274176646444255,
      "grad_norm": 2.65625,
      "learning_rate": 3.6529410084866986e-05,
      "loss": 0.7775,
      "step": 293150
    },
    {
      "epoch": 1.027452712151321,
      "grad_norm": 2.40625,
      "learning_rate": 3.652876105620329e-05,
      "loss": 0.8179,
      "step": 293160
    },
    {
      "epoch": 1.0274877596582168,
      "grad_norm": 2.859375,
      "learning_rate": 3.652811202753958e-05,
      "loss": 0.942,
      "step": 293170
    },
    {
      "epoch": 1.0275228071651124,
      "grad_norm": 3.0625,
      "learning_rate": 3.652746299887588e-05,
      "loss": 0.8544,
      "step": 293180
    },
    {
      "epoch": 1.0275578546720079,
      "grad_norm": 2.71875,
      "learning_rate": 3.6526813970212185e-05,
      "loss": 0.8634,
      "step": 293190
    },
    {
      "epoch": 1.0275929021789034,
      "grad_norm": 2.9375,
      "learning_rate": 3.652616494154848e-05,
      "loss": 0.8479,
      "step": 293200
    },
    {
      "epoch": 1.0276279496857992,
      "grad_norm": 3.15625,
      "learning_rate": 3.652551591288478e-05,
      "loss": 0.8965,
      "step": 293210
    },
    {
      "epoch": 1.0276629971926947,
      "grad_norm": 2.671875,
      "learning_rate": 3.6524866884221075e-05,
      "loss": 0.8692,
      "step": 293220
    },
    {
      "epoch": 1.0276980446995903,
      "grad_norm": 3.359375,
      "learning_rate": 3.652421785555738e-05,
      "loss": 0.8333,
      "step": 293230
    },
    {
      "epoch": 1.0277330922064858,
      "grad_norm": 3.09375,
      "learning_rate": 3.652356882689367e-05,
      "loss": 0.9988,
      "step": 293240
    },
    {
      "epoch": 1.0277681397133815,
      "grad_norm": 2.9375,
      "learning_rate": 3.652291979822997e-05,
      "loss": 0.8099,
      "step": 293250
    },
    {
      "epoch": 1.027803187220277,
      "grad_norm": 2.328125,
      "learning_rate": 3.652227076956627e-05,
      "loss": 0.8404,
      "step": 293260
    },
    {
      "epoch": 1.0278382347271726,
      "grad_norm": 3.015625,
      "learning_rate": 3.652162174090257e-05,
      "loss": 0.7652,
      "step": 293270
    },
    {
      "epoch": 1.0278732822340684,
      "grad_norm": 3.46875,
      "learning_rate": 3.652097271223886e-05,
      "loss": 0.9235,
      "step": 293280
    },
    {
      "epoch": 1.027908329740964,
      "grad_norm": 3.109375,
      "learning_rate": 3.6520323683575165e-05,
      "loss": 0.8631,
      "step": 293290
    },
    {
      "epoch": 1.0279433772478594,
      "grad_norm": 2.71875,
      "learning_rate": 3.651967465491146e-05,
      "loss": 0.9012,
      "step": 293300
    },
    {
      "epoch": 1.027978424754755,
      "grad_norm": 2.71875,
      "learning_rate": 3.651902562624776e-05,
      "loss": 0.7932,
      "step": 293310
    },
    {
      "epoch": 1.0280134722616507,
      "grad_norm": 3.09375,
      "learning_rate": 3.6518376597584055e-05,
      "loss": 0.9043,
      "step": 293320
    },
    {
      "epoch": 1.0280485197685463,
      "grad_norm": 2.625,
      "learning_rate": 3.651772756892036e-05,
      "loss": 0.8859,
      "step": 293330
    },
    {
      "epoch": 1.0280835672754418,
      "grad_norm": 3.0625,
      "learning_rate": 3.651707854025666e-05,
      "loss": 0.8557,
      "step": 293340
    },
    {
      "epoch": 1.0281186147823376,
      "grad_norm": 3.140625,
      "learning_rate": 3.651642951159295e-05,
      "loss": 0.8835,
      "step": 293350
    },
    {
      "epoch": 1.028153662289233,
      "grad_norm": 3.3125,
      "learning_rate": 3.6515780482929254e-05,
      "loss": 0.7961,
      "step": 293360
    },
    {
      "epoch": 1.0281887097961286,
      "grad_norm": 2.640625,
      "learning_rate": 3.651513145426555e-05,
      "loss": 0.8649,
      "step": 293370
    },
    {
      "epoch": 1.0282237573030242,
      "grad_norm": 3.671875,
      "learning_rate": 3.651448242560185e-05,
      "loss": 0.8773,
      "step": 293380
    },
    {
      "epoch": 1.02825880480992,
      "grad_norm": 2.9375,
      "learning_rate": 3.6513833396938145e-05,
      "loss": 0.9156,
      "step": 293390
    },
    {
      "epoch": 1.0282938523168155,
      "grad_norm": 2.78125,
      "learning_rate": 3.6513184368274446e-05,
      "loss": 0.8913,
      "step": 293400
    },
    {
      "epoch": 1.028328899823711,
      "grad_norm": 2.5,
      "learning_rate": 3.651253533961074e-05,
      "loss": 0.8839,
      "step": 293410
    },
    {
      "epoch": 1.0283639473306065,
      "grad_norm": 3.0,
      "learning_rate": 3.651188631094704e-05,
      "loss": 0.9118,
      "step": 293420
    },
    {
      "epoch": 1.0283989948375023,
      "grad_norm": 2.8125,
      "learning_rate": 3.651123728228334e-05,
      "loss": 0.8602,
      "step": 293430
    },
    {
      "epoch": 1.0284340423443978,
      "grad_norm": 3.140625,
      "learning_rate": 3.651058825361963e-05,
      "loss": 0.9027,
      "step": 293440
    },
    {
      "epoch": 1.0284690898512934,
      "grad_norm": 2.828125,
      "learning_rate": 3.650993922495593e-05,
      "loss": 0.8095,
      "step": 293450
    },
    {
      "epoch": 1.0285041373581891,
      "grad_norm": 2.640625,
      "learning_rate": 3.650929019629223e-05,
      "loss": 0.8659,
      "step": 293460
    },
    {
      "epoch": 1.0285391848650847,
      "grad_norm": 3.265625,
      "learning_rate": 3.650864116762853e-05,
      "loss": 0.875,
      "step": 293470
    },
    {
      "epoch": 1.0285742323719802,
      "grad_norm": 2.609375,
      "learning_rate": 3.650799213896482e-05,
      "loss": 0.8398,
      "step": 293480
    },
    {
      "epoch": 1.0286092798788757,
      "grad_norm": 2.59375,
      "learning_rate": 3.6507343110301125e-05,
      "loss": 0.8193,
      "step": 293490
    },
    {
      "epoch": 1.0286443273857715,
      "grad_norm": 3.03125,
      "learning_rate": 3.650669408163742e-05,
      "loss": 0.8465,
      "step": 293500
    },
    {
      "epoch": 1.028679374892667,
      "grad_norm": 3.015625,
      "learning_rate": 3.650604505297372e-05,
      "loss": 0.8859,
      "step": 293510
    },
    {
      "epoch": 1.0287144223995626,
      "grad_norm": 3.40625,
      "learning_rate": 3.6505396024310015e-05,
      "loss": 0.9939,
      "step": 293520
    },
    {
      "epoch": 1.028749469906458,
      "grad_norm": 3.359375,
      "learning_rate": 3.650474699564632e-05,
      "loss": 0.9382,
      "step": 293530
    },
    {
      "epoch": 1.0287845174133539,
      "grad_norm": 3.0,
      "learning_rate": 3.650409796698261e-05,
      "loss": 0.9127,
      "step": 293540
    },
    {
      "epoch": 1.0288195649202494,
      "grad_norm": 2.984375,
      "learning_rate": 3.650344893831891e-05,
      "loss": 0.8958,
      "step": 293550
    },
    {
      "epoch": 1.028854612427145,
      "grad_norm": 2.75,
      "learning_rate": 3.6502799909655214e-05,
      "loss": 0.973,
      "step": 293560
    },
    {
      "epoch": 1.0288896599340407,
      "grad_norm": 3.125,
      "learning_rate": 3.650215088099151e-05,
      "loss": 0.8538,
      "step": 293570
    },
    {
      "epoch": 1.0289247074409362,
      "grad_norm": 3.09375,
      "learning_rate": 3.650150185232781e-05,
      "loss": 0.8464,
      "step": 293580
    },
    {
      "epoch": 1.0289597549478318,
      "grad_norm": 2.8125,
      "learning_rate": 3.6500852823664105e-05,
      "loss": 0.8531,
      "step": 293590
    },
    {
      "epoch": 1.0289948024547273,
      "grad_norm": 2.671875,
      "learning_rate": 3.6500203795000406e-05,
      "loss": 0.7693,
      "step": 293600
    },
    {
      "epoch": 1.029029849961623,
      "grad_norm": 2.90625,
      "learning_rate": 3.64995547663367e-05,
      "loss": 0.9276,
      "step": 293610
    },
    {
      "epoch": 1.0290648974685186,
      "grad_norm": 3.515625,
      "learning_rate": 3.6498905737673e-05,
      "loss": 0.8806,
      "step": 293620
    },
    {
      "epoch": 1.0290999449754141,
      "grad_norm": 2.859375,
      "learning_rate": 3.6498256709009297e-05,
      "loss": 0.9048,
      "step": 293630
    },
    {
      "epoch": 1.0291349924823097,
      "grad_norm": 2.96875,
      "learning_rate": 3.64976076803456e-05,
      "loss": 0.9654,
      "step": 293640
    },
    {
      "epoch": 1.0291700399892054,
      "grad_norm": 2.734375,
      "learning_rate": 3.649695865168189e-05,
      "loss": 0.7888,
      "step": 293650
    },
    {
      "epoch": 1.029205087496101,
      "grad_norm": 2.96875,
      "learning_rate": 3.6496309623018194e-05,
      "loss": 0.917,
      "step": 293660
    },
    {
      "epoch": 1.0292401350029965,
      "grad_norm": 2.734375,
      "learning_rate": 3.649566059435449e-05,
      "loss": 0.9049,
      "step": 293670
    },
    {
      "epoch": 1.0292751825098923,
      "grad_norm": 2.6875,
      "learning_rate": 3.649501156569079e-05,
      "loss": 0.8809,
      "step": 293680
    },
    {
      "epoch": 1.0293102300167878,
      "grad_norm": 2.625,
      "learning_rate": 3.649436253702709e-05,
      "loss": 0.841,
      "step": 293690
    },
    {
      "epoch": 1.0293452775236833,
      "grad_norm": 2.84375,
      "learning_rate": 3.6493713508363386e-05,
      "loss": 0.8649,
      "step": 293700
    },
    {
      "epoch": 1.0293803250305789,
      "grad_norm": 2.921875,
      "learning_rate": 3.649306447969969e-05,
      "loss": 0.8053,
      "step": 293710
    },
    {
      "epoch": 1.0294153725374746,
      "grad_norm": 2.78125,
      "learning_rate": 3.649241545103598e-05,
      "loss": 0.7945,
      "step": 293720
    },
    {
      "epoch": 1.0294504200443702,
      "grad_norm": 2.578125,
      "learning_rate": 3.649176642237228e-05,
      "loss": 0.9075,
      "step": 293730
    },
    {
      "epoch": 1.0294854675512657,
      "grad_norm": 3.265625,
      "learning_rate": 3.649111739370858e-05,
      "loss": 0.8496,
      "step": 293740
    },
    {
      "epoch": 1.0295205150581614,
      "grad_norm": 2.875,
      "learning_rate": 3.649046836504488e-05,
      "loss": 0.8614,
      "step": 293750
    },
    {
      "epoch": 1.029555562565057,
      "grad_norm": 2.984375,
      "learning_rate": 3.6489819336381174e-05,
      "loss": 0.9042,
      "step": 293760
    },
    {
      "epoch": 1.0295906100719525,
      "grad_norm": 3.0,
      "learning_rate": 3.6489170307717475e-05,
      "loss": 0.8337,
      "step": 293770
    },
    {
      "epoch": 1.029625657578848,
      "grad_norm": 2.71875,
      "learning_rate": 3.648852127905377e-05,
      "loss": 0.8373,
      "step": 293780
    },
    {
      "epoch": 1.0296607050857438,
      "grad_norm": 3.078125,
      "learning_rate": 3.648787225039007e-05,
      "loss": 0.8275,
      "step": 293790
    },
    {
      "epoch": 1.0296957525926393,
      "grad_norm": 2.734375,
      "learning_rate": 3.6487223221726366e-05,
      "loss": 0.8235,
      "step": 293800
    },
    {
      "epoch": 1.0297308000995349,
      "grad_norm": 2.65625,
      "learning_rate": 3.648657419306266e-05,
      "loss": 0.7644,
      "step": 293810
    },
    {
      "epoch": 1.0297658476064304,
      "grad_norm": 3.296875,
      "learning_rate": 3.648592516439896e-05,
      "loss": 0.8191,
      "step": 293820
    },
    {
      "epoch": 1.0298008951133262,
      "grad_norm": 2.859375,
      "learning_rate": 3.6485276135735257e-05,
      "loss": 0.8545,
      "step": 293830
    },
    {
      "epoch": 1.0298359426202217,
      "grad_norm": 3.109375,
      "learning_rate": 3.648462710707156e-05,
      "loss": 0.8945,
      "step": 293840
    },
    {
      "epoch": 1.0298709901271172,
      "grad_norm": 2.984375,
      "learning_rate": 3.648397807840785e-05,
      "loss": 0.8984,
      "step": 293850
    },
    {
      "epoch": 1.029906037634013,
      "grad_norm": 2.765625,
      "learning_rate": 3.6483329049744154e-05,
      "loss": 0.8212,
      "step": 293860
    },
    {
      "epoch": 1.0299410851409085,
      "grad_norm": 3.15625,
      "learning_rate": 3.648268002108045e-05,
      "loss": 0.8323,
      "step": 293870
    },
    {
      "epoch": 1.029976132647804,
      "grad_norm": 3.375,
      "learning_rate": 3.648203099241675e-05,
      "loss": 0.8867,
      "step": 293880
    },
    {
      "epoch": 1.0300111801546996,
      "grad_norm": 2.453125,
      "learning_rate": 3.6481381963753045e-05,
      "loss": 0.8123,
      "step": 293890
    },
    {
      "epoch": 1.0300462276615954,
      "grad_norm": 3.109375,
      "learning_rate": 3.6480732935089346e-05,
      "loss": 0.8274,
      "step": 293900
    },
    {
      "epoch": 1.030081275168491,
      "grad_norm": 2.546875,
      "learning_rate": 3.648008390642564e-05,
      "loss": 0.8887,
      "step": 293910
    },
    {
      "epoch": 1.0301163226753864,
      "grad_norm": 2.46875,
      "learning_rate": 3.647943487776194e-05,
      "loss": 0.8514,
      "step": 293920
    },
    {
      "epoch": 1.030151370182282,
      "grad_norm": 2.859375,
      "learning_rate": 3.647878584909824e-05,
      "loss": 0.8758,
      "step": 293930
    },
    {
      "epoch": 1.0301864176891777,
      "grad_norm": 3.1875,
      "learning_rate": 3.647813682043454e-05,
      "loss": 0.8893,
      "step": 293940
    },
    {
      "epoch": 1.0302214651960733,
      "grad_norm": 2.796875,
      "learning_rate": 3.647748779177084e-05,
      "loss": 0.8842,
      "step": 293950
    },
    {
      "epoch": 1.0302565127029688,
      "grad_norm": 2.578125,
      "learning_rate": 3.6476838763107134e-05,
      "loss": 0.8952,
      "step": 293960
    },
    {
      "epoch": 1.0302915602098646,
      "grad_norm": 3.109375,
      "learning_rate": 3.6476189734443435e-05,
      "loss": 0.8646,
      "step": 293970
    },
    {
      "epoch": 1.03032660771676,
      "grad_norm": 2.734375,
      "learning_rate": 3.647554070577973e-05,
      "loss": 0.8232,
      "step": 293980
    },
    {
      "epoch": 1.0303616552236556,
      "grad_norm": 3.0,
      "learning_rate": 3.647489167711603e-05,
      "loss": 0.9226,
      "step": 293990
    },
    {
      "epoch": 1.0303967027305512,
      "grad_norm": 2.984375,
      "learning_rate": 3.6474242648452326e-05,
      "loss": 0.7675,
      "step": 294000
    },
    {
      "epoch": 1.030431750237447,
      "grad_norm": 3.125,
      "learning_rate": 3.647359361978863e-05,
      "loss": 0.8791,
      "step": 294010
    },
    {
      "epoch": 1.0304667977443425,
      "grad_norm": 3.359375,
      "learning_rate": 3.647294459112492e-05,
      "loss": 0.8767,
      "step": 294020
    },
    {
      "epoch": 1.030501845251238,
      "grad_norm": 2.90625,
      "learning_rate": 3.647229556246122e-05,
      "loss": 0.8337,
      "step": 294030
    },
    {
      "epoch": 1.0305368927581338,
      "grad_norm": 2.796875,
      "learning_rate": 3.647164653379752e-05,
      "loss": 0.842,
      "step": 294040
    },
    {
      "epoch": 1.0305719402650293,
      "grad_norm": 2.953125,
      "learning_rate": 3.647099750513382e-05,
      "loss": 0.8127,
      "step": 294050
    },
    {
      "epoch": 1.0306069877719248,
      "grad_norm": 2.90625,
      "learning_rate": 3.647034847647012e-05,
      "loss": 0.8815,
      "step": 294060
    },
    {
      "epoch": 1.0306420352788204,
      "grad_norm": 3.1875,
      "learning_rate": 3.6469699447806415e-05,
      "loss": 0.8677,
      "step": 294070
    },
    {
      "epoch": 1.0306770827857161,
      "grad_norm": 3.375,
      "learning_rate": 3.646905041914272e-05,
      "loss": 0.9168,
      "step": 294080
    },
    {
      "epoch": 1.0307121302926117,
      "grad_norm": 3.03125,
      "learning_rate": 3.646840139047901e-05,
      "loss": 0.8678,
      "step": 294090
    },
    {
      "epoch": 1.0307471777995072,
      "grad_norm": 3.171875,
      "learning_rate": 3.646775236181531e-05,
      "loss": 0.9284,
      "step": 294100
    },
    {
      "epoch": 1.0307822253064027,
      "grad_norm": 3.203125,
      "learning_rate": 3.646710333315161e-05,
      "loss": 0.8718,
      "step": 294110
    },
    {
      "epoch": 1.0308172728132985,
      "grad_norm": 2.890625,
      "learning_rate": 3.646645430448791e-05,
      "loss": 0.8983,
      "step": 294120
    },
    {
      "epoch": 1.030852320320194,
      "grad_norm": 3.0,
      "learning_rate": 3.64658052758242e-05,
      "loss": 0.8913,
      "step": 294130
    },
    {
      "epoch": 1.0308873678270896,
      "grad_norm": 3.234375,
      "learning_rate": 3.6465156247160505e-05,
      "loss": 0.835,
      "step": 294140
    },
    {
      "epoch": 1.0309224153339853,
      "grad_norm": 2.921875,
      "learning_rate": 3.64645072184968e-05,
      "loss": 0.8394,
      "step": 294150
    },
    {
      "epoch": 1.0309574628408809,
      "grad_norm": 3.078125,
      "learning_rate": 3.64638581898331e-05,
      "loss": 0.8588,
      "step": 294160
    },
    {
      "epoch": 1.0309925103477764,
      "grad_norm": 2.8125,
      "learning_rate": 3.6463209161169395e-05,
      "loss": 0.8627,
      "step": 294170
    },
    {
      "epoch": 1.031027557854672,
      "grad_norm": 3.21875,
      "learning_rate": 3.64625601325057e-05,
      "loss": 0.8689,
      "step": 294180
    },
    {
      "epoch": 1.0310626053615677,
      "grad_norm": 2.9375,
      "learning_rate": 3.646191110384199e-05,
      "loss": 0.9425,
      "step": 294190
    },
    {
      "epoch": 1.0310976528684632,
      "grad_norm": 2.578125,
      "learning_rate": 3.6461262075178286e-05,
      "loss": 0.8785,
      "step": 294200
    },
    {
      "epoch": 1.0311327003753588,
      "grad_norm": 2.71875,
      "learning_rate": 3.646061304651459e-05,
      "loss": 0.8355,
      "step": 294210
    },
    {
      "epoch": 1.0311677478822543,
      "grad_norm": 3.1875,
      "learning_rate": 3.645996401785088e-05,
      "loss": 0.9535,
      "step": 294220
    },
    {
      "epoch": 1.03120279538915,
      "grad_norm": 3.125,
      "learning_rate": 3.645931498918718e-05,
      "loss": 0.9396,
      "step": 294230
    },
    {
      "epoch": 1.0312378428960456,
      "grad_norm": 3.25,
      "learning_rate": 3.645866596052348e-05,
      "loss": 0.8654,
      "step": 294240
    },
    {
      "epoch": 1.0312728904029411,
      "grad_norm": 2.890625,
      "learning_rate": 3.645801693185978e-05,
      "loss": 0.8229,
      "step": 294250
    },
    {
      "epoch": 1.0313079379098369,
      "grad_norm": 2.796875,
      "learning_rate": 3.6457367903196074e-05,
      "loss": 0.817,
      "step": 294260
    },
    {
      "epoch": 1.0313429854167324,
      "grad_norm": 2.9375,
      "learning_rate": 3.6456718874532375e-05,
      "loss": 0.8887,
      "step": 294270
    },
    {
      "epoch": 1.031378032923628,
      "grad_norm": 2.328125,
      "learning_rate": 3.645606984586867e-05,
      "loss": 0.8942,
      "step": 294280
    },
    {
      "epoch": 1.0314130804305235,
      "grad_norm": 2.921875,
      "learning_rate": 3.645542081720497e-05,
      "loss": 0.8785,
      "step": 294290
    },
    {
      "epoch": 1.0314481279374192,
      "grad_norm": 3.109375,
      "learning_rate": 3.645477178854127e-05,
      "loss": 0.8769,
      "step": 294300
    },
    {
      "epoch": 1.0314831754443148,
      "grad_norm": 3.078125,
      "learning_rate": 3.645412275987757e-05,
      "loss": 0.8548,
      "step": 294310
    },
    {
      "epoch": 1.0315182229512103,
      "grad_norm": 2.578125,
      "learning_rate": 3.645347373121387e-05,
      "loss": 0.8369,
      "step": 294320
    },
    {
      "epoch": 1.0315532704581059,
      "grad_norm": 3.09375,
      "learning_rate": 3.645282470255016e-05,
      "loss": 0.937,
      "step": 294330
    },
    {
      "epoch": 1.0315883179650016,
      "grad_norm": 2.734375,
      "learning_rate": 3.6452175673886465e-05,
      "loss": 0.813,
      "step": 294340
    },
    {
      "epoch": 1.0316233654718971,
      "grad_norm": 2.6875,
      "learning_rate": 3.645152664522276e-05,
      "loss": 0.873,
      "step": 294350
    },
    {
      "epoch": 1.0316584129787927,
      "grad_norm": 3.078125,
      "learning_rate": 3.645087761655906e-05,
      "loss": 0.9279,
      "step": 294360
    },
    {
      "epoch": 1.0316934604856884,
      "grad_norm": 2.65625,
      "learning_rate": 3.6450228587895355e-05,
      "loss": 0.9026,
      "step": 294370
    },
    {
      "epoch": 1.031728507992584,
      "grad_norm": 2.921875,
      "learning_rate": 3.644957955923166e-05,
      "loss": 0.8788,
      "step": 294380
    },
    {
      "epoch": 1.0317635554994795,
      "grad_norm": 2.890625,
      "learning_rate": 3.644893053056795e-05,
      "loss": 0.9091,
      "step": 294390
    },
    {
      "epoch": 1.031798603006375,
      "grad_norm": 3.546875,
      "learning_rate": 3.644828150190425e-05,
      "loss": 0.8625,
      "step": 294400
    },
    {
      "epoch": 1.0318336505132708,
      "grad_norm": 3.015625,
      "learning_rate": 3.644763247324055e-05,
      "loss": 0.892,
      "step": 294410
    },
    {
      "epoch": 1.0318686980201663,
      "grad_norm": 3.015625,
      "learning_rate": 3.644698344457685e-05,
      "loss": 0.8646,
      "step": 294420
    },
    {
      "epoch": 1.0319037455270619,
      "grad_norm": 3.125,
      "learning_rate": 3.644633441591315e-05,
      "loss": 0.9297,
      "step": 294430
    },
    {
      "epoch": 1.0319387930339576,
      "grad_norm": 2.828125,
      "learning_rate": 3.6445685387249445e-05,
      "loss": 0.8382,
      "step": 294440
    },
    {
      "epoch": 1.0319738405408532,
      "grad_norm": 2.46875,
      "learning_rate": 3.6445036358585746e-05,
      "loss": 0.9153,
      "step": 294450
    },
    {
      "epoch": 1.0320088880477487,
      "grad_norm": 3.296875,
      "learning_rate": 3.644438732992204e-05,
      "loss": 0.7907,
      "step": 294460
    },
    {
      "epoch": 1.0320439355546442,
      "grad_norm": 2.96875,
      "learning_rate": 3.644373830125834e-05,
      "loss": 0.8652,
      "step": 294470
    },
    {
      "epoch": 1.03207898306154,
      "grad_norm": 2.75,
      "learning_rate": 3.644308927259464e-05,
      "loss": 0.8784,
      "step": 294480
    },
    {
      "epoch": 1.0321140305684355,
      "grad_norm": 2.96875,
      "learning_rate": 3.644244024393094e-05,
      "loss": 0.9578,
      "step": 294490
    },
    {
      "epoch": 1.032149078075331,
      "grad_norm": 3.203125,
      "learning_rate": 3.644179121526723e-05,
      "loss": 0.8474,
      "step": 294500
    },
    {
      "epoch": 1.0321841255822266,
      "grad_norm": 3.171875,
      "learning_rate": 3.6441142186603534e-05,
      "loss": 0.8876,
      "step": 294510
    },
    {
      "epoch": 1.0322191730891224,
      "grad_norm": 2.90625,
      "learning_rate": 3.644049315793983e-05,
      "loss": 0.8498,
      "step": 294520
    },
    {
      "epoch": 1.032254220596018,
      "grad_norm": 3.40625,
      "learning_rate": 3.643984412927613e-05,
      "loss": 0.8546,
      "step": 294530
    },
    {
      "epoch": 1.0322892681029134,
      "grad_norm": 3.0625,
      "learning_rate": 3.6439195100612425e-05,
      "loss": 0.8722,
      "step": 294540
    },
    {
      "epoch": 1.0323243156098092,
      "grad_norm": 2.6875,
      "learning_rate": 3.6438546071948726e-05,
      "loss": 0.8877,
      "step": 294550
    },
    {
      "epoch": 1.0323593631167047,
      "grad_norm": 2.421875,
      "learning_rate": 3.643789704328502e-05,
      "loss": 0.8631,
      "step": 294560
    },
    {
      "epoch": 1.0323944106236003,
      "grad_norm": 2.890625,
      "learning_rate": 3.6437248014621315e-05,
      "loss": 0.9203,
      "step": 294570
    },
    {
      "epoch": 1.0324294581304958,
      "grad_norm": 2.765625,
      "learning_rate": 3.643659898595762e-05,
      "loss": 0.8287,
      "step": 294580
    },
    {
      "epoch": 1.0324645056373916,
      "grad_norm": 2.875,
      "learning_rate": 3.643594995729391e-05,
      "loss": 0.9352,
      "step": 294590
    },
    {
      "epoch": 1.032499553144287,
      "grad_norm": 2.875,
      "learning_rate": 3.643530092863021e-05,
      "loss": 0.8983,
      "step": 294600
    },
    {
      "epoch": 1.0325346006511826,
      "grad_norm": 2.796875,
      "learning_rate": 3.643465189996651e-05,
      "loss": 0.8545,
      "step": 294610
    },
    {
      "epoch": 1.0325696481580782,
      "grad_norm": 2.578125,
      "learning_rate": 3.643400287130281e-05,
      "loss": 0.8844,
      "step": 294620
    },
    {
      "epoch": 1.032604695664974,
      "grad_norm": 3.15625,
      "learning_rate": 3.64333538426391e-05,
      "loss": 0.9237,
      "step": 294630
    },
    {
      "epoch": 1.0326397431718695,
      "grad_norm": 2.53125,
      "learning_rate": 3.6432704813975405e-05,
      "loss": 0.8572,
      "step": 294640
    },
    {
      "epoch": 1.032674790678765,
      "grad_norm": 2.5625,
      "learning_rate": 3.6432055785311706e-05,
      "loss": 0.8439,
      "step": 294650
    },
    {
      "epoch": 1.0327098381856608,
      "grad_norm": 2.78125,
      "learning_rate": 3.6431406756648e-05,
      "loss": 0.9084,
      "step": 294660
    },
    {
      "epoch": 1.0327448856925563,
      "grad_norm": 2.90625,
      "learning_rate": 3.64307577279843e-05,
      "loss": 0.8388,
      "step": 294670
    },
    {
      "epoch": 1.0327799331994518,
      "grad_norm": 2.875,
      "learning_rate": 3.64301086993206e-05,
      "loss": 0.7611,
      "step": 294680
    },
    {
      "epoch": 1.0328149807063474,
      "grad_norm": 3.546875,
      "learning_rate": 3.64294596706569e-05,
      "loss": 0.9231,
      "step": 294690
    },
    {
      "epoch": 1.0328500282132431,
      "grad_norm": 2.640625,
      "learning_rate": 3.642881064199319e-05,
      "loss": 0.8638,
      "step": 294700
    },
    {
      "epoch": 1.0328850757201387,
      "grad_norm": 3.140625,
      "learning_rate": 3.6428161613329494e-05,
      "loss": 0.9135,
      "step": 294710
    },
    {
      "epoch": 1.0329201232270342,
      "grad_norm": 3.125,
      "learning_rate": 3.642751258466579e-05,
      "loss": 0.9289,
      "step": 294720
    },
    {
      "epoch": 1.03295517073393,
      "grad_norm": 2.65625,
      "learning_rate": 3.642686355600209e-05,
      "loss": 0.8394,
      "step": 294730
    },
    {
      "epoch": 1.0329902182408255,
      "grad_norm": 2.78125,
      "learning_rate": 3.6426214527338385e-05,
      "loss": 0.8783,
      "step": 294740
    },
    {
      "epoch": 1.033025265747721,
      "grad_norm": 2.796875,
      "learning_rate": 3.6425565498674686e-05,
      "loss": 0.8701,
      "step": 294750
    },
    {
      "epoch": 1.0330603132546166,
      "grad_norm": 2.859375,
      "learning_rate": 3.642491647001098e-05,
      "loss": 0.8583,
      "step": 294760
    },
    {
      "epoch": 1.0330953607615123,
      "grad_norm": 3.046875,
      "learning_rate": 3.642426744134728e-05,
      "loss": 0.8969,
      "step": 294770
    },
    {
      "epoch": 1.0331304082684079,
      "grad_norm": 2.8125,
      "learning_rate": 3.642361841268358e-05,
      "loss": 0.9011,
      "step": 294780
    },
    {
      "epoch": 1.0331654557753034,
      "grad_norm": 3.03125,
      "learning_rate": 3.642296938401988e-05,
      "loss": 0.863,
      "step": 294790
    },
    {
      "epoch": 1.033200503282199,
      "grad_norm": 3.0625,
      "learning_rate": 3.642232035535618e-05,
      "loss": 0.9197,
      "step": 294800
    },
    {
      "epoch": 1.0332355507890947,
      "grad_norm": 3.203125,
      "learning_rate": 3.6421671326692474e-05,
      "loss": 0.955,
      "step": 294810
    },
    {
      "epoch": 1.0332705982959902,
      "grad_norm": 2.671875,
      "learning_rate": 3.6421022298028776e-05,
      "loss": 0.8945,
      "step": 294820
    },
    {
      "epoch": 1.0333056458028858,
      "grad_norm": 2.484375,
      "learning_rate": 3.642037326936507e-05,
      "loss": 0.8447,
      "step": 294830
    },
    {
      "epoch": 1.0333406933097815,
      "grad_norm": 3.203125,
      "learning_rate": 3.641972424070137e-05,
      "loss": 0.825,
      "step": 294840
    },
    {
      "epoch": 1.033375740816677,
      "grad_norm": 2.9375,
      "learning_rate": 3.6419075212037666e-05,
      "loss": 0.8956,
      "step": 294850
    },
    {
      "epoch": 1.0334107883235726,
      "grad_norm": 3.5625,
      "learning_rate": 3.641842618337397e-05,
      "loss": 0.9972,
      "step": 294860
    },
    {
      "epoch": 1.0334458358304681,
      "grad_norm": 3.21875,
      "learning_rate": 3.641777715471026e-05,
      "loss": 0.855,
      "step": 294870
    },
    {
      "epoch": 1.0334808833373639,
      "grad_norm": 3.125,
      "learning_rate": 3.6417128126046564e-05,
      "loss": 0.8376,
      "step": 294880
    },
    {
      "epoch": 1.0335159308442594,
      "grad_norm": 2.75,
      "learning_rate": 3.641647909738286e-05,
      "loss": 0.9108,
      "step": 294890
    },
    {
      "epoch": 1.033550978351155,
      "grad_norm": 2.6875,
      "learning_rate": 3.641583006871916e-05,
      "loss": 0.8378,
      "step": 294900
    },
    {
      "epoch": 1.0335860258580505,
      "grad_norm": 2.96875,
      "learning_rate": 3.6415181040055454e-05,
      "loss": 0.8466,
      "step": 294910
    },
    {
      "epoch": 1.0336210733649462,
      "grad_norm": 2.75,
      "learning_rate": 3.6414532011391756e-05,
      "loss": 0.8631,
      "step": 294920
    },
    {
      "epoch": 1.0336561208718418,
      "grad_norm": 2.90625,
      "learning_rate": 3.641388298272806e-05,
      "loss": 0.9082,
      "step": 294930
    },
    {
      "epoch": 1.0336911683787373,
      "grad_norm": 2.96875,
      "learning_rate": 3.6413233954064345e-05,
      "loss": 0.8392,
      "step": 294940
    },
    {
      "epoch": 1.033726215885633,
      "grad_norm": 2.75,
      "learning_rate": 3.6412584925400646e-05,
      "loss": 0.7791,
      "step": 294950
    },
    {
      "epoch": 1.0337612633925286,
      "grad_norm": 3.09375,
      "learning_rate": 3.641193589673694e-05,
      "loss": 0.9109,
      "step": 294960
    },
    {
      "epoch": 1.0337963108994241,
      "grad_norm": 3.1875,
      "learning_rate": 3.641128686807324e-05,
      "loss": 0.8698,
      "step": 294970
    },
    {
      "epoch": 1.0338313584063197,
      "grad_norm": 3.328125,
      "learning_rate": 3.641063783940954e-05,
      "loss": 0.8608,
      "step": 294980
    },
    {
      "epoch": 1.0338664059132154,
      "grad_norm": 3.5625,
      "learning_rate": 3.640998881074584e-05,
      "loss": 0.8228,
      "step": 294990
    },
    {
      "epoch": 1.033901453420111,
      "grad_norm": 2.875,
      "learning_rate": 3.640933978208213e-05,
      "loss": 0.9117,
      "step": 295000
    },
    {
      "epoch": 1.033901453420111,
      "eval_loss": 0.818539023399353,
      "eval_runtime": 554.8829,
      "eval_samples_per_second": 685.615,
      "eval_steps_per_second": 57.135,
      "step": 295000
    },
    {
      "epoch": 1.0339365009270065,
      "grad_norm": 2.90625,
      "learning_rate": 3.6408690753418434e-05,
      "loss": 0.938,
      "step": 295010
    },
    {
      "epoch": 1.033971548433902,
      "grad_norm": 2.875,
      "learning_rate": 3.6408041724754736e-05,
      "loss": 0.8477,
      "step": 295020
    },
    {
      "epoch": 1.0340065959407978,
      "grad_norm": 2.90625,
      "learning_rate": 3.640739269609103e-05,
      "loss": 0.9029,
      "step": 295030
    },
    {
      "epoch": 1.0340416434476933,
      "grad_norm": 2.859375,
      "learning_rate": 3.640674366742733e-05,
      "loss": 0.8686,
      "step": 295040
    },
    {
      "epoch": 1.0340766909545889,
      "grad_norm": 2.859375,
      "learning_rate": 3.6406094638763626e-05,
      "loss": 0.8501,
      "step": 295050
    },
    {
      "epoch": 1.0341117384614846,
      "grad_norm": 2.9375,
      "learning_rate": 3.640544561009993e-05,
      "loss": 0.9057,
      "step": 295060
    },
    {
      "epoch": 1.0341467859683802,
      "grad_norm": 2.46875,
      "learning_rate": 3.640479658143622e-05,
      "loss": 0.8599,
      "step": 295070
    },
    {
      "epoch": 1.0341818334752757,
      "grad_norm": 2.765625,
      "learning_rate": 3.6404147552772524e-05,
      "loss": 0.9204,
      "step": 295080
    },
    {
      "epoch": 1.0342168809821712,
      "grad_norm": 3.125,
      "learning_rate": 3.640349852410882e-05,
      "loss": 0.8337,
      "step": 295090
    },
    {
      "epoch": 1.034251928489067,
      "grad_norm": 2.828125,
      "learning_rate": 3.640284949544512e-05,
      "loss": 0.8233,
      "step": 295100
    },
    {
      "epoch": 1.0342869759959625,
      "grad_norm": 2.96875,
      "learning_rate": 3.6402200466781414e-05,
      "loss": 0.8754,
      "step": 295110
    },
    {
      "epoch": 1.034322023502858,
      "grad_norm": 2.890625,
      "learning_rate": 3.6401551438117716e-05,
      "loss": 0.9067,
      "step": 295120
    },
    {
      "epoch": 1.0343570710097538,
      "grad_norm": 2.328125,
      "learning_rate": 3.640090240945401e-05,
      "loss": 0.82,
      "step": 295130
    },
    {
      "epoch": 1.0343921185166494,
      "grad_norm": 2.78125,
      "learning_rate": 3.640025338079031e-05,
      "loss": 0.8447,
      "step": 295140
    },
    {
      "epoch": 1.034427166023545,
      "grad_norm": 2.8125,
      "learning_rate": 3.6399604352126606e-05,
      "loss": 0.864,
      "step": 295150
    },
    {
      "epoch": 1.0344622135304404,
      "grad_norm": 2.734375,
      "learning_rate": 3.639895532346291e-05,
      "loss": 0.9409,
      "step": 295160
    },
    {
      "epoch": 1.0344972610373362,
      "grad_norm": 2.734375,
      "learning_rate": 3.639830629479921e-05,
      "loss": 0.8381,
      "step": 295170
    },
    {
      "epoch": 1.0345323085442317,
      "grad_norm": 3.078125,
      "learning_rate": 3.6397657266135504e-05,
      "loss": 0.8404,
      "step": 295180
    },
    {
      "epoch": 1.0345673560511273,
      "grad_norm": 2.953125,
      "learning_rate": 3.6397008237471805e-05,
      "loss": 0.8112,
      "step": 295190
    },
    {
      "epoch": 1.0346024035580228,
      "grad_norm": 3.203125,
      "learning_rate": 3.63963592088081e-05,
      "loss": 1.0594,
      "step": 295200
    },
    {
      "epoch": 1.0346374510649186,
      "grad_norm": 2.890625,
      "learning_rate": 3.63957101801444e-05,
      "loss": 0.9012,
      "step": 295210
    },
    {
      "epoch": 1.034672498571814,
      "grad_norm": 2.5625,
      "learning_rate": 3.6395061151480696e-05,
      "loss": 0.8761,
      "step": 295220
    },
    {
      "epoch": 1.0347075460787096,
      "grad_norm": 2.9375,
      "learning_rate": 3.6394412122817e-05,
      "loss": 0.9413,
      "step": 295230
    },
    {
      "epoch": 1.0347425935856054,
      "grad_norm": 3.359375,
      "learning_rate": 3.639376309415329e-05,
      "loss": 0.9173,
      "step": 295240
    },
    {
      "epoch": 1.034777641092501,
      "grad_norm": 3.078125,
      "learning_rate": 3.639311406548959e-05,
      "loss": 0.8596,
      "step": 295250
    },
    {
      "epoch": 1.0348126885993965,
      "grad_norm": 3.03125,
      "learning_rate": 3.639246503682589e-05,
      "loss": 0.884,
      "step": 295260
    },
    {
      "epoch": 1.034847736106292,
      "grad_norm": 3.234375,
      "learning_rate": 3.639181600816219e-05,
      "loss": 0.8571,
      "step": 295270
    },
    {
      "epoch": 1.0348827836131878,
      "grad_norm": 2.890625,
      "learning_rate": 3.6391166979498484e-05,
      "loss": 0.841,
      "step": 295280
    },
    {
      "epoch": 1.0349178311200833,
      "grad_norm": 2.703125,
      "learning_rate": 3.6390517950834785e-05,
      "loss": 0.8615,
      "step": 295290
    },
    {
      "epoch": 1.0349528786269788,
      "grad_norm": 3.015625,
      "learning_rate": 3.6389868922171086e-05,
      "loss": 0.907,
      "step": 295300
    },
    {
      "epoch": 1.0349879261338746,
      "grad_norm": 2.4375,
      "learning_rate": 3.638921989350738e-05,
      "loss": 0.8316,
      "step": 295310
    },
    {
      "epoch": 1.0350229736407701,
      "grad_norm": 3.046875,
      "learning_rate": 3.6388570864843676e-05,
      "loss": 0.9353,
      "step": 295320
    },
    {
      "epoch": 1.0350580211476657,
      "grad_norm": 2.640625,
      "learning_rate": 3.638792183617997e-05,
      "loss": 0.7968,
      "step": 295330
    },
    {
      "epoch": 1.0350930686545612,
      "grad_norm": 2.75,
      "learning_rate": 3.638727280751627e-05,
      "loss": 0.8288,
      "step": 295340
    },
    {
      "epoch": 1.035128116161457,
      "grad_norm": 2.96875,
      "learning_rate": 3.6386623778852566e-05,
      "loss": 0.913,
      "step": 295350
    },
    {
      "epoch": 1.0351631636683525,
      "grad_norm": 2.625,
      "learning_rate": 3.638597475018887e-05,
      "loss": 0.846,
      "step": 295360
    },
    {
      "epoch": 1.035198211175248,
      "grad_norm": 3.109375,
      "learning_rate": 3.638532572152516e-05,
      "loss": 0.8984,
      "step": 295370
    },
    {
      "epoch": 1.0352332586821436,
      "grad_norm": 2.6875,
      "learning_rate": 3.6384676692861464e-05,
      "loss": 0.7924,
      "step": 295380
    },
    {
      "epoch": 1.0352683061890393,
      "grad_norm": 2.453125,
      "learning_rate": 3.6384027664197765e-05,
      "loss": 0.8286,
      "step": 295390
    },
    {
      "epoch": 1.0353033536959348,
      "grad_norm": 3.15625,
      "learning_rate": 3.638337863553406e-05,
      "loss": 0.8883,
      "step": 295400
    },
    {
      "epoch": 1.0353384012028304,
      "grad_norm": 3.078125,
      "learning_rate": 3.638272960687036e-05,
      "loss": 0.9707,
      "step": 295410
    },
    {
      "epoch": 1.0353734487097261,
      "grad_norm": 3.03125,
      "learning_rate": 3.6382080578206656e-05,
      "loss": 0.8747,
      "step": 295420
    },
    {
      "epoch": 1.0354084962166217,
      "grad_norm": 2.625,
      "learning_rate": 3.638143154954296e-05,
      "loss": 0.8285,
      "step": 295430
    },
    {
      "epoch": 1.0354435437235172,
      "grad_norm": 2.78125,
      "learning_rate": 3.638078252087925e-05,
      "loss": 0.882,
      "step": 295440
    },
    {
      "epoch": 1.0354785912304127,
      "grad_norm": 2.65625,
      "learning_rate": 3.638013349221555e-05,
      "loss": 0.8052,
      "step": 295450
    },
    {
      "epoch": 1.0355136387373085,
      "grad_norm": 3.09375,
      "learning_rate": 3.637948446355185e-05,
      "loss": 0.9141,
      "step": 295460
    },
    {
      "epoch": 1.035548686244204,
      "grad_norm": 2.6875,
      "learning_rate": 3.637883543488815e-05,
      "loss": 0.8638,
      "step": 295470
    },
    {
      "epoch": 1.0355837337510996,
      "grad_norm": 2.53125,
      "learning_rate": 3.6378186406224444e-05,
      "loss": 0.878,
      "step": 295480
    },
    {
      "epoch": 1.0356187812579951,
      "grad_norm": 3.109375,
      "learning_rate": 3.6377537377560745e-05,
      "loss": 0.899,
      "step": 295490
    },
    {
      "epoch": 1.0356538287648909,
      "grad_norm": 3.234375,
      "learning_rate": 3.637688834889704e-05,
      "loss": 0.9073,
      "step": 295500
    },
    {
      "epoch": 1.0356888762717864,
      "grad_norm": 2.9375,
      "learning_rate": 3.637623932023334e-05,
      "loss": 0.8777,
      "step": 295510
    },
    {
      "epoch": 1.035723923778682,
      "grad_norm": 2.9375,
      "learning_rate": 3.637559029156964e-05,
      "loss": 0.8257,
      "step": 295520
    },
    {
      "epoch": 1.0357589712855777,
      "grad_norm": 3.078125,
      "learning_rate": 3.637494126290594e-05,
      "loss": 0.8276,
      "step": 295530
    },
    {
      "epoch": 1.0357940187924732,
      "grad_norm": 2.734375,
      "learning_rate": 3.637429223424224e-05,
      "loss": 0.857,
      "step": 295540
    },
    {
      "epoch": 1.0358290662993688,
      "grad_norm": 2.9375,
      "learning_rate": 3.637364320557853e-05,
      "loss": 0.8941,
      "step": 295550
    },
    {
      "epoch": 1.0358641138062643,
      "grad_norm": 3.015625,
      "learning_rate": 3.6372994176914834e-05,
      "loss": 0.9037,
      "step": 295560
    },
    {
      "epoch": 1.03589916131316,
      "grad_norm": 2.671875,
      "learning_rate": 3.637234514825113e-05,
      "loss": 0.8656,
      "step": 295570
    },
    {
      "epoch": 1.0359342088200556,
      "grad_norm": 3.015625,
      "learning_rate": 3.637169611958743e-05,
      "loss": 0.8028,
      "step": 295580
    },
    {
      "epoch": 1.0359692563269511,
      "grad_norm": 2.609375,
      "learning_rate": 3.6371047090923725e-05,
      "loss": 0.8019,
      "step": 295590
    },
    {
      "epoch": 1.0360043038338467,
      "grad_norm": 3.0,
      "learning_rate": 3.6370398062260026e-05,
      "loss": 0.8823,
      "step": 295600
    },
    {
      "epoch": 1.0360393513407424,
      "grad_norm": 3.203125,
      "learning_rate": 3.636974903359632e-05,
      "loss": 0.7891,
      "step": 295610
    },
    {
      "epoch": 1.036074398847638,
      "grad_norm": 2.375,
      "learning_rate": 3.636910000493262e-05,
      "loss": 0.7966,
      "step": 295620
    },
    {
      "epoch": 1.0361094463545335,
      "grad_norm": 2.90625,
      "learning_rate": 3.636845097626892e-05,
      "loss": 0.9198,
      "step": 295630
    },
    {
      "epoch": 1.0361444938614293,
      "grad_norm": 3.484375,
      "learning_rate": 3.636780194760522e-05,
      "loss": 0.9425,
      "step": 295640
    },
    {
      "epoch": 1.0361795413683248,
      "grad_norm": 2.9375,
      "learning_rate": 3.636715291894151e-05,
      "loss": 0.8571,
      "step": 295650
    },
    {
      "epoch": 1.0362145888752203,
      "grad_norm": 2.96875,
      "learning_rate": 3.6366503890277814e-05,
      "loss": 0.8807,
      "step": 295660
    },
    {
      "epoch": 1.0362496363821159,
      "grad_norm": 3.078125,
      "learning_rate": 3.6365854861614116e-05,
      "loss": 0.9237,
      "step": 295670
    },
    {
      "epoch": 1.0362846838890116,
      "grad_norm": 2.8125,
      "learning_rate": 3.636520583295041e-05,
      "loss": 0.8346,
      "step": 295680
    },
    {
      "epoch": 1.0363197313959072,
      "grad_norm": 3.0625,
      "learning_rate": 3.6364556804286705e-05,
      "loss": 0.8522,
      "step": 295690
    },
    {
      "epoch": 1.0363547789028027,
      "grad_norm": 2.796875,
      "learning_rate": 3.6363907775623e-05,
      "loss": 0.8653,
      "step": 295700
    },
    {
      "epoch": 1.0363898264096982,
      "grad_norm": 2.5,
      "learning_rate": 3.63632587469593e-05,
      "loss": 0.8126,
      "step": 295710
    },
    {
      "epoch": 1.036424873916594,
      "grad_norm": 2.90625,
      "learning_rate": 3.6362609718295596e-05,
      "loss": 0.8749,
      "step": 295720
    },
    {
      "epoch": 1.0364599214234895,
      "grad_norm": 3.078125,
      "learning_rate": 3.63619606896319e-05,
      "loss": 0.9145,
      "step": 295730
    },
    {
      "epoch": 1.036494968930385,
      "grad_norm": 2.265625,
      "learning_rate": 3.636131166096819e-05,
      "loss": 0.8574,
      "step": 295740
    },
    {
      "epoch": 1.0365300164372808,
      "grad_norm": 3.015625,
      "learning_rate": 3.636066263230449e-05,
      "loss": 0.899,
      "step": 295750
    },
    {
      "epoch": 1.0365650639441764,
      "grad_norm": 2.890625,
      "learning_rate": 3.6360013603640794e-05,
      "loss": 0.855,
      "step": 295760
    },
    {
      "epoch": 1.036600111451072,
      "grad_norm": 3.390625,
      "learning_rate": 3.635936457497709e-05,
      "loss": 0.8815,
      "step": 295770
    },
    {
      "epoch": 1.0366351589579674,
      "grad_norm": 2.875,
      "learning_rate": 3.635871554631339e-05,
      "loss": 0.8194,
      "step": 295780
    },
    {
      "epoch": 1.0366702064648632,
      "grad_norm": 2.796875,
      "learning_rate": 3.6358066517649685e-05,
      "loss": 0.8239,
      "step": 295790
    },
    {
      "epoch": 1.0367052539717587,
      "grad_norm": 2.921875,
      "learning_rate": 3.6357417488985986e-05,
      "loss": 0.9139,
      "step": 295800
    },
    {
      "epoch": 1.0367403014786543,
      "grad_norm": 2.84375,
      "learning_rate": 3.635676846032228e-05,
      "loss": 0.9217,
      "step": 295810
    },
    {
      "epoch": 1.03677534898555,
      "grad_norm": 3.03125,
      "learning_rate": 3.635611943165858e-05,
      "loss": 0.8119,
      "step": 295820
    },
    {
      "epoch": 1.0368103964924456,
      "grad_norm": 2.828125,
      "learning_rate": 3.635547040299488e-05,
      "loss": 0.8336,
      "step": 295830
    },
    {
      "epoch": 1.036845443999341,
      "grad_norm": 3.0625,
      "learning_rate": 3.635482137433118e-05,
      "loss": 0.8286,
      "step": 295840
    },
    {
      "epoch": 1.0368804915062366,
      "grad_norm": 2.953125,
      "learning_rate": 3.635417234566747e-05,
      "loss": 0.9048,
      "step": 295850
    },
    {
      "epoch": 1.0369155390131324,
      "grad_norm": 3.078125,
      "learning_rate": 3.6353523317003774e-05,
      "loss": 0.8824,
      "step": 295860
    },
    {
      "epoch": 1.036950586520028,
      "grad_norm": 2.734375,
      "learning_rate": 3.635287428834007e-05,
      "loss": 0.9485,
      "step": 295870
    },
    {
      "epoch": 1.0369856340269235,
      "grad_norm": 2.765625,
      "learning_rate": 3.635222525967637e-05,
      "loss": 0.8709,
      "step": 295880
    },
    {
      "epoch": 1.037020681533819,
      "grad_norm": 3.015625,
      "learning_rate": 3.635157623101267e-05,
      "loss": 0.8616,
      "step": 295890
    },
    {
      "epoch": 1.0370557290407147,
      "grad_norm": 3.140625,
      "learning_rate": 3.6350927202348966e-05,
      "loss": 0.9527,
      "step": 295900
    },
    {
      "epoch": 1.0370907765476103,
      "grad_norm": 3.203125,
      "learning_rate": 3.635027817368527e-05,
      "loss": 0.8654,
      "step": 295910
    },
    {
      "epoch": 1.0371258240545058,
      "grad_norm": 2.921875,
      "learning_rate": 3.634962914502156e-05,
      "loss": 0.899,
      "step": 295920
    },
    {
      "epoch": 1.0371608715614016,
      "grad_norm": 3.03125,
      "learning_rate": 3.6348980116357864e-05,
      "loss": 0.8092,
      "step": 295930
    },
    {
      "epoch": 1.0371959190682971,
      "grad_norm": 2.65625,
      "learning_rate": 3.634833108769416e-05,
      "loss": 0.826,
      "step": 295940
    },
    {
      "epoch": 1.0372309665751926,
      "grad_norm": 2.890625,
      "learning_rate": 3.634768205903046e-05,
      "loss": 0.8276,
      "step": 295950
    },
    {
      "epoch": 1.0372660140820882,
      "grad_norm": 3.109375,
      "learning_rate": 3.6347033030366754e-05,
      "loss": 0.9148,
      "step": 295960
    },
    {
      "epoch": 1.037301061588984,
      "grad_norm": 2.8125,
      "learning_rate": 3.6346384001703056e-05,
      "loss": 0.9219,
      "step": 295970
    },
    {
      "epoch": 1.0373361090958795,
      "grad_norm": 2.6875,
      "learning_rate": 3.634573497303935e-05,
      "loss": 0.7692,
      "step": 295980
    },
    {
      "epoch": 1.037371156602775,
      "grad_norm": 2.796875,
      "learning_rate": 3.634508594437565e-05,
      "loss": 0.8746,
      "step": 295990
    },
    {
      "epoch": 1.0374062041096708,
      "grad_norm": 2.8125,
      "learning_rate": 3.6344436915711946e-05,
      "loss": 0.8225,
      "step": 296000
    },
    {
      "epoch": 1.0374412516165663,
      "grad_norm": 3.046875,
      "learning_rate": 3.634378788704825e-05,
      "loss": 0.8218,
      "step": 296010
    },
    {
      "epoch": 1.0374762991234618,
      "grad_norm": 2.9375,
      "learning_rate": 3.634313885838454e-05,
      "loss": 0.9048,
      "step": 296020
    },
    {
      "epoch": 1.0375113466303574,
      "grad_norm": 2.8125,
      "learning_rate": 3.6342489829720844e-05,
      "loss": 0.821,
      "step": 296030
    },
    {
      "epoch": 1.0375463941372531,
      "grad_norm": 2.53125,
      "learning_rate": 3.6341840801057145e-05,
      "loss": 0.889,
      "step": 296040
    },
    {
      "epoch": 1.0375814416441487,
      "grad_norm": 2.984375,
      "learning_rate": 3.634119177239344e-05,
      "loss": 0.8815,
      "step": 296050
    },
    {
      "epoch": 1.0376164891510442,
      "grad_norm": 3.046875,
      "learning_rate": 3.634054274372974e-05,
      "loss": 0.9324,
      "step": 296060
    },
    {
      "epoch": 1.0376515366579397,
      "grad_norm": 3.28125,
      "learning_rate": 3.633989371506603e-05,
      "loss": 0.8241,
      "step": 296070
    },
    {
      "epoch": 1.0376865841648355,
      "grad_norm": 2.828125,
      "learning_rate": 3.633924468640233e-05,
      "loss": 0.8312,
      "step": 296080
    },
    {
      "epoch": 1.037721631671731,
      "grad_norm": 2.84375,
      "learning_rate": 3.6338595657738625e-05,
      "loss": 0.8669,
      "step": 296090
    },
    {
      "epoch": 1.0377566791786266,
      "grad_norm": 3.09375,
      "learning_rate": 3.6337946629074926e-05,
      "loss": 0.8737,
      "step": 296100
    },
    {
      "epoch": 1.0377917266855223,
      "grad_norm": 2.296875,
      "learning_rate": 3.633729760041122e-05,
      "loss": 0.8425,
      "step": 296110
    },
    {
      "epoch": 1.0378267741924179,
      "grad_norm": 2.484375,
      "learning_rate": 3.633664857174752e-05,
      "loss": 0.9016,
      "step": 296120
    },
    {
      "epoch": 1.0378618216993134,
      "grad_norm": 3.34375,
      "learning_rate": 3.6335999543083824e-05,
      "loss": 0.858,
      "step": 296130
    },
    {
      "epoch": 1.037896869206209,
      "grad_norm": 2.953125,
      "learning_rate": 3.633535051442012e-05,
      "loss": 0.8931,
      "step": 296140
    },
    {
      "epoch": 1.0379319167131047,
      "grad_norm": 2.859375,
      "learning_rate": 3.633470148575642e-05,
      "loss": 0.8921,
      "step": 296150
    },
    {
      "epoch": 1.0379669642200002,
      "grad_norm": 3.0625,
      "learning_rate": 3.6334052457092714e-05,
      "loss": 0.8789,
      "step": 296160
    },
    {
      "epoch": 1.0380020117268958,
      "grad_norm": 3.21875,
      "learning_rate": 3.6333403428429016e-05,
      "loss": 0.8891,
      "step": 296170
    },
    {
      "epoch": 1.0380370592337913,
      "grad_norm": 2.8125,
      "learning_rate": 3.633275439976531e-05,
      "loss": 0.8415,
      "step": 296180
    },
    {
      "epoch": 1.038072106740687,
      "grad_norm": 2.640625,
      "learning_rate": 3.633210537110161e-05,
      "loss": 0.8804,
      "step": 296190
    },
    {
      "epoch": 1.0381071542475826,
      "grad_norm": 3.359375,
      "learning_rate": 3.6331456342437906e-05,
      "loss": 0.794,
      "step": 296200
    },
    {
      "epoch": 1.0381422017544781,
      "grad_norm": 3.015625,
      "learning_rate": 3.633080731377421e-05,
      "loss": 0.8373,
      "step": 296210
    },
    {
      "epoch": 1.038177249261374,
      "grad_norm": 3.734375,
      "learning_rate": 3.63301582851105e-05,
      "loss": 0.8726,
      "step": 296220
    },
    {
      "epoch": 1.0382122967682694,
      "grad_norm": 2.890625,
      "learning_rate": 3.6329509256446804e-05,
      "loss": 0.9161,
      "step": 296230
    },
    {
      "epoch": 1.038247344275165,
      "grad_norm": 2.96875,
      "learning_rate": 3.63288602277831e-05,
      "loss": 0.8517,
      "step": 296240
    },
    {
      "epoch": 1.0382823917820605,
      "grad_norm": 2.859375,
      "learning_rate": 3.63282111991194e-05,
      "loss": 0.8801,
      "step": 296250
    },
    {
      "epoch": 1.0383174392889563,
      "grad_norm": 3.21875,
      "learning_rate": 3.63275621704557e-05,
      "loss": 0.9264,
      "step": 296260
    },
    {
      "epoch": 1.0383524867958518,
      "grad_norm": 2.75,
      "learning_rate": 3.6326913141791996e-05,
      "loss": 0.8641,
      "step": 296270
    },
    {
      "epoch": 1.0383875343027473,
      "grad_norm": 2.375,
      "learning_rate": 3.63262641131283e-05,
      "loss": 0.8982,
      "step": 296280
    },
    {
      "epoch": 1.0384225818096429,
      "grad_norm": 2.53125,
      "learning_rate": 3.632561508446459e-05,
      "loss": 0.8964,
      "step": 296290
    },
    {
      "epoch": 1.0384576293165386,
      "grad_norm": 2.984375,
      "learning_rate": 3.632496605580089e-05,
      "loss": 0.84,
      "step": 296300
    },
    {
      "epoch": 1.0384926768234342,
      "grad_norm": 3.375,
      "learning_rate": 3.632431702713719e-05,
      "loss": 0.9083,
      "step": 296310
    },
    {
      "epoch": 1.0385277243303297,
      "grad_norm": 2.796875,
      "learning_rate": 3.632366799847349e-05,
      "loss": 0.8712,
      "step": 296320
    },
    {
      "epoch": 1.0385627718372255,
      "grad_norm": 2.75,
      "learning_rate": 3.6323018969809784e-05,
      "loss": 0.8152,
      "step": 296330
    },
    {
      "epoch": 1.038597819344121,
      "grad_norm": 3.125,
      "learning_rate": 3.6322369941146085e-05,
      "loss": 0.9251,
      "step": 296340
    },
    {
      "epoch": 1.0386328668510165,
      "grad_norm": 2.546875,
      "learning_rate": 3.632172091248238e-05,
      "loss": 0.938,
      "step": 296350
    },
    {
      "epoch": 1.038667914357912,
      "grad_norm": 2.921875,
      "learning_rate": 3.632107188381868e-05,
      "loss": 0.9719,
      "step": 296360
    },
    {
      "epoch": 1.0387029618648078,
      "grad_norm": 2.9375,
      "learning_rate": 3.6320422855154976e-05,
      "loss": 0.8289,
      "step": 296370
    },
    {
      "epoch": 1.0387380093717034,
      "grad_norm": 3.015625,
      "learning_rate": 3.631977382649128e-05,
      "loss": 0.9368,
      "step": 296380
    },
    {
      "epoch": 1.0387730568785989,
      "grad_norm": 2.703125,
      "learning_rate": 3.631912479782757e-05,
      "loss": 0.8063,
      "step": 296390
    },
    {
      "epoch": 1.0388081043854944,
      "grad_norm": 2.59375,
      "learning_rate": 3.631847576916387e-05,
      "loss": 0.8341,
      "step": 296400
    },
    {
      "epoch": 1.0388431518923902,
      "grad_norm": 2.9375,
      "learning_rate": 3.6317826740500174e-05,
      "loss": 0.8823,
      "step": 296410
    },
    {
      "epoch": 1.0388781993992857,
      "grad_norm": 2.75,
      "learning_rate": 3.631717771183647e-05,
      "loss": 0.8138,
      "step": 296420
    },
    {
      "epoch": 1.0389132469061813,
      "grad_norm": 3.0625,
      "learning_rate": 3.631652868317277e-05,
      "loss": 0.8064,
      "step": 296430
    },
    {
      "epoch": 1.038948294413077,
      "grad_norm": 2.609375,
      "learning_rate": 3.631587965450906e-05,
      "loss": 0.908,
      "step": 296440
    },
    {
      "epoch": 1.0389833419199725,
      "grad_norm": 2.5,
      "learning_rate": 3.631523062584536e-05,
      "loss": 0.8638,
      "step": 296450
    },
    {
      "epoch": 1.039018389426868,
      "grad_norm": 3.109375,
      "learning_rate": 3.6314581597181654e-05,
      "loss": 0.8433,
      "step": 296460
    },
    {
      "epoch": 1.0390534369337636,
      "grad_norm": 2.640625,
      "learning_rate": 3.6313932568517956e-05,
      "loss": 0.8719,
      "step": 296470
    },
    {
      "epoch": 1.0390884844406594,
      "grad_norm": 3.09375,
      "learning_rate": 3.631328353985426e-05,
      "loss": 0.8824,
      "step": 296480
    },
    {
      "epoch": 1.039123531947555,
      "grad_norm": 3.25,
      "learning_rate": 3.631263451119055e-05,
      "loss": 0.8442,
      "step": 296490
    },
    {
      "epoch": 1.0391585794544504,
      "grad_norm": 3.203125,
      "learning_rate": 3.631198548252685e-05,
      "loss": 0.9268,
      "step": 296500
    },
    {
      "epoch": 1.0391936269613462,
      "grad_norm": 3.015625,
      "learning_rate": 3.631133645386315e-05,
      "loss": 0.9569,
      "step": 296510
    },
    {
      "epoch": 1.0392286744682417,
      "grad_norm": 2.46875,
      "learning_rate": 3.631068742519945e-05,
      "loss": 0.8844,
      "step": 296520
    },
    {
      "epoch": 1.0392637219751373,
      "grad_norm": 2.578125,
      "learning_rate": 3.6310038396535744e-05,
      "loss": 0.829,
      "step": 296530
    },
    {
      "epoch": 1.0392987694820328,
      "grad_norm": 2.671875,
      "learning_rate": 3.6309389367872045e-05,
      "loss": 0.8948,
      "step": 296540
    },
    {
      "epoch": 1.0393338169889286,
      "grad_norm": 2.78125,
      "learning_rate": 3.630874033920834e-05,
      "loss": 0.9435,
      "step": 296550
    },
    {
      "epoch": 1.039368864495824,
      "grad_norm": 2.859375,
      "learning_rate": 3.630809131054464e-05,
      "loss": 0.9482,
      "step": 296560
    },
    {
      "epoch": 1.0394039120027196,
      "grad_norm": 3.109375,
      "learning_rate": 3.6307442281880936e-05,
      "loss": 0.8479,
      "step": 296570
    },
    {
      "epoch": 1.0394389595096152,
      "grad_norm": 3.078125,
      "learning_rate": 3.630679325321724e-05,
      "loss": 0.8866,
      "step": 296580
    },
    {
      "epoch": 1.039474007016511,
      "grad_norm": 2.96875,
      "learning_rate": 3.630614422455353e-05,
      "loss": 0.7483,
      "step": 296590
    },
    {
      "epoch": 1.0395090545234065,
      "grad_norm": 2.84375,
      "learning_rate": 3.630549519588983e-05,
      "loss": 0.9285,
      "step": 296600
    },
    {
      "epoch": 1.039544102030302,
      "grad_norm": 2.9375,
      "learning_rate": 3.630484616722613e-05,
      "loss": 0.9251,
      "step": 296610
    },
    {
      "epoch": 1.0395791495371978,
      "grad_norm": 3.28125,
      "learning_rate": 3.630419713856243e-05,
      "loss": 0.8874,
      "step": 296620
    },
    {
      "epoch": 1.0396141970440933,
      "grad_norm": 2.75,
      "learning_rate": 3.630354810989873e-05,
      "loss": 0.8422,
      "step": 296630
    },
    {
      "epoch": 1.0396492445509888,
      "grad_norm": 3.078125,
      "learning_rate": 3.6302899081235025e-05,
      "loss": 0.8861,
      "step": 296640
    },
    {
      "epoch": 1.0396842920578844,
      "grad_norm": 2.6875,
      "learning_rate": 3.6302250052571326e-05,
      "loss": 0.8552,
      "step": 296650
    },
    {
      "epoch": 1.0397193395647801,
      "grad_norm": 2.8125,
      "learning_rate": 3.630160102390762e-05,
      "loss": 0.8374,
      "step": 296660
    },
    {
      "epoch": 1.0397543870716757,
      "grad_norm": 2.90625,
      "learning_rate": 3.630095199524392e-05,
      "loss": 0.8462,
      "step": 296670
    },
    {
      "epoch": 1.0397894345785712,
      "grad_norm": 3.03125,
      "learning_rate": 3.630030296658022e-05,
      "loss": 0.8715,
      "step": 296680
    },
    {
      "epoch": 1.039824482085467,
      "grad_norm": 3.03125,
      "learning_rate": 3.629965393791652e-05,
      "loss": 0.8832,
      "step": 296690
    },
    {
      "epoch": 1.0398595295923625,
      "grad_norm": 3.34375,
      "learning_rate": 3.629900490925281e-05,
      "loss": 0.8271,
      "step": 296700
    },
    {
      "epoch": 1.039894577099258,
      "grad_norm": 2.890625,
      "learning_rate": 3.6298355880589114e-05,
      "loss": 0.8669,
      "step": 296710
    },
    {
      "epoch": 1.0399296246061536,
      "grad_norm": 2.90625,
      "learning_rate": 3.629770685192541e-05,
      "loss": 0.8513,
      "step": 296720
    },
    {
      "epoch": 1.0399646721130493,
      "grad_norm": 2.765625,
      "learning_rate": 3.629705782326171e-05,
      "loss": 0.856,
      "step": 296730
    },
    {
      "epoch": 1.0399997196199449,
      "grad_norm": 2.65625,
      "learning_rate": 3.6296408794598005e-05,
      "loss": 0.8633,
      "step": 296740
    },
    {
      "epoch": 1.0400347671268404,
      "grad_norm": 2.859375,
      "learning_rate": 3.6295759765934306e-05,
      "loss": 0.8244,
      "step": 296750
    },
    {
      "epoch": 1.040069814633736,
      "grad_norm": 3.0625,
      "learning_rate": 3.629511073727061e-05,
      "loss": 0.8297,
      "step": 296760
    },
    {
      "epoch": 1.0401048621406317,
      "grad_norm": 3.0625,
      "learning_rate": 3.62944617086069e-05,
      "loss": 0.8569,
      "step": 296770
    },
    {
      "epoch": 1.0401399096475272,
      "grad_norm": 3.046875,
      "learning_rate": 3.6293812679943204e-05,
      "loss": 0.8507,
      "step": 296780
    },
    {
      "epoch": 1.0401749571544228,
      "grad_norm": 2.609375,
      "learning_rate": 3.62931636512795e-05,
      "loss": 0.7956,
      "step": 296790
    },
    {
      "epoch": 1.0402100046613185,
      "grad_norm": 3.421875,
      "learning_rate": 3.62925146226158e-05,
      "loss": 0.8374,
      "step": 296800
    },
    {
      "epoch": 1.040245052168214,
      "grad_norm": 3.0625,
      "learning_rate": 3.6291865593952094e-05,
      "loss": 0.7573,
      "step": 296810
    },
    {
      "epoch": 1.0402800996751096,
      "grad_norm": 3.03125,
      "learning_rate": 3.629121656528839e-05,
      "loss": 0.866,
      "step": 296820
    },
    {
      "epoch": 1.0403151471820051,
      "grad_norm": 2.859375,
      "learning_rate": 3.6290567536624684e-05,
      "loss": 0.8103,
      "step": 296830
    },
    {
      "epoch": 1.0403501946889009,
      "grad_norm": 3.015625,
      "learning_rate": 3.6289918507960985e-05,
      "loss": 0.859,
      "step": 296840
    },
    {
      "epoch": 1.0403852421957964,
      "grad_norm": 2.6875,
      "learning_rate": 3.6289269479297286e-05,
      "loss": 0.8787,
      "step": 296850
    },
    {
      "epoch": 1.040420289702692,
      "grad_norm": 3.40625,
      "learning_rate": 3.628862045063358e-05,
      "loss": 0.8755,
      "step": 296860
    },
    {
      "epoch": 1.0404553372095875,
      "grad_norm": 2.875,
      "learning_rate": 3.628797142196988e-05,
      "loss": 0.8239,
      "step": 296870
    },
    {
      "epoch": 1.0404903847164833,
      "grad_norm": 3.0,
      "learning_rate": 3.628732239330618e-05,
      "loss": 0.918,
      "step": 296880
    },
    {
      "epoch": 1.0405254322233788,
      "grad_norm": 3.3125,
      "learning_rate": 3.628667336464248e-05,
      "loss": 0.8801,
      "step": 296890
    },
    {
      "epoch": 1.0405604797302743,
      "grad_norm": 3.125,
      "learning_rate": 3.628602433597877e-05,
      "loss": 0.8427,
      "step": 296900
    },
    {
      "epoch": 1.04059552723717,
      "grad_norm": 2.859375,
      "learning_rate": 3.6285375307315074e-05,
      "loss": 0.9412,
      "step": 296910
    },
    {
      "epoch": 1.0406305747440656,
      "grad_norm": 3.515625,
      "learning_rate": 3.628472627865137e-05,
      "loss": 0.8316,
      "step": 296920
    },
    {
      "epoch": 1.0406656222509612,
      "grad_norm": 3.046875,
      "learning_rate": 3.628407724998767e-05,
      "loss": 0.8497,
      "step": 296930
    },
    {
      "epoch": 1.0407006697578567,
      "grad_norm": 3.171875,
      "learning_rate": 3.6283428221323965e-05,
      "loss": 0.866,
      "step": 296940
    },
    {
      "epoch": 1.0407357172647524,
      "grad_norm": 2.90625,
      "learning_rate": 3.6282779192660266e-05,
      "loss": 0.8803,
      "step": 296950
    },
    {
      "epoch": 1.040770764771648,
      "grad_norm": 2.53125,
      "learning_rate": 3.628213016399656e-05,
      "loss": 0.897,
      "step": 296960
    },
    {
      "epoch": 1.0408058122785435,
      "grad_norm": 3.09375,
      "learning_rate": 3.628148113533286e-05,
      "loss": 0.8677,
      "step": 296970
    },
    {
      "epoch": 1.040840859785439,
      "grad_norm": 3.0,
      "learning_rate": 3.628083210666916e-05,
      "loss": 0.8223,
      "step": 296980
    },
    {
      "epoch": 1.0408759072923348,
      "grad_norm": 3.078125,
      "learning_rate": 3.628018307800546e-05,
      "loss": 0.8211,
      "step": 296990
    },
    {
      "epoch": 1.0409109547992303,
      "grad_norm": 2.609375,
      "learning_rate": 3.627953404934176e-05,
      "loss": 0.8282,
      "step": 297000
    },
    {
      "epoch": 1.0409460023061259,
      "grad_norm": 2.75,
      "learning_rate": 3.6278885020678054e-05,
      "loss": 0.906,
      "step": 297010
    },
    {
      "epoch": 1.0409810498130216,
      "grad_norm": 2.859375,
      "learning_rate": 3.6278235992014356e-05,
      "loss": 0.8904,
      "step": 297020
    },
    {
      "epoch": 1.0410160973199172,
      "grad_norm": 2.75,
      "learning_rate": 3.627758696335065e-05,
      "loss": 0.8179,
      "step": 297030
    },
    {
      "epoch": 1.0410511448268127,
      "grad_norm": 2.625,
      "learning_rate": 3.627693793468695e-05,
      "loss": 0.9184,
      "step": 297040
    },
    {
      "epoch": 1.0410861923337082,
      "grad_norm": 3.15625,
      "learning_rate": 3.6276288906023246e-05,
      "loss": 0.9071,
      "step": 297050
    },
    {
      "epoch": 1.041121239840604,
      "grad_norm": 3.234375,
      "learning_rate": 3.627563987735955e-05,
      "loss": 0.9702,
      "step": 297060
    },
    {
      "epoch": 1.0411562873474995,
      "grad_norm": 3.390625,
      "learning_rate": 3.627499084869584e-05,
      "loss": 0.8544,
      "step": 297070
    },
    {
      "epoch": 1.041191334854395,
      "grad_norm": 2.890625,
      "learning_rate": 3.6274341820032144e-05,
      "loss": 0.8617,
      "step": 297080
    },
    {
      "epoch": 1.0412263823612908,
      "grad_norm": 2.6875,
      "learning_rate": 3.627369279136844e-05,
      "loss": 0.8964,
      "step": 297090
    },
    {
      "epoch": 1.0412614298681864,
      "grad_norm": 2.734375,
      "learning_rate": 3.627304376270474e-05,
      "loss": 0.8255,
      "step": 297100
    },
    {
      "epoch": 1.041296477375082,
      "grad_norm": 3.25,
      "learning_rate": 3.6272394734041034e-05,
      "loss": 0.93,
      "step": 297110
    },
    {
      "epoch": 1.0413315248819774,
      "grad_norm": 2.890625,
      "learning_rate": 3.6271745705377336e-05,
      "loss": 0.8597,
      "step": 297120
    },
    {
      "epoch": 1.0413665723888732,
      "grad_norm": 2.96875,
      "learning_rate": 3.627109667671364e-05,
      "loss": 0.8827,
      "step": 297130
    },
    {
      "epoch": 1.0414016198957687,
      "grad_norm": 3.375,
      "learning_rate": 3.627044764804993e-05,
      "loss": 0.8904,
      "step": 297140
    },
    {
      "epoch": 1.0414366674026643,
      "grad_norm": 3.453125,
      "learning_rate": 3.626979861938623e-05,
      "loss": 0.9244,
      "step": 297150
    },
    {
      "epoch": 1.0414717149095598,
      "grad_norm": 2.921875,
      "learning_rate": 3.626914959072253e-05,
      "loss": 0.9121,
      "step": 297160
    },
    {
      "epoch": 1.0415067624164556,
      "grad_norm": 2.9375,
      "learning_rate": 3.626850056205883e-05,
      "loss": 0.8134,
      "step": 297170
    },
    {
      "epoch": 1.041541809923351,
      "grad_norm": 2.78125,
      "learning_rate": 3.6267851533395124e-05,
      "loss": 0.8958,
      "step": 297180
    },
    {
      "epoch": 1.0415768574302466,
      "grad_norm": 3.34375,
      "learning_rate": 3.6267202504731425e-05,
      "loss": 0.8744,
      "step": 297190
    },
    {
      "epoch": 1.0416119049371424,
      "grad_norm": 3.5,
      "learning_rate": 3.626655347606771e-05,
      "loss": 0.8891,
      "step": 297200
    },
    {
      "epoch": 1.041646952444038,
      "grad_norm": 3.890625,
      "learning_rate": 3.6265904447404014e-05,
      "loss": 0.9332,
      "step": 297210
    },
    {
      "epoch": 1.0416819999509335,
      "grad_norm": 2.921875,
      "learning_rate": 3.6265255418740316e-05,
      "loss": 0.8149,
      "step": 297220
    },
    {
      "epoch": 1.041717047457829,
      "grad_norm": 2.828125,
      "learning_rate": 3.626460639007661e-05,
      "loss": 0.8999,
      "step": 297230
    },
    {
      "epoch": 1.0417520949647248,
      "grad_norm": 2.78125,
      "learning_rate": 3.626395736141291e-05,
      "loss": 0.8476,
      "step": 297240
    },
    {
      "epoch": 1.0417871424716203,
      "grad_norm": 3.109375,
      "learning_rate": 3.6263308332749206e-05,
      "loss": 0.7892,
      "step": 297250
    },
    {
      "epoch": 1.0418221899785158,
      "grad_norm": 3.203125,
      "learning_rate": 3.626265930408551e-05,
      "loss": 0.8642,
      "step": 297260
    },
    {
      "epoch": 1.0418572374854114,
      "grad_norm": 3.40625,
      "learning_rate": 3.62620102754218e-05,
      "loss": 0.8601,
      "step": 297270
    },
    {
      "epoch": 1.0418922849923071,
      "grad_norm": 2.84375,
      "learning_rate": 3.6261361246758104e-05,
      "loss": 0.9298,
      "step": 297280
    },
    {
      "epoch": 1.0419273324992027,
      "grad_norm": 2.75,
      "learning_rate": 3.62607122180944e-05,
      "loss": 0.8546,
      "step": 297290
    },
    {
      "epoch": 1.0419623800060982,
      "grad_norm": 2.859375,
      "learning_rate": 3.62600631894307e-05,
      "loss": 0.8567,
      "step": 297300
    },
    {
      "epoch": 1.041997427512994,
      "grad_norm": 3.875,
      "learning_rate": 3.6259414160766994e-05,
      "loss": 0.8907,
      "step": 297310
    },
    {
      "epoch": 1.0420324750198895,
      "grad_norm": 2.734375,
      "learning_rate": 3.6258765132103296e-05,
      "loss": 0.8641,
      "step": 297320
    },
    {
      "epoch": 1.042067522526785,
      "grad_norm": 2.921875,
      "learning_rate": 3.625811610343959e-05,
      "loss": 0.8233,
      "step": 297330
    },
    {
      "epoch": 1.0421025700336806,
      "grad_norm": 2.921875,
      "learning_rate": 3.625746707477589e-05,
      "loss": 0.8207,
      "step": 297340
    },
    {
      "epoch": 1.0421376175405763,
      "grad_norm": 2.921875,
      "learning_rate": 3.6256818046112186e-05,
      "loss": 0.9038,
      "step": 297350
    },
    {
      "epoch": 1.0421726650474719,
      "grad_norm": 2.671875,
      "learning_rate": 3.625616901744849e-05,
      "loss": 0.8505,
      "step": 297360
    },
    {
      "epoch": 1.0422077125543674,
      "grad_norm": 3.0625,
      "learning_rate": 3.625551998878479e-05,
      "loss": 0.8521,
      "step": 297370
    },
    {
      "epoch": 1.0422427600612632,
      "grad_norm": 4.1875,
      "learning_rate": 3.6254870960121084e-05,
      "loss": 0.8247,
      "step": 297380
    },
    {
      "epoch": 1.0422778075681587,
      "grad_norm": 2.984375,
      "learning_rate": 3.6254221931457385e-05,
      "loss": 0.9514,
      "step": 297390
    },
    {
      "epoch": 1.0423128550750542,
      "grad_norm": 3.0625,
      "learning_rate": 3.625357290279368e-05,
      "loss": 0.85,
      "step": 297400
    },
    {
      "epoch": 1.0423479025819498,
      "grad_norm": 3.40625,
      "learning_rate": 3.625292387412998e-05,
      "loss": 0.939,
      "step": 297410
    },
    {
      "epoch": 1.0423829500888455,
      "grad_norm": 2.5625,
      "learning_rate": 3.6252274845466276e-05,
      "loss": 0.8527,
      "step": 297420
    },
    {
      "epoch": 1.042417997595741,
      "grad_norm": 3.21875,
      "learning_rate": 3.625162581680258e-05,
      "loss": 0.9274,
      "step": 297430
    },
    {
      "epoch": 1.0424530451026366,
      "grad_norm": 2.59375,
      "learning_rate": 3.625097678813887e-05,
      "loss": 0.7923,
      "step": 297440
    },
    {
      "epoch": 1.0424880926095321,
      "grad_norm": 3.234375,
      "learning_rate": 3.625032775947517e-05,
      "loss": 0.8376,
      "step": 297450
    },
    {
      "epoch": 1.0425231401164279,
      "grad_norm": 3.328125,
      "learning_rate": 3.624967873081147e-05,
      "loss": 0.9181,
      "step": 297460
    },
    {
      "epoch": 1.0425581876233234,
      "grad_norm": 3.0,
      "learning_rate": 3.624902970214777e-05,
      "loss": 0.9121,
      "step": 297470
    },
    {
      "epoch": 1.042593235130219,
      "grad_norm": 2.5625,
      "learning_rate": 3.6248380673484064e-05,
      "loss": 0.8286,
      "step": 297480
    },
    {
      "epoch": 1.0426282826371147,
      "grad_norm": 3.21875,
      "learning_rate": 3.6247731644820365e-05,
      "loss": 1.0049,
      "step": 297490
    },
    {
      "epoch": 1.0426633301440102,
      "grad_norm": 3.078125,
      "learning_rate": 3.624708261615667e-05,
      "loss": 0.9441,
      "step": 297500
    },
    {
      "epoch": 1.0426983776509058,
      "grad_norm": 2.65625,
      "learning_rate": 3.624643358749296e-05,
      "loss": 0.8257,
      "step": 297510
    },
    {
      "epoch": 1.0427334251578013,
      "grad_norm": 2.9375,
      "learning_rate": 3.624578455882926e-05,
      "loss": 0.8489,
      "step": 297520
    },
    {
      "epoch": 1.042768472664697,
      "grad_norm": 2.765625,
      "learning_rate": 3.624513553016556e-05,
      "loss": 0.8485,
      "step": 297530
    },
    {
      "epoch": 1.0428035201715926,
      "grad_norm": 2.875,
      "learning_rate": 3.624448650150186e-05,
      "loss": 0.8597,
      "step": 297540
    },
    {
      "epoch": 1.0428385676784881,
      "grad_norm": 2.59375,
      "learning_rate": 3.624383747283815e-05,
      "loss": 0.7794,
      "step": 297550
    },
    {
      "epoch": 1.0428736151853837,
      "grad_norm": 3.09375,
      "learning_rate": 3.6243188444174455e-05,
      "loss": 0.8969,
      "step": 297560
    },
    {
      "epoch": 1.0429086626922794,
      "grad_norm": 3.0,
      "learning_rate": 3.624253941551074e-05,
      "loss": 0.8939,
      "step": 297570
    },
    {
      "epoch": 1.042943710199175,
      "grad_norm": 2.734375,
      "learning_rate": 3.6241890386847044e-05,
      "loss": 0.7248,
      "step": 297580
    },
    {
      "epoch": 1.0429787577060705,
      "grad_norm": 3.109375,
      "learning_rate": 3.6241241358183345e-05,
      "loss": 0.9526,
      "step": 297590
    },
    {
      "epoch": 1.0430138052129663,
      "grad_norm": 2.4375,
      "learning_rate": 3.624059232951964e-05,
      "loss": 0.8516,
      "step": 297600
    },
    {
      "epoch": 1.0430488527198618,
      "grad_norm": 2.96875,
      "learning_rate": 3.623994330085594e-05,
      "loss": 0.865,
      "step": 297610
    },
    {
      "epoch": 1.0430839002267573,
      "grad_norm": 2.75,
      "learning_rate": 3.6239294272192236e-05,
      "loss": 0.8568,
      "step": 297620
    },
    {
      "epoch": 1.0431189477336529,
      "grad_norm": 2.921875,
      "learning_rate": 3.623864524352854e-05,
      "loss": 0.7948,
      "step": 297630
    },
    {
      "epoch": 1.0431539952405486,
      "grad_norm": 3.015625,
      "learning_rate": 3.623799621486483e-05,
      "loss": 0.7817,
      "step": 297640
    },
    {
      "epoch": 1.0431890427474442,
      "grad_norm": 3.09375,
      "learning_rate": 3.623734718620113e-05,
      "loss": 0.8771,
      "step": 297650
    },
    {
      "epoch": 1.0432240902543397,
      "grad_norm": 2.953125,
      "learning_rate": 3.623669815753743e-05,
      "loss": 0.8819,
      "step": 297660
    },
    {
      "epoch": 1.0432591377612352,
      "grad_norm": 3.46875,
      "learning_rate": 3.623604912887373e-05,
      "loss": 0.8365,
      "step": 297670
    },
    {
      "epoch": 1.043294185268131,
      "grad_norm": 2.84375,
      "learning_rate": 3.6235400100210024e-05,
      "loss": 0.8791,
      "step": 297680
    },
    {
      "epoch": 1.0433292327750265,
      "grad_norm": 3.078125,
      "learning_rate": 3.6234751071546325e-05,
      "loss": 0.8775,
      "step": 297690
    },
    {
      "epoch": 1.043364280281922,
      "grad_norm": 2.578125,
      "learning_rate": 3.623410204288262e-05,
      "loss": 0.8271,
      "step": 297700
    },
    {
      "epoch": 1.0433993277888178,
      "grad_norm": 2.90625,
      "learning_rate": 3.623345301421892e-05,
      "loss": 0.8699,
      "step": 297710
    },
    {
      "epoch": 1.0434343752957134,
      "grad_norm": 2.8125,
      "learning_rate": 3.623280398555522e-05,
      "loss": 0.9304,
      "step": 297720
    },
    {
      "epoch": 1.043469422802609,
      "grad_norm": 2.8125,
      "learning_rate": 3.623215495689152e-05,
      "loss": 0.8998,
      "step": 297730
    },
    {
      "epoch": 1.0435044703095044,
      "grad_norm": 2.984375,
      "learning_rate": 3.623150592822782e-05,
      "loss": 0.8659,
      "step": 297740
    },
    {
      "epoch": 1.0435395178164002,
      "grad_norm": 2.671875,
      "learning_rate": 3.623085689956411e-05,
      "loss": 0.7926,
      "step": 297750
    },
    {
      "epoch": 1.0435745653232957,
      "grad_norm": 2.890625,
      "learning_rate": 3.6230207870900415e-05,
      "loss": 0.8655,
      "step": 297760
    },
    {
      "epoch": 1.0436096128301913,
      "grad_norm": 2.875,
      "learning_rate": 3.622955884223671e-05,
      "loss": 0.8899,
      "step": 297770
    },
    {
      "epoch": 1.043644660337087,
      "grad_norm": 2.828125,
      "learning_rate": 3.622890981357301e-05,
      "loss": 0.9164,
      "step": 297780
    },
    {
      "epoch": 1.0436797078439826,
      "grad_norm": 2.859375,
      "learning_rate": 3.6228260784909305e-05,
      "loss": 0.7329,
      "step": 297790
    },
    {
      "epoch": 1.043714755350878,
      "grad_norm": 2.84375,
      "learning_rate": 3.622761175624561e-05,
      "loss": 0.8202,
      "step": 297800
    },
    {
      "epoch": 1.0437498028577736,
      "grad_norm": 2.84375,
      "learning_rate": 3.62269627275819e-05,
      "loss": 0.8391,
      "step": 297810
    },
    {
      "epoch": 1.0437848503646694,
      "grad_norm": 2.484375,
      "learning_rate": 3.62263136989182e-05,
      "loss": 0.8507,
      "step": 297820
    },
    {
      "epoch": 1.043819897871565,
      "grad_norm": 2.890625,
      "learning_rate": 3.62256646702545e-05,
      "loss": 0.8362,
      "step": 297830
    },
    {
      "epoch": 1.0438549453784605,
      "grad_norm": 2.90625,
      "learning_rate": 3.62250156415908e-05,
      "loss": 0.9184,
      "step": 297840
    },
    {
      "epoch": 1.043889992885356,
      "grad_norm": 2.90625,
      "learning_rate": 3.622436661292709e-05,
      "loss": 0.8942,
      "step": 297850
    },
    {
      "epoch": 1.0439250403922518,
      "grad_norm": 3.4375,
      "learning_rate": 3.6223717584263395e-05,
      "loss": 1.0167,
      "step": 297860
    },
    {
      "epoch": 1.0439600878991473,
      "grad_norm": 2.921875,
      "learning_rate": 3.6223068555599696e-05,
      "loss": 0.8494,
      "step": 297870
    },
    {
      "epoch": 1.0439951354060428,
      "grad_norm": 2.65625,
      "learning_rate": 3.622241952693599e-05,
      "loss": 0.7784,
      "step": 297880
    },
    {
      "epoch": 1.0440301829129386,
      "grad_norm": 2.765625,
      "learning_rate": 3.622177049827229e-05,
      "loss": 0.9041,
      "step": 297890
    },
    {
      "epoch": 1.0440652304198341,
      "grad_norm": 3.4375,
      "learning_rate": 3.622112146960859e-05,
      "loss": 0.8917,
      "step": 297900
    },
    {
      "epoch": 1.0441002779267297,
      "grad_norm": 2.765625,
      "learning_rate": 3.622047244094489e-05,
      "loss": 0.8589,
      "step": 297910
    },
    {
      "epoch": 1.0441353254336252,
      "grad_norm": 2.84375,
      "learning_rate": 3.621982341228118e-05,
      "loss": 0.8495,
      "step": 297920
    },
    {
      "epoch": 1.044170372940521,
      "grad_norm": 2.75,
      "learning_rate": 3.6219174383617484e-05,
      "loss": 0.9598,
      "step": 297930
    },
    {
      "epoch": 1.0442054204474165,
      "grad_norm": 3.078125,
      "learning_rate": 3.621852535495378e-05,
      "loss": 0.9053,
      "step": 297940
    },
    {
      "epoch": 1.044240467954312,
      "grad_norm": 3.078125,
      "learning_rate": 3.621787632629007e-05,
      "loss": 0.9391,
      "step": 297950
    },
    {
      "epoch": 1.0442755154612076,
      "grad_norm": 2.875,
      "learning_rate": 3.6217227297626375e-05,
      "loss": 0.818,
      "step": 297960
    },
    {
      "epoch": 1.0443105629681033,
      "grad_norm": 2.96875,
      "learning_rate": 3.621657826896267e-05,
      "loss": 0.9123,
      "step": 297970
    },
    {
      "epoch": 1.0443456104749989,
      "grad_norm": 3.0625,
      "learning_rate": 3.621592924029897e-05,
      "loss": 0.8153,
      "step": 297980
    },
    {
      "epoch": 1.0443806579818944,
      "grad_norm": 3.0,
      "learning_rate": 3.6215280211635265e-05,
      "loss": 0.8902,
      "step": 297990
    },
    {
      "epoch": 1.0444157054887901,
      "grad_norm": 3.171875,
      "learning_rate": 3.6214631182971567e-05,
      "loss": 0.9031,
      "step": 298000
    },
    {
      "epoch": 1.0444507529956857,
      "grad_norm": 2.890625,
      "learning_rate": 3.621398215430786e-05,
      "loss": 0.8084,
      "step": 298010
    },
    {
      "epoch": 1.0444858005025812,
      "grad_norm": 2.84375,
      "learning_rate": 3.621333312564416e-05,
      "loss": 0.8199,
      "step": 298020
    },
    {
      "epoch": 1.0445208480094768,
      "grad_norm": 3.171875,
      "learning_rate": 3.621268409698046e-05,
      "loss": 0.8966,
      "step": 298030
    },
    {
      "epoch": 1.0445558955163725,
      "grad_norm": 2.640625,
      "learning_rate": 3.621203506831676e-05,
      "loss": 0.9152,
      "step": 298040
    },
    {
      "epoch": 1.044590943023268,
      "grad_norm": 2.828125,
      "learning_rate": 3.621138603965305e-05,
      "loss": 0.9035,
      "step": 298050
    },
    {
      "epoch": 1.0446259905301636,
      "grad_norm": 2.953125,
      "learning_rate": 3.6210737010989355e-05,
      "loss": 0.9399,
      "step": 298060
    },
    {
      "epoch": 1.0446610380370593,
      "grad_norm": 2.796875,
      "learning_rate": 3.621008798232565e-05,
      "loss": 0.8447,
      "step": 298070
    },
    {
      "epoch": 1.0446960855439549,
      "grad_norm": 2.875,
      "learning_rate": 3.620943895366195e-05,
      "loss": 0.8616,
      "step": 298080
    },
    {
      "epoch": 1.0447311330508504,
      "grad_norm": 2.625,
      "learning_rate": 3.620878992499825e-05,
      "loss": 0.8171,
      "step": 298090
    },
    {
      "epoch": 1.044766180557746,
      "grad_norm": 2.875,
      "learning_rate": 3.6208140896334547e-05,
      "loss": 0.8561,
      "step": 298100
    },
    {
      "epoch": 1.0448012280646417,
      "grad_norm": 3.8125,
      "learning_rate": 3.620749186767085e-05,
      "loss": 0.9095,
      "step": 298110
    },
    {
      "epoch": 1.0448362755715372,
      "grad_norm": 2.59375,
      "learning_rate": 3.620684283900714e-05,
      "loss": 0.9411,
      "step": 298120
    },
    {
      "epoch": 1.0448713230784328,
      "grad_norm": 3.3125,
      "learning_rate": 3.6206193810343444e-05,
      "loss": 0.9185,
      "step": 298130
    },
    {
      "epoch": 1.0449063705853283,
      "grad_norm": 2.640625,
      "learning_rate": 3.620554478167974e-05,
      "loss": 0.8638,
      "step": 298140
    },
    {
      "epoch": 1.044941418092224,
      "grad_norm": 2.703125,
      "learning_rate": 3.620489575301604e-05,
      "loss": 0.8439,
      "step": 298150
    },
    {
      "epoch": 1.0449764655991196,
      "grad_norm": 3.171875,
      "learning_rate": 3.6204246724352335e-05,
      "loss": 0.9177,
      "step": 298160
    },
    {
      "epoch": 1.0450115131060151,
      "grad_norm": 2.953125,
      "learning_rate": 3.6203597695688636e-05,
      "loss": 0.8549,
      "step": 298170
    },
    {
      "epoch": 1.045046560612911,
      "grad_norm": 3.546875,
      "learning_rate": 3.620294866702493e-05,
      "loss": 0.9343,
      "step": 298180
    },
    {
      "epoch": 1.0450816081198064,
      "grad_norm": 3.0,
      "learning_rate": 3.620229963836123e-05,
      "loss": 0.8792,
      "step": 298190
    },
    {
      "epoch": 1.045116655626702,
      "grad_norm": 2.828125,
      "learning_rate": 3.6201650609697527e-05,
      "loss": 0.8979,
      "step": 298200
    },
    {
      "epoch": 1.0451517031335975,
      "grad_norm": 2.5,
      "learning_rate": 3.620100158103383e-05,
      "loss": 0.814,
      "step": 298210
    },
    {
      "epoch": 1.0451867506404933,
      "grad_norm": 2.609375,
      "learning_rate": 3.620035255237012e-05,
      "loss": 0.9048,
      "step": 298220
    },
    {
      "epoch": 1.0452217981473888,
      "grad_norm": 2.703125,
      "learning_rate": 3.6199703523706424e-05,
      "loss": 0.8373,
      "step": 298230
    },
    {
      "epoch": 1.0452568456542843,
      "grad_norm": 2.578125,
      "learning_rate": 3.6199054495042725e-05,
      "loss": 0.8331,
      "step": 298240
    },
    {
      "epoch": 1.0452918931611799,
      "grad_norm": 2.3125,
      "learning_rate": 3.619840546637902e-05,
      "loss": 0.8747,
      "step": 298250
    },
    {
      "epoch": 1.0453269406680756,
      "grad_norm": 3.53125,
      "learning_rate": 3.619775643771532e-05,
      "loss": 0.8665,
      "step": 298260
    },
    {
      "epoch": 1.0453619881749712,
      "grad_norm": 2.84375,
      "learning_rate": 3.6197107409051616e-05,
      "loss": 0.8171,
      "step": 298270
    },
    {
      "epoch": 1.0453970356818667,
      "grad_norm": 2.8125,
      "learning_rate": 3.619645838038792e-05,
      "loss": 0.8467,
      "step": 298280
    },
    {
      "epoch": 1.0454320831887625,
      "grad_norm": 2.984375,
      "learning_rate": 3.619580935172421e-05,
      "loss": 0.7984,
      "step": 298290
    },
    {
      "epoch": 1.045467130695658,
      "grad_norm": 2.8125,
      "learning_rate": 3.619516032306051e-05,
      "loss": 0.9209,
      "step": 298300
    },
    {
      "epoch": 1.0455021782025535,
      "grad_norm": 2.9375,
      "learning_rate": 3.619451129439681e-05,
      "loss": 0.7944,
      "step": 298310
    },
    {
      "epoch": 1.045537225709449,
      "grad_norm": 2.625,
      "learning_rate": 3.619386226573311e-05,
      "loss": 0.8657,
      "step": 298320
    },
    {
      "epoch": 1.0455722732163448,
      "grad_norm": 2.796875,
      "learning_rate": 3.6193213237069404e-05,
      "loss": 0.8383,
      "step": 298330
    },
    {
      "epoch": 1.0456073207232404,
      "grad_norm": 2.875,
      "learning_rate": 3.61925642084057e-05,
      "loss": 0.9522,
      "step": 298340
    },
    {
      "epoch": 1.045642368230136,
      "grad_norm": 2.703125,
      "learning_rate": 3.6191915179742e-05,
      "loss": 0.8248,
      "step": 298350
    },
    {
      "epoch": 1.0456774157370314,
      "grad_norm": 2.734375,
      "learning_rate": 3.6191266151078295e-05,
      "loss": 0.8334,
      "step": 298360
    },
    {
      "epoch": 1.0457124632439272,
      "grad_norm": 2.8125,
      "learning_rate": 3.6190617122414596e-05,
      "loss": 0.8978,
      "step": 298370
    },
    {
      "epoch": 1.0457475107508227,
      "grad_norm": 2.90625,
      "learning_rate": 3.618996809375089e-05,
      "loss": 0.8391,
      "step": 298380
    },
    {
      "epoch": 1.0457825582577183,
      "grad_norm": 2.921875,
      "learning_rate": 3.618931906508719e-05,
      "loss": 0.8456,
      "step": 298390
    },
    {
      "epoch": 1.045817605764614,
      "grad_norm": 3.046875,
      "learning_rate": 3.6188670036423487e-05,
      "loss": 0.8697,
      "step": 298400
    },
    {
      "epoch": 1.0458526532715096,
      "grad_norm": 2.8125,
      "learning_rate": 3.618802100775979e-05,
      "loss": 0.7919,
      "step": 298410
    },
    {
      "epoch": 1.045887700778405,
      "grad_norm": 2.953125,
      "learning_rate": 3.618737197909608e-05,
      "loss": 0.8743,
      "step": 298420
    },
    {
      "epoch": 1.0459227482853006,
      "grad_norm": 2.828125,
      "learning_rate": 3.6186722950432384e-05,
      "loss": 0.8744,
      "step": 298430
    },
    {
      "epoch": 1.0459577957921964,
      "grad_norm": 2.59375,
      "learning_rate": 3.618607392176868e-05,
      "loss": 0.773,
      "step": 298440
    },
    {
      "epoch": 1.045992843299092,
      "grad_norm": 2.796875,
      "learning_rate": 3.618542489310498e-05,
      "loss": 0.8916,
      "step": 298450
    },
    {
      "epoch": 1.0460278908059875,
      "grad_norm": 2.84375,
      "learning_rate": 3.618477586444128e-05,
      "loss": 0.8424,
      "step": 298460
    },
    {
      "epoch": 1.0460629383128832,
      "grad_norm": 2.796875,
      "learning_rate": 3.6184126835777576e-05,
      "loss": 0.8348,
      "step": 298470
    },
    {
      "epoch": 1.0460979858197788,
      "grad_norm": 2.875,
      "learning_rate": 3.618347780711388e-05,
      "loss": 0.7838,
      "step": 298480
    },
    {
      "epoch": 1.0461330333266743,
      "grad_norm": 2.890625,
      "learning_rate": 3.618282877845017e-05,
      "loss": 0.903,
      "step": 298490
    },
    {
      "epoch": 1.0461680808335698,
      "grad_norm": 3.03125,
      "learning_rate": 3.618217974978647e-05,
      "loss": 0.8933,
      "step": 298500
    },
    {
      "epoch": 1.0462031283404656,
      "grad_norm": 2.984375,
      "learning_rate": 3.618153072112277e-05,
      "loss": 0.8682,
      "step": 298510
    },
    {
      "epoch": 1.0462381758473611,
      "grad_norm": 2.609375,
      "learning_rate": 3.618088169245907e-05,
      "loss": 0.8986,
      "step": 298520
    },
    {
      "epoch": 1.0462732233542567,
      "grad_norm": 2.765625,
      "learning_rate": 3.6180232663795364e-05,
      "loss": 0.8373,
      "step": 298530
    },
    {
      "epoch": 1.0463082708611522,
      "grad_norm": 3.4375,
      "learning_rate": 3.6179583635131665e-05,
      "loss": 0.8269,
      "step": 298540
    },
    {
      "epoch": 1.046343318368048,
      "grad_norm": 2.25,
      "learning_rate": 3.617893460646796e-05,
      "loss": 0.8247,
      "step": 298550
    },
    {
      "epoch": 1.0463783658749435,
      "grad_norm": 2.484375,
      "learning_rate": 3.617828557780426e-05,
      "loss": 0.9228,
      "step": 298560
    },
    {
      "epoch": 1.046413413381839,
      "grad_norm": 2.546875,
      "learning_rate": 3.6177636549140556e-05,
      "loss": 0.8018,
      "step": 298570
    },
    {
      "epoch": 1.0464484608887348,
      "grad_norm": 2.8125,
      "learning_rate": 3.617698752047686e-05,
      "loss": 0.8433,
      "step": 298580
    },
    {
      "epoch": 1.0464835083956303,
      "grad_norm": 2.96875,
      "learning_rate": 3.617633849181315e-05,
      "loss": 0.8968,
      "step": 298590
    },
    {
      "epoch": 1.0465185559025258,
      "grad_norm": 3.28125,
      "learning_rate": 3.617568946314945e-05,
      "loss": 0.9241,
      "step": 298600
    },
    {
      "epoch": 1.0465536034094214,
      "grad_norm": 3.15625,
      "learning_rate": 3.6175040434485755e-05,
      "loss": 0.8564,
      "step": 298610
    },
    {
      "epoch": 1.0465886509163171,
      "grad_norm": 2.953125,
      "learning_rate": 3.617439140582205e-05,
      "loss": 1.0432,
      "step": 298620
    },
    {
      "epoch": 1.0466236984232127,
      "grad_norm": 3.1875,
      "learning_rate": 3.617374237715835e-05,
      "loss": 0.8634,
      "step": 298630
    },
    {
      "epoch": 1.0466587459301082,
      "grad_norm": 2.796875,
      "learning_rate": 3.6173093348494645e-05,
      "loss": 0.8588,
      "step": 298640
    },
    {
      "epoch": 1.046693793437004,
      "grad_norm": 2.53125,
      "learning_rate": 3.617244431983095e-05,
      "loss": 0.8549,
      "step": 298650
    },
    {
      "epoch": 1.0467288409438995,
      "grad_norm": 3.109375,
      "learning_rate": 3.617179529116724e-05,
      "loss": 0.8506,
      "step": 298660
    },
    {
      "epoch": 1.046763888450795,
      "grad_norm": 3.4375,
      "learning_rate": 3.617114626250354e-05,
      "loss": 0.9062,
      "step": 298670
    },
    {
      "epoch": 1.0467989359576906,
      "grad_norm": 2.796875,
      "learning_rate": 3.617049723383984e-05,
      "loss": 0.8285,
      "step": 298680
    },
    {
      "epoch": 1.0468339834645863,
      "grad_norm": 3.140625,
      "learning_rate": 3.616984820517614e-05,
      "loss": 0.8057,
      "step": 298690
    },
    {
      "epoch": 1.0468690309714819,
      "grad_norm": 3.25,
      "learning_rate": 3.616919917651243e-05,
      "loss": 0.9053,
      "step": 298700
    },
    {
      "epoch": 1.0469040784783774,
      "grad_norm": 3.234375,
      "learning_rate": 3.616855014784873e-05,
      "loss": 0.8985,
      "step": 298710
    },
    {
      "epoch": 1.046939125985273,
      "grad_norm": 2.625,
      "learning_rate": 3.616790111918503e-05,
      "loss": 0.9035,
      "step": 298720
    },
    {
      "epoch": 1.0469741734921687,
      "grad_norm": 3.171875,
      "learning_rate": 3.6167252090521324e-05,
      "loss": 0.8756,
      "step": 298730
    },
    {
      "epoch": 1.0470092209990642,
      "grad_norm": 3.546875,
      "learning_rate": 3.6166603061857625e-05,
      "loss": 0.9221,
      "step": 298740
    },
    {
      "epoch": 1.0470442685059598,
      "grad_norm": 2.828125,
      "learning_rate": 3.616595403319392e-05,
      "loss": 0.9616,
      "step": 298750
    },
    {
      "epoch": 1.0470793160128555,
      "grad_norm": 2.921875,
      "learning_rate": 3.616530500453022e-05,
      "loss": 0.8977,
      "step": 298760
    },
    {
      "epoch": 1.047114363519751,
      "grad_norm": 2.71875,
      "learning_rate": 3.6164655975866516e-05,
      "loss": 0.8231,
      "step": 298770
    },
    {
      "epoch": 1.0471494110266466,
      "grad_norm": 2.71875,
      "learning_rate": 3.616400694720282e-05,
      "loss": 0.8662,
      "step": 298780
    },
    {
      "epoch": 1.0471844585335421,
      "grad_norm": 2.671875,
      "learning_rate": 3.616335791853911e-05,
      "loss": 0.8364,
      "step": 298790
    },
    {
      "epoch": 1.047219506040438,
      "grad_norm": 2.875,
      "learning_rate": 3.616270888987541e-05,
      "loss": 0.8516,
      "step": 298800
    },
    {
      "epoch": 1.0472545535473334,
      "grad_norm": 3.515625,
      "learning_rate": 3.616205986121171e-05,
      "loss": 0.8805,
      "step": 298810
    },
    {
      "epoch": 1.047289601054229,
      "grad_norm": 2.796875,
      "learning_rate": 3.616141083254801e-05,
      "loss": 0.8287,
      "step": 298820
    },
    {
      "epoch": 1.0473246485611245,
      "grad_norm": 3.3125,
      "learning_rate": 3.616076180388431e-05,
      "loss": 0.8281,
      "step": 298830
    },
    {
      "epoch": 1.0473596960680203,
      "grad_norm": 3.5,
      "learning_rate": 3.6160112775220605e-05,
      "loss": 0.9148,
      "step": 298840
    },
    {
      "epoch": 1.0473947435749158,
      "grad_norm": 2.9375,
      "learning_rate": 3.615946374655691e-05,
      "loss": 0.8838,
      "step": 298850
    },
    {
      "epoch": 1.0474297910818113,
      "grad_norm": 2.859375,
      "learning_rate": 3.61588147178932e-05,
      "loss": 0.9487,
      "step": 298860
    },
    {
      "epoch": 1.047464838588707,
      "grad_norm": 2.921875,
      "learning_rate": 3.61581656892295e-05,
      "loss": 0.8782,
      "step": 298870
    },
    {
      "epoch": 1.0474998860956026,
      "grad_norm": 2.796875,
      "learning_rate": 3.61575166605658e-05,
      "loss": 0.9287,
      "step": 298880
    },
    {
      "epoch": 1.0475349336024982,
      "grad_norm": 3.1875,
      "learning_rate": 3.61568676319021e-05,
      "loss": 0.91,
      "step": 298890
    },
    {
      "epoch": 1.0475699811093937,
      "grad_norm": 2.875,
      "learning_rate": 3.615621860323839e-05,
      "loss": 0.8495,
      "step": 298900
    },
    {
      "epoch": 1.0476050286162895,
      "grad_norm": 2.953125,
      "learning_rate": 3.6155569574574695e-05,
      "loss": 0.9024,
      "step": 298910
    },
    {
      "epoch": 1.047640076123185,
      "grad_norm": 3.015625,
      "learning_rate": 3.615492054591099e-05,
      "loss": 0.8741,
      "step": 298920
    },
    {
      "epoch": 1.0476751236300805,
      "grad_norm": 3.09375,
      "learning_rate": 3.615427151724729e-05,
      "loss": 0.9135,
      "step": 298930
    },
    {
      "epoch": 1.047710171136976,
      "grad_norm": 2.859375,
      "learning_rate": 3.6153622488583585e-05,
      "loss": 0.9131,
      "step": 298940
    },
    {
      "epoch": 1.0477452186438718,
      "grad_norm": 3.171875,
      "learning_rate": 3.615297345991989e-05,
      "loss": 0.9154,
      "step": 298950
    },
    {
      "epoch": 1.0477802661507674,
      "grad_norm": 2.96875,
      "learning_rate": 3.615232443125619e-05,
      "loss": 0.829,
      "step": 298960
    },
    {
      "epoch": 1.047815313657663,
      "grad_norm": 2.71875,
      "learning_rate": 3.615167540259248e-05,
      "loss": 0.8801,
      "step": 298970
    },
    {
      "epoch": 1.0478503611645587,
      "grad_norm": 3.21875,
      "learning_rate": 3.6151026373928784e-05,
      "loss": 0.8039,
      "step": 298980
    },
    {
      "epoch": 1.0478854086714542,
      "grad_norm": 2.859375,
      "learning_rate": 3.615037734526508e-05,
      "loss": 0.9403,
      "step": 298990
    },
    {
      "epoch": 1.0479204561783497,
      "grad_norm": 3.234375,
      "learning_rate": 3.614972831660138e-05,
      "loss": 0.8977,
      "step": 299000
    },
    {
      "epoch": 1.0479555036852453,
      "grad_norm": 3.1875,
      "learning_rate": 3.6149079287937675e-05,
      "loss": 0.8642,
      "step": 299010
    },
    {
      "epoch": 1.047990551192141,
      "grad_norm": 2.46875,
      "learning_rate": 3.6148430259273976e-05,
      "loss": 0.8755,
      "step": 299020
    },
    {
      "epoch": 1.0480255986990366,
      "grad_norm": 2.8125,
      "learning_rate": 3.614778123061027e-05,
      "loss": 0.9182,
      "step": 299030
    },
    {
      "epoch": 1.048060646205932,
      "grad_norm": 3.046875,
      "learning_rate": 3.614713220194657e-05,
      "loss": 0.8997,
      "step": 299040
    },
    {
      "epoch": 1.0480956937128276,
      "grad_norm": 2.921875,
      "learning_rate": 3.614648317328287e-05,
      "loss": 0.8324,
      "step": 299050
    },
    {
      "epoch": 1.0481307412197234,
      "grad_norm": 2.921875,
      "learning_rate": 3.614583414461917e-05,
      "loss": 0.9095,
      "step": 299060
    },
    {
      "epoch": 1.048165788726619,
      "grad_norm": 3.078125,
      "learning_rate": 3.614518511595546e-05,
      "loss": 0.8455,
      "step": 299070
    },
    {
      "epoch": 1.0482008362335145,
      "grad_norm": 2.703125,
      "learning_rate": 3.614453608729176e-05,
      "loss": 0.8558,
      "step": 299080
    },
    {
      "epoch": 1.0482358837404102,
      "grad_norm": 3.453125,
      "learning_rate": 3.614388705862806e-05,
      "loss": 0.9659,
      "step": 299090
    },
    {
      "epoch": 1.0482709312473057,
      "grad_norm": 2.828125,
      "learning_rate": 3.614323802996435e-05,
      "loss": 0.7972,
      "step": 299100
    },
    {
      "epoch": 1.0483059787542013,
      "grad_norm": 3.921875,
      "learning_rate": 3.6142589001300655e-05,
      "loss": 0.7673,
      "step": 299110
    },
    {
      "epoch": 1.0483410262610968,
      "grad_norm": 2.5,
      "learning_rate": 3.614193997263695e-05,
      "loss": 0.8457,
      "step": 299120
    },
    {
      "epoch": 1.0483760737679926,
      "grad_norm": 2.78125,
      "learning_rate": 3.614129094397325e-05,
      "loss": 1.0209,
      "step": 299130
    },
    {
      "epoch": 1.0484111212748881,
      "grad_norm": 3.375,
      "learning_rate": 3.6140641915309545e-05,
      "loss": 0.8533,
      "step": 299140
    },
    {
      "epoch": 1.0484461687817836,
      "grad_norm": 3.25,
      "learning_rate": 3.613999288664585e-05,
      "loss": 0.8484,
      "step": 299150
    },
    {
      "epoch": 1.0484812162886794,
      "grad_norm": 3.21875,
      "learning_rate": 3.613934385798214e-05,
      "loss": 0.9194,
      "step": 299160
    },
    {
      "epoch": 1.048516263795575,
      "grad_norm": 3.234375,
      "learning_rate": 3.613869482931844e-05,
      "loss": 0.9544,
      "step": 299170
    },
    {
      "epoch": 1.0485513113024705,
      "grad_norm": 2.53125,
      "learning_rate": 3.613804580065474e-05,
      "loss": 0.9171,
      "step": 299180
    },
    {
      "epoch": 1.048586358809366,
      "grad_norm": 2.953125,
      "learning_rate": 3.613739677199104e-05,
      "loss": 0.9295,
      "step": 299190
    },
    {
      "epoch": 1.0486214063162618,
      "grad_norm": 2.8125,
      "learning_rate": 3.613674774332734e-05,
      "loss": 0.9083,
      "step": 299200
    },
    {
      "epoch": 1.0486564538231573,
      "grad_norm": 2.875,
      "learning_rate": 3.6136098714663635e-05,
      "loss": 0.9715,
      "step": 299210
    },
    {
      "epoch": 1.0486915013300528,
      "grad_norm": 2.53125,
      "learning_rate": 3.6135449685999936e-05,
      "loss": 0.787,
      "step": 299220
    },
    {
      "epoch": 1.0487265488369484,
      "grad_norm": 3.203125,
      "learning_rate": 3.613480065733623e-05,
      "loss": 0.9139,
      "step": 299230
    },
    {
      "epoch": 1.0487615963438441,
      "grad_norm": 2.96875,
      "learning_rate": 3.613415162867253e-05,
      "loss": 0.9074,
      "step": 299240
    },
    {
      "epoch": 1.0487966438507397,
      "grad_norm": 3.171875,
      "learning_rate": 3.613350260000883e-05,
      "loss": 0.8251,
      "step": 299250
    },
    {
      "epoch": 1.0488316913576352,
      "grad_norm": 2.4375,
      "learning_rate": 3.613285357134513e-05,
      "loss": 0.8203,
      "step": 299260
    },
    {
      "epoch": 1.048866738864531,
      "grad_norm": 3.265625,
      "learning_rate": 3.613220454268142e-05,
      "loss": 0.9054,
      "step": 299270
    },
    {
      "epoch": 1.0489017863714265,
      "grad_norm": 3.125,
      "learning_rate": 3.6131555514017724e-05,
      "loss": 0.8696,
      "step": 299280
    },
    {
      "epoch": 1.048936833878322,
      "grad_norm": 2.9375,
      "learning_rate": 3.613090648535402e-05,
      "loss": 0.8092,
      "step": 299290
    },
    {
      "epoch": 1.0489718813852176,
      "grad_norm": 3.078125,
      "learning_rate": 3.613025745669032e-05,
      "loss": 0.9183,
      "step": 299300
    },
    {
      "epoch": 1.0490069288921133,
      "grad_norm": 3.109375,
      "learning_rate": 3.6129608428026615e-05,
      "loss": 0.8499,
      "step": 299310
    },
    {
      "epoch": 1.0490419763990089,
      "grad_norm": 2.375,
      "learning_rate": 3.6128959399362916e-05,
      "loss": 0.9348,
      "step": 299320
    },
    {
      "epoch": 1.0490770239059044,
      "grad_norm": 2.6875,
      "learning_rate": 3.612831037069922e-05,
      "loss": 0.8737,
      "step": 299330
    },
    {
      "epoch": 1.0491120714128002,
      "grad_norm": 2.921875,
      "learning_rate": 3.612766134203551e-05,
      "loss": 0.8622,
      "step": 299340
    },
    {
      "epoch": 1.0491471189196957,
      "grad_norm": 2.828125,
      "learning_rate": 3.6127012313371814e-05,
      "loss": 0.8814,
      "step": 299350
    },
    {
      "epoch": 1.0491821664265912,
      "grad_norm": 2.734375,
      "learning_rate": 3.612636328470811e-05,
      "loss": 0.8597,
      "step": 299360
    },
    {
      "epoch": 1.0492172139334868,
      "grad_norm": 2.765625,
      "learning_rate": 3.612571425604441e-05,
      "loss": 0.8391,
      "step": 299370
    },
    {
      "epoch": 1.0492522614403825,
      "grad_norm": 2.9375,
      "learning_rate": 3.6125065227380704e-05,
      "loss": 0.8534,
      "step": 299380
    },
    {
      "epoch": 1.049287308947278,
      "grad_norm": 2.703125,
      "learning_rate": 3.6124416198717006e-05,
      "loss": 0.9048,
      "step": 299390
    },
    {
      "epoch": 1.0493223564541736,
      "grad_norm": 2.9375,
      "learning_rate": 3.61237671700533e-05,
      "loss": 0.9369,
      "step": 299400
    },
    {
      "epoch": 1.0493574039610691,
      "grad_norm": 2.984375,
      "learning_rate": 3.61231181413896e-05,
      "loss": 0.8289,
      "step": 299410
    },
    {
      "epoch": 1.049392451467965,
      "grad_norm": 3.546875,
      "learning_rate": 3.6122469112725896e-05,
      "loss": 0.8798,
      "step": 299420
    },
    {
      "epoch": 1.0494274989748604,
      "grad_norm": 2.953125,
      "learning_rate": 3.61218200840622e-05,
      "loss": 0.8806,
      "step": 299430
    },
    {
      "epoch": 1.049462546481756,
      "grad_norm": 2.484375,
      "learning_rate": 3.612117105539849e-05,
      "loss": 0.8572,
      "step": 299440
    },
    {
      "epoch": 1.0494975939886517,
      "grad_norm": 3.109375,
      "learning_rate": 3.612052202673479e-05,
      "loss": 0.8793,
      "step": 299450
    },
    {
      "epoch": 1.0495326414955473,
      "grad_norm": 3.171875,
      "learning_rate": 3.611987299807109e-05,
      "loss": 0.849,
      "step": 299460
    },
    {
      "epoch": 1.0495676890024428,
      "grad_norm": 3.546875,
      "learning_rate": 3.611922396940738e-05,
      "loss": 0.9017,
      "step": 299470
    },
    {
      "epoch": 1.0496027365093383,
      "grad_norm": 2.78125,
      "learning_rate": 3.6118574940743684e-05,
      "loss": 0.9979,
      "step": 299480
    },
    {
      "epoch": 1.049637784016234,
      "grad_norm": 2.609375,
      "learning_rate": 3.611792591207998e-05,
      "loss": 0.9135,
      "step": 299490
    },
    {
      "epoch": 1.0496728315231296,
      "grad_norm": 2.375,
      "learning_rate": 3.611727688341628e-05,
      "loss": 0.8609,
      "step": 299500
    },
    {
      "epoch": 1.0497078790300252,
      "grad_norm": 3.109375,
      "learning_rate": 3.6116627854752575e-05,
      "loss": 0.8976,
      "step": 299510
    },
    {
      "epoch": 1.0497429265369207,
      "grad_norm": 3.15625,
      "learning_rate": 3.6115978826088876e-05,
      "loss": 0.8439,
      "step": 299520
    },
    {
      "epoch": 1.0497779740438165,
      "grad_norm": 2.875,
      "learning_rate": 3.611532979742517e-05,
      "loss": 0.9239,
      "step": 299530
    },
    {
      "epoch": 1.049813021550712,
      "grad_norm": 3.25,
      "learning_rate": 3.611468076876147e-05,
      "loss": 0.8971,
      "step": 299540
    },
    {
      "epoch": 1.0498480690576075,
      "grad_norm": 2.65625,
      "learning_rate": 3.611403174009777e-05,
      "loss": 0.8317,
      "step": 299550
    },
    {
      "epoch": 1.0498831165645033,
      "grad_norm": 3.015625,
      "learning_rate": 3.611338271143407e-05,
      "loss": 0.8041,
      "step": 299560
    },
    {
      "epoch": 1.0499181640713988,
      "grad_norm": 2.78125,
      "learning_rate": 3.611273368277037e-05,
      "loss": 0.8802,
      "step": 299570
    },
    {
      "epoch": 1.0499532115782944,
      "grad_norm": 2.65625,
      "learning_rate": 3.6112084654106664e-05,
      "loss": 0.9067,
      "step": 299580
    },
    {
      "epoch": 1.04998825908519,
      "grad_norm": 2.765625,
      "learning_rate": 3.6111435625442966e-05,
      "loss": 0.8983,
      "step": 299590
    },
    {
      "epoch": 1.0500233065920856,
      "grad_norm": 2.6875,
      "learning_rate": 3.611078659677926e-05,
      "loss": 0.8755,
      "step": 299600
    },
    {
      "epoch": 1.0500583540989812,
      "grad_norm": 2.9375,
      "learning_rate": 3.611013756811556e-05,
      "loss": 0.8307,
      "step": 299610
    },
    {
      "epoch": 1.0500934016058767,
      "grad_norm": 3.015625,
      "learning_rate": 3.6109488539451856e-05,
      "loss": 0.9402,
      "step": 299620
    },
    {
      "epoch": 1.0501284491127723,
      "grad_norm": 3.171875,
      "learning_rate": 3.610883951078816e-05,
      "loss": 0.814,
      "step": 299630
    },
    {
      "epoch": 1.050163496619668,
      "grad_norm": 2.65625,
      "learning_rate": 3.610819048212445e-05,
      "loss": 0.8592,
      "step": 299640
    },
    {
      "epoch": 1.0501985441265635,
      "grad_norm": 3.09375,
      "learning_rate": 3.6107541453460754e-05,
      "loss": 0.8424,
      "step": 299650
    },
    {
      "epoch": 1.050233591633459,
      "grad_norm": 3.234375,
      "learning_rate": 3.610689242479705e-05,
      "loss": 0.8606,
      "step": 299660
    },
    {
      "epoch": 1.0502686391403548,
      "grad_norm": 3.078125,
      "learning_rate": 3.610624339613335e-05,
      "loss": 0.9127,
      "step": 299670
    },
    {
      "epoch": 1.0503036866472504,
      "grad_norm": 2.40625,
      "learning_rate": 3.6105594367469644e-05,
      "loss": 0.8582,
      "step": 299680
    },
    {
      "epoch": 1.050338734154146,
      "grad_norm": 2.5625,
      "learning_rate": 3.6104945338805946e-05,
      "loss": 0.8208,
      "step": 299690
    },
    {
      "epoch": 1.0503737816610415,
      "grad_norm": 2.8125,
      "learning_rate": 3.610429631014225e-05,
      "loss": 0.8057,
      "step": 299700
    },
    {
      "epoch": 1.0504088291679372,
      "grad_norm": 2.703125,
      "learning_rate": 3.610364728147854e-05,
      "loss": 0.8648,
      "step": 299710
    },
    {
      "epoch": 1.0504438766748327,
      "grad_norm": 3.078125,
      "learning_rate": 3.610299825281484e-05,
      "loss": 0.8224,
      "step": 299720
    },
    {
      "epoch": 1.0504789241817283,
      "grad_norm": 3.078125,
      "learning_rate": 3.610234922415114e-05,
      "loss": 0.8452,
      "step": 299730
    },
    {
      "epoch": 1.0505139716886238,
      "grad_norm": 3.140625,
      "learning_rate": 3.610170019548744e-05,
      "loss": 0.9161,
      "step": 299740
    },
    {
      "epoch": 1.0505490191955196,
      "grad_norm": 2.796875,
      "learning_rate": 3.6101051166823734e-05,
      "loss": 0.9184,
      "step": 299750
    },
    {
      "epoch": 1.050584066702415,
      "grad_norm": 2.890625,
      "learning_rate": 3.6100402138160035e-05,
      "loss": 0.8517,
      "step": 299760
    },
    {
      "epoch": 1.0506191142093106,
      "grad_norm": 3.421875,
      "learning_rate": 3.609975310949633e-05,
      "loss": 0.9658,
      "step": 299770
    },
    {
      "epoch": 1.0506541617162064,
      "grad_norm": 2.546875,
      "learning_rate": 3.609910408083263e-05,
      "loss": 0.7968,
      "step": 299780
    },
    {
      "epoch": 1.050689209223102,
      "grad_norm": 2.703125,
      "learning_rate": 3.6098455052168926e-05,
      "loss": 0.895,
      "step": 299790
    },
    {
      "epoch": 1.0507242567299975,
      "grad_norm": 3.265625,
      "learning_rate": 3.609780602350523e-05,
      "loss": 0.8784,
      "step": 299800
    },
    {
      "epoch": 1.050759304236893,
      "grad_norm": 3.015625,
      "learning_rate": 3.609715699484152e-05,
      "loss": 0.8733,
      "step": 299810
    },
    {
      "epoch": 1.0507943517437888,
      "grad_norm": 3.0,
      "learning_rate": 3.609650796617782e-05,
      "loss": 0.7846,
      "step": 299820
    },
    {
      "epoch": 1.0508293992506843,
      "grad_norm": 2.859375,
      "learning_rate": 3.609585893751412e-05,
      "loss": 0.8666,
      "step": 299830
    },
    {
      "epoch": 1.0508644467575798,
      "grad_norm": 2.484375,
      "learning_rate": 3.609520990885041e-05,
      "loss": 0.8546,
      "step": 299840
    },
    {
      "epoch": 1.0508994942644756,
      "grad_norm": 2.75,
      "learning_rate": 3.6094560880186714e-05,
      "loss": 0.8498,
      "step": 299850
    },
    {
      "epoch": 1.0509345417713711,
      "grad_norm": 2.8125,
      "learning_rate": 3.609391185152301e-05,
      "loss": 0.8302,
      "step": 299860
    },
    {
      "epoch": 1.0509695892782667,
      "grad_norm": 2.640625,
      "learning_rate": 3.609326282285931e-05,
      "loss": 0.8416,
      "step": 299870
    },
    {
      "epoch": 1.0510046367851622,
      "grad_norm": 2.78125,
      "learning_rate": 3.6092613794195604e-05,
      "loss": 0.9312,
      "step": 299880
    },
    {
      "epoch": 1.051039684292058,
      "grad_norm": 2.890625,
      "learning_rate": 3.6091964765531906e-05,
      "loss": 0.9053,
      "step": 299890
    },
    {
      "epoch": 1.0510747317989535,
      "grad_norm": 2.75,
      "learning_rate": 3.60913157368682e-05,
      "loss": 0.8363,
      "step": 299900
    },
    {
      "epoch": 1.051109779305849,
      "grad_norm": 3.015625,
      "learning_rate": 3.60906667082045e-05,
      "loss": 0.9139,
      "step": 299910
    },
    {
      "epoch": 1.0511448268127446,
      "grad_norm": 2.765625,
      "learning_rate": 3.60900176795408e-05,
      "loss": 0.9193,
      "step": 299920
    },
    {
      "epoch": 1.0511798743196403,
      "grad_norm": 2.921875,
      "learning_rate": 3.60893686508771e-05,
      "loss": 0.9164,
      "step": 299930
    },
    {
      "epoch": 1.0512149218265359,
      "grad_norm": 3.125,
      "learning_rate": 3.60887196222134e-05,
      "loss": 0.8243,
      "step": 299940
    },
    {
      "epoch": 1.0512499693334314,
      "grad_norm": 3.078125,
      "learning_rate": 3.6088070593549694e-05,
      "loss": 0.9034,
      "step": 299950
    },
    {
      "epoch": 1.0512850168403272,
      "grad_norm": 2.75,
      "learning_rate": 3.6087421564885995e-05,
      "loss": 0.8992,
      "step": 299960
    },
    {
      "epoch": 1.0513200643472227,
      "grad_norm": 2.984375,
      "learning_rate": 3.608677253622229e-05,
      "loss": 0.921,
      "step": 299970
    },
    {
      "epoch": 1.0513551118541182,
      "grad_norm": 2.765625,
      "learning_rate": 3.608612350755859e-05,
      "loss": 0.8473,
      "step": 299980
    },
    {
      "epoch": 1.0513901593610138,
      "grad_norm": 3.171875,
      "learning_rate": 3.6085474478894886e-05,
      "loss": 0.8148,
      "step": 299990
    },
    {
      "epoch": 1.0514252068679095,
      "grad_norm": 2.734375,
      "learning_rate": 3.608482545023119e-05,
      "loss": 0.831,
      "step": 300000
    },
    {
      "epoch": 1.0514252068679095,
      "eval_loss": 0.8164169788360596,
      "eval_runtime": 552.5141,
      "eval_samples_per_second": 688.554,
      "eval_steps_per_second": 57.38,
      "step": 300000
    },
    {
      "epoch": 1.051460254374805,
      "grad_norm": 3.09375,
      "learning_rate": 3.608417642156748e-05,
      "loss": 0.8541,
      "step": 300010
    },
    {
      "epoch": 1.0514953018817006,
      "grad_norm": 2.921875,
      "learning_rate": 3.608352739290378e-05,
      "loss": 0.8156,
      "step": 300020
    },
    {
      "epoch": 1.0515303493885964,
      "grad_norm": 3.421875,
      "learning_rate": 3.608287836424008e-05,
      "loss": 0.8448,
      "step": 300030
    },
    {
      "epoch": 1.0515653968954919,
      "grad_norm": 2.609375,
      "learning_rate": 3.608222933557638e-05,
      "loss": 0.8704,
      "step": 300040
    },
    {
      "epoch": 1.0516004444023874,
      "grad_norm": 3.140625,
      "learning_rate": 3.6081580306912674e-05,
      "loss": 0.8935,
      "step": 300050
    },
    {
      "epoch": 1.051635491909283,
      "grad_norm": 3.15625,
      "learning_rate": 3.6080931278248975e-05,
      "loss": 0.9302,
      "step": 300060
    },
    {
      "epoch": 1.0516705394161787,
      "grad_norm": 2.640625,
      "learning_rate": 3.6080282249585276e-05,
      "loss": 0.8053,
      "step": 300070
    },
    {
      "epoch": 1.0517055869230743,
      "grad_norm": 2.671875,
      "learning_rate": 3.607963322092157e-05,
      "loss": 0.7231,
      "step": 300080
    },
    {
      "epoch": 1.0517406344299698,
      "grad_norm": 2.875,
      "learning_rate": 3.607898419225787e-05,
      "loss": 0.9421,
      "step": 300090
    },
    {
      "epoch": 1.0517756819368653,
      "grad_norm": 2.921875,
      "learning_rate": 3.607833516359417e-05,
      "loss": 0.801,
      "step": 300100
    },
    {
      "epoch": 1.051810729443761,
      "grad_norm": 2.6875,
      "learning_rate": 3.607768613493047e-05,
      "loss": 0.8623,
      "step": 300110
    },
    {
      "epoch": 1.0518457769506566,
      "grad_norm": 3.328125,
      "learning_rate": 3.607703710626676e-05,
      "loss": 0.9725,
      "step": 300120
    },
    {
      "epoch": 1.0518808244575522,
      "grad_norm": 3.09375,
      "learning_rate": 3.6076388077603064e-05,
      "loss": 0.8215,
      "step": 300130
    },
    {
      "epoch": 1.051915871964448,
      "grad_norm": 3.03125,
      "learning_rate": 3.607573904893936e-05,
      "loss": 0.8549,
      "step": 300140
    },
    {
      "epoch": 1.0519509194713434,
      "grad_norm": 3.375,
      "learning_rate": 3.607509002027566e-05,
      "loss": 0.8785,
      "step": 300150
    },
    {
      "epoch": 1.051985966978239,
      "grad_norm": 3.140625,
      "learning_rate": 3.6074440991611955e-05,
      "loss": 0.8683,
      "step": 300160
    },
    {
      "epoch": 1.0520210144851345,
      "grad_norm": 2.421875,
      "learning_rate": 3.6073791962948256e-05,
      "loss": 0.8989,
      "step": 300170
    },
    {
      "epoch": 1.0520560619920303,
      "grad_norm": 2.859375,
      "learning_rate": 3.607314293428455e-05,
      "loss": 0.8204,
      "step": 300180
    },
    {
      "epoch": 1.0520911094989258,
      "grad_norm": 2.65625,
      "learning_rate": 3.607249390562085e-05,
      "loss": 0.8569,
      "step": 300190
    },
    {
      "epoch": 1.0521261570058213,
      "grad_norm": 2.90625,
      "learning_rate": 3.6071844876957154e-05,
      "loss": 0.9149,
      "step": 300200
    },
    {
      "epoch": 1.0521612045127169,
      "grad_norm": 3.4375,
      "learning_rate": 3.607119584829344e-05,
      "loss": 0.9188,
      "step": 300210
    },
    {
      "epoch": 1.0521962520196126,
      "grad_norm": 2.734375,
      "learning_rate": 3.607054681962974e-05,
      "loss": 0.9453,
      "step": 300220
    },
    {
      "epoch": 1.0522312995265082,
      "grad_norm": 2.4375,
      "learning_rate": 3.606989779096604e-05,
      "loss": 0.8053,
      "step": 300230
    },
    {
      "epoch": 1.0522663470334037,
      "grad_norm": 2.671875,
      "learning_rate": 3.606924876230234e-05,
      "loss": 0.8615,
      "step": 300240
    },
    {
      "epoch": 1.0523013945402995,
      "grad_norm": 2.921875,
      "learning_rate": 3.6068599733638634e-05,
      "loss": 0.9098,
      "step": 300250
    },
    {
      "epoch": 1.052336442047195,
      "grad_norm": 2.625,
      "learning_rate": 3.6067950704974935e-05,
      "loss": 0.821,
      "step": 300260
    },
    {
      "epoch": 1.0523714895540905,
      "grad_norm": 2.71875,
      "learning_rate": 3.606730167631123e-05,
      "loss": 0.8513,
      "step": 300270
    },
    {
      "epoch": 1.052406537060986,
      "grad_norm": 2.921875,
      "learning_rate": 3.606665264764753e-05,
      "loss": 0.8751,
      "step": 300280
    },
    {
      "epoch": 1.0524415845678818,
      "grad_norm": 3.1875,
      "learning_rate": 3.606600361898383e-05,
      "loss": 0.9196,
      "step": 300290
    },
    {
      "epoch": 1.0524766320747774,
      "grad_norm": 2.78125,
      "learning_rate": 3.606535459032013e-05,
      "loss": 0.7976,
      "step": 300300
    },
    {
      "epoch": 1.052511679581673,
      "grad_norm": 2.71875,
      "learning_rate": 3.606470556165643e-05,
      "loss": 0.8596,
      "step": 300310
    },
    {
      "epoch": 1.0525467270885684,
      "grad_norm": 2.5,
      "learning_rate": 3.606405653299272e-05,
      "loss": 0.8194,
      "step": 300320
    },
    {
      "epoch": 1.0525817745954642,
      "grad_norm": 2.921875,
      "learning_rate": 3.6063407504329024e-05,
      "loss": 0.9935,
      "step": 300330
    },
    {
      "epoch": 1.0526168221023597,
      "grad_norm": 2.875,
      "learning_rate": 3.606275847566532e-05,
      "loss": 0.8487,
      "step": 300340
    },
    {
      "epoch": 1.0526518696092553,
      "grad_norm": 2.671875,
      "learning_rate": 3.606210944700162e-05,
      "loss": 0.8684,
      "step": 300350
    },
    {
      "epoch": 1.052686917116151,
      "grad_norm": 3.109375,
      "learning_rate": 3.6061460418337915e-05,
      "loss": 0.8749,
      "step": 300360
    },
    {
      "epoch": 1.0527219646230466,
      "grad_norm": 3.109375,
      "learning_rate": 3.6060811389674216e-05,
      "loss": 0.8837,
      "step": 300370
    },
    {
      "epoch": 1.052757012129942,
      "grad_norm": 2.90625,
      "learning_rate": 3.606016236101051e-05,
      "loss": 0.8757,
      "step": 300380
    },
    {
      "epoch": 1.0527920596368376,
      "grad_norm": 3.09375,
      "learning_rate": 3.605951333234681e-05,
      "loss": 0.9504,
      "step": 300390
    },
    {
      "epoch": 1.0528271071437334,
      "grad_norm": 3.0,
      "learning_rate": 3.605886430368311e-05,
      "loss": 0.8972,
      "step": 300400
    },
    {
      "epoch": 1.052862154650629,
      "grad_norm": 3.0,
      "learning_rate": 3.605821527501941e-05,
      "loss": 0.973,
      "step": 300410
    },
    {
      "epoch": 1.0528972021575245,
      "grad_norm": 2.84375,
      "learning_rate": 3.60575662463557e-05,
      "loss": 0.8461,
      "step": 300420
    },
    {
      "epoch": 1.0529322496644202,
      "grad_norm": 2.5,
      "learning_rate": 3.6056917217692004e-05,
      "loss": 0.8162,
      "step": 300430
    },
    {
      "epoch": 1.0529672971713158,
      "grad_norm": 2.71875,
      "learning_rate": 3.6056268189028306e-05,
      "loss": 0.975,
      "step": 300440
    },
    {
      "epoch": 1.0530023446782113,
      "grad_norm": 3.046875,
      "learning_rate": 3.60556191603646e-05,
      "loss": 0.9033,
      "step": 300450
    },
    {
      "epoch": 1.0530373921851068,
      "grad_norm": 2.8125,
      "learning_rate": 3.60549701317009e-05,
      "loss": 0.911,
      "step": 300460
    },
    {
      "epoch": 1.0530724396920026,
      "grad_norm": 2.90625,
      "learning_rate": 3.6054321103037196e-05,
      "loss": 0.8786,
      "step": 300470
    },
    {
      "epoch": 1.0531074871988981,
      "grad_norm": 2.5625,
      "learning_rate": 3.60536720743735e-05,
      "loss": 0.9601,
      "step": 300480
    },
    {
      "epoch": 1.0531425347057937,
      "grad_norm": 2.796875,
      "learning_rate": 3.605302304570979e-05,
      "loss": 0.9382,
      "step": 300490
    },
    {
      "epoch": 1.0531775822126892,
      "grad_norm": 2.578125,
      "learning_rate": 3.6052374017046094e-05,
      "loss": 0.8565,
      "step": 300500
    },
    {
      "epoch": 1.053212629719585,
      "grad_norm": 2.65625,
      "learning_rate": 3.605172498838239e-05,
      "loss": 0.8906,
      "step": 300510
    },
    {
      "epoch": 1.0532476772264805,
      "grad_norm": 3.203125,
      "learning_rate": 3.605107595971869e-05,
      "loss": 0.8087,
      "step": 300520
    },
    {
      "epoch": 1.053282724733376,
      "grad_norm": 3.03125,
      "learning_rate": 3.6050426931054984e-05,
      "loss": 0.846,
      "step": 300530
    },
    {
      "epoch": 1.0533177722402718,
      "grad_norm": 2.921875,
      "learning_rate": 3.6049777902391286e-05,
      "loss": 0.9287,
      "step": 300540
    },
    {
      "epoch": 1.0533528197471673,
      "grad_norm": 3.109375,
      "learning_rate": 3.604912887372758e-05,
      "loss": 0.9016,
      "step": 300550
    },
    {
      "epoch": 1.0533878672540629,
      "grad_norm": 2.84375,
      "learning_rate": 3.604847984506388e-05,
      "loss": 0.8376,
      "step": 300560
    },
    {
      "epoch": 1.0534229147609584,
      "grad_norm": 2.84375,
      "learning_rate": 3.604783081640018e-05,
      "loss": 0.962,
      "step": 300570
    },
    {
      "epoch": 1.0534579622678542,
      "grad_norm": 2.734375,
      "learning_rate": 3.604718178773647e-05,
      "loss": 0.9472,
      "step": 300580
    },
    {
      "epoch": 1.0534930097747497,
      "grad_norm": 2.984375,
      "learning_rate": 3.604653275907277e-05,
      "loss": 0.8667,
      "step": 300590
    },
    {
      "epoch": 1.0535280572816452,
      "grad_norm": 3.0625,
      "learning_rate": 3.604588373040907e-05,
      "loss": 0.8872,
      "step": 300600
    },
    {
      "epoch": 1.0535631047885408,
      "grad_norm": 3.140625,
      "learning_rate": 3.604523470174537e-05,
      "loss": 0.9575,
      "step": 300610
    },
    {
      "epoch": 1.0535981522954365,
      "grad_norm": 3.21875,
      "learning_rate": 3.604458567308166e-05,
      "loss": 0.9475,
      "step": 300620
    },
    {
      "epoch": 1.053633199802332,
      "grad_norm": 2.75,
      "learning_rate": 3.6043936644417964e-05,
      "loss": 0.8469,
      "step": 300630
    },
    {
      "epoch": 1.0536682473092276,
      "grad_norm": 2.46875,
      "learning_rate": 3.604328761575426e-05,
      "loss": 0.8787,
      "step": 300640
    },
    {
      "epoch": 1.0537032948161233,
      "grad_norm": 4.0625,
      "learning_rate": 3.604263858709056e-05,
      "loss": 0.8776,
      "step": 300650
    },
    {
      "epoch": 1.0537383423230189,
      "grad_norm": 2.78125,
      "learning_rate": 3.604198955842686e-05,
      "loss": 0.9355,
      "step": 300660
    },
    {
      "epoch": 1.0537733898299144,
      "grad_norm": 2.6875,
      "learning_rate": 3.6041340529763156e-05,
      "loss": 0.7852,
      "step": 300670
    },
    {
      "epoch": 1.05380843733681,
      "grad_norm": 2.875,
      "learning_rate": 3.604069150109946e-05,
      "loss": 0.8914,
      "step": 300680
    },
    {
      "epoch": 1.0538434848437057,
      "grad_norm": 2.625,
      "learning_rate": 3.604004247243575e-05,
      "loss": 0.8212,
      "step": 300690
    },
    {
      "epoch": 1.0538785323506012,
      "grad_norm": 3.078125,
      "learning_rate": 3.6039393443772054e-05,
      "loss": 0.8681,
      "step": 300700
    },
    {
      "epoch": 1.0539135798574968,
      "grad_norm": 3.109375,
      "learning_rate": 3.603874441510835e-05,
      "loss": 0.9264,
      "step": 300710
    },
    {
      "epoch": 1.0539486273643925,
      "grad_norm": 3.234375,
      "learning_rate": 3.603809538644465e-05,
      "loss": 0.8329,
      "step": 300720
    },
    {
      "epoch": 1.053983674871288,
      "grad_norm": 2.765625,
      "learning_rate": 3.6037446357780944e-05,
      "loss": 0.8356,
      "step": 300730
    },
    {
      "epoch": 1.0540187223781836,
      "grad_norm": 3.125,
      "learning_rate": 3.6036797329117246e-05,
      "loss": 0.8217,
      "step": 300740
    },
    {
      "epoch": 1.0540537698850792,
      "grad_norm": 3.078125,
      "learning_rate": 3.603614830045354e-05,
      "loss": 0.848,
      "step": 300750
    },
    {
      "epoch": 1.054088817391975,
      "grad_norm": 3.109375,
      "learning_rate": 3.603549927178984e-05,
      "loss": 0.8805,
      "step": 300760
    },
    {
      "epoch": 1.0541238648988704,
      "grad_norm": 3.5625,
      "learning_rate": 3.6034850243126136e-05,
      "loss": 0.9125,
      "step": 300770
    },
    {
      "epoch": 1.054158912405766,
      "grad_norm": 3.25,
      "learning_rate": 3.603420121446244e-05,
      "loss": 0.8713,
      "step": 300780
    },
    {
      "epoch": 1.0541939599126615,
      "grad_norm": 2.78125,
      "learning_rate": 3.603355218579874e-05,
      "loss": 0.9529,
      "step": 300790
    },
    {
      "epoch": 1.0542290074195573,
      "grad_norm": 2.84375,
      "learning_rate": 3.6032903157135034e-05,
      "loss": 0.8223,
      "step": 300800
    },
    {
      "epoch": 1.0542640549264528,
      "grad_norm": 2.75,
      "learning_rate": 3.6032254128471335e-05,
      "loss": 0.8538,
      "step": 300810
    },
    {
      "epoch": 1.0542991024333483,
      "grad_norm": 2.984375,
      "learning_rate": 3.603160509980763e-05,
      "loss": 0.8226,
      "step": 300820
    },
    {
      "epoch": 1.054334149940244,
      "grad_norm": 2.796875,
      "learning_rate": 3.603095607114393e-05,
      "loss": 0.8523,
      "step": 300830
    },
    {
      "epoch": 1.0543691974471396,
      "grad_norm": 2.90625,
      "learning_rate": 3.6030307042480226e-05,
      "loss": 0.8945,
      "step": 300840
    },
    {
      "epoch": 1.0544042449540352,
      "grad_norm": 2.65625,
      "learning_rate": 3.602965801381653e-05,
      "loss": 0.8646,
      "step": 300850
    },
    {
      "epoch": 1.0544392924609307,
      "grad_norm": 3.09375,
      "learning_rate": 3.602900898515282e-05,
      "loss": 0.8884,
      "step": 300860
    },
    {
      "epoch": 1.0544743399678265,
      "grad_norm": 2.828125,
      "learning_rate": 3.602835995648912e-05,
      "loss": 0.8814,
      "step": 300870
    },
    {
      "epoch": 1.054509387474722,
      "grad_norm": 3.171875,
      "learning_rate": 3.602771092782542e-05,
      "loss": 0.8341,
      "step": 300880
    },
    {
      "epoch": 1.0545444349816175,
      "grad_norm": 3.15625,
      "learning_rate": 3.602706189916172e-05,
      "loss": 0.9192,
      "step": 300890
    },
    {
      "epoch": 1.054579482488513,
      "grad_norm": 3.203125,
      "learning_rate": 3.6026412870498014e-05,
      "loss": 0.8735,
      "step": 300900
    },
    {
      "epoch": 1.0546145299954088,
      "grad_norm": 2.765625,
      "learning_rate": 3.6025763841834315e-05,
      "loss": 0.8329,
      "step": 300910
    },
    {
      "epoch": 1.0546495775023044,
      "grad_norm": 2.9375,
      "learning_rate": 3.602511481317061e-05,
      "loss": 0.9388,
      "step": 300920
    },
    {
      "epoch": 1.0546846250092,
      "grad_norm": 2.9375,
      "learning_rate": 3.602446578450691e-05,
      "loss": 0.903,
      "step": 300930
    },
    {
      "epoch": 1.0547196725160957,
      "grad_norm": 3.015625,
      "learning_rate": 3.602381675584321e-05,
      "loss": 0.8214,
      "step": 300940
    },
    {
      "epoch": 1.0547547200229912,
      "grad_norm": 3.0625,
      "learning_rate": 3.602316772717951e-05,
      "loss": 0.9077,
      "step": 300950
    },
    {
      "epoch": 1.0547897675298867,
      "grad_norm": 3.125,
      "learning_rate": 3.60225186985158e-05,
      "loss": 0.9185,
      "step": 300960
    },
    {
      "epoch": 1.0548248150367823,
      "grad_norm": 2.875,
      "learning_rate": 3.6021869669852096e-05,
      "loss": 0.8772,
      "step": 300970
    },
    {
      "epoch": 1.054859862543678,
      "grad_norm": 2.59375,
      "learning_rate": 3.60212206411884e-05,
      "loss": 0.8952,
      "step": 300980
    },
    {
      "epoch": 1.0548949100505736,
      "grad_norm": 3.015625,
      "learning_rate": 3.602057161252469e-05,
      "loss": 0.8962,
      "step": 300990
    },
    {
      "epoch": 1.054929957557469,
      "grad_norm": 2.9375,
      "learning_rate": 3.6019922583860994e-05,
      "loss": 0.8033,
      "step": 301000
    },
    {
      "epoch": 1.0549650050643646,
      "grad_norm": 3.875,
      "learning_rate": 3.601927355519729e-05,
      "loss": 0.8809,
      "step": 301010
    },
    {
      "epoch": 1.0550000525712604,
      "grad_norm": 2.828125,
      "learning_rate": 3.601862452653359e-05,
      "loss": 0.908,
      "step": 301020
    },
    {
      "epoch": 1.055035100078156,
      "grad_norm": 2.84375,
      "learning_rate": 3.601797549786989e-05,
      "loss": 0.8446,
      "step": 301030
    },
    {
      "epoch": 1.0550701475850515,
      "grad_norm": 2.90625,
      "learning_rate": 3.6017326469206186e-05,
      "loss": 0.8882,
      "step": 301040
    },
    {
      "epoch": 1.0551051950919472,
      "grad_norm": 2.8125,
      "learning_rate": 3.601667744054249e-05,
      "loss": 0.8312,
      "step": 301050
    },
    {
      "epoch": 1.0551402425988428,
      "grad_norm": 2.609375,
      "learning_rate": 3.601602841187878e-05,
      "loss": 0.9568,
      "step": 301060
    },
    {
      "epoch": 1.0551752901057383,
      "grad_norm": 2.25,
      "learning_rate": 3.601537938321508e-05,
      "loss": 0.8511,
      "step": 301070
    },
    {
      "epoch": 1.0552103376126338,
      "grad_norm": 2.75,
      "learning_rate": 3.601473035455138e-05,
      "loss": 0.9596,
      "step": 301080
    },
    {
      "epoch": 1.0552453851195296,
      "grad_norm": 2.796875,
      "learning_rate": 3.601408132588768e-05,
      "loss": 0.8396,
      "step": 301090
    },
    {
      "epoch": 1.0552804326264251,
      "grad_norm": 2.984375,
      "learning_rate": 3.6013432297223974e-05,
      "loss": 0.918,
      "step": 301100
    },
    {
      "epoch": 1.0553154801333207,
      "grad_norm": 2.984375,
      "learning_rate": 3.6012783268560275e-05,
      "loss": 0.8503,
      "step": 301110
    },
    {
      "epoch": 1.0553505276402164,
      "grad_norm": 2.9375,
      "learning_rate": 3.601213423989657e-05,
      "loss": 0.9473,
      "step": 301120
    },
    {
      "epoch": 1.055385575147112,
      "grad_norm": 3.171875,
      "learning_rate": 3.601148521123287e-05,
      "loss": 0.9817,
      "step": 301130
    },
    {
      "epoch": 1.0554206226540075,
      "grad_norm": 2.71875,
      "learning_rate": 3.6010836182569166e-05,
      "loss": 0.8568,
      "step": 301140
    },
    {
      "epoch": 1.055455670160903,
      "grad_norm": 3.015625,
      "learning_rate": 3.601018715390547e-05,
      "loss": 0.8477,
      "step": 301150
    },
    {
      "epoch": 1.0554907176677988,
      "grad_norm": 3.09375,
      "learning_rate": 3.600953812524177e-05,
      "loss": 0.8216,
      "step": 301160
    },
    {
      "epoch": 1.0555257651746943,
      "grad_norm": 3.109375,
      "learning_rate": 3.600888909657806e-05,
      "loss": 0.8828,
      "step": 301170
    },
    {
      "epoch": 1.0555608126815899,
      "grad_norm": 3.140625,
      "learning_rate": 3.6008240067914364e-05,
      "loss": 0.9224,
      "step": 301180
    },
    {
      "epoch": 1.0555958601884854,
      "grad_norm": 2.8125,
      "learning_rate": 3.600759103925066e-05,
      "loss": 0.814,
      "step": 301190
    },
    {
      "epoch": 1.0556309076953811,
      "grad_norm": 3.25,
      "learning_rate": 3.600694201058696e-05,
      "loss": 0.8978,
      "step": 301200
    },
    {
      "epoch": 1.0556659552022767,
      "grad_norm": 3.109375,
      "learning_rate": 3.6006292981923255e-05,
      "loss": 0.845,
      "step": 301210
    },
    {
      "epoch": 1.0557010027091722,
      "grad_norm": 2.65625,
      "learning_rate": 3.6005643953259556e-05,
      "loss": 0.8729,
      "step": 301220
    },
    {
      "epoch": 1.055736050216068,
      "grad_norm": 3.234375,
      "learning_rate": 3.600499492459585e-05,
      "loss": 0.9467,
      "step": 301230
    },
    {
      "epoch": 1.0557710977229635,
      "grad_norm": 2.8125,
      "learning_rate": 3.600434589593215e-05,
      "loss": 0.846,
      "step": 301240
    },
    {
      "epoch": 1.055806145229859,
      "grad_norm": 3.203125,
      "learning_rate": 3.600369686726845e-05,
      "loss": 0.884,
      "step": 301250
    },
    {
      "epoch": 1.0558411927367546,
      "grad_norm": 2.71875,
      "learning_rate": 3.600304783860475e-05,
      "loss": 0.8047,
      "step": 301260
    },
    {
      "epoch": 1.0558762402436503,
      "grad_norm": 3.140625,
      "learning_rate": 3.600239880994104e-05,
      "loss": 0.8651,
      "step": 301270
    },
    {
      "epoch": 1.0559112877505459,
      "grad_norm": 3.125,
      "learning_rate": 3.6001749781277344e-05,
      "loss": 0.93,
      "step": 301280
    },
    {
      "epoch": 1.0559463352574414,
      "grad_norm": 3.296875,
      "learning_rate": 3.600110075261364e-05,
      "loss": 0.8192,
      "step": 301290
    },
    {
      "epoch": 1.0559813827643372,
      "grad_norm": 2.828125,
      "learning_rate": 3.600045172394994e-05,
      "loss": 0.8454,
      "step": 301300
    },
    {
      "epoch": 1.0560164302712327,
      "grad_norm": 3.109375,
      "learning_rate": 3.599980269528624e-05,
      "loss": 0.8442,
      "step": 301310
    },
    {
      "epoch": 1.0560514777781282,
      "grad_norm": 2.953125,
      "learning_rate": 3.5999153666622536e-05,
      "loss": 0.8975,
      "step": 301320
    },
    {
      "epoch": 1.0560865252850238,
      "grad_norm": 2.640625,
      "learning_rate": 3.599850463795883e-05,
      "loss": 0.8296,
      "step": 301330
    },
    {
      "epoch": 1.0561215727919195,
      "grad_norm": 2.96875,
      "learning_rate": 3.5997855609295126e-05,
      "loss": 0.9139,
      "step": 301340
    },
    {
      "epoch": 1.056156620298815,
      "grad_norm": 2.71875,
      "learning_rate": 3.599720658063143e-05,
      "loss": 0.7738,
      "step": 301350
    },
    {
      "epoch": 1.0561916678057106,
      "grad_norm": 3.328125,
      "learning_rate": 3.599655755196772e-05,
      "loss": 0.9249,
      "step": 301360
    },
    {
      "epoch": 1.0562267153126061,
      "grad_norm": 2.671875,
      "learning_rate": 3.599590852330402e-05,
      "loss": 0.8262,
      "step": 301370
    },
    {
      "epoch": 1.056261762819502,
      "grad_norm": 2.953125,
      "learning_rate": 3.599525949464032e-05,
      "loss": 0.8666,
      "step": 301380
    },
    {
      "epoch": 1.0562968103263974,
      "grad_norm": 3.0,
      "learning_rate": 3.599461046597662e-05,
      "loss": 0.7904,
      "step": 301390
    },
    {
      "epoch": 1.056331857833293,
      "grad_norm": 3.125,
      "learning_rate": 3.599396143731292e-05,
      "loss": 0.8686,
      "step": 301400
    },
    {
      "epoch": 1.0563669053401887,
      "grad_norm": 3.5625,
      "learning_rate": 3.5993312408649215e-05,
      "loss": 0.9291,
      "step": 301410
    },
    {
      "epoch": 1.0564019528470843,
      "grad_norm": 3.125,
      "learning_rate": 3.5992663379985516e-05,
      "loss": 0.933,
      "step": 301420
    },
    {
      "epoch": 1.0564370003539798,
      "grad_norm": 3.15625,
      "learning_rate": 3.599201435132181e-05,
      "loss": 0.9513,
      "step": 301430
    },
    {
      "epoch": 1.0564720478608753,
      "grad_norm": 2.8125,
      "learning_rate": 3.599136532265811e-05,
      "loss": 0.8789,
      "step": 301440
    },
    {
      "epoch": 1.056507095367771,
      "grad_norm": 3.21875,
      "learning_rate": 3.599071629399441e-05,
      "loss": 0.9344,
      "step": 301450
    },
    {
      "epoch": 1.0565421428746666,
      "grad_norm": 3.125,
      "learning_rate": 3.599006726533071e-05,
      "loss": 0.8805,
      "step": 301460
    },
    {
      "epoch": 1.0565771903815622,
      "grad_norm": 3.328125,
      "learning_rate": 3.5989418236667e-05,
      "loss": 0.8842,
      "step": 301470
    },
    {
      "epoch": 1.0566122378884577,
      "grad_norm": 2.4375,
      "learning_rate": 3.5988769208003304e-05,
      "loss": 0.7792,
      "step": 301480
    },
    {
      "epoch": 1.0566472853953535,
      "grad_norm": 3.125,
      "learning_rate": 3.59881201793396e-05,
      "loss": 0.8387,
      "step": 301490
    },
    {
      "epoch": 1.056682332902249,
      "grad_norm": 2.5625,
      "learning_rate": 3.59874711506759e-05,
      "loss": 0.8224,
      "step": 301500
    },
    {
      "epoch": 1.0567173804091445,
      "grad_norm": 3.390625,
      "learning_rate": 3.5986822122012195e-05,
      "loss": 0.9369,
      "step": 301510
    },
    {
      "epoch": 1.0567524279160403,
      "grad_norm": 2.953125,
      "learning_rate": 3.5986173093348496e-05,
      "loss": 0.8595,
      "step": 301520
    },
    {
      "epoch": 1.0567874754229358,
      "grad_norm": 2.515625,
      "learning_rate": 3.59855240646848e-05,
      "loss": 0.7879,
      "step": 301530
    },
    {
      "epoch": 1.0568225229298314,
      "grad_norm": 3.03125,
      "learning_rate": 3.598487503602109e-05,
      "loss": 0.8892,
      "step": 301540
    },
    {
      "epoch": 1.056857570436727,
      "grad_norm": 3.0625,
      "learning_rate": 3.5984226007357394e-05,
      "loss": 0.8438,
      "step": 301550
    },
    {
      "epoch": 1.0568926179436227,
      "grad_norm": 2.796875,
      "learning_rate": 3.598357697869369e-05,
      "loss": 0.931,
      "step": 301560
    },
    {
      "epoch": 1.0569276654505182,
      "grad_norm": 3.234375,
      "learning_rate": 3.598292795002999e-05,
      "loss": 0.8046,
      "step": 301570
    },
    {
      "epoch": 1.0569627129574137,
      "grad_norm": 2.875,
      "learning_rate": 3.5982278921366284e-05,
      "loss": 0.8901,
      "step": 301580
    },
    {
      "epoch": 1.0569977604643093,
      "grad_norm": 3.140625,
      "learning_rate": 3.5981629892702586e-05,
      "loss": 0.8258,
      "step": 301590
    },
    {
      "epoch": 1.057032807971205,
      "grad_norm": 3.359375,
      "learning_rate": 3.598098086403888e-05,
      "loss": 0.9491,
      "step": 301600
    },
    {
      "epoch": 1.0570678554781006,
      "grad_norm": 2.640625,
      "learning_rate": 3.598033183537518e-05,
      "loss": 0.7982,
      "step": 301610
    },
    {
      "epoch": 1.057102902984996,
      "grad_norm": 2.828125,
      "learning_rate": 3.5979682806711476e-05,
      "loss": 0.8638,
      "step": 301620
    },
    {
      "epoch": 1.0571379504918919,
      "grad_norm": 3.359375,
      "learning_rate": 3.597903377804778e-05,
      "loss": 0.8633,
      "step": 301630
    },
    {
      "epoch": 1.0571729979987874,
      "grad_norm": 2.703125,
      "learning_rate": 3.597838474938407e-05,
      "loss": 0.8737,
      "step": 301640
    },
    {
      "epoch": 1.057208045505683,
      "grad_norm": 3.0,
      "learning_rate": 3.5977735720720374e-05,
      "loss": 0.9539,
      "step": 301650
    },
    {
      "epoch": 1.0572430930125785,
      "grad_norm": 2.59375,
      "learning_rate": 3.597708669205667e-05,
      "loss": 0.8844,
      "step": 301660
    },
    {
      "epoch": 1.0572781405194742,
      "grad_norm": 3.140625,
      "learning_rate": 3.597643766339297e-05,
      "loss": 0.8693,
      "step": 301670
    },
    {
      "epoch": 1.0573131880263698,
      "grad_norm": 3.109375,
      "learning_rate": 3.597578863472927e-05,
      "loss": 0.842,
      "step": 301680
    },
    {
      "epoch": 1.0573482355332653,
      "grad_norm": 2.75,
      "learning_rate": 3.5975139606065566e-05,
      "loss": 0.8498,
      "step": 301690
    },
    {
      "epoch": 1.0573832830401608,
      "grad_norm": 3.0625,
      "learning_rate": 3.597449057740187e-05,
      "loss": 0.8359,
      "step": 301700
    },
    {
      "epoch": 1.0574183305470566,
      "grad_norm": 2.671875,
      "learning_rate": 3.5973841548738155e-05,
      "loss": 0.8407,
      "step": 301710
    },
    {
      "epoch": 1.0574533780539521,
      "grad_norm": 2.90625,
      "learning_rate": 3.5973192520074456e-05,
      "loss": 0.7842,
      "step": 301720
    },
    {
      "epoch": 1.0574884255608477,
      "grad_norm": 3.09375,
      "learning_rate": 3.597254349141075e-05,
      "loss": 0.8804,
      "step": 301730
    },
    {
      "epoch": 1.0575234730677434,
      "grad_norm": 3.015625,
      "learning_rate": 3.597189446274705e-05,
      "loss": 0.9211,
      "step": 301740
    },
    {
      "epoch": 1.057558520574639,
      "grad_norm": 3.078125,
      "learning_rate": 3.5971245434083354e-05,
      "loss": 0.8596,
      "step": 301750
    },
    {
      "epoch": 1.0575935680815345,
      "grad_norm": 3.359375,
      "learning_rate": 3.597059640541965e-05,
      "loss": 0.9342,
      "step": 301760
    },
    {
      "epoch": 1.05762861558843,
      "grad_norm": 3.1875,
      "learning_rate": 3.596994737675595e-05,
      "loss": 0.9381,
      "step": 301770
    },
    {
      "epoch": 1.0576636630953258,
      "grad_norm": 2.703125,
      "learning_rate": 3.5969298348092244e-05,
      "loss": 0.8357,
      "step": 301780
    },
    {
      "epoch": 1.0576987106022213,
      "grad_norm": 2.828125,
      "learning_rate": 3.5968649319428546e-05,
      "loss": 0.8272,
      "step": 301790
    },
    {
      "epoch": 1.0577337581091169,
      "grad_norm": 2.90625,
      "learning_rate": 3.596800029076484e-05,
      "loss": 0.9826,
      "step": 301800
    },
    {
      "epoch": 1.0577688056160126,
      "grad_norm": 2.609375,
      "learning_rate": 3.596735126210114e-05,
      "loss": 0.873,
      "step": 301810
    },
    {
      "epoch": 1.0578038531229081,
      "grad_norm": 2.59375,
      "learning_rate": 3.5966702233437436e-05,
      "loss": 0.8175,
      "step": 301820
    },
    {
      "epoch": 1.0578389006298037,
      "grad_norm": 3.140625,
      "learning_rate": 3.596605320477374e-05,
      "loss": 0.9222,
      "step": 301830
    },
    {
      "epoch": 1.0578739481366992,
      "grad_norm": 2.859375,
      "learning_rate": 3.596540417611003e-05,
      "loss": 0.9688,
      "step": 301840
    },
    {
      "epoch": 1.057908995643595,
      "grad_norm": 2.875,
      "learning_rate": 3.5964755147446334e-05,
      "loss": 0.8413,
      "step": 301850
    },
    {
      "epoch": 1.0579440431504905,
      "grad_norm": 2.8125,
      "learning_rate": 3.596410611878263e-05,
      "loss": 0.833,
      "step": 301860
    },
    {
      "epoch": 1.057979090657386,
      "grad_norm": 3.296875,
      "learning_rate": 3.596345709011893e-05,
      "loss": 0.8422,
      "step": 301870
    },
    {
      "epoch": 1.0580141381642816,
      "grad_norm": 2.875,
      "learning_rate": 3.5962808061455224e-05,
      "loss": 0.8938,
      "step": 301880
    },
    {
      "epoch": 1.0580491856711773,
      "grad_norm": 2.75,
      "learning_rate": 3.5962159032791526e-05,
      "loss": 0.8816,
      "step": 301890
    },
    {
      "epoch": 1.0580842331780729,
      "grad_norm": 3.015625,
      "learning_rate": 3.596151000412783e-05,
      "loss": 0.9295,
      "step": 301900
    },
    {
      "epoch": 1.0581192806849684,
      "grad_norm": 2.71875,
      "learning_rate": 3.596086097546412e-05,
      "loss": 0.8341,
      "step": 301910
    },
    {
      "epoch": 1.0581543281918642,
      "grad_norm": 3.15625,
      "learning_rate": 3.596021194680042e-05,
      "loss": 0.9148,
      "step": 301920
    },
    {
      "epoch": 1.0581893756987597,
      "grad_norm": 2.84375,
      "learning_rate": 3.595956291813672e-05,
      "loss": 0.9443,
      "step": 301930
    },
    {
      "epoch": 1.0582244232056552,
      "grad_norm": 3.078125,
      "learning_rate": 3.595891388947302e-05,
      "loss": 0.8896,
      "step": 301940
    },
    {
      "epoch": 1.0582594707125508,
      "grad_norm": 3.03125,
      "learning_rate": 3.5958264860809314e-05,
      "loss": 0.7902,
      "step": 301950
    },
    {
      "epoch": 1.0582945182194465,
      "grad_norm": 2.859375,
      "learning_rate": 3.5957615832145615e-05,
      "loss": 0.8709,
      "step": 301960
    },
    {
      "epoch": 1.058329565726342,
      "grad_norm": 3.3125,
      "learning_rate": 3.595696680348191e-05,
      "loss": 0.9288,
      "step": 301970
    },
    {
      "epoch": 1.0583646132332376,
      "grad_norm": 2.828125,
      "learning_rate": 3.595631777481821e-05,
      "loss": 0.8534,
      "step": 301980
    },
    {
      "epoch": 1.0583996607401334,
      "grad_norm": 3.34375,
      "learning_rate": 3.5955668746154506e-05,
      "loss": 0.8607,
      "step": 301990
    },
    {
      "epoch": 1.058434708247029,
      "grad_norm": 2.828125,
      "learning_rate": 3.595501971749081e-05,
      "loss": 0.8845,
      "step": 302000
    },
    {
      "epoch": 1.0584697557539244,
      "grad_norm": 2.796875,
      "learning_rate": 3.59543706888271e-05,
      "loss": 0.8364,
      "step": 302010
    },
    {
      "epoch": 1.05850480326082,
      "grad_norm": 3.0625,
      "learning_rate": 3.59537216601634e-05,
      "loss": 0.8857,
      "step": 302020
    },
    {
      "epoch": 1.0585398507677157,
      "grad_norm": 3.1875,
      "learning_rate": 3.5953072631499705e-05,
      "loss": 0.8847,
      "step": 302030
    },
    {
      "epoch": 1.0585748982746113,
      "grad_norm": 3.109375,
      "learning_rate": 3.5952423602836e-05,
      "loss": 0.8608,
      "step": 302040
    },
    {
      "epoch": 1.0586099457815068,
      "grad_norm": 2.984375,
      "learning_rate": 3.59517745741723e-05,
      "loss": 0.801,
      "step": 302050
    },
    {
      "epoch": 1.0586449932884023,
      "grad_norm": 2.625,
      "learning_rate": 3.5951125545508595e-05,
      "loss": 0.8705,
      "step": 302060
    },
    {
      "epoch": 1.058680040795298,
      "grad_norm": 2.921875,
      "learning_rate": 3.59504765168449e-05,
      "loss": 0.8275,
      "step": 302070
    },
    {
      "epoch": 1.0587150883021936,
      "grad_norm": 3.1875,
      "learning_rate": 3.594982748818119e-05,
      "loss": 0.9478,
      "step": 302080
    },
    {
      "epoch": 1.0587501358090892,
      "grad_norm": 3.078125,
      "learning_rate": 3.5949178459517486e-05,
      "loss": 0.9107,
      "step": 302090
    },
    {
      "epoch": 1.058785183315985,
      "grad_norm": 3.125,
      "learning_rate": 3.594852943085378e-05,
      "loss": 0.8567,
      "step": 302100
    },
    {
      "epoch": 1.0588202308228805,
      "grad_norm": 2.890625,
      "learning_rate": 3.594788040219008e-05,
      "loss": 0.7926,
      "step": 302110
    },
    {
      "epoch": 1.058855278329776,
      "grad_norm": 3.09375,
      "learning_rate": 3.594723137352638e-05,
      "loss": 0.8765,
      "step": 302120
    },
    {
      "epoch": 1.0588903258366715,
      "grad_norm": 2.78125,
      "learning_rate": 3.594658234486268e-05,
      "loss": 0.9335,
      "step": 302130
    },
    {
      "epoch": 1.0589253733435673,
      "grad_norm": 3.0,
      "learning_rate": 3.594593331619898e-05,
      "loss": 0.9053,
      "step": 302140
    },
    {
      "epoch": 1.0589604208504628,
      "grad_norm": 3.0,
      "learning_rate": 3.5945284287535274e-05,
      "loss": 0.8976,
      "step": 302150
    },
    {
      "epoch": 1.0589954683573584,
      "grad_norm": 3.015625,
      "learning_rate": 3.5944635258871575e-05,
      "loss": 0.8493,
      "step": 302160
    },
    {
      "epoch": 1.059030515864254,
      "grad_norm": 2.453125,
      "learning_rate": 3.594398623020787e-05,
      "loss": 0.9019,
      "step": 302170
    },
    {
      "epoch": 1.0590655633711497,
      "grad_norm": 3.25,
      "learning_rate": 3.594333720154417e-05,
      "loss": 0.8516,
      "step": 302180
    },
    {
      "epoch": 1.0591006108780452,
      "grad_norm": 3.203125,
      "learning_rate": 3.5942688172880466e-05,
      "loss": 0.89,
      "step": 302190
    },
    {
      "epoch": 1.0591356583849407,
      "grad_norm": 2.828125,
      "learning_rate": 3.594203914421677e-05,
      "loss": 0.784,
      "step": 302200
    },
    {
      "epoch": 1.0591707058918365,
      "grad_norm": 2.90625,
      "learning_rate": 3.594139011555306e-05,
      "loss": 0.8235,
      "step": 302210
    },
    {
      "epoch": 1.059205753398732,
      "grad_norm": 3.234375,
      "learning_rate": 3.594074108688936e-05,
      "loss": 0.8833,
      "step": 302220
    },
    {
      "epoch": 1.0592408009056276,
      "grad_norm": 3.09375,
      "learning_rate": 3.594009205822566e-05,
      "loss": 0.8423,
      "step": 302230
    },
    {
      "epoch": 1.059275848412523,
      "grad_norm": 2.796875,
      "learning_rate": 3.593944302956196e-05,
      "loss": 0.8274,
      "step": 302240
    },
    {
      "epoch": 1.0593108959194188,
      "grad_norm": 3.078125,
      "learning_rate": 3.5938794000898254e-05,
      "loss": 0.9393,
      "step": 302250
    },
    {
      "epoch": 1.0593459434263144,
      "grad_norm": 2.765625,
      "learning_rate": 3.5938144972234555e-05,
      "loss": 0.9127,
      "step": 302260
    },
    {
      "epoch": 1.05938099093321,
      "grad_norm": 2.953125,
      "learning_rate": 3.593749594357086e-05,
      "loss": 0.8905,
      "step": 302270
    },
    {
      "epoch": 1.0594160384401055,
      "grad_norm": 3.140625,
      "learning_rate": 3.593684691490715e-05,
      "loss": 0.8731,
      "step": 302280
    },
    {
      "epoch": 1.0594510859470012,
      "grad_norm": 2.84375,
      "learning_rate": 3.593619788624345e-05,
      "loss": 0.8421,
      "step": 302290
    },
    {
      "epoch": 1.0594861334538968,
      "grad_norm": 3.078125,
      "learning_rate": 3.593554885757975e-05,
      "loss": 0.88,
      "step": 302300
    },
    {
      "epoch": 1.0595211809607923,
      "grad_norm": 2.5,
      "learning_rate": 3.593489982891605e-05,
      "loss": 0.8729,
      "step": 302310
    },
    {
      "epoch": 1.059556228467688,
      "grad_norm": 2.765625,
      "learning_rate": 3.593425080025234e-05,
      "loss": 0.9672,
      "step": 302320
    },
    {
      "epoch": 1.0595912759745836,
      "grad_norm": 3.015625,
      "learning_rate": 3.5933601771588645e-05,
      "loss": 0.8618,
      "step": 302330
    },
    {
      "epoch": 1.0596263234814791,
      "grad_norm": 3.046875,
      "learning_rate": 3.593295274292494e-05,
      "loss": 0.7968,
      "step": 302340
    },
    {
      "epoch": 1.0596613709883747,
      "grad_norm": 3.234375,
      "learning_rate": 3.593230371426124e-05,
      "loss": 0.9476,
      "step": 302350
    },
    {
      "epoch": 1.0596964184952704,
      "grad_norm": 2.453125,
      "learning_rate": 3.5931654685597535e-05,
      "loss": 0.7307,
      "step": 302360
    },
    {
      "epoch": 1.059731466002166,
      "grad_norm": 2.703125,
      "learning_rate": 3.5931005656933837e-05,
      "loss": 0.8076,
      "step": 302370
    },
    {
      "epoch": 1.0597665135090615,
      "grad_norm": 2.828125,
      "learning_rate": 3.593035662827013e-05,
      "loss": 0.9308,
      "step": 302380
    },
    {
      "epoch": 1.059801561015957,
      "grad_norm": 2.59375,
      "learning_rate": 3.592970759960643e-05,
      "loss": 0.8577,
      "step": 302390
    },
    {
      "epoch": 1.0598366085228528,
      "grad_norm": 3.15625,
      "learning_rate": 3.5929058570942734e-05,
      "loss": 0.8591,
      "step": 302400
    },
    {
      "epoch": 1.0598716560297483,
      "grad_norm": 3.171875,
      "learning_rate": 3.592840954227903e-05,
      "loss": 0.8699,
      "step": 302410
    },
    {
      "epoch": 1.0599067035366438,
      "grad_norm": 2.53125,
      "learning_rate": 3.592776051361533e-05,
      "loss": 0.9656,
      "step": 302420
    },
    {
      "epoch": 1.0599417510435396,
      "grad_norm": 2.71875,
      "learning_rate": 3.5927111484951625e-05,
      "loss": 0.8856,
      "step": 302430
    },
    {
      "epoch": 1.0599767985504351,
      "grad_norm": 2.4375,
      "learning_rate": 3.5926462456287926e-05,
      "loss": 0.8149,
      "step": 302440
    },
    {
      "epoch": 1.0600118460573307,
      "grad_norm": 3.140625,
      "learning_rate": 3.592581342762422e-05,
      "loss": 0.8526,
      "step": 302450
    },
    {
      "epoch": 1.0600468935642262,
      "grad_norm": 2.984375,
      "learning_rate": 3.5925164398960515e-05,
      "loss": 0.9402,
      "step": 302460
    },
    {
      "epoch": 1.060081941071122,
      "grad_norm": 2.796875,
      "learning_rate": 3.592451537029681e-05,
      "loss": 0.8045,
      "step": 302470
    },
    {
      "epoch": 1.0601169885780175,
      "grad_norm": 3.25,
      "learning_rate": 3.592386634163311e-05,
      "loss": 0.9076,
      "step": 302480
    },
    {
      "epoch": 1.060152036084913,
      "grad_norm": 2.875,
      "learning_rate": 3.592321731296941e-05,
      "loss": 0.8873,
      "step": 302490
    },
    {
      "epoch": 1.0601870835918088,
      "grad_norm": 3.5625,
      "learning_rate": 3.592256828430571e-05,
      "loss": 0.9466,
      "step": 302500
    },
    {
      "epoch": 1.0602221310987043,
      "grad_norm": 3.0625,
      "learning_rate": 3.592191925564201e-05,
      "loss": 0.9045,
      "step": 302510
    },
    {
      "epoch": 1.0602571786055999,
      "grad_norm": 3.21875,
      "learning_rate": 3.59212702269783e-05,
      "loss": 0.9002,
      "step": 302520
    },
    {
      "epoch": 1.0602922261124954,
      "grad_norm": 2.8125,
      "learning_rate": 3.5920621198314605e-05,
      "loss": 0.7676,
      "step": 302530
    },
    {
      "epoch": 1.0603272736193912,
      "grad_norm": 3.515625,
      "learning_rate": 3.59199721696509e-05,
      "loss": 0.885,
      "step": 302540
    },
    {
      "epoch": 1.0603623211262867,
      "grad_norm": 3.140625,
      "learning_rate": 3.59193231409872e-05,
      "loss": 0.8543,
      "step": 302550
    },
    {
      "epoch": 1.0603973686331822,
      "grad_norm": 3.34375,
      "learning_rate": 3.5918674112323495e-05,
      "loss": 0.8507,
      "step": 302560
    },
    {
      "epoch": 1.0604324161400778,
      "grad_norm": 2.625,
      "learning_rate": 3.5918025083659797e-05,
      "loss": 0.9152,
      "step": 302570
    },
    {
      "epoch": 1.0604674636469735,
      "grad_norm": 3.203125,
      "learning_rate": 3.591737605499609e-05,
      "loss": 0.9154,
      "step": 302580
    },
    {
      "epoch": 1.060502511153869,
      "grad_norm": 2.953125,
      "learning_rate": 3.591672702633239e-05,
      "loss": 0.8364,
      "step": 302590
    },
    {
      "epoch": 1.0605375586607646,
      "grad_norm": 2.5625,
      "learning_rate": 3.591607799766869e-05,
      "loss": 0.7785,
      "step": 302600
    },
    {
      "epoch": 1.0605726061676604,
      "grad_norm": 2.71875,
      "learning_rate": 3.591542896900499e-05,
      "loss": 0.9178,
      "step": 302610
    },
    {
      "epoch": 1.060607653674556,
      "grad_norm": 2.671875,
      "learning_rate": 3.591477994034128e-05,
      "loss": 0.8807,
      "step": 302620
    },
    {
      "epoch": 1.0606427011814514,
      "grad_norm": 3.015625,
      "learning_rate": 3.5914130911677585e-05,
      "loss": 0.8214,
      "step": 302630
    },
    {
      "epoch": 1.060677748688347,
      "grad_norm": 2.0625,
      "learning_rate": 3.5913481883013886e-05,
      "loss": 0.8133,
      "step": 302640
    },
    {
      "epoch": 1.0607127961952427,
      "grad_norm": 2.65625,
      "learning_rate": 3.591283285435018e-05,
      "loss": 0.8049,
      "step": 302650
    },
    {
      "epoch": 1.0607478437021383,
      "grad_norm": 3.0625,
      "learning_rate": 3.591218382568648e-05,
      "loss": 0.8648,
      "step": 302660
    },
    {
      "epoch": 1.0607828912090338,
      "grad_norm": 2.859375,
      "learning_rate": 3.5911534797022777e-05,
      "loss": 0.8205,
      "step": 302670
    },
    {
      "epoch": 1.0608179387159296,
      "grad_norm": 3.515625,
      "learning_rate": 3.591088576835908e-05,
      "loss": 0.8694,
      "step": 302680
    },
    {
      "epoch": 1.060852986222825,
      "grad_norm": 2.65625,
      "learning_rate": 3.591023673969537e-05,
      "loss": 0.8685,
      "step": 302690
    },
    {
      "epoch": 1.0608880337297206,
      "grad_norm": 2.328125,
      "learning_rate": 3.5909587711031674e-05,
      "loss": 0.8158,
      "step": 302700
    },
    {
      "epoch": 1.0609230812366162,
      "grad_norm": 3.203125,
      "learning_rate": 3.590893868236797e-05,
      "loss": 0.9034,
      "step": 302710
    },
    {
      "epoch": 1.060958128743512,
      "grad_norm": 2.6875,
      "learning_rate": 3.590828965370427e-05,
      "loss": 0.8199,
      "step": 302720
    },
    {
      "epoch": 1.0609931762504075,
      "grad_norm": 2.59375,
      "learning_rate": 3.5907640625040565e-05,
      "loss": 0.8514,
      "step": 302730
    },
    {
      "epoch": 1.061028223757303,
      "grad_norm": 2.578125,
      "learning_rate": 3.5906991596376866e-05,
      "loss": 0.8489,
      "step": 302740
    },
    {
      "epoch": 1.0610632712641985,
      "grad_norm": 2.828125,
      "learning_rate": 3.590634256771316e-05,
      "loss": 0.8504,
      "step": 302750
    },
    {
      "epoch": 1.0610983187710943,
      "grad_norm": 2.765625,
      "learning_rate": 3.590569353904946e-05,
      "loss": 0.7947,
      "step": 302760
    },
    {
      "epoch": 1.0611333662779898,
      "grad_norm": 2.71875,
      "learning_rate": 3.590504451038576e-05,
      "loss": 0.8811,
      "step": 302770
    },
    {
      "epoch": 1.0611684137848854,
      "grad_norm": 2.859375,
      "learning_rate": 3.590439548172206e-05,
      "loss": 0.7463,
      "step": 302780
    },
    {
      "epoch": 1.0612034612917811,
      "grad_norm": 3.4375,
      "learning_rate": 3.590374645305836e-05,
      "loss": 0.8979,
      "step": 302790
    },
    {
      "epoch": 1.0612385087986766,
      "grad_norm": 2.953125,
      "learning_rate": 3.5903097424394654e-05,
      "loss": 0.7576,
      "step": 302800
    },
    {
      "epoch": 1.0612735563055722,
      "grad_norm": 2.84375,
      "learning_rate": 3.5902448395730955e-05,
      "loss": 0.8171,
      "step": 302810
    },
    {
      "epoch": 1.0613086038124677,
      "grad_norm": 2.890625,
      "learning_rate": 3.590179936706725e-05,
      "loss": 0.8392,
      "step": 302820
    },
    {
      "epoch": 1.0613436513193635,
      "grad_norm": 3.15625,
      "learning_rate": 3.590115033840355e-05,
      "loss": 0.8687,
      "step": 302830
    },
    {
      "epoch": 1.061378698826259,
      "grad_norm": 2.6875,
      "learning_rate": 3.590050130973984e-05,
      "loss": 0.852,
      "step": 302840
    },
    {
      "epoch": 1.0614137463331546,
      "grad_norm": 2.28125,
      "learning_rate": 3.589985228107614e-05,
      "loss": 0.8185,
      "step": 302850
    },
    {
      "epoch": 1.06144879384005,
      "grad_norm": 3.65625,
      "learning_rate": 3.589920325241244e-05,
      "loss": 0.8032,
      "step": 302860
    },
    {
      "epoch": 1.0614838413469458,
      "grad_norm": 3.15625,
      "learning_rate": 3.5898554223748737e-05,
      "loss": 0.9014,
      "step": 302870
    },
    {
      "epoch": 1.0615188888538414,
      "grad_norm": 2.53125,
      "learning_rate": 3.589790519508504e-05,
      "loss": 0.9472,
      "step": 302880
    },
    {
      "epoch": 1.061553936360737,
      "grad_norm": 3.140625,
      "learning_rate": 3.589725616642133e-05,
      "loss": 0.8361,
      "step": 302890
    },
    {
      "epoch": 1.0615889838676327,
      "grad_norm": 2.546875,
      "learning_rate": 3.5896607137757634e-05,
      "loss": 0.8261,
      "step": 302900
    },
    {
      "epoch": 1.0616240313745282,
      "grad_norm": 2.625,
      "learning_rate": 3.589595810909393e-05,
      "loss": 0.8416,
      "step": 302910
    },
    {
      "epoch": 1.0616590788814237,
      "grad_norm": 3.078125,
      "learning_rate": 3.589530908043023e-05,
      "loss": 0.9669,
      "step": 302920
    },
    {
      "epoch": 1.0616941263883193,
      "grad_norm": 2.5,
      "learning_rate": 3.5894660051766525e-05,
      "loss": 0.8378,
      "step": 302930
    },
    {
      "epoch": 1.061729173895215,
      "grad_norm": 2.75,
      "learning_rate": 3.5894011023102826e-05,
      "loss": 0.7787,
      "step": 302940
    },
    {
      "epoch": 1.0617642214021106,
      "grad_norm": 3.703125,
      "learning_rate": 3.589336199443912e-05,
      "loss": 0.9125,
      "step": 302950
    },
    {
      "epoch": 1.061799268909006,
      "grad_norm": 3.171875,
      "learning_rate": 3.589271296577542e-05,
      "loss": 0.9335,
      "step": 302960
    },
    {
      "epoch": 1.0618343164159016,
      "grad_norm": 2.828125,
      "learning_rate": 3.5892063937111717e-05,
      "loss": 0.8541,
      "step": 302970
    },
    {
      "epoch": 1.0618693639227974,
      "grad_norm": 3.125,
      "learning_rate": 3.589141490844802e-05,
      "loss": 0.9462,
      "step": 302980
    },
    {
      "epoch": 1.061904411429693,
      "grad_norm": 3.109375,
      "learning_rate": 3.589076587978432e-05,
      "loss": 0.8835,
      "step": 302990
    },
    {
      "epoch": 1.0619394589365885,
      "grad_norm": 2.875,
      "learning_rate": 3.5890116851120614e-05,
      "loss": 0.8595,
      "step": 303000
    },
    {
      "epoch": 1.0619745064434842,
      "grad_norm": 3.15625,
      "learning_rate": 3.5889467822456915e-05,
      "loss": 0.8519,
      "step": 303010
    },
    {
      "epoch": 1.0620095539503798,
      "grad_norm": 2.75,
      "learning_rate": 3.588881879379321e-05,
      "loss": 0.8424,
      "step": 303020
    },
    {
      "epoch": 1.0620446014572753,
      "grad_norm": 3.0,
      "learning_rate": 3.588816976512951e-05,
      "loss": 0.8824,
      "step": 303030
    },
    {
      "epoch": 1.0620796489641708,
      "grad_norm": 3.0,
      "learning_rate": 3.5887520736465806e-05,
      "loss": 0.8512,
      "step": 303040
    },
    {
      "epoch": 1.0621146964710666,
      "grad_norm": 3.0625,
      "learning_rate": 3.588687170780211e-05,
      "loss": 0.8816,
      "step": 303050
    },
    {
      "epoch": 1.0621497439779621,
      "grad_norm": 2.71875,
      "learning_rate": 3.58862226791384e-05,
      "loss": 0.8318,
      "step": 303060
    },
    {
      "epoch": 1.0621847914848577,
      "grad_norm": 3.046875,
      "learning_rate": 3.58855736504747e-05,
      "loss": 0.8883,
      "step": 303070
    },
    {
      "epoch": 1.0622198389917534,
      "grad_norm": 2.859375,
      "learning_rate": 3.5884924621811e-05,
      "loss": 0.9117,
      "step": 303080
    },
    {
      "epoch": 1.062254886498649,
      "grad_norm": 3.140625,
      "learning_rate": 3.58842755931473e-05,
      "loss": 0.8269,
      "step": 303090
    },
    {
      "epoch": 1.0622899340055445,
      "grad_norm": 3.15625,
      "learning_rate": 3.5883626564483594e-05,
      "loss": 0.8544,
      "step": 303100
    },
    {
      "epoch": 1.06232498151244,
      "grad_norm": 3.09375,
      "learning_rate": 3.5882977535819895e-05,
      "loss": 0.9428,
      "step": 303110
    },
    {
      "epoch": 1.0623600290193358,
      "grad_norm": 2.953125,
      "learning_rate": 3.588232850715619e-05,
      "loss": 0.8465,
      "step": 303120
    },
    {
      "epoch": 1.0623950765262313,
      "grad_norm": 3.015625,
      "learning_rate": 3.588167947849249e-05,
      "loss": 0.8337,
      "step": 303130
    },
    {
      "epoch": 1.0624301240331269,
      "grad_norm": 2.546875,
      "learning_rate": 3.588103044982879e-05,
      "loss": 0.8448,
      "step": 303140
    },
    {
      "epoch": 1.0624651715400224,
      "grad_norm": 2.90625,
      "learning_rate": 3.588038142116509e-05,
      "loss": 0.8828,
      "step": 303150
    },
    {
      "epoch": 1.0625002190469182,
      "grad_norm": 3.0625,
      "learning_rate": 3.587973239250139e-05,
      "loss": 0.9677,
      "step": 303160
    },
    {
      "epoch": 1.0625352665538137,
      "grad_norm": 3.03125,
      "learning_rate": 3.587908336383768e-05,
      "loss": 0.8487,
      "step": 303170
    },
    {
      "epoch": 1.0625703140607092,
      "grad_norm": 3.609375,
      "learning_rate": 3.5878434335173985e-05,
      "loss": 0.8796,
      "step": 303180
    },
    {
      "epoch": 1.062605361567605,
      "grad_norm": 2.5,
      "learning_rate": 3.587778530651028e-05,
      "loss": 0.8592,
      "step": 303190
    },
    {
      "epoch": 1.0626404090745005,
      "grad_norm": 2.8125,
      "learning_rate": 3.587713627784658e-05,
      "loss": 0.7907,
      "step": 303200
    },
    {
      "epoch": 1.062675456581396,
      "grad_norm": 2.890625,
      "learning_rate": 3.5876487249182875e-05,
      "loss": 0.8513,
      "step": 303210
    },
    {
      "epoch": 1.0627105040882916,
      "grad_norm": 3.078125,
      "learning_rate": 3.587583822051917e-05,
      "loss": 0.857,
      "step": 303220
    },
    {
      "epoch": 1.0627455515951874,
      "grad_norm": 3.453125,
      "learning_rate": 3.587518919185547e-05,
      "loss": 0.884,
      "step": 303230
    },
    {
      "epoch": 1.062780599102083,
      "grad_norm": 2.6875,
      "learning_rate": 3.5874540163191766e-05,
      "loss": 0.8614,
      "step": 303240
    },
    {
      "epoch": 1.0628156466089784,
      "grad_norm": 3.125,
      "learning_rate": 3.587389113452807e-05,
      "loss": 0.853,
      "step": 303250
    },
    {
      "epoch": 1.0628506941158742,
      "grad_norm": 3.171875,
      "learning_rate": 3.587324210586436e-05,
      "loss": 0.8639,
      "step": 303260
    },
    {
      "epoch": 1.0628857416227697,
      "grad_norm": 2.96875,
      "learning_rate": 3.587259307720066e-05,
      "loss": 0.8432,
      "step": 303270
    },
    {
      "epoch": 1.0629207891296653,
      "grad_norm": 3.125,
      "learning_rate": 3.587194404853696e-05,
      "loss": 0.8831,
      "step": 303280
    },
    {
      "epoch": 1.0629558366365608,
      "grad_norm": 2.96875,
      "learning_rate": 3.587129501987326e-05,
      "loss": 0.9565,
      "step": 303290
    },
    {
      "epoch": 1.0629908841434565,
      "grad_norm": 2.765625,
      "learning_rate": 3.5870645991209554e-05,
      "loss": 0.8482,
      "step": 303300
    },
    {
      "epoch": 1.063025931650352,
      "grad_norm": 2.734375,
      "learning_rate": 3.5869996962545855e-05,
      "loss": 0.8983,
      "step": 303310
    },
    {
      "epoch": 1.0630609791572476,
      "grad_norm": 2.953125,
      "learning_rate": 3.586934793388215e-05,
      "loss": 0.9347,
      "step": 303320
    },
    {
      "epoch": 1.0630960266641432,
      "grad_norm": 2.640625,
      "learning_rate": 3.586869890521845e-05,
      "loss": 0.8935,
      "step": 303330
    },
    {
      "epoch": 1.063131074171039,
      "grad_norm": 3.046875,
      "learning_rate": 3.5868049876554746e-05,
      "loss": 0.9633,
      "step": 303340
    },
    {
      "epoch": 1.0631661216779345,
      "grad_norm": 2.84375,
      "learning_rate": 3.586740084789105e-05,
      "loss": 0.8976,
      "step": 303350
    },
    {
      "epoch": 1.06320116918483,
      "grad_norm": 2.609375,
      "learning_rate": 3.586675181922735e-05,
      "loss": 0.8213,
      "step": 303360
    },
    {
      "epoch": 1.0632362166917257,
      "grad_norm": 2.796875,
      "learning_rate": 3.586610279056364e-05,
      "loss": 0.8647,
      "step": 303370
    },
    {
      "epoch": 1.0632712641986213,
      "grad_norm": 2.9375,
      "learning_rate": 3.5865453761899945e-05,
      "loss": 0.9129,
      "step": 303380
    },
    {
      "epoch": 1.0633063117055168,
      "grad_norm": 3.203125,
      "learning_rate": 3.586480473323624e-05,
      "loss": 0.8433,
      "step": 303390
    },
    {
      "epoch": 1.0633413592124124,
      "grad_norm": 3.09375,
      "learning_rate": 3.586415570457254e-05,
      "loss": 0.9183,
      "step": 303400
    },
    {
      "epoch": 1.063376406719308,
      "grad_norm": 3.34375,
      "learning_rate": 3.5863506675908835e-05,
      "loss": 0.8069,
      "step": 303410
    },
    {
      "epoch": 1.0634114542262036,
      "grad_norm": 3.484375,
      "learning_rate": 3.586285764724514e-05,
      "loss": 0.856,
      "step": 303420
    },
    {
      "epoch": 1.0634465017330992,
      "grad_norm": 3.078125,
      "learning_rate": 3.586220861858143e-05,
      "loss": 0.7936,
      "step": 303430
    },
    {
      "epoch": 1.0634815492399947,
      "grad_norm": 2.6875,
      "learning_rate": 3.586155958991773e-05,
      "loss": 0.9001,
      "step": 303440
    },
    {
      "epoch": 1.0635165967468905,
      "grad_norm": 2.75,
      "learning_rate": 3.586091056125403e-05,
      "loss": 0.805,
      "step": 303450
    },
    {
      "epoch": 1.063551644253786,
      "grad_norm": 2.75,
      "learning_rate": 3.586026153259033e-05,
      "loss": 0.9008,
      "step": 303460
    },
    {
      "epoch": 1.0635866917606815,
      "grad_norm": 2.5,
      "learning_rate": 3.585961250392662e-05,
      "loss": 0.8677,
      "step": 303470
    },
    {
      "epoch": 1.0636217392675773,
      "grad_norm": 24.75,
      "learning_rate": 3.5858963475262925e-05,
      "loss": 0.8993,
      "step": 303480
    },
    {
      "epoch": 1.0636567867744728,
      "grad_norm": 2.703125,
      "learning_rate": 3.585831444659922e-05,
      "loss": 0.8097,
      "step": 303490
    },
    {
      "epoch": 1.0636918342813684,
      "grad_norm": 3.4375,
      "learning_rate": 3.585766541793552e-05,
      "loss": 0.9154,
      "step": 303500
    },
    {
      "epoch": 1.063726881788264,
      "grad_norm": 3.125,
      "learning_rate": 3.585701638927182e-05,
      "loss": 0.8551,
      "step": 303510
    },
    {
      "epoch": 1.0637619292951597,
      "grad_norm": 2.953125,
      "learning_rate": 3.585636736060812e-05,
      "loss": 0.8901,
      "step": 303520
    },
    {
      "epoch": 1.0637969768020552,
      "grad_norm": 3.046875,
      "learning_rate": 3.585571833194442e-05,
      "loss": 0.8459,
      "step": 303530
    },
    {
      "epoch": 1.0638320243089507,
      "grad_norm": 2.96875,
      "learning_rate": 3.585506930328071e-05,
      "loss": 0.9546,
      "step": 303540
    },
    {
      "epoch": 1.0638670718158463,
      "grad_norm": 2.484375,
      "learning_rate": 3.5854420274617014e-05,
      "loss": 0.8516,
      "step": 303550
    },
    {
      "epoch": 1.063902119322742,
      "grad_norm": 2.421875,
      "learning_rate": 3.585377124595331e-05,
      "loss": 0.7918,
      "step": 303560
    },
    {
      "epoch": 1.0639371668296376,
      "grad_norm": 3.0,
      "learning_rate": 3.585312221728961e-05,
      "loss": 0.8351,
      "step": 303570
    },
    {
      "epoch": 1.063972214336533,
      "grad_norm": 2.5,
      "learning_rate": 3.5852473188625905e-05,
      "loss": 0.752,
      "step": 303580
    },
    {
      "epoch": 1.0640072618434289,
      "grad_norm": 2.921875,
      "learning_rate": 3.58518241599622e-05,
      "loss": 0.856,
      "step": 303590
    },
    {
      "epoch": 1.0640423093503244,
      "grad_norm": 3.234375,
      "learning_rate": 3.58511751312985e-05,
      "loss": 0.8805,
      "step": 303600
    },
    {
      "epoch": 1.06407735685722,
      "grad_norm": 2.546875,
      "learning_rate": 3.5850526102634795e-05,
      "loss": 0.9256,
      "step": 303610
    },
    {
      "epoch": 1.0641124043641155,
      "grad_norm": 3.0625,
      "learning_rate": 3.58498770739711e-05,
      "loss": 0.9389,
      "step": 303620
    },
    {
      "epoch": 1.0641474518710112,
      "grad_norm": 2.859375,
      "learning_rate": 3.584922804530739e-05,
      "loss": 0.8048,
      "step": 303630
    },
    {
      "epoch": 1.0641824993779068,
      "grad_norm": 3.265625,
      "learning_rate": 3.584857901664369e-05,
      "loss": 0.8109,
      "step": 303640
    },
    {
      "epoch": 1.0642175468848023,
      "grad_norm": 2.859375,
      "learning_rate": 3.584792998797999e-05,
      "loss": 0.9551,
      "step": 303650
    },
    {
      "epoch": 1.0642525943916978,
      "grad_norm": 2.953125,
      "learning_rate": 3.584728095931629e-05,
      "loss": 0.933,
      "step": 303660
    },
    {
      "epoch": 1.0642876418985936,
      "grad_norm": 2.59375,
      "learning_rate": 3.584663193065258e-05,
      "loss": 0.8985,
      "step": 303670
    },
    {
      "epoch": 1.0643226894054891,
      "grad_norm": 2.765625,
      "learning_rate": 3.5845982901988885e-05,
      "loss": 0.8265,
      "step": 303680
    },
    {
      "epoch": 1.0643577369123847,
      "grad_norm": 3.109375,
      "learning_rate": 3.584533387332518e-05,
      "loss": 0.9324,
      "step": 303690
    },
    {
      "epoch": 1.0643927844192804,
      "grad_norm": 2.640625,
      "learning_rate": 3.584468484466148e-05,
      "loss": 0.8256,
      "step": 303700
    },
    {
      "epoch": 1.064427831926176,
      "grad_norm": 3.0625,
      "learning_rate": 3.5844035815997775e-05,
      "loss": 0.8561,
      "step": 303710
    },
    {
      "epoch": 1.0644628794330715,
      "grad_norm": 2.859375,
      "learning_rate": 3.584338678733408e-05,
      "loss": 0.7644,
      "step": 303720
    },
    {
      "epoch": 1.064497926939967,
      "grad_norm": 2.875,
      "learning_rate": 3.584273775867038e-05,
      "loss": 0.8037,
      "step": 303730
    },
    {
      "epoch": 1.0645329744468628,
      "grad_norm": 2.84375,
      "learning_rate": 3.584208873000667e-05,
      "loss": 0.8933,
      "step": 303740
    },
    {
      "epoch": 1.0645680219537583,
      "grad_norm": 3.71875,
      "learning_rate": 3.5841439701342974e-05,
      "loss": 0.9617,
      "step": 303750
    },
    {
      "epoch": 1.0646030694606539,
      "grad_norm": 3.140625,
      "learning_rate": 3.584079067267927e-05,
      "loss": 0.8608,
      "step": 303760
    },
    {
      "epoch": 1.0646381169675494,
      "grad_norm": 2.796875,
      "learning_rate": 3.584014164401557e-05,
      "loss": 0.9563,
      "step": 303770
    },
    {
      "epoch": 1.0646731644744452,
      "grad_norm": 2.796875,
      "learning_rate": 3.5839492615351865e-05,
      "loss": 0.7956,
      "step": 303780
    },
    {
      "epoch": 1.0647082119813407,
      "grad_norm": 3.515625,
      "learning_rate": 3.5838843586688166e-05,
      "loss": 0.8455,
      "step": 303790
    },
    {
      "epoch": 1.0647432594882362,
      "grad_norm": 2.765625,
      "learning_rate": 3.583819455802446e-05,
      "loss": 0.8388,
      "step": 303800
    },
    {
      "epoch": 1.064778306995132,
      "grad_norm": 3.21875,
      "learning_rate": 3.583754552936076e-05,
      "loss": 0.8466,
      "step": 303810
    },
    {
      "epoch": 1.0648133545020275,
      "grad_norm": 3.109375,
      "learning_rate": 3.583689650069706e-05,
      "loss": 0.8586,
      "step": 303820
    },
    {
      "epoch": 1.064848402008923,
      "grad_norm": 2.90625,
      "learning_rate": 3.583624747203336e-05,
      "loss": 0.8717,
      "step": 303830
    },
    {
      "epoch": 1.0648834495158186,
      "grad_norm": 3.140625,
      "learning_rate": 3.583559844336965e-05,
      "loss": 0.9046,
      "step": 303840
    },
    {
      "epoch": 1.0649184970227143,
      "grad_norm": 2.625,
      "learning_rate": 3.5834949414705954e-05,
      "loss": 0.8732,
      "step": 303850
    },
    {
      "epoch": 1.0649535445296099,
      "grad_norm": 2.875,
      "learning_rate": 3.583430038604225e-05,
      "loss": 0.8877,
      "step": 303860
    },
    {
      "epoch": 1.0649885920365054,
      "grad_norm": 2.9375,
      "learning_rate": 3.583365135737855e-05,
      "loss": 0.8259,
      "step": 303870
    },
    {
      "epoch": 1.0650236395434012,
      "grad_norm": 3.359375,
      "learning_rate": 3.583300232871485e-05,
      "loss": 0.8893,
      "step": 303880
    },
    {
      "epoch": 1.0650586870502967,
      "grad_norm": 3.5,
      "learning_rate": 3.5832353300051146e-05,
      "loss": 0.8505,
      "step": 303890
    },
    {
      "epoch": 1.0650937345571923,
      "grad_norm": 3.015625,
      "learning_rate": 3.583170427138745e-05,
      "loss": 0.8684,
      "step": 303900
    },
    {
      "epoch": 1.0651287820640878,
      "grad_norm": 2.8125,
      "learning_rate": 3.583105524272374e-05,
      "loss": 0.8766,
      "step": 303910
    },
    {
      "epoch": 1.0651638295709835,
      "grad_norm": 3.125,
      "learning_rate": 3.5830406214060044e-05,
      "loss": 0.946,
      "step": 303920
    },
    {
      "epoch": 1.065198877077879,
      "grad_norm": 3.140625,
      "learning_rate": 3.582975718539634e-05,
      "loss": 0.857,
      "step": 303930
    },
    {
      "epoch": 1.0652339245847746,
      "grad_norm": 2.546875,
      "learning_rate": 3.582910815673264e-05,
      "loss": 0.8343,
      "step": 303940
    },
    {
      "epoch": 1.0652689720916704,
      "grad_norm": 2.953125,
      "learning_rate": 3.5828459128068934e-05,
      "loss": 0.8577,
      "step": 303950
    },
    {
      "epoch": 1.065304019598566,
      "grad_norm": 2.875,
      "learning_rate": 3.5827810099405236e-05,
      "loss": 0.9225,
      "step": 303960
    },
    {
      "epoch": 1.0653390671054614,
      "grad_norm": 2.484375,
      "learning_rate": 3.582716107074153e-05,
      "loss": 0.82,
      "step": 303970
    },
    {
      "epoch": 1.065374114612357,
      "grad_norm": 3.390625,
      "learning_rate": 3.5826512042077825e-05,
      "loss": 0.8247,
      "step": 303980
    },
    {
      "epoch": 1.0654091621192527,
      "grad_norm": 2.796875,
      "learning_rate": 3.5825863013414126e-05,
      "loss": 0.9074,
      "step": 303990
    },
    {
      "epoch": 1.0654442096261483,
      "grad_norm": 3.171875,
      "learning_rate": 3.582521398475042e-05,
      "loss": 0.8841,
      "step": 304000
    },
    {
      "epoch": 1.0654792571330438,
      "grad_norm": 2.734375,
      "learning_rate": 3.582456495608672e-05,
      "loss": 0.8095,
      "step": 304010
    },
    {
      "epoch": 1.0655143046399393,
      "grad_norm": 2.859375,
      "learning_rate": 3.582391592742302e-05,
      "loss": 0.8643,
      "step": 304020
    },
    {
      "epoch": 1.065549352146835,
      "grad_norm": 2.875,
      "learning_rate": 3.582326689875932e-05,
      "loss": 0.9015,
      "step": 304030
    },
    {
      "epoch": 1.0655843996537306,
      "grad_norm": 2.953125,
      "learning_rate": 3.582261787009561e-05,
      "loss": 0.8439,
      "step": 304040
    },
    {
      "epoch": 1.0656194471606262,
      "grad_norm": 2.65625,
      "learning_rate": 3.5821968841431914e-05,
      "loss": 0.9955,
      "step": 304050
    },
    {
      "epoch": 1.065654494667522,
      "grad_norm": 2.890625,
      "learning_rate": 3.582131981276821e-05,
      "loss": 0.8935,
      "step": 304060
    },
    {
      "epoch": 1.0656895421744175,
      "grad_norm": 2.796875,
      "learning_rate": 3.582067078410451e-05,
      "loss": 0.8853,
      "step": 304070
    },
    {
      "epoch": 1.065724589681313,
      "grad_norm": 2.625,
      "learning_rate": 3.5820021755440805e-05,
      "loss": 0.8358,
      "step": 304080
    },
    {
      "epoch": 1.0657596371882085,
      "grad_norm": 2.8125,
      "learning_rate": 3.5819372726777106e-05,
      "loss": 0.9544,
      "step": 304090
    },
    {
      "epoch": 1.0657946846951043,
      "grad_norm": 3.046875,
      "learning_rate": 3.581872369811341e-05,
      "loss": 0.7911,
      "step": 304100
    },
    {
      "epoch": 1.0658297322019998,
      "grad_norm": 2.859375,
      "learning_rate": 3.58180746694497e-05,
      "loss": 0.8836,
      "step": 304110
    },
    {
      "epoch": 1.0658647797088954,
      "grad_norm": 2.734375,
      "learning_rate": 3.5817425640786004e-05,
      "loss": 0.7955,
      "step": 304120
    },
    {
      "epoch": 1.065899827215791,
      "grad_norm": 2.203125,
      "learning_rate": 3.58167766121223e-05,
      "loss": 0.9167,
      "step": 304130
    },
    {
      "epoch": 1.0659348747226867,
      "grad_norm": 3.0625,
      "learning_rate": 3.58161275834586e-05,
      "loss": 0.857,
      "step": 304140
    },
    {
      "epoch": 1.0659699222295822,
      "grad_norm": 2.921875,
      "learning_rate": 3.5815478554794894e-05,
      "loss": 0.8152,
      "step": 304150
    },
    {
      "epoch": 1.0660049697364777,
      "grad_norm": 2.609375,
      "learning_rate": 3.5814829526131196e-05,
      "loss": 0.9694,
      "step": 304160
    },
    {
      "epoch": 1.0660400172433735,
      "grad_norm": 2.734375,
      "learning_rate": 3.581418049746749e-05,
      "loss": 0.8827,
      "step": 304170
    },
    {
      "epoch": 1.066075064750269,
      "grad_norm": 2.953125,
      "learning_rate": 3.581353146880379e-05,
      "loss": 0.8481,
      "step": 304180
    },
    {
      "epoch": 1.0661101122571646,
      "grad_norm": 3.140625,
      "learning_rate": 3.5812882440140086e-05,
      "loss": 0.8273,
      "step": 304190
    },
    {
      "epoch": 1.06614515976406,
      "grad_norm": 3.359375,
      "learning_rate": 3.581223341147639e-05,
      "loss": 0.8366,
      "step": 304200
    },
    {
      "epoch": 1.0661802072709559,
      "grad_norm": 2.8125,
      "learning_rate": 3.581158438281268e-05,
      "loss": 0.8835,
      "step": 304210
    },
    {
      "epoch": 1.0662152547778514,
      "grad_norm": 2.625,
      "learning_rate": 3.5810935354148984e-05,
      "loss": 0.8588,
      "step": 304220
    },
    {
      "epoch": 1.066250302284747,
      "grad_norm": 2.6875,
      "learning_rate": 3.5810286325485285e-05,
      "loss": 0.871,
      "step": 304230
    },
    {
      "epoch": 1.0662853497916425,
      "grad_norm": 2.484375,
      "learning_rate": 3.580963729682158e-05,
      "loss": 0.8707,
      "step": 304240
    },
    {
      "epoch": 1.0663203972985382,
      "grad_norm": 2.875,
      "learning_rate": 3.580898826815788e-05,
      "loss": 0.831,
      "step": 304250
    },
    {
      "epoch": 1.0663554448054338,
      "grad_norm": 2.90625,
      "learning_rate": 3.5808339239494176e-05,
      "loss": 0.8587,
      "step": 304260
    },
    {
      "epoch": 1.0663904923123293,
      "grad_norm": 2.890625,
      "learning_rate": 3.580769021083048e-05,
      "loss": 0.8984,
      "step": 304270
    },
    {
      "epoch": 1.066425539819225,
      "grad_norm": 2.875,
      "learning_rate": 3.580704118216677e-05,
      "loss": 0.9292,
      "step": 304280
    },
    {
      "epoch": 1.0664605873261206,
      "grad_norm": 2.609375,
      "learning_rate": 3.580639215350307e-05,
      "loss": 0.7711,
      "step": 304290
    },
    {
      "epoch": 1.0664956348330161,
      "grad_norm": 2.65625,
      "learning_rate": 3.580574312483937e-05,
      "loss": 0.8253,
      "step": 304300
    },
    {
      "epoch": 1.0665306823399117,
      "grad_norm": 3.265625,
      "learning_rate": 3.580509409617567e-05,
      "loss": 0.8657,
      "step": 304310
    },
    {
      "epoch": 1.0665657298468074,
      "grad_norm": 3.140625,
      "learning_rate": 3.5804445067511964e-05,
      "loss": 0.8527,
      "step": 304320
    },
    {
      "epoch": 1.066600777353703,
      "grad_norm": 3.171875,
      "learning_rate": 3.5803796038848265e-05,
      "loss": 0.9776,
      "step": 304330
    },
    {
      "epoch": 1.0666358248605985,
      "grad_norm": 3.078125,
      "learning_rate": 3.580314701018456e-05,
      "loss": 0.9104,
      "step": 304340
    },
    {
      "epoch": 1.066670872367494,
      "grad_norm": 2.890625,
      "learning_rate": 3.5802497981520854e-05,
      "loss": 0.8326,
      "step": 304350
    },
    {
      "epoch": 1.0667059198743898,
      "grad_norm": 3.1875,
      "learning_rate": 3.5801848952857156e-05,
      "loss": 0.8399,
      "step": 304360
    },
    {
      "epoch": 1.0667409673812853,
      "grad_norm": 2.8125,
      "learning_rate": 3.580119992419345e-05,
      "loss": 0.8852,
      "step": 304370
    },
    {
      "epoch": 1.0667760148881809,
      "grad_norm": 3.125,
      "learning_rate": 3.580055089552975e-05,
      "loss": 0.8826,
      "step": 304380
    },
    {
      "epoch": 1.0668110623950766,
      "grad_norm": 2.71875,
      "learning_rate": 3.5799901866866046e-05,
      "loss": 0.9332,
      "step": 304390
    },
    {
      "epoch": 1.0668461099019722,
      "grad_norm": 2.96875,
      "learning_rate": 3.579925283820235e-05,
      "loss": 0.96,
      "step": 304400
    },
    {
      "epoch": 1.0668811574088677,
      "grad_norm": 2.546875,
      "learning_rate": 3.579860380953864e-05,
      "loss": 0.8751,
      "step": 304410
    },
    {
      "epoch": 1.0669162049157632,
      "grad_norm": 2.59375,
      "learning_rate": 3.5797954780874944e-05,
      "loss": 0.9354,
      "step": 304420
    },
    {
      "epoch": 1.066951252422659,
      "grad_norm": 2.921875,
      "learning_rate": 3.579730575221124e-05,
      "loss": 0.8591,
      "step": 304430
    },
    {
      "epoch": 1.0669862999295545,
      "grad_norm": 2.625,
      "learning_rate": 3.579665672354754e-05,
      "loss": 0.8342,
      "step": 304440
    },
    {
      "epoch": 1.06702134743645,
      "grad_norm": 3.0,
      "learning_rate": 3.5796007694883834e-05,
      "loss": 0.8054,
      "step": 304450
    },
    {
      "epoch": 1.0670563949433456,
      "grad_norm": 3.046875,
      "learning_rate": 3.5795358666220136e-05,
      "loss": 0.8887,
      "step": 304460
    },
    {
      "epoch": 1.0670914424502413,
      "grad_norm": 3.21875,
      "learning_rate": 3.579470963755644e-05,
      "loss": 0.9443,
      "step": 304470
    },
    {
      "epoch": 1.0671264899571369,
      "grad_norm": 3.234375,
      "learning_rate": 3.579406060889273e-05,
      "loss": 0.868,
      "step": 304480
    },
    {
      "epoch": 1.0671615374640324,
      "grad_norm": 2.90625,
      "learning_rate": 3.579341158022903e-05,
      "loss": 0.8865,
      "step": 304490
    },
    {
      "epoch": 1.0671965849709282,
      "grad_norm": 3.125,
      "learning_rate": 3.579276255156533e-05,
      "loss": 0.897,
      "step": 304500
    },
    {
      "epoch": 1.0672316324778237,
      "grad_norm": 3.03125,
      "learning_rate": 3.579211352290163e-05,
      "loss": 0.8866,
      "step": 304510
    },
    {
      "epoch": 1.0672666799847192,
      "grad_norm": 2.953125,
      "learning_rate": 3.5791464494237924e-05,
      "loss": 0.8371,
      "step": 304520
    },
    {
      "epoch": 1.0673017274916148,
      "grad_norm": 3.0625,
      "learning_rate": 3.5790815465574225e-05,
      "loss": 0.8487,
      "step": 304530
    },
    {
      "epoch": 1.0673367749985105,
      "grad_norm": 3.171875,
      "learning_rate": 3.579016643691052e-05,
      "loss": 0.8632,
      "step": 304540
    },
    {
      "epoch": 1.067371822505406,
      "grad_norm": 3.015625,
      "learning_rate": 3.578951740824682e-05,
      "loss": 0.8112,
      "step": 304550
    },
    {
      "epoch": 1.0674068700123016,
      "grad_norm": 2.734375,
      "learning_rate": 3.5788868379583116e-05,
      "loss": 0.8055,
      "step": 304560
    },
    {
      "epoch": 1.0674419175191974,
      "grad_norm": 3.25,
      "learning_rate": 3.578821935091942e-05,
      "loss": 0.8696,
      "step": 304570
    },
    {
      "epoch": 1.067476965026093,
      "grad_norm": 3.859375,
      "learning_rate": 3.578757032225571e-05,
      "loss": 0.8945,
      "step": 304580
    },
    {
      "epoch": 1.0675120125329884,
      "grad_norm": 3.203125,
      "learning_rate": 3.578692129359201e-05,
      "loss": 0.8217,
      "step": 304590
    },
    {
      "epoch": 1.067547060039884,
      "grad_norm": 2.859375,
      "learning_rate": 3.5786272264928314e-05,
      "loss": 0.7967,
      "step": 304600
    },
    {
      "epoch": 1.0675821075467797,
      "grad_norm": 2.78125,
      "learning_rate": 3.578562323626461e-05,
      "loss": 0.8181,
      "step": 304610
    },
    {
      "epoch": 1.0676171550536753,
      "grad_norm": 2.765625,
      "learning_rate": 3.578497420760091e-05,
      "loss": 0.7438,
      "step": 304620
    },
    {
      "epoch": 1.0676522025605708,
      "grad_norm": 2.78125,
      "learning_rate": 3.5784325178937205e-05,
      "loss": 0.799,
      "step": 304630
    },
    {
      "epoch": 1.0676872500674666,
      "grad_norm": 2.671875,
      "learning_rate": 3.5783676150273506e-05,
      "loss": 0.9036,
      "step": 304640
    },
    {
      "epoch": 1.067722297574362,
      "grad_norm": 2.984375,
      "learning_rate": 3.57830271216098e-05,
      "loss": 0.9453,
      "step": 304650
    },
    {
      "epoch": 1.0677573450812576,
      "grad_norm": 3.390625,
      "learning_rate": 3.57823780929461e-05,
      "loss": 0.934,
      "step": 304660
    },
    {
      "epoch": 1.0677923925881532,
      "grad_norm": 3.21875,
      "learning_rate": 3.57817290642824e-05,
      "loss": 0.9206,
      "step": 304670
    },
    {
      "epoch": 1.067827440095049,
      "grad_norm": 3.046875,
      "learning_rate": 3.57810800356187e-05,
      "loss": 0.96,
      "step": 304680
    },
    {
      "epoch": 1.0678624876019445,
      "grad_norm": 2.875,
      "learning_rate": 3.578043100695499e-05,
      "loss": 0.9031,
      "step": 304690
    },
    {
      "epoch": 1.06789753510884,
      "grad_norm": 3.046875,
      "learning_rate": 3.5779781978291294e-05,
      "loss": 0.884,
      "step": 304700
    },
    {
      "epoch": 1.0679325826157355,
      "grad_norm": 3.421875,
      "learning_rate": 3.577913294962759e-05,
      "loss": 0.9056,
      "step": 304710
    },
    {
      "epoch": 1.0679676301226313,
      "grad_norm": 2.671875,
      "learning_rate": 3.5778483920963884e-05,
      "loss": 0.8217,
      "step": 304720
    },
    {
      "epoch": 1.0680026776295268,
      "grad_norm": 3.078125,
      "learning_rate": 3.5777834892300185e-05,
      "loss": 0.8316,
      "step": 304730
    },
    {
      "epoch": 1.0680377251364224,
      "grad_norm": 3.0,
      "learning_rate": 3.577718586363648e-05,
      "loss": 0.8516,
      "step": 304740
    },
    {
      "epoch": 1.0680727726433181,
      "grad_norm": 2.671875,
      "learning_rate": 3.577653683497278e-05,
      "loss": 0.802,
      "step": 304750
    },
    {
      "epoch": 1.0681078201502137,
      "grad_norm": 2.734375,
      "learning_rate": 3.5775887806309076e-05,
      "loss": 0.8668,
      "step": 304760
    },
    {
      "epoch": 1.0681428676571092,
      "grad_norm": 3.25,
      "learning_rate": 3.577523877764538e-05,
      "loss": 0.9672,
      "step": 304770
    },
    {
      "epoch": 1.0681779151640047,
      "grad_norm": 2.9375,
      "learning_rate": 3.577458974898167e-05,
      "loss": 0.8978,
      "step": 304780
    },
    {
      "epoch": 1.0682129626709005,
      "grad_norm": 2.953125,
      "learning_rate": 3.577394072031797e-05,
      "loss": 0.8657,
      "step": 304790
    },
    {
      "epoch": 1.068248010177796,
      "grad_norm": 2.765625,
      "learning_rate": 3.577329169165427e-05,
      "loss": 0.9002,
      "step": 304800
    },
    {
      "epoch": 1.0682830576846916,
      "grad_norm": 2.71875,
      "learning_rate": 3.577264266299057e-05,
      "loss": 0.8579,
      "step": 304810
    },
    {
      "epoch": 1.068318105191587,
      "grad_norm": 3.0,
      "learning_rate": 3.5771993634326864e-05,
      "loss": 0.899,
      "step": 304820
    },
    {
      "epoch": 1.0683531526984829,
      "grad_norm": 2.796875,
      "learning_rate": 3.5771344605663165e-05,
      "loss": 0.8718,
      "step": 304830
    },
    {
      "epoch": 1.0683882002053784,
      "grad_norm": 2.703125,
      "learning_rate": 3.5770695576999466e-05,
      "loss": 0.8533,
      "step": 304840
    },
    {
      "epoch": 1.068423247712274,
      "grad_norm": 3.1875,
      "learning_rate": 3.577004654833576e-05,
      "loss": 0.8808,
      "step": 304850
    },
    {
      "epoch": 1.0684582952191697,
      "grad_norm": 2.890625,
      "learning_rate": 3.576939751967206e-05,
      "loss": 0.8547,
      "step": 304860
    },
    {
      "epoch": 1.0684933427260652,
      "grad_norm": 2.953125,
      "learning_rate": 3.576874849100836e-05,
      "loss": 0.8501,
      "step": 304870
    },
    {
      "epoch": 1.0685283902329608,
      "grad_norm": 2.8125,
      "learning_rate": 3.576809946234466e-05,
      "loss": 0.8372,
      "step": 304880
    },
    {
      "epoch": 1.0685634377398563,
      "grad_norm": 3.765625,
      "learning_rate": 3.576745043368095e-05,
      "loss": 0.8764,
      "step": 304890
    },
    {
      "epoch": 1.068598485246752,
      "grad_norm": 2.578125,
      "learning_rate": 3.5766801405017254e-05,
      "loss": 0.9519,
      "step": 304900
    },
    {
      "epoch": 1.0686335327536476,
      "grad_norm": 2.890625,
      "learning_rate": 3.576615237635355e-05,
      "loss": 0.8254,
      "step": 304910
    },
    {
      "epoch": 1.0686685802605431,
      "grad_norm": 2.546875,
      "learning_rate": 3.576550334768985e-05,
      "loss": 0.8766,
      "step": 304920
    },
    {
      "epoch": 1.0687036277674387,
      "grad_norm": 2.6875,
      "learning_rate": 3.5764854319026145e-05,
      "loss": 0.8247,
      "step": 304930
    },
    {
      "epoch": 1.0687386752743344,
      "grad_norm": 2.734375,
      "learning_rate": 3.5764205290362446e-05,
      "loss": 0.8268,
      "step": 304940
    },
    {
      "epoch": 1.06877372278123,
      "grad_norm": 2.796875,
      "learning_rate": 3.576355626169874e-05,
      "loss": 0.9016,
      "step": 304950
    },
    {
      "epoch": 1.0688087702881255,
      "grad_norm": 3.359375,
      "learning_rate": 3.576290723303504e-05,
      "loss": 0.7895,
      "step": 304960
    },
    {
      "epoch": 1.0688438177950212,
      "grad_norm": 3.03125,
      "learning_rate": 3.5762258204371344e-05,
      "loss": 0.8872,
      "step": 304970
    },
    {
      "epoch": 1.0688788653019168,
      "grad_norm": 3.171875,
      "learning_rate": 3.576160917570764e-05,
      "loss": 0.8553,
      "step": 304980
    },
    {
      "epoch": 1.0689139128088123,
      "grad_norm": 3.0625,
      "learning_rate": 3.576096014704394e-05,
      "loss": 0.8632,
      "step": 304990
    },
    {
      "epoch": 1.0689489603157079,
      "grad_norm": 3.1875,
      "learning_rate": 3.5760311118380234e-05,
      "loss": 0.8318,
      "step": 305000
    },
    {
      "epoch": 1.0689489603157079,
      "eval_loss": 0.8165531754493713,
      "eval_runtime": 553.076,
      "eval_samples_per_second": 687.855,
      "eval_steps_per_second": 57.321,
      "step": 305000
    },
    {
      "epoch": 1.0689840078226036,
      "grad_norm": 2.875,
      "learning_rate": 3.5759662089716536e-05,
      "loss": 0.9527,
      "step": 305010
    },
    {
      "epoch": 1.0690190553294991,
      "grad_norm": 2.40625,
      "learning_rate": 3.575901306105283e-05,
      "loss": 0.8089,
      "step": 305020
    },
    {
      "epoch": 1.0690541028363947,
      "grad_norm": 3.015625,
      "learning_rate": 3.575836403238913e-05,
      "loss": 0.8303,
      "step": 305030
    },
    {
      "epoch": 1.0690891503432902,
      "grad_norm": 2.859375,
      "learning_rate": 3.5757715003725426e-05,
      "loss": 0.8562,
      "step": 305040
    },
    {
      "epoch": 1.069124197850186,
      "grad_norm": 3.015625,
      "learning_rate": 3.575706597506173e-05,
      "loss": 0.8573,
      "step": 305050
    },
    {
      "epoch": 1.0691592453570815,
      "grad_norm": 2.890625,
      "learning_rate": 3.575641694639802e-05,
      "loss": 0.8015,
      "step": 305060
    },
    {
      "epoch": 1.069194292863977,
      "grad_norm": 2.765625,
      "learning_rate": 3.5755767917734324e-05,
      "loss": 0.8309,
      "step": 305070
    },
    {
      "epoch": 1.0692293403708728,
      "grad_norm": 3.28125,
      "learning_rate": 3.575511888907062e-05,
      "loss": 0.8946,
      "step": 305080
    },
    {
      "epoch": 1.0692643878777683,
      "grad_norm": 3.4375,
      "learning_rate": 3.575446986040692e-05,
      "loss": 0.8702,
      "step": 305090
    },
    {
      "epoch": 1.0692994353846639,
      "grad_norm": 3.359375,
      "learning_rate": 3.5753820831743214e-05,
      "loss": 0.8347,
      "step": 305100
    },
    {
      "epoch": 1.0693344828915594,
      "grad_norm": 3.140625,
      "learning_rate": 3.575317180307951e-05,
      "loss": 0.8138,
      "step": 305110
    },
    {
      "epoch": 1.0693695303984552,
      "grad_norm": 2.796875,
      "learning_rate": 3.575252277441581e-05,
      "loss": 0.8285,
      "step": 305120
    },
    {
      "epoch": 1.0694045779053507,
      "grad_norm": 2.78125,
      "learning_rate": 3.5751873745752105e-05,
      "loss": 0.8949,
      "step": 305130
    },
    {
      "epoch": 1.0694396254122462,
      "grad_norm": 2.5,
      "learning_rate": 3.5751224717088406e-05,
      "loss": 0.8383,
      "step": 305140
    },
    {
      "epoch": 1.069474672919142,
      "grad_norm": 2.96875,
      "learning_rate": 3.57505756884247e-05,
      "loss": 0.8509,
      "step": 305150
    },
    {
      "epoch": 1.0695097204260375,
      "grad_norm": 2.734375,
      "learning_rate": 3.5749926659761e-05,
      "loss": 0.8339,
      "step": 305160
    },
    {
      "epoch": 1.069544767932933,
      "grad_norm": 3.375,
      "learning_rate": 3.57492776310973e-05,
      "loss": 0.8362,
      "step": 305170
    },
    {
      "epoch": 1.0695798154398286,
      "grad_norm": 2.890625,
      "learning_rate": 3.57486286024336e-05,
      "loss": 0.8388,
      "step": 305180
    },
    {
      "epoch": 1.0696148629467244,
      "grad_norm": 2.859375,
      "learning_rate": 3.57479795737699e-05,
      "loss": 0.9032,
      "step": 305190
    },
    {
      "epoch": 1.06964991045362,
      "grad_norm": 3.25,
      "learning_rate": 3.5747330545106194e-05,
      "loss": 0.8711,
      "step": 305200
    },
    {
      "epoch": 1.0696849579605154,
      "grad_norm": 3.5,
      "learning_rate": 3.5746681516442496e-05,
      "loss": 0.8548,
      "step": 305210
    },
    {
      "epoch": 1.069720005467411,
      "grad_norm": 3.015625,
      "learning_rate": 3.574603248777879e-05,
      "loss": 0.9643,
      "step": 305220
    },
    {
      "epoch": 1.0697550529743067,
      "grad_norm": 3.0625,
      "learning_rate": 3.574538345911509e-05,
      "loss": 0.9069,
      "step": 305230
    },
    {
      "epoch": 1.0697901004812023,
      "grad_norm": 2.640625,
      "learning_rate": 3.5744734430451386e-05,
      "loss": 0.8917,
      "step": 305240
    },
    {
      "epoch": 1.0698251479880978,
      "grad_norm": 2.875,
      "learning_rate": 3.574408540178769e-05,
      "loss": 0.7684,
      "step": 305250
    },
    {
      "epoch": 1.0698601954949936,
      "grad_norm": 3.296875,
      "learning_rate": 3.574343637312398e-05,
      "loss": 0.9172,
      "step": 305260
    },
    {
      "epoch": 1.069895243001889,
      "grad_norm": 2.96875,
      "learning_rate": 3.5742787344460284e-05,
      "loss": 0.8718,
      "step": 305270
    },
    {
      "epoch": 1.0699302905087846,
      "grad_norm": 2.84375,
      "learning_rate": 3.574213831579658e-05,
      "loss": 0.8883,
      "step": 305280
    },
    {
      "epoch": 1.0699653380156802,
      "grad_norm": 2.671875,
      "learning_rate": 3.574148928713288e-05,
      "loss": 0.851,
      "step": 305290
    },
    {
      "epoch": 1.070000385522576,
      "grad_norm": 2.5,
      "learning_rate": 3.5740840258469174e-05,
      "loss": 0.8039,
      "step": 305300
    },
    {
      "epoch": 1.0700354330294715,
      "grad_norm": 3.0,
      "learning_rate": 3.5740191229805476e-05,
      "loss": 0.86,
      "step": 305310
    },
    {
      "epoch": 1.070070480536367,
      "grad_norm": 3.046875,
      "learning_rate": 3.573954220114177e-05,
      "loss": 0.9068,
      "step": 305320
    },
    {
      "epoch": 1.0701055280432628,
      "grad_norm": 3.09375,
      "learning_rate": 3.573889317247807e-05,
      "loss": 0.9299,
      "step": 305330
    },
    {
      "epoch": 1.0701405755501583,
      "grad_norm": 2.9375,
      "learning_rate": 3.573824414381437e-05,
      "loss": 0.9813,
      "step": 305340
    },
    {
      "epoch": 1.0701756230570538,
      "grad_norm": 3.171875,
      "learning_rate": 3.573759511515067e-05,
      "loss": 0.8153,
      "step": 305350
    },
    {
      "epoch": 1.0702106705639494,
      "grad_norm": 2.859375,
      "learning_rate": 3.573694608648697e-05,
      "loss": 0.87,
      "step": 305360
    },
    {
      "epoch": 1.0702457180708451,
      "grad_norm": 3.25,
      "learning_rate": 3.5736297057823264e-05,
      "loss": 0.803,
      "step": 305370
    },
    {
      "epoch": 1.0702807655777407,
      "grad_norm": 2.34375,
      "learning_rate": 3.5735648029159565e-05,
      "loss": 0.8732,
      "step": 305380
    },
    {
      "epoch": 1.0703158130846362,
      "grad_norm": 2.890625,
      "learning_rate": 3.573499900049586e-05,
      "loss": 0.8431,
      "step": 305390
    },
    {
      "epoch": 1.0703508605915317,
      "grad_norm": 2.5625,
      "learning_rate": 3.573434997183216e-05,
      "loss": 0.7687,
      "step": 305400
    },
    {
      "epoch": 1.0703859080984275,
      "grad_norm": 3.203125,
      "learning_rate": 3.5733700943168456e-05,
      "loss": 0.9196,
      "step": 305410
    },
    {
      "epoch": 1.070420955605323,
      "grad_norm": 2.640625,
      "learning_rate": 3.573305191450476e-05,
      "loss": 0.8264,
      "step": 305420
    },
    {
      "epoch": 1.0704560031122186,
      "grad_norm": 2.96875,
      "learning_rate": 3.573240288584105e-05,
      "loss": 0.8145,
      "step": 305430
    },
    {
      "epoch": 1.0704910506191143,
      "grad_norm": 3.125,
      "learning_rate": 3.573175385717735e-05,
      "loss": 0.9039,
      "step": 305440
    },
    {
      "epoch": 1.0705260981260099,
      "grad_norm": 2.5625,
      "learning_rate": 3.573110482851365e-05,
      "loss": 0.9409,
      "step": 305450
    },
    {
      "epoch": 1.0705611456329054,
      "grad_norm": 3.125,
      "learning_rate": 3.573045579984995e-05,
      "loss": 0.9948,
      "step": 305460
    },
    {
      "epoch": 1.070596193139801,
      "grad_norm": 2.171875,
      "learning_rate": 3.5729806771186244e-05,
      "loss": 0.791,
      "step": 305470
    },
    {
      "epoch": 1.0706312406466967,
      "grad_norm": 3.09375,
      "learning_rate": 3.572915774252254e-05,
      "loss": 0.8682,
      "step": 305480
    },
    {
      "epoch": 1.0706662881535922,
      "grad_norm": 3.03125,
      "learning_rate": 3.572850871385884e-05,
      "loss": 0.8287,
      "step": 305490
    },
    {
      "epoch": 1.0707013356604878,
      "grad_norm": 3.234375,
      "learning_rate": 3.5727859685195134e-05,
      "loss": 0.9304,
      "step": 305500
    },
    {
      "epoch": 1.0707363831673833,
      "grad_norm": 2.4375,
      "learning_rate": 3.5727210656531436e-05,
      "loss": 0.8842,
      "step": 305510
    },
    {
      "epoch": 1.070771430674279,
      "grad_norm": 2.625,
      "learning_rate": 3.572656162786773e-05,
      "loss": 0.8573,
      "step": 305520
    },
    {
      "epoch": 1.0708064781811746,
      "grad_norm": 3.1875,
      "learning_rate": 3.572591259920403e-05,
      "loss": 1.0528,
      "step": 305530
    },
    {
      "epoch": 1.0708415256880701,
      "grad_norm": 3.03125,
      "learning_rate": 3.5725263570540326e-05,
      "loss": 0.8309,
      "step": 305540
    },
    {
      "epoch": 1.0708765731949659,
      "grad_norm": 2.4375,
      "learning_rate": 3.572461454187663e-05,
      "loss": 0.8909,
      "step": 305550
    },
    {
      "epoch": 1.0709116207018614,
      "grad_norm": 2.640625,
      "learning_rate": 3.572396551321293e-05,
      "loss": 0.8877,
      "step": 305560
    },
    {
      "epoch": 1.070946668208757,
      "grad_norm": 2.703125,
      "learning_rate": 3.5723316484549224e-05,
      "loss": 0.8236,
      "step": 305570
    },
    {
      "epoch": 1.0709817157156525,
      "grad_norm": 2.40625,
      "learning_rate": 3.5722667455885525e-05,
      "loss": 0.7946,
      "step": 305580
    },
    {
      "epoch": 1.0710167632225482,
      "grad_norm": 2.75,
      "learning_rate": 3.572201842722182e-05,
      "loss": 0.8808,
      "step": 305590
    },
    {
      "epoch": 1.0710518107294438,
      "grad_norm": 3.5625,
      "learning_rate": 3.572136939855812e-05,
      "loss": 0.8281,
      "step": 305600
    },
    {
      "epoch": 1.0710868582363393,
      "grad_norm": 2.875,
      "learning_rate": 3.5720720369894416e-05,
      "loss": 0.8713,
      "step": 305610
    },
    {
      "epoch": 1.0711219057432348,
      "grad_norm": 3.046875,
      "learning_rate": 3.572007134123072e-05,
      "loss": 0.8967,
      "step": 305620
    },
    {
      "epoch": 1.0711569532501306,
      "grad_norm": 2.890625,
      "learning_rate": 3.571942231256701e-05,
      "loss": 0.8614,
      "step": 305630
    },
    {
      "epoch": 1.0711920007570261,
      "grad_norm": 2.34375,
      "learning_rate": 3.571877328390331e-05,
      "loss": 0.8338,
      "step": 305640
    },
    {
      "epoch": 1.0712270482639217,
      "grad_norm": 3.140625,
      "learning_rate": 3.571812425523961e-05,
      "loss": 1.0197,
      "step": 305650
    },
    {
      "epoch": 1.0712620957708174,
      "grad_norm": 2.390625,
      "learning_rate": 3.571747522657591e-05,
      "loss": 0.9114,
      "step": 305660
    },
    {
      "epoch": 1.071297143277713,
      "grad_norm": 2.625,
      "learning_rate": 3.5716826197912204e-05,
      "loss": 0.8163,
      "step": 305670
    },
    {
      "epoch": 1.0713321907846085,
      "grad_norm": 2.953125,
      "learning_rate": 3.5716177169248505e-05,
      "loss": 0.8814,
      "step": 305680
    },
    {
      "epoch": 1.071367238291504,
      "grad_norm": 2.84375,
      "learning_rate": 3.57155281405848e-05,
      "loss": 0.8898,
      "step": 305690
    },
    {
      "epoch": 1.0714022857983998,
      "grad_norm": 3.359375,
      "learning_rate": 3.57148791119211e-05,
      "loss": 0.8221,
      "step": 305700
    },
    {
      "epoch": 1.0714373333052953,
      "grad_norm": 3.0,
      "learning_rate": 3.57142300832574e-05,
      "loss": 0.8484,
      "step": 305710
    },
    {
      "epoch": 1.0714723808121909,
      "grad_norm": 3.390625,
      "learning_rate": 3.57135810545937e-05,
      "loss": 0.874,
      "step": 305720
    },
    {
      "epoch": 1.0715074283190864,
      "grad_norm": 3.25,
      "learning_rate": 3.571293202593e-05,
      "loss": 0.8357,
      "step": 305730
    },
    {
      "epoch": 1.0715424758259822,
      "grad_norm": 2.796875,
      "learning_rate": 3.571228299726629e-05,
      "loss": 0.8167,
      "step": 305740
    },
    {
      "epoch": 1.0715775233328777,
      "grad_norm": 2.828125,
      "learning_rate": 3.5711633968602594e-05,
      "loss": 0.9414,
      "step": 305750
    },
    {
      "epoch": 1.0716125708397732,
      "grad_norm": 3.171875,
      "learning_rate": 3.571098493993889e-05,
      "loss": 0.8465,
      "step": 305760
    },
    {
      "epoch": 1.071647618346669,
      "grad_norm": 3.109375,
      "learning_rate": 3.571033591127519e-05,
      "loss": 0.8622,
      "step": 305770
    },
    {
      "epoch": 1.0716826658535645,
      "grad_norm": 3.15625,
      "learning_rate": 3.5709686882611485e-05,
      "loss": 0.9162,
      "step": 305780
    },
    {
      "epoch": 1.07171771336046,
      "grad_norm": 2.953125,
      "learning_rate": 3.5709037853947786e-05,
      "loss": 0.8211,
      "step": 305790
    },
    {
      "epoch": 1.0717527608673556,
      "grad_norm": 3.109375,
      "learning_rate": 3.570838882528408e-05,
      "loss": 0.8237,
      "step": 305800
    },
    {
      "epoch": 1.0717878083742514,
      "grad_norm": 2.765625,
      "learning_rate": 3.570773979662038e-05,
      "loss": 0.7845,
      "step": 305810
    },
    {
      "epoch": 1.071822855881147,
      "grad_norm": 3.234375,
      "learning_rate": 3.570709076795668e-05,
      "loss": 0.9281,
      "step": 305820
    },
    {
      "epoch": 1.0718579033880424,
      "grad_norm": 2.796875,
      "learning_rate": 3.570644173929298e-05,
      "loss": 0.9231,
      "step": 305830
    },
    {
      "epoch": 1.0718929508949382,
      "grad_norm": 3.0625,
      "learning_rate": 3.570579271062928e-05,
      "loss": 0.8466,
      "step": 305840
    },
    {
      "epoch": 1.0719279984018337,
      "grad_norm": 2.859375,
      "learning_rate": 3.570514368196557e-05,
      "loss": 0.9152,
      "step": 305850
    },
    {
      "epoch": 1.0719630459087293,
      "grad_norm": 2.59375,
      "learning_rate": 3.570449465330187e-05,
      "loss": 0.8738,
      "step": 305860
    },
    {
      "epoch": 1.0719980934156248,
      "grad_norm": 2.9375,
      "learning_rate": 3.5703845624638164e-05,
      "loss": 0.8208,
      "step": 305870
    },
    {
      "epoch": 1.0720331409225206,
      "grad_norm": 2.96875,
      "learning_rate": 3.5703196595974465e-05,
      "loss": 0.8989,
      "step": 305880
    },
    {
      "epoch": 1.072068188429416,
      "grad_norm": 3.015625,
      "learning_rate": 3.570254756731076e-05,
      "loss": 0.919,
      "step": 305890
    },
    {
      "epoch": 1.0721032359363116,
      "grad_norm": 2.8125,
      "learning_rate": 3.570189853864706e-05,
      "loss": 0.8458,
      "step": 305900
    },
    {
      "epoch": 1.0721382834432074,
      "grad_norm": 2.875,
      "learning_rate": 3.5701249509983356e-05,
      "loss": 0.8184,
      "step": 305910
    },
    {
      "epoch": 1.072173330950103,
      "grad_norm": 3.0,
      "learning_rate": 3.570060048131966e-05,
      "loss": 0.8758,
      "step": 305920
    },
    {
      "epoch": 1.0722083784569985,
      "grad_norm": 2.96875,
      "learning_rate": 3.569995145265596e-05,
      "loss": 0.8626,
      "step": 305930
    },
    {
      "epoch": 1.072243425963894,
      "grad_norm": 3.0625,
      "learning_rate": 3.569930242399225e-05,
      "loss": 0.8199,
      "step": 305940
    },
    {
      "epoch": 1.0722784734707898,
      "grad_norm": 3.125,
      "learning_rate": 3.5698653395328554e-05,
      "loss": 0.8657,
      "step": 305950
    },
    {
      "epoch": 1.0723135209776853,
      "grad_norm": 3.15625,
      "learning_rate": 3.569800436666485e-05,
      "loss": 0.8274,
      "step": 305960
    },
    {
      "epoch": 1.0723485684845808,
      "grad_norm": 3.3125,
      "learning_rate": 3.569735533800115e-05,
      "loss": 0.8746,
      "step": 305970
    },
    {
      "epoch": 1.0723836159914764,
      "grad_norm": 2.828125,
      "learning_rate": 3.5696706309337445e-05,
      "loss": 0.8842,
      "step": 305980
    },
    {
      "epoch": 1.0724186634983721,
      "grad_norm": 3.1875,
      "learning_rate": 3.5696057280673746e-05,
      "loss": 0.8527,
      "step": 305990
    },
    {
      "epoch": 1.0724537110052677,
      "grad_norm": 3.171875,
      "learning_rate": 3.569540825201004e-05,
      "loss": 0.8221,
      "step": 306000
    },
    {
      "epoch": 1.0724887585121632,
      "grad_norm": 2.515625,
      "learning_rate": 3.569475922334634e-05,
      "loss": 0.8952,
      "step": 306010
    },
    {
      "epoch": 1.072523806019059,
      "grad_norm": 3.203125,
      "learning_rate": 3.569411019468264e-05,
      "loss": 0.8245,
      "step": 306020
    },
    {
      "epoch": 1.0725588535259545,
      "grad_norm": 2.5,
      "learning_rate": 3.569346116601894e-05,
      "loss": 0.844,
      "step": 306030
    },
    {
      "epoch": 1.07259390103285,
      "grad_norm": 3.21875,
      "learning_rate": 3.569281213735523e-05,
      "loss": 0.8815,
      "step": 306040
    },
    {
      "epoch": 1.0726289485397456,
      "grad_norm": 3.0625,
      "learning_rate": 3.5692163108691534e-05,
      "loss": 0.9468,
      "step": 306050
    },
    {
      "epoch": 1.0726639960466413,
      "grad_norm": 2.765625,
      "learning_rate": 3.5691514080027836e-05,
      "loss": 0.8467,
      "step": 306060
    },
    {
      "epoch": 1.0726990435535368,
      "grad_norm": 2.71875,
      "learning_rate": 3.569086505136413e-05,
      "loss": 0.9208,
      "step": 306070
    },
    {
      "epoch": 1.0727340910604324,
      "grad_norm": 2.703125,
      "learning_rate": 3.569021602270043e-05,
      "loss": 0.7732,
      "step": 306080
    },
    {
      "epoch": 1.072769138567328,
      "grad_norm": 3.171875,
      "learning_rate": 3.5689566994036726e-05,
      "loss": 0.9009,
      "step": 306090
    },
    {
      "epoch": 1.0728041860742237,
      "grad_norm": 2.859375,
      "learning_rate": 3.568891796537303e-05,
      "loss": 0.8962,
      "step": 306100
    },
    {
      "epoch": 1.0728392335811192,
      "grad_norm": 2.8125,
      "learning_rate": 3.568826893670932e-05,
      "loss": 0.8404,
      "step": 306110
    },
    {
      "epoch": 1.0728742810880147,
      "grad_norm": 3.28125,
      "learning_rate": 3.5687619908045624e-05,
      "loss": 0.8943,
      "step": 306120
    },
    {
      "epoch": 1.0729093285949105,
      "grad_norm": 2.46875,
      "learning_rate": 3.568697087938192e-05,
      "loss": 0.9125,
      "step": 306130
    },
    {
      "epoch": 1.072944376101806,
      "grad_norm": 3.03125,
      "learning_rate": 3.568632185071822e-05,
      "loss": 0.9006,
      "step": 306140
    },
    {
      "epoch": 1.0729794236087016,
      "grad_norm": 2.78125,
      "learning_rate": 3.5685672822054514e-05,
      "loss": 0.8544,
      "step": 306150
    },
    {
      "epoch": 1.0730144711155971,
      "grad_norm": 2.671875,
      "learning_rate": 3.5685023793390816e-05,
      "loss": 0.8333,
      "step": 306160
    },
    {
      "epoch": 1.0730495186224929,
      "grad_norm": 3.03125,
      "learning_rate": 3.568437476472711e-05,
      "loss": 0.8545,
      "step": 306170
    },
    {
      "epoch": 1.0730845661293884,
      "grad_norm": 2.859375,
      "learning_rate": 3.568372573606341e-05,
      "loss": 0.8599,
      "step": 306180
    },
    {
      "epoch": 1.073119613636284,
      "grad_norm": 2.8125,
      "learning_rate": 3.5683076707399706e-05,
      "loss": 0.9793,
      "step": 306190
    },
    {
      "epoch": 1.0731546611431795,
      "grad_norm": 3.140625,
      "learning_rate": 3.568242767873601e-05,
      "loss": 0.8627,
      "step": 306200
    },
    {
      "epoch": 1.0731897086500752,
      "grad_norm": 2.828125,
      "learning_rate": 3.568177865007231e-05,
      "loss": 0.8434,
      "step": 306210
    },
    {
      "epoch": 1.0732247561569708,
      "grad_norm": 3.015625,
      "learning_rate": 3.56811296214086e-05,
      "loss": 0.9307,
      "step": 306220
    },
    {
      "epoch": 1.0732598036638663,
      "grad_norm": 2.890625,
      "learning_rate": 3.56804805927449e-05,
      "loss": 0.8978,
      "step": 306230
    },
    {
      "epoch": 1.073294851170762,
      "grad_norm": 2.546875,
      "learning_rate": 3.567983156408119e-05,
      "loss": 0.8338,
      "step": 306240
    },
    {
      "epoch": 1.0733298986776576,
      "grad_norm": 2.9375,
      "learning_rate": 3.5679182535417494e-05,
      "loss": 0.731,
      "step": 306250
    },
    {
      "epoch": 1.0733649461845531,
      "grad_norm": 2.8125,
      "learning_rate": 3.567853350675379e-05,
      "loss": 0.8384,
      "step": 306260
    },
    {
      "epoch": 1.0733999936914487,
      "grad_norm": 3.234375,
      "learning_rate": 3.567788447809009e-05,
      "loss": 0.9364,
      "step": 306270
    },
    {
      "epoch": 1.0734350411983444,
      "grad_norm": 2.65625,
      "learning_rate": 3.5677235449426385e-05,
      "loss": 0.7775,
      "step": 306280
    },
    {
      "epoch": 1.07347008870524,
      "grad_norm": 3.015625,
      "learning_rate": 3.5676586420762686e-05,
      "loss": 0.8632,
      "step": 306290
    },
    {
      "epoch": 1.0735051362121355,
      "grad_norm": 2.453125,
      "learning_rate": 3.567593739209899e-05,
      "loss": 0.9032,
      "step": 306300
    },
    {
      "epoch": 1.073540183719031,
      "grad_norm": 3.046875,
      "learning_rate": 3.567528836343528e-05,
      "loss": 0.801,
      "step": 306310
    },
    {
      "epoch": 1.0735752312259268,
      "grad_norm": 2.5625,
      "learning_rate": 3.5674639334771584e-05,
      "loss": 0.7587,
      "step": 306320
    },
    {
      "epoch": 1.0736102787328223,
      "grad_norm": 3.234375,
      "learning_rate": 3.567399030610788e-05,
      "loss": 0.8827,
      "step": 306330
    },
    {
      "epoch": 1.0736453262397179,
      "grad_norm": 3.21875,
      "learning_rate": 3.567334127744418e-05,
      "loss": 0.8927,
      "step": 306340
    },
    {
      "epoch": 1.0736803737466136,
      "grad_norm": 3.828125,
      "learning_rate": 3.5672692248780474e-05,
      "loss": 0.8855,
      "step": 306350
    },
    {
      "epoch": 1.0737154212535092,
      "grad_norm": 3.125,
      "learning_rate": 3.5672043220116776e-05,
      "loss": 0.8627,
      "step": 306360
    },
    {
      "epoch": 1.0737504687604047,
      "grad_norm": 3.171875,
      "learning_rate": 3.567139419145307e-05,
      "loss": 0.8794,
      "step": 306370
    },
    {
      "epoch": 1.0737855162673002,
      "grad_norm": 2.78125,
      "learning_rate": 3.567074516278937e-05,
      "loss": 0.8286,
      "step": 306380
    },
    {
      "epoch": 1.073820563774196,
      "grad_norm": 2.921875,
      "learning_rate": 3.5670096134125666e-05,
      "loss": 0.9487,
      "step": 306390
    },
    {
      "epoch": 1.0738556112810915,
      "grad_norm": 3.125,
      "learning_rate": 3.566944710546197e-05,
      "loss": 0.9406,
      "step": 306400
    },
    {
      "epoch": 1.073890658787987,
      "grad_norm": 2.34375,
      "learning_rate": 3.566879807679826e-05,
      "loss": 0.792,
      "step": 306410
    },
    {
      "epoch": 1.0739257062948826,
      "grad_norm": 2.9375,
      "learning_rate": 3.5668149048134564e-05,
      "loss": 0.8353,
      "step": 306420
    },
    {
      "epoch": 1.0739607538017784,
      "grad_norm": 3.015625,
      "learning_rate": 3.5667500019470865e-05,
      "loss": 0.9184,
      "step": 306430
    },
    {
      "epoch": 1.073995801308674,
      "grad_norm": 3.046875,
      "learning_rate": 3.566685099080716e-05,
      "loss": 0.8216,
      "step": 306440
    },
    {
      "epoch": 1.0740308488155694,
      "grad_norm": 2.984375,
      "learning_rate": 3.566620196214346e-05,
      "loss": 0.8487,
      "step": 306450
    },
    {
      "epoch": 1.0740658963224652,
      "grad_norm": 2.890625,
      "learning_rate": 3.5665552933479756e-05,
      "loss": 0.8257,
      "step": 306460
    },
    {
      "epoch": 1.0741009438293607,
      "grad_norm": 2.640625,
      "learning_rate": 3.566490390481606e-05,
      "loss": 0.9328,
      "step": 306470
    },
    {
      "epoch": 1.0741359913362563,
      "grad_norm": 2.796875,
      "learning_rate": 3.566425487615235e-05,
      "loss": 0.9128,
      "step": 306480
    },
    {
      "epoch": 1.0741710388431518,
      "grad_norm": 2.9375,
      "learning_rate": 3.566360584748865e-05,
      "loss": 0.8618,
      "step": 306490
    },
    {
      "epoch": 1.0742060863500476,
      "grad_norm": 3.515625,
      "learning_rate": 3.566295681882495e-05,
      "loss": 0.8741,
      "step": 306500
    },
    {
      "epoch": 1.074241133856943,
      "grad_norm": 3.375,
      "learning_rate": 3.566230779016125e-05,
      "loss": 0.8869,
      "step": 306510
    },
    {
      "epoch": 1.0742761813638386,
      "grad_norm": 2.640625,
      "learning_rate": 3.5661658761497544e-05,
      "loss": 0.8794,
      "step": 306520
    },
    {
      "epoch": 1.0743112288707344,
      "grad_norm": 2.984375,
      "learning_rate": 3.5661009732833845e-05,
      "loss": 0.9159,
      "step": 306530
    },
    {
      "epoch": 1.07434627637763,
      "grad_norm": 2.53125,
      "learning_rate": 3.566036070417014e-05,
      "loss": 0.8286,
      "step": 306540
    },
    {
      "epoch": 1.0743813238845255,
      "grad_norm": 3.3125,
      "learning_rate": 3.565971167550644e-05,
      "loss": 0.9019,
      "step": 306550
    },
    {
      "epoch": 1.074416371391421,
      "grad_norm": 2.546875,
      "learning_rate": 3.5659062646842736e-05,
      "loss": 0.8441,
      "step": 306560
    },
    {
      "epoch": 1.0744514188983167,
      "grad_norm": 3.25,
      "learning_rate": 3.565841361817904e-05,
      "loss": 0.9072,
      "step": 306570
    },
    {
      "epoch": 1.0744864664052123,
      "grad_norm": 3.1875,
      "learning_rate": 3.565776458951534e-05,
      "loss": 0.8799,
      "step": 306580
    },
    {
      "epoch": 1.0745215139121078,
      "grad_norm": 2.90625,
      "learning_rate": 3.565711556085163e-05,
      "loss": 0.8619,
      "step": 306590
    },
    {
      "epoch": 1.0745565614190036,
      "grad_norm": 2.96875,
      "learning_rate": 3.565646653218793e-05,
      "loss": 0.8965,
      "step": 306600
    },
    {
      "epoch": 1.0745916089258991,
      "grad_norm": 2.875,
      "learning_rate": 3.565581750352422e-05,
      "loss": 0.8396,
      "step": 306610
    },
    {
      "epoch": 1.0746266564327946,
      "grad_norm": 3.5,
      "learning_rate": 3.5655168474860524e-05,
      "loss": 0.906,
      "step": 306620
    },
    {
      "epoch": 1.0746617039396902,
      "grad_norm": 3.125,
      "learning_rate": 3.565451944619682e-05,
      "loss": 0.9143,
      "step": 306630
    },
    {
      "epoch": 1.074696751446586,
      "grad_norm": 3.296875,
      "learning_rate": 3.565387041753312e-05,
      "loss": 0.9671,
      "step": 306640
    },
    {
      "epoch": 1.0747317989534815,
      "grad_norm": 3.171875,
      "learning_rate": 3.5653221388869414e-05,
      "loss": 0.9105,
      "step": 306650
    },
    {
      "epoch": 1.074766846460377,
      "grad_norm": 2.796875,
      "learning_rate": 3.5652572360205716e-05,
      "loss": 0.8554,
      "step": 306660
    },
    {
      "epoch": 1.0748018939672725,
      "grad_norm": 3.046875,
      "learning_rate": 3.565192333154202e-05,
      "loss": 0.8886,
      "step": 306670
    },
    {
      "epoch": 1.0748369414741683,
      "grad_norm": 2.75,
      "learning_rate": 3.565127430287831e-05,
      "loss": 0.8888,
      "step": 306680
    },
    {
      "epoch": 1.0748719889810638,
      "grad_norm": 3.46875,
      "learning_rate": 3.565062527421461e-05,
      "loss": 0.8313,
      "step": 306690
    },
    {
      "epoch": 1.0749070364879594,
      "grad_norm": 2.875,
      "learning_rate": 3.564997624555091e-05,
      "loss": 0.842,
      "step": 306700
    },
    {
      "epoch": 1.0749420839948551,
      "grad_norm": 3.109375,
      "learning_rate": 3.564932721688721e-05,
      "loss": 0.8923,
      "step": 306710
    },
    {
      "epoch": 1.0749771315017507,
      "grad_norm": 3.140625,
      "learning_rate": 3.5648678188223504e-05,
      "loss": 0.8725,
      "step": 306720
    },
    {
      "epoch": 1.0750121790086462,
      "grad_norm": 2.75,
      "learning_rate": 3.5648029159559805e-05,
      "loss": 0.8967,
      "step": 306730
    },
    {
      "epoch": 1.0750472265155417,
      "grad_norm": 2.90625,
      "learning_rate": 3.56473801308961e-05,
      "loss": 0.886,
      "step": 306740
    },
    {
      "epoch": 1.0750822740224375,
      "grad_norm": 2.5,
      "learning_rate": 3.56467311022324e-05,
      "loss": 0.8963,
      "step": 306750
    },
    {
      "epoch": 1.075117321529333,
      "grad_norm": 2.578125,
      "learning_rate": 3.5646082073568696e-05,
      "loss": 0.8159,
      "step": 306760
    },
    {
      "epoch": 1.0751523690362286,
      "grad_norm": 2.59375,
      "learning_rate": 3.5645433044905e-05,
      "loss": 0.9077,
      "step": 306770
    },
    {
      "epoch": 1.075187416543124,
      "grad_norm": 2.953125,
      "learning_rate": 3.564478401624129e-05,
      "loss": 0.8011,
      "step": 306780
    },
    {
      "epoch": 1.0752224640500199,
      "grad_norm": 2.765625,
      "learning_rate": 3.564413498757759e-05,
      "loss": 0.846,
      "step": 306790
    },
    {
      "epoch": 1.0752575115569154,
      "grad_norm": 3.34375,
      "learning_rate": 3.5643485958913895e-05,
      "loss": 0.8098,
      "step": 306800
    },
    {
      "epoch": 1.075292559063811,
      "grad_norm": 2.640625,
      "learning_rate": 3.564283693025019e-05,
      "loss": 0.8764,
      "step": 306810
    },
    {
      "epoch": 1.0753276065707067,
      "grad_norm": 3.28125,
      "learning_rate": 3.564218790158649e-05,
      "loss": 0.9157,
      "step": 306820
    },
    {
      "epoch": 1.0753626540776022,
      "grad_norm": 3.078125,
      "learning_rate": 3.5641538872922785e-05,
      "loss": 0.8126,
      "step": 306830
    },
    {
      "epoch": 1.0753977015844978,
      "grad_norm": 2.828125,
      "learning_rate": 3.5640889844259087e-05,
      "loss": 0.8426,
      "step": 306840
    },
    {
      "epoch": 1.0754327490913933,
      "grad_norm": 3.03125,
      "learning_rate": 3.564024081559538e-05,
      "loss": 0.8827,
      "step": 306850
    },
    {
      "epoch": 1.075467796598289,
      "grad_norm": 2.90625,
      "learning_rate": 3.563959178693168e-05,
      "loss": 0.8211,
      "step": 306860
    },
    {
      "epoch": 1.0755028441051846,
      "grad_norm": 2.421875,
      "learning_rate": 3.563894275826798e-05,
      "loss": 0.8848,
      "step": 306870
    },
    {
      "epoch": 1.0755378916120801,
      "grad_norm": 3.859375,
      "learning_rate": 3.563829372960428e-05,
      "loss": 0.8348,
      "step": 306880
    },
    {
      "epoch": 1.0755729391189757,
      "grad_norm": 2.921875,
      "learning_rate": 3.563764470094057e-05,
      "loss": 0.8302,
      "step": 306890
    },
    {
      "epoch": 1.0756079866258714,
      "grad_norm": 2.578125,
      "learning_rate": 3.5636995672276875e-05,
      "loss": 0.9138,
      "step": 306900
    },
    {
      "epoch": 1.075643034132767,
      "grad_norm": 2.6875,
      "learning_rate": 3.563634664361317e-05,
      "loss": 0.8244,
      "step": 306910
    },
    {
      "epoch": 1.0756780816396625,
      "grad_norm": 2.828125,
      "learning_rate": 3.563569761494947e-05,
      "loss": 0.9163,
      "step": 306920
    },
    {
      "epoch": 1.0757131291465583,
      "grad_norm": 2.9375,
      "learning_rate": 3.5635048586285765e-05,
      "loss": 0.9623,
      "step": 306930
    },
    {
      "epoch": 1.0757481766534538,
      "grad_norm": 3.1875,
      "learning_rate": 3.5634399557622067e-05,
      "loss": 1.0153,
      "step": 306940
    },
    {
      "epoch": 1.0757832241603493,
      "grad_norm": 2.78125,
      "learning_rate": 3.563375052895837e-05,
      "loss": 0.7808,
      "step": 306950
    },
    {
      "epoch": 1.0758182716672449,
      "grad_norm": 3.296875,
      "learning_rate": 3.563310150029466e-05,
      "loss": 0.9851,
      "step": 306960
    },
    {
      "epoch": 1.0758533191741406,
      "grad_norm": 2.890625,
      "learning_rate": 3.5632452471630964e-05,
      "loss": 0.8833,
      "step": 306970
    },
    {
      "epoch": 1.0758883666810362,
      "grad_norm": 3.171875,
      "learning_rate": 3.563180344296725e-05,
      "loss": 0.8583,
      "step": 306980
    },
    {
      "epoch": 1.0759234141879317,
      "grad_norm": 2.734375,
      "learning_rate": 3.563115441430355e-05,
      "loss": 0.848,
      "step": 306990
    },
    {
      "epoch": 1.0759584616948272,
      "grad_norm": 3.0,
      "learning_rate": 3.563050538563985e-05,
      "loss": 0.8243,
      "step": 307000
    },
    {
      "epoch": 1.075993509201723,
      "grad_norm": 3.203125,
      "learning_rate": 3.562985635697615e-05,
      "loss": 0.94,
      "step": 307010
    },
    {
      "epoch": 1.0760285567086185,
      "grad_norm": 2.875,
      "learning_rate": 3.562920732831245e-05,
      "loss": 0.8713,
      "step": 307020
    },
    {
      "epoch": 1.076063604215514,
      "grad_norm": 2.734375,
      "learning_rate": 3.5628558299648745e-05,
      "loss": 0.8461,
      "step": 307030
    },
    {
      "epoch": 1.0760986517224098,
      "grad_norm": 2.71875,
      "learning_rate": 3.5627909270985047e-05,
      "loss": 0.9029,
      "step": 307040
    },
    {
      "epoch": 1.0761336992293054,
      "grad_norm": 2.4375,
      "learning_rate": 3.562726024232134e-05,
      "loss": 0.9457,
      "step": 307050
    },
    {
      "epoch": 1.0761687467362009,
      "grad_norm": 2.59375,
      "learning_rate": 3.562661121365764e-05,
      "loss": 0.8642,
      "step": 307060
    },
    {
      "epoch": 1.0762037942430964,
      "grad_norm": 3.0,
      "learning_rate": 3.562596218499394e-05,
      "loss": 0.8959,
      "step": 307070
    },
    {
      "epoch": 1.0762388417499922,
      "grad_norm": 2.3125,
      "learning_rate": 3.562531315633024e-05,
      "loss": 0.8189,
      "step": 307080
    },
    {
      "epoch": 1.0762738892568877,
      "grad_norm": 2.890625,
      "learning_rate": 3.562466412766653e-05,
      "loss": 0.8883,
      "step": 307090
    },
    {
      "epoch": 1.0763089367637833,
      "grad_norm": 2.796875,
      "learning_rate": 3.5624015099002835e-05,
      "loss": 0.8765,
      "step": 307100
    },
    {
      "epoch": 1.0763439842706788,
      "grad_norm": 3.046875,
      "learning_rate": 3.562336607033913e-05,
      "loss": 0.9578,
      "step": 307110
    },
    {
      "epoch": 1.0763790317775745,
      "grad_norm": 4.09375,
      "learning_rate": 3.562271704167543e-05,
      "loss": 0.8785,
      "step": 307120
    },
    {
      "epoch": 1.07641407928447,
      "grad_norm": 3.1875,
      "learning_rate": 3.5622068013011725e-05,
      "loss": 0.9378,
      "step": 307130
    },
    {
      "epoch": 1.0764491267913656,
      "grad_norm": 2.96875,
      "learning_rate": 3.5621418984348027e-05,
      "loss": 0.8834,
      "step": 307140
    },
    {
      "epoch": 1.0764841742982614,
      "grad_norm": 2.25,
      "learning_rate": 3.562076995568432e-05,
      "loss": 0.9348,
      "step": 307150
    },
    {
      "epoch": 1.076519221805157,
      "grad_norm": 3.09375,
      "learning_rate": 3.562012092702062e-05,
      "loss": 0.8543,
      "step": 307160
    },
    {
      "epoch": 1.0765542693120524,
      "grad_norm": 2.953125,
      "learning_rate": 3.5619471898356924e-05,
      "loss": 0.9154,
      "step": 307170
    },
    {
      "epoch": 1.076589316818948,
      "grad_norm": 2.828125,
      "learning_rate": 3.561882286969322e-05,
      "loss": 0.8809,
      "step": 307180
    },
    {
      "epoch": 1.0766243643258437,
      "grad_norm": 3.328125,
      "learning_rate": 3.561817384102952e-05,
      "loss": 0.877,
      "step": 307190
    },
    {
      "epoch": 1.0766594118327393,
      "grad_norm": 2.734375,
      "learning_rate": 3.5617524812365815e-05,
      "loss": 0.8422,
      "step": 307200
    },
    {
      "epoch": 1.0766944593396348,
      "grad_norm": 2.734375,
      "learning_rate": 3.5616875783702116e-05,
      "loss": 0.8547,
      "step": 307210
    },
    {
      "epoch": 1.0767295068465306,
      "grad_norm": 2.859375,
      "learning_rate": 3.561622675503841e-05,
      "loss": 0.8365,
      "step": 307220
    },
    {
      "epoch": 1.076764554353426,
      "grad_norm": 3.078125,
      "learning_rate": 3.561557772637471e-05,
      "loss": 0.8989,
      "step": 307230
    },
    {
      "epoch": 1.0767996018603216,
      "grad_norm": 2.59375,
      "learning_rate": 3.5614928697711007e-05,
      "loss": 0.8362,
      "step": 307240
    },
    {
      "epoch": 1.0768346493672172,
      "grad_norm": 4.71875,
      "learning_rate": 3.561427966904731e-05,
      "loss": 0.8571,
      "step": 307250
    },
    {
      "epoch": 1.076869696874113,
      "grad_norm": 3.140625,
      "learning_rate": 3.56136306403836e-05,
      "loss": 0.792,
      "step": 307260
    },
    {
      "epoch": 1.0769047443810085,
      "grad_norm": 2.890625,
      "learning_rate": 3.5612981611719904e-05,
      "loss": 0.7969,
      "step": 307270
    },
    {
      "epoch": 1.076939791887904,
      "grad_norm": 2.90625,
      "learning_rate": 3.56123325830562e-05,
      "loss": 0.8873,
      "step": 307280
    },
    {
      "epoch": 1.0769748393947998,
      "grad_norm": 2.453125,
      "learning_rate": 3.56116835543925e-05,
      "loss": 0.8576,
      "step": 307290
    },
    {
      "epoch": 1.0770098869016953,
      "grad_norm": 3.625,
      "learning_rate": 3.56110345257288e-05,
      "loss": 0.9535,
      "step": 307300
    },
    {
      "epoch": 1.0770449344085908,
      "grad_norm": 2.46875,
      "learning_rate": 3.5610385497065096e-05,
      "loss": 0.8018,
      "step": 307310
    },
    {
      "epoch": 1.0770799819154864,
      "grad_norm": 3.078125,
      "learning_rate": 3.56097364684014e-05,
      "loss": 0.8376,
      "step": 307320
    },
    {
      "epoch": 1.0771150294223821,
      "grad_norm": 3.09375,
      "learning_rate": 3.560908743973769e-05,
      "loss": 0.9204,
      "step": 307330
    },
    {
      "epoch": 1.0771500769292777,
      "grad_norm": 3.6875,
      "learning_rate": 3.560843841107399e-05,
      "loss": 0.8509,
      "step": 307340
    },
    {
      "epoch": 1.0771851244361732,
      "grad_norm": 2.890625,
      "learning_rate": 3.560778938241028e-05,
      "loss": 0.8612,
      "step": 307350
    },
    {
      "epoch": 1.0772201719430687,
      "grad_norm": 3.375,
      "learning_rate": 3.560714035374658e-05,
      "loss": 0.8673,
      "step": 307360
    },
    {
      "epoch": 1.0772552194499645,
      "grad_norm": 3.65625,
      "learning_rate": 3.560649132508288e-05,
      "loss": 0.864,
      "step": 307370
    },
    {
      "epoch": 1.07729026695686,
      "grad_norm": 3.0625,
      "learning_rate": 3.560584229641918e-05,
      "loss": 0.9231,
      "step": 307380
    },
    {
      "epoch": 1.0773253144637556,
      "grad_norm": 2.859375,
      "learning_rate": 3.560519326775548e-05,
      "loss": 0.9123,
      "step": 307390
    },
    {
      "epoch": 1.0773603619706513,
      "grad_norm": 3.109375,
      "learning_rate": 3.5604544239091775e-05,
      "loss": 0.9168,
      "step": 307400
    },
    {
      "epoch": 1.0773954094775469,
      "grad_norm": 3.21875,
      "learning_rate": 3.5603895210428076e-05,
      "loss": 0.8113,
      "step": 307410
    },
    {
      "epoch": 1.0774304569844424,
      "grad_norm": 2.609375,
      "learning_rate": 3.560324618176437e-05,
      "loss": 0.8104,
      "step": 307420
    },
    {
      "epoch": 1.077465504491338,
      "grad_norm": 3.140625,
      "learning_rate": 3.560259715310067e-05,
      "loss": 0.9045,
      "step": 307430
    },
    {
      "epoch": 1.0775005519982337,
      "grad_norm": 2.765625,
      "learning_rate": 3.5601948124436967e-05,
      "loss": 0.876,
      "step": 307440
    },
    {
      "epoch": 1.0775355995051292,
      "grad_norm": 3.359375,
      "learning_rate": 3.560129909577327e-05,
      "loss": 0.9509,
      "step": 307450
    },
    {
      "epoch": 1.0775706470120248,
      "grad_norm": 3.265625,
      "learning_rate": 3.560065006710956e-05,
      "loss": 0.8971,
      "step": 307460
    },
    {
      "epoch": 1.0776056945189203,
      "grad_norm": 2.796875,
      "learning_rate": 3.5600001038445864e-05,
      "loss": 0.7594,
      "step": 307470
    },
    {
      "epoch": 1.077640742025816,
      "grad_norm": 2.515625,
      "learning_rate": 3.559935200978216e-05,
      "loss": 0.9337,
      "step": 307480
    },
    {
      "epoch": 1.0776757895327116,
      "grad_norm": 2.984375,
      "learning_rate": 3.559870298111846e-05,
      "loss": 0.8585,
      "step": 307490
    },
    {
      "epoch": 1.0777108370396071,
      "grad_norm": 3.125,
      "learning_rate": 3.5598053952454755e-05,
      "loss": 0.8684,
      "step": 307500
    },
    {
      "epoch": 1.0777458845465029,
      "grad_norm": 3.140625,
      "learning_rate": 3.5597404923791056e-05,
      "loss": 0.8313,
      "step": 307510
    },
    {
      "epoch": 1.0777809320533984,
      "grad_norm": 2.640625,
      "learning_rate": 3.559675589512735e-05,
      "loss": 0.7933,
      "step": 307520
    },
    {
      "epoch": 1.077815979560294,
      "grad_norm": 2.59375,
      "learning_rate": 3.559610686646365e-05,
      "loss": 0.888,
      "step": 307530
    },
    {
      "epoch": 1.0778510270671895,
      "grad_norm": 3.03125,
      "learning_rate": 3.559545783779995e-05,
      "loss": 0.8753,
      "step": 307540
    },
    {
      "epoch": 1.0778860745740853,
      "grad_norm": 2.6875,
      "learning_rate": 3.559480880913625e-05,
      "loss": 0.8224,
      "step": 307550
    },
    {
      "epoch": 1.0779211220809808,
      "grad_norm": 3.03125,
      "learning_rate": 3.559415978047255e-05,
      "loss": 0.7977,
      "step": 307560
    },
    {
      "epoch": 1.0779561695878763,
      "grad_norm": 3.0625,
      "learning_rate": 3.5593510751808844e-05,
      "loss": 0.8286,
      "step": 307570
    },
    {
      "epoch": 1.0779912170947719,
      "grad_norm": 3.0625,
      "learning_rate": 3.5592861723145145e-05,
      "loss": 0.8836,
      "step": 307580
    },
    {
      "epoch": 1.0780262646016676,
      "grad_norm": 3.15625,
      "learning_rate": 3.559221269448144e-05,
      "loss": 0.8729,
      "step": 307590
    },
    {
      "epoch": 1.0780613121085632,
      "grad_norm": 3.3125,
      "learning_rate": 3.559156366581774e-05,
      "loss": 0.9005,
      "step": 307600
    },
    {
      "epoch": 1.0780963596154587,
      "grad_norm": 2.953125,
      "learning_rate": 3.5590914637154036e-05,
      "loss": 0.8257,
      "step": 307610
    },
    {
      "epoch": 1.0781314071223544,
      "grad_norm": 3.078125,
      "learning_rate": 3.559026560849034e-05,
      "loss": 0.8811,
      "step": 307620
    },
    {
      "epoch": 1.07816645462925,
      "grad_norm": 2.828125,
      "learning_rate": 3.558961657982663e-05,
      "loss": 0.9054,
      "step": 307630
    },
    {
      "epoch": 1.0782015021361455,
      "grad_norm": 2.9375,
      "learning_rate": 3.558896755116293e-05,
      "loss": 0.8833,
      "step": 307640
    },
    {
      "epoch": 1.078236549643041,
      "grad_norm": 2.671875,
      "learning_rate": 3.558831852249923e-05,
      "loss": 0.8385,
      "step": 307650
    },
    {
      "epoch": 1.0782715971499368,
      "grad_norm": 3.296875,
      "learning_rate": 3.558766949383553e-05,
      "loss": 0.8717,
      "step": 307660
    },
    {
      "epoch": 1.0783066446568323,
      "grad_norm": 2.953125,
      "learning_rate": 3.558702046517183e-05,
      "loss": 0.9131,
      "step": 307670
    },
    {
      "epoch": 1.0783416921637279,
      "grad_norm": 3.078125,
      "learning_rate": 3.5586371436508125e-05,
      "loss": 0.8585,
      "step": 307680
    },
    {
      "epoch": 1.0783767396706234,
      "grad_norm": 2.703125,
      "learning_rate": 3.558572240784443e-05,
      "loss": 0.9467,
      "step": 307690
    },
    {
      "epoch": 1.0784117871775192,
      "grad_norm": 2.921875,
      "learning_rate": 3.558507337918072e-05,
      "loss": 0.9072,
      "step": 307700
    },
    {
      "epoch": 1.0784468346844147,
      "grad_norm": 2.96875,
      "learning_rate": 3.558442435051702e-05,
      "loss": 0.8028,
      "step": 307710
    },
    {
      "epoch": 1.0784818821913102,
      "grad_norm": 3.1875,
      "learning_rate": 3.558377532185332e-05,
      "loss": 0.8982,
      "step": 307720
    },
    {
      "epoch": 1.078516929698206,
      "grad_norm": 2.46875,
      "learning_rate": 3.558312629318961e-05,
      "loss": 0.8413,
      "step": 307730
    },
    {
      "epoch": 1.0785519772051015,
      "grad_norm": 2.828125,
      "learning_rate": 3.5582477264525907e-05,
      "loss": 0.8066,
      "step": 307740
    },
    {
      "epoch": 1.078587024711997,
      "grad_norm": 3.171875,
      "learning_rate": 3.558182823586221e-05,
      "loss": 0.8381,
      "step": 307750
    },
    {
      "epoch": 1.0786220722188926,
      "grad_norm": 2.890625,
      "learning_rate": 3.558117920719851e-05,
      "loss": 0.8333,
      "step": 307760
    },
    {
      "epoch": 1.0786571197257884,
      "grad_norm": 2.625,
      "learning_rate": 3.5580530178534804e-05,
      "loss": 0.8424,
      "step": 307770
    },
    {
      "epoch": 1.078692167232684,
      "grad_norm": 3.3125,
      "learning_rate": 3.5579881149871105e-05,
      "loss": 0.9297,
      "step": 307780
    },
    {
      "epoch": 1.0787272147395794,
      "grad_norm": 3.09375,
      "learning_rate": 3.55792321212074e-05,
      "loss": 0.8654,
      "step": 307790
    },
    {
      "epoch": 1.0787622622464752,
      "grad_norm": 3.40625,
      "learning_rate": 3.55785830925437e-05,
      "loss": 0.9885,
      "step": 307800
    },
    {
      "epoch": 1.0787973097533707,
      "grad_norm": 3.109375,
      "learning_rate": 3.5577934063879996e-05,
      "loss": 0.8437,
      "step": 307810
    },
    {
      "epoch": 1.0788323572602663,
      "grad_norm": 2.90625,
      "learning_rate": 3.55772850352163e-05,
      "loss": 0.8571,
      "step": 307820
    },
    {
      "epoch": 1.0788674047671618,
      "grad_norm": 2.953125,
      "learning_rate": 3.557663600655259e-05,
      "loss": 0.8121,
      "step": 307830
    },
    {
      "epoch": 1.0789024522740576,
      "grad_norm": 2.625,
      "learning_rate": 3.557598697788889e-05,
      "loss": 0.9214,
      "step": 307840
    },
    {
      "epoch": 1.078937499780953,
      "grad_norm": 2.59375,
      "learning_rate": 3.557533794922519e-05,
      "loss": 0.834,
      "step": 307850
    },
    {
      "epoch": 1.0789725472878486,
      "grad_norm": 3.109375,
      "learning_rate": 3.557468892056149e-05,
      "loss": 0.9148,
      "step": 307860
    },
    {
      "epoch": 1.0790075947947442,
      "grad_norm": 2.4375,
      "learning_rate": 3.5574039891897784e-05,
      "loss": 0.7966,
      "step": 307870
    },
    {
      "epoch": 1.07904264230164,
      "grad_norm": 2.6875,
      "learning_rate": 3.5573390863234085e-05,
      "loss": 0.8441,
      "step": 307880
    },
    {
      "epoch": 1.0790776898085355,
      "grad_norm": 2.515625,
      "learning_rate": 3.557274183457038e-05,
      "loss": 0.8793,
      "step": 307890
    },
    {
      "epoch": 1.079112737315431,
      "grad_norm": 2.734375,
      "learning_rate": 3.557209280590668e-05,
      "loss": 0.9145,
      "step": 307900
    },
    {
      "epoch": 1.0791477848223268,
      "grad_norm": 2.6875,
      "learning_rate": 3.557144377724298e-05,
      "loss": 0.9231,
      "step": 307910
    },
    {
      "epoch": 1.0791828323292223,
      "grad_norm": 2.8125,
      "learning_rate": 3.557079474857928e-05,
      "loss": 0.795,
      "step": 307920
    },
    {
      "epoch": 1.0792178798361178,
      "grad_norm": 2.953125,
      "learning_rate": 3.557014571991558e-05,
      "loss": 0.865,
      "step": 307930
    },
    {
      "epoch": 1.0792529273430134,
      "grad_norm": 2.984375,
      "learning_rate": 3.556949669125187e-05,
      "loss": 0.9177,
      "step": 307940
    },
    {
      "epoch": 1.0792879748499091,
      "grad_norm": 2.9375,
      "learning_rate": 3.5568847662588175e-05,
      "loss": 0.8406,
      "step": 307950
    },
    {
      "epoch": 1.0793230223568047,
      "grad_norm": 2.984375,
      "learning_rate": 3.556819863392447e-05,
      "loss": 0.8523,
      "step": 307960
    },
    {
      "epoch": 1.0793580698637002,
      "grad_norm": 3.53125,
      "learning_rate": 3.556754960526077e-05,
      "loss": 0.882,
      "step": 307970
    },
    {
      "epoch": 1.079393117370596,
      "grad_norm": 2.671875,
      "learning_rate": 3.5566900576597065e-05,
      "loss": 0.8369,
      "step": 307980
    },
    {
      "epoch": 1.0794281648774915,
      "grad_norm": 2.890625,
      "learning_rate": 3.556625154793337e-05,
      "loss": 0.9054,
      "step": 307990
    },
    {
      "epoch": 1.079463212384387,
      "grad_norm": 2.546875,
      "learning_rate": 3.556560251926966e-05,
      "loss": 0.8419,
      "step": 308000
    },
    {
      "epoch": 1.0794982598912826,
      "grad_norm": 2.671875,
      "learning_rate": 3.556495349060596e-05,
      "loss": 0.8423,
      "step": 308010
    },
    {
      "epoch": 1.0795333073981783,
      "grad_norm": 3.171875,
      "learning_rate": 3.556430446194226e-05,
      "loss": 0.9006,
      "step": 308020
    },
    {
      "epoch": 1.0795683549050739,
      "grad_norm": 2.796875,
      "learning_rate": 3.556365543327856e-05,
      "loss": 0.8414,
      "step": 308030
    },
    {
      "epoch": 1.0796034024119694,
      "grad_norm": 2.875,
      "learning_rate": 3.556300640461486e-05,
      "loss": 0.8464,
      "step": 308040
    },
    {
      "epoch": 1.079638449918865,
      "grad_norm": 2.90625,
      "learning_rate": 3.5562357375951155e-05,
      "loss": 0.8827,
      "step": 308050
    },
    {
      "epoch": 1.0796734974257607,
      "grad_norm": 2.875,
      "learning_rate": 3.5561708347287456e-05,
      "loss": 0.9264,
      "step": 308060
    },
    {
      "epoch": 1.0797085449326562,
      "grad_norm": 2.78125,
      "learning_rate": 3.556105931862375e-05,
      "loss": 0.8661,
      "step": 308070
    },
    {
      "epoch": 1.0797435924395518,
      "grad_norm": 2.984375,
      "learning_rate": 3.556041028996005e-05,
      "loss": 0.9573,
      "step": 308080
    },
    {
      "epoch": 1.0797786399464475,
      "grad_norm": 2.875,
      "learning_rate": 3.555976126129635e-05,
      "loss": 0.8905,
      "step": 308090
    },
    {
      "epoch": 1.079813687453343,
      "grad_norm": 2.359375,
      "learning_rate": 3.555911223263265e-05,
      "loss": 0.8092,
      "step": 308100
    },
    {
      "epoch": 1.0798487349602386,
      "grad_norm": 2.84375,
      "learning_rate": 3.5558463203968936e-05,
      "loss": 0.8616,
      "step": 308110
    },
    {
      "epoch": 1.0798837824671341,
      "grad_norm": 3.046875,
      "learning_rate": 3.555781417530524e-05,
      "loss": 0.8733,
      "step": 308120
    },
    {
      "epoch": 1.0799188299740299,
      "grad_norm": 2.515625,
      "learning_rate": 3.555716514664154e-05,
      "loss": 0.8468,
      "step": 308130
    },
    {
      "epoch": 1.0799538774809254,
      "grad_norm": 3.28125,
      "learning_rate": 3.555651611797783e-05,
      "loss": 0.8761,
      "step": 308140
    },
    {
      "epoch": 1.079988924987821,
      "grad_norm": 2.953125,
      "learning_rate": 3.5555867089314135e-05,
      "loss": 0.8309,
      "step": 308150
    },
    {
      "epoch": 1.0800239724947165,
      "grad_norm": 3.140625,
      "learning_rate": 3.555521806065043e-05,
      "loss": 0.9461,
      "step": 308160
    },
    {
      "epoch": 1.0800590200016122,
      "grad_norm": 2.90625,
      "learning_rate": 3.555456903198673e-05,
      "loss": 0.8932,
      "step": 308170
    },
    {
      "epoch": 1.0800940675085078,
      "grad_norm": 2.734375,
      "learning_rate": 3.5553920003323025e-05,
      "loss": 0.7892,
      "step": 308180
    },
    {
      "epoch": 1.0801291150154033,
      "grad_norm": 3.125,
      "learning_rate": 3.555327097465933e-05,
      "loss": 0.9202,
      "step": 308190
    },
    {
      "epoch": 1.080164162522299,
      "grad_norm": 2.84375,
      "learning_rate": 3.555262194599562e-05,
      "loss": 0.8986,
      "step": 308200
    },
    {
      "epoch": 1.0801992100291946,
      "grad_norm": 3.015625,
      "learning_rate": 3.555197291733192e-05,
      "loss": 0.9011,
      "step": 308210
    },
    {
      "epoch": 1.0802342575360901,
      "grad_norm": 2.828125,
      "learning_rate": 3.555132388866822e-05,
      "loss": 0.8655,
      "step": 308220
    },
    {
      "epoch": 1.0802693050429857,
      "grad_norm": 2.828125,
      "learning_rate": 3.555067486000452e-05,
      "loss": 0.9242,
      "step": 308230
    },
    {
      "epoch": 1.0803043525498814,
      "grad_norm": 2.40625,
      "learning_rate": 3.555002583134081e-05,
      "loss": 0.8142,
      "step": 308240
    },
    {
      "epoch": 1.080339400056777,
      "grad_norm": 2.953125,
      "learning_rate": 3.5549376802677115e-05,
      "loss": 0.8483,
      "step": 308250
    },
    {
      "epoch": 1.0803744475636725,
      "grad_norm": 2.734375,
      "learning_rate": 3.5548727774013416e-05,
      "loss": 0.8818,
      "step": 308260
    },
    {
      "epoch": 1.080409495070568,
      "grad_norm": 2.921875,
      "learning_rate": 3.554807874534971e-05,
      "loss": 0.8877,
      "step": 308270
    },
    {
      "epoch": 1.0804445425774638,
      "grad_norm": 3.296875,
      "learning_rate": 3.554742971668601e-05,
      "loss": 0.9157,
      "step": 308280
    },
    {
      "epoch": 1.0804795900843593,
      "grad_norm": 3.171875,
      "learning_rate": 3.554678068802231e-05,
      "loss": 0.9243,
      "step": 308290
    },
    {
      "epoch": 1.0805146375912549,
      "grad_norm": 3.140625,
      "learning_rate": 3.554613165935861e-05,
      "loss": 0.8942,
      "step": 308300
    },
    {
      "epoch": 1.0805496850981506,
      "grad_norm": 2.640625,
      "learning_rate": 3.55454826306949e-05,
      "loss": 0.8253,
      "step": 308310
    },
    {
      "epoch": 1.0805847326050462,
      "grad_norm": 2.84375,
      "learning_rate": 3.5544833602031204e-05,
      "loss": 0.8882,
      "step": 308320
    },
    {
      "epoch": 1.0806197801119417,
      "grad_norm": 3.09375,
      "learning_rate": 3.55441845733675e-05,
      "loss": 0.9164,
      "step": 308330
    },
    {
      "epoch": 1.0806548276188372,
      "grad_norm": 2.765625,
      "learning_rate": 3.55435355447038e-05,
      "loss": 0.8924,
      "step": 308340
    },
    {
      "epoch": 1.080689875125733,
      "grad_norm": 2.984375,
      "learning_rate": 3.5542886516040095e-05,
      "loss": 0.874,
      "step": 308350
    },
    {
      "epoch": 1.0807249226326285,
      "grad_norm": 2.984375,
      "learning_rate": 3.5542237487376396e-05,
      "loss": 0.7988,
      "step": 308360
    },
    {
      "epoch": 1.080759970139524,
      "grad_norm": 2.609375,
      "learning_rate": 3.554158845871269e-05,
      "loss": 0.8191,
      "step": 308370
    },
    {
      "epoch": 1.0807950176464196,
      "grad_norm": 2.65625,
      "learning_rate": 3.554093943004899e-05,
      "loss": 0.8173,
      "step": 308380
    },
    {
      "epoch": 1.0808300651533154,
      "grad_norm": 2.515625,
      "learning_rate": 3.554029040138529e-05,
      "loss": 0.9418,
      "step": 308390
    },
    {
      "epoch": 1.080865112660211,
      "grad_norm": 2.859375,
      "learning_rate": 3.553964137272159e-05,
      "loss": 0.8237,
      "step": 308400
    },
    {
      "epoch": 1.0809001601671064,
      "grad_norm": 2.984375,
      "learning_rate": 3.553899234405789e-05,
      "loss": 1.0122,
      "step": 308410
    },
    {
      "epoch": 1.0809352076740022,
      "grad_norm": 2.578125,
      "learning_rate": 3.5538343315394184e-05,
      "loss": 0.8361,
      "step": 308420
    },
    {
      "epoch": 1.0809702551808977,
      "grad_norm": 2.765625,
      "learning_rate": 3.5537694286730486e-05,
      "loss": 0.9431,
      "step": 308430
    },
    {
      "epoch": 1.0810053026877933,
      "grad_norm": 2.515625,
      "learning_rate": 3.553704525806678e-05,
      "loss": 0.7532,
      "step": 308440
    },
    {
      "epoch": 1.0810403501946888,
      "grad_norm": 2.71875,
      "learning_rate": 3.553639622940308e-05,
      "loss": 0.904,
      "step": 308450
    },
    {
      "epoch": 1.0810753977015846,
      "grad_norm": 2.796875,
      "learning_rate": 3.5535747200739376e-05,
      "loss": 0.8563,
      "step": 308460
    },
    {
      "epoch": 1.08111044520848,
      "grad_norm": 2.796875,
      "learning_rate": 3.553509817207568e-05,
      "loss": 0.7833,
      "step": 308470
    },
    {
      "epoch": 1.0811454927153756,
      "grad_norm": 2.71875,
      "learning_rate": 3.5534449143411965e-05,
      "loss": 0.8231,
      "step": 308480
    },
    {
      "epoch": 1.0811805402222714,
      "grad_norm": 2.9375,
      "learning_rate": 3.553380011474827e-05,
      "loss": 0.8451,
      "step": 308490
    },
    {
      "epoch": 1.081215587729167,
      "grad_norm": 2.921875,
      "learning_rate": 3.553315108608457e-05,
      "loss": 0.8738,
      "step": 308500
    },
    {
      "epoch": 1.0812506352360625,
      "grad_norm": 2.96875,
      "learning_rate": 3.553250205742086e-05,
      "loss": 0.8957,
      "step": 308510
    },
    {
      "epoch": 1.081285682742958,
      "grad_norm": 2.84375,
      "learning_rate": 3.5531853028757164e-05,
      "loss": 0.8914,
      "step": 308520
    },
    {
      "epoch": 1.0813207302498538,
      "grad_norm": 2.640625,
      "learning_rate": 3.553120400009346e-05,
      "loss": 0.9179,
      "step": 308530
    },
    {
      "epoch": 1.0813557777567493,
      "grad_norm": 2.796875,
      "learning_rate": 3.553055497142976e-05,
      "loss": 0.8793,
      "step": 308540
    },
    {
      "epoch": 1.0813908252636448,
      "grad_norm": 3.046875,
      "learning_rate": 3.5529905942766055e-05,
      "loss": 0.8512,
      "step": 308550
    },
    {
      "epoch": 1.0814258727705404,
      "grad_norm": 2.703125,
      "learning_rate": 3.5529256914102356e-05,
      "loss": 0.914,
      "step": 308560
    },
    {
      "epoch": 1.0814609202774361,
      "grad_norm": 3.40625,
      "learning_rate": 3.552860788543865e-05,
      "loss": 0.8734,
      "step": 308570
    },
    {
      "epoch": 1.0814959677843317,
      "grad_norm": 2.984375,
      "learning_rate": 3.552795885677495e-05,
      "loss": 0.9401,
      "step": 308580
    },
    {
      "epoch": 1.0815310152912272,
      "grad_norm": 2.734375,
      "learning_rate": 3.552730982811125e-05,
      "loss": 0.8737,
      "step": 308590
    },
    {
      "epoch": 1.081566062798123,
      "grad_norm": 2.671875,
      "learning_rate": 3.552666079944755e-05,
      "loss": 0.845,
      "step": 308600
    },
    {
      "epoch": 1.0816011103050185,
      "grad_norm": 3.0625,
      "learning_rate": 3.552601177078384e-05,
      "loss": 0.8023,
      "step": 308610
    },
    {
      "epoch": 1.081636157811914,
      "grad_norm": 2.828125,
      "learning_rate": 3.5525362742120144e-05,
      "loss": 0.8403,
      "step": 308620
    },
    {
      "epoch": 1.0816712053188096,
      "grad_norm": 3.375,
      "learning_rate": 3.5524713713456446e-05,
      "loss": 0.8495,
      "step": 308630
    },
    {
      "epoch": 1.0817062528257053,
      "grad_norm": 3.078125,
      "learning_rate": 3.552406468479274e-05,
      "loss": 0.8363,
      "step": 308640
    },
    {
      "epoch": 1.0817413003326009,
      "grad_norm": 3.140625,
      "learning_rate": 3.552341565612904e-05,
      "loss": 0.9275,
      "step": 308650
    },
    {
      "epoch": 1.0817763478394964,
      "grad_norm": 3.03125,
      "learning_rate": 3.5522766627465336e-05,
      "loss": 0.8001,
      "step": 308660
    },
    {
      "epoch": 1.0818113953463921,
      "grad_norm": 3.578125,
      "learning_rate": 3.552211759880164e-05,
      "loss": 0.8699,
      "step": 308670
    },
    {
      "epoch": 1.0818464428532877,
      "grad_norm": 2.859375,
      "learning_rate": 3.552146857013793e-05,
      "loss": 0.7906,
      "step": 308680
    },
    {
      "epoch": 1.0818814903601832,
      "grad_norm": 3.125,
      "learning_rate": 3.5520819541474234e-05,
      "loss": 0.863,
      "step": 308690
    },
    {
      "epoch": 1.0819165378670788,
      "grad_norm": 3.0625,
      "learning_rate": 3.552017051281053e-05,
      "loss": 0.8639,
      "step": 308700
    },
    {
      "epoch": 1.0819515853739745,
      "grad_norm": 2.84375,
      "learning_rate": 3.551952148414683e-05,
      "loss": 0.8081,
      "step": 308710
    },
    {
      "epoch": 1.08198663288087,
      "grad_norm": 2.6875,
      "learning_rate": 3.5518872455483124e-05,
      "loss": 0.86,
      "step": 308720
    },
    {
      "epoch": 1.0820216803877656,
      "grad_norm": 2.984375,
      "learning_rate": 3.5518223426819426e-05,
      "loss": 0.857,
      "step": 308730
    },
    {
      "epoch": 1.0820567278946611,
      "grad_norm": 2.8125,
      "learning_rate": 3.551757439815572e-05,
      "loss": 0.8882,
      "step": 308740
    },
    {
      "epoch": 1.0820917754015569,
      "grad_norm": 2.90625,
      "learning_rate": 3.551692536949202e-05,
      "loss": 0.8237,
      "step": 308750
    },
    {
      "epoch": 1.0821268229084524,
      "grad_norm": 2.5,
      "learning_rate": 3.5516276340828316e-05,
      "loss": 0.8107,
      "step": 308760
    },
    {
      "epoch": 1.082161870415348,
      "grad_norm": 3.0,
      "learning_rate": 3.551562731216462e-05,
      "loss": 0.9912,
      "step": 308770
    },
    {
      "epoch": 1.0821969179222437,
      "grad_norm": 2.8125,
      "learning_rate": 3.551497828350092e-05,
      "loss": 0.9496,
      "step": 308780
    },
    {
      "epoch": 1.0822319654291392,
      "grad_norm": 2.734375,
      "learning_rate": 3.5514329254837214e-05,
      "loss": 0.8847,
      "step": 308790
    },
    {
      "epoch": 1.0822670129360348,
      "grad_norm": 3.0625,
      "learning_rate": 3.5513680226173515e-05,
      "loss": 0.9062,
      "step": 308800
    },
    {
      "epoch": 1.0823020604429303,
      "grad_norm": 2.78125,
      "learning_rate": 3.551303119750981e-05,
      "loss": 0.8699,
      "step": 308810
    },
    {
      "epoch": 1.082337107949826,
      "grad_norm": 2.78125,
      "learning_rate": 3.551238216884611e-05,
      "loss": 0.7927,
      "step": 308820
    },
    {
      "epoch": 1.0823721554567216,
      "grad_norm": 2.890625,
      "learning_rate": 3.5511733140182406e-05,
      "loss": 0.8273,
      "step": 308830
    },
    {
      "epoch": 1.0824072029636171,
      "grad_norm": 3.0625,
      "learning_rate": 3.551108411151871e-05,
      "loss": 0.8834,
      "step": 308840
    },
    {
      "epoch": 1.0824422504705127,
      "grad_norm": 3.0,
      "learning_rate": 3.5510435082855e-05,
      "loss": 0.838,
      "step": 308850
    },
    {
      "epoch": 1.0824772979774084,
      "grad_norm": 2.6875,
      "learning_rate": 3.5509786054191296e-05,
      "loss": 0.8489,
      "step": 308860
    },
    {
      "epoch": 1.082512345484304,
      "grad_norm": 3.078125,
      "learning_rate": 3.55091370255276e-05,
      "loss": 0.935,
      "step": 308870
    },
    {
      "epoch": 1.0825473929911995,
      "grad_norm": 3.28125,
      "learning_rate": 3.550848799686389e-05,
      "loss": 0.8435,
      "step": 308880
    },
    {
      "epoch": 1.0825824404980953,
      "grad_norm": 2.828125,
      "learning_rate": 3.5507838968200194e-05,
      "loss": 0.7946,
      "step": 308890
    },
    {
      "epoch": 1.0826174880049908,
      "grad_norm": 2.40625,
      "learning_rate": 3.550718993953649e-05,
      "loss": 0.8168,
      "step": 308900
    },
    {
      "epoch": 1.0826525355118863,
      "grad_norm": 3.15625,
      "learning_rate": 3.550654091087279e-05,
      "loss": 0.9363,
      "step": 308910
    },
    {
      "epoch": 1.0826875830187819,
      "grad_norm": 2.8125,
      "learning_rate": 3.5505891882209084e-05,
      "loss": 0.7881,
      "step": 308920
    },
    {
      "epoch": 1.0827226305256776,
      "grad_norm": 2.984375,
      "learning_rate": 3.5505242853545386e-05,
      "loss": 0.8943,
      "step": 308930
    },
    {
      "epoch": 1.0827576780325732,
      "grad_norm": 2.875,
      "learning_rate": 3.550459382488168e-05,
      "loss": 0.9128,
      "step": 308940
    },
    {
      "epoch": 1.0827927255394687,
      "grad_norm": 2.75,
      "learning_rate": 3.550394479621798e-05,
      "loss": 0.838,
      "step": 308950
    },
    {
      "epoch": 1.0828277730463642,
      "grad_norm": 2.640625,
      "learning_rate": 3.5503295767554276e-05,
      "loss": 0.8079,
      "step": 308960
    },
    {
      "epoch": 1.08286282055326,
      "grad_norm": 3.0625,
      "learning_rate": 3.550264673889058e-05,
      "loss": 0.8631,
      "step": 308970
    },
    {
      "epoch": 1.0828978680601555,
      "grad_norm": 2.5625,
      "learning_rate": 3.550199771022687e-05,
      "loss": 0.8893,
      "step": 308980
    },
    {
      "epoch": 1.082932915567051,
      "grad_norm": 2.984375,
      "learning_rate": 3.5501348681563174e-05,
      "loss": 0.8389,
      "step": 308990
    },
    {
      "epoch": 1.0829679630739468,
      "grad_norm": 2.953125,
      "learning_rate": 3.5500699652899475e-05,
      "loss": 0.8373,
      "step": 309000
    },
    {
      "epoch": 1.0830030105808424,
      "grad_norm": 3.234375,
      "learning_rate": 3.550005062423577e-05,
      "loss": 0.8643,
      "step": 309010
    },
    {
      "epoch": 1.083038058087738,
      "grad_norm": 2.953125,
      "learning_rate": 3.549940159557207e-05,
      "loss": 0.8516,
      "step": 309020
    },
    {
      "epoch": 1.0830731055946334,
      "grad_norm": 3.34375,
      "learning_rate": 3.5498752566908366e-05,
      "loss": 0.8567,
      "step": 309030
    },
    {
      "epoch": 1.0831081531015292,
      "grad_norm": 3.03125,
      "learning_rate": 3.549810353824467e-05,
      "loss": 0.9539,
      "step": 309040
    },
    {
      "epoch": 1.0831432006084247,
      "grad_norm": 3.125,
      "learning_rate": 3.549745450958096e-05,
      "loss": 0.815,
      "step": 309050
    },
    {
      "epoch": 1.0831782481153203,
      "grad_norm": 3.109375,
      "learning_rate": 3.549680548091726e-05,
      "loss": 0.9143,
      "step": 309060
    },
    {
      "epoch": 1.0832132956222158,
      "grad_norm": 2.78125,
      "learning_rate": 3.549615645225356e-05,
      "loss": 0.8365,
      "step": 309070
    },
    {
      "epoch": 1.0832483431291116,
      "grad_norm": 2.75,
      "learning_rate": 3.549550742358986e-05,
      "loss": 0.838,
      "step": 309080
    },
    {
      "epoch": 1.083283390636007,
      "grad_norm": 2.90625,
      "learning_rate": 3.5494858394926154e-05,
      "loss": 0.8767,
      "step": 309090
    },
    {
      "epoch": 1.0833184381429026,
      "grad_norm": 2.9375,
      "learning_rate": 3.5494209366262455e-05,
      "loss": 0.8473,
      "step": 309100
    },
    {
      "epoch": 1.0833534856497984,
      "grad_norm": 3.109375,
      "learning_rate": 3.549356033759875e-05,
      "loss": 0.8767,
      "step": 309110
    },
    {
      "epoch": 1.083388533156694,
      "grad_norm": 3.203125,
      "learning_rate": 3.549291130893505e-05,
      "loss": 0.9161,
      "step": 309120
    },
    {
      "epoch": 1.0834235806635895,
      "grad_norm": 3.171875,
      "learning_rate": 3.549226228027135e-05,
      "loss": 0.9297,
      "step": 309130
    },
    {
      "epoch": 1.083458628170485,
      "grad_norm": 2.6875,
      "learning_rate": 3.549161325160765e-05,
      "loss": 0.7417,
      "step": 309140
    },
    {
      "epoch": 1.0834936756773808,
      "grad_norm": 2.921875,
      "learning_rate": 3.549096422294395e-05,
      "loss": 0.8681,
      "step": 309150
    },
    {
      "epoch": 1.0835287231842763,
      "grad_norm": 2.75,
      "learning_rate": 3.549031519428024e-05,
      "loss": 0.7598,
      "step": 309160
    },
    {
      "epoch": 1.0835637706911718,
      "grad_norm": 2.921875,
      "learning_rate": 3.5489666165616544e-05,
      "loss": 0.8809,
      "step": 309170
    },
    {
      "epoch": 1.0835988181980676,
      "grad_norm": 2.328125,
      "learning_rate": 3.548901713695284e-05,
      "loss": 0.8391,
      "step": 309180
    },
    {
      "epoch": 1.0836338657049631,
      "grad_norm": 3.21875,
      "learning_rate": 3.548836810828914e-05,
      "loss": 0.9228,
      "step": 309190
    },
    {
      "epoch": 1.0836689132118587,
      "grad_norm": 2.9375,
      "learning_rate": 3.5487719079625435e-05,
      "loss": 0.8259,
      "step": 309200
    },
    {
      "epoch": 1.0837039607187542,
      "grad_norm": 2.90625,
      "learning_rate": 3.5487070050961736e-05,
      "loss": 0.8429,
      "step": 309210
    },
    {
      "epoch": 1.08373900822565,
      "grad_norm": 2.890625,
      "learning_rate": 3.548642102229803e-05,
      "loss": 0.8829,
      "step": 309220
    },
    {
      "epoch": 1.0837740557325455,
      "grad_norm": 2.890625,
      "learning_rate": 3.5485771993634326e-05,
      "loss": 0.8751,
      "step": 309230
    },
    {
      "epoch": 1.083809103239441,
      "grad_norm": 2.90625,
      "learning_rate": 3.548512296497063e-05,
      "loss": 0.8361,
      "step": 309240
    },
    {
      "epoch": 1.0838441507463368,
      "grad_norm": 2.84375,
      "learning_rate": 3.548447393630692e-05,
      "loss": 0.8078,
      "step": 309250
    },
    {
      "epoch": 1.0838791982532323,
      "grad_norm": 2.78125,
      "learning_rate": 3.548382490764322e-05,
      "loss": 0.8496,
      "step": 309260
    },
    {
      "epoch": 1.0839142457601278,
      "grad_norm": 2.890625,
      "learning_rate": 3.548317587897952e-05,
      "loss": 0.8586,
      "step": 309270
    },
    {
      "epoch": 1.0839492932670234,
      "grad_norm": 2.640625,
      "learning_rate": 3.548252685031582e-05,
      "loss": 0.8774,
      "step": 309280
    },
    {
      "epoch": 1.0839843407739191,
      "grad_norm": 3.265625,
      "learning_rate": 3.5481877821652114e-05,
      "loss": 0.8871,
      "step": 309290
    },
    {
      "epoch": 1.0840193882808147,
      "grad_norm": 3.0625,
      "learning_rate": 3.5481228792988415e-05,
      "loss": 0.8917,
      "step": 309300
    },
    {
      "epoch": 1.0840544357877102,
      "grad_norm": 3.03125,
      "learning_rate": 3.548057976432471e-05,
      "loss": 0.7841,
      "step": 309310
    },
    {
      "epoch": 1.0840894832946057,
      "grad_norm": 3.296875,
      "learning_rate": 3.547993073566101e-05,
      "loss": 0.8424,
      "step": 309320
    },
    {
      "epoch": 1.0841245308015015,
      "grad_norm": 2.890625,
      "learning_rate": 3.5479281706997306e-05,
      "loss": 0.9,
      "step": 309330
    },
    {
      "epoch": 1.084159578308397,
      "grad_norm": 2.421875,
      "learning_rate": 3.547863267833361e-05,
      "loss": 0.8282,
      "step": 309340
    },
    {
      "epoch": 1.0841946258152926,
      "grad_norm": 2.984375,
      "learning_rate": 3.54779836496699e-05,
      "loss": 0.8791,
      "step": 309350
    },
    {
      "epoch": 1.0842296733221883,
      "grad_norm": 2.640625,
      "learning_rate": 3.54773346210062e-05,
      "loss": 0.8448,
      "step": 309360
    },
    {
      "epoch": 1.0842647208290839,
      "grad_norm": 2.921875,
      "learning_rate": 3.5476685592342504e-05,
      "loss": 0.8464,
      "step": 309370
    },
    {
      "epoch": 1.0842997683359794,
      "grad_norm": 3.09375,
      "learning_rate": 3.54760365636788e-05,
      "loss": 0.9082,
      "step": 309380
    },
    {
      "epoch": 1.084334815842875,
      "grad_norm": 2.9375,
      "learning_rate": 3.54753875350151e-05,
      "loss": 0.8796,
      "step": 309390
    },
    {
      "epoch": 1.0843698633497707,
      "grad_norm": 2.890625,
      "learning_rate": 3.5474738506351395e-05,
      "loss": 0.91,
      "step": 309400
    },
    {
      "epoch": 1.0844049108566662,
      "grad_norm": 3.09375,
      "learning_rate": 3.5474089477687696e-05,
      "loss": 0.9784,
      "step": 309410
    },
    {
      "epoch": 1.0844399583635618,
      "grad_norm": 2.953125,
      "learning_rate": 3.547344044902399e-05,
      "loss": 0.9398,
      "step": 309420
    },
    {
      "epoch": 1.0844750058704573,
      "grad_norm": 3.40625,
      "learning_rate": 3.547279142036029e-05,
      "loss": 0.8897,
      "step": 309430
    },
    {
      "epoch": 1.084510053377353,
      "grad_norm": 2.578125,
      "learning_rate": 3.547214239169659e-05,
      "loss": 0.8622,
      "step": 309440
    },
    {
      "epoch": 1.0845451008842486,
      "grad_norm": 3.1875,
      "learning_rate": 3.547149336303289e-05,
      "loss": 0.9082,
      "step": 309450
    },
    {
      "epoch": 1.0845801483911441,
      "grad_norm": 2.828125,
      "learning_rate": 3.547084433436918e-05,
      "loss": 0.8399,
      "step": 309460
    },
    {
      "epoch": 1.08461519589804,
      "grad_norm": 3.453125,
      "learning_rate": 3.5470195305705484e-05,
      "loss": 0.8609,
      "step": 309470
    },
    {
      "epoch": 1.0846502434049354,
      "grad_norm": 2.828125,
      "learning_rate": 3.546954627704178e-05,
      "loss": 0.8345,
      "step": 309480
    },
    {
      "epoch": 1.084685290911831,
      "grad_norm": 3.140625,
      "learning_rate": 3.546889724837808e-05,
      "loss": 0.8867,
      "step": 309490
    },
    {
      "epoch": 1.0847203384187265,
      "grad_norm": 3.046875,
      "learning_rate": 3.546824821971438e-05,
      "loss": 0.8625,
      "step": 309500
    },
    {
      "epoch": 1.0847553859256223,
      "grad_norm": 3.171875,
      "learning_rate": 3.5467599191050676e-05,
      "loss": 0.906,
      "step": 309510
    },
    {
      "epoch": 1.0847904334325178,
      "grad_norm": 4.1875,
      "learning_rate": 3.546695016238698e-05,
      "loss": 0.9043,
      "step": 309520
    },
    {
      "epoch": 1.0848254809394133,
      "grad_norm": 2.984375,
      "learning_rate": 3.546630113372327e-05,
      "loss": 0.9361,
      "step": 309530
    },
    {
      "epoch": 1.0848605284463089,
      "grad_norm": 2.59375,
      "learning_rate": 3.5465652105059574e-05,
      "loss": 0.9037,
      "step": 309540
    },
    {
      "epoch": 1.0848955759532046,
      "grad_norm": 2.890625,
      "learning_rate": 3.546500307639587e-05,
      "loss": 0.8266,
      "step": 309550
    },
    {
      "epoch": 1.0849306234601002,
      "grad_norm": 2.9375,
      "learning_rate": 3.546435404773217e-05,
      "loss": 0.7984,
      "step": 309560
    },
    {
      "epoch": 1.0849656709669957,
      "grad_norm": 2.75,
      "learning_rate": 3.5463705019068464e-05,
      "loss": 0.8308,
      "step": 309570
    },
    {
      "epoch": 1.0850007184738915,
      "grad_norm": 3.09375,
      "learning_rate": 3.5463055990404766e-05,
      "loss": 0.9137,
      "step": 309580
    },
    {
      "epoch": 1.085035765980787,
      "grad_norm": 2.796875,
      "learning_rate": 3.546240696174106e-05,
      "loss": 0.849,
      "step": 309590
    },
    {
      "epoch": 1.0850708134876825,
      "grad_norm": 2.5,
      "learning_rate": 3.546175793307736e-05,
      "loss": 0.876,
      "step": 309600
    },
    {
      "epoch": 1.085105860994578,
      "grad_norm": 3.453125,
      "learning_rate": 3.5461108904413656e-05,
      "loss": 0.8985,
      "step": 309610
    },
    {
      "epoch": 1.0851409085014738,
      "grad_norm": 3.171875,
      "learning_rate": 3.546045987574995e-05,
      "loss": 0.8613,
      "step": 309620
    },
    {
      "epoch": 1.0851759560083694,
      "grad_norm": 3.078125,
      "learning_rate": 3.545981084708625e-05,
      "loss": 0.8869,
      "step": 309630
    },
    {
      "epoch": 1.085211003515265,
      "grad_norm": 2.953125,
      "learning_rate": 3.545916181842255e-05,
      "loss": 0.8391,
      "step": 309640
    },
    {
      "epoch": 1.0852460510221604,
      "grad_norm": 2.5,
      "learning_rate": 3.545851278975885e-05,
      "loss": 0.849,
      "step": 309650
    },
    {
      "epoch": 1.0852810985290562,
      "grad_norm": 2.953125,
      "learning_rate": 3.545786376109514e-05,
      "loss": 0.9156,
      "step": 309660
    },
    {
      "epoch": 1.0853161460359517,
      "grad_norm": 3.1875,
      "learning_rate": 3.5457214732431444e-05,
      "loss": 0.8649,
      "step": 309670
    },
    {
      "epoch": 1.0853511935428473,
      "grad_norm": 3.015625,
      "learning_rate": 3.545656570376774e-05,
      "loss": 0.8891,
      "step": 309680
    },
    {
      "epoch": 1.085386241049743,
      "grad_norm": 2.515625,
      "learning_rate": 3.545591667510404e-05,
      "loss": 0.777,
      "step": 309690
    },
    {
      "epoch": 1.0854212885566386,
      "grad_norm": 3.25,
      "learning_rate": 3.5455267646440335e-05,
      "loss": 0.9136,
      "step": 309700
    },
    {
      "epoch": 1.085456336063534,
      "grad_norm": 2.125,
      "learning_rate": 3.5454618617776636e-05,
      "loss": 0.8414,
      "step": 309710
    },
    {
      "epoch": 1.0854913835704296,
      "grad_norm": 2.75,
      "learning_rate": 3.545396958911293e-05,
      "loss": 0.8371,
      "step": 309720
    },
    {
      "epoch": 1.0855264310773254,
      "grad_norm": 2.953125,
      "learning_rate": 3.545332056044923e-05,
      "loss": 0.8394,
      "step": 309730
    },
    {
      "epoch": 1.085561478584221,
      "grad_norm": 2.859375,
      "learning_rate": 3.5452671531785534e-05,
      "loss": 0.8069,
      "step": 309740
    },
    {
      "epoch": 1.0855965260911165,
      "grad_norm": 2.890625,
      "learning_rate": 3.545202250312183e-05,
      "loss": 0.884,
      "step": 309750
    },
    {
      "epoch": 1.085631573598012,
      "grad_norm": 2.765625,
      "learning_rate": 3.545137347445813e-05,
      "loss": 0.874,
      "step": 309760
    },
    {
      "epoch": 1.0856666211049077,
      "grad_norm": 2.8125,
      "learning_rate": 3.5450724445794424e-05,
      "loss": 0.8202,
      "step": 309770
    },
    {
      "epoch": 1.0857016686118033,
      "grad_norm": 3.09375,
      "learning_rate": 3.5450075417130726e-05,
      "loss": 0.9086,
      "step": 309780
    },
    {
      "epoch": 1.0857367161186988,
      "grad_norm": 3.265625,
      "learning_rate": 3.544942638846702e-05,
      "loss": 0.9777,
      "step": 309790
    },
    {
      "epoch": 1.0857717636255946,
      "grad_norm": 3.265625,
      "learning_rate": 3.544877735980332e-05,
      "loss": 0.9025,
      "step": 309800
    },
    {
      "epoch": 1.0858068111324901,
      "grad_norm": 2.59375,
      "learning_rate": 3.5448128331139616e-05,
      "loss": 0.83,
      "step": 309810
    },
    {
      "epoch": 1.0858418586393856,
      "grad_norm": 2.53125,
      "learning_rate": 3.544747930247592e-05,
      "loss": 0.7906,
      "step": 309820
    },
    {
      "epoch": 1.0858769061462812,
      "grad_norm": 2.921875,
      "learning_rate": 3.544683027381221e-05,
      "loss": 0.9115,
      "step": 309830
    },
    {
      "epoch": 1.085911953653177,
      "grad_norm": 3.1875,
      "learning_rate": 3.5446181245148514e-05,
      "loss": 0.7999,
      "step": 309840
    },
    {
      "epoch": 1.0859470011600725,
      "grad_norm": 3.484375,
      "learning_rate": 3.544553221648481e-05,
      "loss": 0.9427,
      "step": 309850
    },
    {
      "epoch": 1.085982048666968,
      "grad_norm": 3.015625,
      "learning_rate": 3.544488318782111e-05,
      "loss": 0.8467,
      "step": 309860
    },
    {
      "epoch": 1.0860170961738638,
      "grad_norm": 2.640625,
      "learning_rate": 3.544423415915741e-05,
      "loss": 0.8278,
      "step": 309870
    },
    {
      "epoch": 1.0860521436807593,
      "grad_norm": 2.203125,
      "learning_rate": 3.5443585130493706e-05,
      "loss": 0.8423,
      "step": 309880
    },
    {
      "epoch": 1.0860871911876548,
      "grad_norm": 3.125,
      "learning_rate": 3.544293610183001e-05,
      "loss": 0.9036,
      "step": 309890
    },
    {
      "epoch": 1.0861222386945504,
      "grad_norm": 2.765625,
      "learning_rate": 3.54422870731663e-05,
      "loss": 0.8322,
      "step": 309900
    },
    {
      "epoch": 1.0861572862014461,
      "grad_norm": 2.703125,
      "learning_rate": 3.54416380445026e-05,
      "loss": 0.8433,
      "step": 309910
    },
    {
      "epoch": 1.0861923337083417,
      "grad_norm": 3.171875,
      "learning_rate": 3.54409890158389e-05,
      "loss": 0.8729,
      "step": 309920
    },
    {
      "epoch": 1.0862273812152372,
      "grad_norm": 3.0625,
      "learning_rate": 3.54403399871752e-05,
      "loss": 0.8662,
      "step": 309930
    },
    {
      "epoch": 1.086262428722133,
      "grad_norm": 3.453125,
      "learning_rate": 3.5439690958511494e-05,
      "loss": 0.8853,
      "step": 309940
    },
    {
      "epoch": 1.0862974762290285,
      "grad_norm": 2.53125,
      "learning_rate": 3.5439041929847795e-05,
      "loss": 0.8736,
      "step": 309950
    },
    {
      "epoch": 1.086332523735924,
      "grad_norm": 2.90625,
      "learning_rate": 3.543839290118409e-05,
      "loss": 0.8351,
      "step": 309960
    },
    {
      "epoch": 1.0863675712428196,
      "grad_norm": 3.09375,
      "learning_rate": 3.543774387252039e-05,
      "loss": 0.9879,
      "step": 309970
    },
    {
      "epoch": 1.0864026187497153,
      "grad_norm": 2.953125,
      "learning_rate": 3.5437094843856686e-05,
      "loss": 0.8855,
      "step": 309980
    },
    {
      "epoch": 1.0864376662566109,
      "grad_norm": 2.9375,
      "learning_rate": 3.543644581519298e-05,
      "loss": 0.9173,
      "step": 309990
    },
    {
      "epoch": 1.0864727137635064,
      "grad_norm": 3.078125,
      "learning_rate": 3.543579678652928e-05,
      "loss": 0.8665,
      "step": 310000
    },
    {
      "epoch": 1.0864727137635064,
      "eval_loss": 0.8125312924385071,
      "eval_runtime": 560.119,
      "eval_samples_per_second": 679.206,
      "eval_steps_per_second": 56.6,
      "step": 310000
    },
    {
      "epoch": 1.086507761270402,
      "grad_norm": 3.046875,
      "learning_rate": 3.5435147757865576e-05,
      "loss": 0.9314,
      "step": 310010
    },
    {
      "epoch": 1.0865428087772977,
      "grad_norm": 2.96875,
      "learning_rate": 3.543449872920188e-05,
      "loss": 0.9013,
      "step": 310020
    },
    {
      "epoch": 1.0865778562841932,
      "grad_norm": 2.671875,
      "learning_rate": 3.543384970053817e-05,
      "loss": 0.8304,
      "step": 310030
    },
    {
      "epoch": 1.0866129037910888,
      "grad_norm": 3.03125,
      "learning_rate": 3.5433200671874474e-05,
      "loss": 0.9198,
      "step": 310040
    },
    {
      "epoch": 1.0866479512979845,
      "grad_norm": 2.71875,
      "learning_rate": 3.543255164321077e-05,
      "loss": 0.8152,
      "step": 310050
    },
    {
      "epoch": 1.08668299880488,
      "grad_norm": 3.125,
      "learning_rate": 3.543190261454707e-05,
      "loss": 0.8397,
      "step": 310060
    },
    {
      "epoch": 1.0867180463117756,
      "grad_norm": 3.09375,
      "learning_rate": 3.5431253585883364e-05,
      "loss": 0.9231,
      "step": 310070
    },
    {
      "epoch": 1.0867530938186711,
      "grad_norm": 2.75,
      "learning_rate": 3.5430604557219666e-05,
      "loss": 0.9157,
      "step": 310080
    },
    {
      "epoch": 1.086788141325567,
      "grad_norm": 2.84375,
      "learning_rate": 3.542995552855596e-05,
      "loss": 0.7831,
      "step": 310090
    },
    {
      "epoch": 1.0868231888324624,
      "grad_norm": 3.171875,
      "learning_rate": 3.542930649989226e-05,
      "loss": 0.7919,
      "step": 310100
    },
    {
      "epoch": 1.086858236339358,
      "grad_norm": 3.15625,
      "learning_rate": 3.542865747122856e-05,
      "loss": 0.9327,
      "step": 310110
    },
    {
      "epoch": 1.0868932838462535,
      "grad_norm": 2.453125,
      "learning_rate": 3.542800844256486e-05,
      "loss": 0.7779,
      "step": 310120
    },
    {
      "epoch": 1.0869283313531493,
      "grad_norm": 2.734375,
      "learning_rate": 3.542735941390116e-05,
      "loss": 0.8354,
      "step": 310130
    },
    {
      "epoch": 1.0869633788600448,
      "grad_norm": 2.9375,
      "learning_rate": 3.5426710385237454e-05,
      "loss": 0.9057,
      "step": 310140
    },
    {
      "epoch": 1.0869984263669403,
      "grad_norm": 2.828125,
      "learning_rate": 3.5426061356573755e-05,
      "loss": 0.8299,
      "step": 310150
    },
    {
      "epoch": 1.087033473873836,
      "grad_norm": 2.96875,
      "learning_rate": 3.542541232791005e-05,
      "loss": 0.9501,
      "step": 310160
    },
    {
      "epoch": 1.0870685213807316,
      "grad_norm": 3.46875,
      "learning_rate": 3.542476329924635e-05,
      "loss": 0.8715,
      "step": 310170
    },
    {
      "epoch": 1.0871035688876272,
      "grad_norm": 3.4375,
      "learning_rate": 3.5424114270582646e-05,
      "loss": 0.7971,
      "step": 310180
    },
    {
      "epoch": 1.0871386163945227,
      "grad_norm": 2.984375,
      "learning_rate": 3.542346524191895e-05,
      "loss": 0.8579,
      "step": 310190
    },
    {
      "epoch": 1.0871736639014185,
      "grad_norm": 2.921875,
      "learning_rate": 3.542281621325524e-05,
      "loss": 0.861,
      "step": 310200
    },
    {
      "epoch": 1.087208711408314,
      "grad_norm": 2.875,
      "learning_rate": 3.542216718459154e-05,
      "loss": 0.8442,
      "step": 310210
    },
    {
      "epoch": 1.0872437589152095,
      "grad_norm": 2.8125,
      "learning_rate": 3.542151815592784e-05,
      "loss": 0.894,
      "step": 310220
    },
    {
      "epoch": 1.087278806422105,
      "grad_norm": 3.328125,
      "learning_rate": 3.542086912726414e-05,
      "loss": 0.8629,
      "step": 310230
    },
    {
      "epoch": 1.0873138539290008,
      "grad_norm": 2.875,
      "learning_rate": 3.542022009860044e-05,
      "loss": 0.8069,
      "step": 310240
    },
    {
      "epoch": 1.0873489014358964,
      "grad_norm": 2.625,
      "learning_rate": 3.5419571069936735e-05,
      "loss": 0.8789,
      "step": 310250
    },
    {
      "epoch": 1.087383948942792,
      "grad_norm": 3.234375,
      "learning_rate": 3.5418922041273036e-05,
      "loss": 0.8495,
      "step": 310260
    },
    {
      "epoch": 1.0874189964496876,
      "grad_norm": 3.078125,
      "learning_rate": 3.541827301260933e-05,
      "loss": 0.8692,
      "step": 310270
    },
    {
      "epoch": 1.0874540439565832,
      "grad_norm": 3.078125,
      "learning_rate": 3.541762398394563e-05,
      "loss": 0.8443,
      "step": 310280
    },
    {
      "epoch": 1.0874890914634787,
      "grad_norm": 2.953125,
      "learning_rate": 3.541697495528193e-05,
      "loss": 0.8854,
      "step": 310290
    },
    {
      "epoch": 1.0875241389703743,
      "grad_norm": 3.328125,
      "learning_rate": 3.541632592661823e-05,
      "loss": 0.8174,
      "step": 310300
    },
    {
      "epoch": 1.08755918647727,
      "grad_norm": 3.171875,
      "learning_rate": 3.541567689795452e-05,
      "loss": 0.855,
      "step": 310310
    },
    {
      "epoch": 1.0875942339841655,
      "grad_norm": 2.734375,
      "learning_rate": 3.5415027869290824e-05,
      "loss": 0.8085,
      "step": 310320
    },
    {
      "epoch": 1.087629281491061,
      "grad_norm": 3.375,
      "learning_rate": 3.541437884062712e-05,
      "loss": 0.8952,
      "step": 310330
    },
    {
      "epoch": 1.0876643289979566,
      "grad_norm": 3.0,
      "learning_rate": 3.541372981196342e-05,
      "loss": 1.0217,
      "step": 310340
    },
    {
      "epoch": 1.0876993765048524,
      "grad_norm": 2.40625,
      "learning_rate": 3.5413080783299715e-05,
      "loss": 0.8989,
      "step": 310350
    },
    {
      "epoch": 1.087734424011748,
      "grad_norm": 2.84375,
      "learning_rate": 3.541243175463601e-05,
      "loss": 0.8554,
      "step": 310360
    },
    {
      "epoch": 1.0877694715186434,
      "grad_norm": 2.90625,
      "learning_rate": 3.541178272597231e-05,
      "loss": 0.8671,
      "step": 310370
    },
    {
      "epoch": 1.0878045190255392,
      "grad_norm": 2.859375,
      "learning_rate": 3.5411133697308606e-05,
      "loss": 0.8849,
      "step": 310380
    },
    {
      "epoch": 1.0878395665324347,
      "grad_norm": 2.90625,
      "learning_rate": 3.541048466864491e-05,
      "loss": 0.8585,
      "step": 310390
    },
    {
      "epoch": 1.0878746140393303,
      "grad_norm": 3.0625,
      "learning_rate": 3.54098356399812e-05,
      "loss": 0.797,
      "step": 310400
    },
    {
      "epoch": 1.0879096615462258,
      "grad_norm": 2.90625,
      "learning_rate": 3.54091866113175e-05,
      "loss": 0.928,
      "step": 310410
    },
    {
      "epoch": 1.0879447090531216,
      "grad_norm": 2.34375,
      "learning_rate": 3.54085375826538e-05,
      "loss": 0.9119,
      "step": 310420
    },
    {
      "epoch": 1.087979756560017,
      "grad_norm": 2.828125,
      "learning_rate": 3.54078885539901e-05,
      "loss": 0.8897,
      "step": 310430
    },
    {
      "epoch": 1.0880148040669126,
      "grad_norm": 2.875,
      "learning_rate": 3.5407239525326394e-05,
      "loss": 0.8507,
      "step": 310440
    },
    {
      "epoch": 1.0880498515738082,
      "grad_norm": 2.703125,
      "learning_rate": 3.5406590496662695e-05,
      "loss": 0.8418,
      "step": 310450
    },
    {
      "epoch": 1.088084899080704,
      "grad_norm": 3.078125,
      "learning_rate": 3.5405941467998996e-05,
      "loss": 0.8866,
      "step": 310460
    },
    {
      "epoch": 1.0881199465875995,
      "grad_norm": 3.171875,
      "learning_rate": 3.540529243933529e-05,
      "loss": 0.7845,
      "step": 310470
    },
    {
      "epoch": 1.088154994094495,
      "grad_norm": 2.4375,
      "learning_rate": 3.540464341067159e-05,
      "loss": 0.8727,
      "step": 310480
    },
    {
      "epoch": 1.0881900416013908,
      "grad_norm": 2.78125,
      "learning_rate": 3.540399438200789e-05,
      "loss": 0.8461,
      "step": 310490
    },
    {
      "epoch": 1.0882250891082863,
      "grad_norm": 3.015625,
      "learning_rate": 3.540334535334419e-05,
      "loss": 0.8511,
      "step": 310500
    },
    {
      "epoch": 1.0882601366151818,
      "grad_norm": 3.4375,
      "learning_rate": 3.540269632468048e-05,
      "loss": 0.9021,
      "step": 310510
    },
    {
      "epoch": 1.0882951841220774,
      "grad_norm": 2.90625,
      "learning_rate": 3.5402047296016784e-05,
      "loss": 0.8803,
      "step": 310520
    },
    {
      "epoch": 1.0883302316289731,
      "grad_norm": 3.328125,
      "learning_rate": 3.540139826735308e-05,
      "loss": 0.8097,
      "step": 310530
    },
    {
      "epoch": 1.0883652791358687,
      "grad_norm": 2.765625,
      "learning_rate": 3.540074923868938e-05,
      "loss": 0.8401,
      "step": 310540
    },
    {
      "epoch": 1.0884003266427642,
      "grad_norm": 3.03125,
      "learning_rate": 3.5400100210025675e-05,
      "loss": 0.8361,
      "step": 310550
    },
    {
      "epoch": 1.08843537414966,
      "grad_norm": 2.609375,
      "learning_rate": 3.5399451181361976e-05,
      "loss": 0.7661,
      "step": 310560
    },
    {
      "epoch": 1.0884704216565555,
      "grad_norm": 3.546875,
      "learning_rate": 3.539880215269827e-05,
      "loss": 0.758,
      "step": 310570
    },
    {
      "epoch": 1.088505469163451,
      "grad_norm": 2.484375,
      "learning_rate": 3.539815312403457e-05,
      "loss": 0.823,
      "step": 310580
    },
    {
      "epoch": 1.0885405166703466,
      "grad_norm": 3.109375,
      "learning_rate": 3.539750409537087e-05,
      "loss": 0.7783,
      "step": 310590
    },
    {
      "epoch": 1.0885755641772423,
      "grad_norm": 2.828125,
      "learning_rate": 3.539685506670717e-05,
      "loss": 0.8938,
      "step": 310600
    },
    {
      "epoch": 1.0886106116841379,
      "grad_norm": 2.8125,
      "learning_rate": 3.539620603804347e-05,
      "loss": 0.8603,
      "step": 310610
    },
    {
      "epoch": 1.0886456591910334,
      "grad_norm": 2.78125,
      "learning_rate": 3.5395557009379764e-05,
      "loss": 0.8012,
      "step": 310620
    },
    {
      "epoch": 1.0886807066979292,
      "grad_norm": 2.96875,
      "learning_rate": 3.5394907980716066e-05,
      "loss": 0.888,
      "step": 310630
    },
    {
      "epoch": 1.0887157542048247,
      "grad_norm": 2.953125,
      "learning_rate": 3.539425895205236e-05,
      "loss": 0.8878,
      "step": 310640
    },
    {
      "epoch": 1.0887508017117202,
      "grad_norm": 2.921875,
      "learning_rate": 3.539360992338866e-05,
      "loss": 0.8305,
      "step": 310650
    },
    {
      "epoch": 1.0887858492186158,
      "grad_norm": 3.140625,
      "learning_rate": 3.5392960894724956e-05,
      "loss": 0.8626,
      "step": 310660
    },
    {
      "epoch": 1.0888208967255115,
      "grad_norm": 3.375,
      "learning_rate": 3.539231186606126e-05,
      "loss": 0.9218,
      "step": 310670
    },
    {
      "epoch": 1.088855944232407,
      "grad_norm": 3.03125,
      "learning_rate": 3.539166283739755e-05,
      "loss": 0.7975,
      "step": 310680
    },
    {
      "epoch": 1.0888909917393026,
      "grad_norm": 2.671875,
      "learning_rate": 3.5391013808733854e-05,
      "loss": 0.7901,
      "step": 310690
    },
    {
      "epoch": 1.0889260392461981,
      "grad_norm": 3.15625,
      "learning_rate": 3.539036478007015e-05,
      "loss": 0.9193,
      "step": 310700
    },
    {
      "epoch": 1.0889610867530939,
      "grad_norm": 2.828125,
      "learning_rate": 3.538971575140645e-05,
      "loss": 0.8428,
      "step": 310710
    },
    {
      "epoch": 1.0889961342599894,
      "grad_norm": 2.6875,
      "learning_rate": 3.5389066722742744e-05,
      "loss": 0.8693,
      "step": 310720
    },
    {
      "epoch": 1.089031181766885,
      "grad_norm": 2.203125,
      "learning_rate": 3.5388417694079046e-05,
      "loss": 0.7661,
      "step": 310730
    },
    {
      "epoch": 1.0890662292737807,
      "grad_norm": 3.75,
      "learning_rate": 3.538776866541534e-05,
      "loss": 0.8864,
      "step": 310740
    },
    {
      "epoch": 1.0891012767806763,
      "grad_norm": 2.609375,
      "learning_rate": 3.5387119636751635e-05,
      "loss": 0.8829,
      "step": 310750
    },
    {
      "epoch": 1.0891363242875718,
      "grad_norm": 2.515625,
      "learning_rate": 3.5386470608087936e-05,
      "loss": 0.8188,
      "step": 310760
    },
    {
      "epoch": 1.0891713717944673,
      "grad_norm": 3.25,
      "learning_rate": 3.538582157942423e-05,
      "loss": 0.9554,
      "step": 310770
    },
    {
      "epoch": 1.089206419301363,
      "grad_norm": 3.03125,
      "learning_rate": 3.538517255076053e-05,
      "loss": 0.9216,
      "step": 310780
    },
    {
      "epoch": 1.0892414668082586,
      "grad_norm": 2.34375,
      "learning_rate": 3.538452352209683e-05,
      "loss": 0.8093,
      "step": 310790
    },
    {
      "epoch": 1.0892765143151542,
      "grad_norm": 3.453125,
      "learning_rate": 3.538387449343313e-05,
      "loss": 0.8574,
      "step": 310800
    },
    {
      "epoch": 1.0893115618220497,
      "grad_norm": 3.015625,
      "learning_rate": 3.538322546476942e-05,
      "loss": 0.8226,
      "step": 310810
    },
    {
      "epoch": 1.0893466093289454,
      "grad_norm": 3.109375,
      "learning_rate": 3.5382576436105724e-05,
      "loss": 0.8526,
      "step": 310820
    },
    {
      "epoch": 1.089381656835841,
      "grad_norm": 2.875,
      "learning_rate": 3.5381927407442026e-05,
      "loss": 0.9008,
      "step": 310830
    },
    {
      "epoch": 1.0894167043427365,
      "grad_norm": 2.78125,
      "learning_rate": 3.538127837877832e-05,
      "loss": 0.8537,
      "step": 310840
    },
    {
      "epoch": 1.0894517518496323,
      "grad_norm": 2.90625,
      "learning_rate": 3.538062935011462e-05,
      "loss": 0.872,
      "step": 310850
    },
    {
      "epoch": 1.0894867993565278,
      "grad_norm": 3.0625,
      "learning_rate": 3.5379980321450916e-05,
      "loss": 0.9487,
      "step": 310860
    },
    {
      "epoch": 1.0895218468634233,
      "grad_norm": 3.109375,
      "learning_rate": 3.537933129278722e-05,
      "loss": 0.8673,
      "step": 310870
    },
    {
      "epoch": 1.0895568943703189,
      "grad_norm": 2.984375,
      "learning_rate": 3.537868226412351e-05,
      "loss": 0.8482,
      "step": 310880
    },
    {
      "epoch": 1.0895919418772146,
      "grad_norm": 2.953125,
      "learning_rate": 3.5378033235459814e-05,
      "loss": 0.8762,
      "step": 310890
    },
    {
      "epoch": 1.0896269893841102,
      "grad_norm": 2.6875,
      "learning_rate": 3.537738420679611e-05,
      "loss": 0.909,
      "step": 310900
    },
    {
      "epoch": 1.0896620368910057,
      "grad_norm": 3.0625,
      "learning_rate": 3.537673517813241e-05,
      "loss": 0.854,
      "step": 310910
    },
    {
      "epoch": 1.0896970843979012,
      "grad_norm": 4.40625,
      "learning_rate": 3.5376086149468704e-05,
      "loss": 0.7893,
      "step": 310920
    },
    {
      "epoch": 1.089732131904797,
      "grad_norm": 2.828125,
      "learning_rate": 3.5375437120805006e-05,
      "loss": 0.8736,
      "step": 310930
    },
    {
      "epoch": 1.0897671794116925,
      "grad_norm": 3.171875,
      "learning_rate": 3.53747880921413e-05,
      "loss": 0.8984,
      "step": 310940
    },
    {
      "epoch": 1.089802226918588,
      "grad_norm": 2.546875,
      "learning_rate": 3.53741390634776e-05,
      "loss": 0.9039,
      "step": 310950
    },
    {
      "epoch": 1.0898372744254838,
      "grad_norm": 3.1875,
      "learning_rate": 3.5373490034813896e-05,
      "loss": 0.8713,
      "step": 310960
    },
    {
      "epoch": 1.0898723219323794,
      "grad_norm": 2.9375,
      "learning_rate": 3.53728410061502e-05,
      "loss": 0.8945,
      "step": 310970
    },
    {
      "epoch": 1.089907369439275,
      "grad_norm": 2.703125,
      "learning_rate": 3.53721919774865e-05,
      "loss": 0.8239,
      "step": 310980
    },
    {
      "epoch": 1.0899424169461704,
      "grad_norm": 3.140625,
      "learning_rate": 3.5371542948822794e-05,
      "loss": 0.8184,
      "step": 310990
    },
    {
      "epoch": 1.0899774644530662,
      "grad_norm": 2.90625,
      "learning_rate": 3.5370893920159095e-05,
      "loss": 0.9084,
      "step": 311000
    },
    {
      "epoch": 1.0900125119599617,
      "grad_norm": 3.375,
      "learning_rate": 3.537024489149539e-05,
      "loss": 0.8687,
      "step": 311010
    },
    {
      "epoch": 1.0900475594668573,
      "grad_norm": 3.109375,
      "learning_rate": 3.536959586283169e-05,
      "loss": 0.7875,
      "step": 311020
    },
    {
      "epoch": 1.0900826069737528,
      "grad_norm": 3.125,
      "learning_rate": 3.5368946834167986e-05,
      "loss": 0.8348,
      "step": 311030
    },
    {
      "epoch": 1.0901176544806486,
      "grad_norm": 2.78125,
      "learning_rate": 3.536829780550429e-05,
      "loss": 0.8906,
      "step": 311040
    },
    {
      "epoch": 1.090152701987544,
      "grad_norm": 3.140625,
      "learning_rate": 3.536764877684058e-05,
      "loss": 0.9797,
      "step": 311050
    },
    {
      "epoch": 1.0901877494944396,
      "grad_norm": 2.953125,
      "learning_rate": 3.536699974817688e-05,
      "loss": 0.8586,
      "step": 311060
    },
    {
      "epoch": 1.0902227970013354,
      "grad_norm": 3.890625,
      "learning_rate": 3.536635071951318e-05,
      "loss": 0.9104,
      "step": 311070
    },
    {
      "epoch": 1.090257844508231,
      "grad_norm": 2.6875,
      "learning_rate": 3.536570169084948e-05,
      "loss": 0.8925,
      "step": 311080
    },
    {
      "epoch": 1.0902928920151265,
      "grad_norm": 2.71875,
      "learning_rate": 3.5365052662185774e-05,
      "loss": 0.8741,
      "step": 311090
    },
    {
      "epoch": 1.090327939522022,
      "grad_norm": 3.28125,
      "learning_rate": 3.5364403633522075e-05,
      "loss": 0.9337,
      "step": 311100
    },
    {
      "epoch": 1.0903629870289178,
      "grad_norm": 2.765625,
      "learning_rate": 3.536375460485837e-05,
      "loss": 0.914,
      "step": 311110
    },
    {
      "epoch": 1.0903980345358133,
      "grad_norm": 2.53125,
      "learning_rate": 3.5363105576194664e-05,
      "loss": 0.8382,
      "step": 311120
    },
    {
      "epoch": 1.0904330820427088,
      "grad_norm": 2.734375,
      "learning_rate": 3.5362456547530966e-05,
      "loss": 0.9513,
      "step": 311130
    },
    {
      "epoch": 1.0904681295496046,
      "grad_norm": 2.546875,
      "learning_rate": 3.536180751886726e-05,
      "loss": 0.878,
      "step": 311140
    },
    {
      "epoch": 1.0905031770565001,
      "grad_norm": 2.875,
      "learning_rate": 3.536115849020356e-05,
      "loss": 0.899,
      "step": 311150
    },
    {
      "epoch": 1.0905382245633957,
      "grad_norm": 3.4375,
      "learning_rate": 3.5360509461539856e-05,
      "loss": 0.8505,
      "step": 311160
    },
    {
      "epoch": 1.0905732720702912,
      "grad_norm": 3.0625,
      "learning_rate": 3.535986043287616e-05,
      "loss": 0.9091,
      "step": 311170
    },
    {
      "epoch": 1.090608319577187,
      "grad_norm": 2.4375,
      "learning_rate": 3.535921140421245e-05,
      "loss": 0.8336,
      "step": 311180
    },
    {
      "epoch": 1.0906433670840825,
      "grad_norm": 3.203125,
      "learning_rate": 3.5358562375548754e-05,
      "loss": 0.8323,
      "step": 311190
    },
    {
      "epoch": 1.090678414590978,
      "grad_norm": 2.703125,
      "learning_rate": 3.5357913346885055e-05,
      "loss": 0.8357,
      "step": 311200
    },
    {
      "epoch": 1.0907134620978736,
      "grad_norm": 3.09375,
      "learning_rate": 3.535726431822135e-05,
      "loss": 0.8675,
      "step": 311210
    },
    {
      "epoch": 1.0907485096047693,
      "grad_norm": 2.578125,
      "learning_rate": 3.535661528955765e-05,
      "loss": 0.8927,
      "step": 311220
    },
    {
      "epoch": 1.0907835571116649,
      "grad_norm": 2.578125,
      "learning_rate": 3.5355966260893946e-05,
      "loss": 0.876,
      "step": 311230
    },
    {
      "epoch": 1.0908186046185604,
      "grad_norm": 3.0625,
      "learning_rate": 3.535531723223025e-05,
      "loss": 0.843,
      "step": 311240
    },
    {
      "epoch": 1.0908536521254562,
      "grad_norm": 2.53125,
      "learning_rate": 3.535466820356654e-05,
      "loss": 0.7835,
      "step": 311250
    },
    {
      "epoch": 1.0908886996323517,
      "grad_norm": 3.109375,
      "learning_rate": 3.535401917490284e-05,
      "loss": 0.7768,
      "step": 311260
    },
    {
      "epoch": 1.0909237471392472,
      "grad_norm": 3.0,
      "learning_rate": 3.535337014623914e-05,
      "loss": 0.8997,
      "step": 311270
    },
    {
      "epoch": 1.0909587946461428,
      "grad_norm": 3.203125,
      "learning_rate": 3.535272111757544e-05,
      "loss": 0.9188,
      "step": 311280
    },
    {
      "epoch": 1.0909938421530385,
      "grad_norm": 2.703125,
      "learning_rate": 3.5352072088911734e-05,
      "loss": 0.9385,
      "step": 311290
    },
    {
      "epoch": 1.091028889659934,
      "grad_norm": 2.96875,
      "learning_rate": 3.5351423060248035e-05,
      "loss": 0.8838,
      "step": 311300
    },
    {
      "epoch": 1.0910639371668296,
      "grad_norm": 2.828125,
      "learning_rate": 3.535077403158433e-05,
      "loss": 0.8214,
      "step": 311310
    },
    {
      "epoch": 1.0910989846737253,
      "grad_norm": 3.1875,
      "learning_rate": 3.535012500292063e-05,
      "loss": 0.7987,
      "step": 311320
    },
    {
      "epoch": 1.0911340321806209,
      "grad_norm": 3.21875,
      "learning_rate": 3.534947597425693e-05,
      "loss": 0.8827,
      "step": 311330
    },
    {
      "epoch": 1.0911690796875164,
      "grad_norm": 2.875,
      "learning_rate": 3.534882694559323e-05,
      "loss": 0.9461,
      "step": 311340
    },
    {
      "epoch": 1.091204127194412,
      "grad_norm": 2.6875,
      "learning_rate": 3.534817791692953e-05,
      "loss": 0.8328,
      "step": 311350
    },
    {
      "epoch": 1.0912391747013077,
      "grad_norm": 3.078125,
      "learning_rate": 3.534752888826582e-05,
      "loss": 0.9174,
      "step": 311360
    },
    {
      "epoch": 1.0912742222082032,
      "grad_norm": 3.015625,
      "learning_rate": 3.5346879859602125e-05,
      "loss": 0.8655,
      "step": 311370
    },
    {
      "epoch": 1.0913092697150988,
      "grad_norm": 2.953125,
      "learning_rate": 3.534623083093842e-05,
      "loss": 0.8339,
      "step": 311380
    },
    {
      "epoch": 1.0913443172219943,
      "grad_norm": 3.515625,
      "learning_rate": 3.534558180227472e-05,
      "loss": 0.9006,
      "step": 311390
    },
    {
      "epoch": 1.09137936472889,
      "grad_norm": 2.734375,
      "learning_rate": 3.5344932773611015e-05,
      "loss": 0.9174,
      "step": 311400
    },
    {
      "epoch": 1.0914144122357856,
      "grad_norm": 3.203125,
      "learning_rate": 3.5344283744947317e-05,
      "loss": 0.9812,
      "step": 311410
    },
    {
      "epoch": 1.0914494597426811,
      "grad_norm": 2.796875,
      "learning_rate": 3.534363471628361e-05,
      "loss": 0.8112,
      "step": 311420
    },
    {
      "epoch": 1.091484507249577,
      "grad_norm": 3.078125,
      "learning_rate": 3.534298568761991e-05,
      "loss": 0.8874,
      "step": 311430
    },
    {
      "epoch": 1.0915195547564724,
      "grad_norm": 3.234375,
      "learning_rate": 3.534233665895621e-05,
      "loss": 0.9346,
      "step": 311440
    },
    {
      "epoch": 1.091554602263368,
      "grad_norm": 2.875,
      "learning_rate": 3.534168763029251e-05,
      "loss": 0.8114,
      "step": 311450
    },
    {
      "epoch": 1.0915896497702635,
      "grad_norm": 3.234375,
      "learning_rate": 3.53410386016288e-05,
      "loss": 0.8317,
      "step": 311460
    },
    {
      "epoch": 1.0916246972771593,
      "grad_norm": 2.78125,
      "learning_rate": 3.5340389572965105e-05,
      "loss": 0.8766,
      "step": 311470
    },
    {
      "epoch": 1.0916597447840548,
      "grad_norm": 2.890625,
      "learning_rate": 3.5339740544301406e-05,
      "loss": 0.8699,
      "step": 311480
    },
    {
      "epoch": 1.0916947922909503,
      "grad_norm": 2.953125,
      "learning_rate": 3.5339091515637694e-05,
      "loss": 0.9093,
      "step": 311490
    },
    {
      "epoch": 1.0917298397978459,
      "grad_norm": 2.6875,
      "learning_rate": 3.5338442486973995e-05,
      "loss": 0.8235,
      "step": 311500
    },
    {
      "epoch": 1.0917648873047416,
      "grad_norm": 2.453125,
      "learning_rate": 3.533779345831029e-05,
      "loss": 0.8955,
      "step": 311510
    },
    {
      "epoch": 1.0917999348116372,
      "grad_norm": 3.046875,
      "learning_rate": 3.533714442964659e-05,
      "loss": 0.899,
      "step": 311520
    },
    {
      "epoch": 1.0918349823185327,
      "grad_norm": 2.765625,
      "learning_rate": 3.5336495400982886e-05,
      "loss": 0.8752,
      "step": 311530
    },
    {
      "epoch": 1.0918700298254285,
      "grad_norm": 2.875,
      "learning_rate": 3.533584637231919e-05,
      "loss": 0.8913,
      "step": 311540
    },
    {
      "epoch": 1.091905077332324,
      "grad_norm": 3.1875,
      "learning_rate": 3.533519734365548e-05,
      "loss": 0.7892,
      "step": 311550
    },
    {
      "epoch": 1.0919401248392195,
      "grad_norm": 3.15625,
      "learning_rate": 3.533454831499178e-05,
      "loss": 0.8759,
      "step": 311560
    },
    {
      "epoch": 1.091975172346115,
      "grad_norm": 2.734375,
      "learning_rate": 3.5333899286328085e-05,
      "loss": 0.849,
      "step": 311570
    },
    {
      "epoch": 1.0920102198530108,
      "grad_norm": 3.15625,
      "learning_rate": 3.533325025766438e-05,
      "loss": 0.8676,
      "step": 311580
    },
    {
      "epoch": 1.0920452673599064,
      "grad_norm": 3.0625,
      "learning_rate": 3.533260122900068e-05,
      "loss": 0.8537,
      "step": 311590
    },
    {
      "epoch": 1.092080314866802,
      "grad_norm": 3.171875,
      "learning_rate": 3.5331952200336975e-05,
      "loss": 0.8342,
      "step": 311600
    },
    {
      "epoch": 1.0921153623736974,
      "grad_norm": 2.828125,
      "learning_rate": 3.5331303171673277e-05,
      "loss": 0.8686,
      "step": 311610
    },
    {
      "epoch": 1.0921504098805932,
      "grad_norm": 3.3125,
      "learning_rate": 3.533065414300957e-05,
      "loss": 0.8885,
      "step": 311620
    },
    {
      "epoch": 1.0921854573874887,
      "grad_norm": 2.8125,
      "learning_rate": 3.533000511434587e-05,
      "loss": 0.9156,
      "step": 311630
    },
    {
      "epoch": 1.0922205048943843,
      "grad_norm": 3.09375,
      "learning_rate": 3.532935608568217e-05,
      "loss": 0.8937,
      "step": 311640
    },
    {
      "epoch": 1.09225555240128,
      "grad_norm": 3.03125,
      "learning_rate": 3.532870705701847e-05,
      "loss": 0.8394,
      "step": 311650
    },
    {
      "epoch": 1.0922905999081756,
      "grad_norm": 2.640625,
      "learning_rate": 3.532805802835476e-05,
      "loss": 0.9709,
      "step": 311660
    },
    {
      "epoch": 1.092325647415071,
      "grad_norm": 2.484375,
      "learning_rate": 3.5327408999691065e-05,
      "loss": 0.8183,
      "step": 311670
    },
    {
      "epoch": 1.0923606949219666,
      "grad_norm": 3.140625,
      "learning_rate": 3.532675997102736e-05,
      "loss": 0.8314,
      "step": 311680
    },
    {
      "epoch": 1.0923957424288624,
      "grad_norm": 2.71875,
      "learning_rate": 3.532611094236366e-05,
      "loss": 0.8632,
      "step": 311690
    },
    {
      "epoch": 1.092430789935758,
      "grad_norm": 2.9375,
      "learning_rate": 3.532546191369996e-05,
      "loss": 0.8961,
      "step": 311700
    },
    {
      "epoch": 1.0924658374426535,
      "grad_norm": 2.890625,
      "learning_rate": 3.5324812885036257e-05,
      "loss": 0.847,
      "step": 311710
    },
    {
      "epoch": 1.092500884949549,
      "grad_norm": 2.890625,
      "learning_rate": 3.532416385637256e-05,
      "loss": 0.8606,
      "step": 311720
    },
    {
      "epoch": 1.0925359324564448,
      "grad_norm": 2.875,
      "learning_rate": 3.532351482770885e-05,
      "loss": 0.8154,
      "step": 311730
    },
    {
      "epoch": 1.0925709799633403,
      "grad_norm": 3.109375,
      "learning_rate": 3.5322865799045154e-05,
      "loss": 0.8849,
      "step": 311740
    },
    {
      "epoch": 1.0926060274702358,
      "grad_norm": 2.859375,
      "learning_rate": 3.532221677038145e-05,
      "loss": 0.8921,
      "step": 311750
    },
    {
      "epoch": 1.0926410749771316,
      "grad_norm": 2.875,
      "learning_rate": 3.532156774171775e-05,
      "loss": 0.846,
      "step": 311760
    },
    {
      "epoch": 1.0926761224840271,
      "grad_norm": 2.625,
      "learning_rate": 3.5320918713054045e-05,
      "loss": 0.8956,
      "step": 311770
    },
    {
      "epoch": 1.0927111699909227,
      "grad_norm": 3.28125,
      "learning_rate": 3.5320269684390346e-05,
      "loss": 0.9367,
      "step": 311780
    },
    {
      "epoch": 1.0927462174978182,
      "grad_norm": 2.828125,
      "learning_rate": 3.531962065572664e-05,
      "loss": 0.8555,
      "step": 311790
    },
    {
      "epoch": 1.092781265004714,
      "grad_norm": 3.25,
      "learning_rate": 3.531897162706294e-05,
      "loss": 0.8896,
      "step": 311800
    },
    {
      "epoch": 1.0928163125116095,
      "grad_norm": 3.21875,
      "learning_rate": 3.5318322598399237e-05,
      "loss": 0.8957,
      "step": 311810
    },
    {
      "epoch": 1.092851360018505,
      "grad_norm": 3.125,
      "learning_rate": 3.531767356973554e-05,
      "loss": 0.8473,
      "step": 311820
    },
    {
      "epoch": 1.0928864075254008,
      "grad_norm": 2.765625,
      "learning_rate": 3.531702454107183e-05,
      "loss": 0.8871,
      "step": 311830
    },
    {
      "epoch": 1.0929214550322963,
      "grad_norm": 3.71875,
      "learning_rate": 3.5316375512408134e-05,
      "loss": 0.8718,
      "step": 311840
    },
    {
      "epoch": 1.0929565025391919,
      "grad_norm": 2.984375,
      "learning_rate": 3.5315726483744435e-05,
      "loss": 0.8697,
      "step": 311850
    },
    {
      "epoch": 1.0929915500460874,
      "grad_norm": 2.734375,
      "learning_rate": 3.531507745508073e-05,
      "loss": 0.8808,
      "step": 311860
    },
    {
      "epoch": 1.0930265975529831,
      "grad_norm": 2.75,
      "learning_rate": 3.5314428426417025e-05,
      "loss": 0.8263,
      "step": 311870
    },
    {
      "epoch": 1.0930616450598787,
      "grad_norm": 3.921875,
      "learning_rate": 3.531377939775332e-05,
      "loss": 0.862,
      "step": 311880
    },
    {
      "epoch": 1.0930966925667742,
      "grad_norm": 3.21875,
      "learning_rate": 3.531313036908962e-05,
      "loss": 0.8356,
      "step": 311890
    },
    {
      "epoch": 1.09313174007367,
      "grad_norm": 3.046875,
      "learning_rate": 3.5312481340425915e-05,
      "loss": 0.7305,
      "step": 311900
    },
    {
      "epoch": 1.0931667875805655,
      "grad_norm": 2.984375,
      "learning_rate": 3.5311832311762217e-05,
      "loss": 0.8492,
      "step": 311910
    },
    {
      "epoch": 1.093201835087461,
      "grad_norm": 2.90625,
      "learning_rate": 3.531118328309851e-05,
      "loss": 0.8365,
      "step": 311920
    },
    {
      "epoch": 1.0932368825943566,
      "grad_norm": 2.5625,
      "learning_rate": 3.531053425443481e-05,
      "loss": 0.8604,
      "step": 311930
    },
    {
      "epoch": 1.0932719301012523,
      "grad_norm": 2.90625,
      "learning_rate": 3.5309885225771114e-05,
      "loss": 0.8932,
      "step": 311940
    },
    {
      "epoch": 1.0933069776081479,
      "grad_norm": 2.953125,
      "learning_rate": 3.530923619710741e-05,
      "loss": 0.8563,
      "step": 311950
    },
    {
      "epoch": 1.0933420251150434,
      "grad_norm": 3.015625,
      "learning_rate": 3.530858716844371e-05,
      "loss": 0.8626,
      "step": 311960
    },
    {
      "epoch": 1.093377072621939,
      "grad_norm": 3.0625,
      "learning_rate": 3.5307938139780005e-05,
      "loss": 0.806,
      "step": 311970
    },
    {
      "epoch": 1.0934121201288347,
      "grad_norm": 2.921875,
      "learning_rate": 3.5307289111116306e-05,
      "loss": 0.8616,
      "step": 311980
    },
    {
      "epoch": 1.0934471676357302,
      "grad_norm": 3.046875,
      "learning_rate": 3.53066400824526e-05,
      "loss": 0.9029,
      "step": 311990
    },
    {
      "epoch": 1.0934822151426258,
      "grad_norm": 2.9375,
      "learning_rate": 3.53059910537889e-05,
      "loss": 0.8285,
      "step": 312000
    },
    {
      "epoch": 1.0935172626495215,
      "grad_norm": 2.59375,
      "learning_rate": 3.5305342025125197e-05,
      "loss": 0.8735,
      "step": 312010
    },
    {
      "epoch": 1.093552310156417,
      "grad_norm": 2.859375,
      "learning_rate": 3.53046929964615e-05,
      "loss": 0.8478,
      "step": 312020
    },
    {
      "epoch": 1.0935873576633126,
      "grad_norm": 2.421875,
      "learning_rate": 3.530404396779779e-05,
      "loss": 0.8183,
      "step": 312030
    },
    {
      "epoch": 1.0936224051702081,
      "grad_norm": 2.8125,
      "learning_rate": 3.5303394939134094e-05,
      "loss": 0.8225,
      "step": 312040
    },
    {
      "epoch": 1.093657452677104,
      "grad_norm": 2.890625,
      "learning_rate": 3.530274591047039e-05,
      "loss": 0.8248,
      "step": 312050
    },
    {
      "epoch": 1.0936925001839994,
      "grad_norm": 2.890625,
      "learning_rate": 3.530209688180669e-05,
      "loss": 0.9232,
      "step": 312060
    },
    {
      "epoch": 1.093727547690895,
      "grad_norm": 3.0625,
      "learning_rate": 3.530144785314299e-05,
      "loss": 0.8527,
      "step": 312070
    },
    {
      "epoch": 1.0937625951977905,
      "grad_norm": 2.953125,
      "learning_rate": 3.5300798824479286e-05,
      "loss": 0.9144,
      "step": 312080
    },
    {
      "epoch": 1.0937976427046863,
      "grad_norm": 3.03125,
      "learning_rate": 3.530014979581559e-05,
      "loss": 0.8624,
      "step": 312090
    },
    {
      "epoch": 1.0938326902115818,
      "grad_norm": 2.90625,
      "learning_rate": 3.529950076715188e-05,
      "loss": 0.7946,
      "step": 312100
    },
    {
      "epoch": 1.0938677377184773,
      "grad_norm": 3.28125,
      "learning_rate": 3.529885173848818e-05,
      "loss": 0.9434,
      "step": 312110
    },
    {
      "epoch": 1.093902785225373,
      "grad_norm": 3.28125,
      "learning_rate": 3.529820270982448e-05,
      "loss": 0.8269,
      "step": 312120
    },
    {
      "epoch": 1.0939378327322686,
      "grad_norm": 2.796875,
      "learning_rate": 3.529755368116078e-05,
      "loss": 0.9153,
      "step": 312130
    },
    {
      "epoch": 1.0939728802391642,
      "grad_norm": 3.0625,
      "learning_rate": 3.5296904652497074e-05,
      "loss": 0.8192,
      "step": 312140
    },
    {
      "epoch": 1.0940079277460597,
      "grad_norm": 3.15625,
      "learning_rate": 3.5296255623833375e-05,
      "loss": 0.9524,
      "step": 312150
    },
    {
      "epoch": 1.0940429752529555,
      "grad_norm": 3.28125,
      "learning_rate": 3.529560659516967e-05,
      "loss": 0.9406,
      "step": 312160
    },
    {
      "epoch": 1.094078022759851,
      "grad_norm": 3.125,
      "learning_rate": 3.529495756650597e-05,
      "loss": 0.9007,
      "step": 312170
    },
    {
      "epoch": 1.0941130702667465,
      "grad_norm": 2.8125,
      "learning_rate": 3.5294308537842266e-05,
      "loss": 0.8296,
      "step": 312180
    },
    {
      "epoch": 1.094148117773642,
      "grad_norm": 3.015625,
      "learning_rate": 3.529365950917857e-05,
      "loss": 0.9417,
      "step": 312190
    },
    {
      "epoch": 1.0941831652805378,
      "grad_norm": 2.53125,
      "learning_rate": 3.529301048051486e-05,
      "loss": 0.7656,
      "step": 312200
    },
    {
      "epoch": 1.0942182127874334,
      "grad_norm": 2.296875,
      "learning_rate": 3.529236145185116e-05,
      "loss": 0.8368,
      "step": 312210
    },
    {
      "epoch": 1.094253260294329,
      "grad_norm": 2.921875,
      "learning_rate": 3.5291712423187465e-05,
      "loss": 0.8465,
      "step": 312220
    },
    {
      "epoch": 1.0942883078012247,
      "grad_norm": 2.984375,
      "learning_rate": 3.529106339452376e-05,
      "loss": 0.8723,
      "step": 312230
    },
    {
      "epoch": 1.0943233553081202,
      "grad_norm": 2.8125,
      "learning_rate": 3.5290414365860054e-05,
      "loss": 0.9632,
      "step": 312240
    },
    {
      "epoch": 1.0943584028150157,
      "grad_norm": 2.921875,
      "learning_rate": 3.528976533719635e-05,
      "loss": 0.8509,
      "step": 312250
    },
    {
      "epoch": 1.0943934503219113,
      "grad_norm": 3.125,
      "learning_rate": 3.528911630853265e-05,
      "loss": 0.8898,
      "step": 312260
    },
    {
      "epoch": 1.094428497828807,
      "grad_norm": 2.890625,
      "learning_rate": 3.5288467279868945e-05,
      "loss": 0.8799,
      "step": 312270
    },
    {
      "epoch": 1.0944635453357026,
      "grad_norm": 3.15625,
      "learning_rate": 3.5287818251205246e-05,
      "loss": 0.7843,
      "step": 312280
    },
    {
      "epoch": 1.094498592842598,
      "grad_norm": 3.546875,
      "learning_rate": 3.528716922254155e-05,
      "loss": 0.9857,
      "step": 312290
    },
    {
      "epoch": 1.0945336403494936,
      "grad_norm": 2.921875,
      "learning_rate": 3.528652019387784e-05,
      "loss": 0.8861,
      "step": 312300
    },
    {
      "epoch": 1.0945686878563894,
      "grad_norm": 3.28125,
      "learning_rate": 3.528587116521414e-05,
      "loss": 0.9288,
      "step": 312310
    },
    {
      "epoch": 1.094603735363285,
      "grad_norm": 2.984375,
      "learning_rate": 3.528522213655044e-05,
      "loss": 0.8638,
      "step": 312320
    },
    {
      "epoch": 1.0946387828701805,
      "grad_norm": 3.234375,
      "learning_rate": 3.528457310788674e-05,
      "loss": 0.8701,
      "step": 312330
    },
    {
      "epoch": 1.0946738303770762,
      "grad_norm": 3.09375,
      "learning_rate": 3.5283924079223034e-05,
      "loss": 0.8389,
      "step": 312340
    },
    {
      "epoch": 1.0947088778839718,
      "grad_norm": 2.90625,
      "learning_rate": 3.5283275050559335e-05,
      "loss": 0.8297,
      "step": 312350
    },
    {
      "epoch": 1.0947439253908673,
      "grad_norm": 2.984375,
      "learning_rate": 3.528262602189563e-05,
      "loss": 0.8863,
      "step": 312360
    },
    {
      "epoch": 1.0947789728977628,
      "grad_norm": 3.109375,
      "learning_rate": 3.528197699323193e-05,
      "loss": 0.8625,
      "step": 312370
    },
    {
      "epoch": 1.0948140204046586,
      "grad_norm": 3.34375,
      "learning_rate": 3.5281327964568226e-05,
      "loss": 0.9335,
      "step": 312380
    },
    {
      "epoch": 1.0948490679115541,
      "grad_norm": 2.96875,
      "learning_rate": 3.528067893590453e-05,
      "loss": 0.8769,
      "step": 312390
    },
    {
      "epoch": 1.0948841154184497,
      "grad_norm": 2.9375,
      "learning_rate": 3.528002990724082e-05,
      "loss": 0.8255,
      "step": 312400
    },
    {
      "epoch": 1.0949191629253452,
      "grad_norm": 3.046875,
      "learning_rate": 3.527938087857712e-05,
      "loss": 0.8504,
      "step": 312410
    },
    {
      "epoch": 1.094954210432241,
      "grad_norm": 3.09375,
      "learning_rate": 3.527873184991342e-05,
      "loss": 0.8635,
      "step": 312420
    },
    {
      "epoch": 1.0949892579391365,
      "grad_norm": 3.015625,
      "learning_rate": 3.527808282124972e-05,
      "loss": 0.8761,
      "step": 312430
    },
    {
      "epoch": 1.095024305446032,
      "grad_norm": 3.0625,
      "learning_rate": 3.527743379258602e-05,
      "loss": 0.8814,
      "step": 312440
    },
    {
      "epoch": 1.0950593529529278,
      "grad_norm": 2.734375,
      "learning_rate": 3.5276784763922315e-05,
      "loss": 0.7798,
      "step": 312450
    },
    {
      "epoch": 1.0950944004598233,
      "grad_norm": 2.75,
      "learning_rate": 3.527613573525862e-05,
      "loss": 0.8541,
      "step": 312460
    },
    {
      "epoch": 1.0951294479667188,
      "grad_norm": 2.859375,
      "learning_rate": 3.527548670659491e-05,
      "loss": 0.8891,
      "step": 312470
    },
    {
      "epoch": 1.0951644954736144,
      "grad_norm": 2.890625,
      "learning_rate": 3.527483767793121e-05,
      "loss": 0.8433,
      "step": 312480
    },
    {
      "epoch": 1.0951995429805101,
      "grad_norm": 2.90625,
      "learning_rate": 3.527418864926751e-05,
      "loss": 0.9095,
      "step": 312490
    },
    {
      "epoch": 1.0952345904874057,
      "grad_norm": 3.0,
      "learning_rate": 3.527353962060381e-05,
      "loss": 0.7972,
      "step": 312500
    },
    {
      "epoch": 1.0952696379943012,
      "grad_norm": 2.75,
      "learning_rate": 3.52728905919401e-05,
      "loss": 1.0065,
      "step": 312510
    },
    {
      "epoch": 1.095304685501197,
      "grad_norm": 3.265625,
      "learning_rate": 3.5272241563276405e-05,
      "loss": 0.9397,
      "step": 312520
    },
    {
      "epoch": 1.0953397330080925,
      "grad_norm": 3.1875,
      "learning_rate": 3.52715925346127e-05,
      "loss": 0.9078,
      "step": 312530
    },
    {
      "epoch": 1.095374780514988,
      "grad_norm": 2.75,
      "learning_rate": 3.5270943505949e-05,
      "loss": 0.8289,
      "step": 312540
    },
    {
      "epoch": 1.0954098280218836,
      "grad_norm": 2.984375,
      "learning_rate": 3.5270294477285295e-05,
      "loss": 0.893,
      "step": 312550
    },
    {
      "epoch": 1.0954448755287793,
      "grad_norm": 2.875,
      "learning_rate": 3.52696454486216e-05,
      "loss": 0.9449,
      "step": 312560
    },
    {
      "epoch": 1.0954799230356749,
      "grad_norm": 2.734375,
      "learning_rate": 3.52689964199579e-05,
      "loss": 0.8448,
      "step": 312570
    },
    {
      "epoch": 1.0955149705425704,
      "grad_norm": 2.703125,
      "learning_rate": 3.526834739129419e-05,
      "loss": 0.8521,
      "step": 312580
    },
    {
      "epoch": 1.0955500180494662,
      "grad_norm": 2.609375,
      "learning_rate": 3.5267698362630494e-05,
      "loss": 0.8702,
      "step": 312590
    },
    {
      "epoch": 1.0955850655563617,
      "grad_norm": 2.703125,
      "learning_rate": 3.526704933396679e-05,
      "loss": 0.8116,
      "step": 312600
    },
    {
      "epoch": 1.0956201130632572,
      "grad_norm": 3.28125,
      "learning_rate": 3.526640030530309e-05,
      "loss": 0.9844,
      "step": 312610
    },
    {
      "epoch": 1.0956551605701528,
      "grad_norm": 2.9375,
      "learning_rate": 3.526575127663938e-05,
      "loss": 0.8421,
      "step": 312620
    },
    {
      "epoch": 1.0956902080770485,
      "grad_norm": 3.390625,
      "learning_rate": 3.526510224797568e-05,
      "loss": 0.8466,
      "step": 312630
    },
    {
      "epoch": 1.095725255583944,
      "grad_norm": 2.71875,
      "learning_rate": 3.5264453219311974e-05,
      "loss": 0.9113,
      "step": 312640
    },
    {
      "epoch": 1.0957603030908396,
      "grad_norm": 2.5625,
      "learning_rate": 3.5263804190648275e-05,
      "loss": 0.8559,
      "step": 312650
    },
    {
      "epoch": 1.0957953505977351,
      "grad_norm": 2.609375,
      "learning_rate": 3.526315516198458e-05,
      "loss": 0.8775,
      "step": 312660
    },
    {
      "epoch": 1.095830398104631,
      "grad_norm": 2.9375,
      "learning_rate": 3.526250613332087e-05,
      "loss": 0.8372,
      "step": 312670
    },
    {
      "epoch": 1.0958654456115264,
      "grad_norm": 2.5625,
      "learning_rate": 3.526185710465717e-05,
      "loss": 0.8329,
      "step": 312680
    },
    {
      "epoch": 1.095900493118422,
      "grad_norm": 2.71875,
      "learning_rate": 3.526120807599347e-05,
      "loss": 0.8246,
      "step": 312690
    },
    {
      "epoch": 1.0959355406253177,
      "grad_norm": 3.21875,
      "learning_rate": 3.526055904732977e-05,
      "loss": 0.8704,
      "step": 312700
    },
    {
      "epoch": 1.0959705881322133,
      "grad_norm": 3.125,
      "learning_rate": 3.525991001866606e-05,
      "loss": 0.9289,
      "step": 312710
    },
    {
      "epoch": 1.0960056356391088,
      "grad_norm": 2.546875,
      "learning_rate": 3.5259260990002365e-05,
      "loss": 0.8715,
      "step": 312720
    },
    {
      "epoch": 1.0960406831460043,
      "grad_norm": 2.84375,
      "learning_rate": 3.525861196133866e-05,
      "loss": 0.8467,
      "step": 312730
    },
    {
      "epoch": 1.0960757306529,
      "grad_norm": 3.078125,
      "learning_rate": 3.525796293267496e-05,
      "loss": 0.8345,
      "step": 312740
    },
    {
      "epoch": 1.0961107781597956,
      "grad_norm": 3.203125,
      "learning_rate": 3.5257313904011255e-05,
      "loss": 0.8345,
      "step": 312750
    },
    {
      "epoch": 1.0961458256666912,
      "grad_norm": 2.65625,
      "learning_rate": 3.525666487534756e-05,
      "loss": 0.815,
      "step": 312760
    },
    {
      "epoch": 1.0961808731735867,
      "grad_norm": 2.625,
      "learning_rate": 3.525601584668385e-05,
      "loss": 0.9166,
      "step": 312770
    },
    {
      "epoch": 1.0962159206804825,
      "grad_norm": 2.375,
      "learning_rate": 3.525536681802015e-05,
      "loss": 0.8037,
      "step": 312780
    },
    {
      "epoch": 1.096250968187378,
      "grad_norm": 3.0625,
      "learning_rate": 3.525471778935645e-05,
      "loss": 0.9078,
      "step": 312790
    },
    {
      "epoch": 1.0962860156942735,
      "grad_norm": 2.96875,
      "learning_rate": 3.525406876069275e-05,
      "loss": 0.8312,
      "step": 312800
    },
    {
      "epoch": 1.0963210632011693,
      "grad_norm": 2.703125,
      "learning_rate": 3.525341973202905e-05,
      "loss": 0.9441,
      "step": 312810
    },
    {
      "epoch": 1.0963561107080648,
      "grad_norm": 2.828125,
      "learning_rate": 3.5252770703365345e-05,
      "loss": 0.7319,
      "step": 312820
    },
    {
      "epoch": 1.0963911582149604,
      "grad_norm": 2.484375,
      "learning_rate": 3.5252121674701646e-05,
      "loss": 0.8334,
      "step": 312830
    },
    {
      "epoch": 1.096426205721856,
      "grad_norm": 3.171875,
      "learning_rate": 3.525147264603794e-05,
      "loss": 0.831,
      "step": 312840
    },
    {
      "epoch": 1.0964612532287517,
      "grad_norm": 3.71875,
      "learning_rate": 3.525082361737424e-05,
      "loss": 0.842,
      "step": 312850
    },
    {
      "epoch": 1.0964963007356472,
      "grad_norm": 2.9375,
      "learning_rate": 3.525017458871054e-05,
      "loss": 0.8942,
      "step": 312860
    },
    {
      "epoch": 1.0965313482425427,
      "grad_norm": 2.765625,
      "learning_rate": 3.524952556004684e-05,
      "loss": 0.8244,
      "step": 312870
    },
    {
      "epoch": 1.0965663957494383,
      "grad_norm": 3.140625,
      "learning_rate": 3.524887653138313e-05,
      "loss": 0.8299,
      "step": 312880
    },
    {
      "epoch": 1.096601443256334,
      "grad_norm": 2.59375,
      "learning_rate": 3.5248227502719434e-05,
      "loss": 0.9012,
      "step": 312890
    },
    {
      "epoch": 1.0966364907632296,
      "grad_norm": 2.90625,
      "learning_rate": 3.524757847405573e-05,
      "loss": 0.8492,
      "step": 312900
    },
    {
      "epoch": 1.096671538270125,
      "grad_norm": 2.59375,
      "learning_rate": 3.524692944539203e-05,
      "loss": 0.8267,
      "step": 312910
    },
    {
      "epoch": 1.0967065857770208,
      "grad_norm": 2.671875,
      "learning_rate": 3.5246280416728325e-05,
      "loss": 0.8828,
      "step": 312920
    },
    {
      "epoch": 1.0967416332839164,
      "grad_norm": 3.046875,
      "learning_rate": 3.5245631388064626e-05,
      "loss": 0.8991,
      "step": 312930
    },
    {
      "epoch": 1.096776680790812,
      "grad_norm": 2.546875,
      "learning_rate": 3.524498235940093e-05,
      "loss": 0.8172,
      "step": 312940
    },
    {
      "epoch": 1.0968117282977075,
      "grad_norm": 2.875,
      "learning_rate": 3.524433333073722e-05,
      "loss": 0.8491,
      "step": 312950
    },
    {
      "epoch": 1.0968467758046032,
      "grad_norm": 3.328125,
      "learning_rate": 3.5243684302073524e-05,
      "loss": 0.8539,
      "step": 312960
    },
    {
      "epoch": 1.0968818233114987,
      "grad_norm": 2.890625,
      "learning_rate": 3.524303527340982e-05,
      "loss": 0.8974,
      "step": 312970
    },
    {
      "epoch": 1.0969168708183943,
      "grad_norm": 2.90625,
      "learning_rate": 3.524238624474612e-05,
      "loss": 0.8454,
      "step": 312980
    },
    {
      "epoch": 1.0969519183252898,
      "grad_norm": 2.6875,
      "learning_rate": 3.5241737216082414e-05,
      "loss": 0.8366,
      "step": 312990
    },
    {
      "epoch": 1.0969869658321856,
      "grad_norm": 2.796875,
      "learning_rate": 3.524108818741871e-05,
      "loss": 0.8849,
      "step": 313000
    },
    {
      "epoch": 1.0970220133390811,
      "grad_norm": 2.671875,
      "learning_rate": 3.5240439158755e-05,
      "loss": 0.8404,
      "step": 313010
    },
    {
      "epoch": 1.0970570608459767,
      "grad_norm": 3.15625,
      "learning_rate": 3.5239790130091305e-05,
      "loss": 0.859,
      "step": 313020
    },
    {
      "epoch": 1.0970921083528724,
      "grad_norm": 2.640625,
      "learning_rate": 3.5239141101427606e-05,
      "loss": 0.7718,
      "step": 313030
    },
    {
      "epoch": 1.097127155859768,
      "grad_norm": 3.15625,
      "learning_rate": 3.52384920727639e-05,
      "loss": 0.8883,
      "step": 313040
    },
    {
      "epoch": 1.0971622033666635,
      "grad_norm": 2.3125,
      "learning_rate": 3.52378430441002e-05,
      "loss": 0.8003,
      "step": 313050
    },
    {
      "epoch": 1.097197250873559,
      "grad_norm": 2.421875,
      "learning_rate": 3.52371940154365e-05,
      "loss": 0.8887,
      "step": 313060
    },
    {
      "epoch": 1.0972322983804548,
      "grad_norm": 2.828125,
      "learning_rate": 3.52365449867728e-05,
      "loss": 0.8757,
      "step": 313070
    },
    {
      "epoch": 1.0972673458873503,
      "grad_norm": 3.234375,
      "learning_rate": 3.523589595810909e-05,
      "loss": 0.9503,
      "step": 313080
    },
    {
      "epoch": 1.0973023933942458,
      "grad_norm": 2.796875,
      "learning_rate": 3.5235246929445394e-05,
      "loss": 0.9197,
      "step": 313090
    },
    {
      "epoch": 1.0973374409011414,
      "grad_norm": 2.765625,
      "learning_rate": 3.523459790078169e-05,
      "loss": 0.9303,
      "step": 313100
    },
    {
      "epoch": 1.0973724884080371,
      "grad_norm": 2.546875,
      "learning_rate": 3.523394887211799e-05,
      "loss": 0.8697,
      "step": 313110
    },
    {
      "epoch": 1.0974075359149327,
      "grad_norm": 2.890625,
      "learning_rate": 3.5233299843454285e-05,
      "loss": 0.8418,
      "step": 313120
    },
    {
      "epoch": 1.0974425834218282,
      "grad_norm": 2.71875,
      "learning_rate": 3.5232650814790586e-05,
      "loss": 0.8637,
      "step": 313130
    },
    {
      "epoch": 1.097477630928724,
      "grad_norm": 2.9375,
      "learning_rate": 3.523200178612688e-05,
      "loss": 0.8202,
      "step": 313140
    },
    {
      "epoch": 1.0975126784356195,
      "grad_norm": 3.265625,
      "learning_rate": 3.523135275746318e-05,
      "loss": 0.8621,
      "step": 313150
    },
    {
      "epoch": 1.097547725942515,
      "grad_norm": 2.734375,
      "learning_rate": 3.523070372879948e-05,
      "loss": 0.8732,
      "step": 313160
    },
    {
      "epoch": 1.0975827734494106,
      "grad_norm": 2.75,
      "learning_rate": 3.523005470013578e-05,
      "loss": 0.7188,
      "step": 313170
    },
    {
      "epoch": 1.0976178209563063,
      "grad_norm": 3.09375,
      "learning_rate": 3.522940567147208e-05,
      "loss": 0.8769,
      "step": 313180
    },
    {
      "epoch": 1.0976528684632019,
      "grad_norm": 2.859375,
      "learning_rate": 3.5228756642808374e-05,
      "loss": 0.9253,
      "step": 313190
    },
    {
      "epoch": 1.0976879159700974,
      "grad_norm": 2.515625,
      "learning_rate": 3.5228107614144676e-05,
      "loss": 0.9013,
      "step": 313200
    },
    {
      "epoch": 1.0977229634769932,
      "grad_norm": 3.203125,
      "learning_rate": 3.522745858548097e-05,
      "loss": 0.8369,
      "step": 313210
    },
    {
      "epoch": 1.0977580109838887,
      "grad_norm": 2.671875,
      "learning_rate": 3.522680955681727e-05,
      "loss": 0.7886,
      "step": 313220
    },
    {
      "epoch": 1.0977930584907842,
      "grad_norm": 3.25,
      "learning_rate": 3.5226160528153566e-05,
      "loss": 0.9044,
      "step": 313230
    },
    {
      "epoch": 1.0978281059976798,
      "grad_norm": 2.90625,
      "learning_rate": 3.522551149948987e-05,
      "loss": 0.9531,
      "step": 313240
    },
    {
      "epoch": 1.0978631535045755,
      "grad_norm": 4.1875,
      "learning_rate": 3.522486247082616e-05,
      "loss": 0.9447,
      "step": 313250
    },
    {
      "epoch": 1.097898201011471,
      "grad_norm": 2.625,
      "learning_rate": 3.5224213442162464e-05,
      "loss": 0.8377,
      "step": 313260
    },
    {
      "epoch": 1.0979332485183666,
      "grad_norm": 3.609375,
      "learning_rate": 3.522356441349876e-05,
      "loss": 0.845,
      "step": 313270
    },
    {
      "epoch": 1.0979682960252624,
      "grad_norm": 2.984375,
      "learning_rate": 3.522291538483506e-05,
      "loss": 0.8362,
      "step": 313280
    },
    {
      "epoch": 1.098003343532158,
      "grad_norm": 2.59375,
      "learning_rate": 3.5222266356171354e-05,
      "loss": 0.8142,
      "step": 313290
    },
    {
      "epoch": 1.0980383910390534,
      "grad_norm": 2.8125,
      "learning_rate": 3.5221617327507656e-05,
      "loss": 0.8628,
      "step": 313300
    },
    {
      "epoch": 1.098073438545949,
      "grad_norm": 2.6875,
      "learning_rate": 3.522096829884396e-05,
      "loss": 0.8314,
      "step": 313310
    },
    {
      "epoch": 1.0981084860528447,
      "grad_norm": 2.921875,
      "learning_rate": 3.522031927018025e-05,
      "loss": 0.8332,
      "step": 313320
    },
    {
      "epoch": 1.0981435335597403,
      "grad_norm": 3.34375,
      "learning_rate": 3.521967024151655e-05,
      "loss": 0.9276,
      "step": 313330
    },
    {
      "epoch": 1.0981785810666358,
      "grad_norm": 3.09375,
      "learning_rate": 3.521902121285285e-05,
      "loss": 0.8726,
      "step": 313340
    },
    {
      "epoch": 1.0982136285735313,
      "grad_norm": 3.015625,
      "learning_rate": 3.521837218418915e-05,
      "loss": 0.8686,
      "step": 313350
    },
    {
      "epoch": 1.098248676080427,
      "grad_norm": 2.578125,
      "learning_rate": 3.5217723155525444e-05,
      "loss": 0.7816,
      "step": 313360
    },
    {
      "epoch": 1.0982837235873226,
      "grad_norm": 3.15625,
      "learning_rate": 3.521707412686174e-05,
      "loss": 0.8351,
      "step": 313370
    },
    {
      "epoch": 1.0983187710942182,
      "grad_norm": 2.71875,
      "learning_rate": 3.521642509819803e-05,
      "loss": 0.8354,
      "step": 313380
    },
    {
      "epoch": 1.098353818601114,
      "grad_norm": 3.265625,
      "learning_rate": 3.5215776069534334e-05,
      "loss": 0.9077,
      "step": 313390
    },
    {
      "epoch": 1.0983888661080095,
      "grad_norm": 3.03125,
      "learning_rate": 3.5215127040870636e-05,
      "loss": 0.8694,
      "step": 313400
    },
    {
      "epoch": 1.098423913614905,
      "grad_norm": 2.296875,
      "learning_rate": 3.521447801220693e-05,
      "loss": 0.9234,
      "step": 313410
    },
    {
      "epoch": 1.0984589611218005,
      "grad_norm": 3.109375,
      "learning_rate": 3.521382898354323e-05,
      "loss": 0.8569,
      "step": 313420
    },
    {
      "epoch": 1.0984940086286963,
      "grad_norm": 3.421875,
      "learning_rate": 3.5213179954879526e-05,
      "loss": 0.9307,
      "step": 313430
    },
    {
      "epoch": 1.0985290561355918,
      "grad_norm": 2.734375,
      "learning_rate": 3.521253092621583e-05,
      "loss": 0.8751,
      "step": 313440
    },
    {
      "epoch": 1.0985641036424874,
      "grad_norm": 2.96875,
      "learning_rate": 3.521188189755212e-05,
      "loss": 0.8192,
      "step": 313450
    },
    {
      "epoch": 1.098599151149383,
      "grad_norm": 3.28125,
      "learning_rate": 3.5211232868888424e-05,
      "loss": 0.9359,
      "step": 313460
    },
    {
      "epoch": 1.0986341986562786,
      "grad_norm": 2.828125,
      "learning_rate": 3.521058384022472e-05,
      "loss": 0.8686,
      "step": 313470
    },
    {
      "epoch": 1.0986692461631742,
      "grad_norm": 3.546875,
      "learning_rate": 3.520993481156102e-05,
      "loss": 0.9326,
      "step": 313480
    },
    {
      "epoch": 1.0987042936700697,
      "grad_norm": 2.453125,
      "learning_rate": 3.5209285782897314e-05,
      "loss": 0.8285,
      "step": 313490
    },
    {
      "epoch": 1.0987393411769655,
      "grad_norm": 3.15625,
      "learning_rate": 3.5208636754233616e-05,
      "loss": 0.887,
      "step": 313500
    },
    {
      "epoch": 1.098774388683861,
      "grad_norm": 2.75,
      "learning_rate": 3.520798772556991e-05,
      "loss": 0.7939,
      "step": 313510
    },
    {
      "epoch": 1.0988094361907565,
      "grad_norm": 2.921875,
      "learning_rate": 3.520733869690621e-05,
      "loss": 0.8624,
      "step": 313520
    },
    {
      "epoch": 1.098844483697652,
      "grad_norm": 3.09375,
      "learning_rate": 3.520668966824251e-05,
      "loss": 0.9292,
      "step": 313530
    },
    {
      "epoch": 1.0988795312045478,
      "grad_norm": 2.796875,
      "learning_rate": 3.520604063957881e-05,
      "loss": 0.8001,
      "step": 313540
    },
    {
      "epoch": 1.0989145787114434,
      "grad_norm": 3.09375,
      "learning_rate": 3.520539161091511e-05,
      "loss": 0.7699,
      "step": 313550
    },
    {
      "epoch": 1.098949626218339,
      "grad_norm": 3.0625,
      "learning_rate": 3.5204742582251404e-05,
      "loss": 0.9024,
      "step": 313560
    },
    {
      "epoch": 1.0989846737252345,
      "grad_norm": 2.828125,
      "learning_rate": 3.5204093553587705e-05,
      "loss": 0.913,
      "step": 313570
    },
    {
      "epoch": 1.0990197212321302,
      "grad_norm": 2.890625,
      "learning_rate": 3.5203444524924e-05,
      "loss": 0.9151,
      "step": 313580
    },
    {
      "epoch": 1.0990547687390257,
      "grad_norm": 2.78125,
      "learning_rate": 3.52027954962603e-05,
      "loss": 0.9639,
      "step": 313590
    },
    {
      "epoch": 1.0990898162459213,
      "grad_norm": 3.15625,
      "learning_rate": 3.5202146467596596e-05,
      "loss": 0.8881,
      "step": 313600
    },
    {
      "epoch": 1.099124863752817,
      "grad_norm": 3.0625,
      "learning_rate": 3.52014974389329e-05,
      "loss": 0.8064,
      "step": 313610
    },
    {
      "epoch": 1.0991599112597126,
      "grad_norm": 2.921875,
      "learning_rate": 3.520084841026919e-05,
      "loss": 0.9405,
      "step": 313620
    },
    {
      "epoch": 1.099194958766608,
      "grad_norm": 2.421875,
      "learning_rate": 3.520019938160549e-05,
      "loss": 0.8188,
      "step": 313630
    },
    {
      "epoch": 1.0992300062735036,
      "grad_norm": 3.109375,
      "learning_rate": 3.519955035294179e-05,
      "loss": 0.7927,
      "step": 313640
    },
    {
      "epoch": 1.0992650537803994,
      "grad_norm": 2.734375,
      "learning_rate": 3.519890132427809e-05,
      "loss": 0.9438,
      "step": 313650
    },
    {
      "epoch": 1.099300101287295,
      "grad_norm": 2.640625,
      "learning_rate": 3.5198252295614384e-05,
      "loss": 0.9371,
      "step": 313660
    },
    {
      "epoch": 1.0993351487941905,
      "grad_norm": 3.171875,
      "learning_rate": 3.5197603266950685e-05,
      "loss": 0.8693,
      "step": 313670
    },
    {
      "epoch": 1.099370196301086,
      "grad_norm": 2.59375,
      "learning_rate": 3.5196954238286986e-05,
      "loss": 0.8296,
      "step": 313680
    },
    {
      "epoch": 1.0994052438079818,
      "grad_norm": 2.484375,
      "learning_rate": 3.519630520962328e-05,
      "loss": 0.8363,
      "step": 313690
    },
    {
      "epoch": 1.0994402913148773,
      "grad_norm": 2.859375,
      "learning_rate": 3.519565618095958e-05,
      "loss": 0.8756,
      "step": 313700
    },
    {
      "epoch": 1.0994753388217728,
      "grad_norm": 3.296875,
      "learning_rate": 3.519500715229588e-05,
      "loss": 0.8569,
      "step": 313710
    },
    {
      "epoch": 1.0995103863286686,
      "grad_norm": 3.234375,
      "learning_rate": 3.519435812363218e-05,
      "loss": 0.8774,
      "step": 313720
    },
    {
      "epoch": 1.0995454338355641,
      "grad_norm": 2.890625,
      "learning_rate": 3.519370909496847e-05,
      "loss": 0.8252,
      "step": 313730
    },
    {
      "epoch": 1.0995804813424597,
      "grad_norm": 2.875,
      "learning_rate": 3.5193060066304774e-05,
      "loss": 0.8027,
      "step": 313740
    },
    {
      "epoch": 1.0996155288493552,
      "grad_norm": 3.1875,
      "learning_rate": 3.519241103764106e-05,
      "loss": 0.8021,
      "step": 313750
    },
    {
      "epoch": 1.099650576356251,
      "grad_norm": 2.75,
      "learning_rate": 3.5191762008977364e-05,
      "loss": 0.816,
      "step": 313760
    },
    {
      "epoch": 1.0996856238631465,
      "grad_norm": 2.515625,
      "learning_rate": 3.5191112980313665e-05,
      "loss": 0.8077,
      "step": 313770
    },
    {
      "epoch": 1.099720671370042,
      "grad_norm": 2.75,
      "learning_rate": 3.519046395164996e-05,
      "loss": 0.8408,
      "step": 313780
    },
    {
      "epoch": 1.0997557188769376,
      "grad_norm": 3.046875,
      "learning_rate": 3.518981492298626e-05,
      "loss": 0.9227,
      "step": 313790
    },
    {
      "epoch": 1.0997907663838333,
      "grad_norm": 3.171875,
      "learning_rate": 3.5189165894322556e-05,
      "loss": 0.844,
      "step": 313800
    },
    {
      "epoch": 1.0998258138907289,
      "grad_norm": 3.28125,
      "learning_rate": 3.518851686565886e-05,
      "loss": 0.897,
      "step": 313810
    },
    {
      "epoch": 1.0998608613976244,
      "grad_norm": 2.71875,
      "learning_rate": 3.518786783699515e-05,
      "loss": 0.9049,
      "step": 313820
    },
    {
      "epoch": 1.0998959089045202,
      "grad_norm": 2.90625,
      "learning_rate": 3.518721880833145e-05,
      "loss": 0.8536,
      "step": 313830
    },
    {
      "epoch": 1.0999309564114157,
      "grad_norm": 3.078125,
      "learning_rate": 3.518656977966775e-05,
      "loss": 0.8859,
      "step": 313840
    },
    {
      "epoch": 1.0999660039183112,
      "grad_norm": 3.484375,
      "learning_rate": 3.518592075100405e-05,
      "loss": 0.8615,
      "step": 313850
    },
    {
      "epoch": 1.1000010514252068,
      "grad_norm": 3.109375,
      "learning_rate": 3.5185271722340344e-05,
      "loss": 0.8611,
      "step": 313860
    },
    {
      "epoch": 1.1000360989321025,
      "grad_norm": 3.125,
      "learning_rate": 3.5184622693676645e-05,
      "loss": 0.8836,
      "step": 313870
    },
    {
      "epoch": 1.100071146438998,
      "grad_norm": 3.796875,
      "learning_rate": 3.518397366501294e-05,
      "loss": 0.8866,
      "step": 313880
    },
    {
      "epoch": 1.1001061939458936,
      "grad_norm": 3.46875,
      "learning_rate": 3.518332463634924e-05,
      "loss": 0.8051,
      "step": 313890
    },
    {
      "epoch": 1.1001412414527894,
      "grad_norm": 3.296875,
      "learning_rate": 3.518267560768554e-05,
      "loss": 0.9083,
      "step": 313900
    },
    {
      "epoch": 1.100176288959685,
      "grad_norm": 3.34375,
      "learning_rate": 3.518202657902184e-05,
      "loss": 0.814,
      "step": 313910
    },
    {
      "epoch": 1.1002113364665804,
      "grad_norm": 2.75,
      "learning_rate": 3.518137755035814e-05,
      "loss": 0.8501,
      "step": 313920
    },
    {
      "epoch": 1.100246383973476,
      "grad_norm": 2.65625,
      "learning_rate": 3.518072852169443e-05,
      "loss": 0.8815,
      "step": 313930
    },
    {
      "epoch": 1.1002814314803717,
      "grad_norm": 2.84375,
      "learning_rate": 3.5180079493030734e-05,
      "loss": 0.8783,
      "step": 313940
    },
    {
      "epoch": 1.1003164789872673,
      "grad_norm": 2.5625,
      "learning_rate": 3.517943046436703e-05,
      "loss": 0.8801,
      "step": 313950
    },
    {
      "epoch": 1.1003515264941628,
      "grad_norm": 2.546875,
      "learning_rate": 3.517878143570333e-05,
      "loss": 0.8751,
      "step": 313960
    },
    {
      "epoch": 1.1003865740010585,
      "grad_norm": 3.125,
      "learning_rate": 3.5178132407039625e-05,
      "loss": 0.9082,
      "step": 313970
    },
    {
      "epoch": 1.100421621507954,
      "grad_norm": 2.890625,
      "learning_rate": 3.5177483378375926e-05,
      "loss": 0.9289,
      "step": 313980
    },
    {
      "epoch": 1.1004566690148496,
      "grad_norm": 2.96875,
      "learning_rate": 3.517683434971222e-05,
      "loss": 0.8655,
      "step": 313990
    },
    {
      "epoch": 1.1004917165217452,
      "grad_norm": 2.859375,
      "learning_rate": 3.517618532104852e-05,
      "loss": 0.8483,
      "step": 314000
    },
    {
      "epoch": 1.100526764028641,
      "grad_norm": 3.140625,
      "learning_rate": 3.517553629238482e-05,
      "loss": 0.9302,
      "step": 314010
    },
    {
      "epoch": 1.1005618115355364,
      "grad_norm": 2.78125,
      "learning_rate": 3.517488726372112e-05,
      "loss": 0.8628,
      "step": 314020
    },
    {
      "epoch": 1.100596859042432,
      "grad_norm": 2.796875,
      "learning_rate": 3.517423823505741e-05,
      "loss": 0.9157,
      "step": 314030
    },
    {
      "epoch": 1.1006319065493275,
      "grad_norm": 2.984375,
      "learning_rate": 3.5173589206393714e-05,
      "loss": 0.8732,
      "step": 314040
    },
    {
      "epoch": 1.1006669540562233,
      "grad_norm": 2.765625,
      "learning_rate": 3.5172940177730016e-05,
      "loss": 0.7912,
      "step": 314050
    },
    {
      "epoch": 1.1007020015631188,
      "grad_norm": 2.765625,
      "learning_rate": 3.517229114906631e-05,
      "loss": 0.831,
      "step": 314060
    },
    {
      "epoch": 1.1007370490700144,
      "grad_norm": 3.125,
      "learning_rate": 3.517164212040261e-05,
      "loss": 0.9289,
      "step": 314070
    },
    {
      "epoch": 1.10077209657691,
      "grad_norm": 2.703125,
      "learning_rate": 3.5170993091738906e-05,
      "loss": 0.8834,
      "step": 314080
    },
    {
      "epoch": 1.1008071440838056,
      "grad_norm": 2.8125,
      "learning_rate": 3.517034406307521e-05,
      "loss": 0.9531,
      "step": 314090
    },
    {
      "epoch": 1.1008421915907012,
      "grad_norm": 2.8125,
      "learning_rate": 3.51696950344115e-05,
      "loss": 0.8112,
      "step": 314100
    },
    {
      "epoch": 1.1008772390975967,
      "grad_norm": 2.71875,
      "learning_rate": 3.5169046005747804e-05,
      "loss": 0.9106,
      "step": 314110
    },
    {
      "epoch": 1.1009122866044925,
      "grad_norm": 3.015625,
      "learning_rate": 3.516839697708409e-05,
      "loss": 0.8207,
      "step": 314120
    },
    {
      "epoch": 1.100947334111388,
      "grad_norm": 3.28125,
      "learning_rate": 3.516774794842039e-05,
      "loss": 0.8487,
      "step": 314130
    },
    {
      "epoch": 1.1009823816182835,
      "grad_norm": 3.109375,
      "learning_rate": 3.5167098919756694e-05,
      "loss": 0.9823,
      "step": 314140
    },
    {
      "epoch": 1.101017429125179,
      "grad_norm": 3.109375,
      "learning_rate": 3.516644989109299e-05,
      "loss": 0.8248,
      "step": 314150
    },
    {
      "epoch": 1.1010524766320748,
      "grad_norm": 3.140625,
      "learning_rate": 3.516580086242929e-05,
      "loss": 0.8254,
      "step": 314160
    },
    {
      "epoch": 1.1010875241389704,
      "grad_norm": 2.640625,
      "learning_rate": 3.5165151833765585e-05,
      "loss": 0.8239,
      "step": 314170
    },
    {
      "epoch": 1.101122571645866,
      "grad_norm": 3.078125,
      "learning_rate": 3.5164502805101886e-05,
      "loss": 0.8956,
      "step": 314180
    },
    {
      "epoch": 1.1011576191527617,
      "grad_norm": 3.234375,
      "learning_rate": 3.516385377643818e-05,
      "loss": 0.9524,
      "step": 314190
    },
    {
      "epoch": 1.1011926666596572,
      "grad_norm": 2.765625,
      "learning_rate": 3.516320474777448e-05,
      "loss": 0.9842,
      "step": 314200
    },
    {
      "epoch": 1.1012277141665527,
      "grad_norm": 3.265625,
      "learning_rate": 3.516255571911078e-05,
      "loss": 0.8407,
      "step": 314210
    },
    {
      "epoch": 1.1012627616734483,
      "grad_norm": 2.78125,
      "learning_rate": 3.516190669044708e-05,
      "loss": 0.8271,
      "step": 314220
    },
    {
      "epoch": 1.101297809180344,
      "grad_norm": 2.828125,
      "learning_rate": 3.516125766178337e-05,
      "loss": 0.8548,
      "step": 314230
    },
    {
      "epoch": 1.1013328566872396,
      "grad_norm": 2.96875,
      "learning_rate": 3.5160608633119674e-05,
      "loss": 0.8821,
      "step": 314240
    },
    {
      "epoch": 1.101367904194135,
      "grad_norm": 3.296875,
      "learning_rate": 3.515995960445597e-05,
      "loss": 0.9074,
      "step": 314250
    },
    {
      "epoch": 1.1014029517010306,
      "grad_norm": 2.953125,
      "learning_rate": 3.515931057579227e-05,
      "loss": 0.8797,
      "step": 314260
    },
    {
      "epoch": 1.1014379992079264,
      "grad_norm": 2.75,
      "learning_rate": 3.515866154712857e-05,
      "loss": 0.8633,
      "step": 314270
    },
    {
      "epoch": 1.101473046714822,
      "grad_norm": 2.625,
      "learning_rate": 3.5158012518464866e-05,
      "loss": 0.8309,
      "step": 314280
    },
    {
      "epoch": 1.1015080942217175,
      "grad_norm": 3.078125,
      "learning_rate": 3.515736348980117e-05,
      "loss": 0.8767,
      "step": 314290
    },
    {
      "epoch": 1.1015431417286132,
      "grad_norm": 2.578125,
      "learning_rate": 3.515671446113746e-05,
      "loss": 0.912,
      "step": 314300
    },
    {
      "epoch": 1.1015781892355088,
      "grad_norm": 2.90625,
      "learning_rate": 3.5156065432473764e-05,
      "loss": 0.8416,
      "step": 314310
    },
    {
      "epoch": 1.1016132367424043,
      "grad_norm": 2.734375,
      "learning_rate": 3.515541640381006e-05,
      "loss": 0.8555,
      "step": 314320
    },
    {
      "epoch": 1.1016482842492998,
      "grad_norm": 2.90625,
      "learning_rate": 3.515476737514636e-05,
      "loss": 0.921,
      "step": 314330
    },
    {
      "epoch": 1.1016833317561956,
      "grad_norm": 3.546875,
      "learning_rate": 3.5154118346482654e-05,
      "loss": 0.89,
      "step": 314340
    },
    {
      "epoch": 1.1017183792630911,
      "grad_norm": 2.90625,
      "learning_rate": 3.5153469317818956e-05,
      "loss": 0.8735,
      "step": 314350
    },
    {
      "epoch": 1.1017534267699867,
      "grad_norm": 2.75,
      "learning_rate": 3.515282028915525e-05,
      "loss": 0.913,
      "step": 314360
    },
    {
      "epoch": 1.1017884742768822,
      "grad_norm": 2.703125,
      "learning_rate": 3.515217126049155e-05,
      "loss": 0.8341,
      "step": 314370
    },
    {
      "epoch": 1.101823521783778,
      "grad_norm": 3.09375,
      "learning_rate": 3.5151522231827846e-05,
      "loss": 0.8932,
      "step": 314380
    },
    {
      "epoch": 1.1018585692906735,
      "grad_norm": 2.890625,
      "learning_rate": 3.515087320316415e-05,
      "loss": 0.772,
      "step": 314390
    },
    {
      "epoch": 1.101893616797569,
      "grad_norm": 3.015625,
      "learning_rate": 3.515022417450045e-05,
      "loss": 0.8731,
      "step": 314400
    },
    {
      "epoch": 1.1019286643044648,
      "grad_norm": 2.84375,
      "learning_rate": 3.5149575145836744e-05,
      "loss": 0.8725,
      "step": 314410
    },
    {
      "epoch": 1.1019637118113603,
      "grad_norm": 2.75,
      "learning_rate": 3.5148926117173045e-05,
      "loss": 0.8902,
      "step": 314420
    },
    {
      "epoch": 1.1019987593182559,
      "grad_norm": 3.1875,
      "learning_rate": 3.514827708850934e-05,
      "loss": 0.8257,
      "step": 314430
    },
    {
      "epoch": 1.1020338068251514,
      "grad_norm": 2.890625,
      "learning_rate": 3.514762805984564e-05,
      "loss": 0.8058,
      "step": 314440
    },
    {
      "epoch": 1.1020688543320472,
      "grad_norm": 3.0,
      "learning_rate": 3.5146979031181936e-05,
      "loss": 0.8518,
      "step": 314450
    },
    {
      "epoch": 1.1021039018389427,
      "grad_norm": 2.875,
      "learning_rate": 3.514633000251824e-05,
      "loss": 0.9054,
      "step": 314460
    },
    {
      "epoch": 1.1021389493458382,
      "grad_norm": 3.265625,
      "learning_rate": 3.514568097385453e-05,
      "loss": 0.9177,
      "step": 314470
    },
    {
      "epoch": 1.102173996852734,
      "grad_norm": 2.9375,
      "learning_rate": 3.514503194519083e-05,
      "loss": 0.8561,
      "step": 314480
    },
    {
      "epoch": 1.1022090443596295,
      "grad_norm": 3.03125,
      "learning_rate": 3.514438291652713e-05,
      "loss": 0.8842,
      "step": 314490
    },
    {
      "epoch": 1.102244091866525,
      "grad_norm": 3.0625,
      "learning_rate": 3.514373388786342e-05,
      "loss": 0.906,
      "step": 314500
    },
    {
      "epoch": 1.1022791393734206,
      "grad_norm": 2.8125,
      "learning_rate": 3.5143084859199724e-05,
      "loss": 0.8956,
      "step": 314510
    },
    {
      "epoch": 1.1023141868803163,
      "grad_norm": 2.6875,
      "learning_rate": 3.514243583053602e-05,
      "loss": 0.8516,
      "step": 314520
    },
    {
      "epoch": 1.1023492343872119,
      "grad_norm": 2.703125,
      "learning_rate": 3.514178680187232e-05,
      "loss": 0.8366,
      "step": 314530
    },
    {
      "epoch": 1.1023842818941074,
      "grad_norm": 3.515625,
      "learning_rate": 3.5141137773208614e-05,
      "loss": 0.9493,
      "step": 314540
    },
    {
      "epoch": 1.102419329401003,
      "grad_norm": 2.90625,
      "learning_rate": 3.5140488744544916e-05,
      "loss": 0.841,
      "step": 314550
    },
    {
      "epoch": 1.1024543769078987,
      "grad_norm": 2.5625,
      "learning_rate": 3.513983971588121e-05,
      "loss": 0.824,
      "step": 314560
    },
    {
      "epoch": 1.1024894244147943,
      "grad_norm": 4.0,
      "learning_rate": 3.513919068721751e-05,
      "loss": 0.8828,
      "step": 314570
    },
    {
      "epoch": 1.1025244719216898,
      "grad_norm": 2.609375,
      "learning_rate": 3.5138541658553806e-05,
      "loss": 0.8139,
      "step": 314580
    },
    {
      "epoch": 1.1025595194285855,
      "grad_norm": 2.625,
      "learning_rate": 3.513789262989011e-05,
      "loss": 0.837,
      "step": 314590
    },
    {
      "epoch": 1.102594566935481,
      "grad_norm": 2.828125,
      "learning_rate": 3.51372436012264e-05,
      "loss": 0.8188,
      "step": 314600
    },
    {
      "epoch": 1.1026296144423766,
      "grad_norm": 2.96875,
      "learning_rate": 3.5136594572562704e-05,
      "loss": 0.8161,
      "step": 314610
    },
    {
      "epoch": 1.1026646619492722,
      "grad_norm": 2.828125,
      "learning_rate": 3.5135945543899e-05,
      "loss": 0.8216,
      "step": 314620
    },
    {
      "epoch": 1.102699709456168,
      "grad_norm": 2.71875,
      "learning_rate": 3.51352965152353e-05,
      "loss": 0.9246,
      "step": 314630
    },
    {
      "epoch": 1.1027347569630634,
      "grad_norm": 2.65625,
      "learning_rate": 3.51346474865716e-05,
      "loss": 0.9172,
      "step": 314640
    },
    {
      "epoch": 1.102769804469959,
      "grad_norm": 2.921875,
      "learning_rate": 3.5133998457907896e-05,
      "loss": 0.8575,
      "step": 314650
    },
    {
      "epoch": 1.1028048519768547,
      "grad_norm": 3.28125,
      "learning_rate": 3.51333494292442e-05,
      "loss": 0.8554,
      "step": 314660
    },
    {
      "epoch": 1.1028398994837503,
      "grad_norm": 3.1875,
      "learning_rate": 3.513270040058049e-05,
      "loss": 0.8725,
      "step": 314670
    },
    {
      "epoch": 1.1028749469906458,
      "grad_norm": 3.15625,
      "learning_rate": 3.513205137191679e-05,
      "loss": 0.8238,
      "step": 314680
    },
    {
      "epoch": 1.1029099944975413,
      "grad_norm": 3.0,
      "learning_rate": 3.513140234325309e-05,
      "loss": 0.8882,
      "step": 314690
    },
    {
      "epoch": 1.102945042004437,
      "grad_norm": 2.328125,
      "learning_rate": 3.513075331458939e-05,
      "loss": 0.804,
      "step": 314700
    },
    {
      "epoch": 1.1029800895113326,
      "grad_norm": 2.828125,
      "learning_rate": 3.5130104285925684e-05,
      "loss": 0.8483,
      "step": 314710
    },
    {
      "epoch": 1.1030151370182282,
      "grad_norm": 3.265625,
      "learning_rate": 3.5129455257261985e-05,
      "loss": 0.8825,
      "step": 314720
    },
    {
      "epoch": 1.1030501845251237,
      "grad_norm": 3.015625,
      "learning_rate": 3.512880622859828e-05,
      "loss": 0.8278,
      "step": 314730
    },
    {
      "epoch": 1.1030852320320195,
      "grad_norm": 3.15625,
      "learning_rate": 3.512815719993458e-05,
      "loss": 0.9261,
      "step": 314740
    },
    {
      "epoch": 1.103120279538915,
      "grad_norm": 2.375,
      "learning_rate": 3.5127508171270876e-05,
      "loss": 0.9575,
      "step": 314750
    },
    {
      "epoch": 1.1031553270458105,
      "grad_norm": 2.921875,
      "learning_rate": 3.512685914260718e-05,
      "loss": 0.8691,
      "step": 314760
    },
    {
      "epoch": 1.1031903745527063,
      "grad_norm": 2.671875,
      "learning_rate": 3.512621011394348e-05,
      "loss": 0.815,
      "step": 314770
    },
    {
      "epoch": 1.1032254220596018,
      "grad_norm": 3.15625,
      "learning_rate": 3.512556108527977e-05,
      "loss": 0.8201,
      "step": 314780
    },
    {
      "epoch": 1.1032604695664974,
      "grad_norm": 3.109375,
      "learning_rate": 3.5124912056616074e-05,
      "loss": 0.869,
      "step": 314790
    },
    {
      "epoch": 1.103295517073393,
      "grad_norm": 3.078125,
      "learning_rate": 3.512426302795237e-05,
      "loss": 0.9278,
      "step": 314800
    },
    {
      "epoch": 1.1033305645802887,
      "grad_norm": 3.015625,
      "learning_rate": 3.512361399928867e-05,
      "loss": 0.8298,
      "step": 314810
    },
    {
      "epoch": 1.1033656120871842,
      "grad_norm": 2.6875,
      "learning_rate": 3.5122964970624965e-05,
      "loss": 0.8327,
      "step": 314820
    },
    {
      "epoch": 1.1034006595940797,
      "grad_norm": 2.84375,
      "learning_rate": 3.5122315941961266e-05,
      "loss": 0.8496,
      "step": 314830
    },
    {
      "epoch": 1.1034357071009753,
      "grad_norm": 2.9375,
      "learning_rate": 3.512166691329756e-05,
      "loss": 0.8697,
      "step": 314840
    },
    {
      "epoch": 1.103470754607871,
      "grad_norm": 2.875,
      "learning_rate": 3.512101788463386e-05,
      "loss": 0.8181,
      "step": 314850
    },
    {
      "epoch": 1.1035058021147666,
      "grad_norm": 2.828125,
      "learning_rate": 3.512036885597016e-05,
      "loss": 0.8193,
      "step": 314860
    },
    {
      "epoch": 1.103540849621662,
      "grad_norm": 3.015625,
      "learning_rate": 3.511971982730646e-05,
      "loss": 0.8307,
      "step": 314870
    },
    {
      "epoch": 1.1035758971285579,
      "grad_norm": 2.953125,
      "learning_rate": 3.511907079864275e-05,
      "loss": 0.8885,
      "step": 314880
    },
    {
      "epoch": 1.1036109446354534,
      "grad_norm": 3.0625,
      "learning_rate": 3.511842176997905e-05,
      "loss": 0.8061,
      "step": 314890
    },
    {
      "epoch": 1.103645992142349,
      "grad_norm": 2.984375,
      "learning_rate": 3.511777274131535e-05,
      "loss": 0.8642,
      "step": 314900
    },
    {
      "epoch": 1.1036810396492445,
      "grad_norm": 3.203125,
      "learning_rate": 3.5117123712651644e-05,
      "loss": 0.8421,
      "step": 314910
    },
    {
      "epoch": 1.1037160871561402,
      "grad_norm": 3.078125,
      "learning_rate": 3.5116474683987945e-05,
      "loss": 0.94,
      "step": 314920
    },
    {
      "epoch": 1.1037511346630358,
      "grad_norm": 2.671875,
      "learning_rate": 3.511582565532424e-05,
      "loss": 0.8686,
      "step": 314930
    },
    {
      "epoch": 1.1037861821699313,
      "grad_norm": 2.953125,
      "learning_rate": 3.511517662666054e-05,
      "loss": 0.8901,
      "step": 314940
    },
    {
      "epoch": 1.1038212296768268,
      "grad_norm": 3.53125,
      "learning_rate": 3.5114527597996836e-05,
      "loss": 0.8264,
      "step": 314950
    },
    {
      "epoch": 1.1038562771837226,
      "grad_norm": 2.421875,
      "learning_rate": 3.511387856933314e-05,
      "loss": 0.8286,
      "step": 314960
    },
    {
      "epoch": 1.1038913246906181,
      "grad_norm": 3.125,
      "learning_rate": 3.511322954066943e-05,
      "loss": 0.8438,
      "step": 314970
    },
    {
      "epoch": 1.1039263721975137,
      "grad_norm": 2.703125,
      "learning_rate": 3.511258051200573e-05,
      "loss": 0.8439,
      "step": 314980
    },
    {
      "epoch": 1.1039614197044094,
      "grad_norm": 3.296875,
      "learning_rate": 3.511193148334203e-05,
      "loss": 0.9175,
      "step": 314990
    },
    {
      "epoch": 1.103996467211305,
      "grad_norm": 2.9375,
      "learning_rate": 3.511128245467833e-05,
      "loss": 0.8252,
      "step": 315000
    },
    {
      "epoch": 1.103996467211305,
      "eval_loss": 0.8117058277130127,
      "eval_runtime": 553.0186,
      "eval_samples_per_second": 687.926,
      "eval_steps_per_second": 57.327,
      "step": 315000
    },
    {
      "epoch": 1.1040315147182005,
      "grad_norm": 3.046875,
      "learning_rate": 3.511063342601463e-05,
      "loss": 0.8955,
      "step": 315010
    },
    {
      "epoch": 1.104066562225096,
      "grad_norm": 2.546875,
      "learning_rate": 3.5109984397350925e-05,
      "loss": 0.8863,
      "step": 315020
    },
    {
      "epoch": 1.1041016097319918,
      "grad_norm": 2.796875,
      "learning_rate": 3.5109335368687226e-05,
      "loss": 0.9357,
      "step": 315030
    },
    {
      "epoch": 1.1041366572388873,
      "grad_norm": 3.28125,
      "learning_rate": 3.510868634002352e-05,
      "loss": 0.8108,
      "step": 315040
    },
    {
      "epoch": 1.1041717047457829,
      "grad_norm": 3.0625,
      "learning_rate": 3.510803731135982e-05,
      "loss": 0.8366,
      "step": 315050
    },
    {
      "epoch": 1.1042067522526784,
      "grad_norm": 2.84375,
      "learning_rate": 3.510738828269612e-05,
      "loss": 0.8157,
      "step": 315060
    },
    {
      "epoch": 1.1042417997595741,
      "grad_norm": 2.75,
      "learning_rate": 3.510673925403242e-05,
      "loss": 0.8186,
      "step": 315070
    },
    {
      "epoch": 1.1042768472664697,
      "grad_norm": 2.765625,
      "learning_rate": 3.510609022536871e-05,
      "loss": 0.8813,
      "step": 315080
    },
    {
      "epoch": 1.1043118947733652,
      "grad_norm": 2.796875,
      "learning_rate": 3.5105441196705014e-05,
      "loss": 0.875,
      "step": 315090
    },
    {
      "epoch": 1.104346942280261,
      "grad_norm": 3.15625,
      "learning_rate": 3.510479216804131e-05,
      "loss": 0.8302,
      "step": 315100
    },
    {
      "epoch": 1.1043819897871565,
      "grad_norm": 3.4375,
      "learning_rate": 3.510414313937761e-05,
      "loss": 0.9149,
      "step": 315110
    },
    {
      "epoch": 1.104417037294052,
      "grad_norm": 3.03125,
      "learning_rate": 3.5103494110713905e-05,
      "loss": 0.8842,
      "step": 315120
    },
    {
      "epoch": 1.1044520848009476,
      "grad_norm": 2.921875,
      "learning_rate": 3.5102845082050206e-05,
      "loss": 0.8272,
      "step": 315130
    },
    {
      "epoch": 1.1044871323078433,
      "grad_norm": 3.421875,
      "learning_rate": 3.510219605338651e-05,
      "loss": 0.889,
      "step": 315140
    },
    {
      "epoch": 1.1045221798147389,
      "grad_norm": 3.78125,
      "learning_rate": 3.51015470247228e-05,
      "loss": 0.8979,
      "step": 315150
    },
    {
      "epoch": 1.1045572273216344,
      "grad_norm": 2.375,
      "learning_rate": 3.5100897996059104e-05,
      "loss": 0.7468,
      "step": 315160
    },
    {
      "epoch": 1.1045922748285302,
      "grad_norm": 3.203125,
      "learning_rate": 3.51002489673954e-05,
      "loss": 0.8553,
      "step": 315170
    },
    {
      "epoch": 1.1046273223354257,
      "grad_norm": 3.0,
      "learning_rate": 3.50995999387317e-05,
      "loss": 0.8146,
      "step": 315180
    },
    {
      "epoch": 1.1046623698423212,
      "grad_norm": 3.234375,
      "learning_rate": 3.5098950910067994e-05,
      "loss": 0.8142,
      "step": 315190
    },
    {
      "epoch": 1.1046974173492168,
      "grad_norm": 2.875,
      "learning_rate": 3.5098301881404296e-05,
      "loss": 0.9233,
      "step": 315200
    },
    {
      "epoch": 1.1047324648561125,
      "grad_norm": 3.109375,
      "learning_rate": 3.509765285274059e-05,
      "loss": 0.8541,
      "step": 315210
    },
    {
      "epoch": 1.104767512363008,
      "grad_norm": 2.640625,
      "learning_rate": 3.509700382407689e-05,
      "loss": 0.8702,
      "step": 315220
    },
    {
      "epoch": 1.1048025598699036,
      "grad_norm": 2.625,
      "learning_rate": 3.5096354795413186e-05,
      "loss": 0.9068,
      "step": 315230
    },
    {
      "epoch": 1.1048376073767994,
      "grad_norm": 2.65625,
      "learning_rate": 3.509570576674949e-05,
      "loss": 0.7937,
      "step": 315240
    },
    {
      "epoch": 1.104872654883695,
      "grad_norm": 2.65625,
      "learning_rate": 3.509505673808578e-05,
      "loss": 0.8872,
      "step": 315250
    },
    {
      "epoch": 1.1049077023905904,
      "grad_norm": 2.90625,
      "learning_rate": 3.509440770942208e-05,
      "loss": 0.9387,
      "step": 315260
    },
    {
      "epoch": 1.104942749897486,
      "grad_norm": 2.59375,
      "learning_rate": 3.509375868075838e-05,
      "loss": 0.9332,
      "step": 315270
    },
    {
      "epoch": 1.1049777974043817,
      "grad_norm": 2.40625,
      "learning_rate": 3.509310965209467e-05,
      "loss": 0.8828,
      "step": 315280
    },
    {
      "epoch": 1.1050128449112773,
      "grad_norm": 3.171875,
      "learning_rate": 3.5092460623430974e-05,
      "loss": 0.9492,
      "step": 315290
    },
    {
      "epoch": 1.1050478924181728,
      "grad_norm": 2.8125,
      "learning_rate": 3.509181159476727e-05,
      "loss": 0.8517,
      "step": 315300
    },
    {
      "epoch": 1.1050829399250683,
      "grad_norm": 3.109375,
      "learning_rate": 3.509116256610357e-05,
      "loss": 0.8775,
      "step": 315310
    },
    {
      "epoch": 1.105117987431964,
      "grad_norm": 2.859375,
      "learning_rate": 3.5090513537439865e-05,
      "loss": 0.8856,
      "step": 315320
    },
    {
      "epoch": 1.1051530349388596,
      "grad_norm": 2.890625,
      "learning_rate": 3.5089864508776166e-05,
      "loss": 0.8474,
      "step": 315330
    },
    {
      "epoch": 1.1051880824457552,
      "grad_norm": 2.453125,
      "learning_rate": 3.508921548011246e-05,
      "loss": 0.8351,
      "step": 315340
    },
    {
      "epoch": 1.105223129952651,
      "grad_norm": 3.1875,
      "learning_rate": 3.508856645144876e-05,
      "loss": 0.869,
      "step": 315350
    },
    {
      "epoch": 1.1052581774595465,
      "grad_norm": 3.046875,
      "learning_rate": 3.508791742278506e-05,
      "loss": 0.8771,
      "step": 315360
    },
    {
      "epoch": 1.105293224966442,
      "grad_norm": 2.8125,
      "learning_rate": 3.508726839412136e-05,
      "loss": 0.8885,
      "step": 315370
    },
    {
      "epoch": 1.1053282724733375,
      "grad_norm": 2.921875,
      "learning_rate": 3.508661936545766e-05,
      "loss": 0.8484,
      "step": 315380
    },
    {
      "epoch": 1.1053633199802333,
      "grad_norm": 2.9375,
      "learning_rate": 3.5085970336793954e-05,
      "loss": 0.8723,
      "step": 315390
    },
    {
      "epoch": 1.1053983674871288,
      "grad_norm": 3.09375,
      "learning_rate": 3.5085321308130256e-05,
      "loss": 0.8124,
      "step": 315400
    },
    {
      "epoch": 1.1054334149940244,
      "grad_norm": 3.03125,
      "learning_rate": 3.508467227946655e-05,
      "loss": 0.8905,
      "step": 315410
    },
    {
      "epoch": 1.10546846250092,
      "grad_norm": 2.96875,
      "learning_rate": 3.508402325080285e-05,
      "loss": 0.9321,
      "step": 315420
    },
    {
      "epoch": 1.1055035100078157,
      "grad_norm": 2.703125,
      "learning_rate": 3.5083374222139146e-05,
      "loss": 0.8331,
      "step": 315430
    },
    {
      "epoch": 1.1055385575147112,
      "grad_norm": 2.5625,
      "learning_rate": 3.508272519347545e-05,
      "loss": 0.8188,
      "step": 315440
    },
    {
      "epoch": 1.1055736050216067,
      "grad_norm": 2.796875,
      "learning_rate": 3.508207616481174e-05,
      "loss": 0.8701,
      "step": 315450
    },
    {
      "epoch": 1.1056086525285025,
      "grad_norm": 2.65625,
      "learning_rate": 3.5081427136148044e-05,
      "loss": 0.8622,
      "step": 315460
    },
    {
      "epoch": 1.105643700035398,
      "grad_norm": 3.09375,
      "learning_rate": 3.508077810748434e-05,
      "loss": 0.924,
      "step": 315470
    },
    {
      "epoch": 1.1056787475422936,
      "grad_norm": 4.15625,
      "learning_rate": 3.508012907882064e-05,
      "loss": 0.8692,
      "step": 315480
    },
    {
      "epoch": 1.105713795049189,
      "grad_norm": 2.890625,
      "learning_rate": 3.5079480050156934e-05,
      "loss": 0.9092,
      "step": 315490
    },
    {
      "epoch": 1.1057488425560849,
      "grad_norm": 2.5625,
      "learning_rate": 3.5078831021493236e-05,
      "loss": 0.8612,
      "step": 315500
    },
    {
      "epoch": 1.1057838900629804,
      "grad_norm": 2.984375,
      "learning_rate": 3.507818199282954e-05,
      "loss": 0.9177,
      "step": 315510
    },
    {
      "epoch": 1.105818937569876,
      "grad_norm": 2.671875,
      "learning_rate": 3.507753296416583e-05,
      "loss": 0.8723,
      "step": 315520
    },
    {
      "epoch": 1.1058539850767715,
      "grad_norm": 2.8125,
      "learning_rate": 3.507688393550213e-05,
      "loss": 0.799,
      "step": 315530
    },
    {
      "epoch": 1.1058890325836672,
      "grad_norm": 3.296875,
      "learning_rate": 3.507623490683843e-05,
      "loss": 0.8389,
      "step": 315540
    },
    {
      "epoch": 1.1059240800905628,
      "grad_norm": 2.90625,
      "learning_rate": 3.507558587817473e-05,
      "loss": 0.7595,
      "step": 315550
    },
    {
      "epoch": 1.1059591275974583,
      "grad_norm": 2.453125,
      "learning_rate": 3.5074936849511024e-05,
      "loss": 0.8858,
      "step": 315560
    },
    {
      "epoch": 1.105994175104354,
      "grad_norm": 2.59375,
      "learning_rate": 3.5074287820847325e-05,
      "loss": 0.8821,
      "step": 315570
    },
    {
      "epoch": 1.1060292226112496,
      "grad_norm": 2.609375,
      "learning_rate": 3.507363879218362e-05,
      "loss": 0.8998,
      "step": 315580
    },
    {
      "epoch": 1.1060642701181451,
      "grad_norm": 2.875,
      "learning_rate": 3.507298976351992e-05,
      "loss": 0.9158,
      "step": 315590
    },
    {
      "epoch": 1.1060993176250407,
      "grad_norm": 3.359375,
      "learning_rate": 3.5072340734856216e-05,
      "loss": 0.8513,
      "step": 315600
    },
    {
      "epoch": 1.1061343651319364,
      "grad_norm": 2.921875,
      "learning_rate": 3.507169170619252e-05,
      "loss": 0.9263,
      "step": 315610
    },
    {
      "epoch": 1.106169412638832,
      "grad_norm": 2.46875,
      "learning_rate": 3.507104267752881e-05,
      "loss": 0.8672,
      "step": 315620
    },
    {
      "epoch": 1.1062044601457275,
      "grad_norm": 3.0,
      "learning_rate": 3.5070393648865106e-05,
      "loss": 0.8907,
      "step": 315630
    },
    {
      "epoch": 1.106239507652623,
      "grad_norm": 2.75,
      "learning_rate": 3.506974462020141e-05,
      "loss": 0.8894,
      "step": 315640
    },
    {
      "epoch": 1.1062745551595188,
      "grad_norm": 2.515625,
      "learning_rate": 3.50690955915377e-05,
      "loss": 0.8653,
      "step": 315650
    },
    {
      "epoch": 1.1063096026664143,
      "grad_norm": 3.3125,
      "learning_rate": 3.5068446562874004e-05,
      "loss": 0.901,
      "step": 315660
    },
    {
      "epoch": 1.1063446501733099,
      "grad_norm": 2.359375,
      "learning_rate": 3.50677975342103e-05,
      "loss": 0.8166,
      "step": 315670
    },
    {
      "epoch": 1.1063796976802056,
      "grad_norm": 2.265625,
      "learning_rate": 3.50671485055466e-05,
      "loss": 0.8987,
      "step": 315680
    },
    {
      "epoch": 1.1064147451871011,
      "grad_norm": 2.703125,
      "learning_rate": 3.5066499476882894e-05,
      "loss": 0.9362,
      "step": 315690
    },
    {
      "epoch": 1.1064497926939967,
      "grad_norm": 2.875,
      "learning_rate": 3.5065850448219196e-05,
      "loss": 0.8556,
      "step": 315700
    },
    {
      "epoch": 1.1064848402008922,
      "grad_norm": 2.546875,
      "learning_rate": 3.506520141955549e-05,
      "loss": 0.8301,
      "step": 315710
    },
    {
      "epoch": 1.106519887707788,
      "grad_norm": 2.640625,
      "learning_rate": 3.506455239089179e-05,
      "loss": 0.7913,
      "step": 315720
    },
    {
      "epoch": 1.1065549352146835,
      "grad_norm": 2.765625,
      "learning_rate": 3.506390336222809e-05,
      "loss": 0.879,
      "step": 315730
    },
    {
      "epoch": 1.106589982721579,
      "grad_norm": 2.609375,
      "learning_rate": 3.506325433356439e-05,
      "loss": 0.816,
      "step": 315740
    },
    {
      "epoch": 1.1066250302284746,
      "grad_norm": 3.140625,
      "learning_rate": 3.506260530490069e-05,
      "loss": 0.8762,
      "step": 315750
    },
    {
      "epoch": 1.1066600777353703,
      "grad_norm": 2.84375,
      "learning_rate": 3.5061956276236984e-05,
      "loss": 0.8974,
      "step": 315760
    },
    {
      "epoch": 1.1066951252422659,
      "grad_norm": 3.078125,
      "learning_rate": 3.5061307247573285e-05,
      "loss": 0.8272,
      "step": 315770
    },
    {
      "epoch": 1.1067301727491614,
      "grad_norm": 2.796875,
      "learning_rate": 3.506065821890958e-05,
      "loss": 0.8169,
      "step": 315780
    },
    {
      "epoch": 1.1067652202560572,
      "grad_norm": 3.078125,
      "learning_rate": 3.506000919024588e-05,
      "loss": 0.8538,
      "step": 315790
    },
    {
      "epoch": 1.1068002677629527,
      "grad_norm": 2.5625,
      "learning_rate": 3.5059360161582176e-05,
      "loss": 0.8504,
      "step": 315800
    },
    {
      "epoch": 1.1068353152698482,
      "grad_norm": 3.1875,
      "learning_rate": 3.505871113291848e-05,
      "loss": 0.8177,
      "step": 315810
    },
    {
      "epoch": 1.1068703627767438,
      "grad_norm": 3.453125,
      "learning_rate": 3.505806210425477e-05,
      "loss": 0.865,
      "step": 315820
    },
    {
      "epoch": 1.1069054102836395,
      "grad_norm": 3.078125,
      "learning_rate": 3.505741307559107e-05,
      "loss": 0.8896,
      "step": 315830
    },
    {
      "epoch": 1.106940457790535,
      "grad_norm": 3.09375,
      "learning_rate": 3.505676404692737e-05,
      "loss": 0.895,
      "step": 315840
    },
    {
      "epoch": 1.1069755052974306,
      "grad_norm": 2.859375,
      "learning_rate": 3.505611501826367e-05,
      "loss": 0.8873,
      "step": 315850
    },
    {
      "epoch": 1.1070105528043264,
      "grad_norm": 3.1875,
      "learning_rate": 3.5055465989599964e-05,
      "loss": 0.9426,
      "step": 315860
    },
    {
      "epoch": 1.107045600311222,
      "grad_norm": 3.25,
      "learning_rate": 3.5054816960936265e-05,
      "loss": 0.869,
      "step": 315870
    },
    {
      "epoch": 1.1070806478181174,
      "grad_norm": 2.96875,
      "learning_rate": 3.5054167932272567e-05,
      "loss": 0.9001,
      "step": 315880
    },
    {
      "epoch": 1.107115695325013,
      "grad_norm": 3.046875,
      "learning_rate": 3.505351890360886e-05,
      "loss": 0.8994,
      "step": 315890
    },
    {
      "epoch": 1.1071507428319087,
      "grad_norm": 2.84375,
      "learning_rate": 3.505286987494516e-05,
      "loss": 0.776,
      "step": 315900
    },
    {
      "epoch": 1.1071857903388043,
      "grad_norm": 3.25,
      "learning_rate": 3.505222084628146e-05,
      "loss": 0.8614,
      "step": 315910
    },
    {
      "epoch": 1.1072208378456998,
      "grad_norm": 3.0,
      "learning_rate": 3.505157181761776e-05,
      "loss": 0.8276,
      "step": 315920
    },
    {
      "epoch": 1.1072558853525956,
      "grad_norm": 2.78125,
      "learning_rate": 3.505092278895405e-05,
      "loss": 0.9112,
      "step": 315930
    },
    {
      "epoch": 1.107290932859491,
      "grad_norm": 2.703125,
      "learning_rate": 3.5050273760290355e-05,
      "loss": 0.8485,
      "step": 315940
    },
    {
      "epoch": 1.1073259803663866,
      "grad_norm": 2.421875,
      "learning_rate": 3.504962473162665e-05,
      "loss": 0.906,
      "step": 315950
    },
    {
      "epoch": 1.1073610278732822,
      "grad_norm": 2.53125,
      "learning_rate": 3.504897570296295e-05,
      "loss": 0.8443,
      "step": 315960
    },
    {
      "epoch": 1.107396075380178,
      "grad_norm": 3.125,
      "learning_rate": 3.5048326674299245e-05,
      "loss": 0.7551,
      "step": 315970
    },
    {
      "epoch": 1.1074311228870735,
      "grad_norm": 2.9375,
      "learning_rate": 3.5047677645635547e-05,
      "loss": 0.8726,
      "step": 315980
    },
    {
      "epoch": 1.107466170393969,
      "grad_norm": 3.203125,
      "learning_rate": 3.504702861697184e-05,
      "loss": 0.8928,
      "step": 315990
    },
    {
      "epoch": 1.1075012179008645,
      "grad_norm": 3.1875,
      "learning_rate": 3.5046379588308136e-05,
      "loss": 0.9549,
      "step": 316000
    },
    {
      "epoch": 1.1075362654077603,
      "grad_norm": 3.171875,
      "learning_rate": 3.504573055964444e-05,
      "loss": 0.9718,
      "step": 316010
    },
    {
      "epoch": 1.1075713129146558,
      "grad_norm": 2.53125,
      "learning_rate": 3.504508153098073e-05,
      "loss": 0.8434,
      "step": 316020
    },
    {
      "epoch": 1.1076063604215514,
      "grad_norm": 3.21875,
      "learning_rate": 3.504443250231703e-05,
      "loss": 0.8937,
      "step": 316030
    },
    {
      "epoch": 1.1076414079284471,
      "grad_norm": 2.875,
      "learning_rate": 3.504378347365333e-05,
      "loss": 0.9148,
      "step": 316040
    },
    {
      "epoch": 1.1076764554353427,
      "grad_norm": 2.90625,
      "learning_rate": 3.504313444498963e-05,
      "loss": 0.8593,
      "step": 316050
    },
    {
      "epoch": 1.1077115029422382,
      "grad_norm": 3.125,
      "learning_rate": 3.5042485416325924e-05,
      "loss": 0.8309,
      "step": 316060
    },
    {
      "epoch": 1.1077465504491337,
      "grad_norm": 3.09375,
      "learning_rate": 3.5041836387662225e-05,
      "loss": 0.8968,
      "step": 316070
    },
    {
      "epoch": 1.1077815979560295,
      "grad_norm": 2.90625,
      "learning_rate": 3.504118735899852e-05,
      "loss": 0.8314,
      "step": 316080
    },
    {
      "epoch": 1.107816645462925,
      "grad_norm": 3.09375,
      "learning_rate": 3.504053833033482e-05,
      "loss": 0.8754,
      "step": 316090
    },
    {
      "epoch": 1.1078516929698206,
      "grad_norm": 3.234375,
      "learning_rate": 3.503988930167112e-05,
      "loss": 0.9565,
      "step": 316100
    },
    {
      "epoch": 1.107886740476716,
      "grad_norm": 2.515625,
      "learning_rate": 3.503924027300742e-05,
      "loss": 0.8148,
      "step": 316110
    },
    {
      "epoch": 1.1079217879836118,
      "grad_norm": 2.921875,
      "learning_rate": 3.503859124434372e-05,
      "loss": 0.9322,
      "step": 316120
    },
    {
      "epoch": 1.1079568354905074,
      "grad_norm": 3.15625,
      "learning_rate": 3.503794221568001e-05,
      "loss": 0.8781,
      "step": 316130
    },
    {
      "epoch": 1.107991882997403,
      "grad_norm": 3.25,
      "learning_rate": 3.5037293187016315e-05,
      "loss": 0.8954,
      "step": 316140
    },
    {
      "epoch": 1.1080269305042987,
      "grad_norm": 2.5,
      "learning_rate": 3.503664415835261e-05,
      "loss": 0.9009,
      "step": 316150
    },
    {
      "epoch": 1.1080619780111942,
      "grad_norm": 3.1875,
      "learning_rate": 3.503599512968891e-05,
      "loss": 0.7633,
      "step": 316160
    },
    {
      "epoch": 1.1080970255180898,
      "grad_norm": 2.921875,
      "learning_rate": 3.5035346101025205e-05,
      "loss": 0.9297,
      "step": 316170
    },
    {
      "epoch": 1.1081320730249853,
      "grad_norm": 2.5625,
      "learning_rate": 3.5034697072361507e-05,
      "loss": 0.901,
      "step": 316180
    },
    {
      "epoch": 1.108167120531881,
      "grad_norm": 2.921875,
      "learning_rate": 3.50340480436978e-05,
      "loss": 0.8529,
      "step": 316190
    },
    {
      "epoch": 1.1082021680387766,
      "grad_norm": 3.03125,
      "learning_rate": 3.50333990150341e-05,
      "loss": 0.8879,
      "step": 316200
    },
    {
      "epoch": 1.1082372155456721,
      "grad_norm": 2.9375,
      "learning_rate": 3.50327499863704e-05,
      "loss": 0.8867,
      "step": 316210
    },
    {
      "epoch": 1.1082722630525677,
      "grad_norm": 2.953125,
      "learning_rate": 3.50321009577067e-05,
      "loss": 0.8706,
      "step": 316220
    },
    {
      "epoch": 1.1083073105594634,
      "grad_norm": 3.265625,
      "learning_rate": 3.503145192904299e-05,
      "loss": 0.8481,
      "step": 316230
    },
    {
      "epoch": 1.108342358066359,
      "grad_norm": 3.296875,
      "learning_rate": 3.5030802900379295e-05,
      "loss": 0.8974,
      "step": 316240
    },
    {
      "epoch": 1.1083774055732545,
      "grad_norm": 3.046875,
      "learning_rate": 3.5030153871715596e-05,
      "loss": 0.8975,
      "step": 316250
    },
    {
      "epoch": 1.1084124530801502,
      "grad_norm": 2.78125,
      "learning_rate": 3.502950484305189e-05,
      "loss": 0.8992,
      "step": 316260
    },
    {
      "epoch": 1.1084475005870458,
      "grad_norm": 2.953125,
      "learning_rate": 3.502885581438819e-05,
      "loss": 0.8523,
      "step": 316270
    },
    {
      "epoch": 1.1084825480939413,
      "grad_norm": 3.328125,
      "learning_rate": 3.5028206785724487e-05,
      "loss": 0.8829,
      "step": 316280
    },
    {
      "epoch": 1.1085175956008368,
      "grad_norm": 2.53125,
      "learning_rate": 3.502755775706079e-05,
      "loss": 0.8551,
      "step": 316290
    },
    {
      "epoch": 1.1085526431077326,
      "grad_norm": 2.609375,
      "learning_rate": 3.502690872839708e-05,
      "loss": 0.8331,
      "step": 316300
    },
    {
      "epoch": 1.1085876906146281,
      "grad_norm": 2.96875,
      "learning_rate": 3.5026259699733384e-05,
      "loss": 0.8249,
      "step": 316310
    },
    {
      "epoch": 1.1086227381215237,
      "grad_norm": 3.125,
      "learning_rate": 3.502561067106968e-05,
      "loss": 0.8191,
      "step": 316320
    },
    {
      "epoch": 1.1086577856284192,
      "grad_norm": 2.578125,
      "learning_rate": 3.502496164240598e-05,
      "loss": 0.9976,
      "step": 316330
    },
    {
      "epoch": 1.108692833135315,
      "grad_norm": 3.09375,
      "learning_rate": 3.5024312613742275e-05,
      "loss": 0.8125,
      "step": 316340
    },
    {
      "epoch": 1.1087278806422105,
      "grad_norm": 3.390625,
      "learning_rate": 3.5023663585078576e-05,
      "loss": 0.8938,
      "step": 316350
    },
    {
      "epoch": 1.108762928149106,
      "grad_norm": 3.078125,
      "learning_rate": 3.502301455641487e-05,
      "loss": 0.8582,
      "step": 316360
    },
    {
      "epoch": 1.1087979756560018,
      "grad_norm": 2.71875,
      "learning_rate": 3.502236552775117e-05,
      "loss": 0.8298,
      "step": 316370
    },
    {
      "epoch": 1.1088330231628973,
      "grad_norm": 3.109375,
      "learning_rate": 3.5021716499087467e-05,
      "loss": 0.8306,
      "step": 316380
    },
    {
      "epoch": 1.1088680706697929,
      "grad_norm": 3.171875,
      "learning_rate": 3.502106747042376e-05,
      "loss": 0.8168,
      "step": 316390
    },
    {
      "epoch": 1.1089031181766884,
      "grad_norm": 2.984375,
      "learning_rate": 3.502041844176006e-05,
      "loss": 0.7857,
      "step": 316400
    },
    {
      "epoch": 1.1089381656835842,
      "grad_norm": 2.8125,
      "learning_rate": 3.501976941309636e-05,
      "loss": 0.7889,
      "step": 316410
    },
    {
      "epoch": 1.1089732131904797,
      "grad_norm": 2.90625,
      "learning_rate": 3.501912038443266e-05,
      "loss": 0.8,
      "step": 316420
    },
    {
      "epoch": 1.1090082606973752,
      "grad_norm": 2.953125,
      "learning_rate": 3.501847135576895e-05,
      "loss": 0.8338,
      "step": 316430
    },
    {
      "epoch": 1.1090433082042708,
      "grad_norm": 2.875,
      "learning_rate": 3.5017822327105255e-05,
      "loss": 0.8589,
      "step": 316440
    },
    {
      "epoch": 1.1090783557111665,
      "grad_norm": 2.984375,
      "learning_rate": 3.501717329844155e-05,
      "loss": 0.8891,
      "step": 316450
    },
    {
      "epoch": 1.109113403218062,
      "grad_norm": 2.859375,
      "learning_rate": 3.501652426977785e-05,
      "loss": 0.7874,
      "step": 316460
    },
    {
      "epoch": 1.1091484507249576,
      "grad_norm": 2.828125,
      "learning_rate": 3.501587524111415e-05,
      "loss": 0.8429,
      "step": 316470
    },
    {
      "epoch": 1.1091834982318534,
      "grad_norm": 2.96875,
      "learning_rate": 3.5015226212450447e-05,
      "loss": 0.8679,
      "step": 316480
    },
    {
      "epoch": 1.109218545738749,
      "grad_norm": 2.890625,
      "learning_rate": 3.501457718378675e-05,
      "loss": 0.9008,
      "step": 316490
    },
    {
      "epoch": 1.1092535932456444,
      "grad_norm": 2.65625,
      "learning_rate": 3.501392815512304e-05,
      "loss": 0.8511,
      "step": 316500
    },
    {
      "epoch": 1.10928864075254,
      "grad_norm": 2.921875,
      "learning_rate": 3.5013279126459344e-05,
      "loss": 0.8076,
      "step": 316510
    },
    {
      "epoch": 1.1093236882594357,
      "grad_norm": 3.125,
      "learning_rate": 3.501263009779564e-05,
      "loss": 0.8532,
      "step": 316520
    },
    {
      "epoch": 1.1093587357663313,
      "grad_norm": 3.25,
      "learning_rate": 3.501198106913194e-05,
      "loss": 0.8628,
      "step": 316530
    },
    {
      "epoch": 1.1093937832732268,
      "grad_norm": 2.703125,
      "learning_rate": 3.5011332040468235e-05,
      "loss": 0.8689,
      "step": 316540
    },
    {
      "epoch": 1.1094288307801226,
      "grad_norm": 2.9375,
      "learning_rate": 3.5010683011804536e-05,
      "loss": 0.8924,
      "step": 316550
    },
    {
      "epoch": 1.109463878287018,
      "grad_norm": 2.96875,
      "learning_rate": 3.501003398314083e-05,
      "loss": 0.7998,
      "step": 316560
    },
    {
      "epoch": 1.1094989257939136,
      "grad_norm": 3.34375,
      "learning_rate": 3.500938495447713e-05,
      "loss": 0.9212,
      "step": 316570
    },
    {
      "epoch": 1.1095339733008092,
      "grad_norm": 3.25,
      "learning_rate": 3.5008735925813427e-05,
      "loss": 0.9033,
      "step": 316580
    },
    {
      "epoch": 1.109569020807705,
      "grad_norm": 3.078125,
      "learning_rate": 3.500808689714973e-05,
      "loss": 0.9109,
      "step": 316590
    },
    {
      "epoch": 1.1096040683146005,
      "grad_norm": 3.296875,
      "learning_rate": 3.500743786848603e-05,
      "loss": 0.9326,
      "step": 316600
    },
    {
      "epoch": 1.109639115821496,
      "grad_norm": 2.625,
      "learning_rate": 3.5006788839822324e-05,
      "loss": 0.8308,
      "step": 316610
    },
    {
      "epoch": 1.1096741633283917,
      "grad_norm": 2.453125,
      "learning_rate": 3.5006139811158625e-05,
      "loss": 0.8572,
      "step": 316620
    },
    {
      "epoch": 1.1097092108352873,
      "grad_norm": 2.9375,
      "learning_rate": 3.500549078249492e-05,
      "loss": 0.901,
      "step": 316630
    },
    {
      "epoch": 1.1097442583421828,
      "grad_norm": 3.140625,
      "learning_rate": 3.500484175383122e-05,
      "loss": 0.9019,
      "step": 316640
    },
    {
      "epoch": 1.1097793058490784,
      "grad_norm": 2.90625,
      "learning_rate": 3.5004192725167516e-05,
      "loss": 0.897,
      "step": 316650
    },
    {
      "epoch": 1.1098143533559741,
      "grad_norm": 2.6875,
      "learning_rate": 3.500354369650382e-05,
      "loss": 0.8976,
      "step": 316660
    },
    {
      "epoch": 1.1098494008628697,
      "grad_norm": 2.984375,
      "learning_rate": 3.500289466784011e-05,
      "loss": 0.8795,
      "step": 316670
    },
    {
      "epoch": 1.1098844483697652,
      "grad_norm": 2.90625,
      "learning_rate": 3.500224563917641e-05,
      "loss": 0.8665,
      "step": 316680
    },
    {
      "epoch": 1.1099194958766607,
      "grad_norm": 2.921875,
      "learning_rate": 3.500159661051271e-05,
      "loss": 0.8051,
      "step": 316690
    },
    {
      "epoch": 1.1099545433835565,
      "grad_norm": 2.609375,
      "learning_rate": 3.500094758184901e-05,
      "loss": 0.8106,
      "step": 316700
    },
    {
      "epoch": 1.109989590890452,
      "grad_norm": 2.75,
      "learning_rate": 3.5000298553185304e-05,
      "loss": 0.8599,
      "step": 316710
    },
    {
      "epoch": 1.1100246383973476,
      "grad_norm": 3.296875,
      "learning_rate": 3.4999649524521605e-05,
      "loss": 0.8891,
      "step": 316720
    },
    {
      "epoch": 1.1100596859042433,
      "grad_norm": 3.09375,
      "learning_rate": 3.49990004958579e-05,
      "loss": 0.9537,
      "step": 316730
    },
    {
      "epoch": 1.1100947334111388,
      "grad_norm": 2.90625,
      "learning_rate": 3.49983514671942e-05,
      "loss": 0.7969,
      "step": 316740
    },
    {
      "epoch": 1.1101297809180344,
      "grad_norm": 2.53125,
      "learning_rate": 3.49977024385305e-05,
      "loss": 0.8558,
      "step": 316750
    },
    {
      "epoch": 1.11016482842493,
      "grad_norm": 3.1875,
      "learning_rate": 3.499705340986679e-05,
      "loss": 0.8954,
      "step": 316760
    },
    {
      "epoch": 1.1101998759318257,
      "grad_norm": 2.984375,
      "learning_rate": 3.499640438120309e-05,
      "loss": 0.8516,
      "step": 316770
    },
    {
      "epoch": 1.1102349234387212,
      "grad_norm": 2.734375,
      "learning_rate": 3.4995755352539387e-05,
      "loss": 0.7854,
      "step": 316780
    },
    {
      "epoch": 1.1102699709456167,
      "grad_norm": 3.203125,
      "learning_rate": 3.499510632387569e-05,
      "loss": 0.856,
      "step": 316790
    },
    {
      "epoch": 1.1103050184525123,
      "grad_norm": 3.046875,
      "learning_rate": 3.499445729521198e-05,
      "loss": 0.9086,
      "step": 316800
    },
    {
      "epoch": 1.110340065959408,
      "grad_norm": 2.984375,
      "learning_rate": 3.4993808266548284e-05,
      "loss": 0.7444,
      "step": 316810
    },
    {
      "epoch": 1.1103751134663036,
      "grad_norm": 2.875,
      "learning_rate": 3.499315923788458e-05,
      "loss": 0.8907,
      "step": 316820
    },
    {
      "epoch": 1.1104101609731991,
      "grad_norm": 2.671875,
      "learning_rate": 3.499251020922088e-05,
      "loss": 0.8191,
      "step": 316830
    },
    {
      "epoch": 1.1104452084800949,
      "grad_norm": 2.75,
      "learning_rate": 3.499186118055718e-05,
      "loss": 0.8804,
      "step": 316840
    },
    {
      "epoch": 1.1104802559869904,
      "grad_norm": 3.125,
      "learning_rate": 3.4991212151893476e-05,
      "loss": 0.8179,
      "step": 316850
    },
    {
      "epoch": 1.110515303493886,
      "grad_norm": 2.921875,
      "learning_rate": 3.499056312322978e-05,
      "loss": 0.833,
      "step": 316860
    },
    {
      "epoch": 1.1105503510007815,
      "grad_norm": 3.0,
      "learning_rate": 3.498991409456607e-05,
      "loss": 0.8097,
      "step": 316870
    },
    {
      "epoch": 1.1105853985076772,
      "grad_norm": 3.21875,
      "learning_rate": 3.498926506590237e-05,
      "loss": 0.95,
      "step": 316880
    },
    {
      "epoch": 1.1106204460145728,
      "grad_norm": 3.109375,
      "learning_rate": 3.498861603723867e-05,
      "loss": 0.8713,
      "step": 316890
    },
    {
      "epoch": 1.1106554935214683,
      "grad_norm": 2.9375,
      "learning_rate": 3.498796700857497e-05,
      "loss": 0.888,
      "step": 316900
    },
    {
      "epoch": 1.1106905410283638,
      "grad_norm": 2.703125,
      "learning_rate": 3.4987317979911264e-05,
      "loss": 0.8462,
      "step": 316910
    },
    {
      "epoch": 1.1107255885352596,
      "grad_norm": 2.90625,
      "learning_rate": 3.4986668951247565e-05,
      "loss": 0.8143,
      "step": 316920
    },
    {
      "epoch": 1.1107606360421551,
      "grad_norm": 3.375,
      "learning_rate": 3.498601992258386e-05,
      "loss": 0.8289,
      "step": 316930
    },
    {
      "epoch": 1.1107956835490507,
      "grad_norm": 3.1875,
      "learning_rate": 3.498537089392016e-05,
      "loss": 0.8687,
      "step": 316940
    },
    {
      "epoch": 1.1108307310559464,
      "grad_norm": 3.640625,
      "learning_rate": 3.4984721865256456e-05,
      "loss": 0.8301,
      "step": 316950
    },
    {
      "epoch": 1.110865778562842,
      "grad_norm": 2.75,
      "learning_rate": 3.498407283659276e-05,
      "loss": 0.8663,
      "step": 316960
    },
    {
      "epoch": 1.1109008260697375,
      "grad_norm": 3.28125,
      "learning_rate": 3.498342380792906e-05,
      "loss": 0.8651,
      "step": 316970
    },
    {
      "epoch": 1.110935873576633,
      "grad_norm": 2.828125,
      "learning_rate": 3.498277477926535e-05,
      "loss": 0.8695,
      "step": 316980
    },
    {
      "epoch": 1.1109709210835288,
      "grad_norm": 2.984375,
      "learning_rate": 3.4982125750601655e-05,
      "loss": 0.9267,
      "step": 316990
    },
    {
      "epoch": 1.1110059685904243,
      "grad_norm": 3.015625,
      "learning_rate": 3.498147672193795e-05,
      "loss": 0.8828,
      "step": 317000
    },
    {
      "epoch": 1.1110410160973199,
      "grad_norm": 2.359375,
      "learning_rate": 3.498082769327425e-05,
      "loss": 0.8635,
      "step": 317010
    },
    {
      "epoch": 1.1110760636042154,
      "grad_norm": 2.90625,
      "learning_rate": 3.4980178664610545e-05,
      "loss": 0.7732,
      "step": 317020
    },
    {
      "epoch": 1.1111111111111112,
      "grad_norm": 2.71875,
      "learning_rate": 3.497952963594685e-05,
      "loss": 0.8853,
      "step": 317030
    },
    {
      "epoch": 1.1111461586180067,
      "grad_norm": 2.390625,
      "learning_rate": 3.497888060728314e-05,
      "loss": 0.8491,
      "step": 317040
    },
    {
      "epoch": 1.1111812061249022,
      "grad_norm": 3.546875,
      "learning_rate": 3.497823157861944e-05,
      "loss": 0.876,
      "step": 317050
    },
    {
      "epoch": 1.111216253631798,
      "grad_norm": 3.328125,
      "learning_rate": 3.497758254995574e-05,
      "loss": 0.9481,
      "step": 317060
    },
    {
      "epoch": 1.1112513011386935,
      "grad_norm": 3.140625,
      "learning_rate": 3.497693352129204e-05,
      "loss": 0.8537,
      "step": 317070
    },
    {
      "epoch": 1.111286348645589,
      "grad_norm": 3.28125,
      "learning_rate": 3.497628449262833e-05,
      "loss": 0.8671,
      "step": 317080
    },
    {
      "epoch": 1.1113213961524846,
      "grad_norm": 3.203125,
      "learning_rate": 3.4975635463964635e-05,
      "loss": 0.8351,
      "step": 317090
    },
    {
      "epoch": 1.1113564436593804,
      "grad_norm": 2.5625,
      "learning_rate": 3.497498643530093e-05,
      "loss": 0.8654,
      "step": 317100
    },
    {
      "epoch": 1.111391491166276,
      "grad_norm": 2.9375,
      "learning_rate": 3.497433740663723e-05,
      "loss": 0.824,
      "step": 317110
    },
    {
      "epoch": 1.1114265386731714,
      "grad_norm": 3.03125,
      "learning_rate": 3.497368837797353e-05,
      "loss": 0.9179,
      "step": 317120
    },
    {
      "epoch": 1.1114615861800672,
      "grad_norm": 3.234375,
      "learning_rate": 3.497303934930982e-05,
      "loss": 0.8411,
      "step": 317130
    },
    {
      "epoch": 1.1114966336869627,
      "grad_norm": 2.640625,
      "learning_rate": 3.497239032064612e-05,
      "loss": 0.834,
      "step": 317140
    },
    {
      "epoch": 1.1115316811938583,
      "grad_norm": 2.796875,
      "learning_rate": 3.4971741291982416e-05,
      "loss": 0.8825,
      "step": 317150
    },
    {
      "epoch": 1.1115667287007538,
      "grad_norm": 3.265625,
      "learning_rate": 3.497109226331872e-05,
      "loss": 0.8696,
      "step": 317160
    },
    {
      "epoch": 1.1116017762076496,
      "grad_norm": 2.609375,
      "learning_rate": 3.497044323465501e-05,
      "loss": 0.7736,
      "step": 317170
    },
    {
      "epoch": 1.111636823714545,
      "grad_norm": 2.96875,
      "learning_rate": 3.496979420599131e-05,
      "loss": 0.8626,
      "step": 317180
    },
    {
      "epoch": 1.1116718712214406,
      "grad_norm": 2.921875,
      "learning_rate": 3.496914517732761e-05,
      "loss": 0.9019,
      "step": 317190
    },
    {
      "epoch": 1.1117069187283362,
      "grad_norm": 2.609375,
      "learning_rate": 3.496849614866391e-05,
      "loss": 0.8644,
      "step": 317200
    },
    {
      "epoch": 1.111741966235232,
      "grad_norm": 3.234375,
      "learning_rate": 3.496784712000021e-05,
      "loss": 0.873,
      "step": 317210
    },
    {
      "epoch": 1.1117770137421275,
      "grad_norm": 2.4375,
      "learning_rate": 3.4967198091336505e-05,
      "loss": 0.8726,
      "step": 317220
    },
    {
      "epoch": 1.111812061249023,
      "grad_norm": 2.84375,
      "learning_rate": 3.496654906267281e-05,
      "loss": 0.8972,
      "step": 317230
    },
    {
      "epoch": 1.1118471087559187,
      "grad_norm": 2.609375,
      "learning_rate": 3.49659000340091e-05,
      "loss": 0.8369,
      "step": 317240
    },
    {
      "epoch": 1.1118821562628143,
      "grad_norm": 3.0,
      "learning_rate": 3.49652510053454e-05,
      "loss": 0.8746,
      "step": 317250
    },
    {
      "epoch": 1.1119172037697098,
      "grad_norm": 2.75,
      "learning_rate": 3.49646019766817e-05,
      "loss": 0.8529,
      "step": 317260
    },
    {
      "epoch": 1.1119522512766054,
      "grad_norm": 3.140625,
      "learning_rate": 3.4963952948018e-05,
      "loss": 0.8936,
      "step": 317270
    },
    {
      "epoch": 1.111987298783501,
      "grad_norm": 2.6875,
      "learning_rate": 3.496330391935429e-05,
      "loss": 0.8453,
      "step": 317280
    },
    {
      "epoch": 1.1120223462903966,
      "grad_norm": 2.828125,
      "learning_rate": 3.4962654890690595e-05,
      "loss": 0.8383,
      "step": 317290
    },
    {
      "epoch": 1.1120573937972922,
      "grad_norm": 2.765625,
      "learning_rate": 3.496200586202689e-05,
      "loss": 0.821,
      "step": 317300
    },
    {
      "epoch": 1.112092441304188,
      "grad_norm": 3.0,
      "learning_rate": 3.496135683336319e-05,
      "loss": 0.8215,
      "step": 317310
    },
    {
      "epoch": 1.1121274888110835,
      "grad_norm": 3.03125,
      "learning_rate": 3.4960707804699485e-05,
      "loss": 0.9411,
      "step": 317320
    },
    {
      "epoch": 1.112162536317979,
      "grad_norm": 3.078125,
      "learning_rate": 3.496005877603579e-05,
      "loss": 0.8677,
      "step": 317330
    },
    {
      "epoch": 1.1121975838248745,
      "grad_norm": 2.9375,
      "learning_rate": 3.495940974737209e-05,
      "loss": 0.7997,
      "step": 317340
    },
    {
      "epoch": 1.1122326313317703,
      "grad_norm": 3.125,
      "learning_rate": 3.495876071870838e-05,
      "loss": 0.8111,
      "step": 317350
    },
    {
      "epoch": 1.1122676788386658,
      "grad_norm": 2.671875,
      "learning_rate": 3.4958111690044684e-05,
      "loss": 0.8865,
      "step": 317360
    },
    {
      "epoch": 1.1123027263455614,
      "grad_norm": 2.296875,
      "learning_rate": 3.495746266138098e-05,
      "loss": 0.8674,
      "step": 317370
    },
    {
      "epoch": 1.112337773852457,
      "grad_norm": 3.015625,
      "learning_rate": 3.495681363271728e-05,
      "loss": 0.798,
      "step": 317380
    },
    {
      "epoch": 1.1123728213593527,
      "grad_norm": 3.109375,
      "learning_rate": 3.4956164604053575e-05,
      "loss": 0.8955,
      "step": 317390
    },
    {
      "epoch": 1.1124078688662482,
      "grad_norm": 3.515625,
      "learning_rate": 3.4955515575389876e-05,
      "loss": 0.9914,
      "step": 317400
    },
    {
      "epoch": 1.1124429163731437,
      "grad_norm": 2.515625,
      "learning_rate": 3.495486654672617e-05,
      "loss": 0.8532,
      "step": 317410
    },
    {
      "epoch": 1.1124779638800395,
      "grad_norm": 3.109375,
      "learning_rate": 3.495421751806247e-05,
      "loss": 0.8406,
      "step": 317420
    },
    {
      "epoch": 1.112513011386935,
      "grad_norm": 3.125,
      "learning_rate": 3.495356848939877e-05,
      "loss": 0.8643,
      "step": 317430
    },
    {
      "epoch": 1.1125480588938306,
      "grad_norm": 2.453125,
      "learning_rate": 3.495291946073507e-05,
      "loss": 0.8185,
      "step": 317440
    },
    {
      "epoch": 1.112583106400726,
      "grad_norm": 3.015625,
      "learning_rate": 3.495227043207136e-05,
      "loss": 0.8744,
      "step": 317450
    },
    {
      "epoch": 1.1126181539076219,
      "grad_norm": 3.125,
      "learning_rate": 3.4951621403407664e-05,
      "loss": 0.8422,
      "step": 317460
    },
    {
      "epoch": 1.1126532014145174,
      "grad_norm": 2.984375,
      "learning_rate": 3.495097237474396e-05,
      "loss": 0.9172,
      "step": 317470
    },
    {
      "epoch": 1.112688248921413,
      "grad_norm": 2.4375,
      "learning_rate": 3.495032334608026e-05,
      "loss": 0.8691,
      "step": 317480
    },
    {
      "epoch": 1.1127232964283085,
      "grad_norm": 3.109375,
      "learning_rate": 3.494967431741656e-05,
      "loss": 0.8993,
      "step": 317490
    },
    {
      "epoch": 1.1127583439352042,
      "grad_norm": 2.875,
      "learning_rate": 3.4949025288752856e-05,
      "loss": 0.8839,
      "step": 317500
    },
    {
      "epoch": 1.1127933914420998,
      "grad_norm": 3.125,
      "learning_rate": 3.494837626008915e-05,
      "loss": 0.9124,
      "step": 317510
    },
    {
      "epoch": 1.1128284389489953,
      "grad_norm": 3.359375,
      "learning_rate": 3.4947727231425445e-05,
      "loss": 0.854,
      "step": 317520
    },
    {
      "epoch": 1.112863486455891,
      "grad_norm": 3.4375,
      "learning_rate": 3.494707820276175e-05,
      "loss": 0.8299,
      "step": 317530
    },
    {
      "epoch": 1.1128985339627866,
      "grad_norm": 3.46875,
      "learning_rate": 3.494642917409804e-05,
      "loss": 0.9251,
      "step": 317540
    },
    {
      "epoch": 1.1129335814696821,
      "grad_norm": 2.671875,
      "learning_rate": 3.494578014543434e-05,
      "loss": 0.8341,
      "step": 317550
    },
    {
      "epoch": 1.1129686289765777,
      "grad_norm": 2.90625,
      "learning_rate": 3.4945131116770644e-05,
      "loss": 0.9151,
      "step": 317560
    },
    {
      "epoch": 1.1130036764834734,
      "grad_norm": 3.359375,
      "learning_rate": 3.494448208810694e-05,
      "loss": 0.8956,
      "step": 317570
    },
    {
      "epoch": 1.113038723990369,
      "grad_norm": 2.65625,
      "learning_rate": 3.494383305944324e-05,
      "loss": 0.8463,
      "step": 317580
    },
    {
      "epoch": 1.1130737714972645,
      "grad_norm": 2.859375,
      "learning_rate": 3.4943184030779535e-05,
      "loss": 0.8551,
      "step": 317590
    },
    {
      "epoch": 1.11310881900416,
      "grad_norm": 3.09375,
      "learning_rate": 3.4942535002115836e-05,
      "loss": 0.9005,
      "step": 317600
    },
    {
      "epoch": 1.1131438665110558,
      "grad_norm": 2.90625,
      "learning_rate": 3.494188597345213e-05,
      "loss": 0.8043,
      "step": 317610
    },
    {
      "epoch": 1.1131789140179513,
      "grad_norm": 2.90625,
      "learning_rate": 3.494123694478843e-05,
      "loss": 0.9429,
      "step": 317620
    },
    {
      "epoch": 1.1132139615248469,
      "grad_norm": 2.671875,
      "learning_rate": 3.494058791612473e-05,
      "loss": 0.8616,
      "step": 317630
    },
    {
      "epoch": 1.1132490090317426,
      "grad_norm": 2.703125,
      "learning_rate": 3.493993888746103e-05,
      "loss": 0.8031,
      "step": 317640
    },
    {
      "epoch": 1.1132840565386382,
      "grad_norm": 2.71875,
      "learning_rate": 3.493928985879732e-05,
      "loss": 0.8707,
      "step": 317650
    },
    {
      "epoch": 1.1133191040455337,
      "grad_norm": 2.96875,
      "learning_rate": 3.4938640830133624e-05,
      "loss": 0.8301,
      "step": 317660
    },
    {
      "epoch": 1.1133541515524292,
      "grad_norm": 3.015625,
      "learning_rate": 3.493799180146992e-05,
      "loss": 0.8984,
      "step": 317670
    },
    {
      "epoch": 1.113389199059325,
      "grad_norm": 3.171875,
      "learning_rate": 3.493734277280622e-05,
      "loss": 0.8514,
      "step": 317680
    },
    {
      "epoch": 1.1134242465662205,
      "grad_norm": 3.359375,
      "learning_rate": 3.4936693744142515e-05,
      "loss": 0.8777,
      "step": 317690
    },
    {
      "epoch": 1.113459294073116,
      "grad_norm": 2.796875,
      "learning_rate": 3.4936044715478816e-05,
      "loss": 0.9097,
      "step": 317700
    },
    {
      "epoch": 1.1134943415800116,
      "grad_norm": 3.015625,
      "learning_rate": 3.493539568681512e-05,
      "loss": 0.8619,
      "step": 317710
    },
    {
      "epoch": 1.1135293890869074,
      "grad_norm": 3.1875,
      "learning_rate": 3.493474665815141e-05,
      "loss": 0.876,
      "step": 317720
    },
    {
      "epoch": 1.1135644365938029,
      "grad_norm": 2.953125,
      "learning_rate": 3.4934097629487714e-05,
      "loss": 0.9283,
      "step": 317730
    },
    {
      "epoch": 1.1135994841006984,
      "grad_norm": 2.703125,
      "learning_rate": 3.493344860082401e-05,
      "loss": 0.8722,
      "step": 317740
    },
    {
      "epoch": 1.1136345316075942,
      "grad_norm": 2.984375,
      "learning_rate": 3.493279957216031e-05,
      "loss": 0.8753,
      "step": 317750
    },
    {
      "epoch": 1.1136695791144897,
      "grad_norm": 3.265625,
      "learning_rate": 3.4932150543496604e-05,
      "loss": 0.9536,
      "step": 317760
    },
    {
      "epoch": 1.1137046266213853,
      "grad_norm": 3.203125,
      "learning_rate": 3.4931501514832906e-05,
      "loss": 0.8799,
      "step": 317770
    },
    {
      "epoch": 1.1137396741282808,
      "grad_norm": 2.390625,
      "learning_rate": 3.49308524861692e-05,
      "loss": 0.9074,
      "step": 317780
    },
    {
      "epoch": 1.1137747216351765,
      "grad_norm": 2.71875,
      "learning_rate": 3.49302034575055e-05,
      "loss": 0.8665,
      "step": 317790
    },
    {
      "epoch": 1.113809769142072,
      "grad_norm": 2.75,
      "learning_rate": 3.4929554428841796e-05,
      "loss": 0.7915,
      "step": 317800
    },
    {
      "epoch": 1.1138448166489676,
      "grad_norm": 2.484375,
      "learning_rate": 3.49289054001781e-05,
      "loss": 0.8354,
      "step": 317810
    },
    {
      "epoch": 1.1138798641558634,
      "grad_norm": 3.109375,
      "learning_rate": 3.492825637151439e-05,
      "loss": 0.8502,
      "step": 317820
    },
    {
      "epoch": 1.113914911662759,
      "grad_norm": 2.71875,
      "learning_rate": 3.4927607342850694e-05,
      "loss": 0.8869,
      "step": 317830
    },
    {
      "epoch": 1.1139499591696544,
      "grad_norm": 2.984375,
      "learning_rate": 3.4926958314186995e-05,
      "loss": 0.8786,
      "step": 317840
    },
    {
      "epoch": 1.11398500667655,
      "grad_norm": 3.125,
      "learning_rate": 3.492630928552329e-05,
      "loss": 0.894,
      "step": 317850
    },
    {
      "epoch": 1.1140200541834457,
      "grad_norm": 2.375,
      "learning_rate": 3.492566025685959e-05,
      "loss": 0.9857,
      "step": 317860
    },
    {
      "epoch": 1.1140551016903413,
      "grad_norm": 2.875,
      "learning_rate": 3.4925011228195886e-05,
      "loss": 0.8174,
      "step": 317870
    },
    {
      "epoch": 1.1140901491972368,
      "grad_norm": 2.75,
      "learning_rate": 3.492436219953219e-05,
      "loss": 0.8479,
      "step": 317880
    },
    {
      "epoch": 1.1141251967041323,
      "grad_norm": 3.046875,
      "learning_rate": 3.4923713170868475e-05,
      "loss": 0.911,
      "step": 317890
    },
    {
      "epoch": 1.114160244211028,
      "grad_norm": 3.09375,
      "learning_rate": 3.4923064142204776e-05,
      "loss": 0.8315,
      "step": 317900
    },
    {
      "epoch": 1.1141952917179236,
      "grad_norm": 2.78125,
      "learning_rate": 3.492241511354107e-05,
      "loss": 0.818,
      "step": 317910
    },
    {
      "epoch": 1.1142303392248192,
      "grad_norm": 2.84375,
      "learning_rate": 3.492176608487737e-05,
      "loss": 0.8385,
      "step": 317920
    },
    {
      "epoch": 1.114265386731715,
      "grad_norm": 2.875,
      "learning_rate": 3.4921117056213674e-05,
      "loss": 0.7765,
      "step": 317930
    },
    {
      "epoch": 1.1143004342386105,
      "grad_norm": 3.09375,
      "learning_rate": 3.492046802754997e-05,
      "loss": 0.8522,
      "step": 317940
    },
    {
      "epoch": 1.114335481745506,
      "grad_norm": 2.421875,
      "learning_rate": 3.491981899888627e-05,
      "loss": 0.8384,
      "step": 317950
    },
    {
      "epoch": 1.1143705292524015,
      "grad_norm": 2.390625,
      "learning_rate": 3.4919169970222564e-05,
      "loss": 0.7516,
      "step": 317960
    },
    {
      "epoch": 1.1144055767592973,
      "grad_norm": 3.28125,
      "learning_rate": 3.4918520941558866e-05,
      "loss": 0.9065,
      "step": 317970
    },
    {
      "epoch": 1.1144406242661928,
      "grad_norm": 2.84375,
      "learning_rate": 3.491787191289516e-05,
      "loss": 0.8425,
      "step": 317980
    },
    {
      "epoch": 1.1144756717730884,
      "grad_norm": 3.015625,
      "learning_rate": 3.491722288423146e-05,
      "loss": 0.8671,
      "step": 317990
    },
    {
      "epoch": 1.1145107192799841,
      "grad_norm": 2.96875,
      "learning_rate": 3.4916573855567756e-05,
      "loss": 0.827,
      "step": 318000
    },
    {
      "epoch": 1.1145457667868797,
      "grad_norm": 3.359375,
      "learning_rate": 3.491592482690406e-05,
      "loss": 0.9352,
      "step": 318010
    },
    {
      "epoch": 1.1145808142937752,
      "grad_norm": 3.109375,
      "learning_rate": 3.491527579824035e-05,
      "loss": 0.8961,
      "step": 318020
    },
    {
      "epoch": 1.1146158618006707,
      "grad_norm": 3.109375,
      "learning_rate": 3.4914626769576654e-05,
      "loss": 0.9173,
      "step": 318030
    },
    {
      "epoch": 1.1146509093075665,
      "grad_norm": 3.375,
      "learning_rate": 3.491397774091295e-05,
      "loss": 0.9338,
      "step": 318040
    },
    {
      "epoch": 1.114685956814462,
      "grad_norm": 2.8125,
      "learning_rate": 3.491332871224925e-05,
      "loss": 0.8348,
      "step": 318050
    },
    {
      "epoch": 1.1147210043213576,
      "grad_norm": 2.640625,
      "learning_rate": 3.4912679683585544e-05,
      "loss": 0.7981,
      "step": 318060
    },
    {
      "epoch": 1.114756051828253,
      "grad_norm": 2.578125,
      "learning_rate": 3.4912030654921846e-05,
      "loss": 0.8563,
      "step": 318070
    },
    {
      "epoch": 1.1147910993351489,
      "grad_norm": 2.734375,
      "learning_rate": 3.491138162625815e-05,
      "loss": 0.8564,
      "step": 318080
    },
    {
      "epoch": 1.1148261468420444,
      "grad_norm": 2.5,
      "learning_rate": 3.491073259759444e-05,
      "loss": 0.9164,
      "step": 318090
    },
    {
      "epoch": 1.11486119434894,
      "grad_norm": 3.09375,
      "learning_rate": 3.491008356893074e-05,
      "loss": 0.9703,
      "step": 318100
    },
    {
      "epoch": 1.1148962418558357,
      "grad_norm": 3.140625,
      "learning_rate": 3.490943454026704e-05,
      "loss": 0.8572,
      "step": 318110
    },
    {
      "epoch": 1.1149312893627312,
      "grad_norm": 2.5625,
      "learning_rate": 3.490878551160334e-05,
      "loss": 0.946,
      "step": 318120
    },
    {
      "epoch": 1.1149663368696268,
      "grad_norm": 2.734375,
      "learning_rate": 3.4908136482939634e-05,
      "loss": 0.8446,
      "step": 318130
    },
    {
      "epoch": 1.1150013843765223,
      "grad_norm": 3.03125,
      "learning_rate": 3.4907487454275935e-05,
      "loss": 0.8714,
      "step": 318140
    },
    {
      "epoch": 1.115036431883418,
      "grad_norm": 2.71875,
      "learning_rate": 3.490683842561223e-05,
      "loss": 0.8648,
      "step": 318150
    },
    {
      "epoch": 1.1150714793903136,
      "grad_norm": 2.890625,
      "learning_rate": 3.490618939694853e-05,
      "loss": 0.8376,
      "step": 318160
    },
    {
      "epoch": 1.1151065268972091,
      "grad_norm": 2.65625,
      "learning_rate": 3.4905540368284826e-05,
      "loss": 0.7805,
      "step": 318170
    },
    {
      "epoch": 1.1151415744041047,
      "grad_norm": 3.046875,
      "learning_rate": 3.490489133962113e-05,
      "loss": 0.8031,
      "step": 318180
    },
    {
      "epoch": 1.1151766219110004,
      "grad_norm": 2.984375,
      "learning_rate": 3.490424231095742e-05,
      "loss": 0.9565,
      "step": 318190
    },
    {
      "epoch": 1.115211669417896,
      "grad_norm": 3.5,
      "learning_rate": 3.490359328229372e-05,
      "loss": 0.9772,
      "step": 318200
    },
    {
      "epoch": 1.1152467169247915,
      "grad_norm": 2.921875,
      "learning_rate": 3.4902944253630024e-05,
      "loss": 0.8188,
      "step": 318210
    },
    {
      "epoch": 1.1152817644316873,
      "grad_norm": 3.25,
      "learning_rate": 3.490229522496632e-05,
      "loss": 0.9096,
      "step": 318220
    },
    {
      "epoch": 1.1153168119385828,
      "grad_norm": 2.9375,
      "learning_rate": 3.490164619630262e-05,
      "loss": 0.8794,
      "step": 318230
    },
    {
      "epoch": 1.1153518594454783,
      "grad_norm": 2.65625,
      "learning_rate": 3.4900997167638915e-05,
      "loss": 0.9021,
      "step": 318240
    },
    {
      "epoch": 1.1153869069523739,
      "grad_norm": 2.71875,
      "learning_rate": 3.4900348138975216e-05,
      "loss": 0.9137,
      "step": 318250
    },
    {
      "epoch": 1.1154219544592696,
      "grad_norm": 3.28125,
      "learning_rate": 3.4899699110311504e-05,
      "loss": 0.9222,
      "step": 318260
    },
    {
      "epoch": 1.1154570019661652,
      "grad_norm": 3.109375,
      "learning_rate": 3.4899050081647806e-05,
      "loss": 0.8844,
      "step": 318270
    },
    {
      "epoch": 1.1154920494730607,
      "grad_norm": 2.734375,
      "learning_rate": 3.48984010529841e-05,
      "loss": 0.8476,
      "step": 318280
    },
    {
      "epoch": 1.1155270969799562,
      "grad_norm": 2.625,
      "learning_rate": 3.48977520243204e-05,
      "loss": 0.8219,
      "step": 318290
    },
    {
      "epoch": 1.115562144486852,
      "grad_norm": 3.0,
      "learning_rate": 3.48971029956567e-05,
      "loss": 0.9126,
      "step": 318300
    },
    {
      "epoch": 1.1155971919937475,
      "grad_norm": 3.0625,
      "learning_rate": 3.4896453966993e-05,
      "loss": 0.7965,
      "step": 318310
    },
    {
      "epoch": 1.115632239500643,
      "grad_norm": 2.65625,
      "learning_rate": 3.48958049383293e-05,
      "loss": 0.876,
      "step": 318320
    },
    {
      "epoch": 1.1156672870075388,
      "grad_norm": 2.421875,
      "learning_rate": 3.4895155909665594e-05,
      "loss": 0.8753,
      "step": 318330
    },
    {
      "epoch": 1.1157023345144343,
      "grad_norm": 3.21875,
      "learning_rate": 3.4894506881001895e-05,
      "loss": 0.8632,
      "step": 318340
    },
    {
      "epoch": 1.1157373820213299,
      "grad_norm": 2.9375,
      "learning_rate": 3.489385785233819e-05,
      "loss": 0.9407,
      "step": 318350
    },
    {
      "epoch": 1.1157724295282254,
      "grad_norm": 3.578125,
      "learning_rate": 3.489320882367449e-05,
      "loss": 0.8589,
      "step": 318360
    },
    {
      "epoch": 1.1158074770351212,
      "grad_norm": 2.8125,
      "learning_rate": 3.4892559795010786e-05,
      "loss": 0.9217,
      "step": 318370
    },
    {
      "epoch": 1.1158425245420167,
      "grad_norm": 2.5,
      "learning_rate": 3.489191076634709e-05,
      "loss": 0.8725,
      "step": 318380
    },
    {
      "epoch": 1.1158775720489122,
      "grad_norm": 2.671875,
      "learning_rate": 3.489126173768338e-05,
      "loss": 0.8763,
      "step": 318390
    },
    {
      "epoch": 1.1159126195558078,
      "grad_norm": 3.125,
      "learning_rate": 3.489061270901968e-05,
      "loss": 0.895,
      "step": 318400
    },
    {
      "epoch": 1.1159476670627035,
      "grad_norm": 2.984375,
      "learning_rate": 3.488996368035598e-05,
      "loss": 0.8732,
      "step": 318410
    },
    {
      "epoch": 1.115982714569599,
      "grad_norm": 3.0625,
      "learning_rate": 3.488931465169228e-05,
      "loss": 0.8762,
      "step": 318420
    },
    {
      "epoch": 1.1160177620764946,
      "grad_norm": 2.65625,
      "learning_rate": 3.4888665623028574e-05,
      "loss": 0.8855,
      "step": 318430
    },
    {
      "epoch": 1.1160528095833904,
      "grad_norm": 2.390625,
      "learning_rate": 3.4888016594364875e-05,
      "loss": 0.8365,
      "step": 318440
    },
    {
      "epoch": 1.116087857090286,
      "grad_norm": 3.109375,
      "learning_rate": 3.4887367565701176e-05,
      "loss": 0.8228,
      "step": 318450
    },
    {
      "epoch": 1.1161229045971814,
      "grad_norm": 2.78125,
      "learning_rate": 3.488671853703747e-05,
      "loss": 0.8353,
      "step": 318460
    },
    {
      "epoch": 1.116157952104077,
      "grad_norm": 3.34375,
      "learning_rate": 3.488606950837377e-05,
      "loss": 0.8761,
      "step": 318470
    },
    {
      "epoch": 1.1161929996109727,
      "grad_norm": 3.1875,
      "learning_rate": 3.488542047971007e-05,
      "loss": 0.8824,
      "step": 318480
    },
    {
      "epoch": 1.1162280471178683,
      "grad_norm": 3.453125,
      "learning_rate": 3.488477145104637e-05,
      "loss": 0.8606,
      "step": 318490
    },
    {
      "epoch": 1.1162630946247638,
      "grad_norm": 3.125,
      "learning_rate": 3.488412242238266e-05,
      "loss": 0.856,
      "step": 318500
    },
    {
      "epoch": 1.1162981421316596,
      "grad_norm": 2.640625,
      "learning_rate": 3.4883473393718964e-05,
      "loss": 0.8584,
      "step": 318510
    },
    {
      "epoch": 1.116333189638555,
      "grad_norm": 2.84375,
      "learning_rate": 3.488282436505526e-05,
      "loss": 0.9306,
      "step": 318520
    },
    {
      "epoch": 1.1163682371454506,
      "grad_norm": 3.34375,
      "learning_rate": 3.488217533639156e-05,
      "loss": 0.9166,
      "step": 318530
    },
    {
      "epoch": 1.1164032846523462,
      "grad_norm": 2.546875,
      "learning_rate": 3.4881526307727855e-05,
      "loss": 0.8673,
      "step": 318540
    },
    {
      "epoch": 1.116438332159242,
      "grad_norm": 2.765625,
      "learning_rate": 3.4880877279064156e-05,
      "loss": 0.8701,
      "step": 318550
    },
    {
      "epoch": 1.1164733796661375,
      "grad_norm": 2.921875,
      "learning_rate": 3.488022825040045e-05,
      "loss": 0.9043,
      "step": 318560
    },
    {
      "epoch": 1.116508427173033,
      "grad_norm": 3.203125,
      "learning_rate": 3.487957922173675e-05,
      "loss": 0.8868,
      "step": 318570
    },
    {
      "epoch": 1.1165434746799288,
      "grad_norm": 2.53125,
      "learning_rate": 3.4878930193073054e-05,
      "loss": 0.7882,
      "step": 318580
    },
    {
      "epoch": 1.1165785221868243,
      "grad_norm": 3.234375,
      "learning_rate": 3.487828116440935e-05,
      "loss": 0.9148,
      "step": 318590
    },
    {
      "epoch": 1.1166135696937198,
      "grad_norm": 2.984375,
      "learning_rate": 3.487763213574565e-05,
      "loss": 0.8811,
      "step": 318600
    },
    {
      "epoch": 1.1166486172006154,
      "grad_norm": 2.609375,
      "learning_rate": 3.4876983107081944e-05,
      "loss": 0.8176,
      "step": 318610
    },
    {
      "epoch": 1.1166836647075111,
      "grad_norm": 2.53125,
      "learning_rate": 3.4876334078418246e-05,
      "loss": 0.8923,
      "step": 318620
    },
    {
      "epoch": 1.1167187122144067,
      "grad_norm": 2.65625,
      "learning_rate": 3.487568504975454e-05,
      "loss": 0.7964,
      "step": 318630
    },
    {
      "epoch": 1.1167537597213022,
      "grad_norm": 2.9375,
      "learning_rate": 3.4875036021090835e-05,
      "loss": 0.8712,
      "step": 318640
    },
    {
      "epoch": 1.1167888072281977,
      "grad_norm": 3.25,
      "learning_rate": 3.487438699242713e-05,
      "loss": 0.7856,
      "step": 318650
    },
    {
      "epoch": 1.1168238547350935,
      "grad_norm": 2.875,
      "learning_rate": 3.487373796376343e-05,
      "loss": 0.8745,
      "step": 318660
    },
    {
      "epoch": 1.116858902241989,
      "grad_norm": 2.96875,
      "learning_rate": 3.487308893509973e-05,
      "loss": 0.846,
      "step": 318670
    },
    {
      "epoch": 1.1168939497488846,
      "grad_norm": 2.609375,
      "learning_rate": 3.487243990643603e-05,
      "loss": 0.813,
      "step": 318680
    },
    {
      "epoch": 1.1169289972557803,
      "grad_norm": 2.984375,
      "learning_rate": 3.487179087777233e-05,
      "loss": 0.8844,
      "step": 318690
    },
    {
      "epoch": 1.1169640447626759,
      "grad_norm": 2.78125,
      "learning_rate": 3.487114184910862e-05,
      "loss": 0.9091,
      "step": 318700
    },
    {
      "epoch": 1.1169990922695714,
      "grad_norm": 3.078125,
      "learning_rate": 3.4870492820444924e-05,
      "loss": 0.9606,
      "step": 318710
    },
    {
      "epoch": 1.117034139776467,
      "grad_norm": 2.859375,
      "learning_rate": 3.486984379178122e-05,
      "loss": 0.8774,
      "step": 318720
    },
    {
      "epoch": 1.1170691872833627,
      "grad_norm": 2.984375,
      "learning_rate": 3.486919476311752e-05,
      "loss": 0.8347,
      "step": 318730
    },
    {
      "epoch": 1.1171042347902582,
      "grad_norm": 2.546875,
      "learning_rate": 3.4868545734453815e-05,
      "loss": 0.8237,
      "step": 318740
    },
    {
      "epoch": 1.1171392822971538,
      "grad_norm": 3.59375,
      "learning_rate": 3.4867896705790116e-05,
      "loss": 0.9017,
      "step": 318750
    },
    {
      "epoch": 1.1171743298040493,
      "grad_norm": 2.703125,
      "learning_rate": 3.486724767712641e-05,
      "loss": 0.8753,
      "step": 318760
    },
    {
      "epoch": 1.117209377310945,
      "grad_norm": 2.953125,
      "learning_rate": 3.486659864846271e-05,
      "loss": 0.8963,
      "step": 318770
    },
    {
      "epoch": 1.1172444248178406,
      "grad_norm": 2.546875,
      "learning_rate": 3.486594961979901e-05,
      "loss": 0.8678,
      "step": 318780
    },
    {
      "epoch": 1.1172794723247361,
      "grad_norm": 2.59375,
      "learning_rate": 3.486530059113531e-05,
      "loss": 0.8686,
      "step": 318790
    },
    {
      "epoch": 1.1173145198316319,
      "grad_norm": 2.96875,
      "learning_rate": 3.486465156247161e-05,
      "loss": 0.9231,
      "step": 318800
    },
    {
      "epoch": 1.1173495673385274,
      "grad_norm": 2.859375,
      "learning_rate": 3.4864002533807904e-05,
      "loss": 0.8533,
      "step": 318810
    },
    {
      "epoch": 1.117384614845423,
      "grad_norm": 2.796875,
      "learning_rate": 3.4863353505144206e-05,
      "loss": 0.8445,
      "step": 318820
    },
    {
      "epoch": 1.1174196623523185,
      "grad_norm": 2.953125,
      "learning_rate": 3.48627044764805e-05,
      "loss": 0.847,
      "step": 318830
    },
    {
      "epoch": 1.1174547098592142,
      "grad_norm": 2.53125,
      "learning_rate": 3.48620554478168e-05,
      "loss": 0.853,
      "step": 318840
    },
    {
      "epoch": 1.1174897573661098,
      "grad_norm": 3.265625,
      "learning_rate": 3.4861406419153096e-05,
      "loss": 0.8462,
      "step": 318850
    },
    {
      "epoch": 1.1175248048730053,
      "grad_norm": 3.421875,
      "learning_rate": 3.48607573904894e-05,
      "loss": 0.8855,
      "step": 318860
    },
    {
      "epoch": 1.1175598523799009,
      "grad_norm": 2.890625,
      "learning_rate": 3.486010836182569e-05,
      "loss": 0.8596,
      "step": 318870
    },
    {
      "epoch": 1.1175948998867966,
      "grad_norm": 3.59375,
      "learning_rate": 3.4859459333161994e-05,
      "loss": 0.7697,
      "step": 318880
    },
    {
      "epoch": 1.1176299473936921,
      "grad_norm": 3.03125,
      "learning_rate": 3.485881030449829e-05,
      "loss": 0.8589,
      "step": 318890
    },
    {
      "epoch": 1.1176649949005877,
      "grad_norm": 2.671875,
      "learning_rate": 3.485816127583459e-05,
      "loss": 0.8609,
      "step": 318900
    },
    {
      "epoch": 1.1177000424074834,
      "grad_norm": 2.9375,
      "learning_rate": 3.4857512247170884e-05,
      "loss": 0.8712,
      "step": 318910
    },
    {
      "epoch": 1.117735089914379,
      "grad_norm": 3.078125,
      "learning_rate": 3.4856863218507186e-05,
      "loss": 0.7899,
      "step": 318920
    },
    {
      "epoch": 1.1177701374212745,
      "grad_norm": 2.984375,
      "learning_rate": 3.485621418984348e-05,
      "loss": 0.8335,
      "step": 318930
    },
    {
      "epoch": 1.11780518492817,
      "grad_norm": 3.0,
      "learning_rate": 3.485556516117978e-05,
      "loss": 0.9196,
      "step": 318940
    },
    {
      "epoch": 1.1178402324350658,
      "grad_norm": 2.84375,
      "learning_rate": 3.485491613251608e-05,
      "loss": 0.7962,
      "step": 318950
    },
    {
      "epoch": 1.1178752799419613,
      "grad_norm": 2.796875,
      "learning_rate": 3.485426710385238e-05,
      "loss": 0.8776,
      "step": 318960
    },
    {
      "epoch": 1.1179103274488569,
      "grad_norm": 3.140625,
      "learning_rate": 3.485361807518868e-05,
      "loss": 0.8485,
      "step": 318970
    },
    {
      "epoch": 1.1179453749557524,
      "grad_norm": 2.921875,
      "learning_rate": 3.4852969046524974e-05,
      "loss": 0.8564,
      "step": 318980
    },
    {
      "epoch": 1.1179804224626482,
      "grad_norm": 2.703125,
      "learning_rate": 3.4852320017861275e-05,
      "loss": 0.7929,
      "step": 318990
    },
    {
      "epoch": 1.1180154699695437,
      "grad_norm": 2.984375,
      "learning_rate": 3.485167098919757e-05,
      "loss": 0.8446,
      "step": 319000
    },
    {
      "epoch": 1.1180505174764392,
      "grad_norm": 3.21875,
      "learning_rate": 3.4851021960533864e-05,
      "loss": 0.8953,
      "step": 319010
    },
    {
      "epoch": 1.118085564983335,
      "grad_norm": 3.046875,
      "learning_rate": 3.485037293187016e-05,
      "loss": 0.8276,
      "step": 319020
    },
    {
      "epoch": 1.1181206124902305,
      "grad_norm": 3.125,
      "learning_rate": 3.484972390320646e-05,
      "loss": 0.925,
      "step": 319030
    },
    {
      "epoch": 1.118155659997126,
      "grad_norm": 3.0625,
      "learning_rate": 3.484907487454276e-05,
      "loss": 0.9363,
      "step": 319040
    },
    {
      "epoch": 1.1181907075040216,
      "grad_norm": 2.84375,
      "learning_rate": 3.4848425845879056e-05,
      "loss": 0.8476,
      "step": 319050
    },
    {
      "epoch": 1.1182257550109174,
      "grad_norm": 2.96875,
      "learning_rate": 3.484777681721536e-05,
      "loss": 0.9058,
      "step": 319060
    },
    {
      "epoch": 1.118260802517813,
      "grad_norm": 3.078125,
      "learning_rate": 3.484712778855165e-05,
      "loss": 0.8883,
      "step": 319070
    },
    {
      "epoch": 1.1182958500247084,
      "grad_norm": 2.5625,
      "learning_rate": 3.4846478759887954e-05,
      "loss": 0.8523,
      "step": 319080
    },
    {
      "epoch": 1.118330897531604,
      "grad_norm": 2.796875,
      "learning_rate": 3.484582973122425e-05,
      "loss": 0.8608,
      "step": 319090
    },
    {
      "epoch": 1.1183659450384997,
      "grad_norm": 2.90625,
      "learning_rate": 3.484518070256055e-05,
      "loss": 0.923,
      "step": 319100
    },
    {
      "epoch": 1.1184009925453953,
      "grad_norm": 3.0625,
      "learning_rate": 3.4844531673896844e-05,
      "loss": 0.8766,
      "step": 319110
    },
    {
      "epoch": 1.1184360400522908,
      "grad_norm": 3.046875,
      "learning_rate": 3.4843882645233146e-05,
      "loss": 0.8704,
      "step": 319120
    },
    {
      "epoch": 1.1184710875591866,
      "grad_norm": 2.484375,
      "learning_rate": 3.484323361656944e-05,
      "loss": 0.866,
      "step": 319130
    },
    {
      "epoch": 1.118506135066082,
      "grad_norm": 3.078125,
      "learning_rate": 3.484258458790574e-05,
      "loss": 0.8986,
      "step": 319140
    },
    {
      "epoch": 1.1185411825729776,
      "grad_norm": 2.71875,
      "learning_rate": 3.4841935559242036e-05,
      "loss": 0.79,
      "step": 319150
    },
    {
      "epoch": 1.1185762300798732,
      "grad_norm": 2.796875,
      "learning_rate": 3.484128653057834e-05,
      "loss": 0.8282,
      "step": 319160
    },
    {
      "epoch": 1.118611277586769,
      "grad_norm": 3.3125,
      "learning_rate": 3.484063750191464e-05,
      "loss": 0.8692,
      "step": 319170
    },
    {
      "epoch": 1.1186463250936645,
      "grad_norm": 2.78125,
      "learning_rate": 3.4839988473250934e-05,
      "loss": 0.9144,
      "step": 319180
    },
    {
      "epoch": 1.11868137260056,
      "grad_norm": 2.734375,
      "learning_rate": 3.4839339444587235e-05,
      "loss": 0.8024,
      "step": 319190
    },
    {
      "epoch": 1.1187164201074558,
      "grad_norm": 2.78125,
      "learning_rate": 3.483869041592353e-05,
      "loss": 0.8013,
      "step": 319200
    },
    {
      "epoch": 1.1187514676143513,
      "grad_norm": 3.53125,
      "learning_rate": 3.483804138725983e-05,
      "loss": 0.873,
      "step": 319210
    },
    {
      "epoch": 1.1187865151212468,
      "grad_norm": 2.953125,
      "learning_rate": 3.4837392358596126e-05,
      "loss": 0.7858,
      "step": 319220
    },
    {
      "epoch": 1.1188215626281424,
      "grad_norm": 3.421875,
      "learning_rate": 3.483674332993243e-05,
      "loss": 0.88,
      "step": 319230
    },
    {
      "epoch": 1.1188566101350381,
      "grad_norm": 3.140625,
      "learning_rate": 3.483609430126872e-05,
      "loss": 0.8213,
      "step": 319240
    },
    {
      "epoch": 1.1188916576419337,
      "grad_norm": 2.71875,
      "learning_rate": 3.483544527260502e-05,
      "loss": 0.8012,
      "step": 319250
    },
    {
      "epoch": 1.1189267051488292,
      "grad_norm": 2.890625,
      "learning_rate": 3.483479624394132e-05,
      "loss": 0.9489,
      "step": 319260
    },
    {
      "epoch": 1.118961752655725,
      "grad_norm": 2.578125,
      "learning_rate": 3.483414721527762e-05,
      "loss": 0.8654,
      "step": 319270
    },
    {
      "epoch": 1.1189968001626205,
      "grad_norm": 3.125,
      "learning_rate": 3.4833498186613914e-05,
      "loss": 0.8838,
      "step": 319280
    },
    {
      "epoch": 1.119031847669516,
      "grad_norm": 3.171875,
      "learning_rate": 3.4832849157950215e-05,
      "loss": 0.922,
      "step": 319290
    },
    {
      "epoch": 1.1190668951764116,
      "grad_norm": 3.09375,
      "learning_rate": 3.483220012928651e-05,
      "loss": 0.8573,
      "step": 319300
    },
    {
      "epoch": 1.1191019426833073,
      "grad_norm": 2.828125,
      "learning_rate": 3.483155110062281e-05,
      "loss": 0.8816,
      "step": 319310
    },
    {
      "epoch": 1.1191369901902029,
      "grad_norm": 2.71875,
      "learning_rate": 3.483090207195911e-05,
      "loss": 0.8962,
      "step": 319320
    },
    {
      "epoch": 1.1191720376970984,
      "grad_norm": 3.3125,
      "learning_rate": 3.483025304329541e-05,
      "loss": 0.8722,
      "step": 319330
    },
    {
      "epoch": 1.119207085203994,
      "grad_norm": 3.234375,
      "learning_rate": 3.482960401463171e-05,
      "loss": 0.8772,
      "step": 319340
    },
    {
      "epoch": 1.1192421327108897,
      "grad_norm": 3.203125,
      "learning_rate": 3.4828954985968e-05,
      "loss": 0.9075,
      "step": 319350
    },
    {
      "epoch": 1.1192771802177852,
      "grad_norm": 2.859375,
      "learning_rate": 3.4828305957304304e-05,
      "loss": 0.8212,
      "step": 319360
    },
    {
      "epoch": 1.1193122277246808,
      "grad_norm": 3.109375,
      "learning_rate": 3.48276569286406e-05,
      "loss": 0.8486,
      "step": 319370
    },
    {
      "epoch": 1.1193472752315765,
      "grad_norm": 2.640625,
      "learning_rate": 3.48270078999769e-05,
      "loss": 0.7784,
      "step": 319380
    },
    {
      "epoch": 1.119382322738472,
      "grad_norm": 2.796875,
      "learning_rate": 3.482635887131319e-05,
      "loss": 0.8724,
      "step": 319390
    },
    {
      "epoch": 1.1194173702453676,
      "grad_norm": 3.09375,
      "learning_rate": 3.482570984264949e-05,
      "loss": 0.8299,
      "step": 319400
    },
    {
      "epoch": 1.1194524177522631,
      "grad_norm": 2.875,
      "learning_rate": 3.482506081398579e-05,
      "loss": 0.859,
      "step": 319410
    },
    {
      "epoch": 1.1194874652591589,
      "grad_norm": 2.875,
      "learning_rate": 3.4824411785322086e-05,
      "loss": 0.8421,
      "step": 319420
    },
    {
      "epoch": 1.1195225127660544,
      "grad_norm": 2.921875,
      "learning_rate": 3.482376275665839e-05,
      "loss": 0.8366,
      "step": 319430
    },
    {
      "epoch": 1.11955756027295,
      "grad_norm": 3.3125,
      "learning_rate": 3.482311372799468e-05,
      "loss": 0.9153,
      "step": 319440
    },
    {
      "epoch": 1.1195926077798455,
      "grad_norm": 2.625,
      "learning_rate": 3.482246469933098e-05,
      "loss": 0.8371,
      "step": 319450
    },
    {
      "epoch": 1.1196276552867412,
      "grad_norm": 2.625,
      "learning_rate": 3.482181567066728e-05,
      "loss": 0.7888,
      "step": 319460
    },
    {
      "epoch": 1.1196627027936368,
      "grad_norm": 3.078125,
      "learning_rate": 3.482116664200358e-05,
      "loss": 0.9021,
      "step": 319470
    },
    {
      "epoch": 1.1196977503005323,
      "grad_norm": 2.671875,
      "learning_rate": 3.4820517613339874e-05,
      "loss": 0.9078,
      "step": 319480
    },
    {
      "epoch": 1.119732797807428,
      "grad_norm": 2.671875,
      "learning_rate": 3.4819868584676175e-05,
      "loss": 0.7905,
      "step": 319490
    },
    {
      "epoch": 1.1197678453143236,
      "grad_norm": 2.796875,
      "learning_rate": 3.481921955601247e-05,
      "loss": 0.7982,
      "step": 319500
    },
    {
      "epoch": 1.1198028928212191,
      "grad_norm": 2.828125,
      "learning_rate": 3.481857052734877e-05,
      "loss": 0.8766,
      "step": 319510
    },
    {
      "epoch": 1.1198379403281147,
      "grad_norm": 2.953125,
      "learning_rate": 3.4817921498685066e-05,
      "loss": 0.9305,
      "step": 319520
    },
    {
      "epoch": 1.1198729878350104,
      "grad_norm": 2.90625,
      "learning_rate": 3.481727247002137e-05,
      "loss": 0.9178,
      "step": 319530
    },
    {
      "epoch": 1.119908035341906,
      "grad_norm": 3.3125,
      "learning_rate": 3.481662344135767e-05,
      "loss": 0.8217,
      "step": 319540
    },
    {
      "epoch": 1.1199430828488015,
      "grad_norm": 3.46875,
      "learning_rate": 3.481597441269396e-05,
      "loss": 0.8354,
      "step": 319550
    },
    {
      "epoch": 1.119978130355697,
      "grad_norm": 2.953125,
      "learning_rate": 3.4815325384030264e-05,
      "loss": 0.8871,
      "step": 319560
    },
    {
      "epoch": 1.1200131778625928,
      "grad_norm": 3.078125,
      "learning_rate": 3.481467635536656e-05,
      "loss": 0.8958,
      "step": 319570
    },
    {
      "epoch": 1.1200482253694883,
      "grad_norm": 3.125,
      "learning_rate": 3.481402732670286e-05,
      "loss": 0.9333,
      "step": 319580
    },
    {
      "epoch": 1.1200832728763839,
      "grad_norm": 3.15625,
      "learning_rate": 3.4813378298039155e-05,
      "loss": 0.9412,
      "step": 319590
    },
    {
      "epoch": 1.1201183203832796,
      "grad_norm": 2.8125,
      "learning_rate": 3.4812729269375456e-05,
      "loss": 0.7886,
      "step": 319600
    },
    {
      "epoch": 1.1201533678901752,
      "grad_norm": 2.953125,
      "learning_rate": 3.481208024071175e-05,
      "loss": 0.9642,
      "step": 319610
    },
    {
      "epoch": 1.1201884153970707,
      "grad_norm": 2.828125,
      "learning_rate": 3.481143121204805e-05,
      "loss": 0.8085,
      "step": 319620
    },
    {
      "epoch": 1.1202234629039662,
      "grad_norm": 2.9375,
      "learning_rate": 3.481078218338435e-05,
      "loss": 0.8956,
      "step": 319630
    },
    {
      "epoch": 1.120258510410862,
      "grad_norm": 2.859375,
      "learning_rate": 3.481013315472065e-05,
      "loss": 0.9578,
      "step": 319640
    },
    {
      "epoch": 1.1202935579177575,
      "grad_norm": 2.875,
      "learning_rate": 3.480948412605694e-05,
      "loss": 0.9185,
      "step": 319650
    },
    {
      "epoch": 1.120328605424653,
      "grad_norm": 2.578125,
      "learning_rate": 3.4808835097393244e-05,
      "loss": 0.8651,
      "step": 319660
    },
    {
      "epoch": 1.1203636529315486,
      "grad_norm": 2.71875,
      "learning_rate": 3.4808186068729546e-05,
      "loss": 0.8494,
      "step": 319670
    },
    {
      "epoch": 1.1203987004384444,
      "grad_norm": 3.21875,
      "learning_rate": 3.480753704006584e-05,
      "loss": 0.8756,
      "step": 319680
    },
    {
      "epoch": 1.12043374794534,
      "grad_norm": 2.5625,
      "learning_rate": 3.480688801140214e-05,
      "loss": 0.8811,
      "step": 319690
    },
    {
      "epoch": 1.1204687954522354,
      "grad_norm": 2.875,
      "learning_rate": 3.4806238982738436e-05,
      "loss": 0.9072,
      "step": 319700
    },
    {
      "epoch": 1.1205038429591312,
      "grad_norm": 3.125,
      "learning_rate": 3.480558995407474e-05,
      "loss": 0.8351,
      "step": 319710
    },
    {
      "epoch": 1.1205388904660267,
      "grad_norm": 2.8125,
      "learning_rate": 3.480494092541103e-05,
      "loss": 0.8547,
      "step": 319720
    },
    {
      "epoch": 1.1205739379729223,
      "grad_norm": 2.5625,
      "learning_rate": 3.4804291896747334e-05,
      "loss": 0.8653,
      "step": 319730
    },
    {
      "epoch": 1.1206089854798178,
      "grad_norm": 3.265625,
      "learning_rate": 3.480364286808363e-05,
      "loss": 0.9489,
      "step": 319740
    },
    {
      "epoch": 1.1206440329867136,
      "grad_norm": 3.09375,
      "learning_rate": 3.480299383941993e-05,
      "loss": 0.8146,
      "step": 319750
    },
    {
      "epoch": 1.120679080493609,
      "grad_norm": 2.46875,
      "learning_rate": 3.4802344810756224e-05,
      "loss": 0.827,
      "step": 319760
    },
    {
      "epoch": 1.1207141280005046,
      "grad_norm": 2.9375,
      "learning_rate": 3.480169578209252e-05,
      "loss": 0.9311,
      "step": 319770
    },
    {
      "epoch": 1.1207491755074002,
      "grad_norm": 2.78125,
      "learning_rate": 3.480104675342882e-05,
      "loss": 0.8774,
      "step": 319780
    },
    {
      "epoch": 1.120784223014296,
      "grad_norm": 3.203125,
      "learning_rate": 3.4800397724765115e-05,
      "loss": 0.777,
      "step": 319790
    },
    {
      "epoch": 1.1208192705211915,
      "grad_norm": 2.578125,
      "learning_rate": 3.4799748696101416e-05,
      "loss": 0.848,
      "step": 319800
    },
    {
      "epoch": 1.120854318028087,
      "grad_norm": 2.96875,
      "learning_rate": 3.479909966743771e-05,
      "loss": 0.9065,
      "step": 319810
    },
    {
      "epoch": 1.1208893655349828,
      "grad_norm": 2.84375,
      "learning_rate": 3.479845063877401e-05,
      "loss": 0.7929,
      "step": 319820
    },
    {
      "epoch": 1.1209244130418783,
      "grad_norm": 2.578125,
      "learning_rate": 3.479780161011031e-05,
      "loss": 0.8862,
      "step": 319830
    },
    {
      "epoch": 1.1209594605487738,
      "grad_norm": 3.171875,
      "learning_rate": 3.479715258144661e-05,
      "loss": 0.916,
      "step": 319840
    },
    {
      "epoch": 1.1209945080556694,
      "grad_norm": 2.9375,
      "learning_rate": 3.47965035527829e-05,
      "loss": 0.8327,
      "step": 319850
    },
    {
      "epoch": 1.1210295555625651,
      "grad_norm": 3.15625,
      "learning_rate": 3.4795854524119204e-05,
      "loss": 0.8551,
      "step": 319860
    },
    {
      "epoch": 1.1210646030694607,
      "grad_norm": 2.96875,
      "learning_rate": 3.47952054954555e-05,
      "loss": 0.8413,
      "step": 319870
    },
    {
      "epoch": 1.1210996505763562,
      "grad_norm": 3.046875,
      "learning_rate": 3.47945564667918e-05,
      "loss": 0.8119,
      "step": 319880
    },
    {
      "epoch": 1.121134698083252,
      "grad_norm": 3.046875,
      "learning_rate": 3.4793907438128095e-05,
      "loss": 0.8813,
      "step": 319890
    },
    {
      "epoch": 1.1211697455901475,
      "grad_norm": 2.9375,
      "learning_rate": 3.4793258409464396e-05,
      "loss": 0.9086,
      "step": 319900
    },
    {
      "epoch": 1.121204793097043,
      "grad_norm": 3.15625,
      "learning_rate": 3.47926093808007e-05,
      "loss": 0.8308,
      "step": 319910
    },
    {
      "epoch": 1.1212398406039386,
      "grad_norm": 2.84375,
      "learning_rate": 3.479196035213699e-05,
      "loss": 0.9001,
      "step": 319920
    },
    {
      "epoch": 1.1212748881108343,
      "grad_norm": 2.890625,
      "learning_rate": 3.4791311323473294e-05,
      "loss": 0.9383,
      "step": 319930
    },
    {
      "epoch": 1.1213099356177298,
      "grad_norm": 2.953125,
      "learning_rate": 3.479066229480959e-05,
      "loss": 0.8606,
      "step": 319940
    },
    {
      "epoch": 1.1213449831246254,
      "grad_norm": 2.875,
      "learning_rate": 3.479001326614589e-05,
      "loss": 0.9285,
      "step": 319950
    },
    {
      "epoch": 1.1213800306315211,
      "grad_norm": 2.890625,
      "learning_rate": 3.4789364237482184e-05,
      "loss": 0.9597,
      "step": 319960
    },
    {
      "epoch": 1.1214150781384167,
      "grad_norm": 2.546875,
      "learning_rate": 3.4788715208818486e-05,
      "loss": 0.8224,
      "step": 319970
    },
    {
      "epoch": 1.1214501256453122,
      "grad_norm": 2.671875,
      "learning_rate": 3.478806618015478e-05,
      "loss": 0.7834,
      "step": 319980
    },
    {
      "epoch": 1.1214851731522077,
      "grad_norm": 3.109375,
      "learning_rate": 3.478741715149108e-05,
      "loss": 0.888,
      "step": 319990
    },
    {
      "epoch": 1.1215202206591035,
      "grad_norm": 2.828125,
      "learning_rate": 3.4786768122827376e-05,
      "loss": 0.8444,
      "step": 320000
    },
    {
      "epoch": 1.1215202206591035,
      "eval_loss": 0.8108518123626709,
      "eval_runtime": 560.089,
      "eval_samples_per_second": 679.242,
      "eval_steps_per_second": 56.604,
      "step": 320000
    },
    {
      "epoch": 1.121555268165999,
      "grad_norm": 2.890625,
      "learning_rate": 3.478611909416368e-05,
      "loss": 0.922,
      "step": 320010
    },
    {
      "epoch": 1.1215903156728946,
      "grad_norm": 3.0,
      "learning_rate": 3.478547006549997e-05,
      "loss": 0.8514,
      "step": 320020
    },
    {
      "epoch": 1.1216253631797901,
      "grad_norm": 2.734375,
      "learning_rate": 3.4784821036836274e-05,
      "loss": 0.9019,
      "step": 320030
    },
    {
      "epoch": 1.1216604106866859,
      "grad_norm": 3.0,
      "learning_rate": 3.4784172008172575e-05,
      "loss": 0.863,
      "step": 320040
    },
    {
      "epoch": 1.1216954581935814,
      "grad_norm": 2.75,
      "learning_rate": 3.478352297950887e-05,
      "loss": 0.8253,
      "step": 320050
    },
    {
      "epoch": 1.121730505700477,
      "grad_norm": 2.84375,
      "learning_rate": 3.478287395084517e-05,
      "loss": 0.9661,
      "step": 320060
    },
    {
      "epoch": 1.1217655532073727,
      "grad_norm": 3.0,
      "learning_rate": 3.4782224922181466e-05,
      "loss": 0.8868,
      "step": 320070
    },
    {
      "epoch": 1.1218006007142682,
      "grad_norm": 2.625,
      "learning_rate": 3.478157589351777e-05,
      "loss": 0.8509,
      "step": 320080
    },
    {
      "epoch": 1.1218356482211638,
      "grad_norm": 2.96875,
      "learning_rate": 3.478092686485406e-05,
      "loss": 0.9359,
      "step": 320090
    },
    {
      "epoch": 1.1218706957280593,
      "grad_norm": 3.296875,
      "learning_rate": 3.478027783619036e-05,
      "loss": 0.866,
      "step": 320100
    },
    {
      "epoch": 1.121905743234955,
      "grad_norm": 2.546875,
      "learning_rate": 3.477962880752666e-05,
      "loss": 0.8983,
      "step": 320110
    },
    {
      "epoch": 1.1219407907418506,
      "grad_norm": 2.890625,
      "learning_rate": 3.477897977886296e-05,
      "loss": 0.9265,
      "step": 320120
    },
    {
      "epoch": 1.1219758382487461,
      "grad_norm": 3.390625,
      "learning_rate": 3.4778330750199254e-05,
      "loss": 0.8788,
      "step": 320130
    },
    {
      "epoch": 1.1220108857556417,
      "grad_norm": 2.90625,
      "learning_rate": 3.477768172153555e-05,
      "loss": 0.8657,
      "step": 320140
    },
    {
      "epoch": 1.1220459332625374,
      "grad_norm": 2.828125,
      "learning_rate": 3.477703269287185e-05,
      "loss": 0.8698,
      "step": 320150
    },
    {
      "epoch": 1.122080980769433,
      "grad_norm": 2.75,
      "learning_rate": 3.4776383664208144e-05,
      "loss": 0.8795,
      "step": 320160
    },
    {
      "epoch": 1.1221160282763285,
      "grad_norm": 2.890625,
      "learning_rate": 3.4775734635544446e-05,
      "loss": 0.8171,
      "step": 320170
    },
    {
      "epoch": 1.1221510757832243,
      "grad_norm": 2.78125,
      "learning_rate": 3.477508560688074e-05,
      "loss": 0.8246,
      "step": 320180
    },
    {
      "epoch": 1.1221861232901198,
      "grad_norm": 2.75,
      "learning_rate": 3.477443657821704e-05,
      "loss": 0.8606,
      "step": 320190
    },
    {
      "epoch": 1.1222211707970153,
      "grad_norm": 2.875,
      "learning_rate": 3.4773787549553336e-05,
      "loss": 0.8318,
      "step": 320200
    },
    {
      "epoch": 1.1222562183039109,
      "grad_norm": 3.59375,
      "learning_rate": 3.477313852088964e-05,
      "loss": 1.0024,
      "step": 320210
    },
    {
      "epoch": 1.1222912658108066,
      "grad_norm": 3.046875,
      "learning_rate": 3.477248949222593e-05,
      "loss": 0.833,
      "step": 320220
    },
    {
      "epoch": 1.1223263133177022,
      "grad_norm": 3.0625,
      "learning_rate": 3.4771840463562234e-05,
      "loss": 0.9279,
      "step": 320230
    },
    {
      "epoch": 1.1223613608245977,
      "grad_norm": 2.546875,
      "learning_rate": 3.477119143489853e-05,
      "loss": 0.8526,
      "step": 320240
    },
    {
      "epoch": 1.1223964083314932,
      "grad_norm": 2.8125,
      "learning_rate": 3.477054240623483e-05,
      "loss": 0.8894,
      "step": 320250
    },
    {
      "epoch": 1.122431455838389,
      "grad_norm": 2.96875,
      "learning_rate": 3.4769893377571124e-05,
      "loss": 0.8838,
      "step": 320260
    },
    {
      "epoch": 1.1224665033452845,
      "grad_norm": 2.921875,
      "learning_rate": 3.4769244348907426e-05,
      "loss": 0.8797,
      "step": 320270
    },
    {
      "epoch": 1.12250155085218,
      "grad_norm": 3.015625,
      "learning_rate": 3.476859532024373e-05,
      "loss": 0.874,
      "step": 320280
    },
    {
      "epoch": 1.1225365983590758,
      "grad_norm": 2.96875,
      "learning_rate": 3.476794629158002e-05,
      "loss": 0.9132,
      "step": 320290
    },
    {
      "epoch": 1.1225716458659714,
      "grad_norm": 2.703125,
      "learning_rate": 3.476729726291632e-05,
      "loss": 0.8764,
      "step": 320300
    },
    {
      "epoch": 1.122606693372867,
      "grad_norm": 3.40625,
      "learning_rate": 3.476664823425262e-05,
      "loss": 0.9579,
      "step": 320310
    },
    {
      "epoch": 1.1226417408797624,
      "grad_norm": 2.703125,
      "learning_rate": 3.476599920558892e-05,
      "loss": 0.8649,
      "step": 320320
    },
    {
      "epoch": 1.1226767883866582,
      "grad_norm": 3.078125,
      "learning_rate": 3.4765350176925214e-05,
      "loss": 0.8328,
      "step": 320330
    },
    {
      "epoch": 1.1227118358935537,
      "grad_norm": 2.640625,
      "learning_rate": 3.4764701148261515e-05,
      "loss": 0.8886,
      "step": 320340
    },
    {
      "epoch": 1.1227468834004493,
      "grad_norm": 3.09375,
      "learning_rate": 3.476405211959781e-05,
      "loss": 0.8966,
      "step": 320350
    },
    {
      "epoch": 1.1227819309073448,
      "grad_norm": 2.625,
      "learning_rate": 3.476340309093411e-05,
      "loss": 0.922,
      "step": 320360
    },
    {
      "epoch": 1.1228169784142406,
      "grad_norm": 2.96875,
      "learning_rate": 3.4762754062270406e-05,
      "loss": 0.8614,
      "step": 320370
    },
    {
      "epoch": 1.122852025921136,
      "grad_norm": 2.90625,
      "learning_rate": 3.476210503360671e-05,
      "loss": 0.844,
      "step": 320380
    },
    {
      "epoch": 1.1228870734280316,
      "grad_norm": 2.625,
      "learning_rate": 3.4761456004943e-05,
      "loss": 0.9352,
      "step": 320390
    },
    {
      "epoch": 1.1229221209349274,
      "grad_norm": 2.8125,
      "learning_rate": 3.47608069762793e-05,
      "loss": 0.9705,
      "step": 320400
    },
    {
      "epoch": 1.122957168441823,
      "grad_norm": 3.09375,
      "learning_rate": 3.4760157947615605e-05,
      "loss": 0.8862,
      "step": 320410
    },
    {
      "epoch": 1.1229922159487185,
      "grad_norm": 3.015625,
      "learning_rate": 3.47595089189519e-05,
      "loss": 0.8645,
      "step": 320420
    },
    {
      "epoch": 1.123027263455614,
      "grad_norm": 2.9375,
      "learning_rate": 3.47588598902882e-05,
      "loss": 0.8056,
      "step": 320430
    },
    {
      "epoch": 1.1230623109625097,
      "grad_norm": 2.875,
      "learning_rate": 3.4758210861624495e-05,
      "loss": 0.8937,
      "step": 320440
    },
    {
      "epoch": 1.1230973584694053,
      "grad_norm": 3.125,
      "learning_rate": 3.4757561832960797e-05,
      "loss": 0.8868,
      "step": 320450
    },
    {
      "epoch": 1.1231324059763008,
      "grad_norm": 2.703125,
      "learning_rate": 3.475691280429709e-05,
      "loss": 0.8249,
      "step": 320460
    },
    {
      "epoch": 1.1231674534831966,
      "grad_norm": 2.546875,
      "learning_rate": 3.475626377563339e-05,
      "loss": 0.9282,
      "step": 320470
    },
    {
      "epoch": 1.1232025009900921,
      "grad_norm": 3.078125,
      "learning_rate": 3.475561474696969e-05,
      "loss": 0.8338,
      "step": 320480
    },
    {
      "epoch": 1.1232375484969876,
      "grad_norm": 3.265625,
      "learning_rate": 3.475496571830599e-05,
      "loss": 0.8771,
      "step": 320490
    },
    {
      "epoch": 1.1232725960038832,
      "grad_norm": 2.984375,
      "learning_rate": 3.475431668964228e-05,
      "loss": 0.8756,
      "step": 320500
    },
    {
      "epoch": 1.123307643510779,
      "grad_norm": 2.609375,
      "learning_rate": 3.4753667660978585e-05,
      "loss": 0.7916,
      "step": 320510
    },
    {
      "epoch": 1.1233426910176745,
      "grad_norm": 2.8125,
      "learning_rate": 3.475301863231488e-05,
      "loss": 0.8715,
      "step": 320520
    },
    {
      "epoch": 1.12337773852457,
      "grad_norm": 3.515625,
      "learning_rate": 3.4752369603651174e-05,
      "loss": 0.9059,
      "step": 320530
    },
    {
      "epoch": 1.1234127860314655,
      "grad_norm": 2.71875,
      "learning_rate": 3.4751720574987475e-05,
      "loss": 0.8586,
      "step": 320540
    },
    {
      "epoch": 1.1234478335383613,
      "grad_norm": 3.1875,
      "learning_rate": 3.475107154632377e-05,
      "loss": 0.8092,
      "step": 320550
    },
    {
      "epoch": 1.1234828810452568,
      "grad_norm": 2.859375,
      "learning_rate": 3.475042251766007e-05,
      "loss": 0.8535,
      "step": 320560
    },
    {
      "epoch": 1.1235179285521524,
      "grad_norm": 2.875,
      "learning_rate": 3.4749773488996366e-05,
      "loss": 0.7692,
      "step": 320570
    },
    {
      "epoch": 1.1235529760590481,
      "grad_norm": 2.921875,
      "learning_rate": 3.474912446033267e-05,
      "loss": 0.8777,
      "step": 320580
    },
    {
      "epoch": 1.1235880235659437,
      "grad_norm": 2.796875,
      "learning_rate": 3.474847543166896e-05,
      "loss": 0.8572,
      "step": 320590
    },
    {
      "epoch": 1.1236230710728392,
      "grad_norm": 2.921875,
      "learning_rate": 3.474782640300526e-05,
      "loss": 0.8458,
      "step": 320600
    },
    {
      "epoch": 1.1236581185797347,
      "grad_norm": 2.53125,
      "learning_rate": 3.474717737434156e-05,
      "loss": 0.8953,
      "step": 320610
    },
    {
      "epoch": 1.1236931660866305,
      "grad_norm": 2.96875,
      "learning_rate": 3.474652834567786e-05,
      "loss": 0.7746,
      "step": 320620
    },
    {
      "epoch": 1.123728213593526,
      "grad_norm": 3.15625,
      "learning_rate": 3.474587931701416e-05,
      "loss": 0.9429,
      "step": 320630
    },
    {
      "epoch": 1.1237632611004216,
      "grad_norm": 2.71875,
      "learning_rate": 3.4745230288350455e-05,
      "loss": 0.8414,
      "step": 320640
    },
    {
      "epoch": 1.1237983086073173,
      "grad_norm": 2.609375,
      "learning_rate": 3.4744581259686757e-05,
      "loss": 0.8269,
      "step": 320650
    },
    {
      "epoch": 1.1238333561142129,
      "grad_norm": 3.40625,
      "learning_rate": 3.474393223102305e-05,
      "loss": 1.0148,
      "step": 320660
    },
    {
      "epoch": 1.1238684036211084,
      "grad_norm": 2.453125,
      "learning_rate": 3.474328320235935e-05,
      "loss": 0.787,
      "step": 320670
    },
    {
      "epoch": 1.123903451128004,
      "grad_norm": 2.703125,
      "learning_rate": 3.474263417369565e-05,
      "loss": 0.8454,
      "step": 320680
    },
    {
      "epoch": 1.1239384986348997,
      "grad_norm": 3.125,
      "learning_rate": 3.474198514503195e-05,
      "loss": 0.8522,
      "step": 320690
    },
    {
      "epoch": 1.1239735461417952,
      "grad_norm": 3.453125,
      "learning_rate": 3.474133611636824e-05,
      "loss": 0.9067,
      "step": 320700
    },
    {
      "epoch": 1.1240085936486908,
      "grad_norm": 2.78125,
      "learning_rate": 3.4740687087704545e-05,
      "loss": 0.8657,
      "step": 320710
    },
    {
      "epoch": 1.1240436411555863,
      "grad_norm": 2.921875,
      "learning_rate": 3.474003805904084e-05,
      "loss": 0.7935,
      "step": 320720
    },
    {
      "epoch": 1.124078688662482,
      "grad_norm": 2.96875,
      "learning_rate": 3.473938903037714e-05,
      "loss": 0.8222,
      "step": 320730
    },
    {
      "epoch": 1.1241137361693776,
      "grad_norm": 3.359375,
      "learning_rate": 3.4738740001713435e-05,
      "loss": 0.8742,
      "step": 320740
    },
    {
      "epoch": 1.1241487836762731,
      "grad_norm": 2.609375,
      "learning_rate": 3.4738090973049737e-05,
      "loss": 0.9482,
      "step": 320750
    },
    {
      "epoch": 1.124183831183169,
      "grad_norm": 3.0,
      "learning_rate": 3.473744194438603e-05,
      "loss": 0.8815,
      "step": 320760
    },
    {
      "epoch": 1.1242188786900644,
      "grad_norm": 2.828125,
      "learning_rate": 3.473679291572233e-05,
      "loss": 0.8402,
      "step": 320770
    },
    {
      "epoch": 1.12425392619696,
      "grad_norm": 3.078125,
      "learning_rate": 3.4736143887058634e-05,
      "loss": 0.8222,
      "step": 320780
    },
    {
      "epoch": 1.1242889737038555,
      "grad_norm": 3.125,
      "learning_rate": 3.473549485839493e-05,
      "loss": 0.9246,
      "step": 320790
    },
    {
      "epoch": 1.1243240212107513,
      "grad_norm": 3.0625,
      "learning_rate": 3.473484582973123e-05,
      "loss": 0.8825,
      "step": 320800
    },
    {
      "epoch": 1.1243590687176468,
      "grad_norm": 2.96875,
      "learning_rate": 3.4734196801067525e-05,
      "loss": 0.8359,
      "step": 320810
    },
    {
      "epoch": 1.1243941162245423,
      "grad_norm": 2.8125,
      "learning_rate": 3.4733547772403826e-05,
      "loss": 0.8892,
      "step": 320820
    },
    {
      "epoch": 1.1244291637314379,
      "grad_norm": 2.984375,
      "learning_rate": 3.473289874374012e-05,
      "loss": 0.9084,
      "step": 320830
    },
    {
      "epoch": 1.1244642112383336,
      "grad_norm": 2.484375,
      "learning_rate": 3.473224971507642e-05,
      "loss": 0.8126,
      "step": 320840
    },
    {
      "epoch": 1.1244992587452292,
      "grad_norm": 2.640625,
      "learning_rate": 3.4731600686412717e-05,
      "loss": 0.8745,
      "step": 320850
    },
    {
      "epoch": 1.1245343062521247,
      "grad_norm": 2.765625,
      "learning_rate": 3.473095165774902e-05,
      "loss": 0.821,
      "step": 320860
    },
    {
      "epoch": 1.1245693537590205,
      "grad_norm": 3.15625,
      "learning_rate": 3.473030262908531e-05,
      "loss": 0.8777,
      "step": 320870
    },
    {
      "epoch": 1.124604401265916,
      "grad_norm": 3.265625,
      "learning_rate": 3.4729653600421614e-05,
      "loss": 0.9136,
      "step": 320880
    },
    {
      "epoch": 1.1246394487728115,
      "grad_norm": 3.375,
      "learning_rate": 3.472900457175791e-05,
      "loss": 0.8969,
      "step": 320890
    },
    {
      "epoch": 1.124674496279707,
      "grad_norm": 2.78125,
      "learning_rate": 3.47283555430942e-05,
      "loss": 0.8077,
      "step": 320900
    },
    {
      "epoch": 1.1247095437866028,
      "grad_norm": 2.875,
      "learning_rate": 3.4727706514430505e-05,
      "loss": 0.7667,
      "step": 320910
    },
    {
      "epoch": 1.1247445912934984,
      "grad_norm": 2.84375,
      "learning_rate": 3.47270574857668e-05,
      "loss": 0.9382,
      "step": 320920
    },
    {
      "epoch": 1.1247796388003939,
      "grad_norm": 2.875,
      "learning_rate": 3.47264084571031e-05,
      "loss": 0.7878,
      "step": 320930
    },
    {
      "epoch": 1.1248146863072894,
      "grad_norm": 2.890625,
      "learning_rate": 3.4725759428439395e-05,
      "loss": 0.9051,
      "step": 320940
    },
    {
      "epoch": 1.1248497338141852,
      "grad_norm": 2.9375,
      "learning_rate": 3.4725110399775697e-05,
      "loss": 0.8324,
      "step": 320950
    },
    {
      "epoch": 1.1248847813210807,
      "grad_norm": 2.3125,
      "learning_rate": 3.472446137111199e-05,
      "loss": 0.8636,
      "step": 320960
    },
    {
      "epoch": 1.1249198288279763,
      "grad_norm": 2.578125,
      "learning_rate": 3.472381234244829e-05,
      "loss": 0.8477,
      "step": 320970
    },
    {
      "epoch": 1.124954876334872,
      "grad_norm": 2.96875,
      "learning_rate": 3.472316331378459e-05,
      "loss": 0.7831,
      "step": 320980
    },
    {
      "epoch": 1.1249899238417675,
      "grad_norm": 3.53125,
      "learning_rate": 3.472251428512089e-05,
      "loss": 0.9021,
      "step": 320990
    },
    {
      "epoch": 1.125024971348663,
      "grad_norm": 2.453125,
      "learning_rate": 3.472186525645719e-05,
      "loss": 0.8616,
      "step": 321000
    },
    {
      "epoch": 1.1250600188555586,
      "grad_norm": 2.796875,
      "learning_rate": 3.4721216227793485e-05,
      "loss": 0.9408,
      "step": 321010
    },
    {
      "epoch": 1.1250950663624544,
      "grad_norm": 3.4375,
      "learning_rate": 3.4720567199129786e-05,
      "loss": 0.9567,
      "step": 321020
    },
    {
      "epoch": 1.12513011386935,
      "grad_norm": 3.34375,
      "learning_rate": 3.471991817046608e-05,
      "loss": 0.9107,
      "step": 321030
    },
    {
      "epoch": 1.1251651613762454,
      "grad_norm": 2.765625,
      "learning_rate": 3.471926914180238e-05,
      "loss": 0.8244,
      "step": 321040
    },
    {
      "epoch": 1.125200208883141,
      "grad_norm": 3.21875,
      "learning_rate": 3.4718620113138677e-05,
      "loss": 0.8924,
      "step": 321050
    },
    {
      "epoch": 1.1252352563900367,
      "grad_norm": 2.84375,
      "learning_rate": 3.471797108447498e-05,
      "loss": 0.8976,
      "step": 321060
    },
    {
      "epoch": 1.1252703038969323,
      "grad_norm": 2.765625,
      "learning_rate": 3.471732205581127e-05,
      "loss": 0.8366,
      "step": 321070
    },
    {
      "epoch": 1.1253053514038278,
      "grad_norm": 2.640625,
      "learning_rate": 3.4716673027147574e-05,
      "loss": 0.8426,
      "step": 321080
    },
    {
      "epoch": 1.1253403989107236,
      "grad_norm": 2.9375,
      "learning_rate": 3.471602399848387e-05,
      "loss": 0.9575,
      "step": 321090
    },
    {
      "epoch": 1.125375446417619,
      "grad_norm": 2.875,
      "learning_rate": 3.471537496982017e-05,
      "loss": 0.8882,
      "step": 321100
    },
    {
      "epoch": 1.1254104939245146,
      "grad_norm": 2.609375,
      "learning_rate": 3.4714725941156465e-05,
      "loss": 0.8482,
      "step": 321110
    },
    {
      "epoch": 1.1254455414314104,
      "grad_norm": 2.453125,
      "learning_rate": 3.4714076912492766e-05,
      "loss": 0.8936,
      "step": 321120
    },
    {
      "epoch": 1.125480588938306,
      "grad_norm": 2.625,
      "learning_rate": 3.471342788382906e-05,
      "loss": 0.7782,
      "step": 321130
    },
    {
      "epoch": 1.1255156364452015,
      "grad_norm": 2.765625,
      "learning_rate": 3.471277885516536e-05,
      "loss": 0.8816,
      "step": 321140
    },
    {
      "epoch": 1.125550683952097,
      "grad_norm": 2.78125,
      "learning_rate": 3.471212982650166e-05,
      "loss": 0.8969,
      "step": 321150
    },
    {
      "epoch": 1.1255857314589925,
      "grad_norm": 3.03125,
      "learning_rate": 3.471148079783796e-05,
      "loss": 0.9022,
      "step": 321160
    },
    {
      "epoch": 1.1256207789658883,
      "grad_norm": 2.703125,
      "learning_rate": 3.471083176917426e-05,
      "loss": 0.835,
      "step": 321170
    },
    {
      "epoch": 1.1256558264727838,
      "grad_norm": 2.90625,
      "learning_rate": 3.4710182740510554e-05,
      "loss": 0.832,
      "step": 321180
    },
    {
      "epoch": 1.1256908739796794,
      "grad_norm": 3.296875,
      "learning_rate": 3.4709533711846855e-05,
      "loss": 0.8755,
      "step": 321190
    },
    {
      "epoch": 1.1257259214865751,
      "grad_norm": 2.59375,
      "learning_rate": 3.470888468318315e-05,
      "loss": 0.8644,
      "step": 321200
    },
    {
      "epoch": 1.1257609689934707,
      "grad_norm": 3.203125,
      "learning_rate": 3.470823565451945e-05,
      "loss": 0.8475,
      "step": 321210
    },
    {
      "epoch": 1.1257960165003662,
      "grad_norm": 3.265625,
      "learning_rate": 3.4707586625855746e-05,
      "loss": 0.8953,
      "step": 321220
    },
    {
      "epoch": 1.125831064007262,
      "grad_norm": 2.90625,
      "learning_rate": 3.470693759719205e-05,
      "loss": 0.8485,
      "step": 321230
    },
    {
      "epoch": 1.1258661115141575,
      "grad_norm": 2.796875,
      "learning_rate": 3.470628856852834e-05,
      "loss": 0.7892,
      "step": 321240
    },
    {
      "epoch": 1.125901159021053,
      "grad_norm": 2.609375,
      "learning_rate": 3.470563953986464e-05,
      "loss": 0.798,
      "step": 321250
    },
    {
      "epoch": 1.1259362065279486,
      "grad_norm": 3.234375,
      "learning_rate": 3.470499051120094e-05,
      "loss": 0.8361,
      "step": 321260
    },
    {
      "epoch": 1.1259712540348443,
      "grad_norm": 2.859375,
      "learning_rate": 3.470434148253723e-05,
      "loss": 0.8384,
      "step": 321270
    },
    {
      "epoch": 1.1260063015417399,
      "grad_norm": 2.921875,
      "learning_rate": 3.4703692453873534e-05,
      "loss": 0.8761,
      "step": 321280
    },
    {
      "epoch": 1.1260413490486354,
      "grad_norm": 2.953125,
      "learning_rate": 3.470304342520983e-05,
      "loss": 0.8277,
      "step": 321290
    },
    {
      "epoch": 1.126076396555531,
      "grad_norm": 2.765625,
      "learning_rate": 3.470239439654613e-05,
      "loss": 0.8578,
      "step": 321300
    },
    {
      "epoch": 1.1261114440624267,
      "grad_norm": 2.59375,
      "learning_rate": 3.4701745367882425e-05,
      "loss": 0.9009,
      "step": 321310
    },
    {
      "epoch": 1.1261464915693222,
      "grad_norm": 2.53125,
      "learning_rate": 3.4701096339218726e-05,
      "loss": 0.851,
      "step": 321320
    },
    {
      "epoch": 1.1261815390762178,
      "grad_norm": 2.78125,
      "learning_rate": 3.470044731055502e-05,
      "loss": 0.848,
      "step": 321330
    },
    {
      "epoch": 1.1262165865831135,
      "grad_norm": 2.90625,
      "learning_rate": 3.469979828189132e-05,
      "loss": 0.8525,
      "step": 321340
    },
    {
      "epoch": 1.126251634090009,
      "grad_norm": 2.359375,
      "learning_rate": 3.4699149253227617e-05,
      "loss": 0.8186,
      "step": 321350
    },
    {
      "epoch": 1.1262866815969046,
      "grad_norm": 2.9375,
      "learning_rate": 3.469850022456392e-05,
      "loss": 0.8659,
      "step": 321360
    },
    {
      "epoch": 1.1263217291038001,
      "grad_norm": 3.171875,
      "learning_rate": 3.469785119590022e-05,
      "loss": 0.8731,
      "step": 321370
    },
    {
      "epoch": 1.1263567766106959,
      "grad_norm": 3.4375,
      "learning_rate": 3.4697202167236514e-05,
      "loss": 0.8271,
      "step": 321380
    },
    {
      "epoch": 1.1263918241175914,
      "grad_norm": 2.921875,
      "learning_rate": 3.4696553138572815e-05,
      "loss": 0.8397,
      "step": 321390
    },
    {
      "epoch": 1.126426871624487,
      "grad_norm": 3.015625,
      "learning_rate": 3.469590410990911e-05,
      "loss": 0.9258,
      "step": 321400
    },
    {
      "epoch": 1.1264619191313825,
      "grad_norm": 2.84375,
      "learning_rate": 3.469525508124541e-05,
      "loss": 0.9345,
      "step": 321410
    },
    {
      "epoch": 1.1264969666382783,
      "grad_norm": 2.3125,
      "learning_rate": 3.4694606052581706e-05,
      "loss": 0.8638,
      "step": 321420
    },
    {
      "epoch": 1.1265320141451738,
      "grad_norm": 3.421875,
      "learning_rate": 3.469395702391801e-05,
      "loss": 0.8644,
      "step": 321430
    },
    {
      "epoch": 1.1265670616520693,
      "grad_norm": 2.84375,
      "learning_rate": 3.46933079952543e-05,
      "loss": 0.8939,
      "step": 321440
    },
    {
      "epoch": 1.126602109158965,
      "grad_norm": 2.984375,
      "learning_rate": 3.46926589665906e-05,
      "loss": 0.8241,
      "step": 321450
    },
    {
      "epoch": 1.1266371566658606,
      "grad_norm": 2.984375,
      "learning_rate": 3.46920099379269e-05,
      "loss": 0.9342,
      "step": 321460
    },
    {
      "epoch": 1.1266722041727562,
      "grad_norm": 2.828125,
      "learning_rate": 3.46913609092632e-05,
      "loss": 0.8417,
      "step": 321470
    },
    {
      "epoch": 1.1267072516796517,
      "grad_norm": 2.96875,
      "learning_rate": 3.4690711880599494e-05,
      "loss": 0.8473,
      "step": 321480
    },
    {
      "epoch": 1.1267422991865474,
      "grad_norm": 2.953125,
      "learning_rate": 3.4690062851935795e-05,
      "loss": 0.89,
      "step": 321490
    },
    {
      "epoch": 1.126777346693443,
      "grad_norm": 2.96875,
      "learning_rate": 3.468941382327209e-05,
      "loss": 0.8876,
      "step": 321500
    },
    {
      "epoch": 1.1268123942003385,
      "grad_norm": 3.03125,
      "learning_rate": 3.468876479460839e-05,
      "loss": 0.8281,
      "step": 321510
    },
    {
      "epoch": 1.126847441707234,
      "grad_norm": 3.40625,
      "learning_rate": 3.468811576594469e-05,
      "loss": 0.8437,
      "step": 321520
    },
    {
      "epoch": 1.1268824892141298,
      "grad_norm": 2.6875,
      "learning_rate": 3.468746673728099e-05,
      "loss": 0.8445,
      "step": 321530
    },
    {
      "epoch": 1.1269175367210253,
      "grad_norm": 2.515625,
      "learning_rate": 3.468681770861729e-05,
      "loss": 0.9061,
      "step": 321540
    },
    {
      "epoch": 1.1269525842279209,
      "grad_norm": 2.640625,
      "learning_rate": 3.468616867995358e-05,
      "loss": 0.8689,
      "step": 321550
    },
    {
      "epoch": 1.1269876317348166,
      "grad_norm": 2.9375,
      "learning_rate": 3.4685519651289885e-05,
      "loss": 0.8562,
      "step": 321560
    },
    {
      "epoch": 1.1270226792417122,
      "grad_norm": 2.890625,
      "learning_rate": 3.468487062262618e-05,
      "loss": 0.8642,
      "step": 321570
    },
    {
      "epoch": 1.1270577267486077,
      "grad_norm": 3.015625,
      "learning_rate": 3.468422159396248e-05,
      "loss": 0.9127,
      "step": 321580
    },
    {
      "epoch": 1.1270927742555032,
      "grad_norm": 3.078125,
      "learning_rate": 3.4683572565298775e-05,
      "loss": 0.856,
      "step": 321590
    },
    {
      "epoch": 1.127127821762399,
      "grad_norm": 3.03125,
      "learning_rate": 3.468292353663508e-05,
      "loss": 0.8921,
      "step": 321600
    },
    {
      "epoch": 1.1271628692692945,
      "grad_norm": 2.953125,
      "learning_rate": 3.468227450797137e-05,
      "loss": 0.8959,
      "step": 321610
    },
    {
      "epoch": 1.12719791677619,
      "grad_norm": 2.6875,
      "learning_rate": 3.468162547930767e-05,
      "loss": 0.8915,
      "step": 321620
    },
    {
      "epoch": 1.1272329642830856,
      "grad_norm": 2.703125,
      "learning_rate": 3.468097645064397e-05,
      "loss": 0.891,
      "step": 321630
    },
    {
      "epoch": 1.1272680117899814,
      "grad_norm": 3.3125,
      "learning_rate": 3.468032742198027e-05,
      "loss": 0.9073,
      "step": 321640
    },
    {
      "epoch": 1.127303059296877,
      "grad_norm": 3.1875,
      "learning_rate": 3.467967839331656e-05,
      "loss": 0.8904,
      "step": 321650
    },
    {
      "epoch": 1.1273381068037724,
      "grad_norm": 2.875,
      "learning_rate": 3.467902936465286e-05,
      "loss": 0.8447,
      "step": 321660
    },
    {
      "epoch": 1.1273731543106682,
      "grad_norm": 2.890625,
      "learning_rate": 3.467838033598916e-05,
      "loss": 0.8804,
      "step": 321670
    },
    {
      "epoch": 1.1274082018175637,
      "grad_norm": 2.984375,
      "learning_rate": 3.4677731307325454e-05,
      "loss": 0.8806,
      "step": 321680
    },
    {
      "epoch": 1.1274432493244593,
      "grad_norm": 3.203125,
      "learning_rate": 3.4677082278661755e-05,
      "loss": 0.925,
      "step": 321690
    },
    {
      "epoch": 1.1274782968313548,
      "grad_norm": 2.78125,
      "learning_rate": 3.467643324999805e-05,
      "loss": 0.8506,
      "step": 321700
    },
    {
      "epoch": 1.1275133443382506,
      "grad_norm": 3.03125,
      "learning_rate": 3.467578422133435e-05,
      "loss": 0.7933,
      "step": 321710
    },
    {
      "epoch": 1.127548391845146,
      "grad_norm": 2.90625,
      "learning_rate": 3.4675135192670646e-05,
      "loss": 0.8117,
      "step": 321720
    },
    {
      "epoch": 1.1275834393520416,
      "grad_norm": 2.734375,
      "learning_rate": 3.467448616400695e-05,
      "loss": 0.8691,
      "step": 321730
    },
    {
      "epoch": 1.1276184868589372,
      "grad_norm": 3.15625,
      "learning_rate": 3.467383713534325e-05,
      "loss": 0.9057,
      "step": 321740
    },
    {
      "epoch": 1.127653534365833,
      "grad_norm": 2.625,
      "learning_rate": 3.467318810667954e-05,
      "loss": 0.8778,
      "step": 321750
    },
    {
      "epoch": 1.1276885818727285,
      "grad_norm": 2.75,
      "learning_rate": 3.4672539078015845e-05,
      "loss": 0.8738,
      "step": 321760
    },
    {
      "epoch": 1.127723629379624,
      "grad_norm": 2.765625,
      "learning_rate": 3.467189004935214e-05,
      "loss": 0.8406,
      "step": 321770
    },
    {
      "epoch": 1.1277586768865198,
      "grad_norm": 2.53125,
      "learning_rate": 3.467124102068844e-05,
      "loss": 0.8112,
      "step": 321780
    },
    {
      "epoch": 1.1277937243934153,
      "grad_norm": 3.03125,
      "learning_rate": 3.4670591992024735e-05,
      "loss": 0.7732,
      "step": 321790
    },
    {
      "epoch": 1.1278287719003108,
      "grad_norm": 2.84375,
      "learning_rate": 3.466994296336104e-05,
      "loss": 0.9141,
      "step": 321800
    },
    {
      "epoch": 1.1278638194072066,
      "grad_norm": 3.078125,
      "learning_rate": 3.466929393469733e-05,
      "loss": 0.8898,
      "step": 321810
    },
    {
      "epoch": 1.1278988669141021,
      "grad_norm": 3.375,
      "learning_rate": 3.466864490603363e-05,
      "loss": 0.8912,
      "step": 321820
    },
    {
      "epoch": 1.1279339144209977,
      "grad_norm": 2.359375,
      "learning_rate": 3.466799587736993e-05,
      "loss": 0.8827,
      "step": 321830
    },
    {
      "epoch": 1.1279689619278932,
      "grad_norm": 3.140625,
      "learning_rate": 3.466734684870623e-05,
      "loss": 0.8907,
      "step": 321840
    },
    {
      "epoch": 1.1280040094347887,
      "grad_norm": 3.109375,
      "learning_rate": 3.466669782004252e-05,
      "loss": 0.8411,
      "step": 321850
    },
    {
      "epoch": 1.1280390569416845,
      "grad_norm": 2.9375,
      "learning_rate": 3.4666048791378825e-05,
      "loss": 0.8314,
      "step": 321860
    },
    {
      "epoch": 1.12807410444858,
      "grad_norm": 2.625,
      "learning_rate": 3.4665399762715126e-05,
      "loss": 0.8521,
      "step": 321870
    },
    {
      "epoch": 1.1281091519554756,
      "grad_norm": 2.8125,
      "learning_rate": 3.466475073405142e-05,
      "loss": 0.7487,
      "step": 321880
    },
    {
      "epoch": 1.1281441994623713,
      "grad_norm": 3.203125,
      "learning_rate": 3.466410170538772e-05,
      "loss": 0.8886,
      "step": 321890
    },
    {
      "epoch": 1.1281792469692669,
      "grad_norm": 2.84375,
      "learning_rate": 3.466345267672402e-05,
      "loss": 0.9188,
      "step": 321900
    },
    {
      "epoch": 1.1282142944761624,
      "grad_norm": 2.796875,
      "learning_rate": 3.466280364806032e-05,
      "loss": 0.854,
      "step": 321910
    },
    {
      "epoch": 1.1282493419830582,
      "grad_norm": 2.984375,
      "learning_rate": 3.466215461939661e-05,
      "loss": 0.8252,
      "step": 321920
    },
    {
      "epoch": 1.1282843894899537,
      "grad_norm": 3.0625,
      "learning_rate": 3.4661505590732914e-05,
      "loss": 0.8726,
      "step": 321930
    },
    {
      "epoch": 1.1283194369968492,
      "grad_norm": 3.125,
      "learning_rate": 3.466085656206921e-05,
      "loss": 0.8653,
      "step": 321940
    },
    {
      "epoch": 1.1283544845037448,
      "grad_norm": 2.84375,
      "learning_rate": 3.466020753340551e-05,
      "loss": 0.8368,
      "step": 321950
    },
    {
      "epoch": 1.1283895320106405,
      "grad_norm": 2.296875,
      "learning_rate": 3.4659558504741805e-05,
      "loss": 0.8514,
      "step": 321960
    },
    {
      "epoch": 1.128424579517536,
      "grad_norm": 3.046875,
      "learning_rate": 3.4658909476078106e-05,
      "loss": 0.9437,
      "step": 321970
    },
    {
      "epoch": 1.1284596270244316,
      "grad_norm": 2.484375,
      "learning_rate": 3.46582604474144e-05,
      "loss": 0.8466,
      "step": 321980
    },
    {
      "epoch": 1.1284946745313271,
      "grad_norm": 3.046875,
      "learning_rate": 3.46576114187507e-05,
      "loss": 0.9921,
      "step": 321990
    },
    {
      "epoch": 1.1285297220382229,
      "grad_norm": 2.578125,
      "learning_rate": 3.4656962390087e-05,
      "loss": 0.8367,
      "step": 322000
    },
    {
      "epoch": 1.1285647695451184,
      "grad_norm": 2.734375,
      "learning_rate": 3.46563133614233e-05,
      "loss": 0.8556,
      "step": 322010
    },
    {
      "epoch": 1.128599817052014,
      "grad_norm": 2.84375,
      "learning_rate": 3.465566433275959e-05,
      "loss": 0.8337,
      "step": 322020
    },
    {
      "epoch": 1.1286348645589097,
      "grad_norm": 3.59375,
      "learning_rate": 3.465501530409589e-05,
      "loss": 0.8626,
      "step": 322030
    },
    {
      "epoch": 1.1286699120658052,
      "grad_norm": 2.953125,
      "learning_rate": 3.465436627543219e-05,
      "loss": 0.8273,
      "step": 322040
    },
    {
      "epoch": 1.1287049595727008,
      "grad_norm": 2.84375,
      "learning_rate": 3.465371724676848e-05,
      "loss": 0.8139,
      "step": 322050
    },
    {
      "epoch": 1.1287400070795963,
      "grad_norm": 3.28125,
      "learning_rate": 3.4653068218104785e-05,
      "loss": 0.8967,
      "step": 322060
    },
    {
      "epoch": 1.128775054586492,
      "grad_norm": 2.5625,
      "learning_rate": 3.465241918944108e-05,
      "loss": 0.8361,
      "step": 322070
    },
    {
      "epoch": 1.1288101020933876,
      "grad_norm": 3.21875,
      "learning_rate": 3.465177016077738e-05,
      "loss": 0.9221,
      "step": 322080
    },
    {
      "epoch": 1.1288451496002831,
      "grad_norm": 3.109375,
      "learning_rate": 3.4651121132113675e-05,
      "loss": 0.8688,
      "step": 322090
    },
    {
      "epoch": 1.1288801971071787,
      "grad_norm": 2.46875,
      "learning_rate": 3.465047210344998e-05,
      "loss": 0.891,
      "step": 322100
    },
    {
      "epoch": 1.1289152446140744,
      "grad_norm": 2.84375,
      "learning_rate": 3.464982307478628e-05,
      "loss": 0.9199,
      "step": 322110
    },
    {
      "epoch": 1.12895029212097,
      "grad_norm": 3.0625,
      "learning_rate": 3.464917404612257e-05,
      "loss": 0.873,
      "step": 322120
    },
    {
      "epoch": 1.1289853396278655,
      "grad_norm": 2.59375,
      "learning_rate": 3.4648525017458874e-05,
      "loss": 0.8853,
      "step": 322130
    },
    {
      "epoch": 1.1290203871347613,
      "grad_norm": 2.78125,
      "learning_rate": 3.464787598879517e-05,
      "loss": 0.8895,
      "step": 322140
    },
    {
      "epoch": 1.1290554346416568,
      "grad_norm": 2.96875,
      "learning_rate": 3.464722696013147e-05,
      "loss": 0.7756,
      "step": 322150
    },
    {
      "epoch": 1.1290904821485523,
      "grad_norm": 2.734375,
      "learning_rate": 3.4646577931467765e-05,
      "loss": 0.9186,
      "step": 322160
    },
    {
      "epoch": 1.1291255296554479,
      "grad_norm": 2.609375,
      "learning_rate": 3.4645928902804066e-05,
      "loss": 0.8388,
      "step": 322170
    },
    {
      "epoch": 1.1291605771623436,
      "grad_norm": 3.359375,
      "learning_rate": 3.464527987414036e-05,
      "loss": 0.8655,
      "step": 322180
    },
    {
      "epoch": 1.1291956246692392,
      "grad_norm": 2.875,
      "learning_rate": 3.464463084547666e-05,
      "loss": 0.8629,
      "step": 322190
    },
    {
      "epoch": 1.1292306721761347,
      "grad_norm": 2.71875,
      "learning_rate": 3.464398181681296e-05,
      "loss": 0.8865,
      "step": 322200
    },
    {
      "epoch": 1.1292657196830302,
      "grad_norm": 2.796875,
      "learning_rate": 3.464333278814926e-05,
      "loss": 0.8171,
      "step": 322210
    },
    {
      "epoch": 1.129300767189926,
      "grad_norm": 3.125,
      "learning_rate": 3.464268375948555e-05,
      "loss": 0.8052,
      "step": 322220
    },
    {
      "epoch": 1.1293358146968215,
      "grad_norm": 3.125,
      "learning_rate": 3.4642034730821854e-05,
      "loss": 0.8491,
      "step": 322230
    },
    {
      "epoch": 1.129370862203717,
      "grad_norm": 3.21875,
      "learning_rate": 3.4641385702158156e-05,
      "loss": 0.8695,
      "step": 322240
    },
    {
      "epoch": 1.1294059097106128,
      "grad_norm": 3.359375,
      "learning_rate": 3.464073667349445e-05,
      "loss": 0.8762,
      "step": 322250
    },
    {
      "epoch": 1.1294409572175084,
      "grad_norm": 3.1875,
      "learning_rate": 3.464008764483075e-05,
      "loss": 0.9034,
      "step": 322260
    },
    {
      "epoch": 1.129476004724404,
      "grad_norm": 2.78125,
      "learning_rate": 3.4639438616167046e-05,
      "loss": 0.8349,
      "step": 322270
    },
    {
      "epoch": 1.1295110522312994,
      "grad_norm": 2.890625,
      "learning_rate": 3.463878958750335e-05,
      "loss": 0.922,
      "step": 322280
    },
    {
      "epoch": 1.1295460997381952,
      "grad_norm": 3.015625,
      "learning_rate": 3.463814055883964e-05,
      "loss": 0.8691,
      "step": 322290
    },
    {
      "epoch": 1.1295811472450907,
      "grad_norm": 2.953125,
      "learning_rate": 3.4637491530175944e-05,
      "loss": 0.8744,
      "step": 322300
    },
    {
      "epoch": 1.1296161947519863,
      "grad_norm": 3.1875,
      "learning_rate": 3.463684250151224e-05,
      "loss": 0.8756,
      "step": 322310
    },
    {
      "epoch": 1.1296512422588818,
      "grad_norm": 2.921875,
      "learning_rate": 3.463619347284854e-05,
      "loss": 0.9117,
      "step": 322320
    },
    {
      "epoch": 1.1296862897657776,
      "grad_norm": 3.09375,
      "learning_rate": 3.4635544444184834e-05,
      "loss": 0.8428,
      "step": 322330
    },
    {
      "epoch": 1.129721337272673,
      "grad_norm": 3.15625,
      "learning_rate": 3.4634895415521136e-05,
      "loss": 0.8106,
      "step": 322340
    },
    {
      "epoch": 1.1297563847795686,
      "grad_norm": 2.453125,
      "learning_rate": 3.463424638685743e-05,
      "loss": 0.8226,
      "step": 322350
    },
    {
      "epoch": 1.1297914322864644,
      "grad_norm": 2.84375,
      "learning_rate": 3.463359735819373e-05,
      "loss": 0.8669,
      "step": 322360
    },
    {
      "epoch": 1.12982647979336,
      "grad_norm": 3.03125,
      "learning_rate": 3.4632948329530026e-05,
      "loss": 0.8783,
      "step": 322370
    },
    {
      "epoch": 1.1298615273002555,
      "grad_norm": 3.25,
      "learning_rate": 3.463229930086633e-05,
      "loss": 0.8854,
      "step": 322380
    },
    {
      "epoch": 1.129896574807151,
      "grad_norm": 2.875,
      "learning_rate": 3.463165027220263e-05,
      "loss": 0.7798,
      "step": 322390
    },
    {
      "epoch": 1.1299316223140468,
      "grad_norm": 3.140625,
      "learning_rate": 3.463100124353892e-05,
      "loss": 0.8689,
      "step": 322400
    },
    {
      "epoch": 1.1299666698209423,
      "grad_norm": 3.125,
      "learning_rate": 3.463035221487522e-05,
      "loss": 0.8378,
      "step": 322410
    },
    {
      "epoch": 1.1300017173278378,
      "grad_norm": 3.125,
      "learning_rate": 3.462970318621151e-05,
      "loss": 0.8778,
      "step": 322420
    },
    {
      "epoch": 1.1300367648347334,
      "grad_norm": 3.09375,
      "learning_rate": 3.4629054157547814e-05,
      "loss": 0.9027,
      "step": 322430
    },
    {
      "epoch": 1.1300718123416291,
      "grad_norm": 2.65625,
      "learning_rate": 3.462840512888411e-05,
      "loss": 0.8574,
      "step": 322440
    },
    {
      "epoch": 1.1301068598485247,
      "grad_norm": 2.53125,
      "learning_rate": 3.462775610022041e-05,
      "loss": 0.9045,
      "step": 322450
    },
    {
      "epoch": 1.1301419073554202,
      "grad_norm": 3.4375,
      "learning_rate": 3.4627107071556705e-05,
      "loss": 0.8206,
      "step": 322460
    },
    {
      "epoch": 1.130176954862316,
      "grad_norm": 3.078125,
      "learning_rate": 3.4626458042893006e-05,
      "loss": 0.8548,
      "step": 322470
    },
    {
      "epoch": 1.1302120023692115,
      "grad_norm": 2.828125,
      "learning_rate": 3.462580901422931e-05,
      "loss": 0.8741,
      "step": 322480
    },
    {
      "epoch": 1.130247049876107,
      "grad_norm": 2.671875,
      "learning_rate": 3.46251599855656e-05,
      "loss": 0.8946,
      "step": 322490
    },
    {
      "epoch": 1.1302820973830028,
      "grad_norm": 3.15625,
      "learning_rate": 3.4624510956901904e-05,
      "loss": 0.959,
      "step": 322500
    },
    {
      "epoch": 1.1303171448898983,
      "grad_norm": 2.71875,
      "learning_rate": 3.46238619282382e-05,
      "loss": 0.928,
      "step": 322510
    },
    {
      "epoch": 1.1303521923967939,
      "grad_norm": 3.1875,
      "learning_rate": 3.46232128995745e-05,
      "loss": 0.9713,
      "step": 322520
    },
    {
      "epoch": 1.1303872399036894,
      "grad_norm": 2.8125,
      "learning_rate": 3.4622563870910794e-05,
      "loss": 0.8811,
      "step": 322530
    },
    {
      "epoch": 1.130422287410585,
      "grad_norm": 2.5,
      "learning_rate": 3.4621914842247096e-05,
      "loss": 0.9097,
      "step": 322540
    },
    {
      "epoch": 1.1304573349174807,
      "grad_norm": 2.953125,
      "learning_rate": 3.462126581358339e-05,
      "loss": 0.8099,
      "step": 322550
    },
    {
      "epoch": 1.1304923824243762,
      "grad_norm": 3.4375,
      "learning_rate": 3.462061678491969e-05,
      "loss": 0.8441,
      "step": 322560
    },
    {
      "epoch": 1.1305274299312718,
      "grad_norm": 2.484375,
      "learning_rate": 3.4619967756255986e-05,
      "loss": 0.8341,
      "step": 322570
    },
    {
      "epoch": 1.1305624774381675,
      "grad_norm": 3.078125,
      "learning_rate": 3.461931872759229e-05,
      "loss": 0.7858,
      "step": 322580
    },
    {
      "epoch": 1.130597524945063,
      "grad_norm": 3.25,
      "learning_rate": 3.461866969892858e-05,
      "loss": 0.8655,
      "step": 322590
    },
    {
      "epoch": 1.1306325724519586,
      "grad_norm": 3.421875,
      "learning_rate": 3.4618020670264884e-05,
      "loss": 0.9349,
      "step": 322600
    },
    {
      "epoch": 1.1306676199588543,
      "grad_norm": 3.15625,
      "learning_rate": 3.4617371641601185e-05,
      "loss": 0.9568,
      "step": 322610
    },
    {
      "epoch": 1.1307026674657499,
      "grad_norm": 2.859375,
      "learning_rate": 3.461672261293748e-05,
      "loss": 0.8733,
      "step": 322620
    },
    {
      "epoch": 1.1307377149726454,
      "grad_norm": 2.75,
      "learning_rate": 3.461607358427378e-05,
      "loss": 0.8949,
      "step": 322630
    },
    {
      "epoch": 1.130772762479541,
      "grad_norm": 2.84375,
      "learning_rate": 3.4615424555610076e-05,
      "loss": 0.9648,
      "step": 322640
    },
    {
      "epoch": 1.1308078099864367,
      "grad_norm": 2.9375,
      "learning_rate": 3.461477552694638e-05,
      "loss": 0.8316,
      "step": 322650
    },
    {
      "epoch": 1.1308428574933322,
      "grad_norm": 2.96875,
      "learning_rate": 3.461412649828267e-05,
      "loss": 0.8872,
      "step": 322660
    },
    {
      "epoch": 1.1308779050002278,
      "grad_norm": 2.75,
      "learning_rate": 3.461347746961897e-05,
      "loss": 0.865,
      "step": 322670
    },
    {
      "epoch": 1.1309129525071233,
      "grad_norm": 2.8125,
      "learning_rate": 3.461282844095527e-05,
      "loss": 0.8665,
      "step": 322680
    },
    {
      "epoch": 1.130948000014019,
      "grad_norm": 2.984375,
      "learning_rate": 3.461217941229157e-05,
      "loss": 0.7938,
      "step": 322690
    },
    {
      "epoch": 1.1309830475209146,
      "grad_norm": 3.203125,
      "learning_rate": 3.4611530383627864e-05,
      "loss": 0.8917,
      "step": 322700
    },
    {
      "epoch": 1.1310180950278101,
      "grad_norm": 2.734375,
      "learning_rate": 3.4610881354964165e-05,
      "loss": 0.9626,
      "step": 322710
    },
    {
      "epoch": 1.131053142534706,
      "grad_norm": 3.03125,
      "learning_rate": 3.461023232630046e-05,
      "loss": 0.8057,
      "step": 322720
    },
    {
      "epoch": 1.1310881900416014,
      "grad_norm": 2.875,
      "learning_rate": 3.460958329763676e-05,
      "loss": 0.865,
      "step": 322730
    },
    {
      "epoch": 1.131123237548497,
      "grad_norm": 2.859375,
      "learning_rate": 3.4608934268973056e-05,
      "loss": 0.9563,
      "step": 322740
    },
    {
      "epoch": 1.1311582850553925,
      "grad_norm": 3.25,
      "learning_rate": 3.460828524030936e-05,
      "loss": 0.8454,
      "step": 322750
    },
    {
      "epoch": 1.1311933325622883,
      "grad_norm": 3.515625,
      "learning_rate": 3.460763621164566e-05,
      "loss": 0.9602,
      "step": 322760
    },
    {
      "epoch": 1.1312283800691838,
      "grad_norm": 2.734375,
      "learning_rate": 3.460698718298195e-05,
      "loss": 0.8979,
      "step": 322770
    },
    {
      "epoch": 1.1312634275760793,
      "grad_norm": 2.84375,
      "learning_rate": 3.460633815431825e-05,
      "loss": 0.7976,
      "step": 322780
    },
    {
      "epoch": 1.1312984750829749,
      "grad_norm": 2.671875,
      "learning_rate": 3.460568912565454e-05,
      "loss": 0.7666,
      "step": 322790
    },
    {
      "epoch": 1.1313335225898706,
      "grad_norm": 3.59375,
      "learning_rate": 3.4605040096990844e-05,
      "loss": 0.9424,
      "step": 322800
    },
    {
      "epoch": 1.1313685700967662,
      "grad_norm": 2.828125,
      "learning_rate": 3.460439106832714e-05,
      "loss": 0.8582,
      "step": 322810
    },
    {
      "epoch": 1.1314036176036617,
      "grad_norm": 2.953125,
      "learning_rate": 3.460374203966344e-05,
      "loss": 0.8038,
      "step": 322820
    },
    {
      "epoch": 1.1314386651105575,
      "grad_norm": 3.171875,
      "learning_rate": 3.460309301099974e-05,
      "loss": 0.8925,
      "step": 322830
    },
    {
      "epoch": 1.131473712617453,
      "grad_norm": 2.90625,
      "learning_rate": 3.4602443982336036e-05,
      "loss": 0.8619,
      "step": 322840
    },
    {
      "epoch": 1.1315087601243485,
      "grad_norm": 2.59375,
      "learning_rate": 3.460179495367234e-05,
      "loss": 0.8793,
      "step": 322850
    },
    {
      "epoch": 1.131543807631244,
      "grad_norm": 2.890625,
      "learning_rate": 3.460114592500863e-05,
      "loss": 0.9116,
      "step": 322860
    },
    {
      "epoch": 1.1315788551381398,
      "grad_norm": 2.734375,
      "learning_rate": 3.460049689634493e-05,
      "loss": 0.8297,
      "step": 322870
    },
    {
      "epoch": 1.1316139026450354,
      "grad_norm": 3.03125,
      "learning_rate": 3.459984786768123e-05,
      "loss": 0.8887,
      "step": 322880
    },
    {
      "epoch": 1.131648950151931,
      "grad_norm": 2.71875,
      "learning_rate": 3.459919883901753e-05,
      "loss": 0.8067,
      "step": 322890
    },
    {
      "epoch": 1.1316839976588264,
      "grad_norm": 2.96875,
      "learning_rate": 3.4598549810353824e-05,
      "loss": 0.8125,
      "step": 322900
    },
    {
      "epoch": 1.1317190451657222,
      "grad_norm": 3.234375,
      "learning_rate": 3.4597900781690125e-05,
      "loss": 0.7848,
      "step": 322910
    },
    {
      "epoch": 1.1317540926726177,
      "grad_norm": 3.015625,
      "learning_rate": 3.459725175302642e-05,
      "loss": 0.9233,
      "step": 322920
    },
    {
      "epoch": 1.1317891401795133,
      "grad_norm": 2.96875,
      "learning_rate": 3.459660272436272e-05,
      "loss": 0.8355,
      "step": 322930
    },
    {
      "epoch": 1.131824187686409,
      "grad_norm": 2.609375,
      "learning_rate": 3.4595953695699016e-05,
      "loss": 0.7846,
      "step": 322940
    },
    {
      "epoch": 1.1318592351933046,
      "grad_norm": 2.875,
      "learning_rate": 3.459530466703532e-05,
      "loss": 0.8186,
      "step": 322950
    },
    {
      "epoch": 1.1318942827002,
      "grad_norm": 2.921875,
      "learning_rate": 3.459465563837161e-05,
      "loss": 0.8406,
      "step": 322960
    },
    {
      "epoch": 1.1319293302070956,
      "grad_norm": 3.25,
      "learning_rate": 3.459400660970791e-05,
      "loss": 0.8526,
      "step": 322970
    },
    {
      "epoch": 1.1319643777139914,
      "grad_norm": 2.578125,
      "learning_rate": 3.4593357581044214e-05,
      "loss": 0.8287,
      "step": 322980
    },
    {
      "epoch": 1.131999425220887,
      "grad_norm": 2.96875,
      "learning_rate": 3.459270855238051e-05,
      "loss": 0.8789,
      "step": 322990
    },
    {
      "epoch": 1.1320344727277825,
      "grad_norm": 2.71875,
      "learning_rate": 3.459205952371681e-05,
      "loss": 0.8471,
      "step": 323000
    },
    {
      "epoch": 1.132069520234678,
      "grad_norm": 2.796875,
      "learning_rate": 3.4591410495053105e-05,
      "loss": 0.8354,
      "step": 323010
    },
    {
      "epoch": 1.1321045677415738,
      "grad_norm": 3.140625,
      "learning_rate": 3.4590761466389406e-05,
      "loss": 0.9546,
      "step": 323020
    },
    {
      "epoch": 1.1321396152484693,
      "grad_norm": 2.5,
      "learning_rate": 3.45901124377257e-05,
      "loss": 0.8989,
      "step": 323030
    },
    {
      "epoch": 1.1321746627553648,
      "grad_norm": 2.640625,
      "learning_rate": 3.4589463409062e-05,
      "loss": 0.8562,
      "step": 323040
    },
    {
      "epoch": 1.1322097102622606,
      "grad_norm": 2.65625,
      "learning_rate": 3.45888143803983e-05,
      "loss": 0.8957,
      "step": 323050
    },
    {
      "epoch": 1.1322447577691561,
      "grad_norm": 2.890625,
      "learning_rate": 3.45881653517346e-05,
      "loss": 0.8739,
      "step": 323060
    },
    {
      "epoch": 1.1322798052760517,
      "grad_norm": 3.15625,
      "learning_rate": 3.458751632307089e-05,
      "loss": 0.8745,
      "step": 323070
    },
    {
      "epoch": 1.1323148527829472,
      "grad_norm": 2.796875,
      "learning_rate": 3.4586867294407194e-05,
      "loss": 0.8162,
      "step": 323080
    },
    {
      "epoch": 1.132349900289843,
      "grad_norm": 3.265625,
      "learning_rate": 3.458621826574349e-05,
      "loss": 0.9135,
      "step": 323090
    },
    {
      "epoch": 1.1323849477967385,
      "grad_norm": 3.453125,
      "learning_rate": 3.458556923707979e-05,
      "loss": 0.8291,
      "step": 323100
    },
    {
      "epoch": 1.132419995303634,
      "grad_norm": 2.5,
      "learning_rate": 3.458492020841609e-05,
      "loss": 0.8075,
      "step": 323110
    },
    {
      "epoch": 1.1324550428105296,
      "grad_norm": 2.71875,
      "learning_rate": 3.4584271179752386e-05,
      "loss": 0.8382,
      "step": 323120
    },
    {
      "epoch": 1.1324900903174253,
      "grad_norm": 2.875,
      "learning_rate": 3.458362215108869e-05,
      "loss": 0.855,
      "step": 323130
    },
    {
      "epoch": 1.1325251378243208,
      "grad_norm": 2.625,
      "learning_rate": 3.458297312242498e-05,
      "loss": 0.8663,
      "step": 323140
    },
    {
      "epoch": 1.1325601853312164,
      "grad_norm": 3.09375,
      "learning_rate": 3.458232409376128e-05,
      "loss": 0.8738,
      "step": 323150
    },
    {
      "epoch": 1.1325952328381121,
      "grad_norm": 2.171875,
      "learning_rate": 3.458167506509757e-05,
      "loss": 0.8132,
      "step": 323160
    },
    {
      "epoch": 1.1326302803450077,
      "grad_norm": 2.625,
      "learning_rate": 3.458102603643387e-05,
      "loss": 0.8099,
      "step": 323170
    },
    {
      "epoch": 1.1326653278519032,
      "grad_norm": 2.921875,
      "learning_rate": 3.458037700777017e-05,
      "loss": 0.8431,
      "step": 323180
    },
    {
      "epoch": 1.132700375358799,
      "grad_norm": 2.734375,
      "learning_rate": 3.457972797910647e-05,
      "loss": 0.7798,
      "step": 323190
    },
    {
      "epoch": 1.1327354228656945,
      "grad_norm": 2.859375,
      "learning_rate": 3.457907895044277e-05,
      "loss": 0.8583,
      "step": 323200
    },
    {
      "epoch": 1.13277047037259,
      "grad_norm": 2.703125,
      "learning_rate": 3.4578429921779065e-05,
      "loss": 0.923,
      "step": 323210
    },
    {
      "epoch": 1.1328055178794856,
      "grad_norm": 3.015625,
      "learning_rate": 3.4577780893115366e-05,
      "loss": 0.8359,
      "step": 323220
    },
    {
      "epoch": 1.1328405653863811,
      "grad_norm": 2.84375,
      "learning_rate": 3.457713186445166e-05,
      "loss": 0.8668,
      "step": 323230
    },
    {
      "epoch": 1.1328756128932769,
      "grad_norm": 3.0625,
      "learning_rate": 3.457648283578796e-05,
      "loss": 0.8539,
      "step": 323240
    },
    {
      "epoch": 1.1329106604001724,
      "grad_norm": 2.59375,
      "learning_rate": 3.457583380712426e-05,
      "loss": 0.8268,
      "step": 323250
    },
    {
      "epoch": 1.132945707907068,
      "grad_norm": 2.921875,
      "learning_rate": 3.457518477846056e-05,
      "loss": 0.8947,
      "step": 323260
    },
    {
      "epoch": 1.1329807554139637,
      "grad_norm": 2.625,
      "learning_rate": 3.457453574979685e-05,
      "loss": 0.9223,
      "step": 323270
    },
    {
      "epoch": 1.1330158029208592,
      "grad_norm": 3.0625,
      "learning_rate": 3.4573886721133154e-05,
      "loss": 0.8292,
      "step": 323280
    },
    {
      "epoch": 1.1330508504277548,
      "grad_norm": 3.359375,
      "learning_rate": 3.457323769246945e-05,
      "loss": 0.7931,
      "step": 323290
    },
    {
      "epoch": 1.1330858979346505,
      "grad_norm": 3.3125,
      "learning_rate": 3.457258866380575e-05,
      "loss": 0.879,
      "step": 323300
    },
    {
      "epoch": 1.133120945441546,
      "grad_norm": 3.171875,
      "learning_rate": 3.4571939635142045e-05,
      "loss": 0.8431,
      "step": 323310
    },
    {
      "epoch": 1.1331559929484416,
      "grad_norm": 2.6875,
      "learning_rate": 3.4571290606478346e-05,
      "loss": 0.8784,
      "step": 323320
    },
    {
      "epoch": 1.1331910404553371,
      "grad_norm": 2.828125,
      "learning_rate": 3.457064157781464e-05,
      "loss": 0.8466,
      "step": 323330
    },
    {
      "epoch": 1.133226087962233,
      "grad_norm": 3.1875,
      "learning_rate": 3.456999254915094e-05,
      "loss": 0.9153,
      "step": 323340
    },
    {
      "epoch": 1.1332611354691284,
      "grad_norm": 2.71875,
      "learning_rate": 3.4569343520487244e-05,
      "loss": 0.8559,
      "step": 323350
    },
    {
      "epoch": 1.133296182976024,
      "grad_norm": 2.78125,
      "learning_rate": 3.456869449182354e-05,
      "loss": 0.8743,
      "step": 323360
    },
    {
      "epoch": 1.1333312304829195,
      "grad_norm": 3.25,
      "learning_rate": 3.456804546315984e-05,
      "loss": 0.8984,
      "step": 323370
    },
    {
      "epoch": 1.1333662779898153,
      "grad_norm": 3.265625,
      "learning_rate": 3.4567396434496134e-05,
      "loss": 0.9171,
      "step": 323380
    },
    {
      "epoch": 1.1334013254967108,
      "grad_norm": 2.984375,
      "learning_rate": 3.4566747405832436e-05,
      "loss": 0.8014,
      "step": 323390
    },
    {
      "epoch": 1.1334363730036063,
      "grad_norm": 2.9375,
      "learning_rate": 3.456609837716873e-05,
      "loss": 0.9356,
      "step": 323400
    },
    {
      "epoch": 1.133471420510502,
      "grad_norm": 2.640625,
      "learning_rate": 3.456544934850503e-05,
      "loss": 0.9047,
      "step": 323410
    },
    {
      "epoch": 1.1335064680173976,
      "grad_norm": 2.75,
      "learning_rate": 3.4564800319841326e-05,
      "loss": 0.8258,
      "step": 323420
    },
    {
      "epoch": 1.1335415155242932,
      "grad_norm": 3.109375,
      "learning_rate": 3.456415129117763e-05,
      "loss": 0.9296,
      "step": 323430
    },
    {
      "epoch": 1.1335765630311887,
      "grad_norm": 2.484375,
      "learning_rate": 3.456350226251392e-05,
      "loss": 0.8562,
      "step": 323440
    },
    {
      "epoch": 1.1336116105380845,
      "grad_norm": 2.921875,
      "learning_rate": 3.4562853233850224e-05,
      "loss": 0.9253,
      "step": 323450
    },
    {
      "epoch": 1.13364665804498,
      "grad_norm": 3.125,
      "learning_rate": 3.456220420518652e-05,
      "loss": 0.8105,
      "step": 323460
    },
    {
      "epoch": 1.1336817055518755,
      "grad_norm": 2.78125,
      "learning_rate": 3.456155517652282e-05,
      "loss": 0.7965,
      "step": 323470
    },
    {
      "epoch": 1.133716753058771,
      "grad_norm": 3.09375,
      "learning_rate": 3.456090614785912e-05,
      "loss": 0.8821,
      "step": 323480
    },
    {
      "epoch": 1.1337518005656668,
      "grad_norm": 2.8125,
      "learning_rate": 3.4560257119195416e-05,
      "loss": 0.957,
      "step": 323490
    },
    {
      "epoch": 1.1337868480725624,
      "grad_norm": 2.765625,
      "learning_rate": 3.455960809053172e-05,
      "loss": 0.8856,
      "step": 323500
    },
    {
      "epoch": 1.133821895579458,
      "grad_norm": 2.8125,
      "learning_rate": 3.455895906186801e-05,
      "loss": 0.8583,
      "step": 323510
    },
    {
      "epoch": 1.1338569430863537,
      "grad_norm": 2.953125,
      "learning_rate": 3.455831003320431e-05,
      "loss": 0.9005,
      "step": 323520
    },
    {
      "epoch": 1.1338919905932492,
      "grad_norm": 3.21875,
      "learning_rate": 3.45576610045406e-05,
      "loss": 0.8643,
      "step": 323530
    },
    {
      "epoch": 1.1339270381001447,
      "grad_norm": 3.34375,
      "learning_rate": 3.45570119758769e-05,
      "loss": 0.8222,
      "step": 323540
    },
    {
      "epoch": 1.1339620856070403,
      "grad_norm": 2.53125,
      "learning_rate": 3.45563629472132e-05,
      "loss": 0.8451,
      "step": 323550
    },
    {
      "epoch": 1.133997133113936,
      "grad_norm": 3.015625,
      "learning_rate": 3.45557139185495e-05,
      "loss": 0.8315,
      "step": 323560
    },
    {
      "epoch": 1.1340321806208316,
      "grad_norm": 2.484375,
      "learning_rate": 3.45550648898858e-05,
      "loss": 0.7775,
      "step": 323570
    },
    {
      "epoch": 1.134067228127727,
      "grad_norm": 2.578125,
      "learning_rate": 3.4554415861222094e-05,
      "loss": 0.8413,
      "step": 323580
    },
    {
      "epoch": 1.1341022756346226,
      "grad_norm": 2.953125,
      "learning_rate": 3.4553766832558396e-05,
      "loss": 0.8878,
      "step": 323590
    },
    {
      "epoch": 1.1341373231415184,
      "grad_norm": 3.28125,
      "learning_rate": 3.455311780389469e-05,
      "loss": 0.871,
      "step": 323600
    },
    {
      "epoch": 1.134172370648414,
      "grad_norm": 2.734375,
      "learning_rate": 3.455246877523099e-05,
      "loss": 0.8157,
      "step": 323610
    },
    {
      "epoch": 1.1342074181553095,
      "grad_norm": 2.890625,
      "learning_rate": 3.4551819746567286e-05,
      "loss": 0.7868,
      "step": 323620
    },
    {
      "epoch": 1.1342424656622052,
      "grad_norm": 2.578125,
      "learning_rate": 3.455117071790359e-05,
      "loss": 0.8476,
      "step": 323630
    },
    {
      "epoch": 1.1342775131691007,
      "grad_norm": 2.53125,
      "learning_rate": 3.455052168923988e-05,
      "loss": 0.8387,
      "step": 323640
    },
    {
      "epoch": 1.1343125606759963,
      "grad_norm": 3.046875,
      "learning_rate": 3.4549872660576184e-05,
      "loss": 0.8718,
      "step": 323650
    },
    {
      "epoch": 1.1343476081828918,
      "grad_norm": 2.359375,
      "learning_rate": 3.454922363191248e-05,
      "loss": 0.8904,
      "step": 323660
    },
    {
      "epoch": 1.1343826556897876,
      "grad_norm": 2.96875,
      "learning_rate": 3.454857460324878e-05,
      "loss": 0.8356,
      "step": 323670
    },
    {
      "epoch": 1.1344177031966831,
      "grad_norm": 2.921875,
      "learning_rate": 3.4547925574585074e-05,
      "loss": 0.8429,
      "step": 323680
    },
    {
      "epoch": 1.1344527507035786,
      "grad_norm": 3.171875,
      "learning_rate": 3.4547276545921376e-05,
      "loss": 0.8886,
      "step": 323690
    },
    {
      "epoch": 1.1344877982104742,
      "grad_norm": 3.1875,
      "learning_rate": 3.454662751725767e-05,
      "loss": 0.9368,
      "step": 323700
    },
    {
      "epoch": 1.13452284571737,
      "grad_norm": 3.21875,
      "learning_rate": 3.454597848859397e-05,
      "loss": 0.8198,
      "step": 323710
    },
    {
      "epoch": 1.1345578932242655,
      "grad_norm": 2.953125,
      "learning_rate": 3.454532945993027e-05,
      "loss": 0.8823,
      "step": 323720
    },
    {
      "epoch": 1.134592940731161,
      "grad_norm": 2.96875,
      "learning_rate": 3.454468043126657e-05,
      "loss": 0.8526,
      "step": 323730
    },
    {
      "epoch": 1.1346279882380568,
      "grad_norm": 2.984375,
      "learning_rate": 3.454403140260287e-05,
      "loss": 0.909,
      "step": 323740
    },
    {
      "epoch": 1.1346630357449523,
      "grad_norm": 2.265625,
      "learning_rate": 3.4543382373939164e-05,
      "loss": 0.8361,
      "step": 323750
    },
    {
      "epoch": 1.1346980832518478,
      "grad_norm": 3.125,
      "learning_rate": 3.4542733345275465e-05,
      "loss": 0.9147,
      "step": 323760
    },
    {
      "epoch": 1.1347331307587434,
      "grad_norm": 2.625,
      "learning_rate": 3.454208431661176e-05,
      "loss": 0.8458,
      "step": 323770
    },
    {
      "epoch": 1.1347681782656391,
      "grad_norm": 2.875,
      "learning_rate": 3.454143528794806e-05,
      "loss": 0.7858,
      "step": 323780
    },
    {
      "epoch": 1.1348032257725347,
      "grad_norm": 3.09375,
      "learning_rate": 3.4540786259284356e-05,
      "loss": 0.8309,
      "step": 323790
    },
    {
      "epoch": 1.1348382732794302,
      "grad_norm": 2.671875,
      "learning_rate": 3.454013723062066e-05,
      "loss": 0.7907,
      "step": 323800
    },
    {
      "epoch": 1.1348733207863257,
      "grad_norm": 3.34375,
      "learning_rate": 3.453948820195695e-05,
      "loss": 0.9217,
      "step": 323810
    },
    {
      "epoch": 1.1349083682932215,
      "grad_norm": 3.234375,
      "learning_rate": 3.453883917329325e-05,
      "loss": 0.9136,
      "step": 323820
    },
    {
      "epoch": 1.134943415800117,
      "grad_norm": 2.546875,
      "learning_rate": 3.453819014462955e-05,
      "loss": 0.7909,
      "step": 323830
    },
    {
      "epoch": 1.1349784633070126,
      "grad_norm": 3.078125,
      "learning_rate": 3.453754111596585e-05,
      "loss": 0.8616,
      "step": 323840
    },
    {
      "epoch": 1.1350135108139083,
      "grad_norm": 3.0625,
      "learning_rate": 3.453689208730215e-05,
      "loss": 0.836,
      "step": 323850
    },
    {
      "epoch": 1.1350485583208039,
      "grad_norm": 3.078125,
      "learning_rate": 3.4536243058638445e-05,
      "loss": 0.8548,
      "step": 323860
    },
    {
      "epoch": 1.1350836058276994,
      "grad_norm": 2.859375,
      "learning_rate": 3.4535594029974746e-05,
      "loss": 0.8892,
      "step": 323870
    },
    {
      "epoch": 1.1351186533345952,
      "grad_norm": 2.90625,
      "learning_rate": 3.453494500131104e-05,
      "loss": 0.8489,
      "step": 323880
    },
    {
      "epoch": 1.1351537008414907,
      "grad_norm": 2.84375,
      "learning_rate": 3.453429597264734e-05,
      "loss": 0.8713,
      "step": 323890
    },
    {
      "epoch": 1.1351887483483862,
      "grad_norm": 3.109375,
      "learning_rate": 3.453364694398363e-05,
      "loss": 0.8719,
      "step": 323900
    },
    {
      "epoch": 1.1352237958552818,
      "grad_norm": 3.03125,
      "learning_rate": 3.453299791531993e-05,
      "loss": 0.9101,
      "step": 323910
    },
    {
      "epoch": 1.1352588433621775,
      "grad_norm": 2.71875,
      "learning_rate": 3.4532348886656226e-05,
      "loss": 0.8088,
      "step": 323920
    },
    {
      "epoch": 1.135293890869073,
      "grad_norm": 2.890625,
      "learning_rate": 3.453169985799253e-05,
      "loss": 0.8835,
      "step": 323930
    },
    {
      "epoch": 1.1353289383759686,
      "grad_norm": 2.6875,
      "learning_rate": 3.453105082932883e-05,
      "loss": 0.8197,
      "step": 323940
    },
    {
      "epoch": 1.1353639858828641,
      "grad_norm": 3.046875,
      "learning_rate": 3.4530401800665124e-05,
      "loss": 0.8585,
      "step": 323950
    },
    {
      "epoch": 1.13539903338976,
      "grad_norm": 3.0,
      "learning_rate": 3.4529752772001425e-05,
      "loss": 0.998,
      "step": 323960
    },
    {
      "epoch": 1.1354340808966554,
      "grad_norm": 2.8125,
      "learning_rate": 3.452910374333772e-05,
      "loss": 0.8471,
      "step": 323970
    },
    {
      "epoch": 1.135469128403551,
      "grad_norm": 2.875,
      "learning_rate": 3.452845471467402e-05,
      "loss": 0.9087,
      "step": 323980
    },
    {
      "epoch": 1.1355041759104467,
      "grad_norm": 2.953125,
      "learning_rate": 3.4527805686010316e-05,
      "loss": 0.8373,
      "step": 323990
    },
    {
      "epoch": 1.1355392234173423,
      "grad_norm": 2.765625,
      "learning_rate": 3.452715665734662e-05,
      "loss": 0.8328,
      "step": 324000
    },
    {
      "epoch": 1.1355742709242378,
      "grad_norm": 2.859375,
      "learning_rate": 3.452650762868291e-05,
      "loss": 0.8753,
      "step": 324010
    },
    {
      "epoch": 1.1356093184311333,
      "grad_norm": 3.125,
      "learning_rate": 3.452585860001921e-05,
      "loss": 0.8441,
      "step": 324020
    },
    {
      "epoch": 1.135644365938029,
      "grad_norm": 2.734375,
      "learning_rate": 3.452520957135551e-05,
      "loss": 0.9066,
      "step": 324030
    },
    {
      "epoch": 1.1356794134449246,
      "grad_norm": 2.71875,
      "learning_rate": 3.452456054269181e-05,
      "loss": 0.8113,
      "step": 324040
    },
    {
      "epoch": 1.1357144609518202,
      "grad_norm": 3.078125,
      "learning_rate": 3.4523911514028104e-05,
      "loss": 0.8633,
      "step": 324050
    },
    {
      "epoch": 1.1357495084587157,
      "grad_norm": 3.0625,
      "learning_rate": 3.4523262485364405e-05,
      "loss": 0.8958,
      "step": 324060
    },
    {
      "epoch": 1.1357845559656115,
      "grad_norm": 3.109375,
      "learning_rate": 3.4522613456700706e-05,
      "loss": 0.8764,
      "step": 324070
    },
    {
      "epoch": 1.135819603472507,
      "grad_norm": 2.703125,
      "learning_rate": 3.4521964428037e-05,
      "loss": 0.7587,
      "step": 324080
    },
    {
      "epoch": 1.1358546509794025,
      "grad_norm": 2.84375,
      "learning_rate": 3.45213153993733e-05,
      "loss": 0.8468,
      "step": 324090
    },
    {
      "epoch": 1.1358896984862983,
      "grad_norm": 2.890625,
      "learning_rate": 3.45206663707096e-05,
      "loss": 0.8615,
      "step": 324100
    },
    {
      "epoch": 1.1359247459931938,
      "grad_norm": 3.078125,
      "learning_rate": 3.45200173420459e-05,
      "loss": 0.8034,
      "step": 324110
    },
    {
      "epoch": 1.1359597935000894,
      "grad_norm": 2.484375,
      "learning_rate": 3.451936831338219e-05,
      "loss": 0.86,
      "step": 324120
    },
    {
      "epoch": 1.135994841006985,
      "grad_norm": 3.015625,
      "learning_rate": 3.4518719284718494e-05,
      "loss": 0.9445,
      "step": 324130
    },
    {
      "epoch": 1.1360298885138806,
      "grad_norm": 2.765625,
      "learning_rate": 3.451807025605479e-05,
      "loss": 0.875,
      "step": 324140
    },
    {
      "epoch": 1.1360649360207762,
      "grad_norm": 3.171875,
      "learning_rate": 3.451742122739109e-05,
      "loss": 0.8324,
      "step": 324150
    },
    {
      "epoch": 1.1360999835276717,
      "grad_norm": 3.046875,
      "learning_rate": 3.4516772198727385e-05,
      "loss": 0.8723,
      "step": 324160
    },
    {
      "epoch": 1.1361350310345673,
      "grad_norm": 3.609375,
      "learning_rate": 3.4516123170063686e-05,
      "loss": 0.8307,
      "step": 324170
    },
    {
      "epoch": 1.136170078541463,
      "grad_norm": 2.96875,
      "learning_rate": 3.451547414139998e-05,
      "loss": 0.7769,
      "step": 324180
    },
    {
      "epoch": 1.1362051260483585,
      "grad_norm": 2.5625,
      "learning_rate": 3.451482511273628e-05,
      "loss": 0.9015,
      "step": 324190
    },
    {
      "epoch": 1.136240173555254,
      "grad_norm": 2.734375,
      "learning_rate": 3.451417608407258e-05,
      "loss": 0.8244,
      "step": 324200
    },
    {
      "epoch": 1.1362752210621498,
      "grad_norm": 2.96875,
      "learning_rate": 3.451352705540888e-05,
      "loss": 0.8102,
      "step": 324210
    },
    {
      "epoch": 1.1363102685690454,
      "grad_norm": 3.234375,
      "learning_rate": 3.451287802674518e-05,
      "loss": 0.8883,
      "step": 324220
    },
    {
      "epoch": 1.136345316075941,
      "grad_norm": 3.203125,
      "learning_rate": 3.4512228998081474e-05,
      "loss": 0.851,
      "step": 324230
    },
    {
      "epoch": 1.1363803635828365,
      "grad_norm": 2.8125,
      "learning_rate": 3.4511579969417776e-05,
      "loss": 0.9254,
      "step": 324240
    },
    {
      "epoch": 1.1364154110897322,
      "grad_norm": 2.734375,
      "learning_rate": 3.451093094075407e-05,
      "loss": 0.8633,
      "step": 324250
    },
    {
      "epoch": 1.1364504585966277,
      "grad_norm": 2.78125,
      "learning_rate": 3.451028191209037e-05,
      "loss": 0.8634,
      "step": 324260
    },
    {
      "epoch": 1.1364855061035233,
      "grad_norm": 3.0,
      "learning_rate": 3.4509632883426666e-05,
      "loss": 0.9573,
      "step": 324270
    },
    {
      "epoch": 1.1365205536104188,
      "grad_norm": 2.515625,
      "learning_rate": 3.450898385476296e-05,
      "loss": 0.9022,
      "step": 324280
    },
    {
      "epoch": 1.1365556011173146,
      "grad_norm": 3.03125,
      "learning_rate": 3.4508334826099256e-05,
      "loss": 0.8432,
      "step": 324290
    },
    {
      "epoch": 1.13659064862421,
      "grad_norm": 2.890625,
      "learning_rate": 3.450768579743556e-05,
      "loss": 0.8614,
      "step": 324300
    },
    {
      "epoch": 1.1366256961311056,
      "grad_norm": 2.984375,
      "learning_rate": 3.450703676877186e-05,
      "loss": 0.8632,
      "step": 324310
    },
    {
      "epoch": 1.1366607436380014,
      "grad_norm": 2.8125,
      "learning_rate": 3.450638774010815e-05,
      "loss": 0.866,
      "step": 324320
    },
    {
      "epoch": 1.136695791144897,
      "grad_norm": 3.046875,
      "learning_rate": 3.4505738711444454e-05,
      "loss": 0.9038,
      "step": 324330
    },
    {
      "epoch": 1.1367308386517925,
      "grad_norm": 3.484375,
      "learning_rate": 3.450508968278075e-05,
      "loss": 0.8998,
      "step": 324340
    },
    {
      "epoch": 1.136765886158688,
      "grad_norm": 3.328125,
      "learning_rate": 3.450444065411705e-05,
      "loss": 0.9586,
      "step": 324350
    },
    {
      "epoch": 1.1368009336655838,
      "grad_norm": 3.1875,
      "learning_rate": 3.4503791625453345e-05,
      "loss": 0.8591,
      "step": 324360
    },
    {
      "epoch": 1.1368359811724793,
      "grad_norm": 2.8125,
      "learning_rate": 3.4503142596789646e-05,
      "loss": 0.8464,
      "step": 324370
    },
    {
      "epoch": 1.1368710286793748,
      "grad_norm": 3.203125,
      "learning_rate": 3.450249356812594e-05,
      "loss": 0.9399,
      "step": 324380
    },
    {
      "epoch": 1.1369060761862704,
      "grad_norm": 3.265625,
      "learning_rate": 3.450184453946224e-05,
      "loss": 0.8267,
      "step": 324390
    },
    {
      "epoch": 1.1369411236931661,
      "grad_norm": 3.15625,
      "learning_rate": 3.450119551079854e-05,
      "loss": 0.9626,
      "step": 324400
    },
    {
      "epoch": 1.1369761712000617,
      "grad_norm": 3.390625,
      "learning_rate": 3.450054648213484e-05,
      "loss": 0.895,
      "step": 324410
    },
    {
      "epoch": 1.1370112187069572,
      "grad_norm": 3.015625,
      "learning_rate": 3.449989745347113e-05,
      "loss": 0.8902,
      "step": 324420
    },
    {
      "epoch": 1.137046266213853,
      "grad_norm": 2.96875,
      "learning_rate": 3.4499248424807434e-05,
      "loss": 0.8189,
      "step": 324430
    },
    {
      "epoch": 1.1370813137207485,
      "grad_norm": 2.875,
      "learning_rate": 3.4498599396143736e-05,
      "loss": 0.8556,
      "step": 324440
    },
    {
      "epoch": 1.137116361227644,
      "grad_norm": 3.21875,
      "learning_rate": 3.449795036748003e-05,
      "loss": 0.8545,
      "step": 324450
    },
    {
      "epoch": 1.1371514087345398,
      "grad_norm": 3.109375,
      "learning_rate": 3.449730133881633e-05,
      "loss": 0.9615,
      "step": 324460
    },
    {
      "epoch": 1.1371864562414353,
      "grad_norm": 2.6875,
      "learning_rate": 3.4496652310152626e-05,
      "loss": 0.8809,
      "step": 324470
    },
    {
      "epoch": 1.1372215037483309,
      "grad_norm": 2.5,
      "learning_rate": 3.449600328148893e-05,
      "loss": 0.8759,
      "step": 324480
    },
    {
      "epoch": 1.1372565512552264,
      "grad_norm": 2.9375,
      "learning_rate": 3.449535425282522e-05,
      "loss": 0.8508,
      "step": 324490
    },
    {
      "epoch": 1.137291598762122,
      "grad_norm": 2.765625,
      "learning_rate": 3.4494705224161524e-05,
      "loss": 0.8649,
      "step": 324500
    },
    {
      "epoch": 1.1373266462690177,
      "grad_norm": 3.5,
      "learning_rate": 3.449405619549782e-05,
      "loss": 0.9415,
      "step": 324510
    },
    {
      "epoch": 1.1373616937759132,
      "grad_norm": 2.890625,
      "learning_rate": 3.449340716683412e-05,
      "loss": 0.9118,
      "step": 324520
    },
    {
      "epoch": 1.1373967412828088,
      "grad_norm": 3.171875,
      "learning_rate": 3.4492758138170414e-05,
      "loss": 0.9007,
      "step": 324530
    },
    {
      "epoch": 1.1374317887897045,
      "grad_norm": 2.640625,
      "learning_rate": 3.4492109109506716e-05,
      "loss": 0.7764,
      "step": 324540
    },
    {
      "epoch": 1.1374668362966,
      "grad_norm": 3.03125,
      "learning_rate": 3.449146008084301e-05,
      "loss": 0.8242,
      "step": 324550
    },
    {
      "epoch": 1.1375018838034956,
      "grad_norm": 2.984375,
      "learning_rate": 3.449081105217931e-05,
      "loss": 0.8625,
      "step": 324560
    },
    {
      "epoch": 1.1375369313103914,
      "grad_norm": 2.859375,
      "learning_rate": 3.4490162023515606e-05,
      "loss": 0.8802,
      "step": 324570
    },
    {
      "epoch": 1.1375719788172869,
      "grad_norm": 2.484375,
      "learning_rate": 3.448951299485191e-05,
      "loss": 0.8875,
      "step": 324580
    },
    {
      "epoch": 1.1376070263241824,
      "grad_norm": 3.015625,
      "learning_rate": 3.448886396618821e-05,
      "loss": 0.8397,
      "step": 324590
    },
    {
      "epoch": 1.137642073831078,
      "grad_norm": 2.46875,
      "learning_rate": 3.4488214937524504e-05,
      "loss": 0.8717,
      "step": 324600
    },
    {
      "epoch": 1.1376771213379737,
      "grad_norm": 2.921875,
      "learning_rate": 3.4487565908860805e-05,
      "loss": 0.8396,
      "step": 324610
    },
    {
      "epoch": 1.1377121688448693,
      "grad_norm": 2.90625,
      "learning_rate": 3.44869168801971e-05,
      "loss": 0.9113,
      "step": 324620
    },
    {
      "epoch": 1.1377472163517648,
      "grad_norm": 3.125,
      "learning_rate": 3.44862678515334e-05,
      "loss": 1.0077,
      "step": 324630
    },
    {
      "epoch": 1.1377822638586603,
      "grad_norm": 2.96875,
      "learning_rate": 3.4485618822869696e-05,
      "loss": 0.9389,
      "step": 324640
    },
    {
      "epoch": 1.137817311365556,
      "grad_norm": 2.9375,
      "learning_rate": 3.4484969794206e-05,
      "loss": 0.8005,
      "step": 324650
    },
    {
      "epoch": 1.1378523588724516,
      "grad_norm": 2.875,
      "learning_rate": 3.4484320765542285e-05,
      "loss": 0.9118,
      "step": 324660
    },
    {
      "epoch": 1.1378874063793472,
      "grad_norm": 2.65625,
      "learning_rate": 3.4483671736878586e-05,
      "loss": 0.8296,
      "step": 324670
    },
    {
      "epoch": 1.137922453886243,
      "grad_norm": 2.40625,
      "learning_rate": 3.448302270821489e-05,
      "loss": 0.8362,
      "step": 324680
    },
    {
      "epoch": 1.1379575013931384,
      "grad_norm": 2.875,
      "learning_rate": 3.448237367955118e-05,
      "loss": 0.8522,
      "step": 324690
    },
    {
      "epoch": 1.137992548900034,
      "grad_norm": 2.8125,
      "learning_rate": 3.4481724650887484e-05,
      "loss": 0.8217,
      "step": 324700
    },
    {
      "epoch": 1.1380275964069295,
      "grad_norm": 3.15625,
      "learning_rate": 3.448107562222378e-05,
      "loss": 0.9126,
      "step": 324710
    },
    {
      "epoch": 1.1380626439138253,
      "grad_norm": 2.875,
      "learning_rate": 3.448042659356008e-05,
      "loss": 0.8455,
      "step": 324720
    },
    {
      "epoch": 1.1380976914207208,
      "grad_norm": 2.953125,
      "learning_rate": 3.4479777564896374e-05,
      "loss": 0.8553,
      "step": 324730
    },
    {
      "epoch": 1.1381327389276163,
      "grad_norm": 2.953125,
      "learning_rate": 3.4479128536232676e-05,
      "loss": 0.8987,
      "step": 324740
    },
    {
      "epoch": 1.1381677864345119,
      "grad_norm": 2.78125,
      "learning_rate": 3.447847950756897e-05,
      "loss": 0.8562,
      "step": 324750
    },
    {
      "epoch": 1.1382028339414076,
      "grad_norm": 2.875,
      "learning_rate": 3.447783047890527e-05,
      "loss": 0.8641,
      "step": 324760
    },
    {
      "epoch": 1.1382378814483032,
      "grad_norm": 3.015625,
      "learning_rate": 3.4477181450241566e-05,
      "loss": 0.9116,
      "step": 324770
    },
    {
      "epoch": 1.1382729289551987,
      "grad_norm": 2.484375,
      "learning_rate": 3.447653242157787e-05,
      "loss": 0.8393,
      "step": 324780
    },
    {
      "epoch": 1.1383079764620945,
      "grad_norm": 2.75,
      "learning_rate": 3.447588339291416e-05,
      "loss": 0.8481,
      "step": 324790
    },
    {
      "epoch": 1.13834302396899,
      "grad_norm": 3.296875,
      "learning_rate": 3.4475234364250464e-05,
      "loss": 0.9344,
      "step": 324800
    },
    {
      "epoch": 1.1383780714758855,
      "grad_norm": 2.75,
      "learning_rate": 3.4474585335586765e-05,
      "loss": 0.8324,
      "step": 324810
    },
    {
      "epoch": 1.138413118982781,
      "grad_norm": 3.15625,
      "learning_rate": 3.447393630692306e-05,
      "loss": 0.8979,
      "step": 324820
    },
    {
      "epoch": 1.1384481664896768,
      "grad_norm": 2.8125,
      "learning_rate": 3.447328727825936e-05,
      "loss": 0.8596,
      "step": 324830
    },
    {
      "epoch": 1.1384832139965724,
      "grad_norm": 2.828125,
      "learning_rate": 3.4472638249595656e-05,
      "loss": 0.8272,
      "step": 324840
    },
    {
      "epoch": 1.138518261503468,
      "grad_norm": 3.03125,
      "learning_rate": 3.447198922093196e-05,
      "loss": 0.8809,
      "step": 324850
    },
    {
      "epoch": 1.1385533090103634,
      "grad_norm": 2.640625,
      "learning_rate": 3.447134019226825e-05,
      "loss": 0.8812,
      "step": 324860
    },
    {
      "epoch": 1.1385883565172592,
      "grad_norm": 2.46875,
      "learning_rate": 3.447069116360455e-05,
      "loss": 0.8688,
      "step": 324870
    },
    {
      "epoch": 1.1386234040241547,
      "grad_norm": 3.203125,
      "learning_rate": 3.447004213494085e-05,
      "loss": 0.8734,
      "step": 324880
    },
    {
      "epoch": 1.1386584515310503,
      "grad_norm": 2.734375,
      "learning_rate": 3.446939310627715e-05,
      "loss": 0.8254,
      "step": 324890
    },
    {
      "epoch": 1.138693499037946,
      "grad_norm": 3.109375,
      "learning_rate": 3.4468744077613444e-05,
      "loss": 0.8292,
      "step": 324900
    },
    {
      "epoch": 1.1387285465448416,
      "grad_norm": 2.46875,
      "learning_rate": 3.4468095048949745e-05,
      "loss": 0.8208,
      "step": 324910
    },
    {
      "epoch": 1.138763594051737,
      "grad_norm": 3.21875,
      "learning_rate": 3.446744602028604e-05,
      "loss": 0.9274,
      "step": 324920
    },
    {
      "epoch": 1.1387986415586326,
      "grad_norm": 2.765625,
      "learning_rate": 3.446679699162234e-05,
      "loss": 0.8632,
      "step": 324930
    },
    {
      "epoch": 1.1388336890655284,
      "grad_norm": 2.609375,
      "learning_rate": 3.446614796295864e-05,
      "loss": 0.8901,
      "step": 324940
    },
    {
      "epoch": 1.138868736572424,
      "grad_norm": 2.921875,
      "learning_rate": 3.446549893429494e-05,
      "loss": 0.8658,
      "step": 324950
    },
    {
      "epoch": 1.1389037840793195,
      "grad_norm": 2.921875,
      "learning_rate": 3.446484990563124e-05,
      "loss": 0.8346,
      "step": 324960
    },
    {
      "epoch": 1.138938831586215,
      "grad_norm": 2.9375,
      "learning_rate": 3.446420087696753e-05,
      "loss": 0.9744,
      "step": 324970
    },
    {
      "epoch": 1.1389738790931108,
      "grad_norm": 2.96875,
      "learning_rate": 3.4463551848303835e-05,
      "loss": 0.8495,
      "step": 324980
    },
    {
      "epoch": 1.1390089266000063,
      "grad_norm": 2.921875,
      "learning_rate": 3.446290281964013e-05,
      "loss": 0.8422,
      "step": 324990
    },
    {
      "epoch": 1.1390439741069018,
      "grad_norm": 2.90625,
      "learning_rate": 3.446225379097643e-05,
      "loss": 0.832,
      "step": 325000
    },
    {
      "epoch": 1.1390439741069018,
      "eval_loss": 0.8112837672233582,
      "eval_runtime": 552.3972,
      "eval_samples_per_second": 688.7,
      "eval_steps_per_second": 57.392,
      "step": 325000
    },
    {
      "epoch": 1.1390790216137976,
      "grad_norm": 2.5625,
      "learning_rate": 3.4461604762312725e-05,
      "loss": 0.8137,
      "step": 325010
    },
    {
      "epoch": 1.1391140691206931,
      "grad_norm": 3.109375,
      "learning_rate": 3.4460955733649027e-05,
      "loss": 0.8952,
      "step": 325020
    },
    {
      "epoch": 1.1391491166275887,
      "grad_norm": 2.828125,
      "learning_rate": 3.446030670498532e-05,
      "loss": 0.9597,
      "step": 325030
    },
    {
      "epoch": 1.1391841641344842,
      "grad_norm": 2.34375,
      "learning_rate": 3.4459657676321616e-05,
      "loss": 0.8636,
      "step": 325040
    },
    {
      "epoch": 1.13921921164138,
      "grad_norm": 2.890625,
      "learning_rate": 3.445900864765792e-05,
      "loss": 0.9842,
      "step": 325050
    },
    {
      "epoch": 1.1392542591482755,
      "grad_norm": 2.5625,
      "learning_rate": 3.445835961899421e-05,
      "loss": 0.8319,
      "step": 325060
    },
    {
      "epoch": 1.139289306655171,
      "grad_norm": 3.15625,
      "learning_rate": 3.445771059033051e-05,
      "loss": 0.9021,
      "step": 325070
    },
    {
      "epoch": 1.1393243541620666,
      "grad_norm": 2.5625,
      "learning_rate": 3.445706156166681e-05,
      "loss": 0.8702,
      "step": 325080
    },
    {
      "epoch": 1.1393594016689623,
      "grad_norm": 2.59375,
      "learning_rate": 3.445641253300311e-05,
      "loss": 0.8733,
      "step": 325090
    },
    {
      "epoch": 1.1393944491758579,
      "grad_norm": 3.1875,
      "learning_rate": 3.4455763504339404e-05,
      "loss": 0.9116,
      "step": 325100
    },
    {
      "epoch": 1.1394294966827534,
      "grad_norm": 2.890625,
      "learning_rate": 3.4455114475675705e-05,
      "loss": 0.853,
      "step": 325110
    },
    {
      "epoch": 1.1394645441896492,
      "grad_norm": 2.953125,
      "learning_rate": 3.4454465447012e-05,
      "loss": 0.9433,
      "step": 325120
    },
    {
      "epoch": 1.1394995916965447,
      "grad_norm": 2.796875,
      "learning_rate": 3.44538164183483e-05,
      "loss": 0.8662,
      "step": 325130
    },
    {
      "epoch": 1.1395346392034402,
      "grad_norm": 2.59375,
      "learning_rate": 3.4453167389684596e-05,
      "loss": 0.9267,
      "step": 325140
    },
    {
      "epoch": 1.139569686710336,
      "grad_norm": 2.703125,
      "learning_rate": 3.44525183610209e-05,
      "loss": 0.7509,
      "step": 325150
    },
    {
      "epoch": 1.1396047342172315,
      "grad_norm": 3.03125,
      "learning_rate": 3.445186933235719e-05,
      "loss": 0.8528,
      "step": 325160
    },
    {
      "epoch": 1.139639781724127,
      "grad_norm": 2.8125,
      "learning_rate": 3.445122030369349e-05,
      "loss": 0.736,
      "step": 325170
    },
    {
      "epoch": 1.1396748292310226,
      "grad_norm": 3.1875,
      "learning_rate": 3.4450571275029795e-05,
      "loss": 0.8911,
      "step": 325180
    },
    {
      "epoch": 1.1397098767379181,
      "grad_norm": 2.96875,
      "learning_rate": 3.444992224636609e-05,
      "loss": 0.8312,
      "step": 325190
    },
    {
      "epoch": 1.1397449242448139,
      "grad_norm": 2.78125,
      "learning_rate": 3.444927321770239e-05,
      "loss": 0.918,
      "step": 325200
    },
    {
      "epoch": 1.1397799717517094,
      "grad_norm": 2.5,
      "learning_rate": 3.4448624189038685e-05,
      "loss": 0.8204,
      "step": 325210
    },
    {
      "epoch": 1.139815019258605,
      "grad_norm": 2.671875,
      "learning_rate": 3.4447975160374987e-05,
      "loss": 0.7966,
      "step": 325220
    },
    {
      "epoch": 1.1398500667655007,
      "grad_norm": 2.984375,
      "learning_rate": 3.444732613171128e-05,
      "loss": 0.8207,
      "step": 325230
    },
    {
      "epoch": 1.1398851142723962,
      "grad_norm": 2.53125,
      "learning_rate": 3.444667710304758e-05,
      "loss": 0.8598,
      "step": 325240
    },
    {
      "epoch": 1.1399201617792918,
      "grad_norm": 2.546875,
      "learning_rate": 3.444602807438388e-05,
      "loss": 0.8556,
      "step": 325250
    },
    {
      "epoch": 1.1399552092861875,
      "grad_norm": 3.078125,
      "learning_rate": 3.444537904572018e-05,
      "loss": 0.8527,
      "step": 325260
    },
    {
      "epoch": 1.139990256793083,
      "grad_norm": 3.015625,
      "learning_rate": 3.444473001705647e-05,
      "loss": 0.9437,
      "step": 325270
    },
    {
      "epoch": 1.1400253042999786,
      "grad_norm": 2.9375,
      "learning_rate": 3.4444080988392775e-05,
      "loss": 0.8694,
      "step": 325280
    },
    {
      "epoch": 1.1400603518068742,
      "grad_norm": 2.984375,
      "learning_rate": 3.444343195972907e-05,
      "loss": 0.9082,
      "step": 325290
    },
    {
      "epoch": 1.14009539931377,
      "grad_norm": 2.8125,
      "learning_rate": 3.444278293106537e-05,
      "loss": 0.9083,
      "step": 325300
    },
    {
      "epoch": 1.1401304468206654,
      "grad_norm": 3.046875,
      "learning_rate": 3.444213390240167e-05,
      "loss": 0.8304,
      "step": 325310
    },
    {
      "epoch": 1.140165494327561,
      "grad_norm": 3.53125,
      "learning_rate": 3.4441484873737967e-05,
      "loss": 0.8865,
      "step": 325320
    },
    {
      "epoch": 1.1402005418344565,
      "grad_norm": 1.9921875,
      "learning_rate": 3.444083584507427e-05,
      "loss": 0.8203,
      "step": 325330
    },
    {
      "epoch": 1.1402355893413523,
      "grad_norm": 3.421875,
      "learning_rate": 3.444018681641056e-05,
      "loss": 0.8673,
      "step": 325340
    },
    {
      "epoch": 1.1402706368482478,
      "grad_norm": 3.078125,
      "learning_rate": 3.4439537787746864e-05,
      "loss": 0.8696,
      "step": 325350
    },
    {
      "epoch": 1.1403056843551433,
      "grad_norm": 2.84375,
      "learning_rate": 3.443888875908316e-05,
      "loss": 0.795,
      "step": 325360
    },
    {
      "epoch": 1.140340731862039,
      "grad_norm": 2.84375,
      "learning_rate": 3.443823973041946e-05,
      "loss": 0.8871,
      "step": 325370
    },
    {
      "epoch": 1.1403757793689346,
      "grad_norm": 3.0625,
      "learning_rate": 3.4437590701755755e-05,
      "loss": 0.9002,
      "step": 325380
    },
    {
      "epoch": 1.1404108268758302,
      "grad_norm": 2.640625,
      "learning_rate": 3.4436941673092056e-05,
      "loss": 0.8238,
      "step": 325390
    },
    {
      "epoch": 1.1404458743827257,
      "grad_norm": 2.71875,
      "learning_rate": 3.443629264442835e-05,
      "loss": 0.8267,
      "step": 325400
    },
    {
      "epoch": 1.1404809218896215,
      "grad_norm": 2.796875,
      "learning_rate": 3.4435643615764645e-05,
      "loss": 0.8672,
      "step": 325410
    },
    {
      "epoch": 1.140515969396517,
      "grad_norm": 2.75,
      "learning_rate": 3.4434994587100947e-05,
      "loss": 0.867,
      "step": 325420
    },
    {
      "epoch": 1.1405510169034125,
      "grad_norm": 2.6875,
      "learning_rate": 3.443434555843724e-05,
      "loss": 0.9745,
      "step": 325430
    },
    {
      "epoch": 1.140586064410308,
      "grad_norm": 2.796875,
      "learning_rate": 3.443369652977354e-05,
      "loss": 0.8703,
      "step": 325440
    },
    {
      "epoch": 1.1406211119172038,
      "grad_norm": 2.875,
      "learning_rate": 3.443304750110984e-05,
      "loss": 0.8487,
      "step": 325450
    },
    {
      "epoch": 1.1406561594240994,
      "grad_norm": 2.921875,
      "learning_rate": 3.443239847244614e-05,
      "loss": 0.9389,
      "step": 325460
    },
    {
      "epoch": 1.140691206930995,
      "grad_norm": 3.09375,
      "learning_rate": 3.443174944378243e-05,
      "loss": 0.9325,
      "step": 325470
    },
    {
      "epoch": 1.1407262544378907,
      "grad_norm": 2.859375,
      "learning_rate": 3.4431100415118735e-05,
      "loss": 0.8389,
      "step": 325480
    },
    {
      "epoch": 1.1407613019447862,
      "grad_norm": 2.84375,
      "learning_rate": 3.443045138645503e-05,
      "loss": 0.8334,
      "step": 325490
    },
    {
      "epoch": 1.1407963494516817,
      "grad_norm": 2.78125,
      "learning_rate": 3.442980235779133e-05,
      "loss": 0.9078,
      "step": 325500
    },
    {
      "epoch": 1.1408313969585773,
      "grad_norm": 2.859375,
      "learning_rate": 3.4429153329127625e-05,
      "loss": 0.8875,
      "step": 325510
    },
    {
      "epoch": 1.140866444465473,
      "grad_norm": 2.625,
      "learning_rate": 3.4428504300463927e-05,
      "loss": 0.9045,
      "step": 325520
    },
    {
      "epoch": 1.1409014919723686,
      "grad_norm": 2.78125,
      "learning_rate": 3.442785527180022e-05,
      "loss": 0.8804,
      "step": 325530
    },
    {
      "epoch": 1.140936539479264,
      "grad_norm": 2.5625,
      "learning_rate": 3.442720624313652e-05,
      "loss": 0.8955,
      "step": 325540
    },
    {
      "epoch": 1.1409715869861596,
      "grad_norm": 3.34375,
      "learning_rate": 3.4426557214472824e-05,
      "loss": 0.8465,
      "step": 325550
    },
    {
      "epoch": 1.1410066344930554,
      "grad_norm": 3.21875,
      "learning_rate": 3.442590818580912e-05,
      "loss": 0.8551,
      "step": 325560
    },
    {
      "epoch": 1.141041681999951,
      "grad_norm": 3.375,
      "learning_rate": 3.442525915714542e-05,
      "loss": 0.8487,
      "step": 325570
    },
    {
      "epoch": 1.1410767295068465,
      "grad_norm": 2.953125,
      "learning_rate": 3.4424610128481715e-05,
      "loss": 0.8632,
      "step": 325580
    },
    {
      "epoch": 1.1411117770137422,
      "grad_norm": 3.171875,
      "learning_rate": 3.4423961099818016e-05,
      "loss": 0.8508,
      "step": 325590
    },
    {
      "epoch": 1.1411468245206378,
      "grad_norm": 2.65625,
      "learning_rate": 3.442331207115431e-05,
      "loss": 0.8138,
      "step": 325600
    },
    {
      "epoch": 1.1411818720275333,
      "grad_norm": 3.140625,
      "learning_rate": 3.442266304249061e-05,
      "loss": 0.9324,
      "step": 325610
    },
    {
      "epoch": 1.1412169195344288,
      "grad_norm": 2.703125,
      "learning_rate": 3.4422014013826907e-05,
      "loss": 0.8496,
      "step": 325620
    },
    {
      "epoch": 1.1412519670413246,
      "grad_norm": 2.484375,
      "learning_rate": 3.442136498516321e-05,
      "loss": 0.8819,
      "step": 325630
    },
    {
      "epoch": 1.1412870145482201,
      "grad_norm": 2.6875,
      "learning_rate": 3.44207159564995e-05,
      "loss": 0.8473,
      "step": 325640
    },
    {
      "epoch": 1.1413220620551157,
      "grad_norm": 3.375,
      "learning_rate": 3.4420066927835804e-05,
      "loss": 0.8209,
      "step": 325650
    },
    {
      "epoch": 1.1413571095620112,
      "grad_norm": 3.390625,
      "learning_rate": 3.44194178991721e-05,
      "loss": 0.9046,
      "step": 325660
    },
    {
      "epoch": 1.141392157068907,
      "grad_norm": 2.84375,
      "learning_rate": 3.44187688705084e-05,
      "loss": 0.8783,
      "step": 325670
    },
    {
      "epoch": 1.1414272045758025,
      "grad_norm": 2.546875,
      "learning_rate": 3.44181198418447e-05,
      "loss": 0.8707,
      "step": 325680
    },
    {
      "epoch": 1.141462252082698,
      "grad_norm": 2.8125,
      "learning_rate": 3.4417470813180996e-05,
      "loss": 0.8488,
      "step": 325690
    },
    {
      "epoch": 1.1414972995895938,
      "grad_norm": 2.734375,
      "learning_rate": 3.44168217845173e-05,
      "loss": 0.8179,
      "step": 325700
    },
    {
      "epoch": 1.1415323470964893,
      "grad_norm": 3.171875,
      "learning_rate": 3.441617275585359e-05,
      "loss": 0.7552,
      "step": 325710
    },
    {
      "epoch": 1.1415673946033849,
      "grad_norm": 2.859375,
      "learning_rate": 3.441552372718989e-05,
      "loss": 0.8104,
      "step": 325720
    },
    {
      "epoch": 1.1416024421102804,
      "grad_norm": 2.84375,
      "learning_rate": 3.441487469852619e-05,
      "loss": 0.8466,
      "step": 325730
    },
    {
      "epoch": 1.1416374896171761,
      "grad_norm": 2.765625,
      "learning_rate": 3.441422566986249e-05,
      "loss": 0.8233,
      "step": 325740
    },
    {
      "epoch": 1.1416725371240717,
      "grad_norm": 2.765625,
      "learning_rate": 3.4413576641198784e-05,
      "loss": 0.7932,
      "step": 325750
    },
    {
      "epoch": 1.1417075846309672,
      "grad_norm": 2.953125,
      "learning_rate": 3.4412927612535085e-05,
      "loss": 0.9193,
      "step": 325760
    },
    {
      "epoch": 1.1417426321378628,
      "grad_norm": 2.90625,
      "learning_rate": 3.441227858387138e-05,
      "loss": 0.8833,
      "step": 325770
    },
    {
      "epoch": 1.1417776796447585,
      "grad_norm": 2.921875,
      "learning_rate": 3.441162955520768e-05,
      "loss": 0.8669,
      "step": 325780
    },
    {
      "epoch": 1.141812727151654,
      "grad_norm": 2.921875,
      "learning_rate": 3.4410980526543976e-05,
      "loss": 0.8351,
      "step": 325790
    },
    {
      "epoch": 1.1418477746585496,
      "grad_norm": 3.734375,
      "learning_rate": 3.441033149788027e-05,
      "loss": 0.8421,
      "step": 325800
    },
    {
      "epoch": 1.1418828221654453,
      "grad_norm": 3.09375,
      "learning_rate": 3.440968246921657e-05,
      "loss": 0.8016,
      "step": 325810
    },
    {
      "epoch": 1.1419178696723409,
      "grad_norm": 2.921875,
      "learning_rate": 3.4409033440552867e-05,
      "loss": 0.8524,
      "step": 325820
    },
    {
      "epoch": 1.1419529171792364,
      "grad_norm": 2.765625,
      "learning_rate": 3.440838441188917e-05,
      "loss": 0.811,
      "step": 325830
    },
    {
      "epoch": 1.1419879646861322,
      "grad_norm": 2.734375,
      "learning_rate": 3.440773538322546e-05,
      "loss": 0.9421,
      "step": 325840
    },
    {
      "epoch": 1.1420230121930277,
      "grad_norm": 3.34375,
      "learning_rate": 3.4407086354561764e-05,
      "loss": 0.8235,
      "step": 325850
    },
    {
      "epoch": 1.1420580596999232,
      "grad_norm": 2.734375,
      "learning_rate": 3.440643732589806e-05,
      "loss": 0.8986,
      "step": 325860
    },
    {
      "epoch": 1.1420931072068188,
      "grad_norm": 2.546875,
      "learning_rate": 3.440578829723436e-05,
      "loss": 0.7466,
      "step": 325870
    },
    {
      "epoch": 1.1421281547137143,
      "grad_norm": 3.0,
      "learning_rate": 3.4405139268570655e-05,
      "loss": 0.8644,
      "step": 325880
    },
    {
      "epoch": 1.14216320222061,
      "grad_norm": 3.09375,
      "learning_rate": 3.4404490239906956e-05,
      "loss": 0.8767,
      "step": 325890
    },
    {
      "epoch": 1.1421982497275056,
      "grad_norm": 2.765625,
      "learning_rate": 3.440384121124326e-05,
      "loss": 0.8849,
      "step": 325900
    },
    {
      "epoch": 1.1422332972344011,
      "grad_norm": 2.859375,
      "learning_rate": 3.440319218257955e-05,
      "loss": 0.8899,
      "step": 325910
    },
    {
      "epoch": 1.142268344741297,
      "grad_norm": 3.0,
      "learning_rate": 3.440254315391585e-05,
      "loss": 0.9268,
      "step": 325920
    },
    {
      "epoch": 1.1423033922481924,
      "grad_norm": 2.953125,
      "learning_rate": 3.440189412525215e-05,
      "loss": 0.9282,
      "step": 325930
    },
    {
      "epoch": 1.142338439755088,
      "grad_norm": 2.359375,
      "learning_rate": 3.440124509658845e-05,
      "loss": 0.7908,
      "step": 325940
    },
    {
      "epoch": 1.1423734872619837,
      "grad_norm": 2.640625,
      "learning_rate": 3.4400596067924744e-05,
      "loss": 0.846,
      "step": 325950
    },
    {
      "epoch": 1.1424085347688793,
      "grad_norm": 3.171875,
      "learning_rate": 3.4399947039261045e-05,
      "loss": 0.8431,
      "step": 325960
    },
    {
      "epoch": 1.1424435822757748,
      "grad_norm": 2.34375,
      "learning_rate": 3.439929801059734e-05,
      "loss": 0.8119,
      "step": 325970
    },
    {
      "epoch": 1.1424786297826703,
      "grad_norm": 3.25,
      "learning_rate": 3.439864898193364e-05,
      "loss": 0.8575,
      "step": 325980
    },
    {
      "epoch": 1.142513677289566,
      "grad_norm": 3.390625,
      "learning_rate": 3.4397999953269936e-05,
      "loss": 0.8315,
      "step": 325990
    },
    {
      "epoch": 1.1425487247964616,
      "grad_norm": 2.796875,
      "learning_rate": 3.439735092460624e-05,
      "loss": 0.9037,
      "step": 326000
    },
    {
      "epoch": 1.1425837723033572,
      "grad_norm": 2.5,
      "learning_rate": 3.439670189594253e-05,
      "loss": 0.8926,
      "step": 326010
    },
    {
      "epoch": 1.1426188198102527,
      "grad_norm": 2.984375,
      "learning_rate": 3.439605286727883e-05,
      "loss": 0.892,
      "step": 326020
    },
    {
      "epoch": 1.1426538673171485,
      "grad_norm": 2.828125,
      "learning_rate": 3.439540383861513e-05,
      "loss": 0.8459,
      "step": 326030
    },
    {
      "epoch": 1.142688914824044,
      "grad_norm": 3.75,
      "learning_rate": 3.439475480995143e-05,
      "loss": 0.8647,
      "step": 326040
    },
    {
      "epoch": 1.1427239623309395,
      "grad_norm": 2.4375,
      "learning_rate": 3.439410578128773e-05,
      "loss": 0.912,
      "step": 326050
    },
    {
      "epoch": 1.1427590098378353,
      "grad_norm": 2.96875,
      "learning_rate": 3.4393456752624025e-05,
      "loss": 0.7822,
      "step": 326060
    },
    {
      "epoch": 1.1427940573447308,
      "grad_norm": 3.28125,
      "learning_rate": 3.439280772396033e-05,
      "loss": 0.8535,
      "step": 326070
    },
    {
      "epoch": 1.1428291048516264,
      "grad_norm": 3.015625,
      "learning_rate": 3.439215869529662e-05,
      "loss": 0.8649,
      "step": 326080
    },
    {
      "epoch": 1.142864152358522,
      "grad_norm": 3.15625,
      "learning_rate": 3.439150966663292e-05,
      "loss": 0.8751,
      "step": 326090
    },
    {
      "epoch": 1.1428991998654177,
      "grad_norm": 2.4375,
      "learning_rate": 3.439086063796922e-05,
      "loss": 0.8356,
      "step": 326100
    },
    {
      "epoch": 1.1429342473723132,
      "grad_norm": 2.71875,
      "learning_rate": 3.439021160930552e-05,
      "loss": 0.9289,
      "step": 326110
    },
    {
      "epoch": 1.1429692948792087,
      "grad_norm": 3.328125,
      "learning_rate": 3.438956258064181e-05,
      "loss": 0.7379,
      "step": 326120
    },
    {
      "epoch": 1.1430043423861043,
      "grad_norm": 2.328125,
      "learning_rate": 3.4388913551978115e-05,
      "loss": 0.8282,
      "step": 326130
    },
    {
      "epoch": 1.143039389893,
      "grad_norm": 2.5,
      "learning_rate": 3.438826452331441e-05,
      "loss": 0.8612,
      "step": 326140
    },
    {
      "epoch": 1.1430744373998956,
      "grad_norm": 2.6875,
      "learning_rate": 3.438761549465071e-05,
      "loss": 0.8329,
      "step": 326150
    },
    {
      "epoch": 1.143109484906791,
      "grad_norm": 2.953125,
      "learning_rate": 3.4386966465987005e-05,
      "loss": 0.8287,
      "step": 326160
    },
    {
      "epoch": 1.1431445324136869,
      "grad_norm": 2.78125,
      "learning_rate": 3.43863174373233e-05,
      "loss": 0.904,
      "step": 326170
    },
    {
      "epoch": 1.1431795799205824,
      "grad_norm": 2.984375,
      "learning_rate": 3.43856684086596e-05,
      "loss": 0.8945,
      "step": 326180
    },
    {
      "epoch": 1.143214627427478,
      "grad_norm": 3.03125,
      "learning_rate": 3.4385019379995896e-05,
      "loss": 0.8374,
      "step": 326190
    },
    {
      "epoch": 1.1432496749343735,
      "grad_norm": 2.828125,
      "learning_rate": 3.43843703513322e-05,
      "loss": 0.8467,
      "step": 326200
    },
    {
      "epoch": 1.1432847224412692,
      "grad_norm": 2.703125,
      "learning_rate": 3.438372132266849e-05,
      "loss": 0.9179,
      "step": 326210
    },
    {
      "epoch": 1.1433197699481648,
      "grad_norm": 2.625,
      "learning_rate": 3.438307229400479e-05,
      "loss": 0.8703,
      "step": 326220
    },
    {
      "epoch": 1.1433548174550603,
      "grad_norm": 2.984375,
      "learning_rate": 3.438242326534109e-05,
      "loss": 0.9191,
      "step": 326230
    },
    {
      "epoch": 1.1433898649619558,
      "grad_norm": 2.796875,
      "learning_rate": 3.438177423667739e-05,
      "loss": 0.8787,
      "step": 326240
    },
    {
      "epoch": 1.1434249124688516,
      "grad_norm": 2.59375,
      "learning_rate": 3.4381125208013684e-05,
      "loss": 0.8025,
      "step": 326250
    },
    {
      "epoch": 1.1434599599757471,
      "grad_norm": 2.75,
      "learning_rate": 3.4380476179349985e-05,
      "loss": 0.9293,
      "step": 326260
    },
    {
      "epoch": 1.1434950074826427,
      "grad_norm": 2.8125,
      "learning_rate": 3.437982715068629e-05,
      "loss": 0.8225,
      "step": 326270
    },
    {
      "epoch": 1.1435300549895384,
      "grad_norm": 3.203125,
      "learning_rate": 3.437917812202258e-05,
      "loss": 0.8893,
      "step": 326280
    },
    {
      "epoch": 1.143565102496434,
      "grad_norm": 2.625,
      "learning_rate": 3.437852909335888e-05,
      "loss": 0.9305,
      "step": 326290
    },
    {
      "epoch": 1.1436001500033295,
      "grad_norm": 2.609375,
      "learning_rate": 3.437788006469518e-05,
      "loss": 0.8011,
      "step": 326300
    },
    {
      "epoch": 1.143635197510225,
      "grad_norm": 2.84375,
      "learning_rate": 3.437723103603148e-05,
      "loss": 0.8403,
      "step": 326310
    },
    {
      "epoch": 1.1436702450171208,
      "grad_norm": 2.859375,
      "learning_rate": 3.437658200736777e-05,
      "loss": 0.9071,
      "step": 326320
    },
    {
      "epoch": 1.1437052925240163,
      "grad_norm": 3.4375,
      "learning_rate": 3.4375932978704075e-05,
      "loss": 0.8446,
      "step": 326330
    },
    {
      "epoch": 1.1437403400309119,
      "grad_norm": 2.828125,
      "learning_rate": 3.437528395004037e-05,
      "loss": 0.7451,
      "step": 326340
    },
    {
      "epoch": 1.1437753875378074,
      "grad_norm": 2.796875,
      "learning_rate": 3.437463492137667e-05,
      "loss": 0.9006,
      "step": 326350
    },
    {
      "epoch": 1.1438104350447031,
      "grad_norm": 2.625,
      "learning_rate": 3.4373985892712965e-05,
      "loss": 0.8308,
      "step": 326360
    },
    {
      "epoch": 1.1438454825515987,
      "grad_norm": 2.8125,
      "learning_rate": 3.437333686404927e-05,
      "loss": 0.8603,
      "step": 326370
    },
    {
      "epoch": 1.1438805300584942,
      "grad_norm": 2.859375,
      "learning_rate": 3.437268783538556e-05,
      "loss": 0.9774,
      "step": 326380
    },
    {
      "epoch": 1.14391557756539,
      "grad_norm": 2.6875,
      "learning_rate": 3.437203880672186e-05,
      "loss": 0.8378,
      "step": 326390
    },
    {
      "epoch": 1.1439506250722855,
      "grad_norm": 2.8125,
      "learning_rate": 3.437138977805816e-05,
      "loss": 0.8277,
      "step": 326400
    },
    {
      "epoch": 1.143985672579181,
      "grad_norm": 2.984375,
      "learning_rate": 3.437074074939446e-05,
      "loss": 0.8749,
      "step": 326410
    },
    {
      "epoch": 1.1440207200860766,
      "grad_norm": 2.65625,
      "learning_rate": 3.437009172073076e-05,
      "loss": 0.869,
      "step": 326420
    },
    {
      "epoch": 1.1440557675929723,
      "grad_norm": 2.734375,
      "learning_rate": 3.4369442692067055e-05,
      "loss": 0.775,
      "step": 326430
    },
    {
      "epoch": 1.1440908150998679,
      "grad_norm": 2.671875,
      "learning_rate": 3.4368793663403356e-05,
      "loss": 0.8494,
      "step": 326440
    },
    {
      "epoch": 1.1441258626067634,
      "grad_norm": 2.9375,
      "learning_rate": 3.436814463473965e-05,
      "loss": 0.8719,
      "step": 326450
    },
    {
      "epoch": 1.144160910113659,
      "grad_norm": 3.0,
      "learning_rate": 3.436749560607595e-05,
      "loss": 0.9535,
      "step": 326460
    },
    {
      "epoch": 1.1441959576205547,
      "grad_norm": 3.859375,
      "learning_rate": 3.436684657741225e-05,
      "loss": 0.9309,
      "step": 326470
    },
    {
      "epoch": 1.1442310051274502,
      "grad_norm": 2.5,
      "learning_rate": 3.436619754874855e-05,
      "loss": 0.8187,
      "step": 326480
    },
    {
      "epoch": 1.1442660526343458,
      "grad_norm": 3.09375,
      "learning_rate": 3.436554852008484e-05,
      "loss": 0.863,
      "step": 326490
    },
    {
      "epoch": 1.1443011001412415,
      "grad_norm": 2.8125,
      "learning_rate": 3.4364899491421144e-05,
      "loss": 0.8692,
      "step": 326500
    },
    {
      "epoch": 1.144336147648137,
      "grad_norm": 2.59375,
      "learning_rate": 3.436425046275744e-05,
      "loss": 0.789,
      "step": 326510
    },
    {
      "epoch": 1.1443711951550326,
      "grad_norm": 2.796875,
      "learning_rate": 3.436360143409374e-05,
      "loss": 0.8783,
      "step": 326520
    },
    {
      "epoch": 1.1444062426619284,
      "grad_norm": 2.5625,
      "learning_rate": 3.4362952405430035e-05,
      "loss": 0.8411,
      "step": 326530
    },
    {
      "epoch": 1.144441290168824,
      "grad_norm": 3.21875,
      "learning_rate": 3.436230337676633e-05,
      "loss": 0.8959,
      "step": 326540
    },
    {
      "epoch": 1.1444763376757194,
      "grad_norm": 3.171875,
      "learning_rate": 3.436165434810263e-05,
      "loss": 0.8757,
      "step": 326550
    },
    {
      "epoch": 1.144511385182615,
      "grad_norm": 2.609375,
      "learning_rate": 3.4361005319438925e-05,
      "loss": 0.8998,
      "step": 326560
    },
    {
      "epoch": 1.1445464326895107,
      "grad_norm": 3.25,
      "learning_rate": 3.436035629077523e-05,
      "loss": 0.8409,
      "step": 326570
    },
    {
      "epoch": 1.1445814801964063,
      "grad_norm": 2.890625,
      "learning_rate": 3.435970726211152e-05,
      "loss": 0.8524,
      "step": 326580
    },
    {
      "epoch": 1.1446165277033018,
      "grad_norm": 2.421875,
      "learning_rate": 3.435905823344782e-05,
      "loss": 0.8482,
      "step": 326590
    },
    {
      "epoch": 1.1446515752101973,
      "grad_norm": 2.75,
      "learning_rate": 3.435840920478412e-05,
      "loss": 0.8644,
      "step": 326600
    },
    {
      "epoch": 1.144686622717093,
      "grad_norm": 2.953125,
      "learning_rate": 3.435776017612042e-05,
      "loss": 0.8478,
      "step": 326610
    },
    {
      "epoch": 1.1447216702239886,
      "grad_norm": 2.859375,
      "learning_rate": 3.435711114745671e-05,
      "loss": 0.8648,
      "step": 326620
    },
    {
      "epoch": 1.1447567177308842,
      "grad_norm": 3.15625,
      "learning_rate": 3.4356462118793015e-05,
      "loss": 0.8694,
      "step": 326630
    },
    {
      "epoch": 1.14479176523778,
      "grad_norm": 3.171875,
      "learning_rate": 3.4355813090129316e-05,
      "loss": 0.8944,
      "step": 326640
    },
    {
      "epoch": 1.1448268127446755,
      "grad_norm": 3.046875,
      "learning_rate": 3.435516406146561e-05,
      "loss": 0.8884,
      "step": 326650
    },
    {
      "epoch": 1.144861860251571,
      "grad_norm": 2.640625,
      "learning_rate": 3.435451503280191e-05,
      "loss": 0.8215,
      "step": 326660
    },
    {
      "epoch": 1.1448969077584665,
      "grad_norm": 3.28125,
      "learning_rate": 3.435386600413821e-05,
      "loss": 0.9114,
      "step": 326670
    },
    {
      "epoch": 1.1449319552653623,
      "grad_norm": 2.9375,
      "learning_rate": 3.435321697547451e-05,
      "loss": 0.8119,
      "step": 326680
    },
    {
      "epoch": 1.1449670027722578,
      "grad_norm": 2.671875,
      "learning_rate": 3.43525679468108e-05,
      "loss": 0.8514,
      "step": 326690
    },
    {
      "epoch": 1.1450020502791534,
      "grad_norm": 3.125,
      "learning_rate": 3.4351918918147104e-05,
      "loss": 0.8465,
      "step": 326700
    },
    {
      "epoch": 1.145037097786049,
      "grad_norm": 2.640625,
      "learning_rate": 3.43512698894834e-05,
      "loss": 0.8511,
      "step": 326710
    },
    {
      "epoch": 1.1450721452929447,
      "grad_norm": 2.921875,
      "learning_rate": 3.43506208608197e-05,
      "loss": 0.8948,
      "step": 326720
    },
    {
      "epoch": 1.1451071927998402,
      "grad_norm": 3.140625,
      "learning_rate": 3.4349971832155995e-05,
      "loss": 0.8875,
      "step": 326730
    },
    {
      "epoch": 1.1451422403067357,
      "grad_norm": 2.765625,
      "learning_rate": 3.4349322803492296e-05,
      "loss": 0.9263,
      "step": 326740
    },
    {
      "epoch": 1.1451772878136315,
      "grad_norm": 3.0625,
      "learning_rate": 3.434867377482859e-05,
      "loss": 0.8763,
      "step": 326750
    },
    {
      "epoch": 1.145212335320527,
      "grad_norm": 3.046875,
      "learning_rate": 3.434802474616489e-05,
      "loss": 0.9243,
      "step": 326760
    },
    {
      "epoch": 1.1452473828274226,
      "grad_norm": 3.125,
      "learning_rate": 3.434737571750119e-05,
      "loss": 0.861,
      "step": 326770
    },
    {
      "epoch": 1.145282430334318,
      "grad_norm": 2.65625,
      "learning_rate": 3.434672668883749e-05,
      "loss": 0.8183,
      "step": 326780
    },
    {
      "epoch": 1.1453174778412138,
      "grad_norm": 2.953125,
      "learning_rate": 3.434607766017379e-05,
      "loss": 0.8807,
      "step": 326790
    },
    {
      "epoch": 1.1453525253481094,
      "grad_norm": 3.234375,
      "learning_rate": 3.4345428631510084e-05,
      "loss": 0.8723,
      "step": 326800
    },
    {
      "epoch": 1.145387572855005,
      "grad_norm": 2.8125,
      "learning_rate": 3.4344779602846386e-05,
      "loss": 0.8212,
      "step": 326810
    },
    {
      "epoch": 1.1454226203619005,
      "grad_norm": 3.359375,
      "learning_rate": 3.434413057418268e-05,
      "loss": 0.9458,
      "step": 326820
    },
    {
      "epoch": 1.1454576678687962,
      "grad_norm": 2.859375,
      "learning_rate": 3.434348154551898e-05,
      "loss": 0.88,
      "step": 326830
    },
    {
      "epoch": 1.1454927153756918,
      "grad_norm": 2.484375,
      "learning_rate": 3.4342832516855276e-05,
      "loss": 0.8246,
      "step": 326840
    },
    {
      "epoch": 1.1455277628825873,
      "grad_norm": 3.078125,
      "learning_rate": 3.434218348819158e-05,
      "loss": 0.8558,
      "step": 326850
    },
    {
      "epoch": 1.145562810389483,
      "grad_norm": 3.0,
      "learning_rate": 3.434153445952787e-05,
      "loss": 0.8139,
      "step": 326860
    },
    {
      "epoch": 1.1455978578963786,
      "grad_norm": 2.78125,
      "learning_rate": 3.4340885430864174e-05,
      "loss": 0.8746,
      "step": 326870
    },
    {
      "epoch": 1.1456329054032741,
      "grad_norm": 3.28125,
      "learning_rate": 3.434023640220047e-05,
      "loss": 0.8596,
      "step": 326880
    },
    {
      "epoch": 1.1456679529101697,
      "grad_norm": 2.71875,
      "learning_rate": 3.433958737353677e-05,
      "loss": 0.9673,
      "step": 326890
    },
    {
      "epoch": 1.1457030004170654,
      "grad_norm": 3.234375,
      "learning_rate": 3.4338938344873064e-05,
      "loss": 0.8808,
      "step": 326900
    },
    {
      "epoch": 1.145738047923961,
      "grad_norm": 2.796875,
      "learning_rate": 3.433828931620936e-05,
      "loss": 0.8819,
      "step": 326910
    },
    {
      "epoch": 1.1457730954308565,
      "grad_norm": 3.046875,
      "learning_rate": 3.433764028754566e-05,
      "loss": 0.8403,
      "step": 326920
    },
    {
      "epoch": 1.145808142937752,
      "grad_norm": 2.75,
      "learning_rate": 3.4336991258881955e-05,
      "loss": 0.8381,
      "step": 326930
    },
    {
      "epoch": 1.1458431904446478,
      "grad_norm": 2.578125,
      "learning_rate": 3.4336342230218256e-05,
      "loss": 0.9002,
      "step": 326940
    },
    {
      "epoch": 1.1458782379515433,
      "grad_norm": 3.25,
      "learning_rate": 3.433569320155455e-05,
      "loss": 0.8235,
      "step": 326950
    },
    {
      "epoch": 1.1459132854584388,
      "grad_norm": 2.71875,
      "learning_rate": 3.433504417289085e-05,
      "loss": 0.7809,
      "step": 326960
    },
    {
      "epoch": 1.1459483329653346,
      "grad_norm": 2.921875,
      "learning_rate": 3.433439514422715e-05,
      "loss": 0.9329,
      "step": 326970
    },
    {
      "epoch": 1.1459833804722301,
      "grad_norm": 2.609375,
      "learning_rate": 3.433374611556345e-05,
      "loss": 0.8995,
      "step": 326980
    },
    {
      "epoch": 1.1460184279791257,
      "grad_norm": 2.5,
      "learning_rate": 3.433309708689974e-05,
      "loss": 0.8357,
      "step": 326990
    },
    {
      "epoch": 1.1460534754860212,
      "grad_norm": 2.640625,
      "learning_rate": 3.4332448058236044e-05,
      "loss": 0.8186,
      "step": 327000
    },
    {
      "epoch": 1.146088522992917,
      "grad_norm": 2.625,
      "learning_rate": 3.4331799029572346e-05,
      "loss": 0.8459,
      "step": 327010
    },
    {
      "epoch": 1.1461235704998125,
      "grad_norm": 2.8125,
      "learning_rate": 3.433115000090864e-05,
      "loss": 0.8958,
      "step": 327020
    },
    {
      "epoch": 1.146158618006708,
      "grad_norm": 2.421875,
      "learning_rate": 3.433050097224494e-05,
      "loss": 0.8288,
      "step": 327030
    },
    {
      "epoch": 1.1461936655136036,
      "grad_norm": 2.15625,
      "learning_rate": 3.4329851943581236e-05,
      "loss": 0.8089,
      "step": 327040
    },
    {
      "epoch": 1.1462287130204993,
      "grad_norm": 2.953125,
      "learning_rate": 3.432920291491754e-05,
      "loss": 0.8911,
      "step": 327050
    },
    {
      "epoch": 1.1462637605273949,
      "grad_norm": 3.3125,
      "learning_rate": 3.432855388625383e-05,
      "loss": 0.9172,
      "step": 327060
    },
    {
      "epoch": 1.1462988080342904,
      "grad_norm": 3.03125,
      "learning_rate": 3.4327904857590134e-05,
      "loss": 0.8783,
      "step": 327070
    },
    {
      "epoch": 1.1463338555411862,
      "grad_norm": 3.078125,
      "learning_rate": 3.432725582892643e-05,
      "loss": 0.8868,
      "step": 327080
    },
    {
      "epoch": 1.1463689030480817,
      "grad_norm": 2.984375,
      "learning_rate": 3.432660680026273e-05,
      "loss": 0.8482,
      "step": 327090
    },
    {
      "epoch": 1.1464039505549772,
      "grad_norm": 3.515625,
      "learning_rate": 3.4325957771599024e-05,
      "loss": 0.9302,
      "step": 327100
    },
    {
      "epoch": 1.1464389980618728,
      "grad_norm": 2.71875,
      "learning_rate": 3.4325308742935326e-05,
      "loss": 0.9455,
      "step": 327110
    },
    {
      "epoch": 1.1464740455687685,
      "grad_norm": 3.1875,
      "learning_rate": 3.432465971427162e-05,
      "loss": 0.8464,
      "step": 327120
    },
    {
      "epoch": 1.146509093075664,
      "grad_norm": 2.65625,
      "learning_rate": 3.432401068560792e-05,
      "loss": 0.8114,
      "step": 327130
    },
    {
      "epoch": 1.1465441405825596,
      "grad_norm": 2.921875,
      "learning_rate": 3.432336165694422e-05,
      "loss": 0.9232,
      "step": 327140
    },
    {
      "epoch": 1.1465791880894551,
      "grad_norm": 2.6875,
      "learning_rate": 3.432271262828052e-05,
      "loss": 0.9934,
      "step": 327150
    },
    {
      "epoch": 1.146614235596351,
      "grad_norm": 3.171875,
      "learning_rate": 3.432206359961682e-05,
      "loss": 0.8519,
      "step": 327160
    },
    {
      "epoch": 1.1466492831032464,
      "grad_norm": 2.75,
      "learning_rate": 3.4321414570953114e-05,
      "loss": 0.8281,
      "step": 327170
    },
    {
      "epoch": 1.146684330610142,
      "grad_norm": 2.875,
      "learning_rate": 3.4320765542289415e-05,
      "loss": 0.9118,
      "step": 327180
    },
    {
      "epoch": 1.1467193781170377,
      "grad_norm": 2.8125,
      "learning_rate": 3.432011651362571e-05,
      "loss": 0.9071,
      "step": 327190
    },
    {
      "epoch": 1.1467544256239333,
      "grad_norm": 2.890625,
      "learning_rate": 3.431946748496201e-05,
      "loss": 0.8772,
      "step": 327200
    },
    {
      "epoch": 1.1467894731308288,
      "grad_norm": 2.921875,
      "learning_rate": 3.4318818456298306e-05,
      "loss": 0.8479,
      "step": 327210
    },
    {
      "epoch": 1.1468245206377246,
      "grad_norm": 2.96875,
      "learning_rate": 3.431816942763461e-05,
      "loss": 0.9147,
      "step": 327220
    },
    {
      "epoch": 1.14685956814462,
      "grad_norm": 2.921875,
      "learning_rate": 3.43175203989709e-05,
      "loss": 0.8845,
      "step": 327230
    },
    {
      "epoch": 1.1468946156515156,
      "grad_norm": 2.984375,
      "learning_rate": 3.43168713703072e-05,
      "loss": 0.822,
      "step": 327240
    },
    {
      "epoch": 1.1469296631584112,
      "grad_norm": 3.125,
      "learning_rate": 3.43162223416435e-05,
      "loss": 0.8683,
      "step": 327250
    },
    {
      "epoch": 1.146964710665307,
      "grad_norm": 2.765625,
      "learning_rate": 3.43155733129798e-05,
      "loss": 0.8957,
      "step": 327260
    },
    {
      "epoch": 1.1469997581722025,
      "grad_norm": 2.703125,
      "learning_rate": 3.4314924284316094e-05,
      "loss": 0.7984,
      "step": 327270
    },
    {
      "epoch": 1.147034805679098,
      "grad_norm": 2.578125,
      "learning_rate": 3.4314275255652395e-05,
      "loss": 0.8406,
      "step": 327280
    },
    {
      "epoch": 1.1470698531859935,
      "grad_norm": 2.984375,
      "learning_rate": 3.431362622698869e-05,
      "loss": 0.8245,
      "step": 327290
    },
    {
      "epoch": 1.1471049006928893,
      "grad_norm": 2.921875,
      "learning_rate": 3.4312977198324984e-05,
      "loss": 0.89,
      "step": 327300
    },
    {
      "epoch": 1.1471399481997848,
      "grad_norm": 2.65625,
      "learning_rate": 3.4312328169661286e-05,
      "loss": 0.8906,
      "step": 327310
    },
    {
      "epoch": 1.1471749957066804,
      "grad_norm": 2.65625,
      "learning_rate": 3.431167914099758e-05,
      "loss": 0.942,
      "step": 327320
    },
    {
      "epoch": 1.1472100432135761,
      "grad_norm": 2.5625,
      "learning_rate": 3.431103011233388e-05,
      "loss": 0.82,
      "step": 327330
    },
    {
      "epoch": 1.1472450907204716,
      "grad_norm": 3.21875,
      "learning_rate": 3.4310381083670176e-05,
      "loss": 0.8508,
      "step": 327340
    },
    {
      "epoch": 1.1472801382273672,
      "grad_norm": 3.140625,
      "learning_rate": 3.430973205500648e-05,
      "loss": 0.9768,
      "step": 327350
    },
    {
      "epoch": 1.1473151857342627,
      "grad_norm": 2.25,
      "learning_rate": 3.430908302634277e-05,
      "loss": 0.7926,
      "step": 327360
    },
    {
      "epoch": 1.1473502332411585,
      "grad_norm": 2.65625,
      "learning_rate": 3.4308433997679074e-05,
      "loss": 0.8444,
      "step": 327370
    },
    {
      "epoch": 1.147385280748054,
      "grad_norm": 2.671875,
      "learning_rate": 3.4307784969015375e-05,
      "loss": 0.7605,
      "step": 327380
    },
    {
      "epoch": 1.1474203282549496,
      "grad_norm": 2.625,
      "learning_rate": 3.430713594035167e-05,
      "loss": 0.8615,
      "step": 327390
    },
    {
      "epoch": 1.147455375761845,
      "grad_norm": 2.765625,
      "learning_rate": 3.430648691168797e-05,
      "loss": 0.8979,
      "step": 327400
    },
    {
      "epoch": 1.1474904232687408,
      "grad_norm": 2.65625,
      "learning_rate": 3.4305837883024266e-05,
      "loss": 0.9035,
      "step": 327410
    },
    {
      "epoch": 1.1475254707756364,
      "grad_norm": 3.109375,
      "learning_rate": 3.430518885436057e-05,
      "loss": 0.7654,
      "step": 327420
    },
    {
      "epoch": 1.147560518282532,
      "grad_norm": 2.90625,
      "learning_rate": 3.430453982569686e-05,
      "loss": 0.8494,
      "step": 327430
    },
    {
      "epoch": 1.1475955657894277,
      "grad_norm": 2.625,
      "learning_rate": 3.430389079703316e-05,
      "loss": 0.8409,
      "step": 327440
    },
    {
      "epoch": 1.1476306132963232,
      "grad_norm": 2.78125,
      "learning_rate": 3.430324176836946e-05,
      "loss": 0.8757,
      "step": 327450
    },
    {
      "epoch": 1.1476656608032187,
      "grad_norm": 2.828125,
      "learning_rate": 3.430259273970576e-05,
      "loss": 0.8703,
      "step": 327460
    },
    {
      "epoch": 1.1477007083101143,
      "grad_norm": 3.3125,
      "learning_rate": 3.4301943711042054e-05,
      "loss": 0.9059,
      "step": 327470
    },
    {
      "epoch": 1.14773575581701,
      "grad_norm": 2.546875,
      "learning_rate": 3.4301294682378355e-05,
      "loss": 0.8358,
      "step": 327480
    },
    {
      "epoch": 1.1477708033239056,
      "grad_norm": 2.96875,
      "learning_rate": 3.430064565371465e-05,
      "loss": 0.9447,
      "step": 327490
    },
    {
      "epoch": 1.147805850830801,
      "grad_norm": 3.09375,
      "learning_rate": 3.429999662505095e-05,
      "loss": 0.879,
      "step": 327500
    },
    {
      "epoch": 1.1478408983376966,
      "grad_norm": 2.609375,
      "learning_rate": 3.429934759638725e-05,
      "loss": 0.8645,
      "step": 327510
    },
    {
      "epoch": 1.1478759458445924,
      "grad_norm": 3.1875,
      "learning_rate": 3.429869856772355e-05,
      "loss": 0.8458,
      "step": 327520
    },
    {
      "epoch": 1.147910993351488,
      "grad_norm": 2.734375,
      "learning_rate": 3.429804953905985e-05,
      "loss": 0.8718,
      "step": 327530
    },
    {
      "epoch": 1.1479460408583835,
      "grad_norm": 3.0,
      "learning_rate": 3.429740051039614e-05,
      "loss": 0.8603,
      "step": 327540
    },
    {
      "epoch": 1.1479810883652792,
      "grad_norm": 2.859375,
      "learning_rate": 3.4296751481732444e-05,
      "loss": 0.7834,
      "step": 327550
    },
    {
      "epoch": 1.1480161358721748,
      "grad_norm": 2.703125,
      "learning_rate": 3.429610245306874e-05,
      "loss": 0.9002,
      "step": 327560
    },
    {
      "epoch": 1.1480511833790703,
      "grad_norm": 3.984375,
      "learning_rate": 3.429545342440504e-05,
      "loss": 0.8409,
      "step": 327570
    },
    {
      "epoch": 1.1480862308859658,
      "grad_norm": 2.984375,
      "learning_rate": 3.4294804395741335e-05,
      "loss": 0.8705,
      "step": 327580
    },
    {
      "epoch": 1.1481212783928616,
      "grad_norm": 3.25,
      "learning_rate": 3.4294155367077636e-05,
      "loss": 0.9589,
      "step": 327590
    },
    {
      "epoch": 1.1481563258997571,
      "grad_norm": 2.890625,
      "learning_rate": 3.429350633841393e-05,
      "loss": 0.8227,
      "step": 327600
    },
    {
      "epoch": 1.1481913734066527,
      "grad_norm": 2.75,
      "learning_rate": 3.429285730975023e-05,
      "loss": 0.7754,
      "step": 327610
    },
    {
      "epoch": 1.1482264209135482,
      "grad_norm": 2.734375,
      "learning_rate": 3.429220828108653e-05,
      "loss": 0.8053,
      "step": 327620
    },
    {
      "epoch": 1.148261468420444,
      "grad_norm": 2.59375,
      "learning_rate": 3.429155925242283e-05,
      "loss": 0.9102,
      "step": 327630
    },
    {
      "epoch": 1.1482965159273395,
      "grad_norm": 2.6875,
      "learning_rate": 3.429091022375912e-05,
      "loss": 0.8563,
      "step": 327640
    },
    {
      "epoch": 1.148331563434235,
      "grad_norm": 2.796875,
      "learning_rate": 3.4290261195095424e-05,
      "loss": 0.9,
      "step": 327650
    },
    {
      "epoch": 1.1483666109411308,
      "grad_norm": 2.71875,
      "learning_rate": 3.4289612166431726e-05,
      "loss": 0.9133,
      "step": 327660
    },
    {
      "epoch": 1.1484016584480263,
      "grad_norm": 2.828125,
      "learning_rate": 3.4288963137768014e-05,
      "loss": 0.8759,
      "step": 327670
    },
    {
      "epoch": 1.1484367059549219,
      "grad_norm": 2.890625,
      "learning_rate": 3.4288314109104315e-05,
      "loss": 0.9036,
      "step": 327680
    },
    {
      "epoch": 1.1484717534618174,
      "grad_norm": 3.21875,
      "learning_rate": 3.428766508044061e-05,
      "loss": 0.8825,
      "step": 327690
    },
    {
      "epoch": 1.1485068009687132,
      "grad_norm": 2.890625,
      "learning_rate": 3.428701605177691e-05,
      "loss": 0.9194,
      "step": 327700
    },
    {
      "epoch": 1.1485418484756087,
      "grad_norm": 2.890625,
      "learning_rate": 3.4286367023113206e-05,
      "loss": 0.9037,
      "step": 327710
    },
    {
      "epoch": 1.1485768959825042,
      "grad_norm": 2.359375,
      "learning_rate": 3.428571799444951e-05,
      "loss": 0.8269,
      "step": 327720
    },
    {
      "epoch": 1.1486119434893998,
      "grad_norm": 2.859375,
      "learning_rate": 3.42850689657858e-05,
      "loss": 0.8529,
      "step": 327730
    },
    {
      "epoch": 1.1486469909962955,
      "grad_norm": 2.75,
      "learning_rate": 3.42844199371221e-05,
      "loss": 0.9244,
      "step": 327740
    },
    {
      "epoch": 1.148682038503191,
      "grad_norm": 3.109375,
      "learning_rate": 3.4283770908458404e-05,
      "loss": 0.8557,
      "step": 327750
    },
    {
      "epoch": 1.1487170860100866,
      "grad_norm": 2.3125,
      "learning_rate": 3.42831218797947e-05,
      "loss": 0.8406,
      "step": 327760
    },
    {
      "epoch": 1.1487521335169824,
      "grad_norm": 3.171875,
      "learning_rate": 3.4282472851131e-05,
      "loss": 0.8845,
      "step": 327770
    },
    {
      "epoch": 1.148787181023878,
      "grad_norm": 3.1875,
      "learning_rate": 3.4281823822467295e-05,
      "loss": 0.8405,
      "step": 327780
    },
    {
      "epoch": 1.1488222285307734,
      "grad_norm": 2.828125,
      "learning_rate": 3.4281174793803596e-05,
      "loss": 0.9007,
      "step": 327790
    },
    {
      "epoch": 1.1488572760376692,
      "grad_norm": 3.015625,
      "learning_rate": 3.428052576513989e-05,
      "loss": 0.9017,
      "step": 327800
    },
    {
      "epoch": 1.1488923235445647,
      "grad_norm": 3.28125,
      "learning_rate": 3.427987673647619e-05,
      "loss": 0.849,
      "step": 327810
    },
    {
      "epoch": 1.1489273710514603,
      "grad_norm": 2.796875,
      "learning_rate": 3.427922770781249e-05,
      "loss": 0.8458,
      "step": 327820
    },
    {
      "epoch": 1.1489624185583558,
      "grad_norm": 3.34375,
      "learning_rate": 3.427857867914879e-05,
      "loss": 0.9137,
      "step": 327830
    },
    {
      "epoch": 1.1489974660652513,
      "grad_norm": 2.890625,
      "learning_rate": 3.427792965048508e-05,
      "loss": 0.895,
      "step": 327840
    },
    {
      "epoch": 1.149032513572147,
      "grad_norm": 2.796875,
      "learning_rate": 3.4277280621821384e-05,
      "loss": 0.8879,
      "step": 327850
    },
    {
      "epoch": 1.1490675610790426,
      "grad_norm": 2.53125,
      "learning_rate": 3.427663159315768e-05,
      "loss": 0.8554,
      "step": 327860
    },
    {
      "epoch": 1.1491026085859382,
      "grad_norm": 3.0625,
      "learning_rate": 3.427598256449398e-05,
      "loss": 0.7635,
      "step": 327870
    },
    {
      "epoch": 1.149137656092834,
      "grad_norm": 2.9375,
      "learning_rate": 3.427533353583028e-05,
      "loss": 0.8188,
      "step": 327880
    },
    {
      "epoch": 1.1491727035997295,
      "grad_norm": 3.296875,
      "learning_rate": 3.4274684507166576e-05,
      "loss": 0.9497,
      "step": 327890
    },
    {
      "epoch": 1.149207751106625,
      "grad_norm": 3.0625,
      "learning_rate": 3.427403547850288e-05,
      "loss": 0.8543,
      "step": 327900
    },
    {
      "epoch": 1.1492427986135207,
      "grad_norm": 2.796875,
      "learning_rate": 3.427338644983917e-05,
      "loss": 0.904,
      "step": 327910
    },
    {
      "epoch": 1.1492778461204163,
      "grad_norm": 3.015625,
      "learning_rate": 3.4272737421175474e-05,
      "loss": 0.7901,
      "step": 327920
    },
    {
      "epoch": 1.1493128936273118,
      "grad_norm": 2.65625,
      "learning_rate": 3.427208839251177e-05,
      "loss": 0.779,
      "step": 327930
    },
    {
      "epoch": 1.1493479411342074,
      "grad_norm": 2.59375,
      "learning_rate": 3.427143936384807e-05,
      "loss": 0.8531,
      "step": 327940
    },
    {
      "epoch": 1.149382988641103,
      "grad_norm": 3.0,
      "learning_rate": 3.4270790335184364e-05,
      "loss": 0.8648,
      "step": 327950
    },
    {
      "epoch": 1.1494180361479986,
      "grad_norm": 2.9375,
      "learning_rate": 3.4270141306520666e-05,
      "loss": 0.8531,
      "step": 327960
    },
    {
      "epoch": 1.1494530836548942,
      "grad_norm": 2.421875,
      "learning_rate": 3.426949227785696e-05,
      "loss": 0.7583,
      "step": 327970
    },
    {
      "epoch": 1.1494881311617897,
      "grad_norm": 2.53125,
      "learning_rate": 3.426884324919326e-05,
      "loss": 0.8282,
      "step": 327980
    },
    {
      "epoch": 1.1495231786686855,
      "grad_norm": 3.296875,
      "learning_rate": 3.4268194220529556e-05,
      "loss": 0.8763,
      "step": 327990
    },
    {
      "epoch": 1.149558226175581,
      "grad_norm": 2.78125,
      "learning_rate": 3.426754519186586e-05,
      "loss": 0.8425,
      "step": 328000
    },
    {
      "epoch": 1.1495932736824765,
      "grad_norm": 2.796875,
      "learning_rate": 3.426689616320215e-05,
      "loss": 0.8355,
      "step": 328010
    },
    {
      "epoch": 1.1496283211893723,
      "grad_norm": 2.71875,
      "learning_rate": 3.4266247134538454e-05,
      "loss": 0.842,
      "step": 328020
    },
    {
      "epoch": 1.1496633686962678,
      "grad_norm": 3.125,
      "learning_rate": 3.4265598105874755e-05,
      "loss": 0.8844,
      "step": 328030
    },
    {
      "epoch": 1.1496984162031634,
      "grad_norm": 2.765625,
      "learning_rate": 3.426494907721104e-05,
      "loss": 0.8397,
      "step": 328040
    },
    {
      "epoch": 1.149733463710059,
      "grad_norm": 3.078125,
      "learning_rate": 3.4264300048547344e-05,
      "loss": 0.8708,
      "step": 328050
    },
    {
      "epoch": 1.1497685112169547,
      "grad_norm": 2.78125,
      "learning_rate": 3.426365101988364e-05,
      "loss": 0.8473,
      "step": 328060
    },
    {
      "epoch": 1.1498035587238502,
      "grad_norm": 2.828125,
      "learning_rate": 3.426300199121994e-05,
      "loss": 0.7966,
      "step": 328070
    },
    {
      "epoch": 1.1498386062307457,
      "grad_norm": 3.078125,
      "learning_rate": 3.4262352962556235e-05,
      "loss": 0.7778,
      "step": 328080
    },
    {
      "epoch": 1.1498736537376413,
      "grad_norm": 3.3125,
      "learning_rate": 3.4261703933892536e-05,
      "loss": 0.9274,
      "step": 328090
    },
    {
      "epoch": 1.149908701244537,
      "grad_norm": 2.375,
      "learning_rate": 3.426105490522884e-05,
      "loss": 0.8335,
      "step": 328100
    },
    {
      "epoch": 1.1499437487514326,
      "grad_norm": 3.109375,
      "learning_rate": 3.426040587656513e-05,
      "loss": 0.9099,
      "step": 328110
    },
    {
      "epoch": 1.149978796258328,
      "grad_norm": 2.875,
      "learning_rate": 3.4259756847901434e-05,
      "loss": 0.7834,
      "step": 328120
    },
    {
      "epoch": 1.1500138437652239,
      "grad_norm": 3.125,
      "learning_rate": 3.425910781923773e-05,
      "loss": 0.7634,
      "step": 328130
    },
    {
      "epoch": 1.1500488912721194,
      "grad_norm": 3.078125,
      "learning_rate": 3.425845879057403e-05,
      "loss": 0.9406,
      "step": 328140
    },
    {
      "epoch": 1.150083938779015,
      "grad_norm": 3.59375,
      "learning_rate": 3.4257809761910324e-05,
      "loss": 0.8723,
      "step": 328150
    },
    {
      "epoch": 1.1501189862859105,
      "grad_norm": 2.671875,
      "learning_rate": 3.4257160733246626e-05,
      "loss": 0.9067,
      "step": 328160
    },
    {
      "epoch": 1.1501540337928062,
      "grad_norm": 3.453125,
      "learning_rate": 3.425651170458292e-05,
      "loss": 0.8661,
      "step": 328170
    },
    {
      "epoch": 1.1501890812997018,
      "grad_norm": 3.0625,
      "learning_rate": 3.425586267591922e-05,
      "loss": 0.8505,
      "step": 328180
    },
    {
      "epoch": 1.1502241288065973,
      "grad_norm": 3.546875,
      "learning_rate": 3.4255213647255516e-05,
      "loss": 0.9279,
      "step": 328190
    },
    {
      "epoch": 1.1502591763134928,
      "grad_norm": 2.859375,
      "learning_rate": 3.425456461859182e-05,
      "loss": 0.9044,
      "step": 328200
    },
    {
      "epoch": 1.1502942238203886,
      "grad_norm": 2.859375,
      "learning_rate": 3.425391558992811e-05,
      "loss": 0.8862,
      "step": 328210
    },
    {
      "epoch": 1.1503292713272841,
      "grad_norm": 3.0,
      "learning_rate": 3.4253266561264414e-05,
      "loss": 0.8917,
      "step": 328220
    },
    {
      "epoch": 1.1503643188341797,
      "grad_norm": 3.234375,
      "learning_rate": 3.425261753260071e-05,
      "loss": 0.9341,
      "step": 328230
    },
    {
      "epoch": 1.1503993663410754,
      "grad_norm": 2.875,
      "learning_rate": 3.425196850393701e-05,
      "loss": 0.8907,
      "step": 328240
    },
    {
      "epoch": 1.150434413847971,
      "grad_norm": 2.828125,
      "learning_rate": 3.425131947527331e-05,
      "loss": 0.8499,
      "step": 328250
    },
    {
      "epoch": 1.1504694613548665,
      "grad_norm": 3.125,
      "learning_rate": 3.4250670446609606e-05,
      "loss": 0.8825,
      "step": 328260
    },
    {
      "epoch": 1.150504508861762,
      "grad_norm": 2.984375,
      "learning_rate": 3.425002141794591e-05,
      "loss": 0.9074,
      "step": 328270
    },
    {
      "epoch": 1.1505395563686578,
      "grad_norm": 2.28125,
      "learning_rate": 3.42493723892822e-05,
      "loss": 0.7542,
      "step": 328280
    },
    {
      "epoch": 1.1505746038755533,
      "grad_norm": 2.5625,
      "learning_rate": 3.42487233606185e-05,
      "loss": 0.7588,
      "step": 328290
    },
    {
      "epoch": 1.1506096513824489,
      "grad_norm": 3.015625,
      "learning_rate": 3.42480743319548e-05,
      "loss": 0.8612,
      "step": 328300
    },
    {
      "epoch": 1.1506446988893444,
      "grad_norm": 2.921875,
      "learning_rate": 3.42474253032911e-05,
      "loss": 0.8401,
      "step": 328310
    },
    {
      "epoch": 1.1506797463962402,
      "grad_norm": 2.890625,
      "learning_rate": 3.4246776274627394e-05,
      "loss": 0.8296,
      "step": 328320
    },
    {
      "epoch": 1.1507147939031357,
      "grad_norm": 2.765625,
      "learning_rate": 3.4246127245963695e-05,
      "loss": 0.8768,
      "step": 328330
    },
    {
      "epoch": 1.1507498414100312,
      "grad_norm": 2.828125,
      "learning_rate": 3.424547821729999e-05,
      "loss": 0.7668,
      "step": 328340
    },
    {
      "epoch": 1.150784888916927,
      "grad_norm": 2.59375,
      "learning_rate": 3.424482918863629e-05,
      "loss": 0.7947,
      "step": 328350
    },
    {
      "epoch": 1.1508199364238225,
      "grad_norm": 2.53125,
      "learning_rate": 3.4244180159972586e-05,
      "loss": 0.8438,
      "step": 328360
    },
    {
      "epoch": 1.150854983930718,
      "grad_norm": 3.171875,
      "learning_rate": 3.424353113130889e-05,
      "loss": 0.8698,
      "step": 328370
    },
    {
      "epoch": 1.1508900314376136,
      "grad_norm": 3.234375,
      "learning_rate": 3.424288210264519e-05,
      "loss": 0.8264,
      "step": 328380
    },
    {
      "epoch": 1.1509250789445093,
      "grad_norm": 3.0625,
      "learning_rate": 3.424223307398148e-05,
      "loss": 0.7961,
      "step": 328390
    },
    {
      "epoch": 1.1509601264514049,
      "grad_norm": 3.015625,
      "learning_rate": 3.4241584045317784e-05,
      "loss": 0.9232,
      "step": 328400
    },
    {
      "epoch": 1.1509951739583004,
      "grad_norm": 2.59375,
      "learning_rate": 3.424093501665408e-05,
      "loss": 0.8949,
      "step": 328410
    },
    {
      "epoch": 1.151030221465196,
      "grad_norm": 2.625,
      "learning_rate": 3.4240285987990374e-05,
      "loss": 0.8306,
      "step": 328420
    },
    {
      "epoch": 1.1510652689720917,
      "grad_norm": 3.125,
      "learning_rate": 3.423963695932667e-05,
      "loss": 0.9531,
      "step": 328430
    },
    {
      "epoch": 1.1511003164789873,
      "grad_norm": 2.703125,
      "learning_rate": 3.423898793066297e-05,
      "loss": 0.8629,
      "step": 328440
    },
    {
      "epoch": 1.1511353639858828,
      "grad_norm": 3.0625,
      "learning_rate": 3.4238338901999264e-05,
      "loss": 0.8593,
      "step": 328450
    },
    {
      "epoch": 1.1511704114927785,
      "grad_norm": 3.015625,
      "learning_rate": 3.4237689873335566e-05,
      "loss": 0.8533,
      "step": 328460
    },
    {
      "epoch": 1.151205458999674,
      "grad_norm": 3.078125,
      "learning_rate": 3.423704084467187e-05,
      "loss": 0.7752,
      "step": 328470
    },
    {
      "epoch": 1.1512405065065696,
      "grad_norm": 2.40625,
      "learning_rate": 3.423639181600816e-05,
      "loss": 0.8536,
      "step": 328480
    },
    {
      "epoch": 1.1512755540134654,
      "grad_norm": 2.640625,
      "learning_rate": 3.423574278734446e-05,
      "loss": 0.8892,
      "step": 328490
    },
    {
      "epoch": 1.151310601520361,
      "grad_norm": 2.5625,
      "learning_rate": 3.423509375868076e-05,
      "loss": 0.825,
      "step": 328500
    },
    {
      "epoch": 1.1513456490272564,
      "grad_norm": 2.984375,
      "learning_rate": 3.423444473001706e-05,
      "loss": 0.8824,
      "step": 328510
    },
    {
      "epoch": 1.151380696534152,
      "grad_norm": 2.578125,
      "learning_rate": 3.4233795701353354e-05,
      "loss": 0.8575,
      "step": 328520
    },
    {
      "epoch": 1.1514157440410475,
      "grad_norm": 2.6875,
      "learning_rate": 3.4233146672689655e-05,
      "loss": 0.9623,
      "step": 328530
    },
    {
      "epoch": 1.1514507915479433,
      "grad_norm": 3.140625,
      "learning_rate": 3.423249764402595e-05,
      "loss": 0.9103,
      "step": 328540
    },
    {
      "epoch": 1.1514858390548388,
      "grad_norm": 2.890625,
      "learning_rate": 3.423184861536225e-05,
      "loss": 0.8584,
      "step": 328550
    },
    {
      "epoch": 1.1515208865617343,
      "grad_norm": 3.09375,
      "learning_rate": 3.4231199586698546e-05,
      "loss": 0.8093,
      "step": 328560
    },
    {
      "epoch": 1.15155593406863,
      "grad_norm": 2.671875,
      "learning_rate": 3.423055055803485e-05,
      "loss": 0.8729,
      "step": 328570
    },
    {
      "epoch": 1.1515909815755256,
      "grad_norm": 2.875,
      "learning_rate": 3.422990152937114e-05,
      "loss": 0.8707,
      "step": 328580
    },
    {
      "epoch": 1.1516260290824212,
      "grad_norm": 3.15625,
      "learning_rate": 3.422925250070744e-05,
      "loss": 0.8635,
      "step": 328590
    },
    {
      "epoch": 1.151661076589317,
      "grad_norm": 2.59375,
      "learning_rate": 3.422860347204374e-05,
      "loss": 0.8725,
      "step": 328600
    },
    {
      "epoch": 1.1516961240962125,
      "grad_norm": 3.1875,
      "learning_rate": 3.422795444338004e-05,
      "loss": 0.9021,
      "step": 328610
    },
    {
      "epoch": 1.151731171603108,
      "grad_norm": 2.84375,
      "learning_rate": 3.422730541471634e-05,
      "loss": 0.9034,
      "step": 328620
    },
    {
      "epoch": 1.1517662191100035,
      "grad_norm": 3.15625,
      "learning_rate": 3.4226656386052635e-05,
      "loss": 0.8719,
      "step": 328630
    },
    {
      "epoch": 1.1518012666168993,
      "grad_norm": 2.84375,
      "learning_rate": 3.4226007357388936e-05,
      "loss": 0.717,
      "step": 328640
    },
    {
      "epoch": 1.1518363141237948,
      "grad_norm": 3.265625,
      "learning_rate": 3.422535832872523e-05,
      "loss": 0.929,
      "step": 328650
    },
    {
      "epoch": 1.1518713616306904,
      "grad_norm": 2.8125,
      "learning_rate": 3.422470930006153e-05,
      "loss": 0.826,
      "step": 328660
    },
    {
      "epoch": 1.151906409137586,
      "grad_norm": 2.5625,
      "learning_rate": 3.422406027139783e-05,
      "loss": 0.9316,
      "step": 328670
    },
    {
      "epoch": 1.1519414566444817,
      "grad_norm": 2.640625,
      "learning_rate": 3.422341124273413e-05,
      "loss": 0.8107,
      "step": 328680
    },
    {
      "epoch": 1.1519765041513772,
      "grad_norm": 2.640625,
      "learning_rate": 3.422276221407042e-05,
      "loss": 0.9002,
      "step": 328690
    },
    {
      "epoch": 1.1520115516582727,
      "grad_norm": 2.40625,
      "learning_rate": 3.4222113185406724e-05,
      "loss": 0.811,
      "step": 328700
    },
    {
      "epoch": 1.1520465991651685,
      "grad_norm": 3.0,
      "learning_rate": 3.422146415674302e-05,
      "loss": 0.8711,
      "step": 328710
    },
    {
      "epoch": 1.152081646672064,
      "grad_norm": 3.078125,
      "learning_rate": 3.422081512807932e-05,
      "loss": 0.8079,
      "step": 328720
    },
    {
      "epoch": 1.1521166941789596,
      "grad_norm": 3.265625,
      "learning_rate": 3.4220166099415615e-05,
      "loss": 0.9135,
      "step": 328730
    },
    {
      "epoch": 1.152151741685855,
      "grad_norm": 2.890625,
      "learning_rate": 3.4219517070751916e-05,
      "loss": 0.8921,
      "step": 328740
    },
    {
      "epoch": 1.1521867891927509,
      "grad_norm": 2.84375,
      "learning_rate": 3.421886804208822e-05,
      "loss": 0.8497,
      "step": 328750
    },
    {
      "epoch": 1.1522218366996464,
      "grad_norm": 2.984375,
      "learning_rate": 3.421821901342451e-05,
      "loss": 0.8557,
      "step": 328760
    },
    {
      "epoch": 1.152256884206542,
      "grad_norm": 3.28125,
      "learning_rate": 3.4217569984760814e-05,
      "loss": 0.7895,
      "step": 328770
    },
    {
      "epoch": 1.1522919317134375,
      "grad_norm": 2.71875,
      "learning_rate": 3.421692095609711e-05,
      "loss": 0.8317,
      "step": 328780
    },
    {
      "epoch": 1.1523269792203332,
      "grad_norm": 3.515625,
      "learning_rate": 3.42162719274334e-05,
      "loss": 0.9416,
      "step": 328790
    },
    {
      "epoch": 1.1523620267272288,
      "grad_norm": 2.828125,
      "learning_rate": 3.42156228987697e-05,
      "loss": 0.8721,
      "step": 328800
    },
    {
      "epoch": 1.1523970742341243,
      "grad_norm": 3.34375,
      "learning_rate": 3.4214973870106e-05,
      "loss": 0.9277,
      "step": 328810
    },
    {
      "epoch": 1.15243212174102,
      "grad_norm": 2.796875,
      "learning_rate": 3.4214324841442294e-05,
      "loss": 0.8108,
      "step": 328820
    },
    {
      "epoch": 1.1524671692479156,
      "grad_norm": 3.0625,
      "learning_rate": 3.4213675812778595e-05,
      "loss": 0.8785,
      "step": 328830
    },
    {
      "epoch": 1.1525022167548111,
      "grad_norm": 3.0625,
      "learning_rate": 3.4213026784114896e-05,
      "loss": 0.8102,
      "step": 328840
    },
    {
      "epoch": 1.1525372642617067,
      "grad_norm": 2.734375,
      "learning_rate": 3.421237775545119e-05,
      "loss": 0.8614,
      "step": 328850
    },
    {
      "epoch": 1.1525723117686024,
      "grad_norm": 2.6875,
      "learning_rate": 3.421172872678749e-05,
      "loss": 0.7944,
      "step": 328860
    },
    {
      "epoch": 1.152607359275498,
      "grad_norm": 2.8125,
      "learning_rate": 3.421107969812379e-05,
      "loss": 0.8618,
      "step": 328870
    },
    {
      "epoch": 1.1526424067823935,
      "grad_norm": 2.421875,
      "learning_rate": 3.421043066946009e-05,
      "loss": 0.8267,
      "step": 328880
    },
    {
      "epoch": 1.152677454289289,
      "grad_norm": 2.921875,
      "learning_rate": 3.420978164079638e-05,
      "loss": 0.8428,
      "step": 328890
    },
    {
      "epoch": 1.1527125017961848,
      "grad_norm": 2.53125,
      "learning_rate": 3.4209132612132684e-05,
      "loss": 0.8645,
      "step": 328900
    },
    {
      "epoch": 1.1527475493030803,
      "grad_norm": 2.6875,
      "learning_rate": 3.420848358346898e-05,
      "loss": 0.8446,
      "step": 328910
    },
    {
      "epoch": 1.1527825968099759,
      "grad_norm": 2.734375,
      "learning_rate": 3.420783455480528e-05,
      "loss": 0.8995,
      "step": 328920
    },
    {
      "epoch": 1.1528176443168716,
      "grad_norm": 2.6875,
      "learning_rate": 3.4207185526141575e-05,
      "loss": 0.8587,
      "step": 328930
    },
    {
      "epoch": 1.1528526918237672,
      "grad_norm": 3.0625,
      "learning_rate": 3.4206536497477876e-05,
      "loss": 0.8386,
      "step": 328940
    },
    {
      "epoch": 1.1528877393306627,
      "grad_norm": 3.046875,
      "learning_rate": 3.420588746881417e-05,
      "loss": 0.8563,
      "step": 328950
    },
    {
      "epoch": 1.1529227868375582,
      "grad_norm": 2.96875,
      "learning_rate": 3.420523844015047e-05,
      "loss": 0.8828,
      "step": 328960
    },
    {
      "epoch": 1.152957834344454,
      "grad_norm": 2.734375,
      "learning_rate": 3.420458941148677e-05,
      "loss": 0.9047,
      "step": 328970
    },
    {
      "epoch": 1.1529928818513495,
      "grad_norm": 2.5,
      "learning_rate": 3.420394038282307e-05,
      "loss": 0.7412,
      "step": 328980
    },
    {
      "epoch": 1.153027929358245,
      "grad_norm": 2.703125,
      "learning_rate": 3.420329135415937e-05,
      "loss": 0.8189,
      "step": 328990
    },
    {
      "epoch": 1.1530629768651406,
      "grad_norm": 2.96875,
      "learning_rate": 3.4202642325495664e-05,
      "loss": 0.7722,
      "step": 329000
    },
    {
      "epoch": 1.1530980243720363,
      "grad_norm": 3.078125,
      "learning_rate": 3.4201993296831966e-05,
      "loss": 0.9116,
      "step": 329010
    },
    {
      "epoch": 1.1531330718789319,
      "grad_norm": 2.90625,
      "learning_rate": 3.420134426816826e-05,
      "loss": 0.8791,
      "step": 329020
    },
    {
      "epoch": 1.1531681193858274,
      "grad_norm": 3.03125,
      "learning_rate": 3.420069523950456e-05,
      "loss": 0.8844,
      "step": 329030
    },
    {
      "epoch": 1.1532031668927232,
      "grad_norm": 2.5,
      "learning_rate": 3.4200046210840856e-05,
      "loss": 0.8981,
      "step": 329040
    },
    {
      "epoch": 1.1532382143996187,
      "grad_norm": 3.328125,
      "learning_rate": 3.419939718217716e-05,
      "loss": 0.8657,
      "step": 329050
    },
    {
      "epoch": 1.1532732619065142,
      "grad_norm": 2.703125,
      "learning_rate": 3.419874815351345e-05,
      "loss": 0.8974,
      "step": 329060
    },
    {
      "epoch": 1.1533083094134098,
      "grad_norm": 2.8125,
      "learning_rate": 3.4198099124849754e-05,
      "loss": 0.8788,
      "step": 329070
    },
    {
      "epoch": 1.1533433569203055,
      "grad_norm": 2.484375,
      "learning_rate": 3.419745009618605e-05,
      "loss": 0.849,
      "step": 329080
    },
    {
      "epoch": 1.153378404427201,
      "grad_norm": 2.78125,
      "learning_rate": 3.419680106752235e-05,
      "loss": 0.8498,
      "step": 329090
    },
    {
      "epoch": 1.1534134519340966,
      "grad_norm": 2.75,
      "learning_rate": 3.4196152038858644e-05,
      "loss": 0.8594,
      "step": 329100
    },
    {
      "epoch": 1.1534484994409921,
      "grad_norm": 3.09375,
      "learning_rate": 3.4195503010194946e-05,
      "loss": 0.8846,
      "step": 329110
    },
    {
      "epoch": 1.153483546947888,
      "grad_norm": 3.046875,
      "learning_rate": 3.419485398153125e-05,
      "loss": 0.8399,
      "step": 329120
    },
    {
      "epoch": 1.1535185944547834,
      "grad_norm": 2.375,
      "learning_rate": 3.419420495286754e-05,
      "loss": 0.9097,
      "step": 329130
    },
    {
      "epoch": 1.153553641961679,
      "grad_norm": 3.078125,
      "learning_rate": 3.419355592420384e-05,
      "loss": 0.869,
      "step": 329140
    },
    {
      "epoch": 1.1535886894685747,
      "grad_norm": 3.0625,
      "learning_rate": 3.419290689554014e-05,
      "loss": 0.9095,
      "step": 329150
    },
    {
      "epoch": 1.1536237369754703,
      "grad_norm": 3.203125,
      "learning_rate": 3.419225786687644e-05,
      "loss": 0.8403,
      "step": 329160
    },
    {
      "epoch": 1.1536587844823658,
      "grad_norm": 3.09375,
      "learning_rate": 3.419160883821273e-05,
      "loss": 0.8535,
      "step": 329170
    },
    {
      "epoch": 1.1536938319892616,
      "grad_norm": 3.03125,
      "learning_rate": 3.419095980954903e-05,
      "loss": 0.8673,
      "step": 329180
    },
    {
      "epoch": 1.153728879496157,
      "grad_norm": 2.859375,
      "learning_rate": 3.419031078088532e-05,
      "loss": 0.9054,
      "step": 329190
    },
    {
      "epoch": 1.1537639270030526,
      "grad_norm": 2.90625,
      "learning_rate": 3.4189661752221624e-05,
      "loss": 0.9574,
      "step": 329200
    },
    {
      "epoch": 1.1537989745099482,
      "grad_norm": 2.890625,
      "learning_rate": 3.4189012723557926e-05,
      "loss": 0.8963,
      "step": 329210
    },
    {
      "epoch": 1.1538340220168437,
      "grad_norm": 2.765625,
      "learning_rate": 3.418836369489422e-05,
      "loss": 0.8749,
      "step": 329220
    },
    {
      "epoch": 1.1538690695237395,
      "grad_norm": 3.046875,
      "learning_rate": 3.418771466623052e-05,
      "loss": 0.8376,
      "step": 329230
    },
    {
      "epoch": 1.153904117030635,
      "grad_norm": 2.9375,
      "learning_rate": 3.4187065637566816e-05,
      "loss": 0.8659,
      "step": 329240
    },
    {
      "epoch": 1.1539391645375305,
      "grad_norm": 3.296875,
      "learning_rate": 3.418641660890312e-05,
      "loss": 0.8586,
      "step": 329250
    },
    {
      "epoch": 1.1539742120444263,
      "grad_norm": 2.90625,
      "learning_rate": 3.418576758023941e-05,
      "loss": 0.9242,
      "step": 329260
    },
    {
      "epoch": 1.1540092595513218,
      "grad_norm": 2.84375,
      "learning_rate": 3.4185118551575714e-05,
      "loss": 0.8489,
      "step": 329270
    },
    {
      "epoch": 1.1540443070582174,
      "grad_norm": 3.203125,
      "learning_rate": 3.418446952291201e-05,
      "loss": 0.8336,
      "step": 329280
    },
    {
      "epoch": 1.1540793545651131,
      "grad_norm": 2.71875,
      "learning_rate": 3.418382049424831e-05,
      "loss": 0.8531,
      "step": 329290
    },
    {
      "epoch": 1.1541144020720087,
      "grad_norm": 3.09375,
      "learning_rate": 3.4183171465584604e-05,
      "loss": 0.8987,
      "step": 329300
    },
    {
      "epoch": 1.1541494495789042,
      "grad_norm": 2.59375,
      "learning_rate": 3.4182522436920906e-05,
      "loss": 0.8125,
      "step": 329310
    },
    {
      "epoch": 1.1541844970857997,
      "grad_norm": 3.046875,
      "learning_rate": 3.41818734082572e-05,
      "loss": 0.8618,
      "step": 329320
    },
    {
      "epoch": 1.1542195445926955,
      "grad_norm": 2.828125,
      "learning_rate": 3.41812243795935e-05,
      "loss": 0.8587,
      "step": 329330
    },
    {
      "epoch": 1.154254592099591,
      "grad_norm": 2.734375,
      "learning_rate": 3.41805753509298e-05,
      "loss": 0.8721,
      "step": 329340
    },
    {
      "epoch": 1.1542896396064866,
      "grad_norm": 3.046875,
      "learning_rate": 3.41799263222661e-05,
      "loss": 0.9457,
      "step": 329350
    },
    {
      "epoch": 1.154324687113382,
      "grad_norm": 2.703125,
      "learning_rate": 3.41792772936024e-05,
      "loss": 0.7741,
      "step": 329360
    },
    {
      "epoch": 1.1543597346202779,
      "grad_norm": 2.96875,
      "learning_rate": 3.4178628264938694e-05,
      "loss": 0.8784,
      "step": 329370
    },
    {
      "epoch": 1.1543947821271734,
      "grad_norm": 3.5,
      "learning_rate": 3.4177979236274995e-05,
      "loss": 0.89,
      "step": 329380
    },
    {
      "epoch": 1.154429829634069,
      "grad_norm": 2.84375,
      "learning_rate": 3.417733020761129e-05,
      "loss": 0.7768,
      "step": 329390
    },
    {
      "epoch": 1.1544648771409647,
      "grad_norm": 2.3125,
      "learning_rate": 3.417668117894759e-05,
      "loss": 0.7544,
      "step": 329400
    },
    {
      "epoch": 1.1544999246478602,
      "grad_norm": 3.21875,
      "learning_rate": 3.4176032150283886e-05,
      "loss": 0.8635,
      "step": 329410
    },
    {
      "epoch": 1.1545349721547558,
      "grad_norm": 3.09375,
      "learning_rate": 3.417538312162019e-05,
      "loss": 0.9476,
      "step": 329420
    },
    {
      "epoch": 1.1545700196616513,
      "grad_norm": 3.5,
      "learning_rate": 3.417473409295648e-05,
      "loss": 0.8875,
      "step": 329430
    },
    {
      "epoch": 1.154605067168547,
      "grad_norm": 3.0625,
      "learning_rate": 3.417408506429278e-05,
      "loss": 0.8728,
      "step": 329440
    },
    {
      "epoch": 1.1546401146754426,
      "grad_norm": 3.09375,
      "learning_rate": 3.417343603562908e-05,
      "loss": 0.885,
      "step": 329450
    },
    {
      "epoch": 1.1546751621823381,
      "grad_norm": 3.234375,
      "learning_rate": 3.417278700696538e-05,
      "loss": 0.8892,
      "step": 329460
    },
    {
      "epoch": 1.1547102096892337,
      "grad_norm": 2.890625,
      "learning_rate": 3.4172137978301674e-05,
      "loss": 0.7989,
      "step": 329470
    },
    {
      "epoch": 1.1547452571961294,
      "grad_norm": 3.140625,
      "learning_rate": 3.4171488949637975e-05,
      "loss": 0.8656,
      "step": 329480
    },
    {
      "epoch": 1.154780304703025,
      "grad_norm": 3.21875,
      "learning_rate": 3.4170839920974277e-05,
      "loss": 0.8317,
      "step": 329490
    },
    {
      "epoch": 1.1548153522099205,
      "grad_norm": 2.734375,
      "learning_rate": 3.417019089231057e-05,
      "loss": 0.8434,
      "step": 329500
    },
    {
      "epoch": 1.1548503997168162,
      "grad_norm": 2.84375,
      "learning_rate": 3.416954186364687e-05,
      "loss": 0.8785,
      "step": 329510
    },
    {
      "epoch": 1.1548854472237118,
      "grad_norm": 2.84375,
      "learning_rate": 3.416889283498317e-05,
      "loss": 0.9345,
      "step": 329520
    },
    {
      "epoch": 1.1549204947306073,
      "grad_norm": 3.484375,
      "learning_rate": 3.416824380631947e-05,
      "loss": 0.8062,
      "step": 329530
    },
    {
      "epoch": 1.1549555422375029,
      "grad_norm": 2.828125,
      "learning_rate": 3.416759477765576e-05,
      "loss": 0.8333,
      "step": 329540
    },
    {
      "epoch": 1.1549905897443986,
      "grad_norm": 2.5625,
      "learning_rate": 3.416694574899206e-05,
      "loss": 0.8578,
      "step": 329550
    },
    {
      "epoch": 1.1550256372512941,
      "grad_norm": 2.875,
      "learning_rate": 3.416629672032835e-05,
      "loss": 0.8178,
      "step": 329560
    },
    {
      "epoch": 1.1550606847581897,
      "grad_norm": 2.9375,
      "learning_rate": 3.4165647691664654e-05,
      "loss": 0.8717,
      "step": 329570
    },
    {
      "epoch": 1.1550957322650852,
      "grad_norm": 2.890625,
      "learning_rate": 3.4164998663000955e-05,
      "loss": 0.8755,
      "step": 329580
    },
    {
      "epoch": 1.155130779771981,
      "grad_norm": 2.9375,
      "learning_rate": 3.416434963433725e-05,
      "loss": 0.8991,
      "step": 329590
    },
    {
      "epoch": 1.1551658272788765,
      "grad_norm": 2.8125,
      "learning_rate": 3.416370060567355e-05,
      "loss": 0.8692,
      "step": 329600
    },
    {
      "epoch": 1.155200874785772,
      "grad_norm": 2.34375,
      "learning_rate": 3.4163051577009846e-05,
      "loss": 0.8041,
      "step": 329610
    },
    {
      "epoch": 1.1552359222926678,
      "grad_norm": 3.515625,
      "learning_rate": 3.416240254834615e-05,
      "loss": 0.8792,
      "step": 329620
    },
    {
      "epoch": 1.1552709697995633,
      "grad_norm": 2.6875,
      "learning_rate": 3.416175351968244e-05,
      "loss": 0.7655,
      "step": 329630
    },
    {
      "epoch": 1.1553060173064589,
      "grad_norm": 2.765625,
      "learning_rate": 3.416110449101874e-05,
      "loss": 0.8281,
      "step": 329640
    },
    {
      "epoch": 1.1553410648133544,
      "grad_norm": 3.109375,
      "learning_rate": 3.416045546235504e-05,
      "loss": 0.8653,
      "step": 329650
    },
    {
      "epoch": 1.1553761123202502,
      "grad_norm": 3.015625,
      "learning_rate": 3.415980643369134e-05,
      "loss": 0.8888,
      "step": 329660
    },
    {
      "epoch": 1.1554111598271457,
      "grad_norm": 3.296875,
      "learning_rate": 3.4159157405027634e-05,
      "loss": 0.8452,
      "step": 329670
    },
    {
      "epoch": 1.1554462073340412,
      "grad_norm": 2.671875,
      "learning_rate": 3.4158508376363935e-05,
      "loss": 0.8471,
      "step": 329680
    },
    {
      "epoch": 1.1554812548409368,
      "grad_norm": 2.84375,
      "learning_rate": 3.415785934770023e-05,
      "loss": 0.8553,
      "step": 329690
    },
    {
      "epoch": 1.1555163023478325,
      "grad_norm": 2.703125,
      "learning_rate": 3.415721031903653e-05,
      "loss": 0.9076,
      "step": 329700
    },
    {
      "epoch": 1.155551349854728,
      "grad_norm": 3.3125,
      "learning_rate": 3.415656129037283e-05,
      "loss": 0.8444,
      "step": 329710
    },
    {
      "epoch": 1.1555863973616236,
      "grad_norm": 2.21875,
      "learning_rate": 3.415591226170913e-05,
      "loss": 0.7786,
      "step": 329720
    },
    {
      "epoch": 1.1556214448685194,
      "grad_norm": 3.046875,
      "learning_rate": 3.415526323304543e-05,
      "loss": 0.7815,
      "step": 329730
    },
    {
      "epoch": 1.155656492375415,
      "grad_norm": 2.953125,
      "learning_rate": 3.415461420438172e-05,
      "loss": 0.9116,
      "step": 329740
    },
    {
      "epoch": 1.1556915398823104,
      "grad_norm": 2.875,
      "learning_rate": 3.4153965175718025e-05,
      "loss": 0.8466,
      "step": 329750
    },
    {
      "epoch": 1.155726587389206,
      "grad_norm": 2.84375,
      "learning_rate": 3.415331614705432e-05,
      "loss": 0.9171,
      "step": 329760
    },
    {
      "epoch": 1.1557616348961017,
      "grad_norm": 2.890625,
      "learning_rate": 3.415266711839062e-05,
      "loss": 0.9224,
      "step": 329770
    },
    {
      "epoch": 1.1557966824029973,
      "grad_norm": 2.5625,
      "learning_rate": 3.4152018089726915e-05,
      "loss": 0.8467,
      "step": 329780
    },
    {
      "epoch": 1.1558317299098928,
      "grad_norm": 3.09375,
      "learning_rate": 3.4151369061063217e-05,
      "loss": 0.8454,
      "step": 329790
    },
    {
      "epoch": 1.1558667774167883,
      "grad_norm": 3.0,
      "learning_rate": 3.415072003239951e-05,
      "loss": 0.8483,
      "step": 329800
    },
    {
      "epoch": 1.155901824923684,
      "grad_norm": 2.578125,
      "learning_rate": 3.415007100373581e-05,
      "loss": 0.8706,
      "step": 329810
    },
    {
      "epoch": 1.1559368724305796,
      "grad_norm": 3.3125,
      "learning_rate": 3.414942197507211e-05,
      "loss": 0.8707,
      "step": 329820
    },
    {
      "epoch": 1.1559719199374752,
      "grad_norm": 2.4375,
      "learning_rate": 3.414877294640841e-05,
      "loss": 0.8936,
      "step": 329830
    },
    {
      "epoch": 1.156006967444371,
      "grad_norm": 3.0,
      "learning_rate": 3.41481239177447e-05,
      "loss": 0.8934,
      "step": 329840
    },
    {
      "epoch": 1.1560420149512665,
      "grad_norm": 2.984375,
      "learning_rate": 3.4147474889081005e-05,
      "loss": 0.8899,
      "step": 329850
    },
    {
      "epoch": 1.156077062458162,
      "grad_norm": 5.0625,
      "learning_rate": 3.4146825860417306e-05,
      "loss": 0.8238,
      "step": 329860
    },
    {
      "epoch": 1.1561121099650578,
      "grad_norm": 2.734375,
      "learning_rate": 3.41461768317536e-05,
      "loss": 0.8512,
      "step": 329870
    },
    {
      "epoch": 1.1561471574719533,
      "grad_norm": 2.671875,
      "learning_rate": 3.41455278030899e-05,
      "loss": 0.8819,
      "step": 329880
    },
    {
      "epoch": 1.1561822049788488,
      "grad_norm": 3.203125,
      "learning_rate": 3.4144878774426197e-05,
      "loss": 0.8866,
      "step": 329890
    },
    {
      "epoch": 1.1562172524857444,
      "grad_norm": 2.765625,
      "learning_rate": 3.41442297457625e-05,
      "loss": 0.8414,
      "step": 329900
    },
    {
      "epoch": 1.1562522999926401,
      "grad_norm": 2.71875,
      "learning_rate": 3.414358071709879e-05,
      "loss": 0.8574,
      "step": 329910
    },
    {
      "epoch": 1.1562873474995357,
      "grad_norm": 3.0625,
      "learning_rate": 3.414293168843509e-05,
      "loss": 0.8127,
      "step": 329920
    },
    {
      "epoch": 1.1563223950064312,
      "grad_norm": 2.25,
      "learning_rate": 3.414228265977138e-05,
      "loss": 0.8605,
      "step": 329930
    },
    {
      "epoch": 1.1563574425133267,
      "grad_norm": 2.921875,
      "learning_rate": 3.414163363110768e-05,
      "loss": 0.8331,
      "step": 329940
    },
    {
      "epoch": 1.1563924900202225,
      "grad_norm": 2.890625,
      "learning_rate": 3.4140984602443985e-05,
      "loss": 0.8748,
      "step": 329950
    },
    {
      "epoch": 1.156427537527118,
      "grad_norm": 3.265625,
      "learning_rate": 3.414033557378028e-05,
      "loss": 0.7542,
      "step": 329960
    },
    {
      "epoch": 1.1564625850340136,
      "grad_norm": 3.03125,
      "learning_rate": 3.413968654511658e-05,
      "loss": 0.925,
      "step": 329970
    },
    {
      "epoch": 1.1564976325409093,
      "grad_norm": 2.75,
      "learning_rate": 3.4139037516452875e-05,
      "loss": 0.9346,
      "step": 329980
    },
    {
      "epoch": 1.1565326800478049,
      "grad_norm": 2.84375,
      "learning_rate": 3.4138388487789177e-05,
      "loss": 0.8349,
      "step": 329990
    },
    {
      "epoch": 1.1565677275547004,
      "grad_norm": 2.90625,
      "learning_rate": 3.413773945912547e-05,
      "loss": 0.8302,
      "step": 330000
    },
    {
      "epoch": 1.1565677275547004,
      "eval_loss": 0.8088414669036865,
      "eval_runtime": 557.1753,
      "eval_samples_per_second": 682.794,
      "eval_steps_per_second": 56.9,
      "step": 330000
    },
    {
      "epoch": 1.156602775061596,
      "grad_norm": 2.828125,
      "learning_rate": 3.413709043046177e-05,
      "loss": 0.9095,
      "step": 330010
    },
    {
      "epoch": 1.1566378225684917,
      "grad_norm": 2.390625,
      "learning_rate": 3.413644140179807e-05,
      "loss": 0.8198,
      "step": 330020
    },
    {
      "epoch": 1.1566728700753872,
      "grad_norm": 2.921875,
      "learning_rate": 3.413579237313437e-05,
      "loss": 0.9142,
      "step": 330030
    },
    {
      "epoch": 1.1567079175822828,
      "grad_norm": 2.765625,
      "learning_rate": 3.413514334447066e-05,
      "loss": 0.901,
      "step": 330040
    },
    {
      "epoch": 1.1567429650891783,
      "grad_norm": 2.734375,
      "learning_rate": 3.4134494315806965e-05,
      "loss": 0.8369,
      "step": 330050
    },
    {
      "epoch": 1.156778012596074,
      "grad_norm": 3.078125,
      "learning_rate": 3.413384528714326e-05,
      "loss": 0.9497,
      "step": 330060
    },
    {
      "epoch": 1.1568130601029696,
      "grad_norm": 3.109375,
      "learning_rate": 3.413319625847956e-05,
      "loss": 0.8552,
      "step": 330070
    },
    {
      "epoch": 1.1568481076098651,
      "grad_norm": 2.921875,
      "learning_rate": 3.413254722981586e-05,
      "loss": 0.8818,
      "step": 330080
    },
    {
      "epoch": 1.1568831551167609,
      "grad_norm": 2.984375,
      "learning_rate": 3.4131898201152157e-05,
      "loss": 0.9226,
      "step": 330090
    },
    {
      "epoch": 1.1569182026236564,
      "grad_norm": 2.90625,
      "learning_rate": 3.413124917248846e-05,
      "loss": 0.8948,
      "step": 330100
    },
    {
      "epoch": 1.156953250130552,
      "grad_norm": 2.75,
      "learning_rate": 3.413060014382475e-05,
      "loss": 0.8617,
      "step": 330110
    },
    {
      "epoch": 1.1569882976374475,
      "grad_norm": 2.59375,
      "learning_rate": 3.4129951115161054e-05,
      "loss": 0.8498,
      "step": 330120
    },
    {
      "epoch": 1.1570233451443432,
      "grad_norm": 2.75,
      "learning_rate": 3.412930208649735e-05,
      "loss": 0.8124,
      "step": 330130
    },
    {
      "epoch": 1.1570583926512388,
      "grad_norm": 2.53125,
      "learning_rate": 3.412865305783365e-05,
      "loss": 0.8649,
      "step": 330140
    },
    {
      "epoch": 1.1570934401581343,
      "grad_norm": 2.8125,
      "learning_rate": 3.4128004029169945e-05,
      "loss": 0.8766,
      "step": 330150
    },
    {
      "epoch": 1.1571284876650298,
      "grad_norm": 3.140625,
      "learning_rate": 3.4127355000506246e-05,
      "loss": 0.8605,
      "step": 330160
    },
    {
      "epoch": 1.1571635351719256,
      "grad_norm": 3.6875,
      "learning_rate": 3.412670597184254e-05,
      "loss": 0.8797,
      "step": 330170
    },
    {
      "epoch": 1.1571985826788211,
      "grad_norm": 2.84375,
      "learning_rate": 3.412605694317884e-05,
      "loss": 0.871,
      "step": 330180
    },
    {
      "epoch": 1.1572336301857167,
      "grad_norm": 2.625,
      "learning_rate": 3.4125407914515137e-05,
      "loss": 0.8166,
      "step": 330190
    },
    {
      "epoch": 1.1572686776926124,
      "grad_norm": 3.15625,
      "learning_rate": 3.412475888585144e-05,
      "loss": 0.8226,
      "step": 330200
    },
    {
      "epoch": 1.157303725199508,
      "grad_norm": 3.21875,
      "learning_rate": 3.412410985718774e-05,
      "loss": 0.8401,
      "step": 330210
    },
    {
      "epoch": 1.1573387727064035,
      "grad_norm": 3.0625,
      "learning_rate": 3.4123460828524034e-05,
      "loss": 0.867,
      "step": 330220
    },
    {
      "epoch": 1.157373820213299,
      "grad_norm": 3.046875,
      "learning_rate": 3.4122811799860335e-05,
      "loss": 0.8315,
      "step": 330230
    },
    {
      "epoch": 1.1574088677201948,
      "grad_norm": 2.6875,
      "learning_rate": 3.412216277119663e-05,
      "loss": 0.8733,
      "step": 330240
    },
    {
      "epoch": 1.1574439152270903,
      "grad_norm": 4.5,
      "learning_rate": 3.412151374253293e-05,
      "loss": 0.95,
      "step": 330250
    },
    {
      "epoch": 1.1574789627339859,
      "grad_norm": 4.5625,
      "learning_rate": 3.4120864713869226e-05,
      "loss": 0.8721,
      "step": 330260
    },
    {
      "epoch": 1.1575140102408814,
      "grad_norm": 2.703125,
      "learning_rate": 3.412021568520553e-05,
      "loss": 0.8396,
      "step": 330270
    },
    {
      "epoch": 1.1575490577477772,
      "grad_norm": 2.875,
      "learning_rate": 3.411956665654182e-05,
      "loss": 0.891,
      "step": 330280
    },
    {
      "epoch": 1.1575841052546727,
      "grad_norm": 3.140625,
      "learning_rate": 3.411891762787812e-05,
      "loss": 0.8241,
      "step": 330290
    },
    {
      "epoch": 1.1576191527615682,
      "grad_norm": 3.171875,
      "learning_rate": 3.411826859921442e-05,
      "loss": 0.8876,
      "step": 330300
    },
    {
      "epoch": 1.157654200268464,
      "grad_norm": 2.8125,
      "learning_rate": 3.411761957055071e-05,
      "loss": 0.8518,
      "step": 330310
    },
    {
      "epoch": 1.1576892477753595,
      "grad_norm": 2.5,
      "learning_rate": 3.4116970541887014e-05,
      "loss": 0.7884,
      "step": 330320
    },
    {
      "epoch": 1.157724295282255,
      "grad_norm": 2.8125,
      "learning_rate": 3.411632151322331e-05,
      "loss": 0.8459,
      "step": 330330
    },
    {
      "epoch": 1.1577593427891506,
      "grad_norm": 2.625,
      "learning_rate": 3.411567248455961e-05,
      "loss": 0.8817,
      "step": 330340
    },
    {
      "epoch": 1.1577943902960464,
      "grad_norm": 3.125,
      "learning_rate": 3.4115023455895905e-05,
      "loss": 0.9243,
      "step": 330350
    },
    {
      "epoch": 1.157829437802942,
      "grad_norm": 2.75,
      "learning_rate": 3.4114374427232206e-05,
      "loss": 0.8309,
      "step": 330360
    },
    {
      "epoch": 1.1578644853098374,
      "grad_norm": 2.984375,
      "learning_rate": 3.41137253985685e-05,
      "loss": 0.8009,
      "step": 330370
    },
    {
      "epoch": 1.157899532816733,
      "grad_norm": 3.140625,
      "learning_rate": 3.41130763699048e-05,
      "loss": 0.8988,
      "step": 330380
    },
    {
      "epoch": 1.1579345803236287,
      "grad_norm": 2.6875,
      "learning_rate": 3.4112427341241097e-05,
      "loss": 0.9259,
      "step": 330390
    },
    {
      "epoch": 1.1579696278305243,
      "grad_norm": 2.984375,
      "learning_rate": 3.41117783125774e-05,
      "loss": 0.8631,
      "step": 330400
    },
    {
      "epoch": 1.1580046753374198,
      "grad_norm": 2.796875,
      "learning_rate": 3.411112928391369e-05,
      "loss": 0.849,
      "step": 330410
    },
    {
      "epoch": 1.1580397228443156,
      "grad_norm": 2.75,
      "learning_rate": 3.4110480255249994e-05,
      "loss": 0.8429,
      "step": 330420
    },
    {
      "epoch": 1.158074770351211,
      "grad_norm": 2.78125,
      "learning_rate": 3.410983122658629e-05,
      "loss": 0.9215,
      "step": 330430
    },
    {
      "epoch": 1.1581098178581066,
      "grad_norm": 2.890625,
      "learning_rate": 3.410918219792259e-05,
      "loss": 0.8801,
      "step": 330440
    },
    {
      "epoch": 1.1581448653650024,
      "grad_norm": 2.859375,
      "learning_rate": 3.410853316925889e-05,
      "loss": 0.8439,
      "step": 330450
    },
    {
      "epoch": 1.158179912871898,
      "grad_norm": 2.828125,
      "learning_rate": 3.4107884140595186e-05,
      "loss": 0.8391,
      "step": 330460
    },
    {
      "epoch": 1.1582149603787935,
      "grad_norm": 2.875,
      "learning_rate": 3.410723511193149e-05,
      "loss": 0.8753,
      "step": 330470
    },
    {
      "epoch": 1.158250007885689,
      "grad_norm": 2.578125,
      "learning_rate": 3.410658608326778e-05,
      "loss": 0.7309,
      "step": 330480
    },
    {
      "epoch": 1.1582850553925845,
      "grad_norm": 2.78125,
      "learning_rate": 3.410593705460408e-05,
      "loss": 0.8226,
      "step": 330490
    },
    {
      "epoch": 1.1583201028994803,
      "grad_norm": 2.5625,
      "learning_rate": 3.410528802594038e-05,
      "loss": 0.9431,
      "step": 330500
    },
    {
      "epoch": 1.1583551504063758,
      "grad_norm": 2.984375,
      "learning_rate": 3.410463899727668e-05,
      "loss": 0.9777,
      "step": 330510
    },
    {
      "epoch": 1.1583901979132714,
      "grad_norm": 3.234375,
      "learning_rate": 3.4103989968612974e-05,
      "loss": 0.8358,
      "step": 330520
    },
    {
      "epoch": 1.1584252454201671,
      "grad_norm": 2.6875,
      "learning_rate": 3.4103340939949275e-05,
      "loss": 0.9048,
      "step": 330530
    },
    {
      "epoch": 1.1584602929270627,
      "grad_norm": 3.0,
      "learning_rate": 3.410269191128557e-05,
      "loss": 0.8242,
      "step": 330540
    },
    {
      "epoch": 1.1584953404339582,
      "grad_norm": 2.640625,
      "learning_rate": 3.410204288262187e-05,
      "loss": 0.8816,
      "step": 330550
    },
    {
      "epoch": 1.158530387940854,
      "grad_norm": 3.140625,
      "learning_rate": 3.4101393853958166e-05,
      "loss": 0.8342,
      "step": 330560
    },
    {
      "epoch": 1.1585654354477495,
      "grad_norm": 3.8125,
      "learning_rate": 3.410074482529447e-05,
      "loss": 0.8958,
      "step": 330570
    },
    {
      "epoch": 1.158600482954645,
      "grad_norm": 2.90625,
      "learning_rate": 3.410009579663077e-05,
      "loss": 0.8429,
      "step": 330580
    },
    {
      "epoch": 1.1586355304615406,
      "grad_norm": 2.484375,
      "learning_rate": 3.409944676796706e-05,
      "loss": 0.801,
      "step": 330590
    },
    {
      "epoch": 1.1586705779684363,
      "grad_norm": 3.0,
      "learning_rate": 3.4098797739303365e-05,
      "loss": 0.9832,
      "step": 330600
    },
    {
      "epoch": 1.1587056254753318,
      "grad_norm": 3.015625,
      "learning_rate": 3.409814871063966e-05,
      "loss": 0.889,
      "step": 330610
    },
    {
      "epoch": 1.1587406729822274,
      "grad_norm": 2.5625,
      "learning_rate": 3.409749968197596e-05,
      "loss": 0.806,
      "step": 330620
    },
    {
      "epoch": 1.158775720489123,
      "grad_norm": 3.265625,
      "learning_rate": 3.4096850653312255e-05,
      "loss": 0.8884,
      "step": 330630
    },
    {
      "epoch": 1.1588107679960187,
      "grad_norm": 3.0625,
      "learning_rate": 3.409620162464856e-05,
      "loss": 0.7543,
      "step": 330640
    },
    {
      "epoch": 1.1588458155029142,
      "grad_norm": 3.15625,
      "learning_rate": 3.409555259598485e-05,
      "loss": 0.8046,
      "step": 330650
    },
    {
      "epoch": 1.1588808630098097,
      "grad_norm": 2.921875,
      "learning_rate": 3.409490356732115e-05,
      "loss": 0.8664,
      "step": 330660
    },
    {
      "epoch": 1.1589159105167055,
      "grad_norm": 3.046875,
      "learning_rate": 3.409425453865745e-05,
      "loss": 0.893,
      "step": 330670
    },
    {
      "epoch": 1.158950958023601,
      "grad_norm": 3.75,
      "learning_rate": 3.409360550999374e-05,
      "loss": 0.9203,
      "step": 330680
    },
    {
      "epoch": 1.1589860055304966,
      "grad_norm": 2.890625,
      "learning_rate": 3.409295648133004e-05,
      "loss": 0.8288,
      "step": 330690
    },
    {
      "epoch": 1.1590210530373921,
      "grad_norm": 2.890625,
      "learning_rate": 3.409230745266634e-05,
      "loss": 0.8043,
      "step": 330700
    },
    {
      "epoch": 1.1590561005442879,
      "grad_norm": 2.796875,
      "learning_rate": 3.409165842400264e-05,
      "loss": 0.8186,
      "step": 330710
    },
    {
      "epoch": 1.1590911480511834,
      "grad_norm": 2.484375,
      "learning_rate": 3.4091009395338934e-05,
      "loss": 0.8585,
      "step": 330720
    },
    {
      "epoch": 1.159126195558079,
      "grad_norm": 2.765625,
      "learning_rate": 3.4090360366675235e-05,
      "loss": 0.8661,
      "step": 330730
    },
    {
      "epoch": 1.1591612430649745,
      "grad_norm": 3.46875,
      "learning_rate": 3.408971133801153e-05,
      "loss": 0.8825,
      "step": 330740
    },
    {
      "epoch": 1.1591962905718702,
      "grad_norm": 2.921875,
      "learning_rate": 3.408906230934783e-05,
      "loss": 0.8885,
      "step": 330750
    },
    {
      "epoch": 1.1592313380787658,
      "grad_norm": 3.046875,
      "learning_rate": 3.4088413280684126e-05,
      "loss": 0.8463,
      "step": 330760
    },
    {
      "epoch": 1.1592663855856613,
      "grad_norm": 2.53125,
      "learning_rate": 3.408776425202043e-05,
      "loss": 0.8258,
      "step": 330770
    },
    {
      "epoch": 1.159301433092557,
      "grad_norm": 3.140625,
      "learning_rate": 3.408711522335672e-05,
      "loss": 0.855,
      "step": 330780
    },
    {
      "epoch": 1.1593364805994526,
      "grad_norm": 2.8125,
      "learning_rate": 3.408646619469302e-05,
      "loss": 0.8099,
      "step": 330790
    },
    {
      "epoch": 1.1593715281063481,
      "grad_norm": 2.78125,
      "learning_rate": 3.408581716602932e-05,
      "loss": 0.8364,
      "step": 330800
    },
    {
      "epoch": 1.1594065756132437,
      "grad_norm": 3.265625,
      "learning_rate": 3.408516813736562e-05,
      "loss": 0.9407,
      "step": 330810
    },
    {
      "epoch": 1.1594416231201394,
      "grad_norm": 2.78125,
      "learning_rate": 3.408451910870192e-05,
      "loss": 0.9955,
      "step": 330820
    },
    {
      "epoch": 1.159476670627035,
      "grad_norm": 2.703125,
      "learning_rate": 3.4083870080038215e-05,
      "loss": 0.8308,
      "step": 330830
    },
    {
      "epoch": 1.1595117181339305,
      "grad_norm": 3.1875,
      "learning_rate": 3.408322105137452e-05,
      "loss": 0.9403,
      "step": 330840
    },
    {
      "epoch": 1.159546765640826,
      "grad_norm": 2.890625,
      "learning_rate": 3.408257202271081e-05,
      "loss": 0.8586,
      "step": 330850
    },
    {
      "epoch": 1.1595818131477218,
      "grad_norm": 2.953125,
      "learning_rate": 3.408192299404711e-05,
      "loss": 0.8308,
      "step": 330860
    },
    {
      "epoch": 1.1596168606546173,
      "grad_norm": 2.90625,
      "learning_rate": 3.408127396538341e-05,
      "loss": 0.8344,
      "step": 330870
    },
    {
      "epoch": 1.1596519081615129,
      "grad_norm": 2.921875,
      "learning_rate": 3.408062493671971e-05,
      "loss": 0.9126,
      "step": 330880
    },
    {
      "epoch": 1.1596869556684086,
      "grad_norm": 2.90625,
      "learning_rate": 3.4079975908056e-05,
      "loss": 0.8659,
      "step": 330890
    },
    {
      "epoch": 1.1597220031753042,
      "grad_norm": 3.0,
      "learning_rate": 3.4079326879392305e-05,
      "loss": 0.9038,
      "step": 330900
    },
    {
      "epoch": 1.1597570506821997,
      "grad_norm": 2.90625,
      "learning_rate": 3.40786778507286e-05,
      "loss": 0.9298,
      "step": 330910
    },
    {
      "epoch": 1.1597920981890952,
      "grad_norm": 3.046875,
      "learning_rate": 3.40780288220649e-05,
      "loss": 0.8229,
      "step": 330920
    },
    {
      "epoch": 1.159827145695991,
      "grad_norm": 2.5,
      "learning_rate": 3.4077379793401195e-05,
      "loss": 0.8712,
      "step": 330930
    },
    {
      "epoch": 1.1598621932028865,
      "grad_norm": 3.109375,
      "learning_rate": 3.40767307647375e-05,
      "loss": 0.8452,
      "step": 330940
    },
    {
      "epoch": 1.159897240709782,
      "grad_norm": 3.328125,
      "learning_rate": 3.40760817360738e-05,
      "loss": 0.9126,
      "step": 330950
    },
    {
      "epoch": 1.1599322882166776,
      "grad_norm": 2.828125,
      "learning_rate": 3.407543270741009e-05,
      "loss": 0.9441,
      "step": 330960
    },
    {
      "epoch": 1.1599673357235734,
      "grad_norm": 2.9375,
      "learning_rate": 3.4074783678746394e-05,
      "loss": 0.8723,
      "step": 330970
    },
    {
      "epoch": 1.160002383230469,
      "grad_norm": 2.859375,
      "learning_rate": 3.407413465008269e-05,
      "loss": 0.8868,
      "step": 330980
    },
    {
      "epoch": 1.1600374307373644,
      "grad_norm": 2.765625,
      "learning_rate": 3.407348562141899e-05,
      "loss": 0.8344,
      "step": 330990
    },
    {
      "epoch": 1.1600724782442602,
      "grad_norm": 2.65625,
      "learning_rate": 3.4072836592755285e-05,
      "loss": 0.8985,
      "step": 331000
    },
    {
      "epoch": 1.1601075257511557,
      "grad_norm": 2.78125,
      "learning_rate": 3.4072187564091586e-05,
      "loss": 0.783,
      "step": 331010
    },
    {
      "epoch": 1.1601425732580513,
      "grad_norm": 3.234375,
      "learning_rate": 3.407153853542788e-05,
      "loss": 0.9155,
      "step": 331020
    },
    {
      "epoch": 1.1601776207649468,
      "grad_norm": 2.6875,
      "learning_rate": 3.407088950676418e-05,
      "loss": 0.8822,
      "step": 331030
    },
    {
      "epoch": 1.1602126682718426,
      "grad_norm": 2.90625,
      "learning_rate": 3.407024047810048e-05,
      "loss": 0.8547,
      "step": 331040
    },
    {
      "epoch": 1.160247715778738,
      "grad_norm": 3.234375,
      "learning_rate": 3.406959144943677e-05,
      "loss": 0.8621,
      "step": 331050
    },
    {
      "epoch": 1.1602827632856336,
      "grad_norm": 2.890625,
      "learning_rate": 3.406894242077307e-05,
      "loss": 0.8139,
      "step": 331060
    },
    {
      "epoch": 1.1603178107925292,
      "grad_norm": 3.125,
      "learning_rate": 3.406829339210937e-05,
      "loss": 0.7949,
      "step": 331070
    },
    {
      "epoch": 1.160352858299425,
      "grad_norm": 2.71875,
      "learning_rate": 3.406764436344567e-05,
      "loss": 0.9092,
      "step": 331080
    },
    {
      "epoch": 1.1603879058063205,
      "grad_norm": 2.796875,
      "learning_rate": 3.406699533478196e-05,
      "loss": 0.8696,
      "step": 331090
    },
    {
      "epoch": 1.160422953313216,
      "grad_norm": 2.390625,
      "learning_rate": 3.4066346306118265e-05,
      "loss": 0.7926,
      "step": 331100
    },
    {
      "epoch": 1.1604580008201117,
      "grad_norm": 3.421875,
      "learning_rate": 3.406569727745456e-05,
      "loss": 0.8176,
      "step": 331110
    },
    {
      "epoch": 1.1604930483270073,
      "grad_norm": 2.859375,
      "learning_rate": 3.406504824879086e-05,
      "loss": 0.8536,
      "step": 331120
    },
    {
      "epoch": 1.1605280958339028,
      "grad_norm": 2.359375,
      "learning_rate": 3.4064399220127155e-05,
      "loss": 0.7995,
      "step": 331130
    },
    {
      "epoch": 1.1605631433407986,
      "grad_norm": 3.046875,
      "learning_rate": 3.406375019146346e-05,
      "loss": 0.902,
      "step": 331140
    },
    {
      "epoch": 1.1605981908476941,
      "grad_norm": 3.390625,
      "learning_rate": 3.406310116279975e-05,
      "loss": 0.9438,
      "step": 331150
    },
    {
      "epoch": 1.1606332383545896,
      "grad_norm": 2.515625,
      "learning_rate": 3.406245213413605e-05,
      "loss": 0.895,
      "step": 331160
    },
    {
      "epoch": 1.1606682858614852,
      "grad_norm": 3.046875,
      "learning_rate": 3.4061803105472354e-05,
      "loss": 0.8884,
      "step": 331170
    },
    {
      "epoch": 1.1607033333683807,
      "grad_norm": 2.203125,
      "learning_rate": 3.406115407680865e-05,
      "loss": 0.8068,
      "step": 331180
    },
    {
      "epoch": 1.1607383808752765,
      "grad_norm": 3.09375,
      "learning_rate": 3.406050504814495e-05,
      "loss": 0.8705,
      "step": 331190
    },
    {
      "epoch": 1.160773428382172,
      "grad_norm": 3.0,
      "learning_rate": 3.4059856019481245e-05,
      "loss": 0.8108,
      "step": 331200
    },
    {
      "epoch": 1.1608084758890675,
      "grad_norm": 2.90625,
      "learning_rate": 3.4059206990817546e-05,
      "loss": 0.7992,
      "step": 331210
    },
    {
      "epoch": 1.1608435233959633,
      "grad_norm": 2.859375,
      "learning_rate": 3.405855796215384e-05,
      "loss": 0.866,
      "step": 331220
    },
    {
      "epoch": 1.1608785709028588,
      "grad_norm": 3.109375,
      "learning_rate": 3.405790893349014e-05,
      "loss": 0.9987,
      "step": 331230
    },
    {
      "epoch": 1.1609136184097544,
      "grad_norm": 3.078125,
      "learning_rate": 3.405725990482644e-05,
      "loss": 0.8251,
      "step": 331240
    },
    {
      "epoch": 1.1609486659166501,
      "grad_norm": 2.8125,
      "learning_rate": 3.405661087616274e-05,
      "loss": 0.9123,
      "step": 331250
    },
    {
      "epoch": 1.1609837134235457,
      "grad_norm": 3.5,
      "learning_rate": 3.405596184749903e-05,
      "loss": 0.8763,
      "step": 331260
    },
    {
      "epoch": 1.1610187609304412,
      "grad_norm": 2.75,
      "learning_rate": 3.4055312818835334e-05,
      "loss": 0.8959,
      "step": 331270
    },
    {
      "epoch": 1.1610538084373367,
      "grad_norm": 3.03125,
      "learning_rate": 3.405466379017163e-05,
      "loss": 0.8826,
      "step": 331280
    },
    {
      "epoch": 1.1610888559442325,
      "grad_norm": 2.921875,
      "learning_rate": 3.405401476150793e-05,
      "loss": 0.9109,
      "step": 331290
    },
    {
      "epoch": 1.161123903451128,
      "grad_norm": 2.53125,
      "learning_rate": 3.4053365732844225e-05,
      "loss": 0.7599,
      "step": 331300
    },
    {
      "epoch": 1.1611589509580236,
      "grad_norm": 2.703125,
      "learning_rate": 3.4052716704180526e-05,
      "loss": 0.8691,
      "step": 331310
    },
    {
      "epoch": 1.161193998464919,
      "grad_norm": 2.828125,
      "learning_rate": 3.405206767551683e-05,
      "loss": 0.9122,
      "step": 331320
    },
    {
      "epoch": 1.1612290459718149,
      "grad_norm": 2.875,
      "learning_rate": 3.405141864685312e-05,
      "loss": 0.8455,
      "step": 331330
    },
    {
      "epoch": 1.1612640934787104,
      "grad_norm": 2.546875,
      "learning_rate": 3.4050769618189424e-05,
      "loss": 0.8763,
      "step": 331340
    },
    {
      "epoch": 1.161299140985606,
      "grad_norm": 3.96875,
      "learning_rate": 3.405012058952572e-05,
      "loss": 0.8933,
      "step": 331350
    },
    {
      "epoch": 1.1613341884925017,
      "grad_norm": 2.8125,
      "learning_rate": 3.404947156086202e-05,
      "loss": 0.8503,
      "step": 331360
    },
    {
      "epoch": 1.1613692359993972,
      "grad_norm": 2.890625,
      "learning_rate": 3.4048822532198314e-05,
      "loss": 0.8456,
      "step": 331370
    },
    {
      "epoch": 1.1614042835062928,
      "grad_norm": 2.84375,
      "learning_rate": 3.4048173503534616e-05,
      "loss": 0.8518,
      "step": 331380
    },
    {
      "epoch": 1.1614393310131883,
      "grad_norm": 3.6875,
      "learning_rate": 3.404752447487091e-05,
      "loss": 0.8644,
      "step": 331390
    },
    {
      "epoch": 1.161474378520084,
      "grad_norm": 2.375,
      "learning_rate": 3.404687544620721e-05,
      "loss": 0.8622,
      "step": 331400
    },
    {
      "epoch": 1.1615094260269796,
      "grad_norm": 2.640625,
      "learning_rate": 3.4046226417543506e-05,
      "loss": 0.9408,
      "step": 331410
    },
    {
      "epoch": 1.1615444735338751,
      "grad_norm": 2.828125,
      "learning_rate": 3.404557738887981e-05,
      "loss": 0.8917,
      "step": 331420
    },
    {
      "epoch": 1.1615795210407707,
      "grad_norm": 3.015625,
      "learning_rate": 3.40449283602161e-05,
      "loss": 0.9186,
      "step": 331430
    },
    {
      "epoch": 1.1616145685476664,
      "grad_norm": 2.859375,
      "learning_rate": 3.40442793315524e-05,
      "loss": 0.7595,
      "step": 331440
    },
    {
      "epoch": 1.161649616054562,
      "grad_norm": 2.6875,
      "learning_rate": 3.40436303028887e-05,
      "loss": 0.8044,
      "step": 331450
    },
    {
      "epoch": 1.1616846635614575,
      "grad_norm": 2.734375,
      "learning_rate": 3.404298127422499e-05,
      "loss": 0.8105,
      "step": 331460
    },
    {
      "epoch": 1.1617197110683533,
      "grad_norm": 2.484375,
      "learning_rate": 3.4042332245561294e-05,
      "loss": 0.9285,
      "step": 331470
    },
    {
      "epoch": 1.1617547585752488,
      "grad_norm": 3.234375,
      "learning_rate": 3.404168321689759e-05,
      "loss": 0.982,
      "step": 331480
    },
    {
      "epoch": 1.1617898060821443,
      "grad_norm": 3.265625,
      "learning_rate": 3.404103418823389e-05,
      "loss": 0.8759,
      "step": 331490
    },
    {
      "epoch": 1.1618248535890399,
      "grad_norm": 2.578125,
      "learning_rate": 3.4040385159570185e-05,
      "loss": 0.8193,
      "step": 331500
    },
    {
      "epoch": 1.1618599010959356,
      "grad_norm": 2.515625,
      "learning_rate": 3.4039736130906486e-05,
      "loss": 0.87,
      "step": 331510
    },
    {
      "epoch": 1.1618949486028312,
      "grad_norm": 2.84375,
      "learning_rate": 3.403908710224278e-05,
      "loss": 0.9099,
      "step": 331520
    },
    {
      "epoch": 1.1619299961097267,
      "grad_norm": 2.796875,
      "learning_rate": 3.403843807357908e-05,
      "loss": 0.7855,
      "step": 331530
    },
    {
      "epoch": 1.1619650436166222,
      "grad_norm": 2.984375,
      "learning_rate": 3.4037789044915384e-05,
      "loss": 0.8615,
      "step": 331540
    },
    {
      "epoch": 1.162000091123518,
      "grad_norm": 2.9375,
      "learning_rate": 3.403714001625168e-05,
      "loss": 0.8861,
      "step": 331550
    },
    {
      "epoch": 1.1620351386304135,
      "grad_norm": 3.0625,
      "learning_rate": 3.403649098758798e-05,
      "loss": 0.8039,
      "step": 331560
    },
    {
      "epoch": 1.162070186137309,
      "grad_norm": 3.265625,
      "learning_rate": 3.4035841958924274e-05,
      "loss": 0.8955,
      "step": 331570
    },
    {
      "epoch": 1.1621052336442048,
      "grad_norm": 3.203125,
      "learning_rate": 3.4035192930260576e-05,
      "loss": 0.8502,
      "step": 331580
    },
    {
      "epoch": 1.1621402811511004,
      "grad_norm": 3.171875,
      "learning_rate": 3.403454390159687e-05,
      "loss": 0.915,
      "step": 331590
    },
    {
      "epoch": 1.1621753286579959,
      "grad_norm": 2.703125,
      "learning_rate": 3.403389487293317e-05,
      "loss": 0.8411,
      "step": 331600
    },
    {
      "epoch": 1.1622103761648914,
      "grad_norm": 2.71875,
      "learning_rate": 3.4033245844269466e-05,
      "loss": 0.8869,
      "step": 331610
    },
    {
      "epoch": 1.1622454236717872,
      "grad_norm": 2.578125,
      "learning_rate": 3.403259681560577e-05,
      "loss": 0.7966,
      "step": 331620
    },
    {
      "epoch": 1.1622804711786827,
      "grad_norm": 2.90625,
      "learning_rate": 3.403194778694206e-05,
      "loss": 0.9129,
      "step": 331630
    },
    {
      "epoch": 1.1623155186855783,
      "grad_norm": 2.921875,
      "learning_rate": 3.4031298758278364e-05,
      "loss": 0.8733,
      "step": 331640
    },
    {
      "epoch": 1.1623505661924738,
      "grad_norm": 3.625,
      "learning_rate": 3.403064972961466e-05,
      "loss": 0.9488,
      "step": 331650
    },
    {
      "epoch": 1.1623856136993695,
      "grad_norm": 3.1875,
      "learning_rate": 3.403000070095096e-05,
      "loss": 0.9573,
      "step": 331660
    },
    {
      "epoch": 1.162420661206265,
      "grad_norm": 2.4375,
      "learning_rate": 3.4029351672287254e-05,
      "loss": 0.896,
      "step": 331670
    },
    {
      "epoch": 1.1624557087131606,
      "grad_norm": 2.90625,
      "learning_rate": 3.4028702643623556e-05,
      "loss": 0.8592,
      "step": 331680
    },
    {
      "epoch": 1.1624907562200564,
      "grad_norm": 3.078125,
      "learning_rate": 3.402805361495986e-05,
      "loss": 0.9066,
      "step": 331690
    },
    {
      "epoch": 1.162525803726952,
      "grad_norm": 3.046875,
      "learning_rate": 3.402740458629615e-05,
      "loss": 0.8579,
      "step": 331700
    },
    {
      "epoch": 1.1625608512338474,
      "grad_norm": 2.65625,
      "learning_rate": 3.402675555763245e-05,
      "loss": 0.8716,
      "step": 331710
    },
    {
      "epoch": 1.162595898740743,
      "grad_norm": 3.03125,
      "learning_rate": 3.402610652896875e-05,
      "loss": 0.9551,
      "step": 331720
    },
    {
      "epoch": 1.1626309462476387,
      "grad_norm": 3.046875,
      "learning_rate": 3.402545750030505e-05,
      "loss": 0.8593,
      "step": 331730
    },
    {
      "epoch": 1.1626659937545343,
      "grad_norm": 3.390625,
      "learning_rate": 3.4024808471641344e-05,
      "loss": 0.9078,
      "step": 331740
    },
    {
      "epoch": 1.1627010412614298,
      "grad_norm": 2.796875,
      "learning_rate": 3.4024159442977645e-05,
      "loss": 0.823,
      "step": 331750
    },
    {
      "epoch": 1.1627360887683253,
      "grad_norm": 2.734375,
      "learning_rate": 3.402351041431394e-05,
      "loss": 0.8718,
      "step": 331760
    },
    {
      "epoch": 1.162771136275221,
      "grad_norm": 3.25,
      "learning_rate": 3.402286138565024e-05,
      "loss": 0.9108,
      "step": 331770
    },
    {
      "epoch": 1.1628061837821166,
      "grad_norm": 2.40625,
      "learning_rate": 3.4022212356986536e-05,
      "loss": 0.8795,
      "step": 331780
    },
    {
      "epoch": 1.1628412312890122,
      "grad_norm": 3.0,
      "learning_rate": 3.402156332832284e-05,
      "loss": 0.7831,
      "step": 331790
    },
    {
      "epoch": 1.162876278795908,
      "grad_norm": 3.03125,
      "learning_rate": 3.402091429965913e-05,
      "loss": 0.893,
      "step": 331800
    },
    {
      "epoch": 1.1629113263028035,
      "grad_norm": 2.828125,
      "learning_rate": 3.4020265270995426e-05,
      "loss": 0.8904,
      "step": 331810
    },
    {
      "epoch": 1.162946373809699,
      "grad_norm": 2.703125,
      "learning_rate": 3.401961624233173e-05,
      "loss": 0.8556,
      "step": 331820
    },
    {
      "epoch": 1.1629814213165948,
      "grad_norm": 3.140625,
      "learning_rate": 3.401896721366802e-05,
      "loss": 0.7818,
      "step": 331830
    },
    {
      "epoch": 1.1630164688234903,
      "grad_norm": 3.015625,
      "learning_rate": 3.4018318185004324e-05,
      "loss": 0.8171,
      "step": 331840
    },
    {
      "epoch": 1.1630515163303858,
      "grad_norm": 3.203125,
      "learning_rate": 3.401766915634062e-05,
      "loss": 0.8289,
      "step": 331850
    },
    {
      "epoch": 1.1630865638372814,
      "grad_norm": 3.078125,
      "learning_rate": 3.401702012767692e-05,
      "loss": 0.856,
      "step": 331860
    },
    {
      "epoch": 1.163121611344177,
      "grad_norm": 3.078125,
      "learning_rate": 3.4016371099013214e-05,
      "loss": 0.903,
      "step": 331870
    },
    {
      "epoch": 1.1631566588510727,
      "grad_norm": 2.71875,
      "learning_rate": 3.4015722070349516e-05,
      "loss": 0.8138,
      "step": 331880
    },
    {
      "epoch": 1.1631917063579682,
      "grad_norm": 2.921875,
      "learning_rate": 3.401507304168581e-05,
      "loss": 0.8673,
      "step": 331890
    },
    {
      "epoch": 1.1632267538648637,
      "grad_norm": 2.9375,
      "learning_rate": 3.401442401302211e-05,
      "loss": 0.8347,
      "step": 331900
    },
    {
      "epoch": 1.1632618013717595,
      "grad_norm": 3.3125,
      "learning_rate": 3.401377498435841e-05,
      "loss": 0.7857,
      "step": 331910
    },
    {
      "epoch": 1.163296848878655,
      "grad_norm": 3.109375,
      "learning_rate": 3.401312595569471e-05,
      "loss": 0.9391,
      "step": 331920
    },
    {
      "epoch": 1.1633318963855506,
      "grad_norm": 2.515625,
      "learning_rate": 3.401247692703101e-05,
      "loss": 0.8592,
      "step": 331930
    },
    {
      "epoch": 1.1633669438924463,
      "grad_norm": 2.546875,
      "learning_rate": 3.4011827898367304e-05,
      "loss": 0.917,
      "step": 331940
    },
    {
      "epoch": 1.1634019913993419,
      "grad_norm": 2.84375,
      "learning_rate": 3.4011178869703605e-05,
      "loss": 0.9018,
      "step": 331950
    },
    {
      "epoch": 1.1634370389062374,
      "grad_norm": 2.796875,
      "learning_rate": 3.40105298410399e-05,
      "loss": 0.8247,
      "step": 331960
    },
    {
      "epoch": 1.163472086413133,
      "grad_norm": 3.125,
      "learning_rate": 3.40098808123762e-05,
      "loss": 0.8434,
      "step": 331970
    },
    {
      "epoch": 1.1635071339200287,
      "grad_norm": 2.84375,
      "learning_rate": 3.4009231783712496e-05,
      "loss": 0.8366,
      "step": 331980
    },
    {
      "epoch": 1.1635421814269242,
      "grad_norm": 2.84375,
      "learning_rate": 3.40085827550488e-05,
      "loss": 0.8961,
      "step": 331990
    },
    {
      "epoch": 1.1635772289338198,
      "grad_norm": 2.71875,
      "learning_rate": 3.400793372638509e-05,
      "loss": 0.922,
      "step": 332000
    },
    {
      "epoch": 1.1636122764407153,
      "grad_norm": 3.140625,
      "learning_rate": 3.400728469772139e-05,
      "loss": 0.8284,
      "step": 332010
    },
    {
      "epoch": 1.163647323947611,
      "grad_norm": 3.21875,
      "learning_rate": 3.400663566905769e-05,
      "loss": 0.9065,
      "step": 332020
    },
    {
      "epoch": 1.1636823714545066,
      "grad_norm": 2.953125,
      "learning_rate": 3.400598664039399e-05,
      "loss": 0.9544,
      "step": 332030
    },
    {
      "epoch": 1.1637174189614021,
      "grad_norm": 3.203125,
      "learning_rate": 3.4005337611730284e-05,
      "loss": 0.8975,
      "step": 332040
    },
    {
      "epoch": 1.1637524664682979,
      "grad_norm": 2.859375,
      "learning_rate": 3.4004688583066585e-05,
      "loss": 0.8805,
      "step": 332050
    },
    {
      "epoch": 1.1637875139751934,
      "grad_norm": 2.46875,
      "learning_rate": 3.4004039554402886e-05,
      "loss": 0.8049,
      "step": 332060
    },
    {
      "epoch": 1.163822561482089,
      "grad_norm": 3.078125,
      "learning_rate": 3.400339052573918e-05,
      "loss": 0.8502,
      "step": 332070
    },
    {
      "epoch": 1.1638576089889845,
      "grad_norm": 2.875,
      "learning_rate": 3.400274149707548e-05,
      "loss": 0.8768,
      "step": 332080
    },
    {
      "epoch": 1.1638926564958803,
      "grad_norm": 2.71875,
      "learning_rate": 3.400209246841178e-05,
      "loss": 0.9321,
      "step": 332090
    },
    {
      "epoch": 1.1639277040027758,
      "grad_norm": 3.21875,
      "learning_rate": 3.400144343974808e-05,
      "loss": 0.8138,
      "step": 332100
    },
    {
      "epoch": 1.1639627515096713,
      "grad_norm": 2.546875,
      "learning_rate": 3.400079441108437e-05,
      "loss": 0.8252,
      "step": 332110
    },
    {
      "epoch": 1.1639977990165669,
      "grad_norm": 2.8125,
      "learning_rate": 3.4000145382420674e-05,
      "loss": 0.8332,
      "step": 332120
    },
    {
      "epoch": 1.1640328465234626,
      "grad_norm": 2.671875,
      "learning_rate": 3.399949635375697e-05,
      "loss": 0.9078,
      "step": 332130
    },
    {
      "epoch": 1.1640678940303582,
      "grad_norm": 2.546875,
      "learning_rate": 3.399884732509327e-05,
      "loss": 0.7898,
      "step": 332140
    },
    {
      "epoch": 1.1641029415372537,
      "grad_norm": 2.796875,
      "learning_rate": 3.3998198296429565e-05,
      "loss": 0.8794,
      "step": 332150
    },
    {
      "epoch": 1.1641379890441494,
      "grad_norm": 2.859375,
      "learning_rate": 3.3997549267765866e-05,
      "loss": 0.8975,
      "step": 332160
    },
    {
      "epoch": 1.164173036551045,
      "grad_norm": 2.921875,
      "learning_rate": 3.399690023910216e-05,
      "loss": 0.8221,
      "step": 332170
    },
    {
      "epoch": 1.1642080840579405,
      "grad_norm": 2.546875,
      "learning_rate": 3.3996251210438456e-05,
      "loss": 0.8494,
      "step": 332180
    },
    {
      "epoch": 1.164243131564836,
      "grad_norm": 3.0,
      "learning_rate": 3.399560218177476e-05,
      "loss": 0.9105,
      "step": 332190
    },
    {
      "epoch": 1.1642781790717318,
      "grad_norm": 2.921875,
      "learning_rate": 3.399495315311105e-05,
      "loss": 0.8551,
      "step": 332200
    },
    {
      "epoch": 1.1643132265786273,
      "grad_norm": 2.421875,
      "learning_rate": 3.399430412444735e-05,
      "loss": 0.8837,
      "step": 332210
    },
    {
      "epoch": 1.1643482740855229,
      "grad_norm": 3.0625,
      "learning_rate": 3.399365509578365e-05,
      "loss": 0.8213,
      "step": 332220
    },
    {
      "epoch": 1.1643833215924184,
      "grad_norm": 3.703125,
      "learning_rate": 3.399300606711995e-05,
      "loss": 0.9135,
      "step": 332230
    },
    {
      "epoch": 1.1644183690993142,
      "grad_norm": 2.984375,
      "learning_rate": 3.3992357038456244e-05,
      "loss": 0.852,
      "step": 332240
    },
    {
      "epoch": 1.1644534166062097,
      "grad_norm": 2.546875,
      "learning_rate": 3.3991708009792545e-05,
      "loss": 0.8411,
      "step": 332250
    },
    {
      "epoch": 1.1644884641131052,
      "grad_norm": 2.9375,
      "learning_rate": 3.399105898112884e-05,
      "loss": 0.9022,
      "step": 332260
    },
    {
      "epoch": 1.164523511620001,
      "grad_norm": 2.8125,
      "learning_rate": 3.399040995246514e-05,
      "loss": 0.8305,
      "step": 332270
    },
    {
      "epoch": 1.1645585591268965,
      "grad_norm": 2.953125,
      "learning_rate": 3.398976092380144e-05,
      "loss": 0.8586,
      "step": 332280
    },
    {
      "epoch": 1.164593606633792,
      "grad_norm": 3.34375,
      "learning_rate": 3.398911189513774e-05,
      "loss": 0.8525,
      "step": 332290
    },
    {
      "epoch": 1.1646286541406876,
      "grad_norm": 3.046875,
      "learning_rate": 3.398846286647404e-05,
      "loss": 0.8434,
      "step": 332300
    },
    {
      "epoch": 1.1646637016475834,
      "grad_norm": 2.796875,
      "learning_rate": 3.398781383781033e-05,
      "loss": 0.894,
      "step": 332310
    },
    {
      "epoch": 1.164698749154479,
      "grad_norm": 3.046875,
      "learning_rate": 3.3987164809146634e-05,
      "loss": 0.9278,
      "step": 332320
    },
    {
      "epoch": 1.1647337966613744,
      "grad_norm": 2.921875,
      "learning_rate": 3.398651578048293e-05,
      "loss": 0.8531,
      "step": 332330
    },
    {
      "epoch": 1.16476884416827,
      "grad_norm": 3.375,
      "learning_rate": 3.398586675181923e-05,
      "loss": 0.8621,
      "step": 332340
    },
    {
      "epoch": 1.1648038916751657,
      "grad_norm": 2.71875,
      "learning_rate": 3.3985217723155525e-05,
      "loss": 0.8653,
      "step": 332350
    },
    {
      "epoch": 1.1648389391820613,
      "grad_norm": 2.765625,
      "learning_rate": 3.3984568694491826e-05,
      "loss": 0.9132,
      "step": 332360
    },
    {
      "epoch": 1.1648739866889568,
      "grad_norm": 2.71875,
      "learning_rate": 3.398391966582812e-05,
      "loss": 0.849,
      "step": 332370
    },
    {
      "epoch": 1.1649090341958526,
      "grad_norm": 2.671875,
      "learning_rate": 3.398327063716442e-05,
      "loss": 0.8813,
      "step": 332380
    },
    {
      "epoch": 1.164944081702748,
      "grad_norm": 2.875,
      "learning_rate": 3.398262160850072e-05,
      "loss": 0.8438,
      "step": 332390
    },
    {
      "epoch": 1.1649791292096436,
      "grad_norm": 2.84375,
      "learning_rate": 3.398197257983702e-05,
      "loss": 0.9021,
      "step": 332400
    },
    {
      "epoch": 1.1650141767165392,
      "grad_norm": 3.15625,
      "learning_rate": 3.398132355117332e-05,
      "loss": 0.9777,
      "step": 332410
    },
    {
      "epoch": 1.165049224223435,
      "grad_norm": 2.6875,
      "learning_rate": 3.3980674522509614e-05,
      "loss": 0.8041,
      "step": 332420
    },
    {
      "epoch": 1.1650842717303305,
      "grad_norm": 2.921875,
      "learning_rate": 3.3980025493845916e-05,
      "loss": 0.8262,
      "step": 332430
    },
    {
      "epoch": 1.165119319237226,
      "grad_norm": 3.125,
      "learning_rate": 3.397937646518221e-05,
      "loss": 0.8551,
      "step": 332440
    },
    {
      "epoch": 1.1651543667441215,
      "grad_norm": 3.0,
      "learning_rate": 3.397872743651851e-05,
      "loss": 0.9318,
      "step": 332450
    },
    {
      "epoch": 1.1651894142510173,
      "grad_norm": 2.609375,
      "learning_rate": 3.3978078407854806e-05,
      "loss": 0.8131,
      "step": 332460
    },
    {
      "epoch": 1.1652244617579128,
      "grad_norm": 3.109375,
      "learning_rate": 3.397742937919111e-05,
      "loss": 0.9306,
      "step": 332470
    },
    {
      "epoch": 1.1652595092648084,
      "grad_norm": 2.78125,
      "learning_rate": 3.39767803505274e-05,
      "loss": 0.8121,
      "step": 332480
    },
    {
      "epoch": 1.1652945567717041,
      "grad_norm": 2.734375,
      "learning_rate": 3.3976131321863704e-05,
      "loss": 0.8489,
      "step": 332490
    },
    {
      "epoch": 1.1653296042785997,
      "grad_norm": 2.765625,
      "learning_rate": 3.39754822932e-05,
      "loss": 0.9162,
      "step": 332500
    },
    {
      "epoch": 1.1653646517854952,
      "grad_norm": 2.703125,
      "learning_rate": 3.39748332645363e-05,
      "loss": 0.8184,
      "step": 332510
    },
    {
      "epoch": 1.165399699292391,
      "grad_norm": 2.859375,
      "learning_rate": 3.3974184235872594e-05,
      "loss": 0.8716,
      "step": 332520
    },
    {
      "epoch": 1.1654347467992865,
      "grad_norm": 2.84375,
      "learning_rate": 3.3973535207208896e-05,
      "loss": 0.8448,
      "step": 332530
    },
    {
      "epoch": 1.165469794306182,
      "grad_norm": 2.265625,
      "learning_rate": 3.397288617854519e-05,
      "loss": 0.7851,
      "step": 332540
    },
    {
      "epoch": 1.1655048418130776,
      "grad_norm": 3.078125,
      "learning_rate": 3.397223714988149e-05,
      "loss": 0.9137,
      "step": 332550
    },
    {
      "epoch": 1.165539889319973,
      "grad_norm": 2.9375,
      "learning_rate": 3.3971588121217786e-05,
      "loss": 0.8841,
      "step": 332560
    },
    {
      "epoch": 1.1655749368268689,
      "grad_norm": 3.296875,
      "learning_rate": 3.397093909255408e-05,
      "loss": 0.896,
      "step": 332570
    },
    {
      "epoch": 1.1656099843337644,
      "grad_norm": 2.890625,
      "learning_rate": 3.397029006389038e-05,
      "loss": 0.9325,
      "step": 332580
    },
    {
      "epoch": 1.16564503184066,
      "grad_norm": 2.59375,
      "learning_rate": 3.396964103522668e-05,
      "loss": 0.849,
      "step": 332590
    },
    {
      "epoch": 1.1656800793475557,
      "grad_norm": 2.984375,
      "learning_rate": 3.396899200656298e-05,
      "loss": 0.945,
      "step": 332600
    },
    {
      "epoch": 1.1657151268544512,
      "grad_norm": 2.875,
      "learning_rate": 3.396834297789927e-05,
      "loss": 0.8147,
      "step": 332610
    },
    {
      "epoch": 1.1657501743613468,
      "grad_norm": 2.828125,
      "learning_rate": 3.3967693949235574e-05,
      "loss": 0.7641,
      "step": 332620
    },
    {
      "epoch": 1.1657852218682425,
      "grad_norm": 3.109375,
      "learning_rate": 3.396704492057187e-05,
      "loss": 0.8873,
      "step": 332630
    },
    {
      "epoch": 1.165820269375138,
      "grad_norm": 2.578125,
      "learning_rate": 3.396639589190817e-05,
      "loss": 0.8285,
      "step": 332640
    },
    {
      "epoch": 1.1658553168820336,
      "grad_norm": 2.9375,
      "learning_rate": 3.396574686324447e-05,
      "loss": 0.9221,
      "step": 332650
    },
    {
      "epoch": 1.1658903643889291,
      "grad_norm": 3.09375,
      "learning_rate": 3.3965097834580766e-05,
      "loss": 0.9171,
      "step": 332660
    },
    {
      "epoch": 1.1659254118958249,
      "grad_norm": 2.78125,
      "learning_rate": 3.396444880591707e-05,
      "loss": 0.8936,
      "step": 332670
    },
    {
      "epoch": 1.1659604594027204,
      "grad_norm": 3.03125,
      "learning_rate": 3.396379977725336e-05,
      "loss": 0.9121,
      "step": 332680
    },
    {
      "epoch": 1.165995506909616,
      "grad_norm": 2.5625,
      "learning_rate": 3.3963150748589664e-05,
      "loss": 0.9713,
      "step": 332690
    },
    {
      "epoch": 1.1660305544165115,
      "grad_norm": 2.78125,
      "learning_rate": 3.396250171992596e-05,
      "loss": 0.9468,
      "step": 332700
    },
    {
      "epoch": 1.1660656019234072,
      "grad_norm": 2.90625,
      "learning_rate": 3.396185269126226e-05,
      "loss": 0.8977,
      "step": 332710
    },
    {
      "epoch": 1.1661006494303028,
      "grad_norm": 2.59375,
      "learning_rate": 3.3961203662598554e-05,
      "loss": 0.8428,
      "step": 332720
    },
    {
      "epoch": 1.1661356969371983,
      "grad_norm": 3.09375,
      "learning_rate": 3.3960554633934856e-05,
      "loss": 0.8268,
      "step": 332730
    },
    {
      "epoch": 1.166170744444094,
      "grad_norm": 2.890625,
      "learning_rate": 3.395990560527115e-05,
      "loss": 0.9529,
      "step": 332740
    },
    {
      "epoch": 1.1662057919509896,
      "grad_norm": 2.75,
      "learning_rate": 3.395925657660745e-05,
      "loss": 0.8743,
      "step": 332750
    },
    {
      "epoch": 1.1662408394578851,
      "grad_norm": 2.515625,
      "learning_rate": 3.3958607547943746e-05,
      "loss": 0.8423,
      "step": 332760
    },
    {
      "epoch": 1.1662758869647807,
      "grad_norm": 3.40625,
      "learning_rate": 3.395795851928005e-05,
      "loss": 0.9053,
      "step": 332770
    },
    {
      "epoch": 1.1663109344716764,
      "grad_norm": 2.390625,
      "learning_rate": 3.395730949061635e-05,
      "loss": 0.8211,
      "step": 332780
    },
    {
      "epoch": 1.166345981978572,
      "grad_norm": 2.90625,
      "learning_rate": 3.3956660461952644e-05,
      "loss": 0.8169,
      "step": 332790
    },
    {
      "epoch": 1.1663810294854675,
      "grad_norm": 3.109375,
      "learning_rate": 3.3956011433288945e-05,
      "loss": 0.8552,
      "step": 332800
    },
    {
      "epoch": 1.166416076992363,
      "grad_norm": 3.109375,
      "learning_rate": 3.395536240462524e-05,
      "loss": 0.8435,
      "step": 332810
    },
    {
      "epoch": 1.1664511244992588,
      "grad_norm": 3.171875,
      "learning_rate": 3.395471337596154e-05,
      "loss": 0.8864,
      "step": 332820
    },
    {
      "epoch": 1.1664861720061543,
      "grad_norm": 2.46875,
      "learning_rate": 3.3954064347297836e-05,
      "loss": 0.7876,
      "step": 332830
    },
    {
      "epoch": 1.1665212195130499,
      "grad_norm": 2.375,
      "learning_rate": 3.395341531863414e-05,
      "loss": 0.8442,
      "step": 332840
    },
    {
      "epoch": 1.1665562670199456,
      "grad_norm": 2.484375,
      "learning_rate": 3.395276628997043e-05,
      "loss": 0.9077,
      "step": 332850
    },
    {
      "epoch": 1.1665913145268412,
      "grad_norm": 2.46875,
      "learning_rate": 3.395211726130673e-05,
      "loss": 0.7118,
      "step": 332860
    },
    {
      "epoch": 1.1666263620337367,
      "grad_norm": 3.328125,
      "learning_rate": 3.395146823264303e-05,
      "loss": 0.908,
      "step": 332870
    },
    {
      "epoch": 1.1666614095406322,
      "grad_norm": 3.328125,
      "learning_rate": 3.395081920397933e-05,
      "loss": 0.8139,
      "step": 332880
    },
    {
      "epoch": 1.166696457047528,
      "grad_norm": 2.671875,
      "learning_rate": 3.3950170175315624e-05,
      "loss": 0.8206,
      "step": 332890
    },
    {
      "epoch": 1.1667315045544235,
      "grad_norm": 3.015625,
      "learning_rate": 3.3949521146651925e-05,
      "loss": 0.9056,
      "step": 332900
    },
    {
      "epoch": 1.166766552061319,
      "grad_norm": 3.0625,
      "learning_rate": 3.394887211798822e-05,
      "loss": 0.867,
      "step": 332910
    },
    {
      "epoch": 1.1668015995682146,
      "grad_norm": 2.796875,
      "learning_rate": 3.394822308932452e-05,
      "loss": 0.8069,
      "step": 332920
    },
    {
      "epoch": 1.1668366470751104,
      "grad_norm": 2.890625,
      "learning_rate": 3.3947574060660816e-05,
      "loss": 0.8759,
      "step": 332930
    },
    {
      "epoch": 1.166871694582006,
      "grad_norm": 2.953125,
      "learning_rate": 3.394692503199711e-05,
      "loss": 0.8808,
      "step": 332940
    },
    {
      "epoch": 1.1669067420889014,
      "grad_norm": 2.625,
      "learning_rate": 3.394627600333341e-05,
      "loss": 0.8499,
      "step": 332950
    },
    {
      "epoch": 1.1669417895957972,
      "grad_norm": 2.640625,
      "learning_rate": 3.3945626974669706e-05,
      "loss": 0.7596,
      "step": 332960
    },
    {
      "epoch": 1.1669768371026927,
      "grad_norm": 3.171875,
      "learning_rate": 3.394497794600601e-05,
      "loss": 0.9241,
      "step": 332970
    },
    {
      "epoch": 1.1670118846095883,
      "grad_norm": 2.453125,
      "learning_rate": 3.39443289173423e-05,
      "loss": 0.8416,
      "step": 332980
    },
    {
      "epoch": 1.1670469321164838,
      "grad_norm": 2.734375,
      "learning_rate": 3.3943679888678604e-05,
      "loss": 0.908,
      "step": 332990
    },
    {
      "epoch": 1.1670819796233796,
      "grad_norm": 2.703125,
      "learning_rate": 3.39430308600149e-05,
      "loss": 0.8796,
      "step": 333000
    },
    {
      "epoch": 1.167117027130275,
      "grad_norm": 2.6875,
      "learning_rate": 3.39423818313512e-05,
      "loss": 0.8977,
      "step": 333010
    },
    {
      "epoch": 1.1671520746371706,
      "grad_norm": 3.015625,
      "learning_rate": 3.39417328026875e-05,
      "loss": 0.8534,
      "step": 333020
    },
    {
      "epoch": 1.1671871221440662,
      "grad_norm": 3.484375,
      "learning_rate": 3.3941083774023796e-05,
      "loss": 0.8711,
      "step": 333030
    },
    {
      "epoch": 1.167222169650962,
      "grad_norm": 3.171875,
      "learning_rate": 3.39404347453601e-05,
      "loss": 0.8645,
      "step": 333040
    },
    {
      "epoch": 1.1672572171578575,
      "grad_norm": 2.78125,
      "learning_rate": 3.393978571669639e-05,
      "loss": 0.9749,
      "step": 333050
    },
    {
      "epoch": 1.167292264664753,
      "grad_norm": 2.484375,
      "learning_rate": 3.393913668803269e-05,
      "loss": 0.8682,
      "step": 333060
    },
    {
      "epoch": 1.1673273121716488,
      "grad_norm": 2.71875,
      "learning_rate": 3.393848765936899e-05,
      "loss": 0.8685,
      "step": 333070
    },
    {
      "epoch": 1.1673623596785443,
      "grad_norm": 3.453125,
      "learning_rate": 3.393783863070529e-05,
      "loss": 0.9131,
      "step": 333080
    },
    {
      "epoch": 1.1673974071854398,
      "grad_norm": 2.75,
      "learning_rate": 3.3937189602041584e-05,
      "loss": 0.8019,
      "step": 333090
    },
    {
      "epoch": 1.1674324546923354,
      "grad_norm": 2.765625,
      "learning_rate": 3.3936540573377885e-05,
      "loss": 0.8893,
      "step": 333100
    },
    {
      "epoch": 1.1674675021992311,
      "grad_norm": 3.0,
      "learning_rate": 3.393589154471418e-05,
      "loss": 0.8376,
      "step": 333110
    },
    {
      "epoch": 1.1675025497061267,
      "grad_norm": 3.25,
      "learning_rate": 3.393524251605048e-05,
      "loss": 0.8716,
      "step": 333120
    },
    {
      "epoch": 1.1675375972130222,
      "grad_norm": 3.046875,
      "learning_rate": 3.3934593487386776e-05,
      "loss": 0.778,
      "step": 333130
    },
    {
      "epoch": 1.1675726447199177,
      "grad_norm": 3.203125,
      "learning_rate": 3.393394445872308e-05,
      "loss": 0.9039,
      "step": 333140
    },
    {
      "epoch": 1.1676076922268135,
      "grad_norm": 3.25,
      "learning_rate": 3.393329543005938e-05,
      "loss": 0.9073,
      "step": 333150
    },
    {
      "epoch": 1.167642739733709,
      "grad_norm": 2.484375,
      "learning_rate": 3.393264640139567e-05,
      "loss": 0.8462,
      "step": 333160
    },
    {
      "epoch": 1.1676777872406046,
      "grad_norm": 2.765625,
      "learning_rate": 3.3931997372731974e-05,
      "loss": 0.9247,
      "step": 333170
    },
    {
      "epoch": 1.1677128347475003,
      "grad_norm": 2.78125,
      "learning_rate": 3.393134834406827e-05,
      "loss": 0.8985,
      "step": 333180
    },
    {
      "epoch": 1.1677478822543959,
      "grad_norm": 3.328125,
      "learning_rate": 3.393069931540457e-05,
      "loss": 0.932,
      "step": 333190
    },
    {
      "epoch": 1.1677829297612914,
      "grad_norm": 3.140625,
      "learning_rate": 3.3930050286740865e-05,
      "loss": 0.843,
      "step": 333200
    },
    {
      "epoch": 1.1678179772681871,
      "grad_norm": 3.5625,
      "learning_rate": 3.3929401258077166e-05,
      "loss": 0.8469,
      "step": 333210
    },
    {
      "epoch": 1.1678530247750827,
      "grad_norm": 3.15625,
      "learning_rate": 3.392875222941346e-05,
      "loss": 0.8579,
      "step": 333220
    },
    {
      "epoch": 1.1678880722819782,
      "grad_norm": 3.0,
      "learning_rate": 3.392810320074976e-05,
      "loss": 0.8033,
      "step": 333230
    },
    {
      "epoch": 1.1679231197888738,
      "grad_norm": 2.5625,
      "learning_rate": 3.392745417208606e-05,
      "loss": 0.9015,
      "step": 333240
    },
    {
      "epoch": 1.1679581672957695,
      "grad_norm": 3.078125,
      "learning_rate": 3.392680514342236e-05,
      "loss": 0.8771,
      "step": 333250
    },
    {
      "epoch": 1.167993214802665,
      "grad_norm": 2.5625,
      "learning_rate": 3.392615611475865e-05,
      "loss": 0.8697,
      "step": 333260
    },
    {
      "epoch": 1.1680282623095606,
      "grad_norm": 2.59375,
      "learning_rate": 3.3925507086094954e-05,
      "loss": 0.8811,
      "step": 333270
    },
    {
      "epoch": 1.1680633098164561,
      "grad_norm": 2.96875,
      "learning_rate": 3.3924858057431256e-05,
      "loss": 0.8347,
      "step": 333280
    },
    {
      "epoch": 1.1680983573233519,
      "grad_norm": 2.734375,
      "learning_rate": 3.392420902876755e-05,
      "loss": 0.762,
      "step": 333290
    },
    {
      "epoch": 1.1681334048302474,
      "grad_norm": 2.953125,
      "learning_rate": 3.392356000010385e-05,
      "loss": 0.868,
      "step": 333300
    },
    {
      "epoch": 1.168168452337143,
      "grad_norm": 2.84375,
      "learning_rate": 3.392291097144014e-05,
      "loss": 0.9349,
      "step": 333310
    },
    {
      "epoch": 1.1682034998440387,
      "grad_norm": 2.453125,
      "learning_rate": 3.392226194277644e-05,
      "loss": 0.831,
      "step": 333320
    },
    {
      "epoch": 1.1682385473509342,
      "grad_norm": 2.90625,
      "learning_rate": 3.3921612914112736e-05,
      "loss": 0.8862,
      "step": 333330
    },
    {
      "epoch": 1.1682735948578298,
      "grad_norm": 3.1875,
      "learning_rate": 3.392096388544904e-05,
      "loss": 0.909,
      "step": 333340
    },
    {
      "epoch": 1.1683086423647253,
      "grad_norm": 2.828125,
      "learning_rate": 3.392031485678533e-05,
      "loss": 0.9125,
      "step": 333350
    },
    {
      "epoch": 1.168343689871621,
      "grad_norm": 2.53125,
      "learning_rate": 3.391966582812163e-05,
      "loss": 0.8844,
      "step": 333360
    },
    {
      "epoch": 1.1683787373785166,
      "grad_norm": 2.890625,
      "learning_rate": 3.3919016799457934e-05,
      "loss": 0.8549,
      "step": 333370
    },
    {
      "epoch": 1.1684137848854121,
      "grad_norm": 2.828125,
      "learning_rate": 3.391836777079423e-05,
      "loss": 0.9266,
      "step": 333380
    },
    {
      "epoch": 1.1684488323923077,
      "grad_norm": 3.296875,
      "learning_rate": 3.391771874213053e-05,
      "loss": 0.9289,
      "step": 333390
    },
    {
      "epoch": 1.1684838798992034,
      "grad_norm": 2.625,
      "learning_rate": 3.3917069713466825e-05,
      "loss": 0.9015,
      "step": 333400
    },
    {
      "epoch": 1.168518927406099,
      "grad_norm": 2.5625,
      "learning_rate": 3.3916420684803126e-05,
      "loss": 0.9089,
      "step": 333410
    },
    {
      "epoch": 1.1685539749129945,
      "grad_norm": 3.078125,
      "learning_rate": 3.391577165613942e-05,
      "loss": 0.8529,
      "step": 333420
    },
    {
      "epoch": 1.1685890224198903,
      "grad_norm": 2.6875,
      "learning_rate": 3.391512262747572e-05,
      "loss": 0.8355,
      "step": 333430
    },
    {
      "epoch": 1.1686240699267858,
      "grad_norm": 2.671875,
      "learning_rate": 3.391447359881202e-05,
      "loss": 0.8588,
      "step": 333440
    },
    {
      "epoch": 1.1686591174336813,
      "grad_norm": 3.015625,
      "learning_rate": 3.391382457014832e-05,
      "loss": 0.8781,
      "step": 333450
    },
    {
      "epoch": 1.1686941649405769,
      "grad_norm": 2.859375,
      "learning_rate": 3.391317554148461e-05,
      "loss": 0.9145,
      "step": 333460
    },
    {
      "epoch": 1.1687292124474726,
      "grad_norm": 2.828125,
      "learning_rate": 3.3912526512820914e-05,
      "loss": 0.8223,
      "step": 333470
    },
    {
      "epoch": 1.1687642599543682,
      "grad_norm": 2.90625,
      "learning_rate": 3.391187748415721e-05,
      "loss": 0.8512,
      "step": 333480
    },
    {
      "epoch": 1.1687993074612637,
      "grad_norm": 3.234375,
      "learning_rate": 3.391122845549351e-05,
      "loss": 0.8422,
      "step": 333490
    },
    {
      "epoch": 1.1688343549681592,
      "grad_norm": 2.828125,
      "learning_rate": 3.3910579426829805e-05,
      "loss": 0.8712,
      "step": 333500
    },
    {
      "epoch": 1.168869402475055,
      "grad_norm": 2.921875,
      "learning_rate": 3.3909930398166106e-05,
      "loss": 0.8275,
      "step": 333510
    },
    {
      "epoch": 1.1689044499819505,
      "grad_norm": 4.40625,
      "learning_rate": 3.390928136950241e-05,
      "loss": 0.9285,
      "step": 333520
    },
    {
      "epoch": 1.168939497488846,
      "grad_norm": 2.8125,
      "learning_rate": 3.39086323408387e-05,
      "loss": 0.8743,
      "step": 333530
    },
    {
      "epoch": 1.1689745449957418,
      "grad_norm": 3.28125,
      "learning_rate": 3.3907983312175004e-05,
      "loss": 0.9267,
      "step": 333540
    },
    {
      "epoch": 1.1690095925026374,
      "grad_norm": 2.6875,
      "learning_rate": 3.39073342835113e-05,
      "loss": 0.8271,
      "step": 333550
    },
    {
      "epoch": 1.169044640009533,
      "grad_norm": 3.25,
      "learning_rate": 3.39066852548476e-05,
      "loss": 0.8488,
      "step": 333560
    },
    {
      "epoch": 1.1690796875164284,
      "grad_norm": 3.265625,
      "learning_rate": 3.3906036226183894e-05,
      "loss": 0.9103,
      "step": 333570
    },
    {
      "epoch": 1.1691147350233242,
      "grad_norm": 2.5,
      "learning_rate": 3.3905387197520196e-05,
      "loss": 0.878,
      "step": 333580
    },
    {
      "epoch": 1.1691497825302197,
      "grad_norm": 3.125,
      "learning_rate": 3.390473816885649e-05,
      "loss": 0.8382,
      "step": 333590
    },
    {
      "epoch": 1.1691848300371153,
      "grad_norm": 3.328125,
      "learning_rate": 3.390408914019279e-05,
      "loss": 0.8545,
      "step": 333600
    },
    {
      "epoch": 1.1692198775440108,
      "grad_norm": 2.703125,
      "learning_rate": 3.3903440111529086e-05,
      "loss": 0.8765,
      "step": 333610
    },
    {
      "epoch": 1.1692549250509066,
      "grad_norm": 2.671875,
      "learning_rate": 3.390279108286539e-05,
      "loss": 0.873,
      "step": 333620
    },
    {
      "epoch": 1.169289972557802,
      "grad_norm": 3.015625,
      "learning_rate": 3.390214205420168e-05,
      "loss": 0.8541,
      "step": 333630
    },
    {
      "epoch": 1.1693250200646976,
      "grad_norm": 2.84375,
      "learning_rate": 3.3901493025537984e-05,
      "loss": 0.889,
      "step": 333640
    },
    {
      "epoch": 1.1693600675715934,
      "grad_norm": 2.828125,
      "learning_rate": 3.3900843996874285e-05,
      "loss": 0.7895,
      "step": 333650
    },
    {
      "epoch": 1.169395115078489,
      "grad_norm": 2.953125,
      "learning_rate": 3.390019496821058e-05,
      "loss": 0.926,
      "step": 333660
    },
    {
      "epoch": 1.1694301625853845,
      "grad_norm": 2.953125,
      "learning_rate": 3.389954593954688e-05,
      "loss": 0.8391,
      "step": 333670
    },
    {
      "epoch": 1.16946521009228,
      "grad_norm": 3.09375,
      "learning_rate": 3.389889691088317e-05,
      "loss": 0.8927,
      "step": 333680
    },
    {
      "epoch": 1.1695002575991758,
      "grad_norm": 3.171875,
      "learning_rate": 3.389824788221947e-05,
      "loss": 0.8901,
      "step": 333690
    },
    {
      "epoch": 1.1695353051060713,
      "grad_norm": 2.796875,
      "learning_rate": 3.3897598853555765e-05,
      "loss": 0.793,
      "step": 333700
    },
    {
      "epoch": 1.1695703526129668,
      "grad_norm": 2.609375,
      "learning_rate": 3.3896949824892066e-05,
      "loss": 0.9251,
      "step": 333710
    },
    {
      "epoch": 1.1696054001198624,
      "grad_norm": 2.90625,
      "learning_rate": 3.389630079622836e-05,
      "loss": 0.8641,
      "step": 333720
    },
    {
      "epoch": 1.1696404476267581,
      "grad_norm": 2.71875,
      "learning_rate": 3.389565176756466e-05,
      "loss": 0.9377,
      "step": 333730
    },
    {
      "epoch": 1.1696754951336537,
      "grad_norm": 2.78125,
      "learning_rate": 3.3895002738900964e-05,
      "loss": 0.7744,
      "step": 333740
    },
    {
      "epoch": 1.1697105426405492,
      "grad_norm": 3.25,
      "learning_rate": 3.389435371023726e-05,
      "loss": 0.9215,
      "step": 333750
    },
    {
      "epoch": 1.169745590147445,
      "grad_norm": 2.671875,
      "learning_rate": 3.389370468157356e-05,
      "loss": 0.7686,
      "step": 333760
    },
    {
      "epoch": 1.1697806376543405,
      "grad_norm": 3.09375,
      "learning_rate": 3.3893055652909854e-05,
      "loss": 0.8227,
      "step": 333770
    },
    {
      "epoch": 1.169815685161236,
      "grad_norm": 2.875,
      "learning_rate": 3.3892406624246156e-05,
      "loss": 0.8521,
      "step": 333780
    },
    {
      "epoch": 1.1698507326681318,
      "grad_norm": 2.5625,
      "learning_rate": 3.389175759558245e-05,
      "loss": 0.8061,
      "step": 333790
    },
    {
      "epoch": 1.1698857801750273,
      "grad_norm": 2.890625,
      "learning_rate": 3.389110856691875e-05,
      "loss": 0.8086,
      "step": 333800
    },
    {
      "epoch": 1.1699208276819228,
      "grad_norm": 3.078125,
      "learning_rate": 3.3890459538255046e-05,
      "loss": 0.7968,
      "step": 333810
    },
    {
      "epoch": 1.1699558751888184,
      "grad_norm": 2.671875,
      "learning_rate": 3.388981050959135e-05,
      "loss": 0.8763,
      "step": 333820
    },
    {
      "epoch": 1.169990922695714,
      "grad_norm": 3.125,
      "learning_rate": 3.388916148092764e-05,
      "loss": 0.8864,
      "step": 333830
    },
    {
      "epoch": 1.1700259702026097,
      "grad_norm": 2.828125,
      "learning_rate": 3.3888512452263944e-05,
      "loss": 0.8884,
      "step": 333840
    },
    {
      "epoch": 1.1700610177095052,
      "grad_norm": 3.484375,
      "learning_rate": 3.388786342360024e-05,
      "loss": 0.8749,
      "step": 333850
    },
    {
      "epoch": 1.1700960652164007,
      "grad_norm": 3.0625,
      "learning_rate": 3.388721439493654e-05,
      "loss": 0.8499,
      "step": 333860
    },
    {
      "epoch": 1.1701311127232965,
      "grad_norm": 3.125,
      "learning_rate": 3.3886565366272834e-05,
      "loss": 0.9159,
      "step": 333870
    },
    {
      "epoch": 1.170166160230192,
      "grad_norm": 2.84375,
      "learning_rate": 3.3885916337609136e-05,
      "loss": 0.8454,
      "step": 333880
    },
    {
      "epoch": 1.1702012077370876,
      "grad_norm": 3.09375,
      "learning_rate": 3.388526730894544e-05,
      "loss": 0.9551,
      "step": 333890
    },
    {
      "epoch": 1.1702362552439833,
      "grad_norm": 2.84375,
      "learning_rate": 3.388461828028173e-05,
      "loss": 0.8526,
      "step": 333900
    },
    {
      "epoch": 1.1702713027508789,
      "grad_norm": 3.21875,
      "learning_rate": 3.388396925161803e-05,
      "loss": 0.9656,
      "step": 333910
    },
    {
      "epoch": 1.1703063502577744,
      "grad_norm": 2.484375,
      "learning_rate": 3.388332022295433e-05,
      "loss": 0.9087,
      "step": 333920
    },
    {
      "epoch": 1.17034139776467,
      "grad_norm": 3.015625,
      "learning_rate": 3.388267119429063e-05,
      "loss": 0.8509,
      "step": 333930
    },
    {
      "epoch": 1.1703764452715657,
      "grad_norm": 2.640625,
      "learning_rate": 3.3882022165626924e-05,
      "loss": 0.9122,
      "step": 333940
    },
    {
      "epoch": 1.1704114927784612,
      "grad_norm": 3.15625,
      "learning_rate": 3.3881373136963225e-05,
      "loss": 0.8845,
      "step": 333950
    },
    {
      "epoch": 1.1704465402853568,
      "grad_norm": 3.09375,
      "learning_rate": 3.388072410829952e-05,
      "loss": 0.9281,
      "step": 333960
    },
    {
      "epoch": 1.1704815877922523,
      "grad_norm": 2.921875,
      "learning_rate": 3.388007507963582e-05,
      "loss": 0.9272,
      "step": 333970
    },
    {
      "epoch": 1.170516635299148,
      "grad_norm": 2.96875,
      "learning_rate": 3.3879426050972116e-05,
      "loss": 0.9308,
      "step": 333980
    },
    {
      "epoch": 1.1705516828060436,
      "grad_norm": 3.125,
      "learning_rate": 3.387877702230842e-05,
      "loss": 0.7963,
      "step": 333990
    },
    {
      "epoch": 1.1705867303129391,
      "grad_norm": 2.578125,
      "learning_rate": 3.387812799364471e-05,
      "loss": 0.8442,
      "step": 334000
    },
    {
      "epoch": 1.170621777819835,
      "grad_norm": 2.609375,
      "learning_rate": 3.387747896498101e-05,
      "loss": 0.8707,
      "step": 334010
    },
    {
      "epoch": 1.1706568253267304,
      "grad_norm": 2.671875,
      "learning_rate": 3.3876829936317315e-05,
      "loss": 0.832,
      "step": 334020
    },
    {
      "epoch": 1.170691872833626,
      "grad_norm": 2.796875,
      "learning_rate": 3.387618090765361e-05,
      "loss": 0.9176,
      "step": 334030
    },
    {
      "epoch": 1.1707269203405215,
      "grad_norm": 3.1875,
      "learning_rate": 3.387553187898991e-05,
      "loss": 0.8292,
      "step": 334040
    },
    {
      "epoch": 1.1707619678474173,
      "grad_norm": 2.328125,
      "learning_rate": 3.3874882850326205e-05,
      "loss": 0.7916,
      "step": 334050
    },
    {
      "epoch": 1.1707970153543128,
      "grad_norm": 3.15625,
      "learning_rate": 3.38742338216625e-05,
      "loss": 0.8378,
      "step": 334060
    },
    {
      "epoch": 1.1708320628612083,
      "grad_norm": 3.25,
      "learning_rate": 3.3873584792998794e-05,
      "loss": 0.9135,
      "step": 334070
    },
    {
      "epoch": 1.1708671103681039,
      "grad_norm": 2.859375,
      "learning_rate": 3.3872935764335096e-05,
      "loss": 0.8487,
      "step": 334080
    },
    {
      "epoch": 1.1709021578749996,
      "grad_norm": 3.0,
      "learning_rate": 3.387228673567139e-05,
      "loss": 0.8542,
      "step": 334090
    },
    {
      "epoch": 1.1709372053818952,
      "grad_norm": 3.09375,
      "learning_rate": 3.387163770700769e-05,
      "loss": 0.7985,
      "step": 334100
    },
    {
      "epoch": 1.1709722528887907,
      "grad_norm": 2.8125,
      "learning_rate": 3.387098867834399e-05,
      "loss": 0.8572,
      "step": 334110
    },
    {
      "epoch": 1.1710073003956865,
      "grad_norm": 2.53125,
      "learning_rate": 3.387033964968029e-05,
      "loss": 0.8225,
      "step": 334120
    },
    {
      "epoch": 1.171042347902582,
      "grad_norm": 3.25,
      "learning_rate": 3.386969062101659e-05,
      "loss": 0.8736,
      "step": 334130
    },
    {
      "epoch": 1.1710773954094775,
      "grad_norm": 3.046875,
      "learning_rate": 3.3869041592352884e-05,
      "loss": 0.8785,
      "step": 334140
    },
    {
      "epoch": 1.171112442916373,
      "grad_norm": 3.71875,
      "learning_rate": 3.3868392563689185e-05,
      "loss": 0.9093,
      "step": 334150
    },
    {
      "epoch": 1.1711474904232688,
      "grad_norm": 2.875,
      "learning_rate": 3.386774353502548e-05,
      "loss": 0.8795,
      "step": 334160
    },
    {
      "epoch": 1.1711825379301644,
      "grad_norm": 3.15625,
      "learning_rate": 3.386709450636178e-05,
      "loss": 0.8107,
      "step": 334170
    },
    {
      "epoch": 1.17121758543706,
      "grad_norm": 2.84375,
      "learning_rate": 3.3866445477698076e-05,
      "loss": 0.8428,
      "step": 334180
    },
    {
      "epoch": 1.1712526329439554,
      "grad_norm": 3.015625,
      "learning_rate": 3.386579644903438e-05,
      "loss": 0.8385,
      "step": 334190
    },
    {
      "epoch": 1.1712876804508512,
      "grad_norm": 3.171875,
      "learning_rate": 3.386514742037067e-05,
      "loss": 0.8822,
      "step": 334200
    },
    {
      "epoch": 1.1713227279577467,
      "grad_norm": 2.546875,
      "learning_rate": 3.386449839170697e-05,
      "loss": 0.8496,
      "step": 334210
    },
    {
      "epoch": 1.1713577754646423,
      "grad_norm": 2.390625,
      "learning_rate": 3.386384936304327e-05,
      "loss": 0.8756,
      "step": 334220
    },
    {
      "epoch": 1.171392822971538,
      "grad_norm": 2.8125,
      "learning_rate": 3.386320033437957e-05,
      "loss": 0.8501,
      "step": 334230
    },
    {
      "epoch": 1.1714278704784336,
      "grad_norm": 3.265625,
      "learning_rate": 3.3862551305715864e-05,
      "loss": 0.8847,
      "step": 334240
    },
    {
      "epoch": 1.171462917985329,
      "grad_norm": 3.125,
      "learning_rate": 3.3861902277052165e-05,
      "loss": 0.8981,
      "step": 334250
    },
    {
      "epoch": 1.1714979654922246,
      "grad_norm": 3.296875,
      "learning_rate": 3.3861253248388467e-05,
      "loss": 0.9855,
      "step": 334260
    },
    {
      "epoch": 1.1715330129991204,
      "grad_norm": 2.515625,
      "learning_rate": 3.386060421972476e-05,
      "loss": 0.8341,
      "step": 334270
    },
    {
      "epoch": 1.171568060506016,
      "grad_norm": 2.265625,
      "learning_rate": 3.385995519106106e-05,
      "loss": 0.8667,
      "step": 334280
    },
    {
      "epoch": 1.1716031080129115,
      "grad_norm": 2.6875,
      "learning_rate": 3.385930616239736e-05,
      "loss": 0.8855,
      "step": 334290
    },
    {
      "epoch": 1.171638155519807,
      "grad_norm": 3.0,
      "learning_rate": 3.385865713373366e-05,
      "loss": 0.8472,
      "step": 334300
    },
    {
      "epoch": 1.1716732030267027,
      "grad_norm": 2.75,
      "learning_rate": 3.385800810506995e-05,
      "loss": 0.8469,
      "step": 334310
    },
    {
      "epoch": 1.1717082505335983,
      "grad_norm": 2.8125,
      "learning_rate": 3.3857359076406255e-05,
      "loss": 0.9495,
      "step": 334320
    },
    {
      "epoch": 1.1717432980404938,
      "grad_norm": 2.875,
      "learning_rate": 3.385671004774255e-05,
      "loss": 0.8351,
      "step": 334330
    },
    {
      "epoch": 1.1717783455473896,
      "grad_norm": 3.171875,
      "learning_rate": 3.385606101907885e-05,
      "loss": 0.8634,
      "step": 334340
    },
    {
      "epoch": 1.1718133930542851,
      "grad_norm": 2.984375,
      "learning_rate": 3.3855411990415145e-05,
      "loss": 0.8699,
      "step": 334350
    },
    {
      "epoch": 1.1718484405611806,
      "grad_norm": 2.734375,
      "learning_rate": 3.3854762961751447e-05,
      "loss": 0.9024,
      "step": 334360
    },
    {
      "epoch": 1.1718834880680762,
      "grad_norm": 2.890625,
      "learning_rate": 3.385411393308774e-05,
      "loss": 0.9158,
      "step": 334370
    },
    {
      "epoch": 1.171918535574972,
      "grad_norm": 2.765625,
      "learning_rate": 3.385346490442404e-05,
      "loss": 0.8399,
      "step": 334380
    },
    {
      "epoch": 1.1719535830818675,
      "grad_norm": 3.0,
      "learning_rate": 3.3852815875760344e-05,
      "loss": 0.9637,
      "step": 334390
    },
    {
      "epoch": 1.171988630588763,
      "grad_norm": 2.75,
      "learning_rate": 3.385216684709664e-05,
      "loss": 0.86,
      "step": 334400
    },
    {
      "epoch": 1.1720236780956585,
      "grad_norm": 3.03125,
      "learning_rate": 3.385151781843294e-05,
      "loss": 0.805,
      "step": 334410
    },
    {
      "epoch": 1.1720587256025543,
      "grad_norm": 2.71875,
      "learning_rate": 3.3850868789769235e-05,
      "loss": 0.8037,
      "step": 334420
    },
    {
      "epoch": 1.1720937731094498,
      "grad_norm": 2.609375,
      "learning_rate": 3.3850219761105536e-05,
      "loss": 0.8798,
      "step": 334430
    },
    {
      "epoch": 1.1721288206163454,
      "grad_norm": 2.65625,
      "learning_rate": 3.3849570732441824e-05,
      "loss": 0.8483,
      "step": 334440
    },
    {
      "epoch": 1.1721638681232411,
      "grad_norm": 3.046875,
      "learning_rate": 3.3848921703778125e-05,
      "loss": 0.8557,
      "step": 334450
    },
    {
      "epoch": 1.1721989156301367,
      "grad_norm": 2.515625,
      "learning_rate": 3.384827267511442e-05,
      "loss": 0.836,
      "step": 334460
    },
    {
      "epoch": 1.1722339631370322,
      "grad_norm": 2.921875,
      "learning_rate": 3.384762364645072e-05,
      "loss": 0.846,
      "step": 334470
    },
    {
      "epoch": 1.172269010643928,
      "grad_norm": 3.359375,
      "learning_rate": 3.384697461778702e-05,
      "loss": 0.8514,
      "step": 334480
    },
    {
      "epoch": 1.1723040581508235,
      "grad_norm": 2.4375,
      "learning_rate": 3.384632558912332e-05,
      "loss": 0.8936,
      "step": 334490
    },
    {
      "epoch": 1.172339105657719,
      "grad_norm": 3.421875,
      "learning_rate": 3.384567656045962e-05,
      "loss": 0.9406,
      "step": 334500
    },
    {
      "epoch": 1.1723741531646146,
      "grad_norm": 2.875,
      "learning_rate": 3.384502753179591e-05,
      "loss": 0.8281,
      "step": 334510
    },
    {
      "epoch": 1.17240920067151,
      "grad_norm": 3.296875,
      "learning_rate": 3.3844378503132215e-05,
      "loss": 0.8963,
      "step": 334520
    },
    {
      "epoch": 1.1724442481784059,
      "grad_norm": 2.71875,
      "learning_rate": 3.384372947446851e-05,
      "loss": 0.868,
      "step": 334530
    },
    {
      "epoch": 1.1724792956853014,
      "grad_norm": 2.859375,
      "learning_rate": 3.384308044580481e-05,
      "loss": 0.8216,
      "step": 334540
    },
    {
      "epoch": 1.172514343192197,
      "grad_norm": 2.875,
      "learning_rate": 3.3842431417141105e-05,
      "loss": 0.9097,
      "step": 334550
    },
    {
      "epoch": 1.1725493906990927,
      "grad_norm": 2.28125,
      "learning_rate": 3.3841782388477407e-05,
      "loss": 0.8715,
      "step": 334560
    },
    {
      "epoch": 1.1725844382059882,
      "grad_norm": 2.921875,
      "learning_rate": 3.38411333598137e-05,
      "loss": 0.8902,
      "step": 334570
    },
    {
      "epoch": 1.1726194857128838,
      "grad_norm": 3.109375,
      "learning_rate": 3.384048433115e-05,
      "loss": 0.8516,
      "step": 334580
    },
    {
      "epoch": 1.1726545332197795,
      "grad_norm": 3.390625,
      "learning_rate": 3.38398353024863e-05,
      "loss": 0.9721,
      "step": 334590
    },
    {
      "epoch": 1.172689580726675,
      "grad_norm": 3.015625,
      "learning_rate": 3.38391862738226e-05,
      "loss": 0.8474,
      "step": 334600
    },
    {
      "epoch": 1.1727246282335706,
      "grad_norm": 3.078125,
      "learning_rate": 3.38385372451589e-05,
      "loss": 0.8962,
      "step": 334610
    },
    {
      "epoch": 1.1727596757404661,
      "grad_norm": 3.34375,
      "learning_rate": 3.3837888216495195e-05,
      "loss": 0.9132,
      "step": 334620
    },
    {
      "epoch": 1.172794723247362,
      "grad_norm": 3.015625,
      "learning_rate": 3.3837239187831496e-05,
      "loss": 0.8563,
      "step": 334630
    },
    {
      "epoch": 1.1728297707542574,
      "grad_norm": 2.734375,
      "learning_rate": 3.383659015916779e-05,
      "loss": 0.9441,
      "step": 334640
    },
    {
      "epoch": 1.172864818261153,
      "grad_norm": 3.15625,
      "learning_rate": 3.383594113050409e-05,
      "loss": 0.8962,
      "step": 334650
    },
    {
      "epoch": 1.1728998657680485,
      "grad_norm": 3.203125,
      "learning_rate": 3.3835292101840387e-05,
      "loss": 0.9471,
      "step": 334660
    },
    {
      "epoch": 1.1729349132749443,
      "grad_norm": 2.71875,
      "learning_rate": 3.383464307317669e-05,
      "loss": 0.8738,
      "step": 334670
    },
    {
      "epoch": 1.1729699607818398,
      "grad_norm": 3.0,
      "learning_rate": 3.383399404451298e-05,
      "loss": 0.9044,
      "step": 334680
    },
    {
      "epoch": 1.1730050082887353,
      "grad_norm": 2.640625,
      "learning_rate": 3.3833345015849284e-05,
      "loss": 0.8872,
      "step": 334690
    },
    {
      "epoch": 1.173040055795631,
      "grad_norm": 3.015625,
      "learning_rate": 3.383269598718558e-05,
      "loss": 0.9174,
      "step": 334700
    },
    {
      "epoch": 1.1730751033025266,
      "grad_norm": 3.015625,
      "learning_rate": 3.383204695852188e-05,
      "loss": 0.8075,
      "step": 334710
    },
    {
      "epoch": 1.1731101508094222,
      "grad_norm": 2.90625,
      "learning_rate": 3.3831397929858175e-05,
      "loss": 0.8796,
      "step": 334720
    },
    {
      "epoch": 1.1731451983163177,
      "grad_norm": 2.671875,
      "learning_rate": 3.3830748901194476e-05,
      "loss": 0.7908,
      "step": 334730
    },
    {
      "epoch": 1.1731802458232135,
      "grad_norm": 3.703125,
      "learning_rate": 3.383009987253077e-05,
      "loss": 0.8875,
      "step": 334740
    },
    {
      "epoch": 1.173215293330109,
      "grad_norm": 3.0625,
      "learning_rate": 3.382945084386707e-05,
      "loss": 0.9301,
      "step": 334750
    },
    {
      "epoch": 1.1732503408370045,
      "grad_norm": 2.75,
      "learning_rate": 3.382880181520337e-05,
      "loss": 0.8366,
      "step": 334760
    },
    {
      "epoch": 1.1732853883439,
      "grad_norm": 3.796875,
      "learning_rate": 3.382815278653967e-05,
      "loss": 0.886,
      "step": 334770
    },
    {
      "epoch": 1.1733204358507958,
      "grad_norm": 3.359375,
      "learning_rate": 3.382750375787597e-05,
      "loss": 0.8177,
      "step": 334780
    },
    {
      "epoch": 1.1733554833576914,
      "grad_norm": 2.921875,
      "learning_rate": 3.3826854729212264e-05,
      "loss": 0.8345,
      "step": 334790
    },
    {
      "epoch": 1.173390530864587,
      "grad_norm": 3.0,
      "learning_rate": 3.3826205700548565e-05,
      "loss": 0.8439,
      "step": 334800
    },
    {
      "epoch": 1.1734255783714826,
      "grad_norm": 2.46875,
      "learning_rate": 3.382555667188485e-05,
      "loss": 0.8917,
      "step": 334810
    },
    {
      "epoch": 1.1734606258783782,
      "grad_norm": 3.390625,
      "learning_rate": 3.3824907643221155e-05,
      "loss": 0.9201,
      "step": 334820
    },
    {
      "epoch": 1.1734956733852737,
      "grad_norm": 3.1875,
      "learning_rate": 3.382425861455745e-05,
      "loss": 0.9262,
      "step": 334830
    },
    {
      "epoch": 1.1735307208921693,
      "grad_norm": 3.21875,
      "learning_rate": 3.382360958589375e-05,
      "loss": 0.9332,
      "step": 334840
    },
    {
      "epoch": 1.173565768399065,
      "grad_norm": 3.171875,
      "learning_rate": 3.382296055723005e-05,
      "loss": 0.8547,
      "step": 334850
    },
    {
      "epoch": 1.1736008159059605,
      "grad_norm": 3.390625,
      "learning_rate": 3.3822311528566347e-05,
      "loss": 0.9177,
      "step": 334860
    },
    {
      "epoch": 1.173635863412856,
      "grad_norm": 3.265625,
      "learning_rate": 3.382166249990265e-05,
      "loss": 0.8673,
      "step": 334870
    },
    {
      "epoch": 1.1736709109197516,
      "grad_norm": 3.015625,
      "learning_rate": 3.382101347123894e-05,
      "loss": 0.8619,
      "step": 334880
    },
    {
      "epoch": 1.1737059584266474,
      "grad_norm": 3.015625,
      "learning_rate": 3.3820364442575244e-05,
      "loss": 0.7713,
      "step": 334890
    },
    {
      "epoch": 1.173741005933543,
      "grad_norm": 3.078125,
      "learning_rate": 3.381971541391154e-05,
      "loss": 0.8907,
      "step": 334900
    },
    {
      "epoch": 1.1737760534404384,
      "grad_norm": 3.171875,
      "learning_rate": 3.381906638524784e-05,
      "loss": 0.9722,
      "step": 334910
    },
    {
      "epoch": 1.1738111009473342,
      "grad_norm": 2.6875,
      "learning_rate": 3.3818417356584135e-05,
      "loss": 0.8625,
      "step": 334920
    },
    {
      "epoch": 1.1738461484542297,
      "grad_norm": 2.859375,
      "learning_rate": 3.3817768327920436e-05,
      "loss": 0.9306,
      "step": 334930
    },
    {
      "epoch": 1.1738811959611253,
      "grad_norm": 2.796875,
      "learning_rate": 3.381711929925673e-05,
      "loss": 0.7675,
      "step": 334940
    },
    {
      "epoch": 1.1739162434680208,
      "grad_norm": 2.78125,
      "learning_rate": 3.381647027059303e-05,
      "loss": 0.8436,
      "step": 334950
    },
    {
      "epoch": 1.1739512909749166,
      "grad_norm": 3.234375,
      "learning_rate": 3.3815821241929327e-05,
      "loss": 0.9229,
      "step": 334960
    },
    {
      "epoch": 1.173986338481812,
      "grad_norm": 2.5625,
      "learning_rate": 3.381517221326563e-05,
      "loss": 0.8608,
      "step": 334970
    },
    {
      "epoch": 1.1740213859887076,
      "grad_norm": 2.828125,
      "learning_rate": 3.381452318460193e-05,
      "loss": 0.8369,
      "step": 334980
    },
    {
      "epoch": 1.1740564334956032,
      "grad_norm": 2.921875,
      "learning_rate": 3.3813874155938224e-05,
      "loss": 0.7985,
      "step": 334990
    },
    {
      "epoch": 1.174091481002499,
      "grad_norm": 2.46875,
      "learning_rate": 3.3813225127274525e-05,
      "loss": 0.9312,
      "step": 335000
    },
    {
      "epoch": 1.174091481002499,
      "eval_loss": 0.8082605600357056,
      "eval_runtime": 567.4504,
      "eval_samples_per_second": 670.43,
      "eval_steps_per_second": 55.869,
      "step": 335000
    },
    {
      "epoch": 1.1741265285093945,
      "grad_norm": 2.796875,
      "learning_rate": 3.381257609861082e-05,
      "loss": 0.8244,
      "step": 335010
    },
    {
      "epoch": 1.17416157601629,
      "grad_norm": 2.765625,
      "learning_rate": 3.381192706994712e-05,
      "loss": 0.7502,
      "step": 335020
    },
    {
      "epoch": 1.1741966235231858,
      "grad_norm": 2.75,
      "learning_rate": 3.3811278041283416e-05,
      "loss": 0.888,
      "step": 335030
    },
    {
      "epoch": 1.1742316710300813,
      "grad_norm": 2.421875,
      "learning_rate": 3.381062901261972e-05,
      "loss": 0.8347,
      "step": 335040
    },
    {
      "epoch": 1.1742667185369768,
      "grad_norm": 2.9375,
      "learning_rate": 3.380997998395601e-05,
      "loss": 0.9489,
      "step": 335050
    },
    {
      "epoch": 1.1743017660438724,
      "grad_norm": 3.09375,
      "learning_rate": 3.380933095529231e-05,
      "loss": 0.9533,
      "step": 335060
    },
    {
      "epoch": 1.1743368135507681,
      "grad_norm": 2.765625,
      "learning_rate": 3.380868192662861e-05,
      "loss": 0.9356,
      "step": 335070
    },
    {
      "epoch": 1.1743718610576637,
      "grad_norm": 2.609375,
      "learning_rate": 3.380803289796491e-05,
      "loss": 0.8244,
      "step": 335080
    },
    {
      "epoch": 1.1744069085645592,
      "grad_norm": 2.609375,
      "learning_rate": 3.3807383869301204e-05,
      "loss": 0.8214,
      "step": 335090
    },
    {
      "epoch": 1.1744419560714547,
      "grad_norm": 3.5625,
      "learning_rate": 3.3806734840637505e-05,
      "loss": 0.931,
      "step": 335100
    },
    {
      "epoch": 1.1744770035783505,
      "grad_norm": 2.453125,
      "learning_rate": 3.38060858119738e-05,
      "loss": 0.8164,
      "step": 335110
    },
    {
      "epoch": 1.174512051085246,
      "grad_norm": 2.875,
      "learning_rate": 3.38054367833101e-05,
      "loss": 0.8391,
      "step": 335120
    },
    {
      "epoch": 1.1745470985921416,
      "grad_norm": 2.71875,
      "learning_rate": 3.38047877546464e-05,
      "loss": 0.8833,
      "step": 335130
    },
    {
      "epoch": 1.1745821460990373,
      "grad_norm": 2.484375,
      "learning_rate": 3.38041387259827e-05,
      "loss": 0.9039,
      "step": 335140
    },
    {
      "epoch": 1.1746171936059329,
      "grad_norm": 2.890625,
      "learning_rate": 3.3803489697319e-05,
      "loss": 0.8727,
      "step": 335150
    },
    {
      "epoch": 1.1746522411128284,
      "grad_norm": 2.65625,
      "learning_rate": 3.380284066865529e-05,
      "loss": 0.8992,
      "step": 335160
    },
    {
      "epoch": 1.1746872886197242,
      "grad_norm": 2.984375,
      "learning_rate": 3.3802191639991595e-05,
      "loss": 0.8546,
      "step": 335170
    },
    {
      "epoch": 1.1747223361266197,
      "grad_norm": 2.875,
      "learning_rate": 3.380154261132789e-05,
      "loss": 0.9903,
      "step": 335180
    },
    {
      "epoch": 1.1747573836335152,
      "grad_norm": 2.71875,
      "learning_rate": 3.3800893582664184e-05,
      "loss": 0.8095,
      "step": 335190
    },
    {
      "epoch": 1.1747924311404108,
      "grad_norm": 2.921875,
      "learning_rate": 3.380024455400048e-05,
      "loss": 0.8252,
      "step": 335200
    },
    {
      "epoch": 1.1748274786473063,
      "grad_norm": 2.890625,
      "learning_rate": 3.379959552533678e-05,
      "loss": 0.9914,
      "step": 335210
    },
    {
      "epoch": 1.174862526154202,
      "grad_norm": 2.75,
      "learning_rate": 3.379894649667308e-05,
      "loss": 0.8085,
      "step": 335220
    },
    {
      "epoch": 1.1748975736610976,
      "grad_norm": 2.671875,
      "learning_rate": 3.3798297468009376e-05,
      "loss": 0.8548,
      "step": 335230
    },
    {
      "epoch": 1.1749326211679931,
      "grad_norm": 2.75,
      "learning_rate": 3.379764843934568e-05,
      "loss": 0.9191,
      "step": 335240
    },
    {
      "epoch": 1.1749676686748889,
      "grad_norm": 2.6875,
      "learning_rate": 3.379699941068197e-05,
      "loss": 0.9352,
      "step": 335250
    },
    {
      "epoch": 1.1750027161817844,
      "grad_norm": 2.78125,
      "learning_rate": 3.379635038201827e-05,
      "loss": 0.8036,
      "step": 335260
    },
    {
      "epoch": 1.17503776368868,
      "grad_norm": 3.109375,
      "learning_rate": 3.379570135335457e-05,
      "loss": 0.8161,
      "step": 335270
    },
    {
      "epoch": 1.1750728111955757,
      "grad_norm": 3.21875,
      "learning_rate": 3.379505232469087e-05,
      "loss": 0.9124,
      "step": 335280
    },
    {
      "epoch": 1.1751078587024713,
      "grad_norm": 3.125,
      "learning_rate": 3.3794403296027164e-05,
      "loss": 0.8421,
      "step": 335290
    },
    {
      "epoch": 1.1751429062093668,
      "grad_norm": 2.953125,
      "learning_rate": 3.3793754267363465e-05,
      "loss": 0.8794,
      "step": 335300
    },
    {
      "epoch": 1.1751779537162623,
      "grad_norm": 2.671875,
      "learning_rate": 3.379310523869976e-05,
      "loss": 0.7977,
      "step": 335310
    },
    {
      "epoch": 1.175213001223158,
      "grad_norm": 3.078125,
      "learning_rate": 3.379245621003606e-05,
      "loss": 0.9021,
      "step": 335320
    },
    {
      "epoch": 1.1752480487300536,
      "grad_norm": 3.453125,
      "learning_rate": 3.3791807181372356e-05,
      "loss": 0.8548,
      "step": 335330
    },
    {
      "epoch": 1.1752830962369492,
      "grad_norm": 2.75,
      "learning_rate": 3.379115815270866e-05,
      "loss": 0.8243,
      "step": 335340
    },
    {
      "epoch": 1.1753181437438447,
      "grad_norm": 3.0,
      "learning_rate": 3.379050912404496e-05,
      "loss": 0.8038,
      "step": 335350
    },
    {
      "epoch": 1.1753531912507404,
      "grad_norm": 2.796875,
      "learning_rate": 3.378986009538125e-05,
      "loss": 0.7999,
      "step": 335360
    },
    {
      "epoch": 1.175388238757636,
      "grad_norm": 3.140625,
      "learning_rate": 3.3789211066717555e-05,
      "loss": 0.9103,
      "step": 335370
    },
    {
      "epoch": 1.1754232862645315,
      "grad_norm": 2.84375,
      "learning_rate": 3.378856203805385e-05,
      "loss": 0.9063,
      "step": 335380
    },
    {
      "epoch": 1.1754583337714273,
      "grad_norm": 2.8125,
      "learning_rate": 3.378791300939015e-05,
      "loss": 0.8579,
      "step": 335390
    },
    {
      "epoch": 1.1754933812783228,
      "grad_norm": 2.421875,
      "learning_rate": 3.3787263980726445e-05,
      "loss": 0.8567,
      "step": 335400
    },
    {
      "epoch": 1.1755284287852183,
      "grad_norm": 3.0625,
      "learning_rate": 3.378661495206275e-05,
      "loss": 0.904,
      "step": 335410
    },
    {
      "epoch": 1.1755634762921139,
      "grad_norm": 2.9375,
      "learning_rate": 3.378596592339904e-05,
      "loss": 0.8304,
      "step": 335420
    },
    {
      "epoch": 1.1755985237990096,
      "grad_norm": 2.734375,
      "learning_rate": 3.378531689473534e-05,
      "loss": 0.877,
      "step": 335430
    },
    {
      "epoch": 1.1756335713059052,
      "grad_norm": 3.015625,
      "learning_rate": 3.378466786607164e-05,
      "loss": 0.9174,
      "step": 335440
    },
    {
      "epoch": 1.1756686188128007,
      "grad_norm": 3.78125,
      "learning_rate": 3.378401883740794e-05,
      "loss": 0.8619,
      "step": 335450
    },
    {
      "epoch": 1.1757036663196963,
      "grad_norm": 2.734375,
      "learning_rate": 3.378336980874423e-05,
      "loss": 0.8669,
      "step": 335460
    },
    {
      "epoch": 1.175738713826592,
      "grad_norm": 2.953125,
      "learning_rate": 3.3782720780080535e-05,
      "loss": 0.8119,
      "step": 335470
    },
    {
      "epoch": 1.1757737613334875,
      "grad_norm": 2.890625,
      "learning_rate": 3.3782071751416836e-05,
      "loss": 0.9025,
      "step": 335480
    },
    {
      "epoch": 1.175808808840383,
      "grad_norm": 2.984375,
      "learning_rate": 3.378142272275313e-05,
      "loss": 0.8804,
      "step": 335490
    },
    {
      "epoch": 1.1758438563472788,
      "grad_norm": 2.8125,
      "learning_rate": 3.378077369408943e-05,
      "loss": 0.8934,
      "step": 335500
    },
    {
      "epoch": 1.1758789038541744,
      "grad_norm": 2.78125,
      "learning_rate": 3.378012466542573e-05,
      "loss": 0.8241,
      "step": 335510
    },
    {
      "epoch": 1.17591395136107,
      "grad_norm": 3.328125,
      "learning_rate": 3.377947563676203e-05,
      "loss": 0.9127,
      "step": 335520
    },
    {
      "epoch": 1.1759489988679654,
      "grad_norm": 2.859375,
      "learning_rate": 3.377882660809832e-05,
      "loss": 0.8308,
      "step": 335530
    },
    {
      "epoch": 1.1759840463748612,
      "grad_norm": 2.84375,
      "learning_rate": 3.3778177579434624e-05,
      "loss": 0.7984,
      "step": 335540
    },
    {
      "epoch": 1.1760190938817567,
      "grad_norm": 2.953125,
      "learning_rate": 3.377752855077092e-05,
      "loss": 0.8603,
      "step": 335550
    },
    {
      "epoch": 1.1760541413886523,
      "grad_norm": 3.203125,
      "learning_rate": 3.377687952210722e-05,
      "loss": 0.9172,
      "step": 335560
    },
    {
      "epoch": 1.1760891888955478,
      "grad_norm": 2.765625,
      "learning_rate": 3.3776230493443515e-05,
      "loss": 0.791,
      "step": 335570
    },
    {
      "epoch": 1.1761242364024436,
      "grad_norm": 2.96875,
      "learning_rate": 3.377558146477981e-05,
      "loss": 0.8046,
      "step": 335580
    },
    {
      "epoch": 1.176159283909339,
      "grad_norm": 3.03125,
      "learning_rate": 3.377493243611611e-05,
      "loss": 0.9014,
      "step": 335590
    },
    {
      "epoch": 1.1761943314162346,
      "grad_norm": 3.125,
      "learning_rate": 3.3774283407452405e-05,
      "loss": 0.9343,
      "step": 335600
    },
    {
      "epoch": 1.1762293789231304,
      "grad_norm": 3.0,
      "learning_rate": 3.377363437878871e-05,
      "loss": 0.8095,
      "step": 335610
    },
    {
      "epoch": 1.176264426430026,
      "grad_norm": 2.8125,
      "learning_rate": 3.3772985350125e-05,
      "loss": 0.8278,
      "step": 335620
    },
    {
      "epoch": 1.1762994739369215,
      "grad_norm": 2.5625,
      "learning_rate": 3.37723363214613e-05,
      "loss": 0.8587,
      "step": 335630
    },
    {
      "epoch": 1.176334521443817,
      "grad_norm": 2.703125,
      "learning_rate": 3.37716872927976e-05,
      "loss": 0.8814,
      "step": 335640
    },
    {
      "epoch": 1.1763695689507128,
      "grad_norm": 3.03125,
      "learning_rate": 3.37710382641339e-05,
      "loss": 0.9584,
      "step": 335650
    },
    {
      "epoch": 1.1764046164576083,
      "grad_norm": 2.640625,
      "learning_rate": 3.377038923547019e-05,
      "loss": 0.7952,
      "step": 335660
    },
    {
      "epoch": 1.1764396639645038,
      "grad_norm": 3.703125,
      "learning_rate": 3.3769740206806495e-05,
      "loss": 0.828,
      "step": 335670
    },
    {
      "epoch": 1.1764747114713994,
      "grad_norm": 3.3125,
      "learning_rate": 3.376909117814279e-05,
      "loss": 0.8706,
      "step": 335680
    },
    {
      "epoch": 1.1765097589782951,
      "grad_norm": 3.40625,
      "learning_rate": 3.376844214947909e-05,
      "loss": 0.8804,
      "step": 335690
    },
    {
      "epoch": 1.1765448064851907,
      "grad_norm": 2.640625,
      "learning_rate": 3.3767793120815385e-05,
      "loss": 0.9348,
      "step": 335700
    },
    {
      "epoch": 1.1765798539920862,
      "grad_norm": 3.03125,
      "learning_rate": 3.376714409215169e-05,
      "loss": 0.9017,
      "step": 335710
    },
    {
      "epoch": 1.176614901498982,
      "grad_norm": 3.015625,
      "learning_rate": 3.376649506348799e-05,
      "loss": 0.8486,
      "step": 335720
    },
    {
      "epoch": 1.1766499490058775,
      "grad_norm": 2.53125,
      "learning_rate": 3.376584603482428e-05,
      "loss": 0.7751,
      "step": 335730
    },
    {
      "epoch": 1.176684996512773,
      "grad_norm": 3.34375,
      "learning_rate": 3.3765197006160584e-05,
      "loss": 0.8565,
      "step": 335740
    },
    {
      "epoch": 1.1767200440196686,
      "grad_norm": 3.375,
      "learning_rate": 3.376454797749688e-05,
      "loss": 0.8817,
      "step": 335750
    },
    {
      "epoch": 1.1767550915265643,
      "grad_norm": 2.796875,
      "learning_rate": 3.376389894883318e-05,
      "loss": 0.8661,
      "step": 335760
    },
    {
      "epoch": 1.1767901390334599,
      "grad_norm": 3.453125,
      "learning_rate": 3.3763249920169475e-05,
      "loss": 0.8632,
      "step": 335770
    },
    {
      "epoch": 1.1768251865403554,
      "grad_norm": 3.265625,
      "learning_rate": 3.3762600891505776e-05,
      "loss": 0.9356,
      "step": 335780
    },
    {
      "epoch": 1.176860234047251,
      "grad_norm": 2.671875,
      "learning_rate": 3.376195186284207e-05,
      "loss": 0.8653,
      "step": 335790
    },
    {
      "epoch": 1.1768952815541467,
      "grad_norm": 3.171875,
      "learning_rate": 3.376130283417837e-05,
      "loss": 0.9113,
      "step": 335800
    },
    {
      "epoch": 1.1769303290610422,
      "grad_norm": 3.4375,
      "learning_rate": 3.376065380551467e-05,
      "loss": 0.9207,
      "step": 335810
    },
    {
      "epoch": 1.1769653765679378,
      "grad_norm": 3.265625,
      "learning_rate": 3.376000477685097e-05,
      "loss": 0.9832,
      "step": 335820
    },
    {
      "epoch": 1.1770004240748335,
      "grad_norm": 3.078125,
      "learning_rate": 3.375935574818726e-05,
      "loss": 0.9022,
      "step": 335830
    },
    {
      "epoch": 1.177035471581729,
      "grad_norm": 2.734375,
      "learning_rate": 3.3758706719523564e-05,
      "loss": 0.9133,
      "step": 335840
    },
    {
      "epoch": 1.1770705190886246,
      "grad_norm": 3.140625,
      "learning_rate": 3.3758057690859866e-05,
      "loss": 0.897,
      "step": 335850
    },
    {
      "epoch": 1.1771055665955203,
      "grad_norm": 3.421875,
      "learning_rate": 3.375740866219616e-05,
      "loss": 0.9392,
      "step": 335860
    },
    {
      "epoch": 1.1771406141024159,
      "grad_norm": 2.8125,
      "learning_rate": 3.375675963353246e-05,
      "loss": 0.853,
      "step": 335870
    },
    {
      "epoch": 1.1771756616093114,
      "grad_norm": 2.4375,
      "learning_rate": 3.3756110604868756e-05,
      "loss": 0.9485,
      "step": 335880
    },
    {
      "epoch": 1.177210709116207,
      "grad_norm": 2.953125,
      "learning_rate": 3.375546157620506e-05,
      "loss": 0.9159,
      "step": 335890
    },
    {
      "epoch": 1.1772457566231027,
      "grad_norm": 2.5,
      "learning_rate": 3.375481254754135e-05,
      "loss": 0.8957,
      "step": 335900
    },
    {
      "epoch": 1.1772808041299982,
      "grad_norm": 2.734375,
      "learning_rate": 3.3754163518877654e-05,
      "loss": 0.8303,
      "step": 335910
    },
    {
      "epoch": 1.1773158516368938,
      "grad_norm": 2.78125,
      "learning_rate": 3.375351449021395e-05,
      "loss": 0.7997,
      "step": 335920
    },
    {
      "epoch": 1.1773508991437893,
      "grad_norm": 2.96875,
      "learning_rate": 3.375286546155025e-05,
      "loss": 0.7973,
      "step": 335930
    },
    {
      "epoch": 1.177385946650685,
      "grad_norm": 2.921875,
      "learning_rate": 3.3752216432886544e-05,
      "loss": 0.8824,
      "step": 335940
    },
    {
      "epoch": 1.1774209941575806,
      "grad_norm": 3.015625,
      "learning_rate": 3.375156740422284e-05,
      "loss": 0.8528,
      "step": 335950
    },
    {
      "epoch": 1.1774560416644761,
      "grad_norm": 2.890625,
      "learning_rate": 3.375091837555914e-05,
      "loss": 0.8875,
      "step": 335960
    },
    {
      "epoch": 1.177491089171372,
      "grad_norm": 2.90625,
      "learning_rate": 3.3750269346895435e-05,
      "loss": 0.8924,
      "step": 335970
    },
    {
      "epoch": 1.1775261366782674,
      "grad_norm": 2.75,
      "learning_rate": 3.3749620318231736e-05,
      "loss": 0.8808,
      "step": 335980
    },
    {
      "epoch": 1.177561184185163,
      "grad_norm": 3.234375,
      "learning_rate": 3.374897128956803e-05,
      "loss": 0.887,
      "step": 335990
    },
    {
      "epoch": 1.1775962316920585,
      "grad_norm": 3.015625,
      "learning_rate": 3.374832226090433e-05,
      "loss": 0.8469,
      "step": 336000
    },
    {
      "epoch": 1.1776312791989543,
      "grad_norm": 2.96875,
      "learning_rate": 3.374767323224063e-05,
      "loss": 0.9279,
      "step": 336010
    },
    {
      "epoch": 1.1776663267058498,
      "grad_norm": 3.4375,
      "learning_rate": 3.374702420357693e-05,
      "loss": 0.8843,
      "step": 336020
    },
    {
      "epoch": 1.1777013742127453,
      "grad_norm": 3.421875,
      "learning_rate": 3.374637517491322e-05,
      "loss": 0.8295,
      "step": 336030
    },
    {
      "epoch": 1.1777364217196409,
      "grad_norm": 3.03125,
      "learning_rate": 3.3745726146249524e-05,
      "loss": 0.8563,
      "step": 336040
    },
    {
      "epoch": 1.1777714692265366,
      "grad_norm": 3.046875,
      "learning_rate": 3.374507711758582e-05,
      "loss": 0.9257,
      "step": 336050
    },
    {
      "epoch": 1.1778065167334322,
      "grad_norm": 2.296875,
      "learning_rate": 3.374442808892212e-05,
      "loss": 0.8548,
      "step": 336060
    },
    {
      "epoch": 1.1778415642403277,
      "grad_norm": 2.8125,
      "learning_rate": 3.3743779060258415e-05,
      "loss": 0.8544,
      "step": 336070
    },
    {
      "epoch": 1.1778766117472235,
      "grad_norm": 2.984375,
      "learning_rate": 3.3743130031594716e-05,
      "loss": 0.9005,
      "step": 336080
    },
    {
      "epoch": 1.177911659254119,
      "grad_norm": 2.59375,
      "learning_rate": 3.374248100293102e-05,
      "loss": 0.848,
      "step": 336090
    },
    {
      "epoch": 1.1779467067610145,
      "grad_norm": 2.546875,
      "learning_rate": 3.374183197426731e-05,
      "loss": 0.9203,
      "step": 336100
    },
    {
      "epoch": 1.17798175426791,
      "grad_norm": 3.1875,
      "learning_rate": 3.3741182945603614e-05,
      "loss": 0.9415,
      "step": 336110
    },
    {
      "epoch": 1.1780168017748058,
      "grad_norm": 2.734375,
      "learning_rate": 3.374053391693991e-05,
      "loss": 0.8375,
      "step": 336120
    },
    {
      "epoch": 1.1780518492817014,
      "grad_norm": 2.828125,
      "learning_rate": 3.373988488827621e-05,
      "loss": 0.857,
      "step": 336130
    },
    {
      "epoch": 1.178086896788597,
      "grad_norm": 2.5625,
      "learning_rate": 3.3739235859612504e-05,
      "loss": 0.8404,
      "step": 336140
    },
    {
      "epoch": 1.1781219442954924,
      "grad_norm": 2.6875,
      "learning_rate": 3.3738586830948806e-05,
      "loss": 0.8674,
      "step": 336150
    },
    {
      "epoch": 1.1781569918023882,
      "grad_norm": 2.796875,
      "learning_rate": 3.37379378022851e-05,
      "loss": 0.8355,
      "step": 336160
    },
    {
      "epoch": 1.1781920393092837,
      "grad_norm": 2.75,
      "learning_rate": 3.37372887736214e-05,
      "loss": 0.8277,
      "step": 336170
    },
    {
      "epoch": 1.1782270868161793,
      "grad_norm": 2.640625,
      "learning_rate": 3.3736639744957696e-05,
      "loss": 0.8135,
      "step": 336180
    },
    {
      "epoch": 1.178262134323075,
      "grad_norm": 2.609375,
      "learning_rate": 3.3735990716294e-05,
      "loss": 0.8206,
      "step": 336190
    },
    {
      "epoch": 1.1782971818299706,
      "grad_norm": 2.671875,
      "learning_rate": 3.373534168763029e-05,
      "loss": 0.8897,
      "step": 336200
    },
    {
      "epoch": 1.178332229336866,
      "grad_norm": 3.015625,
      "learning_rate": 3.3734692658966594e-05,
      "loss": 0.8917,
      "step": 336210
    },
    {
      "epoch": 1.1783672768437616,
      "grad_norm": 2.84375,
      "learning_rate": 3.3734043630302895e-05,
      "loss": 0.8991,
      "step": 336220
    },
    {
      "epoch": 1.1784023243506574,
      "grad_norm": 2.6875,
      "learning_rate": 3.373339460163919e-05,
      "loss": 0.8672,
      "step": 336230
    },
    {
      "epoch": 1.178437371857553,
      "grad_norm": 3.09375,
      "learning_rate": 3.373274557297549e-05,
      "loss": 0.8038,
      "step": 336240
    },
    {
      "epoch": 1.1784724193644485,
      "grad_norm": 2.40625,
      "learning_rate": 3.3732096544311786e-05,
      "loss": 0.8778,
      "step": 336250
    },
    {
      "epoch": 1.178507466871344,
      "grad_norm": 2.984375,
      "learning_rate": 3.373144751564809e-05,
      "loss": 0.8394,
      "step": 336260
    },
    {
      "epoch": 1.1785425143782398,
      "grad_norm": 2.8125,
      "learning_rate": 3.373079848698438e-05,
      "loss": 0.8323,
      "step": 336270
    },
    {
      "epoch": 1.1785775618851353,
      "grad_norm": 2.875,
      "learning_rate": 3.373014945832068e-05,
      "loss": 0.8621,
      "step": 336280
    },
    {
      "epoch": 1.1786126093920308,
      "grad_norm": 3.328125,
      "learning_rate": 3.372950042965698e-05,
      "loss": 0.8154,
      "step": 336290
    },
    {
      "epoch": 1.1786476568989266,
      "grad_norm": 3.015625,
      "learning_rate": 3.372885140099328e-05,
      "loss": 0.8246,
      "step": 336300
    },
    {
      "epoch": 1.1786827044058221,
      "grad_norm": 2.703125,
      "learning_rate": 3.3728202372329574e-05,
      "loss": 0.919,
      "step": 336310
    },
    {
      "epoch": 1.1787177519127177,
      "grad_norm": 3.03125,
      "learning_rate": 3.372755334366587e-05,
      "loss": 0.8899,
      "step": 336320
    },
    {
      "epoch": 1.1787527994196132,
      "grad_norm": 2.546875,
      "learning_rate": 3.372690431500217e-05,
      "loss": 0.8256,
      "step": 336330
    },
    {
      "epoch": 1.178787846926509,
      "grad_norm": 2.71875,
      "learning_rate": 3.3726255286338464e-05,
      "loss": 0.843,
      "step": 336340
    },
    {
      "epoch": 1.1788228944334045,
      "grad_norm": 3.53125,
      "learning_rate": 3.3725606257674766e-05,
      "loss": 0.8783,
      "step": 336350
    },
    {
      "epoch": 1.1788579419403,
      "grad_norm": 3.328125,
      "learning_rate": 3.372495722901106e-05,
      "loss": 0.8667,
      "step": 336360
    },
    {
      "epoch": 1.1788929894471956,
      "grad_norm": 2.703125,
      "learning_rate": 3.372430820034736e-05,
      "loss": 0.8594,
      "step": 336370
    },
    {
      "epoch": 1.1789280369540913,
      "grad_norm": 2.75,
      "learning_rate": 3.3723659171683656e-05,
      "loss": 0.8267,
      "step": 336380
    },
    {
      "epoch": 1.1789630844609869,
      "grad_norm": 2.609375,
      "learning_rate": 3.372301014301996e-05,
      "loss": 0.7638,
      "step": 336390
    },
    {
      "epoch": 1.1789981319678824,
      "grad_norm": 3.046875,
      "learning_rate": 3.372236111435625e-05,
      "loss": 0.9793,
      "step": 336400
    },
    {
      "epoch": 1.1790331794747781,
      "grad_norm": 2.859375,
      "learning_rate": 3.3721712085692554e-05,
      "loss": 0.8683,
      "step": 336410
    },
    {
      "epoch": 1.1790682269816737,
      "grad_norm": 3.09375,
      "learning_rate": 3.372106305702885e-05,
      "loss": 0.894,
      "step": 336420
    },
    {
      "epoch": 1.1791032744885692,
      "grad_norm": 2.8125,
      "learning_rate": 3.372041402836515e-05,
      "loss": 0.8263,
      "step": 336430
    },
    {
      "epoch": 1.179138321995465,
      "grad_norm": 2.5,
      "learning_rate": 3.371976499970145e-05,
      "loss": 0.7458,
      "step": 336440
    },
    {
      "epoch": 1.1791733695023605,
      "grad_norm": 2.4375,
      "learning_rate": 3.3719115971037746e-05,
      "loss": 0.8817,
      "step": 336450
    },
    {
      "epoch": 1.179208417009256,
      "grad_norm": 2.875,
      "learning_rate": 3.371846694237405e-05,
      "loss": 0.8601,
      "step": 336460
    },
    {
      "epoch": 1.1792434645161516,
      "grad_norm": 2.8125,
      "learning_rate": 3.371781791371034e-05,
      "loss": 0.8456,
      "step": 336470
    },
    {
      "epoch": 1.1792785120230471,
      "grad_norm": 3.078125,
      "learning_rate": 3.371716888504664e-05,
      "loss": 0.8734,
      "step": 336480
    },
    {
      "epoch": 1.1793135595299429,
      "grad_norm": 3.03125,
      "learning_rate": 3.371651985638294e-05,
      "loss": 0.835,
      "step": 336490
    },
    {
      "epoch": 1.1793486070368384,
      "grad_norm": 2.953125,
      "learning_rate": 3.371587082771924e-05,
      "loss": 0.8258,
      "step": 336500
    },
    {
      "epoch": 1.179383654543734,
      "grad_norm": 3.015625,
      "learning_rate": 3.3715221799055534e-05,
      "loss": 0.7766,
      "step": 336510
    },
    {
      "epoch": 1.1794187020506297,
      "grad_norm": 3.03125,
      "learning_rate": 3.3714572770391835e-05,
      "loss": 0.8483,
      "step": 336520
    },
    {
      "epoch": 1.1794537495575252,
      "grad_norm": 3.0625,
      "learning_rate": 3.371392374172813e-05,
      "loss": 0.9075,
      "step": 336530
    },
    {
      "epoch": 1.1794887970644208,
      "grad_norm": 2.84375,
      "learning_rate": 3.371327471306443e-05,
      "loss": 0.8546,
      "step": 336540
    },
    {
      "epoch": 1.1795238445713165,
      "grad_norm": 3.03125,
      "learning_rate": 3.3712625684400726e-05,
      "loss": 0.9212,
      "step": 336550
    },
    {
      "epoch": 1.179558892078212,
      "grad_norm": 2.75,
      "learning_rate": 3.371197665573703e-05,
      "loss": 0.8487,
      "step": 336560
    },
    {
      "epoch": 1.1795939395851076,
      "grad_norm": 3.171875,
      "learning_rate": 3.371132762707332e-05,
      "loss": 0.8665,
      "step": 336570
    },
    {
      "epoch": 1.1796289870920031,
      "grad_norm": 3.265625,
      "learning_rate": 3.371067859840962e-05,
      "loss": 0.8708,
      "step": 336580
    },
    {
      "epoch": 1.179664034598899,
      "grad_norm": 2.578125,
      "learning_rate": 3.3710029569745924e-05,
      "loss": 0.875,
      "step": 336590
    },
    {
      "epoch": 1.1796990821057944,
      "grad_norm": 3.140625,
      "learning_rate": 3.370938054108222e-05,
      "loss": 0.8737,
      "step": 336600
    },
    {
      "epoch": 1.17973412961269,
      "grad_norm": 2.65625,
      "learning_rate": 3.370873151241852e-05,
      "loss": 0.8889,
      "step": 336610
    },
    {
      "epoch": 1.1797691771195855,
      "grad_norm": 2.75,
      "learning_rate": 3.3708082483754815e-05,
      "loss": 0.9583,
      "step": 336620
    },
    {
      "epoch": 1.1798042246264813,
      "grad_norm": 3.3125,
      "learning_rate": 3.3707433455091116e-05,
      "loss": 0.8665,
      "step": 336630
    },
    {
      "epoch": 1.1798392721333768,
      "grad_norm": 2.96875,
      "learning_rate": 3.370678442642741e-05,
      "loss": 0.8307,
      "step": 336640
    },
    {
      "epoch": 1.1798743196402723,
      "grad_norm": 3.046875,
      "learning_rate": 3.370613539776371e-05,
      "loss": 0.8391,
      "step": 336650
    },
    {
      "epoch": 1.179909367147168,
      "grad_norm": 2.84375,
      "learning_rate": 3.370548636910001e-05,
      "loss": 0.8252,
      "step": 336660
    },
    {
      "epoch": 1.1799444146540636,
      "grad_norm": 3.21875,
      "learning_rate": 3.370483734043631e-05,
      "loss": 0.8502,
      "step": 336670
    },
    {
      "epoch": 1.1799794621609592,
      "grad_norm": 3.046875,
      "learning_rate": 3.37041883117726e-05,
      "loss": 0.9125,
      "step": 336680
    },
    {
      "epoch": 1.1800145096678547,
      "grad_norm": 2.828125,
      "learning_rate": 3.37035392831089e-05,
      "loss": 0.8241,
      "step": 336690
    },
    {
      "epoch": 1.1800495571747505,
      "grad_norm": 3.0,
      "learning_rate": 3.37028902544452e-05,
      "loss": 0.8003,
      "step": 336700
    },
    {
      "epoch": 1.180084604681646,
      "grad_norm": 2.5625,
      "learning_rate": 3.3702241225781494e-05,
      "loss": 0.8181,
      "step": 336710
    },
    {
      "epoch": 1.1801196521885415,
      "grad_norm": 2.9375,
      "learning_rate": 3.3701592197117795e-05,
      "loss": 0.8586,
      "step": 336720
    },
    {
      "epoch": 1.180154699695437,
      "grad_norm": 2.921875,
      "learning_rate": 3.370094316845409e-05,
      "loss": 0.8836,
      "step": 336730
    },
    {
      "epoch": 1.1801897472023328,
      "grad_norm": 2.421875,
      "learning_rate": 3.370029413979039e-05,
      "loss": 0.875,
      "step": 336740
    },
    {
      "epoch": 1.1802247947092284,
      "grad_norm": 2.875,
      "learning_rate": 3.3699645111126686e-05,
      "loss": 0.839,
      "step": 336750
    },
    {
      "epoch": 1.180259842216124,
      "grad_norm": 2.984375,
      "learning_rate": 3.369899608246299e-05,
      "loss": 0.8653,
      "step": 336760
    },
    {
      "epoch": 1.1802948897230197,
      "grad_norm": 3.234375,
      "learning_rate": 3.369834705379928e-05,
      "loss": 0.9081,
      "step": 336770
    },
    {
      "epoch": 1.1803299372299152,
      "grad_norm": 2.78125,
      "learning_rate": 3.369769802513558e-05,
      "loss": 0.758,
      "step": 336780
    },
    {
      "epoch": 1.1803649847368107,
      "grad_norm": 2.90625,
      "learning_rate": 3.369704899647188e-05,
      "loss": 0.7657,
      "step": 336790
    },
    {
      "epoch": 1.1804000322437063,
      "grad_norm": 2.875,
      "learning_rate": 3.369639996780818e-05,
      "loss": 0.8116,
      "step": 336800
    },
    {
      "epoch": 1.180435079750602,
      "grad_norm": 2.875,
      "learning_rate": 3.369575093914448e-05,
      "loss": 0.8631,
      "step": 336810
    },
    {
      "epoch": 1.1804701272574976,
      "grad_norm": 2.546875,
      "learning_rate": 3.3695101910480775e-05,
      "loss": 0.8345,
      "step": 336820
    },
    {
      "epoch": 1.180505174764393,
      "grad_norm": 2.578125,
      "learning_rate": 3.3694452881817076e-05,
      "loss": 0.838,
      "step": 336830
    },
    {
      "epoch": 1.1805402222712886,
      "grad_norm": 2.890625,
      "learning_rate": 3.369380385315337e-05,
      "loss": 0.883,
      "step": 336840
    },
    {
      "epoch": 1.1805752697781844,
      "grad_norm": 2.828125,
      "learning_rate": 3.369315482448967e-05,
      "loss": 0.8552,
      "step": 336850
    },
    {
      "epoch": 1.18061031728508,
      "grad_norm": 3.28125,
      "learning_rate": 3.369250579582597e-05,
      "loss": 0.7887,
      "step": 336860
    },
    {
      "epoch": 1.1806453647919755,
      "grad_norm": 2.875,
      "learning_rate": 3.369185676716227e-05,
      "loss": 0.8162,
      "step": 336870
    },
    {
      "epoch": 1.1806804122988712,
      "grad_norm": 2.8125,
      "learning_rate": 3.369120773849856e-05,
      "loss": 0.9267,
      "step": 336880
    },
    {
      "epoch": 1.1807154598057668,
      "grad_norm": 3.046875,
      "learning_rate": 3.3690558709834864e-05,
      "loss": 0.9257,
      "step": 336890
    },
    {
      "epoch": 1.1807505073126623,
      "grad_norm": 2.828125,
      "learning_rate": 3.368990968117116e-05,
      "loss": 0.8211,
      "step": 336900
    },
    {
      "epoch": 1.1807855548195578,
      "grad_norm": 3.140625,
      "learning_rate": 3.368926065250746e-05,
      "loss": 0.9373,
      "step": 336910
    },
    {
      "epoch": 1.1808206023264536,
      "grad_norm": 3.078125,
      "learning_rate": 3.3688611623843755e-05,
      "loss": 0.8208,
      "step": 336920
    },
    {
      "epoch": 1.1808556498333491,
      "grad_norm": 3.265625,
      "learning_rate": 3.3687962595180056e-05,
      "loss": 0.9142,
      "step": 336930
    },
    {
      "epoch": 1.1808906973402447,
      "grad_norm": 2.96875,
      "learning_rate": 3.368731356651635e-05,
      "loss": 0.8902,
      "step": 336940
    },
    {
      "epoch": 1.1809257448471402,
      "grad_norm": 2.828125,
      "learning_rate": 3.368666453785265e-05,
      "loss": 0.8457,
      "step": 336950
    },
    {
      "epoch": 1.180960792354036,
      "grad_norm": 2.71875,
      "learning_rate": 3.3686015509188954e-05,
      "loss": 0.879,
      "step": 336960
    },
    {
      "epoch": 1.1809958398609315,
      "grad_norm": 2.578125,
      "learning_rate": 3.368536648052525e-05,
      "loss": 0.7576,
      "step": 336970
    },
    {
      "epoch": 1.181030887367827,
      "grad_norm": 3.28125,
      "learning_rate": 3.368471745186155e-05,
      "loss": 0.8304,
      "step": 336980
    },
    {
      "epoch": 1.1810659348747228,
      "grad_norm": 2.953125,
      "learning_rate": 3.3684068423197844e-05,
      "loss": 0.8592,
      "step": 336990
    },
    {
      "epoch": 1.1811009823816183,
      "grad_norm": 2.84375,
      "learning_rate": 3.3683419394534146e-05,
      "loss": 0.8626,
      "step": 337000
    },
    {
      "epoch": 1.1811360298885138,
      "grad_norm": 2.953125,
      "learning_rate": 3.368277036587044e-05,
      "loss": 0.796,
      "step": 337010
    },
    {
      "epoch": 1.1811710773954094,
      "grad_norm": 2.796875,
      "learning_rate": 3.368212133720674e-05,
      "loss": 0.9015,
      "step": 337020
    },
    {
      "epoch": 1.1812061249023051,
      "grad_norm": 2.953125,
      "learning_rate": 3.3681472308543036e-05,
      "loss": 0.8083,
      "step": 337030
    },
    {
      "epoch": 1.1812411724092007,
      "grad_norm": 2.265625,
      "learning_rate": 3.368082327987934e-05,
      "loss": 0.9766,
      "step": 337040
    },
    {
      "epoch": 1.1812762199160962,
      "grad_norm": 3.28125,
      "learning_rate": 3.368017425121563e-05,
      "loss": 0.7909,
      "step": 337050
    },
    {
      "epoch": 1.1813112674229918,
      "grad_norm": 2.984375,
      "learning_rate": 3.3679525222551934e-05,
      "loss": 0.8535,
      "step": 337060
    },
    {
      "epoch": 1.1813463149298875,
      "grad_norm": 2.25,
      "learning_rate": 3.367887619388823e-05,
      "loss": 0.8274,
      "step": 337070
    },
    {
      "epoch": 1.181381362436783,
      "grad_norm": 3.484375,
      "learning_rate": 3.367822716522452e-05,
      "loss": 0.8528,
      "step": 337080
    },
    {
      "epoch": 1.1814164099436786,
      "grad_norm": 2.8125,
      "learning_rate": 3.3677578136560824e-05,
      "loss": 0.864,
      "step": 337090
    },
    {
      "epoch": 1.1814514574505743,
      "grad_norm": 3.125,
      "learning_rate": 3.367692910789712e-05,
      "loss": 0.8411,
      "step": 337100
    },
    {
      "epoch": 1.1814865049574699,
      "grad_norm": 2.9375,
      "learning_rate": 3.367628007923342e-05,
      "loss": 0.8765,
      "step": 337110
    },
    {
      "epoch": 1.1815215524643654,
      "grad_norm": 2.90625,
      "learning_rate": 3.3675631050569715e-05,
      "loss": 0.896,
      "step": 337120
    },
    {
      "epoch": 1.1815565999712612,
      "grad_norm": 3.625,
      "learning_rate": 3.3674982021906016e-05,
      "loss": 0.8441,
      "step": 337130
    },
    {
      "epoch": 1.1815916474781567,
      "grad_norm": 3.375,
      "learning_rate": 3.367433299324231e-05,
      "loss": 0.8647,
      "step": 337140
    },
    {
      "epoch": 1.1816266949850522,
      "grad_norm": 3.015625,
      "learning_rate": 3.367368396457861e-05,
      "loss": 0.873,
      "step": 337150
    },
    {
      "epoch": 1.1816617424919478,
      "grad_norm": 2.765625,
      "learning_rate": 3.367303493591491e-05,
      "loss": 0.8901,
      "step": 337160
    },
    {
      "epoch": 1.1816967899988433,
      "grad_norm": 3.375,
      "learning_rate": 3.367238590725121e-05,
      "loss": 0.7637,
      "step": 337170
    },
    {
      "epoch": 1.181731837505739,
      "grad_norm": 3.234375,
      "learning_rate": 3.367173687858751e-05,
      "loss": 0.8605,
      "step": 337180
    },
    {
      "epoch": 1.1817668850126346,
      "grad_norm": 2.6875,
      "learning_rate": 3.3671087849923804e-05,
      "loss": 0.8265,
      "step": 337190
    },
    {
      "epoch": 1.1818019325195301,
      "grad_norm": 2.65625,
      "learning_rate": 3.3670438821260106e-05,
      "loss": 0.797,
      "step": 337200
    },
    {
      "epoch": 1.181836980026426,
      "grad_norm": 2.984375,
      "learning_rate": 3.36697897925964e-05,
      "loss": 0.8472,
      "step": 337210
    },
    {
      "epoch": 1.1818720275333214,
      "grad_norm": 2.8125,
      "learning_rate": 3.36691407639327e-05,
      "loss": 0.8789,
      "step": 337220
    },
    {
      "epoch": 1.181907075040217,
      "grad_norm": 2.9375,
      "learning_rate": 3.3668491735268996e-05,
      "loss": 0.8621,
      "step": 337230
    },
    {
      "epoch": 1.1819421225471127,
      "grad_norm": 2.75,
      "learning_rate": 3.36678427066053e-05,
      "loss": 0.903,
      "step": 337240
    },
    {
      "epoch": 1.1819771700540083,
      "grad_norm": 2.71875,
      "learning_rate": 3.366719367794159e-05,
      "loss": 0.8271,
      "step": 337250
    },
    {
      "epoch": 1.1820122175609038,
      "grad_norm": 2.953125,
      "learning_rate": 3.3666544649277894e-05,
      "loss": 0.8476,
      "step": 337260
    },
    {
      "epoch": 1.1820472650677993,
      "grad_norm": 2.78125,
      "learning_rate": 3.366589562061419e-05,
      "loss": 0.8625,
      "step": 337270
    },
    {
      "epoch": 1.182082312574695,
      "grad_norm": 2.953125,
      "learning_rate": 3.366524659195049e-05,
      "loss": 0.926,
      "step": 337280
    },
    {
      "epoch": 1.1821173600815906,
      "grad_norm": 2.9375,
      "learning_rate": 3.3664597563286784e-05,
      "loss": 0.8834,
      "step": 337290
    },
    {
      "epoch": 1.1821524075884862,
      "grad_norm": 2.84375,
      "learning_rate": 3.3663948534623086e-05,
      "loss": 0.811,
      "step": 337300
    },
    {
      "epoch": 1.1821874550953817,
      "grad_norm": 3.125,
      "learning_rate": 3.366329950595938e-05,
      "loss": 0.8499,
      "step": 337310
    },
    {
      "epoch": 1.1822225026022775,
      "grad_norm": 2.84375,
      "learning_rate": 3.366265047729568e-05,
      "loss": 0.8138,
      "step": 337320
    },
    {
      "epoch": 1.182257550109173,
      "grad_norm": 2.921875,
      "learning_rate": 3.366200144863198e-05,
      "loss": 0.8139,
      "step": 337330
    },
    {
      "epoch": 1.1822925976160685,
      "grad_norm": 2.890625,
      "learning_rate": 3.366135241996828e-05,
      "loss": 0.8681,
      "step": 337340
    },
    {
      "epoch": 1.1823276451229643,
      "grad_norm": 3.109375,
      "learning_rate": 3.366070339130458e-05,
      "loss": 0.7733,
      "step": 337350
    },
    {
      "epoch": 1.1823626926298598,
      "grad_norm": 2.875,
      "learning_rate": 3.3660054362640874e-05,
      "loss": 0.8532,
      "step": 337360
    },
    {
      "epoch": 1.1823977401367554,
      "grad_norm": 2.984375,
      "learning_rate": 3.3659405333977175e-05,
      "loss": 0.9202,
      "step": 337370
    },
    {
      "epoch": 1.182432787643651,
      "grad_norm": 3.09375,
      "learning_rate": 3.365875630531347e-05,
      "loss": 0.9342,
      "step": 337380
    },
    {
      "epoch": 1.1824678351505467,
      "grad_norm": 3.03125,
      "learning_rate": 3.365810727664977e-05,
      "loss": 0.8741,
      "step": 337390
    },
    {
      "epoch": 1.1825028826574422,
      "grad_norm": 2.53125,
      "learning_rate": 3.3657458247986066e-05,
      "loss": 0.8485,
      "step": 337400
    },
    {
      "epoch": 1.1825379301643377,
      "grad_norm": 2.75,
      "learning_rate": 3.365680921932237e-05,
      "loss": 0.8847,
      "step": 337410
    },
    {
      "epoch": 1.1825729776712333,
      "grad_norm": 2.75,
      "learning_rate": 3.365616019065866e-05,
      "loss": 0.8502,
      "step": 337420
    },
    {
      "epoch": 1.182608025178129,
      "grad_norm": 2.703125,
      "learning_rate": 3.365551116199496e-05,
      "loss": 0.8948,
      "step": 337430
    },
    {
      "epoch": 1.1826430726850246,
      "grad_norm": 2.96875,
      "learning_rate": 3.365486213333126e-05,
      "loss": 0.7972,
      "step": 337440
    },
    {
      "epoch": 1.18267812019192,
      "grad_norm": 2.75,
      "learning_rate": 3.365421310466755e-05,
      "loss": 0.8087,
      "step": 337450
    },
    {
      "epoch": 1.1827131676988158,
      "grad_norm": 3.21875,
      "learning_rate": 3.3653564076003854e-05,
      "loss": 0.8381,
      "step": 337460
    },
    {
      "epoch": 1.1827482152057114,
      "grad_norm": 2.3125,
      "learning_rate": 3.365291504734015e-05,
      "loss": 0.7996,
      "step": 337470
    },
    {
      "epoch": 1.182783262712607,
      "grad_norm": 2.75,
      "learning_rate": 3.365226601867645e-05,
      "loss": 0.819,
      "step": 337480
    },
    {
      "epoch": 1.1828183102195025,
      "grad_norm": 2.875,
      "learning_rate": 3.3651616990012744e-05,
      "loss": 0.8225,
      "step": 337490
    },
    {
      "epoch": 1.1828533577263982,
      "grad_norm": 3.109375,
      "learning_rate": 3.3650967961349046e-05,
      "loss": 0.9113,
      "step": 337500
    },
    {
      "epoch": 1.1828884052332937,
      "grad_norm": 2.921875,
      "learning_rate": 3.365031893268534e-05,
      "loss": 0.9182,
      "step": 337510
    },
    {
      "epoch": 1.1829234527401893,
      "grad_norm": 2.9375,
      "learning_rate": 3.364966990402164e-05,
      "loss": 0.9657,
      "step": 337520
    },
    {
      "epoch": 1.1829585002470848,
      "grad_norm": 2.875,
      "learning_rate": 3.3649020875357936e-05,
      "loss": 0.9099,
      "step": 337530
    },
    {
      "epoch": 1.1829935477539806,
      "grad_norm": 3.296875,
      "learning_rate": 3.364837184669424e-05,
      "loss": 0.816,
      "step": 337540
    },
    {
      "epoch": 1.1830285952608761,
      "grad_norm": 3.359375,
      "learning_rate": 3.364772281803054e-05,
      "loss": 0.8672,
      "step": 337550
    },
    {
      "epoch": 1.1830636427677717,
      "grad_norm": 2.984375,
      "learning_rate": 3.3647073789366834e-05,
      "loss": 0.8396,
      "step": 337560
    },
    {
      "epoch": 1.1830986902746674,
      "grad_norm": 3.1875,
      "learning_rate": 3.3646424760703135e-05,
      "loss": 0.8965,
      "step": 337570
    },
    {
      "epoch": 1.183133737781563,
      "grad_norm": 3.078125,
      "learning_rate": 3.364577573203943e-05,
      "loss": 0.8847,
      "step": 337580
    },
    {
      "epoch": 1.1831687852884585,
      "grad_norm": 3.453125,
      "learning_rate": 3.364512670337573e-05,
      "loss": 0.8448,
      "step": 337590
    },
    {
      "epoch": 1.183203832795354,
      "grad_norm": 3.140625,
      "learning_rate": 3.3644477674712026e-05,
      "loss": 0.7949,
      "step": 337600
    },
    {
      "epoch": 1.1832388803022498,
      "grad_norm": 2.625,
      "learning_rate": 3.364382864604833e-05,
      "loss": 0.9246,
      "step": 337610
    },
    {
      "epoch": 1.1832739278091453,
      "grad_norm": 2.6875,
      "learning_rate": 3.364317961738462e-05,
      "loss": 0.804,
      "step": 337620
    },
    {
      "epoch": 1.1833089753160408,
      "grad_norm": 3.0625,
      "learning_rate": 3.364253058872092e-05,
      "loss": 0.8471,
      "step": 337630
    },
    {
      "epoch": 1.1833440228229364,
      "grad_norm": 2.84375,
      "learning_rate": 3.364188156005722e-05,
      "loss": 0.7961,
      "step": 337640
    },
    {
      "epoch": 1.1833790703298321,
      "grad_norm": 2.9375,
      "learning_rate": 3.364123253139352e-05,
      "loss": 0.7582,
      "step": 337650
    },
    {
      "epoch": 1.1834141178367277,
      "grad_norm": 3.0625,
      "learning_rate": 3.3640583502729814e-05,
      "loss": 0.8726,
      "step": 337660
    },
    {
      "epoch": 1.1834491653436232,
      "grad_norm": 2.84375,
      "learning_rate": 3.3639934474066115e-05,
      "loss": 0.9138,
      "step": 337670
    },
    {
      "epoch": 1.183484212850519,
      "grad_norm": 3.234375,
      "learning_rate": 3.3639285445402416e-05,
      "loss": 0.8761,
      "step": 337680
    },
    {
      "epoch": 1.1835192603574145,
      "grad_norm": 2.46875,
      "learning_rate": 3.363863641673871e-05,
      "loss": 0.818,
      "step": 337690
    },
    {
      "epoch": 1.18355430786431,
      "grad_norm": 3.015625,
      "learning_rate": 3.363798738807501e-05,
      "loss": 0.8676,
      "step": 337700
    },
    {
      "epoch": 1.1835893553712056,
      "grad_norm": 2.71875,
      "learning_rate": 3.363733835941131e-05,
      "loss": 0.8574,
      "step": 337710
    },
    {
      "epoch": 1.1836244028781013,
      "grad_norm": 2.9375,
      "learning_rate": 3.363668933074761e-05,
      "loss": 0.8136,
      "step": 337720
    },
    {
      "epoch": 1.1836594503849969,
      "grad_norm": 3.046875,
      "learning_rate": 3.36360403020839e-05,
      "loss": 0.8483,
      "step": 337730
    },
    {
      "epoch": 1.1836944978918924,
      "grad_norm": 2.859375,
      "learning_rate": 3.3635391273420204e-05,
      "loss": 0.9221,
      "step": 337740
    },
    {
      "epoch": 1.183729545398788,
      "grad_norm": 3.125,
      "learning_rate": 3.36347422447565e-05,
      "loss": 0.8145,
      "step": 337750
    },
    {
      "epoch": 1.1837645929056837,
      "grad_norm": 2.609375,
      "learning_rate": 3.36340932160928e-05,
      "loss": 0.8479,
      "step": 337760
    },
    {
      "epoch": 1.1837996404125792,
      "grad_norm": 2.640625,
      "learning_rate": 3.3633444187429095e-05,
      "loss": 0.7918,
      "step": 337770
    },
    {
      "epoch": 1.1838346879194748,
      "grad_norm": 3.8125,
      "learning_rate": 3.3632795158765396e-05,
      "loss": 0.8736,
      "step": 337780
    },
    {
      "epoch": 1.1838697354263705,
      "grad_norm": 3.21875,
      "learning_rate": 3.363214613010169e-05,
      "loss": 0.9062,
      "step": 337790
    },
    {
      "epoch": 1.183904782933266,
      "grad_norm": 2.578125,
      "learning_rate": 3.363149710143799e-05,
      "loss": 0.8265,
      "step": 337800
    },
    {
      "epoch": 1.1839398304401616,
      "grad_norm": 3.015625,
      "learning_rate": 3.363084807277429e-05,
      "loss": 0.8489,
      "step": 337810
    },
    {
      "epoch": 1.1839748779470574,
      "grad_norm": 2.609375,
      "learning_rate": 3.363019904411058e-05,
      "loss": 0.8145,
      "step": 337820
    },
    {
      "epoch": 1.184009925453953,
      "grad_norm": 3.03125,
      "learning_rate": 3.362955001544688e-05,
      "loss": 0.8313,
      "step": 337830
    },
    {
      "epoch": 1.1840449729608484,
      "grad_norm": 2.8125,
      "learning_rate": 3.362890098678318e-05,
      "loss": 0.8856,
      "step": 337840
    },
    {
      "epoch": 1.184080020467744,
      "grad_norm": 3.390625,
      "learning_rate": 3.362825195811948e-05,
      "loss": 0.8651,
      "step": 337850
    },
    {
      "epoch": 1.1841150679746395,
      "grad_norm": 3.140625,
      "learning_rate": 3.3627602929455774e-05,
      "loss": 0.8936,
      "step": 337860
    },
    {
      "epoch": 1.1841501154815353,
      "grad_norm": 2.96875,
      "learning_rate": 3.3626953900792075e-05,
      "loss": 0.9122,
      "step": 337870
    },
    {
      "epoch": 1.1841851629884308,
      "grad_norm": 3.0625,
      "learning_rate": 3.362630487212837e-05,
      "loss": 0.873,
      "step": 337880
    },
    {
      "epoch": 1.1842202104953263,
      "grad_norm": 3.328125,
      "learning_rate": 3.362565584346467e-05,
      "loss": 0.8912,
      "step": 337890
    },
    {
      "epoch": 1.184255258002222,
      "grad_norm": 2.640625,
      "learning_rate": 3.3625006814800966e-05,
      "loss": 0.8432,
      "step": 337900
    },
    {
      "epoch": 1.1842903055091176,
      "grad_norm": 2.90625,
      "learning_rate": 3.362435778613727e-05,
      "loss": 0.8779,
      "step": 337910
    },
    {
      "epoch": 1.1843253530160132,
      "grad_norm": 2.265625,
      "learning_rate": 3.362370875747357e-05,
      "loss": 0.8285,
      "step": 337920
    },
    {
      "epoch": 1.184360400522909,
      "grad_norm": 2.96875,
      "learning_rate": 3.362305972880986e-05,
      "loss": 0.8648,
      "step": 337930
    },
    {
      "epoch": 1.1843954480298045,
      "grad_norm": 2.703125,
      "learning_rate": 3.3622410700146164e-05,
      "loss": 0.8815,
      "step": 337940
    },
    {
      "epoch": 1.1844304955367,
      "grad_norm": 3.171875,
      "learning_rate": 3.362176167148246e-05,
      "loss": 0.883,
      "step": 337950
    },
    {
      "epoch": 1.1844655430435955,
      "grad_norm": 3.34375,
      "learning_rate": 3.362111264281876e-05,
      "loss": 0.885,
      "step": 337960
    },
    {
      "epoch": 1.1845005905504913,
      "grad_norm": 3.140625,
      "learning_rate": 3.3620463614155055e-05,
      "loss": 0.7954,
      "step": 337970
    },
    {
      "epoch": 1.1845356380573868,
      "grad_norm": 2.734375,
      "learning_rate": 3.3619814585491356e-05,
      "loss": 0.8696,
      "step": 337980
    },
    {
      "epoch": 1.1845706855642824,
      "grad_norm": 2.9375,
      "learning_rate": 3.361916555682765e-05,
      "loss": 0.8291,
      "step": 337990
    },
    {
      "epoch": 1.184605733071178,
      "grad_norm": 3.046875,
      "learning_rate": 3.361851652816395e-05,
      "loss": 0.863,
      "step": 338000
    },
    {
      "epoch": 1.1846407805780736,
      "grad_norm": 2.484375,
      "learning_rate": 3.361786749950025e-05,
      "loss": 0.8086,
      "step": 338010
    },
    {
      "epoch": 1.1846758280849692,
      "grad_norm": 2.609375,
      "learning_rate": 3.361721847083655e-05,
      "loss": 0.8654,
      "step": 338020
    },
    {
      "epoch": 1.1847108755918647,
      "grad_norm": 2.90625,
      "learning_rate": 3.361656944217284e-05,
      "loss": 0.8309,
      "step": 338030
    },
    {
      "epoch": 1.1847459230987605,
      "grad_norm": 2.671875,
      "learning_rate": 3.3615920413509144e-05,
      "loss": 0.7827,
      "step": 338040
    },
    {
      "epoch": 1.184780970605656,
      "grad_norm": 3.5,
      "learning_rate": 3.3615271384845446e-05,
      "loss": 0.9099,
      "step": 338050
    },
    {
      "epoch": 1.1848160181125515,
      "grad_norm": 3.328125,
      "learning_rate": 3.361462235618174e-05,
      "loss": 0.8718,
      "step": 338060
    },
    {
      "epoch": 1.184851065619447,
      "grad_norm": 3.046875,
      "learning_rate": 3.361397332751804e-05,
      "loss": 0.9221,
      "step": 338070
    },
    {
      "epoch": 1.1848861131263428,
      "grad_norm": 2.71875,
      "learning_rate": 3.3613324298854336e-05,
      "loss": 0.8755,
      "step": 338080
    },
    {
      "epoch": 1.1849211606332384,
      "grad_norm": 2.578125,
      "learning_rate": 3.361267527019064e-05,
      "loss": 0.8678,
      "step": 338090
    },
    {
      "epoch": 1.184956208140134,
      "grad_norm": 3.203125,
      "learning_rate": 3.361202624152693e-05,
      "loss": 0.9052,
      "step": 338100
    },
    {
      "epoch": 1.1849912556470295,
      "grad_norm": 2.734375,
      "learning_rate": 3.3611377212863234e-05,
      "loss": 0.9712,
      "step": 338110
    },
    {
      "epoch": 1.1850263031539252,
      "grad_norm": 3.21875,
      "learning_rate": 3.361072818419953e-05,
      "loss": 0.7819,
      "step": 338120
    },
    {
      "epoch": 1.1850613506608207,
      "grad_norm": 2.765625,
      "learning_rate": 3.361007915553583e-05,
      "loss": 0.8948,
      "step": 338130
    },
    {
      "epoch": 1.1850963981677163,
      "grad_norm": 3.09375,
      "learning_rate": 3.3609430126872124e-05,
      "loss": 0.8879,
      "step": 338140
    },
    {
      "epoch": 1.185131445674612,
      "grad_norm": 3.09375,
      "learning_rate": 3.3608781098208426e-05,
      "loss": 0.8672,
      "step": 338150
    },
    {
      "epoch": 1.1851664931815076,
      "grad_norm": 2.984375,
      "learning_rate": 3.360813206954472e-05,
      "loss": 0.8645,
      "step": 338160
    },
    {
      "epoch": 1.185201540688403,
      "grad_norm": 2.953125,
      "learning_rate": 3.360748304088102e-05,
      "loss": 0.88,
      "step": 338170
    },
    {
      "epoch": 1.1852365881952986,
      "grad_norm": 2.75,
      "learning_rate": 3.3606834012217316e-05,
      "loss": 0.8598,
      "step": 338180
    },
    {
      "epoch": 1.1852716357021944,
      "grad_norm": 3.125,
      "learning_rate": 3.360618498355362e-05,
      "loss": 0.7566,
      "step": 338190
    },
    {
      "epoch": 1.18530668320909,
      "grad_norm": 2.828125,
      "learning_rate": 3.360553595488991e-05,
      "loss": 0.849,
      "step": 338200
    },
    {
      "epoch": 1.1853417307159855,
      "grad_norm": 2.921875,
      "learning_rate": 3.360488692622621e-05,
      "loss": 0.9191,
      "step": 338210
    },
    {
      "epoch": 1.185376778222881,
      "grad_norm": 3.140625,
      "learning_rate": 3.360423789756251e-05,
      "loss": 0.8718,
      "step": 338220
    },
    {
      "epoch": 1.1854118257297768,
      "grad_norm": 3.28125,
      "learning_rate": 3.36035888688988e-05,
      "loss": 0.852,
      "step": 338230
    },
    {
      "epoch": 1.1854468732366723,
      "grad_norm": 2.890625,
      "learning_rate": 3.3602939840235104e-05,
      "loss": 0.7947,
      "step": 338240
    },
    {
      "epoch": 1.1854819207435678,
      "grad_norm": 2.515625,
      "learning_rate": 3.36022908115714e-05,
      "loss": 0.8125,
      "step": 338250
    },
    {
      "epoch": 1.1855169682504636,
      "grad_norm": 3.0625,
      "learning_rate": 3.36016417829077e-05,
      "loss": 0.964,
      "step": 338260
    },
    {
      "epoch": 1.1855520157573591,
      "grad_norm": 2.828125,
      "learning_rate": 3.3600992754243995e-05,
      "loss": 0.8656,
      "step": 338270
    },
    {
      "epoch": 1.1855870632642547,
      "grad_norm": 2.9375,
      "learning_rate": 3.3600343725580296e-05,
      "loss": 0.797,
      "step": 338280
    },
    {
      "epoch": 1.1856221107711502,
      "grad_norm": 2.921875,
      "learning_rate": 3.35996946969166e-05,
      "loss": 0.9119,
      "step": 338290
    },
    {
      "epoch": 1.185657158278046,
      "grad_norm": 2.8125,
      "learning_rate": 3.359904566825289e-05,
      "loss": 0.8446,
      "step": 338300
    },
    {
      "epoch": 1.1856922057849415,
      "grad_norm": 2.765625,
      "learning_rate": 3.3598396639589194e-05,
      "loss": 0.853,
      "step": 338310
    },
    {
      "epoch": 1.185727253291837,
      "grad_norm": 2.484375,
      "learning_rate": 3.359774761092549e-05,
      "loss": 0.8774,
      "step": 338320
    },
    {
      "epoch": 1.1857623007987326,
      "grad_norm": 2.9375,
      "learning_rate": 3.359709858226179e-05,
      "loss": 0.8188,
      "step": 338330
    },
    {
      "epoch": 1.1857973483056283,
      "grad_norm": 2.984375,
      "learning_rate": 3.3596449553598084e-05,
      "loss": 0.8581,
      "step": 338340
    },
    {
      "epoch": 1.1858323958125239,
      "grad_norm": 2.9375,
      "learning_rate": 3.3595800524934386e-05,
      "loss": 0.813,
      "step": 338350
    },
    {
      "epoch": 1.1858674433194194,
      "grad_norm": 2.9375,
      "learning_rate": 3.359515149627068e-05,
      "loss": 0.8638,
      "step": 338360
    },
    {
      "epoch": 1.1859024908263152,
      "grad_norm": 2.859375,
      "learning_rate": 3.359450246760698e-05,
      "loss": 0.7482,
      "step": 338370
    },
    {
      "epoch": 1.1859375383332107,
      "grad_norm": 3.046875,
      "learning_rate": 3.3593853438943276e-05,
      "loss": 0.8939,
      "step": 338380
    },
    {
      "epoch": 1.1859725858401062,
      "grad_norm": 3.078125,
      "learning_rate": 3.359320441027958e-05,
      "loss": 0.862,
      "step": 338390
    },
    {
      "epoch": 1.1860076333470018,
      "grad_norm": 2.796875,
      "learning_rate": 3.359255538161587e-05,
      "loss": 0.7729,
      "step": 338400
    },
    {
      "epoch": 1.1860426808538975,
      "grad_norm": 3.203125,
      "learning_rate": 3.3591906352952174e-05,
      "loss": 0.9399,
      "step": 338410
    },
    {
      "epoch": 1.186077728360793,
      "grad_norm": 2.953125,
      "learning_rate": 3.3591257324288475e-05,
      "loss": 0.9361,
      "step": 338420
    },
    {
      "epoch": 1.1861127758676886,
      "grad_norm": 3.203125,
      "learning_rate": 3.359060829562477e-05,
      "loss": 0.8508,
      "step": 338430
    },
    {
      "epoch": 1.1861478233745841,
      "grad_norm": 2.984375,
      "learning_rate": 3.358995926696107e-05,
      "loss": 0.9152,
      "step": 338440
    },
    {
      "epoch": 1.18618287088148,
      "grad_norm": 2.90625,
      "learning_rate": 3.3589310238297366e-05,
      "loss": 0.8791,
      "step": 338450
    },
    {
      "epoch": 1.1862179183883754,
      "grad_norm": 3.015625,
      "learning_rate": 3.358866120963367e-05,
      "loss": 0.9165,
      "step": 338460
    },
    {
      "epoch": 1.186252965895271,
      "grad_norm": 3.5625,
      "learning_rate": 3.358801218096996e-05,
      "loss": 0.7943,
      "step": 338470
    },
    {
      "epoch": 1.1862880134021667,
      "grad_norm": 2.78125,
      "learning_rate": 3.358736315230626e-05,
      "loss": 0.8438,
      "step": 338480
    },
    {
      "epoch": 1.1863230609090623,
      "grad_norm": 3.21875,
      "learning_rate": 3.358671412364256e-05,
      "loss": 0.7985,
      "step": 338490
    },
    {
      "epoch": 1.1863581084159578,
      "grad_norm": 3.03125,
      "learning_rate": 3.358606509497886e-05,
      "loss": 0.8972,
      "step": 338500
    },
    {
      "epoch": 1.1863931559228535,
      "grad_norm": 2.828125,
      "learning_rate": 3.3585416066315154e-05,
      "loss": 0.7565,
      "step": 338510
    },
    {
      "epoch": 1.186428203429749,
      "grad_norm": 2.84375,
      "learning_rate": 3.3584767037651455e-05,
      "loss": 0.8923,
      "step": 338520
    },
    {
      "epoch": 1.1864632509366446,
      "grad_norm": 3.109375,
      "learning_rate": 3.358411800898775e-05,
      "loss": 0.8786,
      "step": 338530
    },
    {
      "epoch": 1.1864982984435402,
      "grad_norm": 2.578125,
      "learning_rate": 3.358346898032405e-05,
      "loss": 0.833,
      "step": 338540
    },
    {
      "epoch": 1.1865333459504357,
      "grad_norm": 2.875,
      "learning_rate": 3.358281995166035e-05,
      "loss": 0.8744,
      "step": 338550
    },
    {
      "epoch": 1.1865683934573314,
      "grad_norm": 2.984375,
      "learning_rate": 3.358217092299665e-05,
      "loss": 0.8874,
      "step": 338560
    },
    {
      "epoch": 1.186603440964227,
      "grad_norm": 3.109375,
      "learning_rate": 3.358152189433294e-05,
      "loss": 0.8537,
      "step": 338570
    },
    {
      "epoch": 1.1866384884711225,
      "grad_norm": 2.828125,
      "learning_rate": 3.3580872865669236e-05,
      "loss": 0.7179,
      "step": 338580
    },
    {
      "epoch": 1.1866735359780183,
      "grad_norm": 3.078125,
      "learning_rate": 3.358022383700554e-05,
      "loss": 0.8326,
      "step": 338590
    },
    {
      "epoch": 1.1867085834849138,
      "grad_norm": 3.09375,
      "learning_rate": 3.357957480834183e-05,
      "loss": 0.9002,
      "step": 338600
    },
    {
      "epoch": 1.1867436309918094,
      "grad_norm": 4.15625,
      "learning_rate": 3.3578925779678134e-05,
      "loss": 0.7585,
      "step": 338610
    },
    {
      "epoch": 1.186778678498705,
      "grad_norm": 3.4375,
      "learning_rate": 3.357827675101443e-05,
      "loss": 0.8939,
      "step": 338620
    },
    {
      "epoch": 1.1868137260056006,
      "grad_norm": 3.1875,
      "learning_rate": 3.357762772235073e-05,
      "loss": 0.9044,
      "step": 338630
    },
    {
      "epoch": 1.1868487735124962,
      "grad_norm": 3.234375,
      "learning_rate": 3.357697869368703e-05,
      "loss": 0.8454,
      "step": 338640
    },
    {
      "epoch": 1.1868838210193917,
      "grad_norm": 2.640625,
      "learning_rate": 3.3576329665023326e-05,
      "loss": 0.7624,
      "step": 338650
    },
    {
      "epoch": 1.1869188685262875,
      "grad_norm": 2.53125,
      "learning_rate": 3.357568063635963e-05,
      "loss": 0.8367,
      "step": 338660
    },
    {
      "epoch": 1.186953916033183,
      "grad_norm": 2.59375,
      "learning_rate": 3.357503160769592e-05,
      "loss": 0.7867,
      "step": 338670
    },
    {
      "epoch": 1.1869889635400785,
      "grad_norm": 2.8125,
      "learning_rate": 3.357438257903222e-05,
      "loss": 0.7715,
      "step": 338680
    },
    {
      "epoch": 1.187024011046974,
      "grad_norm": 2.4375,
      "learning_rate": 3.357373355036852e-05,
      "loss": 0.8369,
      "step": 338690
    },
    {
      "epoch": 1.1870590585538698,
      "grad_norm": 3.078125,
      "learning_rate": 3.357308452170482e-05,
      "loss": 0.8768,
      "step": 338700
    },
    {
      "epoch": 1.1870941060607654,
      "grad_norm": 2.96875,
      "learning_rate": 3.3572435493041114e-05,
      "loss": 0.9181,
      "step": 338710
    },
    {
      "epoch": 1.187129153567661,
      "grad_norm": 2.0625,
      "learning_rate": 3.3571786464377415e-05,
      "loss": 0.8705,
      "step": 338720
    },
    {
      "epoch": 1.1871642010745567,
      "grad_norm": 2.875,
      "learning_rate": 3.357113743571371e-05,
      "loss": 0.9143,
      "step": 338730
    },
    {
      "epoch": 1.1871992485814522,
      "grad_norm": 2.46875,
      "learning_rate": 3.357048840705001e-05,
      "loss": 0.7204,
      "step": 338740
    },
    {
      "epoch": 1.1872342960883477,
      "grad_norm": 2.8125,
      "learning_rate": 3.3569839378386306e-05,
      "loss": 0.8214,
      "step": 338750
    },
    {
      "epoch": 1.1872693435952433,
      "grad_norm": 3.40625,
      "learning_rate": 3.356919034972261e-05,
      "loss": 0.8697,
      "step": 338760
    },
    {
      "epoch": 1.187304391102139,
      "grad_norm": 3.25,
      "learning_rate": 3.35685413210589e-05,
      "loss": 0.9347,
      "step": 338770
    },
    {
      "epoch": 1.1873394386090346,
      "grad_norm": 2.84375,
      "learning_rate": 3.35678922923952e-05,
      "loss": 0.8408,
      "step": 338780
    },
    {
      "epoch": 1.18737448611593,
      "grad_norm": 3.40625,
      "learning_rate": 3.3567243263731505e-05,
      "loss": 0.9731,
      "step": 338790
    },
    {
      "epoch": 1.1874095336228256,
      "grad_norm": 3.0625,
      "learning_rate": 3.35665942350678e-05,
      "loss": 0.8617,
      "step": 338800
    },
    {
      "epoch": 1.1874445811297214,
      "grad_norm": 3.421875,
      "learning_rate": 3.35659452064041e-05,
      "loss": 0.8727,
      "step": 338810
    },
    {
      "epoch": 1.187479628636617,
      "grad_norm": 2.78125,
      "learning_rate": 3.3565296177740395e-05,
      "loss": 0.8358,
      "step": 338820
    },
    {
      "epoch": 1.1875146761435125,
      "grad_norm": 3.078125,
      "learning_rate": 3.3564647149076697e-05,
      "loss": 0.9254,
      "step": 338830
    },
    {
      "epoch": 1.1875497236504082,
      "grad_norm": 2.8125,
      "learning_rate": 3.356399812041299e-05,
      "loss": 0.9001,
      "step": 338840
    },
    {
      "epoch": 1.1875847711573038,
      "grad_norm": 3.578125,
      "learning_rate": 3.356334909174929e-05,
      "loss": 0.8687,
      "step": 338850
    },
    {
      "epoch": 1.1876198186641993,
      "grad_norm": 2.984375,
      "learning_rate": 3.356270006308559e-05,
      "loss": 0.8033,
      "step": 338860
    },
    {
      "epoch": 1.1876548661710948,
      "grad_norm": 2.875,
      "learning_rate": 3.356205103442189e-05,
      "loss": 0.8086,
      "step": 338870
    },
    {
      "epoch": 1.1876899136779906,
      "grad_norm": 2.84375,
      "learning_rate": 3.356140200575818e-05,
      "loss": 0.8206,
      "step": 338880
    },
    {
      "epoch": 1.1877249611848861,
      "grad_norm": 2.671875,
      "learning_rate": 3.3560752977094485e-05,
      "loss": 0.9043,
      "step": 338890
    },
    {
      "epoch": 1.1877600086917817,
      "grad_norm": 3.125,
      "learning_rate": 3.356010394843078e-05,
      "loss": 0.8552,
      "step": 338900
    },
    {
      "epoch": 1.1877950561986772,
      "grad_norm": 2.640625,
      "learning_rate": 3.355945491976708e-05,
      "loss": 0.8234,
      "step": 338910
    },
    {
      "epoch": 1.187830103705573,
      "grad_norm": 2.421875,
      "learning_rate": 3.355880589110338e-05,
      "loss": 0.8139,
      "step": 338920
    },
    {
      "epoch": 1.1878651512124685,
      "grad_norm": 2.953125,
      "learning_rate": 3.3558156862439677e-05,
      "loss": 0.8549,
      "step": 338930
    },
    {
      "epoch": 1.187900198719364,
      "grad_norm": 2.59375,
      "learning_rate": 3.355750783377598e-05,
      "loss": 0.8492,
      "step": 338940
    },
    {
      "epoch": 1.1879352462262598,
      "grad_norm": 2.546875,
      "learning_rate": 3.3556858805112266e-05,
      "loss": 0.944,
      "step": 338950
    },
    {
      "epoch": 1.1879702937331553,
      "grad_norm": 2.78125,
      "learning_rate": 3.355620977644857e-05,
      "loss": 0.8959,
      "step": 338960
    },
    {
      "epoch": 1.1880053412400509,
      "grad_norm": 2.8125,
      "learning_rate": 3.355556074778486e-05,
      "loss": 0.8333,
      "step": 338970
    },
    {
      "epoch": 1.1880403887469464,
      "grad_norm": 2.953125,
      "learning_rate": 3.355491171912116e-05,
      "loss": 0.959,
      "step": 338980
    },
    {
      "epoch": 1.1880754362538422,
      "grad_norm": 2.546875,
      "learning_rate": 3.355426269045746e-05,
      "loss": 0.8496,
      "step": 338990
    },
    {
      "epoch": 1.1881104837607377,
      "grad_norm": 2.84375,
      "learning_rate": 3.355361366179376e-05,
      "loss": 0.9157,
      "step": 339000
    },
    {
      "epoch": 1.1881455312676332,
      "grad_norm": 3.203125,
      "learning_rate": 3.355296463313006e-05,
      "loss": 0.8706,
      "step": 339010
    },
    {
      "epoch": 1.1881805787745288,
      "grad_norm": 2.671875,
      "learning_rate": 3.3552315604466355e-05,
      "loss": 0.8539,
      "step": 339020
    },
    {
      "epoch": 1.1882156262814245,
      "grad_norm": 3.078125,
      "learning_rate": 3.3551666575802657e-05,
      "loss": 0.7831,
      "step": 339030
    },
    {
      "epoch": 1.18825067378832,
      "grad_norm": 2.78125,
      "learning_rate": 3.355101754713895e-05,
      "loss": 0.8781,
      "step": 339040
    },
    {
      "epoch": 1.1882857212952156,
      "grad_norm": 2.828125,
      "learning_rate": 3.355036851847525e-05,
      "loss": 0.864,
      "step": 339050
    },
    {
      "epoch": 1.1883207688021113,
      "grad_norm": 2.59375,
      "learning_rate": 3.354971948981155e-05,
      "loss": 0.8982,
      "step": 339060
    },
    {
      "epoch": 1.1883558163090069,
      "grad_norm": 2.9375,
      "learning_rate": 3.354907046114785e-05,
      "loss": 0.8316,
      "step": 339070
    },
    {
      "epoch": 1.1883908638159024,
      "grad_norm": 3.203125,
      "learning_rate": 3.354842143248414e-05,
      "loss": 0.8268,
      "step": 339080
    },
    {
      "epoch": 1.188425911322798,
      "grad_norm": 2.9375,
      "learning_rate": 3.3547772403820445e-05,
      "loss": 0.8057,
      "step": 339090
    },
    {
      "epoch": 1.1884609588296937,
      "grad_norm": 3.171875,
      "learning_rate": 3.354712337515674e-05,
      "loss": 0.8851,
      "step": 339100
    },
    {
      "epoch": 1.1884960063365893,
      "grad_norm": 2.671875,
      "learning_rate": 3.354647434649304e-05,
      "loss": 0.8008,
      "step": 339110
    },
    {
      "epoch": 1.1885310538434848,
      "grad_norm": 2.703125,
      "learning_rate": 3.3545825317829335e-05,
      "loss": 0.8199,
      "step": 339120
    },
    {
      "epoch": 1.1885661013503803,
      "grad_norm": 3.0,
      "learning_rate": 3.3545176289165637e-05,
      "loss": 0.8523,
      "step": 339130
    },
    {
      "epoch": 1.188601148857276,
      "grad_norm": 3.125,
      "learning_rate": 3.354452726050193e-05,
      "loss": 0.8591,
      "step": 339140
    },
    {
      "epoch": 1.1886361963641716,
      "grad_norm": 3.234375,
      "learning_rate": 3.354387823183823e-05,
      "loss": 0.7949,
      "step": 339150
    },
    {
      "epoch": 1.1886712438710672,
      "grad_norm": 2.609375,
      "learning_rate": 3.3543229203174534e-05,
      "loss": 0.9061,
      "step": 339160
    },
    {
      "epoch": 1.188706291377963,
      "grad_norm": 3.375,
      "learning_rate": 3.354258017451083e-05,
      "loss": 0.8243,
      "step": 339170
    },
    {
      "epoch": 1.1887413388848584,
      "grad_norm": 3.0,
      "learning_rate": 3.354193114584713e-05,
      "loss": 0.7673,
      "step": 339180
    },
    {
      "epoch": 1.188776386391754,
      "grad_norm": 3.109375,
      "learning_rate": 3.3541282117183425e-05,
      "loss": 0.8696,
      "step": 339190
    },
    {
      "epoch": 1.1888114338986497,
      "grad_norm": 2.453125,
      "learning_rate": 3.3540633088519726e-05,
      "loss": 0.7776,
      "step": 339200
    },
    {
      "epoch": 1.1888464814055453,
      "grad_norm": 3.21875,
      "learning_rate": 3.353998405985602e-05,
      "loss": 0.7721,
      "step": 339210
    },
    {
      "epoch": 1.1888815289124408,
      "grad_norm": 3.359375,
      "learning_rate": 3.353933503119232e-05,
      "loss": 0.8881,
      "step": 339220
    },
    {
      "epoch": 1.1889165764193363,
      "grad_norm": 3.09375,
      "learning_rate": 3.3538686002528617e-05,
      "loss": 0.9474,
      "step": 339230
    },
    {
      "epoch": 1.188951623926232,
      "grad_norm": 2.890625,
      "learning_rate": 3.353803697386492e-05,
      "loss": 0.8968,
      "step": 339240
    },
    {
      "epoch": 1.1889866714331276,
      "grad_norm": 3.171875,
      "learning_rate": 3.353738794520121e-05,
      "loss": 0.9095,
      "step": 339250
    },
    {
      "epoch": 1.1890217189400232,
      "grad_norm": 2.859375,
      "learning_rate": 3.3536738916537514e-05,
      "loss": 0.8167,
      "step": 339260
    },
    {
      "epoch": 1.1890567664469187,
      "grad_norm": 3.046875,
      "learning_rate": 3.353608988787381e-05,
      "loss": 0.8218,
      "step": 339270
    },
    {
      "epoch": 1.1890918139538145,
      "grad_norm": 2.625,
      "learning_rate": 3.353544085921011e-05,
      "loss": 0.871,
      "step": 339280
    },
    {
      "epoch": 1.18912686146071,
      "grad_norm": 3.328125,
      "learning_rate": 3.353479183054641e-05,
      "loss": 0.8572,
      "step": 339290
    },
    {
      "epoch": 1.1891619089676055,
      "grad_norm": 3.046875,
      "learning_rate": 3.3534142801882706e-05,
      "loss": 0.8392,
      "step": 339300
    },
    {
      "epoch": 1.1891969564745013,
      "grad_norm": 2.78125,
      "learning_rate": 3.353349377321901e-05,
      "loss": 0.8207,
      "step": 339310
    },
    {
      "epoch": 1.1892320039813968,
      "grad_norm": 3.015625,
      "learning_rate": 3.35328447445553e-05,
      "loss": 0.7796,
      "step": 339320
    },
    {
      "epoch": 1.1892670514882924,
      "grad_norm": 2.96875,
      "learning_rate": 3.3532195715891597e-05,
      "loss": 0.8371,
      "step": 339330
    },
    {
      "epoch": 1.189302098995188,
      "grad_norm": 2.75,
      "learning_rate": 3.353154668722789e-05,
      "loss": 0.8638,
      "step": 339340
    },
    {
      "epoch": 1.1893371465020837,
      "grad_norm": 2.671875,
      "learning_rate": 3.353089765856419e-05,
      "loss": 0.9442,
      "step": 339350
    },
    {
      "epoch": 1.1893721940089792,
      "grad_norm": 2.875,
      "learning_rate": 3.353024862990049e-05,
      "loss": 0.8924,
      "step": 339360
    },
    {
      "epoch": 1.1894072415158747,
      "grad_norm": 2.515625,
      "learning_rate": 3.352959960123679e-05,
      "loss": 0.9629,
      "step": 339370
    },
    {
      "epoch": 1.1894422890227703,
      "grad_norm": 2.671875,
      "learning_rate": 3.352895057257309e-05,
      "loss": 0.8238,
      "step": 339380
    },
    {
      "epoch": 1.189477336529666,
      "grad_norm": 2.640625,
      "learning_rate": 3.3528301543909385e-05,
      "loss": 0.874,
      "step": 339390
    },
    {
      "epoch": 1.1895123840365616,
      "grad_norm": 2.921875,
      "learning_rate": 3.3527652515245686e-05,
      "loss": 0.9352,
      "step": 339400
    },
    {
      "epoch": 1.189547431543457,
      "grad_norm": 2.515625,
      "learning_rate": 3.352700348658198e-05,
      "loss": 0.884,
      "step": 339410
    },
    {
      "epoch": 1.1895824790503529,
      "grad_norm": 2.765625,
      "learning_rate": 3.352635445791828e-05,
      "loss": 0.8686,
      "step": 339420
    },
    {
      "epoch": 1.1896175265572484,
      "grad_norm": 2.4375,
      "learning_rate": 3.3525705429254577e-05,
      "loss": 0.8012,
      "step": 339430
    },
    {
      "epoch": 1.189652574064144,
      "grad_norm": 3.203125,
      "learning_rate": 3.352505640059088e-05,
      "loss": 0.9026,
      "step": 339440
    },
    {
      "epoch": 1.1896876215710395,
      "grad_norm": 3.046875,
      "learning_rate": 3.352440737192717e-05,
      "loss": 0.9235,
      "step": 339450
    },
    {
      "epoch": 1.1897226690779352,
      "grad_norm": 3.21875,
      "learning_rate": 3.3523758343263474e-05,
      "loss": 0.8564,
      "step": 339460
    },
    {
      "epoch": 1.1897577165848308,
      "grad_norm": 3.15625,
      "learning_rate": 3.352310931459977e-05,
      "loss": 0.8497,
      "step": 339470
    },
    {
      "epoch": 1.1897927640917263,
      "grad_norm": 2.90625,
      "learning_rate": 3.352246028593607e-05,
      "loss": 0.9025,
      "step": 339480
    },
    {
      "epoch": 1.1898278115986218,
      "grad_norm": 2.875,
      "learning_rate": 3.3521811257272365e-05,
      "loss": 0.912,
      "step": 339490
    },
    {
      "epoch": 1.1898628591055176,
      "grad_norm": 2.703125,
      "learning_rate": 3.3521162228608666e-05,
      "loss": 0.8713,
      "step": 339500
    },
    {
      "epoch": 1.1898979066124131,
      "grad_norm": 2.984375,
      "learning_rate": 3.352051319994496e-05,
      "loss": 0.8466,
      "step": 339510
    },
    {
      "epoch": 1.1899329541193087,
      "grad_norm": 2.828125,
      "learning_rate": 3.351986417128126e-05,
      "loss": 0.874,
      "step": 339520
    },
    {
      "epoch": 1.1899680016262044,
      "grad_norm": 2.71875,
      "learning_rate": 3.351921514261756e-05,
      "loss": 0.8319,
      "step": 339530
    },
    {
      "epoch": 1.1900030491331,
      "grad_norm": 2.921875,
      "learning_rate": 3.351856611395386e-05,
      "loss": 0.8077,
      "step": 339540
    },
    {
      "epoch": 1.1900380966399955,
      "grad_norm": 2.921875,
      "learning_rate": 3.351791708529016e-05,
      "loss": 0.7991,
      "step": 339550
    },
    {
      "epoch": 1.190073144146891,
      "grad_norm": 3.109375,
      "learning_rate": 3.3517268056626454e-05,
      "loss": 0.908,
      "step": 339560
    },
    {
      "epoch": 1.1901081916537868,
      "grad_norm": 2.984375,
      "learning_rate": 3.3516619027962755e-05,
      "loss": 0.861,
      "step": 339570
    },
    {
      "epoch": 1.1901432391606823,
      "grad_norm": 3.015625,
      "learning_rate": 3.351596999929905e-05,
      "loss": 0.8574,
      "step": 339580
    },
    {
      "epoch": 1.1901782866675779,
      "grad_norm": 2.59375,
      "learning_rate": 3.351532097063535e-05,
      "loss": 0.7821,
      "step": 339590
    },
    {
      "epoch": 1.1902133341744734,
      "grad_norm": 2.9375,
      "learning_rate": 3.3514671941971646e-05,
      "loss": 0.9535,
      "step": 339600
    },
    {
      "epoch": 1.1902483816813691,
      "grad_norm": 2.515625,
      "learning_rate": 3.351402291330795e-05,
      "loss": 0.7898,
      "step": 339610
    },
    {
      "epoch": 1.1902834291882647,
      "grad_norm": 3.171875,
      "learning_rate": 3.351337388464424e-05,
      "loss": 0.8861,
      "step": 339620
    },
    {
      "epoch": 1.1903184766951602,
      "grad_norm": 2.984375,
      "learning_rate": 3.351272485598054e-05,
      "loss": 0.9337,
      "step": 339630
    },
    {
      "epoch": 1.190353524202056,
      "grad_norm": 2.84375,
      "learning_rate": 3.351207582731684e-05,
      "loss": 0.7967,
      "step": 339640
    },
    {
      "epoch": 1.1903885717089515,
      "grad_norm": 3.125,
      "learning_rate": 3.351142679865314e-05,
      "loss": 0.8544,
      "step": 339650
    },
    {
      "epoch": 1.190423619215847,
      "grad_norm": 2.875,
      "learning_rate": 3.351077776998944e-05,
      "loss": 0.8942,
      "step": 339660
    },
    {
      "epoch": 1.1904586667227426,
      "grad_norm": 3.140625,
      "learning_rate": 3.3510128741325735e-05,
      "loss": 0.8005,
      "step": 339670
    },
    {
      "epoch": 1.1904937142296383,
      "grad_norm": 2.78125,
      "learning_rate": 3.350947971266204e-05,
      "loss": 0.8855,
      "step": 339680
    },
    {
      "epoch": 1.1905287617365339,
      "grad_norm": 2.90625,
      "learning_rate": 3.350883068399833e-05,
      "loss": 0.9151,
      "step": 339690
    },
    {
      "epoch": 1.1905638092434294,
      "grad_norm": 2.859375,
      "learning_rate": 3.3508181655334626e-05,
      "loss": 0.9332,
      "step": 339700
    },
    {
      "epoch": 1.190598856750325,
      "grad_norm": 3.390625,
      "learning_rate": 3.350753262667092e-05,
      "loss": 0.9435,
      "step": 339710
    },
    {
      "epoch": 1.1906339042572207,
      "grad_norm": 3.234375,
      "learning_rate": 3.350688359800722e-05,
      "loss": 0.8482,
      "step": 339720
    },
    {
      "epoch": 1.1906689517641162,
      "grad_norm": 2.828125,
      "learning_rate": 3.3506234569343517e-05,
      "loss": 0.8187,
      "step": 339730
    },
    {
      "epoch": 1.1907039992710118,
      "grad_norm": 3.015625,
      "learning_rate": 3.350558554067982e-05,
      "loss": 0.9451,
      "step": 339740
    },
    {
      "epoch": 1.1907390467779075,
      "grad_norm": 2.640625,
      "learning_rate": 3.350493651201612e-05,
      "loss": 0.8701,
      "step": 339750
    },
    {
      "epoch": 1.190774094284803,
      "grad_norm": 2.4375,
      "learning_rate": 3.3504287483352414e-05,
      "loss": 0.8637,
      "step": 339760
    },
    {
      "epoch": 1.1908091417916986,
      "grad_norm": 2.65625,
      "learning_rate": 3.3503638454688715e-05,
      "loss": 0.8,
      "step": 339770
    },
    {
      "epoch": 1.1908441892985944,
      "grad_norm": 2.484375,
      "learning_rate": 3.350298942602501e-05,
      "loss": 0.9069,
      "step": 339780
    },
    {
      "epoch": 1.19087923680549,
      "grad_norm": 2.984375,
      "learning_rate": 3.350234039736131e-05,
      "loss": 0.8497,
      "step": 339790
    },
    {
      "epoch": 1.1909142843123854,
      "grad_norm": 2.84375,
      "learning_rate": 3.3501691368697606e-05,
      "loss": 0.8455,
      "step": 339800
    },
    {
      "epoch": 1.190949331819281,
      "grad_norm": 3.078125,
      "learning_rate": 3.350104234003391e-05,
      "loss": 0.8161,
      "step": 339810
    },
    {
      "epoch": 1.1909843793261765,
      "grad_norm": 3.234375,
      "learning_rate": 3.35003933113702e-05,
      "loss": 0.8808,
      "step": 339820
    },
    {
      "epoch": 1.1910194268330723,
      "grad_norm": 2.90625,
      "learning_rate": 3.34997442827065e-05,
      "loss": 0.9457,
      "step": 339830
    },
    {
      "epoch": 1.1910544743399678,
      "grad_norm": 2.875,
      "learning_rate": 3.34990952540428e-05,
      "loss": 0.9109,
      "step": 339840
    },
    {
      "epoch": 1.1910895218468633,
      "grad_norm": 2.859375,
      "learning_rate": 3.34984462253791e-05,
      "loss": 0.8819,
      "step": 339850
    },
    {
      "epoch": 1.191124569353759,
      "grad_norm": 2.9375,
      "learning_rate": 3.3497797196715394e-05,
      "loss": 0.8602,
      "step": 339860
    },
    {
      "epoch": 1.1911596168606546,
      "grad_norm": 2.265625,
      "learning_rate": 3.3497148168051695e-05,
      "loss": 0.7615,
      "step": 339870
    },
    {
      "epoch": 1.1911946643675502,
      "grad_norm": 2.96875,
      "learning_rate": 3.3496499139388e-05,
      "loss": 0.7771,
      "step": 339880
    },
    {
      "epoch": 1.191229711874446,
      "grad_norm": 2.953125,
      "learning_rate": 3.349585011072429e-05,
      "loss": 0.8022,
      "step": 339890
    },
    {
      "epoch": 1.1912647593813415,
      "grad_norm": 2.890625,
      "learning_rate": 3.349520108206059e-05,
      "loss": 0.8576,
      "step": 339900
    },
    {
      "epoch": 1.191299806888237,
      "grad_norm": 3.09375,
      "learning_rate": 3.349455205339689e-05,
      "loss": 0.7819,
      "step": 339910
    },
    {
      "epoch": 1.1913348543951325,
      "grad_norm": 3.0,
      "learning_rate": 3.349390302473319e-05,
      "loss": 0.8524,
      "step": 339920
    },
    {
      "epoch": 1.1913699019020283,
      "grad_norm": 3.21875,
      "learning_rate": 3.349325399606948e-05,
      "loss": 0.9166,
      "step": 339930
    },
    {
      "epoch": 1.1914049494089238,
      "grad_norm": 2.609375,
      "learning_rate": 3.3492604967405785e-05,
      "loss": 0.8789,
      "step": 339940
    },
    {
      "epoch": 1.1914399969158194,
      "grad_norm": 2.5625,
      "learning_rate": 3.349195593874208e-05,
      "loss": 0.7953,
      "step": 339950
    },
    {
      "epoch": 1.191475044422715,
      "grad_norm": 2.8125,
      "learning_rate": 3.349130691007838e-05,
      "loss": 0.8762,
      "step": 339960
    },
    {
      "epoch": 1.1915100919296107,
      "grad_norm": 2.5625,
      "learning_rate": 3.3490657881414675e-05,
      "loss": 0.8113,
      "step": 339970
    },
    {
      "epoch": 1.1915451394365062,
      "grad_norm": 3.0625,
      "learning_rate": 3.349000885275098e-05,
      "loss": 0.8546,
      "step": 339980
    },
    {
      "epoch": 1.1915801869434017,
      "grad_norm": 2.921875,
      "learning_rate": 3.348935982408727e-05,
      "loss": 0.8713,
      "step": 339990
    },
    {
      "epoch": 1.1916152344502975,
      "grad_norm": 3.375,
      "learning_rate": 3.348871079542357e-05,
      "loss": 0.8663,
      "step": 340000
    },
    {
      "epoch": 1.1916152344502975,
      "eval_loss": 0.8078789710998535,
      "eval_runtime": 565.232,
      "eval_samples_per_second": 673.062,
      "eval_steps_per_second": 56.088,
      "step": 340000
    },
    {
      "epoch": 1.191650281957193,
      "grad_norm": 2.78125,
      "learning_rate": 3.348806176675987e-05,
      "loss": 0.8662,
      "step": 340010
    },
    {
      "epoch": 1.1916853294640886,
      "grad_norm": 2.5625,
      "learning_rate": 3.348741273809617e-05,
      "loss": 0.9343,
      "step": 340020
    },
    {
      "epoch": 1.191720376970984,
      "grad_norm": 2.78125,
      "learning_rate": 3.348676370943247e-05,
      "loss": 0.8646,
      "step": 340030
    },
    {
      "epoch": 1.1917554244778799,
      "grad_norm": 2.796875,
      "learning_rate": 3.3486114680768765e-05,
      "loss": 0.8601,
      "step": 340040
    },
    {
      "epoch": 1.1917904719847754,
      "grad_norm": 2.28125,
      "learning_rate": 3.3485465652105066e-05,
      "loss": 0.7842,
      "step": 340050
    },
    {
      "epoch": 1.191825519491671,
      "grad_norm": 2.921875,
      "learning_rate": 3.348481662344136e-05,
      "loss": 0.7748,
      "step": 340060
    },
    {
      "epoch": 1.1918605669985665,
      "grad_norm": 2.703125,
      "learning_rate": 3.348416759477766e-05,
      "loss": 0.9141,
      "step": 340070
    },
    {
      "epoch": 1.1918956145054622,
      "grad_norm": 2.90625,
      "learning_rate": 3.348351856611395e-05,
      "loss": 0.9029,
      "step": 340080
    },
    {
      "epoch": 1.1919306620123578,
      "grad_norm": 2.71875,
      "learning_rate": 3.348286953745025e-05,
      "loss": 0.8446,
      "step": 340090
    },
    {
      "epoch": 1.1919657095192533,
      "grad_norm": 3.109375,
      "learning_rate": 3.3482220508786546e-05,
      "loss": 0.9315,
      "step": 340100
    },
    {
      "epoch": 1.192000757026149,
      "grad_norm": 3.1875,
      "learning_rate": 3.348157148012285e-05,
      "loss": 0.8655,
      "step": 340110
    },
    {
      "epoch": 1.1920358045330446,
      "grad_norm": 2.765625,
      "learning_rate": 3.348092245145915e-05,
      "loss": 0.8138,
      "step": 340120
    },
    {
      "epoch": 1.1920708520399401,
      "grad_norm": 3.1875,
      "learning_rate": 3.348027342279544e-05,
      "loss": 0.8943,
      "step": 340130
    },
    {
      "epoch": 1.1921058995468357,
      "grad_norm": 2.796875,
      "learning_rate": 3.3479624394131745e-05,
      "loss": 0.8404,
      "step": 340140
    },
    {
      "epoch": 1.1921409470537314,
      "grad_norm": 2.34375,
      "learning_rate": 3.347897536546804e-05,
      "loss": 0.7537,
      "step": 340150
    },
    {
      "epoch": 1.192175994560627,
      "grad_norm": 2.859375,
      "learning_rate": 3.347832633680434e-05,
      "loss": 0.8458,
      "step": 340160
    },
    {
      "epoch": 1.1922110420675225,
      "grad_norm": 3.40625,
      "learning_rate": 3.3477677308140635e-05,
      "loss": 0.8456,
      "step": 340170
    },
    {
      "epoch": 1.192246089574418,
      "grad_norm": 2.390625,
      "learning_rate": 3.347702827947694e-05,
      "loss": 0.8407,
      "step": 340180
    },
    {
      "epoch": 1.1922811370813138,
      "grad_norm": 2.578125,
      "learning_rate": 3.347637925081323e-05,
      "loss": 0.818,
      "step": 340190
    },
    {
      "epoch": 1.1923161845882093,
      "grad_norm": 3.125,
      "learning_rate": 3.347573022214953e-05,
      "loss": 0.8517,
      "step": 340200
    },
    {
      "epoch": 1.1923512320951049,
      "grad_norm": 3.15625,
      "learning_rate": 3.347508119348583e-05,
      "loss": 0.9088,
      "step": 340210
    },
    {
      "epoch": 1.1923862796020006,
      "grad_norm": 3.0,
      "learning_rate": 3.347443216482213e-05,
      "loss": 0.8546,
      "step": 340220
    },
    {
      "epoch": 1.1924213271088961,
      "grad_norm": 3.203125,
      "learning_rate": 3.347378313615842e-05,
      "loss": 0.8941,
      "step": 340230
    },
    {
      "epoch": 1.1924563746157917,
      "grad_norm": 2.875,
      "learning_rate": 3.3473134107494725e-05,
      "loss": 0.8883,
      "step": 340240
    },
    {
      "epoch": 1.1924914221226872,
      "grad_norm": 2.5,
      "learning_rate": 3.3472485078831026e-05,
      "loss": 0.8106,
      "step": 340250
    },
    {
      "epoch": 1.192526469629583,
      "grad_norm": 2.90625,
      "learning_rate": 3.347183605016732e-05,
      "loss": 0.8503,
      "step": 340260
    },
    {
      "epoch": 1.1925615171364785,
      "grad_norm": 3.03125,
      "learning_rate": 3.347118702150362e-05,
      "loss": 0.811,
      "step": 340270
    },
    {
      "epoch": 1.192596564643374,
      "grad_norm": 2.625,
      "learning_rate": 3.347053799283992e-05,
      "loss": 0.7779,
      "step": 340280
    },
    {
      "epoch": 1.1926316121502696,
      "grad_norm": 2.203125,
      "learning_rate": 3.346988896417622e-05,
      "loss": 0.7314,
      "step": 340290
    },
    {
      "epoch": 1.1926666596571653,
      "grad_norm": 2.953125,
      "learning_rate": 3.346923993551251e-05,
      "loss": 0.8192,
      "step": 340300
    },
    {
      "epoch": 1.1927017071640609,
      "grad_norm": 2.578125,
      "learning_rate": 3.3468590906848814e-05,
      "loss": 0.8511,
      "step": 340310
    },
    {
      "epoch": 1.1927367546709564,
      "grad_norm": 2.765625,
      "learning_rate": 3.346794187818511e-05,
      "loss": 0.8092,
      "step": 340320
    },
    {
      "epoch": 1.1927718021778522,
      "grad_norm": 2.90625,
      "learning_rate": 3.346729284952141e-05,
      "loss": 0.849,
      "step": 340330
    },
    {
      "epoch": 1.1928068496847477,
      "grad_norm": 2.546875,
      "learning_rate": 3.3466643820857705e-05,
      "loss": 0.8483,
      "step": 340340
    },
    {
      "epoch": 1.1928418971916432,
      "grad_norm": 3.40625,
      "learning_rate": 3.3465994792194006e-05,
      "loss": 0.9377,
      "step": 340350
    },
    {
      "epoch": 1.1928769446985388,
      "grad_norm": 3.0,
      "learning_rate": 3.34653457635303e-05,
      "loss": 0.8634,
      "step": 340360
    },
    {
      "epoch": 1.1929119922054345,
      "grad_norm": 3.28125,
      "learning_rate": 3.34646967348666e-05,
      "loss": 0.8756,
      "step": 340370
    },
    {
      "epoch": 1.19294703971233,
      "grad_norm": 2.859375,
      "learning_rate": 3.34640477062029e-05,
      "loss": 0.8581,
      "step": 340380
    },
    {
      "epoch": 1.1929820872192256,
      "grad_norm": 3.4375,
      "learning_rate": 3.34633986775392e-05,
      "loss": 0.8384,
      "step": 340390
    },
    {
      "epoch": 1.1930171347261211,
      "grad_norm": 2.671875,
      "learning_rate": 3.34627496488755e-05,
      "loss": 0.8766,
      "step": 340400
    },
    {
      "epoch": 1.193052182233017,
      "grad_norm": 3.140625,
      "learning_rate": 3.3462100620211794e-05,
      "loss": 0.7995,
      "step": 340410
    },
    {
      "epoch": 1.1930872297399124,
      "grad_norm": 2.734375,
      "learning_rate": 3.3461451591548096e-05,
      "loss": 0.8827,
      "step": 340420
    },
    {
      "epoch": 1.193122277246808,
      "grad_norm": 3.046875,
      "learning_rate": 3.346080256288439e-05,
      "loss": 0.924,
      "step": 340430
    },
    {
      "epoch": 1.1931573247537037,
      "grad_norm": 2.9375,
      "learning_rate": 3.346015353422069e-05,
      "loss": 0.8461,
      "step": 340440
    },
    {
      "epoch": 1.1931923722605993,
      "grad_norm": 2.84375,
      "learning_rate": 3.3459504505556986e-05,
      "loss": 0.871,
      "step": 340450
    },
    {
      "epoch": 1.1932274197674948,
      "grad_norm": 2.828125,
      "learning_rate": 3.345885547689328e-05,
      "loss": 0.8752,
      "step": 340460
    },
    {
      "epoch": 1.1932624672743906,
      "grad_norm": 2.90625,
      "learning_rate": 3.3458206448229575e-05,
      "loss": 0.9079,
      "step": 340470
    },
    {
      "epoch": 1.193297514781286,
      "grad_norm": 2.40625,
      "learning_rate": 3.345755741956588e-05,
      "loss": 0.8583,
      "step": 340480
    },
    {
      "epoch": 1.1933325622881816,
      "grad_norm": 2.96875,
      "learning_rate": 3.345690839090218e-05,
      "loss": 0.9026,
      "step": 340490
    },
    {
      "epoch": 1.1933676097950772,
      "grad_norm": 2.875,
      "learning_rate": 3.345625936223847e-05,
      "loss": 0.8999,
      "step": 340500
    },
    {
      "epoch": 1.1934026573019727,
      "grad_norm": 3.15625,
      "learning_rate": 3.3455610333574774e-05,
      "loss": 0.8299,
      "step": 340510
    },
    {
      "epoch": 1.1934377048088685,
      "grad_norm": 2.609375,
      "learning_rate": 3.345496130491107e-05,
      "loss": 0.8449,
      "step": 340520
    },
    {
      "epoch": 1.193472752315764,
      "grad_norm": 2.5,
      "learning_rate": 3.345431227624737e-05,
      "loss": 0.8073,
      "step": 340530
    },
    {
      "epoch": 1.1935077998226595,
      "grad_norm": 3.046875,
      "learning_rate": 3.3453663247583665e-05,
      "loss": 0.8779,
      "step": 340540
    },
    {
      "epoch": 1.1935428473295553,
      "grad_norm": 2.671875,
      "learning_rate": 3.3453014218919966e-05,
      "loss": 0.8424,
      "step": 340550
    },
    {
      "epoch": 1.1935778948364508,
      "grad_norm": 3.046875,
      "learning_rate": 3.345236519025626e-05,
      "loss": 0.8272,
      "step": 340560
    },
    {
      "epoch": 1.1936129423433464,
      "grad_norm": 2.890625,
      "learning_rate": 3.345171616159256e-05,
      "loss": 0.869,
      "step": 340570
    },
    {
      "epoch": 1.1936479898502421,
      "grad_norm": 2.921875,
      "learning_rate": 3.345106713292886e-05,
      "loss": 0.8925,
      "step": 340580
    },
    {
      "epoch": 1.1936830373571377,
      "grad_norm": 2.78125,
      "learning_rate": 3.345041810426516e-05,
      "loss": 0.8485,
      "step": 340590
    },
    {
      "epoch": 1.1937180848640332,
      "grad_norm": 2.953125,
      "learning_rate": 3.344976907560145e-05,
      "loss": 0.9387,
      "step": 340600
    },
    {
      "epoch": 1.1937531323709287,
      "grad_norm": 3.234375,
      "learning_rate": 3.3449120046937754e-05,
      "loss": 0.8568,
      "step": 340610
    },
    {
      "epoch": 1.1937881798778245,
      "grad_norm": 3.25,
      "learning_rate": 3.3448471018274056e-05,
      "loss": 0.8576,
      "step": 340620
    },
    {
      "epoch": 1.19382322738472,
      "grad_norm": 2.6875,
      "learning_rate": 3.344782198961035e-05,
      "loss": 0.9658,
      "step": 340630
    },
    {
      "epoch": 1.1938582748916156,
      "grad_norm": 2.703125,
      "learning_rate": 3.344717296094665e-05,
      "loss": 0.8707,
      "step": 340640
    },
    {
      "epoch": 1.193893322398511,
      "grad_norm": 3.234375,
      "learning_rate": 3.3446523932282946e-05,
      "loss": 0.91,
      "step": 340650
    },
    {
      "epoch": 1.1939283699054068,
      "grad_norm": 2.90625,
      "learning_rate": 3.344587490361925e-05,
      "loss": 0.8044,
      "step": 340660
    },
    {
      "epoch": 1.1939634174123024,
      "grad_norm": 2.671875,
      "learning_rate": 3.344522587495554e-05,
      "loss": 0.8998,
      "step": 340670
    },
    {
      "epoch": 1.193998464919198,
      "grad_norm": 3.015625,
      "learning_rate": 3.3444576846291844e-05,
      "loss": 0.9382,
      "step": 340680
    },
    {
      "epoch": 1.1940335124260937,
      "grad_norm": 2.953125,
      "learning_rate": 3.344392781762814e-05,
      "loss": 0.8666,
      "step": 340690
    },
    {
      "epoch": 1.1940685599329892,
      "grad_norm": 3.265625,
      "learning_rate": 3.344327878896444e-05,
      "loss": 0.9149,
      "step": 340700
    },
    {
      "epoch": 1.1941036074398848,
      "grad_norm": 2.59375,
      "learning_rate": 3.3442629760300734e-05,
      "loss": 0.7751,
      "step": 340710
    },
    {
      "epoch": 1.1941386549467803,
      "grad_norm": 2.703125,
      "learning_rate": 3.3441980731637036e-05,
      "loss": 0.9008,
      "step": 340720
    },
    {
      "epoch": 1.194173702453676,
      "grad_norm": 2.921875,
      "learning_rate": 3.344133170297333e-05,
      "loss": 0.9126,
      "step": 340730
    },
    {
      "epoch": 1.1942087499605716,
      "grad_norm": 3.015625,
      "learning_rate": 3.344068267430963e-05,
      "loss": 0.8296,
      "step": 340740
    },
    {
      "epoch": 1.1942437974674671,
      "grad_norm": 3.234375,
      "learning_rate": 3.344003364564593e-05,
      "loss": 0.8317,
      "step": 340750
    },
    {
      "epoch": 1.1942788449743627,
      "grad_norm": 2.5625,
      "learning_rate": 3.343938461698223e-05,
      "loss": 0.8164,
      "step": 340760
    },
    {
      "epoch": 1.1943138924812584,
      "grad_norm": 3.734375,
      "learning_rate": 3.343873558831853e-05,
      "loss": 0.7917,
      "step": 340770
    },
    {
      "epoch": 1.194348939988154,
      "grad_norm": 3.328125,
      "learning_rate": 3.3438086559654824e-05,
      "loss": 0.9218,
      "step": 340780
    },
    {
      "epoch": 1.1943839874950495,
      "grad_norm": 2.859375,
      "learning_rate": 3.3437437530991125e-05,
      "loss": 0.9259,
      "step": 340790
    },
    {
      "epoch": 1.1944190350019452,
      "grad_norm": 2.75,
      "learning_rate": 3.343678850232742e-05,
      "loss": 0.8638,
      "step": 340800
    },
    {
      "epoch": 1.1944540825088408,
      "grad_norm": 2.84375,
      "learning_rate": 3.343613947366372e-05,
      "loss": 0.8928,
      "step": 340810
    },
    {
      "epoch": 1.1944891300157363,
      "grad_norm": 3.046875,
      "learning_rate": 3.3435490445000016e-05,
      "loss": 0.8674,
      "step": 340820
    },
    {
      "epoch": 1.1945241775226318,
      "grad_norm": 3.3125,
      "learning_rate": 3.343484141633631e-05,
      "loss": 0.8264,
      "step": 340830
    },
    {
      "epoch": 1.1945592250295276,
      "grad_norm": 3.40625,
      "learning_rate": 3.343419238767261e-05,
      "loss": 0.8024,
      "step": 340840
    },
    {
      "epoch": 1.1945942725364231,
      "grad_norm": 2.8125,
      "learning_rate": 3.3433543359008906e-05,
      "loss": 0.8473,
      "step": 340850
    },
    {
      "epoch": 1.1946293200433187,
      "grad_norm": 2.765625,
      "learning_rate": 3.343289433034521e-05,
      "loss": 0.8334,
      "step": 340860
    },
    {
      "epoch": 1.1946643675502142,
      "grad_norm": 2.515625,
      "learning_rate": 3.34322453016815e-05,
      "loss": 0.8116,
      "step": 340870
    },
    {
      "epoch": 1.19469941505711,
      "grad_norm": 3.4375,
      "learning_rate": 3.3431596273017804e-05,
      "loss": 0.9313,
      "step": 340880
    },
    {
      "epoch": 1.1947344625640055,
      "grad_norm": 2.828125,
      "learning_rate": 3.34309472443541e-05,
      "loss": 0.9159,
      "step": 340890
    },
    {
      "epoch": 1.194769510070901,
      "grad_norm": 3.015625,
      "learning_rate": 3.34302982156904e-05,
      "loss": 0.8614,
      "step": 340900
    },
    {
      "epoch": 1.1948045575777968,
      "grad_norm": 3.15625,
      "learning_rate": 3.3429649187026694e-05,
      "loss": 0.8548,
      "step": 340910
    },
    {
      "epoch": 1.1948396050846923,
      "grad_norm": 2.9375,
      "learning_rate": 3.3429000158362996e-05,
      "loss": 0.9082,
      "step": 340920
    },
    {
      "epoch": 1.1948746525915879,
      "grad_norm": 2.875,
      "learning_rate": 3.342835112969929e-05,
      "loss": 0.8844,
      "step": 340930
    },
    {
      "epoch": 1.1949097000984834,
      "grad_norm": 2.984375,
      "learning_rate": 3.342770210103559e-05,
      "loss": 0.7957,
      "step": 340940
    },
    {
      "epoch": 1.1949447476053792,
      "grad_norm": 2.5625,
      "learning_rate": 3.3427053072371886e-05,
      "loss": 0.8183,
      "step": 340950
    },
    {
      "epoch": 1.1949797951122747,
      "grad_norm": 2.734375,
      "learning_rate": 3.342640404370819e-05,
      "loss": 0.8432,
      "step": 340960
    },
    {
      "epoch": 1.1950148426191702,
      "grad_norm": 2.65625,
      "learning_rate": 3.342575501504448e-05,
      "loss": 0.9529,
      "step": 340970
    },
    {
      "epoch": 1.1950498901260658,
      "grad_norm": 3.09375,
      "learning_rate": 3.3425105986380784e-05,
      "loss": 0.8785,
      "step": 340980
    },
    {
      "epoch": 1.1950849376329615,
      "grad_norm": 3.15625,
      "learning_rate": 3.3424456957717085e-05,
      "loss": 0.9098,
      "step": 340990
    },
    {
      "epoch": 1.195119985139857,
      "grad_norm": 3.265625,
      "learning_rate": 3.342380792905338e-05,
      "loss": 0.9539,
      "step": 341000
    },
    {
      "epoch": 1.1951550326467526,
      "grad_norm": 3.21875,
      "learning_rate": 3.342315890038968e-05,
      "loss": 0.8711,
      "step": 341010
    },
    {
      "epoch": 1.1951900801536484,
      "grad_norm": 2.828125,
      "learning_rate": 3.3422509871725976e-05,
      "loss": 0.7956,
      "step": 341020
    },
    {
      "epoch": 1.195225127660544,
      "grad_norm": 2.84375,
      "learning_rate": 3.342186084306228e-05,
      "loss": 0.8506,
      "step": 341030
    },
    {
      "epoch": 1.1952601751674394,
      "grad_norm": 2.921875,
      "learning_rate": 3.342121181439857e-05,
      "loss": 0.8747,
      "step": 341040
    },
    {
      "epoch": 1.195295222674335,
      "grad_norm": 2.96875,
      "learning_rate": 3.342056278573487e-05,
      "loss": 0.8704,
      "step": 341050
    },
    {
      "epoch": 1.1953302701812307,
      "grad_norm": 2.484375,
      "learning_rate": 3.341991375707117e-05,
      "loss": 0.8521,
      "step": 341060
    },
    {
      "epoch": 1.1953653176881263,
      "grad_norm": 2.734375,
      "learning_rate": 3.341926472840747e-05,
      "loss": 0.9762,
      "step": 341070
    },
    {
      "epoch": 1.1954003651950218,
      "grad_norm": 3.125,
      "learning_rate": 3.3418615699743764e-05,
      "loss": 0.8789,
      "step": 341080
    },
    {
      "epoch": 1.1954354127019173,
      "grad_norm": 3.078125,
      "learning_rate": 3.3417966671080065e-05,
      "loss": 0.7822,
      "step": 341090
    },
    {
      "epoch": 1.195470460208813,
      "grad_norm": 3.140625,
      "learning_rate": 3.341731764241636e-05,
      "loss": 0.8617,
      "step": 341100
    },
    {
      "epoch": 1.1955055077157086,
      "grad_norm": 2.15625,
      "learning_rate": 3.341666861375266e-05,
      "loss": 0.8366,
      "step": 341110
    },
    {
      "epoch": 1.1955405552226042,
      "grad_norm": 3.046875,
      "learning_rate": 3.341601958508896e-05,
      "loss": 0.8866,
      "step": 341120
    },
    {
      "epoch": 1.1955756027295,
      "grad_norm": 2.765625,
      "learning_rate": 3.341537055642526e-05,
      "loss": 0.9336,
      "step": 341130
    },
    {
      "epoch": 1.1956106502363955,
      "grad_norm": 3.390625,
      "learning_rate": 3.341472152776156e-05,
      "loss": 0.8311,
      "step": 341140
    },
    {
      "epoch": 1.195645697743291,
      "grad_norm": 3.1875,
      "learning_rate": 3.341407249909785e-05,
      "loss": 0.929,
      "step": 341150
    },
    {
      "epoch": 1.1956807452501867,
      "grad_norm": 3.078125,
      "learning_rate": 3.3413423470434154e-05,
      "loss": 0.8914,
      "step": 341160
    },
    {
      "epoch": 1.1957157927570823,
      "grad_norm": 2.625,
      "learning_rate": 3.341277444177045e-05,
      "loss": 0.8596,
      "step": 341170
    },
    {
      "epoch": 1.1957508402639778,
      "grad_norm": 2.421875,
      "learning_rate": 3.341212541310675e-05,
      "loss": 0.8156,
      "step": 341180
    },
    {
      "epoch": 1.1957858877708734,
      "grad_norm": 2.546875,
      "learning_rate": 3.3411476384443045e-05,
      "loss": 0.9157,
      "step": 341190
    },
    {
      "epoch": 1.195820935277769,
      "grad_norm": 3.0,
      "learning_rate": 3.3410827355779346e-05,
      "loss": 0.8456,
      "step": 341200
    },
    {
      "epoch": 1.1958559827846647,
      "grad_norm": 2.40625,
      "learning_rate": 3.341017832711564e-05,
      "loss": 0.752,
      "step": 341210
    },
    {
      "epoch": 1.1958910302915602,
      "grad_norm": 3.03125,
      "learning_rate": 3.3409529298451936e-05,
      "loss": 0.8521,
      "step": 341220
    },
    {
      "epoch": 1.1959260777984557,
      "grad_norm": 2.640625,
      "learning_rate": 3.340888026978824e-05,
      "loss": 0.8957,
      "step": 341230
    },
    {
      "epoch": 1.1959611253053515,
      "grad_norm": 2.734375,
      "learning_rate": 3.340823124112453e-05,
      "loss": 0.824,
      "step": 341240
    },
    {
      "epoch": 1.195996172812247,
      "grad_norm": 3.015625,
      "learning_rate": 3.340758221246083e-05,
      "loss": 0.8518,
      "step": 341250
    },
    {
      "epoch": 1.1960312203191426,
      "grad_norm": 2.953125,
      "learning_rate": 3.340693318379713e-05,
      "loss": 0.9383,
      "step": 341260
    },
    {
      "epoch": 1.1960662678260383,
      "grad_norm": 2.921875,
      "learning_rate": 3.340628415513343e-05,
      "loss": 0.9353,
      "step": 341270
    },
    {
      "epoch": 1.1961013153329338,
      "grad_norm": 2.65625,
      "learning_rate": 3.3405635126469724e-05,
      "loss": 0.8095,
      "step": 341280
    },
    {
      "epoch": 1.1961363628398294,
      "grad_norm": 2.859375,
      "learning_rate": 3.3404986097806025e-05,
      "loss": 0.8126,
      "step": 341290
    },
    {
      "epoch": 1.196171410346725,
      "grad_norm": 2.609375,
      "learning_rate": 3.340433706914232e-05,
      "loss": 0.9082,
      "step": 341300
    },
    {
      "epoch": 1.1962064578536207,
      "grad_norm": 2.59375,
      "learning_rate": 3.340368804047862e-05,
      "loss": 0.7986,
      "step": 341310
    },
    {
      "epoch": 1.1962415053605162,
      "grad_norm": 3.1875,
      "learning_rate": 3.3403039011814916e-05,
      "loss": 0.7917,
      "step": 341320
    },
    {
      "epoch": 1.1962765528674117,
      "grad_norm": 3.265625,
      "learning_rate": 3.340238998315122e-05,
      "loss": 0.8157,
      "step": 341330
    },
    {
      "epoch": 1.1963116003743073,
      "grad_norm": 3.109375,
      "learning_rate": 3.340174095448751e-05,
      "loss": 0.8596,
      "step": 341340
    },
    {
      "epoch": 1.196346647881203,
      "grad_norm": 2.984375,
      "learning_rate": 3.340109192582381e-05,
      "loss": 0.8356,
      "step": 341350
    },
    {
      "epoch": 1.1963816953880986,
      "grad_norm": 3.203125,
      "learning_rate": 3.3400442897160114e-05,
      "loss": 0.7386,
      "step": 341360
    },
    {
      "epoch": 1.1964167428949941,
      "grad_norm": 2.84375,
      "learning_rate": 3.339979386849641e-05,
      "loss": 0.8142,
      "step": 341370
    },
    {
      "epoch": 1.1964517904018899,
      "grad_norm": 3.125,
      "learning_rate": 3.339914483983271e-05,
      "loss": 0.8708,
      "step": 341380
    },
    {
      "epoch": 1.1964868379087854,
      "grad_norm": 2.53125,
      "learning_rate": 3.3398495811169005e-05,
      "loss": 0.9062,
      "step": 341390
    },
    {
      "epoch": 1.196521885415681,
      "grad_norm": 3.203125,
      "learning_rate": 3.3397846782505306e-05,
      "loss": 0.9486,
      "step": 341400
    },
    {
      "epoch": 1.1965569329225765,
      "grad_norm": 3.03125,
      "learning_rate": 3.33971977538416e-05,
      "loss": 0.9307,
      "step": 341410
    },
    {
      "epoch": 1.1965919804294722,
      "grad_norm": 2.53125,
      "learning_rate": 3.33965487251779e-05,
      "loss": 0.804,
      "step": 341420
    },
    {
      "epoch": 1.1966270279363678,
      "grad_norm": 2.375,
      "learning_rate": 3.33958996965142e-05,
      "loss": 0.7913,
      "step": 341430
    },
    {
      "epoch": 1.1966620754432633,
      "grad_norm": 2.890625,
      "learning_rate": 3.33952506678505e-05,
      "loss": 0.7686,
      "step": 341440
    },
    {
      "epoch": 1.1966971229501588,
      "grad_norm": 2.59375,
      "learning_rate": 3.339460163918679e-05,
      "loss": 0.8919,
      "step": 341450
    },
    {
      "epoch": 1.1967321704570546,
      "grad_norm": 2.71875,
      "learning_rate": 3.3393952610523094e-05,
      "loss": 0.7676,
      "step": 341460
    },
    {
      "epoch": 1.1967672179639501,
      "grad_norm": 2.90625,
      "learning_rate": 3.339330358185939e-05,
      "loss": 0.9064,
      "step": 341470
    },
    {
      "epoch": 1.1968022654708457,
      "grad_norm": 3.234375,
      "learning_rate": 3.339265455319569e-05,
      "loss": 0.8535,
      "step": 341480
    },
    {
      "epoch": 1.1968373129777414,
      "grad_norm": 3.0625,
      "learning_rate": 3.339200552453199e-05,
      "loss": 0.7915,
      "step": 341490
    },
    {
      "epoch": 1.196872360484637,
      "grad_norm": 2.765625,
      "learning_rate": 3.3391356495868286e-05,
      "loss": 0.8192,
      "step": 341500
    },
    {
      "epoch": 1.1969074079915325,
      "grad_norm": 3.375,
      "learning_rate": 3.339070746720459e-05,
      "loss": 0.8679,
      "step": 341510
    },
    {
      "epoch": 1.196942455498428,
      "grad_norm": 3.078125,
      "learning_rate": 3.339005843854088e-05,
      "loss": 0.8307,
      "step": 341520
    },
    {
      "epoch": 1.1969775030053238,
      "grad_norm": 2.703125,
      "learning_rate": 3.3389409409877184e-05,
      "loss": 0.8087,
      "step": 341530
    },
    {
      "epoch": 1.1970125505122193,
      "grad_norm": 3.15625,
      "learning_rate": 3.338876038121348e-05,
      "loss": 0.8547,
      "step": 341540
    },
    {
      "epoch": 1.1970475980191149,
      "grad_norm": 2.96875,
      "learning_rate": 3.338811135254978e-05,
      "loss": 0.8356,
      "step": 341550
    },
    {
      "epoch": 1.1970826455260104,
      "grad_norm": 3.078125,
      "learning_rate": 3.3387462323886074e-05,
      "loss": 0.9221,
      "step": 341560
    },
    {
      "epoch": 1.1971176930329062,
      "grad_norm": 2.59375,
      "learning_rate": 3.3386813295222376e-05,
      "loss": 0.8157,
      "step": 341570
    },
    {
      "epoch": 1.1971527405398017,
      "grad_norm": 2.84375,
      "learning_rate": 3.338616426655867e-05,
      "loss": 0.8748,
      "step": 341580
    },
    {
      "epoch": 1.1971877880466972,
      "grad_norm": 3.078125,
      "learning_rate": 3.3385515237894965e-05,
      "loss": 0.8684,
      "step": 341590
    },
    {
      "epoch": 1.197222835553593,
      "grad_norm": 3.109375,
      "learning_rate": 3.3384866209231266e-05,
      "loss": 0.8863,
      "step": 341600
    },
    {
      "epoch": 1.1972578830604885,
      "grad_norm": 3.28125,
      "learning_rate": 3.338421718056756e-05,
      "loss": 0.8729,
      "step": 341610
    },
    {
      "epoch": 1.197292930567384,
      "grad_norm": 2.71875,
      "learning_rate": 3.338356815190386e-05,
      "loss": 0.8063,
      "step": 341620
    },
    {
      "epoch": 1.1973279780742796,
      "grad_norm": 2.875,
      "learning_rate": 3.338291912324016e-05,
      "loss": 0.861,
      "step": 341630
    },
    {
      "epoch": 1.1973630255811754,
      "grad_norm": 3.109375,
      "learning_rate": 3.338227009457646e-05,
      "loss": 0.8464,
      "step": 341640
    },
    {
      "epoch": 1.197398073088071,
      "grad_norm": 3.328125,
      "learning_rate": 3.338162106591275e-05,
      "loss": 0.883,
      "step": 341650
    },
    {
      "epoch": 1.1974331205949664,
      "grad_norm": 2.71875,
      "learning_rate": 3.3380972037249054e-05,
      "loss": 0.8914,
      "step": 341660
    },
    {
      "epoch": 1.197468168101862,
      "grad_norm": 2.984375,
      "learning_rate": 3.338032300858535e-05,
      "loss": 0.8475,
      "step": 341670
    },
    {
      "epoch": 1.1975032156087577,
      "grad_norm": 2.5,
      "learning_rate": 3.337967397992165e-05,
      "loss": 0.8276,
      "step": 341680
    },
    {
      "epoch": 1.1975382631156533,
      "grad_norm": 3.03125,
      "learning_rate": 3.3379024951257945e-05,
      "loss": 0.9199,
      "step": 341690
    },
    {
      "epoch": 1.1975733106225488,
      "grad_norm": 2.6875,
      "learning_rate": 3.3378375922594246e-05,
      "loss": 0.9134,
      "step": 341700
    },
    {
      "epoch": 1.1976083581294446,
      "grad_norm": 2.859375,
      "learning_rate": 3.337772689393055e-05,
      "loss": 0.8507,
      "step": 341710
    },
    {
      "epoch": 1.19764340563634,
      "grad_norm": 2.859375,
      "learning_rate": 3.337707786526684e-05,
      "loss": 0.8928,
      "step": 341720
    },
    {
      "epoch": 1.1976784531432356,
      "grad_norm": 3.015625,
      "learning_rate": 3.3376428836603144e-05,
      "loss": 0.819,
      "step": 341730
    },
    {
      "epoch": 1.1977135006501312,
      "grad_norm": 2.828125,
      "learning_rate": 3.337577980793944e-05,
      "loss": 0.8586,
      "step": 341740
    },
    {
      "epoch": 1.197748548157027,
      "grad_norm": 2.265625,
      "learning_rate": 3.337513077927574e-05,
      "loss": 0.812,
      "step": 341750
    },
    {
      "epoch": 1.1977835956639225,
      "grad_norm": 2.546875,
      "learning_rate": 3.3374481750612034e-05,
      "loss": 0.8176,
      "step": 341760
    },
    {
      "epoch": 1.197818643170818,
      "grad_norm": 2.453125,
      "learning_rate": 3.3373832721948336e-05,
      "loss": 0.8084,
      "step": 341770
    },
    {
      "epoch": 1.1978536906777135,
      "grad_norm": 3.015625,
      "learning_rate": 3.337318369328463e-05,
      "loss": 0.8217,
      "step": 341780
    },
    {
      "epoch": 1.1978887381846093,
      "grad_norm": 2.921875,
      "learning_rate": 3.337253466462093e-05,
      "loss": 0.8446,
      "step": 341790
    },
    {
      "epoch": 1.1979237856915048,
      "grad_norm": 3.46875,
      "learning_rate": 3.3371885635957226e-05,
      "loss": 0.8967,
      "step": 341800
    },
    {
      "epoch": 1.1979588331984004,
      "grad_norm": 3.265625,
      "learning_rate": 3.337123660729353e-05,
      "loss": 0.8525,
      "step": 341810
    },
    {
      "epoch": 1.197993880705296,
      "grad_norm": 2.53125,
      "learning_rate": 3.337058757862982e-05,
      "loss": 0.767,
      "step": 341820
    },
    {
      "epoch": 1.1980289282121916,
      "grad_norm": 2.8125,
      "learning_rate": 3.3369938549966124e-05,
      "loss": 0.9223,
      "step": 341830
    },
    {
      "epoch": 1.1980639757190872,
      "grad_norm": 3.078125,
      "learning_rate": 3.336928952130242e-05,
      "loss": 0.8809,
      "step": 341840
    },
    {
      "epoch": 1.198099023225983,
      "grad_norm": 3.1875,
      "learning_rate": 3.336864049263872e-05,
      "loss": 0.8252,
      "step": 341850
    },
    {
      "epoch": 1.1981340707328785,
      "grad_norm": 2.828125,
      "learning_rate": 3.336799146397502e-05,
      "loss": 0.9152,
      "step": 341860
    },
    {
      "epoch": 1.198169118239774,
      "grad_norm": 3.015625,
      "learning_rate": 3.3367342435311316e-05,
      "loss": 0.8097,
      "step": 341870
    },
    {
      "epoch": 1.1982041657466695,
      "grad_norm": 2.71875,
      "learning_rate": 3.336669340664762e-05,
      "loss": 0.8873,
      "step": 341880
    },
    {
      "epoch": 1.198239213253565,
      "grad_norm": 2.859375,
      "learning_rate": 3.336604437798391e-05,
      "loss": 0.8325,
      "step": 341890
    },
    {
      "epoch": 1.1982742607604608,
      "grad_norm": 2.453125,
      "learning_rate": 3.336539534932021e-05,
      "loss": 0.8538,
      "step": 341900
    },
    {
      "epoch": 1.1983093082673564,
      "grad_norm": 3.078125,
      "learning_rate": 3.336474632065651e-05,
      "loss": 0.8468,
      "step": 341910
    },
    {
      "epoch": 1.198344355774252,
      "grad_norm": 2.921875,
      "learning_rate": 3.336409729199281e-05,
      "loss": 0.876,
      "step": 341920
    },
    {
      "epoch": 1.1983794032811477,
      "grad_norm": 2.71875,
      "learning_rate": 3.3363448263329104e-05,
      "loss": 0.9393,
      "step": 341930
    },
    {
      "epoch": 1.1984144507880432,
      "grad_norm": 2.515625,
      "learning_rate": 3.3362799234665405e-05,
      "loss": 0.8353,
      "step": 341940
    },
    {
      "epoch": 1.1984494982949387,
      "grad_norm": 2.96875,
      "learning_rate": 3.33621502060017e-05,
      "loss": 0.9556,
      "step": 341950
    },
    {
      "epoch": 1.1984845458018345,
      "grad_norm": 2.9375,
      "learning_rate": 3.3361501177337994e-05,
      "loss": 0.8725,
      "step": 341960
    },
    {
      "epoch": 1.19851959330873,
      "grad_norm": 2.84375,
      "learning_rate": 3.3360852148674296e-05,
      "loss": 0.829,
      "step": 341970
    },
    {
      "epoch": 1.1985546408156256,
      "grad_norm": 3.125,
      "learning_rate": 3.336020312001059e-05,
      "loss": 0.9136,
      "step": 341980
    },
    {
      "epoch": 1.198589688322521,
      "grad_norm": 2.9375,
      "learning_rate": 3.335955409134689e-05,
      "loss": 0.8955,
      "step": 341990
    },
    {
      "epoch": 1.1986247358294169,
      "grad_norm": 3.140625,
      "learning_rate": 3.3358905062683186e-05,
      "loss": 0.8336,
      "step": 342000
    },
    {
      "epoch": 1.1986597833363124,
      "grad_norm": 3.109375,
      "learning_rate": 3.335825603401949e-05,
      "loss": 0.8717,
      "step": 342010
    },
    {
      "epoch": 1.198694830843208,
      "grad_norm": 2.875,
      "learning_rate": 3.335760700535578e-05,
      "loss": 0.8457,
      "step": 342020
    },
    {
      "epoch": 1.1987298783501035,
      "grad_norm": 2.9375,
      "learning_rate": 3.3356957976692084e-05,
      "loss": 0.8779,
      "step": 342030
    },
    {
      "epoch": 1.1987649258569992,
      "grad_norm": 2.65625,
      "learning_rate": 3.335630894802838e-05,
      "loss": 0.8367,
      "step": 342040
    },
    {
      "epoch": 1.1987999733638948,
      "grad_norm": 3.296875,
      "learning_rate": 3.335565991936468e-05,
      "loss": 0.8504,
      "step": 342050
    },
    {
      "epoch": 1.1988350208707903,
      "grad_norm": 3.0625,
      "learning_rate": 3.3355010890700974e-05,
      "loss": 0.9337,
      "step": 342060
    },
    {
      "epoch": 1.198870068377686,
      "grad_norm": 2.8125,
      "learning_rate": 3.3354361862037276e-05,
      "loss": 0.8622,
      "step": 342070
    },
    {
      "epoch": 1.1989051158845816,
      "grad_norm": 2.75,
      "learning_rate": 3.335371283337358e-05,
      "loss": 0.905,
      "step": 342080
    },
    {
      "epoch": 1.1989401633914771,
      "grad_norm": 2.515625,
      "learning_rate": 3.335306380470987e-05,
      "loss": 0.8391,
      "step": 342090
    },
    {
      "epoch": 1.1989752108983727,
      "grad_norm": 2.828125,
      "learning_rate": 3.335241477604617e-05,
      "loss": 0.8368,
      "step": 342100
    },
    {
      "epoch": 1.1990102584052684,
      "grad_norm": 2.9375,
      "learning_rate": 3.335176574738247e-05,
      "loss": 0.852,
      "step": 342110
    },
    {
      "epoch": 1.199045305912164,
      "grad_norm": 2.78125,
      "learning_rate": 3.335111671871877e-05,
      "loss": 0.8537,
      "step": 342120
    },
    {
      "epoch": 1.1990803534190595,
      "grad_norm": 2.796875,
      "learning_rate": 3.3350467690055064e-05,
      "loss": 0.9025,
      "step": 342130
    },
    {
      "epoch": 1.199115400925955,
      "grad_norm": 3.046875,
      "learning_rate": 3.3349818661391365e-05,
      "loss": 0.9165,
      "step": 342140
    },
    {
      "epoch": 1.1991504484328508,
      "grad_norm": 3.15625,
      "learning_rate": 3.334916963272766e-05,
      "loss": 0.8097,
      "step": 342150
    },
    {
      "epoch": 1.1991854959397463,
      "grad_norm": 3.015625,
      "learning_rate": 3.334852060406396e-05,
      "loss": 0.8739,
      "step": 342160
    },
    {
      "epoch": 1.1992205434466419,
      "grad_norm": 2.5,
      "learning_rate": 3.3347871575400256e-05,
      "loss": 0.8078,
      "step": 342170
    },
    {
      "epoch": 1.1992555909535376,
      "grad_norm": 2.796875,
      "learning_rate": 3.334722254673656e-05,
      "loss": 0.8916,
      "step": 342180
    },
    {
      "epoch": 1.1992906384604332,
      "grad_norm": 2.625,
      "learning_rate": 3.334657351807285e-05,
      "loss": 0.8424,
      "step": 342190
    },
    {
      "epoch": 1.1993256859673287,
      "grad_norm": 2.953125,
      "learning_rate": 3.334592448940915e-05,
      "loss": 0.8401,
      "step": 342200
    },
    {
      "epoch": 1.1993607334742242,
      "grad_norm": 2.796875,
      "learning_rate": 3.334527546074545e-05,
      "loss": 0.9267,
      "step": 342210
    },
    {
      "epoch": 1.19939578098112,
      "grad_norm": 2.9375,
      "learning_rate": 3.334462643208175e-05,
      "loss": 0.8359,
      "step": 342220
    },
    {
      "epoch": 1.1994308284880155,
      "grad_norm": 3.1875,
      "learning_rate": 3.334397740341805e-05,
      "loss": 0.8488,
      "step": 342230
    },
    {
      "epoch": 1.199465875994911,
      "grad_norm": 2.875,
      "learning_rate": 3.3343328374754345e-05,
      "loss": 0.7691,
      "step": 342240
    },
    {
      "epoch": 1.1995009235018066,
      "grad_norm": 2.734375,
      "learning_rate": 3.3342679346090646e-05,
      "loss": 0.8233,
      "step": 342250
    },
    {
      "epoch": 1.1995359710087024,
      "grad_norm": 2.796875,
      "learning_rate": 3.334203031742694e-05,
      "loss": 0.7795,
      "step": 342260
    },
    {
      "epoch": 1.1995710185155979,
      "grad_norm": 2.984375,
      "learning_rate": 3.334138128876324e-05,
      "loss": 0.7998,
      "step": 342270
    },
    {
      "epoch": 1.1996060660224934,
      "grad_norm": 3.34375,
      "learning_rate": 3.334073226009954e-05,
      "loss": 0.9137,
      "step": 342280
    },
    {
      "epoch": 1.1996411135293892,
      "grad_norm": 3.140625,
      "learning_rate": 3.334008323143584e-05,
      "loss": 0.8636,
      "step": 342290
    },
    {
      "epoch": 1.1996761610362847,
      "grad_norm": 2.796875,
      "learning_rate": 3.333943420277213e-05,
      "loss": 0.8409,
      "step": 342300
    },
    {
      "epoch": 1.1997112085431803,
      "grad_norm": 2.84375,
      "learning_rate": 3.3338785174108434e-05,
      "loss": 0.8505,
      "step": 342310
    },
    {
      "epoch": 1.1997462560500758,
      "grad_norm": 2.9375,
      "learning_rate": 3.333813614544473e-05,
      "loss": 0.7842,
      "step": 342320
    },
    {
      "epoch": 1.1997813035569715,
      "grad_norm": 2.828125,
      "learning_rate": 3.333748711678103e-05,
      "loss": 0.804,
      "step": 342330
    },
    {
      "epoch": 1.199816351063867,
      "grad_norm": 3.03125,
      "learning_rate": 3.3336838088117325e-05,
      "loss": 0.9081,
      "step": 342340
    },
    {
      "epoch": 1.1998513985707626,
      "grad_norm": 3.140625,
      "learning_rate": 3.333618905945362e-05,
      "loss": 0.8958,
      "step": 342350
    },
    {
      "epoch": 1.1998864460776582,
      "grad_norm": 2.96875,
      "learning_rate": 3.333554003078992e-05,
      "loss": 0.8919,
      "step": 342360
    },
    {
      "epoch": 1.199921493584554,
      "grad_norm": 2.953125,
      "learning_rate": 3.3334891002126216e-05,
      "loss": 0.8914,
      "step": 342370
    },
    {
      "epoch": 1.1999565410914494,
      "grad_norm": 2.96875,
      "learning_rate": 3.333424197346252e-05,
      "loss": 0.842,
      "step": 342380
    },
    {
      "epoch": 1.199991588598345,
      "grad_norm": 3.171875,
      "learning_rate": 3.333359294479881e-05,
      "loss": 0.8544,
      "step": 342390
    },
    {
      "epoch": 1.2000266361052407,
      "grad_norm": 2.28125,
      "learning_rate": 3.333294391613511e-05,
      "loss": 0.7812,
      "step": 342400
    },
    {
      "epoch": 1.2000616836121363,
      "grad_norm": 2.71875,
      "learning_rate": 3.333229488747141e-05,
      "loss": 0.8323,
      "step": 342410
    },
    {
      "epoch": 1.2000967311190318,
      "grad_norm": 2.90625,
      "learning_rate": 3.333164585880771e-05,
      "loss": 0.8843,
      "step": 342420
    },
    {
      "epoch": 1.2001317786259273,
      "grad_norm": 2.6875,
      "learning_rate": 3.3330996830144004e-05,
      "loss": 0.7858,
      "step": 342430
    },
    {
      "epoch": 1.200166826132823,
      "grad_norm": 2.828125,
      "learning_rate": 3.3330347801480305e-05,
      "loss": 0.7904,
      "step": 342440
    },
    {
      "epoch": 1.2002018736397186,
      "grad_norm": 3.265625,
      "learning_rate": 3.3329698772816606e-05,
      "loss": 0.9276,
      "step": 342450
    },
    {
      "epoch": 1.2002369211466142,
      "grad_norm": 3.125,
      "learning_rate": 3.33290497441529e-05,
      "loss": 0.816,
      "step": 342460
    },
    {
      "epoch": 1.2002719686535097,
      "grad_norm": 3.265625,
      "learning_rate": 3.33284007154892e-05,
      "loss": 0.8008,
      "step": 342470
    },
    {
      "epoch": 1.2003070161604055,
      "grad_norm": 3.234375,
      "learning_rate": 3.33277516868255e-05,
      "loss": 0.8099,
      "step": 342480
    },
    {
      "epoch": 1.200342063667301,
      "grad_norm": 2.5625,
      "learning_rate": 3.33271026581618e-05,
      "loss": 0.9119,
      "step": 342490
    },
    {
      "epoch": 1.2003771111741965,
      "grad_norm": 2.921875,
      "learning_rate": 3.332645362949809e-05,
      "loss": 0.8308,
      "step": 342500
    },
    {
      "epoch": 1.2004121586810923,
      "grad_norm": 2.75,
      "learning_rate": 3.3325804600834394e-05,
      "loss": 0.9612,
      "step": 342510
    },
    {
      "epoch": 1.2004472061879878,
      "grad_norm": 3.390625,
      "learning_rate": 3.332515557217069e-05,
      "loss": 0.8531,
      "step": 342520
    },
    {
      "epoch": 1.2004822536948834,
      "grad_norm": 2.640625,
      "learning_rate": 3.332450654350699e-05,
      "loss": 0.7902,
      "step": 342530
    },
    {
      "epoch": 1.2005173012017791,
      "grad_norm": 2.40625,
      "learning_rate": 3.3323857514843285e-05,
      "loss": 0.8101,
      "step": 342540
    },
    {
      "epoch": 1.2005523487086747,
      "grad_norm": 2.828125,
      "learning_rate": 3.3323208486179586e-05,
      "loss": 0.796,
      "step": 342550
    },
    {
      "epoch": 1.2005873962155702,
      "grad_norm": 2.8125,
      "learning_rate": 3.332255945751588e-05,
      "loss": 0.8058,
      "step": 342560
    },
    {
      "epoch": 1.2006224437224657,
      "grad_norm": 2.765625,
      "learning_rate": 3.332191042885218e-05,
      "loss": 0.8599,
      "step": 342570
    },
    {
      "epoch": 1.2006574912293615,
      "grad_norm": 3.359375,
      "learning_rate": 3.332126140018848e-05,
      "loss": 0.8501,
      "step": 342580
    },
    {
      "epoch": 1.200692538736257,
      "grad_norm": 2.5625,
      "learning_rate": 3.332061237152478e-05,
      "loss": 0.7541,
      "step": 342590
    },
    {
      "epoch": 1.2007275862431526,
      "grad_norm": 3.140625,
      "learning_rate": 3.331996334286108e-05,
      "loss": 0.8541,
      "step": 342600
    },
    {
      "epoch": 1.200762633750048,
      "grad_norm": 3.046875,
      "learning_rate": 3.3319314314197374e-05,
      "loss": 0.9102,
      "step": 342610
    },
    {
      "epoch": 1.2007976812569439,
      "grad_norm": 2.8125,
      "learning_rate": 3.3318665285533676e-05,
      "loss": 0.8627,
      "step": 342620
    },
    {
      "epoch": 1.2008327287638394,
      "grad_norm": 3.1875,
      "learning_rate": 3.331801625686997e-05,
      "loss": 0.8136,
      "step": 342630
    },
    {
      "epoch": 1.200867776270735,
      "grad_norm": 3.25,
      "learning_rate": 3.331736722820627e-05,
      "loss": 0.9216,
      "step": 342640
    },
    {
      "epoch": 1.2009028237776307,
      "grad_norm": 2.640625,
      "learning_rate": 3.3316718199542566e-05,
      "loss": 0.8144,
      "step": 342650
    },
    {
      "epoch": 1.2009378712845262,
      "grad_norm": 3.109375,
      "learning_rate": 3.331606917087887e-05,
      "loss": 0.9251,
      "step": 342660
    },
    {
      "epoch": 1.2009729187914218,
      "grad_norm": 3.109375,
      "learning_rate": 3.331542014221516e-05,
      "loss": 0.8605,
      "step": 342670
    },
    {
      "epoch": 1.2010079662983173,
      "grad_norm": 2.890625,
      "learning_rate": 3.3314771113551464e-05,
      "loss": 0.8637,
      "step": 342680
    },
    {
      "epoch": 1.201043013805213,
      "grad_norm": 2.5,
      "learning_rate": 3.331412208488776e-05,
      "loss": 0.824,
      "step": 342690
    },
    {
      "epoch": 1.2010780613121086,
      "grad_norm": 3.0,
      "learning_rate": 3.331347305622406e-05,
      "loss": 0.7987,
      "step": 342700
    },
    {
      "epoch": 1.2011131088190041,
      "grad_norm": 2.796875,
      "learning_rate": 3.3312824027560354e-05,
      "loss": 0.8189,
      "step": 342710
    },
    {
      "epoch": 1.2011481563258997,
      "grad_norm": 2.953125,
      "learning_rate": 3.331217499889665e-05,
      "loss": 0.8605,
      "step": 342720
    },
    {
      "epoch": 1.2011832038327954,
      "grad_norm": 2.8125,
      "learning_rate": 3.331152597023295e-05,
      "loss": 0.8825,
      "step": 342730
    },
    {
      "epoch": 1.201218251339691,
      "grad_norm": 2.84375,
      "learning_rate": 3.3310876941569245e-05,
      "loss": 0.9399,
      "step": 342740
    },
    {
      "epoch": 1.2012532988465865,
      "grad_norm": 2.75,
      "learning_rate": 3.3310227912905546e-05,
      "loss": 0.8973,
      "step": 342750
    },
    {
      "epoch": 1.2012883463534823,
      "grad_norm": 2.765625,
      "learning_rate": 3.330957888424184e-05,
      "loss": 0.8013,
      "step": 342760
    },
    {
      "epoch": 1.2013233938603778,
      "grad_norm": 2.890625,
      "learning_rate": 3.330892985557814e-05,
      "loss": 0.831,
      "step": 342770
    },
    {
      "epoch": 1.2013584413672733,
      "grad_norm": 3.28125,
      "learning_rate": 3.330828082691444e-05,
      "loss": 0.9592,
      "step": 342780
    },
    {
      "epoch": 1.2013934888741689,
      "grad_norm": 2.921875,
      "learning_rate": 3.330763179825074e-05,
      "loss": 0.8992,
      "step": 342790
    },
    {
      "epoch": 1.2014285363810646,
      "grad_norm": 2.953125,
      "learning_rate": 3.330698276958703e-05,
      "loss": 0.8489,
      "step": 342800
    },
    {
      "epoch": 1.2014635838879602,
      "grad_norm": 3.328125,
      "learning_rate": 3.3306333740923334e-05,
      "loss": 0.7484,
      "step": 342810
    },
    {
      "epoch": 1.2014986313948557,
      "grad_norm": 2.890625,
      "learning_rate": 3.3305684712259636e-05,
      "loss": 0.8468,
      "step": 342820
    },
    {
      "epoch": 1.2015336789017512,
      "grad_norm": 2.71875,
      "learning_rate": 3.330503568359593e-05,
      "loss": 0.9028,
      "step": 342830
    },
    {
      "epoch": 1.201568726408647,
      "grad_norm": 2.921875,
      "learning_rate": 3.330438665493223e-05,
      "loss": 0.9367,
      "step": 342840
    },
    {
      "epoch": 1.2016037739155425,
      "grad_norm": 2.78125,
      "learning_rate": 3.3303737626268526e-05,
      "loss": 0.8116,
      "step": 342850
    },
    {
      "epoch": 1.201638821422438,
      "grad_norm": 3.3125,
      "learning_rate": 3.330308859760483e-05,
      "loss": 0.8206,
      "step": 342860
    },
    {
      "epoch": 1.2016738689293338,
      "grad_norm": 3.046875,
      "learning_rate": 3.330243956894112e-05,
      "loss": 0.7795,
      "step": 342870
    },
    {
      "epoch": 1.2017089164362293,
      "grad_norm": 2.921875,
      "learning_rate": 3.3301790540277424e-05,
      "loss": 0.8784,
      "step": 342880
    },
    {
      "epoch": 1.2017439639431249,
      "grad_norm": 3.234375,
      "learning_rate": 3.330114151161372e-05,
      "loss": 0.9163,
      "step": 342890
    },
    {
      "epoch": 1.2017790114500204,
      "grad_norm": 3.171875,
      "learning_rate": 3.330049248295002e-05,
      "loss": 0.8358,
      "step": 342900
    },
    {
      "epoch": 1.2018140589569162,
      "grad_norm": 2.765625,
      "learning_rate": 3.3299843454286314e-05,
      "loss": 0.8572,
      "step": 342910
    },
    {
      "epoch": 1.2018491064638117,
      "grad_norm": 2.828125,
      "learning_rate": 3.3299194425622616e-05,
      "loss": 0.8963,
      "step": 342920
    },
    {
      "epoch": 1.2018841539707072,
      "grad_norm": 2.90625,
      "learning_rate": 3.329854539695891e-05,
      "loss": 0.873,
      "step": 342930
    },
    {
      "epoch": 1.2019192014776028,
      "grad_norm": 2.625,
      "learning_rate": 3.329789636829521e-05,
      "loss": 0.9535,
      "step": 342940
    },
    {
      "epoch": 1.2019542489844985,
      "grad_norm": 2.984375,
      "learning_rate": 3.329724733963151e-05,
      "loss": 0.8315,
      "step": 342950
    },
    {
      "epoch": 1.201989296491394,
      "grad_norm": 2.59375,
      "learning_rate": 3.329659831096781e-05,
      "loss": 0.8626,
      "step": 342960
    },
    {
      "epoch": 1.2020243439982896,
      "grad_norm": 2.875,
      "learning_rate": 3.329594928230411e-05,
      "loss": 0.8124,
      "step": 342970
    },
    {
      "epoch": 1.2020593915051854,
      "grad_norm": 3.109375,
      "learning_rate": 3.3295300253640404e-05,
      "loss": 0.8151,
      "step": 342980
    },
    {
      "epoch": 1.202094439012081,
      "grad_norm": 2.78125,
      "learning_rate": 3.3294651224976705e-05,
      "loss": 0.9564,
      "step": 342990
    },
    {
      "epoch": 1.2021294865189764,
      "grad_norm": 2.84375,
      "learning_rate": 3.3294002196313e-05,
      "loss": 0.924,
      "step": 343000
    },
    {
      "epoch": 1.202164534025872,
      "grad_norm": 2.328125,
      "learning_rate": 3.32933531676493e-05,
      "loss": 0.8509,
      "step": 343010
    },
    {
      "epoch": 1.2021995815327677,
      "grad_norm": 3.09375,
      "learning_rate": 3.3292704138985596e-05,
      "loss": 0.8248,
      "step": 343020
    },
    {
      "epoch": 1.2022346290396633,
      "grad_norm": 2.875,
      "learning_rate": 3.32920551103219e-05,
      "loss": 0.929,
      "step": 343030
    },
    {
      "epoch": 1.2022696765465588,
      "grad_norm": 3.8125,
      "learning_rate": 3.329140608165819e-05,
      "loss": 0.9465,
      "step": 343040
    },
    {
      "epoch": 1.2023047240534543,
      "grad_norm": 2.9375,
      "learning_rate": 3.329075705299449e-05,
      "loss": 0.8216,
      "step": 343050
    },
    {
      "epoch": 1.20233977156035,
      "grad_norm": 3.078125,
      "learning_rate": 3.329010802433079e-05,
      "loss": 0.8462,
      "step": 343060
    },
    {
      "epoch": 1.2023748190672456,
      "grad_norm": 3.140625,
      "learning_rate": 3.328945899566709e-05,
      "loss": 0.8747,
      "step": 343070
    },
    {
      "epoch": 1.2024098665741412,
      "grad_norm": 3.328125,
      "learning_rate": 3.3288809967003384e-05,
      "loss": 0.9271,
      "step": 343080
    },
    {
      "epoch": 1.202444914081037,
      "grad_norm": 2.9375,
      "learning_rate": 3.328816093833968e-05,
      "loss": 0.8839,
      "step": 343090
    },
    {
      "epoch": 1.2024799615879325,
      "grad_norm": 2.9375,
      "learning_rate": 3.328751190967598e-05,
      "loss": 0.831,
      "step": 343100
    },
    {
      "epoch": 1.202515009094828,
      "grad_norm": 2.890625,
      "learning_rate": 3.3286862881012274e-05,
      "loss": 0.8997,
      "step": 343110
    },
    {
      "epoch": 1.2025500566017238,
      "grad_norm": 3.015625,
      "learning_rate": 3.3286213852348576e-05,
      "loss": 0.9298,
      "step": 343120
    },
    {
      "epoch": 1.2025851041086193,
      "grad_norm": 2.640625,
      "learning_rate": 3.328556482368487e-05,
      "loss": 0.9337,
      "step": 343130
    },
    {
      "epoch": 1.2026201516155148,
      "grad_norm": 2.703125,
      "learning_rate": 3.328491579502117e-05,
      "loss": 0.8703,
      "step": 343140
    },
    {
      "epoch": 1.2026551991224104,
      "grad_norm": 3.125,
      "learning_rate": 3.3284266766357466e-05,
      "loss": 0.9741,
      "step": 343150
    },
    {
      "epoch": 1.202690246629306,
      "grad_norm": 2.875,
      "learning_rate": 3.328361773769377e-05,
      "loss": 0.9012,
      "step": 343160
    },
    {
      "epoch": 1.2027252941362017,
      "grad_norm": 3.03125,
      "learning_rate": 3.328296870903006e-05,
      "loss": 0.9052,
      "step": 343170
    },
    {
      "epoch": 1.2027603416430972,
      "grad_norm": 3.03125,
      "learning_rate": 3.3282319680366364e-05,
      "loss": 0.8726,
      "step": 343180
    },
    {
      "epoch": 1.2027953891499927,
      "grad_norm": 2.828125,
      "learning_rate": 3.3281670651702665e-05,
      "loss": 0.8593,
      "step": 343190
    },
    {
      "epoch": 1.2028304366568885,
      "grad_norm": 2.671875,
      "learning_rate": 3.328102162303896e-05,
      "loss": 0.8384,
      "step": 343200
    },
    {
      "epoch": 1.202865484163784,
      "grad_norm": 2.625,
      "learning_rate": 3.328037259437526e-05,
      "loss": 0.8201,
      "step": 343210
    },
    {
      "epoch": 1.2029005316706796,
      "grad_norm": 2.828125,
      "learning_rate": 3.3279723565711556e-05,
      "loss": 0.802,
      "step": 343220
    },
    {
      "epoch": 1.2029355791775753,
      "grad_norm": 3.484375,
      "learning_rate": 3.327907453704786e-05,
      "loss": 0.9106,
      "step": 343230
    },
    {
      "epoch": 1.2029706266844709,
      "grad_norm": 3.28125,
      "learning_rate": 3.327842550838415e-05,
      "loss": 0.9427,
      "step": 343240
    },
    {
      "epoch": 1.2030056741913664,
      "grad_norm": 3.1875,
      "learning_rate": 3.327777647972045e-05,
      "loss": 0.8428,
      "step": 343250
    },
    {
      "epoch": 1.203040721698262,
      "grad_norm": 2.65625,
      "learning_rate": 3.327712745105675e-05,
      "loss": 0.8311,
      "step": 343260
    },
    {
      "epoch": 1.2030757692051577,
      "grad_norm": 2.84375,
      "learning_rate": 3.327647842239305e-05,
      "loss": 0.8341,
      "step": 343270
    },
    {
      "epoch": 1.2031108167120532,
      "grad_norm": 2.796875,
      "learning_rate": 3.3275829393729344e-05,
      "loss": 0.8742,
      "step": 343280
    },
    {
      "epoch": 1.2031458642189488,
      "grad_norm": 3.484375,
      "learning_rate": 3.3275180365065645e-05,
      "loss": 0.8522,
      "step": 343290
    },
    {
      "epoch": 1.2031809117258443,
      "grad_norm": 2.625,
      "learning_rate": 3.327453133640194e-05,
      "loss": 0.9158,
      "step": 343300
    },
    {
      "epoch": 1.20321595923274,
      "grad_norm": 2.640625,
      "learning_rate": 3.327388230773824e-05,
      "loss": 0.8851,
      "step": 343310
    },
    {
      "epoch": 1.2032510067396356,
      "grad_norm": 2.8125,
      "learning_rate": 3.327323327907454e-05,
      "loss": 0.7575,
      "step": 343320
    },
    {
      "epoch": 1.2032860542465311,
      "grad_norm": 2.71875,
      "learning_rate": 3.327258425041084e-05,
      "loss": 0.8341,
      "step": 343330
    },
    {
      "epoch": 1.2033211017534269,
      "grad_norm": 2.8125,
      "learning_rate": 3.327193522174714e-05,
      "loss": 0.8312,
      "step": 343340
    },
    {
      "epoch": 1.2033561492603224,
      "grad_norm": 3.09375,
      "learning_rate": 3.327128619308343e-05,
      "loss": 0.9374,
      "step": 343350
    },
    {
      "epoch": 1.203391196767218,
      "grad_norm": 2.875,
      "learning_rate": 3.3270637164419735e-05,
      "loss": 0.8069,
      "step": 343360
    },
    {
      "epoch": 1.2034262442741135,
      "grad_norm": 2.734375,
      "learning_rate": 3.326998813575603e-05,
      "loss": 0.8329,
      "step": 343370
    },
    {
      "epoch": 1.2034612917810092,
      "grad_norm": 2.828125,
      "learning_rate": 3.326933910709233e-05,
      "loss": 0.8144,
      "step": 343380
    },
    {
      "epoch": 1.2034963392879048,
      "grad_norm": 2.90625,
      "learning_rate": 3.3268690078428625e-05,
      "loss": 0.8212,
      "step": 343390
    },
    {
      "epoch": 1.2035313867948003,
      "grad_norm": 3.015625,
      "learning_rate": 3.3268041049764927e-05,
      "loss": 0.9387,
      "step": 343400
    },
    {
      "epoch": 1.2035664343016959,
      "grad_norm": 3.296875,
      "learning_rate": 3.326739202110122e-05,
      "loss": 0.9835,
      "step": 343410
    },
    {
      "epoch": 1.2036014818085916,
      "grad_norm": 2.953125,
      "learning_rate": 3.326674299243752e-05,
      "loss": 0.8337,
      "step": 343420
    },
    {
      "epoch": 1.2036365293154871,
      "grad_norm": 2.671875,
      "learning_rate": 3.326609396377382e-05,
      "loss": 0.8599,
      "step": 343430
    },
    {
      "epoch": 1.2036715768223827,
      "grad_norm": 3.1875,
      "learning_rate": 3.326544493511012e-05,
      "loss": 0.9261,
      "step": 343440
    },
    {
      "epoch": 1.2037066243292784,
      "grad_norm": 2.734375,
      "learning_rate": 3.326479590644641e-05,
      "loss": 0.8551,
      "step": 343450
    },
    {
      "epoch": 1.203741671836174,
      "grad_norm": 3.203125,
      "learning_rate": 3.326414687778271e-05,
      "loss": 0.8956,
      "step": 343460
    },
    {
      "epoch": 1.2037767193430695,
      "grad_norm": 3.140625,
      "learning_rate": 3.326349784911901e-05,
      "loss": 0.8563,
      "step": 343470
    },
    {
      "epoch": 1.203811766849965,
      "grad_norm": 2.8125,
      "learning_rate": 3.3262848820455304e-05,
      "loss": 0.8789,
      "step": 343480
    },
    {
      "epoch": 1.2038468143568608,
      "grad_norm": 2.5625,
      "learning_rate": 3.3262199791791605e-05,
      "loss": 0.8215,
      "step": 343490
    },
    {
      "epoch": 1.2038818618637563,
      "grad_norm": 2.796875,
      "learning_rate": 3.32615507631279e-05,
      "loss": 0.8919,
      "step": 343500
    },
    {
      "epoch": 1.2039169093706519,
      "grad_norm": 2.9375,
      "learning_rate": 3.32609017344642e-05,
      "loss": 0.9223,
      "step": 343510
    },
    {
      "epoch": 1.2039519568775474,
      "grad_norm": 3.203125,
      "learning_rate": 3.3260252705800496e-05,
      "loss": 0.9208,
      "step": 343520
    },
    {
      "epoch": 1.2039870043844432,
      "grad_norm": 2.796875,
      "learning_rate": 3.32596036771368e-05,
      "loss": 0.8538,
      "step": 343530
    },
    {
      "epoch": 1.2040220518913387,
      "grad_norm": 3.25,
      "learning_rate": 3.325895464847309e-05,
      "loss": 0.8642,
      "step": 343540
    },
    {
      "epoch": 1.2040570993982342,
      "grad_norm": 3.359375,
      "learning_rate": 3.325830561980939e-05,
      "loss": 0.904,
      "step": 343550
    },
    {
      "epoch": 1.20409214690513,
      "grad_norm": 2.9375,
      "learning_rate": 3.3257656591145695e-05,
      "loss": 0.7521,
      "step": 343560
    },
    {
      "epoch": 1.2041271944120255,
      "grad_norm": 3.40625,
      "learning_rate": 3.325700756248199e-05,
      "loss": 0.8804,
      "step": 343570
    },
    {
      "epoch": 1.204162241918921,
      "grad_norm": 2.59375,
      "learning_rate": 3.325635853381829e-05,
      "loss": 0.8161,
      "step": 343580
    },
    {
      "epoch": 1.2041972894258166,
      "grad_norm": 3.0,
      "learning_rate": 3.3255709505154585e-05,
      "loss": 0.8798,
      "step": 343590
    },
    {
      "epoch": 1.2042323369327124,
      "grad_norm": 2.65625,
      "learning_rate": 3.3255060476490887e-05,
      "loss": 0.8177,
      "step": 343600
    },
    {
      "epoch": 1.204267384439608,
      "grad_norm": 3.140625,
      "learning_rate": 3.325441144782718e-05,
      "loss": 0.8976,
      "step": 343610
    },
    {
      "epoch": 1.2043024319465034,
      "grad_norm": 3.03125,
      "learning_rate": 3.325376241916348e-05,
      "loss": 0.8373,
      "step": 343620
    },
    {
      "epoch": 1.204337479453399,
      "grad_norm": 2.40625,
      "learning_rate": 3.325311339049978e-05,
      "loss": 0.7784,
      "step": 343630
    },
    {
      "epoch": 1.2043725269602947,
      "grad_norm": 2.90625,
      "learning_rate": 3.325246436183608e-05,
      "loss": 0.8454,
      "step": 343640
    },
    {
      "epoch": 1.2044075744671903,
      "grad_norm": 2.59375,
      "learning_rate": 3.325181533317237e-05,
      "loss": 0.8828,
      "step": 343650
    },
    {
      "epoch": 1.2044426219740858,
      "grad_norm": 3.140625,
      "learning_rate": 3.3251166304508675e-05,
      "loss": 0.8476,
      "step": 343660
    },
    {
      "epoch": 1.2044776694809816,
      "grad_norm": 2.671875,
      "learning_rate": 3.325051727584497e-05,
      "loss": 0.9019,
      "step": 343670
    },
    {
      "epoch": 1.204512716987877,
      "grad_norm": 2.875,
      "learning_rate": 3.324986824718127e-05,
      "loss": 0.8542,
      "step": 343680
    },
    {
      "epoch": 1.2045477644947726,
      "grad_norm": 3.046875,
      "learning_rate": 3.324921921851757e-05,
      "loss": 0.8676,
      "step": 343690
    },
    {
      "epoch": 1.2045828120016682,
      "grad_norm": 3.15625,
      "learning_rate": 3.3248570189853867e-05,
      "loss": 0.8624,
      "step": 343700
    },
    {
      "epoch": 1.204617859508564,
      "grad_norm": 3.0625,
      "learning_rate": 3.324792116119017e-05,
      "loss": 0.8748,
      "step": 343710
    },
    {
      "epoch": 1.2046529070154595,
      "grad_norm": 2.9375,
      "learning_rate": 3.324727213252646e-05,
      "loss": 0.8511,
      "step": 343720
    },
    {
      "epoch": 1.204687954522355,
      "grad_norm": 2.71875,
      "learning_rate": 3.3246623103862764e-05,
      "loss": 0.8459,
      "step": 343730
    },
    {
      "epoch": 1.2047230020292505,
      "grad_norm": 2.890625,
      "learning_rate": 3.324597407519906e-05,
      "loss": 0.8119,
      "step": 343740
    },
    {
      "epoch": 1.2047580495361463,
      "grad_norm": 2.46875,
      "learning_rate": 3.324532504653536e-05,
      "loss": 0.8746,
      "step": 343750
    },
    {
      "epoch": 1.2047930970430418,
      "grad_norm": 2.921875,
      "learning_rate": 3.3244676017871655e-05,
      "loss": 0.8453,
      "step": 343760
    },
    {
      "epoch": 1.2048281445499374,
      "grad_norm": 2.875,
      "learning_rate": 3.3244026989207956e-05,
      "loss": 0.8641,
      "step": 343770
    },
    {
      "epoch": 1.2048631920568331,
      "grad_norm": 3.28125,
      "learning_rate": 3.324337796054425e-05,
      "loss": 0.7722,
      "step": 343780
    },
    {
      "epoch": 1.2048982395637287,
      "grad_norm": 3.171875,
      "learning_rate": 3.324272893188055e-05,
      "loss": 0.8141,
      "step": 343790
    },
    {
      "epoch": 1.2049332870706242,
      "grad_norm": 2.859375,
      "learning_rate": 3.3242079903216847e-05,
      "loss": 0.8721,
      "step": 343800
    },
    {
      "epoch": 1.20496833457752,
      "grad_norm": 3.203125,
      "learning_rate": 3.324143087455315e-05,
      "loss": 0.901,
      "step": 343810
    },
    {
      "epoch": 1.2050033820844155,
      "grad_norm": 2.75,
      "learning_rate": 3.324078184588945e-05,
      "loss": 0.8437,
      "step": 343820
    },
    {
      "epoch": 1.205038429591311,
      "grad_norm": 2.984375,
      "learning_rate": 3.3240132817225744e-05,
      "loss": 0.8048,
      "step": 343830
    },
    {
      "epoch": 1.2050734770982066,
      "grad_norm": 3.21875,
      "learning_rate": 3.323948378856204e-05,
      "loss": 0.8945,
      "step": 343840
    },
    {
      "epoch": 1.205108524605102,
      "grad_norm": 2.921875,
      "learning_rate": 3.323883475989833e-05,
      "loss": 0.8376,
      "step": 343850
    },
    {
      "epoch": 1.2051435721119979,
      "grad_norm": 2.6875,
      "learning_rate": 3.3238185731234635e-05,
      "loss": 0.9014,
      "step": 343860
    },
    {
      "epoch": 1.2051786196188934,
      "grad_norm": 2.75,
      "learning_rate": 3.323753670257093e-05,
      "loss": 0.9006,
      "step": 343870
    },
    {
      "epoch": 1.205213667125789,
      "grad_norm": 2.765625,
      "learning_rate": 3.323688767390723e-05,
      "loss": 0.8156,
      "step": 343880
    },
    {
      "epoch": 1.2052487146326847,
      "grad_norm": 3.171875,
      "learning_rate": 3.3236238645243525e-05,
      "loss": 0.8423,
      "step": 343890
    },
    {
      "epoch": 1.2052837621395802,
      "grad_norm": 3.078125,
      "learning_rate": 3.3235589616579827e-05,
      "loss": 0.8137,
      "step": 343900
    },
    {
      "epoch": 1.2053188096464758,
      "grad_norm": 2.875,
      "learning_rate": 3.323494058791613e-05,
      "loss": 0.8555,
      "step": 343910
    },
    {
      "epoch": 1.2053538571533715,
      "grad_norm": 3.265625,
      "learning_rate": 3.323429155925242e-05,
      "loss": 0.8675,
      "step": 343920
    },
    {
      "epoch": 1.205388904660267,
      "grad_norm": 3.0625,
      "learning_rate": 3.3233642530588724e-05,
      "loss": 0.788,
      "step": 343930
    },
    {
      "epoch": 1.2054239521671626,
      "grad_norm": 2.46875,
      "learning_rate": 3.323299350192502e-05,
      "loss": 0.8496,
      "step": 343940
    },
    {
      "epoch": 1.2054589996740581,
      "grad_norm": 2.90625,
      "learning_rate": 3.323234447326132e-05,
      "loss": 0.8423,
      "step": 343950
    },
    {
      "epoch": 1.2054940471809539,
      "grad_norm": 3.0,
      "learning_rate": 3.3231695444597615e-05,
      "loss": 0.924,
      "step": 343960
    },
    {
      "epoch": 1.2055290946878494,
      "grad_norm": 3.0625,
      "learning_rate": 3.3231046415933916e-05,
      "loss": 0.8794,
      "step": 343970
    },
    {
      "epoch": 1.205564142194745,
      "grad_norm": 2.6875,
      "learning_rate": 3.323039738727021e-05,
      "loss": 0.8511,
      "step": 343980
    },
    {
      "epoch": 1.2055991897016405,
      "grad_norm": 2.625,
      "learning_rate": 3.322974835860651e-05,
      "loss": 0.8281,
      "step": 343990
    },
    {
      "epoch": 1.2056342372085362,
      "grad_norm": 3.34375,
      "learning_rate": 3.3229099329942807e-05,
      "loss": 1.0116,
      "step": 344000
    },
    {
      "epoch": 1.2056692847154318,
      "grad_norm": 2.640625,
      "learning_rate": 3.322845030127911e-05,
      "loss": 0.824,
      "step": 344010
    },
    {
      "epoch": 1.2057043322223273,
      "grad_norm": 2.75,
      "learning_rate": 3.32278012726154e-05,
      "loss": 0.8542,
      "step": 344020
    },
    {
      "epoch": 1.205739379729223,
      "grad_norm": 3.125,
      "learning_rate": 3.3227152243951704e-05,
      "loss": 0.8859,
      "step": 344030
    },
    {
      "epoch": 1.2057744272361186,
      "grad_norm": 2.890625,
      "learning_rate": 3.3226503215288e-05,
      "loss": 0.8265,
      "step": 344040
    },
    {
      "epoch": 1.2058094747430141,
      "grad_norm": 3.03125,
      "learning_rate": 3.32258541866243e-05,
      "loss": 0.7512,
      "step": 344050
    },
    {
      "epoch": 1.2058445222499097,
      "grad_norm": 3.421875,
      "learning_rate": 3.32252051579606e-05,
      "loss": 0.8306,
      "step": 344060
    },
    {
      "epoch": 1.2058795697568054,
      "grad_norm": 2.609375,
      "learning_rate": 3.3224556129296896e-05,
      "loss": 0.8517,
      "step": 344070
    },
    {
      "epoch": 1.205914617263701,
      "grad_norm": 3.4375,
      "learning_rate": 3.32239071006332e-05,
      "loss": 0.8133,
      "step": 344080
    },
    {
      "epoch": 1.2059496647705965,
      "grad_norm": 2.828125,
      "learning_rate": 3.322325807196949e-05,
      "loss": 0.8695,
      "step": 344090
    },
    {
      "epoch": 1.205984712277492,
      "grad_norm": 2.875,
      "learning_rate": 3.322260904330579e-05,
      "loss": 0.8872,
      "step": 344100
    },
    {
      "epoch": 1.2060197597843878,
      "grad_norm": 3.15625,
      "learning_rate": 3.322196001464209e-05,
      "loss": 0.8256,
      "step": 344110
    },
    {
      "epoch": 1.2060548072912833,
      "grad_norm": 3.03125,
      "learning_rate": 3.322131098597839e-05,
      "loss": 0.8603,
      "step": 344120
    },
    {
      "epoch": 1.2060898547981789,
      "grad_norm": 2.734375,
      "learning_rate": 3.3220661957314684e-05,
      "loss": 0.8056,
      "step": 344130
    },
    {
      "epoch": 1.2061249023050746,
      "grad_norm": 3.328125,
      "learning_rate": 3.3220012928650985e-05,
      "loss": 0.8547,
      "step": 344140
    },
    {
      "epoch": 1.2061599498119702,
      "grad_norm": 2.84375,
      "learning_rate": 3.321936389998728e-05,
      "loss": 0.8245,
      "step": 344150
    },
    {
      "epoch": 1.2061949973188657,
      "grad_norm": 2.703125,
      "learning_rate": 3.321871487132358e-05,
      "loss": 0.8889,
      "step": 344160
    },
    {
      "epoch": 1.2062300448257612,
      "grad_norm": 2.5625,
      "learning_rate": 3.3218065842659876e-05,
      "loss": 0.8517,
      "step": 344170
    },
    {
      "epoch": 1.206265092332657,
      "grad_norm": 2.65625,
      "learning_rate": 3.321741681399618e-05,
      "loss": 0.7774,
      "step": 344180
    },
    {
      "epoch": 1.2063001398395525,
      "grad_norm": 2.875,
      "learning_rate": 3.321676778533248e-05,
      "loss": 0.811,
      "step": 344190
    },
    {
      "epoch": 1.206335187346448,
      "grad_norm": 2.828125,
      "learning_rate": 3.321611875666877e-05,
      "loss": 0.9697,
      "step": 344200
    },
    {
      "epoch": 1.2063702348533436,
      "grad_norm": 2.828125,
      "learning_rate": 3.3215469728005075e-05,
      "loss": 0.8462,
      "step": 344210
    },
    {
      "epoch": 1.2064052823602394,
      "grad_norm": 3.203125,
      "learning_rate": 3.321482069934136e-05,
      "loss": 0.9299,
      "step": 344220
    },
    {
      "epoch": 1.206440329867135,
      "grad_norm": 2.6875,
      "learning_rate": 3.3214171670677664e-05,
      "loss": 0.8012,
      "step": 344230
    },
    {
      "epoch": 1.2064753773740304,
      "grad_norm": 3.046875,
      "learning_rate": 3.321352264201396e-05,
      "loss": 0.8879,
      "step": 344240
    },
    {
      "epoch": 1.2065104248809262,
      "grad_norm": 3.46875,
      "learning_rate": 3.321287361335026e-05,
      "loss": 0.8329,
      "step": 344250
    },
    {
      "epoch": 1.2065454723878217,
      "grad_norm": 3.046875,
      "learning_rate": 3.3212224584686555e-05,
      "loss": 0.965,
      "step": 344260
    },
    {
      "epoch": 1.2065805198947173,
      "grad_norm": 3.0,
      "learning_rate": 3.3211575556022856e-05,
      "loss": 0.8985,
      "step": 344270
    },
    {
      "epoch": 1.2066155674016128,
      "grad_norm": 2.75,
      "learning_rate": 3.321092652735916e-05,
      "loss": 0.8698,
      "step": 344280
    },
    {
      "epoch": 1.2066506149085086,
      "grad_norm": 2.90625,
      "learning_rate": 3.321027749869545e-05,
      "loss": 0.7787,
      "step": 344290
    },
    {
      "epoch": 1.206685662415404,
      "grad_norm": 2.84375,
      "learning_rate": 3.320962847003175e-05,
      "loss": 0.9045,
      "step": 344300
    },
    {
      "epoch": 1.2067207099222996,
      "grad_norm": 2.890625,
      "learning_rate": 3.320897944136805e-05,
      "loss": 0.8604,
      "step": 344310
    },
    {
      "epoch": 1.2067557574291952,
      "grad_norm": 2.875,
      "learning_rate": 3.320833041270435e-05,
      "loss": 0.8839,
      "step": 344320
    },
    {
      "epoch": 1.206790804936091,
      "grad_norm": 2.9375,
      "learning_rate": 3.3207681384040644e-05,
      "loss": 0.7938,
      "step": 344330
    },
    {
      "epoch": 1.2068258524429865,
      "grad_norm": 3.265625,
      "learning_rate": 3.3207032355376945e-05,
      "loss": 0.8853,
      "step": 344340
    },
    {
      "epoch": 1.206860899949882,
      "grad_norm": 3.421875,
      "learning_rate": 3.320638332671324e-05,
      "loss": 0.867,
      "step": 344350
    },
    {
      "epoch": 1.2068959474567778,
      "grad_norm": 3.1875,
      "learning_rate": 3.320573429804954e-05,
      "loss": 0.8233,
      "step": 344360
    },
    {
      "epoch": 1.2069309949636733,
      "grad_norm": 2.828125,
      "learning_rate": 3.3205085269385836e-05,
      "loss": 0.8514,
      "step": 344370
    },
    {
      "epoch": 1.2069660424705688,
      "grad_norm": 2.875,
      "learning_rate": 3.320443624072214e-05,
      "loss": 0.8669,
      "step": 344380
    },
    {
      "epoch": 1.2070010899774644,
      "grad_norm": 2.8125,
      "learning_rate": 3.320378721205843e-05,
      "loss": 0.8031,
      "step": 344390
    },
    {
      "epoch": 1.2070361374843601,
      "grad_norm": 3.140625,
      "learning_rate": 3.320313818339473e-05,
      "loss": 0.9331,
      "step": 344400
    },
    {
      "epoch": 1.2070711849912557,
      "grad_norm": 2.78125,
      "learning_rate": 3.320248915473103e-05,
      "loss": 0.8603,
      "step": 344410
    },
    {
      "epoch": 1.2071062324981512,
      "grad_norm": 2.5,
      "learning_rate": 3.320184012606733e-05,
      "loss": 0.8066,
      "step": 344420
    },
    {
      "epoch": 1.2071412800050467,
      "grad_norm": 2.8125,
      "learning_rate": 3.320119109740363e-05,
      "loss": 0.8458,
      "step": 344430
    },
    {
      "epoch": 1.2071763275119425,
      "grad_norm": 2.609375,
      "learning_rate": 3.3200542068739925e-05,
      "loss": 0.8438,
      "step": 344440
    },
    {
      "epoch": 1.207211375018838,
      "grad_norm": 2.65625,
      "learning_rate": 3.319989304007623e-05,
      "loss": 0.8358,
      "step": 344450
    },
    {
      "epoch": 1.2072464225257336,
      "grad_norm": 2.953125,
      "learning_rate": 3.319924401141252e-05,
      "loss": 0.7903,
      "step": 344460
    },
    {
      "epoch": 1.2072814700326293,
      "grad_norm": 3.09375,
      "learning_rate": 3.319859498274882e-05,
      "loss": 0.8643,
      "step": 344470
    },
    {
      "epoch": 1.2073165175395248,
      "grad_norm": 2.59375,
      "learning_rate": 3.319794595408512e-05,
      "loss": 0.8506,
      "step": 344480
    },
    {
      "epoch": 1.2073515650464204,
      "grad_norm": 2.546875,
      "learning_rate": 3.319729692542142e-05,
      "loss": 0.7353,
      "step": 344490
    },
    {
      "epoch": 1.2073866125533161,
      "grad_norm": 2.859375,
      "learning_rate": 3.319664789675771e-05,
      "loss": 0.804,
      "step": 344500
    },
    {
      "epoch": 1.2074216600602117,
      "grad_norm": 2.984375,
      "learning_rate": 3.3195998868094015e-05,
      "loss": 0.8741,
      "step": 344510
    },
    {
      "epoch": 1.2074567075671072,
      "grad_norm": 3.25,
      "learning_rate": 3.319534983943031e-05,
      "loss": 0.8677,
      "step": 344520
    },
    {
      "epoch": 1.2074917550740027,
      "grad_norm": 3.078125,
      "learning_rate": 3.319470081076661e-05,
      "loss": 0.8937,
      "step": 344530
    },
    {
      "epoch": 1.2075268025808983,
      "grad_norm": 3.171875,
      "learning_rate": 3.3194051782102905e-05,
      "loss": 0.895,
      "step": 344540
    },
    {
      "epoch": 1.207561850087794,
      "grad_norm": 2.984375,
      "learning_rate": 3.319340275343921e-05,
      "loss": 0.8791,
      "step": 344550
    },
    {
      "epoch": 1.2075968975946896,
      "grad_norm": 2.484375,
      "learning_rate": 3.319275372477551e-05,
      "loss": 0.8421,
      "step": 344560
    },
    {
      "epoch": 1.2076319451015851,
      "grad_norm": 2.859375,
      "learning_rate": 3.31921046961118e-05,
      "loss": 0.8574,
      "step": 344570
    },
    {
      "epoch": 1.2076669926084809,
      "grad_norm": 3.203125,
      "learning_rate": 3.3191455667448104e-05,
      "loss": 0.7626,
      "step": 344580
    },
    {
      "epoch": 1.2077020401153764,
      "grad_norm": 2.984375,
      "learning_rate": 3.319080663878439e-05,
      "loss": 0.8817,
      "step": 344590
    },
    {
      "epoch": 1.207737087622272,
      "grad_norm": 2.71875,
      "learning_rate": 3.319015761012069e-05,
      "loss": 0.8595,
      "step": 344600
    },
    {
      "epoch": 1.2077721351291677,
      "grad_norm": 3.140625,
      "learning_rate": 3.318950858145699e-05,
      "loss": 0.8318,
      "step": 344610
    },
    {
      "epoch": 1.2078071826360632,
      "grad_norm": 2.65625,
      "learning_rate": 3.318885955279329e-05,
      "loss": 0.7813,
      "step": 344620
    },
    {
      "epoch": 1.2078422301429588,
      "grad_norm": 3.015625,
      "learning_rate": 3.3188210524129584e-05,
      "loss": 0.8415,
      "step": 344630
    },
    {
      "epoch": 1.2078772776498543,
      "grad_norm": 2.75,
      "learning_rate": 3.3187561495465885e-05,
      "loss": 0.8723,
      "step": 344640
    },
    {
      "epoch": 1.20791232515675,
      "grad_norm": 2.96875,
      "learning_rate": 3.318691246680219e-05,
      "loss": 0.8828,
      "step": 344650
    },
    {
      "epoch": 1.2079473726636456,
      "grad_norm": 3.015625,
      "learning_rate": 3.318626343813848e-05,
      "loss": 0.8469,
      "step": 344660
    },
    {
      "epoch": 1.2079824201705411,
      "grad_norm": 2.9375,
      "learning_rate": 3.318561440947478e-05,
      "loss": 0.7816,
      "step": 344670
    },
    {
      "epoch": 1.2080174676774367,
      "grad_norm": 2.640625,
      "learning_rate": 3.318496538081108e-05,
      "loss": 0.7921,
      "step": 344680
    },
    {
      "epoch": 1.2080525151843324,
      "grad_norm": 3.125,
      "learning_rate": 3.318431635214738e-05,
      "loss": 0.8821,
      "step": 344690
    },
    {
      "epoch": 1.208087562691228,
      "grad_norm": 2.90625,
      "learning_rate": 3.318366732348367e-05,
      "loss": 0.8762,
      "step": 344700
    },
    {
      "epoch": 1.2081226101981235,
      "grad_norm": 2.625,
      "learning_rate": 3.3183018294819975e-05,
      "loss": 0.913,
      "step": 344710
    },
    {
      "epoch": 1.2081576577050193,
      "grad_norm": 3.140625,
      "learning_rate": 3.318236926615627e-05,
      "loss": 0.8799,
      "step": 344720
    },
    {
      "epoch": 1.2081927052119148,
      "grad_norm": 2.921875,
      "learning_rate": 3.318172023749257e-05,
      "loss": 0.8471,
      "step": 344730
    },
    {
      "epoch": 1.2082277527188103,
      "grad_norm": 3.34375,
      "learning_rate": 3.3181071208828865e-05,
      "loss": 0.8681,
      "step": 344740
    },
    {
      "epoch": 1.2082628002257059,
      "grad_norm": 2.75,
      "learning_rate": 3.318042218016517e-05,
      "loss": 0.8944,
      "step": 344750
    },
    {
      "epoch": 1.2082978477326016,
      "grad_norm": 3.3125,
      "learning_rate": 3.317977315150146e-05,
      "loss": 0.8742,
      "step": 344760
    },
    {
      "epoch": 1.2083328952394972,
      "grad_norm": 3.296875,
      "learning_rate": 3.317912412283776e-05,
      "loss": 0.83,
      "step": 344770
    },
    {
      "epoch": 1.2083679427463927,
      "grad_norm": 2.9375,
      "learning_rate": 3.3178475094174064e-05,
      "loss": 0.8341,
      "step": 344780
    },
    {
      "epoch": 1.2084029902532882,
      "grad_norm": 2.6875,
      "learning_rate": 3.317782606551036e-05,
      "loss": 0.8578,
      "step": 344790
    },
    {
      "epoch": 1.208438037760184,
      "grad_norm": 2.953125,
      "learning_rate": 3.317717703684666e-05,
      "loss": 0.8766,
      "step": 344800
    },
    {
      "epoch": 1.2084730852670795,
      "grad_norm": 2.78125,
      "learning_rate": 3.3176528008182955e-05,
      "loss": 0.7703,
      "step": 344810
    },
    {
      "epoch": 1.208508132773975,
      "grad_norm": 3.0,
      "learning_rate": 3.3175878979519256e-05,
      "loss": 0.8755,
      "step": 344820
    },
    {
      "epoch": 1.2085431802808708,
      "grad_norm": 2.875,
      "learning_rate": 3.317522995085555e-05,
      "loss": 0.7954,
      "step": 344830
    },
    {
      "epoch": 1.2085782277877664,
      "grad_norm": 2.9375,
      "learning_rate": 3.317458092219185e-05,
      "loss": 0.8413,
      "step": 344840
    },
    {
      "epoch": 1.208613275294662,
      "grad_norm": 2.890625,
      "learning_rate": 3.317393189352815e-05,
      "loss": 0.8001,
      "step": 344850
    },
    {
      "epoch": 1.2086483228015574,
      "grad_norm": 2.390625,
      "learning_rate": 3.317328286486445e-05,
      "loss": 0.7991,
      "step": 344860
    },
    {
      "epoch": 1.2086833703084532,
      "grad_norm": 2.71875,
      "learning_rate": 3.317263383620074e-05,
      "loss": 0.8543,
      "step": 344870
    },
    {
      "epoch": 1.2087184178153487,
      "grad_norm": 2.796875,
      "learning_rate": 3.3171984807537044e-05,
      "loss": 0.8983,
      "step": 344880
    },
    {
      "epoch": 1.2087534653222443,
      "grad_norm": 3.125,
      "learning_rate": 3.317133577887334e-05,
      "loss": 0.8637,
      "step": 344890
    },
    {
      "epoch": 1.2087885128291398,
      "grad_norm": 3.203125,
      "learning_rate": 3.317068675020964e-05,
      "loss": 0.8335,
      "step": 344900
    },
    {
      "epoch": 1.2088235603360356,
      "grad_norm": 2.921875,
      "learning_rate": 3.3170037721545935e-05,
      "loss": 0.9083,
      "step": 344910
    },
    {
      "epoch": 1.208858607842931,
      "grad_norm": 2.71875,
      "learning_rate": 3.3169388692882236e-05,
      "loss": 0.8975,
      "step": 344920
    },
    {
      "epoch": 1.2088936553498266,
      "grad_norm": 2.640625,
      "learning_rate": 3.316873966421854e-05,
      "loss": 0.8491,
      "step": 344930
    },
    {
      "epoch": 1.2089287028567224,
      "grad_norm": 3.171875,
      "learning_rate": 3.316809063555483e-05,
      "loss": 0.913,
      "step": 344940
    },
    {
      "epoch": 1.208963750363618,
      "grad_norm": 2.65625,
      "learning_rate": 3.3167441606891134e-05,
      "loss": 0.8048,
      "step": 344950
    },
    {
      "epoch": 1.2089987978705135,
      "grad_norm": 2.546875,
      "learning_rate": 3.316679257822743e-05,
      "loss": 0.788,
      "step": 344960
    },
    {
      "epoch": 1.209033845377409,
      "grad_norm": 2.765625,
      "learning_rate": 3.316614354956372e-05,
      "loss": 0.8552,
      "step": 344970
    },
    {
      "epoch": 1.2090688928843047,
      "grad_norm": 2.859375,
      "learning_rate": 3.316549452090002e-05,
      "loss": 0.8979,
      "step": 344980
    },
    {
      "epoch": 1.2091039403912003,
      "grad_norm": 2.65625,
      "learning_rate": 3.316484549223632e-05,
      "loss": 0.8389,
      "step": 344990
    },
    {
      "epoch": 1.2091389878980958,
      "grad_norm": 3.875,
      "learning_rate": 3.316419646357261e-05,
      "loss": 0.8043,
      "step": 345000
    },
    {
      "epoch": 1.2091389878980958,
      "eval_loss": 0.8052164316177368,
      "eval_runtime": 547.559,
      "eval_samples_per_second": 694.785,
      "eval_steps_per_second": 57.899,
      "step": 345000
    },
    {
      "epoch": 1.2091740354049914,
      "grad_norm": 2.984375,
      "learning_rate": 3.3163547434908915e-05,
      "loss": 0.7658,
      "step": 345010
    },
    {
      "epoch": 1.2092090829118871,
      "grad_norm": 2.671875,
      "learning_rate": 3.3162898406245216e-05,
      "loss": 0.7863,
      "step": 345020
    },
    {
      "epoch": 1.2092441304187826,
      "grad_norm": 2.875,
      "learning_rate": 3.316224937758151e-05,
      "loss": 0.8133,
      "step": 345030
    },
    {
      "epoch": 1.2092791779256782,
      "grad_norm": 2.859375,
      "learning_rate": 3.316160034891781e-05,
      "loss": 0.7683,
      "step": 345040
    },
    {
      "epoch": 1.209314225432574,
      "grad_norm": 2.953125,
      "learning_rate": 3.316095132025411e-05,
      "loss": 0.8657,
      "step": 345050
    },
    {
      "epoch": 1.2093492729394695,
      "grad_norm": 2.546875,
      "learning_rate": 3.316030229159041e-05,
      "loss": 0.8811,
      "step": 345060
    },
    {
      "epoch": 1.209384320446365,
      "grad_norm": 2.765625,
      "learning_rate": 3.31596532629267e-05,
      "loss": 0.8433,
      "step": 345070
    },
    {
      "epoch": 1.2094193679532605,
      "grad_norm": 3.390625,
      "learning_rate": 3.3159004234263004e-05,
      "loss": 0.9047,
      "step": 345080
    },
    {
      "epoch": 1.2094544154601563,
      "grad_norm": 2.953125,
      "learning_rate": 3.31583552055993e-05,
      "loss": 0.9067,
      "step": 345090
    },
    {
      "epoch": 1.2094894629670518,
      "grad_norm": 2.78125,
      "learning_rate": 3.31577061769356e-05,
      "loss": 0.8986,
      "step": 345100
    },
    {
      "epoch": 1.2095245104739474,
      "grad_norm": 2.5625,
      "learning_rate": 3.3157057148271895e-05,
      "loss": 0.8617,
      "step": 345110
    },
    {
      "epoch": 1.209559557980843,
      "grad_norm": 3.25,
      "learning_rate": 3.3156408119608196e-05,
      "loss": 0.8994,
      "step": 345120
    },
    {
      "epoch": 1.2095946054877387,
      "grad_norm": 3.03125,
      "learning_rate": 3.315575909094449e-05,
      "loss": 0.8559,
      "step": 345130
    },
    {
      "epoch": 1.2096296529946342,
      "grad_norm": 3.078125,
      "learning_rate": 3.315511006228079e-05,
      "loss": 0.9362,
      "step": 345140
    },
    {
      "epoch": 1.2096647005015297,
      "grad_norm": 2.3125,
      "learning_rate": 3.3154461033617094e-05,
      "loss": 0.7842,
      "step": 345150
    },
    {
      "epoch": 1.2096997480084255,
      "grad_norm": 2.96875,
      "learning_rate": 3.315381200495339e-05,
      "loss": 0.8921,
      "step": 345160
    },
    {
      "epoch": 1.209734795515321,
      "grad_norm": 3.203125,
      "learning_rate": 3.315316297628969e-05,
      "loss": 0.8941,
      "step": 345170
    },
    {
      "epoch": 1.2097698430222166,
      "grad_norm": 3.125,
      "learning_rate": 3.3152513947625984e-05,
      "loss": 0.8417,
      "step": 345180
    },
    {
      "epoch": 1.2098048905291123,
      "grad_norm": 3.59375,
      "learning_rate": 3.3151864918962286e-05,
      "loss": 0.8886,
      "step": 345190
    },
    {
      "epoch": 1.2098399380360079,
      "grad_norm": 3.15625,
      "learning_rate": 3.315121589029858e-05,
      "loss": 0.8119,
      "step": 345200
    },
    {
      "epoch": 1.2098749855429034,
      "grad_norm": 2.390625,
      "learning_rate": 3.315056686163488e-05,
      "loss": 0.8889,
      "step": 345210
    },
    {
      "epoch": 1.209910033049799,
      "grad_norm": 2.9375,
      "learning_rate": 3.3149917832971176e-05,
      "loss": 0.842,
      "step": 345220
    },
    {
      "epoch": 1.2099450805566947,
      "grad_norm": 2.765625,
      "learning_rate": 3.314926880430748e-05,
      "loss": 0.7954,
      "step": 345230
    },
    {
      "epoch": 1.2099801280635902,
      "grad_norm": 2.9375,
      "learning_rate": 3.314861977564377e-05,
      "loss": 0.8739,
      "step": 345240
    },
    {
      "epoch": 1.2100151755704858,
      "grad_norm": 2.4375,
      "learning_rate": 3.3147970746980074e-05,
      "loss": 0.8716,
      "step": 345250
    },
    {
      "epoch": 1.2100502230773813,
      "grad_norm": 3.171875,
      "learning_rate": 3.314732171831637e-05,
      "loss": 0.9429,
      "step": 345260
    },
    {
      "epoch": 1.210085270584277,
      "grad_norm": 3.03125,
      "learning_rate": 3.314667268965267e-05,
      "loss": 0.9059,
      "step": 345270
    },
    {
      "epoch": 1.2101203180911726,
      "grad_norm": 3.125,
      "learning_rate": 3.3146023660988964e-05,
      "loss": 0.819,
      "step": 345280
    },
    {
      "epoch": 1.2101553655980681,
      "grad_norm": 2.828125,
      "learning_rate": 3.3145374632325266e-05,
      "loss": 0.8478,
      "step": 345290
    },
    {
      "epoch": 1.210190413104964,
      "grad_norm": 2.828125,
      "learning_rate": 3.314472560366157e-05,
      "loss": 0.8797,
      "step": 345300
    },
    {
      "epoch": 1.2102254606118594,
      "grad_norm": 3.78125,
      "learning_rate": 3.314407657499786e-05,
      "loss": 0.821,
      "step": 345310
    },
    {
      "epoch": 1.210260508118755,
      "grad_norm": 2.703125,
      "learning_rate": 3.314342754633416e-05,
      "loss": 0.8835,
      "step": 345320
    },
    {
      "epoch": 1.2102955556256505,
      "grad_norm": 2.96875,
      "learning_rate": 3.314277851767046e-05,
      "loss": 0.9081,
      "step": 345330
    },
    {
      "epoch": 1.2103306031325463,
      "grad_norm": 2.828125,
      "learning_rate": 3.314212948900676e-05,
      "loss": 0.8629,
      "step": 345340
    },
    {
      "epoch": 1.2103656506394418,
      "grad_norm": 2.90625,
      "learning_rate": 3.314148046034305e-05,
      "loss": 0.827,
      "step": 345350
    },
    {
      "epoch": 1.2104006981463373,
      "grad_norm": 2.8125,
      "learning_rate": 3.314083143167935e-05,
      "loss": 0.8548,
      "step": 345360
    },
    {
      "epoch": 1.2104357456532329,
      "grad_norm": 2.90625,
      "learning_rate": 3.314018240301564e-05,
      "loss": 0.7981,
      "step": 345370
    },
    {
      "epoch": 1.2104707931601286,
      "grad_norm": 2.84375,
      "learning_rate": 3.3139533374351944e-05,
      "loss": 0.8786,
      "step": 345380
    },
    {
      "epoch": 1.2105058406670242,
      "grad_norm": 3.09375,
      "learning_rate": 3.3138884345688246e-05,
      "loss": 0.8903,
      "step": 345390
    },
    {
      "epoch": 1.2105408881739197,
      "grad_norm": 3.125,
      "learning_rate": 3.313823531702454e-05,
      "loss": 0.979,
      "step": 345400
    },
    {
      "epoch": 1.2105759356808155,
      "grad_norm": 2.671875,
      "learning_rate": 3.313758628836084e-05,
      "loss": 0.8184,
      "step": 345410
    },
    {
      "epoch": 1.210610983187711,
      "grad_norm": 3.0625,
      "learning_rate": 3.3136937259697136e-05,
      "loss": 0.9219,
      "step": 345420
    },
    {
      "epoch": 1.2106460306946065,
      "grad_norm": 3.34375,
      "learning_rate": 3.313628823103344e-05,
      "loss": 0.7865,
      "step": 345430
    },
    {
      "epoch": 1.210681078201502,
      "grad_norm": 3.140625,
      "learning_rate": 3.313563920236973e-05,
      "loss": 0.9083,
      "step": 345440
    },
    {
      "epoch": 1.2107161257083978,
      "grad_norm": 2.671875,
      "learning_rate": 3.3134990173706034e-05,
      "loss": 0.926,
      "step": 345450
    },
    {
      "epoch": 1.2107511732152934,
      "grad_norm": 2.859375,
      "learning_rate": 3.313434114504233e-05,
      "loss": 0.9643,
      "step": 345460
    },
    {
      "epoch": 1.2107862207221889,
      "grad_norm": 3.046875,
      "learning_rate": 3.313369211637863e-05,
      "loss": 0.8195,
      "step": 345470
    },
    {
      "epoch": 1.2108212682290844,
      "grad_norm": 2.6875,
      "learning_rate": 3.3133043087714924e-05,
      "loss": 0.8682,
      "step": 345480
    },
    {
      "epoch": 1.2108563157359802,
      "grad_norm": 2.671875,
      "learning_rate": 3.3132394059051226e-05,
      "loss": 0.8162,
      "step": 345490
    },
    {
      "epoch": 1.2108913632428757,
      "grad_norm": 3.25,
      "learning_rate": 3.313174503038752e-05,
      "loss": 0.8813,
      "step": 345500
    },
    {
      "epoch": 1.2109264107497713,
      "grad_norm": 2.96875,
      "learning_rate": 3.313109600172382e-05,
      "loss": 0.8658,
      "step": 345510
    },
    {
      "epoch": 1.210961458256667,
      "grad_norm": 2.515625,
      "learning_rate": 3.313044697306012e-05,
      "loss": 0.9135,
      "step": 345520
    },
    {
      "epoch": 1.2109965057635625,
      "grad_norm": 2.671875,
      "learning_rate": 3.312979794439642e-05,
      "loss": 0.8424,
      "step": 345530
    },
    {
      "epoch": 1.211031553270458,
      "grad_norm": 2.875,
      "learning_rate": 3.312914891573272e-05,
      "loss": 0.8171,
      "step": 345540
    },
    {
      "epoch": 1.2110666007773536,
      "grad_norm": 2.953125,
      "learning_rate": 3.3128499887069014e-05,
      "loss": 0.8428,
      "step": 345550
    },
    {
      "epoch": 1.2111016482842494,
      "grad_norm": 2.78125,
      "learning_rate": 3.3127850858405315e-05,
      "loss": 0.8502,
      "step": 345560
    },
    {
      "epoch": 1.211136695791145,
      "grad_norm": 3.421875,
      "learning_rate": 3.312720182974161e-05,
      "loss": 0.9138,
      "step": 345570
    },
    {
      "epoch": 1.2111717432980404,
      "grad_norm": 2.765625,
      "learning_rate": 3.312655280107791e-05,
      "loss": 0.8662,
      "step": 345580
    },
    {
      "epoch": 1.211206790804936,
      "grad_norm": 2.578125,
      "learning_rate": 3.3125903772414206e-05,
      "loss": 0.8788,
      "step": 345590
    },
    {
      "epoch": 1.2112418383118317,
      "grad_norm": 2.609375,
      "learning_rate": 3.312525474375051e-05,
      "loss": 0.8481,
      "step": 345600
    },
    {
      "epoch": 1.2112768858187273,
      "grad_norm": 2.703125,
      "learning_rate": 3.31246057150868e-05,
      "loss": 0.8607,
      "step": 345610
    },
    {
      "epoch": 1.2113119333256228,
      "grad_norm": 2.609375,
      "learning_rate": 3.31239566864231e-05,
      "loss": 0.8497,
      "step": 345620
    },
    {
      "epoch": 1.2113469808325186,
      "grad_norm": 2.390625,
      "learning_rate": 3.31233076577594e-05,
      "loss": 0.7828,
      "step": 345630
    },
    {
      "epoch": 1.211382028339414,
      "grad_norm": 3.015625,
      "learning_rate": 3.31226586290957e-05,
      "loss": 0.9506,
      "step": 345640
    },
    {
      "epoch": 1.2114170758463096,
      "grad_norm": 2.671875,
      "learning_rate": 3.3122009600431994e-05,
      "loss": 0.7981,
      "step": 345650
    },
    {
      "epoch": 1.2114521233532052,
      "grad_norm": 2.53125,
      "learning_rate": 3.3121360571768295e-05,
      "loss": 0.9713,
      "step": 345660
    },
    {
      "epoch": 1.211487170860101,
      "grad_norm": 2.671875,
      "learning_rate": 3.3120711543104596e-05,
      "loss": 0.9136,
      "step": 345670
    },
    {
      "epoch": 1.2115222183669965,
      "grad_norm": 2.6875,
      "learning_rate": 3.312006251444089e-05,
      "loss": 0.9113,
      "step": 345680
    },
    {
      "epoch": 1.211557265873892,
      "grad_norm": 3.046875,
      "learning_rate": 3.311941348577719e-05,
      "loss": 0.8446,
      "step": 345690
    },
    {
      "epoch": 1.2115923133807875,
      "grad_norm": 2.921875,
      "learning_rate": 3.311876445711349e-05,
      "loss": 0.9415,
      "step": 345700
    },
    {
      "epoch": 1.2116273608876833,
      "grad_norm": 2.90625,
      "learning_rate": 3.311811542844979e-05,
      "loss": 0.9075,
      "step": 345710
    },
    {
      "epoch": 1.2116624083945788,
      "grad_norm": 2.78125,
      "learning_rate": 3.3117466399786076e-05,
      "loss": 0.8402,
      "step": 345720
    },
    {
      "epoch": 1.2116974559014744,
      "grad_norm": 2.78125,
      "learning_rate": 3.311681737112238e-05,
      "loss": 0.8741,
      "step": 345730
    },
    {
      "epoch": 1.2117325034083701,
      "grad_norm": 2.78125,
      "learning_rate": 3.311616834245867e-05,
      "loss": 0.8443,
      "step": 345740
    },
    {
      "epoch": 1.2117675509152657,
      "grad_norm": 3.125,
      "learning_rate": 3.3115519313794974e-05,
      "loss": 0.8951,
      "step": 345750
    },
    {
      "epoch": 1.2118025984221612,
      "grad_norm": 2.65625,
      "learning_rate": 3.3114870285131275e-05,
      "loss": 0.8396,
      "step": 345760
    },
    {
      "epoch": 1.211837645929057,
      "grad_norm": 2.78125,
      "learning_rate": 3.311422125646757e-05,
      "loss": 0.8481,
      "step": 345770
    },
    {
      "epoch": 1.2118726934359525,
      "grad_norm": 2.796875,
      "learning_rate": 3.311357222780387e-05,
      "loss": 0.8793,
      "step": 345780
    },
    {
      "epoch": 1.211907740942848,
      "grad_norm": 2.875,
      "learning_rate": 3.3112923199140166e-05,
      "loss": 0.8544,
      "step": 345790
    },
    {
      "epoch": 1.2119427884497436,
      "grad_norm": 2.53125,
      "learning_rate": 3.311227417047647e-05,
      "loss": 0.8887,
      "step": 345800
    },
    {
      "epoch": 1.211977835956639,
      "grad_norm": 2.359375,
      "learning_rate": 3.311162514181276e-05,
      "loss": 0.8617,
      "step": 345810
    },
    {
      "epoch": 1.2120128834635349,
      "grad_norm": 3.03125,
      "learning_rate": 3.311097611314906e-05,
      "loss": 0.8761,
      "step": 345820
    },
    {
      "epoch": 1.2120479309704304,
      "grad_norm": 2.453125,
      "learning_rate": 3.311032708448536e-05,
      "loss": 0.811,
      "step": 345830
    },
    {
      "epoch": 1.212082978477326,
      "grad_norm": 2.765625,
      "learning_rate": 3.310967805582166e-05,
      "loss": 0.7464,
      "step": 345840
    },
    {
      "epoch": 1.2121180259842217,
      "grad_norm": 2.671875,
      "learning_rate": 3.3109029027157954e-05,
      "loss": 0.7779,
      "step": 345850
    },
    {
      "epoch": 1.2121530734911172,
      "grad_norm": 2.6875,
      "learning_rate": 3.3108379998494255e-05,
      "loss": 0.8435,
      "step": 345860
    },
    {
      "epoch": 1.2121881209980128,
      "grad_norm": 2.53125,
      "learning_rate": 3.310773096983055e-05,
      "loss": 0.8237,
      "step": 345870
    },
    {
      "epoch": 1.2122231685049085,
      "grad_norm": 2.8125,
      "learning_rate": 3.310708194116685e-05,
      "loss": 0.9037,
      "step": 345880
    },
    {
      "epoch": 1.212258216011804,
      "grad_norm": 3.203125,
      "learning_rate": 3.310643291250315e-05,
      "loss": 0.8599,
      "step": 345890
    },
    {
      "epoch": 1.2122932635186996,
      "grad_norm": 2.625,
      "learning_rate": 3.310578388383945e-05,
      "loss": 0.8694,
      "step": 345900
    },
    {
      "epoch": 1.2123283110255951,
      "grad_norm": 2.921875,
      "learning_rate": 3.310513485517575e-05,
      "loss": 0.9466,
      "step": 345910
    },
    {
      "epoch": 1.2123633585324909,
      "grad_norm": 3.09375,
      "learning_rate": 3.310448582651204e-05,
      "loss": 0.796,
      "step": 345920
    },
    {
      "epoch": 1.2123984060393864,
      "grad_norm": 2.90625,
      "learning_rate": 3.3103836797848344e-05,
      "loss": 0.8231,
      "step": 345930
    },
    {
      "epoch": 1.212433453546282,
      "grad_norm": 3.734375,
      "learning_rate": 3.310318776918464e-05,
      "loss": 0.9305,
      "step": 345940
    },
    {
      "epoch": 1.2124685010531775,
      "grad_norm": 3.265625,
      "learning_rate": 3.310253874052094e-05,
      "loss": 0.8414,
      "step": 345950
    },
    {
      "epoch": 1.2125035485600733,
      "grad_norm": 3.09375,
      "learning_rate": 3.3101889711857235e-05,
      "loss": 0.8813,
      "step": 345960
    },
    {
      "epoch": 1.2125385960669688,
      "grad_norm": 2.515625,
      "learning_rate": 3.3101240683193536e-05,
      "loss": 0.8019,
      "step": 345970
    },
    {
      "epoch": 1.2125736435738643,
      "grad_norm": 3.078125,
      "learning_rate": 3.310059165452983e-05,
      "loss": 0.8764,
      "step": 345980
    },
    {
      "epoch": 1.21260869108076,
      "grad_norm": 2.984375,
      "learning_rate": 3.309994262586613e-05,
      "loss": 0.8584,
      "step": 345990
    },
    {
      "epoch": 1.2126437385876556,
      "grad_norm": 2.953125,
      "learning_rate": 3.309929359720243e-05,
      "loss": 0.8295,
      "step": 346000
    },
    {
      "epoch": 1.2126787860945512,
      "grad_norm": 2.5625,
      "learning_rate": 3.309864456853873e-05,
      "loss": 0.8762,
      "step": 346010
    },
    {
      "epoch": 1.2127138336014467,
      "grad_norm": 3.015625,
      "learning_rate": 3.309799553987503e-05,
      "loss": 0.8286,
      "step": 346020
    },
    {
      "epoch": 1.2127488811083424,
      "grad_norm": 2.78125,
      "learning_rate": 3.3097346511211324e-05,
      "loss": 0.8927,
      "step": 346030
    },
    {
      "epoch": 1.212783928615238,
      "grad_norm": 2.609375,
      "learning_rate": 3.3096697482547626e-05,
      "loss": 0.7615,
      "step": 346040
    },
    {
      "epoch": 1.2128189761221335,
      "grad_norm": 2.6875,
      "learning_rate": 3.309604845388392e-05,
      "loss": 0.902,
      "step": 346050
    },
    {
      "epoch": 1.212854023629029,
      "grad_norm": 2.6875,
      "learning_rate": 3.309539942522022e-05,
      "loss": 0.795,
      "step": 346060
    },
    {
      "epoch": 1.2128890711359248,
      "grad_norm": 3.390625,
      "learning_rate": 3.3094750396556516e-05,
      "loss": 0.8095,
      "step": 346070
    },
    {
      "epoch": 1.2129241186428203,
      "grad_norm": 3.09375,
      "learning_rate": 3.309410136789282e-05,
      "loss": 0.9465,
      "step": 346080
    },
    {
      "epoch": 1.2129591661497159,
      "grad_norm": 3.453125,
      "learning_rate": 3.309345233922911e-05,
      "loss": 0.8622,
      "step": 346090
    },
    {
      "epoch": 1.2129942136566116,
      "grad_norm": 2.953125,
      "learning_rate": 3.309280331056541e-05,
      "loss": 0.8412,
      "step": 346100
    },
    {
      "epoch": 1.2130292611635072,
      "grad_norm": 2.671875,
      "learning_rate": 3.309215428190171e-05,
      "loss": 0.8995,
      "step": 346110
    },
    {
      "epoch": 1.2130643086704027,
      "grad_norm": 2.859375,
      "learning_rate": 3.3091505253238e-05,
      "loss": 0.8864,
      "step": 346120
    },
    {
      "epoch": 1.2130993561772982,
      "grad_norm": 3.125,
      "learning_rate": 3.3090856224574304e-05,
      "loss": 0.8963,
      "step": 346130
    },
    {
      "epoch": 1.213134403684194,
      "grad_norm": 2.796875,
      "learning_rate": 3.30902071959106e-05,
      "loss": 0.8184,
      "step": 346140
    },
    {
      "epoch": 1.2131694511910895,
      "grad_norm": 2.546875,
      "learning_rate": 3.30895581672469e-05,
      "loss": 0.7885,
      "step": 346150
    },
    {
      "epoch": 1.213204498697985,
      "grad_norm": 2.578125,
      "learning_rate": 3.3088909138583195e-05,
      "loss": 0.8424,
      "step": 346160
    },
    {
      "epoch": 1.2132395462048806,
      "grad_norm": 3.21875,
      "learning_rate": 3.3088260109919496e-05,
      "loss": 0.8164,
      "step": 346170
    },
    {
      "epoch": 1.2132745937117764,
      "grad_norm": 3.0625,
      "learning_rate": 3.308761108125579e-05,
      "loss": 0.8493,
      "step": 346180
    },
    {
      "epoch": 1.213309641218672,
      "grad_norm": 2.984375,
      "learning_rate": 3.308696205259209e-05,
      "loss": 0.9349,
      "step": 346190
    },
    {
      "epoch": 1.2133446887255674,
      "grad_norm": 3.015625,
      "learning_rate": 3.308631302392839e-05,
      "loss": 0.8798,
      "step": 346200
    },
    {
      "epoch": 1.2133797362324632,
      "grad_norm": 2.828125,
      "learning_rate": 3.308566399526469e-05,
      "loss": 0.85,
      "step": 346210
    },
    {
      "epoch": 1.2134147837393587,
      "grad_norm": 3.09375,
      "learning_rate": 3.308501496660098e-05,
      "loss": 0.8235,
      "step": 346220
    },
    {
      "epoch": 1.2134498312462543,
      "grad_norm": 2.71875,
      "learning_rate": 3.3084365937937284e-05,
      "loss": 0.9379,
      "step": 346230
    },
    {
      "epoch": 1.2134848787531498,
      "grad_norm": 3.0,
      "learning_rate": 3.308371690927358e-05,
      "loss": 0.8269,
      "step": 346240
    },
    {
      "epoch": 1.2135199262600456,
      "grad_norm": 2.84375,
      "learning_rate": 3.308306788060988e-05,
      "loss": 0.8474,
      "step": 346250
    },
    {
      "epoch": 1.213554973766941,
      "grad_norm": 3.4375,
      "learning_rate": 3.308241885194618e-05,
      "loss": 0.8504,
      "step": 346260
    },
    {
      "epoch": 1.2135900212738366,
      "grad_norm": 3.234375,
      "learning_rate": 3.3081769823282476e-05,
      "loss": 0.8693,
      "step": 346270
    },
    {
      "epoch": 1.2136250687807322,
      "grad_norm": 3.234375,
      "learning_rate": 3.308112079461878e-05,
      "loss": 0.9316,
      "step": 346280
    },
    {
      "epoch": 1.213660116287628,
      "grad_norm": 2.84375,
      "learning_rate": 3.308047176595507e-05,
      "loss": 0.8538,
      "step": 346290
    },
    {
      "epoch": 1.2136951637945235,
      "grad_norm": 2.78125,
      "learning_rate": 3.3079822737291374e-05,
      "loss": 0.8733,
      "step": 346300
    },
    {
      "epoch": 1.213730211301419,
      "grad_norm": 3.265625,
      "learning_rate": 3.307917370862767e-05,
      "loss": 0.8027,
      "step": 346310
    },
    {
      "epoch": 1.2137652588083148,
      "grad_norm": 2.703125,
      "learning_rate": 3.307852467996397e-05,
      "loss": 0.8069,
      "step": 346320
    },
    {
      "epoch": 1.2138003063152103,
      "grad_norm": 2.703125,
      "learning_rate": 3.3077875651300264e-05,
      "loss": 0.7931,
      "step": 346330
    },
    {
      "epoch": 1.2138353538221058,
      "grad_norm": 2.71875,
      "learning_rate": 3.3077226622636566e-05,
      "loss": 0.8053,
      "step": 346340
    },
    {
      "epoch": 1.2138704013290014,
      "grad_norm": 3.328125,
      "learning_rate": 3.307657759397286e-05,
      "loss": 0.7976,
      "step": 346350
    },
    {
      "epoch": 1.2139054488358971,
      "grad_norm": 2.875,
      "learning_rate": 3.307592856530916e-05,
      "loss": 0.8976,
      "step": 346360
    },
    {
      "epoch": 1.2139404963427927,
      "grad_norm": 3.125,
      "learning_rate": 3.3075279536645456e-05,
      "loss": 0.9627,
      "step": 346370
    },
    {
      "epoch": 1.2139755438496882,
      "grad_norm": 3.140625,
      "learning_rate": 3.307463050798176e-05,
      "loss": 0.9224,
      "step": 346380
    },
    {
      "epoch": 1.2140105913565837,
      "grad_norm": 3.125,
      "learning_rate": 3.307398147931806e-05,
      "loss": 0.8438,
      "step": 346390
    },
    {
      "epoch": 1.2140456388634795,
      "grad_norm": 2.515625,
      "learning_rate": 3.3073332450654354e-05,
      "loss": 0.7769,
      "step": 346400
    },
    {
      "epoch": 1.214080686370375,
      "grad_norm": 2.671875,
      "learning_rate": 3.3072683421990655e-05,
      "loss": 0.8193,
      "step": 346410
    },
    {
      "epoch": 1.2141157338772706,
      "grad_norm": 2.96875,
      "learning_rate": 3.307203439332695e-05,
      "loss": 0.8893,
      "step": 346420
    },
    {
      "epoch": 1.2141507813841663,
      "grad_norm": 2.515625,
      "learning_rate": 3.307138536466325e-05,
      "loss": 0.8983,
      "step": 346430
    },
    {
      "epoch": 1.2141858288910619,
      "grad_norm": 2.78125,
      "learning_rate": 3.3070736335999546e-05,
      "loss": 0.8695,
      "step": 346440
    },
    {
      "epoch": 1.2142208763979574,
      "grad_norm": 3.234375,
      "learning_rate": 3.307008730733585e-05,
      "loss": 0.8874,
      "step": 346450
    },
    {
      "epoch": 1.2142559239048532,
      "grad_norm": 2.671875,
      "learning_rate": 3.306943827867214e-05,
      "loss": 0.8276,
      "step": 346460
    },
    {
      "epoch": 1.2142909714117487,
      "grad_norm": 2.6875,
      "learning_rate": 3.3068789250008436e-05,
      "loss": 0.9092,
      "step": 346470
    },
    {
      "epoch": 1.2143260189186442,
      "grad_norm": 2.890625,
      "learning_rate": 3.306814022134474e-05,
      "loss": 0.8353,
      "step": 346480
    },
    {
      "epoch": 1.2143610664255398,
      "grad_norm": 2.828125,
      "learning_rate": 3.306749119268103e-05,
      "loss": 0.9506,
      "step": 346490
    },
    {
      "epoch": 1.2143961139324353,
      "grad_norm": 2.71875,
      "learning_rate": 3.3066842164017334e-05,
      "loss": 0.8678,
      "step": 346500
    },
    {
      "epoch": 1.214431161439331,
      "grad_norm": 2.875,
      "learning_rate": 3.306619313535363e-05,
      "loss": 0.8944,
      "step": 346510
    },
    {
      "epoch": 1.2144662089462266,
      "grad_norm": 2.65625,
      "learning_rate": 3.306554410668993e-05,
      "loss": 0.8437,
      "step": 346520
    },
    {
      "epoch": 1.2145012564531221,
      "grad_norm": 3.125,
      "learning_rate": 3.3064895078026224e-05,
      "loss": 0.8773,
      "step": 346530
    },
    {
      "epoch": 1.2145363039600179,
      "grad_norm": 2.671875,
      "learning_rate": 3.3064246049362526e-05,
      "loss": 0.8739,
      "step": 346540
    },
    {
      "epoch": 1.2145713514669134,
      "grad_norm": 3.09375,
      "learning_rate": 3.306359702069882e-05,
      "loss": 0.8612,
      "step": 346550
    },
    {
      "epoch": 1.214606398973809,
      "grad_norm": 3.46875,
      "learning_rate": 3.306294799203512e-05,
      "loss": 0.9044,
      "step": 346560
    },
    {
      "epoch": 1.2146414464807047,
      "grad_norm": 2.78125,
      "learning_rate": 3.3062298963371416e-05,
      "loss": 0.9019,
      "step": 346570
    },
    {
      "epoch": 1.2146764939876002,
      "grad_norm": 2.734375,
      "learning_rate": 3.306164993470772e-05,
      "loss": 0.9587,
      "step": 346580
    },
    {
      "epoch": 1.2147115414944958,
      "grad_norm": 2.71875,
      "learning_rate": 3.306100090604401e-05,
      "loss": 0.8281,
      "step": 346590
    },
    {
      "epoch": 1.2147465890013913,
      "grad_norm": 2.921875,
      "learning_rate": 3.3060351877380314e-05,
      "loss": 0.9359,
      "step": 346600
    },
    {
      "epoch": 1.214781636508287,
      "grad_norm": 2.8125,
      "learning_rate": 3.305970284871661e-05,
      "loss": 0.8093,
      "step": 346610
    },
    {
      "epoch": 1.2148166840151826,
      "grad_norm": 2.703125,
      "learning_rate": 3.305905382005291e-05,
      "loss": 0.8698,
      "step": 346620
    },
    {
      "epoch": 1.2148517315220781,
      "grad_norm": 3.28125,
      "learning_rate": 3.305840479138921e-05,
      "loss": 0.9083,
      "step": 346630
    },
    {
      "epoch": 1.2148867790289737,
      "grad_norm": 2.8125,
      "learning_rate": 3.3057755762725506e-05,
      "loss": 0.809,
      "step": 346640
    },
    {
      "epoch": 1.2149218265358694,
      "grad_norm": 3.078125,
      "learning_rate": 3.305710673406181e-05,
      "loss": 0.8115,
      "step": 346650
    },
    {
      "epoch": 1.214956874042765,
      "grad_norm": 2.96875,
      "learning_rate": 3.30564577053981e-05,
      "loss": 0.8422,
      "step": 346660
    },
    {
      "epoch": 1.2149919215496605,
      "grad_norm": 2.875,
      "learning_rate": 3.30558086767344e-05,
      "loss": 0.8845,
      "step": 346670
    },
    {
      "epoch": 1.2150269690565563,
      "grad_norm": 3.265625,
      "learning_rate": 3.30551596480707e-05,
      "loss": 0.8718,
      "step": 346680
    },
    {
      "epoch": 1.2150620165634518,
      "grad_norm": 4.96875,
      "learning_rate": 3.3054510619407e-05,
      "loss": 0.8322,
      "step": 346690
    },
    {
      "epoch": 1.2150970640703473,
      "grad_norm": 2.484375,
      "learning_rate": 3.3053861590743294e-05,
      "loss": 0.8815,
      "step": 346700
    },
    {
      "epoch": 1.2151321115772429,
      "grad_norm": 2.84375,
      "learning_rate": 3.3053212562079595e-05,
      "loss": 0.7729,
      "step": 346710
    },
    {
      "epoch": 1.2151671590841386,
      "grad_norm": 2.9375,
      "learning_rate": 3.305256353341589e-05,
      "loss": 0.9166,
      "step": 346720
    },
    {
      "epoch": 1.2152022065910342,
      "grad_norm": 2.796875,
      "learning_rate": 3.305191450475219e-05,
      "loss": 0.8966,
      "step": 346730
    },
    {
      "epoch": 1.2152372540979297,
      "grad_norm": 2.796875,
      "learning_rate": 3.3051265476088486e-05,
      "loss": 0.7633,
      "step": 346740
    },
    {
      "epoch": 1.2152723016048252,
      "grad_norm": 2.28125,
      "learning_rate": 3.305061644742479e-05,
      "loss": 0.9092,
      "step": 346750
    },
    {
      "epoch": 1.215307349111721,
      "grad_norm": 3.390625,
      "learning_rate": 3.304996741876109e-05,
      "loss": 0.8565,
      "step": 346760
    },
    {
      "epoch": 1.2153423966186165,
      "grad_norm": 3.078125,
      "learning_rate": 3.304931839009738e-05,
      "loss": 0.8001,
      "step": 346770
    },
    {
      "epoch": 1.215377444125512,
      "grad_norm": 2.53125,
      "learning_rate": 3.3048669361433684e-05,
      "loss": 0.9201,
      "step": 346780
    },
    {
      "epoch": 1.2154124916324078,
      "grad_norm": 2.859375,
      "learning_rate": 3.304802033276998e-05,
      "loss": 0.9199,
      "step": 346790
    },
    {
      "epoch": 1.2154475391393034,
      "grad_norm": 2.859375,
      "learning_rate": 3.304737130410628e-05,
      "loss": 0.8134,
      "step": 346800
    },
    {
      "epoch": 1.215482586646199,
      "grad_norm": 2.765625,
      "learning_rate": 3.3046722275442575e-05,
      "loss": 0.8471,
      "step": 346810
    },
    {
      "epoch": 1.2155176341530944,
      "grad_norm": 3.0,
      "learning_rate": 3.3046073246778876e-05,
      "loss": 0.8633,
      "step": 346820
    },
    {
      "epoch": 1.2155526816599902,
      "grad_norm": 4.3125,
      "learning_rate": 3.304542421811517e-05,
      "loss": 0.8367,
      "step": 346830
    },
    {
      "epoch": 1.2155877291668857,
      "grad_norm": 2.578125,
      "learning_rate": 3.304477518945147e-05,
      "loss": 0.8382,
      "step": 346840
    },
    {
      "epoch": 1.2156227766737813,
      "grad_norm": 2.734375,
      "learning_rate": 3.304412616078777e-05,
      "loss": 0.8765,
      "step": 346850
    },
    {
      "epoch": 1.2156578241806768,
      "grad_norm": 3.9375,
      "learning_rate": 3.304347713212406e-05,
      "loss": 0.9486,
      "step": 346860
    },
    {
      "epoch": 1.2156928716875726,
      "grad_norm": 3.109375,
      "learning_rate": 3.304282810346036e-05,
      "loss": 0.879,
      "step": 346870
    },
    {
      "epoch": 1.215727919194468,
      "grad_norm": 3.109375,
      "learning_rate": 3.304217907479666e-05,
      "loss": 0.8121,
      "step": 346880
    },
    {
      "epoch": 1.2157629667013636,
      "grad_norm": 2.9375,
      "learning_rate": 3.304153004613296e-05,
      "loss": 0.8307,
      "step": 346890
    },
    {
      "epoch": 1.2157980142082594,
      "grad_norm": 3.234375,
      "learning_rate": 3.3040881017469254e-05,
      "loss": 0.8542,
      "step": 346900
    },
    {
      "epoch": 1.215833061715155,
      "grad_norm": 3.203125,
      "learning_rate": 3.3040231988805555e-05,
      "loss": 0.8914,
      "step": 346910
    },
    {
      "epoch": 1.2158681092220505,
      "grad_norm": 2.875,
      "learning_rate": 3.303958296014185e-05,
      "loss": 0.8674,
      "step": 346920
    },
    {
      "epoch": 1.215903156728946,
      "grad_norm": 3.140625,
      "learning_rate": 3.303893393147815e-05,
      "loss": 0.7812,
      "step": 346930
    },
    {
      "epoch": 1.2159382042358418,
      "grad_norm": 2.96875,
      "learning_rate": 3.3038284902814446e-05,
      "loss": 0.8317,
      "step": 346940
    },
    {
      "epoch": 1.2159732517427373,
      "grad_norm": 2.75,
      "learning_rate": 3.303763587415075e-05,
      "loss": 0.8443,
      "step": 346950
    },
    {
      "epoch": 1.2160082992496328,
      "grad_norm": 2.734375,
      "learning_rate": 3.303698684548704e-05,
      "loss": 0.7602,
      "step": 346960
    },
    {
      "epoch": 1.2160433467565284,
      "grad_norm": 3.125,
      "learning_rate": 3.303633781682334e-05,
      "loss": 0.9121,
      "step": 346970
    },
    {
      "epoch": 1.2160783942634241,
      "grad_norm": 2.65625,
      "learning_rate": 3.3035688788159644e-05,
      "loss": 0.8962,
      "step": 346980
    },
    {
      "epoch": 1.2161134417703197,
      "grad_norm": 3.0625,
      "learning_rate": 3.303503975949594e-05,
      "loss": 0.9154,
      "step": 346990
    },
    {
      "epoch": 1.2161484892772152,
      "grad_norm": 3.015625,
      "learning_rate": 3.303439073083224e-05,
      "loss": 0.9129,
      "step": 347000
    },
    {
      "epoch": 1.216183536784111,
      "grad_norm": 2.609375,
      "learning_rate": 3.3033741702168535e-05,
      "loss": 0.8174,
      "step": 347010
    },
    {
      "epoch": 1.2162185842910065,
      "grad_norm": 2.859375,
      "learning_rate": 3.3033092673504836e-05,
      "loss": 0.8772,
      "step": 347020
    },
    {
      "epoch": 1.216253631797902,
      "grad_norm": 3.21875,
      "learning_rate": 3.303244364484113e-05,
      "loss": 0.9421,
      "step": 347030
    },
    {
      "epoch": 1.2162886793047976,
      "grad_norm": 2.953125,
      "learning_rate": 3.303179461617743e-05,
      "loss": 0.8876,
      "step": 347040
    },
    {
      "epoch": 1.2163237268116933,
      "grad_norm": 2.421875,
      "learning_rate": 3.303114558751373e-05,
      "loss": 0.7844,
      "step": 347050
    },
    {
      "epoch": 1.2163587743185889,
      "grad_norm": 2.859375,
      "learning_rate": 3.303049655885003e-05,
      "loss": 0.8269,
      "step": 347060
    },
    {
      "epoch": 1.2163938218254844,
      "grad_norm": 3.296875,
      "learning_rate": 3.302984753018632e-05,
      "loss": 0.8112,
      "step": 347070
    },
    {
      "epoch": 1.21642886933238,
      "grad_norm": 3.15625,
      "learning_rate": 3.3029198501522624e-05,
      "loss": 0.802,
      "step": 347080
    },
    {
      "epoch": 1.2164639168392757,
      "grad_norm": 3.0,
      "learning_rate": 3.302854947285892e-05,
      "loss": 0.9369,
      "step": 347090
    },
    {
      "epoch": 1.2164989643461712,
      "grad_norm": 2.78125,
      "learning_rate": 3.302790044419522e-05,
      "loss": 0.8542,
      "step": 347100
    },
    {
      "epoch": 1.2165340118530668,
      "grad_norm": 2.84375,
      "learning_rate": 3.3027251415531515e-05,
      "loss": 0.8351,
      "step": 347110
    },
    {
      "epoch": 1.2165690593599625,
      "grad_norm": 2.90625,
      "learning_rate": 3.3026602386867816e-05,
      "loss": 0.8946,
      "step": 347120
    },
    {
      "epoch": 1.216604106866858,
      "grad_norm": 2.578125,
      "learning_rate": 3.302595335820412e-05,
      "loss": 0.8766,
      "step": 347130
    },
    {
      "epoch": 1.2166391543737536,
      "grad_norm": 2.828125,
      "learning_rate": 3.302530432954041e-05,
      "loss": 0.8523,
      "step": 347140
    },
    {
      "epoch": 1.2166742018806493,
      "grad_norm": 3.15625,
      "learning_rate": 3.3024655300876714e-05,
      "loss": 0.8085,
      "step": 347150
    },
    {
      "epoch": 1.2167092493875449,
      "grad_norm": 2.71875,
      "learning_rate": 3.302400627221301e-05,
      "loss": 0.8372,
      "step": 347160
    },
    {
      "epoch": 1.2167442968944404,
      "grad_norm": 2.703125,
      "learning_rate": 3.302335724354931e-05,
      "loss": 0.8555,
      "step": 347170
    },
    {
      "epoch": 1.216779344401336,
      "grad_norm": 2.984375,
      "learning_rate": 3.3022708214885604e-05,
      "loss": 0.8869,
      "step": 347180
    },
    {
      "epoch": 1.2168143919082315,
      "grad_norm": 3.109375,
      "learning_rate": 3.3022059186221906e-05,
      "loss": 0.7712,
      "step": 347190
    },
    {
      "epoch": 1.2168494394151272,
      "grad_norm": 3.15625,
      "learning_rate": 3.30214101575582e-05,
      "loss": 0.8725,
      "step": 347200
    },
    {
      "epoch": 1.2168844869220228,
      "grad_norm": 2.328125,
      "learning_rate": 3.30207611288945e-05,
      "loss": 0.8722,
      "step": 347210
    },
    {
      "epoch": 1.2169195344289183,
      "grad_norm": 2.203125,
      "learning_rate": 3.3020112100230796e-05,
      "loss": 0.8832,
      "step": 347220
    },
    {
      "epoch": 1.216954581935814,
      "grad_norm": 3.0625,
      "learning_rate": 3.301946307156709e-05,
      "loss": 0.7844,
      "step": 347230
    },
    {
      "epoch": 1.2169896294427096,
      "grad_norm": 3.03125,
      "learning_rate": 3.301881404290339e-05,
      "loss": 0.8306,
      "step": 347240
    },
    {
      "epoch": 1.2170246769496051,
      "grad_norm": 3.265625,
      "learning_rate": 3.301816501423969e-05,
      "loss": 0.8898,
      "step": 347250
    },
    {
      "epoch": 1.217059724456501,
      "grad_norm": 2.890625,
      "learning_rate": 3.301751598557599e-05,
      "loss": 0.8165,
      "step": 347260
    },
    {
      "epoch": 1.2170947719633964,
      "grad_norm": 3.109375,
      "learning_rate": 3.301686695691228e-05,
      "loss": 0.8538,
      "step": 347270
    },
    {
      "epoch": 1.217129819470292,
      "grad_norm": 3.0,
      "learning_rate": 3.3016217928248584e-05,
      "loss": 0.8808,
      "step": 347280
    },
    {
      "epoch": 1.2171648669771875,
      "grad_norm": 2.78125,
      "learning_rate": 3.301556889958488e-05,
      "loss": 0.844,
      "step": 347290
    },
    {
      "epoch": 1.2171999144840833,
      "grad_norm": 2.890625,
      "learning_rate": 3.301491987092118e-05,
      "loss": 0.9101,
      "step": 347300
    },
    {
      "epoch": 1.2172349619909788,
      "grad_norm": 2.640625,
      "learning_rate": 3.3014270842257475e-05,
      "loss": 0.8157,
      "step": 347310
    },
    {
      "epoch": 1.2172700094978743,
      "grad_norm": 3.046875,
      "learning_rate": 3.3013621813593776e-05,
      "loss": 0.8807,
      "step": 347320
    },
    {
      "epoch": 1.2173050570047699,
      "grad_norm": 2.609375,
      "learning_rate": 3.301297278493007e-05,
      "loss": 0.8774,
      "step": 347330
    },
    {
      "epoch": 1.2173401045116656,
      "grad_norm": 2.8125,
      "learning_rate": 3.301232375626637e-05,
      "loss": 0.8731,
      "step": 347340
    },
    {
      "epoch": 1.2173751520185612,
      "grad_norm": 3.03125,
      "learning_rate": 3.3011674727602674e-05,
      "loss": 0.8735,
      "step": 347350
    },
    {
      "epoch": 1.2174101995254567,
      "grad_norm": 2.890625,
      "learning_rate": 3.301102569893897e-05,
      "loss": 0.8427,
      "step": 347360
    },
    {
      "epoch": 1.2174452470323525,
      "grad_norm": 2.34375,
      "learning_rate": 3.301037667027527e-05,
      "loss": 0.867,
      "step": 347370
    },
    {
      "epoch": 1.217480294539248,
      "grad_norm": 2.90625,
      "learning_rate": 3.3009727641611564e-05,
      "loss": 0.934,
      "step": 347380
    },
    {
      "epoch": 1.2175153420461435,
      "grad_norm": 3.140625,
      "learning_rate": 3.3009078612947866e-05,
      "loss": 0.8734,
      "step": 347390
    },
    {
      "epoch": 1.217550389553039,
      "grad_norm": 2.625,
      "learning_rate": 3.300842958428416e-05,
      "loss": 0.8949,
      "step": 347400
    },
    {
      "epoch": 1.2175854370599348,
      "grad_norm": 3.15625,
      "learning_rate": 3.300778055562046e-05,
      "loss": 0.8942,
      "step": 347410
    },
    {
      "epoch": 1.2176204845668304,
      "grad_norm": 2.984375,
      "learning_rate": 3.3007131526956756e-05,
      "loss": 0.8731,
      "step": 347420
    },
    {
      "epoch": 1.217655532073726,
      "grad_norm": 3.046875,
      "learning_rate": 3.300648249829306e-05,
      "loss": 0.8977,
      "step": 347430
    },
    {
      "epoch": 1.2176905795806214,
      "grad_norm": 2.71875,
      "learning_rate": 3.300583346962935e-05,
      "loss": 0.9367,
      "step": 347440
    },
    {
      "epoch": 1.2177256270875172,
      "grad_norm": 2.71875,
      "learning_rate": 3.3005184440965654e-05,
      "loss": 0.8553,
      "step": 347450
    },
    {
      "epoch": 1.2177606745944127,
      "grad_norm": 3.078125,
      "learning_rate": 3.300453541230195e-05,
      "loss": 0.8032,
      "step": 347460
    },
    {
      "epoch": 1.2177957221013083,
      "grad_norm": 2.96875,
      "learning_rate": 3.300388638363825e-05,
      "loss": 0.8063,
      "step": 347470
    },
    {
      "epoch": 1.217830769608204,
      "grad_norm": 2.828125,
      "learning_rate": 3.3003237354974544e-05,
      "loss": 0.8584,
      "step": 347480
    },
    {
      "epoch": 1.2178658171150996,
      "grad_norm": 2.9375,
      "learning_rate": 3.3002588326310846e-05,
      "loss": 0.9022,
      "step": 347490
    },
    {
      "epoch": 1.217900864621995,
      "grad_norm": 3.15625,
      "learning_rate": 3.300193929764715e-05,
      "loss": 0.8279,
      "step": 347500
    },
    {
      "epoch": 1.2179359121288906,
      "grad_norm": 2.984375,
      "learning_rate": 3.300129026898344e-05,
      "loss": 0.9062,
      "step": 347510
    },
    {
      "epoch": 1.2179709596357864,
      "grad_norm": 3.234375,
      "learning_rate": 3.300064124031974e-05,
      "loss": 0.8933,
      "step": 347520
    },
    {
      "epoch": 1.218006007142682,
      "grad_norm": 2.515625,
      "learning_rate": 3.299999221165604e-05,
      "loss": 0.8703,
      "step": 347530
    },
    {
      "epoch": 1.2180410546495775,
      "grad_norm": 3.375,
      "learning_rate": 3.299934318299234e-05,
      "loss": 0.862,
      "step": 347540
    },
    {
      "epoch": 1.218076102156473,
      "grad_norm": 3.03125,
      "learning_rate": 3.2998694154328634e-05,
      "loss": 0.8553,
      "step": 347550
    },
    {
      "epoch": 1.2181111496633688,
      "grad_norm": 2.390625,
      "learning_rate": 3.2998045125664935e-05,
      "loss": 0.8382,
      "step": 347560
    },
    {
      "epoch": 1.2181461971702643,
      "grad_norm": 2.796875,
      "learning_rate": 3.299739609700123e-05,
      "loss": 0.8432,
      "step": 347570
    },
    {
      "epoch": 1.2181812446771598,
      "grad_norm": 3.0625,
      "learning_rate": 3.299674706833753e-05,
      "loss": 0.9391,
      "step": 347580
    },
    {
      "epoch": 1.2182162921840556,
      "grad_norm": 3.46875,
      "learning_rate": 3.2996098039673826e-05,
      "loss": 0.8647,
      "step": 347590
    },
    {
      "epoch": 1.2182513396909511,
      "grad_norm": 3.09375,
      "learning_rate": 3.299544901101012e-05,
      "loss": 0.861,
      "step": 347600
    },
    {
      "epoch": 1.2182863871978467,
      "grad_norm": 2.984375,
      "learning_rate": 3.299479998234642e-05,
      "loss": 0.872,
      "step": 347610
    },
    {
      "epoch": 1.2183214347047422,
      "grad_norm": 2.78125,
      "learning_rate": 3.2994150953682716e-05,
      "loss": 0.7955,
      "step": 347620
    },
    {
      "epoch": 1.218356482211638,
      "grad_norm": 2.609375,
      "learning_rate": 3.299350192501902e-05,
      "loss": 0.7719,
      "step": 347630
    },
    {
      "epoch": 1.2183915297185335,
      "grad_norm": 3.03125,
      "learning_rate": 3.299285289635531e-05,
      "loss": 0.818,
      "step": 347640
    },
    {
      "epoch": 1.218426577225429,
      "grad_norm": 3.109375,
      "learning_rate": 3.2992203867691614e-05,
      "loss": 0.8408,
      "step": 347650
    },
    {
      "epoch": 1.2184616247323246,
      "grad_norm": 3.765625,
      "learning_rate": 3.299155483902791e-05,
      "loss": 0.8406,
      "step": 347660
    },
    {
      "epoch": 1.2184966722392203,
      "grad_norm": 2.953125,
      "learning_rate": 3.299090581036421e-05,
      "loss": 0.8012,
      "step": 347670
    },
    {
      "epoch": 1.2185317197461158,
      "grad_norm": 2.828125,
      "learning_rate": 3.2990256781700504e-05,
      "loss": 0.935,
      "step": 347680
    },
    {
      "epoch": 1.2185667672530114,
      "grad_norm": 2.84375,
      "learning_rate": 3.2989607753036806e-05,
      "loss": 0.9611,
      "step": 347690
    },
    {
      "epoch": 1.2186018147599071,
      "grad_norm": 2.953125,
      "learning_rate": 3.29889587243731e-05,
      "loss": 0.8197,
      "step": 347700
    },
    {
      "epoch": 1.2186368622668027,
      "grad_norm": 2.9375,
      "learning_rate": 3.29883096957094e-05,
      "loss": 0.7849,
      "step": 347710
    },
    {
      "epoch": 1.2186719097736982,
      "grad_norm": 2.78125,
      "learning_rate": 3.29876606670457e-05,
      "loss": 0.8776,
      "step": 347720
    },
    {
      "epoch": 1.2187069572805938,
      "grad_norm": 2.78125,
      "learning_rate": 3.2987011638382e-05,
      "loss": 0.7805,
      "step": 347730
    },
    {
      "epoch": 1.2187420047874895,
      "grad_norm": 2.59375,
      "learning_rate": 3.29863626097183e-05,
      "loss": 0.8035,
      "step": 347740
    },
    {
      "epoch": 1.218777052294385,
      "grad_norm": 2.84375,
      "learning_rate": 3.2985713581054594e-05,
      "loss": 0.7995,
      "step": 347750
    },
    {
      "epoch": 1.2188120998012806,
      "grad_norm": 3.203125,
      "learning_rate": 3.2985064552390895e-05,
      "loss": 0.914,
      "step": 347760
    },
    {
      "epoch": 1.2188471473081761,
      "grad_norm": 2.90625,
      "learning_rate": 3.298441552372719e-05,
      "loss": 0.8714,
      "step": 347770
    },
    {
      "epoch": 1.2188821948150719,
      "grad_norm": 3.390625,
      "learning_rate": 3.298376649506349e-05,
      "loss": 0.9116,
      "step": 347780
    },
    {
      "epoch": 1.2189172423219674,
      "grad_norm": 3.03125,
      "learning_rate": 3.2983117466399786e-05,
      "loss": 0.8878,
      "step": 347790
    },
    {
      "epoch": 1.218952289828863,
      "grad_norm": 2.515625,
      "learning_rate": 3.298246843773609e-05,
      "loss": 0.8269,
      "step": 347800
    },
    {
      "epoch": 1.2189873373357587,
      "grad_norm": 2.859375,
      "learning_rate": 3.298181940907238e-05,
      "loss": 0.9083,
      "step": 347810
    },
    {
      "epoch": 1.2190223848426542,
      "grad_norm": 2.796875,
      "learning_rate": 3.298117038040868e-05,
      "loss": 0.8465,
      "step": 347820
    },
    {
      "epoch": 1.2190574323495498,
      "grad_norm": 2.703125,
      "learning_rate": 3.298052135174498e-05,
      "loss": 0.8665,
      "step": 347830
    },
    {
      "epoch": 1.2190924798564455,
      "grad_norm": 3.15625,
      "learning_rate": 3.297987232308128e-05,
      "loss": 0.8863,
      "step": 347840
    },
    {
      "epoch": 1.219127527363341,
      "grad_norm": 2.578125,
      "learning_rate": 3.2979223294417574e-05,
      "loss": 0.827,
      "step": 347850
    },
    {
      "epoch": 1.2191625748702366,
      "grad_norm": 3.15625,
      "learning_rate": 3.2978574265753875e-05,
      "loss": 0.9045,
      "step": 347860
    },
    {
      "epoch": 1.2191976223771321,
      "grad_norm": 2.640625,
      "learning_rate": 3.2977925237090177e-05,
      "loss": 0.8649,
      "step": 347870
    },
    {
      "epoch": 1.2192326698840277,
      "grad_norm": 2.875,
      "learning_rate": 3.297727620842647e-05,
      "loss": 0.9,
      "step": 347880
    },
    {
      "epoch": 1.2192677173909234,
      "grad_norm": 2.484375,
      "learning_rate": 3.297662717976277e-05,
      "loss": 0.8774,
      "step": 347890
    },
    {
      "epoch": 1.219302764897819,
      "grad_norm": 2.78125,
      "learning_rate": 3.297597815109907e-05,
      "loss": 0.8448,
      "step": 347900
    },
    {
      "epoch": 1.2193378124047145,
      "grad_norm": 3.265625,
      "learning_rate": 3.297532912243537e-05,
      "loss": 0.8697,
      "step": 347910
    },
    {
      "epoch": 1.2193728599116103,
      "grad_norm": 3.15625,
      "learning_rate": 3.297468009377166e-05,
      "loss": 0.8324,
      "step": 347920
    },
    {
      "epoch": 1.2194079074185058,
      "grad_norm": 3.3125,
      "learning_rate": 3.2974031065107965e-05,
      "loss": 0.8677,
      "step": 347930
    },
    {
      "epoch": 1.2194429549254013,
      "grad_norm": 3.1875,
      "learning_rate": 3.297338203644426e-05,
      "loss": 0.8432,
      "step": 347940
    },
    {
      "epoch": 1.219478002432297,
      "grad_norm": 2.578125,
      "learning_rate": 3.297273300778056e-05,
      "loss": 0.891,
      "step": 347950
    },
    {
      "epoch": 1.2195130499391926,
      "grad_norm": 2.546875,
      "learning_rate": 3.2972083979116855e-05,
      "loss": 0.8826,
      "step": 347960
    },
    {
      "epoch": 1.2195480974460882,
      "grad_norm": 2.90625,
      "learning_rate": 3.2971434950453157e-05,
      "loss": 0.8134,
      "step": 347970
    },
    {
      "epoch": 1.2195831449529837,
      "grad_norm": 2.765625,
      "learning_rate": 3.297078592178945e-05,
      "loss": 0.7898,
      "step": 347980
    },
    {
      "epoch": 1.2196181924598795,
      "grad_norm": 2.765625,
      "learning_rate": 3.2970136893125746e-05,
      "loss": 0.8619,
      "step": 347990
    },
    {
      "epoch": 1.219653239966775,
      "grad_norm": 2.8125,
      "learning_rate": 3.296948786446205e-05,
      "loss": 0.8459,
      "step": 348000
    },
    {
      "epoch": 1.2196882874736705,
      "grad_norm": 2.453125,
      "learning_rate": 3.296883883579834e-05,
      "loss": 0.8485,
      "step": 348010
    },
    {
      "epoch": 1.219723334980566,
      "grad_norm": 2.9375,
      "learning_rate": 3.296818980713464e-05,
      "loss": 0.8424,
      "step": 348020
    },
    {
      "epoch": 1.2197583824874618,
      "grad_norm": 3.03125,
      "learning_rate": 3.296754077847094e-05,
      "loss": 0.8695,
      "step": 348030
    },
    {
      "epoch": 1.2197934299943574,
      "grad_norm": 2.4375,
      "learning_rate": 3.296689174980724e-05,
      "loss": 0.757,
      "step": 348040
    },
    {
      "epoch": 1.219828477501253,
      "grad_norm": 2.71875,
      "learning_rate": 3.2966242721143534e-05,
      "loss": 0.874,
      "step": 348050
    },
    {
      "epoch": 1.2198635250081487,
      "grad_norm": 2.890625,
      "learning_rate": 3.2965593692479835e-05,
      "loss": 0.8224,
      "step": 348060
    },
    {
      "epoch": 1.2198985725150442,
      "grad_norm": 2.859375,
      "learning_rate": 3.296494466381613e-05,
      "loss": 0.8431,
      "step": 348070
    },
    {
      "epoch": 1.2199336200219397,
      "grad_norm": 3.015625,
      "learning_rate": 3.296429563515243e-05,
      "loss": 0.8398,
      "step": 348080
    },
    {
      "epoch": 1.2199686675288353,
      "grad_norm": 2.609375,
      "learning_rate": 3.296364660648873e-05,
      "loss": 0.8204,
      "step": 348090
    },
    {
      "epoch": 1.220003715035731,
      "grad_norm": 2.625,
      "learning_rate": 3.296299757782503e-05,
      "loss": 0.9109,
      "step": 348100
    },
    {
      "epoch": 1.2200387625426266,
      "grad_norm": 3.03125,
      "learning_rate": 3.296234854916133e-05,
      "loss": 0.8293,
      "step": 348110
    },
    {
      "epoch": 1.220073810049522,
      "grad_norm": 2.5625,
      "learning_rate": 3.296169952049762e-05,
      "loss": 0.8934,
      "step": 348120
    },
    {
      "epoch": 1.2201088575564176,
      "grad_norm": 2.765625,
      "learning_rate": 3.2961050491833925e-05,
      "loss": 0.8566,
      "step": 348130
    },
    {
      "epoch": 1.2201439050633134,
      "grad_norm": 2.453125,
      "learning_rate": 3.296040146317022e-05,
      "loss": 0.839,
      "step": 348140
    },
    {
      "epoch": 1.220178952570209,
      "grad_norm": 2.875,
      "learning_rate": 3.295975243450652e-05,
      "loss": 0.8277,
      "step": 348150
    },
    {
      "epoch": 1.2202140000771045,
      "grad_norm": 3.03125,
      "learning_rate": 3.2959103405842815e-05,
      "loss": 0.7747,
      "step": 348160
    },
    {
      "epoch": 1.2202490475840002,
      "grad_norm": 2.703125,
      "learning_rate": 3.2958454377179117e-05,
      "loss": 0.859,
      "step": 348170
    },
    {
      "epoch": 1.2202840950908957,
      "grad_norm": 3.0,
      "learning_rate": 3.295780534851541e-05,
      "loss": 0.8267,
      "step": 348180
    },
    {
      "epoch": 1.2203191425977913,
      "grad_norm": 2.796875,
      "learning_rate": 3.295715631985171e-05,
      "loss": 0.8458,
      "step": 348190
    },
    {
      "epoch": 1.2203541901046868,
      "grad_norm": 3.015625,
      "learning_rate": 3.295650729118801e-05,
      "loss": 0.9488,
      "step": 348200
    },
    {
      "epoch": 1.2203892376115826,
      "grad_norm": 3.296875,
      "learning_rate": 3.295585826252431e-05,
      "loss": 0.9878,
      "step": 348210
    },
    {
      "epoch": 1.2204242851184781,
      "grad_norm": 3.109375,
      "learning_rate": 3.295520923386061e-05,
      "loss": 0.8618,
      "step": 348220
    },
    {
      "epoch": 1.2204593326253736,
      "grad_norm": 2.734375,
      "learning_rate": 3.2954560205196905e-05,
      "loss": 0.7713,
      "step": 348230
    },
    {
      "epoch": 1.2204943801322692,
      "grad_norm": 3.421875,
      "learning_rate": 3.2953911176533206e-05,
      "loss": 0.8771,
      "step": 348240
    },
    {
      "epoch": 1.220529427639165,
      "grad_norm": 2.515625,
      "learning_rate": 3.29532621478695e-05,
      "loss": 0.8661,
      "step": 348250
    },
    {
      "epoch": 1.2205644751460605,
      "grad_norm": 3.171875,
      "learning_rate": 3.29526131192058e-05,
      "loss": 0.9329,
      "step": 348260
    },
    {
      "epoch": 1.220599522652956,
      "grad_norm": 2.96875,
      "learning_rate": 3.2951964090542097e-05,
      "loss": 0.8856,
      "step": 348270
    },
    {
      "epoch": 1.2206345701598518,
      "grad_norm": 3.3125,
      "learning_rate": 3.29513150618784e-05,
      "loss": 0.9402,
      "step": 348280
    },
    {
      "epoch": 1.2206696176667473,
      "grad_norm": 3.109375,
      "learning_rate": 3.295066603321469e-05,
      "loss": 0.916,
      "step": 348290
    },
    {
      "epoch": 1.2207046651736428,
      "grad_norm": 3.1875,
      "learning_rate": 3.2950017004550994e-05,
      "loss": 0.9032,
      "step": 348300
    },
    {
      "epoch": 1.2207397126805384,
      "grad_norm": 2.84375,
      "learning_rate": 3.294936797588729e-05,
      "loss": 0.8004,
      "step": 348310
    },
    {
      "epoch": 1.2207747601874341,
      "grad_norm": 3.109375,
      "learning_rate": 3.294871894722359e-05,
      "loss": 0.7845,
      "step": 348320
    },
    {
      "epoch": 1.2208098076943297,
      "grad_norm": 2.75,
      "learning_rate": 3.2948069918559885e-05,
      "loss": 0.9041,
      "step": 348330
    },
    {
      "epoch": 1.2208448552012252,
      "grad_norm": 2.78125,
      "learning_rate": 3.2947420889896186e-05,
      "loss": 0.7898,
      "step": 348340
    },
    {
      "epoch": 1.2208799027081207,
      "grad_norm": 2.84375,
      "learning_rate": 3.294677186123248e-05,
      "loss": 0.8624,
      "step": 348350
    },
    {
      "epoch": 1.2209149502150165,
      "grad_norm": 2.703125,
      "learning_rate": 3.2946122832568775e-05,
      "loss": 0.8127,
      "step": 348360
    },
    {
      "epoch": 1.220949997721912,
      "grad_norm": 3.4375,
      "learning_rate": 3.2945473803905077e-05,
      "loss": 0.9051,
      "step": 348370
    },
    {
      "epoch": 1.2209850452288076,
      "grad_norm": 3.125,
      "learning_rate": 3.294482477524137e-05,
      "loss": 0.8482,
      "step": 348380
    },
    {
      "epoch": 1.2210200927357033,
      "grad_norm": 2.984375,
      "learning_rate": 3.294417574657767e-05,
      "loss": 0.8868,
      "step": 348390
    },
    {
      "epoch": 1.2210551402425989,
      "grad_norm": 2.984375,
      "learning_rate": 3.294352671791397e-05,
      "loss": 0.8084,
      "step": 348400
    },
    {
      "epoch": 1.2210901877494944,
      "grad_norm": 2.6875,
      "learning_rate": 3.294287768925027e-05,
      "loss": 0.8596,
      "step": 348410
    },
    {
      "epoch": 1.22112523525639,
      "grad_norm": 2.625,
      "learning_rate": 3.294222866058656e-05,
      "loss": 0.8383,
      "step": 348420
    },
    {
      "epoch": 1.2211602827632857,
      "grad_norm": 2.796875,
      "learning_rate": 3.2941579631922865e-05,
      "loss": 0.7981,
      "step": 348430
    },
    {
      "epoch": 1.2211953302701812,
      "grad_norm": 2.78125,
      "learning_rate": 3.294093060325916e-05,
      "loss": 0.7905,
      "step": 348440
    },
    {
      "epoch": 1.2212303777770768,
      "grad_norm": 3.25,
      "learning_rate": 3.294028157459546e-05,
      "loss": 0.9367,
      "step": 348450
    },
    {
      "epoch": 1.2212654252839723,
      "grad_norm": 3.0,
      "learning_rate": 3.293963254593176e-05,
      "loss": 0.8766,
      "step": 348460
    },
    {
      "epoch": 1.221300472790868,
      "grad_norm": 2.703125,
      "learning_rate": 3.2938983517268057e-05,
      "loss": 0.8636,
      "step": 348470
    },
    {
      "epoch": 1.2213355202977636,
      "grad_norm": 3.046875,
      "learning_rate": 3.293833448860436e-05,
      "loss": 0.8069,
      "step": 348480
    },
    {
      "epoch": 1.2213705678046591,
      "grad_norm": 2.78125,
      "learning_rate": 3.293768545994065e-05,
      "loss": 0.7801,
      "step": 348490
    },
    {
      "epoch": 1.221405615311555,
      "grad_norm": 2.8125,
      "learning_rate": 3.2937036431276954e-05,
      "loss": 0.8452,
      "step": 348500
    },
    {
      "epoch": 1.2214406628184504,
      "grad_norm": 2.890625,
      "learning_rate": 3.293638740261325e-05,
      "loss": 0.9524,
      "step": 348510
    },
    {
      "epoch": 1.221475710325346,
      "grad_norm": 2.859375,
      "learning_rate": 3.293573837394955e-05,
      "loss": 0.9011,
      "step": 348520
    },
    {
      "epoch": 1.2215107578322417,
      "grad_norm": 3.234375,
      "learning_rate": 3.2935089345285845e-05,
      "loss": 0.9193,
      "step": 348530
    },
    {
      "epoch": 1.2215458053391373,
      "grad_norm": 3.140625,
      "learning_rate": 3.2934440316622146e-05,
      "loss": 0.9126,
      "step": 348540
    },
    {
      "epoch": 1.2215808528460328,
      "grad_norm": 3.09375,
      "learning_rate": 3.293379128795844e-05,
      "loss": 0.883,
      "step": 348550
    },
    {
      "epoch": 1.2216159003529283,
      "grad_norm": 2.921875,
      "learning_rate": 3.293314225929474e-05,
      "loss": 0.8584,
      "step": 348560
    },
    {
      "epoch": 1.221650947859824,
      "grad_norm": 2.75,
      "learning_rate": 3.2932493230631037e-05,
      "loss": 0.8759,
      "step": 348570
    },
    {
      "epoch": 1.2216859953667196,
      "grad_norm": 2.859375,
      "learning_rate": 3.293184420196734e-05,
      "loss": 0.8686,
      "step": 348580
    },
    {
      "epoch": 1.2217210428736152,
      "grad_norm": 2.703125,
      "learning_rate": 3.293119517330364e-05,
      "loss": 0.8728,
      "step": 348590
    },
    {
      "epoch": 1.2217560903805107,
      "grad_norm": 3.3125,
      "learning_rate": 3.2930546144639934e-05,
      "loss": 0.8272,
      "step": 348600
    },
    {
      "epoch": 1.2217911378874065,
      "grad_norm": 3.234375,
      "learning_rate": 3.2929897115976235e-05,
      "loss": 0.8465,
      "step": 348610
    },
    {
      "epoch": 1.221826185394302,
      "grad_norm": 2.75,
      "learning_rate": 3.292924808731253e-05,
      "loss": 0.8466,
      "step": 348620
    },
    {
      "epoch": 1.2218612329011975,
      "grad_norm": 2.796875,
      "learning_rate": 3.292859905864883e-05,
      "loss": 0.9441,
      "step": 348630
    },
    {
      "epoch": 1.2218962804080933,
      "grad_norm": 3.078125,
      "learning_rate": 3.2927950029985126e-05,
      "loss": 0.9206,
      "step": 348640
    },
    {
      "epoch": 1.2219313279149888,
      "grad_norm": 2.96875,
      "learning_rate": 3.292730100132143e-05,
      "loss": 0.9322,
      "step": 348650
    },
    {
      "epoch": 1.2219663754218844,
      "grad_norm": 2.8125,
      "learning_rate": 3.292665197265772e-05,
      "loss": 0.7722,
      "step": 348660
    },
    {
      "epoch": 1.22200142292878,
      "grad_norm": 2.9375,
      "learning_rate": 3.292600294399402e-05,
      "loss": 0.8458,
      "step": 348670
    },
    {
      "epoch": 1.2220364704356756,
      "grad_norm": 2.6875,
      "learning_rate": 3.292535391533032e-05,
      "loss": 0.9538,
      "step": 348680
    },
    {
      "epoch": 1.2220715179425712,
      "grad_norm": 3.15625,
      "learning_rate": 3.292470488666662e-05,
      "loss": 0.8096,
      "step": 348690
    },
    {
      "epoch": 1.2221065654494667,
      "grad_norm": 2.890625,
      "learning_rate": 3.2924055858002914e-05,
      "loss": 0.8081,
      "step": 348700
    },
    {
      "epoch": 1.2221416129563623,
      "grad_norm": 2.75,
      "learning_rate": 3.2923406829339215e-05,
      "loss": 0.7822,
      "step": 348710
    },
    {
      "epoch": 1.222176660463258,
      "grad_norm": 3.21875,
      "learning_rate": 3.292275780067551e-05,
      "loss": 0.9059,
      "step": 348720
    },
    {
      "epoch": 1.2222117079701535,
      "grad_norm": 3.078125,
      "learning_rate": 3.2922108772011805e-05,
      "loss": 0.8638,
      "step": 348730
    },
    {
      "epoch": 1.222246755477049,
      "grad_norm": 2.703125,
      "learning_rate": 3.2921459743348106e-05,
      "loss": 0.9439,
      "step": 348740
    },
    {
      "epoch": 1.2222818029839448,
      "grad_norm": 2.9375,
      "learning_rate": 3.29208107146844e-05,
      "loss": 0.8086,
      "step": 348750
    },
    {
      "epoch": 1.2223168504908404,
      "grad_norm": 3.375,
      "learning_rate": 3.29201616860207e-05,
      "loss": 0.885,
      "step": 348760
    },
    {
      "epoch": 1.222351897997736,
      "grad_norm": 2.75,
      "learning_rate": 3.2919512657356997e-05,
      "loss": 0.8526,
      "step": 348770
    },
    {
      "epoch": 1.2223869455046315,
      "grad_norm": 3.078125,
      "learning_rate": 3.29188636286933e-05,
      "loss": 0.9093,
      "step": 348780
    },
    {
      "epoch": 1.2224219930115272,
      "grad_norm": 3.078125,
      "learning_rate": 3.291821460002959e-05,
      "loss": 0.8454,
      "step": 348790
    },
    {
      "epoch": 1.2224570405184227,
      "grad_norm": 2.65625,
      "learning_rate": 3.2917565571365894e-05,
      "loss": 0.6773,
      "step": 348800
    },
    {
      "epoch": 1.2224920880253183,
      "grad_norm": 2.453125,
      "learning_rate": 3.291691654270219e-05,
      "loss": 0.8555,
      "step": 348810
    },
    {
      "epoch": 1.2225271355322138,
      "grad_norm": 2.765625,
      "learning_rate": 3.291626751403849e-05,
      "loss": 0.8834,
      "step": 348820
    },
    {
      "epoch": 1.2225621830391096,
      "grad_norm": 3.125,
      "learning_rate": 3.291561848537479e-05,
      "loss": 0.8638,
      "step": 348830
    },
    {
      "epoch": 1.222597230546005,
      "grad_norm": 2.4375,
      "learning_rate": 3.2914969456711086e-05,
      "loss": 0.728,
      "step": 348840
    },
    {
      "epoch": 1.2226322780529006,
      "grad_norm": 3.0,
      "learning_rate": 3.291432042804739e-05,
      "loss": 0.8227,
      "step": 348850
    },
    {
      "epoch": 1.2226673255597964,
      "grad_norm": 2.53125,
      "learning_rate": 3.291367139938368e-05,
      "loss": 0.8529,
      "step": 348860
    },
    {
      "epoch": 1.222702373066692,
      "grad_norm": 2.828125,
      "learning_rate": 3.291302237071998e-05,
      "loss": 0.7928,
      "step": 348870
    },
    {
      "epoch": 1.2227374205735875,
      "grad_norm": 3.21875,
      "learning_rate": 3.291237334205628e-05,
      "loss": 0.8792,
      "step": 348880
    },
    {
      "epoch": 1.222772468080483,
      "grad_norm": 3.03125,
      "learning_rate": 3.291172431339258e-05,
      "loss": 0.8746,
      "step": 348890
    },
    {
      "epoch": 1.2228075155873788,
      "grad_norm": 3.578125,
      "learning_rate": 3.2911075284728874e-05,
      "loss": 0.8593,
      "step": 348900
    },
    {
      "epoch": 1.2228425630942743,
      "grad_norm": 2.9375,
      "learning_rate": 3.2910426256065175e-05,
      "loss": 0.8092,
      "step": 348910
    },
    {
      "epoch": 1.2228776106011698,
      "grad_norm": 2.90625,
      "learning_rate": 3.290977722740147e-05,
      "loss": 0.9481,
      "step": 348920
    },
    {
      "epoch": 1.2229126581080654,
      "grad_norm": 3.296875,
      "learning_rate": 3.290912819873777e-05,
      "loss": 0.8184,
      "step": 348930
    },
    {
      "epoch": 1.2229477056149611,
      "grad_norm": 2.8125,
      "learning_rate": 3.2908479170074066e-05,
      "loss": 0.9511,
      "step": 348940
    },
    {
      "epoch": 1.2229827531218567,
      "grad_norm": 2.90625,
      "learning_rate": 3.290783014141037e-05,
      "loss": 0.8469,
      "step": 348950
    },
    {
      "epoch": 1.2230178006287522,
      "grad_norm": 2.59375,
      "learning_rate": 3.290718111274667e-05,
      "loss": 0.7859,
      "step": 348960
    },
    {
      "epoch": 1.223052848135648,
      "grad_norm": 2.640625,
      "learning_rate": 3.290653208408296e-05,
      "loss": 0.8599,
      "step": 348970
    },
    {
      "epoch": 1.2230878956425435,
      "grad_norm": 2.90625,
      "learning_rate": 3.2905883055419265e-05,
      "loss": 0.8935,
      "step": 348980
    },
    {
      "epoch": 1.223122943149439,
      "grad_norm": 3.296875,
      "learning_rate": 3.290523402675556e-05,
      "loss": 0.8665,
      "step": 348990
    },
    {
      "epoch": 1.2231579906563346,
      "grad_norm": 2.8125,
      "learning_rate": 3.290458499809186e-05,
      "loss": 0.8657,
      "step": 349000
    },
    {
      "epoch": 1.2231930381632303,
      "grad_norm": 2.890625,
      "learning_rate": 3.2903935969428155e-05,
      "loss": 0.8757,
      "step": 349010
    },
    {
      "epoch": 1.2232280856701259,
      "grad_norm": 3.421875,
      "learning_rate": 3.290328694076446e-05,
      "loss": 0.8458,
      "step": 349020
    },
    {
      "epoch": 1.2232631331770214,
      "grad_norm": 2.703125,
      "learning_rate": 3.290263791210075e-05,
      "loss": 0.8628,
      "step": 349030
    },
    {
      "epoch": 1.223298180683917,
      "grad_norm": 2.921875,
      "learning_rate": 3.290198888343705e-05,
      "loss": 0.8917,
      "step": 349040
    },
    {
      "epoch": 1.2233332281908127,
      "grad_norm": 3.015625,
      "learning_rate": 3.290133985477335e-05,
      "loss": 0.847,
      "step": 349050
    },
    {
      "epoch": 1.2233682756977082,
      "grad_norm": 2.953125,
      "learning_rate": 3.290069082610965e-05,
      "loss": 0.8555,
      "step": 349060
    },
    {
      "epoch": 1.2234033232046038,
      "grad_norm": 2.953125,
      "learning_rate": 3.290004179744594e-05,
      "loss": 0.8406,
      "step": 349070
    },
    {
      "epoch": 1.2234383707114995,
      "grad_norm": 3.109375,
      "learning_rate": 3.2899392768782245e-05,
      "loss": 0.8814,
      "step": 349080
    },
    {
      "epoch": 1.223473418218395,
      "grad_norm": 2.5,
      "learning_rate": 3.2898743740118546e-05,
      "loss": 0.8309,
      "step": 349090
    },
    {
      "epoch": 1.2235084657252906,
      "grad_norm": 3.328125,
      "learning_rate": 3.289809471145484e-05,
      "loss": 0.9054,
      "step": 349100
    },
    {
      "epoch": 1.2235435132321864,
      "grad_norm": 2.921875,
      "learning_rate": 3.2897445682791135e-05,
      "loss": 0.8281,
      "step": 349110
    },
    {
      "epoch": 1.2235785607390819,
      "grad_norm": 2.953125,
      "learning_rate": 3.289679665412743e-05,
      "loss": 0.8104,
      "step": 349120
    },
    {
      "epoch": 1.2236136082459774,
      "grad_norm": 3.0625,
      "learning_rate": 3.289614762546373e-05,
      "loss": 0.8851,
      "step": 349130
    },
    {
      "epoch": 1.223648655752873,
      "grad_norm": 2.90625,
      "learning_rate": 3.2895498596800026e-05,
      "loss": 0.9045,
      "step": 349140
    },
    {
      "epoch": 1.2236837032597685,
      "grad_norm": 3.015625,
      "learning_rate": 3.289484956813633e-05,
      "loss": 0.8972,
      "step": 349150
    },
    {
      "epoch": 1.2237187507666643,
      "grad_norm": 2.75,
      "learning_rate": 3.289420053947262e-05,
      "loss": 0.8813,
      "step": 349160
    },
    {
      "epoch": 1.2237537982735598,
      "grad_norm": 3.15625,
      "learning_rate": 3.289355151080892e-05,
      "loss": 0.8485,
      "step": 349170
    },
    {
      "epoch": 1.2237888457804553,
      "grad_norm": 2.265625,
      "learning_rate": 3.2892902482145225e-05,
      "loss": 0.8365,
      "step": 349180
    },
    {
      "epoch": 1.223823893287351,
      "grad_norm": 2.71875,
      "learning_rate": 3.289225345348152e-05,
      "loss": 0.8888,
      "step": 349190
    },
    {
      "epoch": 1.2238589407942466,
      "grad_norm": 2.546875,
      "learning_rate": 3.289160442481782e-05,
      "loss": 0.7782,
      "step": 349200
    },
    {
      "epoch": 1.2238939883011422,
      "grad_norm": 3.21875,
      "learning_rate": 3.2890955396154115e-05,
      "loss": 0.7897,
      "step": 349210
    },
    {
      "epoch": 1.223929035808038,
      "grad_norm": 2.828125,
      "learning_rate": 3.289030636749042e-05,
      "loss": 0.8,
      "step": 349220
    },
    {
      "epoch": 1.2239640833149334,
      "grad_norm": 2.875,
      "learning_rate": 3.288965733882671e-05,
      "loss": 0.7861,
      "step": 349230
    },
    {
      "epoch": 1.223999130821829,
      "grad_norm": 2.984375,
      "learning_rate": 3.288900831016301e-05,
      "loss": 0.8441,
      "step": 349240
    },
    {
      "epoch": 1.2240341783287245,
      "grad_norm": 2.71875,
      "learning_rate": 3.288835928149931e-05,
      "loss": 0.8986,
      "step": 349250
    },
    {
      "epoch": 1.2240692258356203,
      "grad_norm": 3.109375,
      "learning_rate": 3.288771025283561e-05,
      "loss": 0.8147,
      "step": 349260
    },
    {
      "epoch": 1.2241042733425158,
      "grad_norm": 3.40625,
      "learning_rate": 3.28870612241719e-05,
      "loss": 0.9378,
      "step": 349270
    },
    {
      "epoch": 1.2241393208494113,
      "grad_norm": 2.671875,
      "learning_rate": 3.2886412195508205e-05,
      "loss": 0.8985,
      "step": 349280
    },
    {
      "epoch": 1.2241743683563069,
      "grad_norm": 2.640625,
      "learning_rate": 3.28857631668445e-05,
      "loss": 0.8375,
      "step": 349290
    },
    {
      "epoch": 1.2242094158632026,
      "grad_norm": 2.8125,
      "learning_rate": 3.28851141381808e-05,
      "loss": 0.7716,
      "step": 349300
    },
    {
      "epoch": 1.2242444633700982,
      "grad_norm": 2.921875,
      "learning_rate": 3.2884465109517095e-05,
      "loss": 0.9069,
      "step": 349310
    },
    {
      "epoch": 1.2242795108769937,
      "grad_norm": 2.828125,
      "learning_rate": 3.28838160808534e-05,
      "loss": 0.8683,
      "step": 349320
    },
    {
      "epoch": 1.2243145583838895,
      "grad_norm": 3.390625,
      "learning_rate": 3.28831670521897e-05,
      "loss": 0.8031,
      "step": 349330
    },
    {
      "epoch": 1.224349605890785,
      "grad_norm": 2.84375,
      "learning_rate": 3.288251802352599e-05,
      "loss": 0.8523,
      "step": 349340
    },
    {
      "epoch": 1.2243846533976805,
      "grad_norm": 2.4375,
      "learning_rate": 3.2881868994862294e-05,
      "loss": 0.8163,
      "step": 349350
    },
    {
      "epoch": 1.224419700904576,
      "grad_norm": 3.140625,
      "learning_rate": 3.288121996619859e-05,
      "loss": 0.8844,
      "step": 349360
    },
    {
      "epoch": 1.2244547484114718,
      "grad_norm": 3.328125,
      "learning_rate": 3.288057093753489e-05,
      "loss": 0.9624,
      "step": 349370
    },
    {
      "epoch": 1.2244897959183674,
      "grad_norm": 2.859375,
      "learning_rate": 3.2879921908871185e-05,
      "loss": 0.8628,
      "step": 349380
    },
    {
      "epoch": 1.224524843425263,
      "grad_norm": 2.90625,
      "learning_rate": 3.2879272880207486e-05,
      "loss": 0.8634,
      "step": 349390
    },
    {
      "epoch": 1.2245598909321584,
      "grad_norm": 2.8125,
      "learning_rate": 3.287862385154378e-05,
      "loss": 0.8793,
      "step": 349400
    },
    {
      "epoch": 1.2245949384390542,
      "grad_norm": 3.046875,
      "learning_rate": 3.287797482288008e-05,
      "loss": 0.9156,
      "step": 349410
    },
    {
      "epoch": 1.2246299859459497,
      "grad_norm": 2.890625,
      "learning_rate": 3.287732579421638e-05,
      "loss": 0.9087,
      "step": 349420
    },
    {
      "epoch": 1.2246650334528453,
      "grad_norm": 2.8125,
      "learning_rate": 3.287667676555268e-05,
      "loss": 0.8376,
      "step": 349430
    },
    {
      "epoch": 1.224700080959741,
      "grad_norm": 2.671875,
      "learning_rate": 3.287602773688897e-05,
      "loss": 0.8305,
      "step": 349440
    },
    {
      "epoch": 1.2247351284666366,
      "grad_norm": 2.734375,
      "learning_rate": 3.2875378708225274e-05,
      "loss": 0.849,
      "step": 349450
    },
    {
      "epoch": 1.224770175973532,
      "grad_norm": 3.0625,
      "learning_rate": 3.2874729679561576e-05,
      "loss": 0.9155,
      "step": 349460
    },
    {
      "epoch": 1.2248052234804276,
      "grad_norm": 2.859375,
      "learning_rate": 3.287408065089787e-05,
      "loss": 0.858,
      "step": 349470
    },
    {
      "epoch": 1.2248402709873234,
      "grad_norm": 3.171875,
      "learning_rate": 3.2873431622234165e-05,
      "loss": 0.8405,
      "step": 349480
    },
    {
      "epoch": 1.224875318494219,
      "grad_norm": 2.953125,
      "learning_rate": 3.287278259357046e-05,
      "loss": 0.8258,
      "step": 349490
    },
    {
      "epoch": 1.2249103660011145,
      "grad_norm": 3.015625,
      "learning_rate": 3.287213356490676e-05,
      "loss": 0.8418,
      "step": 349500
    },
    {
      "epoch": 1.22494541350801,
      "grad_norm": 2.53125,
      "learning_rate": 3.2871484536243055e-05,
      "loss": 0.8664,
      "step": 349510
    },
    {
      "epoch": 1.2249804610149058,
      "grad_norm": 3.171875,
      "learning_rate": 3.287083550757936e-05,
      "loss": 0.8467,
      "step": 349520
    },
    {
      "epoch": 1.2250155085218013,
      "grad_norm": 3.15625,
      "learning_rate": 3.287018647891565e-05,
      "loss": 0.842,
      "step": 349530
    },
    {
      "epoch": 1.2250505560286968,
      "grad_norm": 3.140625,
      "learning_rate": 3.286953745025195e-05,
      "loss": 0.892,
      "step": 349540
    },
    {
      "epoch": 1.2250856035355926,
      "grad_norm": 2.96875,
      "learning_rate": 3.2868888421588254e-05,
      "loss": 0.8084,
      "step": 349550
    },
    {
      "epoch": 1.2251206510424881,
      "grad_norm": 2.828125,
      "learning_rate": 3.286823939292455e-05,
      "loss": 0.8009,
      "step": 349560
    },
    {
      "epoch": 1.2251556985493837,
      "grad_norm": 3.140625,
      "learning_rate": 3.286759036426085e-05,
      "loss": 0.8286,
      "step": 349570
    },
    {
      "epoch": 1.2251907460562792,
      "grad_norm": 2.5,
      "learning_rate": 3.2866941335597145e-05,
      "loss": 0.8792,
      "step": 349580
    },
    {
      "epoch": 1.225225793563175,
      "grad_norm": 2.96875,
      "learning_rate": 3.2866292306933446e-05,
      "loss": 0.8891,
      "step": 349590
    },
    {
      "epoch": 1.2252608410700705,
      "grad_norm": 2.921875,
      "learning_rate": 3.286564327826974e-05,
      "loss": 0.9113,
      "step": 349600
    },
    {
      "epoch": 1.225295888576966,
      "grad_norm": 2.71875,
      "learning_rate": 3.286499424960604e-05,
      "loss": 0.8737,
      "step": 349610
    },
    {
      "epoch": 1.2253309360838616,
      "grad_norm": 3.109375,
      "learning_rate": 3.286434522094234e-05,
      "loss": 0.9319,
      "step": 349620
    },
    {
      "epoch": 1.2253659835907573,
      "grad_norm": 2.5,
      "learning_rate": 3.286369619227864e-05,
      "loss": 0.8333,
      "step": 349630
    },
    {
      "epoch": 1.2254010310976529,
      "grad_norm": 3.046875,
      "learning_rate": 3.286304716361493e-05,
      "loss": 0.8007,
      "step": 349640
    },
    {
      "epoch": 1.2254360786045484,
      "grad_norm": 3.0,
      "learning_rate": 3.2862398134951234e-05,
      "loss": 0.8309,
      "step": 349650
    },
    {
      "epoch": 1.2254711261114442,
      "grad_norm": 2.90625,
      "learning_rate": 3.286174910628753e-05,
      "loss": 0.7978,
      "step": 349660
    },
    {
      "epoch": 1.2255061736183397,
      "grad_norm": 2.71875,
      "learning_rate": 3.286110007762383e-05,
      "loss": 0.8612,
      "step": 349670
    },
    {
      "epoch": 1.2255412211252352,
      "grad_norm": 2.921875,
      "learning_rate": 3.2860451048960125e-05,
      "loss": 0.841,
      "step": 349680
    },
    {
      "epoch": 1.2255762686321308,
      "grad_norm": 2.859375,
      "learning_rate": 3.2859802020296426e-05,
      "loss": 0.8251,
      "step": 349690
    },
    {
      "epoch": 1.2256113161390265,
      "grad_norm": 2.8125,
      "learning_rate": 3.285915299163273e-05,
      "loss": 0.8297,
      "step": 349700
    },
    {
      "epoch": 1.225646363645922,
      "grad_norm": 2.828125,
      "learning_rate": 3.285850396296902e-05,
      "loss": 0.869,
      "step": 349710
    },
    {
      "epoch": 1.2256814111528176,
      "grad_norm": 3.34375,
      "learning_rate": 3.2857854934305324e-05,
      "loss": 0.9568,
      "step": 349720
    },
    {
      "epoch": 1.2257164586597131,
      "grad_norm": 3.03125,
      "learning_rate": 3.285720590564162e-05,
      "loss": 0.7589,
      "step": 349730
    },
    {
      "epoch": 1.2257515061666089,
      "grad_norm": 2.359375,
      "learning_rate": 3.285655687697792e-05,
      "loss": 0.8625,
      "step": 349740
    },
    {
      "epoch": 1.2257865536735044,
      "grad_norm": 2.609375,
      "learning_rate": 3.2855907848314214e-05,
      "loss": 0.8364,
      "step": 349750
    },
    {
      "epoch": 1.2258216011804,
      "grad_norm": 2.734375,
      "learning_rate": 3.2855258819650516e-05,
      "loss": 0.8337,
      "step": 349760
    },
    {
      "epoch": 1.2258566486872957,
      "grad_norm": 2.796875,
      "learning_rate": 3.285460979098681e-05,
      "loss": 0.8008,
      "step": 349770
    },
    {
      "epoch": 1.2258916961941912,
      "grad_norm": 2.90625,
      "learning_rate": 3.285396076232311e-05,
      "loss": 0.9255,
      "step": 349780
    },
    {
      "epoch": 1.2259267437010868,
      "grad_norm": 3.609375,
      "learning_rate": 3.2853311733659406e-05,
      "loss": 0.8353,
      "step": 349790
    },
    {
      "epoch": 1.2259617912079825,
      "grad_norm": 2.9375,
      "learning_rate": 3.285266270499571e-05,
      "loss": 0.8408,
      "step": 349800
    },
    {
      "epoch": 1.225996838714878,
      "grad_norm": 2.90625,
      "learning_rate": 3.2852013676332e-05,
      "loss": 0.856,
      "step": 349810
    },
    {
      "epoch": 1.2260318862217736,
      "grad_norm": 2.796875,
      "learning_rate": 3.2851364647668304e-05,
      "loss": 0.8713,
      "step": 349820
    },
    {
      "epoch": 1.2260669337286692,
      "grad_norm": 3.34375,
      "learning_rate": 3.2850715619004605e-05,
      "loss": 0.9378,
      "step": 349830
    },
    {
      "epoch": 1.2261019812355647,
      "grad_norm": 3.0,
      "learning_rate": 3.28500665903409e-05,
      "loss": 0.9411,
      "step": 349840
    },
    {
      "epoch": 1.2261370287424604,
      "grad_norm": 3.140625,
      "learning_rate": 3.28494175616772e-05,
      "loss": 0.9002,
      "step": 349850
    },
    {
      "epoch": 1.226172076249356,
      "grad_norm": 2.609375,
      "learning_rate": 3.284876853301349e-05,
      "loss": 0.8477,
      "step": 349860
    },
    {
      "epoch": 1.2262071237562515,
      "grad_norm": 2.984375,
      "learning_rate": 3.284811950434979e-05,
      "loss": 0.8976,
      "step": 349870
    },
    {
      "epoch": 1.2262421712631473,
      "grad_norm": 2.8125,
      "learning_rate": 3.2847470475686085e-05,
      "loss": 0.8831,
      "step": 349880
    },
    {
      "epoch": 1.2262772187700428,
      "grad_norm": 2.84375,
      "learning_rate": 3.2846821447022386e-05,
      "loss": 0.8042,
      "step": 349890
    },
    {
      "epoch": 1.2263122662769383,
      "grad_norm": 2.71875,
      "learning_rate": 3.284617241835868e-05,
      "loss": 0.8594,
      "step": 349900
    },
    {
      "epoch": 1.226347313783834,
      "grad_norm": 3.15625,
      "learning_rate": 3.284552338969498e-05,
      "loss": 0.7953,
      "step": 349910
    },
    {
      "epoch": 1.2263823612907296,
      "grad_norm": 2.859375,
      "learning_rate": 3.2844874361031284e-05,
      "loss": 0.8742,
      "step": 349920
    },
    {
      "epoch": 1.2264174087976252,
      "grad_norm": 3.03125,
      "learning_rate": 3.284422533236758e-05,
      "loss": 0.8624,
      "step": 349930
    },
    {
      "epoch": 1.2264524563045207,
      "grad_norm": 2.8125,
      "learning_rate": 3.284357630370388e-05,
      "loss": 0.8587,
      "step": 349940
    },
    {
      "epoch": 1.2264875038114165,
      "grad_norm": 3.015625,
      "learning_rate": 3.2842927275040174e-05,
      "loss": 0.8552,
      "step": 349950
    },
    {
      "epoch": 1.226522551318312,
      "grad_norm": 2.640625,
      "learning_rate": 3.2842278246376476e-05,
      "loss": 0.7858,
      "step": 349960
    },
    {
      "epoch": 1.2265575988252075,
      "grad_norm": 2.40625,
      "learning_rate": 3.284162921771277e-05,
      "loss": 0.784,
      "step": 349970
    },
    {
      "epoch": 1.226592646332103,
      "grad_norm": 2.984375,
      "learning_rate": 3.284098018904907e-05,
      "loss": 0.8999,
      "step": 349980
    },
    {
      "epoch": 1.2266276938389988,
      "grad_norm": 2.609375,
      "learning_rate": 3.2840331160385366e-05,
      "loss": 0.8289,
      "step": 349990
    },
    {
      "epoch": 1.2266627413458944,
      "grad_norm": 3.046875,
      "learning_rate": 3.283968213172167e-05,
      "loss": 0.775,
      "step": 350000
    },
    {
      "epoch": 1.2266627413458944,
      "eval_loss": 0.805243194103241,
      "eval_runtime": 555.5591,
      "eval_samples_per_second": 684.78,
      "eval_steps_per_second": 57.065,
      "step": 350000
    },
    {
      "epoch": 1.22669778885279,
      "grad_norm": 2.46875,
      "learning_rate": 3.283903310305796e-05,
      "loss": 0.7723,
      "step": 350010
    },
    {
      "epoch": 1.2267328363596857,
      "grad_norm": 3.34375,
      "learning_rate": 3.2838384074394264e-05,
      "loss": 0.8593,
      "step": 350020
    },
    {
      "epoch": 1.2267678838665812,
      "grad_norm": 3.25,
      "learning_rate": 3.283773504573056e-05,
      "loss": 0.845,
      "step": 350030
    },
    {
      "epoch": 1.2268029313734767,
      "grad_norm": 3.125,
      "learning_rate": 3.283708601706686e-05,
      "loss": 0.8199,
      "step": 350040
    },
    {
      "epoch": 1.2268379788803723,
      "grad_norm": 2.796875,
      "learning_rate": 3.283643698840316e-05,
      "loss": 0.8499,
      "step": 350050
    },
    {
      "epoch": 1.226873026387268,
      "grad_norm": 2.46875,
      "learning_rate": 3.2835787959739456e-05,
      "loss": 0.8394,
      "step": 350060
    },
    {
      "epoch": 1.2269080738941636,
      "grad_norm": 3.015625,
      "learning_rate": 3.283513893107576e-05,
      "loss": 0.9054,
      "step": 350070
    },
    {
      "epoch": 1.226943121401059,
      "grad_norm": 2.796875,
      "learning_rate": 3.283448990241205e-05,
      "loss": 0.8221,
      "step": 350080
    },
    {
      "epoch": 1.2269781689079546,
      "grad_norm": 2.75,
      "learning_rate": 3.283384087374835e-05,
      "loss": 0.8053,
      "step": 350090
    },
    {
      "epoch": 1.2270132164148504,
      "grad_norm": 2.953125,
      "learning_rate": 3.283319184508465e-05,
      "loss": 0.8857,
      "step": 350100
    },
    {
      "epoch": 1.227048263921746,
      "grad_norm": 2.859375,
      "learning_rate": 3.283254281642095e-05,
      "loss": 0.874,
      "step": 350110
    },
    {
      "epoch": 1.2270833114286415,
      "grad_norm": 2.5625,
      "learning_rate": 3.2831893787757244e-05,
      "loss": 0.9011,
      "step": 350120
    },
    {
      "epoch": 1.2271183589355372,
      "grad_norm": 2.984375,
      "learning_rate": 3.2831244759093545e-05,
      "loss": 0.9362,
      "step": 350130
    },
    {
      "epoch": 1.2271534064424328,
      "grad_norm": 2.8125,
      "learning_rate": 3.283059573042984e-05,
      "loss": 0.8461,
      "step": 350140
    },
    {
      "epoch": 1.2271884539493283,
      "grad_norm": 2.25,
      "learning_rate": 3.282994670176614e-05,
      "loss": 0.8418,
      "step": 350150
    },
    {
      "epoch": 1.2272235014562238,
      "grad_norm": 3.140625,
      "learning_rate": 3.2829297673102436e-05,
      "loss": 0.8823,
      "step": 350160
    },
    {
      "epoch": 1.2272585489631196,
      "grad_norm": 2.78125,
      "learning_rate": 3.282864864443874e-05,
      "loss": 0.8822,
      "step": 350170
    },
    {
      "epoch": 1.2272935964700151,
      "grad_norm": 3.09375,
      "learning_rate": 3.282799961577503e-05,
      "loss": 0.8697,
      "step": 350180
    },
    {
      "epoch": 1.2273286439769107,
      "grad_norm": 2.84375,
      "learning_rate": 3.282735058711133e-05,
      "loss": 0.8295,
      "step": 350190
    },
    {
      "epoch": 1.2273636914838062,
      "grad_norm": 3.09375,
      "learning_rate": 3.2826701558447634e-05,
      "loss": 0.8325,
      "step": 350200
    },
    {
      "epoch": 1.227398738990702,
      "grad_norm": 2.640625,
      "learning_rate": 3.282605252978393e-05,
      "loss": 0.8245,
      "step": 350210
    },
    {
      "epoch": 1.2274337864975975,
      "grad_norm": 2.765625,
      "learning_rate": 3.282540350112023e-05,
      "loss": 0.8443,
      "step": 350220
    },
    {
      "epoch": 1.227468834004493,
      "grad_norm": 3.0,
      "learning_rate": 3.2824754472456525e-05,
      "loss": 0.8512,
      "step": 350230
    },
    {
      "epoch": 1.2275038815113888,
      "grad_norm": 2.8125,
      "learning_rate": 3.282410544379282e-05,
      "loss": 0.8613,
      "step": 350240
    },
    {
      "epoch": 1.2275389290182843,
      "grad_norm": 2.796875,
      "learning_rate": 3.2823456415129114e-05,
      "loss": 0.8083,
      "step": 350250
    },
    {
      "epoch": 1.2275739765251799,
      "grad_norm": 3.0,
      "learning_rate": 3.2822807386465416e-05,
      "loss": 0.8517,
      "step": 350260
    },
    {
      "epoch": 1.2276090240320754,
      "grad_norm": 3.015625,
      "learning_rate": 3.282215835780171e-05,
      "loss": 0.9592,
      "step": 350270
    },
    {
      "epoch": 1.2276440715389711,
      "grad_norm": 2.765625,
      "learning_rate": 3.282150932913801e-05,
      "loss": 0.9086,
      "step": 350280
    },
    {
      "epoch": 1.2276791190458667,
      "grad_norm": 3.0,
      "learning_rate": 3.282086030047431e-05,
      "loss": 0.8859,
      "step": 350290
    },
    {
      "epoch": 1.2277141665527622,
      "grad_norm": 3.140625,
      "learning_rate": 3.282021127181061e-05,
      "loss": 0.8216,
      "step": 350300
    },
    {
      "epoch": 1.2277492140596578,
      "grad_norm": 2.53125,
      "learning_rate": 3.281956224314691e-05,
      "loss": 0.8387,
      "step": 350310
    },
    {
      "epoch": 1.2277842615665535,
      "grad_norm": 2.546875,
      "learning_rate": 3.2818913214483204e-05,
      "loss": 0.8719,
      "step": 350320
    },
    {
      "epoch": 1.227819309073449,
      "grad_norm": 2.984375,
      "learning_rate": 3.2818264185819505e-05,
      "loss": 0.7784,
      "step": 350330
    },
    {
      "epoch": 1.2278543565803446,
      "grad_norm": 3.296875,
      "learning_rate": 3.28176151571558e-05,
      "loss": 0.8554,
      "step": 350340
    },
    {
      "epoch": 1.2278894040872403,
      "grad_norm": 2.90625,
      "learning_rate": 3.28169661284921e-05,
      "loss": 0.8426,
      "step": 350350
    },
    {
      "epoch": 1.2279244515941359,
      "grad_norm": 2.8125,
      "learning_rate": 3.2816317099828396e-05,
      "loss": 0.8116,
      "step": 350360
    },
    {
      "epoch": 1.2279594991010314,
      "grad_norm": 2.9375,
      "learning_rate": 3.28156680711647e-05,
      "loss": 0.8949,
      "step": 350370
    },
    {
      "epoch": 1.227994546607927,
      "grad_norm": 2.953125,
      "learning_rate": 3.281501904250099e-05,
      "loss": 0.7984,
      "step": 350380
    },
    {
      "epoch": 1.2280295941148227,
      "grad_norm": 3.46875,
      "learning_rate": 3.281437001383729e-05,
      "loss": 0.8588,
      "step": 350390
    },
    {
      "epoch": 1.2280646416217182,
      "grad_norm": 3.453125,
      "learning_rate": 3.281372098517359e-05,
      "loss": 0.8724,
      "step": 350400
    },
    {
      "epoch": 1.2280996891286138,
      "grad_norm": 2.578125,
      "learning_rate": 3.281307195650989e-05,
      "loss": 0.9132,
      "step": 350410
    },
    {
      "epoch": 1.2281347366355093,
      "grad_norm": 3.359375,
      "learning_rate": 3.281242292784619e-05,
      "loss": 0.8218,
      "step": 350420
    },
    {
      "epoch": 1.228169784142405,
      "grad_norm": 3.3125,
      "learning_rate": 3.2811773899182485e-05,
      "loss": 0.9073,
      "step": 350430
    },
    {
      "epoch": 1.2282048316493006,
      "grad_norm": 3.25,
      "learning_rate": 3.2811124870518786e-05,
      "loss": 0.8792,
      "step": 350440
    },
    {
      "epoch": 1.2282398791561961,
      "grad_norm": 3.125,
      "learning_rate": 3.281047584185508e-05,
      "loss": 0.8167,
      "step": 350450
    },
    {
      "epoch": 1.228274926663092,
      "grad_norm": 2.96875,
      "learning_rate": 3.280982681319138e-05,
      "loss": 0.8685,
      "step": 350460
    },
    {
      "epoch": 1.2283099741699874,
      "grad_norm": 3.109375,
      "learning_rate": 3.280917778452768e-05,
      "loss": 0.8586,
      "step": 350470
    },
    {
      "epoch": 1.228345021676883,
      "grad_norm": 2.46875,
      "learning_rate": 3.280852875586398e-05,
      "loss": 0.864,
      "step": 350480
    },
    {
      "epoch": 1.2283800691837787,
      "grad_norm": 2.90625,
      "learning_rate": 3.280787972720027e-05,
      "loss": 0.8901,
      "step": 350490
    },
    {
      "epoch": 1.2284151166906743,
      "grad_norm": 2.5625,
      "learning_rate": 3.2807230698536574e-05,
      "loss": 0.8687,
      "step": 350500
    },
    {
      "epoch": 1.2284501641975698,
      "grad_norm": 3.046875,
      "learning_rate": 3.280658166987287e-05,
      "loss": 0.8467,
      "step": 350510
    },
    {
      "epoch": 1.2284852117044653,
      "grad_norm": 2.75,
      "learning_rate": 3.280593264120917e-05,
      "loss": 0.8891,
      "step": 350520
    },
    {
      "epoch": 1.2285202592113609,
      "grad_norm": 3.46875,
      "learning_rate": 3.2805283612545465e-05,
      "loss": 0.9751,
      "step": 350530
    },
    {
      "epoch": 1.2285553067182566,
      "grad_norm": 2.796875,
      "learning_rate": 3.2804634583881766e-05,
      "loss": 0.8421,
      "step": 350540
    },
    {
      "epoch": 1.2285903542251522,
      "grad_norm": 2.34375,
      "learning_rate": 3.280398555521806e-05,
      "loss": 0.8748,
      "step": 350550
    },
    {
      "epoch": 1.2286254017320477,
      "grad_norm": 2.75,
      "learning_rate": 3.280333652655436e-05,
      "loss": 0.838,
      "step": 350560
    },
    {
      "epoch": 1.2286604492389435,
      "grad_norm": 2.6875,
      "learning_rate": 3.2802687497890664e-05,
      "loss": 0.9017,
      "step": 350570
    },
    {
      "epoch": 1.228695496745839,
      "grad_norm": 3.296875,
      "learning_rate": 3.280203846922696e-05,
      "loss": 0.8462,
      "step": 350580
    },
    {
      "epoch": 1.2287305442527345,
      "grad_norm": 2.78125,
      "learning_rate": 3.280138944056326e-05,
      "loss": 0.8869,
      "step": 350590
    },
    {
      "epoch": 1.2287655917596303,
      "grad_norm": 2.46875,
      "learning_rate": 3.2800740411899554e-05,
      "loss": 0.8129,
      "step": 350600
    },
    {
      "epoch": 1.2288006392665258,
      "grad_norm": 3.0,
      "learning_rate": 3.280009138323585e-05,
      "loss": 0.8601,
      "step": 350610
    },
    {
      "epoch": 1.2288356867734214,
      "grad_norm": 2.734375,
      "learning_rate": 3.2799442354572144e-05,
      "loss": 0.8287,
      "step": 350620
    },
    {
      "epoch": 1.228870734280317,
      "grad_norm": 2.96875,
      "learning_rate": 3.2798793325908445e-05,
      "loss": 0.9229,
      "step": 350630
    },
    {
      "epoch": 1.2289057817872127,
      "grad_norm": 3.25,
      "learning_rate": 3.279814429724474e-05,
      "loss": 0.7959,
      "step": 350640
    },
    {
      "epoch": 1.2289408292941082,
      "grad_norm": 3.15625,
      "learning_rate": 3.279749526858104e-05,
      "loss": 0.9493,
      "step": 350650
    },
    {
      "epoch": 1.2289758768010037,
      "grad_norm": 3.015625,
      "learning_rate": 3.279684623991734e-05,
      "loss": 0.8701,
      "step": 350660
    },
    {
      "epoch": 1.2290109243078993,
      "grad_norm": 2.921875,
      "learning_rate": 3.279619721125364e-05,
      "loss": 0.8841,
      "step": 350670
    },
    {
      "epoch": 1.229045971814795,
      "grad_norm": 3.03125,
      "learning_rate": 3.279554818258994e-05,
      "loss": 0.8023,
      "step": 350680
    },
    {
      "epoch": 1.2290810193216906,
      "grad_norm": 3.078125,
      "learning_rate": 3.279489915392623e-05,
      "loss": 0.8809,
      "step": 350690
    },
    {
      "epoch": 1.229116066828586,
      "grad_norm": 2.9375,
      "learning_rate": 3.2794250125262534e-05,
      "loss": 0.8587,
      "step": 350700
    },
    {
      "epoch": 1.2291511143354819,
      "grad_norm": 2.84375,
      "learning_rate": 3.279360109659883e-05,
      "loss": 0.8276,
      "step": 350710
    },
    {
      "epoch": 1.2291861618423774,
      "grad_norm": 3.265625,
      "learning_rate": 3.279295206793513e-05,
      "loss": 0.9654,
      "step": 350720
    },
    {
      "epoch": 1.229221209349273,
      "grad_norm": 2.890625,
      "learning_rate": 3.2792303039271425e-05,
      "loss": 0.8738,
      "step": 350730
    },
    {
      "epoch": 1.2292562568561685,
      "grad_norm": 3.0625,
      "learning_rate": 3.2791654010607726e-05,
      "loss": 0.9035,
      "step": 350740
    },
    {
      "epoch": 1.2292913043630642,
      "grad_norm": 2.890625,
      "learning_rate": 3.279100498194402e-05,
      "loss": 0.89,
      "step": 350750
    },
    {
      "epoch": 1.2293263518699598,
      "grad_norm": 3.078125,
      "learning_rate": 3.279035595328032e-05,
      "loss": 0.8646,
      "step": 350760
    },
    {
      "epoch": 1.2293613993768553,
      "grad_norm": 2.59375,
      "learning_rate": 3.278970692461662e-05,
      "loss": 0.8456,
      "step": 350770
    },
    {
      "epoch": 1.2293964468837508,
      "grad_norm": 3.171875,
      "learning_rate": 3.278905789595292e-05,
      "loss": 0.8935,
      "step": 350780
    },
    {
      "epoch": 1.2294314943906466,
      "grad_norm": 2.578125,
      "learning_rate": 3.278840886728922e-05,
      "loss": 0.7854,
      "step": 350790
    },
    {
      "epoch": 1.2294665418975421,
      "grad_norm": 2.921875,
      "learning_rate": 3.2787759838625514e-05,
      "loss": 0.7963,
      "step": 350800
    },
    {
      "epoch": 1.2295015894044377,
      "grad_norm": 2.890625,
      "learning_rate": 3.2787110809961816e-05,
      "loss": 0.8303,
      "step": 350810
    },
    {
      "epoch": 1.2295366369113334,
      "grad_norm": 2.9375,
      "learning_rate": 3.278646178129811e-05,
      "loss": 0.907,
      "step": 350820
    },
    {
      "epoch": 1.229571684418229,
      "grad_norm": 2.5,
      "learning_rate": 3.278581275263441e-05,
      "loss": 0.8454,
      "step": 350830
    },
    {
      "epoch": 1.2296067319251245,
      "grad_norm": 2.609375,
      "learning_rate": 3.2785163723970706e-05,
      "loss": 0.8505,
      "step": 350840
    },
    {
      "epoch": 1.22964177943202,
      "grad_norm": 2.765625,
      "learning_rate": 3.278451469530701e-05,
      "loss": 0.8085,
      "step": 350850
    },
    {
      "epoch": 1.2296768269389158,
      "grad_norm": 3.1875,
      "learning_rate": 3.27838656666433e-05,
      "loss": 0.7955,
      "step": 350860
    },
    {
      "epoch": 1.2297118744458113,
      "grad_norm": 2.734375,
      "learning_rate": 3.2783216637979604e-05,
      "loss": 0.8699,
      "step": 350870
    },
    {
      "epoch": 1.2297469219527069,
      "grad_norm": 3.234375,
      "learning_rate": 3.27825676093159e-05,
      "loss": 0.8439,
      "step": 350880
    },
    {
      "epoch": 1.2297819694596024,
      "grad_norm": 2.921875,
      "learning_rate": 3.27819185806522e-05,
      "loss": 0.8245,
      "step": 350890
    },
    {
      "epoch": 1.2298170169664981,
      "grad_norm": 2.375,
      "learning_rate": 3.2781269551988494e-05,
      "loss": 0.8901,
      "step": 350900
    },
    {
      "epoch": 1.2298520644733937,
      "grad_norm": 2.84375,
      "learning_rate": 3.2780620523324796e-05,
      "loss": 0.8656,
      "step": 350910
    },
    {
      "epoch": 1.2298871119802892,
      "grad_norm": 3.25,
      "learning_rate": 3.277997149466109e-05,
      "loss": 0.8774,
      "step": 350920
    },
    {
      "epoch": 1.229922159487185,
      "grad_norm": 2.96875,
      "learning_rate": 3.277932246599739e-05,
      "loss": 0.7873,
      "step": 350930
    },
    {
      "epoch": 1.2299572069940805,
      "grad_norm": 3.0625,
      "learning_rate": 3.277867343733369e-05,
      "loss": 0.8704,
      "step": 350940
    },
    {
      "epoch": 1.229992254500976,
      "grad_norm": 2.765625,
      "learning_rate": 3.277802440866999e-05,
      "loss": 0.8653,
      "step": 350950
    },
    {
      "epoch": 1.2300273020078716,
      "grad_norm": 2.5625,
      "learning_rate": 3.277737538000629e-05,
      "loss": 0.8991,
      "step": 350960
    },
    {
      "epoch": 1.2300623495147673,
      "grad_norm": 2.375,
      "learning_rate": 3.2776726351342584e-05,
      "loss": 0.9151,
      "step": 350970
    },
    {
      "epoch": 1.2300973970216629,
      "grad_norm": 2.921875,
      "learning_rate": 3.2776077322678885e-05,
      "loss": 0.8607,
      "step": 350980
    },
    {
      "epoch": 1.2301324445285584,
      "grad_norm": 3.4375,
      "learning_rate": 3.277542829401517e-05,
      "loss": 0.9629,
      "step": 350990
    },
    {
      "epoch": 1.230167492035454,
      "grad_norm": 3.5625,
      "learning_rate": 3.2774779265351474e-05,
      "loss": 0.8513,
      "step": 351000
    },
    {
      "epoch": 1.2302025395423497,
      "grad_norm": 3.078125,
      "learning_rate": 3.277413023668777e-05,
      "loss": 0.881,
      "step": 351010
    },
    {
      "epoch": 1.2302375870492452,
      "grad_norm": 3.1875,
      "learning_rate": 3.277348120802407e-05,
      "loss": 1.0216,
      "step": 351020
    },
    {
      "epoch": 1.2302726345561408,
      "grad_norm": 3.09375,
      "learning_rate": 3.277283217936037e-05,
      "loss": 0.9877,
      "step": 351030
    },
    {
      "epoch": 1.2303076820630365,
      "grad_norm": 2.4375,
      "learning_rate": 3.2772183150696666e-05,
      "loss": 0.8071,
      "step": 351040
    },
    {
      "epoch": 1.230342729569932,
      "grad_norm": 2.703125,
      "learning_rate": 3.277153412203297e-05,
      "loss": 0.8119,
      "step": 351050
    },
    {
      "epoch": 1.2303777770768276,
      "grad_norm": 2.96875,
      "learning_rate": 3.277088509336926e-05,
      "loss": 0.8245,
      "step": 351060
    },
    {
      "epoch": 1.2304128245837231,
      "grad_norm": 3.234375,
      "learning_rate": 3.2770236064705564e-05,
      "loss": 0.8388,
      "step": 351070
    },
    {
      "epoch": 1.230447872090619,
      "grad_norm": 2.984375,
      "learning_rate": 3.276958703604186e-05,
      "loss": 0.877,
      "step": 351080
    },
    {
      "epoch": 1.2304829195975144,
      "grad_norm": 3.078125,
      "learning_rate": 3.276893800737816e-05,
      "loss": 0.73,
      "step": 351090
    },
    {
      "epoch": 1.23051796710441,
      "grad_norm": 2.484375,
      "learning_rate": 3.2768288978714454e-05,
      "loss": 0.9751,
      "step": 351100
    },
    {
      "epoch": 1.2305530146113055,
      "grad_norm": 2.9375,
      "learning_rate": 3.2767639950050756e-05,
      "loss": 0.7974,
      "step": 351110
    },
    {
      "epoch": 1.2305880621182013,
      "grad_norm": 2.828125,
      "learning_rate": 3.276699092138705e-05,
      "loss": 0.8416,
      "step": 351120
    },
    {
      "epoch": 1.2306231096250968,
      "grad_norm": 2.953125,
      "learning_rate": 3.276634189272335e-05,
      "loss": 0.7674,
      "step": 351130
    },
    {
      "epoch": 1.2306581571319923,
      "grad_norm": 3.34375,
      "learning_rate": 3.2765692864059646e-05,
      "loss": 0.9293,
      "step": 351140
    },
    {
      "epoch": 1.230693204638888,
      "grad_norm": 2.796875,
      "learning_rate": 3.276504383539595e-05,
      "loss": 0.8742,
      "step": 351150
    },
    {
      "epoch": 1.2307282521457836,
      "grad_norm": 2.5625,
      "learning_rate": 3.276439480673225e-05,
      "loss": 0.8295,
      "step": 351160
    },
    {
      "epoch": 1.2307632996526792,
      "grad_norm": 3.109375,
      "learning_rate": 3.2763745778068544e-05,
      "loss": 0.8266,
      "step": 351170
    },
    {
      "epoch": 1.230798347159575,
      "grad_norm": 2.53125,
      "learning_rate": 3.2763096749404845e-05,
      "loss": 0.7901,
      "step": 351180
    },
    {
      "epoch": 1.2308333946664705,
      "grad_norm": 3.0625,
      "learning_rate": 3.276244772074114e-05,
      "loss": 0.9038,
      "step": 351190
    },
    {
      "epoch": 1.230868442173366,
      "grad_norm": 2.890625,
      "learning_rate": 3.276179869207744e-05,
      "loss": 0.9055,
      "step": 351200
    },
    {
      "epoch": 1.2309034896802615,
      "grad_norm": 3.03125,
      "learning_rate": 3.2761149663413736e-05,
      "loss": 0.8831,
      "step": 351210
    },
    {
      "epoch": 1.230938537187157,
      "grad_norm": 2.625,
      "learning_rate": 3.276050063475004e-05,
      "loss": 0.8479,
      "step": 351220
    },
    {
      "epoch": 1.2309735846940528,
      "grad_norm": 3.078125,
      "learning_rate": 3.275985160608633e-05,
      "loss": 0.9184,
      "step": 351230
    },
    {
      "epoch": 1.2310086322009484,
      "grad_norm": 2.75,
      "learning_rate": 3.275920257742263e-05,
      "loss": 0.8471,
      "step": 351240
    },
    {
      "epoch": 1.231043679707844,
      "grad_norm": 3.09375,
      "learning_rate": 3.275855354875893e-05,
      "loss": 0.8309,
      "step": 351250
    },
    {
      "epoch": 1.2310787272147397,
      "grad_norm": 2.875,
      "learning_rate": 3.275790452009523e-05,
      "loss": 0.879,
      "step": 351260
    },
    {
      "epoch": 1.2311137747216352,
      "grad_norm": 2.75,
      "learning_rate": 3.2757255491431524e-05,
      "loss": 0.9302,
      "step": 351270
    },
    {
      "epoch": 1.2311488222285307,
      "grad_norm": 3.09375,
      "learning_rate": 3.2756606462767825e-05,
      "loss": 0.8205,
      "step": 351280
    },
    {
      "epoch": 1.2311838697354265,
      "grad_norm": 3.171875,
      "learning_rate": 3.2755957434104126e-05,
      "loss": 0.8462,
      "step": 351290
    },
    {
      "epoch": 1.231218917242322,
      "grad_norm": 3.53125,
      "learning_rate": 3.275530840544042e-05,
      "loss": 0.8922,
      "step": 351300
    },
    {
      "epoch": 1.2312539647492176,
      "grad_norm": 2.984375,
      "learning_rate": 3.275465937677672e-05,
      "loss": 0.8741,
      "step": 351310
    },
    {
      "epoch": 1.231289012256113,
      "grad_norm": 2.875,
      "learning_rate": 3.275401034811302e-05,
      "loss": 0.8646,
      "step": 351320
    },
    {
      "epoch": 1.2313240597630088,
      "grad_norm": 2.515625,
      "learning_rate": 3.275336131944932e-05,
      "loss": 0.783,
      "step": 351330
    },
    {
      "epoch": 1.2313591072699044,
      "grad_norm": 2.734375,
      "learning_rate": 3.275271229078561e-05,
      "loss": 0.8856,
      "step": 351340
    },
    {
      "epoch": 1.2313941547768,
      "grad_norm": 3.0625,
      "learning_rate": 3.2752063262121914e-05,
      "loss": 0.8456,
      "step": 351350
    },
    {
      "epoch": 1.2314292022836955,
      "grad_norm": 3.171875,
      "learning_rate": 3.27514142334582e-05,
      "loss": 0.8955,
      "step": 351360
    },
    {
      "epoch": 1.2314642497905912,
      "grad_norm": 3.265625,
      "learning_rate": 3.2750765204794504e-05,
      "loss": 0.9439,
      "step": 351370
    },
    {
      "epoch": 1.2314992972974868,
      "grad_norm": 3.015625,
      "learning_rate": 3.2750116176130805e-05,
      "loss": 0.9128,
      "step": 351380
    },
    {
      "epoch": 1.2315343448043823,
      "grad_norm": 2.953125,
      "learning_rate": 3.27494671474671e-05,
      "loss": 0.8678,
      "step": 351390
    },
    {
      "epoch": 1.231569392311278,
      "grad_norm": 2.984375,
      "learning_rate": 3.27488181188034e-05,
      "loss": 0.8535,
      "step": 351400
    },
    {
      "epoch": 1.2316044398181736,
      "grad_norm": 3.375,
      "learning_rate": 3.2748169090139696e-05,
      "loss": 0.8961,
      "step": 351410
    },
    {
      "epoch": 1.2316394873250691,
      "grad_norm": 2.96875,
      "learning_rate": 3.2747520061476e-05,
      "loss": 0.8726,
      "step": 351420
    },
    {
      "epoch": 1.2316745348319647,
      "grad_norm": 3.046875,
      "learning_rate": 3.274687103281229e-05,
      "loss": 0.872,
      "step": 351430
    },
    {
      "epoch": 1.2317095823388604,
      "grad_norm": 2.921875,
      "learning_rate": 3.274622200414859e-05,
      "loss": 0.891,
      "step": 351440
    },
    {
      "epoch": 1.231744629845756,
      "grad_norm": 3.1875,
      "learning_rate": 3.274557297548489e-05,
      "loss": 0.7714,
      "step": 351450
    },
    {
      "epoch": 1.2317796773526515,
      "grad_norm": 3.125,
      "learning_rate": 3.274492394682119e-05,
      "loss": 0.9043,
      "step": 351460
    },
    {
      "epoch": 1.231814724859547,
      "grad_norm": 3.140625,
      "learning_rate": 3.2744274918157484e-05,
      "loss": 0.8065,
      "step": 351470
    },
    {
      "epoch": 1.2318497723664428,
      "grad_norm": 3.390625,
      "learning_rate": 3.2743625889493785e-05,
      "loss": 0.8365,
      "step": 351480
    },
    {
      "epoch": 1.2318848198733383,
      "grad_norm": 2.734375,
      "learning_rate": 3.274297686083008e-05,
      "loss": 0.8262,
      "step": 351490
    },
    {
      "epoch": 1.2319198673802338,
      "grad_norm": 2.84375,
      "learning_rate": 3.274232783216638e-05,
      "loss": 0.8657,
      "step": 351500
    },
    {
      "epoch": 1.2319549148871296,
      "grad_norm": 3.015625,
      "learning_rate": 3.2741678803502676e-05,
      "loss": 0.8381,
      "step": 351510
    },
    {
      "epoch": 1.2319899623940251,
      "grad_norm": 3.359375,
      "learning_rate": 3.274102977483898e-05,
      "loss": 0.8945,
      "step": 351520
    },
    {
      "epoch": 1.2320250099009207,
      "grad_norm": 2.34375,
      "learning_rate": 3.274038074617528e-05,
      "loss": 0.8439,
      "step": 351530
    },
    {
      "epoch": 1.2320600574078162,
      "grad_norm": 3.390625,
      "learning_rate": 3.273973171751157e-05,
      "loss": 0.959,
      "step": 351540
    },
    {
      "epoch": 1.232095104914712,
      "grad_norm": 2.828125,
      "learning_rate": 3.2739082688847874e-05,
      "loss": 0.947,
      "step": 351550
    },
    {
      "epoch": 1.2321301524216075,
      "grad_norm": 3.0,
      "learning_rate": 3.273843366018417e-05,
      "loss": 0.8304,
      "step": 351560
    },
    {
      "epoch": 1.232165199928503,
      "grad_norm": 3.015625,
      "learning_rate": 3.273778463152047e-05,
      "loss": 0.8949,
      "step": 351570
    },
    {
      "epoch": 1.2322002474353986,
      "grad_norm": 3.203125,
      "learning_rate": 3.2737135602856765e-05,
      "loss": 0.8262,
      "step": 351580
    },
    {
      "epoch": 1.2322352949422943,
      "grad_norm": 2.5,
      "learning_rate": 3.2736486574193066e-05,
      "loss": 0.8636,
      "step": 351590
    },
    {
      "epoch": 1.2322703424491899,
      "grad_norm": 3.0,
      "learning_rate": 3.273583754552936e-05,
      "loss": 0.8388,
      "step": 351600
    },
    {
      "epoch": 1.2323053899560854,
      "grad_norm": 2.96875,
      "learning_rate": 3.273518851686566e-05,
      "loss": 0.8531,
      "step": 351610
    },
    {
      "epoch": 1.2323404374629812,
      "grad_norm": 2.78125,
      "learning_rate": 3.273453948820196e-05,
      "loss": 0.8914,
      "step": 351620
    },
    {
      "epoch": 1.2323754849698767,
      "grad_norm": 2.6875,
      "learning_rate": 3.273389045953826e-05,
      "loss": 0.9029,
      "step": 351630
    },
    {
      "epoch": 1.2324105324767722,
      "grad_norm": 2.484375,
      "learning_rate": 3.273324143087455e-05,
      "loss": 0.8397,
      "step": 351640
    },
    {
      "epoch": 1.2324455799836678,
      "grad_norm": 3.015625,
      "learning_rate": 3.2732592402210854e-05,
      "loss": 0.8284,
      "step": 351650
    },
    {
      "epoch": 1.2324806274905635,
      "grad_norm": 2.71875,
      "learning_rate": 3.2731943373547156e-05,
      "loss": 0.8686,
      "step": 351660
    },
    {
      "epoch": 1.232515674997459,
      "grad_norm": 2.84375,
      "learning_rate": 3.273129434488345e-05,
      "loss": 0.8431,
      "step": 351670
    },
    {
      "epoch": 1.2325507225043546,
      "grad_norm": 3.40625,
      "learning_rate": 3.273064531621975e-05,
      "loss": 0.885,
      "step": 351680
    },
    {
      "epoch": 1.2325857700112501,
      "grad_norm": 2.984375,
      "learning_rate": 3.2729996287556046e-05,
      "loss": 0.7698,
      "step": 351690
    },
    {
      "epoch": 1.232620817518146,
      "grad_norm": 2.796875,
      "learning_rate": 3.272934725889235e-05,
      "loss": 0.9136,
      "step": 351700
    },
    {
      "epoch": 1.2326558650250414,
      "grad_norm": 2.84375,
      "learning_rate": 3.272869823022864e-05,
      "loss": 0.9757,
      "step": 351710
    },
    {
      "epoch": 1.232690912531937,
      "grad_norm": 2.984375,
      "learning_rate": 3.2728049201564944e-05,
      "loss": 0.8502,
      "step": 351720
    },
    {
      "epoch": 1.2327259600388327,
      "grad_norm": 2.578125,
      "learning_rate": 3.272740017290124e-05,
      "loss": 0.8589,
      "step": 351730
    },
    {
      "epoch": 1.2327610075457283,
      "grad_norm": 2.6875,
      "learning_rate": 3.272675114423753e-05,
      "loss": 0.9778,
      "step": 351740
    },
    {
      "epoch": 1.2327960550526238,
      "grad_norm": 2.6875,
      "learning_rate": 3.2726102115573834e-05,
      "loss": 0.8381,
      "step": 351750
    },
    {
      "epoch": 1.2328311025595193,
      "grad_norm": 3.34375,
      "learning_rate": 3.272545308691013e-05,
      "loss": 0.8308,
      "step": 351760
    },
    {
      "epoch": 1.232866150066415,
      "grad_norm": 3.21875,
      "learning_rate": 3.272480405824643e-05,
      "loss": 0.844,
      "step": 351770
    },
    {
      "epoch": 1.2329011975733106,
      "grad_norm": 2.453125,
      "learning_rate": 3.2724155029582725e-05,
      "loss": 0.8151,
      "step": 351780
    },
    {
      "epoch": 1.2329362450802062,
      "grad_norm": 2.9375,
      "learning_rate": 3.2723506000919026e-05,
      "loss": 0.7202,
      "step": 351790
    },
    {
      "epoch": 1.2329712925871017,
      "grad_norm": 3.0,
      "learning_rate": 3.272285697225532e-05,
      "loss": 0.8514,
      "step": 351800
    },
    {
      "epoch": 1.2330063400939975,
      "grad_norm": 3.296875,
      "learning_rate": 3.272220794359162e-05,
      "loss": 0.9107,
      "step": 351810
    },
    {
      "epoch": 1.233041387600893,
      "grad_norm": 2.828125,
      "learning_rate": 3.272155891492792e-05,
      "loss": 0.8906,
      "step": 351820
    },
    {
      "epoch": 1.2330764351077885,
      "grad_norm": 2.765625,
      "learning_rate": 3.272090988626422e-05,
      "loss": 0.851,
      "step": 351830
    },
    {
      "epoch": 1.2331114826146843,
      "grad_norm": 2.75,
      "learning_rate": 3.272026085760051e-05,
      "loss": 0.8619,
      "step": 351840
    },
    {
      "epoch": 1.2331465301215798,
      "grad_norm": 2.96875,
      "learning_rate": 3.2719611828936814e-05,
      "loss": 0.85,
      "step": 351850
    },
    {
      "epoch": 1.2331815776284754,
      "grad_norm": 4.28125,
      "learning_rate": 3.271896280027311e-05,
      "loss": 0.8384,
      "step": 351860
    },
    {
      "epoch": 1.2332166251353711,
      "grad_norm": 2.421875,
      "learning_rate": 3.271831377160941e-05,
      "loss": 0.8417,
      "step": 351870
    },
    {
      "epoch": 1.2332516726422666,
      "grad_norm": 2.984375,
      "learning_rate": 3.2717664742945705e-05,
      "loss": 0.9222,
      "step": 351880
    },
    {
      "epoch": 1.2332867201491622,
      "grad_norm": 3.015625,
      "learning_rate": 3.2717015714282006e-05,
      "loss": 0.8838,
      "step": 351890
    },
    {
      "epoch": 1.2333217676560577,
      "grad_norm": 2.71875,
      "learning_rate": 3.271636668561831e-05,
      "loss": 0.8443,
      "step": 351900
    },
    {
      "epoch": 1.2333568151629535,
      "grad_norm": 2.78125,
      "learning_rate": 3.27157176569546e-05,
      "loss": 0.777,
      "step": 351910
    },
    {
      "epoch": 1.233391862669849,
      "grad_norm": 3.0625,
      "learning_rate": 3.2715068628290904e-05,
      "loss": 0.8731,
      "step": 351920
    },
    {
      "epoch": 1.2334269101767446,
      "grad_norm": 3.015625,
      "learning_rate": 3.27144195996272e-05,
      "loss": 0.7921,
      "step": 351930
    },
    {
      "epoch": 1.23346195768364,
      "grad_norm": 3.171875,
      "learning_rate": 3.27137705709635e-05,
      "loss": 0.9328,
      "step": 351940
    },
    {
      "epoch": 1.2334970051905358,
      "grad_norm": 2.90625,
      "learning_rate": 3.2713121542299794e-05,
      "loss": 0.8917,
      "step": 351950
    },
    {
      "epoch": 1.2335320526974314,
      "grad_norm": 3.109375,
      "learning_rate": 3.2712472513636096e-05,
      "loss": 0.8769,
      "step": 351960
    },
    {
      "epoch": 1.233567100204327,
      "grad_norm": 2.875,
      "learning_rate": 3.271182348497239e-05,
      "loss": 0.8595,
      "step": 351970
    },
    {
      "epoch": 1.2336021477112227,
      "grad_norm": 3.0625,
      "learning_rate": 3.271117445630869e-05,
      "loss": 0.9202,
      "step": 351980
    },
    {
      "epoch": 1.2336371952181182,
      "grad_norm": 3.59375,
      "learning_rate": 3.2710525427644986e-05,
      "loss": 0.8434,
      "step": 351990
    },
    {
      "epoch": 1.2336722427250137,
      "grad_norm": 2.390625,
      "learning_rate": 3.270987639898129e-05,
      "loss": 0.8031,
      "step": 352000
    },
    {
      "epoch": 1.2337072902319093,
      "grad_norm": 3.046875,
      "learning_rate": 3.270922737031758e-05,
      "loss": 0.7661,
      "step": 352010
    },
    {
      "epoch": 1.233742337738805,
      "grad_norm": 2.53125,
      "learning_rate": 3.2708578341653884e-05,
      "loss": 0.8667,
      "step": 352020
    },
    {
      "epoch": 1.2337773852457006,
      "grad_norm": 2.8125,
      "learning_rate": 3.2707929312990185e-05,
      "loss": 0.8051,
      "step": 352030
    },
    {
      "epoch": 1.2338124327525961,
      "grad_norm": 2.984375,
      "learning_rate": 3.270728028432648e-05,
      "loss": 0.8852,
      "step": 352040
    },
    {
      "epoch": 1.2338474802594916,
      "grad_norm": 2.65625,
      "learning_rate": 3.270663125566278e-05,
      "loss": 0.8777,
      "step": 352050
    },
    {
      "epoch": 1.2338825277663874,
      "grad_norm": 2.828125,
      "learning_rate": 3.2705982226999076e-05,
      "loss": 0.8818,
      "step": 352060
    },
    {
      "epoch": 1.233917575273283,
      "grad_norm": 2.90625,
      "learning_rate": 3.270533319833538e-05,
      "loss": 0.899,
      "step": 352070
    },
    {
      "epoch": 1.2339526227801785,
      "grad_norm": 3.171875,
      "learning_rate": 3.270468416967167e-05,
      "loss": 0.8846,
      "step": 352080
    },
    {
      "epoch": 1.2339876702870742,
      "grad_norm": 3.1875,
      "learning_rate": 3.270403514100797e-05,
      "loss": 0.8586,
      "step": 352090
    },
    {
      "epoch": 1.2340227177939698,
      "grad_norm": 3.1875,
      "learning_rate": 3.270338611234427e-05,
      "loss": 0.8889,
      "step": 352100
    },
    {
      "epoch": 1.2340577653008653,
      "grad_norm": 3.140625,
      "learning_rate": 3.270273708368057e-05,
      "loss": 0.8133,
      "step": 352110
    },
    {
      "epoch": 1.2340928128077608,
      "grad_norm": 3.078125,
      "learning_rate": 3.2702088055016864e-05,
      "loss": 0.8334,
      "step": 352120
    },
    {
      "epoch": 1.2341278603146566,
      "grad_norm": 2.84375,
      "learning_rate": 3.270143902635316e-05,
      "loss": 0.8551,
      "step": 352130
    },
    {
      "epoch": 1.2341629078215521,
      "grad_norm": 3.265625,
      "learning_rate": 3.270078999768946e-05,
      "loss": 0.8327,
      "step": 352140
    },
    {
      "epoch": 1.2341979553284477,
      "grad_norm": 3.0625,
      "learning_rate": 3.2700140969025754e-05,
      "loss": 0.8576,
      "step": 352150
    },
    {
      "epoch": 1.2342330028353432,
      "grad_norm": 2.765625,
      "learning_rate": 3.2699491940362056e-05,
      "loss": 0.8299,
      "step": 352160
    },
    {
      "epoch": 1.234268050342239,
      "grad_norm": 2.546875,
      "learning_rate": 3.269884291169835e-05,
      "loss": 0.8254,
      "step": 352170
    },
    {
      "epoch": 1.2343030978491345,
      "grad_norm": 2.984375,
      "learning_rate": 3.269819388303465e-05,
      "loss": 0.9244,
      "step": 352180
    },
    {
      "epoch": 1.23433814535603,
      "grad_norm": 2.8125,
      "learning_rate": 3.2697544854370946e-05,
      "loss": 0.8195,
      "step": 352190
    },
    {
      "epoch": 1.2343731928629258,
      "grad_norm": 2.953125,
      "learning_rate": 3.269689582570725e-05,
      "loss": 0.856,
      "step": 352200
    },
    {
      "epoch": 1.2344082403698213,
      "grad_norm": 2.703125,
      "learning_rate": 3.269624679704354e-05,
      "loss": 0.8727,
      "step": 352210
    },
    {
      "epoch": 1.2344432878767169,
      "grad_norm": 3.078125,
      "learning_rate": 3.2695597768379844e-05,
      "loss": 0.8742,
      "step": 352220
    },
    {
      "epoch": 1.2344783353836124,
      "grad_norm": 3.125,
      "learning_rate": 3.269494873971614e-05,
      "loss": 0.8627,
      "step": 352230
    },
    {
      "epoch": 1.2345133828905082,
      "grad_norm": 3.171875,
      "learning_rate": 3.269429971105244e-05,
      "loss": 0.8922,
      "step": 352240
    },
    {
      "epoch": 1.2345484303974037,
      "grad_norm": 3.03125,
      "learning_rate": 3.269365068238874e-05,
      "loss": 0.9097,
      "step": 352250
    },
    {
      "epoch": 1.2345834779042992,
      "grad_norm": 2.875,
      "learning_rate": 3.2693001653725036e-05,
      "loss": 0.8563,
      "step": 352260
    },
    {
      "epoch": 1.2346185254111948,
      "grad_norm": 2.953125,
      "learning_rate": 3.269235262506134e-05,
      "loss": 0.83,
      "step": 352270
    },
    {
      "epoch": 1.2346535729180905,
      "grad_norm": 3.015625,
      "learning_rate": 3.269170359639763e-05,
      "loss": 0.9155,
      "step": 352280
    },
    {
      "epoch": 1.234688620424986,
      "grad_norm": 2.84375,
      "learning_rate": 3.269105456773393e-05,
      "loss": 0.8714,
      "step": 352290
    },
    {
      "epoch": 1.2347236679318816,
      "grad_norm": 2.71875,
      "learning_rate": 3.269040553907023e-05,
      "loss": 0.846,
      "step": 352300
    },
    {
      "epoch": 1.2347587154387774,
      "grad_norm": 2.984375,
      "learning_rate": 3.268975651040653e-05,
      "loss": 0.8969,
      "step": 352310
    },
    {
      "epoch": 1.234793762945673,
      "grad_norm": 2.8125,
      "learning_rate": 3.2689107481742824e-05,
      "loss": 0.8352,
      "step": 352320
    },
    {
      "epoch": 1.2348288104525684,
      "grad_norm": 2.296875,
      "learning_rate": 3.2688458453079125e-05,
      "loss": 0.8273,
      "step": 352330
    },
    {
      "epoch": 1.234863857959464,
      "grad_norm": 2.515625,
      "learning_rate": 3.268780942441542e-05,
      "loss": 0.7035,
      "step": 352340
    },
    {
      "epoch": 1.2348989054663597,
      "grad_norm": 2.875,
      "learning_rate": 3.268716039575172e-05,
      "loss": 0.8376,
      "step": 352350
    },
    {
      "epoch": 1.2349339529732553,
      "grad_norm": 2.703125,
      "learning_rate": 3.2686511367088016e-05,
      "loss": 0.8801,
      "step": 352360
    },
    {
      "epoch": 1.2349690004801508,
      "grad_norm": 3.578125,
      "learning_rate": 3.268586233842432e-05,
      "loss": 0.8451,
      "step": 352370
    },
    {
      "epoch": 1.2350040479870463,
      "grad_norm": 2.84375,
      "learning_rate": 3.268521330976061e-05,
      "loss": 0.8381,
      "step": 352380
    },
    {
      "epoch": 1.235039095493942,
      "grad_norm": 2.9375,
      "learning_rate": 3.268456428109691e-05,
      "loss": 0.8573,
      "step": 352390
    },
    {
      "epoch": 1.2350741430008376,
      "grad_norm": 2.875,
      "learning_rate": 3.2683915252433215e-05,
      "loss": 0.9258,
      "step": 352400
    },
    {
      "epoch": 1.2351091905077332,
      "grad_norm": 2.5,
      "learning_rate": 3.268326622376951e-05,
      "loss": 0.8183,
      "step": 352410
    },
    {
      "epoch": 1.235144238014629,
      "grad_norm": 2.21875,
      "learning_rate": 3.268261719510581e-05,
      "loss": 0.8812,
      "step": 352420
    },
    {
      "epoch": 1.2351792855215245,
      "grad_norm": 2.734375,
      "learning_rate": 3.2681968166442105e-05,
      "loss": 0.9048,
      "step": 352430
    },
    {
      "epoch": 1.23521433302842,
      "grad_norm": 2.5,
      "learning_rate": 3.2681319137778407e-05,
      "loss": 0.8859,
      "step": 352440
    },
    {
      "epoch": 1.2352493805353157,
      "grad_norm": 2.890625,
      "learning_rate": 3.26806701091147e-05,
      "loss": 0.87,
      "step": 352450
    },
    {
      "epoch": 1.2352844280422113,
      "grad_norm": 2.734375,
      "learning_rate": 3.2680021080451e-05,
      "loss": 0.8631,
      "step": 352460
    },
    {
      "epoch": 1.2353194755491068,
      "grad_norm": 3.046875,
      "learning_rate": 3.26793720517873e-05,
      "loss": 0.8583,
      "step": 352470
    },
    {
      "epoch": 1.2353545230560024,
      "grad_norm": 3.09375,
      "learning_rate": 3.26787230231236e-05,
      "loss": 0.8893,
      "step": 352480
    },
    {
      "epoch": 1.2353895705628979,
      "grad_norm": 3.046875,
      "learning_rate": 3.267807399445989e-05,
      "loss": 0.8251,
      "step": 352490
    },
    {
      "epoch": 1.2354246180697936,
      "grad_norm": 3.0625,
      "learning_rate": 3.267742496579619e-05,
      "loss": 0.912,
      "step": 352500
    },
    {
      "epoch": 1.2354596655766892,
      "grad_norm": 3.078125,
      "learning_rate": 3.267677593713249e-05,
      "loss": 0.87,
      "step": 352510
    },
    {
      "epoch": 1.2354947130835847,
      "grad_norm": 2.84375,
      "learning_rate": 3.2676126908468784e-05,
      "loss": 0.8811,
      "step": 352520
    },
    {
      "epoch": 1.2355297605904805,
      "grad_norm": 2.75,
      "learning_rate": 3.2675477879805085e-05,
      "loss": 0.7943,
      "step": 352530
    },
    {
      "epoch": 1.235564808097376,
      "grad_norm": 3.125,
      "learning_rate": 3.267482885114138e-05,
      "loss": 0.9876,
      "step": 352540
    },
    {
      "epoch": 1.2355998556042715,
      "grad_norm": 2.984375,
      "learning_rate": 3.267417982247768e-05,
      "loss": 0.8645,
      "step": 352550
    },
    {
      "epoch": 1.2356349031111673,
      "grad_norm": 2.859375,
      "learning_rate": 3.2673530793813976e-05,
      "loss": 0.824,
      "step": 352560
    },
    {
      "epoch": 1.2356699506180628,
      "grad_norm": 3.125,
      "learning_rate": 3.267288176515028e-05,
      "loss": 0.8506,
      "step": 352570
    },
    {
      "epoch": 1.2357049981249584,
      "grad_norm": 2.96875,
      "learning_rate": 3.267223273648657e-05,
      "loss": 0.7422,
      "step": 352580
    },
    {
      "epoch": 1.235740045631854,
      "grad_norm": 2.96875,
      "learning_rate": 3.267158370782287e-05,
      "loss": 0.8566,
      "step": 352590
    },
    {
      "epoch": 1.2357750931387497,
      "grad_norm": 2.4375,
      "learning_rate": 3.267093467915917e-05,
      "loss": 0.8023,
      "step": 352600
    },
    {
      "epoch": 1.2358101406456452,
      "grad_norm": 3.15625,
      "learning_rate": 3.267028565049547e-05,
      "loss": 0.7851,
      "step": 352610
    },
    {
      "epoch": 1.2358451881525407,
      "grad_norm": 3.671875,
      "learning_rate": 3.266963662183177e-05,
      "loss": 0.8185,
      "step": 352620
    },
    {
      "epoch": 1.2358802356594363,
      "grad_norm": 3.015625,
      "learning_rate": 3.2668987593168065e-05,
      "loss": 0.8691,
      "step": 352630
    },
    {
      "epoch": 1.235915283166332,
      "grad_norm": 2.703125,
      "learning_rate": 3.2668338564504367e-05,
      "loss": 0.8354,
      "step": 352640
    },
    {
      "epoch": 1.2359503306732276,
      "grad_norm": 2.890625,
      "learning_rate": 3.266768953584066e-05,
      "loss": 0.7798,
      "step": 352650
    },
    {
      "epoch": 1.235985378180123,
      "grad_norm": 2.375,
      "learning_rate": 3.266704050717696e-05,
      "loss": 0.7602,
      "step": 352660
    },
    {
      "epoch": 1.2360204256870189,
      "grad_norm": 2.65625,
      "learning_rate": 3.266639147851326e-05,
      "loss": 0.7911,
      "step": 352670
    },
    {
      "epoch": 1.2360554731939144,
      "grad_norm": 2.875,
      "learning_rate": 3.266574244984956e-05,
      "loss": 0.7716,
      "step": 352680
    },
    {
      "epoch": 1.23609052070081,
      "grad_norm": 3.71875,
      "learning_rate": 3.266509342118585e-05,
      "loss": 0.7868,
      "step": 352690
    },
    {
      "epoch": 1.2361255682077055,
      "grad_norm": 3.046875,
      "learning_rate": 3.2664444392522155e-05,
      "loss": 0.8919,
      "step": 352700
    },
    {
      "epoch": 1.2361606157146012,
      "grad_norm": 2.75,
      "learning_rate": 3.266379536385845e-05,
      "loss": 0.874,
      "step": 352710
    },
    {
      "epoch": 1.2361956632214968,
      "grad_norm": 2.859375,
      "learning_rate": 3.266314633519475e-05,
      "loss": 0.796,
      "step": 352720
    },
    {
      "epoch": 1.2362307107283923,
      "grad_norm": 2.90625,
      "learning_rate": 3.2662497306531045e-05,
      "loss": 0.8945,
      "step": 352730
    },
    {
      "epoch": 1.2362657582352878,
      "grad_norm": 3.03125,
      "learning_rate": 3.2661848277867347e-05,
      "loss": 0.8644,
      "step": 352740
    },
    {
      "epoch": 1.2363008057421836,
      "grad_norm": 2.984375,
      "learning_rate": 3.266119924920364e-05,
      "loss": 0.91,
      "step": 352750
    },
    {
      "epoch": 1.2363358532490791,
      "grad_norm": 3.265625,
      "learning_rate": 3.266055022053994e-05,
      "loss": 0.8377,
      "step": 352760
    },
    {
      "epoch": 1.2363709007559747,
      "grad_norm": 3.015625,
      "learning_rate": 3.2659901191876244e-05,
      "loss": 0.8446,
      "step": 352770
    },
    {
      "epoch": 1.2364059482628704,
      "grad_norm": 3.046875,
      "learning_rate": 3.265925216321254e-05,
      "loss": 0.9063,
      "step": 352780
    },
    {
      "epoch": 1.236440995769766,
      "grad_norm": 2.90625,
      "learning_rate": 3.265860313454884e-05,
      "loss": 0.8678,
      "step": 352790
    },
    {
      "epoch": 1.2364760432766615,
      "grad_norm": 2.515625,
      "learning_rate": 3.2657954105885135e-05,
      "loss": 0.8564,
      "step": 352800
    },
    {
      "epoch": 1.236511090783557,
      "grad_norm": 2.765625,
      "learning_rate": 3.2657305077221436e-05,
      "loss": 0.8367,
      "step": 352810
    },
    {
      "epoch": 1.2365461382904528,
      "grad_norm": 2.84375,
      "learning_rate": 3.265665604855773e-05,
      "loss": 0.7843,
      "step": 352820
    },
    {
      "epoch": 1.2365811857973483,
      "grad_norm": 3.046875,
      "learning_rate": 3.265600701989403e-05,
      "loss": 0.898,
      "step": 352830
    },
    {
      "epoch": 1.2366162333042439,
      "grad_norm": 2.75,
      "learning_rate": 3.2655357991230327e-05,
      "loss": 0.8964,
      "step": 352840
    },
    {
      "epoch": 1.2366512808111394,
      "grad_norm": 2.546875,
      "learning_rate": 3.265470896256663e-05,
      "loss": 0.8279,
      "step": 352850
    },
    {
      "epoch": 1.2366863283180352,
      "grad_norm": 2.984375,
      "learning_rate": 3.265405993390292e-05,
      "loss": 0.841,
      "step": 352860
    },
    {
      "epoch": 1.2367213758249307,
      "grad_norm": 3.046875,
      "learning_rate": 3.265341090523922e-05,
      "loss": 0.8403,
      "step": 352870
    },
    {
      "epoch": 1.2367564233318262,
      "grad_norm": 2.75,
      "learning_rate": 3.265276187657552e-05,
      "loss": 0.8174,
      "step": 352880
    },
    {
      "epoch": 1.236791470838722,
      "grad_norm": 2.890625,
      "learning_rate": 3.265211284791181e-05,
      "loss": 0.8261,
      "step": 352890
    },
    {
      "epoch": 1.2368265183456175,
      "grad_norm": 3.25,
      "learning_rate": 3.2651463819248115e-05,
      "loss": 0.8318,
      "step": 352900
    },
    {
      "epoch": 1.236861565852513,
      "grad_norm": 2.875,
      "learning_rate": 3.265081479058441e-05,
      "loss": 0.888,
      "step": 352910
    },
    {
      "epoch": 1.2368966133594086,
      "grad_norm": 2.75,
      "learning_rate": 3.265016576192071e-05,
      "loss": 0.8593,
      "step": 352920
    },
    {
      "epoch": 1.2369316608663043,
      "grad_norm": 3.25,
      "learning_rate": 3.2649516733257005e-05,
      "loss": 0.8585,
      "step": 352930
    },
    {
      "epoch": 1.2369667083731999,
      "grad_norm": 2.875,
      "learning_rate": 3.2648867704593307e-05,
      "loss": 0.9503,
      "step": 352940
    },
    {
      "epoch": 1.2370017558800954,
      "grad_norm": 3.40625,
      "learning_rate": 3.26482186759296e-05,
      "loss": 0.8215,
      "step": 352950
    },
    {
      "epoch": 1.237036803386991,
      "grad_norm": 2.765625,
      "learning_rate": 3.26475696472659e-05,
      "loss": 0.8935,
      "step": 352960
    },
    {
      "epoch": 1.2370718508938867,
      "grad_norm": 3.171875,
      "learning_rate": 3.26469206186022e-05,
      "loss": 0.8867,
      "step": 352970
    },
    {
      "epoch": 1.2371068984007823,
      "grad_norm": 2.890625,
      "learning_rate": 3.26462715899385e-05,
      "loss": 0.9326,
      "step": 352980
    },
    {
      "epoch": 1.2371419459076778,
      "grad_norm": 2.625,
      "learning_rate": 3.26456225612748e-05,
      "loss": 0.7793,
      "step": 352990
    },
    {
      "epoch": 1.2371769934145735,
      "grad_norm": 2.921875,
      "learning_rate": 3.2644973532611095e-05,
      "loss": 0.852,
      "step": 353000
    },
    {
      "epoch": 1.237212040921469,
      "grad_norm": 2.984375,
      "learning_rate": 3.2644324503947396e-05,
      "loss": 0.8499,
      "step": 353010
    },
    {
      "epoch": 1.2372470884283646,
      "grad_norm": 2.65625,
      "learning_rate": 3.264367547528369e-05,
      "loss": 0.8271,
      "step": 353020
    },
    {
      "epoch": 1.2372821359352602,
      "grad_norm": 2.734375,
      "learning_rate": 3.264302644661999e-05,
      "loss": 0.8321,
      "step": 353030
    },
    {
      "epoch": 1.237317183442156,
      "grad_norm": 3.4375,
      "learning_rate": 3.2642377417956287e-05,
      "loss": 0.9329,
      "step": 353040
    },
    {
      "epoch": 1.2373522309490514,
      "grad_norm": 2.859375,
      "learning_rate": 3.264172838929259e-05,
      "loss": 0.8785,
      "step": 353050
    },
    {
      "epoch": 1.237387278455947,
      "grad_norm": 3.078125,
      "learning_rate": 3.264107936062888e-05,
      "loss": 0.8147,
      "step": 353060
    },
    {
      "epoch": 1.2374223259628425,
      "grad_norm": 2.6875,
      "learning_rate": 3.2640430331965184e-05,
      "loss": 0.7917,
      "step": 353070
    },
    {
      "epoch": 1.2374573734697383,
      "grad_norm": 2.875,
      "learning_rate": 3.263978130330148e-05,
      "loss": 0.9004,
      "step": 353080
    },
    {
      "epoch": 1.2374924209766338,
      "grad_norm": 2.734375,
      "learning_rate": 3.263913227463778e-05,
      "loss": 0.8969,
      "step": 353090
    },
    {
      "epoch": 1.2375274684835293,
      "grad_norm": 2.421875,
      "learning_rate": 3.2638483245974075e-05,
      "loss": 0.7834,
      "step": 353100
    },
    {
      "epoch": 1.237562515990425,
      "grad_norm": 3.15625,
      "learning_rate": 3.2637834217310376e-05,
      "loss": 0.8345,
      "step": 353110
    },
    {
      "epoch": 1.2375975634973206,
      "grad_norm": 3.46875,
      "learning_rate": 3.263718518864667e-05,
      "loss": 0.8674,
      "step": 353120
    },
    {
      "epoch": 1.2376326110042162,
      "grad_norm": 3.046875,
      "learning_rate": 3.263653615998297e-05,
      "loss": 0.8036,
      "step": 353130
    },
    {
      "epoch": 1.237667658511112,
      "grad_norm": 2.8125,
      "learning_rate": 3.263588713131927e-05,
      "loss": 0.7873,
      "step": 353140
    },
    {
      "epoch": 1.2377027060180075,
      "grad_norm": 2.6875,
      "learning_rate": 3.263523810265557e-05,
      "loss": 0.8688,
      "step": 353150
    },
    {
      "epoch": 1.237737753524903,
      "grad_norm": 2.8125,
      "learning_rate": 3.263458907399187e-05,
      "loss": 0.7774,
      "step": 353160
    },
    {
      "epoch": 1.2377728010317985,
      "grad_norm": 3.296875,
      "learning_rate": 3.2633940045328164e-05,
      "loss": 0.7975,
      "step": 353170
    },
    {
      "epoch": 1.237807848538694,
      "grad_norm": 3.234375,
      "learning_rate": 3.2633291016664465e-05,
      "loss": 0.9087,
      "step": 353180
    },
    {
      "epoch": 1.2378428960455898,
      "grad_norm": 2.90625,
      "learning_rate": 3.263264198800076e-05,
      "loss": 0.813,
      "step": 353190
    },
    {
      "epoch": 1.2378779435524854,
      "grad_norm": 2.921875,
      "learning_rate": 3.263199295933706e-05,
      "loss": 0.8268,
      "step": 353200
    },
    {
      "epoch": 1.237912991059381,
      "grad_norm": 2.875,
      "learning_rate": 3.2631343930673356e-05,
      "loss": 0.8083,
      "step": 353210
    },
    {
      "epoch": 1.2379480385662767,
      "grad_norm": 2.703125,
      "learning_rate": 3.263069490200966e-05,
      "loss": 0.8114,
      "step": 353220
    },
    {
      "epoch": 1.2379830860731722,
      "grad_norm": 2.8125,
      "learning_rate": 3.263004587334595e-05,
      "loss": 0.7208,
      "step": 353230
    },
    {
      "epoch": 1.2380181335800677,
      "grad_norm": 2.953125,
      "learning_rate": 3.262939684468225e-05,
      "loss": 0.7718,
      "step": 353240
    },
    {
      "epoch": 1.2380531810869635,
      "grad_norm": 3.125,
      "learning_rate": 3.262874781601855e-05,
      "loss": 0.8716,
      "step": 353250
    },
    {
      "epoch": 1.238088228593859,
      "grad_norm": 2.5,
      "learning_rate": 3.262809878735484e-05,
      "loss": 0.874,
      "step": 353260
    },
    {
      "epoch": 1.2381232761007546,
      "grad_norm": 2.734375,
      "learning_rate": 3.2627449758691144e-05,
      "loss": 0.8772,
      "step": 353270
    },
    {
      "epoch": 1.23815832360765,
      "grad_norm": 3.078125,
      "learning_rate": 3.262680073002744e-05,
      "loss": 0.8465,
      "step": 353280
    },
    {
      "epoch": 1.2381933711145459,
      "grad_norm": 3.03125,
      "learning_rate": 3.262615170136374e-05,
      "loss": 0.8953,
      "step": 353290
    },
    {
      "epoch": 1.2382284186214414,
      "grad_norm": 2.71875,
      "learning_rate": 3.2625502672700035e-05,
      "loss": 0.8648,
      "step": 353300
    },
    {
      "epoch": 1.238263466128337,
      "grad_norm": 2.765625,
      "learning_rate": 3.2624853644036336e-05,
      "loss": 0.9191,
      "step": 353310
    },
    {
      "epoch": 1.2382985136352325,
      "grad_norm": 2.5625,
      "learning_rate": 3.262420461537263e-05,
      "loss": 0.9094,
      "step": 353320
    },
    {
      "epoch": 1.2383335611421282,
      "grad_norm": 2.96875,
      "learning_rate": 3.262355558670893e-05,
      "loss": 0.9048,
      "step": 353330
    },
    {
      "epoch": 1.2383686086490238,
      "grad_norm": 2.6875,
      "learning_rate": 3.2622906558045227e-05,
      "loss": 0.851,
      "step": 353340
    },
    {
      "epoch": 1.2384036561559193,
      "grad_norm": 2.875,
      "learning_rate": 3.262225752938153e-05,
      "loss": 0.8457,
      "step": 353350
    },
    {
      "epoch": 1.238438703662815,
      "grad_norm": 3.0,
      "learning_rate": 3.262160850071783e-05,
      "loss": 0.7775,
      "step": 353360
    },
    {
      "epoch": 1.2384737511697106,
      "grad_norm": 2.859375,
      "learning_rate": 3.2620959472054124e-05,
      "loss": 0.9406,
      "step": 353370
    },
    {
      "epoch": 1.2385087986766061,
      "grad_norm": 2.84375,
      "learning_rate": 3.2620310443390425e-05,
      "loss": 0.8778,
      "step": 353380
    },
    {
      "epoch": 1.2385438461835017,
      "grad_norm": 2.5625,
      "learning_rate": 3.261966141472672e-05,
      "loss": 0.8794,
      "step": 353390
    },
    {
      "epoch": 1.2385788936903974,
      "grad_norm": 2.5625,
      "learning_rate": 3.261901238606302e-05,
      "loss": 0.8231,
      "step": 353400
    },
    {
      "epoch": 1.238613941197293,
      "grad_norm": 2.6875,
      "learning_rate": 3.2618363357399316e-05,
      "loss": 0.8278,
      "step": 353410
    },
    {
      "epoch": 1.2386489887041885,
      "grad_norm": 3.296875,
      "learning_rate": 3.261771432873562e-05,
      "loss": 0.9188,
      "step": 353420
    },
    {
      "epoch": 1.238684036211084,
      "grad_norm": 2.609375,
      "learning_rate": 3.261706530007191e-05,
      "loss": 0.8541,
      "step": 353430
    },
    {
      "epoch": 1.2387190837179798,
      "grad_norm": 3.046875,
      "learning_rate": 3.261641627140821e-05,
      "loss": 0.8844,
      "step": 353440
    },
    {
      "epoch": 1.2387541312248753,
      "grad_norm": 2.765625,
      "learning_rate": 3.261576724274451e-05,
      "loss": 0.8568,
      "step": 353450
    },
    {
      "epoch": 1.2387891787317709,
      "grad_norm": 3.203125,
      "learning_rate": 3.261511821408081e-05,
      "loss": 0.8754,
      "step": 353460
    },
    {
      "epoch": 1.2388242262386666,
      "grad_norm": 3.1875,
      "learning_rate": 3.2614469185417104e-05,
      "loss": 0.8937,
      "step": 353470
    },
    {
      "epoch": 1.2388592737455622,
      "grad_norm": 2.953125,
      "learning_rate": 3.2613820156753405e-05,
      "loss": 0.8753,
      "step": 353480
    },
    {
      "epoch": 1.2388943212524577,
      "grad_norm": 2.9375,
      "learning_rate": 3.261317112808971e-05,
      "loss": 0.8093,
      "step": 353490
    },
    {
      "epoch": 1.2389293687593532,
      "grad_norm": 2.703125,
      "learning_rate": 3.2612522099426e-05,
      "loss": 0.8263,
      "step": 353500
    },
    {
      "epoch": 1.238964416266249,
      "grad_norm": 3.328125,
      "learning_rate": 3.26118730707623e-05,
      "loss": 0.8217,
      "step": 353510
    },
    {
      "epoch": 1.2389994637731445,
      "grad_norm": 2.9375,
      "learning_rate": 3.26112240420986e-05,
      "loss": 0.8737,
      "step": 353520
    },
    {
      "epoch": 1.23903451128004,
      "grad_norm": 3.078125,
      "learning_rate": 3.26105750134349e-05,
      "loss": 0.8677,
      "step": 353530
    },
    {
      "epoch": 1.2390695587869356,
      "grad_norm": 2.453125,
      "learning_rate": 3.260992598477119e-05,
      "loss": 0.7862,
      "step": 353540
    },
    {
      "epoch": 1.2391046062938313,
      "grad_norm": 3.046875,
      "learning_rate": 3.2609276956107495e-05,
      "loss": 0.8804,
      "step": 353550
    },
    {
      "epoch": 1.2391396538007269,
      "grad_norm": 3.4375,
      "learning_rate": 3.260862792744379e-05,
      "loss": 0.9444,
      "step": 353560
    },
    {
      "epoch": 1.2391747013076224,
      "grad_norm": 3.296875,
      "learning_rate": 3.260797889878009e-05,
      "loss": 0.8774,
      "step": 353570
    },
    {
      "epoch": 1.2392097488145182,
      "grad_norm": 3.171875,
      "learning_rate": 3.2607329870116385e-05,
      "loss": 0.94,
      "step": 353580
    },
    {
      "epoch": 1.2392447963214137,
      "grad_norm": 3.4375,
      "learning_rate": 3.260668084145269e-05,
      "loss": 0.8876,
      "step": 353590
    },
    {
      "epoch": 1.2392798438283092,
      "grad_norm": 3.015625,
      "learning_rate": 3.260603181278898e-05,
      "loss": 0.8963,
      "step": 353600
    },
    {
      "epoch": 1.2393148913352048,
      "grad_norm": 2.75,
      "learning_rate": 3.260538278412528e-05,
      "loss": 0.8051,
      "step": 353610
    },
    {
      "epoch": 1.2393499388421005,
      "grad_norm": 2.71875,
      "learning_rate": 3.260473375546158e-05,
      "loss": 0.823,
      "step": 353620
    },
    {
      "epoch": 1.239384986348996,
      "grad_norm": 3.0625,
      "learning_rate": 3.260408472679787e-05,
      "loss": 0.8609,
      "step": 353630
    },
    {
      "epoch": 1.2394200338558916,
      "grad_norm": 3.328125,
      "learning_rate": 3.260343569813417e-05,
      "loss": 0.9061,
      "step": 353640
    },
    {
      "epoch": 1.2394550813627871,
      "grad_norm": 2.265625,
      "learning_rate": 3.260278666947047e-05,
      "loss": 0.8393,
      "step": 353650
    },
    {
      "epoch": 1.239490128869683,
      "grad_norm": 2.6875,
      "learning_rate": 3.260213764080677e-05,
      "loss": 0.9194,
      "step": 353660
    },
    {
      "epoch": 1.2395251763765784,
      "grad_norm": 2.921875,
      "learning_rate": 3.2601488612143064e-05,
      "loss": 0.7888,
      "step": 353670
    },
    {
      "epoch": 1.239560223883474,
      "grad_norm": 2.921875,
      "learning_rate": 3.2600839583479365e-05,
      "loss": 0.8267,
      "step": 353680
    },
    {
      "epoch": 1.2395952713903697,
      "grad_norm": 2.84375,
      "learning_rate": 3.260019055481566e-05,
      "loss": 0.7959,
      "step": 353690
    },
    {
      "epoch": 1.2396303188972653,
      "grad_norm": 3.28125,
      "learning_rate": 3.259954152615196e-05,
      "loss": 0.9318,
      "step": 353700
    },
    {
      "epoch": 1.2396653664041608,
      "grad_norm": 3.4375,
      "learning_rate": 3.2598892497488256e-05,
      "loss": 0.8792,
      "step": 353710
    },
    {
      "epoch": 1.2397004139110563,
      "grad_norm": 2.859375,
      "learning_rate": 3.259824346882456e-05,
      "loss": 0.8417,
      "step": 353720
    },
    {
      "epoch": 1.239735461417952,
      "grad_norm": 3.234375,
      "learning_rate": 3.259759444016086e-05,
      "loss": 0.8474,
      "step": 353730
    },
    {
      "epoch": 1.2397705089248476,
      "grad_norm": 2.8125,
      "learning_rate": 3.259694541149715e-05,
      "loss": 0.8017,
      "step": 353740
    },
    {
      "epoch": 1.2398055564317432,
      "grad_norm": 2.859375,
      "learning_rate": 3.2596296382833455e-05,
      "loss": 0.8698,
      "step": 353750
    },
    {
      "epoch": 1.2398406039386387,
      "grad_norm": 2.96875,
      "learning_rate": 3.259564735416975e-05,
      "loss": 0.8499,
      "step": 353760
    },
    {
      "epoch": 1.2398756514455345,
      "grad_norm": 3.046875,
      "learning_rate": 3.259499832550605e-05,
      "loss": 0.8769,
      "step": 353770
    },
    {
      "epoch": 1.23991069895243,
      "grad_norm": 2.65625,
      "learning_rate": 3.2594349296842345e-05,
      "loss": 0.8324,
      "step": 353780
    },
    {
      "epoch": 1.2399457464593255,
      "grad_norm": 2.71875,
      "learning_rate": 3.259370026817865e-05,
      "loss": 0.8591,
      "step": 353790
    },
    {
      "epoch": 1.2399807939662213,
      "grad_norm": 3.171875,
      "learning_rate": 3.259305123951494e-05,
      "loss": 0.9128,
      "step": 353800
    },
    {
      "epoch": 1.2400158414731168,
      "grad_norm": 3.21875,
      "learning_rate": 3.259240221085124e-05,
      "loss": 0.8615,
      "step": 353810
    },
    {
      "epoch": 1.2400508889800124,
      "grad_norm": 3.03125,
      "learning_rate": 3.259175318218754e-05,
      "loss": 0.8461,
      "step": 353820
    },
    {
      "epoch": 1.2400859364869081,
      "grad_norm": 2.90625,
      "learning_rate": 3.259110415352384e-05,
      "loss": 0.8959,
      "step": 353830
    },
    {
      "epoch": 1.2401209839938037,
      "grad_norm": 2.765625,
      "learning_rate": 3.259045512486013e-05,
      "loss": 0.9237,
      "step": 353840
    },
    {
      "epoch": 1.2401560315006992,
      "grad_norm": 3.4375,
      "learning_rate": 3.2589806096196435e-05,
      "loss": 0.922,
      "step": 353850
    },
    {
      "epoch": 1.2401910790075947,
      "grad_norm": 2.515625,
      "learning_rate": 3.2589157067532736e-05,
      "loss": 0.8776,
      "step": 353860
    },
    {
      "epoch": 1.2402261265144903,
      "grad_norm": 2.8125,
      "learning_rate": 3.258850803886903e-05,
      "loss": 0.9137,
      "step": 353870
    },
    {
      "epoch": 1.240261174021386,
      "grad_norm": 2.765625,
      "learning_rate": 3.258785901020533e-05,
      "loss": 0.804,
      "step": 353880
    },
    {
      "epoch": 1.2402962215282816,
      "grad_norm": 2.921875,
      "learning_rate": 3.258720998154163e-05,
      "loss": 0.8538,
      "step": 353890
    },
    {
      "epoch": 1.240331269035177,
      "grad_norm": 2.59375,
      "learning_rate": 3.258656095287793e-05,
      "loss": 0.84,
      "step": 353900
    },
    {
      "epoch": 1.2403663165420729,
      "grad_norm": 3.203125,
      "learning_rate": 3.258591192421422e-05,
      "loss": 0.871,
      "step": 353910
    },
    {
      "epoch": 1.2404013640489684,
      "grad_norm": 3.015625,
      "learning_rate": 3.2585262895550524e-05,
      "loss": 0.9162,
      "step": 353920
    },
    {
      "epoch": 1.240436411555864,
      "grad_norm": 2.75,
      "learning_rate": 3.258461386688682e-05,
      "loss": 0.8549,
      "step": 353930
    },
    {
      "epoch": 1.2404714590627597,
      "grad_norm": 2.8125,
      "learning_rate": 3.258396483822312e-05,
      "loss": 0.7972,
      "step": 353940
    },
    {
      "epoch": 1.2405065065696552,
      "grad_norm": 2.875,
      "learning_rate": 3.2583315809559415e-05,
      "loss": 0.942,
      "step": 353950
    },
    {
      "epoch": 1.2405415540765508,
      "grad_norm": 3.390625,
      "learning_rate": 3.2582666780895716e-05,
      "loss": 0.8392,
      "step": 353960
    },
    {
      "epoch": 1.2405766015834463,
      "grad_norm": 3.25,
      "learning_rate": 3.258201775223201e-05,
      "loss": 0.8637,
      "step": 353970
    },
    {
      "epoch": 1.240611649090342,
      "grad_norm": 2.953125,
      "learning_rate": 3.258136872356831e-05,
      "loss": 0.9429,
      "step": 353980
    },
    {
      "epoch": 1.2406466965972376,
      "grad_norm": 2.828125,
      "learning_rate": 3.258071969490461e-05,
      "loss": 0.8051,
      "step": 353990
    },
    {
      "epoch": 1.2406817441041331,
      "grad_norm": 2.890625,
      "learning_rate": 3.25800706662409e-05,
      "loss": 0.7905,
      "step": 354000
    },
    {
      "epoch": 1.2407167916110287,
      "grad_norm": 3.625,
      "learning_rate": 3.25794216375772e-05,
      "loss": 0.8322,
      "step": 354010
    },
    {
      "epoch": 1.2407518391179244,
      "grad_norm": 2.984375,
      "learning_rate": 3.25787726089135e-05,
      "loss": 0.8477,
      "step": 354020
    },
    {
      "epoch": 1.24078688662482,
      "grad_norm": 2.890625,
      "learning_rate": 3.25781235802498e-05,
      "loss": 0.8419,
      "step": 354030
    },
    {
      "epoch": 1.2408219341317155,
      "grad_norm": 2.890625,
      "learning_rate": 3.257747455158609e-05,
      "loss": 0.8216,
      "step": 354040
    },
    {
      "epoch": 1.2408569816386112,
      "grad_norm": 3.078125,
      "learning_rate": 3.2576825522922395e-05,
      "loss": 0.8194,
      "step": 354050
    },
    {
      "epoch": 1.2408920291455068,
      "grad_norm": 2.90625,
      "learning_rate": 3.257617649425869e-05,
      "loss": 0.8342,
      "step": 354060
    },
    {
      "epoch": 1.2409270766524023,
      "grad_norm": 3.0,
      "learning_rate": 3.257552746559499e-05,
      "loss": 0.8561,
      "step": 354070
    },
    {
      "epoch": 1.2409621241592979,
      "grad_norm": 2.875,
      "learning_rate": 3.2574878436931285e-05,
      "loss": 0.8391,
      "step": 354080
    },
    {
      "epoch": 1.2409971716661936,
      "grad_norm": 2.9375,
      "learning_rate": 3.257422940826759e-05,
      "loss": 0.7909,
      "step": 354090
    },
    {
      "epoch": 1.2410322191730891,
      "grad_norm": 2.625,
      "learning_rate": 3.257358037960389e-05,
      "loss": 0.9233,
      "step": 354100
    },
    {
      "epoch": 1.2410672666799847,
      "grad_norm": 3.125,
      "learning_rate": 3.257293135094018e-05,
      "loss": 0.906,
      "step": 354110
    },
    {
      "epoch": 1.2411023141868802,
      "grad_norm": 2.53125,
      "learning_rate": 3.2572282322276484e-05,
      "loss": 0.8197,
      "step": 354120
    },
    {
      "epoch": 1.241137361693776,
      "grad_norm": 3.25,
      "learning_rate": 3.257163329361278e-05,
      "loss": 0.9074,
      "step": 354130
    },
    {
      "epoch": 1.2411724092006715,
      "grad_norm": 3.15625,
      "learning_rate": 3.257098426494908e-05,
      "loss": 0.8814,
      "step": 354140
    },
    {
      "epoch": 1.241207456707567,
      "grad_norm": 2.6875,
      "learning_rate": 3.2570335236285375e-05,
      "loss": 0.8467,
      "step": 354150
    },
    {
      "epoch": 1.2412425042144628,
      "grad_norm": 3.234375,
      "learning_rate": 3.2569686207621676e-05,
      "loss": 0.8246,
      "step": 354160
    },
    {
      "epoch": 1.2412775517213583,
      "grad_norm": 2.640625,
      "learning_rate": 3.256903717895797e-05,
      "loss": 0.7518,
      "step": 354170
    },
    {
      "epoch": 1.2413125992282539,
      "grad_norm": 2.875,
      "learning_rate": 3.256838815029427e-05,
      "loss": 0.8245,
      "step": 354180
    },
    {
      "epoch": 1.2413476467351494,
      "grad_norm": 2.953125,
      "learning_rate": 3.256773912163057e-05,
      "loss": 0.8781,
      "step": 354190
    },
    {
      "epoch": 1.2413826942420452,
      "grad_norm": 3.03125,
      "learning_rate": 3.256709009296687e-05,
      "loss": 0.9112,
      "step": 354200
    },
    {
      "epoch": 1.2414177417489407,
      "grad_norm": 3.046875,
      "learning_rate": 3.256644106430316e-05,
      "loss": 0.9016,
      "step": 354210
    },
    {
      "epoch": 1.2414527892558362,
      "grad_norm": 3.109375,
      "learning_rate": 3.2565792035639464e-05,
      "loss": 0.8604,
      "step": 354220
    },
    {
      "epoch": 1.2414878367627318,
      "grad_norm": 2.953125,
      "learning_rate": 3.2565143006975766e-05,
      "loss": 0.8328,
      "step": 354230
    },
    {
      "epoch": 1.2415228842696275,
      "grad_norm": 2.859375,
      "learning_rate": 3.256449397831206e-05,
      "loss": 0.8522,
      "step": 354240
    },
    {
      "epoch": 1.241557931776523,
      "grad_norm": 3.296875,
      "learning_rate": 3.256384494964836e-05,
      "loss": 0.8435,
      "step": 354250
    },
    {
      "epoch": 1.2415929792834186,
      "grad_norm": 2.984375,
      "learning_rate": 3.2563195920984656e-05,
      "loss": 0.8577,
      "step": 354260
    },
    {
      "epoch": 1.2416280267903144,
      "grad_norm": 2.609375,
      "learning_rate": 3.256254689232096e-05,
      "loss": 0.7692,
      "step": 354270
    },
    {
      "epoch": 1.24166307429721,
      "grad_norm": 2.609375,
      "learning_rate": 3.256189786365725e-05,
      "loss": 0.8736,
      "step": 354280
    },
    {
      "epoch": 1.2416981218041054,
      "grad_norm": 3.109375,
      "learning_rate": 3.2561248834993554e-05,
      "loss": 0.9251,
      "step": 354290
    },
    {
      "epoch": 1.241733169311001,
      "grad_norm": 2.96875,
      "learning_rate": 3.256059980632985e-05,
      "loss": 0.8678,
      "step": 354300
    },
    {
      "epoch": 1.2417682168178967,
      "grad_norm": 3.03125,
      "learning_rate": 3.255995077766615e-05,
      "loss": 0.7922,
      "step": 354310
    },
    {
      "epoch": 1.2418032643247923,
      "grad_norm": 3.34375,
      "learning_rate": 3.2559301749002444e-05,
      "loss": 0.8892,
      "step": 354320
    },
    {
      "epoch": 1.2418383118316878,
      "grad_norm": 2.390625,
      "learning_rate": 3.2558652720338746e-05,
      "loss": 0.8823,
      "step": 354330
    },
    {
      "epoch": 1.2418733593385833,
      "grad_norm": 2.890625,
      "learning_rate": 3.255800369167504e-05,
      "loss": 0.8767,
      "step": 354340
    },
    {
      "epoch": 1.241908406845479,
      "grad_norm": 3.1875,
      "learning_rate": 3.255735466301134e-05,
      "loss": 0.9446,
      "step": 354350
    },
    {
      "epoch": 1.2419434543523746,
      "grad_norm": 2.84375,
      "learning_rate": 3.255670563434764e-05,
      "loss": 0.8579,
      "step": 354360
    },
    {
      "epoch": 1.2419785018592702,
      "grad_norm": 2.859375,
      "learning_rate": 3.255605660568393e-05,
      "loss": 0.7895,
      "step": 354370
    },
    {
      "epoch": 1.242013549366166,
      "grad_norm": 2.984375,
      "learning_rate": 3.255540757702023e-05,
      "loss": 0.8176,
      "step": 354380
    },
    {
      "epoch": 1.2420485968730615,
      "grad_norm": 2.90625,
      "learning_rate": 3.255475854835653e-05,
      "loss": 0.9447,
      "step": 354390
    },
    {
      "epoch": 1.242083644379957,
      "grad_norm": 2.8125,
      "learning_rate": 3.255410951969283e-05,
      "loss": 0.8339,
      "step": 354400
    },
    {
      "epoch": 1.2421186918868525,
      "grad_norm": 2.890625,
      "learning_rate": 3.255346049102912e-05,
      "loss": 0.8722,
      "step": 354410
    },
    {
      "epoch": 1.2421537393937483,
      "grad_norm": 2.875,
      "learning_rate": 3.2552811462365424e-05,
      "loss": 0.8333,
      "step": 354420
    },
    {
      "epoch": 1.2421887869006438,
      "grad_norm": 3.25,
      "learning_rate": 3.255216243370172e-05,
      "loss": 0.917,
      "step": 354430
    },
    {
      "epoch": 1.2422238344075394,
      "grad_norm": 2.84375,
      "learning_rate": 3.255151340503802e-05,
      "loss": 0.8172,
      "step": 354440
    },
    {
      "epoch": 1.242258881914435,
      "grad_norm": 2.453125,
      "learning_rate": 3.255086437637432e-05,
      "loss": 0.8557,
      "step": 354450
    },
    {
      "epoch": 1.2422939294213307,
      "grad_norm": 2.796875,
      "learning_rate": 3.2550215347710616e-05,
      "loss": 0.8371,
      "step": 354460
    },
    {
      "epoch": 1.2423289769282262,
      "grad_norm": 2.796875,
      "learning_rate": 3.254956631904692e-05,
      "loss": 0.8052,
      "step": 354470
    },
    {
      "epoch": 1.2423640244351217,
      "grad_norm": 2.78125,
      "learning_rate": 3.254891729038321e-05,
      "loss": 0.9275,
      "step": 354480
    },
    {
      "epoch": 1.2423990719420175,
      "grad_norm": 2.671875,
      "learning_rate": 3.2548268261719514e-05,
      "loss": 0.8309,
      "step": 354490
    },
    {
      "epoch": 1.242434119448913,
      "grad_norm": 2.953125,
      "learning_rate": 3.254761923305581e-05,
      "loss": 0.8344,
      "step": 354500
    },
    {
      "epoch": 1.2424691669558086,
      "grad_norm": 2.984375,
      "learning_rate": 3.254697020439211e-05,
      "loss": 0.8532,
      "step": 354510
    },
    {
      "epoch": 1.2425042144627043,
      "grad_norm": 2.828125,
      "learning_rate": 3.2546321175728404e-05,
      "loss": 0.8681,
      "step": 354520
    },
    {
      "epoch": 1.2425392619695999,
      "grad_norm": 2.6875,
      "learning_rate": 3.2545672147064706e-05,
      "loss": 0.8629,
      "step": 354530
    },
    {
      "epoch": 1.2425743094764954,
      "grad_norm": 3.21875,
      "learning_rate": 3.2545023118401e-05,
      "loss": 0.8629,
      "step": 354540
    },
    {
      "epoch": 1.242609356983391,
      "grad_norm": 3.078125,
      "learning_rate": 3.25443740897373e-05,
      "loss": 0.7721,
      "step": 354550
    },
    {
      "epoch": 1.2426444044902867,
      "grad_norm": 2.625,
      "learning_rate": 3.2543725061073596e-05,
      "loss": 0.8317,
      "step": 354560
    },
    {
      "epoch": 1.2426794519971822,
      "grad_norm": 3.078125,
      "learning_rate": 3.25430760324099e-05,
      "loss": 0.8313,
      "step": 354570
    },
    {
      "epoch": 1.2427144995040778,
      "grad_norm": 2.703125,
      "learning_rate": 3.254242700374619e-05,
      "loss": 0.8426,
      "step": 354580
    },
    {
      "epoch": 1.2427495470109733,
      "grad_norm": 2.4375,
      "learning_rate": 3.2541777975082494e-05,
      "loss": 0.8441,
      "step": 354590
    },
    {
      "epoch": 1.242784594517869,
      "grad_norm": 2.859375,
      "learning_rate": 3.2541128946418795e-05,
      "loss": 0.8237,
      "step": 354600
    },
    {
      "epoch": 1.2428196420247646,
      "grad_norm": 2.890625,
      "learning_rate": 3.254047991775509e-05,
      "loss": 0.7991,
      "step": 354610
    },
    {
      "epoch": 1.2428546895316601,
      "grad_norm": 2.890625,
      "learning_rate": 3.253983088909139e-05,
      "loss": 0.8631,
      "step": 354620
    },
    {
      "epoch": 1.2428897370385559,
      "grad_norm": 2.921875,
      "learning_rate": 3.2539181860427686e-05,
      "loss": 0.8724,
      "step": 354630
    },
    {
      "epoch": 1.2429247845454514,
      "grad_norm": 3.0,
      "learning_rate": 3.253853283176399e-05,
      "loss": 0.8938,
      "step": 354640
    },
    {
      "epoch": 1.242959832052347,
      "grad_norm": 2.78125,
      "learning_rate": 3.253788380310028e-05,
      "loss": 0.7774,
      "step": 354650
    },
    {
      "epoch": 1.2429948795592425,
      "grad_norm": 3.21875,
      "learning_rate": 3.253723477443658e-05,
      "loss": 0.8409,
      "step": 354660
    },
    {
      "epoch": 1.2430299270661382,
      "grad_norm": 3.265625,
      "learning_rate": 3.253658574577288e-05,
      "loss": 0.9479,
      "step": 354670
    },
    {
      "epoch": 1.2430649745730338,
      "grad_norm": 2.625,
      "learning_rate": 3.253593671710918e-05,
      "loss": 0.8126,
      "step": 354680
    },
    {
      "epoch": 1.2431000220799293,
      "grad_norm": 3.75,
      "learning_rate": 3.2535287688445474e-05,
      "loss": 0.8227,
      "step": 354690
    },
    {
      "epoch": 1.2431350695868248,
      "grad_norm": 2.9375,
      "learning_rate": 3.2534638659781775e-05,
      "loss": 0.939,
      "step": 354700
    },
    {
      "epoch": 1.2431701170937206,
      "grad_norm": 3.015625,
      "learning_rate": 3.253398963111807e-05,
      "loss": 0.844,
      "step": 354710
    },
    {
      "epoch": 1.2432051646006161,
      "grad_norm": 2.796875,
      "learning_rate": 3.253334060245437e-05,
      "loss": 0.8767,
      "step": 354720
    },
    {
      "epoch": 1.2432402121075117,
      "grad_norm": 2.640625,
      "learning_rate": 3.253269157379067e-05,
      "loss": 0.8503,
      "step": 354730
    },
    {
      "epoch": 1.2432752596144074,
      "grad_norm": 2.53125,
      "learning_rate": 3.253204254512697e-05,
      "loss": 0.7607,
      "step": 354740
    },
    {
      "epoch": 1.243310307121303,
      "grad_norm": 3.03125,
      "learning_rate": 3.253139351646326e-05,
      "loss": 0.8719,
      "step": 354750
    },
    {
      "epoch": 1.2433453546281985,
      "grad_norm": 3.125,
      "learning_rate": 3.2530744487799556e-05,
      "loss": 0.8634,
      "step": 354760
    },
    {
      "epoch": 1.243380402135094,
      "grad_norm": 2.65625,
      "learning_rate": 3.253009545913586e-05,
      "loss": 0.8535,
      "step": 354770
    },
    {
      "epoch": 1.2434154496419898,
      "grad_norm": 2.84375,
      "learning_rate": 3.252944643047215e-05,
      "loss": 0.8107,
      "step": 354780
    },
    {
      "epoch": 1.2434504971488853,
      "grad_norm": 2.640625,
      "learning_rate": 3.2528797401808454e-05,
      "loss": 0.8008,
      "step": 354790
    },
    {
      "epoch": 1.2434855446557809,
      "grad_norm": 2.84375,
      "learning_rate": 3.252814837314475e-05,
      "loss": 0.8603,
      "step": 354800
    },
    {
      "epoch": 1.2435205921626764,
      "grad_norm": 3.484375,
      "learning_rate": 3.252749934448105e-05,
      "loss": 0.888,
      "step": 354810
    },
    {
      "epoch": 1.2435556396695722,
      "grad_norm": 2.734375,
      "learning_rate": 3.252685031581735e-05,
      "loss": 0.8427,
      "step": 354820
    },
    {
      "epoch": 1.2435906871764677,
      "grad_norm": 2.953125,
      "learning_rate": 3.2526201287153646e-05,
      "loss": 0.8784,
      "step": 354830
    },
    {
      "epoch": 1.2436257346833632,
      "grad_norm": 3.0,
      "learning_rate": 3.252555225848995e-05,
      "loss": 0.8779,
      "step": 354840
    },
    {
      "epoch": 1.243660782190259,
      "grad_norm": 2.84375,
      "learning_rate": 3.252490322982624e-05,
      "loss": 0.8401,
      "step": 354850
    },
    {
      "epoch": 1.2436958296971545,
      "grad_norm": 2.703125,
      "learning_rate": 3.252425420116254e-05,
      "loss": 0.9001,
      "step": 354860
    },
    {
      "epoch": 1.24373087720405,
      "grad_norm": 2.953125,
      "learning_rate": 3.252360517249884e-05,
      "loss": 0.8306,
      "step": 354870
    },
    {
      "epoch": 1.2437659247109456,
      "grad_norm": 2.921875,
      "learning_rate": 3.252295614383514e-05,
      "loss": 0.858,
      "step": 354880
    },
    {
      "epoch": 1.2438009722178414,
      "grad_norm": 2.53125,
      "learning_rate": 3.2522307115171434e-05,
      "loss": 0.8369,
      "step": 354890
    },
    {
      "epoch": 1.243836019724737,
      "grad_norm": 2.890625,
      "learning_rate": 3.2521658086507735e-05,
      "loss": 0.8996,
      "step": 354900
    },
    {
      "epoch": 1.2438710672316324,
      "grad_norm": 2.515625,
      "learning_rate": 3.252100905784403e-05,
      "loss": 0.7991,
      "step": 354910
    },
    {
      "epoch": 1.243906114738528,
      "grad_norm": 3.125,
      "learning_rate": 3.252036002918033e-05,
      "loss": 0.8504,
      "step": 354920
    },
    {
      "epoch": 1.2439411622454237,
      "grad_norm": 3.390625,
      "learning_rate": 3.2519711000516626e-05,
      "loss": 0.8129,
      "step": 354930
    },
    {
      "epoch": 1.2439762097523193,
      "grad_norm": 2.921875,
      "learning_rate": 3.251906197185293e-05,
      "loss": 0.8827,
      "step": 354940
    },
    {
      "epoch": 1.2440112572592148,
      "grad_norm": 3.0,
      "learning_rate": 3.251841294318922e-05,
      "loss": 0.8187,
      "step": 354950
    },
    {
      "epoch": 1.2440463047661106,
      "grad_norm": 2.609375,
      "learning_rate": 3.251776391452552e-05,
      "loss": 0.7695,
      "step": 354960
    },
    {
      "epoch": 1.244081352273006,
      "grad_norm": 2.59375,
      "learning_rate": 3.2517114885861824e-05,
      "loss": 0.8371,
      "step": 354970
    },
    {
      "epoch": 1.2441163997799016,
      "grad_norm": 3.0625,
      "learning_rate": 3.251646585719812e-05,
      "loss": 0.8725,
      "step": 354980
    },
    {
      "epoch": 1.2441514472867972,
      "grad_norm": 2.984375,
      "learning_rate": 3.251581682853442e-05,
      "loss": 0.9362,
      "step": 354990
    },
    {
      "epoch": 1.244186494793693,
      "grad_norm": 2.703125,
      "learning_rate": 3.2515167799870715e-05,
      "loss": 0.8303,
      "step": 355000
    },
    {
      "epoch": 1.244186494793693,
      "eval_loss": 0.8035743832588196,
      "eval_runtime": 553.1415,
      "eval_samples_per_second": 687.773,
      "eval_steps_per_second": 57.314,
      "step": 355000
    },
    {
      "epoch": 1.2442215423005885,
      "grad_norm": 2.78125,
      "learning_rate": 3.2514518771207016e-05,
      "loss": 0.921,
      "step": 355010
    },
    {
      "epoch": 1.244256589807484,
      "grad_norm": 2.921875,
      "learning_rate": 3.251386974254331e-05,
      "loss": 0.8865,
      "step": 355020
    },
    {
      "epoch": 1.2442916373143795,
      "grad_norm": 2.890625,
      "learning_rate": 3.251322071387961e-05,
      "loss": 0.8929,
      "step": 355030
    },
    {
      "epoch": 1.2443266848212753,
      "grad_norm": 2.703125,
      "learning_rate": 3.251257168521591e-05,
      "loss": 0.8007,
      "step": 355040
    },
    {
      "epoch": 1.2443617323281708,
      "grad_norm": 3.109375,
      "learning_rate": 3.251192265655221e-05,
      "loss": 0.912,
      "step": 355050
    },
    {
      "epoch": 1.2443967798350664,
      "grad_norm": 3.171875,
      "learning_rate": 3.25112736278885e-05,
      "loss": 0.7827,
      "step": 355060
    },
    {
      "epoch": 1.2444318273419621,
      "grad_norm": 2.96875,
      "learning_rate": 3.2510624599224804e-05,
      "loss": 0.8617,
      "step": 355070
    },
    {
      "epoch": 1.2444668748488577,
      "grad_norm": 3.1875,
      "learning_rate": 3.25099755705611e-05,
      "loss": 0.7767,
      "step": 355080
    },
    {
      "epoch": 1.2445019223557532,
      "grad_norm": 2.828125,
      "learning_rate": 3.25093265418974e-05,
      "loss": 0.8923,
      "step": 355090
    },
    {
      "epoch": 1.244536969862649,
      "grad_norm": 2.90625,
      "learning_rate": 3.25086775132337e-05,
      "loss": 0.8072,
      "step": 355100
    },
    {
      "epoch": 1.2445720173695445,
      "grad_norm": 2.828125,
      "learning_rate": 3.2508028484569996e-05,
      "loss": 0.7919,
      "step": 355110
    },
    {
      "epoch": 1.24460706487644,
      "grad_norm": 3.328125,
      "learning_rate": 3.25073794559063e-05,
      "loss": 0.8017,
      "step": 355120
    },
    {
      "epoch": 1.2446421123833356,
      "grad_norm": 3.234375,
      "learning_rate": 3.2506730427242586e-05,
      "loss": 0.8321,
      "step": 355130
    },
    {
      "epoch": 1.244677159890231,
      "grad_norm": 2.59375,
      "learning_rate": 3.250608139857889e-05,
      "loss": 0.8614,
      "step": 355140
    },
    {
      "epoch": 1.2447122073971268,
      "grad_norm": 2.71875,
      "learning_rate": 3.250543236991518e-05,
      "loss": 0.8396,
      "step": 355150
    },
    {
      "epoch": 1.2447472549040224,
      "grad_norm": 2.796875,
      "learning_rate": 3.250478334125148e-05,
      "loss": 0.9028,
      "step": 355160
    },
    {
      "epoch": 1.244782302410918,
      "grad_norm": 2.578125,
      "learning_rate": 3.250413431258778e-05,
      "loss": 0.8409,
      "step": 355170
    },
    {
      "epoch": 1.2448173499178137,
      "grad_norm": 2.609375,
      "learning_rate": 3.250348528392408e-05,
      "loss": 0.8635,
      "step": 355180
    },
    {
      "epoch": 1.2448523974247092,
      "grad_norm": 2.875,
      "learning_rate": 3.250283625526038e-05,
      "loss": 0.9011,
      "step": 355190
    },
    {
      "epoch": 1.2448874449316047,
      "grad_norm": 2.890625,
      "learning_rate": 3.2502187226596675e-05,
      "loss": 0.8255,
      "step": 355200
    },
    {
      "epoch": 1.2449224924385005,
      "grad_norm": 2.625,
      "learning_rate": 3.2501538197932976e-05,
      "loss": 0.9143,
      "step": 355210
    },
    {
      "epoch": 1.244957539945396,
      "grad_norm": 2.796875,
      "learning_rate": 3.250088916926927e-05,
      "loss": 0.7994,
      "step": 355220
    },
    {
      "epoch": 1.2449925874522916,
      "grad_norm": 2.796875,
      "learning_rate": 3.250024014060557e-05,
      "loss": 0.8289,
      "step": 355230
    },
    {
      "epoch": 1.2450276349591871,
      "grad_norm": 2.6875,
      "learning_rate": 3.249959111194187e-05,
      "loss": 0.8566,
      "step": 355240
    },
    {
      "epoch": 1.2450626824660829,
      "grad_norm": 2.796875,
      "learning_rate": 3.249894208327817e-05,
      "loss": 0.8252,
      "step": 355250
    },
    {
      "epoch": 1.2450977299729784,
      "grad_norm": 3.171875,
      "learning_rate": 3.249829305461446e-05,
      "loss": 0.8818,
      "step": 355260
    },
    {
      "epoch": 1.245132777479874,
      "grad_norm": 2.890625,
      "learning_rate": 3.2497644025950764e-05,
      "loss": 0.8527,
      "step": 355270
    },
    {
      "epoch": 1.2451678249867695,
      "grad_norm": 2.625,
      "learning_rate": 3.249699499728706e-05,
      "loss": 0.8133,
      "step": 355280
    },
    {
      "epoch": 1.2452028724936652,
      "grad_norm": 2.9375,
      "learning_rate": 3.249634596862336e-05,
      "loss": 0.7953,
      "step": 355290
    },
    {
      "epoch": 1.2452379200005608,
      "grad_norm": 2.765625,
      "learning_rate": 3.2495696939959655e-05,
      "loss": 0.8517,
      "step": 355300
    },
    {
      "epoch": 1.2452729675074563,
      "grad_norm": 2.875,
      "learning_rate": 3.2495047911295956e-05,
      "loss": 0.7937,
      "step": 355310
    },
    {
      "epoch": 1.245308015014352,
      "grad_norm": 2.875,
      "learning_rate": 3.249439888263226e-05,
      "loss": 0.8304,
      "step": 355320
    },
    {
      "epoch": 1.2453430625212476,
      "grad_norm": 2.921875,
      "learning_rate": 3.249374985396855e-05,
      "loss": 0.8452,
      "step": 355330
    },
    {
      "epoch": 1.2453781100281431,
      "grad_norm": 2.859375,
      "learning_rate": 3.2493100825304854e-05,
      "loss": 0.81,
      "step": 355340
    },
    {
      "epoch": 1.2454131575350387,
      "grad_norm": 2.515625,
      "learning_rate": 3.249245179664115e-05,
      "loss": 0.8285,
      "step": 355350
    },
    {
      "epoch": 1.2454482050419344,
      "grad_norm": 2.265625,
      "learning_rate": 3.249180276797745e-05,
      "loss": 0.7971,
      "step": 355360
    },
    {
      "epoch": 1.24548325254883,
      "grad_norm": 3.109375,
      "learning_rate": 3.2491153739313744e-05,
      "loss": 0.9084,
      "step": 355370
    },
    {
      "epoch": 1.2455183000557255,
      "grad_norm": 2.484375,
      "learning_rate": 3.2490504710650046e-05,
      "loss": 0.8032,
      "step": 355380
    },
    {
      "epoch": 1.245553347562621,
      "grad_norm": 3.046875,
      "learning_rate": 3.248985568198634e-05,
      "loss": 0.8308,
      "step": 355390
    },
    {
      "epoch": 1.2455883950695168,
      "grad_norm": 3.140625,
      "learning_rate": 3.248920665332264e-05,
      "loss": 0.8533,
      "step": 355400
    },
    {
      "epoch": 1.2456234425764123,
      "grad_norm": 3.234375,
      "learning_rate": 3.2488557624658936e-05,
      "loss": 0.8494,
      "step": 355410
    },
    {
      "epoch": 1.2456584900833079,
      "grad_norm": 2.46875,
      "learning_rate": 3.248790859599524e-05,
      "loss": 0.8314,
      "step": 355420
    },
    {
      "epoch": 1.2456935375902036,
      "grad_norm": 2.953125,
      "learning_rate": 3.248725956733153e-05,
      "loss": 0.8742,
      "step": 355430
    },
    {
      "epoch": 1.2457285850970992,
      "grad_norm": 3.046875,
      "learning_rate": 3.2486610538667834e-05,
      "loss": 0.8242,
      "step": 355440
    },
    {
      "epoch": 1.2457636326039947,
      "grad_norm": 2.859375,
      "learning_rate": 3.248596151000413e-05,
      "loss": 0.8383,
      "step": 355450
    },
    {
      "epoch": 1.2457986801108902,
      "grad_norm": 2.734375,
      "learning_rate": 3.248531248134043e-05,
      "loss": 0.7905,
      "step": 355460
    },
    {
      "epoch": 1.245833727617786,
      "grad_norm": 2.078125,
      "learning_rate": 3.248466345267673e-05,
      "loss": 0.8266,
      "step": 355470
    },
    {
      "epoch": 1.2458687751246815,
      "grad_norm": 2.875,
      "learning_rate": 3.2484014424013026e-05,
      "loss": 0.8438,
      "step": 355480
    },
    {
      "epoch": 1.245903822631577,
      "grad_norm": 2.96875,
      "learning_rate": 3.248336539534933e-05,
      "loss": 0.8155,
      "step": 355490
    },
    {
      "epoch": 1.2459388701384726,
      "grad_norm": 3.15625,
      "learning_rate": 3.2482716366685615e-05,
      "loss": 0.9141,
      "step": 355500
    },
    {
      "epoch": 1.2459739176453684,
      "grad_norm": 2.984375,
      "learning_rate": 3.2482067338021916e-05,
      "loss": 0.8706,
      "step": 355510
    },
    {
      "epoch": 1.246008965152264,
      "grad_norm": 2.828125,
      "learning_rate": 3.248141830935821e-05,
      "loss": 0.8959,
      "step": 355520
    },
    {
      "epoch": 1.2460440126591594,
      "grad_norm": 3.0625,
      "learning_rate": 3.248076928069451e-05,
      "loss": 0.8651,
      "step": 355530
    },
    {
      "epoch": 1.2460790601660552,
      "grad_norm": 3.0625,
      "learning_rate": 3.248012025203081e-05,
      "loss": 0.8824,
      "step": 355540
    },
    {
      "epoch": 1.2461141076729507,
      "grad_norm": 3.265625,
      "learning_rate": 3.247947122336711e-05,
      "loss": 0.8609,
      "step": 355550
    },
    {
      "epoch": 1.2461491551798463,
      "grad_norm": 2.625,
      "learning_rate": 3.247882219470341e-05,
      "loss": 0.8658,
      "step": 355560
    },
    {
      "epoch": 1.2461842026867418,
      "grad_norm": 3.234375,
      "learning_rate": 3.2478173166039704e-05,
      "loss": 0.8709,
      "step": 355570
    },
    {
      "epoch": 1.2462192501936376,
      "grad_norm": 2.875,
      "learning_rate": 3.2477524137376006e-05,
      "loss": 0.7933,
      "step": 355580
    },
    {
      "epoch": 1.246254297700533,
      "grad_norm": 3.0,
      "learning_rate": 3.24768751087123e-05,
      "loss": 0.8327,
      "step": 355590
    },
    {
      "epoch": 1.2462893452074286,
      "grad_norm": 2.734375,
      "learning_rate": 3.24762260800486e-05,
      "loss": 0.9192,
      "step": 355600
    },
    {
      "epoch": 1.2463243927143242,
      "grad_norm": 2.703125,
      "learning_rate": 3.2475577051384896e-05,
      "loss": 0.8472,
      "step": 355610
    },
    {
      "epoch": 1.24635944022122,
      "grad_norm": 3.296875,
      "learning_rate": 3.24749280227212e-05,
      "loss": 0.8723,
      "step": 355620
    },
    {
      "epoch": 1.2463944877281155,
      "grad_norm": 3.171875,
      "learning_rate": 3.247427899405749e-05,
      "loss": 0.8771,
      "step": 355630
    },
    {
      "epoch": 1.246429535235011,
      "grad_norm": 2.875,
      "learning_rate": 3.2473629965393794e-05,
      "loss": 0.9266,
      "step": 355640
    },
    {
      "epoch": 1.2464645827419067,
      "grad_norm": 3.34375,
      "learning_rate": 3.247298093673009e-05,
      "loss": 0.8261,
      "step": 355650
    },
    {
      "epoch": 1.2464996302488023,
      "grad_norm": 3.328125,
      "learning_rate": 3.247233190806639e-05,
      "loss": 0.9149,
      "step": 355660
    },
    {
      "epoch": 1.2465346777556978,
      "grad_norm": 2.5,
      "learning_rate": 3.2471682879402684e-05,
      "loss": 0.7932,
      "step": 355670
    },
    {
      "epoch": 1.2465697252625934,
      "grad_norm": 2.546875,
      "learning_rate": 3.2471033850738986e-05,
      "loss": 0.8011,
      "step": 355680
    },
    {
      "epoch": 1.2466047727694891,
      "grad_norm": 3.625,
      "learning_rate": 3.247038482207529e-05,
      "loss": 0.9327,
      "step": 355690
    },
    {
      "epoch": 1.2466398202763846,
      "grad_norm": 3.328125,
      "learning_rate": 3.246973579341158e-05,
      "loss": 0.9609,
      "step": 355700
    },
    {
      "epoch": 1.2466748677832802,
      "grad_norm": 2.8125,
      "learning_rate": 3.246908676474788e-05,
      "loss": 0.8524,
      "step": 355710
    },
    {
      "epoch": 1.2467099152901757,
      "grad_norm": 3.3125,
      "learning_rate": 3.246843773608418e-05,
      "loss": 0.9417,
      "step": 355720
    },
    {
      "epoch": 1.2467449627970715,
      "grad_norm": 2.890625,
      "learning_rate": 3.246778870742048e-05,
      "loss": 0.8848,
      "step": 355730
    },
    {
      "epoch": 1.246780010303967,
      "grad_norm": 3.125,
      "learning_rate": 3.2467139678756774e-05,
      "loss": 0.9139,
      "step": 355740
    },
    {
      "epoch": 1.2468150578108625,
      "grad_norm": 2.765625,
      "learning_rate": 3.2466490650093075e-05,
      "loss": 0.8316,
      "step": 355750
    },
    {
      "epoch": 1.2468501053177583,
      "grad_norm": 2.671875,
      "learning_rate": 3.246584162142937e-05,
      "loss": 0.7863,
      "step": 355760
    },
    {
      "epoch": 1.2468851528246538,
      "grad_norm": 2.890625,
      "learning_rate": 3.246519259276567e-05,
      "loss": 0.907,
      "step": 355770
    },
    {
      "epoch": 1.2469202003315494,
      "grad_norm": 2.921875,
      "learning_rate": 3.2464543564101966e-05,
      "loss": 0.8078,
      "step": 355780
    },
    {
      "epoch": 1.2469552478384451,
      "grad_norm": 2.78125,
      "learning_rate": 3.246389453543827e-05,
      "loss": 0.8748,
      "step": 355790
    },
    {
      "epoch": 1.2469902953453407,
      "grad_norm": 2.78125,
      "learning_rate": 3.246324550677456e-05,
      "loss": 0.8128,
      "step": 355800
    },
    {
      "epoch": 1.2470253428522362,
      "grad_norm": 2.6875,
      "learning_rate": 3.246259647811086e-05,
      "loss": 0.8701,
      "step": 355810
    },
    {
      "epoch": 1.2470603903591317,
      "grad_norm": 2.984375,
      "learning_rate": 3.246194744944716e-05,
      "loss": 0.8206,
      "step": 355820
    },
    {
      "epoch": 1.2470954378660273,
      "grad_norm": 2.921875,
      "learning_rate": 3.246129842078346e-05,
      "loss": 0.8673,
      "step": 355830
    },
    {
      "epoch": 1.247130485372923,
      "grad_norm": 2.90625,
      "learning_rate": 3.246064939211976e-05,
      "loss": 0.8371,
      "step": 355840
    },
    {
      "epoch": 1.2471655328798186,
      "grad_norm": 3.078125,
      "learning_rate": 3.2460000363456055e-05,
      "loss": 0.8418,
      "step": 355850
    },
    {
      "epoch": 1.247200580386714,
      "grad_norm": 2.90625,
      "learning_rate": 3.2459351334792356e-05,
      "loss": 0.9271,
      "step": 355860
    },
    {
      "epoch": 1.2472356278936099,
      "grad_norm": 3.015625,
      "learning_rate": 3.245870230612865e-05,
      "loss": 0.8129,
      "step": 355870
    },
    {
      "epoch": 1.2472706754005054,
      "grad_norm": 2.875,
      "learning_rate": 3.2458053277464946e-05,
      "loss": 0.8112,
      "step": 355880
    },
    {
      "epoch": 1.247305722907401,
      "grad_norm": 3.15625,
      "learning_rate": 3.245740424880124e-05,
      "loss": 0.9614,
      "step": 355890
    },
    {
      "epoch": 1.2473407704142967,
      "grad_norm": 3.09375,
      "learning_rate": 3.245675522013754e-05,
      "loss": 0.8172,
      "step": 355900
    },
    {
      "epoch": 1.2473758179211922,
      "grad_norm": 2.953125,
      "learning_rate": 3.2456106191473836e-05,
      "loss": 0.9187,
      "step": 355910
    },
    {
      "epoch": 1.2474108654280878,
      "grad_norm": 2.9375,
      "learning_rate": 3.245545716281014e-05,
      "loss": 0.8438,
      "step": 355920
    },
    {
      "epoch": 1.2474459129349833,
      "grad_norm": 3.125,
      "learning_rate": 3.245480813414644e-05,
      "loss": 0.8495,
      "step": 355930
    },
    {
      "epoch": 1.247480960441879,
      "grad_norm": 2.875,
      "learning_rate": 3.2454159105482734e-05,
      "loss": 0.8562,
      "step": 355940
    },
    {
      "epoch": 1.2475160079487746,
      "grad_norm": 2.859375,
      "learning_rate": 3.2453510076819035e-05,
      "loss": 0.9065,
      "step": 355950
    },
    {
      "epoch": 1.2475510554556701,
      "grad_norm": 2.859375,
      "learning_rate": 3.245286104815533e-05,
      "loss": 0.8576,
      "step": 355960
    },
    {
      "epoch": 1.2475861029625657,
      "grad_norm": 2.71875,
      "learning_rate": 3.245221201949163e-05,
      "loss": 0.8448,
      "step": 355970
    },
    {
      "epoch": 1.2476211504694614,
      "grad_norm": 3.015625,
      "learning_rate": 3.2451562990827926e-05,
      "loss": 0.906,
      "step": 355980
    },
    {
      "epoch": 1.247656197976357,
      "grad_norm": 3.375,
      "learning_rate": 3.245091396216423e-05,
      "loss": 0.8483,
      "step": 355990
    },
    {
      "epoch": 1.2476912454832525,
      "grad_norm": 3.21875,
      "learning_rate": 3.245026493350052e-05,
      "loss": 0.96,
      "step": 356000
    },
    {
      "epoch": 1.2477262929901483,
      "grad_norm": 2.859375,
      "learning_rate": 3.244961590483682e-05,
      "loss": 0.8642,
      "step": 356010
    },
    {
      "epoch": 1.2477613404970438,
      "grad_norm": 2.953125,
      "learning_rate": 3.244896687617312e-05,
      "loss": 0.9102,
      "step": 356020
    },
    {
      "epoch": 1.2477963880039393,
      "grad_norm": 2.921875,
      "learning_rate": 3.244831784750942e-05,
      "loss": 0.8193,
      "step": 356030
    },
    {
      "epoch": 1.2478314355108349,
      "grad_norm": 2.890625,
      "learning_rate": 3.2447668818845714e-05,
      "loss": 0.7772,
      "step": 356040
    },
    {
      "epoch": 1.2478664830177306,
      "grad_norm": 3.265625,
      "learning_rate": 3.2447019790182015e-05,
      "loss": 0.9078,
      "step": 356050
    },
    {
      "epoch": 1.2479015305246262,
      "grad_norm": 3.03125,
      "learning_rate": 3.2446370761518316e-05,
      "loss": 0.8818,
      "step": 356060
    },
    {
      "epoch": 1.2479365780315217,
      "grad_norm": 2.421875,
      "learning_rate": 3.244572173285461e-05,
      "loss": 0.7949,
      "step": 356070
    },
    {
      "epoch": 1.2479716255384172,
      "grad_norm": 3.015625,
      "learning_rate": 3.244507270419091e-05,
      "loss": 0.8066,
      "step": 356080
    },
    {
      "epoch": 1.248006673045313,
      "grad_norm": 2.984375,
      "learning_rate": 3.244442367552721e-05,
      "loss": 0.8918,
      "step": 356090
    },
    {
      "epoch": 1.2480417205522085,
      "grad_norm": 2.25,
      "learning_rate": 3.244377464686351e-05,
      "loss": 0.8716,
      "step": 356100
    },
    {
      "epoch": 1.248076768059104,
      "grad_norm": 2.96875,
      "learning_rate": 3.24431256181998e-05,
      "loss": 0.8433,
      "step": 356110
    },
    {
      "epoch": 1.2481118155659998,
      "grad_norm": 3.015625,
      "learning_rate": 3.2442476589536104e-05,
      "loss": 0.8313,
      "step": 356120
    },
    {
      "epoch": 1.2481468630728954,
      "grad_norm": 3.15625,
      "learning_rate": 3.24418275608724e-05,
      "loss": 0.965,
      "step": 356130
    },
    {
      "epoch": 1.2481819105797909,
      "grad_norm": 2.375,
      "learning_rate": 3.24411785322087e-05,
      "loss": 0.8305,
      "step": 356140
    },
    {
      "epoch": 1.2482169580866864,
      "grad_norm": 2.59375,
      "learning_rate": 3.2440529503544995e-05,
      "loss": 0.9212,
      "step": 356150
    },
    {
      "epoch": 1.2482520055935822,
      "grad_norm": 3.46875,
      "learning_rate": 3.2439880474881296e-05,
      "loss": 0.88,
      "step": 356160
    },
    {
      "epoch": 1.2482870531004777,
      "grad_norm": 2.8125,
      "learning_rate": 3.243923144621759e-05,
      "loss": 0.8709,
      "step": 356170
    },
    {
      "epoch": 1.2483221006073733,
      "grad_norm": 2.765625,
      "learning_rate": 3.243858241755389e-05,
      "loss": 0.843,
      "step": 356180
    },
    {
      "epoch": 1.2483571481142688,
      "grad_norm": 3.125,
      "learning_rate": 3.243793338889019e-05,
      "loss": 0.8621,
      "step": 356190
    },
    {
      "epoch": 1.2483921956211645,
      "grad_norm": 3.1875,
      "learning_rate": 3.243728436022649e-05,
      "loss": 0.852,
      "step": 356200
    },
    {
      "epoch": 1.24842724312806,
      "grad_norm": 2.609375,
      "learning_rate": 3.243663533156279e-05,
      "loss": 0.7825,
      "step": 356210
    },
    {
      "epoch": 1.2484622906349556,
      "grad_norm": 3.0625,
      "learning_rate": 3.2435986302899084e-05,
      "loss": 0.9421,
      "step": 356220
    },
    {
      "epoch": 1.2484973381418514,
      "grad_norm": 3.015625,
      "learning_rate": 3.2435337274235386e-05,
      "loss": 0.8602,
      "step": 356230
    },
    {
      "epoch": 1.248532385648747,
      "grad_norm": 3.28125,
      "learning_rate": 3.243468824557168e-05,
      "loss": 0.9237,
      "step": 356240
    },
    {
      "epoch": 1.2485674331556424,
      "grad_norm": 2.9375,
      "learning_rate": 3.2434039216907975e-05,
      "loss": 0.9767,
      "step": 356250
    },
    {
      "epoch": 1.248602480662538,
      "grad_norm": 2.78125,
      "learning_rate": 3.243339018824427e-05,
      "loss": 0.8882,
      "step": 356260
    },
    {
      "epoch": 1.2486375281694337,
      "grad_norm": 3.0,
      "learning_rate": 3.243274115958057e-05,
      "loss": 0.9419,
      "step": 356270
    },
    {
      "epoch": 1.2486725756763293,
      "grad_norm": 2.828125,
      "learning_rate": 3.2432092130916866e-05,
      "loss": 0.8874,
      "step": 356280
    },
    {
      "epoch": 1.2487076231832248,
      "grad_norm": 2.4375,
      "learning_rate": 3.243144310225317e-05,
      "loss": 0.8634,
      "step": 356290
    },
    {
      "epoch": 1.2487426706901203,
      "grad_norm": 3.1875,
      "learning_rate": 3.243079407358947e-05,
      "loss": 0.8227,
      "step": 356300
    },
    {
      "epoch": 1.248777718197016,
      "grad_norm": 2.703125,
      "learning_rate": 3.243014504492576e-05,
      "loss": 0.8064,
      "step": 356310
    },
    {
      "epoch": 1.2488127657039116,
      "grad_norm": 3.21875,
      "learning_rate": 3.2429496016262064e-05,
      "loss": 0.9284,
      "step": 356320
    },
    {
      "epoch": 1.2488478132108072,
      "grad_norm": 2.796875,
      "learning_rate": 3.242884698759836e-05,
      "loss": 0.834,
      "step": 356330
    },
    {
      "epoch": 1.248882860717703,
      "grad_norm": 2.640625,
      "learning_rate": 3.242819795893466e-05,
      "loss": 0.7534,
      "step": 356340
    },
    {
      "epoch": 1.2489179082245985,
      "grad_norm": 2.828125,
      "learning_rate": 3.2427548930270955e-05,
      "loss": 0.9377,
      "step": 356350
    },
    {
      "epoch": 1.248952955731494,
      "grad_norm": 2.40625,
      "learning_rate": 3.2426899901607256e-05,
      "loss": 0.8009,
      "step": 356360
    },
    {
      "epoch": 1.2489880032383895,
      "grad_norm": 2.65625,
      "learning_rate": 3.242625087294355e-05,
      "loss": 0.8142,
      "step": 356370
    },
    {
      "epoch": 1.2490230507452853,
      "grad_norm": 3.0,
      "learning_rate": 3.242560184427985e-05,
      "loss": 0.8132,
      "step": 356380
    },
    {
      "epoch": 1.2490580982521808,
      "grad_norm": 2.90625,
      "learning_rate": 3.242495281561615e-05,
      "loss": 0.8341,
      "step": 356390
    },
    {
      "epoch": 1.2490931457590764,
      "grad_norm": 3.015625,
      "learning_rate": 3.242430378695245e-05,
      "loss": 0.9103,
      "step": 356400
    },
    {
      "epoch": 1.249128193265972,
      "grad_norm": 2.171875,
      "learning_rate": 3.242365475828874e-05,
      "loss": 0.8833,
      "step": 356410
    },
    {
      "epoch": 1.2491632407728677,
      "grad_norm": 2.6875,
      "learning_rate": 3.2423005729625044e-05,
      "loss": 0.8138,
      "step": 356420
    },
    {
      "epoch": 1.2491982882797632,
      "grad_norm": 2.9375,
      "learning_rate": 3.2422356700961346e-05,
      "loss": 0.892,
      "step": 356430
    },
    {
      "epoch": 1.2492333357866587,
      "grad_norm": 2.796875,
      "learning_rate": 3.242170767229764e-05,
      "loss": 0.8588,
      "step": 356440
    },
    {
      "epoch": 1.2492683832935545,
      "grad_norm": 2.78125,
      "learning_rate": 3.242105864363394e-05,
      "loss": 0.872,
      "step": 356450
    },
    {
      "epoch": 1.24930343080045,
      "grad_norm": 3.03125,
      "learning_rate": 3.2420409614970236e-05,
      "loss": 0.8879,
      "step": 356460
    },
    {
      "epoch": 1.2493384783073456,
      "grad_norm": 2.875,
      "learning_rate": 3.241976058630654e-05,
      "loss": 0.9396,
      "step": 356470
    },
    {
      "epoch": 1.2493735258142413,
      "grad_norm": 3.21875,
      "learning_rate": 3.241911155764283e-05,
      "loss": 0.7911,
      "step": 356480
    },
    {
      "epoch": 1.2494085733211369,
      "grad_norm": 2.078125,
      "learning_rate": 3.2418462528979134e-05,
      "loss": 0.8155,
      "step": 356490
    },
    {
      "epoch": 1.2494436208280324,
      "grad_norm": 2.375,
      "learning_rate": 3.241781350031543e-05,
      "loss": 0.8135,
      "step": 356500
    },
    {
      "epoch": 1.249478668334928,
      "grad_norm": 2.765625,
      "learning_rate": 3.241716447165173e-05,
      "loss": 0.8808,
      "step": 356510
    },
    {
      "epoch": 1.2495137158418235,
      "grad_norm": 2.9375,
      "learning_rate": 3.2416515442988024e-05,
      "loss": 0.8166,
      "step": 356520
    },
    {
      "epoch": 1.2495487633487192,
      "grad_norm": 2.671875,
      "learning_rate": 3.2415866414324326e-05,
      "loss": 0.8575,
      "step": 356530
    },
    {
      "epoch": 1.2495838108556148,
      "grad_norm": 2.859375,
      "learning_rate": 3.241521738566062e-05,
      "loss": 0.8149,
      "step": 356540
    },
    {
      "epoch": 1.2496188583625103,
      "grad_norm": 2.75,
      "learning_rate": 3.241456835699692e-05,
      "loss": 0.8456,
      "step": 356550
    },
    {
      "epoch": 1.249653905869406,
      "grad_norm": 2.734375,
      "learning_rate": 3.241391932833322e-05,
      "loss": 0.863,
      "step": 356560
    },
    {
      "epoch": 1.2496889533763016,
      "grad_norm": 2.9375,
      "learning_rate": 3.241327029966952e-05,
      "loss": 0.8926,
      "step": 356570
    },
    {
      "epoch": 1.2497240008831971,
      "grad_norm": 2.71875,
      "learning_rate": 3.241262127100582e-05,
      "loss": 0.7229,
      "step": 356580
    },
    {
      "epoch": 1.2497590483900929,
      "grad_norm": 3.34375,
      "learning_rate": 3.2411972242342114e-05,
      "loss": 0.8598,
      "step": 356590
    },
    {
      "epoch": 1.2497940958969884,
      "grad_norm": 3.015625,
      "learning_rate": 3.2411323213678415e-05,
      "loss": 0.9322,
      "step": 356600
    },
    {
      "epoch": 1.249829143403884,
      "grad_norm": 2.859375,
      "learning_rate": 3.241067418501471e-05,
      "loss": 0.8373,
      "step": 356610
    },
    {
      "epoch": 1.2498641909107795,
      "grad_norm": 2.984375,
      "learning_rate": 3.241002515635101e-05,
      "loss": 0.8309,
      "step": 356620
    },
    {
      "epoch": 1.2498992384176753,
      "grad_norm": 2.546875,
      "learning_rate": 3.24093761276873e-05,
      "loss": 0.8183,
      "step": 356630
    },
    {
      "epoch": 1.2499342859245708,
      "grad_norm": 2.859375,
      "learning_rate": 3.24087270990236e-05,
      "loss": 0.8654,
      "step": 356640
    },
    {
      "epoch": 1.2499693334314663,
      "grad_norm": 2.640625,
      "learning_rate": 3.24080780703599e-05,
      "loss": 0.8748,
      "step": 356650
    },
    {
      "epoch": 1.2500043809383619,
      "grad_norm": 2.71875,
      "learning_rate": 3.2407429041696196e-05,
      "loss": 0.865,
      "step": 356660
    },
    {
      "epoch": 1.2500394284452576,
      "grad_norm": 3.1875,
      "learning_rate": 3.24067800130325e-05,
      "loss": 0.9521,
      "step": 356670
    },
    {
      "epoch": 1.2500744759521532,
      "grad_norm": 3.0,
      "learning_rate": 3.240613098436879e-05,
      "loss": 0.9317,
      "step": 356680
    },
    {
      "epoch": 1.2501095234590487,
      "grad_norm": 2.734375,
      "learning_rate": 3.2405481955705094e-05,
      "loss": 0.9578,
      "step": 356690
    },
    {
      "epoch": 1.2501445709659444,
      "grad_norm": 3.203125,
      "learning_rate": 3.240483292704139e-05,
      "loss": 0.8763,
      "step": 356700
    },
    {
      "epoch": 1.25017961847284,
      "grad_norm": 3.140625,
      "learning_rate": 3.240418389837769e-05,
      "loss": 0.8018,
      "step": 356710
    },
    {
      "epoch": 1.2502146659797355,
      "grad_norm": 2.90625,
      "learning_rate": 3.2403534869713984e-05,
      "loss": 0.8923,
      "step": 356720
    },
    {
      "epoch": 1.250249713486631,
      "grad_norm": 2.75,
      "learning_rate": 3.2402885841050286e-05,
      "loss": 0.8463,
      "step": 356730
    },
    {
      "epoch": 1.2502847609935266,
      "grad_norm": 2.84375,
      "learning_rate": 3.240223681238658e-05,
      "loss": 0.934,
      "step": 356740
    },
    {
      "epoch": 1.2503198085004223,
      "grad_norm": 3.171875,
      "learning_rate": 3.240158778372288e-05,
      "loss": 0.8638,
      "step": 356750
    },
    {
      "epoch": 1.2503548560073179,
      "grad_norm": 2.765625,
      "learning_rate": 3.2400938755059176e-05,
      "loss": 0.8407,
      "step": 356760
    },
    {
      "epoch": 1.2503899035142134,
      "grad_norm": 2.953125,
      "learning_rate": 3.240028972639548e-05,
      "loss": 0.8037,
      "step": 356770
    },
    {
      "epoch": 1.2504249510211092,
      "grad_norm": 2.5,
      "learning_rate": 3.239964069773177e-05,
      "loss": 0.8266,
      "step": 356780
    },
    {
      "epoch": 1.2504599985280047,
      "grad_norm": 3.015625,
      "learning_rate": 3.2398991669068074e-05,
      "loss": 0.8113,
      "step": 356790
    },
    {
      "epoch": 1.2504950460349002,
      "grad_norm": 2.453125,
      "learning_rate": 3.2398342640404375e-05,
      "loss": 0.7838,
      "step": 356800
    },
    {
      "epoch": 1.250530093541796,
      "grad_norm": 3.34375,
      "learning_rate": 3.239769361174067e-05,
      "loss": 0.9066,
      "step": 356810
    },
    {
      "epoch": 1.2505651410486915,
      "grad_norm": 2.5625,
      "learning_rate": 3.239704458307697e-05,
      "loss": 0.8219,
      "step": 356820
    },
    {
      "epoch": 1.250600188555587,
      "grad_norm": 3.03125,
      "learning_rate": 3.2396395554413266e-05,
      "loss": 0.8925,
      "step": 356830
    },
    {
      "epoch": 1.2506352360624826,
      "grad_norm": 2.875,
      "learning_rate": 3.239574652574957e-05,
      "loss": 0.8214,
      "step": 356840
    },
    {
      "epoch": 1.2506702835693784,
      "grad_norm": 2.828125,
      "learning_rate": 3.239509749708586e-05,
      "loss": 0.7482,
      "step": 356850
    },
    {
      "epoch": 1.250705331076274,
      "grad_norm": 2.734375,
      "learning_rate": 3.239444846842216e-05,
      "loss": 0.8399,
      "step": 356860
    },
    {
      "epoch": 1.2507403785831694,
      "grad_norm": 2.984375,
      "learning_rate": 3.239379943975846e-05,
      "loss": 0.8898,
      "step": 356870
    },
    {
      "epoch": 1.250775426090065,
      "grad_norm": 2.828125,
      "learning_rate": 3.239315041109476e-05,
      "loss": 0.8447,
      "step": 356880
    },
    {
      "epoch": 1.2508104735969607,
      "grad_norm": 2.875,
      "learning_rate": 3.2392501382431054e-05,
      "loss": 0.8438,
      "step": 356890
    },
    {
      "epoch": 1.2508455211038563,
      "grad_norm": 2.859375,
      "learning_rate": 3.2391852353767355e-05,
      "loss": 0.9398,
      "step": 356900
    },
    {
      "epoch": 1.2508805686107518,
      "grad_norm": 2.875,
      "learning_rate": 3.239120332510365e-05,
      "loss": 0.8014,
      "step": 356910
    },
    {
      "epoch": 1.2509156161176476,
      "grad_norm": 2.71875,
      "learning_rate": 3.239055429643995e-05,
      "loss": 0.7879,
      "step": 356920
    },
    {
      "epoch": 1.250950663624543,
      "grad_norm": 2.703125,
      "learning_rate": 3.238990526777625e-05,
      "loss": 0.8519,
      "step": 356930
    },
    {
      "epoch": 1.2509857111314386,
      "grad_norm": 3.1875,
      "learning_rate": 3.238925623911255e-05,
      "loss": 0.8634,
      "step": 356940
    },
    {
      "epoch": 1.2510207586383344,
      "grad_norm": 3.046875,
      "learning_rate": 3.238860721044885e-05,
      "loss": 0.7995,
      "step": 356950
    },
    {
      "epoch": 1.25105580614523,
      "grad_norm": 2.84375,
      "learning_rate": 3.238795818178514e-05,
      "loss": 0.7964,
      "step": 356960
    },
    {
      "epoch": 1.2510908536521255,
      "grad_norm": 2.875,
      "learning_rate": 3.2387309153121445e-05,
      "loss": 0.9434,
      "step": 356970
    },
    {
      "epoch": 1.251125901159021,
      "grad_norm": 3.015625,
      "learning_rate": 3.238666012445774e-05,
      "loss": 0.8674,
      "step": 356980
    },
    {
      "epoch": 1.2511609486659165,
      "grad_norm": 3.078125,
      "learning_rate": 3.238601109579404e-05,
      "loss": 0.851,
      "step": 356990
    },
    {
      "epoch": 1.2511959961728123,
      "grad_norm": 2.921875,
      "learning_rate": 3.2385362067130335e-05,
      "loss": 0.8588,
      "step": 357000
    },
    {
      "epoch": 1.2512310436797078,
      "grad_norm": 3.28125,
      "learning_rate": 3.238471303846663e-05,
      "loss": 0.9434,
      "step": 357010
    },
    {
      "epoch": 1.2512660911866034,
      "grad_norm": 3.125,
      "learning_rate": 3.238406400980293e-05,
      "loss": 0.9055,
      "step": 357020
    },
    {
      "epoch": 1.2513011386934991,
      "grad_norm": 2.578125,
      "learning_rate": 3.2383414981139226e-05,
      "loss": 0.9504,
      "step": 357030
    },
    {
      "epoch": 1.2513361862003947,
      "grad_norm": 2.921875,
      "learning_rate": 3.238276595247553e-05,
      "loss": 0.8194,
      "step": 357040
    },
    {
      "epoch": 1.2513712337072902,
      "grad_norm": 2.828125,
      "learning_rate": 3.238211692381182e-05,
      "loss": 0.8605,
      "step": 357050
    },
    {
      "epoch": 1.251406281214186,
      "grad_norm": 3.3125,
      "learning_rate": 3.238146789514812e-05,
      "loss": 0.7934,
      "step": 357060
    },
    {
      "epoch": 1.2514413287210815,
      "grad_norm": 3.28125,
      "learning_rate": 3.238081886648442e-05,
      "loss": 0.8837,
      "step": 357070
    },
    {
      "epoch": 1.251476376227977,
      "grad_norm": 2.75,
      "learning_rate": 3.238016983782072e-05,
      "loss": 0.8127,
      "step": 357080
    },
    {
      "epoch": 1.2515114237348726,
      "grad_norm": 3.0,
      "learning_rate": 3.2379520809157014e-05,
      "loss": 0.9089,
      "step": 357090
    },
    {
      "epoch": 1.251546471241768,
      "grad_norm": 3.015625,
      "learning_rate": 3.2378871780493315e-05,
      "loss": 0.938,
      "step": 357100
    },
    {
      "epoch": 1.2515815187486639,
      "grad_norm": 2.953125,
      "learning_rate": 3.237822275182961e-05,
      "loss": 0.8392,
      "step": 357110
    },
    {
      "epoch": 1.2516165662555594,
      "grad_norm": 3.265625,
      "learning_rate": 3.237757372316591e-05,
      "loss": 0.8765,
      "step": 357120
    },
    {
      "epoch": 1.251651613762455,
      "grad_norm": 2.71875,
      "learning_rate": 3.2376924694502206e-05,
      "loss": 0.8243,
      "step": 357130
    },
    {
      "epoch": 1.2516866612693507,
      "grad_norm": 3.0625,
      "learning_rate": 3.237627566583851e-05,
      "loss": 0.8696,
      "step": 357140
    },
    {
      "epoch": 1.2517217087762462,
      "grad_norm": 2.828125,
      "learning_rate": 3.23756266371748e-05,
      "loss": 0.8619,
      "step": 357150
    },
    {
      "epoch": 1.2517567562831418,
      "grad_norm": 3.359375,
      "learning_rate": 3.23749776085111e-05,
      "loss": 0.8296,
      "step": 357160
    },
    {
      "epoch": 1.2517918037900375,
      "grad_norm": 2.984375,
      "learning_rate": 3.2374328579847405e-05,
      "loss": 0.8175,
      "step": 357170
    },
    {
      "epoch": 1.251826851296933,
      "grad_norm": 3.484375,
      "learning_rate": 3.23736795511837e-05,
      "loss": 0.8738,
      "step": 357180
    },
    {
      "epoch": 1.2518618988038286,
      "grad_norm": 3.28125,
      "learning_rate": 3.237303052252e-05,
      "loss": 0.8413,
      "step": 357190
    },
    {
      "epoch": 1.2518969463107241,
      "grad_norm": 2.59375,
      "learning_rate": 3.2372381493856295e-05,
      "loss": 0.8423,
      "step": 357200
    },
    {
      "epoch": 1.2519319938176197,
      "grad_norm": 2.71875,
      "learning_rate": 3.2371732465192597e-05,
      "loss": 0.812,
      "step": 357210
    },
    {
      "epoch": 1.2519670413245154,
      "grad_norm": 2.796875,
      "learning_rate": 3.237108343652889e-05,
      "loss": 0.8998,
      "step": 357220
    },
    {
      "epoch": 1.252002088831411,
      "grad_norm": 3.046875,
      "learning_rate": 3.237043440786519e-05,
      "loss": 0.8809,
      "step": 357230
    },
    {
      "epoch": 1.2520371363383065,
      "grad_norm": 2.640625,
      "learning_rate": 3.236978537920149e-05,
      "loss": 0.8016,
      "step": 357240
    },
    {
      "epoch": 1.2520721838452022,
      "grad_norm": 2.53125,
      "learning_rate": 3.236913635053779e-05,
      "loss": 0.8683,
      "step": 357250
    },
    {
      "epoch": 1.2521072313520978,
      "grad_norm": 3.09375,
      "learning_rate": 3.236848732187408e-05,
      "loss": 0.7789,
      "step": 357260
    },
    {
      "epoch": 1.2521422788589933,
      "grad_norm": 2.6875,
      "learning_rate": 3.2367838293210385e-05,
      "loss": 0.8375,
      "step": 357270
    },
    {
      "epoch": 1.252177326365889,
      "grad_norm": 2.546875,
      "learning_rate": 3.236718926454668e-05,
      "loss": 0.7903,
      "step": 357280
    },
    {
      "epoch": 1.2522123738727846,
      "grad_norm": 2.90625,
      "learning_rate": 3.236654023588298e-05,
      "loss": 0.7699,
      "step": 357290
    },
    {
      "epoch": 1.2522474213796801,
      "grad_norm": 2.703125,
      "learning_rate": 3.236589120721928e-05,
      "loss": 0.8644,
      "step": 357300
    },
    {
      "epoch": 1.2522824688865757,
      "grad_norm": 3.078125,
      "learning_rate": 3.2365242178555577e-05,
      "loss": 0.869,
      "step": 357310
    },
    {
      "epoch": 1.2523175163934712,
      "grad_norm": 3.0,
      "learning_rate": 3.236459314989188e-05,
      "loss": 0.9156,
      "step": 357320
    },
    {
      "epoch": 1.252352563900367,
      "grad_norm": 2.6875,
      "learning_rate": 3.236394412122817e-05,
      "loss": 0.8501,
      "step": 357330
    },
    {
      "epoch": 1.2523876114072625,
      "grad_norm": 3.015625,
      "learning_rate": 3.2363295092564474e-05,
      "loss": 0.8822,
      "step": 357340
    },
    {
      "epoch": 1.252422658914158,
      "grad_norm": 3.078125,
      "learning_rate": 3.236264606390077e-05,
      "loss": 0.8603,
      "step": 357350
    },
    {
      "epoch": 1.2524577064210538,
      "grad_norm": 3.25,
      "learning_rate": 3.236199703523707e-05,
      "loss": 0.9231,
      "step": 357360
    },
    {
      "epoch": 1.2524927539279493,
      "grad_norm": 2.515625,
      "learning_rate": 3.2361348006573365e-05,
      "loss": 0.7925,
      "step": 357370
    },
    {
      "epoch": 1.2525278014348449,
      "grad_norm": 2.703125,
      "learning_rate": 3.236069897790966e-05,
      "loss": 0.8471,
      "step": 357380
    },
    {
      "epoch": 1.2525628489417406,
      "grad_norm": 2.828125,
      "learning_rate": 3.236004994924596e-05,
      "loss": 0.9168,
      "step": 357390
    },
    {
      "epoch": 1.2525978964486362,
      "grad_norm": 2.96875,
      "learning_rate": 3.2359400920582255e-05,
      "loss": 0.8316,
      "step": 357400
    },
    {
      "epoch": 1.2526329439555317,
      "grad_norm": 3.015625,
      "learning_rate": 3.2358751891918557e-05,
      "loss": 0.9191,
      "step": 357410
    },
    {
      "epoch": 1.2526679914624272,
      "grad_norm": 2.640625,
      "learning_rate": 3.235810286325485e-05,
      "loss": 0.8916,
      "step": 357420
    },
    {
      "epoch": 1.2527030389693228,
      "grad_norm": 3.109375,
      "learning_rate": 3.235745383459115e-05,
      "loss": 0.8776,
      "step": 357430
    },
    {
      "epoch": 1.2527380864762185,
      "grad_norm": 2.9375,
      "learning_rate": 3.235680480592745e-05,
      "loss": 0.8579,
      "step": 357440
    },
    {
      "epoch": 1.252773133983114,
      "grad_norm": 3.03125,
      "learning_rate": 3.235615577726375e-05,
      "loss": 0.8301,
      "step": 357450
    },
    {
      "epoch": 1.2528081814900096,
      "grad_norm": 3.015625,
      "learning_rate": 3.235550674860004e-05,
      "loss": 0.9179,
      "step": 357460
    },
    {
      "epoch": 1.2528432289969054,
      "grad_norm": 2.984375,
      "learning_rate": 3.2354857719936345e-05,
      "loss": 0.8519,
      "step": 357470
    },
    {
      "epoch": 1.252878276503801,
      "grad_norm": 3.046875,
      "learning_rate": 3.235420869127264e-05,
      "loss": 0.8704,
      "step": 357480
    },
    {
      "epoch": 1.2529133240106964,
      "grad_norm": 3.25,
      "learning_rate": 3.235355966260894e-05,
      "loss": 0.8733,
      "step": 357490
    },
    {
      "epoch": 1.2529483715175922,
      "grad_norm": 3.5,
      "learning_rate": 3.2352910633945235e-05,
      "loss": 0.9352,
      "step": 357500
    },
    {
      "epoch": 1.2529834190244877,
      "grad_norm": 2.65625,
      "learning_rate": 3.2352261605281537e-05,
      "loss": 0.8039,
      "step": 357510
    },
    {
      "epoch": 1.2530184665313833,
      "grad_norm": 2.890625,
      "learning_rate": 3.235161257661784e-05,
      "loss": 0.8924,
      "step": 357520
    },
    {
      "epoch": 1.2530535140382788,
      "grad_norm": 2.84375,
      "learning_rate": 3.235096354795413e-05,
      "loss": 0.7885,
      "step": 357530
    },
    {
      "epoch": 1.2530885615451746,
      "grad_norm": 2.90625,
      "learning_rate": 3.2350314519290434e-05,
      "loss": 0.8512,
      "step": 357540
    },
    {
      "epoch": 1.25312360905207,
      "grad_norm": 2.609375,
      "learning_rate": 3.234966549062673e-05,
      "loss": 0.8449,
      "step": 357550
    },
    {
      "epoch": 1.2531586565589656,
      "grad_norm": 2.875,
      "learning_rate": 3.234901646196303e-05,
      "loss": 0.8505,
      "step": 357560
    },
    {
      "epoch": 1.2531937040658612,
      "grad_norm": 3.03125,
      "learning_rate": 3.2348367433299325e-05,
      "loss": 0.834,
      "step": 357570
    },
    {
      "epoch": 1.253228751572757,
      "grad_norm": 2.4375,
      "learning_rate": 3.2347718404635626e-05,
      "loss": 0.8274,
      "step": 357580
    },
    {
      "epoch": 1.2532637990796525,
      "grad_norm": 2.796875,
      "learning_rate": 3.234706937597192e-05,
      "loss": 0.8139,
      "step": 357590
    },
    {
      "epoch": 1.253298846586548,
      "grad_norm": 2.90625,
      "learning_rate": 3.234642034730822e-05,
      "loss": 0.9095,
      "step": 357600
    },
    {
      "epoch": 1.2533338940934438,
      "grad_norm": 3.09375,
      "learning_rate": 3.2345771318644517e-05,
      "loss": 0.8603,
      "step": 357610
    },
    {
      "epoch": 1.2533689416003393,
      "grad_norm": 3.15625,
      "learning_rate": 3.234512228998082e-05,
      "loss": 0.9231,
      "step": 357620
    },
    {
      "epoch": 1.2534039891072348,
      "grad_norm": 2.9375,
      "learning_rate": 3.234447326131711e-05,
      "loss": 0.8366,
      "step": 357630
    },
    {
      "epoch": 1.2534390366141306,
      "grad_norm": 2.796875,
      "learning_rate": 3.2343824232653414e-05,
      "loss": 0.9719,
      "step": 357640
    },
    {
      "epoch": 1.2534740841210261,
      "grad_norm": 2.90625,
      "learning_rate": 3.234317520398971e-05,
      "loss": 0.7986,
      "step": 357650
    },
    {
      "epoch": 1.2535091316279217,
      "grad_norm": 3.203125,
      "learning_rate": 3.234252617532601e-05,
      "loss": 0.8344,
      "step": 357660
    },
    {
      "epoch": 1.2535441791348172,
      "grad_norm": 2.921875,
      "learning_rate": 3.234187714666231e-05,
      "loss": 0.8318,
      "step": 357670
    },
    {
      "epoch": 1.2535792266417127,
      "grad_norm": 3.046875,
      "learning_rate": 3.2341228117998606e-05,
      "loss": 0.8163,
      "step": 357680
    },
    {
      "epoch": 1.2536142741486085,
      "grad_norm": 2.546875,
      "learning_rate": 3.234057908933491e-05,
      "loss": 0.8116,
      "step": 357690
    },
    {
      "epoch": 1.253649321655504,
      "grad_norm": 2.84375,
      "learning_rate": 3.23399300606712e-05,
      "loss": 0.8106,
      "step": 357700
    },
    {
      "epoch": 1.2536843691623996,
      "grad_norm": 2.765625,
      "learning_rate": 3.23392810320075e-05,
      "loss": 0.9142,
      "step": 357710
    },
    {
      "epoch": 1.2537194166692953,
      "grad_norm": 3.21875,
      "learning_rate": 3.23386320033438e-05,
      "loss": 0.8714,
      "step": 357720
    },
    {
      "epoch": 1.2537544641761909,
      "grad_norm": 2.75,
      "learning_rate": 3.23379829746801e-05,
      "loss": 0.8937,
      "step": 357730
    },
    {
      "epoch": 1.2537895116830864,
      "grad_norm": 2.734375,
      "learning_rate": 3.2337333946016394e-05,
      "loss": 0.827,
      "step": 357740
    },
    {
      "epoch": 1.2538245591899821,
      "grad_norm": 2.6875,
      "learning_rate": 3.2336684917352695e-05,
      "loss": 0.8545,
      "step": 357750
    },
    {
      "epoch": 1.2538596066968777,
      "grad_norm": 2.875,
      "learning_rate": 3.233603588868899e-05,
      "loss": 0.8335,
      "step": 357760
    },
    {
      "epoch": 1.2538946542037732,
      "grad_norm": 2.421875,
      "learning_rate": 3.2335386860025285e-05,
      "loss": 0.8497,
      "step": 357770
    },
    {
      "epoch": 1.2539297017106688,
      "grad_norm": 2.859375,
      "learning_rate": 3.2334737831361586e-05,
      "loss": 0.7848,
      "step": 357780
    },
    {
      "epoch": 1.2539647492175643,
      "grad_norm": 2.734375,
      "learning_rate": 3.233408880269788e-05,
      "loss": 0.8759,
      "step": 357790
    },
    {
      "epoch": 1.25399979672446,
      "grad_norm": 2.921875,
      "learning_rate": 3.233343977403418e-05,
      "loss": 0.8431,
      "step": 357800
    },
    {
      "epoch": 1.2540348442313556,
      "grad_norm": 2.75,
      "learning_rate": 3.2332790745370477e-05,
      "loss": 0.9401,
      "step": 357810
    },
    {
      "epoch": 1.2540698917382511,
      "grad_norm": 2.796875,
      "learning_rate": 3.233214171670678e-05,
      "loss": 0.9323,
      "step": 357820
    },
    {
      "epoch": 1.2541049392451469,
      "grad_norm": 2.671875,
      "learning_rate": 3.233149268804307e-05,
      "loss": 0.8043,
      "step": 357830
    },
    {
      "epoch": 1.2541399867520424,
      "grad_norm": 3.328125,
      "learning_rate": 3.2330843659379374e-05,
      "loss": 0.8573,
      "step": 357840
    },
    {
      "epoch": 1.254175034258938,
      "grad_norm": 3.140625,
      "learning_rate": 3.233019463071567e-05,
      "loss": 0.9289,
      "step": 357850
    },
    {
      "epoch": 1.2542100817658337,
      "grad_norm": 2.96875,
      "learning_rate": 3.232954560205197e-05,
      "loss": 0.8456,
      "step": 357860
    },
    {
      "epoch": 1.2542451292727292,
      "grad_norm": 3.109375,
      "learning_rate": 3.2328896573388265e-05,
      "loss": 0.8782,
      "step": 357870
    },
    {
      "epoch": 1.2542801767796248,
      "grad_norm": 3.09375,
      "learning_rate": 3.2328247544724566e-05,
      "loss": 0.8618,
      "step": 357880
    },
    {
      "epoch": 1.2543152242865203,
      "grad_norm": 2.625,
      "learning_rate": 3.232759851606087e-05,
      "loss": 0.8273,
      "step": 357890
    },
    {
      "epoch": 1.2543502717934158,
      "grad_norm": 2.6875,
      "learning_rate": 3.232694948739716e-05,
      "loss": 0.8914,
      "step": 357900
    },
    {
      "epoch": 1.2543853193003116,
      "grad_norm": 2.640625,
      "learning_rate": 3.232630045873346e-05,
      "loss": 0.877,
      "step": 357910
    },
    {
      "epoch": 1.2544203668072071,
      "grad_norm": 3.0625,
      "learning_rate": 3.232565143006976e-05,
      "loss": 0.955,
      "step": 357920
    },
    {
      "epoch": 1.2544554143141027,
      "grad_norm": 2.5,
      "learning_rate": 3.232500240140606e-05,
      "loss": 0.7645,
      "step": 357930
    },
    {
      "epoch": 1.2544904618209984,
      "grad_norm": 2.96875,
      "learning_rate": 3.2324353372742354e-05,
      "loss": 0.8814,
      "step": 357940
    },
    {
      "epoch": 1.254525509327894,
      "grad_norm": 2.640625,
      "learning_rate": 3.2323704344078655e-05,
      "loss": 0.8781,
      "step": 357950
    },
    {
      "epoch": 1.2545605568347895,
      "grad_norm": 3.203125,
      "learning_rate": 3.232305531541495e-05,
      "loss": 0.897,
      "step": 357960
    },
    {
      "epoch": 1.2545956043416853,
      "grad_norm": 2.546875,
      "learning_rate": 3.232240628675125e-05,
      "loss": 0.8745,
      "step": 357970
    },
    {
      "epoch": 1.2546306518485808,
      "grad_norm": 3.1875,
      "learning_rate": 3.2321757258087546e-05,
      "loss": 0.8688,
      "step": 357980
    },
    {
      "epoch": 1.2546656993554763,
      "grad_norm": 3.09375,
      "learning_rate": 3.232110822942385e-05,
      "loss": 0.9367,
      "step": 357990
    },
    {
      "epoch": 1.2547007468623719,
      "grad_norm": 2.53125,
      "learning_rate": 3.232045920076014e-05,
      "loss": 0.7702,
      "step": 358000
    },
    {
      "epoch": 1.2547357943692674,
      "grad_norm": 3.15625,
      "learning_rate": 3.231981017209644e-05,
      "loss": 0.9075,
      "step": 358010
    },
    {
      "epoch": 1.2547708418761632,
      "grad_norm": 2.90625,
      "learning_rate": 3.231916114343274e-05,
      "loss": 0.8479,
      "step": 358020
    },
    {
      "epoch": 1.2548058893830587,
      "grad_norm": 3.078125,
      "learning_rate": 3.231851211476904e-05,
      "loss": 0.9023,
      "step": 358030
    },
    {
      "epoch": 1.2548409368899542,
      "grad_norm": 3.390625,
      "learning_rate": 3.231786308610534e-05,
      "loss": 0.8415,
      "step": 358040
    },
    {
      "epoch": 1.25487598439685,
      "grad_norm": 2.921875,
      "learning_rate": 3.2317214057441635e-05,
      "loss": 0.7981,
      "step": 358050
    },
    {
      "epoch": 1.2549110319037455,
      "grad_norm": 2.421875,
      "learning_rate": 3.231656502877794e-05,
      "loss": 0.8491,
      "step": 358060
    },
    {
      "epoch": 1.254946079410641,
      "grad_norm": 2.921875,
      "learning_rate": 3.231591600011423e-05,
      "loss": 0.8625,
      "step": 358070
    },
    {
      "epoch": 1.2549811269175368,
      "grad_norm": 2.8125,
      "learning_rate": 3.231526697145053e-05,
      "loss": 0.856,
      "step": 358080
    },
    {
      "epoch": 1.2550161744244324,
      "grad_norm": 2.875,
      "learning_rate": 3.231461794278683e-05,
      "loss": 0.8687,
      "step": 358090
    },
    {
      "epoch": 1.255051221931328,
      "grad_norm": 3.140625,
      "learning_rate": 3.231396891412313e-05,
      "loss": 0.8134,
      "step": 358100
    },
    {
      "epoch": 1.2550862694382234,
      "grad_norm": 2.84375,
      "learning_rate": 3.231331988545942e-05,
      "loss": 0.8655,
      "step": 358110
    },
    {
      "epoch": 1.2551213169451192,
      "grad_norm": 2.75,
      "learning_rate": 3.2312670856795725e-05,
      "loss": 0.923,
      "step": 358120
    },
    {
      "epoch": 1.2551563644520147,
      "grad_norm": 3.125,
      "learning_rate": 3.231202182813202e-05,
      "loss": 0.8422,
      "step": 358130
    },
    {
      "epoch": 1.2551914119589103,
      "grad_norm": 2.84375,
      "learning_rate": 3.2311372799468314e-05,
      "loss": 0.8215,
      "step": 358140
    },
    {
      "epoch": 1.2552264594658058,
      "grad_norm": 3.296875,
      "learning_rate": 3.2310723770804615e-05,
      "loss": 0.8948,
      "step": 358150
    },
    {
      "epoch": 1.2552615069727016,
      "grad_norm": 3.75,
      "learning_rate": 3.231007474214091e-05,
      "loss": 0.7501,
      "step": 358160
    },
    {
      "epoch": 1.255296554479597,
      "grad_norm": 2.515625,
      "learning_rate": 3.230942571347721e-05,
      "loss": 0.7642,
      "step": 358170
    },
    {
      "epoch": 1.2553316019864926,
      "grad_norm": 2.859375,
      "learning_rate": 3.2308776684813506e-05,
      "loss": 0.8157,
      "step": 358180
    },
    {
      "epoch": 1.2553666494933884,
      "grad_norm": 3.078125,
      "learning_rate": 3.230812765614981e-05,
      "loss": 0.8616,
      "step": 358190
    },
    {
      "epoch": 1.255401697000284,
      "grad_norm": 2.96875,
      "learning_rate": 3.23074786274861e-05,
      "loss": 0.8903,
      "step": 358200
    },
    {
      "epoch": 1.2554367445071795,
      "grad_norm": 2.796875,
      "learning_rate": 3.23068295988224e-05,
      "loss": 0.9018,
      "step": 358210
    },
    {
      "epoch": 1.2554717920140752,
      "grad_norm": 2.78125,
      "learning_rate": 3.23061805701587e-05,
      "loss": 0.8075,
      "step": 358220
    },
    {
      "epoch": 1.2555068395209708,
      "grad_norm": 2.84375,
      "learning_rate": 3.2305531541495e-05,
      "loss": 0.8972,
      "step": 358230
    },
    {
      "epoch": 1.2555418870278663,
      "grad_norm": 3.234375,
      "learning_rate": 3.2304882512831294e-05,
      "loss": 0.9318,
      "step": 358240
    },
    {
      "epoch": 1.2555769345347618,
      "grad_norm": 2.828125,
      "learning_rate": 3.2304233484167595e-05,
      "loss": 0.8379,
      "step": 358250
    },
    {
      "epoch": 1.2556119820416574,
      "grad_norm": 2.8125,
      "learning_rate": 3.23035844555039e-05,
      "loss": 0.8121,
      "step": 358260
    },
    {
      "epoch": 1.2556470295485531,
      "grad_norm": 2.34375,
      "learning_rate": 3.230293542684019e-05,
      "loss": 0.8337,
      "step": 358270
    },
    {
      "epoch": 1.2556820770554487,
      "grad_norm": 3.1875,
      "learning_rate": 3.230228639817649e-05,
      "loss": 0.8864,
      "step": 358280
    },
    {
      "epoch": 1.2557171245623442,
      "grad_norm": 3.171875,
      "learning_rate": 3.230163736951279e-05,
      "loss": 0.8875,
      "step": 358290
    },
    {
      "epoch": 1.25575217206924,
      "grad_norm": 2.953125,
      "learning_rate": 3.230098834084909e-05,
      "loss": 0.8368,
      "step": 358300
    },
    {
      "epoch": 1.2557872195761355,
      "grad_norm": 2.390625,
      "learning_rate": 3.230033931218538e-05,
      "loss": 0.8019,
      "step": 358310
    },
    {
      "epoch": 1.255822267083031,
      "grad_norm": 2.53125,
      "learning_rate": 3.2299690283521685e-05,
      "loss": 0.7959,
      "step": 358320
    },
    {
      "epoch": 1.2558573145899268,
      "grad_norm": 2.96875,
      "learning_rate": 3.229904125485798e-05,
      "loss": 0.8414,
      "step": 358330
    },
    {
      "epoch": 1.2558923620968223,
      "grad_norm": 2.703125,
      "learning_rate": 3.229839222619428e-05,
      "loss": 0.8175,
      "step": 358340
    },
    {
      "epoch": 1.2559274096037178,
      "grad_norm": 2.625,
      "learning_rate": 3.2297743197530575e-05,
      "loss": 0.8382,
      "step": 358350
    },
    {
      "epoch": 1.2559624571106134,
      "grad_norm": 3.875,
      "learning_rate": 3.229709416886688e-05,
      "loss": 0.8763,
      "step": 358360
    },
    {
      "epoch": 1.255997504617509,
      "grad_norm": 2.75,
      "learning_rate": 3.229644514020317e-05,
      "loss": 0.8107,
      "step": 358370
    },
    {
      "epoch": 1.2560325521244047,
      "grad_norm": 2.859375,
      "learning_rate": 3.229579611153947e-05,
      "loss": 0.93,
      "step": 358380
    },
    {
      "epoch": 1.2560675996313002,
      "grad_norm": 2.9375,
      "learning_rate": 3.229514708287577e-05,
      "loss": 0.8628,
      "step": 358390
    },
    {
      "epoch": 1.2561026471381957,
      "grad_norm": 2.703125,
      "learning_rate": 3.229449805421207e-05,
      "loss": 0.8435,
      "step": 358400
    },
    {
      "epoch": 1.2561376946450915,
      "grad_norm": 2.9375,
      "learning_rate": 3.229384902554837e-05,
      "loss": 0.8221,
      "step": 358410
    },
    {
      "epoch": 1.256172742151987,
      "grad_norm": 3.03125,
      "learning_rate": 3.2293199996884665e-05,
      "loss": 0.8548,
      "step": 358420
    },
    {
      "epoch": 1.2562077896588826,
      "grad_norm": 3.0,
      "learning_rate": 3.2292550968220966e-05,
      "loss": 0.8764,
      "step": 358430
    },
    {
      "epoch": 1.2562428371657783,
      "grad_norm": 2.859375,
      "learning_rate": 3.229190193955726e-05,
      "loss": 0.8505,
      "step": 358440
    },
    {
      "epoch": 1.2562778846726739,
      "grad_norm": 2.59375,
      "learning_rate": 3.229125291089356e-05,
      "loss": 0.8515,
      "step": 358450
    },
    {
      "epoch": 1.2563129321795694,
      "grad_norm": 2.859375,
      "learning_rate": 3.229060388222986e-05,
      "loss": 0.8623,
      "step": 358460
    },
    {
      "epoch": 1.256347979686465,
      "grad_norm": 2.609375,
      "learning_rate": 3.228995485356616e-05,
      "loss": 0.8245,
      "step": 358470
    },
    {
      "epoch": 1.2563830271933605,
      "grad_norm": 3.5,
      "learning_rate": 3.228930582490245e-05,
      "loss": 0.8648,
      "step": 358480
    },
    {
      "epoch": 1.2564180747002562,
      "grad_norm": 2.890625,
      "learning_rate": 3.2288656796238754e-05,
      "loss": 0.8645,
      "step": 358490
    },
    {
      "epoch": 1.2564531222071518,
      "grad_norm": 2.59375,
      "learning_rate": 3.228800776757505e-05,
      "loss": 0.8476,
      "step": 358500
    },
    {
      "epoch": 1.2564881697140473,
      "grad_norm": 2.796875,
      "learning_rate": 3.228735873891134e-05,
      "loss": 0.8566,
      "step": 358510
    },
    {
      "epoch": 1.256523217220943,
      "grad_norm": 2.625,
      "learning_rate": 3.2286709710247645e-05,
      "loss": 0.8398,
      "step": 358520
    },
    {
      "epoch": 1.2565582647278386,
      "grad_norm": 2.828125,
      "learning_rate": 3.228606068158394e-05,
      "loss": 0.8076,
      "step": 358530
    },
    {
      "epoch": 1.2565933122347341,
      "grad_norm": 2.78125,
      "learning_rate": 3.228541165292024e-05,
      "loss": 0.836,
      "step": 358540
    },
    {
      "epoch": 1.25662835974163,
      "grad_norm": 3.015625,
      "learning_rate": 3.2284762624256535e-05,
      "loss": 0.7851,
      "step": 358550
    },
    {
      "epoch": 1.2566634072485254,
      "grad_norm": 3.65625,
      "learning_rate": 3.228411359559284e-05,
      "loss": 0.8896,
      "step": 358560
    },
    {
      "epoch": 1.256698454755421,
      "grad_norm": 2.765625,
      "learning_rate": 3.228346456692913e-05,
      "loss": 0.7465,
      "step": 358570
    },
    {
      "epoch": 1.2567335022623165,
      "grad_norm": 2.75,
      "learning_rate": 3.228281553826543e-05,
      "loss": 0.8855,
      "step": 358580
    },
    {
      "epoch": 1.256768549769212,
      "grad_norm": 2.859375,
      "learning_rate": 3.228216650960173e-05,
      "loss": 0.9109,
      "step": 358590
    },
    {
      "epoch": 1.2568035972761078,
      "grad_norm": 2.765625,
      "learning_rate": 3.228151748093803e-05,
      "loss": 0.9549,
      "step": 358600
    },
    {
      "epoch": 1.2568386447830033,
      "grad_norm": 3.234375,
      "learning_rate": 3.228086845227432e-05,
      "loss": 0.8237,
      "step": 358610
    },
    {
      "epoch": 1.2568736922898989,
      "grad_norm": 3.390625,
      "learning_rate": 3.2280219423610625e-05,
      "loss": 0.8407,
      "step": 358620
    },
    {
      "epoch": 1.2569087397967946,
      "grad_norm": 2.6875,
      "learning_rate": 3.2279570394946926e-05,
      "loss": 0.8518,
      "step": 358630
    },
    {
      "epoch": 1.2569437873036902,
      "grad_norm": 3.0,
      "learning_rate": 3.227892136628322e-05,
      "loss": 0.8668,
      "step": 358640
    },
    {
      "epoch": 1.2569788348105857,
      "grad_norm": 3.296875,
      "learning_rate": 3.227827233761952e-05,
      "loss": 0.8639,
      "step": 358650
    },
    {
      "epoch": 1.2570138823174815,
      "grad_norm": 2.734375,
      "learning_rate": 3.227762330895582e-05,
      "loss": 0.7935,
      "step": 358660
    },
    {
      "epoch": 1.257048929824377,
      "grad_norm": 2.640625,
      "learning_rate": 3.227697428029212e-05,
      "loss": 0.8836,
      "step": 358670
    },
    {
      "epoch": 1.2570839773312725,
      "grad_norm": 2.96875,
      "learning_rate": 3.227632525162841e-05,
      "loss": 0.843,
      "step": 358680
    },
    {
      "epoch": 1.257119024838168,
      "grad_norm": 2.890625,
      "learning_rate": 3.2275676222964714e-05,
      "loss": 0.9078,
      "step": 358690
    },
    {
      "epoch": 1.2571540723450636,
      "grad_norm": 2.875,
      "learning_rate": 3.227502719430101e-05,
      "loss": 0.8024,
      "step": 358700
    },
    {
      "epoch": 1.2571891198519594,
      "grad_norm": 2.921875,
      "learning_rate": 3.227437816563731e-05,
      "loss": 0.895,
      "step": 358710
    },
    {
      "epoch": 1.257224167358855,
      "grad_norm": 3.0625,
      "learning_rate": 3.2273729136973605e-05,
      "loss": 0.8573,
      "step": 358720
    },
    {
      "epoch": 1.2572592148657504,
      "grad_norm": 3.3125,
      "learning_rate": 3.2273080108309906e-05,
      "loss": 0.7593,
      "step": 358730
    },
    {
      "epoch": 1.2572942623726462,
      "grad_norm": 2.890625,
      "learning_rate": 3.22724310796462e-05,
      "loss": 0.8224,
      "step": 358740
    },
    {
      "epoch": 1.2573293098795417,
      "grad_norm": 2.84375,
      "learning_rate": 3.22717820509825e-05,
      "loss": 0.8477,
      "step": 358750
    },
    {
      "epoch": 1.2573643573864373,
      "grad_norm": 3.40625,
      "learning_rate": 3.2271133022318804e-05,
      "loss": 0.8515,
      "step": 358760
    },
    {
      "epoch": 1.257399404893333,
      "grad_norm": 2.90625,
      "learning_rate": 3.22704839936551e-05,
      "loss": 0.8306,
      "step": 358770
    },
    {
      "epoch": 1.2574344524002286,
      "grad_norm": 2.875,
      "learning_rate": 3.22698349649914e-05,
      "loss": 0.8228,
      "step": 358780
    },
    {
      "epoch": 1.257469499907124,
      "grad_norm": 2.78125,
      "learning_rate": 3.2269185936327694e-05,
      "loss": 0.8515,
      "step": 358790
    },
    {
      "epoch": 1.2575045474140196,
      "grad_norm": 2.5,
      "learning_rate": 3.2268536907663996e-05,
      "loss": 0.8532,
      "step": 358800
    },
    {
      "epoch": 1.2575395949209154,
      "grad_norm": 2.921875,
      "learning_rate": 3.226788787900029e-05,
      "loss": 0.9154,
      "step": 358810
    },
    {
      "epoch": 1.257574642427811,
      "grad_norm": 3.03125,
      "learning_rate": 3.226723885033659e-05,
      "loss": 0.853,
      "step": 358820
    },
    {
      "epoch": 1.2576096899347065,
      "grad_norm": 2.828125,
      "learning_rate": 3.2266589821672886e-05,
      "loss": 0.8941,
      "step": 358830
    },
    {
      "epoch": 1.257644737441602,
      "grad_norm": 3.0,
      "learning_rate": 3.226594079300919e-05,
      "loss": 0.858,
      "step": 358840
    },
    {
      "epoch": 1.2576797849484977,
      "grad_norm": 2.953125,
      "learning_rate": 3.226529176434548e-05,
      "loss": 0.8511,
      "step": 358850
    },
    {
      "epoch": 1.2577148324553933,
      "grad_norm": 2.625,
      "learning_rate": 3.2264642735681784e-05,
      "loss": 0.7599,
      "step": 358860
    },
    {
      "epoch": 1.2577498799622888,
      "grad_norm": 2.59375,
      "learning_rate": 3.226399370701808e-05,
      "loss": 0.8251,
      "step": 358870
    },
    {
      "epoch": 1.2577849274691846,
      "grad_norm": 2.828125,
      "learning_rate": 3.226334467835438e-05,
      "loss": 0.9286,
      "step": 358880
    },
    {
      "epoch": 1.2578199749760801,
      "grad_norm": 2.703125,
      "learning_rate": 3.2262695649690674e-05,
      "loss": 0.7999,
      "step": 358890
    },
    {
      "epoch": 1.2578550224829756,
      "grad_norm": 2.90625,
      "learning_rate": 3.226204662102697e-05,
      "loss": 0.8318,
      "step": 358900
    },
    {
      "epoch": 1.2578900699898714,
      "grad_norm": 2.75,
      "learning_rate": 3.226139759236327e-05,
      "loss": 0.8034,
      "step": 358910
    },
    {
      "epoch": 1.257925117496767,
      "grad_norm": 2.796875,
      "learning_rate": 3.2260748563699565e-05,
      "loss": 0.8122,
      "step": 358920
    },
    {
      "epoch": 1.2579601650036625,
      "grad_norm": 3.25,
      "learning_rate": 3.2260099535035866e-05,
      "loss": 0.8323,
      "step": 358930
    },
    {
      "epoch": 1.257995212510558,
      "grad_norm": 3.828125,
      "learning_rate": 3.225945050637216e-05,
      "loss": 0.8402,
      "step": 358940
    },
    {
      "epoch": 1.2580302600174535,
      "grad_norm": 3.078125,
      "learning_rate": 3.225880147770846e-05,
      "loss": 0.8555,
      "step": 358950
    },
    {
      "epoch": 1.2580653075243493,
      "grad_norm": 2.53125,
      "learning_rate": 3.225815244904476e-05,
      "loss": 0.8769,
      "step": 358960
    },
    {
      "epoch": 1.2581003550312448,
      "grad_norm": 3.0625,
      "learning_rate": 3.225750342038106e-05,
      "loss": 0.8268,
      "step": 358970
    },
    {
      "epoch": 1.2581354025381404,
      "grad_norm": 2.890625,
      "learning_rate": 3.225685439171735e-05,
      "loss": 0.8695,
      "step": 358980
    },
    {
      "epoch": 1.2581704500450361,
      "grad_norm": 2.71875,
      "learning_rate": 3.2256205363053654e-05,
      "loss": 0.952,
      "step": 358990
    },
    {
      "epoch": 1.2582054975519317,
      "grad_norm": 2.96875,
      "learning_rate": 3.2255556334389956e-05,
      "loss": 0.8647,
      "step": 359000
    },
    {
      "epoch": 1.2582405450588272,
      "grad_norm": 2.765625,
      "learning_rate": 3.225490730572625e-05,
      "loss": 0.8778,
      "step": 359010
    },
    {
      "epoch": 1.258275592565723,
      "grad_norm": 3.203125,
      "learning_rate": 3.225425827706255e-05,
      "loss": 0.8733,
      "step": 359020
    },
    {
      "epoch": 1.2583106400726185,
      "grad_norm": 2.8125,
      "learning_rate": 3.2253609248398846e-05,
      "loss": 0.8516,
      "step": 359030
    },
    {
      "epoch": 1.258345687579514,
      "grad_norm": 2.6875,
      "learning_rate": 3.225296021973515e-05,
      "loss": 0.8053,
      "step": 359040
    },
    {
      "epoch": 1.2583807350864096,
      "grad_norm": 2.8125,
      "learning_rate": 3.225231119107144e-05,
      "loss": 0.8024,
      "step": 359050
    },
    {
      "epoch": 1.258415782593305,
      "grad_norm": 2.96875,
      "learning_rate": 3.2251662162407744e-05,
      "loss": 0.8054,
      "step": 359060
    },
    {
      "epoch": 1.2584508301002009,
      "grad_norm": 2.953125,
      "learning_rate": 3.225101313374404e-05,
      "loss": 0.9001,
      "step": 359070
    },
    {
      "epoch": 1.2584858776070964,
      "grad_norm": 3.21875,
      "learning_rate": 3.225036410508034e-05,
      "loss": 0.9447,
      "step": 359080
    },
    {
      "epoch": 1.258520925113992,
      "grad_norm": 3.015625,
      "learning_rate": 3.2249715076416634e-05,
      "loss": 0.9044,
      "step": 359090
    },
    {
      "epoch": 1.2585559726208877,
      "grad_norm": 2.375,
      "learning_rate": 3.2249066047752936e-05,
      "loss": 0.8853,
      "step": 359100
    },
    {
      "epoch": 1.2585910201277832,
      "grad_norm": 2.96875,
      "learning_rate": 3.224841701908923e-05,
      "loss": 0.9129,
      "step": 359110
    },
    {
      "epoch": 1.2586260676346788,
      "grad_norm": 3.171875,
      "learning_rate": 3.224776799042553e-05,
      "loss": 0.8754,
      "step": 359120
    },
    {
      "epoch": 1.2586611151415745,
      "grad_norm": 3.0625,
      "learning_rate": 3.224711896176183e-05,
      "loss": 0.9129,
      "step": 359130
    },
    {
      "epoch": 1.25869616264847,
      "grad_norm": 3.34375,
      "learning_rate": 3.224646993309813e-05,
      "loss": 0.827,
      "step": 359140
    },
    {
      "epoch": 1.2587312101553656,
      "grad_norm": 2.390625,
      "learning_rate": 3.224582090443443e-05,
      "loss": 0.8131,
      "step": 359150
    },
    {
      "epoch": 1.2587662576622611,
      "grad_norm": 2.8125,
      "learning_rate": 3.2245171875770724e-05,
      "loss": 0.8859,
      "step": 359160
    },
    {
      "epoch": 1.2588013051691567,
      "grad_norm": 2.78125,
      "learning_rate": 3.2244522847107025e-05,
      "loss": 0.8994,
      "step": 359170
    },
    {
      "epoch": 1.2588363526760524,
      "grad_norm": 3.125,
      "learning_rate": 3.224387381844332e-05,
      "loss": 0.8653,
      "step": 359180
    },
    {
      "epoch": 1.258871400182948,
      "grad_norm": 3.046875,
      "learning_rate": 3.224322478977962e-05,
      "loss": 0.8408,
      "step": 359190
    },
    {
      "epoch": 1.2589064476898435,
      "grad_norm": 3.546875,
      "learning_rate": 3.2242575761115916e-05,
      "loss": 0.941,
      "step": 359200
    },
    {
      "epoch": 1.2589414951967393,
      "grad_norm": 3.234375,
      "learning_rate": 3.224192673245222e-05,
      "loss": 0.8182,
      "step": 359210
    },
    {
      "epoch": 1.2589765427036348,
      "grad_norm": 3.28125,
      "learning_rate": 3.224127770378851e-05,
      "loss": 0.9588,
      "step": 359220
    },
    {
      "epoch": 1.2590115902105303,
      "grad_norm": 2.9375,
      "learning_rate": 3.224062867512481e-05,
      "loss": 0.7996,
      "step": 359230
    },
    {
      "epoch": 1.259046637717426,
      "grad_norm": 2.5,
      "learning_rate": 3.223997964646111e-05,
      "loss": 0.8096,
      "step": 359240
    },
    {
      "epoch": 1.2590816852243216,
      "grad_norm": 2.921875,
      "learning_rate": 3.223933061779741e-05,
      "loss": 0.9526,
      "step": 359250
    },
    {
      "epoch": 1.2591167327312172,
      "grad_norm": 2.78125,
      "learning_rate": 3.2238681589133704e-05,
      "loss": 0.8392,
      "step": 359260
    },
    {
      "epoch": 1.2591517802381127,
      "grad_norm": 2.5,
      "learning_rate": 3.223803256047e-05,
      "loss": 0.8487,
      "step": 359270
    },
    {
      "epoch": 1.2591868277450082,
      "grad_norm": 2.40625,
      "learning_rate": 3.22373835318063e-05,
      "loss": 0.7806,
      "step": 359280
    },
    {
      "epoch": 1.259221875251904,
      "grad_norm": 3.140625,
      "learning_rate": 3.2236734503142594e-05,
      "loss": 0.8538,
      "step": 359290
    },
    {
      "epoch": 1.2592569227587995,
      "grad_norm": 2.96875,
      "learning_rate": 3.2236085474478896e-05,
      "loss": 0.8388,
      "step": 359300
    },
    {
      "epoch": 1.259291970265695,
      "grad_norm": 2.90625,
      "learning_rate": 3.223543644581519e-05,
      "loss": 0.8686,
      "step": 359310
    },
    {
      "epoch": 1.2593270177725908,
      "grad_norm": 3.046875,
      "learning_rate": 3.223478741715149e-05,
      "loss": 0.8815,
      "step": 359320
    },
    {
      "epoch": 1.2593620652794864,
      "grad_norm": 3.015625,
      "learning_rate": 3.2234138388487786e-05,
      "loss": 0.8192,
      "step": 359330
    },
    {
      "epoch": 1.259397112786382,
      "grad_norm": 2.890625,
      "learning_rate": 3.223348935982409e-05,
      "loss": 0.8492,
      "step": 359340
    },
    {
      "epoch": 1.2594321602932776,
      "grad_norm": 2.78125,
      "learning_rate": 3.223284033116038e-05,
      "loss": 0.8758,
      "step": 359350
    },
    {
      "epoch": 1.2594672078001732,
      "grad_norm": 2.875,
      "learning_rate": 3.2232191302496684e-05,
      "loss": 0.9007,
      "step": 359360
    },
    {
      "epoch": 1.2595022553070687,
      "grad_norm": 2.96875,
      "learning_rate": 3.2231542273832985e-05,
      "loss": 0.8537,
      "step": 359370
    },
    {
      "epoch": 1.2595373028139643,
      "grad_norm": 2.421875,
      "learning_rate": 3.223089324516928e-05,
      "loss": 0.8934,
      "step": 359380
    },
    {
      "epoch": 1.2595723503208598,
      "grad_norm": 2.765625,
      "learning_rate": 3.223024421650558e-05,
      "loss": 0.8189,
      "step": 359390
    },
    {
      "epoch": 1.2596073978277555,
      "grad_norm": 3.109375,
      "learning_rate": 3.2229595187841876e-05,
      "loss": 0.8778,
      "step": 359400
    },
    {
      "epoch": 1.259642445334651,
      "grad_norm": 2.6875,
      "learning_rate": 3.222894615917818e-05,
      "loss": 0.9207,
      "step": 359410
    },
    {
      "epoch": 1.2596774928415466,
      "grad_norm": 2.890625,
      "learning_rate": 3.222829713051447e-05,
      "loss": 0.839,
      "step": 359420
    },
    {
      "epoch": 1.2597125403484424,
      "grad_norm": 2.859375,
      "learning_rate": 3.222764810185077e-05,
      "loss": 0.8017,
      "step": 359430
    },
    {
      "epoch": 1.259747587855338,
      "grad_norm": 2.96875,
      "learning_rate": 3.222699907318707e-05,
      "loss": 0.8024,
      "step": 359440
    },
    {
      "epoch": 1.2597826353622334,
      "grad_norm": 2.71875,
      "learning_rate": 3.222635004452337e-05,
      "loss": 0.785,
      "step": 359450
    },
    {
      "epoch": 1.2598176828691292,
      "grad_norm": 3.203125,
      "learning_rate": 3.2225701015859664e-05,
      "loss": 0.9138,
      "step": 359460
    },
    {
      "epoch": 1.2598527303760247,
      "grad_norm": 2.671875,
      "learning_rate": 3.2225051987195965e-05,
      "loss": 0.795,
      "step": 359470
    },
    {
      "epoch": 1.2598877778829203,
      "grad_norm": 2.875,
      "learning_rate": 3.222440295853226e-05,
      "loss": 0.879,
      "step": 359480
    },
    {
      "epoch": 1.2599228253898158,
      "grad_norm": 2.640625,
      "learning_rate": 3.222375392986856e-05,
      "loss": 0.8632,
      "step": 359490
    },
    {
      "epoch": 1.2599578728967116,
      "grad_norm": 3.390625,
      "learning_rate": 3.222310490120486e-05,
      "loss": 0.874,
      "step": 359500
    },
    {
      "epoch": 1.259992920403607,
      "grad_norm": 2.8125,
      "learning_rate": 3.222245587254116e-05,
      "loss": 0.8173,
      "step": 359510
    },
    {
      "epoch": 1.2600279679105026,
      "grad_norm": 3.0625,
      "learning_rate": 3.222180684387746e-05,
      "loss": 0.8685,
      "step": 359520
    },
    {
      "epoch": 1.2600630154173982,
      "grad_norm": 2.578125,
      "learning_rate": 3.222115781521375e-05,
      "loss": 0.8132,
      "step": 359530
    },
    {
      "epoch": 1.260098062924294,
      "grad_norm": 2.671875,
      "learning_rate": 3.2220508786550054e-05,
      "loss": 0.8123,
      "step": 359540
    },
    {
      "epoch": 1.2601331104311895,
      "grad_norm": 2.5,
      "learning_rate": 3.221985975788635e-05,
      "loss": 0.8151,
      "step": 359550
    },
    {
      "epoch": 1.260168157938085,
      "grad_norm": 2.953125,
      "learning_rate": 3.221921072922265e-05,
      "loss": 0.7952,
      "step": 359560
    },
    {
      "epoch": 1.2602032054449808,
      "grad_norm": 2.828125,
      "learning_rate": 3.2218561700558945e-05,
      "loss": 0.8837,
      "step": 359570
    },
    {
      "epoch": 1.2602382529518763,
      "grad_norm": 2.765625,
      "learning_rate": 3.2217912671895246e-05,
      "loss": 0.9501,
      "step": 359580
    },
    {
      "epoch": 1.2602733004587718,
      "grad_norm": 3.28125,
      "learning_rate": 3.221726364323154e-05,
      "loss": 0.8556,
      "step": 359590
    },
    {
      "epoch": 1.2603083479656676,
      "grad_norm": 2.984375,
      "learning_rate": 3.221661461456784e-05,
      "loss": 0.8455,
      "step": 359600
    },
    {
      "epoch": 1.2603433954725631,
      "grad_norm": 3.03125,
      "learning_rate": 3.221596558590414e-05,
      "loss": 0.7898,
      "step": 359610
    },
    {
      "epoch": 1.2603784429794587,
      "grad_norm": 2.859375,
      "learning_rate": 3.221531655724044e-05,
      "loss": 0.915,
      "step": 359620
    },
    {
      "epoch": 1.2604134904863542,
      "grad_norm": 2.484375,
      "learning_rate": 3.221466752857674e-05,
      "loss": 0.8264,
      "step": 359630
    },
    {
      "epoch": 1.2604485379932497,
      "grad_norm": 3.28125,
      "learning_rate": 3.221401849991303e-05,
      "loss": 0.8279,
      "step": 359640
    },
    {
      "epoch": 1.2604835855001455,
      "grad_norm": 3.109375,
      "learning_rate": 3.221336947124933e-05,
      "loss": 0.7899,
      "step": 359650
    },
    {
      "epoch": 1.260518633007041,
      "grad_norm": 2.953125,
      "learning_rate": 3.2212720442585624e-05,
      "loss": 0.9092,
      "step": 359660
    },
    {
      "epoch": 1.2605536805139366,
      "grad_norm": 2.984375,
      "learning_rate": 3.2212071413921925e-05,
      "loss": 0.8605,
      "step": 359670
    },
    {
      "epoch": 1.2605887280208323,
      "grad_norm": 3.328125,
      "learning_rate": 3.221142238525822e-05,
      "loss": 0.8953,
      "step": 359680
    },
    {
      "epoch": 1.2606237755277279,
      "grad_norm": 2.765625,
      "learning_rate": 3.221077335659452e-05,
      "loss": 0.8098,
      "step": 359690
    },
    {
      "epoch": 1.2606588230346234,
      "grad_norm": 2.671875,
      "learning_rate": 3.2210124327930816e-05,
      "loss": 0.8048,
      "step": 359700
    },
    {
      "epoch": 1.2606938705415192,
      "grad_norm": 2.671875,
      "learning_rate": 3.220947529926712e-05,
      "loss": 0.8089,
      "step": 359710
    },
    {
      "epoch": 1.2607289180484147,
      "grad_norm": 2.84375,
      "learning_rate": 3.220882627060342e-05,
      "loss": 0.8022,
      "step": 359720
    },
    {
      "epoch": 1.2607639655553102,
      "grad_norm": 3.265625,
      "learning_rate": 3.220817724193971e-05,
      "loss": 0.9602,
      "step": 359730
    },
    {
      "epoch": 1.2607990130622058,
      "grad_norm": 3.09375,
      "learning_rate": 3.2207528213276014e-05,
      "loss": 0.8991,
      "step": 359740
    },
    {
      "epoch": 1.2608340605691013,
      "grad_norm": 2.859375,
      "learning_rate": 3.220687918461231e-05,
      "loss": 0.871,
      "step": 359750
    },
    {
      "epoch": 1.260869108075997,
      "grad_norm": 3.234375,
      "learning_rate": 3.220623015594861e-05,
      "loss": 0.8876,
      "step": 359760
    },
    {
      "epoch": 1.2609041555828926,
      "grad_norm": 3.34375,
      "learning_rate": 3.2205581127284905e-05,
      "loss": 0.8078,
      "step": 359770
    },
    {
      "epoch": 1.2609392030897881,
      "grad_norm": 2.765625,
      "learning_rate": 3.2204932098621206e-05,
      "loss": 0.7975,
      "step": 359780
    },
    {
      "epoch": 1.2609742505966839,
      "grad_norm": 3.40625,
      "learning_rate": 3.22042830699575e-05,
      "loss": 0.8495,
      "step": 359790
    },
    {
      "epoch": 1.2610092981035794,
      "grad_norm": 2.890625,
      "learning_rate": 3.22036340412938e-05,
      "loss": 0.8529,
      "step": 359800
    },
    {
      "epoch": 1.261044345610475,
      "grad_norm": 3.078125,
      "learning_rate": 3.22029850126301e-05,
      "loss": 0.7735,
      "step": 359810
    },
    {
      "epoch": 1.2610793931173707,
      "grad_norm": 3.796875,
      "learning_rate": 3.22023359839664e-05,
      "loss": 0.8589,
      "step": 359820
    },
    {
      "epoch": 1.2611144406242663,
      "grad_norm": 2.984375,
      "learning_rate": 3.220168695530269e-05,
      "loss": 0.7959,
      "step": 359830
    },
    {
      "epoch": 1.2611494881311618,
      "grad_norm": 2.796875,
      "learning_rate": 3.2201037926638994e-05,
      "loss": 0.8527,
      "step": 359840
    },
    {
      "epoch": 1.2611845356380573,
      "grad_norm": 2.828125,
      "learning_rate": 3.220038889797529e-05,
      "loss": 0.8783,
      "step": 359850
    },
    {
      "epoch": 1.2612195831449529,
      "grad_norm": 3.28125,
      "learning_rate": 3.219973986931159e-05,
      "loss": 0.8658,
      "step": 359860
    },
    {
      "epoch": 1.2612546306518486,
      "grad_norm": 2.796875,
      "learning_rate": 3.219909084064789e-05,
      "loss": 0.819,
      "step": 359870
    },
    {
      "epoch": 1.2612896781587442,
      "grad_norm": 3.0625,
      "learning_rate": 3.2198441811984186e-05,
      "loss": 0.8351,
      "step": 359880
    },
    {
      "epoch": 1.2613247256656397,
      "grad_norm": 2.703125,
      "learning_rate": 3.219779278332049e-05,
      "loss": 0.8379,
      "step": 359890
    },
    {
      "epoch": 1.2613597731725354,
      "grad_norm": 3.515625,
      "learning_rate": 3.219714375465678e-05,
      "loss": 0.8801,
      "step": 359900
    },
    {
      "epoch": 1.261394820679431,
      "grad_norm": 2.578125,
      "learning_rate": 3.2196494725993084e-05,
      "loss": 0.8735,
      "step": 359910
    },
    {
      "epoch": 1.2614298681863265,
      "grad_norm": 2.953125,
      "learning_rate": 3.219584569732938e-05,
      "loss": 0.917,
      "step": 359920
    },
    {
      "epoch": 1.2614649156932223,
      "grad_norm": 2.875,
      "learning_rate": 3.219519666866568e-05,
      "loss": 0.88,
      "step": 359930
    },
    {
      "epoch": 1.2614999632001178,
      "grad_norm": 3.125,
      "learning_rate": 3.2194547640001974e-05,
      "loss": 0.8726,
      "step": 359940
    },
    {
      "epoch": 1.2615350107070133,
      "grad_norm": 3.109375,
      "learning_rate": 3.2193898611338276e-05,
      "loss": 0.8033,
      "step": 359950
    },
    {
      "epoch": 1.2615700582139089,
      "grad_norm": 2.71875,
      "learning_rate": 3.219324958267457e-05,
      "loss": 0.899,
      "step": 359960
    },
    {
      "epoch": 1.2616051057208044,
      "grad_norm": 3.0625,
      "learning_rate": 3.219260055401087e-05,
      "loss": 0.956,
      "step": 359970
    },
    {
      "epoch": 1.2616401532277002,
      "grad_norm": 3.25,
      "learning_rate": 3.2191951525347166e-05,
      "loss": 0.8064,
      "step": 359980
    },
    {
      "epoch": 1.2616752007345957,
      "grad_norm": 2.8125,
      "learning_rate": 3.219130249668347e-05,
      "loss": 0.865,
      "step": 359990
    },
    {
      "epoch": 1.2617102482414913,
      "grad_norm": 2.703125,
      "learning_rate": 3.219065346801977e-05,
      "loss": 0.8183,
      "step": 360000
    },
    {
      "epoch": 1.2617102482414913,
      "eval_loss": 0.8031307458877563,
      "eval_runtime": 556.3021,
      "eval_samples_per_second": 683.866,
      "eval_steps_per_second": 56.989,
      "step": 360000
    },
    {
      "epoch": 1.261745295748387,
      "grad_norm": 3.03125,
      "learning_rate": 3.2190004439356064e-05,
      "loss": 0.8383,
      "step": 360010
    },
    {
      "epoch": 1.2617803432552825,
      "grad_norm": 2.90625,
      "learning_rate": 3.218935541069236e-05,
      "loss": 0.9137,
      "step": 360020
    },
    {
      "epoch": 1.261815390762178,
      "grad_norm": 3.109375,
      "learning_rate": 3.218870638202865e-05,
      "loss": 0.8876,
      "step": 360030
    },
    {
      "epoch": 1.2618504382690738,
      "grad_norm": 2.8125,
      "learning_rate": 3.2188057353364954e-05,
      "loss": 0.7932,
      "step": 360040
    },
    {
      "epoch": 1.2618854857759694,
      "grad_norm": 2.5,
      "learning_rate": 3.218740832470125e-05,
      "loss": 0.8257,
      "step": 360050
    },
    {
      "epoch": 1.261920533282865,
      "grad_norm": 3.0,
      "learning_rate": 3.218675929603755e-05,
      "loss": 0.8573,
      "step": 360060
    },
    {
      "epoch": 1.2619555807897604,
      "grad_norm": 3.484375,
      "learning_rate": 3.2186110267373845e-05,
      "loss": 0.8916,
      "step": 360070
    },
    {
      "epoch": 1.261990628296656,
      "grad_norm": 3.015625,
      "learning_rate": 3.2185461238710146e-05,
      "loss": 0.7795,
      "step": 360080
    },
    {
      "epoch": 1.2620256758035517,
      "grad_norm": 2.578125,
      "learning_rate": 3.218481221004645e-05,
      "loss": 0.8367,
      "step": 360090
    },
    {
      "epoch": 1.2620607233104473,
      "grad_norm": 3.234375,
      "learning_rate": 3.218416318138274e-05,
      "loss": 0.8369,
      "step": 360100
    },
    {
      "epoch": 1.2620957708173428,
      "grad_norm": 3.375,
      "learning_rate": 3.2183514152719044e-05,
      "loss": 0.8808,
      "step": 360110
    },
    {
      "epoch": 1.2621308183242386,
      "grad_norm": 2.953125,
      "learning_rate": 3.218286512405534e-05,
      "loss": 0.8584,
      "step": 360120
    },
    {
      "epoch": 1.262165865831134,
      "grad_norm": 3.265625,
      "learning_rate": 3.218221609539164e-05,
      "loss": 0.8937,
      "step": 360130
    },
    {
      "epoch": 1.2622009133380296,
      "grad_norm": 2.828125,
      "learning_rate": 3.2181567066727934e-05,
      "loss": 0.8444,
      "step": 360140
    },
    {
      "epoch": 1.2622359608449254,
      "grad_norm": 3.046875,
      "learning_rate": 3.2180918038064236e-05,
      "loss": 0.878,
      "step": 360150
    },
    {
      "epoch": 1.262271008351821,
      "grad_norm": 3.046875,
      "learning_rate": 3.218026900940053e-05,
      "loss": 0.8911,
      "step": 360160
    },
    {
      "epoch": 1.2623060558587165,
      "grad_norm": 2.84375,
      "learning_rate": 3.217961998073683e-05,
      "loss": 0.8099,
      "step": 360170
    },
    {
      "epoch": 1.262341103365612,
      "grad_norm": 3.03125,
      "learning_rate": 3.2178970952073126e-05,
      "loss": 0.8568,
      "step": 360180
    },
    {
      "epoch": 1.2623761508725078,
      "grad_norm": 2.671875,
      "learning_rate": 3.217832192340943e-05,
      "loss": 0.8709,
      "step": 360190
    },
    {
      "epoch": 1.2624111983794033,
      "grad_norm": 2.640625,
      "learning_rate": 3.217767289474572e-05,
      "loss": 0.7869,
      "step": 360200
    },
    {
      "epoch": 1.2624462458862988,
      "grad_norm": 2.90625,
      "learning_rate": 3.2177023866082024e-05,
      "loss": 0.7946,
      "step": 360210
    },
    {
      "epoch": 1.2624812933931944,
      "grad_norm": 2.765625,
      "learning_rate": 3.217637483741832e-05,
      "loss": 0.8115,
      "step": 360220
    },
    {
      "epoch": 1.2625163409000901,
      "grad_norm": 2.546875,
      "learning_rate": 3.217572580875462e-05,
      "loss": 0.9048,
      "step": 360230
    },
    {
      "epoch": 1.2625513884069857,
      "grad_norm": 2.796875,
      "learning_rate": 3.217507678009092e-05,
      "loss": 0.9108,
      "step": 360240
    },
    {
      "epoch": 1.2625864359138812,
      "grad_norm": 3.15625,
      "learning_rate": 3.2174427751427216e-05,
      "loss": 0.7909,
      "step": 360250
    },
    {
      "epoch": 1.262621483420777,
      "grad_norm": 2.78125,
      "learning_rate": 3.217377872276352e-05,
      "loss": 0.8426,
      "step": 360260
    },
    {
      "epoch": 1.2626565309276725,
      "grad_norm": 2.984375,
      "learning_rate": 3.217312969409981e-05,
      "loss": 0.8012,
      "step": 360270
    },
    {
      "epoch": 1.262691578434568,
      "grad_norm": 3.109375,
      "learning_rate": 3.217248066543611e-05,
      "loss": 0.8027,
      "step": 360280
    },
    {
      "epoch": 1.2627266259414638,
      "grad_norm": 2.984375,
      "learning_rate": 3.217183163677241e-05,
      "loss": 0.8055,
      "step": 360290
    },
    {
      "epoch": 1.2627616734483593,
      "grad_norm": 2.828125,
      "learning_rate": 3.217118260810871e-05,
      "loss": 0.7918,
      "step": 360300
    },
    {
      "epoch": 1.2627967209552549,
      "grad_norm": 2.84375,
      "learning_rate": 3.2170533579445004e-05,
      "loss": 0.7494,
      "step": 360310
    },
    {
      "epoch": 1.2628317684621504,
      "grad_norm": 3.28125,
      "learning_rate": 3.2169884550781305e-05,
      "loss": 0.9465,
      "step": 360320
    },
    {
      "epoch": 1.262866815969046,
      "grad_norm": 2.8125,
      "learning_rate": 3.21692355221176e-05,
      "loss": 0.965,
      "step": 360330
    },
    {
      "epoch": 1.2629018634759417,
      "grad_norm": 2.921875,
      "learning_rate": 3.21685864934539e-05,
      "loss": 0.8337,
      "step": 360340
    },
    {
      "epoch": 1.2629369109828372,
      "grad_norm": 2.9375,
      "learning_rate": 3.2167937464790196e-05,
      "loss": 0.838,
      "step": 360350
    },
    {
      "epoch": 1.2629719584897328,
      "grad_norm": 3.109375,
      "learning_rate": 3.21672884361265e-05,
      "loss": 0.9607,
      "step": 360360
    },
    {
      "epoch": 1.2630070059966285,
      "grad_norm": 2.84375,
      "learning_rate": 3.21666394074628e-05,
      "loss": 0.8383,
      "step": 360370
    },
    {
      "epoch": 1.263042053503524,
      "grad_norm": 2.40625,
      "learning_rate": 3.216599037879909e-05,
      "loss": 0.8173,
      "step": 360380
    },
    {
      "epoch": 1.2630771010104196,
      "grad_norm": 2.484375,
      "learning_rate": 3.216534135013539e-05,
      "loss": 0.9337,
      "step": 360390
    },
    {
      "epoch": 1.2631121485173153,
      "grad_norm": 3.359375,
      "learning_rate": 3.216469232147168e-05,
      "loss": 0.8784,
      "step": 360400
    },
    {
      "epoch": 1.2631471960242109,
      "grad_norm": 3.203125,
      "learning_rate": 3.2164043292807984e-05,
      "loss": 0.8365,
      "step": 360410
    },
    {
      "epoch": 1.2631822435311064,
      "grad_norm": 3.234375,
      "learning_rate": 3.216339426414428e-05,
      "loss": 0.7781,
      "step": 360420
    },
    {
      "epoch": 1.263217291038002,
      "grad_norm": 2.703125,
      "learning_rate": 3.216274523548058e-05,
      "loss": 0.8231,
      "step": 360430
    },
    {
      "epoch": 1.2632523385448975,
      "grad_norm": 3.078125,
      "learning_rate": 3.2162096206816874e-05,
      "loss": 0.8214,
      "step": 360440
    },
    {
      "epoch": 1.2632873860517932,
      "grad_norm": 2.84375,
      "learning_rate": 3.2161447178153176e-05,
      "loss": 0.8567,
      "step": 360450
    },
    {
      "epoch": 1.2633224335586888,
      "grad_norm": 3.078125,
      "learning_rate": 3.216079814948948e-05,
      "loss": 0.9559,
      "step": 360460
    },
    {
      "epoch": 1.2633574810655843,
      "grad_norm": 2.828125,
      "learning_rate": 3.216014912082577e-05,
      "loss": 0.8533,
      "step": 360470
    },
    {
      "epoch": 1.26339252857248,
      "grad_norm": 2.84375,
      "learning_rate": 3.215950009216207e-05,
      "loss": 0.8117,
      "step": 360480
    },
    {
      "epoch": 1.2634275760793756,
      "grad_norm": 3.046875,
      "learning_rate": 3.215885106349837e-05,
      "loss": 0.8587,
      "step": 360490
    },
    {
      "epoch": 1.2634626235862711,
      "grad_norm": 3.359375,
      "learning_rate": 3.215820203483467e-05,
      "loss": 0.8914,
      "step": 360500
    },
    {
      "epoch": 1.263497671093167,
      "grad_norm": 3.109375,
      "learning_rate": 3.2157553006170964e-05,
      "loss": 0.8785,
      "step": 360510
    },
    {
      "epoch": 1.2635327186000624,
      "grad_norm": 2.640625,
      "learning_rate": 3.2156903977507265e-05,
      "loss": 0.8989,
      "step": 360520
    },
    {
      "epoch": 1.263567766106958,
      "grad_norm": 3.3125,
      "learning_rate": 3.215625494884356e-05,
      "loss": 0.8558,
      "step": 360530
    },
    {
      "epoch": 1.2636028136138535,
      "grad_norm": 2.9375,
      "learning_rate": 3.215560592017986e-05,
      "loss": 0.8641,
      "step": 360540
    },
    {
      "epoch": 1.263637861120749,
      "grad_norm": 3.03125,
      "learning_rate": 3.2154956891516156e-05,
      "loss": 0.9774,
      "step": 360550
    },
    {
      "epoch": 1.2636729086276448,
      "grad_norm": 2.75,
      "learning_rate": 3.215430786285246e-05,
      "loss": 0.9629,
      "step": 360560
    },
    {
      "epoch": 1.2637079561345403,
      "grad_norm": 2.75,
      "learning_rate": 3.215365883418875e-05,
      "loss": 0.8316,
      "step": 360570
    },
    {
      "epoch": 1.2637430036414359,
      "grad_norm": 3.296875,
      "learning_rate": 3.215300980552505e-05,
      "loss": 0.7962,
      "step": 360580
    },
    {
      "epoch": 1.2637780511483316,
      "grad_norm": 2.453125,
      "learning_rate": 3.2152360776861354e-05,
      "loss": 0.8754,
      "step": 360590
    },
    {
      "epoch": 1.2638130986552272,
      "grad_norm": 2.546875,
      "learning_rate": 3.215171174819765e-05,
      "loss": 0.8516,
      "step": 360600
    },
    {
      "epoch": 1.2638481461621227,
      "grad_norm": 3.625,
      "learning_rate": 3.215106271953395e-05,
      "loss": 0.8219,
      "step": 360610
    },
    {
      "epoch": 1.2638831936690185,
      "grad_norm": 2.71875,
      "learning_rate": 3.2150413690870245e-05,
      "loss": 0.9104,
      "step": 360620
    },
    {
      "epoch": 1.263918241175914,
      "grad_norm": 3.03125,
      "learning_rate": 3.2149764662206546e-05,
      "loss": 0.826,
      "step": 360630
    },
    {
      "epoch": 1.2639532886828095,
      "grad_norm": 3.265625,
      "learning_rate": 3.214911563354284e-05,
      "loss": 0.8321,
      "step": 360640
    },
    {
      "epoch": 1.263988336189705,
      "grad_norm": 2.6875,
      "learning_rate": 3.214846660487914e-05,
      "loss": 0.8532,
      "step": 360650
    },
    {
      "epoch": 1.2640233836966006,
      "grad_norm": 2.375,
      "learning_rate": 3.214781757621544e-05,
      "loss": 0.7792,
      "step": 360660
    },
    {
      "epoch": 1.2640584312034964,
      "grad_norm": 2.765625,
      "learning_rate": 3.214716854755174e-05,
      "loss": 0.8601,
      "step": 360670
    },
    {
      "epoch": 1.264093478710392,
      "grad_norm": 2.46875,
      "learning_rate": 3.214651951888803e-05,
      "loss": 0.847,
      "step": 360680
    },
    {
      "epoch": 1.2641285262172874,
      "grad_norm": 3.1875,
      "learning_rate": 3.2145870490224334e-05,
      "loss": 0.7524,
      "step": 360690
    },
    {
      "epoch": 1.2641635737241832,
      "grad_norm": 2.671875,
      "learning_rate": 3.214522146156063e-05,
      "loss": 0.9474,
      "step": 360700
    },
    {
      "epoch": 1.2641986212310787,
      "grad_norm": 2.796875,
      "learning_rate": 3.214457243289693e-05,
      "loss": 0.906,
      "step": 360710
    },
    {
      "epoch": 1.2642336687379743,
      "grad_norm": 2.96875,
      "learning_rate": 3.2143923404233225e-05,
      "loss": 0.9061,
      "step": 360720
    },
    {
      "epoch": 1.26426871624487,
      "grad_norm": 3.15625,
      "learning_rate": 3.2143274375569526e-05,
      "loss": 0.8546,
      "step": 360730
    },
    {
      "epoch": 1.2643037637517656,
      "grad_norm": 2.65625,
      "learning_rate": 3.214262534690583e-05,
      "loss": 0.8841,
      "step": 360740
    },
    {
      "epoch": 1.264338811258661,
      "grad_norm": 3.015625,
      "learning_rate": 3.214197631824212e-05,
      "loss": 0.9006,
      "step": 360750
    },
    {
      "epoch": 1.2643738587655566,
      "grad_norm": 2.6875,
      "learning_rate": 3.2141327289578424e-05,
      "loss": 0.8403,
      "step": 360760
    },
    {
      "epoch": 1.2644089062724522,
      "grad_norm": 2.421875,
      "learning_rate": 3.214067826091471e-05,
      "loss": 0.8457,
      "step": 360770
    },
    {
      "epoch": 1.264443953779348,
      "grad_norm": 2.59375,
      "learning_rate": 3.214002923225101e-05,
      "loss": 0.8606,
      "step": 360780
    },
    {
      "epoch": 1.2644790012862435,
      "grad_norm": 2.46875,
      "learning_rate": 3.213938020358731e-05,
      "loss": 0.7972,
      "step": 360790
    },
    {
      "epoch": 1.264514048793139,
      "grad_norm": 3.234375,
      "learning_rate": 3.213873117492361e-05,
      "loss": 0.8501,
      "step": 360800
    },
    {
      "epoch": 1.2645490963000348,
      "grad_norm": 2.828125,
      "learning_rate": 3.2138082146259904e-05,
      "loss": 0.9148,
      "step": 360810
    },
    {
      "epoch": 1.2645841438069303,
      "grad_norm": 3.125,
      "learning_rate": 3.2137433117596205e-05,
      "loss": 0.8433,
      "step": 360820
    },
    {
      "epoch": 1.2646191913138258,
      "grad_norm": 2.65625,
      "learning_rate": 3.2136784088932506e-05,
      "loss": 0.8575,
      "step": 360830
    },
    {
      "epoch": 1.2646542388207216,
      "grad_norm": 3.03125,
      "learning_rate": 3.21361350602688e-05,
      "loss": 0.8801,
      "step": 360840
    },
    {
      "epoch": 1.2646892863276171,
      "grad_norm": 2.96875,
      "learning_rate": 3.21354860316051e-05,
      "loss": 0.8041,
      "step": 360850
    },
    {
      "epoch": 1.2647243338345127,
      "grad_norm": 2.9375,
      "learning_rate": 3.21348370029414e-05,
      "loss": 0.8638,
      "step": 360860
    },
    {
      "epoch": 1.2647593813414084,
      "grad_norm": 3.28125,
      "learning_rate": 3.21341879742777e-05,
      "loss": 0.8861,
      "step": 360870
    },
    {
      "epoch": 1.264794428848304,
      "grad_norm": 2.921875,
      "learning_rate": 3.213353894561399e-05,
      "loss": 0.8884,
      "step": 360880
    },
    {
      "epoch": 1.2648294763551995,
      "grad_norm": 2.953125,
      "learning_rate": 3.2132889916950294e-05,
      "loss": 0.9025,
      "step": 360890
    },
    {
      "epoch": 1.264864523862095,
      "grad_norm": 2.984375,
      "learning_rate": 3.213224088828659e-05,
      "loss": 0.8741,
      "step": 360900
    },
    {
      "epoch": 1.2648995713689906,
      "grad_norm": 2.4375,
      "learning_rate": 3.213159185962289e-05,
      "loss": 0.8185,
      "step": 360910
    },
    {
      "epoch": 1.2649346188758863,
      "grad_norm": 2.828125,
      "learning_rate": 3.2130942830959185e-05,
      "loss": 0.8846,
      "step": 360920
    },
    {
      "epoch": 1.2649696663827819,
      "grad_norm": 2.671875,
      "learning_rate": 3.2130293802295486e-05,
      "loss": 0.8645,
      "step": 360930
    },
    {
      "epoch": 1.2650047138896774,
      "grad_norm": 2.84375,
      "learning_rate": 3.212964477363178e-05,
      "loss": 0.9144,
      "step": 360940
    },
    {
      "epoch": 1.2650397613965731,
      "grad_norm": 3.25,
      "learning_rate": 3.212899574496808e-05,
      "loss": 0.8636,
      "step": 360950
    },
    {
      "epoch": 1.2650748089034687,
      "grad_norm": 2.796875,
      "learning_rate": 3.2128346716304384e-05,
      "loss": 0.8275,
      "step": 360960
    },
    {
      "epoch": 1.2651098564103642,
      "grad_norm": 2.90625,
      "learning_rate": 3.212769768764068e-05,
      "loss": 0.858,
      "step": 360970
    },
    {
      "epoch": 1.26514490391726,
      "grad_norm": 3.046875,
      "learning_rate": 3.212704865897698e-05,
      "loss": 0.8753,
      "step": 360980
    },
    {
      "epoch": 1.2651799514241555,
      "grad_norm": 3.40625,
      "learning_rate": 3.2126399630313274e-05,
      "loss": 0.8863,
      "step": 360990
    },
    {
      "epoch": 1.265214998931051,
      "grad_norm": 2.8125,
      "learning_rate": 3.2125750601649576e-05,
      "loss": 0.9247,
      "step": 361000
    },
    {
      "epoch": 1.2652500464379466,
      "grad_norm": 2.921875,
      "learning_rate": 3.212510157298587e-05,
      "loss": 0.8545,
      "step": 361010
    },
    {
      "epoch": 1.2652850939448421,
      "grad_norm": 3.28125,
      "learning_rate": 3.212445254432217e-05,
      "loss": 0.866,
      "step": 361020
    },
    {
      "epoch": 1.2653201414517379,
      "grad_norm": 2.828125,
      "learning_rate": 3.2123803515658466e-05,
      "loss": 0.88,
      "step": 361030
    },
    {
      "epoch": 1.2653551889586334,
      "grad_norm": 2.515625,
      "learning_rate": 3.212315448699477e-05,
      "loss": 0.8916,
      "step": 361040
    },
    {
      "epoch": 1.265390236465529,
      "grad_norm": 2.859375,
      "learning_rate": 3.212250545833106e-05,
      "loss": 0.9132,
      "step": 361050
    },
    {
      "epoch": 1.2654252839724247,
      "grad_norm": 3.0625,
      "learning_rate": 3.2121856429667364e-05,
      "loss": 0.8575,
      "step": 361060
    },
    {
      "epoch": 1.2654603314793202,
      "grad_norm": 2.984375,
      "learning_rate": 3.212120740100366e-05,
      "loss": 0.8136,
      "step": 361070
    },
    {
      "epoch": 1.2654953789862158,
      "grad_norm": 3.4375,
      "learning_rate": 3.212055837233996e-05,
      "loss": 0.8432,
      "step": 361080
    },
    {
      "epoch": 1.2655304264931115,
      "grad_norm": 2.671875,
      "learning_rate": 3.2119909343676254e-05,
      "loss": 0.7881,
      "step": 361090
    },
    {
      "epoch": 1.265565474000007,
      "grad_norm": 3.265625,
      "learning_rate": 3.2119260315012556e-05,
      "loss": 0.9279,
      "step": 361100
    },
    {
      "epoch": 1.2656005215069026,
      "grad_norm": 2.421875,
      "learning_rate": 3.211861128634886e-05,
      "loss": 0.8472,
      "step": 361110
    },
    {
      "epoch": 1.2656355690137981,
      "grad_norm": 3.28125,
      "learning_rate": 3.211796225768515e-05,
      "loss": 0.8773,
      "step": 361120
    },
    {
      "epoch": 1.2656706165206937,
      "grad_norm": 3.46875,
      "learning_rate": 3.211731322902145e-05,
      "loss": 0.8512,
      "step": 361130
    },
    {
      "epoch": 1.2657056640275894,
      "grad_norm": 2.703125,
      "learning_rate": 3.211666420035774e-05,
      "loss": 0.8266,
      "step": 361140
    },
    {
      "epoch": 1.265740711534485,
      "grad_norm": 2.90625,
      "learning_rate": 3.211601517169404e-05,
      "loss": 0.8601,
      "step": 361150
    },
    {
      "epoch": 1.2657757590413805,
      "grad_norm": 3.140625,
      "learning_rate": 3.211536614303034e-05,
      "loss": 0.8073,
      "step": 361160
    },
    {
      "epoch": 1.2658108065482763,
      "grad_norm": 2.59375,
      "learning_rate": 3.211471711436664e-05,
      "loss": 0.8091,
      "step": 361170
    },
    {
      "epoch": 1.2658458540551718,
      "grad_norm": 2.53125,
      "learning_rate": 3.211406808570293e-05,
      "loss": 0.8072,
      "step": 361180
    },
    {
      "epoch": 1.2658809015620673,
      "grad_norm": 3.09375,
      "learning_rate": 3.2113419057039234e-05,
      "loss": 0.8489,
      "step": 361190
    },
    {
      "epoch": 1.265915949068963,
      "grad_norm": 2.875,
      "learning_rate": 3.2112770028375536e-05,
      "loss": 0.875,
      "step": 361200
    },
    {
      "epoch": 1.2659509965758586,
      "grad_norm": 2.828125,
      "learning_rate": 3.211212099971183e-05,
      "loss": 0.8279,
      "step": 361210
    },
    {
      "epoch": 1.2659860440827542,
      "grad_norm": 2.59375,
      "learning_rate": 3.211147197104813e-05,
      "loss": 0.8584,
      "step": 361220
    },
    {
      "epoch": 1.2660210915896497,
      "grad_norm": 2.859375,
      "learning_rate": 3.2110822942384426e-05,
      "loss": 0.8608,
      "step": 361230
    },
    {
      "epoch": 1.2660561390965452,
      "grad_norm": 3.046875,
      "learning_rate": 3.211017391372073e-05,
      "loss": 0.9061,
      "step": 361240
    },
    {
      "epoch": 1.266091186603441,
      "grad_norm": 2.765625,
      "learning_rate": 3.210952488505702e-05,
      "loss": 0.875,
      "step": 361250
    },
    {
      "epoch": 1.2661262341103365,
      "grad_norm": 3.109375,
      "learning_rate": 3.2108875856393324e-05,
      "loss": 0.8387,
      "step": 361260
    },
    {
      "epoch": 1.266161281617232,
      "grad_norm": 2.859375,
      "learning_rate": 3.210822682772962e-05,
      "loss": 0.8312,
      "step": 361270
    },
    {
      "epoch": 1.2661963291241278,
      "grad_norm": 2.671875,
      "learning_rate": 3.210757779906592e-05,
      "loss": 0.9014,
      "step": 361280
    },
    {
      "epoch": 1.2662313766310234,
      "grad_norm": 3.0625,
      "learning_rate": 3.2106928770402214e-05,
      "loss": 0.8563,
      "step": 361290
    },
    {
      "epoch": 1.266266424137919,
      "grad_norm": 3.171875,
      "learning_rate": 3.2106279741738516e-05,
      "loss": 0.8629,
      "step": 361300
    },
    {
      "epoch": 1.2663014716448147,
      "grad_norm": 3.15625,
      "learning_rate": 3.210563071307481e-05,
      "loss": 0.9142,
      "step": 361310
    },
    {
      "epoch": 1.2663365191517102,
      "grad_norm": 3.4375,
      "learning_rate": 3.210498168441111e-05,
      "loss": 0.9716,
      "step": 361320
    },
    {
      "epoch": 1.2663715666586057,
      "grad_norm": 2.71875,
      "learning_rate": 3.210433265574741e-05,
      "loss": 0.9092,
      "step": 361330
    },
    {
      "epoch": 1.2664066141655013,
      "grad_norm": 2.8125,
      "learning_rate": 3.210368362708371e-05,
      "loss": 0.8073,
      "step": 361340
    },
    {
      "epoch": 1.2664416616723968,
      "grad_norm": 3.03125,
      "learning_rate": 3.210303459842001e-05,
      "loss": 0.8978,
      "step": 361350
    },
    {
      "epoch": 1.2664767091792926,
      "grad_norm": 2.859375,
      "learning_rate": 3.2102385569756304e-05,
      "loss": 0.8908,
      "step": 361360
    },
    {
      "epoch": 1.266511756686188,
      "grad_norm": 2.984375,
      "learning_rate": 3.2101736541092605e-05,
      "loss": 0.8385,
      "step": 361370
    },
    {
      "epoch": 1.2665468041930836,
      "grad_norm": 2.53125,
      "learning_rate": 3.21010875124289e-05,
      "loss": 0.8511,
      "step": 361380
    },
    {
      "epoch": 1.2665818516999794,
      "grad_norm": 2.9375,
      "learning_rate": 3.21004384837652e-05,
      "loss": 0.8954,
      "step": 361390
    },
    {
      "epoch": 1.266616899206875,
      "grad_norm": 2.46875,
      "learning_rate": 3.2099789455101496e-05,
      "loss": 0.8067,
      "step": 361400
    },
    {
      "epoch": 1.2666519467137705,
      "grad_norm": 2.25,
      "learning_rate": 3.20991404264378e-05,
      "loss": 0.8345,
      "step": 361410
    },
    {
      "epoch": 1.2666869942206662,
      "grad_norm": 3.375,
      "learning_rate": 3.209849139777409e-05,
      "loss": 0.8305,
      "step": 361420
    },
    {
      "epoch": 1.2667220417275618,
      "grad_norm": 2.78125,
      "learning_rate": 3.209784236911039e-05,
      "loss": 0.7943,
      "step": 361430
    },
    {
      "epoch": 1.2667570892344573,
      "grad_norm": 3.265625,
      "learning_rate": 3.209719334044669e-05,
      "loss": 0.896,
      "step": 361440
    },
    {
      "epoch": 1.2667921367413528,
      "grad_norm": 3.03125,
      "learning_rate": 3.209654431178299e-05,
      "loss": 0.9185,
      "step": 361450
    },
    {
      "epoch": 1.2668271842482486,
      "grad_norm": 3.25,
      "learning_rate": 3.2095895283119284e-05,
      "loss": 0.8947,
      "step": 361460
    },
    {
      "epoch": 1.2668622317551441,
      "grad_norm": 2.875,
      "learning_rate": 3.2095246254455585e-05,
      "loss": 0.8874,
      "step": 361470
    },
    {
      "epoch": 1.2668972792620397,
      "grad_norm": 3.390625,
      "learning_rate": 3.2094597225791887e-05,
      "loss": 0.8159,
      "step": 361480
    },
    {
      "epoch": 1.2669323267689352,
      "grad_norm": 2.828125,
      "learning_rate": 3.209394819712818e-05,
      "loss": 0.8322,
      "step": 361490
    },
    {
      "epoch": 1.266967374275831,
      "grad_norm": 3.0625,
      "learning_rate": 3.209329916846448e-05,
      "loss": 0.8,
      "step": 361500
    },
    {
      "epoch": 1.2670024217827265,
      "grad_norm": 2.90625,
      "learning_rate": 3.209265013980078e-05,
      "loss": 0.7822,
      "step": 361510
    },
    {
      "epoch": 1.267037469289622,
      "grad_norm": 3.046875,
      "learning_rate": 3.209200111113707e-05,
      "loss": 0.8595,
      "step": 361520
    },
    {
      "epoch": 1.2670725167965178,
      "grad_norm": 2.703125,
      "learning_rate": 3.2091352082473366e-05,
      "loss": 0.8576,
      "step": 361530
    },
    {
      "epoch": 1.2671075643034133,
      "grad_norm": 3.515625,
      "learning_rate": 3.209070305380967e-05,
      "loss": 0.8662,
      "step": 361540
    },
    {
      "epoch": 1.2671426118103088,
      "grad_norm": 3.078125,
      "learning_rate": 3.209005402514597e-05,
      "loss": 0.7783,
      "step": 361550
    },
    {
      "epoch": 1.2671776593172046,
      "grad_norm": 3.03125,
      "learning_rate": 3.2089404996482264e-05,
      "loss": 0.8468,
      "step": 361560
    },
    {
      "epoch": 1.2672127068241001,
      "grad_norm": 2.84375,
      "learning_rate": 3.2088755967818565e-05,
      "loss": 0.8337,
      "step": 361570
    },
    {
      "epoch": 1.2672477543309957,
      "grad_norm": 2.765625,
      "learning_rate": 3.208810693915486e-05,
      "loss": 0.8313,
      "step": 361580
    },
    {
      "epoch": 1.2672828018378912,
      "grad_norm": 3.09375,
      "learning_rate": 3.208745791049116e-05,
      "loss": 0.8623,
      "step": 361590
    },
    {
      "epoch": 1.2673178493447868,
      "grad_norm": 2.90625,
      "learning_rate": 3.2086808881827456e-05,
      "loss": 0.7887,
      "step": 361600
    },
    {
      "epoch": 1.2673528968516825,
      "grad_norm": 3.328125,
      "learning_rate": 3.208615985316376e-05,
      "loss": 0.849,
      "step": 361610
    },
    {
      "epoch": 1.267387944358578,
      "grad_norm": 2.78125,
      "learning_rate": 3.208551082450005e-05,
      "loss": 0.9317,
      "step": 361620
    },
    {
      "epoch": 1.2674229918654736,
      "grad_norm": 2.875,
      "learning_rate": 3.208486179583635e-05,
      "loss": 0.8637,
      "step": 361630
    },
    {
      "epoch": 1.2674580393723693,
      "grad_norm": 3.09375,
      "learning_rate": 3.208421276717265e-05,
      "loss": 0.8422,
      "step": 361640
    },
    {
      "epoch": 1.2674930868792649,
      "grad_norm": 3.140625,
      "learning_rate": 3.208356373850895e-05,
      "loss": 0.8371,
      "step": 361650
    },
    {
      "epoch": 1.2675281343861604,
      "grad_norm": 3.125,
      "learning_rate": 3.2082914709845244e-05,
      "loss": 0.8188,
      "step": 361660
    },
    {
      "epoch": 1.2675631818930562,
      "grad_norm": 3.203125,
      "learning_rate": 3.2082265681181545e-05,
      "loss": 0.9042,
      "step": 361670
    },
    {
      "epoch": 1.2675982293999517,
      "grad_norm": 2.921875,
      "learning_rate": 3.208161665251784e-05,
      "loss": 0.821,
      "step": 361680
    },
    {
      "epoch": 1.2676332769068472,
      "grad_norm": 2.84375,
      "learning_rate": 3.208096762385414e-05,
      "loss": 0.8012,
      "step": 361690
    },
    {
      "epoch": 1.2676683244137428,
      "grad_norm": 2.921875,
      "learning_rate": 3.208031859519044e-05,
      "loss": 0.819,
      "step": 361700
    },
    {
      "epoch": 1.2677033719206383,
      "grad_norm": 2.84375,
      "learning_rate": 3.207966956652674e-05,
      "loss": 0.8027,
      "step": 361710
    },
    {
      "epoch": 1.267738419427534,
      "grad_norm": 3.203125,
      "learning_rate": 3.207902053786304e-05,
      "loss": 0.8982,
      "step": 361720
    },
    {
      "epoch": 1.2677734669344296,
      "grad_norm": 3.015625,
      "learning_rate": 3.207837150919933e-05,
      "loss": 0.8518,
      "step": 361730
    },
    {
      "epoch": 1.2678085144413251,
      "grad_norm": 2.78125,
      "learning_rate": 3.2077722480535635e-05,
      "loss": 0.8016,
      "step": 361740
    },
    {
      "epoch": 1.267843561948221,
      "grad_norm": 3.03125,
      "learning_rate": 3.207707345187193e-05,
      "loss": 0.8349,
      "step": 361750
    },
    {
      "epoch": 1.2678786094551164,
      "grad_norm": 3.109375,
      "learning_rate": 3.207642442320823e-05,
      "loss": 0.8966,
      "step": 361760
    },
    {
      "epoch": 1.267913656962012,
      "grad_norm": 2.984375,
      "learning_rate": 3.2075775394544525e-05,
      "loss": 0.861,
      "step": 361770
    },
    {
      "epoch": 1.2679487044689077,
      "grad_norm": 3.046875,
      "learning_rate": 3.2075126365880827e-05,
      "loss": 0.8336,
      "step": 361780
    },
    {
      "epoch": 1.2679837519758033,
      "grad_norm": 3.109375,
      "learning_rate": 3.207447733721712e-05,
      "loss": 0.8749,
      "step": 361790
    },
    {
      "epoch": 1.2680187994826988,
      "grad_norm": 2.40625,
      "learning_rate": 3.207382830855342e-05,
      "loss": 0.8317,
      "step": 361800
    },
    {
      "epoch": 1.2680538469895943,
      "grad_norm": 2.421875,
      "learning_rate": 3.207317927988972e-05,
      "loss": 0.8727,
      "step": 361810
    },
    {
      "epoch": 1.2680888944964899,
      "grad_norm": 3.140625,
      "learning_rate": 3.207253025122602e-05,
      "loss": 0.9204,
      "step": 361820
    },
    {
      "epoch": 1.2681239420033856,
      "grad_norm": 2.828125,
      "learning_rate": 3.207188122256232e-05,
      "loss": 0.9115,
      "step": 361830
    },
    {
      "epoch": 1.2681589895102812,
      "grad_norm": 2.890625,
      "learning_rate": 3.2071232193898615e-05,
      "loss": 0.8439,
      "step": 361840
    },
    {
      "epoch": 1.2681940370171767,
      "grad_norm": 2.828125,
      "learning_rate": 3.2070583165234916e-05,
      "loss": 0.8457,
      "step": 361850
    },
    {
      "epoch": 1.2682290845240725,
      "grad_norm": 2.640625,
      "learning_rate": 3.206993413657121e-05,
      "loss": 0.7666,
      "step": 361860
    },
    {
      "epoch": 1.268264132030968,
      "grad_norm": 2.796875,
      "learning_rate": 3.206928510790751e-05,
      "loss": 0.836,
      "step": 361870
    },
    {
      "epoch": 1.2682991795378635,
      "grad_norm": 2.578125,
      "learning_rate": 3.2068636079243807e-05,
      "loss": 0.8283,
      "step": 361880
    },
    {
      "epoch": 1.2683342270447593,
      "grad_norm": 2.875,
      "learning_rate": 3.206798705058011e-05,
      "loss": 0.8812,
      "step": 361890
    },
    {
      "epoch": 1.2683692745516548,
      "grad_norm": 2.703125,
      "learning_rate": 3.2067338021916396e-05,
      "loss": 0.8152,
      "step": 361900
    },
    {
      "epoch": 1.2684043220585504,
      "grad_norm": 3.3125,
      "learning_rate": 3.20666889932527e-05,
      "loss": 0.847,
      "step": 361910
    },
    {
      "epoch": 1.268439369565446,
      "grad_norm": 2.796875,
      "learning_rate": 3.2066039964589e-05,
      "loss": 0.8554,
      "step": 361920
    },
    {
      "epoch": 1.2684744170723414,
      "grad_norm": 3.265625,
      "learning_rate": 3.206539093592529e-05,
      "loss": 0.8994,
      "step": 361930
    },
    {
      "epoch": 1.2685094645792372,
      "grad_norm": 3.234375,
      "learning_rate": 3.2064741907261595e-05,
      "loss": 0.8265,
      "step": 361940
    },
    {
      "epoch": 1.2685445120861327,
      "grad_norm": 3.0625,
      "learning_rate": 3.206409287859789e-05,
      "loss": 0.7892,
      "step": 361950
    },
    {
      "epoch": 1.2685795595930283,
      "grad_norm": 2.953125,
      "learning_rate": 3.206344384993419e-05,
      "loss": 0.8684,
      "step": 361960
    },
    {
      "epoch": 1.268614607099924,
      "grad_norm": 2.875,
      "learning_rate": 3.2062794821270485e-05,
      "loss": 0.9068,
      "step": 361970
    },
    {
      "epoch": 1.2686496546068196,
      "grad_norm": 2.65625,
      "learning_rate": 3.2062145792606787e-05,
      "loss": 0.8486,
      "step": 361980
    },
    {
      "epoch": 1.268684702113715,
      "grad_norm": 2.671875,
      "learning_rate": 3.206149676394308e-05,
      "loss": 0.7824,
      "step": 361990
    },
    {
      "epoch": 1.2687197496206108,
      "grad_norm": 3.28125,
      "learning_rate": 3.206084773527938e-05,
      "loss": 0.9174,
      "step": 362000
    },
    {
      "epoch": 1.2687547971275064,
      "grad_norm": 2.75,
      "learning_rate": 3.206019870661568e-05,
      "loss": 0.8457,
      "step": 362010
    },
    {
      "epoch": 1.268789844634402,
      "grad_norm": 2.9375,
      "learning_rate": 3.205954967795198e-05,
      "loss": 0.9427,
      "step": 362020
    },
    {
      "epoch": 1.2688248921412975,
      "grad_norm": 3.34375,
      "learning_rate": 3.205890064928827e-05,
      "loss": 0.8863,
      "step": 362030
    },
    {
      "epoch": 1.268859939648193,
      "grad_norm": 3.359375,
      "learning_rate": 3.2058251620624575e-05,
      "loss": 0.8588,
      "step": 362040
    },
    {
      "epoch": 1.2688949871550887,
      "grad_norm": 3.015625,
      "learning_rate": 3.205760259196087e-05,
      "loss": 0.8067,
      "step": 362050
    },
    {
      "epoch": 1.2689300346619843,
      "grad_norm": 3.109375,
      "learning_rate": 3.205695356329717e-05,
      "loss": 0.8172,
      "step": 362060
    },
    {
      "epoch": 1.2689650821688798,
      "grad_norm": 2.734375,
      "learning_rate": 3.205630453463347e-05,
      "loss": 0.8012,
      "step": 362070
    },
    {
      "epoch": 1.2690001296757756,
      "grad_norm": 3.109375,
      "learning_rate": 3.2055655505969767e-05,
      "loss": 0.8306,
      "step": 362080
    },
    {
      "epoch": 1.2690351771826711,
      "grad_norm": 3.03125,
      "learning_rate": 3.205500647730607e-05,
      "loss": 0.9077,
      "step": 362090
    },
    {
      "epoch": 1.2690702246895667,
      "grad_norm": 3.015625,
      "learning_rate": 3.205435744864236e-05,
      "loss": 0.8524,
      "step": 362100
    },
    {
      "epoch": 1.2691052721964624,
      "grad_norm": 3.125,
      "learning_rate": 3.2053708419978664e-05,
      "loss": 0.8007,
      "step": 362110
    },
    {
      "epoch": 1.269140319703358,
      "grad_norm": 3.21875,
      "learning_rate": 3.205305939131496e-05,
      "loss": 0.8282,
      "step": 362120
    },
    {
      "epoch": 1.2691753672102535,
      "grad_norm": 3.125,
      "learning_rate": 3.205241036265126e-05,
      "loss": 0.8179,
      "step": 362130
    },
    {
      "epoch": 1.269210414717149,
      "grad_norm": 2.453125,
      "learning_rate": 3.2051761333987555e-05,
      "loss": 0.8553,
      "step": 362140
    },
    {
      "epoch": 1.2692454622240448,
      "grad_norm": 2.625,
      "learning_rate": 3.2051112305323856e-05,
      "loss": 0.8308,
      "step": 362150
    },
    {
      "epoch": 1.2692805097309403,
      "grad_norm": 3.078125,
      "learning_rate": 3.205046327666015e-05,
      "loss": 0.8238,
      "step": 362160
    },
    {
      "epoch": 1.2693155572378358,
      "grad_norm": 2.71875,
      "learning_rate": 3.204981424799645e-05,
      "loss": 0.8099,
      "step": 362170
    },
    {
      "epoch": 1.2693506047447314,
      "grad_norm": 3.5,
      "learning_rate": 3.2049165219332747e-05,
      "loss": 0.8028,
      "step": 362180
    },
    {
      "epoch": 1.2693856522516271,
      "grad_norm": 3.09375,
      "learning_rate": 3.204851619066905e-05,
      "loss": 0.8197,
      "step": 362190
    },
    {
      "epoch": 1.2694206997585227,
      "grad_norm": 3.296875,
      "learning_rate": 3.204786716200535e-05,
      "loss": 0.9055,
      "step": 362200
    },
    {
      "epoch": 1.2694557472654182,
      "grad_norm": 3.5,
      "learning_rate": 3.2047218133341644e-05,
      "loss": 0.8128,
      "step": 362210
    },
    {
      "epoch": 1.269490794772314,
      "grad_norm": 3.21875,
      "learning_rate": 3.2046569104677945e-05,
      "loss": 0.8686,
      "step": 362220
    },
    {
      "epoch": 1.2695258422792095,
      "grad_norm": 2.875,
      "learning_rate": 3.204592007601424e-05,
      "loss": 0.8229,
      "step": 362230
    },
    {
      "epoch": 1.269560889786105,
      "grad_norm": 2.96875,
      "learning_rate": 3.204527104735054e-05,
      "loss": 0.8143,
      "step": 362240
    },
    {
      "epoch": 1.2695959372930008,
      "grad_norm": 2.796875,
      "learning_rate": 3.2044622018686836e-05,
      "loss": 0.7815,
      "step": 362250
    },
    {
      "epoch": 1.2696309847998963,
      "grad_norm": 2.203125,
      "learning_rate": 3.204397299002314e-05,
      "loss": 0.872,
      "step": 362260
    },
    {
      "epoch": 1.2696660323067919,
      "grad_norm": 3.21875,
      "learning_rate": 3.2043323961359425e-05,
      "loss": 0.9153,
      "step": 362270
    },
    {
      "epoch": 1.2697010798136874,
      "grad_norm": 2.890625,
      "learning_rate": 3.2042674932695727e-05,
      "loss": 0.8372,
      "step": 362280
    },
    {
      "epoch": 1.269736127320583,
      "grad_norm": 3.21875,
      "learning_rate": 3.204202590403203e-05,
      "loss": 0.8671,
      "step": 362290
    },
    {
      "epoch": 1.2697711748274787,
      "grad_norm": 2.6875,
      "learning_rate": 3.204137687536832e-05,
      "loss": 0.8521,
      "step": 362300
    },
    {
      "epoch": 1.2698062223343742,
      "grad_norm": 3.53125,
      "learning_rate": 3.2040727846704624e-05,
      "loss": 0.8707,
      "step": 362310
    },
    {
      "epoch": 1.2698412698412698,
      "grad_norm": 3.15625,
      "learning_rate": 3.204007881804092e-05,
      "loss": 0.8813,
      "step": 362320
    },
    {
      "epoch": 1.2698763173481655,
      "grad_norm": 2.84375,
      "learning_rate": 3.203942978937722e-05,
      "loss": 0.8139,
      "step": 362330
    },
    {
      "epoch": 1.269911364855061,
      "grad_norm": 2.9375,
      "learning_rate": 3.2038780760713515e-05,
      "loss": 0.8142,
      "step": 362340
    },
    {
      "epoch": 1.2699464123619566,
      "grad_norm": 2.5625,
      "learning_rate": 3.2038131732049816e-05,
      "loss": 0.8886,
      "step": 362350
    },
    {
      "epoch": 1.2699814598688524,
      "grad_norm": 2.5,
      "learning_rate": 3.203748270338611e-05,
      "loss": 0.782,
      "step": 362360
    },
    {
      "epoch": 1.270016507375748,
      "grad_norm": 2.828125,
      "learning_rate": 3.203683367472241e-05,
      "loss": 0.8325,
      "step": 362370
    },
    {
      "epoch": 1.2700515548826434,
      "grad_norm": 3.109375,
      "learning_rate": 3.2036184646058707e-05,
      "loss": 0.7814,
      "step": 362380
    },
    {
      "epoch": 1.270086602389539,
      "grad_norm": 3.296875,
      "learning_rate": 3.203553561739501e-05,
      "loss": 0.9134,
      "step": 362390
    },
    {
      "epoch": 1.2701216498964345,
      "grad_norm": 2.96875,
      "learning_rate": 3.20348865887313e-05,
      "loss": 0.9067,
      "step": 362400
    },
    {
      "epoch": 1.2701566974033303,
      "grad_norm": 2.671875,
      "learning_rate": 3.2034237560067604e-05,
      "loss": 0.8616,
      "step": 362410
    },
    {
      "epoch": 1.2701917449102258,
      "grad_norm": 3.203125,
      "learning_rate": 3.20335885314039e-05,
      "loss": 0.9341,
      "step": 362420
    },
    {
      "epoch": 1.2702267924171213,
      "grad_norm": 2.765625,
      "learning_rate": 3.20329395027402e-05,
      "loss": 0.7686,
      "step": 362430
    },
    {
      "epoch": 1.270261839924017,
      "grad_norm": 2.90625,
      "learning_rate": 3.20322904740765e-05,
      "loss": 0.8751,
      "step": 362440
    },
    {
      "epoch": 1.2702968874309126,
      "grad_norm": 3.203125,
      "learning_rate": 3.2031641445412796e-05,
      "loss": 0.8899,
      "step": 362450
    },
    {
      "epoch": 1.2703319349378082,
      "grad_norm": 2.859375,
      "learning_rate": 3.20309924167491e-05,
      "loss": 0.8958,
      "step": 362460
    },
    {
      "epoch": 1.270366982444704,
      "grad_norm": 2.765625,
      "learning_rate": 3.203034338808539e-05,
      "loss": 0.8243,
      "step": 362470
    },
    {
      "epoch": 1.2704020299515995,
      "grad_norm": 2.671875,
      "learning_rate": 3.202969435942169e-05,
      "loss": 0.8242,
      "step": 362480
    },
    {
      "epoch": 1.270437077458495,
      "grad_norm": 2.640625,
      "learning_rate": 3.202904533075799e-05,
      "loss": 0.8009,
      "step": 362490
    },
    {
      "epoch": 1.2704721249653905,
      "grad_norm": 2.828125,
      "learning_rate": 3.202839630209429e-05,
      "loss": 0.8016,
      "step": 362500
    },
    {
      "epoch": 1.270507172472286,
      "grad_norm": 3.0,
      "learning_rate": 3.2027747273430584e-05,
      "loss": 0.9287,
      "step": 362510
    },
    {
      "epoch": 1.2705422199791818,
      "grad_norm": 3.03125,
      "learning_rate": 3.2027098244766885e-05,
      "loss": 0.9179,
      "step": 362520
    },
    {
      "epoch": 1.2705772674860774,
      "grad_norm": 2.84375,
      "learning_rate": 3.202644921610318e-05,
      "loss": 0.8903,
      "step": 362530
    },
    {
      "epoch": 1.270612314992973,
      "grad_norm": 2.65625,
      "learning_rate": 3.202580018743948e-05,
      "loss": 0.8548,
      "step": 362540
    },
    {
      "epoch": 1.2706473624998686,
      "grad_norm": 3.28125,
      "learning_rate": 3.2025151158775776e-05,
      "loss": 0.8783,
      "step": 362550
    },
    {
      "epoch": 1.2706824100067642,
      "grad_norm": 3.046875,
      "learning_rate": 3.202450213011208e-05,
      "loss": 0.9166,
      "step": 362560
    },
    {
      "epoch": 1.2707174575136597,
      "grad_norm": 2.875,
      "learning_rate": 3.202385310144838e-05,
      "loss": 0.9123,
      "step": 362570
    },
    {
      "epoch": 1.2707525050205555,
      "grad_norm": 3.15625,
      "learning_rate": 3.202320407278467e-05,
      "loss": 0.8608,
      "step": 362580
    },
    {
      "epoch": 1.270787552527451,
      "grad_norm": 2.625,
      "learning_rate": 3.2022555044120975e-05,
      "loss": 0.8505,
      "step": 362590
    },
    {
      "epoch": 1.2708226000343466,
      "grad_norm": 2.9375,
      "learning_rate": 3.202190601545727e-05,
      "loss": 0.8316,
      "step": 362600
    },
    {
      "epoch": 1.270857647541242,
      "grad_norm": 2.75,
      "learning_rate": 3.202125698679357e-05,
      "loss": 0.9025,
      "step": 362610
    },
    {
      "epoch": 1.2708926950481376,
      "grad_norm": 2.6875,
      "learning_rate": 3.2020607958129865e-05,
      "loss": 0.8502,
      "step": 362620
    },
    {
      "epoch": 1.2709277425550334,
      "grad_norm": 3.015625,
      "learning_rate": 3.201995892946617e-05,
      "loss": 0.893,
      "step": 362630
    },
    {
      "epoch": 1.270962790061929,
      "grad_norm": 3.140625,
      "learning_rate": 3.201930990080246e-05,
      "loss": 0.8199,
      "step": 362640
    },
    {
      "epoch": 1.2709978375688245,
      "grad_norm": 2.78125,
      "learning_rate": 3.2018660872138756e-05,
      "loss": 0.8037,
      "step": 362650
    },
    {
      "epoch": 1.2710328850757202,
      "grad_norm": 2.640625,
      "learning_rate": 3.201801184347506e-05,
      "loss": 0.8516,
      "step": 362660
    },
    {
      "epoch": 1.2710679325826157,
      "grad_norm": 3.25,
      "learning_rate": 3.201736281481135e-05,
      "loss": 0.816,
      "step": 362670
    },
    {
      "epoch": 1.2711029800895113,
      "grad_norm": 3.078125,
      "learning_rate": 3.201671378614765e-05,
      "loss": 0.9084,
      "step": 362680
    },
    {
      "epoch": 1.271138027596407,
      "grad_norm": 2.875,
      "learning_rate": 3.201606475748395e-05,
      "loss": 0.8829,
      "step": 362690
    },
    {
      "epoch": 1.2711730751033026,
      "grad_norm": 2.796875,
      "learning_rate": 3.201541572882025e-05,
      "loss": 0.8435,
      "step": 362700
    },
    {
      "epoch": 1.271208122610198,
      "grad_norm": 2.65625,
      "learning_rate": 3.2014766700156544e-05,
      "loss": 0.8586,
      "step": 362710
    },
    {
      "epoch": 1.2712431701170936,
      "grad_norm": 3.3125,
      "learning_rate": 3.2014117671492845e-05,
      "loss": 0.8666,
      "step": 362720
    },
    {
      "epoch": 1.2712782176239892,
      "grad_norm": 2.578125,
      "learning_rate": 3.201346864282914e-05,
      "loss": 0.8992,
      "step": 362730
    },
    {
      "epoch": 1.271313265130885,
      "grad_norm": 2.84375,
      "learning_rate": 3.201281961416544e-05,
      "loss": 0.8878,
      "step": 362740
    },
    {
      "epoch": 1.2713483126377805,
      "grad_norm": 3.234375,
      "learning_rate": 3.2012170585501736e-05,
      "loss": 0.7588,
      "step": 362750
    },
    {
      "epoch": 1.271383360144676,
      "grad_norm": 2.921875,
      "learning_rate": 3.201152155683804e-05,
      "loss": 0.8869,
      "step": 362760
    },
    {
      "epoch": 1.2714184076515718,
      "grad_norm": 2.90625,
      "learning_rate": 3.201087252817433e-05,
      "loss": 0.8835,
      "step": 362770
    },
    {
      "epoch": 1.2714534551584673,
      "grad_norm": 3.15625,
      "learning_rate": 3.201022349951063e-05,
      "loss": 0.8752,
      "step": 362780
    },
    {
      "epoch": 1.2714885026653628,
      "grad_norm": 3.28125,
      "learning_rate": 3.2009574470846935e-05,
      "loss": 0.8415,
      "step": 362790
    },
    {
      "epoch": 1.2715235501722586,
      "grad_norm": 2.625,
      "learning_rate": 3.200892544218323e-05,
      "loss": 0.9294,
      "step": 362800
    },
    {
      "epoch": 1.2715585976791541,
      "grad_norm": 3.015625,
      "learning_rate": 3.200827641351953e-05,
      "loss": 0.7957,
      "step": 362810
    },
    {
      "epoch": 1.2715936451860497,
      "grad_norm": 2.609375,
      "learning_rate": 3.2007627384855825e-05,
      "loss": 0.8535,
      "step": 362820
    },
    {
      "epoch": 1.2716286926929452,
      "grad_norm": 2.4375,
      "learning_rate": 3.200697835619213e-05,
      "loss": 0.8226,
      "step": 362830
    },
    {
      "epoch": 1.271663740199841,
      "grad_norm": 2.734375,
      "learning_rate": 3.200632932752842e-05,
      "loss": 0.8382,
      "step": 362840
    },
    {
      "epoch": 1.2716987877067365,
      "grad_norm": 3.078125,
      "learning_rate": 3.200568029886472e-05,
      "loss": 0.8458,
      "step": 362850
    },
    {
      "epoch": 1.271733835213632,
      "grad_norm": 2.59375,
      "learning_rate": 3.200503127020102e-05,
      "loss": 0.8428,
      "step": 362860
    },
    {
      "epoch": 1.2717688827205276,
      "grad_norm": 2.875,
      "learning_rate": 3.200438224153732e-05,
      "loss": 0.8848,
      "step": 362870
    },
    {
      "epoch": 1.2718039302274233,
      "grad_norm": 3.171875,
      "learning_rate": 3.200373321287361e-05,
      "loss": 0.8077,
      "step": 362880
    },
    {
      "epoch": 1.2718389777343189,
      "grad_norm": 2.921875,
      "learning_rate": 3.2003084184209915e-05,
      "loss": 0.8535,
      "step": 362890
    },
    {
      "epoch": 1.2718740252412144,
      "grad_norm": 2.859375,
      "learning_rate": 3.200243515554621e-05,
      "loss": 0.8103,
      "step": 362900
    },
    {
      "epoch": 1.2719090727481102,
      "grad_norm": 2.859375,
      "learning_rate": 3.200178612688251e-05,
      "loss": 0.8251,
      "step": 362910
    },
    {
      "epoch": 1.2719441202550057,
      "grad_norm": 2.984375,
      "learning_rate": 3.2001137098218805e-05,
      "loss": 0.9326,
      "step": 362920
    },
    {
      "epoch": 1.2719791677619012,
      "grad_norm": 3.140625,
      "learning_rate": 3.200048806955511e-05,
      "loss": 0.8609,
      "step": 362930
    },
    {
      "epoch": 1.272014215268797,
      "grad_norm": 3.03125,
      "learning_rate": 3.199983904089141e-05,
      "loss": 0.8509,
      "step": 362940
    },
    {
      "epoch": 1.2720492627756925,
      "grad_norm": 2.75,
      "learning_rate": 3.19991900122277e-05,
      "loss": 0.873,
      "step": 362950
    },
    {
      "epoch": 1.272084310282588,
      "grad_norm": 3.484375,
      "learning_rate": 3.1998540983564004e-05,
      "loss": 0.8537,
      "step": 362960
    },
    {
      "epoch": 1.2721193577894836,
      "grad_norm": 2.953125,
      "learning_rate": 3.19978919549003e-05,
      "loss": 0.8892,
      "step": 362970
    },
    {
      "epoch": 1.2721544052963791,
      "grad_norm": 2.640625,
      "learning_rate": 3.19972429262366e-05,
      "loss": 0.8388,
      "step": 362980
    },
    {
      "epoch": 1.272189452803275,
      "grad_norm": 2.671875,
      "learning_rate": 3.1996593897572895e-05,
      "loss": 0.9411,
      "step": 362990
    },
    {
      "epoch": 1.2722245003101704,
      "grad_norm": 3.234375,
      "learning_rate": 3.1995944868909196e-05,
      "loss": 0.8092,
      "step": 363000
    },
    {
      "epoch": 1.272259547817066,
      "grad_norm": 2.734375,
      "learning_rate": 3.199529584024549e-05,
      "loss": 0.7942,
      "step": 363010
    },
    {
      "epoch": 1.2722945953239617,
      "grad_norm": 2.703125,
      "learning_rate": 3.199464681158179e-05,
      "loss": 0.8167,
      "step": 363020
    },
    {
      "epoch": 1.2723296428308573,
      "grad_norm": 3.078125,
      "learning_rate": 3.199399778291809e-05,
      "loss": 0.9368,
      "step": 363030
    },
    {
      "epoch": 1.2723646903377528,
      "grad_norm": 2.3125,
      "learning_rate": 3.199334875425438e-05,
      "loss": 0.8233,
      "step": 363040
    },
    {
      "epoch": 1.2723997378446485,
      "grad_norm": 2.75,
      "learning_rate": 3.199269972559068e-05,
      "loss": 0.8161,
      "step": 363050
    },
    {
      "epoch": 1.272434785351544,
      "grad_norm": 2.84375,
      "learning_rate": 3.199205069692698e-05,
      "loss": 0.8386,
      "step": 363060
    },
    {
      "epoch": 1.2724698328584396,
      "grad_norm": 2.703125,
      "learning_rate": 3.199140166826328e-05,
      "loss": 0.8745,
      "step": 363070
    },
    {
      "epoch": 1.2725048803653352,
      "grad_norm": 3.265625,
      "learning_rate": 3.199075263959957e-05,
      "loss": 0.9035,
      "step": 363080
    },
    {
      "epoch": 1.2725399278722307,
      "grad_norm": 3.140625,
      "learning_rate": 3.1990103610935875e-05,
      "loss": 0.8777,
      "step": 363090
    },
    {
      "epoch": 1.2725749753791264,
      "grad_norm": 2.65625,
      "learning_rate": 3.198945458227217e-05,
      "loss": 0.8844,
      "step": 363100
    },
    {
      "epoch": 1.272610022886022,
      "grad_norm": 2.90625,
      "learning_rate": 3.198880555360847e-05,
      "loss": 0.8561,
      "step": 363110
    },
    {
      "epoch": 1.2726450703929175,
      "grad_norm": 2.953125,
      "learning_rate": 3.1988156524944765e-05,
      "loss": 0.8333,
      "step": 363120
    },
    {
      "epoch": 1.2726801178998133,
      "grad_norm": 3.03125,
      "learning_rate": 3.198750749628107e-05,
      "loss": 0.9778,
      "step": 363130
    },
    {
      "epoch": 1.2727151654067088,
      "grad_norm": 3.296875,
      "learning_rate": 3.198685846761736e-05,
      "loss": 0.8976,
      "step": 363140
    },
    {
      "epoch": 1.2727502129136044,
      "grad_norm": 2.515625,
      "learning_rate": 3.198620943895366e-05,
      "loss": 0.8369,
      "step": 363150
    },
    {
      "epoch": 1.2727852604205,
      "grad_norm": 2.78125,
      "learning_rate": 3.1985560410289964e-05,
      "loss": 0.8895,
      "step": 363160
    },
    {
      "epoch": 1.2728203079273956,
      "grad_norm": 2.796875,
      "learning_rate": 3.198491138162626e-05,
      "loss": 0.8495,
      "step": 363170
    },
    {
      "epoch": 1.2728553554342912,
      "grad_norm": 2.796875,
      "learning_rate": 3.198426235296256e-05,
      "loss": 0.796,
      "step": 363180
    },
    {
      "epoch": 1.2728904029411867,
      "grad_norm": 3.015625,
      "learning_rate": 3.1983613324298855e-05,
      "loss": 0.8114,
      "step": 363190
    },
    {
      "epoch": 1.2729254504480823,
      "grad_norm": 2.609375,
      "learning_rate": 3.1982964295635156e-05,
      "loss": 0.8073,
      "step": 363200
    },
    {
      "epoch": 1.272960497954978,
      "grad_norm": 3.234375,
      "learning_rate": 3.198231526697145e-05,
      "loss": 0.8317,
      "step": 363210
    },
    {
      "epoch": 1.2729955454618735,
      "grad_norm": 3.0625,
      "learning_rate": 3.198166623830775e-05,
      "loss": 0.834,
      "step": 363220
    },
    {
      "epoch": 1.273030592968769,
      "grad_norm": 2.390625,
      "learning_rate": 3.198101720964405e-05,
      "loss": 0.8595,
      "step": 363230
    },
    {
      "epoch": 1.2730656404756648,
      "grad_norm": 3.265625,
      "learning_rate": 3.198036818098035e-05,
      "loss": 0.8242,
      "step": 363240
    },
    {
      "epoch": 1.2731006879825604,
      "grad_norm": 2.875,
      "learning_rate": 3.197971915231664e-05,
      "loss": 0.8745,
      "step": 363250
    },
    {
      "epoch": 1.273135735489456,
      "grad_norm": 2.8125,
      "learning_rate": 3.1979070123652944e-05,
      "loss": 0.8354,
      "step": 363260
    },
    {
      "epoch": 1.2731707829963517,
      "grad_norm": 3.09375,
      "learning_rate": 3.197842109498924e-05,
      "loss": 0.862,
      "step": 363270
    },
    {
      "epoch": 1.2732058305032472,
      "grad_norm": 2.90625,
      "learning_rate": 3.197777206632554e-05,
      "loss": 0.8323,
      "step": 363280
    },
    {
      "epoch": 1.2732408780101427,
      "grad_norm": 2.890625,
      "learning_rate": 3.1977123037661835e-05,
      "loss": 0.9322,
      "step": 363290
    },
    {
      "epoch": 1.2732759255170383,
      "grad_norm": 2.96875,
      "learning_rate": 3.1976474008998136e-05,
      "loss": 0.8024,
      "step": 363300
    },
    {
      "epoch": 1.2733109730239338,
      "grad_norm": 3.0,
      "learning_rate": 3.197582498033444e-05,
      "loss": 0.8707,
      "step": 363310
    },
    {
      "epoch": 1.2733460205308296,
      "grad_norm": 3.078125,
      "learning_rate": 3.197517595167073e-05,
      "loss": 0.8822,
      "step": 363320
    },
    {
      "epoch": 1.273381068037725,
      "grad_norm": 2.546875,
      "learning_rate": 3.1974526923007034e-05,
      "loss": 0.8117,
      "step": 363330
    },
    {
      "epoch": 1.2734161155446206,
      "grad_norm": 3.109375,
      "learning_rate": 3.197387789434333e-05,
      "loss": 0.9034,
      "step": 363340
    },
    {
      "epoch": 1.2734511630515164,
      "grad_norm": 3.28125,
      "learning_rate": 3.197322886567963e-05,
      "loss": 0.9358,
      "step": 363350
    },
    {
      "epoch": 1.273486210558412,
      "grad_norm": 2.9375,
      "learning_rate": 3.1972579837015924e-05,
      "loss": 0.8666,
      "step": 363360
    },
    {
      "epoch": 1.2735212580653075,
      "grad_norm": 2.765625,
      "learning_rate": 3.1971930808352226e-05,
      "loss": 0.8111,
      "step": 363370
    },
    {
      "epoch": 1.2735563055722032,
      "grad_norm": 3.21875,
      "learning_rate": 3.197128177968852e-05,
      "loss": 0.9345,
      "step": 363380
    },
    {
      "epoch": 1.2735913530790988,
      "grad_norm": 2.953125,
      "learning_rate": 3.197063275102482e-05,
      "loss": 0.8572,
      "step": 363390
    },
    {
      "epoch": 1.2736264005859943,
      "grad_norm": 2.859375,
      "learning_rate": 3.1969983722361116e-05,
      "loss": 0.8012,
      "step": 363400
    },
    {
      "epoch": 1.2736614480928898,
      "grad_norm": 3.015625,
      "learning_rate": 3.196933469369741e-05,
      "loss": 0.8671,
      "step": 363410
    },
    {
      "epoch": 1.2736964955997854,
      "grad_norm": 2.640625,
      "learning_rate": 3.196868566503371e-05,
      "loss": 0.7732,
      "step": 363420
    },
    {
      "epoch": 1.2737315431066811,
      "grad_norm": 2.59375,
      "learning_rate": 3.196803663637001e-05,
      "loss": 0.8732,
      "step": 363430
    },
    {
      "epoch": 1.2737665906135767,
      "grad_norm": 3.140625,
      "learning_rate": 3.196738760770631e-05,
      "loss": 0.8631,
      "step": 363440
    },
    {
      "epoch": 1.2738016381204722,
      "grad_norm": 2.875,
      "learning_rate": 3.19667385790426e-05,
      "loss": 0.9172,
      "step": 363450
    },
    {
      "epoch": 1.273836685627368,
      "grad_norm": 2.859375,
      "learning_rate": 3.1966089550378904e-05,
      "loss": 0.9235,
      "step": 363460
    },
    {
      "epoch": 1.2738717331342635,
      "grad_norm": 2.703125,
      "learning_rate": 3.19654405217152e-05,
      "loss": 0.8374,
      "step": 363470
    },
    {
      "epoch": 1.273906780641159,
      "grad_norm": 3.3125,
      "learning_rate": 3.19647914930515e-05,
      "loss": 0.8578,
      "step": 363480
    },
    {
      "epoch": 1.2739418281480548,
      "grad_norm": 2.515625,
      "learning_rate": 3.1964142464387795e-05,
      "loss": 0.7713,
      "step": 363490
    },
    {
      "epoch": 1.2739768756549503,
      "grad_norm": 3.21875,
      "learning_rate": 3.1963493435724096e-05,
      "loss": 0.8683,
      "step": 363500
    },
    {
      "epoch": 1.2740119231618459,
      "grad_norm": 2.65625,
      "learning_rate": 3.196284440706039e-05,
      "loss": 0.7961,
      "step": 363510
    },
    {
      "epoch": 1.2740469706687414,
      "grad_norm": 3.4375,
      "learning_rate": 3.196219537839669e-05,
      "loss": 0.8347,
      "step": 363520
    },
    {
      "epoch": 1.2740820181756372,
      "grad_norm": 2.921875,
      "learning_rate": 3.1961546349732994e-05,
      "loss": 0.8131,
      "step": 363530
    },
    {
      "epoch": 1.2741170656825327,
      "grad_norm": 2.828125,
      "learning_rate": 3.196089732106929e-05,
      "loss": 0.9077,
      "step": 363540
    },
    {
      "epoch": 1.2741521131894282,
      "grad_norm": 2.6875,
      "learning_rate": 3.196024829240559e-05,
      "loss": 0.8901,
      "step": 363550
    },
    {
      "epoch": 1.2741871606963238,
      "grad_norm": 2.859375,
      "learning_rate": 3.1959599263741884e-05,
      "loss": 0.8354,
      "step": 363560
    },
    {
      "epoch": 1.2742222082032195,
      "grad_norm": 2.53125,
      "learning_rate": 3.1958950235078186e-05,
      "loss": 0.8434,
      "step": 363570
    },
    {
      "epoch": 1.274257255710115,
      "grad_norm": 2.953125,
      "learning_rate": 3.195830120641448e-05,
      "loss": 0.8803,
      "step": 363580
    },
    {
      "epoch": 1.2742923032170106,
      "grad_norm": 2.640625,
      "learning_rate": 3.195765217775078e-05,
      "loss": 0.9062,
      "step": 363590
    },
    {
      "epoch": 1.2743273507239063,
      "grad_norm": 2.875,
      "learning_rate": 3.1957003149087076e-05,
      "loss": 0.7698,
      "step": 363600
    },
    {
      "epoch": 1.2743623982308019,
      "grad_norm": 3.171875,
      "learning_rate": 3.195635412042338e-05,
      "loss": 0.8925,
      "step": 363610
    },
    {
      "epoch": 1.2743974457376974,
      "grad_norm": 2.765625,
      "learning_rate": 3.195570509175967e-05,
      "loss": 0.8675,
      "step": 363620
    },
    {
      "epoch": 1.2744324932445932,
      "grad_norm": 2.859375,
      "learning_rate": 3.1955056063095974e-05,
      "loss": 0.9602,
      "step": 363630
    },
    {
      "epoch": 1.2744675407514887,
      "grad_norm": 2.875,
      "learning_rate": 3.195440703443227e-05,
      "loss": 0.8375,
      "step": 363640
    },
    {
      "epoch": 1.2745025882583843,
      "grad_norm": 2.890625,
      "learning_rate": 3.195375800576857e-05,
      "loss": 0.8879,
      "step": 363650
    },
    {
      "epoch": 1.2745376357652798,
      "grad_norm": 2.734375,
      "learning_rate": 3.1953108977104864e-05,
      "loss": 0.8528,
      "step": 363660
    },
    {
      "epoch": 1.2745726832721753,
      "grad_norm": 3.015625,
      "learning_rate": 3.1952459948441166e-05,
      "loss": 0.8757,
      "step": 363670
    },
    {
      "epoch": 1.274607730779071,
      "grad_norm": 2.859375,
      "learning_rate": 3.195181091977747e-05,
      "loss": 0.8765,
      "step": 363680
    },
    {
      "epoch": 1.2746427782859666,
      "grad_norm": 3.265625,
      "learning_rate": 3.195116189111376e-05,
      "loss": 0.8776,
      "step": 363690
    },
    {
      "epoch": 1.2746778257928622,
      "grad_norm": 2.40625,
      "learning_rate": 3.195051286245006e-05,
      "loss": 0.8666,
      "step": 363700
    },
    {
      "epoch": 1.274712873299758,
      "grad_norm": 2.828125,
      "learning_rate": 3.194986383378636e-05,
      "loss": 0.8082,
      "step": 363710
    },
    {
      "epoch": 1.2747479208066534,
      "grad_norm": 2.9375,
      "learning_rate": 3.194921480512266e-05,
      "loss": 0.8106,
      "step": 363720
    },
    {
      "epoch": 1.274782968313549,
      "grad_norm": 2.9375,
      "learning_rate": 3.1948565776458954e-05,
      "loss": 0.8675,
      "step": 363730
    },
    {
      "epoch": 1.2748180158204447,
      "grad_norm": 2.5625,
      "learning_rate": 3.1947916747795255e-05,
      "loss": 0.8926,
      "step": 363740
    },
    {
      "epoch": 1.2748530633273403,
      "grad_norm": 3.609375,
      "learning_rate": 3.194726771913155e-05,
      "loss": 0.9127,
      "step": 363750
    },
    {
      "epoch": 1.2748881108342358,
      "grad_norm": 2.921875,
      "learning_rate": 3.194661869046785e-05,
      "loss": 0.8696,
      "step": 363760
    },
    {
      "epoch": 1.2749231583411313,
      "grad_norm": 2.515625,
      "learning_rate": 3.1945969661804146e-05,
      "loss": 0.8327,
      "step": 363770
    },
    {
      "epoch": 1.2749582058480269,
      "grad_norm": 2.96875,
      "learning_rate": 3.194532063314044e-05,
      "loss": 0.8525,
      "step": 363780
    },
    {
      "epoch": 1.2749932533549226,
      "grad_norm": 2.96875,
      "learning_rate": 3.194467160447674e-05,
      "loss": 0.7799,
      "step": 363790
    },
    {
      "epoch": 1.2750283008618182,
      "grad_norm": 3.1875,
      "learning_rate": 3.1944022575813036e-05,
      "loss": 0.8399,
      "step": 363800
    },
    {
      "epoch": 1.2750633483687137,
      "grad_norm": 3.25,
      "learning_rate": 3.194337354714934e-05,
      "loss": 0.8774,
      "step": 363810
    },
    {
      "epoch": 1.2750983958756095,
      "grad_norm": 2.71875,
      "learning_rate": 3.194272451848563e-05,
      "loss": 0.7902,
      "step": 363820
    },
    {
      "epoch": 1.275133443382505,
      "grad_norm": 3.8125,
      "learning_rate": 3.1942075489821934e-05,
      "loss": 0.8844,
      "step": 363830
    },
    {
      "epoch": 1.2751684908894005,
      "grad_norm": 2.734375,
      "learning_rate": 3.194142646115823e-05,
      "loss": 0.7902,
      "step": 363840
    },
    {
      "epoch": 1.2752035383962963,
      "grad_norm": 3.421875,
      "learning_rate": 3.194077743249453e-05,
      "loss": 0.9522,
      "step": 363850
    },
    {
      "epoch": 1.2752385859031918,
      "grad_norm": 3.125,
      "learning_rate": 3.1940128403830824e-05,
      "loss": 0.8809,
      "step": 363860
    },
    {
      "epoch": 1.2752736334100874,
      "grad_norm": 2.75,
      "learning_rate": 3.1939479375167126e-05,
      "loss": 0.8028,
      "step": 363870
    },
    {
      "epoch": 1.275308680916983,
      "grad_norm": 3.171875,
      "learning_rate": 3.193883034650342e-05,
      "loss": 0.8553,
      "step": 363880
    },
    {
      "epoch": 1.2753437284238784,
      "grad_norm": 2.625,
      "learning_rate": 3.193818131783972e-05,
      "loss": 0.8817,
      "step": 363890
    },
    {
      "epoch": 1.2753787759307742,
      "grad_norm": 3.359375,
      "learning_rate": 3.193753228917602e-05,
      "loss": 0.9423,
      "step": 363900
    },
    {
      "epoch": 1.2754138234376697,
      "grad_norm": 3.3125,
      "learning_rate": 3.193688326051232e-05,
      "loss": 0.9467,
      "step": 363910
    },
    {
      "epoch": 1.2754488709445653,
      "grad_norm": 2.875,
      "learning_rate": 3.193623423184862e-05,
      "loss": 0.8478,
      "step": 363920
    },
    {
      "epoch": 1.275483918451461,
      "grad_norm": 2.4375,
      "learning_rate": 3.1935585203184914e-05,
      "loss": 0.8384,
      "step": 363930
    },
    {
      "epoch": 1.2755189659583566,
      "grad_norm": 2.953125,
      "learning_rate": 3.1934936174521215e-05,
      "loss": 0.8775,
      "step": 363940
    },
    {
      "epoch": 1.275554013465252,
      "grad_norm": 2.796875,
      "learning_rate": 3.193428714585751e-05,
      "loss": 0.8269,
      "step": 363950
    },
    {
      "epoch": 1.2755890609721479,
      "grad_norm": 3.03125,
      "learning_rate": 3.193363811719381e-05,
      "loss": 0.8678,
      "step": 363960
    },
    {
      "epoch": 1.2756241084790434,
      "grad_norm": 3.03125,
      "learning_rate": 3.1932989088530106e-05,
      "loss": 0.8829,
      "step": 363970
    },
    {
      "epoch": 1.275659155985939,
      "grad_norm": 2.46875,
      "learning_rate": 3.193234005986641e-05,
      "loss": 0.8244,
      "step": 363980
    },
    {
      "epoch": 1.2756942034928345,
      "grad_norm": 3.078125,
      "learning_rate": 3.19316910312027e-05,
      "loss": 0.8647,
      "step": 363990
    },
    {
      "epoch": 1.27572925099973,
      "grad_norm": 2.921875,
      "learning_rate": 3.1931042002539e-05,
      "loss": 0.8927,
      "step": 364000
    },
    {
      "epoch": 1.2757642985066258,
      "grad_norm": 2.578125,
      "learning_rate": 3.19303929738753e-05,
      "loss": 0.8193,
      "step": 364010
    },
    {
      "epoch": 1.2757993460135213,
      "grad_norm": 3.046875,
      "learning_rate": 3.19297439452116e-05,
      "loss": 0.8144,
      "step": 364020
    },
    {
      "epoch": 1.2758343935204168,
      "grad_norm": 2.921875,
      "learning_rate": 3.19290949165479e-05,
      "loss": 0.8044,
      "step": 364030
    },
    {
      "epoch": 1.2758694410273126,
      "grad_norm": 3.015625,
      "learning_rate": 3.1928445887884195e-05,
      "loss": 0.9782,
      "step": 364040
    },
    {
      "epoch": 1.2759044885342081,
      "grad_norm": 2.890625,
      "learning_rate": 3.1927796859220496e-05,
      "loss": 0.865,
      "step": 364050
    },
    {
      "epoch": 1.2759395360411037,
      "grad_norm": 3.140625,
      "learning_rate": 3.192714783055679e-05,
      "loss": 0.8654,
      "step": 364060
    },
    {
      "epoch": 1.2759745835479994,
      "grad_norm": 3.421875,
      "learning_rate": 3.192649880189309e-05,
      "loss": 0.8866,
      "step": 364070
    },
    {
      "epoch": 1.276009631054895,
      "grad_norm": 3.09375,
      "learning_rate": 3.192584977322939e-05,
      "loss": 0.8057,
      "step": 364080
    },
    {
      "epoch": 1.2760446785617905,
      "grad_norm": 3.1875,
      "learning_rate": 3.192520074456569e-05,
      "loss": 0.907,
      "step": 364090
    },
    {
      "epoch": 1.276079726068686,
      "grad_norm": 3.203125,
      "learning_rate": 3.192455171590198e-05,
      "loss": 0.8329,
      "step": 364100
    },
    {
      "epoch": 1.2761147735755816,
      "grad_norm": 3.0625,
      "learning_rate": 3.1923902687238284e-05,
      "loss": 0.8669,
      "step": 364110
    },
    {
      "epoch": 1.2761498210824773,
      "grad_norm": 2.9375,
      "learning_rate": 3.192325365857458e-05,
      "loss": 0.8145,
      "step": 364120
    },
    {
      "epoch": 1.2761848685893729,
      "grad_norm": 2.421875,
      "learning_rate": 3.192260462991088e-05,
      "loss": 0.8915,
      "step": 364130
    },
    {
      "epoch": 1.2762199160962684,
      "grad_norm": 2.578125,
      "learning_rate": 3.1921955601247175e-05,
      "loss": 0.8307,
      "step": 364140
    },
    {
      "epoch": 1.2762549636031641,
      "grad_norm": 2.875,
      "learning_rate": 3.192130657258347e-05,
      "loss": 0.8971,
      "step": 364150
    },
    {
      "epoch": 1.2762900111100597,
      "grad_norm": 2.921875,
      "learning_rate": 3.192065754391977e-05,
      "loss": 0.9276,
      "step": 364160
    },
    {
      "epoch": 1.2763250586169552,
      "grad_norm": 3.203125,
      "learning_rate": 3.1920008515256066e-05,
      "loss": 0.828,
      "step": 364170
    },
    {
      "epoch": 1.276360106123851,
      "grad_norm": 2.921875,
      "learning_rate": 3.191935948659237e-05,
      "loss": 0.8319,
      "step": 364180
    },
    {
      "epoch": 1.2763951536307465,
      "grad_norm": 2.484375,
      "learning_rate": 3.191871045792866e-05,
      "loss": 0.9013,
      "step": 364190
    },
    {
      "epoch": 1.276430201137642,
      "grad_norm": 3.046875,
      "learning_rate": 3.191806142926496e-05,
      "loss": 0.9224,
      "step": 364200
    },
    {
      "epoch": 1.2764652486445378,
      "grad_norm": 2.953125,
      "learning_rate": 3.191741240060126e-05,
      "loss": 0.8803,
      "step": 364210
    },
    {
      "epoch": 1.2765002961514333,
      "grad_norm": 2.8125,
      "learning_rate": 3.191676337193756e-05,
      "loss": 0.8393,
      "step": 364220
    },
    {
      "epoch": 1.2765353436583289,
      "grad_norm": 2.859375,
      "learning_rate": 3.1916114343273854e-05,
      "loss": 0.8561,
      "step": 364230
    },
    {
      "epoch": 1.2765703911652244,
      "grad_norm": 3.46875,
      "learning_rate": 3.1915465314610155e-05,
      "loss": 0.8679,
      "step": 364240
    },
    {
      "epoch": 1.27660543867212,
      "grad_norm": 2.6875,
      "learning_rate": 3.191481628594645e-05,
      "loss": 0.8882,
      "step": 364250
    },
    {
      "epoch": 1.2766404861790157,
      "grad_norm": 3.390625,
      "learning_rate": 3.191416725728275e-05,
      "loss": 0.8361,
      "step": 364260
    },
    {
      "epoch": 1.2766755336859112,
      "grad_norm": 3.03125,
      "learning_rate": 3.191351822861905e-05,
      "loss": 0.8188,
      "step": 364270
    },
    {
      "epoch": 1.2767105811928068,
      "grad_norm": 2.859375,
      "learning_rate": 3.191286919995535e-05,
      "loss": 0.8731,
      "step": 364280
    },
    {
      "epoch": 1.2767456286997025,
      "grad_norm": 2.765625,
      "learning_rate": 3.191222017129165e-05,
      "loss": 0.8432,
      "step": 364290
    },
    {
      "epoch": 1.276780676206598,
      "grad_norm": 2.90625,
      "learning_rate": 3.191157114262794e-05,
      "loss": 0.9644,
      "step": 364300
    },
    {
      "epoch": 1.2768157237134936,
      "grad_norm": 2.8125,
      "learning_rate": 3.1910922113964244e-05,
      "loss": 0.8761,
      "step": 364310
    },
    {
      "epoch": 1.2768507712203894,
      "grad_norm": 2.8125,
      "learning_rate": 3.191027308530054e-05,
      "loss": 0.902,
      "step": 364320
    },
    {
      "epoch": 1.276885818727285,
      "grad_norm": 2.765625,
      "learning_rate": 3.190962405663684e-05,
      "loss": 0.7429,
      "step": 364330
    },
    {
      "epoch": 1.2769208662341804,
      "grad_norm": 2.84375,
      "learning_rate": 3.1908975027973135e-05,
      "loss": 0.9232,
      "step": 364340
    },
    {
      "epoch": 1.276955913741076,
      "grad_norm": 2.703125,
      "learning_rate": 3.1908325999309436e-05,
      "loss": 0.8182,
      "step": 364350
    },
    {
      "epoch": 1.2769909612479715,
      "grad_norm": 3.046875,
      "learning_rate": 3.190767697064573e-05,
      "loss": 0.8601,
      "step": 364360
    },
    {
      "epoch": 1.2770260087548673,
      "grad_norm": 2.875,
      "learning_rate": 3.190702794198203e-05,
      "loss": 0.7476,
      "step": 364370
    },
    {
      "epoch": 1.2770610562617628,
      "grad_norm": 3.203125,
      "learning_rate": 3.190637891331833e-05,
      "loss": 0.9331,
      "step": 364380
    },
    {
      "epoch": 1.2770961037686583,
      "grad_norm": 3.734375,
      "learning_rate": 3.190572988465463e-05,
      "loss": 0.7591,
      "step": 364390
    },
    {
      "epoch": 1.277131151275554,
      "grad_norm": 2.734375,
      "learning_rate": 3.190508085599093e-05,
      "loss": 0.7882,
      "step": 364400
    },
    {
      "epoch": 1.2771661987824496,
      "grad_norm": 2.921875,
      "learning_rate": 3.1904431827327224e-05,
      "loss": 0.8467,
      "step": 364410
    },
    {
      "epoch": 1.2772012462893452,
      "grad_norm": 2.625,
      "learning_rate": 3.1903782798663526e-05,
      "loss": 0.8154,
      "step": 364420
    },
    {
      "epoch": 1.277236293796241,
      "grad_norm": 2.484375,
      "learning_rate": 3.190313376999982e-05,
      "loss": 0.8547,
      "step": 364430
    },
    {
      "epoch": 1.2772713413031365,
      "grad_norm": 3.234375,
      "learning_rate": 3.190248474133612e-05,
      "loss": 0.8739,
      "step": 364440
    },
    {
      "epoch": 1.277306388810032,
      "grad_norm": 2.765625,
      "learning_rate": 3.1901835712672416e-05,
      "loss": 0.8524,
      "step": 364450
    },
    {
      "epoch": 1.2773414363169275,
      "grad_norm": 3.125,
      "learning_rate": 3.190118668400872e-05,
      "loss": 0.8674,
      "step": 364460
    },
    {
      "epoch": 1.277376483823823,
      "grad_norm": 2.859375,
      "learning_rate": 3.190053765534501e-05,
      "loss": 0.852,
      "step": 364470
    },
    {
      "epoch": 1.2774115313307188,
      "grad_norm": 2.890625,
      "learning_rate": 3.1899888626681314e-05,
      "loss": 0.8413,
      "step": 364480
    },
    {
      "epoch": 1.2774465788376144,
      "grad_norm": 2.859375,
      "learning_rate": 3.189923959801761e-05,
      "loss": 0.8202,
      "step": 364490
    },
    {
      "epoch": 1.27748162634451,
      "grad_norm": 2.796875,
      "learning_rate": 3.189859056935391e-05,
      "loss": 0.8097,
      "step": 364500
    },
    {
      "epoch": 1.2775166738514057,
      "grad_norm": 3.140625,
      "learning_rate": 3.1897941540690204e-05,
      "loss": 0.9525,
      "step": 364510
    },
    {
      "epoch": 1.2775517213583012,
      "grad_norm": 3.015625,
      "learning_rate": 3.1897292512026506e-05,
      "loss": 0.9648,
      "step": 364520
    },
    {
      "epoch": 1.2775867688651967,
      "grad_norm": 2.5625,
      "learning_rate": 3.18966434833628e-05,
      "loss": 0.8497,
      "step": 364530
    },
    {
      "epoch": 1.2776218163720925,
      "grad_norm": 2.734375,
      "learning_rate": 3.1895994454699095e-05,
      "loss": 0.9002,
      "step": 364540
    },
    {
      "epoch": 1.277656863878988,
      "grad_norm": 2.921875,
      "learning_rate": 3.1895345426035396e-05,
      "loss": 0.8572,
      "step": 364550
    },
    {
      "epoch": 1.2776919113858836,
      "grad_norm": 2.921875,
      "learning_rate": 3.189469639737169e-05,
      "loss": 0.8493,
      "step": 364560
    },
    {
      "epoch": 1.277726958892779,
      "grad_norm": 2.796875,
      "learning_rate": 3.189404736870799e-05,
      "loss": 0.9095,
      "step": 364570
    },
    {
      "epoch": 1.2777620063996746,
      "grad_norm": 2.75,
      "learning_rate": 3.189339834004429e-05,
      "loss": 0.8187,
      "step": 364580
    },
    {
      "epoch": 1.2777970539065704,
      "grad_norm": 2.859375,
      "learning_rate": 3.189274931138059e-05,
      "loss": 0.8143,
      "step": 364590
    },
    {
      "epoch": 1.277832101413466,
      "grad_norm": 3.015625,
      "learning_rate": 3.189210028271688e-05,
      "loss": 0.742,
      "step": 364600
    },
    {
      "epoch": 1.2778671489203615,
      "grad_norm": 2.6875,
      "learning_rate": 3.1891451254053184e-05,
      "loss": 0.7465,
      "step": 364610
    },
    {
      "epoch": 1.2779021964272572,
      "grad_norm": 3.203125,
      "learning_rate": 3.189080222538948e-05,
      "loss": 0.8651,
      "step": 364620
    },
    {
      "epoch": 1.2779372439341528,
      "grad_norm": 2.953125,
      "learning_rate": 3.189015319672578e-05,
      "loss": 0.8519,
      "step": 364630
    },
    {
      "epoch": 1.2779722914410483,
      "grad_norm": 3.0,
      "learning_rate": 3.188950416806208e-05,
      "loss": 0.8273,
      "step": 364640
    },
    {
      "epoch": 1.278007338947944,
      "grad_norm": 2.625,
      "learning_rate": 3.1888855139398376e-05,
      "loss": 0.8244,
      "step": 364650
    },
    {
      "epoch": 1.2780423864548396,
      "grad_norm": 3.015625,
      "learning_rate": 3.188820611073468e-05,
      "loss": 0.8705,
      "step": 364660
    },
    {
      "epoch": 1.2780774339617351,
      "grad_norm": 2.375,
      "learning_rate": 3.188755708207097e-05,
      "loss": 0.8249,
      "step": 364670
    },
    {
      "epoch": 1.2781124814686307,
      "grad_norm": 3.015625,
      "learning_rate": 3.1886908053407274e-05,
      "loss": 0.8268,
      "step": 364680
    },
    {
      "epoch": 1.2781475289755262,
      "grad_norm": 3.34375,
      "learning_rate": 3.188625902474357e-05,
      "loss": 0.9073,
      "step": 364690
    },
    {
      "epoch": 1.278182576482422,
      "grad_norm": 2.78125,
      "learning_rate": 3.188560999607987e-05,
      "loss": 0.7717,
      "step": 364700
    },
    {
      "epoch": 1.2782176239893175,
      "grad_norm": 3.0,
      "learning_rate": 3.1884960967416164e-05,
      "loss": 0.8287,
      "step": 364710
    },
    {
      "epoch": 1.278252671496213,
      "grad_norm": 2.609375,
      "learning_rate": 3.1884311938752466e-05,
      "loss": 0.8371,
      "step": 364720
    },
    {
      "epoch": 1.2782877190031088,
      "grad_norm": 2.9375,
      "learning_rate": 3.188366291008876e-05,
      "loss": 0.8378,
      "step": 364730
    },
    {
      "epoch": 1.2783227665100043,
      "grad_norm": 2.9375,
      "learning_rate": 3.188301388142506e-05,
      "loss": 0.8246,
      "step": 364740
    },
    {
      "epoch": 1.2783578140168999,
      "grad_norm": 2.84375,
      "learning_rate": 3.1882364852761356e-05,
      "loss": 0.8085,
      "step": 364750
    },
    {
      "epoch": 1.2783928615237956,
      "grad_norm": 2.796875,
      "learning_rate": 3.188171582409766e-05,
      "loss": 0.8499,
      "step": 364760
    },
    {
      "epoch": 1.2784279090306911,
      "grad_norm": 3.15625,
      "learning_rate": 3.188106679543396e-05,
      "loss": 0.888,
      "step": 364770
    },
    {
      "epoch": 1.2784629565375867,
      "grad_norm": 3.046875,
      "learning_rate": 3.1880417766770254e-05,
      "loss": 0.7735,
      "step": 364780
    },
    {
      "epoch": 1.2784980040444822,
      "grad_norm": 2.859375,
      "learning_rate": 3.1879768738106555e-05,
      "loss": 0.8533,
      "step": 364790
    },
    {
      "epoch": 1.278533051551378,
      "grad_norm": 2.90625,
      "learning_rate": 3.187911970944285e-05,
      "loss": 0.7941,
      "step": 364800
    },
    {
      "epoch": 1.2785680990582735,
      "grad_norm": 2.75,
      "learning_rate": 3.187847068077915e-05,
      "loss": 0.7248,
      "step": 364810
    },
    {
      "epoch": 1.278603146565169,
      "grad_norm": 3.34375,
      "learning_rate": 3.1877821652115446e-05,
      "loss": 0.9405,
      "step": 364820
    },
    {
      "epoch": 1.2786381940720646,
      "grad_norm": 3.203125,
      "learning_rate": 3.187717262345175e-05,
      "loss": 0.889,
      "step": 364830
    },
    {
      "epoch": 1.2786732415789603,
      "grad_norm": 2.625,
      "learning_rate": 3.187652359478804e-05,
      "loss": 0.7975,
      "step": 364840
    },
    {
      "epoch": 1.2787082890858559,
      "grad_norm": 3.046875,
      "learning_rate": 3.187587456612434e-05,
      "loss": 0.8338,
      "step": 364850
    },
    {
      "epoch": 1.2787433365927514,
      "grad_norm": 2.609375,
      "learning_rate": 3.187522553746064e-05,
      "loss": 0.8361,
      "step": 364860
    },
    {
      "epoch": 1.2787783840996472,
      "grad_norm": 2.515625,
      "learning_rate": 3.187457650879694e-05,
      "loss": 0.7857,
      "step": 364870
    },
    {
      "epoch": 1.2788134316065427,
      "grad_norm": 2.9375,
      "learning_rate": 3.1873927480133234e-05,
      "loss": 0.9126,
      "step": 364880
    },
    {
      "epoch": 1.2788484791134382,
      "grad_norm": 3.140625,
      "learning_rate": 3.1873278451469535e-05,
      "loss": 0.8759,
      "step": 364890
    },
    {
      "epoch": 1.278883526620334,
      "grad_norm": 2.734375,
      "learning_rate": 3.1872629422805836e-05,
      "loss": 0.8122,
      "step": 364900
    },
    {
      "epoch": 1.2789185741272295,
      "grad_norm": 2.890625,
      "learning_rate": 3.1871980394142124e-05,
      "loss": 0.7834,
      "step": 364910
    },
    {
      "epoch": 1.278953621634125,
      "grad_norm": 2.96875,
      "learning_rate": 3.1871331365478426e-05,
      "loss": 0.8179,
      "step": 364920
    },
    {
      "epoch": 1.2789886691410206,
      "grad_norm": 2.421875,
      "learning_rate": 3.187068233681472e-05,
      "loss": 0.8745,
      "step": 364930
    },
    {
      "epoch": 1.2790237166479161,
      "grad_norm": 2.796875,
      "learning_rate": 3.187003330815102e-05,
      "loss": 0.8648,
      "step": 364940
    },
    {
      "epoch": 1.279058764154812,
      "grad_norm": 3.484375,
      "learning_rate": 3.1869384279487316e-05,
      "loss": 0.8565,
      "step": 364950
    },
    {
      "epoch": 1.2790938116617074,
      "grad_norm": 3.0625,
      "learning_rate": 3.186873525082362e-05,
      "loss": 0.8773,
      "step": 364960
    },
    {
      "epoch": 1.279128859168603,
      "grad_norm": 3.125,
      "learning_rate": 3.186808622215991e-05,
      "loss": 0.8002,
      "step": 364970
    },
    {
      "epoch": 1.2791639066754987,
      "grad_norm": 3.09375,
      "learning_rate": 3.1867437193496214e-05,
      "loss": 0.8858,
      "step": 364980
    },
    {
      "epoch": 1.2791989541823943,
      "grad_norm": 2.6875,
      "learning_rate": 3.1866788164832515e-05,
      "loss": 0.8986,
      "step": 364990
    },
    {
      "epoch": 1.2792340016892898,
      "grad_norm": 3.0,
      "learning_rate": 3.186613913616881e-05,
      "loss": 0.866,
      "step": 365000
    },
    {
      "epoch": 1.2792340016892898,
      "eval_loss": 0.8021888732910156,
      "eval_runtime": 565.2968,
      "eval_samples_per_second": 672.984,
      "eval_steps_per_second": 56.082,
      "step": 365000
    },
    {
      "epoch": 1.2792690491961856,
      "grad_norm": 3.390625,
      "learning_rate": 3.186549010750511e-05,
      "loss": 0.8017,
      "step": 365010
    },
    {
      "epoch": 1.279304096703081,
      "grad_norm": 2.875,
      "learning_rate": 3.1864841078841406e-05,
      "loss": 0.8523,
      "step": 365020
    },
    {
      "epoch": 1.2793391442099766,
      "grad_norm": 2.78125,
      "learning_rate": 3.186419205017771e-05,
      "loss": 0.7964,
      "step": 365030
    },
    {
      "epoch": 1.2793741917168722,
      "grad_norm": 2.640625,
      "learning_rate": 3.1863543021514e-05,
      "loss": 0.8059,
      "step": 365040
    },
    {
      "epoch": 1.2794092392237677,
      "grad_norm": 3.078125,
      "learning_rate": 3.18628939928503e-05,
      "loss": 0.8706,
      "step": 365050
    },
    {
      "epoch": 1.2794442867306635,
      "grad_norm": 2.5625,
      "learning_rate": 3.18622449641866e-05,
      "loss": 0.8375,
      "step": 365060
    },
    {
      "epoch": 1.279479334237559,
      "grad_norm": 3.203125,
      "learning_rate": 3.18615959355229e-05,
      "loss": 0.8105,
      "step": 365070
    },
    {
      "epoch": 1.2795143817444545,
      "grad_norm": 3.171875,
      "learning_rate": 3.1860946906859194e-05,
      "loss": 0.9176,
      "step": 365080
    },
    {
      "epoch": 1.2795494292513503,
      "grad_norm": 3.234375,
      "learning_rate": 3.1860297878195495e-05,
      "loss": 0.8149,
      "step": 365090
    },
    {
      "epoch": 1.2795844767582458,
      "grad_norm": 2.53125,
      "learning_rate": 3.185964884953179e-05,
      "loss": 0.8337,
      "step": 365100
    },
    {
      "epoch": 1.2796195242651414,
      "grad_norm": 2.78125,
      "learning_rate": 3.185899982086809e-05,
      "loss": 0.8692,
      "step": 365110
    },
    {
      "epoch": 1.2796545717720371,
      "grad_norm": 2.71875,
      "learning_rate": 3.1858350792204386e-05,
      "loss": 0.8954,
      "step": 365120
    },
    {
      "epoch": 1.2796896192789327,
      "grad_norm": 2.875,
      "learning_rate": 3.185770176354069e-05,
      "loss": 0.9204,
      "step": 365130
    },
    {
      "epoch": 1.2797246667858282,
      "grad_norm": 2.734375,
      "learning_rate": 3.185705273487699e-05,
      "loss": 0.8034,
      "step": 365140
    },
    {
      "epoch": 1.2797597142927237,
      "grad_norm": 2.96875,
      "learning_rate": 3.185640370621328e-05,
      "loss": 0.894,
      "step": 365150
    },
    {
      "epoch": 1.2797947617996193,
      "grad_norm": 2.921875,
      "learning_rate": 3.1855754677549584e-05,
      "loss": 0.8429,
      "step": 365160
    },
    {
      "epoch": 1.279829809306515,
      "grad_norm": 2.8125,
      "learning_rate": 3.185510564888588e-05,
      "loss": 0.8398,
      "step": 365170
    },
    {
      "epoch": 1.2798648568134106,
      "grad_norm": 3.09375,
      "learning_rate": 3.185445662022218e-05,
      "loss": 0.8204,
      "step": 365180
    },
    {
      "epoch": 1.279899904320306,
      "grad_norm": 3.578125,
      "learning_rate": 3.1853807591558475e-05,
      "loss": 0.8682,
      "step": 365190
    },
    {
      "epoch": 1.2799349518272018,
      "grad_norm": 4.21875,
      "learning_rate": 3.1853158562894776e-05,
      "loss": 0.8093,
      "step": 365200
    },
    {
      "epoch": 1.2799699993340974,
      "grad_norm": 3.03125,
      "learning_rate": 3.185250953423107e-05,
      "loss": 0.8215,
      "step": 365210
    },
    {
      "epoch": 1.280005046840993,
      "grad_norm": 2.765625,
      "learning_rate": 3.185186050556737e-05,
      "loss": 0.7471,
      "step": 365220
    },
    {
      "epoch": 1.2800400943478887,
      "grad_norm": 2.859375,
      "learning_rate": 3.185121147690367e-05,
      "loss": 0.8624,
      "step": 365230
    },
    {
      "epoch": 1.2800751418547842,
      "grad_norm": 2.890625,
      "learning_rate": 3.185056244823997e-05,
      "loss": 0.9186,
      "step": 365240
    },
    {
      "epoch": 1.2801101893616798,
      "grad_norm": 2.609375,
      "learning_rate": 3.184991341957626e-05,
      "loss": 0.8127,
      "step": 365250
    },
    {
      "epoch": 1.2801452368685753,
      "grad_norm": 3.234375,
      "learning_rate": 3.1849264390912564e-05,
      "loss": 0.8242,
      "step": 365260
    },
    {
      "epoch": 1.2801802843754708,
      "grad_norm": 2.296875,
      "learning_rate": 3.1848615362248866e-05,
      "loss": 0.7881,
      "step": 365270
    },
    {
      "epoch": 1.2802153318823666,
      "grad_norm": 2.578125,
      "learning_rate": 3.1847966333585154e-05,
      "loss": 0.877,
      "step": 365280
    },
    {
      "epoch": 1.2802503793892621,
      "grad_norm": 2.796875,
      "learning_rate": 3.1847317304921455e-05,
      "loss": 0.8816,
      "step": 365290
    },
    {
      "epoch": 1.2802854268961577,
      "grad_norm": 2.6875,
      "learning_rate": 3.184666827625775e-05,
      "loss": 0.8847,
      "step": 365300
    },
    {
      "epoch": 1.2803204744030534,
      "grad_norm": 2.546875,
      "learning_rate": 3.184601924759405e-05,
      "loss": 0.8637,
      "step": 365310
    },
    {
      "epoch": 1.280355521909949,
      "grad_norm": 2.9375,
      "learning_rate": 3.1845370218930346e-05,
      "loss": 0.8914,
      "step": 365320
    },
    {
      "epoch": 1.2803905694168445,
      "grad_norm": 2.625,
      "learning_rate": 3.184472119026665e-05,
      "loss": 0.8163,
      "step": 365330
    },
    {
      "epoch": 1.2804256169237402,
      "grad_norm": 3.375,
      "learning_rate": 3.184407216160294e-05,
      "loss": 0.8246,
      "step": 365340
    },
    {
      "epoch": 1.2804606644306358,
      "grad_norm": 3.3125,
      "learning_rate": 3.184342313293924e-05,
      "loss": 0.7841,
      "step": 365350
    },
    {
      "epoch": 1.2804957119375313,
      "grad_norm": 2.59375,
      "learning_rate": 3.1842774104275544e-05,
      "loss": 0.8161,
      "step": 365360
    },
    {
      "epoch": 1.2805307594444268,
      "grad_norm": 2.625,
      "learning_rate": 3.184212507561184e-05,
      "loss": 0.8921,
      "step": 365370
    },
    {
      "epoch": 1.2805658069513224,
      "grad_norm": 2.65625,
      "learning_rate": 3.184147604694814e-05,
      "loss": 0.7933,
      "step": 365380
    },
    {
      "epoch": 1.2806008544582181,
      "grad_norm": 3.171875,
      "learning_rate": 3.1840827018284435e-05,
      "loss": 0.7814,
      "step": 365390
    },
    {
      "epoch": 1.2806359019651137,
      "grad_norm": 2.75,
      "learning_rate": 3.1840177989620736e-05,
      "loss": 0.8288,
      "step": 365400
    },
    {
      "epoch": 1.2806709494720092,
      "grad_norm": 2.78125,
      "learning_rate": 3.183952896095703e-05,
      "loss": 0.8464,
      "step": 365410
    },
    {
      "epoch": 1.280705996978905,
      "grad_norm": 2.703125,
      "learning_rate": 3.183887993229333e-05,
      "loss": 0.8852,
      "step": 365420
    },
    {
      "epoch": 1.2807410444858005,
      "grad_norm": 2.34375,
      "learning_rate": 3.183823090362963e-05,
      "loss": 0.8339,
      "step": 365430
    },
    {
      "epoch": 1.280776091992696,
      "grad_norm": 3.0625,
      "learning_rate": 3.183758187496593e-05,
      "loss": 0.8801,
      "step": 365440
    },
    {
      "epoch": 1.2808111394995918,
      "grad_norm": 3.40625,
      "learning_rate": 3.183693284630222e-05,
      "loss": 0.7975,
      "step": 365450
    },
    {
      "epoch": 1.2808461870064873,
      "grad_norm": 2.484375,
      "learning_rate": 3.1836283817638524e-05,
      "loss": 0.8053,
      "step": 365460
    },
    {
      "epoch": 1.2808812345133829,
      "grad_norm": 3.0625,
      "learning_rate": 3.183563478897482e-05,
      "loss": 0.8693,
      "step": 365470
    },
    {
      "epoch": 1.2809162820202784,
      "grad_norm": 2.6875,
      "learning_rate": 3.183498576031112e-05,
      "loss": 0.8615,
      "step": 365480
    },
    {
      "epoch": 1.2809513295271742,
      "grad_norm": 2.953125,
      "learning_rate": 3.1834336731647415e-05,
      "loss": 0.845,
      "step": 365490
    },
    {
      "epoch": 1.2809863770340697,
      "grad_norm": 2.515625,
      "learning_rate": 3.1833687702983716e-05,
      "loss": 0.8491,
      "step": 365500
    },
    {
      "epoch": 1.2810214245409652,
      "grad_norm": 2.625,
      "learning_rate": 3.183303867432002e-05,
      "loss": 0.8781,
      "step": 365510
    },
    {
      "epoch": 1.2810564720478608,
      "grad_norm": 3.09375,
      "learning_rate": 3.183238964565631e-05,
      "loss": 0.7854,
      "step": 365520
    },
    {
      "epoch": 1.2810915195547565,
      "grad_norm": 2.6875,
      "learning_rate": 3.1831740616992614e-05,
      "loss": 0.7855,
      "step": 365530
    },
    {
      "epoch": 1.281126567061652,
      "grad_norm": 3.171875,
      "learning_rate": 3.183109158832891e-05,
      "loss": 0.9029,
      "step": 365540
    },
    {
      "epoch": 1.2811616145685476,
      "grad_norm": 3.5625,
      "learning_rate": 3.183044255966521e-05,
      "loss": 0.8429,
      "step": 365550
    },
    {
      "epoch": 1.2811966620754434,
      "grad_norm": 3.390625,
      "learning_rate": 3.1829793531001504e-05,
      "loss": 0.8649,
      "step": 365560
    },
    {
      "epoch": 1.281231709582339,
      "grad_norm": 2.671875,
      "learning_rate": 3.1829144502337806e-05,
      "loss": 0.8813,
      "step": 365570
    },
    {
      "epoch": 1.2812667570892344,
      "grad_norm": 3.046875,
      "learning_rate": 3.18284954736741e-05,
      "loss": 0.8115,
      "step": 365580
    },
    {
      "epoch": 1.2813018045961302,
      "grad_norm": 3.1875,
      "learning_rate": 3.18278464450104e-05,
      "loss": 0.7823,
      "step": 365590
    },
    {
      "epoch": 1.2813368521030257,
      "grad_norm": 2.65625,
      "learning_rate": 3.1827197416346696e-05,
      "loss": 0.833,
      "step": 365600
    },
    {
      "epoch": 1.2813718996099213,
      "grad_norm": 3.140625,
      "learning_rate": 3.1826548387683e-05,
      "loss": 0.8456,
      "step": 365610
    },
    {
      "epoch": 1.2814069471168168,
      "grad_norm": 2.828125,
      "learning_rate": 3.182589935901929e-05,
      "loss": 0.8467,
      "step": 365620
    },
    {
      "epoch": 1.2814419946237123,
      "grad_norm": 3.078125,
      "learning_rate": 3.1825250330355594e-05,
      "loss": 0.971,
      "step": 365630
    },
    {
      "epoch": 1.281477042130608,
      "grad_norm": 3.109375,
      "learning_rate": 3.1824601301691895e-05,
      "loss": 0.9346,
      "step": 365640
    },
    {
      "epoch": 1.2815120896375036,
      "grad_norm": 3.296875,
      "learning_rate": 3.182395227302819e-05,
      "loss": 0.9042,
      "step": 365650
    },
    {
      "epoch": 1.2815471371443992,
      "grad_norm": 2.625,
      "learning_rate": 3.1823303244364484e-05,
      "loss": 0.7412,
      "step": 365660
    },
    {
      "epoch": 1.281582184651295,
      "grad_norm": 2.609375,
      "learning_rate": 3.182265421570078e-05,
      "loss": 0.8104,
      "step": 365670
    },
    {
      "epoch": 1.2816172321581905,
      "grad_norm": 2.734375,
      "learning_rate": 3.182200518703708e-05,
      "loss": 0.8302,
      "step": 365680
    },
    {
      "epoch": 1.281652279665086,
      "grad_norm": 2.75,
      "learning_rate": 3.1821356158373375e-05,
      "loss": 0.8347,
      "step": 365690
    },
    {
      "epoch": 1.2816873271719817,
      "grad_norm": 3.25,
      "learning_rate": 3.1820707129709676e-05,
      "loss": 0.8191,
      "step": 365700
    },
    {
      "epoch": 1.2817223746788773,
      "grad_norm": 2.859375,
      "learning_rate": 3.182005810104597e-05,
      "loss": 0.8672,
      "step": 365710
    },
    {
      "epoch": 1.2817574221857728,
      "grad_norm": 3.265625,
      "learning_rate": 3.181940907238227e-05,
      "loss": 0.9294,
      "step": 365720
    },
    {
      "epoch": 1.2817924696926684,
      "grad_norm": 2.828125,
      "learning_rate": 3.1818760043718574e-05,
      "loss": 0.8407,
      "step": 365730
    },
    {
      "epoch": 1.281827517199564,
      "grad_norm": 3.140625,
      "learning_rate": 3.181811101505487e-05,
      "loss": 0.8231,
      "step": 365740
    },
    {
      "epoch": 1.2818625647064597,
      "grad_norm": 2.984375,
      "learning_rate": 3.181746198639117e-05,
      "loss": 0.8295,
      "step": 365750
    },
    {
      "epoch": 1.2818976122133552,
      "grad_norm": 3.25,
      "learning_rate": 3.1816812957727464e-05,
      "loss": 0.7643,
      "step": 365760
    },
    {
      "epoch": 1.2819326597202507,
      "grad_norm": 2.734375,
      "learning_rate": 3.1816163929063766e-05,
      "loss": 0.8969,
      "step": 365770
    },
    {
      "epoch": 1.2819677072271465,
      "grad_norm": 3.546875,
      "learning_rate": 3.181551490040006e-05,
      "loss": 0.8401,
      "step": 365780
    },
    {
      "epoch": 1.282002754734042,
      "grad_norm": 3.015625,
      "learning_rate": 3.181486587173636e-05,
      "loss": 0.8761,
      "step": 365790
    },
    {
      "epoch": 1.2820378022409376,
      "grad_norm": 2.8125,
      "learning_rate": 3.1814216843072656e-05,
      "loss": 0.7796,
      "step": 365800
    },
    {
      "epoch": 1.2820728497478333,
      "grad_norm": 2.90625,
      "learning_rate": 3.181356781440896e-05,
      "loss": 0.8027,
      "step": 365810
    },
    {
      "epoch": 1.2821078972547288,
      "grad_norm": 3.359375,
      "learning_rate": 3.181291878574525e-05,
      "loss": 0.9007,
      "step": 365820
    },
    {
      "epoch": 1.2821429447616244,
      "grad_norm": 3.09375,
      "learning_rate": 3.1812269757081554e-05,
      "loss": 0.8935,
      "step": 365830
    },
    {
      "epoch": 1.28217799226852,
      "grad_norm": 3.03125,
      "learning_rate": 3.181162072841785e-05,
      "loss": 0.9357,
      "step": 365840
    },
    {
      "epoch": 1.2822130397754155,
      "grad_norm": 2.796875,
      "learning_rate": 3.181097169975415e-05,
      "loss": 0.8681,
      "step": 365850
    },
    {
      "epoch": 1.2822480872823112,
      "grad_norm": 2.875,
      "learning_rate": 3.181032267109045e-05,
      "loss": 0.8608,
      "step": 365860
    },
    {
      "epoch": 1.2822831347892067,
      "grad_norm": 2.921875,
      "learning_rate": 3.1809673642426746e-05,
      "loss": 0.8729,
      "step": 365870
    },
    {
      "epoch": 1.2823181822961023,
      "grad_norm": 2.78125,
      "learning_rate": 3.180902461376305e-05,
      "loss": 0.9012,
      "step": 365880
    },
    {
      "epoch": 1.282353229802998,
      "grad_norm": 3.34375,
      "learning_rate": 3.180837558509934e-05,
      "loss": 0.8346,
      "step": 365890
    },
    {
      "epoch": 1.2823882773098936,
      "grad_norm": 2.3125,
      "learning_rate": 3.180772655643564e-05,
      "loss": 0.7807,
      "step": 365900
    },
    {
      "epoch": 1.2824233248167891,
      "grad_norm": 2.625,
      "learning_rate": 3.180707752777194e-05,
      "loss": 0.8279,
      "step": 365910
    },
    {
      "epoch": 1.2824583723236849,
      "grad_norm": 3.0,
      "learning_rate": 3.180642849910824e-05,
      "loss": 0.8074,
      "step": 365920
    },
    {
      "epoch": 1.2824934198305804,
      "grad_norm": 3.046875,
      "learning_rate": 3.1805779470444534e-05,
      "loss": 0.8594,
      "step": 365930
    },
    {
      "epoch": 1.282528467337476,
      "grad_norm": 3.296875,
      "learning_rate": 3.1805130441780835e-05,
      "loss": 0.8895,
      "step": 365940
    },
    {
      "epoch": 1.2825635148443715,
      "grad_norm": 2.578125,
      "learning_rate": 3.180448141311713e-05,
      "loss": 0.8652,
      "step": 365950
    },
    {
      "epoch": 1.282598562351267,
      "grad_norm": 2.453125,
      "learning_rate": 3.180383238445343e-05,
      "loss": 0.8803,
      "step": 365960
    },
    {
      "epoch": 1.2826336098581628,
      "grad_norm": 3.109375,
      "learning_rate": 3.1803183355789726e-05,
      "loss": 0.8799,
      "step": 365970
    },
    {
      "epoch": 1.2826686573650583,
      "grad_norm": 3.140625,
      "learning_rate": 3.180253432712603e-05,
      "loss": 0.8727,
      "step": 365980
    },
    {
      "epoch": 1.2827037048719538,
      "grad_norm": 2.578125,
      "learning_rate": 3.180188529846232e-05,
      "loss": 0.8591,
      "step": 365990
    },
    {
      "epoch": 1.2827387523788496,
      "grad_norm": 2.796875,
      "learning_rate": 3.180123626979862e-05,
      "loss": 0.8705,
      "step": 366000
    },
    {
      "epoch": 1.2827737998857451,
      "grad_norm": 3.0625,
      "learning_rate": 3.1800587241134925e-05,
      "loss": 0.9243,
      "step": 366010
    },
    {
      "epoch": 1.2828088473926407,
      "grad_norm": 3.015625,
      "learning_rate": 3.179993821247122e-05,
      "loss": 0.8754,
      "step": 366020
    },
    {
      "epoch": 1.2828438948995364,
      "grad_norm": 2.890625,
      "learning_rate": 3.1799289183807514e-05,
      "loss": 0.9544,
      "step": 366030
    },
    {
      "epoch": 1.282878942406432,
      "grad_norm": 3.578125,
      "learning_rate": 3.179864015514381e-05,
      "loss": 0.8744,
      "step": 366040
    },
    {
      "epoch": 1.2829139899133275,
      "grad_norm": 2.828125,
      "learning_rate": 3.179799112648011e-05,
      "loss": 0.8677,
      "step": 366050
    },
    {
      "epoch": 1.282949037420223,
      "grad_norm": 2.890625,
      "learning_rate": 3.1797342097816404e-05,
      "loss": 0.7838,
      "step": 366060
    },
    {
      "epoch": 1.2829840849271186,
      "grad_norm": 3.09375,
      "learning_rate": 3.1796693069152706e-05,
      "loss": 0.9088,
      "step": 366070
    },
    {
      "epoch": 1.2830191324340143,
      "grad_norm": 2.53125,
      "learning_rate": 3.1796044040489e-05,
      "loss": 0.8812,
      "step": 366080
    },
    {
      "epoch": 1.2830541799409099,
      "grad_norm": 2.28125,
      "learning_rate": 3.17953950118253e-05,
      "loss": 0.7879,
      "step": 366090
    },
    {
      "epoch": 1.2830892274478054,
      "grad_norm": 2.5625,
      "learning_rate": 3.17947459831616e-05,
      "loss": 0.8322,
      "step": 366100
    },
    {
      "epoch": 1.2831242749547012,
      "grad_norm": 2.75,
      "learning_rate": 3.17940969544979e-05,
      "loss": 0.8675,
      "step": 366110
    },
    {
      "epoch": 1.2831593224615967,
      "grad_norm": 2.90625,
      "learning_rate": 3.17934479258342e-05,
      "loss": 0.7982,
      "step": 366120
    },
    {
      "epoch": 1.2831943699684922,
      "grad_norm": 2.75,
      "learning_rate": 3.1792798897170494e-05,
      "loss": 0.7966,
      "step": 366130
    },
    {
      "epoch": 1.283229417475388,
      "grad_norm": 2.640625,
      "learning_rate": 3.1792149868506795e-05,
      "loss": 0.8922,
      "step": 366140
    },
    {
      "epoch": 1.2832644649822835,
      "grad_norm": 2.8125,
      "learning_rate": 3.179150083984309e-05,
      "loss": 0.8888,
      "step": 366150
    },
    {
      "epoch": 1.283299512489179,
      "grad_norm": 3.046875,
      "learning_rate": 3.179085181117939e-05,
      "loss": 0.9233,
      "step": 366160
    },
    {
      "epoch": 1.2833345599960746,
      "grad_norm": 3.0,
      "learning_rate": 3.1790202782515686e-05,
      "loss": 0.8398,
      "step": 366170
    },
    {
      "epoch": 1.2833696075029704,
      "grad_norm": 2.375,
      "learning_rate": 3.178955375385199e-05,
      "loss": 0.8298,
      "step": 366180
    },
    {
      "epoch": 1.283404655009866,
      "grad_norm": 2.859375,
      "learning_rate": 3.178890472518828e-05,
      "loss": 0.8812,
      "step": 366190
    },
    {
      "epoch": 1.2834397025167614,
      "grad_norm": 2.421875,
      "learning_rate": 3.178825569652458e-05,
      "loss": 0.8917,
      "step": 366200
    },
    {
      "epoch": 1.283474750023657,
      "grad_norm": 3.046875,
      "learning_rate": 3.178760666786088e-05,
      "loss": 0.8827,
      "step": 366210
    },
    {
      "epoch": 1.2835097975305527,
      "grad_norm": 3.15625,
      "learning_rate": 3.178695763919718e-05,
      "loss": 0.8283,
      "step": 366220
    },
    {
      "epoch": 1.2835448450374483,
      "grad_norm": 2.75,
      "learning_rate": 3.178630861053348e-05,
      "loss": 0.8876,
      "step": 366230
    },
    {
      "epoch": 1.2835798925443438,
      "grad_norm": 3.328125,
      "learning_rate": 3.1785659581869775e-05,
      "loss": 0.8165,
      "step": 366240
    },
    {
      "epoch": 1.2836149400512396,
      "grad_norm": 3.421875,
      "learning_rate": 3.1785010553206077e-05,
      "loss": 0.8508,
      "step": 366250
    },
    {
      "epoch": 1.283649987558135,
      "grad_norm": 2.765625,
      "learning_rate": 3.178436152454237e-05,
      "loss": 0.8517,
      "step": 366260
    },
    {
      "epoch": 1.2836850350650306,
      "grad_norm": 2.96875,
      "learning_rate": 3.178371249587867e-05,
      "loss": 0.9079,
      "step": 366270
    },
    {
      "epoch": 1.2837200825719264,
      "grad_norm": 2.984375,
      "learning_rate": 3.178306346721497e-05,
      "loss": 0.9111,
      "step": 366280
    },
    {
      "epoch": 1.283755130078822,
      "grad_norm": 2.671875,
      "learning_rate": 3.178241443855127e-05,
      "loss": 0.8146,
      "step": 366290
    },
    {
      "epoch": 1.2837901775857175,
      "grad_norm": 2.59375,
      "learning_rate": 3.178176540988756e-05,
      "loss": 0.8741,
      "step": 366300
    },
    {
      "epoch": 1.283825225092613,
      "grad_norm": 2.890625,
      "learning_rate": 3.1781116381223865e-05,
      "loss": 0.8954,
      "step": 366310
    },
    {
      "epoch": 1.2838602725995085,
      "grad_norm": 2.8125,
      "learning_rate": 3.178046735256016e-05,
      "loss": 0.8563,
      "step": 366320
    },
    {
      "epoch": 1.2838953201064043,
      "grad_norm": 2.515625,
      "learning_rate": 3.177981832389646e-05,
      "loss": 0.8312,
      "step": 366330
    },
    {
      "epoch": 1.2839303676132998,
      "grad_norm": 3.109375,
      "learning_rate": 3.1779169295232755e-05,
      "loss": 0.8729,
      "step": 366340
    },
    {
      "epoch": 1.2839654151201954,
      "grad_norm": 2.71875,
      "learning_rate": 3.1778520266569057e-05,
      "loss": 0.8731,
      "step": 366350
    },
    {
      "epoch": 1.284000462627091,
      "grad_norm": 3.34375,
      "learning_rate": 3.177787123790535e-05,
      "loss": 0.8274,
      "step": 366360
    },
    {
      "epoch": 1.2840355101339866,
      "grad_norm": 2.65625,
      "learning_rate": 3.177722220924165e-05,
      "loss": 0.8425,
      "step": 366370
    },
    {
      "epoch": 1.2840705576408822,
      "grad_norm": 3.125,
      "learning_rate": 3.1776573180577954e-05,
      "loss": 0.8869,
      "step": 366380
    },
    {
      "epoch": 1.284105605147778,
      "grad_norm": 2.75,
      "learning_rate": 3.177592415191425e-05,
      "loss": 0.8482,
      "step": 366390
    },
    {
      "epoch": 1.2841406526546735,
      "grad_norm": 2.921875,
      "learning_rate": 3.177527512325055e-05,
      "loss": 0.836,
      "step": 366400
    },
    {
      "epoch": 1.284175700161569,
      "grad_norm": 3.265625,
      "learning_rate": 3.177462609458684e-05,
      "loss": 0.8774,
      "step": 366410
    },
    {
      "epoch": 1.2842107476684645,
      "grad_norm": 2.734375,
      "learning_rate": 3.177397706592314e-05,
      "loss": 0.8198,
      "step": 366420
    },
    {
      "epoch": 1.28424579517536,
      "grad_norm": 2.953125,
      "learning_rate": 3.1773328037259434e-05,
      "loss": 0.8183,
      "step": 366430
    },
    {
      "epoch": 1.2842808426822558,
      "grad_norm": 3.0,
      "learning_rate": 3.1772679008595735e-05,
      "loss": 0.9007,
      "step": 366440
    },
    {
      "epoch": 1.2843158901891514,
      "grad_norm": 2.609375,
      "learning_rate": 3.177202997993203e-05,
      "loss": 0.8688,
      "step": 366450
    },
    {
      "epoch": 1.284350937696047,
      "grad_norm": 2.9375,
      "learning_rate": 3.177138095126833e-05,
      "loss": 0.8053,
      "step": 366460
    },
    {
      "epoch": 1.2843859852029427,
      "grad_norm": 2.6875,
      "learning_rate": 3.177073192260463e-05,
      "loss": 0.8935,
      "step": 366470
    },
    {
      "epoch": 1.2844210327098382,
      "grad_norm": 2.3125,
      "learning_rate": 3.177008289394093e-05,
      "loss": 0.7579,
      "step": 366480
    },
    {
      "epoch": 1.2844560802167337,
      "grad_norm": 2.953125,
      "learning_rate": 3.176943386527723e-05,
      "loss": 0.8418,
      "step": 366490
    },
    {
      "epoch": 1.2844911277236295,
      "grad_norm": 2.875,
      "learning_rate": 3.176878483661352e-05,
      "loss": 0.8636,
      "step": 366500
    },
    {
      "epoch": 1.284526175230525,
      "grad_norm": 3.234375,
      "learning_rate": 3.1768135807949825e-05,
      "loss": 0.8318,
      "step": 366510
    },
    {
      "epoch": 1.2845612227374206,
      "grad_norm": 2.953125,
      "learning_rate": 3.176748677928612e-05,
      "loss": 0.7782,
      "step": 366520
    },
    {
      "epoch": 1.284596270244316,
      "grad_norm": 3.421875,
      "learning_rate": 3.176683775062242e-05,
      "loss": 0.9012,
      "step": 366530
    },
    {
      "epoch": 1.2846313177512116,
      "grad_norm": 2.953125,
      "learning_rate": 3.1766188721958715e-05,
      "loss": 0.893,
      "step": 366540
    },
    {
      "epoch": 1.2846663652581074,
      "grad_norm": 2.609375,
      "learning_rate": 3.1765539693295017e-05,
      "loss": 0.8494,
      "step": 366550
    },
    {
      "epoch": 1.284701412765003,
      "grad_norm": 2.5,
      "learning_rate": 3.176489066463131e-05,
      "loss": 0.8099,
      "step": 366560
    },
    {
      "epoch": 1.2847364602718985,
      "grad_norm": 2.890625,
      "learning_rate": 3.176424163596761e-05,
      "loss": 0.8599,
      "step": 366570
    },
    {
      "epoch": 1.2847715077787942,
      "grad_norm": 2.640625,
      "learning_rate": 3.176359260730391e-05,
      "loss": 0.7571,
      "step": 366580
    },
    {
      "epoch": 1.2848065552856898,
      "grad_norm": 3.15625,
      "learning_rate": 3.176294357864021e-05,
      "loss": 0.8155,
      "step": 366590
    },
    {
      "epoch": 1.2848416027925853,
      "grad_norm": 2.953125,
      "learning_rate": 3.176229454997651e-05,
      "loss": 0.7955,
      "step": 366600
    },
    {
      "epoch": 1.284876650299481,
      "grad_norm": 3.0,
      "learning_rate": 3.1761645521312805e-05,
      "loss": 0.9337,
      "step": 366610
    },
    {
      "epoch": 1.2849116978063766,
      "grad_norm": 3.171875,
      "learning_rate": 3.1760996492649106e-05,
      "loss": 0.84,
      "step": 366620
    },
    {
      "epoch": 1.2849467453132721,
      "grad_norm": 3.09375,
      "learning_rate": 3.17603474639854e-05,
      "loss": 0.8873,
      "step": 366630
    },
    {
      "epoch": 1.2849817928201677,
      "grad_norm": 3.265625,
      "learning_rate": 3.17596984353217e-05,
      "loss": 0.8368,
      "step": 366640
    },
    {
      "epoch": 1.2850168403270632,
      "grad_norm": 2.859375,
      "learning_rate": 3.1759049406657997e-05,
      "loss": 0.9609,
      "step": 366650
    },
    {
      "epoch": 1.285051887833959,
      "grad_norm": 2.609375,
      "learning_rate": 3.17584003779943e-05,
      "loss": 0.9085,
      "step": 366660
    },
    {
      "epoch": 1.2850869353408545,
      "grad_norm": 2.859375,
      "learning_rate": 3.175775134933059e-05,
      "loss": 0.9185,
      "step": 366670
    },
    {
      "epoch": 1.28512198284775,
      "grad_norm": 2.828125,
      "learning_rate": 3.1757102320666894e-05,
      "loss": 0.7829,
      "step": 366680
    },
    {
      "epoch": 1.2851570303546458,
      "grad_norm": 2.421875,
      "learning_rate": 3.175645329200319e-05,
      "loss": 0.8062,
      "step": 366690
    },
    {
      "epoch": 1.2851920778615413,
      "grad_norm": 2.703125,
      "learning_rate": 3.175580426333949e-05,
      "loss": 0.8805,
      "step": 366700
    },
    {
      "epoch": 1.2852271253684369,
      "grad_norm": 3.171875,
      "learning_rate": 3.1755155234675785e-05,
      "loss": 0.8713,
      "step": 366710
    },
    {
      "epoch": 1.2852621728753326,
      "grad_norm": 2.734375,
      "learning_rate": 3.1754506206012086e-05,
      "loss": 0.8435,
      "step": 366720
    },
    {
      "epoch": 1.2852972203822282,
      "grad_norm": 2.625,
      "learning_rate": 3.175385717734838e-05,
      "loss": 0.8694,
      "step": 366730
    },
    {
      "epoch": 1.2853322678891237,
      "grad_norm": 2.765625,
      "learning_rate": 3.175320814868468e-05,
      "loss": 0.8167,
      "step": 366740
    },
    {
      "epoch": 1.2853673153960192,
      "grad_norm": 3.09375,
      "learning_rate": 3.175255912002098e-05,
      "loss": 0.8489,
      "step": 366750
    },
    {
      "epoch": 1.2854023629029148,
      "grad_norm": 3.3125,
      "learning_rate": 3.175191009135728e-05,
      "loss": 0.8641,
      "step": 366760
    },
    {
      "epoch": 1.2854374104098105,
      "grad_norm": 3.0,
      "learning_rate": 3.175126106269358e-05,
      "loss": 0.8281,
      "step": 366770
    },
    {
      "epoch": 1.285472457916706,
      "grad_norm": 3.109375,
      "learning_rate": 3.1750612034029874e-05,
      "loss": 0.8909,
      "step": 366780
    },
    {
      "epoch": 1.2855075054236016,
      "grad_norm": 2.9375,
      "learning_rate": 3.174996300536617e-05,
      "loss": 0.81,
      "step": 366790
    },
    {
      "epoch": 1.2855425529304974,
      "grad_norm": 3.265625,
      "learning_rate": 3.174931397670246e-05,
      "loss": 0.8116,
      "step": 366800
    },
    {
      "epoch": 1.2855776004373929,
      "grad_norm": 2.546875,
      "learning_rate": 3.1748664948038765e-05,
      "loss": 0.823,
      "step": 366810
    },
    {
      "epoch": 1.2856126479442884,
      "grad_norm": 2.75,
      "learning_rate": 3.1748015919375066e-05,
      "loss": 0.8358,
      "step": 366820
    },
    {
      "epoch": 1.2856476954511842,
      "grad_norm": 3.109375,
      "learning_rate": 3.174736689071136e-05,
      "loss": 0.8794,
      "step": 366830
    },
    {
      "epoch": 1.2856827429580797,
      "grad_norm": 2.8125,
      "learning_rate": 3.174671786204766e-05,
      "loss": 0.895,
      "step": 366840
    },
    {
      "epoch": 1.2857177904649753,
      "grad_norm": 2.71875,
      "learning_rate": 3.1746068833383957e-05,
      "loss": 0.8913,
      "step": 366850
    },
    {
      "epoch": 1.2857528379718708,
      "grad_norm": 2.421875,
      "learning_rate": 3.174541980472026e-05,
      "loss": 0.892,
      "step": 366860
    },
    {
      "epoch": 1.2857878854787665,
      "grad_norm": 2.625,
      "learning_rate": 3.174477077605655e-05,
      "loss": 0.8806,
      "step": 366870
    },
    {
      "epoch": 1.285822932985662,
      "grad_norm": 3.0625,
      "learning_rate": 3.1744121747392854e-05,
      "loss": 0.8597,
      "step": 366880
    },
    {
      "epoch": 1.2858579804925576,
      "grad_norm": 2.953125,
      "learning_rate": 3.174347271872915e-05,
      "loss": 0.8265,
      "step": 366890
    },
    {
      "epoch": 1.2858930279994532,
      "grad_norm": 3.015625,
      "learning_rate": 3.174282369006545e-05,
      "loss": 0.9584,
      "step": 366900
    },
    {
      "epoch": 1.285928075506349,
      "grad_norm": 2.984375,
      "learning_rate": 3.1742174661401745e-05,
      "loss": 0.8888,
      "step": 366910
    },
    {
      "epoch": 1.2859631230132444,
      "grad_norm": 2.75,
      "learning_rate": 3.1741525632738046e-05,
      "loss": 0.9305,
      "step": 366920
    },
    {
      "epoch": 1.28599817052014,
      "grad_norm": 2.9375,
      "learning_rate": 3.174087660407434e-05,
      "loss": 0.849,
      "step": 366930
    },
    {
      "epoch": 1.2860332180270357,
      "grad_norm": 2.671875,
      "learning_rate": 3.174022757541064e-05,
      "loss": 0.9279,
      "step": 366940
    },
    {
      "epoch": 1.2860682655339313,
      "grad_norm": 3.0,
      "learning_rate": 3.1739578546746937e-05,
      "loss": 0.8392,
      "step": 366950
    },
    {
      "epoch": 1.2861033130408268,
      "grad_norm": 3.15625,
      "learning_rate": 3.173892951808324e-05,
      "loss": 0.8711,
      "step": 366960
    },
    {
      "epoch": 1.2861383605477226,
      "grad_norm": 2.875,
      "learning_rate": 3.173828048941954e-05,
      "loss": 0.7991,
      "step": 366970
    },
    {
      "epoch": 1.286173408054618,
      "grad_norm": 3.40625,
      "learning_rate": 3.1737631460755834e-05,
      "loss": 0.8321,
      "step": 366980
    },
    {
      "epoch": 1.2862084555615136,
      "grad_norm": 3.46875,
      "learning_rate": 3.1736982432092135e-05,
      "loss": 0.894,
      "step": 366990
    },
    {
      "epoch": 1.2862435030684092,
      "grad_norm": 2.6875,
      "learning_rate": 3.173633340342843e-05,
      "loss": 0.7516,
      "step": 367000
    },
    {
      "epoch": 1.2862785505753047,
      "grad_norm": 2.75,
      "learning_rate": 3.173568437476473e-05,
      "loss": 0.8104,
      "step": 367010
    },
    {
      "epoch": 1.2863135980822005,
      "grad_norm": 3.421875,
      "learning_rate": 3.1735035346101026e-05,
      "loss": 0.9222,
      "step": 367020
    },
    {
      "epoch": 1.286348645589096,
      "grad_norm": 2.96875,
      "learning_rate": 3.173438631743733e-05,
      "loss": 0.8807,
      "step": 367030
    },
    {
      "epoch": 1.2863836930959915,
      "grad_norm": 2.65625,
      "learning_rate": 3.173373728877362e-05,
      "loss": 0.7707,
      "step": 367040
    },
    {
      "epoch": 1.2864187406028873,
      "grad_norm": 3.453125,
      "learning_rate": 3.173308826010992e-05,
      "loss": 0.8696,
      "step": 367050
    },
    {
      "epoch": 1.2864537881097828,
      "grad_norm": 2.84375,
      "learning_rate": 3.173243923144622e-05,
      "loss": 0.8578,
      "step": 367060
    },
    {
      "epoch": 1.2864888356166784,
      "grad_norm": 2.875,
      "learning_rate": 3.173179020278252e-05,
      "loss": 0.8481,
      "step": 367070
    },
    {
      "epoch": 1.2865238831235741,
      "grad_norm": 3.046875,
      "learning_rate": 3.1731141174118814e-05,
      "loss": 0.9722,
      "step": 367080
    },
    {
      "epoch": 1.2865589306304697,
      "grad_norm": 2.8125,
      "learning_rate": 3.1730492145455115e-05,
      "loss": 0.8388,
      "step": 367090
    },
    {
      "epoch": 1.2865939781373652,
      "grad_norm": 2.765625,
      "learning_rate": 3.172984311679142e-05,
      "loss": 0.8605,
      "step": 367100
    },
    {
      "epoch": 1.2866290256442607,
      "grad_norm": 2.96875,
      "learning_rate": 3.172919408812771e-05,
      "loss": 0.8908,
      "step": 367110
    },
    {
      "epoch": 1.2866640731511563,
      "grad_norm": 3.046875,
      "learning_rate": 3.172854505946401e-05,
      "loss": 0.8762,
      "step": 367120
    },
    {
      "epoch": 1.286699120658052,
      "grad_norm": 3.09375,
      "learning_rate": 3.172789603080031e-05,
      "loss": 0.8203,
      "step": 367130
    },
    {
      "epoch": 1.2867341681649476,
      "grad_norm": 2.90625,
      "learning_rate": 3.172724700213661e-05,
      "loss": 0.8863,
      "step": 367140
    },
    {
      "epoch": 1.286769215671843,
      "grad_norm": 2.734375,
      "learning_rate": 3.17265979734729e-05,
      "loss": 0.8245,
      "step": 367150
    },
    {
      "epoch": 1.2868042631787389,
      "grad_norm": 2.703125,
      "learning_rate": 3.17259489448092e-05,
      "loss": 0.8517,
      "step": 367160
    },
    {
      "epoch": 1.2868393106856344,
      "grad_norm": 3.15625,
      "learning_rate": 3.172529991614549e-05,
      "loss": 0.8035,
      "step": 367170
    },
    {
      "epoch": 1.28687435819253,
      "grad_norm": 2.765625,
      "learning_rate": 3.1724650887481794e-05,
      "loss": 0.8881,
      "step": 367180
    },
    {
      "epoch": 1.2869094056994257,
      "grad_norm": 2.875,
      "learning_rate": 3.1724001858818095e-05,
      "loss": 0.7787,
      "step": 367190
    },
    {
      "epoch": 1.2869444532063212,
      "grad_norm": 3.234375,
      "learning_rate": 3.172335283015439e-05,
      "loss": 0.9342,
      "step": 367200
    },
    {
      "epoch": 1.2869795007132168,
      "grad_norm": 3.046875,
      "learning_rate": 3.172270380149069e-05,
      "loss": 0.8593,
      "step": 367210
    },
    {
      "epoch": 1.2870145482201123,
      "grad_norm": 2.859375,
      "learning_rate": 3.1722054772826986e-05,
      "loss": 0.9144,
      "step": 367220
    },
    {
      "epoch": 1.2870495957270078,
      "grad_norm": 3.046875,
      "learning_rate": 3.172140574416329e-05,
      "loss": 0.8483,
      "step": 367230
    },
    {
      "epoch": 1.2870846432339036,
      "grad_norm": 2.75,
      "learning_rate": 3.172075671549958e-05,
      "loss": 0.8562,
      "step": 367240
    },
    {
      "epoch": 1.2871196907407991,
      "grad_norm": 2.515625,
      "learning_rate": 3.172010768683588e-05,
      "loss": 0.9249,
      "step": 367250
    },
    {
      "epoch": 1.2871547382476947,
      "grad_norm": 2.703125,
      "learning_rate": 3.171945865817218e-05,
      "loss": 0.8231,
      "step": 367260
    },
    {
      "epoch": 1.2871897857545904,
      "grad_norm": 3.109375,
      "learning_rate": 3.171880962950848e-05,
      "loss": 0.8148,
      "step": 367270
    },
    {
      "epoch": 1.287224833261486,
      "grad_norm": 2.625,
      "learning_rate": 3.1718160600844774e-05,
      "loss": 0.8574,
      "step": 367280
    },
    {
      "epoch": 1.2872598807683815,
      "grad_norm": 2.71875,
      "learning_rate": 3.1717511572181075e-05,
      "loss": 0.866,
      "step": 367290
    },
    {
      "epoch": 1.2872949282752773,
      "grad_norm": 2.921875,
      "learning_rate": 3.171686254351737e-05,
      "loss": 0.9499,
      "step": 367300
    },
    {
      "epoch": 1.2873299757821728,
      "grad_norm": 3.203125,
      "learning_rate": 3.171621351485367e-05,
      "loss": 0.7415,
      "step": 367310
    },
    {
      "epoch": 1.2873650232890683,
      "grad_norm": 2.703125,
      "learning_rate": 3.1715564486189966e-05,
      "loss": 0.8078,
      "step": 367320
    },
    {
      "epoch": 1.2874000707959639,
      "grad_norm": 3.0,
      "learning_rate": 3.171491545752627e-05,
      "loss": 0.868,
      "step": 367330
    },
    {
      "epoch": 1.2874351183028594,
      "grad_norm": 2.703125,
      "learning_rate": 3.171426642886257e-05,
      "loss": 0.8705,
      "step": 367340
    },
    {
      "epoch": 1.2874701658097552,
      "grad_norm": 2.59375,
      "learning_rate": 3.171361740019886e-05,
      "loss": 0.8912,
      "step": 367350
    },
    {
      "epoch": 1.2875052133166507,
      "grad_norm": 3.15625,
      "learning_rate": 3.1712968371535165e-05,
      "loss": 0.8148,
      "step": 367360
    },
    {
      "epoch": 1.2875402608235462,
      "grad_norm": 2.75,
      "learning_rate": 3.171231934287146e-05,
      "loss": 0.8519,
      "step": 367370
    },
    {
      "epoch": 1.287575308330442,
      "grad_norm": 3.171875,
      "learning_rate": 3.171167031420776e-05,
      "loss": 0.8689,
      "step": 367380
    },
    {
      "epoch": 1.2876103558373375,
      "grad_norm": 2.859375,
      "learning_rate": 3.1711021285544055e-05,
      "loss": 0.9248,
      "step": 367390
    },
    {
      "epoch": 1.287645403344233,
      "grad_norm": 2.953125,
      "learning_rate": 3.171037225688036e-05,
      "loss": 0.8836,
      "step": 367400
    },
    {
      "epoch": 1.2876804508511288,
      "grad_norm": 3.125,
      "learning_rate": 3.170972322821665e-05,
      "loss": 0.8457,
      "step": 367410
    },
    {
      "epoch": 1.2877154983580243,
      "grad_norm": 2.671875,
      "learning_rate": 3.170907419955295e-05,
      "loss": 0.8561,
      "step": 367420
    },
    {
      "epoch": 1.2877505458649199,
      "grad_norm": 2.703125,
      "learning_rate": 3.170842517088925e-05,
      "loss": 0.8829,
      "step": 367430
    },
    {
      "epoch": 1.2877855933718154,
      "grad_norm": 3.015625,
      "learning_rate": 3.170777614222555e-05,
      "loss": 0.8853,
      "step": 367440
    },
    {
      "epoch": 1.2878206408787112,
      "grad_norm": 3.03125,
      "learning_rate": 3.170712711356184e-05,
      "loss": 0.928,
      "step": 367450
    },
    {
      "epoch": 1.2878556883856067,
      "grad_norm": 2.90625,
      "learning_rate": 3.1706478084898145e-05,
      "loss": 0.8006,
      "step": 367460
    },
    {
      "epoch": 1.2878907358925022,
      "grad_norm": 2.921875,
      "learning_rate": 3.1705829056234446e-05,
      "loss": 0.9362,
      "step": 367470
    },
    {
      "epoch": 1.2879257833993978,
      "grad_norm": 2.78125,
      "learning_rate": 3.170518002757074e-05,
      "loss": 0.8698,
      "step": 367480
    },
    {
      "epoch": 1.2879608309062935,
      "grad_norm": 2.609375,
      "learning_rate": 3.170453099890704e-05,
      "loss": 0.7889,
      "step": 367490
    },
    {
      "epoch": 1.287995878413189,
      "grad_norm": 3.28125,
      "learning_rate": 3.170388197024334e-05,
      "loss": 0.9293,
      "step": 367500
    },
    {
      "epoch": 1.2880309259200846,
      "grad_norm": 2.9375,
      "learning_rate": 3.170323294157964e-05,
      "loss": 0.9201,
      "step": 367510
    },
    {
      "epoch": 1.2880659734269804,
      "grad_norm": 2.84375,
      "learning_rate": 3.170258391291593e-05,
      "loss": 0.9136,
      "step": 367520
    },
    {
      "epoch": 1.288101020933876,
      "grad_norm": 2.765625,
      "learning_rate": 3.1701934884252234e-05,
      "loss": 0.8502,
      "step": 367530
    },
    {
      "epoch": 1.2881360684407714,
      "grad_norm": 2.84375,
      "learning_rate": 3.170128585558852e-05,
      "loss": 0.8169,
      "step": 367540
    },
    {
      "epoch": 1.2881711159476672,
      "grad_norm": 2.765625,
      "learning_rate": 3.170063682692482e-05,
      "loss": 0.8833,
      "step": 367550
    },
    {
      "epoch": 1.2882061634545627,
      "grad_norm": 2.75,
      "learning_rate": 3.1699987798261125e-05,
      "loss": 0.9028,
      "step": 367560
    },
    {
      "epoch": 1.2882412109614583,
      "grad_norm": 2.8125,
      "learning_rate": 3.169933876959742e-05,
      "loss": 0.8717,
      "step": 367570
    },
    {
      "epoch": 1.2882762584683538,
      "grad_norm": 2.640625,
      "learning_rate": 3.169868974093372e-05,
      "loss": 0.7822,
      "step": 367580
    },
    {
      "epoch": 1.2883113059752493,
      "grad_norm": 3.28125,
      "learning_rate": 3.1698040712270015e-05,
      "loss": 0.8214,
      "step": 367590
    },
    {
      "epoch": 1.288346353482145,
      "grad_norm": 2.765625,
      "learning_rate": 3.169739168360632e-05,
      "loss": 0.879,
      "step": 367600
    },
    {
      "epoch": 1.2883814009890406,
      "grad_norm": 2.828125,
      "learning_rate": 3.169674265494261e-05,
      "loss": 0.8064,
      "step": 367610
    },
    {
      "epoch": 1.2884164484959362,
      "grad_norm": 2.84375,
      "learning_rate": 3.169609362627891e-05,
      "loss": 0.9297,
      "step": 367620
    },
    {
      "epoch": 1.288451496002832,
      "grad_norm": 3.15625,
      "learning_rate": 3.169544459761521e-05,
      "loss": 0.9347,
      "step": 367630
    },
    {
      "epoch": 1.2884865435097275,
      "grad_norm": 3.28125,
      "learning_rate": 3.169479556895151e-05,
      "loss": 0.9082,
      "step": 367640
    },
    {
      "epoch": 1.288521591016623,
      "grad_norm": 2.578125,
      "learning_rate": 3.16941465402878e-05,
      "loss": 0.8118,
      "step": 367650
    },
    {
      "epoch": 1.2885566385235188,
      "grad_norm": 2.96875,
      "learning_rate": 3.1693497511624105e-05,
      "loss": 0.8498,
      "step": 367660
    },
    {
      "epoch": 1.2885916860304143,
      "grad_norm": 2.5625,
      "learning_rate": 3.16928484829604e-05,
      "loss": 0.8405,
      "step": 367670
    },
    {
      "epoch": 1.2886267335373098,
      "grad_norm": 2.734375,
      "learning_rate": 3.16921994542967e-05,
      "loss": 0.9229,
      "step": 367680
    },
    {
      "epoch": 1.2886617810442054,
      "grad_norm": 3.1875,
      "learning_rate": 3.1691550425632995e-05,
      "loss": 0.8972,
      "step": 367690
    },
    {
      "epoch": 1.288696828551101,
      "grad_norm": 2.59375,
      "learning_rate": 3.16909013969693e-05,
      "loss": 0.8988,
      "step": 367700
    },
    {
      "epoch": 1.2887318760579967,
      "grad_norm": 3.0625,
      "learning_rate": 3.16902523683056e-05,
      "loss": 0.9851,
      "step": 367710
    },
    {
      "epoch": 1.2887669235648922,
      "grad_norm": 2.859375,
      "learning_rate": 3.168960333964189e-05,
      "loss": 0.835,
      "step": 367720
    },
    {
      "epoch": 1.2888019710717877,
      "grad_norm": 3.328125,
      "learning_rate": 3.1688954310978194e-05,
      "loss": 0.9172,
      "step": 367730
    },
    {
      "epoch": 1.2888370185786835,
      "grad_norm": 3.109375,
      "learning_rate": 3.168830528231449e-05,
      "loss": 0.9442,
      "step": 367740
    },
    {
      "epoch": 1.288872066085579,
      "grad_norm": 3.0625,
      "learning_rate": 3.168765625365079e-05,
      "loss": 0.7631,
      "step": 367750
    },
    {
      "epoch": 1.2889071135924746,
      "grad_norm": 2.90625,
      "learning_rate": 3.1687007224987085e-05,
      "loss": 0.8648,
      "step": 367760
    },
    {
      "epoch": 1.2889421610993703,
      "grad_norm": 3.203125,
      "learning_rate": 3.1686358196323386e-05,
      "loss": 0.8659,
      "step": 367770
    },
    {
      "epoch": 1.2889772086062659,
      "grad_norm": 2.875,
      "learning_rate": 3.168570916765968e-05,
      "loss": 0.7904,
      "step": 367780
    },
    {
      "epoch": 1.2890122561131614,
      "grad_norm": 2.921875,
      "learning_rate": 3.168506013899598e-05,
      "loss": 0.8459,
      "step": 367790
    },
    {
      "epoch": 1.289047303620057,
      "grad_norm": 3.125,
      "learning_rate": 3.168441111033228e-05,
      "loss": 0.8256,
      "step": 367800
    },
    {
      "epoch": 1.2890823511269525,
      "grad_norm": 2.765625,
      "learning_rate": 3.168376208166858e-05,
      "loss": 0.8446,
      "step": 367810
    },
    {
      "epoch": 1.2891173986338482,
      "grad_norm": 3.03125,
      "learning_rate": 3.168311305300487e-05,
      "loss": 0.8001,
      "step": 367820
    },
    {
      "epoch": 1.2891524461407438,
      "grad_norm": 3.046875,
      "learning_rate": 3.1682464024341174e-05,
      "loss": 0.8955,
      "step": 367830
    },
    {
      "epoch": 1.2891874936476393,
      "grad_norm": 2.890625,
      "learning_rate": 3.1681814995677476e-05,
      "loss": 0.8513,
      "step": 367840
    },
    {
      "epoch": 1.289222541154535,
      "grad_norm": 3.0625,
      "learning_rate": 3.168116596701377e-05,
      "loss": 0.8843,
      "step": 367850
    },
    {
      "epoch": 1.2892575886614306,
      "grad_norm": 3.078125,
      "learning_rate": 3.168051693835007e-05,
      "loss": 0.8106,
      "step": 367860
    },
    {
      "epoch": 1.2892926361683261,
      "grad_norm": 2.890625,
      "learning_rate": 3.1679867909686366e-05,
      "loss": 0.9155,
      "step": 367870
    },
    {
      "epoch": 1.2893276836752219,
      "grad_norm": 3.03125,
      "learning_rate": 3.167921888102267e-05,
      "loss": 0.8483,
      "step": 367880
    },
    {
      "epoch": 1.2893627311821174,
      "grad_norm": 3.03125,
      "learning_rate": 3.167856985235896e-05,
      "loss": 0.8728,
      "step": 367890
    },
    {
      "epoch": 1.289397778689013,
      "grad_norm": 2.828125,
      "learning_rate": 3.1677920823695264e-05,
      "loss": 0.8137,
      "step": 367900
    },
    {
      "epoch": 1.2894328261959085,
      "grad_norm": 2.953125,
      "learning_rate": 3.167727179503156e-05,
      "loss": 0.8466,
      "step": 367910
    },
    {
      "epoch": 1.289467873702804,
      "grad_norm": 2.515625,
      "learning_rate": 3.167662276636785e-05,
      "loss": 0.861,
      "step": 367920
    },
    {
      "epoch": 1.2895029212096998,
      "grad_norm": 2.59375,
      "learning_rate": 3.1675973737704154e-05,
      "loss": 0.8719,
      "step": 367930
    },
    {
      "epoch": 1.2895379687165953,
      "grad_norm": 3.15625,
      "learning_rate": 3.167532470904045e-05,
      "loss": 0.7533,
      "step": 367940
    },
    {
      "epoch": 1.2895730162234909,
      "grad_norm": 2.703125,
      "learning_rate": 3.167467568037675e-05,
      "loss": 0.7381,
      "step": 367950
    },
    {
      "epoch": 1.2896080637303866,
      "grad_norm": 3.203125,
      "learning_rate": 3.1674026651713045e-05,
      "loss": 0.8288,
      "step": 367960
    },
    {
      "epoch": 1.2896431112372821,
      "grad_norm": 3.078125,
      "learning_rate": 3.1673377623049346e-05,
      "loss": 0.8649,
      "step": 367970
    },
    {
      "epoch": 1.2896781587441777,
      "grad_norm": 2.9375,
      "learning_rate": 3.167272859438564e-05,
      "loss": 0.8149,
      "step": 367980
    },
    {
      "epoch": 1.2897132062510734,
      "grad_norm": 3.1875,
      "learning_rate": 3.167207956572194e-05,
      "loss": 0.9285,
      "step": 367990
    },
    {
      "epoch": 1.289748253757969,
      "grad_norm": 2.875,
      "learning_rate": 3.167143053705824e-05,
      "loss": 0.8625,
      "step": 368000
    },
    {
      "epoch": 1.2897833012648645,
      "grad_norm": 3.484375,
      "learning_rate": 3.167078150839454e-05,
      "loss": 0.843,
      "step": 368010
    },
    {
      "epoch": 1.28981834877176,
      "grad_norm": 3.0,
      "learning_rate": 3.167013247973083e-05,
      "loss": 0.8623,
      "step": 368020
    },
    {
      "epoch": 1.2898533962786556,
      "grad_norm": 2.515625,
      "learning_rate": 3.1669483451067134e-05,
      "loss": 0.7912,
      "step": 368030
    },
    {
      "epoch": 1.2898884437855513,
      "grad_norm": 2.671875,
      "learning_rate": 3.166883442240343e-05,
      "loss": 0.8655,
      "step": 368040
    },
    {
      "epoch": 1.2899234912924469,
      "grad_norm": 2.875,
      "learning_rate": 3.166818539373973e-05,
      "loss": 0.8864,
      "step": 368050
    },
    {
      "epoch": 1.2899585387993424,
      "grad_norm": 2.71875,
      "learning_rate": 3.166753636507603e-05,
      "loss": 0.8801,
      "step": 368060
    },
    {
      "epoch": 1.2899935863062382,
      "grad_norm": 2.609375,
      "learning_rate": 3.1666887336412326e-05,
      "loss": 0.8445,
      "step": 368070
    },
    {
      "epoch": 1.2900286338131337,
      "grad_norm": 2.828125,
      "learning_rate": 3.166623830774863e-05,
      "loss": 0.8578,
      "step": 368080
    },
    {
      "epoch": 1.2900636813200292,
      "grad_norm": 2.796875,
      "learning_rate": 3.166558927908492e-05,
      "loss": 0.843,
      "step": 368090
    },
    {
      "epoch": 1.290098728826925,
      "grad_norm": 2.609375,
      "learning_rate": 3.1664940250421224e-05,
      "loss": 0.8093,
      "step": 368100
    },
    {
      "epoch": 1.2901337763338205,
      "grad_norm": 2.828125,
      "learning_rate": 3.166429122175752e-05,
      "loss": 0.8257,
      "step": 368110
    },
    {
      "epoch": 1.290168823840716,
      "grad_norm": 3.234375,
      "learning_rate": 3.166364219309382e-05,
      "loss": 0.8474,
      "step": 368120
    },
    {
      "epoch": 1.2902038713476116,
      "grad_norm": 2.875,
      "learning_rate": 3.1662993164430114e-05,
      "loss": 0.8749,
      "step": 368130
    },
    {
      "epoch": 1.2902389188545074,
      "grad_norm": 3.15625,
      "learning_rate": 3.1662344135766416e-05,
      "loss": 0.8804,
      "step": 368140
    },
    {
      "epoch": 1.290273966361403,
      "grad_norm": 2.90625,
      "learning_rate": 3.166169510710271e-05,
      "loss": 0.8197,
      "step": 368150
    },
    {
      "epoch": 1.2903090138682984,
      "grad_norm": 3.125,
      "learning_rate": 3.166104607843901e-05,
      "loss": 0.9015,
      "step": 368160
    },
    {
      "epoch": 1.290344061375194,
      "grad_norm": 2.875,
      "learning_rate": 3.1660397049775306e-05,
      "loss": 0.8168,
      "step": 368170
    },
    {
      "epoch": 1.2903791088820897,
      "grad_norm": 2.84375,
      "learning_rate": 3.165974802111161e-05,
      "loss": 0.9086,
      "step": 368180
    },
    {
      "epoch": 1.2904141563889853,
      "grad_norm": 2.8125,
      "learning_rate": 3.16590989924479e-05,
      "loss": 0.8542,
      "step": 368190
    },
    {
      "epoch": 1.2904492038958808,
      "grad_norm": 2.890625,
      "learning_rate": 3.1658449963784204e-05,
      "loss": 0.8331,
      "step": 368200
    },
    {
      "epoch": 1.2904842514027766,
      "grad_norm": 2.953125,
      "learning_rate": 3.1657800935120505e-05,
      "loss": 0.9121,
      "step": 368210
    },
    {
      "epoch": 1.290519298909672,
      "grad_norm": 3.140625,
      "learning_rate": 3.16571519064568e-05,
      "loss": 0.8347,
      "step": 368220
    },
    {
      "epoch": 1.2905543464165676,
      "grad_norm": 2.828125,
      "learning_rate": 3.16565028777931e-05,
      "loss": 0.7735,
      "step": 368230
    },
    {
      "epoch": 1.2905893939234634,
      "grad_norm": 2.953125,
      "learning_rate": 3.1655853849129396e-05,
      "loss": 0.8288,
      "step": 368240
    },
    {
      "epoch": 1.290624441430359,
      "grad_norm": 2.953125,
      "learning_rate": 3.16552048204657e-05,
      "loss": 0.8547,
      "step": 368250
    },
    {
      "epoch": 1.2906594889372545,
      "grad_norm": 3.0625,
      "learning_rate": 3.165455579180199e-05,
      "loss": 0.863,
      "step": 368260
    },
    {
      "epoch": 1.29069453644415,
      "grad_norm": 2.9375,
      "learning_rate": 3.165390676313829e-05,
      "loss": 0.8709,
      "step": 368270
    },
    {
      "epoch": 1.2907295839510455,
      "grad_norm": 2.71875,
      "learning_rate": 3.165325773447459e-05,
      "loss": 0.7927,
      "step": 368280
    },
    {
      "epoch": 1.2907646314579413,
      "grad_norm": 2.640625,
      "learning_rate": 3.165260870581088e-05,
      "loss": 0.8862,
      "step": 368290
    },
    {
      "epoch": 1.2907996789648368,
      "grad_norm": 2.515625,
      "learning_rate": 3.1651959677147184e-05,
      "loss": 0.7943,
      "step": 368300
    },
    {
      "epoch": 1.2908347264717324,
      "grad_norm": 3.15625,
      "learning_rate": 3.165131064848348e-05,
      "loss": 0.8183,
      "step": 368310
    },
    {
      "epoch": 1.2908697739786281,
      "grad_norm": 3.5625,
      "learning_rate": 3.165066161981978e-05,
      "loss": 0.8899,
      "step": 368320
    },
    {
      "epoch": 1.2909048214855237,
      "grad_norm": 2.609375,
      "learning_rate": 3.1650012591156074e-05,
      "loss": 0.8569,
      "step": 368330
    },
    {
      "epoch": 1.2909398689924192,
      "grad_norm": 3.0625,
      "learning_rate": 3.1649363562492376e-05,
      "loss": 0.8343,
      "step": 368340
    },
    {
      "epoch": 1.290974916499315,
      "grad_norm": 2.859375,
      "learning_rate": 3.164871453382867e-05,
      "loss": 0.8266,
      "step": 368350
    },
    {
      "epoch": 1.2910099640062105,
      "grad_norm": 3.484375,
      "learning_rate": 3.164806550516497e-05,
      "loss": 0.82,
      "step": 368360
    },
    {
      "epoch": 1.291045011513106,
      "grad_norm": 2.984375,
      "learning_rate": 3.1647416476501266e-05,
      "loss": 0.9172,
      "step": 368370
    },
    {
      "epoch": 1.2910800590200016,
      "grad_norm": 2.578125,
      "learning_rate": 3.164676744783757e-05,
      "loss": 0.7906,
      "step": 368380
    },
    {
      "epoch": 1.291115106526897,
      "grad_norm": 3.15625,
      "learning_rate": 3.164611841917386e-05,
      "loss": 0.8758,
      "step": 368390
    },
    {
      "epoch": 1.2911501540337929,
      "grad_norm": 2.671875,
      "learning_rate": 3.1645469390510164e-05,
      "loss": 0.7812,
      "step": 368400
    },
    {
      "epoch": 1.2911852015406884,
      "grad_norm": 2.890625,
      "learning_rate": 3.164482036184646e-05,
      "loss": 0.8057,
      "step": 368410
    },
    {
      "epoch": 1.291220249047584,
      "grad_norm": 2.921875,
      "learning_rate": 3.164417133318276e-05,
      "loss": 0.8575,
      "step": 368420
    },
    {
      "epoch": 1.2912552965544797,
      "grad_norm": 2.71875,
      "learning_rate": 3.164352230451906e-05,
      "loss": 0.7928,
      "step": 368430
    },
    {
      "epoch": 1.2912903440613752,
      "grad_norm": 2.796875,
      "learning_rate": 3.1642873275855356e-05,
      "loss": 0.8925,
      "step": 368440
    },
    {
      "epoch": 1.2913253915682708,
      "grad_norm": 3.015625,
      "learning_rate": 3.164222424719166e-05,
      "loss": 0.858,
      "step": 368450
    },
    {
      "epoch": 1.2913604390751665,
      "grad_norm": 2.90625,
      "learning_rate": 3.164157521852795e-05,
      "loss": 0.9495,
      "step": 368460
    },
    {
      "epoch": 1.291395486582062,
      "grad_norm": 2.84375,
      "learning_rate": 3.164092618986425e-05,
      "loss": 0.7973,
      "step": 368470
    },
    {
      "epoch": 1.2914305340889576,
      "grad_norm": 2.921875,
      "learning_rate": 3.164027716120055e-05,
      "loss": 0.8885,
      "step": 368480
    },
    {
      "epoch": 1.2914655815958531,
      "grad_norm": 2.671875,
      "learning_rate": 3.163962813253685e-05,
      "loss": 0.8294,
      "step": 368490
    },
    {
      "epoch": 1.2915006291027487,
      "grad_norm": 2.84375,
      "learning_rate": 3.1638979103873144e-05,
      "loss": 0.837,
      "step": 368500
    },
    {
      "epoch": 1.2915356766096444,
      "grad_norm": 2.75,
      "learning_rate": 3.1638330075209445e-05,
      "loss": 0.7982,
      "step": 368510
    },
    {
      "epoch": 1.29157072411654,
      "grad_norm": 3.25,
      "learning_rate": 3.163768104654574e-05,
      "loss": 0.8736,
      "step": 368520
    },
    {
      "epoch": 1.2916057716234355,
      "grad_norm": 2.671875,
      "learning_rate": 3.163703201788204e-05,
      "loss": 0.7906,
      "step": 368530
    },
    {
      "epoch": 1.2916408191303312,
      "grad_norm": 3.40625,
      "learning_rate": 3.1636382989218336e-05,
      "loss": 0.8174,
      "step": 368540
    },
    {
      "epoch": 1.2916758666372268,
      "grad_norm": 2.625,
      "learning_rate": 3.163573396055464e-05,
      "loss": 0.927,
      "step": 368550
    },
    {
      "epoch": 1.2917109141441223,
      "grad_norm": 3.296875,
      "learning_rate": 3.163508493189093e-05,
      "loss": 0.9282,
      "step": 368560
    },
    {
      "epoch": 1.291745961651018,
      "grad_norm": 3.15625,
      "learning_rate": 3.163443590322723e-05,
      "loss": 0.8634,
      "step": 368570
    },
    {
      "epoch": 1.2917810091579136,
      "grad_norm": 2.625,
      "learning_rate": 3.1633786874563534e-05,
      "loss": 0.9198,
      "step": 368580
    },
    {
      "epoch": 1.2918160566648091,
      "grad_norm": 2.9375,
      "learning_rate": 3.163313784589983e-05,
      "loss": 0.7755,
      "step": 368590
    },
    {
      "epoch": 1.2918511041717047,
      "grad_norm": 2.71875,
      "learning_rate": 3.163248881723613e-05,
      "loss": 0.9309,
      "step": 368600
    },
    {
      "epoch": 1.2918861516786002,
      "grad_norm": 3.015625,
      "learning_rate": 3.1631839788572425e-05,
      "loss": 0.8612,
      "step": 368610
    },
    {
      "epoch": 1.291921199185496,
      "grad_norm": 3.109375,
      "learning_rate": 3.1631190759908726e-05,
      "loss": 0.8347,
      "step": 368620
    },
    {
      "epoch": 1.2919562466923915,
      "grad_norm": 3.0625,
      "learning_rate": 3.163054173124502e-05,
      "loss": 0.895,
      "step": 368630
    },
    {
      "epoch": 1.291991294199287,
      "grad_norm": 2.953125,
      "learning_rate": 3.162989270258132e-05,
      "loss": 0.8308,
      "step": 368640
    },
    {
      "epoch": 1.2920263417061828,
      "grad_norm": 2.515625,
      "learning_rate": 3.162924367391762e-05,
      "loss": 0.8886,
      "step": 368650
    },
    {
      "epoch": 1.2920613892130783,
      "grad_norm": 2.796875,
      "learning_rate": 3.162859464525392e-05,
      "loss": 0.784,
      "step": 368660
    },
    {
      "epoch": 1.2920964367199739,
      "grad_norm": 2.609375,
      "learning_rate": 3.162794561659021e-05,
      "loss": 0.8305,
      "step": 368670
    },
    {
      "epoch": 1.2921314842268696,
      "grad_norm": 2.890625,
      "learning_rate": 3.162729658792651e-05,
      "loss": 0.7841,
      "step": 368680
    },
    {
      "epoch": 1.2921665317337652,
      "grad_norm": 2.921875,
      "learning_rate": 3.162664755926281e-05,
      "loss": 0.8414,
      "step": 368690
    },
    {
      "epoch": 1.2922015792406607,
      "grad_norm": 2.796875,
      "learning_rate": 3.1625998530599104e-05,
      "loss": 0.891,
      "step": 368700
    },
    {
      "epoch": 1.2922366267475562,
      "grad_norm": 2.9375,
      "learning_rate": 3.1625349501935405e-05,
      "loss": 0.9562,
      "step": 368710
    },
    {
      "epoch": 1.2922716742544518,
      "grad_norm": 3.109375,
      "learning_rate": 3.16247004732717e-05,
      "loss": 0.9265,
      "step": 368720
    },
    {
      "epoch": 1.2923067217613475,
      "grad_norm": 3.171875,
      "learning_rate": 3.1624051444608e-05,
      "loss": 0.8714,
      "step": 368730
    },
    {
      "epoch": 1.292341769268243,
      "grad_norm": 2.4375,
      "learning_rate": 3.1623402415944295e-05,
      "loss": 0.8593,
      "step": 368740
    },
    {
      "epoch": 1.2923768167751386,
      "grad_norm": 3.0625,
      "learning_rate": 3.16227533872806e-05,
      "loss": 0.8851,
      "step": 368750
    },
    {
      "epoch": 1.2924118642820344,
      "grad_norm": 3.21875,
      "learning_rate": 3.162210435861689e-05,
      "loss": 0.9047,
      "step": 368760
    },
    {
      "epoch": 1.29244691178893,
      "grad_norm": 2.875,
      "learning_rate": 3.162145532995319e-05,
      "loss": 0.8325,
      "step": 368770
    },
    {
      "epoch": 1.2924819592958254,
      "grad_norm": 3.15625,
      "learning_rate": 3.162080630128949e-05,
      "loss": 0.8265,
      "step": 368780
    },
    {
      "epoch": 1.2925170068027212,
      "grad_norm": 3.078125,
      "learning_rate": 3.162015727262579e-05,
      "loss": 0.8056,
      "step": 368790
    },
    {
      "epoch": 1.2925520543096167,
      "grad_norm": 3.203125,
      "learning_rate": 3.161950824396209e-05,
      "loss": 0.8187,
      "step": 368800
    },
    {
      "epoch": 1.2925871018165123,
      "grad_norm": 3.03125,
      "learning_rate": 3.1618859215298385e-05,
      "loss": 0.8657,
      "step": 368810
    },
    {
      "epoch": 1.2926221493234078,
      "grad_norm": 2.609375,
      "learning_rate": 3.1618210186634686e-05,
      "loss": 0.9175,
      "step": 368820
    },
    {
      "epoch": 1.2926571968303036,
      "grad_norm": 3.25,
      "learning_rate": 3.161756115797098e-05,
      "loss": 0.8455,
      "step": 368830
    },
    {
      "epoch": 1.292692244337199,
      "grad_norm": 3.40625,
      "learning_rate": 3.161691212930728e-05,
      "loss": 0.9375,
      "step": 368840
    },
    {
      "epoch": 1.2927272918440946,
      "grad_norm": 2.71875,
      "learning_rate": 3.161626310064358e-05,
      "loss": 0.8676,
      "step": 368850
    },
    {
      "epoch": 1.2927623393509902,
      "grad_norm": 2.96875,
      "learning_rate": 3.161561407197988e-05,
      "loss": 0.8167,
      "step": 368860
    },
    {
      "epoch": 1.292797386857886,
      "grad_norm": 2.765625,
      "learning_rate": 3.161496504331617e-05,
      "loss": 0.8505,
      "step": 368870
    },
    {
      "epoch": 1.2928324343647815,
      "grad_norm": 2.984375,
      "learning_rate": 3.1614316014652474e-05,
      "loss": 0.8908,
      "step": 368880
    },
    {
      "epoch": 1.292867481871677,
      "grad_norm": 2.453125,
      "learning_rate": 3.161366698598877e-05,
      "loss": 0.8647,
      "step": 368890
    },
    {
      "epoch": 1.2929025293785728,
      "grad_norm": 2.796875,
      "learning_rate": 3.161301795732507e-05,
      "loss": 0.8665,
      "step": 368900
    },
    {
      "epoch": 1.2929375768854683,
      "grad_norm": 3.109375,
      "learning_rate": 3.1612368928661365e-05,
      "loss": 0.8615,
      "step": 368910
    },
    {
      "epoch": 1.2929726243923638,
      "grad_norm": 2.40625,
      "learning_rate": 3.1611719899997666e-05,
      "loss": 0.9086,
      "step": 368920
    },
    {
      "epoch": 1.2930076718992596,
      "grad_norm": 3.0,
      "learning_rate": 3.161107087133397e-05,
      "loss": 0.8805,
      "step": 368930
    },
    {
      "epoch": 1.2930427194061551,
      "grad_norm": 3.015625,
      "learning_rate": 3.161042184267026e-05,
      "loss": 0.7768,
      "step": 368940
    },
    {
      "epoch": 1.2930777669130507,
      "grad_norm": 2.671875,
      "learning_rate": 3.1609772814006564e-05,
      "loss": 0.7832,
      "step": 368950
    },
    {
      "epoch": 1.2931128144199462,
      "grad_norm": 2.859375,
      "learning_rate": 3.160912378534286e-05,
      "loss": 0.8844,
      "step": 368960
    },
    {
      "epoch": 1.2931478619268417,
      "grad_norm": 2.84375,
      "learning_rate": 3.160847475667916e-05,
      "loss": 0.7846,
      "step": 368970
    },
    {
      "epoch": 1.2931829094337375,
      "grad_norm": 2.421875,
      "learning_rate": 3.1607825728015454e-05,
      "loss": 0.8219,
      "step": 368980
    },
    {
      "epoch": 1.293217956940633,
      "grad_norm": 3.375,
      "learning_rate": 3.1607176699351756e-05,
      "loss": 0.9133,
      "step": 368990
    },
    {
      "epoch": 1.2932530044475286,
      "grad_norm": 2.71875,
      "learning_rate": 3.160652767068805e-05,
      "loss": 0.8156,
      "step": 369000
    },
    {
      "epoch": 1.2932880519544243,
      "grad_norm": 3.34375,
      "learning_rate": 3.160587864202435e-05,
      "loss": 0.853,
      "step": 369010
    },
    {
      "epoch": 1.2933230994613198,
      "grad_norm": 2.671875,
      "learning_rate": 3.1605229613360646e-05,
      "loss": 0.9299,
      "step": 369020
    },
    {
      "epoch": 1.2933581469682154,
      "grad_norm": 3.140625,
      "learning_rate": 3.160458058469695e-05,
      "loss": 0.8361,
      "step": 369030
    },
    {
      "epoch": 1.2933931944751111,
      "grad_norm": 2.734375,
      "learning_rate": 3.160393155603324e-05,
      "loss": 0.8505,
      "step": 369040
    },
    {
      "epoch": 1.2934282419820067,
      "grad_norm": 2.859375,
      "learning_rate": 3.160328252736954e-05,
      "loss": 0.8474,
      "step": 369050
    },
    {
      "epoch": 1.2934632894889022,
      "grad_norm": 3.15625,
      "learning_rate": 3.160263349870584e-05,
      "loss": 0.9284,
      "step": 369060
    },
    {
      "epoch": 1.2934983369957977,
      "grad_norm": 2.875,
      "learning_rate": 3.160198447004213e-05,
      "loss": 0.7449,
      "step": 369070
    },
    {
      "epoch": 1.2935333845026933,
      "grad_norm": 3.109375,
      "learning_rate": 3.1601335441378434e-05,
      "loss": 0.8519,
      "step": 369080
    },
    {
      "epoch": 1.293568432009589,
      "grad_norm": 2.75,
      "learning_rate": 3.160068641271473e-05,
      "loss": 0.8384,
      "step": 369090
    },
    {
      "epoch": 1.2936034795164846,
      "grad_norm": 2.796875,
      "learning_rate": 3.160003738405103e-05,
      "loss": 0.8459,
      "step": 369100
    },
    {
      "epoch": 1.2936385270233801,
      "grad_norm": 2.578125,
      "learning_rate": 3.1599388355387325e-05,
      "loss": 0.8488,
      "step": 369110
    },
    {
      "epoch": 1.2936735745302759,
      "grad_norm": 2.421875,
      "learning_rate": 3.1598739326723626e-05,
      "loss": 0.7635,
      "step": 369120
    },
    {
      "epoch": 1.2937086220371714,
      "grad_norm": 2.96875,
      "learning_rate": 3.159809029805992e-05,
      "loss": 0.8572,
      "step": 369130
    },
    {
      "epoch": 1.293743669544067,
      "grad_norm": 2.65625,
      "learning_rate": 3.159744126939622e-05,
      "loss": 0.8229,
      "step": 369140
    },
    {
      "epoch": 1.2937787170509627,
      "grad_norm": 2.75,
      "learning_rate": 3.159679224073252e-05,
      "loss": 0.8814,
      "step": 369150
    },
    {
      "epoch": 1.2938137645578582,
      "grad_norm": 2.546875,
      "learning_rate": 3.159614321206882e-05,
      "loss": 0.8679,
      "step": 369160
    },
    {
      "epoch": 1.2938488120647538,
      "grad_norm": 2.828125,
      "learning_rate": 3.159549418340512e-05,
      "loss": 0.8351,
      "step": 369170
    },
    {
      "epoch": 1.2938838595716493,
      "grad_norm": 2.8125,
      "learning_rate": 3.1594845154741414e-05,
      "loss": 0.8606,
      "step": 369180
    },
    {
      "epoch": 1.2939189070785448,
      "grad_norm": 3.65625,
      "learning_rate": 3.1594196126077716e-05,
      "loss": 0.8963,
      "step": 369190
    },
    {
      "epoch": 1.2939539545854406,
      "grad_norm": 3.03125,
      "learning_rate": 3.159354709741401e-05,
      "loss": 0.8083,
      "step": 369200
    },
    {
      "epoch": 1.2939890020923361,
      "grad_norm": 2.78125,
      "learning_rate": 3.159289806875031e-05,
      "loss": 0.9274,
      "step": 369210
    },
    {
      "epoch": 1.2940240495992317,
      "grad_norm": 3.28125,
      "learning_rate": 3.1592249040086606e-05,
      "loss": 0.8361,
      "step": 369220
    },
    {
      "epoch": 1.2940590971061274,
      "grad_norm": 3.046875,
      "learning_rate": 3.159160001142291e-05,
      "loss": 0.8697,
      "step": 369230
    },
    {
      "epoch": 1.294094144613023,
      "grad_norm": 2.9375,
      "learning_rate": 3.15909509827592e-05,
      "loss": 0.8198,
      "step": 369240
    },
    {
      "epoch": 1.2941291921199185,
      "grad_norm": 2.578125,
      "learning_rate": 3.1590301954095504e-05,
      "loss": 0.8554,
      "step": 369250
    },
    {
      "epoch": 1.2941642396268143,
      "grad_norm": 2.640625,
      "learning_rate": 3.15896529254318e-05,
      "loss": 0.8203,
      "step": 369260
    },
    {
      "epoch": 1.2941992871337098,
      "grad_norm": 2.96875,
      "learning_rate": 3.15890038967681e-05,
      "loss": 0.8673,
      "step": 369270
    },
    {
      "epoch": 1.2942343346406053,
      "grad_norm": 2.65625,
      "learning_rate": 3.1588354868104394e-05,
      "loss": 0.7792,
      "step": 369280
    },
    {
      "epoch": 1.2942693821475009,
      "grad_norm": 2.96875,
      "learning_rate": 3.1587705839440696e-05,
      "loss": 0.8941,
      "step": 369290
    },
    {
      "epoch": 1.2943044296543964,
      "grad_norm": 3.109375,
      "learning_rate": 3.1587056810777e-05,
      "loss": 0.8194,
      "step": 369300
    },
    {
      "epoch": 1.2943394771612922,
      "grad_norm": 2.796875,
      "learning_rate": 3.158640778211329e-05,
      "loss": 0.8682,
      "step": 369310
    },
    {
      "epoch": 1.2943745246681877,
      "grad_norm": 2.78125,
      "learning_rate": 3.158575875344959e-05,
      "loss": 0.8859,
      "step": 369320
    },
    {
      "epoch": 1.2944095721750832,
      "grad_norm": 2.484375,
      "learning_rate": 3.158510972478589e-05,
      "loss": 0.7824,
      "step": 369330
    },
    {
      "epoch": 1.294444619681979,
      "grad_norm": 3.09375,
      "learning_rate": 3.158446069612219e-05,
      "loss": 0.8911,
      "step": 369340
    },
    {
      "epoch": 1.2944796671888745,
      "grad_norm": 2.859375,
      "learning_rate": 3.1583811667458484e-05,
      "loss": 0.8685,
      "step": 369350
    },
    {
      "epoch": 1.29451471469577,
      "grad_norm": 2.6875,
      "learning_rate": 3.1583162638794785e-05,
      "loss": 0.7869,
      "step": 369360
    },
    {
      "epoch": 1.2945497622026658,
      "grad_norm": 2.890625,
      "learning_rate": 3.158251361013108e-05,
      "loss": 0.8426,
      "step": 369370
    },
    {
      "epoch": 1.2945848097095614,
      "grad_norm": 3.53125,
      "learning_rate": 3.158186458146738e-05,
      "loss": 0.8877,
      "step": 369380
    },
    {
      "epoch": 1.294619857216457,
      "grad_norm": 2.625,
      "learning_rate": 3.1581215552803676e-05,
      "loss": 0.8872,
      "step": 369390
    },
    {
      "epoch": 1.2946549047233524,
      "grad_norm": 3.25,
      "learning_rate": 3.158056652413998e-05,
      "loss": 0.8468,
      "step": 369400
    },
    {
      "epoch": 1.294689952230248,
      "grad_norm": 2.6875,
      "learning_rate": 3.157991749547627e-05,
      "loss": 0.79,
      "step": 369410
    },
    {
      "epoch": 1.2947249997371437,
      "grad_norm": 3.21875,
      "learning_rate": 3.1579268466812566e-05,
      "loss": 0.7778,
      "step": 369420
    },
    {
      "epoch": 1.2947600472440393,
      "grad_norm": 2.65625,
      "learning_rate": 3.157861943814887e-05,
      "loss": 0.8012,
      "step": 369430
    },
    {
      "epoch": 1.2947950947509348,
      "grad_norm": 3.0,
      "learning_rate": 3.157797040948516e-05,
      "loss": 0.8855,
      "step": 369440
    },
    {
      "epoch": 1.2948301422578306,
      "grad_norm": 2.703125,
      "learning_rate": 3.1577321380821464e-05,
      "loss": 0.8453,
      "step": 369450
    },
    {
      "epoch": 1.294865189764726,
      "grad_norm": 2.90625,
      "learning_rate": 3.157667235215776e-05,
      "loss": 0.887,
      "step": 369460
    },
    {
      "epoch": 1.2949002372716216,
      "grad_norm": 3.140625,
      "learning_rate": 3.157602332349406e-05,
      "loss": 0.8663,
      "step": 369470
    },
    {
      "epoch": 1.2949352847785174,
      "grad_norm": 2.78125,
      "learning_rate": 3.1575374294830354e-05,
      "loss": 0.8673,
      "step": 369480
    },
    {
      "epoch": 1.294970332285413,
      "grad_norm": 2.9375,
      "learning_rate": 3.1574725266166656e-05,
      "loss": 0.7621,
      "step": 369490
    },
    {
      "epoch": 1.2950053797923085,
      "grad_norm": 2.75,
      "learning_rate": 3.157407623750295e-05,
      "loss": 0.8715,
      "step": 369500
    },
    {
      "epoch": 1.295040427299204,
      "grad_norm": 2.34375,
      "learning_rate": 3.157342720883925e-05,
      "loss": 0.8602,
      "step": 369510
    },
    {
      "epoch": 1.2950754748060997,
      "grad_norm": 2.9375,
      "learning_rate": 3.1572778180175546e-05,
      "loss": 0.8793,
      "step": 369520
    },
    {
      "epoch": 1.2951105223129953,
      "grad_norm": 3.0625,
      "learning_rate": 3.157212915151185e-05,
      "loss": 0.9196,
      "step": 369530
    },
    {
      "epoch": 1.2951455698198908,
      "grad_norm": 3.0,
      "learning_rate": 3.157148012284815e-05,
      "loss": 0.8166,
      "step": 369540
    },
    {
      "epoch": 1.2951806173267864,
      "grad_norm": 2.828125,
      "learning_rate": 3.1570831094184444e-05,
      "loss": 0.8556,
      "step": 369550
    },
    {
      "epoch": 1.2952156648336821,
      "grad_norm": 2.703125,
      "learning_rate": 3.1570182065520745e-05,
      "loss": 0.8005,
      "step": 369560
    },
    {
      "epoch": 1.2952507123405776,
      "grad_norm": 3.125,
      "learning_rate": 3.156953303685704e-05,
      "loss": 0.8355,
      "step": 369570
    },
    {
      "epoch": 1.2952857598474732,
      "grad_norm": 2.65625,
      "learning_rate": 3.156888400819334e-05,
      "loss": 0.8774,
      "step": 369580
    },
    {
      "epoch": 1.295320807354369,
      "grad_norm": 2.8125,
      "learning_rate": 3.1568234979529636e-05,
      "loss": 0.8258,
      "step": 369590
    },
    {
      "epoch": 1.2953558548612645,
      "grad_norm": 2.96875,
      "learning_rate": 3.156758595086594e-05,
      "loss": 0.8513,
      "step": 369600
    },
    {
      "epoch": 1.29539090236816,
      "grad_norm": 3.375,
      "learning_rate": 3.156693692220223e-05,
      "loss": 0.8826,
      "step": 369610
    },
    {
      "epoch": 1.2954259498750558,
      "grad_norm": 2.96875,
      "learning_rate": 3.156628789353853e-05,
      "loss": 0.8604,
      "step": 369620
    },
    {
      "epoch": 1.2954609973819513,
      "grad_norm": 2.765625,
      "learning_rate": 3.156563886487483e-05,
      "loss": 0.8363,
      "step": 369630
    },
    {
      "epoch": 1.2954960448888468,
      "grad_norm": 2.953125,
      "learning_rate": 3.156498983621113e-05,
      "loss": 0.8535,
      "step": 369640
    },
    {
      "epoch": 1.2955310923957424,
      "grad_norm": 3.375,
      "learning_rate": 3.1564340807547424e-05,
      "loss": 0.8954,
      "step": 369650
    },
    {
      "epoch": 1.295566139902638,
      "grad_norm": 3.03125,
      "learning_rate": 3.1563691778883725e-05,
      "loss": 0.8502,
      "step": 369660
    },
    {
      "epoch": 1.2956011874095337,
      "grad_norm": 2.796875,
      "learning_rate": 3.1563042750220026e-05,
      "loss": 0.8135,
      "step": 369670
    },
    {
      "epoch": 1.2956362349164292,
      "grad_norm": 2.6875,
      "learning_rate": 3.156239372155632e-05,
      "loss": 0.8749,
      "step": 369680
    },
    {
      "epoch": 1.2956712824233247,
      "grad_norm": 2.828125,
      "learning_rate": 3.156174469289262e-05,
      "loss": 0.9264,
      "step": 369690
    },
    {
      "epoch": 1.2957063299302205,
      "grad_norm": 2.640625,
      "learning_rate": 3.156109566422892e-05,
      "loss": 0.8496,
      "step": 369700
    },
    {
      "epoch": 1.295741377437116,
      "grad_norm": 3.0625,
      "learning_rate": 3.156044663556522e-05,
      "loss": 0.8687,
      "step": 369710
    },
    {
      "epoch": 1.2957764249440116,
      "grad_norm": 2.953125,
      "learning_rate": 3.155979760690151e-05,
      "loss": 0.875,
      "step": 369720
    },
    {
      "epoch": 1.2958114724509073,
      "grad_norm": 3.03125,
      "learning_rate": 3.1559148578237814e-05,
      "loss": 0.8174,
      "step": 369730
    },
    {
      "epoch": 1.2958465199578029,
      "grad_norm": 2.890625,
      "learning_rate": 3.155849954957411e-05,
      "loss": 0.8674,
      "step": 369740
    },
    {
      "epoch": 1.2958815674646984,
      "grad_norm": 2.9375,
      "learning_rate": 3.155785052091041e-05,
      "loss": 0.8592,
      "step": 369750
    },
    {
      "epoch": 1.295916614971594,
      "grad_norm": 2.796875,
      "learning_rate": 3.1557201492246705e-05,
      "loss": 0.841,
      "step": 369760
    },
    {
      "epoch": 1.2959516624784895,
      "grad_norm": 2.65625,
      "learning_rate": 3.1556552463583006e-05,
      "loss": 0.779,
      "step": 369770
    },
    {
      "epoch": 1.2959867099853852,
      "grad_norm": 2.6875,
      "learning_rate": 3.15559034349193e-05,
      "loss": 0.875,
      "step": 369780
    },
    {
      "epoch": 1.2960217574922808,
      "grad_norm": 2.828125,
      "learning_rate": 3.15552544062556e-05,
      "loss": 0.8497,
      "step": 369790
    },
    {
      "epoch": 1.2960568049991763,
      "grad_norm": 3.0,
      "learning_rate": 3.15546053775919e-05,
      "loss": 0.8204,
      "step": 369800
    },
    {
      "epoch": 1.296091852506072,
      "grad_norm": 2.765625,
      "learning_rate": 3.155395634892819e-05,
      "loss": 0.8831,
      "step": 369810
    },
    {
      "epoch": 1.2961269000129676,
      "grad_norm": 2.671875,
      "learning_rate": 3.155330732026449e-05,
      "loss": 0.8442,
      "step": 369820
    },
    {
      "epoch": 1.2961619475198631,
      "grad_norm": 2.8125,
      "learning_rate": 3.155265829160079e-05,
      "loss": 0.8342,
      "step": 369830
    },
    {
      "epoch": 1.296196995026759,
      "grad_norm": 3.015625,
      "learning_rate": 3.155200926293709e-05,
      "loss": 0.9238,
      "step": 369840
    },
    {
      "epoch": 1.2962320425336544,
      "grad_norm": 2.828125,
      "learning_rate": 3.1551360234273384e-05,
      "loss": 0.8838,
      "step": 369850
    },
    {
      "epoch": 1.29626709004055,
      "grad_norm": 3.078125,
      "learning_rate": 3.1550711205609685e-05,
      "loss": 0.8399,
      "step": 369860
    },
    {
      "epoch": 1.2963021375474455,
      "grad_norm": 2.578125,
      "learning_rate": 3.155006217694598e-05,
      "loss": 0.8033,
      "step": 369870
    },
    {
      "epoch": 1.296337185054341,
      "grad_norm": 3.265625,
      "learning_rate": 3.154941314828228e-05,
      "loss": 0.7992,
      "step": 369880
    },
    {
      "epoch": 1.2963722325612368,
      "grad_norm": 2.734375,
      "learning_rate": 3.1548764119618576e-05,
      "loss": 0.8343,
      "step": 369890
    },
    {
      "epoch": 1.2964072800681323,
      "grad_norm": 3.34375,
      "learning_rate": 3.154811509095488e-05,
      "loss": 0.8688,
      "step": 369900
    },
    {
      "epoch": 1.2964423275750279,
      "grad_norm": 2.890625,
      "learning_rate": 3.154746606229118e-05,
      "loss": 0.8233,
      "step": 369910
    },
    {
      "epoch": 1.2964773750819236,
      "grad_norm": 2.75,
      "learning_rate": 3.154681703362747e-05,
      "loss": 0.8104,
      "step": 369920
    },
    {
      "epoch": 1.2965124225888192,
      "grad_norm": 3.28125,
      "learning_rate": 3.1546168004963774e-05,
      "loss": 0.8466,
      "step": 369930
    },
    {
      "epoch": 1.2965474700957147,
      "grad_norm": 2.453125,
      "learning_rate": 3.154551897630007e-05,
      "loss": 0.8416,
      "step": 369940
    },
    {
      "epoch": 1.2965825176026105,
      "grad_norm": 2.578125,
      "learning_rate": 3.154486994763637e-05,
      "loss": 0.7967,
      "step": 369950
    },
    {
      "epoch": 1.296617565109506,
      "grad_norm": 3.0625,
      "learning_rate": 3.1544220918972665e-05,
      "loss": 0.8674,
      "step": 369960
    },
    {
      "epoch": 1.2966526126164015,
      "grad_norm": 2.734375,
      "learning_rate": 3.1543571890308966e-05,
      "loss": 0.8092,
      "step": 369970
    },
    {
      "epoch": 1.296687660123297,
      "grad_norm": 2.859375,
      "learning_rate": 3.154292286164526e-05,
      "loss": 0.843,
      "step": 369980
    },
    {
      "epoch": 1.2967227076301926,
      "grad_norm": 2.875,
      "learning_rate": 3.154227383298156e-05,
      "loss": 0.866,
      "step": 369990
    },
    {
      "epoch": 1.2967577551370884,
      "grad_norm": 2.640625,
      "learning_rate": 3.154162480431786e-05,
      "loss": 0.8116,
      "step": 370000
    },
    {
      "epoch": 1.2967577551370884,
      "eval_loss": 0.7998261451721191,
      "eval_runtime": 565.3775,
      "eval_samples_per_second": 672.888,
      "eval_steps_per_second": 56.074,
      "step": 370000
    },
    {
      "epoch": 1.2967928026439839,
      "grad_norm": 2.890625,
      "learning_rate": 3.154097577565416e-05,
      "loss": 0.911,
      "step": 370010
    },
    {
      "epoch": 1.2968278501508794,
      "grad_norm": 2.921875,
      "learning_rate": 3.154032674699045e-05,
      "loss": 0.9133,
      "step": 370020
    },
    {
      "epoch": 1.2968628976577752,
      "grad_norm": 3.0625,
      "learning_rate": 3.1539677718326754e-05,
      "loss": 0.8093,
      "step": 370030
    },
    {
      "epoch": 1.2968979451646707,
      "grad_norm": 3.296875,
      "learning_rate": 3.1539028689663056e-05,
      "loss": 0.8739,
      "step": 370040
    },
    {
      "epoch": 1.2969329926715663,
      "grad_norm": 3.609375,
      "learning_rate": 3.153837966099935e-05,
      "loss": 0.9039,
      "step": 370050
    },
    {
      "epoch": 1.296968040178462,
      "grad_norm": 3.1875,
      "learning_rate": 3.153773063233565e-05,
      "loss": 0.8475,
      "step": 370060
    },
    {
      "epoch": 1.2970030876853575,
      "grad_norm": 2.78125,
      "learning_rate": 3.1537081603671946e-05,
      "loss": 0.825,
      "step": 370070
    },
    {
      "epoch": 1.297038135192253,
      "grad_norm": 3.09375,
      "learning_rate": 3.153643257500825e-05,
      "loss": 0.8594,
      "step": 370080
    },
    {
      "epoch": 1.2970731826991486,
      "grad_norm": 2.53125,
      "learning_rate": 3.153578354634454e-05,
      "loss": 0.9109,
      "step": 370090
    },
    {
      "epoch": 1.2971082302060442,
      "grad_norm": 2.484375,
      "learning_rate": 3.1535134517680844e-05,
      "loss": 0.8678,
      "step": 370100
    },
    {
      "epoch": 1.29714327771294,
      "grad_norm": 3.296875,
      "learning_rate": 3.153448548901714e-05,
      "loss": 0.8446,
      "step": 370110
    },
    {
      "epoch": 1.2971783252198354,
      "grad_norm": 2.5,
      "learning_rate": 3.153383646035344e-05,
      "loss": 0.865,
      "step": 370120
    },
    {
      "epoch": 1.297213372726731,
      "grad_norm": 2.859375,
      "learning_rate": 3.1533187431689734e-05,
      "loss": 0.8862,
      "step": 370130
    },
    {
      "epoch": 1.2972484202336267,
      "grad_norm": 2.8125,
      "learning_rate": 3.1532538403026036e-05,
      "loss": 0.8515,
      "step": 370140
    },
    {
      "epoch": 1.2972834677405223,
      "grad_norm": 3.0625,
      "learning_rate": 3.153188937436233e-05,
      "loss": 0.8846,
      "step": 370150
    },
    {
      "epoch": 1.2973185152474178,
      "grad_norm": 2.890625,
      "learning_rate": 3.153124034569863e-05,
      "loss": 0.8143,
      "step": 370160
    },
    {
      "epoch": 1.2973535627543136,
      "grad_norm": 2.71875,
      "learning_rate": 3.1530591317034926e-05,
      "loss": 0.9729,
      "step": 370170
    },
    {
      "epoch": 1.297388610261209,
      "grad_norm": 2.671875,
      "learning_rate": 3.152994228837122e-05,
      "loss": 0.8779,
      "step": 370180
    },
    {
      "epoch": 1.2974236577681046,
      "grad_norm": 2.984375,
      "learning_rate": 3.152929325970752e-05,
      "loss": 0.8775,
      "step": 370190
    },
    {
      "epoch": 1.2974587052750004,
      "grad_norm": 2.953125,
      "learning_rate": 3.152864423104382e-05,
      "loss": 0.8568,
      "step": 370200
    },
    {
      "epoch": 1.297493752781896,
      "grad_norm": 2.671875,
      "learning_rate": 3.152799520238012e-05,
      "loss": 0.7928,
      "step": 370210
    },
    {
      "epoch": 1.2975288002887915,
      "grad_norm": 2.828125,
      "learning_rate": 3.152734617371641e-05,
      "loss": 0.8726,
      "step": 370220
    },
    {
      "epoch": 1.297563847795687,
      "grad_norm": 2.90625,
      "learning_rate": 3.1526697145052714e-05,
      "loss": 0.8756,
      "step": 370230
    },
    {
      "epoch": 1.2975988953025825,
      "grad_norm": 2.5,
      "learning_rate": 3.152604811638901e-05,
      "loss": 0.8479,
      "step": 370240
    },
    {
      "epoch": 1.2976339428094783,
      "grad_norm": 2.59375,
      "learning_rate": 3.152539908772531e-05,
      "loss": 0.9805,
      "step": 370250
    },
    {
      "epoch": 1.2976689903163738,
      "grad_norm": 2.953125,
      "learning_rate": 3.152475005906161e-05,
      "loss": 0.8998,
      "step": 370260
    },
    {
      "epoch": 1.2977040378232694,
      "grad_norm": 2.953125,
      "learning_rate": 3.1524101030397906e-05,
      "loss": 0.8322,
      "step": 370270
    },
    {
      "epoch": 1.2977390853301651,
      "grad_norm": 3.109375,
      "learning_rate": 3.152345200173421e-05,
      "loss": 0.9752,
      "step": 370280
    },
    {
      "epoch": 1.2977741328370607,
      "grad_norm": 2.828125,
      "learning_rate": 3.15228029730705e-05,
      "loss": 0.8948,
      "step": 370290
    },
    {
      "epoch": 1.2978091803439562,
      "grad_norm": 2.828125,
      "learning_rate": 3.1522153944406804e-05,
      "loss": 0.8373,
      "step": 370300
    },
    {
      "epoch": 1.297844227850852,
      "grad_norm": 2.890625,
      "learning_rate": 3.15215049157431e-05,
      "loss": 0.8828,
      "step": 370310
    },
    {
      "epoch": 1.2978792753577475,
      "grad_norm": 2.6875,
      "learning_rate": 3.15208558870794e-05,
      "loss": 0.8653,
      "step": 370320
    },
    {
      "epoch": 1.297914322864643,
      "grad_norm": 3.046875,
      "learning_rate": 3.1520206858415694e-05,
      "loss": 0.8624,
      "step": 370330
    },
    {
      "epoch": 1.2979493703715386,
      "grad_norm": 2.4375,
      "learning_rate": 3.1519557829751996e-05,
      "loss": 0.8183,
      "step": 370340
    },
    {
      "epoch": 1.297984417878434,
      "grad_norm": 2.71875,
      "learning_rate": 3.151890880108829e-05,
      "loss": 0.8642,
      "step": 370350
    },
    {
      "epoch": 1.2980194653853299,
      "grad_norm": 3.140625,
      "learning_rate": 3.151825977242459e-05,
      "loss": 0.8129,
      "step": 370360
    },
    {
      "epoch": 1.2980545128922254,
      "grad_norm": 2.875,
      "learning_rate": 3.1517610743760886e-05,
      "loss": 0.8411,
      "step": 370370
    },
    {
      "epoch": 1.298089560399121,
      "grad_norm": 2.8125,
      "learning_rate": 3.151696171509719e-05,
      "loss": 0.8605,
      "step": 370380
    },
    {
      "epoch": 1.2981246079060167,
      "grad_norm": 2.75,
      "learning_rate": 3.151631268643348e-05,
      "loss": 0.8126,
      "step": 370390
    },
    {
      "epoch": 1.2981596554129122,
      "grad_norm": 2.578125,
      "learning_rate": 3.1515663657769784e-05,
      "loss": 0.8303,
      "step": 370400
    },
    {
      "epoch": 1.2981947029198078,
      "grad_norm": 2.75,
      "learning_rate": 3.1515014629106085e-05,
      "loss": 0.8499,
      "step": 370410
    },
    {
      "epoch": 1.2982297504267035,
      "grad_norm": 3.046875,
      "learning_rate": 3.151436560044238e-05,
      "loss": 0.8667,
      "step": 370420
    },
    {
      "epoch": 1.298264797933599,
      "grad_norm": 3.078125,
      "learning_rate": 3.151371657177868e-05,
      "loss": 0.8904,
      "step": 370430
    },
    {
      "epoch": 1.2982998454404946,
      "grad_norm": 3.046875,
      "learning_rate": 3.1513067543114976e-05,
      "loss": 0.852,
      "step": 370440
    },
    {
      "epoch": 1.2983348929473901,
      "grad_norm": 2.875,
      "learning_rate": 3.151241851445128e-05,
      "loss": 0.9471,
      "step": 370450
    },
    {
      "epoch": 1.2983699404542857,
      "grad_norm": 2.734375,
      "learning_rate": 3.151176948578757e-05,
      "loss": 0.8934,
      "step": 370460
    },
    {
      "epoch": 1.2984049879611814,
      "grad_norm": 2.890625,
      "learning_rate": 3.151112045712387e-05,
      "loss": 0.809,
      "step": 370470
    },
    {
      "epoch": 1.298440035468077,
      "grad_norm": 3.203125,
      "learning_rate": 3.151047142846017e-05,
      "loss": 0.8587,
      "step": 370480
    },
    {
      "epoch": 1.2984750829749725,
      "grad_norm": 2.875,
      "learning_rate": 3.150982239979647e-05,
      "loss": 0.8979,
      "step": 370490
    },
    {
      "epoch": 1.2985101304818683,
      "grad_norm": 2.765625,
      "learning_rate": 3.1509173371132764e-05,
      "loss": 0.8229,
      "step": 370500
    },
    {
      "epoch": 1.2985451779887638,
      "grad_norm": 2.8125,
      "learning_rate": 3.1508524342469065e-05,
      "loss": 0.8992,
      "step": 370510
    },
    {
      "epoch": 1.2985802254956593,
      "grad_norm": 2.84375,
      "learning_rate": 3.150787531380536e-05,
      "loss": 0.8434,
      "step": 370520
    },
    {
      "epoch": 1.298615273002555,
      "grad_norm": 2.65625,
      "learning_rate": 3.150722628514166e-05,
      "loss": 0.8219,
      "step": 370530
    },
    {
      "epoch": 1.2986503205094506,
      "grad_norm": 3.171875,
      "learning_rate": 3.150657725647796e-05,
      "loss": 0.8857,
      "step": 370540
    },
    {
      "epoch": 1.2986853680163462,
      "grad_norm": 2.875,
      "learning_rate": 3.150592822781425e-05,
      "loss": 0.8382,
      "step": 370550
    },
    {
      "epoch": 1.2987204155232417,
      "grad_norm": 3.0625,
      "learning_rate": 3.150527919915055e-05,
      "loss": 0.8666,
      "step": 370560
    },
    {
      "epoch": 1.2987554630301372,
      "grad_norm": 2.96875,
      "learning_rate": 3.1504630170486846e-05,
      "loss": 0.8648,
      "step": 370570
    },
    {
      "epoch": 1.298790510537033,
      "grad_norm": 3.0,
      "learning_rate": 3.150398114182315e-05,
      "loss": 0.9147,
      "step": 370580
    },
    {
      "epoch": 1.2988255580439285,
      "grad_norm": 2.609375,
      "learning_rate": 3.150333211315944e-05,
      "loss": 0.934,
      "step": 370590
    },
    {
      "epoch": 1.298860605550824,
      "grad_norm": 3.203125,
      "learning_rate": 3.1502683084495744e-05,
      "loss": 0.8778,
      "step": 370600
    },
    {
      "epoch": 1.2988956530577198,
      "grad_norm": 3.046875,
      "learning_rate": 3.150203405583204e-05,
      "loss": 0.8585,
      "step": 370610
    },
    {
      "epoch": 1.2989307005646153,
      "grad_norm": 2.59375,
      "learning_rate": 3.150138502716834e-05,
      "loss": 0.8183,
      "step": 370620
    },
    {
      "epoch": 1.2989657480715109,
      "grad_norm": 2.828125,
      "learning_rate": 3.150073599850464e-05,
      "loss": 0.8366,
      "step": 370630
    },
    {
      "epoch": 1.2990007955784066,
      "grad_norm": 2.8125,
      "learning_rate": 3.1500086969840936e-05,
      "loss": 0.8885,
      "step": 370640
    },
    {
      "epoch": 1.2990358430853022,
      "grad_norm": 3.296875,
      "learning_rate": 3.149943794117724e-05,
      "loss": 0.9318,
      "step": 370650
    },
    {
      "epoch": 1.2990708905921977,
      "grad_norm": 2.75,
      "learning_rate": 3.149878891251353e-05,
      "loss": 0.851,
      "step": 370660
    },
    {
      "epoch": 1.2991059380990932,
      "grad_norm": 3.1875,
      "learning_rate": 3.149813988384983e-05,
      "loss": 0.8297,
      "step": 370670
    },
    {
      "epoch": 1.2991409856059888,
      "grad_norm": 3.109375,
      "learning_rate": 3.149749085518613e-05,
      "loss": 0.7951,
      "step": 370680
    },
    {
      "epoch": 1.2991760331128845,
      "grad_norm": 2.828125,
      "learning_rate": 3.149684182652243e-05,
      "loss": 0.9057,
      "step": 370690
    },
    {
      "epoch": 1.29921108061978,
      "grad_norm": 2.96875,
      "learning_rate": 3.1496192797858724e-05,
      "loss": 0.7814,
      "step": 370700
    },
    {
      "epoch": 1.2992461281266756,
      "grad_norm": 2.875,
      "learning_rate": 3.1495543769195025e-05,
      "loss": 0.8629,
      "step": 370710
    },
    {
      "epoch": 1.2992811756335714,
      "grad_norm": 2.671875,
      "learning_rate": 3.149489474053132e-05,
      "loss": 0.8307,
      "step": 370720
    },
    {
      "epoch": 1.299316223140467,
      "grad_norm": 2.609375,
      "learning_rate": 3.149424571186762e-05,
      "loss": 0.7322,
      "step": 370730
    },
    {
      "epoch": 1.2993512706473624,
      "grad_norm": 2.84375,
      "learning_rate": 3.1493596683203916e-05,
      "loss": 0.8298,
      "step": 370740
    },
    {
      "epoch": 1.2993863181542582,
      "grad_norm": 3.359375,
      "learning_rate": 3.149294765454022e-05,
      "loss": 0.8606,
      "step": 370750
    },
    {
      "epoch": 1.2994213656611537,
      "grad_norm": 2.65625,
      "learning_rate": 3.149229862587651e-05,
      "loss": 0.8099,
      "step": 370760
    },
    {
      "epoch": 1.2994564131680493,
      "grad_norm": 3.125,
      "learning_rate": 3.149164959721281e-05,
      "loss": 0.8175,
      "step": 370770
    },
    {
      "epoch": 1.2994914606749448,
      "grad_norm": 2.96875,
      "learning_rate": 3.1491000568549115e-05,
      "loss": 0.9048,
      "step": 370780
    },
    {
      "epoch": 1.2995265081818406,
      "grad_norm": 3.078125,
      "learning_rate": 3.149035153988541e-05,
      "loss": 0.8962,
      "step": 370790
    },
    {
      "epoch": 1.299561555688736,
      "grad_norm": 2.6875,
      "learning_rate": 3.148970251122171e-05,
      "loss": 0.8688,
      "step": 370800
    },
    {
      "epoch": 1.2995966031956316,
      "grad_norm": 2.78125,
      "learning_rate": 3.1489053482558005e-05,
      "loss": 0.8177,
      "step": 370810
    },
    {
      "epoch": 1.2996316507025272,
      "grad_norm": 2.953125,
      "learning_rate": 3.1488404453894307e-05,
      "loss": 0.8545,
      "step": 370820
    },
    {
      "epoch": 1.299666698209423,
      "grad_norm": 3.21875,
      "learning_rate": 3.14877554252306e-05,
      "loss": 0.8275,
      "step": 370830
    },
    {
      "epoch": 1.2997017457163185,
      "grad_norm": 2.65625,
      "learning_rate": 3.14871063965669e-05,
      "loss": 0.7825,
      "step": 370840
    },
    {
      "epoch": 1.299736793223214,
      "grad_norm": 2.9375,
      "learning_rate": 3.14864573679032e-05,
      "loss": 0.8484,
      "step": 370850
    },
    {
      "epoch": 1.2997718407301098,
      "grad_norm": 2.765625,
      "learning_rate": 3.14858083392395e-05,
      "loss": 0.8513,
      "step": 370860
    },
    {
      "epoch": 1.2998068882370053,
      "grad_norm": 2.796875,
      "learning_rate": 3.148515931057579e-05,
      "loss": 0.8698,
      "step": 370870
    },
    {
      "epoch": 1.2998419357439008,
      "grad_norm": 3.171875,
      "learning_rate": 3.1484510281912095e-05,
      "loss": 0.8902,
      "step": 370880
    },
    {
      "epoch": 1.2998769832507966,
      "grad_norm": 3.453125,
      "learning_rate": 3.148386125324839e-05,
      "loss": 0.8282,
      "step": 370890
    },
    {
      "epoch": 1.2999120307576921,
      "grad_norm": 2.96875,
      "learning_rate": 3.148321222458469e-05,
      "loss": 0.8082,
      "step": 370900
    },
    {
      "epoch": 1.2999470782645877,
      "grad_norm": 2.765625,
      "learning_rate": 3.148256319592099e-05,
      "loss": 0.7983,
      "step": 370910
    },
    {
      "epoch": 1.2999821257714832,
      "grad_norm": 2.796875,
      "learning_rate": 3.148191416725728e-05,
      "loss": 0.7961,
      "step": 370920
    },
    {
      "epoch": 1.3000171732783787,
      "grad_norm": 3.25,
      "learning_rate": 3.148126513859358e-05,
      "loss": 0.8011,
      "step": 370930
    },
    {
      "epoch": 1.3000522207852745,
      "grad_norm": 3.03125,
      "learning_rate": 3.1480616109929876e-05,
      "loss": 0.8737,
      "step": 370940
    },
    {
      "epoch": 1.30008726829217,
      "grad_norm": 2.625,
      "learning_rate": 3.147996708126618e-05,
      "loss": 0.8856,
      "step": 370950
    },
    {
      "epoch": 1.3001223157990656,
      "grad_norm": 3.125,
      "learning_rate": 3.147931805260247e-05,
      "loss": 0.8629,
      "step": 370960
    },
    {
      "epoch": 1.3001573633059613,
      "grad_norm": 2.9375,
      "learning_rate": 3.147866902393877e-05,
      "loss": 0.8972,
      "step": 370970
    },
    {
      "epoch": 1.3001924108128569,
      "grad_norm": 2.84375,
      "learning_rate": 3.147801999527507e-05,
      "loss": 0.8198,
      "step": 370980
    },
    {
      "epoch": 1.3002274583197524,
      "grad_norm": 2.921875,
      "learning_rate": 3.147737096661137e-05,
      "loss": 0.8959,
      "step": 370990
    },
    {
      "epoch": 1.3002625058266482,
      "grad_norm": 2.984375,
      "learning_rate": 3.147672193794767e-05,
      "loss": 0.8711,
      "step": 371000
    },
    {
      "epoch": 1.3002975533335437,
      "grad_norm": 2.859375,
      "learning_rate": 3.1476072909283965e-05,
      "loss": 0.8612,
      "step": 371010
    },
    {
      "epoch": 1.3003326008404392,
      "grad_norm": 3.0,
      "learning_rate": 3.1475423880620267e-05,
      "loss": 0.7993,
      "step": 371020
    },
    {
      "epoch": 1.3003676483473348,
      "grad_norm": 3.015625,
      "learning_rate": 3.147477485195656e-05,
      "loss": 0.9221,
      "step": 371030
    },
    {
      "epoch": 1.3004026958542303,
      "grad_norm": 2.65625,
      "learning_rate": 3.147412582329286e-05,
      "loss": 0.8857,
      "step": 371040
    },
    {
      "epoch": 1.300437743361126,
      "grad_norm": 2.890625,
      "learning_rate": 3.147347679462916e-05,
      "loss": 0.8452,
      "step": 371050
    },
    {
      "epoch": 1.3004727908680216,
      "grad_norm": 2.734375,
      "learning_rate": 3.147282776596546e-05,
      "loss": 0.8041,
      "step": 371060
    },
    {
      "epoch": 1.3005078383749171,
      "grad_norm": 3.09375,
      "learning_rate": 3.147217873730175e-05,
      "loss": 0.8646,
      "step": 371070
    },
    {
      "epoch": 1.3005428858818129,
      "grad_norm": 2.78125,
      "learning_rate": 3.1471529708638055e-05,
      "loss": 0.8427,
      "step": 371080
    },
    {
      "epoch": 1.3005779333887084,
      "grad_norm": 2.8125,
      "learning_rate": 3.147088067997435e-05,
      "loss": 0.7913,
      "step": 371090
    },
    {
      "epoch": 1.300612980895604,
      "grad_norm": 2.671875,
      "learning_rate": 3.147023165131065e-05,
      "loss": 0.8698,
      "step": 371100
    },
    {
      "epoch": 1.3006480284024997,
      "grad_norm": 3.078125,
      "learning_rate": 3.1469582622646945e-05,
      "loss": 0.8054,
      "step": 371110
    },
    {
      "epoch": 1.3006830759093952,
      "grad_norm": 2.34375,
      "learning_rate": 3.1468933593983247e-05,
      "loss": 0.7301,
      "step": 371120
    },
    {
      "epoch": 1.3007181234162908,
      "grad_norm": 2.65625,
      "learning_rate": 3.146828456531955e-05,
      "loss": 0.8461,
      "step": 371130
    },
    {
      "epoch": 1.3007531709231863,
      "grad_norm": 2.8125,
      "learning_rate": 3.146763553665584e-05,
      "loss": 0.8452,
      "step": 371140
    },
    {
      "epoch": 1.3007882184300819,
      "grad_norm": 2.8125,
      "learning_rate": 3.1466986507992144e-05,
      "loss": 0.8833,
      "step": 371150
    },
    {
      "epoch": 1.3008232659369776,
      "grad_norm": 3.09375,
      "learning_rate": 3.146633747932844e-05,
      "loss": 0.9057,
      "step": 371160
    },
    {
      "epoch": 1.3008583134438731,
      "grad_norm": 3.171875,
      "learning_rate": 3.146568845066474e-05,
      "loss": 0.7881,
      "step": 371170
    },
    {
      "epoch": 1.3008933609507687,
      "grad_norm": 3.125,
      "learning_rate": 3.1465039422001035e-05,
      "loss": 0.9509,
      "step": 371180
    },
    {
      "epoch": 1.3009284084576644,
      "grad_norm": 3.265625,
      "learning_rate": 3.1464390393337336e-05,
      "loss": 0.8399,
      "step": 371190
    },
    {
      "epoch": 1.30096345596456,
      "grad_norm": 3.0,
      "learning_rate": 3.146374136467363e-05,
      "loss": 0.7975,
      "step": 371200
    },
    {
      "epoch": 1.3009985034714555,
      "grad_norm": 2.984375,
      "learning_rate": 3.146309233600993e-05,
      "loss": 0.9028,
      "step": 371210
    },
    {
      "epoch": 1.3010335509783513,
      "grad_norm": 3.015625,
      "learning_rate": 3.1462443307346227e-05,
      "loss": 0.8911,
      "step": 371220
    },
    {
      "epoch": 1.3010685984852468,
      "grad_norm": 2.765625,
      "learning_rate": 3.146179427868253e-05,
      "loss": 0.8177,
      "step": 371230
    },
    {
      "epoch": 1.3011036459921423,
      "grad_norm": 2.515625,
      "learning_rate": 3.146114525001882e-05,
      "loss": 0.7876,
      "step": 371240
    },
    {
      "epoch": 1.3011386934990379,
      "grad_norm": 2.96875,
      "learning_rate": 3.1460496221355124e-05,
      "loss": 0.866,
      "step": 371250
    },
    {
      "epoch": 1.3011737410059334,
      "grad_norm": 3.109375,
      "learning_rate": 3.145984719269142e-05,
      "loss": 0.8057,
      "step": 371260
    },
    {
      "epoch": 1.3012087885128292,
      "grad_norm": 3.0625,
      "learning_rate": 3.145919816402772e-05,
      "loss": 0.8691,
      "step": 371270
    },
    {
      "epoch": 1.3012438360197247,
      "grad_norm": 3.265625,
      "learning_rate": 3.145854913536402e-05,
      "loss": 0.8758,
      "step": 371280
    },
    {
      "epoch": 1.3012788835266202,
      "grad_norm": 2.625,
      "learning_rate": 3.1457900106700316e-05,
      "loss": 0.8378,
      "step": 371290
    },
    {
      "epoch": 1.301313931033516,
      "grad_norm": 2.859375,
      "learning_rate": 3.145725107803661e-05,
      "loss": 0.8668,
      "step": 371300
    },
    {
      "epoch": 1.3013489785404115,
      "grad_norm": 2.6875,
      "learning_rate": 3.1456602049372905e-05,
      "loss": 0.8181,
      "step": 371310
    },
    {
      "epoch": 1.301384026047307,
      "grad_norm": 2.890625,
      "learning_rate": 3.1455953020709207e-05,
      "loss": 0.8766,
      "step": 371320
    },
    {
      "epoch": 1.3014190735542028,
      "grad_norm": 3.265625,
      "learning_rate": 3.14553039920455e-05,
      "loss": 0.9719,
      "step": 371330
    },
    {
      "epoch": 1.3014541210610984,
      "grad_norm": 2.671875,
      "learning_rate": 3.14546549633818e-05,
      "loss": 0.7937,
      "step": 371340
    },
    {
      "epoch": 1.301489168567994,
      "grad_norm": 2.6875,
      "learning_rate": 3.14540059347181e-05,
      "loss": 0.8077,
      "step": 371350
    },
    {
      "epoch": 1.3015242160748894,
      "grad_norm": 2.84375,
      "learning_rate": 3.14533569060544e-05,
      "loss": 0.8809,
      "step": 371360
    },
    {
      "epoch": 1.301559263581785,
      "grad_norm": 2.921875,
      "learning_rate": 3.14527078773907e-05,
      "loss": 0.8329,
      "step": 371370
    },
    {
      "epoch": 1.3015943110886807,
      "grad_norm": 2.921875,
      "learning_rate": 3.1452058848726995e-05,
      "loss": 0.8148,
      "step": 371380
    },
    {
      "epoch": 1.3016293585955763,
      "grad_norm": 3.34375,
      "learning_rate": 3.1451409820063296e-05,
      "loss": 0.8525,
      "step": 371390
    },
    {
      "epoch": 1.3016644061024718,
      "grad_norm": 2.6875,
      "learning_rate": 3.145076079139959e-05,
      "loss": 0.8529,
      "step": 371400
    },
    {
      "epoch": 1.3016994536093676,
      "grad_norm": 2.984375,
      "learning_rate": 3.145011176273589e-05,
      "loss": 0.8866,
      "step": 371410
    },
    {
      "epoch": 1.301734501116263,
      "grad_norm": 2.890625,
      "learning_rate": 3.1449462734072187e-05,
      "loss": 0.8783,
      "step": 371420
    },
    {
      "epoch": 1.3017695486231586,
      "grad_norm": 2.6875,
      "learning_rate": 3.144881370540849e-05,
      "loss": 0.8298,
      "step": 371430
    },
    {
      "epoch": 1.3018045961300544,
      "grad_norm": 2.921875,
      "learning_rate": 3.144816467674478e-05,
      "loss": 0.8328,
      "step": 371440
    },
    {
      "epoch": 1.30183964363695,
      "grad_norm": 2.8125,
      "learning_rate": 3.1447515648081084e-05,
      "loss": 0.8937,
      "step": 371450
    },
    {
      "epoch": 1.3018746911438455,
      "grad_norm": 2.609375,
      "learning_rate": 3.144686661941738e-05,
      "loss": 0.8588,
      "step": 371460
    },
    {
      "epoch": 1.301909738650741,
      "grad_norm": 3.375,
      "learning_rate": 3.144621759075368e-05,
      "loss": 0.8104,
      "step": 371470
    },
    {
      "epoch": 1.3019447861576368,
      "grad_norm": 3.1875,
      "learning_rate": 3.1445568562089975e-05,
      "loss": 0.8956,
      "step": 371480
    },
    {
      "epoch": 1.3019798336645323,
      "grad_norm": 2.5,
      "learning_rate": 3.1444919533426276e-05,
      "loss": 0.8577,
      "step": 371490
    },
    {
      "epoch": 1.3020148811714278,
      "grad_norm": 3.0625,
      "learning_rate": 3.144427050476258e-05,
      "loss": 0.8322,
      "step": 371500
    },
    {
      "epoch": 1.3020499286783234,
      "grad_norm": 2.828125,
      "learning_rate": 3.144362147609887e-05,
      "loss": 0.7615,
      "step": 371510
    },
    {
      "epoch": 1.3020849761852191,
      "grad_norm": 3.015625,
      "learning_rate": 3.144297244743517e-05,
      "loss": 0.7968,
      "step": 371520
    },
    {
      "epoch": 1.3021200236921147,
      "grad_norm": 2.875,
      "learning_rate": 3.144232341877147e-05,
      "loss": 0.8878,
      "step": 371530
    },
    {
      "epoch": 1.3021550711990102,
      "grad_norm": 3.109375,
      "learning_rate": 3.144167439010777e-05,
      "loss": 0.8509,
      "step": 371540
    },
    {
      "epoch": 1.302190118705906,
      "grad_norm": 2.953125,
      "learning_rate": 3.1441025361444064e-05,
      "loss": 0.846,
      "step": 371550
    },
    {
      "epoch": 1.3022251662128015,
      "grad_norm": 2.390625,
      "learning_rate": 3.1440376332780365e-05,
      "loss": 0.8335,
      "step": 371560
    },
    {
      "epoch": 1.302260213719697,
      "grad_norm": 2.8125,
      "learning_rate": 3.143972730411666e-05,
      "loss": 0.9055,
      "step": 371570
    },
    {
      "epoch": 1.3022952612265928,
      "grad_norm": 2.515625,
      "learning_rate": 3.143907827545296e-05,
      "loss": 0.7499,
      "step": 371580
    },
    {
      "epoch": 1.3023303087334883,
      "grad_norm": 2.765625,
      "learning_rate": 3.1438429246789256e-05,
      "loss": 0.9166,
      "step": 371590
    },
    {
      "epoch": 1.3023653562403839,
      "grad_norm": 3.3125,
      "learning_rate": 3.143778021812556e-05,
      "loss": 0.9214,
      "step": 371600
    },
    {
      "epoch": 1.3024004037472794,
      "grad_norm": 3.203125,
      "learning_rate": 3.143713118946185e-05,
      "loss": 0.8189,
      "step": 371610
    },
    {
      "epoch": 1.302435451254175,
      "grad_norm": 3.203125,
      "learning_rate": 3.143648216079815e-05,
      "loss": 0.8744,
      "step": 371620
    },
    {
      "epoch": 1.3024704987610707,
      "grad_norm": 3.140625,
      "learning_rate": 3.143583313213445e-05,
      "loss": 0.8456,
      "step": 371630
    },
    {
      "epoch": 1.3025055462679662,
      "grad_norm": 2.65625,
      "learning_rate": 3.143518410347075e-05,
      "loss": 0.8863,
      "step": 371640
    },
    {
      "epoch": 1.3025405937748618,
      "grad_norm": 2.765625,
      "learning_rate": 3.143453507480705e-05,
      "loss": 0.8198,
      "step": 371650
    },
    {
      "epoch": 1.3025756412817575,
      "grad_norm": 2.734375,
      "learning_rate": 3.1433886046143345e-05,
      "loss": 0.8264,
      "step": 371660
    },
    {
      "epoch": 1.302610688788653,
      "grad_norm": 2.875,
      "learning_rate": 3.143323701747965e-05,
      "loss": 0.8586,
      "step": 371670
    },
    {
      "epoch": 1.3026457362955486,
      "grad_norm": 2.765625,
      "learning_rate": 3.1432587988815935e-05,
      "loss": 0.8301,
      "step": 371680
    },
    {
      "epoch": 1.3026807838024443,
      "grad_norm": 3.0,
      "learning_rate": 3.1431938960152236e-05,
      "loss": 0.8869,
      "step": 371690
    },
    {
      "epoch": 1.3027158313093399,
      "grad_norm": 3.1875,
      "learning_rate": 3.143128993148853e-05,
      "loss": 0.8906,
      "step": 371700
    },
    {
      "epoch": 1.3027508788162354,
      "grad_norm": 2.625,
      "learning_rate": 3.143064090282483e-05,
      "loss": 0.8846,
      "step": 371710
    },
    {
      "epoch": 1.302785926323131,
      "grad_norm": 2.90625,
      "learning_rate": 3.1429991874161127e-05,
      "loss": 0.8631,
      "step": 371720
    },
    {
      "epoch": 1.3028209738300265,
      "grad_norm": 3.28125,
      "learning_rate": 3.142934284549743e-05,
      "loss": 0.8115,
      "step": 371730
    },
    {
      "epoch": 1.3028560213369222,
      "grad_norm": 2.734375,
      "learning_rate": 3.142869381683373e-05,
      "loss": 0.8076,
      "step": 371740
    },
    {
      "epoch": 1.3028910688438178,
      "grad_norm": 2.875,
      "learning_rate": 3.1428044788170024e-05,
      "loss": 0.8513,
      "step": 371750
    },
    {
      "epoch": 1.3029261163507133,
      "grad_norm": 3.046875,
      "learning_rate": 3.1427395759506325e-05,
      "loss": 0.86,
      "step": 371760
    },
    {
      "epoch": 1.302961163857609,
      "grad_norm": 2.890625,
      "learning_rate": 3.142674673084262e-05,
      "loss": 0.8375,
      "step": 371770
    },
    {
      "epoch": 1.3029962113645046,
      "grad_norm": 3.046875,
      "learning_rate": 3.142609770217892e-05,
      "loss": 0.8537,
      "step": 371780
    },
    {
      "epoch": 1.3030312588714001,
      "grad_norm": 3.109375,
      "learning_rate": 3.1425448673515216e-05,
      "loss": 0.7677,
      "step": 371790
    },
    {
      "epoch": 1.303066306378296,
      "grad_norm": 2.796875,
      "learning_rate": 3.142479964485152e-05,
      "loss": 0.9066,
      "step": 371800
    },
    {
      "epoch": 1.3031013538851914,
      "grad_norm": 3.109375,
      "learning_rate": 3.142415061618781e-05,
      "loss": 0.9234,
      "step": 371810
    },
    {
      "epoch": 1.303136401392087,
      "grad_norm": 2.9375,
      "learning_rate": 3.142350158752411e-05,
      "loss": 0.8592,
      "step": 371820
    },
    {
      "epoch": 1.3031714488989825,
      "grad_norm": 2.953125,
      "learning_rate": 3.142285255886041e-05,
      "loss": 0.8865,
      "step": 371830
    },
    {
      "epoch": 1.303206496405878,
      "grad_norm": 3.921875,
      "learning_rate": 3.142220353019671e-05,
      "loss": 0.8368,
      "step": 371840
    },
    {
      "epoch": 1.3032415439127738,
      "grad_norm": 2.9375,
      "learning_rate": 3.1421554501533004e-05,
      "loss": 0.8531,
      "step": 371850
    },
    {
      "epoch": 1.3032765914196693,
      "grad_norm": 3.03125,
      "learning_rate": 3.1420905472869305e-05,
      "loss": 0.8751,
      "step": 371860
    },
    {
      "epoch": 1.3033116389265649,
      "grad_norm": 2.90625,
      "learning_rate": 3.142025644420561e-05,
      "loss": 0.8219,
      "step": 371870
    },
    {
      "epoch": 1.3033466864334606,
      "grad_norm": 2.46875,
      "learning_rate": 3.14196074155419e-05,
      "loss": 0.8629,
      "step": 371880
    },
    {
      "epoch": 1.3033817339403562,
      "grad_norm": 2.578125,
      "learning_rate": 3.14189583868782e-05,
      "loss": 0.8668,
      "step": 371890
    },
    {
      "epoch": 1.3034167814472517,
      "grad_norm": 3.265625,
      "learning_rate": 3.14183093582145e-05,
      "loss": 0.8759,
      "step": 371900
    },
    {
      "epoch": 1.3034518289541475,
      "grad_norm": 3.0,
      "learning_rate": 3.14176603295508e-05,
      "loss": 0.8959,
      "step": 371910
    },
    {
      "epoch": 1.303486876461043,
      "grad_norm": 3.0,
      "learning_rate": 3.141701130088709e-05,
      "loss": 0.843,
      "step": 371920
    },
    {
      "epoch": 1.3035219239679385,
      "grad_norm": 3.265625,
      "learning_rate": 3.1416362272223395e-05,
      "loss": 0.8805,
      "step": 371930
    },
    {
      "epoch": 1.303556971474834,
      "grad_norm": 3.40625,
      "learning_rate": 3.141571324355969e-05,
      "loss": 0.9187,
      "step": 371940
    },
    {
      "epoch": 1.3035920189817296,
      "grad_norm": 3.25,
      "learning_rate": 3.141506421489599e-05,
      "loss": 0.8152,
      "step": 371950
    },
    {
      "epoch": 1.3036270664886254,
      "grad_norm": 2.671875,
      "learning_rate": 3.1414415186232285e-05,
      "loss": 0.7831,
      "step": 371960
    },
    {
      "epoch": 1.303662113995521,
      "grad_norm": 3.015625,
      "learning_rate": 3.141376615756859e-05,
      "loss": 0.98,
      "step": 371970
    },
    {
      "epoch": 1.3036971615024164,
      "grad_norm": 3.109375,
      "learning_rate": 3.141311712890488e-05,
      "loss": 0.8745,
      "step": 371980
    },
    {
      "epoch": 1.3037322090093122,
      "grad_norm": 3.109375,
      "learning_rate": 3.141246810024118e-05,
      "loss": 0.8373,
      "step": 371990
    },
    {
      "epoch": 1.3037672565162077,
      "grad_norm": 4.25,
      "learning_rate": 3.141181907157748e-05,
      "loss": 0.8927,
      "step": 372000
    },
    {
      "epoch": 1.3038023040231033,
      "grad_norm": 2.84375,
      "learning_rate": 3.141117004291378e-05,
      "loss": 0.7981,
      "step": 372010
    },
    {
      "epoch": 1.303837351529999,
      "grad_norm": 3.15625,
      "learning_rate": 3.141052101425008e-05,
      "loss": 0.8583,
      "step": 372020
    },
    {
      "epoch": 1.3038723990368946,
      "grad_norm": 3.203125,
      "learning_rate": 3.1409871985586375e-05,
      "loss": 0.9608,
      "step": 372030
    },
    {
      "epoch": 1.30390744654379,
      "grad_norm": 3.03125,
      "learning_rate": 3.1409222956922676e-05,
      "loss": 0.9545,
      "step": 372040
    },
    {
      "epoch": 1.3039424940506856,
      "grad_norm": 2.796875,
      "learning_rate": 3.1408573928258964e-05,
      "loss": 0.8669,
      "step": 372050
    },
    {
      "epoch": 1.3039775415575812,
      "grad_norm": 2.515625,
      "learning_rate": 3.1407924899595265e-05,
      "loss": 0.8415,
      "step": 372060
    },
    {
      "epoch": 1.304012589064477,
      "grad_norm": 2.65625,
      "learning_rate": 3.140727587093156e-05,
      "loss": 0.8452,
      "step": 372070
    },
    {
      "epoch": 1.3040476365713725,
      "grad_norm": 2.78125,
      "learning_rate": 3.140662684226786e-05,
      "loss": 0.8971,
      "step": 372080
    },
    {
      "epoch": 1.304082684078268,
      "grad_norm": 2.828125,
      "learning_rate": 3.140597781360416e-05,
      "loss": 0.824,
      "step": 372090
    },
    {
      "epoch": 1.3041177315851638,
      "grad_norm": 3.078125,
      "learning_rate": 3.140532878494046e-05,
      "loss": 0.8358,
      "step": 372100
    },
    {
      "epoch": 1.3041527790920593,
      "grad_norm": 14.1875,
      "learning_rate": 3.140467975627676e-05,
      "loss": 0.8722,
      "step": 372110
    },
    {
      "epoch": 1.3041878265989548,
      "grad_norm": 2.703125,
      "learning_rate": 3.140403072761305e-05,
      "loss": 0.7847,
      "step": 372120
    },
    {
      "epoch": 1.3042228741058506,
      "grad_norm": 3.0,
      "learning_rate": 3.1403381698949355e-05,
      "loss": 0.875,
      "step": 372130
    },
    {
      "epoch": 1.3042579216127461,
      "grad_norm": 3.09375,
      "learning_rate": 3.140273267028565e-05,
      "loss": 0.8519,
      "step": 372140
    },
    {
      "epoch": 1.3042929691196417,
      "grad_norm": 3.234375,
      "learning_rate": 3.140208364162195e-05,
      "loss": 0.9866,
      "step": 372150
    },
    {
      "epoch": 1.3043280166265372,
      "grad_norm": 2.75,
      "learning_rate": 3.1401434612958245e-05,
      "loss": 0.8755,
      "step": 372160
    },
    {
      "epoch": 1.304363064133433,
      "grad_norm": 2.765625,
      "learning_rate": 3.140078558429455e-05,
      "loss": 0.9044,
      "step": 372170
    },
    {
      "epoch": 1.3043981116403285,
      "grad_norm": 2.703125,
      "learning_rate": 3.140013655563084e-05,
      "loss": 0.9392,
      "step": 372180
    },
    {
      "epoch": 1.304433159147224,
      "grad_norm": 2.828125,
      "learning_rate": 3.139948752696714e-05,
      "loss": 0.9042,
      "step": 372190
    },
    {
      "epoch": 1.3044682066541196,
      "grad_norm": 2.796875,
      "learning_rate": 3.139883849830344e-05,
      "loss": 0.9028,
      "step": 372200
    },
    {
      "epoch": 1.3045032541610153,
      "grad_norm": 2.6875,
      "learning_rate": 3.139818946963974e-05,
      "loss": 0.86,
      "step": 372210
    },
    {
      "epoch": 1.3045383016679108,
      "grad_norm": 2.953125,
      "learning_rate": 3.139754044097603e-05,
      "loss": 0.8227,
      "step": 372220
    },
    {
      "epoch": 1.3045733491748064,
      "grad_norm": 2.921875,
      "learning_rate": 3.1396891412312335e-05,
      "loss": 0.847,
      "step": 372230
    },
    {
      "epoch": 1.3046083966817021,
      "grad_norm": 2.796875,
      "learning_rate": 3.1396242383648636e-05,
      "loss": 0.8092,
      "step": 372240
    },
    {
      "epoch": 1.3046434441885977,
      "grad_norm": 3.09375,
      "learning_rate": 3.139559335498493e-05,
      "loss": 0.8495,
      "step": 372250
    },
    {
      "epoch": 1.3046784916954932,
      "grad_norm": 3.125,
      "learning_rate": 3.139494432632123e-05,
      "loss": 0.8284,
      "step": 372260
    },
    {
      "epoch": 1.304713539202389,
      "grad_norm": 2.546875,
      "learning_rate": 3.139429529765753e-05,
      "loss": 0.8831,
      "step": 372270
    },
    {
      "epoch": 1.3047485867092845,
      "grad_norm": 2.8125,
      "learning_rate": 3.139364626899383e-05,
      "loss": 0.787,
      "step": 372280
    },
    {
      "epoch": 1.30478363421618,
      "grad_norm": 2.796875,
      "learning_rate": 3.139299724033012e-05,
      "loss": 0.8543,
      "step": 372290
    },
    {
      "epoch": 1.3048186817230756,
      "grad_norm": 2.6875,
      "learning_rate": 3.1392348211666424e-05,
      "loss": 0.8422,
      "step": 372300
    },
    {
      "epoch": 1.3048537292299711,
      "grad_norm": 3.3125,
      "learning_rate": 3.139169918300272e-05,
      "loss": 0.8561,
      "step": 372310
    },
    {
      "epoch": 1.3048887767368669,
      "grad_norm": 3.15625,
      "learning_rate": 3.139105015433902e-05,
      "loss": 0.9309,
      "step": 372320
    },
    {
      "epoch": 1.3049238242437624,
      "grad_norm": 2.921875,
      "learning_rate": 3.1390401125675315e-05,
      "loss": 0.8099,
      "step": 372330
    },
    {
      "epoch": 1.304958871750658,
      "grad_norm": 2.71875,
      "learning_rate": 3.1389752097011616e-05,
      "loss": 0.8359,
      "step": 372340
    },
    {
      "epoch": 1.3049939192575537,
      "grad_norm": 2.8125,
      "learning_rate": 3.138910306834791e-05,
      "loss": 0.8147,
      "step": 372350
    },
    {
      "epoch": 1.3050289667644492,
      "grad_norm": 2.921875,
      "learning_rate": 3.138845403968421e-05,
      "loss": 0.8104,
      "step": 372360
    },
    {
      "epoch": 1.3050640142713448,
      "grad_norm": 2.96875,
      "learning_rate": 3.1387805011020514e-05,
      "loss": 0.9157,
      "step": 372370
    },
    {
      "epoch": 1.3050990617782405,
      "grad_norm": 2.453125,
      "learning_rate": 3.138715598235681e-05,
      "loss": 0.8573,
      "step": 372380
    },
    {
      "epoch": 1.305134109285136,
      "grad_norm": 3.15625,
      "learning_rate": 3.138650695369311e-05,
      "loss": 0.8947,
      "step": 372390
    },
    {
      "epoch": 1.3051691567920316,
      "grad_norm": 2.765625,
      "learning_rate": 3.1385857925029404e-05,
      "loss": 0.8897,
      "step": 372400
    },
    {
      "epoch": 1.3052042042989271,
      "grad_norm": 2.859375,
      "learning_rate": 3.1385208896365706e-05,
      "loss": 0.7972,
      "step": 372410
    },
    {
      "epoch": 1.3052392518058227,
      "grad_norm": 3.125,
      "learning_rate": 3.1384559867702e-05,
      "loss": 0.8827,
      "step": 372420
    },
    {
      "epoch": 1.3052742993127184,
      "grad_norm": 2.625,
      "learning_rate": 3.1383910839038295e-05,
      "loss": 0.866,
      "step": 372430
    },
    {
      "epoch": 1.305309346819614,
      "grad_norm": 2.984375,
      "learning_rate": 3.138326181037459e-05,
      "loss": 0.8563,
      "step": 372440
    },
    {
      "epoch": 1.3053443943265095,
      "grad_norm": 2.96875,
      "learning_rate": 3.138261278171089e-05,
      "loss": 0.8287,
      "step": 372450
    },
    {
      "epoch": 1.3053794418334053,
      "grad_norm": 2.9375,
      "learning_rate": 3.138196375304719e-05,
      "loss": 0.837,
      "step": 372460
    },
    {
      "epoch": 1.3054144893403008,
      "grad_norm": 3.0625,
      "learning_rate": 3.138131472438349e-05,
      "loss": 0.8856,
      "step": 372470
    },
    {
      "epoch": 1.3054495368471963,
      "grad_norm": 2.921875,
      "learning_rate": 3.138066569571979e-05,
      "loss": 0.8756,
      "step": 372480
    },
    {
      "epoch": 1.305484584354092,
      "grad_norm": 2.53125,
      "learning_rate": 3.138001666705608e-05,
      "loss": 0.8623,
      "step": 372490
    },
    {
      "epoch": 1.3055196318609876,
      "grad_norm": 2.703125,
      "learning_rate": 3.1379367638392384e-05,
      "loss": 0.8736,
      "step": 372500
    },
    {
      "epoch": 1.3055546793678832,
      "grad_norm": 2.96875,
      "learning_rate": 3.137871860972868e-05,
      "loss": 0.8767,
      "step": 372510
    },
    {
      "epoch": 1.3055897268747787,
      "grad_norm": 2.9375,
      "learning_rate": 3.137806958106498e-05,
      "loss": 0.8213,
      "step": 372520
    },
    {
      "epoch": 1.3056247743816742,
      "grad_norm": 2.90625,
      "learning_rate": 3.1377420552401275e-05,
      "loss": 0.8255,
      "step": 372530
    },
    {
      "epoch": 1.30565982188857,
      "grad_norm": 2.71875,
      "learning_rate": 3.1376771523737576e-05,
      "loss": 0.9064,
      "step": 372540
    },
    {
      "epoch": 1.3056948693954655,
      "grad_norm": 3.1875,
      "learning_rate": 3.137612249507387e-05,
      "loss": 0.8673,
      "step": 372550
    },
    {
      "epoch": 1.305729916902361,
      "grad_norm": 2.75,
      "learning_rate": 3.137547346641017e-05,
      "loss": 0.8071,
      "step": 372560
    },
    {
      "epoch": 1.3057649644092568,
      "grad_norm": 3.015625,
      "learning_rate": 3.137482443774647e-05,
      "loss": 0.7994,
      "step": 372570
    },
    {
      "epoch": 1.3058000119161524,
      "grad_norm": 2.859375,
      "learning_rate": 3.137417540908277e-05,
      "loss": 0.8484,
      "step": 372580
    },
    {
      "epoch": 1.305835059423048,
      "grad_norm": 2.78125,
      "learning_rate": 3.137352638041906e-05,
      "loss": 0.8307,
      "step": 372590
    },
    {
      "epoch": 1.3058701069299437,
      "grad_norm": 2.765625,
      "learning_rate": 3.1372877351755364e-05,
      "loss": 0.8702,
      "step": 372600
    },
    {
      "epoch": 1.3059051544368392,
      "grad_norm": 2.9375,
      "learning_rate": 3.1372228323091666e-05,
      "loss": 0.8853,
      "step": 372610
    },
    {
      "epoch": 1.3059402019437347,
      "grad_norm": 2.984375,
      "learning_rate": 3.137157929442796e-05,
      "loss": 0.8798,
      "step": 372620
    },
    {
      "epoch": 1.3059752494506303,
      "grad_norm": 2.921875,
      "learning_rate": 3.137093026576426e-05,
      "loss": 0.8962,
      "step": 372630
    },
    {
      "epoch": 1.3060102969575258,
      "grad_norm": 2.546875,
      "learning_rate": 3.1370281237100556e-05,
      "loss": 0.777,
      "step": 372640
    },
    {
      "epoch": 1.3060453444644216,
      "grad_norm": 3.0625,
      "learning_rate": 3.136963220843686e-05,
      "loss": 0.8594,
      "step": 372650
    },
    {
      "epoch": 1.306080391971317,
      "grad_norm": 3.84375,
      "learning_rate": 3.136898317977315e-05,
      "loss": 0.9515,
      "step": 372660
    },
    {
      "epoch": 1.3061154394782126,
      "grad_norm": 2.5625,
      "learning_rate": 3.1368334151109454e-05,
      "loss": 0.8581,
      "step": 372670
    },
    {
      "epoch": 1.3061504869851084,
      "grad_norm": 3.234375,
      "learning_rate": 3.136768512244575e-05,
      "loss": 0.8851,
      "step": 372680
    },
    {
      "epoch": 1.306185534492004,
      "grad_norm": 2.703125,
      "learning_rate": 3.136703609378205e-05,
      "loss": 0.7646,
      "step": 372690
    },
    {
      "epoch": 1.3062205819988995,
      "grad_norm": 2.96875,
      "learning_rate": 3.1366387065118344e-05,
      "loss": 0.8592,
      "step": 372700
    },
    {
      "epoch": 1.3062556295057952,
      "grad_norm": 2.96875,
      "learning_rate": 3.1365738036454646e-05,
      "loss": 0.8948,
      "step": 372710
    },
    {
      "epoch": 1.3062906770126907,
      "grad_norm": 2.671875,
      "learning_rate": 3.136508900779094e-05,
      "loss": 0.8227,
      "step": 372720
    },
    {
      "epoch": 1.3063257245195863,
      "grad_norm": 2.65625,
      "learning_rate": 3.136443997912724e-05,
      "loss": 0.8325,
      "step": 372730
    },
    {
      "epoch": 1.3063607720264818,
      "grad_norm": 2.796875,
      "learning_rate": 3.136379095046354e-05,
      "loss": 0.8734,
      "step": 372740
    },
    {
      "epoch": 1.3063958195333774,
      "grad_norm": 3.0,
      "learning_rate": 3.136314192179984e-05,
      "loss": 0.8018,
      "step": 372750
    },
    {
      "epoch": 1.3064308670402731,
      "grad_norm": 3.125,
      "learning_rate": 3.136249289313614e-05,
      "loss": 0.9893,
      "step": 372760
    },
    {
      "epoch": 1.3064659145471686,
      "grad_norm": 2.875,
      "learning_rate": 3.1361843864472434e-05,
      "loss": 0.8157,
      "step": 372770
    },
    {
      "epoch": 1.3065009620540642,
      "grad_norm": 2.640625,
      "learning_rate": 3.1361194835808735e-05,
      "loss": 0.8382,
      "step": 372780
    },
    {
      "epoch": 1.30653600956096,
      "grad_norm": 2.421875,
      "learning_rate": 3.136054580714503e-05,
      "loss": 0.7948,
      "step": 372790
    },
    {
      "epoch": 1.3065710570678555,
      "grad_norm": 3.0625,
      "learning_rate": 3.135989677848133e-05,
      "loss": 0.8173,
      "step": 372800
    },
    {
      "epoch": 1.306606104574751,
      "grad_norm": 3.078125,
      "learning_rate": 3.135924774981762e-05,
      "loss": 0.8218,
      "step": 372810
    },
    {
      "epoch": 1.3066411520816468,
      "grad_norm": 2.609375,
      "learning_rate": 3.135859872115392e-05,
      "loss": 0.7876,
      "step": 372820
    },
    {
      "epoch": 1.3066761995885423,
      "grad_norm": 2.328125,
      "learning_rate": 3.135794969249022e-05,
      "loss": 0.8508,
      "step": 372830
    },
    {
      "epoch": 1.3067112470954378,
      "grad_norm": 3.625,
      "learning_rate": 3.1357300663826516e-05,
      "loss": 0.7964,
      "step": 372840
    },
    {
      "epoch": 1.3067462946023334,
      "grad_norm": 3.28125,
      "learning_rate": 3.135665163516282e-05,
      "loss": 0.8343,
      "step": 372850
    },
    {
      "epoch": 1.3067813421092291,
      "grad_norm": 2.890625,
      "learning_rate": 3.135600260649911e-05,
      "loss": 0.8858,
      "step": 372860
    },
    {
      "epoch": 1.3068163896161247,
      "grad_norm": 2.65625,
      "learning_rate": 3.1355353577835414e-05,
      "loss": 0.7933,
      "step": 372870
    },
    {
      "epoch": 1.3068514371230202,
      "grad_norm": 2.65625,
      "learning_rate": 3.135470454917171e-05,
      "loss": 0.8739,
      "step": 372880
    },
    {
      "epoch": 1.3068864846299157,
      "grad_norm": 2.828125,
      "learning_rate": 3.135405552050801e-05,
      "loss": 0.8536,
      "step": 372890
    },
    {
      "epoch": 1.3069215321368115,
      "grad_norm": 3.1875,
      "learning_rate": 3.1353406491844304e-05,
      "loss": 0.7957,
      "step": 372900
    },
    {
      "epoch": 1.306956579643707,
      "grad_norm": 2.546875,
      "learning_rate": 3.1352757463180606e-05,
      "loss": 0.7916,
      "step": 372910
    },
    {
      "epoch": 1.3069916271506026,
      "grad_norm": 3.0,
      "learning_rate": 3.13521084345169e-05,
      "loss": 0.8629,
      "step": 372920
    },
    {
      "epoch": 1.3070266746574983,
      "grad_norm": 2.515625,
      "learning_rate": 3.13514594058532e-05,
      "loss": 0.911,
      "step": 372930
    },
    {
      "epoch": 1.3070617221643939,
      "grad_norm": 2.75,
      "learning_rate": 3.1350810377189496e-05,
      "loss": 0.808,
      "step": 372940
    },
    {
      "epoch": 1.3070967696712894,
      "grad_norm": 3.078125,
      "learning_rate": 3.13501613485258e-05,
      "loss": 0.8742,
      "step": 372950
    },
    {
      "epoch": 1.3071318171781852,
      "grad_norm": 2.546875,
      "learning_rate": 3.134951231986209e-05,
      "loss": 0.8481,
      "step": 372960
    },
    {
      "epoch": 1.3071668646850807,
      "grad_norm": 2.859375,
      "learning_rate": 3.1348863291198394e-05,
      "loss": 0.7852,
      "step": 372970
    },
    {
      "epoch": 1.3072019121919762,
      "grad_norm": 2.453125,
      "learning_rate": 3.1348214262534695e-05,
      "loss": 0.8067,
      "step": 372980
    },
    {
      "epoch": 1.3072369596988718,
      "grad_norm": 2.328125,
      "learning_rate": 3.134756523387099e-05,
      "loss": 0.8363,
      "step": 372990
    },
    {
      "epoch": 1.3072720072057673,
      "grad_norm": 3.015625,
      "learning_rate": 3.134691620520729e-05,
      "loss": 0.8747,
      "step": 373000
    },
    {
      "epoch": 1.307307054712663,
      "grad_norm": 2.890625,
      "learning_rate": 3.1346267176543586e-05,
      "loss": 0.8425,
      "step": 373010
    },
    {
      "epoch": 1.3073421022195586,
      "grad_norm": 2.890625,
      "learning_rate": 3.134561814787989e-05,
      "loss": 0.9324,
      "step": 373020
    },
    {
      "epoch": 1.3073771497264541,
      "grad_norm": 3.03125,
      "learning_rate": 3.134496911921618e-05,
      "loss": 0.8539,
      "step": 373030
    },
    {
      "epoch": 1.30741219723335,
      "grad_norm": 2.96875,
      "learning_rate": 3.134432009055248e-05,
      "loss": 0.8613,
      "step": 373040
    },
    {
      "epoch": 1.3074472447402454,
      "grad_norm": 2.71875,
      "learning_rate": 3.134367106188878e-05,
      "loss": 0.8521,
      "step": 373050
    },
    {
      "epoch": 1.307482292247141,
      "grad_norm": 2.890625,
      "learning_rate": 3.134302203322508e-05,
      "loss": 0.9071,
      "step": 373060
    },
    {
      "epoch": 1.3075173397540367,
      "grad_norm": 2.84375,
      "learning_rate": 3.1342373004561374e-05,
      "loss": 0.7823,
      "step": 373070
    },
    {
      "epoch": 1.3075523872609323,
      "grad_norm": 2.859375,
      "learning_rate": 3.1341723975897675e-05,
      "loss": 0.8785,
      "step": 373080
    },
    {
      "epoch": 1.3075874347678278,
      "grad_norm": 3.03125,
      "learning_rate": 3.134107494723397e-05,
      "loss": 0.854,
      "step": 373090
    },
    {
      "epoch": 1.3076224822747233,
      "grad_norm": 2.703125,
      "learning_rate": 3.134042591857027e-05,
      "loss": 0.8665,
      "step": 373100
    },
    {
      "epoch": 1.3076575297816189,
      "grad_norm": 3.46875,
      "learning_rate": 3.133977688990657e-05,
      "loss": 0.9602,
      "step": 373110
    },
    {
      "epoch": 1.3076925772885146,
      "grad_norm": 2.640625,
      "learning_rate": 3.133912786124287e-05,
      "loss": 0.8752,
      "step": 373120
    },
    {
      "epoch": 1.3077276247954102,
      "grad_norm": 2.953125,
      "learning_rate": 3.133847883257917e-05,
      "loss": 0.8914,
      "step": 373130
    },
    {
      "epoch": 1.3077626723023057,
      "grad_norm": 2.703125,
      "learning_rate": 3.133782980391546e-05,
      "loss": 0.9431,
      "step": 373140
    },
    {
      "epoch": 1.3077977198092015,
      "grad_norm": 2.703125,
      "learning_rate": 3.1337180775251764e-05,
      "loss": 0.767,
      "step": 373150
    },
    {
      "epoch": 1.307832767316097,
      "grad_norm": 3.109375,
      "learning_rate": 3.133653174658806e-05,
      "loss": 0.7931,
      "step": 373160
    },
    {
      "epoch": 1.3078678148229925,
      "grad_norm": 2.703125,
      "learning_rate": 3.133588271792436e-05,
      "loss": 0.8021,
      "step": 373170
    },
    {
      "epoch": 1.3079028623298883,
      "grad_norm": 2.75,
      "learning_rate": 3.133523368926065e-05,
      "loss": 0.8634,
      "step": 373180
    },
    {
      "epoch": 1.3079379098367838,
      "grad_norm": 2.703125,
      "learning_rate": 3.133458466059695e-05,
      "loss": 0.8287,
      "step": 373190
    },
    {
      "epoch": 1.3079729573436794,
      "grad_norm": 2.71875,
      "learning_rate": 3.133393563193325e-05,
      "loss": 0.8701,
      "step": 373200
    },
    {
      "epoch": 1.308008004850575,
      "grad_norm": 2.75,
      "learning_rate": 3.1333286603269545e-05,
      "loss": 0.8907,
      "step": 373210
    },
    {
      "epoch": 1.3080430523574704,
      "grad_norm": 2.78125,
      "learning_rate": 3.133263757460585e-05,
      "loss": 0.8396,
      "step": 373220
    },
    {
      "epoch": 1.3080780998643662,
      "grad_norm": 2.46875,
      "learning_rate": 3.133198854594214e-05,
      "loss": 0.7682,
      "step": 373230
    },
    {
      "epoch": 1.3081131473712617,
      "grad_norm": 3.21875,
      "learning_rate": 3.133133951727844e-05,
      "loss": 0.7937,
      "step": 373240
    },
    {
      "epoch": 1.3081481948781573,
      "grad_norm": 3.046875,
      "learning_rate": 3.133069048861474e-05,
      "loss": 0.7984,
      "step": 373250
    },
    {
      "epoch": 1.308183242385053,
      "grad_norm": 3.21875,
      "learning_rate": 3.133004145995104e-05,
      "loss": 0.8523,
      "step": 373260
    },
    {
      "epoch": 1.3082182898919485,
      "grad_norm": 2.953125,
      "learning_rate": 3.1329392431287333e-05,
      "loss": 0.8624,
      "step": 373270
    },
    {
      "epoch": 1.308253337398844,
      "grad_norm": 2.640625,
      "learning_rate": 3.1328743402623635e-05,
      "loss": 0.8086,
      "step": 373280
    },
    {
      "epoch": 1.3082883849057398,
      "grad_norm": 3.234375,
      "learning_rate": 3.132809437395993e-05,
      "loss": 0.8457,
      "step": 373290
    },
    {
      "epoch": 1.3083234324126354,
      "grad_norm": 3.34375,
      "learning_rate": 3.132744534529623e-05,
      "loss": 0.9338,
      "step": 373300
    },
    {
      "epoch": 1.308358479919531,
      "grad_norm": 2.515625,
      "learning_rate": 3.1326796316632525e-05,
      "loss": 0.8804,
      "step": 373310
    },
    {
      "epoch": 1.3083935274264265,
      "grad_norm": 2.984375,
      "learning_rate": 3.132614728796883e-05,
      "loss": 0.7569,
      "step": 373320
    },
    {
      "epoch": 1.308428574933322,
      "grad_norm": 3.09375,
      "learning_rate": 3.132549825930513e-05,
      "loss": 0.7885,
      "step": 373330
    },
    {
      "epoch": 1.3084636224402177,
      "grad_norm": 2.46875,
      "learning_rate": 3.132484923064142e-05,
      "loss": 0.9046,
      "step": 373340
    },
    {
      "epoch": 1.3084986699471133,
      "grad_norm": 3.0,
      "learning_rate": 3.1324200201977724e-05,
      "loss": 0.8689,
      "step": 373350
    },
    {
      "epoch": 1.3085337174540088,
      "grad_norm": 2.90625,
      "learning_rate": 3.132355117331402e-05,
      "loss": 0.8112,
      "step": 373360
    },
    {
      "epoch": 1.3085687649609046,
      "grad_norm": 3.359375,
      "learning_rate": 3.132290214465032e-05,
      "loss": 0.877,
      "step": 373370
    },
    {
      "epoch": 1.3086038124678,
      "grad_norm": 2.765625,
      "learning_rate": 3.1322253115986615e-05,
      "loss": 0.8319,
      "step": 373380
    },
    {
      "epoch": 1.3086388599746956,
      "grad_norm": 2.921875,
      "learning_rate": 3.1321604087322916e-05,
      "loss": 0.8304,
      "step": 373390
    },
    {
      "epoch": 1.3086739074815914,
      "grad_norm": 2.828125,
      "learning_rate": 3.132095505865921e-05,
      "loss": 0.7635,
      "step": 373400
    },
    {
      "epoch": 1.308708954988487,
      "grad_norm": 2.921875,
      "learning_rate": 3.132030602999551e-05,
      "loss": 0.8512,
      "step": 373410
    },
    {
      "epoch": 1.3087440024953825,
      "grad_norm": 3.046875,
      "learning_rate": 3.131965700133181e-05,
      "loss": 0.9801,
      "step": 373420
    },
    {
      "epoch": 1.308779050002278,
      "grad_norm": 2.75,
      "learning_rate": 3.131900797266811e-05,
      "loss": 0.9127,
      "step": 373430
    },
    {
      "epoch": 1.3088140975091735,
      "grad_norm": 2.734375,
      "learning_rate": 3.13183589440044e-05,
      "loss": 0.8556,
      "step": 373440
    },
    {
      "epoch": 1.3088491450160693,
      "grad_norm": 2.53125,
      "learning_rate": 3.1317709915340704e-05,
      "loss": 0.7802,
      "step": 373450
    },
    {
      "epoch": 1.3088841925229648,
      "grad_norm": 3.09375,
      "learning_rate": 3.1317060886677e-05,
      "loss": 0.8679,
      "step": 373460
    },
    {
      "epoch": 1.3089192400298604,
      "grad_norm": 2.4375,
      "learning_rate": 3.13164118580133e-05,
      "loss": 0.8501,
      "step": 373470
    },
    {
      "epoch": 1.3089542875367561,
      "grad_norm": 2.953125,
      "learning_rate": 3.13157628293496e-05,
      "loss": 0.8793,
      "step": 373480
    },
    {
      "epoch": 1.3089893350436517,
      "grad_norm": 3.328125,
      "learning_rate": 3.1315113800685896e-05,
      "loss": 0.7904,
      "step": 373490
    },
    {
      "epoch": 1.3090243825505472,
      "grad_norm": 2.875,
      "learning_rate": 3.13144647720222e-05,
      "loss": 0.8229,
      "step": 373500
    },
    {
      "epoch": 1.309059430057443,
      "grad_norm": 2.8125,
      "learning_rate": 3.131381574335849e-05,
      "loss": 0.8,
      "step": 373510
    },
    {
      "epoch": 1.3090944775643385,
      "grad_norm": 2.984375,
      "learning_rate": 3.1313166714694794e-05,
      "loss": 0.8214,
      "step": 373520
    },
    {
      "epoch": 1.309129525071234,
      "grad_norm": 2.859375,
      "learning_rate": 3.131251768603109e-05,
      "loss": 0.8738,
      "step": 373530
    },
    {
      "epoch": 1.3091645725781298,
      "grad_norm": 2.921875,
      "learning_rate": 3.131186865736739e-05,
      "loss": 0.9911,
      "step": 373540
    },
    {
      "epoch": 1.3091996200850253,
      "grad_norm": 3.109375,
      "learning_rate": 3.1311219628703684e-05,
      "loss": 0.8412,
      "step": 373550
    },
    {
      "epoch": 1.3092346675919209,
      "grad_norm": 2.796875,
      "learning_rate": 3.131057060003998e-05,
      "loss": 0.7812,
      "step": 373560
    },
    {
      "epoch": 1.3092697150988164,
      "grad_norm": 2.90625,
      "learning_rate": 3.130992157137628e-05,
      "loss": 0.7988,
      "step": 373570
    },
    {
      "epoch": 1.309304762605712,
      "grad_norm": 2.390625,
      "learning_rate": 3.1309272542712575e-05,
      "loss": 0.8881,
      "step": 373580
    },
    {
      "epoch": 1.3093398101126077,
      "grad_norm": 2.921875,
      "learning_rate": 3.1308623514048876e-05,
      "loss": 0.8555,
      "step": 373590
    },
    {
      "epoch": 1.3093748576195032,
      "grad_norm": 3.046875,
      "learning_rate": 3.130797448538517e-05,
      "loss": 0.8959,
      "step": 373600
    },
    {
      "epoch": 1.3094099051263988,
      "grad_norm": 2.828125,
      "learning_rate": 3.130732545672147e-05,
      "loss": 0.7822,
      "step": 373610
    },
    {
      "epoch": 1.3094449526332945,
      "grad_norm": 2.6875,
      "learning_rate": 3.130667642805777e-05,
      "loss": 0.7379,
      "step": 373620
    },
    {
      "epoch": 1.30948000014019,
      "grad_norm": 2.9375,
      "learning_rate": 3.130602739939407e-05,
      "loss": 0.7922,
      "step": 373630
    },
    {
      "epoch": 1.3095150476470856,
      "grad_norm": 2.40625,
      "learning_rate": 3.130537837073036e-05,
      "loss": 0.8036,
      "step": 373640
    },
    {
      "epoch": 1.3095500951539814,
      "grad_norm": 3.03125,
      "learning_rate": 3.1304729342066664e-05,
      "loss": 0.9295,
      "step": 373650
    },
    {
      "epoch": 1.3095851426608769,
      "grad_norm": 3.09375,
      "learning_rate": 3.130408031340296e-05,
      "loss": 0.8194,
      "step": 373660
    },
    {
      "epoch": 1.3096201901677724,
      "grad_norm": 2.9375,
      "learning_rate": 3.130343128473926e-05,
      "loss": 0.8502,
      "step": 373670
    },
    {
      "epoch": 1.309655237674668,
      "grad_norm": 3.015625,
      "learning_rate": 3.1302782256075555e-05,
      "loss": 0.8354,
      "step": 373680
    },
    {
      "epoch": 1.3096902851815635,
      "grad_norm": 2.625,
      "learning_rate": 3.1302133227411856e-05,
      "loss": 0.7938,
      "step": 373690
    },
    {
      "epoch": 1.3097253326884593,
      "grad_norm": 3.390625,
      "learning_rate": 3.130148419874816e-05,
      "loss": 0.8569,
      "step": 373700
    },
    {
      "epoch": 1.3097603801953548,
      "grad_norm": 3.390625,
      "learning_rate": 3.130083517008445e-05,
      "loss": 0.8327,
      "step": 373710
    },
    {
      "epoch": 1.3097954277022503,
      "grad_norm": 2.953125,
      "learning_rate": 3.1300186141420754e-05,
      "loss": 0.9281,
      "step": 373720
    },
    {
      "epoch": 1.309830475209146,
      "grad_norm": 2.703125,
      "learning_rate": 3.129953711275705e-05,
      "loss": 0.826,
      "step": 373730
    },
    {
      "epoch": 1.3098655227160416,
      "grad_norm": 3.140625,
      "learning_rate": 3.129888808409335e-05,
      "loss": 0.7666,
      "step": 373740
    },
    {
      "epoch": 1.3099005702229372,
      "grad_norm": 3.09375,
      "learning_rate": 3.1298239055429644e-05,
      "loss": 0.9179,
      "step": 373750
    },
    {
      "epoch": 1.309935617729833,
      "grad_norm": 3.125,
      "learning_rate": 3.1297590026765946e-05,
      "loss": 0.8495,
      "step": 373760
    },
    {
      "epoch": 1.3099706652367284,
      "grad_norm": 2.765625,
      "learning_rate": 3.129694099810224e-05,
      "loss": 0.8489,
      "step": 373770
    },
    {
      "epoch": 1.310005712743624,
      "grad_norm": 2.75,
      "learning_rate": 3.129629196943854e-05,
      "loss": 0.7769,
      "step": 373780
    },
    {
      "epoch": 1.3100407602505195,
      "grad_norm": 3.015625,
      "learning_rate": 3.1295642940774836e-05,
      "loss": 0.9216,
      "step": 373790
    },
    {
      "epoch": 1.310075807757415,
      "grad_norm": 2.734375,
      "learning_rate": 3.129499391211114e-05,
      "loss": 0.8488,
      "step": 373800
    },
    {
      "epoch": 1.3101108552643108,
      "grad_norm": 2.71875,
      "learning_rate": 3.129434488344743e-05,
      "loss": 0.817,
      "step": 373810
    },
    {
      "epoch": 1.3101459027712063,
      "grad_norm": 2.890625,
      "learning_rate": 3.1293695854783734e-05,
      "loss": 0.8139,
      "step": 373820
    },
    {
      "epoch": 1.3101809502781019,
      "grad_norm": 2.71875,
      "learning_rate": 3.129304682612003e-05,
      "loss": 0.7855,
      "step": 373830
    },
    {
      "epoch": 1.3102159977849976,
      "grad_norm": 2.890625,
      "learning_rate": 3.129239779745633e-05,
      "loss": 0.805,
      "step": 373840
    },
    {
      "epoch": 1.3102510452918932,
      "grad_norm": 3.0625,
      "learning_rate": 3.129174876879263e-05,
      "loss": 0.86,
      "step": 373850
    },
    {
      "epoch": 1.3102860927987887,
      "grad_norm": 3.046875,
      "learning_rate": 3.1291099740128926e-05,
      "loss": 0.9291,
      "step": 373860
    },
    {
      "epoch": 1.3103211403056845,
      "grad_norm": 3.265625,
      "learning_rate": 3.129045071146523e-05,
      "loss": 0.8572,
      "step": 373870
    },
    {
      "epoch": 1.31035618781258,
      "grad_norm": 2.796875,
      "learning_rate": 3.128980168280152e-05,
      "loss": 0.9042,
      "step": 373880
    },
    {
      "epoch": 1.3103912353194755,
      "grad_norm": 2.796875,
      "learning_rate": 3.128915265413782e-05,
      "loss": 0.8883,
      "step": 373890
    },
    {
      "epoch": 1.310426282826371,
      "grad_norm": 3.03125,
      "learning_rate": 3.128850362547412e-05,
      "loss": 0.8934,
      "step": 373900
    },
    {
      "epoch": 1.3104613303332666,
      "grad_norm": 3.03125,
      "learning_rate": 3.128785459681042e-05,
      "loss": 0.8227,
      "step": 373910
    },
    {
      "epoch": 1.3104963778401624,
      "grad_norm": 2.9375,
      "learning_rate": 3.1287205568146714e-05,
      "loss": 0.8536,
      "step": 373920
    },
    {
      "epoch": 1.310531425347058,
      "grad_norm": 3.109375,
      "learning_rate": 3.128655653948301e-05,
      "loss": 0.798,
      "step": 373930
    },
    {
      "epoch": 1.3105664728539534,
      "grad_norm": 2.71875,
      "learning_rate": 3.128590751081931e-05,
      "loss": 0.862,
      "step": 373940
    },
    {
      "epoch": 1.3106015203608492,
      "grad_norm": 2.84375,
      "learning_rate": 3.1285258482155604e-05,
      "loss": 0.8645,
      "step": 373950
    },
    {
      "epoch": 1.3106365678677447,
      "grad_norm": 2.59375,
      "learning_rate": 3.1284609453491906e-05,
      "loss": 0.8848,
      "step": 373960
    },
    {
      "epoch": 1.3106716153746403,
      "grad_norm": 3.234375,
      "learning_rate": 3.12839604248282e-05,
      "loss": 0.8997,
      "step": 373970
    },
    {
      "epoch": 1.310706662881536,
      "grad_norm": 2.53125,
      "learning_rate": 3.12833113961645e-05,
      "loss": 0.8765,
      "step": 373980
    },
    {
      "epoch": 1.3107417103884316,
      "grad_norm": 2.953125,
      "learning_rate": 3.1282662367500796e-05,
      "loss": 0.9292,
      "step": 373990
    },
    {
      "epoch": 1.310776757895327,
      "grad_norm": 2.578125,
      "learning_rate": 3.12820133388371e-05,
      "loss": 0.7873,
      "step": 374000
    },
    {
      "epoch": 1.3108118054022226,
      "grad_norm": 2.9375,
      "learning_rate": 3.128136431017339e-05,
      "loss": 0.9128,
      "step": 374010
    },
    {
      "epoch": 1.3108468529091182,
      "grad_norm": 2.71875,
      "learning_rate": 3.1280715281509694e-05,
      "loss": 0.7823,
      "step": 374020
    },
    {
      "epoch": 1.310881900416014,
      "grad_norm": 3.25,
      "learning_rate": 3.128006625284599e-05,
      "loss": 0.8482,
      "step": 374030
    },
    {
      "epoch": 1.3109169479229095,
      "grad_norm": 2.71875,
      "learning_rate": 3.127941722418229e-05,
      "loss": 0.8368,
      "step": 374040
    },
    {
      "epoch": 1.310951995429805,
      "grad_norm": 2.96875,
      "learning_rate": 3.1278768195518584e-05,
      "loss": 0.9357,
      "step": 374050
    },
    {
      "epoch": 1.3109870429367008,
      "grad_norm": 2.4375,
      "learning_rate": 3.1278119166854886e-05,
      "loss": 0.7954,
      "step": 374060
    },
    {
      "epoch": 1.3110220904435963,
      "grad_norm": 3.1875,
      "learning_rate": 3.127747013819119e-05,
      "loss": 0.8574,
      "step": 374070
    },
    {
      "epoch": 1.3110571379504918,
      "grad_norm": 2.421875,
      "learning_rate": 3.127682110952748e-05,
      "loss": 0.8621,
      "step": 374080
    },
    {
      "epoch": 1.3110921854573876,
      "grad_norm": 2.953125,
      "learning_rate": 3.127617208086378e-05,
      "loss": 0.8758,
      "step": 374090
    },
    {
      "epoch": 1.3111272329642831,
      "grad_norm": 3.390625,
      "learning_rate": 3.127552305220008e-05,
      "loss": 0.8327,
      "step": 374100
    },
    {
      "epoch": 1.3111622804711787,
      "grad_norm": 2.609375,
      "learning_rate": 3.127487402353638e-05,
      "loss": 0.8375,
      "step": 374110
    },
    {
      "epoch": 1.3111973279780742,
      "grad_norm": 3.171875,
      "learning_rate": 3.1274224994872674e-05,
      "loss": 0.874,
      "step": 374120
    },
    {
      "epoch": 1.31123237548497,
      "grad_norm": 3.171875,
      "learning_rate": 3.1273575966208975e-05,
      "loss": 0.7875,
      "step": 374130
    },
    {
      "epoch": 1.3112674229918655,
      "grad_norm": 3.015625,
      "learning_rate": 3.127292693754527e-05,
      "loss": 0.8468,
      "step": 374140
    },
    {
      "epoch": 1.311302470498761,
      "grad_norm": 3.125,
      "learning_rate": 3.127227790888157e-05,
      "loss": 0.8374,
      "step": 374150
    },
    {
      "epoch": 1.3113375180056566,
      "grad_norm": 3.203125,
      "learning_rate": 3.1271628880217866e-05,
      "loss": 0.7823,
      "step": 374160
    },
    {
      "epoch": 1.3113725655125523,
      "grad_norm": 2.65625,
      "learning_rate": 3.127097985155417e-05,
      "loss": 0.9109,
      "step": 374170
    },
    {
      "epoch": 1.3114076130194479,
      "grad_norm": 3.359375,
      "learning_rate": 3.127033082289046e-05,
      "loss": 0.8111,
      "step": 374180
    },
    {
      "epoch": 1.3114426605263434,
      "grad_norm": 2.75,
      "learning_rate": 3.126968179422676e-05,
      "loss": 0.7701,
      "step": 374190
    },
    {
      "epoch": 1.3114777080332392,
      "grad_norm": 3.34375,
      "learning_rate": 3.1269032765563064e-05,
      "loss": 0.9065,
      "step": 374200
    },
    {
      "epoch": 1.3115127555401347,
      "grad_norm": 2.703125,
      "learning_rate": 3.126838373689936e-05,
      "loss": 0.8833,
      "step": 374210
    },
    {
      "epoch": 1.3115478030470302,
      "grad_norm": 2.90625,
      "learning_rate": 3.126773470823566e-05,
      "loss": 0.9421,
      "step": 374220
    },
    {
      "epoch": 1.311582850553926,
      "grad_norm": 2.734375,
      "learning_rate": 3.1267085679571955e-05,
      "loss": 0.8014,
      "step": 374230
    },
    {
      "epoch": 1.3116178980608215,
      "grad_norm": 2.453125,
      "learning_rate": 3.1266436650908256e-05,
      "loss": 0.8653,
      "step": 374240
    },
    {
      "epoch": 1.311652945567717,
      "grad_norm": 2.78125,
      "learning_rate": 3.126578762224455e-05,
      "loss": 0.8271,
      "step": 374250
    },
    {
      "epoch": 1.3116879930746126,
      "grad_norm": 2.5625,
      "learning_rate": 3.126513859358085e-05,
      "loss": 0.7911,
      "step": 374260
    },
    {
      "epoch": 1.3117230405815081,
      "grad_norm": 2.875,
      "learning_rate": 3.126448956491715e-05,
      "loss": 0.8278,
      "step": 374270
    },
    {
      "epoch": 1.3117580880884039,
      "grad_norm": 2.78125,
      "learning_rate": 3.126384053625345e-05,
      "loss": 0.8801,
      "step": 374280
    },
    {
      "epoch": 1.3117931355952994,
      "grad_norm": 2.859375,
      "learning_rate": 3.126319150758974e-05,
      "loss": 0.8516,
      "step": 374290
    },
    {
      "epoch": 1.311828183102195,
      "grad_norm": 3.140625,
      "learning_rate": 3.1262542478926044e-05,
      "loss": 0.9141,
      "step": 374300
    },
    {
      "epoch": 1.3118632306090907,
      "grad_norm": 3.359375,
      "learning_rate": 3.126189345026234e-05,
      "loss": 0.9352,
      "step": 374310
    },
    {
      "epoch": 1.3118982781159862,
      "grad_norm": 3.296875,
      "learning_rate": 3.1261244421598634e-05,
      "loss": 0.8743,
      "step": 374320
    },
    {
      "epoch": 1.3119333256228818,
      "grad_norm": 3.078125,
      "learning_rate": 3.1260595392934935e-05,
      "loss": 0.8849,
      "step": 374330
    },
    {
      "epoch": 1.3119683731297775,
      "grad_norm": 2.875,
      "learning_rate": 3.125994636427123e-05,
      "loss": 0.8254,
      "step": 374340
    },
    {
      "epoch": 1.312003420636673,
      "grad_norm": 2.46875,
      "learning_rate": 3.125929733560753e-05,
      "loss": 0.8489,
      "step": 374350
    },
    {
      "epoch": 1.3120384681435686,
      "grad_norm": 2.65625,
      "learning_rate": 3.1258648306943826e-05,
      "loss": 0.8678,
      "step": 374360
    },
    {
      "epoch": 1.3120735156504642,
      "grad_norm": 2.9375,
      "learning_rate": 3.125799927828013e-05,
      "loss": 0.8732,
      "step": 374370
    },
    {
      "epoch": 1.3121085631573597,
      "grad_norm": 2.8125,
      "learning_rate": 3.125735024961642e-05,
      "loss": 0.8095,
      "step": 374380
    },
    {
      "epoch": 1.3121436106642554,
      "grad_norm": 2.953125,
      "learning_rate": 3.125670122095272e-05,
      "loss": 0.9126,
      "step": 374390
    },
    {
      "epoch": 1.312178658171151,
      "grad_norm": 2.953125,
      "learning_rate": 3.125605219228902e-05,
      "loss": 0.9509,
      "step": 374400
    },
    {
      "epoch": 1.3122137056780465,
      "grad_norm": 3.046875,
      "learning_rate": 3.125540316362532e-05,
      "loss": 0.8752,
      "step": 374410
    },
    {
      "epoch": 1.3122487531849423,
      "grad_norm": 2.453125,
      "learning_rate": 3.1254754134961614e-05,
      "loss": 0.8972,
      "step": 374420
    },
    {
      "epoch": 1.3122838006918378,
      "grad_norm": 2.90625,
      "learning_rate": 3.1254105106297915e-05,
      "loss": 0.8806,
      "step": 374430
    },
    {
      "epoch": 1.3123188481987333,
      "grad_norm": 3.578125,
      "learning_rate": 3.1253456077634216e-05,
      "loss": 0.9031,
      "step": 374440
    },
    {
      "epoch": 1.312353895705629,
      "grad_norm": 3.046875,
      "learning_rate": 3.125280704897051e-05,
      "loss": 0.7319,
      "step": 374450
    },
    {
      "epoch": 1.3123889432125246,
      "grad_norm": 2.453125,
      "learning_rate": 3.125215802030681e-05,
      "loss": 0.8601,
      "step": 374460
    },
    {
      "epoch": 1.3124239907194202,
      "grad_norm": 2.71875,
      "learning_rate": 3.125150899164311e-05,
      "loss": 0.9106,
      "step": 374470
    },
    {
      "epoch": 1.3124590382263157,
      "grad_norm": 2.984375,
      "learning_rate": 3.125085996297941e-05,
      "loss": 0.9,
      "step": 374480
    },
    {
      "epoch": 1.3124940857332112,
      "grad_norm": 2.84375,
      "learning_rate": 3.12502109343157e-05,
      "loss": 0.8187,
      "step": 374490
    },
    {
      "epoch": 1.312529133240107,
      "grad_norm": 2.78125,
      "learning_rate": 3.1249561905652004e-05,
      "loss": 0.8896,
      "step": 374500
    },
    {
      "epoch": 1.3125641807470025,
      "grad_norm": 2.734375,
      "learning_rate": 3.12489128769883e-05,
      "loss": 0.8198,
      "step": 374510
    },
    {
      "epoch": 1.312599228253898,
      "grad_norm": 2.984375,
      "learning_rate": 3.12482638483246e-05,
      "loss": 0.8543,
      "step": 374520
    },
    {
      "epoch": 1.3126342757607938,
      "grad_norm": 2.875,
      "learning_rate": 3.1247614819660895e-05,
      "loss": 0.8137,
      "step": 374530
    },
    {
      "epoch": 1.3126693232676894,
      "grad_norm": 2.765625,
      "learning_rate": 3.1246965790997196e-05,
      "loss": 0.8704,
      "step": 374540
    },
    {
      "epoch": 1.312704370774585,
      "grad_norm": 3.21875,
      "learning_rate": 3.124631676233349e-05,
      "loss": 0.8491,
      "step": 374550
    },
    {
      "epoch": 1.3127394182814807,
      "grad_norm": 2.953125,
      "learning_rate": 3.124566773366979e-05,
      "loss": 0.813,
      "step": 374560
    },
    {
      "epoch": 1.3127744657883762,
      "grad_norm": 3.453125,
      "learning_rate": 3.1245018705006094e-05,
      "loss": 0.8254,
      "step": 374570
    },
    {
      "epoch": 1.3128095132952717,
      "grad_norm": 2.5625,
      "learning_rate": 3.124436967634239e-05,
      "loss": 0.8718,
      "step": 374580
    },
    {
      "epoch": 1.3128445608021673,
      "grad_norm": 2.90625,
      "learning_rate": 3.124372064767869e-05,
      "loss": 0.8997,
      "step": 374590
    },
    {
      "epoch": 1.3128796083090628,
      "grad_norm": 2.859375,
      "learning_rate": 3.1243071619014984e-05,
      "loss": 0.8881,
      "step": 374600
    },
    {
      "epoch": 1.3129146558159586,
      "grad_norm": 3.125,
      "learning_rate": 3.1242422590351286e-05,
      "loss": 0.8621,
      "step": 374610
    },
    {
      "epoch": 1.312949703322854,
      "grad_norm": 2.875,
      "learning_rate": 3.124177356168758e-05,
      "loss": 0.8949,
      "step": 374620
    },
    {
      "epoch": 1.3129847508297496,
      "grad_norm": 2.609375,
      "learning_rate": 3.124112453302388e-05,
      "loss": 0.8117,
      "step": 374630
    },
    {
      "epoch": 1.3130197983366454,
      "grad_norm": 3.109375,
      "learning_rate": 3.1240475504360176e-05,
      "loss": 0.8805,
      "step": 374640
    },
    {
      "epoch": 1.313054845843541,
      "grad_norm": 2.65625,
      "learning_rate": 3.123982647569648e-05,
      "loss": 0.7834,
      "step": 374650
    },
    {
      "epoch": 1.3130898933504365,
      "grad_norm": 2.875,
      "learning_rate": 3.123917744703277e-05,
      "loss": 0.9004,
      "step": 374660
    },
    {
      "epoch": 1.3131249408573322,
      "grad_norm": 2.921875,
      "learning_rate": 3.1238528418369074e-05,
      "loss": 0.8212,
      "step": 374670
    },
    {
      "epoch": 1.3131599883642278,
      "grad_norm": 2.765625,
      "learning_rate": 3.123787938970537e-05,
      "loss": 0.8272,
      "step": 374680
    },
    {
      "epoch": 1.3131950358711233,
      "grad_norm": 2.765625,
      "learning_rate": 3.123723036104166e-05,
      "loss": 0.761,
      "step": 374690
    },
    {
      "epoch": 1.3132300833780188,
      "grad_norm": 3.46875,
      "learning_rate": 3.1236581332377964e-05,
      "loss": 0.8266,
      "step": 374700
    },
    {
      "epoch": 1.3132651308849144,
      "grad_norm": 3.015625,
      "learning_rate": 3.123593230371426e-05,
      "loss": 0.8345,
      "step": 374710
    },
    {
      "epoch": 1.3133001783918101,
      "grad_norm": 3.15625,
      "learning_rate": 3.123528327505056e-05,
      "loss": 0.8728,
      "step": 374720
    },
    {
      "epoch": 1.3133352258987057,
      "grad_norm": 3.125,
      "learning_rate": 3.1234634246386855e-05,
      "loss": 0.7627,
      "step": 374730
    },
    {
      "epoch": 1.3133702734056012,
      "grad_norm": 2.921875,
      "learning_rate": 3.1233985217723156e-05,
      "loss": 0.8368,
      "step": 374740
    },
    {
      "epoch": 1.313405320912497,
      "grad_norm": 2.953125,
      "learning_rate": 3.123333618905945e-05,
      "loss": 0.7818,
      "step": 374750
    },
    {
      "epoch": 1.3134403684193925,
      "grad_norm": 2.78125,
      "learning_rate": 3.123268716039575e-05,
      "loss": 0.8845,
      "step": 374760
    },
    {
      "epoch": 1.313475415926288,
      "grad_norm": 2.9375,
      "learning_rate": 3.123203813173205e-05,
      "loss": 0.798,
      "step": 374770
    },
    {
      "epoch": 1.3135104634331838,
      "grad_norm": 2.859375,
      "learning_rate": 3.123138910306835e-05,
      "loss": 0.8029,
      "step": 374780
    },
    {
      "epoch": 1.3135455109400793,
      "grad_norm": 2.828125,
      "learning_rate": 3.123074007440464e-05,
      "loss": 0.8746,
      "step": 374790
    },
    {
      "epoch": 1.3135805584469749,
      "grad_norm": 2.609375,
      "learning_rate": 3.1230091045740944e-05,
      "loss": 0.8104,
      "step": 374800
    },
    {
      "epoch": 1.3136156059538704,
      "grad_norm": 3.03125,
      "learning_rate": 3.1229442017077246e-05,
      "loss": 0.7998,
      "step": 374810
    },
    {
      "epoch": 1.3136506534607661,
      "grad_norm": 3.234375,
      "learning_rate": 3.122879298841354e-05,
      "loss": 0.8702,
      "step": 374820
    },
    {
      "epoch": 1.3136857009676617,
      "grad_norm": 2.703125,
      "learning_rate": 3.122814395974984e-05,
      "loss": 0.8643,
      "step": 374830
    },
    {
      "epoch": 1.3137207484745572,
      "grad_norm": 2.78125,
      "learning_rate": 3.1227494931086136e-05,
      "loss": 0.7847,
      "step": 374840
    },
    {
      "epoch": 1.3137557959814528,
      "grad_norm": 2.9375,
      "learning_rate": 3.122684590242244e-05,
      "loss": 0.777,
      "step": 374850
    },
    {
      "epoch": 1.3137908434883485,
      "grad_norm": 2.765625,
      "learning_rate": 3.122619687375873e-05,
      "loss": 0.9207,
      "step": 374860
    },
    {
      "epoch": 1.313825890995244,
      "grad_norm": 2.828125,
      "learning_rate": 3.1225547845095034e-05,
      "loss": 0.9192,
      "step": 374870
    },
    {
      "epoch": 1.3138609385021396,
      "grad_norm": 2.9375,
      "learning_rate": 3.122489881643133e-05,
      "loss": 0.7905,
      "step": 374880
    },
    {
      "epoch": 1.3138959860090353,
      "grad_norm": 2.921875,
      "learning_rate": 3.122424978776763e-05,
      "loss": 0.865,
      "step": 374890
    },
    {
      "epoch": 1.3139310335159309,
      "grad_norm": 3.390625,
      "learning_rate": 3.1223600759103924e-05,
      "loss": 0.831,
      "step": 374900
    },
    {
      "epoch": 1.3139660810228264,
      "grad_norm": 2.734375,
      "learning_rate": 3.1222951730440226e-05,
      "loss": 0.8454,
      "step": 374910
    },
    {
      "epoch": 1.3140011285297222,
      "grad_norm": 3.375,
      "learning_rate": 3.122230270177652e-05,
      "loss": 0.9232,
      "step": 374920
    },
    {
      "epoch": 1.3140361760366177,
      "grad_norm": 3.234375,
      "learning_rate": 3.122165367311282e-05,
      "loss": 0.8113,
      "step": 374930
    },
    {
      "epoch": 1.3140712235435132,
      "grad_norm": 2.5625,
      "learning_rate": 3.122100464444912e-05,
      "loss": 0.7951,
      "step": 374940
    },
    {
      "epoch": 1.3141062710504088,
      "grad_norm": 2.609375,
      "learning_rate": 3.122035561578542e-05,
      "loss": 0.812,
      "step": 374950
    },
    {
      "epoch": 1.3141413185573043,
      "grad_norm": 2.859375,
      "learning_rate": 3.121970658712172e-05,
      "loss": 0.8675,
      "step": 374960
    },
    {
      "epoch": 1.3141763660642,
      "grad_norm": 2.5625,
      "learning_rate": 3.1219057558458014e-05,
      "loss": 0.7952,
      "step": 374970
    },
    {
      "epoch": 1.3142114135710956,
      "grad_norm": 3.078125,
      "learning_rate": 3.1218408529794315e-05,
      "loss": 0.8362,
      "step": 374980
    },
    {
      "epoch": 1.3142464610779911,
      "grad_norm": 3.0,
      "learning_rate": 3.121775950113061e-05,
      "loss": 0.8524,
      "step": 374990
    },
    {
      "epoch": 1.314281508584887,
      "grad_norm": 2.609375,
      "learning_rate": 3.121711047246691e-05,
      "loss": 0.8527,
      "step": 375000
    },
    {
      "epoch": 1.314281508584887,
      "eval_loss": 0.7993927001953125,
      "eval_runtime": 557.5307,
      "eval_samples_per_second": 682.359,
      "eval_steps_per_second": 56.863,
      "step": 375000
    },
    {
      "epoch": 1.3143165560917824,
      "grad_norm": 2.953125,
      "learning_rate": 3.1216461443803206e-05,
      "loss": 0.8917,
      "step": 375010
    },
    {
      "epoch": 1.314351603598678,
      "grad_norm": 2.859375,
      "learning_rate": 3.121581241513951e-05,
      "loss": 0.7948,
      "step": 375020
    },
    {
      "epoch": 1.3143866511055737,
      "grad_norm": 3.0625,
      "learning_rate": 3.12151633864758e-05,
      "loss": 0.8838,
      "step": 375030
    },
    {
      "epoch": 1.3144216986124693,
      "grad_norm": 3.171875,
      "learning_rate": 3.12145143578121e-05,
      "loss": 0.9149,
      "step": 375040
    },
    {
      "epoch": 1.3144567461193648,
      "grad_norm": 3.265625,
      "learning_rate": 3.12138653291484e-05,
      "loss": 0.8684,
      "step": 375050
    },
    {
      "epoch": 1.3144917936262603,
      "grad_norm": 3.15625,
      "learning_rate": 3.121321630048469e-05,
      "loss": 0.9285,
      "step": 375060
    },
    {
      "epoch": 1.3145268411331559,
      "grad_norm": 2.90625,
      "learning_rate": 3.1212567271820994e-05,
      "loss": 0.8564,
      "step": 375070
    },
    {
      "epoch": 1.3145618886400516,
      "grad_norm": 2.96875,
      "learning_rate": 3.121191824315729e-05,
      "loss": 0.9395,
      "step": 375080
    },
    {
      "epoch": 1.3145969361469472,
      "grad_norm": 3.1875,
      "learning_rate": 3.121126921449359e-05,
      "loss": 0.8354,
      "step": 375090
    },
    {
      "epoch": 1.3146319836538427,
      "grad_norm": 3.25,
      "learning_rate": 3.1210620185829884e-05,
      "loss": 0.8574,
      "step": 375100
    },
    {
      "epoch": 1.3146670311607385,
      "grad_norm": 3.265625,
      "learning_rate": 3.1209971157166186e-05,
      "loss": 0.9094,
      "step": 375110
    },
    {
      "epoch": 1.314702078667634,
      "grad_norm": 3.140625,
      "learning_rate": 3.120932212850248e-05,
      "loss": 0.8351,
      "step": 375120
    },
    {
      "epoch": 1.3147371261745295,
      "grad_norm": 2.328125,
      "learning_rate": 3.120867309983878e-05,
      "loss": 0.7741,
      "step": 375130
    },
    {
      "epoch": 1.3147721736814253,
      "grad_norm": 3.125,
      "learning_rate": 3.1208024071175076e-05,
      "loss": 0.8694,
      "step": 375140
    },
    {
      "epoch": 1.3148072211883208,
      "grad_norm": 3.078125,
      "learning_rate": 3.120737504251138e-05,
      "loss": 0.7737,
      "step": 375150
    },
    {
      "epoch": 1.3148422686952164,
      "grad_norm": 2.796875,
      "learning_rate": 3.120672601384767e-05,
      "loss": 0.8406,
      "step": 375160
    },
    {
      "epoch": 1.314877316202112,
      "grad_norm": 2.84375,
      "learning_rate": 3.1206076985183974e-05,
      "loss": 0.8893,
      "step": 375170
    },
    {
      "epoch": 1.3149123637090074,
      "grad_norm": 2.859375,
      "learning_rate": 3.1205427956520275e-05,
      "loss": 0.7685,
      "step": 375180
    },
    {
      "epoch": 1.3149474112159032,
      "grad_norm": 2.671875,
      "learning_rate": 3.120477892785657e-05,
      "loss": 0.8242,
      "step": 375190
    },
    {
      "epoch": 1.3149824587227987,
      "grad_norm": 2.96875,
      "learning_rate": 3.120412989919287e-05,
      "loss": 0.8913,
      "step": 375200
    },
    {
      "epoch": 1.3150175062296943,
      "grad_norm": 2.5625,
      "learning_rate": 3.1203480870529166e-05,
      "loss": 0.7909,
      "step": 375210
    },
    {
      "epoch": 1.31505255373659,
      "grad_norm": 2.578125,
      "learning_rate": 3.120283184186547e-05,
      "loss": 0.8361,
      "step": 375220
    },
    {
      "epoch": 1.3150876012434856,
      "grad_norm": 3.015625,
      "learning_rate": 3.120218281320176e-05,
      "loss": 0.9121,
      "step": 375230
    },
    {
      "epoch": 1.315122648750381,
      "grad_norm": 2.859375,
      "learning_rate": 3.120153378453806e-05,
      "loss": 0.8246,
      "step": 375240
    },
    {
      "epoch": 1.3151576962572769,
      "grad_norm": 2.734375,
      "learning_rate": 3.120088475587436e-05,
      "loss": 0.9543,
      "step": 375250
    },
    {
      "epoch": 1.3151927437641724,
      "grad_norm": 3.0625,
      "learning_rate": 3.120023572721066e-05,
      "loss": 0.8916,
      "step": 375260
    },
    {
      "epoch": 1.315227791271068,
      "grad_norm": 3.15625,
      "learning_rate": 3.1199586698546954e-05,
      "loss": 0.9076,
      "step": 375270
    },
    {
      "epoch": 1.3152628387779635,
      "grad_norm": 2.765625,
      "learning_rate": 3.1198937669883255e-05,
      "loss": 0.8752,
      "step": 375280
    },
    {
      "epoch": 1.315297886284859,
      "grad_norm": 2.90625,
      "learning_rate": 3.119828864121955e-05,
      "loss": 0.7986,
      "step": 375290
    },
    {
      "epoch": 1.3153329337917548,
      "grad_norm": 3.453125,
      "learning_rate": 3.119763961255585e-05,
      "loss": 0.8807,
      "step": 375300
    },
    {
      "epoch": 1.3153679812986503,
      "grad_norm": 3.21875,
      "learning_rate": 3.119699058389215e-05,
      "loss": 0.8595,
      "step": 375310
    },
    {
      "epoch": 1.3154030288055458,
      "grad_norm": 3.15625,
      "learning_rate": 3.119634155522845e-05,
      "loss": 0.9076,
      "step": 375320
    },
    {
      "epoch": 1.3154380763124416,
      "grad_norm": 3.203125,
      "learning_rate": 3.119569252656475e-05,
      "loss": 0.8338,
      "step": 375330
    },
    {
      "epoch": 1.3154731238193371,
      "grad_norm": 3.53125,
      "learning_rate": 3.119504349790104e-05,
      "loss": 0.8374,
      "step": 375340
    },
    {
      "epoch": 1.3155081713262327,
      "grad_norm": 3.28125,
      "learning_rate": 3.1194394469237345e-05,
      "loss": 0.8772,
      "step": 375350
    },
    {
      "epoch": 1.3155432188331284,
      "grad_norm": 2.921875,
      "learning_rate": 3.119374544057364e-05,
      "loss": 0.7743,
      "step": 375360
    },
    {
      "epoch": 1.315578266340024,
      "grad_norm": 2.9375,
      "learning_rate": 3.119309641190994e-05,
      "loss": 0.9156,
      "step": 375370
    },
    {
      "epoch": 1.3156133138469195,
      "grad_norm": 3.203125,
      "learning_rate": 3.1192447383246235e-05,
      "loss": 0.8698,
      "step": 375380
    },
    {
      "epoch": 1.315648361353815,
      "grad_norm": 2.578125,
      "learning_rate": 3.1191798354582537e-05,
      "loss": 0.8405,
      "step": 375390
    },
    {
      "epoch": 1.3156834088607106,
      "grad_norm": 2.796875,
      "learning_rate": 3.119114932591883e-05,
      "loss": 0.8374,
      "step": 375400
    },
    {
      "epoch": 1.3157184563676063,
      "grad_norm": 2.984375,
      "learning_rate": 3.119050029725513e-05,
      "loss": 0.8841,
      "step": 375410
    },
    {
      "epoch": 1.3157535038745019,
      "grad_norm": 2.734375,
      "learning_rate": 3.118985126859143e-05,
      "loss": 0.9023,
      "step": 375420
    },
    {
      "epoch": 1.3157885513813974,
      "grad_norm": 3.125,
      "learning_rate": 3.118920223992773e-05,
      "loss": 0.9605,
      "step": 375430
    },
    {
      "epoch": 1.3158235988882931,
      "grad_norm": 2.984375,
      "learning_rate": 3.118855321126402e-05,
      "loss": 0.8572,
      "step": 375440
    },
    {
      "epoch": 1.3158586463951887,
      "grad_norm": 3.046875,
      "learning_rate": 3.118790418260032e-05,
      "loss": 0.8281,
      "step": 375450
    },
    {
      "epoch": 1.3158936939020842,
      "grad_norm": 3.390625,
      "learning_rate": 3.118725515393662e-05,
      "loss": 0.8536,
      "step": 375460
    },
    {
      "epoch": 1.31592874140898,
      "grad_norm": 2.671875,
      "learning_rate": 3.1186606125272914e-05,
      "loss": 0.8234,
      "step": 375470
    },
    {
      "epoch": 1.3159637889158755,
      "grad_norm": 3.109375,
      "learning_rate": 3.1185957096609215e-05,
      "loss": 0.8866,
      "step": 375480
    },
    {
      "epoch": 1.315998836422771,
      "grad_norm": 3.328125,
      "learning_rate": 3.118530806794551e-05,
      "loss": 0.8511,
      "step": 375490
    },
    {
      "epoch": 1.3160338839296666,
      "grad_norm": 2.65625,
      "learning_rate": 3.118465903928181e-05,
      "loss": 0.8822,
      "step": 375500
    },
    {
      "epoch": 1.3160689314365623,
      "grad_norm": 3.0,
      "learning_rate": 3.1184010010618106e-05,
      "loss": 0.799,
      "step": 375510
    },
    {
      "epoch": 1.3161039789434579,
      "grad_norm": 3.65625,
      "learning_rate": 3.118336098195441e-05,
      "loss": 0.9137,
      "step": 375520
    },
    {
      "epoch": 1.3161390264503534,
      "grad_norm": 2.9375,
      "learning_rate": 3.118271195329071e-05,
      "loss": 0.8655,
      "step": 375530
    },
    {
      "epoch": 1.316174073957249,
      "grad_norm": 3.046875,
      "learning_rate": 3.1182062924627e-05,
      "loss": 0.807,
      "step": 375540
    },
    {
      "epoch": 1.3162091214641447,
      "grad_norm": 2.640625,
      "learning_rate": 3.1181413895963305e-05,
      "loss": 0.9087,
      "step": 375550
    },
    {
      "epoch": 1.3162441689710402,
      "grad_norm": 3.078125,
      "learning_rate": 3.11807648672996e-05,
      "loss": 0.8914,
      "step": 375560
    },
    {
      "epoch": 1.3162792164779358,
      "grad_norm": 2.796875,
      "learning_rate": 3.11801158386359e-05,
      "loss": 0.8373,
      "step": 375570
    },
    {
      "epoch": 1.3163142639848315,
      "grad_norm": 2.90625,
      "learning_rate": 3.1179466809972195e-05,
      "loss": 0.8166,
      "step": 375580
    },
    {
      "epoch": 1.316349311491727,
      "grad_norm": 3.09375,
      "learning_rate": 3.1178817781308497e-05,
      "loss": 0.8578,
      "step": 375590
    },
    {
      "epoch": 1.3163843589986226,
      "grad_norm": 2.671875,
      "learning_rate": 3.117816875264479e-05,
      "loss": 0.8267,
      "step": 375600
    },
    {
      "epoch": 1.3164194065055184,
      "grad_norm": 2.84375,
      "learning_rate": 3.117751972398109e-05,
      "loss": 0.8718,
      "step": 375610
    },
    {
      "epoch": 1.316454454012414,
      "grad_norm": 3.1875,
      "learning_rate": 3.117687069531739e-05,
      "loss": 0.8759,
      "step": 375620
    },
    {
      "epoch": 1.3164895015193094,
      "grad_norm": 2.9375,
      "learning_rate": 3.117622166665369e-05,
      "loss": 0.8015,
      "step": 375630
    },
    {
      "epoch": 1.316524549026205,
      "grad_norm": 3.046875,
      "learning_rate": 3.117557263798998e-05,
      "loss": 0.8756,
      "step": 375640
    },
    {
      "epoch": 1.3165595965331005,
      "grad_norm": 3.09375,
      "learning_rate": 3.1174923609326285e-05,
      "loss": 0.8507,
      "step": 375650
    },
    {
      "epoch": 1.3165946440399963,
      "grad_norm": 2.921875,
      "learning_rate": 3.117427458066258e-05,
      "loss": 0.8731,
      "step": 375660
    },
    {
      "epoch": 1.3166296915468918,
      "grad_norm": 3.171875,
      "learning_rate": 3.117362555199888e-05,
      "loss": 0.8777,
      "step": 375670
    },
    {
      "epoch": 1.3166647390537873,
      "grad_norm": 2.46875,
      "learning_rate": 3.117297652333518e-05,
      "loss": 0.8022,
      "step": 375680
    },
    {
      "epoch": 1.316699786560683,
      "grad_norm": 5.53125,
      "learning_rate": 3.1172327494671477e-05,
      "loss": 0.815,
      "step": 375690
    },
    {
      "epoch": 1.3167348340675786,
      "grad_norm": 2.921875,
      "learning_rate": 3.117167846600778e-05,
      "loss": 0.8731,
      "step": 375700
    },
    {
      "epoch": 1.3167698815744742,
      "grad_norm": 2.859375,
      "learning_rate": 3.117102943734407e-05,
      "loss": 0.8803,
      "step": 375710
    },
    {
      "epoch": 1.31680492908137,
      "grad_norm": 3.09375,
      "learning_rate": 3.1170380408680374e-05,
      "loss": 0.8433,
      "step": 375720
    },
    {
      "epoch": 1.3168399765882655,
      "grad_norm": 2.84375,
      "learning_rate": 3.116973138001667e-05,
      "loss": 0.8873,
      "step": 375730
    },
    {
      "epoch": 1.316875024095161,
      "grad_norm": 2.84375,
      "learning_rate": 3.116908235135297e-05,
      "loss": 0.9321,
      "step": 375740
    },
    {
      "epoch": 1.3169100716020565,
      "grad_norm": 2.96875,
      "learning_rate": 3.1168433322689265e-05,
      "loss": 0.827,
      "step": 375750
    },
    {
      "epoch": 1.316945119108952,
      "grad_norm": 3.140625,
      "learning_rate": 3.1167784294025566e-05,
      "loss": 0.84,
      "step": 375760
    },
    {
      "epoch": 1.3169801666158478,
      "grad_norm": 2.921875,
      "learning_rate": 3.116713526536186e-05,
      "loss": 0.8661,
      "step": 375770
    },
    {
      "epoch": 1.3170152141227434,
      "grad_norm": 2.59375,
      "learning_rate": 3.116648623669816e-05,
      "loss": 0.8504,
      "step": 375780
    },
    {
      "epoch": 1.317050261629639,
      "grad_norm": 3.046875,
      "learning_rate": 3.1165837208034457e-05,
      "loss": 0.8849,
      "step": 375790
    },
    {
      "epoch": 1.3170853091365347,
      "grad_norm": 2.875,
      "learning_rate": 3.116518817937076e-05,
      "loss": 0.7894,
      "step": 375800
    },
    {
      "epoch": 1.3171203566434302,
      "grad_norm": 2.71875,
      "learning_rate": 3.116453915070705e-05,
      "loss": 0.8297,
      "step": 375810
    },
    {
      "epoch": 1.3171554041503257,
      "grad_norm": 2.890625,
      "learning_rate": 3.116389012204335e-05,
      "loss": 0.9116,
      "step": 375820
    },
    {
      "epoch": 1.3171904516572215,
      "grad_norm": 3.0,
      "learning_rate": 3.116324109337965e-05,
      "loss": 0.8799,
      "step": 375830
    },
    {
      "epoch": 1.317225499164117,
      "grad_norm": 3.203125,
      "learning_rate": 3.116259206471594e-05,
      "loss": 0.8717,
      "step": 375840
    },
    {
      "epoch": 1.3172605466710126,
      "grad_norm": 2.78125,
      "learning_rate": 3.1161943036052245e-05,
      "loss": 0.8281,
      "step": 375850
    },
    {
      "epoch": 1.317295594177908,
      "grad_norm": 3.015625,
      "learning_rate": 3.116129400738854e-05,
      "loss": 0.9478,
      "step": 375860
    },
    {
      "epoch": 1.3173306416848036,
      "grad_norm": 3.09375,
      "learning_rate": 3.116064497872484e-05,
      "loss": 0.8609,
      "step": 375870
    },
    {
      "epoch": 1.3173656891916994,
      "grad_norm": 2.8125,
      "learning_rate": 3.1159995950061135e-05,
      "loss": 0.8187,
      "step": 375880
    },
    {
      "epoch": 1.317400736698595,
      "grad_norm": 2.40625,
      "learning_rate": 3.1159346921397437e-05,
      "loss": 0.8245,
      "step": 375890
    },
    {
      "epoch": 1.3174357842054905,
      "grad_norm": 2.84375,
      "learning_rate": 3.115869789273374e-05,
      "loss": 0.8329,
      "step": 375900
    },
    {
      "epoch": 1.3174708317123862,
      "grad_norm": 2.703125,
      "learning_rate": 3.115804886407003e-05,
      "loss": 0.8667,
      "step": 375910
    },
    {
      "epoch": 1.3175058792192818,
      "grad_norm": 2.71875,
      "learning_rate": 3.1157399835406334e-05,
      "loss": 0.8477,
      "step": 375920
    },
    {
      "epoch": 1.3175409267261773,
      "grad_norm": 3.046875,
      "learning_rate": 3.115675080674263e-05,
      "loss": 0.8592,
      "step": 375930
    },
    {
      "epoch": 1.317575974233073,
      "grad_norm": 3.203125,
      "learning_rate": 3.115610177807893e-05,
      "loss": 0.9055,
      "step": 375940
    },
    {
      "epoch": 1.3176110217399686,
      "grad_norm": 2.890625,
      "learning_rate": 3.1155452749415225e-05,
      "loss": 0.8245,
      "step": 375950
    },
    {
      "epoch": 1.3176460692468641,
      "grad_norm": 3.3125,
      "learning_rate": 3.1154803720751526e-05,
      "loss": 0.8623,
      "step": 375960
    },
    {
      "epoch": 1.3176811167537597,
      "grad_norm": 3.03125,
      "learning_rate": 3.115415469208782e-05,
      "loss": 0.8789,
      "step": 375970
    },
    {
      "epoch": 1.3177161642606552,
      "grad_norm": 2.984375,
      "learning_rate": 3.115350566342412e-05,
      "loss": 0.9063,
      "step": 375980
    },
    {
      "epoch": 1.317751211767551,
      "grad_norm": 3.078125,
      "learning_rate": 3.1152856634760417e-05,
      "loss": 0.8419,
      "step": 375990
    },
    {
      "epoch": 1.3177862592744465,
      "grad_norm": 2.96875,
      "learning_rate": 3.115220760609672e-05,
      "loss": 0.8722,
      "step": 376000
    },
    {
      "epoch": 1.317821306781342,
      "grad_norm": 2.765625,
      "learning_rate": 3.115155857743301e-05,
      "loss": 0.8184,
      "step": 376010
    },
    {
      "epoch": 1.3178563542882378,
      "grad_norm": 2.65625,
      "learning_rate": 3.1150909548769314e-05,
      "loss": 0.8657,
      "step": 376020
    },
    {
      "epoch": 1.3178914017951333,
      "grad_norm": 2.890625,
      "learning_rate": 3.115026052010561e-05,
      "loss": 0.9212,
      "step": 376030
    },
    {
      "epoch": 1.3179264493020288,
      "grad_norm": 2.734375,
      "learning_rate": 3.114961149144191e-05,
      "loss": 0.795,
      "step": 376040
    },
    {
      "epoch": 1.3179614968089246,
      "grad_norm": 2.734375,
      "learning_rate": 3.114896246277821e-05,
      "loss": 0.8344,
      "step": 376050
    },
    {
      "epoch": 1.3179965443158201,
      "grad_norm": 2.78125,
      "learning_rate": 3.1148313434114506e-05,
      "loss": 0.9104,
      "step": 376060
    },
    {
      "epoch": 1.3180315918227157,
      "grad_norm": 2.828125,
      "learning_rate": 3.114766440545081e-05,
      "loss": 0.8409,
      "step": 376070
    },
    {
      "epoch": 1.3180666393296112,
      "grad_norm": 2.765625,
      "learning_rate": 3.11470153767871e-05,
      "loss": 0.8616,
      "step": 376080
    },
    {
      "epoch": 1.3181016868365067,
      "grad_norm": 2.796875,
      "learning_rate": 3.11463663481234e-05,
      "loss": 0.9201,
      "step": 376090
    },
    {
      "epoch": 1.3181367343434025,
      "grad_norm": 2.6875,
      "learning_rate": 3.11457173194597e-05,
      "loss": 0.9021,
      "step": 376100
    },
    {
      "epoch": 1.318171781850298,
      "grad_norm": 2.703125,
      "learning_rate": 3.1145068290796e-05,
      "loss": 0.9542,
      "step": 376110
    },
    {
      "epoch": 1.3182068293571936,
      "grad_norm": 2.671875,
      "learning_rate": 3.1144419262132294e-05,
      "loss": 0.8875,
      "step": 376120
    },
    {
      "epoch": 1.3182418768640893,
      "grad_norm": 2.875,
      "learning_rate": 3.1143770233468595e-05,
      "loss": 0.85,
      "step": 376130
    },
    {
      "epoch": 1.3182769243709849,
      "grad_norm": 2.890625,
      "learning_rate": 3.114312120480489e-05,
      "loss": 0.8259,
      "step": 376140
    },
    {
      "epoch": 1.3183119718778804,
      "grad_norm": 3.21875,
      "learning_rate": 3.114247217614119e-05,
      "loss": 0.8244,
      "step": 376150
    },
    {
      "epoch": 1.3183470193847762,
      "grad_norm": 2.9375,
      "learning_rate": 3.1141823147477486e-05,
      "loss": 0.7488,
      "step": 376160
    },
    {
      "epoch": 1.3183820668916717,
      "grad_norm": 3.234375,
      "learning_rate": 3.114117411881379e-05,
      "loss": 0.8778,
      "step": 376170
    },
    {
      "epoch": 1.3184171143985672,
      "grad_norm": 3.09375,
      "learning_rate": 3.114052509015009e-05,
      "loss": 0.7474,
      "step": 376180
    },
    {
      "epoch": 1.3184521619054628,
      "grad_norm": 3.3125,
      "learning_rate": 3.1139876061486377e-05,
      "loss": 0.8593,
      "step": 376190
    },
    {
      "epoch": 1.3184872094123585,
      "grad_norm": 2.375,
      "learning_rate": 3.113922703282268e-05,
      "loss": 0.8044,
      "step": 376200
    },
    {
      "epoch": 1.318522256919254,
      "grad_norm": 3.015625,
      "learning_rate": 3.113857800415897e-05,
      "loss": 0.8892,
      "step": 376210
    },
    {
      "epoch": 1.3185573044261496,
      "grad_norm": 3.109375,
      "learning_rate": 3.1137928975495274e-05,
      "loss": 0.8974,
      "step": 376220
    },
    {
      "epoch": 1.3185923519330451,
      "grad_norm": 2.859375,
      "learning_rate": 3.113727994683157e-05,
      "loss": 0.8378,
      "step": 376230
    },
    {
      "epoch": 1.318627399439941,
      "grad_norm": 2.921875,
      "learning_rate": 3.113663091816787e-05,
      "loss": 0.8862,
      "step": 376240
    },
    {
      "epoch": 1.3186624469468364,
      "grad_norm": 3.125,
      "learning_rate": 3.1135981889504165e-05,
      "loss": 0.8666,
      "step": 376250
    },
    {
      "epoch": 1.318697494453732,
      "grad_norm": 2.890625,
      "learning_rate": 3.1135332860840466e-05,
      "loss": 0.8918,
      "step": 376260
    },
    {
      "epoch": 1.3187325419606277,
      "grad_norm": 2.984375,
      "learning_rate": 3.113468383217677e-05,
      "loss": 0.8147,
      "step": 376270
    },
    {
      "epoch": 1.3187675894675233,
      "grad_norm": 2.625,
      "learning_rate": 3.113403480351306e-05,
      "loss": 0.8307,
      "step": 376280
    },
    {
      "epoch": 1.3188026369744188,
      "grad_norm": 2.609375,
      "learning_rate": 3.113338577484936e-05,
      "loss": 0.8828,
      "step": 376290
    },
    {
      "epoch": 1.3188376844813146,
      "grad_norm": 2.625,
      "learning_rate": 3.113273674618566e-05,
      "loss": 0.8281,
      "step": 376300
    },
    {
      "epoch": 1.31887273198821,
      "grad_norm": 2.90625,
      "learning_rate": 3.113208771752196e-05,
      "loss": 0.7602,
      "step": 376310
    },
    {
      "epoch": 1.3189077794951056,
      "grad_norm": 2.71875,
      "learning_rate": 3.1131438688858254e-05,
      "loss": 0.8843,
      "step": 376320
    },
    {
      "epoch": 1.3189428270020012,
      "grad_norm": 3.0,
      "learning_rate": 3.1130789660194555e-05,
      "loss": 0.912,
      "step": 376330
    },
    {
      "epoch": 1.3189778745088967,
      "grad_norm": 2.796875,
      "learning_rate": 3.113014063153085e-05,
      "loss": 0.9828,
      "step": 376340
    },
    {
      "epoch": 1.3190129220157925,
      "grad_norm": 2.6875,
      "learning_rate": 3.112949160286715e-05,
      "loss": 0.8144,
      "step": 376350
    },
    {
      "epoch": 1.319047969522688,
      "grad_norm": 2.84375,
      "learning_rate": 3.1128842574203446e-05,
      "loss": 0.8546,
      "step": 376360
    },
    {
      "epoch": 1.3190830170295835,
      "grad_norm": 3.140625,
      "learning_rate": 3.112819354553975e-05,
      "loss": 0.8536,
      "step": 376370
    },
    {
      "epoch": 1.3191180645364793,
      "grad_norm": 3.0,
      "learning_rate": 3.112754451687604e-05,
      "loss": 0.9349,
      "step": 376380
    },
    {
      "epoch": 1.3191531120433748,
      "grad_norm": 3.3125,
      "learning_rate": 3.112689548821234e-05,
      "loss": 0.8192,
      "step": 376390
    },
    {
      "epoch": 1.3191881595502704,
      "grad_norm": 3.15625,
      "learning_rate": 3.1126246459548645e-05,
      "loss": 0.9297,
      "step": 376400
    },
    {
      "epoch": 1.3192232070571661,
      "grad_norm": 3.46875,
      "learning_rate": 3.112559743088494e-05,
      "loss": 0.8293,
      "step": 376410
    },
    {
      "epoch": 1.3192582545640616,
      "grad_norm": 2.71875,
      "learning_rate": 3.112494840222124e-05,
      "loss": 0.812,
      "step": 376420
    },
    {
      "epoch": 1.3192933020709572,
      "grad_norm": 2.84375,
      "learning_rate": 3.1124299373557535e-05,
      "loss": 0.7719,
      "step": 376430
    },
    {
      "epoch": 1.3193283495778527,
      "grad_norm": 2.859375,
      "learning_rate": 3.112365034489384e-05,
      "loss": 0.8651,
      "step": 376440
    },
    {
      "epoch": 1.3193633970847483,
      "grad_norm": 2.9375,
      "learning_rate": 3.112300131623013e-05,
      "loss": 0.891,
      "step": 376450
    },
    {
      "epoch": 1.319398444591644,
      "grad_norm": 3.03125,
      "learning_rate": 3.112235228756643e-05,
      "loss": 0.8716,
      "step": 376460
    },
    {
      "epoch": 1.3194334920985396,
      "grad_norm": 2.515625,
      "learning_rate": 3.112170325890273e-05,
      "loss": 0.8285,
      "step": 376470
    },
    {
      "epoch": 1.319468539605435,
      "grad_norm": 3.15625,
      "learning_rate": 3.112105423023903e-05,
      "loss": 0.8119,
      "step": 376480
    },
    {
      "epoch": 1.3195035871123308,
      "grad_norm": 3.171875,
      "learning_rate": 3.112040520157532e-05,
      "loss": 0.828,
      "step": 376490
    },
    {
      "epoch": 1.3195386346192264,
      "grad_norm": 2.984375,
      "learning_rate": 3.1119756172911625e-05,
      "loss": 0.8405,
      "step": 376500
    },
    {
      "epoch": 1.319573682126122,
      "grad_norm": 3.046875,
      "learning_rate": 3.111910714424792e-05,
      "loss": 0.8606,
      "step": 376510
    },
    {
      "epoch": 1.3196087296330177,
      "grad_norm": 3.015625,
      "learning_rate": 3.111845811558422e-05,
      "loss": 0.8386,
      "step": 376520
    },
    {
      "epoch": 1.3196437771399132,
      "grad_norm": 2.859375,
      "learning_rate": 3.1117809086920515e-05,
      "loss": 0.8245,
      "step": 376530
    },
    {
      "epoch": 1.3196788246468087,
      "grad_norm": 3.265625,
      "learning_rate": 3.111716005825682e-05,
      "loss": 0.808,
      "step": 376540
    },
    {
      "epoch": 1.3197138721537043,
      "grad_norm": 4.9375,
      "learning_rate": 3.111651102959312e-05,
      "loss": 0.8762,
      "step": 376550
    },
    {
      "epoch": 1.3197489196605998,
      "grad_norm": 2.796875,
      "learning_rate": 3.111586200092941e-05,
      "loss": 0.8832,
      "step": 376560
    },
    {
      "epoch": 1.3197839671674956,
      "grad_norm": 3.3125,
      "learning_rate": 3.111521297226571e-05,
      "loss": 0.8375,
      "step": 376570
    },
    {
      "epoch": 1.3198190146743911,
      "grad_norm": 2.71875,
      "learning_rate": 3.1114563943602e-05,
      "loss": 0.7904,
      "step": 376580
    },
    {
      "epoch": 1.3198540621812866,
      "grad_norm": 2.890625,
      "learning_rate": 3.11139149149383e-05,
      "loss": 0.9186,
      "step": 376590
    },
    {
      "epoch": 1.3198891096881824,
      "grad_norm": 2.421875,
      "learning_rate": 3.11132658862746e-05,
      "loss": 0.8062,
      "step": 376600
    },
    {
      "epoch": 1.319924157195078,
      "grad_norm": 3.25,
      "learning_rate": 3.11126168576109e-05,
      "loss": 0.9127,
      "step": 376610
    },
    {
      "epoch": 1.3199592047019735,
      "grad_norm": 2.75,
      "learning_rate": 3.1111967828947194e-05,
      "loss": 0.7221,
      "step": 376620
    },
    {
      "epoch": 1.3199942522088692,
      "grad_norm": 3.21875,
      "learning_rate": 3.1111318800283495e-05,
      "loss": 0.8912,
      "step": 376630
    },
    {
      "epoch": 1.3200292997157648,
      "grad_norm": 2.65625,
      "learning_rate": 3.11106697716198e-05,
      "loss": 0.819,
      "step": 376640
    },
    {
      "epoch": 1.3200643472226603,
      "grad_norm": 3.34375,
      "learning_rate": 3.111002074295609e-05,
      "loss": 0.918,
      "step": 376650
    },
    {
      "epoch": 1.3200993947295558,
      "grad_norm": 2.6875,
      "learning_rate": 3.110937171429239e-05,
      "loss": 0.8628,
      "step": 376660
    },
    {
      "epoch": 1.3201344422364514,
      "grad_norm": 2.640625,
      "learning_rate": 3.110872268562869e-05,
      "loss": 0.9068,
      "step": 376670
    },
    {
      "epoch": 1.3201694897433471,
      "grad_norm": 2.90625,
      "learning_rate": 3.110807365696499e-05,
      "loss": 0.8081,
      "step": 376680
    },
    {
      "epoch": 1.3202045372502427,
      "grad_norm": 2.71875,
      "learning_rate": 3.110742462830128e-05,
      "loss": 0.8535,
      "step": 376690
    },
    {
      "epoch": 1.3202395847571382,
      "grad_norm": 2.890625,
      "learning_rate": 3.1106775599637585e-05,
      "loss": 0.8618,
      "step": 376700
    },
    {
      "epoch": 1.320274632264034,
      "grad_norm": 3.078125,
      "learning_rate": 3.110612657097388e-05,
      "loss": 0.861,
      "step": 376710
    },
    {
      "epoch": 1.3203096797709295,
      "grad_norm": 2.703125,
      "learning_rate": 3.110547754231018e-05,
      "loss": 0.808,
      "step": 376720
    },
    {
      "epoch": 1.320344727277825,
      "grad_norm": 2.6875,
      "learning_rate": 3.1104828513646475e-05,
      "loss": 0.8979,
      "step": 376730
    },
    {
      "epoch": 1.3203797747847208,
      "grad_norm": 3.09375,
      "learning_rate": 3.110417948498278e-05,
      "loss": 0.8888,
      "step": 376740
    },
    {
      "epoch": 1.3204148222916163,
      "grad_norm": 3.15625,
      "learning_rate": 3.110353045631907e-05,
      "loss": 0.9546,
      "step": 376750
    },
    {
      "epoch": 1.3204498697985119,
      "grad_norm": 2.25,
      "learning_rate": 3.110288142765537e-05,
      "loss": 0.8334,
      "step": 376760
    },
    {
      "epoch": 1.3204849173054074,
      "grad_norm": 3.15625,
      "learning_rate": 3.1102232398991674e-05,
      "loss": 0.7765,
      "step": 376770
    },
    {
      "epoch": 1.3205199648123032,
      "grad_norm": 3.015625,
      "learning_rate": 3.110158337032797e-05,
      "loss": 0.9094,
      "step": 376780
    },
    {
      "epoch": 1.3205550123191987,
      "grad_norm": 2.734375,
      "learning_rate": 3.110093434166427e-05,
      "loss": 0.8953,
      "step": 376790
    },
    {
      "epoch": 1.3205900598260942,
      "grad_norm": 3.109375,
      "learning_rate": 3.1100285313000565e-05,
      "loss": 0.8918,
      "step": 376800
    },
    {
      "epoch": 1.3206251073329898,
      "grad_norm": 3.421875,
      "learning_rate": 3.1099636284336866e-05,
      "loss": 0.9186,
      "step": 376810
    },
    {
      "epoch": 1.3206601548398855,
      "grad_norm": 3.40625,
      "learning_rate": 3.109898725567316e-05,
      "loss": 0.8829,
      "step": 376820
    },
    {
      "epoch": 1.320695202346781,
      "grad_norm": 3.359375,
      "learning_rate": 3.109833822700946e-05,
      "loss": 0.871,
      "step": 376830
    },
    {
      "epoch": 1.3207302498536766,
      "grad_norm": 2.828125,
      "learning_rate": 3.109768919834576e-05,
      "loss": 0.9272,
      "step": 376840
    },
    {
      "epoch": 1.3207652973605724,
      "grad_norm": 2.859375,
      "learning_rate": 3.109704016968206e-05,
      "loss": 0.8212,
      "step": 376850
    },
    {
      "epoch": 1.320800344867468,
      "grad_norm": 3.109375,
      "learning_rate": 3.109639114101835e-05,
      "loss": 0.9189,
      "step": 376860
    },
    {
      "epoch": 1.3208353923743634,
      "grad_norm": 3.75,
      "learning_rate": 3.1095742112354654e-05,
      "loss": 0.8794,
      "step": 376870
    },
    {
      "epoch": 1.3208704398812592,
      "grad_norm": 2.984375,
      "learning_rate": 3.109509308369095e-05,
      "loss": 0.8379,
      "step": 376880
    },
    {
      "epoch": 1.3209054873881547,
      "grad_norm": 3.234375,
      "learning_rate": 3.109444405502725e-05,
      "loss": 0.8868,
      "step": 376890
    },
    {
      "epoch": 1.3209405348950503,
      "grad_norm": 2.609375,
      "learning_rate": 3.1093795026363545e-05,
      "loss": 0.846,
      "step": 376900
    },
    {
      "epoch": 1.3209755824019458,
      "grad_norm": 2.96875,
      "learning_rate": 3.1093145997699846e-05,
      "loss": 0.821,
      "step": 376910
    },
    {
      "epoch": 1.3210106299088413,
      "grad_norm": 3.046875,
      "learning_rate": 3.109249696903615e-05,
      "loss": 0.8294,
      "step": 376920
    },
    {
      "epoch": 1.321045677415737,
      "grad_norm": 2.953125,
      "learning_rate": 3.109184794037244e-05,
      "loss": 0.8604,
      "step": 376930
    },
    {
      "epoch": 1.3210807249226326,
      "grad_norm": 2.90625,
      "learning_rate": 3.109119891170874e-05,
      "loss": 0.8053,
      "step": 376940
    },
    {
      "epoch": 1.3211157724295282,
      "grad_norm": 3.59375,
      "learning_rate": 3.109054988304503e-05,
      "loss": 1.0192,
      "step": 376950
    },
    {
      "epoch": 1.321150819936424,
      "grad_norm": 2.84375,
      "learning_rate": 3.108990085438133e-05,
      "loss": 0.8709,
      "step": 376960
    },
    {
      "epoch": 1.3211858674433195,
      "grad_norm": 2.625,
      "learning_rate": 3.108925182571763e-05,
      "loss": 0.766,
      "step": 376970
    },
    {
      "epoch": 1.321220914950215,
      "grad_norm": 2.984375,
      "learning_rate": 3.108860279705393e-05,
      "loss": 0.8835,
      "step": 376980
    },
    {
      "epoch": 1.3212559624571107,
      "grad_norm": 2.71875,
      "learning_rate": 3.108795376839022e-05,
      "loss": 0.879,
      "step": 376990
    },
    {
      "epoch": 1.3212910099640063,
      "grad_norm": 3.125,
      "learning_rate": 3.1087304739726525e-05,
      "loss": 0.7898,
      "step": 377000
    },
    {
      "epoch": 1.3213260574709018,
      "grad_norm": 2.921875,
      "learning_rate": 3.1086655711062826e-05,
      "loss": 0.8292,
      "step": 377010
    },
    {
      "epoch": 1.3213611049777974,
      "grad_norm": 3.078125,
      "learning_rate": 3.108600668239912e-05,
      "loss": 0.8729,
      "step": 377020
    },
    {
      "epoch": 1.3213961524846929,
      "grad_norm": 3.046875,
      "learning_rate": 3.108535765373542e-05,
      "loss": 0.8032,
      "step": 377030
    },
    {
      "epoch": 1.3214311999915886,
      "grad_norm": 3.0625,
      "learning_rate": 3.108470862507172e-05,
      "loss": 0.8153,
      "step": 377040
    },
    {
      "epoch": 1.3214662474984842,
      "grad_norm": 2.453125,
      "learning_rate": 3.108405959640802e-05,
      "loss": 0.7842,
      "step": 377050
    },
    {
      "epoch": 1.3215012950053797,
      "grad_norm": 3.109375,
      "learning_rate": 3.108341056774431e-05,
      "loss": 0.8817,
      "step": 377060
    },
    {
      "epoch": 1.3215363425122755,
      "grad_norm": 3.03125,
      "learning_rate": 3.1082761539080614e-05,
      "loss": 0.8289,
      "step": 377070
    },
    {
      "epoch": 1.321571390019171,
      "grad_norm": 3.171875,
      "learning_rate": 3.108211251041691e-05,
      "loss": 0.8214,
      "step": 377080
    },
    {
      "epoch": 1.3216064375260665,
      "grad_norm": 2.5,
      "learning_rate": 3.108146348175321e-05,
      "loss": 0.7771,
      "step": 377090
    },
    {
      "epoch": 1.3216414850329623,
      "grad_norm": 2.65625,
      "learning_rate": 3.1080814453089505e-05,
      "loss": 0.8261,
      "step": 377100
    },
    {
      "epoch": 1.3216765325398578,
      "grad_norm": 2.875,
      "learning_rate": 3.1080165424425806e-05,
      "loss": 0.789,
      "step": 377110
    },
    {
      "epoch": 1.3217115800467534,
      "grad_norm": 3.0625,
      "learning_rate": 3.10795163957621e-05,
      "loss": 0.9193,
      "step": 377120
    },
    {
      "epoch": 1.321746627553649,
      "grad_norm": 3.125,
      "learning_rate": 3.10788673670984e-05,
      "loss": 0.8753,
      "step": 377130
    },
    {
      "epoch": 1.3217816750605444,
      "grad_norm": 3.109375,
      "learning_rate": 3.1078218338434704e-05,
      "loss": 0.9398,
      "step": 377140
    },
    {
      "epoch": 1.3218167225674402,
      "grad_norm": 2.53125,
      "learning_rate": 3.1077569309771e-05,
      "loss": 0.778,
      "step": 377150
    },
    {
      "epoch": 1.3218517700743357,
      "grad_norm": 2.8125,
      "learning_rate": 3.10769202811073e-05,
      "loss": 0.8961,
      "step": 377160
    },
    {
      "epoch": 1.3218868175812313,
      "grad_norm": 2.6875,
      "learning_rate": 3.1076271252443594e-05,
      "loss": 0.8156,
      "step": 377170
    },
    {
      "epoch": 1.321921865088127,
      "grad_norm": 3.15625,
      "learning_rate": 3.1075622223779896e-05,
      "loss": 0.8257,
      "step": 377180
    },
    {
      "epoch": 1.3219569125950226,
      "grad_norm": 3.421875,
      "learning_rate": 3.107497319511619e-05,
      "loss": 0.8442,
      "step": 377190
    },
    {
      "epoch": 1.321991960101918,
      "grad_norm": 2.890625,
      "learning_rate": 3.107432416645249e-05,
      "loss": 0.825,
      "step": 377200
    },
    {
      "epoch": 1.3220270076088139,
      "grad_norm": 2.6875,
      "learning_rate": 3.1073675137788786e-05,
      "loss": 0.833,
      "step": 377210
    },
    {
      "epoch": 1.3220620551157094,
      "grad_norm": 2.65625,
      "learning_rate": 3.107302610912509e-05,
      "loss": 0.8572,
      "step": 377220
    },
    {
      "epoch": 1.322097102622605,
      "grad_norm": 2.90625,
      "learning_rate": 3.107237708046138e-05,
      "loss": 0.903,
      "step": 377230
    },
    {
      "epoch": 1.3221321501295005,
      "grad_norm": 2.859375,
      "learning_rate": 3.1071728051797684e-05,
      "loss": 0.8506,
      "step": 377240
    },
    {
      "epoch": 1.322167197636396,
      "grad_norm": 2.96875,
      "learning_rate": 3.107107902313398e-05,
      "loss": 0.8705,
      "step": 377250
    },
    {
      "epoch": 1.3222022451432918,
      "grad_norm": 2.78125,
      "learning_rate": 3.107042999447028e-05,
      "loss": 0.8782,
      "step": 377260
    },
    {
      "epoch": 1.3222372926501873,
      "grad_norm": 3.125,
      "learning_rate": 3.1069780965806574e-05,
      "loss": 0.86,
      "step": 377270
    },
    {
      "epoch": 1.3222723401570828,
      "grad_norm": 3.078125,
      "learning_rate": 3.1069131937142876e-05,
      "loss": 0.8696,
      "step": 377280
    },
    {
      "epoch": 1.3223073876639786,
      "grad_norm": 2.578125,
      "learning_rate": 3.106848290847918e-05,
      "loss": 0.8021,
      "step": 377290
    },
    {
      "epoch": 1.3223424351708741,
      "grad_norm": 2.9375,
      "learning_rate": 3.106783387981547e-05,
      "loss": 0.8793,
      "step": 377300
    },
    {
      "epoch": 1.3223774826777697,
      "grad_norm": 2.3125,
      "learning_rate": 3.106718485115177e-05,
      "loss": 0.8607,
      "step": 377310
    },
    {
      "epoch": 1.3224125301846654,
      "grad_norm": 3.0,
      "learning_rate": 3.106653582248806e-05,
      "loss": 0.8664,
      "step": 377320
    },
    {
      "epoch": 1.322447577691561,
      "grad_norm": 3.125,
      "learning_rate": 3.106588679382436e-05,
      "loss": 0.8367,
      "step": 377330
    },
    {
      "epoch": 1.3224826251984565,
      "grad_norm": 3.03125,
      "learning_rate": 3.106523776516066e-05,
      "loss": 0.8154,
      "step": 377340
    },
    {
      "epoch": 1.322517672705352,
      "grad_norm": 2.890625,
      "learning_rate": 3.106458873649696e-05,
      "loss": 0.8463,
      "step": 377350
    },
    {
      "epoch": 1.3225527202122476,
      "grad_norm": 3.4375,
      "learning_rate": 3.106393970783326e-05,
      "loss": 0.9438,
      "step": 377360
    },
    {
      "epoch": 1.3225877677191433,
      "grad_norm": 2.8125,
      "learning_rate": 3.1063290679169554e-05,
      "loss": 0.806,
      "step": 377370
    },
    {
      "epoch": 1.3226228152260389,
      "grad_norm": 2.90625,
      "learning_rate": 3.1062641650505856e-05,
      "loss": 0.8584,
      "step": 377380
    },
    {
      "epoch": 1.3226578627329344,
      "grad_norm": 2.609375,
      "learning_rate": 3.106199262184215e-05,
      "loss": 0.8652,
      "step": 377390
    },
    {
      "epoch": 1.3226929102398302,
      "grad_norm": 3.390625,
      "learning_rate": 3.106134359317845e-05,
      "loss": 0.8373,
      "step": 377400
    },
    {
      "epoch": 1.3227279577467257,
      "grad_norm": 2.53125,
      "learning_rate": 3.1060694564514746e-05,
      "loss": 0.8917,
      "step": 377410
    },
    {
      "epoch": 1.3227630052536212,
      "grad_norm": 2.765625,
      "learning_rate": 3.106004553585105e-05,
      "loss": 0.8315,
      "step": 377420
    },
    {
      "epoch": 1.322798052760517,
      "grad_norm": 2.828125,
      "learning_rate": 3.105939650718734e-05,
      "loss": 0.8453,
      "step": 377430
    },
    {
      "epoch": 1.3228331002674125,
      "grad_norm": 2.84375,
      "learning_rate": 3.1058747478523644e-05,
      "loss": 0.7936,
      "step": 377440
    },
    {
      "epoch": 1.322868147774308,
      "grad_norm": 3.109375,
      "learning_rate": 3.105809844985994e-05,
      "loss": 0.7977,
      "step": 377450
    },
    {
      "epoch": 1.3229031952812036,
      "grad_norm": 2.5625,
      "learning_rate": 3.105744942119624e-05,
      "loss": 0.7858,
      "step": 377460
    },
    {
      "epoch": 1.3229382427880994,
      "grad_norm": 2.390625,
      "learning_rate": 3.1056800392532534e-05,
      "loss": 0.7977,
      "step": 377470
    },
    {
      "epoch": 1.3229732902949949,
      "grad_norm": 2.53125,
      "learning_rate": 3.1056151363868836e-05,
      "loss": 0.8074,
      "step": 377480
    },
    {
      "epoch": 1.3230083378018904,
      "grad_norm": 2.578125,
      "learning_rate": 3.105550233520513e-05,
      "loss": 0.7607,
      "step": 377490
    },
    {
      "epoch": 1.323043385308786,
      "grad_norm": 3.234375,
      "learning_rate": 3.105485330654143e-05,
      "loss": 0.8727,
      "step": 377500
    },
    {
      "epoch": 1.3230784328156817,
      "grad_norm": 2.90625,
      "learning_rate": 3.105420427787773e-05,
      "loss": 0.8717,
      "step": 377510
    },
    {
      "epoch": 1.3231134803225773,
      "grad_norm": 3.65625,
      "learning_rate": 3.105355524921403e-05,
      "loss": 0.8364,
      "step": 377520
    },
    {
      "epoch": 1.3231485278294728,
      "grad_norm": 3.46875,
      "learning_rate": 3.105290622055033e-05,
      "loss": 0.8398,
      "step": 377530
    },
    {
      "epoch": 1.3231835753363685,
      "grad_norm": 2.625,
      "learning_rate": 3.1052257191886623e-05,
      "loss": 0.7941,
      "step": 377540
    },
    {
      "epoch": 1.323218622843264,
      "grad_norm": 2.671875,
      "learning_rate": 3.1051608163222925e-05,
      "loss": 0.7757,
      "step": 377550
    },
    {
      "epoch": 1.3232536703501596,
      "grad_norm": 3.234375,
      "learning_rate": 3.105095913455922e-05,
      "loss": 0.839,
      "step": 377560
    },
    {
      "epoch": 1.3232887178570554,
      "grad_norm": 3.015625,
      "learning_rate": 3.105031010589552e-05,
      "loss": 0.8793,
      "step": 377570
    },
    {
      "epoch": 1.323323765363951,
      "grad_norm": 2.6875,
      "learning_rate": 3.1049661077231815e-05,
      "loss": 0.8564,
      "step": 377580
    },
    {
      "epoch": 1.3233588128708464,
      "grad_norm": 2.921875,
      "learning_rate": 3.104901204856812e-05,
      "loss": 0.7868,
      "step": 377590
    },
    {
      "epoch": 1.323393860377742,
      "grad_norm": 3.25,
      "learning_rate": 3.104836301990441e-05,
      "loss": 0.8333,
      "step": 377600
    },
    {
      "epoch": 1.3234289078846375,
      "grad_norm": 2.765625,
      "learning_rate": 3.104771399124071e-05,
      "loss": 0.7992,
      "step": 377610
    },
    {
      "epoch": 1.3234639553915333,
      "grad_norm": 3.078125,
      "learning_rate": 3.104706496257701e-05,
      "loss": 0.8409,
      "step": 377620
    },
    {
      "epoch": 1.3234990028984288,
      "grad_norm": 3.296875,
      "learning_rate": 3.104641593391331e-05,
      "loss": 0.918,
      "step": 377630
    },
    {
      "epoch": 1.3235340504053243,
      "grad_norm": 2.578125,
      "learning_rate": 3.104576690524961e-05,
      "loss": 0.7519,
      "step": 377640
    },
    {
      "epoch": 1.32356909791222,
      "grad_norm": 3.546875,
      "learning_rate": 3.1045117876585905e-05,
      "loss": 0.8425,
      "step": 377650
    },
    {
      "epoch": 1.3236041454191156,
      "grad_norm": 3.046875,
      "learning_rate": 3.1044468847922206e-05,
      "loss": 0.8392,
      "step": 377660
    },
    {
      "epoch": 1.3236391929260112,
      "grad_norm": 2.859375,
      "learning_rate": 3.10438198192585e-05,
      "loss": 0.8451,
      "step": 377670
    },
    {
      "epoch": 1.323674240432907,
      "grad_norm": 2.8125,
      "learning_rate": 3.10431707905948e-05,
      "loss": 0.9046,
      "step": 377680
    },
    {
      "epoch": 1.3237092879398025,
      "grad_norm": 2.84375,
      "learning_rate": 3.10425217619311e-05,
      "loss": 0.8867,
      "step": 377690
    },
    {
      "epoch": 1.323744335446698,
      "grad_norm": 2.765625,
      "learning_rate": 3.104187273326739e-05,
      "loss": 0.7497,
      "step": 377700
    },
    {
      "epoch": 1.3237793829535935,
      "grad_norm": 3.046875,
      "learning_rate": 3.1041223704603686e-05,
      "loss": 0.9162,
      "step": 377710
    },
    {
      "epoch": 1.323814430460489,
      "grad_norm": 2.828125,
      "learning_rate": 3.104057467593999e-05,
      "loss": 0.8959,
      "step": 377720
    },
    {
      "epoch": 1.3238494779673848,
      "grad_norm": 3.046875,
      "learning_rate": 3.103992564727629e-05,
      "loss": 0.831,
      "step": 377730
    },
    {
      "epoch": 1.3238845254742804,
      "grad_norm": 2.703125,
      "learning_rate": 3.1039276618612583e-05,
      "loss": 0.8042,
      "step": 377740
    },
    {
      "epoch": 1.323919572981176,
      "grad_norm": 3.0,
      "learning_rate": 3.1038627589948885e-05,
      "loss": 0.9718,
      "step": 377750
    },
    {
      "epoch": 1.3239546204880717,
      "grad_norm": 3.078125,
      "learning_rate": 3.103797856128518e-05,
      "loss": 0.8475,
      "step": 377760
    },
    {
      "epoch": 1.3239896679949672,
      "grad_norm": 2.59375,
      "learning_rate": 3.103732953262148e-05,
      "loss": 0.9202,
      "step": 377770
    },
    {
      "epoch": 1.3240247155018627,
      "grad_norm": 3.34375,
      "learning_rate": 3.1036680503957775e-05,
      "loss": 0.8191,
      "step": 377780
    },
    {
      "epoch": 1.3240597630087585,
      "grad_norm": 3.25,
      "learning_rate": 3.103603147529408e-05,
      "loss": 0.9203,
      "step": 377790
    },
    {
      "epoch": 1.324094810515654,
      "grad_norm": 3.25,
      "learning_rate": 3.103538244663037e-05,
      "loss": 0.9105,
      "step": 377800
    },
    {
      "epoch": 1.3241298580225496,
      "grad_norm": 2.84375,
      "learning_rate": 3.103473341796667e-05,
      "loss": 0.9091,
      "step": 377810
    },
    {
      "epoch": 1.324164905529445,
      "grad_norm": 2.921875,
      "learning_rate": 3.103408438930297e-05,
      "loss": 0.8698,
      "step": 377820
    },
    {
      "epoch": 1.3241999530363406,
      "grad_norm": 3.078125,
      "learning_rate": 3.103343536063927e-05,
      "loss": 0.8275,
      "step": 377830
    },
    {
      "epoch": 1.3242350005432364,
      "grad_norm": 3.203125,
      "learning_rate": 3.1032786331975563e-05,
      "loss": 0.7618,
      "step": 377840
    },
    {
      "epoch": 1.324270048050132,
      "grad_norm": 2.390625,
      "learning_rate": 3.1032137303311865e-05,
      "loss": 0.8021,
      "step": 377850
    },
    {
      "epoch": 1.3243050955570275,
      "grad_norm": 3.265625,
      "learning_rate": 3.103148827464816e-05,
      "loss": 0.8831,
      "step": 377860
    },
    {
      "epoch": 1.3243401430639232,
      "grad_norm": 2.390625,
      "learning_rate": 3.103083924598446e-05,
      "loss": 0.8749,
      "step": 377870
    },
    {
      "epoch": 1.3243751905708188,
      "grad_norm": 2.875,
      "learning_rate": 3.103019021732076e-05,
      "loss": 0.9143,
      "step": 377880
    },
    {
      "epoch": 1.3244102380777143,
      "grad_norm": 3.03125,
      "learning_rate": 3.102954118865706e-05,
      "loss": 0.8842,
      "step": 377890
    },
    {
      "epoch": 1.32444528558461,
      "grad_norm": 2.6875,
      "learning_rate": 3.102889215999336e-05,
      "loss": 0.823,
      "step": 377900
    },
    {
      "epoch": 1.3244803330915056,
      "grad_norm": 2.921875,
      "learning_rate": 3.102824313132965e-05,
      "loss": 0.7885,
      "step": 377910
    },
    {
      "epoch": 1.3245153805984011,
      "grad_norm": 2.53125,
      "learning_rate": 3.1027594102665954e-05,
      "loss": 0.8649,
      "step": 377920
    },
    {
      "epoch": 1.3245504281052967,
      "grad_norm": 3.09375,
      "learning_rate": 3.102694507400225e-05,
      "loss": 0.905,
      "step": 377930
    },
    {
      "epoch": 1.3245854756121922,
      "grad_norm": 2.796875,
      "learning_rate": 3.102629604533855e-05,
      "loss": 0.8919,
      "step": 377940
    },
    {
      "epoch": 1.324620523119088,
      "grad_norm": 3.28125,
      "learning_rate": 3.1025647016674845e-05,
      "loss": 0.8157,
      "step": 377950
    },
    {
      "epoch": 1.3246555706259835,
      "grad_norm": 2.984375,
      "learning_rate": 3.1024997988011146e-05,
      "loss": 0.8016,
      "step": 377960
    },
    {
      "epoch": 1.324690618132879,
      "grad_norm": 3.21875,
      "learning_rate": 3.102434895934744e-05,
      "loss": 0.9086,
      "step": 377970
    },
    {
      "epoch": 1.3247256656397748,
      "grad_norm": 2.875,
      "learning_rate": 3.102369993068374e-05,
      "loss": 0.7403,
      "step": 377980
    },
    {
      "epoch": 1.3247607131466703,
      "grad_norm": 2.515625,
      "learning_rate": 3.102305090202004e-05,
      "loss": 0.7825,
      "step": 377990
    },
    {
      "epoch": 1.3247957606535659,
      "grad_norm": 2.875,
      "learning_rate": 3.102240187335634e-05,
      "loss": 0.8541,
      "step": 378000
    },
    {
      "epoch": 1.3248308081604616,
      "grad_norm": 2.96875,
      "learning_rate": 3.102175284469264e-05,
      "loss": 0.8922,
      "step": 378010
    },
    {
      "epoch": 1.3248658556673572,
      "grad_norm": 2.484375,
      "learning_rate": 3.1021103816028934e-05,
      "loss": 0.7538,
      "step": 378020
    },
    {
      "epoch": 1.3249009031742527,
      "grad_norm": 2.921875,
      "learning_rate": 3.1020454787365236e-05,
      "loss": 0.8953,
      "step": 378030
    },
    {
      "epoch": 1.3249359506811482,
      "grad_norm": 2.609375,
      "learning_rate": 3.101980575870153e-05,
      "loss": 0.83,
      "step": 378040
    },
    {
      "epoch": 1.3249709981880438,
      "grad_norm": 3.078125,
      "learning_rate": 3.101915673003783e-05,
      "loss": 0.8586,
      "step": 378050
    },
    {
      "epoch": 1.3250060456949395,
      "grad_norm": 2.75,
      "learning_rate": 3.1018507701374126e-05,
      "loss": 0.8604,
      "step": 378060
    },
    {
      "epoch": 1.325041093201835,
      "grad_norm": 2.75,
      "learning_rate": 3.101785867271042e-05,
      "loss": 0.8808,
      "step": 378070
    },
    {
      "epoch": 1.3250761407087306,
      "grad_norm": 2.359375,
      "learning_rate": 3.1017209644046715e-05,
      "loss": 0.8185,
      "step": 378080
    },
    {
      "epoch": 1.3251111882156263,
      "grad_norm": 2.84375,
      "learning_rate": 3.101656061538302e-05,
      "loss": 0.8552,
      "step": 378090
    },
    {
      "epoch": 1.3251462357225219,
      "grad_norm": 3.015625,
      "learning_rate": 3.101591158671932e-05,
      "loss": 0.8956,
      "step": 378100
    },
    {
      "epoch": 1.3251812832294174,
      "grad_norm": 2.9375,
      "learning_rate": 3.101526255805561e-05,
      "loss": 0.8243,
      "step": 378110
    },
    {
      "epoch": 1.3252163307363132,
      "grad_norm": 3.171875,
      "learning_rate": 3.1014613529391914e-05,
      "loss": 0.8157,
      "step": 378120
    },
    {
      "epoch": 1.3252513782432087,
      "grad_norm": 3.375,
      "learning_rate": 3.101396450072821e-05,
      "loss": 0.8652,
      "step": 378130
    },
    {
      "epoch": 1.3252864257501042,
      "grad_norm": 3.03125,
      "learning_rate": 3.101331547206451e-05,
      "loss": 0.9083,
      "step": 378140
    },
    {
      "epoch": 1.3253214732569998,
      "grad_norm": 2.53125,
      "learning_rate": 3.1012666443400805e-05,
      "loss": 0.8884,
      "step": 378150
    },
    {
      "epoch": 1.3253565207638955,
      "grad_norm": 2.890625,
      "learning_rate": 3.1012017414737106e-05,
      "loss": 0.8138,
      "step": 378160
    },
    {
      "epoch": 1.325391568270791,
      "grad_norm": 2.75,
      "learning_rate": 3.10113683860734e-05,
      "loss": 0.8571,
      "step": 378170
    },
    {
      "epoch": 1.3254266157776866,
      "grad_norm": 2.78125,
      "learning_rate": 3.10107193574097e-05,
      "loss": 0.8863,
      "step": 378180
    },
    {
      "epoch": 1.3254616632845821,
      "grad_norm": 2.734375,
      "learning_rate": 3.1010070328746e-05,
      "loss": 0.8787,
      "step": 378190
    },
    {
      "epoch": 1.325496710791478,
      "grad_norm": 2.890625,
      "learning_rate": 3.10094213000823e-05,
      "loss": 0.8677,
      "step": 378200
    },
    {
      "epoch": 1.3255317582983734,
      "grad_norm": 2.828125,
      "learning_rate": 3.100877227141859e-05,
      "loss": 0.8793,
      "step": 378210
    },
    {
      "epoch": 1.325566805805269,
      "grad_norm": 2.9375,
      "learning_rate": 3.1008123242754894e-05,
      "loss": 0.8671,
      "step": 378220
    },
    {
      "epoch": 1.3256018533121647,
      "grad_norm": 3.015625,
      "learning_rate": 3.100747421409119e-05,
      "loss": 0.8493,
      "step": 378230
    },
    {
      "epoch": 1.3256369008190603,
      "grad_norm": 2.625,
      "learning_rate": 3.100682518542749e-05,
      "loss": 0.8934,
      "step": 378240
    },
    {
      "epoch": 1.3256719483259558,
      "grad_norm": 3.609375,
      "learning_rate": 3.100617615676379e-05,
      "loss": 0.8323,
      "step": 378250
    },
    {
      "epoch": 1.3257069958328516,
      "grad_norm": 2.84375,
      "learning_rate": 3.1005527128100086e-05,
      "loss": 0.9257,
      "step": 378260
    },
    {
      "epoch": 1.325742043339747,
      "grad_norm": 2.84375,
      "learning_rate": 3.100487809943639e-05,
      "loss": 0.7946,
      "step": 378270
    },
    {
      "epoch": 1.3257770908466426,
      "grad_norm": 2.828125,
      "learning_rate": 3.100422907077268e-05,
      "loss": 0.8097,
      "step": 378280
    },
    {
      "epoch": 1.3258121383535382,
      "grad_norm": 2.59375,
      "learning_rate": 3.1003580042108984e-05,
      "loss": 0.8567,
      "step": 378290
    },
    {
      "epoch": 1.3258471858604337,
      "grad_norm": 2.6875,
      "learning_rate": 3.100293101344528e-05,
      "loss": 0.8843,
      "step": 378300
    },
    {
      "epoch": 1.3258822333673295,
      "grad_norm": 3.0,
      "learning_rate": 3.100228198478158e-05,
      "loss": 0.8875,
      "step": 378310
    },
    {
      "epoch": 1.325917280874225,
      "grad_norm": 2.59375,
      "learning_rate": 3.1001632956117874e-05,
      "loss": 0.8531,
      "step": 378320
    },
    {
      "epoch": 1.3259523283811205,
      "grad_norm": 2.859375,
      "learning_rate": 3.1000983927454176e-05,
      "loss": 0.8718,
      "step": 378330
    },
    {
      "epoch": 1.3259873758880163,
      "grad_norm": 3.046875,
      "learning_rate": 3.100033489879047e-05,
      "loss": 0.8397,
      "step": 378340
    },
    {
      "epoch": 1.3260224233949118,
      "grad_norm": 3.171875,
      "learning_rate": 3.099968587012677e-05,
      "loss": 0.8848,
      "step": 378350
    },
    {
      "epoch": 1.3260574709018074,
      "grad_norm": 2.96875,
      "learning_rate": 3.0999036841463066e-05,
      "loss": 0.8832,
      "step": 378360
    },
    {
      "epoch": 1.3260925184087031,
      "grad_norm": 2.703125,
      "learning_rate": 3.099838781279937e-05,
      "loss": 0.8047,
      "step": 378370
    },
    {
      "epoch": 1.3261275659155987,
      "grad_norm": 2.875,
      "learning_rate": 3.099773878413567e-05,
      "loss": 0.7691,
      "step": 378380
    },
    {
      "epoch": 1.3261626134224942,
      "grad_norm": 2.8125,
      "learning_rate": 3.0997089755471964e-05,
      "loss": 0.8291,
      "step": 378390
    },
    {
      "epoch": 1.3261976609293897,
      "grad_norm": 3.0625,
      "learning_rate": 3.0996440726808265e-05,
      "loss": 0.8959,
      "step": 378400
    },
    {
      "epoch": 1.3262327084362853,
      "grad_norm": 2.9375,
      "learning_rate": 3.099579169814456e-05,
      "loss": 0.851,
      "step": 378410
    },
    {
      "epoch": 1.326267755943181,
      "grad_norm": 3.15625,
      "learning_rate": 3.099514266948086e-05,
      "loss": 0.7972,
      "step": 378420
    },
    {
      "epoch": 1.3263028034500766,
      "grad_norm": 3.25,
      "learning_rate": 3.0994493640817156e-05,
      "loss": 0.8633,
      "step": 378430
    },
    {
      "epoch": 1.326337850956972,
      "grad_norm": 3.0,
      "learning_rate": 3.099384461215346e-05,
      "loss": 0.9152,
      "step": 378440
    },
    {
      "epoch": 1.3263728984638679,
      "grad_norm": 2.9375,
      "learning_rate": 3.0993195583489745e-05,
      "loss": 0.831,
      "step": 378450
    },
    {
      "epoch": 1.3264079459707634,
      "grad_norm": 3.09375,
      "learning_rate": 3.0992546554826046e-05,
      "loss": 0.8818,
      "step": 378460
    },
    {
      "epoch": 1.326442993477659,
      "grad_norm": 2.609375,
      "learning_rate": 3.099189752616235e-05,
      "loss": 0.833,
      "step": 378470
    },
    {
      "epoch": 1.3264780409845547,
      "grad_norm": 2.734375,
      "learning_rate": 3.099124849749864e-05,
      "loss": 0.8396,
      "step": 378480
    },
    {
      "epoch": 1.3265130884914502,
      "grad_norm": 2.609375,
      "learning_rate": 3.0990599468834944e-05,
      "loss": 0.9014,
      "step": 378490
    },
    {
      "epoch": 1.3265481359983458,
      "grad_norm": 3.0,
      "learning_rate": 3.098995044017124e-05,
      "loss": 0.8463,
      "step": 378500
    },
    {
      "epoch": 1.3265831835052413,
      "grad_norm": 3.015625,
      "learning_rate": 3.098930141150754e-05,
      "loss": 0.7831,
      "step": 378510
    },
    {
      "epoch": 1.3266182310121368,
      "grad_norm": 2.8125,
      "learning_rate": 3.0988652382843834e-05,
      "loss": 0.8299,
      "step": 378520
    },
    {
      "epoch": 1.3266532785190326,
      "grad_norm": 2.828125,
      "learning_rate": 3.0988003354180136e-05,
      "loss": 0.9042,
      "step": 378530
    },
    {
      "epoch": 1.3266883260259281,
      "grad_norm": 3.203125,
      "learning_rate": 3.098735432551643e-05,
      "loss": 0.8077,
      "step": 378540
    },
    {
      "epoch": 1.3267233735328237,
      "grad_norm": 2.75,
      "learning_rate": 3.098670529685273e-05,
      "loss": 0.7813,
      "step": 378550
    },
    {
      "epoch": 1.3267584210397194,
      "grad_norm": 3.28125,
      "learning_rate": 3.0986056268189026e-05,
      "loss": 0.948,
      "step": 378560
    },
    {
      "epoch": 1.326793468546615,
      "grad_norm": 3.375,
      "learning_rate": 3.098540723952533e-05,
      "loss": 0.8867,
      "step": 378570
    },
    {
      "epoch": 1.3268285160535105,
      "grad_norm": 2.953125,
      "learning_rate": 3.098475821086162e-05,
      "loss": 0.8283,
      "step": 378580
    },
    {
      "epoch": 1.3268635635604062,
      "grad_norm": 3.296875,
      "learning_rate": 3.0984109182197924e-05,
      "loss": 0.8936,
      "step": 378590
    },
    {
      "epoch": 1.3268986110673018,
      "grad_norm": 3.1875,
      "learning_rate": 3.0983460153534225e-05,
      "loss": 0.8131,
      "step": 378600
    },
    {
      "epoch": 1.3269336585741973,
      "grad_norm": 2.703125,
      "learning_rate": 3.098281112487052e-05,
      "loss": 0.8431,
      "step": 378610
    },
    {
      "epoch": 1.3269687060810929,
      "grad_norm": 2.875,
      "learning_rate": 3.098216209620682e-05,
      "loss": 0.8334,
      "step": 378620
    },
    {
      "epoch": 1.3270037535879884,
      "grad_norm": 2.640625,
      "learning_rate": 3.0981513067543116e-05,
      "loss": 0.7355,
      "step": 378630
    },
    {
      "epoch": 1.3270388010948841,
      "grad_norm": 2.96875,
      "learning_rate": 3.098086403887942e-05,
      "loss": 0.8556,
      "step": 378640
    },
    {
      "epoch": 1.3270738486017797,
      "grad_norm": 2.53125,
      "learning_rate": 3.098021501021571e-05,
      "loss": 0.8306,
      "step": 378650
    },
    {
      "epoch": 1.3271088961086752,
      "grad_norm": 3.3125,
      "learning_rate": 3.097956598155201e-05,
      "loss": 0.8118,
      "step": 378660
    },
    {
      "epoch": 1.327143943615571,
      "grad_norm": 2.546875,
      "learning_rate": 3.097891695288831e-05,
      "loss": 0.7915,
      "step": 378670
    },
    {
      "epoch": 1.3271789911224665,
      "grad_norm": 2.859375,
      "learning_rate": 3.097826792422461e-05,
      "loss": 0.9413,
      "step": 378680
    },
    {
      "epoch": 1.327214038629362,
      "grad_norm": 2.859375,
      "learning_rate": 3.0977618895560904e-05,
      "loss": 0.8647,
      "step": 378690
    },
    {
      "epoch": 1.3272490861362578,
      "grad_norm": 2.65625,
      "learning_rate": 3.0976969866897205e-05,
      "loss": 0.9008,
      "step": 378700
    },
    {
      "epoch": 1.3272841336431533,
      "grad_norm": 2.53125,
      "learning_rate": 3.09763208382335e-05,
      "loss": 0.9278,
      "step": 378710
    },
    {
      "epoch": 1.3273191811500489,
      "grad_norm": 3.0,
      "learning_rate": 3.09756718095698e-05,
      "loss": 0.9103,
      "step": 378720
    },
    {
      "epoch": 1.3273542286569444,
      "grad_norm": 2.71875,
      "learning_rate": 3.0975022780906096e-05,
      "loss": 0.9252,
      "step": 378730
    },
    {
      "epoch": 1.32738927616384,
      "grad_norm": 2.765625,
      "learning_rate": 3.09743737522424e-05,
      "loss": 0.8301,
      "step": 378740
    },
    {
      "epoch": 1.3274243236707357,
      "grad_norm": 2.640625,
      "learning_rate": 3.09737247235787e-05,
      "loss": 0.9198,
      "step": 378750
    },
    {
      "epoch": 1.3274593711776312,
      "grad_norm": 2.84375,
      "learning_rate": 3.097307569491499e-05,
      "loss": 0.8824,
      "step": 378760
    },
    {
      "epoch": 1.3274944186845268,
      "grad_norm": 3.03125,
      "learning_rate": 3.0972426666251294e-05,
      "loss": 0.8782,
      "step": 378770
    },
    {
      "epoch": 1.3275294661914225,
      "grad_norm": 2.453125,
      "learning_rate": 3.097177763758759e-05,
      "loss": 0.7947,
      "step": 378780
    },
    {
      "epoch": 1.327564513698318,
      "grad_norm": 2.5625,
      "learning_rate": 3.097112860892389e-05,
      "loss": 0.9358,
      "step": 378790
    },
    {
      "epoch": 1.3275995612052136,
      "grad_norm": 3.0,
      "learning_rate": 3.0970479580260185e-05,
      "loss": 0.7958,
      "step": 378800
    },
    {
      "epoch": 1.3276346087121094,
      "grad_norm": 2.71875,
      "learning_rate": 3.0969830551596486e-05,
      "loss": 0.9512,
      "step": 378810
    },
    {
      "epoch": 1.327669656219005,
      "grad_norm": 3.359375,
      "learning_rate": 3.0969181522932774e-05,
      "loss": 0.8692,
      "step": 378820
    },
    {
      "epoch": 1.3277047037259004,
      "grad_norm": 2.984375,
      "learning_rate": 3.0968532494269076e-05,
      "loss": 0.8565,
      "step": 378830
    },
    {
      "epoch": 1.327739751232796,
      "grad_norm": 3.015625,
      "learning_rate": 3.096788346560538e-05,
      "loss": 0.874,
      "step": 378840
    },
    {
      "epoch": 1.3277747987396917,
      "grad_norm": 3.03125,
      "learning_rate": 3.096723443694167e-05,
      "loss": 0.8686,
      "step": 378850
    },
    {
      "epoch": 1.3278098462465873,
      "grad_norm": 3.265625,
      "learning_rate": 3.096658540827797e-05,
      "loss": 0.8042,
      "step": 378860
    },
    {
      "epoch": 1.3278448937534828,
      "grad_norm": 3.421875,
      "learning_rate": 3.096593637961427e-05,
      "loss": 0.8829,
      "step": 378870
    },
    {
      "epoch": 1.3278799412603783,
      "grad_norm": 3.078125,
      "learning_rate": 3.096528735095057e-05,
      "loss": 0.8774,
      "step": 378880
    },
    {
      "epoch": 1.327914988767274,
      "grad_norm": 2.53125,
      "learning_rate": 3.0964638322286864e-05,
      "loss": 0.8568,
      "step": 378890
    },
    {
      "epoch": 1.3279500362741696,
      "grad_norm": 3.109375,
      "learning_rate": 3.0963989293623165e-05,
      "loss": 0.8633,
      "step": 378900
    },
    {
      "epoch": 1.3279850837810652,
      "grad_norm": 3.109375,
      "learning_rate": 3.096334026495946e-05,
      "loss": 0.8594,
      "step": 378910
    },
    {
      "epoch": 1.328020131287961,
      "grad_norm": 3.140625,
      "learning_rate": 3.096269123629576e-05,
      "loss": 0.8995,
      "step": 378920
    },
    {
      "epoch": 1.3280551787948565,
      "grad_norm": 2.796875,
      "learning_rate": 3.0962042207632056e-05,
      "loss": 0.8627,
      "step": 378930
    },
    {
      "epoch": 1.328090226301752,
      "grad_norm": 2.734375,
      "learning_rate": 3.096139317896836e-05,
      "loss": 0.8789,
      "step": 378940
    },
    {
      "epoch": 1.3281252738086478,
      "grad_norm": 3.109375,
      "learning_rate": 3.096074415030465e-05,
      "loss": 0.8142,
      "step": 378950
    },
    {
      "epoch": 1.3281603213155433,
      "grad_norm": 2.8125,
      "learning_rate": 3.096009512164095e-05,
      "loss": 0.8464,
      "step": 378960
    },
    {
      "epoch": 1.3281953688224388,
      "grad_norm": 2.53125,
      "learning_rate": 3.0959446092977254e-05,
      "loss": 0.8882,
      "step": 378970
    },
    {
      "epoch": 1.3282304163293344,
      "grad_norm": 2.53125,
      "learning_rate": 3.095879706431355e-05,
      "loss": 0.8252,
      "step": 378980
    },
    {
      "epoch": 1.32826546383623,
      "grad_norm": 2.875,
      "learning_rate": 3.095814803564985e-05,
      "loss": 0.8877,
      "step": 378990
    },
    {
      "epoch": 1.3283005113431257,
      "grad_norm": 3.15625,
      "learning_rate": 3.0957499006986145e-05,
      "loss": 0.8884,
      "step": 379000
    },
    {
      "epoch": 1.3283355588500212,
      "grad_norm": 2.5,
      "learning_rate": 3.0956849978322446e-05,
      "loss": 0.8501,
      "step": 379010
    },
    {
      "epoch": 1.3283706063569167,
      "grad_norm": 3.3125,
      "learning_rate": 3.095620094965874e-05,
      "loss": 0.8573,
      "step": 379020
    },
    {
      "epoch": 1.3284056538638125,
      "grad_norm": 2.921875,
      "learning_rate": 3.095555192099504e-05,
      "loss": 0.8285,
      "step": 379030
    },
    {
      "epoch": 1.328440701370708,
      "grad_norm": 2.71875,
      "learning_rate": 3.095490289233134e-05,
      "loss": 0.8933,
      "step": 379040
    },
    {
      "epoch": 1.3284757488776036,
      "grad_norm": 2.953125,
      "learning_rate": 3.095425386366764e-05,
      "loss": 0.7899,
      "step": 379050
    },
    {
      "epoch": 1.3285107963844993,
      "grad_norm": 3.140625,
      "learning_rate": 3.095360483500393e-05,
      "loss": 0.9184,
      "step": 379060
    },
    {
      "epoch": 1.3285458438913949,
      "grad_norm": 2.328125,
      "learning_rate": 3.0952955806340234e-05,
      "loss": 0.8324,
      "step": 379070
    },
    {
      "epoch": 1.3285808913982904,
      "grad_norm": 2.53125,
      "learning_rate": 3.095230677767653e-05,
      "loss": 0.777,
      "step": 379080
    },
    {
      "epoch": 1.328615938905186,
      "grad_norm": 2.6875,
      "learning_rate": 3.095165774901283e-05,
      "loss": 0.8264,
      "step": 379090
    },
    {
      "epoch": 1.3286509864120815,
      "grad_norm": 3.125,
      "learning_rate": 3.0951008720349125e-05,
      "loss": 0.8797,
      "step": 379100
    },
    {
      "epoch": 1.3286860339189772,
      "grad_norm": 2.78125,
      "learning_rate": 3.0950359691685426e-05,
      "loss": 0.7676,
      "step": 379110
    },
    {
      "epoch": 1.3287210814258728,
      "grad_norm": 2.890625,
      "learning_rate": 3.094971066302173e-05,
      "loss": 0.8307,
      "step": 379120
    },
    {
      "epoch": 1.3287561289327683,
      "grad_norm": 3.296875,
      "learning_rate": 3.094906163435802e-05,
      "loss": 0.8574,
      "step": 379130
    },
    {
      "epoch": 1.328791176439664,
      "grad_norm": 2.984375,
      "learning_rate": 3.0948412605694324e-05,
      "loss": 0.7635,
      "step": 379140
    },
    {
      "epoch": 1.3288262239465596,
      "grad_norm": 3.046875,
      "learning_rate": 3.094776357703062e-05,
      "loss": 0.8857,
      "step": 379150
    },
    {
      "epoch": 1.3288612714534551,
      "grad_norm": 3.0,
      "learning_rate": 3.094711454836692e-05,
      "loss": 0.9302,
      "step": 379160
    },
    {
      "epoch": 1.3288963189603509,
      "grad_norm": 3.140625,
      "learning_rate": 3.0946465519703214e-05,
      "loss": 0.8323,
      "step": 379170
    },
    {
      "epoch": 1.3289313664672464,
      "grad_norm": 2.625,
      "learning_rate": 3.0945816491039516e-05,
      "loss": 0.9014,
      "step": 379180
    },
    {
      "epoch": 1.328966413974142,
      "grad_norm": 3.046875,
      "learning_rate": 3.094516746237581e-05,
      "loss": 0.8572,
      "step": 379190
    },
    {
      "epoch": 1.3290014614810375,
      "grad_norm": 2.765625,
      "learning_rate": 3.0944518433712105e-05,
      "loss": 0.8512,
      "step": 379200
    },
    {
      "epoch": 1.329036508987933,
      "grad_norm": 2.703125,
      "learning_rate": 3.0943869405048406e-05,
      "loss": 0.8625,
      "step": 379210
    },
    {
      "epoch": 1.3290715564948288,
      "grad_norm": 2.71875,
      "learning_rate": 3.09432203763847e-05,
      "loss": 0.8195,
      "step": 379220
    },
    {
      "epoch": 1.3291066040017243,
      "grad_norm": 2.65625,
      "learning_rate": 3.0942571347721e-05,
      "loss": 0.8118,
      "step": 379230
    },
    {
      "epoch": 1.3291416515086198,
      "grad_norm": 3.09375,
      "learning_rate": 3.09419223190573e-05,
      "loss": 0.9676,
      "step": 379240
    },
    {
      "epoch": 1.3291766990155156,
      "grad_norm": 3.234375,
      "learning_rate": 3.09412732903936e-05,
      "loss": 0.8191,
      "step": 379250
    },
    {
      "epoch": 1.3292117465224111,
      "grad_norm": 2.703125,
      "learning_rate": 3.094062426172989e-05,
      "loss": 0.8676,
      "step": 379260
    },
    {
      "epoch": 1.3292467940293067,
      "grad_norm": 2.640625,
      "learning_rate": 3.0939975233066194e-05,
      "loss": 0.8396,
      "step": 379270
    },
    {
      "epoch": 1.3292818415362024,
      "grad_norm": 3.203125,
      "learning_rate": 3.093932620440249e-05,
      "loss": 0.8704,
      "step": 379280
    },
    {
      "epoch": 1.329316889043098,
      "grad_norm": 3.28125,
      "learning_rate": 3.093867717573879e-05,
      "loss": 0.8917,
      "step": 379290
    },
    {
      "epoch": 1.3293519365499935,
      "grad_norm": 2.734375,
      "learning_rate": 3.0938028147075085e-05,
      "loss": 0.8984,
      "step": 379300
    },
    {
      "epoch": 1.329386984056889,
      "grad_norm": 3.125,
      "learning_rate": 3.0937379118411386e-05,
      "loss": 0.9096,
      "step": 379310
    },
    {
      "epoch": 1.3294220315637846,
      "grad_norm": 2.875,
      "learning_rate": 3.093673008974768e-05,
      "loss": 0.905,
      "step": 379320
    },
    {
      "epoch": 1.3294570790706803,
      "grad_norm": 3.671875,
      "learning_rate": 3.093608106108398e-05,
      "loss": 0.9426,
      "step": 379330
    },
    {
      "epoch": 1.3294921265775759,
      "grad_norm": 2.8125,
      "learning_rate": 3.0935432032420284e-05,
      "loss": 0.893,
      "step": 379340
    },
    {
      "epoch": 1.3295271740844714,
      "grad_norm": 2.984375,
      "learning_rate": 3.093478300375658e-05,
      "loss": 0.775,
      "step": 379350
    },
    {
      "epoch": 1.3295622215913672,
      "grad_norm": 2.390625,
      "learning_rate": 3.093413397509288e-05,
      "loss": 0.7821,
      "step": 379360
    },
    {
      "epoch": 1.3295972690982627,
      "grad_norm": 2.890625,
      "learning_rate": 3.0933484946429174e-05,
      "loss": 0.845,
      "step": 379370
    },
    {
      "epoch": 1.3296323166051582,
      "grad_norm": 2.625,
      "learning_rate": 3.0932835917765476e-05,
      "loss": 0.8425,
      "step": 379380
    },
    {
      "epoch": 1.329667364112054,
      "grad_norm": 2.765625,
      "learning_rate": 3.093218688910177e-05,
      "loss": 0.8502,
      "step": 379390
    },
    {
      "epoch": 1.3297024116189495,
      "grad_norm": 2.8125,
      "learning_rate": 3.093153786043807e-05,
      "loss": 0.8236,
      "step": 379400
    },
    {
      "epoch": 1.329737459125845,
      "grad_norm": 2.765625,
      "learning_rate": 3.0930888831774366e-05,
      "loss": 0.8075,
      "step": 379410
    },
    {
      "epoch": 1.3297725066327406,
      "grad_norm": 2.796875,
      "learning_rate": 3.093023980311067e-05,
      "loss": 0.9119,
      "step": 379420
    },
    {
      "epoch": 1.3298075541396361,
      "grad_norm": 2.828125,
      "learning_rate": 3.092959077444696e-05,
      "loss": 0.8113,
      "step": 379430
    },
    {
      "epoch": 1.329842601646532,
      "grad_norm": 2.953125,
      "learning_rate": 3.0928941745783264e-05,
      "loss": 0.9481,
      "step": 379440
    },
    {
      "epoch": 1.3298776491534274,
      "grad_norm": 2.890625,
      "learning_rate": 3.092829271711956e-05,
      "loss": 0.8416,
      "step": 379450
    },
    {
      "epoch": 1.329912696660323,
      "grad_norm": 2.796875,
      "learning_rate": 3.092764368845586e-05,
      "loss": 0.9284,
      "step": 379460
    },
    {
      "epoch": 1.3299477441672187,
      "grad_norm": 2.609375,
      "learning_rate": 3.092699465979216e-05,
      "loss": 0.8159,
      "step": 379470
    },
    {
      "epoch": 1.3299827916741143,
      "grad_norm": 2.703125,
      "learning_rate": 3.0926345631128456e-05,
      "loss": 0.8609,
      "step": 379480
    },
    {
      "epoch": 1.3300178391810098,
      "grad_norm": 2.671875,
      "learning_rate": 3.092569660246476e-05,
      "loss": 0.8042,
      "step": 379490
    },
    {
      "epoch": 1.3300528866879056,
      "grad_norm": 2.328125,
      "learning_rate": 3.092504757380105e-05,
      "loss": 0.8073,
      "step": 379500
    },
    {
      "epoch": 1.330087934194801,
      "grad_norm": 3.09375,
      "learning_rate": 3.092439854513735e-05,
      "loss": 0.8349,
      "step": 379510
    },
    {
      "epoch": 1.3301229817016966,
      "grad_norm": 3.328125,
      "learning_rate": 3.092374951647365e-05,
      "loss": 0.9286,
      "step": 379520
    },
    {
      "epoch": 1.3301580292085924,
      "grad_norm": 2.53125,
      "learning_rate": 3.092310048780995e-05,
      "loss": 0.8714,
      "step": 379530
    },
    {
      "epoch": 1.330193076715488,
      "grad_norm": 2.828125,
      "learning_rate": 3.0922451459146244e-05,
      "loss": 0.8627,
      "step": 379540
    },
    {
      "epoch": 1.3302281242223835,
      "grad_norm": 2.765625,
      "learning_rate": 3.0921802430482545e-05,
      "loss": 0.822,
      "step": 379550
    },
    {
      "epoch": 1.330263171729279,
      "grad_norm": 2.640625,
      "learning_rate": 3.092115340181884e-05,
      "loss": 0.809,
      "step": 379560
    },
    {
      "epoch": 1.3302982192361745,
      "grad_norm": 2.890625,
      "learning_rate": 3.092050437315514e-05,
      "loss": 0.8987,
      "step": 379570
    },
    {
      "epoch": 1.3303332667430703,
      "grad_norm": 2.625,
      "learning_rate": 3.0919855344491436e-05,
      "loss": 0.8503,
      "step": 379580
    },
    {
      "epoch": 1.3303683142499658,
      "grad_norm": 2.484375,
      "learning_rate": 3.091920631582773e-05,
      "loss": 0.8443,
      "step": 379590
    },
    {
      "epoch": 1.3304033617568614,
      "grad_norm": 2.421875,
      "learning_rate": 3.091855728716403e-05,
      "loss": 0.8353,
      "step": 379600
    },
    {
      "epoch": 1.3304384092637571,
      "grad_norm": 3.09375,
      "learning_rate": 3.0917908258500326e-05,
      "loss": 0.8902,
      "step": 379610
    },
    {
      "epoch": 1.3304734567706527,
      "grad_norm": 2.984375,
      "learning_rate": 3.091725922983663e-05,
      "loss": 0.8408,
      "step": 379620
    },
    {
      "epoch": 1.3305085042775482,
      "grad_norm": 3.015625,
      "learning_rate": 3.091661020117292e-05,
      "loss": 0.915,
      "step": 379630
    },
    {
      "epoch": 1.330543551784444,
      "grad_norm": 2.6875,
      "learning_rate": 3.0915961172509224e-05,
      "loss": 0.8185,
      "step": 379640
    },
    {
      "epoch": 1.3305785992913395,
      "grad_norm": 3.015625,
      "learning_rate": 3.091531214384552e-05,
      "loss": 0.7676,
      "step": 379650
    },
    {
      "epoch": 1.330613646798235,
      "grad_norm": 2.796875,
      "learning_rate": 3.091466311518182e-05,
      "loss": 0.8298,
      "step": 379660
    },
    {
      "epoch": 1.3306486943051306,
      "grad_norm": 2.71875,
      "learning_rate": 3.0914014086518114e-05,
      "loss": 0.8444,
      "step": 379670
    },
    {
      "epoch": 1.330683741812026,
      "grad_norm": 2.53125,
      "learning_rate": 3.0913365057854416e-05,
      "loss": 0.8031,
      "step": 379680
    },
    {
      "epoch": 1.3307187893189218,
      "grad_norm": 3.0625,
      "learning_rate": 3.091271602919071e-05,
      "loss": 0.8505,
      "step": 379690
    },
    {
      "epoch": 1.3307538368258174,
      "grad_norm": 3.21875,
      "learning_rate": 3.091206700052701e-05,
      "loss": 0.7928,
      "step": 379700
    },
    {
      "epoch": 1.330788884332713,
      "grad_norm": 3.0,
      "learning_rate": 3.091141797186331e-05,
      "loss": 0.853,
      "step": 379710
    },
    {
      "epoch": 1.3308239318396087,
      "grad_norm": 3.125,
      "learning_rate": 3.091076894319961e-05,
      "loss": 0.8682,
      "step": 379720
    },
    {
      "epoch": 1.3308589793465042,
      "grad_norm": 3.0,
      "learning_rate": 3.091011991453591e-05,
      "loss": 0.9166,
      "step": 379730
    },
    {
      "epoch": 1.3308940268533997,
      "grad_norm": 3.046875,
      "learning_rate": 3.0909470885872204e-05,
      "loss": 0.9118,
      "step": 379740
    },
    {
      "epoch": 1.3309290743602955,
      "grad_norm": 3.125,
      "learning_rate": 3.0908821857208505e-05,
      "loss": 0.7623,
      "step": 379750
    },
    {
      "epoch": 1.330964121867191,
      "grad_norm": 2.796875,
      "learning_rate": 3.09081728285448e-05,
      "loss": 0.885,
      "step": 379760
    },
    {
      "epoch": 1.3309991693740866,
      "grad_norm": 3.0,
      "learning_rate": 3.09075237998811e-05,
      "loss": 0.8599,
      "step": 379770
    },
    {
      "epoch": 1.3310342168809821,
      "grad_norm": 2.84375,
      "learning_rate": 3.0906874771217396e-05,
      "loss": 0.8974,
      "step": 379780
    },
    {
      "epoch": 1.3310692643878776,
      "grad_norm": 2.875,
      "learning_rate": 3.09062257425537e-05,
      "loss": 0.8665,
      "step": 379790
    },
    {
      "epoch": 1.3311043118947734,
      "grad_norm": 3.375,
      "learning_rate": 3.090557671388999e-05,
      "loss": 0.8232,
      "step": 379800
    },
    {
      "epoch": 1.331139359401669,
      "grad_norm": 2.765625,
      "learning_rate": 3.090492768522629e-05,
      "loss": 0.7819,
      "step": 379810
    },
    {
      "epoch": 1.3311744069085645,
      "grad_norm": 2.5,
      "learning_rate": 3.090427865656259e-05,
      "loss": 0.8329,
      "step": 379820
    },
    {
      "epoch": 1.3312094544154602,
      "grad_norm": 2.671875,
      "learning_rate": 3.090362962789889e-05,
      "loss": 0.9387,
      "step": 379830
    },
    {
      "epoch": 1.3312445019223558,
      "grad_norm": 3.40625,
      "learning_rate": 3.090298059923519e-05,
      "loss": 0.8443,
      "step": 379840
    },
    {
      "epoch": 1.3312795494292513,
      "grad_norm": 2.75,
      "learning_rate": 3.0902331570571485e-05,
      "loss": 0.8313,
      "step": 379850
    },
    {
      "epoch": 1.331314596936147,
      "grad_norm": 2.953125,
      "learning_rate": 3.0901682541907787e-05,
      "loss": 0.923,
      "step": 379860
    },
    {
      "epoch": 1.3313496444430426,
      "grad_norm": 2.53125,
      "learning_rate": 3.090103351324408e-05,
      "loss": 0.7711,
      "step": 379870
    },
    {
      "epoch": 1.3313846919499381,
      "grad_norm": 2.78125,
      "learning_rate": 3.090038448458038e-05,
      "loss": 0.9915,
      "step": 379880
    },
    {
      "epoch": 1.3314197394568337,
      "grad_norm": 2.359375,
      "learning_rate": 3.089973545591668e-05,
      "loss": 0.7205,
      "step": 379890
    },
    {
      "epoch": 1.3314547869637292,
      "grad_norm": 2.75,
      "learning_rate": 3.089908642725298e-05,
      "loss": 0.9343,
      "step": 379900
    },
    {
      "epoch": 1.331489834470625,
      "grad_norm": 4.09375,
      "learning_rate": 3.089843739858927e-05,
      "loss": 0.8082,
      "step": 379910
    },
    {
      "epoch": 1.3315248819775205,
      "grad_norm": 2.78125,
      "learning_rate": 3.0897788369925575e-05,
      "loss": 0.8606,
      "step": 379920
    },
    {
      "epoch": 1.331559929484416,
      "grad_norm": 3.203125,
      "learning_rate": 3.089713934126187e-05,
      "loss": 0.8458,
      "step": 379930
    },
    {
      "epoch": 1.3315949769913118,
      "grad_norm": 2.828125,
      "learning_rate": 3.089649031259817e-05,
      "loss": 0.8883,
      "step": 379940
    },
    {
      "epoch": 1.3316300244982073,
      "grad_norm": 2.875,
      "learning_rate": 3.0895841283934465e-05,
      "loss": 0.8738,
      "step": 379950
    },
    {
      "epoch": 1.3316650720051029,
      "grad_norm": 2.265625,
      "learning_rate": 3.089519225527076e-05,
      "loss": 0.8517,
      "step": 379960
    },
    {
      "epoch": 1.3317001195119986,
      "grad_norm": 2.75,
      "learning_rate": 3.089454322660706e-05,
      "loss": 0.8526,
      "step": 379970
    },
    {
      "epoch": 1.3317351670188942,
      "grad_norm": 2.734375,
      "learning_rate": 3.0893894197943356e-05,
      "loss": 0.8082,
      "step": 379980
    },
    {
      "epoch": 1.3317702145257897,
      "grad_norm": 3.125,
      "learning_rate": 3.089324516927966e-05,
      "loss": 0.9518,
      "step": 379990
    },
    {
      "epoch": 1.3318052620326852,
      "grad_norm": 2.796875,
      "learning_rate": 3.089259614061595e-05,
      "loss": 0.8165,
      "step": 380000
    },
    {
      "epoch": 1.3318052620326852,
      "eval_loss": 0.798753023147583,
      "eval_runtime": 560.1592,
      "eval_samples_per_second": 679.157,
      "eval_steps_per_second": 56.596,
      "step": 380000
    },
    {
      "epoch": 1.3318403095395808,
      "grad_norm": 2.515625,
      "learning_rate": 3.089194711195225e-05,
      "loss": 0.8825,
      "step": 380010
    },
    {
      "epoch": 1.3318753570464765,
      "grad_norm": 2.890625,
      "learning_rate": 3.089129808328855e-05,
      "loss": 0.8233,
      "step": 380020
    },
    {
      "epoch": 1.331910404553372,
      "grad_norm": 2.875,
      "learning_rate": 3.089064905462485e-05,
      "loss": 0.8264,
      "step": 380030
    },
    {
      "epoch": 1.3319454520602676,
      "grad_norm": 3.0625,
      "learning_rate": 3.0890000025961144e-05,
      "loss": 0.887,
      "step": 380040
    },
    {
      "epoch": 1.3319804995671634,
      "grad_norm": 2.6875,
      "learning_rate": 3.0889350997297445e-05,
      "loss": 0.8405,
      "step": 380050
    },
    {
      "epoch": 1.332015547074059,
      "grad_norm": 2.53125,
      "learning_rate": 3.088870196863374e-05,
      "loss": 0.8421,
      "step": 380060
    },
    {
      "epoch": 1.3320505945809544,
      "grad_norm": 2.828125,
      "learning_rate": 3.088805293997004e-05,
      "loss": 0.8612,
      "step": 380070
    },
    {
      "epoch": 1.3320856420878502,
      "grad_norm": 2.8125,
      "learning_rate": 3.088740391130634e-05,
      "loss": 0.8055,
      "step": 380080
    },
    {
      "epoch": 1.3321206895947457,
      "grad_norm": 2.71875,
      "learning_rate": 3.088675488264264e-05,
      "loss": 0.8114,
      "step": 380090
    },
    {
      "epoch": 1.3321557371016413,
      "grad_norm": 3.40625,
      "learning_rate": 3.088610585397894e-05,
      "loss": 0.9252,
      "step": 380100
    },
    {
      "epoch": 1.3321907846085368,
      "grad_norm": 2.9375,
      "learning_rate": 3.088545682531523e-05,
      "loss": 0.8266,
      "step": 380110
    },
    {
      "epoch": 1.3322258321154326,
      "grad_norm": 2.75,
      "learning_rate": 3.0884807796651535e-05,
      "loss": 0.8574,
      "step": 380120
    },
    {
      "epoch": 1.332260879622328,
      "grad_norm": 2.609375,
      "learning_rate": 3.088415876798783e-05,
      "loss": 0.7705,
      "step": 380130
    },
    {
      "epoch": 1.3322959271292236,
      "grad_norm": 2.90625,
      "learning_rate": 3.088350973932413e-05,
      "loss": 0.7718,
      "step": 380140
    },
    {
      "epoch": 1.3323309746361192,
      "grad_norm": 2.671875,
      "learning_rate": 3.0882860710660425e-05,
      "loss": 0.7518,
      "step": 380150
    },
    {
      "epoch": 1.332366022143015,
      "grad_norm": 2.96875,
      "learning_rate": 3.0882211681996727e-05,
      "loss": 0.7305,
      "step": 380160
    },
    {
      "epoch": 1.3324010696499105,
      "grad_norm": 2.65625,
      "learning_rate": 3.088156265333302e-05,
      "loss": 0.8636,
      "step": 380170
    },
    {
      "epoch": 1.332436117156806,
      "grad_norm": 2.953125,
      "learning_rate": 3.088091362466932e-05,
      "loss": 0.8721,
      "step": 380180
    },
    {
      "epoch": 1.3324711646637017,
      "grad_norm": 3.046875,
      "learning_rate": 3.088026459600562e-05,
      "loss": 0.8266,
      "step": 380190
    },
    {
      "epoch": 1.3325062121705973,
      "grad_norm": 3.15625,
      "learning_rate": 3.087961556734192e-05,
      "loss": 0.8653,
      "step": 380200
    },
    {
      "epoch": 1.3325412596774928,
      "grad_norm": 3.359375,
      "learning_rate": 3.087896653867822e-05,
      "loss": 0.8096,
      "step": 380210
    },
    {
      "epoch": 1.3325763071843886,
      "grad_norm": 2.40625,
      "learning_rate": 3.0878317510014515e-05,
      "loss": 0.8233,
      "step": 380220
    },
    {
      "epoch": 1.3326113546912841,
      "grad_norm": 2.8125,
      "learning_rate": 3.0877668481350816e-05,
      "loss": 0.8432,
      "step": 380230
    },
    {
      "epoch": 1.3326464021981796,
      "grad_norm": 2.8125,
      "learning_rate": 3.087701945268711e-05,
      "loss": 0.9196,
      "step": 380240
    },
    {
      "epoch": 1.3326814497050752,
      "grad_norm": 2.9375,
      "learning_rate": 3.087637042402341e-05,
      "loss": 0.8395,
      "step": 380250
    },
    {
      "epoch": 1.3327164972119707,
      "grad_norm": 2.703125,
      "learning_rate": 3.0875721395359707e-05,
      "loss": 0.9072,
      "step": 380260
    },
    {
      "epoch": 1.3327515447188665,
      "grad_norm": 2.5625,
      "learning_rate": 3.087507236669601e-05,
      "loss": 0.8151,
      "step": 380270
    },
    {
      "epoch": 1.332786592225762,
      "grad_norm": 3.359375,
      "learning_rate": 3.08744233380323e-05,
      "loss": 0.8274,
      "step": 380280
    },
    {
      "epoch": 1.3328216397326575,
      "grad_norm": 2.78125,
      "learning_rate": 3.0873774309368604e-05,
      "loss": 0.7736,
      "step": 380290
    },
    {
      "epoch": 1.3328566872395533,
      "grad_norm": 3.015625,
      "learning_rate": 3.08731252807049e-05,
      "loss": 0.8405,
      "step": 380300
    },
    {
      "epoch": 1.3328917347464488,
      "grad_norm": 2.484375,
      "learning_rate": 3.08724762520412e-05,
      "loss": 0.8383,
      "step": 380310
    },
    {
      "epoch": 1.3329267822533444,
      "grad_norm": 2.953125,
      "learning_rate": 3.0871827223377495e-05,
      "loss": 0.9782,
      "step": 380320
    },
    {
      "epoch": 1.3329618297602401,
      "grad_norm": 2.890625,
      "learning_rate": 3.087117819471379e-05,
      "loss": 0.852,
      "step": 380330
    },
    {
      "epoch": 1.3329968772671357,
      "grad_norm": 2.671875,
      "learning_rate": 3.087052916605009e-05,
      "loss": 0.7942,
      "step": 380340
    },
    {
      "epoch": 1.3330319247740312,
      "grad_norm": 3.109375,
      "learning_rate": 3.0869880137386385e-05,
      "loss": 0.9816,
      "step": 380350
    },
    {
      "epoch": 1.3330669722809267,
      "grad_norm": 2.890625,
      "learning_rate": 3.0869231108722687e-05,
      "loss": 0.9314,
      "step": 380360
    },
    {
      "epoch": 1.3331020197878223,
      "grad_norm": 2.59375,
      "learning_rate": 3.086858208005898e-05,
      "loss": 0.8408,
      "step": 380370
    },
    {
      "epoch": 1.333137067294718,
      "grad_norm": 2.6875,
      "learning_rate": 3.086793305139528e-05,
      "loss": 0.8366,
      "step": 380380
    },
    {
      "epoch": 1.3331721148016136,
      "grad_norm": 2.4375,
      "learning_rate": 3.086728402273158e-05,
      "loss": 0.8737,
      "step": 380390
    },
    {
      "epoch": 1.333207162308509,
      "grad_norm": 3.046875,
      "learning_rate": 3.086663499406788e-05,
      "loss": 0.8078,
      "step": 380400
    },
    {
      "epoch": 1.3332422098154049,
      "grad_norm": 2.65625,
      "learning_rate": 3.086598596540417e-05,
      "loss": 0.7859,
      "step": 380410
    },
    {
      "epoch": 1.3332772573223004,
      "grad_norm": 3.265625,
      "learning_rate": 3.0865336936740475e-05,
      "loss": 0.8497,
      "step": 380420
    },
    {
      "epoch": 1.333312304829196,
      "grad_norm": 2.78125,
      "learning_rate": 3.086468790807677e-05,
      "loss": 0.8502,
      "step": 380430
    },
    {
      "epoch": 1.3333473523360917,
      "grad_norm": 3.03125,
      "learning_rate": 3.086403887941307e-05,
      "loss": 0.7834,
      "step": 380440
    },
    {
      "epoch": 1.3333823998429872,
      "grad_norm": 2.734375,
      "learning_rate": 3.086338985074937e-05,
      "loss": 0.8198,
      "step": 380450
    },
    {
      "epoch": 1.3334174473498828,
      "grad_norm": 2.59375,
      "learning_rate": 3.0862740822085667e-05,
      "loss": 0.8863,
      "step": 380460
    },
    {
      "epoch": 1.3334524948567783,
      "grad_norm": 3.0,
      "learning_rate": 3.086209179342197e-05,
      "loss": 0.8264,
      "step": 380470
    },
    {
      "epoch": 1.3334875423636738,
      "grad_norm": 2.6875,
      "learning_rate": 3.086144276475826e-05,
      "loss": 0.8584,
      "step": 380480
    },
    {
      "epoch": 1.3335225898705696,
      "grad_norm": 3.1875,
      "learning_rate": 3.0860793736094564e-05,
      "loss": 0.9174,
      "step": 380490
    },
    {
      "epoch": 1.3335576373774651,
      "grad_norm": 2.890625,
      "learning_rate": 3.086014470743086e-05,
      "loss": 0.8354,
      "step": 380500
    },
    {
      "epoch": 1.3335926848843607,
      "grad_norm": 2.484375,
      "learning_rate": 3.085949567876716e-05,
      "loss": 0.9117,
      "step": 380510
    },
    {
      "epoch": 1.3336277323912564,
      "grad_norm": 3.03125,
      "learning_rate": 3.0858846650103455e-05,
      "loss": 0.8892,
      "step": 380520
    },
    {
      "epoch": 1.333662779898152,
      "grad_norm": 2.796875,
      "learning_rate": 3.0858197621439756e-05,
      "loss": 0.8864,
      "step": 380530
    },
    {
      "epoch": 1.3336978274050475,
      "grad_norm": 3.328125,
      "learning_rate": 3.085754859277605e-05,
      "loss": 0.8608,
      "step": 380540
    },
    {
      "epoch": 1.3337328749119433,
      "grad_norm": 2.609375,
      "learning_rate": 3.085689956411235e-05,
      "loss": 0.8457,
      "step": 380550
    },
    {
      "epoch": 1.3337679224188388,
      "grad_norm": 2.765625,
      "learning_rate": 3.0856250535448647e-05,
      "loss": 0.831,
      "step": 380560
    },
    {
      "epoch": 1.3338029699257343,
      "grad_norm": 2.625,
      "learning_rate": 3.085560150678495e-05,
      "loss": 0.9083,
      "step": 380570
    },
    {
      "epoch": 1.3338380174326299,
      "grad_norm": 3.375,
      "learning_rate": 3.085495247812125e-05,
      "loss": 0.9044,
      "step": 380580
    },
    {
      "epoch": 1.3338730649395254,
      "grad_norm": 2.9375,
      "learning_rate": 3.0854303449457544e-05,
      "loss": 0.875,
      "step": 380590
    },
    {
      "epoch": 1.3339081124464212,
      "grad_norm": 2.90625,
      "learning_rate": 3.0853654420793845e-05,
      "loss": 0.8238,
      "step": 380600
    },
    {
      "epoch": 1.3339431599533167,
      "grad_norm": 2.828125,
      "learning_rate": 3.085300539213014e-05,
      "loss": 0.9147,
      "step": 380610
    },
    {
      "epoch": 1.3339782074602122,
      "grad_norm": 3.234375,
      "learning_rate": 3.085235636346644e-05,
      "loss": 0.9109,
      "step": 380620
    },
    {
      "epoch": 1.334013254967108,
      "grad_norm": 3.09375,
      "learning_rate": 3.0851707334802736e-05,
      "loss": 0.8442,
      "step": 380630
    },
    {
      "epoch": 1.3340483024740035,
      "grad_norm": 2.984375,
      "learning_rate": 3.085105830613904e-05,
      "loss": 0.8535,
      "step": 380640
    },
    {
      "epoch": 1.334083349980899,
      "grad_norm": 2.9375,
      "learning_rate": 3.085040927747533e-05,
      "loss": 0.7628,
      "step": 380650
    },
    {
      "epoch": 1.3341183974877948,
      "grad_norm": 2.890625,
      "learning_rate": 3.084976024881163e-05,
      "loss": 0.8944,
      "step": 380660
    },
    {
      "epoch": 1.3341534449946904,
      "grad_norm": 2.453125,
      "learning_rate": 3.084911122014793e-05,
      "loss": 0.8154,
      "step": 380670
    },
    {
      "epoch": 1.3341884925015859,
      "grad_norm": 3.25,
      "learning_rate": 3.084846219148423e-05,
      "loss": 0.8835,
      "step": 380680
    },
    {
      "epoch": 1.3342235400084814,
      "grad_norm": 2.90625,
      "learning_rate": 3.0847813162820524e-05,
      "loss": 0.8308,
      "step": 380690
    },
    {
      "epoch": 1.334258587515377,
      "grad_norm": 2.96875,
      "learning_rate": 3.084716413415682e-05,
      "loss": 0.8593,
      "step": 380700
    },
    {
      "epoch": 1.3342936350222727,
      "grad_norm": 3.015625,
      "learning_rate": 3.084651510549312e-05,
      "loss": 0.9152,
      "step": 380710
    },
    {
      "epoch": 1.3343286825291683,
      "grad_norm": 2.828125,
      "learning_rate": 3.0845866076829415e-05,
      "loss": 0.9002,
      "step": 380720
    },
    {
      "epoch": 1.3343637300360638,
      "grad_norm": 2.875,
      "learning_rate": 3.0845217048165716e-05,
      "loss": 0.812,
      "step": 380730
    },
    {
      "epoch": 1.3343987775429595,
      "grad_norm": 2.875,
      "learning_rate": 3.084456801950201e-05,
      "loss": 0.8898,
      "step": 380740
    },
    {
      "epoch": 1.334433825049855,
      "grad_norm": 3.1875,
      "learning_rate": 3.084391899083831e-05,
      "loss": 0.8709,
      "step": 380750
    },
    {
      "epoch": 1.3344688725567506,
      "grad_norm": 2.65625,
      "learning_rate": 3.0843269962174607e-05,
      "loss": 0.7962,
      "step": 380760
    },
    {
      "epoch": 1.3345039200636464,
      "grad_norm": 3.0625,
      "learning_rate": 3.084262093351091e-05,
      "loss": 0.8632,
      "step": 380770
    },
    {
      "epoch": 1.334538967570542,
      "grad_norm": 2.953125,
      "learning_rate": 3.08419719048472e-05,
      "loss": 0.8438,
      "step": 380780
    },
    {
      "epoch": 1.3345740150774374,
      "grad_norm": 2.921875,
      "learning_rate": 3.0841322876183504e-05,
      "loss": 0.9325,
      "step": 380790
    },
    {
      "epoch": 1.334609062584333,
      "grad_norm": 3.09375,
      "learning_rate": 3.0840673847519805e-05,
      "loss": 0.765,
      "step": 380800
    },
    {
      "epoch": 1.3346441100912287,
      "grad_norm": 3.328125,
      "learning_rate": 3.08400248188561e-05,
      "loss": 0.8914,
      "step": 380810
    },
    {
      "epoch": 1.3346791575981243,
      "grad_norm": 2.375,
      "learning_rate": 3.08393757901924e-05,
      "loss": 0.8099,
      "step": 380820
    },
    {
      "epoch": 1.3347142051050198,
      "grad_norm": 3.234375,
      "learning_rate": 3.0838726761528696e-05,
      "loss": 0.8199,
      "step": 380830
    },
    {
      "epoch": 1.3347492526119153,
      "grad_norm": 2.671875,
      "learning_rate": 3.0838077732865e-05,
      "loss": 0.8422,
      "step": 380840
    },
    {
      "epoch": 1.334784300118811,
      "grad_norm": 2.90625,
      "learning_rate": 3.083742870420129e-05,
      "loss": 0.9308,
      "step": 380850
    },
    {
      "epoch": 1.3348193476257066,
      "grad_norm": 4.40625,
      "learning_rate": 3.083677967553759e-05,
      "loss": 0.9316,
      "step": 380860
    },
    {
      "epoch": 1.3348543951326022,
      "grad_norm": 3.125,
      "learning_rate": 3.083613064687389e-05,
      "loss": 0.9387,
      "step": 380870
    },
    {
      "epoch": 1.334889442639498,
      "grad_norm": 2.90625,
      "learning_rate": 3.083548161821019e-05,
      "loss": 0.8686,
      "step": 380880
    },
    {
      "epoch": 1.3349244901463935,
      "grad_norm": 2.8125,
      "learning_rate": 3.0834832589546484e-05,
      "loss": 0.8429,
      "step": 380890
    },
    {
      "epoch": 1.334959537653289,
      "grad_norm": 2.625,
      "learning_rate": 3.0834183560882785e-05,
      "loss": 0.8174,
      "step": 380900
    },
    {
      "epoch": 1.3349945851601848,
      "grad_norm": 3.15625,
      "learning_rate": 3.083353453221908e-05,
      "loss": 0.8511,
      "step": 380910
    },
    {
      "epoch": 1.3350296326670803,
      "grad_norm": 3.03125,
      "learning_rate": 3.083288550355538e-05,
      "loss": 0.8945,
      "step": 380920
    },
    {
      "epoch": 1.3350646801739758,
      "grad_norm": 2.703125,
      "learning_rate": 3.0832236474891676e-05,
      "loss": 0.8848,
      "step": 380930
    },
    {
      "epoch": 1.3350997276808714,
      "grad_norm": 3.09375,
      "learning_rate": 3.083158744622798e-05,
      "loss": 0.9237,
      "step": 380940
    },
    {
      "epoch": 1.335134775187767,
      "grad_norm": 2.484375,
      "learning_rate": 3.083093841756428e-05,
      "loss": 0.7982,
      "step": 380950
    },
    {
      "epoch": 1.3351698226946627,
      "grad_norm": 2.9375,
      "learning_rate": 3.083028938890057e-05,
      "loss": 0.9065,
      "step": 380960
    },
    {
      "epoch": 1.3352048702015582,
      "grad_norm": 2.625,
      "learning_rate": 3.0829640360236875e-05,
      "loss": 0.7815,
      "step": 380970
    },
    {
      "epoch": 1.3352399177084537,
      "grad_norm": 3.484375,
      "learning_rate": 3.082899133157317e-05,
      "loss": 0.9447,
      "step": 380980
    },
    {
      "epoch": 1.3352749652153495,
      "grad_norm": 3.0625,
      "learning_rate": 3.082834230290947e-05,
      "loss": 0.9082,
      "step": 380990
    },
    {
      "epoch": 1.335310012722245,
      "grad_norm": 2.53125,
      "learning_rate": 3.0827693274245765e-05,
      "loss": 0.8639,
      "step": 381000
    },
    {
      "epoch": 1.3353450602291406,
      "grad_norm": 3.109375,
      "learning_rate": 3.082704424558207e-05,
      "loss": 0.8924,
      "step": 381010
    },
    {
      "epoch": 1.3353801077360363,
      "grad_norm": 2.671875,
      "learning_rate": 3.082639521691836e-05,
      "loss": 0.8579,
      "step": 381020
    },
    {
      "epoch": 1.3354151552429319,
      "grad_norm": 2.640625,
      "learning_rate": 3.082574618825466e-05,
      "loss": 0.8493,
      "step": 381030
    },
    {
      "epoch": 1.3354502027498274,
      "grad_norm": 2.9375,
      "learning_rate": 3.082509715959096e-05,
      "loss": 0.8351,
      "step": 381040
    },
    {
      "epoch": 1.335485250256723,
      "grad_norm": 2.828125,
      "learning_rate": 3.082444813092726e-05,
      "loss": 0.8011,
      "step": 381050
    },
    {
      "epoch": 1.3355202977636185,
      "grad_norm": 3.265625,
      "learning_rate": 3.082379910226355e-05,
      "loss": 0.8589,
      "step": 381060
    },
    {
      "epoch": 1.3355553452705142,
      "grad_norm": 3.125,
      "learning_rate": 3.0823150073599855e-05,
      "loss": 0.8615,
      "step": 381070
    },
    {
      "epoch": 1.3355903927774098,
      "grad_norm": 3.296875,
      "learning_rate": 3.082250104493615e-05,
      "loss": 0.9199,
      "step": 381080
    },
    {
      "epoch": 1.3356254402843053,
      "grad_norm": 2.609375,
      "learning_rate": 3.0821852016272444e-05,
      "loss": 0.8484,
      "step": 381090
    },
    {
      "epoch": 1.335660487791201,
      "grad_norm": 2.734375,
      "learning_rate": 3.0821202987608745e-05,
      "loss": 0.8106,
      "step": 381100
    },
    {
      "epoch": 1.3356955352980966,
      "grad_norm": 2.765625,
      "learning_rate": 3.082055395894504e-05,
      "loss": 0.7988,
      "step": 381110
    },
    {
      "epoch": 1.3357305828049921,
      "grad_norm": 2.4375,
      "learning_rate": 3.081990493028134e-05,
      "loss": 0.8541,
      "step": 381120
    },
    {
      "epoch": 1.3357656303118879,
      "grad_norm": 2.953125,
      "learning_rate": 3.0819255901617636e-05,
      "loss": 0.7976,
      "step": 381130
    },
    {
      "epoch": 1.3358006778187834,
      "grad_norm": 3.421875,
      "learning_rate": 3.081860687295394e-05,
      "loss": 0.9147,
      "step": 381140
    },
    {
      "epoch": 1.335835725325679,
      "grad_norm": 3.40625,
      "learning_rate": 3.081795784429023e-05,
      "loss": 0.919,
      "step": 381150
    },
    {
      "epoch": 1.3358707728325745,
      "grad_norm": 2.875,
      "learning_rate": 3.081730881562653e-05,
      "loss": 0.8241,
      "step": 381160
    },
    {
      "epoch": 1.33590582033947,
      "grad_norm": 3.25,
      "learning_rate": 3.0816659786962835e-05,
      "loss": 0.7156,
      "step": 381170
    },
    {
      "epoch": 1.3359408678463658,
      "grad_norm": 3.4375,
      "learning_rate": 3.081601075829913e-05,
      "loss": 0.9333,
      "step": 381180
    },
    {
      "epoch": 1.3359759153532613,
      "grad_norm": 3.25,
      "learning_rate": 3.081536172963543e-05,
      "loss": 0.8639,
      "step": 381190
    },
    {
      "epoch": 1.3360109628601569,
      "grad_norm": 2.6875,
      "learning_rate": 3.0814712700971725e-05,
      "loss": 0.8471,
      "step": 381200
    },
    {
      "epoch": 1.3360460103670526,
      "grad_norm": 2.828125,
      "learning_rate": 3.081406367230803e-05,
      "loss": 0.8388,
      "step": 381210
    },
    {
      "epoch": 1.3360810578739482,
      "grad_norm": 2.515625,
      "learning_rate": 3.081341464364432e-05,
      "loss": 0.7912,
      "step": 381220
    },
    {
      "epoch": 1.3361161053808437,
      "grad_norm": 2.421875,
      "learning_rate": 3.081276561498062e-05,
      "loss": 0.8682,
      "step": 381230
    },
    {
      "epoch": 1.3361511528877394,
      "grad_norm": 3.125,
      "learning_rate": 3.081211658631692e-05,
      "loss": 0.9272,
      "step": 381240
    },
    {
      "epoch": 1.336186200394635,
      "grad_norm": 3.265625,
      "learning_rate": 3.081146755765322e-05,
      "loss": 0.917,
      "step": 381250
    },
    {
      "epoch": 1.3362212479015305,
      "grad_norm": 2.875,
      "learning_rate": 3.081081852898951e-05,
      "loss": 0.8307,
      "step": 381260
    },
    {
      "epoch": 1.336256295408426,
      "grad_norm": 3.046875,
      "learning_rate": 3.0810169500325815e-05,
      "loss": 0.9183,
      "step": 381270
    },
    {
      "epoch": 1.3362913429153216,
      "grad_norm": 2.65625,
      "learning_rate": 3.080952047166211e-05,
      "loss": 0.8383,
      "step": 381280
    },
    {
      "epoch": 1.3363263904222173,
      "grad_norm": 2.875,
      "learning_rate": 3.080887144299841e-05,
      "loss": 0.8233,
      "step": 381290
    },
    {
      "epoch": 1.3363614379291129,
      "grad_norm": 2.796875,
      "learning_rate": 3.0808222414334705e-05,
      "loss": 0.8326,
      "step": 381300
    },
    {
      "epoch": 1.3363964854360084,
      "grad_norm": 2.953125,
      "learning_rate": 3.080757338567101e-05,
      "loss": 0.9311,
      "step": 381310
    },
    {
      "epoch": 1.3364315329429042,
      "grad_norm": 2.765625,
      "learning_rate": 3.080692435700731e-05,
      "loss": 0.8165,
      "step": 381320
    },
    {
      "epoch": 1.3364665804497997,
      "grad_norm": 3.140625,
      "learning_rate": 3.08062753283436e-05,
      "loss": 0.8842,
      "step": 381330
    },
    {
      "epoch": 1.3365016279566952,
      "grad_norm": 2.90625,
      "learning_rate": 3.0805626299679904e-05,
      "loss": 0.8528,
      "step": 381340
    },
    {
      "epoch": 1.336536675463591,
      "grad_norm": 2.734375,
      "learning_rate": 3.08049772710162e-05,
      "loss": 0.8809,
      "step": 381350
    },
    {
      "epoch": 1.3365717229704865,
      "grad_norm": 2.453125,
      "learning_rate": 3.08043282423525e-05,
      "loss": 0.8545,
      "step": 381360
    },
    {
      "epoch": 1.336606770477382,
      "grad_norm": 2.4375,
      "learning_rate": 3.0803679213688795e-05,
      "loss": 0.7656,
      "step": 381370
    },
    {
      "epoch": 1.3366418179842776,
      "grad_norm": 2.9375,
      "learning_rate": 3.0803030185025096e-05,
      "loss": 0.8767,
      "step": 381380
    },
    {
      "epoch": 1.3366768654911731,
      "grad_norm": 2.703125,
      "learning_rate": 3.080238115636139e-05,
      "loss": 0.8157,
      "step": 381390
    },
    {
      "epoch": 1.336711912998069,
      "grad_norm": 2.75,
      "learning_rate": 3.080173212769769e-05,
      "loss": 0.8482,
      "step": 381400
    },
    {
      "epoch": 1.3367469605049644,
      "grad_norm": 2.625,
      "learning_rate": 3.080108309903399e-05,
      "loss": 0.8136,
      "step": 381410
    },
    {
      "epoch": 1.33678200801186,
      "grad_norm": 2.71875,
      "learning_rate": 3.080043407037029e-05,
      "loss": 0.8744,
      "step": 381420
    },
    {
      "epoch": 1.3368170555187557,
      "grad_norm": 3.046875,
      "learning_rate": 3.079978504170658e-05,
      "loss": 0.8028,
      "step": 381430
    },
    {
      "epoch": 1.3368521030256513,
      "grad_norm": 3.203125,
      "learning_rate": 3.0799136013042884e-05,
      "loss": 0.9148,
      "step": 381440
    },
    {
      "epoch": 1.3368871505325468,
      "grad_norm": 3.0,
      "learning_rate": 3.0798486984379186e-05,
      "loss": 0.8526,
      "step": 381450
    },
    {
      "epoch": 1.3369221980394426,
      "grad_norm": 3.0,
      "learning_rate": 3.079783795571547e-05,
      "loss": 0.836,
      "step": 381460
    },
    {
      "epoch": 1.336957245546338,
      "grad_norm": 3.203125,
      "learning_rate": 3.0797188927051775e-05,
      "loss": 0.9817,
      "step": 381470
    },
    {
      "epoch": 1.3369922930532336,
      "grad_norm": 2.5625,
      "learning_rate": 3.079653989838807e-05,
      "loss": 0.8549,
      "step": 381480
    },
    {
      "epoch": 1.3370273405601292,
      "grad_norm": 3.34375,
      "learning_rate": 3.079589086972437e-05,
      "loss": 0.8932,
      "step": 381490
    },
    {
      "epoch": 1.337062388067025,
      "grad_norm": 3.09375,
      "learning_rate": 3.0795241841060665e-05,
      "loss": 0.9793,
      "step": 381500
    },
    {
      "epoch": 1.3370974355739205,
      "grad_norm": 2.640625,
      "learning_rate": 3.079459281239697e-05,
      "loss": 0.8232,
      "step": 381510
    },
    {
      "epoch": 1.337132483080816,
      "grad_norm": 2.96875,
      "learning_rate": 3.079394378373326e-05,
      "loss": 0.9325,
      "step": 381520
    },
    {
      "epoch": 1.3371675305877115,
      "grad_norm": 2.734375,
      "learning_rate": 3.079329475506956e-05,
      "loss": 0.7755,
      "step": 381530
    },
    {
      "epoch": 1.3372025780946073,
      "grad_norm": 2.671875,
      "learning_rate": 3.0792645726405864e-05,
      "loss": 0.8066,
      "step": 381540
    },
    {
      "epoch": 1.3372376256015028,
      "grad_norm": 3.296875,
      "learning_rate": 3.079199669774216e-05,
      "loss": 0.7511,
      "step": 381550
    },
    {
      "epoch": 1.3372726731083984,
      "grad_norm": 2.859375,
      "learning_rate": 3.079134766907846e-05,
      "loss": 0.8938,
      "step": 381560
    },
    {
      "epoch": 1.3373077206152941,
      "grad_norm": 2.6875,
      "learning_rate": 3.0790698640414755e-05,
      "loss": 0.8684,
      "step": 381570
    },
    {
      "epoch": 1.3373427681221897,
      "grad_norm": 2.4375,
      "learning_rate": 3.0790049611751056e-05,
      "loss": 0.8339,
      "step": 381580
    },
    {
      "epoch": 1.3373778156290852,
      "grad_norm": 3.265625,
      "learning_rate": 3.078940058308735e-05,
      "loss": 0.7481,
      "step": 381590
    },
    {
      "epoch": 1.337412863135981,
      "grad_norm": 2.859375,
      "learning_rate": 3.078875155442365e-05,
      "loss": 0.8788,
      "step": 381600
    },
    {
      "epoch": 1.3374479106428765,
      "grad_norm": 3.125,
      "learning_rate": 3.078810252575995e-05,
      "loss": 0.859,
      "step": 381610
    },
    {
      "epoch": 1.337482958149772,
      "grad_norm": 3.25,
      "learning_rate": 3.078745349709625e-05,
      "loss": 0.7864,
      "step": 381620
    },
    {
      "epoch": 1.3375180056566676,
      "grad_norm": 2.953125,
      "learning_rate": 3.078680446843254e-05,
      "loss": 0.8917,
      "step": 381630
    },
    {
      "epoch": 1.337553053163563,
      "grad_norm": 2.921875,
      "learning_rate": 3.0786155439768844e-05,
      "loss": 0.9108,
      "step": 381640
    },
    {
      "epoch": 1.3375881006704589,
      "grad_norm": 3.0,
      "learning_rate": 3.078550641110514e-05,
      "loss": 0.8633,
      "step": 381650
    },
    {
      "epoch": 1.3376231481773544,
      "grad_norm": 2.828125,
      "learning_rate": 3.078485738244144e-05,
      "loss": 0.8176,
      "step": 381660
    },
    {
      "epoch": 1.33765819568425,
      "grad_norm": 3.328125,
      "learning_rate": 3.078420835377774e-05,
      "loss": 0.8468,
      "step": 381670
    },
    {
      "epoch": 1.3376932431911457,
      "grad_norm": 2.625,
      "learning_rate": 3.0783559325114036e-05,
      "loss": 0.7735,
      "step": 381680
    },
    {
      "epoch": 1.3377282906980412,
      "grad_norm": 2.34375,
      "learning_rate": 3.078291029645034e-05,
      "loss": 0.7843,
      "step": 381690
    },
    {
      "epoch": 1.3377633382049368,
      "grad_norm": 2.546875,
      "learning_rate": 3.078226126778663e-05,
      "loss": 0.8887,
      "step": 381700
    },
    {
      "epoch": 1.3377983857118325,
      "grad_norm": 2.90625,
      "learning_rate": 3.0781612239122934e-05,
      "loss": 0.893,
      "step": 381710
    },
    {
      "epoch": 1.337833433218728,
      "grad_norm": 2.84375,
      "learning_rate": 3.078096321045923e-05,
      "loss": 0.8344,
      "step": 381720
    },
    {
      "epoch": 1.3378684807256236,
      "grad_norm": 2.90625,
      "learning_rate": 3.078031418179553e-05,
      "loss": 0.8806,
      "step": 381730
    },
    {
      "epoch": 1.3379035282325191,
      "grad_norm": 2.875,
      "learning_rate": 3.0779665153131824e-05,
      "loss": 0.8181,
      "step": 381740
    },
    {
      "epoch": 1.3379385757394147,
      "grad_norm": 2.90625,
      "learning_rate": 3.0779016124468126e-05,
      "loss": 0.7728,
      "step": 381750
    },
    {
      "epoch": 1.3379736232463104,
      "grad_norm": 3.3125,
      "learning_rate": 3.077836709580442e-05,
      "loss": 0.8297,
      "step": 381760
    },
    {
      "epoch": 1.338008670753206,
      "grad_norm": 3.21875,
      "learning_rate": 3.077771806714072e-05,
      "loss": 0.8132,
      "step": 381770
    },
    {
      "epoch": 1.3380437182601015,
      "grad_norm": 2.828125,
      "learning_rate": 3.0777069038477016e-05,
      "loss": 0.8562,
      "step": 381780
    },
    {
      "epoch": 1.3380787657669972,
      "grad_norm": 3.484375,
      "learning_rate": 3.077642000981332e-05,
      "loss": 0.8114,
      "step": 381790
    },
    {
      "epoch": 1.3381138132738928,
      "grad_norm": 3.25,
      "learning_rate": 3.077577098114961e-05,
      "loss": 0.8544,
      "step": 381800
    },
    {
      "epoch": 1.3381488607807883,
      "grad_norm": 3.046875,
      "learning_rate": 3.0775121952485914e-05,
      "loss": 0.9133,
      "step": 381810
    },
    {
      "epoch": 1.338183908287684,
      "grad_norm": 2.875,
      "learning_rate": 3.0774472923822215e-05,
      "loss": 0.8144,
      "step": 381820
    },
    {
      "epoch": 1.3382189557945796,
      "grad_norm": 3.421875,
      "learning_rate": 3.07738238951585e-05,
      "loss": 0.8993,
      "step": 381830
    },
    {
      "epoch": 1.3382540033014751,
      "grad_norm": 3.015625,
      "learning_rate": 3.0773174866494804e-05,
      "loss": 0.824,
      "step": 381840
    },
    {
      "epoch": 1.3382890508083707,
      "grad_norm": 3.546875,
      "learning_rate": 3.07725258378311e-05,
      "loss": 0.9362,
      "step": 381850
    },
    {
      "epoch": 1.3383240983152662,
      "grad_norm": 2.828125,
      "learning_rate": 3.07718768091674e-05,
      "loss": 0.8445,
      "step": 381860
    },
    {
      "epoch": 1.338359145822162,
      "grad_norm": 2.921875,
      "learning_rate": 3.0771227780503695e-05,
      "loss": 0.852,
      "step": 381870
    },
    {
      "epoch": 1.3383941933290575,
      "grad_norm": 2.46875,
      "learning_rate": 3.0770578751839996e-05,
      "loss": 0.9156,
      "step": 381880
    },
    {
      "epoch": 1.338429240835953,
      "grad_norm": 2.953125,
      "learning_rate": 3.076992972317629e-05,
      "loss": 0.8684,
      "step": 381890
    },
    {
      "epoch": 1.3384642883428488,
      "grad_norm": 2.796875,
      "learning_rate": 3.076928069451259e-05,
      "loss": 0.8307,
      "step": 381900
    },
    {
      "epoch": 1.3384993358497443,
      "grad_norm": 2.625,
      "learning_rate": 3.0768631665848893e-05,
      "loss": 0.8045,
      "step": 381910
    },
    {
      "epoch": 1.3385343833566399,
      "grad_norm": 3.34375,
      "learning_rate": 3.076798263718519e-05,
      "loss": 0.9026,
      "step": 381920
    },
    {
      "epoch": 1.3385694308635356,
      "grad_norm": 2.359375,
      "learning_rate": 3.076733360852149e-05,
      "loss": 0.8541,
      "step": 381930
    },
    {
      "epoch": 1.3386044783704312,
      "grad_norm": 2.9375,
      "learning_rate": 3.0766684579857784e-05,
      "loss": 0.8076,
      "step": 381940
    },
    {
      "epoch": 1.3386395258773267,
      "grad_norm": 2.625,
      "learning_rate": 3.0766035551194085e-05,
      "loss": 0.8244,
      "step": 381950
    },
    {
      "epoch": 1.3386745733842222,
      "grad_norm": 2.828125,
      "learning_rate": 3.076538652253038e-05,
      "loss": 0.8951,
      "step": 381960
    },
    {
      "epoch": 1.3387096208911178,
      "grad_norm": 2.828125,
      "learning_rate": 3.076473749386668e-05,
      "loss": 0.8101,
      "step": 381970
    },
    {
      "epoch": 1.3387446683980135,
      "grad_norm": 2.75,
      "learning_rate": 3.0764088465202976e-05,
      "loss": 0.8048,
      "step": 381980
    },
    {
      "epoch": 1.338779715904909,
      "grad_norm": 2.6875,
      "learning_rate": 3.076343943653928e-05,
      "loss": 0.807,
      "step": 381990
    },
    {
      "epoch": 1.3388147634118046,
      "grad_norm": 2.59375,
      "learning_rate": 3.076279040787557e-05,
      "loss": 0.8205,
      "step": 382000
    },
    {
      "epoch": 1.3388498109187004,
      "grad_norm": 2.515625,
      "learning_rate": 3.0762141379211873e-05,
      "loss": 0.8101,
      "step": 382010
    },
    {
      "epoch": 1.338884858425596,
      "grad_norm": 2.65625,
      "learning_rate": 3.076149235054817e-05,
      "loss": 0.7169,
      "step": 382020
    },
    {
      "epoch": 1.3389199059324914,
      "grad_norm": 2.65625,
      "learning_rate": 3.076084332188447e-05,
      "loss": 0.8295,
      "step": 382030
    },
    {
      "epoch": 1.3389549534393872,
      "grad_norm": 3.0625,
      "learning_rate": 3.076019429322077e-05,
      "loss": 0.857,
      "step": 382040
    },
    {
      "epoch": 1.3389900009462827,
      "grad_norm": 2.96875,
      "learning_rate": 3.0759545264557065e-05,
      "loss": 0.8578,
      "step": 382050
    },
    {
      "epoch": 1.3390250484531783,
      "grad_norm": 3.671875,
      "learning_rate": 3.075889623589337e-05,
      "loss": 0.8763,
      "step": 382060
    },
    {
      "epoch": 1.3390600959600738,
      "grad_norm": 2.515625,
      "learning_rate": 3.075824720722966e-05,
      "loss": 0.7861,
      "step": 382070
    },
    {
      "epoch": 1.3390951434669693,
      "grad_norm": 2.734375,
      "learning_rate": 3.075759817856596e-05,
      "loss": 0.9107,
      "step": 382080
    },
    {
      "epoch": 1.339130190973865,
      "grad_norm": 2.96875,
      "learning_rate": 3.075694914990226e-05,
      "loss": 0.8621,
      "step": 382090
    },
    {
      "epoch": 1.3391652384807606,
      "grad_norm": 2.953125,
      "learning_rate": 3.075630012123856e-05,
      "loss": 0.8143,
      "step": 382100
    },
    {
      "epoch": 1.3392002859876562,
      "grad_norm": 2.859375,
      "learning_rate": 3.0755651092574853e-05,
      "loss": 0.7901,
      "step": 382110
    },
    {
      "epoch": 1.339235333494552,
      "grad_norm": 2.953125,
      "learning_rate": 3.0755002063911155e-05,
      "loss": 0.8897,
      "step": 382120
    },
    {
      "epoch": 1.3392703810014475,
      "grad_norm": 2.734375,
      "learning_rate": 3.075435303524745e-05,
      "loss": 0.8853,
      "step": 382130
    },
    {
      "epoch": 1.339305428508343,
      "grad_norm": 3.03125,
      "learning_rate": 3.075370400658375e-05,
      "loss": 0.8129,
      "step": 382140
    },
    {
      "epoch": 1.3393404760152388,
      "grad_norm": 3.234375,
      "learning_rate": 3.0753054977920045e-05,
      "loss": 0.8737,
      "step": 382150
    },
    {
      "epoch": 1.3393755235221343,
      "grad_norm": 3.1875,
      "learning_rate": 3.075240594925635e-05,
      "loss": 0.894,
      "step": 382160
    },
    {
      "epoch": 1.3394105710290298,
      "grad_norm": 3.140625,
      "learning_rate": 3.075175692059264e-05,
      "loss": 0.9222,
      "step": 382170
    },
    {
      "epoch": 1.3394456185359254,
      "grad_norm": 2.359375,
      "learning_rate": 3.075110789192894e-05,
      "loss": 0.8451,
      "step": 382180
    },
    {
      "epoch": 1.3394806660428211,
      "grad_norm": 2.859375,
      "learning_rate": 3.0750458863265244e-05,
      "loss": 0.8309,
      "step": 382190
    },
    {
      "epoch": 1.3395157135497167,
      "grad_norm": 2.921875,
      "learning_rate": 3.074980983460154e-05,
      "loss": 0.8892,
      "step": 382200
    },
    {
      "epoch": 1.3395507610566122,
      "grad_norm": 3.03125,
      "learning_rate": 3.0749160805937833e-05,
      "loss": 0.8029,
      "step": 382210
    },
    {
      "epoch": 1.3395858085635077,
      "grad_norm": 3.234375,
      "learning_rate": 3.074851177727413e-05,
      "loss": 0.8728,
      "step": 382220
    },
    {
      "epoch": 1.3396208560704035,
      "grad_norm": 2.796875,
      "learning_rate": 3.074786274861043e-05,
      "loss": 0.8386,
      "step": 382230
    },
    {
      "epoch": 1.339655903577299,
      "grad_norm": 3.171875,
      "learning_rate": 3.0747213719946724e-05,
      "loss": 0.8869,
      "step": 382240
    },
    {
      "epoch": 1.3396909510841946,
      "grad_norm": 3.0,
      "learning_rate": 3.0746564691283025e-05,
      "loss": 0.8883,
      "step": 382250
    },
    {
      "epoch": 1.3397259985910903,
      "grad_norm": 3.078125,
      "learning_rate": 3.074591566261932e-05,
      "loss": 0.8571,
      "step": 382260
    },
    {
      "epoch": 1.3397610460979859,
      "grad_norm": 3.03125,
      "learning_rate": 3.074526663395562e-05,
      "loss": 0.791,
      "step": 382270
    },
    {
      "epoch": 1.3397960936048814,
      "grad_norm": 3.140625,
      "learning_rate": 3.074461760529192e-05,
      "loss": 0.9135,
      "step": 382280
    },
    {
      "epoch": 1.3398311411117771,
      "grad_norm": 2.328125,
      "learning_rate": 3.074396857662822e-05,
      "loss": 0.8738,
      "step": 382290
    },
    {
      "epoch": 1.3398661886186727,
      "grad_norm": 3.109375,
      "learning_rate": 3.074331954796452e-05,
      "loss": 0.9062,
      "step": 382300
    },
    {
      "epoch": 1.3399012361255682,
      "grad_norm": 3.4375,
      "learning_rate": 3.0742670519300813e-05,
      "loss": 0.8753,
      "step": 382310
    },
    {
      "epoch": 1.3399362836324638,
      "grad_norm": 2.640625,
      "learning_rate": 3.0742021490637115e-05,
      "loss": 0.8516,
      "step": 382320
    },
    {
      "epoch": 1.3399713311393593,
      "grad_norm": 2.421875,
      "learning_rate": 3.074137246197341e-05,
      "loss": 0.852,
      "step": 382330
    },
    {
      "epoch": 1.340006378646255,
      "grad_norm": 2.6875,
      "learning_rate": 3.074072343330971e-05,
      "loss": 0.8248,
      "step": 382340
    },
    {
      "epoch": 1.3400414261531506,
      "grad_norm": 2.40625,
      "learning_rate": 3.0740074404646005e-05,
      "loss": 0.8523,
      "step": 382350
    },
    {
      "epoch": 1.3400764736600461,
      "grad_norm": 2.890625,
      "learning_rate": 3.073942537598231e-05,
      "loss": 0.8974,
      "step": 382360
    },
    {
      "epoch": 1.3401115211669419,
      "grad_norm": 2.78125,
      "learning_rate": 3.07387763473186e-05,
      "loss": 0.7562,
      "step": 382370
    },
    {
      "epoch": 1.3401465686738374,
      "grad_norm": 2.96875,
      "learning_rate": 3.07381273186549e-05,
      "loss": 0.8049,
      "step": 382380
    },
    {
      "epoch": 1.340181616180733,
      "grad_norm": 3.140625,
      "learning_rate": 3.07374782899912e-05,
      "loss": 0.8501,
      "step": 382390
    },
    {
      "epoch": 1.3402166636876287,
      "grad_norm": 2.859375,
      "learning_rate": 3.07368292613275e-05,
      "loss": 0.7589,
      "step": 382400
    },
    {
      "epoch": 1.3402517111945242,
      "grad_norm": 3.296875,
      "learning_rate": 3.07361802326638e-05,
      "loss": 0.8639,
      "step": 382410
    },
    {
      "epoch": 1.3402867587014198,
      "grad_norm": 2.671875,
      "learning_rate": 3.0735531204000095e-05,
      "loss": 0.7765,
      "step": 382420
    },
    {
      "epoch": 1.3403218062083153,
      "grad_norm": 3.0625,
      "learning_rate": 3.0734882175336396e-05,
      "loss": 0.7514,
      "step": 382430
    },
    {
      "epoch": 1.3403568537152108,
      "grad_norm": 2.265625,
      "learning_rate": 3.073423314667269e-05,
      "loss": 0.8214,
      "step": 382440
    },
    {
      "epoch": 1.3403919012221066,
      "grad_norm": 2.984375,
      "learning_rate": 3.073358411800899e-05,
      "loss": 0.8557,
      "step": 382450
    },
    {
      "epoch": 1.3404269487290021,
      "grad_norm": 3.53125,
      "learning_rate": 3.073293508934529e-05,
      "loss": 0.8313,
      "step": 382460
    },
    {
      "epoch": 1.3404619962358977,
      "grad_norm": 2.53125,
      "learning_rate": 3.073228606068159e-05,
      "loss": 0.7952,
      "step": 382470
    },
    {
      "epoch": 1.3404970437427934,
      "grad_norm": 2.984375,
      "learning_rate": 3.073163703201788e-05,
      "loss": 0.8815,
      "step": 382480
    },
    {
      "epoch": 1.340532091249689,
      "grad_norm": 3.078125,
      "learning_rate": 3.0730988003354184e-05,
      "loss": 0.877,
      "step": 382490
    },
    {
      "epoch": 1.3405671387565845,
      "grad_norm": 2.9375,
      "learning_rate": 3.073033897469048e-05,
      "loss": 0.8034,
      "step": 382500
    },
    {
      "epoch": 1.3406021862634803,
      "grad_norm": 2.546875,
      "learning_rate": 3.072968994602678e-05,
      "loss": 0.8321,
      "step": 382510
    },
    {
      "epoch": 1.3406372337703758,
      "grad_norm": 2.515625,
      "learning_rate": 3.0729040917363075e-05,
      "loss": 0.8189,
      "step": 382520
    },
    {
      "epoch": 1.3406722812772713,
      "grad_norm": 2.796875,
      "learning_rate": 3.0728391888699376e-05,
      "loss": 0.8273,
      "step": 382530
    },
    {
      "epoch": 1.3407073287841669,
      "grad_norm": 3.078125,
      "learning_rate": 3.072774286003567e-05,
      "loss": 0.949,
      "step": 382540
    },
    {
      "epoch": 1.3407423762910624,
      "grad_norm": 2.828125,
      "learning_rate": 3.072709383137197e-05,
      "loss": 0.8841,
      "step": 382550
    },
    {
      "epoch": 1.3407774237979582,
      "grad_norm": 2.59375,
      "learning_rate": 3.0726444802708274e-05,
      "loss": 0.8159,
      "step": 382560
    },
    {
      "epoch": 1.3408124713048537,
      "grad_norm": 2.59375,
      "learning_rate": 3.072579577404457e-05,
      "loss": 0.8441,
      "step": 382570
    },
    {
      "epoch": 1.3408475188117492,
      "grad_norm": 3.0625,
      "learning_rate": 3.072514674538087e-05,
      "loss": 0.8731,
      "step": 382580
    },
    {
      "epoch": 1.340882566318645,
      "grad_norm": 3.0625,
      "learning_rate": 3.072449771671716e-05,
      "loss": 0.7764,
      "step": 382590
    },
    {
      "epoch": 1.3409176138255405,
      "grad_norm": 3.21875,
      "learning_rate": 3.072384868805346e-05,
      "loss": 0.8847,
      "step": 382600
    },
    {
      "epoch": 1.340952661332436,
      "grad_norm": 2.890625,
      "learning_rate": 3.0723199659389753e-05,
      "loss": 0.8194,
      "step": 382610
    },
    {
      "epoch": 1.3409877088393318,
      "grad_norm": 2.796875,
      "learning_rate": 3.0722550630726055e-05,
      "loss": 0.821,
      "step": 382620
    },
    {
      "epoch": 1.3410227563462274,
      "grad_norm": 3.28125,
      "learning_rate": 3.0721901602062356e-05,
      "loss": 0.8301,
      "step": 382630
    },
    {
      "epoch": 1.341057803853123,
      "grad_norm": 2.9375,
      "learning_rate": 3.072125257339865e-05,
      "loss": 0.8597,
      "step": 382640
    },
    {
      "epoch": 1.3410928513600184,
      "grad_norm": 2.546875,
      "learning_rate": 3.072060354473495e-05,
      "loss": 0.9035,
      "step": 382650
    },
    {
      "epoch": 1.341127898866914,
      "grad_norm": 2.671875,
      "learning_rate": 3.071995451607125e-05,
      "loss": 0.8045,
      "step": 382660
    },
    {
      "epoch": 1.3411629463738097,
      "grad_norm": 2.53125,
      "learning_rate": 3.071930548740755e-05,
      "loss": 0.8076,
      "step": 382670
    },
    {
      "epoch": 1.3411979938807053,
      "grad_norm": 3.0625,
      "learning_rate": 3.071865645874384e-05,
      "loss": 0.8917,
      "step": 382680
    },
    {
      "epoch": 1.3412330413876008,
      "grad_norm": 3.15625,
      "learning_rate": 3.0718007430080144e-05,
      "loss": 0.865,
      "step": 382690
    },
    {
      "epoch": 1.3412680888944966,
      "grad_norm": 2.890625,
      "learning_rate": 3.071735840141644e-05,
      "loss": 0.8882,
      "step": 382700
    },
    {
      "epoch": 1.341303136401392,
      "grad_norm": 2.421875,
      "learning_rate": 3.071670937275274e-05,
      "loss": 0.8362,
      "step": 382710
    },
    {
      "epoch": 1.3413381839082876,
      "grad_norm": 2.75,
      "learning_rate": 3.0716060344089035e-05,
      "loss": 0.7972,
      "step": 382720
    },
    {
      "epoch": 1.3413732314151834,
      "grad_norm": 2.890625,
      "learning_rate": 3.0715411315425336e-05,
      "loss": 0.8628,
      "step": 382730
    },
    {
      "epoch": 1.341408278922079,
      "grad_norm": 3.15625,
      "learning_rate": 3.071476228676163e-05,
      "loss": 0.8931,
      "step": 382740
    },
    {
      "epoch": 1.3414433264289745,
      "grad_norm": 2.875,
      "learning_rate": 3.071411325809793e-05,
      "loss": 0.8147,
      "step": 382750
    },
    {
      "epoch": 1.34147837393587,
      "grad_norm": 3.453125,
      "learning_rate": 3.071346422943423e-05,
      "loss": 0.9127,
      "step": 382760
    },
    {
      "epoch": 1.3415134214427655,
      "grad_norm": 3.078125,
      "learning_rate": 3.071281520077053e-05,
      "loss": 0.8328,
      "step": 382770
    },
    {
      "epoch": 1.3415484689496613,
      "grad_norm": 2.546875,
      "learning_rate": 3.071216617210683e-05,
      "loss": 0.8111,
      "step": 382780
    },
    {
      "epoch": 1.3415835164565568,
      "grad_norm": 2.65625,
      "learning_rate": 3.0711517143443124e-05,
      "loss": 0.8682,
      "step": 382790
    },
    {
      "epoch": 1.3416185639634524,
      "grad_norm": 3.171875,
      "learning_rate": 3.0710868114779426e-05,
      "loss": 0.8397,
      "step": 382800
    },
    {
      "epoch": 1.3416536114703481,
      "grad_norm": 2.65625,
      "learning_rate": 3.071021908611572e-05,
      "loss": 0.8899,
      "step": 382810
    },
    {
      "epoch": 1.3416886589772437,
      "grad_norm": 2.953125,
      "learning_rate": 3.070957005745202e-05,
      "loss": 0.9436,
      "step": 382820
    },
    {
      "epoch": 1.3417237064841392,
      "grad_norm": 2.578125,
      "learning_rate": 3.0708921028788316e-05,
      "loss": 0.8973,
      "step": 382830
    },
    {
      "epoch": 1.341758753991035,
      "grad_norm": 3.203125,
      "learning_rate": 3.070827200012462e-05,
      "loss": 0.8303,
      "step": 382840
    },
    {
      "epoch": 1.3417938014979305,
      "grad_norm": 2.75,
      "learning_rate": 3.070762297146091e-05,
      "loss": 0.7854,
      "step": 382850
    },
    {
      "epoch": 1.341828849004826,
      "grad_norm": 3.109375,
      "learning_rate": 3.0706973942797214e-05,
      "loss": 0.8776,
      "step": 382860
    },
    {
      "epoch": 1.3418638965117218,
      "grad_norm": 2.46875,
      "learning_rate": 3.070632491413351e-05,
      "loss": 0.8614,
      "step": 382870
    },
    {
      "epoch": 1.3418989440186173,
      "grad_norm": 3.0,
      "learning_rate": 3.070567588546981e-05,
      "loss": 0.7906,
      "step": 382880
    },
    {
      "epoch": 1.3419339915255128,
      "grad_norm": 3.046875,
      "learning_rate": 3.0705026856806104e-05,
      "loss": 0.8434,
      "step": 382890
    },
    {
      "epoch": 1.3419690390324084,
      "grad_norm": 2.765625,
      "learning_rate": 3.0704377828142406e-05,
      "loss": 0.8564,
      "step": 382900
    },
    {
      "epoch": 1.342004086539304,
      "grad_norm": 3.375,
      "learning_rate": 3.070372879947871e-05,
      "loss": 0.7988,
      "step": 382910
    },
    {
      "epoch": 1.3420391340461997,
      "grad_norm": 2.4375,
      "learning_rate": 3.0703079770815e-05,
      "loss": 0.8021,
      "step": 382920
    },
    {
      "epoch": 1.3420741815530952,
      "grad_norm": 3.140625,
      "learning_rate": 3.07024307421513e-05,
      "loss": 0.8394,
      "step": 382930
    },
    {
      "epoch": 1.3421092290599907,
      "grad_norm": 2.765625,
      "learning_rate": 3.07017817134876e-05,
      "loss": 0.9051,
      "step": 382940
    },
    {
      "epoch": 1.3421442765668865,
      "grad_norm": 2.6875,
      "learning_rate": 3.07011326848239e-05,
      "loss": 0.8395,
      "step": 382950
    },
    {
      "epoch": 1.342179324073782,
      "grad_norm": 2.65625,
      "learning_rate": 3.070048365616019e-05,
      "loss": 0.8404,
      "step": 382960
    },
    {
      "epoch": 1.3422143715806776,
      "grad_norm": 2.984375,
      "learning_rate": 3.069983462749649e-05,
      "loss": 0.8666,
      "step": 382970
    },
    {
      "epoch": 1.3422494190875733,
      "grad_norm": 2.984375,
      "learning_rate": 3.069918559883278e-05,
      "loss": 0.8651,
      "step": 382980
    },
    {
      "epoch": 1.3422844665944689,
      "grad_norm": 2.84375,
      "learning_rate": 3.0698536570169084e-05,
      "loss": 0.8544,
      "step": 382990
    },
    {
      "epoch": 1.3423195141013644,
      "grad_norm": 2.921875,
      "learning_rate": 3.0697887541505386e-05,
      "loss": 0.8462,
      "step": 383000
    },
    {
      "epoch": 1.34235456160826,
      "grad_norm": 3.46875,
      "learning_rate": 3.069723851284168e-05,
      "loss": 0.7932,
      "step": 383010
    },
    {
      "epoch": 1.3423896091151555,
      "grad_norm": 3.25,
      "learning_rate": 3.069658948417798e-05,
      "loss": 0.8955,
      "step": 383020
    },
    {
      "epoch": 1.3424246566220512,
      "grad_norm": 3.265625,
      "learning_rate": 3.0695940455514276e-05,
      "loss": 0.8219,
      "step": 383030
    },
    {
      "epoch": 1.3424597041289468,
      "grad_norm": 2.921875,
      "learning_rate": 3.069529142685058e-05,
      "loss": 0.7864,
      "step": 383040
    },
    {
      "epoch": 1.3424947516358423,
      "grad_norm": 3.03125,
      "learning_rate": 3.069464239818687e-05,
      "loss": 0.8518,
      "step": 383050
    },
    {
      "epoch": 1.342529799142738,
      "grad_norm": 2.59375,
      "learning_rate": 3.0693993369523174e-05,
      "loss": 0.7285,
      "step": 383060
    },
    {
      "epoch": 1.3425648466496336,
      "grad_norm": 2.5625,
      "learning_rate": 3.069334434085947e-05,
      "loss": 0.8032,
      "step": 383070
    },
    {
      "epoch": 1.3425998941565291,
      "grad_norm": 2.84375,
      "learning_rate": 3.069269531219577e-05,
      "loss": 0.8926,
      "step": 383080
    },
    {
      "epoch": 1.342634941663425,
      "grad_norm": 2.90625,
      "learning_rate": 3.0692046283532064e-05,
      "loss": 0.8317,
      "step": 383090
    },
    {
      "epoch": 1.3426699891703204,
      "grad_norm": 3.171875,
      "learning_rate": 3.0691397254868366e-05,
      "loss": 0.8799,
      "step": 383100
    },
    {
      "epoch": 1.342705036677216,
      "grad_norm": 2.671875,
      "learning_rate": 3.069074822620466e-05,
      "loss": 0.8605,
      "step": 383110
    },
    {
      "epoch": 1.3427400841841115,
      "grad_norm": 2.46875,
      "learning_rate": 3.069009919754096e-05,
      "loss": 0.7888,
      "step": 383120
    },
    {
      "epoch": 1.342775131691007,
      "grad_norm": 2.75,
      "learning_rate": 3.0689450168877256e-05,
      "loss": 0.8514,
      "step": 383130
    },
    {
      "epoch": 1.3428101791979028,
      "grad_norm": 2.609375,
      "learning_rate": 3.068880114021356e-05,
      "loss": 0.8009,
      "step": 383140
    },
    {
      "epoch": 1.3428452267047983,
      "grad_norm": 3.421875,
      "learning_rate": 3.068815211154986e-05,
      "loss": 0.8221,
      "step": 383150
    },
    {
      "epoch": 1.3428802742116939,
      "grad_norm": 3.171875,
      "learning_rate": 3.0687503082886154e-05,
      "loss": 0.9174,
      "step": 383160
    },
    {
      "epoch": 1.3429153217185896,
      "grad_norm": 2.515625,
      "learning_rate": 3.0686854054222455e-05,
      "loss": 0.9098,
      "step": 383170
    },
    {
      "epoch": 1.3429503692254852,
      "grad_norm": 3.203125,
      "learning_rate": 3.068620502555875e-05,
      "loss": 0.8906,
      "step": 383180
    },
    {
      "epoch": 1.3429854167323807,
      "grad_norm": 2.78125,
      "learning_rate": 3.068555599689505e-05,
      "loss": 0.7729,
      "step": 383190
    },
    {
      "epoch": 1.3430204642392765,
      "grad_norm": 3.03125,
      "learning_rate": 3.0684906968231346e-05,
      "loss": 0.8388,
      "step": 383200
    },
    {
      "epoch": 1.343055511746172,
      "grad_norm": 2.6875,
      "learning_rate": 3.068425793956765e-05,
      "loss": 0.8012,
      "step": 383210
    },
    {
      "epoch": 1.3430905592530675,
      "grad_norm": 2.921875,
      "learning_rate": 3.068360891090394e-05,
      "loss": 0.8401,
      "step": 383220
    },
    {
      "epoch": 1.343125606759963,
      "grad_norm": 2.828125,
      "learning_rate": 3.068295988224024e-05,
      "loss": 0.9274,
      "step": 383230
    },
    {
      "epoch": 1.3431606542668586,
      "grad_norm": 3.28125,
      "learning_rate": 3.068231085357654e-05,
      "loss": 0.8716,
      "step": 383240
    },
    {
      "epoch": 1.3431957017737544,
      "grad_norm": 2.59375,
      "learning_rate": 3.068166182491284e-05,
      "loss": 0.8524,
      "step": 383250
    },
    {
      "epoch": 1.34323074928065,
      "grad_norm": 2.796875,
      "learning_rate": 3.0681012796249134e-05,
      "loss": 0.8527,
      "step": 383260
    },
    {
      "epoch": 1.3432657967875454,
      "grad_norm": 3.453125,
      "learning_rate": 3.0680363767585435e-05,
      "loss": 0.8492,
      "step": 383270
    },
    {
      "epoch": 1.3433008442944412,
      "grad_norm": 2.875,
      "learning_rate": 3.0679714738921736e-05,
      "loss": 0.8395,
      "step": 383280
    },
    {
      "epoch": 1.3433358918013367,
      "grad_norm": 3.34375,
      "learning_rate": 3.067906571025803e-05,
      "loss": 0.8511,
      "step": 383290
    },
    {
      "epoch": 1.3433709393082323,
      "grad_norm": 2.65625,
      "learning_rate": 3.067841668159433e-05,
      "loss": 0.8538,
      "step": 383300
    },
    {
      "epoch": 1.343405986815128,
      "grad_norm": 2.65625,
      "learning_rate": 3.067776765293063e-05,
      "loss": 0.8784,
      "step": 383310
    },
    {
      "epoch": 1.3434410343220236,
      "grad_norm": 2.828125,
      "learning_rate": 3.067711862426693e-05,
      "loss": 0.7654,
      "step": 383320
    },
    {
      "epoch": 1.343476081828919,
      "grad_norm": 3.125,
      "learning_rate": 3.067646959560322e-05,
      "loss": 0.9189,
      "step": 383330
    },
    {
      "epoch": 1.3435111293358146,
      "grad_norm": 2.578125,
      "learning_rate": 3.067582056693952e-05,
      "loss": 0.8112,
      "step": 383340
    },
    {
      "epoch": 1.3435461768427102,
      "grad_norm": 2.890625,
      "learning_rate": 3.067517153827581e-05,
      "loss": 0.8373,
      "step": 383350
    },
    {
      "epoch": 1.343581224349606,
      "grad_norm": 2.875,
      "learning_rate": 3.0674522509612114e-05,
      "loss": 0.8391,
      "step": 383360
    },
    {
      "epoch": 1.3436162718565015,
      "grad_norm": 4.0625,
      "learning_rate": 3.0673873480948415e-05,
      "loss": 0.8157,
      "step": 383370
    },
    {
      "epoch": 1.343651319363397,
      "grad_norm": 2.890625,
      "learning_rate": 3.067322445228471e-05,
      "loss": 0.8542,
      "step": 383380
    },
    {
      "epoch": 1.3436863668702927,
      "grad_norm": 2.625,
      "learning_rate": 3.067257542362101e-05,
      "loss": 0.8798,
      "step": 383390
    },
    {
      "epoch": 1.3437214143771883,
      "grad_norm": 2.90625,
      "learning_rate": 3.0671926394957306e-05,
      "loss": 0.8709,
      "step": 383400
    },
    {
      "epoch": 1.3437564618840838,
      "grad_norm": 3.40625,
      "learning_rate": 3.067127736629361e-05,
      "loss": 0.8247,
      "step": 383410
    },
    {
      "epoch": 1.3437915093909796,
      "grad_norm": 3.03125,
      "learning_rate": 3.06706283376299e-05,
      "loss": 0.8711,
      "step": 383420
    },
    {
      "epoch": 1.3438265568978751,
      "grad_norm": 2.484375,
      "learning_rate": 3.06699793089662e-05,
      "loss": 0.8344,
      "step": 383430
    },
    {
      "epoch": 1.3438616044047706,
      "grad_norm": 2.859375,
      "learning_rate": 3.06693302803025e-05,
      "loss": 0.8907,
      "step": 383440
    },
    {
      "epoch": 1.3438966519116662,
      "grad_norm": 3.0,
      "learning_rate": 3.06686812516388e-05,
      "loss": 0.7763,
      "step": 383450
    },
    {
      "epoch": 1.343931699418562,
      "grad_norm": 3.046875,
      "learning_rate": 3.0668032222975094e-05,
      "loss": 0.8627,
      "step": 383460
    },
    {
      "epoch": 1.3439667469254575,
      "grad_norm": 3.0625,
      "learning_rate": 3.0667383194311395e-05,
      "loss": 0.8221,
      "step": 383470
    },
    {
      "epoch": 1.344001794432353,
      "grad_norm": 2.96875,
      "learning_rate": 3.066673416564769e-05,
      "loss": 0.8309,
      "step": 383480
    },
    {
      "epoch": 1.3440368419392485,
      "grad_norm": 3.015625,
      "learning_rate": 3.066608513698399e-05,
      "loss": 0.795,
      "step": 383490
    },
    {
      "epoch": 1.3440718894461443,
      "grad_norm": 2.984375,
      "learning_rate": 3.0665436108320286e-05,
      "loss": 0.8141,
      "step": 383500
    },
    {
      "epoch": 1.3441069369530398,
      "grad_norm": 2.953125,
      "learning_rate": 3.066478707965659e-05,
      "loss": 0.8775,
      "step": 383510
    },
    {
      "epoch": 1.3441419844599354,
      "grad_norm": 3.4375,
      "learning_rate": 3.066413805099289e-05,
      "loss": 0.9083,
      "step": 383520
    },
    {
      "epoch": 1.3441770319668311,
      "grad_norm": 2.734375,
      "learning_rate": 3.066348902232918e-05,
      "loss": 0.8277,
      "step": 383530
    },
    {
      "epoch": 1.3442120794737267,
      "grad_norm": 3.265625,
      "learning_rate": 3.0662839993665484e-05,
      "loss": 0.8788,
      "step": 383540
    },
    {
      "epoch": 1.3442471269806222,
      "grad_norm": 3.25,
      "learning_rate": 3.066219096500178e-05,
      "loss": 0.8371,
      "step": 383550
    },
    {
      "epoch": 1.344282174487518,
      "grad_norm": 3.28125,
      "learning_rate": 3.066154193633808e-05,
      "loss": 0.8703,
      "step": 383560
    },
    {
      "epoch": 1.3443172219944135,
      "grad_norm": 2.953125,
      "learning_rate": 3.0660892907674375e-05,
      "loss": 0.7876,
      "step": 383570
    },
    {
      "epoch": 1.344352269501309,
      "grad_norm": 2.859375,
      "learning_rate": 3.0660243879010676e-05,
      "loss": 0.8167,
      "step": 383580
    },
    {
      "epoch": 1.3443873170082046,
      "grad_norm": 2.984375,
      "learning_rate": 3.065959485034697e-05,
      "loss": 0.8371,
      "step": 383590
    },
    {
      "epoch": 1.3444223645151,
      "grad_norm": 2.890625,
      "learning_rate": 3.065894582168327e-05,
      "loss": 0.8054,
      "step": 383600
    },
    {
      "epoch": 1.3444574120219959,
      "grad_norm": 3.359375,
      "learning_rate": 3.065829679301957e-05,
      "loss": 0.8213,
      "step": 383610
    },
    {
      "epoch": 1.3444924595288914,
      "grad_norm": 2.625,
      "learning_rate": 3.065764776435587e-05,
      "loss": 0.863,
      "step": 383620
    },
    {
      "epoch": 1.344527507035787,
      "grad_norm": 2.890625,
      "learning_rate": 3.065699873569216e-05,
      "loss": 0.8212,
      "step": 383630
    },
    {
      "epoch": 1.3445625545426827,
      "grad_norm": 2.765625,
      "learning_rate": 3.0656349707028464e-05,
      "loss": 0.866,
      "step": 383640
    },
    {
      "epoch": 1.3445976020495782,
      "grad_norm": 3.15625,
      "learning_rate": 3.0655700678364766e-05,
      "loss": 0.8276,
      "step": 383650
    },
    {
      "epoch": 1.3446326495564738,
      "grad_norm": 3.078125,
      "learning_rate": 3.065505164970106e-05,
      "loss": 0.7736,
      "step": 383660
    },
    {
      "epoch": 1.3446676970633695,
      "grad_norm": 3.0625,
      "learning_rate": 3.065440262103736e-05,
      "loss": 0.8871,
      "step": 383670
    },
    {
      "epoch": 1.344702744570265,
      "grad_norm": 2.828125,
      "learning_rate": 3.0653753592373656e-05,
      "loss": 0.8768,
      "step": 383680
    },
    {
      "epoch": 1.3447377920771606,
      "grad_norm": 2.890625,
      "learning_rate": 3.065310456370996e-05,
      "loss": 0.8669,
      "step": 383690
    },
    {
      "epoch": 1.3447728395840561,
      "grad_norm": 3.015625,
      "learning_rate": 3.065245553504625e-05,
      "loss": 0.9202,
      "step": 383700
    },
    {
      "epoch": 1.3448078870909517,
      "grad_norm": 2.640625,
      "learning_rate": 3.065180650638255e-05,
      "loss": 0.9168,
      "step": 383710
    },
    {
      "epoch": 1.3448429345978474,
      "grad_norm": 2.828125,
      "learning_rate": 3.065115747771884e-05,
      "loss": 0.8189,
      "step": 383720
    },
    {
      "epoch": 1.344877982104743,
      "grad_norm": 2.6875,
      "learning_rate": 3.065050844905514e-05,
      "loss": 0.9078,
      "step": 383730
    },
    {
      "epoch": 1.3449130296116385,
      "grad_norm": 2.46875,
      "learning_rate": 3.0649859420391444e-05,
      "loss": 0.7939,
      "step": 383740
    },
    {
      "epoch": 1.3449480771185343,
      "grad_norm": 2.8125,
      "learning_rate": 3.064921039172774e-05,
      "loss": 0.8422,
      "step": 383750
    },
    {
      "epoch": 1.3449831246254298,
      "grad_norm": 2.890625,
      "learning_rate": 3.064856136306404e-05,
      "loss": 0.8329,
      "step": 383760
    },
    {
      "epoch": 1.3450181721323253,
      "grad_norm": 2.890625,
      "learning_rate": 3.0647912334400335e-05,
      "loss": 0.7335,
      "step": 383770
    },
    {
      "epoch": 1.345053219639221,
      "grad_norm": 3.125,
      "learning_rate": 3.0647263305736636e-05,
      "loss": 0.9095,
      "step": 383780
    },
    {
      "epoch": 1.3450882671461166,
      "grad_norm": 2.859375,
      "learning_rate": 3.064661427707293e-05,
      "loss": 0.9159,
      "step": 383790
    },
    {
      "epoch": 1.3451233146530122,
      "grad_norm": 2.34375,
      "learning_rate": 3.064596524840923e-05,
      "loss": 0.8242,
      "step": 383800
    },
    {
      "epoch": 1.3451583621599077,
      "grad_norm": 3.546875,
      "learning_rate": 3.064531621974553e-05,
      "loss": 0.9765,
      "step": 383810
    },
    {
      "epoch": 1.3451934096668032,
      "grad_norm": 2.703125,
      "learning_rate": 3.064466719108183e-05,
      "loss": 0.8811,
      "step": 383820
    },
    {
      "epoch": 1.345228457173699,
      "grad_norm": 2.9375,
      "learning_rate": 3.064401816241812e-05,
      "loss": 0.8887,
      "step": 383830
    },
    {
      "epoch": 1.3452635046805945,
      "grad_norm": 1.984375,
      "learning_rate": 3.0643369133754424e-05,
      "loss": 0.7744,
      "step": 383840
    },
    {
      "epoch": 1.34529855218749,
      "grad_norm": 3.046875,
      "learning_rate": 3.064272010509072e-05,
      "loss": 0.8173,
      "step": 383850
    },
    {
      "epoch": 1.3453335996943858,
      "grad_norm": 3.03125,
      "learning_rate": 3.064207107642702e-05,
      "loss": 0.8431,
      "step": 383860
    },
    {
      "epoch": 1.3453686472012814,
      "grad_norm": 2.546875,
      "learning_rate": 3.064142204776332e-05,
      "loss": 0.7922,
      "step": 383870
    },
    {
      "epoch": 1.345403694708177,
      "grad_norm": 2.859375,
      "learning_rate": 3.0640773019099616e-05,
      "loss": 0.8726,
      "step": 383880
    },
    {
      "epoch": 1.3454387422150726,
      "grad_norm": 2.75,
      "learning_rate": 3.064012399043592e-05,
      "loss": 0.8338,
      "step": 383890
    },
    {
      "epoch": 1.3454737897219682,
      "grad_norm": 2.703125,
      "learning_rate": 3.063947496177221e-05,
      "loss": 0.9021,
      "step": 383900
    },
    {
      "epoch": 1.3455088372288637,
      "grad_norm": 2.5625,
      "learning_rate": 3.0638825933108514e-05,
      "loss": 0.7789,
      "step": 383910
    },
    {
      "epoch": 1.3455438847357593,
      "grad_norm": 3.15625,
      "learning_rate": 3.063817690444481e-05,
      "loss": 0.8797,
      "step": 383920
    },
    {
      "epoch": 1.3455789322426548,
      "grad_norm": 3.078125,
      "learning_rate": 3.063752787578111e-05,
      "loss": 0.891,
      "step": 383930
    },
    {
      "epoch": 1.3456139797495505,
      "grad_norm": 2.96875,
      "learning_rate": 3.0636878847117404e-05,
      "loss": 0.7989,
      "step": 383940
    },
    {
      "epoch": 1.345649027256446,
      "grad_norm": 3.46875,
      "learning_rate": 3.0636229818453706e-05,
      "loss": 0.9166,
      "step": 383950
    },
    {
      "epoch": 1.3456840747633416,
      "grad_norm": 2.96875,
      "learning_rate": 3.063558078979e-05,
      "loss": 0.8183,
      "step": 383960
    },
    {
      "epoch": 1.3457191222702374,
      "grad_norm": 3.0,
      "learning_rate": 3.06349317611263e-05,
      "loss": 0.8961,
      "step": 383970
    },
    {
      "epoch": 1.345754169777133,
      "grad_norm": 2.921875,
      "learning_rate": 3.0634282732462596e-05,
      "loss": 0.8661,
      "step": 383980
    },
    {
      "epoch": 1.3457892172840284,
      "grad_norm": 2.75,
      "learning_rate": 3.06336337037989e-05,
      "loss": 0.8311,
      "step": 383990
    },
    {
      "epoch": 1.3458242647909242,
      "grad_norm": 3.078125,
      "learning_rate": 3.063298467513519e-05,
      "loss": 0.859,
      "step": 384000
    },
    {
      "epoch": 1.3458593122978197,
      "grad_norm": 2.625,
      "learning_rate": 3.0632335646471494e-05,
      "loss": 0.8993,
      "step": 384010
    },
    {
      "epoch": 1.3458943598047153,
      "grad_norm": 2.6875,
      "learning_rate": 3.0631686617807795e-05,
      "loss": 0.7938,
      "step": 384020
    },
    {
      "epoch": 1.3459294073116108,
      "grad_norm": 2.8125,
      "learning_rate": 3.063103758914409e-05,
      "loss": 0.859,
      "step": 384030
    },
    {
      "epoch": 1.3459644548185064,
      "grad_norm": 3.078125,
      "learning_rate": 3.063038856048039e-05,
      "loss": 0.8619,
      "step": 384040
    },
    {
      "epoch": 1.345999502325402,
      "grad_norm": 2.671875,
      "learning_rate": 3.0629739531816686e-05,
      "loss": 0.8622,
      "step": 384050
    },
    {
      "epoch": 1.3460345498322976,
      "grad_norm": 2.453125,
      "learning_rate": 3.062909050315299e-05,
      "loss": 0.7684,
      "step": 384060
    },
    {
      "epoch": 1.3460695973391932,
      "grad_norm": 3.15625,
      "learning_rate": 3.062844147448928e-05,
      "loss": 0.8238,
      "step": 384070
    },
    {
      "epoch": 1.346104644846089,
      "grad_norm": 3.0,
      "learning_rate": 3.062779244582558e-05,
      "loss": 0.9388,
      "step": 384080
    },
    {
      "epoch": 1.3461396923529845,
      "grad_norm": 2.984375,
      "learning_rate": 3.062714341716187e-05,
      "loss": 0.7358,
      "step": 384090
    },
    {
      "epoch": 1.34617473985988,
      "grad_norm": 2.640625,
      "learning_rate": 3.062649438849817e-05,
      "loss": 0.8687,
      "step": 384100
    },
    {
      "epoch": 1.3462097873667758,
      "grad_norm": 2.65625,
      "learning_rate": 3.0625845359834474e-05,
      "loss": 0.8106,
      "step": 384110
    },
    {
      "epoch": 1.3462448348736713,
      "grad_norm": 2.640625,
      "learning_rate": 3.062519633117077e-05,
      "loss": 0.8738,
      "step": 384120
    },
    {
      "epoch": 1.3462798823805668,
      "grad_norm": 3.0625,
      "learning_rate": 3.062454730250707e-05,
      "loss": 0.8549,
      "step": 384130
    },
    {
      "epoch": 1.3463149298874624,
      "grad_norm": 2.890625,
      "learning_rate": 3.0623898273843364e-05,
      "loss": 0.8776,
      "step": 384140
    },
    {
      "epoch": 1.3463499773943581,
      "grad_norm": 3.265625,
      "learning_rate": 3.0623249245179666e-05,
      "loss": 0.8574,
      "step": 384150
    },
    {
      "epoch": 1.3463850249012537,
      "grad_norm": 3.390625,
      "learning_rate": 3.062260021651596e-05,
      "loss": 0.7757,
      "step": 384160
    },
    {
      "epoch": 1.3464200724081492,
      "grad_norm": 2.71875,
      "learning_rate": 3.062195118785226e-05,
      "loss": 0.7939,
      "step": 384170
    },
    {
      "epoch": 1.3464551199150447,
      "grad_norm": 2.96875,
      "learning_rate": 3.0621302159188556e-05,
      "loss": 0.8029,
      "step": 384180
    },
    {
      "epoch": 1.3464901674219405,
      "grad_norm": 2.953125,
      "learning_rate": 3.062065313052486e-05,
      "loss": 0.8233,
      "step": 384190
    },
    {
      "epoch": 1.346525214928836,
      "grad_norm": 3.625,
      "learning_rate": 3.062000410186115e-05,
      "loss": 0.8115,
      "step": 384200
    },
    {
      "epoch": 1.3465602624357316,
      "grad_norm": 3.0,
      "learning_rate": 3.0619355073197454e-05,
      "loss": 0.9426,
      "step": 384210
    },
    {
      "epoch": 1.3465953099426273,
      "grad_norm": 3.0,
      "learning_rate": 3.061870604453375e-05,
      "loss": 0.9153,
      "step": 384220
    },
    {
      "epoch": 1.3466303574495229,
      "grad_norm": 2.5,
      "learning_rate": 3.061805701587005e-05,
      "loss": 0.8364,
      "step": 384230
    },
    {
      "epoch": 1.3466654049564184,
      "grad_norm": 2.84375,
      "learning_rate": 3.061740798720635e-05,
      "loss": 0.8528,
      "step": 384240
    },
    {
      "epoch": 1.3467004524633142,
      "grad_norm": 2.609375,
      "learning_rate": 3.0616758958542646e-05,
      "loss": 0.9008,
      "step": 384250
    },
    {
      "epoch": 1.3467354999702097,
      "grad_norm": 2.796875,
      "learning_rate": 3.061610992987895e-05,
      "loss": 0.8893,
      "step": 384260
    },
    {
      "epoch": 1.3467705474771052,
      "grad_norm": 2.59375,
      "learning_rate": 3.061546090121524e-05,
      "loss": 0.8054,
      "step": 384270
    },
    {
      "epoch": 1.3468055949840008,
      "grad_norm": 3.0,
      "learning_rate": 3.061481187255154e-05,
      "loss": 0.8058,
      "step": 384280
    },
    {
      "epoch": 1.3468406424908963,
      "grad_norm": 3.15625,
      "learning_rate": 3.061416284388784e-05,
      "loss": 0.7751,
      "step": 384290
    },
    {
      "epoch": 1.346875689997792,
      "grad_norm": 2.375,
      "learning_rate": 3.061351381522414e-05,
      "loss": 0.8031,
      "step": 384300
    },
    {
      "epoch": 1.3469107375046876,
      "grad_norm": 2.59375,
      "learning_rate": 3.0612864786560434e-05,
      "loss": 0.7972,
      "step": 384310
    },
    {
      "epoch": 1.3469457850115831,
      "grad_norm": 3.296875,
      "learning_rate": 3.0612215757896735e-05,
      "loss": 0.9227,
      "step": 384320
    },
    {
      "epoch": 1.3469808325184789,
      "grad_norm": 3.0625,
      "learning_rate": 3.061156672923303e-05,
      "loss": 0.8591,
      "step": 384330
    },
    {
      "epoch": 1.3470158800253744,
      "grad_norm": 2.75,
      "learning_rate": 3.061091770056933e-05,
      "loss": 0.7628,
      "step": 384340
    },
    {
      "epoch": 1.34705092753227,
      "grad_norm": 3.078125,
      "learning_rate": 3.0610268671905626e-05,
      "loss": 0.8758,
      "step": 384350
    },
    {
      "epoch": 1.3470859750391657,
      "grad_norm": 3.125,
      "learning_rate": 3.060961964324193e-05,
      "loss": 0.71,
      "step": 384360
    },
    {
      "epoch": 1.3471210225460613,
      "grad_norm": 2.78125,
      "learning_rate": 3.060897061457822e-05,
      "loss": 0.796,
      "step": 384370
    },
    {
      "epoch": 1.3471560700529568,
      "grad_norm": 2.71875,
      "learning_rate": 3.060832158591452e-05,
      "loss": 0.9012,
      "step": 384380
    },
    {
      "epoch": 1.3471911175598523,
      "grad_norm": 2.5,
      "learning_rate": 3.0607672557250825e-05,
      "loss": 0.9358,
      "step": 384390
    },
    {
      "epoch": 1.3472261650667479,
      "grad_norm": 3.0625,
      "learning_rate": 3.060702352858712e-05,
      "loss": 0.8916,
      "step": 384400
    },
    {
      "epoch": 1.3472612125736436,
      "grad_norm": 2.46875,
      "learning_rate": 3.060637449992342e-05,
      "loss": 0.8662,
      "step": 384410
    },
    {
      "epoch": 1.3472962600805392,
      "grad_norm": 2.578125,
      "learning_rate": 3.0605725471259715e-05,
      "loss": 0.7978,
      "step": 384420
    },
    {
      "epoch": 1.3473313075874347,
      "grad_norm": 3.015625,
      "learning_rate": 3.0605076442596017e-05,
      "loss": 0.8073,
      "step": 384430
    },
    {
      "epoch": 1.3473663550943304,
      "grad_norm": 2.921875,
      "learning_rate": 3.060442741393231e-05,
      "loss": 0.9156,
      "step": 384440
    },
    {
      "epoch": 1.347401402601226,
      "grad_norm": 2.96875,
      "learning_rate": 3.060377838526861e-05,
      "loss": 0.885,
      "step": 384450
    },
    {
      "epoch": 1.3474364501081215,
      "grad_norm": 3.515625,
      "learning_rate": 3.060312935660491e-05,
      "loss": 0.8993,
      "step": 384460
    },
    {
      "epoch": 1.3474714976150173,
      "grad_norm": 3.0625,
      "learning_rate": 3.06024803279412e-05,
      "loss": 0.8473,
      "step": 384470
    },
    {
      "epoch": 1.3475065451219128,
      "grad_norm": 2.8125,
      "learning_rate": 3.06018312992775e-05,
      "loss": 0.8541,
      "step": 384480
    },
    {
      "epoch": 1.3475415926288083,
      "grad_norm": 2.625,
      "learning_rate": 3.06011822706138e-05,
      "loss": 0.7516,
      "step": 384490
    },
    {
      "epoch": 1.3475766401357039,
      "grad_norm": 2.953125,
      "learning_rate": 3.06005332419501e-05,
      "loss": 0.8716,
      "step": 384500
    },
    {
      "epoch": 1.3476116876425994,
      "grad_norm": 4.5,
      "learning_rate": 3.0599884213286394e-05,
      "loss": 0.7686,
      "step": 384510
    },
    {
      "epoch": 1.3476467351494952,
      "grad_norm": 3.0625,
      "learning_rate": 3.0599235184622695e-05,
      "loss": 0.9325,
      "step": 384520
    },
    {
      "epoch": 1.3476817826563907,
      "grad_norm": 3.390625,
      "learning_rate": 3.059858615595899e-05,
      "loss": 0.8036,
      "step": 384530
    },
    {
      "epoch": 1.3477168301632863,
      "grad_norm": 3.265625,
      "learning_rate": 3.059793712729529e-05,
      "loss": 0.9092,
      "step": 384540
    },
    {
      "epoch": 1.347751877670182,
      "grad_norm": 2.984375,
      "learning_rate": 3.0597288098631586e-05,
      "loss": 0.8558,
      "step": 384550
    },
    {
      "epoch": 1.3477869251770775,
      "grad_norm": 2.5625,
      "learning_rate": 3.059663906996789e-05,
      "loss": 0.804,
      "step": 384560
    },
    {
      "epoch": 1.347821972683973,
      "grad_norm": 2.328125,
      "learning_rate": 3.059599004130418e-05,
      "loss": 0.7841,
      "step": 384570
    },
    {
      "epoch": 1.3478570201908688,
      "grad_norm": 3.015625,
      "learning_rate": 3.059534101264048e-05,
      "loss": 0.8239,
      "step": 384580
    },
    {
      "epoch": 1.3478920676977644,
      "grad_norm": 2.875,
      "learning_rate": 3.059469198397678e-05,
      "loss": 0.8944,
      "step": 384590
    },
    {
      "epoch": 1.34792711520466,
      "grad_norm": 3.25,
      "learning_rate": 3.059404295531308e-05,
      "loss": 0.984,
      "step": 384600
    },
    {
      "epoch": 1.3479621627115554,
      "grad_norm": 3.109375,
      "learning_rate": 3.059339392664938e-05,
      "loss": 0.8128,
      "step": 384610
    },
    {
      "epoch": 1.347997210218451,
      "grad_norm": 3.015625,
      "learning_rate": 3.0592744897985675e-05,
      "loss": 0.7739,
      "step": 384620
    },
    {
      "epoch": 1.3480322577253467,
      "grad_norm": 3.3125,
      "learning_rate": 3.0592095869321977e-05,
      "loss": 0.8158,
      "step": 384630
    },
    {
      "epoch": 1.3480673052322423,
      "grad_norm": 3.046875,
      "learning_rate": 3.059144684065827e-05,
      "loss": 0.7904,
      "step": 384640
    },
    {
      "epoch": 1.3481023527391378,
      "grad_norm": 2.453125,
      "learning_rate": 3.059079781199457e-05,
      "loss": 0.8451,
      "step": 384650
    },
    {
      "epoch": 1.3481374002460336,
      "grad_norm": 2.84375,
      "learning_rate": 3.059014878333087e-05,
      "loss": 0.8984,
      "step": 384660
    },
    {
      "epoch": 1.348172447752929,
      "grad_norm": 2.875,
      "learning_rate": 3.058949975466717e-05,
      "loss": 0.762,
      "step": 384670
    },
    {
      "epoch": 1.3482074952598246,
      "grad_norm": 2.546875,
      "learning_rate": 3.058885072600346e-05,
      "loss": 0.8509,
      "step": 384680
    },
    {
      "epoch": 1.3482425427667204,
      "grad_norm": 2.75,
      "learning_rate": 3.0588201697339765e-05,
      "loss": 0.815,
      "step": 384690
    },
    {
      "epoch": 1.348277590273616,
      "grad_norm": 3.375,
      "learning_rate": 3.058755266867606e-05,
      "loss": 0.883,
      "step": 384700
    },
    {
      "epoch": 1.3483126377805115,
      "grad_norm": 2.9375,
      "learning_rate": 3.058690364001236e-05,
      "loss": 0.8342,
      "step": 384710
    },
    {
      "epoch": 1.348347685287407,
      "grad_norm": 3.28125,
      "learning_rate": 3.0586254611348655e-05,
      "loss": 0.797,
      "step": 384720
    },
    {
      "epoch": 1.3483827327943025,
      "grad_norm": 2.921875,
      "learning_rate": 3.0585605582684957e-05,
      "loss": 0.8114,
      "step": 384730
    },
    {
      "epoch": 1.3484177803011983,
      "grad_norm": 3.0,
      "learning_rate": 3.058495655402126e-05,
      "loss": 0.872,
      "step": 384740
    },
    {
      "epoch": 1.3484528278080938,
      "grad_norm": 2.953125,
      "learning_rate": 3.058430752535755e-05,
      "loss": 0.9247,
      "step": 384750
    },
    {
      "epoch": 1.3484878753149894,
      "grad_norm": 2.5625,
      "learning_rate": 3.0583658496693854e-05,
      "loss": 0.7515,
      "step": 384760
    },
    {
      "epoch": 1.3485229228218851,
      "grad_norm": 2.75,
      "learning_rate": 3.058300946803015e-05,
      "loss": 0.8598,
      "step": 384770
    },
    {
      "epoch": 1.3485579703287807,
      "grad_norm": 2.90625,
      "learning_rate": 3.058236043936645e-05,
      "loss": 0.8238,
      "step": 384780
    },
    {
      "epoch": 1.3485930178356762,
      "grad_norm": 3.03125,
      "learning_rate": 3.0581711410702745e-05,
      "loss": 0.8256,
      "step": 384790
    },
    {
      "epoch": 1.348628065342572,
      "grad_norm": 3.5,
      "learning_rate": 3.0581062382039046e-05,
      "loss": 0.9295,
      "step": 384800
    },
    {
      "epoch": 1.3486631128494675,
      "grad_norm": 2.875,
      "learning_rate": 3.058041335337534e-05,
      "loss": 0.8011,
      "step": 384810
    },
    {
      "epoch": 1.348698160356363,
      "grad_norm": 3.25,
      "learning_rate": 3.057976432471164e-05,
      "loss": 0.9022,
      "step": 384820
    },
    {
      "epoch": 1.3487332078632586,
      "grad_norm": 2.953125,
      "learning_rate": 3.0579115296047937e-05,
      "loss": 0.7777,
      "step": 384830
    },
    {
      "epoch": 1.3487682553701543,
      "grad_norm": 3.3125,
      "learning_rate": 3.057846626738423e-05,
      "loss": 0.8638,
      "step": 384840
    },
    {
      "epoch": 1.3488033028770499,
      "grad_norm": 3.234375,
      "learning_rate": 3.057781723872053e-05,
      "loss": 0.8341,
      "step": 384850
    },
    {
      "epoch": 1.3488383503839454,
      "grad_norm": 2.796875,
      "learning_rate": 3.057716821005683e-05,
      "loss": 0.8378,
      "step": 384860
    },
    {
      "epoch": 1.348873397890841,
      "grad_norm": 2.765625,
      "learning_rate": 3.057651918139313e-05,
      "loss": 0.8893,
      "step": 384870
    },
    {
      "epoch": 1.3489084453977367,
      "grad_norm": 3.203125,
      "learning_rate": 3.057587015272942e-05,
      "loss": 0.9071,
      "step": 384880
    },
    {
      "epoch": 1.3489434929046322,
      "grad_norm": 3.046875,
      "learning_rate": 3.0575221124065725e-05,
      "loss": 0.7599,
      "step": 384890
    },
    {
      "epoch": 1.3489785404115278,
      "grad_norm": 2.71875,
      "learning_rate": 3.057457209540202e-05,
      "loss": 0.8538,
      "step": 384900
    },
    {
      "epoch": 1.3490135879184235,
      "grad_norm": 2.8125,
      "learning_rate": 3.057392306673832e-05,
      "loss": 0.9304,
      "step": 384910
    },
    {
      "epoch": 1.349048635425319,
      "grad_norm": 2.34375,
      "learning_rate": 3.0573274038074615e-05,
      "loss": 0.8898,
      "step": 384920
    },
    {
      "epoch": 1.3490836829322146,
      "grad_norm": 3.0625,
      "learning_rate": 3.0572625009410917e-05,
      "loss": 0.7634,
      "step": 384930
    },
    {
      "epoch": 1.3491187304391103,
      "grad_norm": 3.015625,
      "learning_rate": 3.057197598074721e-05,
      "loss": 0.8159,
      "step": 384940
    },
    {
      "epoch": 1.3491537779460059,
      "grad_norm": 3.15625,
      "learning_rate": 3.057132695208351e-05,
      "loss": 0.8905,
      "step": 384950
    },
    {
      "epoch": 1.3491888254529014,
      "grad_norm": 3.0625,
      "learning_rate": 3.057067792341981e-05,
      "loss": 0.7707,
      "step": 384960
    },
    {
      "epoch": 1.349223872959797,
      "grad_norm": 3.015625,
      "learning_rate": 3.057002889475611e-05,
      "loss": 0.9117,
      "step": 384970
    },
    {
      "epoch": 1.3492589204666925,
      "grad_norm": 3.171875,
      "learning_rate": 3.056937986609241e-05,
      "loss": 0.8997,
      "step": 384980
    },
    {
      "epoch": 1.3492939679735882,
      "grad_norm": 2.96875,
      "learning_rate": 3.0568730837428705e-05,
      "loss": 0.8653,
      "step": 384990
    },
    {
      "epoch": 1.3493290154804838,
      "grad_norm": 3.34375,
      "learning_rate": 3.0568081808765006e-05,
      "loss": 0.8493,
      "step": 385000
    },
    {
      "epoch": 1.3493290154804838,
      "eval_loss": 0.7977620363235474,
      "eval_runtime": 557.7408,
      "eval_samples_per_second": 682.102,
      "eval_steps_per_second": 56.842,
      "step": 385000
    },
    {
      "epoch": 1.3493640629873793,
      "grad_norm": 2.640625,
      "learning_rate": 3.05674327801013e-05,
      "loss": 0.8565,
      "step": 385010
    },
    {
      "epoch": 1.349399110494275,
      "grad_norm": 2.484375,
      "learning_rate": 3.05667837514376e-05,
      "loss": 0.8261,
      "step": 385020
    },
    {
      "epoch": 1.3494341580011706,
      "grad_norm": 2.671875,
      "learning_rate": 3.0566134722773897e-05,
      "loss": 0.8502,
      "step": 385030
    },
    {
      "epoch": 1.3494692055080661,
      "grad_norm": 2.8125,
      "learning_rate": 3.05654856941102e-05,
      "loss": 0.7528,
      "step": 385040
    },
    {
      "epoch": 1.349504253014962,
      "grad_norm": 3.046875,
      "learning_rate": 3.056483666544649e-05,
      "loss": 0.8944,
      "step": 385050
    },
    {
      "epoch": 1.3495393005218574,
      "grad_norm": 3.5625,
      "learning_rate": 3.0564187636782794e-05,
      "loss": 0.8289,
      "step": 385060
    },
    {
      "epoch": 1.349574348028753,
      "grad_norm": 2.96875,
      "learning_rate": 3.056353860811909e-05,
      "loss": 0.767,
      "step": 385070
    },
    {
      "epoch": 1.3496093955356485,
      "grad_norm": 3.390625,
      "learning_rate": 3.056288957945539e-05,
      "loss": 0.8444,
      "step": 385080
    },
    {
      "epoch": 1.349644443042544,
      "grad_norm": 3.0,
      "learning_rate": 3.0562240550791685e-05,
      "loss": 0.871,
      "step": 385090
    },
    {
      "epoch": 1.3496794905494398,
      "grad_norm": 3.125,
      "learning_rate": 3.0561591522127986e-05,
      "loss": 0.8696,
      "step": 385100
    },
    {
      "epoch": 1.3497145380563353,
      "grad_norm": 2.796875,
      "learning_rate": 3.056094249346429e-05,
      "loss": 0.8974,
      "step": 385110
    },
    {
      "epoch": 1.3497495855632309,
      "grad_norm": 2.71875,
      "learning_rate": 3.056029346480058e-05,
      "loss": 0.8126,
      "step": 385120
    },
    {
      "epoch": 1.3497846330701266,
      "grad_norm": 2.484375,
      "learning_rate": 3.055964443613688e-05,
      "loss": 0.8525,
      "step": 385130
    },
    {
      "epoch": 1.3498196805770222,
      "grad_norm": 3.3125,
      "learning_rate": 3.055899540747318e-05,
      "loss": 0.8875,
      "step": 385140
    },
    {
      "epoch": 1.3498547280839177,
      "grad_norm": 3.265625,
      "learning_rate": 3.055834637880948e-05,
      "loss": 0.9307,
      "step": 385150
    },
    {
      "epoch": 1.3498897755908135,
      "grad_norm": 2.671875,
      "learning_rate": 3.0557697350145774e-05,
      "loss": 0.7691,
      "step": 385160
    },
    {
      "epoch": 1.349924823097709,
      "grad_norm": 2.828125,
      "learning_rate": 3.0557048321482075e-05,
      "loss": 0.9143,
      "step": 385170
    },
    {
      "epoch": 1.3499598706046045,
      "grad_norm": 3.046875,
      "learning_rate": 3.055639929281837e-05,
      "loss": 0.8538,
      "step": 385180
    },
    {
      "epoch": 1.3499949181115,
      "grad_norm": 2.890625,
      "learning_rate": 3.055575026415467e-05,
      "loss": 0.9025,
      "step": 385190
    },
    {
      "epoch": 1.3500299656183956,
      "grad_norm": 2.78125,
      "learning_rate": 3.0555101235490966e-05,
      "loss": 0.7534,
      "step": 385200
    },
    {
      "epoch": 1.3500650131252914,
      "grad_norm": 2.59375,
      "learning_rate": 3.055445220682727e-05,
      "loss": 0.8728,
      "step": 385210
    },
    {
      "epoch": 1.350100060632187,
      "grad_norm": 2.890625,
      "learning_rate": 3.055380317816356e-05,
      "loss": 0.8621,
      "step": 385220
    },
    {
      "epoch": 1.3501351081390824,
      "grad_norm": 2.796875,
      "learning_rate": 3.0553154149499857e-05,
      "loss": 0.8635,
      "step": 385230
    },
    {
      "epoch": 1.3501701556459782,
      "grad_norm": 2.953125,
      "learning_rate": 3.055250512083616e-05,
      "loss": 0.8611,
      "step": 385240
    },
    {
      "epoch": 1.3502052031528737,
      "grad_norm": 2.984375,
      "learning_rate": 3.055185609217245e-05,
      "loss": 0.8671,
      "step": 385250
    },
    {
      "epoch": 1.3502402506597693,
      "grad_norm": 2.53125,
      "learning_rate": 3.0551207063508754e-05,
      "loss": 0.7737,
      "step": 385260
    },
    {
      "epoch": 1.350275298166665,
      "grad_norm": 2.890625,
      "learning_rate": 3.055055803484505e-05,
      "loss": 0.8838,
      "step": 385270
    },
    {
      "epoch": 1.3503103456735606,
      "grad_norm": 2.78125,
      "learning_rate": 3.054990900618135e-05,
      "loss": 0.8892,
      "step": 385280
    },
    {
      "epoch": 1.350345393180456,
      "grad_norm": 3.203125,
      "learning_rate": 3.0549259977517645e-05,
      "loss": 0.8627,
      "step": 385290
    },
    {
      "epoch": 1.3503804406873516,
      "grad_norm": 3.03125,
      "learning_rate": 3.0548610948853946e-05,
      "loss": 0.8635,
      "step": 385300
    },
    {
      "epoch": 1.3504154881942472,
      "grad_norm": 2.84375,
      "learning_rate": 3.054796192019024e-05,
      "loss": 0.8005,
      "step": 385310
    },
    {
      "epoch": 1.350450535701143,
      "grad_norm": 3.125,
      "learning_rate": 3.054731289152654e-05,
      "loss": 0.8356,
      "step": 385320
    },
    {
      "epoch": 1.3504855832080385,
      "grad_norm": 3.265625,
      "learning_rate": 3.0546663862862837e-05,
      "loss": 0.8148,
      "step": 385330
    },
    {
      "epoch": 1.350520630714934,
      "grad_norm": 2.765625,
      "learning_rate": 3.054601483419914e-05,
      "loss": 0.9432,
      "step": 385340
    },
    {
      "epoch": 1.3505556782218298,
      "grad_norm": 2.796875,
      "learning_rate": 3.054536580553544e-05,
      "loss": 0.8442,
      "step": 385350
    },
    {
      "epoch": 1.3505907257287253,
      "grad_norm": 2.96875,
      "learning_rate": 3.0544716776871734e-05,
      "loss": 0.9458,
      "step": 385360
    },
    {
      "epoch": 1.3506257732356208,
      "grad_norm": 3.296875,
      "learning_rate": 3.0544067748208035e-05,
      "loss": 0.849,
      "step": 385370
    },
    {
      "epoch": 1.3506608207425166,
      "grad_norm": 2.5,
      "learning_rate": 3.054341871954433e-05,
      "loss": 0.8653,
      "step": 385380
    },
    {
      "epoch": 1.3506958682494121,
      "grad_norm": 2.671875,
      "learning_rate": 3.054276969088063e-05,
      "loss": 0.7939,
      "step": 385390
    },
    {
      "epoch": 1.3507309157563077,
      "grad_norm": 2.65625,
      "learning_rate": 3.0542120662216926e-05,
      "loss": 0.7404,
      "step": 385400
    },
    {
      "epoch": 1.3507659632632032,
      "grad_norm": 3.203125,
      "learning_rate": 3.054147163355323e-05,
      "loss": 0.893,
      "step": 385410
    },
    {
      "epoch": 1.3508010107700987,
      "grad_norm": 2.375,
      "learning_rate": 3.054082260488952e-05,
      "loss": 0.7599,
      "step": 385420
    },
    {
      "epoch": 1.3508360582769945,
      "grad_norm": 2.875,
      "learning_rate": 3.054017357622582e-05,
      "loss": 0.9134,
      "step": 385430
    },
    {
      "epoch": 1.35087110578389,
      "grad_norm": 2.875,
      "learning_rate": 3.053952454756212e-05,
      "loss": 0.8752,
      "step": 385440
    },
    {
      "epoch": 1.3509061532907856,
      "grad_norm": 3.109375,
      "learning_rate": 3.053887551889842e-05,
      "loss": 0.8694,
      "step": 385450
    },
    {
      "epoch": 1.3509412007976813,
      "grad_norm": 3.140625,
      "learning_rate": 3.0538226490234714e-05,
      "loss": 0.836,
      "step": 385460
    },
    {
      "epoch": 1.3509762483045769,
      "grad_norm": 2.5,
      "learning_rate": 3.0537577461571015e-05,
      "loss": 0.8416,
      "step": 385470
    },
    {
      "epoch": 1.3510112958114724,
      "grad_norm": 3.4375,
      "learning_rate": 3.053692843290732e-05,
      "loss": 0.8795,
      "step": 385480
    },
    {
      "epoch": 1.3510463433183681,
      "grad_norm": 3.125,
      "learning_rate": 3.053627940424361e-05,
      "loss": 0.9488,
      "step": 385490
    },
    {
      "epoch": 1.3510813908252637,
      "grad_norm": 2.859375,
      "learning_rate": 3.053563037557991e-05,
      "loss": 0.7844,
      "step": 385500
    },
    {
      "epoch": 1.3511164383321592,
      "grad_norm": 3.203125,
      "learning_rate": 3.053498134691621e-05,
      "loss": 0.8564,
      "step": 385510
    },
    {
      "epoch": 1.351151485839055,
      "grad_norm": 2.9375,
      "learning_rate": 3.053433231825251e-05,
      "loss": 0.8827,
      "step": 385520
    },
    {
      "epoch": 1.3511865333459505,
      "grad_norm": 2.75,
      "learning_rate": 3.05336832895888e-05,
      "loss": 0.8228,
      "step": 385530
    },
    {
      "epoch": 1.351221580852846,
      "grad_norm": 2.90625,
      "learning_rate": 3.0533034260925105e-05,
      "loss": 0.836,
      "step": 385540
    },
    {
      "epoch": 1.3512566283597416,
      "grad_norm": 2.90625,
      "learning_rate": 3.05323852322614e-05,
      "loss": 0.8272,
      "step": 385550
    },
    {
      "epoch": 1.3512916758666371,
      "grad_norm": 3.09375,
      "learning_rate": 3.05317362035977e-05,
      "loss": 0.8714,
      "step": 385560
    },
    {
      "epoch": 1.3513267233735329,
      "grad_norm": 2.34375,
      "learning_rate": 3.0531087174933995e-05,
      "loss": 0.8916,
      "step": 385570
    },
    {
      "epoch": 1.3513617708804284,
      "grad_norm": 2.859375,
      "learning_rate": 3.05304381462703e-05,
      "loss": 0.8356,
      "step": 385580
    },
    {
      "epoch": 1.351396818387324,
      "grad_norm": 2.390625,
      "learning_rate": 3.052978911760659e-05,
      "loss": 0.7554,
      "step": 385590
    },
    {
      "epoch": 1.3514318658942197,
      "grad_norm": 2.734375,
      "learning_rate": 3.0529140088942886e-05,
      "loss": 0.7377,
      "step": 385600
    },
    {
      "epoch": 1.3514669134011152,
      "grad_norm": 2.46875,
      "learning_rate": 3.052849106027919e-05,
      "loss": 0.8534,
      "step": 385610
    },
    {
      "epoch": 1.3515019609080108,
      "grad_norm": 3.265625,
      "learning_rate": 3.052784203161548e-05,
      "loss": 0.8906,
      "step": 385620
    },
    {
      "epoch": 1.3515370084149065,
      "grad_norm": 3.28125,
      "learning_rate": 3.052719300295178e-05,
      "loss": 0.8412,
      "step": 385630
    },
    {
      "epoch": 1.351572055921802,
      "grad_norm": 2.9375,
      "learning_rate": 3.052654397428808e-05,
      "loss": 0.8275,
      "step": 385640
    },
    {
      "epoch": 1.3516071034286976,
      "grad_norm": 3.125,
      "learning_rate": 3.052589494562438e-05,
      "loss": 0.8276,
      "step": 385650
    },
    {
      "epoch": 1.3516421509355931,
      "grad_norm": 2.96875,
      "learning_rate": 3.0525245916960674e-05,
      "loss": 0.8341,
      "step": 385660
    },
    {
      "epoch": 1.3516771984424887,
      "grad_norm": 3.265625,
      "learning_rate": 3.0524596888296975e-05,
      "loss": 0.8629,
      "step": 385670
    },
    {
      "epoch": 1.3517122459493844,
      "grad_norm": 2.609375,
      "learning_rate": 3.052394785963327e-05,
      "loss": 0.7945,
      "step": 385680
    },
    {
      "epoch": 1.35174729345628,
      "grad_norm": 2.96875,
      "learning_rate": 3.052329883096957e-05,
      "loss": 0.8503,
      "step": 385690
    },
    {
      "epoch": 1.3517823409631755,
      "grad_norm": 2.609375,
      "learning_rate": 3.052264980230587e-05,
      "loss": 0.8032,
      "step": 385700
    },
    {
      "epoch": 1.3518173884700713,
      "grad_norm": 2.484375,
      "learning_rate": 3.052200077364217e-05,
      "loss": 0.878,
      "step": 385710
    },
    {
      "epoch": 1.3518524359769668,
      "grad_norm": 2.953125,
      "learning_rate": 3.052135174497847e-05,
      "loss": 0.8909,
      "step": 385720
    },
    {
      "epoch": 1.3518874834838623,
      "grad_norm": 3.453125,
      "learning_rate": 3.052070271631476e-05,
      "loss": 0.8737,
      "step": 385730
    },
    {
      "epoch": 1.351922530990758,
      "grad_norm": 3.03125,
      "learning_rate": 3.0520053687651065e-05,
      "loss": 0.8702,
      "step": 385740
    },
    {
      "epoch": 1.3519575784976536,
      "grad_norm": 2.84375,
      "learning_rate": 3.051940465898736e-05,
      "loss": 0.8042,
      "step": 385750
    },
    {
      "epoch": 1.3519926260045492,
      "grad_norm": 3.078125,
      "learning_rate": 3.051875563032366e-05,
      "loss": 0.8631,
      "step": 385760
    },
    {
      "epoch": 1.3520276735114447,
      "grad_norm": 2.625,
      "learning_rate": 3.0518106601659955e-05,
      "loss": 0.8095,
      "step": 385770
    },
    {
      "epoch": 1.3520627210183402,
      "grad_norm": 2.515625,
      "learning_rate": 3.0517457572996257e-05,
      "loss": 0.8523,
      "step": 385780
    },
    {
      "epoch": 1.352097768525236,
      "grad_norm": 3.046875,
      "learning_rate": 3.0516808544332555e-05,
      "loss": 0.8264,
      "step": 385790
    },
    {
      "epoch": 1.3521328160321315,
      "grad_norm": 2.828125,
      "learning_rate": 3.0516159515668853e-05,
      "loss": 0.8393,
      "step": 385800
    },
    {
      "epoch": 1.352167863539027,
      "grad_norm": 2.78125,
      "learning_rate": 3.051551048700515e-05,
      "loss": 0.8234,
      "step": 385810
    },
    {
      "epoch": 1.3522029110459228,
      "grad_norm": 3.296875,
      "learning_rate": 3.051486145834145e-05,
      "loss": 0.7976,
      "step": 385820
    },
    {
      "epoch": 1.3522379585528184,
      "grad_norm": 2.484375,
      "learning_rate": 3.0514212429677747e-05,
      "loss": 0.8609,
      "step": 385830
    },
    {
      "epoch": 1.352273006059714,
      "grad_norm": 2.875,
      "learning_rate": 3.0513563401014045e-05,
      "loss": 0.7871,
      "step": 385840
    },
    {
      "epoch": 1.3523080535666097,
      "grad_norm": 3.109375,
      "learning_rate": 3.0512914372350343e-05,
      "loss": 0.8604,
      "step": 385850
    },
    {
      "epoch": 1.3523431010735052,
      "grad_norm": 2.28125,
      "learning_rate": 3.051226534368664e-05,
      "loss": 0.8007,
      "step": 385860
    },
    {
      "epoch": 1.3523781485804007,
      "grad_norm": 2.546875,
      "learning_rate": 3.051161631502294e-05,
      "loss": 0.9145,
      "step": 385870
    },
    {
      "epoch": 1.3524131960872963,
      "grad_norm": 2.734375,
      "learning_rate": 3.0510967286359237e-05,
      "loss": 0.8201,
      "step": 385880
    },
    {
      "epoch": 1.3524482435941918,
      "grad_norm": 2.953125,
      "learning_rate": 3.0510318257695535e-05,
      "loss": 0.7962,
      "step": 385890
    },
    {
      "epoch": 1.3524832911010876,
      "grad_norm": 2.609375,
      "learning_rate": 3.0509669229031833e-05,
      "loss": 0.7844,
      "step": 385900
    },
    {
      "epoch": 1.352518338607983,
      "grad_norm": 3.09375,
      "learning_rate": 3.0509020200368134e-05,
      "loss": 0.8502,
      "step": 385910
    },
    {
      "epoch": 1.3525533861148786,
      "grad_norm": 2.96875,
      "learning_rate": 3.0508371171704432e-05,
      "loss": 0.7313,
      "step": 385920
    },
    {
      "epoch": 1.3525884336217744,
      "grad_norm": 5.375,
      "learning_rate": 3.050772214304073e-05,
      "loss": 0.7969,
      "step": 385930
    },
    {
      "epoch": 1.35262348112867,
      "grad_norm": 2.703125,
      "learning_rate": 3.0507073114377028e-05,
      "loss": 0.7966,
      "step": 385940
    },
    {
      "epoch": 1.3526585286355655,
      "grad_norm": 2.703125,
      "learning_rate": 3.0506424085713326e-05,
      "loss": 0.8941,
      "step": 385950
    },
    {
      "epoch": 1.3526935761424612,
      "grad_norm": 2.703125,
      "learning_rate": 3.0505775057049624e-05,
      "loss": 0.7964,
      "step": 385960
    },
    {
      "epoch": 1.3527286236493568,
      "grad_norm": 2.515625,
      "learning_rate": 3.0505126028385915e-05,
      "loss": 0.8358,
      "step": 385970
    },
    {
      "epoch": 1.3527636711562523,
      "grad_norm": 2.953125,
      "learning_rate": 3.0504476999722213e-05,
      "loss": 0.922,
      "step": 385980
    },
    {
      "epoch": 1.3527987186631478,
      "grad_norm": 2.53125,
      "learning_rate": 3.050382797105851e-05,
      "loss": 0.7464,
      "step": 385990
    },
    {
      "epoch": 1.3528337661700434,
      "grad_norm": 2.890625,
      "learning_rate": 3.0503178942394813e-05,
      "loss": 0.7664,
      "step": 386000
    },
    {
      "epoch": 1.3528688136769391,
      "grad_norm": 3.21875,
      "learning_rate": 3.050252991373111e-05,
      "loss": 0.8441,
      "step": 386010
    },
    {
      "epoch": 1.3529038611838347,
      "grad_norm": 2.96875,
      "learning_rate": 3.050188088506741e-05,
      "loss": 0.9415,
      "step": 386020
    },
    {
      "epoch": 1.3529389086907302,
      "grad_norm": 2.296875,
      "learning_rate": 3.0501231856403707e-05,
      "loss": 0.8217,
      "step": 386030
    },
    {
      "epoch": 1.352973956197626,
      "grad_norm": 2.796875,
      "learning_rate": 3.0500582827740005e-05,
      "loss": 0.8303,
      "step": 386040
    },
    {
      "epoch": 1.3530090037045215,
      "grad_norm": 2.296875,
      "learning_rate": 3.0499933799076303e-05,
      "loss": 0.8553,
      "step": 386050
    },
    {
      "epoch": 1.353044051211417,
      "grad_norm": 7.3125,
      "learning_rate": 3.04992847704126e-05,
      "loss": 0.7994,
      "step": 386060
    },
    {
      "epoch": 1.3530790987183128,
      "grad_norm": 2.8125,
      "learning_rate": 3.04986357417489e-05,
      "loss": 0.8133,
      "step": 386070
    },
    {
      "epoch": 1.3531141462252083,
      "grad_norm": 2.546875,
      "learning_rate": 3.0497986713085197e-05,
      "loss": 0.8122,
      "step": 386080
    },
    {
      "epoch": 1.3531491937321038,
      "grad_norm": 2.3125,
      "learning_rate": 3.0497337684421495e-05,
      "loss": 0.809,
      "step": 386090
    },
    {
      "epoch": 1.3531842412389994,
      "grad_norm": 3.046875,
      "learning_rate": 3.0496688655757793e-05,
      "loss": 0.9527,
      "step": 386100
    },
    {
      "epoch": 1.3532192887458951,
      "grad_norm": 2.71875,
      "learning_rate": 3.049603962709409e-05,
      "loss": 0.7677,
      "step": 386110
    },
    {
      "epoch": 1.3532543362527907,
      "grad_norm": 2.640625,
      "learning_rate": 3.049539059843039e-05,
      "loss": 0.8398,
      "step": 386120
    },
    {
      "epoch": 1.3532893837596862,
      "grad_norm": 2.71875,
      "learning_rate": 3.0494741569766687e-05,
      "loss": 0.8232,
      "step": 386130
    },
    {
      "epoch": 1.3533244312665818,
      "grad_norm": 2.859375,
      "learning_rate": 3.0494092541102988e-05,
      "loss": 0.8139,
      "step": 386140
    },
    {
      "epoch": 1.3533594787734775,
      "grad_norm": 4.34375,
      "learning_rate": 3.0493443512439286e-05,
      "loss": 0.8918,
      "step": 386150
    },
    {
      "epoch": 1.353394526280373,
      "grad_norm": 2.890625,
      "learning_rate": 3.0492794483775584e-05,
      "loss": 0.9136,
      "step": 386160
    },
    {
      "epoch": 1.3534295737872686,
      "grad_norm": 2.6875,
      "learning_rate": 3.0492145455111882e-05,
      "loss": 0.8032,
      "step": 386170
    },
    {
      "epoch": 1.3534646212941643,
      "grad_norm": 2.375,
      "learning_rate": 3.049149642644818e-05,
      "loss": 0.8806,
      "step": 386180
    },
    {
      "epoch": 1.3534996688010599,
      "grad_norm": 2.84375,
      "learning_rate": 3.0490847397784478e-05,
      "loss": 0.9111,
      "step": 386190
    },
    {
      "epoch": 1.3535347163079554,
      "grad_norm": 2.859375,
      "learning_rate": 3.0490198369120776e-05,
      "loss": 0.8196,
      "step": 386200
    },
    {
      "epoch": 1.3535697638148512,
      "grad_norm": 2.59375,
      "learning_rate": 3.0489549340457074e-05,
      "loss": 0.813,
      "step": 386210
    },
    {
      "epoch": 1.3536048113217467,
      "grad_norm": 2.25,
      "learning_rate": 3.0488900311793372e-05,
      "loss": 0.8283,
      "step": 386220
    },
    {
      "epoch": 1.3536398588286422,
      "grad_norm": 2.578125,
      "learning_rate": 3.048825128312967e-05,
      "loss": 0.8311,
      "step": 386230
    },
    {
      "epoch": 1.3536749063355378,
      "grad_norm": 2.859375,
      "learning_rate": 3.0487602254465968e-05,
      "loss": 0.8458,
      "step": 386240
    },
    {
      "epoch": 1.3537099538424333,
      "grad_norm": 3.171875,
      "learning_rate": 3.0486953225802266e-05,
      "loss": 0.8707,
      "step": 386250
    },
    {
      "epoch": 1.353745001349329,
      "grad_norm": 3.046875,
      "learning_rate": 3.0486304197138564e-05,
      "loss": 0.9008,
      "step": 386260
    },
    {
      "epoch": 1.3537800488562246,
      "grad_norm": 2.65625,
      "learning_rate": 3.0485655168474862e-05,
      "loss": 0.7977,
      "step": 386270
    },
    {
      "epoch": 1.3538150963631201,
      "grad_norm": 2.640625,
      "learning_rate": 3.0485006139811163e-05,
      "loss": 0.8573,
      "step": 386280
    },
    {
      "epoch": 1.353850143870016,
      "grad_norm": 2.265625,
      "learning_rate": 3.048435711114746e-05,
      "loss": 0.8751,
      "step": 386290
    },
    {
      "epoch": 1.3538851913769114,
      "grad_norm": 2.90625,
      "learning_rate": 3.048370808248376e-05,
      "loss": 0.7802,
      "step": 386300
    },
    {
      "epoch": 1.353920238883807,
      "grad_norm": 3.234375,
      "learning_rate": 3.0483059053820057e-05,
      "loss": 0.8256,
      "step": 386310
    },
    {
      "epoch": 1.3539552863907027,
      "grad_norm": 2.546875,
      "learning_rate": 3.0482410025156355e-05,
      "loss": 0.9137,
      "step": 386320
    },
    {
      "epoch": 1.3539903338975983,
      "grad_norm": 3.125,
      "learning_rate": 3.0481760996492653e-05,
      "loss": 0.901,
      "step": 386330
    },
    {
      "epoch": 1.3540253814044938,
      "grad_norm": 3.09375,
      "learning_rate": 3.048111196782895e-05,
      "loss": 0.8562,
      "step": 386340
    },
    {
      "epoch": 1.3540604289113893,
      "grad_norm": 3.21875,
      "learning_rate": 3.0480462939165243e-05,
      "loss": 0.9123,
      "step": 386350
    },
    {
      "epoch": 1.3540954764182849,
      "grad_norm": 2.8125,
      "learning_rate": 3.047981391050154e-05,
      "loss": 0.8778,
      "step": 386360
    },
    {
      "epoch": 1.3541305239251806,
      "grad_norm": 2.890625,
      "learning_rate": 3.0479164881837842e-05,
      "loss": 0.8542,
      "step": 386370
    },
    {
      "epoch": 1.3541655714320762,
      "grad_norm": 2.890625,
      "learning_rate": 3.047851585317414e-05,
      "loss": 0.817,
      "step": 386380
    },
    {
      "epoch": 1.3542006189389717,
      "grad_norm": 3.09375,
      "learning_rate": 3.0477866824510438e-05,
      "loss": 0.8645,
      "step": 386390
    },
    {
      "epoch": 1.3542356664458675,
      "grad_norm": 3.296875,
      "learning_rate": 3.0477217795846736e-05,
      "loss": 0.9615,
      "step": 386400
    },
    {
      "epoch": 1.354270713952763,
      "grad_norm": 2.703125,
      "learning_rate": 3.0476568767183034e-05,
      "loss": 0.8916,
      "step": 386410
    },
    {
      "epoch": 1.3543057614596585,
      "grad_norm": 2.921875,
      "learning_rate": 3.0475919738519332e-05,
      "loss": 0.8806,
      "step": 386420
    },
    {
      "epoch": 1.3543408089665543,
      "grad_norm": 2.96875,
      "learning_rate": 3.047527070985563e-05,
      "loss": 0.8234,
      "step": 386430
    },
    {
      "epoch": 1.3543758564734498,
      "grad_norm": 3.0,
      "learning_rate": 3.0474621681191928e-05,
      "loss": 0.8864,
      "step": 386440
    },
    {
      "epoch": 1.3544109039803454,
      "grad_norm": 2.671875,
      "learning_rate": 3.0473972652528226e-05,
      "loss": 0.8345,
      "step": 386450
    },
    {
      "epoch": 1.354445951487241,
      "grad_norm": 3.078125,
      "learning_rate": 3.0473323623864524e-05,
      "loss": 0.9531,
      "step": 386460
    },
    {
      "epoch": 1.3544809989941364,
      "grad_norm": 2.90625,
      "learning_rate": 3.0472674595200822e-05,
      "loss": 0.8692,
      "step": 386470
    },
    {
      "epoch": 1.3545160465010322,
      "grad_norm": 2.796875,
      "learning_rate": 3.047202556653712e-05,
      "loss": 0.8935,
      "step": 386480
    },
    {
      "epoch": 1.3545510940079277,
      "grad_norm": 2.90625,
      "learning_rate": 3.0471376537873418e-05,
      "loss": 0.8198,
      "step": 386490
    },
    {
      "epoch": 1.3545861415148233,
      "grad_norm": 3.078125,
      "learning_rate": 3.0470727509209716e-05,
      "loss": 0.8645,
      "step": 386500
    },
    {
      "epoch": 1.354621189021719,
      "grad_norm": 3.265625,
      "learning_rate": 3.0470078480546017e-05,
      "loss": 0.9001,
      "step": 386510
    },
    {
      "epoch": 1.3546562365286146,
      "grad_norm": 2.9375,
      "learning_rate": 3.0469429451882315e-05,
      "loss": 0.8299,
      "step": 386520
    },
    {
      "epoch": 1.35469128403551,
      "grad_norm": 2.703125,
      "learning_rate": 3.0468780423218613e-05,
      "loss": 0.9009,
      "step": 386530
    },
    {
      "epoch": 1.3547263315424058,
      "grad_norm": 2.90625,
      "learning_rate": 3.046813139455491e-05,
      "loss": 0.8888,
      "step": 386540
    },
    {
      "epoch": 1.3547613790493014,
      "grad_norm": 2.703125,
      "learning_rate": 3.046748236589121e-05,
      "loss": 0.8651,
      "step": 386550
    },
    {
      "epoch": 1.354796426556197,
      "grad_norm": 3.40625,
      "learning_rate": 3.0466833337227507e-05,
      "loss": 0.9086,
      "step": 386560
    },
    {
      "epoch": 1.3548314740630925,
      "grad_norm": 2.796875,
      "learning_rate": 3.0466184308563805e-05,
      "loss": 0.7372,
      "step": 386570
    },
    {
      "epoch": 1.354866521569988,
      "grad_norm": 2.9375,
      "learning_rate": 3.0465535279900103e-05,
      "loss": 0.817,
      "step": 386580
    },
    {
      "epoch": 1.3549015690768837,
      "grad_norm": 2.6875,
      "learning_rate": 3.04648862512364e-05,
      "loss": 0.7969,
      "step": 386590
    },
    {
      "epoch": 1.3549366165837793,
      "grad_norm": 2.515625,
      "learning_rate": 3.04642372225727e-05,
      "loss": 0.8388,
      "step": 386600
    },
    {
      "epoch": 1.3549716640906748,
      "grad_norm": 2.953125,
      "learning_rate": 3.0463588193908997e-05,
      "loss": 0.8826,
      "step": 386610
    },
    {
      "epoch": 1.3550067115975706,
      "grad_norm": 3.015625,
      "learning_rate": 3.0462939165245295e-05,
      "loss": 0.9172,
      "step": 386620
    },
    {
      "epoch": 1.3550417591044661,
      "grad_norm": 2.90625,
      "learning_rate": 3.0462290136581593e-05,
      "loss": 0.9302,
      "step": 386630
    },
    {
      "epoch": 1.3550768066113617,
      "grad_norm": 2.90625,
      "learning_rate": 3.046164110791789e-05,
      "loss": 0.9349,
      "step": 386640
    },
    {
      "epoch": 1.3551118541182574,
      "grad_norm": 3.0625,
      "learning_rate": 3.0460992079254193e-05,
      "loss": 0.8648,
      "step": 386650
    },
    {
      "epoch": 1.355146901625153,
      "grad_norm": 3.203125,
      "learning_rate": 3.046034305059049e-05,
      "loss": 0.8121,
      "step": 386660
    },
    {
      "epoch": 1.3551819491320485,
      "grad_norm": 3.265625,
      "learning_rate": 3.045969402192679e-05,
      "loss": 0.868,
      "step": 386670
    },
    {
      "epoch": 1.355216996638944,
      "grad_norm": 2.53125,
      "learning_rate": 3.0459044993263087e-05,
      "loss": 0.8294,
      "step": 386680
    },
    {
      "epoch": 1.3552520441458396,
      "grad_norm": 2.9375,
      "learning_rate": 3.0458395964599385e-05,
      "loss": 0.9201,
      "step": 386690
    },
    {
      "epoch": 1.3552870916527353,
      "grad_norm": 2.625,
      "learning_rate": 3.0457746935935683e-05,
      "loss": 0.8429,
      "step": 386700
    },
    {
      "epoch": 1.3553221391596308,
      "grad_norm": 2.90625,
      "learning_rate": 3.045709790727198e-05,
      "loss": 0.8156,
      "step": 386710
    },
    {
      "epoch": 1.3553571866665264,
      "grad_norm": 3.390625,
      "learning_rate": 3.0456448878608272e-05,
      "loss": 0.8339,
      "step": 386720
    },
    {
      "epoch": 1.3553922341734221,
      "grad_norm": 3.1875,
      "learning_rate": 3.0455799849944573e-05,
      "loss": 0.8882,
      "step": 386730
    },
    {
      "epoch": 1.3554272816803177,
      "grad_norm": 3.15625,
      "learning_rate": 3.045515082128087e-05,
      "loss": 0.8063,
      "step": 386740
    },
    {
      "epoch": 1.3554623291872132,
      "grad_norm": 3.046875,
      "learning_rate": 3.045450179261717e-05,
      "loss": 0.9184,
      "step": 386750
    },
    {
      "epoch": 1.355497376694109,
      "grad_norm": 3.0625,
      "learning_rate": 3.0453852763953467e-05,
      "loss": 0.886,
      "step": 386760
    },
    {
      "epoch": 1.3555324242010045,
      "grad_norm": 3.0,
      "learning_rate": 3.0453203735289765e-05,
      "loss": 0.8201,
      "step": 386770
    },
    {
      "epoch": 1.3555674717079,
      "grad_norm": 2.921875,
      "learning_rate": 3.0452554706626063e-05,
      "loss": 0.8702,
      "step": 386780
    },
    {
      "epoch": 1.3556025192147956,
      "grad_norm": 3.046875,
      "learning_rate": 3.045190567796236e-05,
      "loss": 0.9066,
      "step": 386790
    },
    {
      "epoch": 1.3556375667216913,
      "grad_norm": 2.9375,
      "learning_rate": 3.045125664929866e-05,
      "loss": 0.8836,
      "step": 386800
    },
    {
      "epoch": 1.3556726142285869,
      "grad_norm": 2.9375,
      "learning_rate": 3.0450607620634957e-05,
      "loss": 0.8297,
      "step": 386810
    },
    {
      "epoch": 1.3557076617354824,
      "grad_norm": 2.859375,
      "learning_rate": 3.0449958591971255e-05,
      "loss": 0.8299,
      "step": 386820
    },
    {
      "epoch": 1.355742709242378,
      "grad_norm": 2.875,
      "learning_rate": 3.0449309563307553e-05,
      "loss": 0.8238,
      "step": 386830
    },
    {
      "epoch": 1.3557777567492737,
      "grad_norm": 2.96875,
      "learning_rate": 3.044866053464385e-05,
      "loss": 0.837,
      "step": 386840
    },
    {
      "epoch": 1.3558128042561692,
      "grad_norm": 3.109375,
      "learning_rate": 3.044801150598015e-05,
      "loss": 0.8953,
      "step": 386850
    },
    {
      "epoch": 1.3558478517630648,
      "grad_norm": 3.0,
      "learning_rate": 3.0447362477316447e-05,
      "loss": 0.8979,
      "step": 386860
    },
    {
      "epoch": 1.3558828992699605,
      "grad_norm": 3.09375,
      "learning_rate": 3.044671344865275e-05,
      "loss": 0.8136,
      "step": 386870
    },
    {
      "epoch": 1.355917946776856,
      "grad_norm": 3.390625,
      "learning_rate": 3.0446064419989047e-05,
      "loss": 0.8825,
      "step": 386880
    },
    {
      "epoch": 1.3559529942837516,
      "grad_norm": 3.125,
      "learning_rate": 3.0445415391325345e-05,
      "loss": 0.8767,
      "step": 386890
    },
    {
      "epoch": 1.3559880417906474,
      "grad_norm": 2.71875,
      "learning_rate": 3.0444766362661643e-05,
      "loss": 0.7849,
      "step": 386900
    },
    {
      "epoch": 1.356023089297543,
      "grad_norm": 3.078125,
      "learning_rate": 3.044411733399794e-05,
      "loss": 0.8098,
      "step": 386910
    },
    {
      "epoch": 1.3560581368044384,
      "grad_norm": 3.078125,
      "learning_rate": 3.044346830533424e-05,
      "loss": 0.8069,
      "step": 386920
    },
    {
      "epoch": 1.356093184311334,
      "grad_norm": 3.03125,
      "learning_rate": 3.0442819276670537e-05,
      "loss": 0.8366,
      "step": 386930
    },
    {
      "epoch": 1.3561282318182295,
      "grad_norm": 3.078125,
      "learning_rate": 3.0442170248006835e-05,
      "loss": 0.8811,
      "step": 386940
    },
    {
      "epoch": 1.3561632793251253,
      "grad_norm": 3.21875,
      "learning_rate": 3.0441521219343133e-05,
      "loss": 0.8423,
      "step": 386950
    },
    {
      "epoch": 1.3561983268320208,
      "grad_norm": 2.71875,
      "learning_rate": 3.044087219067943e-05,
      "loss": 0.8409,
      "step": 386960
    },
    {
      "epoch": 1.3562333743389163,
      "grad_norm": 3.15625,
      "learning_rate": 3.044022316201573e-05,
      "loss": 0.8742,
      "step": 386970
    },
    {
      "epoch": 1.356268421845812,
      "grad_norm": 2.625,
      "learning_rate": 3.0439574133352027e-05,
      "loss": 0.7387,
      "step": 386980
    },
    {
      "epoch": 1.3563034693527076,
      "grad_norm": 2.890625,
      "learning_rate": 3.0438925104688325e-05,
      "loss": 0.8365,
      "step": 386990
    },
    {
      "epoch": 1.3563385168596032,
      "grad_norm": 2.75,
      "learning_rate": 3.0438276076024623e-05,
      "loss": 0.9703,
      "step": 387000
    },
    {
      "epoch": 1.356373564366499,
      "grad_norm": 2.96875,
      "learning_rate": 3.0437627047360924e-05,
      "loss": 0.8052,
      "step": 387010
    },
    {
      "epoch": 1.3564086118733945,
      "grad_norm": 2.9375,
      "learning_rate": 3.0436978018697222e-05,
      "loss": 0.9009,
      "step": 387020
    },
    {
      "epoch": 1.35644365938029,
      "grad_norm": 2.96875,
      "learning_rate": 3.043632899003352e-05,
      "loss": 0.813,
      "step": 387030
    },
    {
      "epoch": 1.3564787068871855,
      "grad_norm": 2.953125,
      "learning_rate": 3.0435679961369818e-05,
      "loss": 0.8959,
      "step": 387040
    },
    {
      "epoch": 1.356513754394081,
      "grad_norm": 2.84375,
      "learning_rate": 3.0435030932706116e-05,
      "loss": 0.7857,
      "step": 387050
    },
    {
      "epoch": 1.3565488019009768,
      "grad_norm": 2.765625,
      "learning_rate": 3.0434381904042414e-05,
      "loss": 0.921,
      "step": 387060
    },
    {
      "epoch": 1.3565838494078724,
      "grad_norm": 2.734375,
      "learning_rate": 3.0433732875378712e-05,
      "loss": 0.8474,
      "step": 387070
    },
    {
      "epoch": 1.356618896914768,
      "grad_norm": 2.8125,
      "learning_rate": 3.043308384671501e-05,
      "loss": 0.8298,
      "step": 387080
    },
    {
      "epoch": 1.3566539444216636,
      "grad_norm": 2.71875,
      "learning_rate": 3.0432434818051308e-05,
      "loss": 0.8527,
      "step": 387090
    },
    {
      "epoch": 1.3566889919285592,
      "grad_norm": 2.765625,
      "learning_rate": 3.0431785789387603e-05,
      "loss": 0.9171,
      "step": 387100
    },
    {
      "epoch": 1.3567240394354547,
      "grad_norm": 2.96875,
      "learning_rate": 3.04311367607239e-05,
      "loss": 0.8213,
      "step": 387110
    },
    {
      "epoch": 1.3567590869423505,
      "grad_norm": 2.890625,
      "learning_rate": 3.04304877320602e-05,
      "loss": 0.8187,
      "step": 387120
    },
    {
      "epoch": 1.356794134449246,
      "grad_norm": 2.59375,
      "learning_rate": 3.0429838703396497e-05,
      "loss": 0.8961,
      "step": 387130
    },
    {
      "epoch": 1.3568291819561416,
      "grad_norm": 2.921875,
      "learning_rate": 3.0429189674732795e-05,
      "loss": 0.8772,
      "step": 387140
    },
    {
      "epoch": 1.356864229463037,
      "grad_norm": 2.59375,
      "learning_rate": 3.0428540646069093e-05,
      "loss": 0.7984,
      "step": 387150
    },
    {
      "epoch": 1.3568992769699326,
      "grad_norm": 2.96875,
      "learning_rate": 3.042789161740539e-05,
      "loss": 0.8485,
      "step": 387160
    },
    {
      "epoch": 1.3569343244768284,
      "grad_norm": 2.765625,
      "learning_rate": 3.042724258874169e-05,
      "loss": 0.8317,
      "step": 387170
    },
    {
      "epoch": 1.356969371983724,
      "grad_norm": 2.765625,
      "learning_rate": 3.0426593560077987e-05,
      "loss": 0.82,
      "step": 387180
    },
    {
      "epoch": 1.3570044194906195,
      "grad_norm": 2.828125,
      "learning_rate": 3.0425944531414285e-05,
      "loss": 0.8556,
      "step": 387190
    },
    {
      "epoch": 1.3570394669975152,
      "grad_norm": 2.65625,
      "learning_rate": 3.0425295502750583e-05,
      "loss": 0.8548,
      "step": 387200
    },
    {
      "epoch": 1.3570745145044107,
      "grad_norm": 2.734375,
      "learning_rate": 3.042464647408688e-05,
      "loss": 0.8477,
      "step": 387210
    },
    {
      "epoch": 1.3571095620113063,
      "grad_norm": 2.859375,
      "learning_rate": 3.042399744542318e-05,
      "loss": 0.8444,
      "step": 387220
    },
    {
      "epoch": 1.357144609518202,
      "grad_norm": 3.0,
      "learning_rate": 3.0423348416759477e-05,
      "loss": 0.8291,
      "step": 387230
    },
    {
      "epoch": 1.3571796570250976,
      "grad_norm": 2.65625,
      "learning_rate": 3.0422699388095778e-05,
      "loss": 0.8743,
      "step": 387240
    },
    {
      "epoch": 1.357214704531993,
      "grad_norm": 2.875,
      "learning_rate": 3.0422050359432076e-05,
      "loss": 0.7535,
      "step": 387250
    },
    {
      "epoch": 1.3572497520388886,
      "grad_norm": 2.8125,
      "learning_rate": 3.0421401330768374e-05,
      "loss": 0.9101,
      "step": 387260
    },
    {
      "epoch": 1.3572847995457842,
      "grad_norm": 3.34375,
      "learning_rate": 3.0420752302104672e-05,
      "loss": 0.909,
      "step": 387270
    },
    {
      "epoch": 1.35731984705268,
      "grad_norm": 2.421875,
      "learning_rate": 3.042010327344097e-05,
      "loss": 0.7905,
      "step": 387280
    },
    {
      "epoch": 1.3573548945595755,
      "grad_norm": 2.828125,
      "learning_rate": 3.0419454244777268e-05,
      "loss": 0.9211,
      "step": 387290
    },
    {
      "epoch": 1.357389942066471,
      "grad_norm": 3.0625,
      "learning_rate": 3.0418805216113566e-05,
      "loss": 0.8183,
      "step": 387300
    },
    {
      "epoch": 1.3574249895733668,
      "grad_norm": 2.484375,
      "learning_rate": 3.0418156187449864e-05,
      "loss": 0.8761,
      "step": 387310
    },
    {
      "epoch": 1.3574600370802623,
      "grad_norm": 3.125,
      "learning_rate": 3.0417507158786162e-05,
      "loss": 0.8523,
      "step": 387320
    },
    {
      "epoch": 1.3574950845871578,
      "grad_norm": 2.734375,
      "learning_rate": 3.041685813012246e-05,
      "loss": 0.9099,
      "step": 387330
    },
    {
      "epoch": 1.3575301320940536,
      "grad_norm": 2.4375,
      "learning_rate": 3.0416209101458758e-05,
      "loss": 0.7685,
      "step": 387340
    },
    {
      "epoch": 1.3575651796009491,
      "grad_norm": 2.640625,
      "learning_rate": 3.0415560072795056e-05,
      "loss": 0.906,
      "step": 387350
    },
    {
      "epoch": 1.3576002271078447,
      "grad_norm": 2.859375,
      "learning_rate": 3.0414911044131354e-05,
      "loss": 0.8514,
      "step": 387360
    },
    {
      "epoch": 1.3576352746147402,
      "grad_norm": 2.90625,
      "learning_rate": 3.0414262015467652e-05,
      "loss": 0.8351,
      "step": 387370
    },
    {
      "epoch": 1.3576703221216357,
      "grad_norm": 2.65625,
      "learning_rate": 3.0413612986803954e-05,
      "loss": 0.7771,
      "step": 387380
    },
    {
      "epoch": 1.3577053696285315,
      "grad_norm": 2.625,
      "learning_rate": 3.041296395814025e-05,
      "loss": 0.8951,
      "step": 387390
    },
    {
      "epoch": 1.357740417135427,
      "grad_norm": 2.890625,
      "learning_rate": 3.041231492947655e-05,
      "loss": 0.8756,
      "step": 387400
    },
    {
      "epoch": 1.3577754646423226,
      "grad_norm": 3.125,
      "learning_rate": 3.0411665900812848e-05,
      "loss": 0.8737,
      "step": 387410
    },
    {
      "epoch": 1.3578105121492183,
      "grad_norm": 3.1875,
      "learning_rate": 3.0411016872149146e-05,
      "loss": 0.857,
      "step": 387420
    },
    {
      "epoch": 1.3578455596561139,
      "grad_norm": 2.859375,
      "learning_rate": 3.0410367843485444e-05,
      "loss": 0.8264,
      "step": 387430
    },
    {
      "epoch": 1.3578806071630094,
      "grad_norm": 3.1875,
      "learning_rate": 3.040971881482174e-05,
      "loss": 0.8765,
      "step": 387440
    },
    {
      "epoch": 1.3579156546699052,
      "grad_norm": 2.84375,
      "learning_rate": 3.040906978615804e-05,
      "loss": 0.8033,
      "step": 387450
    },
    {
      "epoch": 1.3579507021768007,
      "grad_norm": 2.78125,
      "learning_rate": 3.0408420757494338e-05,
      "loss": 0.8747,
      "step": 387460
    },
    {
      "epoch": 1.3579857496836962,
      "grad_norm": 3.078125,
      "learning_rate": 3.0407771728830636e-05,
      "loss": 0.923,
      "step": 387470
    },
    {
      "epoch": 1.3580207971905918,
      "grad_norm": 2.84375,
      "learning_rate": 3.040712270016693e-05,
      "loss": 0.8439,
      "step": 387480
    },
    {
      "epoch": 1.3580558446974875,
      "grad_norm": 3.21875,
      "learning_rate": 3.0406473671503228e-05,
      "loss": 0.9431,
      "step": 387490
    },
    {
      "epoch": 1.358090892204383,
      "grad_norm": 3.15625,
      "learning_rate": 3.0405824642839526e-05,
      "loss": 0.8669,
      "step": 387500
    },
    {
      "epoch": 1.3581259397112786,
      "grad_norm": 3.03125,
      "learning_rate": 3.0405175614175824e-05,
      "loss": 0.9088,
      "step": 387510
    },
    {
      "epoch": 1.3581609872181741,
      "grad_norm": 3.0625,
      "learning_rate": 3.0404526585512122e-05,
      "loss": 0.758,
      "step": 387520
    },
    {
      "epoch": 1.35819603472507,
      "grad_norm": 2.15625,
      "learning_rate": 3.040387755684842e-05,
      "loss": 0.7659,
      "step": 387530
    },
    {
      "epoch": 1.3582310822319654,
      "grad_norm": 2.984375,
      "learning_rate": 3.0403228528184718e-05,
      "loss": 0.8769,
      "step": 387540
    },
    {
      "epoch": 1.358266129738861,
      "grad_norm": 2.6875,
      "learning_rate": 3.0402579499521016e-05,
      "loss": 0.9138,
      "step": 387550
    },
    {
      "epoch": 1.3583011772457567,
      "grad_norm": 2.765625,
      "learning_rate": 3.0401930470857314e-05,
      "loss": 0.8092,
      "step": 387560
    },
    {
      "epoch": 1.3583362247526523,
      "grad_norm": 3.125,
      "learning_rate": 3.0401281442193612e-05,
      "loss": 0.8105,
      "step": 387570
    },
    {
      "epoch": 1.3583712722595478,
      "grad_norm": 3.234375,
      "learning_rate": 3.040063241352991e-05,
      "loss": 0.8619,
      "step": 387580
    },
    {
      "epoch": 1.3584063197664435,
      "grad_norm": 2.796875,
      "learning_rate": 3.0399983384866208e-05,
      "loss": 0.9131,
      "step": 387590
    },
    {
      "epoch": 1.358441367273339,
      "grad_norm": 2.265625,
      "learning_rate": 3.0399334356202506e-05,
      "loss": 0.7912,
      "step": 387600
    },
    {
      "epoch": 1.3584764147802346,
      "grad_norm": 2.546875,
      "learning_rate": 3.0398685327538808e-05,
      "loss": 0.9082,
      "step": 387610
    },
    {
      "epoch": 1.3585114622871302,
      "grad_norm": 2.765625,
      "learning_rate": 3.0398036298875106e-05,
      "loss": 0.8947,
      "step": 387620
    },
    {
      "epoch": 1.3585465097940257,
      "grad_norm": 2.984375,
      "learning_rate": 3.0397387270211404e-05,
      "loss": 0.8239,
      "step": 387630
    },
    {
      "epoch": 1.3585815573009214,
      "grad_norm": 2.453125,
      "learning_rate": 3.03967382415477e-05,
      "loss": 0.7781,
      "step": 387640
    },
    {
      "epoch": 1.358616604807817,
      "grad_norm": 3.046875,
      "learning_rate": 3.0396089212884e-05,
      "loss": 0.8401,
      "step": 387650
    },
    {
      "epoch": 1.3586516523147125,
      "grad_norm": 2.609375,
      "learning_rate": 3.0395440184220298e-05,
      "loss": 0.7849,
      "step": 387660
    },
    {
      "epoch": 1.3586866998216083,
      "grad_norm": 3.609375,
      "learning_rate": 3.0394791155556596e-05,
      "loss": 0.8383,
      "step": 387670
    },
    {
      "epoch": 1.3587217473285038,
      "grad_norm": 3.359375,
      "learning_rate": 3.0394142126892894e-05,
      "loss": 0.9912,
      "step": 387680
    },
    {
      "epoch": 1.3587567948353994,
      "grad_norm": 3.046875,
      "learning_rate": 3.039349309822919e-05,
      "loss": 0.8387,
      "step": 387690
    },
    {
      "epoch": 1.358791842342295,
      "grad_norm": 2.671875,
      "learning_rate": 3.039284406956549e-05,
      "loss": 0.7956,
      "step": 387700
    },
    {
      "epoch": 1.3588268898491906,
      "grad_norm": 3.046875,
      "learning_rate": 3.0392195040901788e-05,
      "loss": 0.8492,
      "step": 387710
    },
    {
      "epoch": 1.3588619373560862,
      "grad_norm": 2.765625,
      "learning_rate": 3.0391546012238086e-05,
      "loss": 0.8038,
      "step": 387720
    },
    {
      "epoch": 1.3588969848629817,
      "grad_norm": 2.921875,
      "learning_rate": 3.0390896983574384e-05,
      "loss": 0.8086,
      "step": 387730
    },
    {
      "epoch": 1.3589320323698773,
      "grad_norm": 2.6875,
      "learning_rate": 3.039024795491068e-05,
      "loss": 0.8464,
      "step": 387740
    },
    {
      "epoch": 1.358967079876773,
      "grad_norm": 2.96875,
      "learning_rate": 3.0389598926246983e-05,
      "loss": 0.9579,
      "step": 387750
    },
    {
      "epoch": 1.3590021273836685,
      "grad_norm": 2.859375,
      "learning_rate": 3.038894989758328e-05,
      "loss": 0.8261,
      "step": 387760
    },
    {
      "epoch": 1.359037174890564,
      "grad_norm": 2.96875,
      "learning_rate": 3.038830086891958e-05,
      "loss": 0.9028,
      "step": 387770
    },
    {
      "epoch": 1.3590722223974598,
      "grad_norm": 2.703125,
      "learning_rate": 3.0387651840255877e-05,
      "loss": 0.8537,
      "step": 387780
    },
    {
      "epoch": 1.3591072699043554,
      "grad_norm": 2.765625,
      "learning_rate": 3.0387002811592175e-05,
      "loss": 0.9231,
      "step": 387790
    },
    {
      "epoch": 1.359142317411251,
      "grad_norm": 2.546875,
      "learning_rate": 3.0386353782928473e-05,
      "loss": 0.8499,
      "step": 387800
    },
    {
      "epoch": 1.3591773649181467,
      "grad_norm": 2.765625,
      "learning_rate": 3.038570475426477e-05,
      "loss": 0.8486,
      "step": 387810
    },
    {
      "epoch": 1.3592124124250422,
      "grad_norm": 2.6875,
      "learning_rate": 3.038505572560107e-05,
      "loss": 0.88,
      "step": 387820
    },
    {
      "epoch": 1.3592474599319377,
      "grad_norm": 2.921875,
      "learning_rate": 3.0384406696937367e-05,
      "loss": 0.8056,
      "step": 387830
    },
    {
      "epoch": 1.3592825074388333,
      "grad_norm": 2.875,
      "learning_rate": 3.0383757668273665e-05,
      "loss": 0.8246,
      "step": 387840
    },
    {
      "epoch": 1.3593175549457288,
      "grad_norm": 3.078125,
      "learning_rate": 3.038310863960996e-05,
      "loss": 0.8762,
      "step": 387850
    },
    {
      "epoch": 1.3593526024526246,
      "grad_norm": 2.5,
      "learning_rate": 3.0382459610946258e-05,
      "loss": 0.803,
      "step": 387860
    },
    {
      "epoch": 1.35938764995952,
      "grad_norm": 3.0,
      "learning_rate": 3.0381810582282556e-05,
      "loss": 0.8532,
      "step": 387870
    },
    {
      "epoch": 1.3594226974664156,
      "grad_norm": 2.75,
      "learning_rate": 3.0381161553618854e-05,
      "loss": 0.8006,
      "step": 387880
    },
    {
      "epoch": 1.3594577449733114,
      "grad_norm": 3.046875,
      "learning_rate": 3.038051252495515e-05,
      "loss": 0.9165,
      "step": 387890
    },
    {
      "epoch": 1.359492792480207,
      "grad_norm": 2.4375,
      "learning_rate": 3.037986349629145e-05,
      "loss": 0.7945,
      "step": 387900
    },
    {
      "epoch": 1.3595278399871025,
      "grad_norm": 2.96875,
      "learning_rate": 3.0379214467627748e-05,
      "loss": 0.8457,
      "step": 387910
    },
    {
      "epoch": 1.3595628874939982,
      "grad_norm": 2.671875,
      "learning_rate": 3.0378565438964046e-05,
      "loss": 0.7724,
      "step": 387920
    },
    {
      "epoch": 1.3595979350008938,
      "grad_norm": 3.125,
      "learning_rate": 3.0377916410300344e-05,
      "loss": 0.8587,
      "step": 387930
    },
    {
      "epoch": 1.3596329825077893,
      "grad_norm": 2.984375,
      "learning_rate": 3.037726738163664e-05,
      "loss": 0.8202,
      "step": 387940
    },
    {
      "epoch": 1.3596680300146848,
      "grad_norm": 2.71875,
      "learning_rate": 3.037661835297294e-05,
      "loss": 0.8491,
      "step": 387950
    },
    {
      "epoch": 1.3597030775215804,
      "grad_norm": 3.15625,
      "learning_rate": 3.0375969324309238e-05,
      "loss": 0.8623,
      "step": 387960
    },
    {
      "epoch": 1.3597381250284761,
      "grad_norm": 2.890625,
      "learning_rate": 3.037532029564554e-05,
      "loss": 0.7202,
      "step": 387970
    },
    {
      "epoch": 1.3597731725353717,
      "grad_norm": 2.953125,
      "learning_rate": 3.0374671266981837e-05,
      "loss": 0.8381,
      "step": 387980
    },
    {
      "epoch": 1.3598082200422672,
      "grad_norm": 2.984375,
      "learning_rate": 3.0374022238318135e-05,
      "loss": 0.8699,
      "step": 387990
    },
    {
      "epoch": 1.359843267549163,
      "grad_norm": 2.734375,
      "learning_rate": 3.0373373209654433e-05,
      "loss": 0.8325,
      "step": 388000
    },
    {
      "epoch": 1.3598783150560585,
      "grad_norm": 3.03125,
      "learning_rate": 3.037272418099073e-05,
      "loss": 0.8583,
      "step": 388010
    },
    {
      "epoch": 1.359913362562954,
      "grad_norm": 3.0625,
      "learning_rate": 3.037207515232703e-05,
      "loss": 0.8831,
      "step": 388020
    },
    {
      "epoch": 1.3599484100698498,
      "grad_norm": 2.96875,
      "learning_rate": 3.0371426123663327e-05,
      "loss": 0.8308,
      "step": 388030
    },
    {
      "epoch": 1.3599834575767453,
      "grad_norm": 2.65625,
      "learning_rate": 3.0370777094999625e-05,
      "loss": 0.8824,
      "step": 388040
    },
    {
      "epoch": 1.3600185050836409,
      "grad_norm": 3.3125,
      "learning_rate": 3.0370128066335923e-05,
      "loss": 0.8537,
      "step": 388050
    },
    {
      "epoch": 1.3600535525905364,
      "grad_norm": 2.890625,
      "learning_rate": 3.036947903767222e-05,
      "loss": 0.8485,
      "step": 388060
    },
    {
      "epoch": 1.360088600097432,
      "grad_norm": 2.8125,
      "learning_rate": 3.036883000900852e-05,
      "loss": 0.7605,
      "step": 388070
    },
    {
      "epoch": 1.3601236476043277,
      "grad_norm": 2.78125,
      "learning_rate": 3.0368180980344817e-05,
      "loss": 0.8393,
      "step": 388080
    },
    {
      "epoch": 1.3601586951112232,
      "grad_norm": 2.875,
      "learning_rate": 3.0367531951681115e-05,
      "loss": 0.8295,
      "step": 388090
    },
    {
      "epoch": 1.3601937426181188,
      "grad_norm": 3.390625,
      "learning_rate": 3.0366882923017413e-05,
      "loss": 0.8892,
      "step": 388100
    },
    {
      "epoch": 1.3602287901250145,
      "grad_norm": 2.765625,
      "learning_rate": 3.0366233894353714e-05,
      "loss": 0.847,
      "step": 388110
    },
    {
      "epoch": 1.36026383763191,
      "grad_norm": 3.015625,
      "learning_rate": 3.0365584865690012e-05,
      "loss": 0.8792,
      "step": 388120
    },
    {
      "epoch": 1.3602988851388056,
      "grad_norm": 3.0,
      "learning_rate": 3.036493583702631e-05,
      "loss": 0.8085,
      "step": 388130
    },
    {
      "epoch": 1.3603339326457013,
      "grad_norm": 2.890625,
      "learning_rate": 3.036428680836261e-05,
      "loss": 0.8675,
      "step": 388140
    },
    {
      "epoch": 1.3603689801525969,
      "grad_norm": 2.6875,
      "learning_rate": 3.0363637779698906e-05,
      "loss": 0.912,
      "step": 388150
    },
    {
      "epoch": 1.3604040276594924,
      "grad_norm": 3.390625,
      "learning_rate": 3.0362988751035204e-05,
      "loss": 0.8376,
      "step": 388160
    },
    {
      "epoch": 1.360439075166388,
      "grad_norm": 2.78125,
      "learning_rate": 3.0362339722371502e-05,
      "loss": 0.9275,
      "step": 388170
    },
    {
      "epoch": 1.3604741226732837,
      "grad_norm": 2.9375,
      "learning_rate": 3.03616906937078e-05,
      "loss": 0.7982,
      "step": 388180
    },
    {
      "epoch": 1.3605091701801793,
      "grad_norm": 3.1875,
      "learning_rate": 3.03610416650441e-05,
      "loss": 0.9588,
      "step": 388190
    },
    {
      "epoch": 1.3605442176870748,
      "grad_norm": 2.890625,
      "learning_rate": 3.0360392636380396e-05,
      "loss": 0.791,
      "step": 388200
    },
    {
      "epoch": 1.3605792651939703,
      "grad_norm": 2.734375,
      "learning_rate": 3.0359743607716694e-05,
      "loss": 0.8301,
      "step": 388210
    },
    {
      "epoch": 1.360614312700866,
      "grad_norm": 3.3125,
      "learning_rate": 3.0359094579052992e-05,
      "loss": 0.8708,
      "step": 388220
    },
    {
      "epoch": 1.3606493602077616,
      "grad_norm": 2.765625,
      "learning_rate": 3.0358445550389287e-05,
      "loss": 0.8585,
      "step": 388230
    },
    {
      "epoch": 1.3606844077146572,
      "grad_norm": 2.84375,
      "learning_rate": 3.0357796521725585e-05,
      "loss": 0.7633,
      "step": 388240
    },
    {
      "epoch": 1.360719455221553,
      "grad_norm": 2.796875,
      "learning_rate": 3.0357147493061883e-05,
      "loss": 0.8612,
      "step": 388250
    },
    {
      "epoch": 1.3607545027284484,
      "grad_norm": 3.25,
      "learning_rate": 3.035649846439818e-05,
      "loss": 0.9058,
      "step": 388260
    },
    {
      "epoch": 1.360789550235344,
      "grad_norm": 2.328125,
      "learning_rate": 3.035584943573448e-05,
      "loss": 0.7585,
      "step": 388270
    },
    {
      "epoch": 1.3608245977422397,
      "grad_norm": 2.484375,
      "learning_rate": 3.0355200407070777e-05,
      "loss": 0.8385,
      "step": 388280
    },
    {
      "epoch": 1.3608596452491353,
      "grad_norm": 2.78125,
      "learning_rate": 3.0354551378407075e-05,
      "loss": 0.8112,
      "step": 388290
    },
    {
      "epoch": 1.3608946927560308,
      "grad_norm": 2.875,
      "learning_rate": 3.0353902349743373e-05,
      "loss": 0.844,
      "step": 388300
    },
    {
      "epoch": 1.3609297402629263,
      "grad_norm": 2.78125,
      "learning_rate": 3.035325332107967e-05,
      "loss": 0.844,
      "step": 388310
    },
    {
      "epoch": 1.3609647877698219,
      "grad_norm": 2.796875,
      "learning_rate": 3.035260429241597e-05,
      "loss": 0.8131,
      "step": 388320
    },
    {
      "epoch": 1.3609998352767176,
      "grad_norm": 3.140625,
      "learning_rate": 3.0351955263752267e-05,
      "loss": 0.7577,
      "step": 388330
    },
    {
      "epoch": 1.3610348827836132,
      "grad_norm": 2.640625,
      "learning_rate": 3.035130623508857e-05,
      "loss": 0.8888,
      "step": 388340
    },
    {
      "epoch": 1.3610699302905087,
      "grad_norm": 3.109375,
      "learning_rate": 3.0350657206424866e-05,
      "loss": 0.8558,
      "step": 388350
    },
    {
      "epoch": 1.3611049777974045,
      "grad_norm": 2.46875,
      "learning_rate": 3.0350008177761164e-05,
      "loss": 0.8433,
      "step": 388360
    },
    {
      "epoch": 1.3611400253043,
      "grad_norm": 3.5625,
      "learning_rate": 3.0349359149097462e-05,
      "loss": 0.982,
      "step": 388370
    },
    {
      "epoch": 1.3611750728111955,
      "grad_norm": 2.90625,
      "learning_rate": 3.034871012043376e-05,
      "loss": 0.8374,
      "step": 388380
    },
    {
      "epoch": 1.3612101203180913,
      "grad_norm": 2.796875,
      "learning_rate": 3.034806109177006e-05,
      "loss": 0.8045,
      "step": 388390
    },
    {
      "epoch": 1.3612451678249868,
      "grad_norm": 3.28125,
      "learning_rate": 3.0347412063106356e-05,
      "loss": 0.8277,
      "step": 388400
    },
    {
      "epoch": 1.3612802153318824,
      "grad_norm": 2.875,
      "learning_rate": 3.0346763034442654e-05,
      "loss": 0.8439,
      "step": 388410
    },
    {
      "epoch": 1.361315262838778,
      "grad_norm": 2.96875,
      "learning_rate": 3.0346114005778952e-05,
      "loss": 0.9198,
      "step": 388420
    },
    {
      "epoch": 1.3613503103456734,
      "grad_norm": 2.6875,
      "learning_rate": 3.034546497711525e-05,
      "loss": 0.7982,
      "step": 388430
    },
    {
      "epoch": 1.3613853578525692,
      "grad_norm": 2.75,
      "learning_rate": 3.034481594845155e-05,
      "loss": 0.8632,
      "step": 388440
    },
    {
      "epoch": 1.3614204053594647,
      "grad_norm": 2.515625,
      "learning_rate": 3.0344166919787846e-05,
      "loss": 0.885,
      "step": 388450
    },
    {
      "epoch": 1.3614554528663603,
      "grad_norm": 3.328125,
      "learning_rate": 3.0343517891124144e-05,
      "loss": 0.8352,
      "step": 388460
    },
    {
      "epoch": 1.361490500373256,
      "grad_norm": 3.015625,
      "learning_rate": 3.0342868862460442e-05,
      "loss": 0.8844,
      "step": 388470
    },
    {
      "epoch": 1.3615255478801516,
      "grad_norm": 3.4375,
      "learning_rate": 3.0342219833796744e-05,
      "loss": 0.8871,
      "step": 388480
    },
    {
      "epoch": 1.361560595387047,
      "grad_norm": 2.546875,
      "learning_rate": 3.0341570805133042e-05,
      "loss": 0.8559,
      "step": 388490
    },
    {
      "epoch": 1.3615956428939429,
      "grad_norm": 3.59375,
      "learning_rate": 3.034092177646934e-05,
      "loss": 0.8383,
      "step": 388500
    },
    {
      "epoch": 1.3616306904008384,
      "grad_norm": 3.109375,
      "learning_rate": 3.0340272747805638e-05,
      "loss": 0.7709,
      "step": 388510
    },
    {
      "epoch": 1.361665737907734,
      "grad_norm": 2.640625,
      "learning_rate": 3.0339623719141936e-05,
      "loss": 0.7905,
      "step": 388520
    },
    {
      "epoch": 1.3617007854146295,
      "grad_norm": 3.0,
      "learning_rate": 3.0338974690478234e-05,
      "loss": 0.8894,
      "step": 388530
    },
    {
      "epoch": 1.361735832921525,
      "grad_norm": 2.96875,
      "learning_rate": 3.0338325661814532e-05,
      "loss": 0.854,
      "step": 388540
    },
    {
      "epoch": 1.3617708804284208,
      "grad_norm": 3.203125,
      "learning_rate": 3.033767663315083e-05,
      "loss": 0.7833,
      "step": 388550
    },
    {
      "epoch": 1.3618059279353163,
      "grad_norm": 2.875,
      "learning_rate": 3.0337027604487128e-05,
      "loss": 0.826,
      "step": 388560
    },
    {
      "epoch": 1.3618409754422118,
      "grad_norm": 3.234375,
      "learning_rate": 3.0336378575823426e-05,
      "loss": 0.8316,
      "step": 388570
    },
    {
      "epoch": 1.3618760229491076,
      "grad_norm": 3.0,
      "learning_rate": 3.0335729547159724e-05,
      "loss": 0.8325,
      "step": 388580
    },
    {
      "epoch": 1.3619110704560031,
      "grad_norm": 3.09375,
      "learning_rate": 3.0335080518496022e-05,
      "loss": 0.859,
      "step": 388590
    },
    {
      "epoch": 1.3619461179628987,
      "grad_norm": 3.515625,
      "learning_rate": 3.0334431489832316e-05,
      "loss": 0.8436,
      "step": 388600
    },
    {
      "epoch": 1.3619811654697944,
      "grad_norm": 3.25,
      "learning_rate": 3.0333782461168614e-05,
      "loss": 0.8984,
      "step": 388610
    },
    {
      "epoch": 1.36201621297669,
      "grad_norm": 3.109375,
      "learning_rate": 3.0333133432504912e-05,
      "loss": 0.8552,
      "step": 388620
    },
    {
      "epoch": 1.3620512604835855,
      "grad_norm": 2.9375,
      "learning_rate": 3.033248440384121e-05,
      "loss": 0.7972,
      "step": 388630
    },
    {
      "epoch": 1.362086307990481,
      "grad_norm": 2.796875,
      "learning_rate": 3.033183537517751e-05,
      "loss": 0.864,
      "step": 388640
    },
    {
      "epoch": 1.3621213554973766,
      "grad_norm": 2.53125,
      "learning_rate": 3.0331186346513806e-05,
      "loss": 0.7841,
      "step": 388650
    },
    {
      "epoch": 1.3621564030042723,
      "grad_norm": 2.875,
      "learning_rate": 3.0330537317850104e-05,
      "loss": 0.8849,
      "step": 388660
    },
    {
      "epoch": 1.3621914505111679,
      "grad_norm": 3.28125,
      "learning_rate": 3.0329888289186402e-05,
      "loss": 0.8304,
      "step": 388670
    },
    {
      "epoch": 1.3622264980180634,
      "grad_norm": 2.875,
      "learning_rate": 3.03292392605227e-05,
      "loss": 0.8248,
      "step": 388680
    },
    {
      "epoch": 1.3622615455249591,
      "grad_norm": 2.609375,
      "learning_rate": 3.0328590231859e-05,
      "loss": 0.8547,
      "step": 388690
    },
    {
      "epoch": 1.3622965930318547,
      "grad_norm": 2.578125,
      "learning_rate": 3.0327941203195296e-05,
      "loss": 0.8742,
      "step": 388700
    },
    {
      "epoch": 1.3623316405387502,
      "grad_norm": 2.6875,
      "learning_rate": 3.0327292174531598e-05,
      "loss": 0.842,
      "step": 388710
    },
    {
      "epoch": 1.362366688045646,
      "grad_norm": 3.09375,
      "learning_rate": 3.0326643145867896e-05,
      "loss": 0.8681,
      "step": 388720
    },
    {
      "epoch": 1.3624017355525415,
      "grad_norm": 3.03125,
      "learning_rate": 3.0325994117204194e-05,
      "loss": 0.8532,
      "step": 388730
    },
    {
      "epoch": 1.362436783059437,
      "grad_norm": 3.015625,
      "learning_rate": 3.0325345088540492e-05,
      "loss": 0.8618,
      "step": 388740
    },
    {
      "epoch": 1.3624718305663326,
      "grad_norm": 2.96875,
      "learning_rate": 3.032469605987679e-05,
      "loss": 0.8362,
      "step": 388750
    },
    {
      "epoch": 1.3625068780732281,
      "grad_norm": 3.03125,
      "learning_rate": 3.0324047031213088e-05,
      "loss": 0.8016,
      "step": 388760
    },
    {
      "epoch": 1.3625419255801239,
      "grad_norm": 2.78125,
      "learning_rate": 3.0323398002549386e-05,
      "loss": 0.8983,
      "step": 388770
    },
    {
      "epoch": 1.3625769730870194,
      "grad_norm": 3.015625,
      "learning_rate": 3.0322748973885684e-05,
      "loss": 0.8787,
      "step": 388780
    },
    {
      "epoch": 1.362612020593915,
      "grad_norm": 2.9375,
      "learning_rate": 3.0322099945221982e-05,
      "loss": 0.9026,
      "step": 388790
    },
    {
      "epoch": 1.3626470681008107,
      "grad_norm": 2.8125,
      "learning_rate": 3.032145091655828e-05,
      "loss": 0.8828,
      "step": 388800
    },
    {
      "epoch": 1.3626821156077062,
      "grad_norm": 2.875,
      "learning_rate": 3.0320801887894578e-05,
      "loss": 0.7608,
      "step": 388810
    },
    {
      "epoch": 1.3627171631146018,
      "grad_norm": 2.921875,
      "learning_rate": 3.0320152859230876e-05,
      "loss": 0.8628,
      "step": 388820
    },
    {
      "epoch": 1.3627522106214975,
      "grad_norm": 2.859375,
      "learning_rate": 3.0319503830567174e-05,
      "loss": 0.8769,
      "step": 388830
    },
    {
      "epoch": 1.362787258128393,
      "grad_norm": 3.015625,
      "learning_rate": 3.0318854801903472e-05,
      "loss": 0.8296,
      "step": 388840
    },
    {
      "epoch": 1.3628223056352886,
      "grad_norm": 2.703125,
      "learning_rate": 3.0318205773239773e-05,
      "loss": 0.7798,
      "step": 388850
    },
    {
      "epoch": 1.3628573531421844,
      "grad_norm": 2.796875,
      "learning_rate": 3.031755674457607e-05,
      "loss": 0.7816,
      "step": 388860
    },
    {
      "epoch": 1.36289240064908,
      "grad_norm": 2.859375,
      "learning_rate": 3.031690771591237e-05,
      "loss": 0.8213,
      "step": 388870
    },
    {
      "epoch": 1.3629274481559754,
      "grad_norm": 3.09375,
      "learning_rate": 3.0316258687248667e-05,
      "loss": 0.8231,
      "step": 388880
    },
    {
      "epoch": 1.362962495662871,
      "grad_norm": 2.890625,
      "learning_rate": 3.0315609658584965e-05,
      "loss": 0.8635,
      "step": 388890
    },
    {
      "epoch": 1.3629975431697665,
      "grad_norm": 2.859375,
      "learning_rate": 3.0314960629921263e-05,
      "loss": 0.8691,
      "step": 388900
    },
    {
      "epoch": 1.3630325906766623,
      "grad_norm": 2.5625,
      "learning_rate": 3.031431160125756e-05,
      "loss": 0.8062,
      "step": 388910
    },
    {
      "epoch": 1.3630676381835578,
      "grad_norm": 2.90625,
      "learning_rate": 3.031366257259386e-05,
      "loss": 0.9142,
      "step": 388920
    },
    {
      "epoch": 1.3631026856904533,
      "grad_norm": 2.71875,
      "learning_rate": 3.0313013543930157e-05,
      "loss": 0.8045,
      "step": 388930
    },
    {
      "epoch": 1.363137733197349,
      "grad_norm": 3.375,
      "learning_rate": 3.0312364515266455e-05,
      "loss": 0.8288,
      "step": 388940
    },
    {
      "epoch": 1.3631727807042446,
      "grad_norm": 2.65625,
      "learning_rate": 3.0311715486602753e-05,
      "loss": 0.8237,
      "step": 388950
    },
    {
      "epoch": 1.3632078282111402,
      "grad_norm": 3.1875,
      "learning_rate": 3.031106645793905e-05,
      "loss": 0.9089,
      "step": 388960
    },
    {
      "epoch": 1.363242875718036,
      "grad_norm": 3.421875,
      "learning_rate": 3.031041742927535e-05,
      "loss": 0.845,
      "step": 388970
    },
    {
      "epoch": 1.3632779232249315,
      "grad_norm": 3.03125,
      "learning_rate": 3.0309768400611644e-05,
      "loss": 0.8642,
      "step": 388980
    },
    {
      "epoch": 1.363312970731827,
      "grad_norm": 2.984375,
      "learning_rate": 3.0309119371947942e-05,
      "loss": 0.9164,
      "step": 388990
    },
    {
      "epoch": 1.3633480182387225,
      "grad_norm": 3.296875,
      "learning_rate": 3.030847034328424e-05,
      "loss": 0.7773,
      "step": 389000
    },
    {
      "epoch": 1.363383065745618,
      "grad_norm": 3.234375,
      "learning_rate": 3.0307821314620538e-05,
      "loss": 0.8808,
      "step": 389010
    },
    {
      "epoch": 1.3634181132525138,
      "grad_norm": 2.609375,
      "learning_rate": 3.0307172285956836e-05,
      "loss": 0.8795,
      "step": 389020
    },
    {
      "epoch": 1.3634531607594094,
      "grad_norm": 2.859375,
      "learning_rate": 3.0306523257293134e-05,
      "loss": 0.8413,
      "step": 389030
    },
    {
      "epoch": 1.363488208266305,
      "grad_norm": 3.078125,
      "learning_rate": 3.0305874228629432e-05,
      "loss": 0.8344,
      "step": 389040
    },
    {
      "epoch": 1.3635232557732007,
      "grad_norm": 2.671875,
      "learning_rate": 3.030522519996573e-05,
      "loss": 0.8423,
      "step": 389050
    },
    {
      "epoch": 1.3635583032800962,
      "grad_norm": 3.6875,
      "learning_rate": 3.0304576171302028e-05,
      "loss": 0.8164,
      "step": 389060
    },
    {
      "epoch": 1.3635933507869917,
      "grad_norm": 2.828125,
      "learning_rate": 3.030392714263833e-05,
      "loss": 0.8128,
      "step": 389070
    },
    {
      "epoch": 1.3636283982938875,
      "grad_norm": 3.1875,
      "learning_rate": 3.0303278113974627e-05,
      "loss": 0.8939,
      "step": 389080
    },
    {
      "epoch": 1.363663445800783,
      "grad_norm": 2.734375,
      "learning_rate": 3.0302629085310925e-05,
      "loss": 0.8597,
      "step": 389090
    },
    {
      "epoch": 1.3636984933076786,
      "grad_norm": 2.90625,
      "learning_rate": 3.0301980056647223e-05,
      "loss": 0.7881,
      "step": 389100
    },
    {
      "epoch": 1.363733540814574,
      "grad_norm": 4.46875,
      "learning_rate": 3.030133102798352e-05,
      "loss": 0.8386,
      "step": 389110
    },
    {
      "epoch": 1.3637685883214696,
      "grad_norm": 3.234375,
      "learning_rate": 3.030068199931982e-05,
      "loss": 0.8504,
      "step": 389120
    },
    {
      "epoch": 1.3638036358283654,
      "grad_norm": 3.0,
      "learning_rate": 3.0300032970656117e-05,
      "loss": 0.8959,
      "step": 389130
    },
    {
      "epoch": 1.363838683335261,
      "grad_norm": 2.6875,
      "learning_rate": 3.0299383941992415e-05,
      "loss": 0.8216,
      "step": 389140
    },
    {
      "epoch": 1.3638737308421565,
      "grad_norm": 2.765625,
      "learning_rate": 3.0298734913328713e-05,
      "loss": 0.9187,
      "step": 389150
    },
    {
      "epoch": 1.3639087783490522,
      "grad_norm": 2.734375,
      "learning_rate": 3.029808588466501e-05,
      "loss": 0.8147,
      "step": 389160
    },
    {
      "epoch": 1.3639438258559478,
      "grad_norm": 2.984375,
      "learning_rate": 3.029743685600131e-05,
      "loss": 0.8794,
      "step": 389170
    },
    {
      "epoch": 1.3639788733628433,
      "grad_norm": 2.8125,
      "learning_rate": 3.0296787827337607e-05,
      "loss": 0.831,
      "step": 389180
    },
    {
      "epoch": 1.364013920869739,
      "grad_norm": 2.765625,
      "learning_rate": 3.0296138798673905e-05,
      "loss": 0.8677,
      "step": 389190
    },
    {
      "epoch": 1.3640489683766346,
      "grad_norm": 3.1875,
      "learning_rate": 3.0295489770010203e-05,
      "loss": 0.9286,
      "step": 389200
    },
    {
      "epoch": 1.3640840158835301,
      "grad_norm": 2.5625,
      "learning_rate": 3.0294840741346505e-05,
      "loss": 0.869,
      "step": 389210
    },
    {
      "epoch": 1.3641190633904257,
      "grad_norm": 2.671875,
      "learning_rate": 3.0294191712682803e-05,
      "loss": 0.7965,
      "step": 389220
    },
    {
      "epoch": 1.3641541108973212,
      "grad_norm": 3.28125,
      "learning_rate": 3.02935426840191e-05,
      "loss": 0.8372,
      "step": 389230
    },
    {
      "epoch": 1.364189158404217,
      "grad_norm": 2.671875,
      "learning_rate": 3.02928936553554e-05,
      "loss": 0.8161,
      "step": 389240
    },
    {
      "epoch": 1.3642242059111125,
      "grad_norm": 2.5625,
      "learning_rate": 3.0292244626691697e-05,
      "loss": 0.8326,
      "step": 389250
    },
    {
      "epoch": 1.364259253418008,
      "grad_norm": 2.90625,
      "learning_rate": 3.0291595598027995e-05,
      "loss": 0.8352,
      "step": 389260
    },
    {
      "epoch": 1.3642943009249038,
      "grad_norm": 2.9375,
      "learning_rate": 3.0290946569364293e-05,
      "loss": 0.9019,
      "step": 389270
    },
    {
      "epoch": 1.3643293484317993,
      "grad_norm": 2.921875,
      "learning_rate": 3.029029754070059e-05,
      "loss": 0.7865,
      "step": 389280
    },
    {
      "epoch": 1.3643643959386949,
      "grad_norm": 3.046875,
      "learning_rate": 3.028964851203689e-05,
      "loss": 0.8623,
      "step": 389290
    },
    {
      "epoch": 1.3643994434455906,
      "grad_norm": 2.6875,
      "learning_rate": 3.0288999483373187e-05,
      "loss": 0.8231,
      "step": 389300
    },
    {
      "epoch": 1.3644344909524861,
      "grad_norm": 3.03125,
      "learning_rate": 3.0288350454709485e-05,
      "loss": 0.8622,
      "step": 389310
    },
    {
      "epoch": 1.3644695384593817,
      "grad_norm": 2.953125,
      "learning_rate": 3.0287701426045783e-05,
      "loss": 0.9099,
      "step": 389320
    },
    {
      "epoch": 1.3645045859662772,
      "grad_norm": 2.734375,
      "learning_rate": 3.028705239738208e-05,
      "loss": 0.9073,
      "step": 389330
    },
    {
      "epoch": 1.3645396334731728,
      "grad_norm": 3.0,
      "learning_rate": 3.028640336871838e-05,
      "loss": 0.8328,
      "step": 389340
    },
    {
      "epoch": 1.3645746809800685,
      "grad_norm": 2.875,
      "learning_rate": 3.028575434005468e-05,
      "loss": 0.8349,
      "step": 389350
    },
    {
      "epoch": 1.364609728486964,
      "grad_norm": 3.109375,
      "learning_rate": 3.028510531139097e-05,
      "loss": 0.8555,
      "step": 389360
    },
    {
      "epoch": 1.3646447759938596,
      "grad_norm": 2.734375,
      "learning_rate": 3.028445628272727e-05,
      "loss": 0.8618,
      "step": 389370
    },
    {
      "epoch": 1.3646798235007553,
      "grad_norm": 2.65625,
      "learning_rate": 3.0283807254063567e-05,
      "loss": 0.8745,
      "step": 389380
    },
    {
      "epoch": 1.3647148710076509,
      "grad_norm": 2.71875,
      "learning_rate": 3.0283158225399865e-05,
      "loss": 0.7795,
      "step": 389390
    },
    {
      "epoch": 1.3647499185145464,
      "grad_norm": 2.890625,
      "learning_rate": 3.0282509196736163e-05,
      "loss": 0.8058,
      "step": 389400
    },
    {
      "epoch": 1.3647849660214422,
      "grad_norm": 2.546875,
      "learning_rate": 3.028186016807246e-05,
      "loss": 0.8321,
      "step": 389410
    },
    {
      "epoch": 1.3648200135283377,
      "grad_norm": 2.265625,
      "learning_rate": 3.028121113940876e-05,
      "loss": 0.7892,
      "step": 389420
    },
    {
      "epoch": 1.3648550610352332,
      "grad_norm": 3.28125,
      "learning_rate": 3.0280562110745057e-05,
      "loss": 0.8745,
      "step": 389430
    },
    {
      "epoch": 1.3648901085421288,
      "grad_norm": 2.4375,
      "learning_rate": 3.027991308208136e-05,
      "loss": 0.9484,
      "step": 389440
    },
    {
      "epoch": 1.3649251560490245,
      "grad_norm": 2.984375,
      "learning_rate": 3.0279264053417657e-05,
      "loss": 0.8467,
      "step": 389450
    },
    {
      "epoch": 1.36496020355592,
      "grad_norm": 2.40625,
      "learning_rate": 3.0278615024753955e-05,
      "loss": 0.8625,
      "step": 389460
    },
    {
      "epoch": 1.3649952510628156,
      "grad_norm": 2.578125,
      "learning_rate": 3.0277965996090253e-05,
      "loss": 0.8995,
      "step": 389470
    },
    {
      "epoch": 1.3650302985697111,
      "grad_norm": 2.828125,
      "learning_rate": 3.027731696742655e-05,
      "loss": 0.9238,
      "step": 389480
    },
    {
      "epoch": 1.365065346076607,
      "grad_norm": 2.640625,
      "learning_rate": 3.027666793876285e-05,
      "loss": 0.689,
      "step": 389490
    },
    {
      "epoch": 1.3651003935835024,
      "grad_norm": 2.546875,
      "learning_rate": 3.0276018910099147e-05,
      "loss": 0.7992,
      "step": 389500
    },
    {
      "epoch": 1.365135441090398,
      "grad_norm": 3.09375,
      "learning_rate": 3.0275369881435445e-05,
      "loss": 0.8762,
      "step": 389510
    },
    {
      "epoch": 1.3651704885972937,
      "grad_norm": 2.578125,
      "learning_rate": 3.0274720852771743e-05,
      "loss": 0.7813,
      "step": 389520
    },
    {
      "epoch": 1.3652055361041893,
      "grad_norm": 2.625,
      "learning_rate": 3.027407182410804e-05,
      "loss": 0.8771,
      "step": 389530
    },
    {
      "epoch": 1.3652405836110848,
      "grad_norm": 3.171875,
      "learning_rate": 3.027342279544434e-05,
      "loss": 0.835,
      "step": 389540
    },
    {
      "epoch": 1.3652756311179806,
      "grad_norm": 2.5,
      "learning_rate": 3.0272773766780637e-05,
      "loss": 0.8169,
      "step": 389550
    },
    {
      "epoch": 1.365310678624876,
      "grad_norm": 2.625,
      "learning_rate": 3.0272124738116935e-05,
      "loss": 0.8208,
      "step": 389560
    },
    {
      "epoch": 1.3653457261317716,
      "grad_norm": 3.109375,
      "learning_rate": 3.0271475709453233e-05,
      "loss": 0.8427,
      "step": 389570
    },
    {
      "epoch": 1.3653807736386672,
      "grad_norm": 2.953125,
      "learning_rate": 3.0270826680789534e-05,
      "loss": 0.894,
      "step": 389580
    },
    {
      "epoch": 1.3654158211455627,
      "grad_norm": 2.578125,
      "learning_rate": 3.0270177652125832e-05,
      "loss": 0.8651,
      "step": 389590
    },
    {
      "epoch": 1.3654508686524585,
      "grad_norm": 3.0625,
      "learning_rate": 3.026952862346213e-05,
      "loss": 0.8359,
      "step": 389600
    },
    {
      "epoch": 1.365485916159354,
      "grad_norm": 2.875,
      "learning_rate": 3.0268879594798428e-05,
      "loss": 0.8465,
      "step": 389610
    },
    {
      "epoch": 1.3655209636662495,
      "grad_norm": 3.0625,
      "learning_rate": 3.0268230566134726e-05,
      "loss": 0.8717,
      "step": 389620
    },
    {
      "epoch": 1.3655560111731453,
      "grad_norm": 2.859375,
      "learning_rate": 3.0267581537471024e-05,
      "loss": 0.8524,
      "step": 389630
    },
    {
      "epoch": 1.3655910586800408,
      "grad_norm": 2.78125,
      "learning_rate": 3.0266932508807322e-05,
      "loss": 0.8434,
      "step": 389640
    },
    {
      "epoch": 1.3656261061869364,
      "grad_norm": 2.9375,
      "learning_rate": 3.026628348014362e-05,
      "loss": 0.8622,
      "step": 389650
    },
    {
      "epoch": 1.3656611536938321,
      "grad_norm": 2.203125,
      "learning_rate": 3.0265634451479918e-05,
      "loss": 0.8209,
      "step": 389660
    },
    {
      "epoch": 1.3656962012007277,
      "grad_norm": 2.96875,
      "learning_rate": 3.0264985422816216e-05,
      "loss": 0.8477,
      "step": 389670
    },
    {
      "epoch": 1.3657312487076232,
      "grad_norm": 2.953125,
      "learning_rate": 3.0264336394152514e-05,
      "loss": 0.7907,
      "step": 389680
    },
    {
      "epoch": 1.3657662962145187,
      "grad_norm": 3.328125,
      "learning_rate": 3.0263687365488812e-05,
      "loss": 0.8442,
      "step": 389690
    },
    {
      "epoch": 1.3658013437214143,
      "grad_norm": 3.015625,
      "learning_rate": 3.026303833682511e-05,
      "loss": 0.8342,
      "step": 389700
    },
    {
      "epoch": 1.36583639122831,
      "grad_norm": 2.4375,
      "learning_rate": 3.0262389308161408e-05,
      "loss": 0.7753,
      "step": 389710
    },
    {
      "epoch": 1.3658714387352056,
      "grad_norm": 3.140625,
      "learning_rate": 3.026174027949771e-05,
      "loss": 0.911,
      "step": 389720
    },
    {
      "epoch": 1.365906486242101,
      "grad_norm": 2.71875,
      "learning_rate": 3.0261091250834e-05,
      "loss": 0.9167,
      "step": 389730
    },
    {
      "epoch": 1.3659415337489969,
      "grad_norm": 2.640625,
      "learning_rate": 3.02604422221703e-05,
      "loss": 0.7867,
      "step": 389740
    },
    {
      "epoch": 1.3659765812558924,
      "grad_norm": 2.90625,
      "learning_rate": 3.0259793193506597e-05,
      "loss": 0.8581,
      "step": 389750
    },
    {
      "epoch": 1.366011628762788,
      "grad_norm": 2.9375,
      "learning_rate": 3.0259144164842895e-05,
      "loss": 0.9151,
      "step": 389760
    },
    {
      "epoch": 1.3660466762696837,
      "grad_norm": 3.0,
      "learning_rate": 3.0258495136179193e-05,
      "loss": 0.807,
      "step": 389770
    },
    {
      "epoch": 1.3660817237765792,
      "grad_norm": 2.953125,
      "learning_rate": 3.025784610751549e-05,
      "loss": 0.9146,
      "step": 389780
    },
    {
      "epoch": 1.3661167712834748,
      "grad_norm": 2.71875,
      "learning_rate": 3.025719707885179e-05,
      "loss": 0.8335,
      "step": 389790
    },
    {
      "epoch": 1.3661518187903703,
      "grad_norm": 2.828125,
      "learning_rate": 3.0256548050188087e-05,
      "loss": 0.8498,
      "step": 389800
    },
    {
      "epoch": 1.3661868662972658,
      "grad_norm": 2.59375,
      "learning_rate": 3.0255899021524388e-05,
      "loss": 0.8083,
      "step": 389810
    },
    {
      "epoch": 1.3662219138041616,
      "grad_norm": 2.734375,
      "learning_rate": 3.0255249992860686e-05,
      "loss": 0.786,
      "step": 389820
    },
    {
      "epoch": 1.3662569613110571,
      "grad_norm": 2.8125,
      "learning_rate": 3.0254600964196984e-05,
      "loss": 0.8237,
      "step": 389830
    },
    {
      "epoch": 1.3662920088179527,
      "grad_norm": 3.09375,
      "learning_rate": 3.0253951935533282e-05,
      "loss": 0.8943,
      "step": 389840
    },
    {
      "epoch": 1.3663270563248484,
      "grad_norm": 2.859375,
      "learning_rate": 3.025330290686958e-05,
      "loss": 0.856,
      "step": 389850
    },
    {
      "epoch": 1.366362103831744,
      "grad_norm": 2.71875,
      "learning_rate": 3.0252653878205878e-05,
      "loss": 0.8462,
      "step": 389860
    },
    {
      "epoch": 1.3663971513386395,
      "grad_norm": 2.828125,
      "learning_rate": 3.0252004849542176e-05,
      "loss": 0.7748,
      "step": 389870
    },
    {
      "epoch": 1.3664321988455352,
      "grad_norm": 2.671875,
      "learning_rate": 3.0251355820878474e-05,
      "loss": 0.8203,
      "step": 389880
    },
    {
      "epoch": 1.3664672463524308,
      "grad_norm": 2.453125,
      "learning_rate": 3.0250706792214772e-05,
      "loss": 0.8267,
      "step": 389890
    },
    {
      "epoch": 1.3665022938593263,
      "grad_norm": 2.796875,
      "learning_rate": 3.025005776355107e-05,
      "loss": 0.8936,
      "step": 389900
    },
    {
      "epoch": 1.3665373413662218,
      "grad_norm": 2.96875,
      "learning_rate": 3.0249408734887368e-05,
      "loss": 0.7634,
      "step": 389910
    },
    {
      "epoch": 1.3665723888731174,
      "grad_norm": 2.8125,
      "learning_rate": 3.0248759706223666e-05,
      "loss": 0.8775,
      "step": 389920
    },
    {
      "epoch": 1.3666074363800131,
      "grad_norm": 2.734375,
      "learning_rate": 3.0248110677559964e-05,
      "loss": 0.8574,
      "step": 389930
    },
    {
      "epoch": 1.3666424838869087,
      "grad_norm": 2.765625,
      "learning_rate": 3.0247461648896265e-05,
      "loss": 0.8568,
      "step": 389940
    },
    {
      "epoch": 1.3666775313938042,
      "grad_norm": 2.984375,
      "learning_rate": 3.0246812620232563e-05,
      "loss": 0.8911,
      "step": 389950
    },
    {
      "epoch": 1.3667125789007,
      "grad_norm": 3.203125,
      "learning_rate": 3.024616359156886e-05,
      "loss": 0.8739,
      "step": 389960
    },
    {
      "epoch": 1.3667476264075955,
      "grad_norm": 3.265625,
      "learning_rate": 3.024551456290516e-05,
      "loss": 0.853,
      "step": 389970
    },
    {
      "epoch": 1.366782673914491,
      "grad_norm": 2.65625,
      "learning_rate": 3.0244865534241457e-05,
      "loss": 0.9072,
      "step": 389980
    },
    {
      "epoch": 1.3668177214213868,
      "grad_norm": 2.75,
      "learning_rate": 3.0244216505577755e-05,
      "loss": 0.7958,
      "step": 389990
    },
    {
      "epoch": 1.3668527689282823,
      "grad_norm": 2.9375,
      "learning_rate": 3.0243567476914053e-05,
      "loss": 0.8133,
      "step": 390000
    },
    {
      "epoch": 1.3668527689282823,
      "eval_loss": 0.7967785596847534,
      "eval_runtime": 553.8553,
      "eval_samples_per_second": 686.887,
      "eval_steps_per_second": 57.241,
      "step": 390000
    },
    {
      "epoch": 1.3668878164351779,
      "grad_norm": 2.8125,
      "learning_rate": 3.024291844825035e-05,
      "loss": 0.796,
      "step": 390010
    },
    {
      "epoch": 1.3669228639420734,
      "grad_norm": 2.84375,
      "learning_rate": 3.024226941958665e-05,
      "loss": 0.8505,
      "step": 390020
    },
    {
      "epoch": 1.366957911448969,
      "grad_norm": 2.453125,
      "learning_rate": 3.0241620390922947e-05,
      "loss": 0.8975,
      "step": 390030
    },
    {
      "epoch": 1.3669929589558647,
      "grad_norm": 3.828125,
      "learning_rate": 3.0240971362259245e-05,
      "loss": 0.8773,
      "step": 390040
    },
    {
      "epoch": 1.3670280064627602,
      "grad_norm": 3.109375,
      "learning_rate": 3.0240322333595543e-05,
      "loss": 0.836,
      "step": 390050
    },
    {
      "epoch": 1.3670630539696558,
      "grad_norm": 3.015625,
      "learning_rate": 3.023967330493184e-05,
      "loss": 0.849,
      "step": 390060
    },
    {
      "epoch": 1.3670981014765515,
      "grad_norm": 3.0,
      "learning_rate": 3.023902427626814e-05,
      "loss": 0.9246,
      "step": 390070
    },
    {
      "epoch": 1.367133148983447,
      "grad_norm": 3.140625,
      "learning_rate": 3.023837524760444e-05,
      "loss": 0.9423,
      "step": 390080
    },
    {
      "epoch": 1.3671681964903426,
      "grad_norm": 3.0625,
      "learning_rate": 3.023772621894074e-05,
      "loss": 0.8643,
      "step": 390090
    },
    {
      "epoch": 1.3672032439972384,
      "grad_norm": 3.15625,
      "learning_rate": 3.0237077190277037e-05,
      "loss": 0.798,
      "step": 390100
    },
    {
      "epoch": 1.367238291504134,
      "grad_norm": 2.953125,
      "learning_rate": 3.0236428161613328e-05,
      "loss": 0.7628,
      "step": 390110
    },
    {
      "epoch": 1.3672733390110294,
      "grad_norm": 3.015625,
      "learning_rate": 3.0235779132949626e-05,
      "loss": 0.7737,
      "step": 390120
    },
    {
      "epoch": 1.367308386517925,
      "grad_norm": 2.984375,
      "learning_rate": 3.0235130104285924e-05,
      "loss": 0.7808,
      "step": 390130
    },
    {
      "epoch": 1.3673434340248207,
      "grad_norm": 2.765625,
      "learning_rate": 3.0234481075622222e-05,
      "loss": 0.8013,
      "step": 390140
    },
    {
      "epoch": 1.3673784815317163,
      "grad_norm": 3.0,
      "learning_rate": 3.023383204695852e-05,
      "loss": 0.8438,
      "step": 390150
    },
    {
      "epoch": 1.3674135290386118,
      "grad_norm": 2.75,
      "learning_rate": 3.0233183018294818e-05,
      "loss": 0.9002,
      "step": 390160
    },
    {
      "epoch": 1.3674485765455073,
      "grad_norm": 3.09375,
      "learning_rate": 3.023253398963112e-05,
      "loss": 0.8734,
      "step": 390170
    },
    {
      "epoch": 1.367483624052403,
      "grad_norm": 2.859375,
      "learning_rate": 3.0231884960967417e-05,
      "loss": 0.9567,
      "step": 390180
    },
    {
      "epoch": 1.3675186715592986,
      "grad_norm": 2.71875,
      "learning_rate": 3.0231235932303715e-05,
      "loss": 0.8949,
      "step": 390190
    },
    {
      "epoch": 1.3675537190661942,
      "grad_norm": 3.3125,
      "learning_rate": 3.0230586903640013e-05,
      "loss": 0.896,
      "step": 390200
    },
    {
      "epoch": 1.36758876657309,
      "grad_norm": 2.46875,
      "learning_rate": 3.022993787497631e-05,
      "loss": 0.8807,
      "step": 390210
    },
    {
      "epoch": 1.3676238140799855,
      "grad_norm": 3.15625,
      "learning_rate": 3.022928884631261e-05,
      "loss": 0.9415,
      "step": 390220
    },
    {
      "epoch": 1.367658861586881,
      "grad_norm": 3.25,
      "learning_rate": 3.0228639817648907e-05,
      "loss": 0.8382,
      "step": 390230
    },
    {
      "epoch": 1.3676939090937767,
      "grad_norm": 3.390625,
      "learning_rate": 3.0227990788985205e-05,
      "loss": 0.8896,
      "step": 390240
    },
    {
      "epoch": 1.3677289566006723,
      "grad_norm": 2.78125,
      "learning_rate": 3.0227341760321503e-05,
      "loss": 0.7753,
      "step": 390250
    },
    {
      "epoch": 1.3677640041075678,
      "grad_norm": 2.6875,
      "learning_rate": 3.02266927316578e-05,
      "loss": 0.9107,
      "step": 390260
    },
    {
      "epoch": 1.3677990516144634,
      "grad_norm": 3.078125,
      "learning_rate": 3.02260437029941e-05,
      "loss": 0.7792,
      "step": 390270
    },
    {
      "epoch": 1.367834099121359,
      "grad_norm": 2.625,
      "learning_rate": 3.0225394674330397e-05,
      "loss": 0.8948,
      "step": 390280
    },
    {
      "epoch": 1.3678691466282547,
      "grad_norm": 2.515625,
      "learning_rate": 3.0224745645666695e-05,
      "loss": 0.8856,
      "step": 390290
    },
    {
      "epoch": 1.3679041941351502,
      "grad_norm": 2.578125,
      "learning_rate": 3.0224096617002993e-05,
      "loss": 0.848,
      "step": 390300
    },
    {
      "epoch": 1.3679392416420457,
      "grad_norm": 2.921875,
      "learning_rate": 3.0223447588339295e-05,
      "loss": 0.9633,
      "step": 390310
    },
    {
      "epoch": 1.3679742891489415,
      "grad_norm": 2.890625,
      "learning_rate": 3.0222798559675593e-05,
      "loss": 0.8397,
      "step": 390320
    },
    {
      "epoch": 1.368009336655837,
      "grad_norm": 2.875,
      "learning_rate": 3.022214953101189e-05,
      "loss": 0.8167,
      "step": 390330
    },
    {
      "epoch": 1.3680443841627326,
      "grad_norm": 2.8125,
      "learning_rate": 3.022150050234819e-05,
      "loss": 0.8238,
      "step": 390340
    },
    {
      "epoch": 1.3680794316696283,
      "grad_norm": 2.640625,
      "learning_rate": 3.0220851473684487e-05,
      "loss": 0.8338,
      "step": 390350
    },
    {
      "epoch": 1.3681144791765238,
      "grad_norm": 3.171875,
      "learning_rate": 3.0220202445020785e-05,
      "loss": 0.8072,
      "step": 390360
    },
    {
      "epoch": 1.3681495266834194,
      "grad_norm": 3.03125,
      "learning_rate": 3.0219553416357083e-05,
      "loss": 0.8826,
      "step": 390370
    },
    {
      "epoch": 1.368184574190315,
      "grad_norm": 3.203125,
      "learning_rate": 3.021890438769338e-05,
      "loss": 0.9046,
      "step": 390380
    },
    {
      "epoch": 1.3682196216972105,
      "grad_norm": 2.59375,
      "learning_rate": 3.021825535902968e-05,
      "loss": 0.824,
      "step": 390390
    },
    {
      "epoch": 1.3682546692041062,
      "grad_norm": 2.953125,
      "learning_rate": 3.0217606330365977e-05,
      "loss": 0.9528,
      "step": 390400
    },
    {
      "epoch": 1.3682897167110017,
      "grad_norm": 2.71875,
      "learning_rate": 3.0216957301702275e-05,
      "loss": 0.8328,
      "step": 390410
    },
    {
      "epoch": 1.3683247642178973,
      "grad_norm": 2.5,
      "learning_rate": 3.0216308273038573e-05,
      "loss": 0.8144,
      "step": 390420
    },
    {
      "epoch": 1.368359811724793,
      "grad_norm": 3.171875,
      "learning_rate": 3.021565924437487e-05,
      "loss": 0.8531,
      "step": 390430
    },
    {
      "epoch": 1.3683948592316886,
      "grad_norm": 2.59375,
      "learning_rate": 3.021501021571117e-05,
      "loss": 0.8408,
      "step": 390440
    },
    {
      "epoch": 1.3684299067385841,
      "grad_norm": 2.78125,
      "learning_rate": 3.021436118704747e-05,
      "loss": 0.8838,
      "step": 390450
    },
    {
      "epoch": 1.3684649542454799,
      "grad_norm": 2.96875,
      "learning_rate": 3.0213712158383768e-05,
      "loss": 0.8618,
      "step": 390460
    },
    {
      "epoch": 1.3685000017523754,
      "grad_norm": 2.765625,
      "learning_rate": 3.0213063129720066e-05,
      "loss": 0.7823,
      "step": 390470
    },
    {
      "epoch": 1.368535049259271,
      "grad_norm": 3.015625,
      "learning_rate": 3.0212414101056364e-05,
      "loss": 0.8541,
      "step": 390480
    },
    {
      "epoch": 1.3685700967661665,
      "grad_norm": 2.734375,
      "learning_rate": 3.0211765072392655e-05,
      "loss": 0.937,
      "step": 390490
    },
    {
      "epoch": 1.368605144273062,
      "grad_norm": 2.734375,
      "learning_rate": 3.0211116043728953e-05,
      "loss": 0.8824,
      "step": 390500
    },
    {
      "epoch": 1.3686401917799578,
      "grad_norm": 2.859375,
      "learning_rate": 3.021046701506525e-05,
      "loss": 0.7885,
      "step": 390510
    },
    {
      "epoch": 1.3686752392868533,
      "grad_norm": 3.03125,
      "learning_rate": 3.020981798640155e-05,
      "loss": 0.8772,
      "step": 390520
    },
    {
      "epoch": 1.3687102867937488,
      "grad_norm": 2.765625,
      "learning_rate": 3.0209168957737847e-05,
      "loss": 0.8986,
      "step": 390530
    },
    {
      "epoch": 1.3687453343006446,
      "grad_norm": 2.859375,
      "learning_rate": 3.020851992907415e-05,
      "loss": 0.787,
      "step": 390540
    },
    {
      "epoch": 1.3687803818075401,
      "grad_norm": 2.859375,
      "learning_rate": 3.0207870900410447e-05,
      "loss": 0.8077,
      "step": 390550
    },
    {
      "epoch": 1.3688154293144357,
      "grad_norm": 2.8125,
      "learning_rate": 3.0207221871746745e-05,
      "loss": 0.7301,
      "step": 390560
    },
    {
      "epoch": 1.3688504768213314,
      "grad_norm": 3.046875,
      "learning_rate": 3.0206572843083043e-05,
      "loss": 0.8311,
      "step": 390570
    },
    {
      "epoch": 1.368885524328227,
      "grad_norm": 3.6875,
      "learning_rate": 3.020592381441934e-05,
      "loss": 0.916,
      "step": 390580
    },
    {
      "epoch": 1.3689205718351225,
      "grad_norm": 3.203125,
      "learning_rate": 3.020527478575564e-05,
      "loss": 0.8372,
      "step": 390590
    },
    {
      "epoch": 1.368955619342018,
      "grad_norm": 2.96875,
      "learning_rate": 3.0204625757091937e-05,
      "loss": 0.8846,
      "step": 390600
    },
    {
      "epoch": 1.3689906668489136,
      "grad_norm": 2.8125,
      "learning_rate": 3.0203976728428235e-05,
      "loss": 0.82,
      "step": 390610
    },
    {
      "epoch": 1.3690257143558093,
      "grad_norm": 2.796875,
      "learning_rate": 3.0203327699764533e-05,
      "loss": 0.9102,
      "step": 390620
    },
    {
      "epoch": 1.3690607618627049,
      "grad_norm": 2.734375,
      "learning_rate": 3.020267867110083e-05,
      "loss": 0.8662,
      "step": 390630
    },
    {
      "epoch": 1.3690958093696004,
      "grad_norm": 2.734375,
      "learning_rate": 3.020202964243713e-05,
      "loss": 0.8215,
      "step": 390640
    },
    {
      "epoch": 1.3691308568764962,
      "grad_norm": 2.9375,
      "learning_rate": 3.0201380613773427e-05,
      "loss": 0.8548,
      "step": 390650
    },
    {
      "epoch": 1.3691659043833917,
      "grad_norm": 2.90625,
      "learning_rate": 3.0200731585109725e-05,
      "loss": 0.8858,
      "step": 390660
    },
    {
      "epoch": 1.3692009518902872,
      "grad_norm": 2.65625,
      "learning_rate": 3.0200082556446023e-05,
      "loss": 0.8086,
      "step": 390670
    },
    {
      "epoch": 1.369235999397183,
      "grad_norm": 2.953125,
      "learning_rate": 3.0199433527782324e-05,
      "loss": 0.864,
      "step": 390680
    },
    {
      "epoch": 1.3692710469040785,
      "grad_norm": 3.078125,
      "learning_rate": 3.0198784499118622e-05,
      "loss": 0.8971,
      "step": 390690
    },
    {
      "epoch": 1.369306094410974,
      "grad_norm": 2.703125,
      "learning_rate": 3.019813547045492e-05,
      "loss": 0.8042,
      "step": 390700
    },
    {
      "epoch": 1.3693411419178696,
      "grad_norm": 2.890625,
      "learning_rate": 3.0197486441791218e-05,
      "loss": 0.8147,
      "step": 390710
    },
    {
      "epoch": 1.3693761894247651,
      "grad_norm": 2.828125,
      "learning_rate": 3.0196837413127516e-05,
      "loss": 0.8496,
      "step": 390720
    },
    {
      "epoch": 1.369411236931661,
      "grad_norm": 2.46875,
      "learning_rate": 3.0196188384463814e-05,
      "loss": 0.8128,
      "step": 390730
    },
    {
      "epoch": 1.3694462844385564,
      "grad_norm": 2.9375,
      "learning_rate": 3.0195539355800112e-05,
      "loss": 0.9139,
      "step": 390740
    },
    {
      "epoch": 1.369481331945452,
      "grad_norm": 2.8125,
      "learning_rate": 3.019489032713641e-05,
      "loss": 0.922,
      "step": 390750
    },
    {
      "epoch": 1.3695163794523477,
      "grad_norm": 3.234375,
      "learning_rate": 3.0194241298472708e-05,
      "loss": 0.8864,
      "step": 390760
    },
    {
      "epoch": 1.3695514269592433,
      "grad_norm": 2.890625,
      "learning_rate": 3.0193592269809006e-05,
      "loss": 0.8419,
      "step": 390770
    },
    {
      "epoch": 1.3695864744661388,
      "grad_norm": 2.734375,
      "learning_rate": 3.0192943241145304e-05,
      "loss": 0.7772,
      "step": 390780
    },
    {
      "epoch": 1.3696215219730346,
      "grad_norm": 2.984375,
      "learning_rate": 3.0192294212481602e-05,
      "loss": 0.8957,
      "step": 390790
    },
    {
      "epoch": 1.36965656947993,
      "grad_norm": 2.71875,
      "learning_rate": 3.01916451838179e-05,
      "loss": 0.8369,
      "step": 390800
    },
    {
      "epoch": 1.3696916169868256,
      "grad_norm": 3.15625,
      "learning_rate": 3.0190996155154198e-05,
      "loss": 0.9076,
      "step": 390810
    },
    {
      "epoch": 1.3697266644937212,
      "grad_norm": 3.078125,
      "learning_rate": 3.01903471264905e-05,
      "loss": 0.8487,
      "step": 390820
    },
    {
      "epoch": 1.369761712000617,
      "grad_norm": 3.046875,
      "learning_rate": 3.0189698097826797e-05,
      "loss": 0.8313,
      "step": 390830
    },
    {
      "epoch": 1.3697967595075125,
      "grad_norm": 2.84375,
      "learning_rate": 3.0189049069163095e-05,
      "loss": 0.828,
      "step": 390840
    },
    {
      "epoch": 1.369831807014408,
      "grad_norm": 3.0,
      "learning_rate": 3.0188400040499393e-05,
      "loss": 0.8777,
      "step": 390850
    },
    {
      "epoch": 1.3698668545213035,
      "grad_norm": 3.03125,
      "learning_rate": 3.0187751011835685e-05,
      "loss": 0.8405,
      "step": 390860
    },
    {
      "epoch": 1.3699019020281993,
      "grad_norm": 3.125,
      "learning_rate": 3.0187101983171983e-05,
      "loss": 0.9092,
      "step": 390870
    },
    {
      "epoch": 1.3699369495350948,
      "grad_norm": 2.6875,
      "learning_rate": 3.018645295450828e-05,
      "loss": 0.7864,
      "step": 390880
    },
    {
      "epoch": 1.3699719970419904,
      "grad_norm": 3.078125,
      "learning_rate": 3.018580392584458e-05,
      "loss": 0.8901,
      "step": 390890
    },
    {
      "epoch": 1.370007044548886,
      "grad_norm": 2.65625,
      "learning_rate": 3.0185154897180877e-05,
      "loss": 0.8614,
      "step": 390900
    },
    {
      "epoch": 1.3700420920557816,
      "grad_norm": 3.046875,
      "learning_rate": 3.0184505868517178e-05,
      "loss": 0.8033,
      "step": 390910
    },
    {
      "epoch": 1.3700771395626772,
      "grad_norm": 2.453125,
      "learning_rate": 3.0183856839853476e-05,
      "loss": 0.7934,
      "step": 390920
    },
    {
      "epoch": 1.370112187069573,
      "grad_norm": 2.671875,
      "learning_rate": 3.0183207811189774e-05,
      "loss": 0.8586,
      "step": 390930
    },
    {
      "epoch": 1.3701472345764685,
      "grad_norm": 2.515625,
      "learning_rate": 3.0182558782526072e-05,
      "loss": 0.7874,
      "step": 390940
    },
    {
      "epoch": 1.370182282083364,
      "grad_norm": 2.75,
      "learning_rate": 3.018190975386237e-05,
      "loss": 0.7149,
      "step": 390950
    },
    {
      "epoch": 1.3702173295902595,
      "grad_norm": 2.984375,
      "learning_rate": 3.0181260725198668e-05,
      "loss": 0.8182,
      "step": 390960
    },
    {
      "epoch": 1.370252377097155,
      "grad_norm": 2.640625,
      "learning_rate": 3.0180611696534966e-05,
      "loss": 0.775,
      "step": 390970
    },
    {
      "epoch": 1.3702874246040508,
      "grad_norm": 2.828125,
      "learning_rate": 3.0179962667871264e-05,
      "loss": 0.8314,
      "step": 390980
    },
    {
      "epoch": 1.3703224721109464,
      "grad_norm": 3.015625,
      "learning_rate": 3.0179313639207562e-05,
      "loss": 0.8959,
      "step": 390990
    },
    {
      "epoch": 1.370357519617842,
      "grad_norm": 2.96875,
      "learning_rate": 3.017866461054386e-05,
      "loss": 0.8958,
      "step": 391000
    },
    {
      "epoch": 1.3703925671247377,
      "grad_norm": 2.9375,
      "learning_rate": 3.0178015581880158e-05,
      "loss": 0.7819,
      "step": 391010
    },
    {
      "epoch": 1.3704276146316332,
      "grad_norm": 2.734375,
      "learning_rate": 3.0177366553216456e-05,
      "loss": 0.7967,
      "step": 391020
    },
    {
      "epoch": 1.3704626621385287,
      "grad_norm": 3.28125,
      "learning_rate": 3.0176717524552754e-05,
      "loss": 0.8436,
      "step": 391030
    },
    {
      "epoch": 1.3704977096454245,
      "grad_norm": 3.03125,
      "learning_rate": 3.0176068495889055e-05,
      "loss": 0.8764,
      "step": 391040
    },
    {
      "epoch": 1.37053275715232,
      "grad_norm": 2.90625,
      "learning_rate": 3.0175419467225353e-05,
      "loss": 0.8808,
      "step": 391050
    },
    {
      "epoch": 1.3705678046592156,
      "grad_norm": 2.75,
      "learning_rate": 3.017477043856165e-05,
      "loss": 0.8334,
      "step": 391060
    },
    {
      "epoch": 1.370602852166111,
      "grad_norm": 2.671875,
      "learning_rate": 3.017412140989795e-05,
      "loss": 0.8682,
      "step": 391070
    },
    {
      "epoch": 1.3706378996730066,
      "grad_norm": 2.640625,
      "learning_rate": 3.0173472381234247e-05,
      "loss": 0.79,
      "step": 391080
    },
    {
      "epoch": 1.3706729471799024,
      "grad_norm": 3.0625,
      "learning_rate": 3.0172823352570545e-05,
      "loss": 0.8421,
      "step": 391090
    },
    {
      "epoch": 1.370707994686798,
      "grad_norm": 3.015625,
      "learning_rate": 3.0172174323906843e-05,
      "loss": 0.9062,
      "step": 391100
    },
    {
      "epoch": 1.3707430421936935,
      "grad_norm": 2.796875,
      "learning_rate": 3.017152529524314e-05,
      "loss": 0.8601,
      "step": 391110
    },
    {
      "epoch": 1.3707780897005892,
      "grad_norm": 3.34375,
      "learning_rate": 3.017087626657944e-05,
      "loss": 0.97,
      "step": 391120
    },
    {
      "epoch": 1.3708131372074848,
      "grad_norm": 3.078125,
      "learning_rate": 3.0170227237915737e-05,
      "loss": 0.7815,
      "step": 391130
    },
    {
      "epoch": 1.3708481847143803,
      "grad_norm": 2.890625,
      "learning_rate": 3.0169578209252035e-05,
      "loss": 0.8579,
      "step": 391140
    },
    {
      "epoch": 1.370883232221276,
      "grad_norm": 2.921875,
      "learning_rate": 3.0168929180588333e-05,
      "loss": 0.8546,
      "step": 391150
    },
    {
      "epoch": 1.3709182797281716,
      "grad_norm": 3.4375,
      "learning_rate": 3.016828015192463e-05,
      "loss": 0.7969,
      "step": 391160
    },
    {
      "epoch": 1.3709533272350671,
      "grad_norm": 2.4375,
      "learning_rate": 3.016763112326093e-05,
      "loss": 0.8289,
      "step": 391170
    },
    {
      "epoch": 1.3709883747419627,
      "grad_norm": 2.796875,
      "learning_rate": 3.016698209459723e-05,
      "loss": 0.834,
      "step": 391180
    },
    {
      "epoch": 1.3710234222488582,
      "grad_norm": 3.4375,
      "learning_rate": 3.016633306593353e-05,
      "loss": 0.8661,
      "step": 391190
    },
    {
      "epoch": 1.371058469755754,
      "grad_norm": 2.71875,
      "learning_rate": 3.0165684037269827e-05,
      "loss": 0.6993,
      "step": 391200
    },
    {
      "epoch": 1.3710935172626495,
      "grad_norm": 2.71875,
      "learning_rate": 3.0165035008606125e-05,
      "loss": 0.82,
      "step": 391210
    },
    {
      "epoch": 1.371128564769545,
      "grad_norm": 2.875,
      "learning_rate": 3.0164385979942423e-05,
      "loss": 0.7793,
      "step": 391220
    },
    {
      "epoch": 1.3711636122764408,
      "grad_norm": 2.84375,
      "learning_rate": 3.016373695127872e-05,
      "loss": 0.8111,
      "step": 391230
    },
    {
      "epoch": 1.3711986597833363,
      "grad_norm": 2.875,
      "learning_rate": 3.0163087922615012e-05,
      "loss": 0.761,
      "step": 391240
    },
    {
      "epoch": 1.3712337072902319,
      "grad_norm": 2.96875,
      "learning_rate": 3.016243889395131e-05,
      "loss": 0.9155,
      "step": 391250
    },
    {
      "epoch": 1.3712687547971276,
      "grad_norm": 3.140625,
      "learning_rate": 3.0161789865287608e-05,
      "loss": 0.8503,
      "step": 391260
    },
    {
      "epoch": 1.3713038023040232,
      "grad_norm": 2.609375,
      "learning_rate": 3.016114083662391e-05,
      "loss": 0.7373,
      "step": 391270
    },
    {
      "epoch": 1.3713388498109187,
      "grad_norm": 2.96875,
      "learning_rate": 3.0160491807960207e-05,
      "loss": 0.9463,
      "step": 391280
    },
    {
      "epoch": 1.3713738973178142,
      "grad_norm": 3.265625,
      "learning_rate": 3.0159842779296505e-05,
      "loss": 0.8356,
      "step": 391290
    },
    {
      "epoch": 1.3714089448247098,
      "grad_norm": 2.9375,
      "learning_rate": 3.0159193750632803e-05,
      "loss": 0.8702,
      "step": 391300
    },
    {
      "epoch": 1.3714439923316055,
      "grad_norm": 3.4375,
      "learning_rate": 3.01585447219691e-05,
      "loss": 0.8628,
      "step": 391310
    },
    {
      "epoch": 1.371479039838501,
      "grad_norm": 3.140625,
      "learning_rate": 3.01578956933054e-05,
      "loss": 0.809,
      "step": 391320
    },
    {
      "epoch": 1.3715140873453966,
      "grad_norm": 3.0625,
      "learning_rate": 3.0157246664641697e-05,
      "loss": 0.757,
      "step": 391330
    },
    {
      "epoch": 1.3715491348522924,
      "grad_norm": 2.640625,
      "learning_rate": 3.0156597635977995e-05,
      "loss": 0.843,
      "step": 391340
    },
    {
      "epoch": 1.3715841823591879,
      "grad_norm": 3.375,
      "learning_rate": 3.0155948607314293e-05,
      "loss": 0.8615,
      "step": 391350
    },
    {
      "epoch": 1.3716192298660834,
      "grad_norm": 2.46875,
      "learning_rate": 3.015529957865059e-05,
      "loss": 0.8423,
      "step": 391360
    },
    {
      "epoch": 1.3716542773729792,
      "grad_norm": 3.078125,
      "learning_rate": 3.015465054998689e-05,
      "loss": 0.802,
      "step": 391370
    },
    {
      "epoch": 1.3716893248798747,
      "grad_norm": 2.640625,
      "learning_rate": 3.0154001521323187e-05,
      "loss": 0.8937,
      "step": 391380
    },
    {
      "epoch": 1.3717243723867703,
      "grad_norm": 3.0625,
      "learning_rate": 3.0153352492659485e-05,
      "loss": 0.8233,
      "step": 391390
    },
    {
      "epoch": 1.3717594198936658,
      "grad_norm": 2.71875,
      "learning_rate": 3.0152703463995783e-05,
      "loss": 0.8943,
      "step": 391400
    },
    {
      "epoch": 1.3717944674005613,
      "grad_norm": 3.125,
      "learning_rate": 3.0152054435332085e-05,
      "loss": 0.8103,
      "step": 391410
    },
    {
      "epoch": 1.371829514907457,
      "grad_norm": 3.4375,
      "learning_rate": 3.0151405406668383e-05,
      "loss": 0.883,
      "step": 391420
    },
    {
      "epoch": 1.3718645624143526,
      "grad_norm": 2.375,
      "learning_rate": 3.015075637800468e-05,
      "loss": 0.8479,
      "step": 391430
    },
    {
      "epoch": 1.3718996099212482,
      "grad_norm": 3.03125,
      "learning_rate": 3.015010734934098e-05,
      "loss": 0.9387,
      "step": 391440
    },
    {
      "epoch": 1.371934657428144,
      "grad_norm": 2.796875,
      "learning_rate": 3.0149458320677277e-05,
      "loss": 0.7787,
      "step": 391450
    },
    {
      "epoch": 1.3719697049350394,
      "grad_norm": 2.640625,
      "learning_rate": 3.0148809292013575e-05,
      "loss": 0.8885,
      "step": 391460
    },
    {
      "epoch": 1.372004752441935,
      "grad_norm": 2.921875,
      "learning_rate": 3.0148160263349873e-05,
      "loss": 0.838,
      "step": 391470
    },
    {
      "epoch": 1.3720397999488307,
      "grad_norm": 2.734375,
      "learning_rate": 3.014751123468617e-05,
      "loss": 0.8261,
      "step": 391480
    },
    {
      "epoch": 1.3720748474557263,
      "grad_norm": 3.078125,
      "learning_rate": 3.014686220602247e-05,
      "loss": 0.8718,
      "step": 391490
    },
    {
      "epoch": 1.3721098949626218,
      "grad_norm": 3.3125,
      "learning_rate": 3.0146213177358767e-05,
      "loss": 0.8521,
      "step": 391500
    },
    {
      "epoch": 1.3721449424695173,
      "grad_norm": 3.03125,
      "learning_rate": 3.0145564148695065e-05,
      "loss": 0.826,
      "step": 391510
    },
    {
      "epoch": 1.372179989976413,
      "grad_norm": 3.109375,
      "learning_rate": 3.0144915120031363e-05,
      "loss": 0.8342,
      "step": 391520
    },
    {
      "epoch": 1.3722150374833086,
      "grad_norm": 3.015625,
      "learning_rate": 3.014426609136766e-05,
      "loss": 0.9206,
      "step": 391530
    },
    {
      "epoch": 1.3722500849902042,
      "grad_norm": 2.953125,
      "learning_rate": 3.014361706270396e-05,
      "loss": 0.7343,
      "step": 391540
    },
    {
      "epoch": 1.3722851324970997,
      "grad_norm": 2.59375,
      "learning_rate": 3.014296803404026e-05,
      "loss": 0.7764,
      "step": 391550
    },
    {
      "epoch": 1.3723201800039955,
      "grad_norm": 2.625,
      "learning_rate": 3.0142319005376558e-05,
      "loss": 0.76,
      "step": 391560
    },
    {
      "epoch": 1.372355227510891,
      "grad_norm": 2.9375,
      "learning_rate": 3.0141669976712856e-05,
      "loss": 0.7594,
      "step": 391570
    },
    {
      "epoch": 1.3723902750177865,
      "grad_norm": 2.9375,
      "learning_rate": 3.0141020948049154e-05,
      "loss": 0.8777,
      "step": 391580
    },
    {
      "epoch": 1.3724253225246823,
      "grad_norm": 2.859375,
      "learning_rate": 3.0140371919385452e-05,
      "loss": 0.8786,
      "step": 391590
    },
    {
      "epoch": 1.3724603700315778,
      "grad_norm": 3.328125,
      "learning_rate": 3.013972289072175e-05,
      "loss": 0.9496,
      "step": 391600
    },
    {
      "epoch": 1.3724954175384734,
      "grad_norm": 3.09375,
      "learning_rate": 3.013907386205804e-05,
      "loss": 0.8673,
      "step": 391610
    },
    {
      "epoch": 1.3725304650453691,
      "grad_norm": 2.90625,
      "learning_rate": 3.013842483339434e-05,
      "loss": 0.7734,
      "step": 391620
    },
    {
      "epoch": 1.3725655125522647,
      "grad_norm": 2.640625,
      "learning_rate": 3.0137775804730637e-05,
      "loss": 0.8164,
      "step": 391630
    },
    {
      "epoch": 1.3726005600591602,
      "grad_norm": 3.234375,
      "learning_rate": 3.013712677606694e-05,
      "loss": 0.9297,
      "step": 391640
    },
    {
      "epoch": 1.3726356075660557,
      "grad_norm": 3.0,
      "learning_rate": 3.0136477747403237e-05,
      "loss": 0.8837,
      "step": 391650
    },
    {
      "epoch": 1.3726706550729513,
      "grad_norm": 2.734375,
      "learning_rate": 3.0135828718739535e-05,
      "loss": 0.8076,
      "step": 391660
    },
    {
      "epoch": 1.372705702579847,
      "grad_norm": 2.890625,
      "learning_rate": 3.0135179690075833e-05,
      "loss": 0.7681,
      "step": 391670
    },
    {
      "epoch": 1.3727407500867426,
      "grad_norm": 3.171875,
      "learning_rate": 3.013453066141213e-05,
      "loss": 0.8944,
      "step": 391680
    },
    {
      "epoch": 1.372775797593638,
      "grad_norm": 3.0625,
      "learning_rate": 3.013388163274843e-05,
      "loss": 0.8813,
      "step": 391690
    },
    {
      "epoch": 1.3728108451005339,
      "grad_norm": 2.71875,
      "learning_rate": 3.0133232604084727e-05,
      "loss": 0.8987,
      "step": 391700
    },
    {
      "epoch": 1.3728458926074294,
      "grad_norm": 2.90625,
      "learning_rate": 3.0132583575421025e-05,
      "loss": 0.8396,
      "step": 391710
    },
    {
      "epoch": 1.372880940114325,
      "grad_norm": 2.59375,
      "learning_rate": 3.0131934546757323e-05,
      "loss": 0.8305,
      "step": 391720
    },
    {
      "epoch": 1.3729159876212207,
      "grad_norm": 2.890625,
      "learning_rate": 3.013128551809362e-05,
      "loss": 0.8302,
      "step": 391730
    },
    {
      "epoch": 1.3729510351281162,
      "grad_norm": 3.015625,
      "learning_rate": 3.013063648942992e-05,
      "loss": 0.8491,
      "step": 391740
    },
    {
      "epoch": 1.3729860826350118,
      "grad_norm": 2.78125,
      "learning_rate": 3.0129987460766217e-05,
      "loss": 0.8749,
      "step": 391750
    },
    {
      "epoch": 1.3730211301419073,
      "grad_norm": 2.953125,
      "learning_rate": 3.0129338432102515e-05,
      "loss": 0.7873,
      "step": 391760
    },
    {
      "epoch": 1.3730561776488028,
      "grad_norm": 3.171875,
      "learning_rate": 3.0128689403438813e-05,
      "loss": 0.9127,
      "step": 391770
    },
    {
      "epoch": 1.3730912251556986,
      "grad_norm": 2.859375,
      "learning_rate": 3.0128040374775114e-05,
      "loss": 0.8466,
      "step": 391780
    },
    {
      "epoch": 1.3731262726625941,
      "grad_norm": 2.96875,
      "learning_rate": 3.0127391346111412e-05,
      "loss": 0.9397,
      "step": 391790
    },
    {
      "epoch": 1.3731613201694897,
      "grad_norm": 2.671875,
      "learning_rate": 3.012674231744771e-05,
      "loss": 0.8607,
      "step": 391800
    },
    {
      "epoch": 1.3731963676763854,
      "grad_norm": 3.09375,
      "learning_rate": 3.0126093288784008e-05,
      "loss": 0.8744,
      "step": 391810
    },
    {
      "epoch": 1.373231415183281,
      "grad_norm": 2.375,
      "learning_rate": 3.0125444260120306e-05,
      "loss": 0.908,
      "step": 391820
    },
    {
      "epoch": 1.3732664626901765,
      "grad_norm": 3.09375,
      "learning_rate": 3.0124795231456604e-05,
      "loss": 0.8291,
      "step": 391830
    },
    {
      "epoch": 1.3733015101970723,
      "grad_norm": 2.890625,
      "learning_rate": 3.0124146202792902e-05,
      "loss": 0.782,
      "step": 391840
    },
    {
      "epoch": 1.3733365577039678,
      "grad_norm": 2.828125,
      "learning_rate": 3.01234971741292e-05,
      "loss": 0.887,
      "step": 391850
    },
    {
      "epoch": 1.3733716052108633,
      "grad_norm": 2.96875,
      "learning_rate": 3.0122848145465498e-05,
      "loss": 0.8213,
      "step": 391860
    },
    {
      "epoch": 1.3734066527177589,
      "grad_norm": 2.71875,
      "learning_rate": 3.0122199116801796e-05,
      "loss": 0.8597,
      "step": 391870
    },
    {
      "epoch": 1.3734417002246544,
      "grad_norm": 2.96875,
      "learning_rate": 3.0121550088138094e-05,
      "loss": 0.8595,
      "step": 391880
    },
    {
      "epoch": 1.3734767477315502,
      "grad_norm": 2.828125,
      "learning_rate": 3.0120901059474392e-05,
      "loss": 0.7939,
      "step": 391890
    },
    {
      "epoch": 1.3735117952384457,
      "grad_norm": 2.5,
      "learning_rate": 3.012025203081069e-05,
      "loss": 0.7426,
      "step": 391900
    },
    {
      "epoch": 1.3735468427453412,
      "grad_norm": 3.234375,
      "learning_rate": 3.0119603002146988e-05,
      "loss": 0.875,
      "step": 391910
    },
    {
      "epoch": 1.373581890252237,
      "grad_norm": 2.71875,
      "learning_rate": 3.011895397348329e-05,
      "loss": 0.791,
      "step": 391920
    },
    {
      "epoch": 1.3736169377591325,
      "grad_norm": 3.203125,
      "learning_rate": 3.0118304944819588e-05,
      "loss": 0.8868,
      "step": 391930
    },
    {
      "epoch": 1.373651985266028,
      "grad_norm": 2.765625,
      "learning_rate": 3.0117655916155886e-05,
      "loss": 0.8679,
      "step": 391940
    },
    {
      "epoch": 1.3736870327729238,
      "grad_norm": 3.0625,
      "learning_rate": 3.0117006887492184e-05,
      "loss": 0.8253,
      "step": 391950
    },
    {
      "epoch": 1.3737220802798193,
      "grad_norm": 3.1875,
      "learning_rate": 3.011635785882848e-05,
      "loss": 0.7836,
      "step": 391960
    },
    {
      "epoch": 1.3737571277867149,
      "grad_norm": 2.875,
      "learning_rate": 3.011570883016478e-05,
      "loss": 0.8412,
      "step": 391970
    },
    {
      "epoch": 1.3737921752936104,
      "grad_norm": 2.96875,
      "learning_rate": 3.0115059801501078e-05,
      "loss": 0.8448,
      "step": 391980
    },
    {
      "epoch": 1.373827222800506,
      "grad_norm": 2.546875,
      "learning_rate": 3.011441077283737e-05,
      "loss": 0.8042,
      "step": 391990
    },
    {
      "epoch": 1.3738622703074017,
      "grad_norm": 2.96875,
      "learning_rate": 3.011376174417367e-05,
      "loss": 0.8671,
      "step": 392000
    },
    {
      "epoch": 1.3738973178142972,
      "grad_norm": 3.296875,
      "learning_rate": 3.0113112715509968e-05,
      "loss": 0.9225,
      "step": 392010
    },
    {
      "epoch": 1.3739323653211928,
      "grad_norm": 2.984375,
      "learning_rate": 3.0112463686846266e-05,
      "loss": 0.8611,
      "step": 392020
    },
    {
      "epoch": 1.3739674128280885,
      "grad_norm": 2.796875,
      "learning_rate": 3.0111814658182564e-05,
      "loss": 0.7228,
      "step": 392030
    },
    {
      "epoch": 1.374002460334984,
      "grad_norm": 3.03125,
      "learning_rate": 3.0111165629518862e-05,
      "loss": 0.889,
      "step": 392040
    },
    {
      "epoch": 1.3740375078418796,
      "grad_norm": 2.703125,
      "learning_rate": 3.011051660085516e-05,
      "loss": 0.7698,
      "step": 392050
    },
    {
      "epoch": 1.3740725553487754,
      "grad_norm": 3.078125,
      "learning_rate": 3.0109867572191458e-05,
      "loss": 0.9098,
      "step": 392060
    },
    {
      "epoch": 1.374107602855671,
      "grad_norm": 2.921875,
      "learning_rate": 3.0109218543527756e-05,
      "loss": 0.8164,
      "step": 392070
    },
    {
      "epoch": 1.3741426503625664,
      "grad_norm": 2.8125,
      "learning_rate": 3.0108569514864054e-05,
      "loss": 0.8487,
      "step": 392080
    },
    {
      "epoch": 1.374177697869462,
      "grad_norm": 3.046875,
      "learning_rate": 3.0107920486200352e-05,
      "loss": 0.8383,
      "step": 392090
    },
    {
      "epoch": 1.3742127453763577,
      "grad_norm": 3.0625,
      "learning_rate": 3.010727145753665e-05,
      "loss": 0.8418,
      "step": 392100
    },
    {
      "epoch": 1.3742477928832533,
      "grad_norm": 2.890625,
      "learning_rate": 3.0106622428872948e-05,
      "loss": 0.8508,
      "step": 392110
    },
    {
      "epoch": 1.3742828403901488,
      "grad_norm": 3.328125,
      "learning_rate": 3.0105973400209246e-05,
      "loss": 0.7696,
      "step": 392120
    },
    {
      "epoch": 1.3743178878970443,
      "grad_norm": 3.125,
      "learning_rate": 3.0105324371545544e-05,
      "loss": 0.844,
      "step": 392130
    },
    {
      "epoch": 1.37435293540394,
      "grad_norm": 2.46875,
      "learning_rate": 3.0104675342881846e-05,
      "loss": 0.8093,
      "step": 392140
    },
    {
      "epoch": 1.3743879829108356,
      "grad_norm": 2.46875,
      "learning_rate": 3.0104026314218144e-05,
      "loss": 0.8715,
      "step": 392150
    },
    {
      "epoch": 1.3744230304177312,
      "grad_norm": 2.75,
      "learning_rate": 3.010337728555444e-05,
      "loss": 0.7738,
      "step": 392160
    },
    {
      "epoch": 1.374458077924627,
      "grad_norm": 3.03125,
      "learning_rate": 3.010272825689074e-05,
      "loss": 0.873,
      "step": 392170
    },
    {
      "epoch": 1.3744931254315225,
      "grad_norm": 2.671875,
      "learning_rate": 3.0102079228227038e-05,
      "loss": 0.9008,
      "step": 392180
    },
    {
      "epoch": 1.374528172938418,
      "grad_norm": 3.40625,
      "learning_rate": 3.0101430199563336e-05,
      "loss": 0.7799,
      "step": 392190
    },
    {
      "epoch": 1.3745632204453138,
      "grad_norm": 3.046875,
      "learning_rate": 3.0100781170899634e-05,
      "loss": 0.774,
      "step": 392200
    },
    {
      "epoch": 1.3745982679522093,
      "grad_norm": 3.125,
      "learning_rate": 3.010013214223593e-05,
      "loss": 0.8631,
      "step": 392210
    },
    {
      "epoch": 1.3746333154591048,
      "grad_norm": 2.640625,
      "learning_rate": 3.009948311357223e-05,
      "loss": 0.7912,
      "step": 392220
    },
    {
      "epoch": 1.3746683629660004,
      "grad_norm": 2.84375,
      "learning_rate": 3.0098834084908528e-05,
      "loss": 0.7987,
      "step": 392230
    },
    {
      "epoch": 1.374703410472896,
      "grad_norm": 2.75,
      "learning_rate": 3.0098185056244826e-05,
      "loss": 0.8279,
      "step": 392240
    },
    {
      "epoch": 1.3747384579797917,
      "grad_norm": 2.90625,
      "learning_rate": 3.0097536027581124e-05,
      "loss": 0.8827,
      "step": 392250
    },
    {
      "epoch": 1.3747735054866872,
      "grad_norm": 3.078125,
      "learning_rate": 3.009688699891742e-05,
      "loss": 0.9004,
      "step": 392260
    },
    {
      "epoch": 1.3748085529935827,
      "grad_norm": 2.9375,
      "learning_rate": 3.009623797025372e-05,
      "loss": 0.7738,
      "step": 392270
    },
    {
      "epoch": 1.3748436005004785,
      "grad_norm": 2.828125,
      "learning_rate": 3.009558894159002e-05,
      "loss": 0.8325,
      "step": 392280
    },
    {
      "epoch": 1.374878648007374,
      "grad_norm": 3.8125,
      "learning_rate": 3.009493991292632e-05,
      "loss": 0.824,
      "step": 392290
    },
    {
      "epoch": 1.3749136955142696,
      "grad_norm": 3.09375,
      "learning_rate": 3.0094290884262617e-05,
      "loss": 0.844,
      "step": 392300
    },
    {
      "epoch": 1.3749487430211653,
      "grad_norm": 2.625,
      "learning_rate": 3.0093641855598915e-05,
      "loss": 0.8753,
      "step": 392310
    },
    {
      "epoch": 1.3749837905280609,
      "grad_norm": 2.96875,
      "learning_rate": 3.0092992826935213e-05,
      "loss": 0.8421,
      "step": 392320
    },
    {
      "epoch": 1.3750188380349564,
      "grad_norm": 3.796875,
      "learning_rate": 3.009234379827151e-05,
      "loss": 0.8231,
      "step": 392330
    },
    {
      "epoch": 1.375053885541852,
      "grad_norm": 2.515625,
      "learning_rate": 3.009169476960781e-05,
      "loss": 0.8608,
      "step": 392340
    },
    {
      "epoch": 1.3750889330487475,
      "grad_norm": 2.953125,
      "learning_rate": 3.0091045740944107e-05,
      "loss": 0.8581,
      "step": 392350
    },
    {
      "epoch": 1.3751239805556432,
      "grad_norm": 2.6875,
      "learning_rate": 3.0090396712280405e-05,
      "loss": 0.7962,
      "step": 392360
    },
    {
      "epoch": 1.3751590280625388,
      "grad_norm": 2.65625,
      "learning_rate": 3.00897476836167e-05,
      "loss": 0.8247,
      "step": 392370
    },
    {
      "epoch": 1.3751940755694343,
      "grad_norm": 2.984375,
      "learning_rate": 3.0089098654952998e-05,
      "loss": 0.833,
      "step": 392380
    },
    {
      "epoch": 1.37522912307633,
      "grad_norm": 3.21875,
      "learning_rate": 3.0088449626289296e-05,
      "loss": 0.8646,
      "step": 392390
    },
    {
      "epoch": 1.3752641705832256,
      "grad_norm": 2.9375,
      "learning_rate": 3.0087800597625594e-05,
      "loss": 0.7729,
      "step": 392400
    },
    {
      "epoch": 1.3752992180901211,
      "grad_norm": 3.25,
      "learning_rate": 3.008715156896189e-05,
      "loss": 0.8535,
      "step": 392410
    },
    {
      "epoch": 1.3753342655970169,
      "grad_norm": 2.375,
      "learning_rate": 3.008650254029819e-05,
      "loss": 0.8534,
      "step": 392420
    },
    {
      "epoch": 1.3753693131039124,
      "grad_norm": 2.9375,
      "learning_rate": 3.0085853511634488e-05,
      "loss": 0.8306,
      "step": 392430
    },
    {
      "epoch": 1.375404360610808,
      "grad_norm": 2.84375,
      "learning_rate": 3.0085204482970786e-05,
      "loss": 0.8626,
      "step": 392440
    },
    {
      "epoch": 1.3754394081177035,
      "grad_norm": 2.9375,
      "learning_rate": 3.0084555454307084e-05,
      "loss": 0.7977,
      "step": 392450
    },
    {
      "epoch": 1.375474455624599,
      "grad_norm": 2.984375,
      "learning_rate": 3.008390642564338e-05,
      "loss": 0.8485,
      "step": 392460
    },
    {
      "epoch": 1.3755095031314948,
      "grad_norm": 2.828125,
      "learning_rate": 3.008325739697968e-05,
      "loss": 0.8209,
      "step": 392470
    },
    {
      "epoch": 1.3755445506383903,
      "grad_norm": 2.59375,
      "learning_rate": 3.0082608368315978e-05,
      "loss": 0.793,
      "step": 392480
    },
    {
      "epoch": 1.3755795981452859,
      "grad_norm": 2.734375,
      "learning_rate": 3.0081959339652276e-05,
      "loss": 0.8394,
      "step": 392490
    },
    {
      "epoch": 1.3756146456521816,
      "grad_norm": 2.859375,
      "learning_rate": 3.0081310310988574e-05,
      "loss": 0.8231,
      "step": 392500
    },
    {
      "epoch": 1.3756496931590771,
      "grad_norm": 2.984375,
      "learning_rate": 3.0080661282324875e-05,
      "loss": 0.8386,
      "step": 392510
    },
    {
      "epoch": 1.3756847406659727,
      "grad_norm": 2.4375,
      "learning_rate": 3.0080012253661173e-05,
      "loss": 0.7834,
      "step": 392520
    },
    {
      "epoch": 1.3757197881728684,
      "grad_norm": 2.9375,
      "learning_rate": 3.007936322499747e-05,
      "loss": 0.8087,
      "step": 392530
    },
    {
      "epoch": 1.375754835679764,
      "grad_norm": 2.734375,
      "learning_rate": 3.007871419633377e-05,
      "loss": 0.8343,
      "step": 392540
    },
    {
      "epoch": 1.3757898831866595,
      "grad_norm": 2.609375,
      "learning_rate": 3.0078065167670067e-05,
      "loss": 0.7985,
      "step": 392550
    },
    {
      "epoch": 1.375824930693555,
      "grad_norm": 3.015625,
      "learning_rate": 3.0077416139006365e-05,
      "loss": 0.8667,
      "step": 392560
    },
    {
      "epoch": 1.3758599782004506,
      "grad_norm": 2.71875,
      "learning_rate": 3.0076767110342663e-05,
      "loss": 0.7405,
      "step": 392570
    },
    {
      "epoch": 1.3758950257073463,
      "grad_norm": 3.0625,
      "learning_rate": 3.007611808167896e-05,
      "loss": 0.7933,
      "step": 392580
    },
    {
      "epoch": 1.3759300732142419,
      "grad_norm": 3.265625,
      "learning_rate": 3.007546905301526e-05,
      "loss": 0.7992,
      "step": 392590
    },
    {
      "epoch": 1.3759651207211374,
      "grad_norm": 2.875,
      "learning_rate": 3.0074820024351557e-05,
      "loss": 0.8095,
      "step": 392600
    },
    {
      "epoch": 1.3760001682280332,
      "grad_norm": 2.859375,
      "learning_rate": 3.0074170995687855e-05,
      "loss": 0.9307,
      "step": 392610
    },
    {
      "epoch": 1.3760352157349287,
      "grad_norm": 3.15625,
      "learning_rate": 3.0073521967024153e-05,
      "loss": 0.8413,
      "step": 392620
    },
    {
      "epoch": 1.3760702632418242,
      "grad_norm": 2.703125,
      "learning_rate": 3.007287293836045e-05,
      "loss": 0.9107,
      "step": 392630
    },
    {
      "epoch": 1.37610531074872,
      "grad_norm": 3.28125,
      "learning_rate": 3.007222390969675e-05,
      "loss": 0.9482,
      "step": 392640
    },
    {
      "epoch": 1.3761403582556155,
      "grad_norm": 2.96875,
      "learning_rate": 3.007157488103305e-05,
      "loss": 0.8446,
      "step": 392650
    },
    {
      "epoch": 1.376175405762511,
      "grad_norm": 2.734375,
      "learning_rate": 3.007092585236935e-05,
      "loss": 0.8993,
      "step": 392660
    },
    {
      "epoch": 1.3762104532694066,
      "grad_norm": 2.4375,
      "learning_rate": 3.0070276823705646e-05,
      "loss": 0.8364,
      "step": 392670
    },
    {
      "epoch": 1.3762455007763021,
      "grad_norm": 3.0,
      "learning_rate": 3.0069627795041944e-05,
      "loss": 0.9176,
      "step": 392680
    },
    {
      "epoch": 1.376280548283198,
      "grad_norm": 2.671875,
      "learning_rate": 3.0068978766378242e-05,
      "loss": 0.8222,
      "step": 392690
    },
    {
      "epoch": 1.3763155957900934,
      "grad_norm": 2.375,
      "learning_rate": 3.006832973771454e-05,
      "loss": 0.8282,
      "step": 392700
    },
    {
      "epoch": 1.376350643296989,
      "grad_norm": 2.921875,
      "learning_rate": 3.006768070905084e-05,
      "loss": 0.8891,
      "step": 392710
    },
    {
      "epoch": 1.3763856908038847,
      "grad_norm": 3.3125,
      "learning_rate": 3.0067031680387136e-05,
      "loss": 0.7605,
      "step": 392720
    },
    {
      "epoch": 1.3764207383107803,
      "grad_norm": 2.984375,
      "learning_rate": 3.0066382651723434e-05,
      "loss": 0.8235,
      "step": 392730
    },
    {
      "epoch": 1.3764557858176758,
      "grad_norm": 2.75,
      "learning_rate": 3.006573362305973e-05,
      "loss": 0.8121,
      "step": 392740
    },
    {
      "epoch": 1.3764908333245716,
      "grad_norm": 2.78125,
      "learning_rate": 3.0065084594396027e-05,
      "loss": 0.8698,
      "step": 392750
    },
    {
      "epoch": 1.376525880831467,
      "grad_norm": 2.546875,
      "learning_rate": 3.0064435565732325e-05,
      "loss": 0.9469,
      "step": 392760
    },
    {
      "epoch": 1.3765609283383626,
      "grad_norm": 3.265625,
      "learning_rate": 3.0063786537068623e-05,
      "loss": 0.804,
      "step": 392770
    },
    {
      "epoch": 1.3765959758452582,
      "grad_norm": 2.84375,
      "learning_rate": 3.006313750840492e-05,
      "loss": 0.8247,
      "step": 392780
    },
    {
      "epoch": 1.376631023352154,
      "grad_norm": 2.828125,
      "learning_rate": 3.006248847974122e-05,
      "loss": 0.8661,
      "step": 392790
    },
    {
      "epoch": 1.3766660708590495,
      "grad_norm": 2.53125,
      "learning_rate": 3.0061839451077517e-05,
      "loss": 0.8243,
      "step": 392800
    },
    {
      "epoch": 1.376701118365945,
      "grad_norm": 3.234375,
      "learning_rate": 3.0061190422413815e-05,
      "loss": 0.8497,
      "step": 392810
    },
    {
      "epoch": 1.3767361658728405,
      "grad_norm": 2.765625,
      "learning_rate": 3.0060541393750113e-05,
      "loss": 0.9011,
      "step": 392820
    },
    {
      "epoch": 1.3767712133797363,
      "grad_norm": 3.03125,
      "learning_rate": 3.005989236508641e-05,
      "loss": 0.8377,
      "step": 392830
    },
    {
      "epoch": 1.3768062608866318,
      "grad_norm": 2.96875,
      "learning_rate": 3.005924333642271e-05,
      "loss": 0.8827,
      "step": 392840
    },
    {
      "epoch": 1.3768413083935274,
      "grad_norm": 3.390625,
      "learning_rate": 3.0058594307759007e-05,
      "loss": 0.8466,
      "step": 392850
    },
    {
      "epoch": 1.3768763559004231,
      "grad_norm": 3.203125,
      "learning_rate": 3.0057945279095305e-05,
      "loss": 0.8923,
      "step": 392860
    },
    {
      "epoch": 1.3769114034073187,
      "grad_norm": 2.859375,
      "learning_rate": 3.0057296250431603e-05,
      "loss": 0.8797,
      "step": 392870
    },
    {
      "epoch": 1.3769464509142142,
      "grad_norm": 2.953125,
      "learning_rate": 3.0056647221767904e-05,
      "loss": 0.8759,
      "step": 392880
    },
    {
      "epoch": 1.37698149842111,
      "grad_norm": 3.203125,
      "learning_rate": 3.0055998193104202e-05,
      "loss": 0.9212,
      "step": 392890
    },
    {
      "epoch": 1.3770165459280055,
      "grad_norm": 2.953125,
      "learning_rate": 3.00553491644405e-05,
      "loss": 0.8696,
      "step": 392900
    },
    {
      "epoch": 1.377051593434901,
      "grad_norm": 2.640625,
      "learning_rate": 3.00547001357768e-05,
      "loss": 0.9571,
      "step": 392910
    },
    {
      "epoch": 1.3770866409417966,
      "grad_norm": 2.578125,
      "learning_rate": 3.0054051107113096e-05,
      "loss": 0.8653,
      "step": 392920
    },
    {
      "epoch": 1.377121688448692,
      "grad_norm": 2.5,
      "learning_rate": 3.0053402078449394e-05,
      "loss": 0.806,
      "step": 392930
    },
    {
      "epoch": 1.3771567359555879,
      "grad_norm": 2.8125,
      "learning_rate": 3.0052753049785692e-05,
      "loss": 0.7975,
      "step": 392940
    },
    {
      "epoch": 1.3771917834624834,
      "grad_norm": 2.734375,
      "learning_rate": 3.005210402112199e-05,
      "loss": 0.7738,
      "step": 392950
    },
    {
      "epoch": 1.377226830969379,
      "grad_norm": 2.671875,
      "learning_rate": 3.005145499245829e-05,
      "loss": 0.8471,
      "step": 392960
    },
    {
      "epoch": 1.3772618784762747,
      "grad_norm": 2.59375,
      "learning_rate": 3.0050805963794586e-05,
      "loss": 0.7721,
      "step": 392970
    },
    {
      "epoch": 1.3772969259831702,
      "grad_norm": 2.609375,
      "learning_rate": 3.0050156935130884e-05,
      "loss": 0.7824,
      "step": 392980
    },
    {
      "epoch": 1.3773319734900658,
      "grad_norm": 3.34375,
      "learning_rate": 3.0049507906467182e-05,
      "loss": 0.9102,
      "step": 392990
    },
    {
      "epoch": 1.3773670209969615,
      "grad_norm": 2.875,
      "learning_rate": 3.004885887780348e-05,
      "loss": 0.8236,
      "step": 393000
    },
    {
      "epoch": 1.377402068503857,
      "grad_norm": 2.984375,
      "learning_rate": 3.004820984913978e-05,
      "loss": 0.8789,
      "step": 393010
    },
    {
      "epoch": 1.3774371160107526,
      "grad_norm": 2.515625,
      "learning_rate": 3.004756082047608e-05,
      "loss": 0.8129,
      "step": 393020
    },
    {
      "epoch": 1.3774721635176481,
      "grad_norm": 2.90625,
      "learning_rate": 3.0046911791812378e-05,
      "loss": 0.8054,
      "step": 393030
    },
    {
      "epoch": 1.3775072110245437,
      "grad_norm": 2.640625,
      "learning_rate": 3.0046262763148676e-05,
      "loss": 0.898,
      "step": 393040
    },
    {
      "epoch": 1.3775422585314394,
      "grad_norm": 3.015625,
      "learning_rate": 3.0045613734484974e-05,
      "loss": 0.9166,
      "step": 393050
    },
    {
      "epoch": 1.377577306038335,
      "grad_norm": 3.015625,
      "learning_rate": 3.0044964705821272e-05,
      "loss": 0.8551,
      "step": 393060
    },
    {
      "epoch": 1.3776123535452305,
      "grad_norm": 2.84375,
      "learning_rate": 3.004431567715757e-05,
      "loss": 0.8592,
      "step": 393070
    },
    {
      "epoch": 1.3776474010521262,
      "grad_norm": 2.90625,
      "learning_rate": 3.0043666648493868e-05,
      "loss": 0.9092,
      "step": 393080
    },
    {
      "epoch": 1.3776824485590218,
      "grad_norm": 2.8125,
      "learning_rate": 3.0043017619830166e-05,
      "loss": 0.7943,
      "step": 393090
    },
    {
      "epoch": 1.3777174960659173,
      "grad_norm": 2.546875,
      "learning_rate": 3.0042368591166464e-05,
      "loss": 0.8191,
      "step": 393100
    },
    {
      "epoch": 1.377752543572813,
      "grad_norm": 2.953125,
      "learning_rate": 3.0041719562502762e-05,
      "loss": 0.806,
      "step": 393110
    },
    {
      "epoch": 1.3777875910797086,
      "grad_norm": 2.96875,
      "learning_rate": 3.0041070533839056e-05,
      "loss": 0.9347,
      "step": 393120
    },
    {
      "epoch": 1.3778226385866041,
      "grad_norm": 3.234375,
      "learning_rate": 3.0040421505175354e-05,
      "loss": 0.8494,
      "step": 393130
    },
    {
      "epoch": 1.3778576860934997,
      "grad_norm": 2.796875,
      "learning_rate": 3.0039772476511652e-05,
      "loss": 0.7951,
      "step": 393140
    },
    {
      "epoch": 1.3778927336003952,
      "grad_norm": 2.734375,
      "learning_rate": 3.003912344784795e-05,
      "loss": 0.8605,
      "step": 393150
    },
    {
      "epoch": 1.377927781107291,
      "grad_norm": 2.78125,
      "learning_rate": 3.003847441918425e-05,
      "loss": 0.8746,
      "step": 393160
    },
    {
      "epoch": 1.3779628286141865,
      "grad_norm": 3.21875,
      "learning_rate": 3.0037825390520546e-05,
      "loss": 0.9346,
      "step": 393170
    },
    {
      "epoch": 1.377997876121082,
      "grad_norm": 3.265625,
      "learning_rate": 3.0037176361856844e-05,
      "loss": 0.9381,
      "step": 393180
    },
    {
      "epoch": 1.3780329236279778,
      "grad_norm": 3.046875,
      "learning_rate": 3.0036527333193142e-05,
      "loss": 0.9177,
      "step": 393190
    },
    {
      "epoch": 1.3780679711348733,
      "grad_norm": 3.03125,
      "learning_rate": 3.003587830452944e-05,
      "loss": 0.8351,
      "step": 393200
    },
    {
      "epoch": 1.3781030186417689,
      "grad_norm": 2.796875,
      "learning_rate": 3.003522927586574e-05,
      "loss": 0.8589,
      "step": 393210
    },
    {
      "epoch": 1.3781380661486646,
      "grad_norm": 2.921875,
      "learning_rate": 3.0034580247202036e-05,
      "loss": 0.8995,
      "step": 393220
    },
    {
      "epoch": 1.3781731136555602,
      "grad_norm": 3.25,
      "learning_rate": 3.0033931218538334e-05,
      "loss": 0.8904,
      "step": 393230
    },
    {
      "epoch": 1.3782081611624557,
      "grad_norm": 3.0,
      "learning_rate": 3.0033282189874636e-05,
      "loss": 0.815,
      "step": 393240
    },
    {
      "epoch": 1.3782432086693512,
      "grad_norm": 3.375,
      "learning_rate": 3.0032633161210934e-05,
      "loss": 0.8305,
      "step": 393250
    },
    {
      "epoch": 1.3782782561762468,
      "grad_norm": 2.75,
      "learning_rate": 3.0031984132547232e-05,
      "loss": 0.8301,
      "step": 393260
    },
    {
      "epoch": 1.3783133036831425,
      "grad_norm": 2.40625,
      "learning_rate": 3.003133510388353e-05,
      "loss": 0.8081,
      "step": 393270
    },
    {
      "epoch": 1.378348351190038,
      "grad_norm": 2.953125,
      "learning_rate": 3.0030686075219828e-05,
      "loss": 0.8395,
      "step": 393280
    },
    {
      "epoch": 1.3783833986969336,
      "grad_norm": 3.09375,
      "learning_rate": 3.0030037046556126e-05,
      "loss": 0.8442,
      "step": 393290
    },
    {
      "epoch": 1.3784184462038294,
      "grad_norm": 3.09375,
      "learning_rate": 3.0029388017892424e-05,
      "loss": 0.8402,
      "step": 393300
    },
    {
      "epoch": 1.378453493710725,
      "grad_norm": 2.765625,
      "learning_rate": 3.0028738989228722e-05,
      "loss": 0.8678,
      "step": 393310
    },
    {
      "epoch": 1.3784885412176204,
      "grad_norm": 2.828125,
      "learning_rate": 3.002808996056502e-05,
      "loss": 0.7746,
      "step": 393320
    },
    {
      "epoch": 1.3785235887245162,
      "grad_norm": 2.71875,
      "learning_rate": 3.0027440931901318e-05,
      "loss": 0.8026,
      "step": 393330
    },
    {
      "epoch": 1.3785586362314117,
      "grad_norm": 2.765625,
      "learning_rate": 3.0026791903237616e-05,
      "loss": 0.8798,
      "step": 393340
    },
    {
      "epoch": 1.3785936837383073,
      "grad_norm": 2.890625,
      "learning_rate": 3.0026142874573914e-05,
      "loss": 0.8862,
      "step": 393350
    },
    {
      "epoch": 1.3786287312452028,
      "grad_norm": 3.03125,
      "learning_rate": 3.0025493845910212e-05,
      "loss": 0.8899,
      "step": 393360
    },
    {
      "epoch": 1.3786637787520983,
      "grad_norm": 2.890625,
      "learning_rate": 3.002484481724651e-05,
      "loss": 0.8491,
      "step": 393370
    },
    {
      "epoch": 1.378698826258994,
      "grad_norm": 3.015625,
      "learning_rate": 3.002419578858281e-05,
      "loss": 0.8188,
      "step": 393380
    },
    {
      "epoch": 1.3787338737658896,
      "grad_norm": 2.625,
      "learning_rate": 3.002354675991911e-05,
      "loss": 0.814,
      "step": 393390
    },
    {
      "epoch": 1.3787689212727852,
      "grad_norm": 2.65625,
      "learning_rate": 3.0022897731255407e-05,
      "loss": 0.8774,
      "step": 393400
    },
    {
      "epoch": 1.378803968779681,
      "grad_norm": 3.0,
      "learning_rate": 3.0022248702591705e-05,
      "loss": 0.8251,
      "step": 393410
    },
    {
      "epoch": 1.3788390162865765,
      "grad_norm": 2.703125,
      "learning_rate": 3.0021599673928003e-05,
      "loss": 0.8384,
      "step": 393420
    },
    {
      "epoch": 1.378874063793472,
      "grad_norm": 2.671875,
      "learning_rate": 3.00209506452643e-05,
      "loss": 0.766,
      "step": 393430
    },
    {
      "epoch": 1.3789091113003678,
      "grad_norm": 2.6875,
      "learning_rate": 3.00203016166006e-05,
      "loss": 0.853,
      "step": 393440
    },
    {
      "epoch": 1.3789441588072633,
      "grad_norm": 3.171875,
      "learning_rate": 3.0019652587936897e-05,
      "loss": 0.8879,
      "step": 393450
    },
    {
      "epoch": 1.3789792063141588,
      "grad_norm": 3.0625,
      "learning_rate": 3.0019003559273195e-05,
      "loss": 0.8294,
      "step": 393460
    },
    {
      "epoch": 1.3790142538210544,
      "grad_norm": 2.71875,
      "learning_rate": 3.0018354530609493e-05,
      "loss": 0.932,
      "step": 393470
    },
    {
      "epoch": 1.3790493013279501,
      "grad_norm": 2.515625,
      "learning_rate": 3.001770550194579e-05,
      "loss": 0.8421,
      "step": 393480
    },
    {
      "epoch": 1.3790843488348457,
      "grad_norm": 2.671875,
      "learning_rate": 3.0017056473282086e-05,
      "loss": 0.862,
      "step": 393490
    },
    {
      "epoch": 1.3791193963417412,
      "grad_norm": 3.640625,
      "learning_rate": 3.0016407444618384e-05,
      "loss": 0.9147,
      "step": 393500
    },
    {
      "epoch": 1.3791544438486367,
      "grad_norm": 2.875,
      "learning_rate": 3.0015758415954682e-05,
      "loss": 0.855,
      "step": 393510
    },
    {
      "epoch": 1.3791894913555325,
      "grad_norm": 3.15625,
      "learning_rate": 3.001510938729098e-05,
      "loss": 0.8671,
      "step": 393520
    },
    {
      "epoch": 1.379224538862428,
      "grad_norm": 3.203125,
      "learning_rate": 3.0014460358627278e-05,
      "loss": 0.8254,
      "step": 393530
    },
    {
      "epoch": 1.3792595863693236,
      "grad_norm": 2.6875,
      "learning_rate": 3.0013811329963576e-05,
      "loss": 0.8541,
      "step": 393540
    },
    {
      "epoch": 1.3792946338762193,
      "grad_norm": 3.046875,
      "learning_rate": 3.0013162301299874e-05,
      "loss": 0.8133,
      "step": 393550
    },
    {
      "epoch": 1.3793296813831148,
      "grad_norm": 2.71875,
      "learning_rate": 3.0012513272636172e-05,
      "loss": 0.8657,
      "step": 393560
    },
    {
      "epoch": 1.3793647288900104,
      "grad_norm": 2.734375,
      "learning_rate": 3.001186424397247e-05,
      "loss": 0.8616,
      "step": 393570
    },
    {
      "epoch": 1.3793997763969061,
      "grad_norm": 3.0625,
      "learning_rate": 3.0011215215308768e-05,
      "loss": 0.9003,
      "step": 393580
    },
    {
      "epoch": 1.3794348239038017,
      "grad_norm": 2.625,
      "learning_rate": 3.0010566186645066e-05,
      "loss": 0.8737,
      "step": 393590
    },
    {
      "epoch": 1.3794698714106972,
      "grad_norm": 2.84375,
      "learning_rate": 3.0009917157981364e-05,
      "loss": 0.8248,
      "step": 393600
    },
    {
      "epoch": 1.3795049189175927,
      "grad_norm": 3.390625,
      "learning_rate": 3.0009268129317665e-05,
      "loss": 0.8956,
      "step": 393610
    },
    {
      "epoch": 1.3795399664244883,
      "grad_norm": 3.46875,
      "learning_rate": 3.0008619100653963e-05,
      "loss": 0.8297,
      "step": 393620
    },
    {
      "epoch": 1.379575013931384,
      "grad_norm": 2.84375,
      "learning_rate": 3.000797007199026e-05,
      "loss": 0.8412,
      "step": 393630
    },
    {
      "epoch": 1.3796100614382796,
      "grad_norm": 2.890625,
      "learning_rate": 3.000732104332656e-05,
      "loss": 0.8232,
      "step": 393640
    },
    {
      "epoch": 1.3796451089451751,
      "grad_norm": 2.890625,
      "learning_rate": 3.0006672014662857e-05,
      "loss": 0.8313,
      "step": 393650
    },
    {
      "epoch": 1.3796801564520709,
      "grad_norm": 3.3125,
      "learning_rate": 3.0006022985999155e-05,
      "loss": 0.8736,
      "step": 393660
    },
    {
      "epoch": 1.3797152039589664,
      "grad_norm": 2.59375,
      "learning_rate": 3.0005373957335453e-05,
      "loss": 0.9007,
      "step": 393670
    },
    {
      "epoch": 1.379750251465862,
      "grad_norm": 2.84375,
      "learning_rate": 3.000472492867175e-05,
      "loss": 0.8422,
      "step": 393680
    },
    {
      "epoch": 1.3797852989727577,
      "grad_norm": 2.828125,
      "learning_rate": 3.000407590000805e-05,
      "loss": 0.9492,
      "step": 393690
    },
    {
      "epoch": 1.3798203464796532,
      "grad_norm": 2.75,
      "learning_rate": 3.0003426871344347e-05,
      "loss": 0.8447,
      "step": 393700
    },
    {
      "epoch": 1.3798553939865488,
      "grad_norm": 3.0,
      "learning_rate": 3.0002777842680645e-05,
      "loss": 0.8332,
      "step": 393710
    },
    {
      "epoch": 1.3798904414934443,
      "grad_norm": 3.640625,
      "learning_rate": 3.0002128814016943e-05,
      "loss": 0.8917,
      "step": 393720
    },
    {
      "epoch": 1.3799254890003398,
      "grad_norm": 3.015625,
      "learning_rate": 3.000147978535324e-05,
      "loss": 0.8702,
      "step": 393730
    },
    {
      "epoch": 1.3799605365072356,
      "grad_norm": 2.453125,
      "learning_rate": 3.000083075668954e-05,
      "loss": 0.8809,
      "step": 393740
    },
    {
      "epoch": 1.3799955840141311,
      "grad_norm": 2.53125,
      "learning_rate": 3.000018172802584e-05,
      "loss": 0.8053,
      "step": 393750
    },
    {
      "epoch": 1.3800306315210267,
      "grad_norm": 3.15625,
      "learning_rate": 2.999953269936214e-05,
      "loss": 0.806,
      "step": 393760
    },
    {
      "epoch": 1.3800656790279224,
      "grad_norm": 3.1875,
      "learning_rate": 2.9998883670698437e-05,
      "loss": 0.9221,
      "step": 393770
    },
    {
      "epoch": 1.380100726534818,
      "grad_norm": 2.6875,
      "learning_rate": 2.9998234642034735e-05,
      "loss": 0.7698,
      "step": 393780
    },
    {
      "epoch": 1.3801357740417135,
      "grad_norm": 2.734375,
      "learning_rate": 2.9997585613371033e-05,
      "loss": 0.8674,
      "step": 393790
    },
    {
      "epoch": 1.3801708215486093,
      "grad_norm": 2.515625,
      "learning_rate": 2.999693658470733e-05,
      "loss": 0.8056,
      "step": 393800
    },
    {
      "epoch": 1.3802058690555048,
      "grad_norm": 2.75,
      "learning_rate": 2.999628755604363e-05,
      "loss": 0.8572,
      "step": 393810
    },
    {
      "epoch": 1.3802409165624003,
      "grad_norm": 2.96875,
      "learning_rate": 2.9995638527379927e-05,
      "loss": 0.8535,
      "step": 393820
    },
    {
      "epoch": 1.3802759640692959,
      "grad_norm": 3.21875,
      "learning_rate": 2.9994989498716225e-05,
      "loss": 0.9146,
      "step": 393830
    },
    {
      "epoch": 1.3803110115761914,
      "grad_norm": 2.984375,
      "learning_rate": 2.9994340470052523e-05,
      "loss": 0.9758,
      "step": 393840
    },
    {
      "epoch": 1.3803460590830872,
      "grad_norm": 3.140625,
      "learning_rate": 2.999369144138882e-05,
      "loss": 0.805,
      "step": 393850
    },
    {
      "epoch": 1.3803811065899827,
      "grad_norm": 2.9375,
      "learning_rate": 2.999304241272512e-05,
      "loss": 0.8218,
      "step": 393860
    },
    {
      "epoch": 1.3804161540968782,
      "grad_norm": 2.453125,
      "learning_rate": 2.9992393384061413e-05,
      "loss": 0.8364,
      "step": 393870
    },
    {
      "epoch": 1.380451201603774,
      "grad_norm": 2.9375,
      "learning_rate": 2.999174435539771e-05,
      "loss": 0.8003,
      "step": 393880
    },
    {
      "epoch": 1.3804862491106695,
      "grad_norm": 2.8125,
      "learning_rate": 2.999109532673401e-05,
      "loss": 0.8045,
      "step": 393890
    },
    {
      "epoch": 1.380521296617565,
      "grad_norm": 2.6875,
      "learning_rate": 2.9990446298070307e-05,
      "loss": 0.9344,
      "step": 393900
    },
    {
      "epoch": 1.3805563441244608,
      "grad_norm": 2.765625,
      "learning_rate": 2.9989797269406605e-05,
      "loss": 0.7343,
      "step": 393910
    },
    {
      "epoch": 1.3805913916313564,
      "grad_norm": 2.859375,
      "learning_rate": 2.9989148240742903e-05,
      "loss": 0.8898,
      "step": 393920
    },
    {
      "epoch": 1.380626439138252,
      "grad_norm": 2.75,
      "learning_rate": 2.99884992120792e-05,
      "loss": 0.7518,
      "step": 393930
    },
    {
      "epoch": 1.3806614866451474,
      "grad_norm": 2.734375,
      "learning_rate": 2.99878501834155e-05,
      "loss": 0.7988,
      "step": 393940
    },
    {
      "epoch": 1.380696534152043,
      "grad_norm": 3.046875,
      "learning_rate": 2.9987201154751797e-05,
      "loss": 0.9248,
      "step": 393950
    },
    {
      "epoch": 1.3807315816589387,
      "grad_norm": 2.34375,
      "learning_rate": 2.9986552126088095e-05,
      "loss": 0.8307,
      "step": 393960
    },
    {
      "epoch": 1.3807666291658343,
      "grad_norm": 3.15625,
      "learning_rate": 2.9985903097424393e-05,
      "loss": 0.929,
      "step": 393970
    },
    {
      "epoch": 1.3808016766727298,
      "grad_norm": 2.9375,
      "learning_rate": 2.9985254068760695e-05,
      "loss": 0.9302,
      "step": 393980
    },
    {
      "epoch": 1.3808367241796256,
      "grad_norm": 2.828125,
      "learning_rate": 2.9984605040096993e-05,
      "loss": 0.8457,
      "step": 393990
    },
    {
      "epoch": 1.380871771686521,
      "grad_norm": 2.796875,
      "learning_rate": 2.998395601143329e-05,
      "loss": 0.8586,
      "step": 394000
    },
    {
      "epoch": 1.3809068191934166,
      "grad_norm": 2.34375,
      "learning_rate": 2.998330698276959e-05,
      "loss": 0.8284,
      "step": 394010
    },
    {
      "epoch": 1.3809418667003124,
      "grad_norm": 3.375,
      "learning_rate": 2.9982657954105887e-05,
      "loss": 0.8424,
      "step": 394020
    },
    {
      "epoch": 1.380976914207208,
      "grad_norm": 2.515625,
      "learning_rate": 2.9982008925442185e-05,
      "loss": 0.801,
      "step": 394030
    },
    {
      "epoch": 1.3810119617141035,
      "grad_norm": 2.984375,
      "learning_rate": 2.9981359896778483e-05,
      "loss": 0.7986,
      "step": 394040
    },
    {
      "epoch": 1.381047009220999,
      "grad_norm": 2.59375,
      "learning_rate": 2.998071086811478e-05,
      "loss": 0.7928,
      "step": 394050
    },
    {
      "epoch": 1.3810820567278945,
      "grad_norm": 2.984375,
      "learning_rate": 2.998006183945108e-05,
      "loss": 0.8806,
      "step": 394060
    },
    {
      "epoch": 1.3811171042347903,
      "grad_norm": 2.625,
      "learning_rate": 2.9979412810787377e-05,
      "loss": 0.8333,
      "step": 394070
    },
    {
      "epoch": 1.3811521517416858,
      "grad_norm": 2.859375,
      "learning_rate": 2.9978763782123675e-05,
      "loss": 0.7926,
      "step": 394080
    },
    {
      "epoch": 1.3811871992485814,
      "grad_norm": 2.703125,
      "learning_rate": 2.9978114753459973e-05,
      "loss": 0.8537,
      "step": 394090
    },
    {
      "epoch": 1.3812222467554771,
      "grad_norm": 2.5625,
      "learning_rate": 2.997746572479627e-05,
      "loss": 0.8649,
      "step": 394100
    },
    {
      "epoch": 1.3812572942623726,
      "grad_norm": 2.359375,
      "learning_rate": 2.9976816696132572e-05,
      "loss": 0.7212,
      "step": 394110
    },
    {
      "epoch": 1.3812923417692682,
      "grad_norm": 3.3125,
      "learning_rate": 2.997616766746887e-05,
      "loss": 0.8977,
      "step": 394120
    },
    {
      "epoch": 1.381327389276164,
      "grad_norm": 3.09375,
      "learning_rate": 2.9975518638805168e-05,
      "loss": 0.8663,
      "step": 394130
    },
    {
      "epoch": 1.3813624367830595,
      "grad_norm": 3.296875,
      "learning_rate": 2.9974869610141466e-05,
      "loss": 0.8978,
      "step": 394140
    },
    {
      "epoch": 1.381397484289955,
      "grad_norm": 2.4375,
      "learning_rate": 2.9974220581477764e-05,
      "loss": 0.8228,
      "step": 394150
    },
    {
      "epoch": 1.3814325317968505,
      "grad_norm": 2.765625,
      "learning_rate": 2.9973571552814062e-05,
      "loss": 0.8628,
      "step": 394160
    },
    {
      "epoch": 1.3814675793037463,
      "grad_norm": 2.78125,
      "learning_rate": 2.997292252415036e-05,
      "loss": 0.8884,
      "step": 394170
    },
    {
      "epoch": 1.3815026268106418,
      "grad_norm": 3.0,
      "learning_rate": 2.9972273495486658e-05,
      "loss": 0.9287,
      "step": 394180
    },
    {
      "epoch": 1.3815376743175374,
      "grad_norm": 3.09375,
      "learning_rate": 2.9971624466822956e-05,
      "loss": 0.942,
      "step": 394190
    },
    {
      "epoch": 1.381572721824433,
      "grad_norm": 2.921875,
      "learning_rate": 2.9970975438159254e-05,
      "loss": 0.7979,
      "step": 394200
    },
    {
      "epoch": 1.3816077693313287,
      "grad_norm": 2.890625,
      "learning_rate": 2.9970326409495552e-05,
      "loss": 0.8645,
      "step": 394210
    },
    {
      "epoch": 1.3816428168382242,
      "grad_norm": 2.359375,
      "learning_rate": 2.996967738083185e-05,
      "loss": 0.7925,
      "step": 394220
    },
    {
      "epoch": 1.3816778643451197,
      "grad_norm": 2.890625,
      "learning_rate": 2.9969028352168148e-05,
      "loss": 0.9182,
      "step": 394230
    },
    {
      "epoch": 1.3817129118520155,
      "grad_norm": 2.390625,
      "learning_rate": 2.9968379323504446e-05,
      "loss": 0.8213,
      "step": 394240
    },
    {
      "epoch": 1.381747959358911,
      "grad_norm": 2.84375,
      "learning_rate": 2.996773029484074e-05,
      "loss": 0.868,
      "step": 394250
    },
    {
      "epoch": 1.3817830068658066,
      "grad_norm": 2.9375,
      "learning_rate": 2.996708126617704e-05,
      "loss": 0.8306,
      "step": 394260
    },
    {
      "epoch": 1.3818180543727023,
      "grad_norm": 2.796875,
      "learning_rate": 2.9966432237513337e-05,
      "loss": 0.8907,
      "step": 394270
    },
    {
      "epoch": 1.3818531018795979,
      "grad_norm": 4.09375,
      "learning_rate": 2.9965783208849635e-05,
      "loss": 0.9678,
      "step": 394280
    },
    {
      "epoch": 1.3818881493864934,
      "grad_norm": 3.421875,
      "learning_rate": 2.9965134180185933e-05,
      "loss": 0.8008,
      "step": 394290
    },
    {
      "epoch": 1.381923196893389,
      "grad_norm": 3.03125,
      "learning_rate": 2.996448515152223e-05,
      "loss": 0.8897,
      "step": 394300
    },
    {
      "epoch": 1.3819582444002845,
      "grad_norm": 2.828125,
      "learning_rate": 2.996383612285853e-05,
      "loss": 0.8584,
      "step": 394310
    },
    {
      "epoch": 1.3819932919071802,
      "grad_norm": 2.71875,
      "learning_rate": 2.9963187094194827e-05,
      "loss": 0.8865,
      "step": 394320
    },
    {
      "epoch": 1.3820283394140758,
      "grad_norm": 2.953125,
      "learning_rate": 2.9962538065531125e-05,
      "loss": 0.8237,
      "step": 394330
    },
    {
      "epoch": 1.3820633869209713,
      "grad_norm": 2.828125,
      "learning_rate": 2.9961889036867426e-05,
      "loss": 0.9052,
      "step": 394340
    },
    {
      "epoch": 1.382098434427867,
      "grad_norm": 2.8125,
      "learning_rate": 2.9961240008203724e-05,
      "loss": 0.8158,
      "step": 394350
    },
    {
      "epoch": 1.3821334819347626,
      "grad_norm": 2.984375,
      "learning_rate": 2.9960590979540022e-05,
      "loss": 0.8223,
      "step": 394360
    },
    {
      "epoch": 1.3821685294416581,
      "grad_norm": 2.828125,
      "learning_rate": 2.995994195087632e-05,
      "loss": 0.8831,
      "step": 394370
    },
    {
      "epoch": 1.382203576948554,
      "grad_norm": 2.875,
      "learning_rate": 2.9959292922212618e-05,
      "loss": 0.8539,
      "step": 394380
    },
    {
      "epoch": 1.3822386244554494,
      "grad_norm": 2.875,
      "learning_rate": 2.9958643893548916e-05,
      "loss": 0.7803,
      "step": 394390
    },
    {
      "epoch": 1.382273671962345,
      "grad_norm": 2.953125,
      "learning_rate": 2.9957994864885214e-05,
      "loss": 0.7788,
      "step": 394400
    },
    {
      "epoch": 1.3823087194692405,
      "grad_norm": 3.234375,
      "learning_rate": 2.9957345836221512e-05,
      "loss": 0.8955,
      "step": 394410
    },
    {
      "epoch": 1.382343766976136,
      "grad_norm": 3.046875,
      "learning_rate": 2.995669680755781e-05,
      "loss": 0.8612,
      "step": 394420
    },
    {
      "epoch": 1.3823788144830318,
      "grad_norm": 2.578125,
      "learning_rate": 2.9956047778894108e-05,
      "loss": 0.8493,
      "step": 394430
    },
    {
      "epoch": 1.3824138619899273,
      "grad_norm": 2.921875,
      "learning_rate": 2.9955398750230406e-05,
      "loss": 0.8768,
      "step": 394440
    },
    {
      "epoch": 1.3824489094968229,
      "grad_norm": 3.171875,
      "learning_rate": 2.9954749721566704e-05,
      "loss": 0.8369,
      "step": 394450
    },
    {
      "epoch": 1.3824839570037186,
      "grad_norm": 2.71875,
      "learning_rate": 2.9954100692903002e-05,
      "loss": 0.8069,
      "step": 394460
    },
    {
      "epoch": 1.3825190045106142,
      "grad_norm": 3.4375,
      "learning_rate": 2.99534516642393e-05,
      "loss": 0.874,
      "step": 394470
    },
    {
      "epoch": 1.3825540520175097,
      "grad_norm": 2.828125,
      "learning_rate": 2.99528026355756e-05,
      "loss": 0.8322,
      "step": 394480
    },
    {
      "epoch": 1.3825890995244055,
      "grad_norm": 2.90625,
      "learning_rate": 2.99521536069119e-05,
      "loss": 0.8192,
      "step": 394490
    },
    {
      "epoch": 1.382624147031301,
      "grad_norm": 2.703125,
      "learning_rate": 2.9951504578248197e-05,
      "loss": 0.8026,
      "step": 394500
    },
    {
      "epoch": 1.3826591945381965,
      "grad_norm": 2.96875,
      "learning_rate": 2.9950855549584495e-05,
      "loss": 0.8437,
      "step": 394510
    },
    {
      "epoch": 1.382694242045092,
      "grad_norm": 2.78125,
      "learning_rate": 2.9950206520920793e-05,
      "loss": 0.824,
      "step": 394520
    },
    {
      "epoch": 1.3827292895519876,
      "grad_norm": 3.109375,
      "learning_rate": 2.994955749225709e-05,
      "loss": 0.8567,
      "step": 394530
    },
    {
      "epoch": 1.3827643370588834,
      "grad_norm": 2.75,
      "learning_rate": 2.994890846359339e-05,
      "loss": 0.8093,
      "step": 394540
    },
    {
      "epoch": 1.3827993845657789,
      "grad_norm": 2.78125,
      "learning_rate": 2.9948259434929687e-05,
      "loss": 0.8685,
      "step": 394550
    },
    {
      "epoch": 1.3828344320726744,
      "grad_norm": 2.8125,
      "learning_rate": 2.9947610406265985e-05,
      "loss": 0.8518,
      "step": 394560
    },
    {
      "epoch": 1.3828694795795702,
      "grad_norm": 2.5,
      "learning_rate": 2.9946961377602283e-05,
      "loss": 0.7022,
      "step": 394570
    },
    {
      "epoch": 1.3829045270864657,
      "grad_norm": 2.921875,
      "learning_rate": 2.994631234893858e-05,
      "loss": 0.8998,
      "step": 394580
    },
    {
      "epoch": 1.3829395745933613,
      "grad_norm": 3.03125,
      "learning_rate": 2.994566332027488e-05,
      "loss": 0.837,
      "step": 394590
    },
    {
      "epoch": 1.382974622100257,
      "grad_norm": 3.03125,
      "learning_rate": 2.9945014291611177e-05,
      "loss": 0.7957,
      "step": 394600
    },
    {
      "epoch": 1.3830096696071525,
      "grad_norm": 3.296875,
      "learning_rate": 2.9944365262947475e-05,
      "loss": 0.8203,
      "step": 394610
    },
    {
      "epoch": 1.383044717114048,
      "grad_norm": 2.875,
      "learning_rate": 2.994371623428377e-05,
      "loss": 0.8885,
      "step": 394620
    },
    {
      "epoch": 1.3830797646209436,
      "grad_norm": 3.140625,
      "learning_rate": 2.9943067205620068e-05,
      "loss": 0.8981,
      "step": 394630
    },
    {
      "epoch": 1.3831148121278392,
      "grad_norm": 2.890625,
      "learning_rate": 2.9942418176956366e-05,
      "loss": 0.8371,
      "step": 394640
    },
    {
      "epoch": 1.383149859634735,
      "grad_norm": 2.828125,
      "learning_rate": 2.9941769148292664e-05,
      "loss": 0.835,
      "step": 394650
    },
    {
      "epoch": 1.3831849071416304,
      "grad_norm": 2.78125,
      "learning_rate": 2.9941120119628962e-05,
      "loss": 0.8772,
      "step": 394660
    },
    {
      "epoch": 1.383219954648526,
      "grad_norm": 3.0625,
      "learning_rate": 2.994047109096526e-05,
      "loss": 0.8613,
      "step": 394670
    },
    {
      "epoch": 1.3832550021554217,
      "grad_norm": 2.59375,
      "learning_rate": 2.9939822062301558e-05,
      "loss": 0.8458,
      "step": 394680
    },
    {
      "epoch": 1.3832900496623173,
      "grad_norm": 2.8125,
      "learning_rate": 2.9939173033637856e-05,
      "loss": 0.7953,
      "step": 394690
    },
    {
      "epoch": 1.3833250971692128,
      "grad_norm": 3.0,
      "learning_rate": 2.9938524004974154e-05,
      "loss": 0.8758,
      "step": 394700
    },
    {
      "epoch": 1.3833601446761086,
      "grad_norm": 2.96875,
      "learning_rate": 2.9937874976310455e-05,
      "loss": 0.7789,
      "step": 394710
    },
    {
      "epoch": 1.383395192183004,
      "grad_norm": 2.6875,
      "learning_rate": 2.9937225947646753e-05,
      "loss": 0.801,
      "step": 394720
    },
    {
      "epoch": 1.3834302396898996,
      "grad_norm": 2.890625,
      "learning_rate": 2.993657691898305e-05,
      "loss": 0.832,
      "step": 394730
    },
    {
      "epoch": 1.3834652871967952,
      "grad_norm": 3.203125,
      "learning_rate": 2.993592789031935e-05,
      "loss": 0.8774,
      "step": 394740
    },
    {
      "epoch": 1.3835003347036907,
      "grad_norm": 3.328125,
      "learning_rate": 2.9935278861655647e-05,
      "loss": 0.8771,
      "step": 394750
    },
    {
      "epoch": 1.3835353822105865,
      "grad_norm": 2.640625,
      "learning_rate": 2.9934629832991945e-05,
      "loss": 0.8361,
      "step": 394760
    },
    {
      "epoch": 1.383570429717482,
      "grad_norm": 2.640625,
      "learning_rate": 2.9933980804328243e-05,
      "loss": 0.8588,
      "step": 394770
    },
    {
      "epoch": 1.3836054772243775,
      "grad_norm": 2.71875,
      "learning_rate": 2.993333177566454e-05,
      "loss": 0.8836,
      "step": 394780
    },
    {
      "epoch": 1.3836405247312733,
      "grad_norm": 3.609375,
      "learning_rate": 2.993268274700084e-05,
      "loss": 0.9107,
      "step": 394790
    },
    {
      "epoch": 1.3836755722381688,
      "grad_norm": 3.296875,
      "learning_rate": 2.9932033718337137e-05,
      "loss": 0.8662,
      "step": 394800
    },
    {
      "epoch": 1.3837106197450644,
      "grad_norm": 2.9375,
      "learning_rate": 2.9931384689673435e-05,
      "loss": 0.8485,
      "step": 394810
    },
    {
      "epoch": 1.3837456672519601,
      "grad_norm": 3.4375,
      "learning_rate": 2.9930735661009733e-05,
      "loss": 0.8716,
      "step": 394820
    },
    {
      "epoch": 1.3837807147588557,
      "grad_norm": 2.75,
      "learning_rate": 2.993008663234603e-05,
      "loss": 0.8574,
      "step": 394830
    },
    {
      "epoch": 1.3838157622657512,
      "grad_norm": 3.171875,
      "learning_rate": 2.992943760368233e-05,
      "loss": 0.8503,
      "step": 394840
    },
    {
      "epoch": 1.383850809772647,
      "grad_norm": 3.296875,
      "learning_rate": 2.992878857501863e-05,
      "loss": 0.8871,
      "step": 394850
    },
    {
      "epoch": 1.3838858572795425,
      "grad_norm": 2.90625,
      "learning_rate": 2.992813954635493e-05,
      "loss": 0.8695,
      "step": 394860
    },
    {
      "epoch": 1.383920904786438,
      "grad_norm": 3.328125,
      "learning_rate": 2.9927490517691227e-05,
      "loss": 0.8802,
      "step": 394870
    },
    {
      "epoch": 1.3839559522933336,
      "grad_norm": 2.96875,
      "learning_rate": 2.9926841489027525e-05,
      "loss": 0.8616,
      "step": 394880
    },
    {
      "epoch": 1.383990999800229,
      "grad_norm": 2.90625,
      "learning_rate": 2.9926192460363823e-05,
      "loss": 0.7975,
      "step": 394890
    },
    {
      "epoch": 1.3840260473071249,
      "grad_norm": 2.984375,
      "learning_rate": 2.992554343170012e-05,
      "loss": 0.7508,
      "step": 394900
    },
    {
      "epoch": 1.3840610948140204,
      "grad_norm": 2.859375,
      "learning_rate": 2.992489440303642e-05,
      "loss": 0.8315,
      "step": 394910
    },
    {
      "epoch": 1.384096142320916,
      "grad_norm": 2.5625,
      "learning_rate": 2.9924245374372717e-05,
      "loss": 0.9067,
      "step": 394920
    },
    {
      "epoch": 1.3841311898278117,
      "grad_norm": 3.125,
      "learning_rate": 2.9923596345709015e-05,
      "loss": 0.8141,
      "step": 394930
    },
    {
      "epoch": 1.3841662373347072,
      "grad_norm": 3.109375,
      "learning_rate": 2.9922947317045313e-05,
      "loss": 0.9087,
      "step": 394940
    },
    {
      "epoch": 1.3842012848416028,
      "grad_norm": 2.8125,
      "learning_rate": 2.992229828838161e-05,
      "loss": 0.8837,
      "step": 394950
    },
    {
      "epoch": 1.3842363323484985,
      "grad_norm": 2.953125,
      "learning_rate": 2.992164925971791e-05,
      "loss": 0.8935,
      "step": 394960
    },
    {
      "epoch": 1.384271379855394,
      "grad_norm": 2.765625,
      "learning_rate": 2.9921000231054207e-05,
      "loss": 0.8088,
      "step": 394970
    },
    {
      "epoch": 1.3843064273622896,
      "grad_norm": 3.171875,
      "learning_rate": 2.9920351202390505e-05,
      "loss": 0.8456,
      "step": 394980
    },
    {
      "epoch": 1.3843414748691851,
      "grad_norm": 3.03125,
      "learning_rate": 2.9919702173726806e-05,
      "loss": 0.8374,
      "step": 394990
    },
    {
      "epoch": 1.3843765223760807,
      "grad_norm": 3.015625,
      "learning_rate": 2.9919053145063097e-05,
      "loss": 0.8317,
      "step": 395000
    },
    {
      "epoch": 1.3843765223760807,
      "eval_loss": 0.7976319193840027,
      "eval_runtime": 556.3762,
      "eval_samples_per_second": 683.775,
      "eval_steps_per_second": 56.981,
      "step": 395000
    },
    {
      "epoch": 1.3844115698829764,
      "grad_norm": 3.140625,
      "learning_rate": 2.9918404116399395e-05,
      "loss": 0.8852,
      "step": 395010
    },
    {
      "epoch": 1.384446617389872,
      "grad_norm": 2.578125,
      "learning_rate": 2.9917755087735693e-05,
      "loss": 0.8084,
      "step": 395020
    },
    {
      "epoch": 1.3844816648967675,
      "grad_norm": 3.265625,
      "learning_rate": 2.991710605907199e-05,
      "loss": 0.8304,
      "step": 395030
    },
    {
      "epoch": 1.3845167124036633,
      "grad_norm": 2.875,
      "learning_rate": 2.991645703040829e-05,
      "loss": 0.8589,
      "step": 395040
    },
    {
      "epoch": 1.3845517599105588,
      "grad_norm": 3.109375,
      "learning_rate": 2.9915808001744587e-05,
      "loss": 0.8367,
      "step": 395050
    },
    {
      "epoch": 1.3845868074174543,
      "grad_norm": 3.328125,
      "learning_rate": 2.9915158973080885e-05,
      "loss": 0.8496,
      "step": 395060
    },
    {
      "epoch": 1.38462185492435,
      "grad_norm": 2.6875,
      "learning_rate": 2.9914509944417183e-05,
      "loss": 0.7846,
      "step": 395070
    },
    {
      "epoch": 1.3846569024312456,
      "grad_norm": 2.796875,
      "learning_rate": 2.9913860915753485e-05,
      "loss": 0.8594,
      "step": 395080
    },
    {
      "epoch": 1.3846919499381412,
      "grad_norm": 2.75,
      "learning_rate": 2.9913211887089783e-05,
      "loss": 0.8577,
      "step": 395090
    },
    {
      "epoch": 1.3847269974450367,
      "grad_norm": 2.796875,
      "learning_rate": 2.991256285842608e-05,
      "loss": 0.7991,
      "step": 395100
    },
    {
      "epoch": 1.3847620449519322,
      "grad_norm": 2.953125,
      "learning_rate": 2.991191382976238e-05,
      "loss": 0.8561,
      "step": 395110
    },
    {
      "epoch": 1.384797092458828,
      "grad_norm": 2.421875,
      "learning_rate": 2.9911264801098677e-05,
      "loss": 0.8267,
      "step": 395120
    },
    {
      "epoch": 1.3848321399657235,
      "grad_norm": 2.734375,
      "learning_rate": 2.9910615772434975e-05,
      "loss": 0.8441,
      "step": 395130
    },
    {
      "epoch": 1.384867187472619,
      "grad_norm": 2.921875,
      "learning_rate": 2.9909966743771273e-05,
      "loss": 0.8224,
      "step": 395140
    },
    {
      "epoch": 1.3849022349795148,
      "grad_norm": 3.1875,
      "learning_rate": 2.990931771510757e-05,
      "loss": 0.8624,
      "step": 395150
    },
    {
      "epoch": 1.3849372824864103,
      "grad_norm": 2.984375,
      "learning_rate": 2.990866868644387e-05,
      "loss": 0.8707,
      "step": 395160
    },
    {
      "epoch": 1.3849723299933059,
      "grad_norm": 2.6875,
      "learning_rate": 2.9908019657780167e-05,
      "loss": 0.8686,
      "step": 395170
    },
    {
      "epoch": 1.3850073775002016,
      "grad_norm": 2.578125,
      "learning_rate": 2.9907370629116465e-05,
      "loss": 0.794,
      "step": 395180
    },
    {
      "epoch": 1.3850424250070972,
      "grad_norm": 3.078125,
      "learning_rate": 2.9906721600452763e-05,
      "loss": 0.8847,
      "step": 395190
    },
    {
      "epoch": 1.3850774725139927,
      "grad_norm": 3.109375,
      "learning_rate": 2.990607257178906e-05,
      "loss": 0.8552,
      "step": 395200
    },
    {
      "epoch": 1.3851125200208882,
      "grad_norm": 3.0625,
      "learning_rate": 2.9905423543125362e-05,
      "loss": 0.8819,
      "step": 395210
    },
    {
      "epoch": 1.3851475675277838,
      "grad_norm": 3.0,
      "learning_rate": 2.990477451446166e-05,
      "loss": 0.8521,
      "step": 395220
    },
    {
      "epoch": 1.3851826150346795,
      "grad_norm": 3.109375,
      "learning_rate": 2.9904125485797958e-05,
      "loss": 0.8024,
      "step": 395230
    },
    {
      "epoch": 1.385217662541575,
      "grad_norm": 3.140625,
      "learning_rate": 2.9903476457134256e-05,
      "loss": 0.9042,
      "step": 395240
    },
    {
      "epoch": 1.3852527100484706,
      "grad_norm": 3.015625,
      "learning_rate": 2.9902827428470554e-05,
      "loss": 0.8471,
      "step": 395250
    },
    {
      "epoch": 1.3852877575553664,
      "grad_norm": 2.921875,
      "learning_rate": 2.9902178399806852e-05,
      "loss": 0.8383,
      "step": 395260
    },
    {
      "epoch": 1.385322805062262,
      "grad_norm": 2.765625,
      "learning_rate": 2.990152937114315e-05,
      "loss": 0.8955,
      "step": 395270
    },
    {
      "epoch": 1.3853578525691574,
      "grad_norm": 3.25,
      "learning_rate": 2.9900880342479448e-05,
      "loss": 0.8063,
      "step": 395280
    },
    {
      "epoch": 1.3853929000760532,
      "grad_norm": 2.484375,
      "learning_rate": 2.9900231313815746e-05,
      "loss": 0.7904,
      "step": 395290
    },
    {
      "epoch": 1.3854279475829487,
      "grad_norm": 2.765625,
      "learning_rate": 2.9899582285152044e-05,
      "loss": 0.8921,
      "step": 395300
    },
    {
      "epoch": 1.3854629950898443,
      "grad_norm": 3.0,
      "learning_rate": 2.9898933256488342e-05,
      "loss": 0.7804,
      "step": 395310
    },
    {
      "epoch": 1.3854980425967398,
      "grad_norm": 3.09375,
      "learning_rate": 2.989828422782464e-05,
      "loss": 0.8184,
      "step": 395320
    },
    {
      "epoch": 1.3855330901036353,
      "grad_norm": 3.359375,
      "learning_rate": 2.9897635199160938e-05,
      "loss": 0.8645,
      "step": 395330
    },
    {
      "epoch": 1.385568137610531,
      "grad_norm": 2.453125,
      "learning_rate": 2.9896986170497236e-05,
      "loss": 0.8383,
      "step": 395340
    },
    {
      "epoch": 1.3856031851174266,
      "grad_norm": 2.671875,
      "learning_rate": 2.9896337141833537e-05,
      "loss": 0.8022,
      "step": 395350
    },
    {
      "epoch": 1.3856382326243222,
      "grad_norm": 3.484375,
      "learning_rate": 2.9895688113169835e-05,
      "loss": 0.8257,
      "step": 395360
    },
    {
      "epoch": 1.385673280131218,
      "grad_norm": 3.03125,
      "learning_rate": 2.9895039084506133e-05,
      "loss": 0.8386,
      "step": 395370
    },
    {
      "epoch": 1.3857083276381135,
      "grad_norm": 2.53125,
      "learning_rate": 2.9894390055842425e-05,
      "loss": 0.7949,
      "step": 395380
    },
    {
      "epoch": 1.385743375145009,
      "grad_norm": 2.765625,
      "learning_rate": 2.9893741027178723e-05,
      "loss": 0.861,
      "step": 395390
    },
    {
      "epoch": 1.3857784226519048,
      "grad_norm": 2.765625,
      "learning_rate": 2.989309199851502e-05,
      "loss": 0.8256,
      "step": 395400
    },
    {
      "epoch": 1.3858134701588003,
      "grad_norm": 2.9375,
      "learning_rate": 2.989244296985132e-05,
      "loss": 0.8597,
      "step": 395410
    },
    {
      "epoch": 1.3858485176656958,
      "grad_norm": 2.609375,
      "learning_rate": 2.9891793941187617e-05,
      "loss": 0.8372,
      "step": 395420
    },
    {
      "epoch": 1.3858835651725914,
      "grad_norm": 3.109375,
      "learning_rate": 2.9891144912523915e-05,
      "loss": 0.8082,
      "step": 395430
    },
    {
      "epoch": 1.3859186126794871,
      "grad_norm": 3.203125,
      "learning_rate": 2.9890495883860216e-05,
      "loss": 0.9099,
      "step": 395440
    },
    {
      "epoch": 1.3859536601863827,
      "grad_norm": 2.9375,
      "learning_rate": 2.9889846855196514e-05,
      "loss": 0.8519,
      "step": 395450
    },
    {
      "epoch": 1.3859887076932782,
      "grad_norm": 2.578125,
      "learning_rate": 2.9889197826532812e-05,
      "loss": 0.8956,
      "step": 395460
    },
    {
      "epoch": 1.3860237552001737,
      "grad_norm": 3.296875,
      "learning_rate": 2.988854879786911e-05,
      "loss": 0.8557,
      "step": 395470
    },
    {
      "epoch": 1.3860588027070695,
      "grad_norm": 2.921875,
      "learning_rate": 2.9887899769205408e-05,
      "loss": 0.8461,
      "step": 395480
    },
    {
      "epoch": 1.386093850213965,
      "grad_norm": 2.875,
      "learning_rate": 2.9887250740541706e-05,
      "loss": 0.8151,
      "step": 395490
    },
    {
      "epoch": 1.3861288977208606,
      "grad_norm": 2.609375,
      "learning_rate": 2.9886601711878004e-05,
      "loss": 0.7822,
      "step": 395500
    },
    {
      "epoch": 1.3861639452277563,
      "grad_norm": 2.59375,
      "learning_rate": 2.9885952683214302e-05,
      "loss": 0.7853,
      "step": 395510
    },
    {
      "epoch": 1.3861989927346519,
      "grad_norm": 2.453125,
      "learning_rate": 2.98853036545506e-05,
      "loss": 0.8371,
      "step": 395520
    },
    {
      "epoch": 1.3862340402415474,
      "grad_norm": 2.703125,
      "learning_rate": 2.9884654625886898e-05,
      "loss": 0.8375,
      "step": 395530
    },
    {
      "epoch": 1.3862690877484432,
      "grad_norm": 3.390625,
      "learning_rate": 2.9884005597223196e-05,
      "loss": 0.8277,
      "step": 395540
    },
    {
      "epoch": 1.3863041352553387,
      "grad_norm": 2.609375,
      "learning_rate": 2.9883356568559494e-05,
      "loss": 0.7942,
      "step": 395550
    },
    {
      "epoch": 1.3863391827622342,
      "grad_norm": 2.75,
      "learning_rate": 2.9882707539895792e-05,
      "loss": 0.8335,
      "step": 395560
    },
    {
      "epoch": 1.3863742302691298,
      "grad_norm": 2.796875,
      "learning_rate": 2.988205851123209e-05,
      "loss": 0.8859,
      "step": 395570
    },
    {
      "epoch": 1.3864092777760253,
      "grad_norm": 2.765625,
      "learning_rate": 2.988140948256839e-05,
      "loss": 0.8366,
      "step": 395580
    },
    {
      "epoch": 1.386444325282921,
      "grad_norm": 2.625,
      "learning_rate": 2.988076045390469e-05,
      "loss": 0.8165,
      "step": 395590
    },
    {
      "epoch": 1.3864793727898166,
      "grad_norm": 2.953125,
      "learning_rate": 2.9880111425240987e-05,
      "loss": 0.817,
      "step": 395600
    },
    {
      "epoch": 1.3865144202967121,
      "grad_norm": 2.90625,
      "learning_rate": 2.9879462396577285e-05,
      "loss": 0.8218,
      "step": 395610
    },
    {
      "epoch": 1.3865494678036079,
      "grad_norm": 2.90625,
      "learning_rate": 2.9878813367913583e-05,
      "loss": 0.8531,
      "step": 395620
    },
    {
      "epoch": 1.3865845153105034,
      "grad_norm": 2.609375,
      "learning_rate": 2.987816433924988e-05,
      "loss": 0.9676,
      "step": 395630
    },
    {
      "epoch": 1.386619562817399,
      "grad_norm": 3.171875,
      "learning_rate": 2.987751531058618e-05,
      "loss": 0.9393,
      "step": 395640
    },
    {
      "epoch": 1.3866546103242947,
      "grad_norm": 2.84375,
      "learning_rate": 2.9876866281922477e-05,
      "loss": 0.832,
      "step": 395650
    },
    {
      "epoch": 1.3866896578311902,
      "grad_norm": 2.84375,
      "learning_rate": 2.9876217253258775e-05,
      "loss": 0.8092,
      "step": 395660
    },
    {
      "epoch": 1.3867247053380858,
      "grad_norm": 2.625,
      "learning_rate": 2.9875568224595073e-05,
      "loss": 0.8191,
      "step": 395670
    },
    {
      "epoch": 1.3867597528449813,
      "grad_norm": 2.859375,
      "learning_rate": 2.987491919593137e-05,
      "loss": 0.8666,
      "step": 395680
    },
    {
      "epoch": 1.3867948003518769,
      "grad_norm": 2.875,
      "learning_rate": 2.987427016726767e-05,
      "loss": 0.8167,
      "step": 395690
    },
    {
      "epoch": 1.3868298478587726,
      "grad_norm": 3.328125,
      "learning_rate": 2.9873621138603967e-05,
      "loss": 0.8951,
      "step": 395700
    },
    {
      "epoch": 1.3868648953656681,
      "grad_norm": 3.296875,
      "learning_rate": 2.9872972109940265e-05,
      "loss": 0.9557,
      "step": 395710
    },
    {
      "epoch": 1.3868999428725637,
      "grad_norm": 2.765625,
      "learning_rate": 2.9872323081276567e-05,
      "loss": 0.8379,
      "step": 395720
    },
    {
      "epoch": 1.3869349903794594,
      "grad_norm": 2.3125,
      "learning_rate": 2.9871674052612865e-05,
      "loss": 0.8903,
      "step": 395730
    },
    {
      "epoch": 1.386970037886355,
      "grad_norm": 3.03125,
      "learning_rate": 2.9871025023949163e-05,
      "loss": 0.8455,
      "step": 395740
    },
    {
      "epoch": 1.3870050853932505,
      "grad_norm": 3.140625,
      "learning_rate": 2.9870375995285454e-05,
      "loss": 0.8203,
      "step": 395750
    },
    {
      "epoch": 1.3870401329001463,
      "grad_norm": 2.6875,
      "learning_rate": 2.9869726966621752e-05,
      "loss": 0.774,
      "step": 395760
    },
    {
      "epoch": 1.3870751804070418,
      "grad_norm": 2.5625,
      "learning_rate": 2.986907793795805e-05,
      "loss": 0.8938,
      "step": 395770
    },
    {
      "epoch": 1.3871102279139373,
      "grad_norm": 2.796875,
      "learning_rate": 2.9868428909294348e-05,
      "loss": 0.8347,
      "step": 395780
    },
    {
      "epoch": 1.3871452754208329,
      "grad_norm": 2.875,
      "learning_rate": 2.9867779880630646e-05,
      "loss": 0.8581,
      "step": 395790
    },
    {
      "epoch": 1.3871803229277284,
      "grad_norm": 2.671875,
      "learning_rate": 2.9867130851966944e-05,
      "loss": 0.8431,
      "step": 395800
    },
    {
      "epoch": 1.3872153704346242,
      "grad_norm": 3.1875,
      "learning_rate": 2.9866481823303245e-05,
      "loss": 0.9354,
      "step": 395810
    },
    {
      "epoch": 1.3872504179415197,
      "grad_norm": 2.84375,
      "learning_rate": 2.9865832794639543e-05,
      "loss": 0.8668,
      "step": 395820
    },
    {
      "epoch": 1.3872854654484152,
      "grad_norm": 2.921875,
      "learning_rate": 2.986518376597584e-05,
      "loss": 0.8245,
      "step": 395830
    },
    {
      "epoch": 1.387320512955311,
      "grad_norm": 3.03125,
      "learning_rate": 2.986453473731214e-05,
      "loss": 0.9104,
      "step": 395840
    },
    {
      "epoch": 1.3873555604622065,
      "grad_norm": 2.828125,
      "learning_rate": 2.9863885708648437e-05,
      "loss": 0.8998,
      "step": 395850
    },
    {
      "epoch": 1.387390607969102,
      "grad_norm": 3.21875,
      "learning_rate": 2.9863236679984735e-05,
      "loss": 0.7986,
      "step": 395860
    },
    {
      "epoch": 1.3874256554759978,
      "grad_norm": 3.015625,
      "learning_rate": 2.9862587651321033e-05,
      "loss": 0.8489,
      "step": 395870
    },
    {
      "epoch": 1.3874607029828934,
      "grad_norm": 2.8125,
      "learning_rate": 2.986193862265733e-05,
      "loss": 0.8253,
      "step": 395880
    },
    {
      "epoch": 1.387495750489789,
      "grad_norm": 2.84375,
      "learning_rate": 2.986128959399363e-05,
      "loss": 0.8417,
      "step": 395890
    },
    {
      "epoch": 1.3875307979966844,
      "grad_norm": 2.9375,
      "learning_rate": 2.9860640565329927e-05,
      "loss": 0.8861,
      "step": 395900
    },
    {
      "epoch": 1.38756584550358,
      "grad_norm": 3.40625,
      "learning_rate": 2.9859991536666225e-05,
      "loss": 0.8151,
      "step": 395910
    },
    {
      "epoch": 1.3876008930104757,
      "grad_norm": 3.140625,
      "learning_rate": 2.9859342508002523e-05,
      "loss": 0.8072,
      "step": 395920
    },
    {
      "epoch": 1.3876359405173713,
      "grad_norm": 3.015625,
      "learning_rate": 2.985869347933882e-05,
      "loss": 0.8365,
      "step": 395930
    },
    {
      "epoch": 1.3876709880242668,
      "grad_norm": 2.546875,
      "learning_rate": 2.985804445067512e-05,
      "loss": 0.8513,
      "step": 395940
    },
    {
      "epoch": 1.3877060355311626,
      "grad_norm": 2.734375,
      "learning_rate": 2.985739542201142e-05,
      "loss": 0.7903,
      "step": 395950
    },
    {
      "epoch": 1.387741083038058,
      "grad_norm": 3.234375,
      "learning_rate": 2.985674639334772e-05,
      "loss": 0.9086,
      "step": 395960
    },
    {
      "epoch": 1.3877761305449536,
      "grad_norm": 2.890625,
      "learning_rate": 2.9856097364684017e-05,
      "loss": 0.9342,
      "step": 395970
    },
    {
      "epoch": 1.3878111780518494,
      "grad_norm": 2.4375,
      "learning_rate": 2.9855448336020315e-05,
      "loss": 0.8535,
      "step": 395980
    },
    {
      "epoch": 1.387846225558745,
      "grad_norm": 2.3125,
      "learning_rate": 2.9854799307356613e-05,
      "loss": 0.7971,
      "step": 395990
    },
    {
      "epoch": 1.3878812730656405,
      "grad_norm": 3.546875,
      "learning_rate": 2.985415027869291e-05,
      "loss": 0.9565,
      "step": 396000
    },
    {
      "epoch": 1.387916320572536,
      "grad_norm": 2.578125,
      "learning_rate": 2.985350125002921e-05,
      "loss": 0.7918,
      "step": 396010
    },
    {
      "epoch": 1.3879513680794315,
      "grad_norm": 2.765625,
      "learning_rate": 2.9852852221365507e-05,
      "loss": 0.8333,
      "step": 396020
    },
    {
      "epoch": 1.3879864155863273,
      "grad_norm": 2.953125,
      "learning_rate": 2.9852203192701805e-05,
      "loss": 0.827,
      "step": 396030
    },
    {
      "epoch": 1.3880214630932228,
      "grad_norm": 2.6875,
      "learning_rate": 2.9851554164038103e-05,
      "loss": 0.879,
      "step": 396040
    },
    {
      "epoch": 1.3880565106001184,
      "grad_norm": 2.578125,
      "learning_rate": 2.98509051353744e-05,
      "loss": 0.8139,
      "step": 396050
    },
    {
      "epoch": 1.3880915581070141,
      "grad_norm": 2.84375,
      "learning_rate": 2.98502561067107e-05,
      "loss": 0.8986,
      "step": 396060
    },
    {
      "epoch": 1.3881266056139097,
      "grad_norm": 2.828125,
      "learning_rate": 2.9849607078046997e-05,
      "loss": 0.8098,
      "step": 396070
    },
    {
      "epoch": 1.3881616531208052,
      "grad_norm": 2.9375,
      "learning_rate": 2.9848958049383295e-05,
      "loss": 0.8302,
      "step": 396080
    },
    {
      "epoch": 1.388196700627701,
      "grad_norm": 2.796875,
      "learning_rate": 2.9848309020719596e-05,
      "loss": 0.906,
      "step": 396090
    },
    {
      "epoch": 1.3882317481345965,
      "grad_norm": 2.8125,
      "learning_rate": 2.9847659992055894e-05,
      "loss": 0.8012,
      "step": 396100
    },
    {
      "epoch": 1.388266795641492,
      "grad_norm": 2.65625,
      "learning_rate": 2.9847010963392192e-05,
      "loss": 0.804,
      "step": 396110
    },
    {
      "epoch": 1.3883018431483876,
      "grad_norm": 2.6875,
      "learning_rate": 2.984636193472849e-05,
      "loss": 0.8681,
      "step": 396120
    },
    {
      "epoch": 1.3883368906552833,
      "grad_norm": 3.015625,
      "learning_rate": 2.984571290606478e-05,
      "loss": 0.8654,
      "step": 396130
    },
    {
      "epoch": 1.3883719381621789,
      "grad_norm": 2.609375,
      "learning_rate": 2.984506387740108e-05,
      "loss": 0.8634,
      "step": 396140
    },
    {
      "epoch": 1.3884069856690744,
      "grad_norm": 2.921875,
      "learning_rate": 2.9844414848737377e-05,
      "loss": 0.7603,
      "step": 396150
    },
    {
      "epoch": 1.38844203317597,
      "grad_norm": 2.921875,
      "learning_rate": 2.9843765820073675e-05,
      "loss": 0.8449,
      "step": 396160
    },
    {
      "epoch": 1.3884770806828657,
      "grad_norm": 2.890625,
      "learning_rate": 2.9843116791409977e-05,
      "loss": 0.85,
      "step": 396170
    },
    {
      "epoch": 1.3885121281897612,
      "grad_norm": 2.828125,
      "learning_rate": 2.9842467762746275e-05,
      "loss": 0.8481,
      "step": 396180
    },
    {
      "epoch": 1.3885471756966568,
      "grad_norm": 2.890625,
      "learning_rate": 2.9841818734082573e-05,
      "loss": 0.7915,
      "step": 396190
    },
    {
      "epoch": 1.3885822232035525,
      "grad_norm": 2.84375,
      "learning_rate": 2.984116970541887e-05,
      "loss": 0.87,
      "step": 396200
    },
    {
      "epoch": 1.388617270710448,
      "grad_norm": 2.53125,
      "learning_rate": 2.984052067675517e-05,
      "loss": 0.9029,
      "step": 396210
    },
    {
      "epoch": 1.3886523182173436,
      "grad_norm": 3.40625,
      "learning_rate": 2.9839871648091467e-05,
      "loss": 0.871,
      "step": 396220
    },
    {
      "epoch": 1.3886873657242393,
      "grad_norm": 3.4375,
      "learning_rate": 2.9839222619427765e-05,
      "loss": 0.7986,
      "step": 396230
    },
    {
      "epoch": 1.3887224132311349,
      "grad_norm": 2.890625,
      "learning_rate": 2.9838573590764063e-05,
      "loss": 0.8702,
      "step": 396240
    },
    {
      "epoch": 1.3887574607380304,
      "grad_norm": 3.203125,
      "learning_rate": 2.983792456210036e-05,
      "loss": 0.886,
      "step": 396250
    },
    {
      "epoch": 1.388792508244926,
      "grad_norm": 3.0,
      "learning_rate": 2.983727553343666e-05,
      "loss": 0.8717,
      "step": 396260
    },
    {
      "epoch": 1.3888275557518215,
      "grad_norm": 3.140625,
      "learning_rate": 2.9836626504772957e-05,
      "loss": 0.8967,
      "step": 396270
    },
    {
      "epoch": 1.3888626032587172,
      "grad_norm": 3.15625,
      "learning_rate": 2.9835977476109255e-05,
      "loss": 0.8834,
      "step": 396280
    },
    {
      "epoch": 1.3888976507656128,
      "grad_norm": 2.90625,
      "learning_rate": 2.9835328447445553e-05,
      "loss": 0.8408,
      "step": 396290
    },
    {
      "epoch": 1.3889326982725083,
      "grad_norm": 2.734375,
      "learning_rate": 2.983467941878185e-05,
      "loss": 0.8484,
      "step": 396300
    },
    {
      "epoch": 1.388967745779404,
      "grad_norm": 2.84375,
      "learning_rate": 2.9834030390118152e-05,
      "loss": 0.8681,
      "step": 396310
    },
    {
      "epoch": 1.3890027932862996,
      "grad_norm": 3.015625,
      "learning_rate": 2.983338136145445e-05,
      "loss": 0.7801,
      "step": 396320
    },
    {
      "epoch": 1.3890378407931951,
      "grad_norm": 3.203125,
      "learning_rate": 2.9832732332790748e-05,
      "loss": 0.7992,
      "step": 396330
    },
    {
      "epoch": 1.389072888300091,
      "grad_norm": 3.171875,
      "learning_rate": 2.9832083304127046e-05,
      "loss": 0.8036,
      "step": 396340
    },
    {
      "epoch": 1.3891079358069864,
      "grad_norm": 3.6875,
      "learning_rate": 2.9831434275463344e-05,
      "loss": 0.8181,
      "step": 396350
    },
    {
      "epoch": 1.389142983313882,
      "grad_norm": 2.6875,
      "learning_rate": 2.9830785246799642e-05,
      "loss": 0.8597,
      "step": 396360
    },
    {
      "epoch": 1.3891780308207775,
      "grad_norm": 3.296875,
      "learning_rate": 2.983013621813594e-05,
      "loss": 0.8812,
      "step": 396370
    },
    {
      "epoch": 1.389213078327673,
      "grad_norm": 3.015625,
      "learning_rate": 2.9829487189472238e-05,
      "loss": 0.8701,
      "step": 396380
    },
    {
      "epoch": 1.3892481258345688,
      "grad_norm": 2.5625,
      "learning_rate": 2.9828838160808536e-05,
      "loss": 0.814,
      "step": 396390
    },
    {
      "epoch": 1.3892831733414643,
      "grad_norm": 2.984375,
      "learning_rate": 2.9828189132144834e-05,
      "loss": 0.7913,
      "step": 396400
    },
    {
      "epoch": 1.3893182208483599,
      "grad_norm": 2.3125,
      "learning_rate": 2.9827540103481132e-05,
      "loss": 0.8297,
      "step": 396410
    },
    {
      "epoch": 1.3893532683552556,
      "grad_norm": 2.75,
      "learning_rate": 2.982689107481743e-05,
      "loss": 0.8274,
      "step": 396420
    },
    {
      "epoch": 1.3893883158621512,
      "grad_norm": 2.921875,
      "learning_rate": 2.9826242046153728e-05,
      "loss": 0.9039,
      "step": 396430
    },
    {
      "epoch": 1.3894233633690467,
      "grad_norm": 3.0,
      "learning_rate": 2.9825593017490026e-05,
      "loss": 0.9445,
      "step": 396440
    },
    {
      "epoch": 1.3894584108759425,
      "grad_norm": 2.984375,
      "learning_rate": 2.9824943988826328e-05,
      "loss": 0.848,
      "step": 396450
    },
    {
      "epoch": 1.389493458382838,
      "grad_norm": 3.015625,
      "learning_rate": 2.9824294960162626e-05,
      "loss": 0.8706,
      "step": 396460
    },
    {
      "epoch": 1.3895285058897335,
      "grad_norm": 2.90625,
      "learning_rate": 2.9823645931498924e-05,
      "loss": 0.8133,
      "step": 396470
    },
    {
      "epoch": 1.389563553396629,
      "grad_norm": 2.96875,
      "learning_rate": 2.982299690283522e-05,
      "loss": 0.8068,
      "step": 396480
    },
    {
      "epoch": 1.3895986009035246,
      "grad_norm": 3.3125,
      "learning_rate": 2.982234787417152e-05,
      "loss": 0.8959,
      "step": 396490
    },
    {
      "epoch": 1.3896336484104204,
      "grad_norm": 2.53125,
      "learning_rate": 2.982169884550781e-05,
      "loss": 0.843,
      "step": 396500
    },
    {
      "epoch": 1.389668695917316,
      "grad_norm": 3.296875,
      "learning_rate": 2.982104981684411e-05,
      "loss": 0.8597,
      "step": 396510
    },
    {
      "epoch": 1.3897037434242114,
      "grad_norm": 2.484375,
      "learning_rate": 2.9820400788180407e-05,
      "loss": 0.7728,
      "step": 396520
    },
    {
      "epoch": 1.3897387909311072,
      "grad_norm": 2.953125,
      "learning_rate": 2.9819751759516705e-05,
      "loss": 0.8267,
      "step": 396530
    },
    {
      "epoch": 1.3897738384380027,
      "grad_norm": 2.9375,
      "learning_rate": 2.9819102730853006e-05,
      "loss": 0.751,
      "step": 396540
    },
    {
      "epoch": 1.3898088859448983,
      "grad_norm": 2.671875,
      "learning_rate": 2.9818453702189304e-05,
      "loss": 0.9024,
      "step": 396550
    },
    {
      "epoch": 1.389843933451794,
      "grad_norm": 3.46875,
      "learning_rate": 2.9817804673525602e-05,
      "loss": 0.8721,
      "step": 396560
    },
    {
      "epoch": 1.3898789809586896,
      "grad_norm": 2.703125,
      "learning_rate": 2.98171556448619e-05,
      "loss": 0.8383,
      "step": 396570
    },
    {
      "epoch": 1.389914028465585,
      "grad_norm": 2.90625,
      "learning_rate": 2.9816506616198198e-05,
      "loss": 0.9358,
      "step": 396580
    },
    {
      "epoch": 1.3899490759724806,
      "grad_norm": 2.6875,
      "learning_rate": 2.9815857587534496e-05,
      "loss": 0.8425,
      "step": 396590
    },
    {
      "epoch": 1.3899841234793762,
      "grad_norm": 2.953125,
      "learning_rate": 2.9815208558870794e-05,
      "loss": 0.8763,
      "step": 396600
    },
    {
      "epoch": 1.390019170986272,
      "grad_norm": 2.578125,
      "learning_rate": 2.9814559530207092e-05,
      "loss": 0.8257,
      "step": 396610
    },
    {
      "epoch": 1.3900542184931675,
      "grad_norm": 2.875,
      "learning_rate": 2.981391050154339e-05,
      "loss": 0.7941,
      "step": 396620
    },
    {
      "epoch": 1.390089266000063,
      "grad_norm": 2.640625,
      "learning_rate": 2.9813261472879688e-05,
      "loss": 0.7935,
      "step": 396630
    },
    {
      "epoch": 1.3901243135069588,
      "grad_norm": 2.90625,
      "learning_rate": 2.9812612444215986e-05,
      "loss": 0.864,
      "step": 396640
    },
    {
      "epoch": 1.3901593610138543,
      "grad_norm": 3.46875,
      "learning_rate": 2.9811963415552284e-05,
      "loss": 0.8698,
      "step": 396650
    },
    {
      "epoch": 1.3901944085207498,
      "grad_norm": 2.5625,
      "learning_rate": 2.9811314386888582e-05,
      "loss": 0.855,
      "step": 396660
    },
    {
      "epoch": 1.3902294560276456,
      "grad_norm": 2.671875,
      "learning_rate": 2.981066535822488e-05,
      "loss": 0.7902,
      "step": 396670
    },
    {
      "epoch": 1.3902645035345411,
      "grad_norm": 2.796875,
      "learning_rate": 2.981001632956118e-05,
      "loss": 0.8191,
      "step": 396680
    },
    {
      "epoch": 1.3902995510414367,
      "grad_norm": 2.90625,
      "learning_rate": 2.980936730089748e-05,
      "loss": 0.8998,
      "step": 396690
    },
    {
      "epoch": 1.3903345985483322,
      "grad_norm": 2.34375,
      "learning_rate": 2.9808718272233778e-05,
      "loss": 0.8043,
      "step": 396700
    },
    {
      "epoch": 1.3903696460552277,
      "grad_norm": 2.9375,
      "learning_rate": 2.9808069243570076e-05,
      "loss": 0.8117,
      "step": 396710
    },
    {
      "epoch": 1.3904046935621235,
      "grad_norm": 3.109375,
      "learning_rate": 2.9807420214906374e-05,
      "loss": 0.8514,
      "step": 396720
    },
    {
      "epoch": 1.390439741069019,
      "grad_norm": 2.53125,
      "learning_rate": 2.980677118624267e-05,
      "loss": 0.7784,
      "step": 396730
    },
    {
      "epoch": 1.3904747885759146,
      "grad_norm": 2.828125,
      "learning_rate": 2.980612215757897e-05,
      "loss": 0.8341,
      "step": 396740
    },
    {
      "epoch": 1.3905098360828103,
      "grad_norm": 2.984375,
      "learning_rate": 2.9805473128915268e-05,
      "loss": 0.8453,
      "step": 396750
    },
    {
      "epoch": 1.3905448835897058,
      "grad_norm": 3.21875,
      "learning_rate": 2.9804824100251566e-05,
      "loss": 0.8878,
      "step": 396760
    },
    {
      "epoch": 1.3905799310966014,
      "grad_norm": 2.859375,
      "learning_rate": 2.9804175071587864e-05,
      "loss": 0.8745,
      "step": 396770
    },
    {
      "epoch": 1.3906149786034971,
      "grad_norm": 2.59375,
      "learning_rate": 2.980352604292416e-05,
      "loss": 0.8401,
      "step": 396780
    },
    {
      "epoch": 1.3906500261103927,
      "grad_norm": 2.40625,
      "learning_rate": 2.980287701426046e-05,
      "loss": 0.782,
      "step": 396790
    },
    {
      "epoch": 1.3906850736172882,
      "grad_norm": 3.0625,
      "learning_rate": 2.9802227985596758e-05,
      "loss": 0.9202,
      "step": 396800
    },
    {
      "epoch": 1.3907201211241838,
      "grad_norm": 2.90625,
      "learning_rate": 2.9801578956933056e-05,
      "loss": 0.8379,
      "step": 396810
    },
    {
      "epoch": 1.3907551686310795,
      "grad_norm": 2.75,
      "learning_rate": 2.9800929928269357e-05,
      "loss": 0.8466,
      "step": 396820
    },
    {
      "epoch": 1.390790216137975,
      "grad_norm": 2.8125,
      "learning_rate": 2.9800280899605655e-05,
      "loss": 0.8927,
      "step": 396830
    },
    {
      "epoch": 1.3908252636448706,
      "grad_norm": 2.9375,
      "learning_rate": 2.9799631870941953e-05,
      "loss": 0.8313,
      "step": 396840
    },
    {
      "epoch": 1.3908603111517661,
      "grad_norm": 2.6875,
      "learning_rate": 2.979898284227825e-05,
      "loss": 0.8487,
      "step": 396850
    },
    {
      "epoch": 1.3908953586586619,
      "grad_norm": 2.546875,
      "learning_rate": 2.979833381361455e-05,
      "loss": 0.7476,
      "step": 396860
    },
    {
      "epoch": 1.3909304061655574,
      "grad_norm": 2.859375,
      "learning_rate": 2.9797684784950847e-05,
      "loss": 0.8743,
      "step": 396870
    },
    {
      "epoch": 1.390965453672453,
      "grad_norm": 3.296875,
      "learning_rate": 2.9797035756287138e-05,
      "loss": 0.9058,
      "step": 396880
    },
    {
      "epoch": 1.3910005011793487,
      "grad_norm": 2.8125,
      "learning_rate": 2.9796386727623436e-05,
      "loss": 0.8315,
      "step": 396890
    },
    {
      "epoch": 1.3910355486862442,
      "grad_norm": 2.703125,
      "learning_rate": 2.9795737698959734e-05,
      "loss": 0.8135,
      "step": 396900
    },
    {
      "epoch": 1.3910705961931398,
      "grad_norm": 3.015625,
      "learning_rate": 2.9795088670296036e-05,
      "loss": 0.8645,
      "step": 396910
    },
    {
      "epoch": 1.3911056437000355,
      "grad_norm": 3.265625,
      "learning_rate": 2.9794439641632334e-05,
      "loss": 0.867,
      "step": 396920
    },
    {
      "epoch": 1.391140691206931,
      "grad_norm": 2.96875,
      "learning_rate": 2.979379061296863e-05,
      "loss": 0.8629,
      "step": 396930
    },
    {
      "epoch": 1.3911757387138266,
      "grad_norm": 3.0625,
      "learning_rate": 2.979314158430493e-05,
      "loss": 0.8559,
      "step": 396940
    },
    {
      "epoch": 1.3912107862207221,
      "grad_norm": 2.953125,
      "learning_rate": 2.9792492555641228e-05,
      "loss": 0.7731,
      "step": 396950
    },
    {
      "epoch": 1.3912458337276177,
      "grad_norm": 3.109375,
      "learning_rate": 2.9791843526977526e-05,
      "loss": 0.8689,
      "step": 396960
    },
    {
      "epoch": 1.3912808812345134,
      "grad_norm": 2.953125,
      "learning_rate": 2.9791194498313824e-05,
      "loss": 0.7971,
      "step": 396970
    },
    {
      "epoch": 1.391315928741409,
      "grad_norm": 2.671875,
      "learning_rate": 2.979054546965012e-05,
      "loss": 0.8321,
      "step": 396980
    },
    {
      "epoch": 1.3913509762483045,
      "grad_norm": 2.765625,
      "learning_rate": 2.978989644098642e-05,
      "loss": 0.9237,
      "step": 396990
    },
    {
      "epoch": 1.3913860237552003,
      "grad_norm": 3.015625,
      "learning_rate": 2.9789247412322718e-05,
      "loss": 0.8533,
      "step": 397000
    },
    {
      "epoch": 1.3914210712620958,
      "grad_norm": 2.875,
      "learning_rate": 2.9788598383659016e-05,
      "loss": 0.8325,
      "step": 397010
    },
    {
      "epoch": 1.3914561187689913,
      "grad_norm": 2.703125,
      "learning_rate": 2.9787949354995314e-05,
      "loss": 0.8491,
      "step": 397020
    },
    {
      "epoch": 1.391491166275887,
      "grad_norm": 2.625,
      "learning_rate": 2.978730032633161e-05,
      "loss": 0.8701,
      "step": 397030
    },
    {
      "epoch": 1.3915262137827826,
      "grad_norm": 2.828125,
      "learning_rate": 2.978665129766791e-05,
      "loss": 0.8539,
      "step": 397040
    },
    {
      "epoch": 1.3915612612896782,
      "grad_norm": 3.03125,
      "learning_rate": 2.978600226900421e-05,
      "loss": 0.8296,
      "step": 397050
    },
    {
      "epoch": 1.3915963087965737,
      "grad_norm": 3.203125,
      "learning_rate": 2.978535324034051e-05,
      "loss": 0.8824,
      "step": 397060
    },
    {
      "epoch": 1.3916313563034692,
      "grad_norm": 3.03125,
      "learning_rate": 2.9784704211676807e-05,
      "loss": 0.8698,
      "step": 397070
    },
    {
      "epoch": 1.391666403810365,
      "grad_norm": 3.015625,
      "learning_rate": 2.9784055183013105e-05,
      "loss": 0.9621,
      "step": 397080
    },
    {
      "epoch": 1.3917014513172605,
      "grad_norm": 2.84375,
      "learning_rate": 2.9783406154349403e-05,
      "loss": 0.923,
      "step": 397090
    },
    {
      "epoch": 1.391736498824156,
      "grad_norm": 2.78125,
      "learning_rate": 2.97827571256857e-05,
      "loss": 0.7919,
      "step": 397100
    },
    {
      "epoch": 1.3917715463310518,
      "grad_norm": 2.828125,
      "learning_rate": 2.9782108097022e-05,
      "loss": 0.8729,
      "step": 397110
    },
    {
      "epoch": 1.3918065938379474,
      "grad_norm": 2.734375,
      "learning_rate": 2.9781459068358297e-05,
      "loss": 0.8191,
      "step": 397120
    },
    {
      "epoch": 1.391841641344843,
      "grad_norm": 3.5,
      "learning_rate": 2.9780810039694595e-05,
      "loss": 0.8973,
      "step": 397130
    },
    {
      "epoch": 1.3918766888517387,
      "grad_norm": 2.796875,
      "learning_rate": 2.9780161011030893e-05,
      "loss": 0.8502,
      "step": 397140
    },
    {
      "epoch": 1.3919117363586342,
      "grad_norm": 3.03125,
      "learning_rate": 2.977951198236719e-05,
      "loss": 0.9332,
      "step": 397150
    },
    {
      "epoch": 1.3919467838655297,
      "grad_norm": 2.875,
      "learning_rate": 2.977886295370349e-05,
      "loss": 0.7958,
      "step": 397160
    },
    {
      "epoch": 1.3919818313724253,
      "grad_norm": 2.953125,
      "learning_rate": 2.9778213925039787e-05,
      "loss": 0.8361,
      "step": 397170
    },
    {
      "epoch": 1.3920168788793208,
      "grad_norm": 2.75,
      "learning_rate": 2.9777564896376085e-05,
      "loss": 0.8473,
      "step": 397180
    },
    {
      "epoch": 1.3920519263862166,
      "grad_norm": 3.109375,
      "learning_rate": 2.9776915867712386e-05,
      "loss": 0.8913,
      "step": 397190
    },
    {
      "epoch": 1.392086973893112,
      "grad_norm": 2.625,
      "learning_rate": 2.9776266839048684e-05,
      "loss": 0.8511,
      "step": 397200
    },
    {
      "epoch": 1.3921220214000076,
      "grad_norm": 3.046875,
      "learning_rate": 2.9775617810384982e-05,
      "loss": 0.7869,
      "step": 397210
    },
    {
      "epoch": 1.3921570689069034,
      "grad_norm": 2.90625,
      "learning_rate": 2.977496878172128e-05,
      "loss": 0.8218,
      "step": 397220
    },
    {
      "epoch": 1.392192116413799,
      "grad_norm": 2.734375,
      "learning_rate": 2.977431975305758e-05,
      "loss": 0.8456,
      "step": 397230
    },
    {
      "epoch": 1.3922271639206945,
      "grad_norm": 3.578125,
      "learning_rate": 2.9773670724393876e-05,
      "loss": 0.8963,
      "step": 397240
    },
    {
      "epoch": 1.3922622114275902,
      "grad_norm": 2.984375,
      "learning_rate": 2.9773021695730174e-05,
      "loss": 0.9085,
      "step": 397250
    },
    {
      "epoch": 1.3922972589344857,
      "grad_norm": 2.65625,
      "learning_rate": 2.9772372667066466e-05,
      "loss": 0.8254,
      "step": 397260
    },
    {
      "epoch": 1.3923323064413813,
      "grad_norm": 2.90625,
      "learning_rate": 2.9771723638402767e-05,
      "loss": 0.8155,
      "step": 397270
    },
    {
      "epoch": 1.3923673539482768,
      "grad_norm": 2.734375,
      "learning_rate": 2.9771074609739065e-05,
      "loss": 0.8758,
      "step": 397280
    },
    {
      "epoch": 1.3924024014551724,
      "grad_norm": 2.90625,
      "learning_rate": 2.9770425581075363e-05,
      "loss": 0.823,
      "step": 397290
    },
    {
      "epoch": 1.3924374489620681,
      "grad_norm": 2.953125,
      "learning_rate": 2.976977655241166e-05,
      "loss": 0.8435,
      "step": 397300
    },
    {
      "epoch": 1.3924724964689636,
      "grad_norm": 3.296875,
      "learning_rate": 2.976912752374796e-05,
      "loss": 0.962,
      "step": 397310
    },
    {
      "epoch": 1.3925075439758592,
      "grad_norm": 3.015625,
      "learning_rate": 2.9768478495084257e-05,
      "loss": 0.8537,
      "step": 397320
    },
    {
      "epoch": 1.392542591482755,
      "grad_norm": 3.5625,
      "learning_rate": 2.9767829466420555e-05,
      "loss": 0.8847,
      "step": 397330
    },
    {
      "epoch": 1.3925776389896505,
      "grad_norm": 2.578125,
      "learning_rate": 2.9767180437756853e-05,
      "loss": 0.817,
      "step": 397340
    },
    {
      "epoch": 1.392612686496546,
      "grad_norm": 2.671875,
      "learning_rate": 2.976653140909315e-05,
      "loss": 0.8516,
      "step": 397350
    },
    {
      "epoch": 1.3926477340034418,
      "grad_norm": 2.96875,
      "learning_rate": 2.976588238042945e-05,
      "loss": 0.8179,
      "step": 397360
    },
    {
      "epoch": 1.3926827815103373,
      "grad_norm": 2.6875,
      "learning_rate": 2.9765233351765747e-05,
      "loss": 0.85,
      "step": 397370
    },
    {
      "epoch": 1.3927178290172328,
      "grad_norm": 2.8125,
      "learning_rate": 2.9764584323102045e-05,
      "loss": 0.8326,
      "step": 397380
    },
    {
      "epoch": 1.3927528765241284,
      "grad_norm": 2.703125,
      "learning_rate": 2.9763935294438343e-05,
      "loss": 0.8029,
      "step": 397390
    },
    {
      "epoch": 1.392787924031024,
      "grad_norm": 2.78125,
      "learning_rate": 2.976328626577464e-05,
      "loss": 0.8938,
      "step": 397400
    },
    {
      "epoch": 1.3928229715379197,
      "grad_norm": 2.71875,
      "learning_rate": 2.9762637237110942e-05,
      "loss": 0.842,
      "step": 397410
    },
    {
      "epoch": 1.3928580190448152,
      "grad_norm": 2.9375,
      "learning_rate": 2.976198820844724e-05,
      "loss": 0.8162,
      "step": 397420
    },
    {
      "epoch": 1.3928930665517107,
      "grad_norm": 2.734375,
      "learning_rate": 2.976133917978354e-05,
      "loss": 0.8058,
      "step": 397430
    },
    {
      "epoch": 1.3929281140586065,
      "grad_norm": 2.90625,
      "learning_rate": 2.9760690151119836e-05,
      "loss": 0.8047,
      "step": 397440
    },
    {
      "epoch": 1.392963161565502,
      "grad_norm": 3.296875,
      "learning_rate": 2.9760041122456134e-05,
      "loss": 0.8639,
      "step": 397450
    },
    {
      "epoch": 1.3929982090723976,
      "grad_norm": 3.296875,
      "learning_rate": 2.9759392093792432e-05,
      "loss": 0.8897,
      "step": 397460
    },
    {
      "epoch": 1.3930332565792933,
      "grad_norm": 3.1875,
      "learning_rate": 2.975874306512873e-05,
      "loss": 0.8755,
      "step": 397470
    },
    {
      "epoch": 1.3930683040861889,
      "grad_norm": 2.875,
      "learning_rate": 2.975809403646503e-05,
      "loss": 0.8695,
      "step": 397480
    },
    {
      "epoch": 1.3931033515930844,
      "grad_norm": 2.53125,
      "learning_rate": 2.9757445007801326e-05,
      "loss": 0.8241,
      "step": 397490
    },
    {
      "epoch": 1.39313839909998,
      "grad_norm": 3.109375,
      "learning_rate": 2.9756795979137624e-05,
      "loss": 0.8431,
      "step": 397500
    },
    {
      "epoch": 1.3931734466068757,
      "grad_norm": 3.578125,
      "learning_rate": 2.9756146950473922e-05,
      "loss": 0.8675,
      "step": 397510
    },
    {
      "epoch": 1.3932084941137712,
      "grad_norm": 2.765625,
      "learning_rate": 2.975549792181022e-05,
      "loss": 0.7814,
      "step": 397520
    },
    {
      "epoch": 1.3932435416206668,
      "grad_norm": 3.0,
      "learning_rate": 2.975484889314652e-05,
      "loss": 0.7835,
      "step": 397530
    },
    {
      "epoch": 1.3932785891275623,
      "grad_norm": 3.0,
      "learning_rate": 2.9754199864482816e-05,
      "loss": 0.7694,
      "step": 397540
    },
    {
      "epoch": 1.393313636634458,
      "grad_norm": 2.5625,
      "learning_rate": 2.9753550835819118e-05,
      "loss": 0.7309,
      "step": 397550
    },
    {
      "epoch": 1.3933486841413536,
      "grad_norm": 3.015625,
      "learning_rate": 2.9752901807155416e-05,
      "loss": 0.8698,
      "step": 397560
    },
    {
      "epoch": 1.3933837316482491,
      "grad_norm": 3.203125,
      "learning_rate": 2.9752252778491714e-05,
      "loss": 0.8956,
      "step": 397570
    },
    {
      "epoch": 1.393418779155145,
      "grad_norm": 3.21875,
      "learning_rate": 2.9751603749828012e-05,
      "loss": 0.9539,
      "step": 397580
    },
    {
      "epoch": 1.3934538266620404,
      "grad_norm": 2.9375,
      "learning_rate": 2.975095472116431e-05,
      "loss": 0.8762,
      "step": 397590
    },
    {
      "epoch": 1.393488874168936,
      "grad_norm": 3.046875,
      "learning_rate": 2.9750305692500608e-05,
      "loss": 0.8818,
      "step": 397600
    },
    {
      "epoch": 1.3935239216758317,
      "grad_norm": 2.734375,
      "learning_rate": 2.9749656663836906e-05,
      "loss": 0.8098,
      "step": 397610
    },
    {
      "epoch": 1.3935589691827273,
      "grad_norm": 3.71875,
      "learning_rate": 2.9749007635173204e-05,
      "loss": 0.8337,
      "step": 397620
    },
    {
      "epoch": 1.3935940166896228,
      "grad_norm": 3.109375,
      "learning_rate": 2.9748358606509495e-05,
      "loss": 0.8931,
      "step": 397630
    },
    {
      "epoch": 1.3936290641965183,
      "grad_norm": 2.6875,
      "learning_rate": 2.9747709577845796e-05,
      "loss": 0.8385,
      "step": 397640
    },
    {
      "epoch": 1.3936641117034139,
      "grad_norm": 2.953125,
      "learning_rate": 2.9747060549182094e-05,
      "loss": 0.9945,
      "step": 397650
    },
    {
      "epoch": 1.3936991592103096,
      "grad_norm": 3.125,
      "learning_rate": 2.9746411520518392e-05,
      "loss": 0.9211,
      "step": 397660
    },
    {
      "epoch": 1.3937342067172052,
      "grad_norm": 2.921875,
      "learning_rate": 2.974576249185469e-05,
      "loss": 0.8463,
      "step": 397670
    },
    {
      "epoch": 1.3937692542241007,
      "grad_norm": 2.609375,
      "learning_rate": 2.974511346319099e-05,
      "loss": 0.8391,
      "step": 397680
    },
    {
      "epoch": 1.3938043017309965,
      "grad_norm": 2.75,
      "learning_rate": 2.9744464434527286e-05,
      "loss": 0.7708,
      "step": 397690
    },
    {
      "epoch": 1.393839349237892,
      "grad_norm": 3.15625,
      "learning_rate": 2.9743815405863584e-05,
      "loss": 0.7894,
      "step": 397700
    },
    {
      "epoch": 1.3938743967447875,
      "grad_norm": 3.109375,
      "learning_rate": 2.9743166377199882e-05,
      "loss": 1.0255,
      "step": 397710
    },
    {
      "epoch": 1.3939094442516833,
      "grad_norm": 3.140625,
      "learning_rate": 2.974251734853618e-05,
      "loss": 0.8363,
      "step": 397720
    },
    {
      "epoch": 1.3939444917585788,
      "grad_norm": 3.203125,
      "learning_rate": 2.974186831987248e-05,
      "loss": 0.8762,
      "step": 397730
    },
    {
      "epoch": 1.3939795392654744,
      "grad_norm": 3.0625,
      "learning_rate": 2.9741219291208776e-05,
      "loss": 0.8559,
      "step": 397740
    },
    {
      "epoch": 1.39401458677237,
      "grad_norm": 2.8125,
      "learning_rate": 2.9740570262545074e-05,
      "loss": 0.8537,
      "step": 397750
    },
    {
      "epoch": 1.3940496342792654,
      "grad_norm": 2.796875,
      "learning_rate": 2.9739921233881372e-05,
      "loss": 0.8313,
      "step": 397760
    },
    {
      "epoch": 1.3940846817861612,
      "grad_norm": 2.546875,
      "learning_rate": 2.973927220521767e-05,
      "loss": 0.9107,
      "step": 397770
    },
    {
      "epoch": 1.3941197292930567,
      "grad_norm": 2.65625,
      "learning_rate": 2.9738623176553972e-05,
      "loss": 0.8309,
      "step": 397780
    },
    {
      "epoch": 1.3941547767999523,
      "grad_norm": 3.15625,
      "learning_rate": 2.973797414789027e-05,
      "loss": 0.8981,
      "step": 397790
    },
    {
      "epoch": 1.394189824306848,
      "grad_norm": 2.890625,
      "learning_rate": 2.9737325119226568e-05,
      "loss": 0.9037,
      "step": 397800
    },
    {
      "epoch": 1.3942248718137435,
      "grad_norm": 2.8125,
      "learning_rate": 2.9736676090562866e-05,
      "loss": 0.8152,
      "step": 397810
    },
    {
      "epoch": 1.394259919320639,
      "grad_norm": 2.84375,
      "learning_rate": 2.9736027061899164e-05,
      "loss": 0.8257,
      "step": 397820
    },
    {
      "epoch": 1.3942949668275348,
      "grad_norm": 3.265625,
      "learning_rate": 2.9735378033235462e-05,
      "loss": 0.8416,
      "step": 397830
    },
    {
      "epoch": 1.3943300143344304,
      "grad_norm": 3.203125,
      "learning_rate": 2.973472900457176e-05,
      "loss": 0.8707,
      "step": 397840
    },
    {
      "epoch": 1.394365061841326,
      "grad_norm": 3.09375,
      "learning_rate": 2.9734079975908058e-05,
      "loss": 0.9026,
      "step": 397850
    },
    {
      "epoch": 1.3944001093482215,
      "grad_norm": 2.828125,
      "learning_rate": 2.9733430947244356e-05,
      "loss": 0.8504,
      "step": 397860
    },
    {
      "epoch": 1.394435156855117,
      "grad_norm": 3.171875,
      "learning_rate": 2.9732781918580654e-05,
      "loss": 0.8195,
      "step": 397870
    },
    {
      "epoch": 1.3944702043620127,
      "grad_norm": 2.828125,
      "learning_rate": 2.9732132889916952e-05,
      "loss": 0.8341,
      "step": 397880
    },
    {
      "epoch": 1.3945052518689083,
      "grad_norm": 3.03125,
      "learning_rate": 2.973148386125325e-05,
      "loss": 0.8348,
      "step": 397890
    },
    {
      "epoch": 1.3945402993758038,
      "grad_norm": 2.84375,
      "learning_rate": 2.9730834832589548e-05,
      "loss": 0.8776,
      "step": 397900
    },
    {
      "epoch": 1.3945753468826996,
      "grad_norm": 3.203125,
      "learning_rate": 2.9730185803925846e-05,
      "loss": 0.898,
      "step": 397910
    },
    {
      "epoch": 1.394610394389595,
      "grad_norm": 3.265625,
      "learning_rate": 2.9729536775262147e-05,
      "loss": 0.9037,
      "step": 397920
    },
    {
      "epoch": 1.3946454418964906,
      "grad_norm": 2.9375,
      "learning_rate": 2.9728887746598445e-05,
      "loss": 0.8793,
      "step": 397930
    },
    {
      "epoch": 1.3946804894033864,
      "grad_norm": 3.109375,
      "learning_rate": 2.9728238717934743e-05,
      "loss": 0.8512,
      "step": 397940
    },
    {
      "epoch": 1.394715536910282,
      "grad_norm": 3.296875,
      "learning_rate": 2.972758968927104e-05,
      "loss": 0.8447,
      "step": 397950
    },
    {
      "epoch": 1.3947505844171775,
      "grad_norm": 3.03125,
      "learning_rate": 2.972694066060734e-05,
      "loss": 0.7706,
      "step": 397960
    },
    {
      "epoch": 1.394785631924073,
      "grad_norm": 2.546875,
      "learning_rate": 2.9726291631943637e-05,
      "loss": 0.8366,
      "step": 397970
    },
    {
      "epoch": 1.3948206794309685,
      "grad_norm": 2.859375,
      "learning_rate": 2.9725642603279935e-05,
      "loss": 0.8469,
      "step": 397980
    },
    {
      "epoch": 1.3948557269378643,
      "grad_norm": 2.953125,
      "learning_rate": 2.9724993574616233e-05,
      "loss": 0.8499,
      "step": 397990
    },
    {
      "epoch": 1.3948907744447598,
      "grad_norm": 2.640625,
      "learning_rate": 2.972434454595253e-05,
      "loss": 0.7894,
      "step": 398000
    },
    {
      "epoch": 1.3949258219516554,
      "grad_norm": 2.625,
      "learning_rate": 2.9723695517288826e-05,
      "loss": 0.8944,
      "step": 398010
    },
    {
      "epoch": 1.3949608694585511,
      "grad_norm": 2.828125,
      "learning_rate": 2.9723046488625124e-05,
      "loss": 0.895,
      "step": 398020
    },
    {
      "epoch": 1.3949959169654467,
      "grad_norm": 2.828125,
      "learning_rate": 2.9722397459961422e-05,
      "loss": 0.9094,
      "step": 398030
    },
    {
      "epoch": 1.3950309644723422,
      "grad_norm": 2.90625,
      "learning_rate": 2.972174843129772e-05,
      "loss": 0.879,
      "step": 398040
    },
    {
      "epoch": 1.395066011979238,
      "grad_norm": 2.546875,
      "learning_rate": 2.9721099402634018e-05,
      "loss": 0.8864,
      "step": 398050
    },
    {
      "epoch": 1.3951010594861335,
      "grad_norm": 2.4375,
      "learning_rate": 2.9720450373970316e-05,
      "loss": 0.8383,
      "step": 398060
    },
    {
      "epoch": 1.395136106993029,
      "grad_norm": 2.53125,
      "learning_rate": 2.9719801345306614e-05,
      "loss": 0.7621,
      "step": 398070
    },
    {
      "epoch": 1.3951711544999246,
      "grad_norm": 2.921875,
      "learning_rate": 2.9719152316642912e-05,
      "loss": 0.8533,
      "step": 398080
    },
    {
      "epoch": 1.39520620200682,
      "grad_norm": 3.140625,
      "learning_rate": 2.971850328797921e-05,
      "loss": 0.8109,
      "step": 398090
    },
    {
      "epoch": 1.3952412495137159,
      "grad_norm": 3.0,
      "learning_rate": 2.9717854259315508e-05,
      "loss": 0.8821,
      "step": 398100
    },
    {
      "epoch": 1.3952762970206114,
      "grad_norm": 3.15625,
      "learning_rate": 2.9717205230651806e-05,
      "loss": 0.9636,
      "step": 398110
    },
    {
      "epoch": 1.395311344527507,
      "grad_norm": 2.65625,
      "learning_rate": 2.9716556201988104e-05,
      "loss": 0.8756,
      "step": 398120
    },
    {
      "epoch": 1.3953463920344027,
      "grad_norm": 2.890625,
      "learning_rate": 2.9715907173324402e-05,
      "loss": 0.8635,
      "step": 398130
    },
    {
      "epoch": 1.3953814395412982,
      "grad_norm": 3.5,
      "learning_rate": 2.97152581446607e-05,
      "loss": 0.8798,
      "step": 398140
    },
    {
      "epoch": 1.3954164870481938,
      "grad_norm": 2.5625,
      "learning_rate": 2.9714609115997e-05,
      "loss": 0.8554,
      "step": 398150
    },
    {
      "epoch": 1.3954515345550895,
      "grad_norm": 3.28125,
      "learning_rate": 2.97139600873333e-05,
      "loss": 0.9351,
      "step": 398160
    },
    {
      "epoch": 1.395486582061985,
      "grad_norm": 2.875,
      "learning_rate": 2.9713311058669597e-05,
      "loss": 0.8593,
      "step": 398170
    },
    {
      "epoch": 1.3955216295688806,
      "grad_norm": 2.9375,
      "learning_rate": 2.9712662030005895e-05,
      "loss": 0.8521,
      "step": 398180
    },
    {
      "epoch": 1.3955566770757764,
      "grad_norm": 3.390625,
      "learning_rate": 2.9712013001342193e-05,
      "loss": 0.8716,
      "step": 398190
    },
    {
      "epoch": 1.3955917245826719,
      "grad_norm": 2.625,
      "learning_rate": 2.971136397267849e-05,
      "loss": 0.9283,
      "step": 398200
    },
    {
      "epoch": 1.3956267720895674,
      "grad_norm": 2.65625,
      "learning_rate": 2.971071494401479e-05,
      "loss": 0.7585,
      "step": 398210
    },
    {
      "epoch": 1.395661819596463,
      "grad_norm": 3.078125,
      "learning_rate": 2.9710065915351087e-05,
      "loss": 0.8681,
      "step": 398220
    },
    {
      "epoch": 1.3956968671033585,
      "grad_norm": 3.125,
      "learning_rate": 2.9709416886687385e-05,
      "loss": 0.8951,
      "step": 398230
    },
    {
      "epoch": 1.3957319146102543,
      "grad_norm": 2.984375,
      "learning_rate": 2.9708767858023683e-05,
      "loss": 0.7897,
      "step": 398240
    },
    {
      "epoch": 1.3957669621171498,
      "grad_norm": 2.65625,
      "learning_rate": 2.970811882935998e-05,
      "loss": 0.8565,
      "step": 398250
    },
    {
      "epoch": 1.3958020096240453,
      "grad_norm": 2.609375,
      "learning_rate": 2.970746980069628e-05,
      "loss": 0.7818,
      "step": 398260
    },
    {
      "epoch": 1.395837057130941,
      "grad_norm": 2.703125,
      "learning_rate": 2.9706820772032577e-05,
      "loss": 0.8558,
      "step": 398270
    },
    {
      "epoch": 1.3958721046378366,
      "grad_norm": 3.15625,
      "learning_rate": 2.9706171743368875e-05,
      "loss": 0.802,
      "step": 398280
    },
    {
      "epoch": 1.3959071521447322,
      "grad_norm": 3.125,
      "learning_rate": 2.9705522714705177e-05,
      "loss": 0.8656,
      "step": 398290
    },
    {
      "epoch": 1.395942199651628,
      "grad_norm": 2.78125,
      "learning_rate": 2.9704873686041475e-05,
      "loss": 0.8478,
      "step": 398300
    },
    {
      "epoch": 1.3959772471585234,
      "grad_norm": 2.890625,
      "learning_rate": 2.9704224657377773e-05,
      "loss": 0.9056,
      "step": 398310
    },
    {
      "epoch": 1.396012294665419,
      "grad_norm": 2.6875,
      "learning_rate": 2.970357562871407e-05,
      "loss": 0.82,
      "step": 398320
    },
    {
      "epoch": 1.3960473421723145,
      "grad_norm": 3.328125,
      "learning_rate": 2.970292660005037e-05,
      "loss": 0.8862,
      "step": 398330
    },
    {
      "epoch": 1.39608238967921,
      "grad_norm": 2.90625,
      "learning_rate": 2.9702277571386667e-05,
      "loss": 0.7982,
      "step": 398340
    },
    {
      "epoch": 1.3961174371861058,
      "grad_norm": 2.765625,
      "learning_rate": 2.9701628542722965e-05,
      "loss": 0.8745,
      "step": 398350
    },
    {
      "epoch": 1.3961524846930013,
      "grad_norm": 3.015625,
      "learning_rate": 2.9700979514059263e-05,
      "loss": 0.772,
      "step": 398360
    },
    {
      "epoch": 1.3961875321998969,
      "grad_norm": 2.796875,
      "learning_rate": 2.970033048539556e-05,
      "loss": 0.8286,
      "step": 398370
    },
    {
      "epoch": 1.3962225797067926,
      "grad_norm": 2.46875,
      "learning_rate": 2.9699681456731855e-05,
      "loss": 0.91,
      "step": 398380
    },
    {
      "epoch": 1.3962576272136882,
      "grad_norm": 2.84375,
      "learning_rate": 2.9699032428068153e-05,
      "loss": 0.8166,
      "step": 398390
    },
    {
      "epoch": 1.3962926747205837,
      "grad_norm": 3.078125,
      "learning_rate": 2.969838339940445e-05,
      "loss": 0.7963,
      "step": 398400
    },
    {
      "epoch": 1.3963277222274795,
      "grad_norm": 2.65625,
      "learning_rate": 2.969773437074075e-05,
      "loss": 0.8529,
      "step": 398410
    },
    {
      "epoch": 1.396362769734375,
      "grad_norm": 2.75,
      "learning_rate": 2.9697085342077047e-05,
      "loss": 0.8973,
      "step": 398420
    },
    {
      "epoch": 1.3963978172412705,
      "grad_norm": 3.078125,
      "learning_rate": 2.9696436313413345e-05,
      "loss": 0.8129,
      "step": 398430
    },
    {
      "epoch": 1.396432864748166,
      "grad_norm": 3.125,
      "learning_rate": 2.9695787284749643e-05,
      "loss": 0.8577,
      "step": 398440
    },
    {
      "epoch": 1.3964679122550616,
      "grad_norm": 2.984375,
      "learning_rate": 2.969513825608594e-05,
      "loss": 0.8637,
      "step": 398450
    },
    {
      "epoch": 1.3965029597619574,
      "grad_norm": 3.078125,
      "learning_rate": 2.969448922742224e-05,
      "loss": 0.8036,
      "step": 398460
    },
    {
      "epoch": 1.396538007268853,
      "grad_norm": 2.84375,
      "learning_rate": 2.9693840198758537e-05,
      "loss": 0.8233,
      "step": 398470
    },
    {
      "epoch": 1.3965730547757484,
      "grad_norm": 2.8125,
      "learning_rate": 2.9693191170094835e-05,
      "loss": 0.829,
      "step": 398480
    },
    {
      "epoch": 1.3966081022826442,
      "grad_norm": 2.90625,
      "learning_rate": 2.9692542141431133e-05,
      "loss": 0.9376,
      "step": 398490
    },
    {
      "epoch": 1.3966431497895397,
      "grad_norm": 3.078125,
      "learning_rate": 2.969189311276743e-05,
      "loss": 0.927,
      "step": 398500
    },
    {
      "epoch": 1.3966781972964353,
      "grad_norm": 3.0,
      "learning_rate": 2.9691244084103733e-05,
      "loss": 0.829,
      "step": 398510
    },
    {
      "epoch": 1.396713244803331,
      "grad_norm": 3.015625,
      "learning_rate": 2.969059505544003e-05,
      "loss": 0.9067,
      "step": 398520
    },
    {
      "epoch": 1.3967482923102266,
      "grad_norm": 2.65625,
      "learning_rate": 2.968994602677633e-05,
      "loss": 0.9167,
      "step": 398530
    },
    {
      "epoch": 1.396783339817122,
      "grad_norm": 2.796875,
      "learning_rate": 2.9689296998112627e-05,
      "loss": 0.8598,
      "step": 398540
    },
    {
      "epoch": 1.3968183873240176,
      "grad_norm": 3.28125,
      "learning_rate": 2.9688647969448925e-05,
      "loss": 0.8591,
      "step": 398550
    },
    {
      "epoch": 1.3968534348309132,
      "grad_norm": 3.265625,
      "learning_rate": 2.9687998940785223e-05,
      "loss": 0.9055,
      "step": 398560
    },
    {
      "epoch": 1.396888482337809,
      "grad_norm": 2.875,
      "learning_rate": 2.968734991212152e-05,
      "loss": 0.8728,
      "step": 398570
    },
    {
      "epoch": 1.3969235298447045,
      "grad_norm": 2.6875,
      "learning_rate": 2.968670088345782e-05,
      "loss": 0.8169,
      "step": 398580
    },
    {
      "epoch": 1.3969585773516,
      "grad_norm": 2.953125,
      "learning_rate": 2.9686051854794117e-05,
      "loss": 0.7886,
      "step": 398590
    },
    {
      "epoch": 1.3969936248584958,
      "grad_norm": 2.828125,
      "learning_rate": 2.9685402826130415e-05,
      "loss": 0.833,
      "step": 398600
    },
    {
      "epoch": 1.3970286723653913,
      "grad_norm": 2.859375,
      "learning_rate": 2.9684753797466713e-05,
      "loss": 0.8751,
      "step": 398610
    },
    {
      "epoch": 1.3970637198722868,
      "grad_norm": 2.484375,
      "learning_rate": 2.968410476880301e-05,
      "loss": 0.8963,
      "step": 398620
    },
    {
      "epoch": 1.3970987673791826,
      "grad_norm": 2.890625,
      "learning_rate": 2.968345574013931e-05,
      "loss": 0.9085,
      "step": 398630
    },
    {
      "epoch": 1.3971338148860781,
      "grad_norm": 4.0,
      "learning_rate": 2.9682806711475607e-05,
      "loss": 0.8732,
      "step": 398640
    },
    {
      "epoch": 1.3971688623929737,
      "grad_norm": 3.078125,
      "learning_rate": 2.9682157682811908e-05,
      "loss": 0.8743,
      "step": 398650
    },
    {
      "epoch": 1.3972039098998692,
      "grad_norm": 2.890625,
      "learning_rate": 2.9681508654148206e-05,
      "loss": 0.8326,
      "step": 398660
    },
    {
      "epoch": 1.3972389574067647,
      "grad_norm": 3.171875,
      "learning_rate": 2.9680859625484504e-05,
      "loss": 0.9097,
      "step": 398670
    },
    {
      "epoch": 1.3972740049136605,
      "grad_norm": 2.890625,
      "learning_rate": 2.9680210596820802e-05,
      "loss": 0.9147,
      "step": 398680
    },
    {
      "epoch": 1.397309052420556,
      "grad_norm": 2.984375,
      "learning_rate": 2.96795615681571e-05,
      "loss": 0.7549,
      "step": 398690
    },
    {
      "epoch": 1.3973440999274516,
      "grad_norm": 3.40625,
      "learning_rate": 2.9678912539493398e-05,
      "loss": 0.8198,
      "step": 398700
    },
    {
      "epoch": 1.3973791474343473,
      "grad_norm": 2.65625,
      "learning_rate": 2.9678263510829696e-05,
      "loss": 0.8704,
      "step": 398710
    },
    {
      "epoch": 1.3974141949412429,
      "grad_norm": 2.765625,
      "learning_rate": 2.9677614482165994e-05,
      "loss": 0.7984,
      "step": 398720
    },
    {
      "epoch": 1.3974492424481384,
      "grad_norm": 3.03125,
      "learning_rate": 2.9676965453502292e-05,
      "loss": 0.8731,
      "step": 398730
    },
    {
      "epoch": 1.3974842899550342,
      "grad_norm": 2.46875,
      "learning_rate": 2.967631642483859e-05,
      "loss": 0.7913,
      "step": 398740
    },
    {
      "epoch": 1.3975193374619297,
      "grad_norm": 2.953125,
      "learning_rate": 2.9675667396174888e-05,
      "loss": 0.8854,
      "step": 398750
    },
    {
      "epoch": 1.3975543849688252,
      "grad_norm": 2.796875,
      "learning_rate": 2.9675018367511183e-05,
      "loss": 0.8677,
      "step": 398760
    },
    {
      "epoch": 1.3975894324757208,
      "grad_norm": 3.3125,
      "learning_rate": 2.967436933884748e-05,
      "loss": 0.861,
      "step": 398770
    },
    {
      "epoch": 1.3976244799826165,
      "grad_norm": 2.9375,
      "learning_rate": 2.967372031018378e-05,
      "loss": 0.8182,
      "step": 398780
    },
    {
      "epoch": 1.397659527489512,
      "grad_norm": 2.8125,
      "learning_rate": 2.9673071281520077e-05,
      "loss": 0.888,
      "step": 398790
    },
    {
      "epoch": 1.3976945749964076,
      "grad_norm": 2.71875,
      "learning_rate": 2.9672422252856375e-05,
      "loss": 0.9117,
      "step": 398800
    },
    {
      "epoch": 1.3977296225033031,
      "grad_norm": 3.359375,
      "learning_rate": 2.9671773224192673e-05,
      "loss": 0.8585,
      "step": 398810
    },
    {
      "epoch": 1.3977646700101989,
      "grad_norm": 2.84375,
      "learning_rate": 2.967112419552897e-05,
      "loss": 0.7901,
      "step": 398820
    },
    {
      "epoch": 1.3977997175170944,
      "grad_norm": 2.96875,
      "learning_rate": 2.967047516686527e-05,
      "loss": 0.7926,
      "step": 398830
    },
    {
      "epoch": 1.39783476502399,
      "grad_norm": 2.640625,
      "learning_rate": 2.9669826138201567e-05,
      "loss": 0.8579,
      "step": 398840
    },
    {
      "epoch": 1.3978698125308857,
      "grad_norm": 3.078125,
      "learning_rate": 2.9669177109537865e-05,
      "loss": 0.8234,
      "step": 398850
    },
    {
      "epoch": 1.3979048600377812,
      "grad_norm": 2.796875,
      "learning_rate": 2.9668528080874163e-05,
      "loss": 0.9096,
      "step": 398860
    },
    {
      "epoch": 1.3979399075446768,
      "grad_norm": 2.84375,
      "learning_rate": 2.966787905221046e-05,
      "loss": 0.8727,
      "step": 398870
    },
    {
      "epoch": 1.3979749550515725,
      "grad_norm": 2.8125,
      "learning_rate": 2.9667230023546762e-05,
      "loss": 0.8832,
      "step": 398880
    },
    {
      "epoch": 1.398010002558468,
      "grad_norm": 2.828125,
      "learning_rate": 2.966658099488306e-05,
      "loss": 0.8967,
      "step": 398890
    },
    {
      "epoch": 1.3980450500653636,
      "grad_norm": 3.03125,
      "learning_rate": 2.9665931966219358e-05,
      "loss": 0.9057,
      "step": 398900
    },
    {
      "epoch": 1.3980800975722592,
      "grad_norm": 2.703125,
      "learning_rate": 2.9665282937555656e-05,
      "loss": 0.8034,
      "step": 398910
    },
    {
      "epoch": 1.3981151450791547,
      "grad_norm": 2.71875,
      "learning_rate": 2.9664633908891954e-05,
      "loss": 0.8461,
      "step": 398920
    },
    {
      "epoch": 1.3981501925860504,
      "grad_norm": 2.984375,
      "learning_rate": 2.9663984880228252e-05,
      "loss": 0.8508,
      "step": 398930
    },
    {
      "epoch": 1.398185240092946,
      "grad_norm": 3.34375,
      "learning_rate": 2.966333585156455e-05,
      "loss": 0.8337,
      "step": 398940
    },
    {
      "epoch": 1.3982202875998415,
      "grad_norm": 3.421875,
      "learning_rate": 2.9662686822900848e-05,
      "loss": 0.8539,
      "step": 398950
    },
    {
      "epoch": 1.3982553351067373,
      "grad_norm": 3.34375,
      "learning_rate": 2.9662037794237146e-05,
      "loss": 0.9175,
      "step": 398960
    },
    {
      "epoch": 1.3982903826136328,
      "grad_norm": 2.875,
      "learning_rate": 2.9661388765573444e-05,
      "loss": 0.8455,
      "step": 398970
    },
    {
      "epoch": 1.3983254301205283,
      "grad_norm": 3.09375,
      "learning_rate": 2.9660739736909742e-05,
      "loss": 0.8225,
      "step": 398980
    },
    {
      "epoch": 1.398360477627424,
      "grad_norm": 3.0,
      "learning_rate": 2.966009070824604e-05,
      "loss": 0.8417,
      "step": 398990
    },
    {
      "epoch": 1.3983955251343196,
      "grad_norm": 2.84375,
      "learning_rate": 2.9659441679582338e-05,
      "loss": 0.9216,
      "step": 399000
    },
    {
      "epoch": 1.3984305726412152,
      "grad_norm": 2.84375,
      "learning_rate": 2.9658792650918636e-05,
      "loss": 0.899,
      "step": 399010
    },
    {
      "epoch": 1.3984656201481107,
      "grad_norm": 2.859375,
      "learning_rate": 2.9658143622254937e-05,
      "loss": 0.8542,
      "step": 399020
    },
    {
      "epoch": 1.3985006676550062,
      "grad_norm": 2.609375,
      "learning_rate": 2.9657494593591235e-05,
      "loss": 0.8244,
      "step": 399030
    },
    {
      "epoch": 1.398535715161902,
      "grad_norm": 2.859375,
      "learning_rate": 2.9656845564927533e-05,
      "loss": 0.8695,
      "step": 399040
    },
    {
      "epoch": 1.3985707626687975,
      "grad_norm": 2.59375,
      "learning_rate": 2.965619653626383e-05,
      "loss": 0.8611,
      "step": 399050
    },
    {
      "epoch": 1.398605810175693,
      "grad_norm": 2.5625,
      "learning_rate": 2.965554750760013e-05,
      "loss": 0.844,
      "step": 399060
    },
    {
      "epoch": 1.3986408576825888,
      "grad_norm": 3.3125,
      "learning_rate": 2.9654898478936427e-05,
      "loss": 0.8832,
      "step": 399070
    },
    {
      "epoch": 1.3986759051894844,
      "grad_norm": 2.8125,
      "learning_rate": 2.9654249450272725e-05,
      "loss": 0.8213,
      "step": 399080
    },
    {
      "epoch": 1.39871095269638,
      "grad_norm": 2.84375,
      "learning_rate": 2.9653600421609023e-05,
      "loss": 0.8235,
      "step": 399090
    },
    {
      "epoch": 1.3987460002032757,
      "grad_norm": 2.984375,
      "learning_rate": 2.965295139294532e-05,
      "loss": 0.8117,
      "step": 399100
    },
    {
      "epoch": 1.3987810477101712,
      "grad_norm": 2.6875,
      "learning_rate": 2.965230236428162e-05,
      "loss": 0.8398,
      "step": 399110
    },
    {
      "epoch": 1.3988160952170667,
      "grad_norm": 2.703125,
      "learning_rate": 2.9651653335617917e-05,
      "loss": 0.8217,
      "step": 399120
    },
    {
      "epoch": 1.3988511427239623,
      "grad_norm": 2.921875,
      "learning_rate": 2.9651004306954215e-05,
      "loss": 0.8833,
      "step": 399130
    },
    {
      "epoch": 1.3988861902308578,
      "grad_norm": 2.546875,
      "learning_rate": 2.965035527829051e-05,
      "loss": 0.8634,
      "step": 399140
    },
    {
      "epoch": 1.3989212377377536,
      "grad_norm": 2.453125,
      "learning_rate": 2.9649706249626808e-05,
      "loss": 0.7374,
      "step": 399150
    },
    {
      "epoch": 1.398956285244649,
      "grad_norm": 2.8125,
      "learning_rate": 2.9649057220963106e-05,
      "loss": 0.7909,
      "step": 399160
    },
    {
      "epoch": 1.3989913327515446,
      "grad_norm": 3.5,
      "learning_rate": 2.9648408192299404e-05,
      "loss": 0.8014,
      "step": 399170
    },
    {
      "epoch": 1.3990263802584404,
      "grad_norm": 3.328125,
      "learning_rate": 2.9647759163635702e-05,
      "loss": 0.8843,
      "step": 399180
    },
    {
      "epoch": 1.399061427765336,
      "grad_norm": 2.765625,
      "learning_rate": 2.9647110134972e-05,
      "loss": 0.9415,
      "step": 399190
    },
    {
      "epoch": 1.3990964752722315,
      "grad_norm": 2.71875,
      "learning_rate": 2.9646461106308298e-05,
      "loss": 0.8677,
      "step": 399200
    },
    {
      "epoch": 1.3991315227791272,
      "grad_norm": 2.84375,
      "learning_rate": 2.9645812077644596e-05,
      "loss": 0.7727,
      "step": 399210
    },
    {
      "epoch": 1.3991665702860228,
      "grad_norm": 3.046875,
      "learning_rate": 2.9645163048980894e-05,
      "loss": 0.8314,
      "step": 399220
    },
    {
      "epoch": 1.3992016177929183,
      "grad_norm": 2.75,
      "learning_rate": 2.9644514020317192e-05,
      "loss": 0.8569,
      "step": 399230
    },
    {
      "epoch": 1.3992366652998138,
      "grad_norm": 2.921875,
      "learning_rate": 2.964386499165349e-05,
      "loss": 0.8461,
      "step": 399240
    },
    {
      "epoch": 1.3992717128067094,
      "grad_norm": 2.5,
      "learning_rate": 2.964321596298979e-05,
      "loss": 0.7895,
      "step": 399250
    },
    {
      "epoch": 1.3993067603136051,
      "grad_norm": 2.625,
      "learning_rate": 2.964256693432609e-05,
      "loss": 0.8583,
      "step": 399260
    },
    {
      "epoch": 1.3993418078205007,
      "grad_norm": 3.078125,
      "learning_rate": 2.9641917905662387e-05,
      "loss": 0.86,
      "step": 399270
    },
    {
      "epoch": 1.3993768553273962,
      "grad_norm": 2.5625,
      "learning_rate": 2.9641268876998685e-05,
      "loss": 0.869,
      "step": 399280
    },
    {
      "epoch": 1.399411902834292,
      "grad_norm": 3.0,
      "learning_rate": 2.9640619848334983e-05,
      "loss": 0.8303,
      "step": 399290
    },
    {
      "epoch": 1.3994469503411875,
      "grad_norm": 2.921875,
      "learning_rate": 2.963997081967128e-05,
      "loss": 0.8099,
      "step": 399300
    },
    {
      "epoch": 1.399481997848083,
      "grad_norm": 3.234375,
      "learning_rate": 2.963932179100758e-05,
      "loss": 0.8308,
      "step": 399310
    },
    {
      "epoch": 1.3995170453549788,
      "grad_norm": 2.859375,
      "learning_rate": 2.9638672762343877e-05,
      "loss": 0.8438,
      "step": 399320
    },
    {
      "epoch": 1.3995520928618743,
      "grad_norm": 2.78125,
      "learning_rate": 2.9638023733680175e-05,
      "loss": 0.8466,
      "step": 399330
    },
    {
      "epoch": 1.3995871403687699,
      "grad_norm": 2.9375,
      "learning_rate": 2.9637374705016473e-05,
      "loss": 0.8633,
      "step": 399340
    },
    {
      "epoch": 1.3996221878756654,
      "grad_norm": 2.65625,
      "learning_rate": 2.963672567635277e-05,
      "loss": 0.8695,
      "step": 399350
    },
    {
      "epoch": 1.399657235382561,
      "grad_norm": 2.65625,
      "learning_rate": 2.963607664768907e-05,
      "loss": 0.8726,
      "step": 399360
    },
    {
      "epoch": 1.3996922828894567,
      "grad_norm": 2.671875,
      "learning_rate": 2.9635427619025367e-05,
      "loss": 0.8338,
      "step": 399370
    },
    {
      "epoch": 1.3997273303963522,
      "grad_norm": 2.90625,
      "learning_rate": 2.963477859036167e-05,
      "loss": 0.7896,
      "step": 399380
    },
    {
      "epoch": 1.3997623779032478,
      "grad_norm": 2.515625,
      "learning_rate": 2.9634129561697967e-05,
      "loss": 0.7975,
      "step": 399390
    },
    {
      "epoch": 1.3997974254101435,
      "grad_norm": 2.828125,
      "learning_rate": 2.9633480533034265e-05,
      "loss": 0.8635,
      "step": 399400
    },
    {
      "epoch": 1.399832472917039,
      "grad_norm": 3.140625,
      "learning_rate": 2.9632831504370563e-05,
      "loss": 0.8518,
      "step": 399410
    },
    {
      "epoch": 1.3998675204239346,
      "grad_norm": 2.9375,
      "learning_rate": 2.963218247570686e-05,
      "loss": 0.9465,
      "step": 399420
    },
    {
      "epoch": 1.3999025679308303,
      "grad_norm": 2.703125,
      "learning_rate": 2.963153344704316e-05,
      "loss": 0.8415,
      "step": 399430
    },
    {
      "epoch": 1.3999376154377259,
      "grad_norm": 3.5,
      "learning_rate": 2.9630884418379457e-05,
      "loss": 0.8416,
      "step": 399440
    },
    {
      "epoch": 1.3999726629446214,
      "grad_norm": 3.84375,
      "learning_rate": 2.9630235389715755e-05,
      "loss": 0.8187,
      "step": 399450
    },
    {
      "epoch": 1.400007710451517,
      "grad_norm": 3.234375,
      "learning_rate": 2.9629586361052053e-05,
      "loss": 0.9145,
      "step": 399460
    },
    {
      "epoch": 1.4000427579584127,
      "grad_norm": 2.890625,
      "learning_rate": 2.962893733238835e-05,
      "loss": 0.8628,
      "step": 399470
    },
    {
      "epoch": 1.4000778054653082,
      "grad_norm": 2.625,
      "learning_rate": 2.962828830372465e-05,
      "loss": 0.8783,
      "step": 399480
    },
    {
      "epoch": 1.4001128529722038,
      "grad_norm": 2.78125,
      "learning_rate": 2.9627639275060947e-05,
      "loss": 0.8896,
      "step": 399490
    },
    {
      "epoch": 1.4001479004790993,
      "grad_norm": 2.984375,
      "learning_rate": 2.9626990246397245e-05,
      "loss": 0.8143,
      "step": 399500
    },
    {
      "epoch": 1.400182947985995,
      "grad_norm": 2.5625,
      "learning_rate": 2.962634121773354e-05,
      "loss": 0.8407,
      "step": 399510
    },
    {
      "epoch": 1.4002179954928906,
      "grad_norm": 3.390625,
      "learning_rate": 2.9625692189069837e-05,
      "loss": 0.8779,
      "step": 399520
    },
    {
      "epoch": 1.4002530429997861,
      "grad_norm": 3.015625,
      "learning_rate": 2.9625043160406135e-05,
      "loss": 0.8971,
      "step": 399530
    },
    {
      "epoch": 1.400288090506682,
      "grad_norm": 2.671875,
      "learning_rate": 2.9624394131742433e-05,
      "loss": 0.8168,
      "step": 399540
    },
    {
      "epoch": 1.4003231380135774,
      "grad_norm": 3.125,
      "learning_rate": 2.962374510307873e-05,
      "loss": 0.8255,
      "step": 399550
    },
    {
      "epoch": 1.400358185520473,
      "grad_norm": 2.828125,
      "learning_rate": 2.962309607441503e-05,
      "loss": 0.8055,
      "step": 399560
    },
    {
      "epoch": 1.4003932330273687,
      "grad_norm": 2.828125,
      "learning_rate": 2.9622447045751327e-05,
      "loss": 0.8049,
      "step": 399570
    },
    {
      "epoch": 1.4004282805342643,
      "grad_norm": 2.5625,
      "learning_rate": 2.9621798017087625e-05,
      "loss": 0.8473,
      "step": 399580
    },
    {
      "epoch": 1.4004633280411598,
      "grad_norm": 2.546875,
      "learning_rate": 2.9621148988423923e-05,
      "loss": 0.7776,
      "step": 399590
    },
    {
      "epoch": 1.4004983755480553,
      "grad_norm": 3.078125,
      "learning_rate": 2.962049995976022e-05,
      "loss": 0.9023,
      "step": 399600
    },
    {
      "epoch": 1.4005334230549509,
      "grad_norm": 3.375,
      "learning_rate": 2.9619850931096523e-05,
      "loss": 0.8237,
      "step": 399610
    },
    {
      "epoch": 1.4005684705618466,
      "grad_norm": 2.8125,
      "learning_rate": 2.961920190243282e-05,
      "loss": 0.8848,
      "step": 399620
    },
    {
      "epoch": 1.4006035180687422,
      "grad_norm": 2.625,
      "learning_rate": 2.961855287376912e-05,
      "loss": 0.815,
      "step": 399630
    },
    {
      "epoch": 1.4006385655756377,
      "grad_norm": 2.78125,
      "learning_rate": 2.9617903845105417e-05,
      "loss": 0.7443,
      "step": 399640
    },
    {
      "epoch": 1.4006736130825335,
      "grad_norm": 2.765625,
      "learning_rate": 2.9617254816441715e-05,
      "loss": 0.8157,
      "step": 399650
    },
    {
      "epoch": 1.400708660589429,
      "grad_norm": 2.78125,
      "learning_rate": 2.9616605787778013e-05,
      "loss": 0.7613,
      "step": 399660
    },
    {
      "epoch": 1.4007437080963245,
      "grad_norm": 3.203125,
      "learning_rate": 2.961595675911431e-05,
      "loss": 0.8272,
      "step": 399670
    },
    {
      "epoch": 1.4007787556032203,
      "grad_norm": 2.859375,
      "learning_rate": 2.961530773045061e-05,
      "loss": 0.8049,
      "step": 399680
    },
    {
      "epoch": 1.4008138031101158,
      "grad_norm": 3.5625,
      "learning_rate": 2.9614658701786907e-05,
      "loss": 0.8359,
      "step": 399690
    },
    {
      "epoch": 1.4008488506170114,
      "grad_norm": 3.125,
      "learning_rate": 2.9614009673123205e-05,
      "loss": 0.8595,
      "step": 399700
    },
    {
      "epoch": 1.400883898123907,
      "grad_norm": 2.609375,
      "learning_rate": 2.9613360644459503e-05,
      "loss": 0.8185,
      "step": 399710
    },
    {
      "epoch": 1.4009189456308024,
      "grad_norm": 2.6875,
      "learning_rate": 2.96127116157958e-05,
      "loss": 0.8512,
      "step": 399720
    },
    {
      "epoch": 1.4009539931376982,
      "grad_norm": 2.515625,
      "learning_rate": 2.96120625871321e-05,
      "loss": 0.8469,
      "step": 399730
    },
    {
      "epoch": 1.4009890406445937,
      "grad_norm": 2.65625,
      "learning_rate": 2.9611413558468397e-05,
      "loss": 0.857,
      "step": 399740
    },
    {
      "epoch": 1.4010240881514893,
      "grad_norm": 2.953125,
      "learning_rate": 2.9610764529804698e-05,
      "loss": 0.8745,
      "step": 399750
    },
    {
      "epoch": 1.401059135658385,
      "grad_norm": 2.65625,
      "learning_rate": 2.9610115501140996e-05,
      "loss": 0.8508,
      "step": 399760
    },
    {
      "epoch": 1.4010941831652806,
      "grad_norm": 2.984375,
      "learning_rate": 2.9609466472477294e-05,
      "loss": 0.8346,
      "step": 399770
    },
    {
      "epoch": 1.401129230672176,
      "grad_norm": 3.015625,
      "learning_rate": 2.9608817443813592e-05,
      "loss": 0.7817,
      "step": 399780
    },
    {
      "epoch": 1.4011642781790719,
      "grad_norm": 3.03125,
      "learning_rate": 2.960816841514989e-05,
      "loss": 0.8397,
      "step": 399790
    },
    {
      "epoch": 1.4011993256859674,
      "grad_norm": 2.8125,
      "learning_rate": 2.9607519386486188e-05,
      "loss": 0.8231,
      "step": 399800
    },
    {
      "epoch": 1.401234373192863,
      "grad_norm": 2.65625,
      "learning_rate": 2.9606870357822486e-05,
      "loss": 0.8668,
      "step": 399810
    },
    {
      "epoch": 1.4012694206997585,
      "grad_norm": 2.765625,
      "learning_rate": 2.9606221329158784e-05,
      "loss": 0.7756,
      "step": 399820
    },
    {
      "epoch": 1.401304468206654,
      "grad_norm": 2.9375,
      "learning_rate": 2.9605572300495082e-05,
      "loss": 0.873,
      "step": 399830
    },
    {
      "epoch": 1.4013395157135498,
      "grad_norm": 5.5625,
      "learning_rate": 2.960492327183138e-05,
      "loss": 0.8242,
      "step": 399840
    },
    {
      "epoch": 1.4013745632204453,
      "grad_norm": 2.96875,
      "learning_rate": 2.9604274243167678e-05,
      "loss": 0.8772,
      "step": 399850
    },
    {
      "epoch": 1.4014096107273408,
      "grad_norm": 3.578125,
      "learning_rate": 2.9603625214503976e-05,
      "loss": 0.8434,
      "step": 399860
    },
    {
      "epoch": 1.4014446582342366,
      "grad_norm": 3.328125,
      "learning_rate": 2.9602976185840274e-05,
      "loss": 0.8165,
      "step": 399870
    },
    {
      "epoch": 1.4014797057411321,
      "grad_norm": 3.78125,
      "learning_rate": 2.9602327157176572e-05,
      "loss": 0.9072,
      "step": 399880
    },
    {
      "epoch": 1.4015147532480277,
      "grad_norm": 3.078125,
      "learning_rate": 2.9601678128512867e-05,
      "loss": 0.9148,
      "step": 399890
    },
    {
      "epoch": 1.4015498007549234,
      "grad_norm": 2.265625,
      "learning_rate": 2.9601029099849165e-05,
      "loss": 0.8104,
      "step": 399900
    },
    {
      "epoch": 1.401584848261819,
      "grad_norm": 2.765625,
      "learning_rate": 2.9600380071185463e-05,
      "loss": 0.8703,
      "step": 399910
    },
    {
      "epoch": 1.4016198957687145,
      "grad_norm": 3.09375,
      "learning_rate": 2.959973104252176e-05,
      "loss": 0.8699,
      "step": 399920
    },
    {
      "epoch": 1.40165494327561,
      "grad_norm": 2.828125,
      "learning_rate": 2.959908201385806e-05,
      "loss": 0.7818,
      "step": 399930
    },
    {
      "epoch": 1.4016899907825056,
      "grad_norm": 2.765625,
      "learning_rate": 2.9598432985194357e-05,
      "loss": 0.824,
      "step": 399940
    },
    {
      "epoch": 1.4017250382894013,
      "grad_norm": 3.25,
      "learning_rate": 2.9597783956530655e-05,
      "loss": 0.8618,
      "step": 399950
    },
    {
      "epoch": 1.4017600857962969,
      "grad_norm": 2.5625,
      "learning_rate": 2.9597134927866953e-05,
      "loss": 0.8419,
      "step": 399960
    },
    {
      "epoch": 1.4017951333031924,
      "grad_norm": 2.734375,
      "learning_rate": 2.959648589920325e-05,
      "loss": 0.8765,
      "step": 399970
    },
    {
      "epoch": 1.4018301808100881,
      "grad_norm": 3.140625,
      "learning_rate": 2.9595836870539552e-05,
      "loss": 0.779,
      "step": 399980
    },
    {
      "epoch": 1.4018652283169837,
      "grad_norm": 2.734375,
      "learning_rate": 2.959518784187585e-05,
      "loss": 0.8163,
      "step": 399990
    },
    {
      "epoch": 1.4019002758238792,
      "grad_norm": 2.65625,
      "learning_rate": 2.9594538813212148e-05,
      "loss": 0.8369,
      "step": 400000
    },
    {
      "epoch": 1.4019002758238792,
      "eval_loss": 0.7960487008094788,
      "eval_runtime": 566.8583,
      "eval_samples_per_second": 671.131,
      "eval_steps_per_second": 55.928,
      "step": 400000
    },
    {
      "epoch": 1.401935323330775,
      "grad_norm": 2.890625,
      "learning_rate": 2.9593889784548446e-05,
      "loss": 0.9153,
      "step": 400010
    },
    {
      "epoch": 1.4019703708376705,
      "grad_norm": 2.859375,
      "learning_rate": 2.9593240755884744e-05,
      "loss": 0.9214,
      "step": 400020
    },
    {
      "epoch": 1.402005418344566,
      "grad_norm": 2.703125,
      "learning_rate": 2.9592591727221042e-05,
      "loss": 0.8132,
      "step": 400030
    },
    {
      "epoch": 1.4020404658514616,
      "grad_norm": 3.203125,
      "learning_rate": 2.959194269855734e-05,
      "loss": 0.8342,
      "step": 400040
    },
    {
      "epoch": 1.4020755133583571,
      "grad_norm": 2.375,
      "learning_rate": 2.9591293669893638e-05,
      "loss": 0.7632,
      "step": 400050
    },
    {
      "epoch": 1.4021105608652529,
      "grad_norm": 2.484375,
      "learning_rate": 2.9590644641229936e-05,
      "loss": 0.8311,
      "step": 400060
    },
    {
      "epoch": 1.4021456083721484,
      "grad_norm": 2.5625,
      "learning_rate": 2.9589995612566234e-05,
      "loss": 0.8197,
      "step": 400070
    },
    {
      "epoch": 1.402180655879044,
      "grad_norm": 3.328125,
      "learning_rate": 2.9589346583902532e-05,
      "loss": 0.8592,
      "step": 400080
    },
    {
      "epoch": 1.4022157033859397,
      "grad_norm": 2.96875,
      "learning_rate": 2.958869755523883e-05,
      "loss": 0.8544,
      "step": 400090
    },
    {
      "epoch": 1.4022507508928352,
      "grad_norm": 3.171875,
      "learning_rate": 2.9588048526575128e-05,
      "loss": 0.9081,
      "step": 400100
    },
    {
      "epoch": 1.4022857983997308,
      "grad_norm": 2.828125,
      "learning_rate": 2.9587399497911426e-05,
      "loss": 0.8144,
      "step": 400110
    },
    {
      "epoch": 1.4023208459066265,
      "grad_norm": 3.0625,
      "learning_rate": 2.9586750469247727e-05,
      "loss": 0.7549,
      "step": 400120
    },
    {
      "epoch": 1.402355893413522,
      "grad_norm": 2.859375,
      "learning_rate": 2.9586101440584025e-05,
      "loss": 0.8633,
      "step": 400130
    },
    {
      "epoch": 1.4023909409204176,
      "grad_norm": 3.171875,
      "learning_rate": 2.9585452411920323e-05,
      "loss": 0.9012,
      "step": 400140
    },
    {
      "epoch": 1.4024259884273131,
      "grad_norm": 2.640625,
      "learning_rate": 2.958480338325662e-05,
      "loss": 0.7938,
      "step": 400150
    },
    {
      "epoch": 1.402461035934209,
      "grad_norm": 2.78125,
      "learning_rate": 2.958415435459292e-05,
      "loss": 0.8782,
      "step": 400160
    },
    {
      "epoch": 1.4024960834411044,
      "grad_norm": 3.109375,
      "learning_rate": 2.9583505325929217e-05,
      "loss": 0.7942,
      "step": 400170
    },
    {
      "epoch": 1.402531130948,
      "grad_norm": 2.703125,
      "learning_rate": 2.9582856297265515e-05,
      "loss": 0.8233,
      "step": 400180
    },
    {
      "epoch": 1.4025661784548955,
      "grad_norm": 2.828125,
      "learning_rate": 2.9582207268601813e-05,
      "loss": 0.845,
      "step": 400190
    },
    {
      "epoch": 1.4026012259617913,
      "grad_norm": 3.15625,
      "learning_rate": 2.958155823993811e-05,
      "loss": 0.8877,
      "step": 400200
    },
    {
      "epoch": 1.4026362734686868,
      "grad_norm": 3.421875,
      "learning_rate": 2.958090921127441e-05,
      "loss": 0.796,
      "step": 400210
    },
    {
      "epoch": 1.4026713209755823,
      "grad_norm": 2.859375,
      "learning_rate": 2.9580260182610707e-05,
      "loss": 0.8244,
      "step": 400220
    },
    {
      "epoch": 1.402706368482478,
      "grad_norm": 2.625,
      "learning_rate": 2.9579611153947005e-05,
      "loss": 0.8435,
      "step": 400230
    },
    {
      "epoch": 1.4027414159893736,
      "grad_norm": 2.75,
      "learning_rate": 2.9578962125283303e-05,
      "loss": 0.7712,
      "step": 400240
    },
    {
      "epoch": 1.4027764634962692,
      "grad_norm": 2.8125,
      "learning_rate": 2.95783130966196e-05,
      "loss": 0.832,
      "step": 400250
    },
    {
      "epoch": 1.402811511003165,
      "grad_norm": 2.96875,
      "learning_rate": 2.9577664067955903e-05,
      "loss": 0.8931,
      "step": 400260
    },
    {
      "epoch": 1.4028465585100605,
      "grad_norm": 3.1875,
      "learning_rate": 2.9577015039292194e-05,
      "loss": 0.867,
      "step": 400270
    },
    {
      "epoch": 1.402881606016956,
      "grad_norm": 3.109375,
      "learning_rate": 2.9576366010628492e-05,
      "loss": 0.8592,
      "step": 400280
    },
    {
      "epoch": 1.4029166535238515,
      "grad_norm": 3.109375,
      "learning_rate": 2.957571698196479e-05,
      "loss": 0.8192,
      "step": 400290
    },
    {
      "epoch": 1.402951701030747,
      "grad_norm": 2.984375,
      "learning_rate": 2.9575067953301088e-05,
      "loss": 0.8608,
      "step": 400300
    },
    {
      "epoch": 1.4029867485376428,
      "grad_norm": 3.296875,
      "learning_rate": 2.9574418924637386e-05,
      "loss": 0.9002,
      "step": 400310
    },
    {
      "epoch": 1.4030217960445384,
      "grad_norm": 2.609375,
      "learning_rate": 2.9573769895973684e-05,
      "loss": 0.8638,
      "step": 400320
    },
    {
      "epoch": 1.403056843551434,
      "grad_norm": 2.71875,
      "learning_rate": 2.9573120867309982e-05,
      "loss": 0.7642,
      "step": 400330
    },
    {
      "epoch": 1.4030918910583297,
      "grad_norm": 2.65625,
      "learning_rate": 2.957247183864628e-05,
      "loss": 0.8549,
      "step": 400340
    },
    {
      "epoch": 1.4031269385652252,
      "grad_norm": 2.84375,
      "learning_rate": 2.957182280998258e-05,
      "loss": 0.9136,
      "step": 400350
    },
    {
      "epoch": 1.4031619860721207,
      "grad_norm": 3.234375,
      "learning_rate": 2.957117378131888e-05,
      "loss": 0.8436,
      "step": 400360
    },
    {
      "epoch": 1.4031970335790165,
      "grad_norm": 2.796875,
      "learning_rate": 2.9570524752655177e-05,
      "loss": 0.9108,
      "step": 400370
    },
    {
      "epoch": 1.403232081085912,
      "grad_norm": 2.71875,
      "learning_rate": 2.9569875723991475e-05,
      "loss": 0.7458,
      "step": 400380
    },
    {
      "epoch": 1.4032671285928076,
      "grad_norm": 2.859375,
      "learning_rate": 2.9569226695327773e-05,
      "loss": 0.9375,
      "step": 400390
    },
    {
      "epoch": 1.403302176099703,
      "grad_norm": 2.828125,
      "learning_rate": 2.956857766666407e-05,
      "loss": 0.9362,
      "step": 400400
    },
    {
      "epoch": 1.4033372236065986,
      "grad_norm": 2.75,
      "learning_rate": 2.956792863800037e-05,
      "loss": 0.7967,
      "step": 400410
    },
    {
      "epoch": 1.4033722711134944,
      "grad_norm": 2.9375,
      "learning_rate": 2.9567279609336667e-05,
      "loss": 0.9398,
      "step": 400420
    },
    {
      "epoch": 1.40340731862039,
      "grad_norm": 3.09375,
      "learning_rate": 2.9566630580672965e-05,
      "loss": 0.8247,
      "step": 400430
    },
    {
      "epoch": 1.4034423661272855,
      "grad_norm": 2.953125,
      "learning_rate": 2.9565981552009263e-05,
      "loss": 0.786,
      "step": 400440
    },
    {
      "epoch": 1.4034774136341812,
      "grad_norm": 3.125,
      "learning_rate": 2.956533252334556e-05,
      "loss": 0.8035,
      "step": 400450
    },
    {
      "epoch": 1.4035124611410768,
      "grad_norm": 2.84375,
      "learning_rate": 2.956468349468186e-05,
      "loss": 0.7741,
      "step": 400460
    },
    {
      "epoch": 1.4035475086479723,
      "grad_norm": 2.6875,
      "learning_rate": 2.9564034466018157e-05,
      "loss": 0.8248,
      "step": 400470
    },
    {
      "epoch": 1.403582556154868,
      "grad_norm": 2.96875,
      "learning_rate": 2.956338543735446e-05,
      "loss": 0.8397,
      "step": 400480
    },
    {
      "epoch": 1.4036176036617636,
      "grad_norm": 2.796875,
      "learning_rate": 2.9562736408690757e-05,
      "loss": 0.8162,
      "step": 400490
    },
    {
      "epoch": 1.4036526511686591,
      "grad_norm": 2.90625,
      "learning_rate": 2.9562087380027055e-05,
      "loss": 0.8581,
      "step": 400500
    },
    {
      "epoch": 1.4036876986755547,
      "grad_norm": 3.078125,
      "learning_rate": 2.9561438351363353e-05,
      "loss": 0.8489,
      "step": 400510
    },
    {
      "epoch": 1.4037227461824502,
      "grad_norm": 2.90625,
      "learning_rate": 2.956078932269965e-05,
      "loss": 0.7974,
      "step": 400520
    },
    {
      "epoch": 1.403757793689346,
      "grad_norm": 3.078125,
      "learning_rate": 2.956014029403595e-05,
      "loss": 0.9071,
      "step": 400530
    },
    {
      "epoch": 1.4037928411962415,
      "grad_norm": 2.5625,
      "learning_rate": 2.9559491265372247e-05,
      "loss": 0.802,
      "step": 400540
    },
    {
      "epoch": 1.403827888703137,
      "grad_norm": 3.28125,
      "learning_rate": 2.9558842236708545e-05,
      "loss": 0.8227,
      "step": 400550
    },
    {
      "epoch": 1.4038629362100328,
      "grad_norm": 3.0625,
      "learning_rate": 2.9558193208044843e-05,
      "loss": 0.8346,
      "step": 400560
    },
    {
      "epoch": 1.4038979837169283,
      "grad_norm": 2.484375,
      "learning_rate": 2.955754417938114e-05,
      "loss": 0.7772,
      "step": 400570
    },
    {
      "epoch": 1.4039330312238238,
      "grad_norm": 2.46875,
      "learning_rate": 2.955689515071744e-05,
      "loss": 0.8443,
      "step": 400580
    },
    {
      "epoch": 1.4039680787307196,
      "grad_norm": 2.75,
      "learning_rate": 2.9556246122053737e-05,
      "loss": 0.7857,
      "step": 400590
    },
    {
      "epoch": 1.4040031262376151,
      "grad_norm": 2.75,
      "learning_rate": 2.9555597093390035e-05,
      "loss": 0.7396,
      "step": 400600
    },
    {
      "epoch": 1.4040381737445107,
      "grad_norm": 2.875,
      "learning_rate": 2.9554948064726333e-05,
      "loss": 0.8063,
      "step": 400610
    },
    {
      "epoch": 1.4040732212514062,
      "grad_norm": 2.984375,
      "learning_rate": 2.9554299036062634e-05,
      "loss": 0.8284,
      "step": 400620
    },
    {
      "epoch": 1.4041082687583017,
      "grad_norm": 3.15625,
      "learning_rate": 2.9553650007398932e-05,
      "loss": 0.8692,
      "step": 400630
    },
    {
      "epoch": 1.4041433162651975,
      "grad_norm": 2.671875,
      "learning_rate": 2.9553000978735223e-05,
      "loss": 0.7812,
      "step": 400640
    },
    {
      "epoch": 1.404178363772093,
      "grad_norm": 2.953125,
      "learning_rate": 2.955235195007152e-05,
      "loss": 0.9119,
      "step": 400650
    },
    {
      "epoch": 1.4042134112789886,
      "grad_norm": 2.796875,
      "learning_rate": 2.955170292140782e-05,
      "loss": 0.9355,
      "step": 400660
    },
    {
      "epoch": 1.4042484587858843,
      "grad_norm": 2.65625,
      "learning_rate": 2.9551053892744117e-05,
      "loss": 0.7906,
      "step": 400670
    },
    {
      "epoch": 1.4042835062927799,
      "grad_norm": 2.59375,
      "learning_rate": 2.9550404864080415e-05,
      "loss": 0.8031,
      "step": 400680
    },
    {
      "epoch": 1.4043185537996754,
      "grad_norm": 2.671875,
      "learning_rate": 2.9549755835416713e-05,
      "loss": 0.878,
      "step": 400690
    },
    {
      "epoch": 1.4043536013065712,
      "grad_norm": 2.453125,
      "learning_rate": 2.954910680675301e-05,
      "loss": 0.7931,
      "step": 400700
    },
    {
      "epoch": 1.4043886488134667,
      "grad_norm": 2.609375,
      "learning_rate": 2.9548457778089313e-05,
      "loss": 0.8125,
      "step": 400710
    },
    {
      "epoch": 1.4044236963203622,
      "grad_norm": 2.71875,
      "learning_rate": 2.954780874942561e-05,
      "loss": 0.8467,
      "step": 400720
    },
    {
      "epoch": 1.4044587438272578,
      "grad_norm": 3.125,
      "learning_rate": 2.954715972076191e-05,
      "loss": 0.8483,
      "step": 400730
    },
    {
      "epoch": 1.4044937913341533,
      "grad_norm": 2.84375,
      "learning_rate": 2.9546510692098207e-05,
      "loss": 0.8432,
      "step": 400740
    },
    {
      "epoch": 1.404528838841049,
      "grad_norm": 2.328125,
      "learning_rate": 2.9545861663434505e-05,
      "loss": 0.7865,
      "step": 400750
    },
    {
      "epoch": 1.4045638863479446,
      "grad_norm": 2.890625,
      "learning_rate": 2.9545212634770803e-05,
      "loss": 0.8493,
      "step": 400760
    },
    {
      "epoch": 1.4045989338548401,
      "grad_norm": 2.890625,
      "learning_rate": 2.95445636061071e-05,
      "loss": 0.8626,
      "step": 400770
    },
    {
      "epoch": 1.404633981361736,
      "grad_norm": 2.53125,
      "learning_rate": 2.95439145774434e-05,
      "loss": 0.7675,
      "step": 400780
    },
    {
      "epoch": 1.4046690288686314,
      "grad_norm": 2.578125,
      "learning_rate": 2.9543265548779697e-05,
      "loss": 0.7378,
      "step": 400790
    },
    {
      "epoch": 1.404704076375527,
      "grad_norm": 2.65625,
      "learning_rate": 2.9542616520115995e-05,
      "loss": 0.8115,
      "step": 400800
    },
    {
      "epoch": 1.4047391238824227,
      "grad_norm": 3.046875,
      "learning_rate": 2.9541967491452293e-05,
      "loss": 0.8527,
      "step": 400810
    },
    {
      "epoch": 1.4047741713893183,
      "grad_norm": 2.90625,
      "learning_rate": 2.954131846278859e-05,
      "loss": 0.78,
      "step": 400820
    },
    {
      "epoch": 1.4048092188962138,
      "grad_norm": 2.828125,
      "learning_rate": 2.954066943412489e-05,
      "loss": 0.8677,
      "step": 400830
    },
    {
      "epoch": 1.4048442664031093,
      "grad_norm": 2.609375,
      "learning_rate": 2.9540020405461187e-05,
      "loss": 0.8409,
      "step": 400840
    },
    {
      "epoch": 1.404879313910005,
      "grad_norm": 3.21875,
      "learning_rate": 2.9539371376797488e-05,
      "loss": 0.8669,
      "step": 400850
    },
    {
      "epoch": 1.4049143614169006,
      "grad_norm": 3.296875,
      "learning_rate": 2.9538722348133786e-05,
      "loss": 0.9361,
      "step": 400860
    },
    {
      "epoch": 1.4049494089237962,
      "grad_norm": 3.15625,
      "learning_rate": 2.9538073319470084e-05,
      "loss": 0.8075,
      "step": 400870
    },
    {
      "epoch": 1.4049844564306917,
      "grad_norm": 2.96875,
      "learning_rate": 2.9537424290806382e-05,
      "loss": 0.8438,
      "step": 400880
    },
    {
      "epoch": 1.4050195039375875,
      "grad_norm": 3.0,
      "learning_rate": 2.953677526214268e-05,
      "loss": 0.8627,
      "step": 400890
    },
    {
      "epoch": 1.405054551444483,
      "grad_norm": 2.59375,
      "learning_rate": 2.9536126233478978e-05,
      "loss": 0.7469,
      "step": 400900
    },
    {
      "epoch": 1.4050895989513785,
      "grad_norm": 3.140625,
      "learning_rate": 2.9535477204815276e-05,
      "loss": 0.8824,
      "step": 400910
    },
    {
      "epoch": 1.4051246464582743,
      "grad_norm": 3.234375,
      "learning_rate": 2.9534828176151574e-05,
      "loss": 0.8298,
      "step": 400920
    },
    {
      "epoch": 1.4051596939651698,
      "grad_norm": 2.734375,
      "learning_rate": 2.9534179147487872e-05,
      "loss": 0.8385,
      "step": 400930
    },
    {
      "epoch": 1.4051947414720654,
      "grad_norm": 3.03125,
      "learning_rate": 2.953353011882417e-05,
      "loss": 0.8337,
      "step": 400940
    },
    {
      "epoch": 1.4052297889789611,
      "grad_norm": 3.265625,
      "learning_rate": 2.9532881090160468e-05,
      "loss": 0.8601,
      "step": 400950
    },
    {
      "epoch": 1.4052648364858566,
      "grad_norm": 3.125,
      "learning_rate": 2.9532232061496766e-05,
      "loss": 0.8631,
      "step": 400960
    },
    {
      "epoch": 1.4052998839927522,
      "grad_norm": 2.9375,
      "learning_rate": 2.9531583032833064e-05,
      "loss": 0.8561,
      "step": 400970
    },
    {
      "epoch": 1.4053349314996477,
      "grad_norm": 3.328125,
      "learning_rate": 2.9530934004169362e-05,
      "loss": 0.875,
      "step": 400980
    },
    {
      "epoch": 1.4053699790065433,
      "grad_norm": 3.296875,
      "learning_rate": 2.9530284975505664e-05,
      "loss": 0.9247,
      "step": 400990
    },
    {
      "epoch": 1.405405026513439,
      "grad_norm": 2.6875,
      "learning_rate": 2.952963594684196e-05,
      "loss": 0.9074,
      "step": 401000
    },
    {
      "epoch": 1.4054400740203346,
      "grad_norm": 3.078125,
      "learning_rate": 2.952898691817826e-05,
      "loss": 0.8606,
      "step": 401010
    },
    {
      "epoch": 1.40547512152723,
      "grad_norm": 2.71875,
      "learning_rate": 2.952833788951455e-05,
      "loss": 0.8379,
      "step": 401020
    },
    {
      "epoch": 1.4055101690341258,
      "grad_norm": 3.09375,
      "learning_rate": 2.952768886085085e-05,
      "loss": 0.8226,
      "step": 401030
    },
    {
      "epoch": 1.4055452165410214,
      "grad_norm": 2.390625,
      "learning_rate": 2.9527039832187147e-05,
      "loss": 0.7852,
      "step": 401040
    },
    {
      "epoch": 1.405580264047917,
      "grad_norm": 2.5,
      "learning_rate": 2.9526390803523445e-05,
      "loss": 0.8383,
      "step": 401050
    },
    {
      "epoch": 1.4056153115548127,
      "grad_norm": 2.921875,
      "learning_rate": 2.9525741774859743e-05,
      "loss": 0.9297,
      "step": 401060
    },
    {
      "epoch": 1.4056503590617082,
      "grad_norm": 2.5,
      "learning_rate": 2.952509274619604e-05,
      "loss": 0.7846,
      "step": 401070
    },
    {
      "epoch": 1.4056854065686037,
      "grad_norm": 2.90625,
      "learning_rate": 2.9524443717532342e-05,
      "loss": 0.7739,
      "step": 401080
    },
    {
      "epoch": 1.4057204540754993,
      "grad_norm": 3.046875,
      "learning_rate": 2.952379468886864e-05,
      "loss": 0.8999,
      "step": 401090
    },
    {
      "epoch": 1.4057555015823948,
      "grad_norm": 2.84375,
      "learning_rate": 2.9523145660204938e-05,
      "loss": 0.8545,
      "step": 401100
    },
    {
      "epoch": 1.4057905490892906,
      "grad_norm": 2.765625,
      "learning_rate": 2.9522496631541236e-05,
      "loss": 0.7748,
      "step": 401110
    },
    {
      "epoch": 1.4058255965961861,
      "grad_norm": 2.828125,
      "learning_rate": 2.9521847602877534e-05,
      "loss": 0.7972,
      "step": 401120
    },
    {
      "epoch": 1.4058606441030816,
      "grad_norm": 2.734375,
      "learning_rate": 2.9521198574213832e-05,
      "loss": 0.861,
      "step": 401130
    },
    {
      "epoch": 1.4058956916099774,
      "grad_norm": 2.671875,
      "learning_rate": 2.952054954555013e-05,
      "loss": 0.8336,
      "step": 401140
    },
    {
      "epoch": 1.405930739116873,
      "grad_norm": 2.796875,
      "learning_rate": 2.9519900516886428e-05,
      "loss": 0.7971,
      "step": 401150
    },
    {
      "epoch": 1.4059657866237685,
      "grad_norm": 3.109375,
      "learning_rate": 2.9519251488222726e-05,
      "loss": 0.8175,
      "step": 401160
    },
    {
      "epoch": 1.4060008341306642,
      "grad_norm": 3.03125,
      "learning_rate": 2.9518602459559024e-05,
      "loss": 0.8736,
      "step": 401170
    },
    {
      "epoch": 1.4060358816375598,
      "grad_norm": 2.953125,
      "learning_rate": 2.9517953430895322e-05,
      "loss": 0.8829,
      "step": 401180
    },
    {
      "epoch": 1.4060709291444553,
      "grad_norm": 2.625,
      "learning_rate": 2.951730440223162e-05,
      "loss": 0.9124,
      "step": 401190
    },
    {
      "epoch": 1.4061059766513508,
      "grad_norm": 3.125,
      "learning_rate": 2.9516655373567918e-05,
      "loss": 0.8932,
      "step": 401200
    },
    {
      "epoch": 1.4061410241582464,
      "grad_norm": 2.828125,
      "learning_rate": 2.9516006344904216e-05,
      "loss": 0.883,
      "step": 401210
    },
    {
      "epoch": 1.4061760716651421,
      "grad_norm": 3.421875,
      "learning_rate": 2.9515357316240518e-05,
      "loss": 0.8519,
      "step": 401220
    },
    {
      "epoch": 1.4062111191720377,
      "grad_norm": 3.09375,
      "learning_rate": 2.9514708287576816e-05,
      "loss": 0.8467,
      "step": 401230
    },
    {
      "epoch": 1.4062461666789332,
      "grad_norm": 3.171875,
      "learning_rate": 2.9514059258913114e-05,
      "loss": 0.8863,
      "step": 401240
    },
    {
      "epoch": 1.406281214185829,
      "grad_norm": 2.671875,
      "learning_rate": 2.951341023024941e-05,
      "loss": 0.8426,
      "step": 401250
    },
    {
      "epoch": 1.4063162616927245,
      "grad_norm": 3.0,
      "learning_rate": 2.951276120158571e-05,
      "loss": 0.8407,
      "step": 401260
    },
    {
      "epoch": 1.40635130919962,
      "grad_norm": 2.8125,
      "learning_rate": 2.9512112172922008e-05,
      "loss": 0.883,
      "step": 401270
    },
    {
      "epoch": 1.4063863567065158,
      "grad_norm": 2.765625,
      "learning_rate": 2.9511463144258306e-05,
      "loss": 0.9007,
      "step": 401280
    },
    {
      "epoch": 1.4064214042134113,
      "grad_norm": 2.8125,
      "learning_rate": 2.9510814115594604e-05,
      "loss": 0.8404,
      "step": 401290
    },
    {
      "epoch": 1.4064564517203069,
      "grad_norm": 2.71875,
      "learning_rate": 2.95101650869309e-05,
      "loss": 0.796,
      "step": 401300
    },
    {
      "epoch": 1.4064914992272024,
      "grad_norm": 3.296875,
      "learning_rate": 2.95095160582672e-05,
      "loss": 0.9479,
      "step": 401310
    },
    {
      "epoch": 1.406526546734098,
      "grad_norm": 2.421875,
      "learning_rate": 2.9508867029603498e-05,
      "loss": 0.9295,
      "step": 401320
    },
    {
      "epoch": 1.4065615942409937,
      "grad_norm": 2.5625,
      "learning_rate": 2.9508218000939796e-05,
      "loss": 0.8527,
      "step": 401330
    },
    {
      "epoch": 1.4065966417478892,
      "grad_norm": 3.140625,
      "learning_rate": 2.9507568972276094e-05,
      "loss": 0.9092,
      "step": 401340
    },
    {
      "epoch": 1.4066316892547848,
      "grad_norm": 2.890625,
      "learning_rate": 2.950691994361239e-05,
      "loss": 0.8796,
      "step": 401350
    },
    {
      "epoch": 1.4066667367616805,
      "grad_norm": 3.109375,
      "learning_rate": 2.9506270914948693e-05,
      "loss": 0.814,
      "step": 401360
    },
    {
      "epoch": 1.406701784268576,
      "grad_norm": 2.234375,
      "learning_rate": 2.950562188628499e-05,
      "loss": 0.8272,
      "step": 401370
    },
    {
      "epoch": 1.4067368317754716,
      "grad_norm": 2.703125,
      "learning_rate": 2.950497285762129e-05,
      "loss": 0.7977,
      "step": 401380
    },
    {
      "epoch": 1.4067718792823674,
      "grad_norm": 2.53125,
      "learning_rate": 2.950432382895758e-05,
      "loss": 0.7935,
      "step": 401390
    },
    {
      "epoch": 1.406806926789263,
      "grad_norm": 2.796875,
      "learning_rate": 2.9503674800293878e-05,
      "loss": 0.847,
      "step": 401400
    },
    {
      "epoch": 1.4068419742961584,
      "grad_norm": 2.921875,
      "learning_rate": 2.9503025771630176e-05,
      "loss": 0.7818,
      "step": 401410
    },
    {
      "epoch": 1.406877021803054,
      "grad_norm": 2.875,
      "learning_rate": 2.9502376742966474e-05,
      "loss": 0.8607,
      "step": 401420
    },
    {
      "epoch": 1.4069120693099497,
      "grad_norm": 2.75,
      "learning_rate": 2.9501727714302772e-05,
      "loss": 0.8549,
      "step": 401430
    },
    {
      "epoch": 1.4069471168168453,
      "grad_norm": 2.953125,
      "learning_rate": 2.9501078685639074e-05,
      "loss": 0.8392,
      "step": 401440
    },
    {
      "epoch": 1.4069821643237408,
      "grad_norm": 3.0625,
      "learning_rate": 2.950042965697537e-05,
      "loss": 0.8954,
      "step": 401450
    },
    {
      "epoch": 1.4070172118306363,
      "grad_norm": 3.0,
      "learning_rate": 2.949978062831167e-05,
      "loss": 0.8377,
      "step": 401460
    },
    {
      "epoch": 1.407052259337532,
      "grad_norm": 3.078125,
      "learning_rate": 2.9499131599647968e-05,
      "loss": 0.8172,
      "step": 401470
    },
    {
      "epoch": 1.4070873068444276,
      "grad_norm": 2.375,
      "learning_rate": 2.9498482570984266e-05,
      "loss": 0.8218,
      "step": 401480
    },
    {
      "epoch": 1.4071223543513232,
      "grad_norm": 2.703125,
      "learning_rate": 2.9497833542320564e-05,
      "loss": 0.8268,
      "step": 401490
    },
    {
      "epoch": 1.407157401858219,
      "grad_norm": 2.765625,
      "learning_rate": 2.949718451365686e-05,
      "loss": 0.8683,
      "step": 401500
    },
    {
      "epoch": 1.4071924493651145,
      "grad_norm": 2.765625,
      "learning_rate": 2.949653548499316e-05,
      "loss": 0.8756,
      "step": 401510
    },
    {
      "epoch": 1.40722749687201,
      "grad_norm": 3.375,
      "learning_rate": 2.9495886456329458e-05,
      "loss": 0.8492,
      "step": 401520
    },
    {
      "epoch": 1.4072625443789057,
      "grad_norm": 3.109375,
      "learning_rate": 2.9495237427665756e-05,
      "loss": 0.8422,
      "step": 401530
    },
    {
      "epoch": 1.4072975918858013,
      "grad_norm": 2.9375,
      "learning_rate": 2.9494588399002054e-05,
      "loss": 0.8128,
      "step": 401540
    },
    {
      "epoch": 1.4073326393926968,
      "grad_norm": 2.703125,
      "learning_rate": 2.949393937033835e-05,
      "loss": 0.8433,
      "step": 401550
    },
    {
      "epoch": 1.4073676868995924,
      "grad_norm": 2.640625,
      "learning_rate": 2.949329034167465e-05,
      "loss": 0.9282,
      "step": 401560
    },
    {
      "epoch": 1.4074027344064879,
      "grad_norm": 2.765625,
      "learning_rate": 2.9492641313010948e-05,
      "loss": 0.8297,
      "step": 401570
    },
    {
      "epoch": 1.4074377819133836,
      "grad_norm": 2.359375,
      "learning_rate": 2.949199228434725e-05,
      "loss": 0.8034,
      "step": 401580
    },
    {
      "epoch": 1.4074728294202792,
      "grad_norm": 2.734375,
      "learning_rate": 2.9491343255683547e-05,
      "loss": 0.8165,
      "step": 401590
    },
    {
      "epoch": 1.4075078769271747,
      "grad_norm": 2.921875,
      "learning_rate": 2.9490694227019845e-05,
      "loss": 0.8546,
      "step": 401600
    },
    {
      "epoch": 1.4075429244340705,
      "grad_norm": 2.796875,
      "learning_rate": 2.9490045198356143e-05,
      "loss": 0.857,
      "step": 401610
    },
    {
      "epoch": 1.407577971940966,
      "grad_norm": 2.953125,
      "learning_rate": 2.948939616969244e-05,
      "loss": 0.8768,
      "step": 401620
    },
    {
      "epoch": 1.4076130194478615,
      "grad_norm": 2.828125,
      "learning_rate": 2.948874714102874e-05,
      "loss": 0.8491,
      "step": 401630
    },
    {
      "epoch": 1.4076480669547573,
      "grad_norm": 2.890625,
      "learning_rate": 2.9488098112365037e-05,
      "loss": 0.8328,
      "step": 401640
    },
    {
      "epoch": 1.4076831144616528,
      "grad_norm": 2.359375,
      "learning_rate": 2.9487449083701335e-05,
      "loss": 0.8463,
      "step": 401650
    },
    {
      "epoch": 1.4077181619685484,
      "grad_norm": 3.0,
      "learning_rate": 2.9486800055037633e-05,
      "loss": 0.8273,
      "step": 401660
    },
    {
      "epoch": 1.407753209475444,
      "grad_norm": 2.796875,
      "learning_rate": 2.948615102637393e-05,
      "loss": 0.9083,
      "step": 401670
    },
    {
      "epoch": 1.4077882569823394,
      "grad_norm": 2.4375,
      "learning_rate": 2.948550199771023e-05,
      "loss": 0.819,
      "step": 401680
    },
    {
      "epoch": 1.4078233044892352,
      "grad_norm": 3.015625,
      "learning_rate": 2.9484852969046527e-05,
      "loss": 0.8018,
      "step": 401690
    },
    {
      "epoch": 1.4078583519961307,
      "grad_norm": 3.28125,
      "learning_rate": 2.9484203940382825e-05,
      "loss": 0.939,
      "step": 401700
    },
    {
      "epoch": 1.4078933995030263,
      "grad_norm": 3.0,
      "learning_rate": 2.9483554911719123e-05,
      "loss": 0.7523,
      "step": 401710
    },
    {
      "epoch": 1.407928447009922,
      "grad_norm": 2.78125,
      "learning_rate": 2.9482905883055424e-05,
      "loss": 0.8815,
      "step": 401720
    },
    {
      "epoch": 1.4079634945168176,
      "grad_norm": 3.015625,
      "learning_rate": 2.9482256854391722e-05,
      "loss": 0.87,
      "step": 401730
    },
    {
      "epoch": 1.407998542023713,
      "grad_norm": 3.34375,
      "learning_rate": 2.948160782572802e-05,
      "loss": 0.9782,
      "step": 401740
    },
    {
      "epoch": 1.4080335895306089,
      "grad_norm": 2.625,
      "learning_rate": 2.948095879706432e-05,
      "loss": 0.9214,
      "step": 401750
    },
    {
      "epoch": 1.4080686370375044,
      "grad_norm": 3.171875,
      "learning_rate": 2.9480309768400616e-05,
      "loss": 0.9152,
      "step": 401760
    },
    {
      "epoch": 1.4081036845444,
      "grad_norm": 2.734375,
      "learning_rate": 2.9479660739736908e-05,
      "loss": 0.7404,
      "step": 401770
    },
    {
      "epoch": 1.4081387320512955,
      "grad_norm": 2.953125,
      "learning_rate": 2.9479011711073206e-05,
      "loss": 0.8564,
      "step": 401780
    },
    {
      "epoch": 1.408173779558191,
      "grad_norm": 2.609375,
      "learning_rate": 2.9478362682409504e-05,
      "loss": 0.7995,
      "step": 401790
    },
    {
      "epoch": 1.4082088270650868,
      "grad_norm": 3.09375,
      "learning_rate": 2.94777136537458e-05,
      "loss": 0.8274,
      "step": 401800
    },
    {
      "epoch": 1.4082438745719823,
      "grad_norm": 2.78125,
      "learning_rate": 2.9477064625082103e-05,
      "loss": 0.8186,
      "step": 401810
    },
    {
      "epoch": 1.4082789220788778,
      "grad_norm": 2.6875,
      "learning_rate": 2.94764155964184e-05,
      "loss": 0.9512,
      "step": 401820
    },
    {
      "epoch": 1.4083139695857736,
      "grad_norm": 2.78125,
      "learning_rate": 2.94757665677547e-05,
      "loss": 0.88,
      "step": 401830
    },
    {
      "epoch": 1.4083490170926691,
      "grad_norm": 3.28125,
      "learning_rate": 2.9475117539090997e-05,
      "loss": 0.8404,
      "step": 401840
    },
    {
      "epoch": 1.4083840645995647,
      "grad_norm": 2.484375,
      "learning_rate": 2.9474468510427295e-05,
      "loss": 0.8522,
      "step": 401850
    },
    {
      "epoch": 1.4084191121064604,
      "grad_norm": 2.78125,
      "learning_rate": 2.9473819481763593e-05,
      "loss": 0.7874,
      "step": 401860
    },
    {
      "epoch": 1.408454159613356,
      "grad_norm": 3.21875,
      "learning_rate": 2.947317045309989e-05,
      "loss": 0.8356,
      "step": 401870
    },
    {
      "epoch": 1.4084892071202515,
      "grad_norm": 3.25,
      "learning_rate": 2.947252142443619e-05,
      "loss": 0.7862,
      "step": 401880
    },
    {
      "epoch": 1.408524254627147,
      "grad_norm": 2.9375,
      "learning_rate": 2.9471872395772487e-05,
      "loss": 0.8384,
      "step": 401890
    },
    {
      "epoch": 1.4085593021340426,
      "grad_norm": 2.5625,
      "learning_rate": 2.9471223367108785e-05,
      "loss": 0.8004,
      "step": 401900
    },
    {
      "epoch": 1.4085943496409383,
      "grad_norm": 2.859375,
      "learning_rate": 2.9470574338445083e-05,
      "loss": 0.8296,
      "step": 401910
    },
    {
      "epoch": 1.4086293971478339,
      "grad_norm": 3.078125,
      "learning_rate": 2.946992530978138e-05,
      "loss": 0.8265,
      "step": 401920
    },
    {
      "epoch": 1.4086644446547294,
      "grad_norm": 2.90625,
      "learning_rate": 2.946927628111768e-05,
      "loss": 0.8382,
      "step": 401930
    },
    {
      "epoch": 1.4086994921616252,
      "grad_norm": 3.4375,
      "learning_rate": 2.9468627252453977e-05,
      "loss": 0.9009,
      "step": 401940
    },
    {
      "epoch": 1.4087345396685207,
      "grad_norm": 2.890625,
      "learning_rate": 2.946797822379028e-05,
      "loss": 0.8621,
      "step": 401950
    },
    {
      "epoch": 1.4087695871754162,
      "grad_norm": 2.453125,
      "learning_rate": 2.9467329195126576e-05,
      "loss": 0.8107,
      "step": 401960
    },
    {
      "epoch": 1.408804634682312,
      "grad_norm": 3.046875,
      "learning_rate": 2.9466680166462874e-05,
      "loss": 0.8047,
      "step": 401970
    },
    {
      "epoch": 1.4088396821892075,
      "grad_norm": 2.796875,
      "learning_rate": 2.9466031137799172e-05,
      "loss": 0.8428,
      "step": 401980
    },
    {
      "epoch": 1.408874729696103,
      "grad_norm": 2.96875,
      "learning_rate": 2.946538210913547e-05,
      "loss": 0.8434,
      "step": 401990
    },
    {
      "epoch": 1.4089097772029986,
      "grad_norm": 2.609375,
      "learning_rate": 2.946473308047177e-05,
      "loss": 0.8573,
      "step": 402000
    },
    {
      "epoch": 1.4089448247098941,
      "grad_norm": 2.625,
      "learning_rate": 2.9464084051808066e-05,
      "loss": 0.8711,
      "step": 402010
    },
    {
      "epoch": 1.4089798722167899,
      "grad_norm": 2.609375,
      "learning_rate": 2.9463435023144364e-05,
      "loss": 0.8266,
      "step": 402020
    },
    {
      "epoch": 1.4090149197236854,
      "grad_norm": 3.125,
      "learning_rate": 2.9462785994480662e-05,
      "loss": 0.8562,
      "step": 402030
    },
    {
      "epoch": 1.409049967230581,
      "grad_norm": 3.03125,
      "learning_rate": 2.946213696581696e-05,
      "loss": 0.8537,
      "step": 402040
    },
    {
      "epoch": 1.4090850147374767,
      "grad_norm": 3.28125,
      "learning_rate": 2.946148793715326e-05,
      "loss": 0.8087,
      "step": 402050
    },
    {
      "epoch": 1.4091200622443723,
      "grad_norm": 2.875,
      "learning_rate": 2.9460838908489556e-05,
      "loss": 0.8229,
      "step": 402060
    },
    {
      "epoch": 1.4091551097512678,
      "grad_norm": 2.34375,
      "learning_rate": 2.9460189879825854e-05,
      "loss": 0.7877,
      "step": 402070
    },
    {
      "epoch": 1.4091901572581635,
      "grad_norm": 3.09375,
      "learning_rate": 2.9459540851162152e-05,
      "loss": 0.8747,
      "step": 402080
    },
    {
      "epoch": 1.409225204765059,
      "grad_norm": 2.890625,
      "learning_rate": 2.9458891822498454e-05,
      "loss": 0.8426,
      "step": 402090
    },
    {
      "epoch": 1.4092602522719546,
      "grad_norm": 2.734375,
      "learning_rate": 2.9458242793834752e-05,
      "loss": 0.7882,
      "step": 402100
    },
    {
      "epoch": 1.4092952997788502,
      "grad_norm": 3.109375,
      "learning_rate": 2.945759376517105e-05,
      "loss": 0.8681,
      "step": 402110
    },
    {
      "epoch": 1.409330347285746,
      "grad_norm": 2.640625,
      "learning_rate": 2.9456944736507348e-05,
      "loss": 0.7019,
      "step": 402120
    },
    {
      "epoch": 1.4093653947926414,
      "grad_norm": 3.25,
      "learning_rate": 2.9456295707843646e-05,
      "loss": 0.8865,
      "step": 402130
    },
    {
      "epoch": 1.409400442299537,
      "grad_norm": 3.140625,
      "learning_rate": 2.9455646679179944e-05,
      "loss": 0.8735,
      "step": 402140
    },
    {
      "epoch": 1.4094354898064325,
      "grad_norm": 3.078125,
      "learning_rate": 2.9454997650516235e-05,
      "loss": 0.8944,
      "step": 402150
    },
    {
      "epoch": 1.4094705373133283,
      "grad_norm": 2.921875,
      "learning_rate": 2.9454348621852533e-05,
      "loss": 0.8475,
      "step": 402160
    },
    {
      "epoch": 1.4095055848202238,
      "grad_norm": 2.078125,
      "learning_rate": 2.945369959318883e-05,
      "loss": 0.7537,
      "step": 402170
    },
    {
      "epoch": 1.4095406323271193,
      "grad_norm": 3.390625,
      "learning_rate": 2.9453050564525132e-05,
      "loss": 0.8288,
      "step": 402180
    },
    {
      "epoch": 1.409575679834015,
      "grad_norm": 2.796875,
      "learning_rate": 2.945240153586143e-05,
      "loss": 0.8509,
      "step": 402190
    },
    {
      "epoch": 1.4096107273409106,
      "grad_norm": 2.890625,
      "learning_rate": 2.945175250719773e-05,
      "loss": 0.8665,
      "step": 402200
    },
    {
      "epoch": 1.4096457748478062,
      "grad_norm": 2.90625,
      "learning_rate": 2.9451103478534026e-05,
      "loss": 0.8032,
      "step": 402210
    },
    {
      "epoch": 1.409680822354702,
      "grad_norm": 2.875,
      "learning_rate": 2.9450454449870324e-05,
      "loss": 0.8892,
      "step": 402220
    },
    {
      "epoch": 1.4097158698615975,
      "grad_norm": 3.03125,
      "learning_rate": 2.9449805421206622e-05,
      "loss": 0.7943,
      "step": 402230
    },
    {
      "epoch": 1.409750917368493,
      "grad_norm": 2.546875,
      "learning_rate": 2.944915639254292e-05,
      "loss": 0.7989,
      "step": 402240
    },
    {
      "epoch": 1.4097859648753885,
      "grad_norm": 3.34375,
      "learning_rate": 2.944850736387922e-05,
      "loss": 0.8721,
      "step": 402250
    },
    {
      "epoch": 1.409821012382284,
      "grad_norm": 2.546875,
      "learning_rate": 2.9447858335215516e-05,
      "loss": 0.8263,
      "step": 402260
    },
    {
      "epoch": 1.4098560598891798,
      "grad_norm": 2.875,
      "learning_rate": 2.9447209306551814e-05,
      "loss": 0.8264,
      "step": 402270
    },
    {
      "epoch": 1.4098911073960754,
      "grad_norm": 2.984375,
      "learning_rate": 2.9446560277888112e-05,
      "loss": 0.8998,
      "step": 402280
    },
    {
      "epoch": 1.409926154902971,
      "grad_norm": 3.0,
      "learning_rate": 2.944591124922441e-05,
      "loss": 0.846,
      "step": 402290
    },
    {
      "epoch": 1.4099612024098667,
      "grad_norm": 2.40625,
      "learning_rate": 2.944526222056071e-05,
      "loss": 0.8285,
      "step": 402300
    },
    {
      "epoch": 1.4099962499167622,
      "grad_norm": 3.015625,
      "learning_rate": 2.9444613191897006e-05,
      "loss": 0.8078,
      "step": 402310
    },
    {
      "epoch": 1.4100312974236577,
      "grad_norm": 2.734375,
      "learning_rate": 2.9443964163233308e-05,
      "loss": 0.7966,
      "step": 402320
    },
    {
      "epoch": 1.4100663449305535,
      "grad_norm": 2.84375,
      "learning_rate": 2.9443315134569606e-05,
      "loss": 0.874,
      "step": 402330
    },
    {
      "epoch": 1.410101392437449,
      "grad_norm": 3.0625,
      "learning_rate": 2.9442666105905904e-05,
      "loss": 0.8368,
      "step": 402340
    },
    {
      "epoch": 1.4101364399443446,
      "grad_norm": 2.96875,
      "learning_rate": 2.9442017077242202e-05,
      "loss": 0.8245,
      "step": 402350
    },
    {
      "epoch": 1.41017148745124,
      "grad_norm": 2.4375,
      "learning_rate": 2.94413680485785e-05,
      "loss": 0.861,
      "step": 402360
    },
    {
      "epoch": 1.4102065349581356,
      "grad_norm": 2.890625,
      "learning_rate": 2.9440719019914798e-05,
      "loss": 0.821,
      "step": 402370
    },
    {
      "epoch": 1.4102415824650314,
      "grad_norm": 2.84375,
      "learning_rate": 2.9440069991251096e-05,
      "loss": 0.7921,
      "step": 402380
    },
    {
      "epoch": 1.410276629971927,
      "grad_norm": 2.671875,
      "learning_rate": 2.9439420962587394e-05,
      "loss": 0.845,
      "step": 402390
    },
    {
      "epoch": 1.4103116774788225,
      "grad_norm": 2.796875,
      "learning_rate": 2.9438771933923692e-05,
      "loss": 0.7902,
      "step": 402400
    },
    {
      "epoch": 1.4103467249857182,
      "grad_norm": 2.828125,
      "learning_rate": 2.943812290525999e-05,
      "loss": 0.8485,
      "step": 402410
    },
    {
      "epoch": 1.4103817724926138,
      "grad_norm": 2.765625,
      "learning_rate": 2.9437473876596288e-05,
      "loss": 0.8658,
      "step": 402420
    },
    {
      "epoch": 1.4104168199995093,
      "grad_norm": 2.828125,
      "learning_rate": 2.9436824847932586e-05,
      "loss": 0.9091,
      "step": 402430
    },
    {
      "epoch": 1.410451867506405,
      "grad_norm": 2.9375,
      "learning_rate": 2.9436175819268884e-05,
      "loss": 0.7818,
      "step": 402440
    },
    {
      "epoch": 1.4104869150133006,
      "grad_norm": 3.265625,
      "learning_rate": 2.9435526790605182e-05,
      "loss": 0.8543,
      "step": 402450
    },
    {
      "epoch": 1.4105219625201961,
      "grad_norm": 2.671875,
      "learning_rate": 2.9434877761941483e-05,
      "loss": 0.8778,
      "step": 402460
    },
    {
      "epoch": 1.4105570100270917,
      "grad_norm": 2.84375,
      "learning_rate": 2.943422873327778e-05,
      "loss": 0.8297,
      "step": 402470
    },
    {
      "epoch": 1.4105920575339872,
      "grad_norm": 2.640625,
      "learning_rate": 2.943357970461408e-05,
      "loss": 0.7924,
      "step": 402480
    },
    {
      "epoch": 1.410627105040883,
      "grad_norm": 2.96875,
      "learning_rate": 2.9432930675950377e-05,
      "loss": 0.8071,
      "step": 402490
    },
    {
      "epoch": 1.4106621525477785,
      "grad_norm": 2.75,
      "learning_rate": 2.9432281647286675e-05,
      "loss": 0.852,
      "step": 402500
    },
    {
      "epoch": 1.410697200054674,
      "grad_norm": 3.078125,
      "learning_rate": 2.9431632618622973e-05,
      "loss": 0.8797,
      "step": 402510
    },
    {
      "epoch": 1.4107322475615698,
      "grad_norm": 3.265625,
      "learning_rate": 2.9430983589959264e-05,
      "loss": 0.805,
      "step": 402520
    },
    {
      "epoch": 1.4107672950684653,
      "grad_norm": 2.359375,
      "learning_rate": 2.9430334561295562e-05,
      "loss": 0.9085,
      "step": 402530
    },
    {
      "epoch": 1.4108023425753609,
      "grad_norm": 2.515625,
      "learning_rate": 2.9429685532631864e-05,
      "loss": 0.8559,
      "step": 402540
    },
    {
      "epoch": 1.4108373900822566,
      "grad_norm": 2.8125,
      "learning_rate": 2.9429036503968162e-05,
      "loss": 0.963,
      "step": 402550
    },
    {
      "epoch": 1.4108724375891522,
      "grad_norm": 3.203125,
      "learning_rate": 2.942838747530446e-05,
      "loss": 0.9423,
      "step": 402560
    },
    {
      "epoch": 1.4109074850960477,
      "grad_norm": 2.890625,
      "learning_rate": 2.9427738446640758e-05,
      "loss": 0.916,
      "step": 402570
    },
    {
      "epoch": 1.4109425326029432,
      "grad_norm": 3.4375,
      "learning_rate": 2.9427089417977056e-05,
      "loss": 0.9073,
      "step": 402580
    },
    {
      "epoch": 1.4109775801098388,
      "grad_norm": 3.125,
      "learning_rate": 2.9426440389313354e-05,
      "loss": 0.8725,
      "step": 402590
    },
    {
      "epoch": 1.4110126276167345,
      "grad_norm": 2.796875,
      "learning_rate": 2.9425791360649652e-05,
      "loss": 0.8727,
      "step": 402600
    },
    {
      "epoch": 1.41104767512363,
      "grad_norm": 2.5625,
      "learning_rate": 2.942514233198595e-05,
      "loss": 0.8013,
      "step": 402610
    },
    {
      "epoch": 1.4110827226305256,
      "grad_norm": 2.40625,
      "learning_rate": 2.9424493303322248e-05,
      "loss": 0.7767,
      "step": 402620
    },
    {
      "epoch": 1.4111177701374213,
      "grad_norm": 2.984375,
      "learning_rate": 2.9423844274658546e-05,
      "loss": 0.8175,
      "step": 402630
    },
    {
      "epoch": 1.4111528176443169,
      "grad_norm": 3.171875,
      "learning_rate": 2.9423195245994844e-05,
      "loss": 0.7929,
      "step": 402640
    },
    {
      "epoch": 1.4111878651512124,
      "grad_norm": 2.625,
      "learning_rate": 2.9422546217331142e-05,
      "loss": 0.7313,
      "step": 402650
    },
    {
      "epoch": 1.4112229126581082,
      "grad_norm": 2.71875,
      "learning_rate": 2.942189718866744e-05,
      "loss": 0.8106,
      "step": 402660
    },
    {
      "epoch": 1.4112579601650037,
      "grad_norm": 2.90625,
      "learning_rate": 2.9421248160003738e-05,
      "loss": 0.8434,
      "step": 402670
    },
    {
      "epoch": 1.4112930076718992,
      "grad_norm": 2.96875,
      "learning_rate": 2.942059913134004e-05,
      "loss": 0.901,
      "step": 402680
    },
    {
      "epoch": 1.4113280551787948,
      "grad_norm": 3.1875,
      "learning_rate": 2.9419950102676337e-05,
      "loss": 0.8462,
      "step": 402690
    },
    {
      "epoch": 1.4113631026856903,
      "grad_norm": 2.703125,
      "learning_rate": 2.9419301074012635e-05,
      "loss": 0.8312,
      "step": 402700
    },
    {
      "epoch": 1.411398150192586,
      "grad_norm": 2.890625,
      "learning_rate": 2.9418652045348933e-05,
      "loss": 0.8582,
      "step": 402710
    },
    {
      "epoch": 1.4114331976994816,
      "grad_norm": 2.90625,
      "learning_rate": 2.941800301668523e-05,
      "loss": 0.8134,
      "step": 402720
    },
    {
      "epoch": 1.4114682452063771,
      "grad_norm": 2.96875,
      "learning_rate": 2.941735398802153e-05,
      "loss": 0.8493,
      "step": 402730
    },
    {
      "epoch": 1.411503292713273,
      "grad_norm": 3.1875,
      "learning_rate": 2.9416704959357827e-05,
      "loss": 0.8609,
      "step": 402740
    },
    {
      "epoch": 1.4115383402201684,
      "grad_norm": 2.8125,
      "learning_rate": 2.9416055930694125e-05,
      "loss": 0.9374,
      "step": 402750
    },
    {
      "epoch": 1.411573387727064,
      "grad_norm": 2.453125,
      "learning_rate": 2.9415406902030423e-05,
      "loss": 0.8453,
      "step": 402760
    },
    {
      "epoch": 1.4116084352339597,
      "grad_norm": 3.28125,
      "learning_rate": 2.941475787336672e-05,
      "loss": 0.8824,
      "step": 402770
    },
    {
      "epoch": 1.4116434827408553,
      "grad_norm": 2.53125,
      "learning_rate": 2.941410884470302e-05,
      "loss": 0.8228,
      "step": 402780
    },
    {
      "epoch": 1.4116785302477508,
      "grad_norm": 3.140625,
      "learning_rate": 2.9413459816039317e-05,
      "loss": 0.85,
      "step": 402790
    },
    {
      "epoch": 1.4117135777546463,
      "grad_norm": 2.734375,
      "learning_rate": 2.9412810787375615e-05,
      "loss": 0.7994,
      "step": 402800
    },
    {
      "epoch": 1.411748625261542,
      "grad_norm": 3.0,
      "learning_rate": 2.9412161758711913e-05,
      "loss": 0.8605,
      "step": 402810
    },
    {
      "epoch": 1.4117836727684376,
      "grad_norm": 2.296875,
      "learning_rate": 2.9411512730048215e-05,
      "loss": 0.7784,
      "step": 402820
    },
    {
      "epoch": 1.4118187202753332,
      "grad_norm": 2.796875,
      "learning_rate": 2.9410863701384513e-05,
      "loss": 0.8444,
      "step": 402830
    },
    {
      "epoch": 1.4118537677822287,
      "grad_norm": 2.4375,
      "learning_rate": 2.941021467272081e-05,
      "loss": 0.8152,
      "step": 402840
    },
    {
      "epoch": 1.4118888152891245,
      "grad_norm": 3.484375,
      "learning_rate": 2.940956564405711e-05,
      "loss": 0.8623,
      "step": 402850
    },
    {
      "epoch": 1.41192386279602,
      "grad_norm": 2.90625,
      "learning_rate": 2.9408916615393407e-05,
      "loss": 0.8232,
      "step": 402860
    },
    {
      "epoch": 1.4119589103029155,
      "grad_norm": 2.921875,
      "learning_rate": 2.9408267586729705e-05,
      "loss": 0.8038,
      "step": 402870
    },
    {
      "epoch": 1.4119939578098113,
      "grad_norm": 3.015625,
      "learning_rate": 2.9407618558066003e-05,
      "loss": 0.8352,
      "step": 402880
    },
    {
      "epoch": 1.4120290053167068,
      "grad_norm": 3.078125,
      "learning_rate": 2.94069695294023e-05,
      "loss": 0.7989,
      "step": 402890
    },
    {
      "epoch": 1.4120640528236024,
      "grad_norm": 2.640625,
      "learning_rate": 2.9406320500738592e-05,
      "loss": 0.8813,
      "step": 402900
    },
    {
      "epoch": 1.4120991003304981,
      "grad_norm": 3.34375,
      "learning_rate": 2.9405671472074893e-05,
      "loss": 0.8562,
      "step": 402910
    },
    {
      "epoch": 1.4121341478373937,
      "grad_norm": 3.625,
      "learning_rate": 2.940502244341119e-05,
      "loss": 0.7669,
      "step": 402920
    },
    {
      "epoch": 1.4121691953442892,
      "grad_norm": 2.984375,
      "learning_rate": 2.940437341474749e-05,
      "loss": 0.8147,
      "step": 402930
    },
    {
      "epoch": 1.4122042428511847,
      "grad_norm": 2.765625,
      "learning_rate": 2.9403724386083787e-05,
      "loss": 0.8804,
      "step": 402940
    },
    {
      "epoch": 1.4122392903580803,
      "grad_norm": 2.90625,
      "learning_rate": 2.9403075357420085e-05,
      "loss": 0.8602,
      "step": 402950
    },
    {
      "epoch": 1.412274337864976,
      "grad_norm": 2.625,
      "learning_rate": 2.9402426328756383e-05,
      "loss": 0.8296,
      "step": 402960
    },
    {
      "epoch": 1.4123093853718716,
      "grad_norm": 3.125,
      "learning_rate": 2.940177730009268e-05,
      "loss": 0.8481,
      "step": 402970
    },
    {
      "epoch": 1.412344432878767,
      "grad_norm": 2.90625,
      "learning_rate": 2.940112827142898e-05,
      "loss": 0.8734,
      "step": 402980
    },
    {
      "epoch": 1.4123794803856629,
      "grad_norm": 2.75,
      "learning_rate": 2.9400479242765277e-05,
      "loss": 0.8434,
      "step": 402990
    },
    {
      "epoch": 1.4124145278925584,
      "grad_norm": 2.890625,
      "learning_rate": 2.9399830214101575e-05,
      "loss": 0.8466,
      "step": 403000
    },
    {
      "epoch": 1.412449575399454,
      "grad_norm": 3.078125,
      "learning_rate": 2.9399181185437873e-05,
      "loss": 0.8594,
      "step": 403010
    },
    {
      "epoch": 1.4124846229063497,
      "grad_norm": 2.96875,
      "learning_rate": 2.939853215677417e-05,
      "loss": 0.9001,
      "step": 403020
    },
    {
      "epoch": 1.4125196704132452,
      "grad_norm": 2.9375,
      "learning_rate": 2.939788312811047e-05,
      "loss": 0.8481,
      "step": 403030
    },
    {
      "epoch": 1.4125547179201408,
      "grad_norm": 2.921875,
      "learning_rate": 2.9397234099446767e-05,
      "loss": 0.8274,
      "step": 403040
    },
    {
      "epoch": 1.4125897654270363,
      "grad_norm": 2.859375,
      "learning_rate": 2.939658507078307e-05,
      "loss": 0.8486,
      "step": 403050
    },
    {
      "epoch": 1.4126248129339318,
      "grad_norm": 3.203125,
      "learning_rate": 2.9395936042119367e-05,
      "loss": 0.7564,
      "step": 403060
    },
    {
      "epoch": 1.4126598604408276,
      "grad_norm": 3.078125,
      "learning_rate": 2.9395287013455665e-05,
      "loss": 0.8549,
      "step": 403070
    },
    {
      "epoch": 1.4126949079477231,
      "grad_norm": 3.546875,
      "learning_rate": 2.9394637984791963e-05,
      "loss": 0.8781,
      "step": 403080
    },
    {
      "epoch": 1.4127299554546187,
      "grad_norm": 2.734375,
      "learning_rate": 2.939398895612826e-05,
      "loss": 0.7962,
      "step": 403090
    },
    {
      "epoch": 1.4127650029615144,
      "grad_norm": 3.109375,
      "learning_rate": 2.939333992746456e-05,
      "loss": 0.9234,
      "step": 403100
    },
    {
      "epoch": 1.41280005046841,
      "grad_norm": 2.828125,
      "learning_rate": 2.9392690898800857e-05,
      "loss": 0.8829,
      "step": 403110
    },
    {
      "epoch": 1.4128350979753055,
      "grad_norm": 2.59375,
      "learning_rate": 2.9392041870137155e-05,
      "loss": 0.8255,
      "step": 403120
    },
    {
      "epoch": 1.4128701454822012,
      "grad_norm": 2.9375,
      "learning_rate": 2.9391392841473453e-05,
      "loss": 0.85,
      "step": 403130
    },
    {
      "epoch": 1.4129051929890968,
      "grad_norm": 2.90625,
      "learning_rate": 2.939074381280975e-05,
      "loss": 0.8037,
      "step": 403140
    },
    {
      "epoch": 1.4129402404959923,
      "grad_norm": 3.078125,
      "learning_rate": 2.939009478414605e-05,
      "loss": 0.8889,
      "step": 403150
    },
    {
      "epoch": 1.4129752880028879,
      "grad_norm": 3.171875,
      "learning_rate": 2.9389445755482347e-05,
      "loss": 0.8473,
      "step": 403160
    },
    {
      "epoch": 1.4130103355097834,
      "grad_norm": 2.875,
      "learning_rate": 2.9388796726818645e-05,
      "loss": 0.8743,
      "step": 403170
    },
    {
      "epoch": 1.4130453830166791,
      "grad_norm": 3.3125,
      "learning_rate": 2.9388147698154943e-05,
      "loss": 0.8285,
      "step": 403180
    },
    {
      "epoch": 1.4130804305235747,
      "grad_norm": 3.0,
      "learning_rate": 2.9387498669491244e-05,
      "loss": 0.9009,
      "step": 403190
    },
    {
      "epoch": 1.4131154780304702,
      "grad_norm": 2.625,
      "learning_rate": 2.9386849640827542e-05,
      "loss": 0.7951,
      "step": 403200
    },
    {
      "epoch": 1.413150525537366,
      "grad_norm": 2.890625,
      "learning_rate": 2.938620061216384e-05,
      "loss": 0.8796,
      "step": 403210
    },
    {
      "epoch": 1.4131855730442615,
      "grad_norm": 3.09375,
      "learning_rate": 2.9385551583500138e-05,
      "loss": 0.908,
      "step": 403220
    },
    {
      "epoch": 1.413220620551157,
      "grad_norm": 2.859375,
      "learning_rate": 2.9384902554836436e-05,
      "loss": 0.8491,
      "step": 403230
    },
    {
      "epoch": 1.4132556680580528,
      "grad_norm": 3.359375,
      "learning_rate": 2.9384253526172734e-05,
      "loss": 0.8801,
      "step": 403240
    },
    {
      "epoch": 1.4132907155649483,
      "grad_norm": 3.078125,
      "learning_rate": 2.9383604497509032e-05,
      "loss": 0.847,
      "step": 403250
    },
    {
      "epoch": 1.4133257630718439,
      "grad_norm": 2.875,
      "learning_rate": 2.938295546884533e-05,
      "loss": 0.9256,
      "step": 403260
    },
    {
      "epoch": 1.4133608105787394,
      "grad_norm": 2.765625,
      "learning_rate": 2.938230644018162e-05,
      "loss": 0.714,
      "step": 403270
    },
    {
      "epoch": 1.413395858085635,
      "grad_norm": 3.046875,
      "learning_rate": 2.9381657411517923e-05,
      "loss": 0.8227,
      "step": 403280
    },
    {
      "epoch": 1.4134309055925307,
      "grad_norm": 2.921875,
      "learning_rate": 2.938100838285422e-05,
      "loss": 0.8652,
      "step": 403290
    },
    {
      "epoch": 1.4134659530994262,
      "grad_norm": 2.515625,
      "learning_rate": 2.938035935419052e-05,
      "loss": 0.7909,
      "step": 403300
    },
    {
      "epoch": 1.4135010006063218,
      "grad_norm": 2.734375,
      "learning_rate": 2.9379710325526817e-05,
      "loss": 0.8093,
      "step": 403310
    },
    {
      "epoch": 1.4135360481132175,
      "grad_norm": 3.6875,
      "learning_rate": 2.9379061296863115e-05,
      "loss": 0.8263,
      "step": 403320
    },
    {
      "epoch": 1.413571095620113,
      "grad_norm": 3.25,
      "learning_rate": 2.9378412268199413e-05,
      "loss": 0.9096,
      "step": 403330
    },
    {
      "epoch": 1.4136061431270086,
      "grad_norm": 2.765625,
      "learning_rate": 2.937776323953571e-05,
      "loss": 0.7858,
      "step": 403340
    },
    {
      "epoch": 1.4136411906339044,
      "grad_norm": 2.859375,
      "learning_rate": 2.937711421087201e-05,
      "loss": 0.8372,
      "step": 403350
    },
    {
      "epoch": 1.4136762381408,
      "grad_norm": 2.796875,
      "learning_rate": 2.9376465182208307e-05,
      "loss": 0.8614,
      "step": 403360
    },
    {
      "epoch": 1.4137112856476954,
      "grad_norm": 2.421875,
      "learning_rate": 2.9375816153544605e-05,
      "loss": 0.8968,
      "step": 403370
    },
    {
      "epoch": 1.413746333154591,
      "grad_norm": 3.0625,
      "learning_rate": 2.9375167124880903e-05,
      "loss": 0.8996,
      "step": 403380
    },
    {
      "epoch": 1.4137813806614865,
      "grad_norm": 2.265625,
      "learning_rate": 2.93745180962172e-05,
      "loss": 0.7651,
      "step": 403390
    },
    {
      "epoch": 1.4138164281683823,
      "grad_norm": 3.203125,
      "learning_rate": 2.93738690675535e-05,
      "loss": 0.8219,
      "step": 403400
    },
    {
      "epoch": 1.4138514756752778,
      "grad_norm": 3.125,
      "learning_rate": 2.9373220038889797e-05,
      "loss": 0.8881,
      "step": 403410
    },
    {
      "epoch": 1.4138865231821733,
      "grad_norm": 2.65625,
      "learning_rate": 2.9372571010226098e-05,
      "loss": 0.864,
      "step": 403420
    },
    {
      "epoch": 1.413921570689069,
      "grad_norm": 3.3125,
      "learning_rate": 2.9371921981562396e-05,
      "loss": 0.7946,
      "step": 403430
    },
    {
      "epoch": 1.4139566181959646,
      "grad_norm": 2.875,
      "learning_rate": 2.9371272952898694e-05,
      "loss": 0.787,
      "step": 403440
    },
    {
      "epoch": 1.4139916657028602,
      "grad_norm": 2.9375,
      "learning_rate": 2.9370623924234992e-05,
      "loss": 0.7735,
      "step": 403450
    },
    {
      "epoch": 1.414026713209756,
      "grad_norm": 3.171875,
      "learning_rate": 2.936997489557129e-05,
      "loss": 0.8817,
      "step": 403460
    },
    {
      "epoch": 1.4140617607166515,
      "grad_norm": 2.8125,
      "learning_rate": 2.9369325866907588e-05,
      "loss": 0.8681,
      "step": 403470
    },
    {
      "epoch": 1.414096808223547,
      "grad_norm": 2.5,
      "learning_rate": 2.9368676838243886e-05,
      "loss": 0.7609,
      "step": 403480
    },
    {
      "epoch": 1.4141318557304425,
      "grad_norm": 2.5625,
      "learning_rate": 2.9368027809580184e-05,
      "loss": 0.8428,
      "step": 403490
    },
    {
      "epoch": 1.4141669032373383,
      "grad_norm": 2.921875,
      "learning_rate": 2.9367378780916482e-05,
      "loss": 0.8799,
      "step": 403500
    },
    {
      "epoch": 1.4142019507442338,
      "grad_norm": 2.59375,
      "learning_rate": 2.936672975225278e-05,
      "loss": 0.7996,
      "step": 403510
    },
    {
      "epoch": 1.4142369982511294,
      "grad_norm": 2.75,
      "learning_rate": 2.9366080723589078e-05,
      "loss": 0.8741,
      "step": 403520
    },
    {
      "epoch": 1.414272045758025,
      "grad_norm": 2.734375,
      "learning_rate": 2.9365431694925376e-05,
      "loss": 0.9021,
      "step": 403530
    },
    {
      "epoch": 1.4143070932649207,
      "grad_norm": 2.90625,
      "learning_rate": 2.9364782666261674e-05,
      "loss": 0.8272,
      "step": 403540
    },
    {
      "epoch": 1.4143421407718162,
      "grad_norm": 3.0,
      "learning_rate": 2.9364133637597975e-05,
      "loss": 0.7777,
      "step": 403550
    },
    {
      "epoch": 1.4143771882787117,
      "grad_norm": 2.671875,
      "learning_rate": 2.9363484608934273e-05,
      "loss": 0.7651,
      "step": 403560
    },
    {
      "epoch": 1.4144122357856075,
      "grad_norm": 3.015625,
      "learning_rate": 2.936283558027057e-05,
      "loss": 0.8659,
      "step": 403570
    },
    {
      "epoch": 1.414447283292503,
      "grad_norm": 2.8125,
      "learning_rate": 2.936218655160687e-05,
      "loss": 0.8403,
      "step": 403580
    },
    {
      "epoch": 1.4144823307993986,
      "grad_norm": 2.8125,
      "learning_rate": 2.9361537522943167e-05,
      "loss": 0.8277,
      "step": 403590
    },
    {
      "epoch": 1.4145173783062943,
      "grad_norm": 2.625,
      "learning_rate": 2.9360888494279465e-05,
      "loss": 0.868,
      "step": 403600
    },
    {
      "epoch": 1.4145524258131899,
      "grad_norm": 3.125,
      "learning_rate": 2.9360239465615763e-05,
      "loss": 0.8365,
      "step": 403610
    },
    {
      "epoch": 1.4145874733200854,
      "grad_norm": 2.640625,
      "learning_rate": 2.935959043695206e-05,
      "loss": 0.8016,
      "step": 403620
    },
    {
      "epoch": 1.414622520826981,
      "grad_norm": 3.140625,
      "learning_rate": 2.935894140828836e-05,
      "loss": 0.8304,
      "step": 403630
    },
    {
      "epoch": 1.4146575683338765,
      "grad_norm": 2.796875,
      "learning_rate": 2.9358292379624657e-05,
      "loss": 0.8263,
      "step": 403640
    },
    {
      "epoch": 1.4146926158407722,
      "grad_norm": 2.9375,
      "learning_rate": 2.9357643350960952e-05,
      "loss": 0.7964,
      "step": 403650
    },
    {
      "epoch": 1.4147276633476678,
      "grad_norm": 2.90625,
      "learning_rate": 2.935699432229725e-05,
      "loss": 0.8004,
      "step": 403660
    },
    {
      "epoch": 1.4147627108545633,
      "grad_norm": 3.375,
      "learning_rate": 2.9356345293633548e-05,
      "loss": 0.8254,
      "step": 403670
    },
    {
      "epoch": 1.414797758361459,
      "grad_norm": 3.484375,
      "learning_rate": 2.9355696264969846e-05,
      "loss": 0.8443,
      "step": 403680
    },
    {
      "epoch": 1.4148328058683546,
      "grad_norm": 3.078125,
      "learning_rate": 2.9355047236306144e-05,
      "loss": 0.9134,
      "step": 403690
    },
    {
      "epoch": 1.4148678533752501,
      "grad_norm": 2.890625,
      "learning_rate": 2.9354398207642442e-05,
      "loss": 0.8908,
      "step": 403700
    },
    {
      "epoch": 1.4149029008821459,
      "grad_norm": 2.578125,
      "learning_rate": 2.935374917897874e-05,
      "loss": 0.8,
      "step": 403710
    },
    {
      "epoch": 1.4149379483890414,
      "grad_norm": 3.21875,
      "learning_rate": 2.9353100150315038e-05,
      "loss": 0.8247,
      "step": 403720
    },
    {
      "epoch": 1.414972995895937,
      "grad_norm": 3.390625,
      "learning_rate": 2.9352451121651336e-05,
      "loss": 0.7581,
      "step": 403730
    },
    {
      "epoch": 1.4150080434028325,
      "grad_norm": 3.34375,
      "learning_rate": 2.9351802092987634e-05,
      "loss": 0.8804,
      "step": 403740
    },
    {
      "epoch": 1.415043090909728,
      "grad_norm": 3.0,
      "learning_rate": 2.9351153064323932e-05,
      "loss": 0.8322,
      "step": 403750
    },
    {
      "epoch": 1.4150781384166238,
      "grad_norm": 2.640625,
      "learning_rate": 2.935050403566023e-05,
      "loss": 0.7851,
      "step": 403760
    },
    {
      "epoch": 1.4151131859235193,
      "grad_norm": 2.875,
      "learning_rate": 2.9349855006996528e-05,
      "loss": 0.7994,
      "step": 403770
    },
    {
      "epoch": 1.4151482334304148,
      "grad_norm": 3.0625,
      "learning_rate": 2.934920597833283e-05,
      "loss": 0.8027,
      "step": 403780
    },
    {
      "epoch": 1.4151832809373106,
      "grad_norm": 3.421875,
      "learning_rate": 2.9348556949669127e-05,
      "loss": 0.9195,
      "step": 403790
    },
    {
      "epoch": 1.4152183284442061,
      "grad_norm": 2.796875,
      "learning_rate": 2.9347907921005425e-05,
      "loss": 0.8498,
      "step": 403800
    },
    {
      "epoch": 1.4152533759511017,
      "grad_norm": 3.125,
      "learning_rate": 2.9347258892341723e-05,
      "loss": 0.8759,
      "step": 403810
    },
    {
      "epoch": 1.4152884234579974,
      "grad_norm": 2.59375,
      "learning_rate": 2.934660986367802e-05,
      "loss": 0.8591,
      "step": 403820
    },
    {
      "epoch": 1.415323470964893,
      "grad_norm": 3.0625,
      "learning_rate": 2.934596083501432e-05,
      "loss": 0.8409,
      "step": 403830
    },
    {
      "epoch": 1.4153585184717885,
      "grad_norm": 3.109375,
      "learning_rate": 2.9345311806350617e-05,
      "loss": 0.8617,
      "step": 403840
    },
    {
      "epoch": 1.415393565978684,
      "grad_norm": 3.09375,
      "learning_rate": 2.9344662777686915e-05,
      "loss": 0.8406,
      "step": 403850
    },
    {
      "epoch": 1.4154286134855796,
      "grad_norm": 3.078125,
      "learning_rate": 2.9344013749023213e-05,
      "loss": 0.9068,
      "step": 403860
    },
    {
      "epoch": 1.4154636609924753,
      "grad_norm": 2.75,
      "learning_rate": 2.934336472035951e-05,
      "loss": 0.8867,
      "step": 403870
    },
    {
      "epoch": 1.4154987084993709,
      "grad_norm": 3.078125,
      "learning_rate": 2.934271569169581e-05,
      "loss": 0.8842,
      "step": 403880
    },
    {
      "epoch": 1.4155337560062664,
      "grad_norm": 2.96875,
      "learning_rate": 2.9342066663032107e-05,
      "loss": 0.8134,
      "step": 403890
    },
    {
      "epoch": 1.4155688035131622,
      "grad_norm": 2.796875,
      "learning_rate": 2.9341417634368405e-05,
      "loss": 0.8431,
      "step": 403900
    },
    {
      "epoch": 1.4156038510200577,
      "grad_norm": 2.734375,
      "learning_rate": 2.9340768605704703e-05,
      "loss": 0.7974,
      "step": 403910
    },
    {
      "epoch": 1.4156388985269532,
      "grad_norm": 3.0,
      "learning_rate": 2.9340119577041005e-05,
      "loss": 0.8506,
      "step": 403920
    },
    {
      "epoch": 1.415673946033849,
      "grad_norm": 3.078125,
      "learning_rate": 2.9339470548377303e-05,
      "loss": 0.9014,
      "step": 403930
    },
    {
      "epoch": 1.4157089935407445,
      "grad_norm": 2.71875,
      "learning_rate": 2.93388215197136e-05,
      "loss": 0.917,
      "step": 403940
    },
    {
      "epoch": 1.41574404104764,
      "grad_norm": 3.109375,
      "learning_rate": 2.93381724910499e-05,
      "loss": 0.7894,
      "step": 403950
    },
    {
      "epoch": 1.4157790885545356,
      "grad_norm": 3.078125,
      "learning_rate": 2.9337523462386197e-05,
      "loss": 0.8372,
      "step": 403960
    },
    {
      "epoch": 1.4158141360614311,
      "grad_norm": 3.046875,
      "learning_rate": 2.9336874433722495e-05,
      "loss": 0.9406,
      "step": 403970
    },
    {
      "epoch": 1.415849183568327,
      "grad_norm": 2.625,
      "learning_rate": 2.9336225405058793e-05,
      "loss": 0.8295,
      "step": 403980
    },
    {
      "epoch": 1.4158842310752224,
      "grad_norm": 2.734375,
      "learning_rate": 2.933557637639509e-05,
      "loss": 0.8287,
      "step": 403990
    },
    {
      "epoch": 1.415919278582118,
      "grad_norm": 2.609375,
      "learning_rate": 2.933492734773139e-05,
      "loss": 0.8082,
      "step": 404000
    },
    {
      "epoch": 1.4159543260890137,
      "grad_norm": 2.515625,
      "learning_rate": 2.9334278319067687e-05,
      "loss": 0.8207,
      "step": 404010
    },
    {
      "epoch": 1.4159893735959093,
      "grad_norm": 2.9375,
      "learning_rate": 2.9333629290403985e-05,
      "loss": 0.8456,
      "step": 404020
    },
    {
      "epoch": 1.4160244211028048,
      "grad_norm": 2.21875,
      "learning_rate": 2.933298026174028e-05,
      "loss": 0.7772,
      "step": 404030
    },
    {
      "epoch": 1.4160594686097006,
      "grad_norm": 2.71875,
      "learning_rate": 2.9332331233076577e-05,
      "loss": 0.9033,
      "step": 404040
    },
    {
      "epoch": 1.416094516116596,
      "grad_norm": 2.859375,
      "learning_rate": 2.9331682204412875e-05,
      "loss": 0.8818,
      "step": 404050
    },
    {
      "epoch": 1.4161295636234916,
      "grad_norm": 2.921875,
      "learning_rate": 2.9331033175749173e-05,
      "loss": 0.8176,
      "step": 404060
    },
    {
      "epoch": 1.4161646111303872,
      "grad_norm": 3.015625,
      "learning_rate": 2.933038414708547e-05,
      "loss": 0.7993,
      "step": 404070
    },
    {
      "epoch": 1.4161996586372827,
      "grad_norm": 2.75,
      "learning_rate": 2.932973511842177e-05,
      "loss": 0.8222,
      "step": 404080
    },
    {
      "epoch": 1.4162347061441785,
      "grad_norm": 2.375,
      "learning_rate": 2.9329086089758067e-05,
      "loss": 0.8576,
      "step": 404090
    },
    {
      "epoch": 1.416269753651074,
      "grad_norm": 3.09375,
      "learning_rate": 2.9328437061094365e-05,
      "loss": 0.8301,
      "step": 404100
    },
    {
      "epoch": 1.4163048011579695,
      "grad_norm": 2.859375,
      "learning_rate": 2.9327788032430663e-05,
      "loss": 0.8272,
      "step": 404110
    },
    {
      "epoch": 1.4163398486648653,
      "grad_norm": 2.484375,
      "learning_rate": 2.932713900376696e-05,
      "loss": 0.8445,
      "step": 404120
    },
    {
      "epoch": 1.4163748961717608,
      "grad_norm": 2.625,
      "learning_rate": 2.932648997510326e-05,
      "loss": 0.8467,
      "step": 404130
    },
    {
      "epoch": 1.4164099436786564,
      "grad_norm": 2.765625,
      "learning_rate": 2.9325840946439557e-05,
      "loss": 0.8129,
      "step": 404140
    },
    {
      "epoch": 1.4164449911855521,
      "grad_norm": 3.421875,
      "learning_rate": 2.932519191777586e-05,
      "loss": 0.8746,
      "step": 404150
    },
    {
      "epoch": 1.4164800386924477,
      "grad_norm": 3.3125,
      "learning_rate": 2.9324542889112157e-05,
      "loss": 0.8361,
      "step": 404160
    },
    {
      "epoch": 1.4165150861993432,
      "grad_norm": 2.734375,
      "learning_rate": 2.9323893860448455e-05,
      "loss": 0.7109,
      "step": 404170
    },
    {
      "epoch": 1.416550133706239,
      "grad_norm": 3.234375,
      "learning_rate": 2.9323244831784753e-05,
      "loss": 0.8482,
      "step": 404180
    },
    {
      "epoch": 1.4165851812131345,
      "grad_norm": 2.9375,
      "learning_rate": 2.932259580312105e-05,
      "loss": 0.8387,
      "step": 404190
    },
    {
      "epoch": 1.41662022872003,
      "grad_norm": 2.984375,
      "learning_rate": 2.932194677445735e-05,
      "loss": 0.8644,
      "step": 404200
    },
    {
      "epoch": 1.4166552762269256,
      "grad_norm": 3.015625,
      "learning_rate": 2.9321297745793647e-05,
      "loss": 0.8607,
      "step": 404210
    },
    {
      "epoch": 1.416690323733821,
      "grad_norm": 2.8125,
      "learning_rate": 2.9320648717129945e-05,
      "loss": 0.8551,
      "step": 404220
    },
    {
      "epoch": 1.4167253712407168,
      "grad_norm": 3.03125,
      "learning_rate": 2.9319999688466243e-05,
      "loss": 0.8953,
      "step": 404230
    },
    {
      "epoch": 1.4167604187476124,
      "grad_norm": 2.515625,
      "learning_rate": 2.931935065980254e-05,
      "loss": 0.9211,
      "step": 404240
    },
    {
      "epoch": 1.416795466254508,
      "grad_norm": 3.28125,
      "learning_rate": 2.931870163113884e-05,
      "loss": 0.8929,
      "step": 404250
    },
    {
      "epoch": 1.4168305137614037,
      "grad_norm": 2.65625,
      "learning_rate": 2.9318052602475137e-05,
      "loss": 0.8716,
      "step": 404260
    },
    {
      "epoch": 1.4168655612682992,
      "grad_norm": 2.875,
      "learning_rate": 2.9317403573811435e-05,
      "loss": 0.877,
      "step": 404270
    },
    {
      "epoch": 1.4169006087751947,
      "grad_norm": 2.609375,
      "learning_rate": 2.9316754545147733e-05,
      "loss": 0.8436,
      "step": 404280
    },
    {
      "epoch": 1.4169356562820905,
      "grad_norm": 3.21875,
      "learning_rate": 2.9316105516484034e-05,
      "loss": 0.9922,
      "step": 404290
    },
    {
      "epoch": 1.416970703788986,
      "grad_norm": 3.40625,
      "learning_rate": 2.9315456487820332e-05,
      "loss": 0.8774,
      "step": 404300
    },
    {
      "epoch": 1.4170057512958816,
      "grad_norm": 3.203125,
      "learning_rate": 2.931480745915663e-05,
      "loss": 0.8365,
      "step": 404310
    },
    {
      "epoch": 1.4170407988027771,
      "grad_norm": 2.453125,
      "learning_rate": 2.9314158430492928e-05,
      "loss": 0.8356,
      "step": 404320
    },
    {
      "epoch": 1.4170758463096726,
      "grad_norm": 3.265625,
      "learning_rate": 2.9313509401829226e-05,
      "loss": 0.9525,
      "step": 404330
    },
    {
      "epoch": 1.4171108938165684,
      "grad_norm": 3.0,
      "learning_rate": 2.9312860373165524e-05,
      "loss": 0.8422,
      "step": 404340
    },
    {
      "epoch": 1.417145941323464,
      "grad_norm": 3.03125,
      "learning_rate": 2.9312211344501822e-05,
      "loss": 0.9069,
      "step": 404350
    },
    {
      "epoch": 1.4171809888303595,
      "grad_norm": 2.59375,
      "learning_rate": 2.931156231583812e-05,
      "loss": 0.7961,
      "step": 404360
    },
    {
      "epoch": 1.4172160363372552,
      "grad_norm": 3.0,
      "learning_rate": 2.9310913287174418e-05,
      "loss": 0.8899,
      "step": 404370
    },
    {
      "epoch": 1.4172510838441508,
      "grad_norm": 2.796875,
      "learning_rate": 2.9310264258510716e-05,
      "loss": 0.8938,
      "step": 404380
    },
    {
      "epoch": 1.4172861313510463,
      "grad_norm": 3.1875,
      "learning_rate": 2.9309615229847014e-05,
      "loss": 0.8749,
      "step": 404390
    },
    {
      "epoch": 1.417321178857942,
      "grad_norm": 2.890625,
      "learning_rate": 2.930896620118331e-05,
      "loss": 0.7979,
      "step": 404400
    },
    {
      "epoch": 1.4173562263648376,
      "grad_norm": 2.984375,
      "learning_rate": 2.9308317172519607e-05,
      "loss": 0.9181,
      "step": 404410
    },
    {
      "epoch": 1.4173912738717331,
      "grad_norm": 2.796875,
      "learning_rate": 2.9307668143855905e-05,
      "loss": 0.8344,
      "step": 404420
    },
    {
      "epoch": 1.4174263213786287,
      "grad_norm": 2.84375,
      "learning_rate": 2.9307019115192203e-05,
      "loss": 0.8904,
      "step": 404430
    },
    {
      "epoch": 1.4174613688855242,
      "grad_norm": 2.671875,
      "learning_rate": 2.93063700865285e-05,
      "loss": 0.8874,
      "step": 404440
    },
    {
      "epoch": 1.41749641639242,
      "grad_norm": 2.78125,
      "learning_rate": 2.93057210578648e-05,
      "loss": 0.7668,
      "step": 404450
    },
    {
      "epoch": 1.4175314638993155,
      "grad_norm": 2.90625,
      "learning_rate": 2.9305072029201097e-05,
      "loss": 0.8944,
      "step": 404460
    },
    {
      "epoch": 1.417566511406211,
      "grad_norm": 3.3125,
      "learning_rate": 2.9304423000537395e-05,
      "loss": 0.8441,
      "step": 404470
    },
    {
      "epoch": 1.4176015589131068,
      "grad_norm": 2.625,
      "learning_rate": 2.9303773971873693e-05,
      "loss": 0.8884,
      "step": 404480
    },
    {
      "epoch": 1.4176366064200023,
      "grad_norm": 2.859375,
      "learning_rate": 2.930312494320999e-05,
      "loss": 0.7725,
      "step": 404490
    },
    {
      "epoch": 1.4176716539268979,
      "grad_norm": 3.03125,
      "learning_rate": 2.930247591454629e-05,
      "loss": 0.8175,
      "step": 404500
    },
    {
      "epoch": 1.4177067014337936,
      "grad_norm": 2.953125,
      "learning_rate": 2.9301826885882587e-05,
      "loss": 0.8513,
      "step": 404510
    },
    {
      "epoch": 1.4177417489406892,
      "grad_norm": 2.5625,
      "learning_rate": 2.9301177857218888e-05,
      "loss": 0.8152,
      "step": 404520
    },
    {
      "epoch": 1.4177767964475847,
      "grad_norm": 3.1875,
      "learning_rate": 2.9300528828555186e-05,
      "loss": 0.8745,
      "step": 404530
    },
    {
      "epoch": 1.4178118439544802,
      "grad_norm": 2.875,
      "learning_rate": 2.9299879799891484e-05,
      "loss": 0.8159,
      "step": 404540
    },
    {
      "epoch": 1.4178468914613758,
      "grad_norm": 3.28125,
      "learning_rate": 2.9299230771227782e-05,
      "loss": 0.8283,
      "step": 404550
    },
    {
      "epoch": 1.4178819389682715,
      "grad_norm": 3.421875,
      "learning_rate": 2.929858174256408e-05,
      "loss": 0.8064,
      "step": 404560
    },
    {
      "epoch": 1.417916986475167,
      "grad_norm": 2.953125,
      "learning_rate": 2.9297932713900378e-05,
      "loss": 0.8194,
      "step": 404570
    },
    {
      "epoch": 1.4179520339820626,
      "grad_norm": 3.078125,
      "learning_rate": 2.9297283685236676e-05,
      "loss": 0.8288,
      "step": 404580
    },
    {
      "epoch": 1.4179870814889584,
      "grad_norm": 3.203125,
      "learning_rate": 2.9296634656572974e-05,
      "loss": 0.9011,
      "step": 404590
    },
    {
      "epoch": 1.418022128995854,
      "grad_norm": 2.90625,
      "learning_rate": 2.9295985627909272e-05,
      "loss": 0.8462,
      "step": 404600
    },
    {
      "epoch": 1.4180571765027494,
      "grad_norm": 2.6875,
      "learning_rate": 2.929533659924557e-05,
      "loss": 0.8425,
      "step": 404610
    },
    {
      "epoch": 1.4180922240096452,
      "grad_norm": 3.0625,
      "learning_rate": 2.9294687570581868e-05,
      "loss": 0.937,
      "step": 404620
    },
    {
      "epoch": 1.4181272715165407,
      "grad_norm": 2.75,
      "learning_rate": 2.9294038541918166e-05,
      "loss": 0.793,
      "step": 404630
    },
    {
      "epoch": 1.4181623190234363,
      "grad_norm": 2.953125,
      "learning_rate": 2.9293389513254464e-05,
      "loss": 0.8511,
      "step": 404640
    },
    {
      "epoch": 1.4181973665303318,
      "grad_norm": 2.828125,
      "learning_rate": 2.9292740484590765e-05,
      "loss": 0.8642,
      "step": 404650
    },
    {
      "epoch": 1.4182324140372273,
      "grad_norm": 2.90625,
      "learning_rate": 2.9292091455927063e-05,
      "loss": 0.8231,
      "step": 404660
    },
    {
      "epoch": 1.418267461544123,
      "grad_norm": 2.578125,
      "learning_rate": 2.929144242726336e-05,
      "loss": 0.7743,
      "step": 404670
    },
    {
      "epoch": 1.4183025090510186,
      "grad_norm": 3.34375,
      "learning_rate": 2.929079339859966e-05,
      "loss": 0.8973,
      "step": 404680
    },
    {
      "epoch": 1.4183375565579142,
      "grad_norm": 2.703125,
      "learning_rate": 2.9290144369935957e-05,
      "loss": 0.8765,
      "step": 404690
    },
    {
      "epoch": 1.41837260406481,
      "grad_norm": 3.171875,
      "learning_rate": 2.9289495341272255e-05,
      "loss": 0.9179,
      "step": 404700
    },
    {
      "epoch": 1.4184076515717055,
      "grad_norm": 3.015625,
      "learning_rate": 2.9288846312608553e-05,
      "loss": 0.8564,
      "step": 404710
    },
    {
      "epoch": 1.418442699078601,
      "grad_norm": 2.890625,
      "learning_rate": 2.928819728394485e-05,
      "loss": 0.8523,
      "step": 404720
    },
    {
      "epoch": 1.4184777465854967,
      "grad_norm": 3.09375,
      "learning_rate": 2.928754825528115e-05,
      "loss": 0.7442,
      "step": 404730
    },
    {
      "epoch": 1.4185127940923923,
      "grad_norm": 2.9375,
      "learning_rate": 2.9286899226617447e-05,
      "loss": 0.8326,
      "step": 404740
    },
    {
      "epoch": 1.4185478415992878,
      "grad_norm": 2.8125,
      "learning_rate": 2.9286250197953745e-05,
      "loss": 0.9297,
      "step": 404750
    },
    {
      "epoch": 1.4185828891061834,
      "grad_norm": 2.765625,
      "learning_rate": 2.9285601169290043e-05,
      "loss": 0.8595,
      "step": 404760
    },
    {
      "epoch": 1.4186179366130791,
      "grad_norm": 3.046875,
      "learning_rate": 2.928495214062634e-05,
      "loss": 0.8421,
      "step": 404770
    },
    {
      "epoch": 1.4186529841199746,
      "grad_norm": 2.8125,
      "learning_rate": 2.9284303111962636e-05,
      "loss": 0.8059,
      "step": 404780
    },
    {
      "epoch": 1.4186880316268702,
      "grad_norm": 2.859375,
      "learning_rate": 2.9283654083298934e-05,
      "loss": 0.8476,
      "step": 404790
    },
    {
      "epoch": 1.4187230791337657,
      "grad_norm": 2.78125,
      "learning_rate": 2.9283005054635232e-05,
      "loss": 0.8687,
      "step": 404800
    },
    {
      "epoch": 1.4187581266406615,
      "grad_norm": 2.546875,
      "learning_rate": 2.928235602597153e-05,
      "loss": 0.8062,
      "step": 404810
    },
    {
      "epoch": 1.418793174147557,
      "grad_norm": 2.78125,
      "learning_rate": 2.9281706997307828e-05,
      "loss": 0.8447,
      "step": 404820
    },
    {
      "epoch": 1.4188282216544525,
      "grad_norm": 2.96875,
      "learning_rate": 2.9281057968644126e-05,
      "loss": 0.8178,
      "step": 404830
    },
    {
      "epoch": 1.4188632691613483,
      "grad_norm": 2.9375,
      "learning_rate": 2.9280408939980424e-05,
      "loss": 0.8891,
      "step": 404840
    },
    {
      "epoch": 1.4188983166682438,
      "grad_norm": 2.59375,
      "learning_rate": 2.9279759911316722e-05,
      "loss": 0.8565,
      "step": 404850
    },
    {
      "epoch": 1.4189333641751394,
      "grad_norm": 3.0625,
      "learning_rate": 2.927911088265302e-05,
      "loss": 0.8949,
      "step": 404860
    },
    {
      "epoch": 1.4189684116820351,
      "grad_norm": 3.046875,
      "learning_rate": 2.9278461853989318e-05,
      "loss": 0.9218,
      "step": 404870
    },
    {
      "epoch": 1.4190034591889307,
      "grad_norm": 2.671875,
      "learning_rate": 2.927781282532562e-05,
      "loss": 0.8328,
      "step": 404880
    },
    {
      "epoch": 1.4190385066958262,
      "grad_norm": 2.5625,
      "learning_rate": 2.9277163796661917e-05,
      "loss": 0.9161,
      "step": 404890
    },
    {
      "epoch": 1.4190735542027217,
      "grad_norm": 2.484375,
      "learning_rate": 2.9276514767998215e-05,
      "loss": 0.8576,
      "step": 404900
    },
    {
      "epoch": 1.4191086017096173,
      "grad_norm": 3.0,
      "learning_rate": 2.9275865739334513e-05,
      "loss": 0.7561,
      "step": 404910
    },
    {
      "epoch": 1.419143649216513,
      "grad_norm": 2.984375,
      "learning_rate": 2.927521671067081e-05,
      "loss": 0.8562,
      "step": 404920
    },
    {
      "epoch": 1.4191786967234086,
      "grad_norm": 2.984375,
      "learning_rate": 2.927456768200711e-05,
      "loss": 0.901,
      "step": 404930
    },
    {
      "epoch": 1.419213744230304,
      "grad_norm": 2.859375,
      "learning_rate": 2.9273918653343407e-05,
      "loss": 0.7987,
      "step": 404940
    },
    {
      "epoch": 1.4192487917371999,
      "grad_norm": 2.71875,
      "learning_rate": 2.9273269624679705e-05,
      "loss": 0.8193,
      "step": 404950
    },
    {
      "epoch": 1.4192838392440954,
      "grad_norm": 2.796875,
      "learning_rate": 2.9272620596016003e-05,
      "loss": 0.8829,
      "step": 404960
    },
    {
      "epoch": 1.419318886750991,
      "grad_norm": 2.375,
      "learning_rate": 2.92719715673523e-05,
      "loss": 0.8611,
      "step": 404970
    },
    {
      "epoch": 1.4193539342578867,
      "grad_norm": 2.9375,
      "learning_rate": 2.92713225386886e-05,
      "loss": 0.8269,
      "step": 404980
    },
    {
      "epoch": 1.4193889817647822,
      "grad_norm": 3.0625,
      "learning_rate": 2.9270673510024897e-05,
      "loss": 0.8334,
      "step": 404990
    },
    {
      "epoch": 1.4194240292716778,
      "grad_norm": 3.0625,
      "learning_rate": 2.9270024481361195e-05,
      "loss": 0.8979,
      "step": 405000
    },
    {
      "epoch": 1.4194240292716778,
      "eval_loss": 0.7952214479446411,
      "eval_runtime": 563.5112,
      "eval_samples_per_second": 675.117,
      "eval_steps_per_second": 56.26,
      "step": 405000
    },
    {
      "epoch": 1.4194590767785733,
      "grad_norm": 2.5,
      "learning_rate": 2.9269375452697493e-05,
      "loss": 0.7831,
      "step": 405010
    },
    {
      "epoch": 1.4194941242854688,
      "grad_norm": 3.21875,
      "learning_rate": 2.9268726424033795e-05,
      "loss": 0.8856,
      "step": 405020
    },
    {
      "epoch": 1.4195291717923646,
      "grad_norm": 2.9375,
      "learning_rate": 2.9268077395370093e-05,
      "loss": 0.8467,
      "step": 405030
    },
    {
      "epoch": 1.4195642192992601,
      "grad_norm": 2.765625,
      "learning_rate": 2.926742836670639e-05,
      "loss": 0.9296,
      "step": 405040
    },
    {
      "epoch": 1.4195992668061557,
      "grad_norm": 2.3125,
      "learning_rate": 2.926677933804269e-05,
      "loss": 0.858,
      "step": 405050
    },
    {
      "epoch": 1.4196343143130514,
      "grad_norm": 2.421875,
      "learning_rate": 2.9266130309378987e-05,
      "loss": 0.8969,
      "step": 405060
    },
    {
      "epoch": 1.419669361819947,
      "grad_norm": 2.796875,
      "learning_rate": 2.9265481280715285e-05,
      "loss": 0.851,
      "step": 405070
    },
    {
      "epoch": 1.4197044093268425,
      "grad_norm": 3.03125,
      "learning_rate": 2.9264832252051583e-05,
      "loss": 0.869,
      "step": 405080
    },
    {
      "epoch": 1.4197394568337383,
      "grad_norm": 2.734375,
      "learning_rate": 2.926418322338788e-05,
      "loss": 0.7814,
      "step": 405090
    },
    {
      "epoch": 1.4197745043406338,
      "grad_norm": 3.046875,
      "learning_rate": 2.926353419472418e-05,
      "loss": 0.8198,
      "step": 405100
    },
    {
      "epoch": 1.4198095518475293,
      "grad_norm": 2.84375,
      "learning_rate": 2.9262885166060477e-05,
      "loss": 0.9101,
      "step": 405110
    },
    {
      "epoch": 1.4198445993544249,
      "grad_norm": 3.421875,
      "learning_rate": 2.9262236137396775e-05,
      "loss": 0.8415,
      "step": 405120
    },
    {
      "epoch": 1.4198796468613204,
      "grad_norm": 3.203125,
      "learning_rate": 2.9261587108733073e-05,
      "loss": 0.8234,
      "step": 405130
    },
    {
      "epoch": 1.4199146943682162,
      "grad_norm": 3.03125,
      "learning_rate": 2.926093808006937e-05,
      "loss": 0.8713,
      "step": 405140
    },
    {
      "epoch": 1.4199497418751117,
      "grad_norm": 3.0,
      "learning_rate": 2.926028905140567e-05,
      "loss": 0.9463,
      "step": 405150
    },
    {
      "epoch": 1.4199847893820072,
      "grad_norm": 3.03125,
      "learning_rate": 2.9259640022741963e-05,
      "loss": 0.8465,
      "step": 405160
    },
    {
      "epoch": 1.420019836888903,
      "grad_norm": 3.140625,
      "learning_rate": 2.925899099407826e-05,
      "loss": 0.8238,
      "step": 405170
    },
    {
      "epoch": 1.4200548843957985,
      "grad_norm": 2.71875,
      "learning_rate": 2.925834196541456e-05,
      "loss": 0.8588,
      "step": 405180
    },
    {
      "epoch": 1.420089931902694,
      "grad_norm": 2.765625,
      "learning_rate": 2.9257692936750857e-05,
      "loss": 0.7284,
      "step": 405190
    },
    {
      "epoch": 1.4201249794095898,
      "grad_norm": 2.796875,
      "learning_rate": 2.9257043908087155e-05,
      "loss": 0.8189,
      "step": 405200
    },
    {
      "epoch": 1.4201600269164854,
      "grad_norm": 3.3125,
      "learning_rate": 2.9256394879423453e-05,
      "loss": 0.9373,
      "step": 405210
    },
    {
      "epoch": 1.4201950744233809,
      "grad_norm": 2.859375,
      "learning_rate": 2.925574585075975e-05,
      "loss": 0.7972,
      "step": 405220
    },
    {
      "epoch": 1.4202301219302764,
      "grad_norm": 2.609375,
      "learning_rate": 2.925509682209605e-05,
      "loss": 0.8954,
      "step": 405230
    },
    {
      "epoch": 1.420265169437172,
      "grad_norm": 2.921875,
      "learning_rate": 2.9254447793432347e-05,
      "loss": 0.8358,
      "step": 405240
    },
    {
      "epoch": 1.4203002169440677,
      "grad_norm": 2.53125,
      "learning_rate": 2.925379876476865e-05,
      "loss": 0.8167,
      "step": 405250
    },
    {
      "epoch": 1.4203352644509633,
      "grad_norm": 2.515625,
      "learning_rate": 2.9253149736104947e-05,
      "loss": 0.8649,
      "step": 405260
    },
    {
      "epoch": 1.4203703119578588,
      "grad_norm": 3.0,
      "learning_rate": 2.9252500707441245e-05,
      "loss": 0.8312,
      "step": 405270
    },
    {
      "epoch": 1.4204053594647545,
      "grad_norm": 2.8125,
      "learning_rate": 2.9251851678777543e-05,
      "loss": 0.8285,
      "step": 405280
    },
    {
      "epoch": 1.42044040697165,
      "grad_norm": 2.421875,
      "learning_rate": 2.925120265011384e-05,
      "loss": 0.8425,
      "step": 405290
    },
    {
      "epoch": 1.4204754544785456,
      "grad_norm": 2.8125,
      "learning_rate": 2.925055362145014e-05,
      "loss": 0.8269,
      "step": 405300
    },
    {
      "epoch": 1.4205105019854414,
      "grad_norm": 2.6875,
      "learning_rate": 2.9249904592786437e-05,
      "loss": 0.8312,
      "step": 405310
    },
    {
      "epoch": 1.420545549492337,
      "grad_norm": 2.765625,
      "learning_rate": 2.9249255564122735e-05,
      "loss": 0.8196,
      "step": 405320
    },
    {
      "epoch": 1.4205805969992324,
      "grad_norm": 2.71875,
      "learning_rate": 2.9248606535459033e-05,
      "loss": 0.8398,
      "step": 405330
    },
    {
      "epoch": 1.420615644506128,
      "grad_norm": 2.828125,
      "learning_rate": 2.924795750679533e-05,
      "loss": 0.8129,
      "step": 405340
    },
    {
      "epoch": 1.4206506920130235,
      "grad_norm": 2.90625,
      "learning_rate": 2.924730847813163e-05,
      "loss": 0.8549,
      "step": 405350
    },
    {
      "epoch": 1.4206857395199193,
      "grad_norm": 3.015625,
      "learning_rate": 2.9246659449467927e-05,
      "loss": 0.8368,
      "step": 405360
    },
    {
      "epoch": 1.4207207870268148,
      "grad_norm": 2.890625,
      "learning_rate": 2.9246010420804225e-05,
      "loss": 0.7887,
      "step": 405370
    },
    {
      "epoch": 1.4207558345337103,
      "grad_norm": 2.90625,
      "learning_rate": 2.9245361392140523e-05,
      "loss": 0.823,
      "step": 405380
    },
    {
      "epoch": 1.420790882040606,
      "grad_norm": 2.796875,
      "learning_rate": 2.9244712363476824e-05,
      "loss": 0.8223,
      "step": 405390
    },
    {
      "epoch": 1.4208259295475016,
      "grad_norm": 2.90625,
      "learning_rate": 2.9244063334813122e-05,
      "loss": 0.923,
      "step": 405400
    },
    {
      "epoch": 1.4208609770543972,
      "grad_norm": 3.875,
      "learning_rate": 2.924341430614942e-05,
      "loss": 0.8522,
      "step": 405410
    },
    {
      "epoch": 1.420896024561293,
      "grad_norm": 2.953125,
      "learning_rate": 2.9242765277485718e-05,
      "loss": 0.8328,
      "step": 405420
    },
    {
      "epoch": 1.4209310720681885,
      "grad_norm": 2.59375,
      "learning_rate": 2.9242116248822016e-05,
      "loss": 0.8032,
      "step": 405430
    },
    {
      "epoch": 1.420966119575084,
      "grad_norm": 2.9375,
      "learning_rate": 2.9241467220158314e-05,
      "loss": 0.8011,
      "step": 405440
    },
    {
      "epoch": 1.4210011670819795,
      "grad_norm": 2.8125,
      "learning_rate": 2.9240818191494612e-05,
      "loss": 0.8537,
      "step": 405450
    },
    {
      "epoch": 1.4210362145888753,
      "grad_norm": 3.3125,
      "learning_rate": 2.924016916283091e-05,
      "loss": 0.8057,
      "step": 405460
    },
    {
      "epoch": 1.4210712620957708,
      "grad_norm": 2.875,
      "learning_rate": 2.9239520134167208e-05,
      "loss": 0.8247,
      "step": 405470
    },
    {
      "epoch": 1.4211063096026664,
      "grad_norm": 2.78125,
      "learning_rate": 2.9238871105503506e-05,
      "loss": 0.8956,
      "step": 405480
    },
    {
      "epoch": 1.421141357109562,
      "grad_norm": 3.109375,
      "learning_rate": 2.9238222076839804e-05,
      "loss": 0.9204,
      "step": 405490
    },
    {
      "epoch": 1.4211764046164577,
      "grad_norm": 2.984375,
      "learning_rate": 2.9237573048176102e-05,
      "loss": 0.8861,
      "step": 405500
    },
    {
      "epoch": 1.4212114521233532,
      "grad_norm": 2.109375,
      "learning_rate": 2.92369240195124e-05,
      "loss": 0.7891,
      "step": 405510
    },
    {
      "epoch": 1.4212464996302487,
      "grad_norm": 2.828125,
      "learning_rate": 2.9236274990848698e-05,
      "loss": 0.8049,
      "step": 405520
    },
    {
      "epoch": 1.4212815471371445,
      "grad_norm": 2.765625,
      "learning_rate": 2.9235625962184993e-05,
      "loss": 0.8141,
      "step": 405530
    },
    {
      "epoch": 1.42131659464404,
      "grad_norm": 2.90625,
      "learning_rate": 2.923497693352129e-05,
      "loss": 0.9109,
      "step": 405540
    },
    {
      "epoch": 1.4213516421509356,
      "grad_norm": 3.75,
      "learning_rate": 2.923432790485759e-05,
      "loss": 0.9169,
      "step": 405550
    },
    {
      "epoch": 1.4213866896578313,
      "grad_norm": 3.0,
      "learning_rate": 2.9233678876193887e-05,
      "loss": 0.8837,
      "step": 405560
    },
    {
      "epoch": 1.4214217371647269,
      "grad_norm": 2.84375,
      "learning_rate": 2.9233029847530185e-05,
      "loss": 0.9062,
      "step": 405570
    },
    {
      "epoch": 1.4214567846716224,
      "grad_norm": 2.828125,
      "learning_rate": 2.9232380818866483e-05,
      "loss": 0.7794,
      "step": 405580
    },
    {
      "epoch": 1.421491832178518,
      "grad_norm": 3.03125,
      "learning_rate": 2.923173179020278e-05,
      "loss": 0.7611,
      "step": 405590
    },
    {
      "epoch": 1.4215268796854135,
      "grad_norm": 3.109375,
      "learning_rate": 2.923108276153908e-05,
      "loss": 0.8249,
      "step": 405600
    },
    {
      "epoch": 1.4215619271923092,
      "grad_norm": 2.921875,
      "learning_rate": 2.9230433732875377e-05,
      "loss": 0.8574,
      "step": 405610
    },
    {
      "epoch": 1.4215969746992048,
      "grad_norm": 2.703125,
      "learning_rate": 2.9229784704211678e-05,
      "loss": 0.7561,
      "step": 405620
    },
    {
      "epoch": 1.4216320222061003,
      "grad_norm": 2.859375,
      "learning_rate": 2.9229135675547976e-05,
      "loss": 0.9261,
      "step": 405630
    },
    {
      "epoch": 1.421667069712996,
      "grad_norm": 2.65625,
      "learning_rate": 2.9228486646884274e-05,
      "loss": 0.8215,
      "step": 405640
    },
    {
      "epoch": 1.4217021172198916,
      "grad_norm": 2.71875,
      "learning_rate": 2.9227837618220572e-05,
      "loss": 0.9139,
      "step": 405650
    },
    {
      "epoch": 1.4217371647267871,
      "grad_norm": 2.875,
      "learning_rate": 2.922718858955687e-05,
      "loss": 0.8585,
      "step": 405660
    },
    {
      "epoch": 1.4217722122336829,
      "grad_norm": 2.53125,
      "learning_rate": 2.9226539560893168e-05,
      "loss": 0.8374,
      "step": 405670
    },
    {
      "epoch": 1.4218072597405784,
      "grad_norm": 3.0,
      "learning_rate": 2.9225890532229466e-05,
      "loss": 0.9615,
      "step": 405680
    },
    {
      "epoch": 1.421842307247474,
      "grad_norm": 2.890625,
      "learning_rate": 2.9225241503565764e-05,
      "loss": 0.8436,
      "step": 405690
    },
    {
      "epoch": 1.4218773547543695,
      "grad_norm": 3.015625,
      "learning_rate": 2.9224592474902062e-05,
      "loss": 0.8835,
      "step": 405700
    },
    {
      "epoch": 1.421912402261265,
      "grad_norm": 2.640625,
      "learning_rate": 2.922394344623836e-05,
      "loss": 0.8593,
      "step": 405710
    },
    {
      "epoch": 1.4219474497681608,
      "grad_norm": 2.703125,
      "learning_rate": 2.9223294417574658e-05,
      "loss": 0.836,
      "step": 405720
    },
    {
      "epoch": 1.4219824972750563,
      "grad_norm": 2.6875,
      "learning_rate": 2.9222645388910956e-05,
      "loss": 0.8721,
      "step": 405730
    },
    {
      "epoch": 1.4220175447819519,
      "grad_norm": 2.8125,
      "learning_rate": 2.9221996360247254e-05,
      "loss": 0.8952,
      "step": 405740
    },
    {
      "epoch": 1.4220525922888476,
      "grad_norm": 2.484375,
      "learning_rate": 2.9221347331583556e-05,
      "loss": 0.8182,
      "step": 405750
    },
    {
      "epoch": 1.4220876397957432,
      "grad_norm": 2.671875,
      "learning_rate": 2.9220698302919854e-05,
      "loss": 0.8336,
      "step": 405760
    },
    {
      "epoch": 1.4221226873026387,
      "grad_norm": 3.125,
      "learning_rate": 2.922004927425615e-05,
      "loss": 0.8719,
      "step": 405770
    },
    {
      "epoch": 1.4221577348095344,
      "grad_norm": 2.734375,
      "learning_rate": 2.921940024559245e-05,
      "loss": 0.8802,
      "step": 405780
    },
    {
      "epoch": 1.42219278231643,
      "grad_norm": 2.609375,
      "learning_rate": 2.9218751216928748e-05,
      "loss": 0.8806,
      "step": 405790
    },
    {
      "epoch": 1.4222278298233255,
      "grad_norm": 3.0625,
      "learning_rate": 2.9218102188265046e-05,
      "loss": 0.8941,
      "step": 405800
    },
    {
      "epoch": 1.422262877330221,
      "grad_norm": 3.015625,
      "learning_rate": 2.9217453159601344e-05,
      "loss": 0.8518,
      "step": 405810
    },
    {
      "epoch": 1.4222979248371166,
      "grad_norm": 2.53125,
      "learning_rate": 2.921680413093764e-05,
      "loss": 0.7602,
      "step": 405820
    },
    {
      "epoch": 1.4223329723440123,
      "grad_norm": 3.0625,
      "learning_rate": 2.921615510227394e-05,
      "loss": 0.8329,
      "step": 405830
    },
    {
      "epoch": 1.4223680198509079,
      "grad_norm": 2.8125,
      "learning_rate": 2.9215506073610238e-05,
      "loss": 0.8182,
      "step": 405840
    },
    {
      "epoch": 1.4224030673578034,
      "grad_norm": 2.96875,
      "learning_rate": 2.9214857044946536e-05,
      "loss": 0.8207,
      "step": 405850
    },
    {
      "epoch": 1.4224381148646992,
      "grad_norm": 2.8125,
      "learning_rate": 2.9214208016282834e-05,
      "loss": 0.8277,
      "step": 405860
    },
    {
      "epoch": 1.4224731623715947,
      "grad_norm": 2.9375,
      "learning_rate": 2.921355898761913e-05,
      "loss": 0.9041,
      "step": 405870
    },
    {
      "epoch": 1.4225082098784902,
      "grad_norm": 2.84375,
      "learning_rate": 2.921290995895543e-05,
      "loss": 0.8634,
      "step": 405880
    },
    {
      "epoch": 1.422543257385386,
      "grad_norm": 3.03125,
      "learning_rate": 2.921226093029173e-05,
      "loss": 0.889,
      "step": 405890
    },
    {
      "epoch": 1.4225783048922815,
      "grad_norm": 2.921875,
      "learning_rate": 2.921161190162803e-05,
      "loss": 0.8139,
      "step": 405900
    },
    {
      "epoch": 1.422613352399177,
      "grad_norm": 3.140625,
      "learning_rate": 2.921096287296432e-05,
      "loss": 0.8427,
      "step": 405910
    },
    {
      "epoch": 1.4226483999060726,
      "grad_norm": 2.984375,
      "learning_rate": 2.9210313844300618e-05,
      "loss": 0.8873,
      "step": 405920
    },
    {
      "epoch": 1.4226834474129681,
      "grad_norm": 2.859375,
      "learning_rate": 2.9209664815636916e-05,
      "loss": 0.9142,
      "step": 405930
    },
    {
      "epoch": 1.422718494919864,
      "grad_norm": 2.46875,
      "learning_rate": 2.9209015786973214e-05,
      "loss": 0.8453,
      "step": 405940
    },
    {
      "epoch": 1.4227535424267594,
      "grad_norm": 2.734375,
      "learning_rate": 2.9208366758309512e-05,
      "loss": 0.7904,
      "step": 405950
    },
    {
      "epoch": 1.422788589933655,
      "grad_norm": 3.171875,
      "learning_rate": 2.920771772964581e-05,
      "loss": 0.8636,
      "step": 405960
    },
    {
      "epoch": 1.4228236374405507,
      "grad_norm": 2.859375,
      "learning_rate": 2.9207068700982108e-05,
      "loss": 0.8169,
      "step": 405970
    },
    {
      "epoch": 1.4228586849474463,
      "grad_norm": 2.890625,
      "learning_rate": 2.920641967231841e-05,
      "loss": 0.8621,
      "step": 405980
    },
    {
      "epoch": 1.4228937324543418,
      "grad_norm": 2.875,
      "learning_rate": 2.9205770643654708e-05,
      "loss": 0.8597,
      "step": 405990
    },
    {
      "epoch": 1.4229287799612376,
      "grad_norm": 3.203125,
      "learning_rate": 2.9205121614991006e-05,
      "loss": 0.8632,
      "step": 406000
    },
    {
      "epoch": 1.422963827468133,
      "grad_norm": 3.171875,
      "learning_rate": 2.9204472586327304e-05,
      "loss": 0.8042,
      "step": 406010
    },
    {
      "epoch": 1.4229988749750286,
      "grad_norm": 2.78125,
      "learning_rate": 2.92038235576636e-05,
      "loss": 0.8713,
      "step": 406020
    },
    {
      "epoch": 1.4230339224819242,
      "grad_norm": 2.65625,
      "learning_rate": 2.92031745289999e-05,
      "loss": 0.7802,
      "step": 406030
    },
    {
      "epoch": 1.4230689699888197,
      "grad_norm": 2.453125,
      "learning_rate": 2.9202525500336198e-05,
      "loss": 0.8451,
      "step": 406040
    },
    {
      "epoch": 1.4231040174957155,
      "grad_norm": 2.984375,
      "learning_rate": 2.9201876471672496e-05,
      "loss": 0.8638,
      "step": 406050
    },
    {
      "epoch": 1.423139065002611,
      "grad_norm": 2.671875,
      "learning_rate": 2.9201227443008794e-05,
      "loss": 0.8271,
      "step": 406060
    },
    {
      "epoch": 1.4231741125095065,
      "grad_norm": 2.921875,
      "learning_rate": 2.920057841434509e-05,
      "loss": 0.7982,
      "step": 406070
    },
    {
      "epoch": 1.4232091600164023,
      "grad_norm": 2.671875,
      "learning_rate": 2.919992938568139e-05,
      "loss": 0.8781,
      "step": 406080
    },
    {
      "epoch": 1.4232442075232978,
      "grad_norm": 3.03125,
      "learning_rate": 2.9199280357017688e-05,
      "loss": 0.9378,
      "step": 406090
    },
    {
      "epoch": 1.4232792550301934,
      "grad_norm": 3.46875,
      "learning_rate": 2.9198631328353986e-05,
      "loss": 0.7795,
      "step": 406100
    },
    {
      "epoch": 1.4233143025370891,
      "grad_norm": 2.984375,
      "learning_rate": 2.9197982299690284e-05,
      "loss": 0.8836,
      "step": 406110
    },
    {
      "epoch": 1.4233493500439847,
      "grad_norm": 2.625,
      "learning_rate": 2.9197333271026585e-05,
      "loss": 0.8487,
      "step": 406120
    },
    {
      "epoch": 1.4233843975508802,
      "grad_norm": 3.0,
      "learning_rate": 2.9196684242362883e-05,
      "loss": 0.8453,
      "step": 406130
    },
    {
      "epoch": 1.4234194450577757,
      "grad_norm": 2.84375,
      "learning_rate": 2.919603521369918e-05,
      "loss": 0.7531,
      "step": 406140
    },
    {
      "epoch": 1.4234544925646715,
      "grad_norm": 2.6875,
      "learning_rate": 2.919538618503548e-05,
      "loss": 0.94,
      "step": 406150
    },
    {
      "epoch": 1.423489540071567,
      "grad_norm": 2.953125,
      "learning_rate": 2.9194737156371777e-05,
      "loss": 0.9405,
      "step": 406160
    },
    {
      "epoch": 1.4235245875784626,
      "grad_norm": 2.84375,
      "learning_rate": 2.9194088127708075e-05,
      "loss": 0.8938,
      "step": 406170
    },
    {
      "epoch": 1.423559635085358,
      "grad_norm": 2.859375,
      "learning_rate": 2.9193439099044373e-05,
      "loss": 0.8718,
      "step": 406180
    },
    {
      "epoch": 1.4235946825922539,
      "grad_norm": 3.25,
      "learning_rate": 2.919279007038067e-05,
      "loss": 0.9535,
      "step": 406190
    },
    {
      "epoch": 1.4236297300991494,
      "grad_norm": 2.671875,
      "learning_rate": 2.919214104171697e-05,
      "loss": 0.8126,
      "step": 406200
    },
    {
      "epoch": 1.423664777606045,
      "grad_norm": 2.765625,
      "learning_rate": 2.9191492013053267e-05,
      "loss": 0.894,
      "step": 406210
    },
    {
      "epoch": 1.4236998251129407,
      "grad_norm": 2.953125,
      "learning_rate": 2.9190842984389565e-05,
      "loss": 0.9318,
      "step": 406220
    },
    {
      "epoch": 1.4237348726198362,
      "grad_norm": 2.875,
      "learning_rate": 2.9190193955725863e-05,
      "loss": 0.8545,
      "step": 406230
    },
    {
      "epoch": 1.4237699201267318,
      "grad_norm": 3.203125,
      "learning_rate": 2.918954492706216e-05,
      "loss": 0.9971,
      "step": 406240
    },
    {
      "epoch": 1.4238049676336275,
      "grad_norm": 2.65625,
      "learning_rate": 2.918889589839846e-05,
      "loss": 0.8292,
      "step": 406250
    },
    {
      "epoch": 1.423840015140523,
      "grad_norm": 2.859375,
      "learning_rate": 2.918824686973476e-05,
      "loss": 0.8554,
      "step": 406260
    },
    {
      "epoch": 1.4238750626474186,
      "grad_norm": 3.015625,
      "learning_rate": 2.918759784107106e-05,
      "loss": 0.8468,
      "step": 406270
    },
    {
      "epoch": 1.4239101101543141,
      "grad_norm": 3.1875,
      "learning_rate": 2.918694881240735e-05,
      "loss": 0.913,
      "step": 406280
    },
    {
      "epoch": 1.4239451576612097,
      "grad_norm": 3.203125,
      "learning_rate": 2.9186299783743648e-05,
      "loss": 0.9132,
      "step": 406290
    },
    {
      "epoch": 1.4239802051681054,
      "grad_norm": 3.28125,
      "learning_rate": 2.9185650755079946e-05,
      "loss": 0.8667,
      "step": 406300
    },
    {
      "epoch": 1.424015252675001,
      "grad_norm": 2.96875,
      "learning_rate": 2.9185001726416244e-05,
      "loss": 0.8451,
      "step": 406310
    },
    {
      "epoch": 1.4240503001818965,
      "grad_norm": 3.0625,
      "learning_rate": 2.918435269775254e-05,
      "loss": 0.9257,
      "step": 406320
    },
    {
      "epoch": 1.4240853476887922,
      "grad_norm": 3.203125,
      "learning_rate": 2.918370366908884e-05,
      "loss": 0.824,
      "step": 406330
    },
    {
      "epoch": 1.4241203951956878,
      "grad_norm": 2.453125,
      "learning_rate": 2.9183054640425138e-05,
      "loss": 0.9052,
      "step": 406340
    },
    {
      "epoch": 1.4241554427025833,
      "grad_norm": 2.734375,
      "learning_rate": 2.918240561176144e-05,
      "loss": 0.8357,
      "step": 406350
    },
    {
      "epoch": 1.424190490209479,
      "grad_norm": 3.09375,
      "learning_rate": 2.9181756583097737e-05,
      "loss": 0.8513,
      "step": 406360
    },
    {
      "epoch": 1.4242255377163746,
      "grad_norm": 2.90625,
      "learning_rate": 2.9181107554434035e-05,
      "loss": 0.8202,
      "step": 406370
    },
    {
      "epoch": 1.4242605852232701,
      "grad_norm": 2.828125,
      "learning_rate": 2.9180458525770333e-05,
      "loss": 0.8959,
      "step": 406380
    },
    {
      "epoch": 1.4242956327301657,
      "grad_norm": 3.171875,
      "learning_rate": 2.917980949710663e-05,
      "loss": 0.9175,
      "step": 406390
    },
    {
      "epoch": 1.4243306802370612,
      "grad_norm": 2.9375,
      "learning_rate": 2.917916046844293e-05,
      "loss": 0.841,
      "step": 406400
    },
    {
      "epoch": 1.424365727743957,
      "grad_norm": 3.015625,
      "learning_rate": 2.9178511439779227e-05,
      "loss": 0.8616,
      "step": 406410
    },
    {
      "epoch": 1.4244007752508525,
      "grad_norm": 2.640625,
      "learning_rate": 2.9177862411115525e-05,
      "loss": 0.846,
      "step": 406420
    },
    {
      "epoch": 1.424435822757748,
      "grad_norm": 2.71875,
      "learning_rate": 2.9177213382451823e-05,
      "loss": 0.9156,
      "step": 406430
    },
    {
      "epoch": 1.4244708702646438,
      "grad_norm": 2.59375,
      "learning_rate": 2.917656435378812e-05,
      "loss": 0.8985,
      "step": 406440
    },
    {
      "epoch": 1.4245059177715393,
      "grad_norm": 2.453125,
      "learning_rate": 2.917591532512442e-05,
      "loss": 0.8446,
      "step": 406450
    },
    {
      "epoch": 1.4245409652784349,
      "grad_norm": 3.265625,
      "learning_rate": 2.9175266296460717e-05,
      "loss": 0.8825,
      "step": 406460
    },
    {
      "epoch": 1.4245760127853306,
      "grad_norm": 2.953125,
      "learning_rate": 2.9174617267797015e-05,
      "loss": 0.837,
      "step": 406470
    },
    {
      "epoch": 1.4246110602922262,
      "grad_norm": 2.71875,
      "learning_rate": 2.9173968239133313e-05,
      "loss": 0.8034,
      "step": 406480
    },
    {
      "epoch": 1.4246461077991217,
      "grad_norm": 3.0625,
      "learning_rate": 2.9173319210469614e-05,
      "loss": 0.8028,
      "step": 406490
    },
    {
      "epoch": 1.4246811553060172,
      "grad_norm": 2.5,
      "learning_rate": 2.9172670181805912e-05,
      "loss": 0.8589,
      "step": 406500
    },
    {
      "epoch": 1.4247162028129128,
      "grad_norm": 2.8125,
      "learning_rate": 2.917202115314221e-05,
      "loss": 0.8164,
      "step": 406510
    },
    {
      "epoch": 1.4247512503198085,
      "grad_norm": 2.46875,
      "learning_rate": 2.917137212447851e-05,
      "loss": 0.8424,
      "step": 406520
    },
    {
      "epoch": 1.424786297826704,
      "grad_norm": 3.125,
      "learning_rate": 2.9170723095814806e-05,
      "loss": 0.8592,
      "step": 406530
    },
    {
      "epoch": 1.4248213453335996,
      "grad_norm": 2.71875,
      "learning_rate": 2.9170074067151104e-05,
      "loss": 0.8024,
      "step": 406540
    },
    {
      "epoch": 1.4248563928404954,
      "grad_norm": 2.890625,
      "learning_rate": 2.9169425038487402e-05,
      "loss": 0.7873,
      "step": 406550
    },
    {
      "epoch": 1.424891440347391,
      "grad_norm": 3.09375,
      "learning_rate": 2.91687760098237e-05,
      "loss": 0.8662,
      "step": 406560
    },
    {
      "epoch": 1.4249264878542864,
      "grad_norm": 3.046875,
      "learning_rate": 2.916812698116e-05,
      "loss": 0.8464,
      "step": 406570
    },
    {
      "epoch": 1.4249615353611822,
      "grad_norm": 2.4375,
      "learning_rate": 2.9167477952496296e-05,
      "loss": 0.8863,
      "step": 406580
    },
    {
      "epoch": 1.4249965828680777,
      "grad_norm": 3.0625,
      "learning_rate": 2.9166828923832594e-05,
      "loss": 0.8205,
      "step": 406590
    },
    {
      "epoch": 1.4250316303749733,
      "grad_norm": 2.8125,
      "learning_rate": 2.9166179895168892e-05,
      "loss": 0.7347,
      "step": 406600
    },
    {
      "epoch": 1.4250666778818688,
      "grad_norm": 3.0,
      "learning_rate": 2.916553086650519e-05,
      "loss": 0.8217,
      "step": 406610
    },
    {
      "epoch": 1.4251017253887643,
      "grad_norm": 2.640625,
      "learning_rate": 2.916488183784149e-05,
      "loss": 0.8094,
      "step": 406620
    },
    {
      "epoch": 1.42513677289566,
      "grad_norm": 3.109375,
      "learning_rate": 2.916423280917779e-05,
      "loss": 0.8109,
      "step": 406630
    },
    {
      "epoch": 1.4251718204025556,
      "grad_norm": 2.953125,
      "learning_rate": 2.9163583780514088e-05,
      "loss": 0.839,
      "step": 406640
    },
    {
      "epoch": 1.4252068679094512,
      "grad_norm": 3.015625,
      "learning_rate": 2.9162934751850386e-05,
      "loss": 0.795,
      "step": 406650
    },
    {
      "epoch": 1.425241915416347,
      "grad_norm": 3.171875,
      "learning_rate": 2.9162285723186677e-05,
      "loss": 0.8708,
      "step": 406660
    },
    {
      "epoch": 1.4252769629232425,
      "grad_norm": 2.5,
      "learning_rate": 2.9161636694522975e-05,
      "loss": 0.8373,
      "step": 406670
    },
    {
      "epoch": 1.425312010430138,
      "grad_norm": 2.84375,
      "learning_rate": 2.9160987665859273e-05,
      "loss": 0.8769,
      "step": 406680
    },
    {
      "epoch": 1.4253470579370338,
      "grad_norm": 3.171875,
      "learning_rate": 2.916033863719557e-05,
      "loss": 0.7535,
      "step": 406690
    },
    {
      "epoch": 1.4253821054439293,
      "grad_norm": 2.671875,
      "learning_rate": 2.915968960853187e-05,
      "loss": 0.844,
      "step": 406700
    },
    {
      "epoch": 1.4254171529508248,
      "grad_norm": 2.75,
      "learning_rate": 2.915904057986817e-05,
      "loss": 0.8798,
      "step": 406710
    },
    {
      "epoch": 1.4254522004577204,
      "grad_norm": 2.90625,
      "learning_rate": 2.915839155120447e-05,
      "loss": 0.8394,
      "step": 406720
    },
    {
      "epoch": 1.425487247964616,
      "grad_norm": 2.90625,
      "learning_rate": 2.9157742522540766e-05,
      "loss": 0.8821,
      "step": 406730
    },
    {
      "epoch": 1.4255222954715117,
      "grad_norm": 2.484375,
      "learning_rate": 2.9157093493877064e-05,
      "loss": 0.7961,
      "step": 406740
    },
    {
      "epoch": 1.4255573429784072,
      "grad_norm": 2.609375,
      "learning_rate": 2.9156444465213362e-05,
      "loss": 0.88,
      "step": 406750
    },
    {
      "epoch": 1.4255923904853027,
      "grad_norm": 3.140625,
      "learning_rate": 2.915579543654966e-05,
      "loss": 0.8876,
      "step": 406760
    },
    {
      "epoch": 1.4256274379921985,
      "grad_norm": 2.671875,
      "learning_rate": 2.915514640788596e-05,
      "loss": 0.9238,
      "step": 406770
    },
    {
      "epoch": 1.425662485499094,
      "grad_norm": 2.96875,
      "learning_rate": 2.9154497379222256e-05,
      "loss": 0.7875,
      "step": 406780
    },
    {
      "epoch": 1.4256975330059896,
      "grad_norm": 2.78125,
      "learning_rate": 2.9153848350558554e-05,
      "loss": 0.9218,
      "step": 406790
    },
    {
      "epoch": 1.4257325805128853,
      "grad_norm": 2.71875,
      "learning_rate": 2.9153199321894852e-05,
      "loss": 0.8949,
      "step": 406800
    },
    {
      "epoch": 1.4257676280197809,
      "grad_norm": 2.921875,
      "learning_rate": 2.915255029323115e-05,
      "loss": 0.8883,
      "step": 406810
    },
    {
      "epoch": 1.4258026755266764,
      "grad_norm": 2.4375,
      "learning_rate": 2.915190126456745e-05,
      "loss": 0.8059,
      "step": 406820
    },
    {
      "epoch": 1.425837723033572,
      "grad_norm": 2.75,
      "learning_rate": 2.9151252235903746e-05,
      "loss": 0.7708,
      "step": 406830
    },
    {
      "epoch": 1.4258727705404677,
      "grad_norm": 3.1875,
      "learning_rate": 2.9150603207240044e-05,
      "loss": 0.9264,
      "step": 406840
    },
    {
      "epoch": 1.4259078180473632,
      "grad_norm": 2.390625,
      "learning_rate": 2.9149954178576346e-05,
      "loss": 0.7953,
      "step": 406850
    },
    {
      "epoch": 1.4259428655542588,
      "grad_norm": 2.796875,
      "learning_rate": 2.9149305149912644e-05,
      "loss": 0.7756,
      "step": 406860
    },
    {
      "epoch": 1.4259779130611543,
      "grad_norm": 3.359375,
      "learning_rate": 2.9148656121248942e-05,
      "loss": 0.9284,
      "step": 406870
    },
    {
      "epoch": 1.42601296056805,
      "grad_norm": 2.625,
      "learning_rate": 2.914800709258524e-05,
      "loss": 0.8488,
      "step": 406880
    },
    {
      "epoch": 1.4260480080749456,
      "grad_norm": 2.84375,
      "learning_rate": 2.9147358063921538e-05,
      "loss": 0.8595,
      "step": 406890
    },
    {
      "epoch": 1.4260830555818411,
      "grad_norm": 2.84375,
      "learning_rate": 2.9146709035257836e-05,
      "loss": 0.8294,
      "step": 406900
    },
    {
      "epoch": 1.4261181030887369,
      "grad_norm": 2.578125,
      "learning_rate": 2.9146060006594134e-05,
      "loss": 0.7891,
      "step": 406910
    },
    {
      "epoch": 1.4261531505956324,
      "grad_norm": 2.71875,
      "learning_rate": 2.9145410977930432e-05,
      "loss": 0.8244,
      "step": 406920
    },
    {
      "epoch": 1.426188198102528,
      "grad_norm": 3.25,
      "learning_rate": 2.914476194926673e-05,
      "loss": 0.8533,
      "step": 406930
    },
    {
      "epoch": 1.4262232456094237,
      "grad_norm": 3.375,
      "learning_rate": 2.9144112920603028e-05,
      "loss": 0.8515,
      "step": 406940
    },
    {
      "epoch": 1.4262582931163192,
      "grad_norm": 3.234375,
      "learning_rate": 2.9143463891939326e-05,
      "loss": 0.8184,
      "step": 406950
    },
    {
      "epoch": 1.4262933406232148,
      "grad_norm": 3.203125,
      "learning_rate": 2.9142814863275624e-05,
      "loss": 0.7884,
      "step": 406960
    },
    {
      "epoch": 1.4263283881301103,
      "grad_norm": 2.828125,
      "learning_rate": 2.9142165834611922e-05,
      "loss": 0.9067,
      "step": 406970
    },
    {
      "epoch": 1.4263634356370058,
      "grad_norm": 2.828125,
      "learning_rate": 2.914151680594822e-05,
      "loss": 0.8371,
      "step": 406980
    },
    {
      "epoch": 1.4263984831439016,
      "grad_norm": 2.953125,
      "learning_rate": 2.914086777728452e-05,
      "loss": 0.8693,
      "step": 406990
    },
    {
      "epoch": 1.4264335306507971,
      "grad_norm": 3.078125,
      "learning_rate": 2.914021874862082e-05,
      "loss": 0.8771,
      "step": 407000
    },
    {
      "epoch": 1.4264685781576927,
      "grad_norm": 3.140625,
      "learning_rate": 2.9139569719957117e-05,
      "loss": 0.8379,
      "step": 407010
    },
    {
      "epoch": 1.4265036256645884,
      "grad_norm": 2.953125,
      "learning_rate": 2.9138920691293415e-05,
      "loss": 0.8573,
      "step": 407020
    },
    {
      "epoch": 1.426538673171484,
      "grad_norm": 2.40625,
      "learning_rate": 2.9138271662629713e-05,
      "loss": 0.9101,
      "step": 407030
    },
    {
      "epoch": 1.4265737206783795,
      "grad_norm": 2.984375,
      "learning_rate": 2.9137622633966004e-05,
      "loss": 0.8678,
      "step": 407040
    },
    {
      "epoch": 1.4266087681852753,
      "grad_norm": 2.9375,
      "learning_rate": 2.9136973605302302e-05,
      "loss": 0.8702,
      "step": 407050
    },
    {
      "epoch": 1.4266438156921708,
      "grad_norm": 2.8125,
      "learning_rate": 2.91363245766386e-05,
      "loss": 0.8128,
      "step": 407060
    },
    {
      "epoch": 1.4266788631990663,
      "grad_norm": 2.625,
      "learning_rate": 2.91356755479749e-05,
      "loss": 0.7526,
      "step": 407070
    },
    {
      "epoch": 1.4267139107059619,
      "grad_norm": 3.171875,
      "learning_rate": 2.91350265193112e-05,
      "loss": 0.8834,
      "step": 407080
    },
    {
      "epoch": 1.4267489582128574,
      "grad_norm": 3.078125,
      "learning_rate": 2.9134377490647498e-05,
      "loss": 0.8399,
      "step": 407090
    },
    {
      "epoch": 1.4267840057197532,
      "grad_norm": 2.703125,
      "learning_rate": 2.9133728461983796e-05,
      "loss": 0.9016,
      "step": 407100
    },
    {
      "epoch": 1.4268190532266487,
      "grad_norm": 3.1875,
      "learning_rate": 2.9133079433320094e-05,
      "loss": 0.9815,
      "step": 407110
    },
    {
      "epoch": 1.4268541007335442,
      "grad_norm": 2.796875,
      "learning_rate": 2.9132430404656392e-05,
      "loss": 0.8505,
      "step": 407120
    },
    {
      "epoch": 1.42688914824044,
      "grad_norm": 2.984375,
      "learning_rate": 2.913178137599269e-05,
      "loss": 0.7959,
      "step": 407130
    },
    {
      "epoch": 1.4269241957473355,
      "grad_norm": 3.0625,
      "learning_rate": 2.9131132347328988e-05,
      "loss": 0.8314,
      "step": 407140
    },
    {
      "epoch": 1.426959243254231,
      "grad_norm": 3.015625,
      "learning_rate": 2.9130483318665286e-05,
      "loss": 0.8929,
      "step": 407150
    },
    {
      "epoch": 1.4269942907611268,
      "grad_norm": 3.0,
      "learning_rate": 2.9129834290001584e-05,
      "loss": 0.8627,
      "step": 407160
    },
    {
      "epoch": 1.4270293382680224,
      "grad_norm": 3.1875,
      "learning_rate": 2.9129185261337882e-05,
      "loss": 0.8936,
      "step": 407170
    },
    {
      "epoch": 1.427064385774918,
      "grad_norm": 2.359375,
      "learning_rate": 2.912853623267418e-05,
      "loss": 0.841,
      "step": 407180
    },
    {
      "epoch": 1.4270994332818134,
      "grad_norm": 3.015625,
      "learning_rate": 2.9127887204010478e-05,
      "loss": 0.842,
      "step": 407190
    },
    {
      "epoch": 1.427134480788709,
      "grad_norm": 2.78125,
      "learning_rate": 2.9127238175346776e-05,
      "loss": 0.8547,
      "step": 407200
    },
    {
      "epoch": 1.4271695282956047,
      "grad_norm": 2.78125,
      "learning_rate": 2.9126589146683074e-05,
      "loss": 0.8596,
      "step": 407210
    },
    {
      "epoch": 1.4272045758025003,
      "grad_norm": 3.03125,
      "learning_rate": 2.9125940118019375e-05,
      "loss": 0.8744,
      "step": 407220
    },
    {
      "epoch": 1.4272396233093958,
      "grad_norm": 16.25,
      "learning_rate": 2.9125291089355673e-05,
      "loss": 0.8085,
      "step": 407230
    },
    {
      "epoch": 1.4272746708162916,
      "grad_norm": 2.859375,
      "learning_rate": 2.912464206069197e-05,
      "loss": 0.7669,
      "step": 407240
    },
    {
      "epoch": 1.427309718323187,
      "grad_norm": 3.0625,
      "learning_rate": 2.912399303202827e-05,
      "loss": 0.8756,
      "step": 407250
    },
    {
      "epoch": 1.4273447658300826,
      "grad_norm": 3.21875,
      "learning_rate": 2.9123344003364567e-05,
      "loss": 0.8625,
      "step": 407260
    },
    {
      "epoch": 1.4273798133369784,
      "grad_norm": 3.109375,
      "learning_rate": 2.9122694974700865e-05,
      "loss": 0.8122,
      "step": 407270
    },
    {
      "epoch": 1.427414860843874,
      "grad_norm": 3.046875,
      "learning_rate": 2.9122045946037163e-05,
      "loss": 0.8722,
      "step": 407280
    },
    {
      "epoch": 1.4274499083507695,
      "grad_norm": 4.125,
      "learning_rate": 2.912139691737346e-05,
      "loss": 0.9726,
      "step": 407290
    },
    {
      "epoch": 1.427484955857665,
      "grad_norm": 2.96875,
      "learning_rate": 2.912074788870976e-05,
      "loss": 0.8462,
      "step": 407300
    },
    {
      "epoch": 1.4275200033645605,
      "grad_norm": 2.546875,
      "learning_rate": 2.9120098860046057e-05,
      "loss": 0.8452,
      "step": 407310
    },
    {
      "epoch": 1.4275550508714563,
      "grad_norm": 3.03125,
      "learning_rate": 2.9119449831382355e-05,
      "loss": 0.922,
      "step": 407320
    },
    {
      "epoch": 1.4275900983783518,
      "grad_norm": 3.0,
      "learning_rate": 2.9118800802718653e-05,
      "loss": 0.8638,
      "step": 407330
    },
    {
      "epoch": 1.4276251458852474,
      "grad_norm": 2.625,
      "learning_rate": 2.911815177405495e-05,
      "loss": 0.8798,
      "step": 407340
    },
    {
      "epoch": 1.4276601933921431,
      "grad_norm": 3.203125,
      "learning_rate": 2.911750274539125e-05,
      "loss": 0.8038,
      "step": 407350
    },
    {
      "epoch": 1.4276952408990387,
      "grad_norm": 2.828125,
      "learning_rate": 2.911685371672755e-05,
      "loss": 0.7756,
      "step": 407360
    },
    {
      "epoch": 1.4277302884059342,
      "grad_norm": 2.671875,
      "learning_rate": 2.911620468806385e-05,
      "loss": 0.7902,
      "step": 407370
    },
    {
      "epoch": 1.42776533591283,
      "grad_norm": 3.03125,
      "learning_rate": 2.9115555659400147e-05,
      "loss": 0.8323,
      "step": 407380
    },
    {
      "epoch": 1.4278003834197255,
      "grad_norm": 2.875,
      "learning_rate": 2.9114906630736445e-05,
      "loss": 0.8697,
      "step": 407390
    },
    {
      "epoch": 1.427835430926621,
      "grad_norm": 2.96875,
      "learning_rate": 2.9114257602072743e-05,
      "loss": 0.8297,
      "step": 407400
    },
    {
      "epoch": 1.4278704784335166,
      "grad_norm": 2.6875,
      "learning_rate": 2.9113608573409034e-05,
      "loss": 0.8193,
      "step": 407410
    },
    {
      "epoch": 1.427905525940412,
      "grad_norm": 3.109375,
      "learning_rate": 2.9112959544745332e-05,
      "loss": 0.8306,
      "step": 407420
    },
    {
      "epoch": 1.4279405734473078,
      "grad_norm": 3.1875,
      "learning_rate": 2.911231051608163e-05,
      "loss": 0.7925,
      "step": 407430
    },
    {
      "epoch": 1.4279756209542034,
      "grad_norm": 2.5625,
      "learning_rate": 2.9111661487417928e-05,
      "loss": 0.8036,
      "step": 407440
    },
    {
      "epoch": 1.428010668461099,
      "grad_norm": 3.046875,
      "learning_rate": 2.911101245875423e-05,
      "loss": 0.8957,
      "step": 407450
    },
    {
      "epoch": 1.4280457159679947,
      "grad_norm": 2.40625,
      "learning_rate": 2.9110363430090527e-05,
      "loss": 0.7044,
      "step": 407460
    },
    {
      "epoch": 1.4280807634748902,
      "grad_norm": 3.265625,
      "learning_rate": 2.9109714401426825e-05,
      "loss": 0.8624,
      "step": 407470
    },
    {
      "epoch": 1.4281158109817857,
      "grad_norm": 2.984375,
      "learning_rate": 2.9109065372763123e-05,
      "loss": 0.8602,
      "step": 407480
    },
    {
      "epoch": 1.4281508584886815,
      "grad_norm": 2.9375,
      "learning_rate": 2.910841634409942e-05,
      "loss": 0.8221,
      "step": 407490
    },
    {
      "epoch": 1.428185905995577,
      "grad_norm": 2.28125,
      "learning_rate": 2.910776731543572e-05,
      "loss": 0.7636,
      "step": 407500
    },
    {
      "epoch": 1.4282209535024726,
      "grad_norm": 2.6875,
      "learning_rate": 2.9107118286772017e-05,
      "loss": 0.8177,
      "step": 407510
    },
    {
      "epoch": 1.4282560010093683,
      "grad_norm": 2.640625,
      "learning_rate": 2.9106469258108315e-05,
      "loss": 0.8835,
      "step": 407520
    },
    {
      "epoch": 1.4282910485162639,
      "grad_norm": 2.5625,
      "learning_rate": 2.9105820229444613e-05,
      "loss": 0.8424,
      "step": 407530
    },
    {
      "epoch": 1.4283260960231594,
      "grad_norm": 3.03125,
      "learning_rate": 2.910517120078091e-05,
      "loss": 0.8372,
      "step": 407540
    },
    {
      "epoch": 1.428361143530055,
      "grad_norm": 3.046875,
      "learning_rate": 2.910452217211721e-05,
      "loss": 0.8449,
      "step": 407550
    },
    {
      "epoch": 1.4283961910369505,
      "grad_norm": 2.671875,
      "learning_rate": 2.9103873143453507e-05,
      "loss": 0.8741,
      "step": 407560
    },
    {
      "epoch": 1.4284312385438462,
      "grad_norm": 2.609375,
      "learning_rate": 2.9103224114789805e-05,
      "loss": 0.8356,
      "step": 407570
    },
    {
      "epoch": 1.4284662860507418,
      "grad_norm": 2.875,
      "learning_rate": 2.9102575086126103e-05,
      "loss": 0.778,
      "step": 407580
    },
    {
      "epoch": 1.4285013335576373,
      "grad_norm": 3.375,
      "learning_rate": 2.9101926057462405e-05,
      "loss": 0.8352,
      "step": 407590
    },
    {
      "epoch": 1.428536381064533,
      "grad_norm": 3.109375,
      "learning_rate": 2.9101277028798703e-05,
      "loss": 0.9107,
      "step": 407600
    },
    {
      "epoch": 1.4285714285714286,
      "grad_norm": 2.75,
      "learning_rate": 2.9100628000135e-05,
      "loss": 0.8264,
      "step": 407610
    },
    {
      "epoch": 1.4286064760783241,
      "grad_norm": 3.03125,
      "learning_rate": 2.90999789714713e-05,
      "loss": 0.8487,
      "step": 407620
    },
    {
      "epoch": 1.42864152358522,
      "grad_norm": 2.984375,
      "learning_rate": 2.9099329942807597e-05,
      "loss": 0.8822,
      "step": 407630
    },
    {
      "epoch": 1.4286765710921154,
      "grad_norm": 3.109375,
      "learning_rate": 2.9098680914143895e-05,
      "loss": 0.9497,
      "step": 407640
    },
    {
      "epoch": 1.428711618599011,
      "grad_norm": 2.359375,
      "learning_rate": 2.9098031885480193e-05,
      "loss": 0.9308,
      "step": 407650
    },
    {
      "epoch": 1.4287466661059065,
      "grad_norm": 3.078125,
      "learning_rate": 2.909738285681649e-05,
      "loss": 0.8697,
      "step": 407660
    },
    {
      "epoch": 1.428781713612802,
      "grad_norm": 3.296875,
      "learning_rate": 2.909673382815279e-05,
      "loss": 0.8227,
      "step": 407670
    },
    {
      "epoch": 1.4288167611196978,
      "grad_norm": 2.765625,
      "learning_rate": 2.9096084799489087e-05,
      "loss": 0.8539,
      "step": 407680
    },
    {
      "epoch": 1.4288518086265933,
      "grad_norm": 3.15625,
      "learning_rate": 2.9095435770825385e-05,
      "loss": 0.8699,
      "step": 407690
    },
    {
      "epoch": 1.4288868561334889,
      "grad_norm": 3.3125,
      "learning_rate": 2.9094786742161683e-05,
      "loss": 0.9231,
      "step": 407700
    },
    {
      "epoch": 1.4289219036403846,
      "grad_norm": 3.03125,
      "learning_rate": 2.909413771349798e-05,
      "loss": 0.8333,
      "step": 407710
    },
    {
      "epoch": 1.4289569511472802,
      "grad_norm": 2.953125,
      "learning_rate": 2.909348868483428e-05,
      "loss": 0.8223,
      "step": 407720
    },
    {
      "epoch": 1.4289919986541757,
      "grad_norm": 2.859375,
      "learning_rate": 2.909283965617058e-05,
      "loss": 0.7346,
      "step": 407730
    },
    {
      "epoch": 1.4290270461610715,
      "grad_norm": 2.40625,
      "learning_rate": 2.9092190627506878e-05,
      "loss": 0.7873,
      "step": 407740
    },
    {
      "epoch": 1.429062093667967,
      "grad_norm": 2.90625,
      "learning_rate": 2.9091541598843176e-05,
      "loss": 0.8201,
      "step": 407750
    },
    {
      "epoch": 1.4290971411748625,
      "grad_norm": 3.328125,
      "learning_rate": 2.9090892570179474e-05,
      "loss": 0.8611,
      "step": 407760
    },
    {
      "epoch": 1.429132188681758,
      "grad_norm": 3.5625,
      "learning_rate": 2.9090243541515772e-05,
      "loss": 0.8079,
      "step": 407770
    },
    {
      "epoch": 1.4291672361886536,
      "grad_norm": 2.71875,
      "learning_rate": 2.908959451285207e-05,
      "loss": 0.9776,
      "step": 407780
    },
    {
      "epoch": 1.4292022836955494,
      "grad_norm": 2.703125,
      "learning_rate": 2.908894548418836e-05,
      "loss": 0.862,
      "step": 407790
    },
    {
      "epoch": 1.429237331202445,
      "grad_norm": 2.71875,
      "learning_rate": 2.908829645552466e-05,
      "loss": 0.8051,
      "step": 407800
    },
    {
      "epoch": 1.4292723787093404,
      "grad_norm": 2.65625,
      "learning_rate": 2.908764742686096e-05,
      "loss": 0.8421,
      "step": 407810
    },
    {
      "epoch": 1.4293074262162362,
      "grad_norm": 3.40625,
      "learning_rate": 2.908699839819726e-05,
      "loss": 0.8308,
      "step": 407820
    },
    {
      "epoch": 1.4293424737231317,
      "grad_norm": 2.828125,
      "learning_rate": 2.9086349369533557e-05,
      "loss": 0.79,
      "step": 407830
    },
    {
      "epoch": 1.4293775212300273,
      "grad_norm": 3.140625,
      "learning_rate": 2.9085700340869855e-05,
      "loss": 0.7785,
      "step": 407840
    },
    {
      "epoch": 1.429412568736923,
      "grad_norm": 2.71875,
      "learning_rate": 2.9085051312206153e-05,
      "loss": 0.8602,
      "step": 407850
    },
    {
      "epoch": 1.4294476162438186,
      "grad_norm": 3.109375,
      "learning_rate": 2.908440228354245e-05,
      "loss": 0.9054,
      "step": 407860
    },
    {
      "epoch": 1.429482663750714,
      "grad_norm": 2.90625,
      "learning_rate": 2.908375325487875e-05,
      "loss": 0.8258,
      "step": 407870
    },
    {
      "epoch": 1.4295177112576096,
      "grad_norm": 3.125,
      "learning_rate": 2.9083104226215047e-05,
      "loss": 0.8611,
      "step": 407880
    },
    {
      "epoch": 1.4295527587645052,
      "grad_norm": 3.25,
      "learning_rate": 2.9082455197551345e-05,
      "loss": 0.8394,
      "step": 407890
    },
    {
      "epoch": 1.429587806271401,
      "grad_norm": 2.890625,
      "learning_rate": 2.9081806168887643e-05,
      "loss": 0.7627,
      "step": 407900
    },
    {
      "epoch": 1.4296228537782965,
      "grad_norm": 3.265625,
      "learning_rate": 2.908115714022394e-05,
      "loss": 0.8823,
      "step": 407910
    },
    {
      "epoch": 1.429657901285192,
      "grad_norm": 3.140625,
      "learning_rate": 2.908050811156024e-05,
      "loss": 0.7795,
      "step": 407920
    },
    {
      "epoch": 1.4296929487920877,
      "grad_norm": 2.5625,
      "learning_rate": 2.9079859082896537e-05,
      "loss": 0.7757,
      "step": 407930
    },
    {
      "epoch": 1.4297279962989833,
      "grad_norm": 3.0625,
      "learning_rate": 2.9079210054232835e-05,
      "loss": 0.9417,
      "step": 407940
    },
    {
      "epoch": 1.4297630438058788,
      "grad_norm": 2.953125,
      "learning_rate": 2.9078561025569136e-05,
      "loss": 0.852,
      "step": 407950
    },
    {
      "epoch": 1.4297980913127746,
      "grad_norm": 2.796875,
      "learning_rate": 2.9077911996905434e-05,
      "loss": 0.8788,
      "step": 407960
    },
    {
      "epoch": 1.4298331388196701,
      "grad_norm": 3.09375,
      "learning_rate": 2.9077262968241732e-05,
      "loss": 0.8922,
      "step": 407970
    },
    {
      "epoch": 1.4298681863265656,
      "grad_norm": 2.75,
      "learning_rate": 2.907661393957803e-05,
      "loss": 0.9499,
      "step": 407980
    },
    {
      "epoch": 1.4299032338334612,
      "grad_norm": 3.0,
      "learning_rate": 2.9075964910914328e-05,
      "loss": 0.9061,
      "step": 407990
    },
    {
      "epoch": 1.4299382813403567,
      "grad_norm": 2.75,
      "learning_rate": 2.9075315882250626e-05,
      "loss": 0.8565,
      "step": 408000
    },
    {
      "epoch": 1.4299733288472525,
      "grad_norm": 2.671875,
      "learning_rate": 2.9074666853586924e-05,
      "loss": 0.8656,
      "step": 408010
    },
    {
      "epoch": 1.430008376354148,
      "grad_norm": 3.15625,
      "learning_rate": 2.9074017824923222e-05,
      "loss": 0.8916,
      "step": 408020
    },
    {
      "epoch": 1.4300434238610436,
      "grad_norm": 2.53125,
      "learning_rate": 2.907336879625952e-05,
      "loss": 0.82,
      "step": 408030
    },
    {
      "epoch": 1.4300784713679393,
      "grad_norm": 2.75,
      "learning_rate": 2.9072719767595818e-05,
      "loss": 0.8896,
      "step": 408040
    },
    {
      "epoch": 1.4301135188748348,
      "grad_norm": 3.296875,
      "learning_rate": 2.9072070738932116e-05,
      "loss": 0.8963,
      "step": 408050
    },
    {
      "epoch": 1.4301485663817304,
      "grad_norm": 2.65625,
      "learning_rate": 2.9071421710268414e-05,
      "loss": 0.7974,
      "step": 408060
    },
    {
      "epoch": 1.4301836138886261,
      "grad_norm": 2.78125,
      "learning_rate": 2.9070772681604712e-05,
      "loss": 0.8201,
      "step": 408070
    },
    {
      "epoch": 1.4302186613955217,
      "grad_norm": 2.828125,
      "learning_rate": 2.907012365294101e-05,
      "loss": 0.8835,
      "step": 408080
    },
    {
      "epoch": 1.4302537089024172,
      "grad_norm": 2.9375,
      "learning_rate": 2.906947462427731e-05,
      "loss": 0.8304,
      "step": 408090
    },
    {
      "epoch": 1.4302887564093127,
      "grad_norm": 2.953125,
      "learning_rate": 2.906882559561361e-05,
      "loss": 0.7953,
      "step": 408100
    },
    {
      "epoch": 1.4303238039162085,
      "grad_norm": 2.859375,
      "learning_rate": 2.9068176566949907e-05,
      "loss": 0.8066,
      "step": 408110
    },
    {
      "epoch": 1.430358851423104,
      "grad_norm": 2.96875,
      "learning_rate": 2.9067527538286205e-05,
      "loss": 1.0073,
      "step": 408120
    },
    {
      "epoch": 1.4303938989299996,
      "grad_norm": 2.453125,
      "learning_rate": 2.9066878509622503e-05,
      "loss": 0.8392,
      "step": 408130
    },
    {
      "epoch": 1.430428946436895,
      "grad_norm": 2.75,
      "learning_rate": 2.90662294809588e-05,
      "loss": 0.7923,
      "step": 408140
    },
    {
      "epoch": 1.4304639939437909,
      "grad_norm": 3.46875,
      "learning_rate": 2.90655804522951e-05,
      "loss": 0.8691,
      "step": 408150
    },
    {
      "epoch": 1.4304990414506864,
      "grad_norm": 2.421875,
      "learning_rate": 2.906493142363139e-05,
      "loss": 0.8354,
      "step": 408160
    },
    {
      "epoch": 1.430534088957582,
      "grad_norm": 3.046875,
      "learning_rate": 2.906428239496769e-05,
      "loss": 0.8484,
      "step": 408170
    },
    {
      "epoch": 1.4305691364644777,
      "grad_norm": 2.671875,
      "learning_rate": 2.906363336630399e-05,
      "loss": 0.8063,
      "step": 408180
    },
    {
      "epoch": 1.4306041839713732,
      "grad_norm": 2.828125,
      "learning_rate": 2.9062984337640288e-05,
      "loss": 0.8593,
      "step": 408190
    },
    {
      "epoch": 1.4306392314782688,
      "grad_norm": 2.921875,
      "learning_rate": 2.9062335308976586e-05,
      "loss": 0.8868,
      "step": 408200
    },
    {
      "epoch": 1.4306742789851645,
      "grad_norm": 2.90625,
      "learning_rate": 2.9061686280312884e-05,
      "loss": 0.8917,
      "step": 408210
    },
    {
      "epoch": 1.43070932649206,
      "grad_norm": 2.9375,
      "learning_rate": 2.9061037251649182e-05,
      "loss": 0.935,
      "step": 408220
    },
    {
      "epoch": 1.4307443739989556,
      "grad_norm": 2.8125,
      "learning_rate": 2.906038822298548e-05,
      "loss": 0.859,
      "step": 408230
    },
    {
      "epoch": 1.4307794215058511,
      "grad_norm": 2.953125,
      "learning_rate": 2.9059739194321778e-05,
      "loss": 0.7422,
      "step": 408240
    },
    {
      "epoch": 1.4308144690127467,
      "grad_norm": 2.890625,
      "learning_rate": 2.9059090165658076e-05,
      "loss": 0.8987,
      "step": 408250
    },
    {
      "epoch": 1.4308495165196424,
      "grad_norm": 3.15625,
      "learning_rate": 2.9058441136994374e-05,
      "loss": 0.8119,
      "step": 408260
    },
    {
      "epoch": 1.430884564026538,
      "grad_norm": 3.375,
      "learning_rate": 2.9057792108330672e-05,
      "loss": 0.8391,
      "step": 408270
    },
    {
      "epoch": 1.4309196115334335,
      "grad_norm": 2.828125,
      "learning_rate": 2.905714307966697e-05,
      "loss": 0.8189,
      "step": 408280
    },
    {
      "epoch": 1.4309546590403293,
      "grad_norm": 2.6875,
      "learning_rate": 2.9056494051003268e-05,
      "loss": 0.7714,
      "step": 408290
    },
    {
      "epoch": 1.4309897065472248,
      "grad_norm": 3.15625,
      "learning_rate": 2.9055845022339566e-05,
      "loss": 0.8432,
      "step": 408300
    },
    {
      "epoch": 1.4310247540541203,
      "grad_norm": 2.609375,
      "learning_rate": 2.9055195993675864e-05,
      "loss": 0.791,
      "step": 408310
    },
    {
      "epoch": 1.431059801561016,
      "grad_norm": 2.609375,
      "learning_rate": 2.9054546965012165e-05,
      "loss": 0.8471,
      "step": 408320
    },
    {
      "epoch": 1.4310948490679116,
      "grad_norm": 2.96875,
      "learning_rate": 2.9053897936348463e-05,
      "loss": 0.8649,
      "step": 408330
    },
    {
      "epoch": 1.4311298965748072,
      "grad_norm": 2.421875,
      "learning_rate": 2.905324890768476e-05,
      "loss": 0.8165,
      "step": 408340
    },
    {
      "epoch": 1.4311649440817027,
      "grad_norm": 2.828125,
      "learning_rate": 2.905259987902106e-05,
      "loss": 0.8319,
      "step": 408350
    },
    {
      "epoch": 1.4311999915885982,
      "grad_norm": 2.484375,
      "learning_rate": 2.9051950850357357e-05,
      "loss": 0.9155,
      "step": 408360
    },
    {
      "epoch": 1.431235039095494,
      "grad_norm": 2.75,
      "learning_rate": 2.9051301821693655e-05,
      "loss": 0.7797,
      "step": 408370
    },
    {
      "epoch": 1.4312700866023895,
      "grad_norm": 2.671875,
      "learning_rate": 2.9050652793029953e-05,
      "loss": 0.8559,
      "step": 408380
    },
    {
      "epoch": 1.431305134109285,
      "grad_norm": 2.734375,
      "learning_rate": 2.905000376436625e-05,
      "loss": 0.8295,
      "step": 408390
    },
    {
      "epoch": 1.4313401816161808,
      "grad_norm": 3.109375,
      "learning_rate": 2.904935473570255e-05,
      "loss": 0.9073,
      "step": 408400
    },
    {
      "epoch": 1.4313752291230764,
      "grad_norm": 3.03125,
      "learning_rate": 2.9048705707038847e-05,
      "loss": 0.8054,
      "step": 408410
    },
    {
      "epoch": 1.431410276629972,
      "grad_norm": 3.109375,
      "learning_rate": 2.9048056678375145e-05,
      "loss": 0.8584,
      "step": 408420
    },
    {
      "epoch": 1.4314453241368676,
      "grad_norm": 4.34375,
      "learning_rate": 2.9047407649711443e-05,
      "loss": 0.943,
      "step": 408430
    },
    {
      "epoch": 1.4314803716437632,
      "grad_norm": 3.0,
      "learning_rate": 2.904675862104774e-05,
      "loss": 0.9029,
      "step": 408440
    },
    {
      "epoch": 1.4315154191506587,
      "grad_norm": 2.65625,
      "learning_rate": 2.904610959238404e-05,
      "loss": 0.8995,
      "step": 408450
    },
    {
      "epoch": 1.4315504666575543,
      "grad_norm": 3.46875,
      "learning_rate": 2.904546056372034e-05,
      "loss": 0.8971,
      "step": 408460
    },
    {
      "epoch": 1.4315855141644498,
      "grad_norm": 2.875,
      "learning_rate": 2.904481153505664e-05,
      "loss": 0.8071,
      "step": 408470
    },
    {
      "epoch": 1.4316205616713455,
      "grad_norm": 2.265625,
      "learning_rate": 2.9044162506392937e-05,
      "loss": 0.8057,
      "step": 408480
    },
    {
      "epoch": 1.431655609178241,
      "grad_norm": 2.84375,
      "learning_rate": 2.9043513477729235e-05,
      "loss": 0.811,
      "step": 408490
    },
    {
      "epoch": 1.4316906566851366,
      "grad_norm": 2.703125,
      "learning_rate": 2.9042864449065533e-05,
      "loss": 0.8508,
      "step": 408500
    },
    {
      "epoch": 1.4317257041920324,
      "grad_norm": 2.9375,
      "learning_rate": 2.904221542040183e-05,
      "loss": 0.7516,
      "step": 408510
    },
    {
      "epoch": 1.431760751698928,
      "grad_norm": 3.1875,
      "learning_rate": 2.904156639173813e-05,
      "loss": 0.8567,
      "step": 408520
    },
    {
      "epoch": 1.4317957992058234,
      "grad_norm": 3.296875,
      "learning_rate": 2.9040917363074427e-05,
      "loss": 0.8638,
      "step": 408530
    },
    {
      "epoch": 1.4318308467127192,
      "grad_norm": 2.609375,
      "learning_rate": 2.9040268334410718e-05,
      "loss": 0.8368,
      "step": 408540
    },
    {
      "epoch": 1.4318658942196147,
      "grad_norm": 3.125,
      "learning_rate": 2.903961930574702e-05,
      "loss": 0.916,
      "step": 408550
    },
    {
      "epoch": 1.4319009417265103,
      "grad_norm": 1.953125,
      "learning_rate": 2.9038970277083317e-05,
      "loss": 0.7942,
      "step": 408560
    },
    {
      "epoch": 1.4319359892334058,
      "grad_norm": 2.4375,
      "learning_rate": 2.9038321248419615e-05,
      "loss": 0.7302,
      "step": 408570
    },
    {
      "epoch": 1.4319710367403014,
      "grad_norm": 2.796875,
      "learning_rate": 2.9037672219755913e-05,
      "loss": 0.8803,
      "step": 408580
    },
    {
      "epoch": 1.432006084247197,
      "grad_norm": 2.765625,
      "learning_rate": 2.903702319109221e-05,
      "loss": 0.8234,
      "step": 408590
    },
    {
      "epoch": 1.4320411317540926,
      "grad_norm": 2.796875,
      "learning_rate": 2.903637416242851e-05,
      "loss": 0.9269,
      "step": 408600
    },
    {
      "epoch": 1.4320761792609882,
      "grad_norm": 2.75,
      "learning_rate": 2.9035725133764807e-05,
      "loss": 0.838,
      "step": 408610
    },
    {
      "epoch": 1.432111226767884,
      "grad_norm": 2.890625,
      "learning_rate": 2.9035076105101105e-05,
      "loss": 0.7926,
      "step": 408620
    },
    {
      "epoch": 1.4321462742747795,
      "grad_norm": 3.078125,
      "learning_rate": 2.9034427076437403e-05,
      "loss": 0.806,
      "step": 408630
    },
    {
      "epoch": 1.432181321781675,
      "grad_norm": 2.90625,
      "learning_rate": 2.90337780477737e-05,
      "loss": 0.8876,
      "step": 408640
    },
    {
      "epoch": 1.4322163692885708,
      "grad_norm": 3.1875,
      "learning_rate": 2.903312901911e-05,
      "loss": 0.8605,
      "step": 408650
    },
    {
      "epoch": 1.4322514167954663,
      "grad_norm": 2.5625,
      "learning_rate": 2.9032479990446297e-05,
      "loss": 0.9294,
      "step": 408660
    },
    {
      "epoch": 1.4322864643023618,
      "grad_norm": 3.203125,
      "learning_rate": 2.9031830961782595e-05,
      "loss": 0.8701,
      "step": 408670
    },
    {
      "epoch": 1.4323215118092574,
      "grad_norm": 3.625,
      "learning_rate": 2.9031181933118893e-05,
      "loss": 0.8592,
      "step": 408680
    },
    {
      "epoch": 1.432356559316153,
      "grad_norm": 3.09375,
      "learning_rate": 2.9030532904455195e-05,
      "loss": 0.8173,
      "step": 408690
    },
    {
      "epoch": 1.4323916068230487,
      "grad_norm": 2.78125,
      "learning_rate": 2.9029883875791493e-05,
      "loss": 0.7923,
      "step": 408700
    },
    {
      "epoch": 1.4324266543299442,
      "grad_norm": 2.78125,
      "learning_rate": 2.902923484712779e-05,
      "loss": 0.8935,
      "step": 408710
    },
    {
      "epoch": 1.4324617018368397,
      "grad_norm": 2.484375,
      "learning_rate": 2.902858581846409e-05,
      "loss": 0.8141,
      "step": 408720
    },
    {
      "epoch": 1.4324967493437355,
      "grad_norm": 17.75,
      "learning_rate": 2.9027936789800387e-05,
      "loss": 0.8159,
      "step": 408730
    },
    {
      "epoch": 1.432531796850631,
      "grad_norm": 2.5625,
      "learning_rate": 2.9027287761136685e-05,
      "loss": 0.7816,
      "step": 408740
    },
    {
      "epoch": 1.4325668443575266,
      "grad_norm": 2.65625,
      "learning_rate": 2.9026638732472983e-05,
      "loss": 0.8057,
      "step": 408750
    },
    {
      "epoch": 1.4326018918644223,
      "grad_norm": 2.71875,
      "learning_rate": 2.902598970380928e-05,
      "loss": 0.7959,
      "step": 408760
    },
    {
      "epoch": 1.4326369393713179,
      "grad_norm": 2.703125,
      "learning_rate": 2.902534067514558e-05,
      "loss": 0.8534,
      "step": 408770
    },
    {
      "epoch": 1.4326719868782134,
      "grad_norm": 2.734375,
      "learning_rate": 2.9024691646481877e-05,
      "loss": 0.7812,
      "step": 408780
    },
    {
      "epoch": 1.432707034385109,
      "grad_norm": 3.296875,
      "learning_rate": 2.9024042617818175e-05,
      "loss": 0.8048,
      "step": 408790
    },
    {
      "epoch": 1.4327420818920047,
      "grad_norm": 2.921875,
      "learning_rate": 2.9023393589154473e-05,
      "loss": 0.8316,
      "step": 408800
    },
    {
      "epoch": 1.4327771293989002,
      "grad_norm": 2.9375,
      "learning_rate": 2.902274456049077e-05,
      "loss": 0.8421,
      "step": 408810
    },
    {
      "epoch": 1.4328121769057958,
      "grad_norm": 2.453125,
      "learning_rate": 2.9022095531827072e-05,
      "loss": 0.7543,
      "step": 408820
    },
    {
      "epoch": 1.4328472244126913,
      "grad_norm": 3.015625,
      "learning_rate": 2.902144650316337e-05,
      "loss": 0.8756,
      "step": 408830
    },
    {
      "epoch": 1.432882271919587,
      "grad_norm": 2.453125,
      "learning_rate": 2.9020797474499668e-05,
      "loss": 0.768,
      "step": 408840
    },
    {
      "epoch": 1.4329173194264826,
      "grad_norm": 3.078125,
      "learning_rate": 2.9020148445835966e-05,
      "loss": 0.8327,
      "step": 408850
    },
    {
      "epoch": 1.4329523669333781,
      "grad_norm": 2.53125,
      "learning_rate": 2.9019499417172264e-05,
      "loss": 0.8427,
      "step": 408860
    },
    {
      "epoch": 1.4329874144402739,
      "grad_norm": 2.828125,
      "learning_rate": 2.9018850388508562e-05,
      "loss": 0.7894,
      "step": 408870
    },
    {
      "epoch": 1.4330224619471694,
      "grad_norm": 2.5625,
      "learning_rate": 2.901820135984486e-05,
      "loss": 0.8409,
      "step": 408880
    },
    {
      "epoch": 1.433057509454065,
      "grad_norm": 2.921875,
      "learning_rate": 2.9017552331181158e-05,
      "loss": 0.8068,
      "step": 408890
    },
    {
      "epoch": 1.4330925569609607,
      "grad_norm": 3.09375,
      "learning_rate": 2.9016903302517456e-05,
      "loss": 0.8341,
      "step": 408900
    },
    {
      "epoch": 1.4331276044678563,
      "grad_norm": 2.640625,
      "learning_rate": 2.9016254273853754e-05,
      "loss": 0.8069,
      "step": 408910
    },
    {
      "epoch": 1.4331626519747518,
      "grad_norm": 3.140625,
      "learning_rate": 2.901560524519005e-05,
      "loss": 0.8373,
      "step": 408920
    },
    {
      "epoch": 1.4331976994816473,
      "grad_norm": 3.203125,
      "learning_rate": 2.9014956216526347e-05,
      "loss": 0.885,
      "step": 408930
    },
    {
      "epoch": 1.4332327469885429,
      "grad_norm": 2.78125,
      "learning_rate": 2.9014307187862645e-05,
      "loss": 0.7841,
      "step": 408940
    },
    {
      "epoch": 1.4332677944954386,
      "grad_norm": 2.75,
      "learning_rate": 2.9013658159198943e-05,
      "loss": 0.812,
      "step": 408950
    },
    {
      "epoch": 1.4333028420023342,
      "grad_norm": 2.84375,
      "learning_rate": 2.901300913053524e-05,
      "loss": 0.8827,
      "step": 408960
    },
    {
      "epoch": 1.4333378895092297,
      "grad_norm": 6.21875,
      "learning_rate": 2.901236010187154e-05,
      "loss": 0.874,
      "step": 408970
    },
    {
      "epoch": 1.4333729370161254,
      "grad_norm": 3.28125,
      "learning_rate": 2.9011711073207837e-05,
      "loss": 0.9187,
      "step": 408980
    },
    {
      "epoch": 1.433407984523021,
      "grad_norm": 2.859375,
      "learning_rate": 2.9011062044544135e-05,
      "loss": 0.8107,
      "step": 408990
    },
    {
      "epoch": 1.4334430320299165,
      "grad_norm": 2.8125,
      "learning_rate": 2.9010413015880433e-05,
      "loss": 0.8633,
      "step": 409000
    },
    {
      "epoch": 1.4334780795368123,
      "grad_norm": 2.859375,
      "learning_rate": 2.900976398721673e-05,
      "loss": 0.8287,
      "step": 409010
    },
    {
      "epoch": 1.4335131270437078,
      "grad_norm": 2.796875,
      "learning_rate": 2.900911495855303e-05,
      "loss": 0.8591,
      "step": 409020
    },
    {
      "epoch": 1.4335481745506033,
      "grad_norm": 2.8125,
      "learning_rate": 2.9008465929889327e-05,
      "loss": 0.8461,
      "step": 409030
    },
    {
      "epoch": 1.4335832220574989,
      "grad_norm": 2.421875,
      "learning_rate": 2.9007816901225625e-05,
      "loss": 0.8636,
      "step": 409040
    },
    {
      "epoch": 1.4336182695643944,
      "grad_norm": 2.609375,
      "learning_rate": 2.9007167872561926e-05,
      "loss": 0.7631,
      "step": 409050
    },
    {
      "epoch": 1.4336533170712902,
      "grad_norm": 3.015625,
      "learning_rate": 2.9006518843898224e-05,
      "loss": 0.7939,
      "step": 409060
    },
    {
      "epoch": 1.4336883645781857,
      "grad_norm": 3.015625,
      "learning_rate": 2.9005869815234522e-05,
      "loss": 0.7982,
      "step": 409070
    },
    {
      "epoch": 1.4337234120850813,
      "grad_norm": 2.96875,
      "learning_rate": 2.900522078657082e-05,
      "loss": 0.7964,
      "step": 409080
    },
    {
      "epoch": 1.433758459591977,
      "grad_norm": 3.375,
      "learning_rate": 2.9004571757907118e-05,
      "loss": 0.87,
      "step": 409090
    },
    {
      "epoch": 1.4337935070988725,
      "grad_norm": 2.96875,
      "learning_rate": 2.9003922729243416e-05,
      "loss": 0.8751,
      "step": 409100
    },
    {
      "epoch": 1.433828554605768,
      "grad_norm": 2.9375,
      "learning_rate": 2.9003273700579714e-05,
      "loss": 0.7957,
      "step": 409110
    },
    {
      "epoch": 1.4338636021126638,
      "grad_norm": 2.984375,
      "learning_rate": 2.9002624671916012e-05,
      "loss": 0.9048,
      "step": 409120
    },
    {
      "epoch": 1.4338986496195594,
      "grad_norm": 3.0625,
      "learning_rate": 2.900197564325231e-05,
      "loss": 0.8174,
      "step": 409130
    },
    {
      "epoch": 1.433933697126455,
      "grad_norm": 2.65625,
      "learning_rate": 2.9001326614588608e-05,
      "loss": 0.7984,
      "step": 409140
    },
    {
      "epoch": 1.4339687446333504,
      "grad_norm": 3.03125,
      "learning_rate": 2.9000677585924906e-05,
      "loss": 0.8464,
      "step": 409150
    },
    {
      "epoch": 1.434003792140246,
      "grad_norm": 2.96875,
      "learning_rate": 2.9000028557261204e-05,
      "loss": 0.8396,
      "step": 409160
    },
    {
      "epoch": 1.4340388396471417,
      "grad_norm": 3.09375,
      "learning_rate": 2.8999379528597502e-05,
      "loss": 0.7775,
      "step": 409170
    },
    {
      "epoch": 1.4340738871540373,
      "grad_norm": 3.09375,
      "learning_rate": 2.89987304999338e-05,
      "loss": 0.9197,
      "step": 409180
    },
    {
      "epoch": 1.4341089346609328,
      "grad_norm": 3.390625,
      "learning_rate": 2.89980814712701e-05,
      "loss": 0.9403,
      "step": 409190
    },
    {
      "epoch": 1.4341439821678286,
      "grad_norm": 3.234375,
      "learning_rate": 2.89974324426064e-05,
      "loss": 0.8714,
      "step": 409200
    },
    {
      "epoch": 1.434179029674724,
      "grad_norm": 3.109375,
      "learning_rate": 2.8996783413942697e-05,
      "loss": 0.9211,
      "step": 409210
    },
    {
      "epoch": 1.4342140771816196,
      "grad_norm": 2.875,
      "learning_rate": 2.8996134385278995e-05,
      "loss": 0.8551,
      "step": 409220
    },
    {
      "epoch": 1.4342491246885154,
      "grad_norm": 2.625,
      "learning_rate": 2.8995485356615293e-05,
      "loss": 0.7917,
      "step": 409230
    },
    {
      "epoch": 1.434284172195411,
      "grad_norm": 2.828125,
      "learning_rate": 2.899483632795159e-05,
      "loss": 0.7947,
      "step": 409240
    },
    {
      "epoch": 1.4343192197023065,
      "grad_norm": 2.546875,
      "learning_rate": 2.899418729928789e-05,
      "loss": 0.8178,
      "step": 409250
    },
    {
      "epoch": 1.434354267209202,
      "grad_norm": 3.203125,
      "learning_rate": 2.8993538270624187e-05,
      "loss": 0.8516,
      "step": 409260
    },
    {
      "epoch": 1.4343893147160975,
      "grad_norm": 2.6875,
      "learning_rate": 2.8992889241960485e-05,
      "loss": 0.8329,
      "step": 409270
    },
    {
      "epoch": 1.4344243622229933,
      "grad_norm": 2.9375,
      "learning_rate": 2.8992240213296783e-05,
      "loss": 0.8583,
      "step": 409280
    },
    {
      "epoch": 1.4344594097298888,
      "grad_norm": 2.828125,
      "learning_rate": 2.8991591184633078e-05,
      "loss": 0.8512,
      "step": 409290
    },
    {
      "epoch": 1.4344944572367844,
      "grad_norm": 3.0625,
      "learning_rate": 2.8990942155969376e-05,
      "loss": 0.8322,
      "step": 409300
    },
    {
      "epoch": 1.4345295047436801,
      "grad_norm": 2.9375,
      "learning_rate": 2.8990293127305674e-05,
      "loss": 0.7961,
      "step": 409310
    },
    {
      "epoch": 1.4345645522505757,
      "grad_norm": 2.125,
      "learning_rate": 2.8989644098641972e-05,
      "loss": 0.794,
      "step": 409320
    },
    {
      "epoch": 1.4345995997574712,
      "grad_norm": 2.8125,
      "learning_rate": 2.898899506997827e-05,
      "loss": 0.7887,
      "step": 409330
    },
    {
      "epoch": 1.434634647264367,
      "grad_norm": 2.703125,
      "learning_rate": 2.8988346041314568e-05,
      "loss": 0.792,
      "step": 409340
    },
    {
      "epoch": 1.4346696947712625,
      "grad_norm": 3.015625,
      "learning_rate": 2.8987697012650866e-05,
      "loss": 0.8095,
      "step": 409350
    },
    {
      "epoch": 1.434704742278158,
      "grad_norm": 2.78125,
      "learning_rate": 2.8987047983987164e-05,
      "loss": 0.8016,
      "step": 409360
    },
    {
      "epoch": 1.4347397897850536,
      "grad_norm": 2.875,
      "learning_rate": 2.8986398955323462e-05,
      "loss": 0.8539,
      "step": 409370
    },
    {
      "epoch": 1.434774837291949,
      "grad_norm": 2.625,
      "learning_rate": 2.898574992665976e-05,
      "loss": 0.7907,
      "step": 409380
    },
    {
      "epoch": 1.4348098847988449,
      "grad_norm": 3.484375,
      "learning_rate": 2.8985100897996058e-05,
      "loss": 0.9637,
      "step": 409390
    },
    {
      "epoch": 1.4348449323057404,
      "grad_norm": 2.578125,
      "learning_rate": 2.8984451869332356e-05,
      "loss": 0.7633,
      "step": 409400
    },
    {
      "epoch": 1.434879979812636,
      "grad_norm": 2.96875,
      "learning_rate": 2.8983802840668654e-05,
      "loss": 0.8192,
      "step": 409410
    },
    {
      "epoch": 1.4349150273195317,
      "grad_norm": 3.1875,
      "learning_rate": 2.8983153812004955e-05,
      "loss": 0.8727,
      "step": 409420
    },
    {
      "epoch": 1.4349500748264272,
      "grad_norm": 3.125,
      "learning_rate": 2.8982504783341253e-05,
      "loss": 0.9021,
      "step": 409430
    },
    {
      "epoch": 1.4349851223333228,
      "grad_norm": 3.3125,
      "learning_rate": 2.898185575467755e-05,
      "loss": 0.8432,
      "step": 409440
    },
    {
      "epoch": 1.4350201698402185,
      "grad_norm": 2.6875,
      "learning_rate": 2.898120672601385e-05,
      "loss": 0.7875,
      "step": 409450
    },
    {
      "epoch": 1.435055217347114,
      "grad_norm": 3.171875,
      "learning_rate": 2.8980557697350147e-05,
      "loss": 0.8564,
      "step": 409460
    },
    {
      "epoch": 1.4350902648540096,
      "grad_norm": 2.734375,
      "learning_rate": 2.8979908668686445e-05,
      "loss": 0.8615,
      "step": 409470
    },
    {
      "epoch": 1.4351253123609051,
      "grad_norm": 2.828125,
      "learning_rate": 2.8979259640022743e-05,
      "loss": 0.8866,
      "step": 409480
    },
    {
      "epoch": 1.4351603598678009,
      "grad_norm": 2.46875,
      "learning_rate": 2.897861061135904e-05,
      "loss": 0.7903,
      "step": 409490
    },
    {
      "epoch": 1.4351954073746964,
      "grad_norm": 2.484375,
      "learning_rate": 2.897796158269534e-05,
      "loss": 0.9152,
      "step": 409500
    },
    {
      "epoch": 1.435230454881592,
      "grad_norm": 3.15625,
      "learning_rate": 2.8977312554031637e-05,
      "loss": 0.8559,
      "step": 409510
    },
    {
      "epoch": 1.4352655023884875,
      "grad_norm": 2.984375,
      "learning_rate": 2.8976663525367935e-05,
      "loss": 0.8215,
      "step": 409520
    },
    {
      "epoch": 1.4353005498953832,
      "grad_norm": 3.0,
      "learning_rate": 2.8976014496704233e-05,
      "loss": 0.8162,
      "step": 409530
    },
    {
      "epoch": 1.4353355974022788,
      "grad_norm": 2.8125,
      "learning_rate": 2.897536546804053e-05,
      "loss": 0.8628,
      "step": 409540
    },
    {
      "epoch": 1.4353706449091743,
      "grad_norm": 3.15625,
      "learning_rate": 2.897471643937683e-05,
      "loss": 0.8579,
      "step": 409550
    },
    {
      "epoch": 1.43540569241607,
      "grad_norm": 2.28125,
      "learning_rate": 2.897406741071313e-05,
      "loss": 0.8238,
      "step": 409560
    },
    {
      "epoch": 1.4354407399229656,
      "grad_norm": 2.9375,
      "learning_rate": 2.897341838204943e-05,
      "loss": 0.7587,
      "step": 409570
    },
    {
      "epoch": 1.4354757874298611,
      "grad_norm": 2.640625,
      "learning_rate": 2.8972769353385727e-05,
      "loss": 0.8462,
      "step": 409580
    },
    {
      "epoch": 1.435510834936757,
      "grad_norm": 2.875,
      "learning_rate": 2.8972120324722025e-05,
      "loss": 0.7951,
      "step": 409590
    },
    {
      "epoch": 1.4355458824436524,
      "grad_norm": 3.015625,
      "learning_rate": 2.8971471296058323e-05,
      "loss": 0.8361,
      "step": 409600
    },
    {
      "epoch": 1.435580929950548,
      "grad_norm": 2.8125,
      "learning_rate": 2.897082226739462e-05,
      "loss": 0.8565,
      "step": 409610
    },
    {
      "epoch": 1.4356159774574435,
      "grad_norm": 2.96875,
      "learning_rate": 2.897017323873092e-05,
      "loss": 0.8795,
      "step": 409620
    },
    {
      "epoch": 1.435651024964339,
      "grad_norm": 2.71875,
      "learning_rate": 2.8969524210067217e-05,
      "loss": 0.8416,
      "step": 409630
    },
    {
      "epoch": 1.4356860724712348,
      "grad_norm": 2.921875,
      "learning_rate": 2.8968875181403515e-05,
      "loss": 0.7864,
      "step": 409640
    },
    {
      "epoch": 1.4357211199781303,
      "grad_norm": 2.796875,
      "learning_rate": 2.8968226152739813e-05,
      "loss": 0.7754,
      "step": 409650
    },
    {
      "epoch": 1.4357561674850259,
      "grad_norm": 3.203125,
      "learning_rate": 2.896757712407611e-05,
      "loss": 0.8955,
      "step": 409660
    },
    {
      "epoch": 1.4357912149919216,
      "grad_norm": 2.8125,
      "learning_rate": 2.8966928095412405e-05,
      "loss": 0.8719,
      "step": 409670
    },
    {
      "epoch": 1.4358262624988172,
      "grad_norm": 2.75,
      "learning_rate": 2.8966279066748703e-05,
      "loss": 0.7832,
      "step": 409680
    },
    {
      "epoch": 1.4358613100057127,
      "grad_norm": 2.796875,
      "learning_rate": 2.8965630038085e-05,
      "loss": 0.792,
      "step": 409690
    },
    {
      "epoch": 1.4358963575126085,
      "grad_norm": 2.53125,
      "learning_rate": 2.89649810094213e-05,
      "loss": 0.8154,
      "step": 409700
    },
    {
      "epoch": 1.435931405019504,
      "grad_norm": 2.484375,
      "learning_rate": 2.8964331980757597e-05,
      "loss": 0.8004,
      "step": 409710
    },
    {
      "epoch": 1.4359664525263995,
      "grad_norm": 3.171875,
      "learning_rate": 2.8963682952093895e-05,
      "loss": 0.7698,
      "step": 409720
    },
    {
      "epoch": 1.436001500033295,
      "grad_norm": 2.75,
      "learning_rate": 2.8963033923430193e-05,
      "loss": 0.8766,
      "step": 409730
    },
    {
      "epoch": 1.4360365475401906,
      "grad_norm": 2.625,
      "learning_rate": 2.896238489476649e-05,
      "loss": 0.8637,
      "step": 409740
    },
    {
      "epoch": 1.4360715950470864,
      "grad_norm": 2.890625,
      "learning_rate": 2.896173586610279e-05,
      "loss": 0.8064,
      "step": 409750
    },
    {
      "epoch": 1.436106642553982,
      "grad_norm": 2.640625,
      "learning_rate": 2.8961086837439087e-05,
      "loss": 0.9021,
      "step": 409760
    },
    {
      "epoch": 1.4361416900608774,
      "grad_norm": 3.140625,
      "learning_rate": 2.8960437808775385e-05,
      "loss": 0.8546,
      "step": 409770
    },
    {
      "epoch": 1.4361767375677732,
      "grad_norm": 2.953125,
      "learning_rate": 2.8959788780111683e-05,
      "loss": 0.8657,
      "step": 409780
    },
    {
      "epoch": 1.4362117850746687,
      "grad_norm": 3.125,
      "learning_rate": 2.8959139751447985e-05,
      "loss": 0.8598,
      "step": 409790
    },
    {
      "epoch": 1.4362468325815643,
      "grad_norm": 2.546875,
      "learning_rate": 2.8958490722784283e-05,
      "loss": 0.8582,
      "step": 409800
    },
    {
      "epoch": 1.43628188008846,
      "grad_norm": 2.796875,
      "learning_rate": 2.895784169412058e-05,
      "loss": 0.859,
      "step": 409810
    },
    {
      "epoch": 1.4363169275953556,
      "grad_norm": 3.421875,
      "learning_rate": 2.895719266545688e-05,
      "loss": 0.9459,
      "step": 409820
    },
    {
      "epoch": 1.436351975102251,
      "grad_norm": 3.046875,
      "learning_rate": 2.8956543636793177e-05,
      "loss": 0.8454,
      "step": 409830
    },
    {
      "epoch": 1.4363870226091466,
      "grad_norm": 3.0625,
      "learning_rate": 2.8955894608129475e-05,
      "loss": 0.8958,
      "step": 409840
    },
    {
      "epoch": 1.4364220701160422,
      "grad_norm": 3.25,
      "learning_rate": 2.8955245579465773e-05,
      "loss": 0.8665,
      "step": 409850
    },
    {
      "epoch": 1.436457117622938,
      "grad_norm": 2.859375,
      "learning_rate": 2.895459655080207e-05,
      "loss": 0.8677,
      "step": 409860
    },
    {
      "epoch": 1.4364921651298335,
      "grad_norm": 3.34375,
      "learning_rate": 2.895394752213837e-05,
      "loss": 0.87,
      "step": 409870
    },
    {
      "epoch": 1.436527212636729,
      "grad_norm": 3.015625,
      "learning_rate": 2.8953298493474667e-05,
      "loss": 0.9318,
      "step": 409880
    },
    {
      "epoch": 1.4365622601436248,
      "grad_norm": 2.890625,
      "learning_rate": 2.8952649464810965e-05,
      "loss": 0.8342,
      "step": 409890
    },
    {
      "epoch": 1.4365973076505203,
      "grad_norm": 2.796875,
      "learning_rate": 2.8952000436147263e-05,
      "loss": 0.8859,
      "step": 409900
    },
    {
      "epoch": 1.4366323551574158,
      "grad_norm": 3.25,
      "learning_rate": 2.895135140748356e-05,
      "loss": 0.8329,
      "step": 409910
    },
    {
      "epoch": 1.4366674026643116,
      "grad_norm": 2.984375,
      "learning_rate": 2.8950702378819862e-05,
      "loss": 0.8355,
      "step": 409920
    },
    {
      "epoch": 1.4367024501712071,
      "grad_norm": 3.078125,
      "learning_rate": 2.895005335015616e-05,
      "loss": 0.8627,
      "step": 409930
    },
    {
      "epoch": 1.4367374976781027,
      "grad_norm": 3.171875,
      "learning_rate": 2.8949404321492458e-05,
      "loss": 0.8702,
      "step": 409940
    },
    {
      "epoch": 1.4367725451849982,
      "grad_norm": 3.203125,
      "learning_rate": 2.8948755292828756e-05,
      "loss": 0.8812,
      "step": 409950
    },
    {
      "epoch": 1.4368075926918937,
      "grad_norm": 2.59375,
      "learning_rate": 2.8948106264165054e-05,
      "loss": 0.8238,
      "step": 409960
    },
    {
      "epoch": 1.4368426401987895,
      "grad_norm": 2.71875,
      "learning_rate": 2.8947457235501352e-05,
      "loss": 0.7116,
      "step": 409970
    },
    {
      "epoch": 1.436877687705685,
      "grad_norm": 2.890625,
      "learning_rate": 2.894680820683765e-05,
      "loss": 0.8466,
      "step": 409980
    },
    {
      "epoch": 1.4369127352125806,
      "grad_norm": 2.703125,
      "learning_rate": 2.8946159178173948e-05,
      "loss": 0.849,
      "step": 409990
    },
    {
      "epoch": 1.4369477827194763,
      "grad_norm": 2.953125,
      "learning_rate": 2.8945510149510246e-05,
      "loss": 0.8643,
      "step": 410000
    },
    {
      "epoch": 1.4369477827194763,
      "eval_loss": 0.7942125201225281,
      "eval_runtime": 554.8933,
      "eval_samples_per_second": 685.602,
      "eval_steps_per_second": 57.134,
      "step": 410000
    },
    {
      "epoch": 1.4369828302263719,
      "grad_norm": 2.71875,
      "learning_rate": 2.8944861120846544e-05,
      "loss": 0.8628,
      "step": 410010
    },
    {
      "epoch": 1.4370178777332674,
      "grad_norm": 2.703125,
      "learning_rate": 2.8944212092182842e-05,
      "loss": 0.7889,
      "step": 410020
    },
    {
      "epoch": 1.4370529252401631,
      "grad_norm": 2.78125,
      "learning_rate": 2.894356306351914e-05,
      "loss": 0.8573,
      "step": 410030
    },
    {
      "epoch": 1.4370879727470587,
      "grad_norm": 2.703125,
      "learning_rate": 2.8942914034855438e-05,
      "loss": 0.7992,
      "step": 410040
    },
    {
      "epoch": 1.4371230202539542,
      "grad_norm": 2.59375,
      "learning_rate": 2.8942265006191733e-05,
      "loss": 0.7895,
      "step": 410050
    },
    {
      "epoch": 1.4371580677608498,
      "grad_norm": 3.1875,
      "learning_rate": 2.894161597752803e-05,
      "loss": 0.8718,
      "step": 410060
    },
    {
      "epoch": 1.4371931152677453,
      "grad_norm": 2.734375,
      "learning_rate": 2.894096694886433e-05,
      "loss": 0.8497,
      "step": 410070
    },
    {
      "epoch": 1.437228162774641,
      "grad_norm": 3.515625,
      "learning_rate": 2.8940317920200627e-05,
      "loss": 0.9088,
      "step": 410080
    },
    {
      "epoch": 1.4372632102815366,
      "grad_norm": 2.5625,
      "learning_rate": 2.8939668891536925e-05,
      "loss": 0.8866,
      "step": 410090
    },
    {
      "epoch": 1.4372982577884321,
      "grad_norm": 3.0625,
      "learning_rate": 2.8939019862873223e-05,
      "loss": 0.865,
      "step": 410100
    },
    {
      "epoch": 1.4373333052953279,
      "grad_norm": 2.90625,
      "learning_rate": 2.893837083420952e-05,
      "loss": 0.9055,
      "step": 410110
    },
    {
      "epoch": 1.4373683528022234,
      "grad_norm": 2.765625,
      "learning_rate": 2.893772180554582e-05,
      "loss": 0.9376,
      "step": 410120
    },
    {
      "epoch": 1.437403400309119,
      "grad_norm": 3.046875,
      "learning_rate": 2.8937072776882117e-05,
      "loss": 0.8285,
      "step": 410130
    },
    {
      "epoch": 1.4374384478160147,
      "grad_norm": 2.890625,
      "learning_rate": 2.8936423748218415e-05,
      "loss": 0.885,
      "step": 410140
    },
    {
      "epoch": 1.4374734953229102,
      "grad_norm": 2.46875,
      "learning_rate": 2.8935774719554716e-05,
      "loss": 0.8841,
      "step": 410150
    },
    {
      "epoch": 1.4375085428298058,
      "grad_norm": 2.6875,
      "learning_rate": 2.8935125690891014e-05,
      "loss": 0.8751,
      "step": 410160
    },
    {
      "epoch": 1.4375435903367013,
      "grad_norm": 3.078125,
      "learning_rate": 2.8934476662227312e-05,
      "loss": 0.8202,
      "step": 410170
    },
    {
      "epoch": 1.437578637843597,
      "grad_norm": 3.296875,
      "learning_rate": 2.893382763356361e-05,
      "loss": 0.8053,
      "step": 410180
    },
    {
      "epoch": 1.4376136853504926,
      "grad_norm": 2.75,
      "learning_rate": 2.8933178604899908e-05,
      "loss": 0.9123,
      "step": 410190
    },
    {
      "epoch": 1.4376487328573881,
      "grad_norm": 2.828125,
      "learning_rate": 2.8932529576236206e-05,
      "loss": 0.8758,
      "step": 410200
    },
    {
      "epoch": 1.4376837803642837,
      "grad_norm": 2.859375,
      "learning_rate": 2.8931880547572504e-05,
      "loss": 0.8223,
      "step": 410210
    },
    {
      "epoch": 1.4377188278711794,
      "grad_norm": 3.25,
      "learning_rate": 2.8931231518908802e-05,
      "loss": 0.7799,
      "step": 410220
    },
    {
      "epoch": 1.437753875378075,
      "grad_norm": 2.953125,
      "learning_rate": 2.89305824902451e-05,
      "loss": 0.914,
      "step": 410230
    },
    {
      "epoch": 1.4377889228849705,
      "grad_norm": 3.265625,
      "learning_rate": 2.8929933461581398e-05,
      "loss": 0.9117,
      "step": 410240
    },
    {
      "epoch": 1.4378239703918663,
      "grad_norm": 3.3125,
      "learning_rate": 2.8929284432917696e-05,
      "loss": 0.89,
      "step": 410250
    },
    {
      "epoch": 1.4378590178987618,
      "grad_norm": 3.328125,
      "learning_rate": 2.8928635404253994e-05,
      "loss": 0.9503,
      "step": 410260
    },
    {
      "epoch": 1.4378940654056573,
      "grad_norm": 3.09375,
      "learning_rate": 2.8927986375590292e-05,
      "loss": 0.9496,
      "step": 410270
    },
    {
      "epoch": 1.437929112912553,
      "grad_norm": 2.8125,
      "learning_rate": 2.892733734692659e-05,
      "loss": 0.8431,
      "step": 410280
    },
    {
      "epoch": 1.4379641604194486,
      "grad_norm": 2.671875,
      "learning_rate": 2.892668831826289e-05,
      "loss": 0.9015,
      "step": 410290
    },
    {
      "epoch": 1.4379992079263442,
      "grad_norm": 2.6875,
      "learning_rate": 2.892603928959919e-05,
      "loss": 0.8443,
      "step": 410300
    },
    {
      "epoch": 1.4380342554332397,
      "grad_norm": 3.109375,
      "learning_rate": 2.8925390260935488e-05,
      "loss": 0.9221,
      "step": 410310
    },
    {
      "epoch": 1.4380693029401352,
      "grad_norm": 2.8125,
      "learning_rate": 2.8924741232271786e-05,
      "loss": 0.8585,
      "step": 410320
    },
    {
      "epoch": 1.438104350447031,
      "grad_norm": 3.078125,
      "learning_rate": 2.8924092203608084e-05,
      "loss": 0.779,
      "step": 410330
    },
    {
      "epoch": 1.4381393979539265,
      "grad_norm": 2.75,
      "learning_rate": 2.892344317494438e-05,
      "loss": 0.8091,
      "step": 410340
    },
    {
      "epoch": 1.438174445460822,
      "grad_norm": 3.203125,
      "learning_rate": 2.892279414628068e-05,
      "loss": 0.8537,
      "step": 410350
    },
    {
      "epoch": 1.4382094929677178,
      "grad_norm": 4.375,
      "learning_rate": 2.8922145117616978e-05,
      "loss": 0.8804,
      "step": 410360
    },
    {
      "epoch": 1.4382445404746134,
      "grad_norm": 3.390625,
      "learning_rate": 2.8921496088953276e-05,
      "loss": 0.8016,
      "step": 410370
    },
    {
      "epoch": 1.438279587981509,
      "grad_norm": 2.890625,
      "learning_rate": 2.8920847060289574e-05,
      "loss": 0.8032,
      "step": 410380
    },
    {
      "epoch": 1.4383146354884047,
      "grad_norm": 3.046875,
      "learning_rate": 2.892019803162587e-05,
      "loss": 0.9187,
      "step": 410390
    },
    {
      "epoch": 1.4383496829953002,
      "grad_norm": 3.1875,
      "learning_rate": 2.891954900296217e-05,
      "loss": 0.8435,
      "step": 410400
    },
    {
      "epoch": 1.4383847305021957,
      "grad_norm": 2.90625,
      "learning_rate": 2.8918899974298468e-05,
      "loss": 0.7834,
      "step": 410410
    },
    {
      "epoch": 1.4384197780090913,
      "grad_norm": 3.203125,
      "learning_rate": 2.8918250945634762e-05,
      "loss": 0.8571,
      "step": 410420
    },
    {
      "epoch": 1.4384548255159868,
      "grad_norm": 2.765625,
      "learning_rate": 2.891760191697106e-05,
      "loss": 0.8606,
      "step": 410430
    },
    {
      "epoch": 1.4384898730228826,
      "grad_norm": 2.96875,
      "learning_rate": 2.8916952888307358e-05,
      "loss": 0.768,
      "step": 410440
    },
    {
      "epoch": 1.438524920529778,
      "grad_norm": 2.90625,
      "learning_rate": 2.8916303859643656e-05,
      "loss": 0.8499,
      "step": 410450
    },
    {
      "epoch": 1.4385599680366736,
      "grad_norm": 2.203125,
      "learning_rate": 2.8915654830979954e-05,
      "loss": 0.8739,
      "step": 410460
    },
    {
      "epoch": 1.4385950155435694,
      "grad_norm": 2.953125,
      "learning_rate": 2.8915005802316252e-05,
      "loss": 0.8663,
      "step": 410470
    },
    {
      "epoch": 1.438630063050465,
      "grad_norm": 2.40625,
      "learning_rate": 2.891435677365255e-05,
      "loss": 0.8239,
      "step": 410480
    },
    {
      "epoch": 1.4386651105573605,
      "grad_norm": 2.765625,
      "learning_rate": 2.8913707744988848e-05,
      "loss": 0.8344,
      "step": 410490
    },
    {
      "epoch": 1.4387001580642562,
      "grad_norm": 3.375,
      "learning_rate": 2.8913058716325146e-05,
      "loss": 0.8775,
      "step": 410500
    },
    {
      "epoch": 1.4387352055711518,
      "grad_norm": 3.0,
      "learning_rate": 2.8912409687661444e-05,
      "loss": 0.7593,
      "step": 410510
    },
    {
      "epoch": 1.4387702530780473,
      "grad_norm": 3.03125,
      "learning_rate": 2.8911760658997746e-05,
      "loss": 0.8238,
      "step": 410520
    },
    {
      "epoch": 1.4388053005849428,
      "grad_norm": 3.359375,
      "learning_rate": 2.8911111630334044e-05,
      "loss": 0.8288,
      "step": 410530
    },
    {
      "epoch": 1.4388403480918384,
      "grad_norm": 2.578125,
      "learning_rate": 2.891046260167034e-05,
      "loss": 0.7788,
      "step": 410540
    },
    {
      "epoch": 1.4388753955987341,
      "grad_norm": 3.015625,
      "learning_rate": 2.890981357300664e-05,
      "loss": 0.8118,
      "step": 410550
    },
    {
      "epoch": 1.4389104431056297,
      "grad_norm": 2.640625,
      "learning_rate": 2.8909164544342938e-05,
      "loss": 0.8707,
      "step": 410560
    },
    {
      "epoch": 1.4389454906125252,
      "grad_norm": 2.9375,
      "learning_rate": 2.8908515515679236e-05,
      "loss": 0.8678,
      "step": 410570
    },
    {
      "epoch": 1.438980538119421,
      "grad_norm": 3.0,
      "learning_rate": 2.8907866487015534e-05,
      "loss": 0.8262,
      "step": 410580
    },
    {
      "epoch": 1.4390155856263165,
      "grad_norm": 3.046875,
      "learning_rate": 2.890721745835183e-05,
      "loss": 0.8855,
      "step": 410590
    },
    {
      "epoch": 1.439050633133212,
      "grad_norm": 3.125,
      "learning_rate": 2.890656842968813e-05,
      "loss": 0.9032,
      "step": 410600
    },
    {
      "epoch": 1.4390856806401078,
      "grad_norm": 2.375,
      "learning_rate": 2.8905919401024428e-05,
      "loss": 0.7701,
      "step": 410610
    },
    {
      "epoch": 1.4391207281470033,
      "grad_norm": 2.59375,
      "learning_rate": 2.8905270372360726e-05,
      "loss": 0.8005,
      "step": 410620
    },
    {
      "epoch": 1.4391557756538988,
      "grad_norm": 3.3125,
      "learning_rate": 2.8904621343697024e-05,
      "loss": 0.8136,
      "step": 410630
    },
    {
      "epoch": 1.4391908231607944,
      "grad_norm": 2.625,
      "learning_rate": 2.890397231503332e-05,
      "loss": 0.7525,
      "step": 410640
    },
    {
      "epoch": 1.43922587066769,
      "grad_norm": 2.828125,
      "learning_rate": 2.890332328636962e-05,
      "loss": 0.8107,
      "step": 410650
    },
    {
      "epoch": 1.4392609181745857,
      "grad_norm": 2.859375,
      "learning_rate": 2.890267425770592e-05,
      "loss": 0.7799,
      "step": 410660
    },
    {
      "epoch": 1.4392959656814812,
      "grad_norm": 3.015625,
      "learning_rate": 2.890202522904222e-05,
      "loss": 0.9216,
      "step": 410670
    },
    {
      "epoch": 1.4393310131883768,
      "grad_norm": 2.9375,
      "learning_rate": 2.8901376200378517e-05,
      "loss": 0.8787,
      "step": 410680
    },
    {
      "epoch": 1.4393660606952725,
      "grad_norm": 2.359375,
      "learning_rate": 2.8900727171714815e-05,
      "loss": 0.9355,
      "step": 410690
    },
    {
      "epoch": 1.439401108202168,
      "grad_norm": 2.96875,
      "learning_rate": 2.8900078143051113e-05,
      "loss": 0.7952,
      "step": 410700
    },
    {
      "epoch": 1.4394361557090636,
      "grad_norm": 2.84375,
      "learning_rate": 2.889942911438741e-05,
      "loss": 0.9064,
      "step": 410710
    },
    {
      "epoch": 1.4394712032159593,
      "grad_norm": 2.84375,
      "learning_rate": 2.889878008572371e-05,
      "loss": 0.8296,
      "step": 410720
    },
    {
      "epoch": 1.4395062507228549,
      "grad_norm": 3.078125,
      "learning_rate": 2.8898131057060007e-05,
      "loss": 0.7984,
      "step": 410730
    },
    {
      "epoch": 1.4395412982297504,
      "grad_norm": 3.078125,
      "learning_rate": 2.8897482028396305e-05,
      "loss": 0.8234,
      "step": 410740
    },
    {
      "epoch": 1.439576345736646,
      "grad_norm": 3.265625,
      "learning_rate": 2.8896832999732603e-05,
      "loss": 0.9357,
      "step": 410750
    },
    {
      "epoch": 1.4396113932435417,
      "grad_norm": 2.640625,
      "learning_rate": 2.88961839710689e-05,
      "loss": 0.9047,
      "step": 410760
    },
    {
      "epoch": 1.4396464407504372,
      "grad_norm": 2.671875,
      "learning_rate": 2.88955349424052e-05,
      "loss": 0.882,
      "step": 410770
    },
    {
      "epoch": 1.4396814882573328,
      "grad_norm": 2.75,
      "learning_rate": 2.8894885913741497e-05,
      "loss": 0.81,
      "step": 410780
    },
    {
      "epoch": 1.4397165357642283,
      "grad_norm": 3.359375,
      "learning_rate": 2.8894236885077795e-05,
      "loss": 0.8666,
      "step": 410790
    },
    {
      "epoch": 1.439751583271124,
      "grad_norm": 3.03125,
      "learning_rate": 2.889358785641409e-05,
      "loss": 0.9488,
      "step": 410800
    },
    {
      "epoch": 1.4397866307780196,
      "grad_norm": 2.671875,
      "learning_rate": 2.8892938827750388e-05,
      "loss": 0.8196,
      "step": 410810
    },
    {
      "epoch": 1.4398216782849151,
      "grad_norm": 3.15625,
      "learning_rate": 2.8892289799086686e-05,
      "loss": 0.8,
      "step": 410820
    },
    {
      "epoch": 1.439856725791811,
      "grad_norm": 3.40625,
      "learning_rate": 2.8891640770422984e-05,
      "loss": 0.9084,
      "step": 410830
    },
    {
      "epoch": 1.4398917732987064,
      "grad_norm": 2.703125,
      "learning_rate": 2.889099174175928e-05,
      "loss": 0.8819,
      "step": 410840
    },
    {
      "epoch": 1.439926820805602,
      "grad_norm": 2.484375,
      "learning_rate": 2.889034271309558e-05,
      "loss": 0.9145,
      "step": 410850
    },
    {
      "epoch": 1.4399618683124977,
      "grad_norm": 3.09375,
      "learning_rate": 2.8889693684431878e-05,
      "loss": 0.8433,
      "step": 410860
    },
    {
      "epoch": 1.4399969158193933,
      "grad_norm": 3.203125,
      "learning_rate": 2.8889044655768176e-05,
      "loss": 0.7857,
      "step": 410870
    },
    {
      "epoch": 1.4400319633262888,
      "grad_norm": 2.78125,
      "learning_rate": 2.8888395627104477e-05,
      "loss": 0.8249,
      "step": 410880
    },
    {
      "epoch": 1.4400670108331843,
      "grad_norm": 3.015625,
      "learning_rate": 2.8887746598440775e-05,
      "loss": 0.828,
      "step": 410890
    },
    {
      "epoch": 1.4401020583400799,
      "grad_norm": 2.703125,
      "learning_rate": 2.8887097569777073e-05,
      "loss": 0.7913,
      "step": 410900
    },
    {
      "epoch": 1.4401371058469756,
      "grad_norm": 2.5625,
      "learning_rate": 2.888644854111337e-05,
      "loss": 0.8218,
      "step": 410910
    },
    {
      "epoch": 1.4401721533538712,
      "grad_norm": 2.96875,
      "learning_rate": 2.888579951244967e-05,
      "loss": 0.9174,
      "step": 410920
    },
    {
      "epoch": 1.4402072008607667,
      "grad_norm": 2.828125,
      "learning_rate": 2.8885150483785967e-05,
      "loss": 0.7938,
      "step": 410930
    },
    {
      "epoch": 1.4402422483676625,
      "grad_norm": 2.984375,
      "learning_rate": 2.8884501455122265e-05,
      "loss": 0.8737,
      "step": 410940
    },
    {
      "epoch": 1.440277295874558,
      "grad_norm": 2.984375,
      "learning_rate": 2.8883852426458563e-05,
      "loss": 0.8746,
      "step": 410950
    },
    {
      "epoch": 1.4403123433814535,
      "grad_norm": 3.171875,
      "learning_rate": 2.888320339779486e-05,
      "loss": 0.8673,
      "step": 410960
    },
    {
      "epoch": 1.4403473908883493,
      "grad_norm": 2.890625,
      "learning_rate": 2.888255436913116e-05,
      "loss": 0.8342,
      "step": 410970
    },
    {
      "epoch": 1.4403824383952448,
      "grad_norm": 2.53125,
      "learning_rate": 2.8881905340467457e-05,
      "loss": 0.7867,
      "step": 410980
    },
    {
      "epoch": 1.4404174859021404,
      "grad_norm": 2.765625,
      "learning_rate": 2.8881256311803755e-05,
      "loss": 0.8044,
      "step": 410990
    },
    {
      "epoch": 1.440452533409036,
      "grad_norm": 2.6875,
      "learning_rate": 2.8880607283140053e-05,
      "loss": 0.8426,
      "step": 411000
    },
    {
      "epoch": 1.4404875809159314,
      "grad_norm": 2.484375,
      "learning_rate": 2.887995825447635e-05,
      "loss": 0.7758,
      "step": 411010
    },
    {
      "epoch": 1.4405226284228272,
      "grad_norm": 2.78125,
      "learning_rate": 2.8879309225812652e-05,
      "loss": 0.7949,
      "step": 411020
    },
    {
      "epoch": 1.4405576759297227,
      "grad_norm": 3.109375,
      "learning_rate": 2.887866019714895e-05,
      "loss": 0.8357,
      "step": 411030
    },
    {
      "epoch": 1.4405927234366183,
      "grad_norm": 2.578125,
      "learning_rate": 2.887801116848525e-05,
      "loss": 0.8591,
      "step": 411040
    },
    {
      "epoch": 1.440627770943514,
      "grad_norm": 3.15625,
      "learning_rate": 2.8877362139821546e-05,
      "loss": 0.8586,
      "step": 411050
    },
    {
      "epoch": 1.4406628184504096,
      "grad_norm": 3.03125,
      "learning_rate": 2.8876713111157844e-05,
      "loss": 0.9362,
      "step": 411060
    },
    {
      "epoch": 1.440697865957305,
      "grad_norm": 2.921875,
      "learning_rate": 2.8876064082494142e-05,
      "loss": 0.8404,
      "step": 411070
    },
    {
      "epoch": 1.4407329134642008,
      "grad_norm": 2.8125,
      "learning_rate": 2.887541505383044e-05,
      "loss": 0.8306,
      "step": 411080
    },
    {
      "epoch": 1.4407679609710964,
      "grad_norm": 3.03125,
      "learning_rate": 2.887476602516674e-05,
      "loss": 0.8883,
      "step": 411090
    },
    {
      "epoch": 1.440803008477992,
      "grad_norm": 3.0625,
      "learning_rate": 2.8874116996503036e-05,
      "loss": 0.9243,
      "step": 411100
    },
    {
      "epoch": 1.4408380559848875,
      "grad_norm": 3.03125,
      "learning_rate": 2.8873467967839334e-05,
      "loss": 0.8593,
      "step": 411110
    },
    {
      "epoch": 1.440873103491783,
      "grad_norm": 3.15625,
      "learning_rate": 2.8872818939175632e-05,
      "loss": 0.8413,
      "step": 411120
    },
    {
      "epoch": 1.4409081509986787,
      "grad_norm": 2.765625,
      "learning_rate": 2.887216991051193e-05,
      "loss": 0.8586,
      "step": 411130
    },
    {
      "epoch": 1.4409431985055743,
      "grad_norm": 2.921875,
      "learning_rate": 2.887152088184823e-05,
      "loss": 0.8112,
      "step": 411140
    },
    {
      "epoch": 1.4409782460124698,
      "grad_norm": 3.71875,
      "learning_rate": 2.8870871853184526e-05,
      "loss": 0.8648,
      "step": 411150
    },
    {
      "epoch": 1.4410132935193656,
      "grad_norm": 3.109375,
      "learning_rate": 2.8870222824520828e-05,
      "loss": 0.8175,
      "step": 411160
    },
    {
      "epoch": 1.4410483410262611,
      "grad_norm": 3.046875,
      "learning_rate": 2.886957379585712e-05,
      "loss": 0.8422,
      "step": 411170
    },
    {
      "epoch": 1.4410833885331567,
      "grad_norm": 2.46875,
      "learning_rate": 2.8868924767193417e-05,
      "loss": 0.8282,
      "step": 411180
    },
    {
      "epoch": 1.4411184360400524,
      "grad_norm": 2.859375,
      "learning_rate": 2.8868275738529715e-05,
      "loss": 0.8995,
      "step": 411190
    },
    {
      "epoch": 1.441153483546948,
      "grad_norm": 2.8125,
      "learning_rate": 2.8867626709866013e-05,
      "loss": 0.879,
      "step": 411200
    },
    {
      "epoch": 1.4411885310538435,
      "grad_norm": 3.0,
      "learning_rate": 2.886697768120231e-05,
      "loss": 0.8872,
      "step": 411210
    },
    {
      "epoch": 1.441223578560739,
      "grad_norm": 2.6875,
      "learning_rate": 2.886632865253861e-05,
      "loss": 0.8323,
      "step": 411220
    },
    {
      "epoch": 1.4412586260676346,
      "grad_norm": 3.953125,
      "learning_rate": 2.8865679623874907e-05,
      "loss": 0.9309,
      "step": 411230
    },
    {
      "epoch": 1.4412936735745303,
      "grad_norm": 2.9375,
      "learning_rate": 2.8865030595211205e-05,
      "loss": 0.9111,
      "step": 411240
    },
    {
      "epoch": 1.4413287210814258,
      "grad_norm": 2.796875,
      "learning_rate": 2.8864381566547506e-05,
      "loss": 0.8152,
      "step": 411250
    },
    {
      "epoch": 1.4413637685883214,
      "grad_norm": 3.125,
      "learning_rate": 2.8863732537883804e-05,
      "loss": 0.7826,
      "step": 411260
    },
    {
      "epoch": 1.4413988160952171,
      "grad_norm": 2.984375,
      "learning_rate": 2.8863083509220102e-05,
      "loss": 0.8448,
      "step": 411270
    },
    {
      "epoch": 1.4414338636021127,
      "grad_norm": 2.8125,
      "learning_rate": 2.88624344805564e-05,
      "loss": 0.8132,
      "step": 411280
    },
    {
      "epoch": 1.4414689111090082,
      "grad_norm": 2.984375,
      "learning_rate": 2.88617854518927e-05,
      "loss": 0.8137,
      "step": 411290
    },
    {
      "epoch": 1.441503958615904,
      "grad_norm": 2.953125,
      "learning_rate": 2.8861136423228996e-05,
      "loss": 0.8398,
      "step": 411300
    },
    {
      "epoch": 1.4415390061227995,
      "grad_norm": 3.0625,
      "learning_rate": 2.8860487394565294e-05,
      "loss": 0.8618,
      "step": 411310
    },
    {
      "epoch": 1.441574053629695,
      "grad_norm": 2.640625,
      "learning_rate": 2.8859838365901592e-05,
      "loss": 0.8809,
      "step": 411320
    },
    {
      "epoch": 1.4416091011365906,
      "grad_norm": 2.8125,
      "learning_rate": 2.885918933723789e-05,
      "loss": 0.8531,
      "step": 411330
    },
    {
      "epoch": 1.4416441486434861,
      "grad_norm": 2.625,
      "learning_rate": 2.885854030857419e-05,
      "loss": 0.8143,
      "step": 411340
    },
    {
      "epoch": 1.4416791961503819,
      "grad_norm": 2.859375,
      "learning_rate": 2.8857891279910486e-05,
      "loss": 0.871,
      "step": 411350
    },
    {
      "epoch": 1.4417142436572774,
      "grad_norm": 2.59375,
      "learning_rate": 2.8857242251246784e-05,
      "loss": 0.8159,
      "step": 411360
    },
    {
      "epoch": 1.441749291164173,
      "grad_norm": 2.75,
      "learning_rate": 2.8856593222583082e-05,
      "loss": 0.8691,
      "step": 411370
    },
    {
      "epoch": 1.4417843386710687,
      "grad_norm": 2.984375,
      "learning_rate": 2.885594419391938e-05,
      "loss": 0.8563,
      "step": 411380
    },
    {
      "epoch": 1.4418193861779642,
      "grad_norm": 3.21875,
      "learning_rate": 2.8855295165255682e-05,
      "loss": 0.7824,
      "step": 411390
    },
    {
      "epoch": 1.4418544336848598,
      "grad_norm": 3.171875,
      "learning_rate": 2.885464613659198e-05,
      "loss": 0.8224,
      "step": 411400
    },
    {
      "epoch": 1.4418894811917555,
      "grad_norm": 2.84375,
      "learning_rate": 2.8853997107928278e-05,
      "loss": 0.8629,
      "step": 411410
    },
    {
      "epoch": 1.441924528698651,
      "grad_norm": 3.0625,
      "learning_rate": 2.8853348079264576e-05,
      "loss": 0.8416,
      "step": 411420
    },
    {
      "epoch": 1.4419595762055466,
      "grad_norm": 2.640625,
      "learning_rate": 2.8852699050600874e-05,
      "loss": 0.8131,
      "step": 411430
    },
    {
      "epoch": 1.4419946237124421,
      "grad_norm": 3.1875,
      "learning_rate": 2.8852050021937172e-05,
      "loss": 0.7767,
      "step": 411440
    },
    {
      "epoch": 1.442029671219338,
      "grad_norm": 2.515625,
      "learning_rate": 2.885140099327347e-05,
      "loss": 0.9113,
      "step": 411450
    },
    {
      "epoch": 1.4420647187262334,
      "grad_norm": 2.40625,
      "learning_rate": 2.8850751964609768e-05,
      "loss": 0.8174,
      "step": 411460
    },
    {
      "epoch": 1.442099766233129,
      "grad_norm": 2.859375,
      "learning_rate": 2.8850102935946066e-05,
      "loss": 0.9361,
      "step": 411470
    },
    {
      "epoch": 1.4421348137400245,
      "grad_norm": 2.6875,
      "learning_rate": 2.8849453907282364e-05,
      "loss": 0.911,
      "step": 411480
    },
    {
      "epoch": 1.4421698612469203,
      "grad_norm": 3.046875,
      "learning_rate": 2.8848804878618662e-05,
      "loss": 0.847,
      "step": 411490
    },
    {
      "epoch": 1.4422049087538158,
      "grad_norm": 3.296875,
      "learning_rate": 2.884815584995496e-05,
      "loss": 0.8506,
      "step": 411500
    },
    {
      "epoch": 1.4422399562607113,
      "grad_norm": 2.65625,
      "learning_rate": 2.8847506821291258e-05,
      "loss": 0.8208,
      "step": 411510
    },
    {
      "epoch": 1.442275003767607,
      "grad_norm": 3.625,
      "learning_rate": 2.8846857792627556e-05,
      "loss": 0.8828,
      "step": 411520
    },
    {
      "epoch": 1.4423100512745026,
      "grad_norm": 2.71875,
      "learning_rate": 2.8846208763963857e-05,
      "loss": 0.814,
      "step": 411530
    },
    {
      "epoch": 1.4423450987813982,
      "grad_norm": 2.46875,
      "learning_rate": 2.8845559735300155e-05,
      "loss": 0.9039,
      "step": 411540
    },
    {
      "epoch": 1.442380146288294,
      "grad_norm": 2.9375,
      "learning_rate": 2.8844910706636446e-05,
      "loss": 0.8555,
      "step": 411550
    },
    {
      "epoch": 1.4424151937951895,
      "grad_norm": 2.84375,
      "learning_rate": 2.8844261677972744e-05,
      "loss": 0.8853,
      "step": 411560
    },
    {
      "epoch": 1.442450241302085,
      "grad_norm": 2.765625,
      "learning_rate": 2.8843612649309042e-05,
      "loss": 0.7973,
      "step": 411570
    },
    {
      "epoch": 1.4424852888089805,
      "grad_norm": 2.765625,
      "learning_rate": 2.884296362064534e-05,
      "loss": 0.8114,
      "step": 411580
    },
    {
      "epoch": 1.442520336315876,
      "grad_norm": 3.0,
      "learning_rate": 2.884231459198164e-05,
      "loss": 0.9603,
      "step": 411590
    },
    {
      "epoch": 1.4425553838227718,
      "grad_norm": 2.65625,
      "learning_rate": 2.8841665563317936e-05,
      "loss": 0.7493,
      "step": 411600
    },
    {
      "epoch": 1.4425904313296674,
      "grad_norm": 2.546875,
      "learning_rate": 2.8841016534654234e-05,
      "loss": 0.8605,
      "step": 411610
    },
    {
      "epoch": 1.442625478836563,
      "grad_norm": 3.703125,
      "learning_rate": 2.8840367505990536e-05,
      "loss": 0.924,
      "step": 411620
    },
    {
      "epoch": 1.4426605263434586,
      "grad_norm": 2.984375,
      "learning_rate": 2.8839718477326834e-05,
      "loss": 0.8301,
      "step": 411630
    },
    {
      "epoch": 1.4426955738503542,
      "grad_norm": 2.9375,
      "learning_rate": 2.8839069448663132e-05,
      "loss": 0.9121,
      "step": 411640
    },
    {
      "epoch": 1.4427306213572497,
      "grad_norm": 3.03125,
      "learning_rate": 2.883842041999943e-05,
      "loss": 0.8298,
      "step": 411650
    },
    {
      "epoch": 1.4427656688641455,
      "grad_norm": 3.296875,
      "learning_rate": 2.8837771391335728e-05,
      "loss": 0.7752,
      "step": 411660
    },
    {
      "epoch": 1.442800716371041,
      "grad_norm": 2.578125,
      "learning_rate": 2.8837122362672026e-05,
      "loss": 0.8272,
      "step": 411670
    },
    {
      "epoch": 1.4428357638779366,
      "grad_norm": 2.96875,
      "learning_rate": 2.8836473334008324e-05,
      "loss": 0.8385,
      "step": 411680
    },
    {
      "epoch": 1.442870811384832,
      "grad_norm": 2.875,
      "learning_rate": 2.8835824305344622e-05,
      "loss": 0.8417,
      "step": 411690
    },
    {
      "epoch": 1.4429058588917276,
      "grad_norm": 2.640625,
      "learning_rate": 2.883517527668092e-05,
      "loss": 0.8928,
      "step": 411700
    },
    {
      "epoch": 1.4429409063986234,
      "grad_norm": 3.0625,
      "learning_rate": 2.8834526248017218e-05,
      "loss": 0.8812,
      "step": 411710
    },
    {
      "epoch": 1.442975953905519,
      "grad_norm": 2.390625,
      "learning_rate": 2.8833877219353516e-05,
      "loss": 0.9164,
      "step": 411720
    },
    {
      "epoch": 1.4430110014124145,
      "grad_norm": 3.046875,
      "learning_rate": 2.8833228190689814e-05,
      "loss": 0.7858,
      "step": 411730
    },
    {
      "epoch": 1.4430460489193102,
      "grad_norm": 2.71875,
      "learning_rate": 2.8832579162026112e-05,
      "loss": 0.8296,
      "step": 411740
    },
    {
      "epoch": 1.4430810964262057,
      "grad_norm": 3.421875,
      "learning_rate": 2.883193013336241e-05,
      "loss": 0.8957,
      "step": 411750
    },
    {
      "epoch": 1.4431161439331013,
      "grad_norm": 3.046875,
      "learning_rate": 2.883128110469871e-05,
      "loss": 0.8097,
      "step": 411760
    },
    {
      "epoch": 1.443151191439997,
      "grad_norm": 4.53125,
      "learning_rate": 2.883063207603501e-05,
      "loss": 0.7888,
      "step": 411770
    },
    {
      "epoch": 1.4431862389468926,
      "grad_norm": 2.890625,
      "learning_rate": 2.8829983047371307e-05,
      "loss": 0.7624,
      "step": 411780
    },
    {
      "epoch": 1.443221286453788,
      "grad_norm": 3.046875,
      "learning_rate": 2.8829334018707605e-05,
      "loss": 0.8266,
      "step": 411790
    },
    {
      "epoch": 1.4432563339606836,
      "grad_norm": 2.828125,
      "learning_rate": 2.8828684990043903e-05,
      "loss": 0.9104,
      "step": 411800
    },
    {
      "epoch": 1.4432913814675792,
      "grad_norm": 2.703125,
      "learning_rate": 2.88280359613802e-05,
      "loss": 0.8204,
      "step": 411810
    },
    {
      "epoch": 1.443326428974475,
      "grad_norm": 2.6875,
      "learning_rate": 2.88273869327165e-05,
      "loss": 0.832,
      "step": 411820
    },
    {
      "epoch": 1.4433614764813705,
      "grad_norm": 2.765625,
      "learning_rate": 2.8826737904052797e-05,
      "loss": 0.8868,
      "step": 411830
    },
    {
      "epoch": 1.443396523988266,
      "grad_norm": 2.796875,
      "learning_rate": 2.8826088875389095e-05,
      "loss": 0.8923,
      "step": 411840
    },
    {
      "epoch": 1.4434315714951618,
      "grad_norm": 3.078125,
      "learning_rate": 2.8825439846725393e-05,
      "loss": 0.7966,
      "step": 411850
    },
    {
      "epoch": 1.4434666190020573,
      "grad_norm": 3.0625,
      "learning_rate": 2.882479081806169e-05,
      "loss": 0.891,
      "step": 411860
    },
    {
      "epoch": 1.4435016665089528,
      "grad_norm": 2.71875,
      "learning_rate": 2.882414178939799e-05,
      "loss": 0.8039,
      "step": 411870
    },
    {
      "epoch": 1.4435367140158486,
      "grad_norm": 2.890625,
      "learning_rate": 2.8823492760734287e-05,
      "loss": 0.8081,
      "step": 411880
    },
    {
      "epoch": 1.4435717615227441,
      "grad_norm": 3.0,
      "learning_rate": 2.8822843732070585e-05,
      "loss": 0.8546,
      "step": 411890
    },
    {
      "epoch": 1.4436068090296397,
      "grad_norm": 3.375,
      "learning_rate": 2.8822194703406887e-05,
      "loss": 0.9163,
      "step": 411900
    },
    {
      "epoch": 1.4436418565365352,
      "grad_norm": 2.6875,
      "learning_rate": 2.8821545674743185e-05,
      "loss": 0.8639,
      "step": 411910
    },
    {
      "epoch": 1.4436769040434307,
      "grad_norm": 2.4375,
      "learning_rate": 2.8820896646079483e-05,
      "loss": 0.8368,
      "step": 411920
    },
    {
      "epoch": 1.4437119515503265,
      "grad_norm": 3.109375,
      "learning_rate": 2.8820247617415774e-05,
      "loss": 0.8427,
      "step": 411930
    },
    {
      "epoch": 1.443746999057222,
      "grad_norm": 2.5625,
      "learning_rate": 2.8819598588752072e-05,
      "loss": 0.8679,
      "step": 411940
    },
    {
      "epoch": 1.4437820465641176,
      "grad_norm": 3.25,
      "learning_rate": 2.881894956008837e-05,
      "loss": 0.9032,
      "step": 411950
    },
    {
      "epoch": 1.4438170940710133,
      "grad_norm": 2.578125,
      "learning_rate": 2.8818300531424668e-05,
      "loss": 0.8404,
      "step": 411960
    },
    {
      "epoch": 1.4438521415779089,
      "grad_norm": 2.390625,
      "learning_rate": 2.8817651502760966e-05,
      "loss": 0.8636,
      "step": 411970
    },
    {
      "epoch": 1.4438871890848044,
      "grad_norm": 2.828125,
      "learning_rate": 2.8817002474097267e-05,
      "loss": 0.7928,
      "step": 411980
    },
    {
      "epoch": 1.4439222365917002,
      "grad_norm": 3.15625,
      "learning_rate": 2.8816353445433565e-05,
      "loss": 0.8683,
      "step": 411990
    },
    {
      "epoch": 1.4439572840985957,
      "grad_norm": 3.03125,
      "learning_rate": 2.8815704416769863e-05,
      "loss": 0.8535,
      "step": 412000
    },
    {
      "epoch": 1.4439923316054912,
      "grad_norm": 2.984375,
      "learning_rate": 2.881505538810616e-05,
      "loss": 0.8214,
      "step": 412010
    },
    {
      "epoch": 1.4440273791123868,
      "grad_norm": 2.859375,
      "learning_rate": 2.881440635944246e-05,
      "loss": 0.8145,
      "step": 412020
    },
    {
      "epoch": 1.4440624266192823,
      "grad_norm": 2.859375,
      "learning_rate": 2.8813757330778757e-05,
      "loss": 0.7869,
      "step": 412030
    },
    {
      "epoch": 1.444097474126178,
      "grad_norm": 2.71875,
      "learning_rate": 2.8813108302115055e-05,
      "loss": 0.8401,
      "step": 412040
    },
    {
      "epoch": 1.4441325216330736,
      "grad_norm": 2.90625,
      "learning_rate": 2.8812459273451353e-05,
      "loss": 0.8698,
      "step": 412050
    },
    {
      "epoch": 1.4441675691399691,
      "grad_norm": 3.265625,
      "learning_rate": 2.881181024478765e-05,
      "loss": 0.8529,
      "step": 412060
    },
    {
      "epoch": 1.444202616646865,
      "grad_norm": 3.015625,
      "learning_rate": 2.881116121612395e-05,
      "loss": 0.7748,
      "step": 412070
    },
    {
      "epoch": 1.4442376641537604,
      "grad_norm": 3.296875,
      "learning_rate": 2.8810512187460247e-05,
      "loss": 0.8818,
      "step": 412080
    },
    {
      "epoch": 1.444272711660656,
      "grad_norm": 3.15625,
      "learning_rate": 2.8809863158796545e-05,
      "loss": 0.8187,
      "step": 412090
    },
    {
      "epoch": 1.4443077591675517,
      "grad_norm": 3.078125,
      "learning_rate": 2.8809214130132843e-05,
      "loss": 0.8968,
      "step": 412100
    },
    {
      "epoch": 1.4443428066744473,
      "grad_norm": 2.875,
      "learning_rate": 2.880856510146914e-05,
      "loss": 0.8315,
      "step": 412110
    },
    {
      "epoch": 1.4443778541813428,
      "grad_norm": 2.84375,
      "learning_rate": 2.8807916072805443e-05,
      "loss": 0.9097,
      "step": 412120
    },
    {
      "epoch": 1.4444129016882383,
      "grad_norm": 2.5625,
      "learning_rate": 2.880726704414174e-05,
      "loss": 0.7978,
      "step": 412130
    },
    {
      "epoch": 1.444447949195134,
      "grad_norm": 3.328125,
      "learning_rate": 2.880661801547804e-05,
      "loss": 0.9143,
      "step": 412140
    },
    {
      "epoch": 1.4444829967020296,
      "grad_norm": 2.5625,
      "learning_rate": 2.8805968986814337e-05,
      "loss": 0.8309,
      "step": 412150
    },
    {
      "epoch": 1.4445180442089252,
      "grad_norm": 2.53125,
      "learning_rate": 2.8805319958150635e-05,
      "loss": 0.7815,
      "step": 412160
    },
    {
      "epoch": 1.4445530917158207,
      "grad_norm": 3.3125,
      "learning_rate": 2.8804670929486933e-05,
      "loss": 0.7712,
      "step": 412170
    },
    {
      "epoch": 1.4445881392227164,
      "grad_norm": 2.859375,
      "learning_rate": 2.880402190082323e-05,
      "loss": 0.8172,
      "step": 412180
    },
    {
      "epoch": 1.444623186729612,
      "grad_norm": 3.25,
      "learning_rate": 2.880337287215953e-05,
      "loss": 0.8167,
      "step": 412190
    },
    {
      "epoch": 1.4446582342365075,
      "grad_norm": 2.953125,
      "learning_rate": 2.8802723843495827e-05,
      "loss": 0.8337,
      "step": 412200
    },
    {
      "epoch": 1.4446932817434033,
      "grad_norm": 3.265625,
      "learning_rate": 2.8802074814832125e-05,
      "loss": 0.9479,
      "step": 412210
    },
    {
      "epoch": 1.4447283292502988,
      "grad_norm": 3.046875,
      "learning_rate": 2.8801425786168423e-05,
      "loss": 0.845,
      "step": 412220
    },
    {
      "epoch": 1.4447633767571944,
      "grad_norm": 2.390625,
      "learning_rate": 2.880077675750472e-05,
      "loss": 0.8488,
      "step": 412230
    },
    {
      "epoch": 1.44479842426409,
      "grad_norm": 3.375,
      "learning_rate": 2.880012772884102e-05,
      "loss": 0.8397,
      "step": 412240
    },
    {
      "epoch": 1.4448334717709856,
      "grad_norm": 2.796875,
      "learning_rate": 2.8799478700177317e-05,
      "loss": 0.9223,
      "step": 412250
    },
    {
      "epoch": 1.4448685192778812,
      "grad_norm": 2.875,
      "learning_rate": 2.8798829671513618e-05,
      "loss": 0.8142,
      "step": 412260
    },
    {
      "epoch": 1.4449035667847767,
      "grad_norm": 2.890625,
      "learning_rate": 2.8798180642849916e-05,
      "loss": 0.7584,
      "step": 412270
    },
    {
      "epoch": 1.4449386142916723,
      "grad_norm": 2.734375,
      "learning_rate": 2.8797531614186214e-05,
      "loss": 0.8278,
      "step": 412280
    },
    {
      "epoch": 1.444973661798568,
      "grad_norm": 3.0625,
      "learning_rate": 2.8796882585522512e-05,
      "loss": 0.8407,
      "step": 412290
    },
    {
      "epoch": 1.4450087093054635,
      "grad_norm": 3.40625,
      "learning_rate": 2.8796233556858803e-05,
      "loss": 0.8642,
      "step": 412300
    },
    {
      "epoch": 1.445043756812359,
      "grad_norm": 2.765625,
      "learning_rate": 2.87955845281951e-05,
      "loss": 0.8094,
      "step": 412310
    },
    {
      "epoch": 1.4450788043192548,
      "grad_norm": 2.15625,
      "learning_rate": 2.87949354995314e-05,
      "loss": 0.819,
      "step": 412320
    },
    {
      "epoch": 1.4451138518261504,
      "grad_norm": 3.0,
      "learning_rate": 2.8794286470867697e-05,
      "loss": 0.7981,
      "step": 412330
    },
    {
      "epoch": 1.445148899333046,
      "grad_norm": 2.96875,
      "learning_rate": 2.8793637442203995e-05,
      "loss": 0.8606,
      "step": 412340
    },
    {
      "epoch": 1.4451839468399417,
      "grad_norm": 2.625,
      "learning_rate": 2.8792988413540297e-05,
      "loss": 0.8119,
      "step": 412350
    },
    {
      "epoch": 1.4452189943468372,
      "grad_norm": 3.09375,
      "learning_rate": 2.8792339384876595e-05,
      "loss": 0.8797,
      "step": 412360
    },
    {
      "epoch": 1.4452540418537327,
      "grad_norm": 2.703125,
      "learning_rate": 2.8791690356212893e-05,
      "loss": 0.8237,
      "step": 412370
    },
    {
      "epoch": 1.4452890893606283,
      "grad_norm": 3.078125,
      "learning_rate": 2.879104132754919e-05,
      "loss": 0.8525,
      "step": 412380
    },
    {
      "epoch": 1.4453241368675238,
      "grad_norm": 2.859375,
      "learning_rate": 2.879039229888549e-05,
      "loss": 0.8998,
      "step": 412390
    },
    {
      "epoch": 1.4453591843744196,
      "grad_norm": 2.421875,
      "learning_rate": 2.8789743270221787e-05,
      "loss": 0.81,
      "step": 412400
    },
    {
      "epoch": 1.445394231881315,
      "grad_norm": 3.0,
      "learning_rate": 2.8789094241558085e-05,
      "loss": 0.8745,
      "step": 412410
    },
    {
      "epoch": 1.4454292793882106,
      "grad_norm": 3.140625,
      "learning_rate": 2.8788445212894383e-05,
      "loss": 0.8821,
      "step": 412420
    },
    {
      "epoch": 1.4454643268951064,
      "grad_norm": 2.75,
      "learning_rate": 2.878779618423068e-05,
      "loss": 0.8596,
      "step": 412430
    },
    {
      "epoch": 1.445499374402002,
      "grad_norm": 2.953125,
      "learning_rate": 2.878714715556698e-05,
      "loss": 0.823,
      "step": 412440
    },
    {
      "epoch": 1.4455344219088975,
      "grad_norm": 3.0,
      "learning_rate": 2.8786498126903277e-05,
      "loss": 0.8023,
      "step": 412450
    },
    {
      "epoch": 1.4455694694157932,
      "grad_norm": 2.59375,
      "learning_rate": 2.8785849098239575e-05,
      "loss": 0.7344,
      "step": 412460
    },
    {
      "epoch": 1.4456045169226888,
      "grad_norm": 3.03125,
      "learning_rate": 2.8785200069575873e-05,
      "loss": 0.8755,
      "step": 412470
    },
    {
      "epoch": 1.4456395644295843,
      "grad_norm": 3.0,
      "learning_rate": 2.878455104091217e-05,
      "loss": 0.8338,
      "step": 412480
    },
    {
      "epoch": 1.4456746119364798,
      "grad_norm": 2.90625,
      "learning_rate": 2.8783902012248472e-05,
      "loss": 0.8818,
      "step": 412490
    },
    {
      "epoch": 1.4457096594433754,
      "grad_norm": 3.0625,
      "learning_rate": 2.878325298358477e-05,
      "loss": 0.8156,
      "step": 412500
    },
    {
      "epoch": 1.4457447069502711,
      "grad_norm": 2.9375,
      "learning_rate": 2.8782603954921068e-05,
      "loss": 0.8349,
      "step": 412510
    },
    {
      "epoch": 1.4457797544571667,
      "grad_norm": 2.71875,
      "learning_rate": 2.8781954926257366e-05,
      "loss": 0.9059,
      "step": 412520
    },
    {
      "epoch": 1.4458148019640622,
      "grad_norm": 3.1875,
      "learning_rate": 2.8781305897593664e-05,
      "loss": 0.9227,
      "step": 412530
    },
    {
      "epoch": 1.445849849470958,
      "grad_norm": 3.03125,
      "learning_rate": 2.8780656868929962e-05,
      "loss": 0.7738,
      "step": 412540
    },
    {
      "epoch": 1.4458848969778535,
      "grad_norm": 2.96875,
      "learning_rate": 2.878000784026626e-05,
      "loss": 0.8536,
      "step": 412550
    },
    {
      "epoch": 1.445919944484749,
      "grad_norm": 2.703125,
      "learning_rate": 2.8779358811602558e-05,
      "loss": 0.9059,
      "step": 412560
    },
    {
      "epoch": 1.4459549919916448,
      "grad_norm": 2.8125,
      "learning_rate": 2.8778709782938856e-05,
      "loss": 0.7744,
      "step": 412570
    },
    {
      "epoch": 1.4459900394985403,
      "grad_norm": 3.125,
      "learning_rate": 2.8778060754275154e-05,
      "loss": 0.7596,
      "step": 412580
    },
    {
      "epoch": 1.4460250870054359,
      "grad_norm": 2.9375,
      "learning_rate": 2.8777411725611452e-05,
      "loss": 0.8146,
      "step": 412590
    },
    {
      "epoch": 1.4460601345123314,
      "grad_norm": 2.875,
      "learning_rate": 2.877676269694775e-05,
      "loss": 0.8574,
      "step": 412600
    },
    {
      "epoch": 1.446095182019227,
      "grad_norm": 2.875,
      "learning_rate": 2.8776113668284048e-05,
      "loss": 0.7937,
      "step": 412610
    },
    {
      "epoch": 1.4461302295261227,
      "grad_norm": 2.65625,
      "learning_rate": 2.8775464639620346e-05,
      "loss": 0.8319,
      "step": 412620
    },
    {
      "epoch": 1.4461652770330182,
      "grad_norm": 2.953125,
      "learning_rate": 2.8774815610956647e-05,
      "loss": 0.8676,
      "step": 412630
    },
    {
      "epoch": 1.4462003245399138,
      "grad_norm": 2.796875,
      "learning_rate": 2.8774166582292945e-05,
      "loss": 0.7905,
      "step": 412640
    },
    {
      "epoch": 1.4462353720468095,
      "grad_norm": 2.71875,
      "learning_rate": 2.8773517553629243e-05,
      "loss": 0.809,
      "step": 412650
    },
    {
      "epoch": 1.446270419553705,
      "grad_norm": 3.0625,
      "learning_rate": 2.877286852496554e-05,
      "loss": 0.7991,
      "step": 412660
    },
    {
      "epoch": 1.4463054670606006,
      "grad_norm": 3.046875,
      "learning_rate": 2.877221949630184e-05,
      "loss": 0.8831,
      "step": 412670
    },
    {
      "epoch": 1.4463405145674963,
      "grad_norm": 2.265625,
      "learning_rate": 2.877157046763813e-05,
      "loss": 0.8061,
      "step": 412680
    },
    {
      "epoch": 1.4463755620743919,
      "grad_norm": 2.5625,
      "learning_rate": 2.877092143897443e-05,
      "loss": 0.818,
      "step": 412690
    },
    {
      "epoch": 1.4464106095812874,
      "grad_norm": 2.609375,
      "learning_rate": 2.8770272410310727e-05,
      "loss": 0.9332,
      "step": 412700
    },
    {
      "epoch": 1.446445657088183,
      "grad_norm": 2.734375,
      "learning_rate": 2.8769623381647025e-05,
      "loss": 0.8271,
      "step": 412710
    },
    {
      "epoch": 1.4464807045950785,
      "grad_norm": 3.015625,
      "learning_rate": 2.8768974352983326e-05,
      "loss": 0.9354,
      "step": 412720
    },
    {
      "epoch": 1.4465157521019743,
      "grad_norm": 2.96875,
      "learning_rate": 2.8768325324319624e-05,
      "loss": 0.8069,
      "step": 412730
    },
    {
      "epoch": 1.4465507996088698,
      "grad_norm": 3.0,
      "learning_rate": 2.8767676295655922e-05,
      "loss": 0.8202,
      "step": 412740
    },
    {
      "epoch": 1.4465858471157653,
      "grad_norm": 2.84375,
      "learning_rate": 2.876702726699222e-05,
      "loss": 0.8146,
      "step": 412750
    },
    {
      "epoch": 1.446620894622661,
      "grad_norm": 2.9375,
      "learning_rate": 2.8766378238328518e-05,
      "loss": 0.9022,
      "step": 412760
    },
    {
      "epoch": 1.4466559421295566,
      "grad_norm": 2.890625,
      "learning_rate": 2.8765729209664816e-05,
      "loss": 0.8508,
      "step": 412770
    },
    {
      "epoch": 1.4466909896364522,
      "grad_norm": 2.453125,
      "learning_rate": 2.8765080181001114e-05,
      "loss": 0.859,
      "step": 412780
    },
    {
      "epoch": 1.446726037143348,
      "grad_norm": 2.609375,
      "learning_rate": 2.8764431152337412e-05,
      "loss": 0.8453,
      "step": 412790
    },
    {
      "epoch": 1.4467610846502434,
      "grad_norm": 2.65625,
      "learning_rate": 2.876378212367371e-05,
      "loss": 0.7947,
      "step": 412800
    },
    {
      "epoch": 1.446796132157139,
      "grad_norm": 2.96875,
      "learning_rate": 2.8763133095010008e-05,
      "loss": 0.8512,
      "step": 412810
    },
    {
      "epoch": 1.4468311796640345,
      "grad_norm": 3.015625,
      "learning_rate": 2.8762484066346306e-05,
      "loss": 0.8507,
      "step": 412820
    },
    {
      "epoch": 1.4468662271709303,
      "grad_norm": 3.390625,
      "learning_rate": 2.8761835037682604e-05,
      "loss": 0.8232,
      "step": 412830
    },
    {
      "epoch": 1.4469012746778258,
      "grad_norm": 2.84375,
      "learning_rate": 2.8761186009018902e-05,
      "loss": 0.8399,
      "step": 412840
    },
    {
      "epoch": 1.4469363221847213,
      "grad_norm": 2.984375,
      "learning_rate": 2.87605369803552e-05,
      "loss": 0.8165,
      "step": 412850
    },
    {
      "epoch": 1.4469713696916169,
      "grad_norm": 2.609375,
      "learning_rate": 2.87598879516915e-05,
      "loss": 0.6894,
      "step": 412860
    },
    {
      "epoch": 1.4470064171985126,
      "grad_norm": 2.515625,
      "learning_rate": 2.87592389230278e-05,
      "loss": 0.7303,
      "step": 412870
    },
    {
      "epoch": 1.4470414647054082,
      "grad_norm": 2.625,
      "learning_rate": 2.8758589894364097e-05,
      "loss": 0.8804,
      "step": 412880
    },
    {
      "epoch": 1.4470765122123037,
      "grad_norm": 2.953125,
      "learning_rate": 2.8757940865700395e-05,
      "loss": 0.859,
      "step": 412890
    },
    {
      "epoch": 1.4471115597191995,
      "grad_norm": 3.03125,
      "learning_rate": 2.8757291837036693e-05,
      "loss": 0.8822,
      "step": 412900
    },
    {
      "epoch": 1.447146607226095,
      "grad_norm": 3.21875,
      "learning_rate": 2.875664280837299e-05,
      "loss": 0.8524,
      "step": 412910
    },
    {
      "epoch": 1.4471816547329905,
      "grad_norm": 2.640625,
      "learning_rate": 2.875599377970929e-05,
      "loss": 0.8147,
      "step": 412920
    },
    {
      "epoch": 1.4472167022398863,
      "grad_norm": 2.875,
      "learning_rate": 2.8755344751045587e-05,
      "loss": 0.8504,
      "step": 412930
    },
    {
      "epoch": 1.4472517497467818,
      "grad_norm": 2.5625,
      "learning_rate": 2.8754695722381885e-05,
      "loss": 0.7901,
      "step": 412940
    },
    {
      "epoch": 1.4472867972536774,
      "grad_norm": 3.1875,
      "learning_rate": 2.8754046693718183e-05,
      "loss": 0.8266,
      "step": 412950
    },
    {
      "epoch": 1.447321844760573,
      "grad_norm": 2.96875,
      "learning_rate": 2.875339766505448e-05,
      "loss": 0.8859,
      "step": 412960
    },
    {
      "epoch": 1.4473568922674684,
      "grad_norm": 2.703125,
      "learning_rate": 2.875274863639078e-05,
      "loss": 0.8587,
      "step": 412970
    },
    {
      "epoch": 1.4473919397743642,
      "grad_norm": 3.28125,
      "learning_rate": 2.8752099607727077e-05,
      "loss": 0.8854,
      "step": 412980
    },
    {
      "epoch": 1.4474269872812597,
      "grad_norm": 2.90625,
      "learning_rate": 2.8751450579063375e-05,
      "loss": 0.7886,
      "step": 412990
    },
    {
      "epoch": 1.4474620347881553,
      "grad_norm": 2.796875,
      "learning_rate": 2.8750801550399677e-05,
      "loss": 0.8499,
      "step": 413000
    },
    {
      "epoch": 1.447497082295051,
      "grad_norm": 2.671875,
      "learning_rate": 2.8750152521735975e-05,
      "loss": 0.8659,
      "step": 413010
    },
    {
      "epoch": 1.4475321298019466,
      "grad_norm": 2.828125,
      "learning_rate": 2.8749503493072273e-05,
      "loss": 0.8315,
      "step": 413020
    },
    {
      "epoch": 1.447567177308842,
      "grad_norm": 2.6875,
      "learning_rate": 2.874885446440857e-05,
      "loss": 0.8117,
      "step": 413030
    },
    {
      "epoch": 1.4476022248157379,
      "grad_norm": 3.171875,
      "learning_rate": 2.874820543574487e-05,
      "loss": 0.8206,
      "step": 413040
    },
    {
      "epoch": 1.4476372723226334,
      "grad_norm": 3.328125,
      "learning_rate": 2.874755640708116e-05,
      "loss": 0.8566,
      "step": 413050
    },
    {
      "epoch": 1.447672319829529,
      "grad_norm": 2.96875,
      "learning_rate": 2.8746907378417458e-05,
      "loss": 0.8987,
      "step": 413060
    },
    {
      "epoch": 1.4477073673364245,
      "grad_norm": 2.859375,
      "learning_rate": 2.8746258349753756e-05,
      "loss": 0.8605,
      "step": 413070
    },
    {
      "epoch": 1.44774241484332,
      "grad_norm": 3.15625,
      "learning_rate": 2.8745609321090057e-05,
      "loss": 0.9495,
      "step": 413080
    },
    {
      "epoch": 1.4477774623502158,
      "grad_norm": 3.109375,
      "learning_rate": 2.8744960292426355e-05,
      "loss": 0.8556,
      "step": 413090
    },
    {
      "epoch": 1.4478125098571113,
      "grad_norm": 2.890625,
      "learning_rate": 2.8744311263762653e-05,
      "loss": 0.8519,
      "step": 413100
    },
    {
      "epoch": 1.4478475573640068,
      "grad_norm": 3.078125,
      "learning_rate": 2.874366223509895e-05,
      "loss": 0.8278,
      "step": 413110
    },
    {
      "epoch": 1.4478826048709026,
      "grad_norm": 2.71875,
      "learning_rate": 2.874301320643525e-05,
      "loss": 0.9397,
      "step": 413120
    },
    {
      "epoch": 1.4479176523777981,
      "grad_norm": 2.828125,
      "learning_rate": 2.8742364177771547e-05,
      "loss": 0.8512,
      "step": 413130
    },
    {
      "epoch": 1.4479526998846937,
      "grad_norm": 3.046875,
      "learning_rate": 2.8741715149107845e-05,
      "loss": 0.7359,
      "step": 413140
    },
    {
      "epoch": 1.4479877473915894,
      "grad_norm": 2.765625,
      "learning_rate": 2.8741066120444143e-05,
      "loss": 0.7657,
      "step": 413150
    },
    {
      "epoch": 1.448022794898485,
      "grad_norm": 2.859375,
      "learning_rate": 2.874041709178044e-05,
      "loss": 0.9175,
      "step": 413160
    },
    {
      "epoch": 1.4480578424053805,
      "grad_norm": 2.84375,
      "learning_rate": 2.873976806311674e-05,
      "loss": 0.9067,
      "step": 413170
    },
    {
      "epoch": 1.448092889912276,
      "grad_norm": 3.234375,
      "learning_rate": 2.8739119034453037e-05,
      "loss": 0.8399,
      "step": 413180
    },
    {
      "epoch": 1.4481279374191716,
      "grad_norm": 2.5,
      "learning_rate": 2.8738470005789335e-05,
      "loss": 0.8977,
      "step": 413190
    },
    {
      "epoch": 1.4481629849260673,
      "grad_norm": 2.953125,
      "learning_rate": 2.8737820977125633e-05,
      "loss": 0.8118,
      "step": 413200
    },
    {
      "epoch": 1.4481980324329629,
      "grad_norm": 3.0625,
      "learning_rate": 2.873717194846193e-05,
      "loss": 0.837,
      "step": 413210
    },
    {
      "epoch": 1.4482330799398584,
      "grad_norm": 3.171875,
      "learning_rate": 2.8736522919798233e-05,
      "loss": 0.813,
      "step": 413220
    },
    {
      "epoch": 1.4482681274467541,
      "grad_norm": 3.3125,
      "learning_rate": 2.873587389113453e-05,
      "loss": 0.8412,
      "step": 413230
    },
    {
      "epoch": 1.4483031749536497,
      "grad_norm": 2.671875,
      "learning_rate": 2.873522486247083e-05,
      "loss": 0.8538,
      "step": 413240
    },
    {
      "epoch": 1.4483382224605452,
      "grad_norm": 3.03125,
      "learning_rate": 2.8734575833807127e-05,
      "loss": 0.8746,
      "step": 413250
    },
    {
      "epoch": 1.448373269967441,
      "grad_norm": 2.90625,
      "learning_rate": 2.8733926805143425e-05,
      "loss": 0.7888,
      "step": 413260
    },
    {
      "epoch": 1.4484083174743365,
      "grad_norm": 2.703125,
      "learning_rate": 2.8733277776479723e-05,
      "loss": 0.7523,
      "step": 413270
    },
    {
      "epoch": 1.448443364981232,
      "grad_norm": 3.390625,
      "learning_rate": 2.873262874781602e-05,
      "loss": 0.9372,
      "step": 413280
    },
    {
      "epoch": 1.4484784124881276,
      "grad_norm": 2.5,
      "learning_rate": 2.873197971915232e-05,
      "loss": 0.8131,
      "step": 413290
    },
    {
      "epoch": 1.4485134599950231,
      "grad_norm": 2.3125,
      "learning_rate": 2.8731330690488617e-05,
      "loss": 0.8578,
      "step": 413300
    },
    {
      "epoch": 1.4485485075019189,
      "grad_norm": 3.265625,
      "learning_rate": 2.8730681661824915e-05,
      "loss": 0.8071,
      "step": 413310
    },
    {
      "epoch": 1.4485835550088144,
      "grad_norm": 2.53125,
      "learning_rate": 2.8730032633161213e-05,
      "loss": 0.8738,
      "step": 413320
    },
    {
      "epoch": 1.44861860251571,
      "grad_norm": 2.5625,
      "learning_rate": 2.872938360449751e-05,
      "loss": 0.785,
      "step": 413330
    },
    {
      "epoch": 1.4486536500226057,
      "grad_norm": 2.953125,
      "learning_rate": 2.872873457583381e-05,
      "loss": 0.8725,
      "step": 413340
    },
    {
      "epoch": 1.4486886975295012,
      "grad_norm": 3.15625,
      "learning_rate": 2.8728085547170107e-05,
      "loss": 0.848,
      "step": 413350
    },
    {
      "epoch": 1.4487237450363968,
      "grad_norm": 3.3125,
      "learning_rate": 2.8727436518506408e-05,
      "loss": 0.9245,
      "step": 413360
    },
    {
      "epoch": 1.4487587925432925,
      "grad_norm": 2.953125,
      "learning_rate": 2.8726787489842706e-05,
      "loss": 0.9139,
      "step": 413370
    },
    {
      "epoch": 1.448793840050188,
      "grad_norm": 2.71875,
      "learning_rate": 2.8726138461179004e-05,
      "loss": 0.8177,
      "step": 413380
    },
    {
      "epoch": 1.4488288875570836,
      "grad_norm": 3.015625,
      "learning_rate": 2.8725489432515302e-05,
      "loss": 0.8763,
      "step": 413390
    },
    {
      "epoch": 1.4488639350639791,
      "grad_norm": 2.609375,
      "learning_rate": 2.87248404038516e-05,
      "loss": 0.801,
      "step": 413400
    },
    {
      "epoch": 1.4488989825708747,
      "grad_norm": 2.75,
      "learning_rate": 2.8724191375187898e-05,
      "loss": 0.7695,
      "step": 413410
    },
    {
      "epoch": 1.4489340300777704,
      "grad_norm": 2.71875,
      "learning_rate": 2.8723542346524196e-05,
      "loss": 0.7985,
      "step": 413420
    },
    {
      "epoch": 1.448969077584666,
      "grad_norm": 3.03125,
      "learning_rate": 2.8722893317860487e-05,
      "loss": 0.815,
      "step": 413430
    },
    {
      "epoch": 1.4490041250915615,
      "grad_norm": 2.84375,
      "learning_rate": 2.8722244289196785e-05,
      "loss": 0.8719,
      "step": 413440
    },
    {
      "epoch": 1.4490391725984573,
      "grad_norm": 3.3125,
      "learning_rate": 2.8721595260533087e-05,
      "loss": 0.8597,
      "step": 413450
    },
    {
      "epoch": 1.4490742201053528,
      "grad_norm": 3.34375,
      "learning_rate": 2.8720946231869385e-05,
      "loss": 0.887,
      "step": 413460
    },
    {
      "epoch": 1.4491092676122483,
      "grad_norm": 2.671875,
      "learning_rate": 2.8720297203205683e-05,
      "loss": 0.855,
      "step": 413470
    },
    {
      "epoch": 1.449144315119144,
      "grad_norm": 3.234375,
      "learning_rate": 2.871964817454198e-05,
      "loss": 0.8708,
      "step": 413480
    },
    {
      "epoch": 1.4491793626260396,
      "grad_norm": 3.1875,
      "learning_rate": 2.871899914587828e-05,
      "loss": 0.8909,
      "step": 413490
    },
    {
      "epoch": 1.4492144101329352,
      "grad_norm": 2.5,
      "learning_rate": 2.8718350117214577e-05,
      "loss": 0.7777,
      "step": 413500
    },
    {
      "epoch": 1.449249457639831,
      "grad_norm": 2.625,
      "learning_rate": 2.8717701088550875e-05,
      "loss": 0.8203,
      "step": 413510
    },
    {
      "epoch": 1.4492845051467265,
      "grad_norm": 2.65625,
      "learning_rate": 2.8717052059887173e-05,
      "loss": 0.8857,
      "step": 413520
    },
    {
      "epoch": 1.449319552653622,
      "grad_norm": 2.90625,
      "learning_rate": 2.871640303122347e-05,
      "loss": 0.8716,
      "step": 413530
    },
    {
      "epoch": 1.4493546001605175,
      "grad_norm": 3.1875,
      "learning_rate": 2.871575400255977e-05,
      "loss": 0.7974,
      "step": 413540
    },
    {
      "epoch": 1.449389647667413,
      "grad_norm": 2.640625,
      "learning_rate": 2.8715104973896067e-05,
      "loss": 0.8762,
      "step": 413550
    },
    {
      "epoch": 1.4494246951743088,
      "grad_norm": 2.78125,
      "learning_rate": 2.8714455945232365e-05,
      "loss": 0.8231,
      "step": 413560
    },
    {
      "epoch": 1.4494597426812044,
      "grad_norm": 3.578125,
      "learning_rate": 2.8713806916568663e-05,
      "loss": 0.7926,
      "step": 413570
    },
    {
      "epoch": 1.4494947901881,
      "grad_norm": 2.515625,
      "learning_rate": 2.871315788790496e-05,
      "loss": 0.7815,
      "step": 413580
    },
    {
      "epoch": 1.4495298376949957,
      "grad_norm": 2.8125,
      "learning_rate": 2.8712508859241262e-05,
      "loss": 0.8622,
      "step": 413590
    },
    {
      "epoch": 1.4495648852018912,
      "grad_norm": 3.0625,
      "learning_rate": 2.871185983057756e-05,
      "loss": 0.82,
      "step": 413600
    },
    {
      "epoch": 1.4495999327087867,
      "grad_norm": 3.15625,
      "learning_rate": 2.8711210801913858e-05,
      "loss": 0.9155,
      "step": 413610
    },
    {
      "epoch": 1.4496349802156825,
      "grad_norm": 2.359375,
      "learning_rate": 2.8710561773250156e-05,
      "loss": 0.7846,
      "step": 413620
    },
    {
      "epoch": 1.449670027722578,
      "grad_norm": 3.53125,
      "learning_rate": 2.8709912744586454e-05,
      "loss": 0.8663,
      "step": 413630
    },
    {
      "epoch": 1.4497050752294736,
      "grad_norm": 2.890625,
      "learning_rate": 2.8709263715922752e-05,
      "loss": 0.8027,
      "step": 413640
    },
    {
      "epoch": 1.449740122736369,
      "grad_norm": 2.9375,
      "learning_rate": 2.870861468725905e-05,
      "loss": 0.7269,
      "step": 413650
    },
    {
      "epoch": 1.4497751702432646,
      "grad_norm": 3.21875,
      "learning_rate": 2.8707965658595348e-05,
      "loss": 0.9626,
      "step": 413660
    },
    {
      "epoch": 1.4498102177501604,
      "grad_norm": 3.109375,
      "learning_rate": 2.8707316629931646e-05,
      "loss": 0.8612,
      "step": 413670
    },
    {
      "epoch": 1.449845265257056,
      "grad_norm": 2.90625,
      "learning_rate": 2.8706667601267944e-05,
      "loss": 0.8362,
      "step": 413680
    },
    {
      "epoch": 1.4498803127639515,
      "grad_norm": 3.015625,
      "learning_rate": 2.8706018572604242e-05,
      "loss": 0.8498,
      "step": 413690
    },
    {
      "epoch": 1.4499153602708472,
      "grad_norm": 2.59375,
      "learning_rate": 2.870536954394054e-05,
      "loss": 0.8397,
      "step": 413700
    },
    {
      "epoch": 1.4499504077777428,
      "grad_norm": 2.625,
      "learning_rate": 2.8704720515276838e-05,
      "loss": 0.7111,
      "step": 413710
    },
    {
      "epoch": 1.4499854552846383,
      "grad_norm": 2.375,
      "learning_rate": 2.8704071486613136e-05,
      "loss": 0.7752,
      "step": 413720
    },
    {
      "epoch": 1.450020502791534,
      "grad_norm": 2.609375,
      "learning_rate": 2.8703422457949437e-05,
      "loss": 0.8745,
      "step": 413730
    },
    {
      "epoch": 1.4500555502984296,
      "grad_norm": 3.0625,
      "learning_rate": 2.8702773429285735e-05,
      "loss": 0.9132,
      "step": 413740
    },
    {
      "epoch": 1.4500905978053251,
      "grad_norm": 2.90625,
      "learning_rate": 2.8702124400622033e-05,
      "loss": 0.85,
      "step": 413750
    },
    {
      "epoch": 1.4501256453122207,
      "grad_norm": 2.90625,
      "learning_rate": 2.870147537195833e-05,
      "loss": 0.8632,
      "step": 413760
    },
    {
      "epoch": 1.4501606928191162,
      "grad_norm": 2.84375,
      "learning_rate": 2.870082634329463e-05,
      "loss": 0.7897,
      "step": 413770
    },
    {
      "epoch": 1.450195740326012,
      "grad_norm": 2.5,
      "learning_rate": 2.8700177314630927e-05,
      "loss": 0.8771,
      "step": 413780
    },
    {
      "epoch": 1.4502307878329075,
      "grad_norm": 3.015625,
      "learning_rate": 2.8699528285967225e-05,
      "loss": 0.7938,
      "step": 413790
    },
    {
      "epoch": 1.450265835339803,
      "grad_norm": 3.265625,
      "learning_rate": 2.8698879257303523e-05,
      "loss": 0.8907,
      "step": 413800
    },
    {
      "epoch": 1.4503008828466988,
      "grad_norm": 2.671875,
      "learning_rate": 2.8698230228639815e-05,
      "loss": 0.8785,
      "step": 413810
    },
    {
      "epoch": 1.4503359303535943,
      "grad_norm": 2.515625,
      "learning_rate": 2.8697581199976116e-05,
      "loss": 0.8689,
      "step": 413820
    },
    {
      "epoch": 1.4503709778604899,
      "grad_norm": 2.765625,
      "learning_rate": 2.8696932171312414e-05,
      "loss": 0.7568,
      "step": 413830
    },
    {
      "epoch": 1.4504060253673856,
      "grad_norm": 2.859375,
      "learning_rate": 2.8696283142648712e-05,
      "loss": 0.721,
      "step": 413840
    },
    {
      "epoch": 1.4504410728742811,
      "grad_norm": 3.171875,
      "learning_rate": 2.869563411398501e-05,
      "loss": 0.914,
      "step": 413850
    },
    {
      "epoch": 1.4504761203811767,
      "grad_norm": 3.125,
      "learning_rate": 2.8694985085321308e-05,
      "loss": 0.9229,
      "step": 413860
    },
    {
      "epoch": 1.4505111678880722,
      "grad_norm": 2.734375,
      "learning_rate": 2.8694336056657606e-05,
      "loss": 0.754,
      "step": 413870
    },
    {
      "epoch": 1.4505462153949678,
      "grad_norm": 2.640625,
      "learning_rate": 2.8693687027993904e-05,
      "loss": 0.756,
      "step": 413880
    },
    {
      "epoch": 1.4505812629018635,
      "grad_norm": 2.9375,
      "learning_rate": 2.8693037999330202e-05,
      "loss": 0.9196,
      "step": 413890
    },
    {
      "epoch": 1.450616310408759,
      "grad_norm": 2.765625,
      "learning_rate": 2.86923889706665e-05,
      "loss": 0.8953,
      "step": 413900
    },
    {
      "epoch": 1.4506513579156546,
      "grad_norm": 2.53125,
      "learning_rate": 2.8691739942002798e-05,
      "loss": 0.8451,
      "step": 413910
    },
    {
      "epoch": 1.4506864054225503,
      "grad_norm": 3.109375,
      "learning_rate": 2.8691090913339096e-05,
      "loss": 0.8103,
      "step": 413920
    },
    {
      "epoch": 1.4507214529294459,
      "grad_norm": 2.703125,
      "learning_rate": 2.8690441884675394e-05,
      "loss": 0.861,
      "step": 413930
    },
    {
      "epoch": 1.4507565004363414,
      "grad_norm": 3.03125,
      "learning_rate": 2.8689792856011692e-05,
      "loss": 0.8625,
      "step": 413940
    },
    {
      "epoch": 1.4507915479432372,
      "grad_norm": 3.25,
      "learning_rate": 2.868914382734799e-05,
      "loss": 0.8328,
      "step": 413950
    },
    {
      "epoch": 1.4508265954501327,
      "grad_norm": 2.5,
      "learning_rate": 2.868849479868429e-05,
      "loss": 0.7941,
      "step": 413960
    },
    {
      "epoch": 1.4508616429570282,
      "grad_norm": 2.625,
      "learning_rate": 2.868784577002059e-05,
      "loss": 0.8392,
      "step": 413970
    },
    {
      "epoch": 1.4508966904639238,
      "grad_norm": 3.109375,
      "learning_rate": 2.8687196741356887e-05,
      "loss": 0.8835,
      "step": 413980
    },
    {
      "epoch": 1.4509317379708193,
      "grad_norm": 2.984375,
      "learning_rate": 2.8686547712693185e-05,
      "loss": 0.8492,
      "step": 413990
    },
    {
      "epoch": 1.450966785477715,
      "grad_norm": 2.75,
      "learning_rate": 2.8685898684029483e-05,
      "loss": 0.8465,
      "step": 414000
    },
    {
      "epoch": 1.4510018329846106,
      "grad_norm": 2.984375,
      "learning_rate": 2.868524965536578e-05,
      "loss": 0.8957,
      "step": 414010
    },
    {
      "epoch": 1.4510368804915061,
      "grad_norm": 2.796875,
      "learning_rate": 2.868460062670208e-05,
      "loss": 0.8912,
      "step": 414020
    },
    {
      "epoch": 1.451071927998402,
      "grad_norm": 3.15625,
      "learning_rate": 2.8683951598038377e-05,
      "loss": 0.8376,
      "step": 414030
    },
    {
      "epoch": 1.4511069755052974,
      "grad_norm": 2.78125,
      "learning_rate": 2.8683302569374675e-05,
      "loss": 0.7881,
      "step": 414040
    },
    {
      "epoch": 1.451142023012193,
      "grad_norm": 2.875,
      "learning_rate": 2.8682653540710973e-05,
      "loss": 0.8252,
      "step": 414050
    },
    {
      "epoch": 1.4511770705190887,
      "grad_norm": 2.984375,
      "learning_rate": 2.868200451204727e-05,
      "loss": 0.9326,
      "step": 414060
    },
    {
      "epoch": 1.4512121180259843,
      "grad_norm": 2.78125,
      "learning_rate": 2.868135548338357e-05,
      "loss": 0.879,
      "step": 414070
    },
    {
      "epoch": 1.4512471655328798,
      "grad_norm": 3.0625,
      "learning_rate": 2.8680706454719867e-05,
      "loss": 0.8377,
      "step": 414080
    },
    {
      "epoch": 1.4512822130397753,
      "grad_norm": 3.1875,
      "learning_rate": 2.868005742605617e-05,
      "loss": 0.8276,
      "step": 414090
    },
    {
      "epoch": 1.451317260546671,
      "grad_norm": 2.828125,
      "learning_rate": 2.8679408397392467e-05,
      "loss": 0.8196,
      "step": 414100
    },
    {
      "epoch": 1.4513523080535666,
      "grad_norm": 3.0,
      "learning_rate": 2.8678759368728765e-05,
      "loss": 0.9543,
      "step": 414110
    },
    {
      "epoch": 1.4513873555604622,
      "grad_norm": 2.8125,
      "learning_rate": 2.8678110340065063e-05,
      "loss": 0.8014,
      "step": 414120
    },
    {
      "epoch": 1.4514224030673577,
      "grad_norm": 3.125,
      "learning_rate": 2.867746131140136e-05,
      "loss": 0.9478,
      "step": 414130
    },
    {
      "epoch": 1.4514574505742535,
      "grad_norm": 2.671875,
      "learning_rate": 2.867681228273766e-05,
      "loss": 0.7447,
      "step": 414140
    },
    {
      "epoch": 1.451492498081149,
      "grad_norm": 3.25,
      "learning_rate": 2.8676163254073957e-05,
      "loss": 0.9398,
      "step": 414150
    },
    {
      "epoch": 1.4515275455880445,
      "grad_norm": 3.0,
      "learning_rate": 2.8675514225410255e-05,
      "loss": 0.7598,
      "step": 414160
    },
    {
      "epoch": 1.4515625930949403,
      "grad_norm": 2.796875,
      "learning_rate": 2.8674865196746553e-05,
      "loss": 0.7917,
      "step": 414170
    },
    {
      "epoch": 1.4515976406018358,
      "grad_norm": 2.921875,
      "learning_rate": 2.8674216168082847e-05,
      "loss": 0.8707,
      "step": 414180
    },
    {
      "epoch": 1.4516326881087314,
      "grad_norm": 3.359375,
      "learning_rate": 2.8673567139419145e-05,
      "loss": 0.9049,
      "step": 414190
    },
    {
      "epoch": 1.4516677356156271,
      "grad_norm": 2.5,
      "learning_rate": 2.8672918110755443e-05,
      "loss": 0.7909,
      "step": 414200
    },
    {
      "epoch": 1.4517027831225227,
      "grad_norm": 3.375,
      "learning_rate": 2.867226908209174e-05,
      "loss": 0.8425,
      "step": 414210
    },
    {
      "epoch": 1.4517378306294182,
      "grad_norm": 3.296875,
      "learning_rate": 2.867162005342804e-05,
      "loss": 0.9055,
      "step": 414220
    },
    {
      "epoch": 1.4517728781363137,
      "grad_norm": 2.84375,
      "learning_rate": 2.8670971024764337e-05,
      "loss": 0.8352,
      "step": 414230
    },
    {
      "epoch": 1.4518079256432093,
      "grad_norm": 2.625,
      "learning_rate": 2.8670321996100635e-05,
      "loss": 0.9022,
      "step": 414240
    },
    {
      "epoch": 1.451842973150105,
      "grad_norm": 2.421875,
      "learning_rate": 2.8669672967436933e-05,
      "loss": 0.866,
      "step": 414250
    },
    {
      "epoch": 1.4518780206570006,
      "grad_norm": 3.0625,
      "learning_rate": 2.866902393877323e-05,
      "loss": 0.8234,
      "step": 414260
    },
    {
      "epoch": 1.451913068163896,
      "grad_norm": 3.265625,
      "learning_rate": 2.866837491010953e-05,
      "loss": 0.7987,
      "step": 414270
    },
    {
      "epoch": 1.4519481156707919,
      "grad_norm": 3.03125,
      "learning_rate": 2.8667725881445827e-05,
      "loss": 0.9789,
      "step": 414280
    },
    {
      "epoch": 1.4519831631776874,
      "grad_norm": 2.484375,
      "learning_rate": 2.8667076852782125e-05,
      "loss": 0.7637,
      "step": 414290
    },
    {
      "epoch": 1.452018210684583,
      "grad_norm": 2.90625,
      "learning_rate": 2.8666427824118423e-05,
      "loss": 0.8696,
      "step": 414300
    },
    {
      "epoch": 1.4520532581914787,
      "grad_norm": 2.8125,
      "learning_rate": 2.866577879545472e-05,
      "loss": 0.8488,
      "step": 414310
    },
    {
      "epoch": 1.4520883056983742,
      "grad_norm": 3.265625,
      "learning_rate": 2.8665129766791023e-05,
      "loss": 0.9476,
      "step": 414320
    },
    {
      "epoch": 1.4521233532052698,
      "grad_norm": 3.0,
      "learning_rate": 2.866448073812732e-05,
      "loss": 0.8316,
      "step": 414330
    },
    {
      "epoch": 1.4521584007121653,
      "grad_norm": 3.015625,
      "learning_rate": 2.866383170946362e-05,
      "loss": 0.9421,
      "step": 414340
    },
    {
      "epoch": 1.4521934482190608,
      "grad_norm": 3.359375,
      "learning_rate": 2.8663182680799917e-05,
      "loss": 0.9209,
      "step": 414350
    },
    {
      "epoch": 1.4522284957259566,
      "grad_norm": 2.65625,
      "learning_rate": 2.8662533652136215e-05,
      "loss": 0.8822,
      "step": 414360
    },
    {
      "epoch": 1.4522635432328521,
      "grad_norm": 2.703125,
      "learning_rate": 2.8661884623472513e-05,
      "loss": 0.8754,
      "step": 414370
    },
    {
      "epoch": 1.4522985907397477,
      "grad_norm": 3.09375,
      "learning_rate": 2.866123559480881e-05,
      "loss": 0.8287,
      "step": 414380
    },
    {
      "epoch": 1.4523336382466434,
      "grad_norm": 2.84375,
      "learning_rate": 2.866058656614511e-05,
      "loss": 0.8454,
      "step": 414390
    },
    {
      "epoch": 1.452368685753539,
      "grad_norm": 2.90625,
      "learning_rate": 2.8659937537481407e-05,
      "loss": 0.8521,
      "step": 414400
    },
    {
      "epoch": 1.4524037332604345,
      "grad_norm": 2.703125,
      "learning_rate": 2.8659288508817705e-05,
      "loss": 0.8791,
      "step": 414410
    },
    {
      "epoch": 1.4524387807673302,
      "grad_norm": 2.796875,
      "learning_rate": 2.8658639480154003e-05,
      "loss": 0.7971,
      "step": 414420
    },
    {
      "epoch": 1.4524738282742258,
      "grad_norm": 2.828125,
      "learning_rate": 2.86579904514903e-05,
      "loss": 0.8326,
      "step": 414430
    },
    {
      "epoch": 1.4525088757811213,
      "grad_norm": 3.03125,
      "learning_rate": 2.86573414228266e-05,
      "loss": 0.7906,
      "step": 414440
    },
    {
      "epoch": 1.4525439232880168,
      "grad_norm": 3.1875,
      "learning_rate": 2.8656692394162897e-05,
      "loss": 0.8526,
      "step": 414450
    },
    {
      "epoch": 1.4525789707949124,
      "grad_norm": 3.015625,
      "learning_rate": 2.8656043365499198e-05,
      "loss": 0.8595,
      "step": 414460
    },
    {
      "epoch": 1.4526140183018081,
      "grad_norm": 2.953125,
      "learning_rate": 2.8655394336835496e-05,
      "loss": 0.915,
      "step": 414470
    },
    {
      "epoch": 1.4526490658087037,
      "grad_norm": 3.203125,
      "learning_rate": 2.8654745308171794e-05,
      "loss": 0.8422,
      "step": 414480
    },
    {
      "epoch": 1.4526841133155992,
      "grad_norm": 2.734375,
      "learning_rate": 2.8654096279508092e-05,
      "loss": 0.8391,
      "step": 414490
    },
    {
      "epoch": 1.452719160822495,
      "grad_norm": 2.90625,
      "learning_rate": 2.865344725084439e-05,
      "loss": 0.871,
      "step": 414500
    },
    {
      "epoch": 1.4527542083293905,
      "grad_norm": 2.90625,
      "learning_rate": 2.8652798222180688e-05,
      "loss": 0.8167,
      "step": 414510
    },
    {
      "epoch": 1.452789255836286,
      "grad_norm": 2.828125,
      "learning_rate": 2.8652149193516986e-05,
      "loss": 0.8559,
      "step": 414520
    },
    {
      "epoch": 1.4528243033431818,
      "grad_norm": 4.25,
      "learning_rate": 2.8651500164853284e-05,
      "loss": 0.7907,
      "step": 414530
    },
    {
      "epoch": 1.4528593508500773,
      "grad_norm": 2.9375,
      "learning_rate": 2.8650851136189582e-05,
      "loss": 0.8684,
      "step": 414540
    },
    {
      "epoch": 1.4528943983569729,
      "grad_norm": 3.296875,
      "learning_rate": 2.865020210752588e-05,
      "loss": 0.8258,
      "step": 414550
    },
    {
      "epoch": 1.4529294458638684,
      "grad_norm": 3.265625,
      "learning_rate": 2.8649553078862175e-05,
      "loss": 0.8191,
      "step": 414560
    },
    {
      "epoch": 1.452964493370764,
      "grad_norm": 3.375,
      "learning_rate": 2.8648904050198473e-05,
      "loss": 0.866,
      "step": 414570
    },
    {
      "epoch": 1.4529995408776597,
      "grad_norm": 3.140625,
      "learning_rate": 2.864825502153477e-05,
      "loss": 0.7827,
      "step": 414580
    },
    {
      "epoch": 1.4530345883845552,
      "grad_norm": 4.15625,
      "learning_rate": 2.864760599287107e-05,
      "loss": 0.8714,
      "step": 414590
    },
    {
      "epoch": 1.4530696358914508,
      "grad_norm": 2.9375,
      "learning_rate": 2.8646956964207367e-05,
      "loss": 0.9077,
      "step": 414600
    },
    {
      "epoch": 1.4531046833983465,
      "grad_norm": 2.96875,
      "learning_rate": 2.8646307935543665e-05,
      "loss": 0.8356,
      "step": 414610
    },
    {
      "epoch": 1.453139730905242,
      "grad_norm": 2.703125,
      "learning_rate": 2.8645658906879963e-05,
      "loss": 0.8307,
      "step": 414620
    },
    {
      "epoch": 1.4531747784121376,
      "grad_norm": 2.359375,
      "learning_rate": 2.864500987821626e-05,
      "loss": 0.8489,
      "step": 414630
    },
    {
      "epoch": 1.4532098259190334,
      "grad_norm": 2.921875,
      "learning_rate": 2.864436084955256e-05,
      "loss": 0.7806,
      "step": 414640
    },
    {
      "epoch": 1.453244873425929,
      "grad_norm": 2.859375,
      "learning_rate": 2.8643711820888857e-05,
      "loss": 0.7911,
      "step": 414650
    },
    {
      "epoch": 1.4532799209328244,
      "grad_norm": 3.171875,
      "learning_rate": 2.8643062792225155e-05,
      "loss": 0.9076,
      "step": 414660
    },
    {
      "epoch": 1.45331496843972,
      "grad_norm": 2.65625,
      "learning_rate": 2.8642413763561453e-05,
      "loss": 0.8635,
      "step": 414670
    },
    {
      "epoch": 1.4533500159466155,
      "grad_norm": 2.96875,
      "learning_rate": 2.864176473489775e-05,
      "loss": 0.8628,
      "step": 414680
    },
    {
      "epoch": 1.4533850634535113,
      "grad_norm": 2.921875,
      "learning_rate": 2.8641115706234052e-05,
      "loss": 0.8235,
      "step": 414690
    },
    {
      "epoch": 1.4534201109604068,
      "grad_norm": 2.765625,
      "learning_rate": 2.864046667757035e-05,
      "loss": 0.8481,
      "step": 414700
    },
    {
      "epoch": 1.4534551584673023,
      "grad_norm": 2.734375,
      "learning_rate": 2.8639817648906648e-05,
      "loss": 0.8161,
      "step": 414710
    },
    {
      "epoch": 1.453490205974198,
      "grad_norm": 2.921875,
      "learning_rate": 2.8639168620242946e-05,
      "loss": 0.8573,
      "step": 414720
    },
    {
      "epoch": 1.4535252534810936,
      "grad_norm": 3.1875,
      "learning_rate": 2.8638519591579244e-05,
      "loss": 0.8273,
      "step": 414730
    },
    {
      "epoch": 1.4535603009879892,
      "grad_norm": 3.078125,
      "learning_rate": 2.8637870562915542e-05,
      "loss": 0.8239,
      "step": 414740
    },
    {
      "epoch": 1.453595348494885,
      "grad_norm": 2.71875,
      "learning_rate": 2.863722153425184e-05,
      "loss": 0.8548,
      "step": 414750
    },
    {
      "epoch": 1.4536303960017805,
      "grad_norm": 3.125,
      "learning_rate": 2.8636572505588138e-05,
      "loss": 0.7678,
      "step": 414760
    },
    {
      "epoch": 1.453665443508676,
      "grad_norm": 2.8125,
      "learning_rate": 2.8635923476924436e-05,
      "loss": 0.8567,
      "step": 414770
    },
    {
      "epoch": 1.4537004910155715,
      "grad_norm": 3.015625,
      "learning_rate": 2.8635274448260734e-05,
      "loss": 0.8284,
      "step": 414780
    },
    {
      "epoch": 1.4537355385224673,
      "grad_norm": 2.6875,
      "learning_rate": 2.8634625419597032e-05,
      "loss": 0.8835,
      "step": 414790
    },
    {
      "epoch": 1.4537705860293628,
      "grad_norm": 2.90625,
      "learning_rate": 2.863397639093333e-05,
      "loss": 0.8928,
      "step": 414800
    },
    {
      "epoch": 1.4538056335362584,
      "grad_norm": 3.3125,
      "learning_rate": 2.8633327362269628e-05,
      "loss": 0.8161,
      "step": 414810
    },
    {
      "epoch": 1.453840681043154,
      "grad_norm": 2.671875,
      "learning_rate": 2.8632678333605926e-05,
      "loss": 0.8062,
      "step": 414820
    },
    {
      "epoch": 1.4538757285500497,
      "grad_norm": 2.875,
      "learning_rate": 2.8632029304942228e-05,
      "loss": 0.837,
      "step": 414830
    },
    {
      "epoch": 1.4539107760569452,
      "grad_norm": 2.859375,
      "learning_rate": 2.8631380276278526e-05,
      "loss": 0.8409,
      "step": 414840
    },
    {
      "epoch": 1.4539458235638407,
      "grad_norm": 2.828125,
      "learning_rate": 2.8630731247614824e-05,
      "loss": 0.7964,
      "step": 414850
    },
    {
      "epoch": 1.4539808710707365,
      "grad_norm": 2.828125,
      "learning_rate": 2.863008221895112e-05,
      "loss": 0.9113,
      "step": 414860
    },
    {
      "epoch": 1.454015918577632,
      "grad_norm": 2.6875,
      "learning_rate": 2.862943319028742e-05,
      "loss": 0.7881,
      "step": 414870
    },
    {
      "epoch": 1.4540509660845276,
      "grad_norm": 2.40625,
      "learning_rate": 2.8628784161623718e-05,
      "loss": 0.7938,
      "step": 414880
    },
    {
      "epoch": 1.4540860135914233,
      "grad_norm": 2.609375,
      "learning_rate": 2.8628135132960016e-05,
      "loss": 0.834,
      "step": 414890
    },
    {
      "epoch": 1.4541210610983188,
      "grad_norm": 2.78125,
      "learning_rate": 2.8627486104296314e-05,
      "loss": 0.8118,
      "step": 414900
    },
    {
      "epoch": 1.4541561086052144,
      "grad_norm": 3.265625,
      "learning_rate": 2.862683707563261e-05,
      "loss": 0.8071,
      "step": 414910
    },
    {
      "epoch": 1.45419115611211,
      "grad_norm": 2.640625,
      "learning_rate": 2.862618804696891e-05,
      "loss": 0.8776,
      "step": 414920
    },
    {
      "epoch": 1.4542262036190055,
      "grad_norm": 3.203125,
      "learning_rate": 2.8625539018305208e-05,
      "loss": 0.901,
      "step": 414930
    },
    {
      "epoch": 1.4542612511259012,
      "grad_norm": 2.859375,
      "learning_rate": 2.8624889989641502e-05,
      "loss": 0.8174,
      "step": 414940
    },
    {
      "epoch": 1.4542962986327967,
      "grad_norm": 3.125,
      "learning_rate": 2.86242409609778e-05,
      "loss": 0.8076,
      "step": 414950
    },
    {
      "epoch": 1.4543313461396923,
      "grad_norm": 3.078125,
      "learning_rate": 2.8623591932314098e-05,
      "loss": 0.9394,
      "step": 414960
    },
    {
      "epoch": 1.454366393646588,
      "grad_norm": 3.109375,
      "learning_rate": 2.8622942903650396e-05,
      "loss": 0.9258,
      "step": 414970
    },
    {
      "epoch": 1.4544014411534836,
      "grad_norm": 3.21875,
      "learning_rate": 2.8622293874986694e-05,
      "loss": 0.9028,
      "step": 414980
    },
    {
      "epoch": 1.4544364886603791,
      "grad_norm": 3.609375,
      "learning_rate": 2.8621644846322992e-05,
      "loss": 0.9201,
      "step": 414990
    },
    {
      "epoch": 1.4544715361672749,
      "grad_norm": 3.40625,
      "learning_rate": 2.862099581765929e-05,
      "loss": 0.9771,
      "step": 415000
    },
    {
      "epoch": 1.4544715361672749,
      "eval_loss": 0.7921142578125,
      "eval_runtime": 560.4113,
      "eval_samples_per_second": 678.851,
      "eval_steps_per_second": 56.571,
      "step": 415000
    },
    {
      "epoch": 1.4545065836741704,
      "grad_norm": 3.21875,
      "learning_rate": 2.8620346788995588e-05,
      "loss": 0.8523,
      "step": 415010
    },
    {
      "epoch": 1.454541631181066,
      "grad_norm": 2.984375,
      "learning_rate": 2.8619697760331886e-05,
      "loss": 0.848,
      "step": 415020
    },
    {
      "epoch": 1.4545766786879615,
      "grad_norm": 2.953125,
      "learning_rate": 2.8619048731668184e-05,
      "loss": 0.8509,
      "step": 415030
    },
    {
      "epoch": 1.454611726194857,
      "grad_norm": 2.53125,
      "learning_rate": 2.8618399703004482e-05,
      "loss": 0.8121,
      "step": 415040
    },
    {
      "epoch": 1.4546467737017528,
      "grad_norm": 2.203125,
      "learning_rate": 2.861775067434078e-05,
      "loss": 0.8308,
      "step": 415050
    },
    {
      "epoch": 1.4546818212086483,
      "grad_norm": 2.828125,
      "learning_rate": 2.861710164567708e-05,
      "loss": 0.7919,
      "step": 415060
    },
    {
      "epoch": 1.4547168687155438,
      "grad_norm": 2.703125,
      "learning_rate": 2.861645261701338e-05,
      "loss": 0.8177,
      "step": 415070
    },
    {
      "epoch": 1.4547519162224396,
      "grad_norm": 2.890625,
      "learning_rate": 2.8615803588349678e-05,
      "loss": 0.8849,
      "step": 415080
    },
    {
      "epoch": 1.4547869637293351,
      "grad_norm": 2.96875,
      "learning_rate": 2.8615154559685976e-05,
      "loss": 0.862,
      "step": 415090
    },
    {
      "epoch": 1.4548220112362307,
      "grad_norm": 2.40625,
      "learning_rate": 2.8614505531022274e-05,
      "loss": 0.791,
      "step": 415100
    },
    {
      "epoch": 1.4548570587431264,
      "grad_norm": 2.90625,
      "learning_rate": 2.861385650235857e-05,
      "loss": 0.7949,
      "step": 415110
    },
    {
      "epoch": 1.454892106250022,
      "grad_norm": 3.15625,
      "learning_rate": 2.861320747369487e-05,
      "loss": 0.8757,
      "step": 415120
    },
    {
      "epoch": 1.4549271537569175,
      "grad_norm": 2.96875,
      "learning_rate": 2.8612558445031168e-05,
      "loss": 0.8317,
      "step": 415130
    },
    {
      "epoch": 1.454962201263813,
      "grad_norm": 3.015625,
      "learning_rate": 2.8611909416367466e-05,
      "loss": 0.8331,
      "step": 415140
    },
    {
      "epoch": 1.4549972487707086,
      "grad_norm": 2.5625,
      "learning_rate": 2.8611260387703764e-05,
      "loss": 0.8212,
      "step": 415150
    },
    {
      "epoch": 1.4550322962776043,
      "grad_norm": 3.0625,
      "learning_rate": 2.861061135904006e-05,
      "loss": 0.8539,
      "step": 415160
    },
    {
      "epoch": 1.4550673437844999,
      "grad_norm": 2.890625,
      "learning_rate": 2.860996233037636e-05,
      "loss": 0.8144,
      "step": 415170
    },
    {
      "epoch": 1.4551023912913954,
      "grad_norm": 3.140625,
      "learning_rate": 2.8609313301712658e-05,
      "loss": 0.817,
      "step": 415180
    },
    {
      "epoch": 1.4551374387982912,
      "grad_norm": 2.8125,
      "learning_rate": 2.860866427304896e-05,
      "loss": 0.841,
      "step": 415190
    },
    {
      "epoch": 1.4551724863051867,
      "grad_norm": 2.890625,
      "learning_rate": 2.8608015244385257e-05,
      "loss": 0.8773,
      "step": 415200
    },
    {
      "epoch": 1.4552075338120822,
      "grad_norm": 3.515625,
      "learning_rate": 2.8607366215721555e-05,
      "loss": 0.8703,
      "step": 415210
    },
    {
      "epoch": 1.455242581318978,
      "grad_norm": 2.75,
      "learning_rate": 2.8606717187057853e-05,
      "loss": 0.7205,
      "step": 415220
    },
    {
      "epoch": 1.4552776288258735,
      "grad_norm": 2.984375,
      "learning_rate": 2.860606815839415e-05,
      "loss": 0.8416,
      "step": 415230
    },
    {
      "epoch": 1.455312676332769,
      "grad_norm": 2.90625,
      "learning_rate": 2.860541912973045e-05,
      "loss": 0.8509,
      "step": 415240
    },
    {
      "epoch": 1.4553477238396646,
      "grad_norm": 3.03125,
      "learning_rate": 2.8604770101066747e-05,
      "loss": 0.7554,
      "step": 415250
    },
    {
      "epoch": 1.4553827713465601,
      "grad_norm": 3.140625,
      "learning_rate": 2.8604121072403045e-05,
      "loss": 0.8812,
      "step": 415260
    },
    {
      "epoch": 1.455417818853456,
      "grad_norm": 2.59375,
      "learning_rate": 2.8603472043739343e-05,
      "loss": 0.8022,
      "step": 415270
    },
    {
      "epoch": 1.4554528663603514,
      "grad_norm": 2.703125,
      "learning_rate": 2.860282301507564e-05,
      "loss": 0.8244,
      "step": 415280
    },
    {
      "epoch": 1.455487913867247,
      "grad_norm": 3.140625,
      "learning_rate": 2.860217398641194e-05,
      "loss": 0.817,
      "step": 415290
    },
    {
      "epoch": 1.4555229613741427,
      "grad_norm": 2.75,
      "learning_rate": 2.8601524957748237e-05,
      "loss": 0.9051,
      "step": 415300
    },
    {
      "epoch": 1.4555580088810383,
      "grad_norm": 2.890625,
      "learning_rate": 2.860087592908453e-05,
      "loss": 0.8786,
      "step": 415310
    },
    {
      "epoch": 1.4555930563879338,
      "grad_norm": 2.34375,
      "learning_rate": 2.860022690042083e-05,
      "loss": 0.874,
      "step": 415320
    },
    {
      "epoch": 1.4556281038948296,
      "grad_norm": 3.515625,
      "learning_rate": 2.8599577871757128e-05,
      "loss": 0.8562,
      "step": 415330
    },
    {
      "epoch": 1.455663151401725,
      "grad_norm": 2.84375,
      "learning_rate": 2.8598928843093426e-05,
      "loss": 0.9248,
      "step": 415340
    },
    {
      "epoch": 1.4556981989086206,
      "grad_norm": 2.71875,
      "learning_rate": 2.8598279814429724e-05,
      "loss": 0.8267,
      "step": 415350
    },
    {
      "epoch": 1.4557332464155162,
      "grad_norm": 3.203125,
      "learning_rate": 2.859763078576602e-05,
      "loss": 0.8919,
      "step": 415360
    },
    {
      "epoch": 1.4557682939224117,
      "grad_norm": 3.015625,
      "learning_rate": 2.859698175710232e-05,
      "loss": 0.8508,
      "step": 415370
    },
    {
      "epoch": 1.4558033414293075,
      "grad_norm": 2.859375,
      "learning_rate": 2.8596332728438618e-05,
      "loss": 0.8943,
      "step": 415380
    },
    {
      "epoch": 1.455838388936203,
      "grad_norm": 2.78125,
      "learning_rate": 2.8595683699774916e-05,
      "loss": 0.8926,
      "step": 415390
    },
    {
      "epoch": 1.4558734364430985,
      "grad_norm": 3.03125,
      "learning_rate": 2.8595034671111214e-05,
      "loss": 0.8094,
      "step": 415400
    },
    {
      "epoch": 1.4559084839499943,
      "grad_norm": 3.296875,
      "learning_rate": 2.859438564244751e-05,
      "loss": 0.832,
      "step": 415410
    },
    {
      "epoch": 1.4559435314568898,
      "grad_norm": 3.0,
      "learning_rate": 2.8593736613783813e-05,
      "loss": 0.8639,
      "step": 415420
    },
    {
      "epoch": 1.4559785789637854,
      "grad_norm": 2.578125,
      "learning_rate": 2.859308758512011e-05,
      "loss": 0.8609,
      "step": 415430
    },
    {
      "epoch": 1.456013626470681,
      "grad_norm": 2.578125,
      "learning_rate": 2.859243855645641e-05,
      "loss": 0.8343,
      "step": 415440
    },
    {
      "epoch": 1.4560486739775766,
      "grad_norm": 3.421875,
      "learning_rate": 2.8591789527792707e-05,
      "loss": 0.8466,
      "step": 415450
    },
    {
      "epoch": 1.4560837214844722,
      "grad_norm": 2.875,
      "learning_rate": 2.8591140499129005e-05,
      "loss": 0.8384,
      "step": 415460
    },
    {
      "epoch": 1.4561187689913677,
      "grad_norm": 2.75,
      "learning_rate": 2.8590491470465303e-05,
      "loss": 0.6783,
      "step": 415470
    },
    {
      "epoch": 1.4561538164982635,
      "grad_norm": 3.296875,
      "learning_rate": 2.85898424418016e-05,
      "loss": 0.932,
      "step": 415480
    },
    {
      "epoch": 1.456188864005159,
      "grad_norm": 2.828125,
      "learning_rate": 2.85891934131379e-05,
      "loss": 0.7643,
      "step": 415490
    },
    {
      "epoch": 1.4562239115120545,
      "grad_norm": 2.96875,
      "learning_rate": 2.8588544384474197e-05,
      "loss": 0.8736,
      "step": 415500
    },
    {
      "epoch": 1.45625895901895,
      "grad_norm": 2.953125,
      "learning_rate": 2.8587895355810495e-05,
      "loss": 0.8231,
      "step": 415510
    },
    {
      "epoch": 1.4562940065258458,
      "grad_norm": 2.59375,
      "learning_rate": 2.8587246327146793e-05,
      "loss": 0.8038,
      "step": 415520
    },
    {
      "epoch": 1.4563290540327414,
      "grad_norm": 2.859375,
      "learning_rate": 2.858659729848309e-05,
      "loss": 0.8362,
      "step": 415530
    },
    {
      "epoch": 1.456364101539637,
      "grad_norm": 2.9375,
      "learning_rate": 2.858594826981939e-05,
      "loss": 0.866,
      "step": 415540
    },
    {
      "epoch": 1.4563991490465327,
      "grad_norm": 2.765625,
      "learning_rate": 2.8585299241155687e-05,
      "loss": 0.8148,
      "step": 415550
    },
    {
      "epoch": 1.4564341965534282,
      "grad_norm": 2.984375,
      "learning_rate": 2.858465021249199e-05,
      "loss": 0.9298,
      "step": 415560
    },
    {
      "epoch": 1.4564692440603237,
      "grad_norm": 2.75,
      "learning_rate": 2.8584001183828286e-05,
      "loss": 0.7222,
      "step": 415570
    },
    {
      "epoch": 1.4565042915672195,
      "grad_norm": 2.640625,
      "learning_rate": 2.8583352155164584e-05,
      "loss": 0.8171,
      "step": 415580
    },
    {
      "epoch": 1.456539339074115,
      "grad_norm": 3.28125,
      "learning_rate": 2.8582703126500882e-05,
      "loss": 0.8272,
      "step": 415590
    },
    {
      "epoch": 1.4565743865810106,
      "grad_norm": 2.734375,
      "learning_rate": 2.858205409783718e-05,
      "loss": 0.8486,
      "step": 415600
    },
    {
      "epoch": 1.456609434087906,
      "grad_norm": 3.171875,
      "learning_rate": 2.858140506917348e-05,
      "loss": 0.9106,
      "step": 415610
    },
    {
      "epoch": 1.4566444815948016,
      "grad_norm": 2.671875,
      "learning_rate": 2.8580756040509776e-05,
      "loss": 0.8592,
      "step": 415620
    },
    {
      "epoch": 1.4566795291016974,
      "grad_norm": 3.125,
      "learning_rate": 2.8580107011846074e-05,
      "loss": 0.8352,
      "step": 415630
    },
    {
      "epoch": 1.456714576608593,
      "grad_norm": 2.53125,
      "learning_rate": 2.8579457983182372e-05,
      "loss": 0.8004,
      "step": 415640
    },
    {
      "epoch": 1.4567496241154885,
      "grad_norm": 2.96875,
      "learning_rate": 2.857880895451867e-05,
      "loss": 0.8067,
      "step": 415650
    },
    {
      "epoch": 1.4567846716223842,
      "grad_norm": 2.671875,
      "learning_rate": 2.857815992585497e-05,
      "loss": 0.835,
      "step": 415660
    },
    {
      "epoch": 1.4568197191292798,
      "grad_norm": 2.671875,
      "learning_rate": 2.8577510897191266e-05,
      "loss": 0.7385,
      "step": 415670
    },
    {
      "epoch": 1.4568547666361753,
      "grad_norm": 3.046875,
      "learning_rate": 2.8576861868527564e-05,
      "loss": 0.8525,
      "step": 415680
    },
    {
      "epoch": 1.456889814143071,
      "grad_norm": 2.5625,
      "learning_rate": 2.857621283986386e-05,
      "loss": 0.8355,
      "step": 415690
    },
    {
      "epoch": 1.4569248616499666,
      "grad_norm": 2.734375,
      "learning_rate": 2.8575563811200157e-05,
      "loss": 0.8072,
      "step": 415700
    },
    {
      "epoch": 1.4569599091568621,
      "grad_norm": 2.921875,
      "learning_rate": 2.8574914782536455e-05,
      "loss": 0.8494,
      "step": 415710
    },
    {
      "epoch": 1.4569949566637577,
      "grad_norm": 3.0,
      "learning_rate": 2.8574265753872753e-05,
      "loss": 0.9269,
      "step": 415720
    },
    {
      "epoch": 1.4570300041706532,
      "grad_norm": 2.953125,
      "learning_rate": 2.857361672520905e-05,
      "loss": 0.8706,
      "step": 415730
    },
    {
      "epoch": 1.457065051677549,
      "grad_norm": 2.75,
      "learning_rate": 2.857296769654535e-05,
      "loss": 0.7395,
      "step": 415740
    },
    {
      "epoch": 1.4571000991844445,
      "grad_norm": 2.875,
      "learning_rate": 2.8572318667881647e-05,
      "loss": 0.8444,
      "step": 415750
    },
    {
      "epoch": 1.45713514669134,
      "grad_norm": 2.734375,
      "learning_rate": 2.8571669639217945e-05,
      "loss": 0.8146,
      "step": 415760
    },
    {
      "epoch": 1.4571701941982358,
      "grad_norm": 2.96875,
      "learning_rate": 2.8571020610554243e-05,
      "loss": 0.8438,
      "step": 415770
    },
    {
      "epoch": 1.4572052417051313,
      "grad_norm": 3.078125,
      "learning_rate": 2.857037158189054e-05,
      "loss": 0.8285,
      "step": 415780
    },
    {
      "epoch": 1.4572402892120269,
      "grad_norm": 3.3125,
      "learning_rate": 2.8569722553226842e-05,
      "loss": 0.8485,
      "step": 415790
    },
    {
      "epoch": 1.4572753367189226,
      "grad_norm": 2.375,
      "learning_rate": 2.856907352456314e-05,
      "loss": 0.7023,
      "step": 415800
    },
    {
      "epoch": 1.4573103842258182,
      "grad_norm": 2.453125,
      "learning_rate": 2.856842449589944e-05,
      "loss": 0.8389,
      "step": 415810
    },
    {
      "epoch": 1.4573454317327137,
      "grad_norm": 3.359375,
      "learning_rate": 2.8567775467235736e-05,
      "loss": 0.873,
      "step": 415820
    },
    {
      "epoch": 1.4573804792396092,
      "grad_norm": 3.078125,
      "learning_rate": 2.8567126438572034e-05,
      "loss": 0.8672,
      "step": 415830
    },
    {
      "epoch": 1.4574155267465048,
      "grad_norm": 2.96875,
      "learning_rate": 2.8566477409908332e-05,
      "loss": 0.8285,
      "step": 415840
    },
    {
      "epoch": 1.4574505742534005,
      "grad_norm": 3.203125,
      "learning_rate": 2.856582838124463e-05,
      "loss": 0.8499,
      "step": 415850
    },
    {
      "epoch": 1.457485621760296,
      "grad_norm": 3.109375,
      "learning_rate": 2.856517935258093e-05,
      "loss": 0.8084,
      "step": 415860
    },
    {
      "epoch": 1.4575206692671916,
      "grad_norm": 3.03125,
      "learning_rate": 2.8564530323917226e-05,
      "loss": 0.893,
      "step": 415870
    },
    {
      "epoch": 1.4575557167740874,
      "grad_norm": 2.796875,
      "learning_rate": 2.8563881295253524e-05,
      "loss": 0.8382,
      "step": 415880
    },
    {
      "epoch": 1.4575907642809829,
      "grad_norm": 3.0625,
      "learning_rate": 2.8563232266589822e-05,
      "loss": 0.9036,
      "step": 415890
    },
    {
      "epoch": 1.4576258117878784,
      "grad_norm": 3.046875,
      "learning_rate": 2.856258323792612e-05,
      "loss": 0.8085,
      "step": 415900
    },
    {
      "epoch": 1.4576608592947742,
      "grad_norm": 2.953125,
      "learning_rate": 2.856193420926242e-05,
      "loss": 0.8452,
      "step": 415910
    },
    {
      "epoch": 1.4576959068016697,
      "grad_norm": 2.859375,
      "learning_rate": 2.8561285180598716e-05,
      "loss": 0.7687,
      "step": 415920
    },
    {
      "epoch": 1.4577309543085653,
      "grad_norm": 2.703125,
      "learning_rate": 2.8560636151935018e-05,
      "loss": 0.8298,
      "step": 415930
    },
    {
      "epoch": 1.4577660018154608,
      "grad_norm": 2.859375,
      "learning_rate": 2.8559987123271316e-05,
      "loss": 0.8892,
      "step": 415940
    },
    {
      "epoch": 1.4578010493223563,
      "grad_norm": 3.125,
      "learning_rate": 2.8559338094607614e-05,
      "loss": 0.8161,
      "step": 415950
    },
    {
      "epoch": 1.457836096829252,
      "grad_norm": 2.90625,
      "learning_rate": 2.8558689065943912e-05,
      "loss": 0.8682,
      "step": 415960
    },
    {
      "epoch": 1.4578711443361476,
      "grad_norm": 2.546875,
      "learning_rate": 2.855804003728021e-05,
      "loss": 0.753,
      "step": 415970
    },
    {
      "epoch": 1.4579061918430432,
      "grad_norm": 3.09375,
      "learning_rate": 2.8557391008616508e-05,
      "loss": 0.8395,
      "step": 415980
    },
    {
      "epoch": 1.457941239349939,
      "grad_norm": 2.96875,
      "learning_rate": 2.8556741979952806e-05,
      "loss": 0.8456,
      "step": 415990
    },
    {
      "epoch": 1.4579762868568344,
      "grad_norm": 2.75,
      "learning_rate": 2.8556092951289104e-05,
      "loss": 0.8365,
      "step": 416000
    },
    {
      "epoch": 1.45801133436373,
      "grad_norm": 2.859375,
      "learning_rate": 2.8555443922625402e-05,
      "loss": 0.8301,
      "step": 416010
    },
    {
      "epoch": 1.4580463818706257,
      "grad_norm": 2.953125,
      "learning_rate": 2.85547948939617e-05,
      "loss": 0.8774,
      "step": 416020
    },
    {
      "epoch": 1.4580814293775213,
      "grad_norm": 2.484375,
      "learning_rate": 2.8554145865297998e-05,
      "loss": 0.8138,
      "step": 416030
    },
    {
      "epoch": 1.4581164768844168,
      "grad_norm": 3.09375,
      "learning_rate": 2.8553496836634296e-05,
      "loss": 0.8363,
      "step": 416040
    },
    {
      "epoch": 1.4581515243913123,
      "grad_norm": 3.234375,
      "learning_rate": 2.8552847807970594e-05,
      "loss": 0.857,
      "step": 416050
    },
    {
      "epoch": 1.4581865718982079,
      "grad_norm": 2.75,
      "learning_rate": 2.855219877930689e-05,
      "loss": 0.8081,
      "step": 416060
    },
    {
      "epoch": 1.4582216194051036,
      "grad_norm": 2.8125,
      "learning_rate": 2.8551549750643186e-05,
      "loss": 0.8762,
      "step": 416070
    },
    {
      "epoch": 1.4582566669119992,
      "grad_norm": 2.890625,
      "learning_rate": 2.8550900721979484e-05,
      "loss": 0.859,
      "step": 416080
    },
    {
      "epoch": 1.4582917144188947,
      "grad_norm": 2.90625,
      "learning_rate": 2.8550251693315782e-05,
      "loss": 0.8221,
      "step": 416090
    },
    {
      "epoch": 1.4583267619257905,
      "grad_norm": 3.3125,
      "learning_rate": 2.854960266465208e-05,
      "loss": 0.8349,
      "step": 416100
    },
    {
      "epoch": 1.458361809432686,
      "grad_norm": 3.0,
      "learning_rate": 2.854895363598838e-05,
      "loss": 0.8493,
      "step": 416110
    },
    {
      "epoch": 1.4583968569395815,
      "grad_norm": 3.296875,
      "learning_rate": 2.8548304607324676e-05,
      "loss": 0.834,
      "step": 416120
    },
    {
      "epoch": 1.4584319044464773,
      "grad_norm": 2.90625,
      "learning_rate": 2.8547655578660974e-05,
      "loss": 0.8046,
      "step": 416130
    },
    {
      "epoch": 1.4584669519533728,
      "grad_norm": 2.984375,
      "learning_rate": 2.8547006549997272e-05,
      "loss": 0.8001,
      "step": 416140
    },
    {
      "epoch": 1.4585019994602684,
      "grad_norm": 2.796875,
      "learning_rate": 2.8546357521333574e-05,
      "loss": 0.9315,
      "step": 416150
    },
    {
      "epoch": 1.458537046967164,
      "grad_norm": 3.0,
      "learning_rate": 2.8545708492669872e-05,
      "loss": 0.8349,
      "step": 416160
    },
    {
      "epoch": 1.4585720944740597,
      "grad_norm": 2.859375,
      "learning_rate": 2.854505946400617e-05,
      "loss": 0.7941,
      "step": 416170
    },
    {
      "epoch": 1.4586071419809552,
      "grad_norm": 3.234375,
      "learning_rate": 2.8544410435342468e-05,
      "loss": 0.8663,
      "step": 416180
    },
    {
      "epoch": 1.4586421894878507,
      "grad_norm": 2.796875,
      "learning_rate": 2.8543761406678766e-05,
      "loss": 0.7802,
      "step": 416190
    },
    {
      "epoch": 1.4586772369947463,
      "grad_norm": 3.109375,
      "learning_rate": 2.8543112378015064e-05,
      "loss": 0.8694,
      "step": 416200
    },
    {
      "epoch": 1.458712284501642,
      "grad_norm": 2.671875,
      "learning_rate": 2.8542463349351362e-05,
      "loss": 0.8344,
      "step": 416210
    },
    {
      "epoch": 1.4587473320085376,
      "grad_norm": 2.625,
      "learning_rate": 2.854181432068766e-05,
      "loss": 0.9062,
      "step": 416220
    },
    {
      "epoch": 1.458782379515433,
      "grad_norm": 2.609375,
      "learning_rate": 2.8541165292023958e-05,
      "loss": 0.9285,
      "step": 416230
    },
    {
      "epoch": 1.4588174270223289,
      "grad_norm": 2.640625,
      "learning_rate": 2.8540516263360256e-05,
      "loss": 0.7677,
      "step": 416240
    },
    {
      "epoch": 1.4588524745292244,
      "grad_norm": 3.234375,
      "learning_rate": 2.8539867234696554e-05,
      "loss": 0.7875,
      "step": 416250
    },
    {
      "epoch": 1.45888752203612,
      "grad_norm": 2.953125,
      "learning_rate": 2.8539218206032852e-05,
      "loss": 0.8599,
      "step": 416260
    },
    {
      "epoch": 1.4589225695430157,
      "grad_norm": 3.03125,
      "learning_rate": 2.853856917736915e-05,
      "loss": 0.8518,
      "step": 416270
    },
    {
      "epoch": 1.4589576170499112,
      "grad_norm": 2.984375,
      "learning_rate": 2.8537920148705448e-05,
      "loss": 0.818,
      "step": 416280
    },
    {
      "epoch": 1.4589926645568068,
      "grad_norm": 2.59375,
      "learning_rate": 2.853727112004175e-05,
      "loss": 0.778,
      "step": 416290
    },
    {
      "epoch": 1.4590277120637023,
      "grad_norm": 2.5625,
      "learning_rate": 2.8536622091378047e-05,
      "loss": 0.8817,
      "step": 416300
    },
    {
      "epoch": 1.4590627595705978,
      "grad_norm": 2.640625,
      "learning_rate": 2.8535973062714345e-05,
      "loss": 0.8441,
      "step": 416310
    },
    {
      "epoch": 1.4590978070774936,
      "grad_norm": 2.953125,
      "learning_rate": 2.8535324034050643e-05,
      "loss": 0.8254,
      "step": 416320
    },
    {
      "epoch": 1.4591328545843891,
      "grad_norm": 2.75,
      "learning_rate": 2.853467500538694e-05,
      "loss": 0.7709,
      "step": 416330
    },
    {
      "epoch": 1.4591679020912847,
      "grad_norm": 2.8125,
      "learning_rate": 2.853402597672324e-05,
      "loss": 0.8331,
      "step": 416340
    },
    {
      "epoch": 1.4592029495981804,
      "grad_norm": 3.15625,
      "learning_rate": 2.8533376948059537e-05,
      "loss": 0.9097,
      "step": 416350
    },
    {
      "epoch": 1.459237997105076,
      "grad_norm": 2.53125,
      "learning_rate": 2.8532727919395835e-05,
      "loss": 0.724,
      "step": 416360
    },
    {
      "epoch": 1.4592730446119715,
      "grad_norm": 3.21875,
      "learning_rate": 2.8532078890732133e-05,
      "loss": 0.9237,
      "step": 416370
    },
    {
      "epoch": 1.4593080921188673,
      "grad_norm": 2.578125,
      "learning_rate": 2.853142986206843e-05,
      "loss": 0.8587,
      "step": 416380
    },
    {
      "epoch": 1.4593431396257628,
      "grad_norm": 3.625,
      "learning_rate": 2.853078083340473e-05,
      "loss": 0.8672,
      "step": 416390
    },
    {
      "epoch": 1.4593781871326583,
      "grad_norm": 2.890625,
      "learning_rate": 2.8530131804741027e-05,
      "loss": 0.7867,
      "step": 416400
    },
    {
      "epoch": 1.4594132346395539,
      "grad_norm": 2.59375,
      "learning_rate": 2.8529482776077325e-05,
      "loss": 0.849,
      "step": 416410
    },
    {
      "epoch": 1.4594482821464494,
      "grad_norm": 2.875,
      "learning_rate": 2.8528833747413623e-05,
      "loss": 0.8259,
      "step": 416420
    },
    {
      "epoch": 1.4594833296533452,
      "grad_norm": 3.015625,
      "learning_rate": 2.8528184718749925e-05,
      "loss": 0.8311,
      "step": 416430
    },
    {
      "epoch": 1.4595183771602407,
      "grad_norm": 3.125,
      "learning_rate": 2.8527535690086216e-05,
      "loss": 0.8082,
      "step": 416440
    },
    {
      "epoch": 1.4595534246671362,
      "grad_norm": 2.859375,
      "learning_rate": 2.8526886661422514e-05,
      "loss": 0.8878,
      "step": 416450
    },
    {
      "epoch": 1.459588472174032,
      "grad_norm": 3.125,
      "learning_rate": 2.8526237632758812e-05,
      "loss": 0.8646,
      "step": 416460
    },
    {
      "epoch": 1.4596235196809275,
      "grad_norm": 3.1875,
      "learning_rate": 2.852558860409511e-05,
      "loss": 0.8171,
      "step": 416470
    },
    {
      "epoch": 1.459658567187823,
      "grad_norm": 2.71875,
      "learning_rate": 2.8524939575431408e-05,
      "loss": 0.8262,
      "step": 416480
    },
    {
      "epoch": 1.4596936146947188,
      "grad_norm": 2.796875,
      "learning_rate": 2.8524290546767706e-05,
      "loss": 0.8084,
      "step": 416490
    },
    {
      "epoch": 1.4597286622016143,
      "grad_norm": 2.953125,
      "learning_rate": 2.8523641518104004e-05,
      "loss": 0.9048,
      "step": 416500
    },
    {
      "epoch": 1.4597637097085099,
      "grad_norm": 2.609375,
      "learning_rate": 2.8522992489440302e-05,
      "loss": 0.765,
      "step": 416510
    },
    {
      "epoch": 1.4597987572154054,
      "grad_norm": 3.03125,
      "learning_rate": 2.8522343460776603e-05,
      "loss": 0.8808,
      "step": 416520
    },
    {
      "epoch": 1.459833804722301,
      "grad_norm": 3.078125,
      "learning_rate": 2.85216944321129e-05,
      "loss": 0.8869,
      "step": 416530
    },
    {
      "epoch": 1.4598688522291967,
      "grad_norm": 3.0,
      "learning_rate": 2.85210454034492e-05,
      "loss": 0.881,
      "step": 416540
    },
    {
      "epoch": 1.4599038997360922,
      "grad_norm": 3.28125,
      "learning_rate": 2.8520396374785497e-05,
      "loss": 0.8671,
      "step": 416550
    },
    {
      "epoch": 1.4599389472429878,
      "grad_norm": 3.140625,
      "learning_rate": 2.8519747346121795e-05,
      "loss": 0.971,
      "step": 416560
    },
    {
      "epoch": 1.4599739947498835,
      "grad_norm": 3.109375,
      "learning_rate": 2.8519098317458093e-05,
      "loss": 0.8309,
      "step": 416570
    },
    {
      "epoch": 1.460009042256779,
      "grad_norm": 2.53125,
      "learning_rate": 2.851844928879439e-05,
      "loss": 0.7951,
      "step": 416580
    },
    {
      "epoch": 1.4600440897636746,
      "grad_norm": 2.625,
      "learning_rate": 2.851780026013069e-05,
      "loss": 0.8446,
      "step": 416590
    },
    {
      "epoch": 1.4600791372705704,
      "grad_norm": 2.8125,
      "learning_rate": 2.8517151231466987e-05,
      "loss": 0.8335,
      "step": 416600
    },
    {
      "epoch": 1.460114184777466,
      "grad_norm": 2.8125,
      "learning_rate": 2.8516502202803285e-05,
      "loss": 0.8429,
      "step": 416610
    },
    {
      "epoch": 1.4601492322843614,
      "grad_norm": 3.234375,
      "learning_rate": 2.8515853174139583e-05,
      "loss": 0.888,
      "step": 416620
    },
    {
      "epoch": 1.460184279791257,
      "grad_norm": 2.828125,
      "learning_rate": 2.851520414547588e-05,
      "loss": 0.7942,
      "step": 416630
    },
    {
      "epoch": 1.4602193272981525,
      "grad_norm": 3.046875,
      "learning_rate": 2.851455511681218e-05,
      "loss": 0.8442,
      "step": 416640
    },
    {
      "epoch": 1.4602543748050483,
      "grad_norm": 2.609375,
      "learning_rate": 2.8513906088148477e-05,
      "loss": 0.7481,
      "step": 416650
    },
    {
      "epoch": 1.4602894223119438,
      "grad_norm": 2.984375,
      "learning_rate": 2.851325705948478e-05,
      "loss": 0.8676,
      "step": 416660
    },
    {
      "epoch": 1.4603244698188393,
      "grad_norm": 2.8125,
      "learning_rate": 2.8512608030821077e-05,
      "loss": 0.894,
      "step": 416670
    },
    {
      "epoch": 1.460359517325735,
      "grad_norm": 2.890625,
      "learning_rate": 2.8511959002157375e-05,
      "loss": 0.8428,
      "step": 416680
    },
    {
      "epoch": 1.4603945648326306,
      "grad_norm": 2.953125,
      "learning_rate": 2.8511309973493673e-05,
      "loss": 0.8901,
      "step": 416690
    },
    {
      "epoch": 1.4604296123395262,
      "grad_norm": 2.859375,
      "learning_rate": 2.851066094482997e-05,
      "loss": 0.8643,
      "step": 416700
    },
    {
      "epoch": 1.460464659846422,
      "grad_norm": 3.078125,
      "learning_rate": 2.851001191616627e-05,
      "loss": 0.8373,
      "step": 416710
    },
    {
      "epoch": 1.4604997073533175,
      "grad_norm": 2.90625,
      "learning_rate": 2.8509362887502567e-05,
      "loss": 0.8306,
      "step": 416720
    },
    {
      "epoch": 1.460534754860213,
      "grad_norm": 3.0,
      "learning_rate": 2.8508713858838865e-05,
      "loss": 0.8487,
      "step": 416730
    },
    {
      "epoch": 1.4605698023671085,
      "grad_norm": 2.546875,
      "learning_rate": 2.8508064830175163e-05,
      "loss": 0.8619,
      "step": 416740
    },
    {
      "epoch": 1.460604849874004,
      "grad_norm": 3.28125,
      "learning_rate": 2.850741580151146e-05,
      "loss": 0.873,
      "step": 416750
    },
    {
      "epoch": 1.4606398973808998,
      "grad_norm": 2.96875,
      "learning_rate": 2.850676677284776e-05,
      "loss": 0.8536,
      "step": 416760
    },
    {
      "epoch": 1.4606749448877954,
      "grad_norm": 3.171875,
      "learning_rate": 2.8506117744184057e-05,
      "loss": 0.8246,
      "step": 416770
    },
    {
      "epoch": 1.460709992394691,
      "grad_norm": 2.4375,
      "learning_rate": 2.8505468715520355e-05,
      "loss": 0.8438,
      "step": 416780
    },
    {
      "epoch": 1.4607450399015867,
      "grad_norm": 3.0,
      "learning_rate": 2.8504819686856653e-05,
      "loss": 0.8162,
      "step": 416790
    },
    {
      "epoch": 1.4607800874084822,
      "grad_norm": 3.03125,
      "learning_rate": 2.8504170658192954e-05,
      "loss": 0.8782,
      "step": 416800
    },
    {
      "epoch": 1.4608151349153777,
      "grad_norm": 2.84375,
      "learning_rate": 2.8503521629529252e-05,
      "loss": 0.81,
      "step": 416810
    },
    {
      "epoch": 1.4608501824222735,
      "grad_norm": 2.859375,
      "learning_rate": 2.8502872600865543e-05,
      "loss": 0.8161,
      "step": 416820
    },
    {
      "epoch": 1.460885229929169,
      "grad_norm": 3.078125,
      "learning_rate": 2.850222357220184e-05,
      "loss": 0.7692,
      "step": 416830
    },
    {
      "epoch": 1.4609202774360646,
      "grad_norm": 3.0,
      "learning_rate": 2.850157454353814e-05,
      "loss": 0.7863,
      "step": 416840
    },
    {
      "epoch": 1.4609553249429603,
      "grad_norm": 2.96875,
      "learning_rate": 2.8500925514874437e-05,
      "loss": 0.8239,
      "step": 416850
    },
    {
      "epoch": 1.4609903724498559,
      "grad_norm": 3.0625,
      "learning_rate": 2.8500276486210735e-05,
      "loss": 0.8451,
      "step": 416860
    },
    {
      "epoch": 1.4610254199567514,
      "grad_norm": 2.46875,
      "learning_rate": 2.8499627457547033e-05,
      "loss": 0.7511,
      "step": 416870
    },
    {
      "epoch": 1.461060467463647,
      "grad_norm": 2.765625,
      "learning_rate": 2.849897842888333e-05,
      "loss": 0.889,
      "step": 416880
    },
    {
      "epoch": 1.4610955149705425,
      "grad_norm": 3.109375,
      "learning_rate": 2.8498329400219633e-05,
      "loss": 0.8234,
      "step": 416890
    },
    {
      "epoch": 1.4611305624774382,
      "grad_norm": 2.59375,
      "learning_rate": 2.849768037155593e-05,
      "loss": 0.852,
      "step": 416900
    },
    {
      "epoch": 1.4611656099843338,
      "grad_norm": 2.890625,
      "learning_rate": 2.849703134289223e-05,
      "loss": 0.8633,
      "step": 416910
    },
    {
      "epoch": 1.4612006574912293,
      "grad_norm": 2.8125,
      "learning_rate": 2.8496382314228527e-05,
      "loss": 0.7686,
      "step": 416920
    },
    {
      "epoch": 1.461235704998125,
      "grad_norm": 2.75,
      "learning_rate": 2.8495733285564825e-05,
      "loss": 0.8415,
      "step": 416930
    },
    {
      "epoch": 1.4612707525050206,
      "grad_norm": 3.15625,
      "learning_rate": 2.8495084256901123e-05,
      "loss": 0.9269,
      "step": 416940
    },
    {
      "epoch": 1.4613058000119161,
      "grad_norm": 2.859375,
      "learning_rate": 2.849443522823742e-05,
      "loss": 0.8533,
      "step": 416950
    },
    {
      "epoch": 1.4613408475188119,
      "grad_norm": 2.984375,
      "learning_rate": 2.849378619957372e-05,
      "loss": 0.8947,
      "step": 416960
    },
    {
      "epoch": 1.4613758950257074,
      "grad_norm": 3.4375,
      "learning_rate": 2.8493137170910017e-05,
      "loss": 0.8208,
      "step": 416970
    },
    {
      "epoch": 1.461410942532603,
      "grad_norm": 3.234375,
      "learning_rate": 2.8492488142246315e-05,
      "loss": 0.9076,
      "step": 416980
    },
    {
      "epoch": 1.4614459900394985,
      "grad_norm": 2.703125,
      "learning_rate": 2.8491839113582613e-05,
      "loss": 0.8504,
      "step": 416990
    },
    {
      "epoch": 1.461481037546394,
      "grad_norm": 2.78125,
      "learning_rate": 2.849119008491891e-05,
      "loss": 0.8027,
      "step": 417000
    },
    {
      "epoch": 1.4615160850532898,
      "grad_norm": 2.796875,
      "learning_rate": 2.849054105625521e-05,
      "loss": 0.8043,
      "step": 417010
    },
    {
      "epoch": 1.4615511325601853,
      "grad_norm": 3.28125,
      "learning_rate": 2.8489892027591507e-05,
      "loss": 0.8341,
      "step": 417020
    },
    {
      "epoch": 1.4615861800670809,
      "grad_norm": 3.0,
      "learning_rate": 2.8489242998927808e-05,
      "loss": 0.8198,
      "step": 417030
    },
    {
      "epoch": 1.4616212275739766,
      "grad_norm": 2.90625,
      "learning_rate": 2.8488593970264106e-05,
      "loss": 0.8703,
      "step": 417040
    },
    {
      "epoch": 1.4616562750808721,
      "grad_norm": 3.140625,
      "learning_rate": 2.8487944941600404e-05,
      "loss": 0.8524,
      "step": 417050
    },
    {
      "epoch": 1.4616913225877677,
      "grad_norm": 2.609375,
      "learning_rate": 2.8487295912936702e-05,
      "loss": 0.7931,
      "step": 417060
    },
    {
      "epoch": 1.4617263700946634,
      "grad_norm": 2.71875,
      "learning_rate": 2.8486646884273e-05,
      "loss": 0.8498,
      "step": 417070
    },
    {
      "epoch": 1.461761417601559,
      "grad_norm": 2.90625,
      "learning_rate": 2.8485997855609298e-05,
      "loss": 0.8749,
      "step": 417080
    },
    {
      "epoch": 1.4617964651084545,
      "grad_norm": 3.078125,
      "learning_rate": 2.8485348826945596e-05,
      "loss": 0.8599,
      "step": 417090
    },
    {
      "epoch": 1.46183151261535,
      "grad_norm": 2.921875,
      "learning_rate": 2.8484699798281894e-05,
      "loss": 0.8473,
      "step": 417100
    },
    {
      "epoch": 1.4618665601222456,
      "grad_norm": 3.265625,
      "learning_rate": 2.8484050769618192e-05,
      "loss": 0.8872,
      "step": 417110
    },
    {
      "epoch": 1.4619016076291413,
      "grad_norm": 2.578125,
      "learning_rate": 2.848340174095449e-05,
      "loss": 0.787,
      "step": 417120
    },
    {
      "epoch": 1.4619366551360369,
      "grad_norm": 3.046875,
      "learning_rate": 2.8482752712290788e-05,
      "loss": 0.8253,
      "step": 417130
    },
    {
      "epoch": 1.4619717026429324,
      "grad_norm": 2.984375,
      "learning_rate": 2.8482103683627086e-05,
      "loss": 0.8429,
      "step": 417140
    },
    {
      "epoch": 1.4620067501498282,
      "grad_norm": 2.65625,
      "learning_rate": 2.8481454654963384e-05,
      "loss": 0.872,
      "step": 417150
    },
    {
      "epoch": 1.4620417976567237,
      "grad_norm": 2.671875,
      "learning_rate": 2.8480805626299682e-05,
      "loss": 0.8459,
      "step": 417160
    },
    {
      "epoch": 1.4620768451636192,
      "grad_norm": 3.0625,
      "learning_rate": 2.8480156597635983e-05,
      "loss": 0.8263,
      "step": 417170
    },
    {
      "epoch": 1.462111892670515,
      "grad_norm": 2.875,
      "learning_rate": 2.847950756897228e-05,
      "loss": 0.8404,
      "step": 417180
    },
    {
      "epoch": 1.4621469401774105,
      "grad_norm": 2.859375,
      "learning_rate": 2.8478858540308573e-05,
      "loss": 0.9382,
      "step": 417190
    },
    {
      "epoch": 1.462181987684306,
      "grad_norm": 2.65625,
      "learning_rate": 2.847820951164487e-05,
      "loss": 0.8065,
      "step": 417200
    },
    {
      "epoch": 1.4622170351912016,
      "grad_norm": 2.84375,
      "learning_rate": 2.847756048298117e-05,
      "loss": 0.807,
      "step": 417210
    },
    {
      "epoch": 1.4622520826980971,
      "grad_norm": 2.640625,
      "learning_rate": 2.8476911454317467e-05,
      "loss": 0.8957,
      "step": 417220
    },
    {
      "epoch": 1.462287130204993,
      "grad_norm": 2.921875,
      "learning_rate": 2.8476262425653765e-05,
      "loss": 0.9008,
      "step": 417230
    },
    {
      "epoch": 1.4623221777118884,
      "grad_norm": 3.5,
      "learning_rate": 2.8475613396990063e-05,
      "loss": 0.8533,
      "step": 417240
    },
    {
      "epoch": 1.462357225218784,
      "grad_norm": 2.9375,
      "learning_rate": 2.8474964368326364e-05,
      "loss": 0.9552,
      "step": 417250
    },
    {
      "epoch": 1.4623922727256797,
      "grad_norm": 3.015625,
      "learning_rate": 2.8474315339662662e-05,
      "loss": 0.7913,
      "step": 417260
    },
    {
      "epoch": 1.4624273202325753,
      "grad_norm": 2.9375,
      "learning_rate": 2.847366631099896e-05,
      "loss": 0.8607,
      "step": 417270
    },
    {
      "epoch": 1.4624623677394708,
      "grad_norm": 3.046875,
      "learning_rate": 2.8473017282335258e-05,
      "loss": 0.7814,
      "step": 417280
    },
    {
      "epoch": 1.4624974152463666,
      "grad_norm": 2.75,
      "learning_rate": 2.8472368253671556e-05,
      "loss": 0.8309,
      "step": 417290
    },
    {
      "epoch": 1.462532462753262,
      "grad_norm": 3.125,
      "learning_rate": 2.8471719225007854e-05,
      "loss": 0.8398,
      "step": 417300
    },
    {
      "epoch": 1.4625675102601576,
      "grad_norm": 2.375,
      "learning_rate": 2.8471070196344152e-05,
      "loss": 0.7361,
      "step": 417310
    },
    {
      "epoch": 1.4626025577670532,
      "grad_norm": 2.8125,
      "learning_rate": 2.847042116768045e-05,
      "loss": 0.8489,
      "step": 417320
    },
    {
      "epoch": 1.4626376052739487,
      "grad_norm": 2.75,
      "learning_rate": 2.8469772139016748e-05,
      "loss": 0.8092,
      "step": 417330
    },
    {
      "epoch": 1.4626726527808445,
      "grad_norm": 2.9375,
      "learning_rate": 2.8469123110353046e-05,
      "loss": 0.8064,
      "step": 417340
    },
    {
      "epoch": 1.46270770028774,
      "grad_norm": 3.359375,
      "learning_rate": 2.8468474081689344e-05,
      "loss": 0.7357,
      "step": 417350
    },
    {
      "epoch": 1.4627427477946355,
      "grad_norm": 3.1875,
      "learning_rate": 2.8467825053025642e-05,
      "loss": 0.8164,
      "step": 417360
    },
    {
      "epoch": 1.4627777953015313,
      "grad_norm": 2.671875,
      "learning_rate": 2.846717602436194e-05,
      "loss": 0.941,
      "step": 417370
    },
    {
      "epoch": 1.4628128428084268,
      "grad_norm": 2.6875,
      "learning_rate": 2.8466526995698238e-05,
      "loss": 0.89,
      "step": 417380
    },
    {
      "epoch": 1.4628478903153224,
      "grad_norm": 2.8125,
      "learning_rate": 2.846587796703454e-05,
      "loss": 0.8015,
      "step": 417390
    },
    {
      "epoch": 1.4628829378222181,
      "grad_norm": 2.75,
      "learning_rate": 2.8465228938370837e-05,
      "loss": 0.9193,
      "step": 417400
    },
    {
      "epoch": 1.4629179853291137,
      "grad_norm": 2.40625,
      "learning_rate": 2.8464579909707135e-05,
      "loss": 0.8305,
      "step": 417410
    },
    {
      "epoch": 1.4629530328360092,
      "grad_norm": 2.671875,
      "learning_rate": 2.8463930881043433e-05,
      "loss": 0.7994,
      "step": 417420
    },
    {
      "epoch": 1.4629880803429047,
      "grad_norm": 2.984375,
      "learning_rate": 2.846328185237973e-05,
      "loss": 0.7253,
      "step": 417430
    },
    {
      "epoch": 1.4630231278498005,
      "grad_norm": 2.9375,
      "learning_rate": 2.846263282371603e-05,
      "loss": 0.8371,
      "step": 417440
    },
    {
      "epoch": 1.463058175356696,
      "grad_norm": 2.921875,
      "learning_rate": 2.8461983795052327e-05,
      "loss": 0.861,
      "step": 417450
    },
    {
      "epoch": 1.4630932228635916,
      "grad_norm": 2.96875,
      "learning_rate": 2.8461334766388625e-05,
      "loss": 0.8238,
      "step": 417460
    },
    {
      "epoch": 1.463128270370487,
      "grad_norm": 3.046875,
      "learning_rate": 2.8460685737724923e-05,
      "loss": 0.9007,
      "step": 417470
    },
    {
      "epoch": 1.4631633178773829,
      "grad_norm": 2.6875,
      "learning_rate": 2.846003670906122e-05,
      "loss": 0.8384,
      "step": 417480
    },
    {
      "epoch": 1.4631983653842784,
      "grad_norm": 3.0625,
      "learning_rate": 2.845938768039752e-05,
      "loss": 0.8773,
      "step": 417490
    },
    {
      "epoch": 1.463233412891174,
      "grad_norm": 2.59375,
      "learning_rate": 2.8458738651733817e-05,
      "loss": 0.8023,
      "step": 417500
    },
    {
      "epoch": 1.4632684603980697,
      "grad_norm": 2.84375,
      "learning_rate": 2.8458089623070115e-05,
      "loss": 0.8695,
      "step": 417510
    },
    {
      "epoch": 1.4633035079049652,
      "grad_norm": 2.703125,
      "learning_rate": 2.8457440594406413e-05,
      "loss": 0.876,
      "step": 417520
    },
    {
      "epoch": 1.4633385554118608,
      "grad_norm": 3.109375,
      "learning_rate": 2.8456791565742715e-05,
      "loss": 0.8767,
      "step": 417530
    },
    {
      "epoch": 1.4633736029187565,
      "grad_norm": 2.96875,
      "learning_rate": 2.8456142537079013e-05,
      "loss": 0.8414,
      "step": 417540
    },
    {
      "epoch": 1.463408650425652,
      "grad_norm": 3.03125,
      "learning_rate": 2.845549350841531e-05,
      "loss": 0.8618,
      "step": 417550
    },
    {
      "epoch": 1.4634436979325476,
      "grad_norm": 3.015625,
      "learning_rate": 2.845484447975161e-05,
      "loss": 0.9082,
      "step": 417560
    },
    {
      "epoch": 1.4634787454394431,
      "grad_norm": 3.375,
      "learning_rate": 2.84541954510879e-05,
      "loss": 0.9458,
      "step": 417570
    },
    {
      "epoch": 1.4635137929463387,
      "grad_norm": 2.875,
      "learning_rate": 2.8453546422424198e-05,
      "loss": 0.8825,
      "step": 417580
    },
    {
      "epoch": 1.4635488404532344,
      "grad_norm": 3.171875,
      "learning_rate": 2.8452897393760496e-05,
      "loss": 0.877,
      "step": 417590
    },
    {
      "epoch": 1.46358388796013,
      "grad_norm": 3.28125,
      "learning_rate": 2.8452248365096794e-05,
      "loss": 0.7601,
      "step": 417600
    },
    {
      "epoch": 1.4636189354670255,
      "grad_norm": 2.765625,
      "learning_rate": 2.8451599336433092e-05,
      "loss": 0.843,
      "step": 417610
    },
    {
      "epoch": 1.4636539829739212,
      "grad_norm": 2.828125,
      "learning_rate": 2.8450950307769393e-05,
      "loss": 0.8535,
      "step": 417620
    },
    {
      "epoch": 1.4636890304808168,
      "grad_norm": 3.015625,
      "learning_rate": 2.845030127910569e-05,
      "loss": 0.8425,
      "step": 417630
    },
    {
      "epoch": 1.4637240779877123,
      "grad_norm": 2.8125,
      "learning_rate": 2.844965225044199e-05,
      "loss": 0.811,
      "step": 417640
    },
    {
      "epoch": 1.463759125494608,
      "grad_norm": 2.90625,
      "learning_rate": 2.8449003221778287e-05,
      "loss": 0.8578,
      "step": 417650
    },
    {
      "epoch": 1.4637941730015036,
      "grad_norm": 3.234375,
      "learning_rate": 2.8448354193114585e-05,
      "loss": 0.8477,
      "step": 417660
    },
    {
      "epoch": 1.4638292205083991,
      "grad_norm": 2.609375,
      "learning_rate": 2.8447705164450883e-05,
      "loss": 0.8692,
      "step": 417670
    },
    {
      "epoch": 1.4638642680152947,
      "grad_norm": 2.734375,
      "learning_rate": 2.844705613578718e-05,
      "loss": 0.8428,
      "step": 417680
    },
    {
      "epoch": 1.4638993155221902,
      "grad_norm": 3.234375,
      "learning_rate": 2.844640710712348e-05,
      "loss": 0.8447,
      "step": 417690
    },
    {
      "epoch": 1.463934363029086,
      "grad_norm": 2.46875,
      "learning_rate": 2.8445758078459777e-05,
      "loss": 0.7605,
      "step": 417700
    },
    {
      "epoch": 1.4639694105359815,
      "grad_norm": 2.703125,
      "learning_rate": 2.8445109049796075e-05,
      "loss": 0.8915,
      "step": 417710
    },
    {
      "epoch": 1.464004458042877,
      "grad_norm": 2.65625,
      "learning_rate": 2.8444460021132373e-05,
      "loss": 0.7887,
      "step": 417720
    },
    {
      "epoch": 1.4640395055497728,
      "grad_norm": 3.015625,
      "learning_rate": 2.844381099246867e-05,
      "loss": 0.8866,
      "step": 417730
    },
    {
      "epoch": 1.4640745530566683,
      "grad_norm": 2.796875,
      "learning_rate": 2.844316196380497e-05,
      "loss": 0.8505,
      "step": 417740
    },
    {
      "epoch": 1.4641096005635639,
      "grad_norm": 2.734375,
      "learning_rate": 2.8442512935141267e-05,
      "loss": 0.8253,
      "step": 417750
    },
    {
      "epoch": 1.4641446480704596,
      "grad_norm": 3.078125,
      "learning_rate": 2.844186390647757e-05,
      "loss": 0.7658,
      "step": 417760
    },
    {
      "epoch": 1.4641796955773552,
      "grad_norm": 2.734375,
      "learning_rate": 2.8441214877813867e-05,
      "loss": 0.809,
      "step": 417770
    },
    {
      "epoch": 1.4642147430842507,
      "grad_norm": 3.171875,
      "learning_rate": 2.8440565849150165e-05,
      "loss": 0.8346,
      "step": 417780
    },
    {
      "epoch": 1.4642497905911462,
      "grad_norm": 2.75,
      "learning_rate": 2.8439916820486463e-05,
      "loss": 0.8457,
      "step": 417790
    },
    {
      "epoch": 1.4642848380980418,
      "grad_norm": 3.21875,
      "learning_rate": 2.843926779182276e-05,
      "loss": 0.9252,
      "step": 417800
    },
    {
      "epoch": 1.4643198856049375,
      "grad_norm": 2.796875,
      "learning_rate": 2.843861876315906e-05,
      "loss": 0.8335,
      "step": 417810
    },
    {
      "epoch": 1.464354933111833,
      "grad_norm": 2.515625,
      "learning_rate": 2.8437969734495357e-05,
      "loss": 0.8155,
      "step": 417820
    },
    {
      "epoch": 1.4643899806187286,
      "grad_norm": 2.984375,
      "learning_rate": 2.8437320705831655e-05,
      "loss": 0.8384,
      "step": 417830
    },
    {
      "epoch": 1.4644250281256244,
      "grad_norm": 3.0,
      "learning_rate": 2.8436671677167953e-05,
      "loss": 0.8663,
      "step": 417840
    },
    {
      "epoch": 1.46446007563252,
      "grad_norm": 2.65625,
      "learning_rate": 2.843602264850425e-05,
      "loss": 0.8089,
      "step": 417850
    },
    {
      "epoch": 1.4644951231394154,
      "grad_norm": 2.96875,
      "learning_rate": 2.843537361984055e-05,
      "loss": 0.8608,
      "step": 417860
    },
    {
      "epoch": 1.4645301706463112,
      "grad_norm": 2.640625,
      "learning_rate": 2.8434724591176847e-05,
      "loss": 0.8395,
      "step": 417870
    },
    {
      "epoch": 1.4645652181532067,
      "grad_norm": 3.078125,
      "learning_rate": 2.8434075562513145e-05,
      "loss": 0.7965,
      "step": 417880
    },
    {
      "epoch": 1.4646002656601023,
      "grad_norm": 2.921875,
      "learning_rate": 2.8433426533849443e-05,
      "loss": 0.86,
      "step": 417890
    },
    {
      "epoch": 1.4646353131669978,
      "grad_norm": 2.828125,
      "learning_rate": 2.8432777505185744e-05,
      "loss": 0.8788,
      "step": 417900
    },
    {
      "epoch": 1.4646703606738933,
      "grad_norm": 3.0,
      "learning_rate": 2.8432128476522042e-05,
      "loss": 0.8511,
      "step": 417910
    },
    {
      "epoch": 1.464705408180789,
      "grad_norm": 2.84375,
      "learning_rate": 2.843147944785834e-05,
      "loss": 0.8968,
      "step": 417920
    },
    {
      "epoch": 1.4647404556876846,
      "grad_norm": 2.84375,
      "learning_rate": 2.8430830419194638e-05,
      "loss": 0.8454,
      "step": 417930
    },
    {
      "epoch": 1.4647755031945802,
      "grad_norm": 3.125,
      "learning_rate": 2.8430181390530936e-05,
      "loss": 0.9147,
      "step": 417940
    },
    {
      "epoch": 1.464810550701476,
      "grad_norm": 2.796875,
      "learning_rate": 2.8429532361867227e-05,
      "loss": 0.7641,
      "step": 417950
    },
    {
      "epoch": 1.4648455982083715,
      "grad_norm": 2.6875,
      "learning_rate": 2.8428883333203525e-05,
      "loss": 0.9059,
      "step": 417960
    },
    {
      "epoch": 1.464880645715267,
      "grad_norm": 3.109375,
      "learning_rate": 2.8428234304539823e-05,
      "loss": 0.8622,
      "step": 417970
    },
    {
      "epoch": 1.4649156932221628,
      "grad_norm": 2.859375,
      "learning_rate": 2.842758527587612e-05,
      "loss": 0.8117,
      "step": 417980
    },
    {
      "epoch": 1.4649507407290583,
      "grad_norm": 3.109375,
      "learning_rate": 2.8426936247212423e-05,
      "loss": 0.7771,
      "step": 417990
    },
    {
      "epoch": 1.4649857882359538,
      "grad_norm": 3.484375,
      "learning_rate": 2.842628721854872e-05,
      "loss": 0.8732,
      "step": 418000
    },
    {
      "epoch": 1.4650208357428494,
      "grad_norm": 2.671875,
      "learning_rate": 2.842563818988502e-05,
      "loss": 0.8089,
      "step": 418010
    },
    {
      "epoch": 1.465055883249745,
      "grad_norm": 3.28125,
      "learning_rate": 2.8424989161221317e-05,
      "loss": 0.7928,
      "step": 418020
    },
    {
      "epoch": 1.4650909307566407,
      "grad_norm": 2.859375,
      "learning_rate": 2.8424340132557615e-05,
      "loss": 0.892,
      "step": 418030
    },
    {
      "epoch": 1.4651259782635362,
      "grad_norm": 2.90625,
      "learning_rate": 2.8423691103893913e-05,
      "loss": 0.8219,
      "step": 418040
    },
    {
      "epoch": 1.4651610257704317,
      "grad_norm": 2.9375,
      "learning_rate": 2.842304207523021e-05,
      "loss": 0.7936,
      "step": 418050
    },
    {
      "epoch": 1.4651960732773275,
      "grad_norm": 2.890625,
      "learning_rate": 2.842239304656651e-05,
      "loss": 0.8525,
      "step": 418060
    },
    {
      "epoch": 1.465231120784223,
      "grad_norm": 3.171875,
      "learning_rate": 2.8421744017902807e-05,
      "loss": 0.8036,
      "step": 418070
    },
    {
      "epoch": 1.4652661682911186,
      "grad_norm": 3.046875,
      "learning_rate": 2.8421094989239105e-05,
      "loss": 0.8693,
      "step": 418080
    },
    {
      "epoch": 1.4653012157980143,
      "grad_norm": 2.875,
      "learning_rate": 2.8420445960575403e-05,
      "loss": 0.8357,
      "step": 418090
    },
    {
      "epoch": 1.4653362633049098,
      "grad_norm": 2.390625,
      "learning_rate": 2.84197969319117e-05,
      "loss": 0.8443,
      "step": 418100
    },
    {
      "epoch": 1.4653713108118054,
      "grad_norm": 2.6875,
      "learning_rate": 2.8419147903248e-05,
      "loss": 0.8382,
      "step": 418110
    },
    {
      "epoch": 1.465406358318701,
      "grad_norm": 2.9375,
      "learning_rate": 2.8418498874584297e-05,
      "loss": 0.8589,
      "step": 418120
    },
    {
      "epoch": 1.4654414058255967,
      "grad_norm": 3.484375,
      "learning_rate": 2.8417849845920598e-05,
      "loss": 0.8728,
      "step": 418130
    },
    {
      "epoch": 1.4654764533324922,
      "grad_norm": 3.46875,
      "learning_rate": 2.8417200817256896e-05,
      "loss": 0.8545,
      "step": 418140
    },
    {
      "epoch": 1.4655115008393877,
      "grad_norm": 2.765625,
      "learning_rate": 2.8416551788593194e-05,
      "loss": 0.8529,
      "step": 418150
    },
    {
      "epoch": 1.4655465483462833,
      "grad_norm": 3.0,
      "learning_rate": 2.8415902759929492e-05,
      "loss": 0.7892,
      "step": 418160
    },
    {
      "epoch": 1.465581595853179,
      "grad_norm": 2.734375,
      "learning_rate": 2.841525373126579e-05,
      "loss": 0.9551,
      "step": 418170
    },
    {
      "epoch": 1.4656166433600746,
      "grad_norm": 2.953125,
      "learning_rate": 2.8414604702602088e-05,
      "loss": 0.8468,
      "step": 418180
    },
    {
      "epoch": 1.4656516908669701,
      "grad_norm": 2.921875,
      "learning_rate": 2.8413955673938386e-05,
      "loss": 0.8735,
      "step": 418190
    },
    {
      "epoch": 1.4656867383738659,
      "grad_norm": 3.046875,
      "learning_rate": 2.8413306645274684e-05,
      "loss": 0.762,
      "step": 418200
    },
    {
      "epoch": 1.4657217858807614,
      "grad_norm": 3.328125,
      "learning_rate": 2.8412657616610982e-05,
      "loss": 0.8581,
      "step": 418210
    },
    {
      "epoch": 1.465756833387657,
      "grad_norm": 2.578125,
      "learning_rate": 2.841200858794728e-05,
      "loss": 0.8564,
      "step": 418220
    },
    {
      "epoch": 1.4657918808945527,
      "grad_norm": 3.28125,
      "learning_rate": 2.8411359559283578e-05,
      "loss": 0.8733,
      "step": 418230
    },
    {
      "epoch": 1.4658269284014482,
      "grad_norm": 2.9375,
      "learning_rate": 2.8410710530619876e-05,
      "loss": 0.8432,
      "step": 418240
    },
    {
      "epoch": 1.4658619759083438,
      "grad_norm": 2.390625,
      "learning_rate": 2.8410061501956174e-05,
      "loss": 0.7702,
      "step": 418250
    },
    {
      "epoch": 1.4658970234152393,
      "grad_norm": 3.0625,
      "learning_rate": 2.8409412473292475e-05,
      "loss": 0.9192,
      "step": 418260
    },
    {
      "epoch": 1.4659320709221348,
      "grad_norm": 2.703125,
      "learning_rate": 2.8408763444628773e-05,
      "loss": 0.7936,
      "step": 418270
    },
    {
      "epoch": 1.4659671184290306,
      "grad_norm": 3.140625,
      "learning_rate": 2.840811441596507e-05,
      "loss": 0.9165,
      "step": 418280
    },
    {
      "epoch": 1.4660021659359261,
      "grad_norm": 2.78125,
      "learning_rate": 2.840746538730137e-05,
      "loss": 0.838,
      "step": 418290
    },
    {
      "epoch": 1.4660372134428217,
      "grad_norm": 2.625,
      "learning_rate": 2.8406816358637667e-05,
      "loss": 0.855,
      "step": 418300
    },
    {
      "epoch": 1.4660722609497174,
      "grad_norm": 2.5,
      "learning_rate": 2.8406167329973965e-05,
      "loss": 0.8075,
      "step": 418310
    },
    {
      "epoch": 1.466107308456613,
      "grad_norm": 3.09375,
      "learning_rate": 2.8405518301310257e-05,
      "loss": 0.8353,
      "step": 418320
    },
    {
      "epoch": 1.4661423559635085,
      "grad_norm": 3.03125,
      "learning_rate": 2.8404869272646555e-05,
      "loss": 0.8518,
      "step": 418330
    },
    {
      "epoch": 1.4661774034704043,
      "grad_norm": 3.0625,
      "learning_rate": 2.8404220243982853e-05,
      "loss": 0.8993,
      "step": 418340
    },
    {
      "epoch": 1.4662124509772998,
      "grad_norm": 2.9375,
      "learning_rate": 2.8403571215319154e-05,
      "loss": 0.8047,
      "step": 418350
    },
    {
      "epoch": 1.4662474984841953,
      "grad_norm": 3.1875,
      "learning_rate": 2.8402922186655452e-05,
      "loss": 0.7851,
      "step": 418360
    },
    {
      "epoch": 1.4662825459910909,
      "grad_norm": 2.46875,
      "learning_rate": 2.840227315799175e-05,
      "loss": 0.8802,
      "step": 418370
    },
    {
      "epoch": 1.4663175934979864,
      "grad_norm": 3.96875,
      "learning_rate": 2.8401624129328048e-05,
      "loss": 0.8869,
      "step": 418380
    },
    {
      "epoch": 1.4663526410048822,
      "grad_norm": 2.5,
      "learning_rate": 2.8400975100664346e-05,
      "loss": 0.8228,
      "step": 418390
    },
    {
      "epoch": 1.4663876885117777,
      "grad_norm": 2.5,
      "learning_rate": 2.8400326072000644e-05,
      "loss": 0.7636,
      "step": 418400
    },
    {
      "epoch": 1.4664227360186732,
      "grad_norm": 3.171875,
      "learning_rate": 2.8399677043336942e-05,
      "loss": 0.9404,
      "step": 418410
    },
    {
      "epoch": 1.466457783525569,
      "grad_norm": 3.171875,
      "learning_rate": 2.839902801467324e-05,
      "loss": 0.8846,
      "step": 418420
    },
    {
      "epoch": 1.4664928310324645,
      "grad_norm": 3.65625,
      "learning_rate": 2.8398378986009538e-05,
      "loss": 0.9121,
      "step": 418430
    },
    {
      "epoch": 1.46652787853936,
      "grad_norm": 2.8125,
      "learning_rate": 2.8397729957345836e-05,
      "loss": 0.881,
      "step": 418440
    },
    {
      "epoch": 1.4665629260462558,
      "grad_norm": 2.984375,
      "learning_rate": 2.8397080928682134e-05,
      "loss": 0.9016,
      "step": 418450
    },
    {
      "epoch": 1.4665979735531514,
      "grad_norm": 2.703125,
      "learning_rate": 2.8396431900018432e-05,
      "loss": 0.8299,
      "step": 418460
    },
    {
      "epoch": 1.466633021060047,
      "grad_norm": 2.9375,
      "learning_rate": 2.839578287135473e-05,
      "loss": 0.8685,
      "step": 418470
    },
    {
      "epoch": 1.4666680685669424,
      "grad_norm": 2.859375,
      "learning_rate": 2.8395133842691028e-05,
      "loss": 0.8075,
      "step": 418480
    },
    {
      "epoch": 1.466703116073838,
      "grad_norm": 3.265625,
      "learning_rate": 2.839448481402733e-05,
      "loss": 0.9137,
      "step": 418490
    },
    {
      "epoch": 1.4667381635807337,
      "grad_norm": 3.046875,
      "learning_rate": 2.8393835785363627e-05,
      "loss": 0.9313,
      "step": 418500
    },
    {
      "epoch": 1.4667732110876293,
      "grad_norm": 2.453125,
      "learning_rate": 2.8393186756699925e-05,
      "loss": 0.8297,
      "step": 418510
    },
    {
      "epoch": 1.4668082585945248,
      "grad_norm": 2.734375,
      "learning_rate": 2.8392537728036223e-05,
      "loss": 0.8821,
      "step": 418520
    },
    {
      "epoch": 1.4668433061014206,
      "grad_norm": 3.140625,
      "learning_rate": 2.839188869937252e-05,
      "loss": 0.8354,
      "step": 418530
    },
    {
      "epoch": 1.466878353608316,
      "grad_norm": 3.046875,
      "learning_rate": 2.839123967070882e-05,
      "loss": 0.8662,
      "step": 418540
    },
    {
      "epoch": 1.4669134011152116,
      "grad_norm": 3.0,
      "learning_rate": 2.8390590642045117e-05,
      "loss": 0.9114,
      "step": 418550
    },
    {
      "epoch": 1.4669484486221074,
      "grad_norm": 3.15625,
      "learning_rate": 2.8389941613381415e-05,
      "loss": 0.8009,
      "step": 418560
    },
    {
      "epoch": 1.466983496129003,
      "grad_norm": 2.875,
      "learning_rate": 2.8389292584717713e-05,
      "loss": 0.8339,
      "step": 418570
    },
    {
      "epoch": 1.4670185436358985,
      "grad_norm": 2.875,
      "learning_rate": 2.838864355605401e-05,
      "loss": 0.8768,
      "step": 418580
    },
    {
      "epoch": 1.467053591142794,
      "grad_norm": 2.828125,
      "learning_rate": 2.838799452739031e-05,
      "loss": 0.862,
      "step": 418590
    },
    {
      "epoch": 1.4670886386496895,
      "grad_norm": 2.609375,
      "learning_rate": 2.8387345498726607e-05,
      "loss": 0.8006,
      "step": 418600
    },
    {
      "epoch": 1.4671236861565853,
      "grad_norm": 3.03125,
      "learning_rate": 2.8386696470062905e-05,
      "loss": 0.8609,
      "step": 418610
    },
    {
      "epoch": 1.4671587336634808,
      "grad_norm": 2.96875,
      "learning_rate": 2.8386047441399203e-05,
      "loss": 0.8424,
      "step": 418620
    },
    {
      "epoch": 1.4671937811703764,
      "grad_norm": 2.75,
      "learning_rate": 2.8385398412735505e-05,
      "loss": 0.7951,
      "step": 418630
    },
    {
      "epoch": 1.4672288286772721,
      "grad_norm": 2.578125,
      "learning_rate": 2.8384749384071803e-05,
      "loss": 0.8529,
      "step": 418640
    },
    {
      "epoch": 1.4672638761841676,
      "grad_norm": 3.078125,
      "learning_rate": 2.83841003554081e-05,
      "loss": 0.798,
      "step": 418650
    },
    {
      "epoch": 1.4672989236910632,
      "grad_norm": 3.21875,
      "learning_rate": 2.83834513267444e-05,
      "loss": 0.795,
      "step": 418660
    },
    {
      "epoch": 1.467333971197959,
      "grad_norm": 2.78125,
      "learning_rate": 2.8382802298080697e-05,
      "loss": 0.8635,
      "step": 418670
    },
    {
      "epoch": 1.4673690187048545,
      "grad_norm": 3.15625,
      "learning_rate": 2.8382153269416995e-05,
      "loss": 0.8536,
      "step": 418680
    },
    {
      "epoch": 1.46740406621175,
      "grad_norm": 2.5,
      "learning_rate": 2.8381504240753293e-05,
      "loss": 0.8432,
      "step": 418690
    },
    {
      "epoch": 1.4674391137186455,
      "grad_norm": 2.78125,
      "learning_rate": 2.8380855212089584e-05,
      "loss": 0.8,
      "step": 418700
    },
    {
      "epoch": 1.467474161225541,
      "grad_norm": 2.671875,
      "learning_rate": 2.8380206183425882e-05,
      "loss": 0.8883,
      "step": 418710
    },
    {
      "epoch": 1.4675092087324368,
      "grad_norm": 3.3125,
      "learning_rate": 2.8379557154762183e-05,
      "loss": 0.8902,
      "step": 418720
    },
    {
      "epoch": 1.4675442562393324,
      "grad_norm": 3.421875,
      "learning_rate": 2.837890812609848e-05,
      "loss": 0.8653,
      "step": 418730
    },
    {
      "epoch": 1.467579303746228,
      "grad_norm": 2.828125,
      "learning_rate": 2.837825909743478e-05,
      "loss": 0.8512,
      "step": 418740
    },
    {
      "epoch": 1.4676143512531237,
      "grad_norm": 2.953125,
      "learning_rate": 2.8377610068771077e-05,
      "loss": 0.8874,
      "step": 418750
    },
    {
      "epoch": 1.4676493987600192,
      "grad_norm": 3.21875,
      "learning_rate": 2.8376961040107375e-05,
      "loss": 0.7992,
      "step": 418760
    },
    {
      "epoch": 1.4676844462669147,
      "grad_norm": 2.734375,
      "learning_rate": 2.8376312011443673e-05,
      "loss": 0.741,
      "step": 418770
    },
    {
      "epoch": 1.4677194937738105,
      "grad_norm": 2.8125,
      "learning_rate": 2.837566298277997e-05,
      "loss": 0.7661,
      "step": 418780
    },
    {
      "epoch": 1.467754541280706,
      "grad_norm": 3.1875,
      "learning_rate": 2.837501395411627e-05,
      "loss": 0.8263,
      "step": 418790
    },
    {
      "epoch": 1.4677895887876016,
      "grad_norm": 2.953125,
      "learning_rate": 2.8374364925452567e-05,
      "loss": 0.8832,
      "step": 418800
    },
    {
      "epoch": 1.467824636294497,
      "grad_norm": 2.859375,
      "learning_rate": 2.8373715896788865e-05,
      "loss": 0.8914,
      "step": 418810
    },
    {
      "epoch": 1.4678596838013929,
      "grad_norm": 2.75,
      "learning_rate": 2.8373066868125163e-05,
      "loss": 0.7927,
      "step": 418820
    },
    {
      "epoch": 1.4678947313082884,
      "grad_norm": 3.125,
      "learning_rate": 2.837241783946146e-05,
      "loss": 0.9341,
      "step": 418830
    },
    {
      "epoch": 1.467929778815184,
      "grad_norm": 2.890625,
      "learning_rate": 2.837176881079776e-05,
      "loss": 0.8671,
      "step": 418840
    },
    {
      "epoch": 1.4679648263220795,
      "grad_norm": 2.875,
      "learning_rate": 2.8371119782134057e-05,
      "loss": 0.8608,
      "step": 418850
    },
    {
      "epoch": 1.4679998738289752,
      "grad_norm": 2.703125,
      "learning_rate": 2.837047075347036e-05,
      "loss": 0.8968,
      "step": 418860
    },
    {
      "epoch": 1.4680349213358708,
      "grad_norm": 2.890625,
      "learning_rate": 2.8369821724806657e-05,
      "loss": 0.7408,
      "step": 418870
    },
    {
      "epoch": 1.4680699688427663,
      "grad_norm": 2.65625,
      "learning_rate": 2.8369172696142955e-05,
      "loss": 0.8297,
      "step": 418880
    },
    {
      "epoch": 1.468105016349662,
      "grad_norm": 3.0625,
      "learning_rate": 2.8368523667479253e-05,
      "loss": 0.8311,
      "step": 418890
    },
    {
      "epoch": 1.4681400638565576,
      "grad_norm": 3.015625,
      "learning_rate": 2.836787463881555e-05,
      "loss": 0.9769,
      "step": 418900
    },
    {
      "epoch": 1.4681751113634531,
      "grad_norm": 2.78125,
      "learning_rate": 2.836722561015185e-05,
      "loss": 0.8806,
      "step": 418910
    },
    {
      "epoch": 1.468210158870349,
      "grad_norm": 3.203125,
      "learning_rate": 2.8366576581488147e-05,
      "loss": 0.8447,
      "step": 418920
    },
    {
      "epoch": 1.4682452063772444,
      "grad_norm": 2.84375,
      "learning_rate": 2.8365927552824445e-05,
      "loss": 0.7821,
      "step": 418930
    },
    {
      "epoch": 1.46828025388414,
      "grad_norm": 3.03125,
      "learning_rate": 2.8365278524160743e-05,
      "loss": 0.9519,
      "step": 418940
    },
    {
      "epoch": 1.4683153013910355,
      "grad_norm": 2.59375,
      "learning_rate": 2.836462949549704e-05,
      "loss": 0.8614,
      "step": 418950
    },
    {
      "epoch": 1.468350348897931,
      "grad_norm": 3.03125,
      "learning_rate": 2.836398046683334e-05,
      "loss": 0.8597,
      "step": 418960
    },
    {
      "epoch": 1.4683853964048268,
      "grad_norm": 2.5,
      "learning_rate": 2.8363331438169637e-05,
      "loss": 0.8559,
      "step": 418970
    },
    {
      "epoch": 1.4684204439117223,
      "grad_norm": 2.921875,
      "learning_rate": 2.8362682409505935e-05,
      "loss": 0.8871,
      "step": 418980
    },
    {
      "epoch": 1.4684554914186179,
      "grad_norm": 3.09375,
      "learning_rate": 2.8362033380842233e-05,
      "loss": 0.8455,
      "step": 418990
    },
    {
      "epoch": 1.4684905389255136,
      "grad_norm": 3.671875,
      "learning_rate": 2.8361384352178534e-05,
      "loss": 0.8417,
      "step": 419000
    },
    {
      "epoch": 1.4685255864324092,
      "grad_norm": 2.515625,
      "learning_rate": 2.8360735323514832e-05,
      "loss": 0.862,
      "step": 419010
    },
    {
      "epoch": 1.4685606339393047,
      "grad_norm": 3.03125,
      "learning_rate": 2.836008629485113e-05,
      "loss": 0.7397,
      "step": 419020
    },
    {
      "epoch": 1.4685956814462005,
      "grad_norm": 3.21875,
      "learning_rate": 2.8359437266187428e-05,
      "loss": 0.8555,
      "step": 419030
    },
    {
      "epoch": 1.468630728953096,
      "grad_norm": 3.28125,
      "learning_rate": 2.8358788237523726e-05,
      "loss": 0.8331,
      "step": 419040
    },
    {
      "epoch": 1.4686657764599915,
      "grad_norm": 2.71875,
      "learning_rate": 2.8358139208860024e-05,
      "loss": 0.8408,
      "step": 419050
    },
    {
      "epoch": 1.468700823966887,
      "grad_norm": 2.484375,
      "learning_rate": 2.8357490180196322e-05,
      "loss": 0.8191,
      "step": 419060
    },
    {
      "epoch": 1.4687358714737826,
      "grad_norm": 2.34375,
      "learning_rate": 2.8356841151532613e-05,
      "loss": 0.8274,
      "step": 419070
    },
    {
      "epoch": 1.4687709189806784,
      "grad_norm": 3.09375,
      "learning_rate": 2.835619212286891e-05,
      "loss": 0.8736,
      "step": 419080
    },
    {
      "epoch": 1.4688059664875739,
      "grad_norm": 2.859375,
      "learning_rate": 2.8355543094205213e-05,
      "loss": 0.8198,
      "step": 419090
    },
    {
      "epoch": 1.4688410139944694,
      "grad_norm": 3.34375,
      "learning_rate": 2.835489406554151e-05,
      "loss": 0.8699,
      "step": 419100
    },
    {
      "epoch": 1.4688760615013652,
      "grad_norm": 3.03125,
      "learning_rate": 2.835424503687781e-05,
      "loss": 0.8283,
      "step": 419110
    },
    {
      "epoch": 1.4689111090082607,
      "grad_norm": 3.296875,
      "learning_rate": 2.8353596008214107e-05,
      "loss": 0.8188,
      "step": 419120
    },
    {
      "epoch": 1.4689461565151563,
      "grad_norm": 2.765625,
      "learning_rate": 2.8352946979550405e-05,
      "loss": 0.8283,
      "step": 419130
    },
    {
      "epoch": 1.468981204022052,
      "grad_norm": 3.09375,
      "learning_rate": 2.8352297950886703e-05,
      "loss": 0.8298,
      "step": 419140
    },
    {
      "epoch": 1.4690162515289475,
      "grad_norm": 2.984375,
      "learning_rate": 2.8351648922223e-05,
      "loss": 0.8904,
      "step": 419150
    },
    {
      "epoch": 1.469051299035843,
      "grad_norm": 3.25,
      "learning_rate": 2.83509998935593e-05,
      "loss": 0.8344,
      "step": 419160
    },
    {
      "epoch": 1.4690863465427386,
      "grad_norm": 3.3125,
      "learning_rate": 2.8350350864895597e-05,
      "loss": 0.8265,
      "step": 419170
    },
    {
      "epoch": 1.4691213940496342,
      "grad_norm": 2.65625,
      "learning_rate": 2.8349701836231895e-05,
      "loss": 0.8668,
      "step": 419180
    },
    {
      "epoch": 1.46915644155653,
      "grad_norm": 2.671875,
      "learning_rate": 2.8349052807568193e-05,
      "loss": 0.8272,
      "step": 419190
    },
    {
      "epoch": 1.4691914890634254,
      "grad_norm": 2.3125,
      "learning_rate": 2.834840377890449e-05,
      "loss": 0.8833,
      "step": 419200
    },
    {
      "epoch": 1.469226536570321,
      "grad_norm": 3.28125,
      "learning_rate": 2.834775475024079e-05,
      "loss": 0.7875,
      "step": 419210
    },
    {
      "epoch": 1.4692615840772167,
      "grad_norm": 2.84375,
      "learning_rate": 2.8347105721577087e-05,
      "loss": 0.8005,
      "step": 419220
    },
    {
      "epoch": 1.4692966315841123,
      "grad_norm": 3.375,
      "learning_rate": 2.8346456692913388e-05,
      "loss": 0.8586,
      "step": 419230
    },
    {
      "epoch": 1.4693316790910078,
      "grad_norm": 2.71875,
      "learning_rate": 2.8345807664249686e-05,
      "loss": 0.8585,
      "step": 419240
    },
    {
      "epoch": 1.4693667265979036,
      "grad_norm": 3.234375,
      "learning_rate": 2.8345158635585984e-05,
      "loss": 0.8285,
      "step": 419250
    },
    {
      "epoch": 1.469401774104799,
      "grad_norm": 3.21875,
      "learning_rate": 2.8344509606922282e-05,
      "loss": 0.878,
      "step": 419260
    },
    {
      "epoch": 1.4694368216116946,
      "grad_norm": 2.765625,
      "learning_rate": 2.834386057825858e-05,
      "loss": 0.8624,
      "step": 419270
    },
    {
      "epoch": 1.4694718691185902,
      "grad_norm": 2.28125,
      "learning_rate": 2.8343211549594878e-05,
      "loss": 0.7853,
      "step": 419280
    },
    {
      "epoch": 1.4695069166254857,
      "grad_norm": 2.96875,
      "learning_rate": 2.8342562520931176e-05,
      "loss": 0.8169,
      "step": 419290
    },
    {
      "epoch": 1.4695419641323815,
      "grad_norm": 2.9375,
      "learning_rate": 2.8341913492267474e-05,
      "loss": 0.8666,
      "step": 419300
    },
    {
      "epoch": 1.469577011639277,
      "grad_norm": 2.953125,
      "learning_rate": 2.8341264463603772e-05,
      "loss": 0.8248,
      "step": 419310
    },
    {
      "epoch": 1.4696120591461725,
      "grad_norm": 2.875,
      "learning_rate": 2.834061543494007e-05,
      "loss": 0.7936,
      "step": 419320
    },
    {
      "epoch": 1.4696471066530683,
      "grad_norm": 3.078125,
      "learning_rate": 2.8339966406276368e-05,
      "loss": 0.8267,
      "step": 419330
    },
    {
      "epoch": 1.4696821541599638,
      "grad_norm": 2.65625,
      "learning_rate": 2.8339317377612666e-05,
      "loss": 0.8583,
      "step": 419340
    },
    {
      "epoch": 1.4697172016668594,
      "grad_norm": 2.59375,
      "learning_rate": 2.8338668348948964e-05,
      "loss": 0.8533,
      "step": 419350
    },
    {
      "epoch": 1.4697522491737551,
      "grad_norm": 2.46875,
      "learning_rate": 2.8338019320285266e-05,
      "loss": 0.8048,
      "step": 419360
    },
    {
      "epoch": 1.4697872966806507,
      "grad_norm": 2.890625,
      "learning_rate": 2.8337370291621564e-05,
      "loss": 0.8039,
      "step": 419370
    },
    {
      "epoch": 1.4698223441875462,
      "grad_norm": 2.90625,
      "learning_rate": 2.833672126295786e-05,
      "loss": 0.7613,
      "step": 419380
    },
    {
      "epoch": 1.4698573916944417,
      "grad_norm": 2.953125,
      "learning_rate": 2.833607223429416e-05,
      "loss": 0.9182,
      "step": 419390
    },
    {
      "epoch": 1.4698924392013373,
      "grad_norm": 2.828125,
      "learning_rate": 2.8335423205630458e-05,
      "loss": 0.8672,
      "step": 419400
    },
    {
      "epoch": 1.469927486708233,
      "grad_norm": 2.96875,
      "learning_rate": 2.8334774176966756e-05,
      "loss": 0.8012,
      "step": 419410
    },
    {
      "epoch": 1.4699625342151286,
      "grad_norm": 2.828125,
      "learning_rate": 2.8334125148303054e-05,
      "loss": 0.9211,
      "step": 419420
    },
    {
      "epoch": 1.469997581722024,
      "grad_norm": 3.328125,
      "learning_rate": 2.833347611963935e-05,
      "loss": 0.7936,
      "step": 419430
    },
    {
      "epoch": 1.4700326292289199,
      "grad_norm": 2.984375,
      "learning_rate": 2.833282709097565e-05,
      "loss": 0.861,
      "step": 419440
    },
    {
      "epoch": 1.4700676767358154,
      "grad_norm": 2.75,
      "learning_rate": 2.8332178062311944e-05,
      "loss": 0.8996,
      "step": 419450
    },
    {
      "epoch": 1.470102724242711,
      "grad_norm": 3.109375,
      "learning_rate": 2.8331529033648242e-05,
      "loss": 0.9406,
      "step": 419460
    },
    {
      "epoch": 1.4701377717496067,
      "grad_norm": 3.109375,
      "learning_rate": 2.833088000498454e-05,
      "loss": 0.8361,
      "step": 419470
    },
    {
      "epoch": 1.4701728192565022,
      "grad_norm": 2.8125,
      "learning_rate": 2.8330230976320838e-05,
      "loss": 0.8963,
      "step": 419480
    },
    {
      "epoch": 1.4702078667633978,
      "grad_norm": 2.78125,
      "learning_rate": 2.8329581947657136e-05,
      "loss": 0.8091,
      "step": 419490
    },
    {
      "epoch": 1.4702429142702933,
      "grad_norm": 2.875,
      "learning_rate": 2.8328932918993434e-05,
      "loss": 0.8677,
      "step": 419500
    },
    {
      "epoch": 1.470277961777189,
      "grad_norm": 3.203125,
      "learning_rate": 2.8328283890329732e-05,
      "loss": 0.8712,
      "step": 419510
    },
    {
      "epoch": 1.4703130092840846,
      "grad_norm": 2.84375,
      "learning_rate": 2.832763486166603e-05,
      "loss": 0.8052,
      "step": 419520
    },
    {
      "epoch": 1.4703480567909801,
      "grad_norm": 3.046875,
      "learning_rate": 2.8326985833002328e-05,
      "loss": 0.9523,
      "step": 419530
    },
    {
      "epoch": 1.4703831042978757,
      "grad_norm": 2.96875,
      "learning_rate": 2.8326336804338626e-05,
      "loss": 0.8839,
      "step": 419540
    },
    {
      "epoch": 1.4704181518047714,
      "grad_norm": 3.015625,
      "learning_rate": 2.8325687775674924e-05,
      "loss": 0.8465,
      "step": 419550
    },
    {
      "epoch": 1.470453199311667,
      "grad_norm": 2.875,
      "learning_rate": 2.8325038747011222e-05,
      "loss": 0.8659,
      "step": 419560
    },
    {
      "epoch": 1.4704882468185625,
      "grad_norm": 2.390625,
      "learning_rate": 2.832438971834752e-05,
      "loss": 0.8581,
      "step": 419570
    },
    {
      "epoch": 1.4705232943254583,
      "grad_norm": 3.171875,
      "learning_rate": 2.8323740689683818e-05,
      "loss": 0.8384,
      "step": 419580
    },
    {
      "epoch": 1.4705583418323538,
      "grad_norm": 3.359375,
      "learning_rate": 2.832309166102012e-05,
      "loss": 0.8411,
      "step": 419590
    },
    {
      "epoch": 1.4705933893392493,
      "grad_norm": 3.5,
      "learning_rate": 2.8322442632356418e-05,
      "loss": 0.914,
      "step": 419600
    },
    {
      "epoch": 1.470628436846145,
      "grad_norm": 3.328125,
      "learning_rate": 2.8321793603692716e-05,
      "loss": 0.8179,
      "step": 419610
    },
    {
      "epoch": 1.4706634843530406,
      "grad_norm": 2.71875,
      "learning_rate": 2.8321144575029014e-05,
      "loss": 0.8556,
      "step": 419620
    },
    {
      "epoch": 1.4706985318599362,
      "grad_norm": 3.046875,
      "learning_rate": 2.832049554636531e-05,
      "loss": 0.8149,
      "step": 419630
    },
    {
      "epoch": 1.4707335793668317,
      "grad_norm": 3.46875,
      "learning_rate": 2.831984651770161e-05,
      "loss": 0.9105,
      "step": 419640
    },
    {
      "epoch": 1.4707686268737272,
      "grad_norm": 2.875,
      "learning_rate": 2.8319197489037908e-05,
      "loss": 0.8543,
      "step": 419650
    },
    {
      "epoch": 1.470803674380623,
      "grad_norm": 2.828125,
      "learning_rate": 2.8318548460374206e-05,
      "loss": 0.83,
      "step": 419660
    },
    {
      "epoch": 1.4708387218875185,
      "grad_norm": 2.921875,
      "learning_rate": 2.8317899431710504e-05,
      "loss": 0.7616,
      "step": 419670
    },
    {
      "epoch": 1.470873769394414,
      "grad_norm": 2.828125,
      "learning_rate": 2.83172504030468e-05,
      "loss": 0.7893,
      "step": 419680
    },
    {
      "epoch": 1.4709088169013098,
      "grad_norm": 3.40625,
      "learning_rate": 2.83166013743831e-05,
      "loss": 0.9065,
      "step": 419690
    },
    {
      "epoch": 1.4709438644082053,
      "grad_norm": 2.96875,
      "learning_rate": 2.8315952345719398e-05,
      "loss": 0.794,
      "step": 419700
    },
    {
      "epoch": 1.4709789119151009,
      "grad_norm": 3.0,
      "learning_rate": 2.8315303317055696e-05,
      "loss": 0.8391,
      "step": 419710
    },
    {
      "epoch": 1.4710139594219966,
      "grad_norm": 3.15625,
      "learning_rate": 2.8314654288391994e-05,
      "loss": 0.8617,
      "step": 419720
    },
    {
      "epoch": 1.4710490069288922,
      "grad_norm": 3.375,
      "learning_rate": 2.8314005259728295e-05,
      "loss": 0.8486,
      "step": 419730
    },
    {
      "epoch": 1.4710840544357877,
      "grad_norm": 3.09375,
      "learning_rate": 2.8313356231064593e-05,
      "loss": 0.8198,
      "step": 419740
    },
    {
      "epoch": 1.4711191019426832,
      "grad_norm": 2.421875,
      "learning_rate": 2.831270720240089e-05,
      "loss": 0.8122,
      "step": 419750
    },
    {
      "epoch": 1.4711541494495788,
      "grad_norm": 2.859375,
      "learning_rate": 2.831205817373719e-05,
      "loss": 0.812,
      "step": 419760
    },
    {
      "epoch": 1.4711891969564745,
      "grad_norm": 3.03125,
      "learning_rate": 2.8311409145073487e-05,
      "loss": 0.8425,
      "step": 419770
    },
    {
      "epoch": 1.47122424446337,
      "grad_norm": 2.84375,
      "learning_rate": 2.8310760116409785e-05,
      "loss": 0.8735,
      "step": 419780
    },
    {
      "epoch": 1.4712592919702656,
      "grad_norm": 2.671875,
      "learning_rate": 2.8310111087746083e-05,
      "loss": 0.8596,
      "step": 419790
    },
    {
      "epoch": 1.4712943394771614,
      "grad_norm": 2.84375,
      "learning_rate": 2.830946205908238e-05,
      "loss": 0.7641,
      "step": 419800
    },
    {
      "epoch": 1.471329386984057,
      "grad_norm": 3.0,
      "learning_rate": 2.830881303041868e-05,
      "loss": 0.9077,
      "step": 419810
    },
    {
      "epoch": 1.4713644344909524,
      "grad_norm": 3.453125,
      "learning_rate": 2.8308164001754977e-05,
      "loss": 0.8841,
      "step": 419820
    },
    {
      "epoch": 1.4713994819978482,
      "grad_norm": 3.09375,
      "learning_rate": 2.830751497309127e-05,
      "loss": 0.7957,
      "step": 419830
    },
    {
      "epoch": 1.4714345295047437,
      "grad_norm": 2.953125,
      "learning_rate": 2.830686594442757e-05,
      "loss": 0.9016,
      "step": 419840
    },
    {
      "epoch": 1.4714695770116393,
      "grad_norm": 2.640625,
      "learning_rate": 2.8306216915763868e-05,
      "loss": 0.76,
      "step": 419850
    },
    {
      "epoch": 1.4715046245185348,
      "grad_norm": 2.96875,
      "learning_rate": 2.8305567887100166e-05,
      "loss": 0.7998,
      "step": 419860
    },
    {
      "epoch": 1.4715396720254303,
      "grad_norm": 3.0625,
      "learning_rate": 2.8304918858436464e-05,
      "loss": 0.8297,
      "step": 419870
    },
    {
      "epoch": 1.471574719532326,
      "grad_norm": 3.0,
      "learning_rate": 2.830426982977276e-05,
      "loss": 0.831,
      "step": 419880
    },
    {
      "epoch": 1.4716097670392216,
      "grad_norm": 2.625,
      "learning_rate": 2.830362080110906e-05,
      "loss": 0.8686,
      "step": 419890
    },
    {
      "epoch": 1.4716448145461172,
      "grad_norm": 2.796875,
      "learning_rate": 2.8302971772445358e-05,
      "loss": 0.8954,
      "step": 419900
    },
    {
      "epoch": 1.471679862053013,
      "grad_norm": 2.890625,
      "learning_rate": 2.8302322743781656e-05,
      "loss": 0.8464,
      "step": 419910
    },
    {
      "epoch": 1.4717149095599085,
      "grad_norm": 2.65625,
      "learning_rate": 2.8301673715117954e-05,
      "loss": 0.8481,
      "step": 419920
    },
    {
      "epoch": 1.471749957066804,
      "grad_norm": 2.8125,
      "learning_rate": 2.830102468645425e-05,
      "loss": 0.9285,
      "step": 419930
    },
    {
      "epoch": 1.4717850045736998,
      "grad_norm": 3.34375,
      "learning_rate": 2.830037565779055e-05,
      "loss": 0.8803,
      "step": 419940
    },
    {
      "epoch": 1.4718200520805953,
      "grad_norm": 2.9375,
      "learning_rate": 2.8299726629126848e-05,
      "loss": 0.8194,
      "step": 419950
    },
    {
      "epoch": 1.4718550995874908,
      "grad_norm": 3.03125,
      "learning_rate": 2.829907760046315e-05,
      "loss": 0.7462,
      "step": 419960
    },
    {
      "epoch": 1.4718901470943864,
      "grad_norm": 2.984375,
      "learning_rate": 2.8298428571799447e-05,
      "loss": 0.9206,
      "step": 419970
    },
    {
      "epoch": 1.471925194601282,
      "grad_norm": 2.53125,
      "learning_rate": 2.8297779543135745e-05,
      "loss": 0.8164,
      "step": 419980
    },
    {
      "epoch": 1.4719602421081777,
      "grad_norm": 3.09375,
      "learning_rate": 2.8297130514472043e-05,
      "loss": 0.8922,
      "step": 419990
    },
    {
      "epoch": 1.4719952896150732,
      "grad_norm": 2.75,
      "learning_rate": 2.829648148580834e-05,
      "loss": 0.8704,
      "step": 420000
    },
    {
      "epoch": 1.4719952896150732,
      "eval_loss": 0.7908480167388916,
      "eval_runtime": 564.886,
      "eval_samples_per_second": 673.474,
      "eval_steps_per_second": 56.123,
      "step": 420000
    },
    {
      "epoch": 1.4720303371219687,
      "grad_norm": 2.953125,
      "learning_rate": 2.829583245714464e-05,
      "loss": 0.8506,
      "step": 420010
    },
    {
      "epoch": 1.4720653846288645,
      "grad_norm": 2.890625,
      "learning_rate": 2.8295183428480937e-05,
      "loss": 0.8329,
      "step": 420020
    },
    {
      "epoch": 1.47210043213576,
      "grad_norm": 3.1875,
      "learning_rate": 2.8294534399817235e-05,
      "loss": 0.9171,
      "step": 420030
    },
    {
      "epoch": 1.4721354796426556,
      "grad_norm": 3.203125,
      "learning_rate": 2.8293885371153533e-05,
      "loss": 0.8545,
      "step": 420040
    },
    {
      "epoch": 1.4721705271495513,
      "grad_norm": 3.6875,
      "learning_rate": 2.829323634248983e-05,
      "loss": 0.8326,
      "step": 420050
    },
    {
      "epoch": 1.4722055746564469,
      "grad_norm": 2.9375,
      "learning_rate": 2.829258731382613e-05,
      "loss": 0.8823,
      "step": 420060
    },
    {
      "epoch": 1.4722406221633424,
      "grad_norm": 2.6875,
      "learning_rate": 2.8291938285162427e-05,
      "loss": 0.8415,
      "step": 420070
    },
    {
      "epoch": 1.472275669670238,
      "grad_norm": 2.671875,
      "learning_rate": 2.8291289256498725e-05,
      "loss": 0.8332,
      "step": 420080
    },
    {
      "epoch": 1.4723107171771337,
      "grad_norm": 2.96875,
      "learning_rate": 2.8290640227835023e-05,
      "loss": 0.8028,
      "step": 420090
    },
    {
      "epoch": 1.4723457646840292,
      "grad_norm": 3.453125,
      "learning_rate": 2.8289991199171324e-05,
      "loss": 0.8735,
      "step": 420100
    },
    {
      "epoch": 1.4723808121909248,
      "grad_norm": 2.8125,
      "learning_rate": 2.8289342170507622e-05,
      "loss": 0.8298,
      "step": 420110
    },
    {
      "epoch": 1.4724158596978203,
      "grad_norm": 2.96875,
      "learning_rate": 2.828869314184392e-05,
      "loss": 0.787,
      "step": 420120
    },
    {
      "epoch": 1.472450907204716,
      "grad_norm": 2.84375,
      "learning_rate": 2.828804411318022e-05,
      "loss": 0.8311,
      "step": 420130
    },
    {
      "epoch": 1.4724859547116116,
      "grad_norm": 2.703125,
      "learning_rate": 2.8287395084516516e-05,
      "loss": 0.8673,
      "step": 420140
    },
    {
      "epoch": 1.4725210022185071,
      "grad_norm": 2.84375,
      "learning_rate": 2.8286746055852814e-05,
      "loss": 0.786,
      "step": 420150
    },
    {
      "epoch": 1.4725560497254029,
      "grad_norm": 4.78125,
      "learning_rate": 2.8286097027189112e-05,
      "loss": 0.8325,
      "step": 420160
    },
    {
      "epoch": 1.4725910972322984,
      "grad_norm": 2.625,
      "learning_rate": 2.828544799852541e-05,
      "loss": 0.8329,
      "step": 420170
    },
    {
      "epoch": 1.472626144739194,
      "grad_norm": 3.140625,
      "learning_rate": 2.828479896986171e-05,
      "loss": 0.8851,
      "step": 420180
    },
    {
      "epoch": 1.4726611922460897,
      "grad_norm": 2.9375,
      "learning_rate": 2.8284149941198006e-05,
      "loss": 0.8776,
      "step": 420190
    },
    {
      "epoch": 1.4726962397529852,
      "grad_norm": 2.75,
      "learning_rate": 2.82835009125343e-05,
      "loss": 0.8234,
      "step": 420200
    },
    {
      "epoch": 1.4727312872598808,
      "grad_norm": 2.96875,
      "learning_rate": 2.82828518838706e-05,
      "loss": 0.9181,
      "step": 420210
    },
    {
      "epoch": 1.4727663347667763,
      "grad_norm": 2.890625,
      "learning_rate": 2.8282202855206897e-05,
      "loss": 0.7976,
      "step": 420220
    },
    {
      "epoch": 1.4728013822736719,
      "grad_norm": 2.90625,
      "learning_rate": 2.8281553826543195e-05,
      "loss": 0.856,
      "step": 420230
    },
    {
      "epoch": 1.4728364297805676,
      "grad_norm": 2.734375,
      "learning_rate": 2.8280904797879493e-05,
      "loss": 0.843,
      "step": 420240
    },
    {
      "epoch": 1.4728714772874631,
      "grad_norm": 2.640625,
      "learning_rate": 2.828025576921579e-05,
      "loss": 0.8919,
      "step": 420250
    },
    {
      "epoch": 1.4729065247943587,
      "grad_norm": 2.609375,
      "learning_rate": 2.827960674055209e-05,
      "loss": 0.8351,
      "step": 420260
    },
    {
      "epoch": 1.4729415723012544,
      "grad_norm": 3.0,
      "learning_rate": 2.8278957711888387e-05,
      "loss": 0.8523,
      "step": 420270
    },
    {
      "epoch": 1.47297661980815,
      "grad_norm": 3.15625,
      "learning_rate": 2.8278308683224685e-05,
      "loss": 0.8008,
      "step": 420280
    },
    {
      "epoch": 1.4730116673150455,
      "grad_norm": 3.09375,
      "learning_rate": 2.8277659654560983e-05,
      "loss": 0.844,
      "step": 420290
    },
    {
      "epoch": 1.4730467148219413,
      "grad_norm": 2.375,
      "learning_rate": 2.827701062589728e-05,
      "loss": 0.7648,
      "step": 420300
    },
    {
      "epoch": 1.4730817623288368,
      "grad_norm": 2.9375,
      "learning_rate": 2.827636159723358e-05,
      "loss": 0.8325,
      "step": 420310
    },
    {
      "epoch": 1.4731168098357323,
      "grad_norm": 2.71875,
      "learning_rate": 2.827571256856988e-05,
      "loss": 0.8123,
      "step": 420320
    },
    {
      "epoch": 1.4731518573426279,
      "grad_norm": 2.703125,
      "learning_rate": 2.827506353990618e-05,
      "loss": 0.8807,
      "step": 420330
    },
    {
      "epoch": 1.4731869048495234,
      "grad_norm": 3.265625,
      "learning_rate": 2.8274414511242476e-05,
      "loss": 0.8611,
      "step": 420340
    },
    {
      "epoch": 1.4732219523564192,
      "grad_norm": 2.8125,
      "learning_rate": 2.8273765482578774e-05,
      "loss": 0.8262,
      "step": 420350
    },
    {
      "epoch": 1.4732569998633147,
      "grad_norm": 2.921875,
      "learning_rate": 2.8273116453915072e-05,
      "loss": 0.7846,
      "step": 420360
    },
    {
      "epoch": 1.4732920473702102,
      "grad_norm": 2.890625,
      "learning_rate": 2.827246742525137e-05,
      "loss": 0.7861,
      "step": 420370
    },
    {
      "epoch": 1.473327094877106,
      "grad_norm": 2.859375,
      "learning_rate": 2.827181839658767e-05,
      "loss": 0.843,
      "step": 420380
    },
    {
      "epoch": 1.4733621423840015,
      "grad_norm": 3.078125,
      "learning_rate": 2.8271169367923966e-05,
      "loss": 0.8329,
      "step": 420390
    },
    {
      "epoch": 1.473397189890897,
      "grad_norm": 2.984375,
      "learning_rate": 2.8270520339260264e-05,
      "loss": 0.8028,
      "step": 420400
    },
    {
      "epoch": 1.4734322373977928,
      "grad_norm": 2.765625,
      "learning_rate": 2.8269871310596562e-05,
      "loss": 0.868,
      "step": 420410
    },
    {
      "epoch": 1.4734672849046884,
      "grad_norm": 2.703125,
      "learning_rate": 2.826922228193286e-05,
      "loss": 0.8924,
      "step": 420420
    },
    {
      "epoch": 1.473502332411584,
      "grad_norm": 3.390625,
      "learning_rate": 2.826857325326916e-05,
      "loss": 0.8703,
      "step": 420430
    },
    {
      "epoch": 1.4735373799184794,
      "grad_norm": 2.828125,
      "learning_rate": 2.8267924224605456e-05,
      "loss": 0.871,
      "step": 420440
    },
    {
      "epoch": 1.473572427425375,
      "grad_norm": 3.078125,
      "learning_rate": 2.8267275195941754e-05,
      "loss": 0.7988,
      "step": 420450
    },
    {
      "epoch": 1.4736074749322707,
      "grad_norm": 2.796875,
      "learning_rate": 2.8266626167278056e-05,
      "loss": 0.8683,
      "step": 420460
    },
    {
      "epoch": 1.4736425224391663,
      "grad_norm": 2.765625,
      "learning_rate": 2.8265977138614354e-05,
      "loss": 0.8106,
      "step": 420470
    },
    {
      "epoch": 1.4736775699460618,
      "grad_norm": 3.140625,
      "learning_rate": 2.8265328109950652e-05,
      "loss": 0.8933,
      "step": 420480
    },
    {
      "epoch": 1.4737126174529576,
      "grad_norm": 2.96875,
      "learning_rate": 2.826467908128695e-05,
      "loss": 0.8034,
      "step": 420490
    },
    {
      "epoch": 1.473747664959853,
      "grad_norm": 2.609375,
      "learning_rate": 2.8264030052623248e-05,
      "loss": 0.8331,
      "step": 420500
    },
    {
      "epoch": 1.4737827124667486,
      "grad_norm": 2.90625,
      "learning_rate": 2.8263381023959546e-05,
      "loss": 0.7725,
      "step": 420510
    },
    {
      "epoch": 1.4738177599736444,
      "grad_norm": 3.34375,
      "learning_rate": 2.8262731995295844e-05,
      "loss": 0.8235,
      "step": 420520
    },
    {
      "epoch": 1.47385280748054,
      "grad_norm": 2.6875,
      "learning_rate": 2.8262082966632142e-05,
      "loss": 0.8429,
      "step": 420530
    },
    {
      "epoch": 1.4738878549874355,
      "grad_norm": 3.328125,
      "learning_rate": 2.826143393796844e-05,
      "loss": 0.9008,
      "step": 420540
    },
    {
      "epoch": 1.473922902494331,
      "grad_norm": 2.90625,
      "learning_rate": 2.8260784909304738e-05,
      "loss": 0.8794,
      "step": 420550
    },
    {
      "epoch": 1.4739579500012265,
      "grad_norm": 2.8125,
      "learning_rate": 2.8260135880641036e-05,
      "loss": 0.85,
      "step": 420560
    },
    {
      "epoch": 1.4739929975081223,
      "grad_norm": 3.765625,
      "learning_rate": 2.8259486851977334e-05,
      "loss": 0.87,
      "step": 420570
    },
    {
      "epoch": 1.4740280450150178,
      "grad_norm": 2.953125,
      "learning_rate": 2.825883782331363e-05,
      "loss": 0.8709,
      "step": 420580
    },
    {
      "epoch": 1.4740630925219134,
      "grad_norm": 2.84375,
      "learning_rate": 2.8258188794649926e-05,
      "loss": 0.8726,
      "step": 420590
    },
    {
      "epoch": 1.4740981400288091,
      "grad_norm": 2.84375,
      "learning_rate": 2.8257539765986224e-05,
      "loss": 0.8967,
      "step": 420600
    },
    {
      "epoch": 1.4741331875357047,
      "grad_norm": 2.5625,
      "learning_rate": 2.8256890737322522e-05,
      "loss": 0.7477,
      "step": 420610
    },
    {
      "epoch": 1.4741682350426002,
      "grad_norm": 2.796875,
      "learning_rate": 2.825624170865882e-05,
      "loss": 0.9194,
      "step": 420620
    },
    {
      "epoch": 1.474203282549496,
      "grad_norm": 3.15625,
      "learning_rate": 2.825559267999512e-05,
      "loss": 0.9594,
      "step": 420630
    },
    {
      "epoch": 1.4742383300563915,
      "grad_norm": 3.203125,
      "learning_rate": 2.8254943651331416e-05,
      "loss": 0.7987,
      "step": 420640
    },
    {
      "epoch": 1.474273377563287,
      "grad_norm": 3.171875,
      "learning_rate": 2.8254294622667714e-05,
      "loss": 0.904,
      "step": 420650
    },
    {
      "epoch": 1.4743084250701826,
      "grad_norm": 2.828125,
      "learning_rate": 2.8253645594004012e-05,
      "loss": 0.8642,
      "step": 420660
    },
    {
      "epoch": 1.474343472577078,
      "grad_norm": 3.046875,
      "learning_rate": 2.825299656534031e-05,
      "loss": 0.911,
      "step": 420670
    },
    {
      "epoch": 1.4743785200839739,
      "grad_norm": 3.15625,
      "learning_rate": 2.825234753667661e-05,
      "loss": 0.9473,
      "step": 420680
    },
    {
      "epoch": 1.4744135675908694,
      "grad_norm": 3.046875,
      "learning_rate": 2.825169850801291e-05,
      "loss": 0.8169,
      "step": 420690
    },
    {
      "epoch": 1.474448615097765,
      "grad_norm": 2.96875,
      "learning_rate": 2.8251049479349208e-05,
      "loss": 0.7896,
      "step": 420700
    },
    {
      "epoch": 1.4744836626046607,
      "grad_norm": 3.328125,
      "learning_rate": 2.8250400450685506e-05,
      "loss": 0.8738,
      "step": 420710
    },
    {
      "epoch": 1.4745187101115562,
      "grad_norm": 2.875,
      "learning_rate": 2.8249751422021804e-05,
      "loss": 0.8225,
      "step": 420720
    },
    {
      "epoch": 1.4745537576184518,
      "grad_norm": 3.09375,
      "learning_rate": 2.8249102393358102e-05,
      "loss": 0.7764,
      "step": 420730
    },
    {
      "epoch": 1.4745888051253475,
      "grad_norm": 2.90625,
      "learning_rate": 2.82484533646944e-05,
      "loss": 0.8377,
      "step": 420740
    },
    {
      "epoch": 1.474623852632243,
      "grad_norm": 2.921875,
      "learning_rate": 2.8247804336030698e-05,
      "loss": 0.8064,
      "step": 420750
    },
    {
      "epoch": 1.4746589001391386,
      "grad_norm": 2.84375,
      "learning_rate": 2.8247155307366996e-05,
      "loss": 0.803,
      "step": 420760
    },
    {
      "epoch": 1.4746939476460341,
      "grad_norm": 2.953125,
      "learning_rate": 2.8246506278703294e-05,
      "loss": 0.8792,
      "step": 420770
    },
    {
      "epoch": 1.4747289951529299,
      "grad_norm": 2.84375,
      "learning_rate": 2.8245857250039592e-05,
      "loss": 0.933,
      "step": 420780
    },
    {
      "epoch": 1.4747640426598254,
      "grad_norm": 2.8125,
      "learning_rate": 2.824520822137589e-05,
      "loss": 0.7748,
      "step": 420790
    },
    {
      "epoch": 1.474799090166721,
      "grad_norm": 3.03125,
      "learning_rate": 2.8244559192712188e-05,
      "loss": 0.9103,
      "step": 420800
    },
    {
      "epoch": 1.4748341376736165,
      "grad_norm": 2.625,
      "learning_rate": 2.8243910164048486e-05,
      "loss": 0.8205,
      "step": 420810
    },
    {
      "epoch": 1.4748691851805122,
      "grad_norm": 3.21875,
      "learning_rate": 2.8243261135384784e-05,
      "loss": 0.8777,
      "step": 420820
    },
    {
      "epoch": 1.4749042326874078,
      "grad_norm": 3.328125,
      "learning_rate": 2.8242612106721085e-05,
      "loss": 0.8263,
      "step": 420830
    },
    {
      "epoch": 1.4749392801943033,
      "grad_norm": 3.15625,
      "learning_rate": 2.8241963078057383e-05,
      "loss": 0.833,
      "step": 420840
    },
    {
      "epoch": 1.474974327701199,
      "grad_norm": 3.34375,
      "learning_rate": 2.824131404939368e-05,
      "loss": 0.864,
      "step": 420850
    },
    {
      "epoch": 1.4750093752080946,
      "grad_norm": 2.9375,
      "learning_rate": 2.824066502072998e-05,
      "loss": 0.8555,
      "step": 420860
    },
    {
      "epoch": 1.4750444227149901,
      "grad_norm": 3.03125,
      "learning_rate": 2.8240015992066277e-05,
      "loss": 0.7871,
      "step": 420870
    },
    {
      "epoch": 1.475079470221886,
      "grad_norm": 2.5,
      "learning_rate": 2.8239366963402575e-05,
      "loss": 0.7129,
      "step": 420880
    },
    {
      "epoch": 1.4751145177287814,
      "grad_norm": 3.109375,
      "learning_rate": 2.8238717934738873e-05,
      "loss": 0.8179,
      "step": 420890
    },
    {
      "epoch": 1.475149565235677,
      "grad_norm": 3.109375,
      "learning_rate": 2.823806890607517e-05,
      "loss": 0.8817,
      "step": 420900
    },
    {
      "epoch": 1.4751846127425725,
      "grad_norm": 2.9375,
      "learning_rate": 2.823741987741147e-05,
      "loss": 0.9242,
      "step": 420910
    },
    {
      "epoch": 1.475219660249468,
      "grad_norm": 2.53125,
      "learning_rate": 2.8236770848747767e-05,
      "loss": 0.7845,
      "step": 420920
    },
    {
      "epoch": 1.4752547077563638,
      "grad_norm": 2.609375,
      "learning_rate": 2.8236121820084065e-05,
      "loss": 0.7945,
      "step": 420930
    },
    {
      "epoch": 1.4752897552632593,
      "grad_norm": 3.296875,
      "learning_rate": 2.8235472791420363e-05,
      "loss": 0.7965,
      "step": 420940
    },
    {
      "epoch": 1.4753248027701549,
      "grad_norm": 2.84375,
      "learning_rate": 2.8234823762756658e-05,
      "loss": 0.7969,
      "step": 420950
    },
    {
      "epoch": 1.4753598502770506,
      "grad_norm": 3.09375,
      "learning_rate": 2.8234174734092956e-05,
      "loss": 0.956,
      "step": 420960
    },
    {
      "epoch": 1.4753948977839462,
      "grad_norm": 2.828125,
      "learning_rate": 2.8233525705429254e-05,
      "loss": 0.7901,
      "step": 420970
    },
    {
      "epoch": 1.4754299452908417,
      "grad_norm": 2.953125,
      "learning_rate": 2.8232876676765552e-05,
      "loss": 0.8663,
      "step": 420980
    },
    {
      "epoch": 1.4754649927977375,
      "grad_norm": 2.96875,
      "learning_rate": 2.823222764810185e-05,
      "loss": 0.7993,
      "step": 420990
    },
    {
      "epoch": 1.475500040304633,
      "grad_norm": 2.6875,
      "learning_rate": 2.8231578619438148e-05,
      "loss": 0.8059,
      "step": 421000
    },
    {
      "epoch": 1.4755350878115285,
      "grad_norm": 3.078125,
      "learning_rate": 2.8230929590774446e-05,
      "loss": 0.8776,
      "step": 421010
    },
    {
      "epoch": 1.475570135318424,
      "grad_norm": 2.78125,
      "learning_rate": 2.8230280562110744e-05,
      "loss": 0.8482,
      "step": 421020
    },
    {
      "epoch": 1.4756051828253196,
      "grad_norm": 2.59375,
      "learning_rate": 2.8229631533447042e-05,
      "loss": 0.8563,
      "step": 421030
    },
    {
      "epoch": 1.4756402303322154,
      "grad_norm": 3.0625,
      "learning_rate": 2.822898250478334e-05,
      "loss": 0.7925,
      "step": 421040
    },
    {
      "epoch": 1.475675277839111,
      "grad_norm": 2.984375,
      "learning_rate": 2.8228333476119638e-05,
      "loss": 0.8161,
      "step": 421050
    },
    {
      "epoch": 1.4757103253460064,
      "grad_norm": 3.046875,
      "learning_rate": 2.822768444745594e-05,
      "loss": 0.9069,
      "step": 421060
    },
    {
      "epoch": 1.4757453728529022,
      "grad_norm": 2.859375,
      "learning_rate": 2.8227035418792237e-05,
      "loss": 0.7961,
      "step": 421070
    },
    {
      "epoch": 1.4757804203597977,
      "grad_norm": 2.859375,
      "learning_rate": 2.8226386390128535e-05,
      "loss": 0.8221,
      "step": 421080
    },
    {
      "epoch": 1.4758154678666933,
      "grad_norm": 3.328125,
      "learning_rate": 2.8225737361464833e-05,
      "loss": 0.8136,
      "step": 421090
    },
    {
      "epoch": 1.475850515373589,
      "grad_norm": 2.765625,
      "learning_rate": 2.822508833280113e-05,
      "loss": 0.8337,
      "step": 421100
    },
    {
      "epoch": 1.4758855628804846,
      "grad_norm": 2.828125,
      "learning_rate": 2.822443930413743e-05,
      "loss": 0.8239,
      "step": 421110
    },
    {
      "epoch": 1.47592061038738,
      "grad_norm": 2.703125,
      "learning_rate": 2.8223790275473727e-05,
      "loss": 0.8133,
      "step": 421120
    },
    {
      "epoch": 1.4759556578942756,
      "grad_norm": 2.8125,
      "learning_rate": 2.8223141246810025e-05,
      "loss": 0.8298,
      "step": 421130
    },
    {
      "epoch": 1.4759907054011712,
      "grad_norm": 2.8125,
      "learning_rate": 2.8222492218146323e-05,
      "loss": 0.8516,
      "step": 421140
    },
    {
      "epoch": 1.476025752908067,
      "grad_norm": 3.109375,
      "learning_rate": 2.822184318948262e-05,
      "loss": 0.7639,
      "step": 421150
    },
    {
      "epoch": 1.4760608004149625,
      "grad_norm": 2.828125,
      "learning_rate": 2.822119416081892e-05,
      "loss": 0.8278,
      "step": 421160
    },
    {
      "epoch": 1.476095847921858,
      "grad_norm": 2.96875,
      "learning_rate": 2.8220545132155217e-05,
      "loss": 0.7916,
      "step": 421170
    },
    {
      "epoch": 1.4761308954287538,
      "grad_norm": 2.9375,
      "learning_rate": 2.8219896103491515e-05,
      "loss": 0.8354,
      "step": 421180
    },
    {
      "epoch": 1.4761659429356493,
      "grad_norm": 2.84375,
      "learning_rate": 2.8219247074827813e-05,
      "loss": 0.8444,
      "step": 421190
    },
    {
      "epoch": 1.4762009904425448,
      "grad_norm": 2.9375,
      "learning_rate": 2.8218598046164115e-05,
      "loss": 0.8741,
      "step": 421200
    },
    {
      "epoch": 1.4762360379494406,
      "grad_norm": 2.921875,
      "learning_rate": 2.8217949017500413e-05,
      "loss": 0.8051,
      "step": 421210
    },
    {
      "epoch": 1.4762710854563361,
      "grad_norm": 2.9375,
      "learning_rate": 2.821729998883671e-05,
      "loss": 0.8666,
      "step": 421220
    },
    {
      "epoch": 1.4763061329632317,
      "grad_norm": 2.8125,
      "learning_rate": 2.821665096017301e-05,
      "loss": 0.8692,
      "step": 421230
    },
    {
      "epoch": 1.4763411804701272,
      "grad_norm": 2.796875,
      "learning_rate": 2.8216001931509307e-05,
      "loss": 0.9048,
      "step": 421240
    },
    {
      "epoch": 1.4763762279770227,
      "grad_norm": 2.984375,
      "learning_rate": 2.8215352902845605e-05,
      "loss": 0.9014,
      "step": 421250
    },
    {
      "epoch": 1.4764112754839185,
      "grad_norm": 2.625,
      "learning_rate": 2.8214703874181903e-05,
      "loss": 0.7744,
      "step": 421260
    },
    {
      "epoch": 1.476446322990814,
      "grad_norm": 3.109375,
      "learning_rate": 2.82140548455182e-05,
      "loss": 0.8765,
      "step": 421270
    },
    {
      "epoch": 1.4764813704977096,
      "grad_norm": 2.921875,
      "learning_rate": 2.82134058168545e-05,
      "loss": 0.8249,
      "step": 421280
    },
    {
      "epoch": 1.4765164180046053,
      "grad_norm": 3.1875,
      "learning_rate": 2.8212756788190797e-05,
      "loss": 0.8716,
      "step": 421290
    },
    {
      "epoch": 1.4765514655115008,
      "grad_norm": 3.125,
      "learning_rate": 2.8212107759527095e-05,
      "loss": 0.9007,
      "step": 421300
    },
    {
      "epoch": 1.4765865130183964,
      "grad_norm": 3.046875,
      "learning_rate": 2.8211458730863393e-05,
      "loss": 0.9914,
      "step": 421310
    },
    {
      "epoch": 1.4766215605252921,
      "grad_norm": 2.890625,
      "learning_rate": 2.821080970219969e-05,
      "loss": 0.7854,
      "step": 421320
    },
    {
      "epoch": 1.4766566080321877,
      "grad_norm": 3.015625,
      "learning_rate": 2.8210160673535985e-05,
      "loss": 0.8548,
      "step": 421330
    },
    {
      "epoch": 1.4766916555390832,
      "grad_norm": 3.0625,
      "learning_rate": 2.8209511644872283e-05,
      "loss": 0.7398,
      "step": 421340
    },
    {
      "epoch": 1.4767267030459788,
      "grad_norm": 3.0,
      "learning_rate": 2.820886261620858e-05,
      "loss": 0.8689,
      "step": 421350
    },
    {
      "epoch": 1.4767617505528743,
      "grad_norm": 2.703125,
      "learning_rate": 2.820821358754488e-05,
      "loss": 0.8036,
      "step": 421360
    },
    {
      "epoch": 1.47679679805977,
      "grad_norm": 3.515625,
      "learning_rate": 2.8207564558881177e-05,
      "loss": 0.8716,
      "step": 421370
    },
    {
      "epoch": 1.4768318455666656,
      "grad_norm": 3.15625,
      "learning_rate": 2.8206915530217475e-05,
      "loss": 0.8287,
      "step": 421380
    },
    {
      "epoch": 1.4768668930735611,
      "grad_norm": 2.78125,
      "learning_rate": 2.8206266501553773e-05,
      "loss": 0.8352,
      "step": 421390
    },
    {
      "epoch": 1.4769019405804569,
      "grad_norm": 3.234375,
      "learning_rate": 2.820561747289007e-05,
      "loss": 0.7908,
      "step": 421400
    },
    {
      "epoch": 1.4769369880873524,
      "grad_norm": 3.078125,
      "learning_rate": 2.820496844422637e-05,
      "loss": 0.9248,
      "step": 421410
    },
    {
      "epoch": 1.476972035594248,
      "grad_norm": 2.796875,
      "learning_rate": 2.820431941556267e-05,
      "loss": 0.8144,
      "step": 421420
    },
    {
      "epoch": 1.4770070831011437,
      "grad_norm": 3.046875,
      "learning_rate": 2.820367038689897e-05,
      "loss": 0.8871,
      "step": 421430
    },
    {
      "epoch": 1.4770421306080392,
      "grad_norm": 3.0,
      "learning_rate": 2.8203021358235267e-05,
      "loss": 0.8472,
      "step": 421440
    },
    {
      "epoch": 1.4770771781149348,
      "grad_norm": 3.578125,
      "learning_rate": 2.8202372329571565e-05,
      "loss": 0.8073,
      "step": 421450
    },
    {
      "epoch": 1.4771122256218303,
      "grad_norm": 3.1875,
      "learning_rate": 2.8201723300907863e-05,
      "loss": 0.9059,
      "step": 421460
    },
    {
      "epoch": 1.477147273128726,
      "grad_norm": 2.84375,
      "learning_rate": 2.820107427224416e-05,
      "loss": 0.8604,
      "step": 421470
    },
    {
      "epoch": 1.4771823206356216,
      "grad_norm": 2.84375,
      "learning_rate": 2.820042524358046e-05,
      "loss": 0.8274,
      "step": 421480
    },
    {
      "epoch": 1.4772173681425171,
      "grad_norm": 3.09375,
      "learning_rate": 2.8199776214916757e-05,
      "loss": 0.8539,
      "step": 421490
    },
    {
      "epoch": 1.4772524156494127,
      "grad_norm": 3.0625,
      "learning_rate": 2.8199127186253055e-05,
      "loss": 0.8133,
      "step": 421500
    },
    {
      "epoch": 1.4772874631563084,
      "grad_norm": 2.453125,
      "learning_rate": 2.8198478157589353e-05,
      "loss": 0.8694,
      "step": 421510
    },
    {
      "epoch": 1.477322510663204,
      "grad_norm": 2.765625,
      "learning_rate": 2.819782912892565e-05,
      "loss": 0.8396,
      "step": 421520
    },
    {
      "epoch": 1.4773575581700995,
      "grad_norm": 2.953125,
      "learning_rate": 2.819718010026195e-05,
      "loss": 0.9115,
      "step": 421530
    },
    {
      "epoch": 1.4773926056769953,
      "grad_norm": 2.734375,
      "learning_rate": 2.8196531071598247e-05,
      "loss": 0.8029,
      "step": 421540
    },
    {
      "epoch": 1.4774276531838908,
      "grad_norm": 2.96875,
      "learning_rate": 2.8195882042934545e-05,
      "loss": 0.8473,
      "step": 421550
    },
    {
      "epoch": 1.4774627006907863,
      "grad_norm": 3.296875,
      "learning_rate": 2.8195233014270846e-05,
      "loss": 0.8499,
      "step": 421560
    },
    {
      "epoch": 1.477497748197682,
      "grad_norm": 2.40625,
      "learning_rate": 2.8194583985607144e-05,
      "loss": 0.7638,
      "step": 421570
    },
    {
      "epoch": 1.4775327957045776,
      "grad_norm": 2.859375,
      "learning_rate": 2.8193934956943442e-05,
      "loss": 0.9039,
      "step": 421580
    },
    {
      "epoch": 1.4775678432114732,
      "grad_norm": 3.171875,
      "learning_rate": 2.819328592827974e-05,
      "loss": 0.792,
      "step": 421590
    },
    {
      "epoch": 1.4776028907183687,
      "grad_norm": 2.546875,
      "learning_rate": 2.8192636899616038e-05,
      "loss": 0.9329,
      "step": 421600
    },
    {
      "epoch": 1.4776379382252642,
      "grad_norm": 2.953125,
      "learning_rate": 2.8191987870952336e-05,
      "loss": 0.8196,
      "step": 421610
    },
    {
      "epoch": 1.47767298573216,
      "grad_norm": 2.8125,
      "learning_rate": 2.8191338842288634e-05,
      "loss": 0.8344,
      "step": 421620
    },
    {
      "epoch": 1.4777080332390555,
      "grad_norm": 2.828125,
      "learning_rate": 2.8190689813624932e-05,
      "loss": 0.8438,
      "step": 421630
    },
    {
      "epoch": 1.477743080745951,
      "grad_norm": 2.703125,
      "learning_rate": 2.819004078496123e-05,
      "loss": 0.8489,
      "step": 421640
    },
    {
      "epoch": 1.4777781282528468,
      "grad_norm": 2.890625,
      "learning_rate": 2.8189391756297528e-05,
      "loss": 0.8396,
      "step": 421650
    },
    {
      "epoch": 1.4778131757597424,
      "grad_norm": 2.5625,
      "learning_rate": 2.8188742727633826e-05,
      "loss": 0.8726,
      "step": 421660
    },
    {
      "epoch": 1.477848223266638,
      "grad_norm": 2.8125,
      "learning_rate": 2.8188093698970124e-05,
      "loss": 0.8317,
      "step": 421670
    },
    {
      "epoch": 1.4778832707735337,
      "grad_norm": 2.859375,
      "learning_rate": 2.8187444670306422e-05,
      "loss": 0.8725,
      "step": 421680
    },
    {
      "epoch": 1.4779183182804292,
      "grad_norm": 2.828125,
      "learning_rate": 2.818679564164272e-05,
      "loss": 0.8123,
      "step": 421690
    },
    {
      "epoch": 1.4779533657873247,
      "grad_norm": 2.953125,
      "learning_rate": 2.818614661297902e-05,
      "loss": 0.8971,
      "step": 421700
    },
    {
      "epoch": 1.4779884132942203,
      "grad_norm": 2.859375,
      "learning_rate": 2.8185497584315313e-05,
      "loss": 0.8598,
      "step": 421710
    },
    {
      "epoch": 1.4780234608011158,
      "grad_norm": 3.046875,
      "learning_rate": 2.818484855565161e-05,
      "loss": 0.8176,
      "step": 421720
    },
    {
      "epoch": 1.4780585083080116,
      "grad_norm": 2.796875,
      "learning_rate": 2.818419952698791e-05,
      "loss": 0.8262,
      "step": 421730
    },
    {
      "epoch": 1.478093555814907,
      "grad_norm": 2.28125,
      "learning_rate": 2.8183550498324207e-05,
      "loss": 0.7683,
      "step": 421740
    },
    {
      "epoch": 1.4781286033218026,
      "grad_norm": 3.125,
      "learning_rate": 2.8182901469660505e-05,
      "loss": 0.7927,
      "step": 421750
    },
    {
      "epoch": 1.4781636508286984,
      "grad_norm": 3.109375,
      "learning_rate": 2.8182252440996803e-05,
      "loss": 0.8623,
      "step": 421760
    },
    {
      "epoch": 1.478198698335594,
      "grad_norm": 2.671875,
      "learning_rate": 2.81816034123331e-05,
      "loss": 0.8802,
      "step": 421770
    },
    {
      "epoch": 1.4782337458424895,
      "grad_norm": 2.84375,
      "learning_rate": 2.81809543836694e-05,
      "loss": 0.9011,
      "step": 421780
    },
    {
      "epoch": 1.4782687933493852,
      "grad_norm": 3.140625,
      "learning_rate": 2.81803053550057e-05,
      "loss": 0.8233,
      "step": 421790
    },
    {
      "epoch": 1.4783038408562807,
      "grad_norm": 3.234375,
      "learning_rate": 2.8179656326341998e-05,
      "loss": 0.8545,
      "step": 421800
    },
    {
      "epoch": 1.4783388883631763,
      "grad_norm": 2.4375,
      "learning_rate": 2.8179007297678296e-05,
      "loss": 0.8436,
      "step": 421810
    },
    {
      "epoch": 1.4783739358700718,
      "grad_norm": 3.609375,
      "learning_rate": 2.8178358269014594e-05,
      "loss": 0.8157,
      "step": 421820
    },
    {
      "epoch": 1.4784089833769674,
      "grad_norm": 2.90625,
      "learning_rate": 2.8177709240350892e-05,
      "loss": 0.9311,
      "step": 421830
    },
    {
      "epoch": 1.4784440308838631,
      "grad_norm": 3.515625,
      "learning_rate": 2.817706021168719e-05,
      "loss": 0.831,
      "step": 421840
    },
    {
      "epoch": 1.4784790783907586,
      "grad_norm": 3.15625,
      "learning_rate": 2.8176411183023488e-05,
      "loss": 0.8526,
      "step": 421850
    },
    {
      "epoch": 1.4785141258976542,
      "grad_norm": 2.609375,
      "learning_rate": 2.8175762154359786e-05,
      "loss": 0.7525,
      "step": 421860
    },
    {
      "epoch": 1.47854917340455,
      "grad_norm": 3.21875,
      "learning_rate": 2.8175113125696084e-05,
      "loss": 0.9155,
      "step": 421870
    },
    {
      "epoch": 1.4785842209114455,
      "grad_norm": 2.578125,
      "learning_rate": 2.8174464097032382e-05,
      "loss": 0.806,
      "step": 421880
    },
    {
      "epoch": 1.478619268418341,
      "grad_norm": 2.609375,
      "learning_rate": 2.817381506836868e-05,
      "loss": 0.8433,
      "step": 421890
    },
    {
      "epoch": 1.4786543159252368,
      "grad_norm": 2.890625,
      "learning_rate": 2.8173166039704978e-05,
      "loss": 0.7868,
      "step": 421900
    },
    {
      "epoch": 1.4786893634321323,
      "grad_norm": 2.828125,
      "learning_rate": 2.8172517011041276e-05,
      "loss": 0.802,
      "step": 421910
    },
    {
      "epoch": 1.4787244109390278,
      "grad_norm": 3.28125,
      "learning_rate": 2.8171867982377574e-05,
      "loss": 0.7258,
      "step": 421920
    },
    {
      "epoch": 1.4787594584459234,
      "grad_norm": 2.65625,
      "learning_rate": 2.8171218953713875e-05,
      "loss": 0.7851,
      "step": 421930
    },
    {
      "epoch": 1.478794505952819,
      "grad_norm": 2.203125,
      "learning_rate": 2.8170569925050173e-05,
      "loss": 0.8623,
      "step": 421940
    },
    {
      "epoch": 1.4788295534597147,
      "grad_norm": 2.8125,
      "learning_rate": 2.816992089638647e-05,
      "loss": 0.7935,
      "step": 421950
    },
    {
      "epoch": 1.4788646009666102,
      "grad_norm": 2.65625,
      "learning_rate": 2.816927186772277e-05,
      "loss": 0.8621,
      "step": 421960
    },
    {
      "epoch": 1.4788996484735057,
      "grad_norm": 3.46875,
      "learning_rate": 2.8168622839059067e-05,
      "loss": 0.8674,
      "step": 421970
    },
    {
      "epoch": 1.4789346959804015,
      "grad_norm": 2.421875,
      "learning_rate": 2.8167973810395365e-05,
      "loss": 0.8692,
      "step": 421980
    },
    {
      "epoch": 1.478969743487297,
      "grad_norm": 2.5,
      "learning_rate": 2.8167324781731663e-05,
      "loss": 0.8096,
      "step": 421990
    },
    {
      "epoch": 1.4790047909941926,
      "grad_norm": 2.78125,
      "learning_rate": 2.816667575306796e-05,
      "loss": 0.8568,
      "step": 422000
    },
    {
      "epoch": 1.4790398385010883,
      "grad_norm": 3.046875,
      "learning_rate": 2.816602672440426e-05,
      "loss": 0.8595,
      "step": 422010
    },
    {
      "epoch": 1.4790748860079839,
      "grad_norm": 3.0625,
      "learning_rate": 2.8165377695740557e-05,
      "loss": 0.8052,
      "step": 422020
    },
    {
      "epoch": 1.4791099335148794,
      "grad_norm": 2.75,
      "learning_rate": 2.8164728667076855e-05,
      "loss": 0.8236,
      "step": 422030
    },
    {
      "epoch": 1.479144981021775,
      "grad_norm": 2.84375,
      "learning_rate": 2.8164079638413153e-05,
      "loss": 0.8596,
      "step": 422040
    },
    {
      "epoch": 1.4791800285286705,
      "grad_norm": 2.875,
      "learning_rate": 2.816343060974945e-05,
      "loss": 0.8541,
      "step": 422050
    },
    {
      "epoch": 1.4792150760355662,
      "grad_norm": 2.859375,
      "learning_rate": 2.816278158108575e-05,
      "loss": 0.8264,
      "step": 422060
    },
    {
      "epoch": 1.4792501235424618,
      "grad_norm": 2.90625,
      "learning_rate": 2.816213255242205e-05,
      "loss": 0.8218,
      "step": 422070
    },
    {
      "epoch": 1.4792851710493573,
      "grad_norm": 3.140625,
      "learning_rate": 2.8161483523758342e-05,
      "loss": 0.7534,
      "step": 422080
    },
    {
      "epoch": 1.479320218556253,
      "grad_norm": 2.625,
      "learning_rate": 2.816083449509464e-05,
      "loss": 0.8154,
      "step": 422090
    },
    {
      "epoch": 1.4793552660631486,
      "grad_norm": 3.21875,
      "learning_rate": 2.8160185466430938e-05,
      "loss": 0.793,
      "step": 422100
    },
    {
      "epoch": 1.4793903135700441,
      "grad_norm": 3.203125,
      "learning_rate": 2.8159536437767236e-05,
      "loss": 0.9066,
      "step": 422110
    },
    {
      "epoch": 1.47942536107694,
      "grad_norm": 2.734375,
      "learning_rate": 2.8158887409103534e-05,
      "loss": 0.8449,
      "step": 422120
    },
    {
      "epoch": 1.4794604085838354,
      "grad_norm": 2.671875,
      "learning_rate": 2.8158238380439832e-05,
      "loss": 0.8342,
      "step": 422130
    },
    {
      "epoch": 1.479495456090731,
      "grad_norm": 2.65625,
      "learning_rate": 2.815758935177613e-05,
      "loss": 0.9648,
      "step": 422140
    },
    {
      "epoch": 1.4795305035976265,
      "grad_norm": 2.796875,
      "learning_rate": 2.8156940323112428e-05,
      "loss": 0.8739,
      "step": 422150
    },
    {
      "epoch": 1.4795655511045223,
      "grad_norm": 2.9375,
      "learning_rate": 2.815629129444873e-05,
      "loss": 0.8531,
      "step": 422160
    },
    {
      "epoch": 1.4796005986114178,
      "grad_norm": 2.921875,
      "learning_rate": 2.8155642265785027e-05,
      "loss": 0.8161,
      "step": 422170
    },
    {
      "epoch": 1.4796356461183133,
      "grad_norm": 3.203125,
      "learning_rate": 2.8154993237121325e-05,
      "loss": 0.8388,
      "step": 422180
    },
    {
      "epoch": 1.4796706936252089,
      "grad_norm": 2.375,
      "learning_rate": 2.8154344208457623e-05,
      "loss": 0.7917,
      "step": 422190
    },
    {
      "epoch": 1.4797057411321046,
      "grad_norm": 3.15625,
      "learning_rate": 2.815369517979392e-05,
      "loss": 0.9686,
      "step": 422200
    },
    {
      "epoch": 1.4797407886390002,
      "grad_norm": 2.78125,
      "learning_rate": 2.815304615113022e-05,
      "loss": 0.851,
      "step": 422210
    },
    {
      "epoch": 1.4797758361458957,
      "grad_norm": 2.84375,
      "learning_rate": 2.8152397122466517e-05,
      "loss": 0.8879,
      "step": 422220
    },
    {
      "epoch": 1.4798108836527915,
      "grad_norm": 2.65625,
      "learning_rate": 2.8151748093802815e-05,
      "loss": 0.8004,
      "step": 422230
    },
    {
      "epoch": 1.479845931159687,
      "grad_norm": 3.140625,
      "learning_rate": 2.8151099065139113e-05,
      "loss": 0.8865,
      "step": 422240
    },
    {
      "epoch": 1.4798809786665825,
      "grad_norm": 2.53125,
      "learning_rate": 2.815045003647541e-05,
      "loss": 0.822,
      "step": 422250
    },
    {
      "epoch": 1.4799160261734783,
      "grad_norm": 2.671875,
      "learning_rate": 2.814980100781171e-05,
      "loss": 0.8659,
      "step": 422260
    },
    {
      "epoch": 1.4799510736803738,
      "grad_norm": 3.0625,
      "learning_rate": 2.8149151979148007e-05,
      "loss": 0.8196,
      "step": 422270
    },
    {
      "epoch": 1.4799861211872694,
      "grad_norm": 2.6875,
      "learning_rate": 2.8148502950484305e-05,
      "loss": 0.832,
      "step": 422280
    },
    {
      "epoch": 1.480021168694165,
      "grad_norm": 2.796875,
      "learning_rate": 2.8147853921820603e-05,
      "loss": 0.9348,
      "step": 422290
    },
    {
      "epoch": 1.4800562162010604,
      "grad_norm": 2.703125,
      "learning_rate": 2.8147204893156905e-05,
      "loss": 0.8679,
      "step": 422300
    },
    {
      "epoch": 1.4800912637079562,
      "grad_norm": 3.140625,
      "learning_rate": 2.8146555864493203e-05,
      "loss": 0.8204,
      "step": 422310
    },
    {
      "epoch": 1.4801263112148517,
      "grad_norm": 2.796875,
      "learning_rate": 2.81459068358295e-05,
      "loss": 0.8727,
      "step": 422320
    },
    {
      "epoch": 1.4801613587217473,
      "grad_norm": 2.78125,
      "learning_rate": 2.81452578071658e-05,
      "loss": 0.8624,
      "step": 422330
    },
    {
      "epoch": 1.480196406228643,
      "grad_norm": 3.015625,
      "learning_rate": 2.8144608778502097e-05,
      "loss": 0.8752,
      "step": 422340
    },
    {
      "epoch": 1.4802314537355385,
      "grad_norm": 3.15625,
      "learning_rate": 2.8143959749838395e-05,
      "loss": 0.8026,
      "step": 422350
    },
    {
      "epoch": 1.480266501242434,
      "grad_norm": 2.71875,
      "learning_rate": 2.8143310721174693e-05,
      "loss": 0.8762,
      "step": 422360
    },
    {
      "epoch": 1.4803015487493298,
      "grad_norm": 2.859375,
      "learning_rate": 2.814266169251099e-05,
      "loss": 0.8354,
      "step": 422370
    },
    {
      "epoch": 1.4803365962562254,
      "grad_norm": 3.375,
      "learning_rate": 2.814201266384729e-05,
      "loss": 0.8637,
      "step": 422380
    },
    {
      "epoch": 1.480371643763121,
      "grad_norm": 2.34375,
      "learning_rate": 2.8141363635183587e-05,
      "loss": 0.8304,
      "step": 422390
    },
    {
      "epoch": 1.4804066912700165,
      "grad_norm": 2.921875,
      "learning_rate": 2.8140714606519885e-05,
      "loss": 0.8591,
      "step": 422400
    },
    {
      "epoch": 1.480441738776912,
      "grad_norm": 2.828125,
      "learning_rate": 2.8140065577856183e-05,
      "loss": 0.875,
      "step": 422410
    },
    {
      "epoch": 1.4804767862838077,
      "grad_norm": 2.84375,
      "learning_rate": 2.813941654919248e-05,
      "loss": 0.8645,
      "step": 422420
    },
    {
      "epoch": 1.4805118337907033,
      "grad_norm": 2.796875,
      "learning_rate": 2.813876752052878e-05,
      "loss": 0.8399,
      "step": 422430
    },
    {
      "epoch": 1.4805468812975988,
      "grad_norm": 2.921875,
      "learning_rate": 2.813811849186508e-05,
      "loss": 0.8566,
      "step": 422440
    },
    {
      "epoch": 1.4805819288044946,
      "grad_norm": 3.015625,
      "learning_rate": 2.8137469463201378e-05,
      "loss": 0.8969,
      "step": 422450
    },
    {
      "epoch": 1.48061697631139,
      "grad_norm": 2.953125,
      "learning_rate": 2.813682043453767e-05,
      "loss": 0.9439,
      "step": 422460
    },
    {
      "epoch": 1.4806520238182856,
      "grad_norm": 2.796875,
      "learning_rate": 2.8136171405873967e-05,
      "loss": 0.8348,
      "step": 422470
    },
    {
      "epoch": 1.4806870713251814,
      "grad_norm": 2.640625,
      "learning_rate": 2.8135522377210265e-05,
      "loss": 0.8116,
      "step": 422480
    },
    {
      "epoch": 1.480722118832077,
      "grad_norm": 2.671875,
      "learning_rate": 2.8134873348546563e-05,
      "loss": 0.9095,
      "step": 422490
    },
    {
      "epoch": 1.4807571663389725,
      "grad_norm": 2.953125,
      "learning_rate": 2.813422431988286e-05,
      "loss": 0.8824,
      "step": 422500
    },
    {
      "epoch": 1.480792213845868,
      "grad_norm": 3.328125,
      "learning_rate": 2.813357529121916e-05,
      "loss": 0.8848,
      "step": 422510
    },
    {
      "epoch": 1.4808272613527635,
      "grad_norm": 2.59375,
      "learning_rate": 2.813292626255546e-05,
      "loss": 0.8456,
      "step": 422520
    },
    {
      "epoch": 1.4808623088596593,
      "grad_norm": 3.09375,
      "learning_rate": 2.813227723389176e-05,
      "loss": 0.9,
      "step": 422530
    },
    {
      "epoch": 1.4808973563665548,
      "grad_norm": 3.0,
      "learning_rate": 2.8131628205228057e-05,
      "loss": 0.8416,
      "step": 422540
    },
    {
      "epoch": 1.4809324038734504,
      "grad_norm": 2.734375,
      "learning_rate": 2.8130979176564355e-05,
      "loss": 0.8766,
      "step": 422550
    },
    {
      "epoch": 1.4809674513803461,
      "grad_norm": 3.078125,
      "learning_rate": 2.8130330147900653e-05,
      "loss": 0.9062,
      "step": 422560
    },
    {
      "epoch": 1.4810024988872417,
      "grad_norm": 3.484375,
      "learning_rate": 2.812968111923695e-05,
      "loss": 0.8867,
      "step": 422570
    },
    {
      "epoch": 1.4810375463941372,
      "grad_norm": 2.6875,
      "learning_rate": 2.812903209057325e-05,
      "loss": 0.8687,
      "step": 422580
    },
    {
      "epoch": 1.481072593901033,
      "grad_norm": 2.609375,
      "learning_rate": 2.8128383061909547e-05,
      "loss": 0.8194,
      "step": 422590
    },
    {
      "epoch": 1.4811076414079285,
      "grad_norm": 2.765625,
      "learning_rate": 2.8127734033245845e-05,
      "loss": 0.8455,
      "step": 422600
    },
    {
      "epoch": 1.481142688914824,
      "grad_norm": 2.375,
      "learning_rate": 2.8127085004582143e-05,
      "loss": 0.7561,
      "step": 422610
    },
    {
      "epoch": 1.4811777364217196,
      "grad_norm": 2.46875,
      "learning_rate": 2.812643597591844e-05,
      "loss": 0.7413,
      "step": 422620
    },
    {
      "epoch": 1.481212783928615,
      "grad_norm": 2.625,
      "learning_rate": 2.812578694725474e-05,
      "loss": 0.879,
      "step": 422630
    },
    {
      "epoch": 1.4812478314355109,
      "grad_norm": 2.59375,
      "learning_rate": 2.8125137918591037e-05,
      "loss": 0.8396,
      "step": 422640
    },
    {
      "epoch": 1.4812828789424064,
      "grad_norm": 3.1875,
      "learning_rate": 2.8124488889927335e-05,
      "loss": 0.8668,
      "step": 422650
    },
    {
      "epoch": 1.481317926449302,
      "grad_norm": 2.625,
      "learning_rate": 2.8123839861263636e-05,
      "loss": 0.8114,
      "step": 422660
    },
    {
      "epoch": 1.4813529739561977,
      "grad_norm": 3.390625,
      "learning_rate": 2.8123190832599934e-05,
      "loss": 0.8546,
      "step": 422670
    },
    {
      "epoch": 1.4813880214630932,
      "grad_norm": 2.96875,
      "learning_rate": 2.8122541803936232e-05,
      "loss": 0.8356,
      "step": 422680
    },
    {
      "epoch": 1.4814230689699888,
      "grad_norm": 2.96875,
      "learning_rate": 2.812189277527253e-05,
      "loss": 0.7805,
      "step": 422690
    },
    {
      "epoch": 1.4814581164768845,
      "grad_norm": 2.5,
      "learning_rate": 2.8121243746608828e-05,
      "loss": 0.812,
      "step": 422700
    },
    {
      "epoch": 1.48149316398378,
      "grad_norm": 3.109375,
      "learning_rate": 2.8120594717945126e-05,
      "loss": 0.8487,
      "step": 422710
    },
    {
      "epoch": 1.4815282114906756,
      "grad_norm": 3.109375,
      "learning_rate": 2.8119945689281424e-05,
      "loss": 0.8863,
      "step": 422720
    },
    {
      "epoch": 1.4815632589975711,
      "grad_norm": 3.015625,
      "learning_rate": 2.8119296660617722e-05,
      "loss": 0.8866,
      "step": 422730
    },
    {
      "epoch": 1.4815983065044667,
      "grad_norm": 2.46875,
      "learning_rate": 2.811864763195402e-05,
      "loss": 0.8449,
      "step": 422740
    },
    {
      "epoch": 1.4816333540113624,
      "grad_norm": 3.09375,
      "learning_rate": 2.8117998603290318e-05,
      "loss": 0.8993,
      "step": 422750
    },
    {
      "epoch": 1.481668401518258,
      "grad_norm": 2.625,
      "learning_rate": 2.8117349574626616e-05,
      "loss": 0.7929,
      "step": 422760
    },
    {
      "epoch": 1.4817034490251535,
      "grad_norm": 2.796875,
      "learning_rate": 2.8116700545962914e-05,
      "loss": 0.9111,
      "step": 422770
    },
    {
      "epoch": 1.4817384965320493,
      "grad_norm": 3.171875,
      "learning_rate": 2.8116051517299212e-05,
      "loss": 0.8636,
      "step": 422780
    },
    {
      "epoch": 1.4817735440389448,
      "grad_norm": 2.890625,
      "learning_rate": 2.811540248863551e-05,
      "loss": 0.8485,
      "step": 422790
    },
    {
      "epoch": 1.4818085915458403,
      "grad_norm": 2.75,
      "learning_rate": 2.811475345997181e-05,
      "loss": 0.7867,
      "step": 422800
    },
    {
      "epoch": 1.481843639052736,
      "grad_norm": 3.0,
      "learning_rate": 2.811410443130811e-05,
      "loss": 0.8513,
      "step": 422810
    },
    {
      "epoch": 1.4818786865596316,
      "grad_norm": 3.203125,
      "learning_rate": 2.8113455402644407e-05,
      "loss": 0.8745,
      "step": 422820
    },
    {
      "epoch": 1.4819137340665272,
      "grad_norm": 3.015625,
      "learning_rate": 2.8112806373980705e-05,
      "loss": 0.9082,
      "step": 422830
    },
    {
      "epoch": 1.481948781573423,
      "grad_norm": 3.234375,
      "learning_rate": 2.8112157345316997e-05,
      "loss": 0.8073,
      "step": 422840
    },
    {
      "epoch": 1.4819838290803184,
      "grad_norm": 2.84375,
      "learning_rate": 2.8111508316653295e-05,
      "loss": 0.8608,
      "step": 422850
    },
    {
      "epoch": 1.482018876587214,
      "grad_norm": 3.09375,
      "learning_rate": 2.8110859287989593e-05,
      "loss": 0.8543,
      "step": 422860
    },
    {
      "epoch": 1.4820539240941095,
      "grad_norm": 2.75,
      "learning_rate": 2.811021025932589e-05,
      "loss": 0.8113,
      "step": 422870
    },
    {
      "epoch": 1.482088971601005,
      "grad_norm": 2.734375,
      "learning_rate": 2.810956123066219e-05,
      "loss": 0.828,
      "step": 422880
    },
    {
      "epoch": 1.4821240191079008,
      "grad_norm": 2.84375,
      "learning_rate": 2.810891220199849e-05,
      "loss": 0.8845,
      "step": 422890
    },
    {
      "epoch": 1.4821590666147963,
      "grad_norm": 3.078125,
      "learning_rate": 2.8108263173334788e-05,
      "loss": 0.8597,
      "step": 422900
    },
    {
      "epoch": 1.4821941141216919,
      "grad_norm": 2.90625,
      "learning_rate": 2.8107614144671086e-05,
      "loss": 0.8857,
      "step": 422910
    },
    {
      "epoch": 1.4822291616285876,
      "grad_norm": 2.84375,
      "learning_rate": 2.8106965116007384e-05,
      "loss": 0.8137,
      "step": 422920
    },
    {
      "epoch": 1.4822642091354832,
      "grad_norm": 3.296875,
      "learning_rate": 2.8106316087343682e-05,
      "loss": 0.9784,
      "step": 422930
    },
    {
      "epoch": 1.4822992566423787,
      "grad_norm": 3.484375,
      "learning_rate": 2.810566705867998e-05,
      "loss": 0.8476,
      "step": 422940
    },
    {
      "epoch": 1.4823343041492745,
      "grad_norm": 2.625,
      "learning_rate": 2.8105018030016278e-05,
      "loss": 0.841,
      "step": 422950
    },
    {
      "epoch": 1.48236935165617,
      "grad_norm": 3.296875,
      "learning_rate": 2.8104369001352576e-05,
      "loss": 0.9775,
      "step": 422960
    },
    {
      "epoch": 1.4824043991630655,
      "grad_norm": 2.90625,
      "learning_rate": 2.8103719972688874e-05,
      "loss": 0.8012,
      "step": 422970
    },
    {
      "epoch": 1.482439446669961,
      "grad_norm": 3.390625,
      "learning_rate": 2.8103070944025172e-05,
      "loss": 0.8146,
      "step": 422980
    },
    {
      "epoch": 1.4824744941768566,
      "grad_norm": 3.34375,
      "learning_rate": 2.810242191536147e-05,
      "loss": 0.8062,
      "step": 422990
    },
    {
      "epoch": 1.4825095416837524,
      "grad_norm": 2.953125,
      "learning_rate": 2.8101772886697768e-05,
      "loss": 0.802,
      "step": 423000
    },
    {
      "epoch": 1.482544589190648,
      "grad_norm": 3.203125,
      "learning_rate": 2.8101123858034066e-05,
      "loss": 0.8532,
      "step": 423010
    },
    {
      "epoch": 1.4825796366975434,
      "grad_norm": 2.6875,
      "learning_rate": 2.8100474829370364e-05,
      "loss": 0.8166,
      "step": 423020
    },
    {
      "epoch": 1.4826146842044392,
      "grad_norm": 2.953125,
      "learning_rate": 2.8099825800706665e-05,
      "loss": 0.7451,
      "step": 423030
    },
    {
      "epoch": 1.4826497317113347,
      "grad_norm": 2.984375,
      "learning_rate": 2.8099176772042963e-05,
      "loss": 0.9176,
      "step": 423040
    },
    {
      "epoch": 1.4826847792182303,
      "grad_norm": 3.4375,
      "learning_rate": 2.809852774337926e-05,
      "loss": 0.8678,
      "step": 423050
    },
    {
      "epoch": 1.482719826725126,
      "grad_norm": 4.4375,
      "learning_rate": 2.809787871471556e-05,
      "loss": 0.8697,
      "step": 423060
    },
    {
      "epoch": 1.4827548742320216,
      "grad_norm": 2.984375,
      "learning_rate": 2.8097229686051857e-05,
      "loss": 0.859,
      "step": 423070
    },
    {
      "epoch": 1.482789921738917,
      "grad_norm": 2.875,
      "learning_rate": 2.8096580657388155e-05,
      "loss": 0.8209,
      "step": 423080
    },
    {
      "epoch": 1.4828249692458126,
      "grad_norm": 2.765625,
      "learning_rate": 2.8095931628724453e-05,
      "loss": 0.7969,
      "step": 423090
    },
    {
      "epoch": 1.4828600167527082,
      "grad_norm": 3.15625,
      "learning_rate": 2.809528260006075e-05,
      "loss": 0.8533,
      "step": 423100
    },
    {
      "epoch": 1.482895064259604,
      "grad_norm": 2.59375,
      "learning_rate": 2.809463357139705e-05,
      "loss": 0.843,
      "step": 423110
    },
    {
      "epoch": 1.4829301117664995,
      "grad_norm": 2.890625,
      "learning_rate": 2.8093984542733347e-05,
      "loss": 0.7859,
      "step": 423120
    },
    {
      "epoch": 1.482965159273395,
      "grad_norm": 2.796875,
      "learning_rate": 2.8093335514069645e-05,
      "loss": 0.7966,
      "step": 423130
    },
    {
      "epoch": 1.4830002067802908,
      "grad_norm": 2.796875,
      "learning_rate": 2.8092686485405943e-05,
      "loss": 0.853,
      "step": 423140
    },
    {
      "epoch": 1.4830352542871863,
      "grad_norm": 2.671875,
      "learning_rate": 2.809203745674224e-05,
      "loss": 0.9128,
      "step": 423150
    },
    {
      "epoch": 1.4830703017940818,
      "grad_norm": 2.9375,
      "learning_rate": 2.809138842807854e-05,
      "loss": 0.8469,
      "step": 423160
    },
    {
      "epoch": 1.4831053493009776,
      "grad_norm": 3.03125,
      "learning_rate": 2.809073939941484e-05,
      "loss": 0.8883,
      "step": 423170
    },
    {
      "epoch": 1.4831403968078731,
      "grad_norm": 3.390625,
      "learning_rate": 2.809009037075114e-05,
      "loss": 0.8688,
      "step": 423180
    },
    {
      "epoch": 1.4831754443147687,
      "grad_norm": 2.984375,
      "learning_rate": 2.8089441342087437e-05,
      "loss": 0.8674,
      "step": 423190
    },
    {
      "epoch": 1.4832104918216642,
      "grad_norm": 3.078125,
      "learning_rate": 2.8088792313423735e-05,
      "loss": 0.8141,
      "step": 423200
    },
    {
      "epoch": 1.4832455393285597,
      "grad_norm": 2.5,
      "learning_rate": 2.8088143284760026e-05,
      "loss": 0.8179,
      "step": 423210
    },
    {
      "epoch": 1.4832805868354555,
      "grad_norm": 2.84375,
      "learning_rate": 2.8087494256096324e-05,
      "loss": 0.8309,
      "step": 423220
    },
    {
      "epoch": 1.483315634342351,
      "grad_norm": 2.78125,
      "learning_rate": 2.8086845227432622e-05,
      "loss": 0.845,
      "step": 423230
    },
    {
      "epoch": 1.4833506818492466,
      "grad_norm": 2.796875,
      "learning_rate": 2.808619619876892e-05,
      "loss": 0.8827,
      "step": 423240
    },
    {
      "epoch": 1.4833857293561423,
      "grad_norm": 3.46875,
      "learning_rate": 2.8085547170105218e-05,
      "loss": 0.9317,
      "step": 423250
    },
    {
      "epoch": 1.4834207768630379,
      "grad_norm": 2.6875,
      "learning_rate": 2.808489814144152e-05,
      "loss": 0.8613,
      "step": 423260
    },
    {
      "epoch": 1.4834558243699334,
      "grad_norm": 3.1875,
      "learning_rate": 2.8084249112777817e-05,
      "loss": 0.9376,
      "step": 423270
    },
    {
      "epoch": 1.4834908718768292,
      "grad_norm": 3.015625,
      "learning_rate": 2.8083600084114115e-05,
      "loss": 0.858,
      "step": 423280
    },
    {
      "epoch": 1.4835259193837247,
      "grad_norm": 2.734375,
      "learning_rate": 2.8082951055450413e-05,
      "loss": 0.8118,
      "step": 423290
    },
    {
      "epoch": 1.4835609668906202,
      "grad_norm": 2.515625,
      "learning_rate": 2.808230202678671e-05,
      "loss": 0.8736,
      "step": 423300
    },
    {
      "epoch": 1.4835960143975158,
      "grad_norm": 2.578125,
      "learning_rate": 2.808165299812301e-05,
      "loss": 0.8596,
      "step": 423310
    },
    {
      "epoch": 1.4836310619044113,
      "grad_norm": 3.25,
      "learning_rate": 2.8081003969459307e-05,
      "loss": 0.8323,
      "step": 423320
    },
    {
      "epoch": 1.483666109411307,
      "grad_norm": 3.0,
      "learning_rate": 2.8080354940795605e-05,
      "loss": 0.8391,
      "step": 423330
    },
    {
      "epoch": 1.4837011569182026,
      "grad_norm": 3.03125,
      "learning_rate": 2.8079705912131903e-05,
      "loss": 0.8997,
      "step": 423340
    },
    {
      "epoch": 1.4837362044250981,
      "grad_norm": 3.046875,
      "learning_rate": 2.80790568834682e-05,
      "loss": 0.9409,
      "step": 423350
    },
    {
      "epoch": 1.4837712519319939,
      "grad_norm": 2.875,
      "learning_rate": 2.80784078548045e-05,
      "loss": 0.8464,
      "step": 423360
    },
    {
      "epoch": 1.4838062994388894,
      "grad_norm": 3.3125,
      "learning_rate": 2.8077758826140797e-05,
      "loss": 0.82,
      "step": 423370
    },
    {
      "epoch": 1.483841346945785,
      "grad_norm": 3.03125,
      "learning_rate": 2.8077109797477095e-05,
      "loss": 0.9279,
      "step": 423380
    },
    {
      "epoch": 1.4838763944526807,
      "grad_norm": 3.1875,
      "learning_rate": 2.8076460768813393e-05,
      "loss": 0.9025,
      "step": 423390
    },
    {
      "epoch": 1.4839114419595762,
      "grad_norm": 2.90625,
      "learning_rate": 2.8075811740149695e-05,
      "loss": 0.8234,
      "step": 423400
    },
    {
      "epoch": 1.4839464894664718,
      "grad_norm": 2.890625,
      "learning_rate": 2.8075162711485993e-05,
      "loss": 0.86,
      "step": 423410
    },
    {
      "epoch": 1.4839815369733673,
      "grad_norm": 2.390625,
      "learning_rate": 2.807451368282229e-05,
      "loss": 0.8546,
      "step": 423420
    },
    {
      "epoch": 1.484016584480263,
      "grad_norm": 3.109375,
      "learning_rate": 2.807386465415859e-05,
      "loss": 0.7983,
      "step": 423430
    },
    {
      "epoch": 1.4840516319871586,
      "grad_norm": 2.625,
      "learning_rate": 2.8073215625494887e-05,
      "loss": 0.8521,
      "step": 423440
    },
    {
      "epoch": 1.4840866794940542,
      "grad_norm": 3.265625,
      "learning_rate": 2.8072566596831185e-05,
      "loss": 0.895,
      "step": 423450
    },
    {
      "epoch": 1.4841217270009497,
      "grad_norm": 2.609375,
      "learning_rate": 2.8071917568167483e-05,
      "loss": 0.837,
      "step": 423460
    },
    {
      "epoch": 1.4841567745078454,
      "grad_norm": 2.78125,
      "learning_rate": 2.807126853950378e-05,
      "loss": 0.8571,
      "step": 423470
    },
    {
      "epoch": 1.484191822014741,
      "grad_norm": 3.015625,
      "learning_rate": 2.807061951084008e-05,
      "loss": 0.7098,
      "step": 423480
    },
    {
      "epoch": 1.4842268695216365,
      "grad_norm": 3.296875,
      "learning_rate": 2.8069970482176377e-05,
      "loss": 0.8444,
      "step": 423490
    },
    {
      "epoch": 1.4842619170285323,
      "grad_norm": 2.796875,
      "learning_rate": 2.8069321453512675e-05,
      "loss": 0.841,
      "step": 423500
    },
    {
      "epoch": 1.4842969645354278,
      "grad_norm": 3.3125,
      "learning_rate": 2.8068672424848973e-05,
      "loss": 0.8827,
      "step": 423510
    },
    {
      "epoch": 1.4843320120423233,
      "grad_norm": 3.109375,
      "learning_rate": 2.806802339618527e-05,
      "loss": 0.8826,
      "step": 423520
    },
    {
      "epoch": 1.484367059549219,
      "grad_norm": 2.796875,
      "learning_rate": 2.8067374367521572e-05,
      "loss": 0.8617,
      "step": 423530
    },
    {
      "epoch": 1.4844021070561146,
      "grad_norm": 2.90625,
      "learning_rate": 2.806672533885787e-05,
      "loss": 0.9012,
      "step": 423540
    },
    {
      "epoch": 1.4844371545630102,
      "grad_norm": 3.125,
      "learning_rate": 2.8066076310194168e-05,
      "loss": 0.8379,
      "step": 423550
    },
    {
      "epoch": 1.4844722020699057,
      "grad_norm": 2.703125,
      "learning_rate": 2.8065427281530466e-05,
      "loss": 0.8493,
      "step": 423560
    },
    {
      "epoch": 1.4845072495768012,
      "grad_norm": 2.765625,
      "learning_rate": 2.8064778252866764e-05,
      "loss": 0.8826,
      "step": 423570
    },
    {
      "epoch": 1.484542297083697,
      "grad_norm": 2.765625,
      "learning_rate": 2.8064129224203062e-05,
      "loss": 0.8684,
      "step": 423580
    },
    {
      "epoch": 1.4845773445905925,
      "grad_norm": 3.21875,
      "learning_rate": 2.8063480195539353e-05,
      "loss": 0.8331,
      "step": 423590
    },
    {
      "epoch": 1.484612392097488,
      "grad_norm": 2.640625,
      "learning_rate": 2.806283116687565e-05,
      "loss": 0.7461,
      "step": 423600
    },
    {
      "epoch": 1.4846474396043838,
      "grad_norm": 2.34375,
      "learning_rate": 2.806218213821195e-05,
      "loss": 0.8512,
      "step": 423610
    },
    {
      "epoch": 1.4846824871112794,
      "grad_norm": 2.8125,
      "learning_rate": 2.806153310954825e-05,
      "loss": 0.8858,
      "step": 423620
    },
    {
      "epoch": 1.484717534618175,
      "grad_norm": 2.953125,
      "learning_rate": 2.806088408088455e-05,
      "loss": 0.8769,
      "step": 423630
    },
    {
      "epoch": 1.4847525821250707,
      "grad_norm": 2.90625,
      "learning_rate": 2.8060235052220847e-05,
      "loss": 0.8594,
      "step": 423640
    },
    {
      "epoch": 1.4847876296319662,
      "grad_norm": 3.390625,
      "learning_rate": 2.8059586023557145e-05,
      "loss": 0.8662,
      "step": 423650
    },
    {
      "epoch": 1.4848226771388617,
      "grad_norm": 2.984375,
      "learning_rate": 2.8058936994893443e-05,
      "loss": 0.8114,
      "step": 423660
    },
    {
      "epoch": 1.4848577246457573,
      "grad_norm": 3.125,
      "learning_rate": 2.805828796622974e-05,
      "loss": 0.8634,
      "step": 423670
    },
    {
      "epoch": 1.4848927721526528,
      "grad_norm": 3.140625,
      "learning_rate": 2.805763893756604e-05,
      "loss": 0.8199,
      "step": 423680
    },
    {
      "epoch": 1.4849278196595486,
      "grad_norm": 2.71875,
      "learning_rate": 2.8056989908902337e-05,
      "loss": 0.8387,
      "step": 423690
    },
    {
      "epoch": 1.484962867166444,
      "grad_norm": 2.671875,
      "learning_rate": 2.8056340880238635e-05,
      "loss": 0.7515,
      "step": 423700
    },
    {
      "epoch": 1.4849979146733396,
      "grad_norm": 3.171875,
      "learning_rate": 2.8055691851574933e-05,
      "loss": 0.8655,
      "step": 423710
    },
    {
      "epoch": 1.4850329621802354,
      "grad_norm": 2.75,
      "learning_rate": 2.805504282291123e-05,
      "loss": 0.8429,
      "step": 423720
    },
    {
      "epoch": 1.485068009687131,
      "grad_norm": 3.109375,
      "learning_rate": 2.805439379424753e-05,
      "loss": 0.8741,
      "step": 423730
    },
    {
      "epoch": 1.4851030571940265,
      "grad_norm": 2.9375,
      "learning_rate": 2.8053744765583827e-05,
      "loss": 0.9188,
      "step": 423740
    },
    {
      "epoch": 1.4851381047009222,
      "grad_norm": 3.1875,
      "learning_rate": 2.8053095736920125e-05,
      "loss": 0.8622,
      "step": 423750
    },
    {
      "epoch": 1.4851731522078178,
      "grad_norm": 2.671875,
      "learning_rate": 2.8052446708256426e-05,
      "loss": 0.8529,
      "step": 423760
    },
    {
      "epoch": 1.4852081997147133,
      "grad_norm": 2.75,
      "learning_rate": 2.8051797679592724e-05,
      "loss": 0.774,
      "step": 423770
    },
    {
      "epoch": 1.4852432472216088,
      "grad_norm": 2.8125,
      "learning_rate": 2.8051148650929022e-05,
      "loss": 0.8586,
      "step": 423780
    },
    {
      "epoch": 1.4852782947285044,
      "grad_norm": 2.953125,
      "learning_rate": 2.805049962226532e-05,
      "loss": 0.8628,
      "step": 423790
    },
    {
      "epoch": 1.4853133422354001,
      "grad_norm": 2.921875,
      "learning_rate": 2.8049850593601618e-05,
      "loss": 0.8511,
      "step": 423800
    },
    {
      "epoch": 1.4853483897422957,
      "grad_norm": 3.359375,
      "learning_rate": 2.8049201564937916e-05,
      "loss": 0.8064,
      "step": 423810
    },
    {
      "epoch": 1.4853834372491912,
      "grad_norm": 2.640625,
      "learning_rate": 2.8048552536274214e-05,
      "loss": 0.8303,
      "step": 423820
    },
    {
      "epoch": 1.485418484756087,
      "grad_norm": 3.03125,
      "learning_rate": 2.8047903507610512e-05,
      "loss": 0.8976,
      "step": 423830
    },
    {
      "epoch": 1.4854535322629825,
      "grad_norm": 2.984375,
      "learning_rate": 2.804725447894681e-05,
      "loss": 0.878,
      "step": 423840
    },
    {
      "epoch": 1.485488579769878,
      "grad_norm": 2.625,
      "learning_rate": 2.8046605450283108e-05,
      "loss": 0.8788,
      "step": 423850
    },
    {
      "epoch": 1.4855236272767738,
      "grad_norm": 2.921875,
      "learning_rate": 2.8045956421619406e-05,
      "loss": 0.8023,
      "step": 423860
    },
    {
      "epoch": 1.4855586747836693,
      "grad_norm": 2.609375,
      "learning_rate": 2.8045307392955704e-05,
      "loss": 0.7758,
      "step": 423870
    },
    {
      "epoch": 1.4855937222905649,
      "grad_norm": 2.765625,
      "learning_rate": 2.8044658364292002e-05,
      "loss": 0.7988,
      "step": 423880
    },
    {
      "epoch": 1.4856287697974604,
      "grad_norm": 2.9375,
      "learning_rate": 2.80440093356283e-05,
      "loss": 0.8221,
      "step": 423890
    },
    {
      "epoch": 1.485663817304356,
      "grad_norm": 3.203125,
      "learning_rate": 2.80433603069646e-05,
      "loss": 0.8529,
      "step": 423900
    },
    {
      "epoch": 1.4856988648112517,
      "grad_norm": 3.03125,
      "learning_rate": 2.80427112783009e-05,
      "loss": 0.8069,
      "step": 423910
    },
    {
      "epoch": 1.4857339123181472,
      "grad_norm": 2.75,
      "learning_rate": 2.8042062249637198e-05,
      "loss": 0.8097,
      "step": 423920
    },
    {
      "epoch": 1.4857689598250428,
      "grad_norm": 3.21875,
      "learning_rate": 2.8041413220973496e-05,
      "loss": 0.881,
      "step": 423930
    },
    {
      "epoch": 1.4858040073319385,
      "grad_norm": 2.796875,
      "learning_rate": 2.8040764192309794e-05,
      "loss": 0.8153,
      "step": 423940
    },
    {
      "epoch": 1.485839054838834,
      "grad_norm": 3.203125,
      "learning_rate": 2.804011516364609e-05,
      "loss": 0.8596,
      "step": 423950
    },
    {
      "epoch": 1.4858741023457296,
      "grad_norm": 2.875,
      "learning_rate": 2.8039466134982383e-05,
      "loss": 0.79,
      "step": 423960
    },
    {
      "epoch": 1.4859091498526253,
      "grad_norm": 3.21875,
      "learning_rate": 2.803881710631868e-05,
      "loss": 0.8525,
      "step": 423970
    },
    {
      "epoch": 1.4859441973595209,
      "grad_norm": 2.96875,
      "learning_rate": 2.803816807765498e-05,
      "loss": 0.8523,
      "step": 423980
    },
    {
      "epoch": 1.4859792448664164,
      "grad_norm": 3.15625,
      "learning_rate": 2.803751904899128e-05,
      "loss": 0.8299,
      "step": 423990
    },
    {
      "epoch": 1.486014292373312,
      "grad_norm": 2.921875,
      "learning_rate": 2.8036870020327578e-05,
      "loss": 0.8271,
      "step": 424000
    },
    {
      "epoch": 1.4860493398802075,
      "grad_norm": 2.828125,
      "learning_rate": 2.8036220991663876e-05,
      "loss": 0.8109,
      "step": 424010
    },
    {
      "epoch": 1.4860843873871032,
      "grad_norm": 2.75,
      "learning_rate": 2.8035571963000174e-05,
      "loss": 0.7987,
      "step": 424020
    },
    {
      "epoch": 1.4861194348939988,
      "grad_norm": 2.671875,
      "learning_rate": 2.8034922934336472e-05,
      "loss": 0.8214,
      "step": 424030
    },
    {
      "epoch": 1.4861544824008943,
      "grad_norm": 2.6875,
      "learning_rate": 2.803427390567277e-05,
      "loss": 0.8244,
      "step": 424040
    },
    {
      "epoch": 1.48618952990779,
      "grad_norm": 3.15625,
      "learning_rate": 2.8033624877009068e-05,
      "loss": 0.9061,
      "step": 424050
    },
    {
      "epoch": 1.4862245774146856,
      "grad_norm": 2.96875,
      "learning_rate": 2.8032975848345366e-05,
      "loss": 0.9118,
      "step": 424060
    },
    {
      "epoch": 1.4862596249215811,
      "grad_norm": 3.109375,
      "learning_rate": 2.8032326819681664e-05,
      "loss": 0.8329,
      "step": 424070
    },
    {
      "epoch": 1.486294672428477,
      "grad_norm": 2.796875,
      "learning_rate": 2.8031677791017962e-05,
      "loss": 0.8435,
      "step": 424080
    },
    {
      "epoch": 1.4863297199353724,
      "grad_norm": 2.90625,
      "learning_rate": 2.803102876235426e-05,
      "loss": 0.8137,
      "step": 424090
    },
    {
      "epoch": 1.486364767442268,
      "grad_norm": 2.734375,
      "learning_rate": 2.8030379733690558e-05,
      "loss": 0.862,
      "step": 424100
    },
    {
      "epoch": 1.4863998149491635,
      "grad_norm": 3.15625,
      "learning_rate": 2.8029730705026856e-05,
      "loss": 0.8197,
      "step": 424110
    },
    {
      "epoch": 1.4864348624560593,
      "grad_norm": 2.84375,
      "learning_rate": 2.8029081676363154e-05,
      "loss": 0.8555,
      "step": 424120
    },
    {
      "epoch": 1.4864699099629548,
      "grad_norm": 2.75,
      "learning_rate": 2.8028432647699456e-05,
      "loss": 0.8509,
      "step": 424130
    },
    {
      "epoch": 1.4865049574698503,
      "grad_norm": 3.90625,
      "learning_rate": 2.8027783619035754e-05,
      "loss": 0.8291,
      "step": 424140
    },
    {
      "epoch": 1.4865400049767459,
      "grad_norm": 3.140625,
      "learning_rate": 2.802713459037205e-05,
      "loss": 0.8818,
      "step": 424150
    },
    {
      "epoch": 1.4865750524836416,
      "grad_norm": 3.0625,
      "learning_rate": 2.802648556170835e-05,
      "loss": 0.9107,
      "step": 424160
    },
    {
      "epoch": 1.4866100999905372,
      "grad_norm": 2.65625,
      "learning_rate": 2.8025836533044648e-05,
      "loss": 0.8572,
      "step": 424170
    },
    {
      "epoch": 1.4866451474974327,
      "grad_norm": 2.828125,
      "learning_rate": 2.8025187504380946e-05,
      "loss": 0.8699,
      "step": 424180
    },
    {
      "epoch": 1.4866801950043285,
      "grad_norm": 2.328125,
      "learning_rate": 2.8024538475717244e-05,
      "loss": 0.83,
      "step": 424190
    },
    {
      "epoch": 1.486715242511224,
      "grad_norm": 2.734375,
      "learning_rate": 2.802388944705354e-05,
      "loss": 0.9072,
      "step": 424200
    },
    {
      "epoch": 1.4867502900181195,
      "grad_norm": 3.234375,
      "learning_rate": 2.802324041838984e-05,
      "loss": 0.9372,
      "step": 424210
    },
    {
      "epoch": 1.4867853375250153,
      "grad_norm": 2.671875,
      "learning_rate": 2.8022591389726138e-05,
      "loss": 0.7932,
      "step": 424220
    },
    {
      "epoch": 1.4868203850319108,
      "grad_norm": 2.828125,
      "learning_rate": 2.8021942361062436e-05,
      "loss": 0.8371,
      "step": 424230
    },
    {
      "epoch": 1.4868554325388064,
      "grad_norm": 2.6875,
      "learning_rate": 2.8021293332398734e-05,
      "loss": 0.8416,
      "step": 424240
    },
    {
      "epoch": 1.486890480045702,
      "grad_norm": 3.171875,
      "learning_rate": 2.802064430373503e-05,
      "loss": 0.8252,
      "step": 424250
    },
    {
      "epoch": 1.4869255275525974,
      "grad_norm": 2.625,
      "learning_rate": 2.801999527507133e-05,
      "loss": 0.8792,
      "step": 424260
    },
    {
      "epoch": 1.4869605750594932,
      "grad_norm": 3.0,
      "learning_rate": 2.801934624640763e-05,
      "loss": 0.786,
      "step": 424270
    },
    {
      "epoch": 1.4869956225663887,
      "grad_norm": 3.25,
      "learning_rate": 2.801869721774393e-05,
      "loss": 0.9062,
      "step": 424280
    },
    {
      "epoch": 1.4870306700732843,
      "grad_norm": 2.671875,
      "learning_rate": 2.8018048189080227e-05,
      "loss": 0.8903,
      "step": 424290
    },
    {
      "epoch": 1.48706571758018,
      "grad_norm": 2.890625,
      "learning_rate": 2.8017399160416525e-05,
      "loss": 0.8621,
      "step": 424300
    },
    {
      "epoch": 1.4871007650870756,
      "grad_norm": 3.234375,
      "learning_rate": 2.8016750131752823e-05,
      "loss": 0.8336,
      "step": 424310
    },
    {
      "epoch": 1.487135812593971,
      "grad_norm": 3.140625,
      "learning_rate": 2.801610110308912e-05,
      "loss": 0.8701,
      "step": 424320
    },
    {
      "epoch": 1.4871708601008669,
      "grad_norm": 2.53125,
      "learning_rate": 2.801545207442542e-05,
      "loss": 0.8679,
      "step": 424330
    },
    {
      "epoch": 1.4872059076077624,
      "grad_norm": 2.75,
      "learning_rate": 2.801480304576171e-05,
      "loss": 0.888,
      "step": 424340
    },
    {
      "epoch": 1.487240955114658,
      "grad_norm": 2.859375,
      "learning_rate": 2.8014154017098008e-05,
      "loss": 0.8177,
      "step": 424350
    },
    {
      "epoch": 1.4872760026215535,
      "grad_norm": 2.609375,
      "learning_rate": 2.801350498843431e-05,
      "loss": 0.774,
      "step": 424360
    },
    {
      "epoch": 1.487311050128449,
      "grad_norm": 3.28125,
      "learning_rate": 2.8012855959770608e-05,
      "loss": 0.8326,
      "step": 424370
    },
    {
      "epoch": 1.4873460976353448,
      "grad_norm": 2.796875,
      "learning_rate": 2.8012206931106906e-05,
      "loss": 0.8239,
      "step": 424380
    },
    {
      "epoch": 1.4873811451422403,
      "grad_norm": 3.09375,
      "learning_rate": 2.8011557902443204e-05,
      "loss": 0.8592,
      "step": 424390
    },
    {
      "epoch": 1.4874161926491358,
      "grad_norm": 2.90625,
      "learning_rate": 2.80109088737795e-05,
      "loss": 0.9483,
      "step": 424400
    },
    {
      "epoch": 1.4874512401560316,
      "grad_norm": 2.984375,
      "learning_rate": 2.80102598451158e-05,
      "loss": 0.773,
      "step": 424410
    },
    {
      "epoch": 1.4874862876629271,
      "grad_norm": 2.859375,
      "learning_rate": 2.8009610816452098e-05,
      "loss": 0.8869,
      "step": 424420
    },
    {
      "epoch": 1.4875213351698227,
      "grad_norm": 3.25,
      "learning_rate": 2.8008961787788396e-05,
      "loss": 0.8558,
      "step": 424430
    },
    {
      "epoch": 1.4875563826767184,
      "grad_norm": 3.15625,
      "learning_rate": 2.8008312759124694e-05,
      "loss": 0.8568,
      "step": 424440
    },
    {
      "epoch": 1.487591430183614,
      "grad_norm": 2.71875,
      "learning_rate": 2.800766373046099e-05,
      "loss": 0.8916,
      "step": 424450
    },
    {
      "epoch": 1.4876264776905095,
      "grad_norm": 2.875,
      "learning_rate": 2.800701470179729e-05,
      "loss": 0.7616,
      "step": 424460
    },
    {
      "epoch": 1.487661525197405,
      "grad_norm": 2.75,
      "learning_rate": 2.8006365673133588e-05,
      "loss": 0.9038,
      "step": 424470
    },
    {
      "epoch": 1.4876965727043006,
      "grad_norm": 2.484375,
      "learning_rate": 2.8005716644469886e-05,
      "loss": 0.7551,
      "step": 424480
    },
    {
      "epoch": 1.4877316202111963,
      "grad_norm": 2.796875,
      "learning_rate": 2.8005067615806184e-05,
      "loss": 0.7975,
      "step": 424490
    },
    {
      "epoch": 1.4877666677180919,
      "grad_norm": 3.0625,
      "learning_rate": 2.8004418587142485e-05,
      "loss": 0.8261,
      "step": 424500
    },
    {
      "epoch": 1.4878017152249874,
      "grad_norm": 2.6875,
      "learning_rate": 2.8003769558478783e-05,
      "loss": 0.7865,
      "step": 424510
    },
    {
      "epoch": 1.4878367627318831,
      "grad_norm": 2.671875,
      "learning_rate": 2.800312052981508e-05,
      "loss": 0.8177,
      "step": 424520
    },
    {
      "epoch": 1.4878718102387787,
      "grad_norm": 3.0,
      "learning_rate": 2.800247150115138e-05,
      "loss": 0.8324,
      "step": 424530
    },
    {
      "epoch": 1.4879068577456742,
      "grad_norm": 2.515625,
      "learning_rate": 2.8001822472487677e-05,
      "loss": 0.8008,
      "step": 424540
    },
    {
      "epoch": 1.48794190525257,
      "grad_norm": 3.15625,
      "learning_rate": 2.8001173443823975e-05,
      "loss": 0.9043,
      "step": 424550
    },
    {
      "epoch": 1.4879769527594655,
      "grad_norm": 3.140625,
      "learning_rate": 2.8000524415160273e-05,
      "loss": 0.7482,
      "step": 424560
    },
    {
      "epoch": 1.488012000266361,
      "grad_norm": 2.71875,
      "learning_rate": 2.799987538649657e-05,
      "loss": 0.7574,
      "step": 424570
    },
    {
      "epoch": 1.4880470477732566,
      "grad_norm": 3.421875,
      "learning_rate": 2.799922635783287e-05,
      "loss": 0.9195,
      "step": 424580
    },
    {
      "epoch": 1.4880820952801521,
      "grad_norm": 3.03125,
      "learning_rate": 2.7998577329169167e-05,
      "loss": 0.9063,
      "step": 424590
    },
    {
      "epoch": 1.4881171427870479,
      "grad_norm": 2.890625,
      "learning_rate": 2.7997928300505465e-05,
      "loss": 0.7847,
      "step": 424600
    },
    {
      "epoch": 1.4881521902939434,
      "grad_norm": 2.734375,
      "learning_rate": 2.7997279271841763e-05,
      "loss": 0.8244,
      "step": 424610
    },
    {
      "epoch": 1.488187237800839,
      "grad_norm": 2.953125,
      "learning_rate": 2.799663024317806e-05,
      "loss": 0.8449,
      "step": 424620
    },
    {
      "epoch": 1.4882222853077347,
      "grad_norm": 2.640625,
      "learning_rate": 2.7995981214514362e-05,
      "loss": 0.7261,
      "step": 424630
    },
    {
      "epoch": 1.4882573328146302,
      "grad_norm": 2.984375,
      "learning_rate": 2.799533218585066e-05,
      "loss": 0.8849,
      "step": 424640
    },
    {
      "epoch": 1.4882923803215258,
      "grad_norm": 2.484375,
      "learning_rate": 2.799468315718696e-05,
      "loss": 0.8333,
      "step": 424650
    },
    {
      "epoch": 1.4883274278284215,
      "grad_norm": 2.953125,
      "learning_rate": 2.7994034128523256e-05,
      "loss": 0.8708,
      "step": 424660
    },
    {
      "epoch": 1.488362475335317,
      "grad_norm": 2.90625,
      "learning_rate": 2.7993385099859554e-05,
      "loss": 0.8739,
      "step": 424670
    },
    {
      "epoch": 1.4883975228422126,
      "grad_norm": 2.71875,
      "learning_rate": 2.7992736071195852e-05,
      "loss": 0.7954,
      "step": 424680
    },
    {
      "epoch": 1.4884325703491081,
      "grad_norm": 2.75,
      "learning_rate": 2.799208704253215e-05,
      "loss": 0.8272,
      "step": 424690
    },
    {
      "epoch": 1.4884676178560037,
      "grad_norm": 2.734375,
      "learning_rate": 2.799143801386845e-05,
      "loss": 0.7779,
      "step": 424700
    },
    {
      "epoch": 1.4885026653628994,
      "grad_norm": 2.796875,
      "learning_rate": 2.7990788985204746e-05,
      "loss": 0.8145,
      "step": 424710
    },
    {
      "epoch": 1.488537712869795,
      "grad_norm": 3.0,
      "learning_rate": 2.799013995654104e-05,
      "loss": 0.8526,
      "step": 424720
    },
    {
      "epoch": 1.4885727603766905,
      "grad_norm": 3.234375,
      "learning_rate": 2.798949092787734e-05,
      "loss": 0.8217,
      "step": 424730
    },
    {
      "epoch": 1.4886078078835863,
      "grad_norm": 3.5625,
      "learning_rate": 2.7988841899213637e-05,
      "loss": 0.8581,
      "step": 424740
    },
    {
      "epoch": 1.4886428553904818,
      "grad_norm": 2.9375,
      "learning_rate": 2.7988192870549935e-05,
      "loss": 0.8538,
      "step": 424750
    },
    {
      "epoch": 1.4886779028973773,
      "grad_norm": 2.65625,
      "learning_rate": 2.7987543841886233e-05,
      "loss": 0.8104,
      "step": 424760
    },
    {
      "epoch": 1.488712950404273,
      "grad_norm": 2.8125,
      "learning_rate": 2.798689481322253e-05,
      "loss": 0.7871,
      "step": 424770
    },
    {
      "epoch": 1.4887479979111686,
      "grad_norm": 3.09375,
      "learning_rate": 2.798624578455883e-05,
      "loss": 0.7696,
      "step": 424780
    },
    {
      "epoch": 1.4887830454180642,
      "grad_norm": 3.125,
      "learning_rate": 2.7985596755895127e-05,
      "loss": 0.9075,
      "step": 424790
    },
    {
      "epoch": 1.4888180929249597,
      "grad_norm": 3.421875,
      "learning_rate": 2.7984947727231425e-05,
      "loss": 0.9101,
      "step": 424800
    },
    {
      "epoch": 1.4888531404318555,
      "grad_norm": 3.234375,
      "learning_rate": 2.7984298698567723e-05,
      "loss": 0.8129,
      "step": 424810
    },
    {
      "epoch": 1.488888187938751,
      "grad_norm": 2.96875,
      "learning_rate": 2.798364966990402e-05,
      "loss": 0.8787,
      "step": 424820
    },
    {
      "epoch": 1.4889232354456465,
      "grad_norm": 2.734375,
      "learning_rate": 2.798300064124032e-05,
      "loss": 0.7995,
      "step": 424830
    },
    {
      "epoch": 1.488958282952542,
      "grad_norm": 3.734375,
      "learning_rate": 2.7982351612576617e-05,
      "loss": 0.8507,
      "step": 424840
    },
    {
      "epoch": 1.4889933304594378,
      "grad_norm": 3.125,
      "learning_rate": 2.7981702583912915e-05,
      "loss": 0.7967,
      "step": 424850
    },
    {
      "epoch": 1.4890283779663334,
      "grad_norm": 2.8125,
      "learning_rate": 2.7981053555249216e-05,
      "loss": 0.7903,
      "step": 424860
    },
    {
      "epoch": 1.489063425473229,
      "grad_norm": 3.078125,
      "learning_rate": 2.7980404526585514e-05,
      "loss": 0.867,
      "step": 424870
    },
    {
      "epoch": 1.4890984729801247,
      "grad_norm": 2.71875,
      "learning_rate": 2.7979755497921812e-05,
      "loss": 0.8448,
      "step": 424880
    },
    {
      "epoch": 1.4891335204870202,
      "grad_norm": 2.953125,
      "learning_rate": 2.797910646925811e-05,
      "loss": 0.8356,
      "step": 424890
    },
    {
      "epoch": 1.4891685679939157,
      "grad_norm": 3.09375,
      "learning_rate": 2.797845744059441e-05,
      "loss": 0.8713,
      "step": 424900
    },
    {
      "epoch": 1.4892036155008115,
      "grad_norm": 3.140625,
      "learning_rate": 2.7977808411930706e-05,
      "loss": 0.8975,
      "step": 424910
    },
    {
      "epoch": 1.489238663007707,
      "grad_norm": 3.046875,
      "learning_rate": 2.7977159383267004e-05,
      "loss": 0.8903,
      "step": 424920
    },
    {
      "epoch": 1.4892737105146026,
      "grad_norm": 2.734375,
      "learning_rate": 2.7976510354603302e-05,
      "loss": 0.7548,
      "step": 424930
    },
    {
      "epoch": 1.489308758021498,
      "grad_norm": 2.890625,
      "learning_rate": 2.79758613259396e-05,
      "loss": 0.896,
      "step": 424940
    },
    {
      "epoch": 1.4893438055283936,
      "grad_norm": 2.84375,
      "learning_rate": 2.79752122972759e-05,
      "loss": 0.7865,
      "step": 424950
    },
    {
      "epoch": 1.4893788530352894,
      "grad_norm": 3.390625,
      "learning_rate": 2.7974563268612196e-05,
      "loss": 0.828,
      "step": 424960
    },
    {
      "epoch": 1.489413900542185,
      "grad_norm": 2.890625,
      "learning_rate": 2.7973914239948494e-05,
      "loss": 0.8184,
      "step": 424970
    },
    {
      "epoch": 1.4894489480490805,
      "grad_norm": 2.9375,
      "learning_rate": 2.7973265211284792e-05,
      "loss": 0.8337,
      "step": 424980
    },
    {
      "epoch": 1.4894839955559762,
      "grad_norm": 2.8125,
      "learning_rate": 2.797261618262109e-05,
      "loss": 0.836,
      "step": 424990
    },
    {
      "epoch": 1.4895190430628718,
      "grad_norm": 2.921875,
      "learning_rate": 2.7971967153957392e-05,
      "loss": 0.7707,
      "step": 425000
    },
    {
      "epoch": 1.4895190430628718,
      "eval_loss": 0.7903019785881042,
      "eval_runtime": 553.7552,
      "eval_samples_per_second": 687.011,
      "eval_steps_per_second": 57.251,
      "step": 425000
    },
    {
      "epoch": 1.4895540905697673,
      "grad_norm": 2.9375,
      "learning_rate": 2.797131812529369e-05,
      "loss": 0.9138,
      "step": 425010
    },
    {
      "epoch": 1.489589138076663,
      "grad_norm": 2.859375,
      "learning_rate": 2.7970669096629988e-05,
      "loss": 0.8716,
      "step": 425020
    },
    {
      "epoch": 1.4896241855835586,
      "grad_norm": 2.53125,
      "learning_rate": 2.7970020067966286e-05,
      "loss": 0.7551,
      "step": 425030
    },
    {
      "epoch": 1.4896592330904541,
      "grad_norm": 2.859375,
      "learning_rate": 2.7969371039302584e-05,
      "loss": 0.7738,
      "step": 425040
    },
    {
      "epoch": 1.4896942805973497,
      "grad_norm": 2.703125,
      "learning_rate": 2.7968722010638882e-05,
      "loss": 0.8432,
      "step": 425050
    },
    {
      "epoch": 1.4897293281042452,
      "grad_norm": 3.421875,
      "learning_rate": 2.796807298197518e-05,
      "loss": 0.8196,
      "step": 425060
    },
    {
      "epoch": 1.489764375611141,
      "grad_norm": 3.171875,
      "learning_rate": 2.7967423953311478e-05,
      "loss": 0.8627,
      "step": 425070
    },
    {
      "epoch": 1.4897994231180365,
      "grad_norm": 3.25,
      "learning_rate": 2.7966774924647776e-05,
      "loss": 0.8902,
      "step": 425080
    },
    {
      "epoch": 1.489834470624932,
      "grad_norm": 2.96875,
      "learning_rate": 2.796612589598407e-05,
      "loss": 0.9105,
      "step": 425090
    },
    {
      "epoch": 1.4898695181318278,
      "grad_norm": 3.125,
      "learning_rate": 2.796547686732037e-05,
      "loss": 0.7884,
      "step": 425100
    },
    {
      "epoch": 1.4899045656387233,
      "grad_norm": 3.078125,
      "learning_rate": 2.7964827838656666e-05,
      "loss": 0.8606,
      "step": 425110
    },
    {
      "epoch": 1.4899396131456188,
      "grad_norm": 2.859375,
      "learning_rate": 2.7964178809992964e-05,
      "loss": 0.8553,
      "step": 425120
    },
    {
      "epoch": 1.4899746606525146,
      "grad_norm": 3.234375,
      "learning_rate": 2.7963529781329262e-05,
      "loss": 0.7505,
      "step": 425130
    },
    {
      "epoch": 1.4900097081594101,
      "grad_norm": 2.75,
      "learning_rate": 2.796288075266556e-05,
      "loss": 0.7624,
      "step": 425140
    },
    {
      "epoch": 1.4900447556663057,
      "grad_norm": 2.796875,
      "learning_rate": 2.796223172400186e-05,
      "loss": 0.8533,
      "step": 425150
    },
    {
      "epoch": 1.4900798031732012,
      "grad_norm": 2.75,
      "learning_rate": 2.7961582695338156e-05,
      "loss": 0.8573,
      "step": 425160
    },
    {
      "epoch": 1.4901148506800967,
      "grad_norm": 13.75,
      "learning_rate": 2.7960933666674454e-05,
      "loss": 0.8953,
      "step": 425170
    },
    {
      "epoch": 1.4901498981869925,
      "grad_norm": 2.796875,
      "learning_rate": 2.7960284638010752e-05,
      "loss": 0.9263,
      "step": 425180
    },
    {
      "epoch": 1.490184945693888,
      "grad_norm": 2.609375,
      "learning_rate": 2.795963560934705e-05,
      "loss": 0.8155,
      "step": 425190
    },
    {
      "epoch": 1.4902199932007836,
      "grad_norm": 3.125,
      "learning_rate": 2.795898658068335e-05,
      "loss": 0.8288,
      "step": 425200
    },
    {
      "epoch": 1.4902550407076793,
      "grad_norm": 3.015625,
      "learning_rate": 2.7958337552019646e-05,
      "loss": 0.8581,
      "step": 425210
    },
    {
      "epoch": 1.4902900882145749,
      "grad_norm": 2.59375,
      "learning_rate": 2.7957688523355944e-05,
      "loss": 0.849,
      "step": 425220
    },
    {
      "epoch": 1.4903251357214704,
      "grad_norm": 2.875,
      "learning_rate": 2.7957039494692246e-05,
      "loss": 0.9055,
      "step": 425230
    },
    {
      "epoch": 1.4903601832283662,
      "grad_norm": 2.40625,
      "learning_rate": 2.7956390466028544e-05,
      "loss": 0.837,
      "step": 425240
    },
    {
      "epoch": 1.4903952307352617,
      "grad_norm": 2.4375,
      "learning_rate": 2.7955741437364842e-05,
      "loss": 0.8053,
      "step": 425250
    },
    {
      "epoch": 1.4904302782421572,
      "grad_norm": 3.65625,
      "learning_rate": 2.795509240870114e-05,
      "loss": 0.8015,
      "step": 425260
    },
    {
      "epoch": 1.4904653257490528,
      "grad_norm": 2.71875,
      "learning_rate": 2.7954443380037438e-05,
      "loss": 0.8981,
      "step": 425270
    },
    {
      "epoch": 1.4905003732559483,
      "grad_norm": 2.8125,
      "learning_rate": 2.7953794351373736e-05,
      "loss": 0.8802,
      "step": 425280
    },
    {
      "epoch": 1.490535420762844,
      "grad_norm": 3.0,
      "learning_rate": 2.7953145322710034e-05,
      "loss": 0.8046,
      "step": 425290
    },
    {
      "epoch": 1.4905704682697396,
      "grad_norm": 2.859375,
      "learning_rate": 2.7952496294046332e-05,
      "loss": 0.8978,
      "step": 425300
    },
    {
      "epoch": 1.4906055157766351,
      "grad_norm": 3.09375,
      "learning_rate": 2.795184726538263e-05,
      "loss": 0.8963,
      "step": 425310
    },
    {
      "epoch": 1.490640563283531,
      "grad_norm": 2.96875,
      "learning_rate": 2.7951198236718928e-05,
      "loss": 0.8395,
      "step": 425320
    },
    {
      "epoch": 1.4906756107904264,
      "grad_norm": 3.546875,
      "learning_rate": 2.7950549208055226e-05,
      "loss": 0.8069,
      "step": 425330
    },
    {
      "epoch": 1.490710658297322,
      "grad_norm": 2.546875,
      "learning_rate": 2.7949900179391524e-05,
      "loss": 0.8116,
      "step": 425340
    },
    {
      "epoch": 1.4907457058042177,
      "grad_norm": 2.796875,
      "learning_rate": 2.7949251150727822e-05,
      "loss": 0.8371,
      "step": 425350
    },
    {
      "epoch": 1.4907807533111133,
      "grad_norm": 2.890625,
      "learning_rate": 2.794860212206412e-05,
      "loss": 0.8624,
      "step": 425360
    },
    {
      "epoch": 1.4908158008180088,
      "grad_norm": 2.65625,
      "learning_rate": 2.794795309340042e-05,
      "loss": 0.8243,
      "step": 425370
    },
    {
      "epoch": 1.4908508483249043,
      "grad_norm": 2.265625,
      "learning_rate": 2.794730406473672e-05,
      "loss": 0.7969,
      "step": 425380
    },
    {
      "epoch": 1.4908858958317999,
      "grad_norm": 3.046875,
      "learning_rate": 2.7946655036073017e-05,
      "loss": 0.9057,
      "step": 425390
    },
    {
      "epoch": 1.4909209433386956,
      "grad_norm": 2.578125,
      "learning_rate": 2.7946006007409315e-05,
      "loss": 0.7926,
      "step": 425400
    },
    {
      "epoch": 1.4909559908455912,
      "grad_norm": 2.90625,
      "learning_rate": 2.7945356978745613e-05,
      "loss": 0.8242,
      "step": 425410
    },
    {
      "epoch": 1.4909910383524867,
      "grad_norm": 2.921875,
      "learning_rate": 2.794470795008191e-05,
      "loss": 0.8101,
      "step": 425420
    },
    {
      "epoch": 1.4910260858593825,
      "grad_norm": 3.25,
      "learning_rate": 2.794405892141821e-05,
      "loss": 0.767,
      "step": 425430
    },
    {
      "epoch": 1.491061133366278,
      "grad_norm": 3.25,
      "learning_rate": 2.7943409892754507e-05,
      "loss": 0.8724,
      "step": 425440
    },
    {
      "epoch": 1.4910961808731735,
      "grad_norm": 2.90625,
      "learning_rate": 2.7942760864090805e-05,
      "loss": 0.9064,
      "step": 425450
    },
    {
      "epoch": 1.4911312283800693,
      "grad_norm": 2.890625,
      "learning_rate": 2.7942111835427103e-05,
      "loss": 0.8445,
      "step": 425460
    },
    {
      "epoch": 1.4911662758869648,
      "grad_norm": 2.828125,
      "learning_rate": 2.7941462806763398e-05,
      "loss": 0.8356,
      "step": 425470
    },
    {
      "epoch": 1.4912013233938604,
      "grad_norm": 2.71875,
      "learning_rate": 2.7940813778099696e-05,
      "loss": 0.798,
      "step": 425480
    },
    {
      "epoch": 1.491236370900756,
      "grad_norm": 2.59375,
      "learning_rate": 2.7940164749435994e-05,
      "loss": 0.7875,
      "step": 425490
    },
    {
      "epoch": 1.4912714184076516,
      "grad_norm": 3.25,
      "learning_rate": 2.7939515720772292e-05,
      "loss": 0.8865,
      "step": 425500
    },
    {
      "epoch": 1.4913064659145472,
      "grad_norm": 2.8125,
      "learning_rate": 2.793886669210859e-05,
      "loss": 0.8741,
      "step": 425510
    },
    {
      "epoch": 1.4913415134214427,
      "grad_norm": 2.515625,
      "learning_rate": 2.7938217663444888e-05,
      "loss": 0.773,
      "step": 425520
    },
    {
      "epoch": 1.4913765609283383,
      "grad_norm": 2.921875,
      "learning_rate": 2.7937568634781186e-05,
      "loss": 0.9115,
      "step": 425530
    },
    {
      "epoch": 1.491411608435234,
      "grad_norm": 2.84375,
      "learning_rate": 2.7936919606117484e-05,
      "loss": 0.7574,
      "step": 425540
    },
    {
      "epoch": 1.4914466559421296,
      "grad_norm": 2.75,
      "learning_rate": 2.7936270577453782e-05,
      "loss": 0.8505,
      "step": 425550
    },
    {
      "epoch": 1.491481703449025,
      "grad_norm": 2.796875,
      "learning_rate": 2.793562154879008e-05,
      "loss": 0.8182,
      "step": 425560
    },
    {
      "epoch": 1.4915167509559208,
      "grad_norm": 2.734375,
      "learning_rate": 2.7934972520126378e-05,
      "loss": 0.9056,
      "step": 425570
    },
    {
      "epoch": 1.4915517984628164,
      "grad_norm": 2.859375,
      "learning_rate": 2.7934323491462676e-05,
      "loss": 0.9379,
      "step": 425580
    },
    {
      "epoch": 1.491586845969712,
      "grad_norm": 2.9375,
      "learning_rate": 2.7933674462798977e-05,
      "loss": 0.8929,
      "step": 425590
    },
    {
      "epoch": 1.4916218934766077,
      "grad_norm": 2.875,
      "learning_rate": 2.7933025434135275e-05,
      "loss": 0.8216,
      "step": 425600
    },
    {
      "epoch": 1.4916569409835032,
      "grad_norm": 2.890625,
      "learning_rate": 2.7932376405471573e-05,
      "loss": 0.8648,
      "step": 425610
    },
    {
      "epoch": 1.4916919884903987,
      "grad_norm": 2.75,
      "learning_rate": 2.793172737680787e-05,
      "loss": 0.8371,
      "step": 425620
    },
    {
      "epoch": 1.4917270359972943,
      "grad_norm": 2.75,
      "learning_rate": 2.793107834814417e-05,
      "loss": 0.8827,
      "step": 425630
    },
    {
      "epoch": 1.4917620835041898,
      "grad_norm": 2.515625,
      "learning_rate": 2.7930429319480467e-05,
      "loss": 0.8295,
      "step": 425640
    },
    {
      "epoch": 1.4917971310110856,
      "grad_norm": 2.8125,
      "learning_rate": 2.7929780290816765e-05,
      "loss": 0.8591,
      "step": 425650
    },
    {
      "epoch": 1.4918321785179811,
      "grad_norm": 2.859375,
      "learning_rate": 2.7929131262153063e-05,
      "loss": 0.8844,
      "step": 425660
    },
    {
      "epoch": 1.4918672260248766,
      "grad_norm": 2.9375,
      "learning_rate": 2.792848223348936e-05,
      "loss": 0.925,
      "step": 425670
    },
    {
      "epoch": 1.4919022735317724,
      "grad_norm": 2.6875,
      "learning_rate": 2.792783320482566e-05,
      "loss": 0.8196,
      "step": 425680
    },
    {
      "epoch": 1.491937321038668,
      "grad_norm": 2.671875,
      "learning_rate": 2.7927184176161957e-05,
      "loss": 0.8186,
      "step": 425690
    },
    {
      "epoch": 1.4919723685455635,
      "grad_norm": 3.125,
      "learning_rate": 2.7926535147498255e-05,
      "loss": 0.8946,
      "step": 425700
    },
    {
      "epoch": 1.4920074160524592,
      "grad_norm": 2.6875,
      "learning_rate": 2.7925886118834553e-05,
      "loss": 0.8718,
      "step": 425710
    },
    {
      "epoch": 1.4920424635593548,
      "grad_norm": 2.71875,
      "learning_rate": 2.792523709017085e-05,
      "loss": 0.7796,
      "step": 425720
    },
    {
      "epoch": 1.4920775110662503,
      "grad_norm": 3.40625,
      "learning_rate": 2.7924588061507153e-05,
      "loss": 0.8643,
      "step": 425730
    },
    {
      "epoch": 1.4921125585731458,
      "grad_norm": 2.859375,
      "learning_rate": 2.792393903284345e-05,
      "loss": 0.796,
      "step": 425740
    },
    {
      "epoch": 1.4921476060800414,
      "grad_norm": 2.96875,
      "learning_rate": 2.792329000417975e-05,
      "loss": 0.8555,
      "step": 425750
    },
    {
      "epoch": 1.4921826535869371,
      "grad_norm": 2.6875,
      "learning_rate": 2.7922640975516047e-05,
      "loss": 0.8475,
      "step": 425760
    },
    {
      "epoch": 1.4922177010938327,
      "grad_norm": 3.0625,
      "learning_rate": 2.7921991946852345e-05,
      "loss": 0.8367,
      "step": 425770
    },
    {
      "epoch": 1.4922527486007282,
      "grad_norm": 2.625,
      "learning_rate": 2.7921342918188643e-05,
      "loss": 0.8651,
      "step": 425780
    },
    {
      "epoch": 1.492287796107624,
      "grad_norm": 3.40625,
      "learning_rate": 2.792069388952494e-05,
      "loss": 0.7822,
      "step": 425790
    },
    {
      "epoch": 1.4923228436145195,
      "grad_norm": 2.71875,
      "learning_rate": 2.792004486086124e-05,
      "loss": 0.8588,
      "step": 425800
    },
    {
      "epoch": 1.492357891121415,
      "grad_norm": 2.84375,
      "learning_rate": 2.7919395832197537e-05,
      "loss": 0.7553,
      "step": 425810
    },
    {
      "epoch": 1.4923929386283108,
      "grad_norm": 2.921875,
      "learning_rate": 2.7918746803533835e-05,
      "loss": 0.8701,
      "step": 425820
    },
    {
      "epoch": 1.4924279861352063,
      "grad_norm": 2.9375,
      "learning_rate": 2.7918097774870133e-05,
      "loss": 0.7962,
      "step": 425830
    },
    {
      "epoch": 1.4924630336421019,
      "grad_norm": 2.875,
      "learning_rate": 2.7917448746206427e-05,
      "loss": 0.8542,
      "step": 425840
    },
    {
      "epoch": 1.4924980811489974,
      "grad_norm": 2.78125,
      "learning_rate": 2.7916799717542725e-05,
      "loss": 0.8315,
      "step": 425850
    },
    {
      "epoch": 1.492533128655893,
      "grad_norm": 3.1875,
      "learning_rate": 2.7916150688879023e-05,
      "loss": 0.8018,
      "step": 425860
    },
    {
      "epoch": 1.4925681761627887,
      "grad_norm": 3.09375,
      "learning_rate": 2.791550166021532e-05,
      "loss": 0.8571,
      "step": 425870
    },
    {
      "epoch": 1.4926032236696842,
      "grad_norm": 3.109375,
      "learning_rate": 2.791485263155162e-05,
      "loss": 0.8566,
      "step": 425880
    },
    {
      "epoch": 1.4926382711765798,
      "grad_norm": 2.640625,
      "learning_rate": 2.7914203602887917e-05,
      "loss": 0.7677,
      "step": 425890
    },
    {
      "epoch": 1.4926733186834755,
      "grad_norm": 2.6875,
      "learning_rate": 2.7913554574224215e-05,
      "loss": 0.8757,
      "step": 425900
    },
    {
      "epoch": 1.492708366190371,
      "grad_norm": 2.59375,
      "learning_rate": 2.7912905545560513e-05,
      "loss": 0.7806,
      "step": 425910
    },
    {
      "epoch": 1.4927434136972666,
      "grad_norm": 3.03125,
      "learning_rate": 2.791225651689681e-05,
      "loss": 0.8848,
      "step": 425920
    },
    {
      "epoch": 1.4927784612041624,
      "grad_norm": 3.40625,
      "learning_rate": 2.791160748823311e-05,
      "loss": 0.8778,
      "step": 425930
    },
    {
      "epoch": 1.492813508711058,
      "grad_norm": 2.953125,
      "learning_rate": 2.7910958459569407e-05,
      "loss": 0.818,
      "step": 425940
    },
    {
      "epoch": 1.4928485562179534,
      "grad_norm": 2.734375,
      "learning_rate": 2.7910309430905705e-05,
      "loss": 0.9109,
      "step": 425950
    },
    {
      "epoch": 1.492883603724849,
      "grad_norm": 2.0,
      "learning_rate": 2.7909660402242007e-05,
      "loss": 0.8059,
      "step": 425960
    },
    {
      "epoch": 1.4929186512317445,
      "grad_norm": 2.703125,
      "learning_rate": 2.7909011373578305e-05,
      "loss": 0.8366,
      "step": 425970
    },
    {
      "epoch": 1.4929536987386403,
      "grad_norm": 2.890625,
      "learning_rate": 2.7908362344914603e-05,
      "loss": 0.8341,
      "step": 425980
    },
    {
      "epoch": 1.4929887462455358,
      "grad_norm": 2.515625,
      "learning_rate": 2.79077133162509e-05,
      "loss": 0.8336,
      "step": 425990
    },
    {
      "epoch": 1.4930237937524313,
      "grad_norm": 2.734375,
      "learning_rate": 2.79070642875872e-05,
      "loss": 0.8708,
      "step": 426000
    },
    {
      "epoch": 1.493058841259327,
      "grad_norm": 2.453125,
      "learning_rate": 2.7906415258923497e-05,
      "loss": 0.8373,
      "step": 426010
    },
    {
      "epoch": 1.4930938887662226,
      "grad_norm": 3.109375,
      "learning_rate": 2.7905766230259795e-05,
      "loss": 0.8263,
      "step": 426020
    },
    {
      "epoch": 1.4931289362731182,
      "grad_norm": 3.015625,
      "learning_rate": 2.7905117201596093e-05,
      "loss": 0.8907,
      "step": 426030
    },
    {
      "epoch": 1.493163983780014,
      "grad_norm": 2.9375,
      "learning_rate": 2.790446817293239e-05,
      "loss": 0.7787,
      "step": 426040
    },
    {
      "epoch": 1.4931990312869095,
      "grad_norm": 2.890625,
      "learning_rate": 2.790381914426869e-05,
      "loss": 0.804,
      "step": 426050
    },
    {
      "epoch": 1.493234078793805,
      "grad_norm": 2.375,
      "learning_rate": 2.7903170115604987e-05,
      "loss": 0.8294,
      "step": 426060
    },
    {
      "epoch": 1.4932691263007005,
      "grad_norm": 2.71875,
      "learning_rate": 2.7902521086941285e-05,
      "loss": 0.81,
      "step": 426070
    },
    {
      "epoch": 1.493304173807596,
      "grad_norm": 3.078125,
      "learning_rate": 2.7901872058277583e-05,
      "loss": 0.8337,
      "step": 426080
    },
    {
      "epoch": 1.4933392213144918,
      "grad_norm": 3.28125,
      "learning_rate": 2.790122302961388e-05,
      "loss": 0.8974,
      "step": 426090
    },
    {
      "epoch": 1.4933742688213874,
      "grad_norm": 2.796875,
      "learning_rate": 2.7900574000950182e-05,
      "loss": 0.7846,
      "step": 426100
    },
    {
      "epoch": 1.4934093163282829,
      "grad_norm": 3.15625,
      "learning_rate": 2.789992497228648e-05,
      "loss": 0.847,
      "step": 426110
    },
    {
      "epoch": 1.4934443638351786,
      "grad_norm": 2.8125,
      "learning_rate": 2.7899275943622778e-05,
      "loss": 0.8812,
      "step": 426120
    },
    {
      "epoch": 1.4934794113420742,
      "grad_norm": 3.0,
      "learning_rate": 2.7898626914959076e-05,
      "loss": 0.8227,
      "step": 426130
    },
    {
      "epoch": 1.4935144588489697,
      "grad_norm": 2.84375,
      "learning_rate": 2.7897977886295374e-05,
      "loss": 0.8479,
      "step": 426140
    },
    {
      "epoch": 1.4935495063558655,
      "grad_norm": 2.546875,
      "learning_rate": 2.7897328857631672e-05,
      "loss": 0.8027,
      "step": 426150
    },
    {
      "epoch": 1.493584553862761,
      "grad_norm": 3.25,
      "learning_rate": 2.789667982896797e-05,
      "loss": 0.8337,
      "step": 426160
    },
    {
      "epoch": 1.4936196013696565,
      "grad_norm": 2.53125,
      "learning_rate": 2.7896030800304268e-05,
      "loss": 0.8256,
      "step": 426170
    },
    {
      "epoch": 1.4936546488765523,
      "grad_norm": 3.296875,
      "learning_rate": 2.7895381771640566e-05,
      "loss": 0.7903,
      "step": 426180
    },
    {
      "epoch": 1.4936896963834478,
      "grad_norm": 2.96875,
      "learning_rate": 2.7894732742976864e-05,
      "loss": 0.8284,
      "step": 426190
    },
    {
      "epoch": 1.4937247438903434,
      "grad_norm": 2.875,
      "learning_rate": 2.7894083714313162e-05,
      "loss": 0.8037,
      "step": 426200
    },
    {
      "epoch": 1.493759791397239,
      "grad_norm": 2.828125,
      "learning_rate": 2.789343468564946e-05,
      "loss": 0.8712,
      "step": 426210
    },
    {
      "epoch": 1.4937948389041344,
      "grad_norm": 2.96875,
      "learning_rate": 2.7892785656985755e-05,
      "loss": 0.8198,
      "step": 426220
    },
    {
      "epoch": 1.4938298864110302,
      "grad_norm": 3.21875,
      "learning_rate": 2.7892136628322053e-05,
      "loss": 0.8501,
      "step": 426230
    },
    {
      "epoch": 1.4938649339179257,
      "grad_norm": 2.75,
      "learning_rate": 2.789148759965835e-05,
      "loss": 0.8052,
      "step": 426240
    },
    {
      "epoch": 1.4938999814248213,
      "grad_norm": 2.4375,
      "learning_rate": 2.789083857099465e-05,
      "loss": 0.7825,
      "step": 426250
    },
    {
      "epoch": 1.493935028931717,
      "grad_norm": 2.84375,
      "learning_rate": 2.7890189542330947e-05,
      "loss": 0.9191,
      "step": 426260
    },
    {
      "epoch": 1.4939700764386126,
      "grad_norm": 2.625,
      "learning_rate": 2.7889540513667245e-05,
      "loss": 0.9632,
      "step": 426270
    },
    {
      "epoch": 1.494005123945508,
      "grad_norm": 2.84375,
      "learning_rate": 2.7888891485003543e-05,
      "loss": 0.8486,
      "step": 426280
    },
    {
      "epoch": 1.4940401714524039,
      "grad_norm": 2.71875,
      "learning_rate": 2.788824245633984e-05,
      "loss": 0.8894,
      "step": 426290
    },
    {
      "epoch": 1.4940752189592994,
      "grad_norm": 2.6875,
      "learning_rate": 2.788759342767614e-05,
      "loss": 0.8574,
      "step": 426300
    },
    {
      "epoch": 1.494110266466195,
      "grad_norm": 3.15625,
      "learning_rate": 2.7886944399012437e-05,
      "loss": 0.9225,
      "step": 426310
    },
    {
      "epoch": 1.4941453139730905,
      "grad_norm": 3.21875,
      "learning_rate": 2.7886295370348735e-05,
      "loss": 0.8121,
      "step": 426320
    },
    {
      "epoch": 1.494180361479986,
      "grad_norm": 2.78125,
      "learning_rate": 2.7885646341685036e-05,
      "loss": 0.8796,
      "step": 426330
    },
    {
      "epoch": 1.4942154089868818,
      "grad_norm": 3.265625,
      "learning_rate": 2.7884997313021334e-05,
      "loss": 0.9138,
      "step": 426340
    },
    {
      "epoch": 1.4942504564937773,
      "grad_norm": 3.203125,
      "learning_rate": 2.7884348284357632e-05,
      "loss": 0.8539,
      "step": 426350
    },
    {
      "epoch": 1.4942855040006728,
      "grad_norm": 2.546875,
      "learning_rate": 2.788369925569393e-05,
      "loss": 0.9348,
      "step": 426360
    },
    {
      "epoch": 1.4943205515075686,
      "grad_norm": 2.890625,
      "learning_rate": 2.7883050227030228e-05,
      "loss": 0.9015,
      "step": 426370
    },
    {
      "epoch": 1.4943555990144641,
      "grad_norm": 2.890625,
      "learning_rate": 2.7882401198366526e-05,
      "loss": 0.8507,
      "step": 426380
    },
    {
      "epoch": 1.4943906465213597,
      "grad_norm": 2.65625,
      "learning_rate": 2.7881752169702824e-05,
      "loss": 0.8771,
      "step": 426390
    },
    {
      "epoch": 1.4944256940282554,
      "grad_norm": 2.953125,
      "learning_rate": 2.7881103141039122e-05,
      "loss": 0.8174,
      "step": 426400
    },
    {
      "epoch": 1.494460741535151,
      "grad_norm": 3.09375,
      "learning_rate": 2.788045411237542e-05,
      "loss": 0.8105,
      "step": 426410
    },
    {
      "epoch": 1.4944957890420465,
      "grad_norm": 3.078125,
      "learning_rate": 2.7879805083711718e-05,
      "loss": 0.7886,
      "step": 426420
    },
    {
      "epoch": 1.494530836548942,
      "grad_norm": 3.3125,
      "learning_rate": 2.7879156055048016e-05,
      "loss": 0.9044,
      "step": 426430
    },
    {
      "epoch": 1.4945658840558376,
      "grad_norm": 2.921875,
      "learning_rate": 2.7878507026384314e-05,
      "loss": 0.8142,
      "step": 426440
    },
    {
      "epoch": 1.4946009315627333,
      "grad_norm": 3.28125,
      "learning_rate": 2.7877857997720612e-05,
      "loss": 0.9041,
      "step": 426450
    },
    {
      "epoch": 1.4946359790696289,
      "grad_norm": 3.0625,
      "learning_rate": 2.787720896905691e-05,
      "loss": 0.932,
      "step": 426460
    },
    {
      "epoch": 1.4946710265765244,
      "grad_norm": 2.46875,
      "learning_rate": 2.787655994039321e-05,
      "loss": 0.83,
      "step": 426470
    },
    {
      "epoch": 1.4947060740834202,
      "grad_norm": 3.21875,
      "learning_rate": 2.787591091172951e-05,
      "loss": 0.9249,
      "step": 426480
    },
    {
      "epoch": 1.4947411215903157,
      "grad_norm": 2.671875,
      "learning_rate": 2.7875261883065807e-05,
      "loss": 0.8688,
      "step": 426490
    },
    {
      "epoch": 1.4947761690972112,
      "grad_norm": 3.078125,
      "learning_rate": 2.7874612854402105e-05,
      "loss": 0.8766,
      "step": 426500
    },
    {
      "epoch": 1.494811216604107,
      "grad_norm": 2.6875,
      "learning_rate": 2.7873963825738403e-05,
      "loss": 0.8657,
      "step": 426510
    },
    {
      "epoch": 1.4948462641110025,
      "grad_norm": 3.0625,
      "learning_rate": 2.78733147970747e-05,
      "loss": 0.8889,
      "step": 426520
    },
    {
      "epoch": 1.494881311617898,
      "grad_norm": 2.890625,
      "learning_rate": 2.7872665768411e-05,
      "loss": 0.8101,
      "step": 426530
    },
    {
      "epoch": 1.4949163591247936,
      "grad_norm": 2.921875,
      "learning_rate": 2.7872016739747297e-05,
      "loss": 0.8498,
      "step": 426540
    },
    {
      "epoch": 1.4949514066316891,
      "grad_norm": 3.0,
      "learning_rate": 2.7871367711083595e-05,
      "loss": 0.8647,
      "step": 426550
    },
    {
      "epoch": 1.4949864541385849,
      "grad_norm": 2.59375,
      "learning_rate": 2.7870718682419893e-05,
      "loss": 0.8611,
      "step": 426560
    },
    {
      "epoch": 1.4950215016454804,
      "grad_norm": 2.84375,
      "learning_rate": 2.787006965375619e-05,
      "loss": 0.8453,
      "step": 426570
    },
    {
      "epoch": 1.495056549152376,
      "grad_norm": 3.3125,
      "learning_rate": 2.786942062509249e-05,
      "loss": 0.8494,
      "step": 426580
    },
    {
      "epoch": 1.4950915966592717,
      "grad_norm": 2.828125,
      "learning_rate": 2.7868771596428787e-05,
      "loss": 0.9313,
      "step": 426590
    },
    {
      "epoch": 1.4951266441661673,
      "grad_norm": 2.796875,
      "learning_rate": 2.7868122567765082e-05,
      "loss": 0.8781,
      "step": 426600
    },
    {
      "epoch": 1.4951616916730628,
      "grad_norm": 3.1875,
      "learning_rate": 2.786747353910138e-05,
      "loss": 0.9299,
      "step": 426610
    },
    {
      "epoch": 1.4951967391799585,
      "grad_norm": 2.703125,
      "learning_rate": 2.7866824510437678e-05,
      "loss": 0.8527,
      "step": 426620
    },
    {
      "epoch": 1.495231786686854,
      "grad_norm": 3.140625,
      "learning_rate": 2.7866175481773976e-05,
      "loss": 0.8272,
      "step": 426630
    },
    {
      "epoch": 1.4952668341937496,
      "grad_norm": 2.921875,
      "learning_rate": 2.7865526453110274e-05,
      "loss": 0.8882,
      "step": 426640
    },
    {
      "epoch": 1.4953018817006452,
      "grad_norm": 2.96875,
      "learning_rate": 2.7864877424446572e-05,
      "loss": 0.7717,
      "step": 426650
    },
    {
      "epoch": 1.4953369292075407,
      "grad_norm": 2.8125,
      "learning_rate": 2.786422839578287e-05,
      "loss": 0.8301,
      "step": 426660
    },
    {
      "epoch": 1.4953719767144364,
      "grad_norm": 2.75,
      "learning_rate": 2.7863579367119168e-05,
      "loss": 0.8151,
      "step": 426670
    },
    {
      "epoch": 1.495407024221332,
      "grad_norm": 2.875,
      "learning_rate": 2.7862930338455466e-05,
      "loss": 0.8668,
      "step": 426680
    },
    {
      "epoch": 1.4954420717282275,
      "grad_norm": 3.03125,
      "learning_rate": 2.7862281309791767e-05,
      "loss": 0.8235,
      "step": 426690
    },
    {
      "epoch": 1.4954771192351233,
      "grad_norm": 2.4375,
      "learning_rate": 2.7861632281128065e-05,
      "loss": 0.8074,
      "step": 426700
    },
    {
      "epoch": 1.4955121667420188,
      "grad_norm": 3.25,
      "learning_rate": 2.7860983252464363e-05,
      "loss": 0.9448,
      "step": 426710
    },
    {
      "epoch": 1.4955472142489143,
      "grad_norm": 2.921875,
      "learning_rate": 2.786033422380066e-05,
      "loss": 0.8235,
      "step": 426720
    },
    {
      "epoch": 1.49558226175581,
      "grad_norm": 2.859375,
      "learning_rate": 2.785968519513696e-05,
      "loss": 0.8778,
      "step": 426730
    },
    {
      "epoch": 1.4956173092627056,
      "grad_norm": 2.875,
      "learning_rate": 2.7859036166473257e-05,
      "loss": 0.7993,
      "step": 426740
    },
    {
      "epoch": 1.4956523567696012,
      "grad_norm": 2.78125,
      "learning_rate": 2.7858387137809555e-05,
      "loss": 0.8163,
      "step": 426750
    },
    {
      "epoch": 1.4956874042764967,
      "grad_norm": 2.78125,
      "learning_rate": 2.7857738109145853e-05,
      "loss": 0.8837,
      "step": 426760
    },
    {
      "epoch": 1.4957224517833925,
      "grad_norm": 2.890625,
      "learning_rate": 2.785708908048215e-05,
      "loss": 0.9937,
      "step": 426770
    },
    {
      "epoch": 1.495757499290288,
      "grad_norm": 3.15625,
      "learning_rate": 2.785644005181845e-05,
      "loss": 0.8656,
      "step": 426780
    },
    {
      "epoch": 1.4957925467971835,
      "grad_norm": 3.015625,
      "learning_rate": 2.7855791023154747e-05,
      "loss": 0.8141,
      "step": 426790
    },
    {
      "epoch": 1.495827594304079,
      "grad_norm": 2.96875,
      "learning_rate": 2.7855141994491045e-05,
      "loss": 0.8609,
      "step": 426800
    },
    {
      "epoch": 1.4958626418109748,
      "grad_norm": 2.515625,
      "learning_rate": 2.7854492965827343e-05,
      "loss": 0.7951,
      "step": 426810
    },
    {
      "epoch": 1.4958976893178704,
      "grad_norm": 3.125,
      "learning_rate": 2.785384393716364e-05,
      "loss": 0.907,
      "step": 426820
    },
    {
      "epoch": 1.495932736824766,
      "grad_norm": 3.078125,
      "learning_rate": 2.7853194908499943e-05,
      "loss": 0.9431,
      "step": 426830
    },
    {
      "epoch": 1.4959677843316617,
      "grad_norm": 2.921875,
      "learning_rate": 2.785254587983624e-05,
      "loss": 0.8591,
      "step": 426840
    },
    {
      "epoch": 1.4960028318385572,
      "grad_norm": 3.1875,
      "learning_rate": 2.785189685117254e-05,
      "loss": 0.8376,
      "step": 426850
    },
    {
      "epoch": 1.4960378793454527,
      "grad_norm": 3.15625,
      "learning_rate": 2.7851247822508837e-05,
      "loss": 0.8167,
      "step": 426860
    },
    {
      "epoch": 1.4960729268523485,
      "grad_norm": 2.921875,
      "learning_rate": 2.7850598793845135e-05,
      "loss": 0.8836,
      "step": 426870
    },
    {
      "epoch": 1.496107974359244,
      "grad_norm": 3.28125,
      "learning_rate": 2.7849949765181433e-05,
      "loss": 0.8756,
      "step": 426880
    },
    {
      "epoch": 1.4961430218661396,
      "grad_norm": 3.34375,
      "learning_rate": 2.784930073651773e-05,
      "loss": 0.8884,
      "step": 426890
    },
    {
      "epoch": 1.496178069373035,
      "grad_norm": 3.078125,
      "learning_rate": 2.784865170785403e-05,
      "loss": 0.849,
      "step": 426900
    },
    {
      "epoch": 1.4962131168799306,
      "grad_norm": 2.859375,
      "learning_rate": 2.7848002679190327e-05,
      "loss": 0.9389,
      "step": 426910
    },
    {
      "epoch": 1.4962481643868264,
      "grad_norm": 2.71875,
      "learning_rate": 2.7847353650526625e-05,
      "loss": 0.8464,
      "step": 426920
    },
    {
      "epoch": 1.496283211893722,
      "grad_norm": 3.21875,
      "learning_rate": 2.7846704621862923e-05,
      "loss": 0.7743,
      "step": 426930
    },
    {
      "epoch": 1.4963182594006175,
      "grad_norm": 2.671875,
      "learning_rate": 2.784605559319922e-05,
      "loss": 0.8603,
      "step": 426940
    },
    {
      "epoch": 1.4963533069075132,
      "grad_norm": 3.078125,
      "learning_rate": 2.784540656453552e-05,
      "loss": 0.8784,
      "step": 426950
    },
    {
      "epoch": 1.4963883544144088,
      "grad_norm": 2.9375,
      "learning_rate": 2.7844757535871817e-05,
      "loss": 0.9145,
      "step": 426960
    },
    {
      "epoch": 1.4964234019213043,
      "grad_norm": 3.3125,
      "learning_rate": 2.784410850720811e-05,
      "loss": 0.8478,
      "step": 426970
    },
    {
      "epoch": 1.4964584494282,
      "grad_norm": 2.25,
      "learning_rate": 2.784345947854441e-05,
      "loss": 0.7904,
      "step": 426980
    },
    {
      "epoch": 1.4964934969350956,
      "grad_norm": 2.875,
      "learning_rate": 2.7842810449880707e-05,
      "loss": 0.8889,
      "step": 426990
    },
    {
      "epoch": 1.4965285444419911,
      "grad_norm": 2.984375,
      "learning_rate": 2.7842161421217005e-05,
      "loss": 0.8652,
      "step": 427000
    },
    {
      "epoch": 1.4965635919488867,
      "grad_norm": 2.75,
      "learning_rate": 2.7841512392553303e-05,
      "loss": 0.7138,
      "step": 427010
    },
    {
      "epoch": 1.4965986394557822,
      "grad_norm": 3.03125,
      "learning_rate": 2.78408633638896e-05,
      "loss": 0.8299,
      "step": 427020
    },
    {
      "epoch": 1.496633686962678,
      "grad_norm": 2.90625,
      "learning_rate": 2.78402143352259e-05,
      "loss": 0.8394,
      "step": 427030
    },
    {
      "epoch": 1.4966687344695735,
      "grad_norm": 2.828125,
      "learning_rate": 2.7839565306562197e-05,
      "loss": 0.8806,
      "step": 427040
    },
    {
      "epoch": 1.496703781976469,
      "grad_norm": 2.765625,
      "learning_rate": 2.7838916277898495e-05,
      "loss": 0.7855,
      "step": 427050
    },
    {
      "epoch": 1.4967388294833648,
      "grad_norm": 2.859375,
      "learning_rate": 2.7838267249234797e-05,
      "loss": 0.9219,
      "step": 427060
    },
    {
      "epoch": 1.4967738769902603,
      "grad_norm": 3.21875,
      "learning_rate": 2.7837618220571095e-05,
      "loss": 0.9046,
      "step": 427070
    },
    {
      "epoch": 1.4968089244971559,
      "grad_norm": 2.78125,
      "learning_rate": 2.7836969191907393e-05,
      "loss": 0.8619,
      "step": 427080
    },
    {
      "epoch": 1.4968439720040516,
      "grad_norm": 2.984375,
      "learning_rate": 2.783632016324369e-05,
      "loss": 0.793,
      "step": 427090
    },
    {
      "epoch": 1.4968790195109472,
      "grad_norm": 2.828125,
      "learning_rate": 2.783567113457999e-05,
      "loss": 0.848,
      "step": 427100
    },
    {
      "epoch": 1.4969140670178427,
      "grad_norm": 2.5625,
      "learning_rate": 2.7835022105916287e-05,
      "loss": 0.7041,
      "step": 427110
    },
    {
      "epoch": 1.4969491145247382,
      "grad_norm": 3.109375,
      "learning_rate": 2.7834373077252585e-05,
      "loss": 0.8605,
      "step": 427120
    },
    {
      "epoch": 1.4969841620316338,
      "grad_norm": 2.453125,
      "learning_rate": 2.7833724048588883e-05,
      "loss": 0.7783,
      "step": 427130
    },
    {
      "epoch": 1.4970192095385295,
      "grad_norm": 2.65625,
      "learning_rate": 2.783307501992518e-05,
      "loss": 0.7856,
      "step": 427140
    },
    {
      "epoch": 1.497054257045425,
      "grad_norm": 3.21875,
      "learning_rate": 2.783242599126148e-05,
      "loss": 0.8427,
      "step": 427150
    },
    {
      "epoch": 1.4970893045523206,
      "grad_norm": 3.0,
      "learning_rate": 2.7831776962597777e-05,
      "loss": 0.7957,
      "step": 427160
    },
    {
      "epoch": 1.4971243520592163,
      "grad_norm": 3.3125,
      "learning_rate": 2.7831127933934075e-05,
      "loss": 0.8971,
      "step": 427170
    },
    {
      "epoch": 1.4971593995661119,
      "grad_norm": 3.3125,
      "learning_rate": 2.7830478905270373e-05,
      "loss": 0.8531,
      "step": 427180
    },
    {
      "epoch": 1.4971944470730074,
      "grad_norm": 2.625,
      "learning_rate": 2.782982987660667e-05,
      "loss": 0.7649,
      "step": 427190
    },
    {
      "epoch": 1.4972294945799032,
      "grad_norm": 3.5,
      "learning_rate": 2.7829180847942972e-05,
      "loss": 0.7807,
      "step": 427200
    },
    {
      "epoch": 1.4972645420867987,
      "grad_norm": 2.640625,
      "learning_rate": 2.782853181927927e-05,
      "loss": 0.7655,
      "step": 427210
    },
    {
      "epoch": 1.4972995895936942,
      "grad_norm": 3.046875,
      "learning_rate": 2.7827882790615568e-05,
      "loss": 0.8104,
      "step": 427220
    },
    {
      "epoch": 1.4973346371005898,
      "grad_norm": 2.296875,
      "learning_rate": 2.7827233761951866e-05,
      "loss": 0.7936,
      "step": 427230
    },
    {
      "epoch": 1.4973696846074853,
      "grad_norm": 3.140625,
      "learning_rate": 2.7826584733288164e-05,
      "loss": 0.8408,
      "step": 427240
    },
    {
      "epoch": 1.497404732114381,
      "grad_norm": 3.171875,
      "learning_rate": 2.7825935704624462e-05,
      "loss": 0.7864,
      "step": 427250
    },
    {
      "epoch": 1.4974397796212766,
      "grad_norm": 2.734375,
      "learning_rate": 2.782528667596076e-05,
      "loss": 0.7889,
      "step": 427260
    },
    {
      "epoch": 1.4974748271281721,
      "grad_norm": 3.15625,
      "learning_rate": 2.7824637647297058e-05,
      "loss": 0.8719,
      "step": 427270
    },
    {
      "epoch": 1.497509874635068,
      "grad_norm": 2.8125,
      "learning_rate": 2.7823988618633356e-05,
      "loss": 0.847,
      "step": 427280
    },
    {
      "epoch": 1.4975449221419634,
      "grad_norm": 3.25,
      "learning_rate": 2.7823339589969654e-05,
      "loss": 0.8866,
      "step": 427290
    },
    {
      "epoch": 1.497579969648859,
      "grad_norm": 2.53125,
      "learning_rate": 2.7822690561305952e-05,
      "loss": 0.9,
      "step": 427300
    },
    {
      "epoch": 1.4976150171557547,
      "grad_norm": 2.734375,
      "learning_rate": 2.782204153264225e-05,
      "loss": 0.7231,
      "step": 427310
    },
    {
      "epoch": 1.4976500646626503,
      "grad_norm": 2.921875,
      "learning_rate": 2.7821392503978548e-05,
      "loss": 0.7701,
      "step": 427320
    },
    {
      "epoch": 1.4976851121695458,
      "grad_norm": 3.140625,
      "learning_rate": 2.7820743475314846e-05,
      "loss": 0.8547,
      "step": 427330
    },
    {
      "epoch": 1.4977201596764413,
      "grad_norm": 2.921875,
      "learning_rate": 2.7820094446651147e-05,
      "loss": 0.8539,
      "step": 427340
    },
    {
      "epoch": 1.4977552071833369,
      "grad_norm": 3.453125,
      "learning_rate": 2.781944541798744e-05,
      "loss": 0.8101,
      "step": 427350
    },
    {
      "epoch": 1.4977902546902326,
      "grad_norm": 3.078125,
      "learning_rate": 2.7818796389323737e-05,
      "loss": 0.8671,
      "step": 427360
    },
    {
      "epoch": 1.4978253021971282,
      "grad_norm": 3.171875,
      "learning_rate": 2.7818147360660035e-05,
      "loss": 0.8997,
      "step": 427370
    },
    {
      "epoch": 1.4978603497040237,
      "grad_norm": 2.578125,
      "learning_rate": 2.7817498331996333e-05,
      "loss": 0.8794,
      "step": 427380
    },
    {
      "epoch": 1.4978953972109195,
      "grad_norm": 3.53125,
      "learning_rate": 2.781684930333263e-05,
      "loss": 0.8073,
      "step": 427390
    },
    {
      "epoch": 1.497930444717815,
      "grad_norm": 2.53125,
      "learning_rate": 2.781620027466893e-05,
      "loss": 0.8826,
      "step": 427400
    },
    {
      "epoch": 1.4979654922247105,
      "grad_norm": 3.078125,
      "learning_rate": 2.7815551246005227e-05,
      "loss": 0.9158,
      "step": 427410
    },
    {
      "epoch": 1.4980005397316063,
      "grad_norm": 3.109375,
      "learning_rate": 2.7814902217341525e-05,
      "loss": 0.8241,
      "step": 427420
    },
    {
      "epoch": 1.4980355872385018,
      "grad_norm": 2.90625,
      "learning_rate": 2.7814253188677826e-05,
      "loss": 0.8387,
      "step": 427430
    },
    {
      "epoch": 1.4980706347453974,
      "grad_norm": 2.890625,
      "learning_rate": 2.7813604160014124e-05,
      "loss": 0.9324,
      "step": 427440
    },
    {
      "epoch": 1.498105682252293,
      "grad_norm": 2.78125,
      "learning_rate": 2.7812955131350422e-05,
      "loss": 0.9171,
      "step": 427450
    },
    {
      "epoch": 1.4981407297591887,
      "grad_norm": 2.765625,
      "learning_rate": 2.781230610268672e-05,
      "loss": 0.8313,
      "step": 427460
    },
    {
      "epoch": 1.4981757772660842,
      "grad_norm": 2.4375,
      "learning_rate": 2.7811657074023018e-05,
      "loss": 0.8298,
      "step": 427470
    },
    {
      "epoch": 1.4982108247729797,
      "grad_norm": 3.421875,
      "learning_rate": 2.7811008045359316e-05,
      "loss": 0.8598,
      "step": 427480
    },
    {
      "epoch": 1.4982458722798753,
      "grad_norm": 2.8125,
      "learning_rate": 2.7810359016695614e-05,
      "loss": 0.7847,
      "step": 427490
    },
    {
      "epoch": 1.498280919786771,
      "grad_norm": 2.46875,
      "learning_rate": 2.7809709988031912e-05,
      "loss": 0.8339,
      "step": 427500
    },
    {
      "epoch": 1.4983159672936666,
      "grad_norm": 2.84375,
      "learning_rate": 2.780906095936821e-05,
      "loss": 0.8496,
      "step": 427510
    },
    {
      "epoch": 1.498351014800562,
      "grad_norm": 2.46875,
      "learning_rate": 2.7808411930704508e-05,
      "loss": 0.9296,
      "step": 427520
    },
    {
      "epoch": 1.4983860623074579,
      "grad_norm": 3.0,
      "learning_rate": 2.7807762902040806e-05,
      "loss": 0.8023,
      "step": 427530
    },
    {
      "epoch": 1.4984211098143534,
      "grad_norm": 2.71875,
      "learning_rate": 2.7807113873377104e-05,
      "loss": 0.8394,
      "step": 427540
    },
    {
      "epoch": 1.498456157321249,
      "grad_norm": 3.21875,
      "learning_rate": 2.7806464844713402e-05,
      "loss": 0.8165,
      "step": 427550
    },
    {
      "epoch": 1.4984912048281447,
      "grad_norm": 2.796875,
      "learning_rate": 2.78058158160497e-05,
      "loss": 0.8235,
      "step": 427560
    },
    {
      "epoch": 1.4985262523350402,
      "grad_norm": 2.859375,
      "learning_rate": 2.7805166787386e-05,
      "loss": 0.8483,
      "step": 427570
    },
    {
      "epoch": 1.4985612998419358,
      "grad_norm": 3.0,
      "learning_rate": 2.78045177587223e-05,
      "loss": 0.8756,
      "step": 427580
    },
    {
      "epoch": 1.4985963473488313,
      "grad_norm": 2.75,
      "learning_rate": 2.7803868730058597e-05,
      "loss": 0.7957,
      "step": 427590
    },
    {
      "epoch": 1.4986313948557268,
      "grad_norm": 2.703125,
      "learning_rate": 2.7803219701394895e-05,
      "loss": 0.8114,
      "step": 427600
    },
    {
      "epoch": 1.4986664423626226,
      "grad_norm": 2.609375,
      "learning_rate": 2.7802570672731193e-05,
      "loss": 0.8432,
      "step": 427610
    },
    {
      "epoch": 1.4987014898695181,
      "grad_norm": 2.53125,
      "learning_rate": 2.780192164406749e-05,
      "loss": 0.8374,
      "step": 427620
    },
    {
      "epoch": 1.4987365373764137,
      "grad_norm": 2.71875,
      "learning_rate": 2.780127261540379e-05,
      "loss": 0.8092,
      "step": 427630
    },
    {
      "epoch": 1.4987715848833094,
      "grad_norm": 2.65625,
      "learning_rate": 2.7800623586740087e-05,
      "loss": 0.8094,
      "step": 427640
    },
    {
      "epoch": 1.498806632390205,
      "grad_norm": 3.171875,
      "learning_rate": 2.7799974558076385e-05,
      "loss": 0.8469,
      "step": 427650
    },
    {
      "epoch": 1.4988416798971005,
      "grad_norm": 3.59375,
      "learning_rate": 2.7799325529412683e-05,
      "loss": 0.8478,
      "step": 427660
    },
    {
      "epoch": 1.4988767274039962,
      "grad_norm": 3.515625,
      "learning_rate": 2.779867650074898e-05,
      "loss": 0.866,
      "step": 427670
    },
    {
      "epoch": 1.4989117749108918,
      "grad_norm": 2.578125,
      "learning_rate": 2.779802747208528e-05,
      "loss": 0.8791,
      "step": 427680
    },
    {
      "epoch": 1.4989468224177873,
      "grad_norm": 2.890625,
      "learning_rate": 2.7797378443421577e-05,
      "loss": 0.8365,
      "step": 427690
    },
    {
      "epoch": 1.4989818699246829,
      "grad_norm": 3.421875,
      "learning_rate": 2.779672941475788e-05,
      "loss": 0.9478,
      "step": 427700
    },
    {
      "epoch": 1.4990169174315784,
      "grad_norm": 3.34375,
      "learning_rate": 2.7796080386094177e-05,
      "loss": 0.8231,
      "step": 427710
    },
    {
      "epoch": 1.4990519649384741,
      "grad_norm": 2.515625,
      "learning_rate": 2.7795431357430475e-05,
      "loss": 0.8312,
      "step": 427720
    },
    {
      "epoch": 1.4990870124453697,
      "grad_norm": 2.78125,
      "learning_rate": 2.7794782328766766e-05,
      "loss": 0.79,
      "step": 427730
    },
    {
      "epoch": 1.4991220599522652,
      "grad_norm": 3.09375,
      "learning_rate": 2.7794133300103064e-05,
      "loss": 0.9011,
      "step": 427740
    },
    {
      "epoch": 1.499157107459161,
      "grad_norm": 2.921875,
      "learning_rate": 2.7793484271439362e-05,
      "loss": 0.8078,
      "step": 427750
    },
    {
      "epoch": 1.4991921549660565,
      "grad_norm": 2.890625,
      "learning_rate": 2.779283524277566e-05,
      "loss": 0.8431,
      "step": 427760
    },
    {
      "epoch": 1.499227202472952,
      "grad_norm": 2.359375,
      "learning_rate": 2.7792186214111958e-05,
      "loss": 0.8315,
      "step": 427770
    },
    {
      "epoch": 1.4992622499798478,
      "grad_norm": 3.265625,
      "learning_rate": 2.7791537185448256e-05,
      "loss": 0.8293,
      "step": 427780
    },
    {
      "epoch": 1.4992972974867433,
      "grad_norm": 3.265625,
      "learning_rate": 2.7790888156784557e-05,
      "loss": 0.8411,
      "step": 427790
    },
    {
      "epoch": 1.4993323449936389,
      "grad_norm": 3.25,
      "learning_rate": 2.7790239128120855e-05,
      "loss": 0.8678,
      "step": 427800
    },
    {
      "epoch": 1.4993673925005344,
      "grad_norm": 3.0625,
      "learning_rate": 2.7789590099457153e-05,
      "loss": 0.8246,
      "step": 427810
    },
    {
      "epoch": 1.49940244000743,
      "grad_norm": 2.984375,
      "learning_rate": 2.778894107079345e-05,
      "loss": 0.8503,
      "step": 427820
    },
    {
      "epoch": 1.4994374875143257,
      "grad_norm": 2.84375,
      "learning_rate": 2.778829204212975e-05,
      "loss": 0.9125,
      "step": 427830
    },
    {
      "epoch": 1.4994725350212212,
      "grad_norm": 2.8125,
      "learning_rate": 2.7787643013466047e-05,
      "loss": 0.8329,
      "step": 427840
    },
    {
      "epoch": 1.4995075825281168,
      "grad_norm": 2.765625,
      "learning_rate": 2.7786993984802345e-05,
      "loss": 0.88,
      "step": 427850
    },
    {
      "epoch": 1.4995426300350125,
      "grad_norm": 2.453125,
      "learning_rate": 2.7786344956138643e-05,
      "loss": 0.7646,
      "step": 427860
    },
    {
      "epoch": 1.499577677541908,
      "grad_norm": 2.890625,
      "learning_rate": 2.778569592747494e-05,
      "loss": 0.7815,
      "step": 427870
    },
    {
      "epoch": 1.4996127250488036,
      "grad_norm": 2.890625,
      "learning_rate": 2.778504689881124e-05,
      "loss": 0.907,
      "step": 427880
    },
    {
      "epoch": 1.4996477725556994,
      "grad_norm": 2.765625,
      "learning_rate": 2.7784397870147537e-05,
      "loss": 0.9001,
      "step": 427890
    },
    {
      "epoch": 1.499682820062595,
      "grad_norm": 2.359375,
      "learning_rate": 2.7783748841483835e-05,
      "loss": 0.8676,
      "step": 427900
    },
    {
      "epoch": 1.4997178675694904,
      "grad_norm": 3.0,
      "learning_rate": 2.7783099812820133e-05,
      "loss": 0.8424,
      "step": 427910
    },
    {
      "epoch": 1.499752915076386,
      "grad_norm": 3.09375,
      "learning_rate": 2.778245078415643e-05,
      "loss": 0.8177,
      "step": 427920
    },
    {
      "epoch": 1.4997879625832815,
      "grad_norm": 2.859375,
      "learning_rate": 2.7781801755492733e-05,
      "loss": 0.8403,
      "step": 427930
    },
    {
      "epoch": 1.4998230100901773,
      "grad_norm": 4.0625,
      "learning_rate": 2.778115272682903e-05,
      "loss": 0.7572,
      "step": 427940
    },
    {
      "epoch": 1.4998580575970728,
      "grad_norm": 3.40625,
      "learning_rate": 2.778050369816533e-05,
      "loss": 0.8937,
      "step": 427950
    },
    {
      "epoch": 1.4998931051039683,
      "grad_norm": 3.078125,
      "learning_rate": 2.7779854669501627e-05,
      "loss": 0.75,
      "step": 427960
    },
    {
      "epoch": 1.499928152610864,
      "grad_norm": 2.8125,
      "learning_rate": 2.7779205640837925e-05,
      "loss": 0.852,
      "step": 427970
    },
    {
      "epoch": 1.4999632001177596,
      "grad_norm": 2.875,
      "learning_rate": 2.7778556612174223e-05,
      "loss": 0.8518,
      "step": 427980
    },
    {
      "epoch": 1.4999982476246552,
      "grad_norm": 2.859375,
      "learning_rate": 2.777790758351052e-05,
      "loss": 0.8408,
      "step": 427990
    },
    {
      "epoch": 1.500033295131551,
      "grad_norm": 2.84375,
      "learning_rate": 2.777725855484682e-05,
      "loss": 0.881,
      "step": 428000
    },
    {
      "epoch": 1.5000683426384465,
      "grad_norm": 2.71875,
      "learning_rate": 2.7776609526183117e-05,
      "loss": 0.82,
      "step": 428010
    },
    {
      "epoch": 1.500103390145342,
      "grad_norm": 3.046875,
      "learning_rate": 2.7775960497519415e-05,
      "loss": 0.8685,
      "step": 428020
    },
    {
      "epoch": 1.5001384376522378,
      "grad_norm": 2.65625,
      "learning_rate": 2.7775311468855713e-05,
      "loss": 0.8425,
      "step": 428030
    },
    {
      "epoch": 1.500173485159133,
      "grad_norm": 3.078125,
      "learning_rate": 2.777466244019201e-05,
      "loss": 0.846,
      "step": 428040
    },
    {
      "epoch": 1.5002085326660288,
      "grad_norm": 3.34375,
      "learning_rate": 2.777401341152831e-05,
      "loss": 0.7793,
      "step": 428050
    },
    {
      "epoch": 1.5002435801729244,
      "grad_norm": 2.859375,
      "learning_rate": 2.7773364382864607e-05,
      "loss": 0.7703,
      "step": 428060
    },
    {
      "epoch": 1.50027862767982,
      "grad_norm": 3.0,
      "learning_rate": 2.7772715354200908e-05,
      "loss": 0.8125,
      "step": 428070
    },
    {
      "epoch": 1.5003136751867157,
      "grad_norm": 2.96875,
      "learning_rate": 2.7772066325537206e-05,
      "loss": 0.9311,
      "step": 428080
    },
    {
      "epoch": 1.5003487226936112,
      "grad_norm": 2.984375,
      "learning_rate": 2.7771417296873504e-05,
      "loss": 0.8394,
      "step": 428090
    },
    {
      "epoch": 1.5003837702005067,
      "grad_norm": 3.28125,
      "learning_rate": 2.7770768268209795e-05,
      "loss": 0.8462,
      "step": 428100
    },
    {
      "epoch": 1.5004188177074025,
      "grad_norm": 2.59375,
      "learning_rate": 2.7770119239546093e-05,
      "loss": 0.843,
      "step": 428110
    },
    {
      "epoch": 1.500453865214298,
      "grad_norm": 3.421875,
      "learning_rate": 2.776947021088239e-05,
      "loss": 0.8536,
      "step": 428120
    },
    {
      "epoch": 1.5004889127211936,
      "grad_norm": 2.453125,
      "learning_rate": 2.776882118221869e-05,
      "loss": 0.8182,
      "step": 428130
    },
    {
      "epoch": 1.5005239602280893,
      "grad_norm": 3.03125,
      "learning_rate": 2.7768172153554987e-05,
      "loss": 0.816,
      "step": 428140
    },
    {
      "epoch": 1.5005590077349846,
      "grad_norm": 3.140625,
      "learning_rate": 2.7767523124891285e-05,
      "loss": 0.9109,
      "step": 428150
    },
    {
      "epoch": 1.5005940552418804,
      "grad_norm": 2.8125,
      "learning_rate": 2.7766874096227587e-05,
      "loss": 0.8275,
      "step": 428160
    },
    {
      "epoch": 1.500629102748776,
      "grad_norm": 2.890625,
      "learning_rate": 2.7766225067563885e-05,
      "loss": 0.806,
      "step": 428170
    },
    {
      "epoch": 1.5006641502556715,
      "grad_norm": 2.609375,
      "learning_rate": 2.7765576038900183e-05,
      "loss": 0.8179,
      "step": 428180
    },
    {
      "epoch": 1.5006991977625672,
      "grad_norm": 3.0625,
      "learning_rate": 2.776492701023648e-05,
      "loss": 0.8905,
      "step": 428190
    },
    {
      "epoch": 1.5007342452694628,
      "grad_norm": 2.375,
      "learning_rate": 2.776427798157278e-05,
      "loss": 0.7635,
      "step": 428200
    },
    {
      "epoch": 1.5007692927763583,
      "grad_norm": 2.9375,
      "learning_rate": 2.7763628952909077e-05,
      "loss": 0.8382,
      "step": 428210
    },
    {
      "epoch": 1.500804340283254,
      "grad_norm": 2.71875,
      "learning_rate": 2.7762979924245375e-05,
      "loss": 0.8139,
      "step": 428220
    },
    {
      "epoch": 1.5008393877901496,
      "grad_norm": 3.1875,
      "learning_rate": 2.7762330895581673e-05,
      "loss": 0.9072,
      "step": 428230
    },
    {
      "epoch": 1.5008744352970451,
      "grad_norm": 2.8125,
      "learning_rate": 2.776168186691797e-05,
      "loss": 0.8887,
      "step": 428240
    },
    {
      "epoch": 1.5009094828039409,
      "grad_norm": 2.65625,
      "learning_rate": 2.776103283825427e-05,
      "loss": 0.9406,
      "step": 428250
    },
    {
      "epoch": 1.5009445303108362,
      "grad_norm": 3.0625,
      "learning_rate": 2.7760383809590567e-05,
      "loss": 0.8019,
      "step": 428260
    },
    {
      "epoch": 1.500979577817732,
      "grad_norm": 2.359375,
      "learning_rate": 2.7759734780926865e-05,
      "loss": 0.7703,
      "step": 428270
    },
    {
      "epoch": 1.5010146253246275,
      "grad_norm": 3.0625,
      "learning_rate": 2.7759085752263163e-05,
      "loss": 0.8317,
      "step": 428280
    },
    {
      "epoch": 1.501049672831523,
      "grad_norm": 2.703125,
      "learning_rate": 2.775843672359946e-05,
      "loss": 0.8333,
      "step": 428290
    },
    {
      "epoch": 1.5010847203384188,
      "grad_norm": 3.0,
      "learning_rate": 2.7757787694935762e-05,
      "loss": 0.8526,
      "step": 428300
    },
    {
      "epoch": 1.5011197678453143,
      "grad_norm": 2.859375,
      "learning_rate": 2.775713866627206e-05,
      "loss": 0.8965,
      "step": 428310
    },
    {
      "epoch": 1.5011548153522098,
      "grad_norm": 2.46875,
      "learning_rate": 2.7756489637608358e-05,
      "loss": 0.9119,
      "step": 428320
    },
    {
      "epoch": 1.5011898628591056,
      "grad_norm": 2.703125,
      "learning_rate": 2.7755840608944656e-05,
      "loss": 0.8842,
      "step": 428330
    },
    {
      "epoch": 1.5012249103660011,
      "grad_norm": 2.859375,
      "learning_rate": 2.7755191580280954e-05,
      "loss": 0.8684,
      "step": 428340
    },
    {
      "epoch": 1.5012599578728967,
      "grad_norm": 2.953125,
      "learning_rate": 2.7754542551617252e-05,
      "loss": 0.7341,
      "step": 428350
    },
    {
      "epoch": 1.5012950053797924,
      "grad_norm": 2.8125,
      "learning_rate": 2.775389352295355e-05,
      "loss": 0.8545,
      "step": 428360
    },
    {
      "epoch": 1.5013300528866877,
      "grad_norm": 3.421875,
      "learning_rate": 2.7753244494289848e-05,
      "loss": 0.7351,
      "step": 428370
    },
    {
      "epoch": 1.5013651003935835,
      "grad_norm": 2.84375,
      "learning_rate": 2.7752595465626146e-05,
      "loss": 0.8582,
      "step": 428380
    },
    {
      "epoch": 1.5014001479004793,
      "grad_norm": 3.09375,
      "learning_rate": 2.7751946436962444e-05,
      "loss": 0.8106,
      "step": 428390
    },
    {
      "epoch": 1.5014351954073746,
      "grad_norm": 2.4375,
      "learning_rate": 2.7751297408298742e-05,
      "loss": 0.789,
      "step": 428400
    },
    {
      "epoch": 1.5014702429142703,
      "grad_norm": 2.875,
      "learning_rate": 2.775064837963504e-05,
      "loss": 0.8931,
      "step": 428410
    },
    {
      "epoch": 1.5015052904211659,
      "grad_norm": 2.8125,
      "learning_rate": 2.7749999350971338e-05,
      "loss": 0.9119,
      "step": 428420
    },
    {
      "epoch": 1.5015403379280614,
      "grad_norm": 3.046875,
      "learning_rate": 2.7749350322307636e-05,
      "loss": 0.8937,
      "step": 428430
    },
    {
      "epoch": 1.5015753854349572,
      "grad_norm": 2.96875,
      "learning_rate": 2.7748701293643938e-05,
      "loss": 0.7344,
      "step": 428440
    },
    {
      "epoch": 1.5016104329418527,
      "grad_norm": 2.75,
      "learning_rate": 2.7748052264980236e-05,
      "loss": 0.9072,
      "step": 428450
    },
    {
      "epoch": 1.5016454804487482,
      "grad_norm": 2.859375,
      "learning_rate": 2.7747403236316534e-05,
      "loss": 0.8939,
      "step": 428460
    },
    {
      "epoch": 1.501680527955644,
      "grad_norm": 3.125,
      "learning_rate": 2.774675420765283e-05,
      "loss": 0.811,
      "step": 428470
    },
    {
      "epoch": 1.5017155754625393,
      "grad_norm": 2.8125,
      "learning_rate": 2.7746105178989123e-05,
      "loss": 0.8004,
      "step": 428480
    },
    {
      "epoch": 1.501750622969435,
      "grad_norm": 2.921875,
      "learning_rate": 2.774545615032542e-05,
      "loss": 0.8273,
      "step": 428490
    },
    {
      "epoch": 1.5017856704763308,
      "grad_norm": 2.75,
      "learning_rate": 2.774480712166172e-05,
      "loss": 0.8213,
      "step": 428500
    },
    {
      "epoch": 1.5018207179832261,
      "grad_norm": 2.84375,
      "learning_rate": 2.7744158092998017e-05,
      "loss": 0.84,
      "step": 428510
    },
    {
      "epoch": 1.501855765490122,
      "grad_norm": 2.875,
      "learning_rate": 2.7743509064334315e-05,
      "loss": 0.8426,
      "step": 428520
    },
    {
      "epoch": 1.5018908129970174,
      "grad_norm": 2.8125,
      "learning_rate": 2.7742860035670616e-05,
      "loss": 0.8014,
      "step": 428530
    },
    {
      "epoch": 1.501925860503913,
      "grad_norm": 3.21875,
      "learning_rate": 2.7742211007006914e-05,
      "loss": 0.7983,
      "step": 428540
    },
    {
      "epoch": 1.5019609080108087,
      "grad_norm": 3.234375,
      "learning_rate": 2.7741561978343212e-05,
      "loss": 0.8937,
      "step": 428550
    },
    {
      "epoch": 1.5019959555177043,
      "grad_norm": 3.125,
      "learning_rate": 2.774091294967951e-05,
      "loss": 0.7976,
      "step": 428560
    },
    {
      "epoch": 1.5020310030245998,
      "grad_norm": 2.65625,
      "learning_rate": 2.7740263921015808e-05,
      "loss": 0.8808,
      "step": 428570
    },
    {
      "epoch": 1.5020660505314956,
      "grad_norm": 3.03125,
      "learning_rate": 2.7739614892352106e-05,
      "loss": 0.8514,
      "step": 428580
    },
    {
      "epoch": 1.502101098038391,
      "grad_norm": 2.90625,
      "learning_rate": 2.7738965863688404e-05,
      "loss": 0.839,
      "step": 428590
    },
    {
      "epoch": 1.5021361455452866,
      "grad_norm": 3.09375,
      "learning_rate": 2.7738316835024702e-05,
      "loss": 0.8192,
      "step": 428600
    },
    {
      "epoch": 1.5021711930521824,
      "grad_norm": 2.890625,
      "learning_rate": 2.7737667806361e-05,
      "loss": 0.8304,
      "step": 428610
    },
    {
      "epoch": 1.5022062405590777,
      "grad_norm": 3.125,
      "learning_rate": 2.7737018777697298e-05,
      "loss": 0.8676,
      "step": 428620
    },
    {
      "epoch": 1.5022412880659735,
      "grad_norm": 3.09375,
      "learning_rate": 2.7736369749033596e-05,
      "loss": 0.8861,
      "step": 428630
    },
    {
      "epoch": 1.502276335572869,
      "grad_norm": 2.5,
      "learning_rate": 2.7735720720369894e-05,
      "loss": 0.8366,
      "step": 428640
    },
    {
      "epoch": 1.5023113830797645,
      "grad_norm": 3.046875,
      "learning_rate": 2.7735071691706192e-05,
      "loss": 0.8631,
      "step": 428650
    },
    {
      "epoch": 1.5023464305866603,
      "grad_norm": 2.453125,
      "learning_rate": 2.773442266304249e-05,
      "loss": 0.8905,
      "step": 428660
    },
    {
      "epoch": 1.5023814780935558,
      "grad_norm": 3.1875,
      "learning_rate": 2.773377363437879e-05,
      "loss": 0.8394,
      "step": 428670
    },
    {
      "epoch": 1.5024165256004514,
      "grad_norm": 2.859375,
      "learning_rate": 2.773312460571509e-05,
      "loss": 0.8179,
      "step": 428680
    },
    {
      "epoch": 1.5024515731073471,
      "grad_norm": 2.875,
      "learning_rate": 2.7732475577051388e-05,
      "loss": 0.8633,
      "step": 428690
    },
    {
      "epoch": 1.5024866206142427,
      "grad_norm": 3.203125,
      "learning_rate": 2.7731826548387686e-05,
      "loss": 0.7618,
      "step": 428700
    },
    {
      "epoch": 1.5025216681211382,
      "grad_norm": 3.078125,
      "learning_rate": 2.7731177519723984e-05,
      "loss": 0.801,
      "step": 428710
    },
    {
      "epoch": 1.502556715628034,
      "grad_norm": 2.953125,
      "learning_rate": 2.773052849106028e-05,
      "loss": 0.8329,
      "step": 428720
    },
    {
      "epoch": 1.5025917631349293,
      "grad_norm": 2.703125,
      "learning_rate": 2.772987946239658e-05,
      "loss": 0.7984,
      "step": 428730
    },
    {
      "epoch": 1.502626810641825,
      "grad_norm": 2.546875,
      "learning_rate": 2.7729230433732878e-05,
      "loss": 0.8426,
      "step": 428740
    },
    {
      "epoch": 1.5026618581487206,
      "grad_norm": 3.0,
      "learning_rate": 2.7728581405069176e-05,
      "loss": 0.8653,
      "step": 428750
    },
    {
      "epoch": 1.502696905655616,
      "grad_norm": 2.734375,
      "learning_rate": 2.7727932376405474e-05,
      "loss": 0.8736,
      "step": 428760
    },
    {
      "epoch": 1.5027319531625118,
      "grad_norm": 2.9375,
      "learning_rate": 2.772728334774177e-05,
      "loss": 0.8263,
      "step": 428770
    },
    {
      "epoch": 1.5027670006694074,
      "grad_norm": 2.5,
      "learning_rate": 2.772663431907807e-05,
      "loss": 0.7483,
      "step": 428780
    },
    {
      "epoch": 1.502802048176303,
      "grad_norm": 2.6875,
      "learning_rate": 2.7725985290414368e-05,
      "loss": 0.7943,
      "step": 428790
    },
    {
      "epoch": 1.5028370956831987,
      "grad_norm": 2.546875,
      "learning_rate": 2.772533626175067e-05,
      "loss": 0.8228,
      "step": 428800
    },
    {
      "epoch": 1.5028721431900942,
      "grad_norm": 3.078125,
      "learning_rate": 2.7724687233086967e-05,
      "loss": 0.8355,
      "step": 428810
    },
    {
      "epoch": 1.5029071906969897,
      "grad_norm": 3.234375,
      "learning_rate": 2.7724038204423265e-05,
      "loss": 0.8816,
      "step": 428820
    },
    {
      "epoch": 1.5029422382038855,
      "grad_norm": 3.453125,
      "learning_rate": 2.7723389175759563e-05,
      "loss": 0.8215,
      "step": 428830
    },
    {
      "epoch": 1.5029772857107808,
      "grad_norm": 2.84375,
      "learning_rate": 2.772274014709586e-05,
      "loss": 0.8828,
      "step": 428840
    },
    {
      "epoch": 1.5030123332176766,
      "grad_norm": 2.828125,
      "learning_rate": 2.7722091118432152e-05,
      "loss": 0.806,
      "step": 428850
    },
    {
      "epoch": 1.5030473807245721,
      "grad_norm": 2.890625,
      "learning_rate": 2.772144208976845e-05,
      "loss": 0.8673,
      "step": 428860
    },
    {
      "epoch": 1.5030824282314676,
      "grad_norm": 2.75,
      "learning_rate": 2.7720793061104748e-05,
      "loss": 0.7621,
      "step": 428870
    },
    {
      "epoch": 1.5031174757383634,
      "grad_norm": 3.25,
      "learning_rate": 2.7720144032441046e-05,
      "loss": 0.8147,
      "step": 428880
    },
    {
      "epoch": 1.503152523245259,
      "grad_norm": 2.921875,
      "learning_rate": 2.7719495003777348e-05,
      "loss": 0.8207,
      "step": 428890
    },
    {
      "epoch": 1.5031875707521545,
      "grad_norm": 3.125,
      "learning_rate": 2.7718845975113646e-05,
      "loss": 0.8464,
      "step": 428900
    },
    {
      "epoch": 1.5032226182590502,
      "grad_norm": 3.015625,
      "learning_rate": 2.7718196946449944e-05,
      "loss": 0.8034,
      "step": 428910
    },
    {
      "epoch": 1.5032576657659458,
      "grad_norm": 2.984375,
      "learning_rate": 2.771754791778624e-05,
      "loss": 0.8724,
      "step": 428920
    },
    {
      "epoch": 1.5032927132728413,
      "grad_norm": 2.75,
      "learning_rate": 2.771689888912254e-05,
      "loss": 0.8874,
      "step": 428930
    },
    {
      "epoch": 1.503327760779737,
      "grad_norm": 2.90625,
      "learning_rate": 2.7716249860458838e-05,
      "loss": 0.9187,
      "step": 428940
    },
    {
      "epoch": 1.5033628082866324,
      "grad_norm": 3.15625,
      "learning_rate": 2.7715600831795136e-05,
      "loss": 0.9193,
      "step": 428950
    },
    {
      "epoch": 1.5033978557935281,
      "grad_norm": 2.703125,
      "learning_rate": 2.7714951803131434e-05,
      "loss": 0.8621,
      "step": 428960
    },
    {
      "epoch": 1.5034329033004237,
      "grad_norm": 2.6875,
      "learning_rate": 2.771430277446773e-05,
      "loss": 0.8691,
      "step": 428970
    },
    {
      "epoch": 1.5034679508073192,
      "grad_norm": 2.75,
      "learning_rate": 2.771365374580403e-05,
      "loss": 0.8599,
      "step": 428980
    },
    {
      "epoch": 1.503502998314215,
      "grad_norm": 3.046875,
      "learning_rate": 2.7713004717140328e-05,
      "loss": 0.8018,
      "step": 428990
    },
    {
      "epoch": 1.5035380458211105,
      "grad_norm": 2.703125,
      "learning_rate": 2.7712355688476626e-05,
      "loss": 0.8973,
      "step": 429000
    },
    {
      "epoch": 1.503573093328006,
      "grad_norm": 2.5625,
      "learning_rate": 2.7711706659812924e-05,
      "loss": 0.7955,
      "step": 429010
    },
    {
      "epoch": 1.5036081408349018,
      "grad_norm": 2.625,
      "learning_rate": 2.771105763114922e-05,
      "loss": 0.8304,
      "step": 429020
    },
    {
      "epoch": 1.5036431883417973,
      "grad_norm": 2.8125,
      "learning_rate": 2.7710408602485523e-05,
      "loss": 0.8643,
      "step": 429030
    },
    {
      "epoch": 1.5036782358486929,
      "grad_norm": 3.109375,
      "learning_rate": 2.770975957382182e-05,
      "loss": 0.8783,
      "step": 429040
    },
    {
      "epoch": 1.5037132833555886,
      "grad_norm": 2.953125,
      "learning_rate": 2.770911054515812e-05,
      "loss": 0.7954,
      "step": 429050
    },
    {
      "epoch": 1.503748330862484,
      "grad_norm": 2.765625,
      "learning_rate": 2.7708461516494417e-05,
      "loss": 0.8116,
      "step": 429060
    },
    {
      "epoch": 1.5037833783693797,
      "grad_norm": 2.9375,
      "learning_rate": 2.7707812487830715e-05,
      "loss": 0.8605,
      "step": 429070
    },
    {
      "epoch": 1.5038184258762755,
      "grad_norm": 2.71875,
      "learning_rate": 2.7707163459167013e-05,
      "loss": 0.8504,
      "step": 429080
    },
    {
      "epoch": 1.5038534733831708,
      "grad_norm": 3.171875,
      "learning_rate": 2.770651443050331e-05,
      "loss": 0.8166,
      "step": 429090
    },
    {
      "epoch": 1.5038885208900665,
      "grad_norm": 2.828125,
      "learning_rate": 2.770586540183961e-05,
      "loss": 0.7878,
      "step": 429100
    },
    {
      "epoch": 1.503923568396962,
      "grad_norm": 3.046875,
      "learning_rate": 2.7705216373175907e-05,
      "loss": 0.9121,
      "step": 429110
    },
    {
      "epoch": 1.5039586159038576,
      "grad_norm": 2.859375,
      "learning_rate": 2.7704567344512205e-05,
      "loss": 0.8564,
      "step": 429120
    },
    {
      "epoch": 1.5039936634107534,
      "grad_norm": 3.171875,
      "learning_rate": 2.7703918315848503e-05,
      "loss": 0.8257,
      "step": 429130
    },
    {
      "epoch": 1.504028710917649,
      "grad_norm": 2.96875,
      "learning_rate": 2.77032692871848e-05,
      "loss": 0.8481,
      "step": 429140
    },
    {
      "epoch": 1.5040637584245444,
      "grad_norm": 2.515625,
      "learning_rate": 2.77026202585211e-05,
      "loss": 0.8022,
      "step": 429150
    },
    {
      "epoch": 1.5040988059314402,
      "grad_norm": 2.828125,
      "learning_rate": 2.7701971229857397e-05,
      "loss": 0.8887,
      "step": 429160
    },
    {
      "epoch": 1.5041338534383355,
      "grad_norm": 3.203125,
      "learning_rate": 2.77013222011937e-05,
      "loss": 0.8777,
      "step": 429170
    },
    {
      "epoch": 1.5041689009452313,
      "grad_norm": 2.734375,
      "learning_rate": 2.7700673172529996e-05,
      "loss": 0.8228,
      "step": 429180
    },
    {
      "epoch": 1.504203948452127,
      "grad_norm": 2.828125,
      "learning_rate": 2.7700024143866294e-05,
      "loss": 0.8472,
      "step": 429190
    },
    {
      "epoch": 1.5042389959590223,
      "grad_norm": 2.5,
      "learning_rate": 2.7699375115202592e-05,
      "loss": 0.854,
      "step": 429200
    },
    {
      "epoch": 1.504274043465918,
      "grad_norm": 2.921875,
      "learning_rate": 2.769872608653889e-05,
      "loss": 0.7878,
      "step": 429210
    },
    {
      "epoch": 1.5043090909728136,
      "grad_norm": 2.453125,
      "learning_rate": 2.769807705787519e-05,
      "loss": 0.9102,
      "step": 429220
    },
    {
      "epoch": 1.5043441384797092,
      "grad_norm": 2.8125,
      "learning_rate": 2.769742802921148e-05,
      "loss": 0.8667,
      "step": 429230
    },
    {
      "epoch": 1.504379185986605,
      "grad_norm": 3.09375,
      "learning_rate": 2.7696779000547778e-05,
      "loss": 0.8968,
      "step": 429240
    },
    {
      "epoch": 1.5044142334935005,
      "grad_norm": 3.0,
      "learning_rate": 2.7696129971884076e-05,
      "loss": 0.8834,
      "step": 429250
    },
    {
      "epoch": 1.504449281000396,
      "grad_norm": 3.234375,
      "learning_rate": 2.7695480943220377e-05,
      "loss": 0.8491,
      "step": 429260
    },
    {
      "epoch": 1.5044843285072917,
      "grad_norm": 2.9375,
      "learning_rate": 2.7694831914556675e-05,
      "loss": 0.9093,
      "step": 429270
    },
    {
      "epoch": 1.5045193760141873,
      "grad_norm": 2.765625,
      "learning_rate": 2.7694182885892973e-05,
      "loss": 0.8541,
      "step": 429280
    },
    {
      "epoch": 1.5045544235210828,
      "grad_norm": 2.421875,
      "learning_rate": 2.769353385722927e-05,
      "loss": 0.8488,
      "step": 429290
    },
    {
      "epoch": 1.5045894710279786,
      "grad_norm": 3.1875,
      "learning_rate": 2.769288482856557e-05,
      "loss": 0.8022,
      "step": 429300
    },
    {
      "epoch": 1.504624518534874,
      "grad_norm": 3.5,
      "learning_rate": 2.7692235799901867e-05,
      "loss": 0.9182,
      "step": 429310
    },
    {
      "epoch": 1.5046595660417696,
      "grad_norm": 2.78125,
      "learning_rate": 2.7691586771238165e-05,
      "loss": 0.8283,
      "step": 429320
    },
    {
      "epoch": 1.5046946135486652,
      "grad_norm": 2.75,
      "learning_rate": 2.7690937742574463e-05,
      "loss": 0.853,
      "step": 429330
    },
    {
      "epoch": 1.5047296610555607,
      "grad_norm": 2.609375,
      "learning_rate": 2.769028871391076e-05,
      "loss": 0.8055,
      "step": 429340
    },
    {
      "epoch": 1.5047647085624565,
      "grad_norm": 2.71875,
      "learning_rate": 2.768963968524706e-05,
      "loss": 0.8514,
      "step": 429350
    },
    {
      "epoch": 1.504799756069352,
      "grad_norm": 2.34375,
      "learning_rate": 2.7688990656583357e-05,
      "loss": 0.8556,
      "step": 429360
    },
    {
      "epoch": 1.5048348035762475,
      "grad_norm": 2.859375,
      "learning_rate": 2.7688341627919655e-05,
      "loss": 0.7567,
      "step": 429370
    },
    {
      "epoch": 1.5048698510831433,
      "grad_norm": 2.671875,
      "learning_rate": 2.7687692599255953e-05,
      "loss": 0.8658,
      "step": 429380
    },
    {
      "epoch": 1.5049048985900388,
      "grad_norm": 3.078125,
      "learning_rate": 2.768704357059225e-05,
      "loss": 0.8862,
      "step": 429390
    },
    {
      "epoch": 1.5049399460969344,
      "grad_norm": 3.1875,
      "learning_rate": 2.7686394541928552e-05,
      "loss": 0.8677,
      "step": 429400
    },
    {
      "epoch": 1.5049749936038301,
      "grad_norm": 2.828125,
      "learning_rate": 2.768574551326485e-05,
      "loss": 0.7649,
      "step": 429410
    },
    {
      "epoch": 1.5050100411107254,
      "grad_norm": 2.375,
      "learning_rate": 2.768509648460115e-05,
      "loss": 0.8008,
      "step": 429420
    },
    {
      "epoch": 1.5050450886176212,
      "grad_norm": 2.859375,
      "learning_rate": 2.7684447455937446e-05,
      "loss": 0.8882,
      "step": 429430
    },
    {
      "epoch": 1.5050801361245167,
      "grad_norm": 2.75,
      "learning_rate": 2.7683798427273744e-05,
      "loss": 0.845,
      "step": 429440
    },
    {
      "epoch": 1.5051151836314123,
      "grad_norm": 2.515625,
      "learning_rate": 2.7683149398610042e-05,
      "loss": 0.8768,
      "step": 429450
    },
    {
      "epoch": 1.505150231138308,
      "grad_norm": 3.515625,
      "learning_rate": 2.768250036994634e-05,
      "loss": 0.8704,
      "step": 429460
    },
    {
      "epoch": 1.5051852786452036,
      "grad_norm": 2.796875,
      "learning_rate": 2.768185134128264e-05,
      "loss": 0.8401,
      "step": 429470
    },
    {
      "epoch": 1.505220326152099,
      "grad_norm": 2.8125,
      "learning_rate": 2.7681202312618936e-05,
      "loss": 0.7514,
      "step": 429480
    },
    {
      "epoch": 1.5052553736589949,
      "grad_norm": 2.90625,
      "learning_rate": 2.7680553283955234e-05,
      "loss": 0.877,
      "step": 429490
    },
    {
      "epoch": 1.5052904211658904,
      "grad_norm": 3.21875,
      "learning_rate": 2.7679904255291532e-05,
      "loss": 0.9399,
      "step": 429500
    },
    {
      "epoch": 1.505325468672786,
      "grad_norm": 3.0,
      "learning_rate": 2.767925522662783e-05,
      "loss": 0.8382,
      "step": 429510
    },
    {
      "epoch": 1.5053605161796817,
      "grad_norm": 2.84375,
      "learning_rate": 2.767860619796413e-05,
      "loss": 0.9004,
      "step": 429520
    },
    {
      "epoch": 1.505395563686577,
      "grad_norm": 3.234375,
      "learning_rate": 2.7677957169300426e-05,
      "loss": 0.8963,
      "step": 429530
    },
    {
      "epoch": 1.5054306111934728,
      "grad_norm": 2.890625,
      "learning_rate": 2.7677308140636728e-05,
      "loss": 0.8893,
      "step": 429540
    },
    {
      "epoch": 1.5054656587003683,
      "grad_norm": 2.625,
      "learning_rate": 2.7676659111973026e-05,
      "loss": 0.8387,
      "step": 429550
    },
    {
      "epoch": 1.5055007062072638,
      "grad_norm": 3.328125,
      "learning_rate": 2.7676010083309324e-05,
      "loss": 0.8985,
      "step": 429560
    },
    {
      "epoch": 1.5055357537141596,
      "grad_norm": 2.578125,
      "learning_rate": 2.7675361054645622e-05,
      "loss": 0.8507,
      "step": 429570
    },
    {
      "epoch": 1.5055708012210551,
      "grad_norm": 2.640625,
      "learning_rate": 2.767471202598192e-05,
      "loss": 0.8181,
      "step": 429580
    },
    {
      "epoch": 1.5056058487279507,
      "grad_norm": 3.21875,
      "learning_rate": 2.7674062997318218e-05,
      "loss": 0.8468,
      "step": 429590
    },
    {
      "epoch": 1.5056408962348464,
      "grad_norm": 2.90625,
      "learning_rate": 2.7673413968654516e-05,
      "loss": 0.949,
      "step": 429600
    },
    {
      "epoch": 1.505675943741742,
      "grad_norm": 3.53125,
      "learning_rate": 2.7672764939990807e-05,
      "loss": 0.8286,
      "step": 429610
    },
    {
      "epoch": 1.5057109912486375,
      "grad_norm": 2.578125,
      "learning_rate": 2.7672115911327105e-05,
      "loss": 0.8179,
      "step": 429620
    },
    {
      "epoch": 1.5057460387555333,
      "grad_norm": 2.875,
      "learning_rate": 2.7671466882663406e-05,
      "loss": 0.8381,
      "step": 429630
    },
    {
      "epoch": 1.5057810862624286,
      "grad_norm": 2.828125,
      "learning_rate": 2.7670817853999704e-05,
      "loss": 0.9621,
      "step": 429640
    },
    {
      "epoch": 1.5058161337693243,
      "grad_norm": 2.875,
      "learning_rate": 2.7670168825336002e-05,
      "loss": 0.8289,
      "step": 429650
    },
    {
      "epoch": 1.5058511812762199,
      "grad_norm": 3.15625,
      "learning_rate": 2.76695197966723e-05,
      "loss": 0.83,
      "step": 429660
    },
    {
      "epoch": 1.5058862287831154,
      "grad_norm": 3.0,
      "learning_rate": 2.76688707680086e-05,
      "loss": 0.897,
      "step": 429670
    },
    {
      "epoch": 1.5059212762900112,
      "grad_norm": 2.890625,
      "learning_rate": 2.7668221739344896e-05,
      "loss": 0.8477,
      "step": 429680
    },
    {
      "epoch": 1.5059563237969067,
      "grad_norm": 3.171875,
      "learning_rate": 2.7667572710681194e-05,
      "loss": 0.883,
      "step": 429690
    },
    {
      "epoch": 1.5059913713038022,
      "grad_norm": 2.890625,
      "learning_rate": 2.7666923682017492e-05,
      "loss": 0.8506,
      "step": 429700
    },
    {
      "epoch": 1.506026418810698,
      "grad_norm": 2.875,
      "learning_rate": 2.766627465335379e-05,
      "loss": 0.768,
      "step": 429710
    },
    {
      "epoch": 1.5060614663175935,
      "grad_norm": 3.078125,
      "learning_rate": 2.766562562469009e-05,
      "loss": 0.8225,
      "step": 429720
    },
    {
      "epoch": 1.506096513824489,
      "grad_norm": 2.890625,
      "learning_rate": 2.7664976596026386e-05,
      "loss": 0.8761,
      "step": 429730
    },
    {
      "epoch": 1.5061315613313848,
      "grad_norm": 3.25,
      "learning_rate": 2.7664327567362684e-05,
      "loss": 0.9669,
      "step": 429740
    },
    {
      "epoch": 1.5061666088382801,
      "grad_norm": 2.75,
      "learning_rate": 2.7663678538698982e-05,
      "loss": 0.8571,
      "step": 429750
    },
    {
      "epoch": 1.5062016563451759,
      "grad_norm": 3.03125,
      "learning_rate": 2.7663029510035284e-05,
      "loss": 0.8351,
      "step": 429760
    },
    {
      "epoch": 1.5062367038520716,
      "grad_norm": 2.984375,
      "learning_rate": 2.7662380481371582e-05,
      "loss": 0.8591,
      "step": 429770
    },
    {
      "epoch": 1.506271751358967,
      "grad_norm": 2.484375,
      "learning_rate": 2.766173145270788e-05,
      "loss": 0.7946,
      "step": 429780
    },
    {
      "epoch": 1.5063067988658627,
      "grad_norm": 3.171875,
      "learning_rate": 2.7661082424044178e-05,
      "loss": 0.8507,
      "step": 429790
    },
    {
      "epoch": 1.5063418463727583,
      "grad_norm": 2.875,
      "learning_rate": 2.7660433395380476e-05,
      "loss": 0.6997,
      "step": 429800
    },
    {
      "epoch": 1.5063768938796538,
      "grad_norm": 2.78125,
      "learning_rate": 2.7659784366716774e-05,
      "loss": 0.8142,
      "step": 429810
    },
    {
      "epoch": 1.5064119413865495,
      "grad_norm": 2.875,
      "learning_rate": 2.7659135338053072e-05,
      "loss": 0.8147,
      "step": 429820
    },
    {
      "epoch": 1.506446988893445,
      "grad_norm": 2.6875,
      "learning_rate": 2.765848630938937e-05,
      "loss": 0.8727,
      "step": 429830
    },
    {
      "epoch": 1.5064820364003406,
      "grad_norm": 3.015625,
      "learning_rate": 2.7657837280725668e-05,
      "loss": 0.8204,
      "step": 429840
    },
    {
      "epoch": 1.5065170839072364,
      "grad_norm": 2.796875,
      "learning_rate": 2.7657188252061966e-05,
      "loss": 0.817,
      "step": 429850
    },
    {
      "epoch": 1.5065521314141317,
      "grad_norm": 3.109375,
      "learning_rate": 2.7656539223398264e-05,
      "loss": 0.9268,
      "step": 429860
    },
    {
      "epoch": 1.5065871789210274,
      "grad_norm": 3.0625,
      "learning_rate": 2.7655890194734562e-05,
      "loss": 0.8928,
      "step": 429870
    },
    {
      "epoch": 1.5066222264279232,
      "grad_norm": 2.75,
      "learning_rate": 2.765524116607086e-05,
      "loss": 0.8036,
      "step": 429880
    },
    {
      "epoch": 1.5066572739348185,
      "grad_norm": 2.734375,
      "learning_rate": 2.7654592137407158e-05,
      "loss": 0.7809,
      "step": 429890
    },
    {
      "epoch": 1.5066923214417143,
      "grad_norm": 2.53125,
      "learning_rate": 2.765394310874346e-05,
      "loss": 0.8205,
      "step": 429900
    },
    {
      "epoch": 1.5067273689486098,
      "grad_norm": 3.140625,
      "learning_rate": 2.7653294080079757e-05,
      "loss": 0.8746,
      "step": 429910
    },
    {
      "epoch": 1.5067624164555053,
      "grad_norm": 3.1875,
      "learning_rate": 2.7652645051416055e-05,
      "loss": 0.8429,
      "step": 429920
    },
    {
      "epoch": 1.506797463962401,
      "grad_norm": 2.765625,
      "learning_rate": 2.7651996022752353e-05,
      "loss": 0.864,
      "step": 429930
    },
    {
      "epoch": 1.5068325114692966,
      "grad_norm": 3.21875,
      "learning_rate": 2.765134699408865e-05,
      "loss": 0.9092,
      "step": 429940
    },
    {
      "epoch": 1.5068675589761922,
      "grad_norm": 2.734375,
      "learning_rate": 2.765069796542495e-05,
      "loss": 0.7875,
      "step": 429950
    },
    {
      "epoch": 1.506902606483088,
      "grad_norm": 2.59375,
      "learning_rate": 2.7650048936761247e-05,
      "loss": 0.8437,
      "step": 429960
    },
    {
      "epoch": 1.5069376539899835,
      "grad_norm": 3.09375,
      "learning_rate": 2.7649399908097545e-05,
      "loss": 0.985,
      "step": 429970
    },
    {
      "epoch": 1.506972701496879,
      "grad_norm": 2.984375,
      "learning_rate": 2.7648750879433836e-05,
      "loss": 0.8289,
      "step": 429980
    },
    {
      "epoch": 1.5070077490037748,
      "grad_norm": 3.171875,
      "learning_rate": 2.7648101850770138e-05,
      "loss": 0.8644,
      "step": 429990
    },
    {
      "epoch": 1.50704279651067,
      "grad_norm": 2.953125,
      "learning_rate": 2.7647452822106436e-05,
      "loss": 0.9384,
      "step": 430000
    },
    {
      "epoch": 1.50704279651067,
      "eval_loss": 0.7893469333648682,
      "eval_runtime": 554.0296,
      "eval_samples_per_second": 686.671,
      "eval_steps_per_second": 57.223,
      "step": 430000
    },
    {
      "epoch": 1.5070778440175658,
      "grad_norm": 2.78125,
      "learning_rate": 2.7646803793442734e-05,
      "loss": 0.8395,
      "step": 430010
    },
    {
      "epoch": 1.5071128915244614,
      "grad_norm": 3.1875,
      "learning_rate": 2.7646154764779032e-05,
      "loss": 0.8729,
      "step": 430020
    },
    {
      "epoch": 1.507147939031357,
      "grad_norm": 2.46875,
      "learning_rate": 2.764550573611533e-05,
      "loss": 0.8648,
      "step": 430030
    },
    {
      "epoch": 1.5071829865382527,
      "grad_norm": 2.90625,
      "learning_rate": 2.7644856707451628e-05,
      "loss": 0.765,
      "step": 430040
    },
    {
      "epoch": 1.5072180340451482,
      "grad_norm": 3.453125,
      "learning_rate": 2.7644207678787926e-05,
      "loss": 0.8891,
      "step": 430050
    },
    {
      "epoch": 1.5072530815520437,
      "grad_norm": 3.09375,
      "learning_rate": 2.7643558650124224e-05,
      "loss": 0.8588,
      "step": 430060
    },
    {
      "epoch": 1.5072881290589395,
      "grad_norm": 2.984375,
      "learning_rate": 2.7642909621460522e-05,
      "loss": 0.757,
      "step": 430070
    },
    {
      "epoch": 1.507323176565835,
      "grad_norm": 2.328125,
      "learning_rate": 2.764226059279682e-05,
      "loss": 0.8016,
      "step": 430080
    },
    {
      "epoch": 1.5073582240727306,
      "grad_norm": 2.46875,
      "learning_rate": 2.7641611564133118e-05,
      "loss": 0.8365,
      "step": 430090
    },
    {
      "epoch": 1.5073932715796263,
      "grad_norm": 2.796875,
      "learning_rate": 2.7640962535469416e-05,
      "loss": 0.7449,
      "step": 430100
    },
    {
      "epoch": 1.5074283190865216,
      "grad_norm": 2.9375,
      "learning_rate": 2.7640313506805714e-05,
      "loss": 0.778,
      "step": 430110
    },
    {
      "epoch": 1.5074633665934174,
      "grad_norm": 2.84375,
      "learning_rate": 2.7639664478142012e-05,
      "loss": 0.8717,
      "step": 430120
    },
    {
      "epoch": 1.507498414100313,
      "grad_norm": 2.875,
      "learning_rate": 2.7639015449478313e-05,
      "loss": 0.7943,
      "step": 430130
    },
    {
      "epoch": 1.5075334616072085,
      "grad_norm": 2.921875,
      "learning_rate": 2.763836642081461e-05,
      "loss": 0.8822,
      "step": 430140
    },
    {
      "epoch": 1.5075685091141042,
      "grad_norm": 2.515625,
      "learning_rate": 2.763771739215091e-05,
      "loss": 0.836,
      "step": 430150
    },
    {
      "epoch": 1.5076035566209998,
      "grad_norm": 3.46875,
      "learning_rate": 2.7637068363487207e-05,
      "loss": 0.903,
      "step": 430160
    },
    {
      "epoch": 1.5076386041278953,
      "grad_norm": 2.5625,
      "learning_rate": 2.7636419334823505e-05,
      "loss": 0.7853,
      "step": 430170
    },
    {
      "epoch": 1.507673651634791,
      "grad_norm": 2.78125,
      "learning_rate": 2.7635770306159803e-05,
      "loss": 0.8469,
      "step": 430180
    },
    {
      "epoch": 1.5077086991416866,
      "grad_norm": 3.609375,
      "learning_rate": 2.76351212774961e-05,
      "loss": 0.8872,
      "step": 430190
    },
    {
      "epoch": 1.5077437466485821,
      "grad_norm": 2.90625,
      "learning_rate": 2.76344722488324e-05,
      "loss": 0.9375,
      "step": 430200
    },
    {
      "epoch": 1.5077787941554779,
      "grad_norm": 3.0625,
      "learning_rate": 2.7633823220168697e-05,
      "loss": 0.8517,
      "step": 430210
    },
    {
      "epoch": 1.5078138416623732,
      "grad_norm": 2.859375,
      "learning_rate": 2.7633174191504995e-05,
      "loss": 0.8385,
      "step": 430220
    },
    {
      "epoch": 1.507848889169269,
      "grad_norm": 3.265625,
      "learning_rate": 2.7632525162841293e-05,
      "loss": 0.9726,
      "step": 430230
    },
    {
      "epoch": 1.5078839366761645,
      "grad_norm": 2.328125,
      "learning_rate": 2.763187613417759e-05,
      "loss": 0.8872,
      "step": 430240
    },
    {
      "epoch": 1.50791898418306,
      "grad_norm": 2.5625,
      "learning_rate": 2.763122710551389e-05,
      "loss": 0.8754,
      "step": 430250
    },
    {
      "epoch": 1.5079540316899558,
      "grad_norm": 3.1875,
      "learning_rate": 2.7630578076850187e-05,
      "loss": 0.8325,
      "step": 430260
    },
    {
      "epoch": 1.5079890791968513,
      "grad_norm": 2.734375,
      "learning_rate": 2.762992904818649e-05,
      "loss": 0.7889,
      "step": 430270
    },
    {
      "epoch": 1.5080241267037469,
      "grad_norm": 2.734375,
      "learning_rate": 2.7629280019522787e-05,
      "loss": 0.8973,
      "step": 430280
    },
    {
      "epoch": 1.5080591742106426,
      "grad_norm": 3.296875,
      "learning_rate": 2.7628630990859085e-05,
      "loss": 0.8427,
      "step": 430290
    },
    {
      "epoch": 1.5080942217175382,
      "grad_norm": 2.734375,
      "learning_rate": 2.7627981962195383e-05,
      "loss": 0.81,
      "step": 430300
    },
    {
      "epoch": 1.5081292692244337,
      "grad_norm": 3.140625,
      "learning_rate": 2.762733293353168e-05,
      "loss": 0.8308,
      "step": 430310
    },
    {
      "epoch": 1.5081643167313294,
      "grad_norm": 2.875,
      "learning_rate": 2.762668390486798e-05,
      "loss": 0.9326,
      "step": 430320
    },
    {
      "epoch": 1.5081993642382248,
      "grad_norm": 3.03125,
      "learning_rate": 2.7626034876204277e-05,
      "loss": 0.8075,
      "step": 430330
    },
    {
      "epoch": 1.5082344117451205,
      "grad_norm": 2.828125,
      "learning_rate": 2.7625385847540575e-05,
      "loss": 0.8786,
      "step": 430340
    },
    {
      "epoch": 1.508269459252016,
      "grad_norm": 2.78125,
      "learning_rate": 2.7624736818876873e-05,
      "loss": 0.8667,
      "step": 430350
    },
    {
      "epoch": 1.5083045067589116,
      "grad_norm": 3.28125,
      "learning_rate": 2.7624087790213167e-05,
      "loss": 0.8824,
      "step": 430360
    },
    {
      "epoch": 1.5083395542658073,
      "grad_norm": 2.5,
      "learning_rate": 2.7623438761549465e-05,
      "loss": 0.796,
      "step": 430370
    },
    {
      "epoch": 1.5083746017727029,
      "grad_norm": 2.625,
      "learning_rate": 2.7622789732885763e-05,
      "loss": 0.8046,
      "step": 430380
    },
    {
      "epoch": 1.5084096492795984,
      "grad_norm": 2.390625,
      "learning_rate": 2.762214070422206e-05,
      "loss": 0.7977,
      "step": 430390
    },
    {
      "epoch": 1.5084446967864942,
      "grad_norm": 2.765625,
      "learning_rate": 2.762149167555836e-05,
      "loss": 0.8657,
      "step": 430400
    },
    {
      "epoch": 1.5084797442933897,
      "grad_norm": 3.015625,
      "learning_rate": 2.7620842646894657e-05,
      "loss": 0.8211,
      "step": 430410
    },
    {
      "epoch": 1.5085147918002852,
      "grad_norm": 3.125,
      "learning_rate": 2.7620193618230955e-05,
      "loss": 0.8963,
      "step": 430420
    },
    {
      "epoch": 1.508549839307181,
      "grad_norm": 3.03125,
      "learning_rate": 2.7619544589567253e-05,
      "loss": 0.9098,
      "step": 430430
    },
    {
      "epoch": 1.5085848868140763,
      "grad_norm": 2.984375,
      "learning_rate": 2.761889556090355e-05,
      "loss": 0.8706,
      "step": 430440
    },
    {
      "epoch": 1.508619934320972,
      "grad_norm": 2.71875,
      "learning_rate": 2.761824653223985e-05,
      "loss": 0.8736,
      "step": 430450
    },
    {
      "epoch": 1.5086549818278678,
      "grad_norm": 3.40625,
      "learning_rate": 2.7617597503576147e-05,
      "loss": 0.8354,
      "step": 430460
    },
    {
      "epoch": 1.5086900293347631,
      "grad_norm": 2.765625,
      "learning_rate": 2.7616948474912445e-05,
      "loss": 0.9425,
      "step": 430470
    },
    {
      "epoch": 1.508725076841659,
      "grad_norm": 2.984375,
      "learning_rate": 2.7616299446248743e-05,
      "loss": 0.8324,
      "step": 430480
    },
    {
      "epoch": 1.5087601243485544,
      "grad_norm": 2.65625,
      "learning_rate": 2.761565041758504e-05,
      "loss": 0.8896,
      "step": 430490
    },
    {
      "epoch": 1.50879517185545,
      "grad_norm": 3.171875,
      "learning_rate": 2.7615001388921343e-05,
      "loss": 0.8478,
      "step": 430500
    },
    {
      "epoch": 1.5088302193623457,
      "grad_norm": 2.6875,
      "learning_rate": 2.761435236025764e-05,
      "loss": 0.8658,
      "step": 430510
    },
    {
      "epoch": 1.5088652668692413,
      "grad_norm": 2.640625,
      "learning_rate": 2.761370333159394e-05,
      "loss": 0.8013,
      "step": 430520
    },
    {
      "epoch": 1.5089003143761368,
      "grad_norm": 3.125,
      "learning_rate": 2.7613054302930237e-05,
      "loss": 0.7774,
      "step": 430530
    },
    {
      "epoch": 1.5089353618830326,
      "grad_norm": 2.734375,
      "learning_rate": 2.7612405274266535e-05,
      "loss": 0.879,
      "step": 430540
    },
    {
      "epoch": 1.508970409389928,
      "grad_norm": 3.140625,
      "learning_rate": 2.7611756245602833e-05,
      "loss": 0.845,
      "step": 430550
    },
    {
      "epoch": 1.5090054568968236,
      "grad_norm": 3.3125,
      "learning_rate": 2.761110721693913e-05,
      "loss": 0.8636,
      "step": 430560
    },
    {
      "epoch": 1.5090405044037194,
      "grad_norm": 2.921875,
      "learning_rate": 2.761045818827543e-05,
      "loss": 0.8516,
      "step": 430570
    },
    {
      "epoch": 1.5090755519106147,
      "grad_norm": 2.953125,
      "learning_rate": 2.7609809159611727e-05,
      "loss": 0.8899,
      "step": 430580
    },
    {
      "epoch": 1.5091105994175105,
      "grad_norm": 3.15625,
      "learning_rate": 2.7609160130948025e-05,
      "loss": 0.8148,
      "step": 430590
    },
    {
      "epoch": 1.509145646924406,
      "grad_norm": 2.9375,
      "learning_rate": 2.7608511102284323e-05,
      "loss": 0.8102,
      "step": 430600
    },
    {
      "epoch": 1.5091806944313015,
      "grad_norm": 2.765625,
      "learning_rate": 2.760786207362062e-05,
      "loss": 0.7969,
      "step": 430610
    },
    {
      "epoch": 1.5092157419381973,
      "grad_norm": 2.90625,
      "learning_rate": 2.760721304495692e-05,
      "loss": 0.8152,
      "step": 430620
    },
    {
      "epoch": 1.5092507894450928,
      "grad_norm": 3.03125,
      "learning_rate": 2.7606564016293217e-05,
      "loss": 0.9029,
      "step": 430630
    },
    {
      "epoch": 1.5092858369519884,
      "grad_norm": 3.015625,
      "learning_rate": 2.7605914987629518e-05,
      "loss": 0.8175,
      "step": 430640
    },
    {
      "epoch": 1.5093208844588841,
      "grad_norm": 3.09375,
      "learning_rate": 2.7605265958965816e-05,
      "loss": 0.7888,
      "step": 430650
    },
    {
      "epoch": 1.5093559319657797,
      "grad_norm": 2.5625,
      "learning_rate": 2.7604616930302114e-05,
      "loss": 0.8951,
      "step": 430660
    },
    {
      "epoch": 1.5093909794726752,
      "grad_norm": 2.953125,
      "learning_rate": 2.7603967901638412e-05,
      "loss": 0.8964,
      "step": 430670
    },
    {
      "epoch": 1.509426026979571,
      "grad_norm": 2.21875,
      "learning_rate": 2.760331887297471e-05,
      "loss": 0.8537,
      "step": 430680
    },
    {
      "epoch": 1.5094610744864663,
      "grad_norm": 3.1875,
      "learning_rate": 2.7602669844311008e-05,
      "loss": 0.8393,
      "step": 430690
    },
    {
      "epoch": 1.509496121993362,
      "grad_norm": 2.96875,
      "learning_rate": 2.7602020815647306e-05,
      "loss": 0.8751,
      "step": 430700
    },
    {
      "epoch": 1.5095311695002576,
      "grad_norm": 2.890625,
      "learning_rate": 2.7601371786983604e-05,
      "loss": 0.842,
      "step": 430710
    },
    {
      "epoch": 1.509566217007153,
      "grad_norm": 2.875,
      "learning_rate": 2.7600722758319902e-05,
      "loss": 0.8745,
      "step": 430720
    },
    {
      "epoch": 1.5096012645140489,
      "grad_norm": 2.765625,
      "learning_rate": 2.7600073729656197e-05,
      "loss": 0.7467,
      "step": 430730
    },
    {
      "epoch": 1.5096363120209444,
      "grad_norm": 2.578125,
      "learning_rate": 2.7599424700992495e-05,
      "loss": 0.8749,
      "step": 430740
    },
    {
      "epoch": 1.50967135952784,
      "grad_norm": 2.6875,
      "learning_rate": 2.7598775672328793e-05,
      "loss": 0.8708,
      "step": 430750
    },
    {
      "epoch": 1.5097064070347357,
      "grad_norm": 2.640625,
      "learning_rate": 2.759812664366509e-05,
      "loss": 0.8081,
      "step": 430760
    },
    {
      "epoch": 1.5097414545416312,
      "grad_norm": 2.625,
      "learning_rate": 2.759747761500139e-05,
      "loss": 0.8215,
      "step": 430770
    },
    {
      "epoch": 1.5097765020485268,
      "grad_norm": 3.234375,
      "learning_rate": 2.7596828586337687e-05,
      "loss": 0.8706,
      "step": 430780
    },
    {
      "epoch": 1.5098115495554225,
      "grad_norm": 2.5,
      "learning_rate": 2.7596179557673985e-05,
      "loss": 0.8336,
      "step": 430790
    },
    {
      "epoch": 1.5098465970623178,
      "grad_norm": 2.796875,
      "learning_rate": 2.7595530529010283e-05,
      "loss": 0.9461,
      "step": 430800
    },
    {
      "epoch": 1.5098816445692136,
      "grad_norm": 2.765625,
      "learning_rate": 2.759488150034658e-05,
      "loss": 0.8642,
      "step": 430810
    },
    {
      "epoch": 1.5099166920761091,
      "grad_norm": 2.703125,
      "learning_rate": 2.759423247168288e-05,
      "loss": 0.8153,
      "step": 430820
    },
    {
      "epoch": 1.5099517395830047,
      "grad_norm": 2.640625,
      "learning_rate": 2.7593583443019177e-05,
      "loss": 0.9025,
      "step": 430830
    },
    {
      "epoch": 1.5099867870899004,
      "grad_norm": 3.203125,
      "learning_rate": 2.7592934414355475e-05,
      "loss": 0.8596,
      "step": 430840
    },
    {
      "epoch": 1.510021834596796,
      "grad_norm": 2.921875,
      "learning_rate": 2.7592285385691773e-05,
      "loss": 0.8147,
      "step": 430850
    },
    {
      "epoch": 1.5100568821036915,
      "grad_norm": 2.9375,
      "learning_rate": 2.7591636357028074e-05,
      "loss": 0.8366,
      "step": 430860
    },
    {
      "epoch": 1.5100919296105872,
      "grad_norm": 2.859375,
      "learning_rate": 2.7590987328364372e-05,
      "loss": 0.9897,
      "step": 430870
    },
    {
      "epoch": 1.5101269771174828,
      "grad_norm": 3.015625,
      "learning_rate": 2.759033829970067e-05,
      "loss": 0.7587,
      "step": 430880
    },
    {
      "epoch": 1.5101620246243783,
      "grad_norm": 2.953125,
      "learning_rate": 2.7589689271036968e-05,
      "loss": 0.8309,
      "step": 430890
    },
    {
      "epoch": 1.510197072131274,
      "grad_norm": 3.015625,
      "learning_rate": 2.7589040242373266e-05,
      "loss": 0.8265,
      "step": 430900
    },
    {
      "epoch": 1.5102321196381694,
      "grad_norm": 2.875,
      "learning_rate": 2.7588391213709564e-05,
      "loss": 0.8035,
      "step": 430910
    },
    {
      "epoch": 1.5102671671450651,
      "grad_norm": 2.875,
      "learning_rate": 2.7587742185045862e-05,
      "loss": 0.9073,
      "step": 430920
    },
    {
      "epoch": 1.5103022146519607,
      "grad_norm": 2.59375,
      "learning_rate": 2.758709315638216e-05,
      "loss": 0.8314,
      "step": 430930
    },
    {
      "epoch": 1.5103372621588562,
      "grad_norm": 3.234375,
      "learning_rate": 2.7586444127718458e-05,
      "loss": 0.872,
      "step": 430940
    },
    {
      "epoch": 1.510372309665752,
      "grad_norm": 2.9375,
      "learning_rate": 2.7585795099054756e-05,
      "loss": 0.8698,
      "step": 430950
    },
    {
      "epoch": 1.5104073571726475,
      "grad_norm": 2.703125,
      "learning_rate": 2.7585146070391054e-05,
      "loss": 0.9168,
      "step": 430960
    },
    {
      "epoch": 1.510442404679543,
      "grad_norm": 2.875,
      "learning_rate": 2.7584497041727352e-05,
      "loss": 0.8757,
      "step": 430970
    },
    {
      "epoch": 1.5104774521864388,
      "grad_norm": 2.734375,
      "learning_rate": 2.758384801306365e-05,
      "loss": 0.8792,
      "step": 430980
    },
    {
      "epoch": 1.5105124996933343,
      "grad_norm": 2.6875,
      "learning_rate": 2.7583198984399948e-05,
      "loss": 0.8639,
      "step": 430990
    },
    {
      "epoch": 1.5105475472002299,
      "grad_norm": 2.671875,
      "learning_rate": 2.758254995573625e-05,
      "loss": 0.8723,
      "step": 431000
    },
    {
      "epoch": 1.5105825947071256,
      "grad_norm": 3.5,
      "learning_rate": 2.7581900927072547e-05,
      "loss": 0.7712,
      "step": 431010
    },
    {
      "epoch": 1.510617642214021,
      "grad_norm": 2.578125,
      "learning_rate": 2.7581251898408845e-05,
      "loss": 0.8154,
      "step": 431020
    },
    {
      "epoch": 1.5106526897209167,
      "grad_norm": 2.9375,
      "learning_rate": 2.7580602869745143e-05,
      "loss": 0.7213,
      "step": 431030
    },
    {
      "epoch": 1.5106877372278125,
      "grad_norm": 3.3125,
      "learning_rate": 2.757995384108144e-05,
      "loss": 0.8704,
      "step": 431040
    },
    {
      "epoch": 1.5107227847347078,
      "grad_norm": 2.6875,
      "learning_rate": 2.757930481241774e-05,
      "loss": 0.8191,
      "step": 431050
    },
    {
      "epoch": 1.5107578322416035,
      "grad_norm": 2.75,
      "learning_rate": 2.7578655783754037e-05,
      "loss": 0.86,
      "step": 431060
    },
    {
      "epoch": 1.510792879748499,
      "grad_norm": 3.046875,
      "learning_rate": 2.7578006755090335e-05,
      "loss": 0.7848,
      "step": 431070
    },
    {
      "epoch": 1.5108279272553946,
      "grad_norm": 2.65625,
      "learning_rate": 2.7577357726426633e-05,
      "loss": 0.8239,
      "step": 431080
    },
    {
      "epoch": 1.5108629747622904,
      "grad_norm": 2.578125,
      "learning_rate": 2.757670869776293e-05,
      "loss": 0.833,
      "step": 431090
    },
    {
      "epoch": 1.510898022269186,
      "grad_norm": 3.421875,
      "learning_rate": 2.757605966909923e-05,
      "loss": 0.7848,
      "step": 431100
    },
    {
      "epoch": 1.5109330697760814,
      "grad_norm": 2.8125,
      "learning_rate": 2.7575410640435524e-05,
      "loss": 0.7728,
      "step": 431110
    },
    {
      "epoch": 1.5109681172829772,
      "grad_norm": 2.9375,
      "learning_rate": 2.7574761611771822e-05,
      "loss": 0.8563,
      "step": 431120
    },
    {
      "epoch": 1.5110031647898725,
      "grad_norm": 3.109375,
      "learning_rate": 2.757411258310812e-05,
      "loss": 0.8188,
      "step": 431130
    },
    {
      "epoch": 1.5110382122967683,
      "grad_norm": 2.875,
      "learning_rate": 2.7573463554444418e-05,
      "loss": 0.7946,
      "step": 431140
    },
    {
      "epoch": 1.511073259803664,
      "grad_norm": 3.109375,
      "learning_rate": 2.7572814525780716e-05,
      "loss": 0.8373,
      "step": 431150
    },
    {
      "epoch": 1.5111083073105593,
      "grad_norm": 3.03125,
      "learning_rate": 2.7572165497117014e-05,
      "loss": 0.8346,
      "step": 431160
    },
    {
      "epoch": 1.511143354817455,
      "grad_norm": 3.0625,
      "learning_rate": 2.7571516468453312e-05,
      "loss": 0.8763,
      "step": 431170
    },
    {
      "epoch": 1.5111784023243506,
      "grad_norm": 3.140625,
      "learning_rate": 2.757086743978961e-05,
      "loss": 0.8194,
      "step": 431180
    },
    {
      "epoch": 1.5112134498312462,
      "grad_norm": 2.953125,
      "learning_rate": 2.7570218411125908e-05,
      "loss": 0.8026,
      "step": 431190
    },
    {
      "epoch": 1.511248497338142,
      "grad_norm": 2.546875,
      "learning_rate": 2.7569569382462206e-05,
      "loss": 0.7465,
      "step": 431200
    },
    {
      "epoch": 1.5112835448450375,
      "grad_norm": 3.28125,
      "learning_rate": 2.7568920353798504e-05,
      "loss": 0.8371,
      "step": 431210
    },
    {
      "epoch": 1.511318592351933,
      "grad_norm": 3.140625,
      "learning_rate": 2.7568271325134802e-05,
      "loss": 0.812,
      "step": 431220
    },
    {
      "epoch": 1.5113536398588288,
      "grad_norm": 3.09375,
      "learning_rate": 2.7567622296471103e-05,
      "loss": 0.8607,
      "step": 431230
    },
    {
      "epoch": 1.5113886873657243,
      "grad_norm": 3.140625,
      "learning_rate": 2.75669732678074e-05,
      "loss": 0.8681,
      "step": 431240
    },
    {
      "epoch": 1.5114237348726198,
      "grad_norm": 2.96875,
      "learning_rate": 2.75663242391437e-05,
      "loss": 0.8791,
      "step": 431250
    },
    {
      "epoch": 1.5114587823795156,
      "grad_norm": 2.609375,
      "learning_rate": 2.7565675210479997e-05,
      "loss": 0.8581,
      "step": 431260
    },
    {
      "epoch": 1.511493829886411,
      "grad_norm": 2.9375,
      "learning_rate": 2.7565026181816295e-05,
      "loss": 0.8721,
      "step": 431270
    },
    {
      "epoch": 1.5115288773933067,
      "grad_norm": 2.921875,
      "learning_rate": 2.7564377153152593e-05,
      "loss": 0.8121,
      "step": 431280
    },
    {
      "epoch": 1.5115639249002022,
      "grad_norm": 2.859375,
      "learning_rate": 2.756372812448889e-05,
      "loss": 0.8288,
      "step": 431290
    },
    {
      "epoch": 1.5115989724070977,
      "grad_norm": 2.71875,
      "learning_rate": 2.756307909582519e-05,
      "loss": 0.8182,
      "step": 431300
    },
    {
      "epoch": 1.5116340199139935,
      "grad_norm": 2.96875,
      "learning_rate": 2.7562430067161487e-05,
      "loss": 0.8357,
      "step": 431310
    },
    {
      "epoch": 1.511669067420889,
      "grad_norm": 3.15625,
      "learning_rate": 2.7561781038497785e-05,
      "loss": 0.7968,
      "step": 431320
    },
    {
      "epoch": 1.5117041149277846,
      "grad_norm": 3.046875,
      "learning_rate": 2.7561132009834083e-05,
      "loss": 0.8742,
      "step": 431330
    },
    {
      "epoch": 1.5117391624346803,
      "grad_norm": 3.03125,
      "learning_rate": 2.756048298117038e-05,
      "loss": 0.767,
      "step": 431340
    },
    {
      "epoch": 1.5117742099415759,
      "grad_norm": 2.765625,
      "learning_rate": 2.755983395250668e-05,
      "loss": 0.7231,
      "step": 431350
    },
    {
      "epoch": 1.5118092574484714,
      "grad_norm": 2.5625,
      "learning_rate": 2.7559184923842977e-05,
      "loss": 0.792,
      "step": 431360
    },
    {
      "epoch": 1.5118443049553671,
      "grad_norm": 2.75,
      "learning_rate": 2.755853589517928e-05,
      "loss": 0.8349,
      "step": 431370
    },
    {
      "epoch": 1.5118793524622625,
      "grad_norm": 3.09375,
      "learning_rate": 2.7557886866515577e-05,
      "loss": 0.9149,
      "step": 431380
    },
    {
      "epoch": 1.5119143999691582,
      "grad_norm": 3.1875,
      "learning_rate": 2.7557237837851875e-05,
      "loss": 0.8678,
      "step": 431390
    },
    {
      "epoch": 1.5119494474760538,
      "grad_norm": 2.8125,
      "learning_rate": 2.7556588809188173e-05,
      "loss": 0.83,
      "step": 431400
    },
    {
      "epoch": 1.5119844949829493,
      "grad_norm": 2.71875,
      "learning_rate": 2.755593978052447e-05,
      "loss": 0.8684,
      "step": 431410
    },
    {
      "epoch": 1.512019542489845,
      "grad_norm": 3.03125,
      "learning_rate": 2.755529075186077e-05,
      "loss": 0.8821,
      "step": 431420
    },
    {
      "epoch": 1.5120545899967406,
      "grad_norm": 2.953125,
      "learning_rate": 2.7554641723197067e-05,
      "loss": 0.8564,
      "step": 431430
    },
    {
      "epoch": 1.5120896375036361,
      "grad_norm": 2.796875,
      "learning_rate": 2.7553992694533365e-05,
      "loss": 0.8704,
      "step": 431440
    },
    {
      "epoch": 1.5121246850105319,
      "grad_norm": 3.1875,
      "learning_rate": 2.7553343665869663e-05,
      "loss": 0.8474,
      "step": 431450
    },
    {
      "epoch": 1.5121597325174274,
      "grad_norm": 2.828125,
      "learning_rate": 2.755269463720596e-05,
      "loss": 0.8037,
      "step": 431460
    },
    {
      "epoch": 1.512194780024323,
      "grad_norm": 2.734375,
      "learning_rate": 2.755204560854226e-05,
      "loss": 0.8636,
      "step": 431470
    },
    {
      "epoch": 1.5122298275312187,
      "grad_norm": 2.90625,
      "learning_rate": 2.7551396579878557e-05,
      "loss": 0.9371,
      "step": 431480
    },
    {
      "epoch": 1.512264875038114,
      "grad_norm": 2.8125,
      "learning_rate": 2.755074755121485e-05,
      "loss": 0.8834,
      "step": 431490
    },
    {
      "epoch": 1.5122999225450098,
      "grad_norm": 2.640625,
      "learning_rate": 2.755009852255115e-05,
      "loss": 0.8375,
      "step": 431500
    },
    {
      "epoch": 1.5123349700519053,
      "grad_norm": 3.375,
      "learning_rate": 2.7549449493887447e-05,
      "loss": 0.8667,
      "step": 431510
    },
    {
      "epoch": 1.5123700175588008,
      "grad_norm": 2.78125,
      "learning_rate": 2.7548800465223745e-05,
      "loss": 0.8136,
      "step": 431520
    },
    {
      "epoch": 1.5124050650656966,
      "grad_norm": 2.859375,
      "learning_rate": 2.7548151436560043e-05,
      "loss": 0.8295,
      "step": 431530
    },
    {
      "epoch": 1.5124401125725921,
      "grad_norm": 3.265625,
      "learning_rate": 2.754750240789634e-05,
      "loss": 0.7775,
      "step": 431540
    },
    {
      "epoch": 1.5124751600794877,
      "grad_norm": 3.09375,
      "learning_rate": 2.754685337923264e-05,
      "loss": 0.7982,
      "step": 431550
    },
    {
      "epoch": 1.5125102075863834,
      "grad_norm": 2.84375,
      "learning_rate": 2.7546204350568937e-05,
      "loss": 0.8245,
      "step": 431560
    },
    {
      "epoch": 1.512545255093279,
      "grad_norm": 2.859375,
      "learning_rate": 2.7545555321905235e-05,
      "loss": 0.8376,
      "step": 431570
    },
    {
      "epoch": 1.5125803026001745,
      "grad_norm": 2.75,
      "learning_rate": 2.7544906293241533e-05,
      "loss": 0.9561,
      "step": 431580
    },
    {
      "epoch": 1.5126153501070703,
      "grad_norm": 2.4375,
      "learning_rate": 2.754425726457783e-05,
      "loss": 0.8129,
      "step": 431590
    },
    {
      "epoch": 1.5126503976139656,
      "grad_norm": 2.921875,
      "learning_rate": 2.7543608235914133e-05,
      "loss": 0.8748,
      "step": 431600
    },
    {
      "epoch": 1.5126854451208613,
      "grad_norm": 2.875,
      "learning_rate": 2.754295920725043e-05,
      "loss": 0.7383,
      "step": 431610
    },
    {
      "epoch": 1.5127204926277569,
      "grad_norm": 2.921875,
      "learning_rate": 2.754231017858673e-05,
      "loss": 0.7674,
      "step": 431620
    },
    {
      "epoch": 1.5127555401346524,
      "grad_norm": 3.125,
      "learning_rate": 2.7541661149923027e-05,
      "loss": 0.8445,
      "step": 431630
    },
    {
      "epoch": 1.5127905876415482,
      "grad_norm": 2.8125,
      "learning_rate": 2.7541012121259325e-05,
      "loss": 0.8952,
      "step": 431640
    },
    {
      "epoch": 1.5128256351484437,
      "grad_norm": 2.84375,
      "learning_rate": 2.7540363092595623e-05,
      "loss": 0.8141,
      "step": 431650
    },
    {
      "epoch": 1.5128606826553392,
      "grad_norm": 2.96875,
      "learning_rate": 2.753971406393192e-05,
      "loss": 0.8579,
      "step": 431660
    },
    {
      "epoch": 1.512895730162235,
      "grad_norm": 2.875,
      "learning_rate": 2.753906503526822e-05,
      "loss": 0.8353,
      "step": 431670
    },
    {
      "epoch": 1.5129307776691305,
      "grad_norm": 2.78125,
      "learning_rate": 2.7538416006604517e-05,
      "loss": 0.8595,
      "step": 431680
    },
    {
      "epoch": 1.512965825176026,
      "grad_norm": 2.890625,
      "learning_rate": 2.7537766977940815e-05,
      "loss": 0.8595,
      "step": 431690
    },
    {
      "epoch": 1.5130008726829218,
      "grad_norm": 2.6875,
      "learning_rate": 2.7537117949277113e-05,
      "loss": 0.8732,
      "step": 431700
    },
    {
      "epoch": 1.5130359201898171,
      "grad_norm": 2.375,
      "learning_rate": 2.753646892061341e-05,
      "loss": 0.7972,
      "step": 431710
    },
    {
      "epoch": 1.513070967696713,
      "grad_norm": 2.859375,
      "learning_rate": 2.753581989194971e-05,
      "loss": 0.8951,
      "step": 431720
    },
    {
      "epoch": 1.5131060152036087,
      "grad_norm": 3.109375,
      "learning_rate": 2.7535170863286007e-05,
      "loss": 0.9163,
      "step": 431730
    },
    {
      "epoch": 1.513141062710504,
      "grad_norm": 3.015625,
      "learning_rate": 2.7534521834622308e-05,
      "loss": 0.8783,
      "step": 431740
    },
    {
      "epoch": 1.5131761102173997,
      "grad_norm": 2.53125,
      "learning_rate": 2.7533872805958606e-05,
      "loss": 0.7937,
      "step": 431750
    },
    {
      "epoch": 1.5132111577242953,
      "grad_norm": 2.90625,
      "learning_rate": 2.7533223777294904e-05,
      "loss": 0.8605,
      "step": 431760
    },
    {
      "epoch": 1.5132462052311908,
      "grad_norm": 3.09375,
      "learning_rate": 2.7532574748631202e-05,
      "loss": 0.8896,
      "step": 431770
    },
    {
      "epoch": 1.5132812527380866,
      "grad_norm": 3.15625,
      "learning_rate": 2.75319257199675e-05,
      "loss": 0.9054,
      "step": 431780
    },
    {
      "epoch": 1.513316300244982,
      "grad_norm": 2.875,
      "learning_rate": 2.7531276691303798e-05,
      "loss": 0.8104,
      "step": 431790
    },
    {
      "epoch": 1.5133513477518776,
      "grad_norm": 2.71875,
      "learning_rate": 2.7530627662640096e-05,
      "loss": 0.7871,
      "step": 431800
    },
    {
      "epoch": 1.5133863952587734,
      "grad_norm": 2.65625,
      "learning_rate": 2.7529978633976394e-05,
      "loss": 0.817,
      "step": 431810
    },
    {
      "epoch": 1.5134214427656687,
      "grad_norm": 3.421875,
      "learning_rate": 2.7529329605312692e-05,
      "loss": 0.8739,
      "step": 431820
    },
    {
      "epoch": 1.5134564902725645,
      "grad_norm": 2.578125,
      "learning_rate": 2.752868057664899e-05,
      "loss": 0.8735,
      "step": 431830
    },
    {
      "epoch": 1.5134915377794602,
      "grad_norm": 2.84375,
      "learning_rate": 2.7528031547985288e-05,
      "loss": 0.8435,
      "step": 431840
    },
    {
      "epoch": 1.5135265852863555,
      "grad_norm": 3.015625,
      "learning_rate": 2.7527382519321586e-05,
      "loss": 0.7483,
      "step": 431850
    },
    {
      "epoch": 1.5135616327932513,
      "grad_norm": 2.96875,
      "learning_rate": 2.752673349065788e-05,
      "loss": 0.7577,
      "step": 431860
    },
    {
      "epoch": 1.5135966803001468,
      "grad_norm": 2.84375,
      "learning_rate": 2.752608446199418e-05,
      "loss": 0.7755,
      "step": 431870
    },
    {
      "epoch": 1.5136317278070424,
      "grad_norm": 2.765625,
      "learning_rate": 2.7525435433330477e-05,
      "loss": 0.8452,
      "step": 431880
    },
    {
      "epoch": 1.5136667753139381,
      "grad_norm": 2.125,
      "learning_rate": 2.7524786404666775e-05,
      "loss": 0.7999,
      "step": 431890
    },
    {
      "epoch": 1.5137018228208337,
      "grad_norm": 2.828125,
      "learning_rate": 2.7524137376003073e-05,
      "loss": 0.841,
      "step": 431900
    },
    {
      "epoch": 1.5137368703277292,
      "grad_norm": 2.8125,
      "learning_rate": 2.752348834733937e-05,
      "loss": 0.7464,
      "step": 431910
    },
    {
      "epoch": 1.513771917834625,
      "grad_norm": 2.609375,
      "learning_rate": 2.752283931867567e-05,
      "loss": 0.8514,
      "step": 431920
    },
    {
      "epoch": 1.5138069653415205,
      "grad_norm": 3.03125,
      "learning_rate": 2.7522190290011967e-05,
      "loss": 0.8682,
      "step": 431930
    },
    {
      "epoch": 1.513842012848416,
      "grad_norm": 2.796875,
      "learning_rate": 2.7521541261348265e-05,
      "loss": 0.8328,
      "step": 431940
    },
    {
      "epoch": 1.5138770603553118,
      "grad_norm": 3.328125,
      "learning_rate": 2.7520892232684563e-05,
      "loss": 0.8633,
      "step": 431950
    },
    {
      "epoch": 1.513912107862207,
      "grad_norm": 2.6875,
      "learning_rate": 2.7520243204020864e-05,
      "loss": 0.816,
      "step": 431960
    },
    {
      "epoch": 1.5139471553691028,
      "grad_norm": 3.0625,
      "learning_rate": 2.7519594175357162e-05,
      "loss": 0.9416,
      "step": 431970
    },
    {
      "epoch": 1.5139822028759984,
      "grad_norm": 3.0,
      "learning_rate": 2.751894514669346e-05,
      "loss": 0.8549,
      "step": 431980
    },
    {
      "epoch": 1.514017250382894,
      "grad_norm": 2.46875,
      "learning_rate": 2.7518296118029758e-05,
      "loss": 0.8063,
      "step": 431990
    },
    {
      "epoch": 1.5140522978897897,
      "grad_norm": 2.796875,
      "learning_rate": 2.7517647089366056e-05,
      "loss": 0.8191,
      "step": 432000
    },
    {
      "epoch": 1.5140873453966852,
      "grad_norm": 3.390625,
      "learning_rate": 2.7516998060702354e-05,
      "loss": 0.8867,
      "step": 432010
    },
    {
      "epoch": 1.5141223929035807,
      "grad_norm": 3.703125,
      "learning_rate": 2.7516349032038652e-05,
      "loss": 0.9411,
      "step": 432020
    },
    {
      "epoch": 1.5141574404104765,
      "grad_norm": 3.0625,
      "learning_rate": 2.751570000337495e-05,
      "loss": 0.8774,
      "step": 432030
    },
    {
      "epoch": 1.514192487917372,
      "grad_norm": 2.578125,
      "learning_rate": 2.7515050974711248e-05,
      "loss": 0.8136,
      "step": 432040
    },
    {
      "epoch": 1.5142275354242676,
      "grad_norm": 2.90625,
      "learning_rate": 2.7514401946047546e-05,
      "loss": 0.74,
      "step": 432050
    },
    {
      "epoch": 1.5142625829311633,
      "grad_norm": 2.859375,
      "learning_rate": 2.7513752917383844e-05,
      "loss": 0.8198,
      "step": 432060
    },
    {
      "epoch": 1.5142976304380587,
      "grad_norm": 3.125,
      "learning_rate": 2.7513103888720142e-05,
      "loss": 0.8126,
      "step": 432070
    },
    {
      "epoch": 1.5143326779449544,
      "grad_norm": 2.984375,
      "learning_rate": 2.751245486005644e-05,
      "loss": 0.8905,
      "step": 432080
    },
    {
      "epoch": 1.51436772545185,
      "grad_norm": 2.609375,
      "learning_rate": 2.7511805831392738e-05,
      "loss": 0.8118,
      "step": 432090
    },
    {
      "epoch": 1.5144027729587455,
      "grad_norm": 3.0625,
      "learning_rate": 2.751115680272904e-05,
      "loss": 0.8794,
      "step": 432100
    },
    {
      "epoch": 1.5144378204656412,
      "grad_norm": 2.765625,
      "learning_rate": 2.7510507774065337e-05,
      "loss": 0.8291,
      "step": 432110
    },
    {
      "epoch": 1.5144728679725368,
      "grad_norm": 2.484375,
      "learning_rate": 2.7509858745401635e-05,
      "loss": 0.7664,
      "step": 432120
    },
    {
      "epoch": 1.5145079154794323,
      "grad_norm": 2.71875,
      "learning_rate": 2.7509209716737933e-05,
      "loss": 0.8728,
      "step": 432130
    },
    {
      "epoch": 1.514542962986328,
      "grad_norm": 2.921875,
      "learning_rate": 2.750856068807423e-05,
      "loss": 0.7731,
      "step": 432140
    },
    {
      "epoch": 1.5145780104932236,
      "grad_norm": 3.265625,
      "learning_rate": 2.750791165941053e-05,
      "loss": 0.8188,
      "step": 432150
    },
    {
      "epoch": 1.5146130580001191,
      "grad_norm": 3.296875,
      "learning_rate": 2.7507262630746827e-05,
      "loss": 0.7816,
      "step": 432160
    },
    {
      "epoch": 1.514648105507015,
      "grad_norm": 2.734375,
      "learning_rate": 2.7506613602083125e-05,
      "loss": 0.7494,
      "step": 432170
    },
    {
      "epoch": 1.5146831530139102,
      "grad_norm": 2.9375,
      "learning_rate": 2.7505964573419423e-05,
      "loss": 0.855,
      "step": 432180
    },
    {
      "epoch": 1.514718200520806,
      "grad_norm": 2.4375,
      "learning_rate": 2.750531554475572e-05,
      "loss": 0.8886,
      "step": 432190
    },
    {
      "epoch": 1.5147532480277015,
      "grad_norm": 2.953125,
      "learning_rate": 2.750466651609202e-05,
      "loss": 0.8609,
      "step": 432200
    },
    {
      "epoch": 1.514788295534597,
      "grad_norm": 3.3125,
      "learning_rate": 2.7504017487428317e-05,
      "loss": 0.8305,
      "step": 432210
    },
    {
      "epoch": 1.5148233430414928,
      "grad_norm": 2.953125,
      "learning_rate": 2.7503368458764615e-05,
      "loss": 0.8518,
      "step": 432220
    },
    {
      "epoch": 1.5148583905483883,
      "grad_norm": 2.28125,
      "learning_rate": 2.7502719430100913e-05,
      "loss": 0.9349,
      "step": 432230
    },
    {
      "epoch": 1.5148934380552839,
      "grad_norm": 3.484375,
      "learning_rate": 2.7502070401437208e-05,
      "loss": 0.9184,
      "step": 432240
    },
    {
      "epoch": 1.5149284855621796,
      "grad_norm": 3.53125,
      "learning_rate": 2.7501421372773506e-05,
      "loss": 0.8157,
      "step": 432250
    },
    {
      "epoch": 1.5149635330690752,
      "grad_norm": 3.359375,
      "learning_rate": 2.7500772344109804e-05,
      "loss": 0.8816,
      "step": 432260
    },
    {
      "epoch": 1.5149985805759707,
      "grad_norm": 3.125,
      "learning_rate": 2.7500123315446102e-05,
      "loss": 0.8654,
      "step": 432270
    },
    {
      "epoch": 1.5150336280828665,
      "grad_norm": 3.15625,
      "learning_rate": 2.74994742867824e-05,
      "loss": 0.845,
      "step": 432280
    },
    {
      "epoch": 1.5150686755897618,
      "grad_norm": 2.859375,
      "learning_rate": 2.7498825258118698e-05,
      "loss": 0.889,
      "step": 432290
    },
    {
      "epoch": 1.5151037230966575,
      "grad_norm": 3.390625,
      "learning_rate": 2.7498176229454996e-05,
      "loss": 0.7736,
      "step": 432300
    },
    {
      "epoch": 1.515138770603553,
      "grad_norm": 3.15625,
      "learning_rate": 2.7497527200791294e-05,
      "loss": 0.8556,
      "step": 432310
    },
    {
      "epoch": 1.5151738181104486,
      "grad_norm": 3.109375,
      "learning_rate": 2.7496878172127592e-05,
      "loss": 0.8284,
      "step": 432320
    },
    {
      "epoch": 1.5152088656173444,
      "grad_norm": 3.40625,
      "learning_rate": 2.7496229143463893e-05,
      "loss": 0.8882,
      "step": 432330
    },
    {
      "epoch": 1.51524391312424,
      "grad_norm": 2.515625,
      "learning_rate": 2.749558011480019e-05,
      "loss": 0.859,
      "step": 432340
    },
    {
      "epoch": 1.5152789606311354,
      "grad_norm": 2.703125,
      "learning_rate": 2.749493108613649e-05,
      "loss": 0.7933,
      "step": 432350
    },
    {
      "epoch": 1.5153140081380312,
      "grad_norm": 2.125,
      "learning_rate": 2.7494282057472787e-05,
      "loss": 0.7778,
      "step": 432360
    },
    {
      "epoch": 1.5153490556449267,
      "grad_norm": 3.0625,
      "learning_rate": 2.7493633028809085e-05,
      "loss": 0.7877,
      "step": 432370
    },
    {
      "epoch": 1.5153841031518223,
      "grad_norm": 3.09375,
      "learning_rate": 2.7492984000145383e-05,
      "loss": 0.8416,
      "step": 432380
    },
    {
      "epoch": 1.515419150658718,
      "grad_norm": 2.625,
      "learning_rate": 2.749233497148168e-05,
      "loss": 0.7514,
      "step": 432390
    },
    {
      "epoch": 1.5154541981656133,
      "grad_norm": 2.734375,
      "learning_rate": 2.749168594281798e-05,
      "loss": 0.8951,
      "step": 432400
    },
    {
      "epoch": 1.515489245672509,
      "grad_norm": 3.34375,
      "learning_rate": 2.7491036914154277e-05,
      "loss": 0.9207,
      "step": 432410
    },
    {
      "epoch": 1.5155242931794048,
      "grad_norm": 2.921875,
      "learning_rate": 2.7490387885490575e-05,
      "loss": 0.8414,
      "step": 432420
    },
    {
      "epoch": 1.5155593406863002,
      "grad_norm": 2.90625,
      "learning_rate": 2.7489738856826873e-05,
      "loss": 0.8425,
      "step": 432430
    },
    {
      "epoch": 1.515594388193196,
      "grad_norm": 3.203125,
      "learning_rate": 2.748908982816317e-05,
      "loss": 0.8562,
      "step": 432440
    },
    {
      "epoch": 1.5156294357000915,
      "grad_norm": 3.078125,
      "learning_rate": 2.748844079949947e-05,
      "loss": 0.8045,
      "step": 432450
    },
    {
      "epoch": 1.515664483206987,
      "grad_norm": 3.015625,
      "learning_rate": 2.7487791770835767e-05,
      "loss": 0.8359,
      "step": 432460
    },
    {
      "epoch": 1.5156995307138827,
      "grad_norm": 3.0,
      "learning_rate": 2.748714274217207e-05,
      "loss": 0.8194,
      "step": 432470
    },
    {
      "epoch": 1.5157345782207783,
      "grad_norm": 2.953125,
      "learning_rate": 2.7486493713508367e-05,
      "loss": 0.9192,
      "step": 432480
    },
    {
      "epoch": 1.5157696257276738,
      "grad_norm": 2.796875,
      "learning_rate": 2.7485844684844665e-05,
      "loss": 0.8222,
      "step": 432490
    },
    {
      "epoch": 1.5158046732345696,
      "grad_norm": 3.609375,
      "learning_rate": 2.7485195656180963e-05,
      "loss": 0.8622,
      "step": 432500
    },
    {
      "epoch": 1.515839720741465,
      "grad_norm": 2.421875,
      "learning_rate": 2.748454662751726e-05,
      "loss": 0.8767,
      "step": 432510
    },
    {
      "epoch": 1.5158747682483606,
      "grad_norm": 2.875,
      "learning_rate": 2.748389759885356e-05,
      "loss": 0.9407,
      "step": 432520
    },
    {
      "epoch": 1.5159098157552564,
      "grad_norm": 3.296875,
      "learning_rate": 2.7483248570189857e-05,
      "loss": 0.8881,
      "step": 432530
    },
    {
      "epoch": 1.5159448632621517,
      "grad_norm": 2.546875,
      "learning_rate": 2.7482599541526155e-05,
      "loss": 0.7949,
      "step": 432540
    },
    {
      "epoch": 1.5159799107690475,
      "grad_norm": 2.859375,
      "learning_rate": 2.7481950512862453e-05,
      "loss": 0.834,
      "step": 432550
    },
    {
      "epoch": 1.516014958275943,
      "grad_norm": 2.9375,
      "learning_rate": 2.748130148419875e-05,
      "loss": 0.8316,
      "step": 432560
    },
    {
      "epoch": 1.5160500057828386,
      "grad_norm": 3.203125,
      "learning_rate": 2.748065245553505e-05,
      "loss": 0.8505,
      "step": 432570
    },
    {
      "epoch": 1.5160850532897343,
      "grad_norm": 3.0625,
      "learning_rate": 2.7480003426871347e-05,
      "loss": 0.8422,
      "step": 432580
    },
    {
      "epoch": 1.5161201007966298,
      "grad_norm": 3.328125,
      "learning_rate": 2.7479354398207645e-05,
      "loss": 0.8904,
      "step": 432590
    },
    {
      "epoch": 1.5161551483035254,
      "grad_norm": 2.703125,
      "learning_rate": 2.7478705369543943e-05,
      "loss": 0.9408,
      "step": 432600
    },
    {
      "epoch": 1.5161901958104211,
      "grad_norm": 3.0,
      "learning_rate": 2.7478056340880244e-05,
      "loss": 0.8513,
      "step": 432610
    },
    {
      "epoch": 1.5162252433173167,
      "grad_norm": 2.953125,
      "learning_rate": 2.7477407312216535e-05,
      "loss": 0.8107,
      "step": 432620
    },
    {
      "epoch": 1.5162602908242122,
      "grad_norm": 2.984375,
      "learning_rate": 2.7476758283552833e-05,
      "loss": 0.784,
      "step": 432630
    },
    {
      "epoch": 1.516295338331108,
      "grad_norm": 2.796875,
      "learning_rate": 2.747610925488913e-05,
      "loss": 0.8171,
      "step": 432640
    },
    {
      "epoch": 1.5163303858380033,
      "grad_norm": 3.390625,
      "learning_rate": 2.747546022622543e-05,
      "loss": 0.9043,
      "step": 432650
    },
    {
      "epoch": 1.516365433344899,
      "grad_norm": 2.890625,
      "learning_rate": 2.7474811197561727e-05,
      "loss": 0.8264,
      "step": 432660
    },
    {
      "epoch": 1.5164004808517946,
      "grad_norm": 2.828125,
      "learning_rate": 2.7474162168898025e-05,
      "loss": 0.9324,
      "step": 432670
    },
    {
      "epoch": 1.51643552835869,
      "grad_norm": 2.78125,
      "learning_rate": 2.7473513140234323e-05,
      "loss": 0.857,
      "step": 432680
    },
    {
      "epoch": 1.5164705758655859,
      "grad_norm": 2.8125,
      "learning_rate": 2.747286411157062e-05,
      "loss": 0.7922,
      "step": 432690
    },
    {
      "epoch": 1.5165056233724814,
      "grad_norm": 2.875,
      "learning_rate": 2.7472215082906923e-05,
      "loss": 0.8373,
      "step": 432700
    },
    {
      "epoch": 1.516540670879377,
      "grad_norm": 3.21875,
      "learning_rate": 2.747156605424322e-05,
      "loss": 0.8311,
      "step": 432710
    },
    {
      "epoch": 1.5165757183862727,
      "grad_norm": 2.84375,
      "learning_rate": 2.747091702557952e-05,
      "loss": 0.829,
      "step": 432720
    },
    {
      "epoch": 1.5166107658931682,
      "grad_norm": 3.015625,
      "learning_rate": 2.7470267996915817e-05,
      "loss": 0.8948,
      "step": 432730
    },
    {
      "epoch": 1.5166458134000638,
      "grad_norm": 2.5,
      "learning_rate": 2.7469618968252115e-05,
      "loss": 0.8703,
      "step": 432740
    },
    {
      "epoch": 1.5166808609069595,
      "grad_norm": 2.40625,
      "learning_rate": 2.7468969939588413e-05,
      "loss": 0.7725,
      "step": 432750
    },
    {
      "epoch": 1.5167159084138548,
      "grad_norm": 3.09375,
      "learning_rate": 2.746832091092471e-05,
      "loss": 0.8332,
      "step": 432760
    },
    {
      "epoch": 1.5167509559207506,
      "grad_norm": 3.0,
      "learning_rate": 2.746767188226101e-05,
      "loss": 0.7786,
      "step": 432770
    },
    {
      "epoch": 1.5167860034276461,
      "grad_norm": 2.65625,
      "learning_rate": 2.7467022853597307e-05,
      "loss": 0.8694,
      "step": 432780
    },
    {
      "epoch": 1.5168210509345417,
      "grad_norm": 2.984375,
      "learning_rate": 2.7466373824933605e-05,
      "loss": 0.8905,
      "step": 432790
    },
    {
      "epoch": 1.5168560984414374,
      "grad_norm": 2.84375,
      "learning_rate": 2.7465724796269903e-05,
      "loss": 0.8613,
      "step": 432800
    },
    {
      "epoch": 1.516891145948333,
      "grad_norm": 3.0625,
      "learning_rate": 2.74650757676062e-05,
      "loss": 0.9288,
      "step": 432810
    },
    {
      "epoch": 1.5169261934552285,
      "grad_norm": 3.015625,
      "learning_rate": 2.74644267389425e-05,
      "loss": 0.8117,
      "step": 432820
    },
    {
      "epoch": 1.5169612409621243,
      "grad_norm": 2.96875,
      "learning_rate": 2.7463777710278797e-05,
      "loss": 0.8345,
      "step": 432830
    },
    {
      "epoch": 1.5169962884690198,
      "grad_norm": 2.046875,
      "learning_rate": 2.7463128681615098e-05,
      "loss": 0.8416,
      "step": 432840
    },
    {
      "epoch": 1.5170313359759153,
      "grad_norm": 2.5,
      "learning_rate": 2.7462479652951396e-05,
      "loss": 0.9061,
      "step": 432850
    },
    {
      "epoch": 1.517066383482811,
      "grad_norm": 2.890625,
      "learning_rate": 2.7461830624287694e-05,
      "loss": 0.8471,
      "step": 432860
    },
    {
      "epoch": 1.5171014309897064,
      "grad_norm": 2.5625,
      "learning_rate": 2.7461181595623992e-05,
      "loss": 0.8069,
      "step": 432870
    },
    {
      "epoch": 1.5171364784966022,
      "grad_norm": 2.75,
      "learning_rate": 2.746053256696029e-05,
      "loss": 0.8238,
      "step": 432880
    },
    {
      "epoch": 1.5171715260034977,
      "grad_norm": 2.796875,
      "learning_rate": 2.7459883538296588e-05,
      "loss": 0.9019,
      "step": 432890
    },
    {
      "epoch": 1.5172065735103932,
      "grad_norm": 3.171875,
      "learning_rate": 2.7459234509632886e-05,
      "loss": 0.8781,
      "step": 432900
    },
    {
      "epoch": 1.517241621017289,
      "grad_norm": 3.21875,
      "learning_rate": 2.7458585480969184e-05,
      "loss": 0.8269,
      "step": 432910
    },
    {
      "epoch": 1.5172766685241845,
      "grad_norm": 2.953125,
      "learning_rate": 2.7457936452305482e-05,
      "loss": 0.8764,
      "step": 432920
    },
    {
      "epoch": 1.51731171603108,
      "grad_norm": 3.296875,
      "learning_rate": 2.745728742364178e-05,
      "loss": 0.8114,
      "step": 432930
    },
    {
      "epoch": 1.5173467635379758,
      "grad_norm": 2.875,
      "learning_rate": 2.7456638394978078e-05,
      "loss": 0.8483,
      "step": 432940
    },
    {
      "epoch": 1.5173818110448714,
      "grad_norm": 3.46875,
      "learning_rate": 2.7455989366314376e-05,
      "loss": 0.7706,
      "step": 432950
    },
    {
      "epoch": 1.517416858551767,
      "grad_norm": 2.984375,
      "learning_rate": 2.7455340337650674e-05,
      "loss": 0.8938,
      "step": 432960
    },
    {
      "epoch": 1.5174519060586626,
      "grad_norm": 2.65625,
      "learning_rate": 2.7454691308986976e-05,
      "loss": 0.8362,
      "step": 432970
    },
    {
      "epoch": 1.517486953565558,
      "grad_norm": 2.875,
      "learning_rate": 2.7454042280323274e-05,
      "loss": 0.79,
      "step": 432980
    },
    {
      "epoch": 1.5175220010724537,
      "grad_norm": 3.3125,
      "learning_rate": 2.7453393251659565e-05,
      "loss": 0.7981,
      "step": 432990
    },
    {
      "epoch": 1.5175570485793493,
      "grad_norm": 2.890625,
      "learning_rate": 2.7452744222995863e-05,
      "loss": 0.8642,
      "step": 433000
    },
    {
      "epoch": 1.5175920960862448,
      "grad_norm": 2.609375,
      "learning_rate": 2.745209519433216e-05,
      "loss": 0.8177,
      "step": 433010
    },
    {
      "epoch": 1.5176271435931405,
      "grad_norm": 2.828125,
      "learning_rate": 2.745144616566846e-05,
      "loss": 0.8896,
      "step": 433020
    },
    {
      "epoch": 1.517662191100036,
      "grad_norm": 2.890625,
      "learning_rate": 2.7450797137004757e-05,
      "loss": 0.7851,
      "step": 433030
    },
    {
      "epoch": 1.5176972386069316,
      "grad_norm": 2.90625,
      "learning_rate": 2.7450148108341055e-05,
      "loss": 0.8502,
      "step": 433040
    },
    {
      "epoch": 1.5177322861138274,
      "grad_norm": 2.671875,
      "learning_rate": 2.7449499079677353e-05,
      "loss": 0.8503,
      "step": 433050
    },
    {
      "epoch": 1.517767333620723,
      "grad_norm": 2.546875,
      "learning_rate": 2.7448850051013654e-05,
      "loss": 0.85,
      "step": 433060
    },
    {
      "epoch": 1.5178023811276184,
      "grad_norm": 3.21875,
      "learning_rate": 2.7448201022349952e-05,
      "loss": 0.8763,
      "step": 433070
    },
    {
      "epoch": 1.5178374286345142,
      "grad_norm": 3.0625,
      "learning_rate": 2.744755199368625e-05,
      "loss": 0.8854,
      "step": 433080
    },
    {
      "epoch": 1.5178724761414095,
      "grad_norm": 2.734375,
      "learning_rate": 2.7446902965022548e-05,
      "loss": 0.8211,
      "step": 433090
    },
    {
      "epoch": 1.5179075236483053,
      "grad_norm": 2.859375,
      "learning_rate": 2.7446253936358846e-05,
      "loss": 0.9532,
      "step": 433100
    },
    {
      "epoch": 1.517942571155201,
      "grad_norm": 3.25,
      "learning_rate": 2.7445604907695144e-05,
      "loss": 0.7928,
      "step": 433110
    },
    {
      "epoch": 1.5179776186620964,
      "grad_norm": 2.609375,
      "learning_rate": 2.7444955879031442e-05,
      "loss": 0.811,
      "step": 433120
    },
    {
      "epoch": 1.518012666168992,
      "grad_norm": 2.9375,
      "learning_rate": 2.744430685036774e-05,
      "loss": 0.8463,
      "step": 433130
    },
    {
      "epoch": 1.5180477136758876,
      "grad_norm": 3.203125,
      "learning_rate": 2.7443657821704038e-05,
      "loss": 0.831,
      "step": 433140
    },
    {
      "epoch": 1.5180827611827832,
      "grad_norm": 3.125,
      "learning_rate": 2.7443008793040336e-05,
      "loss": 0.8038,
      "step": 433150
    },
    {
      "epoch": 1.518117808689679,
      "grad_norm": 2.578125,
      "learning_rate": 2.7442359764376634e-05,
      "loss": 0.7858,
      "step": 433160
    },
    {
      "epoch": 1.5181528561965745,
      "grad_norm": 3.109375,
      "learning_rate": 2.7441710735712932e-05,
      "loss": 0.9171,
      "step": 433170
    },
    {
      "epoch": 1.51818790370347,
      "grad_norm": 2.765625,
      "learning_rate": 2.744106170704923e-05,
      "loss": 0.8555,
      "step": 433180
    },
    {
      "epoch": 1.5182229512103658,
      "grad_norm": 3.125,
      "learning_rate": 2.7440412678385528e-05,
      "loss": 0.7982,
      "step": 433190
    },
    {
      "epoch": 1.518257998717261,
      "grad_norm": 2.984375,
      "learning_rate": 2.743976364972183e-05,
      "loss": 0.8097,
      "step": 433200
    },
    {
      "epoch": 1.5182930462241568,
      "grad_norm": 3.015625,
      "learning_rate": 2.7439114621058128e-05,
      "loss": 0.854,
      "step": 433210
    },
    {
      "epoch": 1.5183280937310526,
      "grad_norm": 3.3125,
      "learning_rate": 2.7438465592394426e-05,
      "loss": 0.7386,
      "step": 433220
    },
    {
      "epoch": 1.518363141237948,
      "grad_norm": 2.953125,
      "learning_rate": 2.7437816563730724e-05,
      "loss": 0.8426,
      "step": 433230
    },
    {
      "epoch": 1.5183981887448437,
      "grad_norm": 3.046875,
      "learning_rate": 2.743716753506702e-05,
      "loss": 0.8698,
      "step": 433240
    },
    {
      "epoch": 1.5184332362517392,
      "grad_norm": 3.234375,
      "learning_rate": 2.743651850640332e-05,
      "loss": 0.8659,
      "step": 433250
    },
    {
      "epoch": 1.5184682837586347,
      "grad_norm": 2.78125,
      "learning_rate": 2.7435869477739618e-05,
      "loss": 0.8498,
      "step": 433260
    },
    {
      "epoch": 1.5185033312655305,
      "grad_norm": 2.765625,
      "learning_rate": 2.7435220449075916e-05,
      "loss": 0.8497,
      "step": 433270
    },
    {
      "epoch": 1.518538378772426,
      "grad_norm": 2.78125,
      "learning_rate": 2.7434571420412214e-05,
      "loss": 0.7645,
      "step": 433280
    },
    {
      "epoch": 1.5185734262793216,
      "grad_norm": 3.15625,
      "learning_rate": 2.743392239174851e-05,
      "loss": 0.7989,
      "step": 433290
    },
    {
      "epoch": 1.5186084737862173,
      "grad_norm": 2.9375,
      "learning_rate": 2.743327336308481e-05,
      "loss": 0.8838,
      "step": 433300
    },
    {
      "epoch": 1.5186435212931129,
      "grad_norm": 3.125,
      "learning_rate": 2.7432624334421108e-05,
      "loss": 0.8407,
      "step": 433310
    },
    {
      "epoch": 1.5186785688000084,
      "grad_norm": 3.015625,
      "learning_rate": 2.7431975305757406e-05,
      "loss": 0.8212,
      "step": 433320
    },
    {
      "epoch": 1.5187136163069042,
      "grad_norm": 2.421875,
      "learning_rate": 2.7431326277093704e-05,
      "loss": 0.9054,
      "step": 433330
    },
    {
      "epoch": 1.5187486638137995,
      "grad_norm": 3.0625,
      "learning_rate": 2.7430677248430005e-05,
      "loss": 0.934,
      "step": 433340
    },
    {
      "epoch": 1.5187837113206952,
      "grad_norm": 2.875,
      "learning_rate": 2.7430028219766303e-05,
      "loss": 0.8232,
      "step": 433350
    },
    {
      "epoch": 1.5188187588275908,
      "grad_norm": 2.765625,
      "learning_rate": 2.74293791911026e-05,
      "loss": 0.8028,
      "step": 433360
    },
    {
      "epoch": 1.5188538063344863,
      "grad_norm": 2.296875,
      "learning_rate": 2.7428730162438892e-05,
      "loss": 0.7556,
      "step": 433370
    },
    {
      "epoch": 1.518888853841382,
      "grad_norm": 3.3125,
      "learning_rate": 2.742808113377519e-05,
      "loss": 0.8704,
      "step": 433380
    },
    {
      "epoch": 1.5189239013482776,
      "grad_norm": 3.21875,
      "learning_rate": 2.7427432105111488e-05,
      "loss": 0.7837,
      "step": 433390
    },
    {
      "epoch": 1.5189589488551731,
      "grad_norm": 2.96875,
      "learning_rate": 2.7426783076447786e-05,
      "loss": 0.8133,
      "step": 433400
    },
    {
      "epoch": 1.5189939963620689,
      "grad_norm": 2.65625,
      "learning_rate": 2.7426134047784084e-05,
      "loss": 0.8548,
      "step": 433410
    },
    {
      "epoch": 1.5190290438689644,
      "grad_norm": 2.8125,
      "learning_rate": 2.7425485019120382e-05,
      "loss": 0.8635,
      "step": 433420
    },
    {
      "epoch": 1.51906409137586,
      "grad_norm": 3.015625,
      "learning_rate": 2.7424835990456684e-05,
      "loss": 0.905,
      "step": 433430
    },
    {
      "epoch": 1.5190991388827557,
      "grad_norm": 2.9375,
      "learning_rate": 2.742418696179298e-05,
      "loss": 0.8211,
      "step": 433440
    },
    {
      "epoch": 1.519134186389651,
      "grad_norm": 2.796875,
      "learning_rate": 2.742353793312928e-05,
      "loss": 0.8706,
      "step": 433450
    },
    {
      "epoch": 1.5191692338965468,
      "grad_norm": 2.953125,
      "learning_rate": 2.7422888904465578e-05,
      "loss": 0.8496,
      "step": 433460
    },
    {
      "epoch": 1.5192042814034423,
      "grad_norm": 2.796875,
      "learning_rate": 2.7422239875801876e-05,
      "loss": 0.8916,
      "step": 433470
    },
    {
      "epoch": 1.5192393289103379,
      "grad_norm": 2.375,
      "learning_rate": 2.7421590847138174e-05,
      "loss": 0.7715,
      "step": 433480
    },
    {
      "epoch": 1.5192743764172336,
      "grad_norm": 3.0625,
      "learning_rate": 2.742094181847447e-05,
      "loss": 0.8528,
      "step": 433490
    },
    {
      "epoch": 1.5193094239241292,
      "grad_norm": 2.78125,
      "learning_rate": 2.742029278981077e-05,
      "loss": 0.9077,
      "step": 433500
    },
    {
      "epoch": 1.5193444714310247,
      "grad_norm": 2.546875,
      "learning_rate": 2.7419643761147068e-05,
      "loss": 0.9024,
      "step": 433510
    },
    {
      "epoch": 1.5193795189379204,
      "grad_norm": 2.53125,
      "learning_rate": 2.7418994732483366e-05,
      "loss": 0.8449,
      "step": 433520
    },
    {
      "epoch": 1.519414566444816,
      "grad_norm": 2.953125,
      "learning_rate": 2.7418345703819664e-05,
      "loss": 0.8661,
      "step": 433530
    },
    {
      "epoch": 1.5194496139517115,
      "grad_norm": 3.0625,
      "learning_rate": 2.741769667515596e-05,
      "loss": 0.8058,
      "step": 433540
    },
    {
      "epoch": 1.5194846614586073,
      "grad_norm": 2.78125,
      "learning_rate": 2.741704764649226e-05,
      "loss": 0.7709,
      "step": 433550
    },
    {
      "epoch": 1.5195197089655026,
      "grad_norm": 3.515625,
      "learning_rate": 2.7416398617828558e-05,
      "loss": 0.8594,
      "step": 433560
    },
    {
      "epoch": 1.5195547564723983,
      "grad_norm": 2.75,
      "learning_rate": 2.741574958916486e-05,
      "loss": 0.7573,
      "step": 433570
    },
    {
      "epoch": 1.5195898039792939,
      "grad_norm": 2.734375,
      "learning_rate": 2.7415100560501157e-05,
      "loss": 0.8135,
      "step": 433580
    },
    {
      "epoch": 1.5196248514861894,
      "grad_norm": 3.0625,
      "learning_rate": 2.7414451531837455e-05,
      "loss": 0.8369,
      "step": 433590
    },
    {
      "epoch": 1.5196598989930852,
      "grad_norm": 2.828125,
      "learning_rate": 2.7413802503173753e-05,
      "loss": 0.7421,
      "step": 433600
    },
    {
      "epoch": 1.5196949464999807,
      "grad_norm": 3.078125,
      "learning_rate": 2.741315347451005e-05,
      "loss": 0.8634,
      "step": 433610
    },
    {
      "epoch": 1.5197299940068763,
      "grad_norm": 2.609375,
      "learning_rate": 2.741250444584635e-05,
      "loss": 0.8708,
      "step": 433620
    },
    {
      "epoch": 1.519765041513772,
      "grad_norm": 2.8125,
      "learning_rate": 2.7411855417182647e-05,
      "loss": 0.8824,
      "step": 433630
    },
    {
      "epoch": 1.5198000890206675,
      "grad_norm": 3.328125,
      "learning_rate": 2.7411206388518945e-05,
      "loss": 0.8352,
      "step": 433640
    },
    {
      "epoch": 1.519835136527563,
      "grad_norm": 3.046875,
      "learning_rate": 2.7410557359855243e-05,
      "loss": 0.8605,
      "step": 433650
    },
    {
      "epoch": 1.5198701840344588,
      "grad_norm": 2.9375,
      "learning_rate": 2.740990833119154e-05,
      "loss": 0.8723,
      "step": 433660
    },
    {
      "epoch": 1.5199052315413542,
      "grad_norm": 3.265625,
      "learning_rate": 2.740925930252784e-05,
      "loss": 0.8249,
      "step": 433670
    },
    {
      "epoch": 1.51994027904825,
      "grad_norm": 2.703125,
      "learning_rate": 2.7408610273864137e-05,
      "loss": 0.8458,
      "step": 433680
    },
    {
      "epoch": 1.5199753265551454,
      "grad_norm": 2.96875,
      "learning_rate": 2.7407961245200435e-05,
      "loss": 0.7978,
      "step": 433690
    },
    {
      "epoch": 1.520010374062041,
      "grad_norm": 3.296875,
      "learning_rate": 2.7407312216536733e-05,
      "loss": 0.7904,
      "step": 433700
    },
    {
      "epoch": 1.5200454215689367,
      "grad_norm": 2.765625,
      "learning_rate": 2.7406663187873034e-05,
      "loss": 0.8325,
      "step": 433710
    },
    {
      "epoch": 1.5200804690758323,
      "grad_norm": 2.734375,
      "learning_rate": 2.7406014159209332e-05,
      "loss": 0.887,
      "step": 433720
    },
    {
      "epoch": 1.5201155165827278,
      "grad_norm": 2.9375,
      "learning_rate": 2.740536513054563e-05,
      "loss": 0.8116,
      "step": 433730
    },
    {
      "epoch": 1.5201505640896236,
      "grad_norm": 3.0625,
      "learning_rate": 2.740471610188192e-05,
      "loss": 0.8774,
      "step": 433740
    },
    {
      "epoch": 1.520185611596519,
      "grad_norm": 2.5,
      "learning_rate": 2.740406707321822e-05,
      "loss": 0.8633,
      "step": 433750
    },
    {
      "epoch": 1.5202206591034146,
      "grad_norm": 3.0625,
      "learning_rate": 2.7403418044554518e-05,
      "loss": 0.7952,
      "step": 433760
    },
    {
      "epoch": 1.5202557066103104,
      "grad_norm": 2.75,
      "learning_rate": 2.7402769015890816e-05,
      "loss": 0.7591,
      "step": 433770
    },
    {
      "epoch": 1.5202907541172057,
      "grad_norm": 3.5,
      "learning_rate": 2.7402119987227114e-05,
      "loss": 0.8271,
      "step": 433780
    },
    {
      "epoch": 1.5203258016241015,
      "grad_norm": 2.59375,
      "learning_rate": 2.740147095856341e-05,
      "loss": 0.8349,
      "step": 433790
    },
    {
      "epoch": 1.5203608491309972,
      "grad_norm": 2.9375,
      "learning_rate": 2.7400821929899713e-05,
      "loss": 0.8725,
      "step": 433800
    },
    {
      "epoch": 1.5203958966378925,
      "grad_norm": 3.1875,
      "learning_rate": 2.740017290123601e-05,
      "loss": 0.854,
      "step": 433810
    },
    {
      "epoch": 1.5204309441447883,
      "grad_norm": 2.5,
      "learning_rate": 2.739952387257231e-05,
      "loss": 0.9064,
      "step": 433820
    },
    {
      "epoch": 1.5204659916516838,
      "grad_norm": 2.921875,
      "learning_rate": 2.7398874843908607e-05,
      "loss": 0.8926,
      "step": 433830
    },
    {
      "epoch": 1.5205010391585794,
      "grad_norm": 2.671875,
      "learning_rate": 2.7398225815244905e-05,
      "loss": 0.7546,
      "step": 433840
    },
    {
      "epoch": 1.5205360866654751,
      "grad_norm": 2.953125,
      "learning_rate": 2.7397576786581203e-05,
      "loss": 0.8226,
      "step": 433850
    },
    {
      "epoch": 1.5205711341723707,
      "grad_norm": 2.703125,
      "learning_rate": 2.73969277579175e-05,
      "loss": 0.8065,
      "step": 433860
    },
    {
      "epoch": 1.5206061816792662,
      "grad_norm": 2.78125,
      "learning_rate": 2.73962787292538e-05,
      "loss": 0.9395,
      "step": 433870
    },
    {
      "epoch": 1.520641229186162,
      "grad_norm": 2.78125,
      "learning_rate": 2.7395629700590097e-05,
      "loss": 0.8059,
      "step": 433880
    },
    {
      "epoch": 1.5206762766930575,
      "grad_norm": 2.796875,
      "learning_rate": 2.7394980671926395e-05,
      "loss": 0.8545,
      "step": 433890
    },
    {
      "epoch": 1.520711324199953,
      "grad_norm": 3.015625,
      "learning_rate": 2.7394331643262693e-05,
      "loss": 0.8197,
      "step": 433900
    },
    {
      "epoch": 1.5207463717068488,
      "grad_norm": 2.984375,
      "learning_rate": 2.739368261459899e-05,
      "loss": 0.8733,
      "step": 433910
    },
    {
      "epoch": 1.520781419213744,
      "grad_norm": 2.875,
      "learning_rate": 2.739303358593529e-05,
      "loss": 0.7778,
      "step": 433920
    },
    {
      "epoch": 1.5208164667206399,
      "grad_norm": 2.765625,
      "learning_rate": 2.7392384557271587e-05,
      "loss": 0.8845,
      "step": 433930
    },
    {
      "epoch": 1.5208515142275354,
      "grad_norm": 3.015625,
      "learning_rate": 2.739173552860789e-05,
      "loss": 0.7708,
      "step": 433940
    },
    {
      "epoch": 1.520886561734431,
      "grad_norm": 3.015625,
      "learning_rate": 2.7391086499944186e-05,
      "loss": 0.8532,
      "step": 433950
    },
    {
      "epoch": 1.5209216092413267,
      "grad_norm": 2.765625,
      "learning_rate": 2.7390437471280484e-05,
      "loss": 0.8402,
      "step": 433960
    },
    {
      "epoch": 1.5209566567482222,
      "grad_norm": 3.453125,
      "learning_rate": 2.7389788442616782e-05,
      "loss": 0.8089,
      "step": 433970
    },
    {
      "epoch": 1.5209917042551178,
      "grad_norm": 2.453125,
      "learning_rate": 2.738913941395308e-05,
      "loss": 0.9557,
      "step": 433980
    },
    {
      "epoch": 1.5210267517620135,
      "grad_norm": 2.859375,
      "learning_rate": 2.738849038528938e-05,
      "loss": 0.7738,
      "step": 433990
    },
    {
      "epoch": 1.521061799268909,
      "grad_norm": 2.6875,
      "learning_rate": 2.7387841356625676e-05,
      "loss": 0.8375,
      "step": 434000
    },
    {
      "epoch": 1.5210968467758046,
      "grad_norm": 2.953125,
      "learning_rate": 2.7387192327961974e-05,
      "loss": 0.805,
      "step": 434010
    },
    {
      "epoch": 1.5211318942827003,
      "grad_norm": 2.9375,
      "learning_rate": 2.7386543299298272e-05,
      "loss": 0.8366,
      "step": 434020
    },
    {
      "epoch": 1.5211669417895957,
      "grad_norm": 2.5,
      "learning_rate": 2.738589427063457e-05,
      "loss": 0.7749,
      "step": 434030
    },
    {
      "epoch": 1.5212019892964914,
      "grad_norm": 2.703125,
      "learning_rate": 2.738524524197087e-05,
      "loss": 0.8095,
      "step": 434040
    },
    {
      "epoch": 1.521237036803387,
      "grad_norm": 3.25,
      "learning_rate": 2.7384596213307166e-05,
      "loss": 0.882,
      "step": 434050
    },
    {
      "epoch": 1.5212720843102825,
      "grad_norm": 2.625,
      "learning_rate": 2.7383947184643464e-05,
      "loss": 0.8006,
      "step": 434060
    },
    {
      "epoch": 1.5213071318171782,
      "grad_norm": 3.484375,
      "learning_rate": 2.7383298155979766e-05,
      "loss": 0.8761,
      "step": 434070
    },
    {
      "epoch": 1.5213421793240738,
      "grad_norm": 3.15625,
      "learning_rate": 2.7382649127316064e-05,
      "loss": 0.8441,
      "step": 434080
    },
    {
      "epoch": 1.5213772268309693,
      "grad_norm": 3.265625,
      "learning_rate": 2.7382000098652362e-05,
      "loss": 0.8566,
      "step": 434090
    },
    {
      "epoch": 1.521412274337865,
      "grad_norm": 2.796875,
      "learning_rate": 2.738135106998866e-05,
      "loss": 0.7998,
      "step": 434100
    },
    {
      "epoch": 1.5214473218447606,
      "grad_norm": 2.59375,
      "learning_rate": 2.7380702041324958e-05,
      "loss": 0.7646,
      "step": 434110
    },
    {
      "epoch": 1.5214823693516561,
      "grad_norm": 3.203125,
      "learning_rate": 2.738005301266125e-05,
      "loss": 0.8422,
      "step": 434120
    },
    {
      "epoch": 1.521517416858552,
      "grad_norm": 2.953125,
      "learning_rate": 2.7379403983997547e-05,
      "loss": 0.8087,
      "step": 434130
    },
    {
      "epoch": 1.5215524643654472,
      "grad_norm": 2.953125,
      "learning_rate": 2.7378754955333845e-05,
      "loss": 0.8723,
      "step": 434140
    },
    {
      "epoch": 1.521587511872343,
      "grad_norm": 2.9375,
      "learning_rate": 2.7378105926670143e-05,
      "loss": 0.899,
      "step": 434150
    },
    {
      "epoch": 1.5216225593792385,
      "grad_norm": 2.828125,
      "learning_rate": 2.7377456898006444e-05,
      "loss": 0.8612,
      "step": 434160
    },
    {
      "epoch": 1.521657606886134,
      "grad_norm": 2.71875,
      "learning_rate": 2.7376807869342742e-05,
      "loss": 0.7798,
      "step": 434170
    },
    {
      "epoch": 1.5216926543930298,
      "grad_norm": 2.71875,
      "learning_rate": 2.737615884067904e-05,
      "loss": 0.7812,
      "step": 434180
    },
    {
      "epoch": 1.5217277018999253,
      "grad_norm": 2.703125,
      "learning_rate": 2.737550981201534e-05,
      "loss": 0.9297,
      "step": 434190
    },
    {
      "epoch": 1.5217627494068209,
      "grad_norm": 2.71875,
      "learning_rate": 2.7374860783351636e-05,
      "loss": 0.8374,
      "step": 434200
    },
    {
      "epoch": 1.5217977969137166,
      "grad_norm": 3.234375,
      "learning_rate": 2.7374211754687934e-05,
      "loss": 0.8766,
      "step": 434210
    },
    {
      "epoch": 1.5218328444206122,
      "grad_norm": 3.03125,
      "learning_rate": 2.7373562726024232e-05,
      "loss": 0.8443,
      "step": 434220
    },
    {
      "epoch": 1.5218678919275077,
      "grad_norm": 3.0625,
      "learning_rate": 2.737291369736053e-05,
      "loss": 0.816,
      "step": 434230
    },
    {
      "epoch": 1.5219029394344035,
      "grad_norm": 2.984375,
      "learning_rate": 2.737226466869683e-05,
      "loss": 0.8951,
      "step": 434240
    },
    {
      "epoch": 1.5219379869412988,
      "grad_norm": 2.890625,
      "learning_rate": 2.7371615640033126e-05,
      "loss": 0.8067,
      "step": 434250
    },
    {
      "epoch": 1.5219730344481945,
      "grad_norm": 3.140625,
      "learning_rate": 2.7370966611369424e-05,
      "loss": 0.895,
      "step": 434260
    },
    {
      "epoch": 1.52200808195509,
      "grad_norm": 2.484375,
      "learning_rate": 2.7370317582705722e-05,
      "loss": 0.8176,
      "step": 434270
    },
    {
      "epoch": 1.5220431294619856,
      "grad_norm": 4.28125,
      "learning_rate": 2.736966855404202e-05,
      "loss": 0.9033,
      "step": 434280
    },
    {
      "epoch": 1.5220781769688814,
      "grad_norm": 3.234375,
      "learning_rate": 2.736901952537832e-05,
      "loss": 0.9173,
      "step": 434290
    },
    {
      "epoch": 1.522113224475777,
      "grad_norm": 2.890625,
      "learning_rate": 2.736837049671462e-05,
      "loss": 0.7885,
      "step": 434300
    },
    {
      "epoch": 1.5221482719826724,
      "grad_norm": 2.90625,
      "learning_rate": 2.7367721468050918e-05,
      "loss": 0.8872,
      "step": 434310
    },
    {
      "epoch": 1.5221833194895682,
      "grad_norm": 2.84375,
      "learning_rate": 2.7367072439387216e-05,
      "loss": 0.8139,
      "step": 434320
    },
    {
      "epoch": 1.5222183669964637,
      "grad_norm": 3.34375,
      "learning_rate": 2.7366423410723514e-05,
      "loss": 0.9171,
      "step": 434330
    },
    {
      "epoch": 1.5222534145033593,
      "grad_norm": 2.703125,
      "learning_rate": 2.7365774382059812e-05,
      "loss": 0.7985,
      "step": 434340
    },
    {
      "epoch": 1.522288462010255,
      "grad_norm": 3.1875,
      "learning_rate": 2.736512535339611e-05,
      "loss": 0.8934,
      "step": 434350
    },
    {
      "epoch": 1.5223235095171503,
      "grad_norm": 2.9375,
      "learning_rate": 2.7364476324732408e-05,
      "loss": 0.8944,
      "step": 434360
    },
    {
      "epoch": 1.522358557024046,
      "grad_norm": 2.546875,
      "learning_rate": 2.7363827296068706e-05,
      "loss": 0.7897,
      "step": 434370
    },
    {
      "epoch": 1.5223936045309419,
      "grad_norm": 2.75,
      "learning_rate": 2.7363178267405004e-05,
      "loss": 0.8302,
      "step": 434380
    },
    {
      "epoch": 1.5224286520378372,
      "grad_norm": 3.40625,
      "learning_rate": 2.7362529238741302e-05,
      "loss": 0.9309,
      "step": 434390
    },
    {
      "epoch": 1.522463699544733,
      "grad_norm": 3.0,
      "learning_rate": 2.73618802100776e-05,
      "loss": 0.8368,
      "step": 434400
    },
    {
      "epoch": 1.5224987470516285,
      "grad_norm": 2.6875,
      "learning_rate": 2.7361231181413898e-05,
      "loss": 0.84,
      "step": 434410
    },
    {
      "epoch": 1.522533794558524,
      "grad_norm": 2.90625,
      "learning_rate": 2.7360582152750196e-05,
      "loss": 0.8082,
      "step": 434420
    },
    {
      "epoch": 1.5225688420654198,
      "grad_norm": 3.3125,
      "learning_rate": 2.7359933124086494e-05,
      "loss": 0.9028,
      "step": 434430
    },
    {
      "epoch": 1.5226038895723153,
      "grad_norm": 3.0625,
      "learning_rate": 2.7359284095422795e-05,
      "loss": 0.8241,
      "step": 434440
    },
    {
      "epoch": 1.5226389370792108,
      "grad_norm": 2.515625,
      "learning_rate": 2.7358635066759093e-05,
      "loss": 0.8673,
      "step": 434450
    },
    {
      "epoch": 1.5226739845861066,
      "grad_norm": 3.53125,
      "learning_rate": 2.735798603809539e-05,
      "loss": 0.8108,
      "step": 434460
    },
    {
      "epoch": 1.522709032093002,
      "grad_norm": 2.625,
      "learning_rate": 2.735733700943169e-05,
      "loss": 0.7364,
      "step": 434470
    },
    {
      "epoch": 1.5227440795998977,
      "grad_norm": 3.03125,
      "learning_rate": 2.7356687980767987e-05,
      "loss": 0.888,
      "step": 434480
    },
    {
      "epoch": 1.5227791271067934,
      "grad_norm": 3.046875,
      "learning_rate": 2.7356038952104285e-05,
      "loss": 0.8576,
      "step": 434490
    },
    {
      "epoch": 1.5228141746136887,
      "grad_norm": 2.609375,
      "learning_rate": 2.7355389923440576e-05,
      "loss": 0.9046,
      "step": 434500
    },
    {
      "epoch": 1.5228492221205845,
      "grad_norm": 2.671875,
      "learning_rate": 2.7354740894776874e-05,
      "loss": 0.7821,
      "step": 434510
    },
    {
      "epoch": 1.52288426962748,
      "grad_norm": 2.4375,
      "learning_rate": 2.7354091866113172e-05,
      "loss": 0.7749,
      "step": 434520
    },
    {
      "epoch": 1.5229193171343756,
      "grad_norm": 3.046875,
      "learning_rate": 2.7353442837449474e-05,
      "loss": 0.9224,
      "step": 434530
    },
    {
      "epoch": 1.5229543646412713,
      "grad_norm": 2.578125,
      "learning_rate": 2.7352793808785772e-05,
      "loss": 0.8505,
      "step": 434540
    },
    {
      "epoch": 1.5229894121481669,
      "grad_norm": 2.890625,
      "learning_rate": 2.735214478012207e-05,
      "loss": 0.7772,
      "step": 434550
    },
    {
      "epoch": 1.5230244596550624,
      "grad_norm": 2.96875,
      "learning_rate": 2.7351495751458368e-05,
      "loss": 0.8122,
      "step": 434560
    },
    {
      "epoch": 1.5230595071619581,
      "grad_norm": 3.34375,
      "learning_rate": 2.7350846722794666e-05,
      "loss": 0.8682,
      "step": 434570
    },
    {
      "epoch": 1.5230945546688537,
      "grad_norm": 2.640625,
      "learning_rate": 2.7350197694130964e-05,
      "loss": 0.7509,
      "step": 434580
    },
    {
      "epoch": 1.5231296021757492,
      "grad_norm": 2.53125,
      "learning_rate": 2.7349548665467262e-05,
      "loss": 0.8432,
      "step": 434590
    },
    {
      "epoch": 1.523164649682645,
      "grad_norm": 2.640625,
      "learning_rate": 2.734889963680356e-05,
      "loss": 0.8161,
      "step": 434600
    },
    {
      "epoch": 1.5231996971895403,
      "grad_norm": 2.828125,
      "learning_rate": 2.7348250608139858e-05,
      "loss": 0.8115,
      "step": 434610
    },
    {
      "epoch": 1.523234744696436,
      "grad_norm": 3.046875,
      "learning_rate": 2.7347601579476156e-05,
      "loss": 0.7894,
      "step": 434620
    },
    {
      "epoch": 1.5232697922033316,
      "grad_norm": 3.109375,
      "learning_rate": 2.7346952550812454e-05,
      "loss": 0.8489,
      "step": 434630
    },
    {
      "epoch": 1.5233048397102271,
      "grad_norm": 2.671875,
      "learning_rate": 2.7346303522148752e-05,
      "loss": 0.8194,
      "step": 434640
    },
    {
      "epoch": 1.5233398872171229,
      "grad_norm": 3.015625,
      "learning_rate": 2.734565449348505e-05,
      "loss": 0.8147,
      "step": 434650
    },
    {
      "epoch": 1.5233749347240184,
      "grad_norm": 2.859375,
      "learning_rate": 2.7345005464821348e-05,
      "loss": 0.8921,
      "step": 434660
    },
    {
      "epoch": 1.523409982230914,
      "grad_norm": 3.0,
      "learning_rate": 2.734435643615765e-05,
      "loss": 0.8447,
      "step": 434670
    },
    {
      "epoch": 1.5234450297378097,
      "grad_norm": 2.515625,
      "learning_rate": 2.7343707407493947e-05,
      "loss": 0.8541,
      "step": 434680
    },
    {
      "epoch": 1.5234800772447052,
      "grad_norm": 2.765625,
      "learning_rate": 2.7343058378830245e-05,
      "loss": 0.7253,
      "step": 434690
    },
    {
      "epoch": 1.5235151247516008,
      "grad_norm": 3.140625,
      "learning_rate": 2.7342409350166543e-05,
      "loss": 0.885,
      "step": 434700
    },
    {
      "epoch": 1.5235501722584965,
      "grad_norm": 2.703125,
      "learning_rate": 2.734176032150284e-05,
      "loss": 0.8524,
      "step": 434710
    },
    {
      "epoch": 1.5235852197653919,
      "grad_norm": 3.125,
      "learning_rate": 2.734111129283914e-05,
      "loss": 0.8054,
      "step": 434720
    },
    {
      "epoch": 1.5236202672722876,
      "grad_norm": 2.890625,
      "learning_rate": 2.7340462264175437e-05,
      "loss": 0.7929,
      "step": 434730
    },
    {
      "epoch": 1.5236553147791831,
      "grad_norm": 3.125,
      "learning_rate": 2.7339813235511735e-05,
      "loss": 0.7716,
      "step": 434740
    },
    {
      "epoch": 1.5236903622860787,
      "grad_norm": 2.9375,
      "learning_rate": 2.7339164206848033e-05,
      "loss": 0.8432,
      "step": 434750
    },
    {
      "epoch": 1.5237254097929744,
      "grad_norm": 3.15625,
      "learning_rate": 2.733851517818433e-05,
      "loss": 0.8126,
      "step": 434760
    },
    {
      "epoch": 1.52376045729987,
      "grad_norm": 2.859375,
      "learning_rate": 2.733786614952063e-05,
      "loss": 0.8252,
      "step": 434770
    },
    {
      "epoch": 1.5237955048067655,
      "grad_norm": 3.265625,
      "learning_rate": 2.7337217120856927e-05,
      "loss": 0.8312,
      "step": 434780
    },
    {
      "epoch": 1.5238305523136613,
      "grad_norm": 2.71875,
      "learning_rate": 2.7336568092193225e-05,
      "loss": 0.7871,
      "step": 434790
    },
    {
      "epoch": 1.5238655998205568,
      "grad_norm": 2.8125,
      "learning_rate": 2.7335919063529523e-05,
      "loss": 0.8489,
      "step": 434800
    },
    {
      "epoch": 1.5239006473274523,
      "grad_norm": 2.859375,
      "learning_rate": 2.7335270034865825e-05,
      "loss": 0.719,
      "step": 434810
    },
    {
      "epoch": 1.523935694834348,
      "grad_norm": 3.421875,
      "learning_rate": 2.7334621006202123e-05,
      "loss": 0.8649,
      "step": 434820
    },
    {
      "epoch": 1.5239707423412434,
      "grad_norm": 3.0625,
      "learning_rate": 2.733397197753842e-05,
      "loss": 0.8688,
      "step": 434830
    },
    {
      "epoch": 1.5240057898481392,
      "grad_norm": 2.890625,
      "learning_rate": 2.733332294887472e-05,
      "loss": 0.7861,
      "step": 434840
    },
    {
      "epoch": 1.5240408373550347,
      "grad_norm": 2.78125,
      "learning_rate": 2.7332673920211017e-05,
      "loss": 0.8961,
      "step": 434850
    },
    {
      "epoch": 1.5240758848619302,
      "grad_norm": 3.46875,
      "learning_rate": 2.7332024891547315e-05,
      "loss": 0.9345,
      "step": 434860
    },
    {
      "epoch": 1.524110932368826,
      "grad_norm": 2.828125,
      "learning_rate": 2.7331375862883606e-05,
      "loss": 0.7391,
      "step": 434870
    },
    {
      "epoch": 1.5241459798757215,
      "grad_norm": 2.734375,
      "learning_rate": 2.7330726834219904e-05,
      "loss": 0.8193,
      "step": 434880
    },
    {
      "epoch": 1.524181027382617,
      "grad_norm": 3.046875,
      "learning_rate": 2.7330077805556202e-05,
      "loss": 0.9014,
      "step": 434890
    },
    {
      "epoch": 1.5242160748895128,
      "grad_norm": 3.046875,
      "learning_rate": 2.7329428776892503e-05,
      "loss": 0.7783,
      "step": 434900
    },
    {
      "epoch": 1.5242511223964084,
      "grad_norm": 3.0625,
      "learning_rate": 2.73287797482288e-05,
      "loss": 0.8759,
      "step": 434910
    },
    {
      "epoch": 1.524286169903304,
      "grad_norm": 3.296875,
      "learning_rate": 2.73281307195651e-05,
      "loss": 0.779,
      "step": 434920
    },
    {
      "epoch": 1.5243212174101997,
      "grad_norm": 2.96875,
      "learning_rate": 2.7327481690901397e-05,
      "loss": 0.8137,
      "step": 434930
    },
    {
      "epoch": 1.524356264917095,
      "grad_norm": 2.859375,
      "learning_rate": 2.7326832662237695e-05,
      "loss": 0.8028,
      "step": 434940
    },
    {
      "epoch": 1.5243913124239907,
      "grad_norm": 2.578125,
      "learning_rate": 2.7326183633573993e-05,
      "loss": 0.814,
      "step": 434950
    },
    {
      "epoch": 1.5244263599308863,
      "grad_norm": 2.921875,
      "learning_rate": 2.732553460491029e-05,
      "loss": 0.7728,
      "step": 434960
    },
    {
      "epoch": 1.5244614074377818,
      "grad_norm": 3.0,
      "learning_rate": 2.732488557624659e-05,
      "loss": 0.7904,
      "step": 434970
    },
    {
      "epoch": 1.5244964549446776,
      "grad_norm": 2.703125,
      "learning_rate": 2.7324236547582887e-05,
      "loss": 0.8483,
      "step": 434980
    },
    {
      "epoch": 1.524531502451573,
      "grad_norm": 2.921875,
      "learning_rate": 2.7323587518919185e-05,
      "loss": 0.7834,
      "step": 434990
    },
    {
      "epoch": 1.5245665499584686,
      "grad_norm": 3.015625,
      "learning_rate": 2.7322938490255483e-05,
      "loss": 0.8376,
      "step": 435000
    },
    {
      "epoch": 1.5245665499584686,
      "eval_loss": 0.7916578054428101,
      "eval_runtime": 551.3804,
      "eval_samples_per_second": 689.97,
      "eval_steps_per_second": 57.498,
      "step": 435000
    },
    {
      "epoch": 1.5246015974653644,
      "grad_norm": 3.09375,
      "learning_rate": 2.732228946159178e-05,
      "loss": 0.9061,
      "step": 435010
    },
    {
      "epoch": 1.52463664497226,
      "grad_norm": 2.703125,
      "learning_rate": 2.732164043292808e-05,
      "loss": 0.7631,
      "step": 435020
    },
    {
      "epoch": 1.5246716924791555,
      "grad_norm": 3.125,
      "learning_rate": 2.732099140426438e-05,
      "loss": 0.8605,
      "step": 435030
    },
    {
      "epoch": 1.5247067399860512,
      "grad_norm": 3.1875,
      "learning_rate": 2.732034237560068e-05,
      "loss": 0.8633,
      "step": 435040
    },
    {
      "epoch": 1.5247417874929465,
      "grad_norm": 2.921875,
      "learning_rate": 2.7319693346936977e-05,
      "loss": 0.8512,
      "step": 435050
    },
    {
      "epoch": 1.5247768349998423,
      "grad_norm": 3.296875,
      "learning_rate": 2.7319044318273275e-05,
      "loss": 0.8357,
      "step": 435060
    },
    {
      "epoch": 1.524811882506738,
      "grad_norm": 3.0625,
      "learning_rate": 2.7318395289609573e-05,
      "loss": 0.8081,
      "step": 435070
    },
    {
      "epoch": 1.5248469300136334,
      "grad_norm": 2.9375,
      "learning_rate": 2.731774626094587e-05,
      "loss": 0.8411,
      "step": 435080
    },
    {
      "epoch": 1.5248819775205291,
      "grad_norm": 3.375,
      "learning_rate": 2.731709723228217e-05,
      "loss": 0.8997,
      "step": 435090
    },
    {
      "epoch": 1.5249170250274247,
      "grad_norm": 2.78125,
      "learning_rate": 2.7316448203618467e-05,
      "loss": 0.892,
      "step": 435100
    },
    {
      "epoch": 1.5249520725343202,
      "grad_norm": 2.265625,
      "learning_rate": 2.7315799174954765e-05,
      "loss": 0.7902,
      "step": 435110
    },
    {
      "epoch": 1.524987120041216,
      "grad_norm": 2.921875,
      "learning_rate": 2.7315150146291063e-05,
      "loss": 0.8445,
      "step": 435120
    },
    {
      "epoch": 1.5250221675481115,
      "grad_norm": 3.140625,
      "learning_rate": 2.731450111762736e-05,
      "loss": 0.8751,
      "step": 435130
    },
    {
      "epoch": 1.525057215055007,
      "grad_norm": 2.921875,
      "learning_rate": 2.731385208896366e-05,
      "loss": 0.8462,
      "step": 435140
    },
    {
      "epoch": 1.5250922625619028,
      "grad_norm": 2.34375,
      "learning_rate": 2.7313203060299957e-05,
      "loss": 0.722,
      "step": 435150
    },
    {
      "epoch": 1.525127310068798,
      "grad_norm": 2.859375,
      "learning_rate": 2.7312554031636255e-05,
      "loss": 0.8061,
      "step": 435160
    },
    {
      "epoch": 1.5251623575756939,
      "grad_norm": 2.9375,
      "learning_rate": 2.7311905002972556e-05,
      "loss": 0.8227,
      "step": 435170
    },
    {
      "epoch": 1.5251974050825896,
      "grad_norm": 2.734375,
      "learning_rate": 2.7311255974308854e-05,
      "loss": 0.824,
      "step": 435180
    },
    {
      "epoch": 1.525232452589485,
      "grad_norm": 2.921875,
      "learning_rate": 2.7310606945645152e-05,
      "loss": 0.8971,
      "step": 435190
    },
    {
      "epoch": 1.5252675000963807,
      "grad_norm": 2.890625,
      "learning_rate": 2.730995791698145e-05,
      "loss": 0.7882,
      "step": 435200
    },
    {
      "epoch": 1.5253025476032762,
      "grad_norm": 2.375,
      "learning_rate": 2.7309308888317748e-05,
      "loss": 0.8148,
      "step": 435210
    },
    {
      "epoch": 1.5253375951101718,
      "grad_norm": 2.640625,
      "learning_rate": 2.7308659859654046e-05,
      "loss": 0.7834,
      "step": 435220
    },
    {
      "epoch": 1.5253726426170675,
      "grad_norm": 2.65625,
      "learning_rate": 2.7308010830990344e-05,
      "loss": 0.8017,
      "step": 435230
    },
    {
      "epoch": 1.525407690123963,
      "grad_norm": 2.734375,
      "learning_rate": 2.7307361802326642e-05,
      "loss": 0.8551,
      "step": 435240
    },
    {
      "epoch": 1.5254427376308586,
      "grad_norm": 2.84375,
      "learning_rate": 2.7306712773662933e-05,
      "loss": 0.834,
      "step": 435250
    },
    {
      "epoch": 1.5254777851377543,
      "grad_norm": 3.140625,
      "learning_rate": 2.7306063744999235e-05,
      "loss": 0.8298,
      "step": 435260
    },
    {
      "epoch": 1.5255128326446499,
      "grad_norm": 2.96875,
      "learning_rate": 2.7305414716335533e-05,
      "loss": 0.8262,
      "step": 435270
    },
    {
      "epoch": 1.5255478801515454,
      "grad_norm": 2.375,
      "learning_rate": 2.730476568767183e-05,
      "loss": 0.7952,
      "step": 435280
    },
    {
      "epoch": 1.5255829276584412,
      "grad_norm": 2.625,
      "learning_rate": 2.730411665900813e-05,
      "loss": 0.7932,
      "step": 435290
    },
    {
      "epoch": 1.5256179751653365,
      "grad_norm": 2.953125,
      "learning_rate": 2.7303467630344427e-05,
      "loss": 0.7743,
      "step": 435300
    },
    {
      "epoch": 1.5256530226722322,
      "grad_norm": 2.625,
      "learning_rate": 2.7302818601680725e-05,
      "loss": 0.8,
      "step": 435310
    },
    {
      "epoch": 1.5256880701791278,
      "grad_norm": 3.328125,
      "learning_rate": 2.7302169573017023e-05,
      "loss": 0.9195,
      "step": 435320
    },
    {
      "epoch": 1.5257231176860233,
      "grad_norm": 2.734375,
      "learning_rate": 2.730152054435332e-05,
      "loss": 0.8473,
      "step": 435330
    },
    {
      "epoch": 1.525758165192919,
      "grad_norm": 3.125,
      "learning_rate": 2.730087151568962e-05,
      "loss": 0.8036,
      "step": 435340
    },
    {
      "epoch": 1.5257932126998146,
      "grad_norm": 2.6875,
      "learning_rate": 2.7300222487025917e-05,
      "loss": 0.8249,
      "step": 435350
    },
    {
      "epoch": 1.5258282602067101,
      "grad_norm": 2.8125,
      "learning_rate": 2.7299573458362215e-05,
      "loss": 0.9414,
      "step": 435360
    },
    {
      "epoch": 1.525863307713606,
      "grad_norm": 3.078125,
      "learning_rate": 2.7298924429698513e-05,
      "loss": 0.869,
      "step": 435370
    },
    {
      "epoch": 1.5258983552205014,
      "grad_norm": 3.03125,
      "learning_rate": 2.729827540103481e-05,
      "loss": 0.921,
      "step": 435380
    },
    {
      "epoch": 1.525933402727397,
      "grad_norm": 2.875,
      "learning_rate": 2.729762637237111e-05,
      "loss": 0.8049,
      "step": 435390
    },
    {
      "epoch": 1.5259684502342927,
      "grad_norm": 3.484375,
      "learning_rate": 2.729697734370741e-05,
      "loss": 0.9289,
      "step": 435400
    },
    {
      "epoch": 1.526003497741188,
      "grad_norm": 2.84375,
      "learning_rate": 2.7296328315043708e-05,
      "loss": 0.8783,
      "step": 435410
    },
    {
      "epoch": 1.5260385452480838,
      "grad_norm": 2.96875,
      "learning_rate": 2.7295679286380006e-05,
      "loss": 0.8777,
      "step": 435420
    },
    {
      "epoch": 1.5260735927549793,
      "grad_norm": 2.984375,
      "learning_rate": 2.7295030257716304e-05,
      "loss": 0.7655,
      "step": 435430
    },
    {
      "epoch": 1.5261086402618749,
      "grad_norm": 3.0,
      "learning_rate": 2.7294381229052602e-05,
      "loss": 0.8262,
      "step": 435440
    },
    {
      "epoch": 1.5261436877687706,
      "grad_norm": 3.65625,
      "learning_rate": 2.72937322003889e-05,
      "loss": 0.8966,
      "step": 435450
    },
    {
      "epoch": 1.5261787352756662,
      "grad_norm": 3.046875,
      "learning_rate": 2.7293083171725198e-05,
      "loss": 0.8806,
      "step": 435460
    },
    {
      "epoch": 1.5262137827825617,
      "grad_norm": 3.5,
      "learning_rate": 2.7292434143061496e-05,
      "loss": 0.9505,
      "step": 435470
    },
    {
      "epoch": 1.5262488302894575,
      "grad_norm": 3.09375,
      "learning_rate": 2.7291785114397794e-05,
      "loss": 0.8557,
      "step": 435480
    },
    {
      "epoch": 1.526283877796353,
      "grad_norm": 2.453125,
      "learning_rate": 2.7291136085734092e-05,
      "loss": 0.7511,
      "step": 435490
    },
    {
      "epoch": 1.5263189253032485,
      "grad_norm": 2.890625,
      "learning_rate": 2.729048705707039e-05,
      "loss": 0.8158,
      "step": 435500
    },
    {
      "epoch": 1.5263539728101443,
      "grad_norm": 3.1875,
      "learning_rate": 2.7289838028406688e-05,
      "loss": 0.8261,
      "step": 435510
    },
    {
      "epoch": 1.5263890203170396,
      "grad_norm": 4.28125,
      "learning_rate": 2.7289188999742986e-05,
      "loss": 0.8701,
      "step": 435520
    },
    {
      "epoch": 1.5264240678239354,
      "grad_norm": 2.8125,
      "learning_rate": 2.7288539971079284e-05,
      "loss": 0.8138,
      "step": 435530
    },
    {
      "epoch": 1.526459115330831,
      "grad_norm": 3.203125,
      "learning_rate": 2.7287890942415585e-05,
      "loss": 0.8296,
      "step": 435540
    },
    {
      "epoch": 1.5264941628377264,
      "grad_norm": 2.765625,
      "learning_rate": 2.7287241913751883e-05,
      "loss": 0.7942,
      "step": 435550
    },
    {
      "epoch": 1.5265292103446222,
      "grad_norm": 2.75,
      "learning_rate": 2.728659288508818e-05,
      "loss": 0.8447,
      "step": 435560
    },
    {
      "epoch": 1.5265642578515177,
      "grad_norm": 3.171875,
      "learning_rate": 2.728594385642448e-05,
      "loss": 0.8656,
      "step": 435570
    },
    {
      "epoch": 1.5265993053584133,
      "grad_norm": 2.65625,
      "learning_rate": 2.7285294827760777e-05,
      "loss": 0.7978,
      "step": 435580
    },
    {
      "epoch": 1.526634352865309,
      "grad_norm": 3.203125,
      "learning_rate": 2.7284645799097075e-05,
      "loss": 0.916,
      "step": 435590
    },
    {
      "epoch": 1.5266694003722046,
      "grad_norm": 3.15625,
      "learning_rate": 2.7283996770433373e-05,
      "loss": 0.8726,
      "step": 435600
    },
    {
      "epoch": 1.5267044478791,
      "grad_norm": 2.96875,
      "learning_rate": 2.728334774176967e-05,
      "loss": 0.8466,
      "step": 435610
    },
    {
      "epoch": 1.5267394953859958,
      "grad_norm": 2.671875,
      "learning_rate": 2.7282698713105963e-05,
      "loss": 0.7721,
      "step": 435620
    },
    {
      "epoch": 1.5267745428928912,
      "grad_norm": 2.9375,
      "learning_rate": 2.7282049684442264e-05,
      "loss": 0.8761,
      "step": 435630
    },
    {
      "epoch": 1.526809590399787,
      "grad_norm": 2.578125,
      "learning_rate": 2.7281400655778562e-05,
      "loss": 0.8329,
      "step": 435640
    },
    {
      "epoch": 1.5268446379066825,
      "grad_norm": 3.484375,
      "learning_rate": 2.728075162711486e-05,
      "loss": 0.9015,
      "step": 435650
    },
    {
      "epoch": 1.526879685413578,
      "grad_norm": 2.609375,
      "learning_rate": 2.7280102598451158e-05,
      "loss": 0.7638,
      "step": 435660
    },
    {
      "epoch": 1.5269147329204737,
      "grad_norm": 2.921875,
      "learning_rate": 2.7279453569787456e-05,
      "loss": 0.8403,
      "step": 435670
    },
    {
      "epoch": 1.5269497804273693,
      "grad_norm": 2.96875,
      "learning_rate": 2.7278804541123754e-05,
      "loss": 0.8381,
      "step": 435680
    },
    {
      "epoch": 1.5269848279342648,
      "grad_norm": 2.84375,
      "learning_rate": 2.7278155512460052e-05,
      "loss": 0.8148,
      "step": 435690
    },
    {
      "epoch": 1.5270198754411606,
      "grad_norm": 2.765625,
      "learning_rate": 2.727750648379635e-05,
      "loss": 0.7359,
      "step": 435700
    },
    {
      "epoch": 1.5270549229480561,
      "grad_norm": 2.9375,
      "learning_rate": 2.7276857455132648e-05,
      "loss": 0.8399,
      "step": 435710
    },
    {
      "epoch": 1.5270899704549517,
      "grad_norm": 2.90625,
      "learning_rate": 2.7276208426468946e-05,
      "loss": 0.841,
      "step": 435720
    },
    {
      "epoch": 1.5271250179618474,
      "grad_norm": 2.859375,
      "learning_rate": 2.7275559397805244e-05,
      "loss": 0.883,
      "step": 435730
    },
    {
      "epoch": 1.5271600654687427,
      "grad_norm": 2.578125,
      "learning_rate": 2.7274910369141542e-05,
      "loss": 0.8017,
      "step": 435740
    },
    {
      "epoch": 1.5271951129756385,
      "grad_norm": 3.09375,
      "learning_rate": 2.727426134047784e-05,
      "loss": 0.8488,
      "step": 435750
    },
    {
      "epoch": 1.5272301604825342,
      "grad_norm": 2.640625,
      "learning_rate": 2.7273612311814138e-05,
      "loss": 0.872,
      "step": 435760
    },
    {
      "epoch": 1.5272652079894296,
      "grad_norm": 2.75,
      "learning_rate": 2.727296328315044e-05,
      "loss": 0.8779,
      "step": 435770
    },
    {
      "epoch": 1.5273002554963253,
      "grad_norm": 3.0625,
      "learning_rate": 2.7272314254486737e-05,
      "loss": 0.8379,
      "step": 435780
    },
    {
      "epoch": 1.5273353030032208,
      "grad_norm": 3.078125,
      "learning_rate": 2.7271665225823035e-05,
      "loss": 0.8558,
      "step": 435790
    },
    {
      "epoch": 1.5273703505101164,
      "grad_norm": 2.890625,
      "learning_rate": 2.7271016197159333e-05,
      "loss": 0.9422,
      "step": 435800
    },
    {
      "epoch": 1.5274053980170121,
      "grad_norm": 4.46875,
      "learning_rate": 2.727036716849563e-05,
      "loss": 0.8499,
      "step": 435810
    },
    {
      "epoch": 1.5274404455239077,
      "grad_norm": 2.6875,
      "learning_rate": 2.726971813983193e-05,
      "loss": 0.8397,
      "step": 435820
    },
    {
      "epoch": 1.5274754930308032,
      "grad_norm": 2.71875,
      "learning_rate": 2.7269069111168227e-05,
      "loss": 0.8638,
      "step": 435830
    },
    {
      "epoch": 1.527510540537699,
      "grad_norm": 2.609375,
      "learning_rate": 2.7268420082504525e-05,
      "loss": 0.7174,
      "step": 435840
    },
    {
      "epoch": 1.5275455880445943,
      "grad_norm": 2.703125,
      "learning_rate": 2.7267771053840823e-05,
      "loss": 0.7677,
      "step": 435850
    },
    {
      "epoch": 1.52758063555149,
      "grad_norm": 2.8125,
      "learning_rate": 2.726712202517712e-05,
      "loss": 0.8852,
      "step": 435860
    },
    {
      "epoch": 1.5276156830583858,
      "grad_norm": 2.890625,
      "learning_rate": 2.726647299651342e-05,
      "loss": 0.777,
      "step": 435870
    },
    {
      "epoch": 1.5276507305652811,
      "grad_norm": 3.140625,
      "learning_rate": 2.7265823967849717e-05,
      "loss": 0.8503,
      "step": 435880
    },
    {
      "epoch": 1.5276857780721769,
      "grad_norm": 2.578125,
      "learning_rate": 2.7265174939186015e-05,
      "loss": 0.7887,
      "step": 435890
    },
    {
      "epoch": 1.5277208255790724,
      "grad_norm": 2.9375,
      "learning_rate": 2.7264525910522313e-05,
      "loss": 0.8279,
      "step": 435900
    },
    {
      "epoch": 1.527755873085968,
      "grad_norm": 2.828125,
      "learning_rate": 2.7263876881858615e-05,
      "loss": 0.8685,
      "step": 435910
    },
    {
      "epoch": 1.5277909205928637,
      "grad_norm": 3.171875,
      "learning_rate": 2.7263227853194913e-05,
      "loss": 0.8212,
      "step": 435920
    },
    {
      "epoch": 1.5278259680997592,
      "grad_norm": 2.8125,
      "learning_rate": 2.726257882453121e-05,
      "loss": 0.8549,
      "step": 435930
    },
    {
      "epoch": 1.5278610156066548,
      "grad_norm": 2.359375,
      "learning_rate": 2.726192979586751e-05,
      "loss": 0.7923,
      "step": 435940
    },
    {
      "epoch": 1.5278960631135505,
      "grad_norm": 3.453125,
      "learning_rate": 2.7261280767203807e-05,
      "loss": 0.7979,
      "step": 435950
    },
    {
      "epoch": 1.527931110620446,
      "grad_norm": 3.265625,
      "learning_rate": 2.7260631738540105e-05,
      "loss": 0.8523,
      "step": 435960
    },
    {
      "epoch": 1.5279661581273416,
      "grad_norm": 2.984375,
      "learning_rate": 2.7259982709876403e-05,
      "loss": 0.8501,
      "step": 435970
    },
    {
      "epoch": 1.5280012056342374,
      "grad_norm": 2.859375,
      "learning_rate": 2.72593336812127e-05,
      "loss": 0.8972,
      "step": 435980
    },
    {
      "epoch": 1.5280362531411327,
      "grad_norm": 2.78125,
      "learning_rate": 2.7258684652549e-05,
      "loss": 0.832,
      "step": 435990
    },
    {
      "epoch": 1.5280713006480284,
      "grad_norm": 2.96875,
      "learning_rate": 2.7258035623885293e-05,
      "loss": 0.8221,
      "step": 436000
    },
    {
      "epoch": 1.528106348154924,
      "grad_norm": 3.265625,
      "learning_rate": 2.725738659522159e-05,
      "loss": 0.8527,
      "step": 436010
    },
    {
      "epoch": 1.5281413956618195,
      "grad_norm": 2.984375,
      "learning_rate": 2.725673756655789e-05,
      "loss": 0.8128,
      "step": 436020
    },
    {
      "epoch": 1.5281764431687153,
      "grad_norm": 3.4375,
      "learning_rate": 2.7256088537894187e-05,
      "loss": 0.8809,
      "step": 436030
    },
    {
      "epoch": 1.5282114906756108,
      "grad_norm": 2.6875,
      "learning_rate": 2.7255439509230485e-05,
      "loss": 0.8517,
      "step": 436040
    },
    {
      "epoch": 1.5282465381825063,
      "grad_norm": 3.09375,
      "learning_rate": 2.7254790480566783e-05,
      "loss": 0.8651,
      "step": 436050
    },
    {
      "epoch": 1.528281585689402,
      "grad_norm": 3.03125,
      "learning_rate": 2.725414145190308e-05,
      "loss": 0.888,
      "step": 436060
    },
    {
      "epoch": 1.5283166331962976,
      "grad_norm": 3.015625,
      "learning_rate": 2.725349242323938e-05,
      "loss": 0.8516,
      "step": 436070
    },
    {
      "epoch": 1.5283516807031932,
      "grad_norm": 3.1875,
      "learning_rate": 2.7252843394575677e-05,
      "loss": 0.8807,
      "step": 436080
    },
    {
      "epoch": 1.528386728210089,
      "grad_norm": 3.1875,
      "learning_rate": 2.7252194365911975e-05,
      "loss": 0.7905,
      "step": 436090
    },
    {
      "epoch": 1.5284217757169842,
      "grad_norm": 2.890625,
      "learning_rate": 2.7251545337248273e-05,
      "loss": 0.8029,
      "step": 436100
    },
    {
      "epoch": 1.52845682322388,
      "grad_norm": 3.34375,
      "learning_rate": 2.725089630858457e-05,
      "loss": 0.856,
      "step": 436110
    },
    {
      "epoch": 1.5284918707307755,
      "grad_norm": 2.5625,
      "learning_rate": 2.725024727992087e-05,
      "loss": 0.8957,
      "step": 436120
    },
    {
      "epoch": 1.528526918237671,
      "grad_norm": 2.90625,
      "learning_rate": 2.724959825125717e-05,
      "loss": 0.8709,
      "step": 436130
    },
    {
      "epoch": 1.5285619657445668,
      "grad_norm": 2.703125,
      "learning_rate": 2.724894922259347e-05,
      "loss": 0.8224,
      "step": 436140
    },
    {
      "epoch": 1.5285970132514624,
      "grad_norm": 2.59375,
      "learning_rate": 2.7248300193929767e-05,
      "loss": 0.837,
      "step": 436150
    },
    {
      "epoch": 1.528632060758358,
      "grad_norm": 2.53125,
      "learning_rate": 2.7247651165266065e-05,
      "loss": 0.8976,
      "step": 436160
    },
    {
      "epoch": 1.5286671082652536,
      "grad_norm": 2.84375,
      "learning_rate": 2.7247002136602363e-05,
      "loss": 0.8111,
      "step": 436170
    },
    {
      "epoch": 1.5287021557721492,
      "grad_norm": 2.8125,
      "learning_rate": 2.724635310793866e-05,
      "loss": 0.7981,
      "step": 436180
    },
    {
      "epoch": 1.5287372032790447,
      "grad_norm": 3.15625,
      "learning_rate": 2.724570407927496e-05,
      "loss": 0.8691,
      "step": 436190
    },
    {
      "epoch": 1.5287722507859405,
      "grad_norm": 3.609375,
      "learning_rate": 2.7245055050611257e-05,
      "loss": 0.9203,
      "step": 436200
    },
    {
      "epoch": 1.5288072982928358,
      "grad_norm": 3.0,
      "learning_rate": 2.7244406021947555e-05,
      "loss": 0.8276,
      "step": 436210
    },
    {
      "epoch": 1.5288423457997316,
      "grad_norm": 2.921875,
      "learning_rate": 2.7243756993283853e-05,
      "loss": 0.8482,
      "step": 436220
    },
    {
      "epoch": 1.528877393306627,
      "grad_norm": 3.265625,
      "learning_rate": 2.724310796462015e-05,
      "loss": 0.7872,
      "step": 436230
    },
    {
      "epoch": 1.5289124408135226,
      "grad_norm": 2.90625,
      "learning_rate": 2.724245893595645e-05,
      "loss": 0.8582,
      "step": 436240
    },
    {
      "epoch": 1.5289474883204184,
      "grad_norm": 3.71875,
      "learning_rate": 2.7241809907292747e-05,
      "loss": 0.8532,
      "step": 436250
    },
    {
      "epoch": 1.528982535827314,
      "grad_norm": 2.5,
      "learning_rate": 2.7241160878629045e-05,
      "loss": 0.7147,
      "step": 436260
    },
    {
      "epoch": 1.5290175833342095,
      "grad_norm": 3.546875,
      "learning_rate": 2.7240511849965346e-05,
      "loss": 0.8146,
      "step": 436270
    },
    {
      "epoch": 1.5290526308411052,
      "grad_norm": 2.515625,
      "learning_rate": 2.7239862821301644e-05,
      "loss": 0.8592,
      "step": 436280
    },
    {
      "epoch": 1.5290876783480007,
      "grad_norm": 2.765625,
      "learning_rate": 2.7239213792637942e-05,
      "loss": 0.8558,
      "step": 436290
    },
    {
      "epoch": 1.5291227258548963,
      "grad_norm": 2.75,
      "learning_rate": 2.723856476397424e-05,
      "loss": 0.8151,
      "step": 436300
    },
    {
      "epoch": 1.529157773361792,
      "grad_norm": 2.859375,
      "learning_rate": 2.7237915735310538e-05,
      "loss": 0.7933,
      "step": 436310
    },
    {
      "epoch": 1.5291928208686874,
      "grad_norm": 3.0,
      "learning_rate": 2.7237266706646836e-05,
      "loss": 0.8699,
      "step": 436320
    },
    {
      "epoch": 1.529227868375583,
      "grad_norm": 2.890625,
      "learning_rate": 2.7236617677983134e-05,
      "loss": 0.8533,
      "step": 436330
    },
    {
      "epoch": 1.5292629158824786,
      "grad_norm": 2.5625,
      "learning_rate": 2.7235968649319432e-05,
      "loss": 0.8509,
      "step": 436340
    },
    {
      "epoch": 1.5292979633893742,
      "grad_norm": 3.15625,
      "learning_rate": 2.723531962065573e-05,
      "loss": 0.8526,
      "step": 436350
    },
    {
      "epoch": 1.52933301089627,
      "grad_norm": 3.15625,
      "learning_rate": 2.7234670591992028e-05,
      "loss": 0.8608,
      "step": 436360
    },
    {
      "epoch": 1.5293680584031655,
      "grad_norm": 2.921875,
      "learning_rate": 2.7234021563328326e-05,
      "loss": 0.8205,
      "step": 436370
    },
    {
      "epoch": 1.529403105910061,
      "grad_norm": 2.828125,
      "learning_rate": 2.723337253466462e-05,
      "loss": 0.8237,
      "step": 436380
    },
    {
      "epoch": 1.5294381534169568,
      "grad_norm": 2.90625,
      "learning_rate": 2.723272350600092e-05,
      "loss": 0.7869,
      "step": 436390
    },
    {
      "epoch": 1.5294732009238523,
      "grad_norm": 2.46875,
      "learning_rate": 2.7232074477337217e-05,
      "loss": 0.8091,
      "step": 436400
    },
    {
      "epoch": 1.5295082484307478,
      "grad_norm": 2.921875,
      "learning_rate": 2.7231425448673515e-05,
      "loss": 0.8469,
      "step": 436410
    },
    {
      "epoch": 1.5295432959376436,
      "grad_norm": 3.078125,
      "learning_rate": 2.7230776420009813e-05,
      "loss": 0.7485,
      "step": 436420
    },
    {
      "epoch": 1.529578343444539,
      "grad_norm": 2.84375,
      "learning_rate": 2.723012739134611e-05,
      "loss": 0.9034,
      "step": 436430
    },
    {
      "epoch": 1.5296133909514347,
      "grad_norm": 2.875,
      "learning_rate": 2.722947836268241e-05,
      "loss": 0.7686,
      "step": 436440
    },
    {
      "epoch": 1.5296484384583304,
      "grad_norm": 2.953125,
      "learning_rate": 2.7228829334018707e-05,
      "loss": 0.8936,
      "step": 436450
    },
    {
      "epoch": 1.5296834859652257,
      "grad_norm": 2.9375,
      "learning_rate": 2.7228180305355005e-05,
      "loss": 0.8957,
      "step": 436460
    },
    {
      "epoch": 1.5297185334721215,
      "grad_norm": 2.5,
      "learning_rate": 2.7227531276691303e-05,
      "loss": 0.8497,
      "step": 436470
    },
    {
      "epoch": 1.529753580979017,
      "grad_norm": 2.78125,
      "learning_rate": 2.72268822480276e-05,
      "loss": 0.7486,
      "step": 436480
    },
    {
      "epoch": 1.5297886284859126,
      "grad_norm": 2.984375,
      "learning_rate": 2.72262332193639e-05,
      "loss": 0.7971,
      "step": 436490
    },
    {
      "epoch": 1.5298236759928083,
      "grad_norm": 2.9375,
      "learning_rate": 2.72255841907002e-05,
      "loss": 0.8111,
      "step": 436500
    },
    {
      "epoch": 1.5298587234997039,
      "grad_norm": 3.203125,
      "learning_rate": 2.7224935162036498e-05,
      "loss": 0.8859,
      "step": 436510
    },
    {
      "epoch": 1.5298937710065994,
      "grad_norm": 2.84375,
      "learning_rate": 2.7224286133372796e-05,
      "loss": 0.7739,
      "step": 436520
    },
    {
      "epoch": 1.5299288185134952,
      "grad_norm": 2.96875,
      "learning_rate": 2.7223637104709094e-05,
      "loss": 0.782,
      "step": 436530
    },
    {
      "epoch": 1.5299638660203907,
      "grad_norm": 3.109375,
      "learning_rate": 2.7222988076045392e-05,
      "loss": 0.7565,
      "step": 436540
    },
    {
      "epoch": 1.5299989135272862,
      "grad_norm": 3.125,
      "learning_rate": 2.722233904738169e-05,
      "loss": 0.8968,
      "step": 436550
    },
    {
      "epoch": 1.530033961034182,
      "grad_norm": 2.6875,
      "learning_rate": 2.7221690018717988e-05,
      "loss": 0.8391,
      "step": 436560
    },
    {
      "epoch": 1.5300690085410773,
      "grad_norm": 3.078125,
      "learning_rate": 2.7221040990054286e-05,
      "loss": 0.8489,
      "step": 436570
    },
    {
      "epoch": 1.530104056047973,
      "grad_norm": 3.15625,
      "learning_rate": 2.7220391961390584e-05,
      "loss": 0.9225,
      "step": 436580
    },
    {
      "epoch": 1.5301391035548686,
      "grad_norm": 3.078125,
      "learning_rate": 2.7219742932726882e-05,
      "loss": 0.8213,
      "step": 436590
    },
    {
      "epoch": 1.5301741510617641,
      "grad_norm": 2.984375,
      "learning_rate": 2.721909390406318e-05,
      "loss": 0.9349,
      "step": 436600
    },
    {
      "epoch": 1.53020919856866,
      "grad_norm": 2.96875,
      "learning_rate": 2.7218444875399478e-05,
      "loss": 0.8496,
      "step": 436610
    },
    {
      "epoch": 1.5302442460755554,
      "grad_norm": 2.875,
      "learning_rate": 2.7217795846735776e-05,
      "loss": 0.8299,
      "step": 436620
    },
    {
      "epoch": 1.530279293582451,
      "grad_norm": 2.921875,
      "learning_rate": 2.7217146818072074e-05,
      "loss": 0.8059,
      "step": 436630
    },
    {
      "epoch": 1.5303143410893467,
      "grad_norm": 3.203125,
      "learning_rate": 2.7216497789408375e-05,
      "loss": 0.8433,
      "step": 436640
    },
    {
      "epoch": 1.5303493885962423,
      "grad_norm": 3.171875,
      "learning_rate": 2.7215848760744673e-05,
      "loss": 0.8682,
      "step": 436650
    },
    {
      "epoch": 1.5303844361031378,
      "grad_norm": 2.875,
      "learning_rate": 2.721519973208097e-05,
      "loss": 0.8144,
      "step": 436660
    },
    {
      "epoch": 1.5304194836100335,
      "grad_norm": 2.46875,
      "learning_rate": 2.721455070341727e-05,
      "loss": 0.8209,
      "step": 436670
    },
    {
      "epoch": 1.5304545311169289,
      "grad_norm": 2.609375,
      "learning_rate": 2.7213901674753567e-05,
      "loss": 0.821,
      "step": 436680
    },
    {
      "epoch": 1.5304895786238246,
      "grad_norm": 3.5,
      "learning_rate": 2.7213252646089865e-05,
      "loss": 0.9051,
      "step": 436690
    },
    {
      "epoch": 1.5305246261307202,
      "grad_norm": 2.984375,
      "learning_rate": 2.7212603617426163e-05,
      "loss": 0.7726,
      "step": 436700
    },
    {
      "epoch": 1.5305596736376157,
      "grad_norm": 2.375,
      "learning_rate": 2.721195458876246e-05,
      "loss": 0.783,
      "step": 436710
    },
    {
      "epoch": 1.5305947211445114,
      "grad_norm": 3.015625,
      "learning_rate": 2.721130556009876e-05,
      "loss": 0.8753,
      "step": 436720
    },
    {
      "epoch": 1.530629768651407,
      "grad_norm": 2.84375,
      "learning_rate": 2.7210656531435057e-05,
      "loss": 0.8933,
      "step": 436730
    },
    {
      "epoch": 1.5306648161583025,
      "grad_norm": 2.765625,
      "learning_rate": 2.7210007502771355e-05,
      "loss": 0.8326,
      "step": 436740
    },
    {
      "epoch": 1.5306998636651983,
      "grad_norm": 3.1875,
      "learning_rate": 2.720935847410765e-05,
      "loss": 0.816,
      "step": 436750
    },
    {
      "epoch": 1.5307349111720938,
      "grad_norm": 2.984375,
      "learning_rate": 2.7208709445443948e-05,
      "loss": 0.7328,
      "step": 436760
    },
    {
      "epoch": 1.5307699586789894,
      "grad_norm": 2.890625,
      "learning_rate": 2.7208060416780246e-05,
      "loss": 0.806,
      "step": 436770
    },
    {
      "epoch": 1.530805006185885,
      "grad_norm": 2.703125,
      "learning_rate": 2.7207411388116544e-05,
      "loss": 0.841,
      "step": 436780
    },
    {
      "epoch": 1.5308400536927804,
      "grad_norm": 2.4375,
      "learning_rate": 2.7206762359452842e-05,
      "loss": 0.7488,
      "step": 436790
    },
    {
      "epoch": 1.5308751011996762,
      "grad_norm": 2.421875,
      "learning_rate": 2.720611333078914e-05,
      "loss": 0.734,
      "step": 436800
    },
    {
      "epoch": 1.5309101487065717,
      "grad_norm": 2.8125,
      "learning_rate": 2.7205464302125438e-05,
      "loss": 0.8413,
      "step": 436810
    },
    {
      "epoch": 1.5309451962134673,
      "grad_norm": 3.1875,
      "learning_rate": 2.7204815273461736e-05,
      "loss": 0.9434,
      "step": 436820
    },
    {
      "epoch": 1.530980243720363,
      "grad_norm": 2.875,
      "learning_rate": 2.7204166244798034e-05,
      "loss": 0.8464,
      "step": 436830
    },
    {
      "epoch": 1.5310152912272585,
      "grad_norm": 2.5625,
      "learning_rate": 2.7203517216134332e-05,
      "loss": 0.8233,
      "step": 436840
    },
    {
      "epoch": 1.531050338734154,
      "grad_norm": 3.0625,
      "learning_rate": 2.720286818747063e-05,
      "loss": 0.8212,
      "step": 436850
    },
    {
      "epoch": 1.5310853862410498,
      "grad_norm": 2.96875,
      "learning_rate": 2.7202219158806928e-05,
      "loss": 0.8822,
      "step": 436860
    },
    {
      "epoch": 1.5311204337479454,
      "grad_norm": 2.484375,
      "learning_rate": 2.720157013014323e-05,
      "loss": 0.8426,
      "step": 436870
    },
    {
      "epoch": 1.531155481254841,
      "grad_norm": 3.0,
      "learning_rate": 2.7200921101479527e-05,
      "loss": 0.77,
      "step": 436880
    },
    {
      "epoch": 1.5311905287617367,
      "grad_norm": 3.109375,
      "learning_rate": 2.7200272072815825e-05,
      "loss": 0.8564,
      "step": 436890
    },
    {
      "epoch": 1.531225576268632,
      "grad_norm": 2.609375,
      "learning_rate": 2.7199623044152123e-05,
      "loss": 0.8542,
      "step": 436900
    },
    {
      "epoch": 1.5312606237755277,
      "grad_norm": 2.765625,
      "learning_rate": 2.719897401548842e-05,
      "loss": 0.8174,
      "step": 436910
    },
    {
      "epoch": 1.5312956712824233,
      "grad_norm": 2.59375,
      "learning_rate": 2.719832498682472e-05,
      "loss": 0.7829,
      "step": 436920
    },
    {
      "epoch": 1.5313307187893188,
      "grad_norm": 2.46875,
      "learning_rate": 2.7197675958161017e-05,
      "loss": 0.8827,
      "step": 436930
    },
    {
      "epoch": 1.5313657662962146,
      "grad_norm": 2.578125,
      "learning_rate": 2.7197026929497315e-05,
      "loss": 0.8016,
      "step": 436940
    },
    {
      "epoch": 1.53140081380311,
      "grad_norm": 3.421875,
      "learning_rate": 2.7196377900833613e-05,
      "loss": 0.8262,
      "step": 436950
    },
    {
      "epoch": 1.5314358613100056,
      "grad_norm": 2.734375,
      "learning_rate": 2.719572887216991e-05,
      "loss": 0.7598,
      "step": 436960
    },
    {
      "epoch": 1.5314709088169014,
      "grad_norm": 2.796875,
      "learning_rate": 2.719507984350621e-05,
      "loss": 0.8689,
      "step": 436970
    },
    {
      "epoch": 1.531505956323797,
      "grad_norm": 2.8125,
      "learning_rate": 2.7194430814842507e-05,
      "loss": 0.8818,
      "step": 436980
    },
    {
      "epoch": 1.5315410038306925,
      "grad_norm": 2.546875,
      "learning_rate": 2.7193781786178805e-05,
      "loss": 0.8198,
      "step": 436990
    },
    {
      "epoch": 1.5315760513375882,
      "grad_norm": 3.171875,
      "learning_rate": 2.7193132757515103e-05,
      "loss": 0.7854,
      "step": 437000
    },
    {
      "epoch": 1.5316110988444835,
      "grad_norm": 2.53125,
      "learning_rate": 2.7192483728851405e-05,
      "loss": 0.8998,
      "step": 437010
    },
    {
      "epoch": 1.5316461463513793,
      "grad_norm": 2.578125,
      "learning_rate": 2.7191834700187703e-05,
      "loss": 0.8361,
      "step": 437020
    },
    {
      "epoch": 1.5316811938582748,
      "grad_norm": 2.78125,
      "learning_rate": 2.7191185671524e-05,
      "loss": 0.7729,
      "step": 437030
    },
    {
      "epoch": 1.5317162413651704,
      "grad_norm": 2.921875,
      "learning_rate": 2.71905366428603e-05,
      "loss": 0.8038,
      "step": 437040
    },
    {
      "epoch": 1.5317512888720661,
      "grad_norm": 2.515625,
      "learning_rate": 2.7189887614196597e-05,
      "loss": 0.7378,
      "step": 437050
    },
    {
      "epoch": 1.5317863363789617,
      "grad_norm": 2.953125,
      "learning_rate": 2.7189238585532895e-05,
      "loss": 0.8189,
      "step": 437060
    },
    {
      "epoch": 1.5318213838858572,
      "grad_norm": 2.8125,
      "learning_rate": 2.7188589556869193e-05,
      "loss": 0.8206,
      "step": 437070
    },
    {
      "epoch": 1.531856431392753,
      "grad_norm": 2.765625,
      "learning_rate": 2.718794052820549e-05,
      "loss": 0.8304,
      "step": 437080
    },
    {
      "epoch": 1.5318914788996485,
      "grad_norm": 2.375,
      "learning_rate": 2.718729149954179e-05,
      "loss": 0.7952,
      "step": 437090
    },
    {
      "epoch": 1.531926526406544,
      "grad_norm": 2.9375,
      "learning_rate": 2.7186642470878087e-05,
      "loss": 0.7598,
      "step": 437100
    },
    {
      "epoch": 1.5319615739134398,
      "grad_norm": 2.78125,
      "learning_rate": 2.7185993442214385e-05,
      "loss": 0.884,
      "step": 437110
    },
    {
      "epoch": 1.531996621420335,
      "grad_norm": 2.796875,
      "learning_rate": 2.7185344413550683e-05,
      "loss": 0.8558,
      "step": 437120
    },
    {
      "epoch": 1.5320316689272309,
      "grad_norm": 2.59375,
      "learning_rate": 2.7184695384886977e-05,
      "loss": 0.8606,
      "step": 437130
    },
    {
      "epoch": 1.5320667164341266,
      "grad_norm": 2.90625,
      "learning_rate": 2.7184046356223275e-05,
      "loss": 0.8458,
      "step": 437140
    },
    {
      "epoch": 1.532101763941022,
      "grad_norm": 2.859375,
      "learning_rate": 2.7183397327559573e-05,
      "loss": 0.8107,
      "step": 437150
    },
    {
      "epoch": 1.5321368114479177,
      "grad_norm": 3.0625,
      "learning_rate": 2.718274829889587e-05,
      "loss": 0.805,
      "step": 437160
    },
    {
      "epoch": 1.5321718589548132,
      "grad_norm": 2.953125,
      "learning_rate": 2.718209927023217e-05,
      "loss": 0.828,
      "step": 437170
    },
    {
      "epoch": 1.5322069064617088,
      "grad_norm": 2.953125,
      "learning_rate": 2.7181450241568467e-05,
      "loss": 0.8674,
      "step": 437180
    },
    {
      "epoch": 1.5322419539686045,
      "grad_norm": 2.953125,
      "learning_rate": 2.7180801212904765e-05,
      "loss": 0.8603,
      "step": 437190
    },
    {
      "epoch": 1.5322770014755,
      "grad_norm": 2.859375,
      "learning_rate": 2.7180152184241063e-05,
      "loss": 0.7499,
      "step": 437200
    },
    {
      "epoch": 1.5323120489823956,
      "grad_norm": 2.65625,
      "learning_rate": 2.717950315557736e-05,
      "loss": 0.8295,
      "step": 437210
    },
    {
      "epoch": 1.5323470964892913,
      "grad_norm": 2.453125,
      "learning_rate": 2.717885412691366e-05,
      "loss": 0.8814,
      "step": 437220
    },
    {
      "epoch": 1.5323821439961869,
      "grad_norm": 2.625,
      "learning_rate": 2.717820509824996e-05,
      "loss": 0.8805,
      "step": 437230
    },
    {
      "epoch": 1.5324171915030824,
      "grad_norm": 3.046875,
      "learning_rate": 2.717755606958626e-05,
      "loss": 0.8119,
      "step": 437240
    },
    {
      "epoch": 1.5324522390099782,
      "grad_norm": 3.046875,
      "learning_rate": 2.7176907040922557e-05,
      "loss": 0.8599,
      "step": 437250
    },
    {
      "epoch": 1.5324872865168735,
      "grad_norm": 3.25,
      "learning_rate": 2.7176258012258855e-05,
      "loss": 0.8603,
      "step": 437260
    },
    {
      "epoch": 1.5325223340237693,
      "grad_norm": 3.453125,
      "learning_rate": 2.7175608983595153e-05,
      "loss": 0.8104,
      "step": 437270
    },
    {
      "epoch": 1.5325573815306648,
      "grad_norm": 2.8125,
      "learning_rate": 2.717495995493145e-05,
      "loss": 0.8384,
      "step": 437280
    },
    {
      "epoch": 1.5325924290375603,
      "grad_norm": 3.03125,
      "learning_rate": 2.717431092626775e-05,
      "loss": 0.824,
      "step": 437290
    },
    {
      "epoch": 1.532627476544456,
      "grad_norm": 2.296875,
      "learning_rate": 2.7173661897604047e-05,
      "loss": 0.8027,
      "step": 437300
    },
    {
      "epoch": 1.5326625240513516,
      "grad_norm": 2.84375,
      "learning_rate": 2.7173012868940345e-05,
      "loss": 0.76,
      "step": 437310
    },
    {
      "epoch": 1.5326975715582472,
      "grad_norm": 3.09375,
      "learning_rate": 2.7172363840276643e-05,
      "loss": 0.828,
      "step": 437320
    },
    {
      "epoch": 1.532732619065143,
      "grad_norm": 3.125,
      "learning_rate": 2.717171481161294e-05,
      "loss": 0.8231,
      "step": 437330
    },
    {
      "epoch": 1.5327676665720384,
      "grad_norm": 3.015625,
      "learning_rate": 2.717106578294924e-05,
      "loss": 0.8913,
      "step": 437340
    },
    {
      "epoch": 1.532802714078934,
      "grad_norm": 3.359375,
      "learning_rate": 2.7170416754285537e-05,
      "loss": 0.8037,
      "step": 437350
    },
    {
      "epoch": 1.5328377615858297,
      "grad_norm": 3.28125,
      "learning_rate": 2.7169767725621835e-05,
      "loss": 0.7926,
      "step": 437360
    },
    {
      "epoch": 1.532872809092725,
      "grad_norm": 2.796875,
      "learning_rate": 2.7169118696958136e-05,
      "loss": 0.8049,
      "step": 437370
    },
    {
      "epoch": 1.5329078565996208,
      "grad_norm": 3.140625,
      "learning_rate": 2.7168469668294434e-05,
      "loss": 0.8767,
      "step": 437380
    },
    {
      "epoch": 1.5329429041065163,
      "grad_norm": 3.140625,
      "learning_rate": 2.7167820639630732e-05,
      "loss": 0.9225,
      "step": 437390
    },
    {
      "epoch": 1.5329779516134119,
      "grad_norm": 2.6875,
      "learning_rate": 2.716717161096703e-05,
      "loss": 0.7781,
      "step": 437400
    },
    {
      "epoch": 1.5330129991203076,
      "grad_norm": 2.765625,
      "learning_rate": 2.7166522582303328e-05,
      "loss": 0.7554,
      "step": 437410
    },
    {
      "epoch": 1.5330480466272032,
      "grad_norm": 4.25,
      "learning_rate": 2.7165873553639626e-05,
      "loss": 0.9007,
      "step": 437420
    },
    {
      "epoch": 1.5330830941340987,
      "grad_norm": 3.15625,
      "learning_rate": 2.7165224524975924e-05,
      "loss": 0.8724,
      "step": 437430
    },
    {
      "epoch": 1.5331181416409945,
      "grad_norm": 2.6875,
      "learning_rate": 2.7164575496312222e-05,
      "loss": 0.8163,
      "step": 437440
    },
    {
      "epoch": 1.53315318914789,
      "grad_norm": 2.84375,
      "learning_rate": 2.716392646764852e-05,
      "loss": 0.8955,
      "step": 437450
    },
    {
      "epoch": 1.5331882366547855,
      "grad_norm": 2.9375,
      "learning_rate": 2.7163277438984818e-05,
      "loss": 0.8367,
      "step": 437460
    },
    {
      "epoch": 1.5332232841616813,
      "grad_norm": 2.796875,
      "learning_rate": 2.7162628410321116e-05,
      "loss": 0.8344,
      "step": 437470
    },
    {
      "epoch": 1.5332583316685766,
      "grad_norm": 2.75,
      "learning_rate": 2.7161979381657414e-05,
      "loss": 0.8536,
      "step": 437480
    },
    {
      "epoch": 1.5332933791754724,
      "grad_norm": 3.359375,
      "learning_rate": 2.7161330352993712e-05,
      "loss": 0.821,
      "step": 437490
    },
    {
      "epoch": 1.533328426682368,
      "grad_norm": 3.359375,
      "learning_rate": 2.716068132433001e-05,
      "loss": 0.9283,
      "step": 437500
    },
    {
      "epoch": 1.5333634741892634,
      "grad_norm": 2.6875,
      "learning_rate": 2.7160032295666305e-05,
      "loss": 0.8494,
      "step": 437510
    },
    {
      "epoch": 1.5333985216961592,
      "grad_norm": 3.15625,
      "learning_rate": 2.7159383267002603e-05,
      "loss": 0.8643,
      "step": 437520
    },
    {
      "epoch": 1.5334335692030547,
      "grad_norm": 2.5625,
      "learning_rate": 2.71587342383389e-05,
      "loss": 0.773,
      "step": 437530
    },
    {
      "epoch": 1.5334686167099503,
      "grad_norm": 2.921875,
      "learning_rate": 2.71580852096752e-05,
      "loss": 0.8253,
      "step": 437540
    },
    {
      "epoch": 1.533503664216846,
      "grad_norm": 2.828125,
      "learning_rate": 2.7157436181011497e-05,
      "loss": 0.8313,
      "step": 437550
    },
    {
      "epoch": 1.5335387117237416,
      "grad_norm": 2.609375,
      "learning_rate": 2.7156787152347795e-05,
      "loss": 0.8233,
      "step": 437560
    },
    {
      "epoch": 1.533573759230637,
      "grad_norm": 2.984375,
      "learning_rate": 2.7156138123684093e-05,
      "loss": 0.8856,
      "step": 437570
    },
    {
      "epoch": 1.5336088067375329,
      "grad_norm": 2.59375,
      "learning_rate": 2.715548909502039e-05,
      "loss": 0.8234,
      "step": 437580
    },
    {
      "epoch": 1.5336438542444282,
      "grad_norm": 3.296875,
      "learning_rate": 2.715484006635669e-05,
      "loss": 0.8739,
      "step": 437590
    },
    {
      "epoch": 1.533678901751324,
      "grad_norm": 2.90625,
      "learning_rate": 2.715419103769299e-05,
      "loss": 0.8265,
      "step": 437600
    },
    {
      "epoch": 1.5337139492582195,
      "grad_norm": 2.890625,
      "learning_rate": 2.7153542009029288e-05,
      "loss": 0.7995,
      "step": 437610
    },
    {
      "epoch": 1.533748996765115,
      "grad_norm": 3.109375,
      "learning_rate": 2.7152892980365586e-05,
      "loss": 0.9108,
      "step": 437620
    },
    {
      "epoch": 1.5337840442720108,
      "grad_norm": 2.6875,
      "learning_rate": 2.7152243951701884e-05,
      "loss": 0.8459,
      "step": 437630
    },
    {
      "epoch": 1.5338190917789063,
      "grad_norm": 2.46875,
      "learning_rate": 2.7151594923038182e-05,
      "loss": 0.847,
      "step": 437640
    },
    {
      "epoch": 1.5338541392858018,
      "grad_norm": 2.78125,
      "learning_rate": 2.715094589437448e-05,
      "loss": 0.766,
      "step": 437650
    },
    {
      "epoch": 1.5338891867926976,
      "grad_norm": 3.078125,
      "learning_rate": 2.7150296865710778e-05,
      "loss": 0.8338,
      "step": 437660
    },
    {
      "epoch": 1.5339242342995931,
      "grad_norm": 2.90625,
      "learning_rate": 2.7149647837047076e-05,
      "loss": 0.7633,
      "step": 437670
    },
    {
      "epoch": 1.5339592818064887,
      "grad_norm": 2.78125,
      "learning_rate": 2.7148998808383374e-05,
      "loss": 0.8635,
      "step": 437680
    },
    {
      "epoch": 1.5339943293133844,
      "grad_norm": 3.46875,
      "learning_rate": 2.7148349779719672e-05,
      "loss": 0.8228,
      "step": 437690
    },
    {
      "epoch": 1.5340293768202797,
      "grad_norm": 2.96875,
      "learning_rate": 2.714770075105597e-05,
      "loss": 0.8235,
      "step": 437700
    },
    {
      "epoch": 1.5340644243271755,
      "grad_norm": 3.234375,
      "learning_rate": 2.7147051722392268e-05,
      "loss": 0.8081,
      "step": 437710
    },
    {
      "epoch": 1.5340994718340712,
      "grad_norm": 2.859375,
      "learning_rate": 2.7146402693728566e-05,
      "loss": 0.8437,
      "step": 437720
    },
    {
      "epoch": 1.5341345193409666,
      "grad_norm": 3.015625,
      "learning_rate": 2.7145753665064864e-05,
      "loss": 0.8181,
      "step": 437730
    },
    {
      "epoch": 1.5341695668478623,
      "grad_norm": 2.359375,
      "learning_rate": 2.7145104636401166e-05,
      "loss": 0.8233,
      "step": 437740
    },
    {
      "epoch": 1.5342046143547579,
      "grad_norm": 2.578125,
      "learning_rate": 2.7144455607737464e-05,
      "loss": 0.8351,
      "step": 437750
    },
    {
      "epoch": 1.5342396618616534,
      "grad_norm": 3.1875,
      "learning_rate": 2.714380657907376e-05,
      "loss": 0.8707,
      "step": 437760
    },
    {
      "epoch": 1.5342747093685491,
      "grad_norm": 3.25,
      "learning_rate": 2.714315755041006e-05,
      "loss": 0.7712,
      "step": 437770
    },
    {
      "epoch": 1.5343097568754447,
      "grad_norm": 2.828125,
      "learning_rate": 2.7142508521746358e-05,
      "loss": 0.7809,
      "step": 437780
    },
    {
      "epoch": 1.5343448043823402,
      "grad_norm": 2.46875,
      "learning_rate": 2.7141859493082656e-05,
      "loss": 0.8031,
      "step": 437790
    },
    {
      "epoch": 1.534379851889236,
      "grad_norm": 2.78125,
      "learning_rate": 2.7141210464418954e-05,
      "loss": 0.8308,
      "step": 437800
    },
    {
      "epoch": 1.5344148993961313,
      "grad_norm": 3.0625,
      "learning_rate": 2.714056143575525e-05,
      "loss": 0.8188,
      "step": 437810
    },
    {
      "epoch": 1.534449946903027,
      "grad_norm": 3.171875,
      "learning_rate": 2.713991240709155e-05,
      "loss": 0.9069,
      "step": 437820
    },
    {
      "epoch": 1.5344849944099228,
      "grad_norm": 2.890625,
      "learning_rate": 2.7139263378427848e-05,
      "loss": 0.7636,
      "step": 437830
    },
    {
      "epoch": 1.5345200419168181,
      "grad_norm": 2.96875,
      "learning_rate": 2.7138614349764146e-05,
      "loss": 0.863,
      "step": 437840
    },
    {
      "epoch": 1.5345550894237139,
      "grad_norm": 2.71875,
      "learning_rate": 2.7137965321100444e-05,
      "loss": 0.7943,
      "step": 437850
    },
    {
      "epoch": 1.5345901369306094,
      "grad_norm": 2.765625,
      "learning_rate": 2.713731629243674e-05,
      "loss": 0.8821,
      "step": 437860
    },
    {
      "epoch": 1.534625184437505,
      "grad_norm": 2.78125,
      "learning_rate": 2.713666726377304e-05,
      "loss": 0.8514,
      "step": 437870
    },
    {
      "epoch": 1.5346602319444007,
      "grad_norm": 2.78125,
      "learning_rate": 2.7136018235109334e-05,
      "loss": 0.8378,
      "step": 437880
    },
    {
      "epoch": 1.5346952794512962,
      "grad_norm": 2.390625,
      "learning_rate": 2.7135369206445632e-05,
      "loss": 0.8143,
      "step": 437890
    },
    {
      "epoch": 1.5347303269581918,
      "grad_norm": 3.046875,
      "learning_rate": 2.713472017778193e-05,
      "loss": 0.8674,
      "step": 437900
    },
    {
      "epoch": 1.5347653744650875,
      "grad_norm": 2.96875,
      "learning_rate": 2.7134071149118228e-05,
      "loss": 0.9357,
      "step": 437910
    },
    {
      "epoch": 1.534800421971983,
      "grad_norm": 2.703125,
      "learning_rate": 2.7133422120454526e-05,
      "loss": 0.8234,
      "step": 437920
    },
    {
      "epoch": 1.5348354694788786,
      "grad_norm": 2.859375,
      "learning_rate": 2.7132773091790824e-05,
      "loss": 0.7388,
      "step": 437930
    },
    {
      "epoch": 1.5348705169857744,
      "grad_norm": 3.59375,
      "learning_rate": 2.7132124063127122e-05,
      "loss": 0.9119,
      "step": 437940
    },
    {
      "epoch": 1.5349055644926697,
      "grad_norm": 2.328125,
      "learning_rate": 2.713147503446342e-05,
      "loss": 0.853,
      "step": 437950
    },
    {
      "epoch": 1.5349406119995654,
      "grad_norm": 3.203125,
      "learning_rate": 2.7130826005799718e-05,
      "loss": 0.8846,
      "step": 437960
    },
    {
      "epoch": 1.534975659506461,
      "grad_norm": 2.46875,
      "learning_rate": 2.713017697713602e-05,
      "loss": 0.7969,
      "step": 437970
    },
    {
      "epoch": 1.5350107070133565,
      "grad_norm": 2.96875,
      "learning_rate": 2.7129527948472318e-05,
      "loss": 0.7754,
      "step": 437980
    },
    {
      "epoch": 1.5350457545202523,
      "grad_norm": 3.140625,
      "learning_rate": 2.7128878919808616e-05,
      "loss": 0.8675,
      "step": 437990
    },
    {
      "epoch": 1.5350808020271478,
      "grad_norm": 3.234375,
      "learning_rate": 2.7128229891144914e-05,
      "loss": 0.8458,
      "step": 438000
    },
    {
      "epoch": 1.5351158495340433,
      "grad_norm": 2.78125,
      "learning_rate": 2.712758086248121e-05,
      "loss": 0.7821,
      "step": 438010
    },
    {
      "epoch": 1.535150897040939,
      "grad_norm": 2.765625,
      "learning_rate": 2.712693183381751e-05,
      "loss": 0.8241,
      "step": 438020
    },
    {
      "epoch": 1.5351859445478346,
      "grad_norm": 3.203125,
      "learning_rate": 2.7126282805153808e-05,
      "loss": 0.8034,
      "step": 438030
    },
    {
      "epoch": 1.5352209920547302,
      "grad_norm": 2.890625,
      "learning_rate": 2.7125633776490106e-05,
      "loss": 0.867,
      "step": 438040
    },
    {
      "epoch": 1.535256039561626,
      "grad_norm": 2.6875,
      "learning_rate": 2.7124984747826404e-05,
      "loss": 0.9251,
      "step": 438050
    },
    {
      "epoch": 1.5352910870685212,
      "grad_norm": 2.90625,
      "learning_rate": 2.71243357191627e-05,
      "loss": 0.845,
      "step": 438060
    },
    {
      "epoch": 1.535326134575417,
      "grad_norm": 3.6875,
      "learning_rate": 2.7123686690499e-05,
      "loss": 0.9147,
      "step": 438070
    },
    {
      "epoch": 1.5353611820823125,
      "grad_norm": 3.1875,
      "learning_rate": 2.7123037661835298e-05,
      "loss": 0.8151,
      "step": 438080
    },
    {
      "epoch": 1.535396229589208,
      "grad_norm": 2.859375,
      "learning_rate": 2.7122388633171596e-05,
      "loss": 0.8502,
      "step": 438090
    },
    {
      "epoch": 1.5354312770961038,
      "grad_norm": 3.078125,
      "learning_rate": 2.7121739604507894e-05,
      "loss": 0.9099,
      "step": 438100
    },
    {
      "epoch": 1.5354663246029994,
      "grad_norm": 3.1875,
      "learning_rate": 2.7121090575844195e-05,
      "loss": 0.8469,
      "step": 438110
    },
    {
      "epoch": 1.535501372109895,
      "grad_norm": 3.03125,
      "learning_rate": 2.7120441547180493e-05,
      "loss": 0.8358,
      "step": 438120
    },
    {
      "epoch": 1.5355364196167907,
      "grad_norm": 2.609375,
      "learning_rate": 2.711979251851679e-05,
      "loss": 0.8541,
      "step": 438130
    },
    {
      "epoch": 1.5355714671236862,
      "grad_norm": 2.90625,
      "learning_rate": 2.711914348985309e-05,
      "loss": 0.7793,
      "step": 438140
    },
    {
      "epoch": 1.5356065146305817,
      "grad_norm": 2.546875,
      "learning_rate": 2.7118494461189387e-05,
      "loss": 0.8819,
      "step": 438150
    },
    {
      "epoch": 1.5356415621374775,
      "grad_norm": 2.65625,
      "learning_rate": 2.7117845432525685e-05,
      "loss": 0.8535,
      "step": 438160
    },
    {
      "epoch": 1.5356766096443728,
      "grad_norm": 2.71875,
      "learning_rate": 2.7117196403861983e-05,
      "loss": 0.8459,
      "step": 438170
    },
    {
      "epoch": 1.5357116571512686,
      "grad_norm": 2.671875,
      "learning_rate": 2.711654737519828e-05,
      "loss": 0.7882,
      "step": 438180
    },
    {
      "epoch": 1.535746704658164,
      "grad_norm": 2.9375,
      "learning_rate": 2.711589834653458e-05,
      "loss": 0.8914,
      "step": 438190
    },
    {
      "epoch": 1.5357817521650596,
      "grad_norm": 2.921875,
      "learning_rate": 2.7115249317870877e-05,
      "loss": 0.8723,
      "step": 438200
    },
    {
      "epoch": 1.5358167996719554,
      "grad_norm": 2.875,
      "learning_rate": 2.7114600289207175e-05,
      "loss": 0.8127,
      "step": 438210
    },
    {
      "epoch": 1.535851847178851,
      "grad_norm": 2.875,
      "learning_rate": 2.7113951260543473e-05,
      "loss": 0.8236,
      "step": 438220
    },
    {
      "epoch": 1.5358868946857465,
      "grad_norm": 2.75,
      "learning_rate": 2.711330223187977e-05,
      "loss": 0.8015,
      "step": 438230
    },
    {
      "epoch": 1.5359219421926422,
      "grad_norm": 2.84375,
      "learning_rate": 2.7112653203216072e-05,
      "loss": 0.7634,
      "step": 438240
    },
    {
      "epoch": 1.5359569896995378,
      "grad_norm": 2.671875,
      "learning_rate": 2.711200417455237e-05,
      "loss": 0.7698,
      "step": 438250
    },
    {
      "epoch": 1.5359920372064333,
      "grad_norm": 2.640625,
      "learning_rate": 2.711135514588866e-05,
      "loss": 0.8302,
      "step": 438260
    },
    {
      "epoch": 1.536027084713329,
      "grad_norm": 3.03125,
      "learning_rate": 2.711070611722496e-05,
      "loss": 0.7664,
      "step": 438270
    },
    {
      "epoch": 1.5360621322202244,
      "grad_norm": 3.140625,
      "learning_rate": 2.7110057088561258e-05,
      "loss": 0.8313,
      "step": 438280
    },
    {
      "epoch": 1.5360971797271201,
      "grad_norm": 2.703125,
      "learning_rate": 2.7109408059897556e-05,
      "loss": 0.8352,
      "step": 438290
    },
    {
      "epoch": 1.5361322272340157,
      "grad_norm": 2.828125,
      "learning_rate": 2.7108759031233854e-05,
      "loss": 0.8841,
      "step": 438300
    },
    {
      "epoch": 1.5361672747409112,
      "grad_norm": 2.765625,
      "learning_rate": 2.710811000257015e-05,
      "loss": 0.8793,
      "step": 438310
    },
    {
      "epoch": 1.536202322247807,
      "grad_norm": 2.90625,
      "learning_rate": 2.710746097390645e-05,
      "loss": 0.8131,
      "step": 438320
    },
    {
      "epoch": 1.5362373697547025,
      "grad_norm": 2.796875,
      "learning_rate": 2.710681194524275e-05,
      "loss": 0.871,
      "step": 438330
    },
    {
      "epoch": 1.536272417261598,
      "grad_norm": 2.828125,
      "learning_rate": 2.710616291657905e-05,
      "loss": 0.7848,
      "step": 438340
    },
    {
      "epoch": 1.5363074647684938,
      "grad_norm": 2.734375,
      "learning_rate": 2.7105513887915347e-05,
      "loss": 0.842,
      "step": 438350
    },
    {
      "epoch": 1.5363425122753893,
      "grad_norm": 3.046875,
      "learning_rate": 2.7104864859251645e-05,
      "loss": 0.7946,
      "step": 438360
    },
    {
      "epoch": 1.5363775597822849,
      "grad_norm": 3.171875,
      "learning_rate": 2.7104215830587943e-05,
      "loss": 0.8686,
      "step": 438370
    },
    {
      "epoch": 1.5364126072891806,
      "grad_norm": 3.15625,
      "learning_rate": 2.710356680192424e-05,
      "loss": 0.8446,
      "step": 438380
    },
    {
      "epoch": 1.536447654796076,
      "grad_norm": 3.078125,
      "learning_rate": 2.710291777326054e-05,
      "loss": 0.8484,
      "step": 438390
    },
    {
      "epoch": 1.5364827023029717,
      "grad_norm": 2.640625,
      "learning_rate": 2.7102268744596837e-05,
      "loss": 0.7749,
      "step": 438400
    },
    {
      "epoch": 1.5365177498098674,
      "grad_norm": 2.984375,
      "learning_rate": 2.7101619715933135e-05,
      "loss": 0.816,
      "step": 438410
    },
    {
      "epoch": 1.5365527973167628,
      "grad_norm": 3.015625,
      "learning_rate": 2.7100970687269433e-05,
      "loss": 0.8285,
      "step": 438420
    },
    {
      "epoch": 1.5365878448236585,
      "grad_norm": 3.359375,
      "learning_rate": 2.710032165860573e-05,
      "loss": 0.8645,
      "step": 438430
    },
    {
      "epoch": 1.536622892330554,
      "grad_norm": 2.859375,
      "learning_rate": 2.709967262994203e-05,
      "loss": 0.8656,
      "step": 438440
    },
    {
      "epoch": 1.5366579398374496,
      "grad_norm": 3.265625,
      "learning_rate": 2.7099023601278327e-05,
      "loss": 0.9454,
      "step": 438450
    },
    {
      "epoch": 1.5366929873443453,
      "grad_norm": 2.78125,
      "learning_rate": 2.7098374572614625e-05,
      "loss": 0.892,
      "step": 438460
    },
    {
      "epoch": 1.5367280348512409,
      "grad_norm": 2.625,
      "learning_rate": 2.7097725543950926e-05,
      "loss": 0.8521,
      "step": 438470
    },
    {
      "epoch": 1.5367630823581364,
      "grad_norm": 3.078125,
      "learning_rate": 2.7097076515287224e-05,
      "loss": 0.8218,
      "step": 438480
    },
    {
      "epoch": 1.5367981298650322,
      "grad_norm": 3.0,
      "learning_rate": 2.7096427486623522e-05,
      "loss": 0.8485,
      "step": 438490
    },
    {
      "epoch": 1.5368331773719275,
      "grad_norm": 3.046875,
      "learning_rate": 2.709577845795982e-05,
      "loss": 0.8276,
      "step": 438500
    },
    {
      "epoch": 1.5368682248788232,
      "grad_norm": 2.90625,
      "learning_rate": 2.709512942929612e-05,
      "loss": 0.8442,
      "step": 438510
    },
    {
      "epoch": 1.536903272385719,
      "grad_norm": 3.03125,
      "learning_rate": 2.7094480400632416e-05,
      "loss": 0.8051,
      "step": 438520
    },
    {
      "epoch": 1.5369383198926143,
      "grad_norm": 3.1875,
      "learning_rate": 2.7093831371968714e-05,
      "loss": 0.8021,
      "step": 438530
    },
    {
      "epoch": 1.53697336739951,
      "grad_norm": 2.984375,
      "learning_rate": 2.7093182343305012e-05,
      "loss": 0.8844,
      "step": 438540
    },
    {
      "epoch": 1.5370084149064056,
      "grad_norm": 2.921875,
      "learning_rate": 2.709253331464131e-05,
      "loss": 0.8889,
      "step": 438550
    },
    {
      "epoch": 1.5370434624133011,
      "grad_norm": 2.921875,
      "learning_rate": 2.709188428597761e-05,
      "loss": 0.83,
      "step": 438560
    },
    {
      "epoch": 1.537078509920197,
      "grad_norm": 2.84375,
      "learning_rate": 2.7091235257313906e-05,
      "loss": 0.8497,
      "step": 438570
    },
    {
      "epoch": 1.5371135574270924,
      "grad_norm": 2.875,
      "learning_rate": 2.7090586228650204e-05,
      "loss": 0.8414,
      "step": 438580
    },
    {
      "epoch": 1.537148604933988,
      "grad_norm": 2.640625,
      "learning_rate": 2.7089937199986502e-05,
      "loss": 0.769,
      "step": 438590
    },
    {
      "epoch": 1.5371836524408837,
      "grad_norm": 2.8125,
      "learning_rate": 2.70892881713228e-05,
      "loss": 0.8746,
      "step": 438600
    },
    {
      "epoch": 1.5372186999477793,
      "grad_norm": 2.34375,
      "learning_rate": 2.7088639142659102e-05,
      "loss": 0.791,
      "step": 438610
    },
    {
      "epoch": 1.5372537474546748,
      "grad_norm": 2.71875,
      "learning_rate": 2.70879901139954e-05,
      "loss": 0.8426,
      "step": 438620
    },
    {
      "epoch": 1.5372887949615706,
      "grad_norm": 3.015625,
      "learning_rate": 2.708734108533169e-05,
      "loss": 0.7886,
      "step": 438630
    },
    {
      "epoch": 1.5373238424684659,
      "grad_norm": 3.171875,
      "learning_rate": 2.708669205666799e-05,
      "loss": 0.9296,
      "step": 438640
    },
    {
      "epoch": 1.5373588899753616,
      "grad_norm": 3.515625,
      "learning_rate": 2.7086043028004287e-05,
      "loss": 0.8646,
      "step": 438650
    },
    {
      "epoch": 1.5373939374822572,
      "grad_norm": 2.84375,
      "learning_rate": 2.7085393999340585e-05,
      "loss": 0.8881,
      "step": 438660
    },
    {
      "epoch": 1.5374289849891527,
      "grad_norm": 2.984375,
      "learning_rate": 2.7084744970676883e-05,
      "loss": 0.8193,
      "step": 438670
    },
    {
      "epoch": 1.5374640324960485,
      "grad_norm": 2.734375,
      "learning_rate": 2.708409594201318e-05,
      "loss": 0.8594,
      "step": 438680
    },
    {
      "epoch": 1.537499080002944,
      "grad_norm": 2.484375,
      "learning_rate": 2.708344691334948e-05,
      "loss": 0.8039,
      "step": 438690
    },
    {
      "epoch": 1.5375341275098395,
      "grad_norm": 3.0625,
      "learning_rate": 2.708279788468578e-05,
      "loss": 0.8075,
      "step": 438700
    },
    {
      "epoch": 1.5375691750167353,
      "grad_norm": 2.75,
      "learning_rate": 2.708214885602208e-05,
      "loss": 0.9369,
      "step": 438710
    },
    {
      "epoch": 1.5376042225236308,
      "grad_norm": 2.25,
      "learning_rate": 2.7081499827358376e-05,
      "loss": 0.7984,
      "step": 438720
    },
    {
      "epoch": 1.5376392700305264,
      "grad_norm": 2.671875,
      "learning_rate": 2.7080850798694674e-05,
      "loss": 0.8117,
      "step": 438730
    },
    {
      "epoch": 1.5376743175374221,
      "grad_norm": 2.796875,
      "learning_rate": 2.7080201770030972e-05,
      "loss": 0.7915,
      "step": 438740
    },
    {
      "epoch": 1.5377093650443174,
      "grad_norm": 2.65625,
      "learning_rate": 2.707955274136727e-05,
      "loss": 0.8602,
      "step": 438750
    },
    {
      "epoch": 1.5377444125512132,
      "grad_norm": 3.09375,
      "learning_rate": 2.707890371270357e-05,
      "loss": 0.8309,
      "step": 438760
    },
    {
      "epoch": 1.5377794600581087,
      "grad_norm": 2.59375,
      "learning_rate": 2.7078254684039866e-05,
      "loss": 0.886,
      "step": 438770
    },
    {
      "epoch": 1.5378145075650043,
      "grad_norm": 2.375,
      "learning_rate": 2.7077605655376164e-05,
      "loss": 0.7807,
      "step": 438780
    },
    {
      "epoch": 1.5378495550719,
      "grad_norm": 2.953125,
      "learning_rate": 2.7076956626712462e-05,
      "loss": 0.7886,
      "step": 438790
    },
    {
      "epoch": 1.5378846025787956,
      "grad_norm": 2.578125,
      "learning_rate": 2.707630759804876e-05,
      "loss": 0.8416,
      "step": 438800
    },
    {
      "epoch": 1.537919650085691,
      "grad_norm": 2.84375,
      "learning_rate": 2.707565856938506e-05,
      "loss": 0.8545,
      "step": 438810
    },
    {
      "epoch": 1.5379546975925869,
      "grad_norm": 3.109375,
      "learning_rate": 2.7075009540721356e-05,
      "loss": 0.8237,
      "step": 438820
    },
    {
      "epoch": 1.5379897450994824,
      "grad_norm": 2.734375,
      "learning_rate": 2.7074360512057654e-05,
      "loss": 0.7486,
      "step": 438830
    },
    {
      "epoch": 1.538024792606378,
      "grad_norm": 2.890625,
      "learning_rate": 2.7073711483393956e-05,
      "loss": 0.8103,
      "step": 438840
    },
    {
      "epoch": 1.5380598401132737,
      "grad_norm": 3.125,
      "learning_rate": 2.7073062454730254e-05,
      "loss": 0.8298,
      "step": 438850
    },
    {
      "epoch": 1.538094887620169,
      "grad_norm": 2.984375,
      "learning_rate": 2.7072413426066552e-05,
      "loss": 0.821,
      "step": 438860
    },
    {
      "epoch": 1.5381299351270648,
      "grad_norm": 3.0,
      "learning_rate": 2.707176439740285e-05,
      "loss": 0.8265,
      "step": 438870
    },
    {
      "epoch": 1.5381649826339603,
      "grad_norm": 3.1875,
      "learning_rate": 2.7071115368739148e-05,
      "loss": 0.8721,
      "step": 438880
    },
    {
      "epoch": 1.5382000301408558,
      "grad_norm": 2.953125,
      "learning_rate": 2.7070466340075446e-05,
      "loss": 0.7232,
      "step": 438890
    },
    {
      "epoch": 1.5382350776477516,
      "grad_norm": 2.765625,
      "learning_rate": 2.7069817311411744e-05,
      "loss": 0.8686,
      "step": 438900
    },
    {
      "epoch": 1.5382701251546471,
      "grad_norm": 2.921875,
      "learning_rate": 2.7069168282748042e-05,
      "loss": 0.8198,
      "step": 438910
    },
    {
      "epoch": 1.5383051726615427,
      "grad_norm": 3.078125,
      "learning_rate": 2.706851925408434e-05,
      "loss": 0.8511,
      "step": 438920
    },
    {
      "epoch": 1.5383402201684384,
      "grad_norm": 2.953125,
      "learning_rate": 2.7067870225420638e-05,
      "loss": 0.868,
      "step": 438930
    },
    {
      "epoch": 1.538375267675334,
      "grad_norm": 2.59375,
      "learning_rate": 2.7067221196756936e-05,
      "loss": 0.8597,
      "step": 438940
    },
    {
      "epoch": 1.5384103151822295,
      "grad_norm": 2.9375,
      "learning_rate": 2.7066572168093234e-05,
      "loss": 0.8268,
      "step": 438950
    },
    {
      "epoch": 1.5384453626891252,
      "grad_norm": 3.109375,
      "learning_rate": 2.7065923139429532e-05,
      "loss": 0.8464,
      "step": 438960
    },
    {
      "epoch": 1.5384804101960206,
      "grad_norm": 3.25,
      "learning_rate": 2.706527411076583e-05,
      "loss": 0.8676,
      "step": 438970
    },
    {
      "epoch": 1.5385154577029163,
      "grad_norm": 3.140625,
      "learning_rate": 2.706462508210213e-05,
      "loss": 0.8962,
      "step": 438980
    },
    {
      "epoch": 1.5385505052098118,
      "grad_norm": 2.515625,
      "learning_rate": 2.706397605343843e-05,
      "loss": 0.7668,
      "step": 438990
    },
    {
      "epoch": 1.5385855527167074,
      "grad_norm": 2.734375,
      "learning_rate": 2.7063327024774727e-05,
      "loss": 0.8397,
      "step": 439000
    },
    {
      "epoch": 1.5386206002236031,
      "grad_norm": 3.21875,
      "learning_rate": 2.706267799611102e-05,
      "loss": 0.8487,
      "step": 439010
    },
    {
      "epoch": 1.5386556477304987,
      "grad_norm": 3.125,
      "learning_rate": 2.7062028967447316e-05,
      "loss": 0.7694,
      "step": 439020
    },
    {
      "epoch": 1.5386906952373942,
      "grad_norm": 3.109375,
      "learning_rate": 2.7061379938783614e-05,
      "loss": 0.9374,
      "step": 439030
    },
    {
      "epoch": 1.53872574274429,
      "grad_norm": 2.828125,
      "learning_rate": 2.7060730910119912e-05,
      "loss": 0.8164,
      "step": 439040
    },
    {
      "epoch": 1.5387607902511855,
      "grad_norm": 3.546875,
      "learning_rate": 2.706008188145621e-05,
      "loss": 0.8625,
      "step": 439050
    },
    {
      "epoch": 1.538795837758081,
      "grad_norm": 2.5625,
      "learning_rate": 2.705943285279251e-05,
      "loss": 0.8245,
      "step": 439060
    },
    {
      "epoch": 1.5388308852649768,
      "grad_norm": 2.84375,
      "learning_rate": 2.705878382412881e-05,
      "loss": 0.7963,
      "step": 439070
    },
    {
      "epoch": 1.5388659327718721,
      "grad_norm": 2.78125,
      "learning_rate": 2.7058134795465108e-05,
      "loss": 0.8629,
      "step": 439080
    },
    {
      "epoch": 1.5389009802787679,
      "grad_norm": 2.625,
      "learning_rate": 2.7057485766801406e-05,
      "loss": 0.8029,
      "step": 439090
    },
    {
      "epoch": 1.5389360277856636,
      "grad_norm": 2.9375,
      "learning_rate": 2.7056836738137704e-05,
      "loss": 0.8702,
      "step": 439100
    },
    {
      "epoch": 1.538971075292559,
      "grad_norm": 2.8125,
      "learning_rate": 2.7056187709474002e-05,
      "loss": 0.8275,
      "step": 439110
    },
    {
      "epoch": 1.5390061227994547,
      "grad_norm": 2.90625,
      "learning_rate": 2.70555386808103e-05,
      "loss": 0.8952,
      "step": 439120
    },
    {
      "epoch": 1.5390411703063502,
      "grad_norm": 3.140625,
      "learning_rate": 2.7054889652146598e-05,
      "loss": 0.7945,
      "step": 439130
    },
    {
      "epoch": 1.5390762178132458,
      "grad_norm": 3.015625,
      "learning_rate": 2.7054240623482896e-05,
      "loss": 0.8225,
      "step": 439140
    },
    {
      "epoch": 1.5391112653201415,
      "grad_norm": 2.796875,
      "learning_rate": 2.7053591594819194e-05,
      "loss": 0.779,
      "step": 439150
    },
    {
      "epoch": 1.539146312827037,
      "grad_norm": 2.96875,
      "learning_rate": 2.7052942566155492e-05,
      "loss": 0.8917,
      "step": 439160
    },
    {
      "epoch": 1.5391813603339326,
      "grad_norm": 3.203125,
      "learning_rate": 2.705229353749179e-05,
      "loss": 0.7611,
      "step": 439170
    },
    {
      "epoch": 1.5392164078408284,
      "grad_norm": 3.109375,
      "learning_rate": 2.7051644508828088e-05,
      "loss": 0.8297,
      "step": 439180
    },
    {
      "epoch": 1.5392514553477237,
      "grad_norm": 3.140625,
      "learning_rate": 2.7050995480164386e-05,
      "loss": 0.7964,
      "step": 439190
    },
    {
      "epoch": 1.5392865028546194,
      "grad_norm": 2.515625,
      "learning_rate": 2.7050346451500684e-05,
      "loss": 0.8225,
      "step": 439200
    },
    {
      "epoch": 1.5393215503615152,
      "grad_norm": 2.921875,
      "learning_rate": 2.7049697422836985e-05,
      "loss": 0.8491,
      "step": 439210
    },
    {
      "epoch": 1.5393565978684105,
      "grad_norm": 3.3125,
      "learning_rate": 2.7049048394173283e-05,
      "loss": 0.8674,
      "step": 439220
    },
    {
      "epoch": 1.5393916453753063,
      "grad_norm": 2.671875,
      "learning_rate": 2.704839936550958e-05,
      "loss": 0.7985,
      "step": 439230
    },
    {
      "epoch": 1.5394266928822018,
      "grad_norm": 3.09375,
      "learning_rate": 2.704775033684588e-05,
      "loss": 0.8374,
      "step": 439240
    },
    {
      "epoch": 1.5394617403890973,
      "grad_norm": 3.1875,
      "learning_rate": 2.7047101308182177e-05,
      "loss": 0.8771,
      "step": 439250
    },
    {
      "epoch": 1.539496787895993,
      "grad_norm": 3.21875,
      "learning_rate": 2.7046452279518475e-05,
      "loss": 0.8616,
      "step": 439260
    },
    {
      "epoch": 1.5395318354028886,
      "grad_norm": 3.515625,
      "learning_rate": 2.7045803250854773e-05,
      "loss": 0.8759,
      "step": 439270
    },
    {
      "epoch": 1.5395668829097842,
      "grad_norm": 2.984375,
      "learning_rate": 2.704515422219107e-05,
      "loss": 0.9049,
      "step": 439280
    },
    {
      "epoch": 1.53960193041668,
      "grad_norm": 2.921875,
      "learning_rate": 2.704450519352737e-05,
      "loss": 0.7148,
      "step": 439290
    },
    {
      "epoch": 1.5396369779235755,
      "grad_norm": 2.671875,
      "learning_rate": 2.7043856164863667e-05,
      "loss": 0.7773,
      "step": 439300
    },
    {
      "epoch": 1.539672025430471,
      "grad_norm": 2.640625,
      "learning_rate": 2.7043207136199965e-05,
      "loss": 0.802,
      "step": 439310
    },
    {
      "epoch": 1.5397070729373667,
      "grad_norm": 3.390625,
      "learning_rate": 2.7042558107536263e-05,
      "loss": 0.9096,
      "step": 439320
    },
    {
      "epoch": 1.539742120444262,
      "grad_norm": 2.953125,
      "learning_rate": 2.704190907887256e-05,
      "loss": 0.8198,
      "step": 439330
    },
    {
      "epoch": 1.5397771679511578,
      "grad_norm": 2.828125,
      "learning_rate": 2.7041260050208863e-05,
      "loss": 0.8144,
      "step": 439340
    },
    {
      "epoch": 1.5398122154580534,
      "grad_norm": 2.671875,
      "learning_rate": 2.704061102154516e-05,
      "loss": 0.7696,
      "step": 439350
    },
    {
      "epoch": 1.539847262964949,
      "grad_norm": 3.171875,
      "learning_rate": 2.703996199288146e-05,
      "loss": 0.8692,
      "step": 439360
    },
    {
      "epoch": 1.5398823104718447,
      "grad_norm": 3.0,
      "learning_rate": 2.7039312964217757e-05,
      "loss": 0.7337,
      "step": 439370
    },
    {
      "epoch": 1.5399173579787402,
      "grad_norm": 2.953125,
      "learning_rate": 2.7038663935554055e-05,
      "loss": 0.8689,
      "step": 439380
    },
    {
      "epoch": 1.5399524054856357,
      "grad_norm": 3.109375,
      "learning_rate": 2.7038014906890346e-05,
      "loss": 0.9123,
      "step": 439390
    },
    {
      "epoch": 1.5399874529925315,
      "grad_norm": 2.59375,
      "learning_rate": 2.7037365878226644e-05,
      "loss": 0.786,
      "step": 439400
    },
    {
      "epoch": 1.540022500499427,
      "grad_norm": 3.078125,
      "learning_rate": 2.7036716849562942e-05,
      "loss": 0.9043,
      "step": 439410
    },
    {
      "epoch": 1.5400575480063226,
      "grad_norm": 3.3125,
      "learning_rate": 2.703606782089924e-05,
      "loss": 0.8813,
      "step": 439420
    },
    {
      "epoch": 1.5400925955132183,
      "grad_norm": 2.96875,
      "learning_rate": 2.703541879223554e-05,
      "loss": 0.8454,
      "step": 439430
    },
    {
      "epoch": 1.5401276430201136,
      "grad_norm": 2.703125,
      "learning_rate": 2.703476976357184e-05,
      "loss": 0.8065,
      "step": 439440
    },
    {
      "epoch": 1.5401626905270094,
      "grad_norm": 2.828125,
      "learning_rate": 2.7034120734908137e-05,
      "loss": 0.807,
      "step": 439450
    },
    {
      "epoch": 1.540197738033905,
      "grad_norm": 2.59375,
      "learning_rate": 2.7033471706244435e-05,
      "loss": 0.8412,
      "step": 439460
    },
    {
      "epoch": 1.5402327855408005,
      "grad_norm": 3.046875,
      "learning_rate": 2.7032822677580733e-05,
      "loss": 0.9062,
      "step": 439470
    },
    {
      "epoch": 1.5402678330476962,
      "grad_norm": 2.765625,
      "learning_rate": 2.703217364891703e-05,
      "loss": 0.823,
      "step": 439480
    },
    {
      "epoch": 1.5403028805545917,
      "grad_norm": 2.859375,
      "learning_rate": 2.703152462025333e-05,
      "loss": 0.8237,
      "step": 439490
    },
    {
      "epoch": 1.5403379280614873,
      "grad_norm": 2.640625,
      "learning_rate": 2.7030875591589627e-05,
      "loss": 0.81,
      "step": 439500
    },
    {
      "epoch": 1.540372975568383,
      "grad_norm": 3.0625,
      "learning_rate": 2.7030226562925925e-05,
      "loss": 0.8134,
      "step": 439510
    },
    {
      "epoch": 1.5404080230752786,
      "grad_norm": 3.09375,
      "learning_rate": 2.7029577534262223e-05,
      "loss": 0.8113,
      "step": 439520
    },
    {
      "epoch": 1.5404430705821741,
      "grad_norm": 3.0625,
      "learning_rate": 2.702892850559852e-05,
      "loss": 0.912,
      "step": 439530
    },
    {
      "epoch": 1.5404781180890699,
      "grad_norm": 3.078125,
      "learning_rate": 2.702827947693482e-05,
      "loss": 0.869,
      "step": 439540
    },
    {
      "epoch": 1.5405131655959652,
      "grad_norm": 2.875,
      "learning_rate": 2.7027630448271117e-05,
      "loss": 0.7908,
      "step": 439550
    },
    {
      "epoch": 1.540548213102861,
      "grad_norm": 2.9375,
      "learning_rate": 2.7026981419607415e-05,
      "loss": 0.836,
      "step": 439560
    },
    {
      "epoch": 1.5405832606097565,
      "grad_norm": 3.140625,
      "learning_rate": 2.7026332390943717e-05,
      "loss": 0.8512,
      "step": 439570
    },
    {
      "epoch": 1.540618308116652,
      "grad_norm": 2.625,
      "learning_rate": 2.7025683362280015e-05,
      "loss": 0.8439,
      "step": 439580
    },
    {
      "epoch": 1.5406533556235478,
      "grad_norm": 2.859375,
      "learning_rate": 2.7025034333616313e-05,
      "loss": 0.8543,
      "step": 439590
    },
    {
      "epoch": 1.5406884031304433,
      "grad_norm": 3.15625,
      "learning_rate": 2.702438530495261e-05,
      "loss": 0.8577,
      "step": 439600
    },
    {
      "epoch": 1.5407234506373388,
      "grad_norm": 3.203125,
      "learning_rate": 2.702373627628891e-05,
      "loss": 0.8711,
      "step": 439610
    },
    {
      "epoch": 1.5407584981442346,
      "grad_norm": 2.671875,
      "learning_rate": 2.7023087247625207e-05,
      "loss": 0.8309,
      "step": 439620
    },
    {
      "epoch": 1.5407935456511301,
      "grad_norm": 3.046875,
      "learning_rate": 2.7022438218961505e-05,
      "loss": 0.8214,
      "step": 439630
    },
    {
      "epoch": 1.5408285931580257,
      "grad_norm": 3.15625,
      "learning_rate": 2.7021789190297803e-05,
      "loss": 0.8177,
      "step": 439640
    },
    {
      "epoch": 1.5408636406649214,
      "grad_norm": 2.484375,
      "learning_rate": 2.70211401616341e-05,
      "loss": 0.8668,
      "step": 439650
    },
    {
      "epoch": 1.5408986881718167,
      "grad_norm": 2.90625,
      "learning_rate": 2.70204911329704e-05,
      "loss": 0.8626,
      "step": 439660
    },
    {
      "epoch": 1.5409337356787125,
      "grad_norm": 3.265625,
      "learning_rate": 2.7019842104306697e-05,
      "loss": 0.8284,
      "step": 439670
    },
    {
      "epoch": 1.540968783185608,
      "grad_norm": 3.140625,
      "learning_rate": 2.7019193075642995e-05,
      "loss": 0.8582,
      "step": 439680
    },
    {
      "epoch": 1.5410038306925036,
      "grad_norm": 3.109375,
      "learning_rate": 2.7018544046979293e-05,
      "loss": 0.8571,
      "step": 439690
    },
    {
      "epoch": 1.5410388781993993,
      "grad_norm": 2.671875,
      "learning_rate": 2.701789501831559e-05,
      "loss": 0.8599,
      "step": 439700
    },
    {
      "epoch": 1.5410739257062949,
      "grad_norm": 2.9375,
      "learning_rate": 2.7017245989651892e-05,
      "loss": 0.8415,
      "step": 439710
    },
    {
      "epoch": 1.5411089732131904,
      "grad_norm": 2.953125,
      "learning_rate": 2.701659696098819e-05,
      "loss": 0.8133,
      "step": 439720
    },
    {
      "epoch": 1.5411440207200862,
      "grad_norm": 3.390625,
      "learning_rate": 2.7015947932324488e-05,
      "loss": 0.888,
      "step": 439730
    },
    {
      "epoch": 1.5411790682269817,
      "grad_norm": 3.390625,
      "learning_rate": 2.7015298903660786e-05,
      "loss": 0.8973,
      "step": 439740
    },
    {
      "epoch": 1.5412141157338772,
      "grad_norm": 2.59375,
      "learning_rate": 2.7014649874997084e-05,
      "loss": 0.8211,
      "step": 439750
    },
    {
      "epoch": 1.541249163240773,
      "grad_norm": 3.1875,
      "learning_rate": 2.7014000846333375e-05,
      "loss": 0.8365,
      "step": 439760
    },
    {
      "epoch": 1.5412842107476683,
      "grad_norm": 2.671875,
      "learning_rate": 2.7013351817669673e-05,
      "loss": 0.8185,
      "step": 439770
    },
    {
      "epoch": 1.541319258254564,
      "grad_norm": 3.34375,
      "learning_rate": 2.701270278900597e-05,
      "loss": 0.8409,
      "step": 439780
    },
    {
      "epoch": 1.5413543057614598,
      "grad_norm": 2.703125,
      "learning_rate": 2.701205376034227e-05,
      "loss": 0.8696,
      "step": 439790
    },
    {
      "epoch": 1.5413893532683551,
      "grad_norm": 2.9375,
      "learning_rate": 2.701140473167857e-05,
      "loss": 0.8081,
      "step": 439800
    },
    {
      "epoch": 1.541424400775251,
      "grad_norm": 2.5,
      "learning_rate": 2.701075570301487e-05,
      "loss": 0.8667,
      "step": 439810
    },
    {
      "epoch": 1.5414594482821464,
      "grad_norm": 2.859375,
      "learning_rate": 2.7010106674351167e-05,
      "loss": 0.7939,
      "step": 439820
    },
    {
      "epoch": 1.541494495789042,
      "grad_norm": 3.0625,
      "learning_rate": 2.7009457645687465e-05,
      "loss": 0.8981,
      "step": 439830
    },
    {
      "epoch": 1.5415295432959377,
      "grad_norm": 2.875,
      "learning_rate": 2.7008808617023763e-05,
      "loss": 0.7869,
      "step": 439840
    },
    {
      "epoch": 1.5415645908028333,
      "grad_norm": 2.8125,
      "learning_rate": 2.700815958836006e-05,
      "loss": 0.9297,
      "step": 439850
    },
    {
      "epoch": 1.5415996383097288,
      "grad_norm": 3.53125,
      "learning_rate": 2.700751055969636e-05,
      "loss": 0.7486,
      "step": 439860
    },
    {
      "epoch": 1.5416346858166246,
      "grad_norm": 2.671875,
      "learning_rate": 2.7006861531032657e-05,
      "loss": 0.8207,
      "step": 439870
    },
    {
      "epoch": 1.54166973332352,
      "grad_norm": 2.703125,
      "learning_rate": 2.7006212502368955e-05,
      "loss": 0.8537,
      "step": 439880
    },
    {
      "epoch": 1.5417047808304156,
      "grad_norm": 2.96875,
      "learning_rate": 2.7005563473705253e-05,
      "loss": 0.8473,
      "step": 439890
    },
    {
      "epoch": 1.5417398283373114,
      "grad_norm": 2.609375,
      "learning_rate": 2.700491444504155e-05,
      "loss": 0.766,
      "step": 439900
    },
    {
      "epoch": 1.5417748758442067,
      "grad_norm": 2.9375,
      "learning_rate": 2.700426541637785e-05,
      "loss": 0.8467,
      "step": 439910
    },
    {
      "epoch": 1.5418099233511025,
      "grad_norm": 2.6875,
      "learning_rate": 2.7003616387714147e-05,
      "loss": 0.7365,
      "step": 439920
    },
    {
      "epoch": 1.541844970857998,
      "grad_norm": 2.71875,
      "learning_rate": 2.7002967359050445e-05,
      "loss": 0.9259,
      "step": 439930
    },
    {
      "epoch": 1.5418800183648935,
      "grad_norm": 2.578125,
      "learning_rate": 2.7002318330386746e-05,
      "loss": 0.8723,
      "step": 439940
    },
    {
      "epoch": 1.5419150658717893,
      "grad_norm": 2.703125,
      "learning_rate": 2.7001669301723044e-05,
      "loss": 0.7919,
      "step": 439950
    },
    {
      "epoch": 1.5419501133786848,
      "grad_norm": 3.078125,
      "learning_rate": 2.7001020273059342e-05,
      "loss": 0.8998,
      "step": 439960
    },
    {
      "epoch": 1.5419851608855804,
      "grad_norm": 2.90625,
      "learning_rate": 2.700037124439564e-05,
      "loss": 0.8299,
      "step": 439970
    },
    {
      "epoch": 1.542020208392476,
      "grad_norm": 2.46875,
      "learning_rate": 2.6999722215731938e-05,
      "loss": 0.8131,
      "step": 439980
    },
    {
      "epoch": 1.5420552558993716,
      "grad_norm": 3.09375,
      "learning_rate": 2.6999073187068236e-05,
      "loss": 0.8061,
      "step": 439990
    },
    {
      "epoch": 1.5420903034062672,
      "grad_norm": 2.53125,
      "learning_rate": 2.6998424158404534e-05,
      "loss": 0.7491,
      "step": 440000
    },
    {
      "epoch": 1.5420903034062672,
      "eval_loss": 0.790317714214325,
      "eval_runtime": 555.1303,
      "eval_samples_per_second": 685.309,
      "eval_steps_per_second": 57.109,
      "step": 440000
    },
    {
      "epoch": 1.542125350913163,
      "grad_norm": 2.71875,
      "learning_rate": 2.6997775129740832e-05,
      "loss": 0.79,
      "step": 440010
    },
    {
      "epoch": 1.5421603984200583,
      "grad_norm": 2.65625,
      "learning_rate": 2.699712610107713e-05,
      "loss": 0.8861,
      "step": 440020
    },
    {
      "epoch": 1.542195445926954,
      "grad_norm": 2.6875,
      "learning_rate": 2.6996477072413428e-05,
      "loss": 0.8037,
      "step": 440030
    },
    {
      "epoch": 1.5422304934338495,
      "grad_norm": 2.71875,
      "learning_rate": 2.6995828043749726e-05,
      "loss": 0.7852,
      "step": 440040
    },
    {
      "epoch": 1.542265540940745,
      "grad_norm": 3.078125,
      "learning_rate": 2.6995179015086024e-05,
      "loss": 0.7528,
      "step": 440050
    },
    {
      "epoch": 1.5423005884476408,
      "grad_norm": 2.90625,
      "learning_rate": 2.6994529986422322e-05,
      "loss": 0.7324,
      "step": 440060
    },
    {
      "epoch": 1.5423356359545364,
      "grad_norm": 2.703125,
      "learning_rate": 2.699388095775862e-05,
      "loss": 0.8376,
      "step": 440070
    },
    {
      "epoch": 1.542370683461432,
      "grad_norm": 3.15625,
      "learning_rate": 2.699323192909492e-05,
      "loss": 0.8479,
      "step": 440080
    },
    {
      "epoch": 1.5424057309683277,
      "grad_norm": 2.5625,
      "learning_rate": 2.699258290043122e-05,
      "loss": 0.8387,
      "step": 440090
    },
    {
      "epoch": 1.5424407784752232,
      "grad_norm": 2.71875,
      "learning_rate": 2.6991933871767517e-05,
      "loss": 0.8945,
      "step": 440100
    },
    {
      "epoch": 1.5424758259821187,
      "grad_norm": 3.40625,
      "learning_rate": 2.6991284843103815e-05,
      "loss": 0.9337,
      "step": 440110
    },
    {
      "epoch": 1.5425108734890145,
      "grad_norm": 3.46875,
      "learning_rate": 2.6990635814440113e-05,
      "loss": 0.8521,
      "step": 440120
    },
    {
      "epoch": 1.5425459209959098,
      "grad_norm": 3.125,
      "learning_rate": 2.698998678577641e-05,
      "loss": 0.796,
      "step": 440130
    },
    {
      "epoch": 1.5425809685028056,
      "grad_norm": 2.4375,
      "learning_rate": 2.6989337757112703e-05,
      "loss": 0.9063,
      "step": 440140
    },
    {
      "epoch": 1.542616016009701,
      "grad_norm": 3.28125,
      "learning_rate": 2.6988688728449e-05,
      "loss": 0.8606,
      "step": 440150
    },
    {
      "epoch": 1.5426510635165966,
      "grad_norm": 2.65625,
      "learning_rate": 2.69880396997853e-05,
      "loss": 0.8319,
      "step": 440160
    },
    {
      "epoch": 1.5426861110234924,
      "grad_norm": 2.859375,
      "learning_rate": 2.69873906711216e-05,
      "loss": 0.9347,
      "step": 440170
    },
    {
      "epoch": 1.542721158530388,
      "grad_norm": 2.890625,
      "learning_rate": 2.6986741642457898e-05,
      "loss": 0.8395,
      "step": 440180
    },
    {
      "epoch": 1.5427562060372835,
      "grad_norm": 3.15625,
      "learning_rate": 2.6986092613794196e-05,
      "loss": 0.8864,
      "step": 440190
    },
    {
      "epoch": 1.5427912535441792,
      "grad_norm": 2.3125,
      "learning_rate": 2.6985443585130494e-05,
      "loss": 0.8331,
      "step": 440200
    },
    {
      "epoch": 1.5428263010510748,
      "grad_norm": 2.90625,
      "learning_rate": 2.6984794556466792e-05,
      "loss": 0.8579,
      "step": 440210
    },
    {
      "epoch": 1.5428613485579703,
      "grad_norm": 2.921875,
      "learning_rate": 2.698414552780309e-05,
      "loss": 0.8533,
      "step": 440220
    },
    {
      "epoch": 1.542896396064866,
      "grad_norm": 2.796875,
      "learning_rate": 2.6983496499139388e-05,
      "loss": 0.8833,
      "step": 440230
    },
    {
      "epoch": 1.5429314435717614,
      "grad_norm": 3.3125,
      "learning_rate": 2.6982847470475686e-05,
      "loss": 0.8546,
      "step": 440240
    },
    {
      "epoch": 1.5429664910786571,
      "grad_norm": 3.15625,
      "learning_rate": 2.6982198441811984e-05,
      "loss": 0.7877,
      "step": 440250
    },
    {
      "epoch": 1.5430015385855527,
      "grad_norm": 2.9375,
      "learning_rate": 2.6981549413148282e-05,
      "loss": 0.8982,
      "step": 440260
    },
    {
      "epoch": 1.5430365860924482,
      "grad_norm": 2.953125,
      "learning_rate": 2.698090038448458e-05,
      "loss": 0.85,
      "step": 440270
    },
    {
      "epoch": 1.543071633599344,
      "grad_norm": 2.5625,
      "learning_rate": 2.6980251355820878e-05,
      "loss": 0.7629,
      "step": 440280
    },
    {
      "epoch": 1.5431066811062395,
      "grad_norm": 2.953125,
      "learning_rate": 2.6979602327157176e-05,
      "loss": 0.8871,
      "step": 440290
    },
    {
      "epoch": 1.543141728613135,
      "grad_norm": 2.96875,
      "learning_rate": 2.6978953298493477e-05,
      "loss": 0.9222,
      "step": 440300
    },
    {
      "epoch": 1.5431767761200308,
      "grad_norm": 3.34375,
      "learning_rate": 2.6978304269829775e-05,
      "loss": 0.8505,
      "step": 440310
    },
    {
      "epoch": 1.5432118236269263,
      "grad_norm": 2.859375,
      "learning_rate": 2.6977655241166073e-05,
      "loss": 0.8519,
      "step": 440320
    },
    {
      "epoch": 1.5432468711338219,
      "grad_norm": 2.765625,
      "learning_rate": 2.697700621250237e-05,
      "loss": 0.7753,
      "step": 440330
    },
    {
      "epoch": 1.5432819186407176,
      "grad_norm": 3.15625,
      "learning_rate": 2.697635718383867e-05,
      "loss": 0.8166,
      "step": 440340
    },
    {
      "epoch": 1.543316966147613,
      "grad_norm": 3.234375,
      "learning_rate": 2.6975708155174967e-05,
      "loss": 0.8679,
      "step": 440350
    },
    {
      "epoch": 1.5433520136545087,
      "grad_norm": 2.796875,
      "learning_rate": 2.6975059126511265e-05,
      "loss": 0.8824,
      "step": 440360
    },
    {
      "epoch": 1.5433870611614044,
      "grad_norm": 2.78125,
      "learning_rate": 2.6974410097847563e-05,
      "loss": 0.8721,
      "step": 440370
    },
    {
      "epoch": 1.5434221086682998,
      "grad_norm": 2.4375,
      "learning_rate": 2.697376106918386e-05,
      "loss": 0.8309,
      "step": 440380
    },
    {
      "epoch": 1.5434571561751955,
      "grad_norm": 2.984375,
      "learning_rate": 2.697311204052016e-05,
      "loss": 0.9179,
      "step": 440390
    },
    {
      "epoch": 1.543492203682091,
      "grad_norm": 3.03125,
      "learning_rate": 2.6972463011856457e-05,
      "loss": 0.7933,
      "step": 440400
    },
    {
      "epoch": 1.5435272511889866,
      "grad_norm": 2.734375,
      "learning_rate": 2.6971813983192755e-05,
      "loss": 0.7696,
      "step": 440410
    },
    {
      "epoch": 1.5435622986958824,
      "grad_norm": 3.015625,
      "learning_rate": 2.6971164954529053e-05,
      "loss": 0.8393,
      "step": 440420
    },
    {
      "epoch": 1.5435973462027779,
      "grad_norm": 3.3125,
      "learning_rate": 2.697051592586535e-05,
      "loss": 1.0092,
      "step": 440430
    },
    {
      "epoch": 1.5436323937096734,
      "grad_norm": 2.578125,
      "learning_rate": 2.6969866897201653e-05,
      "loss": 0.8682,
      "step": 440440
    },
    {
      "epoch": 1.5436674412165692,
      "grad_norm": 3.125,
      "learning_rate": 2.696921786853795e-05,
      "loss": 0.8291,
      "step": 440450
    },
    {
      "epoch": 1.5437024887234645,
      "grad_norm": 3.921875,
      "learning_rate": 2.696856883987425e-05,
      "loss": 0.8685,
      "step": 440460
    },
    {
      "epoch": 1.5437375362303603,
      "grad_norm": 2.796875,
      "learning_rate": 2.6967919811210547e-05,
      "loss": 0.8912,
      "step": 440470
    },
    {
      "epoch": 1.543772583737256,
      "grad_norm": 3.1875,
      "learning_rate": 2.6967270782546845e-05,
      "loss": 0.9148,
      "step": 440480
    },
    {
      "epoch": 1.5438076312441513,
      "grad_norm": 3.140625,
      "learning_rate": 2.6966621753883143e-05,
      "loss": 0.917,
      "step": 440490
    },
    {
      "epoch": 1.543842678751047,
      "grad_norm": 2.890625,
      "learning_rate": 2.696597272521944e-05,
      "loss": 0.8832,
      "step": 440500
    },
    {
      "epoch": 1.5438777262579426,
      "grad_norm": 2.78125,
      "learning_rate": 2.6965323696555732e-05,
      "loss": 0.8024,
      "step": 440510
    },
    {
      "epoch": 1.5439127737648382,
      "grad_norm": 2.875,
      "learning_rate": 2.696467466789203e-05,
      "loss": 0.8927,
      "step": 440520
    },
    {
      "epoch": 1.543947821271734,
      "grad_norm": 2.90625,
      "learning_rate": 2.696402563922833e-05,
      "loss": 0.8245,
      "step": 440530
    },
    {
      "epoch": 1.5439828687786294,
      "grad_norm": 3.109375,
      "learning_rate": 2.696337661056463e-05,
      "loss": 0.8256,
      "step": 440540
    },
    {
      "epoch": 1.544017916285525,
      "grad_norm": 2.265625,
      "learning_rate": 2.6962727581900927e-05,
      "loss": 0.8252,
      "step": 440550
    },
    {
      "epoch": 1.5440529637924207,
      "grad_norm": 2.515625,
      "learning_rate": 2.6962078553237225e-05,
      "loss": 0.7879,
      "step": 440560
    },
    {
      "epoch": 1.5440880112993163,
      "grad_norm": 3.078125,
      "learning_rate": 2.6961429524573523e-05,
      "loss": 0.8377,
      "step": 440570
    },
    {
      "epoch": 1.5441230588062118,
      "grad_norm": 3.171875,
      "learning_rate": 2.696078049590982e-05,
      "loss": 0.9754,
      "step": 440580
    },
    {
      "epoch": 1.5441581063131076,
      "grad_norm": 2.890625,
      "learning_rate": 2.696013146724612e-05,
      "loss": 0.8957,
      "step": 440590
    },
    {
      "epoch": 1.5441931538200029,
      "grad_norm": 2.34375,
      "learning_rate": 2.6959482438582417e-05,
      "loss": 0.7985,
      "step": 440600
    },
    {
      "epoch": 1.5442282013268986,
      "grad_norm": 2.5625,
      "learning_rate": 2.6958833409918715e-05,
      "loss": 0.8762,
      "step": 440610
    },
    {
      "epoch": 1.5442632488337942,
      "grad_norm": 3.25,
      "learning_rate": 2.6958184381255013e-05,
      "loss": 0.9487,
      "step": 440620
    },
    {
      "epoch": 1.5442982963406897,
      "grad_norm": 3.234375,
      "learning_rate": 2.695753535259131e-05,
      "loss": 0.9361,
      "step": 440630
    },
    {
      "epoch": 1.5443333438475855,
      "grad_norm": 3.5,
      "learning_rate": 2.695688632392761e-05,
      "loss": 0.8884,
      "step": 440640
    },
    {
      "epoch": 1.544368391354481,
      "grad_norm": 2.984375,
      "learning_rate": 2.6956237295263907e-05,
      "loss": 0.9577,
      "step": 440650
    },
    {
      "epoch": 1.5444034388613765,
      "grad_norm": 2.609375,
      "learning_rate": 2.6955588266600205e-05,
      "loss": 0.8332,
      "step": 440660
    },
    {
      "epoch": 1.5444384863682723,
      "grad_norm": 2.734375,
      "learning_rate": 2.6954939237936507e-05,
      "loss": 0.8666,
      "step": 440670
    },
    {
      "epoch": 1.5444735338751678,
      "grad_norm": 3.296875,
      "learning_rate": 2.6954290209272805e-05,
      "loss": 0.8998,
      "step": 440680
    },
    {
      "epoch": 1.5445085813820634,
      "grad_norm": 2.734375,
      "learning_rate": 2.6953641180609103e-05,
      "loss": 0.8509,
      "step": 440690
    },
    {
      "epoch": 1.5445436288889591,
      "grad_norm": 3.09375,
      "learning_rate": 2.69529921519454e-05,
      "loss": 0.7927,
      "step": 440700
    },
    {
      "epoch": 1.5445786763958544,
      "grad_norm": 2.796875,
      "learning_rate": 2.69523431232817e-05,
      "loss": 0.8105,
      "step": 440710
    },
    {
      "epoch": 1.5446137239027502,
      "grad_norm": 2.8125,
      "learning_rate": 2.6951694094617997e-05,
      "loss": 0.8505,
      "step": 440720
    },
    {
      "epoch": 1.5446487714096457,
      "grad_norm": 3.234375,
      "learning_rate": 2.6951045065954295e-05,
      "loss": 0.8834,
      "step": 440730
    },
    {
      "epoch": 1.5446838189165413,
      "grad_norm": 2.8125,
      "learning_rate": 2.6950396037290593e-05,
      "loss": 0.7973,
      "step": 440740
    },
    {
      "epoch": 1.544718866423437,
      "grad_norm": 2.765625,
      "learning_rate": 2.694974700862689e-05,
      "loss": 0.8833,
      "step": 440750
    },
    {
      "epoch": 1.5447539139303326,
      "grad_norm": 2.8125,
      "learning_rate": 2.694909797996319e-05,
      "loss": 0.8288,
      "step": 440760
    },
    {
      "epoch": 1.544788961437228,
      "grad_norm": 2.6875,
      "learning_rate": 2.6948448951299487e-05,
      "loss": 0.8416,
      "step": 440770
    },
    {
      "epoch": 1.5448240089441239,
      "grad_norm": 3.25,
      "learning_rate": 2.6947799922635785e-05,
      "loss": 0.8,
      "step": 440780
    },
    {
      "epoch": 1.5448590564510194,
      "grad_norm": 2.921875,
      "learning_rate": 2.6947150893972083e-05,
      "loss": 0.8492,
      "step": 440790
    },
    {
      "epoch": 1.544894103957915,
      "grad_norm": 3.015625,
      "learning_rate": 2.694650186530838e-05,
      "loss": 0.8163,
      "step": 440800
    },
    {
      "epoch": 1.5449291514648107,
      "grad_norm": 2.625,
      "learning_rate": 2.6945852836644682e-05,
      "loss": 0.8035,
      "step": 440810
    },
    {
      "epoch": 1.544964198971706,
      "grad_norm": 3.171875,
      "learning_rate": 2.694520380798098e-05,
      "loss": 0.8638,
      "step": 440820
    },
    {
      "epoch": 1.5449992464786018,
      "grad_norm": 3.140625,
      "learning_rate": 2.6944554779317278e-05,
      "loss": 0.8297,
      "step": 440830
    },
    {
      "epoch": 1.5450342939854973,
      "grad_norm": 2.8125,
      "learning_rate": 2.6943905750653576e-05,
      "loss": 0.7786,
      "step": 440840
    },
    {
      "epoch": 1.5450693414923928,
      "grad_norm": 2.796875,
      "learning_rate": 2.6943256721989874e-05,
      "loss": 0.8601,
      "step": 440850
    },
    {
      "epoch": 1.5451043889992886,
      "grad_norm": 2.90625,
      "learning_rate": 2.6942607693326172e-05,
      "loss": 0.8121,
      "step": 440860
    },
    {
      "epoch": 1.5451394365061841,
      "grad_norm": 2.671875,
      "learning_rate": 2.694195866466247e-05,
      "loss": 0.7918,
      "step": 440870
    },
    {
      "epoch": 1.5451744840130797,
      "grad_norm": 2.75,
      "learning_rate": 2.6941309635998768e-05,
      "loss": 0.8563,
      "step": 440880
    },
    {
      "epoch": 1.5452095315199754,
      "grad_norm": 2.53125,
      "learning_rate": 2.694066060733506e-05,
      "loss": 0.8272,
      "step": 440890
    },
    {
      "epoch": 1.545244579026871,
      "grad_norm": 2.84375,
      "learning_rate": 2.694001157867136e-05,
      "loss": 0.9019,
      "step": 440900
    },
    {
      "epoch": 1.5452796265337665,
      "grad_norm": 2.734375,
      "learning_rate": 2.693936255000766e-05,
      "loss": 0.8811,
      "step": 440910
    },
    {
      "epoch": 1.5453146740406623,
      "grad_norm": 2.59375,
      "learning_rate": 2.6938713521343957e-05,
      "loss": 0.8259,
      "step": 440920
    },
    {
      "epoch": 1.5453497215475576,
      "grad_norm": 2.875,
      "learning_rate": 2.6938064492680255e-05,
      "loss": 0.8145,
      "step": 440930
    },
    {
      "epoch": 1.5453847690544533,
      "grad_norm": 3.25,
      "learning_rate": 2.6937415464016553e-05,
      "loss": 0.8129,
      "step": 440940
    },
    {
      "epoch": 1.5454198165613489,
      "grad_norm": 3.734375,
      "learning_rate": 2.693676643535285e-05,
      "loss": 0.8044,
      "step": 440950
    },
    {
      "epoch": 1.5454548640682444,
      "grad_norm": 2.65625,
      "learning_rate": 2.693611740668915e-05,
      "loss": 0.8983,
      "step": 440960
    },
    {
      "epoch": 1.5454899115751402,
      "grad_norm": 2.78125,
      "learning_rate": 2.6935468378025447e-05,
      "loss": 0.8661,
      "step": 440970
    },
    {
      "epoch": 1.5455249590820357,
      "grad_norm": 2.953125,
      "learning_rate": 2.6934819349361745e-05,
      "loss": 0.8279,
      "step": 440980
    },
    {
      "epoch": 1.5455600065889312,
      "grad_norm": 2.421875,
      "learning_rate": 2.6934170320698043e-05,
      "loss": 0.7921,
      "step": 440990
    },
    {
      "epoch": 1.545595054095827,
      "grad_norm": 3.40625,
      "learning_rate": 2.693352129203434e-05,
      "loss": 0.8735,
      "step": 441000
    },
    {
      "epoch": 1.5456301016027225,
      "grad_norm": 2.828125,
      "learning_rate": 2.693287226337064e-05,
      "loss": 0.8216,
      "step": 441010
    },
    {
      "epoch": 1.545665149109618,
      "grad_norm": 3.140625,
      "learning_rate": 2.6932223234706937e-05,
      "loss": 0.8011,
      "step": 441020
    },
    {
      "epoch": 1.5457001966165138,
      "grad_norm": 2.65625,
      "learning_rate": 2.6931574206043235e-05,
      "loss": 0.6919,
      "step": 441030
    },
    {
      "epoch": 1.5457352441234091,
      "grad_norm": 2.859375,
      "learning_rate": 2.6930925177379536e-05,
      "loss": 0.7854,
      "step": 441040
    },
    {
      "epoch": 1.5457702916303049,
      "grad_norm": 2.6875,
      "learning_rate": 2.6930276148715834e-05,
      "loss": 0.809,
      "step": 441050
    },
    {
      "epoch": 1.5458053391372006,
      "grad_norm": 2.828125,
      "learning_rate": 2.6929627120052132e-05,
      "loss": 0.836,
      "step": 441060
    },
    {
      "epoch": 1.545840386644096,
      "grad_norm": 3.234375,
      "learning_rate": 2.692897809138843e-05,
      "loss": 0.8488,
      "step": 441070
    },
    {
      "epoch": 1.5458754341509917,
      "grad_norm": 2.90625,
      "learning_rate": 2.6928329062724728e-05,
      "loss": 0.8624,
      "step": 441080
    },
    {
      "epoch": 1.5459104816578872,
      "grad_norm": 2.921875,
      "learning_rate": 2.6927680034061026e-05,
      "loss": 0.7658,
      "step": 441090
    },
    {
      "epoch": 1.5459455291647828,
      "grad_norm": 2.265625,
      "learning_rate": 2.6927031005397324e-05,
      "loss": 0.7754,
      "step": 441100
    },
    {
      "epoch": 1.5459805766716785,
      "grad_norm": 3.09375,
      "learning_rate": 2.6926381976733622e-05,
      "loss": 0.8855,
      "step": 441110
    },
    {
      "epoch": 1.546015624178574,
      "grad_norm": 2.765625,
      "learning_rate": 2.692573294806992e-05,
      "loss": 0.7553,
      "step": 441120
    },
    {
      "epoch": 1.5460506716854696,
      "grad_norm": 3.078125,
      "learning_rate": 2.6925083919406218e-05,
      "loss": 0.8611,
      "step": 441130
    },
    {
      "epoch": 1.5460857191923654,
      "grad_norm": 3.234375,
      "learning_rate": 2.6924434890742516e-05,
      "loss": 0.8254,
      "step": 441140
    },
    {
      "epoch": 1.5461207666992607,
      "grad_norm": 2.671875,
      "learning_rate": 2.6923785862078814e-05,
      "loss": 0.8215,
      "step": 441150
    },
    {
      "epoch": 1.5461558142061564,
      "grad_norm": 3.078125,
      "learning_rate": 2.6923136833415112e-05,
      "loss": 0.9369,
      "step": 441160
    },
    {
      "epoch": 1.5461908617130522,
      "grad_norm": 2.96875,
      "learning_rate": 2.692248780475141e-05,
      "loss": 0.7896,
      "step": 441170
    },
    {
      "epoch": 1.5462259092199475,
      "grad_norm": 3.546875,
      "learning_rate": 2.692183877608771e-05,
      "loss": 0.9059,
      "step": 441180
    },
    {
      "epoch": 1.5462609567268433,
      "grad_norm": 3.421875,
      "learning_rate": 2.692118974742401e-05,
      "loss": 0.8509,
      "step": 441190
    },
    {
      "epoch": 1.5462960042337388,
      "grad_norm": 2.765625,
      "learning_rate": 2.6920540718760307e-05,
      "loss": 0.83,
      "step": 441200
    },
    {
      "epoch": 1.5463310517406343,
      "grad_norm": 3.046875,
      "learning_rate": 2.6919891690096605e-05,
      "loss": 0.8274,
      "step": 441210
    },
    {
      "epoch": 1.54636609924753,
      "grad_norm": 2.96875,
      "learning_rate": 2.6919242661432903e-05,
      "loss": 0.867,
      "step": 441220
    },
    {
      "epoch": 1.5464011467544256,
      "grad_norm": 2.65625,
      "learning_rate": 2.69185936327692e-05,
      "loss": 0.7967,
      "step": 441230
    },
    {
      "epoch": 1.5464361942613212,
      "grad_norm": 2.984375,
      "learning_rate": 2.69179446041055e-05,
      "loss": 0.848,
      "step": 441240
    },
    {
      "epoch": 1.546471241768217,
      "grad_norm": 2.75,
      "learning_rate": 2.6917295575441797e-05,
      "loss": 0.7956,
      "step": 441250
    },
    {
      "epoch": 1.5465062892751125,
      "grad_norm": 3.046875,
      "learning_rate": 2.6916646546778095e-05,
      "loss": 0.8873,
      "step": 441260
    },
    {
      "epoch": 1.546541336782008,
      "grad_norm": 2.78125,
      "learning_rate": 2.691599751811439e-05,
      "loss": 0.9296,
      "step": 441270
    },
    {
      "epoch": 1.5465763842889038,
      "grad_norm": 2.953125,
      "learning_rate": 2.6915348489450688e-05,
      "loss": 0.8015,
      "step": 441280
    },
    {
      "epoch": 1.546611431795799,
      "grad_norm": 2.921875,
      "learning_rate": 2.6914699460786986e-05,
      "loss": 0.8096,
      "step": 441290
    },
    {
      "epoch": 1.5466464793026948,
      "grad_norm": 2.78125,
      "learning_rate": 2.6914050432123284e-05,
      "loss": 0.8793,
      "step": 441300
    },
    {
      "epoch": 1.5466815268095904,
      "grad_norm": 3.046875,
      "learning_rate": 2.6913401403459582e-05,
      "loss": 0.8695,
      "step": 441310
    },
    {
      "epoch": 1.546716574316486,
      "grad_norm": 3.0,
      "learning_rate": 2.691275237479588e-05,
      "loss": 0.8958,
      "step": 441320
    },
    {
      "epoch": 1.5467516218233817,
      "grad_norm": 2.578125,
      "learning_rate": 2.6912103346132178e-05,
      "loss": 0.8364,
      "step": 441330
    },
    {
      "epoch": 1.5467866693302772,
      "grad_norm": 2.5,
      "learning_rate": 2.6911454317468476e-05,
      "loss": 0.919,
      "step": 441340
    },
    {
      "epoch": 1.5468217168371727,
      "grad_norm": 3.203125,
      "learning_rate": 2.6910805288804774e-05,
      "loss": 0.87,
      "step": 441350
    },
    {
      "epoch": 1.5468567643440685,
      "grad_norm": 3.125,
      "learning_rate": 2.6910156260141072e-05,
      "loss": 0.8619,
      "step": 441360
    },
    {
      "epoch": 1.546891811850964,
      "grad_norm": 2.71875,
      "learning_rate": 2.690950723147737e-05,
      "loss": 0.8549,
      "step": 441370
    },
    {
      "epoch": 1.5469268593578596,
      "grad_norm": 2.09375,
      "learning_rate": 2.6908858202813668e-05,
      "loss": 0.8204,
      "step": 441380
    },
    {
      "epoch": 1.5469619068647553,
      "grad_norm": 3.015625,
      "learning_rate": 2.6908209174149966e-05,
      "loss": 0.8027,
      "step": 441390
    },
    {
      "epoch": 1.5469969543716506,
      "grad_norm": 3.171875,
      "learning_rate": 2.6907560145486267e-05,
      "loss": 0.8668,
      "step": 441400
    },
    {
      "epoch": 1.5470320018785464,
      "grad_norm": 2.6875,
      "learning_rate": 2.6906911116822565e-05,
      "loss": 0.8592,
      "step": 441410
    },
    {
      "epoch": 1.547067049385442,
      "grad_norm": 2.484375,
      "learning_rate": 2.6906262088158863e-05,
      "loss": 0.8515,
      "step": 441420
    },
    {
      "epoch": 1.5471020968923375,
      "grad_norm": 2.796875,
      "learning_rate": 2.690561305949516e-05,
      "loss": 0.7325,
      "step": 441430
    },
    {
      "epoch": 1.5471371443992332,
      "grad_norm": 2.640625,
      "learning_rate": 2.690496403083146e-05,
      "loss": 0.8183,
      "step": 441440
    },
    {
      "epoch": 1.5471721919061288,
      "grad_norm": 3.296875,
      "learning_rate": 2.6904315002167757e-05,
      "loss": 0.8105,
      "step": 441450
    },
    {
      "epoch": 1.5472072394130243,
      "grad_norm": 3.421875,
      "learning_rate": 2.6903665973504055e-05,
      "loss": 0.8394,
      "step": 441460
    },
    {
      "epoch": 1.54724228691992,
      "grad_norm": 3.0,
      "learning_rate": 2.6903016944840353e-05,
      "loss": 0.8715,
      "step": 441470
    },
    {
      "epoch": 1.5472773344268156,
      "grad_norm": 2.90625,
      "learning_rate": 2.690236791617665e-05,
      "loss": 0.8386,
      "step": 441480
    },
    {
      "epoch": 1.5473123819337111,
      "grad_norm": 2.890625,
      "learning_rate": 2.690171888751295e-05,
      "loss": 0.8407,
      "step": 441490
    },
    {
      "epoch": 1.5473474294406069,
      "grad_norm": 2.984375,
      "learning_rate": 2.6901069858849247e-05,
      "loss": 0.8671,
      "step": 441500
    },
    {
      "epoch": 1.5473824769475022,
      "grad_norm": 2.796875,
      "learning_rate": 2.6900420830185545e-05,
      "loss": 0.855,
      "step": 441510
    },
    {
      "epoch": 1.547417524454398,
      "grad_norm": 3.125,
      "learning_rate": 2.6899771801521843e-05,
      "loss": 0.8592,
      "step": 441520
    },
    {
      "epoch": 1.5474525719612935,
      "grad_norm": 2.921875,
      "learning_rate": 2.689912277285814e-05,
      "loss": 0.9393,
      "step": 441530
    },
    {
      "epoch": 1.547487619468189,
      "grad_norm": 2.875,
      "learning_rate": 2.6898473744194443e-05,
      "loss": 0.8817,
      "step": 441540
    },
    {
      "epoch": 1.5475226669750848,
      "grad_norm": 2.921875,
      "learning_rate": 2.689782471553074e-05,
      "loss": 0.9134,
      "step": 441550
    },
    {
      "epoch": 1.5475577144819803,
      "grad_norm": 2.578125,
      "learning_rate": 2.689717568686704e-05,
      "loss": 0.7507,
      "step": 441560
    },
    {
      "epoch": 1.5475927619888759,
      "grad_norm": 2.890625,
      "learning_rate": 2.6896526658203337e-05,
      "loss": 0.8205,
      "step": 441570
    },
    {
      "epoch": 1.5476278094957716,
      "grad_norm": 3.0,
      "learning_rate": 2.6895877629539635e-05,
      "loss": 0.8464,
      "step": 441580
    },
    {
      "epoch": 1.5476628570026671,
      "grad_norm": 3.234375,
      "learning_rate": 2.6895228600875933e-05,
      "loss": 0.8744,
      "step": 441590
    },
    {
      "epoch": 1.5476979045095627,
      "grad_norm": 3.09375,
      "learning_rate": 2.689457957221223e-05,
      "loss": 0.8065,
      "step": 441600
    },
    {
      "epoch": 1.5477329520164584,
      "grad_norm": 2.4375,
      "learning_rate": 2.689393054354853e-05,
      "loss": 0.7772,
      "step": 441610
    },
    {
      "epoch": 1.5477679995233538,
      "grad_norm": 2.90625,
      "learning_rate": 2.6893281514884827e-05,
      "loss": 0.8108,
      "step": 441620
    },
    {
      "epoch": 1.5478030470302495,
      "grad_norm": 2.96875,
      "learning_rate": 2.6892632486221125e-05,
      "loss": 0.8903,
      "step": 441630
    },
    {
      "epoch": 1.547838094537145,
      "grad_norm": 3.109375,
      "learning_rate": 2.689198345755742e-05,
      "loss": 0.8783,
      "step": 441640
    },
    {
      "epoch": 1.5478731420440406,
      "grad_norm": 2.265625,
      "learning_rate": 2.6891334428893717e-05,
      "loss": 0.823,
      "step": 441650
    },
    {
      "epoch": 1.5479081895509363,
      "grad_norm": 3.015625,
      "learning_rate": 2.6890685400230015e-05,
      "loss": 0.8175,
      "step": 441660
    },
    {
      "epoch": 1.5479432370578319,
      "grad_norm": 2.453125,
      "learning_rate": 2.6890036371566313e-05,
      "loss": 0.7721,
      "step": 441670
    },
    {
      "epoch": 1.5479782845647274,
      "grad_norm": 2.65625,
      "learning_rate": 2.688938734290261e-05,
      "loss": 0.8413,
      "step": 441680
    },
    {
      "epoch": 1.5480133320716232,
      "grad_norm": 2.609375,
      "learning_rate": 2.688873831423891e-05,
      "loss": 0.8048,
      "step": 441690
    },
    {
      "epoch": 1.5480483795785187,
      "grad_norm": 3.90625,
      "learning_rate": 2.6888089285575207e-05,
      "loss": 0.8314,
      "step": 441700
    },
    {
      "epoch": 1.5480834270854142,
      "grad_norm": 2.984375,
      "learning_rate": 2.6887440256911505e-05,
      "loss": 0.8404,
      "step": 441710
    },
    {
      "epoch": 1.54811847459231,
      "grad_norm": 2.859375,
      "learning_rate": 2.6886791228247803e-05,
      "loss": 0.8485,
      "step": 441720
    },
    {
      "epoch": 1.5481535220992053,
      "grad_norm": 3.234375,
      "learning_rate": 2.68861421995841e-05,
      "loss": 0.8377,
      "step": 441730
    },
    {
      "epoch": 1.548188569606101,
      "grad_norm": 2.5625,
      "learning_rate": 2.68854931709204e-05,
      "loss": 0.756,
      "step": 441740
    },
    {
      "epoch": 1.5482236171129968,
      "grad_norm": 2.859375,
      "learning_rate": 2.6884844142256697e-05,
      "loss": 0.8328,
      "step": 441750
    },
    {
      "epoch": 1.5482586646198921,
      "grad_norm": 2.484375,
      "learning_rate": 2.6884195113592995e-05,
      "loss": 0.8612,
      "step": 441760
    },
    {
      "epoch": 1.548293712126788,
      "grad_norm": 2.5625,
      "learning_rate": 2.6883546084929297e-05,
      "loss": 0.848,
      "step": 441770
    },
    {
      "epoch": 1.5483287596336834,
      "grad_norm": 3.03125,
      "learning_rate": 2.6882897056265595e-05,
      "loss": 0.8469,
      "step": 441780
    },
    {
      "epoch": 1.548363807140579,
      "grad_norm": 2.90625,
      "learning_rate": 2.6882248027601893e-05,
      "loss": 0.8052,
      "step": 441790
    },
    {
      "epoch": 1.5483988546474747,
      "grad_norm": 3.21875,
      "learning_rate": 2.688159899893819e-05,
      "loss": 0.796,
      "step": 441800
    },
    {
      "epoch": 1.5484339021543703,
      "grad_norm": 2.703125,
      "learning_rate": 2.688094997027449e-05,
      "loss": 0.737,
      "step": 441810
    },
    {
      "epoch": 1.5484689496612658,
      "grad_norm": 2.90625,
      "learning_rate": 2.6880300941610787e-05,
      "loss": 0.8342,
      "step": 441820
    },
    {
      "epoch": 1.5485039971681616,
      "grad_norm": 2.25,
      "learning_rate": 2.6879651912947085e-05,
      "loss": 0.847,
      "step": 441830
    },
    {
      "epoch": 1.5485390446750569,
      "grad_norm": 3.0,
      "learning_rate": 2.6879002884283383e-05,
      "loss": 0.9151,
      "step": 441840
    },
    {
      "epoch": 1.5485740921819526,
      "grad_norm": 2.84375,
      "learning_rate": 2.687835385561968e-05,
      "loss": 0.8137,
      "step": 441850
    },
    {
      "epoch": 1.5486091396888484,
      "grad_norm": 3.375,
      "learning_rate": 2.687770482695598e-05,
      "loss": 0.9349,
      "step": 441860
    },
    {
      "epoch": 1.5486441871957437,
      "grad_norm": 3.03125,
      "learning_rate": 2.6877055798292277e-05,
      "loss": 0.8721,
      "step": 441870
    },
    {
      "epoch": 1.5486792347026395,
      "grad_norm": 2.84375,
      "learning_rate": 2.6876406769628575e-05,
      "loss": 0.821,
      "step": 441880
    },
    {
      "epoch": 1.548714282209535,
      "grad_norm": 2.671875,
      "learning_rate": 2.6875757740964873e-05,
      "loss": 0.7994,
      "step": 441890
    },
    {
      "epoch": 1.5487493297164305,
      "grad_norm": 3.375,
      "learning_rate": 2.687510871230117e-05,
      "loss": 0.8574,
      "step": 441900
    },
    {
      "epoch": 1.5487843772233263,
      "grad_norm": 2.796875,
      "learning_rate": 2.6874459683637472e-05,
      "loss": 0.8379,
      "step": 441910
    },
    {
      "epoch": 1.5488194247302218,
      "grad_norm": 2.953125,
      "learning_rate": 2.687381065497377e-05,
      "loss": 0.8857,
      "step": 441920
    },
    {
      "epoch": 1.5488544722371174,
      "grad_norm": 2.640625,
      "learning_rate": 2.6873161626310068e-05,
      "loss": 0.8536,
      "step": 441930
    },
    {
      "epoch": 1.5488895197440131,
      "grad_norm": 2.953125,
      "learning_rate": 2.6872512597646366e-05,
      "loss": 0.868,
      "step": 441940
    },
    {
      "epoch": 1.5489245672509087,
      "grad_norm": 2.765625,
      "learning_rate": 2.6871863568982664e-05,
      "loss": 0.7803,
      "step": 441950
    },
    {
      "epoch": 1.5489596147578042,
      "grad_norm": 3.015625,
      "learning_rate": 2.6871214540318962e-05,
      "loss": 0.8725,
      "step": 441960
    },
    {
      "epoch": 1.5489946622647,
      "grad_norm": 2.875,
      "learning_rate": 2.687056551165526e-05,
      "loss": 0.8249,
      "step": 441970
    },
    {
      "epoch": 1.5490297097715953,
      "grad_norm": 2.84375,
      "learning_rate": 2.6869916482991558e-05,
      "loss": 0.841,
      "step": 441980
    },
    {
      "epoch": 1.549064757278491,
      "grad_norm": 3.046875,
      "learning_rate": 2.6869267454327856e-05,
      "loss": 0.8508,
      "step": 441990
    },
    {
      "epoch": 1.5490998047853866,
      "grad_norm": 3.203125,
      "learning_rate": 2.6868618425664154e-05,
      "loss": 0.7947,
      "step": 442000
    },
    {
      "epoch": 1.549134852292282,
      "grad_norm": 2.65625,
      "learning_rate": 2.6867969397000452e-05,
      "loss": 0.8544,
      "step": 442010
    },
    {
      "epoch": 1.5491698997991779,
      "grad_norm": 3.234375,
      "learning_rate": 2.6867320368336747e-05,
      "loss": 0.8553,
      "step": 442020
    },
    {
      "epoch": 1.5492049473060734,
      "grad_norm": 2.90625,
      "learning_rate": 2.6866671339673045e-05,
      "loss": 0.8735,
      "step": 442030
    },
    {
      "epoch": 1.549239994812969,
      "grad_norm": 2.859375,
      "learning_rate": 2.6866022311009343e-05,
      "loss": 0.8608,
      "step": 442040
    },
    {
      "epoch": 1.5492750423198647,
      "grad_norm": 2.4375,
      "learning_rate": 2.686537328234564e-05,
      "loss": 0.903,
      "step": 442050
    },
    {
      "epoch": 1.5493100898267602,
      "grad_norm": 3.015625,
      "learning_rate": 2.686472425368194e-05,
      "loss": 0.8068,
      "step": 442060
    },
    {
      "epoch": 1.5493451373336558,
      "grad_norm": 3.015625,
      "learning_rate": 2.6864075225018237e-05,
      "loss": 0.7877,
      "step": 442070
    },
    {
      "epoch": 1.5493801848405515,
      "grad_norm": 3.171875,
      "learning_rate": 2.6863426196354535e-05,
      "loss": 0.8941,
      "step": 442080
    },
    {
      "epoch": 1.5494152323474468,
      "grad_norm": 2.96875,
      "learning_rate": 2.6862777167690833e-05,
      "loss": 0.8063,
      "step": 442090
    },
    {
      "epoch": 1.5494502798543426,
      "grad_norm": 3.15625,
      "learning_rate": 2.686212813902713e-05,
      "loss": 0.8694,
      "step": 442100
    },
    {
      "epoch": 1.5494853273612381,
      "grad_norm": 2.484375,
      "learning_rate": 2.686147911036343e-05,
      "loss": 0.7917,
      "step": 442110
    },
    {
      "epoch": 1.5495203748681337,
      "grad_norm": 2.5,
      "learning_rate": 2.6860830081699727e-05,
      "loss": 0.864,
      "step": 442120
    },
    {
      "epoch": 1.5495554223750294,
      "grad_norm": 3.21875,
      "learning_rate": 2.6860181053036025e-05,
      "loss": 0.8793,
      "step": 442130
    },
    {
      "epoch": 1.549590469881925,
      "grad_norm": 3.015625,
      "learning_rate": 2.6859532024372326e-05,
      "loss": 0.7926,
      "step": 442140
    },
    {
      "epoch": 1.5496255173888205,
      "grad_norm": 2.703125,
      "learning_rate": 2.6858882995708624e-05,
      "loss": 0.8883,
      "step": 442150
    },
    {
      "epoch": 1.5496605648957162,
      "grad_norm": 3.09375,
      "learning_rate": 2.6858233967044922e-05,
      "loss": 0.902,
      "step": 442160
    },
    {
      "epoch": 1.5496956124026118,
      "grad_norm": 2.84375,
      "learning_rate": 2.685758493838122e-05,
      "loss": 0.8913,
      "step": 442170
    },
    {
      "epoch": 1.5497306599095073,
      "grad_norm": 3.171875,
      "learning_rate": 2.6856935909717518e-05,
      "loss": 0.8448,
      "step": 442180
    },
    {
      "epoch": 1.549765707416403,
      "grad_norm": 3.1875,
      "learning_rate": 2.6856286881053816e-05,
      "loss": 0.8357,
      "step": 442190
    },
    {
      "epoch": 1.5498007549232984,
      "grad_norm": 2.890625,
      "learning_rate": 2.6855637852390114e-05,
      "loss": 0.8467,
      "step": 442200
    },
    {
      "epoch": 1.5498358024301941,
      "grad_norm": 2.890625,
      "learning_rate": 2.6854988823726412e-05,
      "loss": 0.8207,
      "step": 442210
    },
    {
      "epoch": 1.5498708499370897,
      "grad_norm": 2.65625,
      "learning_rate": 2.685433979506271e-05,
      "loss": 0.9277,
      "step": 442220
    },
    {
      "epoch": 1.5499058974439852,
      "grad_norm": 3.28125,
      "learning_rate": 2.6853690766399008e-05,
      "loss": 0.9011,
      "step": 442230
    },
    {
      "epoch": 1.549940944950881,
      "grad_norm": 3.0,
      "learning_rate": 2.6853041737735306e-05,
      "loss": 0.88,
      "step": 442240
    },
    {
      "epoch": 1.5499759924577765,
      "grad_norm": 3.0,
      "learning_rate": 2.6852392709071604e-05,
      "loss": 0.8085,
      "step": 442250
    },
    {
      "epoch": 1.550011039964672,
      "grad_norm": 2.671875,
      "learning_rate": 2.6851743680407902e-05,
      "loss": 0.8536,
      "step": 442260
    },
    {
      "epoch": 1.5500460874715678,
      "grad_norm": 2.96875,
      "learning_rate": 2.68510946517442e-05,
      "loss": 0.8493,
      "step": 442270
    },
    {
      "epoch": 1.5500811349784633,
      "grad_norm": 2.828125,
      "learning_rate": 2.68504456230805e-05,
      "loss": 0.8928,
      "step": 442280
    },
    {
      "epoch": 1.5501161824853589,
      "grad_norm": 3.28125,
      "learning_rate": 2.68497965944168e-05,
      "loss": 0.8644,
      "step": 442290
    },
    {
      "epoch": 1.5501512299922546,
      "grad_norm": 2.703125,
      "learning_rate": 2.6849147565753098e-05,
      "loss": 0.9034,
      "step": 442300
    },
    {
      "epoch": 1.55018627749915,
      "grad_norm": 2.84375,
      "learning_rate": 2.6848498537089396e-05,
      "loss": 0.8493,
      "step": 442310
    },
    {
      "epoch": 1.5502213250060457,
      "grad_norm": 2.90625,
      "learning_rate": 2.6847849508425694e-05,
      "loss": 0.8224,
      "step": 442320
    },
    {
      "epoch": 1.5502563725129412,
      "grad_norm": 3.203125,
      "learning_rate": 2.684720047976199e-05,
      "loss": 0.8475,
      "step": 442330
    },
    {
      "epoch": 1.5502914200198368,
      "grad_norm": 2.609375,
      "learning_rate": 2.684655145109829e-05,
      "loss": 0.8211,
      "step": 442340
    },
    {
      "epoch": 1.5503264675267325,
      "grad_norm": 3.546875,
      "learning_rate": 2.6845902422434588e-05,
      "loss": 0.8587,
      "step": 442350
    },
    {
      "epoch": 1.550361515033628,
      "grad_norm": 3.109375,
      "learning_rate": 2.6845253393770886e-05,
      "loss": 0.8578,
      "step": 442360
    },
    {
      "epoch": 1.5503965625405236,
      "grad_norm": 2.75,
      "learning_rate": 2.6844604365107184e-05,
      "loss": 0.8061,
      "step": 442370
    },
    {
      "epoch": 1.5504316100474194,
      "grad_norm": 2.953125,
      "learning_rate": 2.684395533644348e-05,
      "loss": 0.8773,
      "step": 442380
    },
    {
      "epoch": 1.550466657554315,
      "grad_norm": 2.625,
      "learning_rate": 2.684330630777978e-05,
      "loss": 0.87,
      "step": 442390
    },
    {
      "epoch": 1.5505017050612104,
      "grad_norm": 3.125,
      "learning_rate": 2.6842657279116074e-05,
      "loss": 0.863,
      "step": 442400
    },
    {
      "epoch": 1.5505367525681062,
      "grad_norm": 2.21875,
      "learning_rate": 2.6842008250452372e-05,
      "loss": 0.8468,
      "step": 442410
    },
    {
      "epoch": 1.5505718000750015,
      "grad_norm": 3.078125,
      "learning_rate": 2.684135922178867e-05,
      "loss": 0.8258,
      "step": 442420
    },
    {
      "epoch": 1.5506068475818973,
      "grad_norm": 3.03125,
      "learning_rate": 2.6840710193124968e-05,
      "loss": 0.7791,
      "step": 442430
    },
    {
      "epoch": 1.550641895088793,
      "grad_norm": 3.015625,
      "learning_rate": 2.6840061164461266e-05,
      "loss": 0.8574,
      "step": 442440
    },
    {
      "epoch": 1.5506769425956883,
      "grad_norm": 2.9375,
      "learning_rate": 2.6839412135797564e-05,
      "loss": 0.8072,
      "step": 442450
    },
    {
      "epoch": 1.550711990102584,
      "grad_norm": 2.78125,
      "learning_rate": 2.6838763107133862e-05,
      "loss": 0.7881,
      "step": 442460
    },
    {
      "epoch": 1.5507470376094796,
      "grad_norm": 2.734375,
      "learning_rate": 2.683811407847016e-05,
      "loss": 0.8785,
      "step": 442470
    },
    {
      "epoch": 1.5507820851163752,
      "grad_norm": 3.3125,
      "learning_rate": 2.6837465049806458e-05,
      "loss": 0.8287,
      "step": 442480
    },
    {
      "epoch": 1.550817132623271,
      "grad_norm": 3.015625,
      "learning_rate": 2.6836816021142756e-05,
      "loss": 0.8912,
      "step": 442490
    },
    {
      "epoch": 1.5508521801301665,
      "grad_norm": 2.859375,
      "learning_rate": 2.6836166992479058e-05,
      "loss": 0.8449,
      "step": 442500
    },
    {
      "epoch": 1.550887227637062,
      "grad_norm": 2.875,
      "learning_rate": 2.6835517963815356e-05,
      "loss": 0.8078,
      "step": 442510
    },
    {
      "epoch": 1.5509222751439578,
      "grad_norm": 3.0,
      "learning_rate": 2.6834868935151654e-05,
      "loss": 0.8016,
      "step": 442520
    },
    {
      "epoch": 1.550957322650853,
      "grad_norm": 2.75,
      "learning_rate": 2.683421990648795e-05,
      "loss": 0.7838,
      "step": 442530
    },
    {
      "epoch": 1.5509923701577488,
      "grad_norm": 2.859375,
      "learning_rate": 2.683357087782425e-05,
      "loss": 0.9082,
      "step": 442540
    },
    {
      "epoch": 1.5510274176646446,
      "grad_norm": 2.234375,
      "learning_rate": 2.6832921849160548e-05,
      "loss": 0.8318,
      "step": 442550
    },
    {
      "epoch": 1.55106246517154,
      "grad_norm": 2.53125,
      "learning_rate": 2.6832272820496846e-05,
      "loss": 0.8673,
      "step": 442560
    },
    {
      "epoch": 1.5510975126784357,
      "grad_norm": 2.515625,
      "learning_rate": 2.6831623791833144e-05,
      "loss": 0.7871,
      "step": 442570
    },
    {
      "epoch": 1.5511325601853312,
      "grad_norm": 2.703125,
      "learning_rate": 2.683097476316944e-05,
      "loss": 0.8278,
      "step": 442580
    },
    {
      "epoch": 1.5511676076922267,
      "grad_norm": 3.125,
      "learning_rate": 2.683032573450574e-05,
      "loss": 0.8959,
      "step": 442590
    },
    {
      "epoch": 1.5512026551991225,
      "grad_norm": 2.734375,
      "learning_rate": 2.6829676705842038e-05,
      "loss": 0.8316,
      "step": 442600
    },
    {
      "epoch": 1.551237702706018,
      "grad_norm": 2.953125,
      "learning_rate": 2.6829027677178336e-05,
      "loss": 0.7974,
      "step": 442610
    },
    {
      "epoch": 1.5512727502129136,
      "grad_norm": 2.75,
      "learning_rate": 2.6828378648514634e-05,
      "loss": 0.8879,
      "step": 442620
    },
    {
      "epoch": 1.5513077977198093,
      "grad_norm": 3.125,
      "learning_rate": 2.682772961985093e-05,
      "loss": 0.8119,
      "step": 442630
    },
    {
      "epoch": 1.5513428452267048,
      "grad_norm": 2.96875,
      "learning_rate": 2.6827080591187233e-05,
      "loss": 0.8299,
      "step": 442640
    },
    {
      "epoch": 1.5513778927336004,
      "grad_norm": 2.671875,
      "learning_rate": 2.682643156252353e-05,
      "loss": 0.8394,
      "step": 442650
    },
    {
      "epoch": 1.5514129402404961,
      "grad_norm": 3.0,
      "learning_rate": 2.682578253385983e-05,
      "loss": 0.7783,
      "step": 442660
    },
    {
      "epoch": 1.5514479877473915,
      "grad_norm": 2.765625,
      "learning_rate": 2.6825133505196127e-05,
      "loss": 0.8462,
      "step": 442670
    },
    {
      "epoch": 1.5514830352542872,
      "grad_norm": 2.890625,
      "learning_rate": 2.6824484476532425e-05,
      "loss": 0.8144,
      "step": 442680
    },
    {
      "epoch": 1.5515180827611827,
      "grad_norm": 3.59375,
      "learning_rate": 2.6823835447868723e-05,
      "loss": 0.7551,
      "step": 442690
    },
    {
      "epoch": 1.5515531302680783,
      "grad_norm": 3.40625,
      "learning_rate": 2.682318641920502e-05,
      "loss": 0.9026,
      "step": 442700
    },
    {
      "epoch": 1.551588177774974,
      "grad_norm": 2.9375,
      "learning_rate": 2.682253739054132e-05,
      "loss": 0.8657,
      "step": 442710
    },
    {
      "epoch": 1.5516232252818696,
      "grad_norm": 2.40625,
      "learning_rate": 2.6821888361877617e-05,
      "loss": 0.8373,
      "step": 442720
    },
    {
      "epoch": 1.5516582727887651,
      "grad_norm": 2.96875,
      "learning_rate": 2.6821239333213915e-05,
      "loss": 0.8865,
      "step": 442730
    },
    {
      "epoch": 1.5516933202956609,
      "grad_norm": 3.265625,
      "learning_rate": 2.6820590304550213e-05,
      "loss": 0.8591,
      "step": 442740
    },
    {
      "epoch": 1.5517283678025564,
      "grad_norm": 2.84375,
      "learning_rate": 2.681994127588651e-05,
      "loss": 0.8139,
      "step": 442750
    },
    {
      "epoch": 1.551763415309452,
      "grad_norm": 2.6875,
      "learning_rate": 2.681929224722281e-05,
      "loss": 0.8097,
      "step": 442760
    },
    {
      "epoch": 1.5517984628163477,
      "grad_norm": 3.171875,
      "learning_rate": 2.6818643218559104e-05,
      "loss": 0.7663,
      "step": 442770
    },
    {
      "epoch": 1.551833510323243,
      "grad_norm": 2.765625,
      "learning_rate": 2.68179941898954e-05,
      "loss": 0.9092,
      "step": 442780
    },
    {
      "epoch": 1.5518685578301388,
      "grad_norm": 2.75,
      "learning_rate": 2.68173451612317e-05,
      "loss": 0.8448,
      "step": 442790
    },
    {
      "epoch": 1.5519036053370343,
      "grad_norm": 3.109375,
      "learning_rate": 2.6816696132567998e-05,
      "loss": 0.766,
      "step": 442800
    },
    {
      "epoch": 1.5519386528439298,
      "grad_norm": 2.875,
      "learning_rate": 2.6816047103904296e-05,
      "loss": 0.871,
      "step": 442810
    },
    {
      "epoch": 1.5519737003508256,
      "grad_norm": 3.296875,
      "learning_rate": 2.6815398075240594e-05,
      "loss": 0.9412,
      "step": 442820
    },
    {
      "epoch": 1.5520087478577211,
      "grad_norm": 3.359375,
      "learning_rate": 2.681474904657689e-05,
      "loss": 0.8258,
      "step": 442830
    },
    {
      "epoch": 1.5520437953646167,
      "grad_norm": 2.765625,
      "learning_rate": 2.681410001791319e-05,
      "loss": 0.8538,
      "step": 442840
    },
    {
      "epoch": 1.5520788428715124,
      "grad_norm": 3.21875,
      "learning_rate": 2.6813450989249488e-05,
      "loss": 0.769,
      "step": 442850
    },
    {
      "epoch": 1.552113890378408,
      "grad_norm": 3.015625,
      "learning_rate": 2.6812801960585786e-05,
      "loss": 0.868,
      "step": 442860
    },
    {
      "epoch": 1.5521489378853035,
      "grad_norm": 2.65625,
      "learning_rate": 2.6812152931922087e-05,
      "loss": 0.8664,
      "step": 442870
    },
    {
      "epoch": 1.5521839853921993,
      "grad_norm": 3.3125,
      "learning_rate": 2.6811503903258385e-05,
      "loss": 0.7903,
      "step": 442880
    },
    {
      "epoch": 1.5522190328990946,
      "grad_norm": 3.0,
      "learning_rate": 2.6810854874594683e-05,
      "loss": 0.8764,
      "step": 442890
    },
    {
      "epoch": 1.5522540804059903,
      "grad_norm": 3.3125,
      "learning_rate": 2.681020584593098e-05,
      "loss": 0.9193,
      "step": 442900
    },
    {
      "epoch": 1.5522891279128859,
      "grad_norm": 3.0625,
      "learning_rate": 2.680955681726728e-05,
      "loss": 0.7736,
      "step": 442910
    },
    {
      "epoch": 1.5523241754197814,
      "grad_norm": 2.578125,
      "learning_rate": 2.6808907788603577e-05,
      "loss": 0.8729,
      "step": 442920
    },
    {
      "epoch": 1.5523592229266772,
      "grad_norm": 3.0625,
      "learning_rate": 2.6808258759939875e-05,
      "loss": 0.8149,
      "step": 442930
    },
    {
      "epoch": 1.5523942704335727,
      "grad_norm": 2.6875,
      "learning_rate": 2.6807609731276173e-05,
      "loss": 0.8327,
      "step": 442940
    },
    {
      "epoch": 1.5524293179404682,
      "grad_norm": 2.890625,
      "learning_rate": 2.680696070261247e-05,
      "loss": 0.8101,
      "step": 442950
    },
    {
      "epoch": 1.552464365447364,
      "grad_norm": 2.75,
      "learning_rate": 2.680631167394877e-05,
      "loss": 0.8562,
      "step": 442960
    },
    {
      "epoch": 1.5524994129542595,
      "grad_norm": 3.0,
      "learning_rate": 2.6805662645285067e-05,
      "loss": 0.8435,
      "step": 442970
    },
    {
      "epoch": 1.552534460461155,
      "grad_norm": 3.296875,
      "learning_rate": 2.6805013616621365e-05,
      "loss": 0.8491,
      "step": 442980
    },
    {
      "epoch": 1.5525695079680508,
      "grad_norm": 2.765625,
      "learning_rate": 2.6804364587957663e-05,
      "loss": 0.8142,
      "step": 442990
    },
    {
      "epoch": 1.5526045554749461,
      "grad_norm": 2.625,
      "learning_rate": 2.680371555929396e-05,
      "loss": 0.7576,
      "step": 443000
    },
    {
      "epoch": 1.552639602981842,
      "grad_norm": 3.03125,
      "learning_rate": 2.6803066530630262e-05,
      "loss": 0.8295,
      "step": 443010
    },
    {
      "epoch": 1.5526746504887374,
      "grad_norm": 2.890625,
      "learning_rate": 2.680241750196656e-05,
      "loss": 0.8985,
      "step": 443020
    },
    {
      "epoch": 1.552709697995633,
      "grad_norm": 2.8125,
      "learning_rate": 2.680176847330286e-05,
      "loss": 0.8006,
      "step": 443030
    },
    {
      "epoch": 1.5527447455025287,
      "grad_norm": 2.984375,
      "learning_rate": 2.6801119444639156e-05,
      "loss": 0.8041,
      "step": 443040
    },
    {
      "epoch": 1.5527797930094243,
      "grad_norm": 2.6875,
      "learning_rate": 2.6800470415975454e-05,
      "loss": 0.8123,
      "step": 443050
    },
    {
      "epoch": 1.5528148405163198,
      "grad_norm": 2.4375,
      "learning_rate": 2.6799821387311752e-05,
      "loss": 0.817,
      "step": 443060
    },
    {
      "epoch": 1.5528498880232156,
      "grad_norm": 3.21875,
      "learning_rate": 2.679917235864805e-05,
      "loss": 0.9011,
      "step": 443070
    },
    {
      "epoch": 1.552884935530111,
      "grad_norm": 2.71875,
      "learning_rate": 2.679852332998435e-05,
      "loss": 0.8302,
      "step": 443080
    },
    {
      "epoch": 1.5529199830370066,
      "grad_norm": 2.5625,
      "learning_rate": 2.6797874301320646e-05,
      "loss": 0.7894,
      "step": 443090
    },
    {
      "epoch": 1.5529550305439024,
      "grad_norm": 3.1875,
      "learning_rate": 2.6797225272656944e-05,
      "loss": 0.9069,
      "step": 443100
    },
    {
      "epoch": 1.5529900780507977,
      "grad_norm": 3.546875,
      "learning_rate": 2.6796576243993242e-05,
      "loss": 0.9113,
      "step": 443110
    },
    {
      "epoch": 1.5530251255576935,
      "grad_norm": 3.375,
      "learning_rate": 2.679592721532954e-05,
      "loss": 0.789,
      "step": 443120
    },
    {
      "epoch": 1.5530601730645892,
      "grad_norm": 3.1875,
      "learning_rate": 2.679527818666584e-05,
      "loss": 0.9306,
      "step": 443130
    },
    {
      "epoch": 1.5530952205714845,
      "grad_norm": 3.421875,
      "learning_rate": 2.6794629158002136e-05,
      "loss": 0.808,
      "step": 443140
    },
    {
      "epoch": 1.5531302680783803,
      "grad_norm": 3.03125,
      "learning_rate": 2.679398012933843e-05,
      "loss": 0.8561,
      "step": 443150
    },
    {
      "epoch": 1.5531653155852758,
      "grad_norm": 3.3125,
      "learning_rate": 2.679333110067473e-05,
      "loss": 0.8492,
      "step": 443160
    },
    {
      "epoch": 1.5532003630921714,
      "grad_norm": 3.484375,
      "learning_rate": 2.6792682072011027e-05,
      "loss": 0.8733,
      "step": 443170
    },
    {
      "epoch": 1.5532354105990671,
      "grad_norm": 3.359375,
      "learning_rate": 2.6792033043347325e-05,
      "loss": 0.8455,
      "step": 443180
    },
    {
      "epoch": 1.5532704581059626,
      "grad_norm": 2.84375,
      "learning_rate": 2.6791384014683623e-05,
      "loss": 0.8058,
      "step": 443190
    },
    {
      "epoch": 1.5533055056128582,
      "grad_norm": 2.6875,
      "learning_rate": 2.679073498601992e-05,
      "loss": 0.8648,
      "step": 443200
    },
    {
      "epoch": 1.553340553119754,
      "grad_norm": 2.859375,
      "learning_rate": 2.679008595735622e-05,
      "loss": 0.8666,
      "step": 443210
    },
    {
      "epoch": 1.5533756006266495,
      "grad_norm": 3.140625,
      "learning_rate": 2.6789436928692517e-05,
      "loss": 0.7775,
      "step": 443220
    },
    {
      "epoch": 1.553410648133545,
      "grad_norm": 2.90625,
      "learning_rate": 2.6788787900028815e-05,
      "loss": 0.7944,
      "step": 443230
    },
    {
      "epoch": 1.5534456956404408,
      "grad_norm": 2.84375,
      "learning_rate": 2.6788138871365116e-05,
      "loss": 0.8047,
      "step": 443240
    },
    {
      "epoch": 1.553480743147336,
      "grad_norm": 3.03125,
      "learning_rate": 2.6787489842701414e-05,
      "loss": 0.864,
      "step": 443250
    },
    {
      "epoch": 1.5535157906542318,
      "grad_norm": 2.828125,
      "learning_rate": 2.6786840814037712e-05,
      "loss": 0.8421,
      "step": 443260
    },
    {
      "epoch": 1.5535508381611274,
      "grad_norm": 3.015625,
      "learning_rate": 2.678619178537401e-05,
      "loss": 0.8473,
      "step": 443270
    },
    {
      "epoch": 1.553585885668023,
      "grad_norm": 2.875,
      "learning_rate": 2.678554275671031e-05,
      "loss": 0.8467,
      "step": 443280
    },
    {
      "epoch": 1.5536209331749187,
      "grad_norm": 3.25,
      "learning_rate": 2.6784893728046606e-05,
      "loss": 0.9158,
      "step": 443290
    },
    {
      "epoch": 1.5536559806818142,
      "grad_norm": 2.765625,
      "learning_rate": 2.6784244699382904e-05,
      "loss": 0.9044,
      "step": 443300
    },
    {
      "epoch": 1.5536910281887097,
      "grad_norm": 2.765625,
      "learning_rate": 2.6783595670719202e-05,
      "loss": 0.7818,
      "step": 443310
    },
    {
      "epoch": 1.5537260756956055,
      "grad_norm": 2.921875,
      "learning_rate": 2.67829466420555e-05,
      "loss": 0.8412,
      "step": 443320
    },
    {
      "epoch": 1.553761123202501,
      "grad_norm": 2.875,
      "learning_rate": 2.67822976133918e-05,
      "loss": 0.8308,
      "step": 443330
    },
    {
      "epoch": 1.5537961707093966,
      "grad_norm": 2.640625,
      "learning_rate": 2.6781648584728096e-05,
      "loss": 0.8271,
      "step": 443340
    },
    {
      "epoch": 1.5538312182162923,
      "grad_norm": 3.15625,
      "learning_rate": 2.6780999556064394e-05,
      "loss": 0.7772,
      "step": 443350
    },
    {
      "epoch": 1.5538662657231876,
      "grad_norm": 2.859375,
      "learning_rate": 2.6780350527400692e-05,
      "loss": 0.79,
      "step": 443360
    },
    {
      "epoch": 1.5539013132300834,
      "grad_norm": 2.46875,
      "learning_rate": 2.677970149873699e-05,
      "loss": 0.8793,
      "step": 443370
    },
    {
      "epoch": 1.553936360736979,
      "grad_norm": 3.34375,
      "learning_rate": 2.6779052470073292e-05,
      "loss": 0.8132,
      "step": 443380
    },
    {
      "epoch": 1.5539714082438745,
      "grad_norm": 3.34375,
      "learning_rate": 2.677840344140959e-05,
      "loss": 0.9123,
      "step": 443390
    },
    {
      "epoch": 1.5540064557507702,
      "grad_norm": 2.640625,
      "learning_rate": 2.6777754412745888e-05,
      "loss": 0.7716,
      "step": 443400
    },
    {
      "epoch": 1.5540415032576658,
      "grad_norm": 3.15625,
      "learning_rate": 2.6777105384082186e-05,
      "loss": 0.8546,
      "step": 443410
    },
    {
      "epoch": 1.5540765507645613,
      "grad_norm": 2.9375,
      "learning_rate": 2.6776456355418484e-05,
      "loss": 0.7969,
      "step": 443420
    },
    {
      "epoch": 1.554111598271457,
      "grad_norm": 2.734375,
      "learning_rate": 2.6775807326754782e-05,
      "loss": 0.765,
      "step": 443430
    },
    {
      "epoch": 1.5541466457783526,
      "grad_norm": 3.09375,
      "learning_rate": 2.677515829809108e-05,
      "loss": 0.8498,
      "step": 443440
    },
    {
      "epoch": 1.5541816932852481,
      "grad_norm": 2.734375,
      "learning_rate": 2.6774509269427378e-05,
      "loss": 0.8622,
      "step": 443450
    },
    {
      "epoch": 1.554216740792144,
      "grad_norm": 3.125,
      "learning_rate": 2.6773860240763676e-05,
      "loss": 0.8219,
      "step": 443460
    },
    {
      "epoch": 1.5542517882990392,
      "grad_norm": 2.53125,
      "learning_rate": 2.6773211212099974e-05,
      "loss": 0.8751,
      "step": 443470
    },
    {
      "epoch": 1.554286835805935,
      "grad_norm": 2.875,
      "learning_rate": 2.6772562183436272e-05,
      "loss": 0.8785,
      "step": 443480
    },
    {
      "epoch": 1.5543218833128305,
      "grad_norm": 2.703125,
      "learning_rate": 2.677191315477257e-05,
      "loss": 0.8438,
      "step": 443490
    },
    {
      "epoch": 1.554356930819726,
      "grad_norm": 3.5,
      "learning_rate": 2.6771264126108868e-05,
      "loss": 0.8516,
      "step": 443500
    },
    {
      "epoch": 1.5543919783266218,
      "grad_norm": 3.0625,
      "learning_rate": 2.677061509744517e-05,
      "loss": 0.901,
      "step": 443510
    },
    {
      "epoch": 1.5544270258335173,
      "grad_norm": 2.46875,
      "learning_rate": 2.676996606878146e-05,
      "loss": 0.7474,
      "step": 443520
    },
    {
      "epoch": 1.5544620733404129,
      "grad_norm": 2.921875,
      "learning_rate": 2.676931704011776e-05,
      "loss": 0.8636,
      "step": 443530
    },
    {
      "epoch": 1.5544971208473086,
      "grad_norm": 2.96875,
      "learning_rate": 2.6768668011454056e-05,
      "loss": 0.872,
      "step": 443540
    },
    {
      "epoch": 1.5545321683542042,
      "grad_norm": 2.59375,
      "learning_rate": 2.6768018982790354e-05,
      "loss": 0.8333,
      "step": 443550
    },
    {
      "epoch": 1.5545672158610997,
      "grad_norm": 2.875,
      "learning_rate": 2.6767369954126652e-05,
      "loss": 0.8688,
      "step": 443560
    },
    {
      "epoch": 1.5546022633679955,
      "grad_norm": 2.5625,
      "learning_rate": 2.676672092546295e-05,
      "loss": 0.8103,
      "step": 443570
    },
    {
      "epoch": 1.5546373108748908,
      "grad_norm": 2.59375,
      "learning_rate": 2.676607189679925e-05,
      "loss": 0.8202,
      "step": 443580
    },
    {
      "epoch": 1.5546723583817865,
      "grad_norm": 3.109375,
      "learning_rate": 2.6765422868135546e-05,
      "loss": 0.8589,
      "step": 443590
    },
    {
      "epoch": 1.554707405888682,
      "grad_norm": 2.765625,
      "learning_rate": 2.6764773839471848e-05,
      "loss": 0.9146,
      "step": 443600
    },
    {
      "epoch": 1.5547424533955776,
      "grad_norm": 2.65625,
      "learning_rate": 2.6764124810808146e-05,
      "loss": 0.9013,
      "step": 443610
    },
    {
      "epoch": 1.5547775009024734,
      "grad_norm": 3.203125,
      "learning_rate": 2.6763475782144444e-05,
      "loss": 0.8872,
      "step": 443620
    },
    {
      "epoch": 1.5548125484093689,
      "grad_norm": 2.96875,
      "learning_rate": 2.6762826753480742e-05,
      "loss": 0.7505,
      "step": 443630
    },
    {
      "epoch": 1.5548475959162644,
      "grad_norm": 2.796875,
      "learning_rate": 2.676217772481704e-05,
      "loss": 0.8951,
      "step": 443640
    },
    {
      "epoch": 1.5548826434231602,
      "grad_norm": 3.15625,
      "learning_rate": 2.6761528696153338e-05,
      "loss": 0.8748,
      "step": 443650
    },
    {
      "epoch": 1.5549176909300557,
      "grad_norm": 3.25,
      "learning_rate": 2.6760879667489636e-05,
      "loss": 0.7664,
      "step": 443660
    },
    {
      "epoch": 1.5549527384369513,
      "grad_norm": 3.171875,
      "learning_rate": 2.6760230638825934e-05,
      "loss": 0.7892,
      "step": 443670
    },
    {
      "epoch": 1.554987785943847,
      "grad_norm": 2.9375,
      "learning_rate": 2.6759581610162232e-05,
      "loss": 0.8115,
      "step": 443680
    },
    {
      "epoch": 1.5550228334507423,
      "grad_norm": 3.390625,
      "learning_rate": 2.675893258149853e-05,
      "loss": 0.8166,
      "step": 443690
    },
    {
      "epoch": 1.555057880957638,
      "grad_norm": 3.0,
      "learning_rate": 2.6758283552834828e-05,
      "loss": 0.8504,
      "step": 443700
    },
    {
      "epoch": 1.5550929284645338,
      "grad_norm": 3.125,
      "learning_rate": 2.6757634524171126e-05,
      "loss": 0.818,
      "step": 443710
    },
    {
      "epoch": 1.5551279759714292,
      "grad_norm": 2.859375,
      "learning_rate": 2.6756985495507424e-05,
      "loss": 0.8251,
      "step": 443720
    },
    {
      "epoch": 1.555163023478325,
      "grad_norm": 3.265625,
      "learning_rate": 2.6756336466843722e-05,
      "loss": 0.763,
      "step": 443730
    },
    {
      "epoch": 1.5551980709852204,
      "grad_norm": 2.421875,
      "learning_rate": 2.6755687438180023e-05,
      "loss": 0.8325,
      "step": 443740
    },
    {
      "epoch": 1.555233118492116,
      "grad_norm": 2.96875,
      "learning_rate": 2.675503840951632e-05,
      "loss": 0.8434,
      "step": 443750
    },
    {
      "epoch": 1.5552681659990117,
      "grad_norm": 2.828125,
      "learning_rate": 2.675438938085262e-05,
      "loss": 0.8586,
      "step": 443760
    },
    {
      "epoch": 1.5553032135059073,
      "grad_norm": 3.0625,
      "learning_rate": 2.6753740352188917e-05,
      "loss": 0.8809,
      "step": 443770
    },
    {
      "epoch": 1.5553382610128028,
      "grad_norm": 2.765625,
      "learning_rate": 2.6753091323525215e-05,
      "loss": 0.8873,
      "step": 443780
    },
    {
      "epoch": 1.5553733085196986,
      "grad_norm": 2.78125,
      "learning_rate": 2.6752442294861513e-05,
      "loss": 0.7716,
      "step": 443790
    },
    {
      "epoch": 1.5554083560265939,
      "grad_norm": 2.875,
      "learning_rate": 2.675179326619781e-05,
      "loss": 0.8005,
      "step": 443800
    },
    {
      "epoch": 1.5554434035334896,
      "grad_norm": 3.109375,
      "learning_rate": 2.675114423753411e-05,
      "loss": 0.8521,
      "step": 443810
    },
    {
      "epoch": 1.5554784510403854,
      "grad_norm": 2.84375,
      "learning_rate": 2.6750495208870407e-05,
      "loss": 0.8004,
      "step": 443820
    },
    {
      "epoch": 1.5555134985472807,
      "grad_norm": 3.0,
      "learning_rate": 2.6749846180206705e-05,
      "loss": 0.8357,
      "step": 443830
    },
    {
      "epoch": 1.5555485460541765,
      "grad_norm": 3.4375,
      "learning_rate": 2.6749197151543003e-05,
      "loss": 0.7708,
      "step": 443840
    },
    {
      "epoch": 1.555583593561072,
      "grad_norm": 3.5625,
      "learning_rate": 2.67485481228793e-05,
      "loss": 0.7818,
      "step": 443850
    },
    {
      "epoch": 1.5556186410679675,
      "grad_norm": 2.96875,
      "learning_rate": 2.67478990942156e-05,
      "loss": 0.8977,
      "step": 443860
    },
    {
      "epoch": 1.5556536885748633,
      "grad_norm": 2.9375,
      "learning_rate": 2.6747250065551897e-05,
      "loss": 0.8423,
      "step": 443870
    },
    {
      "epoch": 1.5556887360817588,
      "grad_norm": 2.59375,
      "learning_rate": 2.67466010368882e-05,
      "loss": 0.7986,
      "step": 443880
    },
    {
      "epoch": 1.5557237835886544,
      "grad_norm": 3.359375,
      "learning_rate": 2.6745952008224497e-05,
      "loss": 0.8251,
      "step": 443890
    },
    {
      "epoch": 1.5557588310955501,
      "grad_norm": 2.84375,
      "learning_rate": 2.6745302979560788e-05,
      "loss": 0.8528,
      "step": 443900
    },
    {
      "epoch": 1.5557938786024457,
      "grad_norm": 2.90625,
      "learning_rate": 2.6744653950897086e-05,
      "loss": 0.7878,
      "step": 443910
    },
    {
      "epoch": 1.5558289261093412,
      "grad_norm": 3.09375,
      "learning_rate": 2.6744004922233384e-05,
      "loss": 0.9138,
      "step": 443920
    },
    {
      "epoch": 1.555863973616237,
      "grad_norm": 2.890625,
      "learning_rate": 2.6743355893569682e-05,
      "loss": 0.8749,
      "step": 443930
    },
    {
      "epoch": 1.5558990211231323,
      "grad_norm": 2.65625,
      "learning_rate": 2.674270686490598e-05,
      "loss": 0.9555,
      "step": 443940
    },
    {
      "epoch": 1.555934068630028,
      "grad_norm": 2.75,
      "learning_rate": 2.6742057836242278e-05,
      "loss": 0.9097,
      "step": 443950
    },
    {
      "epoch": 1.5559691161369236,
      "grad_norm": 2.96875,
      "learning_rate": 2.6741408807578576e-05,
      "loss": 0.9088,
      "step": 443960
    },
    {
      "epoch": 1.556004163643819,
      "grad_norm": 2.34375,
      "learning_rate": 2.6740759778914877e-05,
      "loss": 0.7304,
      "step": 443970
    },
    {
      "epoch": 1.5560392111507149,
      "grad_norm": 2.671875,
      "learning_rate": 2.6740110750251175e-05,
      "loss": 0.8796,
      "step": 443980
    },
    {
      "epoch": 1.5560742586576104,
      "grad_norm": 2.640625,
      "learning_rate": 2.6739461721587473e-05,
      "loss": 0.854,
      "step": 443990
    },
    {
      "epoch": 1.556109306164506,
      "grad_norm": 3.140625,
      "learning_rate": 2.673881269292377e-05,
      "loss": 0.797,
      "step": 444000
    },
    {
      "epoch": 1.5561443536714017,
      "grad_norm": 3.015625,
      "learning_rate": 2.673816366426007e-05,
      "loss": 0.8761,
      "step": 444010
    },
    {
      "epoch": 1.5561794011782972,
      "grad_norm": 2.828125,
      "learning_rate": 2.6737514635596367e-05,
      "loss": 0.8299,
      "step": 444020
    },
    {
      "epoch": 1.5562144486851928,
      "grad_norm": 2.515625,
      "learning_rate": 2.6736865606932665e-05,
      "loss": 0.806,
      "step": 444030
    },
    {
      "epoch": 1.5562494961920885,
      "grad_norm": 2.984375,
      "learning_rate": 2.6736216578268963e-05,
      "loss": 0.8978,
      "step": 444040
    },
    {
      "epoch": 1.5562845436989838,
      "grad_norm": 2.953125,
      "learning_rate": 2.673556754960526e-05,
      "loss": 0.8152,
      "step": 444050
    },
    {
      "epoch": 1.5563195912058796,
      "grad_norm": 3.4375,
      "learning_rate": 2.673491852094156e-05,
      "loss": 0.9095,
      "step": 444060
    },
    {
      "epoch": 1.5563546387127751,
      "grad_norm": 2.734375,
      "learning_rate": 2.6734269492277857e-05,
      "loss": 0.8308,
      "step": 444070
    },
    {
      "epoch": 1.5563896862196707,
      "grad_norm": 2.953125,
      "learning_rate": 2.6733620463614155e-05,
      "loss": 0.8412,
      "step": 444080
    },
    {
      "epoch": 1.5564247337265664,
      "grad_norm": 3.125,
      "learning_rate": 2.6732971434950453e-05,
      "loss": 0.8632,
      "step": 444090
    },
    {
      "epoch": 1.556459781233462,
      "grad_norm": 3.046875,
      "learning_rate": 2.673232240628675e-05,
      "loss": 0.932,
      "step": 444100
    },
    {
      "epoch": 1.5564948287403575,
      "grad_norm": 2.78125,
      "learning_rate": 2.6731673377623053e-05,
      "loss": 0.805,
      "step": 444110
    },
    {
      "epoch": 1.5565298762472533,
      "grad_norm": 3.65625,
      "learning_rate": 2.673102434895935e-05,
      "loss": 0.8661,
      "step": 444120
    },
    {
      "epoch": 1.5565649237541488,
      "grad_norm": 3.109375,
      "learning_rate": 2.673037532029565e-05,
      "loss": 0.8298,
      "step": 444130
    },
    {
      "epoch": 1.5565999712610443,
      "grad_norm": 3.234375,
      "learning_rate": 2.6729726291631947e-05,
      "loss": 0.8439,
      "step": 444140
    },
    {
      "epoch": 1.55663501876794,
      "grad_norm": 2.71875,
      "learning_rate": 2.6729077262968245e-05,
      "loss": 0.8361,
      "step": 444150
    },
    {
      "epoch": 1.5566700662748354,
      "grad_norm": 3.03125,
      "learning_rate": 2.6728428234304543e-05,
      "loss": 0.8341,
      "step": 444160
    },
    {
      "epoch": 1.5567051137817312,
      "grad_norm": 2.671875,
      "learning_rate": 2.672777920564084e-05,
      "loss": 0.8686,
      "step": 444170
    },
    {
      "epoch": 1.5567401612886267,
      "grad_norm": 2.734375,
      "learning_rate": 2.672713017697714e-05,
      "loss": 0.7589,
      "step": 444180
    },
    {
      "epoch": 1.5567752087955222,
      "grad_norm": 2.75,
      "learning_rate": 2.6726481148313437e-05,
      "loss": 0.8446,
      "step": 444190
    },
    {
      "epoch": 1.556810256302418,
      "grad_norm": 3.109375,
      "learning_rate": 2.6725832119649735e-05,
      "loss": 0.7869,
      "step": 444200
    },
    {
      "epoch": 1.5568453038093135,
      "grad_norm": 3.53125,
      "learning_rate": 2.6725183090986033e-05,
      "loss": 0.9361,
      "step": 444210
    },
    {
      "epoch": 1.556880351316209,
      "grad_norm": 3.09375,
      "learning_rate": 2.672453406232233e-05,
      "loss": 0.8278,
      "step": 444220
    },
    {
      "epoch": 1.5569153988231048,
      "grad_norm": 2.796875,
      "learning_rate": 2.672388503365863e-05,
      "loss": 0.8976,
      "step": 444230
    },
    {
      "epoch": 1.5569504463300003,
      "grad_norm": 2.6875,
      "learning_rate": 2.6723236004994927e-05,
      "loss": 0.7228,
      "step": 444240
    },
    {
      "epoch": 1.5569854938368959,
      "grad_norm": 3.0,
      "learning_rate": 2.6722586976331228e-05,
      "loss": 0.8805,
      "step": 444250
    },
    {
      "epoch": 1.5570205413437916,
      "grad_norm": 2.984375,
      "learning_rate": 2.6721937947667526e-05,
      "loss": 0.8534,
      "step": 444260
    },
    {
      "epoch": 1.557055588850687,
      "grad_norm": 3.015625,
      "learning_rate": 2.6721288919003824e-05,
      "loss": 0.8599,
      "step": 444270
    },
    {
      "epoch": 1.5570906363575827,
      "grad_norm": 2.71875,
      "learning_rate": 2.6720639890340115e-05,
      "loss": 0.836,
      "step": 444280
    },
    {
      "epoch": 1.5571256838644782,
      "grad_norm": 2.921875,
      "learning_rate": 2.6719990861676413e-05,
      "loss": 0.8784,
      "step": 444290
    },
    {
      "epoch": 1.5571607313713738,
      "grad_norm": 2.953125,
      "learning_rate": 2.671934183301271e-05,
      "loss": 0.8222,
      "step": 444300
    },
    {
      "epoch": 1.5571957788782695,
      "grad_norm": 2.859375,
      "learning_rate": 2.671869280434901e-05,
      "loss": 0.7609,
      "step": 444310
    },
    {
      "epoch": 1.557230826385165,
      "grad_norm": 3.03125,
      "learning_rate": 2.6718043775685307e-05,
      "loss": 0.8591,
      "step": 444320
    },
    {
      "epoch": 1.5572658738920606,
      "grad_norm": 3.171875,
      "learning_rate": 2.6717394747021605e-05,
      "loss": 0.7768,
      "step": 444330
    },
    {
      "epoch": 1.5573009213989564,
      "grad_norm": 3.140625,
      "learning_rate": 2.6716745718357907e-05,
      "loss": 0.8007,
      "step": 444340
    },
    {
      "epoch": 1.557335968905852,
      "grad_norm": 2.9375,
      "learning_rate": 2.6716096689694205e-05,
      "loss": 0.8993,
      "step": 444350
    },
    {
      "epoch": 1.5573710164127474,
      "grad_norm": 2.875,
      "learning_rate": 2.6715447661030503e-05,
      "loss": 0.8046,
      "step": 444360
    },
    {
      "epoch": 1.5574060639196432,
      "grad_norm": 2.65625,
      "learning_rate": 2.67147986323668e-05,
      "loss": 0.7588,
      "step": 444370
    },
    {
      "epoch": 1.5574411114265385,
      "grad_norm": 2.65625,
      "learning_rate": 2.67141496037031e-05,
      "loss": 0.8615,
      "step": 444380
    },
    {
      "epoch": 1.5574761589334343,
      "grad_norm": 2.65625,
      "learning_rate": 2.6713500575039397e-05,
      "loss": 0.8064,
      "step": 444390
    },
    {
      "epoch": 1.55751120644033,
      "grad_norm": 2.84375,
      "learning_rate": 2.6712851546375695e-05,
      "loss": 0.8775,
      "step": 444400
    },
    {
      "epoch": 1.5575462539472253,
      "grad_norm": 2.890625,
      "learning_rate": 2.6712202517711993e-05,
      "loss": 0.8924,
      "step": 444410
    },
    {
      "epoch": 1.557581301454121,
      "grad_norm": 2.28125,
      "learning_rate": 2.671155348904829e-05,
      "loss": 0.9282,
      "step": 444420
    },
    {
      "epoch": 1.5576163489610166,
      "grad_norm": 2.875,
      "learning_rate": 2.671090446038459e-05,
      "loss": 0.8845,
      "step": 444430
    },
    {
      "epoch": 1.5576513964679122,
      "grad_norm": 2.765625,
      "learning_rate": 2.6710255431720887e-05,
      "loss": 0.8562,
      "step": 444440
    },
    {
      "epoch": 1.557686443974808,
      "grad_norm": 2.140625,
      "learning_rate": 2.6709606403057185e-05,
      "loss": 0.7495,
      "step": 444450
    },
    {
      "epoch": 1.5577214914817035,
      "grad_norm": 2.703125,
      "learning_rate": 2.6708957374393483e-05,
      "loss": 0.7917,
      "step": 444460
    },
    {
      "epoch": 1.557756538988599,
      "grad_norm": 2.1875,
      "learning_rate": 2.6708308345729784e-05,
      "loss": 0.7623,
      "step": 444470
    },
    {
      "epoch": 1.5577915864954948,
      "grad_norm": 3.1875,
      "learning_rate": 2.6707659317066082e-05,
      "loss": 0.8303,
      "step": 444480
    },
    {
      "epoch": 1.55782663400239,
      "grad_norm": 2.3125,
      "learning_rate": 2.670701028840238e-05,
      "loss": 0.8255,
      "step": 444490
    },
    {
      "epoch": 1.5578616815092858,
      "grad_norm": 3.265625,
      "learning_rate": 2.6706361259738678e-05,
      "loss": 0.8193,
      "step": 444500
    },
    {
      "epoch": 1.5578967290161816,
      "grad_norm": 3.453125,
      "learning_rate": 2.6705712231074976e-05,
      "loss": 0.8194,
      "step": 444510
    },
    {
      "epoch": 1.557931776523077,
      "grad_norm": 2.734375,
      "learning_rate": 2.6705063202411274e-05,
      "loss": 0.7592,
      "step": 444520
    },
    {
      "epoch": 1.5579668240299727,
      "grad_norm": 2.40625,
      "learning_rate": 2.6704414173747572e-05,
      "loss": 0.8949,
      "step": 444530
    },
    {
      "epoch": 1.5580018715368682,
      "grad_norm": 3.15625,
      "learning_rate": 2.670376514508387e-05,
      "loss": 0.8405,
      "step": 444540
    },
    {
      "epoch": 1.5580369190437637,
      "grad_norm": 3.078125,
      "learning_rate": 2.6703116116420168e-05,
      "loss": 0.8065,
      "step": 444550
    },
    {
      "epoch": 1.5580719665506595,
      "grad_norm": 3.140625,
      "learning_rate": 2.6702467087756466e-05,
      "loss": 0.8102,
      "step": 444560
    },
    {
      "epoch": 1.558107014057555,
      "grad_norm": 2.484375,
      "learning_rate": 2.6701818059092764e-05,
      "loss": 0.8725,
      "step": 444570
    },
    {
      "epoch": 1.5581420615644506,
      "grad_norm": 2.953125,
      "learning_rate": 2.6701169030429062e-05,
      "loss": 0.7889,
      "step": 444580
    },
    {
      "epoch": 1.5581771090713463,
      "grad_norm": 3.234375,
      "learning_rate": 2.670052000176536e-05,
      "loss": 0.9,
      "step": 444590
    },
    {
      "epoch": 1.5582121565782419,
      "grad_norm": 3.078125,
      "learning_rate": 2.6699870973101658e-05,
      "loss": 0.7979,
      "step": 444600
    },
    {
      "epoch": 1.5582472040851374,
      "grad_norm": 3.359375,
      "learning_rate": 2.669922194443796e-05,
      "loss": 0.8575,
      "step": 444610
    },
    {
      "epoch": 1.5582822515920332,
      "grad_norm": 2.609375,
      "learning_rate": 2.6698572915774257e-05,
      "loss": 0.7496,
      "step": 444620
    },
    {
      "epoch": 1.5583172990989285,
      "grad_norm": 3.03125,
      "learning_rate": 2.6697923887110555e-05,
      "loss": 0.8315,
      "step": 444630
    },
    {
      "epoch": 1.5583523466058242,
      "grad_norm": 2.515625,
      "learning_rate": 2.6697274858446853e-05,
      "loss": 0.7579,
      "step": 444640
    },
    {
      "epoch": 1.5583873941127198,
      "grad_norm": 3.0,
      "learning_rate": 2.6696625829783145e-05,
      "loss": 0.8392,
      "step": 444650
    },
    {
      "epoch": 1.5584224416196153,
      "grad_norm": 2.9375,
      "learning_rate": 2.6695976801119443e-05,
      "loss": 0.9122,
      "step": 444660
    },
    {
      "epoch": 1.558457489126511,
      "grad_norm": 3.046875,
      "learning_rate": 2.669532777245574e-05,
      "loss": 0.8262,
      "step": 444670
    },
    {
      "epoch": 1.5584925366334066,
      "grad_norm": 2.46875,
      "learning_rate": 2.669467874379204e-05,
      "loss": 0.8295,
      "step": 444680
    },
    {
      "epoch": 1.5585275841403021,
      "grad_norm": 3.03125,
      "learning_rate": 2.6694029715128337e-05,
      "loss": 0.826,
      "step": 444690
    },
    {
      "epoch": 1.5585626316471979,
      "grad_norm": 2.8125,
      "learning_rate": 2.6693380686464638e-05,
      "loss": 0.8223,
      "step": 444700
    },
    {
      "epoch": 1.5585976791540934,
      "grad_norm": 3.0,
      "learning_rate": 2.6692731657800936e-05,
      "loss": 0.886,
      "step": 444710
    },
    {
      "epoch": 1.558632726660989,
      "grad_norm": 2.609375,
      "learning_rate": 2.6692082629137234e-05,
      "loss": 0.9027,
      "step": 444720
    },
    {
      "epoch": 1.5586677741678847,
      "grad_norm": 2.953125,
      "learning_rate": 2.6691433600473532e-05,
      "loss": 0.7844,
      "step": 444730
    },
    {
      "epoch": 1.55870282167478,
      "grad_norm": 2.84375,
      "learning_rate": 2.669078457180983e-05,
      "loss": 0.8601,
      "step": 444740
    },
    {
      "epoch": 1.5587378691816758,
      "grad_norm": 2.6875,
      "learning_rate": 2.6690135543146128e-05,
      "loss": 0.8921,
      "step": 444750
    },
    {
      "epoch": 1.5587729166885713,
      "grad_norm": 2.546875,
      "learning_rate": 2.6689486514482426e-05,
      "loss": 0.8472,
      "step": 444760
    },
    {
      "epoch": 1.5588079641954669,
      "grad_norm": 3.109375,
      "learning_rate": 2.6688837485818724e-05,
      "loss": 0.8505,
      "step": 444770
    },
    {
      "epoch": 1.5588430117023626,
      "grad_norm": 2.703125,
      "learning_rate": 2.6688188457155022e-05,
      "loss": 0.8386,
      "step": 444780
    },
    {
      "epoch": 1.5588780592092581,
      "grad_norm": 2.96875,
      "learning_rate": 2.668753942849132e-05,
      "loss": 0.9008,
      "step": 444790
    },
    {
      "epoch": 1.5589131067161537,
      "grad_norm": 2.578125,
      "learning_rate": 2.6686890399827618e-05,
      "loss": 0.8427,
      "step": 444800
    },
    {
      "epoch": 1.5589481542230494,
      "grad_norm": 2.390625,
      "learning_rate": 2.6686241371163916e-05,
      "loss": 0.7917,
      "step": 444810
    },
    {
      "epoch": 1.558983201729945,
      "grad_norm": 2.828125,
      "learning_rate": 2.6685592342500214e-05,
      "loss": 0.8493,
      "step": 444820
    },
    {
      "epoch": 1.5590182492368405,
      "grad_norm": 2.671875,
      "learning_rate": 2.6684943313836512e-05,
      "loss": 0.8878,
      "step": 444830
    },
    {
      "epoch": 1.5590532967437363,
      "grad_norm": 2.890625,
      "learning_rate": 2.6684294285172813e-05,
      "loss": 0.8239,
      "step": 444840
    },
    {
      "epoch": 1.5590883442506316,
      "grad_norm": 3.0,
      "learning_rate": 2.668364525650911e-05,
      "loss": 0.8735,
      "step": 444850
    },
    {
      "epoch": 1.5591233917575273,
      "grad_norm": 2.75,
      "learning_rate": 2.668299622784541e-05,
      "loss": 0.8271,
      "step": 444860
    },
    {
      "epoch": 1.5591584392644229,
      "grad_norm": 3.4375,
      "learning_rate": 2.6682347199181707e-05,
      "loss": 0.8961,
      "step": 444870
    },
    {
      "epoch": 1.5591934867713184,
      "grad_norm": 2.6875,
      "learning_rate": 2.6681698170518005e-05,
      "loss": 0.8421,
      "step": 444880
    },
    {
      "epoch": 1.5592285342782142,
      "grad_norm": 3.203125,
      "learning_rate": 2.6681049141854303e-05,
      "loss": 0.8567,
      "step": 444890
    },
    {
      "epoch": 1.5592635817851097,
      "grad_norm": 3.109375,
      "learning_rate": 2.66804001131906e-05,
      "loss": 0.8159,
      "step": 444900
    },
    {
      "epoch": 1.5592986292920052,
      "grad_norm": 3.0625,
      "learning_rate": 2.66797510845269e-05,
      "loss": 0.892,
      "step": 444910
    },
    {
      "epoch": 1.559333676798901,
      "grad_norm": 3.40625,
      "learning_rate": 2.6679102055863197e-05,
      "loss": 0.8594,
      "step": 444920
    },
    {
      "epoch": 1.5593687243057965,
      "grad_norm": 2.875,
      "learning_rate": 2.6678453027199495e-05,
      "loss": 0.8497,
      "step": 444930
    },
    {
      "epoch": 1.559403771812692,
      "grad_norm": 2.96875,
      "learning_rate": 2.6677803998535793e-05,
      "loss": 0.8262,
      "step": 444940
    },
    {
      "epoch": 1.5594388193195878,
      "grad_norm": 2.6875,
      "learning_rate": 2.667715496987209e-05,
      "loss": 0.7816,
      "step": 444950
    },
    {
      "epoch": 1.5594738668264831,
      "grad_norm": 3.09375,
      "learning_rate": 2.667650594120839e-05,
      "loss": 0.8191,
      "step": 444960
    },
    {
      "epoch": 1.559508914333379,
      "grad_norm": 2.9375,
      "learning_rate": 2.6675856912544687e-05,
      "loss": 0.8395,
      "step": 444970
    },
    {
      "epoch": 1.5595439618402744,
      "grad_norm": 2.90625,
      "learning_rate": 2.667520788388099e-05,
      "loss": 0.8629,
      "step": 444980
    },
    {
      "epoch": 1.55957900934717,
      "grad_norm": 2.765625,
      "learning_rate": 2.6674558855217287e-05,
      "loss": 0.9219,
      "step": 444990
    },
    {
      "epoch": 1.5596140568540657,
      "grad_norm": 2.75,
      "learning_rate": 2.6673909826553585e-05,
      "loss": 0.7882,
      "step": 445000
    },
    {
      "epoch": 1.5596140568540657,
      "eval_loss": 0.7880002856254578,
      "eval_runtime": 558.998,
      "eval_samples_per_second": 680.568,
      "eval_steps_per_second": 56.714,
      "step": 445000
    },
    {
      "epoch": 1.5596491043609613,
      "grad_norm": 2.84375,
      "learning_rate": 2.6673260797889883e-05,
      "loss": 0.7969,
      "step": 445010
    },
    {
      "epoch": 1.5596841518678568,
      "grad_norm": 2.421875,
      "learning_rate": 2.667261176922618e-05,
      "loss": 0.8043,
      "step": 445020
    },
    {
      "epoch": 1.5597191993747526,
      "grad_norm": 3.15625,
      "learning_rate": 2.6671962740562472e-05,
      "loss": 0.9265,
      "step": 445030
    },
    {
      "epoch": 1.559754246881648,
      "grad_norm": 3.125,
      "learning_rate": 2.667131371189877e-05,
      "loss": 0.9338,
      "step": 445040
    },
    {
      "epoch": 1.5597892943885436,
      "grad_norm": 2.703125,
      "learning_rate": 2.6670664683235068e-05,
      "loss": 0.9564,
      "step": 445050
    },
    {
      "epoch": 1.5598243418954394,
      "grad_norm": 3.109375,
      "learning_rate": 2.6670015654571366e-05,
      "loss": 0.8578,
      "step": 445060
    },
    {
      "epoch": 1.5598593894023347,
      "grad_norm": 3.0625,
      "learning_rate": 2.6669366625907667e-05,
      "loss": 0.7984,
      "step": 445070
    },
    {
      "epoch": 1.5598944369092305,
      "grad_norm": 2.546875,
      "learning_rate": 2.6668717597243965e-05,
      "loss": 0.8709,
      "step": 445080
    },
    {
      "epoch": 1.5599294844161262,
      "grad_norm": 2.78125,
      "learning_rate": 2.6668068568580263e-05,
      "loss": 0.8115,
      "step": 445090
    },
    {
      "epoch": 1.5599645319230215,
      "grad_norm": 2.84375,
      "learning_rate": 2.666741953991656e-05,
      "loss": 0.8701,
      "step": 445100
    },
    {
      "epoch": 1.5599995794299173,
      "grad_norm": 2.78125,
      "learning_rate": 2.666677051125286e-05,
      "loss": 0.8698,
      "step": 445110
    },
    {
      "epoch": 1.5600346269368128,
      "grad_norm": 3.5,
      "learning_rate": 2.6666121482589157e-05,
      "loss": 0.8475,
      "step": 445120
    },
    {
      "epoch": 1.5600696744437084,
      "grad_norm": 2.75,
      "learning_rate": 2.6665472453925455e-05,
      "loss": 0.8745,
      "step": 445130
    },
    {
      "epoch": 1.5601047219506041,
      "grad_norm": 2.796875,
      "learning_rate": 2.6664823425261753e-05,
      "loss": 0.8089,
      "step": 445140
    },
    {
      "epoch": 1.5601397694574997,
      "grad_norm": 3.15625,
      "learning_rate": 2.666417439659805e-05,
      "loss": 0.8239,
      "step": 445150
    },
    {
      "epoch": 1.5601748169643952,
      "grad_norm": 2.328125,
      "learning_rate": 2.666352536793435e-05,
      "loss": 0.7197,
      "step": 445160
    },
    {
      "epoch": 1.560209864471291,
      "grad_norm": 2.796875,
      "learning_rate": 2.6662876339270647e-05,
      "loss": 0.8774,
      "step": 445170
    },
    {
      "epoch": 1.5602449119781863,
      "grad_norm": 2.75,
      "learning_rate": 2.6662227310606945e-05,
      "loss": 0.8245,
      "step": 445180
    },
    {
      "epoch": 1.560279959485082,
      "grad_norm": 2.90625,
      "learning_rate": 2.6661578281943243e-05,
      "loss": 0.8075,
      "step": 445190
    },
    {
      "epoch": 1.5603150069919778,
      "grad_norm": 3.390625,
      "learning_rate": 2.666092925327954e-05,
      "loss": 0.8909,
      "step": 445200
    },
    {
      "epoch": 1.560350054498873,
      "grad_norm": 2.734375,
      "learning_rate": 2.6660280224615843e-05,
      "loss": 0.8341,
      "step": 445210
    },
    {
      "epoch": 1.5603851020057689,
      "grad_norm": 3.15625,
      "learning_rate": 2.665963119595214e-05,
      "loss": 0.872,
      "step": 445220
    },
    {
      "epoch": 1.5604201495126644,
      "grad_norm": 3.046875,
      "learning_rate": 2.665898216728844e-05,
      "loss": 0.8623,
      "step": 445230
    },
    {
      "epoch": 1.56045519701956,
      "grad_norm": 2.921875,
      "learning_rate": 2.6658333138624737e-05,
      "loss": 0.8533,
      "step": 445240
    },
    {
      "epoch": 1.5604902445264557,
      "grad_norm": 3.796875,
      "learning_rate": 2.6657684109961035e-05,
      "loss": 0.8849,
      "step": 445250
    },
    {
      "epoch": 1.5605252920333512,
      "grad_norm": 2.828125,
      "learning_rate": 2.6657035081297333e-05,
      "loss": 0.798,
      "step": 445260
    },
    {
      "epoch": 1.5605603395402468,
      "grad_norm": 3.078125,
      "learning_rate": 2.665638605263363e-05,
      "loss": 0.8391,
      "step": 445270
    },
    {
      "epoch": 1.5605953870471425,
      "grad_norm": 3.03125,
      "learning_rate": 2.665573702396993e-05,
      "loss": 0.8495,
      "step": 445280
    },
    {
      "epoch": 1.560630434554038,
      "grad_norm": 2.8125,
      "learning_rate": 2.6655087995306227e-05,
      "loss": 0.8534,
      "step": 445290
    },
    {
      "epoch": 1.5606654820609336,
      "grad_norm": 2.703125,
      "learning_rate": 2.6654438966642525e-05,
      "loss": 0.7998,
      "step": 445300
    },
    {
      "epoch": 1.5607005295678293,
      "grad_norm": 2.859375,
      "learning_rate": 2.6653789937978823e-05,
      "loss": 0.9119,
      "step": 445310
    },
    {
      "epoch": 1.5607355770747247,
      "grad_norm": 3.09375,
      "learning_rate": 2.665314090931512e-05,
      "loss": 0.8669,
      "step": 445320
    },
    {
      "epoch": 1.5607706245816204,
      "grad_norm": 2.859375,
      "learning_rate": 2.665249188065142e-05,
      "loss": 0.8472,
      "step": 445330
    },
    {
      "epoch": 1.560805672088516,
      "grad_norm": 2.90625,
      "learning_rate": 2.6651842851987717e-05,
      "loss": 0.8847,
      "step": 445340
    },
    {
      "epoch": 1.5608407195954115,
      "grad_norm": 2.6875,
      "learning_rate": 2.6651193823324018e-05,
      "loss": 0.7895,
      "step": 445350
    },
    {
      "epoch": 1.5608757671023072,
      "grad_norm": 3.015625,
      "learning_rate": 2.6650544794660316e-05,
      "loss": 0.844,
      "step": 445360
    },
    {
      "epoch": 1.5609108146092028,
      "grad_norm": 3.109375,
      "learning_rate": 2.6649895765996614e-05,
      "loss": 0.7996,
      "step": 445370
    },
    {
      "epoch": 1.5609458621160983,
      "grad_norm": 2.90625,
      "learning_rate": 2.6649246737332912e-05,
      "loss": 0.7407,
      "step": 445380
    },
    {
      "epoch": 1.560980909622994,
      "grad_norm": 3.0625,
      "learning_rate": 2.664859770866921e-05,
      "loss": 0.781,
      "step": 445390
    },
    {
      "epoch": 1.5610159571298896,
      "grad_norm": 3.0625,
      "learning_rate": 2.66479486800055e-05,
      "loss": 0.8586,
      "step": 445400
    },
    {
      "epoch": 1.5610510046367851,
      "grad_norm": 3.0625,
      "learning_rate": 2.66472996513418e-05,
      "loss": 0.7654,
      "step": 445410
    },
    {
      "epoch": 1.561086052143681,
      "grad_norm": 3.03125,
      "learning_rate": 2.6646650622678097e-05,
      "loss": 0.8493,
      "step": 445420
    },
    {
      "epoch": 1.5611210996505762,
      "grad_norm": 3.171875,
      "learning_rate": 2.6646001594014395e-05,
      "loss": 0.8595,
      "step": 445430
    },
    {
      "epoch": 1.561156147157472,
      "grad_norm": 2.828125,
      "learning_rate": 2.6645352565350697e-05,
      "loss": 0.7759,
      "step": 445440
    },
    {
      "epoch": 1.5611911946643675,
      "grad_norm": 3.4375,
      "learning_rate": 2.6644703536686995e-05,
      "loss": 0.8283,
      "step": 445450
    },
    {
      "epoch": 1.561226242171263,
      "grad_norm": 3.140625,
      "learning_rate": 2.6644054508023293e-05,
      "loss": 0.8703,
      "step": 445460
    },
    {
      "epoch": 1.5612612896781588,
      "grad_norm": 2.78125,
      "learning_rate": 2.664340547935959e-05,
      "loss": 0.8502,
      "step": 445470
    },
    {
      "epoch": 1.5612963371850543,
      "grad_norm": 2.640625,
      "learning_rate": 2.664275645069589e-05,
      "loss": 0.8191,
      "step": 445480
    },
    {
      "epoch": 1.5613313846919499,
      "grad_norm": 2.25,
      "learning_rate": 2.6642107422032187e-05,
      "loss": 0.7593,
      "step": 445490
    },
    {
      "epoch": 1.5613664321988456,
      "grad_norm": 2.96875,
      "learning_rate": 2.6641458393368485e-05,
      "loss": 0.7594,
      "step": 445500
    },
    {
      "epoch": 1.5614014797057412,
      "grad_norm": 2.828125,
      "learning_rate": 2.6640809364704783e-05,
      "loss": 0.8596,
      "step": 445510
    },
    {
      "epoch": 1.5614365272126367,
      "grad_norm": 2.9375,
      "learning_rate": 2.664016033604108e-05,
      "loss": 0.9195,
      "step": 445520
    },
    {
      "epoch": 1.5614715747195325,
      "grad_norm": 2.609375,
      "learning_rate": 2.663951130737738e-05,
      "loss": 0.8044,
      "step": 445530
    },
    {
      "epoch": 1.5615066222264278,
      "grad_norm": 3.15625,
      "learning_rate": 2.6638862278713677e-05,
      "loss": 0.8665,
      "step": 445540
    },
    {
      "epoch": 1.5615416697333235,
      "grad_norm": 2.84375,
      "learning_rate": 2.6638213250049975e-05,
      "loss": 0.8338,
      "step": 445550
    },
    {
      "epoch": 1.561576717240219,
      "grad_norm": 2.796875,
      "learning_rate": 2.6637564221386273e-05,
      "loss": 0.7792,
      "step": 445560
    },
    {
      "epoch": 1.5616117647471146,
      "grad_norm": 2.484375,
      "learning_rate": 2.6636915192722574e-05,
      "loss": 0.8206,
      "step": 445570
    },
    {
      "epoch": 1.5616468122540104,
      "grad_norm": 2.8125,
      "learning_rate": 2.6636266164058872e-05,
      "loss": 0.8249,
      "step": 445580
    },
    {
      "epoch": 1.561681859760906,
      "grad_norm": 2.859375,
      "learning_rate": 2.663561713539517e-05,
      "loss": 0.779,
      "step": 445590
    },
    {
      "epoch": 1.5617169072678014,
      "grad_norm": 3.328125,
      "learning_rate": 2.6634968106731468e-05,
      "loss": 0.8084,
      "step": 445600
    },
    {
      "epoch": 1.5617519547746972,
      "grad_norm": 2.921875,
      "learning_rate": 2.6634319078067766e-05,
      "loss": 0.813,
      "step": 445610
    },
    {
      "epoch": 1.5617870022815927,
      "grad_norm": 3.046875,
      "learning_rate": 2.6633670049404064e-05,
      "loss": 0.8592,
      "step": 445620
    },
    {
      "epoch": 1.5618220497884883,
      "grad_norm": 3.203125,
      "learning_rate": 2.6633021020740362e-05,
      "loss": 0.8381,
      "step": 445630
    },
    {
      "epoch": 1.561857097295384,
      "grad_norm": 2.625,
      "learning_rate": 2.663237199207666e-05,
      "loss": 0.8243,
      "step": 445640
    },
    {
      "epoch": 1.5618921448022793,
      "grad_norm": 3.21875,
      "learning_rate": 2.6631722963412958e-05,
      "loss": 0.8278,
      "step": 445650
    },
    {
      "epoch": 1.561927192309175,
      "grad_norm": 2.75,
      "learning_rate": 2.6631073934749256e-05,
      "loss": 0.7888,
      "step": 445660
    },
    {
      "epoch": 1.5619622398160706,
      "grad_norm": 2.828125,
      "learning_rate": 2.6630424906085554e-05,
      "loss": 0.8036,
      "step": 445670
    },
    {
      "epoch": 1.5619972873229662,
      "grad_norm": 3.0,
      "learning_rate": 2.6629775877421852e-05,
      "loss": 0.8766,
      "step": 445680
    },
    {
      "epoch": 1.562032334829862,
      "grad_norm": 3.078125,
      "learning_rate": 2.662912684875815e-05,
      "loss": 0.8051,
      "step": 445690
    },
    {
      "epoch": 1.5620673823367575,
      "grad_norm": 2.671875,
      "learning_rate": 2.6628477820094448e-05,
      "loss": 0.8075,
      "step": 445700
    },
    {
      "epoch": 1.562102429843653,
      "grad_norm": 2.59375,
      "learning_rate": 2.662782879143075e-05,
      "loss": 0.8666,
      "step": 445710
    },
    {
      "epoch": 1.5621374773505488,
      "grad_norm": 2.75,
      "learning_rate": 2.6627179762767047e-05,
      "loss": 0.9007,
      "step": 445720
    },
    {
      "epoch": 1.5621725248574443,
      "grad_norm": 2.734375,
      "learning_rate": 2.6626530734103345e-05,
      "loss": 0.8024,
      "step": 445730
    },
    {
      "epoch": 1.5622075723643398,
      "grad_norm": 2.6875,
      "learning_rate": 2.6625881705439643e-05,
      "loss": 0.9004,
      "step": 445740
    },
    {
      "epoch": 1.5622426198712356,
      "grad_norm": 2.84375,
      "learning_rate": 2.662523267677594e-05,
      "loss": 0.8477,
      "step": 445750
    },
    {
      "epoch": 1.562277667378131,
      "grad_norm": 2.890625,
      "learning_rate": 2.662458364811224e-05,
      "loss": 0.7746,
      "step": 445760
    },
    {
      "epoch": 1.5623127148850267,
      "grad_norm": 2.96875,
      "learning_rate": 2.6623934619448537e-05,
      "loss": 0.8132,
      "step": 445770
    },
    {
      "epoch": 1.5623477623919224,
      "grad_norm": 2.59375,
      "learning_rate": 2.662328559078483e-05,
      "loss": 0.8983,
      "step": 445780
    },
    {
      "epoch": 1.5623828098988177,
      "grad_norm": 3.015625,
      "learning_rate": 2.6622636562121127e-05,
      "loss": 0.8573,
      "step": 445790
    },
    {
      "epoch": 1.5624178574057135,
      "grad_norm": 2.796875,
      "learning_rate": 2.6621987533457428e-05,
      "loss": 0.7699,
      "step": 445800
    },
    {
      "epoch": 1.562452904912609,
      "grad_norm": 3.171875,
      "learning_rate": 2.6621338504793726e-05,
      "loss": 0.9028,
      "step": 445810
    },
    {
      "epoch": 1.5624879524195046,
      "grad_norm": 2.65625,
      "learning_rate": 2.6620689476130024e-05,
      "loss": 0.8563,
      "step": 445820
    },
    {
      "epoch": 1.5625229999264003,
      "grad_norm": 3.21875,
      "learning_rate": 2.6620040447466322e-05,
      "loss": 0.8409,
      "step": 445830
    },
    {
      "epoch": 1.5625580474332958,
      "grad_norm": 2.828125,
      "learning_rate": 2.661939141880262e-05,
      "loss": 0.9083,
      "step": 445840
    },
    {
      "epoch": 1.5625930949401914,
      "grad_norm": 2.90625,
      "learning_rate": 2.6618742390138918e-05,
      "loss": 0.8403,
      "step": 445850
    },
    {
      "epoch": 1.5626281424470871,
      "grad_norm": 2.984375,
      "learning_rate": 2.6618093361475216e-05,
      "loss": 0.891,
      "step": 445860
    },
    {
      "epoch": 1.5626631899539827,
      "grad_norm": 2.703125,
      "learning_rate": 2.6617444332811514e-05,
      "loss": 0.8748,
      "step": 445870
    },
    {
      "epoch": 1.5626982374608782,
      "grad_norm": 3.203125,
      "learning_rate": 2.6616795304147812e-05,
      "loss": 0.8408,
      "step": 445880
    },
    {
      "epoch": 1.562733284967774,
      "grad_norm": 2.71875,
      "learning_rate": 2.661614627548411e-05,
      "loss": 0.8759,
      "step": 445890
    },
    {
      "epoch": 1.5627683324746693,
      "grad_norm": 3.3125,
      "learning_rate": 2.6615497246820408e-05,
      "loss": 0.8802,
      "step": 445900
    },
    {
      "epoch": 1.562803379981565,
      "grad_norm": 2.828125,
      "learning_rate": 2.6614848218156706e-05,
      "loss": 0.8331,
      "step": 445910
    },
    {
      "epoch": 1.5628384274884606,
      "grad_norm": 2.6875,
      "learning_rate": 2.6614199189493004e-05,
      "loss": 0.7524,
      "step": 445920
    },
    {
      "epoch": 1.5628734749953561,
      "grad_norm": 2.859375,
      "learning_rate": 2.6613550160829302e-05,
      "loss": 0.8934,
      "step": 445930
    },
    {
      "epoch": 1.5629085225022519,
      "grad_norm": 3.15625,
      "learning_rate": 2.6612901132165603e-05,
      "loss": 0.8813,
      "step": 445940
    },
    {
      "epoch": 1.5629435700091474,
      "grad_norm": 2.578125,
      "learning_rate": 2.66122521035019e-05,
      "loss": 0.8527,
      "step": 445950
    },
    {
      "epoch": 1.562978617516043,
      "grad_norm": 4.28125,
      "learning_rate": 2.66116030748382e-05,
      "loss": 0.8203,
      "step": 445960
    },
    {
      "epoch": 1.5630136650229387,
      "grad_norm": 2.765625,
      "learning_rate": 2.6610954046174497e-05,
      "loss": 0.8271,
      "step": 445970
    },
    {
      "epoch": 1.5630487125298342,
      "grad_norm": 3.40625,
      "learning_rate": 2.6610305017510795e-05,
      "loss": 0.8624,
      "step": 445980
    },
    {
      "epoch": 1.5630837600367298,
      "grad_norm": 2.921875,
      "learning_rate": 2.6609655988847093e-05,
      "loss": 0.8508,
      "step": 445990
    },
    {
      "epoch": 1.5631188075436255,
      "grad_norm": 3.203125,
      "learning_rate": 2.660900696018339e-05,
      "loss": 0.7338,
      "step": 446000
    },
    {
      "epoch": 1.5631538550505208,
      "grad_norm": 2.84375,
      "learning_rate": 2.660835793151969e-05,
      "loss": 0.8287,
      "step": 446010
    },
    {
      "epoch": 1.5631889025574166,
      "grad_norm": 2.59375,
      "learning_rate": 2.6607708902855987e-05,
      "loss": 0.8369,
      "step": 446020
    },
    {
      "epoch": 1.5632239500643121,
      "grad_norm": 2.78125,
      "learning_rate": 2.6607059874192285e-05,
      "loss": 0.7675,
      "step": 446030
    },
    {
      "epoch": 1.5632589975712077,
      "grad_norm": 2.78125,
      "learning_rate": 2.6606410845528583e-05,
      "loss": 0.7984,
      "step": 446040
    },
    {
      "epoch": 1.5632940450781034,
      "grad_norm": 2.96875,
      "learning_rate": 2.660576181686488e-05,
      "loss": 0.8391,
      "step": 446050
    },
    {
      "epoch": 1.563329092584999,
      "grad_norm": 2.59375,
      "learning_rate": 2.660511278820118e-05,
      "loss": 0.8088,
      "step": 446060
    },
    {
      "epoch": 1.5633641400918945,
      "grad_norm": 2.9375,
      "learning_rate": 2.6604463759537477e-05,
      "loss": 0.8133,
      "step": 446070
    },
    {
      "epoch": 1.5633991875987903,
      "grad_norm": 3.375,
      "learning_rate": 2.660381473087378e-05,
      "loss": 0.8355,
      "step": 446080
    },
    {
      "epoch": 1.5634342351056858,
      "grad_norm": 2.84375,
      "learning_rate": 2.6603165702210077e-05,
      "loss": 0.7434,
      "step": 446090
    },
    {
      "epoch": 1.5634692826125813,
      "grad_norm": 2.421875,
      "learning_rate": 2.6602516673546375e-05,
      "loss": 0.7775,
      "step": 446100
    },
    {
      "epoch": 1.563504330119477,
      "grad_norm": 3.03125,
      "learning_rate": 2.6601867644882673e-05,
      "loss": 0.8721,
      "step": 446110
    },
    {
      "epoch": 1.5635393776263724,
      "grad_norm": 2.515625,
      "learning_rate": 2.660121861621897e-05,
      "loss": 0.8,
      "step": 446120
    },
    {
      "epoch": 1.5635744251332682,
      "grad_norm": 2.828125,
      "learning_rate": 2.660056958755527e-05,
      "loss": 0.8107,
      "step": 446130
    },
    {
      "epoch": 1.5636094726401637,
      "grad_norm": 2.78125,
      "learning_rate": 2.6599920558891567e-05,
      "loss": 0.8266,
      "step": 446140
    },
    {
      "epoch": 1.5636445201470592,
      "grad_norm": 2.4375,
      "learning_rate": 2.6599271530227865e-05,
      "loss": 0.7531,
      "step": 446150
    },
    {
      "epoch": 1.563679567653955,
      "grad_norm": 3.21875,
      "learning_rate": 2.6598622501564156e-05,
      "loss": 0.8815,
      "step": 446160
    },
    {
      "epoch": 1.5637146151608505,
      "grad_norm": 2.65625,
      "learning_rate": 2.6597973472900457e-05,
      "loss": 0.8531,
      "step": 446170
    },
    {
      "epoch": 1.563749662667746,
      "grad_norm": 2.625,
      "learning_rate": 2.6597324444236755e-05,
      "loss": 0.8818,
      "step": 446180
    },
    {
      "epoch": 1.5637847101746418,
      "grad_norm": 2.453125,
      "learning_rate": 2.6596675415573053e-05,
      "loss": 0.8234,
      "step": 446190
    },
    {
      "epoch": 1.5638197576815374,
      "grad_norm": 3.328125,
      "learning_rate": 2.659602638690935e-05,
      "loss": 0.8867,
      "step": 446200
    },
    {
      "epoch": 1.563854805188433,
      "grad_norm": 3.078125,
      "learning_rate": 2.659537735824565e-05,
      "loss": 0.7817,
      "step": 446210
    },
    {
      "epoch": 1.5638898526953287,
      "grad_norm": 2.75,
      "learning_rate": 2.6594728329581947e-05,
      "loss": 0.807,
      "step": 446220
    },
    {
      "epoch": 1.563924900202224,
      "grad_norm": 2.859375,
      "learning_rate": 2.6594079300918245e-05,
      "loss": 0.8123,
      "step": 446230
    },
    {
      "epoch": 1.5639599477091197,
      "grad_norm": 3.40625,
      "learning_rate": 2.6593430272254543e-05,
      "loss": 0.8209,
      "step": 446240
    },
    {
      "epoch": 1.5639949952160153,
      "grad_norm": 2.953125,
      "learning_rate": 2.659278124359084e-05,
      "loss": 0.8572,
      "step": 446250
    },
    {
      "epoch": 1.5640300427229108,
      "grad_norm": 2.953125,
      "learning_rate": 2.659213221492714e-05,
      "loss": 0.8504,
      "step": 446260
    },
    {
      "epoch": 1.5640650902298066,
      "grad_norm": 3.0,
      "learning_rate": 2.6591483186263437e-05,
      "loss": 0.7424,
      "step": 446270
    },
    {
      "epoch": 1.564100137736702,
      "grad_norm": 2.90625,
      "learning_rate": 2.6590834157599735e-05,
      "loss": 0.8448,
      "step": 446280
    },
    {
      "epoch": 1.5641351852435976,
      "grad_norm": 3.109375,
      "learning_rate": 2.6590185128936033e-05,
      "loss": 0.9375,
      "step": 446290
    },
    {
      "epoch": 1.5641702327504934,
      "grad_norm": 3.140625,
      "learning_rate": 2.658953610027233e-05,
      "loss": 0.8817,
      "step": 446300
    },
    {
      "epoch": 1.564205280257389,
      "grad_norm": 2.78125,
      "learning_rate": 2.6588887071608633e-05,
      "loss": 0.7378,
      "step": 446310
    },
    {
      "epoch": 1.5642403277642845,
      "grad_norm": 2.78125,
      "learning_rate": 2.658823804294493e-05,
      "loss": 0.8001,
      "step": 446320
    },
    {
      "epoch": 1.5642753752711802,
      "grad_norm": 3.25,
      "learning_rate": 2.658758901428123e-05,
      "loss": 0.8363,
      "step": 446330
    },
    {
      "epoch": 1.5643104227780755,
      "grad_norm": 3.0,
      "learning_rate": 2.6586939985617527e-05,
      "loss": 0.8896,
      "step": 446340
    },
    {
      "epoch": 1.5643454702849713,
      "grad_norm": 3.03125,
      "learning_rate": 2.6586290956953825e-05,
      "loss": 0.8664,
      "step": 446350
    },
    {
      "epoch": 1.5643805177918668,
      "grad_norm": 3.125,
      "learning_rate": 2.6585641928290123e-05,
      "loss": 0.8353,
      "step": 446360
    },
    {
      "epoch": 1.5644155652987624,
      "grad_norm": 2.890625,
      "learning_rate": 2.658499289962642e-05,
      "loss": 0.8242,
      "step": 446370
    },
    {
      "epoch": 1.5644506128056581,
      "grad_norm": 2.84375,
      "learning_rate": 2.658434387096272e-05,
      "loss": 0.8369,
      "step": 446380
    },
    {
      "epoch": 1.5644856603125536,
      "grad_norm": 2.65625,
      "learning_rate": 2.6583694842299017e-05,
      "loss": 0.878,
      "step": 446390
    },
    {
      "epoch": 1.5645207078194492,
      "grad_norm": 2.765625,
      "learning_rate": 2.6583045813635315e-05,
      "loss": 0.7885,
      "step": 446400
    },
    {
      "epoch": 1.564555755326345,
      "grad_norm": 2.671875,
      "learning_rate": 2.6582396784971613e-05,
      "loss": 0.8662,
      "step": 446410
    },
    {
      "epoch": 1.5645908028332405,
      "grad_norm": 3.125,
      "learning_rate": 2.658174775630791e-05,
      "loss": 0.8384,
      "step": 446420
    },
    {
      "epoch": 1.564625850340136,
      "grad_norm": 2.9375,
      "learning_rate": 2.658109872764421e-05,
      "loss": 0.8638,
      "step": 446430
    },
    {
      "epoch": 1.5646608978470318,
      "grad_norm": 2.84375,
      "learning_rate": 2.6580449698980507e-05,
      "loss": 0.8915,
      "step": 446440
    },
    {
      "epoch": 1.564695945353927,
      "grad_norm": 3.390625,
      "learning_rate": 2.6579800670316808e-05,
      "loss": 0.8268,
      "step": 446450
    },
    {
      "epoch": 1.5647309928608228,
      "grad_norm": 3.171875,
      "learning_rate": 2.6579151641653106e-05,
      "loss": 0.8821,
      "step": 446460
    },
    {
      "epoch": 1.5647660403677186,
      "grad_norm": 2.765625,
      "learning_rate": 2.6578502612989404e-05,
      "loss": 0.8018,
      "step": 446470
    },
    {
      "epoch": 1.564801087874614,
      "grad_norm": 3.0625,
      "learning_rate": 2.6577853584325702e-05,
      "loss": 0.8888,
      "step": 446480
    },
    {
      "epoch": 1.5648361353815097,
      "grad_norm": 2.875,
      "learning_rate": 2.6577204555662e-05,
      "loss": 0.8132,
      "step": 446490
    },
    {
      "epoch": 1.5648711828884052,
      "grad_norm": 3.046875,
      "learning_rate": 2.6576555526998298e-05,
      "loss": 0.7862,
      "step": 446500
    },
    {
      "epoch": 1.5649062303953007,
      "grad_norm": 2.765625,
      "learning_rate": 2.6575906498334596e-05,
      "loss": 0.8889,
      "step": 446510
    },
    {
      "epoch": 1.5649412779021965,
      "grad_norm": 2.75,
      "learning_rate": 2.6575257469670894e-05,
      "loss": 0.8991,
      "step": 446520
    },
    {
      "epoch": 1.564976325409092,
      "grad_norm": 3.0,
      "learning_rate": 2.657460844100719e-05,
      "loss": 0.9054,
      "step": 446530
    },
    {
      "epoch": 1.5650113729159876,
      "grad_norm": 3.015625,
      "learning_rate": 2.6573959412343487e-05,
      "loss": 0.8267,
      "step": 446540
    },
    {
      "epoch": 1.5650464204228833,
      "grad_norm": 3.21875,
      "learning_rate": 2.6573310383679785e-05,
      "loss": 0.8421,
      "step": 446550
    },
    {
      "epoch": 1.5650814679297789,
      "grad_norm": 3.234375,
      "learning_rate": 2.6572661355016083e-05,
      "loss": 0.7604,
      "step": 446560
    },
    {
      "epoch": 1.5651165154366744,
      "grad_norm": 2.75,
      "learning_rate": 2.657201232635238e-05,
      "loss": 0.862,
      "step": 446570
    },
    {
      "epoch": 1.5651515629435702,
      "grad_norm": 2.671875,
      "learning_rate": 2.657136329768868e-05,
      "loss": 0.783,
      "step": 446580
    },
    {
      "epoch": 1.5651866104504655,
      "grad_norm": 3.125,
      "learning_rate": 2.6570714269024977e-05,
      "loss": 0.8831,
      "step": 446590
    },
    {
      "epoch": 1.5652216579573612,
      "grad_norm": 2.5,
      "learning_rate": 2.6570065240361275e-05,
      "loss": 0.8183,
      "step": 446600
    },
    {
      "epoch": 1.5652567054642568,
      "grad_norm": 2.8125,
      "learning_rate": 2.6569416211697573e-05,
      "loss": 0.8976,
      "step": 446610
    },
    {
      "epoch": 1.5652917529711523,
      "grad_norm": 2.796875,
      "learning_rate": 2.656876718303387e-05,
      "loss": 0.8219,
      "step": 446620
    },
    {
      "epoch": 1.565326800478048,
      "grad_norm": 3.03125,
      "learning_rate": 2.656811815437017e-05,
      "loss": 0.8618,
      "step": 446630
    },
    {
      "epoch": 1.5653618479849436,
      "grad_norm": 2.78125,
      "learning_rate": 2.6567469125706467e-05,
      "loss": 0.857,
      "step": 446640
    },
    {
      "epoch": 1.5653968954918391,
      "grad_norm": 2.484375,
      "learning_rate": 2.6566820097042765e-05,
      "loss": 0.8273,
      "step": 446650
    },
    {
      "epoch": 1.565431942998735,
      "grad_norm": 2.828125,
      "learning_rate": 2.6566171068379063e-05,
      "loss": 0.8147,
      "step": 446660
    },
    {
      "epoch": 1.5654669905056304,
      "grad_norm": 2.453125,
      "learning_rate": 2.6565522039715364e-05,
      "loss": 0.8332,
      "step": 446670
    },
    {
      "epoch": 1.565502038012526,
      "grad_norm": 2.53125,
      "learning_rate": 2.6564873011051662e-05,
      "loss": 0.7657,
      "step": 446680
    },
    {
      "epoch": 1.5655370855194217,
      "grad_norm": 2.578125,
      "learning_rate": 2.656422398238796e-05,
      "loss": 0.8879,
      "step": 446690
    },
    {
      "epoch": 1.565572133026317,
      "grad_norm": 3.140625,
      "learning_rate": 2.6563574953724258e-05,
      "loss": 0.822,
      "step": 446700
    },
    {
      "epoch": 1.5656071805332128,
      "grad_norm": 3.203125,
      "learning_rate": 2.6562925925060556e-05,
      "loss": 0.9562,
      "step": 446710
    },
    {
      "epoch": 1.5656422280401083,
      "grad_norm": 2.609375,
      "learning_rate": 2.6562276896396854e-05,
      "loss": 0.7807,
      "step": 446720
    },
    {
      "epoch": 1.5656772755470039,
      "grad_norm": 2.84375,
      "learning_rate": 2.6561627867733152e-05,
      "loss": 0.8264,
      "step": 446730
    },
    {
      "epoch": 1.5657123230538996,
      "grad_norm": 3.234375,
      "learning_rate": 2.656097883906945e-05,
      "loss": 0.8382,
      "step": 446740
    },
    {
      "epoch": 1.5657473705607952,
      "grad_norm": 3.0625,
      "learning_rate": 2.6560329810405748e-05,
      "loss": 0.8712,
      "step": 446750
    },
    {
      "epoch": 1.5657824180676907,
      "grad_norm": 2.765625,
      "learning_rate": 2.6559680781742046e-05,
      "loss": 0.8989,
      "step": 446760
    },
    {
      "epoch": 1.5658174655745865,
      "grad_norm": 2.828125,
      "learning_rate": 2.6559031753078344e-05,
      "loss": 0.8137,
      "step": 446770
    },
    {
      "epoch": 1.565852513081482,
      "grad_norm": 3.046875,
      "learning_rate": 2.6558382724414642e-05,
      "loss": 0.7451,
      "step": 446780
    },
    {
      "epoch": 1.5658875605883775,
      "grad_norm": 3.015625,
      "learning_rate": 2.655773369575094e-05,
      "loss": 0.8204,
      "step": 446790
    },
    {
      "epoch": 1.5659226080952733,
      "grad_norm": 2.75,
      "learning_rate": 2.6557084667087238e-05,
      "loss": 0.869,
      "step": 446800
    },
    {
      "epoch": 1.5659576556021686,
      "grad_norm": 3.03125,
      "learning_rate": 2.655643563842354e-05,
      "loss": 0.8337,
      "step": 446810
    },
    {
      "epoch": 1.5659927031090644,
      "grad_norm": 2.8125,
      "learning_rate": 2.6555786609759838e-05,
      "loss": 0.8462,
      "step": 446820
    },
    {
      "epoch": 1.56602775061596,
      "grad_norm": 3.40625,
      "learning_rate": 2.6555137581096136e-05,
      "loss": 0.8368,
      "step": 446830
    },
    {
      "epoch": 1.5660627981228554,
      "grad_norm": 2.515625,
      "learning_rate": 2.6554488552432434e-05,
      "loss": 0.8175,
      "step": 446840
    },
    {
      "epoch": 1.5660978456297512,
      "grad_norm": 2.8125,
      "learning_rate": 2.655383952376873e-05,
      "loss": 0.9322,
      "step": 446850
    },
    {
      "epoch": 1.5661328931366467,
      "grad_norm": 2.84375,
      "learning_rate": 2.655319049510503e-05,
      "loss": 0.8968,
      "step": 446860
    },
    {
      "epoch": 1.5661679406435423,
      "grad_norm": 3.03125,
      "learning_rate": 2.6552541466441328e-05,
      "loss": 0.8571,
      "step": 446870
    },
    {
      "epoch": 1.566202988150438,
      "grad_norm": 2.59375,
      "learning_rate": 2.6551892437777626e-05,
      "loss": 0.8412,
      "step": 446880
    },
    {
      "epoch": 1.5662380356573335,
      "grad_norm": 2.90625,
      "learning_rate": 2.6551243409113924e-05,
      "loss": 0.7592,
      "step": 446890
    },
    {
      "epoch": 1.566273083164229,
      "grad_norm": 3.265625,
      "learning_rate": 2.655059438045022e-05,
      "loss": 0.8508,
      "step": 446900
    },
    {
      "epoch": 1.5663081306711248,
      "grad_norm": 2.859375,
      "learning_rate": 2.6549945351786516e-05,
      "loss": 0.8019,
      "step": 446910
    },
    {
      "epoch": 1.5663431781780202,
      "grad_norm": 3.125,
      "learning_rate": 2.6549296323122814e-05,
      "loss": 0.8921,
      "step": 446920
    },
    {
      "epoch": 1.566378225684916,
      "grad_norm": 3.09375,
      "learning_rate": 2.6548647294459112e-05,
      "loss": 0.8338,
      "step": 446930
    },
    {
      "epoch": 1.5664132731918115,
      "grad_norm": 3.625,
      "learning_rate": 2.654799826579541e-05,
      "loss": 0.922,
      "step": 446940
    },
    {
      "epoch": 1.566448320698707,
      "grad_norm": 2.6875,
      "learning_rate": 2.6547349237131708e-05,
      "loss": 0.8612,
      "step": 446950
    },
    {
      "epoch": 1.5664833682056027,
      "grad_norm": 3.15625,
      "learning_rate": 2.6546700208468006e-05,
      "loss": 0.8848,
      "step": 446960
    },
    {
      "epoch": 1.5665184157124983,
      "grad_norm": 3.046875,
      "learning_rate": 2.6546051179804304e-05,
      "loss": 0.8614,
      "step": 446970
    },
    {
      "epoch": 1.5665534632193938,
      "grad_norm": 2.828125,
      "learning_rate": 2.6545402151140602e-05,
      "loss": 0.8272,
      "step": 446980
    },
    {
      "epoch": 1.5665885107262896,
      "grad_norm": 2.546875,
      "learning_rate": 2.65447531224769e-05,
      "loss": 0.8919,
      "step": 446990
    },
    {
      "epoch": 1.566623558233185,
      "grad_norm": 3.109375,
      "learning_rate": 2.6544104093813198e-05,
      "loss": 0.978,
      "step": 447000
    },
    {
      "epoch": 1.5666586057400806,
      "grad_norm": 3.015625,
      "learning_rate": 2.6543455065149496e-05,
      "loss": 0.7822,
      "step": 447010
    },
    {
      "epoch": 1.5666936532469764,
      "grad_norm": 2.90625,
      "learning_rate": 2.6542806036485794e-05,
      "loss": 0.8276,
      "step": 447020
    },
    {
      "epoch": 1.5667287007538717,
      "grad_norm": 3.234375,
      "learning_rate": 2.6542157007822092e-05,
      "loss": 0.8062,
      "step": 447030
    },
    {
      "epoch": 1.5667637482607675,
      "grad_norm": 2.984375,
      "learning_rate": 2.6541507979158394e-05,
      "loss": 0.8792,
      "step": 447040
    },
    {
      "epoch": 1.5667987957676632,
      "grad_norm": 2.9375,
      "learning_rate": 2.654085895049469e-05,
      "loss": 0.8652,
      "step": 447050
    },
    {
      "epoch": 1.5668338432745585,
      "grad_norm": 3.1875,
      "learning_rate": 2.654020992183099e-05,
      "loss": 0.856,
      "step": 447060
    },
    {
      "epoch": 1.5668688907814543,
      "grad_norm": 2.578125,
      "learning_rate": 2.6539560893167288e-05,
      "loss": 0.7943,
      "step": 447070
    },
    {
      "epoch": 1.5669039382883498,
      "grad_norm": 2.453125,
      "learning_rate": 2.6538911864503586e-05,
      "loss": 0.7832,
      "step": 447080
    },
    {
      "epoch": 1.5669389857952454,
      "grad_norm": 2.796875,
      "learning_rate": 2.6538262835839884e-05,
      "loss": 0.7655,
      "step": 447090
    },
    {
      "epoch": 1.5669740333021411,
      "grad_norm": 2.890625,
      "learning_rate": 2.653761380717618e-05,
      "loss": 0.8957,
      "step": 447100
    },
    {
      "epoch": 1.5670090808090367,
      "grad_norm": 2.40625,
      "learning_rate": 2.653696477851248e-05,
      "loss": 0.8009,
      "step": 447110
    },
    {
      "epoch": 1.5670441283159322,
      "grad_norm": 2.46875,
      "learning_rate": 2.6536315749848778e-05,
      "loss": 0.8657,
      "step": 447120
    },
    {
      "epoch": 1.567079175822828,
      "grad_norm": 2.421875,
      "learning_rate": 2.6535666721185076e-05,
      "loss": 0.8162,
      "step": 447130
    },
    {
      "epoch": 1.5671142233297233,
      "grad_norm": 2.703125,
      "learning_rate": 2.6535017692521374e-05,
      "loss": 0.8703,
      "step": 447140
    },
    {
      "epoch": 1.567149270836619,
      "grad_norm": 2.84375,
      "learning_rate": 2.653436866385767e-05,
      "loss": 0.7812,
      "step": 447150
    },
    {
      "epoch": 1.5671843183435148,
      "grad_norm": 2.515625,
      "learning_rate": 2.653371963519397e-05,
      "loss": 0.8141,
      "step": 447160
    },
    {
      "epoch": 1.56721936585041,
      "grad_norm": 3.015625,
      "learning_rate": 2.6533070606530268e-05,
      "loss": 0.8043,
      "step": 447170
    },
    {
      "epoch": 1.5672544133573059,
      "grad_norm": 2.734375,
      "learning_rate": 2.653242157786657e-05,
      "loss": 0.7829,
      "step": 447180
    },
    {
      "epoch": 1.5672894608642014,
      "grad_norm": 2.609375,
      "learning_rate": 2.6531772549202867e-05,
      "loss": 0.8826,
      "step": 447190
    },
    {
      "epoch": 1.567324508371097,
      "grad_norm": 3.125,
      "learning_rate": 2.6531123520539165e-05,
      "loss": 0.9156,
      "step": 447200
    },
    {
      "epoch": 1.5673595558779927,
      "grad_norm": 2.546875,
      "learning_rate": 2.6530474491875463e-05,
      "loss": 0.8442,
      "step": 447210
    },
    {
      "epoch": 1.5673946033848882,
      "grad_norm": 2.640625,
      "learning_rate": 2.652982546321176e-05,
      "loss": 0.8634,
      "step": 447220
    },
    {
      "epoch": 1.5674296508917838,
      "grad_norm": 2.671875,
      "learning_rate": 2.652917643454806e-05,
      "loss": 0.8436,
      "step": 447230
    },
    {
      "epoch": 1.5674646983986795,
      "grad_norm": 2.953125,
      "learning_rate": 2.6528527405884357e-05,
      "loss": 0.7599,
      "step": 447240
    },
    {
      "epoch": 1.567499745905575,
      "grad_norm": 2.796875,
      "learning_rate": 2.6527878377220655e-05,
      "loss": 0.8393,
      "step": 447250
    },
    {
      "epoch": 1.5675347934124706,
      "grad_norm": 3.0,
      "learning_rate": 2.6527229348556953e-05,
      "loss": 0.8154,
      "step": 447260
    },
    {
      "epoch": 1.5675698409193664,
      "grad_norm": 2.421875,
      "learning_rate": 2.652658031989325e-05,
      "loss": 0.8334,
      "step": 447270
    },
    {
      "epoch": 1.5676048884262617,
      "grad_norm": 2.296875,
      "learning_rate": 2.652593129122955e-05,
      "loss": 0.7333,
      "step": 447280
    },
    {
      "epoch": 1.5676399359331574,
      "grad_norm": 2.890625,
      "learning_rate": 2.6525282262565844e-05,
      "loss": 0.8608,
      "step": 447290
    },
    {
      "epoch": 1.567674983440053,
      "grad_norm": 3.484375,
      "learning_rate": 2.652463323390214e-05,
      "loss": 0.9618,
      "step": 447300
    },
    {
      "epoch": 1.5677100309469485,
      "grad_norm": 3.0,
      "learning_rate": 2.652398420523844e-05,
      "loss": 0.7824,
      "step": 447310
    },
    {
      "epoch": 1.5677450784538443,
      "grad_norm": 2.90625,
      "learning_rate": 2.6523335176574738e-05,
      "loss": 0.9375,
      "step": 447320
    },
    {
      "epoch": 1.5677801259607398,
      "grad_norm": 3.171875,
      "learning_rate": 2.6522686147911036e-05,
      "loss": 0.7842,
      "step": 447330
    },
    {
      "epoch": 1.5678151734676353,
      "grad_norm": 2.984375,
      "learning_rate": 2.6522037119247334e-05,
      "loss": 0.8368,
      "step": 447340
    },
    {
      "epoch": 1.567850220974531,
      "grad_norm": 2.96875,
      "learning_rate": 2.652138809058363e-05,
      "loss": 0.8507,
      "step": 447350
    },
    {
      "epoch": 1.5678852684814266,
      "grad_norm": 3.046875,
      "learning_rate": 2.652073906191993e-05,
      "loss": 0.7875,
      "step": 447360
    },
    {
      "epoch": 1.5679203159883222,
      "grad_norm": 3.515625,
      "learning_rate": 2.6520090033256228e-05,
      "loss": 0.8777,
      "step": 447370
    },
    {
      "epoch": 1.567955363495218,
      "grad_norm": 2.984375,
      "learning_rate": 2.6519441004592526e-05,
      "loss": 0.8615,
      "step": 447380
    },
    {
      "epoch": 1.5679904110021132,
      "grad_norm": 3.3125,
      "learning_rate": 2.6518791975928824e-05,
      "loss": 0.8805,
      "step": 447390
    },
    {
      "epoch": 1.568025458509009,
      "grad_norm": 3.375,
      "learning_rate": 2.651814294726512e-05,
      "loss": 0.8089,
      "step": 447400
    },
    {
      "epoch": 1.5680605060159045,
      "grad_norm": 2.6875,
      "learning_rate": 2.6517493918601423e-05,
      "loss": 0.8377,
      "step": 447410
    },
    {
      "epoch": 1.5680955535228,
      "grad_norm": 2.625,
      "learning_rate": 2.651684488993772e-05,
      "loss": 0.8229,
      "step": 447420
    },
    {
      "epoch": 1.5681306010296958,
      "grad_norm": 3.09375,
      "learning_rate": 2.651619586127402e-05,
      "loss": 0.8986,
      "step": 447430
    },
    {
      "epoch": 1.5681656485365914,
      "grad_norm": 2.6875,
      "learning_rate": 2.6515546832610317e-05,
      "loss": 0.8465,
      "step": 447440
    },
    {
      "epoch": 1.5682006960434869,
      "grad_norm": 3.09375,
      "learning_rate": 2.6514897803946615e-05,
      "loss": 0.8313,
      "step": 447450
    },
    {
      "epoch": 1.5682357435503826,
      "grad_norm": 3.28125,
      "learning_rate": 2.6514248775282913e-05,
      "loss": 0.9014,
      "step": 447460
    },
    {
      "epoch": 1.5682707910572782,
      "grad_norm": 3.21875,
      "learning_rate": 2.651359974661921e-05,
      "loss": 0.9003,
      "step": 447470
    },
    {
      "epoch": 1.5683058385641737,
      "grad_norm": 2.96875,
      "learning_rate": 2.651295071795551e-05,
      "loss": 0.8167,
      "step": 447480
    },
    {
      "epoch": 1.5683408860710695,
      "grad_norm": 2.890625,
      "learning_rate": 2.6512301689291807e-05,
      "loss": 0.878,
      "step": 447490
    },
    {
      "epoch": 1.5683759335779648,
      "grad_norm": 3.328125,
      "learning_rate": 2.6511652660628105e-05,
      "loss": 0.8496,
      "step": 447500
    },
    {
      "epoch": 1.5684109810848605,
      "grad_norm": 2.890625,
      "learning_rate": 2.6511003631964403e-05,
      "loss": 0.7829,
      "step": 447510
    },
    {
      "epoch": 1.568446028591756,
      "grad_norm": 2.984375,
      "learning_rate": 2.65103546033007e-05,
      "loss": 0.9617,
      "step": 447520
    },
    {
      "epoch": 1.5684810760986516,
      "grad_norm": 3.15625,
      "learning_rate": 2.6509705574637e-05,
      "loss": 0.8666,
      "step": 447530
    },
    {
      "epoch": 1.5685161236055474,
      "grad_norm": 3.09375,
      "learning_rate": 2.6509056545973297e-05,
      "loss": 0.8023,
      "step": 447540
    },
    {
      "epoch": 1.568551171112443,
      "grad_norm": 2.96875,
      "learning_rate": 2.65084075173096e-05,
      "loss": 0.8012,
      "step": 447550
    },
    {
      "epoch": 1.5685862186193384,
      "grad_norm": 2.65625,
      "learning_rate": 2.6507758488645896e-05,
      "loss": 0.8126,
      "step": 447560
    },
    {
      "epoch": 1.5686212661262342,
      "grad_norm": 2.78125,
      "learning_rate": 2.6507109459982194e-05,
      "loss": 0.7959,
      "step": 447570
    },
    {
      "epoch": 1.5686563136331297,
      "grad_norm": 2.875,
      "learning_rate": 2.6506460431318492e-05,
      "loss": 0.8,
      "step": 447580
    },
    {
      "epoch": 1.5686913611400253,
      "grad_norm": 3.328125,
      "learning_rate": 2.650581140265479e-05,
      "loss": 0.8348,
      "step": 447590
    },
    {
      "epoch": 1.568726408646921,
      "grad_norm": 3.09375,
      "learning_rate": 2.650516237399109e-05,
      "loss": 0.8645,
      "step": 447600
    },
    {
      "epoch": 1.5687614561538163,
      "grad_norm": 2.375,
      "learning_rate": 2.6504513345327386e-05,
      "loss": 0.7596,
      "step": 447610
    },
    {
      "epoch": 1.568796503660712,
      "grad_norm": 2.984375,
      "learning_rate": 2.6503864316663684e-05,
      "loss": 0.7341,
      "step": 447620
    },
    {
      "epoch": 1.5688315511676076,
      "grad_norm": 2.90625,
      "learning_rate": 2.6503215287999982e-05,
      "loss": 0.8148,
      "step": 447630
    },
    {
      "epoch": 1.5688665986745032,
      "grad_norm": 3.0,
      "learning_rate": 2.650256625933628e-05,
      "loss": 0.8818,
      "step": 447640
    },
    {
      "epoch": 1.568901646181399,
      "grad_norm": 3.125,
      "learning_rate": 2.650191723067258e-05,
      "loss": 0.7946,
      "step": 447650
    },
    {
      "epoch": 1.5689366936882945,
      "grad_norm": 2.921875,
      "learning_rate": 2.6501268202008873e-05,
      "loss": 0.8091,
      "step": 447660
    },
    {
      "epoch": 1.56897174119519,
      "grad_norm": 3.046875,
      "learning_rate": 2.650061917334517e-05,
      "loss": 0.7918,
      "step": 447670
    },
    {
      "epoch": 1.5690067887020858,
      "grad_norm": 2.171875,
      "learning_rate": 2.649997014468147e-05,
      "loss": 0.7645,
      "step": 447680
    },
    {
      "epoch": 1.5690418362089813,
      "grad_norm": 2.625,
      "learning_rate": 2.6499321116017767e-05,
      "loss": 0.7752,
      "step": 447690
    },
    {
      "epoch": 1.5690768837158768,
      "grad_norm": 2.921875,
      "learning_rate": 2.6498672087354065e-05,
      "loss": 0.8508,
      "step": 447700
    },
    {
      "epoch": 1.5691119312227726,
      "grad_norm": 2.984375,
      "learning_rate": 2.6498023058690363e-05,
      "loss": 0.831,
      "step": 447710
    },
    {
      "epoch": 1.569146978729668,
      "grad_norm": 2.828125,
      "learning_rate": 2.649737403002666e-05,
      "loss": 0.834,
      "step": 447720
    },
    {
      "epoch": 1.5691820262365637,
      "grad_norm": 2.59375,
      "learning_rate": 2.649672500136296e-05,
      "loss": 0.7806,
      "step": 447730
    },
    {
      "epoch": 1.5692170737434594,
      "grad_norm": 2.546875,
      "learning_rate": 2.6496075972699257e-05,
      "loss": 0.8373,
      "step": 447740
    },
    {
      "epoch": 1.5692521212503547,
      "grad_norm": 3.265625,
      "learning_rate": 2.6495426944035555e-05,
      "loss": 0.9028,
      "step": 447750
    },
    {
      "epoch": 1.5692871687572505,
      "grad_norm": 2.71875,
      "learning_rate": 2.6494777915371853e-05,
      "loss": 0.7726,
      "step": 447760
    },
    {
      "epoch": 1.569322216264146,
      "grad_norm": 3.25,
      "learning_rate": 2.6494128886708154e-05,
      "loss": 0.8128,
      "step": 447770
    },
    {
      "epoch": 1.5693572637710416,
      "grad_norm": 2.96875,
      "learning_rate": 2.6493479858044452e-05,
      "loss": 0.8834,
      "step": 447780
    },
    {
      "epoch": 1.5693923112779373,
      "grad_norm": 3.0,
      "learning_rate": 2.649283082938075e-05,
      "loss": 0.9223,
      "step": 447790
    },
    {
      "epoch": 1.5694273587848329,
      "grad_norm": 2.6875,
      "learning_rate": 2.649218180071705e-05,
      "loss": 0.8277,
      "step": 447800
    },
    {
      "epoch": 1.5694624062917284,
      "grad_norm": 2.796875,
      "learning_rate": 2.6491532772053346e-05,
      "loss": 0.8811,
      "step": 447810
    },
    {
      "epoch": 1.5694974537986242,
      "grad_norm": 2.671875,
      "learning_rate": 2.6490883743389644e-05,
      "loss": 0.8277,
      "step": 447820
    },
    {
      "epoch": 1.5695325013055195,
      "grad_norm": 2.484375,
      "learning_rate": 2.6490234714725942e-05,
      "loss": 0.8263,
      "step": 447830
    },
    {
      "epoch": 1.5695675488124152,
      "grad_norm": 2.8125,
      "learning_rate": 2.648958568606224e-05,
      "loss": 0.8351,
      "step": 447840
    },
    {
      "epoch": 1.569602596319311,
      "grad_norm": 2.96875,
      "learning_rate": 2.648893665739854e-05,
      "loss": 0.8443,
      "step": 447850
    },
    {
      "epoch": 1.5696376438262063,
      "grad_norm": 3.15625,
      "learning_rate": 2.6488287628734836e-05,
      "loss": 0.9222,
      "step": 447860
    },
    {
      "epoch": 1.569672691333102,
      "grad_norm": 2.984375,
      "learning_rate": 2.6487638600071134e-05,
      "loss": 0.7802,
      "step": 447870
    },
    {
      "epoch": 1.5697077388399976,
      "grad_norm": 3.09375,
      "learning_rate": 2.6486989571407432e-05,
      "loss": 0.8534,
      "step": 447880
    },
    {
      "epoch": 1.5697427863468931,
      "grad_norm": 3.015625,
      "learning_rate": 2.648634054274373e-05,
      "loss": 0.8207,
      "step": 447890
    },
    {
      "epoch": 1.5697778338537889,
      "grad_norm": 2.6875,
      "learning_rate": 2.648569151408003e-05,
      "loss": 0.7991,
      "step": 447900
    },
    {
      "epoch": 1.5698128813606844,
      "grad_norm": 3.234375,
      "learning_rate": 2.648504248541633e-05,
      "loss": 0.8937,
      "step": 447910
    },
    {
      "epoch": 1.56984792886758,
      "grad_norm": 2.84375,
      "learning_rate": 2.6484393456752628e-05,
      "loss": 0.8422,
      "step": 447920
    },
    {
      "epoch": 1.5698829763744757,
      "grad_norm": 2.03125,
      "learning_rate": 2.6483744428088926e-05,
      "loss": 0.8115,
      "step": 447930
    },
    {
      "epoch": 1.5699180238813712,
      "grad_norm": 3.109375,
      "learning_rate": 2.6483095399425224e-05,
      "loss": 0.855,
      "step": 447940
    },
    {
      "epoch": 1.5699530713882668,
      "grad_norm": 3.328125,
      "learning_rate": 2.6482446370761522e-05,
      "loss": 0.8949,
      "step": 447950
    },
    {
      "epoch": 1.5699881188951625,
      "grad_norm": 2.8125,
      "learning_rate": 2.648179734209782e-05,
      "loss": 0.8492,
      "step": 447960
    },
    {
      "epoch": 1.5700231664020579,
      "grad_norm": 2.953125,
      "learning_rate": 2.6481148313434118e-05,
      "loss": 0.8489,
      "step": 447970
    },
    {
      "epoch": 1.5700582139089536,
      "grad_norm": 2.703125,
      "learning_rate": 2.6480499284770416e-05,
      "loss": 0.8404,
      "step": 447980
    },
    {
      "epoch": 1.5700932614158492,
      "grad_norm": 2.953125,
      "learning_rate": 2.6479850256106714e-05,
      "loss": 0.8115,
      "step": 447990
    },
    {
      "epoch": 1.5701283089227447,
      "grad_norm": 2.953125,
      "learning_rate": 2.6479201227443012e-05,
      "loss": 0.8041,
      "step": 448000
    },
    {
      "epoch": 1.5701633564296404,
      "grad_norm": 2.390625,
      "learning_rate": 2.647855219877931e-05,
      "loss": 0.8794,
      "step": 448010
    },
    {
      "epoch": 1.570198403936536,
      "grad_norm": 2.6875,
      "learning_rate": 2.6477903170115608e-05,
      "loss": 0.8033,
      "step": 448020
    },
    {
      "epoch": 1.5702334514434315,
      "grad_norm": 2.921875,
      "learning_rate": 2.6477254141451906e-05,
      "loss": 0.8388,
      "step": 448030
    },
    {
      "epoch": 1.5702684989503273,
      "grad_norm": 2.734375,
      "learning_rate": 2.64766051127882e-05,
      "loss": 0.7938,
      "step": 448040
    },
    {
      "epoch": 1.5703035464572228,
      "grad_norm": 2.84375,
      "learning_rate": 2.64759560841245e-05,
      "loss": 0.786,
      "step": 448050
    },
    {
      "epoch": 1.5703385939641183,
      "grad_norm": 2.828125,
      "learning_rate": 2.6475307055460796e-05,
      "loss": 0.8222,
      "step": 448060
    },
    {
      "epoch": 1.570373641471014,
      "grad_norm": 2.671875,
      "learning_rate": 2.6474658026797094e-05,
      "loss": 0.6971,
      "step": 448070
    },
    {
      "epoch": 1.5704086889779094,
      "grad_norm": 3.078125,
      "learning_rate": 2.6474008998133392e-05,
      "loss": 0.7772,
      "step": 448080
    },
    {
      "epoch": 1.5704437364848052,
      "grad_norm": 3.28125,
      "learning_rate": 2.647335996946969e-05,
      "loss": 0.8421,
      "step": 448090
    },
    {
      "epoch": 1.5704787839917007,
      "grad_norm": 3.328125,
      "learning_rate": 2.647271094080599e-05,
      "loss": 0.9016,
      "step": 448100
    },
    {
      "epoch": 1.5705138314985962,
      "grad_norm": 2.5625,
      "learning_rate": 2.6472061912142286e-05,
      "loss": 0.8156,
      "step": 448110
    },
    {
      "epoch": 1.570548879005492,
      "grad_norm": 2.484375,
      "learning_rate": 2.6471412883478584e-05,
      "loss": 0.7676,
      "step": 448120
    },
    {
      "epoch": 1.5705839265123875,
      "grad_norm": 2.765625,
      "learning_rate": 2.6470763854814882e-05,
      "loss": 0.8516,
      "step": 448130
    },
    {
      "epoch": 1.570618974019283,
      "grad_norm": 3.3125,
      "learning_rate": 2.6470114826151184e-05,
      "loss": 0.8319,
      "step": 448140
    },
    {
      "epoch": 1.5706540215261788,
      "grad_norm": 2.984375,
      "learning_rate": 2.6469465797487482e-05,
      "loss": 0.8462,
      "step": 448150
    },
    {
      "epoch": 1.5706890690330744,
      "grad_norm": 3.046875,
      "learning_rate": 2.646881676882378e-05,
      "loss": 0.7629,
      "step": 448160
    },
    {
      "epoch": 1.57072411653997,
      "grad_norm": 2.640625,
      "learning_rate": 2.6468167740160078e-05,
      "loss": 0.7454,
      "step": 448170
    },
    {
      "epoch": 1.5707591640468657,
      "grad_norm": 2.875,
      "learning_rate": 2.6467518711496376e-05,
      "loss": 0.8437,
      "step": 448180
    },
    {
      "epoch": 1.570794211553761,
      "grad_norm": 2.703125,
      "learning_rate": 2.6466869682832674e-05,
      "loss": 0.8211,
      "step": 448190
    },
    {
      "epoch": 1.5708292590606567,
      "grad_norm": 2.796875,
      "learning_rate": 2.6466220654168972e-05,
      "loss": 0.7915,
      "step": 448200
    },
    {
      "epoch": 1.5708643065675523,
      "grad_norm": 2.59375,
      "learning_rate": 2.646557162550527e-05,
      "loss": 0.8036,
      "step": 448210
    },
    {
      "epoch": 1.5708993540744478,
      "grad_norm": 3.09375,
      "learning_rate": 2.6464922596841568e-05,
      "loss": 0.8279,
      "step": 448220
    },
    {
      "epoch": 1.5709344015813436,
      "grad_norm": 3.1875,
      "learning_rate": 2.6464273568177866e-05,
      "loss": 0.8793,
      "step": 448230
    },
    {
      "epoch": 1.570969449088239,
      "grad_norm": 2.625,
      "learning_rate": 2.6463624539514164e-05,
      "loss": 0.848,
      "step": 448240
    },
    {
      "epoch": 1.5710044965951346,
      "grad_norm": 2.703125,
      "learning_rate": 2.6462975510850462e-05,
      "loss": 0.7908,
      "step": 448250
    },
    {
      "epoch": 1.5710395441020304,
      "grad_norm": 2.796875,
      "learning_rate": 2.646232648218676e-05,
      "loss": 0.8342,
      "step": 448260
    },
    {
      "epoch": 1.571074591608926,
      "grad_norm": 2.84375,
      "learning_rate": 2.6461677453523058e-05,
      "loss": 0.8499,
      "step": 448270
    },
    {
      "epoch": 1.5711096391158215,
      "grad_norm": 2.609375,
      "learning_rate": 2.646102842485936e-05,
      "loss": 0.7705,
      "step": 448280
    },
    {
      "epoch": 1.5711446866227172,
      "grad_norm": 2.734375,
      "learning_rate": 2.6460379396195657e-05,
      "loss": 0.7833,
      "step": 448290
    },
    {
      "epoch": 1.5711797341296125,
      "grad_norm": 2.796875,
      "learning_rate": 2.6459730367531955e-05,
      "loss": 0.893,
      "step": 448300
    },
    {
      "epoch": 1.5712147816365083,
      "grad_norm": 2.71875,
      "learning_rate": 2.6459081338868253e-05,
      "loss": 0.8396,
      "step": 448310
    },
    {
      "epoch": 1.5712498291434038,
      "grad_norm": 2.8125,
      "learning_rate": 2.645843231020455e-05,
      "loss": 0.8302,
      "step": 448320
    },
    {
      "epoch": 1.5712848766502994,
      "grad_norm": 2.84375,
      "learning_rate": 2.645778328154085e-05,
      "loss": 0.76,
      "step": 448330
    },
    {
      "epoch": 1.5713199241571951,
      "grad_norm": 3.296875,
      "learning_rate": 2.6457134252877147e-05,
      "loss": 0.7832,
      "step": 448340
    },
    {
      "epoch": 1.5713549716640907,
      "grad_norm": 2.6875,
      "learning_rate": 2.6456485224213445e-05,
      "loss": 0.7979,
      "step": 448350
    },
    {
      "epoch": 1.5713900191709862,
      "grad_norm": 2.640625,
      "learning_rate": 2.6455836195549743e-05,
      "loss": 0.7955,
      "step": 448360
    },
    {
      "epoch": 1.571425066677882,
      "grad_norm": 2.953125,
      "learning_rate": 2.645518716688604e-05,
      "loss": 0.8309,
      "step": 448370
    },
    {
      "epoch": 1.5714601141847775,
      "grad_norm": 3.0625,
      "learning_rate": 2.645453813822234e-05,
      "loss": 0.8488,
      "step": 448380
    },
    {
      "epoch": 1.571495161691673,
      "grad_norm": 2.953125,
      "learning_rate": 2.6453889109558637e-05,
      "loss": 0.844,
      "step": 448390
    },
    {
      "epoch": 1.5715302091985688,
      "grad_norm": 3.09375,
      "learning_rate": 2.6453240080894935e-05,
      "loss": 0.8358,
      "step": 448400
    },
    {
      "epoch": 1.571565256705464,
      "grad_norm": 3.375,
      "learning_rate": 2.645259105223123e-05,
      "loss": 0.8167,
      "step": 448410
    },
    {
      "epoch": 1.5716003042123599,
      "grad_norm": 3.140625,
      "learning_rate": 2.6451942023567528e-05,
      "loss": 0.8789,
      "step": 448420
    },
    {
      "epoch": 1.5716353517192556,
      "grad_norm": 2.9375,
      "learning_rate": 2.6451292994903826e-05,
      "loss": 0.8622,
      "step": 448430
    },
    {
      "epoch": 1.571670399226151,
      "grad_norm": 3.265625,
      "learning_rate": 2.6450643966240124e-05,
      "loss": 0.9224,
      "step": 448440
    },
    {
      "epoch": 1.5717054467330467,
      "grad_norm": 2.84375,
      "learning_rate": 2.6449994937576422e-05,
      "loss": 0.7812,
      "step": 448450
    },
    {
      "epoch": 1.5717404942399422,
      "grad_norm": 3.03125,
      "learning_rate": 2.644934590891272e-05,
      "loss": 0.8198,
      "step": 448460
    },
    {
      "epoch": 1.5717755417468378,
      "grad_norm": 3.28125,
      "learning_rate": 2.6448696880249018e-05,
      "loss": 0.8524,
      "step": 448470
    },
    {
      "epoch": 1.5718105892537335,
      "grad_norm": 2.796875,
      "learning_rate": 2.6448047851585316e-05,
      "loss": 0.9288,
      "step": 448480
    },
    {
      "epoch": 1.571845636760629,
      "grad_norm": 2.734375,
      "learning_rate": 2.6447398822921614e-05,
      "loss": 0.9117,
      "step": 448490
    },
    {
      "epoch": 1.5718806842675246,
      "grad_norm": 3.09375,
      "learning_rate": 2.6446749794257912e-05,
      "loss": 0.8318,
      "step": 448500
    },
    {
      "epoch": 1.5719157317744203,
      "grad_norm": 2.921875,
      "learning_rate": 2.6446100765594213e-05,
      "loss": 0.8965,
      "step": 448510
    },
    {
      "epoch": 1.5719507792813157,
      "grad_norm": 2.703125,
      "learning_rate": 2.644545173693051e-05,
      "loss": 0.8307,
      "step": 448520
    },
    {
      "epoch": 1.5719858267882114,
      "grad_norm": 2.78125,
      "learning_rate": 2.644480270826681e-05,
      "loss": 0.8041,
      "step": 448530
    },
    {
      "epoch": 1.5720208742951072,
      "grad_norm": 3.34375,
      "learning_rate": 2.6444153679603107e-05,
      "loss": 0.8863,
      "step": 448540
    },
    {
      "epoch": 1.5720559218020025,
      "grad_norm": 2.59375,
      "learning_rate": 2.6443504650939405e-05,
      "loss": 0.8312,
      "step": 448550
    },
    {
      "epoch": 1.5720909693088982,
      "grad_norm": 2.96875,
      "learning_rate": 2.6442855622275703e-05,
      "loss": 0.8823,
      "step": 448560
    },
    {
      "epoch": 1.5721260168157938,
      "grad_norm": 2.859375,
      "learning_rate": 2.6442206593612e-05,
      "loss": 0.8714,
      "step": 448570
    },
    {
      "epoch": 1.5721610643226893,
      "grad_norm": 3.03125,
      "learning_rate": 2.64415575649483e-05,
      "loss": 0.8571,
      "step": 448580
    },
    {
      "epoch": 1.572196111829585,
      "grad_norm": 3.0625,
      "learning_rate": 2.6440908536284597e-05,
      "loss": 0.8076,
      "step": 448590
    },
    {
      "epoch": 1.5722311593364806,
      "grad_norm": 2.8125,
      "learning_rate": 2.6440259507620895e-05,
      "loss": 0.8586,
      "step": 448600
    },
    {
      "epoch": 1.5722662068433761,
      "grad_norm": 3.0,
      "learning_rate": 2.6439610478957193e-05,
      "loss": 0.8621,
      "step": 448610
    },
    {
      "epoch": 1.572301254350272,
      "grad_norm": 3.0,
      "learning_rate": 2.643896145029349e-05,
      "loss": 0.8434,
      "step": 448620
    },
    {
      "epoch": 1.5723363018571674,
      "grad_norm": 2.96875,
      "learning_rate": 2.643831242162979e-05,
      "loss": 0.9471,
      "step": 448630
    },
    {
      "epoch": 1.572371349364063,
      "grad_norm": 2.59375,
      "learning_rate": 2.6437663392966087e-05,
      "loss": 0.8442,
      "step": 448640
    },
    {
      "epoch": 1.5724063968709587,
      "grad_norm": 2.875,
      "learning_rate": 2.643701436430239e-05,
      "loss": 0.9138,
      "step": 448650
    },
    {
      "epoch": 1.572441444377854,
      "grad_norm": 2.9375,
      "learning_rate": 2.6436365335638687e-05,
      "loss": 0.8139,
      "step": 448660
    },
    {
      "epoch": 1.5724764918847498,
      "grad_norm": 2.96875,
      "learning_rate": 2.6435716306974985e-05,
      "loss": 0.8847,
      "step": 448670
    },
    {
      "epoch": 1.5725115393916453,
      "grad_norm": 2.71875,
      "learning_rate": 2.6435067278311283e-05,
      "loss": 0.8595,
      "step": 448680
    },
    {
      "epoch": 1.5725465868985409,
      "grad_norm": 2.6875,
      "learning_rate": 2.643441824964758e-05,
      "loss": 0.8578,
      "step": 448690
    },
    {
      "epoch": 1.5725816344054366,
      "grad_norm": 2.453125,
      "learning_rate": 2.643376922098388e-05,
      "loss": 0.8661,
      "step": 448700
    },
    {
      "epoch": 1.5726166819123322,
      "grad_norm": 2.6875,
      "learning_rate": 2.6433120192320177e-05,
      "loss": 0.8461,
      "step": 448710
    },
    {
      "epoch": 1.5726517294192277,
      "grad_norm": 3.0625,
      "learning_rate": 2.6432471163656475e-05,
      "loss": 0.8224,
      "step": 448720
    },
    {
      "epoch": 1.5726867769261235,
      "grad_norm": 3.125,
      "learning_rate": 2.6431822134992773e-05,
      "loss": 0.8528,
      "step": 448730
    },
    {
      "epoch": 1.572721824433019,
      "grad_norm": 2.9375,
      "learning_rate": 2.643117310632907e-05,
      "loss": 0.8515,
      "step": 448740
    },
    {
      "epoch": 1.5727568719399145,
      "grad_norm": 2.828125,
      "learning_rate": 2.643052407766537e-05,
      "loss": 0.814,
      "step": 448750
    },
    {
      "epoch": 1.5727919194468103,
      "grad_norm": 2.796875,
      "learning_rate": 2.6429875049001667e-05,
      "loss": 0.8657,
      "step": 448760
    },
    {
      "epoch": 1.5728269669537056,
      "grad_norm": 2.96875,
      "learning_rate": 2.6429226020337965e-05,
      "loss": 0.8087,
      "step": 448770
    },
    {
      "epoch": 1.5728620144606014,
      "grad_norm": 2.65625,
      "learning_rate": 2.6428576991674266e-05,
      "loss": 0.9008,
      "step": 448780
    },
    {
      "epoch": 1.572897061967497,
      "grad_norm": 3.09375,
      "learning_rate": 2.6427927963010557e-05,
      "loss": 0.9215,
      "step": 448790
    },
    {
      "epoch": 1.5729321094743924,
      "grad_norm": 3.25,
      "learning_rate": 2.6427278934346855e-05,
      "loss": 0.8248,
      "step": 448800
    },
    {
      "epoch": 1.5729671569812882,
      "grad_norm": 2.6875,
      "learning_rate": 2.6426629905683153e-05,
      "loss": 0.8298,
      "step": 448810
    },
    {
      "epoch": 1.5730022044881837,
      "grad_norm": 2.9375,
      "learning_rate": 2.642598087701945e-05,
      "loss": 0.8614,
      "step": 448820
    },
    {
      "epoch": 1.5730372519950793,
      "grad_norm": 2.8125,
      "learning_rate": 2.642533184835575e-05,
      "loss": 0.8522,
      "step": 448830
    },
    {
      "epoch": 1.573072299501975,
      "grad_norm": 3.34375,
      "learning_rate": 2.6424682819692047e-05,
      "loss": 0.9352,
      "step": 448840
    },
    {
      "epoch": 1.5731073470088706,
      "grad_norm": 2.640625,
      "learning_rate": 2.6424033791028345e-05,
      "loss": 0.7727,
      "step": 448850
    },
    {
      "epoch": 1.573142394515766,
      "grad_norm": 3.3125,
      "learning_rate": 2.6423384762364643e-05,
      "loss": 0.9056,
      "step": 448860
    },
    {
      "epoch": 1.5731774420226619,
      "grad_norm": 3.25,
      "learning_rate": 2.6422735733700945e-05,
      "loss": 0.9048,
      "step": 448870
    },
    {
      "epoch": 1.5732124895295572,
      "grad_norm": 2.84375,
      "learning_rate": 2.6422086705037243e-05,
      "loss": 0.816,
      "step": 448880
    },
    {
      "epoch": 1.573247537036453,
      "grad_norm": 3.234375,
      "learning_rate": 2.642143767637354e-05,
      "loss": 0.8985,
      "step": 448890
    },
    {
      "epoch": 1.5732825845433485,
      "grad_norm": 3.421875,
      "learning_rate": 2.642078864770984e-05,
      "loss": 0.9111,
      "step": 448900
    },
    {
      "epoch": 1.573317632050244,
      "grad_norm": 2.8125,
      "learning_rate": 2.6420139619046137e-05,
      "loss": 0.8583,
      "step": 448910
    },
    {
      "epoch": 1.5733526795571398,
      "grad_norm": 2.734375,
      "learning_rate": 2.6419490590382435e-05,
      "loss": 0.9091,
      "step": 448920
    },
    {
      "epoch": 1.5733877270640353,
      "grad_norm": 3.25,
      "learning_rate": 2.6418841561718733e-05,
      "loss": 0.7662,
      "step": 448930
    },
    {
      "epoch": 1.5734227745709308,
      "grad_norm": 2.859375,
      "learning_rate": 2.641819253305503e-05,
      "loss": 0.7224,
      "step": 448940
    },
    {
      "epoch": 1.5734578220778266,
      "grad_norm": 3.140625,
      "learning_rate": 2.641754350439133e-05,
      "loss": 0.9125,
      "step": 448950
    },
    {
      "epoch": 1.5734928695847221,
      "grad_norm": 3.015625,
      "learning_rate": 2.6416894475727627e-05,
      "loss": 0.737,
      "step": 448960
    },
    {
      "epoch": 1.5735279170916177,
      "grad_norm": 3.203125,
      "learning_rate": 2.6416245447063925e-05,
      "loss": 0.8609,
      "step": 448970
    },
    {
      "epoch": 1.5735629645985134,
      "grad_norm": 3.015625,
      "learning_rate": 2.6415596418400223e-05,
      "loss": 0.8151,
      "step": 448980
    },
    {
      "epoch": 1.5735980121054087,
      "grad_norm": 2.734375,
      "learning_rate": 2.641494738973652e-05,
      "loss": 0.8363,
      "step": 448990
    },
    {
      "epoch": 1.5736330596123045,
      "grad_norm": 3.328125,
      "learning_rate": 2.641429836107282e-05,
      "loss": 0.8374,
      "step": 449000
    },
    {
      "epoch": 1.5736681071192,
      "grad_norm": 2.5625,
      "learning_rate": 2.641364933240912e-05,
      "loss": 0.7752,
      "step": 449010
    },
    {
      "epoch": 1.5737031546260956,
      "grad_norm": 2.96875,
      "learning_rate": 2.6413000303745418e-05,
      "loss": 0.7802,
      "step": 449020
    },
    {
      "epoch": 1.5737382021329913,
      "grad_norm": 2.65625,
      "learning_rate": 2.6412351275081716e-05,
      "loss": 0.7968,
      "step": 449030
    },
    {
      "epoch": 1.5737732496398869,
      "grad_norm": 2.6875,
      "learning_rate": 2.6411702246418014e-05,
      "loss": 0.8593,
      "step": 449040
    },
    {
      "epoch": 1.5738082971467824,
      "grad_norm": 3.0,
      "learning_rate": 2.6411053217754312e-05,
      "loss": 0.9488,
      "step": 449050
    },
    {
      "epoch": 1.5738433446536781,
      "grad_norm": 2.546875,
      "learning_rate": 2.641040418909061e-05,
      "loss": 0.8123,
      "step": 449060
    },
    {
      "epoch": 1.5738783921605737,
      "grad_norm": 2.765625,
      "learning_rate": 2.6409755160426908e-05,
      "loss": 0.8286,
      "step": 449070
    },
    {
      "epoch": 1.5739134396674692,
      "grad_norm": 2.75,
      "learning_rate": 2.6409106131763206e-05,
      "loss": 0.9357,
      "step": 449080
    },
    {
      "epoch": 1.573948487174365,
      "grad_norm": 3.234375,
      "learning_rate": 2.6408457103099504e-05,
      "loss": 0.8721,
      "step": 449090
    },
    {
      "epoch": 1.5739835346812603,
      "grad_norm": 2.890625,
      "learning_rate": 2.6407808074435802e-05,
      "loss": 0.938,
      "step": 449100
    },
    {
      "epoch": 1.574018582188156,
      "grad_norm": 2.671875,
      "learning_rate": 2.64071590457721e-05,
      "loss": 0.8225,
      "step": 449110
    },
    {
      "epoch": 1.5740536296950518,
      "grad_norm": 3.609375,
      "learning_rate": 2.6406510017108398e-05,
      "loss": 0.8461,
      "step": 449120
    },
    {
      "epoch": 1.5740886772019471,
      "grad_norm": 2.9375,
      "learning_rate": 2.6405860988444696e-05,
      "loss": 0.8674,
      "step": 449130
    },
    {
      "epoch": 1.5741237247088429,
      "grad_norm": 2.921875,
      "learning_rate": 2.6405211959780994e-05,
      "loss": 0.7523,
      "step": 449140
    },
    {
      "epoch": 1.5741587722157384,
      "grad_norm": 2.625,
      "learning_rate": 2.6404562931117295e-05,
      "loss": 0.8216,
      "step": 449150
    },
    {
      "epoch": 1.574193819722634,
      "grad_norm": 2.953125,
      "learning_rate": 2.6403913902453593e-05,
      "loss": 0.8154,
      "step": 449160
    },
    {
      "epoch": 1.5742288672295297,
      "grad_norm": 3.0,
      "learning_rate": 2.6403264873789885e-05,
      "loss": 0.8663,
      "step": 449170
    },
    {
      "epoch": 1.5742639147364252,
      "grad_norm": 3.109375,
      "learning_rate": 2.6402615845126183e-05,
      "loss": 0.8123,
      "step": 449180
    },
    {
      "epoch": 1.5742989622433208,
      "grad_norm": 2.6875,
      "learning_rate": 2.640196681646248e-05,
      "loss": 0.7711,
      "step": 449190
    },
    {
      "epoch": 1.5743340097502165,
      "grad_norm": 2.625,
      "learning_rate": 2.640131778779878e-05,
      "loss": 0.8274,
      "step": 449200
    },
    {
      "epoch": 1.574369057257112,
      "grad_norm": 3.015625,
      "learning_rate": 2.6400668759135077e-05,
      "loss": 0.8027,
      "step": 449210
    },
    {
      "epoch": 1.5744041047640076,
      "grad_norm": 3.6875,
      "learning_rate": 2.6400019730471375e-05,
      "loss": 0.7499,
      "step": 449220
    },
    {
      "epoch": 1.5744391522709034,
      "grad_norm": 2.78125,
      "learning_rate": 2.6399370701807673e-05,
      "loss": 0.8657,
      "step": 449230
    },
    {
      "epoch": 1.5744741997777987,
      "grad_norm": 3.0,
      "learning_rate": 2.6398721673143974e-05,
      "loss": 0.8132,
      "step": 449240
    },
    {
      "epoch": 1.5745092472846944,
      "grad_norm": 3.296875,
      "learning_rate": 2.6398072644480272e-05,
      "loss": 0.7924,
      "step": 449250
    },
    {
      "epoch": 1.57454429479159,
      "grad_norm": 2.546875,
      "learning_rate": 2.639742361581657e-05,
      "loss": 0.7727,
      "step": 449260
    },
    {
      "epoch": 1.5745793422984855,
      "grad_norm": 3.15625,
      "learning_rate": 2.6396774587152868e-05,
      "loss": 0.8557,
      "step": 449270
    },
    {
      "epoch": 1.5746143898053813,
      "grad_norm": 3.03125,
      "learning_rate": 2.6396125558489166e-05,
      "loss": 0.7399,
      "step": 449280
    },
    {
      "epoch": 1.5746494373122768,
      "grad_norm": 2.96875,
      "learning_rate": 2.6395476529825464e-05,
      "loss": 0.7807,
      "step": 449290
    },
    {
      "epoch": 1.5746844848191723,
      "grad_norm": 2.71875,
      "learning_rate": 2.6394827501161762e-05,
      "loss": 0.7117,
      "step": 449300
    },
    {
      "epoch": 1.574719532326068,
      "grad_norm": 2.78125,
      "learning_rate": 2.639417847249806e-05,
      "loss": 0.773,
      "step": 449310
    },
    {
      "epoch": 1.5747545798329636,
      "grad_norm": 3.046875,
      "learning_rate": 2.6393529443834358e-05,
      "loss": 0.8438,
      "step": 449320
    },
    {
      "epoch": 1.5747896273398592,
      "grad_norm": 2.828125,
      "learning_rate": 2.6392880415170656e-05,
      "loss": 0.859,
      "step": 449330
    },
    {
      "epoch": 1.574824674846755,
      "grad_norm": 2.859375,
      "learning_rate": 2.6392231386506954e-05,
      "loss": 0.7936,
      "step": 449340
    },
    {
      "epoch": 1.5748597223536502,
      "grad_norm": 3.109375,
      "learning_rate": 2.6391582357843252e-05,
      "loss": 0.7525,
      "step": 449350
    },
    {
      "epoch": 1.574894769860546,
      "grad_norm": 3.015625,
      "learning_rate": 2.639093332917955e-05,
      "loss": 0.7926,
      "step": 449360
    },
    {
      "epoch": 1.5749298173674415,
      "grad_norm": 2.734375,
      "learning_rate": 2.6390284300515848e-05,
      "loss": 0.8233,
      "step": 449370
    },
    {
      "epoch": 1.574964864874337,
      "grad_norm": 2.765625,
      "learning_rate": 2.638963527185215e-05,
      "loss": 0.8678,
      "step": 449380
    },
    {
      "epoch": 1.5749999123812328,
      "grad_norm": 3.09375,
      "learning_rate": 2.6388986243188447e-05,
      "loss": 0.8558,
      "step": 449390
    },
    {
      "epoch": 1.5750349598881284,
      "grad_norm": 2.46875,
      "learning_rate": 2.6388337214524745e-05,
      "loss": 0.8426,
      "step": 449400
    },
    {
      "epoch": 1.575070007395024,
      "grad_norm": 3.046875,
      "learning_rate": 2.6387688185861043e-05,
      "loss": 0.8915,
      "step": 449410
    },
    {
      "epoch": 1.5751050549019197,
      "grad_norm": 3.0625,
      "learning_rate": 2.638703915719734e-05,
      "loss": 0.8459,
      "step": 449420
    },
    {
      "epoch": 1.5751401024088152,
      "grad_norm": 2.59375,
      "learning_rate": 2.638639012853364e-05,
      "loss": 0.8335,
      "step": 449430
    },
    {
      "epoch": 1.5751751499157107,
      "grad_norm": 3.078125,
      "learning_rate": 2.6385741099869937e-05,
      "loss": 0.8526,
      "step": 449440
    },
    {
      "epoch": 1.5752101974226065,
      "grad_norm": 2.8125,
      "learning_rate": 2.6385092071206235e-05,
      "loss": 0.8541,
      "step": 449450
    },
    {
      "epoch": 1.5752452449295018,
      "grad_norm": 3.203125,
      "learning_rate": 2.6384443042542533e-05,
      "loss": 0.8472,
      "step": 449460
    },
    {
      "epoch": 1.5752802924363976,
      "grad_norm": 2.6875,
      "learning_rate": 2.638379401387883e-05,
      "loss": 0.8878,
      "step": 449470
    },
    {
      "epoch": 1.575315339943293,
      "grad_norm": 2.90625,
      "learning_rate": 2.638314498521513e-05,
      "loss": 0.8407,
      "step": 449480
    },
    {
      "epoch": 1.5753503874501886,
      "grad_norm": 2.875,
      "learning_rate": 2.6382495956551427e-05,
      "loss": 0.8251,
      "step": 449490
    },
    {
      "epoch": 1.5753854349570844,
      "grad_norm": 2.90625,
      "learning_rate": 2.6381846927887725e-05,
      "loss": 0.8074,
      "step": 449500
    },
    {
      "epoch": 1.57542048246398,
      "grad_norm": 2.984375,
      "learning_rate": 2.6381197899224023e-05,
      "loss": 0.8664,
      "step": 449510
    },
    {
      "epoch": 1.5754555299708755,
      "grad_norm": 2.984375,
      "learning_rate": 2.6380548870560325e-05,
      "loss": 0.7952,
      "step": 449520
    },
    {
      "epoch": 1.5754905774777712,
      "grad_norm": 3.296875,
      "learning_rate": 2.6379899841896623e-05,
      "loss": 0.7968,
      "step": 449530
    },
    {
      "epoch": 1.5755256249846668,
      "grad_norm": 2.984375,
      "learning_rate": 2.6379250813232914e-05,
      "loss": 0.881,
      "step": 449540
    },
    {
      "epoch": 1.5755606724915623,
      "grad_norm": 3.140625,
      "learning_rate": 2.6378601784569212e-05,
      "loss": 0.8775,
      "step": 449550
    },
    {
      "epoch": 1.575595719998458,
      "grad_norm": 3.0625,
      "learning_rate": 2.637795275590551e-05,
      "loss": 0.8408,
      "step": 449560
    },
    {
      "epoch": 1.5756307675053534,
      "grad_norm": 3.375,
      "learning_rate": 2.6377303727241808e-05,
      "loss": 0.925,
      "step": 449570
    },
    {
      "epoch": 1.5756658150122491,
      "grad_norm": 2.90625,
      "learning_rate": 2.6376654698578106e-05,
      "loss": 0.789,
      "step": 449580
    },
    {
      "epoch": 1.5757008625191447,
      "grad_norm": 2.828125,
      "learning_rate": 2.6376005669914404e-05,
      "loss": 0.8054,
      "step": 449590
    },
    {
      "epoch": 1.5757359100260402,
      "grad_norm": 3.375,
      "learning_rate": 2.6375356641250702e-05,
      "loss": 0.8367,
      "step": 449600
    },
    {
      "epoch": 1.575770957532936,
      "grad_norm": 3.078125,
      "learning_rate": 2.6374707612587003e-05,
      "loss": 0.7902,
      "step": 449610
    },
    {
      "epoch": 1.5758060050398315,
      "grad_norm": 3.15625,
      "learning_rate": 2.63740585839233e-05,
      "loss": 0.8652,
      "step": 449620
    },
    {
      "epoch": 1.575841052546727,
      "grad_norm": 3.046875,
      "learning_rate": 2.63734095552596e-05,
      "loss": 0.8251,
      "step": 449630
    },
    {
      "epoch": 1.5758761000536228,
      "grad_norm": 3.5,
      "learning_rate": 2.6372760526595897e-05,
      "loss": 0.8561,
      "step": 449640
    },
    {
      "epoch": 1.5759111475605183,
      "grad_norm": 2.734375,
      "learning_rate": 2.6372111497932195e-05,
      "loss": 0.874,
      "step": 449650
    },
    {
      "epoch": 1.5759461950674138,
      "grad_norm": 2.984375,
      "learning_rate": 2.6371462469268493e-05,
      "loss": 0.7383,
      "step": 449660
    },
    {
      "epoch": 1.5759812425743096,
      "grad_norm": 3.0625,
      "learning_rate": 2.637081344060479e-05,
      "loss": 0.8138,
      "step": 449670
    },
    {
      "epoch": 1.576016290081205,
      "grad_norm": 2.875,
      "learning_rate": 2.637016441194109e-05,
      "loss": 0.8561,
      "step": 449680
    },
    {
      "epoch": 1.5760513375881007,
      "grad_norm": 3.203125,
      "learning_rate": 2.6369515383277387e-05,
      "loss": 0.8972,
      "step": 449690
    },
    {
      "epoch": 1.5760863850949964,
      "grad_norm": 2.75,
      "learning_rate": 2.6368866354613685e-05,
      "loss": 0.8293,
      "step": 449700
    },
    {
      "epoch": 1.5761214326018917,
      "grad_norm": 2.859375,
      "learning_rate": 2.6368217325949983e-05,
      "loss": 0.8618,
      "step": 449710
    },
    {
      "epoch": 1.5761564801087875,
      "grad_norm": 3.1875,
      "learning_rate": 2.636756829728628e-05,
      "loss": 0.8597,
      "step": 449720
    },
    {
      "epoch": 1.576191527615683,
      "grad_norm": 3.1875,
      "learning_rate": 2.636691926862258e-05,
      "loss": 0.8243,
      "step": 449730
    },
    {
      "epoch": 1.5762265751225786,
      "grad_norm": 2.75,
      "learning_rate": 2.636627023995888e-05,
      "loss": 0.8902,
      "step": 449740
    },
    {
      "epoch": 1.5762616226294743,
      "grad_norm": 2.640625,
      "learning_rate": 2.636562121129518e-05,
      "loss": 0.8405,
      "step": 449750
    },
    {
      "epoch": 1.5762966701363699,
      "grad_norm": 3.03125,
      "learning_rate": 2.6364972182631477e-05,
      "loss": 0.8162,
      "step": 449760
    },
    {
      "epoch": 1.5763317176432654,
      "grad_norm": 3.0,
      "learning_rate": 2.6364323153967775e-05,
      "loss": 0.8083,
      "step": 449770
    },
    {
      "epoch": 1.5763667651501612,
      "grad_norm": 3.03125,
      "learning_rate": 2.6363674125304073e-05,
      "loss": 0.7938,
      "step": 449780
    },
    {
      "epoch": 1.5764018126570565,
      "grad_norm": 3.171875,
      "learning_rate": 2.636302509664037e-05,
      "loss": 0.8402,
      "step": 449790
    },
    {
      "epoch": 1.5764368601639522,
      "grad_norm": 2.765625,
      "learning_rate": 2.636237606797667e-05,
      "loss": 0.7864,
      "step": 449800
    },
    {
      "epoch": 1.576471907670848,
      "grad_norm": 3.109375,
      "learning_rate": 2.6361727039312967e-05,
      "loss": 0.9002,
      "step": 449810
    },
    {
      "epoch": 1.5765069551777433,
      "grad_norm": 2.734375,
      "learning_rate": 2.6361078010649265e-05,
      "loss": 0.9218,
      "step": 449820
    },
    {
      "epoch": 1.576542002684639,
      "grad_norm": 2.578125,
      "learning_rate": 2.6360428981985563e-05,
      "loss": 0.8362,
      "step": 449830
    },
    {
      "epoch": 1.5765770501915346,
      "grad_norm": 2.625,
      "learning_rate": 2.635977995332186e-05,
      "loss": 0.7994,
      "step": 449840
    },
    {
      "epoch": 1.5766120976984301,
      "grad_norm": 2.71875,
      "learning_rate": 2.635913092465816e-05,
      "loss": 0.8118,
      "step": 449850
    },
    {
      "epoch": 1.576647145205326,
      "grad_norm": 3.171875,
      "learning_rate": 2.6358481895994457e-05,
      "loss": 0.8747,
      "step": 449860
    },
    {
      "epoch": 1.5766821927122214,
      "grad_norm": 2.5625,
      "learning_rate": 2.6357832867330755e-05,
      "loss": 0.8907,
      "step": 449870
    },
    {
      "epoch": 1.576717240219117,
      "grad_norm": 2.671875,
      "learning_rate": 2.6357183838667056e-05,
      "loss": 0.8388,
      "step": 449880
    },
    {
      "epoch": 1.5767522877260127,
      "grad_norm": 3.015625,
      "learning_rate": 2.6356534810003354e-05,
      "loss": 0.8653,
      "step": 449890
    },
    {
      "epoch": 1.5767873352329083,
      "grad_norm": 2.59375,
      "learning_rate": 2.6355885781339652e-05,
      "loss": 0.8221,
      "step": 449900
    },
    {
      "epoch": 1.5768223827398038,
      "grad_norm": 2.953125,
      "learning_rate": 2.635523675267595e-05,
      "loss": 0.8406,
      "step": 449910
    },
    {
      "epoch": 1.5768574302466996,
      "grad_norm": 2.765625,
      "learning_rate": 2.635458772401224e-05,
      "loss": 0.8283,
      "step": 449920
    },
    {
      "epoch": 1.5768924777535949,
      "grad_norm": 2.984375,
      "learning_rate": 2.635393869534854e-05,
      "loss": 0.8406,
      "step": 449930
    },
    {
      "epoch": 1.5769275252604906,
      "grad_norm": 3.09375,
      "learning_rate": 2.6353289666684837e-05,
      "loss": 0.7732,
      "step": 449940
    },
    {
      "epoch": 1.5769625727673862,
      "grad_norm": 3.515625,
      "learning_rate": 2.6352640638021135e-05,
      "loss": 0.8749,
      "step": 449950
    },
    {
      "epoch": 1.5769976202742817,
      "grad_norm": 3.171875,
      "learning_rate": 2.6351991609357433e-05,
      "loss": 0.9396,
      "step": 449960
    },
    {
      "epoch": 1.5770326677811775,
      "grad_norm": 2.484375,
      "learning_rate": 2.6351342580693735e-05,
      "loss": 0.7695,
      "step": 449970
    },
    {
      "epoch": 1.577067715288073,
      "grad_norm": 3.09375,
      "learning_rate": 2.6350693552030033e-05,
      "loss": 0.8,
      "step": 449980
    },
    {
      "epoch": 1.5771027627949685,
      "grad_norm": 2.234375,
      "learning_rate": 2.635004452336633e-05,
      "loss": 0.7929,
      "step": 449990
    },
    {
      "epoch": 1.5771378103018643,
      "grad_norm": 3.140625,
      "learning_rate": 2.634939549470263e-05,
      "loss": 0.8461,
      "step": 450000
    },
    {
      "epoch": 1.5771378103018643,
      "eval_loss": 0.7874642610549927,
      "eval_runtime": 554.0984,
      "eval_samples_per_second": 686.586,
      "eval_steps_per_second": 57.215,
      "step": 450000
    },
    {
      "epoch": 1.5771728578087598,
      "grad_norm": 3.03125,
      "learning_rate": 2.6348746466038927e-05,
      "loss": 0.8103,
      "step": 450010
    },
    {
      "epoch": 1.5772079053156554,
      "grad_norm": 2.46875,
      "learning_rate": 2.6348097437375225e-05,
      "loss": 0.7247,
      "step": 450020
    },
    {
      "epoch": 1.5772429528225511,
      "grad_norm": 3.265625,
      "learning_rate": 2.6347448408711523e-05,
      "loss": 0.9006,
      "step": 450030
    },
    {
      "epoch": 1.5772780003294464,
      "grad_norm": 2.875,
      "learning_rate": 2.634679938004782e-05,
      "loss": 0.8262,
      "step": 450040
    },
    {
      "epoch": 1.5773130478363422,
      "grad_norm": 2.703125,
      "learning_rate": 2.634615035138412e-05,
      "loss": 0.899,
      "step": 450050
    },
    {
      "epoch": 1.5773480953432377,
      "grad_norm": 2.828125,
      "learning_rate": 2.6345501322720417e-05,
      "loss": 0.8809,
      "step": 450060
    },
    {
      "epoch": 1.5773831428501333,
      "grad_norm": 3.125,
      "learning_rate": 2.6344852294056715e-05,
      "loss": 0.8472,
      "step": 450070
    },
    {
      "epoch": 1.577418190357029,
      "grad_norm": 3.71875,
      "learning_rate": 2.6344203265393013e-05,
      "loss": 0.883,
      "step": 450080
    },
    {
      "epoch": 1.5774532378639246,
      "grad_norm": 3.75,
      "learning_rate": 2.634355423672931e-05,
      "loss": 0.8372,
      "step": 450090
    },
    {
      "epoch": 1.57748828537082,
      "grad_norm": 3.015625,
      "learning_rate": 2.634290520806561e-05,
      "loss": 0.8957,
      "step": 450100
    },
    {
      "epoch": 1.5775233328777158,
      "grad_norm": 2.359375,
      "learning_rate": 2.634225617940191e-05,
      "loss": 0.8352,
      "step": 450110
    },
    {
      "epoch": 1.5775583803846114,
      "grad_norm": 2.78125,
      "learning_rate": 2.6341607150738208e-05,
      "loss": 0.8559,
      "step": 450120
    },
    {
      "epoch": 1.577593427891507,
      "grad_norm": 2.671875,
      "learning_rate": 2.6340958122074506e-05,
      "loss": 0.7736,
      "step": 450130
    },
    {
      "epoch": 1.5776284753984027,
      "grad_norm": 2.984375,
      "learning_rate": 2.6340309093410804e-05,
      "loss": 0.9422,
      "step": 450140
    },
    {
      "epoch": 1.577663522905298,
      "grad_norm": 2.625,
      "learning_rate": 2.6339660064747102e-05,
      "loss": 0.8279,
      "step": 450150
    },
    {
      "epoch": 1.5776985704121937,
      "grad_norm": 3.140625,
      "learning_rate": 2.63390110360834e-05,
      "loss": 0.7965,
      "step": 450160
    },
    {
      "epoch": 1.5777336179190893,
      "grad_norm": 3.171875,
      "learning_rate": 2.6338362007419698e-05,
      "loss": 0.861,
      "step": 450170
    },
    {
      "epoch": 1.5777686654259848,
      "grad_norm": 3.125,
      "learning_rate": 2.6337712978755996e-05,
      "loss": 0.7939,
      "step": 450180
    },
    {
      "epoch": 1.5778037129328806,
      "grad_norm": 3.25,
      "learning_rate": 2.6337063950092294e-05,
      "loss": 0.8931,
      "step": 450190
    },
    {
      "epoch": 1.5778387604397761,
      "grad_norm": 2.65625,
      "learning_rate": 2.6336414921428592e-05,
      "loss": 0.7585,
      "step": 450200
    },
    {
      "epoch": 1.5778738079466716,
      "grad_norm": 2.828125,
      "learning_rate": 2.633576589276489e-05,
      "loss": 0.771,
      "step": 450210
    },
    {
      "epoch": 1.5779088554535674,
      "grad_norm": 3.140625,
      "learning_rate": 2.6335116864101188e-05,
      "loss": 0.8507,
      "step": 450220
    },
    {
      "epoch": 1.577943902960463,
      "grad_norm": 2.640625,
      "learning_rate": 2.6334467835437486e-05,
      "loss": 0.8551,
      "step": 450230
    },
    {
      "epoch": 1.5779789504673585,
      "grad_norm": 3.15625,
      "learning_rate": 2.6333818806773784e-05,
      "loss": 0.8953,
      "step": 450240
    },
    {
      "epoch": 1.5780139979742542,
      "grad_norm": 3.28125,
      "learning_rate": 2.6333169778110085e-05,
      "loss": 0.8263,
      "step": 450250
    },
    {
      "epoch": 1.5780490454811495,
      "grad_norm": 2.78125,
      "learning_rate": 2.6332520749446383e-05,
      "loss": 0.8296,
      "step": 450260
    },
    {
      "epoch": 1.5780840929880453,
      "grad_norm": 2.96875,
      "learning_rate": 2.633187172078268e-05,
      "loss": 0.9137,
      "step": 450270
    },
    {
      "epoch": 1.5781191404949408,
      "grad_norm": 3.109375,
      "learning_rate": 2.633122269211898e-05,
      "loss": 0.8313,
      "step": 450280
    },
    {
      "epoch": 1.5781541880018364,
      "grad_norm": 2.640625,
      "learning_rate": 2.6330573663455277e-05,
      "loss": 0.8416,
      "step": 450290
    },
    {
      "epoch": 1.5781892355087321,
      "grad_norm": 3.15625,
      "learning_rate": 2.632992463479157e-05,
      "loss": 0.8218,
      "step": 450300
    },
    {
      "epoch": 1.5782242830156277,
      "grad_norm": 2.96875,
      "learning_rate": 2.6329275606127867e-05,
      "loss": 0.8736,
      "step": 450310
    },
    {
      "epoch": 1.5782593305225232,
      "grad_norm": 2.640625,
      "learning_rate": 2.6328626577464165e-05,
      "loss": 0.9243,
      "step": 450320
    },
    {
      "epoch": 1.578294378029419,
      "grad_norm": 2.6875,
      "learning_rate": 2.6327977548800463e-05,
      "loss": 0.8354,
      "step": 450330
    },
    {
      "epoch": 1.5783294255363145,
      "grad_norm": 2.96875,
      "learning_rate": 2.6327328520136764e-05,
      "loss": 0.8647,
      "step": 450340
    },
    {
      "epoch": 1.57836447304321,
      "grad_norm": 2.984375,
      "learning_rate": 2.6326679491473062e-05,
      "loss": 0.9192,
      "step": 450350
    },
    {
      "epoch": 1.5783995205501058,
      "grad_norm": 2.6875,
      "learning_rate": 2.632603046280936e-05,
      "loss": 0.8427,
      "step": 450360
    },
    {
      "epoch": 1.578434568057001,
      "grad_norm": 2.796875,
      "learning_rate": 2.6325381434145658e-05,
      "loss": 0.8148,
      "step": 450370
    },
    {
      "epoch": 1.5784696155638969,
      "grad_norm": 2.796875,
      "learning_rate": 2.6324732405481956e-05,
      "loss": 0.8742,
      "step": 450380
    },
    {
      "epoch": 1.5785046630707926,
      "grad_norm": 2.671875,
      "learning_rate": 2.6324083376818254e-05,
      "loss": 0.8589,
      "step": 450390
    },
    {
      "epoch": 1.578539710577688,
      "grad_norm": 3.203125,
      "learning_rate": 2.6323434348154552e-05,
      "loss": 0.8902,
      "step": 450400
    },
    {
      "epoch": 1.5785747580845837,
      "grad_norm": 3.15625,
      "learning_rate": 2.632278531949085e-05,
      "loss": 0.8717,
      "step": 450410
    },
    {
      "epoch": 1.5786098055914792,
      "grad_norm": 3.109375,
      "learning_rate": 2.6322136290827148e-05,
      "loss": 0.9382,
      "step": 450420
    },
    {
      "epoch": 1.5786448530983748,
      "grad_norm": 2.875,
      "learning_rate": 2.6321487262163446e-05,
      "loss": 0.861,
      "step": 450430
    },
    {
      "epoch": 1.5786799006052705,
      "grad_norm": 2.84375,
      "learning_rate": 2.6320838233499744e-05,
      "loss": 0.8244,
      "step": 450440
    },
    {
      "epoch": 1.578714948112166,
      "grad_norm": 2.4375,
      "learning_rate": 2.6320189204836042e-05,
      "loss": 0.8064,
      "step": 450450
    },
    {
      "epoch": 1.5787499956190616,
      "grad_norm": 2.515625,
      "learning_rate": 2.631954017617234e-05,
      "loss": 0.7932,
      "step": 450460
    },
    {
      "epoch": 1.5787850431259574,
      "grad_norm": 2.796875,
      "learning_rate": 2.6318891147508638e-05,
      "loss": 0.8752,
      "step": 450470
    },
    {
      "epoch": 1.5788200906328527,
      "grad_norm": 3.125,
      "learning_rate": 2.631824211884494e-05,
      "loss": 0.9069,
      "step": 450480
    },
    {
      "epoch": 1.5788551381397484,
      "grad_norm": 3.015625,
      "learning_rate": 2.6317593090181237e-05,
      "loss": 0.8128,
      "step": 450490
    },
    {
      "epoch": 1.5788901856466442,
      "grad_norm": 2.734375,
      "learning_rate": 2.6316944061517535e-05,
      "loss": 0.8744,
      "step": 450500
    },
    {
      "epoch": 1.5789252331535395,
      "grad_norm": 3.265625,
      "learning_rate": 2.6316295032853833e-05,
      "loss": 0.8422,
      "step": 450510
    },
    {
      "epoch": 1.5789602806604353,
      "grad_norm": 2.765625,
      "learning_rate": 2.631564600419013e-05,
      "loss": 0.826,
      "step": 450520
    },
    {
      "epoch": 1.5789953281673308,
      "grad_norm": 3.109375,
      "learning_rate": 2.631499697552643e-05,
      "loss": 0.8863,
      "step": 450530
    },
    {
      "epoch": 1.5790303756742263,
      "grad_norm": 2.53125,
      "learning_rate": 2.6314347946862727e-05,
      "loss": 0.8226,
      "step": 450540
    },
    {
      "epoch": 1.579065423181122,
      "grad_norm": 2.71875,
      "learning_rate": 2.6313698918199025e-05,
      "loss": 0.8305,
      "step": 450550
    },
    {
      "epoch": 1.5791004706880176,
      "grad_norm": 2.671875,
      "learning_rate": 2.6313049889535323e-05,
      "loss": 0.87,
      "step": 450560
    },
    {
      "epoch": 1.5791355181949132,
      "grad_norm": 2.71875,
      "learning_rate": 2.631240086087162e-05,
      "loss": 0.8843,
      "step": 450570
    },
    {
      "epoch": 1.579170565701809,
      "grad_norm": 3.0625,
      "learning_rate": 2.631175183220792e-05,
      "loss": 0.862,
      "step": 450580
    },
    {
      "epoch": 1.5792056132087045,
      "grad_norm": 2.046875,
      "learning_rate": 2.6311102803544217e-05,
      "loss": 0.7936,
      "step": 450590
    },
    {
      "epoch": 1.5792406607156,
      "grad_norm": 2.796875,
      "learning_rate": 2.6310453774880515e-05,
      "loss": 0.8413,
      "step": 450600
    },
    {
      "epoch": 1.5792757082224957,
      "grad_norm": 2.953125,
      "learning_rate": 2.6309804746216813e-05,
      "loss": 0.877,
      "step": 450610
    },
    {
      "epoch": 1.579310755729391,
      "grad_norm": 2.734375,
      "learning_rate": 2.6309155717553115e-05,
      "loss": 0.8092,
      "step": 450620
    },
    {
      "epoch": 1.5793458032362868,
      "grad_norm": 3.515625,
      "learning_rate": 2.6308506688889413e-05,
      "loss": 0.7595,
      "step": 450630
    },
    {
      "epoch": 1.5793808507431824,
      "grad_norm": 3.296875,
      "learning_rate": 2.630785766022571e-05,
      "loss": 0.838,
      "step": 450640
    },
    {
      "epoch": 1.5794158982500779,
      "grad_norm": 2.640625,
      "learning_rate": 2.630720863156201e-05,
      "loss": 0.7722,
      "step": 450650
    },
    {
      "epoch": 1.5794509457569736,
      "grad_norm": 2.734375,
      "learning_rate": 2.6306559602898307e-05,
      "loss": 0.8967,
      "step": 450660
    },
    {
      "epoch": 1.5794859932638692,
      "grad_norm": 2.875,
      "learning_rate": 2.6305910574234598e-05,
      "loss": 0.8607,
      "step": 450670
    },
    {
      "epoch": 1.5795210407707647,
      "grad_norm": 2.375,
      "learning_rate": 2.6305261545570896e-05,
      "loss": 0.8816,
      "step": 450680
    },
    {
      "epoch": 1.5795560882776605,
      "grad_norm": 3.25,
      "learning_rate": 2.6304612516907194e-05,
      "loss": 0.8898,
      "step": 450690
    },
    {
      "epoch": 1.579591135784556,
      "grad_norm": 2.71875,
      "learning_rate": 2.6303963488243492e-05,
      "loss": 0.8421,
      "step": 450700
    },
    {
      "epoch": 1.5796261832914515,
      "grad_norm": 2.953125,
      "learning_rate": 2.6303314459579793e-05,
      "loss": 0.789,
      "step": 450710
    },
    {
      "epoch": 1.5796612307983473,
      "grad_norm": 2.578125,
      "learning_rate": 2.630266543091609e-05,
      "loss": 0.8969,
      "step": 450720
    },
    {
      "epoch": 1.5796962783052426,
      "grad_norm": 3.53125,
      "learning_rate": 2.630201640225239e-05,
      "loss": 0.8299,
      "step": 450730
    },
    {
      "epoch": 1.5797313258121384,
      "grad_norm": 2.390625,
      "learning_rate": 2.6301367373588687e-05,
      "loss": 0.8028,
      "step": 450740
    },
    {
      "epoch": 1.579766373319034,
      "grad_norm": 2.734375,
      "learning_rate": 2.6300718344924985e-05,
      "loss": 0.7863,
      "step": 450750
    },
    {
      "epoch": 1.5798014208259294,
      "grad_norm": 2.953125,
      "learning_rate": 2.6300069316261283e-05,
      "loss": 0.7789,
      "step": 450760
    },
    {
      "epoch": 1.5798364683328252,
      "grad_norm": 2.8125,
      "learning_rate": 2.629942028759758e-05,
      "loss": 0.7949,
      "step": 450770
    },
    {
      "epoch": 1.5798715158397207,
      "grad_norm": 3.234375,
      "learning_rate": 2.629877125893388e-05,
      "loss": 0.8739,
      "step": 450780
    },
    {
      "epoch": 1.5799065633466163,
      "grad_norm": 2.9375,
      "learning_rate": 2.6298122230270177e-05,
      "loss": 0.8448,
      "step": 450790
    },
    {
      "epoch": 1.579941610853512,
      "grad_norm": 2.6875,
      "learning_rate": 2.6297473201606475e-05,
      "loss": 0.8731,
      "step": 450800
    },
    {
      "epoch": 1.5799766583604076,
      "grad_norm": 2.75,
      "learning_rate": 2.6296824172942773e-05,
      "loss": 0.8076,
      "step": 450810
    },
    {
      "epoch": 1.580011705867303,
      "grad_norm": 2.859375,
      "learning_rate": 2.629617514427907e-05,
      "loss": 0.9326,
      "step": 450820
    },
    {
      "epoch": 1.5800467533741989,
      "grad_norm": 2.84375,
      "learning_rate": 2.629552611561537e-05,
      "loss": 0.819,
      "step": 450830
    },
    {
      "epoch": 1.5800818008810942,
      "grad_norm": 3.203125,
      "learning_rate": 2.629487708695167e-05,
      "loss": 0.8493,
      "step": 450840
    },
    {
      "epoch": 1.58011684838799,
      "grad_norm": 3.046875,
      "learning_rate": 2.629422805828797e-05,
      "loss": 0.7521,
      "step": 450850
    },
    {
      "epoch": 1.5801518958948855,
      "grad_norm": 2.359375,
      "learning_rate": 2.6293579029624267e-05,
      "loss": 0.8109,
      "step": 450860
    },
    {
      "epoch": 1.580186943401781,
      "grad_norm": 3.296875,
      "learning_rate": 2.6292930000960565e-05,
      "loss": 0.9355,
      "step": 450870
    },
    {
      "epoch": 1.5802219909086768,
      "grad_norm": 3.15625,
      "learning_rate": 2.6292280972296863e-05,
      "loss": 0.7319,
      "step": 450880
    },
    {
      "epoch": 1.5802570384155723,
      "grad_norm": 2.484375,
      "learning_rate": 2.629163194363316e-05,
      "loss": 0.8706,
      "step": 450890
    },
    {
      "epoch": 1.5802920859224678,
      "grad_norm": 3.203125,
      "learning_rate": 2.629098291496946e-05,
      "loss": 0.803,
      "step": 450900
    },
    {
      "epoch": 1.5803271334293636,
      "grad_norm": 3.0625,
      "learning_rate": 2.6290333886305757e-05,
      "loss": 0.8284,
      "step": 450910
    },
    {
      "epoch": 1.5803621809362591,
      "grad_norm": 2.625,
      "learning_rate": 2.6289684857642055e-05,
      "loss": 0.8191,
      "step": 450920
    },
    {
      "epoch": 1.5803972284431547,
      "grad_norm": 2.828125,
      "learning_rate": 2.6289035828978353e-05,
      "loss": 0.8238,
      "step": 450930
    },
    {
      "epoch": 1.5804322759500504,
      "grad_norm": 2.9375,
      "learning_rate": 2.628838680031465e-05,
      "loss": 0.8777,
      "step": 450940
    },
    {
      "epoch": 1.5804673234569457,
      "grad_norm": 3.1875,
      "learning_rate": 2.628773777165095e-05,
      "loss": 0.731,
      "step": 450950
    },
    {
      "epoch": 1.5805023709638415,
      "grad_norm": 2.703125,
      "learning_rate": 2.6287088742987247e-05,
      "loss": 0.7793,
      "step": 450960
    },
    {
      "epoch": 1.580537418470737,
      "grad_norm": 2.515625,
      "learning_rate": 2.6286439714323545e-05,
      "loss": 0.8269,
      "step": 450970
    },
    {
      "epoch": 1.5805724659776326,
      "grad_norm": 3.0,
      "learning_rate": 2.6285790685659846e-05,
      "loss": 0.8601,
      "step": 450980
    },
    {
      "epoch": 1.5806075134845283,
      "grad_norm": 3.171875,
      "learning_rate": 2.6285141656996144e-05,
      "loss": 0.7743,
      "step": 450990
    },
    {
      "epoch": 1.5806425609914239,
      "grad_norm": 2.5,
      "learning_rate": 2.6284492628332442e-05,
      "loss": 0.8342,
      "step": 451000
    },
    {
      "epoch": 1.5806776084983194,
      "grad_norm": 3.171875,
      "learning_rate": 2.628384359966874e-05,
      "loss": 0.8501,
      "step": 451010
    },
    {
      "epoch": 1.5807126560052152,
      "grad_norm": 3.078125,
      "learning_rate": 2.6283194571005038e-05,
      "loss": 0.8838,
      "step": 451020
    },
    {
      "epoch": 1.5807477035121107,
      "grad_norm": 2.5,
      "learning_rate": 2.6282545542341336e-05,
      "loss": 0.8407,
      "step": 451030
    },
    {
      "epoch": 1.5807827510190062,
      "grad_norm": 2.8125,
      "learning_rate": 2.6281896513677634e-05,
      "loss": 0.9065,
      "step": 451040
    },
    {
      "epoch": 1.580817798525902,
      "grad_norm": 2.890625,
      "learning_rate": 2.6281247485013925e-05,
      "loss": 0.8271,
      "step": 451050
    },
    {
      "epoch": 1.5808528460327973,
      "grad_norm": 3.078125,
      "learning_rate": 2.6280598456350223e-05,
      "loss": 0.7747,
      "step": 451060
    },
    {
      "epoch": 1.580887893539693,
      "grad_norm": 2.734375,
      "learning_rate": 2.6279949427686525e-05,
      "loss": 0.8795,
      "step": 451070
    },
    {
      "epoch": 1.5809229410465888,
      "grad_norm": 2.390625,
      "learning_rate": 2.6279300399022823e-05,
      "loss": 0.8499,
      "step": 451080
    },
    {
      "epoch": 1.5809579885534841,
      "grad_norm": 2.953125,
      "learning_rate": 2.627865137035912e-05,
      "loss": 0.7819,
      "step": 451090
    },
    {
      "epoch": 1.5809930360603799,
      "grad_norm": 2.5625,
      "learning_rate": 2.627800234169542e-05,
      "loss": 0.8847,
      "step": 451100
    },
    {
      "epoch": 1.5810280835672754,
      "grad_norm": 2.96875,
      "learning_rate": 2.6277353313031717e-05,
      "loss": 0.8628,
      "step": 451110
    },
    {
      "epoch": 1.581063131074171,
      "grad_norm": 2.828125,
      "learning_rate": 2.6276704284368015e-05,
      "loss": 0.7151,
      "step": 451120
    },
    {
      "epoch": 1.5810981785810667,
      "grad_norm": 2.640625,
      "learning_rate": 2.6276055255704313e-05,
      "loss": 0.7476,
      "step": 451130
    },
    {
      "epoch": 1.5811332260879623,
      "grad_norm": 2.53125,
      "learning_rate": 2.627540622704061e-05,
      "loss": 0.8412,
      "step": 451140
    },
    {
      "epoch": 1.5811682735948578,
      "grad_norm": 2.78125,
      "learning_rate": 2.627475719837691e-05,
      "loss": 0.7999,
      "step": 451150
    },
    {
      "epoch": 1.5812033211017535,
      "grad_norm": 3.21875,
      "learning_rate": 2.6274108169713207e-05,
      "loss": 0.829,
      "step": 451160
    },
    {
      "epoch": 1.5812383686086489,
      "grad_norm": 3.5625,
      "learning_rate": 2.6273459141049505e-05,
      "loss": 0.8715,
      "step": 451170
    },
    {
      "epoch": 1.5812734161155446,
      "grad_norm": 3.109375,
      "learning_rate": 2.6272810112385803e-05,
      "loss": 0.8811,
      "step": 451180
    },
    {
      "epoch": 1.5813084636224404,
      "grad_norm": 3.34375,
      "learning_rate": 2.62721610837221e-05,
      "loss": 0.8508,
      "step": 451190
    },
    {
      "epoch": 1.5813435111293357,
      "grad_norm": 3.140625,
      "learning_rate": 2.62715120550584e-05,
      "loss": 0.8261,
      "step": 451200
    },
    {
      "epoch": 1.5813785586362314,
      "grad_norm": 2.421875,
      "learning_rate": 2.62708630263947e-05,
      "loss": 0.7712,
      "step": 451210
    },
    {
      "epoch": 1.581413606143127,
      "grad_norm": 2.34375,
      "learning_rate": 2.6270213997730998e-05,
      "loss": 0.804,
      "step": 451220
    },
    {
      "epoch": 1.5814486536500225,
      "grad_norm": 3.46875,
      "learning_rate": 2.6269564969067296e-05,
      "loss": 0.8166,
      "step": 451230
    },
    {
      "epoch": 1.5814837011569183,
      "grad_norm": 2.328125,
      "learning_rate": 2.6268915940403594e-05,
      "loss": 0.828,
      "step": 451240
    },
    {
      "epoch": 1.5815187486638138,
      "grad_norm": 3.21875,
      "learning_rate": 2.6268266911739892e-05,
      "loss": 0.8053,
      "step": 451250
    },
    {
      "epoch": 1.5815537961707093,
      "grad_norm": 2.765625,
      "learning_rate": 2.626761788307619e-05,
      "loss": 0.8227,
      "step": 451260
    },
    {
      "epoch": 1.581588843677605,
      "grad_norm": 2.828125,
      "learning_rate": 2.6266968854412488e-05,
      "loss": 0.8438,
      "step": 451270
    },
    {
      "epoch": 1.5816238911845006,
      "grad_norm": 3.265625,
      "learning_rate": 2.6266319825748786e-05,
      "loss": 0.863,
      "step": 451280
    },
    {
      "epoch": 1.5816589386913962,
      "grad_norm": 2.90625,
      "learning_rate": 2.6265670797085084e-05,
      "loss": 0.811,
      "step": 451290
    },
    {
      "epoch": 1.581693986198292,
      "grad_norm": 2.90625,
      "learning_rate": 2.6265021768421382e-05,
      "loss": 0.791,
      "step": 451300
    },
    {
      "epoch": 1.5817290337051872,
      "grad_norm": 3.375,
      "learning_rate": 2.626437273975768e-05,
      "loss": 0.8253,
      "step": 451310
    },
    {
      "epoch": 1.581764081212083,
      "grad_norm": 2.6875,
      "learning_rate": 2.6263723711093978e-05,
      "loss": 0.8965,
      "step": 451320
    },
    {
      "epoch": 1.5817991287189785,
      "grad_norm": 3.34375,
      "learning_rate": 2.6263074682430276e-05,
      "loss": 0.8371,
      "step": 451330
    },
    {
      "epoch": 1.581834176225874,
      "grad_norm": 2.75,
      "learning_rate": 2.6262425653766574e-05,
      "loss": 0.8337,
      "step": 451340
    },
    {
      "epoch": 1.5818692237327698,
      "grad_norm": 2.890625,
      "learning_rate": 2.6261776625102876e-05,
      "loss": 0.8221,
      "step": 451350
    },
    {
      "epoch": 1.5819042712396654,
      "grad_norm": 2.6875,
      "learning_rate": 2.6261127596439174e-05,
      "loss": 0.7716,
      "step": 451360
    },
    {
      "epoch": 1.581939318746561,
      "grad_norm": 2.84375,
      "learning_rate": 2.626047856777547e-05,
      "loss": 0.8606,
      "step": 451370
    },
    {
      "epoch": 1.5819743662534567,
      "grad_norm": 2.890625,
      "learning_rate": 2.625982953911177e-05,
      "loss": 0.7686,
      "step": 451380
    },
    {
      "epoch": 1.5820094137603522,
      "grad_norm": 3.109375,
      "learning_rate": 2.6259180510448068e-05,
      "loss": 0.8979,
      "step": 451390
    },
    {
      "epoch": 1.5820444612672477,
      "grad_norm": 3.125,
      "learning_rate": 2.6258531481784366e-05,
      "loss": 0.8576,
      "step": 451400
    },
    {
      "epoch": 1.5820795087741435,
      "grad_norm": 2.640625,
      "learning_rate": 2.6257882453120664e-05,
      "loss": 0.8177,
      "step": 451410
    },
    {
      "epoch": 1.5821145562810388,
      "grad_norm": 2.859375,
      "learning_rate": 2.6257233424456955e-05,
      "loss": 0.7945,
      "step": 451420
    },
    {
      "epoch": 1.5821496037879346,
      "grad_norm": 3.296875,
      "learning_rate": 2.6256584395793253e-05,
      "loss": 0.8216,
      "step": 451430
    },
    {
      "epoch": 1.58218465129483,
      "grad_norm": 2.890625,
      "learning_rate": 2.6255935367129554e-05,
      "loss": 0.8503,
      "step": 451440
    },
    {
      "epoch": 1.5822196988017256,
      "grad_norm": 2.703125,
      "learning_rate": 2.6255286338465852e-05,
      "loss": 0.8462,
      "step": 451450
    },
    {
      "epoch": 1.5822547463086214,
      "grad_norm": 2.84375,
      "learning_rate": 2.625463730980215e-05,
      "loss": 0.8966,
      "step": 451460
    },
    {
      "epoch": 1.582289793815517,
      "grad_norm": 3.15625,
      "learning_rate": 2.6253988281138448e-05,
      "loss": 0.7825,
      "step": 451470
    },
    {
      "epoch": 1.5823248413224125,
      "grad_norm": 2.875,
      "learning_rate": 2.6253339252474746e-05,
      "loss": 0.7871,
      "step": 451480
    },
    {
      "epoch": 1.5823598888293082,
      "grad_norm": 2.84375,
      "learning_rate": 2.6252690223811044e-05,
      "loss": 0.8603,
      "step": 451490
    },
    {
      "epoch": 1.5823949363362038,
      "grad_norm": 2.609375,
      "learning_rate": 2.6252041195147342e-05,
      "loss": 0.8124,
      "step": 451500
    },
    {
      "epoch": 1.5824299838430993,
      "grad_norm": 3.5,
      "learning_rate": 2.625139216648364e-05,
      "loss": 0.8665,
      "step": 451510
    },
    {
      "epoch": 1.582465031349995,
      "grad_norm": 2.90625,
      "learning_rate": 2.6250743137819938e-05,
      "loss": 0.9113,
      "step": 451520
    },
    {
      "epoch": 1.5825000788568904,
      "grad_norm": 3.5625,
      "learning_rate": 2.6250094109156236e-05,
      "loss": 0.7886,
      "step": 451530
    },
    {
      "epoch": 1.5825351263637861,
      "grad_norm": 3.0625,
      "learning_rate": 2.6249445080492534e-05,
      "loss": 0.8174,
      "step": 451540
    },
    {
      "epoch": 1.5825701738706817,
      "grad_norm": 3.0,
      "learning_rate": 2.6248796051828832e-05,
      "loss": 0.8242,
      "step": 451550
    },
    {
      "epoch": 1.5826052213775772,
      "grad_norm": 3.03125,
      "learning_rate": 2.624814702316513e-05,
      "loss": 0.7778,
      "step": 451560
    },
    {
      "epoch": 1.582640268884473,
      "grad_norm": 3.03125,
      "learning_rate": 2.6247497994501428e-05,
      "loss": 0.8758,
      "step": 451570
    },
    {
      "epoch": 1.5826753163913685,
      "grad_norm": 2.78125,
      "learning_rate": 2.624684896583773e-05,
      "loss": 0.7971,
      "step": 451580
    },
    {
      "epoch": 1.582710363898264,
      "grad_norm": 3.046875,
      "learning_rate": 2.6246199937174028e-05,
      "loss": 0.8328,
      "step": 451590
    },
    {
      "epoch": 1.5827454114051598,
      "grad_norm": 2.796875,
      "learning_rate": 2.6245550908510326e-05,
      "loss": 0.819,
      "step": 451600
    },
    {
      "epoch": 1.5827804589120553,
      "grad_norm": 2.8125,
      "learning_rate": 2.6244901879846624e-05,
      "loss": 0.7819,
      "step": 451610
    },
    {
      "epoch": 1.5828155064189509,
      "grad_norm": 3.046875,
      "learning_rate": 2.624425285118292e-05,
      "loss": 0.8519,
      "step": 451620
    },
    {
      "epoch": 1.5828505539258466,
      "grad_norm": 2.765625,
      "learning_rate": 2.624360382251922e-05,
      "loss": 0.8585,
      "step": 451630
    },
    {
      "epoch": 1.582885601432742,
      "grad_norm": 3.28125,
      "learning_rate": 2.6242954793855518e-05,
      "loss": 0.8112,
      "step": 451640
    },
    {
      "epoch": 1.5829206489396377,
      "grad_norm": 2.984375,
      "learning_rate": 2.6242305765191816e-05,
      "loss": 0.8515,
      "step": 451650
    },
    {
      "epoch": 1.5829556964465332,
      "grad_norm": 2.859375,
      "learning_rate": 2.6241656736528114e-05,
      "loss": 0.8627,
      "step": 451660
    },
    {
      "epoch": 1.5829907439534288,
      "grad_norm": 3.34375,
      "learning_rate": 2.624100770786441e-05,
      "loss": 0.8803,
      "step": 451670
    },
    {
      "epoch": 1.5830257914603245,
      "grad_norm": 2.9375,
      "learning_rate": 2.624035867920071e-05,
      "loss": 0.8787,
      "step": 451680
    },
    {
      "epoch": 1.58306083896722,
      "grad_norm": 2.515625,
      "learning_rate": 2.6239709650537008e-05,
      "loss": 0.8292,
      "step": 451690
    },
    {
      "epoch": 1.5830958864741156,
      "grad_norm": 2.9375,
      "learning_rate": 2.6239060621873306e-05,
      "loss": 0.8469,
      "step": 451700
    },
    {
      "epoch": 1.5831309339810113,
      "grad_norm": 2.578125,
      "learning_rate": 2.6238411593209604e-05,
      "loss": 0.8448,
      "step": 451710
    },
    {
      "epoch": 1.5831659814879069,
      "grad_norm": 2.453125,
      "learning_rate": 2.6237762564545905e-05,
      "loss": 0.8565,
      "step": 451720
    },
    {
      "epoch": 1.5832010289948024,
      "grad_norm": 3.078125,
      "learning_rate": 2.6237113535882203e-05,
      "loss": 0.8899,
      "step": 451730
    },
    {
      "epoch": 1.5832360765016982,
      "grad_norm": 2.875,
      "learning_rate": 2.62364645072185e-05,
      "loss": 0.8242,
      "step": 451740
    },
    {
      "epoch": 1.5832711240085935,
      "grad_norm": 3.0,
      "learning_rate": 2.62358154785548e-05,
      "loss": 0.8916,
      "step": 451750
    },
    {
      "epoch": 1.5833061715154892,
      "grad_norm": 2.703125,
      "learning_rate": 2.6235166449891097e-05,
      "loss": 0.8437,
      "step": 451760
    },
    {
      "epoch": 1.583341219022385,
      "grad_norm": 3.015625,
      "learning_rate": 2.6234517421227395e-05,
      "loss": 0.7635,
      "step": 451770
    },
    {
      "epoch": 1.5833762665292803,
      "grad_norm": 2.828125,
      "learning_rate": 2.6233868392563693e-05,
      "loss": 0.8107,
      "step": 451780
    },
    {
      "epoch": 1.583411314036176,
      "grad_norm": 3.0,
      "learning_rate": 2.623321936389999e-05,
      "loss": 0.8543,
      "step": 451790
    },
    {
      "epoch": 1.5834463615430716,
      "grad_norm": 3.25,
      "learning_rate": 2.6232570335236286e-05,
      "loss": 0.8541,
      "step": 451800
    },
    {
      "epoch": 1.5834814090499671,
      "grad_norm": 3.140625,
      "learning_rate": 2.6231921306572584e-05,
      "loss": 0.8105,
      "step": 451810
    },
    {
      "epoch": 1.583516456556863,
      "grad_norm": 2.75,
      "learning_rate": 2.623127227790888e-05,
      "loss": 0.8107,
      "step": 451820
    },
    {
      "epoch": 1.5835515040637584,
      "grad_norm": 3.34375,
      "learning_rate": 2.623062324924518e-05,
      "loss": 0.8408,
      "step": 451830
    },
    {
      "epoch": 1.583586551570654,
      "grad_norm": 2.890625,
      "learning_rate": 2.6229974220581478e-05,
      "loss": 0.7795,
      "step": 451840
    },
    {
      "epoch": 1.5836215990775497,
      "grad_norm": 3.21875,
      "learning_rate": 2.6229325191917776e-05,
      "loss": 0.8056,
      "step": 451850
    },
    {
      "epoch": 1.583656646584445,
      "grad_norm": 2.578125,
      "learning_rate": 2.6228676163254074e-05,
      "loss": 0.7756,
      "step": 451860
    },
    {
      "epoch": 1.5836916940913408,
      "grad_norm": 2.5,
      "learning_rate": 2.622802713459037e-05,
      "loss": 0.8019,
      "step": 451870
    },
    {
      "epoch": 1.5837267415982366,
      "grad_norm": 2.65625,
      "learning_rate": 2.622737810592667e-05,
      "loss": 0.9269,
      "step": 451880
    },
    {
      "epoch": 1.5837617891051319,
      "grad_norm": 2.703125,
      "learning_rate": 2.6226729077262968e-05,
      "loss": 0.8359,
      "step": 451890
    },
    {
      "epoch": 1.5837968366120276,
      "grad_norm": 2.765625,
      "learning_rate": 2.6226080048599266e-05,
      "loss": 0.7627,
      "step": 451900
    },
    {
      "epoch": 1.5838318841189232,
      "grad_norm": 2.765625,
      "learning_rate": 2.6225431019935564e-05,
      "loss": 0.907,
      "step": 451910
    },
    {
      "epoch": 1.5838669316258187,
      "grad_norm": 2.71875,
      "learning_rate": 2.622478199127186e-05,
      "loss": 0.7592,
      "step": 451920
    },
    {
      "epoch": 1.5839019791327145,
      "grad_norm": 3.0,
      "learning_rate": 2.622413296260816e-05,
      "loss": 0.9009,
      "step": 451930
    },
    {
      "epoch": 1.58393702663961,
      "grad_norm": 3.296875,
      "learning_rate": 2.622348393394446e-05,
      "loss": 0.7632,
      "step": 451940
    },
    {
      "epoch": 1.5839720741465055,
      "grad_norm": 3.140625,
      "learning_rate": 2.622283490528076e-05,
      "loss": 0.8541,
      "step": 451950
    },
    {
      "epoch": 1.5840071216534013,
      "grad_norm": 2.9375,
      "learning_rate": 2.6222185876617057e-05,
      "loss": 0.877,
      "step": 451960
    },
    {
      "epoch": 1.5840421691602968,
      "grad_norm": 2.84375,
      "learning_rate": 2.6221536847953355e-05,
      "loss": 0.7982,
      "step": 451970
    },
    {
      "epoch": 1.5840772166671924,
      "grad_norm": 2.875,
      "learning_rate": 2.6220887819289653e-05,
      "loss": 0.8016,
      "step": 451980
    },
    {
      "epoch": 1.5841122641740881,
      "grad_norm": 2.875,
      "learning_rate": 2.622023879062595e-05,
      "loss": 0.8574,
      "step": 451990
    },
    {
      "epoch": 1.5841473116809834,
      "grad_norm": 3.328125,
      "learning_rate": 2.621958976196225e-05,
      "loss": 0.795,
      "step": 452000
    },
    {
      "epoch": 1.5841823591878792,
      "grad_norm": 2.609375,
      "learning_rate": 2.6218940733298547e-05,
      "loss": 0.7995,
      "step": 452010
    },
    {
      "epoch": 1.5842174066947747,
      "grad_norm": 3.0,
      "learning_rate": 2.6218291704634845e-05,
      "loss": 0.9052,
      "step": 452020
    },
    {
      "epoch": 1.5842524542016703,
      "grad_norm": 2.953125,
      "learning_rate": 2.6217642675971143e-05,
      "loss": 0.8577,
      "step": 452030
    },
    {
      "epoch": 1.584287501708566,
      "grad_norm": 2.75,
      "learning_rate": 2.621699364730744e-05,
      "loss": 0.8818,
      "step": 452040
    },
    {
      "epoch": 1.5843225492154616,
      "grad_norm": 2.765625,
      "learning_rate": 2.621634461864374e-05,
      "loss": 0.8543,
      "step": 452050
    },
    {
      "epoch": 1.584357596722357,
      "grad_norm": 2.6875,
      "learning_rate": 2.6215695589980037e-05,
      "loss": 0.8515,
      "step": 452060
    },
    {
      "epoch": 1.5843926442292529,
      "grad_norm": 2.953125,
      "learning_rate": 2.6215046561316335e-05,
      "loss": 0.883,
      "step": 452070
    },
    {
      "epoch": 1.5844276917361484,
      "grad_norm": 2.796875,
      "learning_rate": 2.6214397532652636e-05,
      "loss": 0.7999,
      "step": 452080
    },
    {
      "epoch": 1.584462739243044,
      "grad_norm": 3.078125,
      "learning_rate": 2.6213748503988934e-05,
      "loss": 0.831,
      "step": 452090
    },
    {
      "epoch": 1.5844977867499397,
      "grad_norm": 2.765625,
      "learning_rate": 2.6213099475325232e-05,
      "loss": 0.8548,
      "step": 452100
    },
    {
      "epoch": 1.584532834256835,
      "grad_norm": 3.34375,
      "learning_rate": 2.621245044666153e-05,
      "loss": 0.8538,
      "step": 452110
    },
    {
      "epoch": 1.5845678817637308,
      "grad_norm": 3.53125,
      "learning_rate": 2.621180141799783e-05,
      "loss": 0.7929,
      "step": 452120
    },
    {
      "epoch": 1.5846029292706263,
      "grad_norm": 3.1875,
      "learning_rate": 2.6211152389334126e-05,
      "loss": 0.7991,
      "step": 452130
    },
    {
      "epoch": 1.5846379767775218,
      "grad_norm": 3.375,
      "learning_rate": 2.6210503360670424e-05,
      "loss": 0.8339,
      "step": 452140
    },
    {
      "epoch": 1.5846730242844176,
      "grad_norm": 2.71875,
      "learning_rate": 2.6209854332006722e-05,
      "loss": 0.848,
      "step": 452150
    },
    {
      "epoch": 1.5847080717913131,
      "grad_norm": 2.453125,
      "learning_rate": 2.620920530334302e-05,
      "loss": 0.8558,
      "step": 452160
    },
    {
      "epoch": 1.5847431192982087,
      "grad_norm": 3.171875,
      "learning_rate": 2.620855627467932e-05,
      "loss": 0.7781,
      "step": 452170
    },
    {
      "epoch": 1.5847781668051044,
      "grad_norm": 2.8125,
      "learning_rate": 2.6207907246015613e-05,
      "loss": 0.8839,
      "step": 452180
    },
    {
      "epoch": 1.584813214312,
      "grad_norm": 3.015625,
      "learning_rate": 2.620725821735191e-05,
      "loss": 0.8358,
      "step": 452190
    },
    {
      "epoch": 1.5848482618188955,
      "grad_norm": 2.453125,
      "learning_rate": 2.620660918868821e-05,
      "loss": 0.7661,
      "step": 452200
    },
    {
      "epoch": 1.5848833093257912,
      "grad_norm": 3.25,
      "learning_rate": 2.6205960160024507e-05,
      "loss": 0.8707,
      "step": 452210
    },
    {
      "epoch": 1.5849183568326866,
      "grad_norm": 3.125,
      "learning_rate": 2.6205311131360805e-05,
      "loss": 0.9311,
      "step": 452220
    },
    {
      "epoch": 1.5849534043395823,
      "grad_norm": 3.140625,
      "learning_rate": 2.6204662102697103e-05,
      "loss": 0.9267,
      "step": 452230
    },
    {
      "epoch": 1.5849884518464779,
      "grad_norm": 2.703125,
      "learning_rate": 2.62040130740334e-05,
      "loss": 0.8381,
      "step": 452240
    },
    {
      "epoch": 1.5850234993533734,
      "grad_norm": 2.625,
      "learning_rate": 2.62033640453697e-05,
      "loss": 0.8989,
      "step": 452250
    },
    {
      "epoch": 1.5850585468602691,
      "grad_norm": 2.6875,
      "learning_rate": 2.6202715016705997e-05,
      "loss": 0.8255,
      "step": 452260
    },
    {
      "epoch": 1.5850935943671647,
      "grad_norm": 2.484375,
      "learning_rate": 2.6202065988042295e-05,
      "loss": 0.771,
      "step": 452270
    },
    {
      "epoch": 1.5851286418740602,
      "grad_norm": 3.234375,
      "learning_rate": 2.6201416959378593e-05,
      "loss": 0.8124,
      "step": 452280
    },
    {
      "epoch": 1.585163689380956,
      "grad_norm": 2.65625,
      "learning_rate": 2.620076793071489e-05,
      "loss": 0.7974,
      "step": 452290
    },
    {
      "epoch": 1.5851987368878515,
      "grad_norm": 3.0625,
      "learning_rate": 2.620011890205119e-05,
      "loss": 0.8453,
      "step": 452300
    },
    {
      "epoch": 1.585233784394747,
      "grad_norm": 2.75,
      "learning_rate": 2.619946987338749e-05,
      "loss": 0.9109,
      "step": 452310
    },
    {
      "epoch": 1.5852688319016428,
      "grad_norm": 2.875,
      "learning_rate": 2.619882084472379e-05,
      "loss": 0.8122,
      "step": 452320
    },
    {
      "epoch": 1.5853038794085381,
      "grad_norm": 2.890625,
      "learning_rate": 2.6198171816060086e-05,
      "loss": 0.7367,
      "step": 452330
    },
    {
      "epoch": 1.5853389269154339,
      "grad_norm": 2.890625,
      "learning_rate": 2.6197522787396384e-05,
      "loss": 0.824,
      "step": 452340
    },
    {
      "epoch": 1.5853739744223294,
      "grad_norm": 2.640625,
      "learning_rate": 2.6196873758732682e-05,
      "loss": 0.8576,
      "step": 452350
    },
    {
      "epoch": 1.585409021929225,
      "grad_norm": 3.078125,
      "learning_rate": 2.619622473006898e-05,
      "loss": 0.7416,
      "step": 452360
    },
    {
      "epoch": 1.5854440694361207,
      "grad_norm": 3.0625,
      "learning_rate": 2.619557570140528e-05,
      "loss": 0.852,
      "step": 452370
    },
    {
      "epoch": 1.5854791169430162,
      "grad_norm": 3.078125,
      "learning_rate": 2.6194926672741576e-05,
      "loss": 0.8053,
      "step": 452380
    },
    {
      "epoch": 1.5855141644499118,
      "grad_norm": 3.1875,
      "learning_rate": 2.6194277644077874e-05,
      "loss": 0.8985,
      "step": 452390
    },
    {
      "epoch": 1.5855492119568075,
      "grad_norm": 2.734375,
      "learning_rate": 2.6193628615414172e-05,
      "loss": 0.8712,
      "step": 452400
    },
    {
      "epoch": 1.585584259463703,
      "grad_norm": 3.28125,
      "learning_rate": 2.619297958675047e-05,
      "loss": 0.8406,
      "step": 452410
    },
    {
      "epoch": 1.5856193069705986,
      "grad_norm": 3.375,
      "learning_rate": 2.619233055808677e-05,
      "loss": 0.8177,
      "step": 452420
    },
    {
      "epoch": 1.5856543544774944,
      "grad_norm": 2.78125,
      "learning_rate": 2.6191681529423066e-05,
      "loss": 0.7927,
      "step": 452430
    },
    {
      "epoch": 1.5856894019843897,
      "grad_norm": 2.96875,
      "learning_rate": 2.6191032500759364e-05,
      "loss": 0.8423,
      "step": 452440
    },
    {
      "epoch": 1.5857244494912854,
      "grad_norm": 3.203125,
      "learning_rate": 2.6190383472095666e-05,
      "loss": 0.8019,
      "step": 452450
    },
    {
      "epoch": 1.5857594969981812,
      "grad_norm": 2.703125,
      "learning_rate": 2.6189734443431964e-05,
      "loss": 0.7944,
      "step": 452460
    },
    {
      "epoch": 1.5857945445050765,
      "grad_norm": 3.109375,
      "learning_rate": 2.6189085414768262e-05,
      "loss": 0.9249,
      "step": 452470
    },
    {
      "epoch": 1.5858295920119723,
      "grad_norm": 2.859375,
      "learning_rate": 2.618843638610456e-05,
      "loss": 0.949,
      "step": 452480
    },
    {
      "epoch": 1.5858646395188678,
      "grad_norm": 2.859375,
      "learning_rate": 2.6187787357440858e-05,
      "loss": 0.839,
      "step": 452490
    },
    {
      "epoch": 1.5858996870257633,
      "grad_norm": 2.921875,
      "learning_rate": 2.6187138328777156e-05,
      "loss": 0.7521,
      "step": 452500
    },
    {
      "epoch": 1.585934734532659,
      "grad_norm": 3.046875,
      "learning_rate": 2.6186489300113454e-05,
      "loss": 0.8269,
      "step": 452510
    },
    {
      "epoch": 1.5859697820395546,
      "grad_norm": 3.015625,
      "learning_rate": 2.6185840271449752e-05,
      "loss": 0.8251,
      "step": 452520
    },
    {
      "epoch": 1.5860048295464502,
      "grad_norm": 3.25,
      "learning_rate": 2.618519124278605e-05,
      "loss": 0.8325,
      "step": 452530
    },
    {
      "epoch": 1.586039877053346,
      "grad_norm": 2.59375,
      "learning_rate": 2.6184542214122348e-05,
      "loss": 0.8465,
      "step": 452540
    },
    {
      "epoch": 1.5860749245602415,
      "grad_norm": 2.6875,
      "learning_rate": 2.6183893185458642e-05,
      "loss": 0.7751,
      "step": 452550
    },
    {
      "epoch": 1.586109972067137,
      "grad_norm": 3.109375,
      "learning_rate": 2.618324415679494e-05,
      "loss": 0.8322,
      "step": 452560
    },
    {
      "epoch": 1.5861450195740328,
      "grad_norm": 2.8125,
      "learning_rate": 2.618259512813124e-05,
      "loss": 0.8745,
      "step": 452570
    },
    {
      "epoch": 1.586180067080928,
      "grad_norm": 2.84375,
      "learning_rate": 2.6181946099467536e-05,
      "loss": 0.8714,
      "step": 452580
    },
    {
      "epoch": 1.5862151145878238,
      "grad_norm": 2.890625,
      "learning_rate": 2.6181297070803834e-05,
      "loss": 0.8732,
      "step": 452590
    },
    {
      "epoch": 1.5862501620947194,
      "grad_norm": 2.8125,
      "learning_rate": 2.6180648042140132e-05,
      "loss": 0.7694,
      "step": 452600
    },
    {
      "epoch": 1.586285209601615,
      "grad_norm": 2.828125,
      "learning_rate": 2.617999901347643e-05,
      "loss": 0.9,
      "step": 452610
    },
    {
      "epoch": 1.5863202571085107,
      "grad_norm": 3.265625,
      "learning_rate": 2.617934998481273e-05,
      "loss": 0.823,
      "step": 452620
    },
    {
      "epoch": 1.5863553046154062,
      "grad_norm": 3.0625,
      "learning_rate": 2.6178700956149026e-05,
      "loss": 0.805,
      "step": 452630
    },
    {
      "epoch": 1.5863903521223017,
      "grad_norm": 2.46875,
      "learning_rate": 2.6178051927485324e-05,
      "loss": 0.7528,
      "step": 452640
    },
    {
      "epoch": 1.5864253996291975,
      "grad_norm": 3.0625,
      "learning_rate": 2.6177402898821622e-05,
      "loss": 0.796,
      "step": 452650
    },
    {
      "epoch": 1.586460447136093,
      "grad_norm": 2.953125,
      "learning_rate": 2.617675387015792e-05,
      "loss": 0.8313,
      "step": 452660
    },
    {
      "epoch": 1.5864954946429886,
      "grad_norm": 3.0,
      "learning_rate": 2.617610484149422e-05,
      "loss": 0.8642,
      "step": 452670
    },
    {
      "epoch": 1.5865305421498843,
      "grad_norm": 2.875,
      "learning_rate": 2.617545581283052e-05,
      "loss": 0.8381,
      "step": 452680
    },
    {
      "epoch": 1.5865655896567796,
      "grad_norm": 3.03125,
      "learning_rate": 2.6174806784166818e-05,
      "loss": 0.8527,
      "step": 452690
    },
    {
      "epoch": 1.5866006371636754,
      "grad_norm": 2.75,
      "learning_rate": 2.6174157755503116e-05,
      "loss": 0.8373,
      "step": 452700
    },
    {
      "epoch": 1.586635684670571,
      "grad_norm": 2.96875,
      "learning_rate": 2.6173508726839414e-05,
      "loss": 0.8046,
      "step": 452710
    },
    {
      "epoch": 1.5866707321774665,
      "grad_norm": 2.921875,
      "learning_rate": 2.6172859698175712e-05,
      "loss": 0.8416,
      "step": 452720
    },
    {
      "epoch": 1.5867057796843622,
      "grad_norm": 3.34375,
      "learning_rate": 2.617221066951201e-05,
      "loss": 0.9086,
      "step": 452730
    },
    {
      "epoch": 1.5867408271912578,
      "grad_norm": 2.96875,
      "learning_rate": 2.6171561640848308e-05,
      "loss": 0.8586,
      "step": 452740
    },
    {
      "epoch": 1.5867758746981533,
      "grad_norm": 2.625,
      "learning_rate": 2.6170912612184606e-05,
      "loss": 0.8703,
      "step": 452750
    },
    {
      "epoch": 1.586810922205049,
      "grad_norm": 2.90625,
      "learning_rate": 2.6170263583520904e-05,
      "loss": 0.8477,
      "step": 452760
    },
    {
      "epoch": 1.5868459697119446,
      "grad_norm": 2.921875,
      "learning_rate": 2.6169614554857202e-05,
      "loss": 0.8136,
      "step": 452770
    },
    {
      "epoch": 1.5868810172188401,
      "grad_norm": 2.796875,
      "learning_rate": 2.61689655261935e-05,
      "loss": 0.851,
      "step": 452780
    },
    {
      "epoch": 1.5869160647257359,
      "grad_norm": 3.28125,
      "learning_rate": 2.6168316497529798e-05,
      "loss": 0.8855,
      "step": 452790
    },
    {
      "epoch": 1.5869511122326312,
      "grad_norm": 2.828125,
      "learning_rate": 2.6167667468866096e-05,
      "loss": 0.9073,
      "step": 452800
    },
    {
      "epoch": 1.586986159739527,
      "grad_norm": 2.59375,
      "learning_rate": 2.6167018440202394e-05,
      "loss": 0.7747,
      "step": 452810
    },
    {
      "epoch": 1.5870212072464225,
      "grad_norm": 2.8125,
      "learning_rate": 2.6166369411538695e-05,
      "loss": 0.8068,
      "step": 452820
    },
    {
      "epoch": 1.587056254753318,
      "grad_norm": 2.78125,
      "learning_rate": 2.6165720382874993e-05,
      "loss": 0.8342,
      "step": 452830
    },
    {
      "epoch": 1.5870913022602138,
      "grad_norm": 2.703125,
      "learning_rate": 2.616507135421129e-05,
      "loss": 0.802,
      "step": 452840
    },
    {
      "epoch": 1.5871263497671093,
      "grad_norm": 3.109375,
      "learning_rate": 2.616442232554759e-05,
      "loss": 0.8268,
      "step": 452850
    },
    {
      "epoch": 1.5871613972740048,
      "grad_norm": 3.359375,
      "learning_rate": 2.6163773296883887e-05,
      "loss": 0.8483,
      "step": 452860
    },
    {
      "epoch": 1.5871964447809006,
      "grad_norm": 2.40625,
      "learning_rate": 2.6163124268220185e-05,
      "loss": 0.8371,
      "step": 452870
    },
    {
      "epoch": 1.5872314922877961,
      "grad_norm": 2.71875,
      "learning_rate": 2.6162475239556483e-05,
      "loss": 0.828,
      "step": 452880
    },
    {
      "epoch": 1.5872665397946917,
      "grad_norm": 2.78125,
      "learning_rate": 2.616182621089278e-05,
      "loss": 0.7849,
      "step": 452890
    },
    {
      "epoch": 1.5873015873015874,
      "grad_norm": 2.9375,
      "learning_rate": 2.616117718222908e-05,
      "loss": 0.7662,
      "step": 452900
    },
    {
      "epoch": 1.5873366348084827,
      "grad_norm": 2.625,
      "learning_rate": 2.6160528153565377e-05,
      "loss": 0.8726,
      "step": 452910
    },
    {
      "epoch": 1.5873716823153785,
      "grad_norm": 2.6875,
      "learning_rate": 2.6159879124901675e-05,
      "loss": 0.8559,
      "step": 452920
    },
    {
      "epoch": 1.587406729822274,
      "grad_norm": 2.59375,
      "learning_rate": 2.615923009623797e-05,
      "loss": 0.779,
      "step": 452930
    },
    {
      "epoch": 1.5874417773291696,
      "grad_norm": 2.859375,
      "learning_rate": 2.6158581067574268e-05,
      "loss": 0.8327,
      "step": 452940
    },
    {
      "epoch": 1.5874768248360653,
      "grad_norm": 3.046875,
      "learning_rate": 2.6157932038910566e-05,
      "loss": 0.8136,
      "step": 452950
    },
    {
      "epoch": 1.5875118723429609,
      "grad_norm": 3.109375,
      "learning_rate": 2.6157283010246864e-05,
      "loss": 0.8812,
      "step": 452960
    },
    {
      "epoch": 1.5875469198498564,
      "grad_norm": 3.40625,
      "learning_rate": 2.6156633981583162e-05,
      "loss": 0.901,
      "step": 452970
    },
    {
      "epoch": 1.5875819673567522,
      "grad_norm": 3.53125,
      "learning_rate": 2.615598495291946e-05,
      "loss": 0.8608,
      "step": 452980
    },
    {
      "epoch": 1.5876170148636477,
      "grad_norm": 2.765625,
      "learning_rate": 2.6155335924255758e-05,
      "loss": 0.8485,
      "step": 452990
    },
    {
      "epoch": 1.5876520623705432,
      "grad_norm": 2.734375,
      "learning_rate": 2.6154686895592056e-05,
      "loss": 0.8197,
      "step": 453000
    },
    {
      "epoch": 1.587687109877439,
      "grad_norm": 2.890625,
      "learning_rate": 2.6154037866928354e-05,
      "loss": 0.7673,
      "step": 453010
    },
    {
      "epoch": 1.5877221573843343,
      "grad_norm": 3.421875,
      "learning_rate": 2.6153388838264652e-05,
      "loss": 0.9027,
      "step": 453020
    },
    {
      "epoch": 1.58775720489123,
      "grad_norm": 2.84375,
      "learning_rate": 2.615273980960095e-05,
      "loss": 0.8506,
      "step": 453030
    },
    {
      "epoch": 1.5877922523981258,
      "grad_norm": 2.71875,
      "learning_rate": 2.615209078093725e-05,
      "loss": 0.8147,
      "step": 453040
    },
    {
      "epoch": 1.5878272999050211,
      "grad_norm": 2.625,
      "learning_rate": 2.615144175227355e-05,
      "loss": 0.8286,
      "step": 453050
    },
    {
      "epoch": 1.587862347411917,
      "grad_norm": 2.4375,
      "learning_rate": 2.6150792723609847e-05,
      "loss": 0.7508,
      "step": 453060
    },
    {
      "epoch": 1.5878973949188124,
      "grad_norm": 3.4375,
      "learning_rate": 2.6150143694946145e-05,
      "loss": 0.8707,
      "step": 453070
    },
    {
      "epoch": 1.587932442425708,
      "grad_norm": 2.96875,
      "learning_rate": 2.6149494666282443e-05,
      "loss": 0.8505,
      "step": 453080
    },
    {
      "epoch": 1.5879674899326037,
      "grad_norm": 2.875,
      "learning_rate": 2.614884563761874e-05,
      "loss": 0.8959,
      "step": 453090
    },
    {
      "epoch": 1.5880025374394993,
      "grad_norm": 3.375,
      "learning_rate": 2.614819660895504e-05,
      "loss": 0.815,
      "step": 453100
    },
    {
      "epoch": 1.5880375849463948,
      "grad_norm": 3.390625,
      "learning_rate": 2.6147547580291337e-05,
      "loss": 0.8325,
      "step": 453110
    },
    {
      "epoch": 1.5880726324532906,
      "grad_norm": 2.53125,
      "learning_rate": 2.6146898551627635e-05,
      "loss": 0.8213,
      "step": 453120
    },
    {
      "epoch": 1.5881076799601859,
      "grad_norm": 2.421875,
      "learning_rate": 2.6146249522963933e-05,
      "loss": 0.8243,
      "step": 453130
    },
    {
      "epoch": 1.5881427274670816,
      "grad_norm": 2.890625,
      "learning_rate": 2.614560049430023e-05,
      "loss": 0.8236,
      "step": 453140
    },
    {
      "epoch": 1.5881777749739774,
      "grad_norm": 2.796875,
      "learning_rate": 2.614495146563653e-05,
      "loss": 0.8292,
      "step": 453150
    },
    {
      "epoch": 1.5882128224808727,
      "grad_norm": 2.96875,
      "learning_rate": 2.6144302436972827e-05,
      "loss": 0.8082,
      "step": 453160
    },
    {
      "epoch": 1.5882478699877685,
      "grad_norm": 2.84375,
      "learning_rate": 2.6143653408309125e-05,
      "loss": 0.9515,
      "step": 453170
    },
    {
      "epoch": 1.588282917494664,
      "grad_norm": 3.28125,
      "learning_rate": 2.6143004379645427e-05,
      "loss": 0.9097,
      "step": 453180
    },
    {
      "epoch": 1.5883179650015595,
      "grad_norm": 2.875,
      "learning_rate": 2.6142355350981725e-05,
      "loss": 0.7755,
      "step": 453190
    },
    {
      "epoch": 1.5883530125084553,
      "grad_norm": 2.40625,
      "learning_rate": 2.6141706322318023e-05,
      "loss": 0.7659,
      "step": 453200
    },
    {
      "epoch": 1.5883880600153508,
      "grad_norm": 2.609375,
      "learning_rate": 2.614105729365432e-05,
      "loss": 0.8082,
      "step": 453210
    },
    {
      "epoch": 1.5884231075222464,
      "grad_norm": 2.796875,
      "learning_rate": 2.614040826499062e-05,
      "loss": 0.7909,
      "step": 453220
    },
    {
      "epoch": 1.5884581550291421,
      "grad_norm": 3.15625,
      "learning_rate": 2.6139759236326917e-05,
      "loss": 0.7762,
      "step": 453230
    },
    {
      "epoch": 1.5884932025360377,
      "grad_norm": 3.25,
      "learning_rate": 2.6139110207663215e-05,
      "loss": 0.8226,
      "step": 453240
    },
    {
      "epoch": 1.5885282500429332,
      "grad_norm": 3.234375,
      "learning_rate": 2.6138461178999513e-05,
      "loss": 0.8615,
      "step": 453250
    },
    {
      "epoch": 1.588563297549829,
      "grad_norm": 3.1875,
      "learning_rate": 2.613781215033581e-05,
      "loss": 0.7621,
      "step": 453260
    },
    {
      "epoch": 1.5885983450567243,
      "grad_norm": 2.390625,
      "learning_rate": 2.613716312167211e-05,
      "loss": 0.7878,
      "step": 453270
    },
    {
      "epoch": 1.58863339256362,
      "grad_norm": 2.53125,
      "learning_rate": 2.6136514093008407e-05,
      "loss": 0.8939,
      "step": 453280
    },
    {
      "epoch": 1.5886684400705156,
      "grad_norm": 2.96875,
      "learning_rate": 2.6135865064344705e-05,
      "loss": 0.8746,
      "step": 453290
    },
    {
      "epoch": 1.588703487577411,
      "grad_norm": 3.203125,
      "learning_rate": 2.6135216035681e-05,
      "loss": 0.8425,
      "step": 453300
    },
    {
      "epoch": 1.5887385350843068,
      "grad_norm": 2.875,
      "learning_rate": 2.6134567007017297e-05,
      "loss": 0.8928,
      "step": 453310
    },
    {
      "epoch": 1.5887735825912024,
      "grad_norm": 2.953125,
      "learning_rate": 2.6133917978353595e-05,
      "loss": 0.9602,
      "step": 453320
    },
    {
      "epoch": 1.588808630098098,
      "grad_norm": 2.34375,
      "learning_rate": 2.6133268949689893e-05,
      "loss": 0.7759,
      "step": 453330
    },
    {
      "epoch": 1.5888436776049937,
      "grad_norm": 2.890625,
      "learning_rate": 2.613261992102619e-05,
      "loss": 0.865,
      "step": 453340
    },
    {
      "epoch": 1.5888787251118892,
      "grad_norm": 2.796875,
      "learning_rate": 2.613197089236249e-05,
      "loss": 0.8395,
      "step": 453350
    },
    {
      "epoch": 1.5889137726187847,
      "grad_norm": 2.46875,
      "learning_rate": 2.6131321863698787e-05,
      "loss": 0.8583,
      "step": 453360
    },
    {
      "epoch": 1.5889488201256805,
      "grad_norm": 3.015625,
      "learning_rate": 2.6130672835035085e-05,
      "loss": 0.7965,
      "step": 453370
    },
    {
      "epoch": 1.5889838676325758,
      "grad_norm": 2.78125,
      "learning_rate": 2.6130023806371383e-05,
      "loss": 0.7901,
      "step": 453380
    },
    {
      "epoch": 1.5890189151394716,
      "grad_norm": 2.71875,
      "learning_rate": 2.612937477770768e-05,
      "loss": 0.7758,
      "step": 453390
    },
    {
      "epoch": 1.5890539626463671,
      "grad_norm": 2.453125,
      "learning_rate": 2.612872574904398e-05,
      "loss": 0.8029,
      "step": 453400
    },
    {
      "epoch": 1.5890890101532626,
      "grad_norm": 3.53125,
      "learning_rate": 2.612807672038028e-05,
      "loss": 0.7986,
      "step": 453410
    },
    {
      "epoch": 1.5891240576601584,
      "grad_norm": 2.6875,
      "learning_rate": 2.612742769171658e-05,
      "loss": 0.8207,
      "step": 453420
    },
    {
      "epoch": 1.589159105167054,
      "grad_norm": 3.078125,
      "learning_rate": 2.6126778663052877e-05,
      "loss": 0.8255,
      "step": 453430
    },
    {
      "epoch": 1.5891941526739495,
      "grad_norm": 3.0625,
      "learning_rate": 2.6126129634389175e-05,
      "loss": 0.8328,
      "step": 453440
    },
    {
      "epoch": 1.5892292001808452,
      "grad_norm": 3.203125,
      "learning_rate": 2.6125480605725473e-05,
      "loss": 0.8958,
      "step": 453450
    },
    {
      "epoch": 1.5892642476877408,
      "grad_norm": 3.34375,
      "learning_rate": 2.612483157706177e-05,
      "loss": 0.8661,
      "step": 453460
    },
    {
      "epoch": 1.5892992951946363,
      "grad_norm": 2.953125,
      "learning_rate": 2.612418254839807e-05,
      "loss": 0.8806,
      "step": 453470
    },
    {
      "epoch": 1.589334342701532,
      "grad_norm": 2.609375,
      "learning_rate": 2.6123533519734367e-05,
      "loss": 0.8094,
      "step": 453480
    },
    {
      "epoch": 1.5893693902084274,
      "grad_norm": 2.4375,
      "learning_rate": 2.6122884491070665e-05,
      "loss": 0.8245,
      "step": 453490
    },
    {
      "epoch": 1.5894044377153231,
      "grad_norm": 2.875,
      "learning_rate": 2.6122235462406963e-05,
      "loss": 0.779,
      "step": 453500
    },
    {
      "epoch": 1.5894394852222187,
      "grad_norm": 2.9375,
      "learning_rate": 2.612158643374326e-05,
      "loss": 0.757,
      "step": 453510
    },
    {
      "epoch": 1.5894745327291142,
      "grad_norm": 3.046875,
      "learning_rate": 2.612093740507956e-05,
      "loss": 0.8548,
      "step": 453520
    },
    {
      "epoch": 1.58950958023601,
      "grad_norm": 2.859375,
      "learning_rate": 2.6120288376415857e-05,
      "loss": 0.8315,
      "step": 453530
    },
    {
      "epoch": 1.5895446277429055,
      "grad_norm": 3.21875,
      "learning_rate": 2.6119639347752155e-05,
      "loss": 0.8292,
      "step": 453540
    },
    {
      "epoch": 1.589579675249801,
      "grad_norm": 3.015625,
      "learning_rate": 2.6118990319088456e-05,
      "loss": 0.8476,
      "step": 453550
    },
    {
      "epoch": 1.5896147227566968,
      "grad_norm": 2.71875,
      "learning_rate": 2.6118341290424754e-05,
      "loss": 0.7177,
      "step": 453560
    },
    {
      "epoch": 1.5896497702635923,
      "grad_norm": 3.03125,
      "learning_rate": 2.6117692261761052e-05,
      "loss": 0.8249,
      "step": 453570
    },
    {
      "epoch": 1.5896848177704879,
      "grad_norm": 2.921875,
      "learning_rate": 2.611704323309735e-05,
      "loss": 0.8911,
      "step": 453580
    },
    {
      "epoch": 1.5897198652773836,
      "grad_norm": 2.6875,
      "learning_rate": 2.6116394204433648e-05,
      "loss": 0.7716,
      "step": 453590
    },
    {
      "epoch": 1.589754912784279,
      "grad_norm": 3.40625,
      "learning_rate": 2.6115745175769946e-05,
      "loss": 0.9196,
      "step": 453600
    },
    {
      "epoch": 1.5897899602911747,
      "grad_norm": 2.796875,
      "learning_rate": 2.6115096147106244e-05,
      "loss": 0.8775,
      "step": 453610
    },
    {
      "epoch": 1.5898250077980702,
      "grad_norm": 3.0,
      "learning_rate": 2.6114447118442542e-05,
      "loss": 0.8823,
      "step": 453620
    },
    {
      "epoch": 1.5898600553049658,
      "grad_norm": 2.59375,
      "learning_rate": 2.611379808977884e-05,
      "loss": 0.8138,
      "step": 453630
    },
    {
      "epoch": 1.5898951028118615,
      "grad_norm": 3.34375,
      "learning_rate": 2.6113149061115138e-05,
      "loss": 0.8825,
      "step": 453640
    },
    {
      "epoch": 1.589930150318757,
      "grad_norm": 2.875,
      "learning_rate": 2.6112500032451436e-05,
      "loss": 0.8624,
      "step": 453650
    },
    {
      "epoch": 1.5899651978256526,
      "grad_norm": 3.171875,
      "learning_rate": 2.6111851003787734e-05,
      "loss": 0.8004,
      "step": 453660
    },
    {
      "epoch": 1.5900002453325484,
      "grad_norm": 2.921875,
      "learning_rate": 2.6111201975124032e-05,
      "loss": 0.891,
      "step": 453670
    },
    {
      "epoch": 1.590035292839444,
      "grad_norm": 2.671875,
      "learning_rate": 2.6110552946460327e-05,
      "loss": 0.8176,
      "step": 453680
    },
    {
      "epoch": 1.5900703403463394,
      "grad_norm": 2.84375,
      "learning_rate": 2.6109903917796625e-05,
      "loss": 0.9511,
      "step": 453690
    },
    {
      "epoch": 1.5901053878532352,
      "grad_norm": 2.8125,
      "learning_rate": 2.6109254889132923e-05,
      "loss": 0.8203,
      "step": 453700
    },
    {
      "epoch": 1.5901404353601305,
      "grad_norm": 3.015625,
      "learning_rate": 2.610860586046922e-05,
      "loss": 0.8616,
      "step": 453710
    },
    {
      "epoch": 1.5901754828670263,
      "grad_norm": 3.203125,
      "learning_rate": 2.610795683180552e-05,
      "loss": 0.8276,
      "step": 453720
    },
    {
      "epoch": 1.590210530373922,
      "grad_norm": 2.890625,
      "learning_rate": 2.6107307803141817e-05,
      "loss": 0.8161,
      "step": 453730
    },
    {
      "epoch": 1.5902455778808173,
      "grad_norm": 3.15625,
      "learning_rate": 2.6106658774478115e-05,
      "loss": 0.822,
      "step": 453740
    },
    {
      "epoch": 1.590280625387713,
      "grad_norm": 3.0,
      "learning_rate": 2.6106009745814413e-05,
      "loss": 0.795,
      "step": 453750
    },
    {
      "epoch": 1.5903156728946086,
      "grad_norm": 2.859375,
      "learning_rate": 2.610536071715071e-05,
      "loss": 0.9667,
      "step": 453760
    },
    {
      "epoch": 1.5903507204015042,
      "grad_norm": 2.6875,
      "learning_rate": 2.610471168848701e-05,
      "loss": 0.845,
      "step": 453770
    },
    {
      "epoch": 1.5903857679084,
      "grad_norm": 2.671875,
      "learning_rate": 2.610406265982331e-05,
      "loss": 0.8203,
      "step": 453780
    },
    {
      "epoch": 1.5904208154152955,
      "grad_norm": 3.328125,
      "learning_rate": 2.6103413631159608e-05,
      "loss": 0.8267,
      "step": 453790
    },
    {
      "epoch": 1.590455862922191,
      "grad_norm": 3.46875,
      "learning_rate": 2.6102764602495906e-05,
      "loss": 0.8694,
      "step": 453800
    },
    {
      "epoch": 1.5904909104290867,
      "grad_norm": 3.46875,
      "learning_rate": 2.6102115573832204e-05,
      "loss": 0.8717,
      "step": 453810
    },
    {
      "epoch": 1.590525957935982,
      "grad_norm": 2.6875,
      "learning_rate": 2.6101466545168502e-05,
      "loss": 0.8495,
      "step": 453820
    },
    {
      "epoch": 1.5905610054428778,
      "grad_norm": 2.609375,
      "learning_rate": 2.61008175165048e-05,
      "loss": 0.8636,
      "step": 453830
    },
    {
      "epoch": 1.5905960529497736,
      "grad_norm": 3.328125,
      "learning_rate": 2.6100168487841098e-05,
      "loss": 0.8115,
      "step": 453840
    },
    {
      "epoch": 1.590631100456669,
      "grad_norm": 3.0625,
      "learning_rate": 2.6099519459177396e-05,
      "loss": 0.8019,
      "step": 453850
    },
    {
      "epoch": 1.5906661479635646,
      "grad_norm": 3.09375,
      "learning_rate": 2.6098870430513694e-05,
      "loss": 0.8056,
      "step": 453860
    },
    {
      "epoch": 1.5907011954704602,
      "grad_norm": 3.390625,
      "learning_rate": 2.6098221401849992e-05,
      "loss": 0.8505,
      "step": 453870
    },
    {
      "epoch": 1.5907362429773557,
      "grad_norm": 2.78125,
      "learning_rate": 2.609757237318629e-05,
      "loss": 0.8151,
      "step": 453880
    },
    {
      "epoch": 1.5907712904842515,
      "grad_norm": 2.828125,
      "learning_rate": 2.6096923344522588e-05,
      "loss": 0.8048,
      "step": 453890
    },
    {
      "epoch": 1.590806337991147,
      "grad_norm": 2.609375,
      "learning_rate": 2.6096274315858886e-05,
      "loss": 0.8528,
      "step": 453900
    },
    {
      "epoch": 1.5908413854980425,
      "grad_norm": 2.90625,
      "learning_rate": 2.6095625287195187e-05,
      "loss": 0.7646,
      "step": 453910
    },
    {
      "epoch": 1.5908764330049383,
      "grad_norm": 2.765625,
      "learning_rate": 2.6094976258531485e-05,
      "loss": 0.797,
      "step": 453920
    },
    {
      "epoch": 1.5909114805118338,
      "grad_norm": 3.296875,
      "learning_rate": 2.6094327229867783e-05,
      "loss": 0.8567,
      "step": 453930
    },
    {
      "epoch": 1.5909465280187294,
      "grad_norm": 3.0,
      "learning_rate": 2.609367820120408e-05,
      "loss": 0.9033,
      "step": 453940
    },
    {
      "epoch": 1.5909815755256251,
      "grad_norm": 2.90625,
      "learning_rate": 2.609302917254038e-05,
      "loss": 0.7892,
      "step": 453950
    },
    {
      "epoch": 1.5910166230325204,
      "grad_norm": 3.109375,
      "learning_rate": 2.6092380143876677e-05,
      "loss": 0.8379,
      "step": 453960
    },
    {
      "epoch": 1.5910516705394162,
      "grad_norm": 2.859375,
      "learning_rate": 2.6091731115212975e-05,
      "loss": 0.837,
      "step": 453970
    },
    {
      "epoch": 1.5910867180463117,
      "grad_norm": 3.234375,
      "learning_rate": 2.6091082086549273e-05,
      "loss": 0.8716,
      "step": 453980
    },
    {
      "epoch": 1.5911217655532073,
      "grad_norm": 3.5625,
      "learning_rate": 2.609043305788557e-05,
      "loss": 0.7876,
      "step": 453990
    },
    {
      "epoch": 1.591156813060103,
      "grad_norm": 2.828125,
      "learning_rate": 2.608978402922187e-05,
      "loss": 0.8959,
      "step": 454000
    },
    {
      "epoch": 1.5911918605669986,
      "grad_norm": 2.90625,
      "learning_rate": 2.6089135000558167e-05,
      "loss": 0.8457,
      "step": 454010
    },
    {
      "epoch": 1.591226908073894,
      "grad_norm": 2.84375,
      "learning_rate": 2.6088485971894465e-05,
      "loss": 0.8079,
      "step": 454020
    },
    {
      "epoch": 1.5912619555807899,
      "grad_norm": 2.53125,
      "learning_rate": 2.6087836943230763e-05,
      "loss": 0.8537,
      "step": 454030
    },
    {
      "epoch": 1.5912970030876854,
      "grad_norm": 2.5625,
      "learning_rate": 2.608718791456706e-05,
      "loss": 0.899,
      "step": 454040
    },
    {
      "epoch": 1.591332050594581,
      "grad_norm": 3.0625,
      "learning_rate": 2.6086538885903363e-05,
      "loss": 0.8599,
      "step": 454050
    },
    {
      "epoch": 1.5913670981014767,
      "grad_norm": 2.96875,
      "learning_rate": 2.6085889857239654e-05,
      "loss": 0.8331,
      "step": 454060
    },
    {
      "epoch": 1.591402145608372,
      "grad_norm": 2.765625,
      "learning_rate": 2.6085240828575952e-05,
      "loss": 0.8302,
      "step": 454070
    },
    {
      "epoch": 1.5914371931152678,
      "grad_norm": 2.9375,
      "learning_rate": 2.608459179991225e-05,
      "loss": 0.8395,
      "step": 454080
    },
    {
      "epoch": 1.5914722406221633,
      "grad_norm": 3.25,
      "learning_rate": 2.6083942771248548e-05,
      "loss": 0.8939,
      "step": 454090
    },
    {
      "epoch": 1.5915072881290588,
      "grad_norm": 3.4375,
      "learning_rate": 2.6083293742584846e-05,
      "loss": 0.7847,
      "step": 454100
    },
    {
      "epoch": 1.5915423356359546,
      "grad_norm": 2.78125,
      "learning_rate": 2.6082644713921144e-05,
      "loss": 0.8483,
      "step": 454110
    },
    {
      "epoch": 1.5915773831428501,
      "grad_norm": 2.875,
      "learning_rate": 2.6081995685257442e-05,
      "loss": 0.8079,
      "step": 454120
    },
    {
      "epoch": 1.5916124306497457,
      "grad_norm": 3.171875,
      "learning_rate": 2.608134665659374e-05,
      "loss": 0.8816,
      "step": 454130
    },
    {
      "epoch": 1.5916474781566414,
      "grad_norm": 2.4375,
      "learning_rate": 2.608069762793004e-05,
      "loss": 0.819,
      "step": 454140
    },
    {
      "epoch": 1.591682525663537,
      "grad_norm": 2.953125,
      "learning_rate": 2.608004859926634e-05,
      "loss": 0.851,
      "step": 454150
    },
    {
      "epoch": 1.5917175731704325,
      "grad_norm": 3.0,
      "learning_rate": 2.6079399570602637e-05,
      "loss": 0.8519,
      "step": 454160
    },
    {
      "epoch": 1.5917526206773283,
      "grad_norm": 2.953125,
      "learning_rate": 2.6078750541938935e-05,
      "loss": 0.8542,
      "step": 454170
    },
    {
      "epoch": 1.5917876681842236,
      "grad_norm": 3.203125,
      "learning_rate": 2.6078101513275233e-05,
      "loss": 0.8452,
      "step": 454180
    },
    {
      "epoch": 1.5918227156911193,
      "grad_norm": 2.890625,
      "learning_rate": 2.607745248461153e-05,
      "loss": 0.7568,
      "step": 454190
    },
    {
      "epoch": 1.5918577631980149,
      "grad_norm": 3.09375,
      "learning_rate": 2.607680345594783e-05,
      "loss": 0.8328,
      "step": 454200
    },
    {
      "epoch": 1.5918928107049104,
      "grad_norm": 2.75,
      "learning_rate": 2.6076154427284127e-05,
      "loss": 0.9181,
      "step": 454210
    },
    {
      "epoch": 1.5919278582118062,
      "grad_norm": 2.8125,
      "learning_rate": 2.6075505398620425e-05,
      "loss": 0.9209,
      "step": 454220
    },
    {
      "epoch": 1.5919629057187017,
      "grad_norm": 2.421875,
      "learning_rate": 2.6074856369956723e-05,
      "loss": 0.8563,
      "step": 454230
    },
    {
      "epoch": 1.5919979532255972,
      "grad_norm": 2.765625,
      "learning_rate": 2.607420734129302e-05,
      "loss": 0.7768,
      "step": 454240
    },
    {
      "epoch": 1.592033000732493,
      "grad_norm": 2.390625,
      "learning_rate": 2.607355831262932e-05,
      "loss": 0.8119,
      "step": 454250
    },
    {
      "epoch": 1.5920680482393885,
      "grad_norm": 3.046875,
      "learning_rate": 2.6072909283965617e-05,
      "loss": 0.8687,
      "step": 454260
    },
    {
      "epoch": 1.592103095746284,
      "grad_norm": 2.796875,
      "learning_rate": 2.6072260255301915e-05,
      "loss": 0.8185,
      "step": 454270
    },
    {
      "epoch": 1.5921381432531798,
      "grad_norm": 2.78125,
      "learning_rate": 2.6071611226638217e-05,
      "loss": 0.8174,
      "step": 454280
    },
    {
      "epoch": 1.5921731907600751,
      "grad_norm": 2.4375,
      "learning_rate": 2.6070962197974515e-05,
      "loss": 0.7774,
      "step": 454290
    },
    {
      "epoch": 1.5922082382669709,
      "grad_norm": 2.96875,
      "learning_rate": 2.6070313169310813e-05,
      "loss": 0.8745,
      "step": 454300
    },
    {
      "epoch": 1.5922432857738664,
      "grad_norm": 3.3125,
      "learning_rate": 2.606966414064711e-05,
      "loss": 0.8015,
      "step": 454310
    },
    {
      "epoch": 1.592278333280762,
      "grad_norm": 3.03125,
      "learning_rate": 2.606901511198341e-05,
      "loss": 0.7389,
      "step": 454320
    },
    {
      "epoch": 1.5923133807876577,
      "grad_norm": 2.921875,
      "learning_rate": 2.6068366083319707e-05,
      "loss": 0.79,
      "step": 454330
    },
    {
      "epoch": 1.5923484282945533,
      "grad_norm": 2.96875,
      "learning_rate": 2.6067717054656005e-05,
      "loss": 0.8595,
      "step": 454340
    },
    {
      "epoch": 1.5923834758014488,
      "grad_norm": 2.640625,
      "learning_rate": 2.6067068025992303e-05,
      "loss": 0.9136,
      "step": 454350
    },
    {
      "epoch": 1.5924185233083445,
      "grad_norm": 3.140625,
      "learning_rate": 2.60664189973286e-05,
      "loss": 0.895,
      "step": 454360
    },
    {
      "epoch": 1.59245357081524,
      "grad_norm": 2.71875,
      "learning_rate": 2.60657699686649e-05,
      "loss": 0.805,
      "step": 454370
    },
    {
      "epoch": 1.5924886183221356,
      "grad_norm": 2.71875,
      "learning_rate": 2.6065120940001197e-05,
      "loss": 0.9149,
      "step": 454380
    },
    {
      "epoch": 1.5925236658290314,
      "grad_norm": 2.78125,
      "learning_rate": 2.6064471911337495e-05,
      "loss": 0.8463,
      "step": 454390
    },
    {
      "epoch": 1.5925587133359267,
      "grad_norm": 2.828125,
      "learning_rate": 2.6063822882673793e-05,
      "loss": 0.8411,
      "step": 454400
    },
    {
      "epoch": 1.5925937608428224,
      "grad_norm": 2.40625,
      "learning_rate": 2.606317385401009e-05,
      "loss": 0.8822,
      "step": 454410
    },
    {
      "epoch": 1.5926288083497182,
      "grad_norm": 2.9375,
      "learning_rate": 2.6062524825346392e-05,
      "loss": 0.7997,
      "step": 454420
    },
    {
      "epoch": 1.5926638558566135,
      "grad_norm": 2.671875,
      "learning_rate": 2.6061875796682683e-05,
      "loss": 0.8718,
      "step": 454430
    },
    {
      "epoch": 1.5926989033635093,
      "grad_norm": 2.96875,
      "learning_rate": 2.606122676801898e-05,
      "loss": 0.8971,
      "step": 454440
    },
    {
      "epoch": 1.5927339508704048,
      "grad_norm": 3.015625,
      "learning_rate": 2.606057773935528e-05,
      "loss": 0.8313,
      "step": 454450
    },
    {
      "epoch": 1.5927689983773003,
      "grad_norm": 2.90625,
      "learning_rate": 2.6059928710691577e-05,
      "loss": 0.8908,
      "step": 454460
    },
    {
      "epoch": 1.592804045884196,
      "grad_norm": 3.15625,
      "learning_rate": 2.6059279682027875e-05,
      "loss": 0.8181,
      "step": 454470
    },
    {
      "epoch": 1.5928390933910916,
      "grad_norm": 2.875,
      "learning_rate": 2.6058630653364173e-05,
      "loss": 0.737,
      "step": 454480
    },
    {
      "epoch": 1.5928741408979872,
      "grad_norm": 3.234375,
      "learning_rate": 2.605798162470047e-05,
      "loss": 0.7939,
      "step": 454490
    },
    {
      "epoch": 1.592909188404883,
      "grad_norm": 3.03125,
      "learning_rate": 2.605733259603677e-05,
      "loss": 0.8581,
      "step": 454500
    },
    {
      "epoch": 1.5929442359117783,
      "grad_norm": 2.65625,
      "learning_rate": 2.605668356737307e-05,
      "loss": 0.8075,
      "step": 454510
    },
    {
      "epoch": 1.592979283418674,
      "grad_norm": 3.125,
      "learning_rate": 2.605603453870937e-05,
      "loss": 0.8099,
      "step": 454520
    },
    {
      "epoch": 1.5930143309255698,
      "grad_norm": 2.421875,
      "learning_rate": 2.6055385510045667e-05,
      "loss": 0.7688,
      "step": 454530
    },
    {
      "epoch": 1.593049378432465,
      "grad_norm": 2.828125,
      "learning_rate": 2.6054736481381965e-05,
      "loss": 0.8023,
      "step": 454540
    },
    {
      "epoch": 1.5930844259393608,
      "grad_norm": 3.078125,
      "learning_rate": 2.6054087452718263e-05,
      "loss": 0.7733,
      "step": 454550
    },
    {
      "epoch": 1.5931194734462564,
      "grad_norm": 2.953125,
      "learning_rate": 2.605343842405456e-05,
      "loss": 0.871,
      "step": 454560
    },
    {
      "epoch": 1.593154520953152,
      "grad_norm": 2.625,
      "learning_rate": 2.605278939539086e-05,
      "loss": 0.7885,
      "step": 454570
    },
    {
      "epoch": 1.5931895684600477,
      "grad_norm": 3.15625,
      "learning_rate": 2.6052140366727157e-05,
      "loss": 0.9155,
      "step": 454580
    },
    {
      "epoch": 1.5932246159669432,
      "grad_norm": 3.125,
      "learning_rate": 2.6051491338063455e-05,
      "loss": 0.9239,
      "step": 454590
    },
    {
      "epoch": 1.5932596634738387,
      "grad_norm": 2.625,
      "learning_rate": 2.6050842309399753e-05,
      "loss": 0.8675,
      "step": 454600
    },
    {
      "epoch": 1.5932947109807345,
      "grad_norm": 2.65625,
      "learning_rate": 2.605019328073605e-05,
      "loss": 0.8881,
      "step": 454610
    },
    {
      "epoch": 1.59332975848763,
      "grad_norm": 2.53125,
      "learning_rate": 2.604954425207235e-05,
      "loss": 0.7883,
      "step": 454620
    },
    {
      "epoch": 1.5933648059945256,
      "grad_norm": 2.71875,
      "learning_rate": 2.6048895223408647e-05,
      "loss": 0.8673,
      "step": 454630
    },
    {
      "epoch": 1.5933998535014213,
      "grad_norm": 2.96875,
      "learning_rate": 2.6048246194744945e-05,
      "loss": 0.8768,
      "step": 454640
    },
    {
      "epoch": 1.5934349010083166,
      "grad_norm": 2.375,
      "learning_rate": 2.6047597166081246e-05,
      "loss": 0.8304,
      "step": 454650
    },
    {
      "epoch": 1.5934699485152124,
      "grad_norm": 2.625,
      "learning_rate": 2.6046948137417544e-05,
      "loss": 0.7841,
      "step": 454660
    },
    {
      "epoch": 1.593504996022108,
      "grad_norm": 2.984375,
      "learning_rate": 2.6046299108753842e-05,
      "loss": 0.814,
      "step": 454670
    },
    {
      "epoch": 1.5935400435290035,
      "grad_norm": 2.578125,
      "learning_rate": 2.604565008009014e-05,
      "loss": 0.8022,
      "step": 454680
    },
    {
      "epoch": 1.5935750910358992,
      "grad_norm": 2.609375,
      "learning_rate": 2.6045001051426438e-05,
      "loss": 0.7939,
      "step": 454690
    },
    {
      "epoch": 1.5936101385427948,
      "grad_norm": 3.8125,
      "learning_rate": 2.6044352022762736e-05,
      "loss": 0.8309,
      "step": 454700
    },
    {
      "epoch": 1.5936451860496903,
      "grad_norm": 3.390625,
      "learning_rate": 2.6043702994099034e-05,
      "loss": 0.9099,
      "step": 454710
    },
    {
      "epoch": 1.593680233556586,
      "grad_norm": 2.78125,
      "learning_rate": 2.6043053965435332e-05,
      "loss": 0.8708,
      "step": 454720
    },
    {
      "epoch": 1.5937152810634816,
      "grad_norm": 2.796875,
      "learning_rate": 2.604240493677163e-05,
      "loss": 0.8324,
      "step": 454730
    },
    {
      "epoch": 1.5937503285703771,
      "grad_norm": 3.34375,
      "learning_rate": 2.6041755908107928e-05,
      "loss": 0.8978,
      "step": 454740
    },
    {
      "epoch": 1.5937853760772729,
      "grad_norm": 3.1875,
      "learning_rate": 2.6041106879444226e-05,
      "loss": 0.8129,
      "step": 454750
    },
    {
      "epoch": 1.5938204235841682,
      "grad_norm": 2.921875,
      "learning_rate": 2.6040457850780524e-05,
      "loss": 0.8261,
      "step": 454760
    },
    {
      "epoch": 1.593855471091064,
      "grad_norm": 3.171875,
      "learning_rate": 2.6039808822116822e-05,
      "loss": 0.8309,
      "step": 454770
    },
    {
      "epoch": 1.5938905185979595,
      "grad_norm": 3.0625,
      "learning_rate": 2.603915979345312e-05,
      "loss": 0.8874,
      "step": 454780
    },
    {
      "epoch": 1.593925566104855,
      "grad_norm": 3.0,
      "learning_rate": 2.603851076478942e-05,
      "loss": 0.8042,
      "step": 454790
    },
    {
      "epoch": 1.5939606136117508,
      "grad_norm": 2.59375,
      "learning_rate": 2.603786173612572e-05,
      "loss": 0.868,
      "step": 454800
    },
    {
      "epoch": 1.5939956611186463,
      "grad_norm": 2.953125,
      "learning_rate": 2.603721270746201e-05,
      "loss": 0.8123,
      "step": 454810
    },
    {
      "epoch": 1.5940307086255419,
      "grad_norm": 2.921875,
      "learning_rate": 2.603656367879831e-05,
      "loss": 0.8222,
      "step": 454820
    },
    {
      "epoch": 1.5940657561324376,
      "grad_norm": 3.125,
      "learning_rate": 2.6035914650134607e-05,
      "loss": 0.7664,
      "step": 454830
    },
    {
      "epoch": 1.5941008036393332,
      "grad_norm": 2.859375,
      "learning_rate": 2.6035265621470905e-05,
      "loss": 0.8856,
      "step": 454840
    },
    {
      "epoch": 1.5941358511462287,
      "grad_norm": 3.0,
      "learning_rate": 2.6034616592807203e-05,
      "loss": 0.8419,
      "step": 454850
    },
    {
      "epoch": 1.5941708986531244,
      "grad_norm": 2.515625,
      "learning_rate": 2.60339675641435e-05,
      "loss": 0.9087,
      "step": 454860
    },
    {
      "epoch": 1.5942059461600198,
      "grad_norm": 3.109375,
      "learning_rate": 2.60333185354798e-05,
      "loss": 0.8501,
      "step": 454870
    },
    {
      "epoch": 1.5942409936669155,
      "grad_norm": 2.90625,
      "learning_rate": 2.60326695068161e-05,
      "loss": 0.8552,
      "step": 454880
    },
    {
      "epoch": 1.594276041173811,
      "grad_norm": 2.6875,
      "learning_rate": 2.6032020478152398e-05,
      "loss": 0.853,
      "step": 454890
    },
    {
      "epoch": 1.5943110886807066,
      "grad_norm": 2.71875,
      "learning_rate": 2.6031371449488696e-05,
      "loss": 0.8405,
      "step": 454900
    },
    {
      "epoch": 1.5943461361876023,
      "grad_norm": 2.796875,
      "learning_rate": 2.6030722420824994e-05,
      "loss": 0.8142,
      "step": 454910
    },
    {
      "epoch": 1.5943811836944979,
      "grad_norm": 2.703125,
      "learning_rate": 2.6030073392161292e-05,
      "loss": 0.7821,
      "step": 454920
    },
    {
      "epoch": 1.5944162312013934,
      "grad_norm": 2.625,
      "learning_rate": 2.602942436349759e-05,
      "loss": 0.8097,
      "step": 454930
    },
    {
      "epoch": 1.5944512787082892,
      "grad_norm": 3.53125,
      "learning_rate": 2.6028775334833888e-05,
      "loss": 0.8422,
      "step": 454940
    },
    {
      "epoch": 1.5944863262151847,
      "grad_norm": 3.0625,
      "learning_rate": 2.6028126306170186e-05,
      "loss": 0.8627,
      "step": 454950
    },
    {
      "epoch": 1.5945213737220802,
      "grad_norm": 3.125,
      "learning_rate": 2.6027477277506484e-05,
      "loss": 0.8631,
      "step": 454960
    },
    {
      "epoch": 1.594556421228976,
      "grad_norm": 2.9375,
      "learning_rate": 2.6026828248842782e-05,
      "loss": 0.8182,
      "step": 454970
    },
    {
      "epoch": 1.5945914687358713,
      "grad_norm": 2.625,
      "learning_rate": 2.602617922017908e-05,
      "loss": 0.7956,
      "step": 454980
    },
    {
      "epoch": 1.594626516242767,
      "grad_norm": 2.90625,
      "learning_rate": 2.6025530191515378e-05,
      "loss": 0.832,
      "step": 454990
    },
    {
      "epoch": 1.5946615637496626,
      "grad_norm": 2.828125,
      "learning_rate": 2.6024881162851676e-05,
      "loss": 0.8314,
      "step": 455000
    },
    {
      "epoch": 1.5946615637496626,
      "eval_loss": 0.787390410900116,
      "eval_runtime": 565.1221,
      "eval_samples_per_second": 673.193,
      "eval_steps_per_second": 56.099,
      "step": 455000
    },
    {
      "epoch": 1.5946966112565581,
      "grad_norm": 3.0625,
      "learning_rate": 2.6024232134187977e-05,
      "loss": 0.8021,
      "step": 455010
    },
    {
      "epoch": 1.594731658763454,
      "grad_norm": 2.875,
      "learning_rate": 2.6023583105524275e-05,
      "loss": 0.8425,
      "step": 455020
    },
    {
      "epoch": 1.5947667062703494,
      "grad_norm": 2.96875,
      "learning_rate": 2.6022934076860573e-05,
      "loss": 0.9073,
      "step": 455030
    },
    {
      "epoch": 1.594801753777245,
      "grad_norm": 2.9375,
      "learning_rate": 2.602228504819687e-05,
      "loss": 0.7949,
      "step": 455040
    },
    {
      "epoch": 1.5948368012841407,
      "grad_norm": 2.953125,
      "learning_rate": 2.602163601953317e-05,
      "loss": 0.9684,
      "step": 455050
    },
    {
      "epoch": 1.5948718487910363,
      "grad_norm": 2.96875,
      "learning_rate": 2.6020986990869467e-05,
      "loss": 0.7978,
      "step": 455060
    },
    {
      "epoch": 1.5949068962979318,
      "grad_norm": 3.078125,
      "learning_rate": 2.6020337962205765e-05,
      "loss": 0.7864,
      "step": 455070
    },
    {
      "epoch": 1.5949419438048276,
      "grad_norm": 3.3125,
      "learning_rate": 2.6019688933542063e-05,
      "loss": 0.8394,
      "step": 455080
    },
    {
      "epoch": 1.5949769913117229,
      "grad_norm": 3.359375,
      "learning_rate": 2.601903990487836e-05,
      "loss": 0.8883,
      "step": 455090
    },
    {
      "epoch": 1.5950120388186186,
      "grad_norm": 2.8125,
      "learning_rate": 2.601839087621466e-05,
      "loss": 0.7948,
      "step": 455100
    },
    {
      "epoch": 1.5950470863255144,
      "grad_norm": 2.796875,
      "learning_rate": 2.6017741847550957e-05,
      "loss": 0.8831,
      "step": 455110
    },
    {
      "epoch": 1.5950821338324097,
      "grad_norm": 2.71875,
      "learning_rate": 2.6017092818887255e-05,
      "loss": 0.7866,
      "step": 455120
    },
    {
      "epoch": 1.5951171813393055,
      "grad_norm": 2.90625,
      "learning_rate": 2.6016443790223553e-05,
      "loss": 0.8712,
      "step": 455130
    },
    {
      "epoch": 1.595152228846201,
      "grad_norm": 2.546875,
      "learning_rate": 2.601579476155985e-05,
      "loss": 0.8474,
      "step": 455140
    },
    {
      "epoch": 1.5951872763530965,
      "grad_norm": 3.078125,
      "learning_rate": 2.6015145732896153e-05,
      "loss": 0.8156,
      "step": 455150
    },
    {
      "epoch": 1.5952223238599923,
      "grad_norm": 2.859375,
      "learning_rate": 2.601449670423245e-05,
      "loss": 0.9505,
      "step": 455160
    },
    {
      "epoch": 1.5952573713668878,
      "grad_norm": 3.140625,
      "learning_rate": 2.601384767556875e-05,
      "loss": 0.8502,
      "step": 455170
    },
    {
      "epoch": 1.5952924188737834,
      "grad_norm": 2.953125,
      "learning_rate": 2.6013198646905047e-05,
      "loss": 0.8707,
      "step": 455180
    },
    {
      "epoch": 1.5953274663806791,
      "grad_norm": 2.53125,
      "learning_rate": 2.6012549618241338e-05,
      "loss": 0.8154,
      "step": 455190
    },
    {
      "epoch": 1.5953625138875747,
      "grad_norm": 3.453125,
      "learning_rate": 2.6011900589577636e-05,
      "loss": 0.8207,
      "step": 455200
    },
    {
      "epoch": 1.5953975613944702,
      "grad_norm": 3.1875,
      "learning_rate": 2.6011251560913934e-05,
      "loss": 0.7847,
      "step": 455210
    },
    {
      "epoch": 1.595432608901366,
      "grad_norm": 2.5,
      "learning_rate": 2.6010602532250232e-05,
      "loss": 0.7916,
      "step": 455220
    },
    {
      "epoch": 1.5954676564082613,
      "grad_norm": 2.828125,
      "learning_rate": 2.600995350358653e-05,
      "loss": 0.7887,
      "step": 455230
    },
    {
      "epoch": 1.595502703915157,
      "grad_norm": 2.734375,
      "learning_rate": 2.600930447492283e-05,
      "loss": 0.8052,
      "step": 455240
    },
    {
      "epoch": 1.5955377514220526,
      "grad_norm": 2.90625,
      "learning_rate": 2.600865544625913e-05,
      "loss": 0.8102,
      "step": 455250
    },
    {
      "epoch": 1.595572798928948,
      "grad_norm": 2.9375,
      "learning_rate": 2.6008006417595427e-05,
      "loss": 0.8333,
      "step": 455260
    },
    {
      "epoch": 1.5956078464358439,
      "grad_norm": 3.0625,
      "learning_rate": 2.6007357388931725e-05,
      "loss": 0.7241,
      "step": 455270
    },
    {
      "epoch": 1.5956428939427394,
      "grad_norm": 2.96875,
      "learning_rate": 2.6006708360268023e-05,
      "loss": 0.8578,
      "step": 455280
    },
    {
      "epoch": 1.595677941449635,
      "grad_norm": 2.984375,
      "learning_rate": 2.600605933160432e-05,
      "loss": 0.7766,
      "step": 455290
    },
    {
      "epoch": 1.5957129889565307,
      "grad_norm": 2.4375,
      "learning_rate": 2.600541030294062e-05,
      "loss": 0.8566,
      "step": 455300
    },
    {
      "epoch": 1.5957480364634262,
      "grad_norm": 2.5,
      "learning_rate": 2.6004761274276917e-05,
      "loss": 0.887,
      "step": 455310
    },
    {
      "epoch": 1.5957830839703218,
      "grad_norm": 3.375,
      "learning_rate": 2.6004112245613215e-05,
      "loss": 0.9259,
      "step": 455320
    },
    {
      "epoch": 1.5958181314772175,
      "grad_norm": 2.484375,
      "learning_rate": 2.6003463216949513e-05,
      "loss": 0.8042,
      "step": 455330
    },
    {
      "epoch": 1.5958531789841128,
      "grad_norm": 2.703125,
      "learning_rate": 2.600281418828581e-05,
      "loss": 0.779,
      "step": 455340
    },
    {
      "epoch": 1.5958882264910086,
      "grad_norm": 2.921875,
      "learning_rate": 2.600216515962211e-05,
      "loss": 0.8321,
      "step": 455350
    },
    {
      "epoch": 1.5959232739979041,
      "grad_norm": 2.953125,
      "learning_rate": 2.6001516130958407e-05,
      "loss": 0.7829,
      "step": 455360
    },
    {
      "epoch": 1.5959583215047997,
      "grad_norm": 2.984375,
      "learning_rate": 2.6000867102294705e-05,
      "loss": 0.7919,
      "step": 455370
    },
    {
      "epoch": 1.5959933690116954,
      "grad_norm": 3.078125,
      "learning_rate": 2.6000218073631007e-05,
      "loss": 0.7334,
      "step": 455380
    },
    {
      "epoch": 1.596028416518591,
      "grad_norm": 2.953125,
      "learning_rate": 2.5999569044967305e-05,
      "loss": 0.8835,
      "step": 455390
    },
    {
      "epoch": 1.5960634640254865,
      "grad_norm": 3.015625,
      "learning_rate": 2.5998920016303603e-05,
      "loss": 0.8906,
      "step": 455400
    },
    {
      "epoch": 1.5960985115323822,
      "grad_norm": 2.71875,
      "learning_rate": 2.59982709876399e-05,
      "loss": 0.8828,
      "step": 455410
    },
    {
      "epoch": 1.5961335590392778,
      "grad_norm": 2.625,
      "learning_rate": 2.59976219589762e-05,
      "loss": 0.8646,
      "step": 455420
    },
    {
      "epoch": 1.5961686065461733,
      "grad_norm": 2.703125,
      "learning_rate": 2.5996972930312497e-05,
      "loss": 0.8035,
      "step": 455430
    },
    {
      "epoch": 1.596203654053069,
      "grad_norm": 2.6875,
      "learning_rate": 2.5996323901648795e-05,
      "loss": 0.8311,
      "step": 455440
    },
    {
      "epoch": 1.5962387015599644,
      "grad_norm": 3.171875,
      "learning_rate": 2.5995674872985093e-05,
      "loss": 0.8549,
      "step": 455450
    },
    {
      "epoch": 1.5962737490668601,
      "grad_norm": 2.84375,
      "learning_rate": 2.599502584432139e-05,
      "loss": 0.7316,
      "step": 455460
    },
    {
      "epoch": 1.5963087965737557,
      "grad_norm": 3.265625,
      "learning_rate": 2.599437681565769e-05,
      "loss": 0.8434,
      "step": 455470
    },
    {
      "epoch": 1.5963438440806512,
      "grad_norm": 2.59375,
      "learning_rate": 2.5993727786993987e-05,
      "loss": 0.8736,
      "step": 455480
    },
    {
      "epoch": 1.596378891587547,
      "grad_norm": 2.9375,
      "learning_rate": 2.5993078758330285e-05,
      "loss": 0.8519,
      "step": 455490
    },
    {
      "epoch": 1.5964139390944425,
      "grad_norm": 2.5625,
      "learning_rate": 2.5992429729666583e-05,
      "loss": 0.8366,
      "step": 455500
    },
    {
      "epoch": 1.596448986601338,
      "grad_norm": 3.25,
      "learning_rate": 2.599178070100288e-05,
      "loss": 0.8291,
      "step": 455510
    },
    {
      "epoch": 1.5964840341082338,
      "grad_norm": 2.875,
      "learning_rate": 2.5991131672339182e-05,
      "loss": 0.7672,
      "step": 455520
    },
    {
      "epoch": 1.5965190816151293,
      "grad_norm": 2.828125,
      "learning_rate": 2.599048264367548e-05,
      "loss": 0.8926,
      "step": 455530
    },
    {
      "epoch": 1.5965541291220249,
      "grad_norm": 2.390625,
      "learning_rate": 2.5989833615011778e-05,
      "loss": 0.82,
      "step": 455540
    },
    {
      "epoch": 1.5965891766289206,
      "grad_norm": 3.171875,
      "learning_rate": 2.5989184586348076e-05,
      "loss": 0.8949,
      "step": 455550
    },
    {
      "epoch": 1.596624224135816,
      "grad_norm": 2.890625,
      "learning_rate": 2.5988535557684367e-05,
      "loss": 0.852,
      "step": 455560
    },
    {
      "epoch": 1.5966592716427117,
      "grad_norm": 2.5,
      "learning_rate": 2.5987886529020665e-05,
      "loss": 0.85,
      "step": 455570
    },
    {
      "epoch": 1.5966943191496072,
      "grad_norm": 3.390625,
      "learning_rate": 2.5987237500356963e-05,
      "loss": 0.8162,
      "step": 455580
    },
    {
      "epoch": 1.5967293666565028,
      "grad_norm": 2.671875,
      "learning_rate": 2.598658847169326e-05,
      "loss": 0.8671,
      "step": 455590
    },
    {
      "epoch": 1.5967644141633985,
      "grad_norm": 2.75,
      "learning_rate": 2.598593944302956e-05,
      "loss": 0.8885,
      "step": 455600
    },
    {
      "epoch": 1.596799461670294,
      "grad_norm": 2.53125,
      "learning_rate": 2.598529041436586e-05,
      "loss": 0.7674,
      "step": 455610
    },
    {
      "epoch": 1.5968345091771896,
      "grad_norm": 2.921875,
      "learning_rate": 2.598464138570216e-05,
      "loss": 0.9104,
      "step": 455620
    },
    {
      "epoch": 1.5968695566840854,
      "grad_norm": 3.078125,
      "learning_rate": 2.5983992357038457e-05,
      "loss": 0.8394,
      "step": 455630
    },
    {
      "epoch": 1.596904604190981,
      "grad_norm": 3.078125,
      "learning_rate": 2.5983343328374755e-05,
      "loss": 0.8153,
      "step": 455640
    },
    {
      "epoch": 1.5969396516978764,
      "grad_norm": 2.84375,
      "learning_rate": 2.5982694299711053e-05,
      "loss": 0.7519,
      "step": 455650
    },
    {
      "epoch": 1.5969746992047722,
      "grad_norm": 3.125,
      "learning_rate": 2.598204527104735e-05,
      "loss": 0.8521,
      "step": 455660
    },
    {
      "epoch": 1.5970097467116675,
      "grad_norm": 2.8125,
      "learning_rate": 2.598139624238365e-05,
      "loss": 0.8325,
      "step": 455670
    },
    {
      "epoch": 1.5970447942185633,
      "grad_norm": 3.046875,
      "learning_rate": 2.5980747213719947e-05,
      "loss": 0.7995,
      "step": 455680
    },
    {
      "epoch": 1.5970798417254588,
      "grad_norm": 2.84375,
      "learning_rate": 2.5980098185056245e-05,
      "loss": 0.7676,
      "step": 455690
    },
    {
      "epoch": 1.5971148892323543,
      "grad_norm": 2.859375,
      "learning_rate": 2.5979449156392543e-05,
      "loss": 0.7919,
      "step": 455700
    },
    {
      "epoch": 1.59714993673925,
      "grad_norm": 2.625,
      "learning_rate": 2.597880012772884e-05,
      "loss": 0.7924,
      "step": 455710
    },
    {
      "epoch": 1.5971849842461456,
      "grad_norm": 2.859375,
      "learning_rate": 2.597815109906514e-05,
      "loss": 0.8679,
      "step": 455720
    },
    {
      "epoch": 1.5972200317530412,
      "grad_norm": 2.953125,
      "learning_rate": 2.5977502070401437e-05,
      "loss": 0.9085,
      "step": 455730
    },
    {
      "epoch": 1.597255079259937,
      "grad_norm": 2.6875,
      "learning_rate": 2.5976853041737735e-05,
      "loss": 0.8983,
      "step": 455740
    },
    {
      "epoch": 1.5972901267668325,
      "grad_norm": 3.0,
      "learning_rate": 2.5976204013074036e-05,
      "loss": 0.7515,
      "step": 455750
    },
    {
      "epoch": 1.597325174273728,
      "grad_norm": 3.15625,
      "learning_rate": 2.5975554984410334e-05,
      "loss": 0.8838,
      "step": 455760
    },
    {
      "epoch": 1.5973602217806238,
      "grad_norm": 3.015625,
      "learning_rate": 2.5974905955746632e-05,
      "loss": 0.8205,
      "step": 455770
    },
    {
      "epoch": 1.597395269287519,
      "grad_norm": 2.828125,
      "learning_rate": 2.597425692708293e-05,
      "loss": 0.7666,
      "step": 455780
    },
    {
      "epoch": 1.5974303167944148,
      "grad_norm": 3.171875,
      "learning_rate": 2.5973607898419228e-05,
      "loss": 0.8009,
      "step": 455790
    },
    {
      "epoch": 1.5974653643013106,
      "grad_norm": 2.84375,
      "learning_rate": 2.5972958869755526e-05,
      "loss": 0.7924,
      "step": 455800
    },
    {
      "epoch": 1.597500411808206,
      "grad_norm": 2.484375,
      "learning_rate": 2.5972309841091824e-05,
      "loss": 0.8336,
      "step": 455810
    },
    {
      "epoch": 1.5975354593151017,
      "grad_norm": 3.046875,
      "learning_rate": 2.5971660812428122e-05,
      "loss": 0.7752,
      "step": 455820
    },
    {
      "epoch": 1.5975705068219972,
      "grad_norm": 3.390625,
      "learning_rate": 2.597101178376442e-05,
      "loss": 0.842,
      "step": 455830
    },
    {
      "epoch": 1.5976055543288927,
      "grad_norm": 3.171875,
      "learning_rate": 2.5970362755100718e-05,
      "loss": 0.8789,
      "step": 455840
    },
    {
      "epoch": 1.5976406018357885,
      "grad_norm": 3.0,
      "learning_rate": 2.5969713726437016e-05,
      "loss": 0.951,
      "step": 455850
    },
    {
      "epoch": 1.597675649342684,
      "grad_norm": 2.6875,
      "learning_rate": 2.5969064697773314e-05,
      "loss": 0.7936,
      "step": 455860
    },
    {
      "epoch": 1.5977106968495796,
      "grad_norm": 2.953125,
      "learning_rate": 2.5968415669109612e-05,
      "loss": 0.8262,
      "step": 455870
    },
    {
      "epoch": 1.5977457443564753,
      "grad_norm": 2.90625,
      "learning_rate": 2.596776664044591e-05,
      "loss": 0.787,
      "step": 455880
    },
    {
      "epoch": 1.5977807918633709,
      "grad_norm": 2.5,
      "learning_rate": 2.596711761178221e-05,
      "loss": 0.7929,
      "step": 455890
    },
    {
      "epoch": 1.5978158393702664,
      "grad_norm": 3.0625,
      "learning_rate": 2.596646858311851e-05,
      "loss": 0.8752,
      "step": 455900
    },
    {
      "epoch": 1.5978508868771621,
      "grad_norm": 2.71875,
      "learning_rate": 2.5965819554454808e-05,
      "loss": 0.8849,
      "step": 455910
    },
    {
      "epoch": 1.5978859343840575,
      "grad_norm": 2.84375,
      "learning_rate": 2.5965170525791106e-05,
      "loss": 0.8141,
      "step": 455920
    },
    {
      "epoch": 1.5979209818909532,
      "grad_norm": 3.6875,
      "learning_rate": 2.5964521497127404e-05,
      "loss": 0.8781,
      "step": 455930
    },
    {
      "epoch": 1.5979560293978488,
      "grad_norm": 2.984375,
      "learning_rate": 2.5963872468463695e-05,
      "loss": 0.8399,
      "step": 455940
    },
    {
      "epoch": 1.5979910769047443,
      "grad_norm": 2.953125,
      "learning_rate": 2.5963223439799993e-05,
      "loss": 0.8292,
      "step": 455950
    },
    {
      "epoch": 1.59802612441164,
      "grad_norm": 2.40625,
      "learning_rate": 2.596257441113629e-05,
      "loss": 0.7918,
      "step": 455960
    },
    {
      "epoch": 1.5980611719185356,
      "grad_norm": 2.875,
      "learning_rate": 2.5961925382472592e-05,
      "loss": 0.8409,
      "step": 455970
    },
    {
      "epoch": 1.5980962194254311,
      "grad_norm": 3.109375,
      "learning_rate": 2.596127635380889e-05,
      "loss": 0.8328,
      "step": 455980
    },
    {
      "epoch": 1.5981312669323269,
      "grad_norm": 2.84375,
      "learning_rate": 2.5960627325145188e-05,
      "loss": 0.8651,
      "step": 455990
    },
    {
      "epoch": 1.5981663144392224,
      "grad_norm": 3.265625,
      "learning_rate": 2.5959978296481486e-05,
      "loss": 0.8308,
      "step": 456000
    },
    {
      "epoch": 1.598201361946118,
      "grad_norm": 3.03125,
      "learning_rate": 2.5959329267817784e-05,
      "loss": 0.9171,
      "step": 456010
    },
    {
      "epoch": 1.5982364094530137,
      "grad_norm": 2.359375,
      "learning_rate": 2.5958680239154082e-05,
      "loss": 0.8953,
      "step": 456020
    },
    {
      "epoch": 1.598271456959909,
      "grad_norm": 3.0,
      "learning_rate": 2.595803121049038e-05,
      "loss": 0.8552,
      "step": 456030
    },
    {
      "epoch": 1.5983065044668048,
      "grad_norm": 3.21875,
      "learning_rate": 2.5957382181826678e-05,
      "loss": 0.8533,
      "step": 456040
    },
    {
      "epoch": 1.5983415519737003,
      "grad_norm": 2.484375,
      "learning_rate": 2.5956733153162976e-05,
      "loss": 0.7649,
      "step": 456050
    },
    {
      "epoch": 1.5983765994805958,
      "grad_norm": 3.90625,
      "learning_rate": 2.5956084124499274e-05,
      "loss": 0.8321,
      "step": 456060
    },
    {
      "epoch": 1.5984116469874916,
      "grad_norm": 2.59375,
      "learning_rate": 2.5955435095835572e-05,
      "loss": 0.7993,
      "step": 456070
    },
    {
      "epoch": 1.5984466944943871,
      "grad_norm": 3.234375,
      "learning_rate": 2.595478606717187e-05,
      "loss": 0.8399,
      "step": 456080
    },
    {
      "epoch": 1.5984817420012827,
      "grad_norm": 2.78125,
      "learning_rate": 2.5954137038508168e-05,
      "loss": 0.8208,
      "step": 456090
    },
    {
      "epoch": 1.5985167895081784,
      "grad_norm": 2.90625,
      "learning_rate": 2.5953488009844466e-05,
      "loss": 0.864,
      "step": 456100
    },
    {
      "epoch": 1.598551837015074,
      "grad_norm": 2.765625,
      "learning_rate": 2.5952838981180768e-05,
      "loss": 0.849,
      "step": 456110
    },
    {
      "epoch": 1.5985868845219695,
      "grad_norm": 2.90625,
      "learning_rate": 2.5952189952517066e-05,
      "loss": 0.8426,
      "step": 456120
    },
    {
      "epoch": 1.5986219320288653,
      "grad_norm": 2.875,
      "learning_rate": 2.5951540923853364e-05,
      "loss": 0.8728,
      "step": 456130
    },
    {
      "epoch": 1.5986569795357606,
      "grad_norm": 3.046875,
      "learning_rate": 2.595089189518966e-05,
      "loss": 0.9007,
      "step": 456140
    },
    {
      "epoch": 1.5986920270426563,
      "grad_norm": 2.703125,
      "learning_rate": 2.595024286652596e-05,
      "loss": 0.8243,
      "step": 456150
    },
    {
      "epoch": 1.5987270745495519,
      "grad_norm": 2.734375,
      "learning_rate": 2.5949593837862258e-05,
      "loss": 0.8086,
      "step": 456160
    },
    {
      "epoch": 1.5987621220564474,
      "grad_norm": 3.296875,
      "learning_rate": 2.5948944809198556e-05,
      "loss": 0.9063,
      "step": 456170
    },
    {
      "epoch": 1.5987971695633432,
      "grad_norm": 3.390625,
      "learning_rate": 2.5948295780534854e-05,
      "loss": 0.8657,
      "step": 456180
    },
    {
      "epoch": 1.5988322170702387,
      "grad_norm": 2.984375,
      "learning_rate": 2.594764675187115e-05,
      "loss": 0.8574,
      "step": 456190
    },
    {
      "epoch": 1.5988672645771342,
      "grad_norm": 3.1875,
      "learning_rate": 2.594699772320745e-05,
      "loss": 0.8469,
      "step": 456200
    },
    {
      "epoch": 1.59890231208403,
      "grad_norm": 3.03125,
      "learning_rate": 2.5946348694543748e-05,
      "loss": 0.9125,
      "step": 456210
    },
    {
      "epoch": 1.5989373595909255,
      "grad_norm": 2.734375,
      "learning_rate": 2.5945699665880046e-05,
      "loss": 0.8184,
      "step": 456220
    },
    {
      "epoch": 1.598972407097821,
      "grad_norm": 3.328125,
      "learning_rate": 2.5945050637216344e-05,
      "loss": 0.8946,
      "step": 456230
    },
    {
      "epoch": 1.5990074546047168,
      "grad_norm": 2.75,
      "learning_rate": 2.594440160855264e-05,
      "loss": 0.7487,
      "step": 456240
    },
    {
      "epoch": 1.5990425021116121,
      "grad_norm": 3.25,
      "learning_rate": 2.5943752579888943e-05,
      "loss": 0.889,
      "step": 456250
    },
    {
      "epoch": 1.599077549618508,
      "grad_norm": 3.15625,
      "learning_rate": 2.594310355122524e-05,
      "loss": 0.7957,
      "step": 456260
    },
    {
      "epoch": 1.5991125971254034,
      "grad_norm": 2.96875,
      "learning_rate": 2.594245452256154e-05,
      "loss": 0.8328,
      "step": 456270
    },
    {
      "epoch": 1.599147644632299,
      "grad_norm": 2.96875,
      "learning_rate": 2.5941805493897837e-05,
      "loss": 0.9062,
      "step": 456280
    },
    {
      "epoch": 1.5991826921391947,
      "grad_norm": 2.5625,
      "learning_rate": 2.5941156465234135e-05,
      "loss": 0.8216,
      "step": 456290
    },
    {
      "epoch": 1.5992177396460903,
      "grad_norm": 2.65625,
      "learning_rate": 2.5940507436570433e-05,
      "loss": 0.8224,
      "step": 456300
    },
    {
      "epoch": 1.5992527871529858,
      "grad_norm": 3.078125,
      "learning_rate": 2.5939858407906724e-05,
      "loss": 0.8831,
      "step": 456310
    },
    {
      "epoch": 1.5992878346598816,
      "grad_norm": 2.734375,
      "learning_rate": 2.5939209379243022e-05,
      "loss": 0.9147,
      "step": 456320
    },
    {
      "epoch": 1.599322882166777,
      "grad_norm": 2.953125,
      "learning_rate": 2.593856035057932e-05,
      "loss": 0.9005,
      "step": 456330
    },
    {
      "epoch": 1.5993579296736726,
      "grad_norm": 2.546875,
      "learning_rate": 2.593791132191562e-05,
      "loss": 0.7912,
      "step": 456340
    },
    {
      "epoch": 1.5993929771805684,
      "grad_norm": 2.359375,
      "learning_rate": 2.593726229325192e-05,
      "loss": 0.7616,
      "step": 456350
    },
    {
      "epoch": 1.5994280246874637,
      "grad_norm": 3.015625,
      "learning_rate": 2.5936613264588218e-05,
      "loss": 0.8029,
      "step": 456360
    },
    {
      "epoch": 1.5994630721943595,
      "grad_norm": 2.71875,
      "learning_rate": 2.5935964235924516e-05,
      "loss": 0.9022,
      "step": 456370
    },
    {
      "epoch": 1.5994981197012552,
      "grad_norm": 2.65625,
      "learning_rate": 2.5935315207260814e-05,
      "loss": 0.7699,
      "step": 456380
    },
    {
      "epoch": 1.5995331672081505,
      "grad_norm": 2.5,
      "learning_rate": 2.593466617859711e-05,
      "loss": 0.8809,
      "step": 456390
    },
    {
      "epoch": 1.5995682147150463,
      "grad_norm": 3.15625,
      "learning_rate": 2.593401714993341e-05,
      "loss": 0.923,
      "step": 456400
    },
    {
      "epoch": 1.5996032622219418,
      "grad_norm": 2.9375,
      "learning_rate": 2.5933368121269708e-05,
      "loss": 0.7709,
      "step": 456410
    },
    {
      "epoch": 1.5996383097288374,
      "grad_norm": 3.21875,
      "learning_rate": 2.5932719092606006e-05,
      "loss": 0.8455,
      "step": 456420
    },
    {
      "epoch": 1.5996733572357331,
      "grad_norm": 2.96875,
      "learning_rate": 2.5932070063942304e-05,
      "loss": 0.843,
      "step": 456430
    },
    {
      "epoch": 1.5997084047426287,
      "grad_norm": 3.203125,
      "learning_rate": 2.59314210352786e-05,
      "loss": 0.8761,
      "step": 456440
    },
    {
      "epoch": 1.5997434522495242,
      "grad_norm": 2.8125,
      "learning_rate": 2.59307720066149e-05,
      "loss": 0.8588,
      "step": 456450
    },
    {
      "epoch": 1.59977849975642,
      "grad_norm": 2.546875,
      "learning_rate": 2.5930122977951198e-05,
      "loss": 0.8487,
      "step": 456460
    },
    {
      "epoch": 1.5998135472633153,
      "grad_norm": 3.421875,
      "learning_rate": 2.5929473949287496e-05,
      "loss": 0.7785,
      "step": 456470
    },
    {
      "epoch": 1.599848594770211,
      "grad_norm": 2.921875,
      "learning_rate": 2.5928824920623797e-05,
      "loss": 0.9006,
      "step": 456480
    },
    {
      "epoch": 1.5998836422771068,
      "grad_norm": 3.359375,
      "learning_rate": 2.5928175891960095e-05,
      "loss": 0.857,
      "step": 456490
    },
    {
      "epoch": 1.599918689784002,
      "grad_norm": 2.75,
      "learning_rate": 2.5927526863296393e-05,
      "loss": 0.8409,
      "step": 456500
    },
    {
      "epoch": 1.5999537372908978,
      "grad_norm": 2.8125,
      "learning_rate": 2.592687783463269e-05,
      "loss": 0.8853,
      "step": 456510
    },
    {
      "epoch": 1.5999887847977934,
      "grad_norm": 3.171875,
      "learning_rate": 2.592622880596899e-05,
      "loss": 0.8558,
      "step": 456520
    },
    {
      "epoch": 1.600023832304689,
      "grad_norm": 2.828125,
      "learning_rate": 2.5925579777305287e-05,
      "loss": 0.8509,
      "step": 456530
    },
    {
      "epoch": 1.6000588798115847,
      "grad_norm": 2.84375,
      "learning_rate": 2.5924930748641585e-05,
      "loss": 0.8964,
      "step": 456540
    },
    {
      "epoch": 1.6000939273184802,
      "grad_norm": 2.703125,
      "learning_rate": 2.5924281719977883e-05,
      "loss": 0.8277,
      "step": 456550
    },
    {
      "epoch": 1.6001289748253757,
      "grad_norm": 3.40625,
      "learning_rate": 2.592363269131418e-05,
      "loss": 0.888,
      "step": 456560
    },
    {
      "epoch": 1.6001640223322715,
      "grad_norm": 2.65625,
      "learning_rate": 2.592298366265048e-05,
      "loss": 0.8284,
      "step": 456570
    },
    {
      "epoch": 1.600199069839167,
      "grad_norm": 2.734375,
      "learning_rate": 2.5922334633986777e-05,
      "loss": 0.8689,
      "step": 456580
    },
    {
      "epoch": 1.6002341173460626,
      "grad_norm": 2.53125,
      "learning_rate": 2.5921685605323075e-05,
      "loss": 0.9376,
      "step": 456590
    },
    {
      "epoch": 1.6002691648529583,
      "grad_norm": 2.6875,
      "learning_rate": 2.5921036576659373e-05,
      "loss": 0.8649,
      "step": 456600
    },
    {
      "epoch": 1.6003042123598537,
      "grad_norm": 2.90625,
      "learning_rate": 2.592038754799567e-05,
      "loss": 0.8466,
      "step": 456610
    },
    {
      "epoch": 1.6003392598667494,
      "grad_norm": 2.8125,
      "learning_rate": 2.5919738519331972e-05,
      "loss": 0.8665,
      "step": 456620
    },
    {
      "epoch": 1.600374307373645,
      "grad_norm": 3.09375,
      "learning_rate": 2.591908949066827e-05,
      "loss": 0.8413,
      "step": 456630
    },
    {
      "epoch": 1.6004093548805405,
      "grad_norm": 2.71875,
      "learning_rate": 2.591844046200457e-05,
      "loss": 0.8072,
      "step": 456640
    },
    {
      "epoch": 1.6004444023874362,
      "grad_norm": 3.21875,
      "learning_rate": 2.5917791433340866e-05,
      "loss": 0.8104,
      "step": 456650
    },
    {
      "epoch": 1.6004794498943318,
      "grad_norm": 2.5625,
      "learning_rate": 2.5917142404677164e-05,
      "loss": 0.8724,
      "step": 456660
    },
    {
      "epoch": 1.6005144974012273,
      "grad_norm": 2.671875,
      "learning_rate": 2.5916493376013462e-05,
      "loss": 0.7408,
      "step": 456670
    },
    {
      "epoch": 1.600549544908123,
      "grad_norm": 3.0625,
      "learning_rate": 2.591584434734976e-05,
      "loss": 0.8973,
      "step": 456680
    },
    {
      "epoch": 1.6005845924150186,
      "grad_norm": 2.796875,
      "learning_rate": 2.591519531868605e-05,
      "loss": 0.7949,
      "step": 456690
    },
    {
      "epoch": 1.6006196399219141,
      "grad_norm": 2.796875,
      "learning_rate": 2.591454629002235e-05,
      "loss": 0.8715,
      "step": 456700
    },
    {
      "epoch": 1.60065468742881,
      "grad_norm": 2.984375,
      "learning_rate": 2.591389726135865e-05,
      "loss": 0.8208,
      "step": 456710
    },
    {
      "epoch": 1.6006897349357052,
      "grad_norm": 2.734375,
      "learning_rate": 2.591324823269495e-05,
      "loss": 0.7692,
      "step": 456720
    },
    {
      "epoch": 1.600724782442601,
      "grad_norm": 2.765625,
      "learning_rate": 2.5912599204031247e-05,
      "loss": 0.8415,
      "step": 456730
    },
    {
      "epoch": 1.6007598299494965,
      "grad_norm": 2.890625,
      "learning_rate": 2.5911950175367545e-05,
      "loss": 0.8852,
      "step": 456740
    },
    {
      "epoch": 1.600794877456392,
      "grad_norm": 3.140625,
      "learning_rate": 2.5911301146703843e-05,
      "loss": 0.8222,
      "step": 456750
    },
    {
      "epoch": 1.6008299249632878,
      "grad_norm": 3.046875,
      "learning_rate": 2.591065211804014e-05,
      "loss": 0.9147,
      "step": 456760
    },
    {
      "epoch": 1.6008649724701833,
      "grad_norm": 2.53125,
      "learning_rate": 2.591000308937644e-05,
      "loss": 0.83,
      "step": 456770
    },
    {
      "epoch": 1.6009000199770789,
      "grad_norm": 2.421875,
      "learning_rate": 2.5909354060712737e-05,
      "loss": 0.8101,
      "step": 456780
    },
    {
      "epoch": 1.6009350674839746,
      "grad_norm": 2.875,
      "learning_rate": 2.5908705032049035e-05,
      "loss": 0.824,
      "step": 456790
    },
    {
      "epoch": 1.6009701149908702,
      "grad_norm": 2.921875,
      "learning_rate": 2.5908056003385333e-05,
      "loss": 0.8482,
      "step": 456800
    },
    {
      "epoch": 1.6010051624977657,
      "grad_norm": 3.0,
      "learning_rate": 2.590740697472163e-05,
      "loss": 0.7764,
      "step": 456810
    },
    {
      "epoch": 1.6010402100046615,
      "grad_norm": 2.9375,
      "learning_rate": 2.590675794605793e-05,
      "loss": 0.8958,
      "step": 456820
    },
    {
      "epoch": 1.6010752575115568,
      "grad_norm": 2.8125,
      "learning_rate": 2.5906108917394227e-05,
      "loss": 0.6901,
      "step": 456830
    },
    {
      "epoch": 1.6011103050184525,
      "grad_norm": 2.53125,
      "learning_rate": 2.5905459888730525e-05,
      "loss": 0.8366,
      "step": 456840
    },
    {
      "epoch": 1.601145352525348,
      "grad_norm": 2.96875,
      "learning_rate": 2.5904810860066826e-05,
      "loss": 0.8005,
      "step": 456850
    },
    {
      "epoch": 1.6011804000322436,
      "grad_norm": 2.59375,
      "learning_rate": 2.5904161831403124e-05,
      "loss": 0.8017,
      "step": 456860
    },
    {
      "epoch": 1.6012154475391394,
      "grad_norm": 3.046875,
      "learning_rate": 2.5903512802739422e-05,
      "loss": 0.7873,
      "step": 456870
    },
    {
      "epoch": 1.601250495046035,
      "grad_norm": 2.65625,
      "learning_rate": 2.590286377407572e-05,
      "loss": 0.8106,
      "step": 456880
    },
    {
      "epoch": 1.6012855425529304,
      "grad_norm": 3.265625,
      "learning_rate": 2.590221474541202e-05,
      "loss": 0.8841,
      "step": 456890
    },
    {
      "epoch": 1.6013205900598262,
      "grad_norm": 2.96875,
      "learning_rate": 2.5901565716748316e-05,
      "loss": 0.8864,
      "step": 456900
    },
    {
      "epoch": 1.6013556375667217,
      "grad_norm": 3.34375,
      "learning_rate": 2.5900916688084614e-05,
      "loss": 0.8215,
      "step": 456910
    },
    {
      "epoch": 1.6013906850736173,
      "grad_norm": 2.90625,
      "learning_rate": 2.5900267659420912e-05,
      "loss": 0.8826,
      "step": 456920
    },
    {
      "epoch": 1.601425732580513,
      "grad_norm": 2.90625,
      "learning_rate": 2.589961863075721e-05,
      "loss": 0.8519,
      "step": 456930
    },
    {
      "epoch": 1.6014607800874083,
      "grad_norm": 2.9375,
      "learning_rate": 2.589896960209351e-05,
      "loss": 0.7866,
      "step": 456940
    },
    {
      "epoch": 1.601495827594304,
      "grad_norm": 3.109375,
      "learning_rate": 2.5898320573429806e-05,
      "loss": 0.9095,
      "step": 456950
    },
    {
      "epoch": 1.6015308751011996,
      "grad_norm": 2.71875,
      "learning_rate": 2.5897671544766104e-05,
      "loss": 0.8349,
      "step": 456960
    },
    {
      "epoch": 1.6015659226080952,
      "grad_norm": 2.828125,
      "learning_rate": 2.5897022516102402e-05,
      "loss": 0.8423,
      "step": 456970
    },
    {
      "epoch": 1.601600970114991,
      "grad_norm": 2.703125,
      "learning_rate": 2.58963734874387e-05,
      "loss": 0.8614,
      "step": 456980
    },
    {
      "epoch": 1.6016360176218865,
      "grad_norm": 2.296875,
      "learning_rate": 2.5895724458775002e-05,
      "loss": 0.8873,
      "step": 456990
    },
    {
      "epoch": 1.601671065128782,
      "grad_norm": 2.671875,
      "learning_rate": 2.58950754301113e-05,
      "loss": 0.8182,
      "step": 457000
    },
    {
      "epoch": 1.6017061126356777,
      "grad_norm": 3.28125,
      "learning_rate": 2.5894426401447598e-05,
      "loss": 0.8705,
      "step": 457010
    },
    {
      "epoch": 1.6017411601425733,
      "grad_norm": 2.90625,
      "learning_rate": 2.5893777372783896e-05,
      "loss": 0.8698,
      "step": 457020
    },
    {
      "epoch": 1.6017762076494688,
      "grad_norm": 2.984375,
      "learning_rate": 2.5893128344120194e-05,
      "loss": 0.8424,
      "step": 457030
    },
    {
      "epoch": 1.6018112551563646,
      "grad_norm": 2.96875,
      "learning_rate": 2.5892479315456492e-05,
      "loss": 0.7928,
      "step": 457040
    },
    {
      "epoch": 1.60184630266326,
      "grad_norm": 3.21875,
      "learning_rate": 2.589183028679279e-05,
      "loss": 0.8644,
      "step": 457050
    },
    {
      "epoch": 1.6018813501701556,
      "grad_norm": 2.578125,
      "learning_rate": 2.5891181258129088e-05,
      "loss": 0.9137,
      "step": 457060
    },
    {
      "epoch": 1.6019163976770514,
      "grad_norm": 2.671875,
      "learning_rate": 2.5890532229465382e-05,
      "loss": 0.8308,
      "step": 457070
    },
    {
      "epoch": 1.6019514451839467,
      "grad_norm": 3.421875,
      "learning_rate": 2.588988320080168e-05,
      "loss": 0.8194,
      "step": 457080
    },
    {
      "epoch": 1.6019864926908425,
      "grad_norm": 2.671875,
      "learning_rate": 2.588923417213798e-05,
      "loss": 0.8558,
      "step": 457090
    },
    {
      "epoch": 1.602021540197738,
      "grad_norm": 3.171875,
      "learning_rate": 2.5888585143474276e-05,
      "loss": 0.7921,
      "step": 457100
    },
    {
      "epoch": 1.6020565877046336,
      "grad_norm": 3.0625,
      "learning_rate": 2.5887936114810574e-05,
      "loss": 0.7482,
      "step": 457110
    },
    {
      "epoch": 1.6020916352115293,
      "grad_norm": 3.03125,
      "learning_rate": 2.5887287086146872e-05,
      "loss": 0.783,
      "step": 457120
    },
    {
      "epoch": 1.6021266827184248,
      "grad_norm": 2.921875,
      "learning_rate": 2.588663805748317e-05,
      "loss": 0.8292,
      "step": 457130
    },
    {
      "epoch": 1.6021617302253204,
      "grad_norm": 2.984375,
      "learning_rate": 2.588598902881947e-05,
      "loss": 0.8676,
      "step": 457140
    },
    {
      "epoch": 1.6021967777322161,
      "grad_norm": 2.8125,
      "learning_rate": 2.5885340000155766e-05,
      "loss": 0.8771,
      "step": 457150
    },
    {
      "epoch": 1.6022318252391115,
      "grad_norm": 2.921875,
      "learning_rate": 2.5884690971492064e-05,
      "loss": 0.8413,
      "step": 457160
    },
    {
      "epoch": 1.6022668727460072,
      "grad_norm": 2.984375,
      "learning_rate": 2.5884041942828362e-05,
      "loss": 0.8705,
      "step": 457170
    },
    {
      "epoch": 1.602301920252903,
      "grad_norm": 2.8125,
      "learning_rate": 2.588339291416466e-05,
      "loss": 0.818,
      "step": 457180
    },
    {
      "epoch": 1.6023369677597983,
      "grad_norm": 2.859375,
      "learning_rate": 2.588274388550096e-05,
      "loss": 0.8551,
      "step": 457190
    },
    {
      "epoch": 1.602372015266694,
      "grad_norm": 2.546875,
      "learning_rate": 2.5882094856837256e-05,
      "loss": 0.8603,
      "step": 457200
    },
    {
      "epoch": 1.6024070627735896,
      "grad_norm": 2.546875,
      "learning_rate": 2.5881445828173558e-05,
      "loss": 0.8336,
      "step": 457210
    },
    {
      "epoch": 1.602442110280485,
      "grad_norm": 2.578125,
      "learning_rate": 2.5880796799509856e-05,
      "loss": 0.9206,
      "step": 457220
    },
    {
      "epoch": 1.6024771577873809,
      "grad_norm": 3.265625,
      "learning_rate": 2.5880147770846154e-05,
      "loss": 0.8305,
      "step": 457230
    },
    {
      "epoch": 1.6025122052942764,
      "grad_norm": 2.6875,
      "learning_rate": 2.5879498742182452e-05,
      "loss": 0.8486,
      "step": 457240
    },
    {
      "epoch": 1.602547252801172,
      "grad_norm": 3.03125,
      "learning_rate": 2.587884971351875e-05,
      "loss": 0.7904,
      "step": 457250
    },
    {
      "epoch": 1.6025823003080677,
      "grad_norm": 2.953125,
      "learning_rate": 2.5878200684855048e-05,
      "loss": 0.8493,
      "step": 457260
    },
    {
      "epoch": 1.6026173478149632,
      "grad_norm": 2.390625,
      "learning_rate": 2.5877551656191346e-05,
      "loss": 0.8302,
      "step": 457270
    },
    {
      "epoch": 1.6026523953218588,
      "grad_norm": 2.90625,
      "learning_rate": 2.5876902627527644e-05,
      "loss": 0.8619,
      "step": 457280
    },
    {
      "epoch": 1.6026874428287545,
      "grad_norm": 3.125,
      "learning_rate": 2.5876253598863942e-05,
      "loss": 0.87,
      "step": 457290
    },
    {
      "epoch": 1.6027224903356498,
      "grad_norm": 2.859375,
      "learning_rate": 2.587560457020024e-05,
      "loss": 0.8051,
      "step": 457300
    },
    {
      "epoch": 1.6027575378425456,
      "grad_norm": 2.6875,
      "learning_rate": 2.5874955541536538e-05,
      "loss": 0.7673,
      "step": 457310
    },
    {
      "epoch": 1.6027925853494411,
      "grad_norm": 2.921875,
      "learning_rate": 2.5874306512872836e-05,
      "loss": 0.753,
      "step": 457320
    },
    {
      "epoch": 1.6028276328563367,
      "grad_norm": 3.75,
      "learning_rate": 2.5873657484209134e-05,
      "loss": 0.8982,
      "step": 457330
    },
    {
      "epoch": 1.6028626803632324,
      "grad_norm": 2.9375,
      "learning_rate": 2.5873008455545432e-05,
      "loss": 0.7349,
      "step": 457340
    },
    {
      "epoch": 1.602897727870128,
      "grad_norm": 3.359375,
      "learning_rate": 2.5872359426881733e-05,
      "loss": 0.8752,
      "step": 457350
    },
    {
      "epoch": 1.6029327753770235,
      "grad_norm": 2.90625,
      "learning_rate": 2.587171039821803e-05,
      "loss": 0.8473,
      "step": 457360
    },
    {
      "epoch": 1.6029678228839193,
      "grad_norm": 2.96875,
      "learning_rate": 2.587106136955433e-05,
      "loss": 0.8277,
      "step": 457370
    },
    {
      "epoch": 1.6030028703908148,
      "grad_norm": 3.203125,
      "learning_rate": 2.5870412340890627e-05,
      "loss": 0.8511,
      "step": 457380
    },
    {
      "epoch": 1.6030379178977103,
      "grad_norm": 2.703125,
      "learning_rate": 2.5869763312226925e-05,
      "loss": 0.8789,
      "step": 457390
    },
    {
      "epoch": 1.603072965404606,
      "grad_norm": 2.359375,
      "learning_rate": 2.5869114283563223e-05,
      "loss": 0.8758,
      "step": 457400
    },
    {
      "epoch": 1.6031080129115014,
      "grad_norm": 2.75,
      "learning_rate": 2.586846525489952e-05,
      "loss": 0.7398,
      "step": 457410
    },
    {
      "epoch": 1.6031430604183972,
      "grad_norm": 2.625,
      "learning_rate": 2.586781622623582e-05,
      "loss": 0.7707,
      "step": 457420
    },
    {
      "epoch": 1.6031781079252927,
      "grad_norm": 2.859375,
      "learning_rate": 2.5867167197572117e-05,
      "loss": 0.8295,
      "step": 457430
    },
    {
      "epoch": 1.6032131554321882,
      "grad_norm": 2.609375,
      "learning_rate": 2.5866518168908412e-05,
      "loss": 0.8449,
      "step": 457440
    },
    {
      "epoch": 1.603248202939084,
      "grad_norm": 2.8125,
      "learning_rate": 2.586586914024471e-05,
      "loss": 0.8473,
      "step": 457450
    },
    {
      "epoch": 1.6032832504459795,
      "grad_norm": 2.171875,
      "learning_rate": 2.5865220111581008e-05,
      "loss": 0.7879,
      "step": 457460
    },
    {
      "epoch": 1.603318297952875,
      "grad_norm": 2.5625,
      "learning_rate": 2.5864571082917306e-05,
      "loss": 0.789,
      "step": 457470
    },
    {
      "epoch": 1.6033533454597708,
      "grad_norm": 3.15625,
      "learning_rate": 2.5863922054253604e-05,
      "loss": 0.8484,
      "step": 457480
    },
    {
      "epoch": 1.6033883929666664,
      "grad_norm": 3.15625,
      "learning_rate": 2.5863273025589902e-05,
      "loss": 0.8011,
      "step": 457490
    },
    {
      "epoch": 1.603423440473562,
      "grad_norm": 3.171875,
      "learning_rate": 2.58626239969262e-05,
      "loss": 0.9622,
      "step": 457500
    },
    {
      "epoch": 1.6034584879804576,
      "grad_norm": 3.359375,
      "learning_rate": 2.5861974968262498e-05,
      "loss": 0.8731,
      "step": 457510
    },
    {
      "epoch": 1.603493535487353,
      "grad_norm": 2.90625,
      "learning_rate": 2.5861325939598796e-05,
      "loss": 0.8313,
      "step": 457520
    },
    {
      "epoch": 1.6035285829942487,
      "grad_norm": 3.78125,
      "learning_rate": 2.5860676910935094e-05,
      "loss": 0.9286,
      "step": 457530
    },
    {
      "epoch": 1.6035636305011443,
      "grad_norm": 2.734375,
      "learning_rate": 2.5860027882271392e-05,
      "loss": 0.8129,
      "step": 457540
    },
    {
      "epoch": 1.6035986780080398,
      "grad_norm": 2.734375,
      "learning_rate": 2.585937885360769e-05,
      "loss": 0.7853,
      "step": 457550
    },
    {
      "epoch": 1.6036337255149355,
      "grad_norm": 3.28125,
      "learning_rate": 2.5858729824943988e-05,
      "loss": 0.9024,
      "step": 457560
    },
    {
      "epoch": 1.603668773021831,
      "grad_norm": 3.09375,
      "learning_rate": 2.5858080796280286e-05,
      "loss": 0.8227,
      "step": 457570
    },
    {
      "epoch": 1.6037038205287266,
      "grad_norm": 3.078125,
      "learning_rate": 2.5857431767616587e-05,
      "loss": 0.7848,
      "step": 457580
    },
    {
      "epoch": 1.6037388680356224,
      "grad_norm": 2.65625,
      "learning_rate": 2.5856782738952885e-05,
      "loss": 0.8405,
      "step": 457590
    },
    {
      "epoch": 1.603773915542518,
      "grad_norm": 2.953125,
      "learning_rate": 2.5856133710289183e-05,
      "loss": 0.8631,
      "step": 457600
    },
    {
      "epoch": 1.6038089630494134,
      "grad_norm": 2.84375,
      "learning_rate": 2.585548468162548e-05,
      "loss": 0.7908,
      "step": 457610
    },
    {
      "epoch": 1.6038440105563092,
      "grad_norm": 2.546875,
      "learning_rate": 2.585483565296178e-05,
      "loss": 0.793,
      "step": 457620
    },
    {
      "epoch": 1.6038790580632045,
      "grad_norm": 3.203125,
      "learning_rate": 2.5854186624298077e-05,
      "loss": 0.8453,
      "step": 457630
    },
    {
      "epoch": 1.6039141055701003,
      "grad_norm": 2.953125,
      "learning_rate": 2.5853537595634375e-05,
      "loss": 0.9164,
      "step": 457640
    },
    {
      "epoch": 1.6039491530769958,
      "grad_norm": 2.765625,
      "learning_rate": 2.5852888566970673e-05,
      "loss": 0.8166,
      "step": 457650
    },
    {
      "epoch": 1.6039842005838914,
      "grad_norm": 3.65625,
      "learning_rate": 2.585223953830697e-05,
      "loss": 0.8506,
      "step": 457660
    },
    {
      "epoch": 1.604019248090787,
      "grad_norm": 2.84375,
      "learning_rate": 2.585159050964327e-05,
      "loss": 0.8325,
      "step": 457670
    },
    {
      "epoch": 1.6040542955976826,
      "grad_norm": 2.921875,
      "learning_rate": 2.5850941480979567e-05,
      "loss": 0.7961,
      "step": 457680
    },
    {
      "epoch": 1.6040893431045782,
      "grad_norm": 2.953125,
      "learning_rate": 2.5850292452315865e-05,
      "loss": 0.8713,
      "step": 457690
    },
    {
      "epoch": 1.604124390611474,
      "grad_norm": 2.578125,
      "learning_rate": 2.5849643423652163e-05,
      "loss": 0.7884,
      "step": 457700
    },
    {
      "epoch": 1.6041594381183695,
      "grad_norm": 2.65625,
      "learning_rate": 2.584899439498846e-05,
      "loss": 0.8424,
      "step": 457710
    },
    {
      "epoch": 1.604194485625265,
      "grad_norm": 3.328125,
      "learning_rate": 2.5848345366324763e-05,
      "loss": 0.905,
      "step": 457720
    },
    {
      "epoch": 1.6042295331321608,
      "grad_norm": 2.984375,
      "learning_rate": 2.584769633766106e-05,
      "loss": 0.8765,
      "step": 457730
    },
    {
      "epoch": 1.604264580639056,
      "grad_norm": 2.78125,
      "learning_rate": 2.584704730899736e-05,
      "loss": 0.9153,
      "step": 457740
    },
    {
      "epoch": 1.6042996281459518,
      "grad_norm": 3.0,
      "learning_rate": 2.5846398280333657e-05,
      "loss": 0.8538,
      "step": 457750
    },
    {
      "epoch": 1.6043346756528476,
      "grad_norm": 3.03125,
      "learning_rate": 2.5845749251669955e-05,
      "loss": 0.8125,
      "step": 457760
    },
    {
      "epoch": 1.604369723159743,
      "grad_norm": 3.0,
      "learning_rate": 2.5845100223006253e-05,
      "loss": 0.8979,
      "step": 457770
    },
    {
      "epoch": 1.6044047706666387,
      "grad_norm": 2.796875,
      "learning_rate": 2.584445119434255e-05,
      "loss": 0.731,
      "step": 457780
    },
    {
      "epoch": 1.6044398181735342,
      "grad_norm": 3.375,
      "learning_rate": 2.584380216567885e-05,
      "loss": 0.8222,
      "step": 457790
    },
    {
      "epoch": 1.6044748656804297,
      "grad_norm": 2.71875,
      "learning_rate": 2.5843153137015147e-05,
      "loss": 0.7806,
      "step": 457800
    },
    {
      "epoch": 1.6045099131873255,
      "grad_norm": 2.765625,
      "learning_rate": 2.5842504108351445e-05,
      "loss": 0.7676,
      "step": 457810
    },
    {
      "epoch": 1.604544960694221,
      "grad_norm": 3.1875,
      "learning_rate": 2.584185507968774e-05,
      "loss": 0.9263,
      "step": 457820
    },
    {
      "epoch": 1.6045800082011166,
      "grad_norm": 3.046875,
      "learning_rate": 2.5841206051024037e-05,
      "loss": 0.8761,
      "step": 457830
    },
    {
      "epoch": 1.6046150557080123,
      "grad_norm": 2.703125,
      "learning_rate": 2.5840557022360335e-05,
      "loss": 0.7968,
      "step": 457840
    },
    {
      "epoch": 1.6046501032149076,
      "grad_norm": 2.90625,
      "learning_rate": 2.5839907993696633e-05,
      "loss": 0.8085,
      "step": 457850
    },
    {
      "epoch": 1.6046851507218034,
      "grad_norm": 2.890625,
      "learning_rate": 2.583925896503293e-05,
      "loss": 0.8257,
      "step": 457860
    },
    {
      "epoch": 1.6047201982286992,
      "grad_norm": 3.03125,
      "learning_rate": 2.583860993636923e-05,
      "loss": 0.7877,
      "step": 457870
    },
    {
      "epoch": 1.6047552457355945,
      "grad_norm": 2.828125,
      "learning_rate": 2.5837960907705527e-05,
      "loss": 0.8274,
      "step": 457880
    },
    {
      "epoch": 1.6047902932424902,
      "grad_norm": 2.890625,
      "learning_rate": 2.5837311879041825e-05,
      "loss": 0.9093,
      "step": 457890
    },
    {
      "epoch": 1.6048253407493858,
      "grad_norm": 3.046875,
      "learning_rate": 2.5836662850378123e-05,
      "loss": 0.8197,
      "step": 457900
    },
    {
      "epoch": 1.6048603882562813,
      "grad_norm": 3.171875,
      "learning_rate": 2.583601382171442e-05,
      "loss": 0.8557,
      "step": 457910
    },
    {
      "epoch": 1.604895435763177,
      "grad_norm": 2.71875,
      "learning_rate": 2.583536479305072e-05,
      "loss": 0.8274,
      "step": 457920
    },
    {
      "epoch": 1.6049304832700726,
      "grad_norm": 3.515625,
      "learning_rate": 2.5834715764387017e-05,
      "loss": 0.7894,
      "step": 457930
    },
    {
      "epoch": 1.6049655307769681,
      "grad_norm": 2.8125,
      "learning_rate": 2.5834066735723315e-05,
      "loss": 0.901,
      "step": 457940
    },
    {
      "epoch": 1.6050005782838639,
      "grad_norm": 2.734375,
      "learning_rate": 2.5833417707059617e-05,
      "loss": 0.7856,
      "step": 457950
    },
    {
      "epoch": 1.6050356257907594,
      "grad_norm": 3.0625,
      "learning_rate": 2.5832768678395915e-05,
      "loss": 0.8733,
      "step": 457960
    },
    {
      "epoch": 1.605070673297655,
      "grad_norm": 2.8125,
      "learning_rate": 2.5832119649732213e-05,
      "loss": 0.8623,
      "step": 457970
    },
    {
      "epoch": 1.6051057208045507,
      "grad_norm": 2.75,
      "learning_rate": 2.583147062106851e-05,
      "loss": 0.8793,
      "step": 457980
    },
    {
      "epoch": 1.605140768311446,
      "grad_norm": 2.9375,
      "learning_rate": 2.583082159240481e-05,
      "loss": 0.8041,
      "step": 457990
    },
    {
      "epoch": 1.6051758158183418,
      "grad_norm": 3.21875,
      "learning_rate": 2.5830172563741107e-05,
      "loss": 0.8272,
      "step": 458000
    },
    {
      "epoch": 1.6052108633252373,
      "grad_norm": 2.71875,
      "learning_rate": 2.5829523535077405e-05,
      "loss": 0.8043,
      "step": 458010
    },
    {
      "epoch": 1.6052459108321329,
      "grad_norm": 3.546875,
      "learning_rate": 2.5828874506413703e-05,
      "loss": 0.8737,
      "step": 458020
    },
    {
      "epoch": 1.6052809583390286,
      "grad_norm": 2.96875,
      "learning_rate": 2.582822547775e-05,
      "loss": 0.875,
      "step": 458030
    },
    {
      "epoch": 1.6053160058459242,
      "grad_norm": 2.765625,
      "learning_rate": 2.58275764490863e-05,
      "loss": 0.7776,
      "step": 458040
    },
    {
      "epoch": 1.6053510533528197,
      "grad_norm": 3.296875,
      "learning_rate": 2.5826927420422597e-05,
      "loss": 0.8186,
      "step": 458050
    },
    {
      "epoch": 1.6053861008597154,
      "grad_norm": 2.578125,
      "learning_rate": 2.5826278391758895e-05,
      "loss": 0.8426,
      "step": 458060
    },
    {
      "epoch": 1.605421148366611,
      "grad_norm": 2.78125,
      "learning_rate": 2.5825629363095193e-05,
      "loss": 0.8543,
      "step": 458070
    },
    {
      "epoch": 1.6054561958735065,
      "grad_norm": 3.140625,
      "learning_rate": 2.582498033443149e-05,
      "loss": 0.8319,
      "step": 458080
    },
    {
      "epoch": 1.6054912433804023,
      "grad_norm": 2.8125,
      "learning_rate": 2.5824331305767792e-05,
      "loss": 0.7996,
      "step": 458090
    },
    {
      "epoch": 1.6055262908872976,
      "grad_norm": 2.890625,
      "learning_rate": 2.582368227710409e-05,
      "loss": 0.7943,
      "step": 458100
    },
    {
      "epoch": 1.6055613383941933,
      "grad_norm": 2.453125,
      "learning_rate": 2.5823033248440388e-05,
      "loss": 0.8821,
      "step": 458110
    },
    {
      "epoch": 1.6055963859010889,
      "grad_norm": 3.546875,
      "learning_rate": 2.5822384219776686e-05,
      "loss": 0.8655,
      "step": 458120
    },
    {
      "epoch": 1.6056314334079844,
      "grad_norm": 2.625,
      "learning_rate": 2.5821735191112984e-05,
      "loss": 0.8319,
      "step": 458130
    },
    {
      "epoch": 1.6056664809148802,
      "grad_norm": 2.453125,
      "learning_rate": 2.5821086162449282e-05,
      "loss": 0.8301,
      "step": 458140
    },
    {
      "epoch": 1.6057015284217757,
      "grad_norm": 2.953125,
      "learning_rate": 2.582043713378558e-05,
      "loss": 0.8529,
      "step": 458150
    },
    {
      "epoch": 1.6057365759286713,
      "grad_norm": 2.65625,
      "learning_rate": 2.5819788105121878e-05,
      "loss": 0.8272,
      "step": 458160
    },
    {
      "epoch": 1.605771623435567,
      "grad_norm": 2.578125,
      "learning_rate": 2.5819139076458176e-05,
      "loss": 0.8627,
      "step": 458170
    },
    {
      "epoch": 1.6058066709424625,
      "grad_norm": 3.125,
      "learning_rate": 2.5818490047794474e-05,
      "loss": 0.8421,
      "step": 458180
    },
    {
      "epoch": 1.605841718449358,
      "grad_norm": 3.578125,
      "learning_rate": 2.581784101913077e-05,
      "loss": 0.8989,
      "step": 458190
    },
    {
      "epoch": 1.6058767659562538,
      "grad_norm": 2.921875,
      "learning_rate": 2.5817191990467067e-05,
      "loss": 0.8152,
      "step": 458200
    },
    {
      "epoch": 1.6059118134631492,
      "grad_norm": 2.921875,
      "learning_rate": 2.5816542961803365e-05,
      "loss": 0.9092,
      "step": 458210
    },
    {
      "epoch": 1.605946860970045,
      "grad_norm": 3.015625,
      "learning_rate": 2.5815893933139663e-05,
      "loss": 0.8322,
      "step": 458220
    },
    {
      "epoch": 1.6059819084769404,
      "grad_norm": 3.015625,
      "learning_rate": 2.581524490447596e-05,
      "loss": 0.8672,
      "step": 458230
    },
    {
      "epoch": 1.606016955983836,
      "grad_norm": 3.09375,
      "learning_rate": 2.581459587581226e-05,
      "loss": 0.9303,
      "step": 458240
    },
    {
      "epoch": 1.6060520034907317,
      "grad_norm": 2.234375,
      "learning_rate": 2.5813946847148557e-05,
      "loss": 0.7822,
      "step": 458250
    },
    {
      "epoch": 1.6060870509976273,
      "grad_norm": 3.125,
      "learning_rate": 2.5813297818484855e-05,
      "loss": 0.8834,
      "step": 458260
    },
    {
      "epoch": 1.6061220985045228,
      "grad_norm": 3.15625,
      "learning_rate": 2.5812648789821153e-05,
      "loss": 0.8742,
      "step": 458270
    },
    {
      "epoch": 1.6061571460114186,
      "grad_norm": 2.46875,
      "learning_rate": 2.581199976115745e-05,
      "loss": 0.7995,
      "step": 458280
    },
    {
      "epoch": 1.606192193518314,
      "grad_norm": 2.484375,
      "learning_rate": 2.581135073249375e-05,
      "loss": 0.8658,
      "step": 458290
    },
    {
      "epoch": 1.6062272410252096,
      "grad_norm": 3.171875,
      "learning_rate": 2.5810701703830047e-05,
      "loss": 0.8393,
      "step": 458300
    },
    {
      "epoch": 1.6062622885321054,
      "grad_norm": 2.546875,
      "learning_rate": 2.5810052675166348e-05,
      "loss": 0.8081,
      "step": 458310
    },
    {
      "epoch": 1.6062973360390007,
      "grad_norm": 2.921875,
      "learning_rate": 2.5809403646502646e-05,
      "loss": 0.7824,
      "step": 458320
    },
    {
      "epoch": 1.6063323835458965,
      "grad_norm": 2.265625,
      "learning_rate": 2.5808754617838944e-05,
      "loss": 0.7345,
      "step": 458330
    },
    {
      "epoch": 1.606367431052792,
      "grad_norm": 2.984375,
      "learning_rate": 2.5808105589175242e-05,
      "loss": 0.7899,
      "step": 458340
    },
    {
      "epoch": 1.6064024785596875,
      "grad_norm": 2.875,
      "learning_rate": 2.580745656051154e-05,
      "loss": 0.8886,
      "step": 458350
    },
    {
      "epoch": 1.6064375260665833,
      "grad_norm": 2.75,
      "learning_rate": 2.5806807531847838e-05,
      "loss": 0.827,
      "step": 458360
    },
    {
      "epoch": 1.6064725735734788,
      "grad_norm": 2.625,
      "learning_rate": 2.5806158503184136e-05,
      "loss": 0.826,
      "step": 458370
    },
    {
      "epoch": 1.6065076210803744,
      "grad_norm": 2.78125,
      "learning_rate": 2.5805509474520434e-05,
      "loss": 0.8414,
      "step": 458380
    },
    {
      "epoch": 1.6065426685872701,
      "grad_norm": 2.625,
      "learning_rate": 2.5804860445856732e-05,
      "loss": 0.8238,
      "step": 458390
    },
    {
      "epoch": 1.6065777160941657,
      "grad_norm": 2.703125,
      "learning_rate": 2.580421141719303e-05,
      "loss": 0.8527,
      "step": 458400
    },
    {
      "epoch": 1.6066127636010612,
      "grad_norm": 2.703125,
      "learning_rate": 2.5803562388529328e-05,
      "loss": 0.8176,
      "step": 458410
    },
    {
      "epoch": 1.606647811107957,
      "grad_norm": 3.421875,
      "learning_rate": 2.5802913359865626e-05,
      "loss": 0.8433,
      "step": 458420
    },
    {
      "epoch": 1.6066828586148523,
      "grad_norm": 2.828125,
      "learning_rate": 2.5802264331201924e-05,
      "loss": 0.8009,
      "step": 458430
    },
    {
      "epoch": 1.606717906121748,
      "grad_norm": 2.6875,
      "learning_rate": 2.5801615302538222e-05,
      "loss": 0.872,
      "step": 458440
    },
    {
      "epoch": 1.6067529536286438,
      "grad_norm": 2.578125,
      "learning_rate": 2.5800966273874523e-05,
      "loss": 0.8296,
      "step": 458450
    },
    {
      "epoch": 1.606788001135539,
      "grad_norm": 2.78125,
      "learning_rate": 2.580031724521082e-05,
      "loss": 0.7307,
      "step": 458460
    },
    {
      "epoch": 1.6068230486424349,
      "grad_norm": 2.921875,
      "learning_rate": 2.579966821654712e-05,
      "loss": 0.8469,
      "step": 458470
    },
    {
      "epoch": 1.6068580961493304,
      "grad_norm": 2.703125,
      "learning_rate": 2.5799019187883417e-05,
      "loss": 0.8822,
      "step": 458480
    },
    {
      "epoch": 1.606893143656226,
      "grad_norm": 3.09375,
      "learning_rate": 2.5798370159219715e-05,
      "loss": 0.7601,
      "step": 458490
    },
    {
      "epoch": 1.6069281911631217,
      "grad_norm": 2.40625,
      "learning_rate": 2.5797721130556013e-05,
      "loss": 0.8876,
      "step": 458500
    },
    {
      "epoch": 1.6069632386700172,
      "grad_norm": 2.828125,
      "learning_rate": 2.579707210189231e-05,
      "loss": 0.7905,
      "step": 458510
    },
    {
      "epoch": 1.6069982861769128,
      "grad_norm": 2.921875,
      "learning_rate": 2.579642307322861e-05,
      "loss": 0.8612,
      "step": 458520
    },
    {
      "epoch": 1.6070333336838085,
      "grad_norm": 3.375,
      "learning_rate": 2.5795774044564907e-05,
      "loss": 0.7597,
      "step": 458530
    },
    {
      "epoch": 1.607068381190704,
      "grad_norm": 3.0625,
      "learning_rate": 2.5795125015901205e-05,
      "loss": 0.9495,
      "step": 458540
    },
    {
      "epoch": 1.6071034286975996,
      "grad_norm": 2.96875,
      "learning_rate": 2.5794475987237503e-05,
      "loss": 0.8409,
      "step": 458550
    },
    {
      "epoch": 1.6071384762044953,
      "grad_norm": 2.921875,
      "learning_rate": 2.57938269585738e-05,
      "loss": 0.8655,
      "step": 458560
    },
    {
      "epoch": 1.6071735237113907,
      "grad_norm": 2.875,
      "learning_rate": 2.5793177929910096e-05,
      "loss": 0.9046,
      "step": 458570
    },
    {
      "epoch": 1.6072085712182864,
      "grad_norm": 2.6875,
      "learning_rate": 2.5792528901246394e-05,
      "loss": 0.7771,
      "step": 458580
    },
    {
      "epoch": 1.607243618725182,
      "grad_norm": 2.75,
      "learning_rate": 2.5791879872582692e-05,
      "loss": 0.885,
      "step": 458590
    },
    {
      "epoch": 1.6072786662320775,
      "grad_norm": 2.71875,
      "learning_rate": 2.579123084391899e-05,
      "loss": 0.7847,
      "step": 458600
    },
    {
      "epoch": 1.6073137137389732,
      "grad_norm": 2.78125,
      "learning_rate": 2.5790581815255288e-05,
      "loss": 0.7831,
      "step": 458610
    },
    {
      "epoch": 1.6073487612458688,
      "grad_norm": 3.109375,
      "learning_rate": 2.5789932786591586e-05,
      "loss": 0.8551,
      "step": 458620
    },
    {
      "epoch": 1.6073838087527643,
      "grad_norm": 2.984375,
      "learning_rate": 2.5789283757927884e-05,
      "loss": 0.8536,
      "step": 458630
    },
    {
      "epoch": 1.60741885625966,
      "grad_norm": 2.90625,
      "learning_rate": 2.5788634729264182e-05,
      "loss": 0.7672,
      "step": 458640
    },
    {
      "epoch": 1.6074539037665556,
      "grad_norm": 3.34375,
      "learning_rate": 2.578798570060048e-05,
      "loss": 0.832,
      "step": 458650
    },
    {
      "epoch": 1.6074889512734511,
      "grad_norm": 3.109375,
      "learning_rate": 2.5787336671936778e-05,
      "loss": 0.7808,
      "step": 458660
    },
    {
      "epoch": 1.607523998780347,
      "grad_norm": 3.0,
      "learning_rate": 2.5786687643273076e-05,
      "loss": 0.8539,
      "step": 458670
    },
    {
      "epoch": 1.6075590462872422,
      "grad_norm": 3.46875,
      "learning_rate": 2.5786038614609377e-05,
      "loss": 0.7989,
      "step": 458680
    },
    {
      "epoch": 1.607594093794138,
      "grad_norm": 2.6875,
      "learning_rate": 2.5785389585945675e-05,
      "loss": 0.9222,
      "step": 458690
    },
    {
      "epoch": 1.6076291413010335,
      "grad_norm": 2.671875,
      "learning_rate": 2.5784740557281973e-05,
      "loss": 0.8407,
      "step": 458700
    },
    {
      "epoch": 1.607664188807929,
      "grad_norm": 3.28125,
      "learning_rate": 2.578409152861827e-05,
      "loss": 0.9381,
      "step": 458710
    },
    {
      "epoch": 1.6076992363148248,
      "grad_norm": 2.890625,
      "learning_rate": 2.578344249995457e-05,
      "loss": 0.7856,
      "step": 458720
    },
    {
      "epoch": 1.6077342838217203,
      "grad_norm": 2.96875,
      "learning_rate": 2.5782793471290867e-05,
      "loss": 0.8664,
      "step": 458730
    },
    {
      "epoch": 1.6077693313286159,
      "grad_norm": 2.890625,
      "learning_rate": 2.5782144442627165e-05,
      "loss": 0.8995,
      "step": 458740
    },
    {
      "epoch": 1.6078043788355116,
      "grad_norm": 3.140625,
      "learning_rate": 2.5781495413963463e-05,
      "loss": 0.8433,
      "step": 458750
    },
    {
      "epoch": 1.6078394263424072,
      "grad_norm": 3.46875,
      "learning_rate": 2.578084638529976e-05,
      "loss": 0.8565,
      "step": 458760
    },
    {
      "epoch": 1.6078744738493027,
      "grad_norm": 2.53125,
      "learning_rate": 2.578019735663606e-05,
      "loss": 0.7874,
      "step": 458770
    },
    {
      "epoch": 1.6079095213561985,
      "grad_norm": 2.59375,
      "learning_rate": 2.5779548327972357e-05,
      "loss": 0.8209,
      "step": 458780
    },
    {
      "epoch": 1.6079445688630938,
      "grad_norm": 2.734375,
      "learning_rate": 2.5778899299308655e-05,
      "loss": 0.9173,
      "step": 458790
    },
    {
      "epoch": 1.6079796163699895,
      "grad_norm": 2.890625,
      "learning_rate": 2.5778250270644953e-05,
      "loss": 0.9005,
      "step": 458800
    },
    {
      "epoch": 1.608014663876885,
      "grad_norm": 2.921875,
      "learning_rate": 2.577760124198125e-05,
      "loss": 0.8029,
      "step": 458810
    },
    {
      "epoch": 1.6080497113837806,
      "grad_norm": 3.40625,
      "learning_rate": 2.5776952213317553e-05,
      "loss": 0.8662,
      "step": 458820
    },
    {
      "epoch": 1.6080847588906764,
      "grad_norm": 2.796875,
      "learning_rate": 2.577630318465385e-05,
      "loss": 0.8246,
      "step": 458830
    },
    {
      "epoch": 1.608119806397572,
      "grad_norm": 2.9375,
      "learning_rate": 2.577565415599015e-05,
      "loss": 0.8123,
      "step": 458840
    },
    {
      "epoch": 1.6081548539044674,
      "grad_norm": 2.75,
      "learning_rate": 2.5775005127326447e-05,
      "loss": 0.8293,
      "step": 458850
    },
    {
      "epoch": 1.6081899014113632,
      "grad_norm": 2.65625,
      "learning_rate": 2.5774356098662745e-05,
      "loss": 0.7616,
      "step": 458860
    },
    {
      "epoch": 1.6082249489182587,
      "grad_norm": 2.953125,
      "learning_rate": 2.5773707069999043e-05,
      "loss": 0.8318,
      "step": 458870
    },
    {
      "epoch": 1.6082599964251543,
      "grad_norm": 2.609375,
      "learning_rate": 2.577305804133534e-05,
      "loss": 0.8904,
      "step": 458880
    },
    {
      "epoch": 1.60829504393205,
      "grad_norm": 3.3125,
      "learning_rate": 2.577240901267164e-05,
      "loss": 0.8507,
      "step": 458890
    },
    {
      "epoch": 1.6083300914389453,
      "grad_norm": 2.6875,
      "learning_rate": 2.5771759984007937e-05,
      "loss": 0.8581,
      "step": 458900
    },
    {
      "epoch": 1.608365138945841,
      "grad_norm": 2.84375,
      "learning_rate": 2.5771110955344235e-05,
      "loss": 0.8395,
      "step": 458910
    },
    {
      "epoch": 1.6084001864527366,
      "grad_norm": 2.890625,
      "learning_rate": 2.5770461926680533e-05,
      "loss": 0.841,
      "step": 458920
    },
    {
      "epoch": 1.6084352339596322,
      "grad_norm": 3.28125,
      "learning_rate": 2.576981289801683e-05,
      "loss": 0.8485,
      "step": 458930
    },
    {
      "epoch": 1.608470281466528,
      "grad_norm": 4.34375,
      "learning_rate": 2.576916386935313e-05,
      "loss": 0.8494,
      "step": 458940
    },
    {
      "epoch": 1.6085053289734235,
      "grad_norm": 3.015625,
      "learning_rate": 2.5768514840689423e-05,
      "loss": 0.7968,
      "step": 458950
    },
    {
      "epoch": 1.608540376480319,
      "grad_norm": 2.578125,
      "learning_rate": 2.576786581202572e-05,
      "loss": 0.8092,
      "step": 458960
    },
    {
      "epoch": 1.6085754239872148,
      "grad_norm": 2.90625,
      "learning_rate": 2.576721678336202e-05,
      "loss": 0.8994,
      "step": 458970
    },
    {
      "epoch": 1.6086104714941103,
      "grad_norm": 2.640625,
      "learning_rate": 2.5766567754698317e-05,
      "loss": 0.878,
      "step": 458980
    },
    {
      "epoch": 1.6086455190010058,
      "grad_norm": 3.1875,
      "learning_rate": 2.5765918726034615e-05,
      "loss": 0.8715,
      "step": 458990
    },
    {
      "epoch": 1.6086805665079016,
      "grad_norm": 2.9375,
      "learning_rate": 2.5765269697370913e-05,
      "loss": 0.8662,
      "step": 459000
    },
    {
      "epoch": 1.608715614014797,
      "grad_norm": 2.84375,
      "learning_rate": 2.576462066870721e-05,
      "loss": 0.7848,
      "step": 459010
    },
    {
      "epoch": 1.6087506615216927,
      "grad_norm": 2.875,
      "learning_rate": 2.576397164004351e-05,
      "loss": 0.8851,
      "step": 459020
    },
    {
      "epoch": 1.6087857090285884,
      "grad_norm": 2.65625,
      "learning_rate": 2.5763322611379807e-05,
      "loss": 0.7714,
      "step": 459030
    },
    {
      "epoch": 1.6088207565354837,
      "grad_norm": 3.140625,
      "learning_rate": 2.5762673582716105e-05,
      "loss": 0.9231,
      "step": 459040
    },
    {
      "epoch": 1.6088558040423795,
      "grad_norm": 2.9375,
      "learning_rate": 2.5762024554052407e-05,
      "loss": 0.8044,
      "step": 459050
    },
    {
      "epoch": 1.608890851549275,
      "grad_norm": 3.21875,
      "learning_rate": 2.5761375525388705e-05,
      "loss": 0.8663,
      "step": 459060
    },
    {
      "epoch": 1.6089258990561706,
      "grad_norm": 3.25,
      "learning_rate": 2.5760726496725003e-05,
      "loss": 0.8336,
      "step": 459070
    },
    {
      "epoch": 1.6089609465630663,
      "grad_norm": 3.046875,
      "learning_rate": 2.57600774680613e-05,
      "loss": 0.7979,
      "step": 459080
    },
    {
      "epoch": 1.6089959940699619,
      "grad_norm": 2.8125,
      "learning_rate": 2.57594284393976e-05,
      "loss": 0.8859,
      "step": 459090
    },
    {
      "epoch": 1.6090310415768574,
      "grad_norm": 2.6875,
      "learning_rate": 2.5758779410733897e-05,
      "loss": 0.7902,
      "step": 459100
    },
    {
      "epoch": 1.6090660890837531,
      "grad_norm": 2.625,
      "learning_rate": 2.5758130382070195e-05,
      "loss": 0.8003,
      "step": 459110
    },
    {
      "epoch": 1.6091011365906485,
      "grad_norm": 2.65625,
      "learning_rate": 2.5757481353406493e-05,
      "loss": 0.8665,
      "step": 459120
    },
    {
      "epoch": 1.6091361840975442,
      "grad_norm": 2.8125,
      "learning_rate": 2.575683232474279e-05,
      "loss": 0.8599,
      "step": 459130
    },
    {
      "epoch": 1.60917123160444,
      "grad_norm": 2.96875,
      "learning_rate": 2.575618329607909e-05,
      "loss": 0.9871,
      "step": 459140
    },
    {
      "epoch": 1.6092062791113353,
      "grad_norm": 2.875,
      "learning_rate": 2.5755534267415387e-05,
      "loss": 0.7115,
      "step": 459150
    },
    {
      "epoch": 1.609241326618231,
      "grad_norm": 3.5,
      "learning_rate": 2.5754885238751685e-05,
      "loss": 0.7227,
      "step": 459160
    },
    {
      "epoch": 1.6092763741251266,
      "grad_norm": 2.375,
      "learning_rate": 2.5754236210087983e-05,
      "loss": 0.8002,
      "step": 459170
    },
    {
      "epoch": 1.6093114216320221,
      "grad_norm": 2.875,
      "learning_rate": 2.5753587181424284e-05,
      "loss": 0.7976,
      "step": 459180
    },
    {
      "epoch": 1.6093464691389179,
      "grad_norm": 2.359375,
      "learning_rate": 2.5752938152760582e-05,
      "loss": 0.8,
      "step": 459190
    },
    {
      "epoch": 1.6093815166458134,
      "grad_norm": 3.125,
      "learning_rate": 2.575228912409688e-05,
      "loss": 0.887,
      "step": 459200
    },
    {
      "epoch": 1.609416564152709,
      "grad_norm": 3.03125,
      "learning_rate": 2.5751640095433178e-05,
      "loss": 0.7433,
      "step": 459210
    },
    {
      "epoch": 1.6094516116596047,
      "grad_norm": 2.5,
      "learning_rate": 2.5750991066769476e-05,
      "loss": 0.7639,
      "step": 459220
    },
    {
      "epoch": 1.6094866591665002,
      "grad_norm": 3.5625,
      "learning_rate": 2.5750342038105774e-05,
      "loss": 0.7941,
      "step": 459230
    },
    {
      "epoch": 1.6095217066733958,
      "grad_norm": 2.671875,
      "learning_rate": 2.5749693009442072e-05,
      "loss": 0.8395,
      "step": 459240
    },
    {
      "epoch": 1.6095567541802915,
      "grad_norm": 2.921875,
      "learning_rate": 2.574904398077837e-05,
      "loss": 0.899,
      "step": 459250
    },
    {
      "epoch": 1.6095918016871869,
      "grad_norm": 2.890625,
      "learning_rate": 2.5748394952114668e-05,
      "loss": 0.848,
      "step": 459260
    },
    {
      "epoch": 1.6096268491940826,
      "grad_norm": 3.015625,
      "learning_rate": 2.5747745923450966e-05,
      "loss": 0.8702,
      "step": 459270
    },
    {
      "epoch": 1.6096618967009781,
      "grad_norm": 2.9375,
      "learning_rate": 2.5747096894787264e-05,
      "loss": 0.7444,
      "step": 459280
    },
    {
      "epoch": 1.6096969442078737,
      "grad_norm": 3.4375,
      "learning_rate": 2.5746447866123562e-05,
      "loss": 0.8515,
      "step": 459290
    },
    {
      "epoch": 1.6097319917147694,
      "grad_norm": 2.734375,
      "learning_rate": 2.574579883745986e-05,
      "loss": 0.8571,
      "step": 459300
    },
    {
      "epoch": 1.609767039221665,
      "grad_norm": 2.53125,
      "learning_rate": 2.5745149808796158e-05,
      "loss": 0.6851,
      "step": 459310
    },
    {
      "epoch": 1.6098020867285605,
      "grad_norm": 2.921875,
      "learning_rate": 2.5744500780132453e-05,
      "loss": 0.8236,
      "step": 459320
    },
    {
      "epoch": 1.6098371342354563,
      "grad_norm": 2.984375,
      "learning_rate": 2.574385175146875e-05,
      "loss": 0.8323,
      "step": 459330
    },
    {
      "epoch": 1.6098721817423518,
      "grad_norm": 2.734375,
      "learning_rate": 2.574320272280505e-05,
      "loss": 0.9144,
      "step": 459340
    },
    {
      "epoch": 1.6099072292492473,
      "grad_norm": 2.578125,
      "learning_rate": 2.5742553694141347e-05,
      "loss": 0.6995,
      "step": 459350
    },
    {
      "epoch": 1.609942276756143,
      "grad_norm": 2.640625,
      "learning_rate": 2.5741904665477645e-05,
      "loss": 0.8118,
      "step": 459360
    },
    {
      "epoch": 1.6099773242630384,
      "grad_norm": 2.578125,
      "learning_rate": 2.5741255636813943e-05,
      "loss": 0.8784,
      "step": 459370
    },
    {
      "epoch": 1.6100123717699342,
      "grad_norm": 2.578125,
      "learning_rate": 2.574060660815024e-05,
      "loss": 0.8743,
      "step": 459380
    },
    {
      "epoch": 1.6100474192768297,
      "grad_norm": 2.703125,
      "learning_rate": 2.573995757948654e-05,
      "loss": 0.7707,
      "step": 459390
    },
    {
      "epoch": 1.6100824667837252,
      "grad_norm": 2.578125,
      "learning_rate": 2.5739308550822837e-05,
      "loss": 0.8319,
      "step": 459400
    },
    {
      "epoch": 1.610117514290621,
      "grad_norm": 2.4375,
      "learning_rate": 2.5738659522159138e-05,
      "loss": 0.8512,
      "step": 459410
    },
    {
      "epoch": 1.6101525617975165,
      "grad_norm": 2.90625,
      "learning_rate": 2.5738010493495436e-05,
      "loss": 0.7681,
      "step": 459420
    },
    {
      "epoch": 1.610187609304412,
      "grad_norm": 2.859375,
      "learning_rate": 2.5737361464831734e-05,
      "loss": 0.9018,
      "step": 459430
    },
    {
      "epoch": 1.6102226568113078,
      "grad_norm": 3.34375,
      "learning_rate": 2.5736712436168032e-05,
      "loss": 0.8809,
      "step": 459440
    },
    {
      "epoch": 1.6102577043182034,
      "grad_norm": 2.75,
      "learning_rate": 2.573606340750433e-05,
      "loss": 0.7985,
      "step": 459450
    },
    {
      "epoch": 1.610292751825099,
      "grad_norm": 3.5,
      "learning_rate": 2.5735414378840628e-05,
      "loss": 0.8594,
      "step": 459460
    },
    {
      "epoch": 1.6103277993319947,
      "grad_norm": 2.875,
      "learning_rate": 2.5734765350176926e-05,
      "loss": 0.94,
      "step": 459470
    },
    {
      "epoch": 1.61036284683889,
      "grad_norm": 2.78125,
      "learning_rate": 2.5734116321513224e-05,
      "loss": 0.8442,
      "step": 459480
    },
    {
      "epoch": 1.6103978943457857,
      "grad_norm": 3.03125,
      "learning_rate": 2.5733467292849522e-05,
      "loss": 0.7389,
      "step": 459490
    },
    {
      "epoch": 1.6104329418526813,
      "grad_norm": 3.140625,
      "learning_rate": 2.573281826418582e-05,
      "loss": 0.8694,
      "step": 459500
    },
    {
      "epoch": 1.6104679893595768,
      "grad_norm": 3.0625,
      "learning_rate": 2.5732169235522118e-05,
      "loss": 0.8083,
      "step": 459510
    },
    {
      "epoch": 1.6105030368664726,
      "grad_norm": 3.203125,
      "learning_rate": 2.5731520206858416e-05,
      "loss": 0.8734,
      "step": 459520
    },
    {
      "epoch": 1.610538084373368,
      "grad_norm": 2.859375,
      "learning_rate": 2.5730871178194714e-05,
      "loss": 0.8869,
      "step": 459530
    },
    {
      "epoch": 1.6105731318802636,
      "grad_norm": 3.5625,
      "learning_rate": 2.5730222149531012e-05,
      "loss": 0.8459,
      "step": 459540
    },
    {
      "epoch": 1.6106081793871594,
      "grad_norm": 2.65625,
      "learning_rate": 2.5729573120867313e-05,
      "loss": 0.8396,
      "step": 459550
    },
    {
      "epoch": 1.610643226894055,
      "grad_norm": 3.09375,
      "learning_rate": 2.572892409220361e-05,
      "loss": 0.7894,
      "step": 459560
    },
    {
      "epoch": 1.6106782744009505,
      "grad_norm": 2.90625,
      "learning_rate": 2.572827506353991e-05,
      "loss": 0.864,
      "step": 459570
    },
    {
      "epoch": 1.6107133219078462,
      "grad_norm": 3.296875,
      "learning_rate": 2.5727626034876207e-05,
      "loss": 0.8552,
      "step": 459580
    },
    {
      "epoch": 1.6107483694147415,
      "grad_norm": 2.703125,
      "learning_rate": 2.5726977006212505e-05,
      "loss": 0.8272,
      "step": 459590
    },
    {
      "epoch": 1.6107834169216373,
      "grad_norm": 2.9375,
      "learning_rate": 2.5726327977548803e-05,
      "loss": 0.8361,
      "step": 459600
    },
    {
      "epoch": 1.6108184644285328,
      "grad_norm": 2.515625,
      "learning_rate": 2.57256789488851e-05,
      "loss": 0.7525,
      "step": 459610
    },
    {
      "epoch": 1.6108535119354284,
      "grad_norm": 3.046875,
      "learning_rate": 2.57250299202214e-05,
      "loss": 0.7816,
      "step": 459620
    },
    {
      "epoch": 1.6108885594423241,
      "grad_norm": 3.296875,
      "learning_rate": 2.5724380891557697e-05,
      "loss": 0.9177,
      "step": 459630
    },
    {
      "epoch": 1.6109236069492197,
      "grad_norm": 2.921875,
      "learning_rate": 2.5723731862893995e-05,
      "loss": 0.8394,
      "step": 459640
    },
    {
      "epoch": 1.6109586544561152,
      "grad_norm": 2.90625,
      "learning_rate": 2.5723082834230293e-05,
      "loss": 0.9245,
      "step": 459650
    },
    {
      "epoch": 1.610993701963011,
      "grad_norm": 2.984375,
      "learning_rate": 2.572243380556659e-05,
      "loss": 0.8311,
      "step": 459660
    },
    {
      "epoch": 1.6110287494699065,
      "grad_norm": 2.84375,
      "learning_rate": 2.572178477690289e-05,
      "loss": 0.8617,
      "step": 459670
    },
    {
      "epoch": 1.611063796976802,
      "grad_norm": 2.8125,
      "learning_rate": 2.5721135748239187e-05,
      "loss": 0.8486,
      "step": 459680
    },
    {
      "epoch": 1.6110988444836978,
      "grad_norm": 2.5625,
      "learning_rate": 2.572048671957549e-05,
      "loss": 0.7579,
      "step": 459690
    },
    {
      "epoch": 1.611133891990593,
      "grad_norm": 3.140625,
      "learning_rate": 2.571983769091178e-05,
      "loss": 0.8973,
      "step": 459700
    },
    {
      "epoch": 1.6111689394974889,
      "grad_norm": 2.59375,
      "learning_rate": 2.5719188662248078e-05,
      "loss": 0.79,
      "step": 459710
    },
    {
      "epoch": 1.6112039870043846,
      "grad_norm": 3.03125,
      "learning_rate": 2.5718539633584376e-05,
      "loss": 0.8184,
      "step": 459720
    },
    {
      "epoch": 1.61123903451128,
      "grad_norm": 3.25,
      "learning_rate": 2.5717890604920674e-05,
      "loss": 0.841,
      "step": 459730
    },
    {
      "epoch": 1.6112740820181757,
      "grad_norm": 3.453125,
      "learning_rate": 2.5717241576256972e-05,
      "loss": 0.9015,
      "step": 459740
    },
    {
      "epoch": 1.6113091295250712,
      "grad_norm": 2.75,
      "learning_rate": 2.571659254759327e-05,
      "loss": 0.7573,
      "step": 459750
    },
    {
      "epoch": 1.6113441770319668,
      "grad_norm": 3.0,
      "learning_rate": 2.5715943518929568e-05,
      "loss": 0.8699,
      "step": 459760
    },
    {
      "epoch": 1.6113792245388625,
      "grad_norm": 2.84375,
      "learning_rate": 2.5715294490265866e-05,
      "loss": 0.8615,
      "step": 459770
    },
    {
      "epoch": 1.611414272045758,
      "grad_norm": 3.0,
      "learning_rate": 2.5714645461602167e-05,
      "loss": 0.9406,
      "step": 459780
    },
    {
      "epoch": 1.6114493195526536,
      "grad_norm": 3.0,
      "learning_rate": 2.5713996432938465e-05,
      "loss": 0.8218,
      "step": 459790
    },
    {
      "epoch": 1.6114843670595493,
      "grad_norm": 2.859375,
      "learning_rate": 2.5713347404274763e-05,
      "loss": 0.8728,
      "step": 459800
    },
    {
      "epoch": 1.6115194145664447,
      "grad_norm": 2.8125,
      "learning_rate": 2.571269837561106e-05,
      "loss": 0.8494,
      "step": 459810
    },
    {
      "epoch": 1.6115544620733404,
      "grad_norm": 2.890625,
      "learning_rate": 2.571204934694736e-05,
      "loss": 0.752,
      "step": 459820
    },
    {
      "epoch": 1.6115895095802362,
      "grad_norm": 3.09375,
      "learning_rate": 2.5711400318283657e-05,
      "loss": 0.8298,
      "step": 459830
    },
    {
      "epoch": 1.6116245570871315,
      "grad_norm": 3.265625,
      "learning_rate": 2.5710751289619955e-05,
      "loss": 0.8943,
      "step": 459840
    },
    {
      "epoch": 1.6116596045940272,
      "grad_norm": 2.890625,
      "learning_rate": 2.5710102260956253e-05,
      "loss": 0.8189,
      "step": 459850
    },
    {
      "epoch": 1.6116946521009228,
      "grad_norm": 2.359375,
      "learning_rate": 2.570945323229255e-05,
      "loss": 0.8396,
      "step": 459860
    },
    {
      "epoch": 1.6117296996078183,
      "grad_norm": 2.6875,
      "learning_rate": 2.570880420362885e-05,
      "loss": 0.8716,
      "step": 459870
    },
    {
      "epoch": 1.611764747114714,
      "grad_norm": 2.703125,
      "learning_rate": 2.5708155174965147e-05,
      "loss": 0.7906,
      "step": 459880
    },
    {
      "epoch": 1.6117997946216096,
      "grad_norm": 2.6875,
      "learning_rate": 2.5707506146301445e-05,
      "loss": 0.811,
      "step": 459890
    },
    {
      "epoch": 1.6118348421285051,
      "grad_norm": 3.046875,
      "learning_rate": 2.5706857117637743e-05,
      "loss": 0.8857,
      "step": 459900
    },
    {
      "epoch": 1.611869889635401,
      "grad_norm": 3.015625,
      "learning_rate": 2.570620808897404e-05,
      "loss": 0.9073,
      "step": 459910
    },
    {
      "epoch": 1.6119049371422964,
      "grad_norm": 3.140625,
      "learning_rate": 2.5705559060310343e-05,
      "loss": 0.8116,
      "step": 459920
    },
    {
      "epoch": 1.611939984649192,
      "grad_norm": 2.875,
      "learning_rate": 2.570491003164664e-05,
      "loss": 0.8557,
      "step": 459930
    },
    {
      "epoch": 1.6119750321560877,
      "grad_norm": 2.953125,
      "learning_rate": 2.570426100298294e-05,
      "loss": 0.852,
      "step": 459940
    },
    {
      "epoch": 1.612010079662983,
      "grad_norm": 3.015625,
      "learning_rate": 2.5703611974319237e-05,
      "loss": 0.7543,
      "step": 459950
    },
    {
      "epoch": 1.6120451271698788,
      "grad_norm": 3.171875,
      "learning_rate": 2.5702962945655535e-05,
      "loss": 0.8925,
      "step": 459960
    },
    {
      "epoch": 1.6120801746767743,
      "grad_norm": 3.015625,
      "learning_rate": 2.5702313916991833e-05,
      "loss": 0.7933,
      "step": 459970
    },
    {
      "epoch": 1.6121152221836699,
      "grad_norm": 2.71875,
      "learning_rate": 2.570166488832813e-05,
      "loss": 0.7789,
      "step": 459980
    },
    {
      "epoch": 1.6121502696905656,
      "grad_norm": 3.0625,
      "learning_rate": 2.570101585966443e-05,
      "loss": 0.7934,
      "step": 459990
    },
    {
      "epoch": 1.6121853171974612,
      "grad_norm": 3.765625,
      "learning_rate": 2.5700366831000727e-05,
      "loss": 0.8751,
      "step": 460000
    },
    {
      "epoch": 1.6121853171974612,
      "eval_loss": 0.7857187986373901,
      "eval_runtime": 552.3972,
      "eval_samples_per_second": 688.7,
      "eval_steps_per_second": 57.392,
      "step": 460000
    },
    {
      "epoch": 1.6122203647043567,
      "grad_norm": 2.9375,
      "learning_rate": 2.5699717802337025e-05,
      "loss": 0.9465,
      "step": 460010
    },
    {
      "epoch": 1.6122554122112525,
      "grad_norm": 2.765625,
      "learning_rate": 2.5699068773673323e-05,
      "loss": 0.7866,
      "step": 460020
    },
    {
      "epoch": 1.612290459718148,
      "grad_norm": 2.703125,
      "learning_rate": 2.569841974500962e-05,
      "loss": 0.8942,
      "step": 460030
    },
    {
      "epoch": 1.6123255072250435,
      "grad_norm": 3.015625,
      "learning_rate": 2.569777071634592e-05,
      "loss": 0.8294,
      "step": 460040
    },
    {
      "epoch": 1.6123605547319393,
      "grad_norm": 3.6875,
      "learning_rate": 2.5697121687682217e-05,
      "loss": 0.8273,
      "step": 460050
    },
    {
      "epoch": 1.6123956022388346,
      "grad_norm": 2.875,
      "learning_rate": 2.5696472659018518e-05,
      "loss": 0.9451,
      "step": 460060
    },
    {
      "epoch": 1.6124306497457304,
      "grad_norm": 3.203125,
      "learning_rate": 2.5695823630354816e-05,
      "loss": 0.8107,
      "step": 460070
    },
    {
      "epoch": 1.612465697252626,
      "grad_norm": 3.0,
      "learning_rate": 2.5695174601691107e-05,
      "loss": 0.7805,
      "step": 460080
    },
    {
      "epoch": 1.6125007447595214,
      "grad_norm": 2.921875,
      "learning_rate": 2.5694525573027405e-05,
      "loss": 0.9071,
      "step": 460090
    },
    {
      "epoch": 1.6125357922664172,
      "grad_norm": 2.59375,
      "learning_rate": 2.5693876544363703e-05,
      "loss": 0.8255,
      "step": 460100
    },
    {
      "epoch": 1.6125708397733127,
      "grad_norm": 2.578125,
      "learning_rate": 2.56932275157e-05,
      "loss": 0.8121,
      "step": 460110
    },
    {
      "epoch": 1.6126058872802083,
      "grad_norm": 2.8125,
      "learning_rate": 2.56925784870363e-05,
      "loss": 0.7755,
      "step": 460120
    },
    {
      "epoch": 1.612640934787104,
      "grad_norm": 2.890625,
      "learning_rate": 2.5691929458372597e-05,
      "loss": 0.8497,
      "step": 460130
    },
    {
      "epoch": 1.6126759822939996,
      "grad_norm": 2.921875,
      "learning_rate": 2.5691280429708895e-05,
      "loss": 0.8013,
      "step": 460140
    },
    {
      "epoch": 1.612711029800895,
      "grad_norm": 3.125,
      "learning_rate": 2.5690631401045197e-05,
      "loss": 0.8144,
      "step": 460150
    },
    {
      "epoch": 1.6127460773077908,
      "grad_norm": 2.71875,
      "learning_rate": 2.5689982372381495e-05,
      "loss": 0.7956,
      "step": 460160
    },
    {
      "epoch": 1.6127811248146862,
      "grad_norm": 2.828125,
      "learning_rate": 2.5689333343717793e-05,
      "loss": 0.8266,
      "step": 460170
    },
    {
      "epoch": 1.612816172321582,
      "grad_norm": 3.0625,
      "learning_rate": 2.568868431505409e-05,
      "loss": 0.833,
      "step": 460180
    },
    {
      "epoch": 1.6128512198284775,
      "grad_norm": 2.6875,
      "learning_rate": 2.568803528639039e-05,
      "loss": 0.8887,
      "step": 460190
    },
    {
      "epoch": 1.612886267335373,
      "grad_norm": 2.609375,
      "learning_rate": 2.5687386257726687e-05,
      "loss": 0.8844,
      "step": 460200
    },
    {
      "epoch": 1.6129213148422687,
      "grad_norm": 2.578125,
      "learning_rate": 2.5686737229062985e-05,
      "loss": 0.7667,
      "step": 460210
    },
    {
      "epoch": 1.6129563623491643,
      "grad_norm": 3.328125,
      "learning_rate": 2.5686088200399283e-05,
      "loss": 0.8928,
      "step": 460220
    },
    {
      "epoch": 1.6129914098560598,
      "grad_norm": 3.140625,
      "learning_rate": 2.568543917173558e-05,
      "loss": 0.79,
      "step": 460230
    },
    {
      "epoch": 1.6130264573629556,
      "grad_norm": 2.5,
      "learning_rate": 2.568479014307188e-05,
      "loss": 0.8554,
      "step": 460240
    },
    {
      "epoch": 1.6130615048698511,
      "grad_norm": 3.421875,
      "learning_rate": 2.5684141114408177e-05,
      "loss": 0.8874,
      "step": 460250
    },
    {
      "epoch": 1.6130965523767467,
      "grad_norm": 3.53125,
      "learning_rate": 2.5683492085744475e-05,
      "loss": 0.7258,
      "step": 460260
    },
    {
      "epoch": 1.6131315998836424,
      "grad_norm": 2.84375,
      "learning_rate": 2.5682843057080773e-05,
      "loss": 0.8362,
      "step": 460270
    },
    {
      "epoch": 1.6131666473905377,
      "grad_norm": 3.328125,
      "learning_rate": 2.5682194028417074e-05,
      "loss": 0.8782,
      "step": 460280
    },
    {
      "epoch": 1.6132016948974335,
      "grad_norm": 2.859375,
      "learning_rate": 2.5681544999753372e-05,
      "loss": 0.833,
      "step": 460290
    },
    {
      "epoch": 1.613236742404329,
      "grad_norm": 3.078125,
      "learning_rate": 2.568089597108967e-05,
      "loss": 0.891,
      "step": 460300
    },
    {
      "epoch": 1.6132717899112246,
      "grad_norm": 3.359375,
      "learning_rate": 2.5680246942425968e-05,
      "loss": 0.7542,
      "step": 460310
    },
    {
      "epoch": 1.6133068374181203,
      "grad_norm": 2.9375,
      "learning_rate": 2.5679597913762266e-05,
      "loss": 0.9086,
      "step": 460320
    },
    {
      "epoch": 1.6133418849250158,
      "grad_norm": 2.9375,
      "learning_rate": 2.5678948885098564e-05,
      "loss": 0.8497,
      "step": 460330
    },
    {
      "epoch": 1.6133769324319114,
      "grad_norm": 3.46875,
      "learning_rate": 2.5678299856434862e-05,
      "loss": 0.9711,
      "step": 460340
    },
    {
      "epoch": 1.6134119799388071,
      "grad_norm": 3.171875,
      "learning_rate": 2.567765082777116e-05,
      "loss": 0.9001,
      "step": 460350
    },
    {
      "epoch": 1.6134470274457027,
      "grad_norm": 2.796875,
      "learning_rate": 2.5677001799107458e-05,
      "loss": 0.8087,
      "step": 460360
    },
    {
      "epoch": 1.6134820749525982,
      "grad_norm": 3.234375,
      "learning_rate": 2.5676352770443756e-05,
      "loss": 0.8317,
      "step": 460370
    },
    {
      "epoch": 1.613517122459494,
      "grad_norm": 2.859375,
      "learning_rate": 2.5675703741780054e-05,
      "loss": 0.8777,
      "step": 460380
    },
    {
      "epoch": 1.6135521699663893,
      "grad_norm": 3.0625,
      "learning_rate": 2.5675054713116352e-05,
      "loss": 0.9189,
      "step": 460390
    },
    {
      "epoch": 1.613587217473285,
      "grad_norm": 2.6875,
      "learning_rate": 2.567440568445265e-05,
      "loss": 0.8088,
      "step": 460400
    },
    {
      "epoch": 1.6136222649801808,
      "grad_norm": 2.46875,
      "learning_rate": 2.5673756655788948e-05,
      "loss": 0.8188,
      "step": 460410
    },
    {
      "epoch": 1.6136573124870761,
      "grad_norm": 3.078125,
      "learning_rate": 2.567310762712525e-05,
      "loss": 0.8656,
      "step": 460420
    },
    {
      "epoch": 1.6136923599939719,
      "grad_norm": 3.25,
      "learning_rate": 2.5672458598461548e-05,
      "loss": 0.8612,
      "step": 460430
    },
    {
      "epoch": 1.6137274075008674,
      "grad_norm": 3.390625,
      "learning_rate": 2.5671809569797846e-05,
      "loss": 0.9078,
      "step": 460440
    },
    {
      "epoch": 1.613762455007763,
      "grad_norm": 3.015625,
      "learning_rate": 2.5671160541134137e-05,
      "loss": 0.8547,
      "step": 460450
    },
    {
      "epoch": 1.6137975025146587,
      "grad_norm": 2.765625,
      "learning_rate": 2.5670511512470435e-05,
      "loss": 0.8386,
      "step": 460460
    },
    {
      "epoch": 1.6138325500215542,
      "grad_norm": 3.171875,
      "learning_rate": 2.5669862483806733e-05,
      "loss": 0.799,
      "step": 460470
    },
    {
      "epoch": 1.6138675975284498,
      "grad_norm": 2.859375,
      "learning_rate": 2.566921345514303e-05,
      "loss": 0.9197,
      "step": 460480
    },
    {
      "epoch": 1.6139026450353455,
      "grad_norm": 2.75,
      "learning_rate": 2.566856442647933e-05,
      "loss": 0.8475,
      "step": 460490
    },
    {
      "epoch": 1.6139376925422408,
      "grad_norm": 2.734375,
      "learning_rate": 2.5667915397815627e-05,
      "loss": 0.8809,
      "step": 460500
    },
    {
      "epoch": 1.6139727400491366,
      "grad_norm": 2.625,
      "learning_rate": 2.5667266369151928e-05,
      "loss": 0.8731,
      "step": 460510
    },
    {
      "epoch": 1.6140077875560324,
      "grad_norm": 3.25,
      "learning_rate": 2.5666617340488226e-05,
      "loss": 0.8999,
      "step": 460520
    },
    {
      "epoch": 1.6140428350629277,
      "grad_norm": 2.71875,
      "learning_rate": 2.5665968311824524e-05,
      "loss": 0.7969,
      "step": 460530
    },
    {
      "epoch": 1.6140778825698234,
      "grad_norm": 2.78125,
      "learning_rate": 2.5665319283160822e-05,
      "loss": 0.8465,
      "step": 460540
    },
    {
      "epoch": 1.614112930076719,
      "grad_norm": 3.203125,
      "learning_rate": 2.566467025449712e-05,
      "loss": 0.8106,
      "step": 460550
    },
    {
      "epoch": 1.6141479775836145,
      "grad_norm": 3.109375,
      "learning_rate": 2.5664021225833418e-05,
      "loss": 0.8166,
      "step": 460560
    },
    {
      "epoch": 1.6141830250905103,
      "grad_norm": 2.859375,
      "learning_rate": 2.5663372197169716e-05,
      "loss": 0.9001,
      "step": 460570
    },
    {
      "epoch": 1.6142180725974058,
      "grad_norm": 3.640625,
      "learning_rate": 2.5662723168506014e-05,
      "loss": 0.8938,
      "step": 460580
    },
    {
      "epoch": 1.6142531201043013,
      "grad_norm": 3.015625,
      "learning_rate": 2.5662074139842312e-05,
      "loss": 0.8483,
      "step": 460590
    },
    {
      "epoch": 1.614288167611197,
      "grad_norm": 2.890625,
      "learning_rate": 2.566142511117861e-05,
      "loss": 0.8008,
      "step": 460600
    },
    {
      "epoch": 1.6143232151180926,
      "grad_norm": 3.390625,
      "learning_rate": 2.5660776082514908e-05,
      "loss": 0.8457,
      "step": 460610
    },
    {
      "epoch": 1.6143582626249882,
      "grad_norm": 2.671875,
      "learning_rate": 2.5660127053851206e-05,
      "loss": 0.8232,
      "step": 460620
    },
    {
      "epoch": 1.614393310131884,
      "grad_norm": 3.046875,
      "learning_rate": 2.5659478025187504e-05,
      "loss": 0.8395,
      "step": 460630
    },
    {
      "epoch": 1.6144283576387792,
      "grad_norm": 3.078125,
      "learning_rate": 2.5658828996523802e-05,
      "loss": 0.8423,
      "step": 460640
    },
    {
      "epoch": 1.614463405145675,
      "grad_norm": 2.890625,
      "learning_rate": 2.5658179967860104e-05,
      "loss": 0.823,
      "step": 460650
    },
    {
      "epoch": 1.6144984526525705,
      "grad_norm": 2.828125,
      "learning_rate": 2.56575309391964e-05,
      "loss": 0.7659,
      "step": 460660
    },
    {
      "epoch": 1.614533500159466,
      "grad_norm": 2.90625,
      "learning_rate": 2.56568819105327e-05,
      "loss": 0.8634,
      "step": 460670
    },
    {
      "epoch": 1.6145685476663618,
      "grad_norm": 2.84375,
      "learning_rate": 2.5656232881868998e-05,
      "loss": 0.8541,
      "step": 460680
    },
    {
      "epoch": 1.6146035951732574,
      "grad_norm": 2.734375,
      "learning_rate": 2.5655583853205296e-05,
      "loss": 0.8228,
      "step": 460690
    },
    {
      "epoch": 1.614638642680153,
      "grad_norm": 2.765625,
      "learning_rate": 2.5654934824541594e-05,
      "loss": 0.7598,
      "step": 460700
    },
    {
      "epoch": 1.6146736901870486,
      "grad_norm": 3.171875,
      "learning_rate": 2.565428579587789e-05,
      "loss": 0.8821,
      "step": 460710
    },
    {
      "epoch": 1.6147087376939442,
      "grad_norm": 2.890625,
      "learning_rate": 2.565363676721419e-05,
      "loss": 0.9123,
      "step": 460720
    },
    {
      "epoch": 1.6147437852008397,
      "grad_norm": 2.859375,
      "learning_rate": 2.5652987738550488e-05,
      "loss": 0.8027,
      "step": 460730
    },
    {
      "epoch": 1.6147788327077355,
      "grad_norm": 2.78125,
      "learning_rate": 2.5652338709886786e-05,
      "loss": 0.7735,
      "step": 460740
    },
    {
      "epoch": 1.6148138802146308,
      "grad_norm": 2.953125,
      "learning_rate": 2.5651689681223084e-05,
      "loss": 0.879,
      "step": 460750
    },
    {
      "epoch": 1.6148489277215266,
      "grad_norm": 2.75,
      "learning_rate": 2.565104065255938e-05,
      "loss": 0.7959,
      "step": 460760
    },
    {
      "epoch": 1.614883975228422,
      "grad_norm": 3.15625,
      "learning_rate": 2.565039162389568e-05,
      "loss": 0.822,
      "step": 460770
    },
    {
      "epoch": 1.6149190227353176,
      "grad_norm": 3.28125,
      "learning_rate": 2.5649742595231978e-05,
      "loss": 0.814,
      "step": 460780
    },
    {
      "epoch": 1.6149540702422134,
      "grad_norm": 2.609375,
      "learning_rate": 2.564909356656828e-05,
      "loss": 0.9048,
      "step": 460790
    },
    {
      "epoch": 1.614989117749109,
      "grad_norm": 2.828125,
      "learning_rate": 2.5648444537904577e-05,
      "loss": 0.7791,
      "step": 460800
    },
    {
      "epoch": 1.6150241652560045,
      "grad_norm": 2.75,
      "learning_rate": 2.5647795509240875e-05,
      "loss": 0.8356,
      "step": 460810
    },
    {
      "epoch": 1.6150592127629002,
      "grad_norm": 3.140625,
      "learning_rate": 2.5647146480577173e-05,
      "loss": 0.8076,
      "step": 460820
    },
    {
      "epoch": 1.6150942602697957,
      "grad_norm": 2.28125,
      "learning_rate": 2.5646497451913464e-05,
      "loss": 0.786,
      "step": 460830
    },
    {
      "epoch": 1.6151293077766913,
      "grad_norm": 2.875,
      "learning_rate": 2.5645848423249762e-05,
      "loss": 0.79,
      "step": 460840
    },
    {
      "epoch": 1.615164355283587,
      "grad_norm": 2.8125,
      "learning_rate": 2.564519939458606e-05,
      "loss": 0.8373,
      "step": 460850
    },
    {
      "epoch": 1.6151994027904824,
      "grad_norm": 3.5625,
      "learning_rate": 2.5644550365922358e-05,
      "loss": 0.8441,
      "step": 460860
    },
    {
      "epoch": 1.615234450297378,
      "grad_norm": 2.859375,
      "learning_rate": 2.5643901337258656e-05,
      "loss": 0.8564,
      "step": 460870
    },
    {
      "epoch": 1.6152694978042736,
      "grad_norm": 3.21875,
      "learning_rate": 2.5643252308594958e-05,
      "loss": 0.9098,
      "step": 460880
    },
    {
      "epoch": 1.6153045453111692,
      "grad_norm": 2.71875,
      "learning_rate": 2.5642603279931256e-05,
      "loss": 0.7898,
      "step": 460890
    },
    {
      "epoch": 1.615339592818065,
      "grad_norm": 2.75,
      "learning_rate": 2.5641954251267554e-05,
      "loss": 0.8719,
      "step": 460900
    },
    {
      "epoch": 1.6153746403249605,
      "grad_norm": 2.859375,
      "learning_rate": 2.564130522260385e-05,
      "loss": 0.7764,
      "step": 460910
    },
    {
      "epoch": 1.615409687831856,
      "grad_norm": 2.828125,
      "learning_rate": 2.564065619394015e-05,
      "loss": 0.8535,
      "step": 460920
    },
    {
      "epoch": 1.6154447353387518,
      "grad_norm": 3.046875,
      "learning_rate": 2.5640007165276448e-05,
      "loss": 0.7758,
      "step": 460930
    },
    {
      "epoch": 1.6154797828456473,
      "grad_norm": 3.046875,
      "learning_rate": 2.5639358136612746e-05,
      "loss": 0.9557,
      "step": 460940
    },
    {
      "epoch": 1.6155148303525428,
      "grad_norm": 2.578125,
      "learning_rate": 2.5638709107949044e-05,
      "loss": 0.8456,
      "step": 460950
    },
    {
      "epoch": 1.6155498778594386,
      "grad_norm": 2.75,
      "learning_rate": 2.563806007928534e-05,
      "loss": 0.7693,
      "step": 460960
    },
    {
      "epoch": 1.615584925366334,
      "grad_norm": 2.75,
      "learning_rate": 2.563741105062164e-05,
      "loss": 0.8008,
      "step": 460970
    },
    {
      "epoch": 1.6156199728732297,
      "grad_norm": 2.796875,
      "learning_rate": 2.5636762021957938e-05,
      "loss": 0.8983,
      "step": 460980
    },
    {
      "epoch": 1.6156550203801252,
      "grad_norm": 3.265625,
      "learning_rate": 2.5636112993294236e-05,
      "loss": 0.811,
      "step": 460990
    },
    {
      "epoch": 1.6156900678870207,
      "grad_norm": 2.96875,
      "learning_rate": 2.5635463964630534e-05,
      "loss": 0.8302,
      "step": 461000
    },
    {
      "epoch": 1.6157251153939165,
      "grad_norm": 2.578125,
      "learning_rate": 2.563481493596683e-05,
      "loss": 0.8971,
      "step": 461010
    },
    {
      "epoch": 1.615760162900812,
      "grad_norm": 2.734375,
      "learning_rate": 2.5634165907303133e-05,
      "loss": 0.7553,
      "step": 461020
    },
    {
      "epoch": 1.6157952104077076,
      "grad_norm": 2.8125,
      "learning_rate": 2.563351687863943e-05,
      "loss": 0.7959,
      "step": 461030
    },
    {
      "epoch": 1.6158302579146033,
      "grad_norm": 2.71875,
      "learning_rate": 2.563286784997573e-05,
      "loss": 0.8426,
      "step": 461040
    },
    {
      "epoch": 1.6158653054214989,
      "grad_norm": 2.84375,
      "learning_rate": 2.5632218821312027e-05,
      "loss": 0.8425,
      "step": 461050
    },
    {
      "epoch": 1.6159003529283944,
      "grad_norm": 2.9375,
      "learning_rate": 2.5631569792648325e-05,
      "loss": 0.8726,
      "step": 461060
    },
    {
      "epoch": 1.6159354004352902,
      "grad_norm": 2.984375,
      "learning_rate": 2.5630920763984623e-05,
      "loss": 0.853,
      "step": 461070
    },
    {
      "epoch": 1.6159704479421855,
      "grad_norm": 2.75,
      "learning_rate": 2.563027173532092e-05,
      "loss": 0.8588,
      "step": 461080
    },
    {
      "epoch": 1.6160054954490812,
      "grad_norm": 2.796875,
      "learning_rate": 2.562962270665722e-05,
      "loss": 0.8587,
      "step": 461090
    },
    {
      "epoch": 1.616040542955977,
      "grad_norm": 3.03125,
      "learning_rate": 2.5628973677993517e-05,
      "loss": 0.7904,
      "step": 461100
    },
    {
      "epoch": 1.6160755904628723,
      "grad_norm": 2.953125,
      "learning_rate": 2.5628324649329815e-05,
      "loss": 0.8372,
      "step": 461110
    },
    {
      "epoch": 1.616110637969768,
      "grad_norm": 2.75,
      "learning_rate": 2.5627675620666113e-05,
      "loss": 0.7806,
      "step": 461120
    },
    {
      "epoch": 1.6161456854766636,
      "grad_norm": 2.5625,
      "learning_rate": 2.562702659200241e-05,
      "loss": 0.843,
      "step": 461130
    },
    {
      "epoch": 1.6161807329835591,
      "grad_norm": 3.203125,
      "learning_rate": 2.562637756333871e-05,
      "loss": 0.9167,
      "step": 461140
    },
    {
      "epoch": 1.616215780490455,
      "grad_norm": 2.765625,
      "learning_rate": 2.5625728534675007e-05,
      "loss": 0.8742,
      "step": 461150
    },
    {
      "epoch": 1.6162508279973504,
      "grad_norm": 3.03125,
      "learning_rate": 2.562507950601131e-05,
      "loss": 0.7738,
      "step": 461160
    },
    {
      "epoch": 1.616285875504246,
      "grad_norm": 3.015625,
      "learning_rate": 2.5624430477347606e-05,
      "loss": 0.8364,
      "step": 461170
    },
    {
      "epoch": 1.6163209230111417,
      "grad_norm": 2.859375,
      "learning_rate": 2.5623781448683904e-05,
      "loss": 0.8568,
      "step": 461180
    },
    {
      "epoch": 1.616355970518037,
      "grad_norm": 3.265625,
      "learning_rate": 2.5623132420020202e-05,
      "loss": 0.8822,
      "step": 461190
    },
    {
      "epoch": 1.6163910180249328,
      "grad_norm": 2.703125,
      "learning_rate": 2.5622483391356494e-05,
      "loss": 0.8427,
      "step": 461200
    },
    {
      "epoch": 1.6164260655318285,
      "grad_norm": 2.90625,
      "learning_rate": 2.562183436269279e-05,
      "loss": 0.8249,
      "step": 461210
    },
    {
      "epoch": 1.6164611130387239,
      "grad_norm": 3.03125,
      "learning_rate": 2.562118533402909e-05,
      "loss": 0.8129,
      "step": 461220
    },
    {
      "epoch": 1.6164961605456196,
      "grad_norm": 2.828125,
      "learning_rate": 2.5620536305365388e-05,
      "loss": 0.7581,
      "step": 461230
    },
    {
      "epoch": 1.6165312080525152,
      "grad_norm": 2.671875,
      "learning_rate": 2.561988727670169e-05,
      "loss": 0.8933,
      "step": 461240
    },
    {
      "epoch": 1.6165662555594107,
      "grad_norm": 3.296875,
      "learning_rate": 2.5619238248037987e-05,
      "loss": 0.7803,
      "step": 461250
    },
    {
      "epoch": 1.6166013030663064,
      "grad_norm": 2.515625,
      "learning_rate": 2.5618589219374285e-05,
      "loss": 0.7958,
      "step": 461260
    },
    {
      "epoch": 1.616636350573202,
      "grad_norm": 2.75,
      "learning_rate": 2.5617940190710583e-05,
      "loss": 0.7664,
      "step": 461270
    },
    {
      "epoch": 1.6166713980800975,
      "grad_norm": 2.953125,
      "learning_rate": 2.561729116204688e-05,
      "loss": 0.8095,
      "step": 461280
    },
    {
      "epoch": 1.6167064455869933,
      "grad_norm": 2.796875,
      "learning_rate": 2.561664213338318e-05,
      "loss": 0.8356,
      "step": 461290
    },
    {
      "epoch": 1.6167414930938888,
      "grad_norm": 3.71875,
      "learning_rate": 2.5615993104719477e-05,
      "loss": 0.794,
      "step": 461300
    },
    {
      "epoch": 1.6167765406007844,
      "grad_norm": 2.875,
      "learning_rate": 2.5615344076055775e-05,
      "loss": 0.8397,
      "step": 461310
    },
    {
      "epoch": 1.61681158810768,
      "grad_norm": 2.765625,
      "learning_rate": 2.5614695047392073e-05,
      "loss": 0.7884,
      "step": 461320
    },
    {
      "epoch": 1.6168466356145754,
      "grad_norm": 2.9375,
      "learning_rate": 2.561404601872837e-05,
      "loss": 0.7985,
      "step": 461330
    },
    {
      "epoch": 1.6168816831214712,
      "grad_norm": 3.53125,
      "learning_rate": 2.561339699006467e-05,
      "loss": 0.8179,
      "step": 461340
    },
    {
      "epoch": 1.6169167306283667,
      "grad_norm": 2.734375,
      "learning_rate": 2.5612747961400967e-05,
      "loss": 0.8181,
      "step": 461350
    },
    {
      "epoch": 1.6169517781352623,
      "grad_norm": 3.21875,
      "learning_rate": 2.5612098932737265e-05,
      "loss": 0.9505,
      "step": 461360
    },
    {
      "epoch": 1.616986825642158,
      "grad_norm": 2.640625,
      "learning_rate": 2.5611449904073563e-05,
      "loss": 0.8118,
      "step": 461370
    },
    {
      "epoch": 1.6170218731490535,
      "grad_norm": 2.640625,
      "learning_rate": 2.5610800875409864e-05,
      "loss": 0.8776,
      "step": 461380
    },
    {
      "epoch": 1.617056920655949,
      "grad_norm": 3.15625,
      "learning_rate": 2.5610151846746162e-05,
      "loss": 0.825,
      "step": 461390
    },
    {
      "epoch": 1.6170919681628448,
      "grad_norm": 3.203125,
      "learning_rate": 2.560950281808246e-05,
      "loss": 0.8733,
      "step": 461400
    },
    {
      "epoch": 1.6171270156697404,
      "grad_norm": 3.296875,
      "learning_rate": 2.560885378941876e-05,
      "loss": 0.7861,
      "step": 461410
    },
    {
      "epoch": 1.617162063176636,
      "grad_norm": 3.34375,
      "learning_rate": 2.5608204760755056e-05,
      "loss": 0.7727,
      "step": 461420
    },
    {
      "epoch": 1.6171971106835317,
      "grad_norm": 3.375,
      "learning_rate": 2.5607555732091354e-05,
      "loss": 0.8243,
      "step": 461430
    },
    {
      "epoch": 1.617232158190427,
      "grad_norm": 2.875,
      "learning_rate": 2.5606906703427652e-05,
      "loss": 0.8135,
      "step": 461440
    },
    {
      "epoch": 1.6172672056973227,
      "grad_norm": 2.515625,
      "learning_rate": 2.560625767476395e-05,
      "loss": 0.8797,
      "step": 461450
    },
    {
      "epoch": 1.6173022532042183,
      "grad_norm": 2.953125,
      "learning_rate": 2.560560864610025e-05,
      "loss": 0.87,
      "step": 461460
    },
    {
      "epoch": 1.6173373007111138,
      "grad_norm": 2.65625,
      "learning_rate": 2.5604959617436546e-05,
      "loss": 0.8558,
      "step": 461470
    },
    {
      "epoch": 1.6173723482180096,
      "grad_norm": 3.21875,
      "learning_rate": 2.5604310588772844e-05,
      "loss": 0.7779,
      "step": 461480
    },
    {
      "epoch": 1.617407395724905,
      "grad_norm": 2.703125,
      "learning_rate": 2.5603661560109142e-05,
      "loss": 0.8415,
      "step": 461490
    },
    {
      "epoch": 1.6174424432318006,
      "grad_norm": 2.609375,
      "learning_rate": 2.560301253144544e-05,
      "loss": 0.8417,
      "step": 461500
    },
    {
      "epoch": 1.6174774907386964,
      "grad_norm": 2.9375,
      "learning_rate": 2.560236350278174e-05,
      "loss": 0.8512,
      "step": 461510
    },
    {
      "epoch": 1.617512538245592,
      "grad_norm": 3.0,
      "learning_rate": 2.560171447411804e-05,
      "loss": 0.8574,
      "step": 461520
    },
    {
      "epoch": 1.6175475857524875,
      "grad_norm": 3.453125,
      "learning_rate": 2.5601065445454338e-05,
      "loss": 0.9431,
      "step": 461530
    },
    {
      "epoch": 1.6175826332593832,
      "grad_norm": 2.984375,
      "learning_rate": 2.5600416416790636e-05,
      "loss": 0.7768,
      "step": 461540
    },
    {
      "epoch": 1.6176176807662785,
      "grad_norm": 2.78125,
      "learning_rate": 2.5599767388126934e-05,
      "loss": 0.8399,
      "step": 461550
    },
    {
      "epoch": 1.6176527282731743,
      "grad_norm": 2.984375,
      "learning_rate": 2.5599118359463232e-05,
      "loss": 0.8987,
      "step": 461560
    },
    {
      "epoch": 1.6176877757800698,
      "grad_norm": 2.78125,
      "learning_rate": 2.559846933079953e-05,
      "loss": 0.8506,
      "step": 461570
    },
    {
      "epoch": 1.6177228232869654,
      "grad_norm": 3.203125,
      "learning_rate": 2.559782030213582e-05,
      "loss": 0.8784,
      "step": 461580
    },
    {
      "epoch": 1.6177578707938611,
      "grad_norm": 2.6875,
      "learning_rate": 2.559717127347212e-05,
      "loss": 0.8696,
      "step": 461590
    },
    {
      "epoch": 1.6177929183007567,
      "grad_norm": 2.515625,
      "learning_rate": 2.5596522244808417e-05,
      "loss": 0.8915,
      "step": 461600
    },
    {
      "epoch": 1.6178279658076522,
      "grad_norm": 2.859375,
      "learning_rate": 2.559587321614472e-05,
      "loss": 0.8196,
      "step": 461610
    },
    {
      "epoch": 1.617863013314548,
      "grad_norm": 3.109375,
      "learning_rate": 2.5595224187481016e-05,
      "loss": 0.8514,
      "step": 461620
    },
    {
      "epoch": 1.6178980608214435,
      "grad_norm": 2.9375,
      "learning_rate": 2.5594575158817314e-05,
      "loss": 0.8082,
      "step": 461630
    },
    {
      "epoch": 1.617933108328339,
      "grad_norm": 2.6875,
      "learning_rate": 2.5593926130153612e-05,
      "loss": 0.7853,
      "step": 461640
    },
    {
      "epoch": 1.6179681558352348,
      "grad_norm": 2.84375,
      "learning_rate": 2.559327710148991e-05,
      "loss": 0.8949,
      "step": 461650
    },
    {
      "epoch": 1.61800320334213,
      "grad_norm": 3.15625,
      "learning_rate": 2.559262807282621e-05,
      "loss": 0.8604,
      "step": 461660
    },
    {
      "epoch": 1.6180382508490259,
      "grad_norm": 2.734375,
      "learning_rate": 2.5591979044162506e-05,
      "loss": 0.8287,
      "step": 461670
    },
    {
      "epoch": 1.6180732983559214,
      "grad_norm": 3.109375,
      "learning_rate": 2.5591330015498804e-05,
      "loss": 0.8332,
      "step": 461680
    },
    {
      "epoch": 1.618108345862817,
      "grad_norm": 2.921875,
      "learning_rate": 2.5590680986835102e-05,
      "loss": 0.8644,
      "step": 461690
    },
    {
      "epoch": 1.6181433933697127,
      "grad_norm": 2.9375,
      "learning_rate": 2.55900319581714e-05,
      "loss": 0.8487,
      "step": 461700
    },
    {
      "epoch": 1.6181784408766082,
      "grad_norm": 2.875,
      "learning_rate": 2.55893829295077e-05,
      "loss": 0.8655,
      "step": 461710
    },
    {
      "epoch": 1.6182134883835038,
      "grad_norm": 2.953125,
      "learning_rate": 2.5588733900843996e-05,
      "loss": 0.8453,
      "step": 461720
    },
    {
      "epoch": 1.6182485358903995,
      "grad_norm": 3.109375,
      "learning_rate": 2.5588084872180294e-05,
      "loss": 0.8191,
      "step": 461730
    },
    {
      "epoch": 1.618283583397295,
      "grad_norm": 2.625,
      "learning_rate": 2.5587435843516592e-05,
      "loss": 0.8262,
      "step": 461740
    },
    {
      "epoch": 1.6183186309041906,
      "grad_norm": 2.859375,
      "learning_rate": 2.5586786814852894e-05,
      "loss": 0.8221,
      "step": 461750
    },
    {
      "epoch": 1.6183536784110863,
      "grad_norm": 3.09375,
      "learning_rate": 2.5586137786189192e-05,
      "loss": 0.8921,
      "step": 461760
    },
    {
      "epoch": 1.6183887259179817,
      "grad_norm": 3.390625,
      "learning_rate": 2.558548875752549e-05,
      "loss": 0.7727,
      "step": 461770
    },
    {
      "epoch": 1.6184237734248774,
      "grad_norm": 2.828125,
      "learning_rate": 2.5584839728861788e-05,
      "loss": 0.8721,
      "step": 461780
    },
    {
      "epoch": 1.6184588209317732,
      "grad_norm": 3.140625,
      "learning_rate": 2.5584190700198086e-05,
      "loss": 0.7641,
      "step": 461790
    },
    {
      "epoch": 1.6184938684386685,
      "grad_norm": 2.84375,
      "learning_rate": 2.5583541671534384e-05,
      "loss": 0.8503,
      "step": 461800
    },
    {
      "epoch": 1.6185289159455643,
      "grad_norm": 3.0,
      "learning_rate": 2.5582892642870682e-05,
      "loss": 0.8062,
      "step": 461810
    },
    {
      "epoch": 1.6185639634524598,
      "grad_norm": 3.234375,
      "learning_rate": 2.558224361420698e-05,
      "loss": 0.7961,
      "step": 461820
    },
    {
      "epoch": 1.6185990109593553,
      "grad_norm": 2.71875,
      "learning_rate": 2.5581594585543278e-05,
      "loss": 0.8802,
      "step": 461830
    },
    {
      "epoch": 1.618634058466251,
      "grad_norm": 3.015625,
      "learning_rate": 2.5580945556879576e-05,
      "loss": 0.8328,
      "step": 461840
    },
    {
      "epoch": 1.6186691059731466,
      "grad_norm": 2.5,
      "learning_rate": 2.5580296528215874e-05,
      "loss": 0.8096,
      "step": 461850
    },
    {
      "epoch": 1.6187041534800422,
      "grad_norm": 2.484375,
      "learning_rate": 2.5579647499552172e-05,
      "loss": 0.8109,
      "step": 461860
    },
    {
      "epoch": 1.618739200986938,
      "grad_norm": 3.359375,
      "learning_rate": 2.557899847088847e-05,
      "loss": 0.7862,
      "step": 461870
    },
    {
      "epoch": 1.6187742484938334,
      "grad_norm": 3.1875,
      "learning_rate": 2.5578349442224768e-05,
      "loss": 0.8773,
      "step": 461880
    },
    {
      "epoch": 1.618809296000729,
      "grad_norm": 2.703125,
      "learning_rate": 2.557770041356107e-05,
      "loss": 0.8975,
      "step": 461890
    },
    {
      "epoch": 1.6188443435076247,
      "grad_norm": 2.53125,
      "learning_rate": 2.5577051384897367e-05,
      "loss": 0.7759,
      "step": 461900
    },
    {
      "epoch": 1.61887939101452,
      "grad_norm": 3.03125,
      "learning_rate": 2.5576402356233665e-05,
      "loss": 0.8702,
      "step": 461910
    },
    {
      "epoch": 1.6189144385214158,
      "grad_norm": 3.265625,
      "learning_rate": 2.5575753327569963e-05,
      "loss": 0.8305,
      "step": 461920
    },
    {
      "epoch": 1.6189494860283113,
      "grad_norm": 3.03125,
      "learning_rate": 2.557510429890626e-05,
      "loss": 0.8266,
      "step": 461930
    },
    {
      "epoch": 1.6189845335352069,
      "grad_norm": 2.859375,
      "learning_rate": 2.557445527024256e-05,
      "loss": 0.8462,
      "step": 461940
    },
    {
      "epoch": 1.6190195810421026,
      "grad_norm": 3.03125,
      "learning_rate": 2.5573806241578857e-05,
      "loss": 0.8617,
      "step": 461950
    },
    {
      "epoch": 1.6190546285489982,
      "grad_norm": 2.734375,
      "learning_rate": 2.557315721291515e-05,
      "loss": 0.8401,
      "step": 461960
    },
    {
      "epoch": 1.6190896760558937,
      "grad_norm": 2.953125,
      "learning_rate": 2.5572508184251446e-05,
      "loss": 0.8133,
      "step": 461970
    },
    {
      "epoch": 1.6191247235627895,
      "grad_norm": 2.890625,
      "learning_rate": 2.5571859155587748e-05,
      "loss": 0.8772,
      "step": 461980
    },
    {
      "epoch": 1.619159771069685,
      "grad_norm": 2.96875,
      "learning_rate": 2.5571210126924046e-05,
      "loss": 0.8355,
      "step": 461990
    },
    {
      "epoch": 1.6191948185765805,
      "grad_norm": 2.890625,
      "learning_rate": 2.5570561098260344e-05,
      "loss": 0.8918,
      "step": 462000
    },
    {
      "epoch": 1.6192298660834763,
      "grad_norm": 2.84375,
      "learning_rate": 2.5569912069596642e-05,
      "loss": 0.8643,
      "step": 462010
    },
    {
      "epoch": 1.6192649135903716,
      "grad_norm": 3.125,
      "learning_rate": 2.556926304093294e-05,
      "loss": 0.8887,
      "step": 462020
    },
    {
      "epoch": 1.6192999610972674,
      "grad_norm": 2.75,
      "learning_rate": 2.5568614012269238e-05,
      "loss": 0.8895,
      "step": 462030
    },
    {
      "epoch": 1.619335008604163,
      "grad_norm": 2.9375,
      "learning_rate": 2.5567964983605536e-05,
      "loss": 0.7343,
      "step": 462040
    },
    {
      "epoch": 1.6193700561110584,
      "grad_norm": 2.671875,
      "learning_rate": 2.5567315954941834e-05,
      "loss": 0.7975,
      "step": 462050
    },
    {
      "epoch": 1.6194051036179542,
      "grad_norm": 2.765625,
      "learning_rate": 2.5566666926278132e-05,
      "loss": 0.8581,
      "step": 462060
    },
    {
      "epoch": 1.6194401511248497,
      "grad_norm": 3.28125,
      "learning_rate": 2.556601789761443e-05,
      "loss": 0.9038,
      "step": 462070
    },
    {
      "epoch": 1.6194751986317453,
      "grad_norm": 3.265625,
      "learning_rate": 2.5565368868950728e-05,
      "loss": 0.8222,
      "step": 462080
    },
    {
      "epoch": 1.619510246138641,
      "grad_norm": 2.6875,
      "learning_rate": 2.5564719840287026e-05,
      "loss": 0.8103,
      "step": 462090
    },
    {
      "epoch": 1.6195452936455366,
      "grad_norm": 2.703125,
      "learning_rate": 2.5564070811623324e-05,
      "loss": 0.87,
      "step": 462100
    },
    {
      "epoch": 1.619580341152432,
      "grad_norm": 2.703125,
      "learning_rate": 2.5563421782959622e-05,
      "loss": 0.8123,
      "step": 462110
    },
    {
      "epoch": 1.6196153886593279,
      "grad_norm": 2.953125,
      "learning_rate": 2.5562772754295923e-05,
      "loss": 0.7632,
      "step": 462120
    },
    {
      "epoch": 1.6196504361662232,
      "grad_norm": 2.71875,
      "learning_rate": 2.556212372563222e-05,
      "loss": 0.93,
      "step": 462130
    },
    {
      "epoch": 1.619685483673119,
      "grad_norm": 3.25,
      "learning_rate": 2.556147469696852e-05,
      "loss": 0.8307,
      "step": 462140
    },
    {
      "epoch": 1.6197205311800145,
      "grad_norm": 3.046875,
      "learning_rate": 2.5560825668304817e-05,
      "loss": 0.8038,
      "step": 462150
    },
    {
      "epoch": 1.61975557868691,
      "grad_norm": 2.90625,
      "learning_rate": 2.5560176639641115e-05,
      "loss": 0.8682,
      "step": 462160
    },
    {
      "epoch": 1.6197906261938058,
      "grad_norm": 2.984375,
      "learning_rate": 2.5559527610977413e-05,
      "loss": 0.7773,
      "step": 462170
    },
    {
      "epoch": 1.6198256737007013,
      "grad_norm": 2.5,
      "learning_rate": 2.555887858231371e-05,
      "loss": 0.8787,
      "step": 462180
    },
    {
      "epoch": 1.6198607212075968,
      "grad_norm": 3.015625,
      "learning_rate": 2.555822955365001e-05,
      "loss": 0.8113,
      "step": 462190
    },
    {
      "epoch": 1.6198957687144926,
      "grad_norm": 2.796875,
      "learning_rate": 2.5557580524986307e-05,
      "loss": 0.8162,
      "step": 462200
    },
    {
      "epoch": 1.6199308162213881,
      "grad_norm": 2.984375,
      "learning_rate": 2.5556931496322605e-05,
      "loss": 0.7124,
      "step": 462210
    },
    {
      "epoch": 1.6199658637282837,
      "grad_norm": 3.125,
      "learning_rate": 2.5556282467658903e-05,
      "loss": 0.8465,
      "step": 462220
    },
    {
      "epoch": 1.6200009112351794,
      "grad_norm": 2.765625,
      "learning_rate": 2.55556334389952e-05,
      "loss": 0.8943,
      "step": 462230
    },
    {
      "epoch": 1.6200359587420747,
      "grad_norm": 2.640625,
      "learning_rate": 2.55549844103315e-05,
      "loss": 0.8478,
      "step": 462240
    },
    {
      "epoch": 1.6200710062489705,
      "grad_norm": 2.796875,
      "learning_rate": 2.5554335381667797e-05,
      "loss": 0.8178,
      "step": 462250
    },
    {
      "epoch": 1.620106053755866,
      "grad_norm": 2.953125,
      "learning_rate": 2.55536863530041e-05,
      "loss": 0.8197,
      "step": 462260
    },
    {
      "epoch": 1.6201411012627616,
      "grad_norm": 2.9375,
      "learning_rate": 2.5553037324340397e-05,
      "loss": 0.8072,
      "step": 462270
    },
    {
      "epoch": 1.6201761487696573,
      "grad_norm": 4.09375,
      "learning_rate": 2.5552388295676695e-05,
      "loss": 0.7873,
      "step": 462280
    },
    {
      "epoch": 1.6202111962765529,
      "grad_norm": 2.953125,
      "learning_rate": 2.5551739267012993e-05,
      "loss": 0.77,
      "step": 462290
    },
    {
      "epoch": 1.6202462437834484,
      "grad_norm": 2.828125,
      "learning_rate": 2.555109023834929e-05,
      "loss": 0.755,
      "step": 462300
    },
    {
      "epoch": 1.6202812912903442,
      "grad_norm": 2.53125,
      "learning_rate": 2.555044120968559e-05,
      "loss": 0.8284,
      "step": 462310
    },
    {
      "epoch": 1.6203163387972397,
      "grad_norm": 2.78125,
      "learning_rate": 2.5549792181021887e-05,
      "loss": 0.8689,
      "step": 462320
    },
    {
      "epoch": 1.6203513863041352,
      "grad_norm": 3.015625,
      "learning_rate": 2.5549143152358178e-05,
      "loss": 0.7559,
      "step": 462330
    },
    {
      "epoch": 1.620386433811031,
      "grad_norm": 2.9375,
      "learning_rate": 2.554849412369448e-05,
      "loss": 0.8191,
      "step": 462340
    },
    {
      "epoch": 1.6204214813179263,
      "grad_norm": 2.65625,
      "learning_rate": 2.5547845095030777e-05,
      "loss": 0.8462,
      "step": 462350
    },
    {
      "epoch": 1.620456528824822,
      "grad_norm": 5.5,
      "learning_rate": 2.5547196066367075e-05,
      "loss": 0.7679,
      "step": 462360
    },
    {
      "epoch": 1.6204915763317178,
      "grad_norm": 2.8125,
      "learning_rate": 2.5546547037703373e-05,
      "loss": 0.8133,
      "step": 462370
    },
    {
      "epoch": 1.6205266238386131,
      "grad_norm": 2.921875,
      "learning_rate": 2.554589800903967e-05,
      "loss": 0.8748,
      "step": 462380
    },
    {
      "epoch": 1.6205616713455089,
      "grad_norm": 3.609375,
      "learning_rate": 2.554524898037597e-05,
      "loss": 0.9715,
      "step": 462390
    },
    {
      "epoch": 1.6205967188524044,
      "grad_norm": 2.78125,
      "learning_rate": 2.5544599951712267e-05,
      "loss": 0.8674,
      "step": 462400
    },
    {
      "epoch": 1.6206317663593,
      "grad_norm": 2.984375,
      "learning_rate": 2.5543950923048565e-05,
      "loss": 0.9013,
      "step": 462410
    },
    {
      "epoch": 1.6206668138661957,
      "grad_norm": 3.21875,
      "learning_rate": 2.5543301894384863e-05,
      "loss": 0.78,
      "step": 462420
    },
    {
      "epoch": 1.6207018613730912,
      "grad_norm": 2.65625,
      "learning_rate": 2.554265286572116e-05,
      "loss": 0.8939,
      "step": 462430
    },
    {
      "epoch": 1.6207369088799868,
      "grad_norm": 2.5,
      "learning_rate": 2.554200383705746e-05,
      "loss": 0.8235,
      "step": 462440
    },
    {
      "epoch": 1.6207719563868825,
      "grad_norm": 3.015625,
      "learning_rate": 2.5541354808393757e-05,
      "loss": 0.8079,
      "step": 462450
    },
    {
      "epoch": 1.6208070038937779,
      "grad_norm": 2.90625,
      "learning_rate": 2.5540705779730055e-05,
      "loss": 0.8209,
      "step": 462460
    },
    {
      "epoch": 1.6208420514006736,
      "grad_norm": 2.75,
      "learning_rate": 2.5540056751066353e-05,
      "loss": 0.7606,
      "step": 462470
    },
    {
      "epoch": 1.6208770989075694,
      "grad_norm": 2.828125,
      "learning_rate": 2.5539407722402655e-05,
      "loss": 0.8173,
      "step": 462480
    },
    {
      "epoch": 1.6209121464144647,
      "grad_norm": 2.875,
      "learning_rate": 2.5538758693738953e-05,
      "loss": 0.7949,
      "step": 462490
    },
    {
      "epoch": 1.6209471939213604,
      "grad_norm": 2.75,
      "learning_rate": 2.553810966507525e-05,
      "loss": 0.8153,
      "step": 462500
    },
    {
      "epoch": 1.620982241428256,
      "grad_norm": 2.703125,
      "learning_rate": 2.553746063641155e-05,
      "loss": 0.8208,
      "step": 462510
    },
    {
      "epoch": 1.6210172889351515,
      "grad_norm": 3.296875,
      "learning_rate": 2.5536811607747847e-05,
      "loss": 0.7865,
      "step": 462520
    },
    {
      "epoch": 1.6210523364420473,
      "grad_norm": 3.265625,
      "learning_rate": 2.5536162579084145e-05,
      "loss": 0.8755,
      "step": 462530
    },
    {
      "epoch": 1.6210873839489428,
      "grad_norm": 3.421875,
      "learning_rate": 2.5535513550420443e-05,
      "loss": 0.8579,
      "step": 462540
    },
    {
      "epoch": 1.6211224314558383,
      "grad_norm": 3.265625,
      "learning_rate": 2.553486452175674e-05,
      "loss": 0.8628,
      "step": 462550
    },
    {
      "epoch": 1.621157478962734,
      "grad_norm": 2.828125,
      "learning_rate": 2.553421549309304e-05,
      "loss": 0.8965,
      "step": 462560
    },
    {
      "epoch": 1.6211925264696296,
      "grad_norm": 2.484375,
      "learning_rate": 2.5533566464429337e-05,
      "loss": 0.7012,
      "step": 462570
    },
    {
      "epoch": 1.6212275739765252,
      "grad_norm": 2.328125,
      "learning_rate": 2.5532917435765635e-05,
      "loss": 0.7851,
      "step": 462580
    },
    {
      "epoch": 1.621262621483421,
      "grad_norm": 3.078125,
      "learning_rate": 2.5532268407101933e-05,
      "loss": 0.8128,
      "step": 462590
    },
    {
      "epoch": 1.6212976689903162,
      "grad_norm": 2.875,
      "learning_rate": 2.553161937843823e-05,
      "loss": 0.8662,
      "step": 462600
    },
    {
      "epoch": 1.621332716497212,
      "grad_norm": 2.734375,
      "learning_rate": 2.553097034977453e-05,
      "loss": 0.8038,
      "step": 462610
    },
    {
      "epoch": 1.6213677640041075,
      "grad_norm": 3.0,
      "learning_rate": 2.553032132111083e-05,
      "loss": 0.8239,
      "step": 462620
    },
    {
      "epoch": 1.621402811511003,
      "grad_norm": 2.734375,
      "learning_rate": 2.5529672292447128e-05,
      "loss": 0.8809,
      "step": 462630
    },
    {
      "epoch": 1.6214378590178988,
      "grad_norm": 2.59375,
      "learning_rate": 2.5529023263783426e-05,
      "loss": 0.8397,
      "step": 462640
    },
    {
      "epoch": 1.6214729065247944,
      "grad_norm": 2.703125,
      "learning_rate": 2.5528374235119724e-05,
      "loss": 0.8498,
      "step": 462650
    },
    {
      "epoch": 1.62150795403169,
      "grad_norm": 2.21875,
      "learning_rate": 2.5527725206456022e-05,
      "loss": 0.8024,
      "step": 462660
    },
    {
      "epoch": 1.6215430015385857,
      "grad_norm": 3.40625,
      "learning_rate": 2.552707617779232e-05,
      "loss": 0.8834,
      "step": 462670
    },
    {
      "epoch": 1.6215780490454812,
      "grad_norm": 2.5,
      "learning_rate": 2.5526427149128618e-05,
      "loss": 0.8055,
      "step": 462680
    },
    {
      "epoch": 1.6216130965523767,
      "grad_norm": 3.203125,
      "learning_rate": 2.5525778120464916e-05,
      "loss": 0.8365,
      "step": 462690
    },
    {
      "epoch": 1.6216481440592725,
      "grad_norm": 3.09375,
      "learning_rate": 2.5525129091801214e-05,
      "loss": 0.8198,
      "step": 462700
    },
    {
      "epoch": 1.6216831915661678,
      "grad_norm": 2.90625,
      "learning_rate": 2.552448006313751e-05,
      "loss": 0.8587,
      "step": 462710
    },
    {
      "epoch": 1.6217182390730636,
      "grad_norm": 2.671875,
      "learning_rate": 2.5523831034473807e-05,
      "loss": 0.8484,
      "step": 462720
    },
    {
      "epoch": 1.621753286579959,
      "grad_norm": 2.609375,
      "learning_rate": 2.5523182005810105e-05,
      "loss": 0.8508,
      "step": 462730
    },
    {
      "epoch": 1.6217883340868546,
      "grad_norm": 2.703125,
      "learning_rate": 2.5522532977146403e-05,
      "loss": 0.813,
      "step": 462740
    },
    {
      "epoch": 1.6218233815937504,
      "grad_norm": 2.984375,
      "learning_rate": 2.55218839484827e-05,
      "loss": 0.7471,
      "step": 462750
    },
    {
      "epoch": 1.621858429100646,
      "grad_norm": 3.140625,
      "learning_rate": 2.5521234919819e-05,
      "loss": 0.8675,
      "step": 462760
    },
    {
      "epoch": 1.6218934766075415,
      "grad_norm": 3.21875,
      "learning_rate": 2.5520585891155297e-05,
      "loss": 0.8297,
      "step": 462770
    },
    {
      "epoch": 1.6219285241144372,
      "grad_norm": 2.703125,
      "learning_rate": 2.5519936862491595e-05,
      "loss": 0.7954,
      "step": 462780
    },
    {
      "epoch": 1.6219635716213328,
      "grad_norm": 3.0625,
      "learning_rate": 2.5519287833827893e-05,
      "loss": 0.8784,
      "step": 462790
    },
    {
      "epoch": 1.6219986191282283,
      "grad_norm": 2.6875,
      "learning_rate": 2.551863880516419e-05,
      "loss": 0.7394,
      "step": 462800
    },
    {
      "epoch": 1.622033666635124,
      "grad_norm": 3.015625,
      "learning_rate": 2.551798977650049e-05,
      "loss": 0.8242,
      "step": 462810
    },
    {
      "epoch": 1.6220687141420194,
      "grad_norm": 2.8125,
      "learning_rate": 2.5517340747836787e-05,
      "loss": 0.7949,
      "step": 462820
    },
    {
      "epoch": 1.6221037616489151,
      "grad_norm": 3.328125,
      "learning_rate": 2.5516691719173085e-05,
      "loss": 0.7933,
      "step": 462830
    },
    {
      "epoch": 1.6221388091558107,
      "grad_norm": 3.03125,
      "learning_rate": 2.5516042690509383e-05,
      "loss": 0.7948,
      "step": 462840
    },
    {
      "epoch": 1.6221738566627062,
      "grad_norm": 3.21875,
      "learning_rate": 2.5515393661845684e-05,
      "loss": 0.7532,
      "step": 462850
    },
    {
      "epoch": 1.622208904169602,
      "grad_norm": 2.6875,
      "learning_rate": 2.5514744633181982e-05,
      "loss": 0.8629,
      "step": 462860
    },
    {
      "epoch": 1.6222439516764975,
      "grad_norm": 3.28125,
      "learning_rate": 2.551409560451828e-05,
      "loss": 0.8404,
      "step": 462870
    },
    {
      "epoch": 1.622278999183393,
      "grad_norm": 2.921875,
      "learning_rate": 2.5513446575854578e-05,
      "loss": 0.8483,
      "step": 462880
    },
    {
      "epoch": 1.6223140466902888,
      "grad_norm": 3.046875,
      "learning_rate": 2.5512797547190876e-05,
      "loss": 0.7919,
      "step": 462890
    },
    {
      "epoch": 1.6223490941971843,
      "grad_norm": 3.0625,
      "learning_rate": 2.5512148518527174e-05,
      "loss": 0.8574,
      "step": 462900
    },
    {
      "epoch": 1.6223841417040799,
      "grad_norm": 2.875,
      "learning_rate": 2.5511499489863472e-05,
      "loss": 0.8642,
      "step": 462910
    },
    {
      "epoch": 1.6224191892109756,
      "grad_norm": 3.390625,
      "learning_rate": 2.551085046119977e-05,
      "loss": 0.8863,
      "step": 462920
    },
    {
      "epoch": 1.622454236717871,
      "grad_norm": 3.3125,
      "learning_rate": 2.5510201432536068e-05,
      "loss": 0.8818,
      "step": 462930
    },
    {
      "epoch": 1.6224892842247667,
      "grad_norm": 2.96875,
      "learning_rate": 2.5509552403872366e-05,
      "loss": 0.8672,
      "step": 462940
    },
    {
      "epoch": 1.6225243317316622,
      "grad_norm": 3.140625,
      "learning_rate": 2.5508903375208664e-05,
      "loss": 0.8341,
      "step": 462950
    },
    {
      "epoch": 1.6225593792385578,
      "grad_norm": 3.0,
      "learning_rate": 2.5508254346544962e-05,
      "loss": 0.8638,
      "step": 462960
    },
    {
      "epoch": 1.6225944267454535,
      "grad_norm": 3.0625,
      "learning_rate": 2.550760531788126e-05,
      "loss": 0.8703,
      "step": 462970
    },
    {
      "epoch": 1.622629474252349,
      "grad_norm": 2.984375,
      "learning_rate": 2.5506956289217558e-05,
      "loss": 0.7871,
      "step": 462980
    },
    {
      "epoch": 1.6226645217592446,
      "grad_norm": 3.09375,
      "learning_rate": 2.550630726055386e-05,
      "loss": 0.9236,
      "step": 462990
    },
    {
      "epoch": 1.6226995692661403,
      "grad_norm": 2.328125,
      "learning_rate": 2.5505658231890157e-05,
      "loss": 0.8258,
      "step": 463000
    },
    {
      "epoch": 1.6227346167730359,
      "grad_norm": 2.40625,
      "learning_rate": 2.5505009203226455e-05,
      "loss": 0.8029,
      "step": 463010
    },
    {
      "epoch": 1.6227696642799314,
      "grad_norm": 2.96875,
      "learning_rate": 2.5504360174562753e-05,
      "loss": 0.8333,
      "step": 463020
    },
    {
      "epoch": 1.6228047117868272,
      "grad_norm": 3.171875,
      "learning_rate": 2.550371114589905e-05,
      "loss": 0.8332,
      "step": 463030
    },
    {
      "epoch": 1.6228397592937225,
      "grad_norm": 3.078125,
      "learning_rate": 2.550306211723535e-05,
      "loss": 0.8345,
      "step": 463040
    },
    {
      "epoch": 1.6228748068006182,
      "grad_norm": 2.5625,
      "learning_rate": 2.5502413088571647e-05,
      "loss": 0.794,
      "step": 463050
    },
    {
      "epoch": 1.622909854307514,
      "grad_norm": 2.921875,
      "learning_rate": 2.5501764059907945e-05,
      "loss": 0.862,
      "step": 463060
    },
    {
      "epoch": 1.6229449018144093,
      "grad_norm": 3.125,
      "learning_rate": 2.5501115031244243e-05,
      "loss": 0.8948,
      "step": 463070
    },
    {
      "epoch": 1.622979949321305,
      "grad_norm": 2.59375,
      "learning_rate": 2.5500466002580538e-05,
      "loss": 0.8434,
      "step": 463080
    },
    {
      "epoch": 1.6230149968282006,
      "grad_norm": 3.0625,
      "learning_rate": 2.5499816973916836e-05,
      "loss": 0.7929,
      "step": 463090
    },
    {
      "epoch": 1.6230500443350961,
      "grad_norm": 2.734375,
      "learning_rate": 2.5499167945253134e-05,
      "loss": 0.8996,
      "step": 463100
    },
    {
      "epoch": 1.623085091841992,
      "grad_norm": 3.078125,
      "learning_rate": 2.5498518916589432e-05,
      "loss": 0.8537,
      "step": 463110
    },
    {
      "epoch": 1.6231201393488874,
      "grad_norm": 3.234375,
      "learning_rate": 2.549786988792573e-05,
      "loss": 0.8945,
      "step": 463120
    },
    {
      "epoch": 1.623155186855783,
      "grad_norm": 3.25,
      "learning_rate": 2.5497220859262028e-05,
      "loss": 0.8192,
      "step": 463130
    },
    {
      "epoch": 1.6231902343626787,
      "grad_norm": 3.1875,
      "learning_rate": 2.5496571830598326e-05,
      "loss": 0.8186,
      "step": 463140
    },
    {
      "epoch": 1.623225281869574,
      "grad_norm": 2.9375,
      "learning_rate": 2.5495922801934624e-05,
      "loss": 0.811,
      "step": 463150
    },
    {
      "epoch": 1.6232603293764698,
      "grad_norm": 2.953125,
      "learning_rate": 2.5495273773270922e-05,
      "loss": 0.9353,
      "step": 463160
    },
    {
      "epoch": 1.6232953768833656,
      "grad_norm": 3.015625,
      "learning_rate": 2.549462474460722e-05,
      "loss": 0.8187,
      "step": 463170
    },
    {
      "epoch": 1.6233304243902609,
      "grad_norm": 2.765625,
      "learning_rate": 2.5493975715943518e-05,
      "loss": 0.7569,
      "step": 463180
    },
    {
      "epoch": 1.6233654718971566,
      "grad_norm": 2.765625,
      "learning_rate": 2.5493326687279816e-05,
      "loss": 0.8111,
      "step": 463190
    },
    {
      "epoch": 1.6234005194040522,
      "grad_norm": 3.359375,
      "learning_rate": 2.5492677658616114e-05,
      "loss": 0.8047,
      "step": 463200
    },
    {
      "epoch": 1.6234355669109477,
      "grad_norm": 3.0,
      "learning_rate": 2.5492028629952412e-05,
      "loss": 0.8364,
      "step": 463210
    },
    {
      "epoch": 1.6234706144178435,
      "grad_norm": 2.53125,
      "learning_rate": 2.5491379601288713e-05,
      "loss": 0.8147,
      "step": 463220
    },
    {
      "epoch": 1.623505661924739,
      "grad_norm": 3.484375,
      "learning_rate": 2.549073057262501e-05,
      "loss": 0.8549,
      "step": 463230
    },
    {
      "epoch": 1.6235407094316345,
      "grad_norm": 2.875,
      "learning_rate": 2.549008154396131e-05,
      "loss": 0.8475,
      "step": 463240
    },
    {
      "epoch": 1.6235757569385303,
      "grad_norm": 2.78125,
      "learning_rate": 2.5489432515297607e-05,
      "loss": 0.8588,
      "step": 463250
    },
    {
      "epoch": 1.6236108044454258,
      "grad_norm": 3.15625,
      "learning_rate": 2.5488783486633905e-05,
      "loss": 0.8418,
      "step": 463260
    },
    {
      "epoch": 1.6236458519523214,
      "grad_norm": 3.203125,
      "learning_rate": 2.5488134457970203e-05,
      "loss": 0.7693,
      "step": 463270
    },
    {
      "epoch": 1.6236808994592171,
      "grad_norm": 2.875,
      "learning_rate": 2.54874854293065e-05,
      "loss": 0.8154,
      "step": 463280
    },
    {
      "epoch": 1.6237159469661124,
      "grad_norm": 3.09375,
      "learning_rate": 2.54868364006428e-05,
      "loss": 0.807,
      "step": 463290
    },
    {
      "epoch": 1.6237509944730082,
      "grad_norm": 2.546875,
      "learning_rate": 2.5486187371979097e-05,
      "loss": 0.7979,
      "step": 463300
    },
    {
      "epoch": 1.6237860419799037,
      "grad_norm": 2.875,
      "learning_rate": 2.5485538343315395e-05,
      "loss": 0.8597,
      "step": 463310
    },
    {
      "epoch": 1.6238210894867993,
      "grad_norm": 2.484375,
      "learning_rate": 2.5484889314651693e-05,
      "loss": 0.86,
      "step": 463320
    },
    {
      "epoch": 1.623856136993695,
      "grad_norm": 3.28125,
      "learning_rate": 2.548424028598799e-05,
      "loss": 0.8297,
      "step": 463330
    },
    {
      "epoch": 1.6238911845005906,
      "grad_norm": 2.59375,
      "learning_rate": 2.548359125732429e-05,
      "loss": 0.8029,
      "step": 463340
    },
    {
      "epoch": 1.623926232007486,
      "grad_norm": 3.125,
      "learning_rate": 2.5482942228660587e-05,
      "loss": 0.8333,
      "step": 463350
    },
    {
      "epoch": 1.6239612795143819,
      "grad_norm": 3.03125,
      "learning_rate": 2.548229319999689e-05,
      "loss": 0.8221,
      "step": 463360
    },
    {
      "epoch": 1.6239963270212774,
      "grad_norm": 2.78125,
      "learning_rate": 2.5481644171333187e-05,
      "loss": 0.836,
      "step": 463370
    },
    {
      "epoch": 1.624031374528173,
      "grad_norm": 2.3125,
      "learning_rate": 2.5480995142669485e-05,
      "loss": 0.8185,
      "step": 463380
    },
    {
      "epoch": 1.6240664220350687,
      "grad_norm": 2.671875,
      "learning_rate": 2.5480346114005783e-05,
      "loss": 0.9363,
      "step": 463390
    },
    {
      "epoch": 1.624101469541964,
      "grad_norm": 3.390625,
      "learning_rate": 2.547969708534208e-05,
      "loss": 0.832,
      "step": 463400
    },
    {
      "epoch": 1.6241365170488598,
      "grad_norm": 3.0625,
      "learning_rate": 2.547904805667838e-05,
      "loss": 0.8246,
      "step": 463410
    },
    {
      "epoch": 1.6241715645557553,
      "grad_norm": 2.890625,
      "learning_rate": 2.5478399028014677e-05,
      "loss": 0.7954,
      "step": 463420
    },
    {
      "epoch": 1.6242066120626508,
      "grad_norm": 2.8125,
      "learning_rate": 2.5477749999350975e-05,
      "loss": 0.8737,
      "step": 463430
    },
    {
      "epoch": 1.6242416595695466,
      "grad_norm": 2.6875,
      "learning_rate": 2.5477100970687273e-05,
      "loss": 0.8119,
      "step": 463440
    },
    {
      "epoch": 1.6242767070764421,
      "grad_norm": 3.28125,
      "learning_rate": 2.547645194202357e-05,
      "loss": 0.8364,
      "step": 463450
    },
    {
      "epoch": 1.6243117545833377,
      "grad_norm": 2.75,
      "learning_rate": 2.5475802913359865e-05,
      "loss": 0.887,
      "step": 463460
    },
    {
      "epoch": 1.6243468020902334,
      "grad_norm": 3.125,
      "learning_rate": 2.5475153884696163e-05,
      "loss": 0.7462,
      "step": 463470
    },
    {
      "epoch": 1.624381849597129,
      "grad_norm": 2.859375,
      "learning_rate": 2.547450485603246e-05,
      "loss": 0.788,
      "step": 463480
    },
    {
      "epoch": 1.6244168971040245,
      "grad_norm": 2.734375,
      "learning_rate": 2.547385582736876e-05,
      "loss": 0.7762,
      "step": 463490
    },
    {
      "epoch": 1.6244519446109202,
      "grad_norm": 2.625,
      "learning_rate": 2.5473206798705057e-05,
      "loss": 0.7996,
      "step": 463500
    },
    {
      "epoch": 1.6244869921178156,
      "grad_norm": 2.515625,
      "learning_rate": 2.5472557770041355e-05,
      "loss": 0.8359,
      "step": 463510
    },
    {
      "epoch": 1.6245220396247113,
      "grad_norm": 2.890625,
      "learning_rate": 2.5471908741377653e-05,
      "loss": 0.8997,
      "step": 463520
    },
    {
      "epoch": 1.6245570871316068,
      "grad_norm": 2.84375,
      "learning_rate": 2.547125971271395e-05,
      "loss": 0.7689,
      "step": 463530
    },
    {
      "epoch": 1.6245921346385024,
      "grad_norm": 2.890625,
      "learning_rate": 2.547061068405025e-05,
      "loss": 0.8446,
      "step": 463540
    },
    {
      "epoch": 1.6246271821453981,
      "grad_norm": 3.4375,
      "learning_rate": 2.5469961655386547e-05,
      "loss": 0.9178,
      "step": 463550
    },
    {
      "epoch": 1.6246622296522937,
      "grad_norm": 2.71875,
      "learning_rate": 2.5469312626722845e-05,
      "loss": 0.8178,
      "step": 463560
    },
    {
      "epoch": 1.6246972771591892,
      "grad_norm": 2.984375,
      "learning_rate": 2.5468663598059143e-05,
      "loss": 0.8095,
      "step": 463570
    },
    {
      "epoch": 1.624732324666085,
      "grad_norm": 2.53125,
      "learning_rate": 2.5468014569395445e-05,
      "loss": 0.8258,
      "step": 463580
    },
    {
      "epoch": 1.6247673721729805,
      "grad_norm": 3.046875,
      "learning_rate": 2.5467365540731743e-05,
      "loss": 0.8083,
      "step": 463590
    },
    {
      "epoch": 1.624802419679876,
      "grad_norm": 2.796875,
      "learning_rate": 2.546671651206804e-05,
      "loss": 0.8641,
      "step": 463600
    },
    {
      "epoch": 1.6248374671867718,
      "grad_norm": 2.765625,
      "learning_rate": 2.546606748340434e-05,
      "loss": 0.8226,
      "step": 463610
    },
    {
      "epoch": 1.6248725146936671,
      "grad_norm": 2.75,
      "learning_rate": 2.5465418454740637e-05,
      "loss": 0.8293,
      "step": 463620
    },
    {
      "epoch": 1.6249075622005629,
      "grad_norm": 2.9375,
      "learning_rate": 2.5464769426076935e-05,
      "loss": 0.8632,
      "step": 463630
    },
    {
      "epoch": 1.6249426097074584,
      "grad_norm": 3.171875,
      "learning_rate": 2.5464120397413233e-05,
      "loss": 0.8082,
      "step": 463640
    },
    {
      "epoch": 1.624977657214354,
      "grad_norm": 3.09375,
      "learning_rate": 2.546347136874953e-05,
      "loss": 0.8546,
      "step": 463650
    },
    {
      "epoch": 1.6250127047212497,
      "grad_norm": 3.515625,
      "learning_rate": 2.546282234008583e-05,
      "loss": 0.8833,
      "step": 463660
    },
    {
      "epoch": 1.6250477522281452,
      "grad_norm": 2.65625,
      "learning_rate": 2.5462173311422127e-05,
      "loss": 0.8467,
      "step": 463670
    },
    {
      "epoch": 1.6250827997350408,
      "grad_norm": 2.953125,
      "learning_rate": 2.5461524282758425e-05,
      "loss": 0.8424,
      "step": 463680
    },
    {
      "epoch": 1.6251178472419365,
      "grad_norm": 2.6875,
      "learning_rate": 2.5460875254094723e-05,
      "loss": 0.9086,
      "step": 463690
    },
    {
      "epoch": 1.625152894748832,
      "grad_norm": 2.234375,
      "learning_rate": 2.546022622543102e-05,
      "loss": 0.8581,
      "step": 463700
    },
    {
      "epoch": 1.6251879422557276,
      "grad_norm": 3.015625,
      "learning_rate": 2.545957719676732e-05,
      "loss": 0.898,
      "step": 463710
    },
    {
      "epoch": 1.6252229897626234,
      "grad_norm": 2.734375,
      "learning_rate": 2.545892816810362e-05,
      "loss": 0.8956,
      "step": 463720
    },
    {
      "epoch": 1.6252580372695187,
      "grad_norm": 2.8125,
      "learning_rate": 2.5458279139439918e-05,
      "loss": 0.7781,
      "step": 463730
    },
    {
      "epoch": 1.6252930847764144,
      "grad_norm": 3.078125,
      "learning_rate": 2.5457630110776216e-05,
      "loss": 0.8727,
      "step": 463740
    },
    {
      "epoch": 1.6253281322833102,
      "grad_norm": 2.984375,
      "learning_rate": 2.5456981082112514e-05,
      "loss": 0.8387,
      "step": 463750
    },
    {
      "epoch": 1.6253631797902055,
      "grad_norm": 3.46875,
      "learning_rate": 2.5456332053448812e-05,
      "loss": 0.8236,
      "step": 463760
    },
    {
      "epoch": 1.6253982272971013,
      "grad_norm": 3.140625,
      "learning_rate": 2.545568302478511e-05,
      "loss": 0.8771,
      "step": 463770
    },
    {
      "epoch": 1.6254332748039968,
      "grad_norm": 2.796875,
      "learning_rate": 2.5455033996121408e-05,
      "loss": 0.8133,
      "step": 463780
    },
    {
      "epoch": 1.6254683223108923,
      "grad_norm": 2.6875,
      "learning_rate": 2.5454384967457706e-05,
      "loss": 0.7648,
      "step": 463790
    },
    {
      "epoch": 1.625503369817788,
      "grad_norm": 2.90625,
      "learning_rate": 2.5453735938794004e-05,
      "loss": 0.8061,
      "step": 463800
    },
    {
      "epoch": 1.6255384173246836,
      "grad_norm": 3.046875,
      "learning_rate": 2.5453086910130302e-05,
      "loss": 0.7846,
      "step": 463810
    },
    {
      "epoch": 1.6255734648315792,
      "grad_norm": 2.46875,
      "learning_rate": 2.54524378814666e-05,
      "loss": 0.7517,
      "step": 463820
    },
    {
      "epoch": 1.625608512338475,
      "grad_norm": 3.28125,
      "learning_rate": 2.5451788852802898e-05,
      "loss": 0.8114,
      "step": 463830
    },
    {
      "epoch": 1.6256435598453702,
      "grad_norm": 2.75,
      "learning_rate": 2.5451139824139193e-05,
      "loss": 0.7866,
      "step": 463840
    },
    {
      "epoch": 1.625678607352266,
      "grad_norm": 2.734375,
      "learning_rate": 2.545049079547549e-05,
      "loss": 0.8891,
      "step": 463850
    },
    {
      "epoch": 1.6257136548591617,
      "grad_norm": 2.75,
      "learning_rate": 2.544984176681179e-05,
      "loss": 0.8705,
      "step": 463860
    },
    {
      "epoch": 1.625748702366057,
      "grad_norm": 3.0,
      "learning_rate": 2.5449192738148087e-05,
      "loss": 0.8324,
      "step": 463870
    },
    {
      "epoch": 1.6257837498729528,
      "grad_norm": 2.65625,
      "learning_rate": 2.5448543709484385e-05,
      "loss": 0.8786,
      "step": 463880
    },
    {
      "epoch": 1.6258187973798484,
      "grad_norm": 2.640625,
      "learning_rate": 2.5447894680820683e-05,
      "loss": 0.7868,
      "step": 463890
    },
    {
      "epoch": 1.625853844886744,
      "grad_norm": 3.015625,
      "learning_rate": 2.544724565215698e-05,
      "loss": 0.8,
      "step": 463900
    },
    {
      "epoch": 1.6258888923936397,
      "grad_norm": 3.171875,
      "learning_rate": 2.544659662349328e-05,
      "loss": 0.8167,
      "step": 463910
    },
    {
      "epoch": 1.6259239399005352,
      "grad_norm": 2.28125,
      "learning_rate": 2.5445947594829577e-05,
      "loss": 0.885,
      "step": 463920
    },
    {
      "epoch": 1.6259589874074307,
      "grad_norm": 3.046875,
      "learning_rate": 2.5445298566165875e-05,
      "loss": 0.8694,
      "step": 463930
    },
    {
      "epoch": 1.6259940349143265,
      "grad_norm": 2.984375,
      "learning_rate": 2.5444649537502173e-05,
      "loss": 0.8433,
      "step": 463940
    },
    {
      "epoch": 1.626029082421222,
      "grad_norm": 2.890625,
      "learning_rate": 2.5444000508838474e-05,
      "loss": 0.8265,
      "step": 463950
    },
    {
      "epoch": 1.6260641299281176,
      "grad_norm": 2.78125,
      "learning_rate": 2.5443351480174772e-05,
      "loss": 0.8489,
      "step": 463960
    },
    {
      "epoch": 1.6260991774350133,
      "grad_norm": 2.796875,
      "learning_rate": 2.544270245151107e-05,
      "loss": 0.8395,
      "step": 463970
    },
    {
      "epoch": 1.6261342249419086,
      "grad_norm": 3.296875,
      "learning_rate": 2.5442053422847368e-05,
      "loss": 0.8629,
      "step": 463980
    },
    {
      "epoch": 1.6261692724488044,
      "grad_norm": 2.90625,
      "learning_rate": 2.5441404394183666e-05,
      "loss": 0.8925,
      "step": 463990
    },
    {
      "epoch": 1.6262043199557,
      "grad_norm": 2.875,
      "learning_rate": 2.5440755365519964e-05,
      "loss": 0.8291,
      "step": 464000
    },
    {
      "epoch": 1.6262393674625955,
      "grad_norm": 3.40625,
      "learning_rate": 2.5440106336856262e-05,
      "loss": 0.832,
      "step": 464010
    },
    {
      "epoch": 1.6262744149694912,
      "grad_norm": 2.625,
      "learning_rate": 2.543945730819256e-05,
      "loss": 0.8759,
      "step": 464020
    },
    {
      "epoch": 1.6263094624763867,
      "grad_norm": 3.3125,
      "learning_rate": 2.5438808279528858e-05,
      "loss": 0.8325,
      "step": 464030
    },
    {
      "epoch": 1.6263445099832823,
      "grad_norm": 3.265625,
      "learning_rate": 2.5438159250865156e-05,
      "loss": 0.8628,
      "step": 464040
    },
    {
      "epoch": 1.626379557490178,
      "grad_norm": 2.921875,
      "learning_rate": 2.5437510222201454e-05,
      "loss": 0.7913,
      "step": 464050
    },
    {
      "epoch": 1.6264146049970736,
      "grad_norm": 3.25,
      "learning_rate": 2.5436861193537752e-05,
      "loss": 0.8053,
      "step": 464060
    },
    {
      "epoch": 1.6264496525039691,
      "grad_norm": 3.015625,
      "learning_rate": 2.543621216487405e-05,
      "loss": 0.8545,
      "step": 464070
    },
    {
      "epoch": 1.6264847000108649,
      "grad_norm": 2.875,
      "learning_rate": 2.5435563136210348e-05,
      "loss": 0.7617,
      "step": 464080
    },
    {
      "epoch": 1.6265197475177602,
      "grad_norm": 2.953125,
      "learning_rate": 2.543491410754665e-05,
      "loss": 0.8805,
      "step": 464090
    },
    {
      "epoch": 1.626554795024656,
      "grad_norm": 2.828125,
      "learning_rate": 2.5434265078882947e-05,
      "loss": 0.797,
      "step": 464100
    },
    {
      "epoch": 1.6265898425315515,
      "grad_norm": 2.78125,
      "learning_rate": 2.5433616050219245e-05,
      "loss": 0.8058,
      "step": 464110
    },
    {
      "epoch": 1.626624890038447,
      "grad_norm": 2.71875,
      "learning_rate": 2.5432967021555543e-05,
      "loss": 0.846,
      "step": 464120
    },
    {
      "epoch": 1.6266599375453428,
      "grad_norm": 2.625,
      "learning_rate": 2.543231799289184e-05,
      "loss": 0.8474,
      "step": 464130
    },
    {
      "epoch": 1.6266949850522383,
      "grad_norm": 3.296875,
      "learning_rate": 2.543166896422814e-05,
      "loss": 0.8398,
      "step": 464140
    },
    {
      "epoch": 1.6267300325591338,
      "grad_norm": 2.796875,
      "learning_rate": 2.5431019935564437e-05,
      "loss": 0.8211,
      "step": 464150
    },
    {
      "epoch": 1.6267650800660296,
      "grad_norm": 2.453125,
      "learning_rate": 2.5430370906900735e-05,
      "loss": 0.8309,
      "step": 464160
    },
    {
      "epoch": 1.6268001275729251,
      "grad_norm": 3.109375,
      "learning_rate": 2.5429721878237033e-05,
      "loss": 0.8667,
      "step": 464170
    },
    {
      "epoch": 1.6268351750798207,
      "grad_norm": 2.84375,
      "learning_rate": 2.542907284957333e-05,
      "loss": 0.7925,
      "step": 464180
    },
    {
      "epoch": 1.6268702225867164,
      "grad_norm": 3.078125,
      "learning_rate": 2.542842382090963e-05,
      "loss": 0.7922,
      "step": 464190
    },
    {
      "epoch": 1.6269052700936117,
      "grad_norm": 2.828125,
      "learning_rate": 2.5427774792245927e-05,
      "loss": 0.8416,
      "step": 464200
    },
    {
      "epoch": 1.6269403176005075,
      "grad_norm": 3.140625,
      "learning_rate": 2.5427125763582222e-05,
      "loss": 0.8235,
      "step": 464210
    },
    {
      "epoch": 1.626975365107403,
      "grad_norm": 3.671875,
      "learning_rate": 2.542647673491852e-05,
      "loss": 0.8304,
      "step": 464220
    },
    {
      "epoch": 1.6270104126142986,
      "grad_norm": 3.09375,
      "learning_rate": 2.5425827706254818e-05,
      "loss": 0.7884,
      "step": 464230
    },
    {
      "epoch": 1.6270454601211943,
      "grad_norm": 3.125,
      "learning_rate": 2.5425178677591116e-05,
      "loss": 0.8528,
      "step": 464240
    },
    {
      "epoch": 1.6270805076280899,
      "grad_norm": 3.046875,
      "learning_rate": 2.5424529648927414e-05,
      "loss": 0.9003,
      "step": 464250
    },
    {
      "epoch": 1.6271155551349854,
      "grad_norm": 3.328125,
      "learning_rate": 2.5423880620263712e-05,
      "loss": 0.945,
      "step": 464260
    },
    {
      "epoch": 1.6271506026418812,
      "grad_norm": 2.859375,
      "learning_rate": 2.542323159160001e-05,
      "loss": 0.8613,
      "step": 464270
    },
    {
      "epoch": 1.6271856501487767,
      "grad_norm": 3.015625,
      "learning_rate": 2.5422582562936308e-05,
      "loss": 0.8264,
      "step": 464280
    },
    {
      "epoch": 1.6272206976556722,
      "grad_norm": 3.0,
      "learning_rate": 2.5421933534272606e-05,
      "loss": 0.886,
      "step": 464290
    },
    {
      "epoch": 1.627255745162568,
      "grad_norm": 2.734375,
      "learning_rate": 2.5421284505608904e-05,
      "loss": 0.8323,
      "step": 464300
    },
    {
      "epoch": 1.6272907926694633,
      "grad_norm": 2.78125,
      "learning_rate": 2.5420635476945202e-05,
      "loss": 0.8415,
      "step": 464310
    },
    {
      "epoch": 1.627325840176359,
      "grad_norm": 3.109375,
      "learning_rate": 2.5419986448281503e-05,
      "loss": 0.8,
      "step": 464320
    },
    {
      "epoch": 1.6273608876832546,
      "grad_norm": 2.90625,
      "learning_rate": 2.54193374196178e-05,
      "loss": 0.9116,
      "step": 464330
    },
    {
      "epoch": 1.6273959351901501,
      "grad_norm": 2.765625,
      "learning_rate": 2.54186883909541e-05,
      "loss": 0.9139,
      "step": 464340
    },
    {
      "epoch": 1.627430982697046,
      "grad_norm": 2.953125,
      "learning_rate": 2.5418039362290397e-05,
      "loss": 0.7884,
      "step": 464350
    },
    {
      "epoch": 1.6274660302039414,
      "grad_norm": 3.171875,
      "learning_rate": 2.5417390333626695e-05,
      "loss": 0.8138,
      "step": 464360
    },
    {
      "epoch": 1.627501077710837,
      "grad_norm": 3.3125,
      "learning_rate": 2.5416741304962993e-05,
      "loss": 0.7263,
      "step": 464370
    },
    {
      "epoch": 1.6275361252177327,
      "grad_norm": 2.859375,
      "learning_rate": 2.541609227629929e-05,
      "loss": 0.8438,
      "step": 464380
    },
    {
      "epoch": 1.6275711727246283,
      "grad_norm": 2.46875,
      "learning_rate": 2.541544324763559e-05,
      "loss": 0.7941,
      "step": 464390
    },
    {
      "epoch": 1.6276062202315238,
      "grad_norm": 2.5625,
      "learning_rate": 2.5414794218971887e-05,
      "loss": 0.7681,
      "step": 464400
    },
    {
      "epoch": 1.6276412677384196,
      "grad_norm": 2.875,
      "learning_rate": 2.5414145190308185e-05,
      "loss": 0.8968,
      "step": 464410
    },
    {
      "epoch": 1.6276763152453149,
      "grad_norm": 3.109375,
      "learning_rate": 2.5413496161644483e-05,
      "loss": 0.7631,
      "step": 464420
    },
    {
      "epoch": 1.6277113627522106,
      "grad_norm": 3.03125,
      "learning_rate": 2.541284713298078e-05,
      "loss": 0.8312,
      "step": 464430
    },
    {
      "epoch": 1.6277464102591064,
      "grad_norm": 2.6875,
      "learning_rate": 2.541219810431708e-05,
      "loss": 0.815,
      "step": 464440
    },
    {
      "epoch": 1.6277814577660017,
      "grad_norm": 2.546875,
      "learning_rate": 2.541154907565338e-05,
      "loss": 0.798,
      "step": 464450
    },
    {
      "epoch": 1.6278165052728975,
      "grad_norm": 2.84375,
      "learning_rate": 2.541090004698968e-05,
      "loss": 0.8417,
      "step": 464460
    },
    {
      "epoch": 1.627851552779793,
      "grad_norm": 2.6875,
      "learning_rate": 2.5410251018325977e-05,
      "loss": 0.8349,
      "step": 464470
    },
    {
      "epoch": 1.6278866002866885,
      "grad_norm": 2.984375,
      "learning_rate": 2.5409601989662275e-05,
      "loss": 0.8857,
      "step": 464480
    },
    {
      "epoch": 1.6279216477935843,
      "grad_norm": 2.921875,
      "learning_rate": 2.5408952960998573e-05,
      "loss": 0.904,
      "step": 464490
    },
    {
      "epoch": 1.6279566953004798,
      "grad_norm": 2.515625,
      "learning_rate": 2.540830393233487e-05,
      "loss": 0.7773,
      "step": 464500
    },
    {
      "epoch": 1.6279917428073754,
      "grad_norm": 2.875,
      "learning_rate": 2.540765490367117e-05,
      "loss": 0.7786,
      "step": 464510
    },
    {
      "epoch": 1.628026790314271,
      "grad_norm": 2.390625,
      "learning_rate": 2.5407005875007467e-05,
      "loss": 0.8098,
      "step": 464520
    },
    {
      "epoch": 1.6280618378211666,
      "grad_norm": 3.359375,
      "learning_rate": 2.5406356846343765e-05,
      "loss": 0.8925,
      "step": 464530
    },
    {
      "epoch": 1.6280968853280622,
      "grad_norm": 3.328125,
      "learning_rate": 2.5405707817680063e-05,
      "loss": 0.7653,
      "step": 464540
    },
    {
      "epoch": 1.628131932834958,
      "grad_norm": 3.234375,
      "learning_rate": 2.540505878901636e-05,
      "loss": 0.9288,
      "step": 464550
    },
    {
      "epoch": 1.6281669803418533,
      "grad_norm": 3.234375,
      "learning_rate": 2.540440976035266e-05,
      "loss": 0.8008,
      "step": 464560
    },
    {
      "epoch": 1.628202027848749,
      "grad_norm": 2.5625,
      "learning_rate": 2.5403760731688957e-05,
      "loss": 0.8023,
      "step": 464570
    },
    {
      "epoch": 1.6282370753556445,
      "grad_norm": 2.5,
      "learning_rate": 2.5403111703025255e-05,
      "loss": 0.7825,
      "step": 464580
    },
    {
      "epoch": 1.62827212286254,
      "grad_norm": 2.96875,
      "learning_rate": 2.540246267436155e-05,
      "loss": 0.8879,
      "step": 464590
    },
    {
      "epoch": 1.6283071703694358,
      "grad_norm": 3.0625,
      "learning_rate": 2.5401813645697847e-05,
      "loss": 0.7859,
      "step": 464600
    },
    {
      "epoch": 1.6283422178763314,
      "grad_norm": 2.546875,
      "learning_rate": 2.5401164617034145e-05,
      "loss": 0.8255,
      "step": 464610
    },
    {
      "epoch": 1.628377265383227,
      "grad_norm": 2.984375,
      "learning_rate": 2.5400515588370443e-05,
      "loss": 0.815,
      "step": 464620
    },
    {
      "epoch": 1.6284123128901227,
      "grad_norm": 2.546875,
      "learning_rate": 2.539986655970674e-05,
      "loss": 0.7783,
      "step": 464630
    },
    {
      "epoch": 1.6284473603970182,
      "grad_norm": 2.96875,
      "learning_rate": 2.539921753104304e-05,
      "loss": 0.8971,
      "step": 464640
    },
    {
      "epoch": 1.6284824079039137,
      "grad_norm": 3.40625,
      "learning_rate": 2.5398568502379337e-05,
      "loss": 0.8313,
      "step": 464650
    },
    {
      "epoch": 1.6285174554108095,
      "grad_norm": 3.09375,
      "learning_rate": 2.5397919473715635e-05,
      "loss": 0.8728,
      "step": 464660
    },
    {
      "epoch": 1.6285525029177048,
      "grad_norm": 2.921875,
      "learning_rate": 2.5397270445051933e-05,
      "loss": 0.8062,
      "step": 464670
    },
    {
      "epoch": 1.6285875504246006,
      "grad_norm": 2.96875,
      "learning_rate": 2.5396621416388235e-05,
      "loss": 0.8524,
      "step": 464680
    },
    {
      "epoch": 1.628622597931496,
      "grad_norm": 3.078125,
      "learning_rate": 2.5395972387724533e-05,
      "loss": 0.8066,
      "step": 464690
    },
    {
      "epoch": 1.6286576454383916,
      "grad_norm": 2.578125,
      "learning_rate": 2.539532335906083e-05,
      "loss": 0.7695,
      "step": 464700
    },
    {
      "epoch": 1.6286926929452874,
      "grad_norm": 2.6875,
      "learning_rate": 2.539467433039713e-05,
      "loss": 0.7415,
      "step": 464710
    },
    {
      "epoch": 1.628727740452183,
      "grad_norm": 3.15625,
      "learning_rate": 2.5394025301733427e-05,
      "loss": 0.8408,
      "step": 464720
    },
    {
      "epoch": 1.6287627879590785,
      "grad_norm": 3.59375,
      "learning_rate": 2.5393376273069725e-05,
      "loss": 0.8275,
      "step": 464730
    },
    {
      "epoch": 1.6287978354659742,
      "grad_norm": 2.890625,
      "learning_rate": 2.5392727244406023e-05,
      "loss": 0.8914,
      "step": 464740
    },
    {
      "epoch": 1.6288328829728698,
      "grad_norm": 2.890625,
      "learning_rate": 2.539207821574232e-05,
      "loss": 0.7961,
      "step": 464750
    },
    {
      "epoch": 1.6288679304797653,
      "grad_norm": 2.8125,
      "learning_rate": 2.539142918707862e-05,
      "loss": 0.8619,
      "step": 464760
    },
    {
      "epoch": 1.628902977986661,
      "grad_norm": 2.65625,
      "learning_rate": 2.5390780158414917e-05,
      "loss": 0.794,
      "step": 464770
    },
    {
      "epoch": 1.6289380254935564,
      "grad_norm": 2.921875,
      "learning_rate": 2.5390131129751215e-05,
      "loss": 0.9526,
      "step": 464780
    },
    {
      "epoch": 1.6289730730004521,
      "grad_norm": 3.1875,
      "learning_rate": 2.5389482101087513e-05,
      "loss": 0.8281,
      "step": 464790
    },
    {
      "epoch": 1.6290081205073477,
      "grad_norm": 2.796875,
      "learning_rate": 2.538883307242381e-05,
      "loss": 0.8015,
      "step": 464800
    },
    {
      "epoch": 1.6290431680142432,
      "grad_norm": 3.203125,
      "learning_rate": 2.538818404376011e-05,
      "loss": 0.9447,
      "step": 464810
    },
    {
      "epoch": 1.629078215521139,
      "grad_norm": 2.453125,
      "learning_rate": 2.538753501509641e-05,
      "loss": 0.8133,
      "step": 464820
    },
    {
      "epoch": 1.6291132630280345,
      "grad_norm": 3.015625,
      "learning_rate": 2.5386885986432708e-05,
      "loss": 0.8736,
      "step": 464830
    },
    {
      "epoch": 1.62914831053493,
      "grad_norm": 2.421875,
      "learning_rate": 2.5386236957769006e-05,
      "loss": 0.827,
      "step": 464840
    },
    {
      "epoch": 1.6291833580418258,
      "grad_norm": 2.8125,
      "learning_rate": 2.5385587929105304e-05,
      "loss": 0.7661,
      "step": 464850
    },
    {
      "epoch": 1.6292184055487213,
      "grad_norm": 2.640625,
      "learning_rate": 2.5384938900441602e-05,
      "loss": 0.8199,
      "step": 464860
    },
    {
      "epoch": 1.6292534530556169,
      "grad_norm": 2.953125,
      "learning_rate": 2.53842898717779e-05,
      "loss": 0.7868,
      "step": 464870
    },
    {
      "epoch": 1.6292885005625126,
      "grad_norm": 2.515625,
      "learning_rate": 2.5383640843114198e-05,
      "loss": 0.7768,
      "step": 464880
    },
    {
      "epoch": 1.629323548069408,
      "grad_norm": 2.828125,
      "learning_rate": 2.5382991814450496e-05,
      "loss": 0.8702,
      "step": 464890
    },
    {
      "epoch": 1.6293585955763037,
      "grad_norm": 3.09375,
      "learning_rate": 2.5382342785786794e-05,
      "loss": 0.9259,
      "step": 464900
    },
    {
      "epoch": 1.6293936430831992,
      "grad_norm": 2.6875,
      "learning_rate": 2.5381693757123092e-05,
      "loss": 0.8139,
      "step": 464910
    },
    {
      "epoch": 1.6294286905900948,
      "grad_norm": 3.234375,
      "learning_rate": 2.538104472845939e-05,
      "loss": 0.9107,
      "step": 464920
    },
    {
      "epoch": 1.6294637380969905,
      "grad_norm": 2.90625,
      "learning_rate": 2.5380395699795688e-05,
      "loss": 0.8893,
      "step": 464930
    },
    {
      "epoch": 1.629498785603886,
      "grad_norm": 3.09375,
      "learning_rate": 2.5379746671131986e-05,
      "loss": 0.897,
      "step": 464940
    },
    {
      "epoch": 1.6295338331107816,
      "grad_norm": 2.90625,
      "learning_rate": 2.5379097642468284e-05,
      "loss": 0.7367,
      "step": 464950
    },
    {
      "epoch": 1.6295688806176774,
      "grad_norm": 2.6875,
      "learning_rate": 2.5378448613804586e-05,
      "loss": 0.8596,
      "step": 464960
    },
    {
      "epoch": 1.6296039281245729,
      "grad_norm": 2.96875,
      "learning_rate": 2.5377799585140877e-05,
      "loss": 0.7949,
      "step": 464970
    },
    {
      "epoch": 1.6296389756314684,
      "grad_norm": 2.546875,
      "learning_rate": 2.5377150556477175e-05,
      "loss": 0.8145,
      "step": 464980
    },
    {
      "epoch": 1.6296740231383642,
      "grad_norm": 3.15625,
      "learning_rate": 2.5376501527813473e-05,
      "loss": 0.9036,
      "step": 464990
    },
    {
      "epoch": 1.6297090706452595,
      "grad_norm": 3.0625,
      "learning_rate": 2.537585249914977e-05,
      "loss": 0.8106,
      "step": 465000
    },
    {
      "epoch": 1.6297090706452595,
      "eval_loss": 0.7856552004814148,
      "eval_runtime": 562.5062,
      "eval_samples_per_second": 676.323,
      "eval_steps_per_second": 56.36,
      "step": 465000
    },
    {
      "epoch": 1.6297441181521553,
      "grad_norm": 2.765625,
      "learning_rate": 2.537520347048607e-05,
      "loss": 0.7942,
      "step": 465010
    },
    {
      "epoch": 1.6297791656590508,
      "grad_norm": 2.984375,
      "learning_rate": 2.5374554441822367e-05,
      "loss": 0.8459,
      "step": 465020
    },
    {
      "epoch": 1.6298142131659463,
      "grad_norm": 2.859375,
      "learning_rate": 2.5373905413158665e-05,
      "loss": 0.8608,
      "step": 465030
    },
    {
      "epoch": 1.629849260672842,
      "grad_norm": 2.609375,
      "learning_rate": 2.5373256384494963e-05,
      "loss": 0.8716,
      "step": 465040
    },
    {
      "epoch": 1.6298843081797376,
      "grad_norm": 3.0625,
      "learning_rate": 2.5372607355831264e-05,
      "loss": 0.7706,
      "step": 465050
    },
    {
      "epoch": 1.6299193556866332,
      "grad_norm": 2.6875,
      "learning_rate": 2.5371958327167562e-05,
      "loss": 0.8094,
      "step": 465060
    },
    {
      "epoch": 1.629954403193529,
      "grad_norm": 2.75,
      "learning_rate": 2.537130929850386e-05,
      "loss": 0.7138,
      "step": 465070
    },
    {
      "epoch": 1.6299894507004244,
      "grad_norm": 3.0,
      "learning_rate": 2.5370660269840158e-05,
      "loss": 0.8553,
      "step": 465080
    },
    {
      "epoch": 1.63002449820732,
      "grad_norm": 2.84375,
      "learning_rate": 2.5370011241176456e-05,
      "loss": 0.7961,
      "step": 465090
    },
    {
      "epoch": 1.6300595457142157,
      "grad_norm": 2.71875,
      "learning_rate": 2.5369362212512754e-05,
      "loss": 0.6858,
      "step": 465100
    },
    {
      "epoch": 1.630094593221111,
      "grad_norm": 3.421875,
      "learning_rate": 2.5368713183849052e-05,
      "loss": 0.8434,
      "step": 465110
    },
    {
      "epoch": 1.6301296407280068,
      "grad_norm": 2.703125,
      "learning_rate": 2.536806415518535e-05,
      "loss": 0.8905,
      "step": 465120
    },
    {
      "epoch": 1.6301646882349026,
      "grad_norm": 2.65625,
      "learning_rate": 2.5367415126521648e-05,
      "loss": 0.8249,
      "step": 465130
    },
    {
      "epoch": 1.6301997357417979,
      "grad_norm": 3.296875,
      "learning_rate": 2.5366766097857946e-05,
      "loss": 0.8709,
      "step": 465140
    },
    {
      "epoch": 1.6302347832486936,
      "grad_norm": 3.265625,
      "learning_rate": 2.5366117069194244e-05,
      "loss": 0.7922,
      "step": 465150
    },
    {
      "epoch": 1.6302698307555892,
      "grad_norm": 3.0,
      "learning_rate": 2.5365468040530542e-05,
      "loss": 0.7879,
      "step": 465160
    },
    {
      "epoch": 1.6303048782624847,
      "grad_norm": 2.9375,
      "learning_rate": 2.536481901186684e-05,
      "loss": 0.8604,
      "step": 465170
    },
    {
      "epoch": 1.6303399257693805,
      "grad_norm": 3.25,
      "learning_rate": 2.5364169983203138e-05,
      "loss": 0.7811,
      "step": 465180
    },
    {
      "epoch": 1.630374973276276,
      "grad_norm": 2.6875,
      "learning_rate": 2.536352095453944e-05,
      "loss": 0.7734,
      "step": 465190
    },
    {
      "epoch": 1.6304100207831715,
      "grad_norm": 3.0625,
      "learning_rate": 2.5362871925875738e-05,
      "loss": 0.827,
      "step": 465200
    },
    {
      "epoch": 1.6304450682900673,
      "grad_norm": 2.984375,
      "learning_rate": 2.5362222897212036e-05,
      "loss": 0.8471,
      "step": 465210
    },
    {
      "epoch": 1.6304801157969628,
      "grad_norm": 2.90625,
      "learning_rate": 2.5361573868548334e-05,
      "loss": 0.741,
      "step": 465220
    },
    {
      "epoch": 1.6305151633038584,
      "grad_norm": 2.6875,
      "learning_rate": 2.536092483988463e-05,
      "loss": 0.815,
      "step": 465230
    },
    {
      "epoch": 1.6305502108107541,
      "grad_norm": 2.9375,
      "learning_rate": 2.536027581122093e-05,
      "loss": 0.8315,
      "step": 465240
    },
    {
      "epoch": 1.6305852583176494,
      "grad_norm": 2.5625,
      "learning_rate": 2.5359626782557228e-05,
      "loss": 0.8263,
      "step": 465250
    },
    {
      "epoch": 1.6306203058245452,
      "grad_norm": 2.453125,
      "learning_rate": 2.5358977753893526e-05,
      "loss": 0.8035,
      "step": 465260
    },
    {
      "epoch": 1.6306553533314407,
      "grad_norm": 2.828125,
      "learning_rate": 2.5358328725229824e-05,
      "loss": 0.832,
      "step": 465270
    },
    {
      "epoch": 1.6306904008383363,
      "grad_norm": 3.703125,
      "learning_rate": 2.535767969656612e-05,
      "loss": 0.8824,
      "step": 465280
    },
    {
      "epoch": 1.630725448345232,
      "grad_norm": 2.609375,
      "learning_rate": 2.535703066790242e-05,
      "loss": 0.7842,
      "step": 465290
    },
    {
      "epoch": 1.6307604958521276,
      "grad_norm": 2.9375,
      "learning_rate": 2.5356381639238718e-05,
      "loss": 0.9031,
      "step": 465300
    },
    {
      "epoch": 1.630795543359023,
      "grad_norm": 3.109375,
      "learning_rate": 2.5355732610575016e-05,
      "loss": 0.8792,
      "step": 465310
    },
    {
      "epoch": 1.6308305908659189,
      "grad_norm": 2.890625,
      "learning_rate": 2.5355083581911314e-05,
      "loss": 0.8387,
      "step": 465320
    },
    {
      "epoch": 1.6308656383728144,
      "grad_norm": 2.578125,
      "learning_rate": 2.5354434553247615e-05,
      "loss": 0.8345,
      "step": 465330
    },
    {
      "epoch": 1.63090068587971,
      "grad_norm": 3.234375,
      "learning_rate": 2.5353785524583906e-05,
      "loss": 0.8253,
      "step": 465340
    },
    {
      "epoch": 1.6309357333866057,
      "grad_norm": 2.90625,
      "learning_rate": 2.5353136495920204e-05,
      "loss": 0.7993,
      "step": 465350
    },
    {
      "epoch": 1.630970780893501,
      "grad_norm": 2.53125,
      "learning_rate": 2.5352487467256502e-05,
      "loss": 0.8141,
      "step": 465360
    },
    {
      "epoch": 1.6310058284003968,
      "grad_norm": 2.6875,
      "learning_rate": 2.53518384385928e-05,
      "loss": 0.8051,
      "step": 465370
    },
    {
      "epoch": 1.6310408759072923,
      "grad_norm": 3.140625,
      "learning_rate": 2.5351189409929098e-05,
      "loss": 0.8133,
      "step": 465380
    },
    {
      "epoch": 1.6310759234141878,
      "grad_norm": 2.625,
      "learning_rate": 2.5350540381265396e-05,
      "loss": 0.8513,
      "step": 465390
    },
    {
      "epoch": 1.6311109709210836,
      "grad_norm": 2.75,
      "learning_rate": 2.5349891352601694e-05,
      "loss": 0.8413,
      "step": 465400
    },
    {
      "epoch": 1.6311460184279791,
      "grad_norm": 2.765625,
      "learning_rate": 2.5349242323937992e-05,
      "loss": 0.8307,
      "step": 465410
    },
    {
      "epoch": 1.6311810659348747,
      "grad_norm": 2.546875,
      "learning_rate": 2.5348593295274294e-05,
      "loss": 0.7446,
      "step": 465420
    },
    {
      "epoch": 1.6312161134417704,
      "grad_norm": 2.75,
      "learning_rate": 2.534794426661059e-05,
      "loss": 0.8262,
      "step": 465430
    },
    {
      "epoch": 1.631251160948666,
      "grad_norm": 3.03125,
      "learning_rate": 2.534729523794689e-05,
      "loss": 0.8429,
      "step": 465440
    },
    {
      "epoch": 1.6312862084555615,
      "grad_norm": 3.046875,
      "learning_rate": 2.5346646209283188e-05,
      "loss": 0.7931,
      "step": 465450
    },
    {
      "epoch": 1.6313212559624573,
      "grad_norm": 2.96875,
      "learning_rate": 2.5345997180619486e-05,
      "loss": 0.8482,
      "step": 465460
    },
    {
      "epoch": 1.6313563034693526,
      "grad_norm": 2.8125,
      "learning_rate": 2.5345348151955784e-05,
      "loss": 0.8145,
      "step": 465470
    },
    {
      "epoch": 1.6313913509762483,
      "grad_norm": 3.21875,
      "learning_rate": 2.534469912329208e-05,
      "loss": 0.8113,
      "step": 465480
    },
    {
      "epoch": 1.6314263984831439,
      "grad_norm": 3.203125,
      "learning_rate": 2.534405009462838e-05,
      "loss": 0.9353,
      "step": 465490
    },
    {
      "epoch": 1.6314614459900394,
      "grad_norm": 2.625,
      "learning_rate": 2.5343401065964678e-05,
      "loss": 0.8943,
      "step": 465500
    },
    {
      "epoch": 1.6314964934969352,
      "grad_norm": 2.828125,
      "learning_rate": 2.5342752037300976e-05,
      "loss": 0.8126,
      "step": 465510
    },
    {
      "epoch": 1.6315315410038307,
      "grad_norm": 2.921875,
      "learning_rate": 2.5342103008637274e-05,
      "loss": 0.8412,
      "step": 465520
    },
    {
      "epoch": 1.6315665885107262,
      "grad_norm": 3.4375,
      "learning_rate": 2.534145397997357e-05,
      "loss": 0.8538,
      "step": 465530
    },
    {
      "epoch": 1.631601636017622,
      "grad_norm": 3.328125,
      "learning_rate": 2.534080495130987e-05,
      "loss": 0.8639,
      "step": 465540
    },
    {
      "epoch": 1.6316366835245175,
      "grad_norm": 2.515625,
      "learning_rate": 2.534015592264617e-05,
      "loss": 0.8434,
      "step": 465550
    },
    {
      "epoch": 1.631671731031413,
      "grad_norm": 2.796875,
      "learning_rate": 2.533950689398247e-05,
      "loss": 0.7975,
      "step": 465560
    },
    {
      "epoch": 1.6317067785383088,
      "grad_norm": 3.03125,
      "learning_rate": 2.5338857865318767e-05,
      "loss": 0.8968,
      "step": 465570
    },
    {
      "epoch": 1.6317418260452041,
      "grad_norm": 2.375,
      "learning_rate": 2.5338208836655065e-05,
      "loss": 0.8913,
      "step": 465580
    },
    {
      "epoch": 1.6317768735520999,
      "grad_norm": 2.796875,
      "learning_rate": 2.5337559807991363e-05,
      "loss": 0.8556,
      "step": 465590
    },
    {
      "epoch": 1.6318119210589954,
      "grad_norm": 3.28125,
      "learning_rate": 2.533691077932766e-05,
      "loss": 0.9123,
      "step": 465600
    },
    {
      "epoch": 1.631846968565891,
      "grad_norm": 2.40625,
      "learning_rate": 2.533626175066396e-05,
      "loss": 0.8576,
      "step": 465610
    },
    {
      "epoch": 1.6318820160727867,
      "grad_norm": 2.65625,
      "learning_rate": 2.5335612722000257e-05,
      "loss": 0.8567,
      "step": 465620
    },
    {
      "epoch": 1.6319170635796822,
      "grad_norm": 3.203125,
      "learning_rate": 2.5334963693336555e-05,
      "loss": 0.832,
      "step": 465630
    },
    {
      "epoch": 1.6319521110865778,
      "grad_norm": 2.71875,
      "learning_rate": 2.5334314664672853e-05,
      "loss": 0.84,
      "step": 465640
    },
    {
      "epoch": 1.6319871585934735,
      "grad_norm": 3.328125,
      "learning_rate": 2.533366563600915e-05,
      "loss": 0.7463,
      "step": 465650
    },
    {
      "epoch": 1.632022206100369,
      "grad_norm": 2.703125,
      "learning_rate": 2.533301660734545e-05,
      "loss": 0.7983,
      "step": 465660
    },
    {
      "epoch": 1.6320572536072646,
      "grad_norm": 2.734375,
      "learning_rate": 2.5332367578681747e-05,
      "loss": 0.7349,
      "step": 465670
    },
    {
      "epoch": 1.6320923011141604,
      "grad_norm": 2.96875,
      "learning_rate": 2.5331718550018045e-05,
      "loss": 0.9177,
      "step": 465680
    },
    {
      "epoch": 1.6321273486210557,
      "grad_norm": 2.59375,
      "learning_rate": 2.5331069521354346e-05,
      "loss": 0.7639,
      "step": 465690
    },
    {
      "epoch": 1.6321623961279514,
      "grad_norm": 2.9375,
      "learning_rate": 2.5330420492690644e-05,
      "loss": 0.7286,
      "step": 465700
    },
    {
      "epoch": 1.6321974436348472,
      "grad_norm": 2.875,
      "learning_rate": 2.5329771464026942e-05,
      "loss": 0.911,
      "step": 465710
    },
    {
      "epoch": 1.6322324911417425,
      "grad_norm": 2.78125,
      "learning_rate": 2.5329122435363234e-05,
      "loss": 0.7917,
      "step": 465720
    },
    {
      "epoch": 1.6322675386486383,
      "grad_norm": 2.28125,
      "learning_rate": 2.532847340669953e-05,
      "loss": 0.8569,
      "step": 465730
    },
    {
      "epoch": 1.6323025861555338,
      "grad_norm": 3.03125,
      "learning_rate": 2.532782437803583e-05,
      "loss": 0.8996,
      "step": 465740
    },
    {
      "epoch": 1.6323376336624293,
      "grad_norm": 2.65625,
      "learning_rate": 2.5327175349372128e-05,
      "loss": 0.8486,
      "step": 465750
    },
    {
      "epoch": 1.632372681169325,
      "grad_norm": 2.875,
      "learning_rate": 2.5326526320708426e-05,
      "loss": 0.797,
      "step": 465760
    },
    {
      "epoch": 1.6324077286762206,
      "grad_norm": 2.625,
      "learning_rate": 2.5325877292044724e-05,
      "loss": 0.8101,
      "step": 465770
    },
    {
      "epoch": 1.6324427761831162,
      "grad_norm": 2.828125,
      "learning_rate": 2.5325228263381025e-05,
      "loss": 0.8077,
      "step": 465780
    },
    {
      "epoch": 1.632477823690012,
      "grad_norm": 2.78125,
      "learning_rate": 2.5324579234717323e-05,
      "loss": 0.8842,
      "step": 465790
    },
    {
      "epoch": 1.6325128711969072,
      "grad_norm": 2.625,
      "learning_rate": 2.532393020605362e-05,
      "loss": 0.9386,
      "step": 465800
    },
    {
      "epoch": 1.632547918703803,
      "grad_norm": 2.9375,
      "learning_rate": 2.532328117738992e-05,
      "loss": 0.8859,
      "step": 465810
    },
    {
      "epoch": 1.6325829662106988,
      "grad_norm": 2.90625,
      "learning_rate": 2.5322632148726217e-05,
      "loss": 0.8272,
      "step": 465820
    },
    {
      "epoch": 1.632618013717594,
      "grad_norm": 2.828125,
      "learning_rate": 2.5321983120062515e-05,
      "loss": 0.8179,
      "step": 465830
    },
    {
      "epoch": 1.6326530612244898,
      "grad_norm": 3.21875,
      "learning_rate": 2.5321334091398813e-05,
      "loss": 0.8037,
      "step": 465840
    },
    {
      "epoch": 1.6326881087313854,
      "grad_norm": 2.953125,
      "learning_rate": 2.532068506273511e-05,
      "loss": 0.8607,
      "step": 465850
    },
    {
      "epoch": 1.632723156238281,
      "grad_norm": 3.125,
      "learning_rate": 2.532003603407141e-05,
      "loss": 0.8646,
      "step": 465860
    },
    {
      "epoch": 1.6327582037451767,
      "grad_norm": 2.890625,
      "learning_rate": 2.5319387005407707e-05,
      "loss": 0.7856,
      "step": 465870
    },
    {
      "epoch": 1.6327932512520722,
      "grad_norm": 2.71875,
      "learning_rate": 2.5318737976744005e-05,
      "loss": 0.8942,
      "step": 465880
    },
    {
      "epoch": 1.6328282987589677,
      "grad_norm": 2.9375,
      "learning_rate": 2.5318088948080303e-05,
      "loss": 0.8627,
      "step": 465890
    },
    {
      "epoch": 1.6328633462658635,
      "grad_norm": 3.15625,
      "learning_rate": 2.53174399194166e-05,
      "loss": 0.8106,
      "step": 465900
    },
    {
      "epoch": 1.632898393772759,
      "grad_norm": 2.671875,
      "learning_rate": 2.53167908907529e-05,
      "loss": 0.7829,
      "step": 465910
    },
    {
      "epoch": 1.6329334412796546,
      "grad_norm": 2.90625,
      "learning_rate": 2.53161418620892e-05,
      "loss": 0.7945,
      "step": 465920
    },
    {
      "epoch": 1.6329684887865503,
      "grad_norm": 2.890625,
      "learning_rate": 2.53154928334255e-05,
      "loss": 0.8823,
      "step": 465930
    },
    {
      "epoch": 1.6330035362934456,
      "grad_norm": 3.078125,
      "learning_rate": 2.5314843804761796e-05,
      "loss": 0.8166,
      "step": 465940
    },
    {
      "epoch": 1.6330385838003414,
      "grad_norm": 3.03125,
      "learning_rate": 2.5314194776098094e-05,
      "loss": 0.8279,
      "step": 465950
    },
    {
      "epoch": 1.633073631307237,
      "grad_norm": 2.546875,
      "learning_rate": 2.5313545747434392e-05,
      "loss": 0.902,
      "step": 465960
    },
    {
      "epoch": 1.6331086788141325,
      "grad_norm": 2.921875,
      "learning_rate": 2.531289671877069e-05,
      "loss": 0.8949,
      "step": 465970
    },
    {
      "epoch": 1.6331437263210282,
      "grad_norm": 2.953125,
      "learning_rate": 2.531224769010699e-05,
      "loss": 0.7844,
      "step": 465980
    },
    {
      "epoch": 1.6331787738279238,
      "grad_norm": 2.5625,
      "learning_rate": 2.5311598661443286e-05,
      "loss": 0.7451,
      "step": 465990
    },
    {
      "epoch": 1.6332138213348193,
      "grad_norm": 3.109375,
      "learning_rate": 2.5310949632779584e-05,
      "loss": 0.742,
      "step": 466000
    },
    {
      "epoch": 1.633248868841715,
      "grad_norm": 3.234375,
      "learning_rate": 2.5310300604115882e-05,
      "loss": 0.9558,
      "step": 466010
    },
    {
      "epoch": 1.6332839163486106,
      "grad_norm": 3.4375,
      "learning_rate": 2.530965157545218e-05,
      "loss": 0.9961,
      "step": 466020
    },
    {
      "epoch": 1.6333189638555061,
      "grad_norm": 2.671875,
      "learning_rate": 2.530900254678848e-05,
      "loss": 0.8489,
      "step": 466030
    },
    {
      "epoch": 1.6333540113624019,
      "grad_norm": 2.40625,
      "learning_rate": 2.5308353518124776e-05,
      "loss": 0.7903,
      "step": 466040
    },
    {
      "epoch": 1.6333890588692972,
      "grad_norm": 2.546875,
      "learning_rate": 2.5307704489461074e-05,
      "loss": 0.8191,
      "step": 466050
    },
    {
      "epoch": 1.633424106376193,
      "grad_norm": 2.578125,
      "learning_rate": 2.5307055460797376e-05,
      "loss": 0.8104,
      "step": 466060
    },
    {
      "epoch": 1.6334591538830885,
      "grad_norm": 2.71875,
      "learning_rate": 2.5306406432133674e-05,
      "loss": 0.8798,
      "step": 466070
    },
    {
      "epoch": 1.633494201389984,
      "grad_norm": 2.8125,
      "learning_rate": 2.5305757403469972e-05,
      "loss": 0.8012,
      "step": 466080
    },
    {
      "epoch": 1.6335292488968798,
      "grad_norm": 2.5,
      "learning_rate": 2.5305108374806263e-05,
      "loss": 0.7897,
      "step": 466090
    },
    {
      "epoch": 1.6335642964037753,
      "grad_norm": 2.734375,
      "learning_rate": 2.530445934614256e-05,
      "loss": 0.7858,
      "step": 466100
    },
    {
      "epoch": 1.6335993439106709,
      "grad_norm": 2.84375,
      "learning_rate": 2.530381031747886e-05,
      "loss": 0.8258,
      "step": 466110
    },
    {
      "epoch": 1.6336343914175666,
      "grad_norm": 3.15625,
      "learning_rate": 2.5303161288815157e-05,
      "loss": 0.8125,
      "step": 466120
    },
    {
      "epoch": 1.6336694389244621,
      "grad_norm": 2.65625,
      "learning_rate": 2.5302512260151455e-05,
      "loss": 0.8113,
      "step": 466130
    },
    {
      "epoch": 1.6337044864313577,
      "grad_norm": 2.78125,
      "learning_rate": 2.5301863231487753e-05,
      "loss": 0.841,
      "step": 466140
    },
    {
      "epoch": 1.6337395339382534,
      "grad_norm": 3.046875,
      "learning_rate": 2.5301214202824054e-05,
      "loss": 0.7918,
      "step": 466150
    },
    {
      "epoch": 1.6337745814451488,
      "grad_norm": 2.859375,
      "learning_rate": 2.5300565174160352e-05,
      "loss": 0.8331,
      "step": 466160
    },
    {
      "epoch": 1.6338096289520445,
      "grad_norm": 2.875,
      "learning_rate": 2.529991614549665e-05,
      "loss": 0.8727,
      "step": 466170
    },
    {
      "epoch": 1.63384467645894,
      "grad_norm": 2.90625,
      "learning_rate": 2.529926711683295e-05,
      "loss": 0.8131,
      "step": 466180
    },
    {
      "epoch": 1.6338797239658356,
      "grad_norm": 2.75,
      "learning_rate": 2.5298618088169246e-05,
      "loss": 0.7956,
      "step": 466190
    },
    {
      "epoch": 1.6339147714727313,
      "grad_norm": 2.890625,
      "learning_rate": 2.5297969059505544e-05,
      "loss": 0.8261,
      "step": 466200
    },
    {
      "epoch": 1.6339498189796269,
      "grad_norm": 2.515625,
      "learning_rate": 2.5297320030841842e-05,
      "loss": 0.8254,
      "step": 466210
    },
    {
      "epoch": 1.6339848664865224,
      "grad_norm": 3.109375,
      "learning_rate": 2.529667100217814e-05,
      "loss": 0.8264,
      "step": 466220
    },
    {
      "epoch": 1.6340199139934182,
      "grad_norm": 5.3125,
      "learning_rate": 2.529602197351444e-05,
      "loss": 0.9829,
      "step": 466230
    },
    {
      "epoch": 1.6340549615003137,
      "grad_norm": 3.546875,
      "learning_rate": 2.5295372944850736e-05,
      "loss": 0.8593,
      "step": 466240
    },
    {
      "epoch": 1.6340900090072092,
      "grad_norm": 2.75,
      "learning_rate": 2.5294723916187034e-05,
      "loss": 0.8331,
      "step": 466250
    },
    {
      "epoch": 1.634125056514105,
      "grad_norm": 2.921875,
      "learning_rate": 2.5294074887523332e-05,
      "loss": 0.8827,
      "step": 466260
    },
    {
      "epoch": 1.6341601040210003,
      "grad_norm": 2.890625,
      "learning_rate": 2.529342585885963e-05,
      "loss": 0.8466,
      "step": 466270
    },
    {
      "epoch": 1.634195151527896,
      "grad_norm": 2.515625,
      "learning_rate": 2.529277683019593e-05,
      "loss": 0.9081,
      "step": 466280
    },
    {
      "epoch": 1.6342301990347916,
      "grad_norm": 3.203125,
      "learning_rate": 2.529212780153223e-05,
      "loss": 0.8493,
      "step": 466290
    },
    {
      "epoch": 1.6342652465416871,
      "grad_norm": 2.96875,
      "learning_rate": 2.5291478772868528e-05,
      "loss": 0.7343,
      "step": 466300
    },
    {
      "epoch": 1.634300294048583,
      "grad_norm": 2.90625,
      "learning_rate": 2.5290829744204826e-05,
      "loss": 0.7534,
      "step": 466310
    },
    {
      "epoch": 1.6343353415554784,
      "grad_norm": 3.015625,
      "learning_rate": 2.5290180715541124e-05,
      "loss": 0.7648,
      "step": 466320
    },
    {
      "epoch": 1.634370389062374,
      "grad_norm": 2.609375,
      "learning_rate": 2.5289531686877422e-05,
      "loss": 0.822,
      "step": 466330
    },
    {
      "epoch": 1.6344054365692697,
      "grad_norm": 3.453125,
      "learning_rate": 2.528888265821372e-05,
      "loss": 0.9015,
      "step": 466340
    },
    {
      "epoch": 1.6344404840761653,
      "grad_norm": 2.75,
      "learning_rate": 2.5288233629550018e-05,
      "loss": 0.796,
      "step": 466350
    },
    {
      "epoch": 1.6344755315830608,
      "grad_norm": 2.234375,
      "learning_rate": 2.5287584600886316e-05,
      "loss": 0.7833,
      "step": 466360
    },
    {
      "epoch": 1.6345105790899566,
      "grad_norm": 3.28125,
      "learning_rate": 2.5286935572222614e-05,
      "loss": 0.9125,
      "step": 466370
    },
    {
      "epoch": 1.6345456265968519,
      "grad_norm": 3.046875,
      "learning_rate": 2.5286286543558912e-05,
      "loss": 0.8712,
      "step": 466380
    },
    {
      "epoch": 1.6345806741037476,
      "grad_norm": 2.953125,
      "learning_rate": 2.528563751489521e-05,
      "loss": 0.9413,
      "step": 466390
    },
    {
      "epoch": 1.6346157216106434,
      "grad_norm": 3.109375,
      "learning_rate": 2.5284988486231508e-05,
      "loss": 0.7574,
      "step": 466400
    },
    {
      "epoch": 1.6346507691175387,
      "grad_norm": 3.015625,
      "learning_rate": 2.5284339457567806e-05,
      "loss": 0.7479,
      "step": 466410
    },
    {
      "epoch": 1.6346858166244345,
      "grad_norm": 2.9375,
      "learning_rate": 2.5283690428904104e-05,
      "loss": 0.875,
      "step": 466420
    },
    {
      "epoch": 1.63472086413133,
      "grad_norm": 2.71875,
      "learning_rate": 2.5283041400240405e-05,
      "loss": 0.8076,
      "step": 466430
    },
    {
      "epoch": 1.6347559116382255,
      "grad_norm": 3.390625,
      "learning_rate": 2.5282392371576703e-05,
      "loss": 0.8651,
      "step": 466440
    },
    {
      "epoch": 1.6347909591451213,
      "grad_norm": 3.015625,
      "learning_rate": 2.5281743342913e-05,
      "loss": 0.8654,
      "step": 466450
    },
    {
      "epoch": 1.6348260066520168,
      "grad_norm": 2.859375,
      "learning_rate": 2.52810943142493e-05,
      "loss": 0.8398,
      "step": 466460
    },
    {
      "epoch": 1.6348610541589124,
      "grad_norm": 2.9375,
      "learning_rate": 2.528044528558559e-05,
      "loss": 0.9152,
      "step": 466470
    },
    {
      "epoch": 1.6348961016658081,
      "grad_norm": 3.015625,
      "learning_rate": 2.527979625692189e-05,
      "loss": 0.8801,
      "step": 466480
    },
    {
      "epoch": 1.6349311491727034,
      "grad_norm": 3.296875,
      "learning_rate": 2.5279147228258186e-05,
      "loss": 0.8933,
      "step": 466490
    },
    {
      "epoch": 1.6349661966795992,
      "grad_norm": 3.34375,
      "learning_rate": 2.5278498199594484e-05,
      "loss": 0.9225,
      "step": 466500
    },
    {
      "epoch": 1.635001244186495,
      "grad_norm": 2.9375,
      "learning_rate": 2.5277849170930786e-05,
      "loss": 0.8541,
      "step": 466510
    },
    {
      "epoch": 1.6350362916933903,
      "grad_norm": 3.015625,
      "learning_rate": 2.5277200142267084e-05,
      "loss": 0.8588,
      "step": 466520
    },
    {
      "epoch": 1.635071339200286,
      "grad_norm": 2.890625,
      "learning_rate": 2.5276551113603382e-05,
      "loss": 0.8154,
      "step": 466530
    },
    {
      "epoch": 1.6351063867071816,
      "grad_norm": 3.078125,
      "learning_rate": 2.527590208493968e-05,
      "loss": 0.808,
      "step": 466540
    },
    {
      "epoch": 1.635141434214077,
      "grad_norm": 2.890625,
      "learning_rate": 2.5275253056275978e-05,
      "loss": 0.8257,
      "step": 466550
    },
    {
      "epoch": 1.6351764817209729,
      "grad_norm": 2.6875,
      "learning_rate": 2.5274604027612276e-05,
      "loss": 0.7793,
      "step": 466560
    },
    {
      "epoch": 1.6352115292278684,
      "grad_norm": 3.234375,
      "learning_rate": 2.5273954998948574e-05,
      "loss": 0.8605,
      "step": 466570
    },
    {
      "epoch": 1.635246576734764,
      "grad_norm": 2.875,
      "learning_rate": 2.5273305970284872e-05,
      "loss": 0.814,
      "step": 466580
    },
    {
      "epoch": 1.6352816242416597,
      "grad_norm": 3.296875,
      "learning_rate": 2.527265694162117e-05,
      "loss": 0.8976,
      "step": 466590
    },
    {
      "epoch": 1.6353166717485552,
      "grad_norm": 2.84375,
      "learning_rate": 2.5272007912957468e-05,
      "loss": 0.8114,
      "step": 466600
    },
    {
      "epoch": 1.6353517192554508,
      "grad_norm": 2.484375,
      "learning_rate": 2.5271358884293766e-05,
      "loss": 0.8329,
      "step": 466610
    },
    {
      "epoch": 1.6353867667623465,
      "grad_norm": 2.75,
      "learning_rate": 2.5270709855630064e-05,
      "loss": 0.8508,
      "step": 466620
    },
    {
      "epoch": 1.6354218142692418,
      "grad_norm": 2.6875,
      "learning_rate": 2.5270060826966362e-05,
      "loss": 0.8657,
      "step": 466630
    },
    {
      "epoch": 1.6354568617761376,
      "grad_norm": 2.34375,
      "learning_rate": 2.526941179830266e-05,
      "loss": 0.8031,
      "step": 466640
    },
    {
      "epoch": 1.6354919092830331,
      "grad_norm": 2.8125,
      "learning_rate": 2.526876276963896e-05,
      "loss": 0.842,
      "step": 466650
    },
    {
      "epoch": 1.6355269567899287,
      "grad_norm": 2.984375,
      "learning_rate": 2.526811374097526e-05,
      "loss": 0.8127,
      "step": 466660
    },
    {
      "epoch": 1.6355620042968244,
      "grad_norm": 3.015625,
      "learning_rate": 2.5267464712311557e-05,
      "loss": 0.8761,
      "step": 466670
    },
    {
      "epoch": 1.63559705180372,
      "grad_norm": 3.296875,
      "learning_rate": 2.5266815683647855e-05,
      "loss": 0.859,
      "step": 466680
    },
    {
      "epoch": 1.6356320993106155,
      "grad_norm": 2.875,
      "learning_rate": 2.5266166654984153e-05,
      "loss": 0.8181,
      "step": 466690
    },
    {
      "epoch": 1.6356671468175112,
      "grad_norm": 2.703125,
      "learning_rate": 2.526551762632045e-05,
      "loss": 0.8587,
      "step": 466700
    },
    {
      "epoch": 1.6357021943244068,
      "grad_norm": 2.9375,
      "learning_rate": 2.526486859765675e-05,
      "loss": 0.7937,
      "step": 466710
    },
    {
      "epoch": 1.6357372418313023,
      "grad_norm": 2.90625,
      "learning_rate": 2.5264219568993047e-05,
      "loss": 0.8587,
      "step": 466720
    },
    {
      "epoch": 1.635772289338198,
      "grad_norm": 2.59375,
      "learning_rate": 2.5263570540329345e-05,
      "loss": 0.8505,
      "step": 466730
    },
    {
      "epoch": 1.6358073368450934,
      "grad_norm": 2.5625,
      "learning_rate": 2.5262921511665643e-05,
      "loss": 0.8271,
      "step": 466740
    },
    {
      "epoch": 1.6358423843519891,
      "grad_norm": 2.546875,
      "learning_rate": 2.526227248300194e-05,
      "loss": 0.8293,
      "step": 466750
    },
    {
      "epoch": 1.6358774318588847,
      "grad_norm": 2.375,
      "learning_rate": 2.526162345433824e-05,
      "loss": 0.7639,
      "step": 466760
    },
    {
      "epoch": 1.6359124793657802,
      "grad_norm": 2.859375,
      "learning_rate": 2.5260974425674537e-05,
      "loss": 0.8433,
      "step": 466770
    },
    {
      "epoch": 1.635947526872676,
      "grad_norm": 3.171875,
      "learning_rate": 2.5260325397010835e-05,
      "loss": 0.8355,
      "step": 466780
    },
    {
      "epoch": 1.6359825743795715,
      "grad_norm": 2.984375,
      "learning_rate": 2.5259676368347137e-05,
      "loss": 0.9153,
      "step": 466790
    },
    {
      "epoch": 1.636017621886467,
      "grad_norm": 2.875,
      "learning_rate": 2.5259027339683435e-05,
      "loss": 0.8367,
      "step": 466800
    },
    {
      "epoch": 1.6360526693933628,
      "grad_norm": 2.859375,
      "learning_rate": 2.5258378311019733e-05,
      "loss": 0.913,
      "step": 466810
    },
    {
      "epoch": 1.6360877169002583,
      "grad_norm": 3.109375,
      "learning_rate": 2.525772928235603e-05,
      "loss": 0.8517,
      "step": 466820
    },
    {
      "epoch": 1.6361227644071539,
      "grad_norm": 2.546875,
      "learning_rate": 2.525708025369233e-05,
      "loss": 0.8046,
      "step": 466830
    },
    {
      "epoch": 1.6361578119140496,
      "grad_norm": 2.8125,
      "learning_rate": 2.5256431225028627e-05,
      "loss": 0.8678,
      "step": 466840
    },
    {
      "epoch": 1.636192859420945,
      "grad_norm": 2.765625,
      "learning_rate": 2.5255782196364918e-05,
      "loss": 0.7888,
      "step": 466850
    },
    {
      "epoch": 1.6362279069278407,
      "grad_norm": 3.078125,
      "learning_rate": 2.5255133167701216e-05,
      "loss": 0.8501,
      "step": 466860
    },
    {
      "epoch": 1.6362629544347362,
      "grad_norm": 3.0,
      "learning_rate": 2.5254484139037514e-05,
      "loss": 0.7467,
      "step": 466870
    },
    {
      "epoch": 1.6362980019416318,
      "grad_norm": 2.96875,
      "learning_rate": 2.5253835110373815e-05,
      "loss": 0.8118,
      "step": 466880
    },
    {
      "epoch": 1.6363330494485275,
      "grad_norm": 2.671875,
      "learning_rate": 2.5253186081710113e-05,
      "loss": 0.8444,
      "step": 466890
    },
    {
      "epoch": 1.636368096955423,
      "grad_norm": 3.109375,
      "learning_rate": 2.525253705304641e-05,
      "loss": 0.8065,
      "step": 466900
    },
    {
      "epoch": 1.6364031444623186,
      "grad_norm": 3.078125,
      "learning_rate": 2.525188802438271e-05,
      "loss": 0.8446,
      "step": 466910
    },
    {
      "epoch": 1.6364381919692144,
      "grad_norm": 2.921875,
      "learning_rate": 2.5251238995719007e-05,
      "loss": 0.8479,
      "step": 466920
    },
    {
      "epoch": 1.63647323947611,
      "grad_norm": 2.640625,
      "learning_rate": 2.5250589967055305e-05,
      "loss": 0.906,
      "step": 466930
    },
    {
      "epoch": 1.6365082869830054,
      "grad_norm": 2.625,
      "learning_rate": 2.5249940938391603e-05,
      "loss": 0.7913,
      "step": 466940
    },
    {
      "epoch": 1.6365433344899012,
      "grad_norm": 2.875,
      "learning_rate": 2.52492919097279e-05,
      "loss": 0.843,
      "step": 466950
    },
    {
      "epoch": 1.6365783819967965,
      "grad_norm": 3.203125,
      "learning_rate": 2.52486428810642e-05,
      "loss": 0.8643,
      "step": 466960
    },
    {
      "epoch": 1.6366134295036923,
      "grad_norm": 2.90625,
      "learning_rate": 2.5247993852400497e-05,
      "loss": 0.8854,
      "step": 466970
    },
    {
      "epoch": 1.6366484770105878,
      "grad_norm": 3.3125,
      "learning_rate": 2.5247344823736795e-05,
      "loss": 0.9043,
      "step": 466980
    },
    {
      "epoch": 1.6366835245174833,
      "grad_norm": 3.28125,
      "learning_rate": 2.5246695795073093e-05,
      "loss": 0.8142,
      "step": 466990
    },
    {
      "epoch": 1.636718572024379,
      "grad_norm": 2.90625,
      "learning_rate": 2.524604676640939e-05,
      "loss": 0.7749,
      "step": 467000
    },
    {
      "epoch": 1.6367536195312746,
      "grad_norm": 2.5625,
      "learning_rate": 2.524539773774569e-05,
      "loss": 0.8575,
      "step": 467010
    },
    {
      "epoch": 1.6367886670381702,
      "grad_norm": 2.765625,
      "learning_rate": 2.524474870908199e-05,
      "loss": 0.7591,
      "step": 467020
    },
    {
      "epoch": 1.636823714545066,
      "grad_norm": 2.8125,
      "learning_rate": 2.524409968041829e-05,
      "loss": 0.8475,
      "step": 467030
    },
    {
      "epoch": 1.6368587620519615,
      "grad_norm": 3.09375,
      "learning_rate": 2.5243450651754587e-05,
      "loss": 0.7791,
      "step": 467040
    },
    {
      "epoch": 1.636893809558857,
      "grad_norm": 2.78125,
      "learning_rate": 2.5242801623090885e-05,
      "loss": 0.8372,
      "step": 467050
    },
    {
      "epoch": 1.6369288570657528,
      "grad_norm": 3.171875,
      "learning_rate": 2.5242152594427183e-05,
      "loss": 0.8556,
      "step": 467060
    },
    {
      "epoch": 1.636963904572648,
      "grad_norm": 3.140625,
      "learning_rate": 2.524150356576348e-05,
      "loss": 0.9163,
      "step": 467070
    },
    {
      "epoch": 1.6369989520795438,
      "grad_norm": 2.59375,
      "learning_rate": 2.524085453709978e-05,
      "loss": 0.8626,
      "step": 467080
    },
    {
      "epoch": 1.6370339995864396,
      "grad_norm": 2.71875,
      "learning_rate": 2.5240205508436077e-05,
      "loss": 0.8156,
      "step": 467090
    },
    {
      "epoch": 1.637069047093335,
      "grad_norm": 3.25,
      "learning_rate": 2.5239556479772375e-05,
      "loss": 0.8795,
      "step": 467100
    },
    {
      "epoch": 1.6371040946002307,
      "grad_norm": 3.265625,
      "learning_rate": 2.5238907451108673e-05,
      "loss": 0.8368,
      "step": 467110
    },
    {
      "epoch": 1.6371391421071262,
      "grad_norm": 3.25,
      "learning_rate": 2.523825842244497e-05,
      "loss": 0.8037,
      "step": 467120
    },
    {
      "epoch": 1.6371741896140217,
      "grad_norm": 3.203125,
      "learning_rate": 2.523760939378127e-05,
      "loss": 0.8891,
      "step": 467130
    },
    {
      "epoch": 1.6372092371209175,
      "grad_norm": 2.84375,
      "learning_rate": 2.5236960365117567e-05,
      "loss": 0.874,
      "step": 467140
    },
    {
      "epoch": 1.637244284627813,
      "grad_norm": 3.203125,
      "learning_rate": 2.5236311336453865e-05,
      "loss": 0.7594,
      "step": 467150
    },
    {
      "epoch": 1.6372793321347086,
      "grad_norm": 2.96875,
      "learning_rate": 2.5235662307790166e-05,
      "loss": 0.7959,
      "step": 467160
    },
    {
      "epoch": 1.6373143796416043,
      "grad_norm": 3.0625,
      "learning_rate": 2.5235013279126464e-05,
      "loss": 0.7931,
      "step": 467170
    },
    {
      "epoch": 1.6373494271484996,
      "grad_norm": 3.1875,
      "learning_rate": 2.5234364250462762e-05,
      "loss": 0.9097,
      "step": 467180
    },
    {
      "epoch": 1.6373844746553954,
      "grad_norm": 2.796875,
      "learning_rate": 2.523371522179906e-05,
      "loss": 0.9034,
      "step": 467190
    },
    {
      "epoch": 1.6374195221622911,
      "grad_norm": 2.84375,
      "learning_rate": 2.5233066193135358e-05,
      "loss": 0.7859,
      "step": 467200
    },
    {
      "epoch": 1.6374545696691865,
      "grad_norm": 3.21875,
      "learning_rate": 2.5232417164471656e-05,
      "loss": 0.8297,
      "step": 467210
    },
    {
      "epoch": 1.6374896171760822,
      "grad_norm": 2.609375,
      "learning_rate": 2.5231768135807947e-05,
      "loss": 0.8667,
      "step": 467220
    },
    {
      "epoch": 1.6375246646829777,
      "grad_norm": 3.109375,
      "learning_rate": 2.5231119107144245e-05,
      "loss": 0.8344,
      "step": 467230
    },
    {
      "epoch": 1.6375597121898733,
      "grad_norm": 3.140625,
      "learning_rate": 2.5230470078480543e-05,
      "loss": 0.8117,
      "step": 467240
    },
    {
      "epoch": 1.637594759696769,
      "grad_norm": 3.0,
      "learning_rate": 2.5229821049816845e-05,
      "loss": 0.8234,
      "step": 467250
    },
    {
      "epoch": 1.6376298072036646,
      "grad_norm": 3.515625,
      "learning_rate": 2.5229172021153143e-05,
      "loss": 0.8534,
      "step": 467260
    },
    {
      "epoch": 1.6376648547105601,
      "grad_norm": 3.21875,
      "learning_rate": 2.522852299248944e-05,
      "loss": 1.0012,
      "step": 467270
    },
    {
      "epoch": 1.6376999022174559,
      "grad_norm": 3.25,
      "learning_rate": 2.522787396382574e-05,
      "loss": 0.8283,
      "step": 467280
    },
    {
      "epoch": 1.6377349497243514,
      "grad_norm": 3.046875,
      "learning_rate": 2.5227224935162037e-05,
      "loss": 0.8676,
      "step": 467290
    },
    {
      "epoch": 1.637769997231247,
      "grad_norm": 3.0625,
      "learning_rate": 2.5226575906498335e-05,
      "loss": 0.8503,
      "step": 467300
    },
    {
      "epoch": 1.6378050447381427,
      "grad_norm": 2.765625,
      "learning_rate": 2.5225926877834633e-05,
      "loss": 0.8207,
      "step": 467310
    },
    {
      "epoch": 1.637840092245038,
      "grad_norm": 2.90625,
      "learning_rate": 2.522527784917093e-05,
      "loss": 0.8955,
      "step": 467320
    },
    {
      "epoch": 1.6378751397519338,
      "grad_norm": 2.96875,
      "learning_rate": 2.522462882050723e-05,
      "loss": 0.7556,
      "step": 467330
    },
    {
      "epoch": 1.6379101872588293,
      "grad_norm": 2.75,
      "learning_rate": 2.5223979791843527e-05,
      "loss": 0.9013,
      "step": 467340
    },
    {
      "epoch": 1.6379452347657248,
      "grad_norm": 3.3125,
      "learning_rate": 2.5223330763179825e-05,
      "loss": 0.8007,
      "step": 467350
    },
    {
      "epoch": 1.6379802822726206,
      "grad_norm": 2.765625,
      "learning_rate": 2.5222681734516123e-05,
      "loss": 0.8465,
      "step": 467360
    },
    {
      "epoch": 1.6380153297795161,
      "grad_norm": 2.9375,
      "learning_rate": 2.522203270585242e-05,
      "loss": 0.8174,
      "step": 467370
    },
    {
      "epoch": 1.6380503772864117,
      "grad_norm": 3.015625,
      "learning_rate": 2.522138367718872e-05,
      "loss": 0.9344,
      "step": 467380
    },
    {
      "epoch": 1.6380854247933074,
      "grad_norm": 2.546875,
      "learning_rate": 2.522073464852502e-05,
      "loss": 0.8038,
      "step": 467390
    },
    {
      "epoch": 1.638120472300203,
      "grad_norm": 2.953125,
      "learning_rate": 2.5220085619861318e-05,
      "loss": 0.8341,
      "step": 467400
    },
    {
      "epoch": 1.6381555198070985,
      "grad_norm": 3.375,
      "learning_rate": 2.5219436591197616e-05,
      "loss": 0.8493,
      "step": 467410
    },
    {
      "epoch": 1.6381905673139943,
      "grad_norm": 3.03125,
      "learning_rate": 2.5218787562533914e-05,
      "loss": 0.838,
      "step": 467420
    },
    {
      "epoch": 1.6382256148208896,
      "grad_norm": 3.140625,
      "learning_rate": 2.5218138533870212e-05,
      "loss": 0.8222,
      "step": 467430
    },
    {
      "epoch": 1.6382606623277853,
      "grad_norm": 2.953125,
      "learning_rate": 2.521748950520651e-05,
      "loss": 0.847,
      "step": 467440
    },
    {
      "epoch": 1.6382957098346809,
      "grad_norm": 2.8125,
      "learning_rate": 2.5216840476542808e-05,
      "loss": 0.8733,
      "step": 467450
    },
    {
      "epoch": 1.6383307573415764,
      "grad_norm": 3.171875,
      "learning_rate": 2.5216191447879106e-05,
      "loss": 0.8306,
      "step": 467460
    },
    {
      "epoch": 1.6383658048484722,
      "grad_norm": 3.484375,
      "learning_rate": 2.5215542419215404e-05,
      "loss": 0.8414,
      "step": 467470
    },
    {
      "epoch": 1.6384008523553677,
      "grad_norm": 3.03125,
      "learning_rate": 2.5214893390551702e-05,
      "loss": 0.8037,
      "step": 467480
    },
    {
      "epoch": 1.6384358998622632,
      "grad_norm": 2.890625,
      "learning_rate": 2.5214244361888e-05,
      "loss": 0.8876,
      "step": 467490
    },
    {
      "epoch": 1.638470947369159,
      "grad_norm": 3.140625,
      "learning_rate": 2.5213595333224298e-05,
      "loss": 0.8671,
      "step": 467500
    },
    {
      "epoch": 1.6385059948760545,
      "grad_norm": 2.84375,
      "learning_rate": 2.5212946304560596e-05,
      "loss": 0.8303,
      "step": 467510
    },
    {
      "epoch": 1.63854104238295,
      "grad_norm": 3.125,
      "learning_rate": 2.5212297275896894e-05,
      "loss": 0.9358,
      "step": 467520
    },
    {
      "epoch": 1.6385760898898458,
      "grad_norm": 3.265625,
      "learning_rate": 2.5211648247233195e-05,
      "loss": 0.8537,
      "step": 467530
    },
    {
      "epoch": 1.6386111373967411,
      "grad_norm": 2.78125,
      "learning_rate": 2.5210999218569493e-05,
      "loss": 0.7727,
      "step": 467540
    },
    {
      "epoch": 1.638646184903637,
      "grad_norm": 2.734375,
      "learning_rate": 2.521035018990579e-05,
      "loss": 0.7585,
      "step": 467550
    },
    {
      "epoch": 1.6386812324105324,
      "grad_norm": 3.421875,
      "learning_rate": 2.520970116124209e-05,
      "loss": 0.8971,
      "step": 467560
    },
    {
      "epoch": 1.638716279917428,
      "grad_norm": 2.640625,
      "learning_rate": 2.5209052132578387e-05,
      "loss": 0.8428,
      "step": 467570
    },
    {
      "epoch": 1.6387513274243237,
      "grad_norm": 2.765625,
      "learning_rate": 2.5208403103914685e-05,
      "loss": 0.7909,
      "step": 467580
    },
    {
      "epoch": 1.6387863749312193,
      "grad_norm": 2.953125,
      "learning_rate": 2.5207754075250983e-05,
      "loss": 0.87,
      "step": 467590
    },
    {
      "epoch": 1.6388214224381148,
      "grad_norm": 2.984375,
      "learning_rate": 2.5207105046587275e-05,
      "loss": 0.8358,
      "step": 467600
    },
    {
      "epoch": 1.6388564699450106,
      "grad_norm": 2.71875,
      "learning_rate": 2.5206456017923576e-05,
      "loss": 0.747,
      "step": 467610
    },
    {
      "epoch": 1.638891517451906,
      "grad_norm": 2.953125,
      "learning_rate": 2.5205806989259874e-05,
      "loss": 0.7638,
      "step": 467620
    },
    {
      "epoch": 1.6389265649588016,
      "grad_norm": 3.25,
      "learning_rate": 2.5205157960596172e-05,
      "loss": 0.8404,
      "step": 467630
    },
    {
      "epoch": 1.6389616124656974,
      "grad_norm": 3.078125,
      "learning_rate": 2.520450893193247e-05,
      "loss": 0.8136,
      "step": 467640
    },
    {
      "epoch": 1.6389966599725927,
      "grad_norm": 2.921875,
      "learning_rate": 2.5203859903268768e-05,
      "loss": 0.8442,
      "step": 467650
    },
    {
      "epoch": 1.6390317074794885,
      "grad_norm": 3.4375,
      "learning_rate": 2.5203210874605066e-05,
      "loss": 0.823,
      "step": 467660
    },
    {
      "epoch": 1.639066754986384,
      "grad_norm": 2.421875,
      "learning_rate": 2.5202561845941364e-05,
      "loss": 0.8315,
      "step": 467670
    },
    {
      "epoch": 1.6391018024932795,
      "grad_norm": 2.734375,
      "learning_rate": 2.5201912817277662e-05,
      "loss": 0.7722,
      "step": 467680
    },
    {
      "epoch": 1.6391368500001753,
      "grad_norm": 2.859375,
      "learning_rate": 2.520126378861396e-05,
      "loss": 0.8767,
      "step": 467690
    },
    {
      "epoch": 1.6391718975070708,
      "grad_norm": 2.984375,
      "learning_rate": 2.5200614759950258e-05,
      "loss": 0.8034,
      "step": 467700
    },
    {
      "epoch": 1.6392069450139664,
      "grad_norm": 2.71875,
      "learning_rate": 2.5199965731286556e-05,
      "loss": 0.8222,
      "step": 467710
    },
    {
      "epoch": 1.6392419925208621,
      "grad_norm": 2.96875,
      "learning_rate": 2.5199316702622854e-05,
      "loss": 0.8812,
      "step": 467720
    },
    {
      "epoch": 1.6392770400277576,
      "grad_norm": 2.65625,
      "learning_rate": 2.5198667673959152e-05,
      "loss": 0.8732,
      "step": 467730
    },
    {
      "epoch": 1.6393120875346532,
      "grad_norm": 2.96875,
      "learning_rate": 2.519801864529545e-05,
      "loss": 0.9393,
      "step": 467740
    },
    {
      "epoch": 1.639347135041549,
      "grad_norm": 2.796875,
      "learning_rate": 2.519736961663175e-05,
      "loss": 0.8191,
      "step": 467750
    },
    {
      "epoch": 1.6393821825484443,
      "grad_norm": 3.390625,
      "learning_rate": 2.519672058796805e-05,
      "loss": 0.8745,
      "step": 467760
    },
    {
      "epoch": 1.63941723005534,
      "grad_norm": 3.078125,
      "learning_rate": 2.5196071559304347e-05,
      "loss": 0.7914,
      "step": 467770
    },
    {
      "epoch": 1.6394522775622358,
      "grad_norm": 3.25,
      "learning_rate": 2.5195422530640645e-05,
      "loss": 0.8585,
      "step": 467780
    },
    {
      "epoch": 1.639487325069131,
      "grad_norm": 3.203125,
      "learning_rate": 2.5194773501976943e-05,
      "loss": 0.7869,
      "step": 467790
    },
    {
      "epoch": 1.6395223725760268,
      "grad_norm": 2.90625,
      "learning_rate": 2.519412447331324e-05,
      "loss": 0.8258,
      "step": 467800
    },
    {
      "epoch": 1.6395574200829224,
      "grad_norm": 2.421875,
      "learning_rate": 2.519347544464954e-05,
      "loss": 0.865,
      "step": 467810
    },
    {
      "epoch": 1.639592467589818,
      "grad_norm": 2.625,
      "learning_rate": 2.5192826415985837e-05,
      "loss": 0.8214,
      "step": 467820
    },
    {
      "epoch": 1.6396275150967137,
      "grad_norm": 2.5625,
      "learning_rate": 2.5192177387322135e-05,
      "loss": 0.805,
      "step": 467830
    },
    {
      "epoch": 1.6396625626036092,
      "grad_norm": 2.59375,
      "learning_rate": 2.5191528358658433e-05,
      "loss": 0.803,
      "step": 467840
    },
    {
      "epoch": 1.6396976101105047,
      "grad_norm": 2.9375,
      "learning_rate": 2.519087932999473e-05,
      "loss": 0.8481,
      "step": 467850
    },
    {
      "epoch": 1.6397326576174005,
      "grad_norm": 2.390625,
      "learning_rate": 2.519023030133103e-05,
      "loss": 0.855,
      "step": 467860
    },
    {
      "epoch": 1.639767705124296,
      "grad_norm": 2.953125,
      "learning_rate": 2.5189581272667327e-05,
      "loss": 0.8094,
      "step": 467870
    },
    {
      "epoch": 1.6398027526311916,
      "grad_norm": 2.890625,
      "learning_rate": 2.5188932244003625e-05,
      "loss": 0.824,
      "step": 467880
    },
    {
      "epoch": 1.6398378001380873,
      "grad_norm": 3.03125,
      "learning_rate": 2.5188283215339927e-05,
      "loss": 0.8718,
      "step": 467890
    },
    {
      "epoch": 1.6398728476449826,
      "grad_norm": 2.71875,
      "learning_rate": 2.5187634186676225e-05,
      "loss": 0.7928,
      "step": 467900
    },
    {
      "epoch": 1.6399078951518784,
      "grad_norm": 3.015625,
      "learning_rate": 2.5186985158012523e-05,
      "loss": 0.8703,
      "step": 467910
    },
    {
      "epoch": 1.639942942658774,
      "grad_norm": 3.03125,
      "learning_rate": 2.518633612934882e-05,
      "loss": 0.9562,
      "step": 467920
    },
    {
      "epoch": 1.6399779901656695,
      "grad_norm": 2.9375,
      "learning_rate": 2.518568710068512e-05,
      "loss": 0.9435,
      "step": 467930
    },
    {
      "epoch": 1.6400130376725652,
      "grad_norm": 2.84375,
      "learning_rate": 2.5185038072021417e-05,
      "loss": 0.8861,
      "step": 467940
    },
    {
      "epoch": 1.6400480851794608,
      "grad_norm": 3.359375,
      "learning_rate": 2.5184389043357715e-05,
      "loss": 0.9337,
      "step": 467950
    },
    {
      "epoch": 1.6400831326863563,
      "grad_norm": 3.375,
      "learning_rate": 2.5183740014694013e-05,
      "loss": 0.7798,
      "step": 467960
    },
    {
      "epoch": 1.640118180193252,
      "grad_norm": 2.96875,
      "learning_rate": 2.5183090986030304e-05,
      "loss": 0.8828,
      "step": 467970
    },
    {
      "epoch": 1.6401532277001476,
      "grad_norm": 2.828125,
      "learning_rate": 2.5182441957366605e-05,
      "loss": 0.9235,
      "step": 467980
    },
    {
      "epoch": 1.6401882752070431,
      "grad_norm": 2.53125,
      "learning_rate": 2.5181792928702903e-05,
      "loss": 0.8312,
      "step": 467990
    },
    {
      "epoch": 1.640223322713939,
      "grad_norm": 3.140625,
      "learning_rate": 2.51811439000392e-05,
      "loss": 0.8436,
      "step": 468000
    },
    {
      "epoch": 1.6402583702208342,
      "grad_norm": 2.84375,
      "learning_rate": 2.51804948713755e-05,
      "loss": 0.8308,
      "step": 468010
    },
    {
      "epoch": 1.64029341772773,
      "grad_norm": 3.171875,
      "learning_rate": 2.5179845842711797e-05,
      "loss": 0.8752,
      "step": 468020
    },
    {
      "epoch": 1.6403284652346255,
      "grad_norm": 2.609375,
      "learning_rate": 2.5179196814048095e-05,
      "loss": 0.89,
      "step": 468030
    },
    {
      "epoch": 1.640363512741521,
      "grad_norm": 2.8125,
      "learning_rate": 2.5178547785384393e-05,
      "loss": 0.8532,
      "step": 468040
    },
    {
      "epoch": 1.6403985602484168,
      "grad_norm": 2.703125,
      "learning_rate": 2.517789875672069e-05,
      "loss": 0.7414,
      "step": 468050
    },
    {
      "epoch": 1.6404336077553123,
      "grad_norm": 2.765625,
      "learning_rate": 2.517724972805699e-05,
      "loss": 0.808,
      "step": 468060
    },
    {
      "epoch": 1.6404686552622079,
      "grad_norm": 3.234375,
      "learning_rate": 2.5176600699393287e-05,
      "loss": 0.8166,
      "step": 468070
    },
    {
      "epoch": 1.6405037027691036,
      "grad_norm": 2.734375,
      "learning_rate": 2.5175951670729585e-05,
      "loss": 0.804,
      "step": 468080
    },
    {
      "epoch": 1.6405387502759992,
      "grad_norm": 2.734375,
      "learning_rate": 2.5175302642065883e-05,
      "loss": 0.7759,
      "step": 468090
    },
    {
      "epoch": 1.6405737977828947,
      "grad_norm": 2.390625,
      "learning_rate": 2.517465361340218e-05,
      "loss": 0.7769,
      "step": 468100
    },
    {
      "epoch": 1.6406088452897905,
      "grad_norm": 3.234375,
      "learning_rate": 2.517400458473848e-05,
      "loss": 0.7724,
      "step": 468110
    },
    {
      "epoch": 1.6406438927966858,
      "grad_norm": 2.703125,
      "learning_rate": 2.517335555607478e-05,
      "loss": 0.7868,
      "step": 468120
    },
    {
      "epoch": 1.6406789403035815,
      "grad_norm": 3.015625,
      "learning_rate": 2.517270652741108e-05,
      "loss": 0.8297,
      "step": 468130
    },
    {
      "epoch": 1.640713987810477,
      "grad_norm": 2.875,
      "learning_rate": 2.5172057498747377e-05,
      "loss": 0.8493,
      "step": 468140
    },
    {
      "epoch": 1.6407490353173726,
      "grad_norm": 3.03125,
      "learning_rate": 2.5171408470083675e-05,
      "loss": 0.8412,
      "step": 468150
    },
    {
      "epoch": 1.6407840828242684,
      "grad_norm": 3.0625,
      "learning_rate": 2.5170759441419973e-05,
      "loss": 0.9106,
      "step": 468160
    },
    {
      "epoch": 1.6408191303311639,
      "grad_norm": 2.859375,
      "learning_rate": 2.517011041275627e-05,
      "loss": 0.7743,
      "step": 468170
    },
    {
      "epoch": 1.6408541778380594,
      "grad_norm": 2.90625,
      "learning_rate": 2.516946138409257e-05,
      "loss": 0.839,
      "step": 468180
    },
    {
      "epoch": 1.6408892253449552,
      "grad_norm": 2.640625,
      "learning_rate": 2.5168812355428867e-05,
      "loss": 0.8873,
      "step": 468190
    },
    {
      "epoch": 1.6409242728518507,
      "grad_norm": 2.859375,
      "learning_rate": 2.5168163326765165e-05,
      "loss": 0.7894,
      "step": 468200
    },
    {
      "epoch": 1.6409593203587463,
      "grad_norm": 2.96875,
      "learning_rate": 2.5167514298101463e-05,
      "loss": 0.8951,
      "step": 468210
    },
    {
      "epoch": 1.640994367865642,
      "grad_norm": 2.90625,
      "learning_rate": 2.516686526943776e-05,
      "loss": 0.8406,
      "step": 468220
    },
    {
      "epoch": 1.6410294153725373,
      "grad_norm": 2.828125,
      "learning_rate": 2.516621624077406e-05,
      "loss": 0.8137,
      "step": 468230
    },
    {
      "epoch": 1.641064462879433,
      "grad_norm": 2.734375,
      "learning_rate": 2.5165567212110357e-05,
      "loss": 0.8817,
      "step": 468240
    },
    {
      "epoch": 1.6410995103863286,
      "grad_norm": 2.609375,
      "learning_rate": 2.5164918183446655e-05,
      "loss": 0.8713,
      "step": 468250
    },
    {
      "epoch": 1.6411345578932242,
      "grad_norm": 3.1875,
      "learning_rate": 2.5164269154782956e-05,
      "loss": 0.8233,
      "step": 468260
    },
    {
      "epoch": 1.64116960540012,
      "grad_norm": 2.78125,
      "learning_rate": 2.5163620126119254e-05,
      "loss": 0.7943,
      "step": 468270
    },
    {
      "epoch": 1.6412046529070154,
      "grad_norm": 2.78125,
      "learning_rate": 2.5162971097455552e-05,
      "loss": 0.8216,
      "step": 468280
    },
    {
      "epoch": 1.641239700413911,
      "grad_norm": 3.28125,
      "learning_rate": 2.516232206879185e-05,
      "loss": 0.8443,
      "step": 468290
    },
    {
      "epoch": 1.6412747479208067,
      "grad_norm": 2.84375,
      "learning_rate": 2.5161673040128148e-05,
      "loss": 0.8476,
      "step": 468300
    },
    {
      "epoch": 1.6413097954277023,
      "grad_norm": 2.921875,
      "learning_rate": 2.5161024011464446e-05,
      "loss": 0.8005,
      "step": 468310
    },
    {
      "epoch": 1.6413448429345978,
      "grad_norm": 2.90625,
      "learning_rate": 2.5160374982800744e-05,
      "loss": 0.8856,
      "step": 468320
    },
    {
      "epoch": 1.6413798904414936,
      "grad_norm": 3.046875,
      "learning_rate": 2.5159725954137042e-05,
      "loss": 0.8746,
      "step": 468330
    },
    {
      "epoch": 1.6414149379483889,
      "grad_norm": 2.90625,
      "learning_rate": 2.515907692547334e-05,
      "loss": 0.95,
      "step": 468340
    },
    {
      "epoch": 1.6414499854552846,
      "grad_norm": 2.65625,
      "learning_rate": 2.5158427896809635e-05,
      "loss": 0.8293,
      "step": 468350
    },
    {
      "epoch": 1.6414850329621804,
      "grad_norm": 3.21875,
      "learning_rate": 2.5157778868145933e-05,
      "loss": 0.7986,
      "step": 468360
    },
    {
      "epoch": 1.6415200804690757,
      "grad_norm": 2.9375,
      "learning_rate": 2.515712983948223e-05,
      "loss": 0.7637,
      "step": 468370
    },
    {
      "epoch": 1.6415551279759715,
      "grad_norm": 2.734375,
      "learning_rate": 2.515648081081853e-05,
      "loss": 0.749,
      "step": 468380
    },
    {
      "epoch": 1.641590175482867,
      "grad_norm": 2.828125,
      "learning_rate": 2.5155831782154827e-05,
      "loss": 0.8219,
      "step": 468390
    },
    {
      "epoch": 1.6416252229897625,
      "grad_norm": 2.828125,
      "learning_rate": 2.5155182753491125e-05,
      "loss": 0.8485,
      "step": 468400
    },
    {
      "epoch": 1.6416602704966583,
      "grad_norm": 2.8125,
      "learning_rate": 2.5154533724827423e-05,
      "loss": 0.7835,
      "step": 468410
    },
    {
      "epoch": 1.6416953180035538,
      "grad_norm": 2.953125,
      "learning_rate": 2.515388469616372e-05,
      "loss": 0.7703,
      "step": 468420
    },
    {
      "epoch": 1.6417303655104494,
      "grad_norm": 3.21875,
      "learning_rate": 2.515323566750002e-05,
      "loss": 0.7334,
      "step": 468430
    },
    {
      "epoch": 1.6417654130173451,
      "grad_norm": 2.8125,
      "learning_rate": 2.5152586638836317e-05,
      "loss": 0.8845,
      "step": 468440
    },
    {
      "epoch": 1.6418004605242404,
      "grad_norm": 3.0625,
      "learning_rate": 2.5151937610172615e-05,
      "loss": 0.8063,
      "step": 468450
    },
    {
      "epoch": 1.6418355080311362,
      "grad_norm": 3.359375,
      "learning_rate": 2.5151288581508913e-05,
      "loss": 0.7863,
      "step": 468460
    },
    {
      "epoch": 1.641870555538032,
      "grad_norm": 2.75,
      "learning_rate": 2.515063955284521e-05,
      "loss": 0.8386,
      "step": 468470
    },
    {
      "epoch": 1.6419056030449273,
      "grad_norm": 3.078125,
      "learning_rate": 2.514999052418151e-05,
      "loss": 0.845,
      "step": 468480
    },
    {
      "epoch": 1.641940650551823,
      "grad_norm": 2.703125,
      "learning_rate": 2.514934149551781e-05,
      "loss": 0.865,
      "step": 468490
    },
    {
      "epoch": 1.6419756980587186,
      "grad_norm": 2.71875,
      "learning_rate": 2.5148692466854108e-05,
      "loss": 0.9002,
      "step": 468500
    },
    {
      "epoch": 1.642010745565614,
      "grad_norm": 2.578125,
      "learning_rate": 2.5148043438190406e-05,
      "loss": 0.8083,
      "step": 468510
    },
    {
      "epoch": 1.6420457930725099,
      "grad_norm": 2.578125,
      "learning_rate": 2.5147394409526704e-05,
      "loss": 0.8563,
      "step": 468520
    },
    {
      "epoch": 1.6420808405794054,
      "grad_norm": 2.984375,
      "learning_rate": 2.5146745380863002e-05,
      "loss": 0.8428,
      "step": 468530
    },
    {
      "epoch": 1.642115888086301,
      "grad_norm": 2.546875,
      "learning_rate": 2.51460963521993e-05,
      "loss": 0.8353,
      "step": 468540
    },
    {
      "epoch": 1.6421509355931967,
      "grad_norm": 2.46875,
      "learning_rate": 2.5145447323535598e-05,
      "loss": 0.8264,
      "step": 468550
    },
    {
      "epoch": 1.6421859831000922,
      "grad_norm": 3.171875,
      "learning_rate": 2.5144798294871896e-05,
      "loss": 0.8231,
      "step": 468560
    },
    {
      "epoch": 1.6422210306069878,
      "grad_norm": 3.15625,
      "learning_rate": 2.5144149266208194e-05,
      "loss": 0.8113,
      "step": 468570
    },
    {
      "epoch": 1.6422560781138835,
      "grad_norm": 2.984375,
      "learning_rate": 2.5143500237544492e-05,
      "loss": 0.8716,
      "step": 468580
    },
    {
      "epoch": 1.6422911256207788,
      "grad_norm": 2.6875,
      "learning_rate": 2.514285120888079e-05,
      "loss": 0.8006,
      "step": 468590
    },
    {
      "epoch": 1.6423261731276746,
      "grad_norm": 3.125,
      "learning_rate": 2.5142202180217088e-05,
      "loss": 0.8587,
      "step": 468600
    },
    {
      "epoch": 1.6423612206345701,
      "grad_norm": 2.609375,
      "learning_rate": 2.5141553151553386e-05,
      "loss": 0.8354,
      "step": 468610
    },
    {
      "epoch": 1.6423962681414657,
      "grad_norm": 2.84375,
      "learning_rate": 2.5140904122889687e-05,
      "loss": 0.8793,
      "step": 468620
    },
    {
      "epoch": 1.6424313156483614,
      "grad_norm": 2.875,
      "learning_rate": 2.5140255094225985e-05,
      "loss": 0.8443,
      "step": 468630
    },
    {
      "epoch": 1.642466363155257,
      "grad_norm": 2.875,
      "learning_rate": 2.5139606065562283e-05,
      "loss": 0.7952,
      "step": 468640
    },
    {
      "epoch": 1.6425014106621525,
      "grad_norm": 2.71875,
      "learning_rate": 2.513895703689858e-05,
      "loss": 0.9135,
      "step": 468650
    },
    {
      "epoch": 1.6425364581690483,
      "grad_norm": 3.84375,
      "learning_rate": 2.513830800823488e-05,
      "loss": 0.852,
      "step": 468660
    },
    {
      "epoch": 1.6425715056759438,
      "grad_norm": 2.734375,
      "learning_rate": 2.5137658979571177e-05,
      "loss": 0.875,
      "step": 468670
    },
    {
      "epoch": 1.6426065531828393,
      "grad_norm": 2.65625,
      "learning_rate": 2.5137009950907475e-05,
      "loss": 0.8172,
      "step": 468680
    },
    {
      "epoch": 1.642641600689735,
      "grad_norm": 3.25,
      "learning_rate": 2.5136360922243773e-05,
      "loss": 0.8247,
      "step": 468690
    },
    {
      "epoch": 1.6426766481966304,
      "grad_norm": 3.21875,
      "learning_rate": 2.513571189358007e-05,
      "loss": 0.7793,
      "step": 468700
    },
    {
      "epoch": 1.6427116957035262,
      "grad_norm": 2.703125,
      "learning_rate": 2.513506286491637e-05,
      "loss": 0.7831,
      "step": 468710
    },
    {
      "epoch": 1.6427467432104217,
      "grad_norm": 2.875,
      "learning_rate": 2.5134413836252667e-05,
      "loss": 0.8611,
      "step": 468720
    },
    {
      "epoch": 1.6427817907173172,
      "grad_norm": 2.890625,
      "learning_rate": 2.5133764807588962e-05,
      "loss": 0.7776,
      "step": 468730
    },
    {
      "epoch": 1.642816838224213,
      "grad_norm": 3.171875,
      "learning_rate": 2.513311577892526e-05,
      "loss": 0.8737,
      "step": 468740
    },
    {
      "epoch": 1.6428518857311085,
      "grad_norm": 2.734375,
      "learning_rate": 2.5132466750261558e-05,
      "loss": 0.9684,
      "step": 468750
    },
    {
      "epoch": 1.642886933238004,
      "grad_norm": 2.84375,
      "learning_rate": 2.5131817721597856e-05,
      "loss": 0.8123,
      "step": 468760
    },
    {
      "epoch": 1.6429219807448998,
      "grad_norm": 3.328125,
      "learning_rate": 2.5131168692934154e-05,
      "loss": 0.8821,
      "step": 468770
    },
    {
      "epoch": 1.6429570282517953,
      "grad_norm": 2.28125,
      "learning_rate": 2.5130519664270452e-05,
      "loss": 0.7939,
      "step": 468780
    },
    {
      "epoch": 1.6429920757586909,
      "grad_norm": 3.0625,
      "learning_rate": 2.512987063560675e-05,
      "loss": 0.8097,
      "step": 468790
    },
    {
      "epoch": 1.6430271232655866,
      "grad_norm": 2.921875,
      "learning_rate": 2.5129221606943048e-05,
      "loss": 0.7613,
      "step": 468800
    },
    {
      "epoch": 1.643062170772482,
      "grad_norm": 3.21875,
      "learning_rate": 2.5128572578279346e-05,
      "loss": 0.8679,
      "step": 468810
    },
    {
      "epoch": 1.6430972182793777,
      "grad_norm": 2.796875,
      "learning_rate": 2.5127923549615644e-05,
      "loss": 0.8306,
      "step": 468820
    },
    {
      "epoch": 1.6431322657862732,
      "grad_norm": 2.484375,
      "learning_rate": 2.5127274520951942e-05,
      "loss": 0.8445,
      "step": 468830
    },
    {
      "epoch": 1.6431673132931688,
      "grad_norm": 2.984375,
      "learning_rate": 2.512662549228824e-05,
      "loss": 0.7207,
      "step": 468840
    },
    {
      "epoch": 1.6432023608000645,
      "grad_norm": 3.21875,
      "learning_rate": 2.512597646362454e-05,
      "loss": 0.8741,
      "step": 468850
    },
    {
      "epoch": 1.64323740830696,
      "grad_norm": 2.890625,
      "learning_rate": 2.512532743496084e-05,
      "loss": 0.802,
      "step": 468860
    },
    {
      "epoch": 1.6432724558138556,
      "grad_norm": 2.625,
      "learning_rate": 2.5124678406297137e-05,
      "loss": 0.8262,
      "step": 468870
    },
    {
      "epoch": 1.6433075033207514,
      "grad_norm": 2.96875,
      "learning_rate": 2.5124029377633435e-05,
      "loss": 0.8578,
      "step": 468880
    },
    {
      "epoch": 1.643342550827647,
      "grad_norm": 2.71875,
      "learning_rate": 2.5123380348969733e-05,
      "loss": 0.8058,
      "step": 468890
    },
    {
      "epoch": 1.6433775983345424,
      "grad_norm": 3.046875,
      "learning_rate": 2.512273132030603e-05,
      "loss": 0.8289,
      "step": 468900
    },
    {
      "epoch": 1.6434126458414382,
      "grad_norm": 2.703125,
      "learning_rate": 2.512208229164233e-05,
      "loss": 0.8764,
      "step": 468910
    },
    {
      "epoch": 1.6434476933483335,
      "grad_norm": 2.546875,
      "learning_rate": 2.5121433262978627e-05,
      "loss": 0.8164,
      "step": 468920
    },
    {
      "epoch": 1.6434827408552293,
      "grad_norm": 3.015625,
      "learning_rate": 2.5120784234314925e-05,
      "loss": 0.8397,
      "step": 468930
    },
    {
      "epoch": 1.6435177883621248,
      "grad_norm": 3.328125,
      "learning_rate": 2.5120135205651223e-05,
      "loss": 0.7923,
      "step": 468940
    },
    {
      "epoch": 1.6435528358690203,
      "grad_norm": 2.75,
      "learning_rate": 2.511948617698752e-05,
      "loss": 0.8132,
      "step": 468950
    },
    {
      "epoch": 1.643587883375916,
      "grad_norm": 3.0625,
      "learning_rate": 2.511883714832382e-05,
      "loss": 0.7688,
      "step": 468960
    },
    {
      "epoch": 1.6436229308828116,
      "grad_norm": 3.625,
      "learning_rate": 2.5118188119660117e-05,
      "loss": 0.9561,
      "step": 468970
    },
    {
      "epoch": 1.6436579783897072,
      "grad_norm": 2.640625,
      "learning_rate": 2.5117539090996415e-05,
      "loss": 0.8573,
      "step": 468980
    },
    {
      "epoch": 1.643693025896603,
      "grad_norm": 3.046875,
      "learning_rate": 2.5116890062332717e-05,
      "loss": 0.874,
      "step": 468990
    },
    {
      "epoch": 1.6437280734034985,
      "grad_norm": 3.03125,
      "learning_rate": 2.5116241033669015e-05,
      "loss": 0.8326,
      "step": 469000
    },
    {
      "epoch": 1.643763120910394,
      "grad_norm": 3.109375,
      "learning_rate": 2.5115592005005313e-05,
      "loss": 0.8628,
      "step": 469010
    },
    {
      "epoch": 1.6437981684172898,
      "grad_norm": 2.6875,
      "learning_rate": 2.511494297634161e-05,
      "loss": 0.7893,
      "step": 469020
    },
    {
      "epoch": 1.643833215924185,
      "grad_norm": 2.53125,
      "learning_rate": 2.511429394767791e-05,
      "loss": 0.8169,
      "step": 469030
    },
    {
      "epoch": 1.6438682634310808,
      "grad_norm": 2.765625,
      "learning_rate": 2.5113644919014207e-05,
      "loss": 0.8117,
      "step": 469040
    },
    {
      "epoch": 1.6439033109379766,
      "grad_norm": 3.3125,
      "learning_rate": 2.5112995890350505e-05,
      "loss": 0.8667,
      "step": 469050
    },
    {
      "epoch": 1.643938358444872,
      "grad_norm": 2.890625,
      "learning_rate": 2.5112346861686803e-05,
      "loss": 0.8201,
      "step": 469060
    },
    {
      "epoch": 1.6439734059517677,
      "grad_norm": 2.9375,
      "learning_rate": 2.51116978330231e-05,
      "loss": 0.8752,
      "step": 469070
    },
    {
      "epoch": 1.6440084534586632,
      "grad_norm": 2.765625,
      "learning_rate": 2.51110488043594e-05,
      "loss": 0.8179,
      "step": 469080
    },
    {
      "epoch": 1.6440435009655587,
      "grad_norm": 2.75,
      "learning_rate": 2.5110399775695697e-05,
      "loss": 0.8971,
      "step": 469090
    },
    {
      "epoch": 1.6440785484724545,
      "grad_norm": 2.765625,
      "learning_rate": 2.510975074703199e-05,
      "loss": 0.8085,
      "step": 469100
    },
    {
      "epoch": 1.64411359597935,
      "grad_norm": 3.28125,
      "learning_rate": 2.510910171836829e-05,
      "loss": 0.9229,
      "step": 469110
    },
    {
      "epoch": 1.6441486434862456,
      "grad_norm": 3.1875,
      "learning_rate": 2.5108452689704587e-05,
      "loss": 0.8605,
      "step": 469120
    },
    {
      "epoch": 1.6441836909931413,
      "grad_norm": 2.59375,
      "learning_rate": 2.5107803661040885e-05,
      "loss": 0.7633,
      "step": 469130
    },
    {
      "epoch": 1.6442187385000366,
      "grad_norm": 2.609375,
      "learning_rate": 2.5107154632377183e-05,
      "loss": 0.7645,
      "step": 469140
    },
    {
      "epoch": 1.6442537860069324,
      "grad_norm": 2.828125,
      "learning_rate": 2.510650560371348e-05,
      "loss": 0.8467,
      "step": 469150
    },
    {
      "epoch": 1.6442888335138282,
      "grad_norm": 3.078125,
      "learning_rate": 2.510585657504978e-05,
      "loss": 0.8311,
      "step": 469160
    },
    {
      "epoch": 1.6443238810207235,
      "grad_norm": 2.515625,
      "learning_rate": 2.5105207546386077e-05,
      "loss": 0.827,
      "step": 469170
    },
    {
      "epoch": 1.6443589285276192,
      "grad_norm": 3.125,
      "learning_rate": 2.5104558517722375e-05,
      "loss": 0.9382,
      "step": 469180
    },
    {
      "epoch": 1.6443939760345148,
      "grad_norm": 3.1875,
      "learning_rate": 2.5103909489058673e-05,
      "loss": 0.8789,
      "step": 469190
    },
    {
      "epoch": 1.6444290235414103,
      "grad_norm": 2.515625,
      "learning_rate": 2.510326046039497e-05,
      "loss": 0.9115,
      "step": 469200
    },
    {
      "epoch": 1.644464071048306,
      "grad_norm": 3.453125,
      "learning_rate": 2.510261143173127e-05,
      "loss": 0.7473,
      "step": 469210
    },
    {
      "epoch": 1.6444991185552016,
      "grad_norm": 2.921875,
      "learning_rate": 2.510196240306757e-05,
      "loss": 0.8044,
      "step": 469220
    },
    {
      "epoch": 1.6445341660620971,
      "grad_norm": 2.953125,
      "learning_rate": 2.510131337440387e-05,
      "loss": 0.8262,
      "step": 469230
    },
    {
      "epoch": 1.6445692135689929,
      "grad_norm": 3.421875,
      "learning_rate": 2.5100664345740167e-05,
      "loss": 0.8349,
      "step": 469240
    },
    {
      "epoch": 1.6446042610758884,
      "grad_norm": 2.8125,
      "learning_rate": 2.5100015317076465e-05,
      "loss": 0.7431,
      "step": 469250
    },
    {
      "epoch": 1.644639308582784,
      "grad_norm": 3.109375,
      "learning_rate": 2.5099366288412763e-05,
      "loss": 0.8899,
      "step": 469260
    },
    {
      "epoch": 1.6446743560896797,
      "grad_norm": 2.5625,
      "learning_rate": 2.509871725974906e-05,
      "loss": 0.7811,
      "step": 469270
    },
    {
      "epoch": 1.644709403596575,
      "grad_norm": 2.9375,
      "learning_rate": 2.509806823108536e-05,
      "loss": 0.8303,
      "step": 469280
    },
    {
      "epoch": 1.6447444511034708,
      "grad_norm": 2.609375,
      "learning_rate": 2.5097419202421657e-05,
      "loss": 0.8383,
      "step": 469290
    },
    {
      "epoch": 1.6447794986103663,
      "grad_norm": 2.921875,
      "learning_rate": 2.5096770173757955e-05,
      "loss": 0.8604,
      "step": 469300
    },
    {
      "epoch": 1.6448145461172619,
      "grad_norm": 3.265625,
      "learning_rate": 2.5096121145094253e-05,
      "loss": 0.8317,
      "step": 469310
    },
    {
      "epoch": 1.6448495936241576,
      "grad_norm": 3.53125,
      "learning_rate": 2.509547211643055e-05,
      "loss": 0.8449,
      "step": 469320
    },
    {
      "epoch": 1.6448846411310531,
      "grad_norm": 2.703125,
      "learning_rate": 2.509482308776685e-05,
      "loss": 0.7973,
      "step": 469330
    },
    {
      "epoch": 1.6449196886379487,
      "grad_norm": 3.21875,
      "learning_rate": 2.5094174059103147e-05,
      "loss": 0.8693,
      "step": 469340
    },
    {
      "epoch": 1.6449547361448444,
      "grad_norm": 2.9375,
      "learning_rate": 2.5093525030439445e-05,
      "loss": 0.8997,
      "step": 469350
    },
    {
      "epoch": 1.64498978365174,
      "grad_norm": 2.796875,
      "learning_rate": 2.5092876001775746e-05,
      "loss": 0.8412,
      "step": 469360
    },
    {
      "epoch": 1.6450248311586355,
      "grad_norm": 3.484375,
      "learning_rate": 2.5092226973112044e-05,
      "loss": 0.8589,
      "step": 469370
    },
    {
      "epoch": 1.6450598786655313,
      "grad_norm": 2.375,
      "learning_rate": 2.5091577944448342e-05,
      "loss": 0.7681,
      "step": 469380
    },
    {
      "epoch": 1.6450949261724266,
      "grad_norm": 2.84375,
      "learning_rate": 2.509092891578464e-05,
      "loss": 0.7481,
      "step": 469390
    },
    {
      "epoch": 1.6451299736793223,
      "grad_norm": 2.84375,
      "learning_rate": 2.5090279887120938e-05,
      "loss": 0.866,
      "step": 469400
    },
    {
      "epoch": 1.6451650211862179,
      "grad_norm": 3.125,
      "learning_rate": 2.5089630858457236e-05,
      "loss": 0.792,
      "step": 469410
    },
    {
      "epoch": 1.6452000686931134,
      "grad_norm": 2.671875,
      "learning_rate": 2.5088981829793534e-05,
      "loss": 0.9219,
      "step": 469420
    },
    {
      "epoch": 1.6452351162000092,
      "grad_norm": 2.765625,
      "learning_rate": 2.5088332801129832e-05,
      "loss": 0.8875,
      "step": 469430
    },
    {
      "epoch": 1.6452701637069047,
      "grad_norm": 2.796875,
      "learning_rate": 2.508768377246613e-05,
      "loss": 0.8318,
      "step": 469440
    },
    {
      "epoch": 1.6453052112138002,
      "grad_norm": 3.078125,
      "learning_rate": 2.5087034743802428e-05,
      "loss": 0.8492,
      "step": 469450
    },
    {
      "epoch": 1.645340258720696,
      "grad_norm": 3.125,
      "learning_rate": 2.5086385715138726e-05,
      "loss": 0.7893,
      "step": 469460
    },
    {
      "epoch": 1.6453753062275915,
      "grad_norm": 3.15625,
      "learning_rate": 2.5085736686475024e-05,
      "loss": 0.9218,
      "step": 469470
    },
    {
      "epoch": 1.645410353734487,
      "grad_norm": 2.96875,
      "learning_rate": 2.508508765781132e-05,
      "loss": 0.9563,
      "step": 469480
    },
    {
      "epoch": 1.6454454012413828,
      "grad_norm": 2.859375,
      "learning_rate": 2.5084438629147617e-05,
      "loss": 0.7747,
      "step": 469490
    },
    {
      "epoch": 1.6454804487482781,
      "grad_norm": 2.453125,
      "learning_rate": 2.5083789600483915e-05,
      "loss": 0.816,
      "step": 469500
    },
    {
      "epoch": 1.645515496255174,
      "grad_norm": 2.828125,
      "learning_rate": 2.5083140571820213e-05,
      "loss": 0.8845,
      "step": 469510
    },
    {
      "epoch": 1.6455505437620694,
      "grad_norm": 2.734375,
      "learning_rate": 2.508249154315651e-05,
      "loss": 0.7992,
      "step": 469520
    },
    {
      "epoch": 1.645585591268965,
      "grad_norm": 2.46875,
      "learning_rate": 2.508184251449281e-05,
      "loss": 0.8349,
      "step": 469530
    },
    {
      "epoch": 1.6456206387758607,
      "grad_norm": 2.6875,
      "learning_rate": 2.5081193485829107e-05,
      "loss": 0.8847,
      "step": 469540
    },
    {
      "epoch": 1.6456556862827563,
      "grad_norm": 2.953125,
      "learning_rate": 2.5080544457165405e-05,
      "loss": 0.8168,
      "step": 469550
    },
    {
      "epoch": 1.6456907337896518,
      "grad_norm": 3.125,
      "learning_rate": 2.5079895428501703e-05,
      "loss": 0.8398,
      "step": 469560
    },
    {
      "epoch": 1.6457257812965476,
      "grad_norm": 2.890625,
      "learning_rate": 2.5079246399838e-05,
      "loss": 0.8781,
      "step": 469570
    },
    {
      "epoch": 1.645760828803443,
      "grad_norm": 2.421875,
      "learning_rate": 2.50785973711743e-05,
      "loss": 0.8331,
      "step": 469580
    },
    {
      "epoch": 1.6457958763103386,
      "grad_norm": 2.859375,
      "learning_rate": 2.50779483425106e-05,
      "loss": 0.9848,
      "step": 469590
    },
    {
      "epoch": 1.6458309238172344,
      "grad_norm": 3.234375,
      "learning_rate": 2.5077299313846898e-05,
      "loss": 0.8109,
      "step": 469600
    },
    {
      "epoch": 1.6458659713241297,
      "grad_norm": 2.5,
      "learning_rate": 2.5076650285183196e-05,
      "loss": 0.7976,
      "step": 469610
    },
    {
      "epoch": 1.6459010188310255,
      "grad_norm": 2.6875,
      "learning_rate": 2.5076001256519494e-05,
      "loss": 0.8436,
      "step": 469620
    },
    {
      "epoch": 1.645936066337921,
      "grad_norm": 3.46875,
      "learning_rate": 2.5075352227855792e-05,
      "loss": 0.8719,
      "step": 469630
    },
    {
      "epoch": 1.6459711138448165,
      "grad_norm": 3.03125,
      "learning_rate": 2.507470319919209e-05,
      "loss": 0.8483,
      "step": 469640
    },
    {
      "epoch": 1.6460061613517123,
      "grad_norm": 2.90625,
      "learning_rate": 2.5074054170528388e-05,
      "loss": 0.8523,
      "step": 469650
    },
    {
      "epoch": 1.6460412088586078,
      "grad_norm": 2.546875,
      "learning_rate": 2.5073405141864686e-05,
      "loss": 0.8564,
      "step": 469660
    },
    {
      "epoch": 1.6460762563655034,
      "grad_norm": 3.421875,
      "learning_rate": 2.5072756113200984e-05,
      "loss": 0.8432,
      "step": 469670
    },
    {
      "epoch": 1.6461113038723991,
      "grad_norm": 3.03125,
      "learning_rate": 2.5072107084537282e-05,
      "loss": 0.87,
      "step": 469680
    },
    {
      "epoch": 1.6461463513792947,
      "grad_norm": 2.90625,
      "learning_rate": 2.507145805587358e-05,
      "loss": 0.7994,
      "step": 469690
    },
    {
      "epoch": 1.6461813988861902,
      "grad_norm": 2.71875,
      "learning_rate": 2.5070809027209878e-05,
      "loss": 0.8031,
      "step": 469700
    },
    {
      "epoch": 1.646216446393086,
      "grad_norm": 3.15625,
      "learning_rate": 2.5070159998546176e-05,
      "loss": 0.8587,
      "step": 469710
    },
    {
      "epoch": 1.6462514938999813,
      "grad_norm": 3.296875,
      "learning_rate": 2.5069510969882478e-05,
      "loss": 0.9369,
      "step": 469720
    },
    {
      "epoch": 1.646286541406877,
      "grad_norm": 2.96875,
      "learning_rate": 2.5068861941218776e-05,
      "loss": 0.8627,
      "step": 469730
    },
    {
      "epoch": 1.6463215889137728,
      "grad_norm": 2.96875,
      "learning_rate": 2.5068212912555074e-05,
      "loss": 0.9128,
      "step": 469740
    },
    {
      "epoch": 1.646356636420668,
      "grad_norm": 2.96875,
      "learning_rate": 2.506756388389137e-05,
      "loss": 0.8723,
      "step": 469750
    },
    {
      "epoch": 1.6463916839275639,
      "grad_norm": 2.765625,
      "learning_rate": 2.506691485522767e-05,
      "loss": 0.8129,
      "step": 469760
    },
    {
      "epoch": 1.6464267314344594,
      "grad_norm": 2.421875,
      "learning_rate": 2.5066265826563968e-05,
      "loss": 0.8321,
      "step": 469770
    },
    {
      "epoch": 1.646461778941355,
      "grad_norm": 2.65625,
      "learning_rate": 2.5065616797900266e-05,
      "loss": 0.8559,
      "step": 469780
    },
    {
      "epoch": 1.6464968264482507,
      "grad_norm": 2.890625,
      "learning_rate": 2.5064967769236564e-05,
      "loss": 0.8268,
      "step": 469790
    },
    {
      "epoch": 1.6465318739551462,
      "grad_norm": 2.90625,
      "learning_rate": 2.506431874057286e-05,
      "loss": 0.9149,
      "step": 469800
    },
    {
      "epoch": 1.6465669214620418,
      "grad_norm": 3.0,
      "learning_rate": 2.506366971190916e-05,
      "loss": 0.9079,
      "step": 469810
    },
    {
      "epoch": 1.6466019689689375,
      "grad_norm": 3.140625,
      "learning_rate": 2.5063020683245458e-05,
      "loss": 0.7547,
      "step": 469820
    },
    {
      "epoch": 1.6466370164758328,
      "grad_norm": 3.390625,
      "learning_rate": 2.5062371654581756e-05,
      "loss": 0.8371,
      "step": 469830
    },
    {
      "epoch": 1.6466720639827286,
      "grad_norm": 2.625,
      "learning_rate": 2.5061722625918054e-05,
      "loss": 0.8466,
      "step": 469840
    },
    {
      "epoch": 1.6467071114896243,
      "grad_norm": 2.859375,
      "learning_rate": 2.506107359725435e-05,
      "loss": 0.8186,
      "step": 469850
    },
    {
      "epoch": 1.6467421589965197,
      "grad_norm": 2.71875,
      "learning_rate": 2.5060424568590646e-05,
      "loss": 0.8109,
      "step": 469860
    },
    {
      "epoch": 1.6467772065034154,
      "grad_norm": 3.359375,
      "learning_rate": 2.5059775539926944e-05,
      "loss": 0.8247,
      "step": 469870
    },
    {
      "epoch": 1.646812254010311,
      "grad_norm": 3.125,
      "learning_rate": 2.5059126511263242e-05,
      "loss": 0.8153,
      "step": 469880
    },
    {
      "epoch": 1.6468473015172065,
      "grad_norm": 2.640625,
      "learning_rate": 2.505847748259954e-05,
      "loss": 0.8415,
      "step": 469890
    },
    {
      "epoch": 1.6468823490241022,
      "grad_norm": 3.0625,
      "learning_rate": 2.5057828453935838e-05,
      "loss": 0.8657,
      "step": 469900
    },
    {
      "epoch": 1.6469173965309978,
      "grad_norm": 3.140625,
      "learning_rate": 2.5057179425272136e-05,
      "loss": 0.7805,
      "step": 469910
    },
    {
      "epoch": 1.6469524440378933,
      "grad_norm": 3.96875,
      "learning_rate": 2.5056530396608434e-05,
      "loss": 0.835,
      "step": 469920
    },
    {
      "epoch": 1.646987491544789,
      "grad_norm": 3.203125,
      "learning_rate": 2.5055881367944732e-05,
      "loss": 0.8858,
      "step": 469930
    },
    {
      "epoch": 1.6470225390516846,
      "grad_norm": 2.640625,
      "learning_rate": 2.505523233928103e-05,
      "loss": 0.8432,
      "step": 469940
    },
    {
      "epoch": 1.6470575865585801,
      "grad_norm": 2.640625,
      "learning_rate": 2.505458331061733e-05,
      "loss": 0.7554,
      "step": 469950
    },
    {
      "epoch": 1.647092634065476,
      "grad_norm": 2.703125,
      "learning_rate": 2.505393428195363e-05,
      "loss": 0.8105,
      "step": 469960
    },
    {
      "epoch": 1.6471276815723712,
      "grad_norm": 2.640625,
      "learning_rate": 2.5053285253289928e-05,
      "loss": 0.911,
      "step": 469970
    },
    {
      "epoch": 1.647162729079267,
      "grad_norm": 3.109375,
      "learning_rate": 2.5052636224626226e-05,
      "loss": 0.8654,
      "step": 469980
    },
    {
      "epoch": 1.6471977765861625,
      "grad_norm": 2.71875,
      "learning_rate": 2.5051987195962524e-05,
      "loss": 0.8056,
      "step": 469990
    },
    {
      "epoch": 1.647232824093058,
      "grad_norm": 3.0625,
      "learning_rate": 2.505133816729882e-05,
      "loss": 0.7858,
      "step": 470000
    },
    {
      "epoch": 1.647232824093058,
      "eval_loss": 0.7833724021911621,
      "eval_runtime": 554.3157,
      "eval_samples_per_second": 686.316,
      "eval_steps_per_second": 57.193,
      "step": 470000
    },
    {
      "epoch": 1.6472678715999538,
      "grad_norm": 3.0625,
      "learning_rate": 2.505068913863512e-05,
      "loss": 0.8228,
      "step": 470010
    },
    {
      "epoch": 1.6473029191068493,
      "grad_norm": 2.546875,
      "learning_rate": 2.5050040109971418e-05,
      "loss": 0.7584,
      "step": 470020
    },
    {
      "epoch": 1.6473379666137449,
      "grad_norm": 3.09375,
      "learning_rate": 2.5049391081307716e-05,
      "loss": 0.8032,
      "step": 470030
    },
    {
      "epoch": 1.6473730141206406,
      "grad_norm": 2.765625,
      "learning_rate": 2.5048742052644014e-05,
      "loss": 0.8442,
      "step": 470040
    },
    {
      "epoch": 1.6474080616275362,
      "grad_norm": 3.046875,
      "learning_rate": 2.504809302398031e-05,
      "loss": 0.7916,
      "step": 470050
    },
    {
      "epoch": 1.6474431091344317,
      "grad_norm": 2.375,
      "learning_rate": 2.504744399531661e-05,
      "loss": 0.7649,
      "step": 470060
    },
    {
      "epoch": 1.6474781566413275,
      "grad_norm": 3.15625,
      "learning_rate": 2.5046794966652908e-05,
      "loss": 0.9193,
      "step": 470070
    },
    {
      "epoch": 1.6475132041482228,
      "grad_norm": 2.65625,
      "learning_rate": 2.5046145937989206e-05,
      "loss": 0.8179,
      "step": 470080
    },
    {
      "epoch": 1.6475482516551185,
      "grad_norm": 2.703125,
      "learning_rate": 2.5045496909325507e-05,
      "loss": 0.7805,
      "step": 470090
    },
    {
      "epoch": 1.647583299162014,
      "grad_norm": 3.046875,
      "learning_rate": 2.5044847880661805e-05,
      "loss": 0.7712,
      "step": 470100
    },
    {
      "epoch": 1.6476183466689096,
      "grad_norm": 2.984375,
      "learning_rate": 2.5044198851998103e-05,
      "loss": 0.9031,
      "step": 470110
    },
    {
      "epoch": 1.6476533941758054,
      "grad_norm": 2.890625,
      "learning_rate": 2.50435498233344e-05,
      "loss": 0.9141,
      "step": 470120
    },
    {
      "epoch": 1.647688441682701,
      "grad_norm": 3.15625,
      "learning_rate": 2.50429007946707e-05,
      "loss": 0.8473,
      "step": 470130
    },
    {
      "epoch": 1.6477234891895964,
      "grad_norm": 3.34375,
      "learning_rate": 2.5042251766006997e-05,
      "loss": 0.7311,
      "step": 470140
    },
    {
      "epoch": 1.6477585366964922,
      "grad_norm": 2.8125,
      "learning_rate": 2.5041602737343295e-05,
      "loss": 0.8221,
      "step": 470150
    },
    {
      "epoch": 1.6477935842033877,
      "grad_norm": 2.671875,
      "learning_rate": 2.5040953708679593e-05,
      "loss": 0.7836,
      "step": 470160
    },
    {
      "epoch": 1.6478286317102833,
      "grad_norm": 2.6875,
      "learning_rate": 2.504030468001589e-05,
      "loss": 0.8393,
      "step": 470170
    },
    {
      "epoch": 1.647863679217179,
      "grad_norm": 2.796875,
      "learning_rate": 2.503965565135219e-05,
      "loss": 0.7994,
      "step": 470180
    },
    {
      "epoch": 1.6478987267240743,
      "grad_norm": 2.921875,
      "learning_rate": 2.5039006622688487e-05,
      "loss": 0.722,
      "step": 470190
    },
    {
      "epoch": 1.64793377423097,
      "grad_norm": 3.515625,
      "learning_rate": 2.5038357594024785e-05,
      "loss": 0.8776,
      "step": 470200
    },
    {
      "epoch": 1.6479688217378656,
      "grad_norm": 2.890625,
      "learning_rate": 2.5037708565361083e-05,
      "loss": 0.779,
      "step": 470210
    },
    {
      "epoch": 1.6480038692447612,
      "grad_norm": 2.9375,
      "learning_rate": 2.503705953669738e-05,
      "loss": 0.8608,
      "step": 470220
    },
    {
      "epoch": 1.648038916751657,
      "grad_norm": 3.234375,
      "learning_rate": 2.5036410508033676e-05,
      "loss": 0.7487,
      "step": 470230
    },
    {
      "epoch": 1.6480739642585525,
      "grad_norm": 3.078125,
      "learning_rate": 2.5035761479369974e-05,
      "loss": 0.7816,
      "step": 470240
    },
    {
      "epoch": 1.648109011765448,
      "grad_norm": 3.03125,
      "learning_rate": 2.503511245070627e-05,
      "loss": 0.7408,
      "step": 470250
    },
    {
      "epoch": 1.6481440592723438,
      "grad_norm": 3.0,
      "learning_rate": 2.503446342204257e-05,
      "loss": 0.8417,
      "step": 470260
    },
    {
      "epoch": 1.6481791067792393,
      "grad_norm": 3.296875,
      "learning_rate": 2.5033814393378868e-05,
      "loss": 0.8422,
      "step": 470270
    },
    {
      "epoch": 1.6482141542861348,
      "grad_norm": 2.796875,
      "learning_rate": 2.5033165364715166e-05,
      "loss": 0.8526,
      "step": 470280
    },
    {
      "epoch": 1.6482492017930306,
      "grad_norm": 3.078125,
      "learning_rate": 2.5032516336051464e-05,
      "loss": 0.9078,
      "step": 470290
    },
    {
      "epoch": 1.648284249299926,
      "grad_norm": 2.90625,
      "learning_rate": 2.503186730738776e-05,
      "loss": 0.869,
      "step": 470300
    },
    {
      "epoch": 1.6483192968068217,
      "grad_norm": 2.953125,
      "learning_rate": 2.503121827872406e-05,
      "loss": 0.8816,
      "step": 470310
    },
    {
      "epoch": 1.6483543443137172,
      "grad_norm": 2.9375,
      "learning_rate": 2.503056925006036e-05,
      "loss": 0.8827,
      "step": 470320
    },
    {
      "epoch": 1.6483893918206127,
      "grad_norm": 2.6875,
      "learning_rate": 2.502992022139666e-05,
      "loss": 0.7704,
      "step": 470330
    },
    {
      "epoch": 1.6484244393275085,
      "grad_norm": 2.8125,
      "learning_rate": 2.5029271192732957e-05,
      "loss": 0.9133,
      "step": 470340
    },
    {
      "epoch": 1.648459486834404,
      "grad_norm": 3.21875,
      "learning_rate": 2.5028622164069255e-05,
      "loss": 0.8297,
      "step": 470350
    },
    {
      "epoch": 1.6484945343412996,
      "grad_norm": 2.75,
      "learning_rate": 2.5027973135405553e-05,
      "loss": 0.8343,
      "step": 470360
    },
    {
      "epoch": 1.6485295818481953,
      "grad_norm": 2.828125,
      "learning_rate": 2.502732410674185e-05,
      "loss": 0.8534,
      "step": 470370
    },
    {
      "epoch": 1.6485646293550908,
      "grad_norm": 2.90625,
      "learning_rate": 2.502667507807815e-05,
      "loss": 0.7739,
      "step": 470380
    },
    {
      "epoch": 1.6485996768619864,
      "grad_norm": 2.640625,
      "learning_rate": 2.5026026049414447e-05,
      "loss": 0.8015,
      "step": 470390
    },
    {
      "epoch": 1.6486347243688821,
      "grad_norm": 2.671875,
      "learning_rate": 2.5025377020750745e-05,
      "loss": 0.8228,
      "step": 470400
    },
    {
      "epoch": 1.6486697718757775,
      "grad_norm": 2.921875,
      "learning_rate": 2.5024727992087043e-05,
      "loss": 0.8221,
      "step": 470410
    },
    {
      "epoch": 1.6487048193826732,
      "grad_norm": 3.25,
      "learning_rate": 2.502407896342334e-05,
      "loss": 0.8781,
      "step": 470420
    },
    {
      "epoch": 1.648739866889569,
      "grad_norm": 2.875,
      "learning_rate": 2.502342993475964e-05,
      "loss": 0.8509,
      "step": 470430
    },
    {
      "epoch": 1.6487749143964643,
      "grad_norm": 3.0,
      "learning_rate": 2.5022780906095937e-05,
      "loss": 0.8447,
      "step": 470440
    },
    {
      "epoch": 1.64880996190336,
      "grad_norm": 3.09375,
      "learning_rate": 2.5022131877432235e-05,
      "loss": 0.7837,
      "step": 470450
    },
    {
      "epoch": 1.6488450094102556,
      "grad_norm": 2.71875,
      "learning_rate": 2.5021482848768536e-05,
      "loss": 0.8484,
      "step": 470460
    },
    {
      "epoch": 1.6488800569171511,
      "grad_norm": 2.71875,
      "learning_rate": 2.5020833820104834e-05,
      "loss": 0.8316,
      "step": 470470
    },
    {
      "epoch": 1.6489151044240469,
      "grad_norm": 2.96875,
      "learning_rate": 2.5020184791441132e-05,
      "loss": 0.9094,
      "step": 470480
    },
    {
      "epoch": 1.6489501519309424,
      "grad_norm": 2.5625,
      "learning_rate": 2.501953576277743e-05,
      "loss": 0.7968,
      "step": 470490
    },
    {
      "epoch": 1.648985199437838,
      "grad_norm": 2.859375,
      "learning_rate": 2.501888673411373e-05,
      "loss": 0.8946,
      "step": 470500
    },
    {
      "epoch": 1.6490202469447337,
      "grad_norm": 3.015625,
      "learning_rate": 2.5018237705450026e-05,
      "loss": 0.8683,
      "step": 470510
    },
    {
      "epoch": 1.6490552944516292,
      "grad_norm": 2.765625,
      "learning_rate": 2.5017588676786324e-05,
      "loss": 0.793,
      "step": 470520
    },
    {
      "epoch": 1.6490903419585248,
      "grad_norm": 2.90625,
      "learning_rate": 2.5016939648122622e-05,
      "loss": 0.8018,
      "step": 470530
    },
    {
      "epoch": 1.6491253894654205,
      "grad_norm": 3.171875,
      "learning_rate": 2.501629061945892e-05,
      "loss": 0.891,
      "step": 470540
    },
    {
      "epoch": 1.6491604369723158,
      "grad_norm": 3.1875,
      "learning_rate": 2.501564159079522e-05,
      "loss": 0.894,
      "step": 470550
    },
    {
      "epoch": 1.6491954844792116,
      "grad_norm": 3.34375,
      "learning_rate": 2.5014992562131516e-05,
      "loss": 0.8204,
      "step": 470560
    },
    {
      "epoch": 1.6492305319861071,
      "grad_norm": 3.328125,
      "learning_rate": 2.5014343533467814e-05,
      "loss": 0.8456,
      "step": 470570
    },
    {
      "epoch": 1.6492655794930027,
      "grad_norm": 3.046875,
      "learning_rate": 2.5013694504804112e-05,
      "loss": 0.8098,
      "step": 470580
    },
    {
      "epoch": 1.6493006269998984,
      "grad_norm": 2.703125,
      "learning_rate": 2.501304547614041e-05,
      "loss": 0.8133,
      "step": 470590
    },
    {
      "epoch": 1.649335674506794,
      "grad_norm": 2.875,
      "learning_rate": 2.5012396447476712e-05,
      "loss": 0.8191,
      "step": 470600
    },
    {
      "epoch": 1.6493707220136895,
      "grad_norm": 3.171875,
      "learning_rate": 2.5011747418813003e-05,
      "loss": 0.8622,
      "step": 470610
    },
    {
      "epoch": 1.6494057695205853,
      "grad_norm": 2.875,
      "learning_rate": 2.50110983901493e-05,
      "loss": 0.8196,
      "step": 470620
    },
    {
      "epoch": 1.6494408170274808,
      "grad_norm": 3.125,
      "learning_rate": 2.50104493614856e-05,
      "loss": 0.8246,
      "step": 470630
    },
    {
      "epoch": 1.6494758645343763,
      "grad_norm": 3.140625,
      "learning_rate": 2.5009800332821897e-05,
      "loss": 0.8041,
      "step": 470640
    },
    {
      "epoch": 1.649510912041272,
      "grad_norm": 2.921875,
      "learning_rate": 2.5009151304158195e-05,
      "loss": 0.8858,
      "step": 470650
    },
    {
      "epoch": 1.6495459595481674,
      "grad_norm": 2.890625,
      "learning_rate": 2.5008502275494493e-05,
      "loss": 0.8488,
      "step": 470660
    },
    {
      "epoch": 1.6495810070550632,
      "grad_norm": 3.328125,
      "learning_rate": 2.500785324683079e-05,
      "loss": 0.8935,
      "step": 470670
    },
    {
      "epoch": 1.6496160545619587,
      "grad_norm": 2.34375,
      "learning_rate": 2.5007204218167092e-05,
      "loss": 0.8485,
      "step": 470680
    },
    {
      "epoch": 1.6496511020688542,
      "grad_norm": 2.71875,
      "learning_rate": 2.500655518950339e-05,
      "loss": 0.8668,
      "step": 470690
    },
    {
      "epoch": 1.64968614957575,
      "grad_norm": 2.90625,
      "learning_rate": 2.500590616083969e-05,
      "loss": 0.7785,
      "step": 470700
    },
    {
      "epoch": 1.6497211970826455,
      "grad_norm": 2.671875,
      "learning_rate": 2.5005257132175986e-05,
      "loss": 0.8766,
      "step": 470710
    },
    {
      "epoch": 1.649756244589541,
      "grad_norm": 3.109375,
      "learning_rate": 2.5004608103512284e-05,
      "loss": 0.84,
      "step": 470720
    },
    {
      "epoch": 1.6497912920964368,
      "grad_norm": 3.03125,
      "learning_rate": 2.5003959074848582e-05,
      "loss": 0.8555,
      "step": 470730
    },
    {
      "epoch": 1.6498263396033324,
      "grad_norm": 3.15625,
      "learning_rate": 2.500331004618488e-05,
      "loss": 0.8758,
      "step": 470740
    },
    {
      "epoch": 1.649861387110228,
      "grad_norm": 3.0625,
      "learning_rate": 2.500266101752118e-05,
      "loss": 0.8903,
      "step": 470750
    },
    {
      "epoch": 1.6498964346171237,
      "grad_norm": 2.546875,
      "learning_rate": 2.5002011988857476e-05,
      "loss": 0.8026,
      "step": 470760
    },
    {
      "epoch": 1.649931482124019,
      "grad_norm": 2.484375,
      "learning_rate": 2.5001362960193774e-05,
      "loss": 0.8201,
      "step": 470770
    },
    {
      "epoch": 1.6499665296309147,
      "grad_norm": 2.640625,
      "learning_rate": 2.5000713931530072e-05,
      "loss": 0.7567,
      "step": 470780
    },
    {
      "epoch": 1.6500015771378103,
      "grad_norm": 3.0625,
      "learning_rate": 2.500006490286637e-05,
      "loss": 0.9171,
      "step": 470790
    },
    {
      "epoch": 1.6500366246447058,
      "grad_norm": 2.890625,
      "learning_rate": 2.499941587420267e-05,
      "loss": 0.8895,
      "step": 470800
    },
    {
      "epoch": 1.6500716721516016,
      "grad_norm": 2.828125,
      "learning_rate": 2.4998766845538966e-05,
      "loss": 0.6899,
      "step": 470810
    },
    {
      "epoch": 1.650106719658497,
      "grad_norm": 2.390625,
      "learning_rate": 2.4998117816875268e-05,
      "loss": 0.8366,
      "step": 470820
    },
    {
      "epoch": 1.6501417671653926,
      "grad_norm": 2.875,
      "learning_rate": 2.4997468788211566e-05,
      "loss": 0.8108,
      "step": 470830
    },
    {
      "epoch": 1.6501768146722884,
      "grad_norm": 3.0625,
      "learning_rate": 2.4996819759547864e-05,
      "loss": 0.7583,
      "step": 470840
    },
    {
      "epoch": 1.650211862179184,
      "grad_norm": 3.34375,
      "learning_rate": 2.4996170730884162e-05,
      "loss": 0.8011,
      "step": 470850
    },
    {
      "epoch": 1.6502469096860795,
      "grad_norm": 3.0,
      "learning_rate": 2.499552170222046e-05,
      "loss": 0.7805,
      "step": 470860
    },
    {
      "epoch": 1.6502819571929752,
      "grad_norm": 2.65625,
      "learning_rate": 2.4994872673556758e-05,
      "loss": 0.8774,
      "step": 470870
    },
    {
      "epoch": 1.6503170046998705,
      "grad_norm": 2.578125,
      "learning_rate": 2.4994223644893056e-05,
      "loss": 0.8052,
      "step": 470880
    },
    {
      "epoch": 1.6503520522067663,
      "grad_norm": 3.203125,
      "learning_rate": 2.499357461622935e-05,
      "loss": 0.8503,
      "step": 470890
    },
    {
      "epoch": 1.6503870997136618,
      "grad_norm": 3.21875,
      "learning_rate": 2.499292558756565e-05,
      "loss": 0.8227,
      "step": 470900
    },
    {
      "epoch": 1.6504221472205574,
      "grad_norm": 2.796875,
      "learning_rate": 2.4992276558901946e-05,
      "loss": 0.8281,
      "step": 470910
    },
    {
      "epoch": 1.6504571947274531,
      "grad_norm": 2.90625,
      "learning_rate": 2.4991627530238244e-05,
      "loss": 0.8179,
      "step": 470920
    },
    {
      "epoch": 1.6504922422343486,
      "grad_norm": 2.75,
      "learning_rate": 2.4990978501574542e-05,
      "loss": 0.8443,
      "step": 470930
    },
    {
      "epoch": 1.6505272897412442,
      "grad_norm": 2.671875,
      "learning_rate": 2.4990329472910844e-05,
      "loss": 0.8364,
      "step": 470940
    },
    {
      "epoch": 1.65056233724814,
      "grad_norm": 3.328125,
      "learning_rate": 2.4989680444247142e-05,
      "loss": 0.8852,
      "step": 470950
    },
    {
      "epoch": 1.6505973847550355,
      "grad_norm": 2.828125,
      "learning_rate": 2.498903141558344e-05,
      "loss": 0.7621,
      "step": 470960
    },
    {
      "epoch": 1.650632432261931,
      "grad_norm": 3.21875,
      "learning_rate": 2.4988382386919738e-05,
      "loss": 0.876,
      "step": 470970
    },
    {
      "epoch": 1.6506674797688268,
      "grad_norm": 3.03125,
      "learning_rate": 2.4987733358256036e-05,
      "loss": 0.8513,
      "step": 470980
    },
    {
      "epoch": 1.650702527275722,
      "grad_norm": 3.109375,
      "learning_rate": 2.4987084329592334e-05,
      "loss": 0.8579,
      "step": 470990
    },
    {
      "epoch": 1.6507375747826178,
      "grad_norm": 2.875,
      "learning_rate": 2.4986435300928632e-05,
      "loss": 0.8454,
      "step": 471000
    },
    {
      "epoch": 1.6507726222895134,
      "grad_norm": 2.8125,
      "learning_rate": 2.498578627226493e-05,
      "loss": 0.8332,
      "step": 471010
    },
    {
      "epoch": 1.650807669796409,
      "grad_norm": 2.875,
      "learning_rate": 2.4985137243601228e-05,
      "loss": 0.8089,
      "step": 471020
    },
    {
      "epoch": 1.6508427173033047,
      "grad_norm": 2.78125,
      "learning_rate": 2.4984488214937526e-05,
      "loss": 0.875,
      "step": 471030
    },
    {
      "epoch": 1.6508777648102002,
      "grad_norm": 2.890625,
      "learning_rate": 2.4983839186273824e-05,
      "loss": 0.8275,
      "step": 471040
    },
    {
      "epoch": 1.6509128123170957,
      "grad_norm": 2.96875,
      "learning_rate": 2.4983190157610122e-05,
      "loss": 0.8034,
      "step": 471050
    },
    {
      "epoch": 1.6509478598239915,
      "grad_norm": 2.859375,
      "learning_rate": 2.498254112894642e-05,
      "loss": 0.8195,
      "step": 471060
    },
    {
      "epoch": 1.650982907330887,
      "grad_norm": 2.453125,
      "learning_rate": 2.4981892100282718e-05,
      "loss": 0.7823,
      "step": 471070
    },
    {
      "epoch": 1.6510179548377826,
      "grad_norm": 2.96875,
      "learning_rate": 2.4981243071619016e-05,
      "loss": 0.8591,
      "step": 471080
    },
    {
      "epoch": 1.6510530023446783,
      "grad_norm": 3.4375,
      "learning_rate": 2.4980594042955314e-05,
      "loss": 0.8637,
      "step": 471090
    },
    {
      "epoch": 1.6510880498515736,
      "grad_norm": 2.75,
      "learning_rate": 2.4979945014291612e-05,
      "loss": 0.8433,
      "step": 471100
    },
    {
      "epoch": 1.6511230973584694,
      "grad_norm": 2.984375,
      "learning_rate": 2.497929598562791e-05,
      "loss": 0.9245,
      "step": 471110
    },
    {
      "epoch": 1.6511581448653652,
      "grad_norm": 2.59375,
      "learning_rate": 2.4978646956964208e-05,
      "loss": 0.8581,
      "step": 471120
    },
    {
      "epoch": 1.6511931923722605,
      "grad_norm": 2.6875,
      "learning_rate": 2.4977997928300506e-05,
      "loss": 0.8106,
      "step": 471130
    },
    {
      "epoch": 1.6512282398791562,
      "grad_norm": 2.578125,
      "learning_rate": 2.4977348899636804e-05,
      "loss": 0.8167,
      "step": 471140
    },
    {
      "epoch": 1.6512632873860518,
      "grad_norm": 3.09375,
      "learning_rate": 2.4976699870973102e-05,
      "loss": 0.8251,
      "step": 471150
    },
    {
      "epoch": 1.6512983348929473,
      "grad_norm": 2.875,
      "learning_rate": 2.49760508423094e-05,
      "loss": 0.8462,
      "step": 471160
    },
    {
      "epoch": 1.651333382399843,
      "grad_norm": 2.5,
      "learning_rate": 2.4975401813645698e-05,
      "loss": 0.8487,
      "step": 471170
    },
    {
      "epoch": 1.6513684299067386,
      "grad_norm": 2.796875,
      "learning_rate": 2.4974752784981996e-05,
      "loss": 0.8128,
      "step": 471180
    },
    {
      "epoch": 1.6514034774136341,
      "grad_norm": 2.625,
      "learning_rate": 2.4974103756318297e-05,
      "loss": 0.8798,
      "step": 471190
    },
    {
      "epoch": 1.65143852492053,
      "grad_norm": 3.0625,
      "learning_rate": 2.4973454727654595e-05,
      "loss": 0.8183,
      "step": 471200
    },
    {
      "epoch": 1.6514735724274254,
      "grad_norm": 2.984375,
      "learning_rate": 2.4972805698990893e-05,
      "loss": 0.8791,
      "step": 471210
    },
    {
      "epoch": 1.651508619934321,
      "grad_norm": 2.6875,
      "learning_rate": 2.497215667032719e-05,
      "loss": 0.9133,
      "step": 471220
    },
    {
      "epoch": 1.6515436674412167,
      "grad_norm": 3.140625,
      "learning_rate": 2.497150764166349e-05,
      "loss": 0.8338,
      "step": 471230
    },
    {
      "epoch": 1.651578714948112,
      "grad_norm": 2.765625,
      "learning_rate": 2.4970858612999787e-05,
      "loss": 0.7715,
      "step": 471240
    },
    {
      "epoch": 1.6516137624550078,
      "grad_norm": 3.40625,
      "learning_rate": 2.4970209584336085e-05,
      "loss": 0.8081,
      "step": 471250
    },
    {
      "epoch": 1.6516488099619033,
      "grad_norm": 3.171875,
      "learning_rate": 2.4969560555672383e-05,
      "loss": 0.8433,
      "step": 471260
    },
    {
      "epoch": 1.6516838574687989,
      "grad_norm": 2.796875,
      "learning_rate": 2.4968911527008678e-05,
      "loss": 0.8276,
      "step": 471270
    },
    {
      "epoch": 1.6517189049756946,
      "grad_norm": 2.6875,
      "learning_rate": 2.4968262498344976e-05,
      "loss": 0.8338,
      "step": 471280
    },
    {
      "epoch": 1.6517539524825902,
      "grad_norm": 2.390625,
      "learning_rate": 2.4967613469681274e-05,
      "loss": 0.8285,
      "step": 471290
    },
    {
      "epoch": 1.6517889999894857,
      "grad_norm": 3.046875,
      "learning_rate": 2.4966964441017575e-05,
      "loss": 0.7853,
      "step": 471300
    },
    {
      "epoch": 1.6518240474963815,
      "grad_norm": 2.75,
      "learning_rate": 2.4966315412353873e-05,
      "loss": 0.8736,
      "step": 471310
    },
    {
      "epoch": 1.651859095003277,
      "grad_norm": 2.65625,
      "learning_rate": 2.496566638369017e-05,
      "loss": 0.8891,
      "step": 471320
    },
    {
      "epoch": 1.6518941425101725,
      "grad_norm": 2.90625,
      "learning_rate": 2.496501735502647e-05,
      "loss": 0.8806,
      "step": 471330
    },
    {
      "epoch": 1.6519291900170683,
      "grad_norm": 3.203125,
      "learning_rate": 2.4964368326362767e-05,
      "loss": 0.7965,
      "step": 471340
    },
    {
      "epoch": 1.6519642375239636,
      "grad_norm": 3.3125,
      "learning_rate": 2.4963719297699065e-05,
      "loss": 0.7722,
      "step": 471350
    },
    {
      "epoch": 1.6519992850308594,
      "grad_norm": 3.34375,
      "learning_rate": 2.4963070269035363e-05,
      "loss": 0.7836,
      "step": 471360
    },
    {
      "epoch": 1.652034332537755,
      "grad_norm": 2.78125,
      "learning_rate": 2.496242124037166e-05,
      "loss": 0.7826,
      "step": 471370
    },
    {
      "epoch": 1.6520693800446504,
      "grad_norm": 2.46875,
      "learning_rate": 2.496177221170796e-05,
      "loss": 0.789,
      "step": 471380
    },
    {
      "epoch": 1.6521044275515462,
      "grad_norm": 2.8125,
      "learning_rate": 2.4961123183044257e-05,
      "loss": 0.8594,
      "step": 471390
    },
    {
      "epoch": 1.6521394750584417,
      "grad_norm": 2.78125,
      "learning_rate": 2.4960474154380555e-05,
      "loss": 0.8184,
      "step": 471400
    },
    {
      "epoch": 1.6521745225653373,
      "grad_norm": 2.59375,
      "learning_rate": 2.4959825125716853e-05,
      "loss": 0.8539,
      "step": 471410
    },
    {
      "epoch": 1.652209570072233,
      "grad_norm": 2.984375,
      "learning_rate": 2.495917609705315e-05,
      "loss": 0.86,
      "step": 471420
    },
    {
      "epoch": 1.6522446175791285,
      "grad_norm": 3.03125,
      "learning_rate": 2.495852706838945e-05,
      "loss": 0.9072,
      "step": 471430
    },
    {
      "epoch": 1.652279665086024,
      "grad_norm": 3.09375,
      "learning_rate": 2.495787803972575e-05,
      "loss": 0.849,
      "step": 471440
    },
    {
      "epoch": 1.6523147125929198,
      "grad_norm": 3.1875,
      "learning_rate": 2.4957229011062045e-05,
      "loss": 0.7756,
      "step": 471450
    },
    {
      "epoch": 1.6523497600998152,
      "grad_norm": 3.25,
      "learning_rate": 2.4956579982398343e-05,
      "loss": 0.8606,
      "step": 471460
    },
    {
      "epoch": 1.652384807606711,
      "grad_norm": 2.65625,
      "learning_rate": 2.495593095373464e-05,
      "loss": 0.8091,
      "step": 471470
    },
    {
      "epoch": 1.6524198551136065,
      "grad_norm": 2.84375,
      "learning_rate": 2.495528192507094e-05,
      "loss": 0.7889,
      "step": 471480
    },
    {
      "epoch": 1.652454902620502,
      "grad_norm": 2.71875,
      "learning_rate": 2.4954632896407237e-05,
      "loss": 0.7993,
      "step": 471490
    },
    {
      "epoch": 1.6524899501273977,
      "grad_norm": 2.75,
      "learning_rate": 2.4953983867743535e-05,
      "loss": 0.8671,
      "step": 471500
    },
    {
      "epoch": 1.6525249976342933,
      "grad_norm": 2.859375,
      "learning_rate": 2.4953334839079833e-05,
      "loss": 0.8361,
      "step": 471510
    },
    {
      "epoch": 1.6525600451411888,
      "grad_norm": 2.875,
      "learning_rate": 2.495268581041613e-05,
      "loss": 0.7403,
      "step": 471520
    },
    {
      "epoch": 1.6525950926480846,
      "grad_norm": 2.71875,
      "learning_rate": 2.495203678175243e-05,
      "loss": 0.8473,
      "step": 471530
    },
    {
      "epoch": 1.65263014015498,
      "grad_norm": 2.9375,
      "learning_rate": 2.4951387753088727e-05,
      "loss": 0.9418,
      "step": 471540
    },
    {
      "epoch": 1.6526651876618756,
      "grad_norm": 3.15625,
      "learning_rate": 2.4950738724425025e-05,
      "loss": 0.8434,
      "step": 471550
    },
    {
      "epoch": 1.6527002351687714,
      "grad_norm": 3.046875,
      "learning_rate": 2.4950089695761327e-05,
      "loss": 0.8892,
      "step": 471560
    },
    {
      "epoch": 1.6527352826756667,
      "grad_norm": 2.84375,
      "learning_rate": 2.4949440667097625e-05,
      "loss": 0.8293,
      "step": 471570
    },
    {
      "epoch": 1.6527703301825625,
      "grad_norm": 2.796875,
      "learning_rate": 2.4948791638433923e-05,
      "loss": 0.8626,
      "step": 471580
    },
    {
      "epoch": 1.652805377689458,
      "grad_norm": 2.984375,
      "learning_rate": 2.494814260977022e-05,
      "loss": 0.865,
      "step": 471590
    },
    {
      "epoch": 1.6528404251963535,
      "grad_norm": 2.796875,
      "learning_rate": 2.494749358110652e-05,
      "loss": 0.7675,
      "step": 471600
    },
    {
      "epoch": 1.6528754727032493,
      "grad_norm": 2.5,
      "learning_rate": 2.4946844552442817e-05,
      "loss": 0.8824,
      "step": 471610
    },
    {
      "epoch": 1.6529105202101448,
      "grad_norm": 3.03125,
      "learning_rate": 2.4946195523779115e-05,
      "loss": 0.8331,
      "step": 471620
    },
    {
      "epoch": 1.6529455677170404,
      "grad_norm": 3.265625,
      "learning_rate": 2.4945546495115413e-05,
      "loss": 0.9259,
      "step": 471630
    },
    {
      "epoch": 1.6529806152239361,
      "grad_norm": 2.546875,
      "learning_rate": 2.4944897466451707e-05,
      "loss": 0.7658,
      "step": 471640
    },
    {
      "epoch": 1.6530156627308317,
      "grad_norm": 2.875,
      "learning_rate": 2.4944248437788005e-05,
      "loss": 0.8352,
      "step": 471650
    },
    {
      "epoch": 1.6530507102377272,
      "grad_norm": 2.59375,
      "learning_rate": 2.4943599409124303e-05,
      "loss": 0.8928,
      "step": 471660
    },
    {
      "epoch": 1.653085757744623,
      "grad_norm": 2.9375,
      "learning_rate": 2.4942950380460605e-05,
      "loss": 0.8202,
      "step": 471670
    },
    {
      "epoch": 1.6531208052515183,
      "grad_norm": 3.0,
      "learning_rate": 2.4942301351796903e-05,
      "loss": 0.8193,
      "step": 471680
    },
    {
      "epoch": 1.653155852758414,
      "grad_norm": 2.984375,
      "learning_rate": 2.49416523231332e-05,
      "loss": 0.8131,
      "step": 471690
    },
    {
      "epoch": 1.6531909002653098,
      "grad_norm": 2.6875,
      "learning_rate": 2.49410032944695e-05,
      "loss": 0.8499,
      "step": 471700
    },
    {
      "epoch": 1.653225947772205,
      "grad_norm": 3.328125,
      "learning_rate": 2.4940354265805797e-05,
      "loss": 0.8369,
      "step": 471710
    },
    {
      "epoch": 1.6532609952791009,
      "grad_norm": 2.828125,
      "learning_rate": 2.4939705237142095e-05,
      "loss": 0.8641,
      "step": 471720
    },
    {
      "epoch": 1.6532960427859964,
      "grad_norm": 3.09375,
      "learning_rate": 2.4939056208478393e-05,
      "loss": 0.8372,
      "step": 471730
    },
    {
      "epoch": 1.653331090292892,
      "grad_norm": 2.734375,
      "learning_rate": 2.493840717981469e-05,
      "loss": 0.7967,
      "step": 471740
    },
    {
      "epoch": 1.6533661377997877,
      "grad_norm": 2.921875,
      "learning_rate": 2.493775815115099e-05,
      "loss": 0.8538,
      "step": 471750
    },
    {
      "epoch": 1.6534011853066832,
      "grad_norm": 3.265625,
      "learning_rate": 2.4937109122487287e-05,
      "loss": 0.8268,
      "step": 471760
    },
    {
      "epoch": 1.6534362328135788,
      "grad_norm": 2.9375,
      "learning_rate": 2.4936460093823585e-05,
      "loss": 0.8986,
      "step": 471770
    },
    {
      "epoch": 1.6534712803204745,
      "grad_norm": 3.0,
      "learning_rate": 2.4935811065159883e-05,
      "loss": 0.8187,
      "step": 471780
    },
    {
      "epoch": 1.6535063278273698,
      "grad_norm": 2.921875,
      "learning_rate": 2.493516203649618e-05,
      "loss": 0.8398,
      "step": 471790
    },
    {
      "epoch": 1.6535413753342656,
      "grad_norm": 3.0625,
      "learning_rate": 2.493451300783248e-05,
      "loss": 0.8865,
      "step": 471800
    },
    {
      "epoch": 1.6535764228411614,
      "grad_norm": 2.734375,
      "learning_rate": 2.493386397916878e-05,
      "loss": 0.8517,
      "step": 471810
    },
    {
      "epoch": 1.6536114703480567,
      "grad_norm": 3.015625,
      "learning_rate": 2.4933214950505078e-05,
      "loss": 0.8259,
      "step": 471820
    },
    {
      "epoch": 1.6536465178549524,
      "grad_norm": 2.78125,
      "learning_rate": 2.4932565921841373e-05,
      "loss": 0.8075,
      "step": 471830
    },
    {
      "epoch": 1.653681565361848,
      "grad_norm": 3.46875,
      "learning_rate": 2.493191689317767e-05,
      "loss": 0.8509,
      "step": 471840
    },
    {
      "epoch": 1.6537166128687435,
      "grad_norm": 2.875,
      "learning_rate": 2.493126786451397e-05,
      "loss": 0.8589,
      "step": 471850
    },
    {
      "epoch": 1.6537516603756393,
      "grad_norm": 2.6875,
      "learning_rate": 2.4930618835850267e-05,
      "loss": 0.8715,
      "step": 471860
    },
    {
      "epoch": 1.6537867078825348,
      "grad_norm": 2.96875,
      "learning_rate": 2.4929969807186565e-05,
      "loss": 0.844,
      "step": 471870
    },
    {
      "epoch": 1.6538217553894303,
      "grad_norm": 2.609375,
      "learning_rate": 2.4929320778522863e-05,
      "loss": 0.9403,
      "step": 471880
    },
    {
      "epoch": 1.653856802896326,
      "grad_norm": 2.953125,
      "learning_rate": 2.492867174985916e-05,
      "loss": 0.7328,
      "step": 471890
    },
    {
      "epoch": 1.6538918504032216,
      "grad_norm": 2.28125,
      "learning_rate": 2.492802272119546e-05,
      "loss": 0.7744,
      "step": 471900
    },
    {
      "epoch": 1.6539268979101172,
      "grad_norm": 2.71875,
      "learning_rate": 2.4927373692531757e-05,
      "loss": 0.8486,
      "step": 471910
    },
    {
      "epoch": 1.653961945417013,
      "grad_norm": 2.96875,
      "learning_rate": 2.4926724663868058e-05,
      "loss": 0.8478,
      "step": 471920
    },
    {
      "epoch": 1.6539969929239082,
      "grad_norm": 3.0,
      "learning_rate": 2.4926075635204356e-05,
      "loss": 0.8258,
      "step": 471930
    },
    {
      "epoch": 1.654032040430804,
      "grad_norm": 2.734375,
      "learning_rate": 2.4925426606540654e-05,
      "loss": 0.8511,
      "step": 471940
    },
    {
      "epoch": 1.6540670879376995,
      "grad_norm": 2.53125,
      "learning_rate": 2.4924777577876952e-05,
      "loss": 0.8816,
      "step": 471950
    },
    {
      "epoch": 1.654102135444595,
      "grad_norm": 2.78125,
      "learning_rate": 2.492412854921325e-05,
      "loss": 0.838,
      "step": 471960
    },
    {
      "epoch": 1.6541371829514908,
      "grad_norm": 2.53125,
      "learning_rate": 2.4923479520549548e-05,
      "loss": 0.835,
      "step": 471970
    },
    {
      "epoch": 1.6541722304583864,
      "grad_norm": 2.921875,
      "learning_rate": 2.4922830491885846e-05,
      "loss": 0.8798,
      "step": 471980
    },
    {
      "epoch": 1.6542072779652819,
      "grad_norm": 2.671875,
      "learning_rate": 2.4922181463222144e-05,
      "loss": 0.7028,
      "step": 471990
    },
    {
      "epoch": 1.6542423254721776,
      "grad_norm": 2.59375,
      "learning_rate": 2.4921532434558442e-05,
      "loss": 0.9096,
      "step": 472000
    },
    {
      "epoch": 1.6542773729790732,
      "grad_norm": 2.8125,
      "learning_rate": 2.492088340589474e-05,
      "loss": 0.8006,
      "step": 472010
    },
    {
      "epoch": 1.6543124204859687,
      "grad_norm": 3.390625,
      "learning_rate": 2.4920234377231035e-05,
      "loss": 0.881,
      "step": 472020
    },
    {
      "epoch": 1.6543474679928645,
      "grad_norm": 2.59375,
      "learning_rate": 2.4919585348567333e-05,
      "loss": 0.8072,
      "step": 472030
    },
    {
      "epoch": 1.6543825154997598,
      "grad_norm": 3.0625,
      "learning_rate": 2.4918936319903634e-05,
      "loss": 0.8524,
      "step": 472040
    },
    {
      "epoch": 1.6544175630066555,
      "grad_norm": 2.875,
      "learning_rate": 2.4918287291239932e-05,
      "loss": 0.832,
      "step": 472050
    },
    {
      "epoch": 1.654452610513551,
      "grad_norm": 2.625,
      "learning_rate": 2.491763826257623e-05,
      "loss": 0.8553,
      "step": 472060
    },
    {
      "epoch": 1.6544876580204466,
      "grad_norm": 2.71875,
      "learning_rate": 2.4916989233912528e-05,
      "loss": 0.8287,
      "step": 472070
    },
    {
      "epoch": 1.6545227055273424,
      "grad_norm": 2.953125,
      "learning_rate": 2.4916340205248826e-05,
      "loss": 0.8356,
      "step": 472080
    },
    {
      "epoch": 1.654557753034238,
      "grad_norm": 2.71875,
      "learning_rate": 2.4915691176585124e-05,
      "loss": 0.8335,
      "step": 472090
    },
    {
      "epoch": 1.6545928005411334,
      "grad_norm": 3.015625,
      "learning_rate": 2.4915042147921422e-05,
      "loss": 0.8368,
      "step": 472100
    },
    {
      "epoch": 1.6546278480480292,
      "grad_norm": 3.25,
      "learning_rate": 2.491439311925772e-05,
      "loss": 0.8442,
      "step": 472110
    },
    {
      "epoch": 1.6546628955549247,
      "grad_norm": 2.984375,
      "learning_rate": 2.4913744090594018e-05,
      "loss": 0.8078,
      "step": 472120
    },
    {
      "epoch": 1.6546979430618203,
      "grad_norm": 3.140625,
      "learning_rate": 2.4913095061930316e-05,
      "loss": 0.784,
      "step": 472130
    },
    {
      "epoch": 1.654732990568716,
      "grad_norm": 3.046875,
      "learning_rate": 2.4912446033266614e-05,
      "loss": 0.8631,
      "step": 472140
    },
    {
      "epoch": 1.6547680380756113,
      "grad_norm": 3.0,
      "learning_rate": 2.4911797004602912e-05,
      "loss": 0.9438,
      "step": 472150
    },
    {
      "epoch": 1.654803085582507,
      "grad_norm": 3.046875,
      "learning_rate": 2.491114797593921e-05,
      "loss": 0.7768,
      "step": 472160
    },
    {
      "epoch": 1.6548381330894026,
      "grad_norm": 2.890625,
      "learning_rate": 2.4910498947275508e-05,
      "loss": 0.8425,
      "step": 472170
    },
    {
      "epoch": 1.6548731805962982,
      "grad_norm": 3.46875,
      "learning_rate": 2.490984991861181e-05,
      "loss": 0.7716,
      "step": 472180
    },
    {
      "epoch": 1.654908228103194,
      "grad_norm": 2.40625,
      "learning_rate": 2.4909200889948107e-05,
      "loss": 0.8148,
      "step": 472190
    },
    {
      "epoch": 1.6549432756100895,
      "grad_norm": 2.765625,
      "learning_rate": 2.4908551861284405e-05,
      "loss": 0.7836,
      "step": 472200
    },
    {
      "epoch": 1.654978323116985,
      "grad_norm": 2.8125,
      "learning_rate": 2.49079028326207e-05,
      "loss": 0.8452,
      "step": 472210
    },
    {
      "epoch": 1.6550133706238808,
      "grad_norm": 3.046875,
      "learning_rate": 2.4907253803956998e-05,
      "loss": 0.8846,
      "step": 472220
    },
    {
      "epoch": 1.6550484181307763,
      "grad_norm": 2.6875,
      "learning_rate": 2.4906604775293296e-05,
      "loss": 0.8618,
      "step": 472230
    },
    {
      "epoch": 1.6550834656376718,
      "grad_norm": 3.09375,
      "learning_rate": 2.4905955746629594e-05,
      "loss": 0.7661,
      "step": 472240
    },
    {
      "epoch": 1.6551185131445676,
      "grad_norm": 2.6875,
      "learning_rate": 2.4905306717965892e-05,
      "loss": 0.8498,
      "step": 472250
    },
    {
      "epoch": 1.655153560651463,
      "grad_norm": 2.875,
      "learning_rate": 2.490465768930219e-05,
      "loss": 0.8741,
      "step": 472260
    },
    {
      "epoch": 1.6551886081583587,
      "grad_norm": 2.953125,
      "learning_rate": 2.4904008660638488e-05,
      "loss": 0.8934,
      "step": 472270
    },
    {
      "epoch": 1.6552236556652542,
      "grad_norm": 2.90625,
      "learning_rate": 2.4903359631974786e-05,
      "loss": 0.8616,
      "step": 472280
    },
    {
      "epoch": 1.6552587031721497,
      "grad_norm": 2.828125,
      "learning_rate": 2.4902710603311087e-05,
      "loss": 0.8354,
      "step": 472290
    },
    {
      "epoch": 1.6552937506790455,
      "grad_norm": 2.671875,
      "learning_rate": 2.4902061574647385e-05,
      "loss": 0.7649,
      "step": 472300
    },
    {
      "epoch": 1.655328798185941,
      "grad_norm": 2.90625,
      "learning_rate": 2.4901412545983683e-05,
      "loss": 0.8553,
      "step": 472310
    },
    {
      "epoch": 1.6553638456928366,
      "grad_norm": 2.78125,
      "learning_rate": 2.490076351731998e-05,
      "loss": 0.8363,
      "step": 472320
    },
    {
      "epoch": 1.6553988931997323,
      "grad_norm": 2.46875,
      "learning_rate": 2.490011448865628e-05,
      "loss": 0.8194,
      "step": 472330
    },
    {
      "epoch": 1.6554339407066279,
      "grad_norm": 2.734375,
      "learning_rate": 2.4899465459992577e-05,
      "loss": 0.8297,
      "step": 472340
    },
    {
      "epoch": 1.6554689882135234,
      "grad_norm": 2.65625,
      "learning_rate": 2.4898816431328875e-05,
      "loss": 0.8194,
      "step": 472350
    },
    {
      "epoch": 1.6555040357204192,
      "grad_norm": 2.8125,
      "learning_rate": 2.4898167402665173e-05,
      "loss": 0.8611,
      "step": 472360
    },
    {
      "epoch": 1.6555390832273145,
      "grad_norm": 3.140625,
      "learning_rate": 2.489751837400147e-05,
      "loss": 0.8639,
      "step": 472370
    },
    {
      "epoch": 1.6555741307342102,
      "grad_norm": 2.921875,
      "learning_rate": 2.489686934533777e-05,
      "loss": 0.8454,
      "step": 472380
    },
    {
      "epoch": 1.655609178241106,
      "grad_norm": 2.796875,
      "learning_rate": 2.4896220316674064e-05,
      "loss": 0.8139,
      "step": 472390
    },
    {
      "epoch": 1.6556442257480013,
      "grad_norm": 3.03125,
      "learning_rate": 2.4895571288010365e-05,
      "loss": 0.7607,
      "step": 472400
    },
    {
      "epoch": 1.655679273254897,
      "grad_norm": 2.734375,
      "learning_rate": 2.4894922259346663e-05,
      "loss": 0.797,
      "step": 472410
    },
    {
      "epoch": 1.6557143207617926,
      "grad_norm": 2.9375,
      "learning_rate": 2.489427323068296e-05,
      "loss": 0.7825,
      "step": 472420
    },
    {
      "epoch": 1.6557493682686881,
      "grad_norm": 2.984375,
      "learning_rate": 2.489362420201926e-05,
      "loss": 0.8957,
      "step": 472430
    },
    {
      "epoch": 1.6557844157755839,
      "grad_norm": 2.390625,
      "learning_rate": 2.4892975173355557e-05,
      "loss": 0.8177,
      "step": 472440
    },
    {
      "epoch": 1.6558194632824794,
      "grad_norm": 3.015625,
      "learning_rate": 2.4892326144691855e-05,
      "loss": 0.8309,
      "step": 472450
    },
    {
      "epoch": 1.655854510789375,
      "grad_norm": 2.90625,
      "learning_rate": 2.4891677116028153e-05,
      "loss": 0.7901,
      "step": 472460
    },
    {
      "epoch": 1.6558895582962707,
      "grad_norm": 3.03125,
      "learning_rate": 2.489102808736445e-05,
      "loss": 0.807,
      "step": 472470
    },
    {
      "epoch": 1.655924605803166,
      "grad_norm": 3.125,
      "learning_rate": 2.489037905870075e-05,
      "loss": 0.8514,
      "step": 472480
    },
    {
      "epoch": 1.6559596533100618,
      "grad_norm": 2.8125,
      "learning_rate": 2.4889730030037047e-05,
      "loss": 0.8313,
      "step": 472490
    },
    {
      "epoch": 1.6559947008169575,
      "grad_norm": 3.046875,
      "learning_rate": 2.4889081001373345e-05,
      "loss": 0.773,
      "step": 472500
    },
    {
      "epoch": 1.6560297483238529,
      "grad_norm": 2.90625,
      "learning_rate": 2.4888431972709643e-05,
      "loss": 0.8404,
      "step": 472510
    },
    {
      "epoch": 1.6560647958307486,
      "grad_norm": 2.921875,
      "learning_rate": 2.488778294404594e-05,
      "loss": 0.8642,
      "step": 472520
    },
    {
      "epoch": 1.6560998433376442,
      "grad_norm": 2.59375,
      "learning_rate": 2.488713391538224e-05,
      "loss": 0.8601,
      "step": 472530
    },
    {
      "epoch": 1.6561348908445397,
      "grad_norm": 3.21875,
      "learning_rate": 2.488648488671854e-05,
      "loss": 0.8215,
      "step": 472540
    },
    {
      "epoch": 1.6561699383514354,
      "grad_norm": 3.015625,
      "learning_rate": 2.488583585805484e-05,
      "loss": 0.8902,
      "step": 472550
    },
    {
      "epoch": 1.656204985858331,
      "grad_norm": 2.59375,
      "learning_rate": 2.4885186829391137e-05,
      "loss": 0.7931,
      "step": 472560
    },
    {
      "epoch": 1.6562400333652265,
      "grad_norm": 3.109375,
      "learning_rate": 2.4884537800727435e-05,
      "loss": 0.9016,
      "step": 472570
    },
    {
      "epoch": 1.6562750808721223,
      "grad_norm": 2.78125,
      "learning_rate": 2.488388877206373e-05,
      "loss": 0.7868,
      "step": 472580
    },
    {
      "epoch": 1.6563101283790178,
      "grad_norm": 2.796875,
      "learning_rate": 2.4883239743400027e-05,
      "loss": 0.7572,
      "step": 472590
    },
    {
      "epoch": 1.6563451758859133,
      "grad_norm": 3.25,
      "learning_rate": 2.4882590714736325e-05,
      "loss": 0.8702,
      "step": 472600
    },
    {
      "epoch": 1.656380223392809,
      "grad_norm": 2.890625,
      "learning_rate": 2.4881941686072623e-05,
      "loss": 0.7876,
      "step": 472610
    },
    {
      "epoch": 1.6564152708997044,
      "grad_norm": 3.078125,
      "learning_rate": 2.488129265740892e-05,
      "loss": 0.8553,
      "step": 472620
    },
    {
      "epoch": 1.6564503184066002,
      "grad_norm": 2.8125,
      "learning_rate": 2.488064362874522e-05,
      "loss": 0.8224,
      "step": 472630
    },
    {
      "epoch": 1.6564853659134957,
      "grad_norm": 2.765625,
      "learning_rate": 2.4879994600081517e-05,
      "loss": 0.7963,
      "step": 472640
    },
    {
      "epoch": 1.6565204134203912,
      "grad_norm": 2.703125,
      "learning_rate": 2.4879345571417815e-05,
      "loss": 0.9465,
      "step": 472650
    },
    {
      "epoch": 1.656555460927287,
      "grad_norm": 2.859375,
      "learning_rate": 2.4878696542754117e-05,
      "loss": 0.8411,
      "step": 472660
    },
    {
      "epoch": 1.6565905084341825,
      "grad_norm": 2.828125,
      "learning_rate": 2.4878047514090415e-05,
      "loss": 0.8402,
      "step": 472670
    },
    {
      "epoch": 1.656625555941078,
      "grad_norm": 2.84375,
      "learning_rate": 2.4877398485426713e-05,
      "loss": 0.8353,
      "step": 472680
    },
    {
      "epoch": 1.6566606034479738,
      "grad_norm": 2.796875,
      "learning_rate": 2.487674945676301e-05,
      "loss": 0.7864,
      "step": 472690
    },
    {
      "epoch": 1.6566956509548694,
      "grad_norm": 2.671875,
      "learning_rate": 2.487610042809931e-05,
      "loss": 0.8291,
      "step": 472700
    },
    {
      "epoch": 1.656730698461765,
      "grad_norm": 2.5,
      "learning_rate": 2.4875451399435607e-05,
      "loss": 0.7906,
      "step": 472710
    },
    {
      "epoch": 1.6567657459686607,
      "grad_norm": 3.171875,
      "learning_rate": 2.4874802370771905e-05,
      "loss": 0.8137,
      "step": 472720
    },
    {
      "epoch": 1.656800793475556,
      "grad_norm": 2.9375,
      "learning_rate": 2.4874153342108203e-05,
      "loss": 0.8177,
      "step": 472730
    },
    {
      "epoch": 1.6568358409824517,
      "grad_norm": 2.734375,
      "learning_rate": 2.48735043134445e-05,
      "loss": 0.8271,
      "step": 472740
    },
    {
      "epoch": 1.6568708884893473,
      "grad_norm": 2.984375,
      "learning_rate": 2.48728552847808e-05,
      "loss": 0.9254,
      "step": 472750
    },
    {
      "epoch": 1.6569059359962428,
      "grad_norm": 2.890625,
      "learning_rate": 2.4872206256117097e-05,
      "loss": 0.8797,
      "step": 472760
    },
    {
      "epoch": 1.6569409835031386,
      "grad_norm": 2.96875,
      "learning_rate": 2.4871557227453395e-05,
      "loss": 0.9381,
      "step": 472770
    },
    {
      "epoch": 1.656976031010034,
      "grad_norm": 3.046875,
      "learning_rate": 2.4870908198789693e-05,
      "loss": 0.8642,
      "step": 472780
    },
    {
      "epoch": 1.6570110785169296,
      "grad_norm": 2.875,
      "learning_rate": 2.487025917012599e-05,
      "loss": 0.8904,
      "step": 472790
    },
    {
      "epoch": 1.6570461260238254,
      "grad_norm": 2.859375,
      "learning_rate": 2.486961014146229e-05,
      "loss": 0.8037,
      "step": 472800
    },
    {
      "epoch": 1.657081173530721,
      "grad_norm": 2.484375,
      "learning_rate": 2.4868961112798587e-05,
      "loss": 0.7541,
      "step": 472810
    },
    {
      "epoch": 1.6571162210376165,
      "grad_norm": 2.546875,
      "learning_rate": 2.4868312084134885e-05,
      "loss": 0.7392,
      "step": 472820
    },
    {
      "epoch": 1.6571512685445122,
      "grad_norm": 2.671875,
      "learning_rate": 2.4867663055471183e-05,
      "loss": 0.7659,
      "step": 472830
    },
    {
      "epoch": 1.6571863160514075,
      "grad_norm": 2.59375,
      "learning_rate": 2.486701402680748e-05,
      "loss": 0.7598,
      "step": 472840
    },
    {
      "epoch": 1.6572213635583033,
      "grad_norm": 2.46875,
      "learning_rate": 2.486636499814378e-05,
      "loss": 0.8283,
      "step": 472850
    },
    {
      "epoch": 1.6572564110651988,
      "grad_norm": 2.8125,
      "learning_rate": 2.4865715969480077e-05,
      "loss": 0.8587,
      "step": 472860
    },
    {
      "epoch": 1.6572914585720944,
      "grad_norm": 2.703125,
      "learning_rate": 2.4865066940816375e-05,
      "loss": 0.8609,
      "step": 472870
    },
    {
      "epoch": 1.6573265060789901,
      "grad_norm": 3.265625,
      "learning_rate": 2.4864417912152673e-05,
      "loss": 0.8147,
      "step": 472880
    },
    {
      "epoch": 1.6573615535858857,
      "grad_norm": 2.640625,
      "learning_rate": 2.486376888348897e-05,
      "loss": 0.7278,
      "step": 472890
    },
    {
      "epoch": 1.6573966010927812,
      "grad_norm": 3.015625,
      "learning_rate": 2.486311985482527e-05,
      "loss": 0.8585,
      "step": 472900
    },
    {
      "epoch": 1.657431648599677,
      "grad_norm": 2.5,
      "learning_rate": 2.486247082616157e-05,
      "loss": 0.7502,
      "step": 472910
    },
    {
      "epoch": 1.6574666961065725,
      "grad_norm": 2.9375,
      "learning_rate": 2.4861821797497868e-05,
      "loss": 0.9529,
      "step": 472920
    },
    {
      "epoch": 1.657501743613468,
      "grad_norm": 2.859375,
      "learning_rate": 2.4861172768834166e-05,
      "loss": 0.8721,
      "step": 472930
    },
    {
      "epoch": 1.6575367911203638,
      "grad_norm": 3.75,
      "learning_rate": 2.4860523740170464e-05,
      "loss": 0.8379,
      "step": 472940
    },
    {
      "epoch": 1.657571838627259,
      "grad_norm": 3.296875,
      "learning_rate": 2.4859874711506762e-05,
      "loss": 0.815,
      "step": 472950
    },
    {
      "epoch": 1.6576068861341549,
      "grad_norm": 2.65625,
      "learning_rate": 2.4859225682843057e-05,
      "loss": 0.7767,
      "step": 472960
    },
    {
      "epoch": 1.6576419336410504,
      "grad_norm": 2.890625,
      "learning_rate": 2.4858576654179355e-05,
      "loss": 0.8627,
      "step": 472970
    },
    {
      "epoch": 1.657676981147946,
      "grad_norm": 2.84375,
      "learning_rate": 2.4857927625515653e-05,
      "loss": 0.9236,
      "step": 472980
    },
    {
      "epoch": 1.6577120286548417,
      "grad_norm": 3.109375,
      "learning_rate": 2.485727859685195e-05,
      "loss": 0.785,
      "step": 472990
    },
    {
      "epoch": 1.6577470761617372,
      "grad_norm": 3.578125,
      "learning_rate": 2.485662956818825e-05,
      "loss": 0.9011,
      "step": 473000
    },
    {
      "epoch": 1.6577821236686328,
      "grad_norm": 2.875,
      "learning_rate": 2.4855980539524547e-05,
      "loss": 0.8416,
      "step": 473010
    },
    {
      "epoch": 1.6578171711755285,
      "grad_norm": 2.765625,
      "learning_rate": 2.4855331510860848e-05,
      "loss": 0.8411,
      "step": 473020
    },
    {
      "epoch": 1.657852218682424,
      "grad_norm": 3.4375,
      "learning_rate": 2.4854682482197146e-05,
      "loss": 0.8438,
      "step": 473030
    },
    {
      "epoch": 1.6578872661893196,
      "grad_norm": 3.046875,
      "learning_rate": 2.4854033453533444e-05,
      "loss": 0.782,
      "step": 473040
    },
    {
      "epoch": 1.6579223136962153,
      "grad_norm": 3.0625,
      "learning_rate": 2.4853384424869742e-05,
      "loss": 0.8616,
      "step": 473050
    },
    {
      "epoch": 1.6579573612031107,
      "grad_norm": 3.296875,
      "learning_rate": 2.485273539620604e-05,
      "loss": 0.8221,
      "step": 473060
    },
    {
      "epoch": 1.6579924087100064,
      "grad_norm": 2.78125,
      "learning_rate": 2.4852086367542338e-05,
      "loss": 0.8847,
      "step": 473070
    },
    {
      "epoch": 1.6580274562169022,
      "grad_norm": 2.609375,
      "learning_rate": 2.4851437338878636e-05,
      "loss": 0.8027,
      "step": 473080
    },
    {
      "epoch": 1.6580625037237975,
      "grad_norm": 3.15625,
      "learning_rate": 2.4850788310214934e-05,
      "loss": 0.8217,
      "step": 473090
    },
    {
      "epoch": 1.6580975512306932,
      "grad_norm": 2.84375,
      "learning_rate": 2.4850139281551232e-05,
      "loss": 0.8902,
      "step": 473100
    },
    {
      "epoch": 1.6581325987375888,
      "grad_norm": 3.4375,
      "learning_rate": 2.484949025288753e-05,
      "loss": 0.8626,
      "step": 473110
    },
    {
      "epoch": 1.6581676462444843,
      "grad_norm": 2.28125,
      "learning_rate": 2.4848841224223828e-05,
      "loss": 0.8169,
      "step": 473120
    },
    {
      "epoch": 1.65820269375138,
      "grad_norm": 3.078125,
      "learning_rate": 2.4848192195560126e-05,
      "loss": 0.8568,
      "step": 473130
    },
    {
      "epoch": 1.6582377412582756,
      "grad_norm": 3.375,
      "learning_rate": 2.4847543166896424e-05,
      "loss": 0.8905,
      "step": 473140
    },
    {
      "epoch": 1.6582727887651711,
      "grad_norm": 2.59375,
      "learning_rate": 2.4846894138232722e-05,
      "loss": 0.7774,
      "step": 473150
    },
    {
      "epoch": 1.658307836272067,
      "grad_norm": 3.546875,
      "learning_rate": 2.484624510956902e-05,
      "loss": 0.8921,
      "step": 473160
    },
    {
      "epoch": 1.6583428837789622,
      "grad_norm": 3.0,
      "learning_rate": 2.4845596080905318e-05,
      "loss": 0.8789,
      "step": 473170
    },
    {
      "epoch": 1.658377931285858,
      "grad_norm": 2.96875,
      "learning_rate": 2.4844947052241616e-05,
      "loss": 0.8496,
      "step": 473180
    },
    {
      "epoch": 1.6584129787927537,
      "grad_norm": 2.859375,
      "learning_rate": 2.4844298023577914e-05,
      "loss": 0.8603,
      "step": 473190
    },
    {
      "epoch": 1.658448026299649,
      "grad_norm": 2.5625,
      "learning_rate": 2.4843648994914212e-05,
      "loss": 0.8054,
      "step": 473200
    },
    {
      "epoch": 1.6584830738065448,
      "grad_norm": 2.90625,
      "learning_rate": 2.484299996625051e-05,
      "loss": 0.8488,
      "step": 473210
    },
    {
      "epoch": 1.6585181213134403,
      "grad_norm": 3.125,
      "learning_rate": 2.4842350937586808e-05,
      "loss": 0.8059,
      "step": 473220
    },
    {
      "epoch": 1.6585531688203359,
      "grad_norm": 2.890625,
      "learning_rate": 2.4841701908923106e-05,
      "loss": 0.8059,
      "step": 473230
    },
    {
      "epoch": 1.6585882163272316,
      "grad_norm": 2.84375,
      "learning_rate": 2.4841052880259404e-05,
      "loss": 0.8705,
      "step": 473240
    },
    {
      "epoch": 1.6586232638341272,
      "grad_norm": 3.3125,
      "learning_rate": 2.4840403851595702e-05,
      "loss": 0.9544,
      "step": 473250
    },
    {
      "epoch": 1.6586583113410227,
      "grad_norm": 2.921875,
      "learning_rate": 2.4839754822932e-05,
      "loss": 0.8311,
      "step": 473260
    },
    {
      "epoch": 1.6586933588479185,
      "grad_norm": 3.125,
      "learning_rate": 2.4839105794268298e-05,
      "loss": 0.8193,
      "step": 473270
    },
    {
      "epoch": 1.658728406354814,
      "grad_norm": 2.75,
      "learning_rate": 2.48384567656046e-05,
      "loss": 0.7983,
      "step": 473280
    },
    {
      "epoch": 1.6587634538617095,
      "grad_norm": 3.234375,
      "learning_rate": 2.4837807736940897e-05,
      "loss": 0.8804,
      "step": 473290
    },
    {
      "epoch": 1.6587985013686053,
      "grad_norm": 2.65625,
      "learning_rate": 2.4837158708277195e-05,
      "loss": 0.8502,
      "step": 473300
    },
    {
      "epoch": 1.6588335488755006,
      "grad_norm": 3.21875,
      "learning_rate": 2.4836509679613493e-05,
      "loss": 0.9045,
      "step": 473310
    },
    {
      "epoch": 1.6588685963823964,
      "grad_norm": 3.21875,
      "learning_rate": 2.483586065094979e-05,
      "loss": 0.839,
      "step": 473320
    },
    {
      "epoch": 1.658903643889292,
      "grad_norm": 2.65625,
      "learning_rate": 2.483521162228609e-05,
      "loss": 0.8335,
      "step": 473330
    },
    {
      "epoch": 1.6589386913961874,
      "grad_norm": 3.125,
      "learning_rate": 2.4834562593622384e-05,
      "loss": 0.7955,
      "step": 473340
    },
    {
      "epoch": 1.6589737389030832,
      "grad_norm": 2.78125,
      "learning_rate": 2.4833913564958682e-05,
      "loss": 0.7679,
      "step": 473350
    },
    {
      "epoch": 1.6590087864099787,
      "grad_norm": 2.53125,
      "learning_rate": 2.483326453629498e-05,
      "loss": 0.8198,
      "step": 473360
    },
    {
      "epoch": 1.6590438339168743,
      "grad_norm": 2.921875,
      "learning_rate": 2.4832615507631278e-05,
      "loss": 0.847,
      "step": 473370
    },
    {
      "epoch": 1.65907888142377,
      "grad_norm": 2.546875,
      "learning_rate": 2.4831966478967576e-05,
      "loss": 0.8378,
      "step": 473380
    },
    {
      "epoch": 1.6591139289306656,
      "grad_norm": 3.125,
      "learning_rate": 2.4831317450303877e-05,
      "loss": 0.8351,
      "step": 473390
    },
    {
      "epoch": 1.659148976437561,
      "grad_norm": 2.640625,
      "learning_rate": 2.4830668421640175e-05,
      "loss": 0.7602,
      "step": 473400
    },
    {
      "epoch": 1.6591840239444569,
      "grad_norm": 3.0,
      "learning_rate": 2.4830019392976473e-05,
      "loss": 0.7689,
      "step": 473410
    },
    {
      "epoch": 1.6592190714513522,
      "grad_norm": 3.15625,
      "learning_rate": 2.482937036431277e-05,
      "loss": 0.8569,
      "step": 473420
    },
    {
      "epoch": 1.659254118958248,
      "grad_norm": 3.328125,
      "learning_rate": 2.482872133564907e-05,
      "loss": 0.7672,
      "step": 473430
    },
    {
      "epoch": 1.6592891664651435,
      "grad_norm": 4.3125,
      "learning_rate": 2.4828072306985367e-05,
      "loss": 0.748,
      "step": 473440
    },
    {
      "epoch": 1.659324213972039,
      "grad_norm": 2.71875,
      "learning_rate": 2.4827423278321665e-05,
      "loss": 0.7853,
      "step": 473450
    },
    {
      "epoch": 1.6593592614789348,
      "grad_norm": 3.0,
      "learning_rate": 2.4826774249657963e-05,
      "loss": 0.7983,
      "step": 473460
    },
    {
      "epoch": 1.6593943089858303,
      "grad_norm": 2.75,
      "learning_rate": 2.482612522099426e-05,
      "loss": 0.8121,
      "step": 473470
    },
    {
      "epoch": 1.6594293564927258,
      "grad_norm": 3.078125,
      "learning_rate": 2.482547619233056e-05,
      "loss": 0.8839,
      "step": 473480
    },
    {
      "epoch": 1.6594644039996216,
      "grad_norm": 3.203125,
      "learning_rate": 2.4824827163666857e-05,
      "loss": 0.752,
      "step": 473490
    },
    {
      "epoch": 1.6594994515065171,
      "grad_norm": 3.078125,
      "learning_rate": 2.4824178135003155e-05,
      "loss": 0.8334,
      "step": 473500
    },
    {
      "epoch": 1.6595344990134127,
      "grad_norm": 2.875,
      "learning_rate": 2.4823529106339453e-05,
      "loss": 0.8294,
      "step": 473510
    },
    {
      "epoch": 1.6595695465203084,
      "grad_norm": 2.984375,
      "learning_rate": 2.482288007767575e-05,
      "loss": 0.9037,
      "step": 473520
    },
    {
      "epoch": 1.6596045940272037,
      "grad_norm": 2.75,
      "learning_rate": 2.482223104901205e-05,
      "loss": 0.7797,
      "step": 473530
    },
    {
      "epoch": 1.6596396415340995,
      "grad_norm": 3.0,
      "learning_rate": 2.4821582020348347e-05,
      "loss": 0.7812,
      "step": 473540
    },
    {
      "epoch": 1.659674689040995,
      "grad_norm": 2.953125,
      "learning_rate": 2.4820932991684645e-05,
      "loss": 0.8218,
      "step": 473550
    },
    {
      "epoch": 1.6597097365478906,
      "grad_norm": 2.796875,
      "learning_rate": 2.4820283963020943e-05,
      "loss": 0.7809,
      "step": 473560
    },
    {
      "epoch": 1.6597447840547863,
      "grad_norm": 3.03125,
      "learning_rate": 2.481963493435724e-05,
      "loss": 0.8983,
      "step": 473570
    },
    {
      "epoch": 1.6597798315616819,
      "grad_norm": 2.71875,
      "learning_rate": 2.481898590569354e-05,
      "loss": 0.8693,
      "step": 473580
    },
    {
      "epoch": 1.6598148790685774,
      "grad_norm": 2.921875,
      "learning_rate": 2.4818336877029837e-05,
      "loss": 0.7799,
      "step": 473590
    },
    {
      "epoch": 1.6598499265754731,
      "grad_norm": 3.390625,
      "learning_rate": 2.4817687848366135e-05,
      "loss": 0.8528,
      "step": 473600
    },
    {
      "epoch": 1.6598849740823687,
      "grad_norm": 2.84375,
      "learning_rate": 2.4817038819702433e-05,
      "loss": 0.8488,
      "step": 473610
    },
    {
      "epoch": 1.6599200215892642,
      "grad_norm": 2.984375,
      "learning_rate": 2.481638979103873e-05,
      "loss": 0.9305,
      "step": 473620
    },
    {
      "epoch": 1.65995506909616,
      "grad_norm": 2.96875,
      "learning_rate": 2.481574076237503e-05,
      "loss": 0.7775,
      "step": 473630
    },
    {
      "epoch": 1.6599901166030553,
      "grad_norm": 2.859375,
      "learning_rate": 2.481509173371133e-05,
      "loss": 0.8291,
      "step": 473640
    },
    {
      "epoch": 1.660025164109951,
      "grad_norm": 3.28125,
      "learning_rate": 2.481444270504763e-05,
      "loss": 0.8896,
      "step": 473650
    },
    {
      "epoch": 1.6600602116168466,
      "grad_norm": 2.640625,
      "learning_rate": 2.4813793676383927e-05,
      "loss": 0.8103,
      "step": 473660
    },
    {
      "epoch": 1.6600952591237421,
      "grad_norm": 3.3125,
      "learning_rate": 2.4813144647720225e-05,
      "loss": 0.8832,
      "step": 473670
    },
    {
      "epoch": 1.6601303066306379,
      "grad_norm": 3.078125,
      "learning_rate": 2.4812495619056523e-05,
      "loss": 0.8056,
      "step": 473680
    },
    {
      "epoch": 1.6601653541375334,
      "grad_norm": 2.8125,
      "learning_rate": 2.481184659039282e-05,
      "loss": 0.7365,
      "step": 473690
    },
    {
      "epoch": 1.660200401644429,
      "grad_norm": 2.421875,
      "learning_rate": 2.481119756172912e-05,
      "loss": 0.8963,
      "step": 473700
    },
    {
      "epoch": 1.6602354491513247,
      "grad_norm": 3.015625,
      "learning_rate": 2.4810548533065413e-05,
      "loss": 0.8735,
      "step": 473710
    },
    {
      "epoch": 1.6602704966582202,
      "grad_norm": 2.78125,
      "learning_rate": 2.480989950440171e-05,
      "loss": 0.8439,
      "step": 473720
    },
    {
      "epoch": 1.6603055441651158,
      "grad_norm": 2.828125,
      "learning_rate": 2.480925047573801e-05,
      "loss": 0.9032,
      "step": 473730
    },
    {
      "epoch": 1.6603405916720115,
      "grad_norm": 3.40625,
      "learning_rate": 2.4808601447074307e-05,
      "loss": 0.8759,
      "step": 473740
    },
    {
      "epoch": 1.6603756391789068,
      "grad_norm": 2.765625,
      "learning_rate": 2.4807952418410605e-05,
      "loss": 0.7711,
      "step": 473750
    },
    {
      "epoch": 1.6604106866858026,
      "grad_norm": 3.4375,
      "learning_rate": 2.4807303389746907e-05,
      "loss": 0.87,
      "step": 473760
    },
    {
      "epoch": 1.6604457341926984,
      "grad_norm": 2.890625,
      "learning_rate": 2.4806654361083205e-05,
      "loss": 0.8184,
      "step": 473770
    },
    {
      "epoch": 1.6604807816995937,
      "grad_norm": 2.890625,
      "learning_rate": 2.4806005332419503e-05,
      "loss": 0.8944,
      "step": 473780
    },
    {
      "epoch": 1.6605158292064894,
      "grad_norm": 2.609375,
      "learning_rate": 2.48053563037558e-05,
      "loss": 0.818,
      "step": 473790
    },
    {
      "epoch": 1.660550876713385,
      "grad_norm": 2.453125,
      "learning_rate": 2.48047072750921e-05,
      "loss": 0.8666,
      "step": 473800
    },
    {
      "epoch": 1.6605859242202805,
      "grad_norm": 2.40625,
      "learning_rate": 2.4804058246428397e-05,
      "loss": 0.7712,
      "step": 473810
    },
    {
      "epoch": 1.6606209717271763,
      "grad_norm": 2.984375,
      "learning_rate": 2.4803409217764695e-05,
      "loss": 0.7519,
      "step": 473820
    },
    {
      "epoch": 1.6606560192340718,
      "grad_norm": 3.34375,
      "learning_rate": 2.4802760189100993e-05,
      "loss": 0.8572,
      "step": 473830
    },
    {
      "epoch": 1.6606910667409673,
      "grad_norm": 2.828125,
      "learning_rate": 2.480211116043729e-05,
      "loss": 0.9004,
      "step": 473840
    },
    {
      "epoch": 1.660726114247863,
      "grad_norm": 2.875,
      "learning_rate": 2.480146213177359e-05,
      "loss": 0.8835,
      "step": 473850
    },
    {
      "epoch": 1.6607611617547586,
      "grad_norm": 3.03125,
      "learning_rate": 2.4800813103109887e-05,
      "loss": 0.7748,
      "step": 473860
    },
    {
      "epoch": 1.6607962092616542,
      "grad_norm": 2.96875,
      "learning_rate": 2.4800164074446185e-05,
      "loss": 0.878,
      "step": 473870
    },
    {
      "epoch": 1.66083125676855,
      "grad_norm": 2.75,
      "learning_rate": 2.4799515045782483e-05,
      "loss": 0.896,
      "step": 473880
    },
    {
      "epoch": 1.6608663042754452,
      "grad_norm": 2.90625,
      "learning_rate": 2.4798866017118784e-05,
      "loss": 0.7499,
      "step": 473890
    },
    {
      "epoch": 1.660901351782341,
      "grad_norm": 3.265625,
      "learning_rate": 2.479821698845508e-05,
      "loss": 0.8176,
      "step": 473900
    },
    {
      "epoch": 1.6609363992892365,
      "grad_norm": 2.59375,
      "learning_rate": 2.4797567959791377e-05,
      "loss": 0.836,
      "step": 473910
    },
    {
      "epoch": 1.660971446796132,
      "grad_norm": 2.546875,
      "learning_rate": 2.4796918931127675e-05,
      "loss": 0.8394,
      "step": 473920
    },
    {
      "epoch": 1.6610064943030278,
      "grad_norm": 3.15625,
      "learning_rate": 2.4796269902463973e-05,
      "loss": 0.9003,
      "step": 473930
    },
    {
      "epoch": 1.6610415418099234,
      "grad_norm": 2.21875,
      "learning_rate": 2.479562087380027e-05,
      "loss": 0.8564,
      "step": 473940
    },
    {
      "epoch": 1.661076589316819,
      "grad_norm": 3.0,
      "learning_rate": 2.479497184513657e-05,
      "loss": 0.836,
      "step": 473950
    },
    {
      "epoch": 1.6611116368237147,
      "grad_norm": 3.21875,
      "learning_rate": 2.4794322816472867e-05,
      "loss": 0.8337,
      "step": 473960
    },
    {
      "epoch": 1.6611466843306102,
      "grad_norm": 3.09375,
      "learning_rate": 2.4793673787809165e-05,
      "loss": 0.8713,
      "step": 473970
    },
    {
      "epoch": 1.6611817318375057,
      "grad_norm": 2.703125,
      "learning_rate": 2.4793024759145463e-05,
      "loss": 0.8075,
      "step": 473980
    },
    {
      "epoch": 1.6612167793444015,
      "grad_norm": 2.859375,
      "learning_rate": 2.479237573048176e-05,
      "loss": 0.8699,
      "step": 473990
    },
    {
      "epoch": 1.6612518268512968,
      "grad_norm": 2.9375,
      "learning_rate": 2.479172670181806e-05,
      "loss": 0.8342,
      "step": 474000
    },
    {
      "epoch": 1.6612868743581926,
      "grad_norm": 2.796875,
      "learning_rate": 2.479107767315436e-05,
      "loss": 0.8975,
      "step": 474010
    },
    {
      "epoch": 1.661321921865088,
      "grad_norm": 2.734375,
      "learning_rate": 2.4790428644490658e-05,
      "loss": 0.8153,
      "step": 474020
    },
    {
      "epoch": 1.6613569693719836,
      "grad_norm": 3.21875,
      "learning_rate": 2.4789779615826956e-05,
      "loss": 0.8447,
      "step": 474030
    },
    {
      "epoch": 1.6613920168788794,
      "grad_norm": 2.5,
      "learning_rate": 2.4789130587163254e-05,
      "loss": 0.842,
      "step": 474040
    },
    {
      "epoch": 1.661427064385775,
      "grad_norm": 2.53125,
      "learning_rate": 2.4788481558499552e-05,
      "loss": 0.8,
      "step": 474050
    },
    {
      "epoch": 1.6614621118926705,
      "grad_norm": 2.890625,
      "learning_rate": 2.478783252983585e-05,
      "loss": 0.8254,
      "step": 474060
    },
    {
      "epoch": 1.6614971593995662,
      "grad_norm": 2.859375,
      "learning_rate": 2.4787183501172148e-05,
      "loss": 0.804,
      "step": 474070
    },
    {
      "epoch": 1.6615322069064618,
      "grad_norm": 2.859375,
      "learning_rate": 2.4786534472508446e-05,
      "loss": 0.8098,
      "step": 474080
    },
    {
      "epoch": 1.6615672544133573,
      "grad_norm": 2.75,
      "learning_rate": 2.478588544384474e-05,
      "loss": 0.8244,
      "step": 474090
    },
    {
      "epoch": 1.661602301920253,
      "grad_norm": 2.78125,
      "learning_rate": 2.478523641518104e-05,
      "loss": 0.8658,
      "step": 474100
    },
    {
      "epoch": 1.6616373494271484,
      "grad_norm": 2.65625,
      "learning_rate": 2.4784587386517337e-05,
      "loss": 0.8722,
      "step": 474110
    },
    {
      "epoch": 1.6616723969340441,
      "grad_norm": 3.4375,
      "learning_rate": 2.4783938357853638e-05,
      "loss": 0.8814,
      "step": 474120
    },
    {
      "epoch": 1.6617074444409397,
      "grad_norm": 2.984375,
      "learning_rate": 2.4783289329189936e-05,
      "loss": 0.8651,
      "step": 474130
    },
    {
      "epoch": 1.6617424919478352,
      "grad_norm": 2.40625,
      "learning_rate": 2.4782640300526234e-05,
      "loss": 0.8741,
      "step": 474140
    },
    {
      "epoch": 1.661777539454731,
      "grad_norm": 2.578125,
      "learning_rate": 2.4781991271862532e-05,
      "loss": 0.7935,
      "step": 474150
    },
    {
      "epoch": 1.6618125869616265,
      "grad_norm": 3.0625,
      "learning_rate": 2.478134224319883e-05,
      "loss": 0.8927,
      "step": 474160
    },
    {
      "epoch": 1.661847634468522,
      "grad_norm": 2.890625,
      "learning_rate": 2.4780693214535128e-05,
      "loss": 0.8363,
      "step": 474170
    },
    {
      "epoch": 1.6618826819754178,
      "grad_norm": 3.015625,
      "learning_rate": 2.4780044185871426e-05,
      "loss": 0.925,
      "step": 474180
    },
    {
      "epoch": 1.6619177294823133,
      "grad_norm": 2.828125,
      "learning_rate": 2.4779395157207724e-05,
      "loss": 0.852,
      "step": 474190
    },
    {
      "epoch": 1.6619527769892088,
      "grad_norm": 3.046875,
      "learning_rate": 2.4778746128544022e-05,
      "loss": 0.8863,
      "step": 474200
    },
    {
      "epoch": 1.6619878244961046,
      "grad_norm": 3.171875,
      "learning_rate": 2.477809709988032e-05,
      "loss": 0.8634,
      "step": 474210
    },
    {
      "epoch": 1.662022872003,
      "grad_norm": 2.5,
      "learning_rate": 2.4777448071216618e-05,
      "loss": 0.8782,
      "step": 474220
    },
    {
      "epoch": 1.6620579195098957,
      "grad_norm": 2.625,
      "learning_rate": 2.4776799042552916e-05,
      "loss": 0.785,
      "step": 474230
    },
    {
      "epoch": 1.6620929670167912,
      "grad_norm": 2.640625,
      "learning_rate": 2.4776150013889214e-05,
      "loss": 0.7943,
      "step": 474240
    },
    {
      "epoch": 1.6621280145236867,
      "grad_norm": 2.953125,
      "learning_rate": 2.4775500985225512e-05,
      "loss": 0.82,
      "step": 474250
    },
    {
      "epoch": 1.6621630620305825,
      "grad_norm": 3.515625,
      "learning_rate": 2.4774851956561814e-05,
      "loss": 0.8398,
      "step": 474260
    },
    {
      "epoch": 1.662198109537478,
      "grad_norm": 2.90625,
      "learning_rate": 2.477420292789811e-05,
      "loss": 0.7995,
      "step": 474270
    },
    {
      "epoch": 1.6622331570443736,
      "grad_norm": 2.734375,
      "learning_rate": 2.4773553899234406e-05,
      "loss": 0.7682,
      "step": 474280
    },
    {
      "epoch": 1.6622682045512693,
      "grad_norm": 2.859375,
      "learning_rate": 2.4772904870570704e-05,
      "loss": 0.8399,
      "step": 474290
    },
    {
      "epoch": 1.6623032520581649,
      "grad_norm": 2.921875,
      "learning_rate": 2.4772255841907002e-05,
      "loss": 0.8021,
      "step": 474300
    },
    {
      "epoch": 1.6623382995650604,
      "grad_norm": 2.75,
      "learning_rate": 2.47716068132433e-05,
      "loss": 0.8296,
      "step": 474310
    },
    {
      "epoch": 1.6623733470719562,
      "grad_norm": 2.765625,
      "learning_rate": 2.4770957784579598e-05,
      "loss": 0.8799,
      "step": 474320
    },
    {
      "epoch": 1.6624083945788515,
      "grad_norm": 2.921875,
      "learning_rate": 2.4770308755915896e-05,
      "loss": 0.8118,
      "step": 474330
    },
    {
      "epoch": 1.6624434420857472,
      "grad_norm": 2.78125,
      "learning_rate": 2.4769659727252194e-05,
      "loss": 0.7881,
      "step": 474340
    },
    {
      "epoch": 1.6624784895926428,
      "grad_norm": 2.875,
      "learning_rate": 2.4769010698588492e-05,
      "loss": 0.8456,
      "step": 474350
    },
    {
      "epoch": 1.6625135370995383,
      "grad_norm": 3.0625,
      "learning_rate": 2.476836166992479e-05,
      "loss": 0.8757,
      "step": 474360
    },
    {
      "epoch": 1.662548584606434,
      "grad_norm": 2.65625,
      "learning_rate": 2.476771264126109e-05,
      "loss": 0.9351,
      "step": 474370
    },
    {
      "epoch": 1.6625836321133296,
      "grad_norm": 2.625,
      "learning_rate": 2.476706361259739e-05,
      "loss": 0.7679,
      "step": 474380
    },
    {
      "epoch": 1.6626186796202251,
      "grad_norm": 3.09375,
      "learning_rate": 2.4766414583933688e-05,
      "loss": 0.837,
      "step": 474390
    },
    {
      "epoch": 1.662653727127121,
      "grad_norm": 2.78125,
      "learning_rate": 2.4765765555269986e-05,
      "loss": 0.7488,
      "step": 474400
    },
    {
      "epoch": 1.6626887746340164,
      "grad_norm": 3.015625,
      "learning_rate": 2.4765116526606284e-05,
      "loss": 0.8526,
      "step": 474410
    },
    {
      "epoch": 1.662723822140912,
      "grad_norm": 2.859375,
      "learning_rate": 2.476446749794258e-05,
      "loss": 0.8645,
      "step": 474420
    },
    {
      "epoch": 1.6627588696478077,
      "grad_norm": 2.84375,
      "learning_rate": 2.476381846927888e-05,
      "loss": 0.8678,
      "step": 474430
    },
    {
      "epoch": 1.662793917154703,
      "grad_norm": 3.0,
      "learning_rate": 2.4763169440615178e-05,
      "loss": 0.7881,
      "step": 474440
    },
    {
      "epoch": 1.6628289646615988,
      "grad_norm": 3.0625,
      "learning_rate": 2.4762520411951476e-05,
      "loss": 0.8927,
      "step": 474450
    },
    {
      "epoch": 1.6628640121684946,
      "grad_norm": 2.734375,
      "learning_rate": 2.476187138328777e-05,
      "loss": 0.7981,
      "step": 474460
    },
    {
      "epoch": 1.6628990596753899,
      "grad_norm": 2.75,
      "learning_rate": 2.4761222354624068e-05,
      "loss": 0.7967,
      "step": 474470
    },
    {
      "epoch": 1.6629341071822856,
      "grad_norm": 2.78125,
      "learning_rate": 2.4760573325960366e-05,
      "loss": 0.8286,
      "step": 474480
    },
    {
      "epoch": 1.6629691546891812,
      "grad_norm": 2.75,
      "learning_rate": 2.4759924297296668e-05,
      "loss": 0.8761,
      "step": 474490
    },
    {
      "epoch": 1.6630042021960767,
      "grad_norm": 2.96875,
      "learning_rate": 2.4759275268632966e-05,
      "loss": 0.7773,
      "step": 474500
    },
    {
      "epoch": 1.6630392497029725,
      "grad_norm": 2.796875,
      "learning_rate": 2.4758626239969264e-05,
      "loss": 0.8429,
      "step": 474510
    },
    {
      "epoch": 1.663074297209868,
      "grad_norm": 2.875,
      "learning_rate": 2.475797721130556e-05,
      "loss": 0.8555,
      "step": 474520
    },
    {
      "epoch": 1.6631093447167635,
      "grad_norm": 2.609375,
      "learning_rate": 2.475732818264186e-05,
      "loss": 0.844,
      "step": 474530
    },
    {
      "epoch": 1.6631443922236593,
      "grad_norm": 2.609375,
      "learning_rate": 2.4756679153978158e-05,
      "loss": 0.8283,
      "step": 474540
    },
    {
      "epoch": 1.6631794397305548,
      "grad_norm": 3.03125,
      "learning_rate": 2.4756030125314456e-05,
      "loss": 0.8598,
      "step": 474550
    },
    {
      "epoch": 1.6632144872374504,
      "grad_norm": 2.609375,
      "learning_rate": 2.4755381096650754e-05,
      "loss": 0.8211,
      "step": 474560
    },
    {
      "epoch": 1.6632495347443461,
      "grad_norm": 2.609375,
      "learning_rate": 2.475473206798705e-05,
      "loss": 0.7925,
      "step": 474570
    },
    {
      "epoch": 1.6632845822512414,
      "grad_norm": 3.109375,
      "learning_rate": 2.475408303932335e-05,
      "loss": 0.8673,
      "step": 474580
    },
    {
      "epoch": 1.6633196297581372,
      "grad_norm": 2.75,
      "learning_rate": 2.4753434010659648e-05,
      "loss": 0.793,
      "step": 474590
    },
    {
      "epoch": 1.6633546772650327,
      "grad_norm": 2.921875,
      "learning_rate": 2.4752784981995946e-05,
      "loss": 0.8694,
      "step": 474600
    },
    {
      "epoch": 1.6633897247719283,
      "grad_norm": 2.890625,
      "learning_rate": 2.4752135953332244e-05,
      "loss": 0.8594,
      "step": 474610
    },
    {
      "epoch": 1.663424772278824,
      "grad_norm": 2.5625,
      "learning_rate": 2.475148692466854e-05,
      "loss": 0.807,
      "step": 474620
    },
    {
      "epoch": 1.6634598197857196,
      "grad_norm": 2.953125,
      "learning_rate": 2.4750837896004843e-05,
      "loss": 0.8618,
      "step": 474630
    },
    {
      "epoch": 1.663494867292615,
      "grad_norm": 2.625,
      "learning_rate": 2.475018886734114e-05,
      "loss": 0.9225,
      "step": 474640
    },
    {
      "epoch": 1.6635299147995108,
      "grad_norm": 2.8125,
      "learning_rate": 2.4749539838677436e-05,
      "loss": 0.837,
      "step": 474650
    },
    {
      "epoch": 1.6635649623064064,
      "grad_norm": 3.015625,
      "learning_rate": 2.4748890810013734e-05,
      "loss": 0.9067,
      "step": 474660
    },
    {
      "epoch": 1.663600009813302,
      "grad_norm": 2.40625,
      "learning_rate": 2.474824178135003e-05,
      "loss": 0.7819,
      "step": 474670
    },
    {
      "epoch": 1.6636350573201977,
      "grad_norm": 2.921875,
      "learning_rate": 2.474759275268633e-05,
      "loss": 0.9324,
      "step": 474680
    },
    {
      "epoch": 1.663670104827093,
      "grad_norm": 2.9375,
      "learning_rate": 2.4746943724022628e-05,
      "loss": 0.911,
      "step": 474690
    },
    {
      "epoch": 1.6637051523339887,
      "grad_norm": 2.734375,
      "learning_rate": 2.4746294695358926e-05,
      "loss": 0.8602,
      "step": 474700
    },
    {
      "epoch": 1.6637401998408843,
      "grad_norm": 2.921875,
      "learning_rate": 2.4745645666695224e-05,
      "loss": 0.8248,
      "step": 474710
    },
    {
      "epoch": 1.6637752473477798,
      "grad_norm": 2.75,
      "learning_rate": 2.474499663803152e-05,
      "loss": 0.8901,
      "step": 474720
    },
    {
      "epoch": 1.6638102948546756,
      "grad_norm": 2.890625,
      "learning_rate": 2.474434760936782e-05,
      "loss": 0.83,
      "step": 474730
    },
    {
      "epoch": 1.6638453423615711,
      "grad_norm": 2.9375,
      "learning_rate": 2.474369858070412e-05,
      "loss": 0.8355,
      "step": 474740
    },
    {
      "epoch": 1.6638803898684666,
      "grad_norm": 3.109375,
      "learning_rate": 2.474304955204042e-05,
      "loss": 0.8449,
      "step": 474750
    },
    {
      "epoch": 1.6639154373753624,
      "grad_norm": 2.96875,
      "learning_rate": 2.4742400523376717e-05,
      "loss": 0.7791,
      "step": 474760
    },
    {
      "epoch": 1.663950484882258,
      "grad_norm": 2.578125,
      "learning_rate": 2.4741751494713015e-05,
      "loss": 0.8446,
      "step": 474770
    },
    {
      "epoch": 1.6639855323891535,
      "grad_norm": 2.59375,
      "learning_rate": 2.4741102466049313e-05,
      "loss": 0.7923,
      "step": 474780
    },
    {
      "epoch": 1.6640205798960492,
      "grad_norm": 2.703125,
      "learning_rate": 2.474045343738561e-05,
      "loss": 0.8002,
      "step": 474790
    },
    {
      "epoch": 1.6640556274029445,
      "grad_norm": 3.046875,
      "learning_rate": 2.473980440872191e-05,
      "loss": 0.8014,
      "step": 474800
    },
    {
      "epoch": 1.6640906749098403,
      "grad_norm": 2.90625,
      "learning_rate": 2.4739155380058207e-05,
      "loss": 0.8447,
      "step": 474810
    },
    {
      "epoch": 1.6641257224167358,
      "grad_norm": 3.625,
      "learning_rate": 2.4738506351394505e-05,
      "loss": 0.8441,
      "step": 474820
    },
    {
      "epoch": 1.6641607699236314,
      "grad_norm": 2.953125,
      "learning_rate": 2.4737857322730803e-05,
      "loss": 0.921,
      "step": 474830
    },
    {
      "epoch": 1.6641958174305271,
      "grad_norm": 2.953125,
      "learning_rate": 2.4737208294067098e-05,
      "loss": 0.954,
      "step": 474840
    },
    {
      "epoch": 1.6642308649374227,
      "grad_norm": 2.828125,
      "learning_rate": 2.4736559265403396e-05,
      "loss": 0.8135,
      "step": 474850
    },
    {
      "epoch": 1.6642659124443182,
      "grad_norm": 3.078125,
      "learning_rate": 2.4735910236739697e-05,
      "loss": 0.877,
      "step": 474860
    },
    {
      "epoch": 1.664300959951214,
      "grad_norm": 2.890625,
      "learning_rate": 2.4735261208075995e-05,
      "loss": 0.9044,
      "step": 474870
    },
    {
      "epoch": 1.6643360074581095,
      "grad_norm": 3.203125,
      "learning_rate": 2.4734612179412293e-05,
      "loss": 0.8518,
      "step": 474880
    },
    {
      "epoch": 1.664371054965005,
      "grad_norm": 2.9375,
      "learning_rate": 2.473396315074859e-05,
      "loss": 0.7263,
      "step": 474890
    },
    {
      "epoch": 1.6644061024719008,
      "grad_norm": 3.03125,
      "learning_rate": 2.473331412208489e-05,
      "loss": 0.8494,
      "step": 474900
    },
    {
      "epoch": 1.664441149978796,
      "grad_norm": 2.875,
      "learning_rate": 2.4732665093421187e-05,
      "loss": 0.7224,
      "step": 474910
    },
    {
      "epoch": 1.6644761974856919,
      "grad_norm": 3.5,
      "learning_rate": 2.4732016064757485e-05,
      "loss": 0.8631,
      "step": 474920
    },
    {
      "epoch": 1.6645112449925874,
      "grad_norm": 2.734375,
      "learning_rate": 2.4731367036093783e-05,
      "loss": 0.7902,
      "step": 474930
    },
    {
      "epoch": 1.664546292499483,
      "grad_norm": 2.640625,
      "learning_rate": 2.473071800743008e-05,
      "loss": 0.8587,
      "step": 474940
    },
    {
      "epoch": 1.6645813400063787,
      "grad_norm": 3.109375,
      "learning_rate": 2.473006897876638e-05,
      "loss": 0.8423,
      "step": 474950
    },
    {
      "epoch": 1.6646163875132742,
      "grad_norm": 3.140625,
      "learning_rate": 2.4729419950102677e-05,
      "loss": 0.9503,
      "step": 474960
    },
    {
      "epoch": 1.6646514350201698,
      "grad_norm": 2.375,
      "learning_rate": 2.4728770921438975e-05,
      "loss": 0.874,
      "step": 474970
    },
    {
      "epoch": 1.6646864825270655,
      "grad_norm": 2.890625,
      "learning_rate": 2.4728121892775273e-05,
      "loss": 0.885,
      "step": 474980
    },
    {
      "epoch": 1.664721530033961,
      "grad_norm": 2.6875,
      "learning_rate": 2.4727472864111574e-05,
      "loss": 0.9106,
      "step": 474990
    },
    {
      "epoch": 1.6647565775408566,
      "grad_norm": 2.984375,
      "learning_rate": 2.4726823835447872e-05,
      "loss": 0.8161,
      "step": 475000
    },
    {
      "epoch": 1.6647565775408566,
      "eval_loss": 0.7825086712837219,
      "eval_runtime": 556.8031,
      "eval_samples_per_second": 683.251,
      "eval_steps_per_second": 56.938,
      "step": 475000
    },
    {
      "epoch": 1.6647916250477524,
      "grad_norm": 2.859375,
      "learning_rate": 2.472617480678417e-05,
      "loss": 0.8115,
      "step": 475010
    },
    {
      "epoch": 1.6648266725546477,
      "grad_norm": 3.078125,
      "learning_rate": 2.472552577812047e-05,
      "loss": 0.927,
      "step": 475020
    },
    {
      "epoch": 1.6648617200615434,
      "grad_norm": 2.625,
      "learning_rate": 2.4724876749456763e-05,
      "loss": 0.7692,
      "step": 475030
    },
    {
      "epoch": 1.6648967675684392,
      "grad_norm": 3.09375,
      "learning_rate": 2.472422772079306e-05,
      "loss": 0.8485,
      "step": 475040
    },
    {
      "epoch": 1.6649318150753345,
      "grad_norm": 3.078125,
      "learning_rate": 2.472357869212936e-05,
      "loss": 0.7736,
      "step": 475050
    },
    {
      "epoch": 1.6649668625822303,
      "grad_norm": 2.9375,
      "learning_rate": 2.4722929663465657e-05,
      "loss": 0.813,
      "step": 475060
    },
    {
      "epoch": 1.6650019100891258,
      "grad_norm": 2.46875,
      "learning_rate": 2.4722280634801955e-05,
      "loss": 0.8147,
      "step": 475070
    },
    {
      "epoch": 1.6650369575960213,
      "grad_norm": 3.15625,
      "learning_rate": 2.4721631606138253e-05,
      "loss": 0.8762,
      "step": 475080
    },
    {
      "epoch": 1.665072005102917,
      "grad_norm": 3.0,
      "learning_rate": 2.472098257747455e-05,
      "loss": 0.8449,
      "step": 475090
    },
    {
      "epoch": 1.6651070526098126,
      "grad_norm": 2.5,
      "learning_rate": 2.472033354881085e-05,
      "loss": 0.8024,
      "step": 475100
    },
    {
      "epoch": 1.6651421001167082,
      "grad_norm": 2.875,
      "learning_rate": 2.471968452014715e-05,
      "loss": 0.816,
      "step": 475110
    },
    {
      "epoch": 1.665177147623604,
      "grad_norm": 2.84375,
      "learning_rate": 2.471903549148345e-05,
      "loss": 0.8837,
      "step": 475120
    },
    {
      "epoch": 1.6652121951304992,
      "grad_norm": 2.890625,
      "learning_rate": 2.4718386462819746e-05,
      "loss": 0.9026,
      "step": 475130
    },
    {
      "epoch": 1.665247242637395,
      "grad_norm": 3.1875,
      "learning_rate": 2.4717737434156044e-05,
      "loss": 0.8377,
      "step": 475140
    },
    {
      "epoch": 1.6652822901442907,
      "grad_norm": 2.859375,
      "learning_rate": 2.4717088405492342e-05,
      "loss": 0.8014,
      "step": 475150
    },
    {
      "epoch": 1.665317337651186,
      "grad_norm": 3.0,
      "learning_rate": 2.471643937682864e-05,
      "loss": 0.8369,
      "step": 475160
    },
    {
      "epoch": 1.6653523851580818,
      "grad_norm": 2.609375,
      "learning_rate": 2.471579034816494e-05,
      "loss": 0.8503,
      "step": 475170
    },
    {
      "epoch": 1.6653874326649774,
      "grad_norm": 2.8125,
      "learning_rate": 2.4715141319501236e-05,
      "loss": 0.7654,
      "step": 475180
    },
    {
      "epoch": 1.6654224801718729,
      "grad_norm": 2.9375,
      "learning_rate": 2.4714492290837534e-05,
      "loss": 0.8448,
      "step": 475190
    },
    {
      "epoch": 1.6654575276787686,
      "grad_norm": 3.0,
      "learning_rate": 2.4713843262173832e-05,
      "loss": 0.8647,
      "step": 475200
    },
    {
      "epoch": 1.6654925751856642,
      "grad_norm": 3.390625,
      "learning_rate": 2.471319423351013e-05,
      "loss": 0.799,
      "step": 475210
    },
    {
      "epoch": 1.6655276226925597,
      "grad_norm": 3.0,
      "learning_rate": 2.471254520484643e-05,
      "loss": 0.9372,
      "step": 475220
    },
    {
      "epoch": 1.6655626701994555,
      "grad_norm": 3.0,
      "learning_rate": 2.4711896176182726e-05,
      "loss": 0.9121,
      "step": 475230
    },
    {
      "epoch": 1.665597717706351,
      "grad_norm": 2.921875,
      "learning_rate": 2.4711247147519024e-05,
      "loss": 0.9303,
      "step": 475240
    },
    {
      "epoch": 1.6656327652132465,
      "grad_norm": 2.609375,
      "learning_rate": 2.4710598118855322e-05,
      "loss": 0.8027,
      "step": 475250
    },
    {
      "epoch": 1.6656678127201423,
      "grad_norm": 3.03125,
      "learning_rate": 2.470994909019162e-05,
      "loss": 0.8213,
      "step": 475260
    },
    {
      "epoch": 1.6657028602270376,
      "grad_norm": 2.78125,
      "learning_rate": 2.470930006152792e-05,
      "loss": 0.9048,
      "step": 475270
    },
    {
      "epoch": 1.6657379077339334,
      "grad_norm": 3.09375,
      "learning_rate": 2.4708651032864216e-05,
      "loss": 0.8712,
      "step": 475280
    },
    {
      "epoch": 1.665772955240829,
      "grad_norm": 2.796875,
      "learning_rate": 2.4708002004200514e-05,
      "loss": 0.8505,
      "step": 475290
    },
    {
      "epoch": 1.6658080027477244,
      "grad_norm": 2.90625,
      "learning_rate": 2.4707352975536812e-05,
      "loss": 0.9441,
      "step": 475300
    },
    {
      "epoch": 1.6658430502546202,
      "grad_norm": 2.96875,
      "learning_rate": 2.470670394687311e-05,
      "loss": 0.8069,
      "step": 475310
    },
    {
      "epoch": 1.6658780977615157,
      "grad_norm": 3.078125,
      "learning_rate": 2.470605491820941e-05,
      "loss": 0.7952,
      "step": 475320
    },
    {
      "epoch": 1.6659131452684113,
      "grad_norm": 2.984375,
      "learning_rate": 2.4705405889545706e-05,
      "loss": 0.8757,
      "step": 475330
    },
    {
      "epoch": 1.665948192775307,
      "grad_norm": 2.890625,
      "learning_rate": 2.4704756860882004e-05,
      "loss": 0.8132,
      "step": 475340
    },
    {
      "epoch": 1.6659832402822026,
      "grad_norm": 2.515625,
      "learning_rate": 2.4704107832218302e-05,
      "loss": 0.7801,
      "step": 475350
    },
    {
      "epoch": 1.666018287789098,
      "grad_norm": 2.25,
      "learning_rate": 2.4703458803554604e-05,
      "loss": 0.8254,
      "step": 475360
    },
    {
      "epoch": 1.6660533352959939,
      "grad_norm": 2.859375,
      "learning_rate": 2.4702809774890902e-05,
      "loss": 0.8441,
      "step": 475370
    },
    {
      "epoch": 1.6660883828028892,
      "grad_norm": 3.46875,
      "learning_rate": 2.47021607462272e-05,
      "loss": 0.8749,
      "step": 475380
    },
    {
      "epoch": 1.666123430309785,
      "grad_norm": 2.84375,
      "learning_rate": 2.4701511717563498e-05,
      "loss": 0.8389,
      "step": 475390
    },
    {
      "epoch": 1.6661584778166805,
      "grad_norm": 2.90625,
      "learning_rate": 2.4700862688899792e-05,
      "loss": 0.9261,
      "step": 475400
    },
    {
      "epoch": 1.666193525323576,
      "grad_norm": 2.671875,
      "learning_rate": 2.470021366023609e-05,
      "loss": 0.8856,
      "step": 475410
    },
    {
      "epoch": 1.6662285728304718,
      "grad_norm": 2.984375,
      "learning_rate": 2.469956463157239e-05,
      "loss": 0.8316,
      "step": 475420
    },
    {
      "epoch": 1.6662636203373673,
      "grad_norm": 3.5625,
      "learning_rate": 2.4698915602908686e-05,
      "loss": 0.7945,
      "step": 475430
    },
    {
      "epoch": 1.6662986678442628,
      "grad_norm": 2.421875,
      "learning_rate": 2.4698266574244984e-05,
      "loss": 0.8067,
      "step": 475440
    },
    {
      "epoch": 1.6663337153511586,
      "grad_norm": 2.9375,
      "learning_rate": 2.4697617545581282e-05,
      "loss": 0.7734,
      "step": 475450
    },
    {
      "epoch": 1.6663687628580541,
      "grad_norm": 3.109375,
      "learning_rate": 2.469696851691758e-05,
      "loss": 0.8785,
      "step": 475460
    },
    {
      "epoch": 1.6664038103649497,
      "grad_norm": 2.625,
      "learning_rate": 2.4696319488253882e-05,
      "loss": 0.8209,
      "step": 475470
    },
    {
      "epoch": 1.6664388578718454,
      "grad_norm": 3.3125,
      "learning_rate": 2.469567045959018e-05,
      "loss": 0.8286,
      "step": 475480
    },
    {
      "epoch": 1.6664739053787407,
      "grad_norm": 3.125,
      "learning_rate": 2.4695021430926478e-05,
      "loss": 0.8502,
      "step": 475490
    },
    {
      "epoch": 1.6665089528856365,
      "grad_norm": 3.1875,
      "learning_rate": 2.4694372402262776e-05,
      "loss": 0.8952,
      "step": 475500
    },
    {
      "epoch": 1.666544000392532,
      "grad_norm": 2.53125,
      "learning_rate": 2.4693723373599074e-05,
      "loss": 0.7333,
      "step": 475510
    },
    {
      "epoch": 1.6665790478994276,
      "grad_norm": 3.09375,
      "learning_rate": 2.4693074344935372e-05,
      "loss": 0.8388,
      "step": 475520
    },
    {
      "epoch": 1.6666140954063233,
      "grad_norm": 3.21875,
      "learning_rate": 2.469242531627167e-05,
      "loss": 0.7927,
      "step": 475530
    },
    {
      "epoch": 1.6666491429132189,
      "grad_norm": 2.828125,
      "learning_rate": 2.4691776287607968e-05,
      "loss": 0.7933,
      "step": 475540
    },
    {
      "epoch": 1.6666841904201144,
      "grad_norm": 3.046875,
      "learning_rate": 2.4691127258944266e-05,
      "loss": 0.8128,
      "step": 475550
    },
    {
      "epoch": 1.6667192379270102,
      "grad_norm": 3.40625,
      "learning_rate": 2.4690478230280564e-05,
      "loss": 0.8917,
      "step": 475560
    },
    {
      "epoch": 1.6667542854339057,
      "grad_norm": 2.671875,
      "learning_rate": 2.4689829201616862e-05,
      "loss": 0.8379,
      "step": 475570
    },
    {
      "epoch": 1.6667893329408012,
      "grad_norm": 3.359375,
      "learning_rate": 2.468918017295316e-05,
      "loss": 0.8552,
      "step": 475580
    },
    {
      "epoch": 1.666824380447697,
      "grad_norm": 2.796875,
      "learning_rate": 2.4688531144289458e-05,
      "loss": 0.8705,
      "step": 475590
    },
    {
      "epoch": 1.6668594279545923,
      "grad_norm": 3.125,
      "learning_rate": 2.4687882115625756e-05,
      "loss": 0.8595,
      "step": 475600
    },
    {
      "epoch": 1.666894475461488,
      "grad_norm": 3.03125,
      "learning_rate": 2.4687233086962054e-05,
      "loss": 0.8909,
      "step": 475610
    },
    {
      "epoch": 1.6669295229683836,
      "grad_norm": 2.640625,
      "learning_rate": 2.4686584058298352e-05,
      "loss": 0.8474,
      "step": 475620
    },
    {
      "epoch": 1.6669645704752791,
      "grad_norm": 3.390625,
      "learning_rate": 2.468593502963465e-05,
      "loss": 0.817,
      "step": 475630
    },
    {
      "epoch": 1.6669996179821749,
      "grad_norm": 3.0,
      "learning_rate": 2.4685286000970948e-05,
      "loss": 0.8638,
      "step": 475640
    },
    {
      "epoch": 1.6670346654890704,
      "grad_norm": 3.0,
      "learning_rate": 2.4684636972307246e-05,
      "loss": 0.8253,
      "step": 475650
    },
    {
      "epoch": 1.667069712995966,
      "grad_norm": 2.75,
      "learning_rate": 2.4683987943643544e-05,
      "loss": 0.8302,
      "step": 475660
    },
    {
      "epoch": 1.6671047605028617,
      "grad_norm": 2.65625,
      "learning_rate": 2.4683338914979842e-05,
      "loss": 0.9142,
      "step": 475670
    },
    {
      "epoch": 1.6671398080097573,
      "grad_norm": 2.4375,
      "learning_rate": 2.468268988631614e-05,
      "loss": 0.8636,
      "step": 475680
    },
    {
      "epoch": 1.6671748555166528,
      "grad_norm": 3.125,
      "learning_rate": 2.4682040857652438e-05,
      "loss": 0.8142,
      "step": 475690
    },
    {
      "epoch": 1.6672099030235485,
      "grad_norm": 2.625,
      "learning_rate": 2.4681391828988736e-05,
      "loss": 0.9097,
      "step": 475700
    },
    {
      "epoch": 1.6672449505304439,
      "grad_norm": 2.796875,
      "learning_rate": 2.4680742800325034e-05,
      "loss": 0.8192,
      "step": 475710
    },
    {
      "epoch": 1.6672799980373396,
      "grad_norm": 2.65625,
      "learning_rate": 2.4680093771661332e-05,
      "loss": 0.8634,
      "step": 475720
    },
    {
      "epoch": 1.6673150455442354,
      "grad_norm": 3.0625,
      "learning_rate": 2.4679444742997633e-05,
      "loss": 0.8055,
      "step": 475730
    },
    {
      "epoch": 1.6673500930511307,
      "grad_norm": 2.515625,
      "learning_rate": 2.467879571433393e-05,
      "loss": 0.8503,
      "step": 475740
    },
    {
      "epoch": 1.6673851405580264,
      "grad_norm": 2.671875,
      "learning_rate": 2.467814668567023e-05,
      "loss": 0.8599,
      "step": 475750
    },
    {
      "epoch": 1.667420188064922,
      "grad_norm": 2.984375,
      "learning_rate": 2.4677497657006527e-05,
      "loss": 0.8396,
      "step": 475760
    },
    {
      "epoch": 1.6674552355718175,
      "grad_norm": 3.03125,
      "learning_rate": 2.4676848628342825e-05,
      "loss": 0.8307,
      "step": 475770
    },
    {
      "epoch": 1.6674902830787133,
      "grad_norm": 2.8125,
      "learning_rate": 2.467619959967912e-05,
      "loss": 0.9222,
      "step": 475780
    },
    {
      "epoch": 1.6675253305856088,
      "grad_norm": 2.875,
      "learning_rate": 2.4675550571015418e-05,
      "loss": 0.8605,
      "step": 475790
    },
    {
      "epoch": 1.6675603780925043,
      "grad_norm": 2.640625,
      "learning_rate": 2.4674901542351716e-05,
      "loss": 0.7408,
      "step": 475800
    },
    {
      "epoch": 1.6675954255994,
      "grad_norm": 2.734375,
      "learning_rate": 2.4674252513688014e-05,
      "loss": 0.7972,
      "step": 475810
    },
    {
      "epoch": 1.6676304731062954,
      "grad_norm": 2.84375,
      "learning_rate": 2.4673603485024312e-05,
      "loss": 0.8027,
      "step": 475820
    },
    {
      "epoch": 1.6676655206131912,
      "grad_norm": 3.0,
      "learning_rate": 2.467295445636061e-05,
      "loss": 0.8677,
      "step": 475830
    },
    {
      "epoch": 1.667700568120087,
      "grad_norm": 2.375,
      "learning_rate": 2.467230542769691e-05,
      "loss": 0.8062,
      "step": 475840
    },
    {
      "epoch": 1.6677356156269822,
      "grad_norm": 2.75,
      "learning_rate": 2.467165639903321e-05,
      "loss": 0.8123,
      "step": 475850
    },
    {
      "epoch": 1.667770663133878,
      "grad_norm": 3.265625,
      "learning_rate": 2.4671007370369507e-05,
      "loss": 0.729,
      "step": 475860
    },
    {
      "epoch": 1.6678057106407735,
      "grad_norm": 3.40625,
      "learning_rate": 2.4670358341705805e-05,
      "loss": 0.8939,
      "step": 475870
    },
    {
      "epoch": 1.667840758147669,
      "grad_norm": 2.890625,
      "learning_rate": 2.4669709313042103e-05,
      "loss": 0.8305,
      "step": 475880
    },
    {
      "epoch": 1.6678758056545648,
      "grad_norm": 3.015625,
      "learning_rate": 2.46690602843784e-05,
      "loss": 0.8807,
      "step": 475890
    },
    {
      "epoch": 1.6679108531614604,
      "grad_norm": 3.3125,
      "learning_rate": 2.46684112557147e-05,
      "loss": 0.9686,
      "step": 475900
    },
    {
      "epoch": 1.667945900668356,
      "grad_norm": 3.046875,
      "learning_rate": 2.4667762227050997e-05,
      "loss": 0.7631,
      "step": 475910
    },
    {
      "epoch": 1.6679809481752517,
      "grad_norm": 2.953125,
      "learning_rate": 2.4667113198387295e-05,
      "loss": 0.8957,
      "step": 475920
    },
    {
      "epoch": 1.6680159956821472,
      "grad_norm": 2.671875,
      "learning_rate": 2.4666464169723593e-05,
      "loss": 0.8324,
      "step": 475930
    },
    {
      "epoch": 1.6680510431890427,
      "grad_norm": 3.109375,
      "learning_rate": 2.466581514105989e-05,
      "loss": 0.8971,
      "step": 475940
    },
    {
      "epoch": 1.6680860906959385,
      "grad_norm": 2.75,
      "learning_rate": 2.466516611239619e-05,
      "loss": 0.901,
      "step": 475950
    },
    {
      "epoch": 1.6681211382028338,
      "grad_norm": 3.0,
      "learning_rate": 2.4664517083732487e-05,
      "loss": 0.7832,
      "step": 475960
    },
    {
      "epoch": 1.6681561857097296,
      "grad_norm": 3.046875,
      "learning_rate": 2.4663868055068785e-05,
      "loss": 0.8371,
      "step": 475970
    },
    {
      "epoch": 1.668191233216625,
      "grad_norm": 2.625,
      "learning_rate": 2.4663219026405083e-05,
      "loss": 0.8992,
      "step": 475980
    },
    {
      "epoch": 1.6682262807235206,
      "grad_norm": 3.03125,
      "learning_rate": 2.466256999774138e-05,
      "loss": 0.9008,
      "step": 475990
    },
    {
      "epoch": 1.6682613282304164,
      "grad_norm": 2.65625,
      "learning_rate": 2.466192096907768e-05,
      "loss": 0.8626,
      "step": 476000
    },
    {
      "epoch": 1.668296375737312,
      "grad_norm": 2.78125,
      "learning_rate": 2.4661271940413977e-05,
      "loss": 0.8471,
      "step": 476010
    },
    {
      "epoch": 1.6683314232442075,
      "grad_norm": 2.9375,
      "learning_rate": 2.4660622911750275e-05,
      "loss": 0.9113,
      "step": 476020
    },
    {
      "epoch": 1.6683664707511032,
      "grad_norm": 3.3125,
      "learning_rate": 2.4659973883086573e-05,
      "loss": 0.8334,
      "step": 476030
    },
    {
      "epoch": 1.6684015182579988,
      "grad_norm": 3.171875,
      "learning_rate": 2.465932485442287e-05,
      "loss": 0.9142,
      "step": 476040
    },
    {
      "epoch": 1.6684365657648943,
      "grad_norm": 3.140625,
      "learning_rate": 2.465867582575917e-05,
      "loss": 0.8538,
      "step": 476050
    },
    {
      "epoch": 1.66847161327179,
      "grad_norm": 3.0,
      "learning_rate": 2.4658026797095467e-05,
      "loss": 0.7426,
      "step": 476060
    },
    {
      "epoch": 1.6685066607786854,
      "grad_norm": 2.609375,
      "learning_rate": 2.4657377768431765e-05,
      "loss": 0.8268,
      "step": 476070
    },
    {
      "epoch": 1.6685417082855811,
      "grad_norm": 2.96875,
      "learning_rate": 2.4656728739768063e-05,
      "loss": 0.8854,
      "step": 476080
    },
    {
      "epoch": 1.6685767557924767,
      "grad_norm": 2.765625,
      "learning_rate": 2.4656079711104365e-05,
      "loss": 0.7318,
      "step": 476090
    },
    {
      "epoch": 1.6686118032993722,
      "grad_norm": 2.921875,
      "learning_rate": 2.4655430682440663e-05,
      "loss": 0.8818,
      "step": 476100
    },
    {
      "epoch": 1.668646850806268,
      "grad_norm": 3.21875,
      "learning_rate": 2.465478165377696e-05,
      "loss": 0.8105,
      "step": 476110
    },
    {
      "epoch": 1.6686818983131635,
      "grad_norm": 2.90625,
      "learning_rate": 2.465413262511326e-05,
      "loss": 0.8627,
      "step": 476120
    },
    {
      "epoch": 1.668716945820059,
      "grad_norm": 2.828125,
      "learning_rate": 2.4653483596449557e-05,
      "loss": 0.8605,
      "step": 476130
    },
    {
      "epoch": 1.6687519933269548,
      "grad_norm": 3.203125,
      "learning_rate": 2.4652834567785855e-05,
      "loss": 0.8909,
      "step": 476140
    },
    {
      "epoch": 1.6687870408338503,
      "grad_norm": 2.59375,
      "learning_rate": 2.4652185539122153e-05,
      "loss": 0.8633,
      "step": 476150
    },
    {
      "epoch": 1.6688220883407459,
      "grad_norm": 3.140625,
      "learning_rate": 2.4651536510458447e-05,
      "loss": 0.8434,
      "step": 476160
    },
    {
      "epoch": 1.6688571358476416,
      "grad_norm": 3.125,
      "learning_rate": 2.4650887481794745e-05,
      "loss": 0.784,
      "step": 476170
    },
    {
      "epoch": 1.668892183354537,
      "grad_norm": 3.140625,
      "learning_rate": 2.4650238453131043e-05,
      "loss": 0.8282,
      "step": 476180
    },
    {
      "epoch": 1.6689272308614327,
      "grad_norm": 2.296875,
      "learning_rate": 2.464958942446734e-05,
      "loss": 0.7898,
      "step": 476190
    },
    {
      "epoch": 1.6689622783683282,
      "grad_norm": 2.59375,
      "learning_rate": 2.464894039580364e-05,
      "loss": 0.8293,
      "step": 476200
    },
    {
      "epoch": 1.6689973258752238,
      "grad_norm": 2.875,
      "learning_rate": 2.464829136713994e-05,
      "loss": 0.8734,
      "step": 476210
    },
    {
      "epoch": 1.6690323733821195,
      "grad_norm": 3.609375,
      "learning_rate": 2.464764233847624e-05,
      "loss": 0.8985,
      "step": 476220
    },
    {
      "epoch": 1.669067420889015,
      "grad_norm": 2.46875,
      "learning_rate": 2.4646993309812537e-05,
      "loss": 0.8122,
      "step": 476230
    },
    {
      "epoch": 1.6691024683959106,
      "grad_norm": 3.109375,
      "learning_rate": 2.4646344281148835e-05,
      "loss": 0.872,
      "step": 476240
    },
    {
      "epoch": 1.6691375159028063,
      "grad_norm": 2.734375,
      "learning_rate": 2.4645695252485133e-05,
      "loss": 0.7936,
      "step": 476250
    },
    {
      "epoch": 1.6691725634097019,
      "grad_norm": 3.09375,
      "learning_rate": 2.464504622382143e-05,
      "loss": 0.8517,
      "step": 476260
    },
    {
      "epoch": 1.6692076109165974,
      "grad_norm": 3.328125,
      "learning_rate": 2.464439719515773e-05,
      "loss": 0.8781,
      "step": 476270
    },
    {
      "epoch": 1.6692426584234932,
      "grad_norm": 3.125,
      "learning_rate": 2.4643748166494027e-05,
      "loss": 0.8635,
      "step": 476280
    },
    {
      "epoch": 1.6692777059303885,
      "grad_norm": 2.75,
      "learning_rate": 2.4643099137830325e-05,
      "loss": 0.772,
      "step": 476290
    },
    {
      "epoch": 1.6693127534372842,
      "grad_norm": 3.265625,
      "learning_rate": 2.4642450109166623e-05,
      "loss": 0.8692,
      "step": 476300
    },
    {
      "epoch": 1.6693478009441798,
      "grad_norm": 2.921875,
      "learning_rate": 2.464180108050292e-05,
      "loss": 0.8946,
      "step": 476310
    },
    {
      "epoch": 1.6693828484510753,
      "grad_norm": 2.984375,
      "learning_rate": 2.464115205183922e-05,
      "loss": 0.7921,
      "step": 476320
    },
    {
      "epoch": 1.669417895957971,
      "grad_norm": 2.734375,
      "learning_rate": 2.4640503023175517e-05,
      "loss": 0.8273,
      "step": 476330
    },
    {
      "epoch": 1.6694529434648666,
      "grad_norm": 2.765625,
      "learning_rate": 2.4639853994511815e-05,
      "loss": 0.8279,
      "step": 476340
    },
    {
      "epoch": 1.6694879909717621,
      "grad_norm": 2.859375,
      "learning_rate": 2.4639204965848113e-05,
      "loss": 0.8614,
      "step": 476350
    },
    {
      "epoch": 1.669523038478658,
      "grad_norm": 2.9375,
      "learning_rate": 2.463855593718441e-05,
      "loss": 0.8151,
      "step": 476360
    },
    {
      "epoch": 1.6695580859855534,
      "grad_norm": 2.828125,
      "learning_rate": 2.463790690852071e-05,
      "loss": 0.7511,
      "step": 476370
    },
    {
      "epoch": 1.669593133492449,
      "grad_norm": 2.671875,
      "learning_rate": 2.4637257879857007e-05,
      "loss": 0.8511,
      "step": 476380
    },
    {
      "epoch": 1.6696281809993447,
      "grad_norm": 2.578125,
      "learning_rate": 2.4636608851193305e-05,
      "loss": 0.7915,
      "step": 476390
    },
    {
      "epoch": 1.66966322850624,
      "grad_norm": 3.140625,
      "learning_rate": 2.4635959822529603e-05,
      "loss": 0.792,
      "step": 476400
    },
    {
      "epoch": 1.6696982760131358,
      "grad_norm": 2.796875,
      "learning_rate": 2.46353107938659e-05,
      "loss": 0.8036,
      "step": 476410
    },
    {
      "epoch": 1.6697333235200316,
      "grad_norm": 2.796875,
      "learning_rate": 2.46346617652022e-05,
      "loss": 0.86,
      "step": 476420
    },
    {
      "epoch": 1.6697683710269269,
      "grad_norm": 2.578125,
      "learning_rate": 2.4634012736538497e-05,
      "loss": 0.8388,
      "step": 476430
    },
    {
      "epoch": 1.6698034185338226,
      "grad_norm": 2.5,
      "learning_rate": 2.4633363707874795e-05,
      "loss": 0.8211,
      "step": 476440
    },
    {
      "epoch": 1.6698384660407182,
      "grad_norm": 2.890625,
      "learning_rate": 2.4632714679211093e-05,
      "loss": 0.7428,
      "step": 476450
    },
    {
      "epoch": 1.6698735135476137,
      "grad_norm": 2.6875,
      "learning_rate": 2.4632065650547394e-05,
      "loss": 0.8534,
      "step": 476460
    },
    {
      "epoch": 1.6699085610545095,
      "grad_norm": 3.421875,
      "learning_rate": 2.4631416621883692e-05,
      "loss": 0.9192,
      "step": 476470
    },
    {
      "epoch": 1.669943608561405,
      "grad_norm": 3.171875,
      "learning_rate": 2.463076759321999e-05,
      "loss": 0.8265,
      "step": 476480
    },
    {
      "epoch": 1.6699786560683005,
      "grad_norm": 3.109375,
      "learning_rate": 2.4630118564556288e-05,
      "loss": 0.8384,
      "step": 476490
    },
    {
      "epoch": 1.6700137035751963,
      "grad_norm": 2.859375,
      "learning_rate": 2.4629469535892586e-05,
      "loss": 0.832,
      "step": 476500
    },
    {
      "epoch": 1.6700487510820916,
      "grad_norm": 2.859375,
      "learning_rate": 2.4628820507228884e-05,
      "loss": 0.8103,
      "step": 476510
    },
    {
      "epoch": 1.6700837985889874,
      "grad_norm": 2.578125,
      "learning_rate": 2.4628171478565182e-05,
      "loss": 0.8412,
      "step": 476520
    },
    {
      "epoch": 1.6701188460958831,
      "grad_norm": 2.78125,
      "learning_rate": 2.4627522449901477e-05,
      "loss": 0.8273,
      "step": 476530
    },
    {
      "epoch": 1.6701538936027784,
      "grad_norm": 3.265625,
      "learning_rate": 2.4626873421237775e-05,
      "loss": 0.766,
      "step": 476540
    },
    {
      "epoch": 1.6701889411096742,
      "grad_norm": 2.90625,
      "learning_rate": 2.4626224392574073e-05,
      "loss": 0.8755,
      "step": 476550
    },
    {
      "epoch": 1.6702239886165697,
      "grad_norm": 3.296875,
      "learning_rate": 2.462557536391037e-05,
      "loss": 0.86,
      "step": 476560
    },
    {
      "epoch": 1.6702590361234653,
      "grad_norm": 3.046875,
      "learning_rate": 2.4624926335246672e-05,
      "loss": 0.8902,
      "step": 476570
    },
    {
      "epoch": 1.670294083630361,
      "grad_norm": 3.328125,
      "learning_rate": 2.462427730658297e-05,
      "loss": 0.7715,
      "step": 476580
    },
    {
      "epoch": 1.6703291311372566,
      "grad_norm": 3.046875,
      "learning_rate": 2.4623628277919268e-05,
      "loss": 0.8944,
      "step": 476590
    },
    {
      "epoch": 1.670364178644152,
      "grad_norm": 2.921875,
      "learning_rate": 2.4622979249255566e-05,
      "loss": 0.8414,
      "step": 476600
    },
    {
      "epoch": 1.6703992261510479,
      "grad_norm": 2.78125,
      "learning_rate": 2.4622330220591864e-05,
      "loss": 0.8387,
      "step": 476610
    },
    {
      "epoch": 1.6704342736579434,
      "grad_norm": 3.0625,
      "learning_rate": 2.4621681191928162e-05,
      "loss": 0.8888,
      "step": 476620
    },
    {
      "epoch": 1.670469321164839,
      "grad_norm": 3.0625,
      "learning_rate": 2.462103216326446e-05,
      "loss": 0.7821,
      "step": 476630
    },
    {
      "epoch": 1.6705043686717347,
      "grad_norm": 2.703125,
      "learning_rate": 2.4620383134600758e-05,
      "loss": 0.769,
      "step": 476640
    },
    {
      "epoch": 1.67053941617863,
      "grad_norm": 3.0,
      "learning_rate": 2.4619734105937056e-05,
      "loss": 0.8702,
      "step": 476650
    },
    {
      "epoch": 1.6705744636855258,
      "grad_norm": 2.828125,
      "learning_rate": 2.4619085077273354e-05,
      "loss": 0.8008,
      "step": 476660
    },
    {
      "epoch": 1.6706095111924213,
      "grad_norm": 2.796875,
      "learning_rate": 2.4618436048609652e-05,
      "loss": 0.8559,
      "step": 476670
    },
    {
      "epoch": 1.6706445586993168,
      "grad_norm": 2.875,
      "learning_rate": 2.461778701994595e-05,
      "loss": 0.8731,
      "step": 476680
    },
    {
      "epoch": 1.6706796062062126,
      "grad_norm": 3.09375,
      "learning_rate": 2.4617137991282248e-05,
      "loss": 0.792,
      "step": 476690
    },
    {
      "epoch": 1.6707146537131081,
      "grad_norm": 3.03125,
      "learning_rate": 2.4616488962618546e-05,
      "loss": 0.8209,
      "step": 476700
    },
    {
      "epoch": 1.6707497012200037,
      "grad_norm": 2.84375,
      "learning_rate": 2.4615839933954847e-05,
      "loss": 0.8712,
      "step": 476710
    },
    {
      "epoch": 1.6707847487268994,
      "grad_norm": 3.078125,
      "learning_rate": 2.4615190905291142e-05,
      "loss": 0.8244,
      "step": 476720
    },
    {
      "epoch": 1.670819796233795,
      "grad_norm": 3.046875,
      "learning_rate": 2.461454187662744e-05,
      "loss": 0.8413,
      "step": 476730
    },
    {
      "epoch": 1.6708548437406905,
      "grad_norm": 3.03125,
      "learning_rate": 2.4613892847963738e-05,
      "loss": 0.7891,
      "step": 476740
    },
    {
      "epoch": 1.6708898912475862,
      "grad_norm": 3.046875,
      "learning_rate": 2.4613243819300036e-05,
      "loss": 0.8395,
      "step": 476750
    },
    {
      "epoch": 1.6709249387544816,
      "grad_norm": 2.5625,
      "learning_rate": 2.4612594790636334e-05,
      "loss": 0.7674,
      "step": 476760
    },
    {
      "epoch": 1.6709599862613773,
      "grad_norm": 2.453125,
      "learning_rate": 2.4611945761972632e-05,
      "loss": 0.7319,
      "step": 476770
    },
    {
      "epoch": 1.6709950337682729,
      "grad_norm": 3.265625,
      "learning_rate": 2.461129673330893e-05,
      "loss": 0.9,
      "step": 476780
    },
    {
      "epoch": 1.6710300812751684,
      "grad_norm": 2.75,
      "learning_rate": 2.4610647704645228e-05,
      "loss": 0.903,
      "step": 476790
    },
    {
      "epoch": 1.6710651287820641,
      "grad_norm": 2.90625,
      "learning_rate": 2.4609998675981526e-05,
      "loss": 0.8323,
      "step": 476800
    },
    {
      "epoch": 1.6711001762889597,
      "grad_norm": 2.5625,
      "learning_rate": 2.4609349647317824e-05,
      "loss": 0.868,
      "step": 476810
    },
    {
      "epoch": 1.6711352237958552,
      "grad_norm": 3.03125,
      "learning_rate": 2.4608700618654122e-05,
      "loss": 0.723,
      "step": 476820
    },
    {
      "epoch": 1.671170271302751,
      "grad_norm": 2.5,
      "learning_rate": 2.4608051589990423e-05,
      "loss": 0.796,
      "step": 476830
    },
    {
      "epoch": 1.6712053188096465,
      "grad_norm": 3.03125,
      "learning_rate": 2.460740256132672e-05,
      "loss": 0.8558,
      "step": 476840
    },
    {
      "epoch": 1.671240366316542,
      "grad_norm": 2.84375,
      "learning_rate": 2.460675353266302e-05,
      "loss": 0.7941,
      "step": 476850
    },
    {
      "epoch": 1.6712754138234378,
      "grad_norm": 2.796875,
      "learning_rate": 2.4606104503999317e-05,
      "loss": 0.8324,
      "step": 476860
    },
    {
      "epoch": 1.6713104613303331,
      "grad_norm": 3.140625,
      "learning_rate": 2.4605455475335615e-05,
      "loss": 0.8393,
      "step": 476870
    },
    {
      "epoch": 1.6713455088372289,
      "grad_norm": 2.375,
      "learning_rate": 2.4604806446671913e-05,
      "loss": 0.7919,
      "step": 476880
    },
    {
      "epoch": 1.6713805563441244,
      "grad_norm": 3.21875,
      "learning_rate": 2.460415741800821e-05,
      "loss": 0.8257,
      "step": 476890
    },
    {
      "epoch": 1.67141560385102,
      "grad_norm": 3.03125,
      "learning_rate": 2.460350838934451e-05,
      "loss": 0.8732,
      "step": 476900
    },
    {
      "epoch": 1.6714506513579157,
      "grad_norm": 2.90625,
      "learning_rate": 2.4602859360680804e-05,
      "loss": 0.8112,
      "step": 476910
    },
    {
      "epoch": 1.6714856988648112,
      "grad_norm": 2.546875,
      "learning_rate": 2.4602210332017102e-05,
      "loss": 0.8468,
      "step": 476920
    },
    {
      "epoch": 1.6715207463717068,
      "grad_norm": 2.8125,
      "learning_rate": 2.46015613033534e-05,
      "loss": 0.8535,
      "step": 476930
    },
    {
      "epoch": 1.6715557938786025,
      "grad_norm": 2.640625,
      "learning_rate": 2.46009122746897e-05,
      "loss": 0.8099,
      "step": 476940
    },
    {
      "epoch": 1.671590841385498,
      "grad_norm": 2.34375,
      "learning_rate": 2.4600263246026e-05,
      "loss": 0.8264,
      "step": 476950
    },
    {
      "epoch": 1.6716258888923936,
      "grad_norm": 2.75,
      "learning_rate": 2.4599614217362297e-05,
      "loss": 0.7627,
      "step": 476960
    },
    {
      "epoch": 1.6716609363992894,
      "grad_norm": 2.78125,
      "learning_rate": 2.4598965188698595e-05,
      "loss": 0.8731,
      "step": 476970
    },
    {
      "epoch": 1.6716959839061847,
      "grad_norm": 2.90625,
      "learning_rate": 2.4598316160034893e-05,
      "loss": 0.8285,
      "step": 476980
    },
    {
      "epoch": 1.6717310314130804,
      "grad_norm": 2.75,
      "learning_rate": 2.459766713137119e-05,
      "loss": 0.7367,
      "step": 476990
    },
    {
      "epoch": 1.671766078919976,
      "grad_norm": 2.484375,
      "learning_rate": 2.459701810270749e-05,
      "loss": 0.7537,
      "step": 477000
    },
    {
      "epoch": 1.6718011264268715,
      "grad_norm": 3.171875,
      "learning_rate": 2.4596369074043787e-05,
      "loss": 0.8301,
      "step": 477010
    },
    {
      "epoch": 1.6718361739337673,
      "grad_norm": 3.21875,
      "learning_rate": 2.4595720045380085e-05,
      "loss": 0.8634,
      "step": 477020
    },
    {
      "epoch": 1.6718712214406628,
      "grad_norm": 2.515625,
      "learning_rate": 2.4595071016716383e-05,
      "loss": 0.8307,
      "step": 477030
    },
    {
      "epoch": 1.6719062689475583,
      "grad_norm": 2.78125,
      "learning_rate": 2.459442198805268e-05,
      "loss": 0.7514,
      "step": 477040
    },
    {
      "epoch": 1.671941316454454,
      "grad_norm": 3.15625,
      "learning_rate": 2.459377295938898e-05,
      "loss": 0.8361,
      "step": 477050
    },
    {
      "epoch": 1.6719763639613496,
      "grad_norm": 2.984375,
      "learning_rate": 2.4593123930725277e-05,
      "loss": 0.8044,
      "step": 477060
    },
    {
      "epoch": 1.6720114114682452,
      "grad_norm": 2.546875,
      "learning_rate": 2.4592474902061575e-05,
      "loss": 0.7705,
      "step": 477070
    },
    {
      "epoch": 1.672046458975141,
      "grad_norm": 3.59375,
      "learning_rate": 2.4591825873397877e-05,
      "loss": 0.8255,
      "step": 477080
    },
    {
      "epoch": 1.6720815064820362,
      "grad_norm": 3.140625,
      "learning_rate": 2.4591176844734175e-05,
      "loss": 0.9357,
      "step": 477090
    },
    {
      "epoch": 1.672116553988932,
      "grad_norm": 2.46875,
      "learning_rate": 2.459052781607047e-05,
      "loss": 0.7768,
      "step": 477100
    },
    {
      "epoch": 1.6721516014958278,
      "grad_norm": 2.71875,
      "learning_rate": 2.4589878787406767e-05,
      "loss": 0.8103,
      "step": 477110
    },
    {
      "epoch": 1.672186649002723,
      "grad_norm": 2.96875,
      "learning_rate": 2.4589229758743065e-05,
      "loss": 0.8407,
      "step": 477120
    },
    {
      "epoch": 1.6722216965096188,
      "grad_norm": 3.0,
      "learning_rate": 2.4588580730079363e-05,
      "loss": 0.7803,
      "step": 477130
    },
    {
      "epoch": 1.6722567440165144,
      "grad_norm": 2.734375,
      "learning_rate": 2.458793170141566e-05,
      "loss": 0.7167,
      "step": 477140
    },
    {
      "epoch": 1.67229179152341,
      "grad_norm": 2.640625,
      "learning_rate": 2.458728267275196e-05,
      "loss": 0.7609,
      "step": 477150
    },
    {
      "epoch": 1.6723268390303057,
      "grad_norm": 2.53125,
      "learning_rate": 2.4586633644088257e-05,
      "loss": 0.8772,
      "step": 477160
    },
    {
      "epoch": 1.6723618865372012,
      "grad_norm": 2.9375,
      "learning_rate": 2.4585984615424555e-05,
      "loss": 0.7942,
      "step": 477170
    },
    {
      "epoch": 1.6723969340440967,
      "grad_norm": 2.4375,
      "learning_rate": 2.4585335586760853e-05,
      "loss": 0.7858,
      "step": 477180
    },
    {
      "epoch": 1.6724319815509925,
      "grad_norm": 2.84375,
      "learning_rate": 2.4584686558097155e-05,
      "loss": 0.8271,
      "step": 477190
    },
    {
      "epoch": 1.672467029057888,
      "grad_norm": 3.03125,
      "learning_rate": 2.4584037529433453e-05,
      "loss": 0.7878,
      "step": 477200
    },
    {
      "epoch": 1.6725020765647836,
      "grad_norm": 2.75,
      "learning_rate": 2.458338850076975e-05,
      "loss": 0.8596,
      "step": 477210
    },
    {
      "epoch": 1.6725371240716793,
      "grad_norm": 2.515625,
      "learning_rate": 2.458273947210605e-05,
      "loss": 0.8309,
      "step": 477220
    },
    {
      "epoch": 1.6725721715785746,
      "grad_norm": 2.71875,
      "learning_rate": 2.4582090443442347e-05,
      "loss": 0.9232,
      "step": 477230
    },
    {
      "epoch": 1.6726072190854704,
      "grad_norm": 2.515625,
      "learning_rate": 2.4581441414778645e-05,
      "loss": 0.7958,
      "step": 477240
    },
    {
      "epoch": 1.672642266592366,
      "grad_norm": 3.234375,
      "learning_rate": 2.4580792386114943e-05,
      "loss": 0.8087,
      "step": 477250
    },
    {
      "epoch": 1.6726773140992615,
      "grad_norm": 2.796875,
      "learning_rate": 2.458014335745124e-05,
      "loss": 0.8058,
      "step": 477260
    },
    {
      "epoch": 1.6727123616061572,
      "grad_norm": 2.515625,
      "learning_rate": 2.457949432878754e-05,
      "loss": 0.8103,
      "step": 477270
    },
    {
      "epoch": 1.6727474091130528,
      "grad_norm": 2.640625,
      "learning_rate": 2.4578845300123833e-05,
      "loss": 0.8201,
      "step": 477280
    },
    {
      "epoch": 1.6727824566199483,
      "grad_norm": 3.015625,
      "learning_rate": 2.457819627146013e-05,
      "loss": 0.9653,
      "step": 477290
    },
    {
      "epoch": 1.672817504126844,
      "grad_norm": 3.171875,
      "learning_rate": 2.457754724279643e-05,
      "loss": 0.8604,
      "step": 477300
    },
    {
      "epoch": 1.6728525516337396,
      "grad_norm": 2.875,
      "learning_rate": 2.457689821413273e-05,
      "loss": 0.9357,
      "step": 477310
    },
    {
      "epoch": 1.6728875991406351,
      "grad_norm": 2.921875,
      "learning_rate": 2.457624918546903e-05,
      "loss": 0.8059,
      "step": 477320
    },
    {
      "epoch": 1.6729226466475309,
      "grad_norm": 2.984375,
      "learning_rate": 2.4575600156805327e-05,
      "loss": 0.893,
      "step": 477330
    },
    {
      "epoch": 1.6729576941544262,
      "grad_norm": 3.09375,
      "learning_rate": 2.4574951128141625e-05,
      "loss": 0.811,
      "step": 477340
    },
    {
      "epoch": 1.672992741661322,
      "grad_norm": 3.359375,
      "learning_rate": 2.4574302099477923e-05,
      "loss": 0.9246,
      "step": 477350
    },
    {
      "epoch": 1.6730277891682175,
      "grad_norm": 2.96875,
      "learning_rate": 2.457365307081422e-05,
      "loss": 0.8531,
      "step": 477360
    },
    {
      "epoch": 1.673062836675113,
      "grad_norm": 3.15625,
      "learning_rate": 2.457300404215052e-05,
      "loss": 0.8152,
      "step": 477370
    },
    {
      "epoch": 1.6730978841820088,
      "grad_norm": 3.359375,
      "learning_rate": 2.4572355013486817e-05,
      "loss": 0.8428,
      "step": 477380
    },
    {
      "epoch": 1.6731329316889043,
      "grad_norm": 3.109375,
      "learning_rate": 2.4571705984823115e-05,
      "loss": 0.8121,
      "step": 477390
    },
    {
      "epoch": 1.6731679791957998,
      "grad_norm": 2.71875,
      "learning_rate": 2.4571056956159413e-05,
      "loss": 0.7563,
      "step": 477400
    },
    {
      "epoch": 1.6732030267026956,
      "grad_norm": 2.65625,
      "learning_rate": 2.457040792749571e-05,
      "loss": 0.8313,
      "step": 477410
    },
    {
      "epoch": 1.6732380742095911,
      "grad_norm": 3.046875,
      "learning_rate": 2.456975889883201e-05,
      "loss": 0.8504,
      "step": 477420
    },
    {
      "epoch": 1.6732731217164867,
      "grad_norm": 2.609375,
      "learning_rate": 2.4569109870168307e-05,
      "loss": 0.8068,
      "step": 477430
    },
    {
      "epoch": 1.6733081692233824,
      "grad_norm": 2.671875,
      "learning_rate": 2.4568460841504605e-05,
      "loss": 0.7721,
      "step": 477440
    },
    {
      "epoch": 1.6733432167302777,
      "grad_norm": 2.765625,
      "learning_rate": 2.4567811812840906e-05,
      "loss": 0.8665,
      "step": 477450
    },
    {
      "epoch": 1.6733782642371735,
      "grad_norm": 2.6875,
      "learning_rate": 2.4567162784177204e-05,
      "loss": 0.8613,
      "step": 477460
    },
    {
      "epoch": 1.673413311744069,
      "grad_norm": 3.171875,
      "learning_rate": 2.45665137555135e-05,
      "loss": 0.8923,
      "step": 477470
    },
    {
      "epoch": 1.6734483592509646,
      "grad_norm": 2.65625,
      "learning_rate": 2.4565864726849797e-05,
      "loss": 0.8642,
      "step": 477480
    },
    {
      "epoch": 1.6734834067578603,
      "grad_norm": 3.234375,
      "learning_rate": 2.4565215698186095e-05,
      "loss": 0.7825,
      "step": 477490
    },
    {
      "epoch": 1.6735184542647559,
      "grad_norm": 2.921875,
      "learning_rate": 2.4564566669522393e-05,
      "loss": 0.8563,
      "step": 477500
    },
    {
      "epoch": 1.6735535017716514,
      "grad_norm": 3.109375,
      "learning_rate": 2.456391764085869e-05,
      "loss": 0.9006,
      "step": 477510
    },
    {
      "epoch": 1.6735885492785472,
      "grad_norm": 3.03125,
      "learning_rate": 2.456326861219499e-05,
      "loss": 0.8644,
      "step": 477520
    },
    {
      "epoch": 1.6736235967854427,
      "grad_norm": 2.59375,
      "learning_rate": 2.4562619583531287e-05,
      "loss": 0.8534,
      "step": 477530
    },
    {
      "epoch": 1.6736586442923382,
      "grad_norm": 2.671875,
      "learning_rate": 2.4561970554867585e-05,
      "loss": 0.8376,
      "step": 477540
    },
    {
      "epoch": 1.673693691799234,
      "grad_norm": 3.625,
      "learning_rate": 2.4561321526203883e-05,
      "loss": 0.8052,
      "step": 477550
    },
    {
      "epoch": 1.6737287393061293,
      "grad_norm": 3.125,
      "learning_rate": 2.4560672497540184e-05,
      "loss": 0.8431,
      "step": 477560
    },
    {
      "epoch": 1.673763786813025,
      "grad_norm": 2.84375,
      "learning_rate": 2.4560023468876482e-05,
      "loss": 0.8616,
      "step": 477570
    },
    {
      "epoch": 1.6737988343199206,
      "grad_norm": 2.96875,
      "learning_rate": 2.455937444021278e-05,
      "loss": 0.7699,
      "step": 477580
    },
    {
      "epoch": 1.6738338818268161,
      "grad_norm": 3.28125,
      "learning_rate": 2.4558725411549078e-05,
      "loss": 0.8369,
      "step": 477590
    },
    {
      "epoch": 1.673868929333712,
      "grad_norm": 2.75,
      "learning_rate": 2.4558076382885376e-05,
      "loss": 0.7592,
      "step": 477600
    },
    {
      "epoch": 1.6739039768406074,
      "grad_norm": 2.6875,
      "learning_rate": 2.4557427354221674e-05,
      "loss": 0.8122,
      "step": 477610
    },
    {
      "epoch": 1.673939024347503,
      "grad_norm": 3.109375,
      "learning_rate": 2.4556778325557972e-05,
      "loss": 0.8972,
      "step": 477620
    },
    {
      "epoch": 1.6739740718543987,
      "grad_norm": 2.578125,
      "learning_rate": 2.455612929689427e-05,
      "loss": 0.8347,
      "step": 477630
    },
    {
      "epoch": 1.6740091193612943,
      "grad_norm": 2.828125,
      "learning_rate": 2.4555480268230568e-05,
      "loss": 0.7977,
      "step": 477640
    },
    {
      "epoch": 1.6740441668681898,
      "grad_norm": 2.828125,
      "learning_rate": 2.4554831239566866e-05,
      "loss": 0.8602,
      "step": 477650
    },
    {
      "epoch": 1.6740792143750856,
      "grad_norm": 3.109375,
      "learning_rate": 2.455418221090316e-05,
      "loss": 0.802,
      "step": 477660
    },
    {
      "epoch": 1.6741142618819809,
      "grad_norm": 2.8125,
      "learning_rate": 2.4553533182239462e-05,
      "loss": 0.8157,
      "step": 477670
    },
    {
      "epoch": 1.6741493093888766,
      "grad_norm": 3.046875,
      "learning_rate": 2.455288415357576e-05,
      "loss": 0.8516,
      "step": 477680
    },
    {
      "epoch": 1.6741843568957724,
      "grad_norm": 2.703125,
      "learning_rate": 2.4552235124912058e-05,
      "loss": 0.7636,
      "step": 477690
    },
    {
      "epoch": 1.6742194044026677,
      "grad_norm": 2.5,
      "learning_rate": 2.4551586096248356e-05,
      "loss": 0.7875,
      "step": 477700
    },
    {
      "epoch": 1.6742544519095635,
      "grad_norm": 2.828125,
      "learning_rate": 2.4550937067584654e-05,
      "loss": 0.8284,
      "step": 477710
    },
    {
      "epoch": 1.674289499416459,
      "grad_norm": 2.875,
      "learning_rate": 2.4550288038920952e-05,
      "loss": 0.8181,
      "step": 477720
    },
    {
      "epoch": 1.6743245469233545,
      "grad_norm": 3.1875,
      "learning_rate": 2.454963901025725e-05,
      "loss": 0.8827,
      "step": 477730
    },
    {
      "epoch": 1.6743595944302503,
      "grad_norm": 3.03125,
      "learning_rate": 2.4548989981593548e-05,
      "loss": 0.8493,
      "step": 477740
    },
    {
      "epoch": 1.6743946419371458,
      "grad_norm": 3.078125,
      "learning_rate": 2.4548340952929846e-05,
      "loss": 0.8868,
      "step": 477750
    },
    {
      "epoch": 1.6744296894440414,
      "grad_norm": 2.546875,
      "learning_rate": 2.4547691924266144e-05,
      "loss": 0.8304,
      "step": 477760
    },
    {
      "epoch": 1.6744647369509371,
      "grad_norm": 2.78125,
      "learning_rate": 2.4547042895602442e-05,
      "loss": 0.7843,
      "step": 477770
    },
    {
      "epoch": 1.6744997844578324,
      "grad_norm": 2.796875,
      "learning_rate": 2.454639386693874e-05,
      "loss": 0.9053,
      "step": 477780
    },
    {
      "epoch": 1.6745348319647282,
      "grad_norm": 2.765625,
      "learning_rate": 2.4545744838275038e-05,
      "loss": 0.8003,
      "step": 477790
    },
    {
      "epoch": 1.674569879471624,
      "grad_norm": 3.28125,
      "learning_rate": 2.4545095809611336e-05,
      "loss": 0.7531,
      "step": 477800
    },
    {
      "epoch": 1.6746049269785193,
      "grad_norm": 3.359375,
      "learning_rate": 2.4544446780947637e-05,
      "loss": 0.8522,
      "step": 477810
    },
    {
      "epoch": 1.674639974485415,
      "grad_norm": 2.796875,
      "learning_rate": 2.4543797752283935e-05,
      "loss": 0.8595,
      "step": 477820
    },
    {
      "epoch": 1.6746750219923106,
      "grad_norm": 3.046875,
      "learning_rate": 2.4543148723620233e-05,
      "loss": 0.8608,
      "step": 477830
    },
    {
      "epoch": 1.674710069499206,
      "grad_norm": 2.75,
      "learning_rate": 2.454249969495653e-05,
      "loss": 0.8366,
      "step": 477840
    },
    {
      "epoch": 1.6747451170061018,
      "grad_norm": 2.921875,
      "learning_rate": 2.4541850666292826e-05,
      "loss": 0.8524,
      "step": 477850
    },
    {
      "epoch": 1.6747801645129974,
      "grad_norm": 2.609375,
      "learning_rate": 2.4541201637629124e-05,
      "loss": 0.8158,
      "step": 477860
    },
    {
      "epoch": 1.674815212019893,
      "grad_norm": 3.09375,
      "learning_rate": 2.4540552608965422e-05,
      "loss": 0.8791,
      "step": 477870
    },
    {
      "epoch": 1.6748502595267887,
      "grad_norm": 2.953125,
      "learning_rate": 2.453990358030172e-05,
      "loss": 0.8588,
      "step": 477880
    },
    {
      "epoch": 1.6748853070336842,
      "grad_norm": 2.515625,
      "learning_rate": 2.4539254551638018e-05,
      "loss": 0.8329,
      "step": 477890
    },
    {
      "epoch": 1.6749203545405797,
      "grad_norm": 3.21875,
      "learning_rate": 2.4538605522974316e-05,
      "loss": 0.9064,
      "step": 477900
    },
    {
      "epoch": 1.6749554020474755,
      "grad_norm": 2.671875,
      "learning_rate": 2.4537956494310614e-05,
      "loss": 0.9043,
      "step": 477910
    },
    {
      "epoch": 1.6749904495543708,
      "grad_norm": 3.453125,
      "learning_rate": 2.4537307465646912e-05,
      "loss": 0.8632,
      "step": 477920
    },
    {
      "epoch": 1.6750254970612666,
      "grad_norm": 3.015625,
      "learning_rate": 2.4536658436983213e-05,
      "loss": 0.8331,
      "step": 477930
    },
    {
      "epoch": 1.6750605445681621,
      "grad_norm": 3.4375,
      "learning_rate": 2.453600940831951e-05,
      "loss": 0.8668,
      "step": 477940
    },
    {
      "epoch": 1.6750955920750576,
      "grad_norm": 2.765625,
      "learning_rate": 2.453536037965581e-05,
      "loss": 0.8708,
      "step": 477950
    },
    {
      "epoch": 1.6751306395819534,
      "grad_norm": 2.96875,
      "learning_rate": 2.4534711350992107e-05,
      "loss": 0.8371,
      "step": 477960
    },
    {
      "epoch": 1.675165687088849,
      "grad_norm": 2.953125,
      "learning_rate": 2.4534062322328405e-05,
      "loss": 0.9223,
      "step": 477970
    },
    {
      "epoch": 1.6752007345957445,
      "grad_norm": 3.0,
      "learning_rate": 2.4533413293664703e-05,
      "loss": 0.8663,
      "step": 477980
    },
    {
      "epoch": 1.6752357821026402,
      "grad_norm": 2.703125,
      "learning_rate": 2.4532764265001e-05,
      "loss": 0.8365,
      "step": 477990
    },
    {
      "epoch": 1.6752708296095358,
      "grad_norm": 3.046875,
      "learning_rate": 2.45321152363373e-05,
      "loss": 0.8051,
      "step": 478000
    },
    {
      "epoch": 1.6753058771164313,
      "grad_norm": 2.796875,
      "learning_rate": 2.4531466207673597e-05,
      "loss": 0.8554,
      "step": 478010
    },
    {
      "epoch": 1.675340924623327,
      "grad_norm": 3.015625,
      "learning_rate": 2.4530817179009895e-05,
      "loss": 0.8394,
      "step": 478020
    },
    {
      "epoch": 1.6753759721302224,
      "grad_norm": 2.796875,
      "learning_rate": 2.4530168150346193e-05,
      "loss": 0.8749,
      "step": 478030
    },
    {
      "epoch": 1.6754110196371181,
      "grad_norm": 3.125,
      "learning_rate": 2.452951912168249e-05,
      "loss": 0.7861,
      "step": 478040
    },
    {
      "epoch": 1.6754460671440137,
      "grad_norm": 2.734375,
      "learning_rate": 2.452887009301879e-05,
      "loss": 0.8315,
      "step": 478050
    },
    {
      "epoch": 1.6754811146509092,
      "grad_norm": 2.90625,
      "learning_rate": 2.4528221064355087e-05,
      "loss": 0.7856,
      "step": 478060
    },
    {
      "epoch": 1.675516162157805,
      "grad_norm": 3.0,
      "learning_rate": 2.4527572035691385e-05,
      "loss": 0.8223,
      "step": 478070
    },
    {
      "epoch": 1.6755512096647005,
      "grad_norm": 2.4375,
      "learning_rate": 2.4526923007027683e-05,
      "loss": 0.7332,
      "step": 478080
    },
    {
      "epoch": 1.675586257171596,
      "grad_norm": 2.9375,
      "learning_rate": 2.452627397836398e-05,
      "loss": 0.8394,
      "step": 478090
    },
    {
      "epoch": 1.6756213046784918,
      "grad_norm": 2.765625,
      "learning_rate": 2.452562494970028e-05,
      "loss": 0.7662,
      "step": 478100
    },
    {
      "epoch": 1.6756563521853873,
      "grad_norm": 2.703125,
      "learning_rate": 2.4524975921036577e-05,
      "loss": 0.8236,
      "step": 478110
    },
    {
      "epoch": 1.6756913996922829,
      "grad_norm": 2.484375,
      "learning_rate": 2.4524326892372875e-05,
      "loss": 0.7181,
      "step": 478120
    },
    {
      "epoch": 1.6757264471991786,
      "grad_norm": 3.25,
      "learning_rate": 2.4523677863709173e-05,
      "loss": 0.7689,
      "step": 478130
    },
    {
      "epoch": 1.675761494706074,
      "grad_norm": 2.578125,
      "learning_rate": 2.452302883504547e-05,
      "loss": 0.8034,
      "step": 478140
    },
    {
      "epoch": 1.6757965422129697,
      "grad_norm": 3.53125,
      "learning_rate": 2.452237980638177e-05,
      "loss": 0.8635,
      "step": 478150
    },
    {
      "epoch": 1.6758315897198652,
      "grad_norm": 2.90625,
      "learning_rate": 2.4521730777718067e-05,
      "loss": 0.7287,
      "step": 478160
    },
    {
      "epoch": 1.6758666372267608,
      "grad_norm": 2.734375,
      "learning_rate": 2.4521081749054365e-05,
      "loss": 0.8574,
      "step": 478170
    },
    {
      "epoch": 1.6759016847336565,
      "grad_norm": 2.96875,
      "learning_rate": 2.4520432720390667e-05,
      "loss": 0.846,
      "step": 478180
    },
    {
      "epoch": 1.675936732240552,
      "grad_norm": 3.125,
      "learning_rate": 2.4519783691726965e-05,
      "loss": 0.8296,
      "step": 478190
    },
    {
      "epoch": 1.6759717797474476,
      "grad_norm": 2.5625,
      "learning_rate": 2.4519134663063263e-05,
      "loss": 0.8199,
      "step": 478200
    },
    {
      "epoch": 1.6760068272543434,
      "grad_norm": 3.0,
      "learning_rate": 2.451848563439956e-05,
      "loss": 0.8442,
      "step": 478210
    },
    {
      "epoch": 1.676041874761239,
      "grad_norm": 2.59375,
      "learning_rate": 2.451783660573586e-05,
      "loss": 0.8659,
      "step": 478220
    },
    {
      "epoch": 1.6760769222681344,
      "grad_norm": 3.09375,
      "learning_rate": 2.4517187577072153e-05,
      "loss": 0.8101,
      "step": 478230
    },
    {
      "epoch": 1.6761119697750302,
      "grad_norm": 2.390625,
      "learning_rate": 2.451653854840845e-05,
      "loss": 0.7852,
      "step": 478240
    },
    {
      "epoch": 1.6761470172819255,
      "grad_norm": 2.84375,
      "learning_rate": 2.451588951974475e-05,
      "loss": 0.809,
      "step": 478250
    },
    {
      "epoch": 1.6761820647888213,
      "grad_norm": 2.859375,
      "learning_rate": 2.4515240491081047e-05,
      "loss": 0.7452,
      "step": 478260
    },
    {
      "epoch": 1.6762171122957168,
      "grad_norm": 3.28125,
      "learning_rate": 2.4514591462417345e-05,
      "loss": 0.7883,
      "step": 478270
    },
    {
      "epoch": 1.6762521598026123,
      "grad_norm": 2.65625,
      "learning_rate": 2.4513942433753643e-05,
      "loss": 0.8399,
      "step": 478280
    },
    {
      "epoch": 1.676287207309508,
      "grad_norm": 3.171875,
      "learning_rate": 2.4513293405089945e-05,
      "loss": 0.8956,
      "step": 478290
    },
    {
      "epoch": 1.6763222548164036,
      "grad_norm": 2.71875,
      "learning_rate": 2.4512644376426243e-05,
      "loss": 0.8268,
      "step": 478300
    },
    {
      "epoch": 1.6763573023232992,
      "grad_norm": 3.453125,
      "learning_rate": 2.451199534776254e-05,
      "loss": 0.7742,
      "step": 478310
    },
    {
      "epoch": 1.676392349830195,
      "grad_norm": 2.859375,
      "learning_rate": 2.451134631909884e-05,
      "loss": 0.8242,
      "step": 478320
    },
    {
      "epoch": 1.6764273973370905,
      "grad_norm": 3.046875,
      "learning_rate": 2.4510697290435137e-05,
      "loss": 0.8153,
      "step": 478330
    },
    {
      "epoch": 1.676462444843986,
      "grad_norm": 2.96875,
      "learning_rate": 2.4510048261771435e-05,
      "loss": 0.8553,
      "step": 478340
    },
    {
      "epoch": 1.6764974923508817,
      "grad_norm": 2.828125,
      "learning_rate": 2.4509399233107733e-05,
      "loss": 0.8828,
      "step": 478350
    },
    {
      "epoch": 1.676532539857777,
      "grad_norm": 3.140625,
      "learning_rate": 2.450875020444403e-05,
      "loss": 0.7899,
      "step": 478360
    },
    {
      "epoch": 1.6765675873646728,
      "grad_norm": 3.109375,
      "learning_rate": 2.450810117578033e-05,
      "loss": 0.9098,
      "step": 478370
    },
    {
      "epoch": 1.6766026348715686,
      "grad_norm": 2.921875,
      "learning_rate": 2.4507452147116627e-05,
      "loss": 0.7904,
      "step": 478380
    },
    {
      "epoch": 1.676637682378464,
      "grad_norm": 2.8125,
      "learning_rate": 2.4506803118452925e-05,
      "loss": 0.7768,
      "step": 478390
    },
    {
      "epoch": 1.6766727298853596,
      "grad_norm": 2.59375,
      "learning_rate": 2.4506154089789223e-05,
      "loss": 0.971,
      "step": 478400
    },
    {
      "epoch": 1.6767077773922552,
      "grad_norm": 2.953125,
      "learning_rate": 2.450550506112552e-05,
      "loss": 0.8768,
      "step": 478410
    },
    {
      "epoch": 1.6767428248991507,
      "grad_norm": 2.765625,
      "learning_rate": 2.450485603246182e-05,
      "loss": 0.8267,
      "step": 478420
    },
    {
      "epoch": 1.6767778724060465,
      "grad_norm": 2.75,
      "learning_rate": 2.4504207003798117e-05,
      "loss": 0.8278,
      "step": 478430
    },
    {
      "epoch": 1.676812919912942,
      "grad_norm": 3.109375,
      "learning_rate": 2.4503557975134415e-05,
      "loss": 0.873,
      "step": 478440
    },
    {
      "epoch": 1.6768479674198375,
      "grad_norm": 3.09375,
      "learning_rate": 2.4502908946470713e-05,
      "loss": 0.8475,
      "step": 478450
    },
    {
      "epoch": 1.6768830149267333,
      "grad_norm": 2.671875,
      "learning_rate": 2.450225991780701e-05,
      "loss": 0.7358,
      "step": 478460
    },
    {
      "epoch": 1.6769180624336286,
      "grad_norm": 3.078125,
      "learning_rate": 2.450161088914331e-05,
      "loss": 0.8648,
      "step": 478470
    },
    {
      "epoch": 1.6769531099405244,
      "grad_norm": 2.859375,
      "learning_rate": 2.4500961860479607e-05,
      "loss": 0.8537,
      "step": 478480
    },
    {
      "epoch": 1.6769881574474201,
      "grad_norm": 2.890625,
      "learning_rate": 2.4500312831815905e-05,
      "loss": 0.8157,
      "step": 478490
    },
    {
      "epoch": 1.6770232049543154,
      "grad_norm": 2.734375,
      "learning_rate": 2.4499663803152203e-05,
      "loss": 0.8044,
      "step": 478500
    },
    {
      "epoch": 1.6770582524612112,
      "grad_norm": 3.125,
      "learning_rate": 2.44990147744885e-05,
      "loss": 0.8377,
      "step": 478510
    },
    {
      "epoch": 1.6770932999681067,
      "grad_norm": 2.609375,
      "learning_rate": 2.44983657458248e-05,
      "loss": 0.8162,
      "step": 478520
    },
    {
      "epoch": 1.6771283474750023,
      "grad_norm": 2.671875,
      "learning_rate": 2.4497716717161097e-05,
      "loss": 0.7132,
      "step": 478530
    },
    {
      "epoch": 1.677163394981898,
      "grad_norm": 2.5625,
      "learning_rate": 2.4497067688497395e-05,
      "loss": 0.8084,
      "step": 478540
    },
    {
      "epoch": 1.6771984424887936,
      "grad_norm": 2.671875,
      "learning_rate": 2.4496418659833696e-05,
      "loss": 0.8443,
      "step": 478550
    },
    {
      "epoch": 1.677233489995689,
      "grad_norm": 2.765625,
      "learning_rate": 2.4495769631169994e-05,
      "loss": 0.7622,
      "step": 478560
    },
    {
      "epoch": 1.6772685375025849,
      "grad_norm": 2.78125,
      "learning_rate": 2.4495120602506292e-05,
      "loss": 0.8785,
      "step": 478570
    },
    {
      "epoch": 1.6773035850094804,
      "grad_norm": 2.921875,
      "learning_rate": 2.449447157384259e-05,
      "loss": 0.9263,
      "step": 478580
    },
    {
      "epoch": 1.677338632516376,
      "grad_norm": 3.21875,
      "learning_rate": 2.4493822545178888e-05,
      "loss": 0.8081,
      "step": 478590
    },
    {
      "epoch": 1.6773736800232717,
      "grad_norm": 3.078125,
      "learning_rate": 2.4493173516515183e-05,
      "loss": 0.8917,
      "step": 478600
    },
    {
      "epoch": 1.677408727530167,
      "grad_norm": 2.859375,
      "learning_rate": 2.449252448785148e-05,
      "loss": 0.8762,
      "step": 478610
    },
    {
      "epoch": 1.6774437750370628,
      "grad_norm": 2.703125,
      "learning_rate": 2.449187545918778e-05,
      "loss": 0.8203,
      "step": 478620
    },
    {
      "epoch": 1.6774788225439583,
      "grad_norm": 2.859375,
      "learning_rate": 2.4491226430524077e-05,
      "loss": 0.8387,
      "step": 478630
    },
    {
      "epoch": 1.6775138700508538,
      "grad_norm": 3.0,
      "learning_rate": 2.4490577401860375e-05,
      "loss": 0.8218,
      "step": 478640
    },
    {
      "epoch": 1.6775489175577496,
      "grad_norm": 2.90625,
      "learning_rate": 2.4489928373196673e-05,
      "loss": 0.8423,
      "step": 478650
    },
    {
      "epoch": 1.6775839650646451,
      "grad_norm": 2.546875,
      "learning_rate": 2.4489279344532974e-05,
      "loss": 0.8934,
      "step": 478660
    },
    {
      "epoch": 1.6776190125715407,
      "grad_norm": 3.078125,
      "learning_rate": 2.4488630315869272e-05,
      "loss": 0.8132,
      "step": 478670
    },
    {
      "epoch": 1.6776540600784364,
      "grad_norm": 2.9375,
      "learning_rate": 2.448798128720557e-05,
      "loss": 0.8409,
      "step": 478680
    },
    {
      "epoch": 1.677689107585332,
      "grad_norm": 2.921875,
      "learning_rate": 2.4487332258541868e-05,
      "loss": 0.8142,
      "step": 478690
    },
    {
      "epoch": 1.6777241550922275,
      "grad_norm": 3.28125,
      "learning_rate": 2.4486683229878166e-05,
      "loss": 0.8731,
      "step": 478700
    },
    {
      "epoch": 1.6777592025991233,
      "grad_norm": 2.96875,
      "learning_rate": 2.4486034201214464e-05,
      "loss": 0.9271,
      "step": 478710
    },
    {
      "epoch": 1.6777942501060186,
      "grad_norm": 3.078125,
      "learning_rate": 2.4485385172550762e-05,
      "loss": 0.7888,
      "step": 478720
    },
    {
      "epoch": 1.6778292976129143,
      "grad_norm": 2.796875,
      "learning_rate": 2.448473614388706e-05,
      "loss": 0.8566,
      "step": 478730
    },
    {
      "epoch": 1.6778643451198099,
      "grad_norm": 2.921875,
      "learning_rate": 2.4484087115223358e-05,
      "loss": 0.8565,
      "step": 478740
    },
    {
      "epoch": 1.6778993926267054,
      "grad_norm": 2.828125,
      "learning_rate": 2.4483438086559656e-05,
      "loss": 0.8525,
      "step": 478750
    },
    {
      "epoch": 1.6779344401336012,
      "grad_norm": 2.4375,
      "learning_rate": 2.4482789057895954e-05,
      "loss": 0.863,
      "step": 478760
    },
    {
      "epoch": 1.6779694876404967,
      "grad_norm": 2.640625,
      "learning_rate": 2.4482140029232252e-05,
      "loss": 0.8094,
      "step": 478770
    },
    {
      "epoch": 1.6780045351473922,
      "grad_norm": 3.15625,
      "learning_rate": 2.448149100056855e-05,
      "loss": 0.8084,
      "step": 478780
    },
    {
      "epoch": 1.678039582654288,
      "grad_norm": 3.265625,
      "learning_rate": 2.4480841971904848e-05,
      "loss": 0.8163,
      "step": 478790
    },
    {
      "epoch": 1.6780746301611835,
      "grad_norm": 3.25,
      "learning_rate": 2.4480192943241146e-05,
      "loss": 0.8151,
      "step": 478800
    },
    {
      "epoch": 1.678109677668079,
      "grad_norm": 2.921875,
      "learning_rate": 2.4479543914577444e-05,
      "loss": 0.8085,
      "step": 478810
    },
    {
      "epoch": 1.6781447251749748,
      "grad_norm": 2.96875,
      "learning_rate": 2.4478894885913742e-05,
      "loss": 0.7781,
      "step": 478820
    },
    {
      "epoch": 1.6781797726818701,
      "grad_norm": 3.078125,
      "learning_rate": 2.447824585725004e-05,
      "loss": 0.8405,
      "step": 478830
    },
    {
      "epoch": 1.6782148201887659,
      "grad_norm": 3.0,
      "learning_rate": 2.4477596828586338e-05,
      "loss": 0.8288,
      "step": 478840
    },
    {
      "epoch": 1.6782498676956614,
      "grad_norm": 2.984375,
      "learning_rate": 2.4476947799922636e-05,
      "loss": 0.7766,
      "step": 478850
    },
    {
      "epoch": 1.678284915202557,
      "grad_norm": 2.90625,
      "learning_rate": 2.4476298771258934e-05,
      "loss": 0.7952,
      "step": 478860
    },
    {
      "epoch": 1.6783199627094527,
      "grad_norm": 3.4375,
      "learning_rate": 2.4475649742595232e-05,
      "loss": 0.8094,
      "step": 478870
    },
    {
      "epoch": 1.6783550102163483,
      "grad_norm": 3.078125,
      "learning_rate": 2.447500071393153e-05,
      "loss": 0.8213,
      "step": 478880
    },
    {
      "epoch": 1.6783900577232438,
      "grad_norm": 2.78125,
      "learning_rate": 2.4474351685267828e-05,
      "loss": 0.8993,
      "step": 478890
    },
    {
      "epoch": 1.6784251052301395,
      "grad_norm": 2.71875,
      "learning_rate": 2.4473702656604126e-05,
      "loss": 0.8717,
      "step": 478900
    },
    {
      "epoch": 1.678460152737035,
      "grad_norm": 2.5,
      "learning_rate": 2.4473053627940428e-05,
      "loss": 0.8294,
      "step": 478910
    },
    {
      "epoch": 1.6784952002439306,
      "grad_norm": 3.09375,
      "learning_rate": 2.4472404599276726e-05,
      "loss": 0.8991,
      "step": 478920
    },
    {
      "epoch": 1.6785302477508264,
      "grad_norm": 2.875,
      "learning_rate": 2.4471755570613024e-05,
      "loss": 0.8309,
      "step": 478930
    },
    {
      "epoch": 1.6785652952577217,
      "grad_norm": 2.65625,
      "learning_rate": 2.447110654194932e-05,
      "loss": 0.8385,
      "step": 478940
    },
    {
      "epoch": 1.6786003427646174,
      "grad_norm": 2.4375,
      "learning_rate": 2.447045751328562e-05,
      "loss": 0.8293,
      "step": 478950
    },
    {
      "epoch": 1.678635390271513,
      "grad_norm": 3.140625,
      "learning_rate": 2.4469808484621918e-05,
      "loss": 0.8103,
      "step": 478960
    },
    {
      "epoch": 1.6786704377784085,
      "grad_norm": 2.921875,
      "learning_rate": 2.4469159455958216e-05,
      "loss": 0.7893,
      "step": 478970
    },
    {
      "epoch": 1.6787054852853043,
      "grad_norm": 2.578125,
      "learning_rate": 2.446851042729451e-05,
      "loss": 0.8307,
      "step": 478980
    },
    {
      "epoch": 1.6787405327921998,
      "grad_norm": 2.484375,
      "learning_rate": 2.4467861398630808e-05,
      "loss": 0.751,
      "step": 478990
    },
    {
      "epoch": 1.6787755802990953,
      "grad_norm": 3.40625,
      "learning_rate": 2.4467212369967106e-05,
      "loss": 0.9007,
      "step": 479000
    },
    {
      "epoch": 1.678810627805991,
      "grad_norm": 2.921875,
      "learning_rate": 2.4466563341303404e-05,
      "loss": 0.8241,
      "step": 479010
    },
    {
      "epoch": 1.6788456753128866,
      "grad_norm": 3.40625,
      "learning_rate": 2.4465914312639702e-05,
      "loss": 0.8471,
      "step": 479020
    },
    {
      "epoch": 1.6788807228197822,
      "grad_norm": 3.40625,
      "learning_rate": 2.4465265283976004e-05,
      "loss": 0.8861,
      "step": 479030
    },
    {
      "epoch": 1.678915770326678,
      "grad_norm": 2.953125,
      "learning_rate": 2.44646162553123e-05,
      "loss": 0.918,
      "step": 479040
    },
    {
      "epoch": 1.6789508178335733,
      "grad_norm": 3.296875,
      "learning_rate": 2.44639672266486e-05,
      "loss": 0.808,
      "step": 479050
    },
    {
      "epoch": 1.678985865340469,
      "grad_norm": 3.28125,
      "learning_rate": 2.4463318197984898e-05,
      "loss": 0.803,
      "step": 479060
    },
    {
      "epoch": 1.6790209128473648,
      "grad_norm": 2.90625,
      "learning_rate": 2.4462669169321196e-05,
      "loss": 0.9323,
      "step": 479070
    },
    {
      "epoch": 1.67905596035426,
      "grad_norm": 3.453125,
      "learning_rate": 2.4462020140657494e-05,
      "loss": 0.8344,
      "step": 479080
    },
    {
      "epoch": 1.6790910078611558,
      "grad_norm": 2.578125,
      "learning_rate": 2.446137111199379e-05,
      "loss": 0.854,
      "step": 479090
    },
    {
      "epoch": 1.6791260553680514,
      "grad_norm": 2.890625,
      "learning_rate": 2.446072208333009e-05,
      "loss": 0.8643,
      "step": 479100
    },
    {
      "epoch": 1.679161102874947,
      "grad_norm": 2.96875,
      "learning_rate": 2.4460073054666388e-05,
      "loss": 0.8155,
      "step": 479110
    },
    {
      "epoch": 1.6791961503818427,
      "grad_norm": 2.546875,
      "learning_rate": 2.4459424026002686e-05,
      "loss": 0.879,
      "step": 479120
    },
    {
      "epoch": 1.6792311978887382,
      "grad_norm": 3.140625,
      "learning_rate": 2.4458774997338984e-05,
      "loss": 0.8941,
      "step": 479130
    },
    {
      "epoch": 1.6792662453956337,
      "grad_norm": 3.140625,
      "learning_rate": 2.445812596867528e-05,
      "loss": 0.8625,
      "step": 479140
    },
    {
      "epoch": 1.6793012929025295,
      "grad_norm": 2.671875,
      "learning_rate": 2.445747694001158e-05,
      "loss": 0.8428,
      "step": 479150
    },
    {
      "epoch": 1.6793363404094248,
      "grad_norm": 2.703125,
      "learning_rate": 2.445682791134788e-05,
      "loss": 0.9181,
      "step": 479160
    },
    {
      "epoch": 1.6793713879163206,
      "grad_norm": 3.109375,
      "learning_rate": 2.4456178882684176e-05,
      "loss": 0.8351,
      "step": 479170
    },
    {
      "epoch": 1.6794064354232163,
      "grad_norm": 2.890625,
      "learning_rate": 2.4455529854020474e-05,
      "loss": 0.8149,
      "step": 479180
    },
    {
      "epoch": 1.6794414829301116,
      "grad_norm": 2.609375,
      "learning_rate": 2.445488082535677e-05,
      "loss": 0.8219,
      "step": 479190
    },
    {
      "epoch": 1.6794765304370074,
      "grad_norm": 3.078125,
      "learning_rate": 2.445423179669307e-05,
      "loss": 0.7961,
      "step": 479200
    },
    {
      "epoch": 1.679511577943903,
      "grad_norm": 2.890625,
      "learning_rate": 2.4453582768029368e-05,
      "loss": 0.8424,
      "step": 479210
    },
    {
      "epoch": 1.6795466254507985,
      "grad_norm": 2.765625,
      "learning_rate": 2.4452933739365666e-05,
      "loss": 0.8079,
      "step": 479220
    },
    {
      "epoch": 1.6795816729576942,
      "grad_norm": 2.734375,
      "learning_rate": 2.4452284710701964e-05,
      "loss": 0.8776,
      "step": 479230
    },
    {
      "epoch": 1.6796167204645898,
      "grad_norm": 3.203125,
      "learning_rate": 2.445163568203826e-05,
      "loss": 0.8294,
      "step": 479240
    },
    {
      "epoch": 1.6796517679714853,
      "grad_norm": 2.84375,
      "learning_rate": 2.445098665337456e-05,
      "loss": 0.7967,
      "step": 479250
    },
    {
      "epoch": 1.679686815478381,
      "grad_norm": 2.734375,
      "learning_rate": 2.4450337624710858e-05,
      "loss": 0.887,
      "step": 479260
    },
    {
      "epoch": 1.6797218629852766,
      "grad_norm": 2.765625,
      "learning_rate": 2.4449688596047156e-05,
      "loss": 0.8805,
      "step": 479270
    },
    {
      "epoch": 1.6797569104921721,
      "grad_norm": 2.671875,
      "learning_rate": 2.4449039567383457e-05,
      "loss": 0.8841,
      "step": 479280
    },
    {
      "epoch": 1.6797919579990679,
      "grad_norm": 3.046875,
      "learning_rate": 2.4448390538719755e-05,
      "loss": 0.8474,
      "step": 479290
    },
    {
      "epoch": 1.6798270055059632,
      "grad_norm": 2.875,
      "learning_rate": 2.4447741510056053e-05,
      "loss": 0.8376,
      "step": 479300
    },
    {
      "epoch": 1.679862053012859,
      "grad_norm": 3.546875,
      "learning_rate": 2.444709248139235e-05,
      "loss": 0.8004,
      "step": 479310
    },
    {
      "epoch": 1.6798971005197545,
      "grad_norm": 2.84375,
      "learning_rate": 2.444644345272865e-05,
      "loss": 0.8156,
      "step": 479320
    },
    {
      "epoch": 1.67993214802665,
      "grad_norm": 2.671875,
      "learning_rate": 2.4445794424064947e-05,
      "loss": 0.8202,
      "step": 479330
    },
    {
      "epoch": 1.6799671955335458,
      "grad_norm": 2.53125,
      "learning_rate": 2.4445145395401245e-05,
      "loss": 0.8763,
      "step": 479340
    },
    {
      "epoch": 1.6800022430404413,
      "grad_norm": 3.140625,
      "learning_rate": 2.444449636673754e-05,
      "loss": 0.9161,
      "step": 479350
    },
    {
      "epoch": 1.6800372905473369,
      "grad_norm": 3.0625,
      "learning_rate": 2.4443847338073838e-05,
      "loss": 0.9176,
      "step": 479360
    },
    {
      "epoch": 1.6800723380542326,
      "grad_norm": 2.859375,
      "learning_rate": 2.4443198309410136e-05,
      "loss": 0.7967,
      "step": 479370
    },
    {
      "epoch": 1.6801073855611282,
      "grad_norm": 2.546875,
      "learning_rate": 2.4442549280746434e-05,
      "loss": 0.8676,
      "step": 479380
    },
    {
      "epoch": 1.6801424330680237,
      "grad_norm": 2.640625,
      "learning_rate": 2.4441900252082735e-05,
      "loss": 0.7997,
      "step": 479390
    },
    {
      "epoch": 1.6801774805749194,
      "grad_norm": 3.328125,
      "learning_rate": 2.4441251223419033e-05,
      "loss": 0.9206,
      "step": 479400
    },
    {
      "epoch": 1.6802125280818148,
      "grad_norm": 2.921875,
      "learning_rate": 2.444060219475533e-05,
      "loss": 0.85,
      "step": 479410
    },
    {
      "epoch": 1.6802475755887105,
      "grad_norm": 2.734375,
      "learning_rate": 2.443995316609163e-05,
      "loss": 0.8423,
      "step": 479420
    },
    {
      "epoch": 1.680282623095606,
      "grad_norm": 2.65625,
      "learning_rate": 2.4439304137427927e-05,
      "loss": 0.8052,
      "step": 479430
    },
    {
      "epoch": 1.6803176706025016,
      "grad_norm": 3.046875,
      "learning_rate": 2.4438655108764225e-05,
      "loss": 0.8228,
      "step": 479440
    },
    {
      "epoch": 1.6803527181093973,
      "grad_norm": 2.640625,
      "learning_rate": 2.4438006080100523e-05,
      "loss": 0.8473,
      "step": 479450
    },
    {
      "epoch": 1.6803877656162929,
      "grad_norm": 3.078125,
      "learning_rate": 2.443735705143682e-05,
      "loss": 0.8064,
      "step": 479460
    },
    {
      "epoch": 1.6804228131231884,
      "grad_norm": 2.296875,
      "learning_rate": 2.443670802277312e-05,
      "loss": 0.8386,
      "step": 479470
    },
    {
      "epoch": 1.6804578606300842,
      "grad_norm": 2.75,
      "learning_rate": 2.4436058994109417e-05,
      "loss": 0.8246,
      "step": 479480
    },
    {
      "epoch": 1.6804929081369797,
      "grad_norm": 2.859375,
      "learning_rate": 2.4435409965445715e-05,
      "loss": 0.8612,
      "step": 479490
    },
    {
      "epoch": 1.6805279556438752,
      "grad_norm": 3.171875,
      "learning_rate": 2.4434760936782013e-05,
      "loss": 0.8471,
      "step": 479500
    },
    {
      "epoch": 1.680563003150771,
      "grad_norm": 3.25,
      "learning_rate": 2.443411190811831e-05,
      "loss": 0.8344,
      "step": 479510
    },
    {
      "epoch": 1.6805980506576663,
      "grad_norm": 3.046875,
      "learning_rate": 2.443346287945461e-05,
      "loss": 0.8124,
      "step": 479520
    },
    {
      "epoch": 1.680633098164562,
      "grad_norm": 2.640625,
      "learning_rate": 2.443281385079091e-05,
      "loss": 0.8071,
      "step": 479530
    },
    {
      "epoch": 1.6806681456714576,
      "grad_norm": 3.34375,
      "learning_rate": 2.4432164822127205e-05,
      "loss": 0.8509,
      "step": 479540
    },
    {
      "epoch": 1.6807031931783531,
      "grad_norm": 2.578125,
      "learning_rate": 2.4431515793463503e-05,
      "loss": 0.8879,
      "step": 479550
    },
    {
      "epoch": 1.680738240685249,
      "grad_norm": 2.921875,
      "learning_rate": 2.44308667647998e-05,
      "loss": 0.8998,
      "step": 479560
    },
    {
      "epoch": 1.6807732881921444,
      "grad_norm": 2.875,
      "learning_rate": 2.44302177361361e-05,
      "loss": 0.9432,
      "step": 479570
    },
    {
      "epoch": 1.68080833569904,
      "grad_norm": 2.5625,
      "learning_rate": 2.4429568707472397e-05,
      "loss": 0.8462,
      "step": 479580
    },
    {
      "epoch": 1.6808433832059357,
      "grad_norm": 2.5625,
      "learning_rate": 2.4428919678808695e-05,
      "loss": 0.746,
      "step": 479590
    },
    {
      "epoch": 1.6808784307128313,
      "grad_norm": 2.796875,
      "learning_rate": 2.4428270650144993e-05,
      "loss": 0.8639,
      "step": 479600
    },
    {
      "epoch": 1.6809134782197268,
      "grad_norm": 2.921875,
      "learning_rate": 2.442762162148129e-05,
      "loss": 0.7749,
      "step": 479610
    },
    {
      "epoch": 1.6809485257266226,
      "grad_norm": 3.984375,
      "learning_rate": 2.442697259281759e-05,
      "loss": 0.7693,
      "step": 479620
    },
    {
      "epoch": 1.6809835732335179,
      "grad_norm": 3.28125,
      "learning_rate": 2.4426323564153887e-05,
      "loss": 0.8658,
      "step": 479630
    },
    {
      "epoch": 1.6810186207404136,
      "grad_norm": 3.078125,
      "learning_rate": 2.442567453549019e-05,
      "loss": 0.8914,
      "step": 479640
    },
    {
      "epoch": 1.6810536682473092,
      "grad_norm": 3.015625,
      "learning_rate": 2.4425025506826486e-05,
      "loss": 0.7896,
      "step": 479650
    },
    {
      "epoch": 1.6810887157542047,
      "grad_norm": 3.109375,
      "learning_rate": 2.4424376478162784e-05,
      "loss": 0.8261,
      "step": 479660
    },
    {
      "epoch": 1.6811237632611005,
      "grad_norm": 2.78125,
      "learning_rate": 2.4423727449499082e-05,
      "loss": 0.7491,
      "step": 479670
    },
    {
      "epoch": 1.681158810767996,
      "grad_norm": 2.953125,
      "learning_rate": 2.442307842083538e-05,
      "loss": 0.7594,
      "step": 479680
    },
    {
      "epoch": 1.6811938582748915,
      "grad_norm": 2.375,
      "learning_rate": 2.442242939217168e-05,
      "loss": 0.8951,
      "step": 479690
    },
    {
      "epoch": 1.6812289057817873,
      "grad_norm": 3.28125,
      "learning_rate": 2.4421780363507976e-05,
      "loss": 0.8292,
      "step": 479700
    },
    {
      "epoch": 1.6812639532886828,
      "grad_norm": 3.328125,
      "learning_rate": 2.4421131334844274e-05,
      "loss": 0.8569,
      "step": 479710
    },
    {
      "epoch": 1.6812990007955784,
      "grad_norm": 2.859375,
      "learning_rate": 2.4420482306180572e-05,
      "loss": 0.8378,
      "step": 479720
    },
    {
      "epoch": 1.6813340483024741,
      "grad_norm": 2.96875,
      "learning_rate": 2.4419833277516867e-05,
      "loss": 0.8956,
      "step": 479730
    },
    {
      "epoch": 1.6813690958093694,
      "grad_norm": 2.828125,
      "learning_rate": 2.4419184248853165e-05,
      "loss": 0.7847,
      "step": 479740
    },
    {
      "epoch": 1.6814041433162652,
      "grad_norm": 3.078125,
      "learning_rate": 2.4418535220189463e-05,
      "loss": 0.8441,
      "step": 479750
    },
    {
      "epoch": 1.681439190823161,
      "grad_norm": 3.546875,
      "learning_rate": 2.4417886191525764e-05,
      "loss": 0.7712,
      "step": 479760
    },
    {
      "epoch": 1.6814742383300563,
      "grad_norm": 2.90625,
      "learning_rate": 2.4417237162862062e-05,
      "loss": 0.9144,
      "step": 479770
    },
    {
      "epoch": 1.681509285836952,
      "grad_norm": 2.671875,
      "learning_rate": 2.441658813419836e-05,
      "loss": 0.8483,
      "step": 479780
    },
    {
      "epoch": 1.6815443333438476,
      "grad_norm": 2.90625,
      "learning_rate": 2.441593910553466e-05,
      "loss": 0.8171,
      "step": 479790
    },
    {
      "epoch": 1.681579380850743,
      "grad_norm": 3.09375,
      "learning_rate": 2.4415290076870956e-05,
      "loss": 0.8243,
      "step": 479800
    },
    {
      "epoch": 1.6816144283576389,
      "grad_norm": 2.359375,
      "learning_rate": 2.4414641048207254e-05,
      "loss": 0.8079,
      "step": 479810
    },
    {
      "epoch": 1.6816494758645344,
      "grad_norm": 2.609375,
      "learning_rate": 2.4413992019543552e-05,
      "loss": 0.846,
      "step": 479820
    },
    {
      "epoch": 1.68168452337143,
      "grad_norm": 3.390625,
      "learning_rate": 2.441334299087985e-05,
      "loss": 0.8443,
      "step": 479830
    },
    {
      "epoch": 1.6817195708783257,
      "grad_norm": 3.015625,
      "learning_rate": 2.441269396221615e-05,
      "loss": 0.8463,
      "step": 479840
    },
    {
      "epoch": 1.6817546183852212,
      "grad_norm": 2.78125,
      "learning_rate": 2.4412044933552446e-05,
      "loss": 0.8375,
      "step": 479850
    },
    {
      "epoch": 1.6817896658921168,
      "grad_norm": 2.890625,
      "learning_rate": 2.4411395904888744e-05,
      "loss": 0.8355,
      "step": 479860
    },
    {
      "epoch": 1.6818247133990125,
      "grad_norm": 3.109375,
      "learning_rate": 2.4410746876225042e-05,
      "loss": 0.8155,
      "step": 479870
    },
    {
      "epoch": 1.6818597609059078,
      "grad_norm": 2.5625,
      "learning_rate": 2.441009784756134e-05,
      "loss": 0.8115,
      "step": 479880
    },
    {
      "epoch": 1.6818948084128036,
      "grad_norm": 3.15625,
      "learning_rate": 2.440944881889764e-05,
      "loss": 0.85,
      "step": 479890
    },
    {
      "epoch": 1.6819298559196991,
      "grad_norm": 2.828125,
      "learning_rate": 2.440879979023394e-05,
      "loss": 0.7312,
      "step": 479900
    },
    {
      "epoch": 1.6819649034265947,
      "grad_norm": 3.125,
      "learning_rate": 2.4408150761570238e-05,
      "loss": 0.839,
      "step": 479910
    },
    {
      "epoch": 1.6819999509334904,
      "grad_norm": 2.765625,
      "learning_rate": 2.4407501732906532e-05,
      "loss": 0.8232,
      "step": 479920
    },
    {
      "epoch": 1.682034998440386,
      "grad_norm": 2.46875,
      "learning_rate": 2.440685270424283e-05,
      "loss": 0.8531,
      "step": 479930
    },
    {
      "epoch": 1.6820700459472815,
      "grad_norm": 2.78125,
      "learning_rate": 2.440620367557913e-05,
      "loss": 0.7838,
      "step": 479940
    },
    {
      "epoch": 1.6821050934541772,
      "grad_norm": 3.53125,
      "learning_rate": 2.4405554646915426e-05,
      "loss": 0.8209,
      "step": 479950
    },
    {
      "epoch": 1.6821401409610728,
      "grad_norm": 3.171875,
      "learning_rate": 2.4404905618251724e-05,
      "loss": 0.7937,
      "step": 479960
    },
    {
      "epoch": 1.6821751884679683,
      "grad_norm": 2.828125,
      "learning_rate": 2.4404256589588022e-05,
      "loss": 0.8628,
      "step": 479970
    },
    {
      "epoch": 1.682210235974864,
      "grad_norm": 2.734375,
      "learning_rate": 2.440360756092432e-05,
      "loss": 0.8543,
      "step": 479980
    },
    {
      "epoch": 1.6822452834817594,
      "grad_norm": 3.03125,
      "learning_rate": 2.440295853226062e-05,
      "loss": 0.8502,
      "step": 479990
    },
    {
      "epoch": 1.6822803309886551,
      "grad_norm": 3.234375,
      "learning_rate": 2.4402309503596916e-05,
      "loss": 0.8454,
      "step": 480000
    },
    {
      "epoch": 1.6822803309886551,
      "eval_loss": 0.7817250490188599,
      "eval_runtime": 562.4734,
      "eval_samples_per_second": 676.363,
      "eval_steps_per_second": 56.364,
      "step": 480000
    },
    {
      "epoch": 1.6823153784955507,
      "grad_norm": 2.375,
      "learning_rate": 2.4401660474933218e-05,
      "loss": 0.8208,
      "step": 480010
    },
    {
      "epoch": 1.6823504260024462,
      "grad_norm": 2.65625,
      "learning_rate": 2.4401011446269516e-05,
      "loss": 0.8068,
      "step": 480020
    },
    {
      "epoch": 1.682385473509342,
      "grad_norm": 2.734375,
      "learning_rate": 2.4400362417605814e-05,
      "loss": 0.8079,
      "step": 480030
    },
    {
      "epoch": 1.6824205210162375,
      "grad_norm": 3.046875,
      "learning_rate": 2.4399713388942112e-05,
      "loss": 0.8469,
      "step": 480040
    },
    {
      "epoch": 1.682455568523133,
      "grad_norm": 3.234375,
      "learning_rate": 2.439906436027841e-05,
      "loss": 0.7392,
      "step": 480050
    },
    {
      "epoch": 1.6824906160300288,
      "grad_norm": 2.8125,
      "learning_rate": 2.4398415331614708e-05,
      "loss": 0.8314,
      "step": 480060
    },
    {
      "epoch": 1.6825256635369243,
      "grad_norm": 3.109375,
      "learning_rate": 2.4397766302951006e-05,
      "loss": 0.7718,
      "step": 480070
    },
    {
      "epoch": 1.6825607110438199,
      "grad_norm": 2.59375,
      "learning_rate": 2.4397117274287304e-05,
      "loss": 0.7553,
      "step": 480080
    },
    {
      "epoch": 1.6825957585507156,
      "grad_norm": 2.9375,
      "learning_rate": 2.4396468245623602e-05,
      "loss": 0.8178,
      "step": 480090
    },
    {
      "epoch": 1.682630806057611,
      "grad_norm": 3.09375,
      "learning_rate": 2.43958192169599e-05,
      "loss": 0.8772,
      "step": 480100
    },
    {
      "epoch": 1.6826658535645067,
      "grad_norm": 2.625,
      "learning_rate": 2.4395170188296194e-05,
      "loss": 0.8932,
      "step": 480110
    },
    {
      "epoch": 1.6827009010714022,
      "grad_norm": 3.09375,
      "learning_rate": 2.4394521159632496e-05,
      "loss": 0.8309,
      "step": 480120
    },
    {
      "epoch": 1.6827359485782978,
      "grad_norm": 3.25,
      "learning_rate": 2.4393872130968794e-05,
      "loss": 0.8733,
      "step": 480130
    },
    {
      "epoch": 1.6827709960851935,
      "grad_norm": 2.625,
      "learning_rate": 2.4393223102305092e-05,
      "loss": 0.8114,
      "step": 480140
    },
    {
      "epoch": 1.682806043592089,
      "grad_norm": 3.109375,
      "learning_rate": 2.439257407364139e-05,
      "loss": 0.8177,
      "step": 480150
    },
    {
      "epoch": 1.6828410910989846,
      "grad_norm": 3.1875,
      "learning_rate": 2.4391925044977688e-05,
      "loss": 0.8271,
      "step": 480160
    },
    {
      "epoch": 1.6828761386058804,
      "grad_norm": 2.578125,
      "learning_rate": 2.4391276016313986e-05,
      "loss": 0.7301,
      "step": 480170
    },
    {
      "epoch": 1.682911186112776,
      "grad_norm": 2.703125,
      "learning_rate": 2.4390626987650284e-05,
      "loss": 0.7466,
      "step": 480180
    },
    {
      "epoch": 1.6829462336196714,
      "grad_norm": 2.875,
      "learning_rate": 2.4389977958986582e-05,
      "loss": 0.9008,
      "step": 480190
    },
    {
      "epoch": 1.6829812811265672,
      "grad_norm": 2.71875,
      "learning_rate": 2.438932893032288e-05,
      "loss": 0.8428,
      "step": 480200
    },
    {
      "epoch": 1.6830163286334625,
      "grad_norm": 2.4375,
      "learning_rate": 2.4388679901659178e-05,
      "loss": 0.8459,
      "step": 480210
    },
    {
      "epoch": 1.6830513761403583,
      "grad_norm": 3.890625,
      "learning_rate": 2.4388030872995476e-05,
      "loss": 0.7989,
      "step": 480220
    },
    {
      "epoch": 1.6830864236472538,
      "grad_norm": 2.828125,
      "learning_rate": 2.4387381844331774e-05,
      "loss": 0.8615,
      "step": 480230
    },
    {
      "epoch": 1.6831214711541493,
      "grad_norm": 2.453125,
      "learning_rate": 2.4386732815668072e-05,
      "loss": 0.8511,
      "step": 480240
    },
    {
      "epoch": 1.683156518661045,
      "grad_norm": 3.21875,
      "learning_rate": 2.438608378700437e-05,
      "loss": 0.7886,
      "step": 480250
    },
    {
      "epoch": 1.6831915661679406,
      "grad_norm": 2.296875,
      "learning_rate": 2.438543475834067e-05,
      "loss": 0.8363,
      "step": 480260
    },
    {
      "epoch": 1.6832266136748362,
      "grad_norm": 2.953125,
      "learning_rate": 2.438478572967697e-05,
      "loss": 0.8241,
      "step": 480270
    },
    {
      "epoch": 1.683261661181732,
      "grad_norm": 2.84375,
      "learning_rate": 2.4384136701013267e-05,
      "loss": 0.8578,
      "step": 480280
    },
    {
      "epoch": 1.6832967086886275,
      "grad_norm": 2.9375,
      "learning_rate": 2.4383487672349562e-05,
      "loss": 0.8954,
      "step": 480290
    },
    {
      "epoch": 1.683331756195523,
      "grad_norm": 3.109375,
      "learning_rate": 2.438283864368586e-05,
      "loss": 0.7849,
      "step": 480300
    },
    {
      "epoch": 1.6833668037024188,
      "grad_norm": 2.578125,
      "learning_rate": 2.4382189615022158e-05,
      "loss": 0.9085,
      "step": 480310
    },
    {
      "epoch": 1.683401851209314,
      "grad_norm": 2.8125,
      "learning_rate": 2.4381540586358456e-05,
      "loss": 0.7927,
      "step": 480320
    },
    {
      "epoch": 1.6834368987162098,
      "grad_norm": 2.640625,
      "learning_rate": 2.4380891557694754e-05,
      "loss": 0.7872,
      "step": 480330
    },
    {
      "epoch": 1.6834719462231054,
      "grad_norm": 2.828125,
      "learning_rate": 2.4380242529031052e-05,
      "loss": 0.8403,
      "step": 480340
    },
    {
      "epoch": 1.683506993730001,
      "grad_norm": 2.90625,
      "learning_rate": 2.437959350036735e-05,
      "loss": 0.868,
      "step": 480350
    },
    {
      "epoch": 1.6835420412368967,
      "grad_norm": 3.03125,
      "learning_rate": 2.4378944471703648e-05,
      "loss": 0.8897,
      "step": 480360
    },
    {
      "epoch": 1.6835770887437922,
      "grad_norm": 2.84375,
      "learning_rate": 2.4378295443039946e-05,
      "loss": 0.8037,
      "step": 480370
    },
    {
      "epoch": 1.6836121362506877,
      "grad_norm": 2.828125,
      "learning_rate": 2.4377646414376247e-05,
      "loss": 0.9022,
      "step": 480380
    },
    {
      "epoch": 1.6836471837575835,
      "grad_norm": 2.84375,
      "learning_rate": 2.4376997385712545e-05,
      "loss": 0.8576,
      "step": 480390
    },
    {
      "epoch": 1.683682231264479,
      "grad_norm": 2.859375,
      "learning_rate": 2.4376348357048843e-05,
      "loss": 0.7971,
      "step": 480400
    },
    {
      "epoch": 1.6837172787713746,
      "grad_norm": 3.078125,
      "learning_rate": 2.437569932838514e-05,
      "loss": 0.8474,
      "step": 480410
    },
    {
      "epoch": 1.6837523262782703,
      "grad_norm": 2.953125,
      "learning_rate": 2.437505029972144e-05,
      "loss": 0.8622,
      "step": 480420
    },
    {
      "epoch": 1.6837873737851656,
      "grad_norm": 3.28125,
      "learning_rate": 2.4374401271057737e-05,
      "loss": 0.8112,
      "step": 480430
    },
    {
      "epoch": 1.6838224212920614,
      "grad_norm": 2.859375,
      "learning_rate": 2.4373752242394035e-05,
      "loss": 0.7695,
      "step": 480440
    },
    {
      "epoch": 1.6838574687989571,
      "grad_norm": 3.03125,
      "learning_rate": 2.4373103213730333e-05,
      "loss": 0.7723,
      "step": 480450
    },
    {
      "epoch": 1.6838925163058525,
      "grad_norm": 3.0,
      "learning_rate": 2.437245418506663e-05,
      "loss": 0.8207,
      "step": 480460
    },
    {
      "epoch": 1.6839275638127482,
      "grad_norm": 2.34375,
      "learning_rate": 2.437180515640293e-05,
      "loss": 0.7925,
      "step": 480470
    },
    {
      "epoch": 1.6839626113196438,
      "grad_norm": 2.78125,
      "learning_rate": 2.4371156127739224e-05,
      "loss": 0.7922,
      "step": 480480
    },
    {
      "epoch": 1.6839976588265393,
      "grad_norm": 3.421875,
      "learning_rate": 2.4370507099075525e-05,
      "loss": 0.8309,
      "step": 480490
    },
    {
      "epoch": 1.684032706333435,
      "grad_norm": 2.796875,
      "learning_rate": 2.4369858070411823e-05,
      "loss": 0.82,
      "step": 480500
    },
    {
      "epoch": 1.6840677538403306,
      "grad_norm": 2.9375,
      "learning_rate": 2.436920904174812e-05,
      "loss": 0.8091,
      "step": 480510
    },
    {
      "epoch": 1.6841028013472261,
      "grad_norm": 2.484375,
      "learning_rate": 2.436856001308442e-05,
      "loss": 0.7702,
      "step": 480520
    },
    {
      "epoch": 1.6841378488541219,
      "grad_norm": 2.671875,
      "learning_rate": 2.4367910984420717e-05,
      "loss": 0.7899,
      "step": 480530
    },
    {
      "epoch": 1.6841728963610174,
      "grad_norm": 2.8125,
      "learning_rate": 2.4367261955757015e-05,
      "loss": 0.8197,
      "step": 480540
    },
    {
      "epoch": 1.684207943867913,
      "grad_norm": 2.671875,
      "learning_rate": 2.4366612927093313e-05,
      "loss": 0.7903,
      "step": 480550
    },
    {
      "epoch": 1.6842429913748087,
      "grad_norm": 2.78125,
      "learning_rate": 2.436596389842961e-05,
      "loss": 0.8382,
      "step": 480560
    },
    {
      "epoch": 1.684278038881704,
      "grad_norm": 3.234375,
      "learning_rate": 2.436531486976591e-05,
      "loss": 0.8127,
      "step": 480570
    },
    {
      "epoch": 1.6843130863885998,
      "grad_norm": 2.671875,
      "learning_rate": 2.4364665841102207e-05,
      "loss": 0.8098,
      "step": 480580
    },
    {
      "epoch": 1.6843481338954953,
      "grad_norm": 2.859375,
      "learning_rate": 2.4364016812438505e-05,
      "loss": 0.7778,
      "step": 480590
    },
    {
      "epoch": 1.6843831814023908,
      "grad_norm": 3.015625,
      "learning_rate": 2.4363367783774803e-05,
      "loss": 0.834,
      "step": 480600
    },
    {
      "epoch": 1.6844182289092866,
      "grad_norm": 2.875,
      "learning_rate": 2.43627187551111e-05,
      "loss": 0.8249,
      "step": 480610
    },
    {
      "epoch": 1.6844532764161821,
      "grad_norm": 2.671875,
      "learning_rate": 2.43620697264474e-05,
      "loss": 0.8815,
      "step": 480620
    },
    {
      "epoch": 1.6844883239230777,
      "grad_norm": 3.15625,
      "learning_rate": 2.43614206977837e-05,
      "loss": 0.8754,
      "step": 480630
    },
    {
      "epoch": 1.6845233714299734,
      "grad_norm": 3.03125,
      "learning_rate": 2.436077166912e-05,
      "loss": 0.8844,
      "step": 480640
    },
    {
      "epoch": 1.684558418936869,
      "grad_norm": 3.03125,
      "learning_rate": 2.4360122640456297e-05,
      "loss": 0.8437,
      "step": 480650
    },
    {
      "epoch": 1.6845934664437645,
      "grad_norm": 3.03125,
      "learning_rate": 2.4359473611792595e-05,
      "loss": 0.7591,
      "step": 480660
    },
    {
      "epoch": 1.6846285139506603,
      "grad_norm": 2.65625,
      "learning_rate": 2.435882458312889e-05,
      "loss": 0.781,
      "step": 480670
    },
    {
      "epoch": 1.6846635614575556,
      "grad_norm": 2.875,
      "learning_rate": 2.4358175554465187e-05,
      "loss": 0.859,
      "step": 480680
    },
    {
      "epoch": 1.6846986089644513,
      "grad_norm": 2.78125,
      "learning_rate": 2.4357526525801485e-05,
      "loss": 0.8181,
      "step": 480690
    },
    {
      "epoch": 1.6847336564713469,
      "grad_norm": 2.828125,
      "learning_rate": 2.4356877497137783e-05,
      "loss": 0.8047,
      "step": 480700
    },
    {
      "epoch": 1.6847687039782424,
      "grad_norm": 3.515625,
      "learning_rate": 2.435622846847408e-05,
      "loss": 0.9184,
      "step": 480710
    },
    {
      "epoch": 1.6848037514851382,
      "grad_norm": 3.140625,
      "learning_rate": 2.435557943981038e-05,
      "loss": 0.7799,
      "step": 480720
    },
    {
      "epoch": 1.6848387989920337,
      "grad_norm": 2.578125,
      "learning_rate": 2.4354930411146677e-05,
      "loss": 0.8215,
      "step": 480730
    },
    {
      "epoch": 1.6848738464989292,
      "grad_norm": 2.875,
      "learning_rate": 2.435428138248298e-05,
      "loss": 0.7912,
      "step": 480740
    },
    {
      "epoch": 1.684908894005825,
      "grad_norm": 3.21875,
      "learning_rate": 2.4353632353819277e-05,
      "loss": 0.7553,
      "step": 480750
    },
    {
      "epoch": 1.6849439415127205,
      "grad_norm": 3.0625,
      "learning_rate": 2.4352983325155575e-05,
      "loss": 0.8515,
      "step": 480760
    },
    {
      "epoch": 1.684978989019616,
      "grad_norm": 3.1875,
      "learning_rate": 2.4352334296491873e-05,
      "loss": 0.7968,
      "step": 480770
    },
    {
      "epoch": 1.6850140365265118,
      "grad_norm": 3.078125,
      "learning_rate": 2.435168526782817e-05,
      "loss": 0.916,
      "step": 480780
    },
    {
      "epoch": 1.6850490840334071,
      "grad_norm": 2.734375,
      "learning_rate": 2.435103623916447e-05,
      "loss": 0.8488,
      "step": 480790
    },
    {
      "epoch": 1.685084131540303,
      "grad_norm": 2.765625,
      "learning_rate": 2.4350387210500767e-05,
      "loss": 0.8577,
      "step": 480800
    },
    {
      "epoch": 1.6851191790471984,
      "grad_norm": 3.109375,
      "learning_rate": 2.4349738181837065e-05,
      "loss": 0.7797,
      "step": 480810
    },
    {
      "epoch": 1.685154226554094,
      "grad_norm": 3.03125,
      "learning_rate": 2.4349089153173363e-05,
      "loss": 0.818,
      "step": 480820
    },
    {
      "epoch": 1.6851892740609897,
      "grad_norm": 2.4375,
      "learning_rate": 2.434844012450966e-05,
      "loss": 0.8123,
      "step": 480830
    },
    {
      "epoch": 1.6852243215678853,
      "grad_norm": 2.8125,
      "learning_rate": 2.434779109584596e-05,
      "loss": 0.8665,
      "step": 480840
    },
    {
      "epoch": 1.6852593690747808,
      "grad_norm": 2.328125,
      "learning_rate": 2.4347142067182257e-05,
      "loss": 0.8603,
      "step": 480850
    },
    {
      "epoch": 1.6852944165816766,
      "grad_norm": 2.984375,
      "learning_rate": 2.4346493038518555e-05,
      "loss": 0.8459,
      "step": 480860
    },
    {
      "epoch": 1.685329464088572,
      "grad_norm": 2.984375,
      "learning_rate": 2.4345844009854853e-05,
      "loss": 0.8415,
      "step": 480870
    },
    {
      "epoch": 1.6853645115954676,
      "grad_norm": 3.125,
      "learning_rate": 2.434519498119115e-05,
      "loss": 0.8701,
      "step": 480880
    },
    {
      "epoch": 1.6853995591023634,
      "grad_norm": 2.8125,
      "learning_rate": 2.434454595252745e-05,
      "loss": 0.9127,
      "step": 480890
    },
    {
      "epoch": 1.6854346066092587,
      "grad_norm": 2.953125,
      "learning_rate": 2.4343896923863747e-05,
      "loss": 0.8411,
      "step": 480900
    },
    {
      "epoch": 1.6854696541161545,
      "grad_norm": 3.140625,
      "learning_rate": 2.4343247895200045e-05,
      "loss": 0.8613,
      "step": 480910
    },
    {
      "epoch": 1.68550470162305,
      "grad_norm": 2.734375,
      "learning_rate": 2.4342598866536343e-05,
      "loss": 0.8219,
      "step": 480920
    },
    {
      "epoch": 1.6855397491299455,
      "grad_norm": 2.9375,
      "learning_rate": 2.434194983787264e-05,
      "loss": 0.7931,
      "step": 480930
    },
    {
      "epoch": 1.6855747966368413,
      "grad_norm": 2.984375,
      "learning_rate": 2.434130080920894e-05,
      "loss": 0.8232,
      "step": 480940
    },
    {
      "epoch": 1.6856098441437368,
      "grad_norm": 3.0,
      "learning_rate": 2.4340651780545237e-05,
      "loss": 0.814,
      "step": 480950
    },
    {
      "epoch": 1.6856448916506324,
      "grad_norm": 2.875,
      "learning_rate": 2.4340002751881535e-05,
      "loss": 0.8941,
      "step": 480960
    },
    {
      "epoch": 1.6856799391575281,
      "grad_norm": 2.78125,
      "learning_rate": 2.4339353723217833e-05,
      "loss": 0.7878,
      "step": 480970
    },
    {
      "epoch": 1.6857149866644237,
      "grad_norm": 3.328125,
      "learning_rate": 2.433870469455413e-05,
      "loss": 0.8673,
      "step": 480980
    },
    {
      "epoch": 1.6857500341713192,
      "grad_norm": 2.90625,
      "learning_rate": 2.433805566589043e-05,
      "loss": 0.8649,
      "step": 480990
    },
    {
      "epoch": 1.685785081678215,
      "grad_norm": 3.15625,
      "learning_rate": 2.433740663722673e-05,
      "loss": 0.8316,
      "step": 481000
    },
    {
      "epoch": 1.6858201291851103,
      "grad_norm": 3.03125,
      "learning_rate": 2.4336757608563028e-05,
      "loss": 0.8353,
      "step": 481010
    },
    {
      "epoch": 1.685855176692006,
      "grad_norm": 3.15625,
      "learning_rate": 2.4336108579899326e-05,
      "loss": 0.8372,
      "step": 481020
    },
    {
      "epoch": 1.6858902241989018,
      "grad_norm": 2.765625,
      "learning_rate": 2.4335459551235624e-05,
      "loss": 0.85,
      "step": 481030
    },
    {
      "epoch": 1.685925271705797,
      "grad_norm": 2.390625,
      "learning_rate": 2.4334810522571922e-05,
      "loss": 0.7666,
      "step": 481040
    },
    {
      "epoch": 1.6859603192126928,
      "grad_norm": 2.734375,
      "learning_rate": 2.4334161493908217e-05,
      "loss": 0.8683,
      "step": 481050
    },
    {
      "epoch": 1.6859953667195884,
      "grad_norm": 2.6875,
      "learning_rate": 2.4333512465244515e-05,
      "loss": 0.8879,
      "step": 481060
    },
    {
      "epoch": 1.686030414226484,
      "grad_norm": 3.171875,
      "learning_rate": 2.4332863436580813e-05,
      "loss": 0.8272,
      "step": 481070
    },
    {
      "epoch": 1.6860654617333797,
      "grad_norm": 3.03125,
      "learning_rate": 2.433221440791711e-05,
      "loss": 0.8173,
      "step": 481080
    },
    {
      "epoch": 1.6861005092402752,
      "grad_norm": 3.34375,
      "learning_rate": 2.433156537925341e-05,
      "loss": 0.8631,
      "step": 481090
    },
    {
      "epoch": 1.6861355567471707,
      "grad_norm": 3.171875,
      "learning_rate": 2.4330916350589707e-05,
      "loss": 0.906,
      "step": 481100
    },
    {
      "epoch": 1.6861706042540665,
      "grad_norm": 2.84375,
      "learning_rate": 2.4330267321926008e-05,
      "loss": 0.8823,
      "step": 481110
    },
    {
      "epoch": 1.6862056517609618,
      "grad_norm": 3.0625,
      "learning_rate": 2.4329618293262306e-05,
      "loss": 0.8411,
      "step": 481120
    },
    {
      "epoch": 1.6862406992678576,
      "grad_norm": 3.046875,
      "learning_rate": 2.4328969264598604e-05,
      "loss": 0.8562,
      "step": 481130
    },
    {
      "epoch": 1.6862757467747533,
      "grad_norm": 3.03125,
      "learning_rate": 2.4328320235934902e-05,
      "loss": 0.8912,
      "step": 481140
    },
    {
      "epoch": 1.6863107942816487,
      "grad_norm": 2.59375,
      "learning_rate": 2.43276712072712e-05,
      "loss": 0.8266,
      "step": 481150
    },
    {
      "epoch": 1.6863458417885444,
      "grad_norm": 2.90625,
      "learning_rate": 2.4327022178607498e-05,
      "loss": 0.8326,
      "step": 481160
    },
    {
      "epoch": 1.68638088929544,
      "grad_norm": 2.96875,
      "learning_rate": 2.4326373149943796e-05,
      "loss": 0.8736,
      "step": 481170
    },
    {
      "epoch": 1.6864159368023355,
      "grad_norm": 2.9375,
      "learning_rate": 2.4325724121280094e-05,
      "loss": 0.8791,
      "step": 481180
    },
    {
      "epoch": 1.6864509843092312,
      "grad_norm": 2.625,
      "learning_rate": 2.4325075092616392e-05,
      "loss": 0.7665,
      "step": 481190
    },
    {
      "epoch": 1.6864860318161268,
      "grad_norm": 3.546875,
      "learning_rate": 2.432442606395269e-05,
      "loss": 0.8949,
      "step": 481200
    },
    {
      "epoch": 1.6865210793230223,
      "grad_norm": 2.984375,
      "learning_rate": 2.4323777035288988e-05,
      "loss": 0.815,
      "step": 481210
    },
    {
      "epoch": 1.686556126829918,
      "grad_norm": 2.8125,
      "learning_rate": 2.4323128006625286e-05,
      "loss": 0.8076,
      "step": 481220
    },
    {
      "epoch": 1.6865911743368136,
      "grad_norm": 3.125,
      "learning_rate": 2.4322478977961584e-05,
      "loss": 0.8389,
      "step": 481230
    },
    {
      "epoch": 1.6866262218437091,
      "grad_norm": 2.953125,
      "learning_rate": 2.4321829949297882e-05,
      "loss": 0.7439,
      "step": 481240
    },
    {
      "epoch": 1.686661269350605,
      "grad_norm": 3.609375,
      "learning_rate": 2.432118092063418e-05,
      "loss": 0.874,
      "step": 481250
    },
    {
      "epoch": 1.6866963168575002,
      "grad_norm": 2.59375,
      "learning_rate": 2.4320531891970478e-05,
      "loss": 0.8029,
      "step": 481260
    },
    {
      "epoch": 1.686731364364396,
      "grad_norm": 2.9375,
      "learning_rate": 2.4319882863306776e-05,
      "loss": 0.8697,
      "step": 481270
    },
    {
      "epoch": 1.6867664118712915,
      "grad_norm": 2.96875,
      "learning_rate": 2.4319233834643074e-05,
      "loss": 0.8601,
      "step": 481280
    },
    {
      "epoch": 1.686801459378187,
      "grad_norm": 3.109375,
      "learning_rate": 2.4318584805979372e-05,
      "loss": 0.8346,
      "step": 481290
    },
    {
      "epoch": 1.6868365068850828,
      "grad_norm": 2.96875,
      "learning_rate": 2.431793577731567e-05,
      "loss": 0.7932,
      "step": 481300
    },
    {
      "epoch": 1.6868715543919783,
      "grad_norm": 3.078125,
      "learning_rate": 2.4317286748651968e-05,
      "loss": 0.8562,
      "step": 481310
    },
    {
      "epoch": 1.6869066018988739,
      "grad_norm": 2.671875,
      "learning_rate": 2.4316637719988266e-05,
      "loss": 0.8184,
      "step": 481320
    },
    {
      "epoch": 1.6869416494057696,
      "grad_norm": 2.84375,
      "learning_rate": 2.4315988691324564e-05,
      "loss": 0.7912,
      "step": 481330
    },
    {
      "epoch": 1.6869766969126652,
      "grad_norm": 2.953125,
      "learning_rate": 2.4315339662660862e-05,
      "loss": 0.8564,
      "step": 481340
    },
    {
      "epoch": 1.6870117444195607,
      "grad_norm": 3.375,
      "learning_rate": 2.431469063399716e-05,
      "loss": 0.7929,
      "step": 481350
    },
    {
      "epoch": 1.6870467919264565,
      "grad_norm": 2.390625,
      "learning_rate": 2.431404160533346e-05,
      "loss": 0.8025,
      "step": 481360
    },
    {
      "epoch": 1.6870818394333518,
      "grad_norm": 2.875,
      "learning_rate": 2.431339257666976e-05,
      "loss": 0.8203,
      "step": 481370
    },
    {
      "epoch": 1.6871168869402475,
      "grad_norm": 2.90625,
      "learning_rate": 2.4312743548006057e-05,
      "loss": 0.8173,
      "step": 481380
    },
    {
      "epoch": 1.687151934447143,
      "grad_norm": 2.625,
      "learning_rate": 2.4312094519342355e-05,
      "loss": 0.8124,
      "step": 481390
    },
    {
      "epoch": 1.6871869819540386,
      "grad_norm": 3.0625,
      "learning_rate": 2.4311445490678653e-05,
      "loss": 0.8681,
      "step": 481400
    },
    {
      "epoch": 1.6872220294609344,
      "grad_norm": 3.046875,
      "learning_rate": 2.431079646201495e-05,
      "loss": 0.8842,
      "step": 481410
    },
    {
      "epoch": 1.68725707696783,
      "grad_norm": 3.25,
      "learning_rate": 2.4310147433351246e-05,
      "loss": 0.8815,
      "step": 481420
    },
    {
      "epoch": 1.6872921244747254,
      "grad_norm": 2.71875,
      "learning_rate": 2.4309498404687544e-05,
      "loss": 0.8116,
      "step": 481430
    },
    {
      "epoch": 1.6873271719816212,
      "grad_norm": 2.84375,
      "learning_rate": 2.4308849376023842e-05,
      "loss": 0.8006,
      "step": 481440
    },
    {
      "epoch": 1.6873622194885167,
      "grad_norm": 2.921875,
      "learning_rate": 2.430820034736014e-05,
      "loss": 0.8295,
      "step": 481450
    },
    {
      "epoch": 1.6873972669954123,
      "grad_norm": 2.6875,
      "learning_rate": 2.4307551318696438e-05,
      "loss": 0.7924,
      "step": 481460
    },
    {
      "epoch": 1.687432314502308,
      "grad_norm": 3.046875,
      "learning_rate": 2.4306902290032736e-05,
      "loss": 0.8454,
      "step": 481470
    },
    {
      "epoch": 1.6874673620092033,
      "grad_norm": 2.8125,
      "learning_rate": 2.4306253261369037e-05,
      "loss": 0.8435,
      "step": 481480
    },
    {
      "epoch": 1.687502409516099,
      "grad_norm": 2.828125,
      "learning_rate": 2.4305604232705335e-05,
      "loss": 0.8046,
      "step": 481490
    },
    {
      "epoch": 1.6875374570229946,
      "grad_norm": 3.21875,
      "learning_rate": 2.4304955204041633e-05,
      "loss": 0.8961,
      "step": 481500
    },
    {
      "epoch": 1.6875725045298902,
      "grad_norm": 2.65625,
      "learning_rate": 2.430430617537793e-05,
      "loss": 0.7836,
      "step": 481510
    },
    {
      "epoch": 1.687607552036786,
      "grad_norm": 2.859375,
      "learning_rate": 2.430365714671423e-05,
      "loss": 0.8531,
      "step": 481520
    },
    {
      "epoch": 1.6876425995436815,
      "grad_norm": 2.515625,
      "learning_rate": 2.4303008118050527e-05,
      "loss": 0.7959,
      "step": 481530
    },
    {
      "epoch": 1.687677647050577,
      "grad_norm": 3.1875,
      "learning_rate": 2.4302359089386825e-05,
      "loss": 0.8212,
      "step": 481540
    },
    {
      "epoch": 1.6877126945574727,
      "grad_norm": 3.3125,
      "learning_rate": 2.4301710060723123e-05,
      "loss": 0.8439,
      "step": 481550
    },
    {
      "epoch": 1.6877477420643683,
      "grad_norm": 2.90625,
      "learning_rate": 2.430106103205942e-05,
      "loss": 0.7833,
      "step": 481560
    },
    {
      "epoch": 1.6877827895712638,
      "grad_norm": 3.328125,
      "learning_rate": 2.430041200339572e-05,
      "loss": 0.8534,
      "step": 481570
    },
    {
      "epoch": 1.6878178370781596,
      "grad_norm": 2.921875,
      "learning_rate": 2.4299762974732017e-05,
      "loss": 0.8773,
      "step": 481580
    },
    {
      "epoch": 1.687852884585055,
      "grad_norm": 3.375,
      "learning_rate": 2.4299113946068315e-05,
      "loss": 0.8398,
      "step": 481590
    },
    {
      "epoch": 1.6878879320919506,
      "grad_norm": 2.671875,
      "learning_rate": 2.4298464917404613e-05,
      "loss": 0.8149,
      "step": 481600
    },
    {
      "epoch": 1.6879229795988462,
      "grad_norm": 2.59375,
      "learning_rate": 2.429781588874091e-05,
      "loss": 0.8815,
      "step": 481610
    },
    {
      "epoch": 1.6879580271057417,
      "grad_norm": 3.03125,
      "learning_rate": 2.429716686007721e-05,
      "loss": 0.8279,
      "step": 481620
    },
    {
      "epoch": 1.6879930746126375,
      "grad_norm": 3.015625,
      "learning_rate": 2.4296517831413507e-05,
      "loss": 0.9572,
      "step": 481630
    },
    {
      "epoch": 1.688028122119533,
      "grad_norm": 2.890625,
      "learning_rate": 2.4295868802749805e-05,
      "loss": 0.7962,
      "step": 481640
    },
    {
      "epoch": 1.6880631696264286,
      "grad_norm": 3.125,
      "learning_rate": 2.4295219774086103e-05,
      "loss": 0.9058,
      "step": 481650
    },
    {
      "epoch": 1.6880982171333243,
      "grad_norm": 2.859375,
      "learning_rate": 2.42945707454224e-05,
      "loss": 0.8262,
      "step": 481660
    },
    {
      "epoch": 1.6881332646402198,
      "grad_norm": 3.21875,
      "learning_rate": 2.42939217167587e-05,
      "loss": 0.8646,
      "step": 481670
    },
    {
      "epoch": 1.6881683121471154,
      "grad_norm": 2.484375,
      "learning_rate": 2.4293272688094997e-05,
      "loss": 0.8685,
      "step": 481680
    },
    {
      "epoch": 1.6882033596540111,
      "grad_norm": 2.953125,
      "learning_rate": 2.4292623659431295e-05,
      "loss": 0.8343,
      "step": 481690
    },
    {
      "epoch": 1.6882384071609065,
      "grad_norm": 2.984375,
      "learning_rate": 2.4291974630767593e-05,
      "loss": 0.8312,
      "step": 481700
    },
    {
      "epoch": 1.6882734546678022,
      "grad_norm": 3.421875,
      "learning_rate": 2.429132560210389e-05,
      "loss": 0.9261,
      "step": 481710
    },
    {
      "epoch": 1.688308502174698,
      "grad_norm": 2.75,
      "learning_rate": 2.429067657344019e-05,
      "loss": 0.8436,
      "step": 481720
    },
    {
      "epoch": 1.6883435496815933,
      "grad_norm": 2.9375,
      "learning_rate": 2.429002754477649e-05,
      "loss": 0.8244,
      "step": 481730
    },
    {
      "epoch": 1.688378597188489,
      "grad_norm": 2.6875,
      "learning_rate": 2.428937851611279e-05,
      "loss": 0.7839,
      "step": 481740
    },
    {
      "epoch": 1.6884136446953846,
      "grad_norm": 3.140625,
      "learning_rate": 2.4288729487449087e-05,
      "loss": 0.8774,
      "step": 481750
    },
    {
      "epoch": 1.68844869220228,
      "grad_norm": 2.421875,
      "learning_rate": 2.4288080458785385e-05,
      "loss": 0.8185,
      "step": 481760
    },
    {
      "epoch": 1.6884837397091759,
      "grad_norm": 2.6875,
      "learning_rate": 2.4287431430121683e-05,
      "loss": 0.8393,
      "step": 481770
    },
    {
      "epoch": 1.6885187872160714,
      "grad_norm": 2.984375,
      "learning_rate": 2.428678240145798e-05,
      "loss": 0.915,
      "step": 481780
    },
    {
      "epoch": 1.688553834722967,
      "grad_norm": 2.796875,
      "learning_rate": 2.428613337279428e-05,
      "loss": 0.842,
      "step": 481790
    },
    {
      "epoch": 1.6885888822298627,
      "grad_norm": 2.78125,
      "learning_rate": 2.4285484344130573e-05,
      "loss": 0.8224,
      "step": 481800
    },
    {
      "epoch": 1.688623929736758,
      "grad_norm": 2.953125,
      "learning_rate": 2.428483531546687e-05,
      "loss": 0.8041,
      "step": 481810
    },
    {
      "epoch": 1.6886589772436538,
      "grad_norm": 2.90625,
      "learning_rate": 2.428418628680317e-05,
      "loss": 0.8501,
      "step": 481820
    },
    {
      "epoch": 1.6886940247505495,
      "grad_norm": 3.390625,
      "learning_rate": 2.4283537258139467e-05,
      "loss": 0.8998,
      "step": 481830
    },
    {
      "epoch": 1.6887290722574448,
      "grad_norm": 3.328125,
      "learning_rate": 2.428288822947577e-05,
      "loss": 0.8433,
      "step": 481840
    },
    {
      "epoch": 1.6887641197643406,
      "grad_norm": 2.78125,
      "learning_rate": 2.4282239200812067e-05,
      "loss": 0.8252,
      "step": 481850
    },
    {
      "epoch": 1.6887991672712361,
      "grad_norm": 2.59375,
      "learning_rate": 2.4281590172148365e-05,
      "loss": 0.7605,
      "step": 481860
    },
    {
      "epoch": 1.6888342147781317,
      "grad_norm": 2.859375,
      "learning_rate": 2.4280941143484663e-05,
      "loss": 0.8493,
      "step": 481870
    },
    {
      "epoch": 1.6888692622850274,
      "grad_norm": 2.5625,
      "learning_rate": 2.428029211482096e-05,
      "loss": 0.7913,
      "step": 481880
    },
    {
      "epoch": 1.688904309791923,
      "grad_norm": 2.59375,
      "learning_rate": 2.427964308615726e-05,
      "loss": 0.9023,
      "step": 481890
    },
    {
      "epoch": 1.6889393572988185,
      "grad_norm": 2.859375,
      "learning_rate": 2.4278994057493557e-05,
      "loss": 0.9358,
      "step": 481900
    },
    {
      "epoch": 1.6889744048057143,
      "grad_norm": 2.75,
      "learning_rate": 2.4278345028829855e-05,
      "loss": 0.8443,
      "step": 481910
    },
    {
      "epoch": 1.6890094523126098,
      "grad_norm": 2.703125,
      "learning_rate": 2.4277696000166153e-05,
      "loss": 0.7261,
      "step": 481920
    },
    {
      "epoch": 1.6890444998195053,
      "grad_norm": 3.40625,
      "learning_rate": 2.427704697150245e-05,
      "loss": 0.8704,
      "step": 481930
    },
    {
      "epoch": 1.689079547326401,
      "grad_norm": 2.71875,
      "learning_rate": 2.427639794283875e-05,
      "loss": 0.8631,
      "step": 481940
    },
    {
      "epoch": 1.6891145948332964,
      "grad_norm": 3.203125,
      "learning_rate": 2.4275748914175047e-05,
      "loss": 0.8105,
      "step": 481950
    },
    {
      "epoch": 1.6891496423401922,
      "grad_norm": 2.515625,
      "learning_rate": 2.4275099885511345e-05,
      "loss": 0.8608,
      "step": 481960
    },
    {
      "epoch": 1.6891846898470877,
      "grad_norm": 3.484375,
      "learning_rate": 2.4274450856847643e-05,
      "loss": 0.8311,
      "step": 481970
    },
    {
      "epoch": 1.6892197373539832,
      "grad_norm": 2.84375,
      "learning_rate": 2.4273801828183944e-05,
      "loss": 0.8401,
      "step": 481980
    },
    {
      "epoch": 1.689254784860879,
      "grad_norm": 3.015625,
      "learning_rate": 2.427315279952024e-05,
      "loss": 0.8092,
      "step": 481990
    },
    {
      "epoch": 1.6892898323677745,
      "grad_norm": 3.21875,
      "learning_rate": 2.4272503770856537e-05,
      "loss": 0.8022,
      "step": 482000
    },
    {
      "epoch": 1.68932487987467,
      "grad_norm": 2.9375,
      "learning_rate": 2.4271854742192835e-05,
      "loss": 0.8503,
      "step": 482010
    },
    {
      "epoch": 1.6893599273815658,
      "grad_norm": 2.75,
      "learning_rate": 2.4271205713529133e-05,
      "loss": 0.7775,
      "step": 482020
    },
    {
      "epoch": 1.6893949748884614,
      "grad_norm": 2.703125,
      "learning_rate": 2.427055668486543e-05,
      "loss": 0.7242,
      "step": 482030
    },
    {
      "epoch": 1.689430022395357,
      "grad_norm": 2.734375,
      "learning_rate": 2.426990765620173e-05,
      "loss": 0.8153,
      "step": 482040
    },
    {
      "epoch": 1.6894650699022526,
      "grad_norm": 2.859375,
      "learning_rate": 2.4269258627538027e-05,
      "loss": 0.8931,
      "step": 482050
    },
    {
      "epoch": 1.689500117409148,
      "grad_norm": 3.03125,
      "learning_rate": 2.4268609598874325e-05,
      "loss": 0.8785,
      "step": 482060
    },
    {
      "epoch": 1.6895351649160437,
      "grad_norm": 2.671875,
      "learning_rate": 2.4267960570210623e-05,
      "loss": 0.7817,
      "step": 482070
    },
    {
      "epoch": 1.6895702124229393,
      "grad_norm": 2.4375,
      "learning_rate": 2.426731154154692e-05,
      "loss": 0.7593,
      "step": 482080
    },
    {
      "epoch": 1.6896052599298348,
      "grad_norm": 3.109375,
      "learning_rate": 2.426666251288322e-05,
      "loss": 0.8493,
      "step": 482090
    },
    {
      "epoch": 1.6896403074367305,
      "grad_norm": 3.078125,
      "learning_rate": 2.426601348421952e-05,
      "loss": 0.8231,
      "step": 482100
    },
    {
      "epoch": 1.689675354943626,
      "grad_norm": 2.796875,
      "learning_rate": 2.4265364455555818e-05,
      "loss": 0.8143,
      "step": 482110
    },
    {
      "epoch": 1.6897104024505216,
      "grad_norm": 2.75,
      "learning_rate": 2.4264715426892116e-05,
      "loss": 0.8421,
      "step": 482120
    },
    {
      "epoch": 1.6897454499574174,
      "grad_norm": 2.84375,
      "learning_rate": 2.4264066398228414e-05,
      "loss": 0.9119,
      "step": 482130
    },
    {
      "epoch": 1.689780497464313,
      "grad_norm": 2.875,
      "learning_rate": 2.4263417369564712e-05,
      "loss": 0.836,
      "step": 482140
    },
    {
      "epoch": 1.6898155449712084,
      "grad_norm": 2.609375,
      "learning_rate": 2.426276834090101e-05,
      "loss": 0.7599,
      "step": 482150
    },
    {
      "epoch": 1.6898505924781042,
      "grad_norm": 2.875,
      "learning_rate": 2.4262119312237308e-05,
      "loss": 0.7642,
      "step": 482160
    },
    {
      "epoch": 1.6898856399849995,
      "grad_norm": 3.109375,
      "learning_rate": 2.4261470283573603e-05,
      "loss": 0.8388,
      "step": 482170
    },
    {
      "epoch": 1.6899206874918953,
      "grad_norm": 2.984375,
      "learning_rate": 2.42608212549099e-05,
      "loss": 0.8899,
      "step": 482180
    },
    {
      "epoch": 1.6899557349987908,
      "grad_norm": 2.65625,
      "learning_rate": 2.42601722262462e-05,
      "loss": 0.8298,
      "step": 482190
    },
    {
      "epoch": 1.6899907825056864,
      "grad_norm": 2.765625,
      "learning_rate": 2.4259523197582497e-05,
      "loss": 0.8166,
      "step": 482200
    },
    {
      "epoch": 1.690025830012582,
      "grad_norm": 3.125,
      "learning_rate": 2.4258874168918798e-05,
      "loss": 0.8206,
      "step": 482210
    },
    {
      "epoch": 1.6900608775194776,
      "grad_norm": 2.890625,
      "learning_rate": 2.4258225140255096e-05,
      "loss": 0.8594,
      "step": 482220
    },
    {
      "epoch": 1.6900959250263732,
      "grad_norm": 2.59375,
      "learning_rate": 2.4257576111591394e-05,
      "loss": 0.8431,
      "step": 482230
    },
    {
      "epoch": 1.690130972533269,
      "grad_norm": 2.796875,
      "learning_rate": 2.4256927082927692e-05,
      "loss": 0.8791,
      "step": 482240
    },
    {
      "epoch": 1.6901660200401645,
      "grad_norm": 3.234375,
      "learning_rate": 2.425627805426399e-05,
      "loss": 0.8236,
      "step": 482250
    },
    {
      "epoch": 1.69020106754706,
      "grad_norm": 2.53125,
      "learning_rate": 2.4255629025600288e-05,
      "loss": 0.8898,
      "step": 482260
    },
    {
      "epoch": 1.6902361150539558,
      "grad_norm": 2.390625,
      "learning_rate": 2.4254979996936586e-05,
      "loss": 0.807,
      "step": 482270
    },
    {
      "epoch": 1.690271162560851,
      "grad_norm": 3.03125,
      "learning_rate": 2.4254330968272884e-05,
      "loss": 0.8885,
      "step": 482280
    },
    {
      "epoch": 1.6903062100677468,
      "grad_norm": 2.65625,
      "learning_rate": 2.4253681939609182e-05,
      "loss": 0.8157,
      "step": 482290
    },
    {
      "epoch": 1.6903412575746424,
      "grad_norm": 2.640625,
      "learning_rate": 2.425303291094548e-05,
      "loss": 0.835,
      "step": 482300
    },
    {
      "epoch": 1.690376305081538,
      "grad_norm": 3.0625,
      "learning_rate": 2.4252383882281778e-05,
      "loss": 0.698,
      "step": 482310
    },
    {
      "epoch": 1.6904113525884337,
      "grad_norm": 2.9375,
      "learning_rate": 2.4251734853618076e-05,
      "loss": 0.8193,
      "step": 482320
    },
    {
      "epoch": 1.6904464000953292,
      "grad_norm": 3.015625,
      "learning_rate": 2.4251085824954374e-05,
      "loss": 0.9197,
      "step": 482330
    },
    {
      "epoch": 1.6904814476022247,
      "grad_norm": 3.140625,
      "learning_rate": 2.4250436796290672e-05,
      "loss": 0.859,
      "step": 482340
    },
    {
      "epoch": 1.6905164951091205,
      "grad_norm": 2.640625,
      "learning_rate": 2.4249787767626973e-05,
      "loss": 0.9303,
      "step": 482350
    },
    {
      "epoch": 1.690551542616016,
      "grad_norm": 2.625,
      "learning_rate": 2.4249138738963268e-05,
      "loss": 0.8201,
      "step": 482360
    },
    {
      "epoch": 1.6905865901229116,
      "grad_norm": 2.828125,
      "learning_rate": 2.4248489710299566e-05,
      "loss": 0.7987,
      "step": 482370
    },
    {
      "epoch": 1.6906216376298073,
      "grad_norm": 2.84375,
      "learning_rate": 2.4247840681635864e-05,
      "loss": 0.8943,
      "step": 482380
    },
    {
      "epoch": 1.6906566851367026,
      "grad_norm": 2.6875,
      "learning_rate": 2.4247191652972162e-05,
      "loss": 0.8501,
      "step": 482390
    },
    {
      "epoch": 1.6906917326435984,
      "grad_norm": 2.734375,
      "learning_rate": 2.424654262430846e-05,
      "loss": 0.8736,
      "step": 482400
    },
    {
      "epoch": 1.6907267801504942,
      "grad_norm": 2.859375,
      "learning_rate": 2.4245893595644758e-05,
      "loss": 0.8397,
      "step": 482410
    },
    {
      "epoch": 1.6907618276573895,
      "grad_norm": 2.703125,
      "learning_rate": 2.4245244566981056e-05,
      "loss": 0.8488,
      "step": 482420
    },
    {
      "epoch": 1.6907968751642852,
      "grad_norm": 3.3125,
      "learning_rate": 2.4244595538317354e-05,
      "loss": 0.9004,
      "step": 482430
    },
    {
      "epoch": 1.6908319226711808,
      "grad_norm": 3.25,
      "learning_rate": 2.4243946509653652e-05,
      "loss": 0.873,
      "step": 482440
    },
    {
      "epoch": 1.6908669701780763,
      "grad_norm": 2.859375,
      "learning_rate": 2.424329748098995e-05,
      "loss": 0.8436,
      "step": 482450
    },
    {
      "epoch": 1.690902017684972,
      "grad_norm": 2.703125,
      "learning_rate": 2.424264845232625e-05,
      "loss": 0.8595,
      "step": 482460
    },
    {
      "epoch": 1.6909370651918676,
      "grad_norm": 2.859375,
      "learning_rate": 2.424199942366255e-05,
      "loss": 0.8579,
      "step": 482470
    },
    {
      "epoch": 1.6909721126987631,
      "grad_norm": 2.734375,
      "learning_rate": 2.4241350394998847e-05,
      "loss": 0.8349,
      "step": 482480
    },
    {
      "epoch": 1.6910071602056589,
      "grad_norm": 2.546875,
      "learning_rate": 2.4240701366335145e-05,
      "loss": 0.7643,
      "step": 482490
    },
    {
      "epoch": 1.6910422077125542,
      "grad_norm": 3.46875,
      "learning_rate": 2.4240052337671443e-05,
      "loss": 0.6638,
      "step": 482500
    },
    {
      "epoch": 1.69107725521945,
      "grad_norm": 3.0,
      "learning_rate": 2.423940330900774e-05,
      "loss": 0.7615,
      "step": 482510
    },
    {
      "epoch": 1.6911123027263457,
      "grad_norm": 3.484375,
      "learning_rate": 2.423875428034404e-05,
      "loss": 0.8558,
      "step": 482520
    },
    {
      "epoch": 1.691147350233241,
      "grad_norm": 2.125,
      "learning_rate": 2.4238105251680337e-05,
      "loss": 0.9044,
      "step": 482530
    },
    {
      "epoch": 1.6911823977401368,
      "grad_norm": 3.125,
      "learning_rate": 2.4237456223016635e-05,
      "loss": 0.8135,
      "step": 482540
    },
    {
      "epoch": 1.6912174452470323,
      "grad_norm": 3.15625,
      "learning_rate": 2.423680719435293e-05,
      "loss": 0.8075,
      "step": 482550
    },
    {
      "epoch": 1.6912524927539279,
      "grad_norm": 3.046875,
      "learning_rate": 2.4236158165689228e-05,
      "loss": 0.8465,
      "step": 482560
    },
    {
      "epoch": 1.6912875402608236,
      "grad_norm": 2.90625,
      "learning_rate": 2.4235509137025526e-05,
      "loss": 0.7994,
      "step": 482570
    },
    {
      "epoch": 1.6913225877677192,
      "grad_norm": 2.96875,
      "learning_rate": 2.4234860108361827e-05,
      "loss": 0.837,
      "step": 482580
    },
    {
      "epoch": 1.6913576352746147,
      "grad_norm": 2.8125,
      "learning_rate": 2.4234211079698125e-05,
      "loss": 0.8165,
      "step": 482590
    },
    {
      "epoch": 1.6913926827815104,
      "grad_norm": 3.375,
      "learning_rate": 2.4233562051034423e-05,
      "loss": 0.9267,
      "step": 482600
    },
    {
      "epoch": 1.691427730288406,
      "grad_norm": 3.078125,
      "learning_rate": 2.423291302237072e-05,
      "loss": 0.7592,
      "step": 482610
    },
    {
      "epoch": 1.6914627777953015,
      "grad_norm": 3.203125,
      "learning_rate": 2.423226399370702e-05,
      "loss": 0.745,
      "step": 482620
    },
    {
      "epoch": 1.6914978253021973,
      "grad_norm": 2.4375,
      "learning_rate": 2.4231614965043317e-05,
      "loss": 0.8703,
      "step": 482630
    },
    {
      "epoch": 1.6915328728090926,
      "grad_norm": 2.59375,
      "learning_rate": 2.4230965936379615e-05,
      "loss": 0.837,
      "step": 482640
    },
    {
      "epoch": 1.6915679203159883,
      "grad_norm": 3.234375,
      "learning_rate": 2.4230316907715913e-05,
      "loss": 0.847,
      "step": 482650
    },
    {
      "epoch": 1.6916029678228839,
      "grad_norm": 2.640625,
      "learning_rate": 2.422966787905221e-05,
      "loss": 0.7811,
      "step": 482660
    },
    {
      "epoch": 1.6916380153297794,
      "grad_norm": 2.75,
      "learning_rate": 2.422901885038851e-05,
      "loss": 0.8276,
      "step": 482670
    },
    {
      "epoch": 1.6916730628366752,
      "grad_norm": 2.703125,
      "learning_rate": 2.4228369821724807e-05,
      "loss": 0.8201,
      "step": 482680
    },
    {
      "epoch": 1.6917081103435707,
      "grad_norm": 3.03125,
      "learning_rate": 2.4227720793061105e-05,
      "loss": 0.9017,
      "step": 482690
    },
    {
      "epoch": 1.6917431578504663,
      "grad_norm": 3.15625,
      "learning_rate": 2.4227071764397403e-05,
      "loss": 0.7795,
      "step": 482700
    },
    {
      "epoch": 1.691778205357362,
      "grad_norm": 2.78125,
      "learning_rate": 2.42264227357337e-05,
      "loss": 0.7794,
      "step": 482710
    },
    {
      "epoch": 1.6918132528642575,
      "grad_norm": 2.6875,
      "learning_rate": 2.4225773707070003e-05,
      "loss": 0.9314,
      "step": 482720
    },
    {
      "epoch": 1.691848300371153,
      "grad_norm": 2.8125,
      "learning_rate": 2.42251246784063e-05,
      "loss": 0.8526,
      "step": 482730
    },
    {
      "epoch": 1.6918833478780488,
      "grad_norm": 3.09375,
      "learning_rate": 2.4224475649742595e-05,
      "loss": 0.7932,
      "step": 482740
    },
    {
      "epoch": 1.6919183953849442,
      "grad_norm": 2.59375,
      "learning_rate": 2.4223826621078893e-05,
      "loss": 0.9048,
      "step": 482750
    },
    {
      "epoch": 1.69195344289184,
      "grad_norm": 3.203125,
      "learning_rate": 2.422317759241519e-05,
      "loss": 0.8505,
      "step": 482760
    },
    {
      "epoch": 1.6919884903987354,
      "grad_norm": 2.765625,
      "learning_rate": 2.422252856375149e-05,
      "loss": 0.7839,
      "step": 482770
    },
    {
      "epoch": 1.692023537905631,
      "grad_norm": 3.046875,
      "learning_rate": 2.4221879535087787e-05,
      "loss": 0.9153,
      "step": 482780
    },
    {
      "epoch": 1.6920585854125267,
      "grad_norm": 3.125,
      "learning_rate": 2.4221230506424085e-05,
      "loss": 0.7512,
      "step": 482790
    },
    {
      "epoch": 1.6920936329194223,
      "grad_norm": 2.890625,
      "learning_rate": 2.4220581477760383e-05,
      "loss": 0.8404,
      "step": 482800
    },
    {
      "epoch": 1.6921286804263178,
      "grad_norm": 2.8125,
      "learning_rate": 2.421993244909668e-05,
      "loss": 0.8564,
      "step": 482810
    },
    {
      "epoch": 1.6921637279332136,
      "grad_norm": 2.625,
      "learning_rate": 2.421928342043298e-05,
      "loss": 0.8555,
      "step": 482820
    },
    {
      "epoch": 1.692198775440109,
      "grad_norm": 2.6875,
      "learning_rate": 2.421863439176928e-05,
      "loss": 0.796,
      "step": 482830
    },
    {
      "epoch": 1.6922338229470046,
      "grad_norm": 3.078125,
      "learning_rate": 2.421798536310558e-05,
      "loss": 0.8845,
      "step": 482840
    },
    {
      "epoch": 1.6922688704539004,
      "grad_norm": 2.84375,
      "learning_rate": 2.4217336334441877e-05,
      "loss": 0.7999,
      "step": 482850
    },
    {
      "epoch": 1.6923039179607957,
      "grad_norm": 2.71875,
      "learning_rate": 2.4216687305778175e-05,
      "loss": 0.8003,
      "step": 482860
    },
    {
      "epoch": 1.6923389654676915,
      "grad_norm": 2.734375,
      "learning_rate": 2.4216038277114473e-05,
      "loss": 0.8847,
      "step": 482870
    },
    {
      "epoch": 1.692374012974587,
      "grad_norm": 3.125,
      "learning_rate": 2.421538924845077e-05,
      "loss": 0.8351,
      "step": 482880
    },
    {
      "epoch": 1.6924090604814825,
      "grad_norm": 2.84375,
      "learning_rate": 2.421474021978707e-05,
      "loss": 0.9099,
      "step": 482890
    },
    {
      "epoch": 1.6924441079883783,
      "grad_norm": 3.1875,
      "learning_rate": 2.4214091191123367e-05,
      "loss": 0.8192,
      "step": 482900
    },
    {
      "epoch": 1.6924791554952738,
      "grad_norm": 2.859375,
      "learning_rate": 2.4213442162459665e-05,
      "loss": 0.8108,
      "step": 482910
    },
    {
      "epoch": 1.6925142030021694,
      "grad_norm": 2.734375,
      "learning_rate": 2.4212793133795963e-05,
      "loss": 0.7986,
      "step": 482920
    },
    {
      "epoch": 1.6925492505090651,
      "grad_norm": 2.71875,
      "learning_rate": 2.4212144105132257e-05,
      "loss": 0.7878,
      "step": 482930
    },
    {
      "epoch": 1.6925842980159607,
      "grad_norm": 2.90625,
      "learning_rate": 2.421149507646856e-05,
      "loss": 0.8093,
      "step": 482940
    },
    {
      "epoch": 1.6926193455228562,
      "grad_norm": 2.734375,
      "learning_rate": 2.4210846047804857e-05,
      "loss": 0.8653,
      "step": 482950
    },
    {
      "epoch": 1.692654393029752,
      "grad_norm": 2.71875,
      "learning_rate": 2.4210197019141155e-05,
      "loss": 0.8534,
      "step": 482960
    },
    {
      "epoch": 1.6926894405366473,
      "grad_norm": 2.578125,
      "learning_rate": 2.4209547990477453e-05,
      "loss": 0.7948,
      "step": 482970
    },
    {
      "epoch": 1.692724488043543,
      "grad_norm": 2.890625,
      "learning_rate": 2.420889896181375e-05,
      "loss": 0.8111,
      "step": 482980
    },
    {
      "epoch": 1.6927595355504386,
      "grad_norm": 2.75,
      "learning_rate": 2.420824993315005e-05,
      "loss": 0.8388,
      "step": 482990
    },
    {
      "epoch": 1.692794583057334,
      "grad_norm": 2.65625,
      "learning_rate": 2.4207600904486347e-05,
      "loss": 0.8159,
      "step": 483000
    },
    {
      "epoch": 1.6928296305642299,
      "grad_norm": 2.765625,
      "learning_rate": 2.4206951875822645e-05,
      "loss": 0.8083,
      "step": 483010
    },
    {
      "epoch": 1.6928646780711254,
      "grad_norm": 2.421875,
      "learning_rate": 2.4206302847158943e-05,
      "loss": 0.7728,
      "step": 483020
    },
    {
      "epoch": 1.692899725578021,
      "grad_norm": 3.0,
      "learning_rate": 2.420565381849524e-05,
      "loss": 0.8548,
      "step": 483030
    },
    {
      "epoch": 1.6929347730849167,
      "grad_norm": 2.890625,
      "learning_rate": 2.420500478983154e-05,
      "loss": 0.8101,
      "step": 483040
    },
    {
      "epoch": 1.6929698205918122,
      "grad_norm": 2.640625,
      "learning_rate": 2.4204355761167837e-05,
      "loss": 0.8234,
      "step": 483050
    },
    {
      "epoch": 1.6930048680987078,
      "grad_norm": 3.640625,
      "learning_rate": 2.4203706732504135e-05,
      "loss": 0.9002,
      "step": 483060
    },
    {
      "epoch": 1.6930399156056035,
      "grad_norm": 3.015625,
      "learning_rate": 2.4203057703840433e-05,
      "loss": 0.8728,
      "step": 483070
    },
    {
      "epoch": 1.6930749631124988,
      "grad_norm": 3.03125,
      "learning_rate": 2.4202408675176734e-05,
      "loss": 0.8908,
      "step": 483080
    },
    {
      "epoch": 1.6931100106193946,
      "grad_norm": 2.671875,
      "learning_rate": 2.4201759646513032e-05,
      "loss": 0.841,
      "step": 483090
    },
    {
      "epoch": 1.6931450581262903,
      "grad_norm": 2.21875,
      "learning_rate": 2.420111061784933e-05,
      "loss": 0.8458,
      "step": 483100
    },
    {
      "epoch": 1.6931801056331857,
      "grad_norm": 3.109375,
      "learning_rate": 2.4200461589185628e-05,
      "loss": 0.8336,
      "step": 483110
    },
    {
      "epoch": 1.6932151531400814,
      "grad_norm": 2.921875,
      "learning_rate": 2.4199812560521923e-05,
      "loss": 0.8099,
      "step": 483120
    },
    {
      "epoch": 1.693250200646977,
      "grad_norm": 3.140625,
      "learning_rate": 2.419916353185822e-05,
      "loss": 0.8781,
      "step": 483130
    },
    {
      "epoch": 1.6932852481538725,
      "grad_norm": 2.921875,
      "learning_rate": 2.419851450319452e-05,
      "loss": 0.8891,
      "step": 483140
    },
    {
      "epoch": 1.6933202956607682,
      "grad_norm": 3.234375,
      "learning_rate": 2.4197865474530817e-05,
      "loss": 0.833,
      "step": 483150
    },
    {
      "epoch": 1.6933553431676638,
      "grad_norm": 3.109375,
      "learning_rate": 2.4197216445867115e-05,
      "loss": 0.8637,
      "step": 483160
    },
    {
      "epoch": 1.6933903906745593,
      "grad_norm": 2.84375,
      "learning_rate": 2.4196567417203413e-05,
      "loss": 0.8282,
      "step": 483170
    },
    {
      "epoch": 1.693425438181455,
      "grad_norm": 3.046875,
      "learning_rate": 2.419591838853971e-05,
      "loss": 0.8815,
      "step": 483180
    },
    {
      "epoch": 1.6934604856883506,
      "grad_norm": 2.828125,
      "learning_rate": 2.419526935987601e-05,
      "loss": 0.8241,
      "step": 483190
    },
    {
      "epoch": 1.6934955331952461,
      "grad_norm": 2.609375,
      "learning_rate": 2.419462033121231e-05,
      "loss": 0.8525,
      "step": 483200
    },
    {
      "epoch": 1.693530580702142,
      "grad_norm": 2.84375,
      "learning_rate": 2.4193971302548608e-05,
      "loss": 0.8578,
      "step": 483210
    },
    {
      "epoch": 1.6935656282090372,
      "grad_norm": 3.0,
      "learning_rate": 2.4193322273884906e-05,
      "loss": 0.8674,
      "step": 483220
    },
    {
      "epoch": 1.693600675715933,
      "grad_norm": 2.78125,
      "learning_rate": 2.4192673245221204e-05,
      "loss": 0.8428,
      "step": 483230
    },
    {
      "epoch": 1.6936357232228285,
      "grad_norm": 3.078125,
      "learning_rate": 2.4192024216557502e-05,
      "loss": 0.8411,
      "step": 483240
    },
    {
      "epoch": 1.693670770729724,
      "grad_norm": 2.703125,
      "learning_rate": 2.41913751878938e-05,
      "loss": 0.7844,
      "step": 483250
    },
    {
      "epoch": 1.6937058182366198,
      "grad_norm": 2.96875,
      "learning_rate": 2.4190726159230098e-05,
      "loss": 0.8354,
      "step": 483260
    },
    {
      "epoch": 1.6937408657435153,
      "grad_norm": 2.75,
      "learning_rate": 2.4190077130566396e-05,
      "loss": 0.83,
      "step": 483270
    },
    {
      "epoch": 1.6937759132504109,
      "grad_norm": 2.75,
      "learning_rate": 2.4189428101902694e-05,
      "loss": 0.8071,
      "step": 483280
    },
    {
      "epoch": 1.6938109607573066,
      "grad_norm": 3.140625,
      "learning_rate": 2.4188779073238992e-05,
      "loss": 0.8683,
      "step": 483290
    },
    {
      "epoch": 1.6938460082642022,
      "grad_norm": 2.96875,
      "learning_rate": 2.4188130044575287e-05,
      "loss": 0.8168,
      "step": 483300
    },
    {
      "epoch": 1.6938810557710977,
      "grad_norm": 2.890625,
      "learning_rate": 2.4187481015911588e-05,
      "loss": 0.7951,
      "step": 483310
    },
    {
      "epoch": 1.6939161032779935,
      "grad_norm": 2.640625,
      "learning_rate": 2.4186831987247886e-05,
      "loss": 0.8148,
      "step": 483320
    },
    {
      "epoch": 1.6939511507848888,
      "grad_norm": 3.21875,
      "learning_rate": 2.4186182958584184e-05,
      "loss": 0.7837,
      "step": 483330
    },
    {
      "epoch": 1.6939861982917845,
      "grad_norm": 2.828125,
      "learning_rate": 2.4185533929920482e-05,
      "loss": 0.7354,
      "step": 483340
    },
    {
      "epoch": 1.69402124579868,
      "grad_norm": 2.296875,
      "learning_rate": 2.418488490125678e-05,
      "loss": 0.8291,
      "step": 483350
    },
    {
      "epoch": 1.6940562933055756,
      "grad_norm": 3.3125,
      "learning_rate": 2.4184235872593078e-05,
      "loss": 0.8791,
      "step": 483360
    },
    {
      "epoch": 1.6940913408124714,
      "grad_norm": 2.84375,
      "learning_rate": 2.4183586843929376e-05,
      "loss": 0.8501,
      "step": 483370
    },
    {
      "epoch": 1.694126388319367,
      "grad_norm": 3.59375,
      "learning_rate": 2.4182937815265674e-05,
      "loss": 0.8503,
      "step": 483380
    },
    {
      "epoch": 1.6941614358262624,
      "grad_norm": 2.984375,
      "learning_rate": 2.4182288786601972e-05,
      "loss": 0.8248,
      "step": 483390
    },
    {
      "epoch": 1.6941964833331582,
      "grad_norm": 2.625,
      "learning_rate": 2.418163975793827e-05,
      "loss": 0.7566,
      "step": 483400
    },
    {
      "epoch": 1.6942315308400537,
      "grad_norm": 2.828125,
      "learning_rate": 2.4180990729274568e-05,
      "loss": 0.8673,
      "step": 483410
    },
    {
      "epoch": 1.6942665783469493,
      "grad_norm": 2.71875,
      "learning_rate": 2.4180341700610866e-05,
      "loss": 0.7943,
      "step": 483420
    },
    {
      "epoch": 1.694301625853845,
      "grad_norm": 2.9375,
      "learning_rate": 2.4179692671947164e-05,
      "loss": 0.8144,
      "step": 483430
    },
    {
      "epoch": 1.6943366733607403,
      "grad_norm": 2.8125,
      "learning_rate": 2.4179043643283462e-05,
      "loss": 0.8474,
      "step": 483440
    },
    {
      "epoch": 1.694371720867636,
      "grad_norm": 2.9375,
      "learning_rate": 2.4178394614619764e-05,
      "loss": 0.786,
      "step": 483450
    },
    {
      "epoch": 1.6944067683745316,
      "grad_norm": 2.53125,
      "learning_rate": 2.417774558595606e-05,
      "loss": 0.8098,
      "step": 483460
    },
    {
      "epoch": 1.6944418158814272,
      "grad_norm": 2.953125,
      "learning_rate": 2.417709655729236e-05,
      "loss": 0.8241,
      "step": 483470
    },
    {
      "epoch": 1.694476863388323,
      "grad_norm": 3.171875,
      "learning_rate": 2.4176447528628658e-05,
      "loss": 0.8298,
      "step": 483480
    },
    {
      "epoch": 1.6945119108952185,
      "grad_norm": 2.859375,
      "learning_rate": 2.4175798499964952e-05,
      "loss": 0.8018,
      "step": 483490
    },
    {
      "epoch": 1.694546958402114,
      "grad_norm": 2.390625,
      "learning_rate": 2.417514947130125e-05,
      "loss": 0.7726,
      "step": 483500
    },
    {
      "epoch": 1.6945820059090098,
      "grad_norm": 2.59375,
      "learning_rate": 2.4174500442637548e-05,
      "loss": 0.8511,
      "step": 483510
    },
    {
      "epoch": 1.6946170534159053,
      "grad_norm": 2.8125,
      "learning_rate": 2.4173851413973846e-05,
      "loss": 0.8814,
      "step": 483520
    },
    {
      "epoch": 1.6946521009228008,
      "grad_norm": 2.5625,
      "learning_rate": 2.4173202385310144e-05,
      "loss": 0.8185,
      "step": 483530
    },
    {
      "epoch": 1.6946871484296966,
      "grad_norm": 2.859375,
      "learning_rate": 2.4172553356646442e-05,
      "loss": 0.8899,
      "step": 483540
    },
    {
      "epoch": 1.694722195936592,
      "grad_norm": 2.828125,
      "learning_rate": 2.417190432798274e-05,
      "loss": 0.801,
      "step": 483550
    },
    {
      "epoch": 1.6947572434434877,
      "grad_norm": 2.6875,
      "learning_rate": 2.417125529931904e-05,
      "loss": 0.7606,
      "step": 483560
    },
    {
      "epoch": 1.6947922909503832,
      "grad_norm": 2.875,
      "learning_rate": 2.417060627065534e-05,
      "loss": 0.838,
      "step": 483570
    },
    {
      "epoch": 1.6948273384572787,
      "grad_norm": 3.09375,
      "learning_rate": 2.4169957241991638e-05,
      "loss": 0.8398,
      "step": 483580
    },
    {
      "epoch": 1.6948623859641745,
      "grad_norm": 2.96875,
      "learning_rate": 2.4169308213327936e-05,
      "loss": 0.797,
      "step": 483590
    },
    {
      "epoch": 1.69489743347107,
      "grad_norm": 2.75,
      "learning_rate": 2.4168659184664234e-05,
      "loss": 0.7748,
      "step": 483600
    },
    {
      "epoch": 1.6949324809779656,
      "grad_norm": 2.6875,
      "learning_rate": 2.416801015600053e-05,
      "loss": 0.82,
      "step": 483610
    },
    {
      "epoch": 1.6949675284848613,
      "grad_norm": 2.984375,
      "learning_rate": 2.416736112733683e-05,
      "loss": 0.7768,
      "step": 483620
    },
    {
      "epoch": 1.6950025759917569,
      "grad_norm": 3.1875,
      "learning_rate": 2.4166712098673128e-05,
      "loss": 0.7721,
      "step": 483630
    },
    {
      "epoch": 1.6950376234986524,
      "grad_norm": 3.109375,
      "learning_rate": 2.4166063070009426e-05,
      "loss": 0.8907,
      "step": 483640
    },
    {
      "epoch": 1.6950726710055481,
      "grad_norm": 3.140625,
      "learning_rate": 2.4165414041345724e-05,
      "loss": 0.7906,
      "step": 483650
    },
    {
      "epoch": 1.6951077185124435,
      "grad_norm": 3.0625,
      "learning_rate": 2.416476501268202e-05,
      "loss": 0.8612,
      "step": 483660
    },
    {
      "epoch": 1.6951427660193392,
      "grad_norm": 2.25,
      "learning_rate": 2.416411598401832e-05,
      "loss": 0.8041,
      "step": 483670
    },
    {
      "epoch": 1.695177813526235,
      "grad_norm": 2.796875,
      "learning_rate": 2.4163466955354618e-05,
      "loss": 0.8489,
      "step": 483680
    },
    {
      "epoch": 1.6952128610331303,
      "grad_norm": 2.859375,
      "learning_rate": 2.4162817926690916e-05,
      "loss": 0.8435,
      "step": 483690
    },
    {
      "epoch": 1.695247908540026,
      "grad_norm": 2.8125,
      "learning_rate": 2.4162168898027214e-05,
      "loss": 0.822,
      "step": 483700
    },
    {
      "epoch": 1.6952829560469216,
      "grad_norm": 2.703125,
      "learning_rate": 2.416151986936351e-05,
      "loss": 0.8091,
      "step": 483710
    },
    {
      "epoch": 1.6953180035538171,
      "grad_norm": 2.859375,
      "learning_rate": 2.416087084069981e-05,
      "loss": 0.7466,
      "step": 483720
    },
    {
      "epoch": 1.6953530510607129,
      "grad_norm": 2.78125,
      "learning_rate": 2.4160221812036108e-05,
      "loss": 0.8037,
      "step": 483730
    },
    {
      "epoch": 1.6953880985676084,
      "grad_norm": 2.65625,
      "learning_rate": 2.4159572783372406e-05,
      "loss": 0.7768,
      "step": 483740
    },
    {
      "epoch": 1.695423146074504,
      "grad_norm": 2.171875,
      "learning_rate": 2.4158923754708704e-05,
      "loss": 0.7873,
      "step": 483750
    },
    {
      "epoch": 1.6954581935813997,
      "grad_norm": 2.75,
      "learning_rate": 2.4158274726045e-05,
      "loss": 0.8655,
      "step": 483760
    },
    {
      "epoch": 1.695493241088295,
      "grad_norm": 2.859375,
      "learning_rate": 2.41576256973813e-05,
      "loss": 0.8575,
      "step": 483770
    },
    {
      "epoch": 1.6955282885951908,
      "grad_norm": 2.84375,
      "learning_rate": 2.4156976668717598e-05,
      "loss": 0.828,
      "step": 483780
    },
    {
      "epoch": 1.6955633361020865,
      "grad_norm": 2.921875,
      "learning_rate": 2.4156327640053896e-05,
      "loss": 0.8551,
      "step": 483790
    },
    {
      "epoch": 1.6955983836089819,
      "grad_norm": 2.53125,
      "learning_rate": 2.4155678611390194e-05,
      "loss": 0.7794,
      "step": 483800
    },
    {
      "epoch": 1.6956334311158776,
      "grad_norm": 2.4375,
      "learning_rate": 2.4155029582726495e-05,
      "loss": 0.7317,
      "step": 483810
    },
    {
      "epoch": 1.6956684786227731,
      "grad_norm": 3.421875,
      "learning_rate": 2.4154380554062793e-05,
      "loss": 0.8159,
      "step": 483820
    },
    {
      "epoch": 1.6957035261296687,
      "grad_norm": 3.046875,
      "learning_rate": 2.415373152539909e-05,
      "loss": 0.815,
      "step": 483830
    },
    {
      "epoch": 1.6957385736365644,
      "grad_norm": 2.96875,
      "learning_rate": 2.415308249673539e-05,
      "loss": 0.7926,
      "step": 483840
    },
    {
      "epoch": 1.69577362114346,
      "grad_norm": 2.953125,
      "learning_rate": 2.4152433468071687e-05,
      "loss": 0.8472,
      "step": 483850
    },
    {
      "epoch": 1.6958086686503555,
      "grad_norm": 2.71875,
      "learning_rate": 2.4151784439407985e-05,
      "loss": 0.8183,
      "step": 483860
    },
    {
      "epoch": 1.6958437161572513,
      "grad_norm": 3.125,
      "learning_rate": 2.415113541074428e-05,
      "loss": 0.844,
      "step": 483870
    },
    {
      "epoch": 1.6958787636641468,
      "grad_norm": 3.09375,
      "learning_rate": 2.4150486382080578e-05,
      "loss": 0.895,
      "step": 483880
    },
    {
      "epoch": 1.6959138111710423,
      "grad_norm": 3.046875,
      "learning_rate": 2.4149837353416876e-05,
      "loss": 0.8712,
      "step": 483890
    },
    {
      "epoch": 1.695948858677938,
      "grad_norm": 2.65625,
      "learning_rate": 2.4149188324753174e-05,
      "loss": 0.7714,
      "step": 483900
    },
    {
      "epoch": 1.6959839061848334,
      "grad_norm": 2.734375,
      "learning_rate": 2.414853929608947e-05,
      "loss": 0.785,
      "step": 483910
    },
    {
      "epoch": 1.6960189536917292,
      "grad_norm": 2.734375,
      "learning_rate": 2.414789026742577e-05,
      "loss": 0.794,
      "step": 483920
    },
    {
      "epoch": 1.6960540011986247,
      "grad_norm": 2.640625,
      "learning_rate": 2.414724123876207e-05,
      "loss": 0.8151,
      "step": 483930
    },
    {
      "epoch": 1.6960890487055202,
      "grad_norm": 3.1875,
      "learning_rate": 2.414659221009837e-05,
      "loss": 0.9828,
      "step": 483940
    },
    {
      "epoch": 1.696124096212416,
      "grad_norm": 3.046875,
      "learning_rate": 2.4145943181434667e-05,
      "loss": 0.8209,
      "step": 483950
    },
    {
      "epoch": 1.6961591437193115,
      "grad_norm": 2.796875,
      "learning_rate": 2.4145294152770965e-05,
      "loss": 0.8081,
      "step": 483960
    },
    {
      "epoch": 1.696194191226207,
      "grad_norm": 3.0,
      "learning_rate": 2.4144645124107263e-05,
      "loss": 0.8325,
      "step": 483970
    },
    {
      "epoch": 1.6962292387331028,
      "grad_norm": 3.28125,
      "learning_rate": 2.414399609544356e-05,
      "loss": 0.8889,
      "step": 483980
    },
    {
      "epoch": 1.6962642862399984,
      "grad_norm": 2.546875,
      "learning_rate": 2.414334706677986e-05,
      "loss": 0.841,
      "step": 483990
    },
    {
      "epoch": 1.696299333746894,
      "grad_norm": 2.90625,
      "learning_rate": 2.4142698038116157e-05,
      "loss": 0.8517,
      "step": 484000
    },
    {
      "epoch": 1.6963343812537897,
      "grad_norm": 2.828125,
      "learning_rate": 2.4142049009452455e-05,
      "loss": 0.9014,
      "step": 484010
    },
    {
      "epoch": 1.696369428760685,
      "grad_norm": 2.71875,
      "learning_rate": 2.4141399980788753e-05,
      "loss": 0.7961,
      "step": 484020
    },
    {
      "epoch": 1.6964044762675807,
      "grad_norm": 2.984375,
      "learning_rate": 2.414075095212505e-05,
      "loss": 0.8222,
      "step": 484030
    },
    {
      "epoch": 1.6964395237744763,
      "grad_norm": 3.203125,
      "learning_rate": 2.414010192346135e-05,
      "loss": 0.8328,
      "step": 484040
    },
    {
      "epoch": 1.6964745712813718,
      "grad_norm": 3.09375,
      "learning_rate": 2.4139452894797647e-05,
      "loss": 0.8172,
      "step": 484050
    },
    {
      "epoch": 1.6965096187882676,
      "grad_norm": 2.75,
      "learning_rate": 2.4138803866133945e-05,
      "loss": 0.8207,
      "step": 484060
    },
    {
      "epoch": 1.696544666295163,
      "grad_norm": 2.984375,
      "learning_rate": 2.4138154837470243e-05,
      "loss": 0.8274,
      "step": 484070
    },
    {
      "epoch": 1.6965797138020586,
      "grad_norm": 2.84375,
      "learning_rate": 2.413750580880654e-05,
      "loss": 0.7362,
      "step": 484080
    },
    {
      "epoch": 1.6966147613089544,
      "grad_norm": 2.796875,
      "learning_rate": 2.413685678014284e-05,
      "loss": 0.8336,
      "step": 484090
    },
    {
      "epoch": 1.69664980881585,
      "grad_norm": 3.109375,
      "learning_rate": 2.4136207751479137e-05,
      "loss": 0.7824,
      "step": 484100
    },
    {
      "epoch": 1.6966848563227455,
      "grad_norm": 2.6875,
      "learning_rate": 2.4135558722815435e-05,
      "loss": 0.7735,
      "step": 484110
    },
    {
      "epoch": 1.6967199038296412,
      "grad_norm": 2.765625,
      "learning_rate": 2.4134909694151733e-05,
      "loss": 0.8116,
      "step": 484120
    },
    {
      "epoch": 1.6967549513365365,
      "grad_norm": 3.125,
      "learning_rate": 2.413426066548803e-05,
      "loss": 0.7372,
      "step": 484130
    },
    {
      "epoch": 1.6967899988434323,
      "grad_norm": 2.953125,
      "learning_rate": 2.413361163682433e-05,
      "loss": 0.804,
      "step": 484140
    },
    {
      "epoch": 1.6968250463503278,
      "grad_norm": 2.640625,
      "learning_rate": 2.4132962608160627e-05,
      "loss": 0.8368,
      "step": 484150
    },
    {
      "epoch": 1.6968600938572234,
      "grad_norm": 2.71875,
      "learning_rate": 2.4132313579496925e-05,
      "loss": 0.7702,
      "step": 484160
    },
    {
      "epoch": 1.6968951413641191,
      "grad_norm": 2.671875,
      "learning_rate": 2.4131664550833223e-05,
      "loss": 0.8012,
      "step": 484170
    },
    {
      "epoch": 1.6969301888710147,
      "grad_norm": 3.203125,
      "learning_rate": 2.4131015522169524e-05,
      "loss": 0.9135,
      "step": 484180
    },
    {
      "epoch": 1.6969652363779102,
      "grad_norm": 3.203125,
      "learning_rate": 2.4130366493505822e-05,
      "loss": 0.8084,
      "step": 484190
    },
    {
      "epoch": 1.697000283884806,
      "grad_norm": 2.828125,
      "learning_rate": 2.412971746484212e-05,
      "loss": 0.845,
      "step": 484200
    },
    {
      "epoch": 1.6970353313917015,
      "grad_norm": 3.15625,
      "learning_rate": 2.412906843617842e-05,
      "loss": 0.7849,
      "step": 484210
    },
    {
      "epoch": 1.697070378898597,
      "grad_norm": 3.109375,
      "learning_rate": 2.4128419407514716e-05,
      "loss": 0.7997,
      "step": 484220
    },
    {
      "epoch": 1.6971054264054928,
      "grad_norm": 2.625,
      "learning_rate": 2.4127770378851014e-05,
      "loss": 0.782,
      "step": 484230
    },
    {
      "epoch": 1.697140473912388,
      "grad_norm": 2.96875,
      "learning_rate": 2.412712135018731e-05,
      "loss": 0.9262,
      "step": 484240
    },
    {
      "epoch": 1.6971755214192839,
      "grad_norm": 3.0625,
      "learning_rate": 2.4126472321523607e-05,
      "loss": 0.8323,
      "step": 484250
    },
    {
      "epoch": 1.6972105689261794,
      "grad_norm": 2.375,
      "learning_rate": 2.4125823292859905e-05,
      "loss": 0.8523,
      "step": 484260
    },
    {
      "epoch": 1.697245616433075,
      "grad_norm": 2.6875,
      "learning_rate": 2.4125174264196203e-05,
      "loss": 0.8318,
      "step": 484270
    },
    {
      "epoch": 1.6972806639399707,
      "grad_norm": 2.625,
      "learning_rate": 2.41245252355325e-05,
      "loss": 0.9035,
      "step": 484280
    },
    {
      "epoch": 1.6973157114468662,
      "grad_norm": 2.6875,
      "learning_rate": 2.41238762068688e-05,
      "loss": 0.8244,
      "step": 484290
    },
    {
      "epoch": 1.6973507589537618,
      "grad_norm": 2.640625,
      "learning_rate": 2.41232271782051e-05,
      "loss": 0.7812,
      "step": 484300
    },
    {
      "epoch": 1.6973858064606575,
      "grad_norm": 2.234375,
      "learning_rate": 2.41225781495414e-05,
      "loss": 0.8118,
      "step": 484310
    },
    {
      "epoch": 1.697420853967553,
      "grad_norm": 2.5,
      "learning_rate": 2.4121929120877696e-05,
      "loss": 0.8343,
      "step": 484320
    },
    {
      "epoch": 1.6974559014744486,
      "grad_norm": 3.03125,
      "learning_rate": 2.4121280092213994e-05,
      "loss": 0.7992,
      "step": 484330
    },
    {
      "epoch": 1.6974909489813443,
      "grad_norm": 3.1875,
      "learning_rate": 2.4120631063550292e-05,
      "loss": 0.929,
      "step": 484340
    },
    {
      "epoch": 1.6975259964882397,
      "grad_norm": 2.984375,
      "learning_rate": 2.411998203488659e-05,
      "loss": 0.7712,
      "step": 484350
    },
    {
      "epoch": 1.6975610439951354,
      "grad_norm": 2.9375,
      "learning_rate": 2.411933300622289e-05,
      "loss": 0.851,
      "step": 484360
    },
    {
      "epoch": 1.6975960915020312,
      "grad_norm": 2.890625,
      "learning_rate": 2.4118683977559186e-05,
      "loss": 0.7386,
      "step": 484370
    },
    {
      "epoch": 1.6976311390089265,
      "grad_norm": 4.0625,
      "learning_rate": 2.4118034948895484e-05,
      "loss": 0.7689,
      "step": 484380
    },
    {
      "epoch": 1.6976661865158222,
      "grad_norm": 2.78125,
      "learning_rate": 2.4117385920231782e-05,
      "loss": 0.9143,
      "step": 484390
    },
    {
      "epoch": 1.6977012340227178,
      "grad_norm": 2.59375,
      "learning_rate": 2.411673689156808e-05,
      "loss": 0.7976,
      "step": 484400
    },
    {
      "epoch": 1.6977362815296133,
      "grad_norm": 2.859375,
      "learning_rate": 2.411608786290438e-05,
      "loss": 0.8366,
      "step": 484410
    },
    {
      "epoch": 1.697771329036509,
      "grad_norm": 2.6875,
      "learning_rate": 2.4115438834240676e-05,
      "loss": 0.8666,
      "step": 484420
    },
    {
      "epoch": 1.6978063765434046,
      "grad_norm": 3.03125,
      "learning_rate": 2.4114789805576974e-05,
      "loss": 0.7783,
      "step": 484430
    },
    {
      "epoch": 1.6978414240503001,
      "grad_norm": 2.5,
      "learning_rate": 2.4114140776913272e-05,
      "loss": 0.7557,
      "step": 484440
    },
    {
      "epoch": 1.697876471557196,
      "grad_norm": 3.125,
      "learning_rate": 2.411349174824957e-05,
      "loss": 0.8427,
      "step": 484450
    },
    {
      "epoch": 1.6979115190640912,
      "grad_norm": 3.375,
      "learning_rate": 2.411284271958587e-05,
      "loss": 0.7296,
      "step": 484460
    },
    {
      "epoch": 1.697946566570987,
      "grad_norm": 3.0625,
      "learning_rate": 2.4112193690922166e-05,
      "loss": 0.8973,
      "step": 484470
    },
    {
      "epoch": 1.6979816140778827,
      "grad_norm": 2.5,
      "learning_rate": 2.4111544662258464e-05,
      "loss": 0.7346,
      "step": 484480
    },
    {
      "epoch": 1.698016661584778,
      "grad_norm": 2.703125,
      "learning_rate": 2.4110895633594762e-05,
      "loss": 0.8736,
      "step": 484490
    },
    {
      "epoch": 1.6980517090916738,
      "grad_norm": 2.78125,
      "learning_rate": 2.411024660493106e-05,
      "loss": 0.8131,
      "step": 484500
    },
    {
      "epoch": 1.6980867565985693,
      "grad_norm": 3.15625,
      "learning_rate": 2.410959757626736e-05,
      "loss": 0.762,
      "step": 484510
    },
    {
      "epoch": 1.6981218041054649,
      "grad_norm": 2.765625,
      "learning_rate": 2.4108948547603656e-05,
      "loss": 0.847,
      "step": 484520
    },
    {
      "epoch": 1.6981568516123606,
      "grad_norm": 3.109375,
      "learning_rate": 2.4108299518939954e-05,
      "loss": 0.819,
      "step": 484530
    },
    {
      "epoch": 1.6981918991192562,
      "grad_norm": 3.546875,
      "learning_rate": 2.4107650490276252e-05,
      "loss": 0.8565,
      "step": 484540
    },
    {
      "epoch": 1.6982269466261517,
      "grad_norm": 2.953125,
      "learning_rate": 2.4107001461612554e-05,
      "loss": 0.7712,
      "step": 484550
    },
    {
      "epoch": 1.6982619941330475,
      "grad_norm": 3.078125,
      "learning_rate": 2.4106352432948852e-05,
      "loss": 0.8061,
      "step": 484560
    },
    {
      "epoch": 1.698297041639943,
      "grad_norm": 2.734375,
      "learning_rate": 2.410570340428515e-05,
      "loss": 0.7734,
      "step": 484570
    },
    {
      "epoch": 1.6983320891468385,
      "grad_norm": 2.96875,
      "learning_rate": 2.4105054375621448e-05,
      "loss": 0.9035,
      "step": 484580
    },
    {
      "epoch": 1.6983671366537343,
      "grad_norm": 3.265625,
      "learning_rate": 2.4104405346957746e-05,
      "loss": 0.8492,
      "step": 484590
    },
    {
      "epoch": 1.6984021841606296,
      "grad_norm": 2.8125,
      "learning_rate": 2.4103756318294044e-05,
      "loss": 0.8347,
      "step": 484600
    },
    {
      "epoch": 1.6984372316675254,
      "grad_norm": 3.078125,
      "learning_rate": 2.4103107289630342e-05,
      "loss": 0.7645,
      "step": 484610
    },
    {
      "epoch": 1.698472279174421,
      "grad_norm": 2.921875,
      "learning_rate": 2.4102458260966636e-05,
      "loss": 0.8329,
      "step": 484620
    },
    {
      "epoch": 1.6985073266813164,
      "grad_norm": 2.953125,
      "learning_rate": 2.4101809232302934e-05,
      "loss": 0.9206,
      "step": 484630
    },
    {
      "epoch": 1.6985423741882122,
      "grad_norm": 2.953125,
      "learning_rate": 2.4101160203639232e-05,
      "loss": 0.9038,
      "step": 484640
    },
    {
      "epoch": 1.6985774216951077,
      "grad_norm": 2.9375,
      "learning_rate": 2.410051117497553e-05,
      "loss": 0.7808,
      "step": 484650
    },
    {
      "epoch": 1.6986124692020033,
      "grad_norm": 2.671875,
      "learning_rate": 2.4099862146311832e-05,
      "loss": 0.9166,
      "step": 484660
    },
    {
      "epoch": 1.698647516708899,
      "grad_norm": 3.0625,
      "learning_rate": 2.409921311764813e-05,
      "loss": 0.927,
      "step": 484670
    },
    {
      "epoch": 1.6986825642157946,
      "grad_norm": 3.03125,
      "learning_rate": 2.4098564088984428e-05,
      "loss": 0.8851,
      "step": 484680
    },
    {
      "epoch": 1.69871761172269,
      "grad_norm": 2.9375,
      "learning_rate": 2.4097915060320726e-05,
      "loss": 0.8143,
      "step": 484690
    },
    {
      "epoch": 1.6987526592295858,
      "grad_norm": 3.0,
      "learning_rate": 2.4097266031657024e-05,
      "loss": 0.8655,
      "step": 484700
    },
    {
      "epoch": 1.6987877067364812,
      "grad_norm": 3.28125,
      "learning_rate": 2.4096617002993322e-05,
      "loss": 0.8819,
      "step": 484710
    },
    {
      "epoch": 1.698822754243377,
      "grad_norm": 2.203125,
      "learning_rate": 2.409596797432962e-05,
      "loss": 0.8159,
      "step": 484720
    },
    {
      "epoch": 1.6988578017502725,
      "grad_norm": 3.1875,
      "learning_rate": 2.4095318945665918e-05,
      "loss": 0.9052,
      "step": 484730
    },
    {
      "epoch": 1.698892849257168,
      "grad_norm": 2.671875,
      "learning_rate": 2.4094669917002216e-05,
      "loss": 0.9154,
      "step": 484740
    },
    {
      "epoch": 1.6989278967640637,
      "grad_norm": 2.78125,
      "learning_rate": 2.4094020888338514e-05,
      "loss": 0.8913,
      "step": 484750
    },
    {
      "epoch": 1.6989629442709593,
      "grad_norm": 2.109375,
      "learning_rate": 2.4093371859674812e-05,
      "loss": 0.7931,
      "step": 484760
    },
    {
      "epoch": 1.6989979917778548,
      "grad_norm": 3.265625,
      "learning_rate": 2.409272283101111e-05,
      "loss": 0.8973,
      "step": 484770
    },
    {
      "epoch": 1.6990330392847506,
      "grad_norm": 3.125,
      "learning_rate": 2.4092073802347408e-05,
      "loss": 0.8741,
      "step": 484780
    },
    {
      "epoch": 1.6990680867916461,
      "grad_norm": 2.8125,
      "learning_rate": 2.4091424773683706e-05,
      "loss": 0.8688,
      "step": 484790
    },
    {
      "epoch": 1.6991031342985417,
      "grad_norm": 2.65625,
      "learning_rate": 2.4090775745020007e-05,
      "loss": 0.7845,
      "step": 484800
    },
    {
      "epoch": 1.6991381818054374,
      "grad_norm": 3.1875,
      "learning_rate": 2.4090126716356302e-05,
      "loss": 0.8079,
      "step": 484810
    },
    {
      "epoch": 1.6991732293123327,
      "grad_norm": 3.140625,
      "learning_rate": 2.40894776876926e-05,
      "loss": 0.8959,
      "step": 484820
    },
    {
      "epoch": 1.6992082768192285,
      "grad_norm": 3.0,
      "learning_rate": 2.4088828659028898e-05,
      "loss": 0.784,
      "step": 484830
    },
    {
      "epoch": 1.699243324326124,
      "grad_norm": 3.0,
      "learning_rate": 2.4088179630365196e-05,
      "loss": 0.8539,
      "step": 484840
    },
    {
      "epoch": 1.6992783718330196,
      "grad_norm": 3.078125,
      "learning_rate": 2.4087530601701494e-05,
      "loss": 0.8425,
      "step": 484850
    },
    {
      "epoch": 1.6993134193399153,
      "grad_norm": 3.3125,
      "learning_rate": 2.4086881573037792e-05,
      "loss": 0.8076,
      "step": 484860
    },
    {
      "epoch": 1.6993484668468108,
      "grad_norm": 2.8125,
      "learning_rate": 2.408623254437409e-05,
      "loss": 0.8516,
      "step": 484870
    },
    {
      "epoch": 1.6993835143537064,
      "grad_norm": 2.765625,
      "learning_rate": 2.4085583515710388e-05,
      "loss": 0.8201,
      "step": 484880
    },
    {
      "epoch": 1.6994185618606021,
      "grad_norm": 2.828125,
      "learning_rate": 2.4084934487046686e-05,
      "loss": 0.8374,
      "step": 484890
    },
    {
      "epoch": 1.6994536093674977,
      "grad_norm": 3.453125,
      "learning_rate": 2.4084285458382984e-05,
      "loss": 0.913,
      "step": 484900
    },
    {
      "epoch": 1.6994886568743932,
      "grad_norm": 2.71875,
      "learning_rate": 2.4083636429719285e-05,
      "loss": 0.7814,
      "step": 484910
    },
    {
      "epoch": 1.699523704381289,
      "grad_norm": 3.265625,
      "learning_rate": 2.4082987401055583e-05,
      "loss": 0.8552,
      "step": 484920
    },
    {
      "epoch": 1.6995587518881843,
      "grad_norm": 2.640625,
      "learning_rate": 2.408233837239188e-05,
      "loss": 0.8186,
      "step": 484930
    },
    {
      "epoch": 1.69959379939508,
      "grad_norm": 3.140625,
      "learning_rate": 2.408168934372818e-05,
      "loss": 0.8886,
      "step": 484940
    },
    {
      "epoch": 1.6996288469019756,
      "grad_norm": 3.484375,
      "learning_rate": 2.4081040315064477e-05,
      "loss": 0.8514,
      "step": 484950
    },
    {
      "epoch": 1.6996638944088711,
      "grad_norm": 2.828125,
      "learning_rate": 2.4080391286400775e-05,
      "loss": 0.8384,
      "step": 484960
    },
    {
      "epoch": 1.6996989419157669,
      "grad_norm": 2.828125,
      "learning_rate": 2.4079742257737073e-05,
      "loss": 0.8998,
      "step": 484970
    },
    {
      "epoch": 1.6997339894226624,
      "grad_norm": 2.609375,
      "learning_rate": 2.407909322907337e-05,
      "loss": 0.7773,
      "step": 484980
    },
    {
      "epoch": 1.699769036929558,
      "grad_norm": 3.203125,
      "learning_rate": 2.407844420040967e-05,
      "loss": 0.8664,
      "step": 484990
    },
    {
      "epoch": 1.6998040844364537,
      "grad_norm": 2.46875,
      "learning_rate": 2.4077795171745964e-05,
      "loss": 0.8114,
      "step": 485000
    },
    {
      "epoch": 1.6998040844364537,
      "eval_loss": 0.7818120121955872,
      "eval_runtime": 557.4187,
      "eval_samples_per_second": 682.496,
      "eval_steps_per_second": 56.875,
      "step": 485000
    },
    {
      "epoch": 1.6998391319433492,
      "grad_norm": 2.9375,
      "learning_rate": 2.4077146143082262e-05,
      "loss": 0.9002,
      "step": 485010
    },
    {
      "epoch": 1.6998741794502448,
      "grad_norm": 3.09375,
      "learning_rate": 2.407649711441856e-05,
      "loss": 0.8552,
      "step": 485020
    },
    {
      "epoch": 1.6999092269571405,
      "grad_norm": 3.5,
      "learning_rate": 2.407584808575486e-05,
      "loss": 0.8045,
      "step": 485030
    },
    {
      "epoch": 1.6999442744640358,
      "grad_norm": 2.859375,
      "learning_rate": 2.407519905709116e-05,
      "loss": 0.8048,
      "step": 485040
    },
    {
      "epoch": 1.6999793219709316,
      "grad_norm": 3.0,
      "learning_rate": 2.4074550028427457e-05,
      "loss": 0.8699,
      "step": 485050
    },
    {
      "epoch": 1.7000143694778274,
      "grad_norm": 2.84375,
      "learning_rate": 2.4073900999763755e-05,
      "loss": 0.8743,
      "step": 485060
    },
    {
      "epoch": 1.7000494169847227,
      "grad_norm": 2.765625,
      "learning_rate": 2.4073251971100053e-05,
      "loss": 0.8657,
      "step": 485070
    },
    {
      "epoch": 1.7000844644916184,
      "grad_norm": 3.0,
      "learning_rate": 2.407260294243635e-05,
      "loss": 0.9048,
      "step": 485080
    },
    {
      "epoch": 1.700119511998514,
      "grad_norm": 3.125,
      "learning_rate": 2.407195391377265e-05,
      "loss": 0.7988,
      "step": 485090
    },
    {
      "epoch": 1.7001545595054095,
      "grad_norm": 3.140625,
      "learning_rate": 2.4071304885108947e-05,
      "loss": 0.811,
      "step": 485100
    },
    {
      "epoch": 1.7001896070123053,
      "grad_norm": 3.28125,
      "learning_rate": 2.4070655856445245e-05,
      "loss": 0.8606,
      "step": 485110
    },
    {
      "epoch": 1.7002246545192008,
      "grad_norm": 2.859375,
      "learning_rate": 2.4070006827781543e-05,
      "loss": 0.8892,
      "step": 485120
    },
    {
      "epoch": 1.7002597020260963,
      "grad_norm": 3.125,
      "learning_rate": 2.406935779911784e-05,
      "loss": 0.8387,
      "step": 485130
    },
    {
      "epoch": 1.700294749532992,
      "grad_norm": 2.765625,
      "learning_rate": 2.406870877045414e-05,
      "loss": 0.9139,
      "step": 485140
    },
    {
      "epoch": 1.7003297970398874,
      "grad_norm": 2.96875,
      "learning_rate": 2.4068059741790437e-05,
      "loss": 0.8797,
      "step": 485150
    },
    {
      "epoch": 1.7003648445467832,
      "grad_norm": 3.078125,
      "learning_rate": 2.4067410713126735e-05,
      "loss": 0.8195,
      "step": 485160
    },
    {
      "epoch": 1.700399892053679,
      "grad_norm": 3.265625,
      "learning_rate": 2.4066761684463037e-05,
      "loss": 0.8627,
      "step": 485170
    },
    {
      "epoch": 1.7004349395605742,
      "grad_norm": 2.671875,
      "learning_rate": 2.406611265579933e-05,
      "loss": 0.8162,
      "step": 485180
    },
    {
      "epoch": 1.70046998706747,
      "grad_norm": 2.734375,
      "learning_rate": 2.406546362713563e-05,
      "loss": 0.839,
      "step": 485190
    },
    {
      "epoch": 1.7005050345743655,
      "grad_norm": 2.96875,
      "learning_rate": 2.4064814598471927e-05,
      "loss": 0.8342,
      "step": 485200
    },
    {
      "epoch": 1.700540082081261,
      "grad_norm": 3.03125,
      "learning_rate": 2.4064165569808225e-05,
      "loss": 0.8798,
      "step": 485210
    },
    {
      "epoch": 1.7005751295881568,
      "grad_norm": 2.640625,
      "learning_rate": 2.4063516541144523e-05,
      "loss": 0.7932,
      "step": 485220
    },
    {
      "epoch": 1.7006101770950524,
      "grad_norm": 2.8125,
      "learning_rate": 2.406286751248082e-05,
      "loss": 0.7912,
      "step": 485230
    },
    {
      "epoch": 1.700645224601948,
      "grad_norm": 3.078125,
      "learning_rate": 2.406221848381712e-05,
      "loss": 0.7756,
      "step": 485240
    },
    {
      "epoch": 1.7006802721088436,
      "grad_norm": 2.828125,
      "learning_rate": 2.4061569455153417e-05,
      "loss": 0.8487,
      "step": 485250
    },
    {
      "epoch": 1.7007153196157392,
      "grad_norm": 3.046875,
      "learning_rate": 2.4060920426489715e-05,
      "loss": 0.8056,
      "step": 485260
    },
    {
      "epoch": 1.7007503671226347,
      "grad_norm": 2.484375,
      "learning_rate": 2.4060271397826013e-05,
      "loss": 0.8706,
      "step": 485270
    },
    {
      "epoch": 1.7007854146295305,
      "grad_norm": 2.5,
      "learning_rate": 2.4059622369162315e-05,
      "loss": 0.8139,
      "step": 485280
    },
    {
      "epoch": 1.7008204621364258,
      "grad_norm": 2.734375,
      "learning_rate": 2.4058973340498613e-05,
      "loss": 0.8991,
      "step": 485290
    },
    {
      "epoch": 1.7008555096433216,
      "grad_norm": 2.9375,
      "learning_rate": 2.405832431183491e-05,
      "loss": 0.8528,
      "step": 485300
    },
    {
      "epoch": 1.700890557150217,
      "grad_norm": 3.34375,
      "learning_rate": 2.405767528317121e-05,
      "loss": 0.9416,
      "step": 485310
    },
    {
      "epoch": 1.7009256046571126,
      "grad_norm": 3.21875,
      "learning_rate": 2.4057026254507507e-05,
      "loss": 0.8489,
      "step": 485320
    },
    {
      "epoch": 1.7009606521640084,
      "grad_norm": 3.015625,
      "learning_rate": 2.4056377225843805e-05,
      "loss": 0.8764,
      "step": 485330
    },
    {
      "epoch": 1.700995699670904,
      "grad_norm": 2.859375,
      "learning_rate": 2.4055728197180103e-05,
      "loss": 0.8697,
      "step": 485340
    },
    {
      "epoch": 1.7010307471777995,
      "grad_norm": 2.859375,
      "learning_rate": 2.40550791685164e-05,
      "loss": 0.8552,
      "step": 485350
    },
    {
      "epoch": 1.7010657946846952,
      "grad_norm": 3.03125,
      "learning_rate": 2.40544301398527e-05,
      "loss": 0.9512,
      "step": 485360
    },
    {
      "epoch": 1.7011008421915907,
      "grad_norm": 2.875,
      "learning_rate": 2.4053781111188993e-05,
      "loss": 0.905,
      "step": 485370
    },
    {
      "epoch": 1.7011358896984863,
      "grad_norm": 2.78125,
      "learning_rate": 2.405313208252529e-05,
      "loss": 0.8295,
      "step": 485380
    },
    {
      "epoch": 1.701170937205382,
      "grad_norm": 3.296875,
      "learning_rate": 2.4052483053861593e-05,
      "loss": 0.8519,
      "step": 485390
    },
    {
      "epoch": 1.7012059847122774,
      "grad_norm": 3.1875,
      "learning_rate": 2.405183402519789e-05,
      "loss": 0.7899,
      "step": 485400
    },
    {
      "epoch": 1.701241032219173,
      "grad_norm": 3.0,
      "learning_rate": 2.405118499653419e-05,
      "loss": 0.8482,
      "step": 485410
    },
    {
      "epoch": 1.7012760797260686,
      "grad_norm": 3.046875,
      "learning_rate": 2.4050535967870487e-05,
      "loss": 0.8531,
      "step": 485420
    },
    {
      "epoch": 1.7013111272329642,
      "grad_norm": 2.78125,
      "learning_rate": 2.4049886939206785e-05,
      "loss": 0.8761,
      "step": 485430
    },
    {
      "epoch": 1.70134617473986,
      "grad_norm": 2.546875,
      "learning_rate": 2.4049237910543083e-05,
      "loss": 0.7157,
      "step": 485440
    },
    {
      "epoch": 1.7013812222467555,
      "grad_norm": 3.078125,
      "learning_rate": 2.404858888187938e-05,
      "loss": 0.8428,
      "step": 485450
    },
    {
      "epoch": 1.701416269753651,
      "grad_norm": 2.90625,
      "learning_rate": 2.404793985321568e-05,
      "loss": 0.935,
      "step": 485460
    },
    {
      "epoch": 1.7014513172605468,
      "grad_norm": 3.34375,
      "learning_rate": 2.4047290824551977e-05,
      "loss": 0.7781,
      "step": 485470
    },
    {
      "epoch": 1.7014863647674423,
      "grad_norm": 2.875,
      "learning_rate": 2.4046641795888275e-05,
      "loss": 0.8733,
      "step": 485480
    },
    {
      "epoch": 1.7015214122743378,
      "grad_norm": 2.859375,
      "learning_rate": 2.4045992767224573e-05,
      "loss": 0.9573,
      "step": 485490
    },
    {
      "epoch": 1.7015564597812336,
      "grad_norm": 3.171875,
      "learning_rate": 2.404534373856087e-05,
      "loss": 0.8388,
      "step": 485500
    },
    {
      "epoch": 1.701591507288129,
      "grad_norm": 2.65625,
      "learning_rate": 2.404469470989717e-05,
      "loss": 0.8243,
      "step": 485510
    },
    {
      "epoch": 1.7016265547950247,
      "grad_norm": 2.5625,
      "learning_rate": 2.4044045681233467e-05,
      "loss": 0.88,
      "step": 485520
    },
    {
      "epoch": 1.7016616023019202,
      "grad_norm": 2.53125,
      "learning_rate": 2.4043396652569768e-05,
      "loss": 0.8725,
      "step": 485530
    },
    {
      "epoch": 1.7016966498088157,
      "grad_norm": 2.453125,
      "learning_rate": 2.4042747623906066e-05,
      "loss": 0.8949,
      "step": 485540
    },
    {
      "epoch": 1.7017316973157115,
      "grad_norm": 2.8125,
      "learning_rate": 2.4042098595242364e-05,
      "loss": 0.8469,
      "step": 485550
    },
    {
      "epoch": 1.701766744822607,
      "grad_norm": 2.625,
      "learning_rate": 2.404144956657866e-05,
      "loss": 0.8482,
      "step": 485560
    },
    {
      "epoch": 1.7018017923295026,
      "grad_norm": 3.140625,
      "learning_rate": 2.4040800537914957e-05,
      "loss": 0.7847,
      "step": 485570
    },
    {
      "epoch": 1.7018368398363983,
      "grad_norm": 2.5625,
      "learning_rate": 2.4040151509251255e-05,
      "loss": 0.8176,
      "step": 485580
    },
    {
      "epoch": 1.7018718873432939,
      "grad_norm": 2.953125,
      "learning_rate": 2.4039502480587553e-05,
      "loss": 0.862,
      "step": 485590
    },
    {
      "epoch": 1.7019069348501894,
      "grad_norm": 2.578125,
      "learning_rate": 2.403885345192385e-05,
      "loss": 0.7995,
      "step": 485600
    },
    {
      "epoch": 1.7019419823570852,
      "grad_norm": 2.84375,
      "learning_rate": 2.403820442326015e-05,
      "loss": 0.8072,
      "step": 485610
    },
    {
      "epoch": 1.7019770298639805,
      "grad_norm": 2.9375,
      "learning_rate": 2.4037555394596447e-05,
      "loss": 0.8437,
      "step": 485620
    },
    {
      "epoch": 1.7020120773708762,
      "grad_norm": 2.734375,
      "learning_rate": 2.4036906365932745e-05,
      "loss": 0.8976,
      "step": 485630
    },
    {
      "epoch": 1.7020471248777718,
      "grad_norm": 3.0,
      "learning_rate": 2.4036257337269043e-05,
      "loss": 0.8511,
      "step": 485640
    },
    {
      "epoch": 1.7020821723846673,
      "grad_norm": 3.125,
      "learning_rate": 2.4035608308605344e-05,
      "loss": 0.8398,
      "step": 485650
    },
    {
      "epoch": 1.702117219891563,
      "grad_norm": 3.234375,
      "learning_rate": 2.4034959279941642e-05,
      "loss": 0.8303,
      "step": 485660
    },
    {
      "epoch": 1.7021522673984586,
      "grad_norm": 2.65625,
      "learning_rate": 2.403431025127794e-05,
      "loss": 0.8179,
      "step": 485670
    },
    {
      "epoch": 1.7021873149053541,
      "grad_norm": 2.6875,
      "learning_rate": 2.4033661222614238e-05,
      "loss": 0.8061,
      "step": 485680
    },
    {
      "epoch": 1.70222236241225,
      "grad_norm": 2.5625,
      "learning_rate": 2.4033012193950536e-05,
      "loss": 0.8543,
      "step": 485690
    },
    {
      "epoch": 1.7022574099191454,
      "grad_norm": 2.765625,
      "learning_rate": 2.4032363165286834e-05,
      "loss": 0.7479,
      "step": 485700
    },
    {
      "epoch": 1.702292457426041,
      "grad_norm": 3.0625,
      "learning_rate": 2.4031714136623132e-05,
      "loss": 0.8575,
      "step": 485710
    },
    {
      "epoch": 1.7023275049329367,
      "grad_norm": 3.109375,
      "learning_rate": 2.403106510795943e-05,
      "loss": 0.788,
      "step": 485720
    },
    {
      "epoch": 1.702362552439832,
      "grad_norm": 2.65625,
      "learning_rate": 2.4030416079295728e-05,
      "loss": 0.9566,
      "step": 485730
    },
    {
      "epoch": 1.7023975999467278,
      "grad_norm": 3.0625,
      "learning_rate": 2.4029767050632026e-05,
      "loss": 0.8441,
      "step": 485740
    },
    {
      "epoch": 1.7024326474536235,
      "grad_norm": 2.953125,
      "learning_rate": 2.402911802196832e-05,
      "loss": 0.8063,
      "step": 485750
    },
    {
      "epoch": 1.7024676949605189,
      "grad_norm": 2.578125,
      "learning_rate": 2.4028468993304622e-05,
      "loss": 0.8163,
      "step": 485760
    },
    {
      "epoch": 1.7025027424674146,
      "grad_norm": 3.0625,
      "learning_rate": 2.402781996464092e-05,
      "loss": 0.87,
      "step": 485770
    },
    {
      "epoch": 1.7025377899743102,
      "grad_norm": 3.0,
      "learning_rate": 2.4027170935977218e-05,
      "loss": 0.8462,
      "step": 485780
    },
    {
      "epoch": 1.7025728374812057,
      "grad_norm": 2.875,
      "learning_rate": 2.4026521907313516e-05,
      "loss": 0.7902,
      "step": 485790
    },
    {
      "epoch": 1.7026078849881014,
      "grad_norm": 2.875,
      "learning_rate": 2.4025872878649814e-05,
      "loss": 0.8648,
      "step": 485800
    },
    {
      "epoch": 1.702642932494997,
      "grad_norm": 2.671875,
      "learning_rate": 2.4025223849986112e-05,
      "loss": 0.8141,
      "step": 485810
    },
    {
      "epoch": 1.7026779800018925,
      "grad_norm": 2.71875,
      "learning_rate": 2.402457482132241e-05,
      "loss": 0.8171,
      "step": 485820
    },
    {
      "epoch": 1.7027130275087883,
      "grad_norm": 3.09375,
      "learning_rate": 2.4023925792658708e-05,
      "loss": 0.8301,
      "step": 485830
    },
    {
      "epoch": 1.7027480750156836,
      "grad_norm": 2.734375,
      "learning_rate": 2.4023276763995006e-05,
      "loss": 0.9058,
      "step": 485840
    },
    {
      "epoch": 1.7027831225225794,
      "grad_norm": 2.5,
      "learning_rate": 2.4022627735331304e-05,
      "loss": 0.793,
      "step": 485850
    },
    {
      "epoch": 1.702818170029475,
      "grad_norm": 3.109375,
      "learning_rate": 2.4021978706667602e-05,
      "loss": 0.8092,
      "step": 485860
    },
    {
      "epoch": 1.7028532175363704,
      "grad_norm": 2.921875,
      "learning_rate": 2.40213296780039e-05,
      "loss": 0.8154,
      "step": 485870
    },
    {
      "epoch": 1.7028882650432662,
      "grad_norm": 2.78125,
      "learning_rate": 2.4020680649340198e-05,
      "loss": 0.8667,
      "step": 485880
    },
    {
      "epoch": 1.7029233125501617,
      "grad_norm": 2.828125,
      "learning_rate": 2.4020031620676496e-05,
      "loss": 0.733,
      "step": 485890
    },
    {
      "epoch": 1.7029583600570573,
      "grad_norm": 2.828125,
      "learning_rate": 2.4019382592012797e-05,
      "loss": 0.8218,
      "step": 485900
    },
    {
      "epoch": 1.702993407563953,
      "grad_norm": 2.84375,
      "learning_rate": 2.4018733563349095e-05,
      "loss": 0.8056,
      "step": 485910
    },
    {
      "epoch": 1.7030284550708485,
      "grad_norm": 2.59375,
      "learning_rate": 2.4018084534685393e-05,
      "loss": 0.784,
      "step": 485920
    },
    {
      "epoch": 1.703063502577744,
      "grad_norm": 3.015625,
      "learning_rate": 2.401743550602169e-05,
      "loss": 0.8907,
      "step": 485930
    },
    {
      "epoch": 1.7030985500846398,
      "grad_norm": 3.078125,
      "learning_rate": 2.4016786477357986e-05,
      "loss": 0.8253,
      "step": 485940
    },
    {
      "epoch": 1.7031335975915354,
      "grad_norm": 2.828125,
      "learning_rate": 2.4016137448694284e-05,
      "loss": 0.748,
      "step": 485950
    },
    {
      "epoch": 1.703168645098431,
      "grad_norm": 3.453125,
      "learning_rate": 2.4015488420030582e-05,
      "loss": 0.8438,
      "step": 485960
    },
    {
      "epoch": 1.7032036926053267,
      "grad_norm": 2.609375,
      "learning_rate": 2.401483939136688e-05,
      "loss": 0.845,
      "step": 485970
    },
    {
      "epoch": 1.703238740112222,
      "grad_norm": 3.03125,
      "learning_rate": 2.4014190362703178e-05,
      "loss": 0.8083,
      "step": 485980
    },
    {
      "epoch": 1.7032737876191177,
      "grad_norm": 2.375,
      "learning_rate": 2.4013541334039476e-05,
      "loss": 0.8521,
      "step": 485990
    },
    {
      "epoch": 1.7033088351260133,
      "grad_norm": 2.75,
      "learning_rate": 2.4012892305375774e-05,
      "loss": 0.8092,
      "step": 486000
    },
    {
      "epoch": 1.7033438826329088,
      "grad_norm": 2.59375,
      "learning_rate": 2.4012243276712075e-05,
      "loss": 0.8287,
      "step": 486010
    },
    {
      "epoch": 1.7033789301398046,
      "grad_norm": 3.109375,
      "learning_rate": 2.4011594248048373e-05,
      "loss": 0.7343,
      "step": 486020
    },
    {
      "epoch": 1.7034139776467,
      "grad_norm": 3.375,
      "learning_rate": 2.401094521938467e-05,
      "loss": 0.8577,
      "step": 486030
    },
    {
      "epoch": 1.7034490251535956,
      "grad_norm": 2.734375,
      "learning_rate": 2.401029619072097e-05,
      "loss": 0.8337,
      "step": 486040
    },
    {
      "epoch": 1.7034840726604914,
      "grad_norm": 2.71875,
      "learning_rate": 2.4009647162057267e-05,
      "loss": 0.8526,
      "step": 486050
    },
    {
      "epoch": 1.703519120167387,
      "grad_norm": 3.375,
      "learning_rate": 2.4008998133393565e-05,
      "loss": 0.8239,
      "step": 486060
    },
    {
      "epoch": 1.7035541676742825,
      "grad_norm": 2.90625,
      "learning_rate": 2.4008349104729863e-05,
      "loss": 0.755,
      "step": 486070
    },
    {
      "epoch": 1.7035892151811782,
      "grad_norm": 2.625,
      "learning_rate": 2.400770007606616e-05,
      "loss": 0.8042,
      "step": 486080
    },
    {
      "epoch": 1.7036242626880735,
      "grad_norm": 2.828125,
      "learning_rate": 2.400705104740246e-05,
      "loss": 0.7962,
      "step": 486090
    },
    {
      "epoch": 1.7036593101949693,
      "grad_norm": 3.125,
      "learning_rate": 2.4006402018738757e-05,
      "loss": 0.7992,
      "step": 486100
    },
    {
      "epoch": 1.7036943577018648,
      "grad_norm": 3.0,
      "learning_rate": 2.4005752990075055e-05,
      "loss": 0.9048,
      "step": 486110
    },
    {
      "epoch": 1.7037294052087604,
      "grad_norm": 2.953125,
      "learning_rate": 2.400510396141135e-05,
      "loss": 0.887,
      "step": 486120
    },
    {
      "epoch": 1.7037644527156561,
      "grad_norm": 3.59375,
      "learning_rate": 2.400445493274765e-05,
      "loss": 0.8823,
      "step": 486130
    },
    {
      "epoch": 1.7037995002225517,
      "grad_norm": 2.8125,
      "learning_rate": 2.400380590408395e-05,
      "loss": 0.8184,
      "step": 486140
    },
    {
      "epoch": 1.7038345477294472,
      "grad_norm": 2.921875,
      "learning_rate": 2.4003156875420247e-05,
      "loss": 0.8494,
      "step": 486150
    },
    {
      "epoch": 1.703869595236343,
      "grad_norm": 2.890625,
      "learning_rate": 2.4002507846756545e-05,
      "loss": 0.8794,
      "step": 486160
    },
    {
      "epoch": 1.7039046427432385,
      "grad_norm": 2.828125,
      "learning_rate": 2.4001858818092843e-05,
      "loss": 0.8602,
      "step": 486170
    },
    {
      "epoch": 1.703939690250134,
      "grad_norm": 2.65625,
      "learning_rate": 2.400120978942914e-05,
      "loss": 0.8993,
      "step": 486180
    },
    {
      "epoch": 1.7039747377570298,
      "grad_norm": 3.234375,
      "learning_rate": 2.400056076076544e-05,
      "loss": 0.9098,
      "step": 486190
    },
    {
      "epoch": 1.704009785263925,
      "grad_norm": 2.84375,
      "learning_rate": 2.3999911732101737e-05,
      "loss": 0.8428,
      "step": 486200
    },
    {
      "epoch": 1.7040448327708209,
      "grad_norm": 2.71875,
      "learning_rate": 2.3999262703438035e-05,
      "loss": 0.7908,
      "step": 486210
    },
    {
      "epoch": 1.7040798802777164,
      "grad_norm": 3.3125,
      "learning_rate": 2.3998613674774333e-05,
      "loss": 0.8899,
      "step": 486220
    },
    {
      "epoch": 1.704114927784612,
      "grad_norm": 2.8125,
      "learning_rate": 2.399796464611063e-05,
      "loss": 0.8235,
      "step": 486230
    },
    {
      "epoch": 1.7041499752915077,
      "grad_norm": 3.328125,
      "learning_rate": 2.399731561744693e-05,
      "loss": 0.912,
      "step": 486240
    },
    {
      "epoch": 1.7041850227984032,
      "grad_norm": 3.0625,
      "learning_rate": 2.3996666588783227e-05,
      "loss": 0.8283,
      "step": 486250
    },
    {
      "epoch": 1.7042200703052988,
      "grad_norm": 2.953125,
      "learning_rate": 2.3996017560119525e-05,
      "loss": 0.8083,
      "step": 486260
    },
    {
      "epoch": 1.7042551178121945,
      "grad_norm": 2.921875,
      "learning_rate": 2.3995368531455827e-05,
      "loss": 0.7965,
      "step": 486270
    },
    {
      "epoch": 1.70429016531909,
      "grad_norm": 3.0,
      "learning_rate": 2.3994719502792125e-05,
      "loss": 0.8575,
      "step": 486280
    },
    {
      "epoch": 1.7043252128259856,
      "grad_norm": 3.03125,
      "learning_rate": 2.3994070474128423e-05,
      "loss": 0.8239,
      "step": 486290
    },
    {
      "epoch": 1.7043602603328813,
      "grad_norm": 3.5,
      "learning_rate": 2.399342144546472e-05,
      "loss": 0.8351,
      "step": 486300
    },
    {
      "epoch": 1.7043953078397767,
      "grad_norm": 2.984375,
      "learning_rate": 2.3992772416801015e-05,
      "loss": 0.8041,
      "step": 486310
    },
    {
      "epoch": 1.7044303553466724,
      "grad_norm": 3.015625,
      "learning_rate": 2.3992123388137313e-05,
      "loss": 0.8499,
      "step": 486320
    },
    {
      "epoch": 1.704465402853568,
      "grad_norm": 2.671875,
      "learning_rate": 2.399147435947361e-05,
      "loss": 0.8507,
      "step": 486330
    },
    {
      "epoch": 1.7045004503604635,
      "grad_norm": 2.796875,
      "learning_rate": 2.399082533080991e-05,
      "loss": 0.8539,
      "step": 486340
    },
    {
      "epoch": 1.7045354978673593,
      "grad_norm": 2.890625,
      "learning_rate": 2.3990176302146207e-05,
      "loss": 0.8271,
      "step": 486350
    },
    {
      "epoch": 1.7045705453742548,
      "grad_norm": 3.203125,
      "learning_rate": 2.3989527273482505e-05,
      "loss": 0.7452,
      "step": 486360
    },
    {
      "epoch": 1.7046055928811503,
      "grad_norm": 2.4375,
      "learning_rate": 2.3988878244818803e-05,
      "loss": 0.775,
      "step": 486370
    },
    {
      "epoch": 1.704640640388046,
      "grad_norm": 3.25,
      "learning_rate": 2.3988229216155105e-05,
      "loss": 0.9588,
      "step": 486380
    },
    {
      "epoch": 1.7046756878949416,
      "grad_norm": 2.921875,
      "learning_rate": 2.3987580187491403e-05,
      "loss": 0.8118,
      "step": 486390
    },
    {
      "epoch": 1.7047107354018372,
      "grad_norm": 2.828125,
      "learning_rate": 2.39869311588277e-05,
      "loss": 0.8812,
      "step": 486400
    },
    {
      "epoch": 1.704745782908733,
      "grad_norm": 3.078125,
      "learning_rate": 2.3986282130164e-05,
      "loss": 0.8149,
      "step": 486410
    },
    {
      "epoch": 1.7047808304156282,
      "grad_norm": 2.546875,
      "learning_rate": 2.3985633101500297e-05,
      "loss": 0.8226,
      "step": 486420
    },
    {
      "epoch": 1.704815877922524,
      "grad_norm": 3.078125,
      "learning_rate": 2.3984984072836595e-05,
      "loss": 0.8581,
      "step": 486430
    },
    {
      "epoch": 1.7048509254294197,
      "grad_norm": 2.984375,
      "learning_rate": 2.3984335044172893e-05,
      "loss": 0.7879,
      "step": 486440
    },
    {
      "epoch": 1.704885972936315,
      "grad_norm": 2.734375,
      "learning_rate": 2.398368601550919e-05,
      "loss": 0.7984,
      "step": 486450
    },
    {
      "epoch": 1.7049210204432108,
      "grad_norm": 2.75,
      "learning_rate": 2.398303698684549e-05,
      "loss": 0.7647,
      "step": 486460
    },
    {
      "epoch": 1.7049560679501063,
      "grad_norm": 2.765625,
      "learning_rate": 2.3982387958181787e-05,
      "loss": 0.7941,
      "step": 486470
    },
    {
      "epoch": 1.7049911154570019,
      "grad_norm": 3.140625,
      "learning_rate": 2.3981738929518085e-05,
      "loss": 0.8153,
      "step": 486480
    },
    {
      "epoch": 1.7050261629638976,
      "grad_norm": 2.96875,
      "learning_rate": 2.3981089900854383e-05,
      "loss": 0.9064,
      "step": 486490
    },
    {
      "epoch": 1.7050612104707932,
      "grad_norm": 2.71875,
      "learning_rate": 2.398044087219068e-05,
      "loss": 0.8165,
      "step": 486500
    },
    {
      "epoch": 1.7050962579776887,
      "grad_norm": 2.703125,
      "learning_rate": 2.397979184352698e-05,
      "loss": 0.861,
      "step": 486510
    },
    {
      "epoch": 1.7051313054845845,
      "grad_norm": 3.0,
      "learning_rate": 2.3979142814863277e-05,
      "loss": 0.8854,
      "step": 486520
    },
    {
      "epoch": 1.70516635299148,
      "grad_norm": 2.71875,
      "learning_rate": 2.3978493786199575e-05,
      "loss": 0.762,
      "step": 486530
    },
    {
      "epoch": 1.7052014004983755,
      "grad_norm": 2.953125,
      "learning_rate": 2.3977844757535873e-05,
      "loss": 0.7749,
      "step": 486540
    },
    {
      "epoch": 1.7052364480052713,
      "grad_norm": 3.359375,
      "learning_rate": 2.397719572887217e-05,
      "loss": 0.8975,
      "step": 486550
    },
    {
      "epoch": 1.7052714955121666,
      "grad_norm": 3.0,
      "learning_rate": 2.397654670020847e-05,
      "loss": 0.9928,
      "step": 486560
    },
    {
      "epoch": 1.7053065430190624,
      "grad_norm": 2.8125,
      "learning_rate": 2.3975897671544767e-05,
      "loss": 0.8563,
      "step": 486570
    },
    {
      "epoch": 1.705341590525958,
      "grad_norm": 2.765625,
      "learning_rate": 2.3975248642881065e-05,
      "loss": 0.8726,
      "step": 486580
    },
    {
      "epoch": 1.7053766380328534,
      "grad_norm": 2.921875,
      "learning_rate": 2.3974599614217363e-05,
      "loss": 0.8821,
      "step": 486590
    },
    {
      "epoch": 1.7054116855397492,
      "grad_norm": 2.96875,
      "learning_rate": 2.397395058555366e-05,
      "loss": 0.8303,
      "step": 486600
    },
    {
      "epoch": 1.7054467330466447,
      "grad_norm": 3.03125,
      "learning_rate": 2.397330155688996e-05,
      "loss": 0.7237,
      "step": 486610
    },
    {
      "epoch": 1.7054817805535403,
      "grad_norm": 3.03125,
      "learning_rate": 2.3972652528226257e-05,
      "loss": 0.8489,
      "step": 486620
    },
    {
      "epoch": 1.705516828060436,
      "grad_norm": 2.71875,
      "learning_rate": 2.3972003499562558e-05,
      "loss": 0.8241,
      "step": 486630
    },
    {
      "epoch": 1.7055518755673316,
      "grad_norm": 2.734375,
      "learning_rate": 2.3971354470898856e-05,
      "loss": 0.7994,
      "step": 486640
    },
    {
      "epoch": 1.705586923074227,
      "grad_norm": 4.0,
      "learning_rate": 2.3970705442235154e-05,
      "loss": 0.8696,
      "step": 486650
    },
    {
      "epoch": 1.7056219705811229,
      "grad_norm": 3.421875,
      "learning_rate": 2.3970056413571452e-05,
      "loss": 0.8525,
      "step": 486660
    },
    {
      "epoch": 1.7056570180880182,
      "grad_norm": 2.796875,
      "learning_rate": 2.396940738490775e-05,
      "loss": 0.9128,
      "step": 486670
    },
    {
      "epoch": 1.705692065594914,
      "grad_norm": 3.09375,
      "learning_rate": 2.3968758356244048e-05,
      "loss": 0.8949,
      "step": 486680
    },
    {
      "epoch": 1.7057271131018095,
      "grad_norm": 2.71875,
      "learning_rate": 2.3968109327580343e-05,
      "loss": 0.7853,
      "step": 486690
    },
    {
      "epoch": 1.705762160608705,
      "grad_norm": 3.34375,
      "learning_rate": 2.396746029891664e-05,
      "loss": 0.8677,
      "step": 486700
    },
    {
      "epoch": 1.7057972081156008,
      "grad_norm": 3.1875,
      "learning_rate": 2.396681127025294e-05,
      "loss": 0.8396,
      "step": 486710
    },
    {
      "epoch": 1.7058322556224963,
      "grad_norm": 2.78125,
      "learning_rate": 2.3966162241589237e-05,
      "loss": 0.8459,
      "step": 486720
    },
    {
      "epoch": 1.7058673031293918,
      "grad_norm": 4.09375,
      "learning_rate": 2.3965513212925535e-05,
      "loss": 0.8985,
      "step": 486730
    },
    {
      "epoch": 1.7059023506362876,
      "grad_norm": 2.46875,
      "learning_rate": 2.3964864184261833e-05,
      "loss": 0.7522,
      "step": 486740
    },
    {
      "epoch": 1.7059373981431831,
      "grad_norm": 2.96875,
      "learning_rate": 2.3964215155598134e-05,
      "loss": 0.854,
      "step": 486750
    },
    {
      "epoch": 1.7059724456500787,
      "grad_norm": 2.484375,
      "learning_rate": 2.3963566126934432e-05,
      "loss": 0.8256,
      "step": 486760
    },
    {
      "epoch": 1.7060074931569744,
      "grad_norm": 2.453125,
      "learning_rate": 2.396291709827073e-05,
      "loss": 0.7865,
      "step": 486770
    },
    {
      "epoch": 1.7060425406638697,
      "grad_norm": 3.421875,
      "learning_rate": 2.3962268069607028e-05,
      "loss": 0.9587,
      "step": 486780
    },
    {
      "epoch": 1.7060775881707655,
      "grad_norm": 2.75,
      "learning_rate": 2.3961619040943326e-05,
      "loss": 0.8101,
      "step": 486790
    },
    {
      "epoch": 1.706112635677661,
      "grad_norm": 2.84375,
      "learning_rate": 2.3960970012279624e-05,
      "loss": 0.7741,
      "step": 486800
    },
    {
      "epoch": 1.7061476831845566,
      "grad_norm": 2.5,
      "learning_rate": 2.3960320983615922e-05,
      "loss": 0.806,
      "step": 486810
    },
    {
      "epoch": 1.7061827306914523,
      "grad_norm": 2.53125,
      "learning_rate": 2.395967195495222e-05,
      "loss": 0.8198,
      "step": 486820
    },
    {
      "epoch": 1.7062177781983479,
      "grad_norm": 3.1875,
      "learning_rate": 2.3959022926288518e-05,
      "loss": 0.896,
      "step": 486830
    },
    {
      "epoch": 1.7062528257052434,
      "grad_norm": 2.765625,
      "learning_rate": 2.3958373897624816e-05,
      "loss": 0.8769,
      "step": 486840
    },
    {
      "epoch": 1.7062878732121392,
      "grad_norm": 2.625,
      "learning_rate": 2.3957724868961114e-05,
      "loss": 0.9703,
      "step": 486850
    },
    {
      "epoch": 1.7063229207190347,
      "grad_norm": 2.96875,
      "learning_rate": 2.3957075840297412e-05,
      "loss": 0.8783,
      "step": 486860
    },
    {
      "epoch": 1.7063579682259302,
      "grad_norm": 2.453125,
      "learning_rate": 2.395642681163371e-05,
      "loss": 0.848,
      "step": 486870
    },
    {
      "epoch": 1.706393015732826,
      "grad_norm": 3.21875,
      "learning_rate": 2.3955777782970008e-05,
      "loss": 0.8202,
      "step": 486880
    },
    {
      "epoch": 1.7064280632397213,
      "grad_norm": 2.5625,
      "learning_rate": 2.3955128754306306e-05,
      "loss": 0.8583,
      "step": 486890
    },
    {
      "epoch": 1.706463110746617,
      "grad_norm": 2.921875,
      "learning_rate": 2.3954479725642604e-05,
      "loss": 0.8621,
      "step": 486900
    },
    {
      "epoch": 1.7064981582535126,
      "grad_norm": 2.515625,
      "learning_rate": 2.3953830696978902e-05,
      "loss": 0.8282,
      "step": 486910
    },
    {
      "epoch": 1.7065332057604081,
      "grad_norm": 2.8125,
      "learning_rate": 2.39531816683152e-05,
      "loss": 0.8441,
      "step": 486920
    },
    {
      "epoch": 1.7065682532673039,
      "grad_norm": 2.640625,
      "learning_rate": 2.3952532639651498e-05,
      "loss": 0.858,
      "step": 486930
    },
    {
      "epoch": 1.7066033007741994,
      "grad_norm": 3.125,
      "learning_rate": 2.3951883610987796e-05,
      "loss": 0.7771,
      "step": 486940
    },
    {
      "epoch": 1.706638348281095,
      "grad_norm": 2.640625,
      "learning_rate": 2.3951234582324094e-05,
      "loss": 0.8573,
      "step": 486950
    },
    {
      "epoch": 1.7066733957879907,
      "grad_norm": 2.53125,
      "learning_rate": 2.3950585553660392e-05,
      "loss": 0.8899,
      "step": 486960
    },
    {
      "epoch": 1.7067084432948862,
      "grad_norm": 3.359375,
      "learning_rate": 2.394993652499669e-05,
      "loss": 0.852,
      "step": 486970
    },
    {
      "epoch": 1.7067434908017818,
      "grad_norm": 2.90625,
      "learning_rate": 2.3949287496332988e-05,
      "loss": 0.8315,
      "step": 486980
    },
    {
      "epoch": 1.7067785383086775,
      "grad_norm": 2.78125,
      "learning_rate": 2.3948638467669286e-05,
      "loss": 0.8587,
      "step": 486990
    },
    {
      "epoch": 1.7068135858155729,
      "grad_norm": 2.921875,
      "learning_rate": 2.3947989439005587e-05,
      "loss": 0.9491,
      "step": 487000
    },
    {
      "epoch": 1.7068486333224686,
      "grad_norm": 3.0,
      "learning_rate": 2.3947340410341885e-05,
      "loss": 0.8813,
      "step": 487010
    },
    {
      "epoch": 1.7068836808293644,
      "grad_norm": 2.921875,
      "learning_rate": 2.3946691381678183e-05,
      "loss": 0.8372,
      "step": 487020
    },
    {
      "epoch": 1.7069187283362597,
      "grad_norm": 2.90625,
      "learning_rate": 2.394604235301448e-05,
      "loss": 0.9006,
      "step": 487030
    },
    {
      "epoch": 1.7069537758431554,
      "grad_norm": 3.140625,
      "learning_rate": 2.394539332435078e-05,
      "loss": 0.9213,
      "step": 487040
    },
    {
      "epoch": 1.706988823350051,
      "grad_norm": 2.859375,
      "learning_rate": 2.3944744295687077e-05,
      "loss": 0.8368,
      "step": 487050
    },
    {
      "epoch": 1.7070238708569465,
      "grad_norm": 2.921875,
      "learning_rate": 2.3944095267023375e-05,
      "loss": 0.8442,
      "step": 487060
    },
    {
      "epoch": 1.7070589183638423,
      "grad_norm": 3.21875,
      "learning_rate": 2.394344623835967e-05,
      "loss": 0.9117,
      "step": 487070
    },
    {
      "epoch": 1.7070939658707378,
      "grad_norm": 3.359375,
      "learning_rate": 2.3942797209695968e-05,
      "loss": 0.882,
      "step": 487080
    },
    {
      "epoch": 1.7071290133776333,
      "grad_norm": 2.8125,
      "learning_rate": 2.3942148181032266e-05,
      "loss": 0.7911,
      "step": 487090
    },
    {
      "epoch": 1.707164060884529,
      "grad_norm": 2.328125,
      "learning_rate": 2.3941499152368564e-05,
      "loss": 0.7988,
      "step": 487100
    },
    {
      "epoch": 1.7071991083914244,
      "grad_norm": 3.484375,
      "learning_rate": 2.3940850123704865e-05,
      "loss": 0.9179,
      "step": 487110
    },
    {
      "epoch": 1.7072341558983202,
      "grad_norm": 2.984375,
      "learning_rate": 2.3940201095041163e-05,
      "loss": 0.8913,
      "step": 487120
    },
    {
      "epoch": 1.707269203405216,
      "grad_norm": 2.703125,
      "learning_rate": 2.393955206637746e-05,
      "loss": 0.7935,
      "step": 487130
    },
    {
      "epoch": 1.7073042509121112,
      "grad_norm": 2.625,
      "learning_rate": 2.393890303771376e-05,
      "loss": 0.8055,
      "step": 487140
    },
    {
      "epoch": 1.707339298419007,
      "grad_norm": 3.5,
      "learning_rate": 2.3938254009050057e-05,
      "loss": 0.8987,
      "step": 487150
    },
    {
      "epoch": 1.7073743459259025,
      "grad_norm": 2.953125,
      "learning_rate": 2.3937604980386355e-05,
      "loss": 0.8697,
      "step": 487160
    },
    {
      "epoch": 1.707409393432798,
      "grad_norm": 3.078125,
      "learning_rate": 2.3936955951722653e-05,
      "loss": 0.8012,
      "step": 487170
    },
    {
      "epoch": 1.7074444409396938,
      "grad_norm": 2.84375,
      "learning_rate": 2.393630692305895e-05,
      "loss": 0.8118,
      "step": 487180
    },
    {
      "epoch": 1.7074794884465894,
      "grad_norm": 2.65625,
      "learning_rate": 2.393565789439525e-05,
      "loss": 0.8246,
      "step": 487190
    },
    {
      "epoch": 1.707514535953485,
      "grad_norm": 3.03125,
      "learning_rate": 2.3935008865731547e-05,
      "loss": 0.8476,
      "step": 487200
    },
    {
      "epoch": 1.7075495834603807,
      "grad_norm": 3.125,
      "learning_rate": 2.3934359837067845e-05,
      "loss": 0.8406,
      "step": 487210
    },
    {
      "epoch": 1.7075846309672762,
      "grad_norm": 2.78125,
      "learning_rate": 2.3933710808404143e-05,
      "loss": 0.867,
      "step": 487220
    },
    {
      "epoch": 1.7076196784741717,
      "grad_norm": 3.140625,
      "learning_rate": 2.393306177974044e-05,
      "loss": 0.8808,
      "step": 487230
    },
    {
      "epoch": 1.7076547259810675,
      "grad_norm": 2.40625,
      "learning_rate": 2.393241275107674e-05,
      "loss": 0.7571,
      "step": 487240
    },
    {
      "epoch": 1.7076897734879628,
      "grad_norm": 3.109375,
      "learning_rate": 2.3931763722413037e-05,
      "loss": 0.8901,
      "step": 487250
    },
    {
      "epoch": 1.7077248209948586,
      "grad_norm": 3.625,
      "learning_rate": 2.3931114693749335e-05,
      "loss": 0.891,
      "step": 487260
    },
    {
      "epoch": 1.707759868501754,
      "grad_norm": 2.78125,
      "learning_rate": 2.3930465665085633e-05,
      "loss": 0.9469,
      "step": 487270
    },
    {
      "epoch": 1.7077949160086496,
      "grad_norm": 2.90625,
      "learning_rate": 2.392981663642193e-05,
      "loss": 0.8742,
      "step": 487280
    },
    {
      "epoch": 1.7078299635155454,
      "grad_norm": 2.625,
      "learning_rate": 2.392916760775823e-05,
      "loss": 0.9318,
      "step": 487290
    },
    {
      "epoch": 1.707865011022441,
      "grad_norm": 2.9375,
      "learning_rate": 2.3928518579094527e-05,
      "loss": 0.8518,
      "step": 487300
    },
    {
      "epoch": 1.7079000585293365,
      "grad_norm": 2.53125,
      "learning_rate": 2.3927869550430825e-05,
      "loss": 0.8633,
      "step": 487310
    },
    {
      "epoch": 1.7079351060362322,
      "grad_norm": 2.75,
      "learning_rate": 2.3927220521767123e-05,
      "loss": 0.8264,
      "step": 487320
    },
    {
      "epoch": 1.7079701535431278,
      "grad_norm": 2.875,
      "learning_rate": 2.392657149310342e-05,
      "loss": 0.8437,
      "step": 487330
    },
    {
      "epoch": 1.7080052010500233,
      "grad_norm": 3.078125,
      "learning_rate": 2.392592246443972e-05,
      "loss": 0.7707,
      "step": 487340
    },
    {
      "epoch": 1.708040248556919,
      "grad_norm": 2.421875,
      "learning_rate": 2.3925273435776017e-05,
      "loss": 0.8269,
      "step": 487350
    },
    {
      "epoch": 1.7080752960638144,
      "grad_norm": 2.65625,
      "learning_rate": 2.3924624407112315e-05,
      "loss": 0.9119,
      "step": 487360
    },
    {
      "epoch": 1.7081103435707101,
      "grad_norm": 3.546875,
      "learning_rate": 2.3923975378448617e-05,
      "loss": 0.8496,
      "step": 487370
    },
    {
      "epoch": 1.7081453910776057,
      "grad_norm": 2.90625,
      "learning_rate": 2.3923326349784915e-05,
      "loss": 0.8234,
      "step": 487380
    },
    {
      "epoch": 1.7081804385845012,
      "grad_norm": 2.921875,
      "learning_rate": 2.3922677321121213e-05,
      "loss": 0.9488,
      "step": 487390
    },
    {
      "epoch": 1.708215486091397,
      "grad_norm": 2.984375,
      "learning_rate": 2.392202829245751e-05,
      "loss": 0.7631,
      "step": 487400
    },
    {
      "epoch": 1.7082505335982925,
      "grad_norm": 3.34375,
      "learning_rate": 2.392137926379381e-05,
      "loss": 0.7326,
      "step": 487410
    },
    {
      "epoch": 1.708285581105188,
      "grad_norm": 2.453125,
      "learning_rate": 2.3920730235130107e-05,
      "loss": 0.7914,
      "step": 487420
    },
    {
      "epoch": 1.7083206286120838,
      "grad_norm": 3.0625,
      "learning_rate": 2.3920081206466405e-05,
      "loss": 0.7432,
      "step": 487430
    },
    {
      "epoch": 1.7083556761189793,
      "grad_norm": 3.0625,
      "learning_rate": 2.39194321778027e-05,
      "loss": 0.84,
      "step": 487440
    },
    {
      "epoch": 1.7083907236258749,
      "grad_norm": 3.015625,
      "learning_rate": 2.3918783149138997e-05,
      "loss": 0.8222,
      "step": 487450
    },
    {
      "epoch": 1.7084257711327706,
      "grad_norm": 2.828125,
      "learning_rate": 2.3918134120475295e-05,
      "loss": 0.7962,
      "step": 487460
    },
    {
      "epoch": 1.708460818639666,
      "grad_norm": 2.828125,
      "learning_rate": 2.3917485091811593e-05,
      "loss": 0.8174,
      "step": 487470
    },
    {
      "epoch": 1.7084958661465617,
      "grad_norm": 6.9375,
      "learning_rate": 2.3916836063147895e-05,
      "loss": 0.8076,
      "step": 487480
    },
    {
      "epoch": 1.7085309136534572,
      "grad_norm": 2.546875,
      "learning_rate": 2.3916187034484193e-05,
      "loss": 0.7795,
      "step": 487490
    },
    {
      "epoch": 1.7085659611603528,
      "grad_norm": 2.859375,
      "learning_rate": 2.391553800582049e-05,
      "loss": 0.8124,
      "step": 487500
    },
    {
      "epoch": 1.7086010086672485,
      "grad_norm": 2.90625,
      "learning_rate": 2.391488897715679e-05,
      "loss": 0.8618,
      "step": 487510
    },
    {
      "epoch": 1.708636056174144,
      "grad_norm": 2.546875,
      "learning_rate": 2.3914239948493087e-05,
      "loss": 0.7988,
      "step": 487520
    },
    {
      "epoch": 1.7086711036810396,
      "grad_norm": 3.109375,
      "learning_rate": 2.3913590919829385e-05,
      "loss": 0.8476,
      "step": 487530
    },
    {
      "epoch": 1.7087061511879353,
      "grad_norm": 2.9375,
      "learning_rate": 2.3912941891165683e-05,
      "loss": 0.88,
      "step": 487540
    },
    {
      "epoch": 1.7087411986948309,
      "grad_norm": 2.734375,
      "learning_rate": 2.391229286250198e-05,
      "loss": 0.7526,
      "step": 487550
    },
    {
      "epoch": 1.7087762462017264,
      "grad_norm": 2.5,
      "learning_rate": 2.391164383383828e-05,
      "loss": 0.7437,
      "step": 487560
    },
    {
      "epoch": 1.7088112937086222,
      "grad_norm": 2.921875,
      "learning_rate": 2.3910994805174577e-05,
      "loss": 0.7207,
      "step": 487570
    },
    {
      "epoch": 1.7088463412155175,
      "grad_norm": 3.21875,
      "learning_rate": 2.3910345776510875e-05,
      "loss": 0.8548,
      "step": 487580
    },
    {
      "epoch": 1.7088813887224132,
      "grad_norm": 3.078125,
      "learning_rate": 2.3909696747847173e-05,
      "loss": 0.8037,
      "step": 487590
    },
    {
      "epoch": 1.7089164362293088,
      "grad_norm": 2.890625,
      "learning_rate": 2.390904771918347e-05,
      "loss": 0.8525,
      "step": 487600
    },
    {
      "epoch": 1.7089514837362043,
      "grad_norm": 3.015625,
      "learning_rate": 2.390839869051977e-05,
      "loss": 0.9063,
      "step": 487610
    },
    {
      "epoch": 1.7089865312431,
      "grad_norm": 3.140625,
      "learning_rate": 2.390774966185607e-05,
      "loss": 0.7579,
      "step": 487620
    },
    {
      "epoch": 1.7090215787499956,
      "grad_norm": 2.90625,
      "learning_rate": 2.3907100633192365e-05,
      "loss": 0.8283,
      "step": 487630
    },
    {
      "epoch": 1.7090566262568911,
      "grad_norm": 2.84375,
      "learning_rate": 2.3906451604528663e-05,
      "loss": 0.8898,
      "step": 487640
    },
    {
      "epoch": 1.709091673763787,
      "grad_norm": 2.46875,
      "learning_rate": 2.390580257586496e-05,
      "loss": 0.8365,
      "step": 487650
    },
    {
      "epoch": 1.7091267212706824,
      "grad_norm": 2.84375,
      "learning_rate": 2.390515354720126e-05,
      "loss": 0.766,
      "step": 487660
    },
    {
      "epoch": 1.709161768777578,
      "grad_norm": 3.609375,
      "learning_rate": 2.3904504518537557e-05,
      "loss": 0.8993,
      "step": 487670
    },
    {
      "epoch": 1.7091968162844737,
      "grad_norm": 2.734375,
      "learning_rate": 2.3903855489873855e-05,
      "loss": 0.8085,
      "step": 487680
    },
    {
      "epoch": 1.709231863791369,
      "grad_norm": 2.828125,
      "learning_rate": 2.3903206461210153e-05,
      "loss": 0.8067,
      "step": 487690
    },
    {
      "epoch": 1.7092669112982648,
      "grad_norm": 3.046875,
      "learning_rate": 2.390255743254645e-05,
      "loss": 0.8977,
      "step": 487700
    },
    {
      "epoch": 1.7093019588051606,
      "grad_norm": 3.328125,
      "learning_rate": 2.390190840388275e-05,
      "loss": 0.8654,
      "step": 487710
    },
    {
      "epoch": 1.7093370063120559,
      "grad_norm": 2.8125,
      "learning_rate": 2.3901259375219047e-05,
      "loss": 0.8759,
      "step": 487720
    },
    {
      "epoch": 1.7093720538189516,
      "grad_norm": 3.140625,
      "learning_rate": 2.3900610346555348e-05,
      "loss": 0.8323,
      "step": 487730
    },
    {
      "epoch": 1.7094071013258472,
      "grad_norm": 2.8125,
      "learning_rate": 2.3899961317891646e-05,
      "loss": 0.7918,
      "step": 487740
    },
    {
      "epoch": 1.7094421488327427,
      "grad_norm": 2.75,
      "learning_rate": 2.3899312289227944e-05,
      "loss": 0.8072,
      "step": 487750
    },
    {
      "epoch": 1.7094771963396385,
      "grad_norm": 3.03125,
      "learning_rate": 2.3898663260564242e-05,
      "loss": 0.8678,
      "step": 487760
    },
    {
      "epoch": 1.709512243846534,
      "grad_norm": 3.140625,
      "learning_rate": 2.389801423190054e-05,
      "loss": 0.8261,
      "step": 487770
    },
    {
      "epoch": 1.7095472913534295,
      "grad_norm": 3.0,
      "learning_rate": 2.3897365203236838e-05,
      "loss": 0.7847,
      "step": 487780
    },
    {
      "epoch": 1.7095823388603253,
      "grad_norm": 3.015625,
      "learning_rate": 2.3896716174573136e-05,
      "loss": 0.8548,
      "step": 487790
    },
    {
      "epoch": 1.7096173863672206,
      "grad_norm": 3.28125,
      "learning_rate": 2.3896067145909434e-05,
      "loss": 0.8631,
      "step": 487800
    },
    {
      "epoch": 1.7096524338741164,
      "grad_norm": 2.640625,
      "learning_rate": 2.3895418117245732e-05,
      "loss": 0.8484,
      "step": 487810
    },
    {
      "epoch": 1.7096874813810121,
      "grad_norm": 2.875,
      "learning_rate": 2.3894769088582027e-05,
      "loss": 0.8609,
      "step": 487820
    },
    {
      "epoch": 1.7097225288879074,
      "grad_norm": 2.609375,
      "learning_rate": 2.3894120059918325e-05,
      "loss": 0.7773,
      "step": 487830
    },
    {
      "epoch": 1.7097575763948032,
      "grad_norm": 3.078125,
      "learning_rate": 2.3893471031254623e-05,
      "loss": 0.9471,
      "step": 487840
    },
    {
      "epoch": 1.7097926239016987,
      "grad_norm": 2.78125,
      "learning_rate": 2.3892822002590924e-05,
      "loss": 0.7698,
      "step": 487850
    },
    {
      "epoch": 1.7098276714085943,
      "grad_norm": 2.609375,
      "learning_rate": 2.3892172973927222e-05,
      "loss": 0.8181,
      "step": 487860
    },
    {
      "epoch": 1.70986271891549,
      "grad_norm": 3.375,
      "learning_rate": 2.389152394526352e-05,
      "loss": 0.7774,
      "step": 487870
    },
    {
      "epoch": 1.7098977664223856,
      "grad_norm": 2.8125,
      "learning_rate": 2.3890874916599818e-05,
      "loss": 0.8352,
      "step": 487880
    },
    {
      "epoch": 1.709932813929281,
      "grad_norm": 2.671875,
      "learning_rate": 2.3890225887936116e-05,
      "loss": 0.8255,
      "step": 487890
    },
    {
      "epoch": 1.7099678614361769,
      "grad_norm": 2.890625,
      "learning_rate": 2.3889576859272414e-05,
      "loss": 0.8033,
      "step": 487900
    },
    {
      "epoch": 1.7100029089430724,
      "grad_norm": 2.8125,
      "learning_rate": 2.3888927830608712e-05,
      "loss": 0.9162,
      "step": 487910
    },
    {
      "epoch": 1.710037956449968,
      "grad_norm": 2.5,
      "learning_rate": 2.388827880194501e-05,
      "loss": 0.8114,
      "step": 487920
    },
    {
      "epoch": 1.7100730039568637,
      "grad_norm": 2.75,
      "learning_rate": 2.3887629773281308e-05,
      "loss": 0.8735,
      "step": 487930
    },
    {
      "epoch": 1.710108051463759,
      "grad_norm": 2.921875,
      "learning_rate": 2.3886980744617606e-05,
      "loss": 0.8105,
      "step": 487940
    },
    {
      "epoch": 1.7101430989706548,
      "grad_norm": 3.25,
      "learning_rate": 2.3886331715953904e-05,
      "loss": 0.8475,
      "step": 487950
    },
    {
      "epoch": 1.7101781464775503,
      "grad_norm": 2.75,
      "learning_rate": 2.3885682687290202e-05,
      "loss": 0.8749,
      "step": 487960
    },
    {
      "epoch": 1.7102131939844458,
      "grad_norm": 2.59375,
      "learning_rate": 2.38850336586265e-05,
      "loss": 0.752,
      "step": 487970
    },
    {
      "epoch": 1.7102482414913416,
      "grad_norm": 3.125,
      "learning_rate": 2.3884384629962798e-05,
      "loss": 0.79,
      "step": 487980
    },
    {
      "epoch": 1.7102832889982371,
      "grad_norm": 2.921875,
      "learning_rate": 2.38837356012991e-05,
      "loss": 0.8057,
      "step": 487990
    },
    {
      "epoch": 1.7103183365051327,
      "grad_norm": 3.046875,
      "learning_rate": 2.3883086572635398e-05,
      "loss": 0.8283,
      "step": 488000
    },
    {
      "epoch": 1.7103533840120284,
      "grad_norm": 3.265625,
      "learning_rate": 2.3882437543971692e-05,
      "loss": 0.8258,
      "step": 488010
    },
    {
      "epoch": 1.710388431518924,
      "grad_norm": 3.203125,
      "learning_rate": 2.388178851530799e-05,
      "loss": 0.9198,
      "step": 488020
    },
    {
      "epoch": 1.7104234790258195,
      "grad_norm": 2.578125,
      "learning_rate": 2.3881139486644288e-05,
      "loss": 0.8868,
      "step": 488030
    },
    {
      "epoch": 1.7104585265327152,
      "grad_norm": 2.875,
      "learning_rate": 2.3880490457980586e-05,
      "loss": 0.8843,
      "step": 488040
    },
    {
      "epoch": 1.7104935740396106,
      "grad_norm": 2.953125,
      "learning_rate": 2.3879841429316884e-05,
      "loss": 0.8086,
      "step": 488050
    },
    {
      "epoch": 1.7105286215465063,
      "grad_norm": 3.015625,
      "learning_rate": 2.3879192400653182e-05,
      "loss": 0.8531,
      "step": 488060
    },
    {
      "epoch": 1.7105636690534018,
      "grad_norm": 2.5,
      "learning_rate": 2.387854337198948e-05,
      "loss": 0.8395,
      "step": 488070
    },
    {
      "epoch": 1.7105987165602974,
      "grad_norm": 3.328125,
      "learning_rate": 2.3877894343325778e-05,
      "loss": 0.8971,
      "step": 488080
    },
    {
      "epoch": 1.7106337640671931,
      "grad_norm": 2.703125,
      "learning_rate": 2.3877245314662076e-05,
      "loss": 0.7941,
      "step": 488090
    },
    {
      "epoch": 1.7106688115740887,
      "grad_norm": 2.625,
      "learning_rate": 2.3876596285998378e-05,
      "loss": 0.8029,
      "step": 488100
    },
    {
      "epoch": 1.7107038590809842,
      "grad_norm": 2.9375,
      "learning_rate": 2.3875947257334676e-05,
      "loss": 0.8628,
      "step": 488110
    },
    {
      "epoch": 1.71073890658788,
      "grad_norm": 2.65625,
      "learning_rate": 2.3875298228670974e-05,
      "loss": 0.7468,
      "step": 488120
    },
    {
      "epoch": 1.7107739540947755,
      "grad_norm": 2.65625,
      "learning_rate": 2.387464920000727e-05,
      "loss": 0.7624,
      "step": 488130
    },
    {
      "epoch": 1.710809001601671,
      "grad_norm": 3.171875,
      "learning_rate": 2.387400017134357e-05,
      "loss": 0.8276,
      "step": 488140
    },
    {
      "epoch": 1.7108440491085668,
      "grad_norm": 3.421875,
      "learning_rate": 2.3873351142679868e-05,
      "loss": 0.8869,
      "step": 488150
    },
    {
      "epoch": 1.7108790966154621,
      "grad_norm": 2.75,
      "learning_rate": 2.3872702114016166e-05,
      "loss": 0.7954,
      "step": 488160
    },
    {
      "epoch": 1.7109141441223579,
      "grad_norm": 2.359375,
      "learning_rate": 2.3872053085352464e-05,
      "loss": 0.8956,
      "step": 488170
    },
    {
      "epoch": 1.7109491916292534,
      "grad_norm": 2.78125,
      "learning_rate": 2.387140405668876e-05,
      "loss": 0.8195,
      "step": 488180
    },
    {
      "epoch": 1.710984239136149,
      "grad_norm": 2.859375,
      "learning_rate": 2.3870755028025056e-05,
      "loss": 0.7954,
      "step": 488190
    },
    {
      "epoch": 1.7110192866430447,
      "grad_norm": 3.0625,
      "learning_rate": 2.3870105999361354e-05,
      "loss": 0.8011,
      "step": 488200
    },
    {
      "epoch": 1.7110543341499402,
      "grad_norm": 2.890625,
      "learning_rate": 2.3869456970697656e-05,
      "loss": 0.8209,
      "step": 488210
    },
    {
      "epoch": 1.7110893816568358,
      "grad_norm": 3.0,
      "learning_rate": 2.3868807942033954e-05,
      "loss": 0.828,
      "step": 488220
    },
    {
      "epoch": 1.7111244291637315,
      "grad_norm": 3.109375,
      "learning_rate": 2.386815891337025e-05,
      "loss": 0.8336,
      "step": 488230
    },
    {
      "epoch": 1.711159476670627,
      "grad_norm": 2.5,
      "learning_rate": 2.386750988470655e-05,
      "loss": 0.8117,
      "step": 488240
    },
    {
      "epoch": 1.7111945241775226,
      "grad_norm": 2.96875,
      "learning_rate": 2.3866860856042848e-05,
      "loss": 0.7847,
      "step": 488250
    },
    {
      "epoch": 1.7112295716844184,
      "grad_norm": 2.9375,
      "learning_rate": 2.3866211827379146e-05,
      "loss": 0.9448,
      "step": 488260
    },
    {
      "epoch": 1.7112646191913137,
      "grad_norm": 3.21875,
      "learning_rate": 2.3865562798715444e-05,
      "loss": 0.864,
      "step": 488270
    },
    {
      "epoch": 1.7112996666982094,
      "grad_norm": 2.75,
      "learning_rate": 2.386491377005174e-05,
      "loss": 0.9137,
      "step": 488280
    },
    {
      "epoch": 1.711334714205105,
      "grad_norm": 3.234375,
      "learning_rate": 2.386426474138804e-05,
      "loss": 0.8526,
      "step": 488290
    },
    {
      "epoch": 1.7113697617120005,
      "grad_norm": 2.53125,
      "learning_rate": 2.3863615712724338e-05,
      "loss": 0.7878,
      "step": 488300
    },
    {
      "epoch": 1.7114048092188963,
      "grad_norm": 2.828125,
      "learning_rate": 2.3862966684060636e-05,
      "loss": 0.8377,
      "step": 488310
    },
    {
      "epoch": 1.7114398567257918,
      "grad_norm": 2.625,
      "learning_rate": 2.3862317655396934e-05,
      "loss": 0.8445,
      "step": 488320
    },
    {
      "epoch": 1.7114749042326873,
      "grad_norm": 3.015625,
      "learning_rate": 2.386166862673323e-05,
      "loss": 0.8112,
      "step": 488330
    },
    {
      "epoch": 1.711509951739583,
      "grad_norm": 3.0,
      "learning_rate": 2.386101959806953e-05,
      "loss": 0.9094,
      "step": 488340
    },
    {
      "epoch": 1.7115449992464786,
      "grad_norm": 3.203125,
      "learning_rate": 2.386037056940583e-05,
      "loss": 0.8226,
      "step": 488350
    },
    {
      "epoch": 1.7115800467533742,
      "grad_norm": 2.9375,
      "learning_rate": 2.385972154074213e-05,
      "loss": 0.8216,
      "step": 488360
    },
    {
      "epoch": 1.71161509426027,
      "grad_norm": 2.671875,
      "learning_rate": 2.3859072512078427e-05,
      "loss": 0.8212,
      "step": 488370
    },
    {
      "epoch": 1.7116501417671652,
      "grad_norm": 3.015625,
      "learning_rate": 2.385842348341472e-05,
      "loss": 0.8081,
      "step": 488380
    },
    {
      "epoch": 1.711685189274061,
      "grad_norm": 2.875,
      "learning_rate": 2.385777445475102e-05,
      "loss": 0.8042,
      "step": 488390
    },
    {
      "epoch": 1.7117202367809567,
      "grad_norm": 2.96875,
      "learning_rate": 2.3857125426087318e-05,
      "loss": 0.802,
      "step": 488400
    },
    {
      "epoch": 1.711755284287852,
      "grad_norm": 3.21875,
      "learning_rate": 2.3856476397423616e-05,
      "loss": 0.8931,
      "step": 488410
    },
    {
      "epoch": 1.7117903317947478,
      "grad_norm": 2.5625,
      "learning_rate": 2.3855827368759914e-05,
      "loss": 0.8961,
      "step": 488420
    },
    {
      "epoch": 1.7118253793016434,
      "grad_norm": 2.734375,
      "learning_rate": 2.385517834009621e-05,
      "loss": 0.8521,
      "step": 488430
    },
    {
      "epoch": 1.711860426808539,
      "grad_norm": 2.859375,
      "learning_rate": 2.385452931143251e-05,
      "loss": 0.8351,
      "step": 488440
    },
    {
      "epoch": 1.7118954743154347,
      "grad_norm": 2.640625,
      "learning_rate": 2.3853880282768808e-05,
      "loss": 0.8422,
      "step": 488450
    },
    {
      "epoch": 1.7119305218223302,
      "grad_norm": 2.890625,
      "learning_rate": 2.3853231254105106e-05,
      "loss": 0.7562,
      "step": 488460
    },
    {
      "epoch": 1.7119655693292257,
      "grad_norm": 2.9375,
      "learning_rate": 2.3852582225441407e-05,
      "loss": 0.8828,
      "step": 488470
    },
    {
      "epoch": 1.7120006168361215,
      "grad_norm": 2.8125,
      "learning_rate": 2.3851933196777705e-05,
      "loss": 0.7765,
      "step": 488480
    },
    {
      "epoch": 1.7120356643430168,
      "grad_norm": 2.859375,
      "learning_rate": 2.3851284168114003e-05,
      "loss": 0.8536,
      "step": 488490
    },
    {
      "epoch": 1.7120707118499126,
      "grad_norm": 2.8125,
      "learning_rate": 2.38506351394503e-05,
      "loss": 0.8642,
      "step": 488500
    },
    {
      "epoch": 1.7121057593568083,
      "grad_norm": 3.234375,
      "learning_rate": 2.38499861107866e-05,
      "loss": 0.8892,
      "step": 488510
    },
    {
      "epoch": 1.7121408068637036,
      "grad_norm": 2.890625,
      "learning_rate": 2.3849337082122897e-05,
      "loss": 0.8958,
      "step": 488520
    },
    {
      "epoch": 1.7121758543705994,
      "grad_norm": 3.1875,
      "learning_rate": 2.3848688053459195e-05,
      "loss": 0.8274,
      "step": 488530
    },
    {
      "epoch": 1.712210901877495,
      "grad_norm": 2.5625,
      "learning_rate": 2.3848039024795493e-05,
      "loss": 0.7949,
      "step": 488540
    },
    {
      "epoch": 1.7122459493843905,
      "grad_norm": 3.265625,
      "learning_rate": 2.384738999613179e-05,
      "loss": 0.8009,
      "step": 488550
    },
    {
      "epoch": 1.7122809968912862,
      "grad_norm": 3.15625,
      "learning_rate": 2.384674096746809e-05,
      "loss": 0.8659,
      "step": 488560
    },
    {
      "epoch": 1.7123160443981817,
      "grad_norm": 3.046875,
      "learning_rate": 2.3846091938804384e-05,
      "loss": 0.7891,
      "step": 488570
    },
    {
      "epoch": 1.7123510919050773,
      "grad_norm": 3.015625,
      "learning_rate": 2.3845442910140685e-05,
      "loss": 0.7589,
      "step": 488580
    },
    {
      "epoch": 1.712386139411973,
      "grad_norm": 2.8125,
      "learning_rate": 2.3844793881476983e-05,
      "loss": 0.8037,
      "step": 488590
    },
    {
      "epoch": 1.7124211869188686,
      "grad_norm": 2.703125,
      "learning_rate": 2.384414485281328e-05,
      "loss": 0.788,
      "step": 488600
    },
    {
      "epoch": 1.7124562344257641,
      "grad_norm": 2.6875,
      "learning_rate": 2.384349582414958e-05,
      "loss": 0.8423,
      "step": 488610
    },
    {
      "epoch": 1.7124912819326599,
      "grad_norm": 2.671875,
      "learning_rate": 2.3842846795485877e-05,
      "loss": 0.8081,
      "step": 488620
    },
    {
      "epoch": 1.7125263294395552,
      "grad_norm": 2.546875,
      "learning_rate": 2.3842197766822175e-05,
      "loss": 0.7822,
      "step": 488630
    },
    {
      "epoch": 1.712561376946451,
      "grad_norm": 2.859375,
      "learning_rate": 2.3841548738158473e-05,
      "loss": 0.8537,
      "step": 488640
    },
    {
      "epoch": 1.7125964244533465,
      "grad_norm": 2.9375,
      "learning_rate": 2.384089970949477e-05,
      "loss": 0.7997,
      "step": 488650
    },
    {
      "epoch": 1.712631471960242,
      "grad_norm": 3.609375,
      "learning_rate": 2.384025068083107e-05,
      "loss": 0.8239,
      "step": 488660
    },
    {
      "epoch": 1.7126665194671378,
      "grad_norm": 3.046875,
      "learning_rate": 2.3839601652167367e-05,
      "loss": 0.8545,
      "step": 488670
    },
    {
      "epoch": 1.7127015669740333,
      "grad_norm": 3.1875,
      "learning_rate": 2.3838952623503665e-05,
      "loss": 0.9034,
      "step": 488680
    },
    {
      "epoch": 1.7127366144809288,
      "grad_norm": 3.546875,
      "learning_rate": 2.3838303594839963e-05,
      "loss": 0.8698,
      "step": 488690
    },
    {
      "epoch": 1.7127716619878246,
      "grad_norm": 3.0625,
      "learning_rate": 2.383765456617626e-05,
      "loss": 0.7683,
      "step": 488700
    },
    {
      "epoch": 1.7128067094947201,
      "grad_norm": 2.65625,
      "learning_rate": 2.383700553751256e-05,
      "loss": 0.812,
      "step": 488710
    },
    {
      "epoch": 1.7128417570016157,
      "grad_norm": 2.6875,
      "learning_rate": 2.383635650884886e-05,
      "loss": 0.8485,
      "step": 488720
    },
    {
      "epoch": 1.7128768045085114,
      "grad_norm": 3.171875,
      "learning_rate": 2.383570748018516e-05,
      "loss": 0.8348,
      "step": 488730
    },
    {
      "epoch": 1.7129118520154067,
      "grad_norm": 3.15625,
      "learning_rate": 2.3835058451521456e-05,
      "loss": 0.9004,
      "step": 488740
    },
    {
      "epoch": 1.7129468995223025,
      "grad_norm": 3.1875,
      "learning_rate": 2.3834409422857754e-05,
      "loss": 0.8985,
      "step": 488750
    },
    {
      "epoch": 1.712981947029198,
      "grad_norm": 2.9375,
      "learning_rate": 2.383376039419405e-05,
      "loss": 0.8526,
      "step": 488760
    },
    {
      "epoch": 1.7130169945360936,
      "grad_norm": 3.0,
      "learning_rate": 2.3833111365530347e-05,
      "loss": 0.8832,
      "step": 488770
    },
    {
      "epoch": 1.7130520420429893,
      "grad_norm": 2.640625,
      "learning_rate": 2.3832462336866645e-05,
      "loss": 0.8494,
      "step": 488780
    },
    {
      "epoch": 1.7130870895498849,
      "grad_norm": 3.3125,
      "learning_rate": 2.3831813308202943e-05,
      "loss": 0.8349,
      "step": 488790
    },
    {
      "epoch": 1.7131221370567804,
      "grad_norm": 3.140625,
      "learning_rate": 2.383116427953924e-05,
      "loss": 0.8487,
      "step": 488800
    },
    {
      "epoch": 1.7131571845636762,
      "grad_norm": 2.46875,
      "learning_rate": 2.383051525087554e-05,
      "loss": 0.7813,
      "step": 488810
    },
    {
      "epoch": 1.7131922320705717,
      "grad_norm": 2.609375,
      "learning_rate": 2.3829866222211837e-05,
      "loss": 0.8015,
      "step": 488820
    },
    {
      "epoch": 1.7132272795774672,
      "grad_norm": 2.8125,
      "learning_rate": 2.382921719354814e-05,
      "loss": 0.8258,
      "step": 488830
    },
    {
      "epoch": 1.713262327084363,
      "grad_norm": 2.75,
      "learning_rate": 2.3828568164884436e-05,
      "loss": 0.8244,
      "step": 488840
    },
    {
      "epoch": 1.7132973745912583,
      "grad_norm": 3.015625,
      "learning_rate": 2.3827919136220734e-05,
      "loss": 0.8827,
      "step": 488850
    },
    {
      "epoch": 1.713332422098154,
      "grad_norm": 2.171875,
      "learning_rate": 2.3827270107557032e-05,
      "loss": 0.8644,
      "step": 488860
    },
    {
      "epoch": 1.7133674696050496,
      "grad_norm": 3.015625,
      "learning_rate": 2.382662107889333e-05,
      "loss": 0.8681,
      "step": 488870
    },
    {
      "epoch": 1.7134025171119451,
      "grad_norm": 2.640625,
      "learning_rate": 2.382597205022963e-05,
      "loss": 0.7669,
      "step": 488880
    },
    {
      "epoch": 1.713437564618841,
      "grad_norm": 3.265625,
      "learning_rate": 2.3825323021565926e-05,
      "loss": 0.8919,
      "step": 488890
    },
    {
      "epoch": 1.7134726121257364,
      "grad_norm": 3.140625,
      "learning_rate": 2.3824673992902224e-05,
      "loss": 0.9255,
      "step": 488900
    },
    {
      "epoch": 1.713507659632632,
      "grad_norm": 2.65625,
      "learning_rate": 2.3824024964238522e-05,
      "loss": 0.8605,
      "step": 488910
    },
    {
      "epoch": 1.7135427071395277,
      "grad_norm": 2.59375,
      "learning_rate": 2.382337593557482e-05,
      "loss": 0.8159,
      "step": 488920
    },
    {
      "epoch": 1.7135777546464233,
      "grad_norm": 3.0625,
      "learning_rate": 2.382272690691112e-05,
      "loss": 0.8173,
      "step": 488930
    },
    {
      "epoch": 1.7136128021533188,
      "grad_norm": 2.875,
      "learning_rate": 2.3822077878247416e-05,
      "loss": 0.8408,
      "step": 488940
    },
    {
      "epoch": 1.7136478496602146,
      "grad_norm": 3.09375,
      "learning_rate": 2.3821428849583714e-05,
      "loss": 0.8397,
      "step": 488950
    },
    {
      "epoch": 1.7136828971671099,
      "grad_norm": 2.78125,
      "learning_rate": 2.3820779820920012e-05,
      "loss": 0.8236,
      "step": 488960
    },
    {
      "epoch": 1.7137179446740056,
      "grad_norm": 2.890625,
      "learning_rate": 2.382013079225631e-05,
      "loss": 0.8093,
      "step": 488970
    },
    {
      "epoch": 1.7137529921809012,
      "grad_norm": 2.65625,
      "learning_rate": 2.381948176359261e-05,
      "loss": 0.7956,
      "step": 488980
    },
    {
      "epoch": 1.7137880396877967,
      "grad_norm": 2.84375,
      "learning_rate": 2.3818832734928906e-05,
      "loss": 0.8866,
      "step": 488990
    },
    {
      "epoch": 1.7138230871946925,
      "grad_norm": 3.40625,
      "learning_rate": 2.3818183706265204e-05,
      "loss": 0.8396,
      "step": 489000
    },
    {
      "epoch": 1.713858134701588,
      "grad_norm": 2.828125,
      "learning_rate": 2.3817534677601502e-05,
      "loss": 0.8397,
      "step": 489010
    },
    {
      "epoch": 1.7138931822084835,
      "grad_norm": 3.171875,
      "learning_rate": 2.38168856489378e-05,
      "loss": 0.8251,
      "step": 489020
    },
    {
      "epoch": 1.7139282297153793,
      "grad_norm": 2.75,
      "learning_rate": 2.38162366202741e-05,
      "loss": 0.8759,
      "step": 489030
    },
    {
      "epoch": 1.7139632772222748,
      "grad_norm": 2.59375,
      "learning_rate": 2.3815587591610396e-05,
      "loss": 0.813,
      "step": 489040
    },
    {
      "epoch": 1.7139983247291704,
      "grad_norm": 2.671875,
      "learning_rate": 2.3814938562946694e-05,
      "loss": 0.7293,
      "step": 489050
    },
    {
      "epoch": 1.714033372236066,
      "grad_norm": 3.078125,
      "learning_rate": 2.3814289534282992e-05,
      "loss": 0.843,
      "step": 489060
    },
    {
      "epoch": 1.7140684197429614,
      "grad_norm": 2.703125,
      "learning_rate": 2.381364050561929e-05,
      "loss": 0.8347,
      "step": 489070
    },
    {
      "epoch": 1.7141034672498572,
      "grad_norm": 3.046875,
      "learning_rate": 2.3812991476955592e-05,
      "loss": 0.7841,
      "step": 489080
    },
    {
      "epoch": 1.714138514756753,
      "grad_norm": 2.953125,
      "learning_rate": 2.381234244829189e-05,
      "loss": 0.877,
      "step": 489090
    },
    {
      "epoch": 1.7141735622636483,
      "grad_norm": 2.75,
      "learning_rate": 2.3811693419628188e-05,
      "loss": 0.7861,
      "step": 489100
    },
    {
      "epoch": 1.714208609770544,
      "grad_norm": 3.28125,
      "learning_rate": 2.3811044390964486e-05,
      "loss": 0.7344,
      "step": 489110
    },
    {
      "epoch": 1.7142436572774395,
      "grad_norm": 2.53125,
      "learning_rate": 2.3810395362300784e-05,
      "loss": 0.7928,
      "step": 489120
    },
    {
      "epoch": 1.714278704784335,
      "grad_norm": 2.5625,
      "learning_rate": 2.380974633363708e-05,
      "loss": 0.8073,
      "step": 489130
    },
    {
      "epoch": 1.7143137522912308,
      "grad_norm": 3.296875,
      "learning_rate": 2.3809097304973376e-05,
      "loss": 0.8616,
      "step": 489140
    },
    {
      "epoch": 1.7143487997981264,
      "grad_norm": 3.140625,
      "learning_rate": 2.3808448276309674e-05,
      "loss": 0.9172,
      "step": 489150
    },
    {
      "epoch": 1.714383847305022,
      "grad_norm": 2.59375,
      "learning_rate": 2.3807799247645972e-05,
      "loss": 0.7683,
      "step": 489160
    },
    {
      "epoch": 1.7144188948119177,
      "grad_norm": 2.734375,
      "learning_rate": 2.380715021898227e-05,
      "loss": 0.7465,
      "step": 489170
    },
    {
      "epoch": 1.7144539423188132,
      "grad_norm": 2.84375,
      "learning_rate": 2.380650119031857e-05,
      "loss": 0.8467,
      "step": 489180
    },
    {
      "epoch": 1.7144889898257087,
      "grad_norm": 3.34375,
      "learning_rate": 2.3805852161654866e-05,
      "loss": 0.9452,
      "step": 489190
    },
    {
      "epoch": 1.7145240373326045,
      "grad_norm": 3.34375,
      "learning_rate": 2.3805203132991168e-05,
      "loss": 0.8531,
      "step": 489200
    },
    {
      "epoch": 1.7145590848394998,
      "grad_norm": 2.734375,
      "learning_rate": 2.3804554104327466e-05,
      "loss": 0.7658,
      "step": 489210
    },
    {
      "epoch": 1.7145941323463956,
      "grad_norm": 3.171875,
      "learning_rate": 2.3803905075663764e-05,
      "loss": 0.8705,
      "step": 489220
    },
    {
      "epoch": 1.714629179853291,
      "grad_norm": 2.609375,
      "learning_rate": 2.3803256047000062e-05,
      "loss": 0.8248,
      "step": 489230
    },
    {
      "epoch": 1.7146642273601866,
      "grad_norm": 3.1875,
      "learning_rate": 2.380260701833636e-05,
      "loss": 0.9584,
      "step": 489240
    },
    {
      "epoch": 1.7146992748670824,
      "grad_norm": 2.875,
      "learning_rate": 2.3801957989672658e-05,
      "loss": 0.8459,
      "step": 489250
    },
    {
      "epoch": 1.714734322373978,
      "grad_norm": 2.84375,
      "learning_rate": 2.3801308961008956e-05,
      "loss": 0.8495,
      "step": 489260
    },
    {
      "epoch": 1.7147693698808735,
      "grad_norm": 2.96875,
      "learning_rate": 2.3800659932345254e-05,
      "loss": 0.8485,
      "step": 489270
    },
    {
      "epoch": 1.7148044173877692,
      "grad_norm": 2.640625,
      "learning_rate": 2.3800010903681552e-05,
      "loss": 0.8257,
      "step": 489280
    },
    {
      "epoch": 1.7148394648946648,
      "grad_norm": 2.8125,
      "learning_rate": 2.379936187501785e-05,
      "loss": 0.9463,
      "step": 489290
    },
    {
      "epoch": 1.7148745124015603,
      "grad_norm": 2.984375,
      "learning_rate": 2.3798712846354148e-05,
      "loss": 0.8582,
      "step": 489300
    },
    {
      "epoch": 1.714909559908456,
      "grad_norm": 2.6875,
      "learning_rate": 2.3798063817690446e-05,
      "loss": 0.7745,
      "step": 489310
    },
    {
      "epoch": 1.7149446074153514,
      "grad_norm": 2.984375,
      "learning_rate": 2.3797414789026744e-05,
      "loss": 0.8128,
      "step": 489320
    },
    {
      "epoch": 1.7149796549222471,
      "grad_norm": 2.96875,
      "learning_rate": 2.3796765760363042e-05,
      "loss": 0.7629,
      "step": 489330
    },
    {
      "epoch": 1.7150147024291427,
      "grad_norm": 3.21875,
      "learning_rate": 2.379611673169934e-05,
      "loss": 0.8191,
      "step": 489340
    },
    {
      "epoch": 1.7150497499360382,
      "grad_norm": 3.140625,
      "learning_rate": 2.3795467703035638e-05,
      "loss": 0.8275,
      "step": 489350
    },
    {
      "epoch": 1.715084797442934,
      "grad_norm": 2.625,
      "learning_rate": 2.3794818674371936e-05,
      "loss": 0.7513,
      "step": 489360
    },
    {
      "epoch": 1.7151198449498295,
      "grad_norm": 3.265625,
      "learning_rate": 2.3794169645708234e-05,
      "loss": 0.8813,
      "step": 489370
    },
    {
      "epoch": 1.715154892456725,
      "grad_norm": 2.59375,
      "learning_rate": 2.3793520617044532e-05,
      "loss": 0.7683,
      "step": 489380
    },
    {
      "epoch": 1.7151899399636208,
      "grad_norm": 2.828125,
      "learning_rate": 2.379287158838083e-05,
      "loss": 0.7924,
      "step": 489390
    },
    {
      "epoch": 1.7152249874705163,
      "grad_norm": 5.40625,
      "learning_rate": 2.3792222559717128e-05,
      "loss": 0.8174,
      "step": 489400
    },
    {
      "epoch": 1.7152600349774119,
      "grad_norm": 3.1875,
      "learning_rate": 2.3791573531053426e-05,
      "loss": 0.8629,
      "step": 489410
    },
    {
      "epoch": 1.7152950824843076,
      "grad_norm": 2.796875,
      "learning_rate": 2.3790924502389724e-05,
      "loss": 0.8708,
      "step": 489420
    },
    {
      "epoch": 1.715330129991203,
      "grad_norm": 2.453125,
      "learning_rate": 2.3790275473726022e-05,
      "loss": 0.8961,
      "step": 489430
    },
    {
      "epoch": 1.7153651774980987,
      "grad_norm": 2.703125,
      "learning_rate": 2.378962644506232e-05,
      "loss": 0.8844,
      "step": 489440
    },
    {
      "epoch": 1.7154002250049942,
      "grad_norm": 2.71875,
      "learning_rate": 2.378897741639862e-05,
      "loss": 0.7907,
      "step": 489450
    },
    {
      "epoch": 1.7154352725118898,
      "grad_norm": 3.046875,
      "learning_rate": 2.378832838773492e-05,
      "loss": 0.7769,
      "step": 489460
    },
    {
      "epoch": 1.7154703200187855,
      "grad_norm": 3.234375,
      "learning_rate": 2.3787679359071217e-05,
      "loss": 0.8496,
      "step": 489470
    },
    {
      "epoch": 1.715505367525681,
      "grad_norm": 3.28125,
      "learning_rate": 2.3787030330407515e-05,
      "loss": 0.8458,
      "step": 489480
    },
    {
      "epoch": 1.7155404150325766,
      "grad_norm": 3.125,
      "learning_rate": 2.3786381301743813e-05,
      "loss": 0.8695,
      "step": 489490
    },
    {
      "epoch": 1.7155754625394724,
      "grad_norm": 2.859375,
      "learning_rate": 2.378573227308011e-05,
      "loss": 0.792,
      "step": 489500
    },
    {
      "epoch": 1.7156105100463679,
      "grad_norm": 2.96875,
      "learning_rate": 2.3785083244416406e-05,
      "loss": 0.7045,
      "step": 489510
    },
    {
      "epoch": 1.7156455575532634,
      "grad_norm": 2.8125,
      "learning_rate": 2.3784434215752704e-05,
      "loss": 0.8256,
      "step": 489520
    },
    {
      "epoch": 1.7156806050601592,
      "grad_norm": 3.15625,
      "learning_rate": 2.3783785187089002e-05,
      "loss": 0.8295,
      "step": 489530
    },
    {
      "epoch": 1.7157156525670545,
      "grad_norm": 2.5625,
      "learning_rate": 2.37831361584253e-05,
      "loss": 0.8795,
      "step": 489540
    },
    {
      "epoch": 1.7157507000739503,
      "grad_norm": 3.3125,
      "learning_rate": 2.3782487129761598e-05,
      "loss": 0.8284,
      "step": 489550
    },
    {
      "epoch": 1.7157857475808458,
      "grad_norm": 3.078125,
      "learning_rate": 2.3781838101097896e-05,
      "loss": 0.7825,
      "step": 489560
    },
    {
      "epoch": 1.7158207950877413,
      "grad_norm": 2.703125,
      "learning_rate": 2.3781189072434197e-05,
      "loss": 0.8228,
      "step": 489570
    },
    {
      "epoch": 1.715855842594637,
      "grad_norm": 3.25,
      "learning_rate": 2.3780540043770495e-05,
      "loss": 0.7903,
      "step": 489580
    },
    {
      "epoch": 1.7158908901015326,
      "grad_norm": 3.265625,
      "learning_rate": 2.3779891015106793e-05,
      "loss": 0.8696,
      "step": 489590
    },
    {
      "epoch": 1.7159259376084282,
      "grad_norm": 2.59375,
      "learning_rate": 2.377924198644309e-05,
      "loss": 0.897,
      "step": 489600
    },
    {
      "epoch": 1.715960985115324,
      "grad_norm": 2.5,
      "learning_rate": 2.377859295777939e-05,
      "loss": 0.8122,
      "step": 489610
    },
    {
      "epoch": 1.7159960326222194,
      "grad_norm": 3.015625,
      "learning_rate": 2.3777943929115687e-05,
      "loss": 0.8492,
      "step": 489620
    },
    {
      "epoch": 1.716031080129115,
      "grad_norm": 2.703125,
      "learning_rate": 2.3777294900451985e-05,
      "loss": 0.8562,
      "step": 489630
    },
    {
      "epoch": 1.7160661276360107,
      "grad_norm": 2.796875,
      "learning_rate": 2.3776645871788283e-05,
      "loss": 0.8892,
      "step": 489640
    },
    {
      "epoch": 1.716101175142906,
      "grad_norm": 2.6875,
      "learning_rate": 2.377599684312458e-05,
      "loss": 0.8183,
      "step": 489650
    },
    {
      "epoch": 1.7161362226498018,
      "grad_norm": 3.25,
      "learning_rate": 2.377534781446088e-05,
      "loss": 0.8025,
      "step": 489660
    },
    {
      "epoch": 1.7161712701566973,
      "grad_norm": 2.953125,
      "learning_rate": 2.3774698785797177e-05,
      "loss": 0.8437,
      "step": 489670
    },
    {
      "epoch": 1.7162063176635929,
      "grad_norm": 2.3125,
      "learning_rate": 2.3774049757133475e-05,
      "loss": 0.7301,
      "step": 489680
    },
    {
      "epoch": 1.7162413651704886,
      "grad_norm": 2.890625,
      "learning_rate": 2.3773400728469773e-05,
      "loss": 0.7917,
      "step": 489690
    },
    {
      "epoch": 1.7162764126773842,
      "grad_norm": 2.8125,
      "learning_rate": 2.377275169980607e-05,
      "loss": 0.8174,
      "step": 489700
    },
    {
      "epoch": 1.7163114601842797,
      "grad_norm": 2.453125,
      "learning_rate": 2.377210267114237e-05,
      "loss": 0.8274,
      "step": 489710
    },
    {
      "epoch": 1.7163465076911755,
      "grad_norm": 3.0625,
      "learning_rate": 2.3771453642478667e-05,
      "loss": 0.8596,
      "step": 489720
    },
    {
      "epoch": 1.716381555198071,
      "grad_norm": 3.0,
      "learning_rate": 2.3770804613814965e-05,
      "loss": 0.7641,
      "step": 489730
    },
    {
      "epoch": 1.7164166027049665,
      "grad_norm": 2.984375,
      "learning_rate": 2.3770155585151263e-05,
      "loss": 0.8155,
      "step": 489740
    },
    {
      "epoch": 1.7164516502118623,
      "grad_norm": 3.25,
      "learning_rate": 2.376950655648756e-05,
      "loss": 0.8437,
      "step": 489750
    },
    {
      "epoch": 1.7164866977187576,
      "grad_norm": 3.0,
      "learning_rate": 2.376885752782386e-05,
      "loss": 0.9458,
      "step": 489760
    },
    {
      "epoch": 1.7165217452256534,
      "grad_norm": 2.625,
      "learning_rate": 2.3768208499160157e-05,
      "loss": 0.8035,
      "step": 489770
    },
    {
      "epoch": 1.7165567927325491,
      "grad_norm": 3.15625,
      "learning_rate": 2.3767559470496455e-05,
      "loss": 0.8954,
      "step": 489780
    },
    {
      "epoch": 1.7165918402394444,
      "grad_norm": 3.09375,
      "learning_rate": 2.3766910441832753e-05,
      "loss": 0.7558,
      "step": 489790
    },
    {
      "epoch": 1.7166268877463402,
      "grad_norm": 2.921875,
      "learning_rate": 2.376626141316905e-05,
      "loss": 0.8249,
      "step": 489800
    },
    {
      "epoch": 1.7166619352532357,
      "grad_norm": 3.109375,
      "learning_rate": 2.376561238450535e-05,
      "loss": 0.8858,
      "step": 489810
    },
    {
      "epoch": 1.7166969827601313,
      "grad_norm": 2.65625,
      "learning_rate": 2.376496335584165e-05,
      "loss": 0.7909,
      "step": 489820
    },
    {
      "epoch": 1.716732030267027,
      "grad_norm": 2.96875,
      "learning_rate": 2.376431432717795e-05,
      "loss": 0.8282,
      "step": 489830
    },
    {
      "epoch": 1.7167670777739226,
      "grad_norm": 2.46875,
      "learning_rate": 2.3763665298514247e-05,
      "loss": 0.8691,
      "step": 489840
    },
    {
      "epoch": 1.716802125280818,
      "grad_norm": 3.359375,
      "learning_rate": 2.3763016269850545e-05,
      "loss": 0.8378,
      "step": 489850
    },
    {
      "epoch": 1.7168371727877139,
      "grad_norm": 2.953125,
      "learning_rate": 2.3762367241186843e-05,
      "loss": 0.9458,
      "step": 489860
    },
    {
      "epoch": 1.7168722202946094,
      "grad_norm": 2.75,
      "learning_rate": 2.376171821252314e-05,
      "loss": 0.7542,
      "step": 489870
    },
    {
      "epoch": 1.716907267801505,
      "grad_norm": 3.078125,
      "learning_rate": 2.376106918385944e-05,
      "loss": 0.8178,
      "step": 489880
    },
    {
      "epoch": 1.7169423153084007,
      "grad_norm": 2.96875,
      "learning_rate": 2.3760420155195733e-05,
      "loss": 0.7946,
      "step": 489890
    },
    {
      "epoch": 1.716977362815296,
      "grad_norm": 2.828125,
      "learning_rate": 2.375977112653203e-05,
      "loss": 0.7844,
      "step": 489900
    },
    {
      "epoch": 1.7170124103221918,
      "grad_norm": 2.953125,
      "learning_rate": 2.375912209786833e-05,
      "loss": 0.7867,
      "step": 489910
    },
    {
      "epoch": 1.7170474578290873,
      "grad_norm": 2.875,
      "learning_rate": 2.3758473069204627e-05,
      "loss": 0.8082,
      "step": 489920
    },
    {
      "epoch": 1.7170825053359828,
      "grad_norm": 3.015625,
      "learning_rate": 2.375782404054093e-05,
      "loss": 0.8924,
      "step": 489930
    },
    {
      "epoch": 1.7171175528428786,
      "grad_norm": 2.515625,
      "learning_rate": 2.3757175011877227e-05,
      "loss": 0.8418,
      "step": 489940
    },
    {
      "epoch": 1.7171526003497741,
      "grad_norm": 3.421875,
      "learning_rate": 2.3756525983213525e-05,
      "loss": 0.9019,
      "step": 489950
    },
    {
      "epoch": 1.7171876478566697,
      "grad_norm": 2.609375,
      "learning_rate": 2.3755876954549823e-05,
      "loss": 0.804,
      "step": 489960
    },
    {
      "epoch": 1.7172226953635654,
      "grad_norm": 2.765625,
      "learning_rate": 2.375522792588612e-05,
      "loss": 0.904,
      "step": 489970
    },
    {
      "epoch": 1.717257742870461,
      "grad_norm": 2.921875,
      "learning_rate": 2.375457889722242e-05,
      "loss": 0.8845,
      "step": 489980
    },
    {
      "epoch": 1.7172927903773565,
      "grad_norm": 2.859375,
      "learning_rate": 2.3753929868558717e-05,
      "loss": 0.8033,
      "step": 489990
    },
    {
      "epoch": 1.7173278378842523,
      "grad_norm": 3.21875,
      "learning_rate": 2.3753280839895015e-05,
      "loss": 0.8449,
      "step": 490000
    },
    {
      "epoch": 1.7173278378842523,
      "eval_loss": 0.7808954119682312,
      "eval_runtime": 556.856,
      "eval_samples_per_second": 683.186,
      "eval_steps_per_second": 56.932,
      "step": 490000
    },
    {
      "epoch": 1.7173628853911476,
      "grad_norm": 3.171875,
      "learning_rate": 2.3752631811231313e-05,
      "loss": 0.7774,
      "step": 490010
    },
    {
      "epoch": 1.7173979328980433,
      "grad_norm": 3.0625,
      "learning_rate": 2.375198278256761e-05,
      "loss": 0.9132,
      "step": 490020
    },
    {
      "epoch": 1.7174329804049389,
      "grad_norm": 2.875,
      "learning_rate": 2.375133375390391e-05,
      "loss": 0.8847,
      "step": 490030
    },
    {
      "epoch": 1.7174680279118344,
      "grad_norm": 2.609375,
      "learning_rate": 2.3750684725240207e-05,
      "loss": 0.8581,
      "step": 490040
    },
    {
      "epoch": 1.7175030754187302,
      "grad_norm": 3.015625,
      "learning_rate": 2.3750035696576505e-05,
      "loss": 0.8267,
      "step": 490050
    },
    {
      "epoch": 1.7175381229256257,
      "grad_norm": 3.15625,
      "learning_rate": 2.3749386667912803e-05,
      "loss": 0.8641,
      "step": 490060
    },
    {
      "epoch": 1.7175731704325212,
      "grad_norm": 3.1875,
      "learning_rate": 2.37487376392491e-05,
      "loss": 0.9027,
      "step": 490070
    },
    {
      "epoch": 1.717608217939417,
      "grad_norm": 2.625,
      "learning_rate": 2.37480886105854e-05,
      "loss": 0.8295,
      "step": 490080
    },
    {
      "epoch": 1.7176432654463125,
      "grad_norm": 2.75,
      "learning_rate": 2.3747439581921697e-05,
      "loss": 0.787,
      "step": 490090
    },
    {
      "epoch": 1.717678312953208,
      "grad_norm": 2.609375,
      "learning_rate": 2.3746790553257995e-05,
      "loss": 0.7634,
      "step": 490100
    },
    {
      "epoch": 1.7177133604601038,
      "grad_norm": 2.375,
      "learning_rate": 2.3746141524594293e-05,
      "loss": 0.8319,
      "step": 490110
    },
    {
      "epoch": 1.7177484079669991,
      "grad_norm": 3.03125,
      "learning_rate": 2.374549249593059e-05,
      "loss": 0.8665,
      "step": 490120
    },
    {
      "epoch": 1.7177834554738949,
      "grad_norm": 3.078125,
      "learning_rate": 2.374484346726689e-05,
      "loss": 0.8171,
      "step": 490130
    },
    {
      "epoch": 1.7178185029807904,
      "grad_norm": 2.296875,
      "learning_rate": 2.3744194438603187e-05,
      "loss": 0.7632,
      "step": 490140
    },
    {
      "epoch": 1.717853550487686,
      "grad_norm": 2.703125,
      "learning_rate": 2.3743545409939485e-05,
      "loss": 0.7752,
      "step": 490150
    },
    {
      "epoch": 1.7178885979945817,
      "grad_norm": 2.671875,
      "learning_rate": 2.3742896381275783e-05,
      "loss": 0.8389,
      "step": 490160
    },
    {
      "epoch": 1.7179236455014772,
      "grad_norm": 2.875,
      "learning_rate": 2.374224735261208e-05,
      "loss": 0.8633,
      "step": 490170
    },
    {
      "epoch": 1.7179586930083728,
      "grad_norm": 3.203125,
      "learning_rate": 2.3741598323948382e-05,
      "loss": 0.8277,
      "step": 490180
    },
    {
      "epoch": 1.7179937405152685,
      "grad_norm": 2.90625,
      "learning_rate": 2.374094929528468e-05,
      "loss": 0.835,
      "step": 490190
    },
    {
      "epoch": 1.718028788022164,
      "grad_norm": 3.34375,
      "learning_rate": 2.3740300266620978e-05,
      "loss": 0.8501,
      "step": 490200
    },
    {
      "epoch": 1.7180638355290596,
      "grad_norm": 2.890625,
      "learning_rate": 2.3739651237957276e-05,
      "loss": 0.823,
      "step": 490210
    },
    {
      "epoch": 1.7180988830359554,
      "grad_norm": 2.703125,
      "learning_rate": 2.3739002209293574e-05,
      "loss": 0.8885,
      "step": 490220
    },
    {
      "epoch": 1.7181339305428507,
      "grad_norm": 3.0625,
      "learning_rate": 2.3738353180629872e-05,
      "loss": 0.8308,
      "step": 490230
    },
    {
      "epoch": 1.7181689780497464,
      "grad_norm": 3.09375,
      "learning_rate": 2.373770415196617e-05,
      "loss": 0.8932,
      "step": 490240
    },
    {
      "epoch": 1.718204025556642,
      "grad_norm": 2.96875,
      "learning_rate": 2.3737055123302468e-05,
      "loss": 0.8097,
      "step": 490250
    },
    {
      "epoch": 1.7182390730635375,
      "grad_norm": 3.03125,
      "learning_rate": 2.3736406094638763e-05,
      "loss": 0.78,
      "step": 490260
    },
    {
      "epoch": 1.7182741205704333,
      "grad_norm": 2.515625,
      "learning_rate": 2.373575706597506e-05,
      "loss": 0.8305,
      "step": 490270
    },
    {
      "epoch": 1.7183091680773288,
      "grad_norm": 2.40625,
      "learning_rate": 2.373510803731136e-05,
      "loss": 0.7699,
      "step": 490280
    },
    {
      "epoch": 1.7183442155842243,
      "grad_norm": 2.921875,
      "learning_rate": 2.3734459008647657e-05,
      "loss": 0.7784,
      "step": 490290
    },
    {
      "epoch": 1.71837926309112,
      "grad_norm": 3.265625,
      "learning_rate": 2.3733809979983958e-05,
      "loss": 0.8447,
      "step": 490300
    },
    {
      "epoch": 1.7184143105980156,
      "grad_norm": 2.984375,
      "learning_rate": 2.3733160951320256e-05,
      "loss": 0.8795,
      "step": 490310
    },
    {
      "epoch": 1.7184493581049112,
      "grad_norm": 3.390625,
      "learning_rate": 2.3732511922656554e-05,
      "loss": 0.8011,
      "step": 490320
    },
    {
      "epoch": 1.718484405611807,
      "grad_norm": 2.796875,
      "learning_rate": 2.3731862893992852e-05,
      "loss": 0.8519,
      "step": 490330
    },
    {
      "epoch": 1.7185194531187022,
      "grad_norm": 3.09375,
      "learning_rate": 2.373121386532915e-05,
      "loss": 0.8395,
      "step": 490340
    },
    {
      "epoch": 1.718554500625598,
      "grad_norm": 2.703125,
      "learning_rate": 2.3730564836665448e-05,
      "loss": 0.8584,
      "step": 490350
    },
    {
      "epoch": 1.7185895481324938,
      "grad_norm": 3.0625,
      "learning_rate": 2.3729915808001746e-05,
      "loss": 0.9298,
      "step": 490360
    },
    {
      "epoch": 1.718624595639389,
      "grad_norm": 2.890625,
      "learning_rate": 2.3729266779338044e-05,
      "loss": 0.8266,
      "step": 490370
    },
    {
      "epoch": 1.7186596431462848,
      "grad_norm": 2.765625,
      "learning_rate": 2.3728617750674342e-05,
      "loss": 0.8418,
      "step": 490380
    },
    {
      "epoch": 1.7186946906531804,
      "grad_norm": 2.453125,
      "learning_rate": 2.372796872201064e-05,
      "loss": 0.8575,
      "step": 490390
    },
    {
      "epoch": 1.718729738160076,
      "grad_norm": 2.875,
      "learning_rate": 2.3727319693346938e-05,
      "loss": 0.8088,
      "step": 490400
    },
    {
      "epoch": 1.7187647856669717,
      "grad_norm": 2.90625,
      "learning_rate": 2.3726670664683236e-05,
      "loss": 0.783,
      "step": 490410
    },
    {
      "epoch": 1.7187998331738672,
      "grad_norm": 2.828125,
      "learning_rate": 2.3726021636019534e-05,
      "loss": 0.8136,
      "step": 490420
    },
    {
      "epoch": 1.7188348806807627,
      "grad_norm": 3.125,
      "learning_rate": 2.3725372607355832e-05,
      "loss": 0.8092,
      "step": 490430
    },
    {
      "epoch": 1.7188699281876585,
      "grad_norm": 3.125,
      "learning_rate": 2.3724723578692133e-05,
      "loss": 0.8404,
      "step": 490440
    },
    {
      "epoch": 1.7189049756945538,
      "grad_norm": 2.59375,
      "learning_rate": 2.3724074550028428e-05,
      "loss": 0.7585,
      "step": 490450
    },
    {
      "epoch": 1.7189400232014496,
      "grad_norm": 2.875,
      "learning_rate": 2.3723425521364726e-05,
      "loss": 0.8166,
      "step": 490460
    },
    {
      "epoch": 1.7189750707083453,
      "grad_norm": 3.421875,
      "learning_rate": 2.3722776492701024e-05,
      "loss": 0.8394,
      "step": 490470
    },
    {
      "epoch": 1.7190101182152406,
      "grad_norm": 3.125,
      "learning_rate": 2.3722127464037322e-05,
      "loss": 0.8162,
      "step": 490480
    },
    {
      "epoch": 1.7190451657221364,
      "grad_norm": 2.921875,
      "learning_rate": 2.372147843537362e-05,
      "loss": 0.7758,
      "step": 490490
    },
    {
      "epoch": 1.719080213229032,
      "grad_norm": 2.390625,
      "learning_rate": 2.3720829406709918e-05,
      "loss": 0.9091,
      "step": 490500
    },
    {
      "epoch": 1.7191152607359275,
      "grad_norm": 3.015625,
      "learning_rate": 2.3720180378046216e-05,
      "loss": 0.7851,
      "step": 490510
    },
    {
      "epoch": 1.7191503082428232,
      "grad_norm": 3.171875,
      "learning_rate": 2.3719531349382514e-05,
      "loss": 0.8709,
      "step": 490520
    },
    {
      "epoch": 1.7191853557497188,
      "grad_norm": 2.6875,
      "learning_rate": 2.3718882320718812e-05,
      "loss": 0.8258,
      "step": 490530
    },
    {
      "epoch": 1.7192204032566143,
      "grad_norm": 2.875,
      "learning_rate": 2.371823329205511e-05,
      "loss": 0.7958,
      "step": 490540
    },
    {
      "epoch": 1.71925545076351,
      "grad_norm": 3.25,
      "learning_rate": 2.371758426339141e-05,
      "loss": 0.7839,
      "step": 490550
    },
    {
      "epoch": 1.7192904982704056,
      "grad_norm": 3.1875,
      "learning_rate": 2.371693523472771e-05,
      "loss": 0.8656,
      "step": 490560
    },
    {
      "epoch": 1.7193255457773011,
      "grad_norm": 2.46875,
      "learning_rate": 2.3716286206064007e-05,
      "loss": 0.7945,
      "step": 490570
    },
    {
      "epoch": 1.7193605932841969,
      "grad_norm": 3.28125,
      "learning_rate": 2.3715637177400305e-05,
      "loss": 0.865,
      "step": 490580
    },
    {
      "epoch": 1.7193956407910922,
      "grad_norm": 2.78125,
      "learning_rate": 2.3714988148736603e-05,
      "loss": 0.8938,
      "step": 490590
    },
    {
      "epoch": 1.719430688297988,
      "grad_norm": 2.96875,
      "learning_rate": 2.37143391200729e-05,
      "loss": 0.8297,
      "step": 490600
    },
    {
      "epoch": 1.7194657358048835,
      "grad_norm": 3.15625,
      "learning_rate": 2.37136900914092e-05,
      "loss": 0.8286,
      "step": 490610
    },
    {
      "epoch": 1.719500783311779,
      "grad_norm": 2.625,
      "learning_rate": 2.3713041062745497e-05,
      "loss": 0.7542,
      "step": 490620
    },
    {
      "epoch": 1.7195358308186748,
      "grad_norm": 2.9375,
      "learning_rate": 2.3712392034081795e-05,
      "loss": 0.8817,
      "step": 490630
    },
    {
      "epoch": 1.7195708783255703,
      "grad_norm": 2.75,
      "learning_rate": 2.371174300541809e-05,
      "loss": 0.8305,
      "step": 490640
    },
    {
      "epoch": 1.7196059258324659,
      "grad_norm": 2.90625,
      "learning_rate": 2.3711093976754388e-05,
      "loss": 0.7968,
      "step": 490650
    },
    {
      "epoch": 1.7196409733393616,
      "grad_norm": 2.921875,
      "learning_rate": 2.371044494809069e-05,
      "loss": 0.8981,
      "step": 490660
    },
    {
      "epoch": 1.7196760208462571,
      "grad_norm": 2.921875,
      "learning_rate": 2.3709795919426987e-05,
      "loss": 0.8827,
      "step": 490670
    },
    {
      "epoch": 1.7197110683531527,
      "grad_norm": 3.0,
      "learning_rate": 2.3709146890763285e-05,
      "loss": 0.7848,
      "step": 490680
    },
    {
      "epoch": 1.7197461158600484,
      "grad_norm": 2.453125,
      "learning_rate": 2.3708497862099583e-05,
      "loss": 0.819,
      "step": 490690
    },
    {
      "epoch": 1.7197811633669438,
      "grad_norm": 3.03125,
      "learning_rate": 2.370784883343588e-05,
      "loss": 0.8407,
      "step": 490700
    },
    {
      "epoch": 1.7198162108738395,
      "grad_norm": 2.671875,
      "learning_rate": 2.370719980477218e-05,
      "loss": 0.8282,
      "step": 490710
    },
    {
      "epoch": 1.719851258380735,
      "grad_norm": 2.859375,
      "learning_rate": 2.3706550776108477e-05,
      "loss": 0.7592,
      "step": 490720
    },
    {
      "epoch": 1.7198863058876306,
      "grad_norm": 2.375,
      "learning_rate": 2.3705901747444775e-05,
      "loss": 0.8584,
      "step": 490730
    },
    {
      "epoch": 1.7199213533945263,
      "grad_norm": 3.0,
      "learning_rate": 2.3705252718781073e-05,
      "loss": 0.8616,
      "step": 490740
    },
    {
      "epoch": 1.7199564009014219,
      "grad_norm": 2.953125,
      "learning_rate": 2.370460369011737e-05,
      "loss": 0.8069,
      "step": 490750
    },
    {
      "epoch": 1.7199914484083174,
      "grad_norm": 3.078125,
      "learning_rate": 2.370395466145367e-05,
      "loss": 0.8322,
      "step": 490760
    },
    {
      "epoch": 1.7200264959152132,
      "grad_norm": 2.921875,
      "learning_rate": 2.3703305632789967e-05,
      "loss": 0.86,
      "step": 490770
    },
    {
      "epoch": 1.7200615434221087,
      "grad_norm": 2.578125,
      "learning_rate": 2.3702656604126265e-05,
      "loss": 0.7446,
      "step": 490780
    },
    {
      "epoch": 1.7200965909290042,
      "grad_norm": 3.6875,
      "learning_rate": 2.3702007575462563e-05,
      "loss": 0.8211,
      "step": 490790
    },
    {
      "epoch": 1.7201316384359,
      "grad_norm": 3.140625,
      "learning_rate": 2.3701358546798865e-05,
      "loss": 0.8005,
      "step": 490800
    },
    {
      "epoch": 1.7201666859427953,
      "grad_norm": 3.34375,
      "learning_rate": 2.3700709518135163e-05,
      "loss": 0.8547,
      "step": 490810
    },
    {
      "epoch": 1.720201733449691,
      "grad_norm": 3.15625,
      "learning_rate": 2.370006048947146e-05,
      "loss": 0.8071,
      "step": 490820
    },
    {
      "epoch": 1.7202367809565866,
      "grad_norm": 3.109375,
      "learning_rate": 2.3699411460807755e-05,
      "loss": 0.8096,
      "step": 490830
    },
    {
      "epoch": 1.7202718284634821,
      "grad_norm": 2.34375,
      "learning_rate": 2.3698762432144053e-05,
      "loss": 0.8191,
      "step": 490840
    },
    {
      "epoch": 1.720306875970378,
      "grad_norm": 2.671875,
      "learning_rate": 2.369811340348035e-05,
      "loss": 0.8103,
      "step": 490850
    },
    {
      "epoch": 1.7203419234772734,
      "grad_norm": 2.859375,
      "learning_rate": 2.369746437481665e-05,
      "loss": 0.914,
      "step": 490860
    },
    {
      "epoch": 1.720376970984169,
      "grad_norm": 2.96875,
      "learning_rate": 2.3696815346152947e-05,
      "loss": 0.8435,
      "step": 490870
    },
    {
      "epoch": 1.7204120184910647,
      "grad_norm": 3.09375,
      "learning_rate": 2.3696166317489245e-05,
      "loss": 0.9275,
      "step": 490880
    },
    {
      "epoch": 1.7204470659979603,
      "grad_norm": 2.953125,
      "learning_rate": 2.3695517288825543e-05,
      "loss": 0.8558,
      "step": 490890
    },
    {
      "epoch": 1.7204821135048558,
      "grad_norm": 2.84375,
      "learning_rate": 2.369486826016184e-05,
      "loss": 0.7745,
      "step": 490900
    },
    {
      "epoch": 1.7205171610117516,
      "grad_norm": 2.953125,
      "learning_rate": 2.369421923149814e-05,
      "loss": 0.8421,
      "step": 490910
    },
    {
      "epoch": 1.7205522085186469,
      "grad_norm": 2.328125,
      "learning_rate": 2.369357020283444e-05,
      "loss": 0.7888,
      "step": 490920
    },
    {
      "epoch": 1.7205872560255426,
      "grad_norm": 2.984375,
      "learning_rate": 2.369292117417074e-05,
      "loss": 0.8566,
      "step": 490930
    },
    {
      "epoch": 1.7206223035324382,
      "grad_norm": 3.28125,
      "learning_rate": 2.3692272145507037e-05,
      "loss": 0.7824,
      "step": 490940
    },
    {
      "epoch": 1.7206573510393337,
      "grad_norm": 2.375,
      "learning_rate": 2.3691623116843335e-05,
      "loss": 0.7879,
      "step": 490950
    },
    {
      "epoch": 1.7206923985462295,
      "grad_norm": 3.3125,
      "learning_rate": 2.3690974088179633e-05,
      "loss": 0.8568,
      "step": 490960
    },
    {
      "epoch": 1.720727446053125,
      "grad_norm": 2.734375,
      "learning_rate": 2.369032505951593e-05,
      "loss": 0.7894,
      "step": 490970
    },
    {
      "epoch": 1.7207624935600205,
      "grad_norm": 2.65625,
      "learning_rate": 2.368967603085223e-05,
      "loss": 0.835,
      "step": 490980
    },
    {
      "epoch": 1.7207975410669163,
      "grad_norm": 3.1875,
      "learning_rate": 2.3689027002188527e-05,
      "loss": 0.8256,
      "step": 490990
    },
    {
      "epoch": 1.7208325885738118,
      "grad_norm": 3.0625,
      "learning_rate": 2.3688377973524825e-05,
      "loss": 0.8359,
      "step": 491000
    },
    {
      "epoch": 1.7208676360807074,
      "grad_norm": 2.390625,
      "learning_rate": 2.368772894486112e-05,
      "loss": 0.8859,
      "step": 491010
    },
    {
      "epoch": 1.7209026835876031,
      "grad_norm": 2.75,
      "learning_rate": 2.3687079916197417e-05,
      "loss": 0.7948,
      "step": 491020
    },
    {
      "epoch": 1.7209377310944984,
      "grad_norm": 2.4375,
      "learning_rate": 2.368643088753372e-05,
      "loss": 0.7886,
      "step": 491030
    },
    {
      "epoch": 1.7209727786013942,
      "grad_norm": 2.734375,
      "learning_rate": 2.3685781858870017e-05,
      "loss": 0.8126,
      "step": 491040
    },
    {
      "epoch": 1.72100782610829,
      "grad_norm": 2.859375,
      "learning_rate": 2.3685132830206315e-05,
      "loss": 0.767,
      "step": 491050
    },
    {
      "epoch": 1.7210428736151853,
      "grad_norm": 2.828125,
      "learning_rate": 2.3684483801542613e-05,
      "loss": 0.9047,
      "step": 491060
    },
    {
      "epoch": 1.721077921122081,
      "grad_norm": 2.859375,
      "learning_rate": 2.368383477287891e-05,
      "loss": 0.8868,
      "step": 491070
    },
    {
      "epoch": 1.7211129686289766,
      "grad_norm": 2.875,
      "learning_rate": 2.368318574421521e-05,
      "loss": 0.8653,
      "step": 491080
    },
    {
      "epoch": 1.721148016135872,
      "grad_norm": 2.46875,
      "learning_rate": 2.3682536715551507e-05,
      "loss": 0.7958,
      "step": 491090
    },
    {
      "epoch": 1.7211830636427679,
      "grad_norm": 3.15625,
      "learning_rate": 2.3681887686887805e-05,
      "loss": 0.8176,
      "step": 491100
    },
    {
      "epoch": 1.7212181111496634,
      "grad_norm": 3.21875,
      "learning_rate": 2.3681238658224103e-05,
      "loss": 0.8117,
      "step": 491110
    },
    {
      "epoch": 1.721253158656559,
      "grad_norm": 2.75,
      "learning_rate": 2.36805896295604e-05,
      "loss": 0.7827,
      "step": 491120
    },
    {
      "epoch": 1.7212882061634547,
      "grad_norm": 3.171875,
      "learning_rate": 2.36799406008967e-05,
      "loss": 0.7703,
      "step": 491130
    },
    {
      "epoch": 1.72132325367035,
      "grad_norm": 3.1875,
      "learning_rate": 2.3679291572232997e-05,
      "loss": 0.7902,
      "step": 491140
    },
    {
      "epoch": 1.7213583011772458,
      "grad_norm": 2.46875,
      "learning_rate": 2.3678642543569295e-05,
      "loss": 0.785,
      "step": 491150
    },
    {
      "epoch": 1.7213933486841415,
      "grad_norm": 2.75,
      "learning_rate": 2.3677993514905593e-05,
      "loss": 0.8523,
      "step": 491160
    },
    {
      "epoch": 1.7214283961910368,
      "grad_norm": 2.796875,
      "learning_rate": 2.3677344486241894e-05,
      "loss": 0.7913,
      "step": 491170
    },
    {
      "epoch": 1.7214634436979326,
      "grad_norm": 3.0,
      "learning_rate": 2.3676695457578192e-05,
      "loss": 0.7799,
      "step": 491180
    },
    {
      "epoch": 1.7214984912048281,
      "grad_norm": 3.0625,
      "learning_rate": 2.367604642891449e-05,
      "loss": 0.8031,
      "step": 491190
    },
    {
      "epoch": 1.7215335387117237,
      "grad_norm": 2.796875,
      "learning_rate": 2.3675397400250785e-05,
      "loss": 0.7622,
      "step": 491200
    },
    {
      "epoch": 1.7215685862186194,
      "grad_norm": 3.109375,
      "learning_rate": 2.3674748371587083e-05,
      "loss": 0.809,
      "step": 491210
    },
    {
      "epoch": 1.721603633725515,
      "grad_norm": 2.96875,
      "learning_rate": 2.367409934292338e-05,
      "loss": 0.7615,
      "step": 491220
    },
    {
      "epoch": 1.7216386812324105,
      "grad_norm": 2.65625,
      "learning_rate": 2.367345031425968e-05,
      "loss": 0.7984,
      "step": 491230
    },
    {
      "epoch": 1.7216737287393062,
      "grad_norm": 3.015625,
      "learning_rate": 2.3672801285595977e-05,
      "loss": 0.7474,
      "step": 491240
    },
    {
      "epoch": 1.7217087762462018,
      "grad_norm": 2.828125,
      "learning_rate": 2.3672152256932275e-05,
      "loss": 0.784,
      "step": 491250
    },
    {
      "epoch": 1.7217438237530973,
      "grad_norm": 2.609375,
      "learning_rate": 2.3671503228268573e-05,
      "loss": 0.8342,
      "step": 491260
    },
    {
      "epoch": 1.721778871259993,
      "grad_norm": 2.78125,
      "learning_rate": 2.367085419960487e-05,
      "loss": 0.8563,
      "step": 491270
    },
    {
      "epoch": 1.7218139187668884,
      "grad_norm": 2.890625,
      "learning_rate": 2.3670205170941172e-05,
      "loss": 0.904,
      "step": 491280
    },
    {
      "epoch": 1.7218489662737841,
      "grad_norm": 2.625,
      "learning_rate": 2.366955614227747e-05,
      "loss": 0.8048,
      "step": 491290
    },
    {
      "epoch": 1.7218840137806797,
      "grad_norm": 3.046875,
      "learning_rate": 2.3668907113613768e-05,
      "loss": 0.8564,
      "step": 491300
    },
    {
      "epoch": 1.7219190612875752,
      "grad_norm": 3.0625,
      "learning_rate": 2.3668258084950066e-05,
      "loss": 0.9159,
      "step": 491310
    },
    {
      "epoch": 1.721954108794471,
      "grad_norm": 2.9375,
      "learning_rate": 2.3667609056286364e-05,
      "loss": 0.8457,
      "step": 491320
    },
    {
      "epoch": 1.7219891563013665,
      "grad_norm": 2.796875,
      "learning_rate": 2.3666960027622662e-05,
      "loss": 0.8793,
      "step": 491330
    },
    {
      "epoch": 1.722024203808262,
      "grad_norm": 2.6875,
      "learning_rate": 2.366631099895896e-05,
      "loss": 0.7763,
      "step": 491340
    },
    {
      "epoch": 1.7220592513151578,
      "grad_norm": 2.796875,
      "learning_rate": 2.3665661970295258e-05,
      "loss": 0.8307,
      "step": 491350
    },
    {
      "epoch": 1.7220942988220533,
      "grad_norm": 2.65625,
      "learning_rate": 2.3665012941631556e-05,
      "loss": 0.8731,
      "step": 491360
    },
    {
      "epoch": 1.7221293463289489,
      "grad_norm": 2.46875,
      "learning_rate": 2.3664363912967854e-05,
      "loss": 0.8039,
      "step": 491370
    },
    {
      "epoch": 1.7221643938358446,
      "grad_norm": 3.1875,
      "learning_rate": 2.3663714884304152e-05,
      "loss": 0.8168,
      "step": 491380
    },
    {
      "epoch": 1.72219944134274,
      "grad_norm": 2.5625,
      "learning_rate": 2.3663065855640447e-05,
      "loss": 0.8602,
      "step": 491390
    },
    {
      "epoch": 1.7222344888496357,
      "grad_norm": 3.203125,
      "learning_rate": 2.3662416826976748e-05,
      "loss": 0.8361,
      "step": 491400
    },
    {
      "epoch": 1.7222695363565312,
      "grad_norm": 3.0,
      "learning_rate": 2.3661767798313046e-05,
      "loss": 0.8989,
      "step": 491410
    },
    {
      "epoch": 1.7223045838634268,
      "grad_norm": 3.375,
      "learning_rate": 2.3661118769649344e-05,
      "loss": 0.9411,
      "step": 491420
    },
    {
      "epoch": 1.7223396313703225,
      "grad_norm": 2.59375,
      "learning_rate": 2.3660469740985642e-05,
      "loss": 0.8187,
      "step": 491430
    },
    {
      "epoch": 1.722374678877218,
      "grad_norm": 2.578125,
      "learning_rate": 2.365982071232194e-05,
      "loss": 0.879,
      "step": 491440
    },
    {
      "epoch": 1.7224097263841136,
      "grad_norm": 2.546875,
      "learning_rate": 2.3659171683658238e-05,
      "loss": 0.7921,
      "step": 491450
    },
    {
      "epoch": 1.7224447738910094,
      "grad_norm": 3.015625,
      "learning_rate": 2.3658522654994536e-05,
      "loss": 0.9163,
      "step": 491460
    },
    {
      "epoch": 1.722479821397905,
      "grad_norm": 3.28125,
      "learning_rate": 2.3657873626330834e-05,
      "loss": 0.8365,
      "step": 491470
    },
    {
      "epoch": 1.7225148689048004,
      "grad_norm": 2.921875,
      "learning_rate": 2.3657224597667132e-05,
      "loss": 0.8241,
      "step": 491480
    },
    {
      "epoch": 1.7225499164116962,
      "grad_norm": 2.84375,
      "learning_rate": 2.365657556900343e-05,
      "loss": 0.7816,
      "step": 491490
    },
    {
      "epoch": 1.7225849639185915,
      "grad_norm": 2.765625,
      "learning_rate": 2.3655926540339728e-05,
      "loss": 0.884,
      "step": 491500
    },
    {
      "epoch": 1.7226200114254873,
      "grad_norm": 3.390625,
      "learning_rate": 2.3655277511676026e-05,
      "loss": 0.7521,
      "step": 491510
    },
    {
      "epoch": 1.7226550589323828,
      "grad_norm": 2.796875,
      "learning_rate": 2.3654628483012324e-05,
      "loss": 0.8713,
      "step": 491520
    },
    {
      "epoch": 1.7226901064392783,
      "grad_norm": 2.921875,
      "learning_rate": 2.3653979454348622e-05,
      "loss": 0.8767,
      "step": 491530
    },
    {
      "epoch": 1.722725153946174,
      "grad_norm": 2.40625,
      "learning_rate": 2.3653330425684923e-05,
      "loss": 0.7863,
      "step": 491540
    },
    {
      "epoch": 1.7227602014530696,
      "grad_norm": 3.25,
      "learning_rate": 2.365268139702122e-05,
      "loss": 0.8463,
      "step": 491550
    },
    {
      "epoch": 1.7227952489599652,
      "grad_norm": 2.96875,
      "learning_rate": 2.365203236835752e-05,
      "loss": 0.8931,
      "step": 491560
    },
    {
      "epoch": 1.722830296466861,
      "grad_norm": 2.59375,
      "learning_rate": 2.3651383339693817e-05,
      "loss": 0.8219,
      "step": 491570
    },
    {
      "epoch": 1.7228653439737565,
      "grad_norm": 3.265625,
      "learning_rate": 2.3650734311030112e-05,
      "loss": 0.9144,
      "step": 491580
    },
    {
      "epoch": 1.722900391480652,
      "grad_norm": 2.71875,
      "learning_rate": 2.365008528236641e-05,
      "loss": 0.831,
      "step": 491590
    },
    {
      "epoch": 1.7229354389875478,
      "grad_norm": 2.75,
      "learning_rate": 2.3649436253702708e-05,
      "loss": 0.7707,
      "step": 491600
    },
    {
      "epoch": 1.722970486494443,
      "grad_norm": 2.75,
      "learning_rate": 2.3648787225039006e-05,
      "loss": 0.809,
      "step": 491610
    },
    {
      "epoch": 1.7230055340013388,
      "grad_norm": 2.796875,
      "learning_rate": 2.3648138196375304e-05,
      "loss": 0.8347,
      "step": 491620
    },
    {
      "epoch": 1.7230405815082344,
      "grad_norm": 2.625,
      "learning_rate": 2.3647489167711602e-05,
      "loss": 0.8646,
      "step": 491630
    },
    {
      "epoch": 1.72307562901513,
      "grad_norm": 2.71875,
      "learning_rate": 2.36468401390479e-05,
      "loss": 0.8339,
      "step": 491640
    },
    {
      "epoch": 1.7231106765220257,
      "grad_norm": 2.578125,
      "learning_rate": 2.36461911103842e-05,
      "loss": 0.8506,
      "step": 491650
    },
    {
      "epoch": 1.7231457240289212,
      "grad_norm": 2.59375,
      "learning_rate": 2.36455420817205e-05,
      "loss": 0.8726,
      "step": 491660
    },
    {
      "epoch": 1.7231807715358167,
      "grad_norm": 3.28125,
      "learning_rate": 2.3644893053056797e-05,
      "loss": 0.8152,
      "step": 491670
    },
    {
      "epoch": 1.7232158190427125,
      "grad_norm": 2.671875,
      "learning_rate": 2.3644244024393095e-05,
      "loss": 0.8288,
      "step": 491680
    },
    {
      "epoch": 1.723250866549608,
      "grad_norm": 3.015625,
      "learning_rate": 2.3643594995729393e-05,
      "loss": 0.8034,
      "step": 491690
    },
    {
      "epoch": 1.7232859140565036,
      "grad_norm": 3.09375,
      "learning_rate": 2.364294596706569e-05,
      "loss": 0.9655,
      "step": 491700
    },
    {
      "epoch": 1.7233209615633993,
      "grad_norm": 3.1875,
      "learning_rate": 2.364229693840199e-05,
      "loss": 0.8883,
      "step": 491710
    },
    {
      "epoch": 1.7233560090702946,
      "grad_norm": 2.953125,
      "learning_rate": 2.3641647909738287e-05,
      "loss": 0.8915,
      "step": 491720
    },
    {
      "epoch": 1.7233910565771904,
      "grad_norm": 3.09375,
      "learning_rate": 2.3640998881074585e-05,
      "loss": 0.8271,
      "step": 491730
    },
    {
      "epoch": 1.7234261040840861,
      "grad_norm": 3.078125,
      "learning_rate": 2.3640349852410883e-05,
      "loss": 0.8486,
      "step": 491740
    },
    {
      "epoch": 1.7234611515909815,
      "grad_norm": 2.515625,
      "learning_rate": 2.363970082374718e-05,
      "loss": 0.7759,
      "step": 491750
    },
    {
      "epoch": 1.7234961990978772,
      "grad_norm": 3.15625,
      "learning_rate": 2.363905179508348e-05,
      "loss": 0.9027,
      "step": 491760
    },
    {
      "epoch": 1.7235312466047727,
      "grad_norm": 2.71875,
      "learning_rate": 2.3638402766419777e-05,
      "loss": 0.8826,
      "step": 491770
    },
    {
      "epoch": 1.7235662941116683,
      "grad_norm": 2.796875,
      "learning_rate": 2.3637753737756075e-05,
      "loss": 0.8305,
      "step": 491780
    },
    {
      "epoch": 1.723601341618564,
      "grad_norm": 2.8125,
      "learning_rate": 2.3637104709092373e-05,
      "loss": 0.7677,
      "step": 491790
    },
    {
      "epoch": 1.7236363891254596,
      "grad_norm": 3.125,
      "learning_rate": 2.363645568042867e-05,
      "loss": 0.8482,
      "step": 491800
    },
    {
      "epoch": 1.7236714366323551,
      "grad_norm": 3.1875,
      "learning_rate": 2.363580665176497e-05,
      "loss": 0.9244,
      "step": 491810
    },
    {
      "epoch": 1.7237064841392509,
      "grad_norm": 2.734375,
      "learning_rate": 2.3635157623101267e-05,
      "loss": 0.7656,
      "step": 491820
    },
    {
      "epoch": 1.7237415316461462,
      "grad_norm": 2.953125,
      "learning_rate": 2.3634508594437565e-05,
      "loss": 0.8419,
      "step": 491830
    },
    {
      "epoch": 1.723776579153042,
      "grad_norm": 3.078125,
      "learning_rate": 2.3633859565773863e-05,
      "loss": 0.8472,
      "step": 491840
    },
    {
      "epoch": 1.7238116266599377,
      "grad_norm": 2.875,
      "learning_rate": 2.363321053711016e-05,
      "loss": 0.8589,
      "step": 491850
    },
    {
      "epoch": 1.723846674166833,
      "grad_norm": 2.734375,
      "learning_rate": 2.363256150844646e-05,
      "loss": 0.8703,
      "step": 491860
    },
    {
      "epoch": 1.7238817216737288,
      "grad_norm": 3.0,
      "learning_rate": 2.3631912479782757e-05,
      "loss": 0.8488,
      "step": 491870
    },
    {
      "epoch": 1.7239167691806243,
      "grad_norm": 3.03125,
      "learning_rate": 2.3631263451119055e-05,
      "loss": 0.7623,
      "step": 491880
    },
    {
      "epoch": 1.7239518166875198,
      "grad_norm": 2.75,
      "learning_rate": 2.3630614422455353e-05,
      "loss": 0.8207,
      "step": 491890
    },
    {
      "epoch": 1.7239868641944156,
      "grad_norm": 3.15625,
      "learning_rate": 2.3629965393791655e-05,
      "loss": 0.8094,
      "step": 491900
    },
    {
      "epoch": 1.7240219117013111,
      "grad_norm": 2.921875,
      "learning_rate": 2.3629316365127953e-05,
      "loss": 0.8452,
      "step": 491910
    },
    {
      "epoch": 1.7240569592082067,
      "grad_norm": 3.078125,
      "learning_rate": 2.362866733646425e-05,
      "loss": 0.8597,
      "step": 491920
    },
    {
      "epoch": 1.7240920067151024,
      "grad_norm": 2.703125,
      "learning_rate": 2.362801830780055e-05,
      "loss": 0.7943,
      "step": 491930
    },
    {
      "epoch": 1.724127054221998,
      "grad_norm": 2.8125,
      "learning_rate": 2.3627369279136847e-05,
      "loss": 0.7892,
      "step": 491940
    },
    {
      "epoch": 1.7241621017288935,
      "grad_norm": 3.0625,
      "learning_rate": 2.3626720250473145e-05,
      "loss": 0.8901,
      "step": 491950
    },
    {
      "epoch": 1.7241971492357893,
      "grad_norm": 2.5625,
      "learning_rate": 2.362607122180944e-05,
      "loss": 0.7743,
      "step": 491960
    },
    {
      "epoch": 1.7242321967426846,
      "grad_norm": 3.015625,
      "learning_rate": 2.3625422193145737e-05,
      "loss": 0.8045,
      "step": 491970
    },
    {
      "epoch": 1.7242672442495803,
      "grad_norm": 2.796875,
      "learning_rate": 2.3624773164482035e-05,
      "loss": 0.8137,
      "step": 491980
    },
    {
      "epoch": 1.7243022917564759,
      "grad_norm": 2.6875,
      "learning_rate": 2.3624124135818333e-05,
      "loss": 0.7659,
      "step": 491990
    },
    {
      "epoch": 1.7243373392633714,
      "grad_norm": 2.703125,
      "learning_rate": 2.362347510715463e-05,
      "loss": 0.8043,
      "step": 492000
    },
    {
      "epoch": 1.7243723867702672,
      "grad_norm": 2.84375,
      "learning_rate": 2.362282607849093e-05,
      "loss": 0.8682,
      "step": 492010
    },
    {
      "epoch": 1.7244074342771627,
      "grad_norm": 2.765625,
      "learning_rate": 2.362217704982723e-05,
      "loss": 0.8409,
      "step": 492020
    },
    {
      "epoch": 1.7244424817840582,
      "grad_norm": 2.78125,
      "learning_rate": 2.362152802116353e-05,
      "loss": 0.888,
      "step": 492030
    },
    {
      "epoch": 1.724477529290954,
      "grad_norm": 2.984375,
      "learning_rate": 2.3620878992499827e-05,
      "loss": 0.8845,
      "step": 492040
    },
    {
      "epoch": 1.7245125767978495,
      "grad_norm": 3.28125,
      "learning_rate": 2.3620229963836125e-05,
      "loss": 0.8785,
      "step": 492050
    },
    {
      "epoch": 1.724547624304745,
      "grad_norm": 3.25,
      "learning_rate": 2.3619580935172423e-05,
      "loss": 0.8398,
      "step": 492060
    },
    {
      "epoch": 1.7245826718116408,
      "grad_norm": 2.8125,
      "learning_rate": 2.361893190650872e-05,
      "loss": 0.8452,
      "step": 492070
    },
    {
      "epoch": 1.7246177193185361,
      "grad_norm": 3.21875,
      "learning_rate": 2.361828287784502e-05,
      "loss": 0.7847,
      "step": 492080
    },
    {
      "epoch": 1.724652766825432,
      "grad_norm": 3.109375,
      "learning_rate": 2.3617633849181317e-05,
      "loss": 0.841,
      "step": 492090
    },
    {
      "epoch": 1.7246878143323274,
      "grad_norm": 2.71875,
      "learning_rate": 2.3616984820517615e-05,
      "loss": 0.862,
      "step": 492100
    },
    {
      "epoch": 1.724722861839223,
      "grad_norm": 2.34375,
      "learning_rate": 2.3616335791853913e-05,
      "loss": 0.8256,
      "step": 492110
    },
    {
      "epoch": 1.7247579093461187,
      "grad_norm": 2.625,
      "learning_rate": 2.361568676319021e-05,
      "loss": 0.7857,
      "step": 492120
    },
    {
      "epoch": 1.7247929568530143,
      "grad_norm": 3.03125,
      "learning_rate": 2.361503773452651e-05,
      "loss": 0.8007,
      "step": 492130
    },
    {
      "epoch": 1.7248280043599098,
      "grad_norm": 3.25,
      "learning_rate": 2.3614388705862807e-05,
      "loss": 0.8629,
      "step": 492140
    },
    {
      "epoch": 1.7248630518668056,
      "grad_norm": 3.125,
      "learning_rate": 2.3613739677199105e-05,
      "loss": 0.9247,
      "step": 492150
    },
    {
      "epoch": 1.724898099373701,
      "grad_norm": 3.296875,
      "learning_rate": 2.3613090648535403e-05,
      "loss": 0.7882,
      "step": 492160
    },
    {
      "epoch": 1.7249331468805966,
      "grad_norm": 3.0,
      "learning_rate": 2.36124416198717e-05,
      "loss": 0.9291,
      "step": 492170
    },
    {
      "epoch": 1.7249681943874924,
      "grad_norm": 2.75,
      "learning_rate": 2.3611792591208e-05,
      "loss": 0.8307,
      "step": 492180
    },
    {
      "epoch": 1.7250032418943877,
      "grad_norm": 3.25,
      "learning_rate": 2.3611143562544297e-05,
      "loss": 0.8676,
      "step": 492190
    },
    {
      "epoch": 1.7250382894012835,
      "grad_norm": 2.921875,
      "learning_rate": 2.3610494533880595e-05,
      "loss": 0.8399,
      "step": 492200
    },
    {
      "epoch": 1.725073336908179,
      "grad_norm": 2.875,
      "learning_rate": 2.3609845505216893e-05,
      "loss": 0.8031,
      "step": 492210
    },
    {
      "epoch": 1.7251083844150745,
      "grad_norm": 3.265625,
      "learning_rate": 2.360919647655319e-05,
      "loss": 0.796,
      "step": 492220
    },
    {
      "epoch": 1.7251434319219703,
      "grad_norm": 2.8125,
      "learning_rate": 2.360854744788949e-05,
      "loss": 0.8518,
      "step": 492230
    },
    {
      "epoch": 1.7251784794288658,
      "grad_norm": 2.40625,
      "learning_rate": 2.3607898419225787e-05,
      "loss": 0.8299,
      "step": 492240
    },
    {
      "epoch": 1.7252135269357614,
      "grad_norm": 2.953125,
      "learning_rate": 2.3607249390562085e-05,
      "loss": 0.8671,
      "step": 492250
    },
    {
      "epoch": 1.7252485744426571,
      "grad_norm": 2.890625,
      "learning_rate": 2.3606600361898383e-05,
      "loss": 0.899,
      "step": 492260
    },
    {
      "epoch": 1.7252836219495526,
      "grad_norm": 3.015625,
      "learning_rate": 2.3605951333234684e-05,
      "loss": 0.9287,
      "step": 492270
    },
    {
      "epoch": 1.7253186694564482,
      "grad_norm": 2.9375,
      "learning_rate": 2.3605302304570982e-05,
      "loss": 0.8162,
      "step": 492280
    },
    {
      "epoch": 1.725353716963344,
      "grad_norm": 2.640625,
      "learning_rate": 2.360465327590728e-05,
      "loss": 0.8223,
      "step": 492290
    },
    {
      "epoch": 1.7253887644702393,
      "grad_norm": 2.875,
      "learning_rate": 2.3604004247243578e-05,
      "loss": 0.8404,
      "step": 492300
    },
    {
      "epoch": 1.725423811977135,
      "grad_norm": 2.9375,
      "learning_rate": 2.3603355218579876e-05,
      "loss": 0.8115,
      "step": 492310
    },
    {
      "epoch": 1.7254588594840305,
      "grad_norm": 3.03125,
      "learning_rate": 2.3602706189916174e-05,
      "loss": 0.8425,
      "step": 492320
    },
    {
      "epoch": 1.725493906990926,
      "grad_norm": 3.0625,
      "learning_rate": 2.360205716125247e-05,
      "loss": 0.8101,
      "step": 492330
    },
    {
      "epoch": 1.7255289544978218,
      "grad_norm": 2.953125,
      "learning_rate": 2.3601408132588767e-05,
      "loss": 0.7595,
      "step": 492340
    },
    {
      "epoch": 1.7255640020047174,
      "grad_norm": 2.796875,
      "learning_rate": 2.3600759103925065e-05,
      "loss": 0.8491,
      "step": 492350
    },
    {
      "epoch": 1.725599049511613,
      "grad_norm": 2.6875,
      "learning_rate": 2.3600110075261363e-05,
      "loss": 0.8284,
      "step": 492360
    },
    {
      "epoch": 1.7256340970185087,
      "grad_norm": 2.609375,
      "learning_rate": 2.359946104659766e-05,
      "loss": 0.7952,
      "step": 492370
    },
    {
      "epoch": 1.7256691445254042,
      "grad_norm": 2.984375,
      "learning_rate": 2.3598812017933962e-05,
      "loss": 0.7939,
      "step": 492380
    },
    {
      "epoch": 1.7257041920322997,
      "grad_norm": 2.921875,
      "learning_rate": 2.359816298927026e-05,
      "loss": 0.8402,
      "step": 492390
    },
    {
      "epoch": 1.7257392395391955,
      "grad_norm": 2.234375,
      "learning_rate": 2.3597513960606558e-05,
      "loss": 0.7756,
      "step": 492400
    },
    {
      "epoch": 1.7257742870460908,
      "grad_norm": 2.984375,
      "learning_rate": 2.3596864931942856e-05,
      "loss": 0.8377,
      "step": 492410
    },
    {
      "epoch": 1.7258093345529866,
      "grad_norm": 3.0,
      "learning_rate": 2.3596215903279154e-05,
      "loss": 0.7826,
      "step": 492420
    },
    {
      "epoch": 1.7258443820598823,
      "grad_norm": 3.015625,
      "learning_rate": 2.3595566874615452e-05,
      "loss": 0.8448,
      "step": 492430
    },
    {
      "epoch": 1.7258794295667776,
      "grad_norm": 3.0625,
      "learning_rate": 2.359491784595175e-05,
      "loss": 0.8387,
      "step": 492440
    },
    {
      "epoch": 1.7259144770736734,
      "grad_norm": 3.15625,
      "learning_rate": 2.3594268817288048e-05,
      "loss": 0.7619,
      "step": 492450
    },
    {
      "epoch": 1.725949524580569,
      "grad_norm": 2.3125,
      "learning_rate": 2.3593619788624346e-05,
      "loss": 0.8192,
      "step": 492460
    },
    {
      "epoch": 1.7259845720874645,
      "grad_norm": 2.84375,
      "learning_rate": 2.3592970759960644e-05,
      "loss": 0.9025,
      "step": 492470
    },
    {
      "epoch": 1.7260196195943602,
      "grad_norm": 3.40625,
      "learning_rate": 2.3592321731296942e-05,
      "loss": 0.9103,
      "step": 492480
    },
    {
      "epoch": 1.7260546671012558,
      "grad_norm": 2.40625,
      "learning_rate": 2.359167270263324e-05,
      "loss": 0.8028,
      "step": 492490
    },
    {
      "epoch": 1.7260897146081513,
      "grad_norm": 3.0,
      "learning_rate": 2.3591023673969538e-05,
      "loss": 0.8286,
      "step": 492500
    },
    {
      "epoch": 1.726124762115047,
      "grad_norm": 2.515625,
      "learning_rate": 2.3590374645305836e-05,
      "loss": 0.7934,
      "step": 492510
    },
    {
      "epoch": 1.7261598096219426,
      "grad_norm": 2.890625,
      "learning_rate": 2.3589725616642134e-05,
      "loss": 0.7906,
      "step": 492520
    },
    {
      "epoch": 1.7261948571288381,
      "grad_norm": 3.203125,
      "learning_rate": 2.3589076587978432e-05,
      "loss": 0.8485,
      "step": 492530
    },
    {
      "epoch": 1.726229904635734,
      "grad_norm": 3.40625,
      "learning_rate": 2.358842755931473e-05,
      "loss": 0.8449,
      "step": 492540
    },
    {
      "epoch": 1.7262649521426292,
      "grad_norm": 2.90625,
      "learning_rate": 2.3587778530651028e-05,
      "loss": 0.8353,
      "step": 492550
    },
    {
      "epoch": 1.726299999649525,
      "grad_norm": 2.71875,
      "learning_rate": 2.3587129501987326e-05,
      "loss": 0.8072,
      "step": 492560
    },
    {
      "epoch": 1.7263350471564205,
      "grad_norm": 2.96875,
      "learning_rate": 2.3586480473323624e-05,
      "loss": 0.8379,
      "step": 492570
    },
    {
      "epoch": 1.726370094663316,
      "grad_norm": 2.875,
      "learning_rate": 2.3585831444659922e-05,
      "loss": 0.7977,
      "step": 492580
    },
    {
      "epoch": 1.7264051421702118,
      "grad_norm": 3.09375,
      "learning_rate": 2.358518241599622e-05,
      "loss": 0.6963,
      "step": 492590
    },
    {
      "epoch": 1.7264401896771073,
      "grad_norm": 2.984375,
      "learning_rate": 2.3584533387332518e-05,
      "loss": 0.8375,
      "step": 492600
    },
    {
      "epoch": 1.7264752371840029,
      "grad_norm": 3.125,
      "learning_rate": 2.3583884358668816e-05,
      "loss": 0.8339,
      "step": 492610
    },
    {
      "epoch": 1.7265102846908986,
      "grad_norm": 3.3125,
      "learning_rate": 2.3583235330005114e-05,
      "loss": 0.8171,
      "step": 492620
    },
    {
      "epoch": 1.7265453321977942,
      "grad_norm": 2.921875,
      "learning_rate": 2.3582586301341412e-05,
      "loss": 0.853,
      "step": 492630
    },
    {
      "epoch": 1.7265803797046897,
      "grad_norm": 2.90625,
      "learning_rate": 2.3581937272677714e-05,
      "loss": 0.732,
      "step": 492640
    },
    {
      "epoch": 1.7266154272115855,
      "grad_norm": 3.171875,
      "learning_rate": 2.358128824401401e-05,
      "loss": 0.8673,
      "step": 492650
    },
    {
      "epoch": 1.7266504747184808,
      "grad_norm": 3.296875,
      "learning_rate": 2.358063921535031e-05,
      "loss": 0.8462,
      "step": 492660
    },
    {
      "epoch": 1.7266855222253765,
      "grad_norm": 3.015625,
      "learning_rate": 2.3579990186686608e-05,
      "loss": 0.8234,
      "step": 492670
    },
    {
      "epoch": 1.726720569732272,
      "grad_norm": 2.421875,
      "learning_rate": 2.3579341158022906e-05,
      "loss": 0.8001,
      "step": 492680
    },
    {
      "epoch": 1.7267556172391676,
      "grad_norm": 2.5625,
      "learning_rate": 2.3578692129359204e-05,
      "loss": 0.8334,
      "step": 492690
    },
    {
      "epoch": 1.7267906647460634,
      "grad_norm": 2.484375,
      "learning_rate": 2.35780431006955e-05,
      "loss": 0.8614,
      "step": 492700
    },
    {
      "epoch": 1.7268257122529589,
      "grad_norm": 3.03125,
      "learning_rate": 2.3577394072031796e-05,
      "loss": 0.9009,
      "step": 492710
    },
    {
      "epoch": 1.7268607597598544,
      "grad_norm": 2.75,
      "learning_rate": 2.3576745043368094e-05,
      "loss": 0.7912,
      "step": 492720
    },
    {
      "epoch": 1.7268958072667502,
      "grad_norm": 3.390625,
      "learning_rate": 2.3576096014704392e-05,
      "loss": 0.8047,
      "step": 492730
    },
    {
      "epoch": 1.7269308547736457,
      "grad_norm": 3.03125,
      "learning_rate": 2.357544698604069e-05,
      "loss": 0.8356,
      "step": 492740
    },
    {
      "epoch": 1.7269659022805413,
      "grad_norm": 2.59375,
      "learning_rate": 2.357479795737699e-05,
      "loss": 0.8175,
      "step": 492750
    },
    {
      "epoch": 1.727000949787437,
      "grad_norm": 2.515625,
      "learning_rate": 2.357414892871329e-05,
      "loss": 0.8057,
      "step": 492760
    },
    {
      "epoch": 1.7270359972943323,
      "grad_norm": 3.28125,
      "learning_rate": 2.3573499900049588e-05,
      "loss": 0.8763,
      "step": 492770
    },
    {
      "epoch": 1.727071044801228,
      "grad_norm": 3.65625,
      "learning_rate": 2.3572850871385886e-05,
      "loss": 0.8027,
      "step": 492780
    },
    {
      "epoch": 1.7271060923081236,
      "grad_norm": 2.984375,
      "learning_rate": 2.3572201842722184e-05,
      "loss": 0.761,
      "step": 492790
    },
    {
      "epoch": 1.7271411398150192,
      "grad_norm": 2.609375,
      "learning_rate": 2.357155281405848e-05,
      "loss": 0.8187,
      "step": 492800
    },
    {
      "epoch": 1.727176187321915,
      "grad_norm": 2.59375,
      "learning_rate": 2.357090378539478e-05,
      "loss": 0.7508,
      "step": 492810
    },
    {
      "epoch": 1.7272112348288104,
      "grad_norm": 2.65625,
      "learning_rate": 2.3570254756731078e-05,
      "loss": 0.7917,
      "step": 492820
    },
    {
      "epoch": 1.727246282335706,
      "grad_norm": 2.953125,
      "learning_rate": 2.3569605728067376e-05,
      "loss": 0.888,
      "step": 492830
    },
    {
      "epoch": 1.7272813298426017,
      "grad_norm": 3.296875,
      "learning_rate": 2.3568956699403674e-05,
      "loss": 0.8687,
      "step": 492840
    },
    {
      "epoch": 1.7273163773494973,
      "grad_norm": 3.109375,
      "learning_rate": 2.356830767073997e-05,
      "loss": 0.8335,
      "step": 492850
    },
    {
      "epoch": 1.7273514248563928,
      "grad_norm": 3.171875,
      "learning_rate": 2.356765864207627e-05,
      "loss": 0.8439,
      "step": 492860
    },
    {
      "epoch": 1.7273864723632886,
      "grad_norm": 3.28125,
      "learning_rate": 2.3567009613412568e-05,
      "loss": 0.8622,
      "step": 492870
    },
    {
      "epoch": 1.7274215198701839,
      "grad_norm": 2.75,
      "learning_rate": 2.3566360584748866e-05,
      "loss": 0.8652,
      "step": 492880
    },
    {
      "epoch": 1.7274565673770796,
      "grad_norm": 3.09375,
      "learning_rate": 2.3565711556085167e-05,
      "loss": 0.9156,
      "step": 492890
    },
    {
      "epoch": 1.7274916148839752,
      "grad_norm": 2.75,
      "learning_rate": 2.356506252742146e-05,
      "loss": 0.7612,
      "step": 492900
    },
    {
      "epoch": 1.7275266623908707,
      "grad_norm": 3.015625,
      "learning_rate": 2.356441349875776e-05,
      "loss": 0.8287,
      "step": 492910
    },
    {
      "epoch": 1.7275617098977665,
      "grad_norm": 2.859375,
      "learning_rate": 2.3563764470094058e-05,
      "loss": 0.8318,
      "step": 492920
    },
    {
      "epoch": 1.727596757404662,
      "grad_norm": 2.96875,
      "learning_rate": 2.3563115441430356e-05,
      "loss": 0.8178,
      "step": 492930
    },
    {
      "epoch": 1.7276318049115575,
      "grad_norm": 2.640625,
      "learning_rate": 2.3562466412766654e-05,
      "loss": 0.812,
      "step": 492940
    },
    {
      "epoch": 1.7276668524184533,
      "grad_norm": 2.34375,
      "learning_rate": 2.356181738410295e-05,
      "loss": 0.8383,
      "step": 492950
    },
    {
      "epoch": 1.7277018999253488,
      "grad_norm": 3.140625,
      "learning_rate": 2.356116835543925e-05,
      "loss": 0.7921,
      "step": 492960
    },
    {
      "epoch": 1.7277369474322444,
      "grad_norm": 3.046875,
      "learning_rate": 2.3560519326775548e-05,
      "loss": 0.7424,
      "step": 492970
    },
    {
      "epoch": 1.7277719949391401,
      "grad_norm": 3.015625,
      "learning_rate": 2.3559870298111846e-05,
      "loss": 0.8584,
      "step": 492980
    },
    {
      "epoch": 1.7278070424460354,
      "grad_norm": 2.984375,
      "learning_rate": 2.3559221269448144e-05,
      "loss": 0.7664,
      "step": 492990
    },
    {
      "epoch": 1.7278420899529312,
      "grad_norm": 2.5625,
      "learning_rate": 2.3558572240784445e-05,
      "loss": 0.7767,
      "step": 493000
    },
    {
      "epoch": 1.727877137459827,
      "grad_norm": 2.8125,
      "learning_rate": 2.3557923212120743e-05,
      "loss": 0.8029,
      "step": 493010
    },
    {
      "epoch": 1.7279121849667223,
      "grad_norm": 3.359375,
      "learning_rate": 2.355727418345704e-05,
      "loss": 0.8212,
      "step": 493020
    },
    {
      "epoch": 1.727947232473618,
      "grad_norm": 2.96875,
      "learning_rate": 2.355662515479334e-05,
      "loss": 0.7903,
      "step": 493030
    },
    {
      "epoch": 1.7279822799805136,
      "grad_norm": 3.21875,
      "learning_rate": 2.3555976126129637e-05,
      "loss": 0.7941,
      "step": 493040
    },
    {
      "epoch": 1.728017327487409,
      "grad_norm": 2.65625,
      "learning_rate": 2.3555327097465935e-05,
      "loss": 0.8207,
      "step": 493050
    },
    {
      "epoch": 1.7280523749943049,
      "grad_norm": 2.875,
      "learning_rate": 2.3554678068802233e-05,
      "loss": 0.7831,
      "step": 493060
    },
    {
      "epoch": 1.7280874225012004,
      "grad_norm": 2.65625,
      "learning_rate": 2.355402904013853e-05,
      "loss": 0.9049,
      "step": 493070
    },
    {
      "epoch": 1.728122470008096,
      "grad_norm": 2.78125,
      "learning_rate": 2.3553380011474826e-05,
      "loss": 0.8177,
      "step": 493080
    },
    {
      "epoch": 1.7281575175149917,
      "grad_norm": 2.796875,
      "learning_rate": 2.3552730982811124e-05,
      "loss": 0.8256,
      "step": 493090
    },
    {
      "epoch": 1.728192565021887,
      "grad_norm": 3.265625,
      "learning_rate": 2.355208195414742e-05,
      "loss": 0.8589,
      "step": 493100
    },
    {
      "epoch": 1.7282276125287828,
      "grad_norm": 2.921875,
      "learning_rate": 2.355143292548372e-05,
      "loss": 0.8429,
      "step": 493110
    },
    {
      "epoch": 1.7282626600356785,
      "grad_norm": 2.875,
      "learning_rate": 2.355078389682002e-05,
      "loss": 0.868,
      "step": 493120
    },
    {
      "epoch": 1.7282977075425738,
      "grad_norm": 2.78125,
      "learning_rate": 2.355013486815632e-05,
      "loss": 0.8629,
      "step": 493130
    },
    {
      "epoch": 1.7283327550494696,
      "grad_norm": 3.15625,
      "learning_rate": 2.3549485839492617e-05,
      "loss": 0.7188,
      "step": 493140
    },
    {
      "epoch": 1.7283678025563651,
      "grad_norm": 2.65625,
      "learning_rate": 2.3548836810828915e-05,
      "loss": 0.8973,
      "step": 493150
    },
    {
      "epoch": 1.7284028500632607,
      "grad_norm": 2.703125,
      "learning_rate": 2.3548187782165213e-05,
      "loss": 0.8575,
      "step": 493160
    },
    {
      "epoch": 1.7284378975701564,
      "grad_norm": 2.546875,
      "learning_rate": 2.354753875350151e-05,
      "loss": 0.8223,
      "step": 493170
    },
    {
      "epoch": 1.728472945077052,
      "grad_norm": 2.59375,
      "learning_rate": 2.354688972483781e-05,
      "loss": 0.8572,
      "step": 493180
    },
    {
      "epoch": 1.7285079925839475,
      "grad_norm": 2.828125,
      "learning_rate": 2.3546240696174107e-05,
      "loss": 0.8954,
      "step": 493190
    },
    {
      "epoch": 1.7285430400908433,
      "grad_norm": 3.015625,
      "learning_rate": 2.3545591667510405e-05,
      "loss": 0.8468,
      "step": 493200
    },
    {
      "epoch": 1.7285780875977388,
      "grad_norm": 3.328125,
      "learning_rate": 2.3544942638846703e-05,
      "loss": 0.9082,
      "step": 493210
    },
    {
      "epoch": 1.7286131351046343,
      "grad_norm": 2.5,
      "learning_rate": 2.3544293610183e-05,
      "loss": 0.8782,
      "step": 493220
    },
    {
      "epoch": 1.72864818261153,
      "grad_norm": 2.9375,
      "learning_rate": 2.35436445815193e-05,
      "loss": 0.8256,
      "step": 493230
    },
    {
      "epoch": 1.7286832301184254,
      "grad_norm": 2.640625,
      "learning_rate": 2.3542995552855597e-05,
      "loss": 0.7887,
      "step": 493240
    },
    {
      "epoch": 1.7287182776253212,
      "grad_norm": 3.03125,
      "learning_rate": 2.3542346524191895e-05,
      "loss": 0.9034,
      "step": 493250
    },
    {
      "epoch": 1.7287533251322167,
      "grad_norm": 2.8125,
      "learning_rate": 2.3541697495528196e-05,
      "loss": 0.8622,
      "step": 493260
    },
    {
      "epoch": 1.7287883726391122,
      "grad_norm": 2.8125,
      "learning_rate": 2.354104846686449e-05,
      "loss": 0.8175,
      "step": 493270
    },
    {
      "epoch": 1.728823420146008,
      "grad_norm": 2.65625,
      "learning_rate": 2.354039943820079e-05,
      "loss": 0.828,
      "step": 493280
    },
    {
      "epoch": 1.7288584676529035,
      "grad_norm": 3.71875,
      "learning_rate": 2.3539750409537087e-05,
      "loss": 0.906,
      "step": 493290
    },
    {
      "epoch": 1.728893515159799,
      "grad_norm": 2.875,
      "learning_rate": 2.3539101380873385e-05,
      "loss": 0.7995,
      "step": 493300
    },
    {
      "epoch": 1.7289285626666948,
      "grad_norm": 3.015625,
      "learning_rate": 2.3538452352209683e-05,
      "loss": 0.8334,
      "step": 493310
    },
    {
      "epoch": 1.7289636101735903,
      "grad_norm": 2.53125,
      "learning_rate": 2.353780332354598e-05,
      "loss": 0.8368,
      "step": 493320
    },
    {
      "epoch": 1.7289986576804859,
      "grad_norm": 2.828125,
      "learning_rate": 2.353715429488228e-05,
      "loss": 0.8861,
      "step": 493330
    },
    {
      "epoch": 1.7290337051873816,
      "grad_norm": 3.296875,
      "learning_rate": 2.3536505266218577e-05,
      "loss": 0.86,
      "step": 493340
    },
    {
      "epoch": 1.729068752694277,
      "grad_norm": 2.546875,
      "learning_rate": 2.3535856237554875e-05,
      "loss": 0.8085,
      "step": 493350
    },
    {
      "epoch": 1.7291038002011727,
      "grad_norm": 2.953125,
      "learning_rate": 2.3535207208891173e-05,
      "loss": 0.796,
      "step": 493360
    },
    {
      "epoch": 1.7291388477080682,
      "grad_norm": 2.953125,
      "learning_rate": 2.3534558180227474e-05,
      "loss": 0.8261,
      "step": 493370
    },
    {
      "epoch": 1.7291738952149638,
      "grad_norm": 2.578125,
      "learning_rate": 2.3533909151563772e-05,
      "loss": 0.8163,
      "step": 493380
    },
    {
      "epoch": 1.7292089427218595,
      "grad_norm": 2.34375,
      "learning_rate": 2.353326012290007e-05,
      "loss": 0.7756,
      "step": 493390
    },
    {
      "epoch": 1.729243990228755,
      "grad_norm": 2.984375,
      "learning_rate": 2.353261109423637e-05,
      "loss": 0.7862,
      "step": 493400
    },
    {
      "epoch": 1.7292790377356506,
      "grad_norm": 2.515625,
      "learning_rate": 2.3531962065572666e-05,
      "loss": 0.7811,
      "step": 493410
    },
    {
      "epoch": 1.7293140852425464,
      "grad_norm": 3.15625,
      "learning_rate": 2.3531313036908964e-05,
      "loss": 0.8474,
      "step": 493420
    },
    {
      "epoch": 1.729349132749442,
      "grad_norm": 2.90625,
      "learning_rate": 2.3530664008245262e-05,
      "loss": 0.8535,
      "step": 493430
    },
    {
      "epoch": 1.7293841802563374,
      "grad_norm": 2.703125,
      "learning_rate": 2.353001497958156e-05,
      "loss": 0.8348,
      "step": 493440
    },
    {
      "epoch": 1.7294192277632332,
      "grad_norm": 2.6875,
      "learning_rate": 2.352936595091786e-05,
      "loss": 0.7902,
      "step": 493450
    },
    {
      "epoch": 1.7294542752701285,
      "grad_norm": 2.9375,
      "learning_rate": 2.3528716922254153e-05,
      "loss": 0.8375,
      "step": 493460
    },
    {
      "epoch": 1.7294893227770243,
      "grad_norm": 2.640625,
      "learning_rate": 2.352806789359045e-05,
      "loss": 0.8178,
      "step": 493470
    },
    {
      "epoch": 1.7295243702839198,
      "grad_norm": 2.640625,
      "learning_rate": 2.3527418864926752e-05,
      "loss": 0.9283,
      "step": 493480
    },
    {
      "epoch": 1.7295594177908153,
      "grad_norm": 2.53125,
      "learning_rate": 2.352676983626305e-05,
      "loss": 0.8311,
      "step": 493490
    },
    {
      "epoch": 1.729594465297711,
      "grad_norm": 2.578125,
      "learning_rate": 2.352612080759935e-05,
      "loss": 0.9166,
      "step": 493500
    },
    {
      "epoch": 1.7296295128046066,
      "grad_norm": 2.59375,
      "learning_rate": 2.3525471778935646e-05,
      "loss": 0.7622,
      "step": 493510
    },
    {
      "epoch": 1.7296645603115022,
      "grad_norm": 2.828125,
      "learning_rate": 2.3524822750271944e-05,
      "loss": 0.9035,
      "step": 493520
    },
    {
      "epoch": 1.729699607818398,
      "grad_norm": 3.03125,
      "learning_rate": 2.3524173721608242e-05,
      "loss": 0.8412,
      "step": 493530
    },
    {
      "epoch": 1.7297346553252935,
      "grad_norm": 3.09375,
      "learning_rate": 2.352352469294454e-05,
      "loss": 0.7925,
      "step": 493540
    },
    {
      "epoch": 1.729769702832189,
      "grad_norm": 3.0,
      "learning_rate": 2.352287566428084e-05,
      "loss": 0.8404,
      "step": 493550
    },
    {
      "epoch": 1.7298047503390848,
      "grad_norm": 2.84375,
      "learning_rate": 2.3522226635617136e-05,
      "loss": 0.8222,
      "step": 493560
    },
    {
      "epoch": 1.72983979784598,
      "grad_norm": 3.03125,
      "learning_rate": 2.3521577606953434e-05,
      "loss": 0.8177,
      "step": 493570
    },
    {
      "epoch": 1.7298748453528758,
      "grad_norm": 2.859375,
      "learning_rate": 2.3520928578289732e-05,
      "loss": 0.8365,
      "step": 493580
    },
    {
      "epoch": 1.7299098928597714,
      "grad_norm": 3.203125,
      "learning_rate": 2.352027954962603e-05,
      "loss": 0.8746,
      "step": 493590
    },
    {
      "epoch": 1.729944940366667,
      "grad_norm": 2.390625,
      "learning_rate": 2.351963052096233e-05,
      "loss": 0.8096,
      "step": 493600
    },
    {
      "epoch": 1.7299799878735627,
      "grad_norm": 3.375,
      "learning_rate": 2.3518981492298626e-05,
      "loss": 0.8522,
      "step": 493610
    },
    {
      "epoch": 1.7300150353804582,
      "grad_norm": 3.203125,
      "learning_rate": 2.3518332463634928e-05,
      "loss": 0.8461,
      "step": 493620
    },
    {
      "epoch": 1.7300500828873537,
      "grad_norm": 2.234375,
      "learning_rate": 2.3517683434971226e-05,
      "loss": 0.7889,
      "step": 493630
    },
    {
      "epoch": 1.7300851303942495,
      "grad_norm": 3.203125,
      "learning_rate": 2.3517034406307524e-05,
      "loss": 0.8833,
      "step": 493640
    },
    {
      "epoch": 1.730120177901145,
      "grad_norm": 3.0,
      "learning_rate": 2.351638537764382e-05,
      "loss": 0.8493,
      "step": 493650
    },
    {
      "epoch": 1.7301552254080406,
      "grad_norm": 3.15625,
      "learning_rate": 2.3515736348980116e-05,
      "loss": 0.9251,
      "step": 493660
    },
    {
      "epoch": 1.7301902729149363,
      "grad_norm": 3.1875,
      "learning_rate": 2.3515087320316414e-05,
      "loss": 0.8681,
      "step": 493670
    },
    {
      "epoch": 1.7302253204218316,
      "grad_norm": 2.421875,
      "learning_rate": 2.3514438291652712e-05,
      "loss": 0.7506,
      "step": 493680
    },
    {
      "epoch": 1.7302603679287274,
      "grad_norm": 2.9375,
      "learning_rate": 2.351378926298901e-05,
      "loss": 0.8645,
      "step": 493690
    },
    {
      "epoch": 1.7302954154356232,
      "grad_norm": 3.140625,
      "learning_rate": 2.351314023432531e-05,
      "loss": 0.8754,
      "step": 493700
    },
    {
      "epoch": 1.7303304629425185,
      "grad_norm": 2.75,
      "learning_rate": 2.3512491205661606e-05,
      "loss": 0.7991,
      "step": 493710
    },
    {
      "epoch": 1.7303655104494142,
      "grad_norm": 2.703125,
      "learning_rate": 2.3511842176997904e-05,
      "loss": 0.7774,
      "step": 493720
    },
    {
      "epoch": 1.7304005579563098,
      "grad_norm": 3.140625,
      "learning_rate": 2.3511193148334202e-05,
      "loss": 0.8264,
      "step": 493730
    },
    {
      "epoch": 1.7304356054632053,
      "grad_norm": 2.671875,
      "learning_rate": 2.3510544119670504e-05,
      "loss": 0.7883,
      "step": 493740
    },
    {
      "epoch": 1.730470652970101,
      "grad_norm": 2.75,
      "learning_rate": 2.3509895091006802e-05,
      "loss": 0.795,
      "step": 493750
    },
    {
      "epoch": 1.7305057004769966,
      "grad_norm": 3.21875,
      "learning_rate": 2.35092460623431e-05,
      "loss": 0.7955,
      "step": 493760
    },
    {
      "epoch": 1.7305407479838921,
      "grad_norm": 2.734375,
      "learning_rate": 2.3508597033679398e-05,
      "loss": 0.7337,
      "step": 493770
    },
    {
      "epoch": 1.7305757954907879,
      "grad_norm": 2.828125,
      "learning_rate": 2.3507948005015696e-05,
      "loss": 0.8082,
      "step": 493780
    },
    {
      "epoch": 1.7306108429976832,
      "grad_norm": 3.21875,
      "learning_rate": 2.3507298976351994e-05,
      "loss": 0.8597,
      "step": 493790
    },
    {
      "epoch": 1.730645890504579,
      "grad_norm": 2.625,
      "learning_rate": 2.3506649947688292e-05,
      "loss": 0.9201,
      "step": 493800
    },
    {
      "epoch": 1.7306809380114747,
      "grad_norm": 3.859375,
      "learning_rate": 2.350600091902459e-05,
      "loss": 0.8477,
      "step": 493810
    },
    {
      "epoch": 1.73071598551837,
      "grad_norm": 2.484375,
      "learning_rate": 2.3505351890360888e-05,
      "loss": 0.8119,
      "step": 493820
    },
    {
      "epoch": 1.7307510330252658,
      "grad_norm": 2.875,
      "learning_rate": 2.3504702861697186e-05,
      "loss": 0.8432,
      "step": 493830
    },
    {
      "epoch": 1.7307860805321613,
      "grad_norm": 2.8125,
      "learning_rate": 2.350405383303348e-05,
      "loss": 0.7508,
      "step": 493840
    },
    {
      "epoch": 1.7308211280390569,
      "grad_norm": 3.0,
      "learning_rate": 2.3503404804369782e-05,
      "loss": 0.8092,
      "step": 493850
    },
    {
      "epoch": 1.7308561755459526,
      "grad_norm": 2.96875,
      "learning_rate": 2.350275577570608e-05,
      "loss": 0.8243,
      "step": 493860
    },
    {
      "epoch": 1.7308912230528481,
      "grad_norm": 2.59375,
      "learning_rate": 2.3502106747042378e-05,
      "loss": 0.8131,
      "step": 493870
    },
    {
      "epoch": 1.7309262705597437,
      "grad_norm": 2.46875,
      "learning_rate": 2.3501457718378676e-05,
      "loss": 0.7566,
      "step": 493880
    },
    {
      "epoch": 1.7309613180666394,
      "grad_norm": 2.578125,
      "learning_rate": 2.3500808689714974e-05,
      "loss": 0.8081,
      "step": 493890
    },
    {
      "epoch": 1.730996365573535,
      "grad_norm": 3.0,
      "learning_rate": 2.3500159661051272e-05,
      "loss": 0.7852,
      "step": 493900
    },
    {
      "epoch": 1.7310314130804305,
      "grad_norm": 2.96875,
      "learning_rate": 2.349951063238757e-05,
      "loss": 0.9116,
      "step": 493910
    },
    {
      "epoch": 1.7310664605873263,
      "grad_norm": 2.5,
      "learning_rate": 2.3498861603723868e-05,
      "loss": 0.8425,
      "step": 493920
    },
    {
      "epoch": 1.7311015080942216,
      "grad_norm": 2.796875,
      "learning_rate": 2.3498212575060166e-05,
      "loss": 0.777,
      "step": 493930
    },
    {
      "epoch": 1.7311365556011173,
      "grad_norm": 3.046875,
      "learning_rate": 2.3497563546396464e-05,
      "loss": 0.8405,
      "step": 493940
    },
    {
      "epoch": 1.7311716031080129,
      "grad_norm": 3.171875,
      "learning_rate": 2.3496914517732762e-05,
      "loss": 0.8155,
      "step": 493950
    },
    {
      "epoch": 1.7312066506149084,
      "grad_norm": 3.28125,
      "learning_rate": 2.349626548906906e-05,
      "loss": 0.8994,
      "step": 493960
    },
    {
      "epoch": 1.7312416981218042,
      "grad_norm": 2.734375,
      "learning_rate": 2.3495616460405358e-05,
      "loss": 0.8155,
      "step": 493970
    },
    {
      "epoch": 1.7312767456286997,
      "grad_norm": 2.890625,
      "learning_rate": 2.3494967431741656e-05,
      "loss": 0.7852,
      "step": 493980
    },
    {
      "epoch": 1.7313117931355952,
      "grad_norm": 2.875,
      "learning_rate": 2.3494318403077957e-05,
      "loss": 0.7754,
      "step": 493990
    },
    {
      "epoch": 1.731346840642491,
      "grad_norm": 3.0,
      "learning_rate": 2.3493669374414255e-05,
      "loss": 0.8554,
      "step": 494000
    },
    {
      "epoch": 1.7313818881493865,
      "grad_norm": 3.171875,
      "learning_rate": 2.3493020345750553e-05,
      "loss": 0.7967,
      "step": 494010
    },
    {
      "epoch": 1.731416935656282,
      "grad_norm": 2.53125,
      "learning_rate": 2.3492371317086848e-05,
      "loss": 0.7183,
      "step": 494020
    },
    {
      "epoch": 1.7314519831631778,
      "grad_norm": 3.140625,
      "learning_rate": 2.3491722288423146e-05,
      "loss": 0.9198,
      "step": 494030
    },
    {
      "epoch": 1.7314870306700731,
      "grad_norm": 2.953125,
      "learning_rate": 2.3491073259759444e-05,
      "loss": 0.8404,
      "step": 494040
    },
    {
      "epoch": 1.731522078176969,
      "grad_norm": 3.3125,
      "learning_rate": 2.3490424231095742e-05,
      "loss": 0.8209,
      "step": 494050
    },
    {
      "epoch": 1.7315571256838644,
      "grad_norm": 2.703125,
      "learning_rate": 2.348977520243204e-05,
      "loss": 0.8162,
      "step": 494060
    },
    {
      "epoch": 1.73159217319076,
      "grad_norm": 3.046875,
      "learning_rate": 2.3489126173768338e-05,
      "loss": 0.8095,
      "step": 494070
    },
    {
      "epoch": 1.7316272206976557,
      "grad_norm": 2.96875,
      "learning_rate": 2.3488477145104636e-05,
      "loss": 0.8231,
      "step": 494080
    },
    {
      "epoch": 1.7316622682045513,
      "grad_norm": 2.578125,
      "learning_rate": 2.3487828116440934e-05,
      "loss": 0.8254,
      "step": 494090
    },
    {
      "epoch": 1.7316973157114468,
      "grad_norm": 2.5625,
      "learning_rate": 2.3487179087777235e-05,
      "loss": 0.8853,
      "step": 494100
    },
    {
      "epoch": 1.7317323632183426,
      "grad_norm": 3.0,
      "learning_rate": 2.3486530059113533e-05,
      "loss": 0.8268,
      "step": 494110
    },
    {
      "epoch": 1.731767410725238,
      "grad_norm": 2.828125,
      "learning_rate": 2.348588103044983e-05,
      "loss": 0.8272,
      "step": 494120
    },
    {
      "epoch": 1.7318024582321336,
      "grad_norm": 3.03125,
      "learning_rate": 2.348523200178613e-05,
      "loss": 0.8906,
      "step": 494130
    },
    {
      "epoch": 1.7318375057390294,
      "grad_norm": 2.71875,
      "learning_rate": 2.3484582973122427e-05,
      "loss": 0.8178,
      "step": 494140
    },
    {
      "epoch": 1.7318725532459247,
      "grad_norm": 3.453125,
      "learning_rate": 2.3483933944458725e-05,
      "loss": 0.8749,
      "step": 494150
    },
    {
      "epoch": 1.7319076007528205,
      "grad_norm": 2.859375,
      "learning_rate": 2.3483284915795023e-05,
      "loss": 0.7886,
      "step": 494160
    },
    {
      "epoch": 1.731942648259716,
      "grad_norm": 2.796875,
      "learning_rate": 2.348263588713132e-05,
      "loss": 0.835,
      "step": 494170
    },
    {
      "epoch": 1.7319776957666115,
      "grad_norm": 2.78125,
      "learning_rate": 2.348198685846762e-05,
      "loss": 0.7435,
      "step": 494180
    },
    {
      "epoch": 1.7320127432735073,
      "grad_norm": 3.15625,
      "learning_rate": 2.3481337829803917e-05,
      "loss": 0.7947,
      "step": 494190
    },
    {
      "epoch": 1.7320477907804028,
      "grad_norm": 2.875,
      "learning_rate": 2.3480688801140215e-05,
      "loss": 0.8256,
      "step": 494200
    },
    {
      "epoch": 1.7320828382872984,
      "grad_norm": 2.359375,
      "learning_rate": 2.348003977247651e-05,
      "loss": 0.7876,
      "step": 494210
    },
    {
      "epoch": 1.7321178857941941,
      "grad_norm": 3.09375,
      "learning_rate": 2.347939074381281e-05,
      "loss": 0.8709,
      "step": 494220
    },
    {
      "epoch": 1.7321529333010897,
      "grad_norm": 3.109375,
      "learning_rate": 2.347874171514911e-05,
      "loss": 0.8805,
      "step": 494230
    },
    {
      "epoch": 1.7321879808079852,
      "grad_norm": 2.859375,
      "learning_rate": 2.3478092686485407e-05,
      "loss": 0.845,
      "step": 494240
    },
    {
      "epoch": 1.732223028314881,
      "grad_norm": 3.046875,
      "learning_rate": 2.3477443657821705e-05,
      "loss": 0.8545,
      "step": 494250
    },
    {
      "epoch": 1.7322580758217763,
      "grad_norm": 2.96875,
      "learning_rate": 2.3476794629158003e-05,
      "loss": 0.8489,
      "step": 494260
    },
    {
      "epoch": 1.732293123328672,
      "grad_norm": 2.984375,
      "learning_rate": 2.34761456004943e-05,
      "loss": 0.8713,
      "step": 494270
    },
    {
      "epoch": 1.7323281708355676,
      "grad_norm": 2.546875,
      "learning_rate": 2.34754965718306e-05,
      "loss": 0.787,
      "step": 494280
    },
    {
      "epoch": 1.732363218342463,
      "grad_norm": 2.328125,
      "learning_rate": 2.3474847543166897e-05,
      "loss": 0.8684,
      "step": 494290
    },
    {
      "epoch": 1.7323982658493589,
      "grad_norm": 2.953125,
      "learning_rate": 2.3474198514503195e-05,
      "loss": 0.7791,
      "step": 494300
    },
    {
      "epoch": 1.7324333133562544,
      "grad_norm": 3.28125,
      "learning_rate": 2.3473549485839493e-05,
      "loss": 0.7579,
      "step": 494310
    },
    {
      "epoch": 1.73246836086315,
      "grad_norm": 2.453125,
      "learning_rate": 2.347290045717579e-05,
      "loss": 0.9054,
      "step": 494320
    },
    {
      "epoch": 1.7325034083700457,
      "grad_norm": 2.921875,
      "learning_rate": 2.347225142851209e-05,
      "loss": 0.8538,
      "step": 494330
    },
    {
      "epoch": 1.7325384558769412,
      "grad_norm": 2.796875,
      "learning_rate": 2.3471602399848387e-05,
      "loss": 0.8676,
      "step": 494340
    },
    {
      "epoch": 1.7325735033838368,
      "grad_norm": 2.921875,
      "learning_rate": 2.347095337118469e-05,
      "loss": 0.7744,
      "step": 494350
    },
    {
      "epoch": 1.7326085508907325,
      "grad_norm": 2.984375,
      "learning_rate": 2.3470304342520987e-05,
      "loss": 0.8381,
      "step": 494360
    },
    {
      "epoch": 1.7326435983976278,
      "grad_norm": 2.828125,
      "learning_rate": 2.3469655313857285e-05,
      "loss": 0.7621,
      "step": 494370
    },
    {
      "epoch": 1.7326786459045236,
      "grad_norm": 3.0,
      "learning_rate": 2.3469006285193583e-05,
      "loss": 0.8974,
      "step": 494380
    },
    {
      "epoch": 1.7327136934114193,
      "grad_norm": 2.921875,
      "learning_rate": 2.346835725652988e-05,
      "loss": 0.9089,
      "step": 494390
    },
    {
      "epoch": 1.7327487409183147,
      "grad_norm": 3.484375,
      "learning_rate": 2.3467708227866175e-05,
      "loss": 0.7759,
      "step": 494400
    },
    {
      "epoch": 1.7327837884252104,
      "grad_norm": 2.46875,
      "learning_rate": 2.3467059199202473e-05,
      "loss": 0.8788,
      "step": 494410
    },
    {
      "epoch": 1.732818835932106,
      "grad_norm": 2.984375,
      "learning_rate": 2.346641017053877e-05,
      "loss": 0.7723,
      "step": 494420
    },
    {
      "epoch": 1.7328538834390015,
      "grad_norm": 2.734375,
      "learning_rate": 2.346576114187507e-05,
      "loss": 0.8963,
      "step": 494430
    },
    {
      "epoch": 1.7328889309458972,
      "grad_norm": 2.890625,
      "learning_rate": 2.3465112113211367e-05,
      "loss": 0.8596,
      "step": 494440
    },
    {
      "epoch": 1.7329239784527928,
      "grad_norm": 2.9375,
      "learning_rate": 2.3464463084547665e-05,
      "loss": 0.9101,
      "step": 494450
    },
    {
      "epoch": 1.7329590259596883,
      "grad_norm": 2.421875,
      "learning_rate": 2.3463814055883963e-05,
      "loss": 0.7704,
      "step": 494460
    },
    {
      "epoch": 1.732994073466584,
      "grad_norm": 3.09375,
      "learning_rate": 2.3463165027220265e-05,
      "loss": 0.8785,
      "step": 494470
    },
    {
      "epoch": 1.7330291209734794,
      "grad_norm": 3.1875,
      "learning_rate": 2.3462515998556563e-05,
      "loss": 0.8995,
      "step": 494480
    },
    {
      "epoch": 1.7330641684803751,
      "grad_norm": 2.546875,
      "learning_rate": 2.346186696989286e-05,
      "loss": 0.8174,
      "step": 494490
    },
    {
      "epoch": 1.733099215987271,
      "grad_norm": 2.4375,
      "learning_rate": 2.346121794122916e-05,
      "loss": 0.8112,
      "step": 494500
    },
    {
      "epoch": 1.7331342634941662,
      "grad_norm": 3.109375,
      "learning_rate": 2.3460568912565457e-05,
      "loss": 0.8114,
      "step": 494510
    },
    {
      "epoch": 1.733169311001062,
      "grad_norm": 3.0625,
      "learning_rate": 2.3459919883901755e-05,
      "loss": 0.7734,
      "step": 494520
    },
    {
      "epoch": 1.7332043585079575,
      "grad_norm": 2.875,
      "learning_rate": 2.3459270855238053e-05,
      "loss": 0.8738,
      "step": 494530
    },
    {
      "epoch": 1.733239406014853,
      "grad_norm": 3.203125,
      "learning_rate": 2.345862182657435e-05,
      "loss": 0.8135,
      "step": 494540
    },
    {
      "epoch": 1.7332744535217488,
      "grad_norm": 3.21875,
      "learning_rate": 2.345797279791065e-05,
      "loss": 0.8383,
      "step": 494550
    },
    {
      "epoch": 1.7333095010286443,
      "grad_norm": 2.796875,
      "learning_rate": 2.3457323769246947e-05,
      "loss": 0.7754,
      "step": 494560
    },
    {
      "epoch": 1.7333445485355399,
      "grad_norm": 2.890625,
      "learning_rate": 2.3456674740583245e-05,
      "loss": 0.8175,
      "step": 494570
    },
    {
      "epoch": 1.7333795960424356,
      "grad_norm": 2.96875,
      "learning_rate": 2.3456025711919543e-05,
      "loss": 0.8872,
      "step": 494580
    },
    {
      "epoch": 1.7334146435493312,
      "grad_norm": 2.25,
      "learning_rate": 2.345537668325584e-05,
      "loss": 0.8238,
      "step": 494590
    },
    {
      "epoch": 1.7334496910562267,
      "grad_norm": 2.875,
      "learning_rate": 2.345472765459214e-05,
      "loss": 0.841,
      "step": 494600
    },
    {
      "epoch": 1.7334847385631225,
      "grad_norm": 3.03125,
      "learning_rate": 2.3454078625928437e-05,
      "loss": 0.7843,
      "step": 494610
    },
    {
      "epoch": 1.7335197860700178,
      "grad_norm": 3.015625,
      "learning_rate": 2.3453429597264735e-05,
      "loss": 0.886,
      "step": 494620
    },
    {
      "epoch": 1.7335548335769135,
      "grad_norm": 3.375,
      "learning_rate": 2.3452780568601033e-05,
      "loss": 0.8809,
      "step": 494630
    },
    {
      "epoch": 1.733589881083809,
      "grad_norm": 3.0,
      "learning_rate": 2.345213153993733e-05,
      "loss": 0.8255,
      "step": 494640
    },
    {
      "epoch": 1.7336249285907046,
      "grad_norm": 2.65625,
      "learning_rate": 2.345148251127363e-05,
      "loss": 0.794,
      "step": 494650
    },
    {
      "epoch": 1.7336599760976004,
      "grad_norm": 3.328125,
      "learning_rate": 2.3450833482609927e-05,
      "loss": 0.8769,
      "step": 494660
    },
    {
      "epoch": 1.733695023604496,
      "grad_norm": 3.03125,
      "learning_rate": 2.3450184453946225e-05,
      "loss": 0.848,
      "step": 494670
    },
    {
      "epoch": 1.7337300711113914,
      "grad_norm": 3.640625,
      "learning_rate": 2.3449535425282523e-05,
      "loss": 0.9195,
      "step": 494680
    },
    {
      "epoch": 1.7337651186182872,
      "grad_norm": 3.0,
      "learning_rate": 2.344888639661882e-05,
      "loss": 0.7932,
      "step": 494690
    },
    {
      "epoch": 1.7338001661251827,
      "grad_norm": 2.84375,
      "learning_rate": 2.344823736795512e-05,
      "loss": 0.7245,
      "step": 494700
    },
    {
      "epoch": 1.7338352136320783,
      "grad_norm": 2.734375,
      "learning_rate": 2.3447588339291417e-05,
      "loss": 0.806,
      "step": 494710
    },
    {
      "epoch": 1.733870261138974,
      "grad_norm": 2.71875,
      "learning_rate": 2.3446939310627718e-05,
      "loss": 0.8437,
      "step": 494720
    },
    {
      "epoch": 1.7339053086458693,
      "grad_norm": 2.953125,
      "learning_rate": 2.3446290281964016e-05,
      "loss": 0.8212,
      "step": 494730
    },
    {
      "epoch": 1.733940356152765,
      "grad_norm": 3.21875,
      "learning_rate": 2.3445641253300314e-05,
      "loss": 0.8469,
      "step": 494740
    },
    {
      "epoch": 1.7339754036596606,
      "grad_norm": 2.84375,
      "learning_rate": 2.3444992224636612e-05,
      "loss": 0.8747,
      "step": 494750
    },
    {
      "epoch": 1.7340104511665562,
      "grad_norm": 3.078125,
      "learning_rate": 2.344434319597291e-05,
      "loss": 0.7908,
      "step": 494760
    },
    {
      "epoch": 1.734045498673452,
      "grad_norm": 2.84375,
      "learning_rate": 2.3443694167309208e-05,
      "loss": 0.8029,
      "step": 494770
    },
    {
      "epoch": 1.7340805461803475,
      "grad_norm": 2.96875,
      "learning_rate": 2.3443045138645503e-05,
      "loss": 0.7877,
      "step": 494780
    },
    {
      "epoch": 1.734115593687243,
      "grad_norm": 3.0625,
      "learning_rate": 2.34423961099818e-05,
      "loss": 0.8452,
      "step": 494790
    },
    {
      "epoch": 1.7341506411941388,
      "grad_norm": 2.921875,
      "learning_rate": 2.34417470813181e-05,
      "loss": 0.832,
      "step": 494800
    },
    {
      "epoch": 1.7341856887010343,
      "grad_norm": 2.859375,
      "learning_rate": 2.3441098052654397e-05,
      "loss": 0.8196,
      "step": 494810
    },
    {
      "epoch": 1.7342207362079298,
      "grad_norm": 3.0625,
      "learning_rate": 2.3440449023990695e-05,
      "loss": 0.7908,
      "step": 494820
    },
    {
      "epoch": 1.7342557837148256,
      "grad_norm": 2.953125,
      "learning_rate": 2.3439799995326996e-05,
      "loss": 0.8991,
      "step": 494830
    },
    {
      "epoch": 1.734290831221721,
      "grad_norm": 3.03125,
      "learning_rate": 2.3439150966663294e-05,
      "loss": 0.9221,
      "step": 494840
    },
    {
      "epoch": 1.7343258787286167,
      "grad_norm": 3.109375,
      "learning_rate": 2.3438501937999592e-05,
      "loss": 0.7522,
      "step": 494850
    },
    {
      "epoch": 1.7343609262355122,
      "grad_norm": 2.90625,
      "learning_rate": 2.343785290933589e-05,
      "loss": 0.8078,
      "step": 494860
    },
    {
      "epoch": 1.7343959737424077,
      "grad_norm": 3.203125,
      "learning_rate": 2.3437203880672188e-05,
      "loss": 0.8616,
      "step": 494870
    },
    {
      "epoch": 1.7344310212493035,
      "grad_norm": 2.828125,
      "learning_rate": 2.3436554852008486e-05,
      "loss": 0.8404,
      "step": 494880
    },
    {
      "epoch": 1.734466068756199,
      "grad_norm": 2.921875,
      "learning_rate": 2.3435905823344784e-05,
      "loss": 0.884,
      "step": 494890
    },
    {
      "epoch": 1.7345011162630946,
      "grad_norm": 3.171875,
      "learning_rate": 2.3435256794681082e-05,
      "loss": 0.8035,
      "step": 494900
    },
    {
      "epoch": 1.7345361637699903,
      "grad_norm": 2.625,
      "learning_rate": 2.343460776601738e-05,
      "loss": 0.8238,
      "step": 494910
    },
    {
      "epoch": 1.7345712112768858,
      "grad_norm": 2.90625,
      "learning_rate": 2.3433958737353678e-05,
      "loss": 0.7754,
      "step": 494920
    },
    {
      "epoch": 1.7346062587837814,
      "grad_norm": 2.84375,
      "learning_rate": 2.3433309708689976e-05,
      "loss": 0.8732,
      "step": 494930
    },
    {
      "epoch": 1.7346413062906771,
      "grad_norm": 2.625,
      "learning_rate": 2.3432660680026274e-05,
      "loss": 0.7935,
      "step": 494940
    },
    {
      "epoch": 1.7346763537975725,
      "grad_norm": 3.078125,
      "learning_rate": 2.3432011651362572e-05,
      "loss": 0.8368,
      "step": 494950
    },
    {
      "epoch": 1.7347114013044682,
      "grad_norm": 3.078125,
      "learning_rate": 2.343136262269887e-05,
      "loss": 0.7394,
      "step": 494960
    },
    {
      "epoch": 1.7347464488113638,
      "grad_norm": 3.03125,
      "learning_rate": 2.3430713594035168e-05,
      "loss": 0.778,
      "step": 494970
    },
    {
      "epoch": 1.7347814963182593,
      "grad_norm": 2.53125,
      "learning_rate": 2.3430064565371466e-05,
      "loss": 0.8052,
      "step": 494980
    },
    {
      "epoch": 1.734816543825155,
      "grad_norm": 3.015625,
      "learning_rate": 2.3429415536707764e-05,
      "loss": 0.7974,
      "step": 494990
    },
    {
      "epoch": 1.7348515913320506,
      "grad_norm": 2.96875,
      "learning_rate": 2.3428766508044062e-05,
      "loss": 0.8449,
      "step": 495000
    },
    {
      "epoch": 1.7348515913320506,
      "eval_loss": 0.7806063294410706,
      "eval_runtime": 553.7493,
      "eval_samples_per_second": 687.018,
      "eval_steps_per_second": 57.252,
      "step": 495000
    },
    {
      "epoch": 1.7348866388389461,
      "grad_norm": 2.953125,
      "learning_rate": 2.342811747938036e-05,
      "loss": 0.8516,
      "step": 495010
    },
    {
      "epoch": 1.7349216863458419,
      "grad_norm": 2.96875,
      "learning_rate": 2.3427468450716658e-05,
      "loss": 0.8738,
      "step": 495020
    },
    {
      "epoch": 1.7349567338527374,
      "grad_norm": 2.71875,
      "learning_rate": 2.3426819422052956e-05,
      "loss": 0.7845,
      "step": 495030
    },
    {
      "epoch": 1.734991781359633,
      "grad_norm": 3.046875,
      "learning_rate": 2.3426170393389254e-05,
      "loss": 0.7701,
      "step": 495040
    },
    {
      "epoch": 1.7350268288665287,
      "grad_norm": 2.90625,
      "learning_rate": 2.3425521364725552e-05,
      "loss": 0.8197,
      "step": 495050
    },
    {
      "epoch": 1.735061876373424,
      "grad_norm": 2.90625,
      "learning_rate": 2.342487233606185e-05,
      "loss": 0.8534,
      "step": 495060
    },
    {
      "epoch": 1.7350969238803198,
      "grad_norm": 2.640625,
      "learning_rate": 2.3424223307398148e-05,
      "loss": 0.8451,
      "step": 495070
    },
    {
      "epoch": 1.7351319713872155,
      "grad_norm": 2.75,
      "learning_rate": 2.3423574278734446e-05,
      "loss": 0.7902,
      "step": 495080
    },
    {
      "epoch": 1.7351670188941108,
      "grad_norm": 2.75,
      "learning_rate": 2.3422925250070747e-05,
      "loss": 0.8143,
      "step": 495090
    },
    {
      "epoch": 1.7352020664010066,
      "grad_norm": 2.59375,
      "learning_rate": 2.3422276221407045e-05,
      "loss": 0.8467,
      "step": 495100
    },
    {
      "epoch": 1.7352371139079021,
      "grad_norm": 3.015625,
      "learning_rate": 2.3421627192743343e-05,
      "loss": 0.8527,
      "step": 495110
    },
    {
      "epoch": 1.7352721614147977,
      "grad_norm": 3.46875,
      "learning_rate": 2.342097816407964e-05,
      "loss": 0.8869,
      "step": 495120
    },
    {
      "epoch": 1.7353072089216934,
      "grad_norm": 2.734375,
      "learning_rate": 2.342032913541594e-05,
      "loss": 0.8775,
      "step": 495130
    },
    {
      "epoch": 1.735342256428589,
      "grad_norm": 3.15625,
      "learning_rate": 2.3419680106752237e-05,
      "loss": 0.8013,
      "step": 495140
    },
    {
      "epoch": 1.7353773039354845,
      "grad_norm": 2.703125,
      "learning_rate": 2.3419031078088532e-05,
      "loss": 0.915,
      "step": 495150
    },
    {
      "epoch": 1.7354123514423803,
      "grad_norm": 3.09375,
      "learning_rate": 2.341838204942483e-05,
      "loss": 0.8694,
      "step": 495160
    },
    {
      "epoch": 1.7354473989492756,
      "grad_norm": 2.78125,
      "learning_rate": 2.3417733020761128e-05,
      "loss": 0.8797,
      "step": 495170
    },
    {
      "epoch": 1.7354824464561713,
      "grad_norm": 2.890625,
      "learning_rate": 2.3417083992097426e-05,
      "loss": 0.818,
      "step": 495180
    },
    {
      "epoch": 1.735517493963067,
      "grad_norm": 2.890625,
      "learning_rate": 2.3416434963433724e-05,
      "loss": 0.8436,
      "step": 495190
    },
    {
      "epoch": 1.7355525414699624,
      "grad_norm": 2.953125,
      "learning_rate": 2.3415785934770025e-05,
      "loss": 0.7989,
      "step": 495200
    },
    {
      "epoch": 1.7355875889768582,
      "grad_norm": 3.03125,
      "learning_rate": 2.3415136906106323e-05,
      "loss": 0.7873,
      "step": 495210
    },
    {
      "epoch": 1.7356226364837537,
      "grad_norm": 2.9375,
      "learning_rate": 2.341448787744262e-05,
      "loss": 0.7926,
      "step": 495220
    },
    {
      "epoch": 1.7356576839906492,
      "grad_norm": 2.796875,
      "learning_rate": 2.341383884877892e-05,
      "loss": 0.7622,
      "step": 495230
    },
    {
      "epoch": 1.735692731497545,
      "grad_norm": 3.328125,
      "learning_rate": 2.3413189820115217e-05,
      "loss": 0.8471,
      "step": 495240
    },
    {
      "epoch": 1.7357277790044405,
      "grad_norm": 3.046875,
      "learning_rate": 2.3412540791451515e-05,
      "loss": 0.8261,
      "step": 495250
    },
    {
      "epoch": 1.735762826511336,
      "grad_norm": 2.828125,
      "learning_rate": 2.3411891762787813e-05,
      "loss": 0.7579,
      "step": 495260
    },
    {
      "epoch": 1.7357978740182318,
      "grad_norm": 2.5,
      "learning_rate": 2.341124273412411e-05,
      "loss": 0.8057,
      "step": 495270
    },
    {
      "epoch": 1.7358329215251274,
      "grad_norm": 2.796875,
      "learning_rate": 2.341059370546041e-05,
      "loss": 0.7589,
      "step": 495280
    },
    {
      "epoch": 1.735867969032023,
      "grad_norm": 3.015625,
      "learning_rate": 2.3409944676796707e-05,
      "loss": 0.7881,
      "step": 495290
    },
    {
      "epoch": 1.7359030165389187,
      "grad_norm": 3.40625,
      "learning_rate": 2.3409295648133005e-05,
      "loss": 0.9144,
      "step": 495300
    },
    {
      "epoch": 1.735938064045814,
      "grad_norm": 3.15625,
      "learning_rate": 2.3408646619469303e-05,
      "loss": 0.8249,
      "step": 495310
    },
    {
      "epoch": 1.7359731115527097,
      "grad_norm": 2.890625,
      "learning_rate": 2.34079975908056e-05,
      "loss": 0.8389,
      "step": 495320
    },
    {
      "epoch": 1.7360081590596053,
      "grad_norm": 3.328125,
      "learning_rate": 2.34073485621419e-05,
      "loss": 0.8732,
      "step": 495330
    },
    {
      "epoch": 1.7360432065665008,
      "grad_norm": 2.984375,
      "learning_rate": 2.3406699533478197e-05,
      "loss": 0.7974,
      "step": 495340
    },
    {
      "epoch": 1.7360782540733966,
      "grad_norm": 2.546875,
      "learning_rate": 2.3406050504814495e-05,
      "loss": 0.8315,
      "step": 495350
    },
    {
      "epoch": 1.736113301580292,
      "grad_norm": 2.625,
      "learning_rate": 2.3405401476150793e-05,
      "loss": 0.8766,
      "step": 495360
    },
    {
      "epoch": 1.7361483490871876,
      "grad_norm": 3.046875,
      "learning_rate": 2.340475244748709e-05,
      "loss": 0.96,
      "step": 495370
    },
    {
      "epoch": 1.7361833965940834,
      "grad_norm": 2.5,
      "learning_rate": 2.340410341882339e-05,
      "loss": 0.8169,
      "step": 495380
    },
    {
      "epoch": 1.736218444100979,
      "grad_norm": 3.40625,
      "learning_rate": 2.3403454390159687e-05,
      "loss": 0.8122,
      "step": 495390
    },
    {
      "epoch": 1.7362534916078745,
      "grad_norm": 2.828125,
      "learning_rate": 2.3402805361495985e-05,
      "loss": 0.923,
      "step": 495400
    },
    {
      "epoch": 1.7362885391147702,
      "grad_norm": 3.203125,
      "learning_rate": 2.3402156332832283e-05,
      "loss": 0.8081,
      "step": 495410
    },
    {
      "epoch": 1.7363235866216655,
      "grad_norm": 2.984375,
      "learning_rate": 2.340150730416858e-05,
      "loss": 0.8573,
      "step": 495420
    },
    {
      "epoch": 1.7363586341285613,
      "grad_norm": 3.203125,
      "learning_rate": 2.340085827550488e-05,
      "loss": 0.8216,
      "step": 495430
    },
    {
      "epoch": 1.7363936816354568,
      "grad_norm": 2.921875,
      "learning_rate": 2.3400209246841177e-05,
      "loss": 0.8906,
      "step": 495440
    },
    {
      "epoch": 1.7364287291423524,
      "grad_norm": 2.5625,
      "learning_rate": 2.339956021817748e-05,
      "loss": 0.849,
      "step": 495450
    },
    {
      "epoch": 1.7364637766492481,
      "grad_norm": 2.828125,
      "learning_rate": 2.3398911189513777e-05,
      "loss": 0.7758,
      "step": 495460
    },
    {
      "epoch": 1.7364988241561436,
      "grad_norm": 2.828125,
      "learning_rate": 2.3398262160850075e-05,
      "loss": 0.8694,
      "step": 495470
    },
    {
      "epoch": 1.7365338716630392,
      "grad_norm": 2.828125,
      "learning_rate": 2.3397613132186373e-05,
      "loss": 0.823,
      "step": 495480
    },
    {
      "epoch": 1.736568919169935,
      "grad_norm": 3.078125,
      "learning_rate": 2.339696410352267e-05,
      "loss": 0.893,
      "step": 495490
    },
    {
      "epoch": 1.7366039666768305,
      "grad_norm": 2.84375,
      "learning_rate": 2.339631507485897e-05,
      "loss": 0.8593,
      "step": 495500
    },
    {
      "epoch": 1.736639014183726,
      "grad_norm": 2.828125,
      "learning_rate": 2.3395666046195267e-05,
      "loss": 0.927,
      "step": 495510
    },
    {
      "epoch": 1.7366740616906218,
      "grad_norm": 2.234375,
      "learning_rate": 2.3395017017531565e-05,
      "loss": 0.8025,
      "step": 495520
    },
    {
      "epoch": 1.736709109197517,
      "grad_norm": 3.125,
      "learning_rate": 2.339436798886786e-05,
      "loss": 0.843,
      "step": 495530
    },
    {
      "epoch": 1.7367441567044128,
      "grad_norm": 2.546875,
      "learning_rate": 2.3393718960204157e-05,
      "loss": 0.708,
      "step": 495540
    },
    {
      "epoch": 1.7367792042113084,
      "grad_norm": 3.359375,
      "learning_rate": 2.3393069931540455e-05,
      "loss": 0.8338,
      "step": 495550
    },
    {
      "epoch": 1.736814251718204,
      "grad_norm": 2.703125,
      "learning_rate": 2.3392420902876753e-05,
      "loss": 0.7871,
      "step": 495560
    },
    {
      "epoch": 1.7368492992250997,
      "grad_norm": 2.515625,
      "learning_rate": 2.3391771874213055e-05,
      "loss": 0.7866,
      "step": 495570
    },
    {
      "epoch": 1.7368843467319952,
      "grad_norm": 2.8125,
      "learning_rate": 2.3391122845549353e-05,
      "loss": 0.8692,
      "step": 495580
    },
    {
      "epoch": 1.7369193942388907,
      "grad_norm": 3.046875,
      "learning_rate": 2.339047381688565e-05,
      "loss": 0.8292,
      "step": 495590
    },
    {
      "epoch": 1.7369544417457865,
      "grad_norm": 3.046875,
      "learning_rate": 2.338982478822195e-05,
      "loss": 0.8545,
      "step": 495600
    },
    {
      "epoch": 1.736989489252682,
      "grad_norm": 2.8125,
      "learning_rate": 2.3389175759558247e-05,
      "loss": 0.7875,
      "step": 495610
    },
    {
      "epoch": 1.7370245367595776,
      "grad_norm": 2.84375,
      "learning_rate": 2.3388526730894545e-05,
      "loss": 0.8272,
      "step": 495620
    },
    {
      "epoch": 1.7370595842664733,
      "grad_norm": 2.859375,
      "learning_rate": 2.3387877702230843e-05,
      "loss": 0.8173,
      "step": 495630
    },
    {
      "epoch": 1.7370946317733686,
      "grad_norm": 2.90625,
      "learning_rate": 2.338722867356714e-05,
      "loss": 0.8795,
      "step": 495640
    },
    {
      "epoch": 1.7371296792802644,
      "grad_norm": 2.875,
      "learning_rate": 2.338657964490344e-05,
      "loss": 0.9604,
      "step": 495650
    },
    {
      "epoch": 1.73716472678716,
      "grad_norm": 2.59375,
      "learning_rate": 2.3385930616239737e-05,
      "loss": 0.7443,
      "step": 495660
    },
    {
      "epoch": 1.7371997742940555,
      "grad_norm": 2.890625,
      "learning_rate": 2.3385281587576035e-05,
      "loss": 0.7882,
      "step": 495670
    },
    {
      "epoch": 1.7372348218009512,
      "grad_norm": 2.859375,
      "learning_rate": 2.3384632558912333e-05,
      "loss": 0.7804,
      "step": 495680
    },
    {
      "epoch": 1.7372698693078468,
      "grad_norm": 3.296875,
      "learning_rate": 2.338398353024863e-05,
      "loss": 0.7971,
      "step": 495690
    },
    {
      "epoch": 1.7373049168147423,
      "grad_norm": 2.90625,
      "learning_rate": 2.338333450158493e-05,
      "loss": 0.8592,
      "step": 495700
    },
    {
      "epoch": 1.737339964321638,
      "grad_norm": 2.859375,
      "learning_rate": 2.338268547292123e-05,
      "loss": 0.8192,
      "step": 495710
    },
    {
      "epoch": 1.7373750118285336,
      "grad_norm": 2.6875,
      "learning_rate": 2.3382036444257525e-05,
      "loss": 0.8545,
      "step": 495720
    },
    {
      "epoch": 1.7374100593354291,
      "grad_norm": 2.5,
      "learning_rate": 2.3381387415593823e-05,
      "loss": 0.8088,
      "step": 495730
    },
    {
      "epoch": 1.737445106842325,
      "grad_norm": 3.328125,
      "learning_rate": 2.338073838693012e-05,
      "loss": 0.8987,
      "step": 495740
    },
    {
      "epoch": 1.7374801543492202,
      "grad_norm": 2.515625,
      "learning_rate": 2.338008935826642e-05,
      "loss": 0.8202,
      "step": 495750
    },
    {
      "epoch": 1.737515201856116,
      "grad_norm": 3.0,
      "learning_rate": 2.3379440329602717e-05,
      "loss": 0.8389,
      "step": 495760
    },
    {
      "epoch": 1.7375502493630117,
      "grad_norm": 2.859375,
      "learning_rate": 2.3378791300939015e-05,
      "loss": 0.8764,
      "step": 495770
    },
    {
      "epoch": 1.737585296869907,
      "grad_norm": 3.25,
      "learning_rate": 2.3378142272275313e-05,
      "loss": 0.8854,
      "step": 495780
    },
    {
      "epoch": 1.7376203443768028,
      "grad_norm": 3.078125,
      "learning_rate": 2.337749324361161e-05,
      "loss": 0.7945,
      "step": 495790
    },
    {
      "epoch": 1.7376553918836983,
      "grad_norm": 2.765625,
      "learning_rate": 2.337684421494791e-05,
      "loss": 0.8532,
      "step": 495800
    },
    {
      "epoch": 1.7376904393905939,
      "grad_norm": 2.546875,
      "learning_rate": 2.3376195186284207e-05,
      "loss": 0.903,
      "step": 495810
    },
    {
      "epoch": 1.7377254868974896,
      "grad_norm": 2.71875,
      "learning_rate": 2.3375546157620508e-05,
      "loss": 0.8211,
      "step": 495820
    },
    {
      "epoch": 1.7377605344043852,
      "grad_norm": 2.4375,
      "learning_rate": 2.3374897128956806e-05,
      "loss": 0.8321,
      "step": 495830
    },
    {
      "epoch": 1.7377955819112807,
      "grad_norm": 2.8125,
      "learning_rate": 2.3374248100293104e-05,
      "loss": 0.8509,
      "step": 495840
    },
    {
      "epoch": 1.7378306294181765,
      "grad_norm": 2.515625,
      "learning_rate": 2.3373599071629402e-05,
      "loss": 0.8355,
      "step": 495850
    },
    {
      "epoch": 1.737865676925072,
      "grad_norm": 2.765625,
      "learning_rate": 2.33729500429657e-05,
      "loss": 0.7571,
      "step": 495860
    },
    {
      "epoch": 1.7379007244319675,
      "grad_norm": 2.828125,
      "learning_rate": 2.3372301014301998e-05,
      "loss": 0.8234,
      "step": 495870
    },
    {
      "epoch": 1.7379357719388633,
      "grad_norm": 2.421875,
      "learning_rate": 2.3371651985638296e-05,
      "loss": 0.8299,
      "step": 495880
    },
    {
      "epoch": 1.7379708194457586,
      "grad_norm": 2.890625,
      "learning_rate": 2.3371002956974594e-05,
      "loss": 0.8558,
      "step": 495890
    },
    {
      "epoch": 1.7380058669526544,
      "grad_norm": 2.40625,
      "learning_rate": 2.337035392831089e-05,
      "loss": 0.7847,
      "step": 495900
    },
    {
      "epoch": 1.73804091445955,
      "grad_norm": 2.765625,
      "learning_rate": 2.3369704899647187e-05,
      "loss": 0.8208,
      "step": 495910
    },
    {
      "epoch": 1.7380759619664454,
      "grad_norm": 3.0,
      "learning_rate": 2.3369055870983485e-05,
      "loss": 0.8593,
      "step": 495920
    },
    {
      "epoch": 1.7381110094733412,
      "grad_norm": 2.984375,
      "learning_rate": 2.3368406842319786e-05,
      "loss": 0.823,
      "step": 495930
    },
    {
      "epoch": 1.7381460569802367,
      "grad_norm": 2.8125,
      "learning_rate": 2.3367757813656084e-05,
      "loss": 0.8913,
      "step": 495940
    },
    {
      "epoch": 1.7381811044871323,
      "grad_norm": 3.140625,
      "learning_rate": 2.3367108784992382e-05,
      "loss": 0.8645,
      "step": 495950
    },
    {
      "epoch": 1.738216151994028,
      "grad_norm": 2.671875,
      "learning_rate": 2.336645975632868e-05,
      "loss": 0.8165,
      "step": 495960
    },
    {
      "epoch": 1.7382511995009235,
      "grad_norm": 2.78125,
      "learning_rate": 2.3365810727664978e-05,
      "loss": 0.7873,
      "step": 495970
    },
    {
      "epoch": 1.738286247007819,
      "grad_norm": 2.9375,
      "learning_rate": 2.3365161699001276e-05,
      "loss": 0.8117,
      "step": 495980
    },
    {
      "epoch": 1.7383212945147148,
      "grad_norm": 2.125,
      "learning_rate": 2.3364512670337574e-05,
      "loss": 0.6906,
      "step": 495990
    },
    {
      "epoch": 1.7383563420216102,
      "grad_norm": 2.765625,
      "learning_rate": 2.3363863641673872e-05,
      "loss": 0.8779,
      "step": 496000
    },
    {
      "epoch": 1.738391389528506,
      "grad_norm": 2.265625,
      "learning_rate": 2.336321461301017e-05,
      "loss": 0.7643,
      "step": 496010
    },
    {
      "epoch": 1.7384264370354015,
      "grad_norm": 3.015625,
      "learning_rate": 2.3362565584346468e-05,
      "loss": 0.789,
      "step": 496020
    },
    {
      "epoch": 1.738461484542297,
      "grad_norm": 3.125,
      "learning_rate": 2.3361916555682766e-05,
      "loss": 0.8302,
      "step": 496030
    },
    {
      "epoch": 1.7384965320491927,
      "grad_norm": 3.140625,
      "learning_rate": 2.3361267527019064e-05,
      "loss": 0.773,
      "step": 496040
    },
    {
      "epoch": 1.7385315795560883,
      "grad_norm": 3.078125,
      "learning_rate": 2.3360618498355362e-05,
      "loss": 0.8286,
      "step": 496050
    },
    {
      "epoch": 1.7385666270629838,
      "grad_norm": 3.09375,
      "learning_rate": 2.335996946969166e-05,
      "loss": 0.9874,
      "step": 496060
    },
    {
      "epoch": 1.7386016745698796,
      "grad_norm": 2.859375,
      "learning_rate": 2.335932044102796e-05,
      "loss": 0.889,
      "step": 496070
    },
    {
      "epoch": 1.738636722076775,
      "grad_norm": 3.171875,
      "learning_rate": 2.335867141236426e-05,
      "loss": 0.8426,
      "step": 496080
    },
    {
      "epoch": 1.7386717695836706,
      "grad_norm": 2.828125,
      "learning_rate": 2.3358022383700554e-05,
      "loss": 0.8904,
      "step": 496090
    },
    {
      "epoch": 1.7387068170905664,
      "grad_norm": 2.96875,
      "learning_rate": 2.3357373355036852e-05,
      "loss": 0.793,
      "step": 496100
    },
    {
      "epoch": 1.7387418645974617,
      "grad_norm": 3.03125,
      "learning_rate": 2.335672432637315e-05,
      "loss": 0.807,
      "step": 496110
    },
    {
      "epoch": 1.7387769121043575,
      "grad_norm": 2.875,
      "learning_rate": 2.3356075297709448e-05,
      "loss": 0.9327,
      "step": 496120
    },
    {
      "epoch": 1.738811959611253,
      "grad_norm": 2.75,
      "learning_rate": 2.3355426269045746e-05,
      "loss": 0.8854,
      "step": 496130
    },
    {
      "epoch": 1.7388470071181485,
      "grad_norm": 2.890625,
      "learning_rate": 2.3354777240382044e-05,
      "loss": 0.8327,
      "step": 496140
    },
    {
      "epoch": 1.7388820546250443,
      "grad_norm": 3.203125,
      "learning_rate": 2.3354128211718342e-05,
      "loss": 0.8657,
      "step": 496150
    },
    {
      "epoch": 1.7389171021319398,
      "grad_norm": 2.953125,
      "learning_rate": 2.335347918305464e-05,
      "loss": 0.8856,
      "step": 496160
    },
    {
      "epoch": 1.7389521496388354,
      "grad_norm": 3.0,
      "learning_rate": 2.3352830154390938e-05,
      "loss": 0.795,
      "step": 496170
    },
    {
      "epoch": 1.7389871971457311,
      "grad_norm": 3.078125,
      "learning_rate": 2.3352181125727236e-05,
      "loss": 0.7952,
      "step": 496180
    },
    {
      "epoch": 1.7390222446526267,
      "grad_norm": 2.921875,
      "learning_rate": 2.3351532097063537e-05,
      "loss": 0.8214,
      "step": 496190
    },
    {
      "epoch": 1.7390572921595222,
      "grad_norm": 2.953125,
      "learning_rate": 2.3350883068399835e-05,
      "loss": 0.7635,
      "step": 496200
    },
    {
      "epoch": 1.739092339666418,
      "grad_norm": 2.8125,
      "learning_rate": 2.3350234039736133e-05,
      "loss": 0.8617,
      "step": 496210
    },
    {
      "epoch": 1.7391273871733133,
      "grad_norm": 2.953125,
      "learning_rate": 2.334958501107243e-05,
      "loss": 0.8038,
      "step": 496220
    },
    {
      "epoch": 1.739162434680209,
      "grad_norm": 3.25,
      "learning_rate": 2.334893598240873e-05,
      "loss": 0.7974,
      "step": 496230
    },
    {
      "epoch": 1.7391974821871046,
      "grad_norm": 3.03125,
      "learning_rate": 2.3348286953745027e-05,
      "loss": 0.8428,
      "step": 496240
    },
    {
      "epoch": 1.739232529694,
      "grad_norm": 2.53125,
      "learning_rate": 2.3347637925081325e-05,
      "loss": 0.8047,
      "step": 496250
    },
    {
      "epoch": 1.7392675772008959,
      "grad_norm": 2.828125,
      "learning_rate": 2.3346988896417623e-05,
      "loss": 0.8674,
      "step": 496260
    },
    {
      "epoch": 1.7393026247077914,
      "grad_norm": 2.953125,
      "learning_rate": 2.334633986775392e-05,
      "loss": 0.7752,
      "step": 496270
    },
    {
      "epoch": 1.739337672214687,
      "grad_norm": 3.71875,
      "learning_rate": 2.3345690839090216e-05,
      "loss": 0.9023,
      "step": 496280
    },
    {
      "epoch": 1.7393727197215827,
      "grad_norm": 2.703125,
      "learning_rate": 2.3345041810426514e-05,
      "loss": 0.7654,
      "step": 496290
    },
    {
      "epoch": 1.7394077672284782,
      "grad_norm": 3.296875,
      "learning_rate": 2.3344392781762815e-05,
      "loss": 0.8557,
      "step": 496300
    },
    {
      "epoch": 1.7394428147353738,
      "grad_norm": 2.984375,
      "learning_rate": 2.3343743753099113e-05,
      "loss": 0.8268,
      "step": 496310
    },
    {
      "epoch": 1.7394778622422695,
      "grad_norm": 3.75,
      "learning_rate": 2.334309472443541e-05,
      "loss": 0.9405,
      "step": 496320
    },
    {
      "epoch": 1.7395129097491648,
      "grad_norm": 2.890625,
      "learning_rate": 2.334244569577171e-05,
      "loss": 0.8236,
      "step": 496330
    },
    {
      "epoch": 1.7395479572560606,
      "grad_norm": 3.125,
      "learning_rate": 2.3341796667108007e-05,
      "loss": 0.7615,
      "step": 496340
    },
    {
      "epoch": 1.7395830047629564,
      "grad_norm": 3.15625,
      "learning_rate": 2.3341147638444305e-05,
      "loss": 0.799,
      "step": 496350
    },
    {
      "epoch": 1.7396180522698517,
      "grad_norm": 3.234375,
      "learning_rate": 2.3340498609780603e-05,
      "loss": 0.8566,
      "step": 496360
    },
    {
      "epoch": 1.7396530997767474,
      "grad_norm": 2.9375,
      "learning_rate": 2.33398495811169e-05,
      "loss": 0.7957,
      "step": 496370
    },
    {
      "epoch": 1.739688147283643,
      "grad_norm": 2.5625,
      "learning_rate": 2.33392005524532e-05,
      "loss": 0.7911,
      "step": 496380
    },
    {
      "epoch": 1.7397231947905385,
      "grad_norm": 3.359375,
      "learning_rate": 2.3338551523789497e-05,
      "loss": 0.8756,
      "step": 496390
    },
    {
      "epoch": 1.7397582422974343,
      "grad_norm": 3.328125,
      "learning_rate": 2.3337902495125795e-05,
      "loss": 0.8221,
      "step": 496400
    },
    {
      "epoch": 1.7397932898043298,
      "grad_norm": 3.09375,
      "learning_rate": 2.3337253466462093e-05,
      "loss": 0.8122,
      "step": 496410
    },
    {
      "epoch": 1.7398283373112253,
      "grad_norm": 2.90625,
      "learning_rate": 2.333660443779839e-05,
      "loss": 0.8374,
      "step": 496420
    },
    {
      "epoch": 1.739863384818121,
      "grad_norm": 2.796875,
      "learning_rate": 2.333595540913469e-05,
      "loss": 0.8343,
      "step": 496430
    },
    {
      "epoch": 1.7398984323250164,
      "grad_norm": 2.90625,
      "learning_rate": 2.333530638047099e-05,
      "loss": 0.8192,
      "step": 496440
    },
    {
      "epoch": 1.7399334798319122,
      "grad_norm": 2.796875,
      "learning_rate": 2.333465735180729e-05,
      "loss": 0.7834,
      "step": 496450
    },
    {
      "epoch": 1.739968527338808,
      "grad_norm": 3.125,
      "learning_rate": 2.3334008323143587e-05,
      "loss": 0.9028,
      "step": 496460
    },
    {
      "epoch": 1.7400035748457032,
      "grad_norm": 2.859375,
      "learning_rate": 2.333335929447988e-05,
      "loss": 0.853,
      "step": 496470
    },
    {
      "epoch": 1.740038622352599,
      "grad_norm": 2.8125,
      "learning_rate": 2.333271026581618e-05,
      "loss": 0.7862,
      "step": 496480
    },
    {
      "epoch": 1.7400736698594945,
      "grad_norm": 3.3125,
      "learning_rate": 2.3332061237152477e-05,
      "loss": 0.8513,
      "step": 496490
    },
    {
      "epoch": 1.74010871736639,
      "grad_norm": 2.765625,
      "learning_rate": 2.3331412208488775e-05,
      "loss": 0.8138,
      "step": 496500
    },
    {
      "epoch": 1.7401437648732858,
      "grad_norm": 2.625,
      "learning_rate": 2.3330763179825073e-05,
      "loss": 0.857,
      "step": 496510
    },
    {
      "epoch": 1.7401788123801814,
      "grad_norm": 2.5625,
      "learning_rate": 2.333011415116137e-05,
      "loss": 0.7946,
      "step": 496520
    },
    {
      "epoch": 1.7402138598870769,
      "grad_norm": 2.8125,
      "learning_rate": 2.332946512249767e-05,
      "loss": 0.801,
      "step": 496530
    },
    {
      "epoch": 1.7402489073939726,
      "grad_norm": 2.625,
      "learning_rate": 2.3328816093833967e-05,
      "loss": 0.8581,
      "step": 496540
    },
    {
      "epoch": 1.7402839549008682,
      "grad_norm": 3.0,
      "learning_rate": 2.332816706517027e-05,
      "loss": 0.7741,
      "step": 496550
    },
    {
      "epoch": 1.7403190024077637,
      "grad_norm": 2.84375,
      "learning_rate": 2.3327518036506567e-05,
      "loss": 0.8131,
      "step": 496560
    },
    {
      "epoch": 1.7403540499146595,
      "grad_norm": 3.078125,
      "learning_rate": 2.3326869007842865e-05,
      "loss": 0.9376,
      "step": 496570
    },
    {
      "epoch": 1.7403890974215548,
      "grad_norm": 2.84375,
      "learning_rate": 2.3326219979179163e-05,
      "loss": 0.8288,
      "step": 496580
    },
    {
      "epoch": 1.7404241449284505,
      "grad_norm": 2.984375,
      "learning_rate": 2.332557095051546e-05,
      "loss": 0.922,
      "step": 496590
    },
    {
      "epoch": 1.740459192435346,
      "grad_norm": 2.203125,
      "learning_rate": 2.332492192185176e-05,
      "loss": 0.7862,
      "step": 496600
    },
    {
      "epoch": 1.7404942399422416,
      "grad_norm": 2.5625,
      "learning_rate": 2.3324272893188057e-05,
      "loss": 0.7861,
      "step": 496610
    },
    {
      "epoch": 1.7405292874491374,
      "grad_norm": 2.84375,
      "learning_rate": 2.3323623864524355e-05,
      "loss": 0.8237,
      "step": 496620
    },
    {
      "epoch": 1.740564334956033,
      "grad_norm": 2.921875,
      "learning_rate": 2.3322974835860653e-05,
      "loss": 0.8681,
      "step": 496630
    },
    {
      "epoch": 1.7405993824629284,
      "grad_norm": 3.421875,
      "learning_rate": 2.332232580719695e-05,
      "loss": 0.8686,
      "step": 496640
    },
    {
      "epoch": 1.7406344299698242,
      "grad_norm": 3.515625,
      "learning_rate": 2.332167677853325e-05,
      "loss": 0.9077,
      "step": 496650
    },
    {
      "epoch": 1.7406694774767197,
      "grad_norm": 3.21875,
      "learning_rate": 2.3321027749869543e-05,
      "loss": 0.8579,
      "step": 496660
    },
    {
      "epoch": 1.7407045249836153,
      "grad_norm": 2.953125,
      "learning_rate": 2.3320378721205845e-05,
      "loss": 0.8683,
      "step": 496670
    },
    {
      "epoch": 1.740739572490511,
      "grad_norm": 2.53125,
      "learning_rate": 2.3319729692542143e-05,
      "loss": 0.7587,
      "step": 496680
    },
    {
      "epoch": 1.7407746199974063,
      "grad_norm": 3.03125,
      "learning_rate": 2.331908066387844e-05,
      "loss": 0.8614,
      "step": 496690
    },
    {
      "epoch": 1.740809667504302,
      "grad_norm": 2.84375,
      "learning_rate": 2.331843163521474e-05,
      "loss": 0.8393,
      "step": 496700
    },
    {
      "epoch": 1.7408447150111976,
      "grad_norm": 4.75,
      "learning_rate": 2.3317782606551037e-05,
      "loss": 0.8488,
      "step": 496710
    },
    {
      "epoch": 1.7408797625180932,
      "grad_norm": 2.296875,
      "learning_rate": 2.3317133577887335e-05,
      "loss": 0.8056,
      "step": 496720
    },
    {
      "epoch": 1.740914810024989,
      "grad_norm": 2.765625,
      "learning_rate": 2.3316484549223633e-05,
      "loss": 0.8299,
      "step": 496730
    },
    {
      "epoch": 1.7409498575318845,
      "grad_norm": 3.34375,
      "learning_rate": 2.331583552055993e-05,
      "loss": 0.7809,
      "step": 496740
    },
    {
      "epoch": 1.74098490503878,
      "grad_norm": 2.8125,
      "learning_rate": 2.331518649189623e-05,
      "loss": 0.8374,
      "step": 496750
    },
    {
      "epoch": 1.7410199525456758,
      "grad_norm": 3.1875,
      "learning_rate": 2.3314537463232527e-05,
      "loss": 0.8879,
      "step": 496760
    },
    {
      "epoch": 1.7410550000525713,
      "grad_norm": 3.4375,
      "learning_rate": 2.3313888434568825e-05,
      "loss": 0.8726,
      "step": 496770
    },
    {
      "epoch": 1.7410900475594668,
      "grad_norm": 2.984375,
      "learning_rate": 2.3313239405905123e-05,
      "loss": 0.8234,
      "step": 496780
    },
    {
      "epoch": 1.7411250950663626,
      "grad_norm": 2.953125,
      "learning_rate": 2.331259037724142e-05,
      "loss": 0.8524,
      "step": 496790
    },
    {
      "epoch": 1.741160142573258,
      "grad_norm": 2.796875,
      "learning_rate": 2.331194134857772e-05,
      "loss": 0.855,
      "step": 496800
    },
    {
      "epoch": 1.7411951900801537,
      "grad_norm": 2.984375,
      "learning_rate": 2.331129231991402e-05,
      "loss": 0.7674,
      "step": 496810
    },
    {
      "epoch": 1.7412302375870492,
      "grad_norm": 2.609375,
      "learning_rate": 2.3310643291250318e-05,
      "loss": 0.852,
      "step": 496820
    },
    {
      "epoch": 1.7412652850939447,
      "grad_norm": 2.59375,
      "learning_rate": 2.3309994262586616e-05,
      "loss": 0.8327,
      "step": 496830
    },
    {
      "epoch": 1.7413003326008405,
      "grad_norm": 3.171875,
      "learning_rate": 2.3309345233922914e-05,
      "loss": 0.8007,
      "step": 496840
    },
    {
      "epoch": 1.741335380107736,
      "grad_norm": 3.1875,
      "learning_rate": 2.330869620525921e-05,
      "loss": 0.8976,
      "step": 496850
    },
    {
      "epoch": 1.7413704276146316,
      "grad_norm": 3.171875,
      "learning_rate": 2.3308047176595507e-05,
      "loss": 0.8009,
      "step": 496860
    },
    {
      "epoch": 1.7414054751215273,
      "grad_norm": 2.9375,
      "learning_rate": 2.3307398147931805e-05,
      "loss": 0.8421,
      "step": 496870
    },
    {
      "epoch": 1.7414405226284229,
      "grad_norm": 3.046875,
      "learning_rate": 2.3306749119268103e-05,
      "loss": 0.8118,
      "step": 496880
    },
    {
      "epoch": 1.7414755701353184,
      "grad_norm": 3.078125,
      "learning_rate": 2.33061000906044e-05,
      "loss": 0.8764,
      "step": 496890
    },
    {
      "epoch": 1.7415106176422142,
      "grad_norm": 2.984375,
      "learning_rate": 2.33054510619407e-05,
      "loss": 0.9025,
      "step": 496900
    },
    {
      "epoch": 1.7415456651491095,
      "grad_norm": 3.1875,
      "learning_rate": 2.3304802033276997e-05,
      "loss": 0.7784,
      "step": 496910
    },
    {
      "epoch": 1.7415807126560052,
      "grad_norm": 3.203125,
      "learning_rate": 2.3304153004613298e-05,
      "loss": 0.9087,
      "step": 496920
    },
    {
      "epoch": 1.7416157601629008,
      "grad_norm": 2.796875,
      "learning_rate": 2.3303503975949596e-05,
      "loss": 0.838,
      "step": 496930
    },
    {
      "epoch": 1.7416508076697963,
      "grad_norm": 2.859375,
      "learning_rate": 2.3302854947285894e-05,
      "loss": 0.796,
      "step": 496940
    },
    {
      "epoch": 1.741685855176692,
      "grad_norm": 3.03125,
      "learning_rate": 2.3302205918622192e-05,
      "loss": 0.8463,
      "step": 496950
    },
    {
      "epoch": 1.7417209026835876,
      "grad_norm": 2.703125,
      "learning_rate": 2.330155688995849e-05,
      "loss": 0.7694,
      "step": 496960
    },
    {
      "epoch": 1.7417559501904831,
      "grad_norm": 2.59375,
      "learning_rate": 2.3300907861294788e-05,
      "loss": 0.7426,
      "step": 496970
    },
    {
      "epoch": 1.7417909976973789,
      "grad_norm": 3.21875,
      "learning_rate": 2.3300258832631086e-05,
      "loss": 0.8226,
      "step": 496980
    },
    {
      "epoch": 1.7418260452042744,
      "grad_norm": 2.765625,
      "learning_rate": 2.3299609803967384e-05,
      "loss": 0.8972,
      "step": 496990
    },
    {
      "epoch": 1.74186109271117,
      "grad_norm": 3.1875,
      "learning_rate": 2.3298960775303682e-05,
      "loss": 0.8212,
      "step": 497000
    },
    {
      "epoch": 1.7418961402180657,
      "grad_norm": 2.9375,
      "learning_rate": 2.329831174663998e-05,
      "loss": 0.9275,
      "step": 497010
    },
    {
      "epoch": 1.741931187724961,
      "grad_norm": 2.75,
      "learning_rate": 2.3297662717976278e-05,
      "loss": 0.798,
      "step": 497020
    },
    {
      "epoch": 1.7419662352318568,
      "grad_norm": 2.890625,
      "learning_rate": 2.3297013689312576e-05,
      "loss": 0.8044,
      "step": 497030
    },
    {
      "epoch": 1.7420012827387525,
      "grad_norm": 2.890625,
      "learning_rate": 2.3296364660648874e-05,
      "loss": 0.8226,
      "step": 497040
    },
    {
      "epoch": 1.7420363302456479,
      "grad_norm": 3.078125,
      "learning_rate": 2.3295715631985172e-05,
      "loss": 0.9001,
      "step": 497050
    },
    {
      "epoch": 1.7420713777525436,
      "grad_norm": 3.3125,
      "learning_rate": 2.329506660332147e-05,
      "loss": 0.8408,
      "step": 497060
    },
    {
      "epoch": 1.7421064252594392,
      "grad_norm": 2.75,
      "learning_rate": 2.3294417574657768e-05,
      "loss": 0.7813,
      "step": 497070
    },
    {
      "epoch": 1.7421414727663347,
      "grad_norm": 3.0,
      "learning_rate": 2.3293768545994066e-05,
      "loss": 0.7751,
      "step": 497080
    },
    {
      "epoch": 1.7421765202732304,
      "grad_norm": 3.15625,
      "learning_rate": 2.3293119517330364e-05,
      "loss": 0.7707,
      "step": 497090
    },
    {
      "epoch": 1.742211567780126,
      "grad_norm": 3.171875,
      "learning_rate": 2.3292470488666662e-05,
      "loss": 0.8327,
      "step": 497100
    },
    {
      "epoch": 1.7422466152870215,
      "grad_norm": 3.390625,
      "learning_rate": 2.329182146000296e-05,
      "loss": 0.89,
      "step": 497110
    },
    {
      "epoch": 1.7422816627939173,
      "grad_norm": 3.078125,
      "learning_rate": 2.3291172431339258e-05,
      "loss": 0.7976,
      "step": 497120
    },
    {
      "epoch": 1.7423167103008126,
      "grad_norm": 2.625,
      "learning_rate": 2.3290523402675556e-05,
      "loss": 0.8,
      "step": 497130
    },
    {
      "epoch": 1.7423517578077083,
      "grad_norm": 3.078125,
      "learning_rate": 2.3289874374011854e-05,
      "loss": 0.929,
      "step": 497140
    },
    {
      "epoch": 1.742386805314604,
      "grad_norm": 3.3125,
      "learning_rate": 2.3289225345348152e-05,
      "loss": 0.8724,
      "step": 497150
    },
    {
      "epoch": 1.7424218528214994,
      "grad_norm": 2.90625,
      "learning_rate": 2.328857631668445e-05,
      "loss": 0.8851,
      "step": 497160
    },
    {
      "epoch": 1.7424569003283952,
      "grad_norm": 2.984375,
      "learning_rate": 2.328792728802075e-05,
      "loss": 0.8107,
      "step": 497170
    },
    {
      "epoch": 1.7424919478352907,
      "grad_norm": 2.84375,
      "learning_rate": 2.328727825935705e-05,
      "loss": 0.8212,
      "step": 497180
    },
    {
      "epoch": 1.7425269953421862,
      "grad_norm": 2.71875,
      "learning_rate": 2.3286629230693348e-05,
      "loss": 0.8514,
      "step": 497190
    },
    {
      "epoch": 1.742562042849082,
      "grad_norm": 2.625,
      "learning_rate": 2.3285980202029646e-05,
      "loss": 0.8496,
      "step": 497200
    },
    {
      "epoch": 1.7425970903559775,
      "grad_norm": 3.140625,
      "learning_rate": 2.3285331173365944e-05,
      "loss": 0.8455,
      "step": 497210
    },
    {
      "epoch": 1.742632137862873,
      "grad_norm": 2.796875,
      "learning_rate": 2.3284682144702238e-05,
      "loss": 0.9683,
      "step": 497220
    },
    {
      "epoch": 1.7426671853697688,
      "grad_norm": 2.84375,
      "learning_rate": 2.3284033116038536e-05,
      "loss": 0.7692,
      "step": 497230
    },
    {
      "epoch": 1.7427022328766644,
      "grad_norm": 3.078125,
      "learning_rate": 2.3283384087374834e-05,
      "loss": 0.8402,
      "step": 497240
    },
    {
      "epoch": 1.74273728038356,
      "grad_norm": 3.296875,
      "learning_rate": 2.3282735058711132e-05,
      "loss": 0.8539,
      "step": 497250
    },
    {
      "epoch": 1.7427723278904557,
      "grad_norm": 2.59375,
      "learning_rate": 2.328208603004743e-05,
      "loss": 0.7879,
      "step": 497260
    },
    {
      "epoch": 1.742807375397351,
      "grad_norm": 2.859375,
      "learning_rate": 2.3281437001383728e-05,
      "loss": 0.8214,
      "step": 497270
    },
    {
      "epoch": 1.7428424229042467,
      "grad_norm": 3.140625,
      "learning_rate": 2.3280787972720026e-05,
      "loss": 0.8752,
      "step": 497280
    },
    {
      "epoch": 1.7428774704111423,
      "grad_norm": 3.3125,
      "learning_rate": 2.3280138944056328e-05,
      "loss": 0.8682,
      "step": 497290
    },
    {
      "epoch": 1.7429125179180378,
      "grad_norm": 3.109375,
      "learning_rate": 2.3279489915392626e-05,
      "loss": 0.8576,
      "step": 497300
    },
    {
      "epoch": 1.7429475654249336,
      "grad_norm": 2.953125,
      "learning_rate": 2.3278840886728924e-05,
      "loss": 0.8262,
      "step": 497310
    },
    {
      "epoch": 1.742982612931829,
      "grad_norm": 3.875,
      "learning_rate": 2.327819185806522e-05,
      "loss": 0.8031,
      "step": 497320
    },
    {
      "epoch": 1.7430176604387246,
      "grad_norm": 2.90625,
      "learning_rate": 2.327754282940152e-05,
      "loss": 0.8498,
      "step": 497330
    },
    {
      "epoch": 1.7430527079456204,
      "grad_norm": 2.875,
      "learning_rate": 2.3276893800737818e-05,
      "loss": 0.7846,
      "step": 497340
    },
    {
      "epoch": 1.743087755452516,
      "grad_norm": 2.875,
      "learning_rate": 2.3276244772074116e-05,
      "loss": 0.8493,
      "step": 497350
    },
    {
      "epoch": 1.7431228029594115,
      "grad_norm": 3.3125,
      "learning_rate": 2.3275595743410414e-05,
      "loss": 0.7979,
      "step": 497360
    },
    {
      "epoch": 1.7431578504663072,
      "grad_norm": 2.828125,
      "learning_rate": 2.327494671474671e-05,
      "loss": 0.8329,
      "step": 497370
    },
    {
      "epoch": 1.7431928979732025,
      "grad_norm": 2.875,
      "learning_rate": 2.327429768608301e-05,
      "loss": 0.8213,
      "step": 497380
    },
    {
      "epoch": 1.7432279454800983,
      "grad_norm": 3.203125,
      "learning_rate": 2.3273648657419308e-05,
      "loss": 0.875,
      "step": 497390
    },
    {
      "epoch": 1.7432629929869938,
      "grad_norm": 2.515625,
      "learning_rate": 2.3272999628755606e-05,
      "loss": 0.8851,
      "step": 497400
    },
    {
      "epoch": 1.7432980404938894,
      "grad_norm": 2.828125,
      "learning_rate": 2.3272350600091904e-05,
      "loss": 0.8168,
      "step": 497410
    },
    {
      "epoch": 1.7433330880007851,
      "grad_norm": 2.796875,
      "learning_rate": 2.32717015714282e-05,
      "loss": 0.7747,
      "step": 497420
    },
    {
      "epoch": 1.7433681355076807,
      "grad_norm": 3.015625,
      "learning_rate": 2.32710525427645e-05,
      "loss": 0.8498,
      "step": 497430
    },
    {
      "epoch": 1.7434031830145762,
      "grad_norm": 2.890625,
      "learning_rate": 2.3270403514100798e-05,
      "loss": 0.8734,
      "step": 497440
    },
    {
      "epoch": 1.743438230521472,
      "grad_norm": 2.84375,
      "learning_rate": 2.3269754485437096e-05,
      "loss": 0.8108,
      "step": 497450
    },
    {
      "epoch": 1.7434732780283675,
      "grad_norm": 2.4375,
      "learning_rate": 2.3269105456773394e-05,
      "loss": 0.8293,
      "step": 497460
    },
    {
      "epoch": 1.743508325535263,
      "grad_norm": 3.0,
      "learning_rate": 2.326845642810969e-05,
      "loss": 0.8006,
      "step": 497470
    },
    {
      "epoch": 1.7435433730421588,
      "grad_norm": 2.78125,
      "learning_rate": 2.326780739944599e-05,
      "loss": 0.841,
      "step": 497480
    },
    {
      "epoch": 1.743578420549054,
      "grad_norm": 2.671875,
      "learning_rate": 2.3267158370782288e-05,
      "loss": 0.8658,
      "step": 497490
    },
    {
      "epoch": 1.7436134680559499,
      "grad_norm": 2.390625,
      "learning_rate": 2.3266509342118586e-05,
      "loss": 0.8787,
      "step": 497500
    },
    {
      "epoch": 1.7436485155628454,
      "grad_norm": 2.9375,
      "learning_rate": 2.3265860313454884e-05,
      "loss": 0.8399,
      "step": 497510
    },
    {
      "epoch": 1.743683563069741,
      "grad_norm": 3.125,
      "learning_rate": 2.326521128479118e-05,
      "loss": 0.875,
      "step": 497520
    },
    {
      "epoch": 1.7437186105766367,
      "grad_norm": 2.453125,
      "learning_rate": 2.326456225612748e-05,
      "loss": 0.8522,
      "step": 497530
    },
    {
      "epoch": 1.7437536580835322,
      "grad_norm": 2.578125,
      "learning_rate": 2.326391322746378e-05,
      "loss": 0.8354,
      "step": 497540
    },
    {
      "epoch": 1.7437887055904278,
      "grad_norm": 2.578125,
      "learning_rate": 2.326326419880008e-05,
      "loss": 0.8335,
      "step": 497550
    },
    {
      "epoch": 1.7438237530973235,
      "grad_norm": 2.921875,
      "learning_rate": 2.3262615170136377e-05,
      "loss": 0.8046,
      "step": 497560
    },
    {
      "epoch": 1.743858800604219,
      "grad_norm": 2.9375,
      "learning_rate": 2.3261966141472675e-05,
      "loss": 0.8281,
      "step": 497570
    },
    {
      "epoch": 1.7438938481111146,
      "grad_norm": 2.671875,
      "learning_rate": 2.3261317112808973e-05,
      "loss": 0.8059,
      "step": 497580
    },
    {
      "epoch": 1.7439288956180103,
      "grad_norm": 2.515625,
      "learning_rate": 2.326066808414527e-05,
      "loss": 0.8346,
      "step": 497590
    },
    {
      "epoch": 1.7439639431249057,
      "grad_norm": 3.03125,
      "learning_rate": 2.3260019055481566e-05,
      "loss": 0.8278,
      "step": 497600
    },
    {
      "epoch": 1.7439989906318014,
      "grad_norm": 2.921875,
      "learning_rate": 2.3259370026817864e-05,
      "loss": 0.8097,
      "step": 497610
    },
    {
      "epoch": 1.744034038138697,
      "grad_norm": 2.953125,
      "learning_rate": 2.325872099815416e-05,
      "loss": 0.8524,
      "step": 497620
    },
    {
      "epoch": 1.7440690856455925,
      "grad_norm": 2.96875,
      "learning_rate": 2.325807196949046e-05,
      "loss": 0.8865,
      "step": 497630
    },
    {
      "epoch": 1.7441041331524882,
      "grad_norm": 2.703125,
      "learning_rate": 2.3257422940826758e-05,
      "loss": 0.7932,
      "step": 497640
    },
    {
      "epoch": 1.7441391806593838,
      "grad_norm": 2.765625,
      "learning_rate": 2.325677391216306e-05,
      "loss": 0.812,
      "step": 497650
    },
    {
      "epoch": 1.7441742281662793,
      "grad_norm": 2.71875,
      "learning_rate": 2.3256124883499357e-05,
      "loss": 0.8159,
      "step": 497660
    },
    {
      "epoch": 1.744209275673175,
      "grad_norm": 2.53125,
      "learning_rate": 2.3255475854835655e-05,
      "loss": 0.8392,
      "step": 497670
    },
    {
      "epoch": 1.7442443231800706,
      "grad_norm": 3.09375,
      "learning_rate": 2.3254826826171953e-05,
      "loss": 0.8425,
      "step": 497680
    },
    {
      "epoch": 1.7442793706869661,
      "grad_norm": 2.6875,
      "learning_rate": 2.325417779750825e-05,
      "loss": 0.77,
      "step": 497690
    },
    {
      "epoch": 1.744314418193862,
      "grad_norm": 2.53125,
      "learning_rate": 2.325352876884455e-05,
      "loss": 0.7829,
      "step": 497700
    },
    {
      "epoch": 1.7443494657007572,
      "grad_norm": 3.0,
      "learning_rate": 2.3252879740180847e-05,
      "loss": 0.856,
      "step": 497710
    },
    {
      "epoch": 1.744384513207653,
      "grad_norm": 2.75,
      "learning_rate": 2.3252230711517145e-05,
      "loss": 0.8306,
      "step": 497720
    },
    {
      "epoch": 1.7444195607145487,
      "grad_norm": 3.109375,
      "learning_rate": 2.3251581682853443e-05,
      "loss": 0.87,
      "step": 497730
    },
    {
      "epoch": 1.744454608221444,
      "grad_norm": 2.890625,
      "learning_rate": 2.325093265418974e-05,
      "loss": 0.8941,
      "step": 497740
    },
    {
      "epoch": 1.7444896557283398,
      "grad_norm": 2.390625,
      "learning_rate": 2.325028362552604e-05,
      "loss": 0.8725,
      "step": 497750
    },
    {
      "epoch": 1.7445247032352353,
      "grad_norm": 2.765625,
      "learning_rate": 2.3249634596862337e-05,
      "loss": 0.8692,
      "step": 497760
    },
    {
      "epoch": 1.7445597507421309,
      "grad_norm": 2.625,
      "learning_rate": 2.3248985568198635e-05,
      "loss": 0.811,
      "step": 497770
    },
    {
      "epoch": 1.7445947982490266,
      "grad_norm": 2.546875,
      "learning_rate": 2.3248336539534933e-05,
      "loss": 0.802,
      "step": 497780
    },
    {
      "epoch": 1.7446298457559222,
      "grad_norm": 3.515625,
      "learning_rate": 2.324768751087123e-05,
      "loss": 0.8316,
      "step": 497790
    },
    {
      "epoch": 1.7446648932628177,
      "grad_norm": 3.0625,
      "learning_rate": 2.324703848220753e-05,
      "loss": 0.8638,
      "step": 497800
    },
    {
      "epoch": 1.7446999407697135,
      "grad_norm": 2.96875,
      "learning_rate": 2.3246389453543827e-05,
      "loss": 0.8504,
      "step": 497810
    },
    {
      "epoch": 1.7447349882766088,
      "grad_norm": 2.671875,
      "learning_rate": 2.3245740424880125e-05,
      "loss": 0.8428,
      "step": 497820
    },
    {
      "epoch": 1.7447700357835045,
      "grad_norm": 2.703125,
      "learning_rate": 2.3245091396216423e-05,
      "loss": 0.7184,
      "step": 497830
    },
    {
      "epoch": 1.7448050832904003,
      "grad_norm": 2.734375,
      "learning_rate": 2.324444236755272e-05,
      "loss": 0.867,
      "step": 497840
    },
    {
      "epoch": 1.7448401307972956,
      "grad_norm": 3.171875,
      "learning_rate": 2.324379333888902e-05,
      "loss": 0.798,
      "step": 497850
    },
    {
      "epoch": 1.7448751783041914,
      "grad_norm": 3.296875,
      "learning_rate": 2.3243144310225317e-05,
      "loss": 0.8685,
      "step": 497860
    },
    {
      "epoch": 1.744910225811087,
      "grad_norm": 3.0625,
      "learning_rate": 2.3242495281561615e-05,
      "loss": 0.9201,
      "step": 497870
    },
    {
      "epoch": 1.7449452733179824,
      "grad_norm": 3.4375,
      "learning_rate": 2.3241846252897913e-05,
      "loss": 0.7831,
      "step": 497880
    },
    {
      "epoch": 1.7449803208248782,
      "grad_norm": 3.046875,
      "learning_rate": 2.324119722423421e-05,
      "loss": 0.9254,
      "step": 497890
    },
    {
      "epoch": 1.7450153683317737,
      "grad_norm": 3.390625,
      "learning_rate": 2.324054819557051e-05,
      "loss": 0.8358,
      "step": 497900
    },
    {
      "epoch": 1.7450504158386693,
      "grad_norm": 2.671875,
      "learning_rate": 2.323989916690681e-05,
      "loss": 0.8907,
      "step": 497910
    },
    {
      "epoch": 1.745085463345565,
      "grad_norm": 2.890625,
      "learning_rate": 2.323925013824311e-05,
      "loss": 0.8506,
      "step": 497920
    },
    {
      "epoch": 1.7451205108524606,
      "grad_norm": 2.75,
      "learning_rate": 2.3238601109579406e-05,
      "loss": 0.828,
      "step": 497930
    },
    {
      "epoch": 1.745155558359356,
      "grad_norm": 2.859375,
      "learning_rate": 2.3237952080915704e-05,
      "loss": 0.813,
      "step": 497940
    },
    {
      "epoch": 1.7451906058662519,
      "grad_norm": 3.015625,
      "learning_rate": 2.3237303052252002e-05,
      "loss": 0.9378,
      "step": 497950
    },
    {
      "epoch": 1.7452256533731472,
      "grad_norm": 2.90625,
      "learning_rate": 2.32366540235883e-05,
      "loss": 0.8006,
      "step": 497960
    },
    {
      "epoch": 1.745260700880043,
      "grad_norm": 2.53125,
      "learning_rate": 2.3236004994924595e-05,
      "loss": 0.7621,
      "step": 497970
    },
    {
      "epoch": 1.7452957483869385,
      "grad_norm": 3.046875,
      "learning_rate": 2.3235355966260893e-05,
      "loss": 0.8462,
      "step": 497980
    },
    {
      "epoch": 1.745330795893834,
      "grad_norm": 2.59375,
      "learning_rate": 2.323470693759719e-05,
      "loss": 0.8314,
      "step": 497990
    },
    {
      "epoch": 1.7453658434007298,
      "grad_norm": 2.875,
      "learning_rate": 2.323405790893349e-05,
      "loss": 0.8245,
      "step": 498000
    },
    {
      "epoch": 1.7454008909076253,
      "grad_norm": 3.03125,
      "learning_rate": 2.3233408880269787e-05,
      "loss": 0.8137,
      "step": 498010
    },
    {
      "epoch": 1.7454359384145208,
      "grad_norm": 2.875,
      "learning_rate": 2.323275985160609e-05,
      "loss": 0.709,
      "step": 498020
    },
    {
      "epoch": 1.7454709859214166,
      "grad_norm": 2.4375,
      "learning_rate": 2.3232110822942386e-05,
      "loss": 0.8398,
      "step": 498030
    },
    {
      "epoch": 1.7455060334283121,
      "grad_norm": 2.1875,
      "learning_rate": 2.3231461794278684e-05,
      "loss": 0.8089,
      "step": 498040
    },
    {
      "epoch": 1.7455410809352077,
      "grad_norm": 2.78125,
      "learning_rate": 2.3230812765614982e-05,
      "loss": 0.8195,
      "step": 498050
    },
    {
      "epoch": 1.7455761284421034,
      "grad_norm": 3.34375,
      "learning_rate": 2.323016373695128e-05,
      "loss": 0.8303,
      "step": 498060
    },
    {
      "epoch": 1.7456111759489987,
      "grad_norm": 2.9375,
      "learning_rate": 2.322951470828758e-05,
      "loss": 0.8651,
      "step": 498070
    },
    {
      "epoch": 1.7456462234558945,
      "grad_norm": 3.015625,
      "learning_rate": 2.3228865679623876e-05,
      "loss": 0.8268,
      "step": 498080
    },
    {
      "epoch": 1.74568127096279,
      "grad_norm": 2.828125,
      "learning_rate": 2.3228216650960174e-05,
      "loss": 0.7517,
      "step": 498090
    },
    {
      "epoch": 1.7457163184696856,
      "grad_norm": 2.625,
      "learning_rate": 2.3227567622296472e-05,
      "loss": 0.8505,
      "step": 498100
    },
    {
      "epoch": 1.7457513659765813,
      "grad_norm": 2.953125,
      "learning_rate": 2.322691859363277e-05,
      "loss": 0.7954,
      "step": 498110
    },
    {
      "epoch": 1.7457864134834769,
      "grad_norm": 3.3125,
      "learning_rate": 2.322626956496907e-05,
      "loss": 0.8859,
      "step": 498120
    },
    {
      "epoch": 1.7458214609903724,
      "grad_norm": 3.125,
      "learning_rate": 2.3225620536305366e-05,
      "loss": 0.8736,
      "step": 498130
    },
    {
      "epoch": 1.7458565084972681,
      "grad_norm": 2.5,
      "learning_rate": 2.3224971507641664e-05,
      "loss": 0.7892,
      "step": 498140
    },
    {
      "epoch": 1.7458915560041637,
      "grad_norm": 3.1875,
      "learning_rate": 2.3224322478977962e-05,
      "loss": 0.8565,
      "step": 498150
    },
    {
      "epoch": 1.7459266035110592,
      "grad_norm": 2.828125,
      "learning_rate": 2.322367345031426e-05,
      "loss": 0.8357,
      "step": 498160
    },
    {
      "epoch": 1.745961651017955,
      "grad_norm": 3.109375,
      "learning_rate": 2.322302442165056e-05,
      "loss": 0.8055,
      "step": 498170
    },
    {
      "epoch": 1.7459966985248503,
      "grad_norm": 3.15625,
      "learning_rate": 2.3222375392986856e-05,
      "loss": 0.8061,
      "step": 498180
    },
    {
      "epoch": 1.746031746031746,
      "grad_norm": 2.875,
      "learning_rate": 2.3221726364323154e-05,
      "loss": 0.8446,
      "step": 498190
    },
    {
      "epoch": 1.7460667935386416,
      "grad_norm": 2.625,
      "learning_rate": 2.3221077335659452e-05,
      "loss": 0.8163,
      "step": 498200
    },
    {
      "epoch": 1.7461018410455371,
      "grad_norm": 3.03125,
      "learning_rate": 2.322042830699575e-05,
      "loss": 0.7953,
      "step": 498210
    },
    {
      "epoch": 1.7461368885524329,
      "grad_norm": 3.109375,
      "learning_rate": 2.321977927833205e-05,
      "loss": 0.9056,
      "step": 498220
    },
    {
      "epoch": 1.7461719360593284,
      "grad_norm": 2.90625,
      "learning_rate": 2.3219130249668346e-05,
      "loss": 0.7454,
      "step": 498230
    },
    {
      "epoch": 1.746206983566224,
      "grad_norm": 2.90625,
      "learning_rate": 2.3218481221004644e-05,
      "loss": 0.7652,
      "step": 498240
    },
    {
      "epoch": 1.7462420310731197,
      "grad_norm": 3.578125,
      "learning_rate": 2.3217832192340942e-05,
      "loss": 0.9315,
      "step": 498250
    },
    {
      "epoch": 1.7462770785800152,
      "grad_norm": 2.9375,
      "learning_rate": 2.321718316367724e-05,
      "loss": 0.8956,
      "step": 498260
    },
    {
      "epoch": 1.7463121260869108,
      "grad_norm": 3.015625,
      "learning_rate": 2.3216534135013542e-05,
      "loss": 0.8126,
      "step": 498270
    },
    {
      "epoch": 1.7463471735938065,
      "grad_norm": 2.9375,
      "learning_rate": 2.321588510634984e-05,
      "loss": 0.828,
      "step": 498280
    },
    {
      "epoch": 1.7463822211007018,
      "grad_norm": 2.765625,
      "learning_rate": 2.3215236077686138e-05,
      "loss": 0.7574,
      "step": 498290
    },
    {
      "epoch": 1.7464172686075976,
      "grad_norm": 3.46875,
      "learning_rate": 2.3214587049022436e-05,
      "loss": 0.8113,
      "step": 498300
    },
    {
      "epoch": 1.7464523161144931,
      "grad_norm": 2.640625,
      "learning_rate": 2.3213938020358734e-05,
      "loss": 0.7787,
      "step": 498310
    },
    {
      "epoch": 1.7464873636213887,
      "grad_norm": 2.75,
      "learning_rate": 2.3213288991695032e-05,
      "loss": 0.8194,
      "step": 498320
    },
    {
      "epoch": 1.7465224111282844,
      "grad_norm": 3.5,
      "learning_rate": 2.321263996303133e-05,
      "loss": 0.8069,
      "step": 498330
    },
    {
      "epoch": 1.74655745863518,
      "grad_norm": 3.109375,
      "learning_rate": 2.3211990934367628e-05,
      "loss": 0.8384,
      "step": 498340
    },
    {
      "epoch": 1.7465925061420755,
      "grad_norm": 3.234375,
      "learning_rate": 2.3211341905703922e-05,
      "loss": 0.8818,
      "step": 498350
    },
    {
      "epoch": 1.7466275536489713,
      "grad_norm": 2.96875,
      "learning_rate": 2.321069287704022e-05,
      "loss": 0.8567,
      "step": 498360
    },
    {
      "epoch": 1.7466626011558668,
      "grad_norm": 2.671875,
      "learning_rate": 2.321004384837652e-05,
      "loss": 0.8016,
      "step": 498370
    },
    {
      "epoch": 1.7466976486627623,
      "grad_norm": 3.109375,
      "learning_rate": 2.3209394819712816e-05,
      "loss": 0.8218,
      "step": 498380
    },
    {
      "epoch": 1.746732696169658,
      "grad_norm": 3.3125,
      "learning_rate": 2.3208745791049118e-05,
      "loss": 0.8261,
      "step": 498390
    },
    {
      "epoch": 1.7467677436765534,
      "grad_norm": 2.953125,
      "learning_rate": 2.3208096762385416e-05,
      "loss": 0.8354,
      "step": 498400
    },
    {
      "epoch": 1.7468027911834492,
      "grad_norm": 2.875,
      "learning_rate": 2.3207447733721714e-05,
      "loss": 0.8088,
      "step": 498410
    },
    {
      "epoch": 1.746837838690345,
      "grad_norm": 2.96875,
      "learning_rate": 2.3206798705058012e-05,
      "loss": 0.8341,
      "step": 498420
    },
    {
      "epoch": 1.7468728861972402,
      "grad_norm": 3.078125,
      "learning_rate": 2.320614967639431e-05,
      "loss": 0.8122,
      "step": 498430
    },
    {
      "epoch": 1.746907933704136,
      "grad_norm": 3.03125,
      "learning_rate": 2.3205500647730608e-05,
      "loss": 0.7671,
      "step": 498440
    },
    {
      "epoch": 1.7469429812110315,
      "grad_norm": 2.828125,
      "learning_rate": 2.3204851619066906e-05,
      "loss": 0.776,
      "step": 498450
    },
    {
      "epoch": 1.746978028717927,
      "grad_norm": 2.625,
      "learning_rate": 2.3204202590403204e-05,
      "loss": 0.8725,
      "step": 498460
    },
    {
      "epoch": 1.7470130762248228,
      "grad_norm": 3.25,
      "learning_rate": 2.3203553561739502e-05,
      "loss": 0.841,
      "step": 498470
    },
    {
      "epoch": 1.7470481237317184,
      "grad_norm": 2.9375,
      "learning_rate": 2.32029045330758e-05,
      "loss": 0.8005,
      "step": 498480
    },
    {
      "epoch": 1.747083171238614,
      "grad_norm": 2.875,
      "learning_rate": 2.3202255504412098e-05,
      "loss": 0.8292,
      "step": 498490
    },
    {
      "epoch": 1.7471182187455097,
      "grad_norm": 2.734375,
      "learning_rate": 2.3201606475748396e-05,
      "loss": 0.8231,
      "step": 498500
    },
    {
      "epoch": 1.7471532662524052,
      "grad_norm": 2.796875,
      "learning_rate": 2.3200957447084694e-05,
      "loss": 0.8043,
      "step": 498510
    },
    {
      "epoch": 1.7471883137593007,
      "grad_norm": 2.953125,
      "learning_rate": 2.3200308418420995e-05,
      "loss": 0.8412,
      "step": 498520
    },
    {
      "epoch": 1.7472233612661965,
      "grad_norm": 2.515625,
      "learning_rate": 2.3199659389757293e-05,
      "loss": 0.8335,
      "step": 498530
    },
    {
      "epoch": 1.7472584087730918,
      "grad_norm": 3.015625,
      "learning_rate": 2.3199010361093588e-05,
      "loss": 0.8749,
      "step": 498540
    },
    {
      "epoch": 1.7472934562799876,
      "grad_norm": 2.640625,
      "learning_rate": 2.3198361332429886e-05,
      "loss": 0.7653,
      "step": 498550
    },
    {
      "epoch": 1.747328503786883,
      "grad_norm": 2.953125,
      "learning_rate": 2.3197712303766184e-05,
      "loss": 0.8297,
      "step": 498560
    },
    {
      "epoch": 1.7473635512937786,
      "grad_norm": 2.796875,
      "learning_rate": 2.3197063275102482e-05,
      "loss": 0.8856,
      "step": 498570
    },
    {
      "epoch": 1.7473985988006744,
      "grad_norm": 2.765625,
      "learning_rate": 2.319641424643878e-05,
      "loss": 0.7998,
      "step": 498580
    },
    {
      "epoch": 1.74743364630757,
      "grad_norm": 2.984375,
      "learning_rate": 2.3195765217775078e-05,
      "loss": 0.8916,
      "step": 498590
    },
    {
      "epoch": 1.7474686938144655,
      "grad_norm": 3.03125,
      "learning_rate": 2.3195116189111376e-05,
      "loss": 0.8599,
      "step": 498600
    },
    {
      "epoch": 1.7475037413213612,
      "grad_norm": 2.6875,
      "learning_rate": 2.3194467160447674e-05,
      "loss": 0.7023,
      "step": 498610
    },
    {
      "epoch": 1.7475387888282568,
      "grad_norm": 2.875,
      "learning_rate": 2.3193818131783972e-05,
      "loss": 0.8655,
      "step": 498620
    },
    {
      "epoch": 1.7475738363351523,
      "grad_norm": 2.765625,
      "learning_rate": 2.319316910312027e-05,
      "loss": 0.8324,
      "step": 498630
    },
    {
      "epoch": 1.747608883842048,
      "grad_norm": 2.921875,
      "learning_rate": 2.319252007445657e-05,
      "loss": 0.8897,
      "step": 498640
    },
    {
      "epoch": 1.7476439313489434,
      "grad_norm": 2.84375,
      "learning_rate": 2.319187104579287e-05,
      "loss": 0.7785,
      "step": 498650
    },
    {
      "epoch": 1.7476789788558391,
      "grad_norm": 2.953125,
      "learning_rate": 2.3191222017129167e-05,
      "loss": 0.7301,
      "step": 498660
    },
    {
      "epoch": 1.7477140263627347,
      "grad_norm": 2.609375,
      "learning_rate": 2.3190572988465465e-05,
      "loss": 0.8077,
      "step": 498670
    },
    {
      "epoch": 1.7477490738696302,
      "grad_norm": 3.90625,
      "learning_rate": 2.3189923959801763e-05,
      "loss": 1.0405,
      "step": 498680
    },
    {
      "epoch": 1.747784121376526,
      "grad_norm": 2.84375,
      "learning_rate": 2.318927493113806e-05,
      "loss": 0.836,
      "step": 498690
    },
    {
      "epoch": 1.7478191688834215,
      "grad_norm": 3.21875,
      "learning_rate": 2.318862590247436e-05,
      "loss": 0.8389,
      "step": 498700
    },
    {
      "epoch": 1.747854216390317,
      "grad_norm": 3.140625,
      "learning_rate": 2.3187976873810657e-05,
      "loss": 0.8578,
      "step": 498710
    },
    {
      "epoch": 1.7478892638972128,
      "grad_norm": 3.078125,
      "learning_rate": 2.3187327845146955e-05,
      "loss": 0.7769,
      "step": 498720
    },
    {
      "epoch": 1.7479243114041083,
      "grad_norm": 2.859375,
      "learning_rate": 2.318667881648325e-05,
      "loss": 0.809,
      "step": 498730
    },
    {
      "epoch": 1.7479593589110038,
      "grad_norm": 2.890625,
      "learning_rate": 2.3186029787819548e-05,
      "loss": 0.8183,
      "step": 498740
    },
    {
      "epoch": 1.7479944064178996,
      "grad_norm": 2.921875,
      "learning_rate": 2.318538075915585e-05,
      "loss": 0.8457,
      "step": 498750
    },
    {
      "epoch": 1.748029453924795,
      "grad_norm": 3.203125,
      "learning_rate": 2.3184731730492147e-05,
      "loss": 0.8106,
      "step": 498760
    },
    {
      "epoch": 1.7480645014316907,
      "grad_norm": 2.6875,
      "learning_rate": 2.3184082701828445e-05,
      "loss": 0.7452,
      "step": 498770
    },
    {
      "epoch": 1.7480995489385862,
      "grad_norm": 3.109375,
      "learning_rate": 2.3183433673164743e-05,
      "loss": 0.8451,
      "step": 498780
    },
    {
      "epoch": 1.7481345964454817,
      "grad_norm": 2.765625,
      "learning_rate": 2.318278464450104e-05,
      "loss": 0.8441,
      "step": 498790
    },
    {
      "epoch": 1.7481696439523775,
      "grad_norm": 2.46875,
      "learning_rate": 2.318213561583734e-05,
      "loss": 0.7807,
      "step": 498800
    },
    {
      "epoch": 1.748204691459273,
      "grad_norm": 2.9375,
      "learning_rate": 2.3181486587173637e-05,
      "loss": 0.9115,
      "step": 498810
    },
    {
      "epoch": 1.7482397389661686,
      "grad_norm": 3.203125,
      "learning_rate": 2.3180837558509935e-05,
      "loss": 0.8375,
      "step": 498820
    },
    {
      "epoch": 1.7482747864730643,
      "grad_norm": 2.5,
      "learning_rate": 2.3180188529846233e-05,
      "loss": 0.8671,
      "step": 498830
    },
    {
      "epoch": 1.7483098339799599,
      "grad_norm": 2.90625,
      "learning_rate": 2.317953950118253e-05,
      "loss": 0.818,
      "step": 498840
    },
    {
      "epoch": 1.7483448814868554,
      "grad_norm": 2.140625,
      "learning_rate": 2.317889047251883e-05,
      "loss": 0.7512,
      "step": 498850
    },
    {
      "epoch": 1.7483799289937512,
      "grad_norm": 3.03125,
      "learning_rate": 2.3178241443855127e-05,
      "loss": 0.7813,
      "step": 498860
    },
    {
      "epoch": 1.7484149765006465,
      "grad_norm": 2.875,
      "learning_rate": 2.3177592415191425e-05,
      "loss": 0.8095,
      "step": 498870
    },
    {
      "epoch": 1.7484500240075422,
      "grad_norm": 2.671875,
      "learning_rate": 2.3176943386527723e-05,
      "loss": 0.8079,
      "step": 498880
    },
    {
      "epoch": 1.7484850715144378,
      "grad_norm": 2.796875,
      "learning_rate": 2.3176294357864025e-05,
      "loss": 0.9055,
      "step": 498890
    },
    {
      "epoch": 1.7485201190213333,
      "grad_norm": 2.640625,
      "learning_rate": 2.3175645329200323e-05,
      "loss": 0.8943,
      "step": 498900
    },
    {
      "epoch": 1.748555166528229,
      "grad_norm": 2.625,
      "learning_rate": 2.3174996300536617e-05,
      "loss": 0.7402,
      "step": 498910
    },
    {
      "epoch": 1.7485902140351246,
      "grad_norm": 2.5625,
      "learning_rate": 2.3174347271872915e-05,
      "loss": 0.7583,
      "step": 498920
    },
    {
      "epoch": 1.7486252615420201,
      "grad_norm": 2.765625,
      "learning_rate": 2.3173698243209213e-05,
      "loss": 0.8051,
      "step": 498930
    },
    {
      "epoch": 1.748660309048916,
      "grad_norm": 2.984375,
      "learning_rate": 2.317304921454551e-05,
      "loss": 0.8913,
      "step": 498940
    },
    {
      "epoch": 1.7486953565558114,
      "grad_norm": 3.453125,
      "learning_rate": 2.317240018588181e-05,
      "loss": 0.8853,
      "step": 498950
    },
    {
      "epoch": 1.748730404062707,
      "grad_norm": 2.9375,
      "learning_rate": 2.3171751157218107e-05,
      "loss": 0.8353,
      "step": 498960
    },
    {
      "epoch": 1.7487654515696027,
      "grad_norm": 3.015625,
      "learning_rate": 2.3171102128554405e-05,
      "loss": 0.7922,
      "step": 498970
    },
    {
      "epoch": 1.748800499076498,
      "grad_norm": 3.03125,
      "learning_rate": 2.3170453099890703e-05,
      "loss": 0.8784,
      "step": 498980
    },
    {
      "epoch": 1.7488355465833938,
      "grad_norm": 2.921875,
      "learning_rate": 2.3169804071227e-05,
      "loss": 0.7992,
      "step": 498990
    },
    {
      "epoch": 1.7488705940902893,
      "grad_norm": 2.25,
      "learning_rate": 2.31691550425633e-05,
      "loss": 0.7975,
      "step": 499000
    },
    {
      "epoch": 1.7489056415971849,
      "grad_norm": 2.96875,
      "learning_rate": 2.31685060138996e-05,
      "loss": 0.8852,
      "step": 499010
    },
    {
      "epoch": 1.7489406891040806,
      "grad_norm": 3.125,
      "learning_rate": 2.31678569852359e-05,
      "loss": 0.7881,
      "step": 499020
    },
    {
      "epoch": 1.7489757366109762,
      "grad_norm": 2.578125,
      "learning_rate": 2.3167207956572197e-05,
      "loss": 0.8044,
      "step": 499030
    },
    {
      "epoch": 1.7490107841178717,
      "grad_norm": 3.21875,
      "learning_rate": 2.3166558927908495e-05,
      "loss": 0.7746,
      "step": 499040
    },
    {
      "epoch": 1.7490458316247675,
      "grad_norm": 2.90625,
      "learning_rate": 2.3165909899244793e-05,
      "loss": 0.8392,
      "step": 499050
    },
    {
      "epoch": 1.749080879131663,
      "grad_norm": 3.171875,
      "learning_rate": 2.316526087058109e-05,
      "loss": 0.7828,
      "step": 499060
    },
    {
      "epoch": 1.7491159266385585,
      "grad_norm": 3.1875,
      "learning_rate": 2.316461184191739e-05,
      "loss": 0.8397,
      "step": 499070
    },
    {
      "epoch": 1.7491509741454543,
      "grad_norm": 3.109375,
      "learning_rate": 2.3163962813253687e-05,
      "loss": 0.814,
      "step": 499080
    },
    {
      "epoch": 1.7491860216523496,
      "grad_norm": 3.484375,
      "learning_rate": 2.3163313784589985e-05,
      "loss": 0.7856,
      "step": 499090
    },
    {
      "epoch": 1.7492210691592454,
      "grad_norm": 2.96875,
      "learning_rate": 2.316266475592628e-05,
      "loss": 0.7601,
      "step": 499100
    },
    {
      "epoch": 1.7492561166661411,
      "grad_norm": 2.546875,
      "learning_rate": 2.3162015727262577e-05,
      "loss": 0.7858,
      "step": 499110
    },
    {
      "epoch": 1.7492911641730364,
      "grad_norm": 3.3125,
      "learning_rate": 2.316136669859888e-05,
      "loss": 0.8345,
      "step": 499120
    },
    {
      "epoch": 1.7493262116799322,
      "grad_norm": 3.109375,
      "learning_rate": 2.3160717669935177e-05,
      "loss": 0.7979,
      "step": 499130
    },
    {
      "epoch": 1.7493612591868277,
      "grad_norm": 2.671875,
      "learning_rate": 2.3160068641271475e-05,
      "loss": 0.8067,
      "step": 499140
    },
    {
      "epoch": 1.7493963066937233,
      "grad_norm": 2.9375,
      "learning_rate": 2.3159419612607773e-05,
      "loss": 0.8845,
      "step": 499150
    },
    {
      "epoch": 1.749431354200619,
      "grad_norm": 2.453125,
      "learning_rate": 2.315877058394407e-05,
      "loss": 0.8178,
      "step": 499160
    },
    {
      "epoch": 1.7494664017075146,
      "grad_norm": 2.984375,
      "learning_rate": 2.315812155528037e-05,
      "loss": 0.8663,
      "step": 499170
    },
    {
      "epoch": 1.74950144921441,
      "grad_norm": 3.046875,
      "learning_rate": 2.3157472526616667e-05,
      "loss": 0.9079,
      "step": 499180
    },
    {
      "epoch": 1.7495364967213058,
      "grad_norm": 3.0,
      "learning_rate": 2.3156823497952965e-05,
      "loss": 0.8709,
      "step": 499190
    },
    {
      "epoch": 1.7495715442282014,
      "grad_norm": 2.640625,
      "learning_rate": 2.3156174469289263e-05,
      "loss": 0.9112,
      "step": 499200
    },
    {
      "epoch": 1.749606591735097,
      "grad_norm": 3.25,
      "learning_rate": 2.315552544062556e-05,
      "loss": 0.8042,
      "step": 499210
    },
    {
      "epoch": 1.7496416392419927,
      "grad_norm": 2.703125,
      "learning_rate": 2.315487641196186e-05,
      "loss": 0.8493,
      "step": 499220
    },
    {
      "epoch": 1.749676686748888,
      "grad_norm": 2.875,
      "learning_rate": 2.3154227383298157e-05,
      "loss": 0.8745,
      "step": 499230
    },
    {
      "epoch": 1.7497117342557837,
      "grad_norm": 3.21875,
      "learning_rate": 2.3153578354634455e-05,
      "loss": 0.8158,
      "step": 499240
    },
    {
      "epoch": 1.7497467817626793,
      "grad_norm": 3.421875,
      "learning_rate": 2.3152929325970753e-05,
      "loss": 0.8495,
      "step": 499250
    },
    {
      "epoch": 1.7497818292695748,
      "grad_norm": 3.140625,
      "learning_rate": 2.3152280297307054e-05,
      "loss": 0.8637,
      "step": 499260
    },
    {
      "epoch": 1.7498168767764706,
      "grad_norm": 2.8125,
      "learning_rate": 2.3151631268643352e-05,
      "loss": 0.8344,
      "step": 499270
    },
    {
      "epoch": 1.7498519242833661,
      "grad_norm": 2.96875,
      "learning_rate": 2.315098223997965e-05,
      "loss": 0.8185,
      "step": 499280
    },
    {
      "epoch": 1.7498869717902616,
      "grad_norm": 2.703125,
      "learning_rate": 2.3150333211315945e-05,
      "loss": 0.8154,
      "step": 499290
    },
    {
      "epoch": 1.7499220192971574,
      "grad_norm": 2.9375,
      "learning_rate": 2.3149684182652243e-05,
      "loss": 0.9105,
      "step": 499300
    },
    {
      "epoch": 1.749957066804053,
      "grad_norm": 2.59375,
      "learning_rate": 2.314903515398854e-05,
      "loss": 0.7692,
      "step": 499310
    },
    {
      "epoch": 1.7499921143109485,
      "grad_norm": 2.484375,
      "learning_rate": 2.314838612532484e-05,
      "loss": 0.8545,
      "step": 499320
    },
    {
      "epoch": 1.7500271618178442,
      "grad_norm": 3.046875,
      "learning_rate": 2.3147737096661137e-05,
      "loss": 0.8237,
      "step": 499330
    },
    {
      "epoch": 1.7500622093247395,
      "grad_norm": 3.125,
      "learning_rate": 2.3147088067997435e-05,
      "loss": 0.8827,
      "step": 499340
    },
    {
      "epoch": 1.7500972568316353,
      "grad_norm": 2.9375,
      "learning_rate": 2.3146439039333733e-05,
      "loss": 0.8088,
      "step": 499350
    },
    {
      "epoch": 1.7501323043385308,
      "grad_norm": 2.875,
      "learning_rate": 2.314579001067003e-05,
      "loss": 0.8073,
      "step": 499360
    },
    {
      "epoch": 1.7501673518454264,
      "grad_norm": 2.890625,
      "learning_rate": 2.3145140982006332e-05,
      "loss": 0.8056,
      "step": 499370
    },
    {
      "epoch": 1.7502023993523221,
      "grad_norm": 2.890625,
      "learning_rate": 2.314449195334263e-05,
      "loss": 0.8261,
      "step": 499380
    },
    {
      "epoch": 1.7502374468592177,
      "grad_norm": 2.921875,
      "learning_rate": 2.3143842924678928e-05,
      "loss": 0.8422,
      "step": 499390
    },
    {
      "epoch": 1.7502724943661132,
      "grad_norm": 2.671875,
      "learning_rate": 2.3143193896015226e-05,
      "loss": 0.8444,
      "step": 499400
    },
    {
      "epoch": 1.750307541873009,
      "grad_norm": 3.140625,
      "learning_rate": 2.3142544867351524e-05,
      "loss": 0.8566,
      "step": 499410
    },
    {
      "epoch": 1.7503425893799045,
      "grad_norm": 3.234375,
      "learning_rate": 2.3141895838687822e-05,
      "loss": 0.8681,
      "step": 499420
    },
    {
      "epoch": 1.7503776368868,
      "grad_norm": 2.796875,
      "learning_rate": 2.314124681002412e-05,
      "loss": 0.8849,
      "step": 499430
    },
    {
      "epoch": 1.7504126843936958,
      "grad_norm": 3.09375,
      "learning_rate": 2.3140597781360418e-05,
      "loss": 0.8168,
      "step": 499440
    },
    {
      "epoch": 1.750447731900591,
      "grad_norm": 2.53125,
      "learning_rate": 2.3139948752696716e-05,
      "loss": 0.7499,
      "step": 499450
    },
    {
      "epoch": 1.7504827794074869,
      "grad_norm": 2.828125,
      "learning_rate": 2.3139299724033014e-05,
      "loss": 0.8486,
      "step": 499460
    },
    {
      "epoch": 1.7505178269143824,
      "grad_norm": 2.890625,
      "learning_rate": 2.3138650695369312e-05,
      "loss": 0.8168,
      "step": 499470
    },
    {
      "epoch": 1.750552874421278,
      "grad_norm": 2.84375,
      "learning_rate": 2.3138001666705607e-05,
      "loss": 0.803,
      "step": 499480
    },
    {
      "epoch": 1.7505879219281737,
      "grad_norm": 3.046875,
      "learning_rate": 2.3137352638041908e-05,
      "loss": 0.816,
      "step": 499490
    },
    {
      "epoch": 1.7506229694350692,
      "grad_norm": 3.375,
      "learning_rate": 2.3136703609378206e-05,
      "loss": 0.8713,
      "step": 499500
    },
    {
      "epoch": 1.7506580169419648,
      "grad_norm": 3.234375,
      "learning_rate": 2.3136054580714504e-05,
      "loss": 0.8157,
      "step": 499510
    },
    {
      "epoch": 1.7506930644488605,
      "grad_norm": 3.09375,
      "learning_rate": 2.3135405552050802e-05,
      "loss": 0.8082,
      "step": 499520
    },
    {
      "epoch": 1.750728111955756,
      "grad_norm": 2.703125,
      "learning_rate": 2.31347565233871e-05,
      "loss": 0.8492,
      "step": 499530
    },
    {
      "epoch": 1.7507631594626516,
      "grad_norm": 2.6875,
      "learning_rate": 2.3134107494723398e-05,
      "loss": 0.7789,
      "step": 499540
    },
    {
      "epoch": 1.7507982069695474,
      "grad_norm": 2.453125,
      "learning_rate": 2.3133458466059696e-05,
      "loss": 0.754,
      "step": 499550
    },
    {
      "epoch": 1.7508332544764427,
      "grad_norm": 3.0625,
      "learning_rate": 2.3132809437395994e-05,
      "loss": 0.9064,
      "step": 499560
    },
    {
      "epoch": 1.7508683019833384,
      "grad_norm": 2.84375,
      "learning_rate": 2.3132160408732292e-05,
      "loss": 0.9086,
      "step": 499570
    },
    {
      "epoch": 1.750903349490234,
      "grad_norm": 2.78125,
      "learning_rate": 2.313151138006859e-05,
      "loss": 0.901,
      "step": 499580
    },
    {
      "epoch": 1.7509383969971295,
      "grad_norm": 3.0,
      "learning_rate": 2.3130862351404888e-05,
      "loss": 0.7737,
      "step": 499590
    },
    {
      "epoch": 1.7509734445040253,
      "grad_norm": 2.6875,
      "learning_rate": 2.3130213322741186e-05,
      "loss": 0.8366,
      "step": 499600
    },
    {
      "epoch": 1.7510084920109208,
      "grad_norm": 3.015625,
      "learning_rate": 2.3129564294077484e-05,
      "loss": 0.8653,
      "step": 499610
    },
    {
      "epoch": 1.7510435395178163,
      "grad_norm": 2.40625,
      "learning_rate": 2.3128915265413785e-05,
      "loss": 0.7496,
      "step": 499620
    },
    {
      "epoch": 1.751078587024712,
      "grad_norm": 2.796875,
      "learning_rate": 2.3128266236750083e-05,
      "loss": 0.7768,
      "step": 499630
    },
    {
      "epoch": 1.7511136345316076,
      "grad_norm": 3.03125,
      "learning_rate": 2.312761720808638e-05,
      "loss": 0.8039,
      "step": 499640
    },
    {
      "epoch": 1.7511486820385032,
      "grad_norm": 2.8125,
      "learning_rate": 2.312696817942268e-05,
      "loss": 0.7799,
      "step": 499650
    },
    {
      "epoch": 1.751183729545399,
      "grad_norm": 2.53125,
      "learning_rate": 2.3126319150758977e-05,
      "loss": 0.9146,
      "step": 499660
    },
    {
      "epoch": 1.7512187770522942,
      "grad_norm": 2.984375,
      "learning_rate": 2.3125670122095272e-05,
      "loss": 0.7249,
      "step": 499670
    },
    {
      "epoch": 1.75125382455919,
      "grad_norm": 2.8125,
      "learning_rate": 2.312502109343157e-05,
      "loss": 0.7514,
      "step": 499680
    },
    {
      "epoch": 1.7512888720660857,
      "grad_norm": 2.703125,
      "learning_rate": 2.3124372064767868e-05,
      "loss": 0.7903,
      "step": 499690
    },
    {
      "epoch": 1.751323919572981,
      "grad_norm": 3.0,
      "learning_rate": 2.3123723036104166e-05,
      "loss": 0.8083,
      "step": 499700
    },
    {
      "epoch": 1.7513589670798768,
      "grad_norm": 2.765625,
      "learning_rate": 2.3123074007440464e-05,
      "loss": 0.7869,
      "step": 499710
    },
    {
      "epoch": 1.7513940145867724,
      "grad_norm": 3.546875,
      "learning_rate": 2.3122424978776762e-05,
      "loss": 0.8161,
      "step": 499720
    },
    {
      "epoch": 1.7514290620936679,
      "grad_norm": 2.96875,
      "learning_rate": 2.312177595011306e-05,
      "loss": 0.867,
      "step": 499730
    },
    {
      "epoch": 1.7514641096005636,
      "grad_norm": 3.015625,
      "learning_rate": 2.312112692144936e-05,
      "loss": 0.855,
      "step": 499740
    },
    {
      "epoch": 1.7514991571074592,
      "grad_norm": 2.75,
      "learning_rate": 2.312047789278566e-05,
      "loss": 0.8386,
      "step": 499750
    },
    {
      "epoch": 1.7515342046143547,
      "grad_norm": 3.109375,
      "learning_rate": 2.3119828864121957e-05,
      "loss": 0.8855,
      "step": 499760
    },
    {
      "epoch": 1.7515692521212505,
      "grad_norm": 3.09375,
      "learning_rate": 2.3119179835458255e-05,
      "loss": 0.851,
      "step": 499770
    },
    {
      "epoch": 1.7516042996281458,
      "grad_norm": 3.09375,
      "learning_rate": 2.3118530806794553e-05,
      "loss": 0.8464,
      "step": 499780
    },
    {
      "epoch": 1.7516393471350415,
      "grad_norm": 2.90625,
      "learning_rate": 2.311788177813085e-05,
      "loss": 0.7732,
      "step": 499790
    },
    {
      "epoch": 1.7516743946419373,
      "grad_norm": 3.34375,
      "learning_rate": 2.311723274946715e-05,
      "loss": 0.8459,
      "step": 499800
    },
    {
      "epoch": 1.7517094421488326,
      "grad_norm": 3.109375,
      "learning_rate": 2.3116583720803447e-05,
      "loss": 0.8528,
      "step": 499810
    },
    {
      "epoch": 1.7517444896557284,
      "grad_norm": 2.828125,
      "learning_rate": 2.3115934692139745e-05,
      "loss": 0.7947,
      "step": 499820
    },
    {
      "epoch": 1.751779537162624,
      "grad_norm": 3.0,
      "learning_rate": 2.3115285663476043e-05,
      "loss": 0.8624,
      "step": 499830
    },
    {
      "epoch": 1.7518145846695194,
      "grad_norm": 2.84375,
      "learning_rate": 2.311463663481234e-05,
      "loss": 0.9099,
      "step": 499840
    },
    {
      "epoch": 1.7518496321764152,
      "grad_norm": 3.15625,
      "learning_rate": 2.311398760614864e-05,
      "loss": 0.8396,
      "step": 499850
    },
    {
      "epoch": 1.7518846796833107,
      "grad_norm": 2.671875,
      "learning_rate": 2.3113338577484937e-05,
      "loss": 0.8391,
      "step": 499860
    },
    {
      "epoch": 1.7519197271902063,
      "grad_norm": 3.125,
      "learning_rate": 2.3112689548821235e-05,
      "loss": 0.8177,
      "step": 499870
    },
    {
      "epoch": 1.751954774697102,
      "grad_norm": 2.90625,
      "learning_rate": 2.3112040520157533e-05,
      "loss": 0.8349,
      "step": 499880
    },
    {
      "epoch": 1.7519898222039976,
      "grad_norm": 2.34375,
      "learning_rate": 2.311139149149383e-05,
      "loss": 0.8312,
      "step": 499890
    },
    {
      "epoch": 1.752024869710893,
      "grad_norm": 2.78125,
      "learning_rate": 2.311074246283013e-05,
      "loss": 0.844,
      "step": 499900
    },
    {
      "epoch": 1.7520599172177889,
      "grad_norm": 2.890625,
      "learning_rate": 2.3110093434166427e-05,
      "loss": 0.8701,
      "step": 499910
    },
    {
      "epoch": 1.7520949647246842,
      "grad_norm": 3.15625,
      "learning_rate": 2.3109444405502725e-05,
      "loss": 0.7201,
      "step": 499920
    },
    {
      "epoch": 1.75213001223158,
      "grad_norm": 2.921875,
      "learning_rate": 2.3108795376839023e-05,
      "loss": 0.894,
      "step": 499930
    },
    {
      "epoch": 1.7521650597384755,
      "grad_norm": 3.171875,
      "learning_rate": 2.310814634817532e-05,
      "loss": 0.816,
      "step": 499940
    },
    {
      "epoch": 1.752200107245371,
      "grad_norm": 3.34375,
      "learning_rate": 2.310749731951162e-05,
      "loss": 0.8075,
      "step": 499950
    },
    {
      "epoch": 1.7522351547522668,
      "grad_norm": 3.015625,
      "learning_rate": 2.3106848290847917e-05,
      "loss": 0.7998,
      "step": 499960
    },
    {
      "epoch": 1.7522702022591623,
      "grad_norm": 3.125,
      "learning_rate": 2.3106199262184215e-05,
      "loss": 0.8101,
      "step": 499970
    },
    {
      "epoch": 1.7523052497660578,
      "grad_norm": 3.3125,
      "learning_rate": 2.3105550233520513e-05,
      "loss": 0.8576,
      "step": 499980
    },
    {
      "epoch": 1.7523402972729536,
      "grad_norm": 2.609375,
      "learning_rate": 2.3104901204856815e-05,
      "loss": 0.8928,
      "step": 499990
    },
    {
      "epoch": 1.7523753447798491,
      "grad_norm": 2.859375,
      "learning_rate": 2.3104252176193113e-05,
      "loss": 0.8729,
      "step": 500000
    },
    {
      "epoch": 1.7523753447798491,
      "eval_loss": 0.7797040939331055,
      "eval_runtime": 552.084,
      "eval_samples_per_second": 689.091,
      "eval_steps_per_second": 57.424,
      "step": 500000
    },
    {
      "epoch": 1.7524103922867447,
      "grad_norm": 3.015625,
      "learning_rate": 2.310360314752941e-05,
      "loss": 0.8174,
      "step": 500010
    },
    {
      "epoch": 1.7524454397936404,
      "grad_norm": 2.9375,
      "learning_rate": 2.310295411886571e-05,
      "loss": 0.8408,
      "step": 500020
    },
    {
      "epoch": 1.7524804873005357,
      "grad_norm": 2.5625,
      "learning_rate": 2.3102305090202007e-05,
      "loss": 0.8982,
      "step": 500030
    },
    {
      "epoch": 1.7525155348074315,
      "grad_norm": 2.859375,
      "learning_rate": 2.31016560615383e-05,
      "loss": 0.7662,
      "step": 500040
    },
    {
      "epoch": 1.752550582314327,
      "grad_norm": 2.671875,
      "learning_rate": 2.31010070328746e-05,
      "loss": 0.7651,
      "step": 500050
    },
    {
      "epoch": 1.7525856298212226,
      "grad_norm": 2.953125,
      "learning_rate": 2.3100358004210897e-05,
      "loss": 0.8534,
      "step": 500060
    },
    {
      "epoch": 1.7526206773281183,
      "grad_norm": 3.0,
      "learning_rate": 2.3099708975547195e-05,
      "loss": 0.798,
      "step": 500070
    },
    {
      "epoch": 1.7526557248350139,
      "grad_norm": 3.390625,
      "learning_rate": 2.3099059946883493e-05,
      "loss": 0.7989,
      "step": 500080
    },
    {
      "epoch": 1.7526907723419094,
      "grad_norm": 3.171875,
      "learning_rate": 2.309841091821979e-05,
      "loss": 0.8498,
      "step": 500090
    },
    {
      "epoch": 1.7527258198488052,
      "grad_norm": 2.90625,
      "learning_rate": 2.3097761889556093e-05,
      "loss": 0.8631,
      "step": 500100
    },
    {
      "epoch": 1.7527608673557007,
      "grad_norm": 3.21875,
      "learning_rate": 2.309711286089239e-05,
      "loss": 0.7989,
      "step": 500110
    },
    {
      "epoch": 1.7527959148625962,
      "grad_norm": 2.6875,
      "learning_rate": 2.309646383222869e-05,
      "loss": 0.8469,
      "step": 500120
    },
    {
      "epoch": 1.752830962369492,
      "grad_norm": 2.765625,
      "learning_rate": 2.3095814803564987e-05,
      "loss": 0.7856,
      "step": 500130
    },
    {
      "epoch": 1.7528660098763873,
      "grad_norm": 2.484375,
      "learning_rate": 2.3095165774901285e-05,
      "loss": 0.8108,
      "step": 500140
    },
    {
      "epoch": 1.752901057383283,
      "grad_norm": 2.625,
      "learning_rate": 2.3094516746237583e-05,
      "loss": 0.8064,
      "step": 500150
    },
    {
      "epoch": 1.7529361048901786,
      "grad_norm": 2.296875,
      "learning_rate": 2.309386771757388e-05,
      "loss": 0.8092,
      "step": 500160
    },
    {
      "epoch": 1.7529711523970741,
      "grad_norm": 3.234375,
      "learning_rate": 2.309321868891018e-05,
      "loss": 0.8972,
      "step": 500170
    },
    {
      "epoch": 1.7530061999039699,
      "grad_norm": 2.625,
      "learning_rate": 2.3092569660246477e-05,
      "loss": 0.8546,
      "step": 500180
    },
    {
      "epoch": 1.7530412474108654,
      "grad_norm": 2.75,
      "learning_rate": 2.3091920631582775e-05,
      "loss": 0.7984,
      "step": 500190
    },
    {
      "epoch": 1.753076294917761,
      "grad_norm": 2.953125,
      "learning_rate": 2.3091271602919073e-05,
      "loss": 0.7879,
      "step": 500200
    },
    {
      "epoch": 1.7531113424246567,
      "grad_norm": 2.9375,
      "learning_rate": 2.309062257425537e-05,
      "loss": 0.8638,
      "step": 500210
    },
    {
      "epoch": 1.7531463899315523,
      "grad_norm": 2.671875,
      "learning_rate": 2.308997354559167e-05,
      "loss": 0.8378,
      "step": 500220
    },
    {
      "epoch": 1.7531814374384478,
      "grad_norm": 2.796875,
      "learning_rate": 2.3089324516927967e-05,
      "loss": 0.7398,
      "step": 500230
    },
    {
      "epoch": 1.7532164849453435,
      "grad_norm": 3.203125,
      "learning_rate": 2.3088675488264265e-05,
      "loss": 0.8682,
      "step": 500240
    },
    {
      "epoch": 1.7532515324522389,
      "grad_norm": 3.046875,
      "learning_rate": 2.3088026459600563e-05,
      "loss": 0.8162,
      "step": 500250
    },
    {
      "epoch": 1.7532865799591346,
      "grad_norm": 2.953125,
      "learning_rate": 2.308737743093686e-05,
      "loss": 0.9035,
      "step": 500260
    },
    {
      "epoch": 1.7533216274660302,
      "grad_norm": 3.109375,
      "learning_rate": 2.308672840227316e-05,
      "loss": 0.8644,
      "step": 500270
    },
    {
      "epoch": 1.7533566749729257,
      "grad_norm": 3.25,
      "learning_rate": 2.3086079373609457e-05,
      "loss": 0.8285,
      "step": 500280
    },
    {
      "epoch": 1.7533917224798214,
      "grad_norm": 2.890625,
      "learning_rate": 2.3085430344945755e-05,
      "loss": 0.8997,
      "step": 500290
    },
    {
      "epoch": 1.753426769986717,
      "grad_norm": 2.96875,
      "learning_rate": 2.3084781316282053e-05,
      "loss": 0.7926,
      "step": 500300
    },
    {
      "epoch": 1.7534618174936125,
      "grad_norm": 2.828125,
      "learning_rate": 2.308413228761835e-05,
      "loss": 0.8454,
      "step": 500310
    },
    {
      "epoch": 1.7534968650005083,
      "grad_norm": 2.453125,
      "learning_rate": 2.308348325895465e-05,
      "loss": 0.7914,
      "step": 500320
    },
    {
      "epoch": 1.7535319125074038,
      "grad_norm": 2.4375,
      "learning_rate": 2.3082834230290947e-05,
      "loss": 0.727,
      "step": 500330
    },
    {
      "epoch": 1.7535669600142993,
      "grad_norm": 2.859375,
      "learning_rate": 2.3082185201627245e-05,
      "loss": 0.7996,
      "step": 500340
    },
    {
      "epoch": 1.753602007521195,
      "grad_norm": 2.796875,
      "learning_rate": 2.3081536172963543e-05,
      "loss": 0.8082,
      "step": 500350
    },
    {
      "epoch": 1.7536370550280904,
      "grad_norm": 2.90625,
      "learning_rate": 2.3080887144299844e-05,
      "loss": 0.8089,
      "step": 500360
    },
    {
      "epoch": 1.7536721025349862,
      "grad_norm": 3.109375,
      "learning_rate": 2.3080238115636142e-05,
      "loss": 0.8842,
      "step": 500370
    },
    {
      "epoch": 1.753707150041882,
      "grad_norm": 3.09375,
      "learning_rate": 2.307958908697244e-05,
      "loss": 0.8539,
      "step": 500380
    },
    {
      "epoch": 1.7537421975487772,
      "grad_norm": 2.796875,
      "learning_rate": 2.3078940058308738e-05,
      "loss": 0.8963,
      "step": 500390
    },
    {
      "epoch": 1.753777245055673,
      "grad_norm": 2.875,
      "learning_rate": 2.3078291029645036e-05,
      "loss": 0.7712,
      "step": 500400
    },
    {
      "epoch": 1.7538122925625685,
      "grad_norm": 2.953125,
      "learning_rate": 2.3077642000981334e-05,
      "loss": 0.77,
      "step": 500410
    },
    {
      "epoch": 1.753847340069464,
      "grad_norm": 2.921875,
      "learning_rate": 2.307699297231763e-05,
      "loss": 0.8809,
      "step": 500420
    },
    {
      "epoch": 1.7538823875763598,
      "grad_norm": 2.421875,
      "learning_rate": 2.3076343943653927e-05,
      "loss": 0.7459,
      "step": 500430
    },
    {
      "epoch": 1.7539174350832554,
      "grad_norm": 2.609375,
      "learning_rate": 2.3075694914990225e-05,
      "loss": 0.8149,
      "step": 500440
    },
    {
      "epoch": 1.753952482590151,
      "grad_norm": 3.109375,
      "learning_rate": 2.3075045886326523e-05,
      "loss": 0.7974,
      "step": 500450
    },
    {
      "epoch": 1.7539875300970467,
      "grad_norm": 3.046875,
      "learning_rate": 2.307439685766282e-05,
      "loss": 0.7684,
      "step": 500460
    },
    {
      "epoch": 1.754022577603942,
      "grad_norm": 3.140625,
      "learning_rate": 2.3073747828999122e-05,
      "loss": 0.8014,
      "step": 500470
    },
    {
      "epoch": 1.7540576251108377,
      "grad_norm": 3.046875,
      "learning_rate": 2.307309880033542e-05,
      "loss": 0.8109,
      "step": 500480
    },
    {
      "epoch": 1.7540926726177335,
      "grad_norm": 3.109375,
      "learning_rate": 2.3072449771671718e-05,
      "loss": 0.8349,
      "step": 500490
    },
    {
      "epoch": 1.7541277201246288,
      "grad_norm": 2.6875,
      "learning_rate": 2.3071800743008016e-05,
      "loss": 0.7803,
      "step": 500500
    },
    {
      "epoch": 1.7541627676315246,
      "grad_norm": 2.765625,
      "learning_rate": 2.3071151714344314e-05,
      "loss": 0.7693,
      "step": 500510
    },
    {
      "epoch": 1.75419781513842,
      "grad_norm": 2.9375,
      "learning_rate": 2.3070502685680612e-05,
      "loss": 0.8839,
      "step": 500520
    },
    {
      "epoch": 1.7542328626453156,
      "grad_norm": 2.734375,
      "learning_rate": 2.306985365701691e-05,
      "loss": 0.8447,
      "step": 500530
    },
    {
      "epoch": 1.7542679101522114,
      "grad_norm": 2.59375,
      "learning_rate": 2.3069204628353208e-05,
      "loss": 0.9064,
      "step": 500540
    },
    {
      "epoch": 1.754302957659107,
      "grad_norm": 2.78125,
      "learning_rate": 2.3068555599689506e-05,
      "loss": 0.8188,
      "step": 500550
    },
    {
      "epoch": 1.7543380051660025,
      "grad_norm": 3.203125,
      "learning_rate": 2.3067906571025804e-05,
      "loss": 0.735,
      "step": 500560
    },
    {
      "epoch": 1.7543730526728982,
      "grad_norm": 3.09375,
      "learning_rate": 2.3067257542362102e-05,
      "loss": 0.9189,
      "step": 500570
    },
    {
      "epoch": 1.7544081001797938,
      "grad_norm": 2.5625,
      "learning_rate": 2.30666085136984e-05,
      "loss": 0.8083,
      "step": 500580
    },
    {
      "epoch": 1.7544431476866893,
      "grad_norm": 3.03125,
      "learning_rate": 2.3065959485034698e-05,
      "loss": 0.8424,
      "step": 500590
    },
    {
      "epoch": 1.754478195193585,
      "grad_norm": 2.859375,
      "learning_rate": 2.3065310456370996e-05,
      "loss": 0.8358,
      "step": 500600
    },
    {
      "epoch": 1.7545132427004804,
      "grad_norm": 2.765625,
      "learning_rate": 2.3064661427707294e-05,
      "loss": 0.8269,
      "step": 500610
    },
    {
      "epoch": 1.7545482902073761,
      "grad_norm": 3.03125,
      "learning_rate": 2.3064012399043592e-05,
      "loss": 0.7886,
      "step": 500620
    },
    {
      "epoch": 1.7545833377142717,
      "grad_norm": 2.875,
      "learning_rate": 2.306336337037989e-05,
      "loss": 0.8516,
      "step": 500630
    },
    {
      "epoch": 1.7546183852211672,
      "grad_norm": 2.8125,
      "learning_rate": 2.3062714341716188e-05,
      "loss": 0.9067,
      "step": 500640
    },
    {
      "epoch": 1.754653432728063,
      "grad_norm": 3.53125,
      "learning_rate": 2.3062065313052486e-05,
      "loss": 0.8581,
      "step": 500650
    },
    {
      "epoch": 1.7546884802349585,
      "grad_norm": 2.671875,
      "learning_rate": 2.3061416284388784e-05,
      "loss": 0.878,
      "step": 500660
    },
    {
      "epoch": 1.754723527741854,
      "grad_norm": 2.9375,
      "learning_rate": 2.3060767255725082e-05,
      "loss": 0.8266,
      "step": 500670
    },
    {
      "epoch": 1.7547585752487498,
      "grad_norm": 3.015625,
      "learning_rate": 2.306011822706138e-05,
      "loss": 0.9193,
      "step": 500680
    },
    {
      "epoch": 1.7547936227556453,
      "grad_norm": 3.390625,
      "learning_rate": 2.3059469198397678e-05,
      "loss": 0.7736,
      "step": 500690
    },
    {
      "epoch": 1.7548286702625409,
      "grad_norm": 2.78125,
      "learning_rate": 2.3058820169733976e-05,
      "loss": 0.7474,
      "step": 500700
    },
    {
      "epoch": 1.7548637177694366,
      "grad_norm": 2.90625,
      "learning_rate": 2.3058171141070274e-05,
      "loss": 0.8594,
      "step": 500710
    },
    {
      "epoch": 1.754898765276332,
      "grad_norm": 2.9375,
      "learning_rate": 2.3057522112406575e-05,
      "loss": 0.833,
      "step": 500720
    },
    {
      "epoch": 1.7549338127832277,
      "grad_norm": 3.109375,
      "learning_rate": 2.3056873083742873e-05,
      "loss": 0.8405,
      "step": 500730
    },
    {
      "epoch": 1.7549688602901232,
      "grad_norm": 3.0625,
      "learning_rate": 2.305622405507917e-05,
      "loss": 0.8695,
      "step": 500740
    },
    {
      "epoch": 1.7550039077970188,
      "grad_norm": 2.34375,
      "learning_rate": 2.305557502641547e-05,
      "loss": 0.9255,
      "step": 500750
    },
    {
      "epoch": 1.7550389553039145,
      "grad_norm": 3.15625,
      "learning_rate": 2.3054925997751767e-05,
      "loss": 0.843,
      "step": 500760
    },
    {
      "epoch": 1.75507400281081,
      "grad_norm": 2.96875,
      "learning_rate": 2.3054276969088065e-05,
      "loss": 0.834,
      "step": 500770
    },
    {
      "epoch": 1.7551090503177056,
      "grad_norm": 2.671875,
      "learning_rate": 2.3053627940424363e-05,
      "loss": 0.824,
      "step": 500780
    },
    {
      "epoch": 1.7551440978246013,
      "grad_norm": 2.84375,
      "learning_rate": 2.305297891176066e-05,
      "loss": 0.8356,
      "step": 500790
    },
    {
      "epoch": 1.7551791453314969,
      "grad_norm": 2.6875,
      "learning_rate": 2.3052329883096956e-05,
      "loss": 0.7468,
      "step": 500800
    },
    {
      "epoch": 1.7552141928383924,
      "grad_norm": 3.390625,
      "learning_rate": 2.3051680854433254e-05,
      "loss": 0.9121,
      "step": 500810
    },
    {
      "epoch": 1.7552492403452882,
      "grad_norm": 2.734375,
      "learning_rate": 2.3051031825769552e-05,
      "loss": 0.824,
      "step": 500820
    },
    {
      "epoch": 1.7552842878521835,
      "grad_norm": 2.96875,
      "learning_rate": 2.305038279710585e-05,
      "loss": 0.8509,
      "step": 500830
    },
    {
      "epoch": 1.7553193353590792,
      "grad_norm": 2.875,
      "learning_rate": 2.304973376844215e-05,
      "loss": 0.7526,
      "step": 500840
    },
    {
      "epoch": 1.7553543828659748,
      "grad_norm": 3.109375,
      "learning_rate": 2.304908473977845e-05,
      "loss": 0.8296,
      "step": 500850
    },
    {
      "epoch": 1.7553894303728703,
      "grad_norm": 2.890625,
      "learning_rate": 2.3048435711114747e-05,
      "loss": 0.8672,
      "step": 500860
    },
    {
      "epoch": 1.755424477879766,
      "grad_norm": 3.03125,
      "learning_rate": 2.3047786682451045e-05,
      "loss": 0.7798,
      "step": 500870
    },
    {
      "epoch": 1.7554595253866616,
      "grad_norm": 2.75,
      "learning_rate": 2.3047137653787343e-05,
      "loss": 0.8216,
      "step": 500880
    },
    {
      "epoch": 1.7554945728935571,
      "grad_norm": 2.78125,
      "learning_rate": 2.304648862512364e-05,
      "loss": 0.8506,
      "step": 500890
    },
    {
      "epoch": 1.755529620400453,
      "grad_norm": 2.546875,
      "learning_rate": 2.304583959645994e-05,
      "loss": 0.8557,
      "step": 500900
    },
    {
      "epoch": 1.7555646679073484,
      "grad_norm": 2.953125,
      "learning_rate": 2.3045190567796237e-05,
      "loss": 0.8164,
      "step": 500910
    },
    {
      "epoch": 1.755599715414244,
      "grad_norm": 2.640625,
      "learning_rate": 2.3044541539132535e-05,
      "loss": 0.8113,
      "step": 500920
    },
    {
      "epoch": 1.7556347629211397,
      "grad_norm": 3.109375,
      "learning_rate": 2.3043892510468833e-05,
      "loss": 0.8765,
      "step": 500930
    },
    {
      "epoch": 1.755669810428035,
      "grad_norm": 2.40625,
      "learning_rate": 2.304324348180513e-05,
      "loss": 0.8358,
      "step": 500940
    },
    {
      "epoch": 1.7557048579349308,
      "grad_norm": 3.328125,
      "learning_rate": 2.304259445314143e-05,
      "loss": 0.8732,
      "step": 500950
    },
    {
      "epoch": 1.7557399054418263,
      "grad_norm": 2.96875,
      "learning_rate": 2.3041945424477727e-05,
      "loss": 0.8007,
      "step": 500960
    },
    {
      "epoch": 1.7557749529487219,
      "grad_norm": 3.140625,
      "learning_rate": 2.3041296395814025e-05,
      "loss": 0.811,
      "step": 500970
    },
    {
      "epoch": 1.7558100004556176,
      "grad_norm": 3.328125,
      "learning_rate": 2.3040647367150323e-05,
      "loss": 0.8273,
      "step": 500980
    },
    {
      "epoch": 1.7558450479625132,
      "grad_norm": 2.4375,
      "learning_rate": 2.303999833848662e-05,
      "loss": 0.8548,
      "step": 500990
    },
    {
      "epoch": 1.7558800954694087,
      "grad_norm": 2.875,
      "learning_rate": 2.303934930982292e-05,
      "loss": 0.9168,
      "step": 501000
    },
    {
      "epoch": 1.7559151429763045,
      "grad_norm": 3.140625,
      "learning_rate": 2.3038700281159217e-05,
      "loss": 0.8246,
      "step": 501010
    },
    {
      "epoch": 1.7559501904832,
      "grad_norm": 2.84375,
      "learning_rate": 2.3038051252495515e-05,
      "loss": 0.8088,
      "step": 501020
    },
    {
      "epoch": 1.7559852379900955,
      "grad_norm": 2.84375,
      "learning_rate": 2.3037402223831813e-05,
      "loss": 0.8891,
      "step": 501030
    },
    {
      "epoch": 1.7560202854969913,
      "grad_norm": 3.0,
      "learning_rate": 2.303675319516811e-05,
      "loss": 0.8333,
      "step": 501040
    },
    {
      "epoch": 1.7560553330038866,
      "grad_norm": 3.0625,
      "learning_rate": 2.303610416650441e-05,
      "loss": 0.8994,
      "step": 501050
    },
    {
      "epoch": 1.7560903805107824,
      "grad_norm": 2.28125,
      "learning_rate": 2.3035455137840707e-05,
      "loss": 0.847,
      "step": 501060
    },
    {
      "epoch": 1.7561254280176781,
      "grad_norm": 3.140625,
      "learning_rate": 2.3034806109177005e-05,
      "loss": 0.9117,
      "step": 501070
    },
    {
      "epoch": 1.7561604755245734,
      "grad_norm": 3.1875,
      "learning_rate": 2.3034157080513303e-05,
      "loss": 0.7904,
      "step": 501080
    },
    {
      "epoch": 1.7561955230314692,
      "grad_norm": 2.9375,
      "learning_rate": 2.3033508051849605e-05,
      "loss": 0.8086,
      "step": 501090
    },
    {
      "epoch": 1.7562305705383647,
      "grad_norm": 2.765625,
      "learning_rate": 2.3032859023185903e-05,
      "loss": 0.788,
      "step": 501100
    },
    {
      "epoch": 1.7562656180452603,
      "grad_norm": 3.0625,
      "learning_rate": 2.30322099945222e-05,
      "loss": 0.7484,
      "step": 501110
    },
    {
      "epoch": 1.756300665552156,
      "grad_norm": 2.890625,
      "learning_rate": 2.30315609658585e-05,
      "loss": 0.8135,
      "step": 501120
    },
    {
      "epoch": 1.7563357130590516,
      "grad_norm": 2.765625,
      "learning_rate": 2.3030911937194797e-05,
      "loss": 0.8352,
      "step": 501130
    },
    {
      "epoch": 1.756370760565947,
      "grad_norm": 2.9375,
      "learning_rate": 2.3030262908531095e-05,
      "loss": 0.7697,
      "step": 501140
    },
    {
      "epoch": 1.7564058080728429,
      "grad_norm": 2.796875,
      "learning_rate": 2.3029613879867393e-05,
      "loss": 0.8642,
      "step": 501150
    },
    {
      "epoch": 1.7564408555797382,
      "grad_norm": 2.90625,
      "learning_rate": 2.302896485120369e-05,
      "loss": 0.862,
      "step": 501160
    },
    {
      "epoch": 1.756475903086634,
      "grad_norm": 2.90625,
      "learning_rate": 2.3028315822539985e-05,
      "loss": 0.802,
      "step": 501170
    },
    {
      "epoch": 1.7565109505935297,
      "grad_norm": 2.734375,
      "learning_rate": 2.3027666793876283e-05,
      "loss": 0.8316,
      "step": 501180
    },
    {
      "epoch": 1.756545998100425,
      "grad_norm": 2.53125,
      "learning_rate": 2.302701776521258e-05,
      "loss": 0.847,
      "step": 501190
    },
    {
      "epoch": 1.7565810456073208,
      "grad_norm": 2.453125,
      "learning_rate": 2.3026368736548883e-05,
      "loss": 0.8795,
      "step": 501200
    },
    {
      "epoch": 1.7566160931142163,
      "grad_norm": 3.203125,
      "learning_rate": 2.302571970788518e-05,
      "loss": 0.8192,
      "step": 501210
    },
    {
      "epoch": 1.7566511406211118,
      "grad_norm": 2.671875,
      "learning_rate": 2.302507067922148e-05,
      "loss": 0.7601,
      "step": 501220
    },
    {
      "epoch": 1.7566861881280076,
      "grad_norm": 3.265625,
      "learning_rate": 2.3024421650557777e-05,
      "loss": 0.7379,
      "step": 501230
    },
    {
      "epoch": 1.7567212356349031,
      "grad_norm": 2.53125,
      "learning_rate": 2.3023772621894075e-05,
      "loss": 0.8879,
      "step": 501240
    },
    {
      "epoch": 1.7567562831417987,
      "grad_norm": 3.0,
      "learning_rate": 2.3023123593230373e-05,
      "loss": 0.8944,
      "step": 501250
    },
    {
      "epoch": 1.7567913306486944,
      "grad_norm": 2.703125,
      "learning_rate": 2.302247456456667e-05,
      "loss": 0.7534,
      "step": 501260
    },
    {
      "epoch": 1.75682637815559,
      "grad_norm": 3.1875,
      "learning_rate": 2.302182553590297e-05,
      "loss": 0.775,
      "step": 501270
    },
    {
      "epoch": 1.7568614256624855,
      "grad_norm": 2.921875,
      "learning_rate": 2.3021176507239267e-05,
      "loss": 0.8011,
      "step": 501280
    },
    {
      "epoch": 1.7568964731693812,
      "grad_norm": 3.171875,
      "learning_rate": 2.3020527478575565e-05,
      "loss": 0.8276,
      "step": 501290
    },
    {
      "epoch": 1.7569315206762766,
      "grad_norm": 3.125,
      "learning_rate": 2.3019878449911863e-05,
      "loss": 0.8554,
      "step": 501300
    },
    {
      "epoch": 1.7569665681831723,
      "grad_norm": 3.140625,
      "learning_rate": 2.301922942124816e-05,
      "loss": 0.8799,
      "step": 501310
    },
    {
      "epoch": 1.7570016156900679,
      "grad_norm": 2.953125,
      "learning_rate": 2.301858039258446e-05,
      "loss": 0.8825,
      "step": 501320
    },
    {
      "epoch": 1.7570366631969634,
      "grad_norm": 2.796875,
      "learning_rate": 2.3017931363920757e-05,
      "loss": 0.8614,
      "step": 501330
    },
    {
      "epoch": 1.7570717107038591,
      "grad_norm": 2.890625,
      "learning_rate": 2.3017282335257058e-05,
      "loss": 0.7868,
      "step": 501340
    },
    {
      "epoch": 1.7571067582107547,
      "grad_norm": 2.484375,
      "learning_rate": 2.3016633306593356e-05,
      "loss": 0.7855,
      "step": 501350
    },
    {
      "epoch": 1.7571418057176502,
      "grad_norm": 3.140625,
      "learning_rate": 2.301598427792965e-05,
      "loss": 0.7807,
      "step": 501360
    },
    {
      "epoch": 1.757176853224546,
      "grad_norm": 3.21875,
      "learning_rate": 2.301533524926595e-05,
      "loss": 0.8441,
      "step": 501370
    },
    {
      "epoch": 1.7572119007314415,
      "grad_norm": 3.40625,
      "learning_rate": 2.3014686220602247e-05,
      "loss": 0.8717,
      "step": 501380
    },
    {
      "epoch": 1.757246948238337,
      "grad_norm": 2.578125,
      "learning_rate": 2.3014037191938545e-05,
      "loss": 0.8956,
      "step": 501390
    },
    {
      "epoch": 1.7572819957452328,
      "grad_norm": 2.75,
      "learning_rate": 2.3013388163274843e-05,
      "loss": 0.8784,
      "step": 501400
    },
    {
      "epoch": 1.7573170432521281,
      "grad_norm": 2.640625,
      "learning_rate": 2.301273913461114e-05,
      "loss": 0.8335,
      "step": 501410
    },
    {
      "epoch": 1.7573520907590239,
      "grad_norm": 2.78125,
      "learning_rate": 2.301209010594744e-05,
      "loss": 0.8233,
      "step": 501420
    },
    {
      "epoch": 1.7573871382659194,
      "grad_norm": 2.796875,
      "learning_rate": 2.3011441077283737e-05,
      "loss": 0.8931,
      "step": 501430
    },
    {
      "epoch": 1.757422185772815,
      "grad_norm": 2.671875,
      "learning_rate": 2.3010792048620035e-05,
      "loss": 0.8231,
      "step": 501440
    },
    {
      "epoch": 1.7574572332797107,
      "grad_norm": 2.984375,
      "learning_rate": 2.3010143019956333e-05,
      "loss": 0.8647,
      "step": 501450
    },
    {
      "epoch": 1.7574922807866062,
      "grad_norm": 2.84375,
      "learning_rate": 2.3009493991292634e-05,
      "loss": 0.7903,
      "step": 501460
    },
    {
      "epoch": 1.7575273282935018,
      "grad_norm": 2.71875,
      "learning_rate": 2.3008844962628932e-05,
      "loss": 0.8562,
      "step": 501470
    },
    {
      "epoch": 1.7575623758003975,
      "grad_norm": 3.078125,
      "learning_rate": 2.300819593396523e-05,
      "loss": 0.8843,
      "step": 501480
    },
    {
      "epoch": 1.757597423307293,
      "grad_norm": 2.640625,
      "learning_rate": 2.3007546905301528e-05,
      "loss": 0.8129,
      "step": 501490
    },
    {
      "epoch": 1.7576324708141886,
      "grad_norm": 2.796875,
      "learning_rate": 2.3006897876637826e-05,
      "loss": 0.8043,
      "step": 501500
    },
    {
      "epoch": 1.7576675183210844,
      "grad_norm": 2.734375,
      "learning_rate": 2.3006248847974124e-05,
      "loss": 0.859,
      "step": 501510
    },
    {
      "epoch": 1.7577025658279797,
      "grad_norm": 3.296875,
      "learning_rate": 2.3005599819310422e-05,
      "loss": 0.9057,
      "step": 501520
    },
    {
      "epoch": 1.7577376133348754,
      "grad_norm": 3.296875,
      "learning_rate": 2.300495079064672e-05,
      "loss": 0.8199,
      "step": 501530
    },
    {
      "epoch": 1.757772660841771,
      "grad_norm": 2.578125,
      "learning_rate": 2.3004301761983018e-05,
      "loss": 0.8727,
      "step": 501540
    },
    {
      "epoch": 1.7578077083486665,
      "grad_norm": 2.75,
      "learning_rate": 2.3003652733319313e-05,
      "loss": 0.8458,
      "step": 501550
    },
    {
      "epoch": 1.7578427558555623,
      "grad_norm": 2.953125,
      "learning_rate": 2.300300370465561e-05,
      "loss": 0.8405,
      "step": 501560
    },
    {
      "epoch": 1.7578778033624578,
      "grad_norm": 3.03125,
      "learning_rate": 2.3002354675991912e-05,
      "loss": 0.7862,
      "step": 501570
    },
    {
      "epoch": 1.7579128508693533,
      "grad_norm": 3.046875,
      "learning_rate": 2.300170564732821e-05,
      "loss": 0.8973,
      "step": 501580
    },
    {
      "epoch": 1.757947898376249,
      "grad_norm": 3.0,
      "learning_rate": 2.3001056618664508e-05,
      "loss": 0.8275,
      "step": 501590
    },
    {
      "epoch": 1.7579829458831446,
      "grad_norm": 2.8125,
      "learning_rate": 2.3000407590000806e-05,
      "loss": 0.8473,
      "step": 501600
    },
    {
      "epoch": 1.7580179933900402,
      "grad_norm": 2.78125,
      "learning_rate": 2.2999758561337104e-05,
      "loss": 0.9123,
      "step": 501610
    },
    {
      "epoch": 1.758053040896936,
      "grad_norm": 2.875,
      "learning_rate": 2.2999109532673402e-05,
      "loss": 0.8379,
      "step": 501620
    },
    {
      "epoch": 1.7580880884038312,
      "grad_norm": 2.984375,
      "learning_rate": 2.29984605040097e-05,
      "loss": 0.747,
      "step": 501630
    },
    {
      "epoch": 1.758123135910727,
      "grad_norm": 3.140625,
      "learning_rate": 2.2997811475345998e-05,
      "loss": 0.7759,
      "step": 501640
    },
    {
      "epoch": 1.7581581834176225,
      "grad_norm": 3.015625,
      "learning_rate": 2.2997162446682296e-05,
      "loss": 0.7759,
      "step": 501650
    },
    {
      "epoch": 1.758193230924518,
      "grad_norm": 2.78125,
      "learning_rate": 2.2996513418018594e-05,
      "loss": 0.8309,
      "step": 501660
    },
    {
      "epoch": 1.7582282784314138,
      "grad_norm": 2.421875,
      "learning_rate": 2.2995864389354892e-05,
      "loss": 0.7978,
      "step": 501670
    },
    {
      "epoch": 1.7582633259383094,
      "grad_norm": 2.5,
      "learning_rate": 2.299521536069119e-05,
      "loss": 0.8083,
      "step": 501680
    },
    {
      "epoch": 1.758298373445205,
      "grad_norm": 2.75,
      "learning_rate": 2.2994566332027488e-05,
      "loss": 0.84,
      "step": 501690
    },
    {
      "epoch": 1.7583334209521007,
      "grad_norm": 2.734375,
      "learning_rate": 2.2993917303363786e-05,
      "loss": 0.8218,
      "step": 501700
    },
    {
      "epoch": 1.7583684684589962,
      "grad_norm": 2.78125,
      "learning_rate": 2.2993268274700088e-05,
      "loss": 0.8219,
      "step": 501710
    },
    {
      "epoch": 1.7584035159658917,
      "grad_norm": 2.46875,
      "learning_rate": 2.2992619246036386e-05,
      "loss": 0.7601,
      "step": 501720
    },
    {
      "epoch": 1.7584385634727875,
      "grad_norm": 2.78125,
      "learning_rate": 2.2991970217372684e-05,
      "loss": 0.8244,
      "step": 501730
    },
    {
      "epoch": 1.7584736109796828,
      "grad_norm": 2.71875,
      "learning_rate": 2.2991321188708978e-05,
      "loss": 0.7459,
      "step": 501740
    },
    {
      "epoch": 1.7585086584865786,
      "grad_norm": 2.96875,
      "learning_rate": 2.2990672160045276e-05,
      "loss": 0.7668,
      "step": 501750
    },
    {
      "epoch": 1.7585437059934743,
      "grad_norm": 2.859375,
      "learning_rate": 2.2990023131381574e-05,
      "loss": 0.837,
      "step": 501760
    },
    {
      "epoch": 1.7585787535003696,
      "grad_norm": 3.40625,
      "learning_rate": 2.2989374102717872e-05,
      "loss": 0.9214,
      "step": 501770
    },
    {
      "epoch": 1.7586138010072654,
      "grad_norm": 3.421875,
      "learning_rate": 2.298872507405417e-05,
      "loss": 0.8481,
      "step": 501780
    },
    {
      "epoch": 1.758648848514161,
      "grad_norm": 2.96875,
      "learning_rate": 2.2988076045390468e-05,
      "loss": 0.8423,
      "step": 501790
    },
    {
      "epoch": 1.7586838960210565,
      "grad_norm": 3.140625,
      "learning_rate": 2.2987427016726766e-05,
      "loss": 0.81,
      "step": 501800
    },
    {
      "epoch": 1.7587189435279522,
      "grad_norm": 2.703125,
      "learning_rate": 2.2986777988063064e-05,
      "loss": 0.7843,
      "step": 501810
    },
    {
      "epoch": 1.7587539910348478,
      "grad_norm": 2.828125,
      "learning_rate": 2.2986128959399366e-05,
      "loss": 0.7453,
      "step": 501820
    },
    {
      "epoch": 1.7587890385417433,
      "grad_norm": 2.59375,
      "learning_rate": 2.2985479930735664e-05,
      "loss": 0.7868,
      "step": 501830
    },
    {
      "epoch": 1.758824086048639,
      "grad_norm": 3.28125,
      "learning_rate": 2.298483090207196e-05,
      "loss": 0.8197,
      "step": 501840
    },
    {
      "epoch": 1.7588591335555346,
      "grad_norm": 2.46875,
      "learning_rate": 2.298418187340826e-05,
      "loss": 0.8455,
      "step": 501850
    },
    {
      "epoch": 1.7588941810624301,
      "grad_norm": 3.265625,
      "learning_rate": 2.2983532844744558e-05,
      "loss": 0.7999,
      "step": 501860
    },
    {
      "epoch": 1.7589292285693259,
      "grad_norm": 2.46875,
      "learning_rate": 2.2982883816080856e-05,
      "loss": 0.8721,
      "step": 501870
    },
    {
      "epoch": 1.7589642760762212,
      "grad_norm": 2.90625,
      "learning_rate": 2.2982234787417154e-05,
      "loss": 0.8349,
      "step": 501880
    },
    {
      "epoch": 1.758999323583117,
      "grad_norm": 2.796875,
      "learning_rate": 2.298158575875345e-05,
      "loss": 0.8131,
      "step": 501890
    },
    {
      "epoch": 1.7590343710900125,
      "grad_norm": 2.78125,
      "learning_rate": 2.298093673008975e-05,
      "loss": 0.8305,
      "step": 501900
    },
    {
      "epoch": 1.759069418596908,
      "grad_norm": 2.875,
      "learning_rate": 2.2980287701426048e-05,
      "loss": 0.8068,
      "step": 501910
    },
    {
      "epoch": 1.7591044661038038,
      "grad_norm": 2.453125,
      "learning_rate": 2.2979638672762342e-05,
      "loss": 0.7941,
      "step": 501920
    },
    {
      "epoch": 1.7591395136106993,
      "grad_norm": 2.953125,
      "learning_rate": 2.297898964409864e-05,
      "loss": 0.7555,
      "step": 501930
    },
    {
      "epoch": 1.7591745611175948,
      "grad_norm": 3.265625,
      "learning_rate": 2.297834061543494e-05,
      "loss": 0.8302,
      "step": 501940
    },
    {
      "epoch": 1.7592096086244906,
      "grad_norm": 2.8125,
      "learning_rate": 2.297769158677124e-05,
      "loss": 0.8456,
      "step": 501950
    },
    {
      "epoch": 1.7592446561313861,
      "grad_norm": 2.546875,
      "learning_rate": 2.2977042558107538e-05,
      "loss": 0.7991,
      "step": 501960
    },
    {
      "epoch": 1.7592797036382817,
      "grad_norm": 3.015625,
      "learning_rate": 2.2976393529443836e-05,
      "loss": 0.8975,
      "step": 501970
    },
    {
      "epoch": 1.7593147511451774,
      "grad_norm": 2.96875,
      "learning_rate": 2.2975744500780134e-05,
      "loss": 0.8548,
      "step": 501980
    },
    {
      "epoch": 1.7593497986520727,
      "grad_norm": 2.796875,
      "learning_rate": 2.297509547211643e-05,
      "loss": 0.8753,
      "step": 501990
    },
    {
      "epoch": 1.7593848461589685,
      "grad_norm": 2.890625,
      "learning_rate": 2.297444644345273e-05,
      "loss": 0.8329,
      "step": 502000
    },
    {
      "epoch": 1.759419893665864,
      "grad_norm": 2.796875,
      "learning_rate": 2.2973797414789028e-05,
      "loss": 0.8257,
      "step": 502010
    },
    {
      "epoch": 1.7594549411727596,
      "grad_norm": 2.59375,
      "learning_rate": 2.2973148386125326e-05,
      "loss": 0.8268,
      "step": 502020
    },
    {
      "epoch": 1.7594899886796553,
      "grad_norm": 2.78125,
      "learning_rate": 2.2972499357461624e-05,
      "loss": 0.7743,
      "step": 502030
    },
    {
      "epoch": 1.7595250361865509,
      "grad_norm": 2.65625,
      "learning_rate": 2.297185032879792e-05,
      "loss": 0.8127,
      "step": 502040
    },
    {
      "epoch": 1.7595600836934464,
      "grad_norm": 2.953125,
      "learning_rate": 2.297120130013422e-05,
      "loss": 0.8209,
      "step": 502050
    },
    {
      "epoch": 1.7595951312003422,
      "grad_norm": 3.28125,
      "learning_rate": 2.2970552271470518e-05,
      "loss": 0.8074,
      "step": 502060
    },
    {
      "epoch": 1.7596301787072377,
      "grad_norm": 2.953125,
      "learning_rate": 2.2969903242806816e-05,
      "loss": 0.8112,
      "step": 502070
    },
    {
      "epoch": 1.7596652262141332,
      "grad_norm": 2.828125,
      "learning_rate": 2.2969254214143117e-05,
      "loss": 0.7898,
      "step": 502080
    },
    {
      "epoch": 1.759700273721029,
      "grad_norm": 3.15625,
      "learning_rate": 2.2968605185479415e-05,
      "loss": 0.7893,
      "step": 502090
    },
    {
      "epoch": 1.7597353212279243,
      "grad_norm": 2.625,
      "learning_rate": 2.2967956156815713e-05,
      "loss": 0.8811,
      "step": 502100
    },
    {
      "epoch": 1.75977036873482,
      "grad_norm": 2.859375,
      "learning_rate": 2.2967307128152008e-05,
      "loss": 0.8559,
      "step": 502110
    },
    {
      "epoch": 1.7598054162417156,
      "grad_norm": 2.453125,
      "learning_rate": 2.2966658099488306e-05,
      "loss": 0.7748,
      "step": 502120
    },
    {
      "epoch": 1.7598404637486111,
      "grad_norm": 2.703125,
      "learning_rate": 2.2966009070824604e-05,
      "loss": 0.7858,
      "step": 502130
    },
    {
      "epoch": 1.759875511255507,
      "grad_norm": 3.046875,
      "learning_rate": 2.29653600421609e-05,
      "loss": 0.8941,
      "step": 502140
    },
    {
      "epoch": 1.7599105587624024,
      "grad_norm": 2.984375,
      "learning_rate": 2.29647110134972e-05,
      "loss": 0.7821,
      "step": 502150
    },
    {
      "epoch": 1.759945606269298,
      "grad_norm": 3.015625,
      "learning_rate": 2.2964061984833498e-05,
      "loss": 0.7903,
      "step": 502160
    },
    {
      "epoch": 1.7599806537761937,
      "grad_norm": 2.75,
      "learning_rate": 2.2963412956169796e-05,
      "loss": 0.8282,
      "step": 502170
    },
    {
      "epoch": 1.7600157012830893,
      "grad_norm": 2.8125,
      "learning_rate": 2.2962763927506094e-05,
      "loss": 0.7753,
      "step": 502180
    },
    {
      "epoch": 1.7600507487899848,
      "grad_norm": 2.90625,
      "learning_rate": 2.2962114898842395e-05,
      "loss": 0.8067,
      "step": 502190
    },
    {
      "epoch": 1.7600857962968806,
      "grad_norm": 2.953125,
      "learning_rate": 2.2961465870178693e-05,
      "loss": 0.7999,
      "step": 502200
    },
    {
      "epoch": 1.7601208438037759,
      "grad_norm": 3.015625,
      "learning_rate": 2.296081684151499e-05,
      "loss": 0.782,
      "step": 502210
    },
    {
      "epoch": 1.7601558913106716,
      "grad_norm": 3.125,
      "learning_rate": 2.296016781285129e-05,
      "loss": 0.8311,
      "step": 502220
    },
    {
      "epoch": 1.7601909388175672,
      "grad_norm": 3.171875,
      "learning_rate": 2.2959518784187587e-05,
      "loss": 0.8239,
      "step": 502230
    },
    {
      "epoch": 1.7602259863244627,
      "grad_norm": 2.921875,
      "learning_rate": 2.2958869755523885e-05,
      "loss": 0.8093,
      "step": 502240
    },
    {
      "epoch": 1.7602610338313585,
      "grad_norm": 2.9375,
      "learning_rate": 2.2958220726860183e-05,
      "loss": 0.8364,
      "step": 502250
    },
    {
      "epoch": 1.760296081338254,
      "grad_norm": 3.1875,
      "learning_rate": 2.295757169819648e-05,
      "loss": 0.8994,
      "step": 502260
    },
    {
      "epoch": 1.7603311288451495,
      "grad_norm": 2.8125,
      "learning_rate": 2.295692266953278e-05,
      "loss": 0.8226,
      "step": 502270
    },
    {
      "epoch": 1.7603661763520453,
      "grad_norm": 2.625,
      "learning_rate": 2.2956273640869077e-05,
      "loss": 0.8499,
      "step": 502280
    },
    {
      "epoch": 1.7604012238589408,
      "grad_norm": 2.625,
      "learning_rate": 2.2955624612205375e-05,
      "loss": 0.8277,
      "step": 502290
    },
    {
      "epoch": 1.7604362713658364,
      "grad_norm": 2.484375,
      "learning_rate": 2.2954975583541673e-05,
      "loss": 0.7751,
      "step": 502300
    },
    {
      "epoch": 1.7604713188727321,
      "grad_norm": 3.21875,
      "learning_rate": 2.295432655487797e-05,
      "loss": 0.8002,
      "step": 502310
    },
    {
      "epoch": 1.7605063663796274,
      "grad_norm": 3.15625,
      "learning_rate": 2.295367752621427e-05,
      "loss": 0.9291,
      "step": 502320
    },
    {
      "epoch": 1.7605414138865232,
      "grad_norm": 2.65625,
      "learning_rate": 2.2953028497550567e-05,
      "loss": 0.8079,
      "step": 502330
    },
    {
      "epoch": 1.760576461393419,
      "grad_norm": 3.328125,
      "learning_rate": 2.2952379468886865e-05,
      "loss": 0.941,
      "step": 502340
    },
    {
      "epoch": 1.7606115089003143,
      "grad_norm": 3.09375,
      "learning_rate": 2.2951730440223163e-05,
      "loss": 0.8391,
      "step": 502350
    },
    {
      "epoch": 1.76064655640721,
      "grad_norm": 3.484375,
      "learning_rate": 2.295108141155946e-05,
      "loss": 0.8228,
      "step": 502360
    },
    {
      "epoch": 1.7606816039141056,
      "grad_norm": 2.765625,
      "learning_rate": 2.295043238289576e-05,
      "loss": 0.7789,
      "step": 502370
    },
    {
      "epoch": 1.760716651421001,
      "grad_norm": 2.96875,
      "learning_rate": 2.2949783354232057e-05,
      "loss": 0.8766,
      "step": 502380
    },
    {
      "epoch": 1.7607516989278968,
      "grad_norm": 2.9375,
      "learning_rate": 2.2949134325568355e-05,
      "loss": 0.7972,
      "step": 502390
    },
    {
      "epoch": 1.7607867464347924,
      "grad_norm": 3.328125,
      "learning_rate": 2.2948485296904653e-05,
      "loss": 0.9085,
      "step": 502400
    },
    {
      "epoch": 1.760821793941688,
      "grad_norm": 2.640625,
      "learning_rate": 2.294783626824095e-05,
      "loss": 0.8174,
      "step": 502410
    },
    {
      "epoch": 1.7608568414485837,
      "grad_norm": 3.125,
      "learning_rate": 2.294718723957725e-05,
      "loss": 0.7781,
      "step": 502420
    },
    {
      "epoch": 1.760891888955479,
      "grad_norm": 2.578125,
      "learning_rate": 2.2946538210913547e-05,
      "loss": 0.9044,
      "step": 502430
    },
    {
      "epoch": 1.7609269364623747,
      "grad_norm": 2.859375,
      "learning_rate": 2.294588918224985e-05,
      "loss": 0.8149,
      "step": 502440
    },
    {
      "epoch": 1.7609619839692705,
      "grad_norm": 2.8125,
      "learning_rate": 2.2945240153586146e-05,
      "loss": 0.8547,
      "step": 502450
    },
    {
      "epoch": 1.7609970314761658,
      "grad_norm": 3.1875,
      "learning_rate": 2.2944591124922444e-05,
      "loss": 0.7738,
      "step": 502460
    },
    {
      "epoch": 1.7610320789830616,
      "grad_norm": 2.984375,
      "learning_rate": 2.2943942096258742e-05,
      "loss": 0.8987,
      "step": 502470
    },
    {
      "epoch": 1.7610671264899571,
      "grad_norm": 3.078125,
      "learning_rate": 2.294329306759504e-05,
      "loss": 0.8281,
      "step": 502480
    },
    {
      "epoch": 1.7611021739968526,
      "grad_norm": 3.046875,
      "learning_rate": 2.2942644038931335e-05,
      "loss": 0.8191,
      "step": 502490
    },
    {
      "epoch": 1.7611372215037484,
      "grad_norm": 2.71875,
      "learning_rate": 2.2941995010267633e-05,
      "loss": 0.8467,
      "step": 502500
    },
    {
      "epoch": 1.761172269010644,
      "grad_norm": 3.109375,
      "learning_rate": 2.294134598160393e-05,
      "loss": 0.8642,
      "step": 502510
    },
    {
      "epoch": 1.7612073165175395,
      "grad_norm": 2.734375,
      "learning_rate": 2.294069695294023e-05,
      "loss": 0.7256,
      "step": 502520
    },
    {
      "epoch": 1.7612423640244352,
      "grad_norm": 3.359375,
      "learning_rate": 2.2940047924276527e-05,
      "loss": 0.8874,
      "step": 502530
    },
    {
      "epoch": 1.7612774115313308,
      "grad_norm": 3.046875,
      "learning_rate": 2.2939398895612825e-05,
      "loss": 0.7853,
      "step": 502540
    },
    {
      "epoch": 1.7613124590382263,
      "grad_norm": 2.671875,
      "learning_rate": 2.2938749866949123e-05,
      "loss": 0.8001,
      "step": 502550
    },
    {
      "epoch": 1.761347506545122,
      "grad_norm": 3.078125,
      "learning_rate": 2.2938100838285424e-05,
      "loss": 0.8618,
      "step": 502560
    },
    {
      "epoch": 1.7613825540520174,
      "grad_norm": 2.765625,
      "learning_rate": 2.2937451809621722e-05,
      "loss": 0.7851,
      "step": 502570
    },
    {
      "epoch": 1.7614176015589131,
      "grad_norm": 2.40625,
      "learning_rate": 2.293680278095802e-05,
      "loss": 0.8063,
      "step": 502580
    },
    {
      "epoch": 1.7614526490658087,
      "grad_norm": 3.171875,
      "learning_rate": 2.293615375229432e-05,
      "loss": 0.834,
      "step": 502590
    },
    {
      "epoch": 1.7614876965727042,
      "grad_norm": 3.03125,
      "learning_rate": 2.2935504723630616e-05,
      "loss": 0.8151,
      "step": 502600
    },
    {
      "epoch": 1.7615227440796,
      "grad_norm": 3.234375,
      "learning_rate": 2.2934855694966914e-05,
      "loss": 0.9271,
      "step": 502610
    },
    {
      "epoch": 1.7615577915864955,
      "grad_norm": 3.125,
      "learning_rate": 2.2934206666303212e-05,
      "loss": 0.8386,
      "step": 502620
    },
    {
      "epoch": 1.761592839093391,
      "grad_norm": 2.65625,
      "learning_rate": 2.293355763763951e-05,
      "loss": 0.8361,
      "step": 502630
    },
    {
      "epoch": 1.7616278866002868,
      "grad_norm": 3.109375,
      "learning_rate": 2.293290860897581e-05,
      "loss": 0.8418,
      "step": 502640
    },
    {
      "epoch": 1.7616629341071823,
      "grad_norm": 2.921875,
      "learning_rate": 2.2932259580312106e-05,
      "loss": 0.7939,
      "step": 502650
    },
    {
      "epoch": 1.7616979816140779,
      "grad_norm": 2.5,
      "learning_rate": 2.2931610551648404e-05,
      "loss": 0.7708,
      "step": 502660
    },
    {
      "epoch": 1.7617330291209736,
      "grad_norm": 2.609375,
      "learning_rate": 2.2930961522984702e-05,
      "loss": 0.8277,
      "step": 502670
    },
    {
      "epoch": 1.761768076627869,
      "grad_norm": 2.84375,
      "learning_rate": 2.2930312494321e-05,
      "loss": 0.8419,
      "step": 502680
    },
    {
      "epoch": 1.7618031241347647,
      "grad_norm": 3.140625,
      "learning_rate": 2.29296634656573e-05,
      "loss": 0.8539,
      "step": 502690
    },
    {
      "epoch": 1.7618381716416602,
      "grad_norm": 3.03125,
      "learning_rate": 2.2929014436993596e-05,
      "loss": 0.8569,
      "step": 502700
    },
    {
      "epoch": 1.7618732191485558,
      "grad_norm": 2.640625,
      "learning_rate": 2.2928365408329894e-05,
      "loss": 0.9236,
      "step": 502710
    },
    {
      "epoch": 1.7619082666554515,
      "grad_norm": 2.890625,
      "learning_rate": 2.2927716379666192e-05,
      "loss": 0.8148,
      "step": 502720
    },
    {
      "epoch": 1.761943314162347,
      "grad_norm": 2.75,
      "learning_rate": 2.292706735100249e-05,
      "loss": 0.8317,
      "step": 502730
    },
    {
      "epoch": 1.7619783616692426,
      "grad_norm": 2.859375,
      "learning_rate": 2.292641832233879e-05,
      "loss": 0.8152,
      "step": 502740
    },
    {
      "epoch": 1.7620134091761384,
      "grad_norm": 2.640625,
      "learning_rate": 2.2925769293675086e-05,
      "loss": 0.8649,
      "step": 502750
    },
    {
      "epoch": 1.762048456683034,
      "grad_norm": 3.046875,
      "learning_rate": 2.2925120265011384e-05,
      "loss": 0.8512,
      "step": 502760
    },
    {
      "epoch": 1.7620835041899294,
      "grad_norm": 2.78125,
      "learning_rate": 2.2924471236347682e-05,
      "loss": 0.8224,
      "step": 502770
    },
    {
      "epoch": 1.7621185516968252,
      "grad_norm": 2.90625,
      "learning_rate": 2.292382220768398e-05,
      "loss": 0.9006,
      "step": 502780
    },
    {
      "epoch": 1.7621535992037205,
      "grad_norm": 3.03125,
      "learning_rate": 2.292317317902028e-05,
      "loss": 0.892,
      "step": 502790
    },
    {
      "epoch": 1.7621886467106163,
      "grad_norm": 2.828125,
      "learning_rate": 2.2922524150356576e-05,
      "loss": 0.8119,
      "step": 502800
    },
    {
      "epoch": 1.7622236942175118,
      "grad_norm": 3.4375,
      "learning_rate": 2.2921875121692878e-05,
      "loss": 0.7742,
      "step": 502810
    },
    {
      "epoch": 1.7622587417244073,
      "grad_norm": 2.640625,
      "learning_rate": 2.2921226093029176e-05,
      "loss": 0.8446,
      "step": 502820
    },
    {
      "epoch": 1.762293789231303,
      "grad_norm": 2.90625,
      "learning_rate": 2.2920577064365474e-05,
      "loss": 0.7629,
      "step": 502830
    },
    {
      "epoch": 1.7623288367381986,
      "grad_norm": 2.578125,
      "learning_rate": 2.2919928035701772e-05,
      "loss": 0.8225,
      "step": 502840
    },
    {
      "epoch": 1.7623638842450942,
      "grad_norm": 2.828125,
      "learning_rate": 2.291927900703807e-05,
      "loss": 0.8987,
      "step": 502850
    },
    {
      "epoch": 1.76239893175199,
      "grad_norm": 2.890625,
      "learning_rate": 2.2918629978374364e-05,
      "loss": 0.8618,
      "step": 502860
    },
    {
      "epoch": 1.7624339792588855,
      "grad_norm": 2.828125,
      "learning_rate": 2.2917980949710662e-05,
      "loss": 0.7841,
      "step": 502870
    },
    {
      "epoch": 1.762469026765781,
      "grad_norm": 3.0625,
      "learning_rate": 2.291733192104696e-05,
      "loss": 0.8447,
      "step": 502880
    },
    {
      "epoch": 1.7625040742726767,
      "grad_norm": 3.09375,
      "learning_rate": 2.291668289238326e-05,
      "loss": 0.8332,
      "step": 502890
    },
    {
      "epoch": 1.762539121779572,
      "grad_norm": 2.53125,
      "learning_rate": 2.2916033863719556e-05,
      "loss": 0.8103,
      "step": 502900
    },
    {
      "epoch": 1.7625741692864678,
      "grad_norm": 3.234375,
      "learning_rate": 2.2915384835055854e-05,
      "loss": 0.9094,
      "step": 502910
    },
    {
      "epoch": 1.7626092167933634,
      "grad_norm": 3.0,
      "learning_rate": 2.2914735806392156e-05,
      "loss": 0.8623,
      "step": 502920
    },
    {
      "epoch": 1.762644264300259,
      "grad_norm": 2.765625,
      "learning_rate": 2.2914086777728454e-05,
      "loss": 0.7613,
      "step": 502930
    },
    {
      "epoch": 1.7626793118071546,
      "grad_norm": 3.234375,
      "learning_rate": 2.2913437749064752e-05,
      "loss": 0.8989,
      "step": 502940
    },
    {
      "epoch": 1.7627143593140502,
      "grad_norm": 2.78125,
      "learning_rate": 2.291278872040105e-05,
      "loss": 0.8275,
      "step": 502950
    },
    {
      "epoch": 1.7627494068209457,
      "grad_norm": 3.15625,
      "learning_rate": 2.2912139691737348e-05,
      "loss": 0.7834,
      "step": 502960
    },
    {
      "epoch": 1.7627844543278415,
      "grad_norm": 2.796875,
      "learning_rate": 2.2911490663073646e-05,
      "loss": 0.7934,
      "step": 502970
    },
    {
      "epoch": 1.762819501834737,
      "grad_norm": 2.9375,
      "learning_rate": 2.2910841634409944e-05,
      "loss": 0.8141,
      "step": 502980
    },
    {
      "epoch": 1.7628545493416325,
      "grad_norm": 2.484375,
      "learning_rate": 2.2910192605746242e-05,
      "loss": 0.8675,
      "step": 502990
    },
    {
      "epoch": 1.7628895968485283,
      "grad_norm": 2.984375,
      "learning_rate": 2.290954357708254e-05,
      "loss": 0.7769,
      "step": 503000
    },
    {
      "epoch": 1.7629246443554236,
      "grad_norm": 3.75,
      "learning_rate": 2.2908894548418838e-05,
      "loss": 0.8888,
      "step": 503010
    },
    {
      "epoch": 1.7629596918623194,
      "grad_norm": 3.078125,
      "learning_rate": 2.2908245519755136e-05,
      "loss": 0.7823,
      "step": 503020
    },
    {
      "epoch": 1.7629947393692151,
      "grad_norm": 3.375,
      "learning_rate": 2.2907596491091434e-05,
      "loss": 0.9586,
      "step": 503030
    },
    {
      "epoch": 1.7630297868761104,
      "grad_norm": 3.5625,
      "learning_rate": 2.2906947462427732e-05,
      "loss": 0.7979,
      "step": 503040
    },
    {
      "epoch": 1.7630648343830062,
      "grad_norm": 3.03125,
      "learning_rate": 2.290629843376403e-05,
      "loss": 0.7666,
      "step": 503050
    },
    {
      "epoch": 1.7630998818899017,
      "grad_norm": 2.890625,
      "learning_rate": 2.2905649405100328e-05,
      "loss": 0.8863,
      "step": 503060
    },
    {
      "epoch": 1.7631349293967973,
      "grad_norm": 2.21875,
      "learning_rate": 2.2905000376436626e-05,
      "loss": 0.7342,
      "step": 503070
    },
    {
      "epoch": 1.763169976903693,
      "grad_norm": 2.96875,
      "learning_rate": 2.2904351347772924e-05,
      "loss": 0.8269,
      "step": 503080
    },
    {
      "epoch": 1.7632050244105886,
      "grad_norm": 3.125,
      "learning_rate": 2.2903702319109222e-05,
      "loss": 0.8928,
      "step": 503090
    },
    {
      "epoch": 1.763240071917484,
      "grad_norm": 3.453125,
      "learning_rate": 2.290305329044552e-05,
      "loss": 0.9205,
      "step": 503100
    },
    {
      "epoch": 1.7632751194243799,
      "grad_norm": 2.640625,
      "learning_rate": 2.2902404261781818e-05,
      "loss": 0.848,
      "step": 503110
    },
    {
      "epoch": 1.7633101669312752,
      "grad_norm": 2.71875,
      "learning_rate": 2.2901755233118116e-05,
      "loss": 0.8287,
      "step": 503120
    },
    {
      "epoch": 1.763345214438171,
      "grad_norm": 3.296875,
      "learning_rate": 2.2901106204454414e-05,
      "loss": 0.8574,
      "step": 503130
    },
    {
      "epoch": 1.7633802619450667,
      "grad_norm": 2.9375,
      "learning_rate": 2.2900457175790712e-05,
      "loss": 0.817,
      "step": 503140
    },
    {
      "epoch": 1.763415309451962,
      "grad_norm": 2.84375,
      "learning_rate": 2.289980814712701e-05,
      "loss": 0.8362,
      "step": 503150
    },
    {
      "epoch": 1.7634503569588578,
      "grad_norm": 2.5,
      "learning_rate": 2.2899159118463308e-05,
      "loss": 0.8021,
      "step": 503160
    },
    {
      "epoch": 1.7634854044657533,
      "grad_norm": 2.71875,
      "learning_rate": 2.2898510089799606e-05,
      "loss": 0.8159,
      "step": 503170
    },
    {
      "epoch": 1.7635204519726488,
      "grad_norm": 3.34375,
      "learning_rate": 2.2897861061135907e-05,
      "loss": 0.8076,
      "step": 503180
    },
    {
      "epoch": 1.7635554994795446,
      "grad_norm": 3.125,
      "learning_rate": 2.2897212032472205e-05,
      "loss": 0.8333,
      "step": 503190
    },
    {
      "epoch": 1.7635905469864401,
      "grad_norm": 2.640625,
      "learning_rate": 2.2896563003808503e-05,
      "loss": 0.8021,
      "step": 503200
    },
    {
      "epoch": 1.7636255944933357,
      "grad_norm": 2.65625,
      "learning_rate": 2.28959139751448e-05,
      "loss": 0.8098,
      "step": 503210
    },
    {
      "epoch": 1.7636606420002314,
      "grad_norm": 3.078125,
      "learning_rate": 2.28952649464811e-05,
      "loss": 0.7958,
      "step": 503220
    },
    {
      "epoch": 1.763695689507127,
      "grad_norm": 2.484375,
      "learning_rate": 2.2894615917817397e-05,
      "loss": 0.846,
      "step": 503230
    },
    {
      "epoch": 1.7637307370140225,
      "grad_norm": 3.109375,
      "learning_rate": 2.2893966889153692e-05,
      "loss": 0.8554,
      "step": 503240
    },
    {
      "epoch": 1.7637657845209183,
      "grad_norm": 3.015625,
      "learning_rate": 2.289331786048999e-05,
      "loss": 0.9606,
      "step": 503250
    },
    {
      "epoch": 1.7638008320278136,
      "grad_norm": 2.8125,
      "learning_rate": 2.2892668831826288e-05,
      "loss": 0.842,
      "step": 503260
    },
    {
      "epoch": 1.7638358795347093,
      "grad_norm": 2.78125,
      "learning_rate": 2.2892019803162586e-05,
      "loss": 0.7428,
      "step": 503270
    },
    {
      "epoch": 1.7638709270416049,
      "grad_norm": 2.9375,
      "learning_rate": 2.2891370774498884e-05,
      "loss": 0.8202,
      "step": 503280
    },
    {
      "epoch": 1.7639059745485004,
      "grad_norm": 2.5,
      "learning_rate": 2.2890721745835185e-05,
      "loss": 0.9072,
      "step": 503290
    },
    {
      "epoch": 1.7639410220553962,
      "grad_norm": 3.28125,
      "learning_rate": 2.2890072717171483e-05,
      "loss": 0.7616,
      "step": 503300
    },
    {
      "epoch": 1.7639760695622917,
      "grad_norm": 3.140625,
      "learning_rate": 2.288942368850778e-05,
      "loss": 0.9054,
      "step": 503310
    },
    {
      "epoch": 1.7640111170691872,
      "grad_norm": 2.546875,
      "learning_rate": 2.288877465984408e-05,
      "loss": 0.9731,
      "step": 503320
    },
    {
      "epoch": 1.764046164576083,
      "grad_norm": 3.015625,
      "learning_rate": 2.2888125631180377e-05,
      "loss": 0.8722,
      "step": 503330
    },
    {
      "epoch": 1.7640812120829785,
      "grad_norm": 2.953125,
      "learning_rate": 2.2887476602516675e-05,
      "loss": 0.8185,
      "step": 503340
    },
    {
      "epoch": 1.764116259589874,
      "grad_norm": 2.90625,
      "learning_rate": 2.2886827573852973e-05,
      "loss": 0.8044,
      "step": 503350
    },
    {
      "epoch": 1.7641513070967698,
      "grad_norm": 3.359375,
      "learning_rate": 2.288617854518927e-05,
      "loss": 0.8579,
      "step": 503360
    },
    {
      "epoch": 1.7641863546036651,
      "grad_norm": 2.921875,
      "learning_rate": 2.288552951652557e-05,
      "loss": 0.8179,
      "step": 503370
    },
    {
      "epoch": 1.7642214021105609,
      "grad_norm": 3.046875,
      "learning_rate": 2.2884880487861867e-05,
      "loss": 0.8291,
      "step": 503380
    },
    {
      "epoch": 1.7642564496174564,
      "grad_norm": 3.015625,
      "learning_rate": 2.2884231459198165e-05,
      "loss": 0.8024,
      "step": 503390
    },
    {
      "epoch": 1.764291497124352,
      "grad_norm": 2.453125,
      "learning_rate": 2.2883582430534463e-05,
      "loss": 0.8227,
      "step": 503400
    },
    {
      "epoch": 1.7643265446312477,
      "grad_norm": 3.3125,
      "learning_rate": 2.288293340187076e-05,
      "loss": 0.8765,
      "step": 503410
    },
    {
      "epoch": 1.7643615921381433,
      "grad_norm": 3.046875,
      "learning_rate": 2.288228437320706e-05,
      "loss": 0.8863,
      "step": 503420
    },
    {
      "epoch": 1.7643966396450388,
      "grad_norm": 2.421875,
      "learning_rate": 2.2881635344543357e-05,
      "loss": 0.7962,
      "step": 503430
    },
    {
      "epoch": 1.7644316871519345,
      "grad_norm": 3.21875,
      "learning_rate": 2.2880986315879655e-05,
      "loss": 0.9993,
      "step": 503440
    },
    {
      "epoch": 1.76446673465883,
      "grad_norm": 2.984375,
      "learning_rate": 2.2880337287215953e-05,
      "loss": 0.8352,
      "step": 503450
    },
    {
      "epoch": 1.7645017821657256,
      "grad_norm": 2.09375,
      "learning_rate": 2.287968825855225e-05,
      "loss": 0.8643,
      "step": 503460
    },
    {
      "epoch": 1.7645368296726214,
      "grad_norm": 2.890625,
      "learning_rate": 2.287903922988855e-05,
      "loss": 0.8295,
      "step": 503470
    },
    {
      "epoch": 1.7645718771795167,
      "grad_norm": 2.765625,
      "learning_rate": 2.2878390201224847e-05,
      "loss": 0.8714,
      "step": 503480
    },
    {
      "epoch": 1.7646069246864124,
      "grad_norm": 2.78125,
      "learning_rate": 2.2877741172561145e-05,
      "loss": 0.8894,
      "step": 503490
    },
    {
      "epoch": 1.764641972193308,
      "grad_norm": 2.1875,
      "learning_rate": 2.2877092143897443e-05,
      "loss": 0.8358,
      "step": 503500
    },
    {
      "epoch": 1.7646770197002035,
      "grad_norm": 3.0,
      "learning_rate": 2.287644311523374e-05,
      "loss": 0.8467,
      "step": 503510
    },
    {
      "epoch": 1.7647120672070993,
      "grad_norm": 2.953125,
      "learning_rate": 2.287579408657004e-05,
      "loss": 0.8561,
      "step": 503520
    },
    {
      "epoch": 1.7647471147139948,
      "grad_norm": 2.578125,
      "learning_rate": 2.2875145057906337e-05,
      "loss": 0.8119,
      "step": 503530
    },
    {
      "epoch": 1.7647821622208903,
      "grad_norm": 2.484375,
      "learning_rate": 2.287449602924264e-05,
      "loss": 0.7998,
      "step": 503540
    },
    {
      "epoch": 1.764817209727786,
      "grad_norm": 3.046875,
      "learning_rate": 2.2873847000578937e-05,
      "loss": 0.793,
      "step": 503550
    },
    {
      "epoch": 1.7648522572346816,
      "grad_norm": 2.375,
      "learning_rate": 2.2873197971915235e-05,
      "loss": 0.7976,
      "step": 503560
    },
    {
      "epoch": 1.7648873047415772,
      "grad_norm": 2.859375,
      "learning_rate": 2.2872548943251533e-05,
      "loss": 0.7769,
      "step": 503570
    },
    {
      "epoch": 1.764922352248473,
      "grad_norm": 3.359375,
      "learning_rate": 2.287189991458783e-05,
      "loss": 0.9037,
      "step": 503580
    },
    {
      "epoch": 1.7649573997553683,
      "grad_norm": 2.671875,
      "learning_rate": 2.287125088592413e-05,
      "loss": 0.9145,
      "step": 503590
    },
    {
      "epoch": 1.764992447262264,
      "grad_norm": 2.71875,
      "learning_rate": 2.2870601857260427e-05,
      "loss": 0.8485,
      "step": 503600
    },
    {
      "epoch": 1.7650274947691595,
      "grad_norm": 2.765625,
      "learning_rate": 2.2869952828596725e-05,
      "loss": 0.8927,
      "step": 503610
    },
    {
      "epoch": 1.765062542276055,
      "grad_norm": 2.5625,
      "learning_rate": 2.286930379993302e-05,
      "loss": 0.7724,
      "step": 503620
    },
    {
      "epoch": 1.7650975897829508,
      "grad_norm": 2.875,
      "learning_rate": 2.2868654771269317e-05,
      "loss": 0.8149,
      "step": 503630
    },
    {
      "epoch": 1.7651326372898464,
      "grad_norm": 2.953125,
      "learning_rate": 2.2868005742605615e-05,
      "loss": 0.8131,
      "step": 503640
    },
    {
      "epoch": 1.765167684796742,
      "grad_norm": 2.796875,
      "learning_rate": 2.2867356713941913e-05,
      "loss": 0.7525,
      "step": 503650
    },
    {
      "epoch": 1.7652027323036377,
      "grad_norm": 3.03125,
      "learning_rate": 2.2866707685278215e-05,
      "loss": 0.8597,
      "step": 503660
    },
    {
      "epoch": 1.7652377798105332,
      "grad_norm": 2.640625,
      "learning_rate": 2.2866058656614513e-05,
      "loss": 0.8111,
      "step": 503670
    },
    {
      "epoch": 1.7652728273174287,
      "grad_norm": 2.65625,
      "learning_rate": 2.286540962795081e-05,
      "loss": 0.8302,
      "step": 503680
    },
    {
      "epoch": 1.7653078748243245,
      "grad_norm": 3.0,
      "learning_rate": 2.286476059928711e-05,
      "loss": 0.851,
      "step": 503690
    },
    {
      "epoch": 1.7653429223312198,
      "grad_norm": 2.796875,
      "learning_rate": 2.2864111570623407e-05,
      "loss": 0.7948,
      "step": 503700
    },
    {
      "epoch": 1.7653779698381156,
      "grad_norm": 2.875,
      "learning_rate": 2.2863462541959705e-05,
      "loss": 0.9215,
      "step": 503710
    },
    {
      "epoch": 1.7654130173450113,
      "grad_norm": 2.859375,
      "learning_rate": 2.2862813513296003e-05,
      "loss": 0.8979,
      "step": 503720
    },
    {
      "epoch": 1.7654480648519066,
      "grad_norm": 2.40625,
      "learning_rate": 2.28621644846323e-05,
      "loss": 0.8259,
      "step": 503730
    },
    {
      "epoch": 1.7654831123588024,
      "grad_norm": 3.265625,
      "learning_rate": 2.28615154559686e-05,
      "loss": 0.8651,
      "step": 503740
    },
    {
      "epoch": 1.765518159865698,
      "grad_norm": 2.9375,
      "learning_rate": 2.2860866427304897e-05,
      "loss": 0.7998,
      "step": 503750
    },
    {
      "epoch": 1.7655532073725935,
      "grad_norm": 2.828125,
      "learning_rate": 2.2860217398641195e-05,
      "loss": 0.826,
      "step": 503760
    },
    {
      "epoch": 1.7655882548794892,
      "grad_norm": 2.859375,
      "learning_rate": 2.2859568369977493e-05,
      "loss": 0.8285,
      "step": 503770
    },
    {
      "epoch": 1.7656233023863848,
      "grad_norm": 2.40625,
      "learning_rate": 2.285891934131379e-05,
      "loss": 0.8259,
      "step": 503780
    },
    {
      "epoch": 1.7656583498932803,
      "grad_norm": 2.859375,
      "learning_rate": 2.2858270312650092e-05,
      "loss": 0.818,
      "step": 503790
    },
    {
      "epoch": 1.765693397400176,
      "grad_norm": 2.84375,
      "learning_rate": 2.2857621283986387e-05,
      "loss": 0.7664,
      "step": 503800
    },
    {
      "epoch": 1.7657284449070714,
      "grad_norm": 2.5625,
      "learning_rate": 2.2856972255322685e-05,
      "loss": 0.817,
      "step": 503810
    },
    {
      "epoch": 1.7657634924139671,
      "grad_norm": 3.140625,
      "learning_rate": 2.2856323226658983e-05,
      "loss": 0.8639,
      "step": 503820
    },
    {
      "epoch": 1.7657985399208629,
      "grad_norm": 2.8125,
      "learning_rate": 2.285567419799528e-05,
      "loss": 0.8291,
      "step": 503830
    },
    {
      "epoch": 1.7658335874277582,
      "grad_norm": 2.84375,
      "learning_rate": 2.285502516933158e-05,
      "loss": 0.826,
      "step": 503840
    },
    {
      "epoch": 1.765868634934654,
      "grad_norm": 2.71875,
      "learning_rate": 2.2854376140667877e-05,
      "loss": 0.823,
      "step": 503850
    },
    {
      "epoch": 1.7659036824415495,
      "grad_norm": 4.21875,
      "learning_rate": 2.2853727112004175e-05,
      "loss": 0.8698,
      "step": 503860
    },
    {
      "epoch": 1.765938729948445,
      "grad_norm": 2.65625,
      "learning_rate": 2.2853078083340473e-05,
      "loss": 0.7732,
      "step": 503870
    },
    {
      "epoch": 1.7659737774553408,
      "grad_norm": 2.796875,
      "learning_rate": 2.285242905467677e-05,
      "loss": 0.8598,
      "step": 503880
    },
    {
      "epoch": 1.7660088249622363,
      "grad_norm": 2.75,
      "learning_rate": 2.285178002601307e-05,
      "loss": 0.8845,
      "step": 503890
    },
    {
      "epoch": 1.7660438724691319,
      "grad_norm": 2.71875,
      "learning_rate": 2.2851130997349367e-05,
      "loss": 0.8159,
      "step": 503900
    },
    {
      "epoch": 1.7660789199760276,
      "grad_norm": 2.75,
      "learning_rate": 2.2850481968685668e-05,
      "loss": 0.7884,
      "step": 503910
    },
    {
      "epoch": 1.7661139674829232,
      "grad_norm": 2.96875,
      "learning_rate": 2.2849832940021966e-05,
      "loss": 0.8539,
      "step": 503920
    },
    {
      "epoch": 1.7661490149898187,
      "grad_norm": 2.953125,
      "learning_rate": 2.2849183911358264e-05,
      "loss": 0.8712,
      "step": 503930
    },
    {
      "epoch": 1.7661840624967144,
      "grad_norm": 3.125,
      "learning_rate": 2.2848534882694562e-05,
      "loss": 0.9066,
      "step": 503940
    },
    {
      "epoch": 1.7662191100036098,
      "grad_norm": 2.875,
      "learning_rate": 2.284788585403086e-05,
      "loss": 0.8483,
      "step": 503950
    },
    {
      "epoch": 1.7662541575105055,
      "grad_norm": 3.4375,
      "learning_rate": 2.2847236825367158e-05,
      "loss": 0.8788,
      "step": 503960
    },
    {
      "epoch": 1.766289205017401,
      "grad_norm": 3.03125,
      "learning_rate": 2.2846587796703456e-05,
      "loss": 0.8086,
      "step": 503970
    },
    {
      "epoch": 1.7663242525242966,
      "grad_norm": 3.078125,
      "learning_rate": 2.2845938768039754e-05,
      "loss": 0.8847,
      "step": 503980
    },
    {
      "epoch": 1.7663593000311923,
      "grad_norm": 3.078125,
      "learning_rate": 2.284528973937605e-05,
      "loss": 0.7115,
      "step": 503990
    },
    {
      "epoch": 1.7663943475380879,
      "grad_norm": 2.609375,
      "learning_rate": 2.2844640710712347e-05,
      "loss": 0.8421,
      "step": 504000
    },
    {
      "epoch": 1.7664293950449834,
      "grad_norm": 2.625,
      "learning_rate": 2.2843991682048645e-05,
      "loss": 0.8841,
      "step": 504010
    },
    {
      "epoch": 1.7664644425518792,
      "grad_norm": 2.875,
      "learning_rate": 2.2843342653384946e-05,
      "loss": 0.7944,
      "step": 504020
    },
    {
      "epoch": 1.7664994900587747,
      "grad_norm": 2.9375,
      "learning_rate": 2.2842693624721244e-05,
      "loss": 0.8336,
      "step": 504030
    },
    {
      "epoch": 1.7665345375656702,
      "grad_norm": 2.65625,
      "learning_rate": 2.2842044596057542e-05,
      "loss": 0.7949,
      "step": 504040
    },
    {
      "epoch": 1.766569585072566,
      "grad_norm": 2.875,
      "learning_rate": 2.284139556739384e-05,
      "loss": 0.82,
      "step": 504050
    },
    {
      "epoch": 1.7666046325794613,
      "grad_norm": 3.140625,
      "learning_rate": 2.2840746538730138e-05,
      "loss": 0.8361,
      "step": 504060
    },
    {
      "epoch": 1.766639680086357,
      "grad_norm": 2.875,
      "learning_rate": 2.2840097510066436e-05,
      "loss": 0.7709,
      "step": 504070
    },
    {
      "epoch": 1.7666747275932526,
      "grad_norm": 3.375,
      "learning_rate": 2.2839448481402734e-05,
      "loss": 0.86,
      "step": 504080
    },
    {
      "epoch": 1.7667097751001481,
      "grad_norm": 2.75,
      "learning_rate": 2.2838799452739032e-05,
      "loss": 0.8094,
      "step": 504090
    },
    {
      "epoch": 1.766744822607044,
      "grad_norm": 2.890625,
      "learning_rate": 2.283815042407533e-05,
      "loss": 0.7748,
      "step": 504100
    },
    {
      "epoch": 1.7667798701139394,
      "grad_norm": 2.984375,
      "learning_rate": 2.2837501395411628e-05,
      "loss": 0.7557,
      "step": 504110
    },
    {
      "epoch": 1.766814917620835,
      "grad_norm": 3.0,
      "learning_rate": 2.2836852366747926e-05,
      "loss": 0.8185,
      "step": 504120
    },
    {
      "epoch": 1.7668499651277307,
      "grad_norm": 2.3125,
      "learning_rate": 2.2836203338084224e-05,
      "loss": 0.7567,
      "step": 504130
    },
    {
      "epoch": 1.7668850126346263,
      "grad_norm": 2.65625,
      "learning_rate": 2.2835554309420522e-05,
      "loss": 0.7488,
      "step": 504140
    },
    {
      "epoch": 1.7669200601415218,
      "grad_norm": 2.671875,
      "learning_rate": 2.283490528075682e-05,
      "loss": 0.7637,
      "step": 504150
    },
    {
      "epoch": 1.7669551076484176,
      "grad_norm": 3.0,
      "learning_rate": 2.283425625209312e-05,
      "loss": 0.8353,
      "step": 504160
    },
    {
      "epoch": 1.7669901551553129,
      "grad_norm": 2.828125,
      "learning_rate": 2.283360722342942e-05,
      "loss": 0.836,
      "step": 504170
    },
    {
      "epoch": 1.7670252026622086,
      "grad_norm": 3.203125,
      "learning_rate": 2.2832958194765714e-05,
      "loss": 0.8367,
      "step": 504180
    },
    {
      "epoch": 1.7670602501691042,
      "grad_norm": 2.59375,
      "learning_rate": 2.2832309166102012e-05,
      "loss": 0.8367,
      "step": 504190
    },
    {
      "epoch": 1.7670952976759997,
      "grad_norm": 2.65625,
      "learning_rate": 2.283166013743831e-05,
      "loss": 0.8193,
      "step": 504200
    },
    {
      "epoch": 1.7671303451828955,
      "grad_norm": 2.6875,
      "learning_rate": 2.2831011108774608e-05,
      "loss": 0.8674,
      "step": 504210
    },
    {
      "epoch": 1.767165392689791,
      "grad_norm": 3.125,
      "learning_rate": 2.2830362080110906e-05,
      "loss": 0.7963,
      "step": 504220
    },
    {
      "epoch": 1.7672004401966865,
      "grad_norm": 2.984375,
      "learning_rate": 2.2829713051447204e-05,
      "loss": 0.8079,
      "step": 504230
    },
    {
      "epoch": 1.7672354877035823,
      "grad_norm": 2.953125,
      "learning_rate": 2.2829064022783502e-05,
      "loss": 0.9121,
      "step": 504240
    },
    {
      "epoch": 1.7672705352104778,
      "grad_norm": 2.390625,
      "learning_rate": 2.28284149941198e-05,
      "loss": 0.756,
      "step": 504250
    },
    {
      "epoch": 1.7673055827173734,
      "grad_norm": 2.734375,
      "learning_rate": 2.2827765965456098e-05,
      "loss": 0.8233,
      "step": 504260
    },
    {
      "epoch": 1.7673406302242691,
      "grad_norm": 2.265625,
      "learning_rate": 2.28271169367924e-05,
      "loss": 0.8225,
      "step": 504270
    },
    {
      "epoch": 1.7673756777311644,
      "grad_norm": 3.609375,
      "learning_rate": 2.2826467908128697e-05,
      "loss": 0.8128,
      "step": 504280
    },
    {
      "epoch": 1.7674107252380602,
      "grad_norm": 3.125,
      "learning_rate": 2.2825818879464995e-05,
      "loss": 0.7636,
      "step": 504290
    },
    {
      "epoch": 1.7674457727449557,
      "grad_norm": 3.484375,
      "learning_rate": 2.2825169850801293e-05,
      "loss": 0.8178,
      "step": 504300
    },
    {
      "epoch": 1.7674808202518513,
      "grad_norm": 2.734375,
      "learning_rate": 2.282452082213759e-05,
      "loss": 0.8178,
      "step": 504310
    },
    {
      "epoch": 1.767515867758747,
      "grad_norm": 3.234375,
      "learning_rate": 2.282387179347389e-05,
      "loss": 0.8336,
      "step": 504320
    },
    {
      "epoch": 1.7675509152656426,
      "grad_norm": 2.75,
      "learning_rate": 2.2823222764810187e-05,
      "loss": 0.8269,
      "step": 504330
    },
    {
      "epoch": 1.767585962772538,
      "grad_norm": 3.0625,
      "learning_rate": 2.2822573736146485e-05,
      "loss": 0.833,
      "step": 504340
    },
    {
      "epoch": 1.7676210102794339,
      "grad_norm": 2.65625,
      "learning_rate": 2.2821924707482783e-05,
      "loss": 0.8063,
      "step": 504350
    },
    {
      "epoch": 1.7676560577863294,
      "grad_norm": 3.0,
      "learning_rate": 2.282127567881908e-05,
      "loss": 0.8211,
      "step": 504360
    },
    {
      "epoch": 1.767691105293225,
      "grad_norm": 2.859375,
      "learning_rate": 2.2820626650155376e-05,
      "loss": 0.7554,
      "step": 504370
    },
    {
      "epoch": 1.7677261528001207,
      "grad_norm": 2.90625,
      "learning_rate": 2.2819977621491674e-05,
      "loss": 0.8041,
      "step": 504380
    },
    {
      "epoch": 1.767761200307016,
      "grad_norm": 2.859375,
      "learning_rate": 2.2819328592827975e-05,
      "loss": 0.8548,
      "step": 504390
    },
    {
      "epoch": 1.7677962478139118,
      "grad_norm": 2.109375,
      "learning_rate": 2.2818679564164273e-05,
      "loss": 0.7441,
      "step": 504400
    },
    {
      "epoch": 1.7678312953208075,
      "grad_norm": 3.296875,
      "learning_rate": 2.281803053550057e-05,
      "loss": 0.8221,
      "step": 504410
    },
    {
      "epoch": 1.7678663428277028,
      "grad_norm": 2.453125,
      "learning_rate": 2.281738150683687e-05,
      "loss": 0.8247,
      "step": 504420
    },
    {
      "epoch": 1.7679013903345986,
      "grad_norm": 2.984375,
      "learning_rate": 2.2816732478173167e-05,
      "loss": 0.7592,
      "step": 504430
    },
    {
      "epoch": 1.7679364378414941,
      "grad_norm": 2.703125,
      "learning_rate": 2.2816083449509465e-05,
      "loss": 0.7701,
      "step": 504440
    },
    {
      "epoch": 1.7679714853483897,
      "grad_norm": 2.609375,
      "learning_rate": 2.2815434420845763e-05,
      "loss": 0.8846,
      "step": 504450
    },
    {
      "epoch": 1.7680065328552854,
      "grad_norm": 3.171875,
      "learning_rate": 2.281478539218206e-05,
      "loss": 0.8962,
      "step": 504460
    },
    {
      "epoch": 1.768041580362181,
      "grad_norm": 2.96875,
      "learning_rate": 2.281413636351836e-05,
      "loss": 0.7946,
      "step": 504470
    },
    {
      "epoch": 1.7680766278690765,
      "grad_norm": 2.546875,
      "learning_rate": 2.2813487334854657e-05,
      "loss": 0.8043,
      "step": 504480
    },
    {
      "epoch": 1.7681116753759722,
      "grad_norm": 2.75,
      "learning_rate": 2.2812838306190955e-05,
      "loss": 0.8418,
      "step": 504490
    },
    {
      "epoch": 1.7681467228828676,
      "grad_norm": 3.140625,
      "learning_rate": 2.2812189277527253e-05,
      "loss": 0.7463,
      "step": 504500
    },
    {
      "epoch": 1.7681817703897633,
      "grad_norm": 2.953125,
      "learning_rate": 2.281154024886355e-05,
      "loss": 0.8823,
      "step": 504510
    },
    {
      "epoch": 1.768216817896659,
      "grad_norm": 2.96875,
      "learning_rate": 2.281089122019985e-05,
      "loss": 0.8031,
      "step": 504520
    },
    {
      "epoch": 1.7682518654035544,
      "grad_norm": 2.671875,
      "learning_rate": 2.281024219153615e-05,
      "loss": 0.821,
      "step": 504530
    },
    {
      "epoch": 1.7682869129104501,
      "grad_norm": 2.734375,
      "learning_rate": 2.280959316287245e-05,
      "loss": 0.7858,
      "step": 504540
    },
    {
      "epoch": 1.7683219604173457,
      "grad_norm": 2.8125,
      "learning_rate": 2.2808944134208747e-05,
      "loss": 0.8248,
      "step": 504550
    },
    {
      "epoch": 1.7683570079242412,
      "grad_norm": 3.296875,
      "learning_rate": 2.280829510554504e-05,
      "loss": 0.8358,
      "step": 504560
    },
    {
      "epoch": 1.768392055431137,
      "grad_norm": 2.78125,
      "learning_rate": 2.280764607688134e-05,
      "loss": 0.8146,
      "step": 504570
    },
    {
      "epoch": 1.7684271029380325,
      "grad_norm": 3.15625,
      "learning_rate": 2.2806997048217637e-05,
      "loss": 0.9296,
      "step": 504580
    },
    {
      "epoch": 1.768462150444928,
      "grad_norm": 2.953125,
      "learning_rate": 2.2806348019553935e-05,
      "loss": 0.8326,
      "step": 504590
    },
    {
      "epoch": 1.7684971979518238,
      "grad_norm": 2.875,
      "learning_rate": 2.2805698990890233e-05,
      "loss": 0.8885,
      "step": 504600
    },
    {
      "epoch": 1.7685322454587193,
      "grad_norm": 3.0,
      "learning_rate": 2.280504996222653e-05,
      "loss": 0.8897,
      "step": 504610
    },
    {
      "epoch": 1.7685672929656149,
      "grad_norm": 3.625,
      "learning_rate": 2.280440093356283e-05,
      "loss": 0.8284,
      "step": 504620
    },
    {
      "epoch": 1.7686023404725106,
      "grad_norm": 2.6875,
      "learning_rate": 2.2803751904899127e-05,
      "loss": 0.8596,
      "step": 504630
    },
    {
      "epoch": 1.768637387979406,
      "grad_norm": 3.15625,
      "learning_rate": 2.280310287623543e-05,
      "loss": 0.7461,
      "step": 504640
    },
    {
      "epoch": 1.7686724354863017,
      "grad_norm": 2.78125,
      "learning_rate": 2.2802453847571727e-05,
      "loss": 0.8154,
      "step": 504650
    },
    {
      "epoch": 1.7687074829931972,
      "grad_norm": 2.9375,
      "learning_rate": 2.2801804818908025e-05,
      "loss": 0.8617,
      "step": 504660
    },
    {
      "epoch": 1.7687425305000928,
      "grad_norm": 3.546875,
      "learning_rate": 2.2801155790244323e-05,
      "loss": 0.9462,
      "step": 504670
    },
    {
      "epoch": 1.7687775780069885,
      "grad_norm": 3.25,
      "learning_rate": 2.280050676158062e-05,
      "loss": 0.8977,
      "step": 504680
    },
    {
      "epoch": 1.768812625513884,
      "grad_norm": 2.96875,
      "learning_rate": 2.279985773291692e-05,
      "loss": 0.8342,
      "step": 504690
    },
    {
      "epoch": 1.7688476730207796,
      "grad_norm": 2.921875,
      "learning_rate": 2.2799208704253217e-05,
      "loss": 0.8083,
      "step": 504700
    },
    {
      "epoch": 1.7688827205276754,
      "grad_norm": 2.703125,
      "learning_rate": 2.2798559675589515e-05,
      "loss": 0.8587,
      "step": 504710
    },
    {
      "epoch": 1.768917768034571,
      "grad_norm": 2.921875,
      "learning_rate": 2.2797910646925813e-05,
      "loss": 0.804,
      "step": 504720
    },
    {
      "epoch": 1.7689528155414664,
      "grad_norm": 2.59375,
      "learning_rate": 2.279726161826211e-05,
      "loss": 0.791,
      "step": 504730
    },
    {
      "epoch": 1.7689878630483622,
      "grad_norm": 2.9375,
      "learning_rate": 2.2796612589598405e-05,
      "loss": 0.8279,
      "step": 504740
    },
    {
      "epoch": 1.7690229105552575,
      "grad_norm": 3.546875,
      "learning_rate": 2.2795963560934703e-05,
      "loss": 0.7924,
      "step": 504750
    },
    {
      "epoch": 1.7690579580621533,
      "grad_norm": 2.84375,
      "learning_rate": 2.2795314532271005e-05,
      "loss": 0.9145,
      "step": 504760
    },
    {
      "epoch": 1.7690930055690488,
      "grad_norm": 2.71875,
      "learning_rate": 2.2794665503607303e-05,
      "loss": 0.8263,
      "step": 504770
    },
    {
      "epoch": 1.7691280530759443,
      "grad_norm": 2.859375,
      "learning_rate": 2.27940164749436e-05,
      "loss": 0.8918,
      "step": 504780
    },
    {
      "epoch": 1.76916310058284,
      "grad_norm": 2.71875,
      "learning_rate": 2.27933674462799e-05,
      "loss": 0.7813,
      "step": 504790
    },
    {
      "epoch": 1.7691981480897356,
      "grad_norm": 2.75,
      "learning_rate": 2.2792718417616197e-05,
      "loss": 0.8327,
      "step": 504800
    },
    {
      "epoch": 1.7692331955966312,
      "grad_norm": 3.125,
      "learning_rate": 2.2792069388952495e-05,
      "loss": 0.8992,
      "step": 504810
    },
    {
      "epoch": 1.769268243103527,
      "grad_norm": 2.5625,
      "learning_rate": 2.2791420360288793e-05,
      "loss": 0.7733,
      "step": 504820
    },
    {
      "epoch": 1.7693032906104225,
      "grad_norm": 2.890625,
      "learning_rate": 2.279077133162509e-05,
      "loss": 0.7881,
      "step": 504830
    },
    {
      "epoch": 1.769338338117318,
      "grad_norm": 2.578125,
      "learning_rate": 2.279012230296139e-05,
      "loss": 0.8031,
      "step": 504840
    },
    {
      "epoch": 1.7693733856242138,
      "grad_norm": 2.640625,
      "learning_rate": 2.2789473274297687e-05,
      "loss": 0.8356,
      "step": 504850
    },
    {
      "epoch": 1.769408433131109,
      "grad_norm": 2.9375,
      "learning_rate": 2.2788824245633985e-05,
      "loss": 0.7572,
      "step": 504860
    },
    {
      "epoch": 1.7694434806380048,
      "grad_norm": 2.546875,
      "learning_rate": 2.2788175216970283e-05,
      "loss": 0.8043,
      "step": 504870
    },
    {
      "epoch": 1.7694785281449004,
      "grad_norm": 3.0,
      "learning_rate": 2.278752618830658e-05,
      "loss": 0.8731,
      "step": 504880
    },
    {
      "epoch": 1.769513575651796,
      "grad_norm": 3.015625,
      "learning_rate": 2.2786877159642882e-05,
      "loss": 0.8428,
      "step": 504890
    },
    {
      "epoch": 1.7695486231586917,
      "grad_norm": 2.78125,
      "learning_rate": 2.278622813097918e-05,
      "loss": 0.9321,
      "step": 504900
    },
    {
      "epoch": 1.7695836706655872,
      "grad_norm": 2.765625,
      "learning_rate": 2.2785579102315478e-05,
      "loss": 0.8316,
      "step": 504910
    },
    {
      "epoch": 1.7696187181724827,
      "grad_norm": 3.078125,
      "learning_rate": 2.2784930073651776e-05,
      "loss": 0.7948,
      "step": 504920
    },
    {
      "epoch": 1.7696537656793785,
      "grad_norm": 2.78125,
      "learning_rate": 2.278428104498807e-05,
      "loss": 0.8541,
      "step": 504930
    },
    {
      "epoch": 1.769688813186274,
      "grad_norm": 2.71875,
      "learning_rate": 2.278363201632437e-05,
      "loss": 0.8272,
      "step": 504940
    },
    {
      "epoch": 1.7697238606931696,
      "grad_norm": 2.609375,
      "learning_rate": 2.2782982987660667e-05,
      "loss": 0.8079,
      "step": 504950
    },
    {
      "epoch": 1.7697589082000653,
      "grad_norm": 2.6875,
      "learning_rate": 2.2782333958996965e-05,
      "loss": 0.8778,
      "step": 504960
    },
    {
      "epoch": 1.7697939557069606,
      "grad_norm": 2.90625,
      "learning_rate": 2.2781684930333263e-05,
      "loss": 0.7423,
      "step": 504970
    },
    {
      "epoch": 1.7698290032138564,
      "grad_norm": 3.09375,
      "learning_rate": 2.278103590166956e-05,
      "loss": 0.8425,
      "step": 504980
    },
    {
      "epoch": 1.769864050720752,
      "grad_norm": 3.15625,
      "learning_rate": 2.278038687300586e-05,
      "loss": 0.9472,
      "step": 504990
    },
    {
      "epoch": 1.7698990982276475,
      "grad_norm": 3.1875,
      "learning_rate": 2.2779737844342157e-05,
      "loss": 0.7795,
      "step": 505000
    },
    {
      "epoch": 1.7698990982276475,
      "eval_loss": 0.7782299518585205,
      "eval_runtime": 552.8585,
      "eval_samples_per_second": 688.126,
      "eval_steps_per_second": 57.344,
      "step": 505000
    },
    {
      "epoch": 1.7699341457345432,
      "grad_norm": 3.140625,
      "learning_rate": 2.2779088815678458e-05,
      "loss": 0.7347,
      "step": 505010
    },
    {
      "epoch": 1.7699691932414388,
      "grad_norm": 2.96875,
      "learning_rate": 2.2778439787014756e-05,
      "loss": 0.7844,
      "step": 505020
    },
    {
      "epoch": 1.7700042407483343,
      "grad_norm": 2.65625,
      "learning_rate": 2.2777790758351054e-05,
      "loss": 0.8951,
      "step": 505030
    },
    {
      "epoch": 1.77003928825523,
      "grad_norm": 2.78125,
      "learning_rate": 2.2777141729687352e-05,
      "loss": 0.8101,
      "step": 505040
    },
    {
      "epoch": 1.7700743357621256,
      "grad_norm": 2.671875,
      "learning_rate": 2.277649270102365e-05,
      "loss": 0.8022,
      "step": 505050
    },
    {
      "epoch": 1.7701093832690211,
      "grad_norm": 2.625,
      "learning_rate": 2.2775843672359948e-05,
      "loss": 0.7781,
      "step": 505060
    },
    {
      "epoch": 1.7701444307759169,
      "grad_norm": 2.921875,
      "learning_rate": 2.2775194643696246e-05,
      "loss": 0.8169,
      "step": 505070
    },
    {
      "epoch": 1.7701794782828122,
      "grad_norm": 3.171875,
      "learning_rate": 2.2774545615032544e-05,
      "loss": 0.7876,
      "step": 505080
    },
    {
      "epoch": 1.770214525789708,
      "grad_norm": 3.03125,
      "learning_rate": 2.2773896586368842e-05,
      "loss": 0.9117,
      "step": 505090
    },
    {
      "epoch": 1.7702495732966037,
      "grad_norm": 2.75,
      "learning_rate": 2.277324755770514e-05,
      "loss": 0.8052,
      "step": 505100
    },
    {
      "epoch": 1.770284620803499,
      "grad_norm": 3.03125,
      "learning_rate": 2.2772598529041438e-05,
      "loss": 0.8614,
      "step": 505110
    },
    {
      "epoch": 1.7703196683103948,
      "grad_norm": 3.140625,
      "learning_rate": 2.2771949500377736e-05,
      "loss": 0.8085,
      "step": 505120
    },
    {
      "epoch": 1.7703547158172903,
      "grad_norm": 2.84375,
      "learning_rate": 2.2771300471714034e-05,
      "loss": 0.8971,
      "step": 505130
    },
    {
      "epoch": 1.7703897633241859,
      "grad_norm": 2.625,
      "learning_rate": 2.2770651443050332e-05,
      "loss": 0.714,
      "step": 505140
    },
    {
      "epoch": 1.7704248108310816,
      "grad_norm": 2.90625,
      "learning_rate": 2.277000241438663e-05,
      "loss": 0.842,
      "step": 505150
    },
    {
      "epoch": 1.7704598583379771,
      "grad_norm": 3.125,
      "learning_rate": 2.2769353385722928e-05,
      "loss": 0.7867,
      "step": 505160
    },
    {
      "epoch": 1.7704949058448727,
      "grad_norm": 2.765625,
      "learning_rate": 2.2768704357059226e-05,
      "loss": 0.7824,
      "step": 505170
    },
    {
      "epoch": 1.7705299533517684,
      "grad_norm": 3.109375,
      "learning_rate": 2.2768055328395524e-05,
      "loss": 0.8728,
      "step": 505180
    },
    {
      "epoch": 1.770565000858664,
      "grad_norm": 2.796875,
      "learning_rate": 2.2767406299731822e-05,
      "loss": 0.7887,
      "step": 505190
    },
    {
      "epoch": 1.7706000483655595,
      "grad_norm": 2.625,
      "learning_rate": 2.276675727106812e-05,
      "loss": 0.8887,
      "step": 505200
    },
    {
      "epoch": 1.7706350958724553,
      "grad_norm": 3.09375,
      "learning_rate": 2.2766108242404418e-05,
      "loss": 0.822,
      "step": 505210
    },
    {
      "epoch": 1.7706701433793506,
      "grad_norm": 3.390625,
      "learning_rate": 2.2765459213740716e-05,
      "loss": 0.847,
      "step": 505220
    },
    {
      "epoch": 1.7707051908862463,
      "grad_norm": 2.796875,
      "learning_rate": 2.2764810185077014e-05,
      "loss": 0.789,
      "step": 505230
    },
    {
      "epoch": 1.7707402383931419,
      "grad_norm": 2.640625,
      "learning_rate": 2.2764161156413312e-05,
      "loss": 0.7595,
      "step": 505240
    },
    {
      "epoch": 1.7707752859000374,
      "grad_norm": 2.828125,
      "learning_rate": 2.276351212774961e-05,
      "loss": 0.8442,
      "step": 505250
    },
    {
      "epoch": 1.7708103334069332,
      "grad_norm": 2.640625,
      "learning_rate": 2.276286309908591e-05,
      "loss": 0.7602,
      "step": 505260
    },
    {
      "epoch": 1.7708453809138287,
      "grad_norm": 2.765625,
      "learning_rate": 2.276221407042221e-05,
      "loss": 0.8312,
      "step": 505270
    },
    {
      "epoch": 1.7708804284207242,
      "grad_norm": 3.046875,
      "learning_rate": 2.2761565041758507e-05,
      "loss": 0.8422,
      "step": 505280
    },
    {
      "epoch": 1.77091547592762,
      "grad_norm": 3.03125,
      "learning_rate": 2.2760916013094805e-05,
      "loss": 0.9265,
      "step": 505290
    },
    {
      "epoch": 1.7709505234345155,
      "grad_norm": 2.546875,
      "learning_rate": 2.2760266984431103e-05,
      "loss": 0.8172,
      "step": 505300
    },
    {
      "epoch": 1.770985570941411,
      "grad_norm": 2.890625,
      "learning_rate": 2.2759617955767398e-05,
      "loss": 0.8535,
      "step": 505310
    },
    {
      "epoch": 1.7710206184483068,
      "grad_norm": 2.515625,
      "learning_rate": 2.2758968927103696e-05,
      "loss": 0.7624,
      "step": 505320
    },
    {
      "epoch": 1.7710556659552021,
      "grad_norm": 2.625,
      "learning_rate": 2.2758319898439994e-05,
      "loss": 0.8101,
      "step": 505330
    },
    {
      "epoch": 1.771090713462098,
      "grad_norm": 2.84375,
      "learning_rate": 2.2757670869776292e-05,
      "loss": 0.7641,
      "step": 505340
    },
    {
      "epoch": 1.7711257609689934,
      "grad_norm": 2.65625,
      "learning_rate": 2.275702184111259e-05,
      "loss": 0.8707,
      "step": 505350
    },
    {
      "epoch": 1.771160808475889,
      "grad_norm": 2.703125,
      "learning_rate": 2.2756372812448888e-05,
      "loss": 0.7934,
      "step": 505360
    },
    {
      "epoch": 1.7711958559827847,
      "grad_norm": 2.484375,
      "learning_rate": 2.275572378378519e-05,
      "loss": 0.7224,
      "step": 505370
    },
    {
      "epoch": 1.7712309034896803,
      "grad_norm": 2.90625,
      "learning_rate": 2.2755074755121487e-05,
      "loss": 0.8667,
      "step": 505380
    },
    {
      "epoch": 1.7712659509965758,
      "grad_norm": 2.90625,
      "learning_rate": 2.2754425726457785e-05,
      "loss": 0.82,
      "step": 505390
    },
    {
      "epoch": 1.7713009985034716,
      "grad_norm": 2.65625,
      "learning_rate": 2.2753776697794083e-05,
      "loss": 0.7801,
      "step": 505400
    },
    {
      "epoch": 1.771336046010367,
      "grad_norm": 2.484375,
      "learning_rate": 2.275312766913038e-05,
      "loss": 0.8518,
      "step": 505410
    },
    {
      "epoch": 1.7713710935172626,
      "grad_norm": 2.625,
      "learning_rate": 2.275247864046668e-05,
      "loss": 0.8011,
      "step": 505420
    },
    {
      "epoch": 1.7714061410241584,
      "grad_norm": 2.875,
      "learning_rate": 2.2751829611802977e-05,
      "loss": 0.8745,
      "step": 505430
    },
    {
      "epoch": 1.7714411885310537,
      "grad_norm": 2.828125,
      "learning_rate": 2.2751180583139275e-05,
      "loss": 0.8158,
      "step": 505440
    },
    {
      "epoch": 1.7714762360379495,
      "grad_norm": 2.828125,
      "learning_rate": 2.2750531554475573e-05,
      "loss": 0.8701,
      "step": 505450
    },
    {
      "epoch": 1.771511283544845,
      "grad_norm": 2.984375,
      "learning_rate": 2.274988252581187e-05,
      "loss": 0.8225,
      "step": 505460
    },
    {
      "epoch": 1.7715463310517405,
      "grad_norm": 3.03125,
      "learning_rate": 2.274923349714817e-05,
      "loss": 0.8173,
      "step": 505470
    },
    {
      "epoch": 1.7715813785586363,
      "grad_norm": 2.90625,
      "learning_rate": 2.2748584468484467e-05,
      "loss": 0.8331,
      "step": 505480
    },
    {
      "epoch": 1.7716164260655318,
      "grad_norm": 3.140625,
      "learning_rate": 2.2747935439820765e-05,
      "loss": 0.9008,
      "step": 505490
    },
    {
      "epoch": 1.7716514735724274,
      "grad_norm": 2.828125,
      "learning_rate": 2.2747286411157063e-05,
      "loss": 0.9126,
      "step": 505500
    },
    {
      "epoch": 1.7716865210793231,
      "grad_norm": 2.59375,
      "learning_rate": 2.274663738249336e-05,
      "loss": 0.8323,
      "step": 505510
    },
    {
      "epoch": 1.7717215685862187,
      "grad_norm": 3.203125,
      "learning_rate": 2.274598835382966e-05,
      "loss": 0.7988,
      "step": 505520
    },
    {
      "epoch": 1.7717566160931142,
      "grad_norm": 2.984375,
      "learning_rate": 2.2745339325165957e-05,
      "loss": 0.7978,
      "step": 505530
    },
    {
      "epoch": 1.77179166360001,
      "grad_norm": 2.890625,
      "learning_rate": 2.2744690296502255e-05,
      "loss": 0.7616,
      "step": 505540
    },
    {
      "epoch": 1.7718267111069053,
      "grad_norm": 2.984375,
      "learning_rate": 2.2744041267838553e-05,
      "loss": 0.8414,
      "step": 505550
    },
    {
      "epoch": 1.771861758613801,
      "grad_norm": 3.03125,
      "learning_rate": 2.274339223917485e-05,
      "loss": 0.8213,
      "step": 505560
    },
    {
      "epoch": 1.7718968061206966,
      "grad_norm": 3.03125,
      "learning_rate": 2.274274321051115e-05,
      "loss": 0.8587,
      "step": 505570
    },
    {
      "epoch": 1.771931853627592,
      "grad_norm": 3.5625,
      "learning_rate": 2.2742094181847447e-05,
      "loss": 0.8688,
      "step": 505580
    },
    {
      "epoch": 1.7719669011344878,
      "grad_norm": 2.96875,
      "learning_rate": 2.2741445153183745e-05,
      "loss": 0.8034,
      "step": 505590
    },
    {
      "epoch": 1.7720019486413834,
      "grad_norm": 2.78125,
      "learning_rate": 2.2740796124520043e-05,
      "loss": 0.813,
      "step": 505600
    },
    {
      "epoch": 1.772036996148279,
      "grad_norm": 2.703125,
      "learning_rate": 2.274014709585634e-05,
      "loss": 0.8226,
      "step": 505610
    },
    {
      "epoch": 1.7720720436551747,
      "grad_norm": 3.140625,
      "learning_rate": 2.273949806719264e-05,
      "loss": 0.8902,
      "step": 505620
    },
    {
      "epoch": 1.7721070911620702,
      "grad_norm": 2.71875,
      "learning_rate": 2.273884903852894e-05,
      "loss": 0.8299,
      "step": 505630
    },
    {
      "epoch": 1.7721421386689657,
      "grad_norm": 3.359375,
      "learning_rate": 2.273820000986524e-05,
      "loss": 0.7439,
      "step": 505640
    },
    {
      "epoch": 1.7721771861758615,
      "grad_norm": 2.5625,
      "learning_rate": 2.2737550981201537e-05,
      "loss": 0.7437,
      "step": 505650
    },
    {
      "epoch": 1.7722122336827568,
      "grad_norm": 3.375,
      "learning_rate": 2.2736901952537835e-05,
      "loss": 0.863,
      "step": 505660
    },
    {
      "epoch": 1.7722472811896526,
      "grad_norm": 2.828125,
      "learning_rate": 2.2736252923874133e-05,
      "loss": 0.8138,
      "step": 505670
    },
    {
      "epoch": 1.7722823286965483,
      "grad_norm": 3.234375,
      "learning_rate": 2.273560389521043e-05,
      "loss": 0.854,
      "step": 505680
    },
    {
      "epoch": 1.7723173762034437,
      "grad_norm": 3.078125,
      "learning_rate": 2.2734954866546725e-05,
      "loss": 0.8061,
      "step": 505690
    },
    {
      "epoch": 1.7723524237103394,
      "grad_norm": 2.671875,
      "learning_rate": 2.2734305837883023e-05,
      "loss": 0.8346,
      "step": 505700
    },
    {
      "epoch": 1.772387471217235,
      "grad_norm": 2.875,
      "learning_rate": 2.273365680921932e-05,
      "loss": 0.8101,
      "step": 505710
    },
    {
      "epoch": 1.7724225187241305,
      "grad_norm": 3.234375,
      "learning_rate": 2.273300778055562e-05,
      "loss": 0.8715,
      "step": 505720
    },
    {
      "epoch": 1.7724575662310262,
      "grad_norm": 3.0625,
      "learning_rate": 2.2732358751891917e-05,
      "loss": 0.7775,
      "step": 505730
    },
    {
      "epoch": 1.7724926137379218,
      "grad_norm": 2.59375,
      "learning_rate": 2.273170972322822e-05,
      "loss": 0.7825,
      "step": 505740
    },
    {
      "epoch": 1.7725276612448173,
      "grad_norm": 2.34375,
      "learning_rate": 2.2731060694564517e-05,
      "loss": 0.7641,
      "step": 505750
    },
    {
      "epoch": 1.772562708751713,
      "grad_norm": 2.546875,
      "learning_rate": 2.2730411665900815e-05,
      "loss": 0.7445,
      "step": 505760
    },
    {
      "epoch": 1.7725977562586084,
      "grad_norm": 2.3125,
      "learning_rate": 2.2729762637237113e-05,
      "loss": 0.8202,
      "step": 505770
    },
    {
      "epoch": 1.7726328037655041,
      "grad_norm": 2.6875,
      "learning_rate": 2.272911360857341e-05,
      "loss": 0.8669,
      "step": 505780
    },
    {
      "epoch": 1.7726678512724,
      "grad_norm": 3.171875,
      "learning_rate": 2.272846457990971e-05,
      "loss": 0.8833,
      "step": 505790
    },
    {
      "epoch": 1.7727028987792952,
      "grad_norm": 3.015625,
      "learning_rate": 2.2727815551246007e-05,
      "loss": 0.8592,
      "step": 505800
    },
    {
      "epoch": 1.772737946286191,
      "grad_norm": 2.515625,
      "learning_rate": 2.2727166522582305e-05,
      "loss": 0.7696,
      "step": 505810
    },
    {
      "epoch": 1.7727729937930865,
      "grad_norm": 3.125,
      "learning_rate": 2.2726517493918603e-05,
      "loss": 0.9243,
      "step": 505820
    },
    {
      "epoch": 1.772808041299982,
      "grad_norm": 2.65625,
      "learning_rate": 2.27258684652549e-05,
      "loss": 0.8728,
      "step": 505830
    },
    {
      "epoch": 1.7728430888068778,
      "grad_norm": 3.1875,
      "learning_rate": 2.27252194365912e-05,
      "loss": 0.8626,
      "step": 505840
    },
    {
      "epoch": 1.7728781363137733,
      "grad_norm": 3.078125,
      "learning_rate": 2.2724570407927497e-05,
      "loss": 0.8615,
      "step": 505850
    },
    {
      "epoch": 1.7729131838206689,
      "grad_norm": 3.375,
      "learning_rate": 2.2723921379263795e-05,
      "loss": 0.8335,
      "step": 505860
    },
    {
      "epoch": 1.7729482313275646,
      "grad_norm": 3.03125,
      "learning_rate": 2.2723272350600093e-05,
      "loss": 0.8384,
      "step": 505870
    },
    {
      "epoch": 1.7729832788344602,
      "grad_norm": 2.953125,
      "learning_rate": 2.272262332193639e-05,
      "loss": 0.8237,
      "step": 505880
    },
    {
      "epoch": 1.7730183263413557,
      "grad_norm": 3.125,
      "learning_rate": 2.272197429327269e-05,
      "loss": 0.8242,
      "step": 505890
    },
    {
      "epoch": 1.7730533738482515,
      "grad_norm": 3.15625,
      "learning_rate": 2.2721325264608987e-05,
      "loss": 0.9161,
      "step": 505900
    },
    {
      "epoch": 1.7730884213551468,
      "grad_norm": 2.921875,
      "learning_rate": 2.2720676235945285e-05,
      "loss": 0.7746,
      "step": 505910
    },
    {
      "epoch": 1.7731234688620425,
      "grad_norm": 2.8125,
      "learning_rate": 2.2720027207281583e-05,
      "loss": 0.8294,
      "step": 505920
    },
    {
      "epoch": 1.773158516368938,
      "grad_norm": 3.046875,
      "learning_rate": 2.271937817861788e-05,
      "loss": 0.7497,
      "step": 505930
    },
    {
      "epoch": 1.7731935638758336,
      "grad_norm": 2.703125,
      "learning_rate": 2.271872914995418e-05,
      "loss": 0.8143,
      "step": 505940
    },
    {
      "epoch": 1.7732286113827294,
      "grad_norm": 2.796875,
      "learning_rate": 2.2718080121290477e-05,
      "loss": 0.8828,
      "step": 505950
    },
    {
      "epoch": 1.773263658889625,
      "grad_norm": 2.890625,
      "learning_rate": 2.2717431092626775e-05,
      "loss": 0.8956,
      "step": 505960
    },
    {
      "epoch": 1.7732987063965204,
      "grad_norm": 3.0625,
      "learning_rate": 2.2716782063963073e-05,
      "loss": 0.8194,
      "step": 505970
    },
    {
      "epoch": 1.7733337539034162,
      "grad_norm": 3.0625,
      "learning_rate": 2.271613303529937e-05,
      "loss": 0.8035,
      "step": 505980
    },
    {
      "epoch": 1.7733688014103117,
      "grad_norm": 3.0625,
      "learning_rate": 2.2715484006635672e-05,
      "loss": 0.8607,
      "step": 505990
    },
    {
      "epoch": 1.7734038489172073,
      "grad_norm": 3.203125,
      "learning_rate": 2.271483497797197e-05,
      "loss": 0.9048,
      "step": 506000
    },
    {
      "epoch": 1.773438896424103,
      "grad_norm": 2.75,
      "learning_rate": 2.2714185949308268e-05,
      "loss": 0.7809,
      "step": 506010
    },
    {
      "epoch": 1.7734739439309983,
      "grad_norm": 3.15625,
      "learning_rate": 2.2713536920644566e-05,
      "loss": 0.9177,
      "step": 506020
    },
    {
      "epoch": 1.773508991437894,
      "grad_norm": 3.0,
      "learning_rate": 2.2712887891980864e-05,
      "loss": 0.7881,
      "step": 506030
    },
    {
      "epoch": 1.7735440389447896,
      "grad_norm": 3.0,
      "learning_rate": 2.2712238863317162e-05,
      "loss": 0.9751,
      "step": 506040
    },
    {
      "epoch": 1.7735790864516852,
      "grad_norm": 3.046875,
      "learning_rate": 2.271158983465346e-05,
      "loss": 0.8594,
      "step": 506050
    },
    {
      "epoch": 1.773614133958581,
      "grad_norm": 2.765625,
      "learning_rate": 2.2710940805989755e-05,
      "loss": 0.7864,
      "step": 506060
    },
    {
      "epoch": 1.7736491814654765,
      "grad_norm": 2.828125,
      "learning_rate": 2.2710291777326053e-05,
      "loss": 0.8082,
      "step": 506070
    },
    {
      "epoch": 1.773684228972372,
      "grad_norm": 2.65625,
      "learning_rate": 2.270964274866235e-05,
      "loss": 0.7627,
      "step": 506080
    },
    {
      "epoch": 1.7737192764792677,
      "grad_norm": 3.09375,
      "learning_rate": 2.270899371999865e-05,
      "loss": 0.819,
      "step": 506090
    },
    {
      "epoch": 1.7737543239861633,
      "grad_norm": 2.9375,
      "learning_rate": 2.2708344691334947e-05,
      "loss": 0.9213,
      "step": 506100
    },
    {
      "epoch": 1.7737893714930588,
      "grad_norm": 2.84375,
      "learning_rate": 2.2707695662671248e-05,
      "loss": 0.7776,
      "step": 506110
    },
    {
      "epoch": 1.7738244189999546,
      "grad_norm": 2.4375,
      "learning_rate": 2.2707046634007546e-05,
      "loss": 0.778,
      "step": 506120
    },
    {
      "epoch": 1.77385946650685,
      "grad_norm": 3.3125,
      "learning_rate": 2.2706397605343844e-05,
      "loss": 0.8464,
      "step": 506130
    },
    {
      "epoch": 1.7738945140137456,
      "grad_norm": 3.734375,
      "learning_rate": 2.2705748576680142e-05,
      "loss": 0.8642,
      "step": 506140
    },
    {
      "epoch": 1.7739295615206412,
      "grad_norm": 3.0,
      "learning_rate": 2.270509954801644e-05,
      "loss": 0.8638,
      "step": 506150
    },
    {
      "epoch": 1.7739646090275367,
      "grad_norm": 2.8125,
      "learning_rate": 2.2704450519352738e-05,
      "loss": 0.9105,
      "step": 506160
    },
    {
      "epoch": 1.7739996565344325,
      "grad_norm": 3.140625,
      "learning_rate": 2.2703801490689036e-05,
      "loss": 0.7712,
      "step": 506170
    },
    {
      "epoch": 1.774034704041328,
      "grad_norm": 2.4375,
      "learning_rate": 2.2703152462025334e-05,
      "loss": 0.8934,
      "step": 506180
    },
    {
      "epoch": 1.7740697515482236,
      "grad_norm": 3.203125,
      "learning_rate": 2.2702503433361632e-05,
      "loss": 0.8748,
      "step": 506190
    },
    {
      "epoch": 1.7741047990551193,
      "grad_norm": 3.125,
      "learning_rate": 2.270185440469793e-05,
      "loss": 0.7831,
      "step": 506200
    },
    {
      "epoch": 1.7741398465620148,
      "grad_norm": 3.015625,
      "learning_rate": 2.2701205376034228e-05,
      "loss": 0.9198,
      "step": 506210
    },
    {
      "epoch": 1.7741748940689104,
      "grad_norm": 2.78125,
      "learning_rate": 2.2700556347370526e-05,
      "loss": 0.7805,
      "step": 506220
    },
    {
      "epoch": 1.7742099415758061,
      "grad_norm": 2.46875,
      "learning_rate": 2.2699907318706824e-05,
      "loss": 0.8138,
      "step": 506230
    },
    {
      "epoch": 1.7742449890827015,
      "grad_norm": 2.78125,
      "learning_rate": 2.2699258290043122e-05,
      "loss": 0.871,
      "step": 506240
    },
    {
      "epoch": 1.7742800365895972,
      "grad_norm": 2.875,
      "learning_rate": 2.269860926137942e-05,
      "loss": 0.8361,
      "step": 506250
    },
    {
      "epoch": 1.7743150840964927,
      "grad_norm": 2.46875,
      "learning_rate": 2.2697960232715718e-05,
      "loss": 0.7752,
      "step": 506260
    },
    {
      "epoch": 1.7743501316033883,
      "grad_norm": 2.625,
      "learning_rate": 2.2697311204052016e-05,
      "loss": 0.7727,
      "step": 506270
    },
    {
      "epoch": 1.774385179110284,
      "grad_norm": 2.8125,
      "learning_rate": 2.2696662175388314e-05,
      "loss": 0.8226,
      "step": 506280
    },
    {
      "epoch": 1.7744202266171796,
      "grad_norm": 2.359375,
      "learning_rate": 2.2696013146724612e-05,
      "loss": 0.7352,
      "step": 506290
    },
    {
      "epoch": 1.774455274124075,
      "grad_norm": 2.671875,
      "learning_rate": 2.269536411806091e-05,
      "loss": 0.7943,
      "step": 506300
    },
    {
      "epoch": 1.7744903216309709,
      "grad_norm": 2.828125,
      "learning_rate": 2.2694715089397208e-05,
      "loss": 0.7964,
      "step": 506310
    },
    {
      "epoch": 1.7745253691378664,
      "grad_norm": 2.953125,
      "learning_rate": 2.2694066060733506e-05,
      "loss": 0.7999,
      "step": 506320
    },
    {
      "epoch": 1.774560416644762,
      "grad_norm": 3.078125,
      "learning_rate": 2.2693417032069804e-05,
      "loss": 0.8335,
      "step": 506330
    },
    {
      "epoch": 1.7745954641516577,
      "grad_norm": 2.9375,
      "learning_rate": 2.2692768003406102e-05,
      "loss": 0.8172,
      "step": 506340
    },
    {
      "epoch": 1.774630511658553,
      "grad_norm": 2.828125,
      "learning_rate": 2.26921189747424e-05,
      "loss": 0.7716,
      "step": 506350
    },
    {
      "epoch": 1.7746655591654488,
      "grad_norm": 2.921875,
      "learning_rate": 2.26914699460787e-05,
      "loss": 0.8864,
      "step": 506360
    },
    {
      "epoch": 1.7747006066723445,
      "grad_norm": 2.8125,
      "learning_rate": 2.2690820917415e-05,
      "loss": 0.7921,
      "step": 506370
    },
    {
      "epoch": 1.7747356541792398,
      "grad_norm": 2.8125,
      "learning_rate": 2.2690171888751298e-05,
      "loss": 0.8216,
      "step": 506380
    },
    {
      "epoch": 1.7747707016861356,
      "grad_norm": 2.71875,
      "learning_rate": 2.2689522860087596e-05,
      "loss": 0.7997,
      "step": 506390
    },
    {
      "epoch": 1.7748057491930311,
      "grad_norm": 2.84375,
      "learning_rate": 2.2688873831423894e-05,
      "loss": 0.8033,
      "step": 506400
    },
    {
      "epoch": 1.7748407966999267,
      "grad_norm": 2.75,
      "learning_rate": 2.268822480276019e-05,
      "loss": 0.7275,
      "step": 506410
    },
    {
      "epoch": 1.7748758442068224,
      "grad_norm": 3.125,
      "learning_rate": 2.268757577409649e-05,
      "loss": 0.7931,
      "step": 506420
    },
    {
      "epoch": 1.774910891713718,
      "grad_norm": 3.375,
      "learning_rate": 2.2686926745432788e-05,
      "loss": 0.8558,
      "step": 506430
    },
    {
      "epoch": 1.7749459392206135,
      "grad_norm": 3.0,
      "learning_rate": 2.2686277716769082e-05,
      "loss": 0.7845,
      "step": 506440
    },
    {
      "epoch": 1.7749809867275093,
      "grad_norm": 2.796875,
      "learning_rate": 2.268562868810538e-05,
      "loss": 0.7471,
      "step": 506450
    },
    {
      "epoch": 1.7750160342344046,
      "grad_norm": 2.734375,
      "learning_rate": 2.2684979659441678e-05,
      "loss": 0.7924,
      "step": 506460
    },
    {
      "epoch": 1.7750510817413003,
      "grad_norm": 3.390625,
      "learning_rate": 2.268433063077798e-05,
      "loss": 0.8621,
      "step": 506470
    },
    {
      "epoch": 1.775086129248196,
      "grad_norm": 2.796875,
      "learning_rate": 2.2683681602114278e-05,
      "loss": 0.8442,
      "step": 506480
    },
    {
      "epoch": 1.7751211767550914,
      "grad_norm": 2.484375,
      "learning_rate": 2.2683032573450576e-05,
      "loss": 0.8016,
      "step": 506490
    },
    {
      "epoch": 1.7751562242619872,
      "grad_norm": 2.765625,
      "learning_rate": 2.2682383544786874e-05,
      "loss": 0.7921,
      "step": 506500
    },
    {
      "epoch": 1.7751912717688827,
      "grad_norm": 2.71875,
      "learning_rate": 2.268173451612317e-05,
      "loss": 0.8295,
      "step": 506510
    },
    {
      "epoch": 1.7752263192757782,
      "grad_norm": 2.46875,
      "learning_rate": 2.268108548745947e-05,
      "loss": 0.8199,
      "step": 506520
    },
    {
      "epoch": 1.775261366782674,
      "grad_norm": 2.765625,
      "learning_rate": 2.2680436458795768e-05,
      "loss": 0.7846,
      "step": 506530
    },
    {
      "epoch": 1.7752964142895695,
      "grad_norm": 2.96875,
      "learning_rate": 2.2679787430132066e-05,
      "loss": 0.9203,
      "step": 506540
    },
    {
      "epoch": 1.775331461796465,
      "grad_norm": 3.203125,
      "learning_rate": 2.2679138401468364e-05,
      "loss": 0.7876,
      "step": 506550
    },
    {
      "epoch": 1.7753665093033608,
      "grad_norm": 3.03125,
      "learning_rate": 2.267848937280466e-05,
      "loss": 0.934,
      "step": 506560
    },
    {
      "epoch": 1.7754015568102564,
      "grad_norm": 2.90625,
      "learning_rate": 2.267784034414096e-05,
      "loss": 0.7507,
      "step": 506570
    },
    {
      "epoch": 1.775436604317152,
      "grad_norm": 2.796875,
      "learning_rate": 2.2677191315477258e-05,
      "loss": 0.7898,
      "step": 506580
    },
    {
      "epoch": 1.7754716518240476,
      "grad_norm": 2.59375,
      "learning_rate": 2.2676542286813556e-05,
      "loss": 0.7593,
      "step": 506590
    },
    {
      "epoch": 1.775506699330943,
      "grad_norm": 2.625,
      "learning_rate": 2.2675893258149854e-05,
      "loss": 0.8161,
      "step": 506600
    },
    {
      "epoch": 1.7755417468378387,
      "grad_norm": 3.0625,
      "learning_rate": 2.2675244229486155e-05,
      "loss": 0.7689,
      "step": 506610
    },
    {
      "epoch": 1.7755767943447343,
      "grad_norm": 2.953125,
      "learning_rate": 2.2674595200822453e-05,
      "loss": 0.8089,
      "step": 506620
    },
    {
      "epoch": 1.7756118418516298,
      "grad_norm": 3.0,
      "learning_rate": 2.2673946172158748e-05,
      "loss": 0.9038,
      "step": 506630
    },
    {
      "epoch": 1.7756468893585255,
      "grad_norm": 3.046875,
      "learning_rate": 2.2673297143495046e-05,
      "loss": 0.8127,
      "step": 506640
    },
    {
      "epoch": 1.775681936865421,
      "grad_norm": 2.765625,
      "learning_rate": 2.2672648114831344e-05,
      "loss": 0.791,
      "step": 506650
    },
    {
      "epoch": 1.7757169843723166,
      "grad_norm": 2.828125,
      "learning_rate": 2.267199908616764e-05,
      "loss": 0.7902,
      "step": 506660
    },
    {
      "epoch": 1.7757520318792124,
      "grad_norm": 3.125,
      "learning_rate": 2.267135005750394e-05,
      "loss": 0.9517,
      "step": 506670
    },
    {
      "epoch": 1.775787079386108,
      "grad_norm": 3.09375,
      "learning_rate": 2.2670701028840238e-05,
      "loss": 0.8486,
      "step": 506680
    },
    {
      "epoch": 1.7758221268930034,
      "grad_norm": 3.0,
      "learning_rate": 2.2670052000176536e-05,
      "loss": 0.8259,
      "step": 506690
    },
    {
      "epoch": 1.7758571743998992,
      "grad_norm": 2.5625,
      "learning_rate": 2.2669402971512834e-05,
      "loss": 0.8222,
      "step": 506700
    },
    {
      "epoch": 1.7758922219067945,
      "grad_norm": 3.4375,
      "learning_rate": 2.266875394284913e-05,
      "loss": 0.8406,
      "step": 506710
    },
    {
      "epoch": 1.7759272694136903,
      "grad_norm": 2.859375,
      "learning_rate": 2.266810491418543e-05,
      "loss": 0.7972,
      "step": 506720
    },
    {
      "epoch": 1.7759623169205858,
      "grad_norm": 3.0,
      "learning_rate": 2.266745588552173e-05,
      "loss": 0.8229,
      "step": 506730
    },
    {
      "epoch": 1.7759973644274814,
      "grad_norm": 2.984375,
      "learning_rate": 2.266680685685803e-05,
      "loss": 0.8312,
      "step": 506740
    },
    {
      "epoch": 1.776032411934377,
      "grad_norm": 3.171875,
      "learning_rate": 2.2666157828194327e-05,
      "loss": 0.7976,
      "step": 506750
    },
    {
      "epoch": 1.7760674594412726,
      "grad_norm": 2.953125,
      "learning_rate": 2.2665508799530625e-05,
      "loss": 0.7955,
      "step": 506760
    },
    {
      "epoch": 1.7761025069481682,
      "grad_norm": 2.78125,
      "learning_rate": 2.2664859770866923e-05,
      "loss": 0.8551,
      "step": 506770
    },
    {
      "epoch": 1.776137554455064,
      "grad_norm": 3.15625,
      "learning_rate": 2.266421074220322e-05,
      "loss": 0.7896,
      "step": 506780
    },
    {
      "epoch": 1.7761726019619595,
      "grad_norm": 2.71875,
      "learning_rate": 2.266356171353952e-05,
      "loss": 0.8184,
      "step": 506790
    },
    {
      "epoch": 1.776207649468855,
      "grad_norm": 3.109375,
      "learning_rate": 2.2662912684875817e-05,
      "loss": 0.8153,
      "step": 506800
    },
    {
      "epoch": 1.7762426969757508,
      "grad_norm": 2.5625,
      "learning_rate": 2.266226365621211e-05,
      "loss": 0.8032,
      "step": 506810
    },
    {
      "epoch": 1.776277744482646,
      "grad_norm": 3.484375,
      "learning_rate": 2.266161462754841e-05,
      "loss": 0.8503,
      "step": 506820
    },
    {
      "epoch": 1.7763127919895418,
      "grad_norm": 2.859375,
      "learning_rate": 2.2660965598884708e-05,
      "loss": 0.8626,
      "step": 506830
    },
    {
      "epoch": 1.7763478394964374,
      "grad_norm": 2.71875,
      "learning_rate": 2.266031657022101e-05,
      "loss": 0.7894,
      "step": 506840
    },
    {
      "epoch": 1.776382887003333,
      "grad_norm": 3.0625,
      "learning_rate": 2.2659667541557307e-05,
      "loss": 0.8026,
      "step": 506850
    },
    {
      "epoch": 1.7764179345102287,
      "grad_norm": 2.6875,
      "learning_rate": 2.2659018512893605e-05,
      "loss": 0.8282,
      "step": 506860
    },
    {
      "epoch": 1.7764529820171242,
      "grad_norm": 2.8125,
      "learning_rate": 2.2658369484229903e-05,
      "loss": 0.8535,
      "step": 506870
    },
    {
      "epoch": 1.7764880295240197,
      "grad_norm": 2.953125,
      "learning_rate": 2.26577204555662e-05,
      "loss": 0.8179,
      "step": 506880
    },
    {
      "epoch": 1.7765230770309155,
      "grad_norm": 3.1875,
      "learning_rate": 2.26570714269025e-05,
      "loss": 0.8768,
      "step": 506890
    },
    {
      "epoch": 1.776558124537811,
      "grad_norm": 2.9375,
      "learning_rate": 2.2656422398238797e-05,
      "loss": 0.8762,
      "step": 506900
    },
    {
      "epoch": 1.7765931720447066,
      "grad_norm": 2.453125,
      "learning_rate": 2.2655773369575095e-05,
      "loss": 0.8095,
      "step": 506910
    },
    {
      "epoch": 1.7766282195516023,
      "grad_norm": 2.875,
      "learning_rate": 2.2655124340911393e-05,
      "loss": 0.8268,
      "step": 506920
    },
    {
      "epoch": 1.7766632670584976,
      "grad_norm": 2.5625,
      "learning_rate": 2.265447531224769e-05,
      "loss": 0.7669,
      "step": 506930
    },
    {
      "epoch": 1.7766983145653934,
      "grad_norm": 2.75,
      "learning_rate": 2.265382628358399e-05,
      "loss": 0.8686,
      "step": 506940
    },
    {
      "epoch": 1.776733362072289,
      "grad_norm": 2.890625,
      "learning_rate": 2.2653177254920287e-05,
      "loss": 0.8575,
      "step": 506950
    },
    {
      "epoch": 1.7767684095791845,
      "grad_norm": 3.109375,
      "learning_rate": 2.2652528226256585e-05,
      "loss": 0.8314,
      "step": 506960
    },
    {
      "epoch": 1.7768034570860802,
      "grad_norm": 2.75,
      "learning_rate": 2.2651879197592883e-05,
      "loss": 0.8248,
      "step": 506970
    },
    {
      "epoch": 1.7768385045929758,
      "grad_norm": 3.0,
      "learning_rate": 2.2651230168929184e-05,
      "loss": 0.7792,
      "step": 506980
    },
    {
      "epoch": 1.7768735520998713,
      "grad_norm": 2.140625,
      "learning_rate": 2.2650581140265482e-05,
      "loss": 0.9138,
      "step": 506990
    },
    {
      "epoch": 1.776908599606767,
      "grad_norm": 2.890625,
      "learning_rate": 2.2649932111601777e-05,
      "loss": 0.8574,
      "step": 507000
    },
    {
      "epoch": 1.7769436471136626,
      "grad_norm": 3.0,
      "learning_rate": 2.2649283082938075e-05,
      "loss": 0.7568,
      "step": 507010
    },
    {
      "epoch": 1.7769786946205581,
      "grad_norm": 3.296875,
      "learning_rate": 2.2648634054274373e-05,
      "loss": 0.8493,
      "step": 507020
    },
    {
      "epoch": 1.7770137421274539,
      "grad_norm": 3.171875,
      "learning_rate": 2.264798502561067e-05,
      "loss": 0.8829,
      "step": 507030
    },
    {
      "epoch": 1.7770487896343492,
      "grad_norm": 3.21875,
      "learning_rate": 2.264733599694697e-05,
      "loss": 0.9228,
      "step": 507040
    },
    {
      "epoch": 1.777083837141245,
      "grad_norm": 2.953125,
      "learning_rate": 2.2646686968283267e-05,
      "loss": 0.7468,
      "step": 507050
    },
    {
      "epoch": 1.7771188846481407,
      "grad_norm": 3.015625,
      "learning_rate": 2.2646037939619565e-05,
      "loss": 0.8336,
      "step": 507060
    },
    {
      "epoch": 1.777153932155036,
      "grad_norm": 2.578125,
      "learning_rate": 2.2645388910955863e-05,
      "loss": 0.7647,
      "step": 507070
    },
    {
      "epoch": 1.7771889796619318,
      "grad_norm": 3.359375,
      "learning_rate": 2.264473988229216e-05,
      "loss": 0.8501,
      "step": 507080
    },
    {
      "epoch": 1.7772240271688273,
      "grad_norm": 2.859375,
      "learning_rate": 2.2644090853628462e-05,
      "loss": 0.849,
      "step": 507090
    },
    {
      "epoch": 1.7772590746757229,
      "grad_norm": 2.921875,
      "learning_rate": 2.264344182496476e-05,
      "loss": 0.7973,
      "step": 507100
    },
    {
      "epoch": 1.7772941221826186,
      "grad_norm": 3.359375,
      "learning_rate": 2.264279279630106e-05,
      "loss": 0.825,
      "step": 507110
    },
    {
      "epoch": 1.7773291696895142,
      "grad_norm": 3.328125,
      "learning_rate": 2.2642143767637356e-05,
      "loss": 0.827,
      "step": 507120
    },
    {
      "epoch": 1.7773642171964097,
      "grad_norm": 2.421875,
      "learning_rate": 2.2641494738973654e-05,
      "loss": 0.7723,
      "step": 507130
    },
    {
      "epoch": 1.7773992647033054,
      "grad_norm": 2.640625,
      "learning_rate": 2.2640845710309952e-05,
      "loss": 0.8539,
      "step": 507140
    },
    {
      "epoch": 1.7774343122102008,
      "grad_norm": 2.796875,
      "learning_rate": 2.264019668164625e-05,
      "loss": 0.8407,
      "step": 507150
    },
    {
      "epoch": 1.7774693597170965,
      "grad_norm": 2.75,
      "learning_rate": 2.263954765298255e-05,
      "loss": 0.9199,
      "step": 507160
    },
    {
      "epoch": 1.7775044072239923,
      "grad_norm": 3.5,
      "learning_rate": 2.2638898624318846e-05,
      "loss": 0.9439,
      "step": 507170
    },
    {
      "epoch": 1.7775394547308876,
      "grad_norm": 2.921875,
      "learning_rate": 2.2638249595655144e-05,
      "loss": 0.8636,
      "step": 507180
    },
    {
      "epoch": 1.7775745022377833,
      "grad_norm": 3.1875,
      "learning_rate": 2.263760056699144e-05,
      "loss": 0.8436,
      "step": 507190
    },
    {
      "epoch": 1.7776095497446789,
      "grad_norm": 2.546875,
      "learning_rate": 2.2636951538327737e-05,
      "loss": 0.7523,
      "step": 507200
    },
    {
      "epoch": 1.7776445972515744,
      "grad_norm": 3.09375,
      "learning_rate": 2.263630250966404e-05,
      "loss": 0.87,
      "step": 507210
    },
    {
      "epoch": 1.7776796447584702,
      "grad_norm": 2.71875,
      "learning_rate": 2.2635653481000336e-05,
      "loss": 0.782,
      "step": 507220
    },
    {
      "epoch": 1.7777146922653657,
      "grad_norm": 2.96875,
      "learning_rate": 2.2635004452336634e-05,
      "loss": 0.8382,
      "step": 507230
    },
    {
      "epoch": 1.7777497397722613,
      "grad_norm": 3.125,
      "learning_rate": 2.2634355423672932e-05,
      "loss": 0.8905,
      "step": 507240
    },
    {
      "epoch": 1.777784787279157,
      "grad_norm": 2.6875,
      "learning_rate": 2.263370639500923e-05,
      "loss": 0.8475,
      "step": 507250
    },
    {
      "epoch": 1.7778198347860525,
      "grad_norm": 2.59375,
      "learning_rate": 2.263305736634553e-05,
      "loss": 0.7446,
      "step": 507260
    },
    {
      "epoch": 1.777854882292948,
      "grad_norm": 2.875,
      "learning_rate": 2.2632408337681826e-05,
      "loss": 0.8624,
      "step": 507270
    },
    {
      "epoch": 1.7778899297998438,
      "grad_norm": 2.875,
      "learning_rate": 2.2631759309018124e-05,
      "loss": 0.7719,
      "step": 507280
    },
    {
      "epoch": 1.7779249773067392,
      "grad_norm": 2.984375,
      "learning_rate": 2.2631110280354422e-05,
      "loss": 0.7895,
      "step": 507290
    },
    {
      "epoch": 1.777960024813635,
      "grad_norm": 2.8125,
      "learning_rate": 2.263046125169072e-05,
      "loss": 0.8433,
      "step": 507300
    },
    {
      "epoch": 1.7779950723205304,
      "grad_norm": 3.296875,
      "learning_rate": 2.262981222302702e-05,
      "loss": 0.7634,
      "step": 507310
    },
    {
      "epoch": 1.778030119827426,
      "grad_norm": 3.609375,
      "learning_rate": 2.2629163194363316e-05,
      "loss": 0.7872,
      "step": 507320
    },
    {
      "epoch": 1.7780651673343217,
      "grad_norm": 2.828125,
      "learning_rate": 2.2628514165699614e-05,
      "loss": 0.8167,
      "step": 507330
    },
    {
      "epoch": 1.7781002148412173,
      "grad_norm": 2.890625,
      "learning_rate": 2.2627865137035912e-05,
      "loss": 0.7997,
      "step": 507340
    },
    {
      "epoch": 1.7781352623481128,
      "grad_norm": 2.90625,
      "learning_rate": 2.2627216108372214e-05,
      "loss": 0.827,
      "step": 507350
    },
    {
      "epoch": 1.7781703098550086,
      "grad_norm": 3.625,
      "learning_rate": 2.2626567079708512e-05,
      "loss": 0.9288,
      "step": 507360
    },
    {
      "epoch": 1.778205357361904,
      "grad_norm": 2.859375,
      "learning_rate": 2.262591805104481e-05,
      "loss": 0.823,
      "step": 507370
    },
    {
      "epoch": 1.7782404048687996,
      "grad_norm": 2.609375,
      "learning_rate": 2.2625269022381104e-05,
      "loss": 0.919,
      "step": 507380
    },
    {
      "epoch": 1.7782754523756954,
      "grad_norm": 2.890625,
      "learning_rate": 2.2624619993717402e-05,
      "loss": 0.8524,
      "step": 507390
    },
    {
      "epoch": 1.7783104998825907,
      "grad_norm": 3.046875,
      "learning_rate": 2.26239709650537e-05,
      "loss": 0.8134,
      "step": 507400
    },
    {
      "epoch": 1.7783455473894865,
      "grad_norm": 2.859375,
      "learning_rate": 2.262332193639e-05,
      "loss": 0.7799,
      "step": 507410
    },
    {
      "epoch": 1.778380594896382,
      "grad_norm": 2.78125,
      "learning_rate": 2.2622672907726296e-05,
      "loss": 0.8753,
      "step": 507420
    },
    {
      "epoch": 1.7784156424032775,
      "grad_norm": 3.078125,
      "learning_rate": 2.2622023879062594e-05,
      "loss": 0.7357,
      "step": 507430
    },
    {
      "epoch": 1.7784506899101733,
      "grad_norm": 3.234375,
      "learning_rate": 2.2621374850398892e-05,
      "loss": 0.8517,
      "step": 507440
    },
    {
      "epoch": 1.7784857374170688,
      "grad_norm": 2.78125,
      "learning_rate": 2.262072582173519e-05,
      "loss": 0.892,
      "step": 507450
    },
    {
      "epoch": 1.7785207849239644,
      "grad_norm": 3.140625,
      "learning_rate": 2.2620076793071492e-05,
      "loss": 0.883,
      "step": 507460
    },
    {
      "epoch": 1.7785558324308601,
      "grad_norm": 3.0,
      "learning_rate": 2.261942776440779e-05,
      "loss": 0.8656,
      "step": 507470
    },
    {
      "epoch": 1.7785908799377557,
      "grad_norm": 2.640625,
      "learning_rate": 2.2618778735744088e-05,
      "loss": 0.8879,
      "step": 507480
    },
    {
      "epoch": 1.7786259274446512,
      "grad_norm": 2.734375,
      "learning_rate": 2.2618129707080386e-05,
      "loss": 0.8312,
      "step": 507490
    },
    {
      "epoch": 1.778660974951547,
      "grad_norm": 2.890625,
      "learning_rate": 2.2617480678416684e-05,
      "loss": 0.8247,
      "step": 507500
    },
    {
      "epoch": 1.7786960224584423,
      "grad_norm": 3.3125,
      "learning_rate": 2.2616831649752982e-05,
      "loss": 0.8095,
      "step": 507510
    },
    {
      "epoch": 1.778731069965338,
      "grad_norm": 2.90625,
      "learning_rate": 2.261618262108928e-05,
      "loss": 0.8001,
      "step": 507520
    },
    {
      "epoch": 1.7787661174722336,
      "grad_norm": 2.46875,
      "learning_rate": 2.2615533592425578e-05,
      "loss": 0.8086,
      "step": 507530
    },
    {
      "epoch": 1.778801164979129,
      "grad_norm": 3.078125,
      "learning_rate": 2.2614884563761876e-05,
      "loss": 0.8472,
      "step": 507540
    },
    {
      "epoch": 1.7788362124860249,
      "grad_norm": 3.65625,
      "learning_rate": 2.2614235535098174e-05,
      "loss": 0.8392,
      "step": 507550
    },
    {
      "epoch": 1.7788712599929204,
      "grad_norm": 3.15625,
      "learning_rate": 2.2613586506434472e-05,
      "loss": 0.7924,
      "step": 507560
    },
    {
      "epoch": 1.778906307499816,
      "grad_norm": 2.890625,
      "learning_rate": 2.261293747777077e-05,
      "loss": 0.7498,
      "step": 507570
    },
    {
      "epoch": 1.7789413550067117,
      "grad_norm": 3.25,
      "learning_rate": 2.2612288449107068e-05,
      "loss": 0.8533,
      "step": 507580
    },
    {
      "epoch": 1.7789764025136072,
      "grad_norm": 2.78125,
      "learning_rate": 2.2611639420443366e-05,
      "loss": 0.9074,
      "step": 507590
    },
    {
      "epoch": 1.7790114500205028,
      "grad_norm": 2.75,
      "learning_rate": 2.2610990391779664e-05,
      "loss": 0.8541,
      "step": 507600
    },
    {
      "epoch": 1.7790464975273985,
      "grad_norm": 3.09375,
      "learning_rate": 2.2610341363115962e-05,
      "loss": 0.7836,
      "step": 507610
    },
    {
      "epoch": 1.7790815450342938,
      "grad_norm": 2.4375,
      "learning_rate": 2.260969233445226e-05,
      "loss": 0.8448,
      "step": 507620
    },
    {
      "epoch": 1.7791165925411896,
      "grad_norm": 2.859375,
      "learning_rate": 2.2609043305788558e-05,
      "loss": 0.8657,
      "step": 507630
    },
    {
      "epoch": 1.7791516400480851,
      "grad_norm": 3.015625,
      "learning_rate": 2.2608394277124856e-05,
      "loss": 0.7684,
      "step": 507640
    },
    {
      "epoch": 1.7791866875549807,
      "grad_norm": 3.21875,
      "learning_rate": 2.2607745248461154e-05,
      "loss": 0.8213,
      "step": 507650
    },
    {
      "epoch": 1.7792217350618764,
      "grad_norm": 2.734375,
      "learning_rate": 2.2607096219797452e-05,
      "loss": 0.7999,
      "step": 507660
    },
    {
      "epoch": 1.779256782568772,
      "grad_norm": 2.65625,
      "learning_rate": 2.260644719113375e-05,
      "loss": 0.7873,
      "step": 507670
    },
    {
      "epoch": 1.7792918300756675,
      "grad_norm": 3.203125,
      "learning_rate": 2.2605798162470048e-05,
      "loss": 0.7682,
      "step": 507680
    },
    {
      "epoch": 1.7793268775825632,
      "grad_norm": 3.015625,
      "learning_rate": 2.2605149133806346e-05,
      "loss": 0.8575,
      "step": 507690
    },
    {
      "epoch": 1.7793619250894588,
      "grad_norm": 3.015625,
      "learning_rate": 2.2604500105142644e-05,
      "loss": 0.7947,
      "step": 507700
    },
    {
      "epoch": 1.7793969725963543,
      "grad_norm": 2.671875,
      "learning_rate": 2.2603851076478945e-05,
      "loss": 0.879,
      "step": 507710
    },
    {
      "epoch": 1.77943202010325,
      "grad_norm": 3.1875,
      "learning_rate": 2.2603202047815243e-05,
      "loss": 0.8455,
      "step": 507720
    },
    {
      "epoch": 1.7794670676101454,
      "grad_norm": 2.53125,
      "learning_rate": 2.260255301915154e-05,
      "loss": 0.8197,
      "step": 507730
    },
    {
      "epoch": 1.7795021151170411,
      "grad_norm": 2.875,
      "learning_rate": 2.260190399048784e-05,
      "loss": 0.8216,
      "step": 507740
    },
    {
      "epoch": 1.779537162623937,
      "grad_norm": 2.5,
      "learning_rate": 2.2601254961824134e-05,
      "loss": 0.8111,
      "step": 507750
    },
    {
      "epoch": 1.7795722101308322,
      "grad_norm": 2.671875,
      "learning_rate": 2.2600605933160432e-05,
      "loss": 0.8197,
      "step": 507760
    },
    {
      "epoch": 1.779607257637728,
      "grad_norm": 2.921875,
      "learning_rate": 2.259995690449673e-05,
      "loss": 0.8138,
      "step": 507770
    },
    {
      "epoch": 1.7796423051446235,
      "grad_norm": 2.890625,
      "learning_rate": 2.2599307875833028e-05,
      "loss": 0.8308,
      "step": 507780
    },
    {
      "epoch": 1.779677352651519,
      "grad_norm": 2.859375,
      "learning_rate": 2.2598658847169326e-05,
      "loss": 0.7931,
      "step": 507790
    },
    {
      "epoch": 1.7797124001584148,
      "grad_norm": 3.109375,
      "learning_rate": 2.2598009818505624e-05,
      "loss": 0.8085,
      "step": 507800
    },
    {
      "epoch": 1.7797474476653103,
      "grad_norm": 2.84375,
      "learning_rate": 2.2597360789841922e-05,
      "loss": 0.8121,
      "step": 507810
    },
    {
      "epoch": 1.7797824951722059,
      "grad_norm": 2.703125,
      "learning_rate": 2.259671176117822e-05,
      "loss": 0.8922,
      "step": 507820
    },
    {
      "epoch": 1.7798175426791016,
      "grad_norm": 2.671875,
      "learning_rate": 2.259606273251452e-05,
      "loss": 0.8002,
      "step": 507830
    },
    {
      "epoch": 1.7798525901859972,
      "grad_norm": 3.171875,
      "learning_rate": 2.259541370385082e-05,
      "loss": 0.8062,
      "step": 507840
    },
    {
      "epoch": 1.7798876376928927,
      "grad_norm": 2.984375,
      "learning_rate": 2.2594764675187117e-05,
      "loss": 0.9111,
      "step": 507850
    },
    {
      "epoch": 1.7799226851997885,
      "grad_norm": 2.9375,
      "learning_rate": 2.2594115646523415e-05,
      "loss": 0.899,
      "step": 507860
    },
    {
      "epoch": 1.7799577327066838,
      "grad_norm": 2.65625,
      "learning_rate": 2.2593466617859713e-05,
      "loss": 0.7239,
      "step": 507870
    },
    {
      "epoch": 1.7799927802135795,
      "grad_norm": 3.578125,
      "learning_rate": 2.259281758919601e-05,
      "loss": 0.7811,
      "step": 507880
    },
    {
      "epoch": 1.780027827720475,
      "grad_norm": 2.828125,
      "learning_rate": 2.259216856053231e-05,
      "loss": 0.7704,
      "step": 507890
    },
    {
      "epoch": 1.7800628752273706,
      "grad_norm": 2.46875,
      "learning_rate": 2.2591519531868607e-05,
      "loss": 0.8567,
      "step": 507900
    },
    {
      "epoch": 1.7800979227342664,
      "grad_norm": 3.015625,
      "learning_rate": 2.2590870503204905e-05,
      "loss": 0.8313,
      "step": 507910
    },
    {
      "epoch": 1.780132970241162,
      "grad_norm": 2.65625,
      "learning_rate": 2.2590221474541203e-05,
      "loss": 0.885,
      "step": 507920
    },
    {
      "epoch": 1.7801680177480574,
      "grad_norm": 3.375,
      "learning_rate": 2.25895724458775e-05,
      "loss": 0.7789,
      "step": 507930
    },
    {
      "epoch": 1.7802030652549532,
      "grad_norm": 3.09375,
      "learning_rate": 2.25889234172138e-05,
      "loss": 0.8634,
      "step": 507940
    },
    {
      "epoch": 1.7802381127618487,
      "grad_norm": 2.890625,
      "learning_rate": 2.2588274388550097e-05,
      "loss": 0.8516,
      "step": 507950
    },
    {
      "epoch": 1.7802731602687443,
      "grad_norm": 2.71875,
      "learning_rate": 2.2587625359886395e-05,
      "loss": 0.7944,
      "step": 507960
    },
    {
      "epoch": 1.78030820777564,
      "grad_norm": 3.09375,
      "learning_rate": 2.2586976331222693e-05,
      "loss": 0.8666,
      "step": 507970
    },
    {
      "epoch": 1.7803432552825353,
      "grad_norm": 3.09375,
      "learning_rate": 2.258632730255899e-05,
      "loss": 0.7808,
      "step": 507980
    },
    {
      "epoch": 1.780378302789431,
      "grad_norm": 2.984375,
      "learning_rate": 2.258567827389529e-05,
      "loss": 0.8406,
      "step": 507990
    },
    {
      "epoch": 1.7804133502963266,
      "grad_norm": 2.71875,
      "learning_rate": 2.2585029245231587e-05,
      "loss": 0.8154,
      "step": 508000
    },
    {
      "epoch": 1.7804483978032222,
      "grad_norm": 3.140625,
      "learning_rate": 2.2584380216567885e-05,
      "loss": 0.8026,
      "step": 508010
    },
    {
      "epoch": 1.780483445310118,
      "grad_norm": 3.046875,
      "learning_rate": 2.2583731187904183e-05,
      "loss": 0.8515,
      "step": 508020
    },
    {
      "epoch": 1.7805184928170135,
      "grad_norm": 3.375,
      "learning_rate": 2.258308215924048e-05,
      "loss": 0.8462,
      "step": 508030
    },
    {
      "epoch": 1.780553540323909,
      "grad_norm": 2.53125,
      "learning_rate": 2.258243313057678e-05,
      "loss": 0.7724,
      "step": 508040
    },
    {
      "epoch": 1.7805885878308048,
      "grad_norm": 3.078125,
      "learning_rate": 2.2581784101913077e-05,
      "loss": 0.8567,
      "step": 508050
    },
    {
      "epoch": 1.7806236353377003,
      "grad_norm": 2.65625,
      "learning_rate": 2.2581135073249375e-05,
      "loss": 0.7414,
      "step": 508060
    },
    {
      "epoch": 1.7806586828445958,
      "grad_norm": 2.984375,
      "learning_rate": 2.2580486044585673e-05,
      "loss": 0.9281,
      "step": 508070
    },
    {
      "epoch": 1.7806937303514916,
      "grad_norm": 2.765625,
      "learning_rate": 2.2579837015921975e-05,
      "loss": 0.9194,
      "step": 508080
    },
    {
      "epoch": 1.780728777858387,
      "grad_norm": 2.375,
      "learning_rate": 2.2579187987258273e-05,
      "loss": 0.7957,
      "step": 508090
    },
    {
      "epoch": 1.7807638253652827,
      "grad_norm": 2.9375,
      "learning_rate": 2.257853895859457e-05,
      "loss": 0.8035,
      "step": 508100
    },
    {
      "epoch": 1.7807988728721782,
      "grad_norm": 2.84375,
      "learning_rate": 2.257788992993087e-05,
      "loss": 0.8673,
      "step": 508110
    },
    {
      "epoch": 1.7808339203790737,
      "grad_norm": 3.09375,
      "learning_rate": 2.2577240901267167e-05,
      "loss": 0.8927,
      "step": 508120
    },
    {
      "epoch": 1.7808689678859695,
      "grad_norm": 2.4375,
      "learning_rate": 2.257659187260346e-05,
      "loss": 0.7542,
      "step": 508130
    },
    {
      "epoch": 1.780904015392865,
      "grad_norm": 2.703125,
      "learning_rate": 2.257594284393976e-05,
      "loss": 0.8348,
      "step": 508140
    },
    {
      "epoch": 1.7809390628997606,
      "grad_norm": 2.796875,
      "learning_rate": 2.2575293815276057e-05,
      "loss": 0.7875,
      "step": 508150
    },
    {
      "epoch": 1.7809741104066563,
      "grad_norm": 3.03125,
      "learning_rate": 2.2574644786612355e-05,
      "loss": 0.805,
      "step": 508160
    },
    {
      "epoch": 1.7810091579135519,
      "grad_norm": 2.90625,
      "learning_rate": 2.2573995757948653e-05,
      "loss": 0.784,
      "step": 508170
    },
    {
      "epoch": 1.7810442054204474,
      "grad_norm": 2.65625,
      "learning_rate": 2.257334672928495e-05,
      "loss": 0.8431,
      "step": 508180
    },
    {
      "epoch": 1.7810792529273431,
      "grad_norm": 3.0625,
      "learning_rate": 2.2572697700621253e-05,
      "loss": 0.8764,
      "step": 508190
    },
    {
      "epoch": 1.7811143004342385,
      "grad_norm": 2.609375,
      "learning_rate": 2.257204867195755e-05,
      "loss": 0.8454,
      "step": 508200
    },
    {
      "epoch": 1.7811493479411342,
      "grad_norm": 3.125,
      "learning_rate": 2.257139964329385e-05,
      "loss": 0.8394,
      "step": 508210
    },
    {
      "epoch": 1.7811843954480298,
      "grad_norm": 3.171875,
      "learning_rate": 2.2570750614630147e-05,
      "loss": 0.8096,
      "step": 508220
    },
    {
      "epoch": 1.7812194429549253,
      "grad_norm": 3.34375,
      "learning_rate": 2.2570101585966445e-05,
      "loss": 0.8607,
      "step": 508230
    },
    {
      "epoch": 1.781254490461821,
      "grad_norm": 2.734375,
      "learning_rate": 2.2569452557302743e-05,
      "loss": 0.8783,
      "step": 508240
    },
    {
      "epoch": 1.7812895379687166,
      "grad_norm": 3.4375,
      "learning_rate": 2.256880352863904e-05,
      "loss": 0.8645,
      "step": 508250
    },
    {
      "epoch": 1.7813245854756121,
      "grad_norm": 2.765625,
      "learning_rate": 2.256815449997534e-05,
      "loss": 0.8604,
      "step": 508260
    },
    {
      "epoch": 1.7813596329825079,
      "grad_norm": 3.0,
      "learning_rate": 2.2567505471311637e-05,
      "loss": 0.7806,
      "step": 508270
    },
    {
      "epoch": 1.7813946804894034,
      "grad_norm": 2.828125,
      "learning_rate": 2.2566856442647935e-05,
      "loss": 0.6799,
      "step": 508280
    },
    {
      "epoch": 1.781429727996299,
      "grad_norm": 2.640625,
      "learning_rate": 2.2566207413984233e-05,
      "loss": 0.7913,
      "step": 508290
    },
    {
      "epoch": 1.7814647755031947,
      "grad_norm": 2.734375,
      "learning_rate": 2.256555838532053e-05,
      "loss": 0.8557,
      "step": 508300
    },
    {
      "epoch": 1.78149982301009,
      "grad_norm": 2.734375,
      "learning_rate": 2.256490935665683e-05,
      "loss": 0.7967,
      "step": 508310
    },
    {
      "epoch": 1.7815348705169858,
      "grad_norm": 2.84375,
      "learning_rate": 2.2564260327993127e-05,
      "loss": 0.855,
      "step": 508320
    },
    {
      "epoch": 1.7815699180238813,
      "grad_norm": 3.390625,
      "learning_rate": 2.2563611299329425e-05,
      "loss": 0.803,
      "step": 508330
    },
    {
      "epoch": 1.7816049655307769,
      "grad_norm": 2.234375,
      "learning_rate": 2.2562962270665723e-05,
      "loss": 0.7662,
      "step": 508340
    },
    {
      "epoch": 1.7816400130376726,
      "grad_norm": 3.171875,
      "learning_rate": 2.256231324200202e-05,
      "loss": 0.8355,
      "step": 508350
    },
    {
      "epoch": 1.7816750605445681,
      "grad_norm": 3.0,
      "learning_rate": 2.256166421333832e-05,
      "loss": 0.7999,
      "step": 508360
    },
    {
      "epoch": 1.7817101080514637,
      "grad_norm": 2.53125,
      "learning_rate": 2.2561015184674617e-05,
      "loss": 0.7798,
      "step": 508370
    },
    {
      "epoch": 1.7817451555583594,
      "grad_norm": 2.96875,
      "learning_rate": 2.2560366156010915e-05,
      "loss": 0.8326,
      "step": 508380
    },
    {
      "epoch": 1.781780203065255,
      "grad_norm": 2.609375,
      "learning_rate": 2.2559717127347213e-05,
      "loss": 0.7589,
      "step": 508390
    },
    {
      "epoch": 1.7818152505721505,
      "grad_norm": 2.984375,
      "learning_rate": 2.255906809868351e-05,
      "loss": 0.8609,
      "step": 508400
    },
    {
      "epoch": 1.7818502980790463,
      "grad_norm": 2.90625,
      "learning_rate": 2.255841907001981e-05,
      "loss": 0.8719,
      "step": 508410
    },
    {
      "epoch": 1.7818853455859416,
      "grad_norm": 2.78125,
      "learning_rate": 2.2557770041356107e-05,
      "loss": 0.7796,
      "step": 508420
    },
    {
      "epoch": 1.7819203930928373,
      "grad_norm": 2.890625,
      "learning_rate": 2.2557121012692405e-05,
      "loss": 0.8203,
      "step": 508430
    },
    {
      "epoch": 1.781955440599733,
      "grad_norm": 2.921875,
      "learning_rate": 2.2556471984028703e-05,
      "loss": 0.768,
      "step": 508440
    },
    {
      "epoch": 1.7819904881066284,
      "grad_norm": 2.453125,
      "learning_rate": 2.2555822955365004e-05,
      "loss": 0.8474,
      "step": 508450
    },
    {
      "epoch": 1.7820255356135242,
      "grad_norm": 2.875,
      "learning_rate": 2.2555173926701302e-05,
      "loss": 0.7379,
      "step": 508460
    },
    {
      "epoch": 1.7820605831204197,
      "grad_norm": 2.734375,
      "learning_rate": 2.25545248980376e-05,
      "loss": 0.8555,
      "step": 508470
    },
    {
      "epoch": 1.7820956306273152,
      "grad_norm": 3.03125,
      "learning_rate": 2.2553875869373898e-05,
      "loss": 0.8776,
      "step": 508480
    },
    {
      "epoch": 1.782130678134211,
      "grad_norm": 2.9375,
      "learning_rate": 2.2553226840710196e-05,
      "loss": 0.8978,
      "step": 508490
    },
    {
      "epoch": 1.7821657256411065,
      "grad_norm": 2.78125,
      "learning_rate": 2.2552577812046494e-05,
      "loss": 0.8261,
      "step": 508500
    },
    {
      "epoch": 1.782200773148002,
      "grad_norm": 2.6875,
      "learning_rate": 2.255192878338279e-05,
      "loss": 0.8011,
      "step": 508510
    },
    {
      "epoch": 1.7822358206548978,
      "grad_norm": 2.953125,
      "learning_rate": 2.2551279754719087e-05,
      "loss": 0.9036,
      "step": 508520
    },
    {
      "epoch": 1.7822708681617934,
      "grad_norm": 2.578125,
      "learning_rate": 2.2550630726055385e-05,
      "loss": 0.8389,
      "step": 508530
    },
    {
      "epoch": 1.782305915668689,
      "grad_norm": 2.9375,
      "learning_rate": 2.2549981697391683e-05,
      "loss": 0.8446,
      "step": 508540
    },
    {
      "epoch": 1.7823409631755847,
      "grad_norm": 2.984375,
      "learning_rate": 2.254933266872798e-05,
      "loss": 0.7891,
      "step": 508550
    },
    {
      "epoch": 1.78237601068248,
      "grad_norm": 2.6875,
      "learning_rate": 2.2548683640064282e-05,
      "loss": 0.9123,
      "step": 508560
    },
    {
      "epoch": 1.7824110581893757,
      "grad_norm": 3.046875,
      "learning_rate": 2.254803461140058e-05,
      "loss": 0.8149,
      "step": 508570
    },
    {
      "epoch": 1.7824461056962713,
      "grad_norm": 2.796875,
      "learning_rate": 2.2547385582736878e-05,
      "loss": 0.9299,
      "step": 508580
    },
    {
      "epoch": 1.7824811532031668,
      "grad_norm": 3.328125,
      "learning_rate": 2.2546736554073176e-05,
      "loss": 0.8052,
      "step": 508590
    },
    {
      "epoch": 1.7825162007100626,
      "grad_norm": 2.71875,
      "learning_rate": 2.2546087525409474e-05,
      "loss": 0.7789,
      "step": 508600
    },
    {
      "epoch": 1.782551248216958,
      "grad_norm": 3.109375,
      "learning_rate": 2.2545438496745772e-05,
      "loss": 0.8793,
      "step": 508610
    },
    {
      "epoch": 1.7825862957238536,
      "grad_norm": 3.046875,
      "learning_rate": 2.254478946808207e-05,
      "loss": 0.8185,
      "step": 508620
    },
    {
      "epoch": 1.7826213432307494,
      "grad_norm": 3.109375,
      "learning_rate": 2.2544140439418368e-05,
      "loss": 0.9138,
      "step": 508630
    },
    {
      "epoch": 1.782656390737645,
      "grad_norm": 2.84375,
      "learning_rate": 2.2543491410754666e-05,
      "loss": 0.8254,
      "step": 508640
    },
    {
      "epoch": 1.7826914382445405,
      "grad_norm": 2.859375,
      "learning_rate": 2.2542842382090964e-05,
      "loss": 0.8138,
      "step": 508650
    },
    {
      "epoch": 1.7827264857514362,
      "grad_norm": 3.125,
      "learning_rate": 2.2542193353427262e-05,
      "loss": 0.7881,
      "step": 508660
    },
    {
      "epoch": 1.7827615332583315,
      "grad_norm": 2.640625,
      "learning_rate": 2.254154432476356e-05,
      "loss": 0.7535,
      "step": 508670
    },
    {
      "epoch": 1.7827965807652273,
      "grad_norm": 3.078125,
      "learning_rate": 2.2540895296099858e-05,
      "loss": 0.8128,
      "step": 508680
    },
    {
      "epoch": 1.7828316282721228,
      "grad_norm": 3.078125,
      "learning_rate": 2.2540246267436156e-05,
      "loss": 0.8159,
      "step": 508690
    },
    {
      "epoch": 1.7828666757790184,
      "grad_norm": 3.28125,
      "learning_rate": 2.2539597238772454e-05,
      "loss": 0.8406,
      "step": 508700
    },
    {
      "epoch": 1.7829017232859141,
      "grad_norm": 3.109375,
      "learning_rate": 2.2538948210108752e-05,
      "loss": 0.8133,
      "step": 508710
    },
    {
      "epoch": 1.7829367707928097,
      "grad_norm": 2.953125,
      "learning_rate": 2.253829918144505e-05,
      "loss": 0.8832,
      "step": 508720
    },
    {
      "epoch": 1.7829718182997052,
      "grad_norm": 2.796875,
      "learning_rate": 2.2537650152781348e-05,
      "loss": 0.8246,
      "step": 508730
    },
    {
      "epoch": 1.783006865806601,
      "grad_norm": 3.21875,
      "learning_rate": 2.2537001124117646e-05,
      "loss": 0.926,
      "step": 508740
    },
    {
      "epoch": 1.7830419133134965,
      "grad_norm": 3.015625,
      "learning_rate": 2.2536352095453944e-05,
      "loss": 0.9083,
      "step": 508750
    },
    {
      "epoch": 1.783076960820392,
      "grad_norm": 2.578125,
      "learning_rate": 2.2535703066790242e-05,
      "loss": 0.8083,
      "step": 508760
    },
    {
      "epoch": 1.7831120083272878,
      "grad_norm": 2.796875,
      "learning_rate": 2.253505403812654e-05,
      "loss": 0.7824,
      "step": 508770
    },
    {
      "epoch": 1.783147055834183,
      "grad_norm": 2.453125,
      "learning_rate": 2.2534405009462838e-05,
      "loss": 0.7653,
      "step": 508780
    },
    {
      "epoch": 1.7831821033410789,
      "grad_norm": 2.859375,
      "learning_rate": 2.2533755980799136e-05,
      "loss": 0.8055,
      "step": 508790
    },
    {
      "epoch": 1.7832171508479744,
      "grad_norm": 2.984375,
      "learning_rate": 2.2533106952135434e-05,
      "loss": 0.8808,
      "step": 508800
    },
    {
      "epoch": 1.78325219835487,
      "grad_norm": 3.46875,
      "learning_rate": 2.2532457923471735e-05,
      "loss": 0.8779,
      "step": 508810
    },
    {
      "epoch": 1.7832872458617657,
      "grad_norm": 2.71875,
      "learning_rate": 2.2531808894808033e-05,
      "loss": 0.8018,
      "step": 508820
    },
    {
      "epoch": 1.7833222933686612,
      "grad_norm": 2.859375,
      "learning_rate": 2.253115986614433e-05,
      "loss": 0.8547,
      "step": 508830
    },
    {
      "epoch": 1.7833573408755568,
      "grad_norm": 2.953125,
      "learning_rate": 2.253051083748063e-05,
      "loss": 0.8319,
      "step": 508840
    },
    {
      "epoch": 1.7833923883824525,
      "grad_norm": 3.03125,
      "learning_rate": 2.2529861808816927e-05,
      "loss": 0.7977,
      "step": 508850
    },
    {
      "epoch": 1.783427435889348,
      "grad_norm": 3.15625,
      "learning_rate": 2.2529212780153225e-05,
      "loss": 0.8512,
      "step": 508860
    },
    {
      "epoch": 1.7834624833962436,
      "grad_norm": 3.140625,
      "learning_rate": 2.2528563751489523e-05,
      "loss": 0.8498,
      "step": 508870
    },
    {
      "epoch": 1.7834975309031393,
      "grad_norm": 2.953125,
      "learning_rate": 2.2527914722825818e-05,
      "loss": 0.7851,
      "step": 508880
    },
    {
      "epoch": 1.7835325784100347,
      "grad_norm": 2.953125,
      "learning_rate": 2.2527265694162116e-05,
      "loss": 0.7136,
      "step": 508890
    },
    {
      "epoch": 1.7835676259169304,
      "grad_norm": 2.984375,
      "learning_rate": 2.2526616665498414e-05,
      "loss": 0.7445,
      "step": 508900
    },
    {
      "epoch": 1.783602673423826,
      "grad_norm": 3.109375,
      "learning_rate": 2.2525967636834712e-05,
      "loss": 0.8118,
      "step": 508910
    },
    {
      "epoch": 1.7836377209307215,
      "grad_norm": 2.328125,
      "learning_rate": 2.252531860817101e-05,
      "loss": 0.7301,
      "step": 508920
    },
    {
      "epoch": 1.7836727684376172,
      "grad_norm": 3.28125,
      "learning_rate": 2.252466957950731e-05,
      "loss": 0.7933,
      "step": 508930
    },
    {
      "epoch": 1.7837078159445128,
      "grad_norm": 3.0,
      "learning_rate": 2.252402055084361e-05,
      "loss": 0.8879,
      "step": 508940
    },
    {
      "epoch": 1.7837428634514083,
      "grad_norm": 3.046875,
      "learning_rate": 2.2523371522179907e-05,
      "loss": 0.8157,
      "step": 508950
    },
    {
      "epoch": 1.783777910958304,
      "grad_norm": 2.3125,
      "learning_rate": 2.2522722493516205e-05,
      "loss": 0.8495,
      "step": 508960
    },
    {
      "epoch": 1.7838129584651996,
      "grad_norm": 3.328125,
      "learning_rate": 2.2522073464852503e-05,
      "loss": 0.8123,
      "step": 508970
    },
    {
      "epoch": 1.7838480059720951,
      "grad_norm": 3.015625,
      "learning_rate": 2.25214244361888e-05,
      "loss": 0.7924,
      "step": 508980
    },
    {
      "epoch": 1.783883053478991,
      "grad_norm": 2.40625,
      "learning_rate": 2.25207754075251e-05,
      "loss": 0.8199,
      "step": 508990
    },
    {
      "epoch": 1.7839181009858862,
      "grad_norm": 2.5,
      "learning_rate": 2.2520126378861397e-05,
      "loss": 0.7943,
      "step": 509000
    },
    {
      "epoch": 1.783953148492782,
      "grad_norm": 2.9375,
      "learning_rate": 2.2519477350197695e-05,
      "loss": 0.7923,
      "step": 509010
    },
    {
      "epoch": 1.7839881959996777,
      "grad_norm": 3.203125,
      "learning_rate": 2.2518828321533993e-05,
      "loss": 0.7536,
      "step": 509020
    },
    {
      "epoch": 1.784023243506573,
      "grad_norm": 3.09375,
      "learning_rate": 2.251817929287029e-05,
      "loss": 0.8273,
      "step": 509030
    },
    {
      "epoch": 1.7840582910134688,
      "grad_norm": 2.640625,
      "learning_rate": 2.251753026420659e-05,
      "loss": 0.867,
      "step": 509040
    },
    {
      "epoch": 1.7840933385203643,
      "grad_norm": 2.875,
      "learning_rate": 2.2516881235542887e-05,
      "loss": 0.8181,
      "step": 509050
    },
    {
      "epoch": 1.7841283860272599,
      "grad_norm": 3.734375,
      "learning_rate": 2.251623220687919e-05,
      "loss": 0.8496,
      "step": 509060
    },
    {
      "epoch": 1.7841634335341556,
      "grad_norm": 3.1875,
      "learning_rate": 2.2515583178215483e-05,
      "loss": 0.833,
      "step": 509070
    },
    {
      "epoch": 1.7841984810410512,
      "grad_norm": 3.4375,
      "learning_rate": 2.251493414955178e-05,
      "loss": 0.8667,
      "step": 509080
    },
    {
      "epoch": 1.7842335285479467,
      "grad_norm": 3.0625,
      "learning_rate": 2.251428512088808e-05,
      "loss": 0.767,
      "step": 509090
    },
    {
      "epoch": 1.7842685760548425,
      "grad_norm": 2.640625,
      "learning_rate": 2.2513636092224377e-05,
      "loss": 0.8219,
      "step": 509100
    },
    {
      "epoch": 1.7843036235617378,
      "grad_norm": 2.90625,
      "learning_rate": 2.2512987063560675e-05,
      "loss": 0.8427,
      "step": 509110
    },
    {
      "epoch": 1.7843386710686335,
      "grad_norm": 2.734375,
      "learning_rate": 2.2512338034896973e-05,
      "loss": 0.8193,
      "step": 509120
    },
    {
      "epoch": 1.7843737185755293,
      "grad_norm": 2.96875,
      "learning_rate": 2.251168900623327e-05,
      "loss": 0.8702,
      "step": 509130
    },
    {
      "epoch": 1.7844087660824246,
      "grad_norm": 2.734375,
      "learning_rate": 2.251103997756957e-05,
      "loss": 0.8188,
      "step": 509140
    },
    {
      "epoch": 1.7844438135893204,
      "grad_norm": 2.421875,
      "learning_rate": 2.2510390948905867e-05,
      "loss": 0.8377,
      "step": 509150
    },
    {
      "epoch": 1.784478861096216,
      "grad_norm": 2.53125,
      "learning_rate": 2.2509741920242165e-05,
      "loss": 0.7947,
      "step": 509160
    },
    {
      "epoch": 1.7845139086031114,
      "grad_norm": 2.765625,
      "learning_rate": 2.2509092891578463e-05,
      "loss": 0.8088,
      "step": 509170
    },
    {
      "epoch": 1.7845489561100072,
      "grad_norm": 3.03125,
      "learning_rate": 2.2508443862914765e-05,
      "loss": 0.7294,
      "step": 509180
    },
    {
      "epoch": 1.7845840036169027,
      "grad_norm": 2.671875,
      "learning_rate": 2.2507794834251063e-05,
      "loss": 0.7813,
      "step": 509190
    },
    {
      "epoch": 1.7846190511237983,
      "grad_norm": 2.640625,
      "learning_rate": 2.250714580558736e-05,
      "loss": 0.7378,
      "step": 509200
    },
    {
      "epoch": 1.784654098630694,
      "grad_norm": 3.359375,
      "learning_rate": 2.250649677692366e-05,
      "loss": 0.839,
      "step": 509210
    },
    {
      "epoch": 1.7846891461375896,
      "grad_norm": 2.828125,
      "learning_rate": 2.2505847748259957e-05,
      "loss": 0.8502,
      "step": 509220
    },
    {
      "epoch": 1.784724193644485,
      "grad_norm": 3.21875,
      "learning_rate": 2.2505198719596255e-05,
      "loss": 0.8411,
      "step": 509230
    },
    {
      "epoch": 1.7847592411513808,
      "grad_norm": 3.21875,
      "learning_rate": 2.2504549690932553e-05,
      "loss": 0.8856,
      "step": 509240
    },
    {
      "epoch": 1.7847942886582762,
      "grad_norm": 2.5,
      "learning_rate": 2.250390066226885e-05,
      "loss": 0.8838,
      "step": 509250
    },
    {
      "epoch": 1.784829336165172,
      "grad_norm": 2.9375,
      "learning_rate": 2.2503251633605145e-05,
      "loss": 0.7996,
      "step": 509260
    },
    {
      "epoch": 1.7848643836720675,
      "grad_norm": 2.921875,
      "learning_rate": 2.2502602604941443e-05,
      "loss": 0.8836,
      "step": 509270
    },
    {
      "epoch": 1.784899431178963,
      "grad_norm": 2.59375,
      "learning_rate": 2.250195357627774e-05,
      "loss": 0.7744,
      "step": 509280
    },
    {
      "epoch": 1.7849344786858587,
      "grad_norm": 2.734375,
      "learning_rate": 2.2501304547614043e-05,
      "loss": 0.7877,
      "step": 509290
    },
    {
      "epoch": 1.7849695261927543,
      "grad_norm": 2.484375,
      "learning_rate": 2.250065551895034e-05,
      "loss": 0.832,
      "step": 509300
    },
    {
      "epoch": 1.7850045736996498,
      "grad_norm": 2.9375,
      "learning_rate": 2.250000649028664e-05,
      "loss": 0.781,
      "step": 509310
    },
    {
      "epoch": 1.7850396212065456,
      "grad_norm": 3.328125,
      "learning_rate": 2.2499357461622937e-05,
      "loss": 0.8014,
      "step": 509320
    },
    {
      "epoch": 1.7850746687134411,
      "grad_norm": 2.75,
      "learning_rate": 2.2498708432959235e-05,
      "loss": 0.8009,
      "step": 509330
    },
    {
      "epoch": 1.7851097162203367,
      "grad_norm": 2.75,
      "learning_rate": 2.2498059404295533e-05,
      "loss": 0.7938,
      "step": 509340
    },
    {
      "epoch": 1.7851447637272324,
      "grad_norm": 2.796875,
      "learning_rate": 2.249741037563183e-05,
      "loss": 0.8058,
      "step": 509350
    },
    {
      "epoch": 1.7851798112341277,
      "grad_norm": 2.984375,
      "learning_rate": 2.249676134696813e-05,
      "loss": 0.8066,
      "step": 509360
    },
    {
      "epoch": 1.7852148587410235,
      "grad_norm": 2.625,
      "learning_rate": 2.2496112318304427e-05,
      "loss": 0.8883,
      "step": 509370
    },
    {
      "epoch": 1.785249906247919,
      "grad_norm": 2.8125,
      "learning_rate": 2.2495463289640725e-05,
      "loss": 0.7646,
      "step": 509380
    },
    {
      "epoch": 1.7852849537548146,
      "grad_norm": 2.609375,
      "learning_rate": 2.2494814260977023e-05,
      "loss": 0.8728,
      "step": 509390
    },
    {
      "epoch": 1.7853200012617103,
      "grad_norm": 2.5625,
      "learning_rate": 2.249416523231332e-05,
      "loss": 0.7762,
      "step": 509400
    },
    {
      "epoch": 1.7853550487686058,
      "grad_norm": 2.5,
      "learning_rate": 2.249351620364962e-05,
      "loss": 0.8328,
      "step": 509410
    },
    {
      "epoch": 1.7853900962755014,
      "grad_norm": 2.640625,
      "learning_rate": 2.2492867174985917e-05,
      "loss": 0.7649,
      "step": 509420
    },
    {
      "epoch": 1.7854251437823971,
      "grad_norm": 3.484375,
      "learning_rate": 2.2492218146322218e-05,
      "loss": 0.8694,
      "step": 509430
    },
    {
      "epoch": 1.7854601912892927,
      "grad_norm": 3.0625,
      "learning_rate": 2.2491569117658516e-05,
      "loss": 0.8865,
      "step": 509440
    },
    {
      "epoch": 1.7854952387961882,
      "grad_norm": 2.71875,
      "learning_rate": 2.249092008899481e-05,
      "loss": 0.8062,
      "step": 509450
    },
    {
      "epoch": 1.785530286303084,
      "grad_norm": 2.890625,
      "learning_rate": 2.249027106033111e-05,
      "loss": 0.8774,
      "step": 509460
    },
    {
      "epoch": 1.7855653338099793,
      "grad_norm": 2.78125,
      "learning_rate": 2.2489622031667407e-05,
      "loss": 0.8448,
      "step": 509470
    },
    {
      "epoch": 1.785600381316875,
      "grad_norm": 3.109375,
      "learning_rate": 2.2488973003003705e-05,
      "loss": 0.8126,
      "step": 509480
    },
    {
      "epoch": 1.7856354288237706,
      "grad_norm": 2.921875,
      "learning_rate": 2.2488323974340003e-05,
      "loss": 0.7706,
      "step": 509490
    },
    {
      "epoch": 1.7856704763306661,
      "grad_norm": 3.421875,
      "learning_rate": 2.24876749456763e-05,
      "loss": 0.8063,
      "step": 509500
    },
    {
      "epoch": 1.7857055238375619,
      "grad_norm": 2.796875,
      "learning_rate": 2.24870259170126e-05,
      "loss": 0.848,
      "step": 509510
    },
    {
      "epoch": 1.7857405713444574,
      "grad_norm": 3.1875,
      "learning_rate": 2.2486376888348897e-05,
      "loss": 0.8097,
      "step": 509520
    },
    {
      "epoch": 1.785775618851353,
      "grad_norm": 2.875,
      "learning_rate": 2.2485727859685195e-05,
      "loss": 0.8269,
      "step": 509530
    },
    {
      "epoch": 1.7858106663582487,
      "grad_norm": 3.1875,
      "learning_rate": 2.2485078831021496e-05,
      "loss": 0.8744,
      "step": 509540
    },
    {
      "epoch": 1.7858457138651442,
      "grad_norm": 3.65625,
      "learning_rate": 2.2484429802357794e-05,
      "loss": 0.8583,
      "step": 509550
    },
    {
      "epoch": 1.7858807613720398,
      "grad_norm": 2.421875,
      "learning_rate": 2.2483780773694092e-05,
      "loss": 0.9079,
      "step": 509560
    },
    {
      "epoch": 1.7859158088789355,
      "grad_norm": 2.609375,
      "learning_rate": 2.248313174503039e-05,
      "loss": 0.7474,
      "step": 509570
    },
    {
      "epoch": 1.7859508563858308,
      "grad_norm": 3.046875,
      "learning_rate": 2.2482482716366688e-05,
      "loss": 0.8264,
      "step": 509580
    },
    {
      "epoch": 1.7859859038927266,
      "grad_norm": 2.71875,
      "learning_rate": 2.2481833687702986e-05,
      "loss": 0.7814,
      "step": 509590
    },
    {
      "epoch": 1.7860209513996221,
      "grad_norm": 2.765625,
      "learning_rate": 2.2481184659039284e-05,
      "loss": 0.9021,
      "step": 509600
    },
    {
      "epoch": 1.7860559989065177,
      "grad_norm": 3.390625,
      "learning_rate": 2.2480535630375582e-05,
      "loss": 0.8733,
      "step": 509610
    },
    {
      "epoch": 1.7860910464134134,
      "grad_norm": 3.296875,
      "learning_rate": 2.247988660171188e-05,
      "loss": 0.862,
      "step": 509620
    },
    {
      "epoch": 1.786126093920309,
      "grad_norm": 3.296875,
      "learning_rate": 2.2479237573048175e-05,
      "loss": 0.8498,
      "step": 509630
    },
    {
      "epoch": 1.7861611414272045,
      "grad_norm": 3.265625,
      "learning_rate": 2.2478588544384473e-05,
      "loss": 0.8303,
      "step": 509640
    },
    {
      "epoch": 1.7861961889341003,
      "grad_norm": 2.640625,
      "learning_rate": 2.247793951572077e-05,
      "loss": 0.836,
      "step": 509650
    },
    {
      "epoch": 1.7862312364409958,
      "grad_norm": 2.453125,
      "learning_rate": 2.2477290487057072e-05,
      "loss": 0.8752,
      "step": 509660
    },
    {
      "epoch": 1.7862662839478913,
      "grad_norm": 2.984375,
      "learning_rate": 2.247664145839337e-05,
      "loss": 0.8059,
      "step": 509670
    },
    {
      "epoch": 1.786301331454787,
      "grad_norm": 2.5625,
      "learning_rate": 2.2475992429729668e-05,
      "loss": 0.8444,
      "step": 509680
    },
    {
      "epoch": 1.7863363789616824,
      "grad_norm": 3.34375,
      "learning_rate": 2.2475343401065966e-05,
      "loss": 0.7841,
      "step": 509690
    },
    {
      "epoch": 1.7863714264685782,
      "grad_norm": 3.03125,
      "learning_rate": 2.2474694372402264e-05,
      "loss": 0.8029,
      "step": 509700
    },
    {
      "epoch": 1.786406473975474,
      "grad_norm": 2.890625,
      "learning_rate": 2.2474045343738562e-05,
      "loss": 0.8321,
      "step": 509710
    },
    {
      "epoch": 1.7864415214823692,
      "grad_norm": 2.828125,
      "learning_rate": 2.247339631507486e-05,
      "loss": 0.8565,
      "step": 509720
    },
    {
      "epoch": 1.786476568989265,
      "grad_norm": 3.171875,
      "learning_rate": 2.2472747286411158e-05,
      "loss": 0.825,
      "step": 509730
    },
    {
      "epoch": 1.7865116164961605,
      "grad_norm": 2.890625,
      "learning_rate": 2.2472098257747456e-05,
      "loss": 0.8456,
      "step": 509740
    },
    {
      "epoch": 1.786546664003056,
      "grad_norm": 3.1875,
      "learning_rate": 2.2471449229083754e-05,
      "loss": 0.7809,
      "step": 509750
    },
    {
      "epoch": 1.7865817115099518,
      "grad_norm": 5.75,
      "learning_rate": 2.2470800200420052e-05,
      "loss": 0.7778,
      "step": 509760
    },
    {
      "epoch": 1.7866167590168474,
      "grad_norm": 3.0,
      "learning_rate": 2.247015117175635e-05,
      "loss": 0.885,
      "step": 509770
    },
    {
      "epoch": 1.786651806523743,
      "grad_norm": 3.15625,
      "learning_rate": 2.2469502143092648e-05,
      "loss": 0.7481,
      "step": 509780
    },
    {
      "epoch": 1.7866868540306386,
      "grad_norm": 2.671875,
      "learning_rate": 2.2468853114428946e-05,
      "loss": 0.7485,
      "step": 509790
    },
    {
      "epoch": 1.786721901537534,
      "grad_norm": 2.953125,
      "learning_rate": 2.2468204085765247e-05,
      "loss": 0.8432,
      "step": 509800
    },
    {
      "epoch": 1.7867569490444297,
      "grad_norm": 2.8125,
      "learning_rate": 2.2467555057101545e-05,
      "loss": 0.8406,
      "step": 509810
    },
    {
      "epoch": 1.7867919965513255,
      "grad_norm": 3.0,
      "learning_rate": 2.246690602843784e-05,
      "loss": 0.7833,
      "step": 509820
    },
    {
      "epoch": 1.7868270440582208,
      "grad_norm": 3.1875,
      "learning_rate": 2.2466256999774138e-05,
      "loss": 0.9227,
      "step": 509830
    },
    {
      "epoch": 1.7868620915651166,
      "grad_norm": 3.0625,
      "learning_rate": 2.2465607971110436e-05,
      "loss": 0.7588,
      "step": 509840
    },
    {
      "epoch": 1.786897139072012,
      "grad_norm": 2.8125,
      "learning_rate": 2.2464958942446734e-05,
      "loss": 0.8434,
      "step": 509850
    },
    {
      "epoch": 1.7869321865789076,
      "grad_norm": 2.421875,
      "learning_rate": 2.2464309913783032e-05,
      "loss": 0.7558,
      "step": 509860
    },
    {
      "epoch": 1.7869672340858034,
      "grad_norm": 2.90625,
      "learning_rate": 2.246366088511933e-05,
      "loss": 0.9098,
      "step": 509870
    },
    {
      "epoch": 1.787002281592699,
      "grad_norm": 2.8125,
      "learning_rate": 2.2463011856455628e-05,
      "loss": 0.8063,
      "step": 509880
    },
    {
      "epoch": 1.7870373290995945,
      "grad_norm": 2.921875,
      "learning_rate": 2.2462362827791926e-05,
      "loss": 0.8218,
      "step": 509890
    },
    {
      "epoch": 1.7870723766064902,
      "grad_norm": 2.90625,
      "learning_rate": 2.2461713799128224e-05,
      "loss": 0.7945,
      "step": 509900
    },
    {
      "epoch": 1.7871074241133857,
      "grad_norm": 2.484375,
      "learning_rate": 2.2461064770464525e-05,
      "loss": 0.8426,
      "step": 509910
    },
    {
      "epoch": 1.7871424716202813,
      "grad_norm": 2.9375,
      "learning_rate": 2.2460415741800823e-05,
      "loss": 0.7708,
      "step": 509920
    },
    {
      "epoch": 1.787177519127177,
      "grad_norm": 2.8125,
      "learning_rate": 2.245976671313712e-05,
      "loss": 0.7964,
      "step": 509930
    },
    {
      "epoch": 1.7872125666340724,
      "grad_norm": 3.0625,
      "learning_rate": 2.245911768447342e-05,
      "loss": 0.912,
      "step": 509940
    },
    {
      "epoch": 1.787247614140968,
      "grad_norm": 2.890625,
      "learning_rate": 2.2458468655809717e-05,
      "loss": 0.7734,
      "step": 509950
    },
    {
      "epoch": 1.7872826616478636,
      "grad_norm": 2.375,
      "learning_rate": 2.2457819627146015e-05,
      "loss": 0.7887,
      "step": 509960
    },
    {
      "epoch": 1.7873177091547592,
      "grad_norm": 3.25,
      "learning_rate": 2.2457170598482313e-05,
      "loss": 0.7952,
      "step": 509970
    },
    {
      "epoch": 1.787352756661655,
      "grad_norm": 2.984375,
      "learning_rate": 2.245652156981861e-05,
      "loss": 0.8374,
      "step": 509980
    },
    {
      "epoch": 1.7873878041685505,
      "grad_norm": 2.796875,
      "learning_rate": 2.245587254115491e-05,
      "loss": 0.809,
      "step": 509990
    },
    {
      "epoch": 1.787422851675446,
      "grad_norm": 2.875,
      "learning_rate": 2.2455223512491207e-05,
      "loss": 0.8231,
      "step": 510000
    },
    {
      "epoch": 1.787422851675446,
      "eval_loss": 0.7799898386001587,
      "eval_runtime": 564.8168,
      "eval_samples_per_second": 673.556,
      "eval_steps_per_second": 56.13,
      "step": 510000
    },
    {
      "epoch": 1.7874578991823418,
      "grad_norm": 3.125,
      "learning_rate": 2.2454574483827502e-05,
      "loss": 0.8052,
      "step": 510010
    },
    {
      "epoch": 1.7874929466892373,
      "grad_norm": 2.75,
      "learning_rate": 2.24539254551638e-05,
      "loss": 0.8008,
      "step": 510020
    },
    {
      "epoch": 1.7875279941961328,
      "grad_norm": 2.65625,
      "learning_rate": 2.24532764265001e-05,
      "loss": 0.8525,
      "step": 510030
    },
    {
      "epoch": 1.7875630417030286,
      "grad_norm": 2.875,
      "learning_rate": 2.24526273978364e-05,
      "loss": 0.8036,
      "step": 510040
    },
    {
      "epoch": 1.787598089209924,
      "grad_norm": 3.09375,
      "learning_rate": 2.2451978369172697e-05,
      "loss": 0.8498,
      "step": 510050
    },
    {
      "epoch": 1.7876331367168197,
      "grad_norm": 2.984375,
      "learning_rate": 2.2451329340508995e-05,
      "loss": 0.8349,
      "step": 510060
    },
    {
      "epoch": 1.7876681842237152,
      "grad_norm": 2.796875,
      "learning_rate": 2.2450680311845293e-05,
      "loss": 0.8729,
      "step": 510070
    },
    {
      "epoch": 1.7877032317306107,
      "grad_norm": 2.53125,
      "learning_rate": 2.245003128318159e-05,
      "loss": 0.8589,
      "step": 510080
    },
    {
      "epoch": 1.7877382792375065,
      "grad_norm": 2.4375,
      "learning_rate": 2.244938225451789e-05,
      "loss": 0.8185,
      "step": 510090
    },
    {
      "epoch": 1.787773326744402,
      "grad_norm": 3.09375,
      "learning_rate": 2.2448733225854187e-05,
      "loss": 0.8779,
      "step": 510100
    },
    {
      "epoch": 1.7878083742512976,
      "grad_norm": 2.859375,
      "learning_rate": 2.2448084197190485e-05,
      "loss": 0.8202,
      "step": 510110
    },
    {
      "epoch": 1.7878434217581933,
      "grad_norm": 3.078125,
      "learning_rate": 2.2447435168526783e-05,
      "loss": 0.8491,
      "step": 510120
    },
    {
      "epoch": 1.7878784692650889,
      "grad_norm": 2.9375,
      "learning_rate": 2.244678613986308e-05,
      "loss": 0.8337,
      "step": 510130
    },
    {
      "epoch": 1.7879135167719844,
      "grad_norm": 3.25,
      "learning_rate": 2.244613711119938e-05,
      "loss": 0.7953,
      "step": 510140
    },
    {
      "epoch": 1.7879485642788802,
      "grad_norm": 3.375,
      "learning_rate": 2.2445488082535677e-05,
      "loss": 0.8045,
      "step": 510150
    },
    {
      "epoch": 1.7879836117857755,
      "grad_norm": 2.5625,
      "learning_rate": 2.244483905387198e-05,
      "loss": 0.8042,
      "step": 510160
    },
    {
      "epoch": 1.7880186592926712,
      "grad_norm": 2.75,
      "learning_rate": 2.2444190025208277e-05,
      "loss": 0.84,
      "step": 510170
    },
    {
      "epoch": 1.7880537067995668,
      "grad_norm": 2.84375,
      "learning_rate": 2.2443540996544575e-05,
      "loss": 0.8631,
      "step": 510180
    },
    {
      "epoch": 1.7880887543064623,
      "grad_norm": 3.015625,
      "learning_rate": 2.2442891967880873e-05,
      "loss": 0.8963,
      "step": 510190
    },
    {
      "epoch": 1.788123801813358,
      "grad_norm": 2.8125,
      "learning_rate": 2.2442242939217167e-05,
      "loss": 0.8337,
      "step": 510200
    },
    {
      "epoch": 1.7881588493202536,
      "grad_norm": 2.65625,
      "learning_rate": 2.2441593910553465e-05,
      "loss": 0.8134,
      "step": 510210
    },
    {
      "epoch": 1.7881938968271491,
      "grad_norm": 2.890625,
      "learning_rate": 2.2440944881889763e-05,
      "loss": 0.7971,
      "step": 510220
    },
    {
      "epoch": 1.788228944334045,
      "grad_norm": 2.375,
      "learning_rate": 2.244029585322606e-05,
      "loss": 0.842,
      "step": 510230
    },
    {
      "epoch": 1.7882639918409404,
      "grad_norm": 2.453125,
      "learning_rate": 2.243964682456236e-05,
      "loss": 0.7678,
      "step": 510240
    },
    {
      "epoch": 1.788299039347836,
      "grad_norm": 2.984375,
      "learning_rate": 2.2438997795898657e-05,
      "loss": 0.8488,
      "step": 510250
    },
    {
      "epoch": 1.7883340868547317,
      "grad_norm": 3.0,
      "learning_rate": 2.2438348767234955e-05,
      "loss": 0.8303,
      "step": 510260
    },
    {
      "epoch": 1.788369134361627,
      "grad_norm": 2.578125,
      "learning_rate": 2.2437699738571253e-05,
      "loss": 0.8615,
      "step": 510270
    },
    {
      "epoch": 1.7884041818685228,
      "grad_norm": 2.75,
      "learning_rate": 2.2437050709907555e-05,
      "loss": 0.8121,
      "step": 510280
    },
    {
      "epoch": 1.7884392293754183,
      "grad_norm": 3.078125,
      "learning_rate": 2.2436401681243853e-05,
      "loss": 0.8713,
      "step": 510290
    },
    {
      "epoch": 1.7884742768823139,
      "grad_norm": 3.21875,
      "learning_rate": 2.243575265258015e-05,
      "loss": 0.9042,
      "step": 510300
    },
    {
      "epoch": 1.7885093243892096,
      "grad_norm": 2.828125,
      "learning_rate": 2.243510362391645e-05,
      "loss": 0.7492,
      "step": 510310
    },
    {
      "epoch": 1.7885443718961052,
      "grad_norm": 3.171875,
      "learning_rate": 2.2434454595252747e-05,
      "loss": 0.9202,
      "step": 510320
    },
    {
      "epoch": 1.7885794194030007,
      "grad_norm": 3.0,
      "learning_rate": 2.2433805566589045e-05,
      "loss": 0.8508,
      "step": 510330
    },
    {
      "epoch": 1.7886144669098964,
      "grad_norm": 2.8125,
      "learning_rate": 2.2433156537925343e-05,
      "loss": 0.7723,
      "step": 510340
    },
    {
      "epoch": 1.788649514416792,
      "grad_norm": 2.921875,
      "learning_rate": 2.243250750926164e-05,
      "loss": 0.8076,
      "step": 510350
    },
    {
      "epoch": 1.7886845619236875,
      "grad_norm": 2.734375,
      "learning_rate": 2.243185848059794e-05,
      "loss": 0.7872,
      "step": 510360
    },
    {
      "epoch": 1.7887196094305833,
      "grad_norm": 2.9375,
      "learning_rate": 2.2431209451934237e-05,
      "loss": 0.8719,
      "step": 510370
    },
    {
      "epoch": 1.7887546569374786,
      "grad_norm": 2.859375,
      "learning_rate": 2.2430560423270535e-05,
      "loss": 0.8474,
      "step": 510380
    },
    {
      "epoch": 1.7887897044443744,
      "grad_norm": 2.90625,
      "learning_rate": 2.2429911394606833e-05,
      "loss": 0.843,
      "step": 510390
    },
    {
      "epoch": 1.78882475195127,
      "grad_norm": 3.03125,
      "learning_rate": 2.242926236594313e-05,
      "loss": 0.858,
      "step": 510400
    },
    {
      "epoch": 1.7888597994581654,
      "grad_norm": 3.109375,
      "learning_rate": 2.242861333727943e-05,
      "loss": 0.7822,
      "step": 510410
    },
    {
      "epoch": 1.7888948469650612,
      "grad_norm": 2.875,
      "learning_rate": 2.2427964308615727e-05,
      "loss": 0.8206,
      "step": 510420
    },
    {
      "epoch": 1.7889298944719567,
      "grad_norm": 3.03125,
      "learning_rate": 2.2427315279952025e-05,
      "loss": 0.7789,
      "step": 510430
    },
    {
      "epoch": 1.7889649419788523,
      "grad_norm": 3.03125,
      "learning_rate": 2.2426666251288323e-05,
      "loss": 0.8535,
      "step": 510440
    },
    {
      "epoch": 1.788999989485748,
      "grad_norm": 2.890625,
      "learning_rate": 2.242601722262462e-05,
      "loss": 0.8193,
      "step": 510450
    },
    {
      "epoch": 1.7890350369926435,
      "grad_norm": 2.734375,
      "learning_rate": 2.242536819396092e-05,
      "loss": 0.7877,
      "step": 510460
    },
    {
      "epoch": 1.789070084499539,
      "grad_norm": 3.1875,
      "learning_rate": 2.2424719165297217e-05,
      "loss": 0.8381,
      "step": 510470
    },
    {
      "epoch": 1.7891051320064348,
      "grad_norm": 2.984375,
      "learning_rate": 2.2424070136633515e-05,
      "loss": 0.8972,
      "step": 510480
    },
    {
      "epoch": 1.7891401795133302,
      "grad_norm": 2.625,
      "learning_rate": 2.2423421107969813e-05,
      "loss": 0.7058,
      "step": 510490
    },
    {
      "epoch": 1.789175227020226,
      "grad_norm": 2.578125,
      "learning_rate": 2.242277207930611e-05,
      "loss": 0.7692,
      "step": 510500
    },
    {
      "epoch": 1.7892102745271217,
      "grad_norm": 2.796875,
      "learning_rate": 2.242212305064241e-05,
      "loss": 0.7393,
      "step": 510510
    },
    {
      "epoch": 1.789245322034017,
      "grad_norm": 2.625,
      "learning_rate": 2.2421474021978707e-05,
      "loss": 0.8093,
      "step": 510520
    },
    {
      "epoch": 1.7892803695409127,
      "grad_norm": 2.875,
      "learning_rate": 2.2420824993315008e-05,
      "loss": 0.8944,
      "step": 510530
    },
    {
      "epoch": 1.7893154170478083,
      "grad_norm": 3.0,
      "learning_rate": 2.2420175964651306e-05,
      "loss": 0.8467,
      "step": 510540
    },
    {
      "epoch": 1.7893504645547038,
      "grad_norm": 2.828125,
      "learning_rate": 2.2419526935987604e-05,
      "loss": 0.8281,
      "step": 510550
    },
    {
      "epoch": 1.7893855120615996,
      "grad_norm": 2.796875,
      "learning_rate": 2.2418877907323902e-05,
      "loss": 0.7433,
      "step": 510560
    },
    {
      "epoch": 1.789420559568495,
      "grad_norm": 3.109375,
      "learning_rate": 2.24182288786602e-05,
      "loss": 0.7405,
      "step": 510570
    },
    {
      "epoch": 1.7894556070753906,
      "grad_norm": 3.03125,
      "learning_rate": 2.2417579849996495e-05,
      "loss": 0.8348,
      "step": 510580
    },
    {
      "epoch": 1.7894906545822864,
      "grad_norm": 3.03125,
      "learning_rate": 2.2416930821332793e-05,
      "loss": 0.8794,
      "step": 510590
    },
    {
      "epoch": 1.789525702089182,
      "grad_norm": 4.03125,
      "learning_rate": 2.241628179266909e-05,
      "loss": 0.8351,
      "step": 510600
    },
    {
      "epoch": 1.7895607495960775,
      "grad_norm": 2.6875,
      "learning_rate": 2.241563276400539e-05,
      "loss": 0.8169,
      "step": 510610
    },
    {
      "epoch": 1.7895957971029732,
      "grad_norm": 2.875,
      "learning_rate": 2.2414983735341687e-05,
      "loss": 0.8081,
      "step": 510620
    },
    {
      "epoch": 1.7896308446098685,
      "grad_norm": 3.125,
      "learning_rate": 2.2414334706677985e-05,
      "loss": 0.8299,
      "step": 510630
    },
    {
      "epoch": 1.7896658921167643,
      "grad_norm": 2.8125,
      "learning_rate": 2.2413685678014286e-05,
      "loss": 0.893,
      "step": 510640
    },
    {
      "epoch": 1.7897009396236598,
      "grad_norm": 2.71875,
      "learning_rate": 2.2413036649350584e-05,
      "loss": 0.7621,
      "step": 510650
    },
    {
      "epoch": 1.7897359871305554,
      "grad_norm": 3.09375,
      "learning_rate": 2.2412387620686882e-05,
      "loss": 0.8684,
      "step": 510660
    },
    {
      "epoch": 1.7897710346374511,
      "grad_norm": 3.21875,
      "learning_rate": 2.241173859202318e-05,
      "loss": 0.8668,
      "step": 510670
    },
    {
      "epoch": 1.7898060821443467,
      "grad_norm": 2.8125,
      "learning_rate": 2.2411089563359478e-05,
      "loss": 0.7939,
      "step": 510680
    },
    {
      "epoch": 1.7898411296512422,
      "grad_norm": 2.796875,
      "learning_rate": 2.2410440534695776e-05,
      "loss": 0.8265,
      "step": 510690
    },
    {
      "epoch": 1.789876177158138,
      "grad_norm": 3.3125,
      "learning_rate": 2.2409791506032074e-05,
      "loss": 0.8438,
      "step": 510700
    },
    {
      "epoch": 1.7899112246650335,
      "grad_norm": 3.140625,
      "learning_rate": 2.2409142477368372e-05,
      "loss": 0.9113,
      "step": 510710
    },
    {
      "epoch": 1.789946272171929,
      "grad_norm": 2.921875,
      "learning_rate": 2.240849344870467e-05,
      "loss": 0.7045,
      "step": 510720
    },
    {
      "epoch": 1.7899813196788248,
      "grad_norm": 2.703125,
      "learning_rate": 2.2407844420040968e-05,
      "loss": 0.8749,
      "step": 510730
    },
    {
      "epoch": 1.79001636718572,
      "grad_norm": 2.78125,
      "learning_rate": 2.2407195391377266e-05,
      "loss": 0.8005,
      "step": 510740
    },
    {
      "epoch": 1.7900514146926159,
      "grad_norm": 2.65625,
      "learning_rate": 2.2406546362713564e-05,
      "loss": 0.7573,
      "step": 510750
    },
    {
      "epoch": 1.7900864621995114,
      "grad_norm": 2.984375,
      "learning_rate": 2.2405897334049862e-05,
      "loss": 0.8938,
      "step": 510760
    },
    {
      "epoch": 1.790121509706407,
      "grad_norm": 3.21875,
      "learning_rate": 2.240524830538616e-05,
      "loss": 0.8695,
      "step": 510770
    },
    {
      "epoch": 1.7901565572133027,
      "grad_norm": 2.75,
      "learning_rate": 2.2404599276722458e-05,
      "loss": 0.8296,
      "step": 510780
    },
    {
      "epoch": 1.7901916047201982,
      "grad_norm": 2.703125,
      "learning_rate": 2.2403950248058756e-05,
      "loss": 0.7341,
      "step": 510790
    },
    {
      "epoch": 1.7902266522270938,
      "grad_norm": 3.03125,
      "learning_rate": 2.2403301219395054e-05,
      "loss": 0.9021,
      "step": 510800
    },
    {
      "epoch": 1.7902616997339895,
      "grad_norm": 2.8125,
      "learning_rate": 2.2402652190731352e-05,
      "loss": 0.8271,
      "step": 510810
    },
    {
      "epoch": 1.790296747240885,
      "grad_norm": 3.359375,
      "learning_rate": 2.240200316206765e-05,
      "loss": 0.8109,
      "step": 510820
    },
    {
      "epoch": 1.7903317947477806,
      "grad_norm": 2.5,
      "learning_rate": 2.2401354133403948e-05,
      "loss": 0.8394,
      "step": 510830
    },
    {
      "epoch": 1.7903668422546763,
      "grad_norm": 2.859375,
      "learning_rate": 2.2400705104740246e-05,
      "loss": 0.8389,
      "step": 510840
    },
    {
      "epoch": 1.7904018897615717,
      "grad_norm": 3.015625,
      "learning_rate": 2.2400056076076544e-05,
      "loss": 0.8589,
      "step": 510850
    },
    {
      "epoch": 1.7904369372684674,
      "grad_norm": 2.96875,
      "learning_rate": 2.2399407047412842e-05,
      "loss": 0.7981,
      "step": 510860
    },
    {
      "epoch": 1.790471984775363,
      "grad_norm": 3.265625,
      "learning_rate": 2.239875801874914e-05,
      "loss": 0.8304,
      "step": 510870
    },
    {
      "epoch": 1.7905070322822585,
      "grad_norm": 2.1875,
      "learning_rate": 2.2398108990085438e-05,
      "loss": 0.8337,
      "step": 510880
    },
    {
      "epoch": 1.7905420797891543,
      "grad_norm": 2.890625,
      "learning_rate": 2.2397459961421736e-05,
      "loss": 0.7464,
      "step": 510890
    },
    {
      "epoch": 1.7905771272960498,
      "grad_norm": 2.890625,
      "learning_rate": 2.2396810932758038e-05,
      "loss": 0.8959,
      "step": 510900
    },
    {
      "epoch": 1.7906121748029453,
      "grad_norm": 2.78125,
      "learning_rate": 2.2396161904094336e-05,
      "loss": 0.8504,
      "step": 510910
    },
    {
      "epoch": 1.790647222309841,
      "grad_norm": 2.703125,
      "learning_rate": 2.2395512875430634e-05,
      "loss": 0.8382,
      "step": 510920
    },
    {
      "epoch": 1.7906822698167366,
      "grad_norm": 2.53125,
      "learning_rate": 2.239486384676693e-05,
      "loss": 0.8956,
      "step": 510930
    },
    {
      "epoch": 1.7907173173236322,
      "grad_norm": 2.765625,
      "learning_rate": 2.239421481810323e-05,
      "loss": 0.8743,
      "step": 510940
    },
    {
      "epoch": 1.790752364830528,
      "grad_norm": 2.875,
      "learning_rate": 2.2393565789439524e-05,
      "loss": 0.8505,
      "step": 510950
    },
    {
      "epoch": 1.7907874123374232,
      "grad_norm": 2.765625,
      "learning_rate": 2.2392916760775822e-05,
      "loss": 0.8753,
      "step": 510960
    },
    {
      "epoch": 1.790822459844319,
      "grad_norm": 3.515625,
      "learning_rate": 2.239226773211212e-05,
      "loss": 0.8126,
      "step": 510970
    },
    {
      "epoch": 1.7908575073512145,
      "grad_norm": 2.796875,
      "learning_rate": 2.2391618703448418e-05,
      "loss": 0.8639,
      "step": 510980
    },
    {
      "epoch": 1.79089255485811,
      "grad_norm": 2.890625,
      "learning_rate": 2.2390969674784716e-05,
      "loss": 0.8425,
      "step": 510990
    },
    {
      "epoch": 1.7909276023650058,
      "grad_norm": 3.0625,
      "learning_rate": 2.2390320646121014e-05,
      "loss": 0.8653,
      "step": 511000
    },
    {
      "epoch": 1.7909626498719013,
      "grad_norm": 2.953125,
      "learning_rate": 2.2389671617457316e-05,
      "loss": 0.8356,
      "step": 511010
    },
    {
      "epoch": 1.7909976973787969,
      "grad_norm": 3.046875,
      "learning_rate": 2.2389022588793614e-05,
      "loss": 0.7812,
      "step": 511020
    },
    {
      "epoch": 1.7910327448856926,
      "grad_norm": 2.9375,
      "learning_rate": 2.238837356012991e-05,
      "loss": 0.7983,
      "step": 511030
    },
    {
      "epoch": 1.7910677923925882,
      "grad_norm": 2.71875,
      "learning_rate": 2.238772453146621e-05,
      "loss": 0.7786,
      "step": 511040
    },
    {
      "epoch": 1.7911028398994837,
      "grad_norm": 2.546875,
      "learning_rate": 2.2387075502802508e-05,
      "loss": 0.8383,
      "step": 511050
    },
    {
      "epoch": 1.7911378874063795,
      "grad_norm": 2.5,
      "learning_rate": 2.2386426474138806e-05,
      "loss": 0.8619,
      "step": 511060
    },
    {
      "epoch": 1.7911729349132748,
      "grad_norm": 3.15625,
      "learning_rate": 2.2385777445475104e-05,
      "loss": 0.8886,
      "step": 511070
    },
    {
      "epoch": 1.7912079824201705,
      "grad_norm": 2.90625,
      "learning_rate": 2.23851284168114e-05,
      "loss": 0.7394,
      "step": 511080
    },
    {
      "epoch": 1.7912430299270663,
      "grad_norm": 3.140625,
      "learning_rate": 2.23844793881477e-05,
      "loss": 0.7959,
      "step": 511090
    },
    {
      "epoch": 1.7912780774339616,
      "grad_norm": 2.6875,
      "learning_rate": 2.2383830359483998e-05,
      "loss": 0.8445,
      "step": 511100
    },
    {
      "epoch": 1.7913131249408574,
      "grad_norm": 2.734375,
      "learning_rate": 2.2383181330820296e-05,
      "loss": 0.8002,
      "step": 511110
    },
    {
      "epoch": 1.791348172447753,
      "grad_norm": 2.546875,
      "learning_rate": 2.2382532302156594e-05,
      "loss": 0.8112,
      "step": 511120
    },
    {
      "epoch": 1.7913832199546484,
      "grad_norm": 3.5625,
      "learning_rate": 2.238188327349289e-05,
      "loss": 0.9823,
      "step": 511130
    },
    {
      "epoch": 1.7914182674615442,
      "grad_norm": 3.328125,
      "learning_rate": 2.238123424482919e-05,
      "loss": 0.7932,
      "step": 511140
    },
    {
      "epoch": 1.7914533149684397,
      "grad_norm": 2.953125,
      "learning_rate": 2.2380585216165488e-05,
      "loss": 0.8832,
      "step": 511150
    },
    {
      "epoch": 1.7914883624753353,
      "grad_norm": 2.890625,
      "learning_rate": 2.2379936187501786e-05,
      "loss": 0.84,
      "step": 511160
    },
    {
      "epoch": 1.791523409982231,
      "grad_norm": 2.796875,
      "learning_rate": 2.2379287158838084e-05,
      "loss": 0.8741,
      "step": 511170
    },
    {
      "epoch": 1.7915584574891266,
      "grad_norm": 2.484375,
      "learning_rate": 2.237863813017438e-05,
      "loss": 0.8612,
      "step": 511180
    },
    {
      "epoch": 1.791593504996022,
      "grad_norm": 2.71875,
      "learning_rate": 2.237798910151068e-05,
      "loss": 0.8668,
      "step": 511190
    },
    {
      "epoch": 1.7916285525029179,
      "grad_norm": 3.0625,
      "learning_rate": 2.2377340072846978e-05,
      "loss": 0.869,
      "step": 511200
    },
    {
      "epoch": 1.7916636000098132,
      "grad_norm": 2.84375,
      "learning_rate": 2.2376691044183276e-05,
      "loss": 0.8384,
      "step": 511210
    },
    {
      "epoch": 1.791698647516709,
      "grad_norm": 3.0,
      "learning_rate": 2.2376042015519574e-05,
      "loss": 0.8201,
      "step": 511220
    },
    {
      "epoch": 1.7917336950236045,
      "grad_norm": 3.046875,
      "learning_rate": 2.237539298685587e-05,
      "loss": 0.9379,
      "step": 511230
    },
    {
      "epoch": 1.7917687425305,
      "grad_norm": 2.5,
      "learning_rate": 2.237474395819217e-05,
      "loss": 0.7049,
      "step": 511240
    },
    {
      "epoch": 1.7918037900373958,
      "grad_norm": 3.015625,
      "learning_rate": 2.2374094929528468e-05,
      "loss": 0.8492,
      "step": 511250
    },
    {
      "epoch": 1.7918388375442913,
      "grad_norm": 3.0625,
      "learning_rate": 2.237344590086477e-05,
      "loss": 0.8011,
      "step": 511260
    },
    {
      "epoch": 1.7918738850511868,
      "grad_norm": 3.28125,
      "learning_rate": 2.2372796872201067e-05,
      "loss": 0.892,
      "step": 511270
    },
    {
      "epoch": 1.7919089325580826,
      "grad_norm": 2.828125,
      "learning_rate": 2.2372147843537365e-05,
      "loss": 0.8051,
      "step": 511280
    },
    {
      "epoch": 1.7919439800649781,
      "grad_norm": 2.5,
      "learning_rate": 2.2371498814873663e-05,
      "loss": 0.7524,
      "step": 511290
    },
    {
      "epoch": 1.7919790275718737,
      "grad_norm": 2.84375,
      "learning_rate": 2.237084978620996e-05,
      "loss": 0.8506,
      "step": 511300
    },
    {
      "epoch": 1.7920140750787694,
      "grad_norm": 3.0,
      "learning_rate": 2.237020075754626e-05,
      "loss": 0.8222,
      "step": 511310
    },
    {
      "epoch": 1.7920491225856647,
      "grad_norm": 2.765625,
      "learning_rate": 2.2369551728882557e-05,
      "loss": 0.8032,
      "step": 511320
    },
    {
      "epoch": 1.7920841700925605,
      "grad_norm": 2.703125,
      "learning_rate": 2.236890270021885e-05,
      "loss": 0.8571,
      "step": 511330
    },
    {
      "epoch": 1.792119217599456,
      "grad_norm": 2.875,
      "learning_rate": 2.236825367155515e-05,
      "loss": 0.7959,
      "step": 511340
    },
    {
      "epoch": 1.7921542651063516,
      "grad_norm": 2.90625,
      "learning_rate": 2.2367604642891448e-05,
      "loss": 0.8337,
      "step": 511350
    },
    {
      "epoch": 1.7921893126132473,
      "grad_norm": 2.90625,
      "learning_rate": 2.2366955614227746e-05,
      "loss": 0.7852,
      "step": 511360
    },
    {
      "epoch": 1.7922243601201429,
      "grad_norm": 3.359375,
      "learning_rate": 2.2366306585564044e-05,
      "loss": 0.8693,
      "step": 511370
    },
    {
      "epoch": 1.7922594076270384,
      "grad_norm": 2.734375,
      "learning_rate": 2.2365657556900345e-05,
      "loss": 0.8059,
      "step": 511380
    },
    {
      "epoch": 1.7922944551339342,
      "grad_norm": 3.0,
      "learning_rate": 2.2365008528236643e-05,
      "loss": 0.8045,
      "step": 511390
    },
    {
      "epoch": 1.7923295026408297,
      "grad_norm": 2.859375,
      "learning_rate": 2.236435949957294e-05,
      "loss": 0.8646,
      "step": 511400
    },
    {
      "epoch": 1.7923645501477252,
      "grad_norm": 3.0625,
      "learning_rate": 2.236371047090924e-05,
      "loss": 0.7579,
      "step": 511410
    },
    {
      "epoch": 1.792399597654621,
      "grad_norm": 3.234375,
      "learning_rate": 2.2363061442245537e-05,
      "loss": 0.8666,
      "step": 511420
    },
    {
      "epoch": 1.7924346451615163,
      "grad_norm": 2.78125,
      "learning_rate": 2.2362412413581835e-05,
      "loss": 0.8922,
      "step": 511430
    },
    {
      "epoch": 1.792469692668412,
      "grad_norm": 2.71875,
      "learning_rate": 2.2361763384918133e-05,
      "loss": 0.8297,
      "step": 511440
    },
    {
      "epoch": 1.7925047401753076,
      "grad_norm": 3.625,
      "learning_rate": 2.236111435625443e-05,
      "loss": 0.8673,
      "step": 511450
    },
    {
      "epoch": 1.7925397876822031,
      "grad_norm": 3.15625,
      "learning_rate": 2.236046532759073e-05,
      "loss": 0.8366,
      "step": 511460
    },
    {
      "epoch": 1.7925748351890989,
      "grad_norm": 2.890625,
      "learning_rate": 2.2359816298927027e-05,
      "loss": 0.7797,
      "step": 511470
    },
    {
      "epoch": 1.7926098826959944,
      "grad_norm": 3.265625,
      "learning_rate": 2.2359167270263325e-05,
      "loss": 0.846,
      "step": 511480
    },
    {
      "epoch": 1.79264493020289,
      "grad_norm": 2.625,
      "learning_rate": 2.2358518241599623e-05,
      "loss": 0.8098,
      "step": 511490
    },
    {
      "epoch": 1.7926799777097857,
      "grad_norm": 2.703125,
      "learning_rate": 2.235786921293592e-05,
      "loss": 0.8072,
      "step": 511500
    },
    {
      "epoch": 1.7927150252166812,
      "grad_norm": 3.34375,
      "learning_rate": 2.235722018427222e-05,
      "loss": 0.9158,
      "step": 511510
    },
    {
      "epoch": 1.7927500727235768,
      "grad_norm": 2.421875,
      "learning_rate": 2.2356571155608517e-05,
      "loss": 0.8083,
      "step": 511520
    },
    {
      "epoch": 1.7927851202304725,
      "grad_norm": 2.546875,
      "learning_rate": 2.2355922126944815e-05,
      "loss": 0.857,
      "step": 511530
    },
    {
      "epoch": 1.7928201677373679,
      "grad_norm": 3.1875,
      "learning_rate": 2.2355273098281113e-05,
      "loss": 0.8847,
      "step": 511540
    },
    {
      "epoch": 1.7928552152442636,
      "grad_norm": 2.953125,
      "learning_rate": 2.235462406961741e-05,
      "loss": 0.845,
      "step": 511550
    },
    {
      "epoch": 1.7928902627511591,
      "grad_norm": 2.703125,
      "learning_rate": 2.235397504095371e-05,
      "loss": 0.8297,
      "step": 511560
    },
    {
      "epoch": 1.7929253102580547,
      "grad_norm": 2.78125,
      "learning_rate": 2.2353326012290007e-05,
      "loss": 0.7634,
      "step": 511570
    },
    {
      "epoch": 1.7929603577649504,
      "grad_norm": 2.984375,
      "learning_rate": 2.2352676983626305e-05,
      "loss": 0.8246,
      "step": 511580
    },
    {
      "epoch": 1.792995405271846,
      "grad_norm": 2.265625,
      "learning_rate": 2.2352027954962603e-05,
      "loss": 0.7255,
      "step": 511590
    },
    {
      "epoch": 1.7930304527787415,
      "grad_norm": 3.265625,
      "learning_rate": 2.23513789262989e-05,
      "loss": 0.8384,
      "step": 511600
    },
    {
      "epoch": 1.7930655002856373,
      "grad_norm": 3.15625,
      "learning_rate": 2.23507298976352e-05,
      "loss": 0.8608,
      "step": 511610
    },
    {
      "epoch": 1.7931005477925328,
      "grad_norm": 2.921875,
      "learning_rate": 2.2350080868971497e-05,
      "loss": 0.826,
      "step": 511620
    },
    {
      "epoch": 1.7931355952994283,
      "grad_norm": 3.0625,
      "learning_rate": 2.23494318403078e-05,
      "loss": 0.7873,
      "step": 511630
    },
    {
      "epoch": 1.793170642806324,
      "grad_norm": 2.890625,
      "learning_rate": 2.2348782811644096e-05,
      "loss": 0.7963,
      "step": 511640
    },
    {
      "epoch": 1.7932056903132194,
      "grad_norm": 3.140625,
      "learning_rate": 2.2348133782980394e-05,
      "loss": 0.9189,
      "step": 511650
    },
    {
      "epoch": 1.7932407378201152,
      "grad_norm": 3.1875,
      "learning_rate": 2.2347484754316692e-05,
      "loss": 0.8874,
      "step": 511660
    },
    {
      "epoch": 1.793275785327011,
      "grad_norm": 2.828125,
      "learning_rate": 2.234683572565299e-05,
      "loss": 0.8049,
      "step": 511670
    },
    {
      "epoch": 1.7933108328339062,
      "grad_norm": 2.484375,
      "learning_rate": 2.234618669698929e-05,
      "loss": 0.7151,
      "step": 511680
    },
    {
      "epoch": 1.793345880340802,
      "grad_norm": 2.859375,
      "learning_rate": 2.2345537668325586e-05,
      "loss": 0.8499,
      "step": 511690
    },
    {
      "epoch": 1.7933809278476975,
      "grad_norm": 2.6875,
      "learning_rate": 2.234488863966188e-05,
      "loss": 0.7963,
      "step": 511700
    },
    {
      "epoch": 1.793415975354593,
      "grad_norm": 2.953125,
      "learning_rate": 2.234423961099818e-05,
      "loss": 0.787,
      "step": 511710
    },
    {
      "epoch": 1.7934510228614888,
      "grad_norm": 2.921875,
      "learning_rate": 2.2343590582334477e-05,
      "loss": 0.8207,
      "step": 511720
    },
    {
      "epoch": 1.7934860703683844,
      "grad_norm": 2.765625,
      "learning_rate": 2.2342941553670775e-05,
      "loss": 0.8996,
      "step": 511730
    },
    {
      "epoch": 1.79352111787528,
      "grad_norm": 2.34375,
      "learning_rate": 2.2342292525007076e-05,
      "loss": 0.8537,
      "step": 511740
    },
    {
      "epoch": 1.7935561653821757,
      "grad_norm": 3.203125,
      "learning_rate": 2.2341643496343374e-05,
      "loss": 0.8626,
      "step": 511750
    },
    {
      "epoch": 1.793591212889071,
      "grad_norm": 2.90625,
      "learning_rate": 2.2340994467679672e-05,
      "loss": 0.8389,
      "step": 511760
    },
    {
      "epoch": 1.7936262603959667,
      "grad_norm": 2.625,
      "learning_rate": 2.234034543901597e-05,
      "loss": 0.8667,
      "step": 511770
    },
    {
      "epoch": 1.7936613079028625,
      "grad_norm": 2.765625,
      "learning_rate": 2.233969641035227e-05,
      "loss": 0.8512,
      "step": 511780
    },
    {
      "epoch": 1.7936963554097578,
      "grad_norm": 3.15625,
      "learning_rate": 2.2339047381688566e-05,
      "loss": 0.8517,
      "step": 511790
    },
    {
      "epoch": 1.7937314029166536,
      "grad_norm": 2.75,
      "learning_rate": 2.2338398353024864e-05,
      "loss": 0.8044,
      "step": 511800
    },
    {
      "epoch": 1.793766450423549,
      "grad_norm": 2.828125,
      "learning_rate": 2.2337749324361162e-05,
      "loss": 0.8319,
      "step": 511810
    },
    {
      "epoch": 1.7938014979304446,
      "grad_norm": 3.078125,
      "learning_rate": 2.233710029569746e-05,
      "loss": 0.9016,
      "step": 511820
    },
    {
      "epoch": 1.7938365454373404,
      "grad_norm": 2.375,
      "learning_rate": 2.233645126703376e-05,
      "loss": 0.7057,
      "step": 511830
    },
    {
      "epoch": 1.793871592944236,
      "grad_norm": 2.640625,
      "learning_rate": 2.2335802238370056e-05,
      "loss": 0.8588,
      "step": 511840
    },
    {
      "epoch": 1.7939066404511315,
      "grad_norm": 2.84375,
      "learning_rate": 2.2335153209706354e-05,
      "loss": 0.7918,
      "step": 511850
    },
    {
      "epoch": 1.7939416879580272,
      "grad_norm": 3.765625,
      "learning_rate": 2.2334504181042652e-05,
      "loss": 0.8706,
      "step": 511860
    },
    {
      "epoch": 1.7939767354649228,
      "grad_norm": 2.703125,
      "learning_rate": 2.233385515237895e-05,
      "loss": 0.7989,
      "step": 511870
    },
    {
      "epoch": 1.7940117829718183,
      "grad_norm": 2.671875,
      "learning_rate": 2.2333206123715252e-05,
      "loss": 0.7995,
      "step": 511880
    },
    {
      "epoch": 1.794046830478714,
      "grad_norm": 3.046875,
      "learning_rate": 2.2332557095051546e-05,
      "loss": 0.8859,
      "step": 511890
    },
    {
      "epoch": 1.7940818779856094,
      "grad_norm": 2.828125,
      "learning_rate": 2.2331908066387844e-05,
      "loss": 0.8183,
      "step": 511900
    },
    {
      "epoch": 1.7941169254925051,
      "grad_norm": 2.78125,
      "learning_rate": 2.2331259037724142e-05,
      "loss": 0.7784,
      "step": 511910
    },
    {
      "epoch": 1.7941519729994007,
      "grad_norm": 2.6875,
      "learning_rate": 2.233061000906044e-05,
      "loss": 0.9023,
      "step": 511920
    },
    {
      "epoch": 1.7941870205062962,
      "grad_norm": 2.90625,
      "learning_rate": 2.232996098039674e-05,
      "loss": 0.8451,
      "step": 511930
    },
    {
      "epoch": 1.794222068013192,
      "grad_norm": 3.21875,
      "learning_rate": 2.2329311951733036e-05,
      "loss": 0.8347,
      "step": 511940
    },
    {
      "epoch": 1.7942571155200875,
      "grad_norm": 2.90625,
      "learning_rate": 2.2328662923069334e-05,
      "loss": 0.7608,
      "step": 511950
    },
    {
      "epoch": 1.794292163026983,
      "grad_norm": 3.046875,
      "learning_rate": 2.2328013894405632e-05,
      "loss": 0.8855,
      "step": 511960
    },
    {
      "epoch": 1.7943272105338788,
      "grad_norm": 2.78125,
      "learning_rate": 2.232736486574193e-05,
      "loss": 0.7629,
      "step": 511970
    },
    {
      "epoch": 1.7943622580407743,
      "grad_norm": 2.6875,
      "learning_rate": 2.232671583707823e-05,
      "loss": 0.7364,
      "step": 511980
    },
    {
      "epoch": 1.7943973055476699,
      "grad_norm": 2.796875,
      "learning_rate": 2.2326066808414526e-05,
      "loss": 0.7423,
      "step": 511990
    },
    {
      "epoch": 1.7944323530545656,
      "grad_norm": 2.5625,
      "learning_rate": 2.2325417779750828e-05,
      "loss": 0.8096,
      "step": 512000
    },
    {
      "epoch": 1.794467400561461,
      "grad_norm": 2.484375,
      "learning_rate": 2.2324768751087126e-05,
      "loss": 0.8316,
      "step": 512010
    },
    {
      "epoch": 1.7945024480683567,
      "grad_norm": 2.796875,
      "learning_rate": 2.2324119722423424e-05,
      "loss": 0.7916,
      "step": 512020
    },
    {
      "epoch": 1.7945374955752522,
      "grad_norm": 3.03125,
      "learning_rate": 2.2323470693759722e-05,
      "loss": 0.8853,
      "step": 512030
    },
    {
      "epoch": 1.7945725430821478,
      "grad_norm": 2.71875,
      "learning_rate": 2.232282166509602e-05,
      "loss": 0.8188,
      "step": 512040
    },
    {
      "epoch": 1.7946075905890435,
      "grad_norm": 3.203125,
      "learning_rate": 2.2322172636432318e-05,
      "loss": 0.795,
      "step": 512050
    },
    {
      "epoch": 1.794642638095939,
      "grad_norm": 3.109375,
      "learning_rate": 2.2321523607768616e-05,
      "loss": 0.8487,
      "step": 512060
    },
    {
      "epoch": 1.7946776856028346,
      "grad_norm": 2.71875,
      "learning_rate": 2.2320874579104914e-05,
      "loss": 0.8429,
      "step": 512070
    },
    {
      "epoch": 1.7947127331097303,
      "grad_norm": 3.078125,
      "learning_rate": 2.232022555044121e-05,
      "loss": 0.7726,
      "step": 512080
    },
    {
      "epoch": 1.7947477806166259,
      "grad_norm": 2.53125,
      "learning_rate": 2.2319576521777506e-05,
      "loss": 0.8189,
      "step": 512090
    },
    {
      "epoch": 1.7947828281235214,
      "grad_norm": 2.875,
      "learning_rate": 2.2318927493113804e-05,
      "loss": 0.8243,
      "step": 512100
    },
    {
      "epoch": 1.7948178756304172,
      "grad_norm": 2.78125,
      "learning_rate": 2.2318278464450106e-05,
      "loss": 0.8022,
      "step": 512110
    },
    {
      "epoch": 1.7948529231373125,
      "grad_norm": 2.546875,
      "learning_rate": 2.2317629435786404e-05,
      "loss": 0.7748,
      "step": 512120
    },
    {
      "epoch": 1.7948879706442082,
      "grad_norm": 3.0,
      "learning_rate": 2.2316980407122702e-05,
      "loss": 0.8341,
      "step": 512130
    },
    {
      "epoch": 1.7949230181511038,
      "grad_norm": 2.9375,
      "learning_rate": 2.2316331378459e-05,
      "loss": 0.8281,
      "step": 512140
    },
    {
      "epoch": 1.7949580656579993,
      "grad_norm": 3.328125,
      "learning_rate": 2.2315682349795298e-05,
      "loss": 0.9127,
      "step": 512150
    },
    {
      "epoch": 1.794993113164895,
      "grad_norm": 2.765625,
      "learning_rate": 2.2315033321131596e-05,
      "loss": 0.8508,
      "step": 512160
    },
    {
      "epoch": 1.7950281606717906,
      "grad_norm": 2.921875,
      "learning_rate": 2.2314384292467894e-05,
      "loss": 0.9157,
      "step": 512170
    },
    {
      "epoch": 1.7950632081786861,
      "grad_norm": 2.78125,
      "learning_rate": 2.2313735263804192e-05,
      "loss": 0.7986,
      "step": 512180
    },
    {
      "epoch": 1.795098255685582,
      "grad_norm": 3.09375,
      "learning_rate": 2.231308623514049e-05,
      "loss": 0.8845,
      "step": 512190
    },
    {
      "epoch": 1.7951333031924774,
      "grad_norm": 2.75,
      "learning_rate": 2.2312437206476788e-05,
      "loss": 0.7572,
      "step": 512200
    },
    {
      "epoch": 1.795168350699373,
      "grad_norm": 3.125,
      "learning_rate": 2.2311788177813086e-05,
      "loss": 0.8335,
      "step": 512210
    },
    {
      "epoch": 1.7952033982062687,
      "grad_norm": 2.640625,
      "learning_rate": 2.2311139149149384e-05,
      "loss": 0.8008,
      "step": 512220
    },
    {
      "epoch": 1.795238445713164,
      "grad_norm": 2.859375,
      "learning_rate": 2.2310490120485682e-05,
      "loss": 0.7926,
      "step": 512230
    },
    {
      "epoch": 1.7952734932200598,
      "grad_norm": 3.1875,
      "learning_rate": 2.230984109182198e-05,
      "loss": 0.7587,
      "step": 512240
    },
    {
      "epoch": 1.7953085407269553,
      "grad_norm": 3.34375,
      "learning_rate": 2.230919206315828e-05,
      "loss": 0.9212,
      "step": 512250
    },
    {
      "epoch": 1.7953435882338509,
      "grad_norm": 3.609375,
      "learning_rate": 2.230854303449458e-05,
      "loss": 0.9076,
      "step": 512260
    },
    {
      "epoch": 1.7953786357407466,
      "grad_norm": 2.703125,
      "learning_rate": 2.2307894005830874e-05,
      "loss": 0.8377,
      "step": 512270
    },
    {
      "epoch": 1.7954136832476422,
      "grad_norm": 2.71875,
      "learning_rate": 2.2307244977167172e-05,
      "loss": 0.8833,
      "step": 512280
    },
    {
      "epoch": 1.7954487307545377,
      "grad_norm": 3.125,
      "learning_rate": 2.230659594850347e-05,
      "loss": 0.864,
      "step": 512290
    },
    {
      "epoch": 1.7954837782614335,
      "grad_norm": 3.15625,
      "learning_rate": 2.2305946919839768e-05,
      "loss": 0.7927,
      "step": 512300
    },
    {
      "epoch": 1.795518825768329,
      "grad_norm": 2.640625,
      "learning_rate": 2.2305297891176066e-05,
      "loss": 0.8275,
      "step": 512310
    },
    {
      "epoch": 1.7955538732752245,
      "grad_norm": 2.703125,
      "learning_rate": 2.2304648862512364e-05,
      "loss": 0.8485,
      "step": 512320
    },
    {
      "epoch": 1.7955889207821203,
      "grad_norm": 2.796875,
      "learning_rate": 2.2303999833848662e-05,
      "loss": 0.7561,
      "step": 512330
    },
    {
      "epoch": 1.7956239682890156,
      "grad_norm": 3.0,
      "learning_rate": 2.230335080518496e-05,
      "loss": 0.8666,
      "step": 512340
    },
    {
      "epoch": 1.7956590157959114,
      "grad_norm": 2.375,
      "learning_rate": 2.2302701776521258e-05,
      "loss": 0.8929,
      "step": 512350
    },
    {
      "epoch": 1.7956940633028071,
      "grad_norm": 2.84375,
      "learning_rate": 2.230205274785756e-05,
      "loss": 0.7811,
      "step": 512360
    },
    {
      "epoch": 1.7957291108097024,
      "grad_norm": 2.59375,
      "learning_rate": 2.2301403719193857e-05,
      "loss": 0.8086,
      "step": 512370
    },
    {
      "epoch": 1.7957641583165982,
      "grad_norm": 2.71875,
      "learning_rate": 2.2300754690530155e-05,
      "loss": 0.7725,
      "step": 512380
    },
    {
      "epoch": 1.7957992058234937,
      "grad_norm": 3.03125,
      "learning_rate": 2.2300105661866453e-05,
      "loss": 0.8201,
      "step": 512390
    },
    {
      "epoch": 1.7958342533303893,
      "grad_norm": 3.109375,
      "learning_rate": 2.229945663320275e-05,
      "loss": 0.8312,
      "step": 512400
    },
    {
      "epoch": 1.795869300837285,
      "grad_norm": 2.546875,
      "learning_rate": 2.229880760453905e-05,
      "loss": 0.7293,
      "step": 512410
    },
    {
      "epoch": 1.7959043483441806,
      "grad_norm": 2.953125,
      "learning_rate": 2.2298158575875347e-05,
      "loss": 0.8914,
      "step": 512420
    },
    {
      "epoch": 1.795939395851076,
      "grad_norm": 2.734375,
      "learning_rate": 2.2297509547211645e-05,
      "loss": 0.8043,
      "step": 512430
    },
    {
      "epoch": 1.7959744433579719,
      "grad_norm": 2.8125,
      "learning_rate": 2.2296860518547943e-05,
      "loss": 0.8191,
      "step": 512440
    },
    {
      "epoch": 1.7960094908648672,
      "grad_norm": 2.765625,
      "learning_rate": 2.229621148988424e-05,
      "loss": 0.8533,
      "step": 512450
    },
    {
      "epoch": 1.796044538371763,
      "grad_norm": 2.5,
      "learning_rate": 2.2295562461220536e-05,
      "loss": 0.7987,
      "step": 512460
    },
    {
      "epoch": 1.7960795858786587,
      "grad_norm": 2.640625,
      "learning_rate": 2.2294913432556834e-05,
      "loss": 0.7816,
      "step": 512470
    },
    {
      "epoch": 1.796114633385554,
      "grad_norm": 2.96875,
      "learning_rate": 2.2294264403893135e-05,
      "loss": 0.815,
      "step": 512480
    },
    {
      "epoch": 1.7961496808924498,
      "grad_norm": 2.734375,
      "learning_rate": 2.2293615375229433e-05,
      "loss": 0.8201,
      "step": 512490
    },
    {
      "epoch": 1.7961847283993453,
      "grad_norm": 2.671875,
      "learning_rate": 2.229296634656573e-05,
      "loss": 0.8133,
      "step": 512500
    },
    {
      "epoch": 1.7962197759062408,
      "grad_norm": 2.8125,
      "learning_rate": 2.229231731790203e-05,
      "loss": 0.8422,
      "step": 512510
    },
    {
      "epoch": 1.7962548234131366,
      "grad_norm": 2.65625,
      "learning_rate": 2.2291668289238327e-05,
      "loss": 0.8788,
      "step": 512520
    },
    {
      "epoch": 1.7962898709200321,
      "grad_norm": 2.859375,
      "learning_rate": 2.2291019260574625e-05,
      "loss": 0.8137,
      "step": 512530
    },
    {
      "epoch": 1.7963249184269277,
      "grad_norm": 2.609375,
      "learning_rate": 2.2290370231910923e-05,
      "loss": 0.7912,
      "step": 512540
    },
    {
      "epoch": 1.7963599659338234,
      "grad_norm": 3.078125,
      "learning_rate": 2.228972120324722e-05,
      "loss": 0.8844,
      "step": 512550
    },
    {
      "epoch": 1.796395013440719,
      "grad_norm": 3.0625,
      "learning_rate": 2.228907217458352e-05,
      "loss": 0.7813,
      "step": 512560
    },
    {
      "epoch": 1.7964300609476145,
      "grad_norm": 2.5,
      "learning_rate": 2.2288423145919817e-05,
      "loss": 0.8585,
      "step": 512570
    },
    {
      "epoch": 1.7964651084545102,
      "grad_norm": 2.453125,
      "learning_rate": 2.2287774117256115e-05,
      "loss": 0.827,
      "step": 512580
    },
    {
      "epoch": 1.7965001559614056,
      "grad_norm": 3.203125,
      "learning_rate": 2.2287125088592413e-05,
      "loss": 0.8296,
      "step": 512590
    },
    {
      "epoch": 1.7965352034683013,
      "grad_norm": 2.375,
      "learning_rate": 2.228647605992871e-05,
      "loss": 0.834,
      "step": 512600
    },
    {
      "epoch": 1.7965702509751968,
      "grad_norm": 3.4375,
      "learning_rate": 2.228582703126501e-05,
      "loss": 0.8889,
      "step": 512610
    },
    {
      "epoch": 1.7966052984820924,
      "grad_norm": 2.90625,
      "learning_rate": 2.228517800260131e-05,
      "loss": 0.8852,
      "step": 512620
    },
    {
      "epoch": 1.7966403459889881,
      "grad_norm": 2.96875,
      "learning_rate": 2.228452897393761e-05,
      "loss": 0.7954,
      "step": 512630
    },
    {
      "epoch": 1.7966753934958837,
      "grad_norm": 2.890625,
      "learning_rate": 2.2283879945273903e-05,
      "loss": 0.8317,
      "step": 512640
    },
    {
      "epoch": 1.7967104410027792,
      "grad_norm": 2.765625,
      "learning_rate": 2.22832309166102e-05,
      "loss": 0.8655,
      "step": 512650
    },
    {
      "epoch": 1.796745488509675,
      "grad_norm": 2.765625,
      "learning_rate": 2.22825818879465e-05,
      "loss": 0.8783,
      "step": 512660
    },
    {
      "epoch": 1.7967805360165705,
      "grad_norm": 2.671875,
      "learning_rate": 2.2281932859282797e-05,
      "loss": 0.7323,
      "step": 512670
    },
    {
      "epoch": 1.796815583523466,
      "grad_norm": 2.71875,
      "learning_rate": 2.2281283830619095e-05,
      "loss": 0.7893,
      "step": 512680
    },
    {
      "epoch": 1.7968506310303618,
      "grad_norm": 3.078125,
      "learning_rate": 2.2280634801955393e-05,
      "loss": 0.8315,
      "step": 512690
    },
    {
      "epoch": 1.7968856785372571,
      "grad_norm": 2.96875,
      "learning_rate": 2.227998577329169e-05,
      "loss": 0.7814,
      "step": 512700
    },
    {
      "epoch": 1.7969207260441529,
      "grad_norm": 2.78125,
      "learning_rate": 2.227933674462799e-05,
      "loss": 0.8544,
      "step": 512710
    },
    {
      "epoch": 1.7969557735510484,
      "grad_norm": 2.4375,
      "learning_rate": 2.2278687715964287e-05,
      "loss": 0.8182,
      "step": 512720
    },
    {
      "epoch": 1.796990821057944,
      "grad_norm": 3.109375,
      "learning_rate": 2.227803868730059e-05,
      "loss": 0.8268,
      "step": 512730
    },
    {
      "epoch": 1.7970258685648397,
      "grad_norm": 2.890625,
      "learning_rate": 2.2277389658636887e-05,
      "loss": 0.9149,
      "step": 512740
    },
    {
      "epoch": 1.7970609160717352,
      "grad_norm": 2.546875,
      "learning_rate": 2.2276740629973185e-05,
      "loss": 0.8038,
      "step": 512750
    },
    {
      "epoch": 1.7970959635786308,
      "grad_norm": 2.875,
      "learning_rate": 2.2276091601309483e-05,
      "loss": 0.8755,
      "step": 512760
    },
    {
      "epoch": 1.7971310110855265,
      "grad_norm": 3.109375,
      "learning_rate": 2.227544257264578e-05,
      "loss": 0.8461,
      "step": 512770
    },
    {
      "epoch": 1.797166058592422,
      "grad_norm": 3.359375,
      "learning_rate": 2.227479354398208e-05,
      "loss": 0.8283,
      "step": 512780
    },
    {
      "epoch": 1.7972011060993176,
      "grad_norm": 2.5625,
      "learning_rate": 2.2274144515318377e-05,
      "loss": 0.7963,
      "step": 512790
    },
    {
      "epoch": 1.7972361536062134,
      "grad_norm": 3.03125,
      "learning_rate": 2.2273495486654675e-05,
      "loss": 0.8251,
      "step": 512800
    },
    {
      "epoch": 1.7972712011131087,
      "grad_norm": 2.59375,
      "learning_rate": 2.2272846457990973e-05,
      "loss": 0.7685,
      "step": 512810
    },
    {
      "epoch": 1.7973062486200044,
      "grad_norm": 2.75,
      "learning_rate": 2.227219742932727e-05,
      "loss": 0.812,
      "step": 512820
    },
    {
      "epoch": 1.7973412961269,
      "grad_norm": 2.765625,
      "learning_rate": 2.2271548400663565e-05,
      "loss": 0.8618,
      "step": 512830
    },
    {
      "epoch": 1.7973763436337955,
      "grad_norm": 2.984375,
      "learning_rate": 2.2270899371999867e-05,
      "loss": 0.7752,
      "step": 512840
    },
    {
      "epoch": 1.7974113911406913,
      "grad_norm": 3.125,
      "learning_rate": 2.2270250343336165e-05,
      "loss": 0.8559,
      "step": 512850
    },
    {
      "epoch": 1.7974464386475868,
      "grad_norm": 3.015625,
      "learning_rate": 2.2269601314672463e-05,
      "loss": 0.8046,
      "step": 512860
    },
    {
      "epoch": 1.7974814861544823,
      "grad_norm": 2.65625,
      "learning_rate": 2.226895228600876e-05,
      "loss": 0.8167,
      "step": 512870
    },
    {
      "epoch": 1.797516533661378,
      "grad_norm": 3.046875,
      "learning_rate": 2.226830325734506e-05,
      "loss": 0.9054,
      "step": 512880
    },
    {
      "epoch": 1.7975515811682736,
      "grad_norm": 3.125,
      "learning_rate": 2.2267654228681357e-05,
      "loss": 0.8115,
      "step": 512890
    },
    {
      "epoch": 1.7975866286751692,
      "grad_norm": 3.734375,
      "learning_rate": 2.2267005200017655e-05,
      "loss": 0.8393,
      "step": 512900
    },
    {
      "epoch": 1.797621676182065,
      "grad_norm": 2.84375,
      "learning_rate": 2.2266356171353953e-05,
      "loss": 0.8411,
      "step": 512910
    },
    {
      "epoch": 1.7976567236889602,
      "grad_norm": 2.890625,
      "learning_rate": 2.226570714269025e-05,
      "loss": 0.9269,
      "step": 512920
    },
    {
      "epoch": 1.797691771195856,
      "grad_norm": 2.8125,
      "learning_rate": 2.226505811402655e-05,
      "loss": 0.7461,
      "step": 512930
    },
    {
      "epoch": 1.7977268187027515,
      "grad_norm": 2.9375,
      "learning_rate": 2.2264409085362847e-05,
      "loss": 0.8266,
      "step": 512940
    },
    {
      "epoch": 1.797761866209647,
      "grad_norm": 3.015625,
      "learning_rate": 2.2263760056699145e-05,
      "loss": 0.8696,
      "step": 512950
    },
    {
      "epoch": 1.7977969137165428,
      "grad_norm": 2.6875,
      "learning_rate": 2.2263111028035443e-05,
      "loss": 0.8544,
      "step": 512960
    },
    {
      "epoch": 1.7978319612234384,
      "grad_norm": 2.65625,
      "learning_rate": 2.226246199937174e-05,
      "loss": 0.798,
      "step": 512970
    },
    {
      "epoch": 1.797867008730334,
      "grad_norm": 2.765625,
      "learning_rate": 2.2261812970708042e-05,
      "loss": 0.7621,
      "step": 512980
    },
    {
      "epoch": 1.7979020562372297,
      "grad_norm": 2.65625,
      "learning_rate": 2.226116394204434e-05,
      "loss": 0.7554,
      "step": 512990
    },
    {
      "epoch": 1.7979371037441252,
      "grad_norm": 2.8125,
      "learning_rate": 2.2260514913380638e-05,
      "loss": 0.8641,
      "step": 513000
    },
    {
      "epoch": 1.7979721512510207,
      "grad_norm": 3.171875,
      "learning_rate": 2.2259865884716936e-05,
      "loss": 0.8587,
      "step": 513010
    },
    {
      "epoch": 1.7980071987579165,
      "grad_norm": 2.921875,
      "learning_rate": 2.225921685605323e-05,
      "loss": 0.823,
      "step": 513020
    },
    {
      "epoch": 1.7980422462648118,
      "grad_norm": 3.09375,
      "learning_rate": 2.225856782738953e-05,
      "loss": 0.7956,
      "step": 513030
    },
    {
      "epoch": 1.7980772937717076,
      "grad_norm": 2.46875,
      "learning_rate": 2.2257918798725827e-05,
      "loss": 0.8561,
      "step": 513040
    },
    {
      "epoch": 1.7981123412786033,
      "grad_norm": 2.4375,
      "learning_rate": 2.2257269770062125e-05,
      "loss": 0.7914,
      "step": 513050
    },
    {
      "epoch": 1.7981473887854986,
      "grad_norm": 2.921875,
      "learning_rate": 2.2256620741398423e-05,
      "loss": 0.8634,
      "step": 513060
    },
    {
      "epoch": 1.7981824362923944,
      "grad_norm": 2.859375,
      "learning_rate": 2.225597171273472e-05,
      "loss": 0.8123,
      "step": 513070
    },
    {
      "epoch": 1.79821748379929,
      "grad_norm": 3.0,
      "learning_rate": 2.225532268407102e-05,
      "loss": 0.8282,
      "step": 513080
    },
    {
      "epoch": 1.7982525313061855,
      "grad_norm": 2.40625,
      "learning_rate": 2.2254673655407317e-05,
      "loss": 0.8142,
      "step": 513090
    },
    {
      "epoch": 1.7982875788130812,
      "grad_norm": 2.65625,
      "learning_rate": 2.2254024626743618e-05,
      "loss": 0.8047,
      "step": 513100
    },
    {
      "epoch": 1.7983226263199767,
      "grad_norm": 2.875,
      "learning_rate": 2.2253375598079916e-05,
      "loss": 0.8259,
      "step": 513110
    },
    {
      "epoch": 1.7983576738268723,
      "grad_norm": 2.75,
      "learning_rate": 2.2252726569416214e-05,
      "loss": 0.8301,
      "step": 513120
    },
    {
      "epoch": 1.798392721333768,
      "grad_norm": 2.390625,
      "learning_rate": 2.2252077540752512e-05,
      "loss": 0.7556,
      "step": 513130
    },
    {
      "epoch": 1.7984277688406634,
      "grad_norm": 3.265625,
      "learning_rate": 2.225142851208881e-05,
      "loss": 0.9187,
      "step": 513140
    },
    {
      "epoch": 1.7984628163475591,
      "grad_norm": 3.09375,
      "learning_rate": 2.2250779483425108e-05,
      "loss": 0.8441,
      "step": 513150
    },
    {
      "epoch": 1.7984978638544549,
      "grad_norm": 2.8125,
      "learning_rate": 2.2250130454761406e-05,
      "loss": 0.7453,
      "step": 513160
    },
    {
      "epoch": 1.7985329113613502,
      "grad_norm": 2.75,
      "learning_rate": 2.2249481426097704e-05,
      "loss": 0.8273,
      "step": 513170
    },
    {
      "epoch": 1.798567958868246,
      "grad_norm": 3.265625,
      "learning_rate": 2.2248832397434002e-05,
      "loss": 0.8177,
      "step": 513180
    },
    {
      "epoch": 1.7986030063751415,
      "grad_norm": 3.1875,
      "learning_rate": 2.22481833687703e-05,
      "loss": 0.7861,
      "step": 513190
    },
    {
      "epoch": 1.798638053882037,
      "grad_norm": 2.5625,
      "learning_rate": 2.2247534340106598e-05,
      "loss": 0.825,
      "step": 513200
    },
    {
      "epoch": 1.7986731013889328,
      "grad_norm": 2.859375,
      "learning_rate": 2.2246885311442896e-05,
      "loss": 0.7846,
      "step": 513210
    },
    {
      "epoch": 1.7987081488958283,
      "grad_norm": 2.765625,
      "learning_rate": 2.2246236282779194e-05,
      "loss": 0.8607,
      "step": 513220
    },
    {
      "epoch": 1.7987431964027238,
      "grad_norm": 2.59375,
      "learning_rate": 2.2245587254115492e-05,
      "loss": 0.8317,
      "step": 513230
    },
    {
      "epoch": 1.7987782439096196,
      "grad_norm": 3.015625,
      "learning_rate": 2.224493822545179e-05,
      "loss": 0.7587,
      "step": 513240
    },
    {
      "epoch": 1.7988132914165151,
      "grad_norm": 2.984375,
      "learning_rate": 2.2244289196788088e-05,
      "loss": 0.8317,
      "step": 513250
    },
    {
      "epoch": 1.7988483389234107,
      "grad_norm": 3.140625,
      "learning_rate": 2.2243640168124386e-05,
      "loss": 0.8259,
      "step": 513260
    },
    {
      "epoch": 1.7988833864303064,
      "grad_norm": 2.96875,
      "learning_rate": 2.2242991139460684e-05,
      "loss": 0.8527,
      "step": 513270
    },
    {
      "epoch": 1.7989184339372017,
      "grad_norm": 3.3125,
      "learning_rate": 2.2242342110796982e-05,
      "loss": 0.8906,
      "step": 513280
    },
    {
      "epoch": 1.7989534814440975,
      "grad_norm": 2.671875,
      "learning_rate": 2.224169308213328e-05,
      "loss": 0.8321,
      "step": 513290
    },
    {
      "epoch": 1.798988528950993,
      "grad_norm": 3.125,
      "learning_rate": 2.2241044053469578e-05,
      "loss": 0.8083,
      "step": 513300
    },
    {
      "epoch": 1.7990235764578886,
      "grad_norm": 2.6875,
      "learning_rate": 2.2240395024805876e-05,
      "loss": 0.8361,
      "step": 513310
    },
    {
      "epoch": 1.7990586239647843,
      "grad_norm": 2.6875,
      "learning_rate": 2.2239745996142174e-05,
      "loss": 0.7855,
      "step": 513320
    },
    {
      "epoch": 1.7990936714716799,
      "grad_norm": 3.40625,
      "learning_rate": 2.2239096967478472e-05,
      "loss": 0.821,
      "step": 513330
    },
    {
      "epoch": 1.7991287189785754,
      "grad_norm": 2.9375,
      "learning_rate": 2.223844793881477e-05,
      "loss": 0.8241,
      "step": 513340
    },
    {
      "epoch": 1.7991637664854712,
      "grad_norm": 3.25,
      "learning_rate": 2.223779891015107e-05,
      "loss": 0.7202,
      "step": 513350
    },
    {
      "epoch": 1.7991988139923667,
      "grad_norm": 2.96875,
      "learning_rate": 2.223714988148737e-05,
      "loss": 0.8133,
      "step": 513360
    },
    {
      "epoch": 1.7992338614992622,
      "grad_norm": 2.71875,
      "learning_rate": 2.2236500852823667e-05,
      "loss": 0.8246,
      "step": 513370
    },
    {
      "epoch": 1.799268909006158,
      "grad_norm": 2.828125,
      "learning_rate": 2.2235851824159965e-05,
      "loss": 0.8056,
      "step": 513380
    },
    {
      "epoch": 1.7993039565130533,
      "grad_norm": 2.96875,
      "learning_rate": 2.2235202795496263e-05,
      "loss": 0.8552,
      "step": 513390
    },
    {
      "epoch": 1.799339004019949,
      "grad_norm": 2.8125,
      "learning_rate": 2.2234553766832558e-05,
      "loss": 0.8416,
      "step": 513400
    },
    {
      "epoch": 1.7993740515268446,
      "grad_norm": 2.75,
      "learning_rate": 2.2233904738168856e-05,
      "loss": 0.7746,
      "step": 513410
    },
    {
      "epoch": 1.7994090990337401,
      "grad_norm": 2.53125,
      "learning_rate": 2.2233255709505154e-05,
      "loss": 0.8631,
      "step": 513420
    },
    {
      "epoch": 1.799444146540636,
      "grad_norm": 2.5,
      "learning_rate": 2.2232606680841452e-05,
      "loss": 0.8003,
      "step": 513430
    },
    {
      "epoch": 1.7994791940475314,
      "grad_norm": 2.421875,
      "learning_rate": 2.223195765217775e-05,
      "loss": 0.7966,
      "step": 513440
    },
    {
      "epoch": 1.799514241554427,
      "grad_norm": 2.703125,
      "learning_rate": 2.2231308623514048e-05,
      "loss": 0.7427,
      "step": 513450
    },
    {
      "epoch": 1.7995492890613227,
      "grad_norm": 2.921875,
      "learning_rate": 2.223065959485035e-05,
      "loss": 0.7664,
      "step": 513460
    },
    {
      "epoch": 1.7995843365682183,
      "grad_norm": 3.203125,
      "learning_rate": 2.2230010566186647e-05,
      "loss": 0.8819,
      "step": 513470
    },
    {
      "epoch": 1.7996193840751138,
      "grad_norm": 2.84375,
      "learning_rate": 2.2229361537522945e-05,
      "loss": 0.8501,
      "step": 513480
    },
    {
      "epoch": 1.7996544315820096,
      "grad_norm": 2.90625,
      "learning_rate": 2.2228712508859243e-05,
      "loss": 0.8198,
      "step": 513490
    },
    {
      "epoch": 1.7996894790889049,
      "grad_norm": 3.265625,
      "learning_rate": 2.222806348019554e-05,
      "loss": 0.8125,
      "step": 513500
    },
    {
      "epoch": 1.7997245265958006,
      "grad_norm": 2.4375,
      "learning_rate": 2.222741445153184e-05,
      "loss": 0.826,
      "step": 513510
    },
    {
      "epoch": 1.7997595741026962,
      "grad_norm": 2.5625,
      "learning_rate": 2.2226765422868137e-05,
      "loss": 0.8258,
      "step": 513520
    },
    {
      "epoch": 1.7997946216095917,
      "grad_norm": 2.703125,
      "learning_rate": 2.2226116394204435e-05,
      "loss": 0.8852,
      "step": 513530
    },
    {
      "epoch": 1.7998296691164875,
      "grad_norm": 2.875,
      "learning_rate": 2.2225467365540733e-05,
      "loss": 0.7979,
      "step": 513540
    },
    {
      "epoch": 1.799864716623383,
      "grad_norm": 2.671875,
      "learning_rate": 2.222481833687703e-05,
      "loss": 0.8159,
      "step": 513550
    },
    {
      "epoch": 1.7998997641302785,
      "grad_norm": 2.953125,
      "learning_rate": 2.222416930821333e-05,
      "loss": 0.8917,
      "step": 513560
    },
    {
      "epoch": 1.7999348116371743,
      "grad_norm": 2.765625,
      "learning_rate": 2.2223520279549627e-05,
      "loss": 0.7897,
      "step": 513570
    },
    {
      "epoch": 1.7999698591440698,
      "grad_norm": 3.046875,
      "learning_rate": 2.2222871250885925e-05,
      "loss": 0.8256,
      "step": 513580
    },
    {
      "epoch": 1.8000049066509654,
      "grad_norm": 2.953125,
      "learning_rate": 2.2222222222222223e-05,
      "loss": 0.89,
      "step": 513590
    },
    {
      "epoch": 1.800039954157861,
      "grad_norm": 3.1875,
      "learning_rate": 2.222157319355852e-05,
      "loss": 0.8438,
      "step": 513600
    },
    {
      "epoch": 1.8000750016647564,
      "grad_norm": 2.796875,
      "learning_rate": 2.222092416489482e-05,
      "loss": 0.775,
      "step": 513610
    },
    {
      "epoch": 1.8001100491716522,
      "grad_norm": 2.59375,
      "learning_rate": 2.2220275136231117e-05,
      "loss": 0.7857,
      "step": 513620
    },
    {
      "epoch": 1.8001450966785477,
      "grad_norm": 3.125,
      "learning_rate": 2.2219626107567415e-05,
      "loss": 0.8755,
      "step": 513630
    },
    {
      "epoch": 1.8001801441854433,
      "grad_norm": 3.0625,
      "learning_rate": 2.2218977078903713e-05,
      "loss": 0.746,
      "step": 513640
    },
    {
      "epoch": 1.800215191692339,
      "grad_norm": 2.703125,
      "learning_rate": 2.221832805024001e-05,
      "loss": 0.8613,
      "step": 513650
    },
    {
      "epoch": 1.8002502391992345,
      "grad_norm": 2.8125,
      "learning_rate": 2.221767902157631e-05,
      "loss": 0.8004,
      "step": 513660
    },
    {
      "epoch": 1.80028528670613,
      "grad_norm": 2.765625,
      "learning_rate": 2.2217029992912607e-05,
      "loss": 0.8726,
      "step": 513670
    },
    {
      "epoch": 1.8003203342130258,
      "grad_norm": 2.6875,
      "learning_rate": 2.2216380964248905e-05,
      "loss": 0.7388,
      "step": 513680
    },
    {
      "epoch": 1.8003553817199214,
      "grad_norm": 3.09375,
      "learning_rate": 2.2215731935585203e-05,
      "loss": 0.7738,
      "step": 513690
    },
    {
      "epoch": 1.800390429226817,
      "grad_norm": 3.0625,
      "learning_rate": 2.22150829069215e-05,
      "loss": 0.8833,
      "step": 513700
    },
    {
      "epoch": 1.8004254767337127,
      "grad_norm": 2.9375,
      "learning_rate": 2.22144338782578e-05,
      "loss": 0.7888,
      "step": 513710
    },
    {
      "epoch": 1.800460524240608,
      "grad_norm": 2.796875,
      "learning_rate": 2.22137848495941e-05,
      "loss": 0.7943,
      "step": 513720
    },
    {
      "epoch": 1.8004955717475037,
      "grad_norm": 2.75,
      "learning_rate": 2.22131358209304e-05,
      "loss": 0.7994,
      "step": 513730
    },
    {
      "epoch": 1.8005306192543995,
      "grad_norm": 3.046875,
      "learning_rate": 2.2212486792266697e-05,
      "loss": 0.8159,
      "step": 513740
    },
    {
      "epoch": 1.8005656667612948,
      "grad_norm": 2.703125,
      "learning_rate": 2.2211837763602995e-05,
      "loss": 0.7942,
      "step": 513750
    },
    {
      "epoch": 1.8006007142681906,
      "grad_norm": 2.9375,
      "learning_rate": 2.2211188734939293e-05,
      "loss": 0.8778,
      "step": 513760
    },
    {
      "epoch": 1.800635761775086,
      "grad_norm": 3.1875,
      "learning_rate": 2.2210539706275587e-05,
      "loss": 0.8703,
      "step": 513770
    },
    {
      "epoch": 1.8006708092819816,
      "grad_norm": 2.921875,
      "learning_rate": 2.2209890677611885e-05,
      "loss": 0.8855,
      "step": 513780
    },
    {
      "epoch": 1.8007058567888774,
      "grad_norm": 2.578125,
      "learning_rate": 2.2209241648948183e-05,
      "loss": 0.8448,
      "step": 513790
    },
    {
      "epoch": 1.800740904295773,
      "grad_norm": 2.984375,
      "learning_rate": 2.220859262028448e-05,
      "loss": 0.863,
      "step": 513800
    },
    {
      "epoch": 1.8007759518026685,
      "grad_norm": 2.875,
      "learning_rate": 2.220794359162078e-05,
      "loss": 0.8594,
      "step": 513810
    },
    {
      "epoch": 1.8008109993095642,
      "grad_norm": 2.671875,
      "learning_rate": 2.2207294562957077e-05,
      "loss": 0.7747,
      "step": 513820
    },
    {
      "epoch": 1.8008460468164595,
      "grad_norm": 3.203125,
      "learning_rate": 2.220664553429338e-05,
      "loss": 0.8755,
      "step": 513830
    },
    {
      "epoch": 1.8008810943233553,
      "grad_norm": 2.671875,
      "learning_rate": 2.2205996505629677e-05,
      "loss": 0.7805,
      "step": 513840
    },
    {
      "epoch": 1.800916141830251,
      "grad_norm": 3.328125,
      "learning_rate": 2.2205347476965975e-05,
      "loss": 0.7965,
      "step": 513850
    },
    {
      "epoch": 1.8009511893371464,
      "grad_norm": 3.453125,
      "learning_rate": 2.2204698448302273e-05,
      "loss": 0.8903,
      "step": 513860
    },
    {
      "epoch": 1.8009862368440421,
      "grad_norm": 2.984375,
      "learning_rate": 2.220404941963857e-05,
      "loss": 0.8513,
      "step": 513870
    },
    {
      "epoch": 1.8010212843509377,
      "grad_norm": 3.234375,
      "learning_rate": 2.220340039097487e-05,
      "loss": 0.8127,
      "step": 513880
    },
    {
      "epoch": 1.8010563318578332,
      "grad_norm": 2.984375,
      "learning_rate": 2.2202751362311167e-05,
      "loss": 0.8914,
      "step": 513890
    },
    {
      "epoch": 1.801091379364729,
      "grad_norm": 2.59375,
      "learning_rate": 2.2202102333647465e-05,
      "loss": 0.8118,
      "step": 513900
    },
    {
      "epoch": 1.8011264268716245,
      "grad_norm": 3.03125,
      "learning_rate": 2.2201453304983763e-05,
      "loss": 0.8166,
      "step": 513910
    },
    {
      "epoch": 1.80116147437852,
      "grad_norm": 2.6875,
      "learning_rate": 2.220080427632006e-05,
      "loss": 0.8682,
      "step": 513920
    },
    {
      "epoch": 1.8011965218854158,
      "grad_norm": 2.765625,
      "learning_rate": 2.220015524765636e-05,
      "loss": 0.7619,
      "step": 513930
    },
    {
      "epoch": 1.8012315693923113,
      "grad_norm": 2.671875,
      "learning_rate": 2.2199506218992657e-05,
      "loss": 0.8211,
      "step": 513940
    },
    {
      "epoch": 1.8012666168992069,
      "grad_norm": 2.625,
      "learning_rate": 2.2198857190328955e-05,
      "loss": 0.8679,
      "step": 513950
    },
    {
      "epoch": 1.8013016644061026,
      "grad_norm": 2.65625,
      "learning_rate": 2.2198208161665253e-05,
      "loss": 0.8578,
      "step": 513960
    },
    {
      "epoch": 1.801336711912998,
      "grad_norm": 3.046875,
      "learning_rate": 2.219755913300155e-05,
      "loss": 0.7861,
      "step": 513970
    },
    {
      "epoch": 1.8013717594198937,
      "grad_norm": 2.890625,
      "learning_rate": 2.219691010433785e-05,
      "loss": 0.8287,
      "step": 513980
    },
    {
      "epoch": 1.8014068069267892,
      "grad_norm": 3.15625,
      "learning_rate": 2.2196261075674147e-05,
      "loss": 0.8538,
      "step": 513990
    },
    {
      "epoch": 1.8014418544336848,
      "grad_norm": 2.9375,
      "learning_rate": 2.2195612047010445e-05,
      "loss": 0.8341,
      "step": 514000
    },
    {
      "epoch": 1.8014769019405805,
      "grad_norm": 3.0,
      "learning_rate": 2.2194963018346743e-05,
      "loss": 0.8731,
      "step": 514010
    },
    {
      "epoch": 1.801511949447476,
      "grad_norm": 2.921875,
      "learning_rate": 2.219431398968304e-05,
      "loss": 0.8726,
      "step": 514020
    },
    {
      "epoch": 1.8015469969543716,
      "grad_norm": 2.734375,
      "learning_rate": 2.219366496101934e-05,
      "loss": 0.8072,
      "step": 514030
    },
    {
      "epoch": 1.8015820444612674,
      "grad_norm": 2.9375,
      "learning_rate": 2.2193015932355637e-05,
      "loss": 0.7622,
      "step": 514040
    },
    {
      "epoch": 1.8016170919681629,
      "grad_norm": 2.984375,
      "learning_rate": 2.2192366903691935e-05,
      "loss": 0.7583,
      "step": 514050
    },
    {
      "epoch": 1.8016521394750584,
      "grad_norm": 3.3125,
      "learning_rate": 2.2191717875028233e-05,
      "loss": 0.7885,
      "step": 514060
    },
    {
      "epoch": 1.8016871869819542,
      "grad_norm": 3.203125,
      "learning_rate": 2.219106884636453e-05,
      "loss": 0.9209,
      "step": 514070
    },
    {
      "epoch": 1.8017222344888495,
      "grad_norm": 2.515625,
      "learning_rate": 2.2190419817700832e-05,
      "loss": 0.8423,
      "step": 514080
    },
    {
      "epoch": 1.8017572819957453,
      "grad_norm": 2.84375,
      "learning_rate": 2.218977078903713e-05,
      "loss": 0.8471,
      "step": 514090
    },
    {
      "epoch": 1.8017923295026408,
      "grad_norm": 3.0625,
      "learning_rate": 2.2189121760373428e-05,
      "loss": 0.8279,
      "step": 514100
    },
    {
      "epoch": 1.8018273770095363,
      "grad_norm": 3.078125,
      "learning_rate": 2.2188472731709726e-05,
      "loss": 0.7821,
      "step": 514110
    },
    {
      "epoch": 1.801862424516432,
      "grad_norm": 2.90625,
      "learning_rate": 2.2187823703046024e-05,
      "loss": 0.8557,
      "step": 514120
    },
    {
      "epoch": 1.8018974720233276,
      "grad_norm": 2.703125,
      "learning_rate": 2.2187174674382322e-05,
      "loss": 0.7708,
      "step": 514130
    },
    {
      "epoch": 1.8019325195302232,
      "grad_norm": 3.140625,
      "learning_rate": 2.218652564571862e-05,
      "loss": 0.8396,
      "step": 514140
    },
    {
      "epoch": 1.801967567037119,
      "grad_norm": 3.109375,
      "learning_rate": 2.2185876617054915e-05,
      "loss": 0.9305,
      "step": 514150
    },
    {
      "epoch": 1.8020026145440144,
      "grad_norm": 3.0,
      "learning_rate": 2.2185227588391213e-05,
      "loss": 0.7858,
      "step": 514160
    },
    {
      "epoch": 1.80203766205091,
      "grad_norm": 2.953125,
      "learning_rate": 2.218457855972751e-05,
      "loss": 0.8672,
      "step": 514170
    },
    {
      "epoch": 1.8020727095578057,
      "grad_norm": 2.859375,
      "learning_rate": 2.218392953106381e-05,
      "loss": 0.772,
      "step": 514180
    },
    {
      "epoch": 1.802107757064701,
      "grad_norm": 3.5625,
      "learning_rate": 2.2183280502400107e-05,
      "loss": 0.7631,
      "step": 514190
    },
    {
      "epoch": 1.8021428045715968,
      "grad_norm": 3.125,
      "learning_rate": 2.2182631473736408e-05,
      "loss": 0.8063,
      "step": 514200
    },
    {
      "epoch": 1.8021778520784923,
      "grad_norm": 3.125,
      "learning_rate": 2.2181982445072706e-05,
      "loss": 0.9344,
      "step": 514210
    },
    {
      "epoch": 1.8022128995853879,
      "grad_norm": 2.71875,
      "learning_rate": 2.2181333416409004e-05,
      "loss": 0.8471,
      "step": 514220
    },
    {
      "epoch": 1.8022479470922836,
      "grad_norm": 2.6875,
      "learning_rate": 2.2180684387745302e-05,
      "loss": 0.843,
      "step": 514230
    },
    {
      "epoch": 1.8022829945991792,
      "grad_norm": 2.75,
      "learning_rate": 2.21800353590816e-05,
      "loss": 0.8874,
      "step": 514240
    },
    {
      "epoch": 1.8023180421060747,
      "grad_norm": 2.859375,
      "learning_rate": 2.2179386330417898e-05,
      "loss": 0.8072,
      "step": 514250
    },
    {
      "epoch": 1.8023530896129705,
      "grad_norm": 3.15625,
      "learning_rate": 2.2178737301754196e-05,
      "loss": 0.7432,
      "step": 514260
    },
    {
      "epoch": 1.802388137119866,
      "grad_norm": 2.765625,
      "learning_rate": 2.2178088273090494e-05,
      "loss": 0.7838,
      "step": 514270
    },
    {
      "epoch": 1.8024231846267615,
      "grad_norm": 3.34375,
      "learning_rate": 2.2177439244426792e-05,
      "loss": 0.816,
      "step": 514280
    },
    {
      "epoch": 1.8024582321336573,
      "grad_norm": 2.84375,
      "learning_rate": 2.217679021576309e-05,
      "loss": 0.8,
      "step": 514290
    },
    {
      "epoch": 1.8024932796405526,
      "grad_norm": 2.984375,
      "learning_rate": 2.2176141187099388e-05,
      "loss": 0.941,
      "step": 514300
    },
    {
      "epoch": 1.8025283271474484,
      "grad_norm": 2.84375,
      "learning_rate": 2.2175492158435686e-05,
      "loss": 0.7942,
      "step": 514310
    },
    {
      "epoch": 1.802563374654344,
      "grad_norm": 2.6875,
      "learning_rate": 2.2174843129771984e-05,
      "loss": 0.7856,
      "step": 514320
    },
    {
      "epoch": 1.8025984221612394,
      "grad_norm": 3.34375,
      "learning_rate": 2.2174194101108285e-05,
      "loss": 0.8356,
      "step": 514330
    },
    {
      "epoch": 1.8026334696681352,
      "grad_norm": 3.109375,
      "learning_rate": 2.217354507244458e-05,
      "loss": 0.8756,
      "step": 514340
    },
    {
      "epoch": 1.8026685171750307,
      "grad_norm": 2.375,
      "learning_rate": 2.2172896043780878e-05,
      "loss": 0.7974,
      "step": 514350
    },
    {
      "epoch": 1.8027035646819263,
      "grad_norm": 3.046875,
      "learning_rate": 2.2172247015117176e-05,
      "loss": 0.97,
      "step": 514360
    },
    {
      "epoch": 1.802738612188822,
      "grad_norm": 2.765625,
      "learning_rate": 2.2171597986453474e-05,
      "loss": 0.7376,
      "step": 514370
    },
    {
      "epoch": 1.8027736596957176,
      "grad_norm": 2.8125,
      "learning_rate": 2.2170948957789772e-05,
      "loss": 0.7775,
      "step": 514380
    },
    {
      "epoch": 1.802808707202613,
      "grad_norm": 2.859375,
      "learning_rate": 2.217029992912607e-05,
      "loss": 0.7807,
      "step": 514390
    },
    {
      "epoch": 1.8028437547095089,
      "grad_norm": 2.890625,
      "learning_rate": 2.2169650900462368e-05,
      "loss": 0.7779,
      "step": 514400
    },
    {
      "epoch": 1.8028788022164042,
      "grad_norm": 2.828125,
      "learning_rate": 2.2169001871798666e-05,
      "loss": 0.7558,
      "step": 514410
    },
    {
      "epoch": 1.8029138497233,
      "grad_norm": 2.75,
      "learning_rate": 2.2168352843134964e-05,
      "loss": 0.8811,
      "step": 514420
    },
    {
      "epoch": 1.8029488972301957,
      "grad_norm": 2.921875,
      "learning_rate": 2.2167703814471262e-05,
      "loss": 0.8385,
      "step": 514430
    },
    {
      "epoch": 1.802983944737091,
      "grad_norm": 3.0,
      "learning_rate": 2.216705478580756e-05,
      "loss": 0.894,
      "step": 514440
    },
    {
      "epoch": 1.8030189922439868,
      "grad_norm": 3.203125,
      "learning_rate": 2.216640575714386e-05,
      "loss": 0.857,
      "step": 514450
    },
    {
      "epoch": 1.8030540397508823,
      "grad_norm": 3.109375,
      "learning_rate": 2.216575672848016e-05,
      "loss": 0.74,
      "step": 514460
    },
    {
      "epoch": 1.8030890872577778,
      "grad_norm": 2.859375,
      "learning_rate": 2.2165107699816457e-05,
      "loss": 0.8387,
      "step": 514470
    },
    {
      "epoch": 1.8031241347646736,
      "grad_norm": 2.53125,
      "learning_rate": 2.2164458671152755e-05,
      "loss": 0.8116,
      "step": 514480
    },
    {
      "epoch": 1.8031591822715691,
      "grad_norm": 2.6875,
      "learning_rate": 2.2163809642489053e-05,
      "loss": 0.8395,
      "step": 514490
    },
    {
      "epoch": 1.8031942297784647,
      "grad_norm": 3.0,
      "learning_rate": 2.216316061382535e-05,
      "loss": 0.7956,
      "step": 514500
    },
    {
      "epoch": 1.8032292772853604,
      "grad_norm": 3.125,
      "learning_rate": 2.216251158516165e-05,
      "loss": 0.7974,
      "step": 514510
    },
    {
      "epoch": 1.803264324792256,
      "grad_norm": 3.0625,
      "learning_rate": 2.2161862556497944e-05,
      "loss": 0.8361,
      "step": 514520
    },
    {
      "epoch": 1.8032993722991515,
      "grad_norm": 3.0625,
      "learning_rate": 2.2161213527834242e-05,
      "loss": 0.8732,
      "step": 514530
    },
    {
      "epoch": 1.8033344198060473,
      "grad_norm": 2.53125,
      "learning_rate": 2.216056449917054e-05,
      "loss": 0.8338,
      "step": 514540
    },
    {
      "epoch": 1.8033694673129426,
      "grad_norm": 3.046875,
      "learning_rate": 2.2159915470506838e-05,
      "loss": 0.8169,
      "step": 514550
    },
    {
      "epoch": 1.8034045148198383,
      "grad_norm": 2.8125,
      "learning_rate": 2.215926644184314e-05,
      "loss": 0.7862,
      "step": 514560
    },
    {
      "epoch": 1.8034395623267339,
      "grad_norm": 2.84375,
      "learning_rate": 2.2158617413179437e-05,
      "loss": 0.7714,
      "step": 514570
    },
    {
      "epoch": 1.8034746098336294,
      "grad_norm": 3.09375,
      "learning_rate": 2.2157968384515735e-05,
      "loss": 0.8342,
      "step": 514580
    },
    {
      "epoch": 1.8035096573405252,
      "grad_norm": 3.125,
      "learning_rate": 2.2157319355852033e-05,
      "loss": 0.9296,
      "step": 514590
    },
    {
      "epoch": 1.8035447048474207,
      "grad_norm": 2.890625,
      "learning_rate": 2.215667032718833e-05,
      "loss": 0.8935,
      "step": 514600
    },
    {
      "epoch": 1.8035797523543162,
      "grad_norm": 2.5,
      "learning_rate": 2.215602129852463e-05,
      "loss": 0.8556,
      "step": 514610
    },
    {
      "epoch": 1.803614799861212,
      "grad_norm": 2.875,
      "learning_rate": 2.2155372269860927e-05,
      "loss": 0.8775,
      "step": 514620
    },
    {
      "epoch": 1.8036498473681075,
      "grad_norm": 2.859375,
      "learning_rate": 2.2154723241197225e-05,
      "loss": 0.8725,
      "step": 514630
    },
    {
      "epoch": 1.803684894875003,
      "grad_norm": 2.265625,
      "learning_rate": 2.2154074212533523e-05,
      "loss": 0.7945,
      "step": 514640
    },
    {
      "epoch": 1.8037199423818988,
      "grad_norm": 3.53125,
      "learning_rate": 2.215342518386982e-05,
      "loss": 0.7441,
      "step": 514650
    },
    {
      "epoch": 1.8037549898887941,
      "grad_norm": 2.90625,
      "learning_rate": 2.215277615520612e-05,
      "loss": 0.7705,
      "step": 514660
    },
    {
      "epoch": 1.8037900373956899,
      "grad_norm": 3.296875,
      "learning_rate": 2.2152127126542417e-05,
      "loss": 0.9102,
      "step": 514670
    },
    {
      "epoch": 1.8038250849025854,
      "grad_norm": 2.6875,
      "learning_rate": 2.2151478097878715e-05,
      "loss": 0.8124,
      "step": 514680
    },
    {
      "epoch": 1.803860132409481,
      "grad_norm": 3.328125,
      "learning_rate": 2.2150829069215013e-05,
      "loss": 0.8703,
      "step": 514690
    },
    {
      "epoch": 1.8038951799163767,
      "grad_norm": 3.15625,
      "learning_rate": 2.2150180040551315e-05,
      "loss": 0.7881,
      "step": 514700
    },
    {
      "epoch": 1.8039302274232722,
      "grad_norm": 2.71875,
      "learning_rate": 2.214953101188761e-05,
      "loss": 0.8787,
      "step": 514710
    },
    {
      "epoch": 1.8039652749301678,
      "grad_norm": 2.484375,
      "learning_rate": 2.2148881983223907e-05,
      "loss": 0.808,
      "step": 514720
    },
    {
      "epoch": 1.8040003224370635,
      "grad_norm": 3.09375,
      "learning_rate": 2.2148232954560205e-05,
      "loss": 0.8308,
      "step": 514730
    },
    {
      "epoch": 1.804035369943959,
      "grad_norm": 3.1875,
      "learning_rate": 2.2147583925896503e-05,
      "loss": 0.8857,
      "step": 514740
    },
    {
      "epoch": 1.8040704174508546,
      "grad_norm": 2.546875,
      "learning_rate": 2.21469348972328e-05,
      "loss": 0.8531,
      "step": 514750
    },
    {
      "epoch": 1.8041054649577504,
      "grad_norm": 2.828125,
      "learning_rate": 2.21462858685691e-05,
      "loss": 0.7495,
      "step": 514760
    },
    {
      "epoch": 1.8041405124646457,
      "grad_norm": 2.890625,
      "learning_rate": 2.2145636839905397e-05,
      "loss": 0.8384,
      "step": 514770
    },
    {
      "epoch": 1.8041755599715414,
      "grad_norm": 2.65625,
      "learning_rate": 2.2144987811241695e-05,
      "loss": 0.8357,
      "step": 514780
    },
    {
      "epoch": 1.804210607478437,
      "grad_norm": 3.25,
      "learning_rate": 2.2144338782577993e-05,
      "loss": 0.8681,
      "step": 514790
    },
    {
      "epoch": 1.8042456549853325,
      "grad_norm": 3.0625,
      "learning_rate": 2.214368975391429e-05,
      "loss": 0.839,
      "step": 514800
    },
    {
      "epoch": 1.8042807024922283,
      "grad_norm": 2.609375,
      "learning_rate": 2.2143040725250593e-05,
      "loss": 0.8291,
      "step": 514810
    },
    {
      "epoch": 1.8043157499991238,
      "grad_norm": 2.9375,
      "learning_rate": 2.214239169658689e-05,
      "loss": 0.8096,
      "step": 514820
    },
    {
      "epoch": 1.8043507975060193,
      "grad_norm": 2.8125,
      "learning_rate": 2.214174266792319e-05,
      "loss": 0.7978,
      "step": 514830
    },
    {
      "epoch": 1.804385845012915,
      "grad_norm": 2.8125,
      "learning_rate": 2.2141093639259487e-05,
      "loss": 0.7906,
      "step": 514840
    },
    {
      "epoch": 1.8044208925198106,
      "grad_norm": 2.34375,
      "learning_rate": 2.2140444610595785e-05,
      "loss": 0.8574,
      "step": 514850
    },
    {
      "epoch": 1.8044559400267062,
      "grad_norm": 3.0,
      "learning_rate": 2.2139795581932083e-05,
      "loss": 0.8108,
      "step": 514860
    },
    {
      "epoch": 1.804490987533602,
      "grad_norm": 3.125,
      "learning_rate": 2.213914655326838e-05,
      "loss": 0.8523,
      "step": 514870
    },
    {
      "epoch": 1.8045260350404972,
      "grad_norm": 2.78125,
      "learning_rate": 2.213849752460468e-05,
      "loss": 0.892,
      "step": 514880
    },
    {
      "epoch": 1.804561082547393,
      "grad_norm": 2.75,
      "learning_rate": 2.2137848495940977e-05,
      "loss": 0.7886,
      "step": 514890
    },
    {
      "epoch": 1.8045961300542885,
      "grad_norm": 3.125,
      "learning_rate": 2.213719946727727e-05,
      "loss": 0.8814,
      "step": 514900
    },
    {
      "epoch": 1.804631177561184,
      "grad_norm": 3.140625,
      "learning_rate": 2.213655043861357e-05,
      "loss": 0.8328,
      "step": 514910
    },
    {
      "epoch": 1.8046662250680798,
      "grad_norm": 3.09375,
      "learning_rate": 2.2135901409949867e-05,
      "loss": 0.8564,
      "step": 514920
    },
    {
      "epoch": 1.8047012725749754,
      "grad_norm": 2.546875,
      "learning_rate": 2.213525238128617e-05,
      "loss": 0.8597,
      "step": 514930
    },
    {
      "epoch": 1.804736320081871,
      "grad_norm": 2.9375,
      "learning_rate": 2.2134603352622467e-05,
      "loss": 0.8379,
      "step": 514940
    },
    {
      "epoch": 1.8047713675887667,
      "grad_norm": 2.890625,
      "learning_rate": 2.2133954323958765e-05,
      "loss": 0.8673,
      "step": 514950
    },
    {
      "epoch": 1.8048064150956622,
      "grad_norm": 3.203125,
      "learning_rate": 2.2133305295295063e-05,
      "loss": 0.8762,
      "step": 514960
    },
    {
      "epoch": 1.8048414626025577,
      "grad_norm": 2.921875,
      "learning_rate": 2.213265626663136e-05,
      "loss": 0.8597,
      "step": 514970
    },
    {
      "epoch": 1.8048765101094535,
      "grad_norm": 2.890625,
      "learning_rate": 2.213200723796766e-05,
      "loss": 0.8579,
      "step": 514980
    },
    {
      "epoch": 1.8049115576163488,
      "grad_norm": 2.859375,
      "learning_rate": 2.2131358209303957e-05,
      "loss": 0.8108,
      "step": 514990
    },
    {
      "epoch": 1.8049466051232446,
      "grad_norm": 2.984375,
      "learning_rate": 2.2130709180640255e-05,
      "loss": 0.8374,
      "step": 515000
    },
    {
      "epoch": 1.8049466051232446,
      "eval_loss": 0.7783234119415283,
      "eval_runtime": 558.8211,
      "eval_samples_per_second": 680.783,
      "eval_steps_per_second": 56.732,
      "step": 515000
    },
    {
      "epoch": 1.8049816526301403,
      "grad_norm": 2.828125,
      "learning_rate": 2.2130060151976553e-05,
      "loss": 0.8439,
      "step": 515010
    },
    {
      "epoch": 1.8050167001370356,
      "grad_norm": 2.75,
      "learning_rate": 2.212941112331285e-05,
      "loss": 0.79,
      "step": 515020
    },
    {
      "epoch": 1.8050517476439314,
      "grad_norm": 2.78125,
      "learning_rate": 2.212876209464915e-05,
      "loss": 0.854,
      "step": 515030
    },
    {
      "epoch": 1.805086795150827,
      "grad_norm": 3.171875,
      "learning_rate": 2.2128113065985447e-05,
      "loss": 0.8727,
      "step": 515040
    },
    {
      "epoch": 1.8051218426577225,
      "grad_norm": 2.875,
      "learning_rate": 2.2127464037321745e-05,
      "loss": 0.8931,
      "step": 515050
    },
    {
      "epoch": 1.8051568901646182,
      "grad_norm": 3.25,
      "learning_rate": 2.2126815008658043e-05,
      "loss": 0.8521,
      "step": 515060
    },
    {
      "epoch": 1.8051919376715138,
      "grad_norm": 2.46875,
      "learning_rate": 2.2126165979994344e-05,
      "loss": 0.8222,
      "step": 515070
    },
    {
      "epoch": 1.8052269851784093,
      "grad_norm": 2.953125,
      "learning_rate": 2.2125516951330642e-05,
      "loss": 0.8379,
      "step": 515080
    },
    {
      "epoch": 1.805262032685305,
      "grad_norm": 2.265625,
      "learning_rate": 2.2124867922666937e-05,
      "loss": 0.8364,
      "step": 515090
    },
    {
      "epoch": 1.8052970801922004,
      "grad_norm": 2.890625,
      "learning_rate": 2.2124218894003235e-05,
      "loss": 0.9127,
      "step": 515100
    },
    {
      "epoch": 1.8053321276990961,
      "grad_norm": 2.515625,
      "learning_rate": 2.2123569865339533e-05,
      "loss": 0.8249,
      "step": 515110
    },
    {
      "epoch": 1.8053671752059919,
      "grad_norm": 2.84375,
      "learning_rate": 2.212292083667583e-05,
      "loss": 0.8414,
      "step": 515120
    },
    {
      "epoch": 1.8054022227128872,
      "grad_norm": 3.015625,
      "learning_rate": 2.212227180801213e-05,
      "loss": 0.8017,
      "step": 515130
    },
    {
      "epoch": 1.805437270219783,
      "grad_norm": 2.828125,
      "learning_rate": 2.2121622779348427e-05,
      "loss": 0.8256,
      "step": 515140
    },
    {
      "epoch": 1.8054723177266785,
      "grad_norm": 3.328125,
      "learning_rate": 2.2120973750684725e-05,
      "loss": 0.8394,
      "step": 515150
    },
    {
      "epoch": 1.805507365233574,
      "grad_norm": 2.40625,
      "learning_rate": 2.2120324722021023e-05,
      "loss": 0.8305,
      "step": 515160
    },
    {
      "epoch": 1.8055424127404698,
      "grad_norm": 2.65625,
      "learning_rate": 2.211967569335732e-05,
      "loss": 0.8217,
      "step": 515170
    },
    {
      "epoch": 1.8055774602473653,
      "grad_norm": 2.890625,
      "learning_rate": 2.2119026664693622e-05,
      "loss": 0.7505,
      "step": 515180
    },
    {
      "epoch": 1.8056125077542609,
      "grad_norm": 3.125,
      "learning_rate": 2.211837763602992e-05,
      "loss": 0.8213,
      "step": 515190
    },
    {
      "epoch": 1.8056475552611566,
      "grad_norm": 3.34375,
      "learning_rate": 2.2117728607366218e-05,
      "loss": 0.8148,
      "step": 515200
    },
    {
      "epoch": 1.8056826027680521,
      "grad_norm": 3.0625,
      "learning_rate": 2.2117079578702516e-05,
      "loss": 0.8883,
      "step": 515210
    },
    {
      "epoch": 1.8057176502749477,
      "grad_norm": 3.265625,
      "learning_rate": 2.2116430550038814e-05,
      "loss": 0.8597,
      "step": 515220
    },
    {
      "epoch": 1.8057526977818434,
      "grad_norm": 3.203125,
      "learning_rate": 2.2115781521375112e-05,
      "loss": 0.8148,
      "step": 515230
    },
    {
      "epoch": 1.8057877452887388,
      "grad_norm": 2.84375,
      "learning_rate": 2.211513249271141e-05,
      "loss": 0.8152,
      "step": 515240
    },
    {
      "epoch": 1.8058227927956345,
      "grad_norm": 2.359375,
      "learning_rate": 2.2114483464047708e-05,
      "loss": 0.7567,
      "step": 515250
    },
    {
      "epoch": 1.80585784030253,
      "grad_norm": 2.71875,
      "learning_rate": 2.2113834435384006e-05,
      "loss": 0.8095,
      "step": 515260
    },
    {
      "epoch": 1.8058928878094256,
      "grad_norm": 3.0,
      "learning_rate": 2.2113185406720304e-05,
      "loss": 0.8791,
      "step": 515270
    },
    {
      "epoch": 1.8059279353163213,
      "grad_norm": 3.09375,
      "learning_rate": 2.21125363780566e-05,
      "loss": 0.8657,
      "step": 515280
    },
    {
      "epoch": 1.8059629828232169,
      "grad_norm": 2.609375,
      "learning_rate": 2.21118873493929e-05,
      "loss": 0.8068,
      "step": 515290
    },
    {
      "epoch": 1.8059980303301124,
      "grad_norm": 3.203125,
      "learning_rate": 2.2111238320729198e-05,
      "loss": 0.8777,
      "step": 515300
    },
    {
      "epoch": 1.8060330778370082,
      "grad_norm": 3.578125,
      "learning_rate": 2.2110589292065496e-05,
      "loss": 0.8159,
      "step": 515310
    },
    {
      "epoch": 1.8060681253439037,
      "grad_norm": 2.859375,
      "learning_rate": 2.2109940263401794e-05,
      "loss": 0.8592,
      "step": 515320
    },
    {
      "epoch": 1.8061031728507992,
      "grad_norm": 3.0625,
      "learning_rate": 2.2109291234738092e-05,
      "loss": 0.816,
      "step": 515330
    },
    {
      "epoch": 1.806138220357695,
      "grad_norm": 3.125,
      "learning_rate": 2.210864220607439e-05,
      "loss": 0.7644,
      "step": 515340
    },
    {
      "epoch": 1.8061732678645903,
      "grad_norm": 2.78125,
      "learning_rate": 2.2107993177410688e-05,
      "loss": 0.8495,
      "step": 515350
    },
    {
      "epoch": 1.806208315371486,
      "grad_norm": 3.0625,
      "learning_rate": 2.2107344148746986e-05,
      "loss": 0.8444,
      "step": 515360
    },
    {
      "epoch": 1.8062433628783816,
      "grad_norm": 2.6875,
      "learning_rate": 2.2106695120083284e-05,
      "loss": 0.8582,
      "step": 515370
    },
    {
      "epoch": 1.8062784103852771,
      "grad_norm": 2.4375,
      "learning_rate": 2.2106046091419582e-05,
      "loss": 0.7889,
      "step": 515380
    },
    {
      "epoch": 1.806313457892173,
      "grad_norm": 3.25,
      "learning_rate": 2.210539706275588e-05,
      "loss": 0.7775,
      "step": 515390
    },
    {
      "epoch": 1.8063485053990684,
      "grad_norm": 3.28125,
      "learning_rate": 2.2104748034092178e-05,
      "loss": 0.7643,
      "step": 515400
    },
    {
      "epoch": 1.806383552905964,
      "grad_norm": 2.796875,
      "learning_rate": 2.2104099005428476e-05,
      "loss": 0.8834,
      "step": 515410
    },
    {
      "epoch": 1.8064186004128597,
      "grad_norm": 2.890625,
      "learning_rate": 2.2103449976764774e-05,
      "loss": 0.7984,
      "step": 515420
    },
    {
      "epoch": 1.8064536479197553,
      "grad_norm": 2.9375,
      "learning_rate": 2.2102800948101076e-05,
      "loss": 0.8862,
      "step": 515430
    },
    {
      "epoch": 1.8064886954266508,
      "grad_norm": 3.21875,
      "learning_rate": 2.2102151919437374e-05,
      "loss": 0.8266,
      "step": 515440
    },
    {
      "epoch": 1.8065237429335466,
      "grad_norm": 3.34375,
      "learning_rate": 2.210150289077367e-05,
      "loss": 0.8599,
      "step": 515450
    },
    {
      "epoch": 1.8065587904404419,
      "grad_norm": 2.8125,
      "learning_rate": 2.210085386210997e-05,
      "loss": 0.8609,
      "step": 515460
    },
    {
      "epoch": 1.8065938379473376,
      "grad_norm": 2.546875,
      "learning_rate": 2.2100204833446264e-05,
      "loss": 0.7821,
      "step": 515470
    },
    {
      "epoch": 1.8066288854542332,
      "grad_norm": 3.09375,
      "learning_rate": 2.2099555804782562e-05,
      "loss": 0.8336,
      "step": 515480
    },
    {
      "epoch": 1.8066639329611287,
      "grad_norm": 2.6875,
      "learning_rate": 2.209890677611886e-05,
      "loss": 0.7906,
      "step": 515490
    },
    {
      "epoch": 1.8066989804680245,
      "grad_norm": 2.765625,
      "learning_rate": 2.2098257747455158e-05,
      "loss": 0.8288,
      "step": 515500
    },
    {
      "epoch": 1.80673402797492,
      "grad_norm": 3.203125,
      "learning_rate": 2.2097608718791456e-05,
      "loss": 0.8854,
      "step": 515510
    },
    {
      "epoch": 1.8067690754818155,
      "grad_norm": 3.0625,
      "learning_rate": 2.2096959690127754e-05,
      "loss": 0.8211,
      "step": 515520
    },
    {
      "epoch": 1.8068041229887113,
      "grad_norm": 2.953125,
      "learning_rate": 2.2096310661464052e-05,
      "loss": 0.854,
      "step": 515530
    },
    {
      "epoch": 1.8068391704956068,
      "grad_norm": 3.09375,
      "learning_rate": 2.209566163280035e-05,
      "loss": 0.7872,
      "step": 515540
    },
    {
      "epoch": 1.8068742180025024,
      "grad_norm": 2.328125,
      "learning_rate": 2.209501260413665e-05,
      "loss": 0.7952,
      "step": 515550
    },
    {
      "epoch": 1.8069092655093981,
      "grad_norm": 2.625,
      "learning_rate": 2.209436357547295e-05,
      "loss": 0.7715,
      "step": 515560
    },
    {
      "epoch": 1.8069443130162934,
      "grad_norm": 3.28125,
      "learning_rate": 2.2093714546809248e-05,
      "loss": 0.8682,
      "step": 515570
    },
    {
      "epoch": 1.8069793605231892,
      "grad_norm": 2.5625,
      "learning_rate": 2.2093065518145546e-05,
      "loss": 0.7685,
      "step": 515580
    },
    {
      "epoch": 1.8070144080300847,
      "grad_norm": 2.578125,
      "learning_rate": 2.2092416489481844e-05,
      "loss": 0.8408,
      "step": 515590
    },
    {
      "epoch": 1.8070494555369803,
      "grad_norm": 2.875,
      "learning_rate": 2.209176746081814e-05,
      "loss": 0.8692,
      "step": 515600
    },
    {
      "epoch": 1.807084503043876,
      "grad_norm": 3.09375,
      "learning_rate": 2.209111843215444e-05,
      "loss": 0.8687,
      "step": 515610
    },
    {
      "epoch": 1.8071195505507716,
      "grad_norm": 2.890625,
      "learning_rate": 2.2090469403490738e-05,
      "loss": 0.8081,
      "step": 515620
    },
    {
      "epoch": 1.807154598057667,
      "grad_norm": 2.75,
      "learning_rate": 2.2089820374827036e-05,
      "loss": 0.8187,
      "step": 515630
    },
    {
      "epoch": 1.8071896455645629,
      "grad_norm": 2.5,
      "learning_rate": 2.2089171346163334e-05,
      "loss": 0.7949,
      "step": 515640
    },
    {
      "epoch": 1.8072246930714584,
      "grad_norm": 3.15625,
      "learning_rate": 2.2088522317499628e-05,
      "loss": 0.865,
      "step": 515650
    },
    {
      "epoch": 1.807259740578354,
      "grad_norm": 2.671875,
      "learning_rate": 2.208787328883593e-05,
      "loss": 0.877,
      "step": 515660
    },
    {
      "epoch": 1.8072947880852497,
      "grad_norm": 2.75,
      "learning_rate": 2.2087224260172228e-05,
      "loss": 0.7852,
      "step": 515670
    },
    {
      "epoch": 1.807329835592145,
      "grad_norm": 2.9375,
      "learning_rate": 2.2086575231508526e-05,
      "loss": 0.778,
      "step": 515680
    },
    {
      "epoch": 1.8073648830990408,
      "grad_norm": 2.9375,
      "learning_rate": 2.2085926202844824e-05,
      "loss": 0.7847,
      "step": 515690
    },
    {
      "epoch": 1.8073999306059365,
      "grad_norm": 2.859375,
      "learning_rate": 2.208527717418112e-05,
      "loss": 0.7745,
      "step": 515700
    },
    {
      "epoch": 1.8074349781128318,
      "grad_norm": 2.703125,
      "learning_rate": 2.208462814551742e-05,
      "loss": 0.8073,
      "step": 515710
    },
    {
      "epoch": 1.8074700256197276,
      "grad_norm": 2.84375,
      "learning_rate": 2.2083979116853718e-05,
      "loss": 0.7756,
      "step": 515720
    },
    {
      "epoch": 1.8075050731266231,
      "grad_norm": 2.953125,
      "learning_rate": 2.2083330088190016e-05,
      "loss": 0.7765,
      "step": 515730
    },
    {
      "epoch": 1.8075401206335187,
      "grad_norm": 3.140625,
      "learning_rate": 2.2082681059526314e-05,
      "loss": 0.7953,
      "step": 515740
    },
    {
      "epoch": 1.8075751681404144,
      "grad_norm": 3.0625,
      "learning_rate": 2.208203203086261e-05,
      "loss": 0.8102,
      "step": 515750
    },
    {
      "epoch": 1.80761021564731,
      "grad_norm": 3.15625,
      "learning_rate": 2.208138300219891e-05,
      "loss": 0.7921,
      "step": 515760
    },
    {
      "epoch": 1.8076452631542055,
      "grad_norm": 2.609375,
      "learning_rate": 2.2080733973535208e-05,
      "loss": 0.8823,
      "step": 515770
    },
    {
      "epoch": 1.8076803106611012,
      "grad_norm": 3.140625,
      "learning_rate": 2.2080084944871506e-05,
      "loss": 0.778,
      "step": 515780
    },
    {
      "epoch": 1.8077153581679966,
      "grad_norm": 2.734375,
      "learning_rate": 2.2079435916207804e-05,
      "loss": 0.8448,
      "step": 515790
    },
    {
      "epoch": 1.8077504056748923,
      "grad_norm": 2.90625,
      "learning_rate": 2.2078786887544105e-05,
      "loss": 0.8334,
      "step": 515800
    },
    {
      "epoch": 1.807785453181788,
      "grad_norm": 2.953125,
      "learning_rate": 2.2078137858880403e-05,
      "loss": 0.7663,
      "step": 515810
    },
    {
      "epoch": 1.8078205006886834,
      "grad_norm": 2.890625,
      "learning_rate": 2.20774888302167e-05,
      "loss": 0.7688,
      "step": 515820
    },
    {
      "epoch": 1.8078555481955791,
      "grad_norm": 3.296875,
      "learning_rate": 2.2076839801553e-05,
      "loss": 0.8077,
      "step": 515830
    },
    {
      "epoch": 1.8078905957024747,
      "grad_norm": 2.96875,
      "learning_rate": 2.2076190772889294e-05,
      "loss": 0.8428,
      "step": 515840
    },
    {
      "epoch": 1.8079256432093702,
      "grad_norm": 2.140625,
      "learning_rate": 2.207554174422559e-05,
      "loss": 0.7555,
      "step": 515850
    },
    {
      "epoch": 1.807960690716266,
      "grad_norm": 2.96875,
      "learning_rate": 2.207489271556189e-05,
      "loss": 0.827,
      "step": 515860
    },
    {
      "epoch": 1.8079957382231615,
      "grad_norm": 3.421875,
      "learning_rate": 2.2074243686898188e-05,
      "loss": 0.7589,
      "step": 515870
    },
    {
      "epoch": 1.808030785730057,
      "grad_norm": 2.609375,
      "learning_rate": 2.2073594658234486e-05,
      "loss": 0.774,
      "step": 515880
    },
    {
      "epoch": 1.8080658332369528,
      "grad_norm": 2.625,
      "learning_rate": 2.2072945629570784e-05,
      "loss": 0.7944,
      "step": 515890
    },
    {
      "epoch": 1.8081008807438483,
      "grad_norm": 3.109375,
      "learning_rate": 2.207229660090708e-05,
      "loss": 0.8928,
      "step": 515900
    },
    {
      "epoch": 1.8081359282507439,
      "grad_norm": 2.578125,
      "learning_rate": 2.2071647572243383e-05,
      "loss": 0.8008,
      "step": 515910
    },
    {
      "epoch": 1.8081709757576396,
      "grad_norm": 3.546875,
      "learning_rate": 2.207099854357968e-05,
      "loss": 0.8868,
      "step": 515920
    },
    {
      "epoch": 1.808206023264535,
      "grad_norm": 3.015625,
      "learning_rate": 2.207034951491598e-05,
      "loss": 0.8528,
      "step": 515930
    },
    {
      "epoch": 1.8082410707714307,
      "grad_norm": 2.5625,
      "learning_rate": 2.2069700486252277e-05,
      "loss": 0.845,
      "step": 515940
    },
    {
      "epoch": 1.8082761182783262,
      "grad_norm": 2.765625,
      "learning_rate": 2.2069051457588575e-05,
      "loss": 0.8081,
      "step": 515950
    },
    {
      "epoch": 1.8083111657852218,
      "grad_norm": 2.609375,
      "learning_rate": 2.2068402428924873e-05,
      "loss": 0.7643,
      "step": 515960
    },
    {
      "epoch": 1.8083462132921175,
      "grad_norm": 3.125,
      "learning_rate": 2.206775340026117e-05,
      "loss": 0.861,
      "step": 515970
    },
    {
      "epoch": 1.808381260799013,
      "grad_norm": 2.828125,
      "learning_rate": 2.206710437159747e-05,
      "loss": 0.8174,
      "step": 515980
    },
    {
      "epoch": 1.8084163083059086,
      "grad_norm": 2.765625,
      "learning_rate": 2.2066455342933767e-05,
      "loss": 0.8605,
      "step": 515990
    },
    {
      "epoch": 1.8084513558128044,
      "grad_norm": 2.71875,
      "learning_rate": 2.2065806314270065e-05,
      "loss": 0.8384,
      "step": 516000
    },
    {
      "epoch": 1.8084864033197,
      "grad_norm": 2.890625,
      "learning_rate": 2.2065157285606363e-05,
      "loss": 0.8113,
      "step": 516010
    },
    {
      "epoch": 1.8085214508265954,
      "grad_norm": 2.859375,
      "learning_rate": 2.206450825694266e-05,
      "loss": 0.842,
      "step": 516020
    },
    {
      "epoch": 1.8085564983334912,
      "grad_norm": 2.734375,
      "learning_rate": 2.206385922827896e-05,
      "loss": 0.8451,
      "step": 516030
    },
    {
      "epoch": 1.8085915458403865,
      "grad_norm": 3.1875,
      "learning_rate": 2.2063210199615257e-05,
      "loss": 0.8288,
      "step": 516040
    },
    {
      "epoch": 1.8086265933472823,
      "grad_norm": 3.0625,
      "learning_rate": 2.2062561170951555e-05,
      "loss": 0.776,
      "step": 516050
    },
    {
      "epoch": 1.8086616408541778,
      "grad_norm": 2.34375,
      "learning_rate": 2.2061912142287853e-05,
      "loss": 0.7835,
      "step": 516060
    },
    {
      "epoch": 1.8086966883610733,
      "grad_norm": 2.578125,
      "learning_rate": 2.206126311362415e-05,
      "loss": 0.7955,
      "step": 516070
    },
    {
      "epoch": 1.808731735867969,
      "grad_norm": 2.53125,
      "learning_rate": 2.206061408496045e-05,
      "loss": 0.7781,
      "step": 516080
    },
    {
      "epoch": 1.8087667833748646,
      "grad_norm": 2.890625,
      "learning_rate": 2.2059965056296747e-05,
      "loss": 0.9365,
      "step": 516090
    },
    {
      "epoch": 1.8088018308817602,
      "grad_norm": 2.703125,
      "learning_rate": 2.2059316027633045e-05,
      "loss": 0.7628,
      "step": 516100
    },
    {
      "epoch": 1.808836878388656,
      "grad_norm": 3.015625,
      "learning_rate": 2.2058666998969343e-05,
      "loss": 0.883,
      "step": 516110
    },
    {
      "epoch": 1.8088719258955515,
      "grad_norm": 2.65625,
      "learning_rate": 2.205801797030564e-05,
      "loss": 0.816,
      "step": 516120
    },
    {
      "epoch": 1.808906973402447,
      "grad_norm": 2.34375,
      "learning_rate": 2.205736894164194e-05,
      "loss": 0.818,
      "step": 516130
    },
    {
      "epoch": 1.8089420209093428,
      "grad_norm": 3.40625,
      "learning_rate": 2.2056719912978237e-05,
      "loss": 0.8647,
      "step": 516140
    },
    {
      "epoch": 1.808977068416238,
      "grad_norm": 2.953125,
      "learning_rate": 2.2056070884314535e-05,
      "loss": 0.9461,
      "step": 516150
    },
    {
      "epoch": 1.8090121159231338,
      "grad_norm": 2.5,
      "learning_rate": 2.2055421855650833e-05,
      "loss": 0.7288,
      "step": 516160
    },
    {
      "epoch": 1.8090471634300294,
      "grad_norm": 3.015625,
      "learning_rate": 2.2054772826987134e-05,
      "loss": 0.8909,
      "step": 516170
    },
    {
      "epoch": 1.809082210936925,
      "grad_norm": 3.046875,
      "learning_rate": 2.2054123798323432e-05,
      "loss": 0.8772,
      "step": 516180
    },
    {
      "epoch": 1.8091172584438207,
      "grad_norm": 2.875,
      "learning_rate": 2.205347476965973e-05,
      "loss": 0.8178,
      "step": 516190
    },
    {
      "epoch": 1.8091523059507162,
      "grad_norm": 2.375,
      "learning_rate": 2.205282574099603e-05,
      "loss": 0.7801,
      "step": 516200
    },
    {
      "epoch": 1.8091873534576117,
      "grad_norm": 2.703125,
      "learning_rate": 2.2052176712332326e-05,
      "loss": 0.7925,
      "step": 516210
    },
    {
      "epoch": 1.8092224009645075,
      "grad_norm": 2.609375,
      "learning_rate": 2.205152768366862e-05,
      "loss": 0.8473,
      "step": 516220
    },
    {
      "epoch": 1.809257448471403,
      "grad_norm": 3.25,
      "learning_rate": 2.205087865500492e-05,
      "loss": 0.7274,
      "step": 516230
    },
    {
      "epoch": 1.8092924959782986,
      "grad_norm": 3.125,
      "learning_rate": 2.2050229626341217e-05,
      "loss": 0.8787,
      "step": 516240
    },
    {
      "epoch": 1.8093275434851943,
      "grad_norm": 2.53125,
      "learning_rate": 2.2049580597677515e-05,
      "loss": 0.8577,
      "step": 516250
    },
    {
      "epoch": 1.8093625909920896,
      "grad_norm": 2.8125,
      "learning_rate": 2.2048931569013813e-05,
      "loss": 0.8013,
      "step": 516260
    },
    {
      "epoch": 1.8093976384989854,
      "grad_norm": 3.078125,
      "learning_rate": 2.204828254035011e-05,
      "loss": 0.8657,
      "step": 516270
    },
    {
      "epoch": 1.809432686005881,
      "grad_norm": 2.578125,
      "learning_rate": 2.2047633511686412e-05,
      "loss": 0.806,
      "step": 516280
    },
    {
      "epoch": 1.8094677335127765,
      "grad_norm": 3.3125,
      "learning_rate": 2.204698448302271e-05,
      "loss": 0.8917,
      "step": 516290
    },
    {
      "epoch": 1.8095027810196722,
      "grad_norm": 2.71875,
      "learning_rate": 2.204633545435901e-05,
      "loss": 0.7213,
      "step": 516300
    },
    {
      "epoch": 1.8095378285265677,
      "grad_norm": 2.9375,
      "learning_rate": 2.2045686425695306e-05,
      "loss": 0.7594,
      "step": 516310
    },
    {
      "epoch": 1.8095728760334633,
      "grad_norm": 3.359375,
      "learning_rate": 2.2045037397031604e-05,
      "loss": 0.8599,
      "step": 516320
    },
    {
      "epoch": 1.809607923540359,
      "grad_norm": 2.84375,
      "learning_rate": 2.2044388368367902e-05,
      "loss": 0.792,
      "step": 516330
    },
    {
      "epoch": 1.8096429710472546,
      "grad_norm": 3.328125,
      "learning_rate": 2.20437393397042e-05,
      "loss": 0.8307,
      "step": 516340
    },
    {
      "epoch": 1.8096780185541501,
      "grad_norm": 2.875,
      "learning_rate": 2.20430903110405e-05,
      "loss": 0.8087,
      "step": 516350
    },
    {
      "epoch": 1.8097130660610459,
      "grad_norm": 3.234375,
      "learning_rate": 2.2042441282376796e-05,
      "loss": 0.8763,
      "step": 516360
    },
    {
      "epoch": 1.8097481135679412,
      "grad_norm": 2.734375,
      "learning_rate": 2.2041792253713094e-05,
      "loss": 0.8694,
      "step": 516370
    },
    {
      "epoch": 1.809783161074837,
      "grad_norm": 3.078125,
      "learning_rate": 2.2041143225049392e-05,
      "loss": 0.802,
      "step": 516380
    },
    {
      "epoch": 1.8098182085817327,
      "grad_norm": 2.96875,
      "learning_rate": 2.204049419638569e-05,
      "loss": 0.7631,
      "step": 516390
    },
    {
      "epoch": 1.809853256088628,
      "grad_norm": 2.65625,
      "learning_rate": 2.203984516772199e-05,
      "loss": 0.8587,
      "step": 516400
    },
    {
      "epoch": 1.8098883035955238,
      "grad_norm": 2.953125,
      "learning_rate": 2.2039196139058286e-05,
      "loss": 0.7619,
      "step": 516410
    },
    {
      "epoch": 1.8099233511024193,
      "grad_norm": 3.296875,
      "learning_rate": 2.2038547110394584e-05,
      "loss": 0.849,
      "step": 516420
    },
    {
      "epoch": 1.8099583986093148,
      "grad_norm": 2.640625,
      "learning_rate": 2.2037898081730882e-05,
      "loss": 0.8686,
      "step": 516430
    },
    {
      "epoch": 1.8099934461162106,
      "grad_norm": 3.25,
      "learning_rate": 2.203724905306718e-05,
      "loss": 0.7902,
      "step": 516440
    },
    {
      "epoch": 1.8100284936231061,
      "grad_norm": 3.34375,
      "learning_rate": 2.203660002440348e-05,
      "loss": 0.8105,
      "step": 516450
    },
    {
      "epoch": 1.8100635411300017,
      "grad_norm": 3.0625,
      "learning_rate": 2.2035950995739776e-05,
      "loss": 0.783,
      "step": 516460
    },
    {
      "epoch": 1.8100985886368974,
      "grad_norm": 2.875,
      "learning_rate": 2.2035301967076074e-05,
      "loss": 0.8225,
      "step": 516470
    },
    {
      "epoch": 1.8101336361437927,
      "grad_norm": 3.15625,
      "learning_rate": 2.2034652938412372e-05,
      "loss": 0.7317,
      "step": 516480
    },
    {
      "epoch": 1.8101686836506885,
      "grad_norm": 2.8125,
      "learning_rate": 2.203400390974867e-05,
      "loss": 0.8828,
      "step": 516490
    },
    {
      "epoch": 1.8102037311575843,
      "grad_norm": 2.609375,
      "learning_rate": 2.203335488108497e-05,
      "loss": 0.7397,
      "step": 516500
    },
    {
      "epoch": 1.8102387786644796,
      "grad_norm": 2.703125,
      "learning_rate": 2.2032705852421266e-05,
      "loss": 0.8072,
      "step": 516510
    },
    {
      "epoch": 1.8102738261713753,
      "grad_norm": 2.578125,
      "learning_rate": 2.2032056823757564e-05,
      "loss": 0.8523,
      "step": 516520
    },
    {
      "epoch": 1.8103088736782709,
      "grad_norm": 2.375,
      "learning_rate": 2.2031407795093866e-05,
      "loss": 0.8135,
      "step": 516530
    },
    {
      "epoch": 1.8103439211851664,
      "grad_norm": 2.890625,
      "learning_rate": 2.2030758766430164e-05,
      "loss": 0.7174,
      "step": 516540
    },
    {
      "epoch": 1.8103789686920622,
      "grad_norm": 2.90625,
      "learning_rate": 2.2030109737766462e-05,
      "loss": 0.9342,
      "step": 516550
    },
    {
      "epoch": 1.8104140161989577,
      "grad_norm": 2.71875,
      "learning_rate": 2.202946070910276e-05,
      "loss": 0.9203,
      "step": 516560
    },
    {
      "epoch": 1.8104490637058532,
      "grad_norm": 2.953125,
      "learning_rate": 2.2028811680439058e-05,
      "loss": 0.8832,
      "step": 516570
    },
    {
      "epoch": 1.810484111212749,
      "grad_norm": 2.90625,
      "learning_rate": 2.2028162651775356e-05,
      "loss": 0.8491,
      "step": 516580
    },
    {
      "epoch": 1.8105191587196445,
      "grad_norm": 2.796875,
      "learning_rate": 2.202751362311165e-05,
      "loss": 0.8139,
      "step": 516590
    },
    {
      "epoch": 1.81055420622654,
      "grad_norm": 2.671875,
      "learning_rate": 2.202686459444795e-05,
      "loss": 0.7182,
      "step": 516600
    },
    {
      "epoch": 1.8105892537334358,
      "grad_norm": 2.96875,
      "learning_rate": 2.2026215565784246e-05,
      "loss": 0.8545,
      "step": 516610
    },
    {
      "epoch": 1.8106243012403311,
      "grad_norm": 2.875,
      "learning_rate": 2.2025566537120544e-05,
      "loss": 0.8124,
      "step": 516620
    },
    {
      "epoch": 1.810659348747227,
      "grad_norm": 3.265625,
      "learning_rate": 2.2024917508456842e-05,
      "loss": 0.8626,
      "step": 516630
    },
    {
      "epoch": 1.8106943962541224,
      "grad_norm": 2.90625,
      "learning_rate": 2.202426847979314e-05,
      "loss": 0.8138,
      "step": 516640
    },
    {
      "epoch": 1.810729443761018,
      "grad_norm": 3.0,
      "learning_rate": 2.2023619451129442e-05,
      "loss": 0.8987,
      "step": 516650
    },
    {
      "epoch": 1.8107644912679137,
      "grad_norm": 3.015625,
      "learning_rate": 2.202297042246574e-05,
      "loss": 0.7716,
      "step": 516660
    },
    {
      "epoch": 1.8107995387748093,
      "grad_norm": 2.796875,
      "learning_rate": 2.2022321393802038e-05,
      "loss": 0.8207,
      "step": 516670
    },
    {
      "epoch": 1.8108345862817048,
      "grad_norm": 3.453125,
      "learning_rate": 2.2021672365138336e-05,
      "loss": 0.8107,
      "step": 516680
    },
    {
      "epoch": 1.8108696337886006,
      "grad_norm": 2.453125,
      "learning_rate": 2.2021023336474634e-05,
      "loss": 0.8242,
      "step": 516690
    },
    {
      "epoch": 1.810904681295496,
      "grad_norm": 2.640625,
      "learning_rate": 2.2020374307810932e-05,
      "loss": 0.8575,
      "step": 516700
    },
    {
      "epoch": 1.8109397288023916,
      "grad_norm": 2.609375,
      "learning_rate": 2.201972527914723e-05,
      "loss": 0.8453,
      "step": 516710
    },
    {
      "epoch": 1.8109747763092874,
      "grad_norm": 2.84375,
      "learning_rate": 2.2019076250483528e-05,
      "loss": 0.8494,
      "step": 516720
    },
    {
      "epoch": 1.8110098238161827,
      "grad_norm": 2.90625,
      "learning_rate": 2.2018427221819826e-05,
      "loss": 0.8463,
      "step": 516730
    },
    {
      "epoch": 1.8110448713230785,
      "grad_norm": 3.140625,
      "learning_rate": 2.2017778193156124e-05,
      "loss": 0.8718,
      "step": 516740
    },
    {
      "epoch": 1.811079918829974,
      "grad_norm": 2.703125,
      "learning_rate": 2.2017129164492422e-05,
      "loss": 0.8227,
      "step": 516750
    },
    {
      "epoch": 1.8111149663368695,
      "grad_norm": 3.0625,
      "learning_rate": 2.201648013582872e-05,
      "loss": 0.8346,
      "step": 516760
    },
    {
      "epoch": 1.8111500138437653,
      "grad_norm": 2.40625,
      "learning_rate": 2.2015831107165018e-05,
      "loss": 0.8087,
      "step": 516770
    },
    {
      "epoch": 1.8111850613506608,
      "grad_norm": 2.75,
      "learning_rate": 2.2015182078501316e-05,
      "loss": 0.8643,
      "step": 516780
    },
    {
      "epoch": 1.8112201088575564,
      "grad_norm": 2.609375,
      "learning_rate": 2.2014533049837614e-05,
      "loss": 0.8071,
      "step": 516790
    },
    {
      "epoch": 1.8112551563644521,
      "grad_norm": 3.234375,
      "learning_rate": 2.2013884021173912e-05,
      "loss": 0.8735,
      "step": 516800
    },
    {
      "epoch": 1.8112902038713476,
      "grad_norm": 3.03125,
      "learning_rate": 2.201323499251021e-05,
      "loss": 0.8394,
      "step": 516810
    },
    {
      "epoch": 1.8113252513782432,
      "grad_norm": 3.0,
      "learning_rate": 2.2012585963846508e-05,
      "loss": 0.9196,
      "step": 516820
    },
    {
      "epoch": 1.811360298885139,
      "grad_norm": 3.8125,
      "learning_rate": 2.2011936935182806e-05,
      "loss": 0.7789,
      "step": 516830
    },
    {
      "epoch": 1.8113953463920343,
      "grad_norm": 3.21875,
      "learning_rate": 2.2011287906519104e-05,
      "loss": 0.835,
      "step": 516840
    },
    {
      "epoch": 1.81143039389893,
      "grad_norm": 3.3125,
      "learning_rate": 2.2010638877855402e-05,
      "loss": 0.8236,
      "step": 516850
    },
    {
      "epoch": 1.8114654414058255,
      "grad_norm": 2.828125,
      "learning_rate": 2.20099898491917e-05,
      "loss": 0.8296,
      "step": 516860
    },
    {
      "epoch": 1.811500488912721,
      "grad_norm": 3.46875,
      "learning_rate": 2.2009340820527998e-05,
      "loss": 0.8585,
      "step": 516870
    },
    {
      "epoch": 1.8115355364196168,
      "grad_norm": 2.8125,
      "learning_rate": 2.2008691791864296e-05,
      "loss": 0.8255,
      "step": 516880
    },
    {
      "epoch": 1.8115705839265124,
      "grad_norm": 2.765625,
      "learning_rate": 2.2008042763200594e-05,
      "loss": 0.7819,
      "step": 516890
    },
    {
      "epoch": 1.811605631433408,
      "grad_norm": 2.625,
      "learning_rate": 2.2007393734536895e-05,
      "loss": 0.7973,
      "step": 516900
    },
    {
      "epoch": 1.8116406789403037,
      "grad_norm": 2.375,
      "learning_rate": 2.2006744705873193e-05,
      "loss": 0.8311,
      "step": 516910
    },
    {
      "epoch": 1.8116757264471992,
      "grad_norm": 2.859375,
      "learning_rate": 2.200609567720949e-05,
      "loss": 0.8393,
      "step": 516920
    },
    {
      "epoch": 1.8117107739540947,
      "grad_norm": 3.328125,
      "learning_rate": 2.200544664854579e-05,
      "loss": 0.7851,
      "step": 516930
    },
    {
      "epoch": 1.8117458214609905,
      "grad_norm": 2.828125,
      "learning_rate": 2.2004797619882087e-05,
      "loss": 0.8204,
      "step": 516940
    },
    {
      "epoch": 1.8117808689678858,
      "grad_norm": 3.171875,
      "learning_rate": 2.2004148591218385e-05,
      "loss": 0.8443,
      "step": 516950
    },
    {
      "epoch": 1.8118159164747816,
      "grad_norm": 2.71875,
      "learning_rate": 2.2003499562554683e-05,
      "loss": 0.8042,
      "step": 516960
    },
    {
      "epoch": 1.811850963981677,
      "grad_norm": 2.765625,
      "learning_rate": 2.2002850533890978e-05,
      "loss": 0.825,
      "step": 516970
    },
    {
      "epoch": 1.8118860114885726,
      "grad_norm": 2.78125,
      "learning_rate": 2.2002201505227276e-05,
      "loss": 0.8343,
      "step": 516980
    },
    {
      "epoch": 1.8119210589954684,
      "grad_norm": 3.09375,
      "learning_rate": 2.2001552476563574e-05,
      "loss": 0.8859,
      "step": 516990
    },
    {
      "epoch": 1.811956106502364,
      "grad_norm": 3.03125,
      "learning_rate": 2.2000903447899872e-05,
      "loss": 0.8223,
      "step": 517000
    },
    {
      "epoch": 1.8119911540092595,
      "grad_norm": 2.90625,
      "learning_rate": 2.2000254419236173e-05,
      "loss": 0.895,
      "step": 517010
    },
    {
      "epoch": 1.8120262015161552,
      "grad_norm": 2.796875,
      "learning_rate": 2.199960539057247e-05,
      "loss": 0.798,
      "step": 517020
    },
    {
      "epoch": 1.8120612490230508,
      "grad_norm": 2.625,
      "learning_rate": 2.199895636190877e-05,
      "loss": 0.8308,
      "step": 517030
    },
    {
      "epoch": 1.8120962965299463,
      "grad_norm": 2.734375,
      "learning_rate": 2.1998307333245067e-05,
      "loss": 0.853,
      "step": 517040
    },
    {
      "epoch": 1.812131344036842,
      "grad_norm": 2.578125,
      "learning_rate": 2.1997658304581365e-05,
      "loss": 0.742,
      "step": 517050
    },
    {
      "epoch": 1.8121663915437374,
      "grad_norm": 2.421875,
      "learning_rate": 2.1997009275917663e-05,
      "loss": 0.7572,
      "step": 517060
    },
    {
      "epoch": 1.8122014390506331,
      "grad_norm": 2.984375,
      "learning_rate": 2.199636024725396e-05,
      "loss": 0.7966,
      "step": 517070
    },
    {
      "epoch": 1.812236486557529,
      "grad_norm": 2.515625,
      "learning_rate": 2.199571121859026e-05,
      "loss": 0.7519,
      "step": 517080
    },
    {
      "epoch": 1.8122715340644242,
      "grad_norm": 2.8125,
      "learning_rate": 2.1995062189926557e-05,
      "loss": 0.8104,
      "step": 517090
    },
    {
      "epoch": 1.81230658157132,
      "grad_norm": 2.453125,
      "learning_rate": 2.1994413161262855e-05,
      "loss": 0.827,
      "step": 517100
    },
    {
      "epoch": 1.8123416290782155,
      "grad_norm": 2.375,
      "learning_rate": 2.1993764132599153e-05,
      "loss": 0.83,
      "step": 517110
    },
    {
      "epoch": 1.812376676585111,
      "grad_norm": 2.78125,
      "learning_rate": 2.199311510393545e-05,
      "loss": 0.8434,
      "step": 517120
    },
    {
      "epoch": 1.8124117240920068,
      "grad_norm": 3.0625,
      "learning_rate": 2.199246607527175e-05,
      "loss": 0.8374,
      "step": 517130
    },
    {
      "epoch": 1.8124467715989023,
      "grad_norm": 3.015625,
      "learning_rate": 2.1991817046608047e-05,
      "loss": 0.7937,
      "step": 517140
    },
    {
      "epoch": 1.8124818191057979,
      "grad_norm": 3.28125,
      "learning_rate": 2.199116801794435e-05,
      "loss": 0.8842,
      "step": 517150
    },
    {
      "epoch": 1.8125168666126936,
      "grad_norm": 2.578125,
      "learning_rate": 2.1990518989280643e-05,
      "loss": 0.7589,
      "step": 517160
    },
    {
      "epoch": 1.8125519141195892,
      "grad_norm": 2.6875,
      "learning_rate": 2.198986996061694e-05,
      "loss": 0.8134,
      "step": 517170
    },
    {
      "epoch": 1.8125869616264847,
      "grad_norm": 2.859375,
      "learning_rate": 2.198922093195324e-05,
      "loss": 0.8688,
      "step": 517180
    },
    {
      "epoch": 1.8126220091333805,
      "grad_norm": 2.8125,
      "learning_rate": 2.1988571903289537e-05,
      "loss": 0.8631,
      "step": 517190
    },
    {
      "epoch": 1.8126570566402758,
      "grad_norm": 3.140625,
      "learning_rate": 2.1987922874625835e-05,
      "loss": 0.8424,
      "step": 517200
    },
    {
      "epoch": 1.8126921041471715,
      "grad_norm": 2.421875,
      "learning_rate": 2.1987273845962133e-05,
      "loss": 0.8576,
      "step": 517210
    },
    {
      "epoch": 1.812727151654067,
      "grad_norm": 2.8125,
      "learning_rate": 2.198662481729843e-05,
      "loss": 0.8248,
      "step": 517220
    },
    {
      "epoch": 1.8127621991609626,
      "grad_norm": 2.9375,
      "learning_rate": 2.198597578863473e-05,
      "loss": 0.8982,
      "step": 517230
    },
    {
      "epoch": 1.8127972466678584,
      "grad_norm": 3.1875,
      "learning_rate": 2.1985326759971027e-05,
      "loss": 0.817,
      "step": 517240
    },
    {
      "epoch": 1.8128322941747539,
      "grad_norm": 3.03125,
      "learning_rate": 2.1984677731307325e-05,
      "loss": 0.8174,
      "step": 517250
    },
    {
      "epoch": 1.8128673416816494,
      "grad_norm": 2.640625,
      "learning_rate": 2.1984028702643623e-05,
      "loss": 0.84,
      "step": 517260
    },
    {
      "epoch": 1.8129023891885452,
      "grad_norm": 2.96875,
      "learning_rate": 2.1983379673979925e-05,
      "loss": 0.8812,
      "step": 517270
    },
    {
      "epoch": 1.8129374366954407,
      "grad_norm": 3.046875,
      "learning_rate": 2.1982730645316223e-05,
      "loss": 0.758,
      "step": 517280
    },
    {
      "epoch": 1.8129724842023363,
      "grad_norm": 2.953125,
      "learning_rate": 2.198208161665252e-05,
      "loss": 0.7544,
      "step": 517290
    },
    {
      "epoch": 1.813007531709232,
      "grad_norm": 3.109375,
      "learning_rate": 2.198143258798882e-05,
      "loss": 0.8373,
      "step": 517300
    },
    {
      "epoch": 1.8130425792161273,
      "grad_norm": 3.09375,
      "learning_rate": 2.1980783559325117e-05,
      "loss": 0.8323,
      "step": 517310
    },
    {
      "epoch": 1.813077626723023,
      "grad_norm": 3.03125,
      "learning_rate": 2.1980134530661415e-05,
      "loss": 0.8767,
      "step": 517320
    },
    {
      "epoch": 1.8131126742299186,
      "grad_norm": 3.234375,
      "learning_rate": 2.1979485501997713e-05,
      "loss": 1.0127,
      "step": 517330
    },
    {
      "epoch": 1.8131477217368142,
      "grad_norm": 3.265625,
      "learning_rate": 2.197883647333401e-05,
      "loss": 0.8534,
      "step": 517340
    },
    {
      "epoch": 1.81318276924371,
      "grad_norm": 3.1875,
      "learning_rate": 2.1978187444670305e-05,
      "loss": 0.8251,
      "step": 517350
    },
    {
      "epoch": 1.8132178167506054,
      "grad_norm": 3.125,
      "learning_rate": 2.1977538416006603e-05,
      "loss": 0.8174,
      "step": 517360
    },
    {
      "epoch": 1.813252864257501,
      "grad_norm": 3.078125,
      "learning_rate": 2.19768893873429e-05,
      "loss": 0.8221,
      "step": 517370
    },
    {
      "epoch": 1.8132879117643967,
      "grad_norm": 2.84375,
      "learning_rate": 2.1976240358679203e-05,
      "loss": 0.8062,
      "step": 517380
    },
    {
      "epoch": 1.8133229592712923,
      "grad_norm": 2.984375,
      "learning_rate": 2.19755913300155e-05,
      "loss": 0.772,
      "step": 517390
    },
    {
      "epoch": 1.8133580067781878,
      "grad_norm": 3.046875,
      "learning_rate": 2.19749423013518e-05,
      "loss": 0.7623,
      "step": 517400
    },
    {
      "epoch": 1.8133930542850836,
      "grad_norm": 2.734375,
      "learning_rate": 2.1974293272688097e-05,
      "loss": 0.7638,
      "step": 517410
    },
    {
      "epoch": 1.8134281017919789,
      "grad_norm": 2.578125,
      "learning_rate": 2.1973644244024395e-05,
      "loss": 0.7802,
      "step": 517420
    },
    {
      "epoch": 1.8134631492988746,
      "grad_norm": 2.8125,
      "learning_rate": 2.1972995215360693e-05,
      "loss": 0.8237,
      "step": 517430
    },
    {
      "epoch": 1.8134981968057702,
      "grad_norm": 2.875,
      "learning_rate": 2.197234618669699e-05,
      "loss": 0.8282,
      "step": 517440
    },
    {
      "epoch": 1.8135332443126657,
      "grad_norm": 3.25,
      "learning_rate": 2.197169715803329e-05,
      "loss": 0.8769,
      "step": 517450
    },
    {
      "epoch": 1.8135682918195615,
      "grad_norm": 2.703125,
      "learning_rate": 2.1971048129369587e-05,
      "loss": 0.8179,
      "step": 517460
    },
    {
      "epoch": 1.813603339326457,
      "grad_norm": 2.59375,
      "learning_rate": 2.1970399100705885e-05,
      "loss": 0.8306,
      "step": 517470
    },
    {
      "epoch": 1.8136383868333525,
      "grad_norm": 2.4375,
      "learning_rate": 2.1969750072042183e-05,
      "loss": 0.8246,
      "step": 517480
    },
    {
      "epoch": 1.8136734343402483,
      "grad_norm": 2.875,
      "learning_rate": 2.196910104337848e-05,
      "loss": 0.7893,
      "step": 517490
    },
    {
      "epoch": 1.8137084818471438,
      "grad_norm": 2.828125,
      "learning_rate": 2.196845201471478e-05,
      "loss": 0.8172,
      "step": 517500
    },
    {
      "epoch": 1.8137435293540394,
      "grad_norm": 2.84375,
      "learning_rate": 2.1967802986051077e-05,
      "loss": 0.7874,
      "step": 517510
    },
    {
      "epoch": 1.8137785768609351,
      "grad_norm": 2.96875,
      "learning_rate": 2.1967153957387378e-05,
      "loss": 0.7757,
      "step": 517520
    },
    {
      "epoch": 1.8138136243678304,
      "grad_norm": 3.21875,
      "learning_rate": 2.1966504928723673e-05,
      "loss": 0.8034,
      "step": 517530
    },
    {
      "epoch": 1.8138486718747262,
      "grad_norm": 3.109375,
      "learning_rate": 2.196585590005997e-05,
      "loss": 0.8879,
      "step": 517540
    },
    {
      "epoch": 1.8138837193816217,
      "grad_norm": 3.15625,
      "learning_rate": 2.196520687139627e-05,
      "loss": 0.8563,
      "step": 517550
    },
    {
      "epoch": 1.8139187668885173,
      "grad_norm": 2.546875,
      "learning_rate": 2.1964557842732567e-05,
      "loss": 0.8669,
      "step": 517560
    },
    {
      "epoch": 1.813953814395413,
      "grad_norm": 2.5625,
      "learning_rate": 2.1963908814068865e-05,
      "loss": 0.7343,
      "step": 517570
    },
    {
      "epoch": 1.8139888619023086,
      "grad_norm": 2.59375,
      "learning_rate": 2.1963259785405163e-05,
      "loss": 0.7973,
      "step": 517580
    },
    {
      "epoch": 1.814023909409204,
      "grad_norm": 3.390625,
      "learning_rate": 2.196261075674146e-05,
      "loss": 0.8696,
      "step": 517590
    },
    {
      "epoch": 1.8140589569160999,
      "grad_norm": 2.40625,
      "learning_rate": 2.196196172807776e-05,
      "loss": 0.8089,
      "step": 517600
    },
    {
      "epoch": 1.8140940044229954,
      "grad_norm": 2.921875,
      "learning_rate": 2.1961312699414057e-05,
      "loss": 0.7829,
      "step": 517610
    },
    {
      "epoch": 1.814129051929891,
      "grad_norm": 2.625,
      "learning_rate": 2.1960663670750355e-05,
      "loss": 0.7622,
      "step": 517620
    },
    {
      "epoch": 1.8141640994367867,
      "grad_norm": 2.53125,
      "learning_rate": 2.1960014642086656e-05,
      "loss": 0.8005,
      "step": 517630
    },
    {
      "epoch": 1.814199146943682,
      "grad_norm": 2.953125,
      "learning_rate": 2.1959365613422954e-05,
      "loss": 0.8716,
      "step": 517640
    },
    {
      "epoch": 1.8142341944505778,
      "grad_norm": 2.546875,
      "learning_rate": 2.1958716584759252e-05,
      "loss": 0.7722,
      "step": 517650
    },
    {
      "epoch": 1.8142692419574733,
      "grad_norm": 2.953125,
      "learning_rate": 2.195806755609555e-05,
      "loss": 0.7263,
      "step": 517660
    },
    {
      "epoch": 1.8143042894643688,
      "grad_norm": 2.515625,
      "learning_rate": 2.1957418527431848e-05,
      "loss": 0.7801,
      "step": 517670
    },
    {
      "epoch": 1.8143393369712646,
      "grad_norm": 3.578125,
      "learning_rate": 2.1956769498768146e-05,
      "loss": 0.8763,
      "step": 517680
    },
    {
      "epoch": 1.8143743844781601,
      "grad_norm": 3.171875,
      "learning_rate": 2.1956120470104444e-05,
      "loss": 0.9489,
      "step": 517690
    },
    {
      "epoch": 1.8144094319850557,
      "grad_norm": 3.0,
      "learning_rate": 2.1955471441440742e-05,
      "loss": 0.7954,
      "step": 517700
    },
    {
      "epoch": 1.8144444794919514,
      "grad_norm": 3.046875,
      "learning_rate": 2.195482241277704e-05,
      "loss": 0.8729,
      "step": 517710
    },
    {
      "epoch": 1.814479526998847,
      "grad_norm": 3.078125,
      "learning_rate": 2.1954173384113335e-05,
      "loss": 0.8868,
      "step": 517720
    },
    {
      "epoch": 1.8145145745057425,
      "grad_norm": 2.84375,
      "learning_rate": 2.1953524355449633e-05,
      "loss": 0.9489,
      "step": 517730
    },
    {
      "epoch": 1.8145496220126383,
      "grad_norm": 2.59375,
      "learning_rate": 2.195287532678593e-05,
      "loss": 0.8221,
      "step": 517740
    },
    {
      "epoch": 1.8145846695195336,
      "grad_norm": 3.09375,
      "learning_rate": 2.1952226298122232e-05,
      "loss": 0.8469,
      "step": 517750
    },
    {
      "epoch": 1.8146197170264293,
      "grad_norm": 2.828125,
      "learning_rate": 2.195157726945853e-05,
      "loss": 0.8308,
      "step": 517760
    },
    {
      "epoch": 1.814654764533325,
      "grad_norm": 2.5625,
      "learning_rate": 2.1950928240794828e-05,
      "loss": 0.8544,
      "step": 517770
    },
    {
      "epoch": 1.8146898120402204,
      "grad_norm": 3.0625,
      "learning_rate": 2.1950279212131126e-05,
      "loss": 0.8679,
      "step": 517780
    },
    {
      "epoch": 1.8147248595471162,
      "grad_norm": 3.0625,
      "learning_rate": 2.1949630183467424e-05,
      "loss": 0.8736,
      "step": 517790
    },
    {
      "epoch": 1.8147599070540117,
      "grad_norm": 2.65625,
      "learning_rate": 2.1948981154803722e-05,
      "loss": 0.7437,
      "step": 517800
    },
    {
      "epoch": 1.8147949545609072,
      "grad_norm": 2.828125,
      "learning_rate": 2.194833212614002e-05,
      "loss": 0.8892,
      "step": 517810
    },
    {
      "epoch": 1.814830002067803,
      "grad_norm": 3.0625,
      "learning_rate": 2.1947683097476318e-05,
      "loss": 0.8353,
      "step": 517820
    },
    {
      "epoch": 1.8148650495746985,
      "grad_norm": 2.71875,
      "learning_rate": 2.1947034068812616e-05,
      "loss": 0.7719,
      "step": 517830
    },
    {
      "epoch": 1.814900097081594,
      "grad_norm": 3.390625,
      "learning_rate": 2.1946385040148914e-05,
      "loss": 0.761,
      "step": 517840
    },
    {
      "epoch": 1.8149351445884898,
      "grad_norm": 2.5,
      "learning_rate": 2.1945736011485212e-05,
      "loss": 0.8492,
      "step": 517850
    },
    {
      "epoch": 1.8149701920953853,
      "grad_norm": 3.09375,
      "learning_rate": 2.194508698282151e-05,
      "loss": 0.9345,
      "step": 517860
    },
    {
      "epoch": 1.8150052396022809,
      "grad_norm": 2.984375,
      "learning_rate": 2.1944437954157808e-05,
      "loss": 0.756,
      "step": 517870
    },
    {
      "epoch": 1.8150402871091766,
      "grad_norm": 3.421875,
      "learning_rate": 2.1943788925494106e-05,
      "loss": 0.8489,
      "step": 517880
    },
    {
      "epoch": 1.815075334616072,
      "grad_norm": 3.34375,
      "learning_rate": 2.1943139896830407e-05,
      "loss": 0.9629,
      "step": 517890
    },
    {
      "epoch": 1.8151103821229677,
      "grad_norm": 3.546875,
      "learning_rate": 2.1942490868166705e-05,
      "loss": 0.7016,
      "step": 517900
    },
    {
      "epoch": 1.8151454296298632,
      "grad_norm": 2.921875,
      "learning_rate": 2.1941841839503e-05,
      "loss": 0.8183,
      "step": 517910
    },
    {
      "epoch": 1.8151804771367588,
      "grad_norm": 3.171875,
      "learning_rate": 2.1941192810839298e-05,
      "loss": 0.8018,
      "step": 517920
    },
    {
      "epoch": 1.8152155246436545,
      "grad_norm": 2.875,
      "learning_rate": 2.1940543782175596e-05,
      "loss": 0.8067,
      "step": 517930
    },
    {
      "epoch": 1.81525057215055,
      "grad_norm": 3.0,
      "learning_rate": 2.1939894753511894e-05,
      "loss": 0.7566,
      "step": 517940
    },
    {
      "epoch": 1.8152856196574456,
      "grad_norm": 3.28125,
      "learning_rate": 2.1939245724848192e-05,
      "loss": 0.8499,
      "step": 517950
    },
    {
      "epoch": 1.8153206671643414,
      "grad_norm": 2.328125,
      "learning_rate": 2.193859669618449e-05,
      "loss": 0.8475,
      "step": 517960
    },
    {
      "epoch": 1.815355714671237,
      "grad_norm": 2.9375,
      "learning_rate": 2.1937947667520788e-05,
      "loss": 0.8682,
      "step": 517970
    },
    {
      "epoch": 1.8153907621781324,
      "grad_norm": 3.109375,
      "learning_rate": 2.1937298638857086e-05,
      "loss": 0.795,
      "step": 517980
    },
    {
      "epoch": 1.8154258096850282,
      "grad_norm": 2.90625,
      "learning_rate": 2.1936649610193384e-05,
      "loss": 0.8049,
      "step": 517990
    },
    {
      "epoch": 1.8154608571919235,
      "grad_norm": 3.28125,
      "learning_rate": 2.1936000581529685e-05,
      "loss": 0.8743,
      "step": 518000
    },
    {
      "epoch": 1.8154959046988193,
      "grad_norm": 2.390625,
      "learning_rate": 2.1935351552865983e-05,
      "loss": 0.8598,
      "step": 518010
    },
    {
      "epoch": 1.8155309522057148,
      "grad_norm": 2.875,
      "learning_rate": 2.193470252420228e-05,
      "loss": 0.9138,
      "step": 518020
    },
    {
      "epoch": 1.8155659997126103,
      "grad_norm": 2.328125,
      "learning_rate": 2.193405349553858e-05,
      "loss": 0.7639,
      "step": 518030
    },
    {
      "epoch": 1.815601047219506,
      "grad_norm": 2.921875,
      "learning_rate": 2.1933404466874877e-05,
      "loss": 0.8491,
      "step": 518040
    },
    {
      "epoch": 1.8156360947264016,
      "grad_norm": 2.65625,
      "learning_rate": 2.1932755438211175e-05,
      "loss": 0.7848,
      "step": 518050
    },
    {
      "epoch": 1.8156711422332972,
      "grad_norm": 2.609375,
      "learning_rate": 2.1932106409547473e-05,
      "loss": 0.7662,
      "step": 518060
    },
    {
      "epoch": 1.815706189740193,
      "grad_norm": 2.71875,
      "learning_rate": 2.193145738088377e-05,
      "loss": 0.8026,
      "step": 518070
    },
    {
      "epoch": 1.8157412372470885,
      "grad_norm": 2.890625,
      "learning_rate": 2.193080835222007e-05,
      "loss": 0.8486,
      "step": 518080
    },
    {
      "epoch": 1.815776284753984,
      "grad_norm": 2.75,
      "learning_rate": 2.1930159323556367e-05,
      "loss": 0.8214,
      "step": 518090
    },
    {
      "epoch": 1.8158113322608798,
      "grad_norm": 2.734375,
      "learning_rate": 2.1929510294892662e-05,
      "loss": 0.817,
      "step": 518100
    },
    {
      "epoch": 1.815846379767775,
      "grad_norm": 3.328125,
      "learning_rate": 2.1928861266228963e-05,
      "loss": 0.8851,
      "step": 518110
    },
    {
      "epoch": 1.8158814272746708,
      "grad_norm": 2.890625,
      "learning_rate": 2.192821223756526e-05,
      "loss": 0.8648,
      "step": 518120
    },
    {
      "epoch": 1.8159164747815664,
      "grad_norm": 2.671875,
      "learning_rate": 2.192756320890156e-05,
      "loss": 0.7877,
      "step": 518130
    },
    {
      "epoch": 1.815951522288462,
      "grad_norm": 2.921875,
      "learning_rate": 2.1926914180237857e-05,
      "loss": 0.7879,
      "step": 518140
    },
    {
      "epoch": 1.8159865697953577,
      "grad_norm": 2.890625,
      "learning_rate": 2.1926265151574155e-05,
      "loss": 0.8824,
      "step": 518150
    },
    {
      "epoch": 1.8160216173022532,
      "grad_norm": 2.765625,
      "learning_rate": 2.1925616122910453e-05,
      "loss": 0.7537,
      "step": 518160
    },
    {
      "epoch": 1.8160566648091487,
      "grad_norm": 2.421875,
      "learning_rate": 2.192496709424675e-05,
      "loss": 0.8305,
      "step": 518170
    },
    {
      "epoch": 1.8160917123160445,
      "grad_norm": 2.796875,
      "learning_rate": 2.192431806558305e-05,
      "loss": 0.7776,
      "step": 518180
    },
    {
      "epoch": 1.81612675982294,
      "grad_norm": 2.703125,
      "learning_rate": 2.1923669036919347e-05,
      "loss": 0.8,
      "step": 518190
    },
    {
      "epoch": 1.8161618073298356,
      "grad_norm": 2.109375,
      "learning_rate": 2.1923020008255645e-05,
      "loss": 0.8323,
      "step": 518200
    },
    {
      "epoch": 1.8161968548367313,
      "grad_norm": 2.9375,
      "learning_rate": 2.1922370979591943e-05,
      "loss": 0.8178,
      "step": 518210
    },
    {
      "epoch": 1.8162319023436266,
      "grad_norm": 2.625,
      "learning_rate": 2.192172195092824e-05,
      "loss": 0.8258,
      "step": 518220
    },
    {
      "epoch": 1.8162669498505224,
      "grad_norm": 2.53125,
      "learning_rate": 2.192107292226454e-05,
      "loss": 0.7487,
      "step": 518230
    },
    {
      "epoch": 1.816301997357418,
      "grad_norm": 2.734375,
      "learning_rate": 2.1920423893600837e-05,
      "loss": 0.9006,
      "step": 518240
    },
    {
      "epoch": 1.8163370448643135,
      "grad_norm": 3.09375,
      "learning_rate": 2.191977486493714e-05,
      "loss": 0.7723,
      "step": 518250
    },
    {
      "epoch": 1.8163720923712092,
      "grad_norm": 3.140625,
      "learning_rate": 2.1919125836273437e-05,
      "loss": 0.8964,
      "step": 518260
    },
    {
      "epoch": 1.8164071398781048,
      "grad_norm": 2.875,
      "learning_rate": 2.1918476807609735e-05,
      "loss": 0.8325,
      "step": 518270
    },
    {
      "epoch": 1.8164421873850003,
      "grad_norm": 3.21875,
      "learning_rate": 2.1917827778946033e-05,
      "loss": 0.8064,
      "step": 518280
    },
    {
      "epoch": 1.816477234891896,
      "grad_norm": 3.0,
      "learning_rate": 2.1917178750282327e-05,
      "loss": 0.8342,
      "step": 518290
    },
    {
      "epoch": 1.8165122823987916,
      "grad_norm": 2.921875,
      "learning_rate": 2.1916529721618625e-05,
      "loss": 0.8228,
      "step": 518300
    },
    {
      "epoch": 1.8165473299056871,
      "grad_norm": 2.8125,
      "learning_rate": 2.1915880692954923e-05,
      "loss": 0.8898,
      "step": 518310
    },
    {
      "epoch": 1.8165823774125829,
      "grad_norm": 3.328125,
      "learning_rate": 2.191523166429122e-05,
      "loss": 0.8424,
      "step": 518320
    },
    {
      "epoch": 1.8166174249194782,
      "grad_norm": 2.703125,
      "learning_rate": 2.191458263562752e-05,
      "loss": 0.8348,
      "step": 518330
    },
    {
      "epoch": 1.816652472426374,
      "grad_norm": 2.578125,
      "learning_rate": 2.1913933606963817e-05,
      "loss": 0.8289,
      "step": 518340
    },
    {
      "epoch": 1.8166875199332697,
      "grad_norm": 2.921875,
      "learning_rate": 2.1913284578300115e-05,
      "loss": 0.8345,
      "step": 518350
    },
    {
      "epoch": 1.816722567440165,
      "grad_norm": 3.0625,
      "learning_rate": 2.1912635549636413e-05,
      "loss": 0.8422,
      "step": 518360
    },
    {
      "epoch": 1.8167576149470608,
      "grad_norm": 3.328125,
      "learning_rate": 2.1911986520972715e-05,
      "loss": 0.8731,
      "step": 518370
    },
    {
      "epoch": 1.8167926624539563,
      "grad_norm": 3.03125,
      "learning_rate": 2.1911337492309013e-05,
      "loss": 0.8381,
      "step": 518380
    },
    {
      "epoch": 1.8168277099608519,
      "grad_norm": 2.828125,
      "learning_rate": 2.191068846364531e-05,
      "loss": 0.7764,
      "step": 518390
    },
    {
      "epoch": 1.8168627574677476,
      "grad_norm": 2.890625,
      "learning_rate": 2.191003943498161e-05,
      "loss": 0.825,
      "step": 518400
    },
    {
      "epoch": 1.8168978049746431,
      "grad_norm": 2.890625,
      "learning_rate": 2.1909390406317907e-05,
      "loss": 0.7511,
      "step": 518410
    },
    {
      "epoch": 1.8169328524815387,
      "grad_norm": 3.015625,
      "learning_rate": 2.1908741377654205e-05,
      "loss": 0.8219,
      "step": 518420
    },
    {
      "epoch": 1.8169678999884344,
      "grad_norm": 3.21875,
      "learning_rate": 2.1908092348990503e-05,
      "loss": 0.8386,
      "step": 518430
    },
    {
      "epoch": 1.8170029474953298,
      "grad_norm": 3.203125,
      "learning_rate": 2.19074433203268e-05,
      "loss": 0.7652,
      "step": 518440
    },
    {
      "epoch": 1.8170379950022255,
      "grad_norm": 3.15625,
      "learning_rate": 2.19067942916631e-05,
      "loss": 0.8393,
      "step": 518450
    },
    {
      "epoch": 1.8170730425091213,
      "grad_norm": 3.03125,
      "learning_rate": 2.1906145262999397e-05,
      "loss": 0.8233,
      "step": 518460
    },
    {
      "epoch": 1.8171080900160166,
      "grad_norm": 3.03125,
      "learning_rate": 2.190549623433569e-05,
      "loss": 0.753,
      "step": 518470
    },
    {
      "epoch": 1.8171431375229123,
      "grad_norm": 2.859375,
      "learning_rate": 2.1904847205671993e-05,
      "loss": 0.8616,
      "step": 518480
    },
    {
      "epoch": 1.8171781850298079,
      "grad_norm": 2.921875,
      "learning_rate": 2.190419817700829e-05,
      "loss": 0.806,
      "step": 518490
    },
    {
      "epoch": 1.8172132325367034,
      "grad_norm": 2.8125,
      "learning_rate": 2.190354914834459e-05,
      "loss": 0.8014,
      "step": 518500
    },
    {
      "epoch": 1.8172482800435992,
      "grad_norm": 3.109375,
      "learning_rate": 2.1902900119680887e-05,
      "loss": 0.7781,
      "step": 518510
    },
    {
      "epoch": 1.8172833275504947,
      "grad_norm": 2.953125,
      "learning_rate": 2.1902251091017185e-05,
      "loss": 0.8979,
      "step": 518520
    },
    {
      "epoch": 1.8173183750573902,
      "grad_norm": 3.578125,
      "learning_rate": 2.1901602062353483e-05,
      "loss": 0.8414,
      "step": 518530
    },
    {
      "epoch": 1.817353422564286,
      "grad_norm": 2.640625,
      "learning_rate": 2.190095303368978e-05,
      "loss": 0.7762,
      "step": 518540
    },
    {
      "epoch": 1.8173884700711815,
      "grad_norm": 2.6875,
      "learning_rate": 2.190030400502608e-05,
      "loss": 0.754,
      "step": 518550
    },
    {
      "epoch": 1.817423517578077,
      "grad_norm": 2.734375,
      "learning_rate": 2.1899654976362377e-05,
      "loss": 0.8616,
      "step": 518560
    },
    {
      "epoch": 1.8174585650849728,
      "grad_norm": 3.03125,
      "learning_rate": 2.1899005947698675e-05,
      "loss": 0.8393,
      "step": 518570
    },
    {
      "epoch": 1.8174936125918681,
      "grad_norm": 2.8125,
      "learning_rate": 2.1898356919034973e-05,
      "loss": 0.7919,
      "step": 518580
    },
    {
      "epoch": 1.817528660098764,
      "grad_norm": 3.09375,
      "learning_rate": 2.189770789037127e-05,
      "loss": 0.8411,
      "step": 518590
    },
    {
      "epoch": 1.8175637076056594,
      "grad_norm": 2.796875,
      "learning_rate": 2.189705886170757e-05,
      "loss": 0.7394,
      "step": 518600
    },
    {
      "epoch": 1.817598755112555,
      "grad_norm": 2.875,
      "learning_rate": 2.1896409833043867e-05,
      "loss": 0.8306,
      "step": 518610
    },
    {
      "epoch": 1.8176338026194507,
      "grad_norm": 2.8125,
      "learning_rate": 2.1895760804380168e-05,
      "loss": 0.9167,
      "step": 518620
    },
    {
      "epoch": 1.8176688501263463,
      "grad_norm": 3.078125,
      "learning_rate": 2.1895111775716466e-05,
      "loss": 0.8525,
      "step": 518630
    },
    {
      "epoch": 1.8177038976332418,
      "grad_norm": 3.125,
      "learning_rate": 2.1894462747052764e-05,
      "loss": 0.7561,
      "step": 518640
    },
    {
      "epoch": 1.8177389451401376,
      "grad_norm": 3.359375,
      "learning_rate": 2.1893813718389062e-05,
      "loss": 0.929,
      "step": 518650
    },
    {
      "epoch": 1.817773992647033,
      "grad_norm": 2.671875,
      "learning_rate": 2.1893164689725357e-05,
      "loss": 0.8142,
      "step": 518660
    },
    {
      "epoch": 1.8178090401539286,
      "grad_norm": 3.21875,
      "learning_rate": 2.1892515661061655e-05,
      "loss": 0.8597,
      "step": 518670
    },
    {
      "epoch": 1.8178440876608244,
      "grad_norm": 3.1875,
      "learning_rate": 2.1891866632397953e-05,
      "loss": 0.8812,
      "step": 518680
    },
    {
      "epoch": 1.8178791351677197,
      "grad_norm": 3.4375,
      "learning_rate": 2.189121760373425e-05,
      "loss": 0.8868,
      "step": 518690
    },
    {
      "epoch": 1.8179141826746155,
      "grad_norm": 3.40625,
      "learning_rate": 2.189056857507055e-05,
      "loss": 0.8066,
      "step": 518700
    },
    {
      "epoch": 1.817949230181511,
      "grad_norm": 2.765625,
      "learning_rate": 2.1889919546406847e-05,
      "loss": 0.8405,
      "step": 518710
    },
    {
      "epoch": 1.8179842776884065,
      "grad_norm": 3.484375,
      "learning_rate": 2.1889270517743145e-05,
      "loss": 0.75,
      "step": 518720
    },
    {
      "epoch": 1.8180193251953023,
      "grad_norm": 2.59375,
      "learning_rate": 2.1888621489079446e-05,
      "loss": 0.9191,
      "step": 518730
    },
    {
      "epoch": 1.8180543727021978,
      "grad_norm": 2.890625,
      "learning_rate": 2.1887972460415744e-05,
      "loss": 0.76,
      "step": 518740
    },
    {
      "epoch": 1.8180894202090934,
      "grad_norm": 2.671875,
      "learning_rate": 2.1887323431752042e-05,
      "loss": 0.7478,
      "step": 518750
    },
    {
      "epoch": 1.8181244677159891,
      "grad_norm": 2.828125,
      "learning_rate": 2.188667440308834e-05,
      "loss": 0.7413,
      "step": 518760
    },
    {
      "epoch": 1.8181595152228847,
      "grad_norm": 2.890625,
      "learning_rate": 2.1886025374424638e-05,
      "loss": 0.8026,
      "step": 518770
    },
    {
      "epoch": 1.8181945627297802,
      "grad_norm": 2.53125,
      "learning_rate": 2.1885376345760936e-05,
      "loss": 0.8126,
      "step": 518780
    },
    {
      "epoch": 1.818229610236676,
      "grad_norm": 3.046875,
      "learning_rate": 2.1884727317097234e-05,
      "loss": 0.845,
      "step": 518790
    },
    {
      "epoch": 1.8182646577435713,
      "grad_norm": 2.875,
      "learning_rate": 2.1884078288433532e-05,
      "loss": 0.8997,
      "step": 518800
    },
    {
      "epoch": 1.818299705250467,
      "grad_norm": 2.875,
      "learning_rate": 2.188342925976983e-05,
      "loss": 0.7759,
      "step": 518810
    },
    {
      "epoch": 1.8183347527573626,
      "grad_norm": 2.515625,
      "learning_rate": 2.1882780231106128e-05,
      "loss": 0.7465,
      "step": 518820
    },
    {
      "epoch": 1.818369800264258,
      "grad_norm": 2.9375,
      "learning_rate": 2.1882131202442426e-05,
      "loss": 0.816,
      "step": 518830
    },
    {
      "epoch": 1.8184048477711539,
      "grad_norm": 3.015625,
      "learning_rate": 2.1881482173778724e-05,
      "loss": 0.8838,
      "step": 518840
    },
    {
      "epoch": 1.8184398952780494,
      "grad_norm": 2.71875,
      "learning_rate": 2.1880833145115022e-05,
      "loss": 0.7766,
      "step": 518850
    },
    {
      "epoch": 1.818474942784945,
      "grad_norm": 2.875,
      "learning_rate": 2.188018411645132e-05,
      "loss": 0.7392,
      "step": 518860
    },
    {
      "epoch": 1.8185099902918407,
      "grad_norm": 3.015625,
      "learning_rate": 2.1879535087787618e-05,
      "loss": 0.776,
      "step": 518870
    },
    {
      "epoch": 1.8185450377987362,
      "grad_norm": 2.875,
      "learning_rate": 2.1878886059123916e-05,
      "loss": 0.7811,
      "step": 518880
    },
    {
      "epoch": 1.8185800853056318,
      "grad_norm": 2.953125,
      "learning_rate": 2.1878237030460214e-05,
      "loss": 0.8281,
      "step": 518890
    },
    {
      "epoch": 1.8186151328125275,
      "grad_norm": 2.4375,
      "learning_rate": 2.1877588001796512e-05,
      "loss": 0.8071,
      "step": 518900
    },
    {
      "epoch": 1.8186501803194228,
      "grad_norm": 2.921875,
      "learning_rate": 2.187693897313281e-05,
      "loss": 0.8407,
      "step": 518910
    },
    {
      "epoch": 1.8186852278263186,
      "grad_norm": 2.984375,
      "learning_rate": 2.1876289944469108e-05,
      "loss": 0.8461,
      "step": 518920
    },
    {
      "epoch": 1.8187202753332141,
      "grad_norm": 2.65625,
      "learning_rate": 2.1875640915805406e-05,
      "loss": 0.8028,
      "step": 518930
    },
    {
      "epoch": 1.8187553228401097,
      "grad_norm": 3.125,
      "learning_rate": 2.1874991887141704e-05,
      "loss": 0.8431,
      "step": 518940
    },
    {
      "epoch": 1.8187903703470054,
      "grad_norm": 2.828125,
      "learning_rate": 2.1874342858478002e-05,
      "loss": 0.7512,
      "step": 518950
    },
    {
      "epoch": 1.818825417853901,
      "grad_norm": 2.875,
      "learning_rate": 2.18736938298143e-05,
      "loss": 0.7881,
      "step": 518960
    },
    {
      "epoch": 1.8188604653607965,
      "grad_norm": 2.28125,
      "learning_rate": 2.1873044801150598e-05,
      "loss": 0.7538,
      "step": 518970
    },
    {
      "epoch": 1.8188955128676922,
      "grad_norm": 3.03125,
      "learning_rate": 2.18723957724869e-05,
      "loss": 0.7779,
      "step": 518980
    },
    {
      "epoch": 1.8189305603745878,
      "grad_norm": 2.828125,
      "learning_rate": 2.1871746743823197e-05,
      "loss": 0.8515,
      "step": 518990
    },
    {
      "epoch": 1.8189656078814833,
      "grad_norm": 2.984375,
      "learning_rate": 2.1871097715159495e-05,
      "loss": 0.8139,
      "step": 519000
    },
    {
      "epoch": 1.819000655388379,
      "grad_norm": 3.078125,
      "learning_rate": 2.1870448686495793e-05,
      "loss": 0.8306,
      "step": 519010
    },
    {
      "epoch": 1.8190357028952744,
      "grad_norm": 2.59375,
      "learning_rate": 2.186979965783209e-05,
      "loss": 0.8096,
      "step": 519020
    },
    {
      "epoch": 1.8190707504021701,
      "grad_norm": 3.09375,
      "learning_rate": 2.186915062916839e-05,
      "loss": 0.8553,
      "step": 519030
    },
    {
      "epoch": 1.819105797909066,
      "grad_norm": 3.171875,
      "learning_rate": 2.1868501600504684e-05,
      "loss": 0.9025,
      "step": 519040
    },
    {
      "epoch": 1.8191408454159612,
      "grad_norm": 3.25,
      "learning_rate": 2.1867852571840982e-05,
      "loss": 0.8597,
      "step": 519050
    },
    {
      "epoch": 1.819175892922857,
      "grad_norm": 2.984375,
      "learning_rate": 2.186720354317728e-05,
      "loss": 0.8573,
      "step": 519060
    },
    {
      "epoch": 1.8192109404297525,
      "grad_norm": 2.578125,
      "learning_rate": 2.1866554514513578e-05,
      "loss": 0.7771,
      "step": 519070
    },
    {
      "epoch": 1.819245987936648,
      "grad_norm": 2.546875,
      "learning_rate": 2.1865905485849876e-05,
      "loss": 0.8477,
      "step": 519080
    },
    {
      "epoch": 1.8192810354435438,
      "grad_norm": 2.921875,
      "learning_rate": 2.1865256457186174e-05,
      "loss": 0.8076,
      "step": 519090
    },
    {
      "epoch": 1.8193160829504393,
      "grad_norm": 3.1875,
      "learning_rate": 2.1864607428522475e-05,
      "loss": 0.8609,
      "step": 519100
    },
    {
      "epoch": 1.8193511304573349,
      "grad_norm": 3.046875,
      "learning_rate": 2.1863958399858773e-05,
      "loss": 0.8307,
      "step": 519110
    },
    {
      "epoch": 1.8193861779642306,
      "grad_norm": 2.546875,
      "learning_rate": 2.186330937119507e-05,
      "loss": 0.7829,
      "step": 519120
    },
    {
      "epoch": 1.819421225471126,
      "grad_norm": 2.625,
      "learning_rate": 2.186266034253137e-05,
      "loss": 0.8177,
      "step": 519130
    },
    {
      "epoch": 1.8194562729780217,
      "grad_norm": 2.8125,
      "learning_rate": 2.1862011313867667e-05,
      "loss": 0.8171,
      "step": 519140
    },
    {
      "epoch": 1.8194913204849175,
      "grad_norm": 2.96875,
      "learning_rate": 2.1861362285203965e-05,
      "loss": 0.793,
      "step": 519150
    },
    {
      "epoch": 1.8195263679918128,
      "grad_norm": 3.375,
      "learning_rate": 2.1860713256540263e-05,
      "loss": 0.9252,
      "step": 519160
    },
    {
      "epoch": 1.8195614154987085,
      "grad_norm": 3.1875,
      "learning_rate": 2.186006422787656e-05,
      "loss": 0.8401,
      "step": 519170
    },
    {
      "epoch": 1.819596463005604,
      "grad_norm": 3.453125,
      "learning_rate": 2.185941519921286e-05,
      "loss": 0.9267,
      "step": 519180
    },
    {
      "epoch": 1.8196315105124996,
      "grad_norm": 2.984375,
      "learning_rate": 2.1858766170549157e-05,
      "loss": 0.8591,
      "step": 519190
    },
    {
      "epoch": 1.8196665580193954,
      "grad_norm": 2.921875,
      "learning_rate": 2.1858117141885455e-05,
      "loss": 0.9189,
      "step": 519200
    },
    {
      "epoch": 1.819701605526291,
      "grad_norm": 2.765625,
      "learning_rate": 2.1857468113221753e-05,
      "loss": 0.8641,
      "step": 519210
    },
    {
      "epoch": 1.8197366530331864,
      "grad_norm": 3.1875,
      "learning_rate": 2.185681908455805e-05,
      "loss": 0.7692,
      "step": 519220
    },
    {
      "epoch": 1.8197717005400822,
      "grad_norm": 3.0625,
      "learning_rate": 2.185617005589435e-05,
      "loss": 0.8427,
      "step": 519230
    },
    {
      "epoch": 1.8198067480469777,
      "grad_norm": 3.09375,
      "learning_rate": 2.1855521027230647e-05,
      "loss": 0.8749,
      "step": 519240
    },
    {
      "epoch": 1.8198417955538733,
      "grad_norm": 3.0625,
      "learning_rate": 2.1854871998566945e-05,
      "loss": 0.8714,
      "step": 519250
    },
    {
      "epoch": 1.819876843060769,
      "grad_norm": 3.03125,
      "learning_rate": 2.1854222969903243e-05,
      "loss": 0.7628,
      "step": 519260
    },
    {
      "epoch": 1.8199118905676643,
      "grad_norm": 2.84375,
      "learning_rate": 2.185357394123954e-05,
      "loss": 0.8148,
      "step": 519270
    },
    {
      "epoch": 1.81994693807456,
      "grad_norm": 2.796875,
      "learning_rate": 2.185292491257584e-05,
      "loss": 0.8412,
      "step": 519280
    },
    {
      "epoch": 1.8199819855814556,
      "grad_norm": 2.890625,
      "learning_rate": 2.1852275883912137e-05,
      "loss": 0.8189,
      "step": 519290
    },
    {
      "epoch": 1.8200170330883512,
      "grad_norm": 2.765625,
      "learning_rate": 2.1851626855248435e-05,
      "loss": 0.8113,
      "step": 519300
    },
    {
      "epoch": 1.820052080595247,
      "grad_norm": 2.6875,
      "learning_rate": 2.1850977826584733e-05,
      "loss": 0.8791,
      "step": 519310
    },
    {
      "epoch": 1.8200871281021425,
      "grad_norm": 2.75,
      "learning_rate": 2.185032879792103e-05,
      "loss": 0.7442,
      "step": 519320
    },
    {
      "epoch": 1.820122175609038,
      "grad_norm": 2.96875,
      "learning_rate": 2.184967976925733e-05,
      "loss": 0.809,
      "step": 519330
    },
    {
      "epoch": 1.8201572231159338,
      "grad_norm": 3.25,
      "learning_rate": 2.1849030740593627e-05,
      "loss": 0.899,
      "step": 519340
    },
    {
      "epoch": 1.8201922706228293,
      "grad_norm": 2.5,
      "learning_rate": 2.184838171192993e-05,
      "loss": 0.7767,
      "step": 519350
    },
    {
      "epoch": 1.8202273181297248,
      "grad_norm": 2.375,
      "learning_rate": 2.1847732683266227e-05,
      "loss": 0.7832,
      "step": 519360
    },
    {
      "epoch": 1.8202623656366206,
      "grad_norm": 2.84375,
      "learning_rate": 2.1847083654602525e-05,
      "loss": 0.868,
      "step": 519370
    },
    {
      "epoch": 1.820297413143516,
      "grad_norm": 3.09375,
      "learning_rate": 2.1846434625938823e-05,
      "loss": 0.7553,
      "step": 519380
    },
    {
      "epoch": 1.8203324606504117,
      "grad_norm": 3.390625,
      "learning_rate": 2.184578559727512e-05,
      "loss": 0.882,
      "step": 519390
    },
    {
      "epoch": 1.8203675081573072,
      "grad_norm": 3.171875,
      "learning_rate": 2.184513656861142e-05,
      "loss": 0.8235,
      "step": 519400
    },
    {
      "epoch": 1.8204025556642027,
      "grad_norm": 2.96875,
      "learning_rate": 2.1844487539947717e-05,
      "loss": 0.8802,
      "step": 519410
    },
    {
      "epoch": 1.8204376031710985,
      "grad_norm": 2.78125,
      "learning_rate": 2.184383851128401e-05,
      "loss": 0.8695,
      "step": 519420
    },
    {
      "epoch": 1.820472650677994,
      "grad_norm": 3.125,
      "learning_rate": 2.184318948262031e-05,
      "loss": 0.8148,
      "step": 519430
    },
    {
      "epoch": 1.8205076981848896,
      "grad_norm": 2.890625,
      "learning_rate": 2.1842540453956607e-05,
      "loss": 0.7695,
      "step": 519440
    },
    {
      "epoch": 1.8205427456917853,
      "grad_norm": 2.890625,
      "learning_rate": 2.1841891425292905e-05,
      "loss": 0.93,
      "step": 519450
    },
    {
      "epoch": 1.8205777931986808,
      "grad_norm": 3.5625,
      "learning_rate": 2.1841242396629203e-05,
      "loss": 0.7985,
      "step": 519460
    },
    {
      "epoch": 1.8206128407055764,
      "grad_norm": 3.0,
      "learning_rate": 2.1840593367965505e-05,
      "loss": 0.8406,
      "step": 519470
    },
    {
      "epoch": 1.8206478882124721,
      "grad_norm": 3.140625,
      "learning_rate": 2.1839944339301803e-05,
      "loss": 0.811,
      "step": 519480
    },
    {
      "epoch": 1.8206829357193675,
      "grad_norm": 2.875,
      "learning_rate": 2.18392953106381e-05,
      "loss": 0.8149,
      "step": 519490
    },
    {
      "epoch": 1.8207179832262632,
      "grad_norm": 3.15625,
      "learning_rate": 2.18386462819744e-05,
      "loss": 0.8865,
      "step": 519500
    },
    {
      "epoch": 1.8207530307331588,
      "grad_norm": 2.46875,
      "learning_rate": 2.1837997253310697e-05,
      "loss": 0.8688,
      "step": 519510
    },
    {
      "epoch": 1.8207880782400543,
      "grad_norm": 2.6875,
      "learning_rate": 2.1837348224646995e-05,
      "loss": 0.7523,
      "step": 519520
    },
    {
      "epoch": 1.82082312574695,
      "grad_norm": 2.859375,
      "learning_rate": 2.1836699195983293e-05,
      "loss": 0.8377,
      "step": 519530
    },
    {
      "epoch": 1.8208581732538456,
      "grad_norm": 2.8125,
      "learning_rate": 2.183605016731959e-05,
      "loss": 0.8444,
      "step": 519540
    },
    {
      "epoch": 1.8208932207607411,
      "grad_norm": 2.5,
      "learning_rate": 2.183540113865589e-05,
      "loss": 0.8585,
      "step": 519550
    },
    {
      "epoch": 1.8209282682676369,
      "grad_norm": 2.59375,
      "learning_rate": 2.1834752109992187e-05,
      "loss": 0.8453,
      "step": 519560
    },
    {
      "epoch": 1.8209633157745324,
      "grad_norm": 2.90625,
      "learning_rate": 2.1834103081328485e-05,
      "loss": 0.7892,
      "step": 519570
    },
    {
      "epoch": 1.820998363281428,
      "grad_norm": 2.84375,
      "learning_rate": 2.1833454052664783e-05,
      "loss": 0.8485,
      "step": 519580
    },
    {
      "epoch": 1.8210334107883237,
      "grad_norm": 2.59375,
      "learning_rate": 2.183280502400108e-05,
      "loss": 0.9234,
      "step": 519590
    },
    {
      "epoch": 1.821068458295219,
      "grad_norm": 3.046875,
      "learning_rate": 2.183215599533738e-05,
      "loss": 0.8039,
      "step": 519600
    },
    {
      "epoch": 1.8211035058021148,
      "grad_norm": 2.921875,
      "learning_rate": 2.1831506966673677e-05,
      "loss": 0.7932,
      "step": 519610
    },
    {
      "epoch": 1.8211385533090103,
      "grad_norm": 2.78125,
      "learning_rate": 2.1830857938009975e-05,
      "loss": 0.8182,
      "step": 519620
    },
    {
      "epoch": 1.8211736008159058,
      "grad_norm": 2.578125,
      "learning_rate": 2.1830208909346273e-05,
      "loss": 0.7851,
      "step": 519630
    },
    {
      "epoch": 1.8212086483228016,
      "grad_norm": 2.796875,
      "learning_rate": 2.182955988068257e-05,
      "loss": 0.8149,
      "step": 519640
    },
    {
      "epoch": 1.8212436958296971,
      "grad_norm": 3.0625,
      "learning_rate": 2.182891085201887e-05,
      "loss": 0.7855,
      "step": 519650
    },
    {
      "epoch": 1.8212787433365927,
      "grad_norm": 2.96875,
      "learning_rate": 2.1828261823355167e-05,
      "loss": 0.8427,
      "step": 519660
    },
    {
      "epoch": 1.8213137908434884,
      "grad_norm": 2.296875,
      "learning_rate": 2.1827612794691465e-05,
      "loss": 0.8837,
      "step": 519670
    },
    {
      "epoch": 1.821348838350384,
      "grad_norm": 3.296875,
      "learning_rate": 2.1826963766027763e-05,
      "loss": 0.8272,
      "step": 519680
    },
    {
      "epoch": 1.8213838858572795,
      "grad_norm": 3.15625,
      "learning_rate": 2.182631473736406e-05,
      "loss": 0.8749,
      "step": 519690
    },
    {
      "epoch": 1.8214189333641753,
      "grad_norm": 3.09375,
      "learning_rate": 2.182566570870036e-05,
      "loss": 0.8295,
      "step": 519700
    },
    {
      "epoch": 1.8214539808710706,
      "grad_norm": 3.109375,
      "learning_rate": 2.1825016680036657e-05,
      "loss": 0.8224,
      "step": 519710
    },
    {
      "epoch": 1.8214890283779663,
      "grad_norm": 3.109375,
      "learning_rate": 2.1824367651372958e-05,
      "loss": 0.8581,
      "step": 519720
    },
    {
      "epoch": 1.821524075884862,
      "grad_norm": 3.03125,
      "learning_rate": 2.1823718622709256e-05,
      "loss": 0.8445,
      "step": 519730
    },
    {
      "epoch": 1.8215591233917574,
      "grad_norm": 3.125,
      "learning_rate": 2.1823069594045554e-05,
      "loss": 0.8569,
      "step": 519740
    },
    {
      "epoch": 1.8215941708986532,
      "grad_norm": 2.84375,
      "learning_rate": 2.1822420565381852e-05,
      "loss": 0.8405,
      "step": 519750
    },
    {
      "epoch": 1.8216292184055487,
      "grad_norm": 2.859375,
      "learning_rate": 2.182177153671815e-05,
      "loss": 0.842,
      "step": 519760
    },
    {
      "epoch": 1.8216642659124442,
      "grad_norm": 2.53125,
      "learning_rate": 2.1821122508054448e-05,
      "loss": 0.7282,
      "step": 519770
    },
    {
      "epoch": 1.82169931341934,
      "grad_norm": 3.0625,
      "learning_rate": 2.1820473479390746e-05,
      "loss": 0.7839,
      "step": 519780
    },
    {
      "epoch": 1.8217343609262355,
      "grad_norm": 2.359375,
      "learning_rate": 2.181982445072704e-05,
      "loss": 0.7698,
      "step": 519790
    },
    {
      "epoch": 1.821769408433131,
      "grad_norm": 2.703125,
      "learning_rate": 2.181917542206334e-05,
      "loss": 0.8864,
      "step": 519800
    },
    {
      "epoch": 1.8218044559400268,
      "grad_norm": 3.296875,
      "learning_rate": 2.1818526393399637e-05,
      "loss": 0.8901,
      "step": 519810
    },
    {
      "epoch": 1.8218395034469221,
      "grad_norm": 2.484375,
      "learning_rate": 2.1817877364735935e-05,
      "loss": 0.8071,
      "step": 519820
    },
    {
      "epoch": 1.821874550953818,
      "grad_norm": 3.703125,
      "learning_rate": 2.1817228336072236e-05,
      "loss": 0.8129,
      "step": 519830
    },
    {
      "epoch": 1.8219095984607137,
      "grad_norm": 2.828125,
      "learning_rate": 2.1816579307408534e-05,
      "loss": 0.7898,
      "step": 519840
    },
    {
      "epoch": 1.821944645967609,
      "grad_norm": 3.328125,
      "learning_rate": 2.1815930278744832e-05,
      "loss": 0.7546,
      "step": 519850
    },
    {
      "epoch": 1.8219796934745047,
      "grad_norm": 2.796875,
      "learning_rate": 2.181528125008113e-05,
      "loss": 0.9101,
      "step": 519860
    },
    {
      "epoch": 1.8220147409814003,
      "grad_norm": 2.734375,
      "learning_rate": 2.1814632221417428e-05,
      "loss": 0.8214,
      "step": 519870
    },
    {
      "epoch": 1.8220497884882958,
      "grad_norm": 3.109375,
      "learning_rate": 2.1813983192753726e-05,
      "loss": 0.8031,
      "step": 519880
    },
    {
      "epoch": 1.8220848359951916,
      "grad_norm": 2.921875,
      "learning_rate": 2.1813334164090024e-05,
      "loss": 0.8263,
      "step": 519890
    },
    {
      "epoch": 1.822119883502087,
      "grad_norm": 2.984375,
      "learning_rate": 2.1812685135426322e-05,
      "loss": 0.8123,
      "step": 519900
    },
    {
      "epoch": 1.8221549310089826,
      "grad_norm": 2.984375,
      "learning_rate": 2.181203610676262e-05,
      "loss": 0.8358,
      "step": 519910
    },
    {
      "epoch": 1.8221899785158784,
      "grad_norm": 3.34375,
      "learning_rate": 2.1811387078098918e-05,
      "loss": 0.8442,
      "step": 519920
    },
    {
      "epoch": 1.822225026022774,
      "grad_norm": 2.78125,
      "learning_rate": 2.1810738049435216e-05,
      "loss": 0.8222,
      "step": 519930
    },
    {
      "epoch": 1.8222600735296695,
      "grad_norm": 3.09375,
      "learning_rate": 2.1810089020771514e-05,
      "loss": 0.883,
      "step": 519940
    },
    {
      "epoch": 1.8222951210365652,
      "grad_norm": 3.453125,
      "learning_rate": 2.1809439992107812e-05,
      "loss": 0.793,
      "step": 519950
    },
    {
      "epoch": 1.8223301685434605,
      "grad_norm": 2.796875,
      "learning_rate": 2.180879096344411e-05,
      "loss": 0.8406,
      "step": 519960
    },
    {
      "epoch": 1.8223652160503563,
      "grad_norm": 2.921875,
      "learning_rate": 2.180814193478041e-05,
      "loss": 0.8498,
      "step": 519970
    },
    {
      "epoch": 1.8224002635572518,
      "grad_norm": 2.578125,
      "learning_rate": 2.1807492906116706e-05,
      "loss": 0.7749,
      "step": 519980
    },
    {
      "epoch": 1.8224353110641474,
      "grad_norm": 3.03125,
      "learning_rate": 2.1806843877453004e-05,
      "loss": 0.8392,
      "step": 519990
    },
    {
      "epoch": 1.8224703585710431,
      "grad_norm": 2.609375,
      "learning_rate": 2.1806194848789302e-05,
      "loss": 0.861,
      "step": 520000
    },
    {
      "epoch": 1.8224703585710431,
      "eval_loss": 0.7761481404304504,
      "eval_runtime": 564.3518,
      "eval_samples_per_second": 674.111,
      "eval_steps_per_second": 56.176,
      "step": 520000
    },
    {
      "epoch": 1.8225054060779387,
      "grad_norm": 2.9375,
      "learning_rate": 2.18055458201256e-05,
      "loss": 0.8401,
      "step": 520010
    },
    {
      "epoch": 1.8225404535848342,
      "grad_norm": 2.9375,
      "learning_rate": 2.1804896791461898e-05,
      "loss": 0.8677,
      "step": 520020
    },
    {
      "epoch": 1.82257550109173,
      "grad_norm": 2.640625,
      "learning_rate": 2.1804247762798196e-05,
      "loss": 0.8882,
      "step": 520030
    },
    {
      "epoch": 1.8226105485986255,
      "grad_norm": 2.8125,
      "learning_rate": 2.1803598734134494e-05,
      "loss": 0.8182,
      "step": 520040
    },
    {
      "epoch": 1.822645596105521,
      "grad_norm": 3.390625,
      "learning_rate": 2.1802949705470792e-05,
      "loss": 0.8135,
      "step": 520050
    },
    {
      "epoch": 1.8226806436124168,
      "grad_norm": 2.75,
      "learning_rate": 2.180230067680709e-05,
      "loss": 0.8915,
      "step": 520060
    },
    {
      "epoch": 1.822715691119312,
      "grad_norm": 2.96875,
      "learning_rate": 2.1801651648143388e-05,
      "loss": 0.8577,
      "step": 520070
    },
    {
      "epoch": 1.8227507386262078,
      "grad_norm": 3.140625,
      "learning_rate": 2.180100261947969e-05,
      "loss": 0.8232,
      "step": 520080
    },
    {
      "epoch": 1.8227857861331034,
      "grad_norm": 2.96875,
      "learning_rate": 2.1800353590815988e-05,
      "loss": 0.8563,
      "step": 520090
    },
    {
      "epoch": 1.822820833639999,
      "grad_norm": 3.578125,
      "learning_rate": 2.1799704562152286e-05,
      "loss": 0.819,
      "step": 520100
    },
    {
      "epoch": 1.8228558811468947,
      "grad_norm": 2.828125,
      "learning_rate": 2.1799055533488584e-05,
      "loss": 0.8416,
      "step": 520110
    },
    {
      "epoch": 1.8228909286537902,
      "grad_norm": 2.53125,
      "learning_rate": 2.179840650482488e-05,
      "loss": 0.8204,
      "step": 520120
    },
    {
      "epoch": 1.8229259761606857,
      "grad_norm": 2.78125,
      "learning_rate": 2.179775747616118e-05,
      "loss": 0.7526,
      "step": 520130
    },
    {
      "epoch": 1.8229610236675815,
      "grad_norm": 2.796875,
      "learning_rate": 2.1797108447497478e-05,
      "loss": 0.8423,
      "step": 520140
    },
    {
      "epoch": 1.822996071174477,
      "grad_norm": 2.890625,
      "learning_rate": 2.1796459418833776e-05,
      "loss": 0.8166,
      "step": 520150
    },
    {
      "epoch": 1.8230311186813726,
      "grad_norm": 2.734375,
      "learning_rate": 2.1795810390170074e-05,
      "loss": 0.8112,
      "step": 520160
    },
    {
      "epoch": 1.8230661661882683,
      "grad_norm": 2.65625,
      "learning_rate": 2.1795161361506368e-05,
      "loss": 0.7757,
      "step": 520170
    },
    {
      "epoch": 1.8231012136951636,
      "grad_norm": 2.671875,
      "learning_rate": 2.1794512332842666e-05,
      "loss": 0.7884,
      "step": 520180
    },
    {
      "epoch": 1.8231362612020594,
      "grad_norm": 2.984375,
      "learning_rate": 2.1793863304178964e-05,
      "loss": 0.8432,
      "step": 520190
    },
    {
      "epoch": 1.823171308708955,
      "grad_norm": 3.046875,
      "learning_rate": 2.1793214275515266e-05,
      "loss": 0.7948,
      "step": 520200
    },
    {
      "epoch": 1.8232063562158505,
      "grad_norm": 3.109375,
      "learning_rate": 2.1792565246851564e-05,
      "loss": 0.8066,
      "step": 520210
    },
    {
      "epoch": 1.8232414037227462,
      "grad_norm": 2.9375,
      "learning_rate": 2.179191621818786e-05,
      "loss": 0.861,
      "step": 520220
    },
    {
      "epoch": 1.8232764512296418,
      "grad_norm": 2.71875,
      "learning_rate": 2.179126718952416e-05,
      "loss": 0.7409,
      "step": 520230
    },
    {
      "epoch": 1.8233114987365373,
      "grad_norm": 2.8125,
      "learning_rate": 2.1790618160860458e-05,
      "loss": 0.8496,
      "step": 520240
    },
    {
      "epoch": 1.823346546243433,
      "grad_norm": 3.265625,
      "learning_rate": 2.1789969132196756e-05,
      "loss": 0.8006,
      "step": 520250
    },
    {
      "epoch": 1.8233815937503286,
      "grad_norm": 2.8125,
      "learning_rate": 2.1789320103533054e-05,
      "loss": 0.8434,
      "step": 520260
    },
    {
      "epoch": 1.8234166412572241,
      "grad_norm": 3.34375,
      "learning_rate": 2.178867107486935e-05,
      "loss": 0.9115,
      "step": 520270
    },
    {
      "epoch": 1.82345168876412,
      "grad_norm": 3.421875,
      "learning_rate": 2.178802204620565e-05,
      "loss": 0.9412,
      "step": 520280
    },
    {
      "epoch": 1.8234867362710152,
      "grad_norm": 2.59375,
      "learning_rate": 2.1787373017541948e-05,
      "loss": 0.8275,
      "step": 520290
    },
    {
      "epoch": 1.823521783777911,
      "grad_norm": 2.65625,
      "learning_rate": 2.1786723988878246e-05,
      "loss": 0.8971,
      "step": 520300
    },
    {
      "epoch": 1.8235568312848065,
      "grad_norm": 2.984375,
      "learning_rate": 2.1786074960214544e-05,
      "loss": 0.7688,
      "step": 520310
    },
    {
      "epoch": 1.823591878791702,
      "grad_norm": 3.328125,
      "learning_rate": 2.178542593155084e-05,
      "loss": 0.8367,
      "step": 520320
    },
    {
      "epoch": 1.8236269262985978,
      "grad_norm": 2.84375,
      "learning_rate": 2.178477690288714e-05,
      "loss": 0.8109,
      "step": 520330
    },
    {
      "epoch": 1.8236619738054933,
      "grad_norm": 3.203125,
      "learning_rate": 2.178412787422344e-05,
      "loss": 0.7765,
      "step": 520340
    },
    {
      "epoch": 1.8236970213123889,
      "grad_norm": 3.3125,
      "learning_rate": 2.178347884555974e-05,
      "loss": 0.8748,
      "step": 520350
    },
    {
      "epoch": 1.8237320688192846,
      "grad_norm": 2.84375,
      "learning_rate": 2.1782829816896034e-05,
      "loss": 0.9111,
      "step": 520360
    },
    {
      "epoch": 1.8237671163261802,
      "grad_norm": 3.0,
      "learning_rate": 2.178218078823233e-05,
      "loss": 0.8097,
      "step": 520370
    },
    {
      "epoch": 1.8238021638330757,
      "grad_norm": 2.6875,
      "learning_rate": 2.178153175956863e-05,
      "loss": 0.8651,
      "step": 520380
    },
    {
      "epoch": 1.8238372113399715,
      "grad_norm": 2.875,
      "learning_rate": 2.1780882730904928e-05,
      "loss": 0.7738,
      "step": 520390
    },
    {
      "epoch": 1.8238722588468668,
      "grad_norm": 2.734375,
      "learning_rate": 2.1780233702241226e-05,
      "loss": 0.7085,
      "step": 520400
    },
    {
      "epoch": 1.8239073063537625,
      "grad_norm": 2.953125,
      "learning_rate": 2.1779584673577524e-05,
      "loss": 0.8536,
      "step": 520410
    },
    {
      "epoch": 1.8239423538606583,
      "grad_norm": 2.765625,
      "learning_rate": 2.177893564491382e-05,
      "loss": 0.8017,
      "step": 520420
    },
    {
      "epoch": 1.8239774013675536,
      "grad_norm": 3.15625,
      "learning_rate": 2.177828661625012e-05,
      "loss": 0.8141,
      "step": 520430
    },
    {
      "epoch": 1.8240124488744494,
      "grad_norm": 2.5625,
      "learning_rate": 2.1777637587586418e-05,
      "loss": 0.8835,
      "step": 520440
    },
    {
      "epoch": 1.824047496381345,
      "grad_norm": 2.875,
      "learning_rate": 2.177698855892272e-05,
      "loss": 0.8,
      "step": 520450
    },
    {
      "epoch": 1.8240825438882404,
      "grad_norm": 3.15625,
      "learning_rate": 2.1776339530259017e-05,
      "loss": 0.9268,
      "step": 520460
    },
    {
      "epoch": 1.8241175913951362,
      "grad_norm": 3.140625,
      "learning_rate": 2.1775690501595315e-05,
      "loss": 0.8306,
      "step": 520470
    },
    {
      "epoch": 1.8241526389020317,
      "grad_norm": 3.0625,
      "learning_rate": 2.1775041472931613e-05,
      "loss": 0.8393,
      "step": 520480
    },
    {
      "epoch": 1.8241876864089273,
      "grad_norm": 3.15625,
      "learning_rate": 2.177439244426791e-05,
      "loss": 0.8371,
      "step": 520490
    },
    {
      "epoch": 1.824222733915823,
      "grad_norm": 3.046875,
      "learning_rate": 2.177374341560421e-05,
      "loss": 0.8159,
      "step": 520500
    },
    {
      "epoch": 1.8242577814227185,
      "grad_norm": 3.203125,
      "learning_rate": 2.1773094386940507e-05,
      "loss": 0.8553,
      "step": 520510
    },
    {
      "epoch": 1.824292828929614,
      "grad_norm": 2.703125,
      "learning_rate": 2.1772445358276805e-05,
      "loss": 0.8093,
      "step": 520520
    },
    {
      "epoch": 1.8243278764365098,
      "grad_norm": 2.75,
      "learning_rate": 2.1771796329613103e-05,
      "loss": 0.8054,
      "step": 520530
    },
    {
      "epoch": 1.8243629239434052,
      "grad_norm": 3.265625,
      "learning_rate": 2.1771147300949398e-05,
      "loss": 0.726,
      "step": 520540
    },
    {
      "epoch": 1.824397971450301,
      "grad_norm": 3.125,
      "learning_rate": 2.1770498272285696e-05,
      "loss": 0.7915,
      "step": 520550
    },
    {
      "epoch": 1.8244330189571965,
      "grad_norm": 2.640625,
      "learning_rate": 2.1769849243621997e-05,
      "loss": 0.8903,
      "step": 520560
    },
    {
      "epoch": 1.824468066464092,
      "grad_norm": 2.734375,
      "learning_rate": 2.1769200214958295e-05,
      "loss": 0.8312,
      "step": 520570
    },
    {
      "epoch": 1.8245031139709877,
      "grad_norm": 2.9375,
      "learning_rate": 2.1768551186294593e-05,
      "loss": 0.7795,
      "step": 520580
    },
    {
      "epoch": 1.8245381614778833,
      "grad_norm": 3.578125,
      "learning_rate": 2.176790215763089e-05,
      "loss": 0.8873,
      "step": 520590
    },
    {
      "epoch": 1.8245732089847788,
      "grad_norm": 3.328125,
      "learning_rate": 2.176725312896719e-05,
      "loss": 0.7888,
      "step": 520600
    },
    {
      "epoch": 1.8246082564916746,
      "grad_norm": 3.09375,
      "learning_rate": 2.1766604100303487e-05,
      "loss": 0.882,
      "step": 520610
    },
    {
      "epoch": 1.82464330399857,
      "grad_norm": 3.125,
      "learning_rate": 2.1765955071639785e-05,
      "loss": 0.8579,
      "step": 520620
    },
    {
      "epoch": 1.8246783515054656,
      "grad_norm": 2.953125,
      "learning_rate": 2.1765306042976083e-05,
      "loss": 0.8241,
      "step": 520630
    },
    {
      "epoch": 1.8247133990123614,
      "grad_norm": 2.921875,
      "learning_rate": 2.176465701431238e-05,
      "loss": 0.889,
      "step": 520640
    },
    {
      "epoch": 1.8247484465192567,
      "grad_norm": 2.953125,
      "learning_rate": 2.176400798564868e-05,
      "loss": 0.7762,
      "step": 520650
    },
    {
      "epoch": 1.8247834940261525,
      "grad_norm": 2.625,
      "learning_rate": 2.1763358956984977e-05,
      "loss": 0.8518,
      "step": 520660
    },
    {
      "epoch": 1.824818541533048,
      "grad_norm": 2.953125,
      "learning_rate": 2.1762709928321275e-05,
      "loss": 0.8368,
      "step": 520670
    },
    {
      "epoch": 1.8248535890399435,
      "grad_norm": 3.46875,
      "learning_rate": 2.1762060899657573e-05,
      "loss": 0.839,
      "step": 520680
    },
    {
      "epoch": 1.8248886365468393,
      "grad_norm": 3.125,
      "learning_rate": 2.176141187099387e-05,
      "loss": 0.8387,
      "step": 520690
    },
    {
      "epoch": 1.8249236840537348,
      "grad_norm": 3.203125,
      "learning_rate": 2.1760762842330172e-05,
      "loss": 0.802,
      "step": 520700
    },
    {
      "epoch": 1.8249587315606304,
      "grad_norm": 3.234375,
      "learning_rate": 2.176011381366647e-05,
      "loss": 0.8904,
      "step": 520710
    },
    {
      "epoch": 1.8249937790675261,
      "grad_norm": 2.703125,
      "learning_rate": 2.175946478500277e-05,
      "loss": 0.8855,
      "step": 520720
    },
    {
      "epoch": 1.8250288265744217,
      "grad_norm": 2.890625,
      "learning_rate": 2.1758815756339063e-05,
      "loss": 0.7836,
      "step": 520730
    },
    {
      "epoch": 1.8250638740813172,
      "grad_norm": 3.09375,
      "learning_rate": 2.175816672767536e-05,
      "loss": 0.8378,
      "step": 520740
    },
    {
      "epoch": 1.825098921588213,
      "grad_norm": 2.4375,
      "learning_rate": 2.175751769901166e-05,
      "loss": 0.9004,
      "step": 520750
    },
    {
      "epoch": 1.8251339690951083,
      "grad_norm": 3.140625,
      "learning_rate": 2.1756868670347957e-05,
      "loss": 0.891,
      "step": 520760
    },
    {
      "epoch": 1.825169016602004,
      "grad_norm": 3.328125,
      "learning_rate": 2.1756219641684255e-05,
      "loss": 0.7751,
      "step": 520770
    },
    {
      "epoch": 1.8252040641088996,
      "grad_norm": 2.609375,
      "learning_rate": 2.1755570613020553e-05,
      "loss": 0.7975,
      "step": 520780
    },
    {
      "epoch": 1.825239111615795,
      "grad_norm": 2.828125,
      "learning_rate": 2.175492158435685e-05,
      "loss": 0.8059,
      "step": 520790
    },
    {
      "epoch": 1.8252741591226909,
      "grad_norm": 3.0625,
      "learning_rate": 2.175427255569315e-05,
      "loss": 0.8098,
      "step": 520800
    },
    {
      "epoch": 1.8253092066295864,
      "grad_norm": 3.015625,
      "learning_rate": 2.1753623527029447e-05,
      "loss": 0.7947,
      "step": 520810
    },
    {
      "epoch": 1.825344254136482,
      "grad_norm": 3.078125,
      "learning_rate": 2.175297449836575e-05,
      "loss": 0.8444,
      "step": 520820
    },
    {
      "epoch": 1.8253793016433777,
      "grad_norm": 3.4375,
      "learning_rate": 2.1752325469702046e-05,
      "loss": 0.8155,
      "step": 520830
    },
    {
      "epoch": 1.8254143491502732,
      "grad_norm": 2.859375,
      "learning_rate": 2.1751676441038344e-05,
      "loss": 0.8579,
      "step": 520840
    },
    {
      "epoch": 1.8254493966571688,
      "grad_norm": 3.234375,
      "learning_rate": 2.1751027412374642e-05,
      "loss": 0.8373,
      "step": 520850
    },
    {
      "epoch": 1.8254844441640645,
      "grad_norm": 3.21875,
      "learning_rate": 2.175037838371094e-05,
      "loss": 0.791,
      "step": 520860
    },
    {
      "epoch": 1.8255194916709598,
      "grad_norm": 2.84375,
      "learning_rate": 2.174972935504724e-05,
      "loss": 0.8314,
      "step": 520870
    },
    {
      "epoch": 1.8255545391778556,
      "grad_norm": 2.5625,
      "learning_rate": 2.1749080326383536e-05,
      "loss": 0.8167,
      "step": 520880
    },
    {
      "epoch": 1.8255895866847511,
      "grad_norm": 3.03125,
      "learning_rate": 2.1748431297719834e-05,
      "loss": 0.7913,
      "step": 520890
    },
    {
      "epoch": 1.8256246341916467,
      "grad_norm": 2.6875,
      "learning_rate": 2.1747782269056132e-05,
      "loss": 0.9024,
      "step": 520900
    },
    {
      "epoch": 1.8256596816985424,
      "grad_norm": 2.953125,
      "learning_rate": 2.174713324039243e-05,
      "loss": 0.8136,
      "step": 520910
    },
    {
      "epoch": 1.825694729205438,
      "grad_norm": 3.0,
      "learning_rate": 2.1746484211728725e-05,
      "loss": 0.8936,
      "step": 520920
    },
    {
      "epoch": 1.8257297767123335,
      "grad_norm": 2.921875,
      "learning_rate": 2.1745835183065026e-05,
      "loss": 0.8941,
      "step": 520930
    },
    {
      "epoch": 1.8257648242192293,
      "grad_norm": 2.578125,
      "learning_rate": 2.1745186154401324e-05,
      "loss": 0.8076,
      "step": 520940
    },
    {
      "epoch": 1.8257998717261248,
      "grad_norm": 3.109375,
      "learning_rate": 2.1744537125737622e-05,
      "loss": 0.8035,
      "step": 520950
    },
    {
      "epoch": 1.8258349192330203,
      "grad_norm": 3.140625,
      "learning_rate": 2.174388809707392e-05,
      "loss": 0.7857,
      "step": 520960
    },
    {
      "epoch": 1.825869966739916,
      "grad_norm": 2.421875,
      "learning_rate": 2.174323906841022e-05,
      "loss": 0.7945,
      "step": 520970
    },
    {
      "epoch": 1.8259050142468114,
      "grad_norm": 2.6875,
      "learning_rate": 2.1742590039746516e-05,
      "loss": 0.8148,
      "step": 520980
    },
    {
      "epoch": 1.8259400617537072,
      "grad_norm": 2.78125,
      "learning_rate": 2.1741941011082814e-05,
      "loss": 0.8493,
      "step": 520990
    },
    {
      "epoch": 1.825975109260603,
      "grad_norm": 3.1875,
      "learning_rate": 2.1741291982419112e-05,
      "loss": 0.7593,
      "step": 521000
    },
    {
      "epoch": 1.8260101567674982,
      "grad_norm": 3.0,
      "learning_rate": 2.174064295375541e-05,
      "loss": 0.8488,
      "step": 521010
    },
    {
      "epoch": 1.826045204274394,
      "grad_norm": 2.6875,
      "learning_rate": 2.173999392509171e-05,
      "loss": 0.8045,
      "step": 521020
    },
    {
      "epoch": 1.8260802517812895,
      "grad_norm": 3.4375,
      "learning_rate": 2.1739344896428006e-05,
      "loss": 0.9026,
      "step": 521030
    },
    {
      "epoch": 1.826115299288185,
      "grad_norm": 3.484375,
      "learning_rate": 2.1738695867764304e-05,
      "loss": 0.8722,
      "step": 521040
    },
    {
      "epoch": 1.8261503467950808,
      "grad_norm": 3.078125,
      "learning_rate": 2.1738046839100602e-05,
      "loss": 0.7845,
      "step": 521050
    },
    {
      "epoch": 1.8261853943019764,
      "grad_norm": 3.40625,
      "learning_rate": 2.17373978104369e-05,
      "loss": 0.8792,
      "step": 521060
    },
    {
      "epoch": 1.8262204418088719,
      "grad_norm": 3.125,
      "learning_rate": 2.1736748781773202e-05,
      "loss": 0.8191,
      "step": 521070
    },
    {
      "epoch": 1.8262554893157676,
      "grad_norm": 2.78125,
      "learning_rate": 2.17360997531095e-05,
      "loss": 0.8228,
      "step": 521080
    },
    {
      "epoch": 1.826290536822663,
      "grad_norm": 3.171875,
      "learning_rate": 2.1735450724445798e-05,
      "loss": 0.8352,
      "step": 521090
    },
    {
      "epoch": 1.8263255843295587,
      "grad_norm": 2.984375,
      "learning_rate": 2.1734801695782096e-05,
      "loss": 0.814,
      "step": 521100
    },
    {
      "epoch": 1.8263606318364545,
      "grad_norm": 2.96875,
      "learning_rate": 2.173415266711839e-05,
      "loss": 0.8199,
      "step": 521110
    },
    {
      "epoch": 1.8263956793433498,
      "grad_norm": 2.84375,
      "learning_rate": 2.173350363845469e-05,
      "loss": 0.8652,
      "step": 521120
    },
    {
      "epoch": 1.8264307268502455,
      "grad_norm": 2.90625,
      "learning_rate": 2.1732854609790986e-05,
      "loss": 0.7139,
      "step": 521130
    },
    {
      "epoch": 1.826465774357141,
      "grad_norm": 2.96875,
      "learning_rate": 2.1732205581127284e-05,
      "loss": 0.7953,
      "step": 521140
    },
    {
      "epoch": 1.8265008218640366,
      "grad_norm": 3.140625,
      "learning_rate": 2.1731556552463582e-05,
      "loss": 0.8427,
      "step": 521150
    },
    {
      "epoch": 1.8265358693709324,
      "grad_norm": 3.984375,
      "learning_rate": 2.173090752379988e-05,
      "loss": 0.906,
      "step": 521160
    },
    {
      "epoch": 1.826570916877828,
      "grad_norm": 3.1875,
      "learning_rate": 2.173025849513618e-05,
      "loss": 0.8403,
      "step": 521170
    },
    {
      "epoch": 1.8266059643847234,
      "grad_norm": 3.09375,
      "learning_rate": 2.172960946647248e-05,
      "loss": 0.9036,
      "step": 521180
    },
    {
      "epoch": 1.8266410118916192,
      "grad_norm": 2.234375,
      "learning_rate": 2.1728960437808778e-05,
      "loss": 0.815,
      "step": 521190
    },
    {
      "epoch": 1.8266760593985147,
      "grad_norm": 3.015625,
      "learning_rate": 2.1728311409145076e-05,
      "loss": 0.8333,
      "step": 521200
    },
    {
      "epoch": 1.8267111069054103,
      "grad_norm": 3.21875,
      "learning_rate": 2.1727662380481374e-05,
      "loss": 0.8202,
      "step": 521210
    },
    {
      "epoch": 1.826746154412306,
      "grad_norm": 2.671875,
      "learning_rate": 2.1727013351817672e-05,
      "loss": 0.8486,
      "step": 521220
    },
    {
      "epoch": 1.8267812019192013,
      "grad_norm": 3.03125,
      "learning_rate": 2.172636432315397e-05,
      "loss": 0.8798,
      "step": 521230
    },
    {
      "epoch": 1.826816249426097,
      "grad_norm": 2.765625,
      "learning_rate": 2.1725715294490268e-05,
      "loss": 0.8189,
      "step": 521240
    },
    {
      "epoch": 1.8268512969329926,
      "grad_norm": 3.109375,
      "learning_rate": 2.1725066265826566e-05,
      "loss": 0.8266,
      "step": 521250
    },
    {
      "epoch": 1.8268863444398882,
      "grad_norm": 2.796875,
      "learning_rate": 2.1724417237162864e-05,
      "loss": 0.853,
      "step": 521260
    },
    {
      "epoch": 1.826921391946784,
      "grad_norm": 3.0625,
      "learning_rate": 2.1723768208499162e-05,
      "loss": 0.8557,
      "step": 521270
    },
    {
      "epoch": 1.8269564394536795,
      "grad_norm": 2.453125,
      "learning_rate": 2.172311917983546e-05,
      "loss": 0.8125,
      "step": 521280
    },
    {
      "epoch": 1.826991486960575,
      "grad_norm": 2.84375,
      "learning_rate": 2.1722470151171758e-05,
      "loss": 0.8676,
      "step": 521290
    },
    {
      "epoch": 1.8270265344674708,
      "grad_norm": 2.5,
      "learning_rate": 2.1721821122508056e-05,
      "loss": 0.7372,
      "step": 521300
    },
    {
      "epoch": 1.8270615819743663,
      "grad_norm": 2.890625,
      "learning_rate": 2.1721172093844354e-05,
      "loss": 0.8318,
      "step": 521310
    },
    {
      "epoch": 1.8270966294812618,
      "grad_norm": 3.375,
      "learning_rate": 2.1720523065180652e-05,
      "loss": 0.8211,
      "step": 521320
    },
    {
      "epoch": 1.8271316769881576,
      "grad_norm": 3.21875,
      "learning_rate": 2.171987403651695e-05,
      "loss": 0.8305,
      "step": 521330
    },
    {
      "epoch": 1.827166724495053,
      "grad_norm": 3.28125,
      "learning_rate": 2.1719225007853248e-05,
      "loss": 0.866,
      "step": 521340
    },
    {
      "epoch": 1.8272017720019487,
      "grad_norm": 2.765625,
      "learning_rate": 2.1718575979189546e-05,
      "loss": 0.7705,
      "step": 521350
    },
    {
      "epoch": 1.8272368195088442,
      "grad_norm": 2.6875,
      "learning_rate": 2.1717926950525844e-05,
      "loss": 0.8829,
      "step": 521360
    },
    {
      "epoch": 1.8272718670157397,
      "grad_norm": 3.171875,
      "learning_rate": 2.1717277921862142e-05,
      "loss": 0.8701,
      "step": 521370
    },
    {
      "epoch": 1.8273069145226355,
      "grad_norm": 2.9375,
      "learning_rate": 2.171662889319844e-05,
      "loss": 0.8095,
      "step": 521380
    },
    {
      "epoch": 1.827341962029531,
      "grad_norm": 2.890625,
      "learning_rate": 2.1715979864534738e-05,
      "loss": 0.7959,
      "step": 521390
    },
    {
      "epoch": 1.8273770095364266,
      "grad_norm": 2.6875,
      "learning_rate": 2.1715330835871036e-05,
      "loss": 0.8522,
      "step": 521400
    },
    {
      "epoch": 1.8274120570433223,
      "grad_norm": 2.40625,
      "learning_rate": 2.1714681807207334e-05,
      "loss": 0.7944,
      "step": 521410
    },
    {
      "epoch": 1.8274471045502179,
      "grad_norm": 2.96875,
      "learning_rate": 2.1714032778543632e-05,
      "loss": 0.7937,
      "step": 521420
    },
    {
      "epoch": 1.8274821520571134,
      "grad_norm": 3.296875,
      "learning_rate": 2.171338374987993e-05,
      "loss": 0.7895,
      "step": 521430
    },
    {
      "epoch": 1.8275171995640092,
      "grad_norm": 2.46875,
      "learning_rate": 2.171273472121623e-05,
      "loss": 0.7864,
      "step": 521440
    },
    {
      "epoch": 1.8275522470709045,
      "grad_norm": 2.828125,
      "learning_rate": 2.171208569255253e-05,
      "loss": 0.9054,
      "step": 521450
    },
    {
      "epoch": 1.8275872945778002,
      "grad_norm": 2.546875,
      "learning_rate": 2.1711436663888827e-05,
      "loss": 0.7102,
      "step": 521460
    },
    {
      "epoch": 1.8276223420846958,
      "grad_norm": 3.015625,
      "learning_rate": 2.1710787635225125e-05,
      "loss": 0.8464,
      "step": 521470
    },
    {
      "epoch": 1.8276573895915913,
      "grad_norm": 2.921875,
      "learning_rate": 2.171013860656142e-05,
      "loss": 0.7853,
      "step": 521480
    },
    {
      "epoch": 1.827692437098487,
      "grad_norm": 2.8125,
      "learning_rate": 2.1709489577897718e-05,
      "loss": 0.8357,
      "step": 521490
    },
    {
      "epoch": 1.8277274846053826,
      "grad_norm": 2.546875,
      "learning_rate": 2.1708840549234016e-05,
      "loss": 0.7686,
      "step": 521500
    },
    {
      "epoch": 1.8277625321122781,
      "grad_norm": 2.3125,
      "learning_rate": 2.1708191520570314e-05,
      "loss": 0.8227,
      "step": 521510
    },
    {
      "epoch": 1.8277975796191739,
      "grad_norm": 2.796875,
      "learning_rate": 2.1707542491906612e-05,
      "loss": 0.7658,
      "step": 521520
    },
    {
      "epoch": 1.8278326271260694,
      "grad_norm": 3.390625,
      "learning_rate": 2.170689346324291e-05,
      "loss": 0.8267,
      "step": 521530
    },
    {
      "epoch": 1.827867674632965,
      "grad_norm": 2.625,
      "learning_rate": 2.1706244434579208e-05,
      "loss": 0.7604,
      "step": 521540
    },
    {
      "epoch": 1.8279027221398607,
      "grad_norm": 3.1875,
      "learning_rate": 2.170559540591551e-05,
      "loss": 0.7954,
      "step": 521550
    },
    {
      "epoch": 1.827937769646756,
      "grad_norm": 2.828125,
      "learning_rate": 2.1704946377251807e-05,
      "loss": 0.7874,
      "step": 521560
    },
    {
      "epoch": 1.8279728171536518,
      "grad_norm": 3.171875,
      "learning_rate": 2.1704297348588105e-05,
      "loss": 0.8343,
      "step": 521570
    },
    {
      "epoch": 1.8280078646605473,
      "grad_norm": 4.0,
      "learning_rate": 2.1703648319924403e-05,
      "loss": 0.8689,
      "step": 521580
    },
    {
      "epoch": 1.8280429121674429,
      "grad_norm": 2.890625,
      "learning_rate": 2.17029992912607e-05,
      "loss": 0.7457,
      "step": 521590
    },
    {
      "epoch": 1.8280779596743386,
      "grad_norm": 3.046875,
      "learning_rate": 2.1702350262597e-05,
      "loss": 0.8249,
      "step": 521600
    },
    {
      "epoch": 1.8281130071812342,
      "grad_norm": 3.234375,
      "learning_rate": 2.1701701233933297e-05,
      "loss": 0.7969,
      "step": 521610
    },
    {
      "epoch": 1.8281480546881297,
      "grad_norm": 2.828125,
      "learning_rate": 2.1701052205269595e-05,
      "loss": 0.8036,
      "step": 521620
    },
    {
      "epoch": 1.8281831021950254,
      "grad_norm": 2.46875,
      "learning_rate": 2.1700403176605893e-05,
      "loss": 0.7767,
      "step": 521630
    },
    {
      "epoch": 1.828218149701921,
      "grad_norm": 3.125,
      "learning_rate": 2.169975414794219e-05,
      "loss": 0.8135,
      "step": 521640
    },
    {
      "epoch": 1.8282531972088165,
      "grad_norm": 2.875,
      "learning_rate": 2.169910511927849e-05,
      "loss": 0.7961,
      "step": 521650
    },
    {
      "epoch": 1.8282882447157123,
      "grad_norm": 3.140625,
      "learning_rate": 2.1698456090614787e-05,
      "loss": 0.8582,
      "step": 521660
    },
    {
      "epoch": 1.8283232922226076,
      "grad_norm": 3.125,
      "learning_rate": 2.1697807061951085e-05,
      "loss": 0.8142,
      "step": 521670
    },
    {
      "epoch": 1.8283583397295033,
      "grad_norm": 2.65625,
      "learning_rate": 2.1697158033287383e-05,
      "loss": 0.8727,
      "step": 521680
    },
    {
      "epoch": 1.828393387236399,
      "grad_norm": 2.71875,
      "learning_rate": 2.169650900462368e-05,
      "loss": 0.7943,
      "step": 521690
    },
    {
      "epoch": 1.8284284347432944,
      "grad_norm": 2.53125,
      "learning_rate": 2.169585997595998e-05,
      "loss": 0.7887,
      "step": 521700
    },
    {
      "epoch": 1.8284634822501902,
      "grad_norm": 2.765625,
      "learning_rate": 2.1695210947296277e-05,
      "loss": 0.8273,
      "step": 521710
    },
    {
      "epoch": 1.8284985297570857,
      "grad_norm": 3.375,
      "learning_rate": 2.1694561918632575e-05,
      "loss": 0.8543,
      "step": 521720
    },
    {
      "epoch": 1.8285335772639812,
      "grad_norm": 2.453125,
      "learning_rate": 2.1693912889968873e-05,
      "loss": 0.7888,
      "step": 521730
    },
    {
      "epoch": 1.828568624770877,
      "grad_norm": 2.90625,
      "learning_rate": 2.169326386130517e-05,
      "loss": 0.8094,
      "step": 521740
    },
    {
      "epoch": 1.8286036722777725,
      "grad_norm": 2.625,
      "learning_rate": 2.169261483264147e-05,
      "loss": 0.8683,
      "step": 521750
    },
    {
      "epoch": 1.828638719784668,
      "grad_norm": 3.109375,
      "learning_rate": 2.1691965803977767e-05,
      "loss": 0.8248,
      "step": 521760
    },
    {
      "epoch": 1.8286737672915638,
      "grad_norm": 2.875,
      "learning_rate": 2.1691316775314065e-05,
      "loss": 0.8204,
      "step": 521770
    },
    {
      "epoch": 1.8287088147984591,
      "grad_norm": 2.75,
      "learning_rate": 2.1690667746650363e-05,
      "loss": 0.8748,
      "step": 521780
    },
    {
      "epoch": 1.828743862305355,
      "grad_norm": 2.65625,
      "learning_rate": 2.169001871798666e-05,
      "loss": 0.8041,
      "step": 521790
    },
    {
      "epoch": 1.8287789098122507,
      "grad_norm": 3.515625,
      "learning_rate": 2.1689369689322963e-05,
      "loss": 0.8227,
      "step": 521800
    },
    {
      "epoch": 1.828813957319146,
      "grad_norm": 2.28125,
      "learning_rate": 2.168872066065926e-05,
      "loss": 0.7259,
      "step": 521810
    },
    {
      "epoch": 1.8288490048260417,
      "grad_norm": 2.84375,
      "learning_rate": 2.168807163199556e-05,
      "loss": 0.937,
      "step": 521820
    },
    {
      "epoch": 1.8288840523329373,
      "grad_norm": 2.46875,
      "learning_rate": 2.1687422603331857e-05,
      "loss": 0.829,
      "step": 521830
    },
    {
      "epoch": 1.8289190998398328,
      "grad_norm": 3.21875,
      "learning_rate": 2.1686773574668155e-05,
      "loss": 0.8459,
      "step": 521840
    },
    {
      "epoch": 1.8289541473467286,
      "grad_norm": 3.25,
      "learning_rate": 2.1686124546004453e-05,
      "loss": 0.8472,
      "step": 521850
    },
    {
      "epoch": 1.828989194853624,
      "grad_norm": 3.109375,
      "learning_rate": 2.1685475517340747e-05,
      "loss": 0.827,
      "step": 521860
    },
    {
      "epoch": 1.8290242423605196,
      "grad_norm": 2.90625,
      "learning_rate": 2.1684826488677045e-05,
      "loss": 0.8408,
      "step": 521870
    },
    {
      "epoch": 1.8290592898674154,
      "grad_norm": 2.796875,
      "learning_rate": 2.1684177460013343e-05,
      "loss": 0.8079,
      "step": 521880
    },
    {
      "epoch": 1.829094337374311,
      "grad_norm": 3.484375,
      "learning_rate": 2.168352843134964e-05,
      "loss": 0.8113,
      "step": 521890
    },
    {
      "epoch": 1.8291293848812065,
      "grad_norm": 2.40625,
      "learning_rate": 2.168287940268594e-05,
      "loss": 0.7521,
      "step": 521900
    },
    {
      "epoch": 1.8291644323881022,
      "grad_norm": 3.109375,
      "learning_rate": 2.1682230374022237e-05,
      "loss": 0.8467,
      "step": 521910
    },
    {
      "epoch": 1.8291994798949975,
      "grad_norm": 2.96875,
      "learning_rate": 2.168158134535854e-05,
      "loss": 0.8267,
      "step": 521920
    },
    {
      "epoch": 1.8292345274018933,
      "grad_norm": 3.109375,
      "learning_rate": 2.1680932316694837e-05,
      "loss": 0.857,
      "step": 521930
    },
    {
      "epoch": 1.8292695749087888,
      "grad_norm": 2.8125,
      "learning_rate": 2.1680283288031135e-05,
      "loss": 0.8951,
      "step": 521940
    },
    {
      "epoch": 1.8293046224156844,
      "grad_norm": 2.671875,
      "learning_rate": 2.1679634259367433e-05,
      "loss": 0.8764,
      "step": 521950
    },
    {
      "epoch": 1.8293396699225801,
      "grad_norm": 3.421875,
      "learning_rate": 2.167898523070373e-05,
      "loss": 0.8317,
      "step": 521960
    },
    {
      "epoch": 1.8293747174294757,
      "grad_norm": 2.6875,
      "learning_rate": 2.167833620204003e-05,
      "loss": 0.8552,
      "step": 521970
    },
    {
      "epoch": 1.8294097649363712,
      "grad_norm": 2.734375,
      "learning_rate": 2.1677687173376327e-05,
      "loss": 0.7885,
      "step": 521980
    },
    {
      "epoch": 1.829444812443267,
      "grad_norm": 3.09375,
      "learning_rate": 2.1677038144712625e-05,
      "loss": 0.7769,
      "step": 521990
    },
    {
      "epoch": 1.8294798599501625,
      "grad_norm": 2.578125,
      "learning_rate": 2.1676389116048923e-05,
      "loss": 0.8024,
      "step": 522000
    },
    {
      "epoch": 1.829514907457058,
      "grad_norm": 2.8125,
      "learning_rate": 2.167574008738522e-05,
      "loss": 0.8805,
      "step": 522010
    },
    {
      "epoch": 1.8295499549639538,
      "grad_norm": 2.71875,
      "learning_rate": 2.167509105872152e-05,
      "loss": 0.8997,
      "step": 522020
    },
    {
      "epoch": 1.829585002470849,
      "grad_norm": 2.859375,
      "learning_rate": 2.1674442030057817e-05,
      "loss": 0.8478,
      "step": 522030
    },
    {
      "epoch": 1.8296200499777449,
      "grad_norm": 3.3125,
      "learning_rate": 2.1673793001394115e-05,
      "loss": 0.9236,
      "step": 522040
    },
    {
      "epoch": 1.8296550974846404,
      "grad_norm": 3.5,
      "learning_rate": 2.1673143972730413e-05,
      "loss": 0.8691,
      "step": 522050
    },
    {
      "epoch": 1.829690144991536,
      "grad_norm": 3.453125,
      "learning_rate": 2.167249494406671e-05,
      "loss": 0.8806,
      "step": 522060
    },
    {
      "epoch": 1.8297251924984317,
      "grad_norm": 2.5625,
      "learning_rate": 2.167184591540301e-05,
      "loss": 0.7781,
      "step": 522070
    },
    {
      "epoch": 1.8297602400053272,
      "grad_norm": 2.625,
      "learning_rate": 2.1671196886739307e-05,
      "loss": 0.7996,
      "step": 522080
    },
    {
      "epoch": 1.8297952875122228,
      "grad_norm": 2.765625,
      "learning_rate": 2.1670547858075605e-05,
      "loss": 0.8386,
      "step": 522090
    },
    {
      "epoch": 1.8298303350191185,
      "grad_norm": 3.140625,
      "learning_rate": 2.1669898829411903e-05,
      "loss": 0.8644,
      "step": 522100
    },
    {
      "epoch": 1.829865382526014,
      "grad_norm": 2.75,
      "learning_rate": 2.16692498007482e-05,
      "loss": 0.7988,
      "step": 522110
    },
    {
      "epoch": 1.8299004300329096,
      "grad_norm": 2.96875,
      "learning_rate": 2.16686007720845e-05,
      "loss": 0.7931,
      "step": 522120
    },
    {
      "epoch": 1.8299354775398053,
      "grad_norm": 3.28125,
      "learning_rate": 2.1667951743420797e-05,
      "loss": 0.8788,
      "step": 522130
    },
    {
      "epoch": 1.8299705250467007,
      "grad_norm": 2.984375,
      "learning_rate": 2.1667302714757095e-05,
      "loss": 0.8881,
      "step": 522140
    },
    {
      "epoch": 1.8300055725535964,
      "grad_norm": 2.75,
      "learning_rate": 2.1666653686093393e-05,
      "loss": 0.7823,
      "step": 522150
    },
    {
      "epoch": 1.830040620060492,
      "grad_norm": 2.671875,
      "learning_rate": 2.166600465742969e-05,
      "loss": 0.7911,
      "step": 522160
    },
    {
      "epoch": 1.8300756675673875,
      "grad_norm": 2.4375,
      "learning_rate": 2.1665355628765992e-05,
      "loss": 0.8078,
      "step": 522170
    },
    {
      "epoch": 1.8301107150742832,
      "grad_norm": 2.96875,
      "learning_rate": 2.166470660010229e-05,
      "loss": 0.7503,
      "step": 522180
    },
    {
      "epoch": 1.8301457625811788,
      "grad_norm": 2.859375,
      "learning_rate": 2.1664057571438588e-05,
      "loss": 0.8424,
      "step": 522190
    },
    {
      "epoch": 1.8301808100880743,
      "grad_norm": 2.734375,
      "learning_rate": 2.1663408542774886e-05,
      "loss": 0.8598,
      "step": 522200
    },
    {
      "epoch": 1.83021585759497,
      "grad_norm": 2.71875,
      "learning_rate": 2.1662759514111184e-05,
      "loss": 0.8407,
      "step": 522210
    },
    {
      "epoch": 1.8302509051018656,
      "grad_norm": 2.84375,
      "learning_rate": 2.1662110485447482e-05,
      "loss": 0.8013,
      "step": 522220
    },
    {
      "epoch": 1.8302859526087611,
      "grad_norm": 2.96875,
      "learning_rate": 2.166146145678378e-05,
      "loss": 0.8732,
      "step": 522230
    },
    {
      "epoch": 1.830321000115657,
      "grad_norm": 3.09375,
      "learning_rate": 2.1660812428120075e-05,
      "loss": 0.8205,
      "step": 522240
    },
    {
      "epoch": 1.8303560476225522,
      "grad_norm": 3.296875,
      "learning_rate": 2.1660163399456373e-05,
      "loss": 0.8982,
      "step": 522250
    },
    {
      "epoch": 1.830391095129448,
      "grad_norm": 3.1875,
      "learning_rate": 2.165951437079267e-05,
      "loss": 0.787,
      "step": 522260
    },
    {
      "epoch": 1.8304261426363435,
      "grad_norm": 3.109375,
      "learning_rate": 2.165886534212897e-05,
      "loss": 0.8656,
      "step": 522270
    },
    {
      "epoch": 1.830461190143239,
      "grad_norm": 2.9375,
      "learning_rate": 2.165821631346527e-05,
      "loss": 0.7942,
      "step": 522280
    },
    {
      "epoch": 1.8304962376501348,
      "grad_norm": 3.28125,
      "learning_rate": 2.1657567284801568e-05,
      "loss": 0.8695,
      "step": 522290
    },
    {
      "epoch": 1.8305312851570303,
      "grad_norm": 3.078125,
      "learning_rate": 2.1656918256137866e-05,
      "loss": 0.8567,
      "step": 522300
    },
    {
      "epoch": 1.8305663326639259,
      "grad_norm": 2.546875,
      "learning_rate": 2.1656269227474164e-05,
      "loss": 0.8169,
      "step": 522310
    },
    {
      "epoch": 1.8306013801708216,
      "grad_norm": 3.046875,
      "learning_rate": 2.1655620198810462e-05,
      "loss": 0.8818,
      "step": 522320
    },
    {
      "epoch": 1.8306364276777172,
      "grad_norm": 2.84375,
      "learning_rate": 2.165497117014676e-05,
      "loss": 0.9153,
      "step": 522330
    },
    {
      "epoch": 1.8306714751846127,
      "grad_norm": 2.953125,
      "learning_rate": 2.1654322141483058e-05,
      "loss": 0.8975,
      "step": 522340
    },
    {
      "epoch": 1.8307065226915085,
      "grad_norm": 3.390625,
      "learning_rate": 2.1653673112819356e-05,
      "loss": 0.871,
      "step": 522350
    },
    {
      "epoch": 1.8307415701984038,
      "grad_norm": 2.9375,
      "learning_rate": 2.1653024084155654e-05,
      "loss": 0.8797,
      "step": 522360
    },
    {
      "epoch": 1.8307766177052995,
      "grad_norm": 2.671875,
      "learning_rate": 2.1652375055491952e-05,
      "loss": 0.8662,
      "step": 522370
    },
    {
      "epoch": 1.8308116652121953,
      "grad_norm": 3.0625,
      "learning_rate": 2.165172602682825e-05,
      "loss": 0.8825,
      "step": 522380
    },
    {
      "epoch": 1.8308467127190906,
      "grad_norm": 2.9375,
      "learning_rate": 2.1651076998164548e-05,
      "loss": 0.8195,
      "step": 522390
    },
    {
      "epoch": 1.8308817602259864,
      "grad_norm": 3.109375,
      "learning_rate": 2.1650427969500846e-05,
      "loss": 0.8399,
      "step": 522400
    },
    {
      "epoch": 1.830916807732882,
      "grad_norm": 3.1875,
      "learning_rate": 2.1649778940837144e-05,
      "loss": 0.7572,
      "step": 522410
    },
    {
      "epoch": 1.8309518552397774,
      "grad_norm": 2.90625,
      "learning_rate": 2.1649129912173442e-05,
      "loss": 0.8629,
      "step": 522420
    },
    {
      "epoch": 1.8309869027466732,
      "grad_norm": 2.90625,
      "learning_rate": 2.164848088350974e-05,
      "loss": 0.8579,
      "step": 522430
    },
    {
      "epoch": 1.8310219502535687,
      "grad_norm": 2.671875,
      "learning_rate": 2.1647831854846038e-05,
      "loss": 0.7006,
      "step": 522440
    },
    {
      "epoch": 1.8310569977604643,
      "grad_norm": 2.9375,
      "learning_rate": 2.1647182826182336e-05,
      "loss": 0.8029,
      "step": 522450
    },
    {
      "epoch": 1.83109204526736,
      "grad_norm": 2.5625,
      "learning_rate": 2.1646533797518634e-05,
      "loss": 0.7982,
      "step": 522460
    },
    {
      "epoch": 1.8311270927742553,
      "grad_norm": 2.921875,
      "learning_rate": 2.1645884768854932e-05,
      "loss": 0.7921,
      "step": 522470
    },
    {
      "epoch": 1.831162140281151,
      "grad_norm": 3.046875,
      "learning_rate": 2.164523574019123e-05,
      "loss": 0.8159,
      "step": 522480
    },
    {
      "epoch": 1.8311971877880469,
      "grad_norm": 2.953125,
      "learning_rate": 2.1644586711527528e-05,
      "loss": 0.8168,
      "step": 522490
    },
    {
      "epoch": 1.8312322352949422,
      "grad_norm": 2.9375,
      "learning_rate": 2.1643937682863826e-05,
      "loss": 0.8052,
      "step": 522500
    },
    {
      "epoch": 1.831267282801838,
      "grad_norm": 3.21875,
      "learning_rate": 2.1643288654200124e-05,
      "loss": 0.8783,
      "step": 522510
    },
    {
      "epoch": 1.8313023303087335,
      "grad_norm": 3.078125,
      "learning_rate": 2.1642639625536422e-05,
      "loss": 0.839,
      "step": 522520
    },
    {
      "epoch": 1.831337377815629,
      "grad_norm": 2.53125,
      "learning_rate": 2.164199059687272e-05,
      "loss": 0.8023,
      "step": 522530
    },
    {
      "epoch": 1.8313724253225248,
      "grad_norm": 2.984375,
      "learning_rate": 2.164134156820902e-05,
      "loss": 0.826,
      "step": 522540
    },
    {
      "epoch": 1.8314074728294203,
      "grad_norm": 2.828125,
      "learning_rate": 2.164069253954532e-05,
      "loss": 0.8647,
      "step": 522550
    },
    {
      "epoch": 1.8314425203363158,
      "grad_norm": 2.828125,
      "learning_rate": 2.1640043510881617e-05,
      "loss": 0.8156,
      "step": 522560
    },
    {
      "epoch": 1.8314775678432116,
      "grad_norm": 2.984375,
      "learning_rate": 2.1639394482217915e-05,
      "loss": 0.7948,
      "step": 522570
    },
    {
      "epoch": 1.8315126153501071,
      "grad_norm": 2.796875,
      "learning_rate": 2.1638745453554213e-05,
      "loss": 0.8563,
      "step": 522580
    },
    {
      "epoch": 1.8315476628570027,
      "grad_norm": 3.234375,
      "learning_rate": 2.163809642489051e-05,
      "loss": 0.8411,
      "step": 522590
    },
    {
      "epoch": 1.8315827103638984,
      "grad_norm": 2.890625,
      "learning_rate": 2.163744739622681e-05,
      "loss": 0.8352,
      "step": 522600
    },
    {
      "epoch": 1.8316177578707937,
      "grad_norm": 3.09375,
      "learning_rate": 2.1636798367563104e-05,
      "loss": 0.8196,
      "step": 522610
    },
    {
      "epoch": 1.8316528053776895,
      "grad_norm": 3.390625,
      "learning_rate": 2.1636149338899402e-05,
      "loss": 0.8506,
      "step": 522620
    },
    {
      "epoch": 1.831687852884585,
      "grad_norm": 3.0625,
      "learning_rate": 2.16355003102357e-05,
      "loss": 0.8731,
      "step": 522630
    },
    {
      "epoch": 1.8317229003914806,
      "grad_norm": 2.84375,
      "learning_rate": 2.1634851281571998e-05,
      "loss": 0.8082,
      "step": 522640
    },
    {
      "epoch": 1.8317579478983763,
      "grad_norm": 2.53125,
      "learning_rate": 2.16342022529083e-05,
      "loss": 0.854,
      "step": 522650
    },
    {
      "epoch": 1.8317929954052719,
      "grad_norm": 4.03125,
      "learning_rate": 2.1633553224244597e-05,
      "loss": 0.903,
      "step": 522660
    },
    {
      "epoch": 1.8318280429121674,
      "grad_norm": 2.78125,
      "learning_rate": 2.1632904195580895e-05,
      "loss": 0.7695,
      "step": 522670
    },
    {
      "epoch": 1.8318630904190631,
      "grad_norm": 2.78125,
      "learning_rate": 2.1632255166917193e-05,
      "loss": 0.7733,
      "step": 522680
    },
    {
      "epoch": 1.8318981379259587,
      "grad_norm": 3.078125,
      "learning_rate": 2.163160613825349e-05,
      "loss": 0.8651,
      "step": 522690
    },
    {
      "epoch": 1.8319331854328542,
      "grad_norm": 2.4375,
      "learning_rate": 2.163095710958979e-05,
      "loss": 0.8225,
      "step": 522700
    },
    {
      "epoch": 1.83196823293975,
      "grad_norm": 2.953125,
      "learning_rate": 2.1630308080926087e-05,
      "loss": 0.7977,
      "step": 522710
    },
    {
      "epoch": 1.8320032804466453,
      "grad_norm": 2.515625,
      "learning_rate": 2.1629659052262385e-05,
      "loss": 0.8722,
      "step": 522720
    },
    {
      "epoch": 1.832038327953541,
      "grad_norm": 3.21875,
      "learning_rate": 2.1629010023598683e-05,
      "loss": 0.8103,
      "step": 522730
    },
    {
      "epoch": 1.8320733754604366,
      "grad_norm": 2.71875,
      "learning_rate": 2.162836099493498e-05,
      "loss": 0.8005,
      "step": 522740
    },
    {
      "epoch": 1.8321084229673321,
      "grad_norm": 3.234375,
      "learning_rate": 2.162771196627128e-05,
      "loss": 0.8757,
      "step": 522750
    },
    {
      "epoch": 1.8321434704742279,
      "grad_norm": 3.109375,
      "learning_rate": 2.1627062937607577e-05,
      "loss": 0.8533,
      "step": 522760
    },
    {
      "epoch": 1.8321785179811234,
      "grad_norm": 2.78125,
      "learning_rate": 2.1626413908943875e-05,
      "loss": 0.8909,
      "step": 522770
    },
    {
      "epoch": 1.832213565488019,
      "grad_norm": 2.6875,
      "learning_rate": 2.1625764880280173e-05,
      "loss": 0.7596,
      "step": 522780
    },
    {
      "epoch": 1.8322486129949147,
      "grad_norm": 3.046875,
      "learning_rate": 2.1625115851616475e-05,
      "loss": 0.8394,
      "step": 522790
    },
    {
      "epoch": 1.8322836605018102,
      "grad_norm": 2.453125,
      "learning_rate": 2.162446682295277e-05,
      "loss": 0.8053,
      "step": 522800
    },
    {
      "epoch": 1.8323187080087058,
      "grad_norm": 2.5,
      "learning_rate": 2.1623817794289067e-05,
      "loss": 0.9049,
      "step": 522810
    },
    {
      "epoch": 1.8323537555156015,
      "grad_norm": 3.09375,
      "learning_rate": 2.1623168765625365e-05,
      "loss": 0.8504,
      "step": 522820
    },
    {
      "epoch": 1.8323888030224968,
      "grad_norm": 3.15625,
      "learning_rate": 2.1622519736961663e-05,
      "loss": 0.8346,
      "step": 522830
    },
    {
      "epoch": 1.8324238505293926,
      "grad_norm": 3.390625,
      "learning_rate": 2.162187070829796e-05,
      "loss": 0.8355,
      "step": 522840
    },
    {
      "epoch": 1.8324588980362881,
      "grad_norm": 3.140625,
      "learning_rate": 2.162122167963426e-05,
      "loss": 0.8377,
      "step": 522850
    },
    {
      "epoch": 1.8324939455431837,
      "grad_norm": 2.703125,
      "learning_rate": 2.1620572650970557e-05,
      "loss": 0.8132,
      "step": 522860
    },
    {
      "epoch": 1.8325289930500794,
      "grad_norm": 2.953125,
      "learning_rate": 2.1619923622306855e-05,
      "loss": 0.793,
      "step": 522870
    },
    {
      "epoch": 1.832564040556975,
      "grad_norm": 3.375,
      "learning_rate": 2.1619274593643153e-05,
      "loss": 0.8639,
      "step": 522880
    },
    {
      "epoch": 1.8325990880638705,
      "grad_norm": 3.1875,
      "learning_rate": 2.161862556497945e-05,
      "loss": 0.8109,
      "step": 522890
    },
    {
      "epoch": 1.8326341355707663,
      "grad_norm": 3.421875,
      "learning_rate": 2.1617976536315753e-05,
      "loss": 0.8511,
      "step": 522900
    },
    {
      "epoch": 1.8326691830776618,
      "grad_norm": 2.78125,
      "learning_rate": 2.161732750765205e-05,
      "loss": 0.9468,
      "step": 522910
    },
    {
      "epoch": 1.8327042305845573,
      "grad_norm": 3.265625,
      "learning_rate": 2.161667847898835e-05,
      "loss": 0.7992,
      "step": 522920
    },
    {
      "epoch": 1.832739278091453,
      "grad_norm": 2.8125,
      "learning_rate": 2.1616029450324647e-05,
      "loss": 0.8039,
      "step": 522930
    },
    {
      "epoch": 1.8327743255983484,
      "grad_norm": 2.765625,
      "learning_rate": 2.1615380421660945e-05,
      "loss": 0.8187,
      "step": 522940
    },
    {
      "epoch": 1.8328093731052442,
      "grad_norm": 2.90625,
      "learning_rate": 2.1614731392997243e-05,
      "loss": 0.87,
      "step": 522950
    },
    {
      "epoch": 1.8328444206121397,
      "grad_norm": 2.5625,
      "learning_rate": 2.161408236433354e-05,
      "loss": 0.8143,
      "step": 522960
    },
    {
      "epoch": 1.8328794681190352,
      "grad_norm": 2.84375,
      "learning_rate": 2.161343333566984e-05,
      "loss": 0.9044,
      "step": 522970
    },
    {
      "epoch": 1.832914515625931,
      "grad_norm": 2.390625,
      "learning_rate": 2.1612784307006137e-05,
      "loss": 0.8392,
      "step": 522980
    },
    {
      "epoch": 1.8329495631328265,
      "grad_norm": 3.25,
      "learning_rate": 2.161213527834243e-05,
      "loss": 0.8495,
      "step": 522990
    },
    {
      "epoch": 1.832984610639722,
      "grad_norm": 2.46875,
      "learning_rate": 2.161148624967873e-05,
      "loss": 0.7337,
      "step": 523000
    },
    {
      "epoch": 1.8330196581466178,
      "grad_norm": 2.296875,
      "learning_rate": 2.1610837221015027e-05,
      "loss": 0.8519,
      "step": 523010
    },
    {
      "epoch": 1.8330547056535134,
      "grad_norm": 3.53125,
      "learning_rate": 2.161018819235133e-05,
      "loss": 0.8379,
      "step": 523020
    },
    {
      "epoch": 1.833089753160409,
      "grad_norm": 3.296875,
      "learning_rate": 2.1609539163687627e-05,
      "loss": 0.9334,
      "step": 523030
    },
    {
      "epoch": 1.8331248006673047,
      "grad_norm": 3.296875,
      "learning_rate": 2.1608890135023925e-05,
      "loss": 0.9271,
      "step": 523040
    },
    {
      "epoch": 1.8331598481742,
      "grad_norm": 2.703125,
      "learning_rate": 2.1608241106360223e-05,
      "loss": 0.8776,
      "step": 523050
    },
    {
      "epoch": 1.8331948956810957,
      "grad_norm": 2.40625,
      "learning_rate": 2.160759207769652e-05,
      "loss": 0.8101,
      "step": 523060
    },
    {
      "epoch": 1.8332299431879915,
      "grad_norm": 3.28125,
      "learning_rate": 2.160694304903282e-05,
      "loss": 0.8989,
      "step": 523070
    },
    {
      "epoch": 1.8332649906948868,
      "grad_norm": 2.96875,
      "learning_rate": 2.1606294020369117e-05,
      "loss": 0.895,
      "step": 523080
    },
    {
      "epoch": 1.8333000382017826,
      "grad_norm": 3.046875,
      "learning_rate": 2.1605644991705415e-05,
      "loss": 0.8362,
      "step": 523090
    },
    {
      "epoch": 1.833335085708678,
      "grad_norm": 3.015625,
      "learning_rate": 2.1604995963041713e-05,
      "loss": 0.8395,
      "step": 523100
    },
    {
      "epoch": 1.8333701332155736,
      "grad_norm": 2.640625,
      "learning_rate": 2.160434693437801e-05,
      "loss": 0.8071,
      "step": 523110
    },
    {
      "epoch": 1.8334051807224694,
      "grad_norm": 2.796875,
      "learning_rate": 2.160369790571431e-05,
      "loss": 0.7791,
      "step": 523120
    },
    {
      "epoch": 1.833440228229365,
      "grad_norm": 2.90625,
      "learning_rate": 2.1603048877050607e-05,
      "loss": 0.7549,
      "step": 523130
    },
    {
      "epoch": 1.8334752757362605,
      "grad_norm": 2.84375,
      "learning_rate": 2.1602399848386905e-05,
      "loss": 0.795,
      "step": 523140
    },
    {
      "epoch": 1.8335103232431562,
      "grad_norm": 3.515625,
      "learning_rate": 2.1601750819723203e-05,
      "loss": 0.8745,
      "step": 523150
    },
    {
      "epoch": 1.8335453707500515,
      "grad_norm": 2.96875,
      "learning_rate": 2.1601101791059504e-05,
      "loss": 0.7991,
      "step": 523160
    },
    {
      "epoch": 1.8335804182569473,
      "grad_norm": 3.140625,
      "learning_rate": 2.1600452762395802e-05,
      "loss": 0.8287,
      "step": 523170
    },
    {
      "epoch": 1.833615465763843,
      "grad_norm": 2.59375,
      "learning_rate": 2.1599803733732097e-05,
      "loss": 0.8178,
      "step": 523180
    },
    {
      "epoch": 1.8336505132707384,
      "grad_norm": 2.84375,
      "learning_rate": 2.1599154705068395e-05,
      "loss": 0.7426,
      "step": 523190
    },
    {
      "epoch": 1.8336855607776341,
      "grad_norm": 3.125,
      "learning_rate": 2.1598505676404693e-05,
      "loss": 0.8391,
      "step": 523200
    },
    {
      "epoch": 1.8337206082845297,
      "grad_norm": 2.984375,
      "learning_rate": 2.159785664774099e-05,
      "loss": 0.7926,
      "step": 523210
    },
    {
      "epoch": 1.8337556557914252,
      "grad_norm": 2.84375,
      "learning_rate": 2.159720761907729e-05,
      "loss": 0.7825,
      "step": 523220
    },
    {
      "epoch": 1.833790703298321,
      "grad_norm": 2.78125,
      "learning_rate": 2.1596558590413587e-05,
      "loss": 0.8069,
      "step": 523230
    },
    {
      "epoch": 1.8338257508052165,
      "grad_norm": 2.578125,
      "learning_rate": 2.1595909561749885e-05,
      "loss": 0.8261,
      "step": 523240
    },
    {
      "epoch": 1.833860798312112,
      "grad_norm": 2.96875,
      "learning_rate": 2.1595260533086183e-05,
      "loss": 0.7681,
      "step": 523250
    },
    {
      "epoch": 1.8338958458190078,
      "grad_norm": 2.6875,
      "learning_rate": 2.159461150442248e-05,
      "loss": 0.7798,
      "step": 523260
    },
    {
      "epoch": 1.8339308933259033,
      "grad_norm": 3.015625,
      "learning_rate": 2.1593962475758782e-05,
      "loss": 0.7918,
      "step": 523270
    },
    {
      "epoch": 1.8339659408327988,
      "grad_norm": 2.46875,
      "learning_rate": 2.159331344709508e-05,
      "loss": 0.7677,
      "step": 523280
    },
    {
      "epoch": 1.8340009883396946,
      "grad_norm": 2.75,
      "learning_rate": 2.1592664418431378e-05,
      "loss": 0.8598,
      "step": 523290
    },
    {
      "epoch": 1.83403603584659,
      "grad_norm": 2.796875,
      "learning_rate": 2.1592015389767676e-05,
      "loss": 0.808,
      "step": 523300
    },
    {
      "epoch": 1.8340710833534857,
      "grad_norm": 3.140625,
      "learning_rate": 2.1591366361103974e-05,
      "loss": 0.8195,
      "step": 523310
    },
    {
      "epoch": 1.8341061308603812,
      "grad_norm": 2.34375,
      "learning_rate": 2.1590717332440272e-05,
      "loss": 0.8322,
      "step": 523320
    },
    {
      "epoch": 1.8341411783672767,
      "grad_norm": 2.875,
      "learning_rate": 2.159006830377657e-05,
      "loss": 0.8674,
      "step": 523330
    },
    {
      "epoch": 1.8341762258741725,
      "grad_norm": 2.90625,
      "learning_rate": 2.1589419275112868e-05,
      "loss": 0.8701,
      "step": 523340
    },
    {
      "epoch": 1.834211273381068,
      "grad_norm": 3.078125,
      "learning_rate": 2.1588770246449166e-05,
      "loss": 0.8363,
      "step": 523350
    },
    {
      "epoch": 1.8342463208879636,
      "grad_norm": 3.171875,
      "learning_rate": 2.158812121778546e-05,
      "loss": 0.8345,
      "step": 523360
    },
    {
      "epoch": 1.8342813683948593,
      "grad_norm": 3.0,
      "learning_rate": 2.158747218912176e-05,
      "loss": 0.9029,
      "step": 523370
    },
    {
      "epoch": 1.8343164159017549,
      "grad_norm": 2.78125,
      "learning_rate": 2.158682316045806e-05,
      "loss": 0.7866,
      "step": 523380
    },
    {
      "epoch": 1.8343514634086504,
      "grad_norm": 2.953125,
      "learning_rate": 2.1586174131794358e-05,
      "loss": 0.8003,
      "step": 523390
    },
    {
      "epoch": 1.8343865109155462,
      "grad_norm": 3.546875,
      "learning_rate": 2.1585525103130656e-05,
      "loss": 0.9706,
      "step": 523400
    },
    {
      "epoch": 1.8344215584224415,
      "grad_norm": 2.984375,
      "learning_rate": 2.1584876074466954e-05,
      "loss": 0.8491,
      "step": 523410
    },
    {
      "epoch": 1.8344566059293372,
      "grad_norm": 3.296875,
      "learning_rate": 2.1584227045803252e-05,
      "loss": 0.851,
      "step": 523420
    },
    {
      "epoch": 1.8344916534362328,
      "grad_norm": 2.96875,
      "learning_rate": 2.158357801713955e-05,
      "loss": 0.8698,
      "step": 523430
    },
    {
      "epoch": 1.8345267009431283,
      "grad_norm": 2.875,
      "learning_rate": 2.1582928988475848e-05,
      "loss": 0.8183,
      "step": 523440
    },
    {
      "epoch": 1.834561748450024,
      "grad_norm": 3.21875,
      "learning_rate": 2.1582279959812146e-05,
      "loss": 0.8562,
      "step": 523450
    },
    {
      "epoch": 1.8345967959569196,
      "grad_norm": 2.9375,
      "learning_rate": 2.1581630931148444e-05,
      "loss": 0.8583,
      "step": 523460
    },
    {
      "epoch": 1.8346318434638151,
      "grad_norm": 3.609375,
      "learning_rate": 2.1580981902484742e-05,
      "loss": 0.8514,
      "step": 523470
    },
    {
      "epoch": 1.834666890970711,
      "grad_norm": 2.3125,
      "learning_rate": 2.158033287382104e-05,
      "loss": 0.7794,
      "step": 523480
    },
    {
      "epoch": 1.8347019384776064,
      "grad_norm": 2.8125,
      "learning_rate": 2.1579683845157338e-05,
      "loss": 0.8345,
      "step": 523490
    },
    {
      "epoch": 1.834736985984502,
      "grad_norm": 2.578125,
      "learning_rate": 2.1579034816493636e-05,
      "loss": 0.8799,
      "step": 523500
    },
    {
      "epoch": 1.8347720334913977,
      "grad_norm": 3.3125,
      "learning_rate": 2.1578385787829934e-05,
      "loss": 0.8428,
      "step": 523510
    },
    {
      "epoch": 1.834807080998293,
      "grad_norm": 3.015625,
      "learning_rate": 2.1577736759166235e-05,
      "loss": 0.778,
      "step": 523520
    },
    {
      "epoch": 1.8348421285051888,
      "grad_norm": 2.4375,
      "learning_rate": 2.1577087730502533e-05,
      "loss": 0.7337,
      "step": 523530
    },
    {
      "epoch": 1.8348771760120843,
      "grad_norm": 2.421875,
      "learning_rate": 2.157643870183883e-05,
      "loss": 0.8434,
      "step": 523540
    },
    {
      "epoch": 1.8349122235189799,
      "grad_norm": 3.078125,
      "learning_rate": 2.1575789673175126e-05,
      "loss": 0.7771,
      "step": 523550
    },
    {
      "epoch": 1.8349472710258756,
      "grad_norm": 2.796875,
      "learning_rate": 2.1575140644511424e-05,
      "loss": 0.7897,
      "step": 523560
    },
    {
      "epoch": 1.8349823185327712,
      "grad_norm": 3.125,
      "learning_rate": 2.1574491615847722e-05,
      "loss": 0.7185,
      "step": 523570
    },
    {
      "epoch": 1.8350173660396667,
      "grad_norm": 2.96875,
      "learning_rate": 2.157384258718402e-05,
      "loss": 0.7626,
      "step": 523580
    },
    {
      "epoch": 1.8350524135465625,
      "grad_norm": 3.046875,
      "learning_rate": 2.1573193558520318e-05,
      "loss": 0.7808,
      "step": 523590
    },
    {
      "epoch": 1.835087461053458,
      "grad_norm": 2.640625,
      "learning_rate": 2.1572544529856616e-05,
      "loss": 0.8923,
      "step": 523600
    },
    {
      "epoch": 1.8351225085603535,
      "grad_norm": 2.375,
      "learning_rate": 2.1571895501192914e-05,
      "loss": 0.733,
      "step": 523610
    },
    {
      "epoch": 1.8351575560672493,
      "grad_norm": 2.984375,
      "learning_rate": 2.1571246472529212e-05,
      "loss": 0.8071,
      "step": 523620
    },
    {
      "epoch": 1.8351926035741446,
      "grad_norm": 2.6875,
      "learning_rate": 2.157059744386551e-05,
      "loss": 0.7508,
      "step": 523630
    },
    {
      "epoch": 1.8352276510810404,
      "grad_norm": 3.015625,
      "learning_rate": 2.156994841520181e-05,
      "loss": 0.8333,
      "step": 523640
    },
    {
      "epoch": 1.835262698587936,
      "grad_norm": 3.21875,
      "learning_rate": 2.156929938653811e-05,
      "loss": 0.8413,
      "step": 523650
    },
    {
      "epoch": 1.8352977460948314,
      "grad_norm": 3.609375,
      "learning_rate": 2.1568650357874407e-05,
      "loss": 0.9044,
      "step": 523660
    },
    {
      "epoch": 1.8353327936017272,
      "grad_norm": 3.203125,
      "learning_rate": 2.1568001329210705e-05,
      "loss": 0.9171,
      "step": 523670
    },
    {
      "epoch": 1.8353678411086227,
      "grad_norm": 2.5,
      "learning_rate": 2.1567352300547003e-05,
      "loss": 0.7825,
      "step": 523680
    },
    {
      "epoch": 1.8354028886155183,
      "grad_norm": 3.1875,
      "learning_rate": 2.15667032718833e-05,
      "loss": 0.8472,
      "step": 523690
    },
    {
      "epoch": 1.835437936122414,
      "grad_norm": 2.828125,
      "learning_rate": 2.15660542432196e-05,
      "loss": 0.7924,
      "step": 523700
    },
    {
      "epoch": 1.8354729836293096,
      "grad_norm": 2.8125,
      "learning_rate": 2.1565405214555897e-05,
      "loss": 0.8942,
      "step": 523710
    },
    {
      "epoch": 1.835508031136205,
      "grad_norm": 3.390625,
      "learning_rate": 2.1564756185892195e-05,
      "loss": 0.9168,
      "step": 523720
    },
    {
      "epoch": 1.8355430786431008,
      "grad_norm": 2.84375,
      "learning_rate": 2.1564107157228493e-05,
      "loss": 0.9073,
      "step": 523730
    },
    {
      "epoch": 1.8355781261499962,
      "grad_norm": 2.90625,
      "learning_rate": 2.1563458128564788e-05,
      "loss": 0.79,
      "step": 523740
    },
    {
      "epoch": 1.835613173656892,
      "grad_norm": 2.859375,
      "learning_rate": 2.156280909990109e-05,
      "loss": 0.7856,
      "step": 523750
    },
    {
      "epoch": 1.8356482211637877,
      "grad_norm": 2.984375,
      "learning_rate": 2.1562160071237387e-05,
      "loss": 0.7996,
      "step": 523760
    },
    {
      "epoch": 1.835683268670683,
      "grad_norm": 2.671875,
      "learning_rate": 2.1561511042573685e-05,
      "loss": 0.8922,
      "step": 523770
    },
    {
      "epoch": 1.8357183161775787,
      "grad_norm": 2.9375,
      "learning_rate": 2.1560862013909983e-05,
      "loss": 0.9068,
      "step": 523780
    },
    {
      "epoch": 1.8357533636844743,
      "grad_norm": 3.328125,
      "learning_rate": 2.156021298524628e-05,
      "loss": 0.8667,
      "step": 523790
    },
    {
      "epoch": 1.8357884111913698,
      "grad_norm": 2.828125,
      "learning_rate": 2.155956395658258e-05,
      "loss": 0.8605,
      "step": 523800
    },
    {
      "epoch": 1.8358234586982656,
      "grad_norm": 2.734375,
      "learning_rate": 2.1558914927918877e-05,
      "loss": 0.7832,
      "step": 523810
    },
    {
      "epoch": 1.8358585062051611,
      "grad_norm": 2.9375,
      "learning_rate": 2.1558265899255175e-05,
      "loss": 0.7772,
      "step": 523820
    },
    {
      "epoch": 1.8358935537120566,
      "grad_norm": 2.484375,
      "learning_rate": 2.1557616870591473e-05,
      "loss": 0.8458,
      "step": 523830
    },
    {
      "epoch": 1.8359286012189524,
      "grad_norm": 2.734375,
      "learning_rate": 2.155696784192777e-05,
      "loss": 0.8563,
      "step": 523840
    },
    {
      "epoch": 1.835963648725848,
      "grad_norm": 2.390625,
      "learning_rate": 2.155631881326407e-05,
      "loss": 0.791,
      "step": 523850
    },
    {
      "epoch": 1.8359986962327435,
      "grad_norm": 2.96875,
      "learning_rate": 2.1555669784600367e-05,
      "loss": 0.8927,
      "step": 523860
    },
    {
      "epoch": 1.8360337437396392,
      "grad_norm": 2.71875,
      "learning_rate": 2.1555020755936665e-05,
      "loss": 0.8604,
      "step": 523870
    },
    {
      "epoch": 1.8360687912465345,
      "grad_norm": 2.828125,
      "learning_rate": 2.1554371727272963e-05,
      "loss": 0.8231,
      "step": 523880
    },
    {
      "epoch": 1.8361038387534303,
      "grad_norm": 3.125,
      "learning_rate": 2.1553722698609265e-05,
      "loss": 0.8127,
      "step": 523890
    },
    {
      "epoch": 1.8361388862603258,
      "grad_norm": 2.890625,
      "learning_rate": 2.1553073669945563e-05,
      "loss": 0.8229,
      "step": 523900
    },
    {
      "epoch": 1.8361739337672214,
      "grad_norm": 3.03125,
      "learning_rate": 2.155242464128186e-05,
      "loss": 0.8808,
      "step": 523910
    },
    {
      "epoch": 1.8362089812741171,
      "grad_norm": 2.796875,
      "learning_rate": 2.155177561261816e-05,
      "loss": 0.8778,
      "step": 523920
    },
    {
      "epoch": 1.8362440287810127,
      "grad_norm": 2.75,
      "learning_rate": 2.1551126583954453e-05,
      "loss": 0.7924,
      "step": 523930
    },
    {
      "epoch": 1.8362790762879082,
      "grad_norm": 3.03125,
      "learning_rate": 2.155047755529075e-05,
      "loss": 0.7985,
      "step": 523940
    },
    {
      "epoch": 1.836314123794804,
      "grad_norm": 2.765625,
      "learning_rate": 2.154982852662705e-05,
      "loss": 0.9108,
      "step": 523950
    },
    {
      "epoch": 1.8363491713016995,
      "grad_norm": 3.03125,
      "learning_rate": 2.1549179497963347e-05,
      "loss": 0.8031,
      "step": 523960
    },
    {
      "epoch": 1.836384218808595,
      "grad_norm": 3.015625,
      "learning_rate": 2.1548530469299645e-05,
      "loss": 0.8635,
      "step": 523970
    },
    {
      "epoch": 1.8364192663154908,
      "grad_norm": 3.171875,
      "learning_rate": 2.1547881440635943e-05,
      "loss": 0.8876,
      "step": 523980
    },
    {
      "epoch": 1.836454313822386,
      "grad_norm": 2.65625,
      "learning_rate": 2.154723241197224e-05,
      "loss": 0.7993,
      "step": 523990
    },
    {
      "epoch": 1.8364893613292819,
      "grad_norm": 2.84375,
      "learning_rate": 2.1546583383308543e-05,
      "loss": 0.8099,
      "step": 524000
    },
    {
      "epoch": 1.8365244088361774,
      "grad_norm": 3.296875,
      "learning_rate": 2.154593435464484e-05,
      "loss": 0.8524,
      "step": 524010
    },
    {
      "epoch": 1.836559456343073,
      "grad_norm": 2.8125,
      "learning_rate": 2.154528532598114e-05,
      "loss": 0.7983,
      "step": 524020
    },
    {
      "epoch": 1.8365945038499687,
      "grad_norm": 3.203125,
      "learning_rate": 2.1544636297317437e-05,
      "loss": 0.9165,
      "step": 524030
    },
    {
      "epoch": 1.8366295513568642,
      "grad_norm": 3.078125,
      "learning_rate": 2.1543987268653735e-05,
      "loss": 0.944,
      "step": 524040
    },
    {
      "epoch": 1.8366645988637598,
      "grad_norm": 3.109375,
      "learning_rate": 2.1543338239990033e-05,
      "loss": 0.9003,
      "step": 524050
    },
    {
      "epoch": 1.8366996463706555,
      "grad_norm": 3.1875,
      "learning_rate": 2.154268921132633e-05,
      "loss": 0.8508,
      "step": 524060
    },
    {
      "epoch": 1.836734693877551,
      "grad_norm": 2.5,
      "learning_rate": 2.154204018266263e-05,
      "loss": 0.8506,
      "step": 524070
    },
    {
      "epoch": 1.8367697413844466,
      "grad_norm": 2.9375,
      "learning_rate": 2.1541391153998927e-05,
      "loss": 0.9032,
      "step": 524080
    },
    {
      "epoch": 1.8368047888913424,
      "grad_norm": 3.0625,
      "learning_rate": 2.1540742125335225e-05,
      "loss": 0.8789,
      "step": 524090
    },
    {
      "epoch": 1.8368398363982377,
      "grad_norm": 3.0625,
      "learning_rate": 2.1540093096671523e-05,
      "loss": 0.8505,
      "step": 524100
    },
    {
      "epoch": 1.8368748839051334,
      "grad_norm": 3.3125,
      "learning_rate": 2.153944406800782e-05,
      "loss": 0.903,
      "step": 524110
    },
    {
      "epoch": 1.836909931412029,
      "grad_norm": 2.96875,
      "learning_rate": 2.153879503934412e-05,
      "loss": 0.9278,
      "step": 524120
    },
    {
      "epoch": 1.8369449789189245,
      "grad_norm": 2.78125,
      "learning_rate": 2.1538146010680417e-05,
      "loss": 0.834,
      "step": 524130
    },
    {
      "epoch": 1.8369800264258203,
      "grad_norm": 2.59375,
      "learning_rate": 2.1537496982016715e-05,
      "loss": 0.8128,
      "step": 524140
    },
    {
      "epoch": 1.8370150739327158,
      "grad_norm": 3.0,
      "learning_rate": 2.1536847953353013e-05,
      "loss": 0.7538,
      "step": 524150
    },
    {
      "epoch": 1.8370501214396113,
      "grad_norm": 2.875,
      "learning_rate": 2.153619892468931e-05,
      "loss": 0.7974,
      "step": 524160
    },
    {
      "epoch": 1.837085168946507,
      "grad_norm": 2.796875,
      "learning_rate": 2.153554989602561e-05,
      "loss": 0.8737,
      "step": 524170
    },
    {
      "epoch": 1.8371202164534026,
      "grad_norm": 2.640625,
      "learning_rate": 2.1534900867361907e-05,
      "loss": 0.768,
      "step": 524180
    },
    {
      "epoch": 1.8371552639602982,
      "grad_norm": 2.9375,
      "learning_rate": 2.1534251838698205e-05,
      "loss": 0.8104,
      "step": 524190
    },
    {
      "epoch": 1.837190311467194,
      "grad_norm": 3.59375,
      "learning_rate": 2.1533602810034503e-05,
      "loss": 0.8682,
      "step": 524200
    },
    {
      "epoch": 1.8372253589740892,
      "grad_norm": 2.765625,
      "learning_rate": 2.15329537813708e-05,
      "loss": 0.7741,
      "step": 524210
    },
    {
      "epoch": 1.837260406480985,
      "grad_norm": 2.875,
      "learning_rate": 2.15323047527071e-05,
      "loss": 0.8415,
      "step": 524220
    },
    {
      "epoch": 1.8372954539878805,
      "grad_norm": 3.15625,
      "learning_rate": 2.1531655724043397e-05,
      "loss": 0.8212,
      "step": 524230
    },
    {
      "epoch": 1.837330501494776,
      "grad_norm": 3.171875,
      "learning_rate": 2.1531006695379695e-05,
      "loss": 0.7983,
      "step": 524240
    },
    {
      "epoch": 1.8373655490016718,
      "grad_norm": 3.09375,
      "learning_rate": 2.1530357666715996e-05,
      "loss": 0.8591,
      "step": 524250
    },
    {
      "epoch": 1.8374005965085674,
      "grad_norm": 2.6875,
      "learning_rate": 2.1529708638052294e-05,
      "loss": 0.7761,
      "step": 524260
    },
    {
      "epoch": 1.8374356440154629,
      "grad_norm": 3.015625,
      "learning_rate": 2.1529059609388592e-05,
      "loss": 0.8225,
      "step": 524270
    },
    {
      "epoch": 1.8374706915223586,
      "grad_norm": 2.828125,
      "learning_rate": 2.152841058072489e-05,
      "loss": 0.8441,
      "step": 524280
    },
    {
      "epoch": 1.8375057390292542,
      "grad_norm": 2.640625,
      "learning_rate": 2.1527761552061188e-05,
      "loss": 0.795,
      "step": 524290
    },
    {
      "epoch": 1.8375407865361497,
      "grad_norm": 3.296875,
      "learning_rate": 2.1527112523397486e-05,
      "loss": 0.8597,
      "step": 524300
    },
    {
      "epoch": 1.8375758340430455,
      "grad_norm": 3.109375,
      "learning_rate": 2.152646349473378e-05,
      "loss": 0.9594,
      "step": 524310
    },
    {
      "epoch": 1.8376108815499408,
      "grad_norm": 2.90625,
      "learning_rate": 2.152581446607008e-05,
      "loss": 0.8527,
      "step": 524320
    },
    {
      "epoch": 1.8376459290568365,
      "grad_norm": 3.03125,
      "learning_rate": 2.1525165437406377e-05,
      "loss": 0.8152,
      "step": 524330
    },
    {
      "epoch": 1.8376809765637323,
      "grad_norm": 2.734375,
      "learning_rate": 2.1524516408742675e-05,
      "loss": 0.8203,
      "step": 524340
    },
    {
      "epoch": 1.8377160240706276,
      "grad_norm": 2.71875,
      "learning_rate": 2.1523867380078973e-05,
      "loss": 0.8749,
      "step": 524350
    },
    {
      "epoch": 1.8377510715775234,
      "grad_norm": 3.34375,
      "learning_rate": 2.152321835141527e-05,
      "loss": 0.8242,
      "step": 524360
    },
    {
      "epoch": 1.837786119084419,
      "grad_norm": 3.328125,
      "learning_rate": 2.1522569322751572e-05,
      "loss": 0.8447,
      "step": 524370
    },
    {
      "epoch": 1.8378211665913144,
      "grad_norm": 3.15625,
      "learning_rate": 2.152192029408787e-05,
      "loss": 0.7573,
      "step": 524380
    },
    {
      "epoch": 1.8378562140982102,
      "grad_norm": 2.9375,
      "learning_rate": 2.1521271265424168e-05,
      "loss": 0.8193,
      "step": 524390
    },
    {
      "epoch": 1.8378912616051057,
      "grad_norm": 3.25,
      "learning_rate": 2.1520622236760466e-05,
      "loss": 0.8916,
      "step": 524400
    },
    {
      "epoch": 1.8379263091120013,
      "grad_norm": 2.578125,
      "learning_rate": 2.1519973208096764e-05,
      "loss": 0.8937,
      "step": 524410
    },
    {
      "epoch": 1.837961356618897,
      "grad_norm": 3.046875,
      "learning_rate": 2.1519324179433062e-05,
      "loss": 0.81,
      "step": 524420
    },
    {
      "epoch": 1.8379964041257923,
      "grad_norm": 2.96875,
      "learning_rate": 2.151867515076936e-05,
      "loss": 0.7977,
      "step": 524430
    },
    {
      "epoch": 1.838031451632688,
      "grad_norm": 2.671875,
      "learning_rate": 2.1518026122105658e-05,
      "loss": 0.8027,
      "step": 524440
    },
    {
      "epoch": 1.8380664991395839,
      "grad_norm": 2.796875,
      "learning_rate": 2.1517377093441956e-05,
      "loss": 0.791,
      "step": 524450
    },
    {
      "epoch": 1.8381015466464792,
      "grad_norm": 3.03125,
      "learning_rate": 2.1516728064778254e-05,
      "loss": 0.7856,
      "step": 524460
    },
    {
      "epoch": 1.838136594153375,
      "grad_norm": 3.4375,
      "learning_rate": 2.1516079036114552e-05,
      "loss": 0.8781,
      "step": 524470
    },
    {
      "epoch": 1.8381716416602705,
      "grad_norm": 3.015625,
      "learning_rate": 2.151543000745085e-05,
      "loss": 0.8403,
      "step": 524480
    },
    {
      "epoch": 1.838206689167166,
      "grad_norm": 3.0625,
      "learning_rate": 2.1514780978787148e-05,
      "loss": 0.8557,
      "step": 524490
    },
    {
      "epoch": 1.8382417366740618,
      "grad_norm": 3.0625,
      "learning_rate": 2.1514131950123446e-05,
      "loss": 0.8688,
      "step": 524500
    },
    {
      "epoch": 1.8382767841809573,
      "grad_norm": 3.03125,
      "learning_rate": 2.1513482921459744e-05,
      "loss": 0.7622,
      "step": 524510
    },
    {
      "epoch": 1.8383118316878528,
      "grad_norm": 2.859375,
      "learning_rate": 2.1512833892796042e-05,
      "loss": 0.8255,
      "step": 524520
    },
    {
      "epoch": 1.8383468791947486,
      "grad_norm": 2.609375,
      "learning_rate": 2.151218486413234e-05,
      "loss": 0.7732,
      "step": 524530
    },
    {
      "epoch": 1.8383819267016441,
      "grad_norm": 2.40625,
      "learning_rate": 2.1511535835468638e-05,
      "loss": 0.8232,
      "step": 524540
    },
    {
      "epoch": 1.8384169742085397,
      "grad_norm": 2.90625,
      "learning_rate": 2.1510886806804936e-05,
      "loss": 0.7674,
      "step": 524550
    },
    {
      "epoch": 1.8384520217154354,
      "grad_norm": 3.15625,
      "learning_rate": 2.1510237778141234e-05,
      "loss": 0.8511,
      "step": 524560
    },
    {
      "epoch": 1.8384870692223307,
      "grad_norm": 3.0,
      "learning_rate": 2.1509588749477532e-05,
      "loss": 0.7817,
      "step": 524570
    },
    {
      "epoch": 1.8385221167292265,
      "grad_norm": 2.90625,
      "learning_rate": 2.150893972081383e-05,
      "loss": 0.8141,
      "step": 524580
    },
    {
      "epoch": 1.838557164236122,
      "grad_norm": 2.53125,
      "learning_rate": 2.1508290692150128e-05,
      "loss": 0.769,
      "step": 524590
    },
    {
      "epoch": 1.8385922117430176,
      "grad_norm": 3.03125,
      "learning_rate": 2.1507641663486426e-05,
      "loss": 0.8403,
      "step": 524600
    },
    {
      "epoch": 1.8386272592499133,
      "grad_norm": 2.859375,
      "learning_rate": 2.1506992634822724e-05,
      "loss": 0.8754,
      "step": 524610
    },
    {
      "epoch": 1.8386623067568089,
      "grad_norm": 2.5,
      "learning_rate": 2.1506343606159026e-05,
      "loss": 0.8011,
      "step": 524620
    },
    {
      "epoch": 1.8386973542637044,
      "grad_norm": 2.859375,
      "learning_rate": 2.1505694577495324e-05,
      "loss": 0.733,
      "step": 524630
    },
    {
      "epoch": 1.8387324017706002,
      "grad_norm": 2.703125,
      "learning_rate": 2.150504554883162e-05,
      "loss": 0.83,
      "step": 524640
    },
    {
      "epoch": 1.8387674492774957,
      "grad_norm": 2.78125,
      "learning_rate": 2.150439652016792e-05,
      "loss": 0.9078,
      "step": 524650
    },
    {
      "epoch": 1.8388024967843912,
      "grad_norm": 2.921875,
      "learning_rate": 2.1503747491504218e-05,
      "loss": 0.7773,
      "step": 524660
    },
    {
      "epoch": 1.838837544291287,
      "grad_norm": 2.953125,
      "learning_rate": 2.1503098462840516e-05,
      "loss": 0.86,
      "step": 524670
    },
    {
      "epoch": 1.8388725917981823,
      "grad_norm": 2.734375,
      "learning_rate": 2.150244943417681e-05,
      "loss": 0.8002,
      "step": 524680
    },
    {
      "epoch": 1.838907639305078,
      "grad_norm": 2.609375,
      "learning_rate": 2.1501800405513108e-05,
      "loss": 0.8077,
      "step": 524690
    },
    {
      "epoch": 1.8389426868119736,
      "grad_norm": 2.875,
      "learning_rate": 2.1501151376849406e-05,
      "loss": 0.8314,
      "step": 524700
    },
    {
      "epoch": 1.8389777343188691,
      "grad_norm": 2.96875,
      "learning_rate": 2.1500502348185704e-05,
      "loss": 0.8437,
      "step": 524710
    },
    {
      "epoch": 1.8390127818257649,
      "grad_norm": 3.015625,
      "learning_rate": 2.1499853319522002e-05,
      "loss": 0.9373,
      "step": 524720
    },
    {
      "epoch": 1.8390478293326604,
      "grad_norm": 2.96875,
      "learning_rate": 2.1499204290858304e-05,
      "loss": 0.8158,
      "step": 524730
    },
    {
      "epoch": 1.839082876839556,
      "grad_norm": 3.078125,
      "learning_rate": 2.14985552621946e-05,
      "loss": 0.8262,
      "step": 524740
    },
    {
      "epoch": 1.8391179243464517,
      "grad_norm": 3.328125,
      "learning_rate": 2.14979062335309e-05,
      "loss": 0.9109,
      "step": 524750
    },
    {
      "epoch": 1.8391529718533473,
      "grad_norm": 2.828125,
      "learning_rate": 2.1497257204867198e-05,
      "loss": 0.8015,
      "step": 524760
    },
    {
      "epoch": 1.8391880193602428,
      "grad_norm": 2.890625,
      "learning_rate": 2.1496608176203496e-05,
      "loss": 0.7505,
      "step": 524770
    },
    {
      "epoch": 1.8392230668671385,
      "grad_norm": 3.234375,
      "learning_rate": 2.1495959147539794e-05,
      "loss": 0.8015,
      "step": 524780
    },
    {
      "epoch": 1.8392581143740339,
      "grad_norm": 2.5625,
      "learning_rate": 2.149531011887609e-05,
      "loss": 0.8456,
      "step": 524790
    },
    {
      "epoch": 1.8392931618809296,
      "grad_norm": 2.96875,
      "learning_rate": 2.149466109021239e-05,
      "loss": 0.8745,
      "step": 524800
    },
    {
      "epoch": 1.8393282093878252,
      "grad_norm": 2.84375,
      "learning_rate": 2.1494012061548688e-05,
      "loss": 0.796,
      "step": 524810
    },
    {
      "epoch": 1.8393632568947207,
      "grad_norm": 2.375,
      "learning_rate": 2.1493363032884986e-05,
      "loss": 0.7276,
      "step": 524820
    },
    {
      "epoch": 1.8393983044016164,
      "grad_norm": 2.90625,
      "learning_rate": 2.1492714004221284e-05,
      "loss": 0.7935,
      "step": 524830
    },
    {
      "epoch": 1.839433351908512,
      "grad_norm": 2.765625,
      "learning_rate": 2.149206497555758e-05,
      "loss": 0.8283,
      "step": 524840
    },
    {
      "epoch": 1.8394683994154075,
      "grad_norm": 2.984375,
      "learning_rate": 2.149141594689388e-05,
      "loss": 0.8833,
      "step": 524850
    },
    {
      "epoch": 1.8395034469223033,
      "grad_norm": 3.3125,
      "learning_rate": 2.1490766918230178e-05,
      "loss": 0.828,
      "step": 524860
    },
    {
      "epoch": 1.8395384944291988,
      "grad_norm": 2.5,
      "learning_rate": 2.1490117889566476e-05,
      "loss": 0.8923,
      "step": 524870
    },
    {
      "epoch": 1.8395735419360943,
      "grad_norm": 3.234375,
      "learning_rate": 2.1489468860902774e-05,
      "loss": 0.8343,
      "step": 524880
    },
    {
      "epoch": 1.83960858944299,
      "grad_norm": 2.921875,
      "learning_rate": 2.148881983223907e-05,
      "loss": 0.7884,
      "step": 524890
    },
    {
      "epoch": 1.8396436369498854,
      "grad_norm": 2.6875,
      "learning_rate": 2.148817080357537e-05,
      "loss": 0.8241,
      "step": 524900
    },
    {
      "epoch": 1.8396786844567812,
      "grad_norm": 2.359375,
      "learning_rate": 2.1487521774911668e-05,
      "loss": 0.7416,
      "step": 524910
    },
    {
      "epoch": 1.8397137319636767,
      "grad_norm": 2.703125,
      "learning_rate": 2.1486872746247966e-05,
      "loss": 0.8391,
      "step": 524920
    },
    {
      "epoch": 1.8397487794705722,
      "grad_norm": 2.78125,
      "learning_rate": 2.1486223717584264e-05,
      "loss": 0.7811,
      "step": 524930
    },
    {
      "epoch": 1.839783826977468,
      "grad_norm": 2.71875,
      "learning_rate": 2.148557468892056e-05,
      "loss": 0.8,
      "step": 524940
    },
    {
      "epoch": 1.8398188744843635,
      "grad_norm": 2.8125,
      "learning_rate": 2.148492566025686e-05,
      "loss": 0.8307,
      "step": 524950
    },
    {
      "epoch": 1.839853921991259,
      "grad_norm": 2.890625,
      "learning_rate": 2.1484276631593158e-05,
      "loss": 0.851,
      "step": 524960
    },
    {
      "epoch": 1.8398889694981548,
      "grad_norm": 2.609375,
      "learning_rate": 2.1483627602929456e-05,
      "loss": 0.7771,
      "step": 524970
    },
    {
      "epoch": 1.8399240170050504,
      "grad_norm": 2.734375,
      "learning_rate": 2.1482978574265754e-05,
      "loss": 0.8257,
      "step": 524980
    },
    {
      "epoch": 1.839959064511946,
      "grad_norm": 2.484375,
      "learning_rate": 2.1482329545602055e-05,
      "loss": 0.8135,
      "step": 524990
    },
    {
      "epoch": 1.8399941120188417,
      "grad_norm": 2.46875,
      "learning_rate": 2.1481680516938353e-05,
      "loss": 0.8062,
      "step": 525000
    },
    {
      "epoch": 1.8399941120188417,
      "eval_loss": 0.776016116142273,
      "eval_runtime": 562.0711,
      "eval_samples_per_second": 676.847,
      "eval_steps_per_second": 56.404,
      "step": 525000
    },
    {
      "epoch": 1.840029159525737,
      "grad_norm": 2.40625,
      "learning_rate": 2.148103148827465e-05,
      "loss": 0.7502,
      "step": 525010
    },
    {
      "epoch": 1.8400642070326327,
      "grad_norm": 2.6875,
      "learning_rate": 2.148038245961095e-05,
      "loss": 0.766,
      "step": 525020
    },
    {
      "epoch": 1.8400992545395285,
      "grad_norm": 3.125,
      "learning_rate": 2.1479733430947247e-05,
      "loss": 0.827,
      "step": 525030
    },
    {
      "epoch": 1.8401343020464238,
      "grad_norm": 3.1875,
      "learning_rate": 2.1479084402283545e-05,
      "loss": 0.8161,
      "step": 525040
    },
    {
      "epoch": 1.8401693495533196,
      "grad_norm": 2.984375,
      "learning_rate": 2.1478435373619843e-05,
      "loss": 0.8489,
      "step": 525050
    },
    {
      "epoch": 1.840204397060215,
      "grad_norm": 3.25,
      "learning_rate": 2.1477786344956138e-05,
      "loss": 0.8675,
      "step": 525060
    },
    {
      "epoch": 1.8402394445671106,
      "grad_norm": 3.265625,
      "learning_rate": 2.1477137316292436e-05,
      "loss": 0.8528,
      "step": 525070
    },
    {
      "epoch": 1.8402744920740064,
      "grad_norm": 2.71875,
      "learning_rate": 2.1476488287628734e-05,
      "loss": 0.7963,
      "step": 525080
    },
    {
      "epoch": 1.840309539580902,
      "grad_norm": 2.53125,
      "learning_rate": 2.147583925896503e-05,
      "loss": 0.8303,
      "step": 525090
    },
    {
      "epoch": 1.8403445870877975,
      "grad_norm": 3.0625,
      "learning_rate": 2.1475190230301333e-05,
      "loss": 0.861,
      "step": 525100
    },
    {
      "epoch": 1.8403796345946932,
      "grad_norm": 3.046875,
      "learning_rate": 2.147454120163763e-05,
      "loss": 0.8267,
      "step": 525110
    },
    {
      "epoch": 1.8404146821015885,
      "grad_norm": 3.0625,
      "learning_rate": 2.147389217297393e-05,
      "loss": 0.8373,
      "step": 525120
    },
    {
      "epoch": 1.8404497296084843,
      "grad_norm": 3.078125,
      "learning_rate": 2.1473243144310227e-05,
      "loss": 0.8763,
      "step": 525130
    },
    {
      "epoch": 1.84048477711538,
      "grad_norm": 2.796875,
      "learning_rate": 2.1472594115646525e-05,
      "loss": 0.8186,
      "step": 525140
    },
    {
      "epoch": 1.8405198246222754,
      "grad_norm": 2.859375,
      "learning_rate": 2.1471945086982823e-05,
      "loss": 0.8305,
      "step": 525150
    },
    {
      "epoch": 1.8405548721291711,
      "grad_norm": 2.65625,
      "learning_rate": 2.147129605831912e-05,
      "loss": 0.8212,
      "step": 525160
    },
    {
      "epoch": 1.8405899196360667,
      "grad_norm": 2.640625,
      "learning_rate": 2.147064702965542e-05,
      "loss": 0.8626,
      "step": 525170
    },
    {
      "epoch": 1.8406249671429622,
      "grad_norm": 3.015625,
      "learning_rate": 2.1469998000991717e-05,
      "loss": 0.8184,
      "step": 525180
    },
    {
      "epoch": 1.840660014649858,
      "grad_norm": 2.859375,
      "learning_rate": 2.1469348972328015e-05,
      "loss": 0.8825,
      "step": 525190
    },
    {
      "epoch": 1.8406950621567535,
      "grad_norm": 3.109375,
      "learning_rate": 2.1468699943664313e-05,
      "loss": 0.7995,
      "step": 525200
    },
    {
      "epoch": 1.840730109663649,
      "grad_norm": 2.609375,
      "learning_rate": 2.146805091500061e-05,
      "loss": 0.7766,
      "step": 525210
    },
    {
      "epoch": 1.8407651571705448,
      "grad_norm": 2.8125,
      "learning_rate": 2.146740188633691e-05,
      "loss": 0.748,
      "step": 525220
    },
    {
      "epoch": 1.8408002046774403,
      "grad_norm": 2.828125,
      "learning_rate": 2.1466752857673207e-05,
      "loss": 0.8286,
      "step": 525230
    },
    {
      "epoch": 1.8408352521843359,
      "grad_norm": 2.890625,
      "learning_rate": 2.146610382900951e-05,
      "loss": 0.817,
      "step": 525240
    },
    {
      "epoch": 1.8408702996912316,
      "grad_norm": 2.6875,
      "learning_rate": 2.1465454800345803e-05,
      "loss": 0.8848,
      "step": 525250
    },
    {
      "epoch": 1.840905347198127,
      "grad_norm": 3.046875,
      "learning_rate": 2.14648057716821e-05,
      "loss": 0.8307,
      "step": 525260
    },
    {
      "epoch": 1.8409403947050227,
      "grad_norm": 2.921875,
      "learning_rate": 2.14641567430184e-05,
      "loss": 0.858,
      "step": 525270
    },
    {
      "epoch": 1.8409754422119182,
      "grad_norm": 3.25,
      "learning_rate": 2.1463507714354697e-05,
      "loss": 0.8376,
      "step": 525280
    },
    {
      "epoch": 1.8410104897188138,
      "grad_norm": 2.828125,
      "learning_rate": 2.1462858685690995e-05,
      "loss": 0.7722,
      "step": 525290
    },
    {
      "epoch": 1.8410455372257095,
      "grad_norm": 3.03125,
      "learning_rate": 2.1462209657027293e-05,
      "loss": 0.8117,
      "step": 525300
    },
    {
      "epoch": 1.841080584732605,
      "grad_norm": 2.765625,
      "learning_rate": 2.146156062836359e-05,
      "loss": 0.852,
      "step": 525310
    },
    {
      "epoch": 1.8411156322395006,
      "grad_norm": 3.046875,
      "learning_rate": 2.146091159969989e-05,
      "loss": 0.8608,
      "step": 525320
    },
    {
      "epoch": 1.8411506797463963,
      "grad_norm": 2.703125,
      "learning_rate": 2.1460262571036187e-05,
      "loss": 0.9663,
      "step": 525330
    },
    {
      "epoch": 1.8411857272532919,
      "grad_norm": 2.59375,
      "learning_rate": 2.1459613542372485e-05,
      "loss": 0.8837,
      "step": 525340
    },
    {
      "epoch": 1.8412207747601874,
      "grad_norm": 2.84375,
      "learning_rate": 2.1458964513708786e-05,
      "loss": 0.8996,
      "step": 525350
    },
    {
      "epoch": 1.8412558222670832,
      "grad_norm": 3.046875,
      "learning_rate": 2.1458315485045084e-05,
      "loss": 0.8664,
      "step": 525360
    },
    {
      "epoch": 1.8412908697739785,
      "grad_norm": 2.671875,
      "learning_rate": 2.1457666456381382e-05,
      "loss": 0.8467,
      "step": 525370
    },
    {
      "epoch": 1.8413259172808742,
      "grad_norm": 2.875,
      "learning_rate": 2.145701742771768e-05,
      "loss": 0.7642,
      "step": 525380
    },
    {
      "epoch": 1.8413609647877698,
      "grad_norm": 2.75,
      "learning_rate": 2.145636839905398e-05,
      "loss": 0.7687,
      "step": 525390
    },
    {
      "epoch": 1.8413960122946653,
      "grad_norm": 3.25,
      "learning_rate": 2.1455719370390276e-05,
      "loss": 0.7957,
      "step": 525400
    },
    {
      "epoch": 1.841431059801561,
      "grad_norm": 2.421875,
      "learning_rate": 2.1455070341726574e-05,
      "loss": 0.8738,
      "step": 525410
    },
    {
      "epoch": 1.8414661073084566,
      "grad_norm": 2.421875,
      "learning_rate": 2.1454421313062872e-05,
      "loss": 0.7887,
      "step": 525420
    },
    {
      "epoch": 1.8415011548153521,
      "grad_norm": 3.34375,
      "learning_rate": 2.1453772284399167e-05,
      "loss": 0.8692,
      "step": 525430
    },
    {
      "epoch": 1.841536202322248,
      "grad_norm": 2.5,
      "learning_rate": 2.1453123255735465e-05,
      "loss": 0.8209,
      "step": 525440
    },
    {
      "epoch": 1.8415712498291434,
      "grad_norm": 2.96875,
      "learning_rate": 2.1452474227071763e-05,
      "loss": 0.9013,
      "step": 525450
    },
    {
      "epoch": 1.841606297336039,
      "grad_norm": 2.828125,
      "learning_rate": 2.145182519840806e-05,
      "loss": 0.7534,
      "step": 525460
    },
    {
      "epoch": 1.8416413448429347,
      "grad_norm": 3.265625,
      "learning_rate": 2.1451176169744362e-05,
      "loss": 0.7654,
      "step": 525470
    },
    {
      "epoch": 1.84167639234983,
      "grad_norm": 2.734375,
      "learning_rate": 2.145052714108066e-05,
      "loss": 0.8488,
      "step": 525480
    },
    {
      "epoch": 1.8417114398567258,
      "grad_norm": 2.90625,
      "learning_rate": 2.144987811241696e-05,
      "loss": 0.7632,
      "step": 525490
    },
    {
      "epoch": 1.8417464873636213,
      "grad_norm": 2.515625,
      "learning_rate": 2.1449229083753256e-05,
      "loss": 0.8539,
      "step": 525500
    },
    {
      "epoch": 1.8417815348705169,
      "grad_norm": 3.015625,
      "learning_rate": 2.1448580055089554e-05,
      "loss": 0.8532,
      "step": 525510
    },
    {
      "epoch": 1.8418165823774126,
      "grad_norm": 2.890625,
      "learning_rate": 2.1447931026425852e-05,
      "loss": 0.8225,
      "step": 525520
    },
    {
      "epoch": 1.8418516298843082,
      "grad_norm": 2.453125,
      "learning_rate": 2.144728199776215e-05,
      "loss": 0.7251,
      "step": 525530
    },
    {
      "epoch": 1.8418866773912037,
      "grad_norm": 2.734375,
      "learning_rate": 2.144663296909845e-05,
      "loss": 0.8754,
      "step": 525540
    },
    {
      "epoch": 1.8419217248980995,
      "grad_norm": 2.5,
      "learning_rate": 2.1445983940434746e-05,
      "loss": 0.774,
      "step": 525550
    },
    {
      "epoch": 1.841956772404995,
      "grad_norm": 2.984375,
      "learning_rate": 2.1445334911771044e-05,
      "loss": 0.8798,
      "step": 525560
    },
    {
      "epoch": 1.8419918199118905,
      "grad_norm": 3.3125,
      "learning_rate": 2.1444685883107342e-05,
      "loss": 0.8823,
      "step": 525570
    },
    {
      "epoch": 1.8420268674187863,
      "grad_norm": 2.59375,
      "learning_rate": 2.144403685444364e-05,
      "loss": 0.8726,
      "step": 525580
    },
    {
      "epoch": 1.8420619149256816,
      "grad_norm": 2.859375,
      "learning_rate": 2.144338782577994e-05,
      "loss": 0.816,
      "step": 525590
    },
    {
      "epoch": 1.8420969624325774,
      "grad_norm": 3.265625,
      "learning_rate": 2.1442738797116236e-05,
      "loss": 0.8561,
      "step": 525600
    },
    {
      "epoch": 1.842132009939473,
      "grad_norm": 2.875,
      "learning_rate": 2.1442089768452538e-05,
      "loss": 0.8499,
      "step": 525610
    },
    {
      "epoch": 1.8421670574463684,
      "grad_norm": 2.484375,
      "learning_rate": 2.1441440739788832e-05,
      "loss": 0.7223,
      "step": 525620
    },
    {
      "epoch": 1.8422021049532642,
      "grad_norm": 2.40625,
      "learning_rate": 2.144079171112513e-05,
      "loss": 0.8994,
      "step": 525630
    },
    {
      "epoch": 1.8422371524601597,
      "grad_norm": 2.890625,
      "learning_rate": 2.144014268246143e-05,
      "loss": 0.803,
      "step": 525640
    },
    {
      "epoch": 1.8422721999670553,
      "grad_norm": 3.21875,
      "learning_rate": 2.1439493653797726e-05,
      "loss": 0.7715,
      "step": 525650
    },
    {
      "epoch": 1.842307247473951,
      "grad_norm": 3.296875,
      "learning_rate": 2.1438844625134024e-05,
      "loss": 0.8631,
      "step": 525660
    },
    {
      "epoch": 1.8423422949808466,
      "grad_norm": 2.40625,
      "learning_rate": 2.1438195596470322e-05,
      "loss": 0.7302,
      "step": 525670
    },
    {
      "epoch": 1.842377342487742,
      "grad_norm": 3.03125,
      "learning_rate": 2.143754656780662e-05,
      "loss": 0.8733,
      "step": 525680
    },
    {
      "epoch": 1.8424123899946379,
      "grad_norm": 3.265625,
      "learning_rate": 2.143689753914292e-05,
      "loss": 0.8388,
      "step": 525690
    },
    {
      "epoch": 1.8424474375015332,
      "grad_norm": 2.828125,
      "learning_rate": 2.1436248510479216e-05,
      "loss": 0.828,
      "step": 525700
    },
    {
      "epoch": 1.842482485008429,
      "grad_norm": 2.875,
      "learning_rate": 2.1435599481815514e-05,
      "loss": 0.8466,
      "step": 525710
    },
    {
      "epoch": 1.8425175325153247,
      "grad_norm": 2.71875,
      "learning_rate": 2.1434950453151816e-05,
      "loss": 0.8222,
      "step": 525720
    },
    {
      "epoch": 1.84255258002222,
      "grad_norm": 2.46875,
      "learning_rate": 2.1434301424488114e-05,
      "loss": 0.7375,
      "step": 525730
    },
    {
      "epoch": 1.8425876275291158,
      "grad_norm": 2.6875,
      "learning_rate": 2.1433652395824412e-05,
      "loss": 0.7427,
      "step": 525740
    },
    {
      "epoch": 1.8426226750360113,
      "grad_norm": 3.140625,
      "learning_rate": 2.143300336716071e-05,
      "loss": 0.7856,
      "step": 525750
    },
    {
      "epoch": 1.8426577225429068,
      "grad_norm": 3.5,
      "learning_rate": 2.1432354338497008e-05,
      "loss": 0.8391,
      "step": 525760
    },
    {
      "epoch": 1.8426927700498026,
      "grad_norm": 2.96875,
      "learning_rate": 2.1431705309833306e-05,
      "loss": 0.8717,
      "step": 525770
    },
    {
      "epoch": 1.8427278175566981,
      "grad_norm": 2.15625,
      "learning_rate": 2.1431056281169604e-05,
      "loss": 0.8577,
      "step": 525780
    },
    {
      "epoch": 1.8427628650635937,
      "grad_norm": 3.421875,
      "learning_rate": 2.1430407252505902e-05,
      "loss": 0.8983,
      "step": 525790
    },
    {
      "epoch": 1.8427979125704894,
      "grad_norm": 2.96875,
      "learning_rate": 2.14297582238422e-05,
      "loss": 0.8098,
      "step": 525800
    },
    {
      "epoch": 1.8428329600773847,
      "grad_norm": 2.6875,
      "learning_rate": 2.1429109195178494e-05,
      "loss": 0.7642,
      "step": 525810
    },
    {
      "epoch": 1.8428680075842805,
      "grad_norm": 2.671875,
      "learning_rate": 2.1428460166514792e-05,
      "loss": 0.7748,
      "step": 525820
    },
    {
      "epoch": 1.8429030550911762,
      "grad_norm": 3.625,
      "learning_rate": 2.1427811137851094e-05,
      "loss": 0.8609,
      "step": 525830
    },
    {
      "epoch": 1.8429381025980716,
      "grad_norm": 2.75,
      "learning_rate": 2.1427162109187392e-05,
      "loss": 0.7811,
      "step": 525840
    },
    {
      "epoch": 1.8429731501049673,
      "grad_norm": 2.96875,
      "learning_rate": 2.142651308052369e-05,
      "loss": 0.8284,
      "step": 525850
    },
    {
      "epoch": 1.8430081976118629,
      "grad_norm": 3.6875,
      "learning_rate": 2.1425864051859988e-05,
      "loss": 0.7943,
      "step": 525860
    },
    {
      "epoch": 1.8430432451187584,
      "grad_norm": 2.90625,
      "learning_rate": 2.1425215023196286e-05,
      "loss": 0.7733,
      "step": 525870
    },
    {
      "epoch": 1.8430782926256541,
      "grad_norm": 2.828125,
      "learning_rate": 2.1424565994532584e-05,
      "loss": 0.8821,
      "step": 525880
    },
    {
      "epoch": 1.8431133401325497,
      "grad_norm": 3.09375,
      "learning_rate": 2.1423916965868882e-05,
      "loss": 0.8336,
      "step": 525890
    },
    {
      "epoch": 1.8431483876394452,
      "grad_norm": 2.875,
      "learning_rate": 2.142326793720518e-05,
      "loss": 0.7442,
      "step": 525900
    },
    {
      "epoch": 1.843183435146341,
      "grad_norm": 2.703125,
      "learning_rate": 2.1422618908541478e-05,
      "loss": 0.8227,
      "step": 525910
    },
    {
      "epoch": 1.8432184826532365,
      "grad_norm": 2.9375,
      "learning_rate": 2.1421969879877776e-05,
      "loss": 0.7887,
      "step": 525920
    },
    {
      "epoch": 1.843253530160132,
      "grad_norm": 2.984375,
      "learning_rate": 2.1421320851214074e-05,
      "loss": 0.9102,
      "step": 525930
    },
    {
      "epoch": 1.8432885776670278,
      "grad_norm": 2.578125,
      "learning_rate": 2.1420671822550372e-05,
      "loss": 0.8487,
      "step": 525940
    },
    {
      "epoch": 1.8433236251739231,
      "grad_norm": 2.421875,
      "learning_rate": 2.142002279388667e-05,
      "loss": 0.8061,
      "step": 525950
    },
    {
      "epoch": 1.8433586726808189,
      "grad_norm": 2.625,
      "learning_rate": 2.1419373765222968e-05,
      "loss": 0.8596,
      "step": 525960
    },
    {
      "epoch": 1.8433937201877144,
      "grad_norm": 3.09375,
      "learning_rate": 2.141872473655927e-05,
      "loss": 0.8304,
      "step": 525970
    },
    {
      "epoch": 1.84342876769461,
      "grad_norm": 2.71875,
      "learning_rate": 2.1418075707895567e-05,
      "loss": 0.8466,
      "step": 525980
    },
    {
      "epoch": 1.8434638152015057,
      "grad_norm": 3.109375,
      "learning_rate": 2.1417426679231865e-05,
      "loss": 0.8844,
      "step": 525990
    },
    {
      "epoch": 1.8434988627084012,
      "grad_norm": 2.6875,
      "learning_rate": 2.141677765056816e-05,
      "loss": 0.7349,
      "step": 526000
    },
    {
      "epoch": 1.8435339102152968,
      "grad_norm": 3.4375,
      "learning_rate": 2.1416128621904458e-05,
      "loss": 0.8201,
      "step": 526010
    },
    {
      "epoch": 1.8435689577221925,
      "grad_norm": 2.484375,
      "learning_rate": 2.1415479593240756e-05,
      "loss": 0.8199,
      "step": 526020
    },
    {
      "epoch": 1.843604005229088,
      "grad_norm": 3.0625,
      "learning_rate": 2.1414830564577054e-05,
      "loss": 0.8537,
      "step": 526030
    },
    {
      "epoch": 1.8436390527359836,
      "grad_norm": 2.921875,
      "learning_rate": 2.1414181535913352e-05,
      "loss": 0.9154,
      "step": 526040
    },
    {
      "epoch": 1.8436741002428794,
      "grad_norm": 3.078125,
      "learning_rate": 2.141353250724965e-05,
      "loss": 0.8365,
      "step": 526050
    },
    {
      "epoch": 1.8437091477497747,
      "grad_norm": 2.609375,
      "learning_rate": 2.1412883478585948e-05,
      "loss": 0.7166,
      "step": 526060
    },
    {
      "epoch": 1.8437441952566704,
      "grad_norm": 2.96875,
      "learning_rate": 2.1412234449922246e-05,
      "loss": 0.8546,
      "step": 526070
    },
    {
      "epoch": 1.843779242763566,
      "grad_norm": 3.109375,
      "learning_rate": 2.1411585421258544e-05,
      "loss": 0.846,
      "step": 526080
    },
    {
      "epoch": 1.8438142902704615,
      "grad_norm": 3.171875,
      "learning_rate": 2.1410936392594845e-05,
      "loss": 0.8246,
      "step": 526090
    },
    {
      "epoch": 1.8438493377773573,
      "grad_norm": 2.96875,
      "learning_rate": 2.1410287363931143e-05,
      "loss": 0.8602,
      "step": 526100
    },
    {
      "epoch": 1.8438843852842528,
      "grad_norm": 3.359375,
      "learning_rate": 2.140963833526744e-05,
      "loss": 0.8507,
      "step": 526110
    },
    {
      "epoch": 1.8439194327911483,
      "grad_norm": 2.734375,
      "learning_rate": 2.140898930660374e-05,
      "loss": 0.8235,
      "step": 526120
    },
    {
      "epoch": 1.843954480298044,
      "grad_norm": 2.875,
      "learning_rate": 2.1408340277940037e-05,
      "loss": 0.8142,
      "step": 526130
    },
    {
      "epoch": 1.8439895278049396,
      "grad_norm": 2.65625,
      "learning_rate": 2.1407691249276335e-05,
      "loss": 0.7917,
      "step": 526140
    },
    {
      "epoch": 1.8440245753118352,
      "grad_norm": 2.65625,
      "learning_rate": 2.1407042220612633e-05,
      "loss": 0.8414,
      "step": 526150
    },
    {
      "epoch": 1.844059622818731,
      "grad_norm": 3.0,
      "learning_rate": 2.140639319194893e-05,
      "loss": 0.8292,
      "step": 526160
    },
    {
      "epoch": 1.8440946703256262,
      "grad_norm": 2.90625,
      "learning_rate": 2.140574416328523e-05,
      "loss": 0.8868,
      "step": 526170
    },
    {
      "epoch": 1.844129717832522,
      "grad_norm": 2.921875,
      "learning_rate": 2.1405095134621527e-05,
      "loss": 0.8836,
      "step": 526180
    },
    {
      "epoch": 1.8441647653394175,
      "grad_norm": 2.359375,
      "learning_rate": 2.1404446105957822e-05,
      "loss": 0.858,
      "step": 526190
    },
    {
      "epoch": 1.844199812846313,
      "grad_norm": 3.21875,
      "learning_rate": 2.1403797077294123e-05,
      "loss": 0.8269,
      "step": 526200
    },
    {
      "epoch": 1.8442348603532088,
      "grad_norm": 3.1875,
      "learning_rate": 2.140314804863042e-05,
      "loss": 0.7584,
      "step": 526210
    },
    {
      "epoch": 1.8442699078601044,
      "grad_norm": 2.46875,
      "learning_rate": 2.140249901996672e-05,
      "loss": 0.8062,
      "step": 526220
    },
    {
      "epoch": 1.844304955367,
      "grad_norm": 3.109375,
      "learning_rate": 2.1401849991303017e-05,
      "loss": 0.77,
      "step": 526230
    },
    {
      "epoch": 1.8443400028738957,
      "grad_norm": 2.890625,
      "learning_rate": 2.1401200962639315e-05,
      "loss": 0.8674,
      "step": 526240
    },
    {
      "epoch": 1.8443750503807912,
      "grad_norm": 3.203125,
      "learning_rate": 2.1400551933975613e-05,
      "loss": 0.8217,
      "step": 526250
    },
    {
      "epoch": 1.8444100978876867,
      "grad_norm": 3.078125,
      "learning_rate": 2.139990290531191e-05,
      "loss": 0.8615,
      "step": 526260
    },
    {
      "epoch": 1.8444451453945825,
      "grad_norm": 3.015625,
      "learning_rate": 2.139925387664821e-05,
      "loss": 0.8237,
      "step": 526270
    },
    {
      "epoch": 1.8444801929014778,
      "grad_norm": 2.9375,
      "learning_rate": 2.1398604847984507e-05,
      "loss": 0.8795,
      "step": 526280
    },
    {
      "epoch": 1.8445152404083736,
      "grad_norm": 2.953125,
      "learning_rate": 2.1397955819320805e-05,
      "loss": 0.8619,
      "step": 526290
    },
    {
      "epoch": 1.844550287915269,
      "grad_norm": 2.984375,
      "learning_rate": 2.1397306790657103e-05,
      "loss": 0.8425,
      "step": 526300
    },
    {
      "epoch": 1.8445853354221646,
      "grad_norm": 2.71875,
      "learning_rate": 2.13966577619934e-05,
      "loss": 0.8417,
      "step": 526310
    },
    {
      "epoch": 1.8446203829290604,
      "grad_norm": 2.515625,
      "learning_rate": 2.13960087333297e-05,
      "loss": 0.7608,
      "step": 526320
    },
    {
      "epoch": 1.844655430435956,
      "grad_norm": 3.046875,
      "learning_rate": 2.1395359704665997e-05,
      "loss": 0.8432,
      "step": 526330
    },
    {
      "epoch": 1.8446904779428515,
      "grad_norm": 2.375,
      "learning_rate": 2.13947106760023e-05,
      "loss": 0.8302,
      "step": 526340
    },
    {
      "epoch": 1.8447255254497472,
      "grad_norm": 2.75,
      "learning_rate": 2.1394061647338597e-05,
      "loss": 0.7737,
      "step": 526350
    },
    {
      "epoch": 1.8447605729566428,
      "grad_norm": 2.859375,
      "learning_rate": 2.1393412618674895e-05,
      "loss": 0.8782,
      "step": 526360
    },
    {
      "epoch": 1.8447956204635383,
      "grad_norm": 2.875,
      "learning_rate": 2.139276359001119e-05,
      "loss": 0.9191,
      "step": 526370
    },
    {
      "epoch": 1.844830667970434,
      "grad_norm": 2.71875,
      "learning_rate": 2.1392114561347487e-05,
      "loss": 0.9149,
      "step": 526380
    },
    {
      "epoch": 1.8448657154773294,
      "grad_norm": 2.765625,
      "learning_rate": 2.1391465532683785e-05,
      "loss": 0.7748,
      "step": 526390
    },
    {
      "epoch": 1.8449007629842251,
      "grad_norm": 2.796875,
      "learning_rate": 2.1390816504020083e-05,
      "loss": 0.8275,
      "step": 526400
    },
    {
      "epoch": 1.8449358104911209,
      "grad_norm": 2.59375,
      "learning_rate": 2.139016747535638e-05,
      "loss": 0.8097,
      "step": 526410
    },
    {
      "epoch": 1.8449708579980162,
      "grad_norm": 2.984375,
      "learning_rate": 2.138951844669268e-05,
      "loss": 0.8492,
      "step": 526420
    },
    {
      "epoch": 1.845005905504912,
      "grad_norm": 2.96875,
      "learning_rate": 2.1388869418028977e-05,
      "loss": 0.875,
      "step": 526430
    },
    {
      "epoch": 1.8450409530118075,
      "grad_norm": 2.765625,
      "learning_rate": 2.1388220389365275e-05,
      "loss": 0.7873,
      "step": 526440
    },
    {
      "epoch": 1.845076000518703,
      "grad_norm": 3.0625,
      "learning_rate": 2.1387571360701577e-05,
      "loss": 0.809,
      "step": 526450
    },
    {
      "epoch": 1.8451110480255988,
      "grad_norm": 2.953125,
      "learning_rate": 2.1386922332037875e-05,
      "loss": 0.8579,
      "step": 526460
    },
    {
      "epoch": 1.8451460955324943,
      "grad_norm": 2.90625,
      "learning_rate": 2.1386273303374173e-05,
      "loss": 0.8436,
      "step": 526470
    },
    {
      "epoch": 1.8451811430393898,
      "grad_norm": 2.6875,
      "learning_rate": 2.138562427471047e-05,
      "loss": 0.7999,
      "step": 526480
    },
    {
      "epoch": 1.8452161905462856,
      "grad_norm": 3.03125,
      "learning_rate": 2.138497524604677e-05,
      "loss": 0.8064,
      "step": 526490
    },
    {
      "epoch": 1.8452512380531811,
      "grad_norm": 3.234375,
      "learning_rate": 2.1384326217383067e-05,
      "loss": 0.8734,
      "step": 526500
    },
    {
      "epoch": 1.8452862855600767,
      "grad_norm": 3.0,
      "learning_rate": 2.1383677188719365e-05,
      "loss": 0.8605,
      "step": 526510
    },
    {
      "epoch": 1.8453213330669724,
      "grad_norm": 3.03125,
      "learning_rate": 2.1383028160055663e-05,
      "loss": 0.7725,
      "step": 526520
    },
    {
      "epoch": 1.8453563805738677,
      "grad_norm": 2.65625,
      "learning_rate": 2.138237913139196e-05,
      "loss": 0.8337,
      "step": 526530
    },
    {
      "epoch": 1.8453914280807635,
      "grad_norm": 2.921875,
      "learning_rate": 2.138173010272826e-05,
      "loss": 0.7796,
      "step": 526540
    },
    {
      "epoch": 1.845426475587659,
      "grad_norm": 2.65625,
      "learning_rate": 2.1381081074064557e-05,
      "loss": 0.8187,
      "step": 526550
    },
    {
      "epoch": 1.8454615230945546,
      "grad_norm": 2.734375,
      "learning_rate": 2.138043204540085e-05,
      "loss": 0.7882,
      "step": 526560
    },
    {
      "epoch": 1.8454965706014503,
      "grad_norm": 2.875,
      "learning_rate": 2.1379783016737153e-05,
      "loss": 0.9024,
      "step": 526570
    },
    {
      "epoch": 1.8455316181083459,
      "grad_norm": 3.171875,
      "learning_rate": 2.137913398807345e-05,
      "loss": 0.8399,
      "step": 526580
    },
    {
      "epoch": 1.8455666656152414,
      "grad_norm": 2.890625,
      "learning_rate": 2.137848495940975e-05,
      "loss": 0.8559,
      "step": 526590
    },
    {
      "epoch": 1.8456017131221372,
      "grad_norm": 2.5625,
      "learning_rate": 2.1377835930746047e-05,
      "loss": 0.8104,
      "step": 526600
    },
    {
      "epoch": 1.8456367606290327,
      "grad_norm": 2.9375,
      "learning_rate": 2.1377186902082345e-05,
      "loss": 0.7525,
      "step": 526610
    },
    {
      "epoch": 1.8456718081359282,
      "grad_norm": 3.25,
      "learning_rate": 2.1376537873418643e-05,
      "loss": 0.7814,
      "step": 526620
    },
    {
      "epoch": 1.845706855642824,
      "grad_norm": 2.671875,
      "learning_rate": 2.137588884475494e-05,
      "loss": 0.8403,
      "step": 526630
    },
    {
      "epoch": 1.8457419031497193,
      "grad_norm": 2.859375,
      "learning_rate": 2.137523981609124e-05,
      "loss": 0.8562,
      "step": 526640
    },
    {
      "epoch": 1.845776950656615,
      "grad_norm": 2.65625,
      "learning_rate": 2.1374590787427537e-05,
      "loss": 0.7713,
      "step": 526650
    },
    {
      "epoch": 1.8458119981635106,
      "grad_norm": 2.984375,
      "learning_rate": 2.1373941758763835e-05,
      "loss": 0.7646,
      "step": 526660
    },
    {
      "epoch": 1.8458470456704061,
      "grad_norm": 2.828125,
      "learning_rate": 2.1373292730100133e-05,
      "loss": 0.8322,
      "step": 526670
    },
    {
      "epoch": 1.845882093177302,
      "grad_norm": 2.828125,
      "learning_rate": 2.137264370143643e-05,
      "loss": 0.8697,
      "step": 526680
    },
    {
      "epoch": 1.8459171406841974,
      "grad_norm": 2.859375,
      "learning_rate": 2.137199467277273e-05,
      "loss": 0.8229,
      "step": 526690
    },
    {
      "epoch": 1.845952188191093,
      "grad_norm": 2.609375,
      "learning_rate": 2.1371345644109027e-05,
      "loss": 0.8506,
      "step": 526700
    },
    {
      "epoch": 1.8459872356979887,
      "grad_norm": 2.828125,
      "learning_rate": 2.1370696615445328e-05,
      "loss": 0.8445,
      "step": 526710
    },
    {
      "epoch": 1.8460222832048843,
      "grad_norm": 3.078125,
      "learning_rate": 2.1370047586781626e-05,
      "loss": 0.8859,
      "step": 526720
    },
    {
      "epoch": 1.8460573307117798,
      "grad_norm": 3.15625,
      "learning_rate": 2.1369398558117924e-05,
      "loss": 0.83,
      "step": 526730
    },
    {
      "epoch": 1.8460923782186756,
      "grad_norm": 2.875,
      "learning_rate": 2.1368749529454222e-05,
      "loss": 0.839,
      "step": 526740
    },
    {
      "epoch": 1.8461274257255709,
      "grad_norm": 2.578125,
      "learning_rate": 2.1368100500790517e-05,
      "loss": 0.9247,
      "step": 526750
    },
    {
      "epoch": 1.8461624732324666,
      "grad_norm": 3.03125,
      "learning_rate": 2.1367451472126815e-05,
      "loss": 0.88,
      "step": 526760
    },
    {
      "epoch": 1.8461975207393622,
      "grad_norm": 3.125,
      "learning_rate": 2.1366802443463113e-05,
      "loss": 0.8816,
      "step": 526770
    },
    {
      "epoch": 1.8462325682462577,
      "grad_norm": 2.109375,
      "learning_rate": 2.136615341479941e-05,
      "loss": 0.7291,
      "step": 526780
    },
    {
      "epoch": 1.8462676157531535,
      "grad_norm": 3.09375,
      "learning_rate": 2.136550438613571e-05,
      "loss": 0.8371,
      "step": 526790
    },
    {
      "epoch": 1.846302663260049,
      "grad_norm": 2.875,
      "learning_rate": 2.1364855357472007e-05,
      "loss": 0.8508,
      "step": 526800
    },
    {
      "epoch": 1.8463377107669445,
      "grad_norm": 2.65625,
      "learning_rate": 2.1364206328808305e-05,
      "loss": 0.8756,
      "step": 526810
    },
    {
      "epoch": 1.8463727582738403,
      "grad_norm": 2.671875,
      "learning_rate": 2.1363557300144606e-05,
      "loss": 0.8177,
      "step": 526820
    },
    {
      "epoch": 1.8464078057807358,
      "grad_norm": 3.453125,
      "learning_rate": 2.1362908271480904e-05,
      "loss": 0.814,
      "step": 526830
    },
    {
      "epoch": 1.8464428532876314,
      "grad_norm": 3.078125,
      "learning_rate": 2.1362259242817202e-05,
      "loss": 0.8199,
      "step": 526840
    },
    {
      "epoch": 1.8464779007945271,
      "grad_norm": 3.296875,
      "learning_rate": 2.13616102141535e-05,
      "loss": 0.8473,
      "step": 526850
    },
    {
      "epoch": 1.8465129483014224,
      "grad_norm": 2.625,
      "learning_rate": 2.1360961185489798e-05,
      "loss": 0.8245,
      "step": 526860
    },
    {
      "epoch": 1.8465479958083182,
      "grad_norm": 3.140625,
      "learning_rate": 2.1360312156826096e-05,
      "loss": 0.9132,
      "step": 526870
    },
    {
      "epoch": 1.8465830433152137,
      "grad_norm": 3.09375,
      "learning_rate": 2.1359663128162394e-05,
      "loss": 0.8242,
      "step": 526880
    },
    {
      "epoch": 1.8466180908221093,
      "grad_norm": 3.390625,
      "learning_rate": 2.1359014099498692e-05,
      "loss": 0.9172,
      "step": 526890
    },
    {
      "epoch": 1.846653138329005,
      "grad_norm": 2.921875,
      "learning_rate": 2.135836507083499e-05,
      "loss": 0.8212,
      "step": 526900
    },
    {
      "epoch": 1.8466881858359006,
      "grad_norm": 2.515625,
      "learning_rate": 2.1357716042171288e-05,
      "loss": 0.8045,
      "step": 526910
    },
    {
      "epoch": 1.846723233342796,
      "grad_norm": 2.875,
      "learning_rate": 2.1357067013507586e-05,
      "loss": 0.6805,
      "step": 526920
    },
    {
      "epoch": 1.8467582808496918,
      "grad_norm": 3.0625,
      "learning_rate": 2.1356417984843884e-05,
      "loss": 0.8405,
      "step": 526930
    },
    {
      "epoch": 1.8467933283565874,
      "grad_norm": 2.265625,
      "learning_rate": 2.1355768956180182e-05,
      "loss": 0.7805,
      "step": 526940
    },
    {
      "epoch": 1.846828375863483,
      "grad_norm": 2.96875,
      "learning_rate": 2.135511992751648e-05,
      "loss": 0.8049,
      "step": 526950
    },
    {
      "epoch": 1.8468634233703787,
      "grad_norm": 2.921875,
      "learning_rate": 2.1354470898852778e-05,
      "loss": 0.834,
      "step": 526960
    },
    {
      "epoch": 1.846898470877274,
      "grad_norm": 3.3125,
      "learning_rate": 2.1353821870189076e-05,
      "loss": 0.8625,
      "step": 526970
    },
    {
      "epoch": 1.8469335183841697,
      "grad_norm": 2.484375,
      "learning_rate": 2.1353172841525374e-05,
      "loss": 0.8541,
      "step": 526980
    },
    {
      "epoch": 1.8469685658910653,
      "grad_norm": 2.609375,
      "learning_rate": 2.1352523812861672e-05,
      "loss": 0.7331,
      "step": 526990
    },
    {
      "epoch": 1.8470036133979608,
      "grad_norm": 2.578125,
      "learning_rate": 2.135187478419797e-05,
      "loss": 0.8968,
      "step": 527000
    },
    {
      "epoch": 1.8470386609048566,
      "grad_norm": 2.46875,
      "learning_rate": 2.1351225755534268e-05,
      "loss": 0.7691,
      "step": 527010
    },
    {
      "epoch": 1.8470737084117521,
      "grad_norm": 3.28125,
      "learning_rate": 2.1350576726870566e-05,
      "loss": 0.8928,
      "step": 527020
    },
    {
      "epoch": 1.8471087559186476,
      "grad_norm": 2.65625,
      "learning_rate": 2.1349927698206864e-05,
      "loss": 0.8511,
      "step": 527030
    },
    {
      "epoch": 1.8471438034255434,
      "grad_norm": 2.921875,
      "learning_rate": 2.1349278669543162e-05,
      "loss": 0.8692,
      "step": 527040
    },
    {
      "epoch": 1.847178850932439,
      "grad_norm": 2.921875,
      "learning_rate": 2.134862964087946e-05,
      "loss": 0.753,
      "step": 527050
    },
    {
      "epoch": 1.8472138984393345,
      "grad_norm": 3.0625,
      "learning_rate": 2.1347980612215758e-05,
      "loss": 0.7979,
      "step": 527060
    },
    {
      "epoch": 1.8472489459462302,
      "grad_norm": 2.640625,
      "learning_rate": 2.134733158355206e-05,
      "loss": 0.778,
      "step": 527070
    },
    {
      "epoch": 1.8472839934531256,
      "grad_norm": 3.09375,
      "learning_rate": 2.1346682554888357e-05,
      "loss": 0.8742,
      "step": 527080
    },
    {
      "epoch": 1.8473190409600213,
      "grad_norm": 2.78125,
      "learning_rate": 2.1346033526224655e-05,
      "loss": 0.7811,
      "step": 527090
    },
    {
      "epoch": 1.847354088466917,
      "grad_norm": 2.8125,
      "learning_rate": 2.1345384497560953e-05,
      "loss": 0.8053,
      "step": 527100
    },
    {
      "epoch": 1.8473891359738124,
      "grad_norm": 3.046875,
      "learning_rate": 2.134473546889725e-05,
      "loss": 0.8355,
      "step": 527110
    },
    {
      "epoch": 1.8474241834807081,
      "grad_norm": 3.09375,
      "learning_rate": 2.134408644023355e-05,
      "loss": 0.8224,
      "step": 527120
    },
    {
      "epoch": 1.8474592309876037,
      "grad_norm": 2.828125,
      "learning_rate": 2.1343437411569844e-05,
      "loss": 0.8597,
      "step": 527130
    },
    {
      "epoch": 1.8474942784944992,
      "grad_norm": 2.65625,
      "learning_rate": 2.1342788382906142e-05,
      "loss": 0.8534,
      "step": 527140
    },
    {
      "epoch": 1.847529326001395,
      "grad_norm": 3.0625,
      "learning_rate": 2.134213935424244e-05,
      "loss": 0.7534,
      "step": 527150
    },
    {
      "epoch": 1.8475643735082905,
      "grad_norm": 3.15625,
      "learning_rate": 2.1341490325578738e-05,
      "loss": 0.8448,
      "step": 527160
    },
    {
      "epoch": 1.847599421015186,
      "grad_norm": 2.984375,
      "learning_rate": 2.1340841296915036e-05,
      "loss": 0.8562,
      "step": 527170
    },
    {
      "epoch": 1.8476344685220818,
      "grad_norm": 2.890625,
      "learning_rate": 2.1340192268251334e-05,
      "loss": 0.8096,
      "step": 527180
    },
    {
      "epoch": 1.8476695160289773,
      "grad_norm": 2.703125,
      "learning_rate": 2.1339543239587635e-05,
      "loss": 0.8334,
      "step": 527190
    },
    {
      "epoch": 1.8477045635358729,
      "grad_norm": 3.15625,
      "learning_rate": 2.1338894210923933e-05,
      "loss": 0.8921,
      "step": 527200
    },
    {
      "epoch": 1.8477396110427686,
      "grad_norm": 3.15625,
      "learning_rate": 2.133824518226023e-05,
      "loss": 0.8526,
      "step": 527210
    },
    {
      "epoch": 1.847774658549664,
      "grad_norm": 2.71875,
      "learning_rate": 2.133759615359653e-05,
      "loss": 0.7847,
      "step": 527220
    },
    {
      "epoch": 1.8478097060565597,
      "grad_norm": 2.703125,
      "learning_rate": 2.1336947124932827e-05,
      "loss": 0.7707,
      "step": 527230
    },
    {
      "epoch": 1.8478447535634552,
      "grad_norm": 2.25,
      "learning_rate": 2.1336298096269125e-05,
      "loss": 0.8162,
      "step": 527240
    },
    {
      "epoch": 1.8478798010703508,
      "grad_norm": 2.9375,
      "learning_rate": 2.1335649067605423e-05,
      "loss": 0.808,
      "step": 527250
    },
    {
      "epoch": 1.8479148485772465,
      "grad_norm": 2.546875,
      "learning_rate": 2.133500003894172e-05,
      "loss": 0.7364,
      "step": 527260
    },
    {
      "epoch": 1.847949896084142,
      "grad_norm": 2.625,
      "learning_rate": 2.133435101027802e-05,
      "loss": 0.8125,
      "step": 527270
    },
    {
      "epoch": 1.8479849435910376,
      "grad_norm": 3.40625,
      "learning_rate": 2.1333701981614317e-05,
      "loss": 0.9122,
      "step": 527280
    },
    {
      "epoch": 1.8480199910979334,
      "grad_norm": 3.09375,
      "learning_rate": 2.1333052952950615e-05,
      "loss": 0.8725,
      "step": 527290
    },
    {
      "epoch": 1.848055038604829,
      "grad_norm": 2.5625,
      "learning_rate": 2.1332403924286913e-05,
      "loss": 0.8437,
      "step": 527300
    },
    {
      "epoch": 1.8480900861117244,
      "grad_norm": 2.53125,
      "learning_rate": 2.133175489562321e-05,
      "loss": 0.7735,
      "step": 527310
    },
    {
      "epoch": 1.8481251336186202,
      "grad_norm": 3.15625,
      "learning_rate": 2.133110586695951e-05,
      "loss": 0.9408,
      "step": 527320
    },
    {
      "epoch": 1.8481601811255155,
      "grad_norm": 3.0625,
      "learning_rate": 2.1330456838295807e-05,
      "loss": 0.7687,
      "step": 527330
    },
    {
      "epoch": 1.8481952286324113,
      "grad_norm": 2.40625,
      "learning_rate": 2.1329807809632105e-05,
      "loss": 0.8236,
      "step": 527340
    },
    {
      "epoch": 1.8482302761393068,
      "grad_norm": 3.28125,
      "learning_rate": 2.1329158780968403e-05,
      "loss": 0.8653,
      "step": 527350
    },
    {
      "epoch": 1.8482653236462023,
      "grad_norm": 2.796875,
      "learning_rate": 2.13285097523047e-05,
      "loss": 0.8282,
      "step": 527360
    },
    {
      "epoch": 1.848300371153098,
      "grad_norm": 2.984375,
      "learning_rate": 2.1327860723641e-05,
      "loss": 0.8939,
      "step": 527370
    },
    {
      "epoch": 1.8483354186599936,
      "grad_norm": 3.34375,
      "learning_rate": 2.1327211694977297e-05,
      "loss": 0.8957,
      "step": 527380
    },
    {
      "epoch": 1.8483704661668892,
      "grad_norm": 2.953125,
      "learning_rate": 2.1326562666313595e-05,
      "loss": 0.7835,
      "step": 527390
    },
    {
      "epoch": 1.848405513673785,
      "grad_norm": 3.046875,
      "learning_rate": 2.1325913637649893e-05,
      "loss": 0.7596,
      "step": 527400
    },
    {
      "epoch": 1.8484405611806805,
      "grad_norm": 3.15625,
      "learning_rate": 2.132526460898619e-05,
      "loss": 0.7738,
      "step": 527410
    },
    {
      "epoch": 1.848475608687576,
      "grad_norm": 2.796875,
      "learning_rate": 2.132461558032249e-05,
      "loss": 0.793,
      "step": 527420
    },
    {
      "epoch": 1.8485106561944717,
      "grad_norm": 2.78125,
      "learning_rate": 2.1323966551658787e-05,
      "loss": 0.8234,
      "step": 527430
    },
    {
      "epoch": 1.848545703701367,
      "grad_norm": 3.421875,
      "learning_rate": 2.132331752299509e-05,
      "loss": 0.8783,
      "step": 527440
    },
    {
      "epoch": 1.8485807512082628,
      "grad_norm": 2.640625,
      "learning_rate": 2.1322668494331387e-05,
      "loss": 0.8282,
      "step": 527450
    },
    {
      "epoch": 1.8486157987151584,
      "grad_norm": 3.234375,
      "learning_rate": 2.1322019465667685e-05,
      "loss": 0.9219,
      "step": 527460
    },
    {
      "epoch": 1.848650846222054,
      "grad_norm": 3.046875,
      "learning_rate": 2.1321370437003983e-05,
      "loss": 0.8616,
      "step": 527470
    },
    {
      "epoch": 1.8486858937289496,
      "grad_norm": 2.765625,
      "learning_rate": 2.132072140834028e-05,
      "loss": 0.7813,
      "step": 527480
    },
    {
      "epoch": 1.8487209412358452,
      "grad_norm": 2.671875,
      "learning_rate": 2.132007237967658e-05,
      "loss": 0.9513,
      "step": 527490
    },
    {
      "epoch": 1.8487559887427407,
      "grad_norm": 2.921875,
      "learning_rate": 2.1319423351012873e-05,
      "loss": 0.8527,
      "step": 527500
    },
    {
      "epoch": 1.8487910362496365,
      "grad_norm": 3.46875,
      "learning_rate": 2.131877432234917e-05,
      "loss": 0.7935,
      "step": 527510
    },
    {
      "epoch": 1.848826083756532,
      "grad_norm": 3.015625,
      "learning_rate": 2.131812529368547e-05,
      "loss": 0.8116,
      "step": 527520
    },
    {
      "epoch": 1.8488611312634275,
      "grad_norm": 3.3125,
      "learning_rate": 2.1317476265021767e-05,
      "loss": 0.8139,
      "step": 527530
    },
    {
      "epoch": 1.8488961787703233,
      "grad_norm": 2.859375,
      "learning_rate": 2.1316827236358065e-05,
      "loss": 0.8424,
      "step": 527540
    },
    {
      "epoch": 1.8489312262772186,
      "grad_norm": 2.953125,
      "learning_rate": 2.1316178207694367e-05,
      "loss": 0.7609,
      "step": 527550
    },
    {
      "epoch": 1.8489662737841144,
      "grad_norm": 2.5625,
      "learning_rate": 2.1315529179030665e-05,
      "loss": 0.8366,
      "step": 527560
    },
    {
      "epoch": 1.84900132129101,
      "grad_norm": 2.21875,
      "learning_rate": 2.1314880150366963e-05,
      "loss": 0.7826,
      "step": 527570
    },
    {
      "epoch": 1.8490363687979054,
      "grad_norm": 3.171875,
      "learning_rate": 2.131423112170326e-05,
      "loss": 0.8846,
      "step": 527580
    },
    {
      "epoch": 1.8490714163048012,
      "grad_norm": 2.890625,
      "learning_rate": 2.131358209303956e-05,
      "loss": 0.7904,
      "step": 527590
    },
    {
      "epoch": 1.8491064638116967,
      "grad_norm": 2.734375,
      "learning_rate": 2.1312933064375857e-05,
      "loss": 0.7486,
      "step": 527600
    },
    {
      "epoch": 1.8491415113185923,
      "grad_norm": 2.953125,
      "learning_rate": 2.1312284035712155e-05,
      "loss": 0.8278,
      "step": 527610
    },
    {
      "epoch": 1.849176558825488,
      "grad_norm": 2.96875,
      "learning_rate": 2.1311635007048453e-05,
      "loss": 0.8168,
      "step": 527620
    },
    {
      "epoch": 1.8492116063323836,
      "grad_norm": 3.3125,
      "learning_rate": 2.131098597838475e-05,
      "loss": 0.8068,
      "step": 527630
    },
    {
      "epoch": 1.849246653839279,
      "grad_norm": 2.75,
      "learning_rate": 2.131033694972105e-05,
      "loss": 0.8751,
      "step": 527640
    },
    {
      "epoch": 1.8492817013461749,
      "grad_norm": 2.84375,
      "learning_rate": 2.1309687921057347e-05,
      "loss": 0.8568,
      "step": 527650
    },
    {
      "epoch": 1.8493167488530702,
      "grad_norm": 2.78125,
      "learning_rate": 2.1309038892393645e-05,
      "loss": 0.7745,
      "step": 527660
    },
    {
      "epoch": 1.849351796359966,
      "grad_norm": 2.953125,
      "learning_rate": 2.1308389863729943e-05,
      "loss": 0.8457,
      "step": 527670
    },
    {
      "epoch": 1.8493868438668617,
      "grad_norm": 3.109375,
      "learning_rate": 2.130774083506624e-05,
      "loss": 0.7826,
      "step": 527680
    },
    {
      "epoch": 1.849421891373757,
      "grad_norm": 2.828125,
      "learning_rate": 2.130709180640254e-05,
      "loss": 0.8786,
      "step": 527690
    },
    {
      "epoch": 1.8494569388806528,
      "grad_norm": 2.90625,
      "learning_rate": 2.1306442777738837e-05,
      "loss": 0.8725,
      "step": 527700
    },
    {
      "epoch": 1.8494919863875483,
      "grad_norm": 2.9375,
      "learning_rate": 2.1305793749075135e-05,
      "loss": 0.7405,
      "step": 527710
    },
    {
      "epoch": 1.8495270338944438,
      "grad_norm": 2.921875,
      "learning_rate": 2.1305144720411433e-05,
      "loss": 0.7698,
      "step": 527720
    },
    {
      "epoch": 1.8495620814013396,
      "grad_norm": 2.46875,
      "learning_rate": 2.130449569174773e-05,
      "loss": 0.8327,
      "step": 527730
    },
    {
      "epoch": 1.8495971289082351,
      "grad_norm": 3.046875,
      "learning_rate": 2.130384666308403e-05,
      "loss": 0.8549,
      "step": 527740
    },
    {
      "epoch": 1.8496321764151307,
      "grad_norm": 2.8125,
      "learning_rate": 2.1303197634420327e-05,
      "loss": 0.8374,
      "step": 527750
    },
    {
      "epoch": 1.8496672239220264,
      "grad_norm": 3.0,
      "learning_rate": 2.1302548605756625e-05,
      "loss": 0.8638,
      "step": 527760
    },
    {
      "epoch": 1.8497022714289217,
      "grad_norm": 2.484375,
      "learning_rate": 2.1301899577092923e-05,
      "loss": 0.7858,
      "step": 527770
    },
    {
      "epoch": 1.8497373189358175,
      "grad_norm": 2.6875,
      "learning_rate": 2.130125054842922e-05,
      "loss": 0.7835,
      "step": 527780
    },
    {
      "epoch": 1.8497723664427133,
      "grad_norm": 2.703125,
      "learning_rate": 2.130060151976552e-05,
      "loss": 0.7791,
      "step": 527790
    },
    {
      "epoch": 1.8498074139496086,
      "grad_norm": 3.09375,
      "learning_rate": 2.1299952491101817e-05,
      "loss": 0.7578,
      "step": 527800
    },
    {
      "epoch": 1.8498424614565043,
      "grad_norm": 3.203125,
      "learning_rate": 2.1299303462438118e-05,
      "loss": 0.8495,
      "step": 527810
    },
    {
      "epoch": 1.8498775089633999,
      "grad_norm": 3.03125,
      "learning_rate": 2.1298654433774416e-05,
      "loss": 0.8114,
      "step": 527820
    },
    {
      "epoch": 1.8499125564702954,
      "grad_norm": 3.140625,
      "learning_rate": 2.1298005405110714e-05,
      "loss": 0.8368,
      "step": 527830
    },
    {
      "epoch": 1.8499476039771912,
      "grad_norm": 2.84375,
      "learning_rate": 2.1297356376447012e-05,
      "loss": 0.8607,
      "step": 527840
    },
    {
      "epoch": 1.8499826514840867,
      "grad_norm": 2.953125,
      "learning_rate": 2.129670734778331e-05,
      "loss": 0.8424,
      "step": 527850
    },
    {
      "epoch": 1.8500176989909822,
      "grad_norm": 2.953125,
      "learning_rate": 2.1296058319119608e-05,
      "loss": 0.91,
      "step": 527860
    },
    {
      "epoch": 1.850052746497878,
      "grad_norm": 2.921875,
      "learning_rate": 2.1295409290455906e-05,
      "loss": 0.8069,
      "step": 527870
    },
    {
      "epoch": 1.8500877940047735,
      "grad_norm": 2.703125,
      "learning_rate": 2.12947602617922e-05,
      "loss": 0.7816,
      "step": 527880
    },
    {
      "epoch": 1.850122841511669,
      "grad_norm": 3.03125,
      "learning_rate": 2.12941112331285e-05,
      "loss": 0.9198,
      "step": 527890
    },
    {
      "epoch": 1.8501578890185648,
      "grad_norm": 3.09375,
      "learning_rate": 2.1293462204464797e-05,
      "loss": 0.8368,
      "step": 527900
    },
    {
      "epoch": 1.8501929365254601,
      "grad_norm": 2.78125,
      "learning_rate": 2.1292813175801095e-05,
      "loss": 0.7645,
      "step": 527910
    },
    {
      "epoch": 1.8502279840323559,
      "grad_norm": 2.8125,
      "learning_rate": 2.1292164147137396e-05,
      "loss": 0.8657,
      "step": 527920
    },
    {
      "epoch": 1.8502630315392514,
      "grad_norm": 3.0625,
      "learning_rate": 2.1291515118473694e-05,
      "loss": 0.817,
      "step": 527930
    },
    {
      "epoch": 1.850298079046147,
      "grad_norm": 3.328125,
      "learning_rate": 2.1290866089809992e-05,
      "loss": 0.791,
      "step": 527940
    },
    {
      "epoch": 1.8503331265530427,
      "grad_norm": 2.484375,
      "learning_rate": 2.129021706114629e-05,
      "loss": 0.8541,
      "step": 527950
    },
    {
      "epoch": 1.8503681740599383,
      "grad_norm": 3.265625,
      "learning_rate": 2.1289568032482588e-05,
      "loss": 0.8252,
      "step": 527960
    },
    {
      "epoch": 1.8504032215668338,
      "grad_norm": 2.78125,
      "learning_rate": 2.1288919003818886e-05,
      "loss": 0.7926,
      "step": 527970
    },
    {
      "epoch": 1.8504382690737295,
      "grad_norm": 2.9375,
      "learning_rate": 2.1288269975155184e-05,
      "loss": 0.8696,
      "step": 527980
    },
    {
      "epoch": 1.850473316580625,
      "grad_norm": 3.34375,
      "learning_rate": 2.1287620946491482e-05,
      "loss": 0.8175,
      "step": 527990
    },
    {
      "epoch": 1.8505083640875206,
      "grad_norm": 2.875,
      "learning_rate": 2.128697191782778e-05,
      "loss": 0.8164,
      "step": 528000
    },
    {
      "epoch": 1.8505434115944164,
      "grad_norm": 2.84375,
      "learning_rate": 2.1286322889164078e-05,
      "loss": 0.8805,
      "step": 528010
    },
    {
      "epoch": 1.8505784591013117,
      "grad_norm": 2.640625,
      "learning_rate": 2.1285673860500376e-05,
      "loss": 0.8585,
      "step": 528020
    },
    {
      "epoch": 1.8506135066082074,
      "grad_norm": 2.6875,
      "learning_rate": 2.1285024831836674e-05,
      "loss": 0.8674,
      "step": 528030
    },
    {
      "epoch": 1.850648554115103,
      "grad_norm": 3.0625,
      "learning_rate": 2.1284375803172972e-05,
      "loss": 0.8176,
      "step": 528040
    },
    {
      "epoch": 1.8506836016219985,
      "grad_norm": 2.921875,
      "learning_rate": 2.128372677450927e-05,
      "loss": 0.8609,
      "step": 528050
    },
    {
      "epoch": 1.8507186491288943,
      "grad_norm": 2.75,
      "learning_rate": 2.128307774584557e-05,
      "loss": 0.7886,
      "step": 528060
    },
    {
      "epoch": 1.8507536966357898,
      "grad_norm": 2.765625,
      "learning_rate": 2.1282428717181866e-05,
      "loss": 0.8003,
      "step": 528070
    },
    {
      "epoch": 1.8507887441426853,
      "grad_norm": 2.984375,
      "learning_rate": 2.1281779688518164e-05,
      "loss": 0.8004,
      "step": 528080
    },
    {
      "epoch": 1.850823791649581,
      "grad_norm": 2.75,
      "learning_rate": 2.1281130659854462e-05,
      "loss": 0.8939,
      "step": 528090
    },
    {
      "epoch": 1.8508588391564766,
      "grad_norm": 3.234375,
      "learning_rate": 2.128048163119076e-05,
      "loss": 0.8577,
      "step": 528100
    },
    {
      "epoch": 1.8508938866633722,
      "grad_norm": 2.6875,
      "learning_rate": 2.1279832602527058e-05,
      "loss": 0.8306,
      "step": 528110
    },
    {
      "epoch": 1.850928934170268,
      "grad_norm": 3.515625,
      "learning_rate": 2.1279183573863356e-05,
      "loss": 0.8802,
      "step": 528120
    },
    {
      "epoch": 1.8509639816771633,
      "grad_norm": 2.78125,
      "learning_rate": 2.1278534545199654e-05,
      "loss": 0.8076,
      "step": 528130
    },
    {
      "epoch": 1.850999029184059,
      "grad_norm": 3.421875,
      "learning_rate": 2.1277885516535952e-05,
      "loss": 0.9156,
      "step": 528140
    },
    {
      "epoch": 1.8510340766909545,
      "grad_norm": 2.84375,
      "learning_rate": 2.127723648787225e-05,
      "loss": 0.7969,
      "step": 528150
    },
    {
      "epoch": 1.85106912419785,
      "grad_norm": 2.96875,
      "learning_rate": 2.1276587459208548e-05,
      "loss": 0.88,
      "step": 528160
    },
    {
      "epoch": 1.8511041717047458,
      "grad_norm": 2.828125,
      "learning_rate": 2.127593843054485e-05,
      "loss": 0.844,
      "step": 528170
    },
    {
      "epoch": 1.8511392192116414,
      "grad_norm": 2.90625,
      "learning_rate": 2.1275289401881147e-05,
      "loss": 0.7928,
      "step": 528180
    },
    {
      "epoch": 1.851174266718537,
      "grad_norm": 3.140625,
      "learning_rate": 2.1274640373217445e-05,
      "loss": 0.928,
      "step": 528190
    },
    {
      "epoch": 1.8512093142254327,
      "grad_norm": 3.109375,
      "learning_rate": 2.1273991344553743e-05,
      "loss": 0.786,
      "step": 528200
    },
    {
      "epoch": 1.8512443617323282,
      "grad_norm": 2.359375,
      "learning_rate": 2.127334231589004e-05,
      "loss": 0.8643,
      "step": 528210
    },
    {
      "epoch": 1.8512794092392237,
      "grad_norm": 3.109375,
      "learning_rate": 2.127269328722634e-05,
      "loss": 0.8311,
      "step": 528220
    },
    {
      "epoch": 1.8513144567461195,
      "grad_norm": 3.75,
      "learning_rate": 2.1272044258562637e-05,
      "loss": 0.8288,
      "step": 528230
    },
    {
      "epoch": 1.8513495042530148,
      "grad_norm": 2.671875,
      "learning_rate": 2.1271395229898935e-05,
      "loss": 0.8592,
      "step": 528240
    },
    {
      "epoch": 1.8513845517599106,
      "grad_norm": 2.328125,
      "learning_rate": 2.127074620123523e-05,
      "loss": 0.7786,
      "step": 528250
    },
    {
      "epoch": 1.851419599266806,
      "grad_norm": 3.078125,
      "learning_rate": 2.1270097172571528e-05,
      "loss": 0.908,
      "step": 528260
    },
    {
      "epoch": 1.8514546467737016,
      "grad_norm": 2.734375,
      "learning_rate": 2.1269448143907826e-05,
      "loss": 0.7956,
      "step": 528270
    },
    {
      "epoch": 1.8514896942805974,
      "grad_norm": 2.78125,
      "learning_rate": 2.1268799115244124e-05,
      "loss": 0.7599,
      "step": 528280
    },
    {
      "epoch": 1.851524741787493,
      "grad_norm": 2.921875,
      "learning_rate": 2.1268150086580425e-05,
      "loss": 0.8394,
      "step": 528290
    },
    {
      "epoch": 1.8515597892943885,
      "grad_norm": 3.1875,
      "learning_rate": 2.1267501057916723e-05,
      "loss": 0.896,
      "step": 528300
    },
    {
      "epoch": 1.8515948368012842,
      "grad_norm": 2.875,
      "learning_rate": 2.126685202925302e-05,
      "loss": 0.9258,
      "step": 528310
    },
    {
      "epoch": 1.8516298843081798,
      "grad_norm": 2.921875,
      "learning_rate": 2.126620300058932e-05,
      "loss": 0.861,
      "step": 528320
    },
    {
      "epoch": 1.8516649318150753,
      "grad_norm": 2.9375,
      "learning_rate": 2.1265553971925617e-05,
      "loss": 0.8418,
      "step": 528330
    },
    {
      "epoch": 1.851699979321971,
      "grad_norm": 2.703125,
      "learning_rate": 2.1264904943261915e-05,
      "loss": 0.8643,
      "step": 528340
    },
    {
      "epoch": 1.8517350268288664,
      "grad_norm": 2.890625,
      "learning_rate": 2.1264255914598213e-05,
      "loss": 0.7764,
      "step": 528350
    },
    {
      "epoch": 1.8517700743357621,
      "grad_norm": 2.84375,
      "learning_rate": 2.126360688593451e-05,
      "loss": 0.8198,
      "step": 528360
    },
    {
      "epoch": 1.8518051218426579,
      "grad_norm": 2.765625,
      "learning_rate": 2.126295785727081e-05,
      "loss": 0.807,
      "step": 528370
    },
    {
      "epoch": 1.8518401693495532,
      "grad_norm": 2.953125,
      "learning_rate": 2.1262308828607107e-05,
      "loss": 0.8002,
      "step": 528380
    },
    {
      "epoch": 1.851875216856449,
      "grad_norm": 2.828125,
      "learning_rate": 2.1261659799943405e-05,
      "loss": 0.7681,
      "step": 528390
    },
    {
      "epoch": 1.8519102643633445,
      "grad_norm": 2.703125,
      "learning_rate": 2.1261010771279703e-05,
      "loss": 0.8005,
      "step": 528400
    },
    {
      "epoch": 1.85194531187024,
      "grad_norm": 2.6875,
      "learning_rate": 2.1260361742616e-05,
      "loss": 0.8404,
      "step": 528410
    },
    {
      "epoch": 1.8519803593771358,
      "grad_norm": 2.640625,
      "learning_rate": 2.1259712713952303e-05,
      "loss": 0.7745,
      "step": 528420
    },
    {
      "epoch": 1.8520154068840313,
      "grad_norm": 3.21875,
      "learning_rate": 2.12590636852886e-05,
      "loss": 0.7826,
      "step": 528430
    },
    {
      "epoch": 1.8520504543909269,
      "grad_norm": 2.71875,
      "learning_rate": 2.1258414656624895e-05,
      "loss": 0.8397,
      "step": 528440
    },
    {
      "epoch": 1.8520855018978226,
      "grad_norm": 2.203125,
      "learning_rate": 2.1257765627961193e-05,
      "loss": 0.8175,
      "step": 528450
    },
    {
      "epoch": 1.852120549404718,
      "grad_norm": 2.765625,
      "learning_rate": 2.125711659929749e-05,
      "loss": 0.9047,
      "step": 528460
    },
    {
      "epoch": 1.8521555969116137,
      "grad_norm": 2.609375,
      "learning_rate": 2.125646757063379e-05,
      "loss": 0.8136,
      "step": 528470
    },
    {
      "epoch": 1.8521906444185094,
      "grad_norm": 2.796875,
      "learning_rate": 2.1255818541970087e-05,
      "loss": 0.8322,
      "step": 528480
    },
    {
      "epoch": 1.8522256919254048,
      "grad_norm": 2.890625,
      "learning_rate": 2.1255169513306385e-05,
      "loss": 0.8805,
      "step": 528490
    },
    {
      "epoch": 1.8522607394323005,
      "grad_norm": 3.375,
      "learning_rate": 2.1254520484642683e-05,
      "loss": 0.8273,
      "step": 528500
    },
    {
      "epoch": 1.852295786939196,
      "grad_norm": 2.609375,
      "learning_rate": 2.125387145597898e-05,
      "loss": 0.7861,
      "step": 528510
    },
    {
      "epoch": 1.8523308344460916,
      "grad_norm": 2.75,
      "learning_rate": 2.125322242731528e-05,
      "loss": 0.8112,
      "step": 528520
    },
    {
      "epoch": 1.8523658819529873,
      "grad_norm": 3.234375,
      "learning_rate": 2.1252573398651577e-05,
      "loss": 0.7555,
      "step": 528530
    },
    {
      "epoch": 1.8524009294598829,
      "grad_norm": 2.9375,
      "learning_rate": 2.125192436998788e-05,
      "loss": 0.864,
      "step": 528540
    },
    {
      "epoch": 1.8524359769667784,
      "grad_norm": 2.5625,
      "learning_rate": 2.1251275341324177e-05,
      "loss": 0.8057,
      "step": 528550
    },
    {
      "epoch": 1.8524710244736742,
      "grad_norm": 3.3125,
      "learning_rate": 2.1250626312660475e-05,
      "loss": 0.8507,
      "step": 528560
    },
    {
      "epoch": 1.8525060719805697,
      "grad_norm": 2.765625,
      "learning_rate": 2.1249977283996773e-05,
      "loss": 0.7667,
      "step": 528570
    },
    {
      "epoch": 1.8525411194874652,
      "grad_norm": 2.875,
      "learning_rate": 2.124932825533307e-05,
      "loss": 0.875,
      "step": 528580
    },
    {
      "epoch": 1.852576166994361,
      "grad_norm": 3.046875,
      "learning_rate": 2.124867922666937e-05,
      "loss": 0.8237,
      "step": 528590
    },
    {
      "epoch": 1.8526112145012563,
      "grad_norm": 2.828125,
      "learning_rate": 2.1248030198005667e-05,
      "loss": 0.7546,
      "step": 528600
    },
    {
      "epoch": 1.852646262008152,
      "grad_norm": 2.9375,
      "learning_rate": 2.1247381169341965e-05,
      "loss": 0.9085,
      "step": 528610
    },
    {
      "epoch": 1.8526813095150476,
      "grad_norm": 3.46875,
      "learning_rate": 2.1246732140678263e-05,
      "loss": 0.847,
      "step": 528620
    },
    {
      "epoch": 1.8527163570219431,
      "grad_norm": 3.125,
      "learning_rate": 2.1246083112014557e-05,
      "loss": 0.8941,
      "step": 528630
    },
    {
      "epoch": 1.852751404528839,
      "grad_norm": 3.171875,
      "learning_rate": 2.1245434083350855e-05,
      "loss": 0.81,
      "step": 528640
    },
    {
      "epoch": 1.8527864520357344,
      "grad_norm": 2.953125,
      "learning_rate": 2.1244785054687157e-05,
      "loss": 0.7905,
      "step": 528650
    },
    {
      "epoch": 1.85282149954263,
      "grad_norm": 3.109375,
      "learning_rate": 2.1244136026023455e-05,
      "loss": 0.8128,
      "step": 528660
    },
    {
      "epoch": 1.8528565470495257,
      "grad_norm": 3.453125,
      "learning_rate": 2.1243486997359753e-05,
      "loss": 0.8236,
      "step": 528670
    },
    {
      "epoch": 1.8528915945564213,
      "grad_norm": 3.359375,
      "learning_rate": 2.124283796869605e-05,
      "loss": 0.887,
      "step": 528680
    },
    {
      "epoch": 1.8529266420633168,
      "grad_norm": 2.90625,
      "learning_rate": 2.124218894003235e-05,
      "loss": 0.7661,
      "step": 528690
    },
    {
      "epoch": 1.8529616895702126,
      "grad_norm": 2.515625,
      "learning_rate": 2.1241539911368647e-05,
      "loss": 0.8843,
      "step": 528700
    },
    {
      "epoch": 1.8529967370771079,
      "grad_norm": 3.46875,
      "learning_rate": 2.1240890882704945e-05,
      "loss": 0.8707,
      "step": 528710
    },
    {
      "epoch": 1.8530317845840036,
      "grad_norm": 3.0625,
      "learning_rate": 2.1240241854041243e-05,
      "loss": 0.8379,
      "step": 528720
    },
    {
      "epoch": 1.8530668320908992,
      "grad_norm": 2.8125,
      "learning_rate": 2.123959282537754e-05,
      "loss": 0.8808,
      "step": 528730
    },
    {
      "epoch": 1.8531018795977947,
      "grad_norm": 2.78125,
      "learning_rate": 2.123894379671384e-05,
      "loss": 0.7798,
      "step": 528740
    },
    {
      "epoch": 1.8531369271046905,
      "grad_norm": 2.96875,
      "learning_rate": 2.1238294768050137e-05,
      "loss": 0.8231,
      "step": 528750
    },
    {
      "epoch": 1.853171974611586,
      "grad_norm": 3.25,
      "learning_rate": 2.1237645739386435e-05,
      "loss": 0.8075,
      "step": 528760
    },
    {
      "epoch": 1.8532070221184815,
      "grad_norm": 3.3125,
      "learning_rate": 2.1236996710722733e-05,
      "loss": 0.865,
      "step": 528770
    },
    {
      "epoch": 1.8532420696253773,
      "grad_norm": 2.640625,
      "learning_rate": 2.123634768205903e-05,
      "loss": 0.865,
      "step": 528780
    },
    {
      "epoch": 1.8532771171322728,
      "grad_norm": 2.578125,
      "learning_rate": 2.1235698653395332e-05,
      "loss": 0.7999,
      "step": 528790
    },
    {
      "epoch": 1.8533121646391684,
      "grad_norm": 2.53125,
      "learning_rate": 2.123504962473163e-05,
      "loss": 0.7786,
      "step": 528800
    },
    {
      "epoch": 1.8533472121460641,
      "grad_norm": 2.53125,
      "learning_rate": 2.1234400596067928e-05,
      "loss": 0.8415,
      "step": 528810
    },
    {
      "epoch": 1.8533822596529594,
      "grad_norm": 3.171875,
      "learning_rate": 2.1233751567404223e-05,
      "loss": 0.8957,
      "step": 528820
    },
    {
      "epoch": 1.8534173071598552,
      "grad_norm": 2.5625,
      "learning_rate": 2.123310253874052e-05,
      "loss": 0.7893,
      "step": 528830
    },
    {
      "epoch": 1.8534523546667507,
      "grad_norm": 2.71875,
      "learning_rate": 2.123245351007682e-05,
      "loss": 0.751,
      "step": 528840
    },
    {
      "epoch": 1.8534874021736463,
      "grad_norm": 2.53125,
      "learning_rate": 2.1231804481413117e-05,
      "loss": 0.7457,
      "step": 528850
    },
    {
      "epoch": 1.853522449680542,
      "grad_norm": 2.734375,
      "learning_rate": 2.1231155452749415e-05,
      "loss": 0.7973,
      "step": 528860
    },
    {
      "epoch": 1.8535574971874376,
      "grad_norm": 2.984375,
      "learning_rate": 2.1230506424085713e-05,
      "loss": 0.7687,
      "step": 528870
    },
    {
      "epoch": 1.853592544694333,
      "grad_norm": 2.8125,
      "learning_rate": 2.122985739542201e-05,
      "loss": 0.9103,
      "step": 528880
    },
    {
      "epoch": 1.8536275922012289,
      "grad_norm": 3.671875,
      "learning_rate": 2.122920836675831e-05,
      "loss": 0.7873,
      "step": 528890
    },
    {
      "epoch": 1.8536626397081244,
      "grad_norm": 3.375,
      "learning_rate": 2.1228559338094607e-05,
      "loss": 0.8183,
      "step": 528900
    },
    {
      "epoch": 1.85369768721502,
      "grad_norm": 3.125,
      "learning_rate": 2.1227910309430908e-05,
      "loss": 0.8194,
      "step": 528910
    },
    {
      "epoch": 1.8537327347219157,
      "grad_norm": 2.546875,
      "learning_rate": 2.1227261280767206e-05,
      "loss": 0.8617,
      "step": 528920
    },
    {
      "epoch": 1.853767782228811,
      "grad_norm": 2.859375,
      "learning_rate": 2.1226612252103504e-05,
      "loss": 0.9145,
      "step": 528930
    },
    {
      "epoch": 1.8538028297357068,
      "grad_norm": 2.421875,
      "learning_rate": 2.1225963223439802e-05,
      "loss": 0.8625,
      "step": 528940
    },
    {
      "epoch": 1.8538378772426023,
      "grad_norm": 3.234375,
      "learning_rate": 2.12253141947761e-05,
      "loss": 0.7833,
      "step": 528950
    },
    {
      "epoch": 1.8538729247494978,
      "grad_norm": 2.671875,
      "learning_rate": 2.1224665166112398e-05,
      "loss": 0.8374,
      "step": 528960
    },
    {
      "epoch": 1.8539079722563936,
      "grad_norm": 2.984375,
      "learning_rate": 2.1224016137448696e-05,
      "loss": 0.9055,
      "step": 528970
    },
    {
      "epoch": 1.8539430197632891,
      "grad_norm": 2.65625,
      "learning_rate": 2.1223367108784994e-05,
      "loss": 0.7903,
      "step": 528980
    },
    {
      "epoch": 1.8539780672701847,
      "grad_norm": 3.328125,
      "learning_rate": 2.1222718080121292e-05,
      "loss": 0.8088,
      "step": 528990
    },
    {
      "epoch": 1.8540131147770804,
      "grad_norm": 2.9375,
      "learning_rate": 2.122206905145759e-05,
      "loss": 0.9075,
      "step": 529000
    },
    {
      "epoch": 1.854048162283976,
      "grad_norm": 2.890625,
      "learning_rate": 2.1221420022793885e-05,
      "loss": 0.858,
      "step": 529010
    },
    {
      "epoch": 1.8540832097908715,
      "grad_norm": 2.78125,
      "learning_rate": 2.1220770994130186e-05,
      "loss": 0.7585,
      "step": 529020
    },
    {
      "epoch": 1.8541182572977672,
      "grad_norm": 2.921875,
      "learning_rate": 2.1220121965466484e-05,
      "loss": 0.9032,
      "step": 529030
    },
    {
      "epoch": 1.8541533048046626,
      "grad_norm": 3.0,
      "learning_rate": 2.1219472936802782e-05,
      "loss": 0.7828,
      "step": 529040
    },
    {
      "epoch": 1.8541883523115583,
      "grad_norm": 2.890625,
      "learning_rate": 2.121882390813908e-05,
      "loss": 0.8116,
      "step": 529050
    },
    {
      "epoch": 1.854223399818454,
      "grad_norm": 2.78125,
      "learning_rate": 2.1218174879475378e-05,
      "loss": 0.8016,
      "step": 529060
    },
    {
      "epoch": 1.8542584473253494,
      "grad_norm": 3.03125,
      "learning_rate": 2.1217525850811676e-05,
      "loss": 0.8573,
      "step": 529070
    },
    {
      "epoch": 1.8542934948322451,
      "grad_norm": 2.46875,
      "learning_rate": 2.1216876822147974e-05,
      "loss": 0.7613,
      "step": 529080
    },
    {
      "epoch": 1.8543285423391407,
      "grad_norm": 2.796875,
      "learning_rate": 2.1216227793484272e-05,
      "loss": 0.7991,
      "step": 529090
    },
    {
      "epoch": 1.8543635898460362,
      "grad_norm": 3.03125,
      "learning_rate": 2.121557876482057e-05,
      "loss": 0.801,
      "step": 529100
    },
    {
      "epoch": 1.854398637352932,
      "grad_norm": 3.046875,
      "learning_rate": 2.1214929736156868e-05,
      "loss": 0.8535,
      "step": 529110
    },
    {
      "epoch": 1.8544336848598275,
      "grad_norm": 2.921875,
      "learning_rate": 2.1214280707493166e-05,
      "loss": 0.8552,
      "step": 529120
    },
    {
      "epoch": 1.854468732366723,
      "grad_norm": 3.34375,
      "learning_rate": 2.1213631678829464e-05,
      "loss": 0.8322,
      "step": 529130
    },
    {
      "epoch": 1.8545037798736188,
      "grad_norm": 3.421875,
      "learning_rate": 2.1212982650165762e-05,
      "loss": 0.7902,
      "step": 529140
    },
    {
      "epoch": 1.8545388273805141,
      "grad_norm": 3.0,
      "learning_rate": 2.121233362150206e-05,
      "loss": 0.8239,
      "step": 529150
    },
    {
      "epoch": 1.8545738748874099,
      "grad_norm": 2.71875,
      "learning_rate": 2.121168459283836e-05,
      "loss": 0.7711,
      "step": 529160
    },
    {
      "epoch": 1.8546089223943056,
      "grad_norm": 2.859375,
      "learning_rate": 2.121103556417466e-05,
      "loss": 0.8085,
      "step": 529170
    },
    {
      "epoch": 1.854643969901201,
      "grad_norm": 2.828125,
      "learning_rate": 2.1210386535510958e-05,
      "loss": 0.7873,
      "step": 529180
    },
    {
      "epoch": 1.8546790174080967,
      "grad_norm": 2.765625,
      "learning_rate": 2.1209737506847256e-05,
      "loss": 0.755,
      "step": 529190
    },
    {
      "epoch": 1.8547140649149922,
      "grad_norm": 3.28125,
      "learning_rate": 2.120908847818355e-05,
      "loss": 0.8777,
      "step": 529200
    },
    {
      "epoch": 1.8547491124218878,
      "grad_norm": 2.59375,
      "learning_rate": 2.1208439449519848e-05,
      "loss": 0.8342,
      "step": 529210
    },
    {
      "epoch": 1.8547841599287835,
      "grad_norm": 2.65625,
      "learning_rate": 2.1207790420856146e-05,
      "loss": 0.7812,
      "step": 529220
    },
    {
      "epoch": 1.854819207435679,
      "grad_norm": 3.3125,
      "learning_rate": 2.1207141392192444e-05,
      "loss": 0.7773,
      "step": 529230
    },
    {
      "epoch": 1.8548542549425746,
      "grad_norm": 2.59375,
      "learning_rate": 2.1206492363528742e-05,
      "loss": 0.8428,
      "step": 529240
    },
    {
      "epoch": 1.8548893024494704,
      "grad_norm": 2.4375,
      "learning_rate": 2.120584333486504e-05,
      "loss": 0.7891,
      "step": 529250
    },
    {
      "epoch": 1.854924349956366,
      "grad_norm": 2.75,
      "learning_rate": 2.1205194306201338e-05,
      "loss": 0.8735,
      "step": 529260
    },
    {
      "epoch": 1.8549593974632614,
      "grad_norm": 2.859375,
      "learning_rate": 2.120454527753764e-05,
      "loss": 0.7698,
      "step": 529270
    },
    {
      "epoch": 1.8549944449701572,
      "grad_norm": 3.265625,
      "learning_rate": 2.1203896248873938e-05,
      "loss": 0.8068,
      "step": 529280
    },
    {
      "epoch": 1.8550294924770525,
      "grad_norm": 3.03125,
      "learning_rate": 2.1203247220210236e-05,
      "loss": 0.8344,
      "step": 529290
    },
    {
      "epoch": 1.8550645399839483,
      "grad_norm": 2.625,
      "learning_rate": 2.1202598191546534e-05,
      "loss": 0.7852,
      "step": 529300
    },
    {
      "epoch": 1.8550995874908438,
      "grad_norm": 2.859375,
      "learning_rate": 2.120194916288283e-05,
      "loss": 0.9134,
      "step": 529310
    },
    {
      "epoch": 1.8551346349977393,
      "grad_norm": 2.734375,
      "learning_rate": 2.120130013421913e-05,
      "loss": 0.7503,
      "step": 529320
    },
    {
      "epoch": 1.855169682504635,
      "grad_norm": 2.671875,
      "learning_rate": 2.1200651105555428e-05,
      "loss": 0.8601,
      "step": 529330
    },
    {
      "epoch": 1.8552047300115306,
      "grad_norm": 2.921875,
      "learning_rate": 2.1200002076891726e-05,
      "loss": 0.777,
      "step": 529340
    },
    {
      "epoch": 1.8552397775184262,
      "grad_norm": 2.796875,
      "learning_rate": 2.1199353048228024e-05,
      "loss": 0.7842,
      "step": 529350
    },
    {
      "epoch": 1.855274825025322,
      "grad_norm": 3.21875,
      "learning_rate": 2.119870401956432e-05,
      "loss": 0.849,
      "step": 529360
    },
    {
      "epoch": 1.8553098725322175,
      "grad_norm": 2.953125,
      "learning_rate": 2.119805499090062e-05,
      "loss": 0.8312,
      "step": 529370
    },
    {
      "epoch": 1.855344920039113,
      "grad_norm": 2.8125,
      "learning_rate": 2.1197405962236914e-05,
      "loss": 0.7601,
      "step": 529380
    },
    {
      "epoch": 1.8553799675460088,
      "grad_norm": 3.53125,
      "learning_rate": 2.1196756933573216e-05,
      "loss": 0.8359,
      "step": 529390
    },
    {
      "epoch": 1.855415015052904,
      "grad_norm": 2.609375,
      "learning_rate": 2.1196107904909514e-05,
      "loss": 0.8029,
      "step": 529400
    },
    {
      "epoch": 1.8554500625597998,
      "grad_norm": 3.46875,
      "learning_rate": 2.119545887624581e-05,
      "loss": 0.8593,
      "step": 529410
    },
    {
      "epoch": 1.8554851100666954,
      "grad_norm": 3.21875,
      "learning_rate": 2.119480984758211e-05,
      "loss": 0.8847,
      "step": 529420
    },
    {
      "epoch": 1.855520157573591,
      "grad_norm": 2.90625,
      "learning_rate": 2.1194160818918408e-05,
      "loss": 0.7761,
      "step": 529430
    },
    {
      "epoch": 1.8555552050804867,
      "grad_norm": 2.828125,
      "learning_rate": 2.1193511790254706e-05,
      "loss": 0.833,
      "step": 529440
    },
    {
      "epoch": 1.8555902525873822,
      "grad_norm": 2.375,
      "learning_rate": 2.1192862761591004e-05,
      "loss": 0.8381,
      "step": 529450
    },
    {
      "epoch": 1.8556253000942777,
      "grad_norm": 3.03125,
      "learning_rate": 2.11922137329273e-05,
      "loss": 0.9093,
      "step": 529460
    },
    {
      "epoch": 1.8556603476011735,
      "grad_norm": 2.546875,
      "learning_rate": 2.11915647042636e-05,
      "loss": 0.7882,
      "step": 529470
    },
    {
      "epoch": 1.855695395108069,
      "grad_norm": 3.171875,
      "learning_rate": 2.1190915675599898e-05,
      "loss": 0.8489,
      "step": 529480
    },
    {
      "epoch": 1.8557304426149646,
      "grad_norm": 3.0,
      "learning_rate": 2.1190266646936196e-05,
      "loss": 0.8805,
      "step": 529490
    },
    {
      "epoch": 1.8557654901218603,
      "grad_norm": 2.703125,
      "learning_rate": 2.1189617618272494e-05,
      "loss": 0.8508,
      "step": 529500
    },
    {
      "epoch": 1.8558005376287556,
      "grad_norm": 2.71875,
      "learning_rate": 2.118896858960879e-05,
      "loss": 0.775,
      "step": 529510
    },
    {
      "epoch": 1.8558355851356514,
      "grad_norm": 2.84375,
      "learning_rate": 2.1188319560945093e-05,
      "loss": 0.8357,
      "step": 529520
    },
    {
      "epoch": 1.855870632642547,
      "grad_norm": 2.546875,
      "learning_rate": 2.118767053228139e-05,
      "loss": 0.7621,
      "step": 529530
    },
    {
      "epoch": 1.8559056801494425,
      "grad_norm": 2.765625,
      "learning_rate": 2.118702150361769e-05,
      "loss": 0.81,
      "step": 529540
    },
    {
      "epoch": 1.8559407276563382,
      "grad_norm": 2.5625,
      "learning_rate": 2.1186372474953987e-05,
      "loss": 0.7958,
      "step": 529550
    },
    {
      "epoch": 1.8559757751632338,
      "grad_norm": 2.953125,
      "learning_rate": 2.1185723446290285e-05,
      "loss": 0.8028,
      "step": 529560
    },
    {
      "epoch": 1.8560108226701293,
      "grad_norm": 2.546875,
      "learning_rate": 2.118507441762658e-05,
      "loss": 0.8555,
      "step": 529570
    },
    {
      "epoch": 1.856045870177025,
      "grad_norm": 2.984375,
      "learning_rate": 2.1184425388962878e-05,
      "loss": 0.8676,
      "step": 529580
    },
    {
      "epoch": 1.8560809176839206,
      "grad_norm": 3.0625,
      "learning_rate": 2.1183776360299176e-05,
      "loss": 0.7847,
      "step": 529590
    },
    {
      "epoch": 1.8561159651908161,
      "grad_norm": 3.1875,
      "learning_rate": 2.1183127331635474e-05,
      "loss": 0.8302,
      "step": 529600
    },
    {
      "epoch": 1.8561510126977119,
      "grad_norm": 3.015625,
      "learning_rate": 2.118247830297177e-05,
      "loss": 0.8222,
      "step": 529610
    },
    {
      "epoch": 1.8561860602046072,
      "grad_norm": 3.21875,
      "learning_rate": 2.118182927430807e-05,
      "loss": 0.7931,
      "step": 529620
    },
    {
      "epoch": 1.856221107711503,
      "grad_norm": 2.703125,
      "learning_rate": 2.1181180245644368e-05,
      "loss": 0.7486,
      "step": 529630
    },
    {
      "epoch": 1.8562561552183985,
      "grad_norm": 2.96875,
      "learning_rate": 2.118053121698067e-05,
      "loss": 0.8271,
      "step": 529640
    },
    {
      "epoch": 1.856291202725294,
      "grad_norm": 2.8125,
      "learning_rate": 2.1179882188316967e-05,
      "loss": 0.7765,
      "step": 529650
    },
    {
      "epoch": 1.8563262502321898,
      "grad_norm": 3.515625,
      "learning_rate": 2.1179233159653265e-05,
      "loss": 0.7967,
      "step": 529660
    },
    {
      "epoch": 1.8563612977390853,
      "grad_norm": 2.578125,
      "learning_rate": 2.1178584130989563e-05,
      "loss": 0.8407,
      "step": 529670
    },
    {
      "epoch": 1.8563963452459809,
      "grad_norm": 2.921875,
      "learning_rate": 2.117793510232586e-05,
      "loss": 0.6768,
      "step": 529680
    },
    {
      "epoch": 1.8564313927528766,
      "grad_norm": 2.890625,
      "learning_rate": 2.117728607366216e-05,
      "loss": 0.8647,
      "step": 529690
    },
    {
      "epoch": 1.8564664402597721,
      "grad_norm": 3.046875,
      "learning_rate": 2.1176637044998457e-05,
      "loss": 0.8007,
      "step": 529700
    },
    {
      "epoch": 1.8565014877666677,
      "grad_norm": 3.703125,
      "learning_rate": 2.1175988016334755e-05,
      "loss": 0.7474,
      "step": 529710
    },
    {
      "epoch": 1.8565365352735634,
      "grad_norm": 2.9375,
      "learning_rate": 2.1175338987671053e-05,
      "loss": 0.8176,
      "step": 529720
    },
    {
      "epoch": 1.8565715827804588,
      "grad_norm": 2.984375,
      "learning_rate": 2.117468995900735e-05,
      "loss": 0.8635,
      "step": 529730
    },
    {
      "epoch": 1.8566066302873545,
      "grad_norm": 3.0,
      "learning_rate": 2.117404093034365e-05,
      "loss": 0.8041,
      "step": 529740
    },
    {
      "epoch": 1.8566416777942503,
      "grad_norm": 3.484375,
      "learning_rate": 2.1173391901679947e-05,
      "loss": 0.9691,
      "step": 529750
    },
    {
      "epoch": 1.8566767253011456,
      "grad_norm": 2.59375,
      "learning_rate": 2.1172742873016245e-05,
      "loss": 0.8556,
      "step": 529760
    },
    {
      "epoch": 1.8567117728080413,
      "grad_norm": 3.28125,
      "learning_rate": 2.1172093844352543e-05,
      "loss": 0.8483,
      "step": 529770
    },
    {
      "epoch": 1.8567468203149369,
      "grad_norm": 2.9375,
      "learning_rate": 2.117144481568884e-05,
      "loss": 0.871,
      "step": 529780
    },
    {
      "epoch": 1.8567818678218324,
      "grad_norm": 3.0,
      "learning_rate": 2.117079578702514e-05,
      "loss": 0.8516,
      "step": 529790
    },
    {
      "epoch": 1.8568169153287282,
      "grad_norm": 2.953125,
      "learning_rate": 2.1170146758361437e-05,
      "loss": 0.8151,
      "step": 529800
    },
    {
      "epoch": 1.8568519628356237,
      "grad_norm": 2.765625,
      "learning_rate": 2.1169497729697735e-05,
      "loss": 0.8921,
      "step": 529810
    },
    {
      "epoch": 1.8568870103425192,
      "grad_norm": 3.5,
      "learning_rate": 2.1168848701034033e-05,
      "loss": 0.8735,
      "step": 529820
    },
    {
      "epoch": 1.856922057849415,
      "grad_norm": 2.953125,
      "learning_rate": 2.116819967237033e-05,
      "loss": 0.8791,
      "step": 529830
    },
    {
      "epoch": 1.8569571053563105,
      "grad_norm": 2.828125,
      "learning_rate": 2.116755064370663e-05,
      "loss": 0.8144,
      "step": 529840
    },
    {
      "epoch": 1.856992152863206,
      "grad_norm": 2.921875,
      "learning_rate": 2.1166901615042927e-05,
      "loss": 0.7482,
      "step": 529850
    },
    {
      "epoch": 1.8570272003701018,
      "grad_norm": 2.828125,
      "learning_rate": 2.1166252586379225e-05,
      "loss": 0.8407,
      "step": 529860
    },
    {
      "epoch": 1.8570622478769971,
      "grad_norm": 3.21875,
      "learning_rate": 2.1165603557715523e-05,
      "loss": 0.8593,
      "step": 529870
    },
    {
      "epoch": 1.857097295383893,
      "grad_norm": 2.859375,
      "learning_rate": 2.116495452905182e-05,
      "loss": 0.7888,
      "step": 529880
    },
    {
      "epoch": 1.8571323428907884,
      "grad_norm": 2.9375,
      "learning_rate": 2.1164305500388122e-05,
      "loss": 0.8852,
      "step": 529890
    },
    {
      "epoch": 1.857167390397684,
      "grad_norm": 3.140625,
      "learning_rate": 2.116365647172442e-05,
      "loss": 0.9043,
      "step": 529900
    },
    {
      "epoch": 1.8572024379045797,
      "grad_norm": 2.796875,
      "learning_rate": 2.116300744306072e-05,
      "loss": 0.8065,
      "step": 529910
    },
    {
      "epoch": 1.8572374854114753,
      "grad_norm": 3.015625,
      "learning_rate": 2.1162358414397016e-05,
      "loss": 0.8338,
      "step": 529920
    },
    {
      "epoch": 1.8572725329183708,
      "grad_norm": 2.984375,
      "learning_rate": 2.1161709385733314e-05,
      "loss": 0.9253,
      "step": 529930
    },
    {
      "epoch": 1.8573075804252666,
      "grad_norm": 2.546875,
      "learning_rate": 2.1161060357069612e-05,
      "loss": 0.8351,
      "step": 529940
    },
    {
      "epoch": 1.857342627932162,
      "grad_norm": 2.5,
      "learning_rate": 2.1160411328405907e-05,
      "loss": 0.9088,
      "step": 529950
    },
    {
      "epoch": 1.8573776754390576,
      "grad_norm": 3.515625,
      "learning_rate": 2.1159762299742205e-05,
      "loss": 0.7975,
      "step": 529960
    },
    {
      "epoch": 1.8574127229459534,
      "grad_norm": 2.96875,
      "learning_rate": 2.1159113271078503e-05,
      "loss": 0.7734,
      "step": 529970
    },
    {
      "epoch": 1.8574477704528487,
      "grad_norm": 2.65625,
      "learning_rate": 2.11584642424148e-05,
      "loss": 0.8278,
      "step": 529980
    },
    {
      "epoch": 1.8574828179597445,
      "grad_norm": 3.25,
      "learning_rate": 2.11578152137511e-05,
      "loss": 0.9278,
      "step": 529990
    },
    {
      "epoch": 1.85751786546664,
      "grad_norm": 3.46875,
      "learning_rate": 2.11571661850874e-05,
      "loss": 0.8815,
      "step": 530000
    },
    {
      "epoch": 1.85751786546664,
      "eval_loss": 0.7766532301902771,
      "eval_runtime": 564.111,
      "eval_samples_per_second": 674.399,
      "eval_steps_per_second": 56.2,
      "step": 530000
    },
    {
      "epoch": 1.8575529129735355,
      "grad_norm": 3.328125,
      "learning_rate": 2.11565171564237e-05,
      "loss": 0.7847,
      "step": 530010
    },
    {
      "epoch": 1.8575879604804313,
      "grad_norm": 2.71875,
      "learning_rate": 2.1155868127759996e-05,
      "loss": 0.7382,
      "step": 530020
    },
    {
      "epoch": 1.8576230079873268,
      "grad_norm": 2.625,
      "learning_rate": 2.1155219099096294e-05,
      "loss": 0.8418,
      "step": 530030
    },
    {
      "epoch": 1.8576580554942224,
      "grad_norm": 2.90625,
      "learning_rate": 2.1154570070432592e-05,
      "loss": 0.7347,
      "step": 530040
    },
    {
      "epoch": 1.8576931030011181,
      "grad_norm": 2.859375,
      "learning_rate": 2.115392104176889e-05,
      "loss": 0.868,
      "step": 530050
    },
    {
      "epoch": 1.8577281505080137,
      "grad_norm": 3.484375,
      "learning_rate": 2.115327201310519e-05,
      "loss": 0.749,
      "step": 530060
    },
    {
      "epoch": 1.8577631980149092,
      "grad_norm": 2.828125,
      "learning_rate": 2.1152622984441486e-05,
      "loss": 0.7489,
      "step": 530070
    },
    {
      "epoch": 1.857798245521805,
      "grad_norm": 2.578125,
      "learning_rate": 2.1151973955777784e-05,
      "loss": 0.8105,
      "step": 530080
    },
    {
      "epoch": 1.8578332930287003,
      "grad_norm": 3.125,
      "learning_rate": 2.1151324927114082e-05,
      "loss": 0.868,
      "step": 530090
    },
    {
      "epoch": 1.857868340535596,
      "grad_norm": 2.4375,
      "learning_rate": 2.115067589845038e-05,
      "loss": 0.8807,
      "step": 530100
    },
    {
      "epoch": 1.8579033880424916,
      "grad_norm": 2.96875,
      "learning_rate": 2.115002686978668e-05,
      "loss": 0.8289,
      "step": 530110
    },
    {
      "epoch": 1.857938435549387,
      "grad_norm": 2.859375,
      "learning_rate": 2.1149377841122976e-05,
      "loss": 0.779,
      "step": 530120
    },
    {
      "epoch": 1.8579734830562828,
      "grad_norm": 2.90625,
      "learning_rate": 2.1148728812459274e-05,
      "loss": 0.8362,
      "step": 530130
    },
    {
      "epoch": 1.8580085305631784,
      "grad_norm": 3.09375,
      "learning_rate": 2.1148079783795572e-05,
      "loss": 0.8028,
      "step": 530140
    },
    {
      "epoch": 1.858043578070074,
      "grad_norm": 2.53125,
      "learning_rate": 2.114743075513187e-05,
      "loss": 0.8535,
      "step": 530150
    },
    {
      "epoch": 1.8580786255769697,
      "grad_norm": 3.203125,
      "learning_rate": 2.114678172646817e-05,
      "loss": 0.8923,
      "step": 530160
    },
    {
      "epoch": 1.8581136730838652,
      "grad_norm": 2.84375,
      "learning_rate": 2.1146132697804466e-05,
      "loss": 0.8466,
      "step": 530170
    },
    {
      "epoch": 1.8581487205907607,
      "grad_norm": 2.640625,
      "learning_rate": 2.1145483669140764e-05,
      "loss": 0.9038,
      "step": 530180
    },
    {
      "epoch": 1.8581837680976565,
      "grad_norm": 3.296875,
      "learning_rate": 2.1144834640477062e-05,
      "loss": 0.7934,
      "step": 530190
    },
    {
      "epoch": 1.8582188156045518,
      "grad_norm": 3.09375,
      "learning_rate": 2.114418561181336e-05,
      "loss": 0.8032,
      "step": 530200
    },
    {
      "epoch": 1.8582538631114476,
      "grad_norm": 2.59375,
      "learning_rate": 2.114353658314966e-05,
      "loss": 0.7827,
      "step": 530210
    },
    {
      "epoch": 1.8582889106183431,
      "grad_norm": 2.8125,
      "learning_rate": 2.1142887554485956e-05,
      "loss": 0.7266,
      "step": 530220
    },
    {
      "epoch": 1.8583239581252387,
      "grad_norm": 3.203125,
      "learning_rate": 2.1142238525822254e-05,
      "loss": 0.8395,
      "step": 530230
    },
    {
      "epoch": 1.8583590056321344,
      "grad_norm": 2.75,
      "learning_rate": 2.1141589497158552e-05,
      "loss": 0.8696,
      "step": 530240
    },
    {
      "epoch": 1.85839405313903,
      "grad_norm": 2.65625,
      "learning_rate": 2.114094046849485e-05,
      "loss": 0.8196,
      "step": 530250
    },
    {
      "epoch": 1.8584291006459255,
      "grad_norm": 3.328125,
      "learning_rate": 2.1140291439831152e-05,
      "loss": 0.8879,
      "step": 530260
    },
    {
      "epoch": 1.8584641481528212,
      "grad_norm": 3.515625,
      "learning_rate": 2.113964241116745e-05,
      "loss": 0.8988,
      "step": 530270
    },
    {
      "epoch": 1.8584991956597168,
      "grad_norm": 2.734375,
      "learning_rate": 2.1138993382503748e-05,
      "loss": 0.8275,
      "step": 530280
    },
    {
      "epoch": 1.8585342431666123,
      "grad_norm": 2.78125,
      "learning_rate": 2.1138344353840046e-05,
      "loss": 0.8248,
      "step": 530290
    },
    {
      "epoch": 1.858569290673508,
      "grad_norm": 2.703125,
      "learning_rate": 2.1137695325176344e-05,
      "loss": 0.7714,
      "step": 530300
    },
    {
      "epoch": 1.8586043381804034,
      "grad_norm": 3.078125,
      "learning_rate": 2.1137046296512642e-05,
      "loss": 0.8398,
      "step": 530310
    },
    {
      "epoch": 1.8586393856872991,
      "grad_norm": 3.0625,
      "learning_rate": 2.1136397267848936e-05,
      "loss": 0.8056,
      "step": 530320
    },
    {
      "epoch": 1.858674433194195,
      "grad_norm": 2.515625,
      "learning_rate": 2.1135748239185234e-05,
      "loss": 0.8224,
      "step": 530330
    },
    {
      "epoch": 1.8587094807010902,
      "grad_norm": 3.140625,
      "learning_rate": 2.1135099210521532e-05,
      "loss": 0.8474,
      "step": 530340
    },
    {
      "epoch": 1.858744528207986,
      "grad_norm": 2.78125,
      "learning_rate": 2.113445018185783e-05,
      "loss": 0.7985,
      "step": 530350
    },
    {
      "epoch": 1.8587795757148815,
      "grad_norm": 2.890625,
      "learning_rate": 2.113380115319413e-05,
      "loss": 0.8411,
      "step": 530360
    },
    {
      "epoch": 1.858814623221777,
      "grad_norm": 2.734375,
      "learning_rate": 2.113315212453043e-05,
      "loss": 0.7783,
      "step": 530370
    },
    {
      "epoch": 1.8588496707286728,
      "grad_norm": 2.84375,
      "learning_rate": 2.1132503095866728e-05,
      "loss": 0.7674,
      "step": 530380
    },
    {
      "epoch": 1.8588847182355683,
      "grad_norm": 3.015625,
      "learning_rate": 2.1131854067203026e-05,
      "loss": 0.7369,
      "step": 530390
    },
    {
      "epoch": 1.8589197657424639,
      "grad_norm": 2.765625,
      "learning_rate": 2.1131205038539324e-05,
      "loss": 0.83,
      "step": 530400
    },
    {
      "epoch": 1.8589548132493596,
      "grad_norm": 3.3125,
      "learning_rate": 2.1130556009875622e-05,
      "loss": 0.8692,
      "step": 530410
    },
    {
      "epoch": 1.858989860756255,
      "grad_norm": 3.15625,
      "learning_rate": 2.112990698121192e-05,
      "loss": 0.8287,
      "step": 530420
    },
    {
      "epoch": 1.8590249082631507,
      "grad_norm": 2.5,
      "learning_rate": 2.1129257952548218e-05,
      "loss": 0.7791,
      "step": 530430
    },
    {
      "epoch": 1.8590599557700465,
      "grad_norm": 3.078125,
      "learning_rate": 2.1128608923884516e-05,
      "loss": 0.8227,
      "step": 530440
    },
    {
      "epoch": 1.8590950032769418,
      "grad_norm": 3.203125,
      "learning_rate": 2.1127959895220814e-05,
      "loss": 0.8425,
      "step": 530450
    },
    {
      "epoch": 1.8591300507838375,
      "grad_norm": 2.53125,
      "learning_rate": 2.1127310866557112e-05,
      "loss": 0.7931,
      "step": 530460
    },
    {
      "epoch": 1.859165098290733,
      "grad_norm": 2.828125,
      "learning_rate": 2.112666183789341e-05,
      "loss": 0.8884,
      "step": 530470
    },
    {
      "epoch": 1.8592001457976286,
      "grad_norm": 2.765625,
      "learning_rate": 2.1126012809229708e-05,
      "loss": 0.7928,
      "step": 530480
    },
    {
      "epoch": 1.8592351933045244,
      "grad_norm": 2.734375,
      "learning_rate": 2.1125363780566006e-05,
      "loss": 0.7753,
      "step": 530490
    },
    {
      "epoch": 1.85927024081142,
      "grad_norm": 2.890625,
      "learning_rate": 2.1124714751902304e-05,
      "loss": 0.8237,
      "step": 530500
    },
    {
      "epoch": 1.8593052883183154,
      "grad_norm": 3.09375,
      "learning_rate": 2.1124065723238602e-05,
      "loss": 0.842,
      "step": 530510
    },
    {
      "epoch": 1.8593403358252112,
      "grad_norm": 2.8125,
      "learning_rate": 2.11234166945749e-05,
      "loss": 0.8401,
      "step": 530520
    },
    {
      "epoch": 1.8593753833321067,
      "grad_norm": 3.21875,
      "learning_rate": 2.1122767665911198e-05,
      "loss": 0.8773,
      "step": 530530
    },
    {
      "epoch": 1.8594104308390023,
      "grad_norm": 3.234375,
      "learning_rate": 2.1122118637247496e-05,
      "loss": 0.8096,
      "step": 530540
    },
    {
      "epoch": 1.859445478345898,
      "grad_norm": 3.078125,
      "learning_rate": 2.1121469608583794e-05,
      "loss": 0.7767,
      "step": 530550
    },
    {
      "epoch": 1.8594805258527933,
      "grad_norm": 2.640625,
      "learning_rate": 2.1120820579920092e-05,
      "loss": 0.807,
      "step": 530560
    },
    {
      "epoch": 1.859515573359689,
      "grad_norm": 2.390625,
      "learning_rate": 2.112017155125639e-05,
      "loss": 0.8084,
      "step": 530570
    },
    {
      "epoch": 1.8595506208665846,
      "grad_norm": 3.671875,
      "learning_rate": 2.1119522522592688e-05,
      "loss": 0.7898,
      "step": 530580
    },
    {
      "epoch": 1.8595856683734802,
      "grad_norm": 2.453125,
      "learning_rate": 2.1118873493928986e-05,
      "loss": 0.7712,
      "step": 530590
    },
    {
      "epoch": 1.859620715880376,
      "grad_norm": 2.8125,
      "learning_rate": 2.1118224465265284e-05,
      "loss": 0.8472,
      "step": 530600
    },
    {
      "epoch": 1.8596557633872715,
      "grad_norm": 3.328125,
      "learning_rate": 2.1117575436601582e-05,
      "loss": 0.8266,
      "step": 530610
    },
    {
      "epoch": 1.859690810894167,
      "grad_norm": 2.625,
      "learning_rate": 2.1116926407937883e-05,
      "loss": 0.8193,
      "step": 530620
    },
    {
      "epoch": 1.8597258584010627,
      "grad_norm": 2.859375,
      "learning_rate": 2.111627737927418e-05,
      "loss": 0.8894,
      "step": 530630
    },
    {
      "epoch": 1.8597609059079583,
      "grad_norm": 3.109375,
      "learning_rate": 2.111562835061048e-05,
      "loss": 0.7647,
      "step": 530640
    },
    {
      "epoch": 1.8597959534148538,
      "grad_norm": 2.71875,
      "learning_rate": 2.1114979321946777e-05,
      "loss": 0.7826,
      "step": 530650
    },
    {
      "epoch": 1.8598310009217496,
      "grad_norm": 2.859375,
      "learning_rate": 2.1114330293283075e-05,
      "loss": 0.86,
      "step": 530660
    },
    {
      "epoch": 1.859866048428645,
      "grad_norm": 2.78125,
      "learning_rate": 2.1113681264619373e-05,
      "loss": 0.8505,
      "step": 530670
    },
    {
      "epoch": 1.8599010959355406,
      "grad_norm": 3.140625,
      "learning_rate": 2.111303223595567e-05,
      "loss": 0.8071,
      "step": 530680
    },
    {
      "epoch": 1.8599361434424362,
      "grad_norm": 3.875,
      "learning_rate": 2.111238320729197e-05,
      "loss": 0.7708,
      "step": 530690
    },
    {
      "epoch": 1.8599711909493317,
      "grad_norm": 2.609375,
      "learning_rate": 2.1111734178628264e-05,
      "loss": 0.8001,
      "step": 530700
    },
    {
      "epoch": 1.8600062384562275,
      "grad_norm": 2.984375,
      "learning_rate": 2.1111085149964562e-05,
      "loss": 0.8329,
      "step": 530710
    },
    {
      "epoch": 1.860041285963123,
      "grad_norm": 2.8125,
      "learning_rate": 2.111043612130086e-05,
      "loss": 0.8359,
      "step": 530720
    },
    {
      "epoch": 1.8600763334700186,
      "grad_norm": 2.796875,
      "learning_rate": 2.1109787092637158e-05,
      "loss": 0.7956,
      "step": 530730
    },
    {
      "epoch": 1.8601113809769143,
      "grad_norm": 2.6875,
      "learning_rate": 2.110913806397346e-05,
      "loss": 0.8844,
      "step": 530740
    },
    {
      "epoch": 1.8601464284838098,
      "grad_norm": 3.171875,
      "learning_rate": 2.1108489035309757e-05,
      "loss": 0.86,
      "step": 530750
    },
    {
      "epoch": 1.8601814759907054,
      "grad_norm": 2.984375,
      "learning_rate": 2.1107840006646055e-05,
      "loss": 0.904,
      "step": 530760
    },
    {
      "epoch": 1.8602165234976011,
      "grad_norm": 2.921875,
      "learning_rate": 2.1107190977982353e-05,
      "loss": 0.8502,
      "step": 530770
    },
    {
      "epoch": 1.8602515710044965,
      "grad_norm": 2.921875,
      "learning_rate": 2.110654194931865e-05,
      "loss": 0.7889,
      "step": 530780
    },
    {
      "epoch": 1.8602866185113922,
      "grad_norm": 2.9375,
      "learning_rate": 2.110589292065495e-05,
      "loss": 0.8913,
      "step": 530790
    },
    {
      "epoch": 1.8603216660182877,
      "grad_norm": 2.984375,
      "learning_rate": 2.1105243891991247e-05,
      "loss": 0.8179,
      "step": 530800
    },
    {
      "epoch": 1.8603567135251833,
      "grad_norm": 2.796875,
      "learning_rate": 2.1104594863327545e-05,
      "loss": 0.8454,
      "step": 530810
    },
    {
      "epoch": 1.860391761032079,
      "grad_norm": 3.046875,
      "learning_rate": 2.1103945834663843e-05,
      "loss": 0.8442,
      "step": 530820
    },
    {
      "epoch": 1.8604268085389746,
      "grad_norm": 3.015625,
      "learning_rate": 2.110329680600014e-05,
      "loss": 0.8052,
      "step": 530830
    },
    {
      "epoch": 1.86046185604587,
      "grad_norm": 2.921875,
      "learning_rate": 2.110264777733644e-05,
      "loss": 0.8449,
      "step": 530840
    },
    {
      "epoch": 1.8604969035527659,
      "grad_norm": 2.640625,
      "learning_rate": 2.1101998748672737e-05,
      "loss": 0.8476,
      "step": 530850
    },
    {
      "epoch": 1.8605319510596614,
      "grad_norm": 3.34375,
      "learning_rate": 2.1101349720009035e-05,
      "loss": 0.8479,
      "step": 530860
    },
    {
      "epoch": 1.860566998566557,
      "grad_norm": 3.140625,
      "learning_rate": 2.1100700691345333e-05,
      "loss": 0.8466,
      "step": 530870
    },
    {
      "epoch": 1.8606020460734527,
      "grad_norm": 3.421875,
      "learning_rate": 2.1100051662681635e-05,
      "loss": 0.7677,
      "step": 530880
    },
    {
      "epoch": 1.860637093580348,
      "grad_norm": 2.8125,
      "learning_rate": 2.109940263401793e-05,
      "loss": 0.8001,
      "step": 530890
    },
    {
      "epoch": 1.8606721410872438,
      "grad_norm": 2.859375,
      "learning_rate": 2.1098753605354227e-05,
      "loss": 0.7458,
      "step": 530900
    },
    {
      "epoch": 1.8607071885941393,
      "grad_norm": 2.421875,
      "learning_rate": 2.1098104576690525e-05,
      "loss": 0.8053,
      "step": 530910
    },
    {
      "epoch": 1.8607422361010348,
      "grad_norm": 2.8125,
      "learning_rate": 2.1097455548026823e-05,
      "loss": 0.802,
      "step": 530920
    },
    {
      "epoch": 1.8607772836079306,
      "grad_norm": 2.8125,
      "learning_rate": 2.109680651936312e-05,
      "loss": 0.8521,
      "step": 530930
    },
    {
      "epoch": 1.8608123311148261,
      "grad_norm": 2.875,
      "learning_rate": 2.109615749069942e-05,
      "loss": 0.8578,
      "step": 530940
    },
    {
      "epoch": 1.8608473786217217,
      "grad_norm": 2.765625,
      "learning_rate": 2.1095508462035717e-05,
      "loss": 0.8095,
      "step": 530950
    },
    {
      "epoch": 1.8608824261286174,
      "grad_norm": 2.828125,
      "learning_rate": 2.1094859433372015e-05,
      "loss": 0.8325,
      "step": 530960
    },
    {
      "epoch": 1.860917473635513,
      "grad_norm": 3.140625,
      "learning_rate": 2.1094210404708313e-05,
      "loss": 0.9207,
      "step": 530970
    },
    {
      "epoch": 1.8609525211424085,
      "grad_norm": 3.109375,
      "learning_rate": 2.109356137604461e-05,
      "loss": 0.8851,
      "step": 530980
    },
    {
      "epoch": 1.8609875686493043,
      "grad_norm": 2.6875,
      "learning_rate": 2.1092912347380913e-05,
      "loss": 0.846,
      "step": 530990
    },
    {
      "epoch": 1.8610226161561996,
      "grad_norm": 3.09375,
      "learning_rate": 2.109226331871721e-05,
      "loss": 0.8213,
      "step": 531000
    },
    {
      "epoch": 1.8610576636630953,
      "grad_norm": 3.34375,
      "learning_rate": 2.109161429005351e-05,
      "loss": 0.8336,
      "step": 531010
    },
    {
      "epoch": 1.861092711169991,
      "grad_norm": 2.71875,
      "learning_rate": 2.1090965261389807e-05,
      "loss": 0.8507,
      "step": 531020
    },
    {
      "epoch": 1.8611277586768864,
      "grad_norm": 3.171875,
      "learning_rate": 2.1090316232726105e-05,
      "loss": 0.8579,
      "step": 531030
    },
    {
      "epoch": 1.8611628061837822,
      "grad_norm": 2.78125,
      "learning_rate": 2.1089667204062403e-05,
      "loss": 0.8184,
      "step": 531040
    },
    {
      "epoch": 1.8611978536906777,
      "grad_norm": 2.65625,
      "learning_rate": 2.10890181753987e-05,
      "loss": 0.8939,
      "step": 531050
    },
    {
      "epoch": 1.8612329011975732,
      "grad_norm": 2.96875,
      "learning_rate": 2.1088369146735e-05,
      "loss": 0.8022,
      "step": 531060
    },
    {
      "epoch": 1.861267948704469,
      "grad_norm": 2.9375,
      "learning_rate": 2.1087720118071297e-05,
      "loss": 0.818,
      "step": 531070
    },
    {
      "epoch": 1.8613029962113645,
      "grad_norm": 3.453125,
      "learning_rate": 2.108707108940759e-05,
      "loss": 0.8123,
      "step": 531080
    },
    {
      "epoch": 1.86133804371826,
      "grad_norm": 3.03125,
      "learning_rate": 2.108642206074389e-05,
      "loss": 0.8234,
      "step": 531090
    },
    {
      "epoch": 1.8613730912251558,
      "grad_norm": 3.40625,
      "learning_rate": 2.108577303208019e-05,
      "loss": 0.8459,
      "step": 531100
    },
    {
      "epoch": 1.8614081387320511,
      "grad_norm": 2.84375,
      "learning_rate": 2.108512400341649e-05,
      "loss": 0.8099,
      "step": 531110
    },
    {
      "epoch": 1.861443186238947,
      "grad_norm": 3.015625,
      "learning_rate": 2.1084474974752787e-05,
      "loss": 0.8615,
      "step": 531120
    },
    {
      "epoch": 1.8614782337458426,
      "grad_norm": 2.5625,
      "learning_rate": 2.1083825946089085e-05,
      "loss": 0.8177,
      "step": 531130
    },
    {
      "epoch": 1.861513281252738,
      "grad_norm": 2.59375,
      "learning_rate": 2.1083176917425383e-05,
      "loss": 0.8463,
      "step": 531140
    },
    {
      "epoch": 1.8615483287596337,
      "grad_norm": 3.015625,
      "learning_rate": 2.108252788876168e-05,
      "loss": 0.8872,
      "step": 531150
    },
    {
      "epoch": 1.8615833762665293,
      "grad_norm": 2.859375,
      "learning_rate": 2.108187886009798e-05,
      "loss": 0.8096,
      "step": 531160
    },
    {
      "epoch": 1.8616184237734248,
      "grad_norm": 2.5625,
      "learning_rate": 2.1081229831434277e-05,
      "loss": 0.8008,
      "step": 531170
    },
    {
      "epoch": 1.8616534712803205,
      "grad_norm": 2.859375,
      "learning_rate": 2.1080580802770575e-05,
      "loss": 0.7831,
      "step": 531180
    },
    {
      "epoch": 1.861688518787216,
      "grad_norm": 2.5625,
      "learning_rate": 2.1079931774106873e-05,
      "loss": 0.7779,
      "step": 531190
    },
    {
      "epoch": 1.8617235662941116,
      "grad_norm": 3.09375,
      "learning_rate": 2.107928274544317e-05,
      "loss": 0.8439,
      "step": 531200
    },
    {
      "epoch": 1.8617586138010074,
      "grad_norm": 2.90625,
      "learning_rate": 2.107863371677947e-05,
      "loss": 0.7716,
      "step": 531210
    },
    {
      "epoch": 1.861793661307903,
      "grad_norm": 2.640625,
      "learning_rate": 2.1077984688115767e-05,
      "loss": 0.7808,
      "step": 531220
    },
    {
      "epoch": 1.8618287088147984,
      "grad_norm": 3.40625,
      "learning_rate": 2.1077335659452065e-05,
      "loss": 0.8837,
      "step": 531230
    },
    {
      "epoch": 1.8618637563216942,
      "grad_norm": 3.21875,
      "learning_rate": 2.1076686630788366e-05,
      "loss": 0.835,
      "step": 531240
    },
    {
      "epoch": 1.8618988038285895,
      "grad_norm": 2.828125,
      "learning_rate": 2.1076037602124664e-05,
      "loss": 0.7828,
      "step": 531250
    },
    {
      "epoch": 1.8619338513354853,
      "grad_norm": 2.859375,
      "learning_rate": 2.107538857346096e-05,
      "loss": 0.8296,
      "step": 531260
    },
    {
      "epoch": 1.8619688988423808,
      "grad_norm": 2.578125,
      "learning_rate": 2.1074739544797257e-05,
      "loss": 0.8097,
      "step": 531270
    },
    {
      "epoch": 1.8620039463492764,
      "grad_norm": 3.546875,
      "learning_rate": 2.1074090516133555e-05,
      "loss": 0.7984,
      "step": 531280
    },
    {
      "epoch": 1.862038993856172,
      "grad_norm": 3.09375,
      "learning_rate": 2.1073441487469853e-05,
      "loss": 0.8125,
      "step": 531290
    },
    {
      "epoch": 1.8620740413630676,
      "grad_norm": 2.5,
      "learning_rate": 2.107279245880615e-05,
      "loss": 0.7563,
      "step": 531300
    },
    {
      "epoch": 1.8621090888699632,
      "grad_norm": 2.59375,
      "learning_rate": 2.107214343014245e-05,
      "loss": 0.8068,
      "step": 531310
    },
    {
      "epoch": 1.862144136376859,
      "grad_norm": 3.28125,
      "learning_rate": 2.1071494401478747e-05,
      "loss": 0.8306,
      "step": 531320
    },
    {
      "epoch": 1.8621791838837545,
      "grad_norm": 3.421875,
      "learning_rate": 2.1070845372815045e-05,
      "loss": 0.8921,
      "step": 531330
    },
    {
      "epoch": 1.86221423139065,
      "grad_norm": 2.640625,
      "learning_rate": 2.1070196344151343e-05,
      "loss": 0.8488,
      "step": 531340
    },
    {
      "epoch": 1.8622492788975458,
      "grad_norm": 2.75,
      "learning_rate": 2.106954731548764e-05,
      "loss": 0.8078,
      "step": 531350
    },
    {
      "epoch": 1.862284326404441,
      "grad_norm": 2.5625,
      "learning_rate": 2.1068898286823942e-05,
      "loss": 0.7733,
      "step": 531360
    },
    {
      "epoch": 1.8623193739113368,
      "grad_norm": 2.515625,
      "learning_rate": 2.106824925816024e-05,
      "loss": 0.7777,
      "step": 531370
    },
    {
      "epoch": 1.8623544214182324,
      "grad_norm": 2.609375,
      "learning_rate": 2.1067600229496538e-05,
      "loss": 0.8422,
      "step": 531380
    },
    {
      "epoch": 1.862389468925128,
      "grad_norm": 3.125,
      "learning_rate": 2.1066951200832836e-05,
      "loss": 0.7968,
      "step": 531390
    },
    {
      "epoch": 1.8624245164320237,
      "grad_norm": 2.84375,
      "learning_rate": 2.1066302172169134e-05,
      "loss": 0.7813,
      "step": 531400
    },
    {
      "epoch": 1.8624595639389192,
      "grad_norm": 3.03125,
      "learning_rate": 2.1065653143505432e-05,
      "loss": 0.7821,
      "step": 531410
    },
    {
      "epoch": 1.8624946114458147,
      "grad_norm": 2.875,
      "learning_rate": 2.106500411484173e-05,
      "loss": 0.893,
      "step": 531420
    },
    {
      "epoch": 1.8625296589527105,
      "grad_norm": 2.71875,
      "learning_rate": 2.1064355086178028e-05,
      "loss": 0.7556,
      "step": 531430
    },
    {
      "epoch": 1.862564706459606,
      "grad_norm": 2.953125,
      "learning_rate": 2.1063706057514326e-05,
      "loss": 0.8143,
      "step": 531440
    },
    {
      "epoch": 1.8625997539665016,
      "grad_norm": 2.40625,
      "learning_rate": 2.106305702885062e-05,
      "loss": 0.815,
      "step": 531450
    },
    {
      "epoch": 1.8626348014733973,
      "grad_norm": 2.640625,
      "learning_rate": 2.106240800018692e-05,
      "loss": 0.7723,
      "step": 531460
    },
    {
      "epoch": 1.8626698489802926,
      "grad_norm": 2.96875,
      "learning_rate": 2.106175897152322e-05,
      "loss": 0.8167,
      "step": 531470
    },
    {
      "epoch": 1.8627048964871884,
      "grad_norm": 2.71875,
      "learning_rate": 2.1061109942859518e-05,
      "loss": 0.8224,
      "step": 531480
    },
    {
      "epoch": 1.862739943994084,
      "grad_norm": 3.1875,
      "learning_rate": 2.1060460914195816e-05,
      "loss": 0.8105,
      "step": 531490
    },
    {
      "epoch": 1.8627749915009795,
      "grad_norm": 2.78125,
      "learning_rate": 2.1059811885532114e-05,
      "loss": 0.8494,
      "step": 531500
    },
    {
      "epoch": 1.8628100390078752,
      "grad_norm": 3.03125,
      "learning_rate": 2.1059162856868412e-05,
      "loss": 0.8206,
      "step": 531510
    },
    {
      "epoch": 1.8628450865147708,
      "grad_norm": 3.0,
      "learning_rate": 2.105851382820471e-05,
      "loss": 0.7987,
      "step": 531520
    },
    {
      "epoch": 1.8628801340216663,
      "grad_norm": 2.84375,
      "learning_rate": 2.1057864799541008e-05,
      "loss": 0.7622,
      "step": 531530
    },
    {
      "epoch": 1.862915181528562,
      "grad_norm": 2.6875,
      "learning_rate": 2.1057215770877306e-05,
      "loss": 0.8055,
      "step": 531540
    },
    {
      "epoch": 1.8629502290354576,
      "grad_norm": 2.796875,
      "learning_rate": 2.1056566742213604e-05,
      "loss": 0.7475,
      "step": 531550
    },
    {
      "epoch": 1.8629852765423531,
      "grad_norm": 3.078125,
      "learning_rate": 2.1055917713549902e-05,
      "loss": 0.8675,
      "step": 531560
    },
    {
      "epoch": 1.8630203240492489,
      "grad_norm": 3.140625,
      "learning_rate": 2.10552686848862e-05,
      "loss": 0.9198,
      "step": 531570
    },
    {
      "epoch": 1.8630553715561442,
      "grad_norm": 2.625,
      "learning_rate": 2.1054619656222498e-05,
      "loss": 0.8464,
      "step": 531580
    },
    {
      "epoch": 1.86309041906304,
      "grad_norm": 2.796875,
      "learning_rate": 2.1053970627558796e-05,
      "loss": 0.9037,
      "step": 531590
    },
    {
      "epoch": 1.8631254665699355,
      "grad_norm": 3.109375,
      "learning_rate": 2.1053321598895094e-05,
      "loss": 0.8232,
      "step": 531600
    },
    {
      "epoch": 1.863160514076831,
      "grad_norm": 3.125,
      "learning_rate": 2.1052672570231395e-05,
      "loss": 0.8349,
      "step": 531610
    },
    {
      "epoch": 1.8631955615837268,
      "grad_norm": 2.90625,
      "learning_rate": 2.1052023541567693e-05,
      "loss": 0.8209,
      "step": 531620
    },
    {
      "epoch": 1.8632306090906223,
      "grad_norm": 3.46875,
      "learning_rate": 2.105137451290399e-05,
      "loss": 0.784,
      "step": 531630
    },
    {
      "epoch": 1.8632656565975179,
      "grad_norm": 3.0,
      "learning_rate": 2.1050725484240286e-05,
      "loss": 0.8232,
      "step": 531640
    },
    {
      "epoch": 1.8633007041044136,
      "grad_norm": 2.984375,
      "learning_rate": 2.1050076455576584e-05,
      "loss": 0.9255,
      "step": 531650
    },
    {
      "epoch": 1.8633357516113092,
      "grad_norm": 3.046875,
      "learning_rate": 2.1049427426912882e-05,
      "loss": 0.8984,
      "step": 531660
    },
    {
      "epoch": 1.8633707991182047,
      "grad_norm": 2.703125,
      "learning_rate": 2.104877839824918e-05,
      "loss": 0.8225,
      "step": 531670
    },
    {
      "epoch": 1.8634058466251004,
      "grad_norm": 3.359375,
      "learning_rate": 2.1048129369585478e-05,
      "loss": 0.8736,
      "step": 531680
    },
    {
      "epoch": 1.8634408941319958,
      "grad_norm": 3.015625,
      "learning_rate": 2.1047480340921776e-05,
      "loss": 0.7568,
      "step": 531690
    },
    {
      "epoch": 1.8634759416388915,
      "grad_norm": 3.0,
      "learning_rate": 2.1046831312258074e-05,
      "loss": 0.8214,
      "step": 531700
    },
    {
      "epoch": 1.8635109891457873,
      "grad_norm": 3.34375,
      "learning_rate": 2.1046182283594372e-05,
      "loss": 0.829,
      "step": 531710
    },
    {
      "epoch": 1.8635460366526826,
      "grad_norm": 2.78125,
      "learning_rate": 2.1045533254930673e-05,
      "loss": 0.8252,
      "step": 531720
    },
    {
      "epoch": 1.8635810841595783,
      "grad_norm": 2.75,
      "learning_rate": 2.104488422626697e-05,
      "loss": 0.7479,
      "step": 531730
    },
    {
      "epoch": 1.8636161316664739,
      "grad_norm": 3.03125,
      "learning_rate": 2.104423519760327e-05,
      "loss": 0.8699,
      "step": 531740
    },
    {
      "epoch": 1.8636511791733694,
      "grad_norm": 2.921875,
      "learning_rate": 2.1043586168939567e-05,
      "loss": 0.8794,
      "step": 531750
    },
    {
      "epoch": 1.8636862266802652,
      "grad_norm": 2.625,
      "learning_rate": 2.1042937140275865e-05,
      "loss": 0.8586,
      "step": 531760
    },
    {
      "epoch": 1.8637212741871607,
      "grad_norm": 2.953125,
      "learning_rate": 2.1042288111612163e-05,
      "loss": 0.8384,
      "step": 531770
    },
    {
      "epoch": 1.8637563216940563,
      "grad_norm": 2.984375,
      "learning_rate": 2.104163908294846e-05,
      "loss": 0.8379,
      "step": 531780
    },
    {
      "epoch": 1.863791369200952,
      "grad_norm": 2.984375,
      "learning_rate": 2.104099005428476e-05,
      "loss": 0.8848,
      "step": 531790
    },
    {
      "epoch": 1.8638264167078473,
      "grad_norm": 3.234375,
      "learning_rate": 2.1040341025621057e-05,
      "loss": 0.8668,
      "step": 531800
    },
    {
      "epoch": 1.863861464214743,
      "grad_norm": 2.921875,
      "learning_rate": 2.1039691996957355e-05,
      "loss": 0.8588,
      "step": 531810
    },
    {
      "epoch": 1.8638965117216388,
      "grad_norm": 3.5,
      "learning_rate": 2.1039042968293653e-05,
      "loss": 0.8414,
      "step": 531820
    },
    {
      "epoch": 1.8639315592285342,
      "grad_norm": 3.0,
      "learning_rate": 2.1038393939629948e-05,
      "loss": 0.8042,
      "step": 531830
    },
    {
      "epoch": 1.86396660673543,
      "grad_norm": 2.640625,
      "learning_rate": 2.103774491096625e-05,
      "loss": 0.7654,
      "step": 531840
    },
    {
      "epoch": 1.8640016542423254,
      "grad_norm": 2.9375,
      "learning_rate": 2.1037095882302547e-05,
      "loss": 0.8239,
      "step": 531850
    },
    {
      "epoch": 1.864036701749221,
      "grad_norm": 2.640625,
      "learning_rate": 2.1036446853638845e-05,
      "loss": 0.7835,
      "step": 531860
    },
    {
      "epoch": 1.8640717492561167,
      "grad_norm": 3.265625,
      "learning_rate": 2.1035797824975143e-05,
      "loss": 0.9054,
      "step": 531870
    },
    {
      "epoch": 1.8641067967630123,
      "grad_norm": 3.34375,
      "learning_rate": 2.103514879631144e-05,
      "loss": 0.8143,
      "step": 531880
    },
    {
      "epoch": 1.8641418442699078,
      "grad_norm": 3.1875,
      "learning_rate": 2.103449976764774e-05,
      "loss": 0.7951,
      "step": 531890
    },
    {
      "epoch": 1.8641768917768036,
      "grad_norm": 2.96875,
      "learning_rate": 2.1033850738984037e-05,
      "loss": 0.7525,
      "step": 531900
    },
    {
      "epoch": 1.864211939283699,
      "grad_norm": 2.5,
      "learning_rate": 2.1033201710320335e-05,
      "loss": 0.9004,
      "step": 531910
    },
    {
      "epoch": 1.8642469867905946,
      "grad_norm": 2.90625,
      "learning_rate": 2.1032552681656633e-05,
      "loss": 0.8469,
      "step": 531920
    },
    {
      "epoch": 1.8642820342974904,
      "grad_norm": 3.015625,
      "learning_rate": 2.103190365299293e-05,
      "loss": 0.9153,
      "step": 531930
    },
    {
      "epoch": 1.8643170818043857,
      "grad_norm": 2.765625,
      "learning_rate": 2.103125462432923e-05,
      "loss": 0.8638,
      "step": 531940
    },
    {
      "epoch": 1.8643521293112815,
      "grad_norm": 3.015625,
      "learning_rate": 2.1030605595665527e-05,
      "loss": 0.8124,
      "step": 531950
    },
    {
      "epoch": 1.864387176818177,
      "grad_norm": 3.15625,
      "learning_rate": 2.1029956567001825e-05,
      "loss": 0.7859,
      "step": 531960
    },
    {
      "epoch": 1.8644222243250725,
      "grad_norm": 3.015625,
      "learning_rate": 2.1029307538338123e-05,
      "loss": 0.8041,
      "step": 531970
    },
    {
      "epoch": 1.8644572718319683,
      "grad_norm": 3.671875,
      "learning_rate": 2.1028658509674425e-05,
      "loss": 0.8,
      "step": 531980
    },
    {
      "epoch": 1.8644923193388638,
      "grad_norm": 2.5,
      "learning_rate": 2.1028009481010723e-05,
      "loss": 0.759,
      "step": 531990
    },
    {
      "epoch": 1.8645273668457594,
      "grad_norm": 2.671875,
      "learning_rate": 2.102736045234702e-05,
      "loss": 0.8904,
      "step": 532000
    },
    {
      "epoch": 1.8645624143526551,
      "grad_norm": 3.359375,
      "learning_rate": 2.102671142368332e-05,
      "loss": 0.8199,
      "step": 532010
    },
    {
      "epoch": 1.8645974618595507,
      "grad_norm": 2.78125,
      "learning_rate": 2.1026062395019613e-05,
      "loss": 0.8587,
      "step": 532020
    },
    {
      "epoch": 1.8646325093664462,
      "grad_norm": 2.6875,
      "learning_rate": 2.102541336635591e-05,
      "loss": 0.8492,
      "step": 532030
    },
    {
      "epoch": 1.864667556873342,
      "grad_norm": 2.515625,
      "learning_rate": 2.102476433769221e-05,
      "loss": 0.8277,
      "step": 532040
    },
    {
      "epoch": 1.8647026043802373,
      "grad_norm": 3.046875,
      "learning_rate": 2.1024115309028507e-05,
      "loss": 0.8697,
      "step": 532050
    },
    {
      "epoch": 1.864737651887133,
      "grad_norm": 3.375,
      "learning_rate": 2.1023466280364805e-05,
      "loss": 0.8706,
      "step": 532060
    },
    {
      "epoch": 1.8647726993940286,
      "grad_norm": 3.375,
      "learning_rate": 2.1022817251701103e-05,
      "loss": 0.8767,
      "step": 532070
    },
    {
      "epoch": 1.864807746900924,
      "grad_norm": 2.53125,
      "learning_rate": 2.10221682230374e-05,
      "loss": 0.7837,
      "step": 532080
    },
    {
      "epoch": 1.8648427944078199,
      "grad_norm": 2.703125,
      "learning_rate": 2.1021519194373703e-05,
      "loss": 0.8031,
      "step": 532090
    },
    {
      "epoch": 1.8648778419147154,
      "grad_norm": 3.265625,
      "learning_rate": 2.102087016571e-05,
      "loss": 0.9066,
      "step": 532100
    },
    {
      "epoch": 1.864912889421611,
      "grad_norm": 2.484375,
      "learning_rate": 2.10202211370463e-05,
      "loss": 0.7674,
      "step": 532110
    },
    {
      "epoch": 1.8649479369285067,
      "grad_norm": 2.875,
      "learning_rate": 2.1019572108382597e-05,
      "loss": 0.8473,
      "step": 532120
    },
    {
      "epoch": 1.8649829844354022,
      "grad_norm": 2.734375,
      "learning_rate": 2.1018923079718895e-05,
      "loss": 0.8351,
      "step": 532130
    },
    {
      "epoch": 1.8650180319422978,
      "grad_norm": 2.984375,
      "learning_rate": 2.1018274051055193e-05,
      "loss": 0.8369,
      "step": 532140
    },
    {
      "epoch": 1.8650530794491935,
      "grad_norm": 3.25,
      "learning_rate": 2.101762502239149e-05,
      "loss": 0.8048,
      "step": 532150
    },
    {
      "epoch": 1.8650881269560888,
      "grad_norm": 2.828125,
      "learning_rate": 2.101697599372779e-05,
      "loss": 0.7937,
      "step": 532160
    },
    {
      "epoch": 1.8651231744629846,
      "grad_norm": 2.90625,
      "learning_rate": 2.1016326965064087e-05,
      "loss": 0.8856,
      "step": 532170
    },
    {
      "epoch": 1.8651582219698801,
      "grad_norm": 3.296875,
      "learning_rate": 2.1015677936400385e-05,
      "loss": 0.8517,
      "step": 532180
    },
    {
      "epoch": 1.8651932694767757,
      "grad_norm": 2.9375,
      "learning_rate": 2.1015028907736683e-05,
      "loss": 0.865,
      "step": 532190
    },
    {
      "epoch": 1.8652283169836714,
      "grad_norm": 3.09375,
      "learning_rate": 2.101437987907298e-05,
      "loss": 0.812,
      "step": 532200
    },
    {
      "epoch": 1.865263364490567,
      "grad_norm": 3.984375,
      "learning_rate": 2.101373085040928e-05,
      "loss": 0.8813,
      "step": 532210
    },
    {
      "epoch": 1.8652984119974625,
      "grad_norm": 2.6875,
      "learning_rate": 2.1013081821745577e-05,
      "loss": 0.7455,
      "step": 532220
    },
    {
      "epoch": 1.8653334595043582,
      "grad_norm": 3.078125,
      "learning_rate": 2.1012432793081875e-05,
      "loss": 0.8191,
      "step": 532230
    },
    {
      "epoch": 1.8653685070112538,
      "grad_norm": 3.5,
      "learning_rate": 2.1011783764418173e-05,
      "loss": 0.8502,
      "step": 532240
    },
    {
      "epoch": 1.8654035545181493,
      "grad_norm": 3.25,
      "learning_rate": 2.101113473575447e-05,
      "loss": 0.8362,
      "step": 532250
    },
    {
      "epoch": 1.865438602025045,
      "grad_norm": 2.734375,
      "learning_rate": 2.101048570709077e-05,
      "loss": 0.8271,
      "step": 532260
    },
    {
      "epoch": 1.8654736495319404,
      "grad_norm": 3.296875,
      "learning_rate": 2.1009836678427067e-05,
      "loss": 0.752,
      "step": 532270
    },
    {
      "epoch": 1.8655086970388362,
      "grad_norm": 2.84375,
      "learning_rate": 2.1009187649763365e-05,
      "loss": 0.8502,
      "step": 532280
    },
    {
      "epoch": 1.8655437445457317,
      "grad_norm": 2.84375,
      "learning_rate": 2.1008538621099663e-05,
      "loss": 0.8151,
      "step": 532290
    },
    {
      "epoch": 1.8655787920526272,
      "grad_norm": 2.671875,
      "learning_rate": 2.100788959243596e-05,
      "loss": 0.7921,
      "step": 532300
    },
    {
      "epoch": 1.865613839559523,
      "grad_norm": 2.96875,
      "learning_rate": 2.100724056377226e-05,
      "loss": 0.8475,
      "step": 532310
    },
    {
      "epoch": 1.8656488870664185,
      "grad_norm": 2.765625,
      "learning_rate": 2.1006591535108557e-05,
      "loss": 0.887,
      "step": 532320
    },
    {
      "epoch": 1.865683934573314,
      "grad_norm": 3.171875,
      "learning_rate": 2.1005942506444855e-05,
      "loss": 0.7886,
      "step": 532330
    },
    {
      "epoch": 1.8657189820802098,
      "grad_norm": 2.609375,
      "learning_rate": 2.1005293477781156e-05,
      "loss": 0.7911,
      "step": 532340
    },
    {
      "epoch": 1.8657540295871053,
      "grad_norm": 3.390625,
      "learning_rate": 2.1004644449117454e-05,
      "loss": 0.8276,
      "step": 532350
    },
    {
      "epoch": 1.8657890770940009,
      "grad_norm": 2.96875,
      "learning_rate": 2.1003995420453752e-05,
      "loss": 0.7652,
      "step": 532360
    },
    {
      "epoch": 1.8658241246008966,
      "grad_norm": 3.234375,
      "learning_rate": 2.100334639179005e-05,
      "loss": 0.8911,
      "step": 532370
    },
    {
      "epoch": 1.865859172107792,
      "grad_norm": 2.828125,
      "learning_rate": 2.1002697363126348e-05,
      "loss": 0.8525,
      "step": 532380
    },
    {
      "epoch": 1.8658942196146877,
      "grad_norm": 3.25,
      "learning_rate": 2.1002048334462643e-05,
      "loss": 0.7988,
      "step": 532390
    },
    {
      "epoch": 1.8659292671215835,
      "grad_norm": 2.921875,
      "learning_rate": 2.100139930579894e-05,
      "loss": 0.9943,
      "step": 532400
    },
    {
      "epoch": 1.8659643146284788,
      "grad_norm": 2.625,
      "learning_rate": 2.100075027713524e-05,
      "loss": 0.8255,
      "step": 532410
    },
    {
      "epoch": 1.8659993621353745,
      "grad_norm": 3.078125,
      "learning_rate": 2.1000101248471537e-05,
      "loss": 0.8572,
      "step": 532420
    },
    {
      "epoch": 1.86603440964227,
      "grad_norm": 2.53125,
      "learning_rate": 2.0999452219807835e-05,
      "loss": 0.7576,
      "step": 532430
    },
    {
      "epoch": 1.8660694571491656,
      "grad_norm": 2.765625,
      "learning_rate": 2.0998803191144133e-05,
      "loss": 0.8279,
      "step": 532440
    },
    {
      "epoch": 1.8661045046560614,
      "grad_norm": 3.515625,
      "learning_rate": 2.099815416248043e-05,
      "loss": 0.8031,
      "step": 532450
    },
    {
      "epoch": 1.866139552162957,
      "grad_norm": 2.984375,
      "learning_rate": 2.0997505133816732e-05,
      "loss": 0.9376,
      "step": 532460
    },
    {
      "epoch": 1.8661745996698524,
      "grad_norm": 2.75,
      "learning_rate": 2.099685610515303e-05,
      "loss": 0.819,
      "step": 532470
    },
    {
      "epoch": 1.8662096471767482,
      "grad_norm": 3.234375,
      "learning_rate": 2.0996207076489328e-05,
      "loss": 0.7842,
      "step": 532480
    },
    {
      "epoch": 1.8662446946836437,
      "grad_norm": 3.234375,
      "learning_rate": 2.0995558047825626e-05,
      "loss": 0.8471,
      "step": 532490
    },
    {
      "epoch": 1.8662797421905393,
      "grad_norm": 2.890625,
      "learning_rate": 2.0994909019161924e-05,
      "loss": 0.8576,
      "step": 532500
    },
    {
      "epoch": 1.866314789697435,
      "grad_norm": 3.0625,
      "learning_rate": 2.0994259990498222e-05,
      "loss": 0.8048,
      "step": 532510
    },
    {
      "epoch": 1.8663498372043303,
      "grad_norm": 3.453125,
      "learning_rate": 2.099361096183452e-05,
      "loss": 0.8376,
      "step": 532520
    },
    {
      "epoch": 1.866384884711226,
      "grad_norm": 2.796875,
      "learning_rate": 2.0992961933170818e-05,
      "loss": 0.8021,
      "step": 532530
    },
    {
      "epoch": 1.8664199322181216,
      "grad_norm": 2.984375,
      "learning_rate": 2.0992312904507116e-05,
      "loss": 0.8429,
      "step": 532540
    },
    {
      "epoch": 1.8664549797250172,
      "grad_norm": 2.53125,
      "learning_rate": 2.0991663875843414e-05,
      "loss": 0.8356,
      "step": 532550
    },
    {
      "epoch": 1.866490027231913,
      "grad_norm": 3.421875,
      "learning_rate": 2.0991014847179712e-05,
      "loss": 0.8568,
      "step": 532560
    },
    {
      "epoch": 1.8665250747388085,
      "grad_norm": 3.03125,
      "learning_rate": 2.099036581851601e-05,
      "loss": 0.7707,
      "step": 532570
    },
    {
      "epoch": 1.866560122245704,
      "grad_norm": 2.765625,
      "learning_rate": 2.0989716789852308e-05,
      "loss": 0.7805,
      "step": 532580
    },
    {
      "epoch": 1.8665951697525998,
      "grad_norm": 2.75,
      "learning_rate": 2.0989067761188606e-05,
      "loss": 0.7484,
      "step": 532590
    },
    {
      "epoch": 1.8666302172594953,
      "grad_norm": 2.78125,
      "learning_rate": 2.0988418732524904e-05,
      "loss": 0.7987,
      "step": 532600
    },
    {
      "epoch": 1.8666652647663908,
      "grad_norm": 3.125,
      "learning_rate": 2.0987769703861202e-05,
      "loss": 0.8863,
      "step": 532610
    },
    {
      "epoch": 1.8667003122732866,
      "grad_norm": 2.90625,
      "learning_rate": 2.09871206751975e-05,
      "loss": 0.7949,
      "step": 532620
    },
    {
      "epoch": 1.866735359780182,
      "grad_norm": 2.796875,
      "learning_rate": 2.0986471646533798e-05,
      "loss": 0.8258,
      "step": 532630
    },
    {
      "epoch": 1.8667704072870777,
      "grad_norm": 2.953125,
      "learning_rate": 2.0985822617870096e-05,
      "loss": 0.8106,
      "step": 532640
    },
    {
      "epoch": 1.8668054547939732,
      "grad_norm": 2.921875,
      "learning_rate": 2.0985173589206394e-05,
      "loss": 0.8195,
      "step": 532650
    },
    {
      "epoch": 1.8668405023008687,
      "grad_norm": 3.03125,
      "learning_rate": 2.0984524560542692e-05,
      "loss": 0.8473,
      "step": 532660
    },
    {
      "epoch": 1.8668755498077645,
      "grad_norm": 3.484375,
      "learning_rate": 2.098387553187899e-05,
      "loss": 0.8871,
      "step": 532670
    },
    {
      "epoch": 1.86691059731466,
      "grad_norm": 2.8125,
      "learning_rate": 2.0983226503215288e-05,
      "loss": 0.745,
      "step": 532680
    },
    {
      "epoch": 1.8669456448215556,
      "grad_norm": 2.65625,
      "learning_rate": 2.0982577474551586e-05,
      "loss": 0.8107,
      "step": 532690
    },
    {
      "epoch": 1.8669806923284513,
      "grad_norm": 3.109375,
      "learning_rate": 2.0981928445887884e-05,
      "loss": 0.8805,
      "step": 532700
    },
    {
      "epoch": 1.8670157398353469,
      "grad_norm": 3.359375,
      "learning_rate": 2.0981279417224185e-05,
      "loss": 0.8344,
      "step": 532710
    },
    {
      "epoch": 1.8670507873422424,
      "grad_norm": 2.75,
      "learning_rate": 2.0980630388560483e-05,
      "loss": 0.7914,
      "step": 532720
    },
    {
      "epoch": 1.8670858348491381,
      "grad_norm": 3.21875,
      "learning_rate": 2.097998135989678e-05,
      "loss": 0.838,
      "step": 532730
    },
    {
      "epoch": 1.8671208823560335,
      "grad_norm": 2.625,
      "learning_rate": 2.097933233123308e-05,
      "loss": 0.8342,
      "step": 532740
    },
    {
      "epoch": 1.8671559298629292,
      "grad_norm": 3.109375,
      "learning_rate": 2.0978683302569377e-05,
      "loss": 0.9047,
      "step": 532750
    },
    {
      "epoch": 1.8671909773698248,
      "grad_norm": 2.390625,
      "learning_rate": 2.0978034273905675e-05,
      "loss": 0.7956,
      "step": 532760
    },
    {
      "epoch": 1.8672260248767203,
      "grad_norm": 2.953125,
      "learning_rate": 2.097738524524197e-05,
      "loss": 0.8719,
      "step": 532770
    },
    {
      "epoch": 1.867261072383616,
      "grad_norm": 2.921875,
      "learning_rate": 2.0976736216578268e-05,
      "loss": 0.9207,
      "step": 532780
    },
    {
      "epoch": 1.8672961198905116,
      "grad_norm": 3.125,
      "learning_rate": 2.0976087187914566e-05,
      "loss": 0.8269,
      "step": 532790
    },
    {
      "epoch": 1.8673311673974071,
      "grad_norm": 3.1875,
      "learning_rate": 2.0975438159250864e-05,
      "loss": 0.8412,
      "step": 532800
    },
    {
      "epoch": 1.8673662149043029,
      "grad_norm": 3.25,
      "learning_rate": 2.0974789130587162e-05,
      "loss": 0.8125,
      "step": 532810
    },
    {
      "epoch": 1.8674012624111984,
      "grad_norm": 2.96875,
      "learning_rate": 2.0974140101923463e-05,
      "loss": 0.8843,
      "step": 532820
    },
    {
      "epoch": 1.867436309918094,
      "grad_norm": 2.84375,
      "learning_rate": 2.097349107325976e-05,
      "loss": 0.8052,
      "step": 532830
    },
    {
      "epoch": 1.8674713574249897,
      "grad_norm": 2.65625,
      "learning_rate": 2.097284204459606e-05,
      "loss": 0.8005,
      "step": 532840
    },
    {
      "epoch": 1.867506404931885,
      "grad_norm": 3.1875,
      "learning_rate": 2.0972193015932357e-05,
      "loss": 0.8657,
      "step": 532850
    },
    {
      "epoch": 1.8675414524387808,
      "grad_norm": 3.125,
      "learning_rate": 2.0971543987268655e-05,
      "loss": 0.7917,
      "step": 532860
    },
    {
      "epoch": 1.8675764999456763,
      "grad_norm": 3.046875,
      "learning_rate": 2.0970894958604953e-05,
      "loss": 0.8894,
      "step": 532870
    },
    {
      "epoch": 1.8676115474525719,
      "grad_norm": 2.734375,
      "learning_rate": 2.097024592994125e-05,
      "loss": 0.8313,
      "step": 532880
    },
    {
      "epoch": 1.8676465949594676,
      "grad_norm": 3.015625,
      "learning_rate": 2.096959690127755e-05,
      "loss": 0.8367,
      "step": 532890
    },
    {
      "epoch": 1.8676816424663631,
      "grad_norm": 2.953125,
      "learning_rate": 2.0968947872613847e-05,
      "loss": 0.8823,
      "step": 532900
    },
    {
      "epoch": 1.8677166899732587,
      "grad_norm": 2.90625,
      "learning_rate": 2.0968298843950145e-05,
      "loss": 0.8637,
      "step": 532910
    },
    {
      "epoch": 1.8677517374801544,
      "grad_norm": 2.75,
      "learning_rate": 2.0967649815286443e-05,
      "loss": 0.8127,
      "step": 532920
    },
    {
      "epoch": 1.86778678498705,
      "grad_norm": 2.734375,
      "learning_rate": 2.096700078662274e-05,
      "loss": 0.7566,
      "step": 532930
    },
    {
      "epoch": 1.8678218324939455,
      "grad_norm": 3.328125,
      "learning_rate": 2.096635175795904e-05,
      "loss": 0.8495,
      "step": 532940
    },
    {
      "epoch": 1.8678568800008413,
      "grad_norm": 3.046875,
      "learning_rate": 2.0965702729295337e-05,
      "loss": 0.7576,
      "step": 532950
    },
    {
      "epoch": 1.8678919275077366,
      "grad_norm": 3.0625,
      "learning_rate": 2.0965053700631635e-05,
      "loss": 0.7789,
      "step": 532960
    },
    {
      "epoch": 1.8679269750146323,
      "grad_norm": 2.65625,
      "learning_rate": 2.0964404671967933e-05,
      "loss": 0.7787,
      "step": 532970
    },
    {
      "epoch": 1.8679620225215279,
      "grad_norm": 2.5,
      "learning_rate": 2.096375564330423e-05,
      "loss": 0.8266,
      "step": 532980
    },
    {
      "epoch": 1.8679970700284234,
      "grad_norm": 2.84375,
      "learning_rate": 2.096310661464053e-05,
      "loss": 0.7815,
      "step": 532990
    },
    {
      "epoch": 1.8680321175353192,
      "grad_norm": 2.953125,
      "learning_rate": 2.0962457585976827e-05,
      "loss": 0.7863,
      "step": 533000
    },
    {
      "epoch": 1.8680671650422147,
      "grad_norm": 2.34375,
      "learning_rate": 2.0961808557313125e-05,
      "loss": 0.8134,
      "step": 533010
    },
    {
      "epoch": 1.8681022125491102,
      "grad_norm": 2.171875,
      "learning_rate": 2.0961159528649423e-05,
      "loss": 0.7971,
      "step": 533020
    },
    {
      "epoch": 1.868137260056006,
      "grad_norm": 2.953125,
      "learning_rate": 2.096051049998572e-05,
      "loss": 0.7597,
      "step": 533030
    },
    {
      "epoch": 1.8681723075629015,
      "grad_norm": 2.71875,
      "learning_rate": 2.095986147132202e-05,
      "loss": 0.891,
      "step": 533040
    },
    {
      "epoch": 1.868207355069797,
      "grad_norm": 3.015625,
      "learning_rate": 2.0959212442658317e-05,
      "loss": 0.8526,
      "step": 533050
    },
    {
      "epoch": 1.8682424025766928,
      "grad_norm": 2.625,
      "learning_rate": 2.0958563413994615e-05,
      "loss": 0.8673,
      "step": 533060
    },
    {
      "epoch": 1.8682774500835881,
      "grad_norm": 3.40625,
      "learning_rate": 2.0957914385330913e-05,
      "loss": 0.8792,
      "step": 533070
    },
    {
      "epoch": 1.868312497590484,
      "grad_norm": 3.03125,
      "learning_rate": 2.0957265356667215e-05,
      "loss": 0.9733,
      "step": 533080
    },
    {
      "epoch": 1.8683475450973797,
      "grad_norm": 3.171875,
      "learning_rate": 2.0956616328003513e-05,
      "loss": 0.8446,
      "step": 533090
    },
    {
      "epoch": 1.868382592604275,
      "grad_norm": 3.078125,
      "learning_rate": 2.095596729933981e-05,
      "loss": 0.8187,
      "step": 533100
    },
    {
      "epoch": 1.8684176401111707,
      "grad_norm": 3.125,
      "learning_rate": 2.095531827067611e-05,
      "loss": 0.8137,
      "step": 533110
    },
    {
      "epoch": 1.8684526876180663,
      "grad_norm": 2.6875,
      "learning_rate": 2.0954669242012407e-05,
      "loss": 0.7983,
      "step": 533120
    },
    {
      "epoch": 1.8684877351249618,
      "grad_norm": 2.96875,
      "learning_rate": 2.0954020213348705e-05,
      "loss": 0.7684,
      "step": 533130
    },
    {
      "epoch": 1.8685227826318576,
      "grad_norm": 2.859375,
      "learning_rate": 2.0953371184685003e-05,
      "loss": 0.8316,
      "step": 533140
    },
    {
      "epoch": 1.868557830138753,
      "grad_norm": 2.59375,
      "learning_rate": 2.0952722156021297e-05,
      "loss": 0.8503,
      "step": 533150
    },
    {
      "epoch": 1.8685928776456486,
      "grad_norm": 2.84375,
      "learning_rate": 2.0952073127357595e-05,
      "loss": 0.833,
      "step": 533160
    },
    {
      "epoch": 1.8686279251525444,
      "grad_norm": 3.234375,
      "learning_rate": 2.0951424098693893e-05,
      "loss": 0.7618,
      "step": 533170
    },
    {
      "epoch": 1.86866297265944,
      "grad_norm": 3.25,
      "learning_rate": 2.095077507003019e-05,
      "loss": 0.7382,
      "step": 533180
    },
    {
      "epoch": 1.8686980201663355,
      "grad_norm": 3.09375,
      "learning_rate": 2.0950126041366493e-05,
      "loss": 0.7874,
      "step": 533190
    },
    {
      "epoch": 1.8687330676732312,
      "grad_norm": 3.15625,
      "learning_rate": 2.094947701270279e-05,
      "loss": 0.8096,
      "step": 533200
    },
    {
      "epoch": 1.8687681151801265,
      "grad_norm": 2.625,
      "learning_rate": 2.094882798403909e-05,
      "loss": 0.7574,
      "step": 533210
    },
    {
      "epoch": 1.8688031626870223,
      "grad_norm": 2.984375,
      "learning_rate": 2.0948178955375387e-05,
      "loss": 0.8252,
      "step": 533220
    },
    {
      "epoch": 1.8688382101939178,
      "grad_norm": 3.625,
      "learning_rate": 2.0947529926711685e-05,
      "loss": 0.8755,
      "step": 533230
    },
    {
      "epoch": 1.8688732577008134,
      "grad_norm": 2.875,
      "learning_rate": 2.0946880898047983e-05,
      "loss": 0.8432,
      "step": 533240
    },
    {
      "epoch": 1.8689083052077091,
      "grad_norm": 3.109375,
      "learning_rate": 2.094623186938428e-05,
      "loss": 0.8667,
      "step": 533250
    },
    {
      "epoch": 1.8689433527146047,
      "grad_norm": 2.59375,
      "learning_rate": 2.094558284072058e-05,
      "loss": 0.7997,
      "step": 533260
    },
    {
      "epoch": 1.8689784002215002,
      "grad_norm": 2.859375,
      "learning_rate": 2.0944933812056877e-05,
      "loss": 0.8144,
      "step": 533270
    },
    {
      "epoch": 1.869013447728396,
      "grad_norm": 2.59375,
      "learning_rate": 2.0944284783393175e-05,
      "loss": 0.7756,
      "step": 533280
    },
    {
      "epoch": 1.8690484952352915,
      "grad_norm": 2.9375,
      "learning_rate": 2.0943635754729473e-05,
      "loss": 0.7607,
      "step": 533290
    },
    {
      "epoch": 1.869083542742187,
      "grad_norm": 3.0,
      "learning_rate": 2.094298672606577e-05,
      "loss": 0.7585,
      "step": 533300
    },
    {
      "epoch": 1.8691185902490828,
      "grad_norm": 2.765625,
      "learning_rate": 2.094233769740207e-05,
      "loss": 0.8143,
      "step": 533310
    },
    {
      "epoch": 1.869153637755978,
      "grad_norm": 2.75,
      "learning_rate": 2.0941688668738367e-05,
      "loss": 0.8586,
      "step": 533320
    },
    {
      "epoch": 1.8691886852628739,
      "grad_norm": 3.09375,
      "learning_rate": 2.0941039640074665e-05,
      "loss": 0.801,
      "step": 533330
    },
    {
      "epoch": 1.8692237327697694,
      "grad_norm": 3.21875,
      "learning_rate": 2.0940390611410963e-05,
      "loss": 0.8766,
      "step": 533340
    },
    {
      "epoch": 1.869258780276665,
      "grad_norm": 2.734375,
      "learning_rate": 2.093974158274726e-05,
      "loss": 0.8516,
      "step": 533350
    },
    {
      "epoch": 1.8692938277835607,
      "grad_norm": 2.734375,
      "learning_rate": 2.093909255408356e-05,
      "loss": 0.7827,
      "step": 533360
    },
    {
      "epoch": 1.8693288752904562,
      "grad_norm": 2.5625,
      "learning_rate": 2.0938443525419857e-05,
      "loss": 0.8614,
      "step": 533370
    },
    {
      "epoch": 1.8693639227973518,
      "grad_norm": 2.6875,
      "learning_rate": 2.0937794496756155e-05,
      "loss": 0.8291,
      "step": 533380
    },
    {
      "epoch": 1.8693989703042475,
      "grad_norm": 2.765625,
      "learning_rate": 2.0937145468092453e-05,
      "loss": 0.7453,
      "step": 533390
    },
    {
      "epoch": 1.869434017811143,
      "grad_norm": 2.59375,
      "learning_rate": 2.093649643942875e-05,
      "loss": 0.7806,
      "step": 533400
    },
    {
      "epoch": 1.8694690653180386,
      "grad_norm": 2.84375,
      "learning_rate": 2.093584741076505e-05,
      "loss": 0.8388,
      "step": 533410
    },
    {
      "epoch": 1.8695041128249343,
      "grad_norm": 2.890625,
      "learning_rate": 2.0935198382101347e-05,
      "loss": 0.9967,
      "step": 533420
    },
    {
      "epoch": 1.8695391603318297,
      "grad_norm": 3.0,
      "learning_rate": 2.0934549353437645e-05,
      "loss": 0.7843,
      "step": 533430
    },
    {
      "epoch": 1.8695742078387254,
      "grad_norm": 3.03125,
      "learning_rate": 2.0933900324773946e-05,
      "loss": 0.8241,
      "step": 533440
    },
    {
      "epoch": 1.869609255345621,
      "grad_norm": 2.890625,
      "learning_rate": 2.0933251296110244e-05,
      "loss": 0.7539,
      "step": 533450
    },
    {
      "epoch": 1.8696443028525165,
      "grad_norm": 2.578125,
      "learning_rate": 2.0932602267446542e-05,
      "loss": 0.862,
      "step": 533460
    },
    {
      "epoch": 1.8696793503594122,
      "grad_norm": 3.0,
      "learning_rate": 2.093195323878284e-05,
      "loss": 0.8626,
      "step": 533470
    },
    {
      "epoch": 1.8697143978663078,
      "grad_norm": 2.859375,
      "learning_rate": 2.0931304210119138e-05,
      "loss": 0.8919,
      "step": 533480
    },
    {
      "epoch": 1.8697494453732033,
      "grad_norm": 3.3125,
      "learning_rate": 2.0930655181455436e-05,
      "loss": 0.7951,
      "step": 533490
    },
    {
      "epoch": 1.869784492880099,
      "grad_norm": 2.734375,
      "learning_rate": 2.0930006152791734e-05,
      "loss": 0.8579,
      "step": 533500
    },
    {
      "epoch": 1.8698195403869946,
      "grad_norm": 2.9375,
      "learning_rate": 2.0929357124128032e-05,
      "loss": 0.903,
      "step": 533510
    },
    {
      "epoch": 1.8698545878938901,
      "grad_norm": 2.40625,
      "learning_rate": 2.0928708095464327e-05,
      "loss": 0.7765,
      "step": 533520
    },
    {
      "epoch": 1.869889635400786,
      "grad_norm": 2.703125,
      "learning_rate": 2.0928059066800625e-05,
      "loss": 0.7616,
      "step": 533530
    },
    {
      "epoch": 1.8699246829076812,
      "grad_norm": 2.71875,
      "learning_rate": 2.0927410038136923e-05,
      "loss": 0.8378,
      "step": 533540
    },
    {
      "epoch": 1.869959730414577,
      "grad_norm": 2.71875,
      "learning_rate": 2.092676100947322e-05,
      "loss": 0.8563,
      "step": 533550
    },
    {
      "epoch": 1.8699947779214725,
      "grad_norm": 3.203125,
      "learning_rate": 2.0926111980809522e-05,
      "loss": 0.8911,
      "step": 533560
    },
    {
      "epoch": 1.870029825428368,
      "grad_norm": 2.84375,
      "learning_rate": 2.092546295214582e-05,
      "loss": 0.8236,
      "step": 533570
    },
    {
      "epoch": 1.8700648729352638,
      "grad_norm": 2.6875,
      "learning_rate": 2.0924813923482118e-05,
      "loss": 0.8556,
      "step": 533580
    },
    {
      "epoch": 1.8700999204421593,
      "grad_norm": 3.03125,
      "learning_rate": 2.0924164894818416e-05,
      "loss": 0.8015,
      "step": 533590
    },
    {
      "epoch": 1.8701349679490549,
      "grad_norm": 2.890625,
      "learning_rate": 2.0923515866154714e-05,
      "loss": 0.8817,
      "step": 533600
    },
    {
      "epoch": 1.8701700154559506,
      "grad_norm": 3.265625,
      "learning_rate": 2.0922866837491012e-05,
      "loss": 0.8353,
      "step": 533610
    },
    {
      "epoch": 1.8702050629628462,
      "grad_norm": 3.34375,
      "learning_rate": 2.092221780882731e-05,
      "loss": 0.9022,
      "step": 533620
    },
    {
      "epoch": 1.8702401104697417,
      "grad_norm": 2.59375,
      "learning_rate": 2.0921568780163608e-05,
      "loss": 0.7987,
      "step": 533630
    },
    {
      "epoch": 1.8702751579766375,
      "grad_norm": 4.1875,
      "learning_rate": 2.0920919751499906e-05,
      "loss": 0.8492,
      "step": 533640
    },
    {
      "epoch": 1.8703102054835328,
      "grad_norm": 2.5625,
      "learning_rate": 2.0920270722836204e-05,
      "loss": 0.8918,
      "step": 533650
    },
    {
      "epoch": 1.8703452529904285,
      "grad_norm": 3.125,
      "learning_rate": 2.0919621694172502e-05,
      "loss": 0.8844,
      "step": 533660
    },
    {
      "epoch": 1.8703803004973243,
      "grad_norm": 3.34375,
      "learning_rate": 2.09189726655088e-05,
      "loss": 0.8517,
      "step": 533670
    },
    {
      "epoch": 1.8704153480042196,
      "grad_norm": 2.6875,
      "learning_rate": 2.0918323636845098e-05,
      "loss": 0.7832,
      "step": 533680
    },
    {
      "epoch": 1.8704503955111154,
      "grad_norm": 2.625,
      "learning_rate": 2.09176746081814e-05,
      "loss": 0.8281,
      "step": 533690
    },
    {
      "epoch": 1.870485443018011,
      "grad_norm": 3.0,
      "learning_rate": 2.0917025579517698e-05,
      "loss": 0.8733,
      "step": 533700
    },
    {
      "epoch": 1.8705204905249064,
      "grad_norm": 2.796875,
      "learning_rate": 2.0916376550853992e-05,
      "loss": 0.8284,
      "step": 533710
    },
    {
      "epoch": 1.8705555380318022,
      "grad_norm": 2.609375,
      "learning_rate": 2.091572752219029e-05,
      "loss": 0.8501,
      "step": 533720
    },
    {
      "epoch": 1.8705905855386977,
      "grad_norm": 3.46875,
      "learning_rate": 2.0915078493526588e-05,
      "loss": 0.8005,
      "step": 533730
    },
    {
      "epoch": 1.8706256330455933,
      "grad_norm": 2.6875,
      "learning_rate": 2.0914429464862886e-05,
      "loss": 0.8258,
      "step": 533740
    },
    {
      "epoch": 1.870660680552489,
      "grad_norm": 2.40625,
      "learning_rate": 2.0913780436199184e-05,
      "loss": 0.9174,
      "step": 533750
    },
    {
      "epoch": 1.8706957280593843,
      "grad_norm": 2.671875,
      "learning_rate": 2.0913131407535482e-05,
      "loss": 0.8512,
      "step": 533760
    },
    {
      "epoch": 1.87073077556628,
      "grad_norm": 3.015625,
      "learning_rate": 2.091248237887178e-05,
      "loss": 0.8436,
      "step": 533770
    },
    {
      "epoch": 1.8707658230731758,
      "grad_norm": 2.875,
      "learning_rate": 2.0911833350208078e-05,
      "loss": 0.8203,
      "step": 533780
    },
    {
      "epoch": 1.8708008705800712,
      "grad_norm": 2.953125,
      "learning_rate": 2.0911184321544376e-05,
      "loss": 0.758,
      "step": 533790
    },
    {
      "epoch": 1.870835918086967,
      "grad_norm": 3.28125,
      "learning_rate": 2.0910535292880674e-05,
      "loss": 0.8985,
      "step": 533800
    },
    {
      "epoch": 1.8708709655938625,
      "grad_norm": 2.78125,
      "learning_rate": 2.0909886264216976e-05,
      "loss": 0.8363,
      "step": 533810
    },
    {
      "epoch": 1.870906013100758,
      "grad_norm": 2.78125,
      "learning_rate": 2.0909237235553274e-05,
      "loss": 0.736,
      "step": 533820
    },
    {
      "epoch": 1.8709410606076537,
      "grad_norm": 2.734375,
      "learning_rate": 2.090858820688957e-05,
      "loss": 0.8345,
      "step": 533830
    },
    {
      "epoch": 1.8709761081145493,
      "grad_norm": 3.203125,
      "learning_rate": 2.090793917822587e-05,
      "loss": 0.7692,
      "step": 533840
    },
    {
      "epoch": 1.8710111556214448,
      "grad_norm": 3.28125,
      "learning_rate": 2.0907290149562168e-05,
      "loss": 0.7762,
      "step": 533850
    },
    {
      "epoch": 1.8710462031283406,
      "grad_norm": 3.1875,
      "learning_rate": 2.0906641120898466e-05,
      "loss": 0.9005,
      "step": 533860
    },
    {
      "epoch": 1.8710812506352361,
      "grad_norm": 3.109375,
      "learning_rate": 2.0905992092234764e-05,
      "loss": 0.8462,
      "step": 533870
    },
    {
      "epoch": 1.8711162981421317,
      "grad_norm": 2.609375,
      "learning_rate": 2.090534306357106e-05,
      "loss": 0.8253,
      "step": 533880
    },
    {
      "epoch": 1.8711513456490274,
      "grad_norm": 2.546875,
      "learning_rate": 2.090469403490736e-05,
      "loss": 0.8377,
      "step": 533890
    },
    {
      "epoch": 1.8711863931559227,
      "grad_norm": 2.671875,
      "learning_rate": 2.0904045006243654e-05,
      "loss": 0.7295,
      "step": 533900
    },
    {
      "epoch": 1.8712214406628185,
      "grad_norm": 2.625,
      "learning_rate": 2.0903395977579952e-05,
      "loss": 0.7325,
      "step": 533910
    },
    {
      "epoch": 1.871256488169714,
      "grad_norm": 3.03125,
      "learning_rate": 2.0902746948916254e-05,
      "loss": 0.8228,
      "step": 533920
    },
    {
      "epoch": 1.8712915356766096,
      "grad_norm": 2.4375,
      "learning_rate": 2.090209792025255e-05,
      "loss": 0.7625,
      "step": 533930
    },
    {
      "epoch": 1.8713265831835053,
      "grad_norm": 2.984375,
      "learning_rate": 2.090144889158885e-05,
      "loss": 0.867,
      "step": 533940
    },
    {
      "epoch": 1.8713616306904008,
      "grad_norm": 2.46875,
      "learning_rate": 2.0900799862925148e-05,
      "loss": 0.7917,
      "step": 533950
    },
    {
      "epoch": 1.8713966781972964,
      "grad_norm": 2.671875,
      "learning_rate": 2.0900150834261446e-05,
      "loss": 0.8056,
      "step": 533960
    },
    {
      "epoch": 1.8714317257041921,
      "grad_norm": 2.9375,
      "learning_rate": 2.0899501805597744e-05,
      "loss": 0.7545,
      "step": 533970
    },
    {
      "epoch": 1.8714667732110877,
      "grad_norm": 3.3125,
      "learning_rate": 2.089885277693404e-05,
      "loss": 0.8348,
      "step": 533980
    },
    {
      "epoch": 1.8715018207179832,
      "grad_norm": 2.8125,
      "learning_rate": 2.089820374827034e-05,
      "loss": 0.8585,
      "step": 533990
    },
    {
      "epoch": 1.871536868224879,
      "grad_norm": 3.09375,
      "learning_rate": 2.0897554719606638e-05,
      "loss": 0.8578,
      "step": 534000
    },
    {
      "epoch": 1.8715719157317743,
      "grad_norm": 3.421875,
      "learning_rate": 2.0896905690942936e-05,
      "loss": 0.9356,
      "step": 534010
    },
    {
      "epoch": 1.87160696323867,
      "grad_norm": 2.765625,
      "learning_rate": 2.0896256662279234e-05,
      "loss": 0.808,
      "step": 534020
    },
    {
      "epoch": 1.8716420107455656,
      "grad_norm": 3.328125,
      "learning_rate": 2.089560763361553e-05,
      "loss": 0.8773,
      "step": 534030
    },
    {
      "epoch": 1.8716770582524611,
      "grad_norm": 2.5,
      "learning_rate": 2.089495860495183e-05,
      "loss": 0.8402,
      "step": 534040
    },
    {
      "epoch": 1.8717121057593569,
      "grad_norm": 2.765625,
      "learning_rate": 2.0894309576288128e-05,
      "loss": 0.7745,
      "step": 534050
    },
    {
      "epoch": 1.8717471532662524,
      "grad_norm": 2.71875,
      "learning_rate": 2.089366054762443e-05,
      "loss": 0.8187,
      "step": 534060
    },
    {
      "epoch": 1.871782200773148,
      "grad_norm": 3.203125,
      "learning_rate": 2.0893011518960727e-05,
      "loss": 0.9078,
      "step": 534070
    },
    {
      "epoch": 1.8718172482800437,
      "grad_norm": 3.1875,
      "learning_rate": 2.0892362490297025e-05,
      "loss": 0.8756,
      "step": 534080
    },
    {
      "epoch": 1.8718522957869392,
      "grad_norm": 2.734375,
      "learning_rate": 2.089171346163332e-05,
      "loss": 0.7944,
      "step": 534090
    },
    {
      "epoch": 1.8718873432938348,
      "grad_norm": 2.953125,
      "learning_rate": 2.0891064432969618e-05,
      "loss": 0.7938,
      "step": 534100
    },
    {
      "epoch": 1.8719223908007305,
      "grad_norm": 3.125,
      "learning_rate": 2.0890415404305916e-05,
      "loss": 0.8619,
      "step": 534110
    },
    {
      "epoch": 1.8719574383076258,
      "grad_norm": 2.921875,
      "learning_rate": 2.0889766375642214e-05,
      "loss": 0.8056,
      "step": 534120
    },
    {
      "epoch": 1.8719924858145216,
      "grad_norm": 2.578125,
      "learning_rate": 2.088911734697851e-05,
      "loss": 0.7883,
      "step": 534130
    },
    {
      "epoch": 1.8720275333214171,
      "grad_norm": 2.75,
      "learning_rate": 2.088846831831481e-05,
      "loss": 0.9257,
      "step": 534140
    },
    {
      "epoch": 1.8720625808283127,
      "grad_norm": 2.65625,
      "learning_rate": 2.0887819289651108e-05,
      "loss": 0.8697,
      "step": 534150
    },
    {
      "epoch": 1.8720976283352084,
      "grad_norm": 3.28125,
      "learning_rate": 2.0887170260987406e-05,
      "loss": 0.8396,
      "step": 534160
    },
    {
      "epoch": 1.872132675842104,
      "grad_norm": 2.90625,
      "learning_rate": 2.0886521232323707e-05,
      "loss": 0.7727,
      "step": 534170
    },
    {
      "epoch": 1.8721677233489995,
      "grad_norm": 2.765625,
      "learning_rate": 2.0885872203660005e-05,
      "loss": 0.819,
      "step": 534180
    },
    {
      "epoch": 1.8722027708558953,
      "grad_norm": 2.75,
      "learning_rate": 2.0885223174996303e-05,
      "loss": 0.8449,
      "step": 534190
    },
    {
      "epoch": 1.8722378183627908,
      "grad_norm": 3.234375,
      "learning_rate": 2.08845741463326e-05,
      "loss": 0.9159,
      "step": 534200
    },
    {
      "epoch": 1.8722728658696863,
      "grad_norm": 2.921875,
      "learning_rate": 2.08839251176689e-05,
      "loss": 0.8953,
      "step": 534210
    },
    {
      "epoch": 1.872307913376582,
      "grad_norm": 2.578125,
      "learning_rate": 2.0883276089005197e-05,
      "loss": 0.8226,
      "step": 534220
    },
    {
      "epoch": 1.8723429608834774,
      "grad_norm": 3.3125,
      "learning_rate": 2.0882627060341495e-05,
      "loss": 0.8128,
      "step": 534230
    },
    {
      "epoch": 1.8723780083903732,
      "grad_norm": 2.8125,
      "learning_rate": 2.0881978031677793e-05,
      "loss": 0.8155,
      "step": 534240
    },
    {
      "epoch": 1.8724130558972687,
      "grad_norm": 2.796875,
      "learning_rate": 2.088132900301409e-05,
      "loss": 0.8644,
      "step": 534250
    },
    {
      "epoch": 1.8724481034041642,
      "grad_norm": 3.28125,
      "learning_rate": 2.088067997435039e-05,
      "loss": 0.8064,
      "step": 534260
    },
    {
      "epoch": 1.87248315091106,
      "grad_norm": 2.546875,
      "learning_rate": 2.0880030945686684e-05,
      "loss": 0.7388,
      "step": 534270
    },
    {
      "epoch": 1.8725181984179555,
      "grad_norm": 2.640625,
      "learning_rate": 2.087938191702298e-05,
      "loss": 0.7132,
      "step": 534280
    },
    {
      "epoch": 1.872553245924851,
      "grad_norm": 2.6875,
      "learning_rate": 2.0878732888359283e-05,
      "loss": 0.75,
      "step": 534290
    },
    {
      "epoch": 1.8725882934317468,
      "grad_norm": 2.875,
      "learning_rate": 2.087808385969558e-05,
      "loss": 0.8258,
      "step": 534300
    },
    {
      "epoch": 1.8726233409386424,
      "grad_norm": 2.703125,
      "learning_rate": 2.087743483103188e-05,
      "loss": 0.8276,
      "step": 534310
    },
    {
      "epoch": 1.872658388445538,
      "grad_norm": 3.0,
      "learning_rate": 2.0876785802368177e-05,
      "loss": 0.7716,
      "step": 534320
    },
    {
      "epoch": 1.8726934359524336,
      "grad_norm": 2.875,
      "learning_rate": 2.0876136773704475e-05,
      "loss": 0.8627,
      "step": 534330
    },
    {
      "epoch": 1.872728483459329,
      "grad_norm": 2.46875,
      "learning_rate": 2.0875487745040773e-05,
      "loss": 0.8106,
      "step": 534340
    },
    {
      "epoch": 1.8727635309662247,
      "grad_norm": 3.234375,
      "learning_rate": 2.087483871637707e-05,
      "loss": 0.7583,
      "step": 534350
    },
    {
      "epoch": 1.8727985784731205,
      "grad_norm": 2.8125,
      "learning_rate": 2.087418968771337e-05,
      "loss": 0.754,
      "step": 534360
    },
    {
      "epoch": 1.8728336259800158,
      "grad_norm": 3.421875,
      "learning_rate": 2.0873540659049667e-05,
      "loss": 0.8464,
      "step": 534370
    },
    {
      "epoch": 1.8728686734869116,
      "grad_norm": 2.9375,
      "learning_rate": 2.0872891630385965e-05,
      "loss": 0.8226,
      "step": 534380
    },
    {
      "epoch": 1.872903720993807,
      "grad_norm": 2.703125,
      "learning_rate": 2.0872242601722263e-05,
      "loss": 0.861,
      "step": 534390
    },
    {
      "epoch": 1.8729387685007026,
      "grad_norm": 2.875,
      "learning_rate": 2.087159357305856e-05,
      "loss": 0.7969,
      "step": 534400
    },
    {
      "epoch": 1.8729738160075984,
      "grad_norm": 2.96875,
      "learning_rate": 2.087094454439486e-05,
      "loss": 0.7931,
      "step": 534410
    },
    {
      "epoch": 1.873008863514494,
      "grad_norm": 3.046875,
      "learning_rate": 2.0870295515731157e-05,
      "loss": 0.7988,
      "step": 534420
    },
    {
      "epoch": 1.8730439110213895,
      "grad_norm": 2.921875,
      "learning_rate": 2.086964648706746e-05,
      "loss": 0.7681,
      "step": 534430
    },
    {
      "epoch": 1.8730789585282852,
      "grad_norm": 2.984375,
      "learning_rate": 2.0868997458403756e-05,
      "loss": 0.8404,
      "step": 534440
    },
    {
      "epoch": 1.8731140060351805,
      "grad_norm": 2.921875,
      "learning_rate": 2.0868348429740054e-05,
      "loss": 0.7693,
      "step": 534450
    },
    {
      "epoch": 1.8731490535420763,
      "grad_norm": 2.8125,
      "learning_rate": 2.086769940107635e-05,
      "loss": 0.7355,
      "step": 534460
    },
    {
      "epoch": 1.873184101048972,
      "grad_norm": 2.796875,
      "learning_rate": 2.0867050372412647e-05,
      "loss": 0.7985,
      "step": 534470
    },
    {
      "epoch": 1.8732191485558674,
      "grad_norm": 2.59375,
      "learning_rate": 2.0866401343748945e-05,
      "loss": 0.7929,
      "step": 534480
    },
    {
      "epoch": 1.873254196062763,
      "grad_norm": 2.6875,
      "learning_rate": 2.0865752315085243e-05,
      "loss": 0.8287,
      "step": 534490
    },
    {
      "epoch": 1.8732892435696586,
      "grad_norm": 2.40625,
      "learning_rate": 2.086510328642154e-05,
      "loss": 0.7611,
      "step": 534500
    },
    {
      "epoch": 1.8733242910765542,
      "grad_norm": 3.078125,
      "learning_rate": 2.086445425775784e-05,
      "loss": 0.7402,
      "step": 534510
    },
    {
      "epoch": 1.87335933858345,
      "grad_norm": 2.984375,
      "learning_rate": 2.0863805229094137e-05,
      "loss": 0.8233,
      "step": 534520
    },
    {
      "epoch": 1.8733943860903455,
      "grad_norm": 3.203125,
      "learning_rate": 2.0863156200430435e-05,
      "loss": 0.8546,
      "step": 534530
    },
    {
      "epoch": 1.873429433597241,
      "grad_norm": 2.828125,
      "learning_rate": 2.0862507171766736e-05,
      "loss": 0.7979,
      "step": 534540
    },
    {
      "epoch": 1.8734644811041368,
      "grad_norm": 3.125,
      "learning_rate": 2.0861858143103034e-05,
      "loss": 0.8928,
      "step": 534550
    },
    {
      "epoch": 1.8734995286110323,
      "grad_norm": 2.890625,
      "learning_rate": 2.0861209114439332e-05,
      "loss": 0.7882,
      "step": 534560
    },
    {
      "epoch": 1.8735345761179278,
      "grad_norm": 2.40625,
      "learning_rate": 2.086056008577563e-05,
      "loss": 0.7342,
      "step": 534570
    },
    {
      "epoch": 1.8735696236248236,
      "grad_norm": 2.359375,
      "learning_rate": 2.085991105711193e-05,
      "loss": 0.797,
      "step": 534580
    },
    {
      "epoch": 1.873604671131719,
      "grad_norm": 3.28125,
      "learning_rate": 2.0859262028448226e-05,
      "loss": 0.9312,
      "step": 534590
    },
    {
      "epoch": 1.8736397186386147,
      "grad_norm": 3.15625,
      "learning_rate": 2.0858612999784524e-05,
      "loss": 0.8244,
      "step": 534600
    },
    {
      "epoch": 1.8736747661455102,
      "grad_norm": 2.640625,
      "learning_rate": 2.0857963971120822e-05,
      "loss": 0.822,
      "step": 534610
    },
    {
      "epoch": 1.8737098136524057,
      "grad_norm": 2.453125,
      "learning_rate": 2.085731494245712e-05,
      "loss": 0.8116,
      "step": 534620
    },
    {
      "epoch": 1.8737448611593015,
      "grad_norm": 2.875,
      "learning_rate": 2.085666591379342e-05,
      "loss": 0.8416,
      "step": 534630
    },
    {
      "epoch": 1.873779908666197,
      "grad_norm": 3.203125,
      "learning_rate": 2.0856016885129716e-05,
      "loss": 0.834,
      "step": 534640
    },
    {
      "epoch": 1.8738149561730926,
      "grad_norm": 2.671875,
      "learning_rate": 2.085536785646601e-05,
      "loss": 0.8235,
      "step": 534650
    },
    {
      "epoch": 1.8738500036799883,
      "grad_norm": 3.109375,
      "learning_rate": 2.0854718827802312e-05,
      "loss": 0.841,
      "step": 534660
    },
    {
      "epoch": 1.8738850511868839,
      "grad_norm": 3.265625,
      "learning_rate": 2.085406979913861e-05,
      "loss": 0.8543,
      "step": 534670
    },
    {
      "epoch": 1.8739200986937794,
      "grad_norm": 2.921875,
      "learning_rate": 2.085342077047491e-05,
      "loss": 0.8713,
      "step": 534680
    },
    {
      "epoch": 1.8739551462006752,
      "grad_norm": 2.65625,
      "learning_rate": 2.0852771741811206e-05,
      "loss": 0.8169,
      "step": 534690
    },
    {
      "epoch": 1.8739901937075705,
      "grad_norm": 3.140625,
      "learning_rate": 2.0852122713147504e-05,
      "loss": 0.7818,
      "step": 534700
    },
    {
      "epoch": 1.8740252412144662,
      "grad_norm": 3.828125,
      "learning_rate": 2.0851473684483802e-05,
      "loss": 0.8241,
      "step": 534710
    },
    {
      "epoch": 1.8740602887213618,
      "grad_norm": 2.953125,
      "learning_rate": 2.08508246558201e-05,
      "loss": 0.8527,
      "step": 534720
    },
    {
      "epoch": 1.8740953362282573,
      "grad_norm": 2.6875,
      "learning_rate": 2.08501756271564e-05,
      "loss": 0.915,
      "step": 534730
    },
    {
      "epoch": 1.874130383735153,
      "grad_norm": 3.1875,
      "learning_rate": 2.0849526598492696e-05,
      "loss": 0.8596,
      "step": 534740
    },
    {
      "epoch": 1.8741654312420486,
      "grad_norm": 2.3125,
      "learning_rate": 2.0848877569828994e-05,
      "loss": 0.8041,
      "step": 534750
    },
    {
      "epoch": 1.8742004787489441,
      "grad_norm": 2.921875,
      "learning_rate": 2.0848228541165292e-05,
      "loss": 0.833,
      "step": 534760
    },
    {
      "epoch": 1.87423552625584,
      "grad_norm": 2.953125,
      "learning_rate": 2.084757951250159e-05,
      "loss": 0.7874,
      "step": 534770
    },
    {
      "epoch": 1.8742705737627354,
      "grad_norm": 2.984375,
      "learning_rate": 2.084693048383789e-05,
      "loss": 0.833,
      "step": 534780
    },
    {
      "epoch": 1.874305621269631,
      "grad_norm": 3.0625,
      "learning_rate": 2.084628145517419e-05,
      "loss": 0.8196,
      "step": 534790
    },
    {
      "epoch": 1.8743406687765267,
      "grad_norm": 2.890625,
      "learning_rate": 2.0845632426510488e-05,
      "loss": 0.8648,
      "step": 534800
    },
    {
      "epoch": 1.874375716283422,
      "grad_norm": 2.59375,
      "learning_rate": 2.0844983397846786e-05,
      "loss": 0.7846,
      "step": 534810
    },
    {
      "epoch": 1.8744107637903178,
      "grad_norm": 2.625,
      "learning_rate": 2.0844334369183084e-05,
      "loss": 0.7297,
      "step": 534820
    },
    {
      "epoch": 1.8744458112972133,
      "grad_norm": 3.265625,
      "learning_rate": 2.0843685340519382e-05,
      "loss": 0.9095,
      "step": 534830
    },
    {
      "epoch": 1.8744808588041089,
      "grad_norm": 2.46875,
      "learning_rate": 2.0843036311855676e-05,
      "loss": 0.8586,
      "step": 534840
    },
    {
      "epoch": 1.8745159063110046,
      "grad_norm": 2.609375,
      "learning_rate": 2.0842387283191974e-05,
      "loss": 0.8249,
      "step": 534850
    },
    {
      "epoch": 1.8745509538179002,
      "grad_norm": 3.171875,
      "learning_rate": 2.0841738254528272e-05,
      "loss": 0.8324,
      "step": 534860
    },
    {
      "epoch": 1.8745860013247957,
      "grad_norm": 2.640625,
      "learning_rate": 2.084108922586457e-05,
      "loss": 0.8171,
      "step": 534870
    },
    {
      "epoch": 1.8746210488316914,
      "grad_norm": 2.90625,
      "learning_rate": 2.084044019720087e-05,
      "loss": 0.8074,
      "step": 534880
    },
    {
      "epoch": 1.874656096338587,
      "grad_norm": 3.03125,
      "learning_rate": 2.0839791168537166e-05,
      "loss": 0.8721,
      "step": 534890
    },
    {
      "epoch": 1.8746911438454825,
      "grad_norm": 2.984375,
      "learning_rate": 2.0839142139873464e-05,
      "loss": 0.8679,
      "step": 534900
    },
    {
      "epoch": 1.8747261913523783,
      "grad_norm": 3.5625,
      "learning_rate": 2.0838493111209766e-05,
      "loss": 0.8839,
      "step": 534910
    },
    {
      "epoch": 1.8747612388592736,
      "grad_norm": 3.421875,
      "learning_rate": 2.0837844082546064e-05,
      "loss": 0.8478,
      "step": 534920
    },
    {
      "epoch": 1.8747962863661694,
      "grad_norm": 3.203125,
      "learning_rate": 2.0837195053882362e-05,
      "loss": 0.8317,
      "step": 534930
    },
    {
      "epoch": 1.8748313338730649,
      "grad_norm": 2.765625,
      "learning_rate": 2.083654602521866e-05,
      "loss": 0.8231,
      "step": 534940
    },
    {
      "epoch": 1.8748663813799604,
      "grad_norm": 2.625,
      "learning_rate": 2.0835896996554958e-05,
      "loss": 0.7713,
      "step": 534950
    },
    {
      "epoch": 1.8749014288868562,
      "grad_norm": 2.9375,
      "learning_rate": 2.0835247967891256e-05,
      "loss": 0.7991,
      "step": 534960
    },
    {
      "epoch": 1.8749364763937517,
      "grad_norm": 2.828125,
      "learning_rate": 2.0834598939227554e-05,
      "loss": 0.794,
      "step": 534970
    },
    {
      "epoch": 1.8749715239006473,
      "grad_norm": 3.3125,
      "learning_rate": 2.0833949910563852e-05,
      "loss": 0.8602,
      "step": 534980
    },
    {
      "epoch": 1.875006571407543,
      "grad_norm": 2.609375,
      "learning_rate": 2.083330088190015e-05,
      "loss": 0.8135,
      "step": 534990
    },
    {
      "epoch": 1.8750416189144385,
      "grad_norm": 2.5625,
      "learning_rate": 2.0832651853236448e-05,
      "loss": 0.785,
      "step": 535000
    },
    {
      "epoch": 1.8750416189144385,
      "eval_loss": 0.7747290134429932,
      "eval_runtime": 565.5685,
      "eval_samples_per_second": 672.661,
      "eval_steps_per_second": 56.055,
      "step": 535000
    },
    {
      "epoch": 1.875076666421334,
      "grad_norm": 2.390625,
      "learning_rate": 2.0832002824572746e-05,
      "loss": 0.7622,
      "step": 535010
    },
    {
      "epoch": 1.8751117139282298,
      "grad_norm": 3.15625,
      "learning_rate": 2.0831353795909044e-05,
      "loss": 0.7779,
      "step": 535020
    },
    {
      "epoch": 1.8751467614351252,
      "grad_norm": 2.515625,
      "learning_rate": 2.0830704767245342e-05,
      "loss": 0.7866,
      "step": 535030
    },
    {
      "epoch": 1.875181808942021,
      "grad_norm": 2.96875,
      "learning_rate": 2.083005573858164e-05,
      "loss": 0.8017,
      "step": 535040
    },
    {
      "epoch": 1.8752168564489167,
      "grad_norm": 2.703125,
      "learning_rate": 2.0829406709917938e-05,
      "loss": 0.8747,
      "step": 535050
    },
    {
      "epoch": 1.875251903955812,
      "grad_norm": 2.6875,
      "learning_rate": 2.0828757681254236e-05,
      "loss": 0.8857,
      "step": 535060
    },
    {
      "epoch": 1.8752869514627077,
      "grad_norm": 2.828125,
      "learning_rate": 2.0828108652590534e-05,
      "loss": 0.7993,
      "step": 535070
    },
    {
      "epoch": 1.8753219989696033,
      "grad_norm": 3.0625,
      "learning_rate": 2.0827459623926832e-05,
      "loss": 0.8757,
      "step": 535080
    },
    {
      "epoch": 1.8753570464764988,
      "grad_norm": 2.921875,
      "learning_rate": 2.082681059526313e-05,
      "loss": 0.7981,
      "step": 535090
    },
    {
      "epoch": 1.8753920939833946,
      "grad_norm": 3.03125,
      "learning_rate": 2.0826161566599428e-05,
      "loss": 0.8249,
      "step": 535100
    },
    {
      "epoch": 1.87542714149029,
      "grad_norm": 2.625,
      "learning_rate": 2.0825512537935726e-05,
      "loss": 0.7465,
      "step": 535110
    },
    {
      "epoch": 1.8754621889971856,
      "grad_norm": 3.109375,
      "learning_rate": 2.0824863509272024e-05,
      "loss": 0.8847,
      "step": 535120
    },
    {
      "epoch": 1.8754972365040814,
      "grad_norm": 2.84375,
      "learning_rate": 2.0824214480608322e-05,
      "loss": 0.7806,
      "step": 535130
    },
    {
      "epoch": 1.8755322840109767,
      "grad_norm": 3.109375,
      "learning_rate": 2.082356545194462e-05,
      "loss": 0.701,
      "step": 535140
    },
    {
      "epoch": 1.8755673315178725,
      "grad_norm": 3.046875,
      "learning_rate": 2.0822916423280918e-05,
      "loss": 0.8733,
      "step": 535150
    },
    {
      "epoch": 1.8756023790247682,
      "grad_norm": 2.65625,
      "learning_rate": 2.082226739461722e-05,
      "loss": 0.8475,
      "step": 535160
    },
    {
      "epoch": 1.8756374265316635,
      "grad_norm": 3.28125,
      "learning_rate": 2.0821618365953517e-05,
      "loss": 0.8626,
      "step": 535170
    },
    {
      "epoch": 1.8756724740385593,
      "grad_norm": 2.953125,
      "learning_rate": 2.0820969337289815e-05,
      "loss": 0.7939,
      "step": 535180
    },
    {
      "epoch": 1.8757075215454548,
      "grad_norm": 3.0625,
      "learning_rate": 2.0820320308626113e-05,
      "loss": 0.7917,
      "step": 535190
    },
    {
      "epoch": 1.8757425690523504,
      "grad_norm": 2.796875,
      "learning_rate": 2.081967127996241e-05,
      "loss": 0.8048,
      "step": 535200
    },
    {
      "epoch": 1.8757776165592461,
      "grad_norm": 3.375,
      "learning_rate": 2.0819022251298706e-05,
      "loss": 0.7997,
      "step": 535210
    },
    {
      "epoch": 1.8758126640661417,
      "grad_norm": 3.34375,
      "learning_rate": 2.0818373222635004e-05,
      "loss": 0.8698,
      "step": 535220
    },
    {
      "epoch": 1.8758477115730372,
      "grad_norm": 2.96875,
      "learning_rate": 2.0817724193971302e-05,
      "loss": 0.8574,
      "step": 535230
    },
    {
      "epoch": 1.875882759079933,
      "grad_norm": 2.953125,
      "learning_rate": 2.08170751653076e-05,
      "loss": 0.8393,
      "step": 535240
    },
    {
      "epoch": 1.8759178065868285,
      "grad_norm": 3.40625,
      "learning_rate": 2.0816426136643898e-05,
      "loss": 0.8783,
      "step": 535250
    },
    {
      "epoch": 1.875952854093724,
      "grad_norm": 3.1875,
      "learning_rate": 2.0815777107980196e-05,
      "loss": 0.8496,
      "step": 535260
    },
    {
      "epoch": 1.8759879016006198,
      "grad_norm": 3.203125,
      "learning_rate": 2.0815128079316497e-05,
      "loss": 0.8215,
      "step": 535270
    },
    {
      "epoch": 1.876022949107515,
      "grad_norm": 2.75,
      "learning_rate": 2.0814479050652795e-05,
      "loss": 0.7673,
      "step": 535280
    },
    {
      "epoch": 1.8760579966144109,
      "grad_norm": 2.703125,
      "learning_rate": 2.0813830021989093e-05,
      "loss": 0.8219,
      "step": 535290
    },
    {
      "epoch": 1.8760930441213064,
      "grad_norm": 2.609375,
      "learning_rate": 2.081318099332539e-05,
      "loss": 0.8257,
      "step": 535300
    },
    {
      "epoch": 1.876128091628202,
      "grad_norm": 2.984375,
      "learning_rate": 2.081253196466169e-05,
      "loss": 0.8724,
      "step": 535310
    },
    {
      "epoch": 1.8761631391350977,
      "grad_norm": 2.90625,
      "learning_rate": 2.0811882935997987e-05,
      "loss": 0.7606,
      "step": 535320
    },
    {
      "epoch": 1.8761981866419932,
      "grad_norm": 3.046875,
      "learning_rate": 2.0811233907334285e-05,
      "loss": 0.7954,
      "step": 535330
    },
    {
      "epoch": 1.8762332341488888,
      "grad_norm": 3.484375,
      "learning_rate": 2.0810584878670583e-05,
      "loss": 0.8668,
      "step": 535340
    },
    {
      "epoch": 1.8762682816557845,
      "grad_norm": 2.5,
      "learning_rate": 2.080993585000688e-05,
      "loss": 0.7915,
      "step": 535350
    },
    {
      "epoch": 1.87630332916268,
      "grad_norm": 2.84375,
      "learning_rate": 2.080928682134318e-05,
      "loss": 0.7742,
      "step": 535360
    },
    {
      "epoch": 1.8763383766695756,
      "grad_norm": 2.765625,
      "learning_rate": 2.0808637792679477e-05,
      "loss": 0.8591,
      "step": 535370
    },
    {
      "epoch": 1.8763734241764713,
      "grad_norm": 3.328125,
      "learning_rate": 2.0807988764015775e-05,
      "loss": 0.8314,
      "step": 535380
    },
    {
      "epoch": 1.8764084716833667,
      "grad_norm": 2.734375,
      "learning_rate": 2.0807339735352073e-05,
      "loss": 0.8752,
      "step": 535390
    },
    {
      "epoch": 1.8764435191902624,
      "grad_norm": 2.859375,
      "learning_rate": 2.080669070668837e-05,
      "loss": 0.8699,
      "step": 535400
    },
    {
      "epoch": 1.876478566697158,
      "grad_norm": 2.921875,
      "learning_rate": 2.080604167802467e-05,
      "loss": 0.8025,
      "step": 535410
    },
    {
      "epoch": 1.8765136142040535,
      "grad_norm": 2.53125,
      "learning_rate": 2.0805392649360967e-05,
      "loss": 0.7984,
      "step": 535420
    },
    {
      "epoch": 1.8765486617109493,
      "grad_norm": 2.984375,
      "learning_rate": 2.0804743620697265e-05,
      "loss": 0.8583,
      "step": 535430
    },
    {
      "epoch": 1.8765837092178448,
      "grad_norm": 3.078125,
      "learning_rate": 2.0804094592033563e-05,
      "loss": 0.8294,
      "step": 535440
    },
    {
      "epoch": 1.8766187567247403,
      "grad_norm": 2.59375,
      "learning_rate": 2.080344556336986e-05,
      "loss": 0.8221,
      "step": 535450
    },
    {
      "epoch": 1.876653804231636,
      "grad_norm": 2.453125,
      "learning_rate": 2.080279653470616e-05,
      "loss": 0.778,
      "step": 535460
    },
    {
      "epoch": 1.8766888517385316,
      "grad_norm": 3.03125,
      "learning_rate": 2.0802147506042457e-05,
      "loss": 0.844,
      "step": 535470
    },
    {
      "epoch": 1.8767238992454272,
      "grad_norm": 3.28125,
      "learning_rate": 2.0801498477378755e-05,
      "loss": 0.7691,
      "step": 535480
    },
    {
      "epoch": 1.876758946752323,
      "grad_norm": 3.34375,
      "learning_rate": 2.0800849448715053e-05,
      "loss": 0.87,
      "step": 535490
    },
    {
      "epoch": 1.8767939942592182,
      "grad_norm": 2.875,
      "learning_rate": 2.080020042005135e-05,
      "loss": 0.7777,
      "step": 535500
    },
    {
      "epoch": 1.876829041766114,
      "grad_norm": 3.65625,
      "learning_rate": 2.079955139138765e-05,
      "loss": 0.8486,
      "step": 535510
    },
    {
      "epoch": 1.8768640892730095,
      "grad_norm": 3.125,
      "learning_rate": 2.0798902362723947e-05,
      "loss": 0.8729,
      "step": 535520
    },
    {
      "epoch": 1.876899136779905,
      "grad_norm": 3.09375,
      "learning_rate": 2.079825333406025e-05,
      "loss": 0.8408,
      "step": 535530
    },
    {
      "epoch": 1.8769341842868008,
      "grad_norm": 2.734375,
      "learning_rate": 2.0797604305396547e-05,
      "loss": 0.8481,
      "step": 535540
    },
    {
      "epoch": 1.8769692317936963,
      "grad_norm": 3.34375,
      "learning_rate": 2.0796955276732845e-05,
      "loss": 0.9199,
      "step": 535550
    },
    {
      "epoch": 1.8770042793005919,
      "grad_norm": 3.21875,
      "learning_rate": 2.0796306248069143e-05,
      "loss": 0.7879,
      "step": 535560
    },
    {
      "epoch": 1.8770393268074876,
      "grad_norm": 2.5,
      "learning_rate": 2.079565721940544e-05,
      "loss": 0.8398,
      "step": 535570
    },
    {
      "epoch": 1.8770743743143832,
      "grad_norm": 3.46875,
      "learning_rate": 2.079500819074174e-05,
      "loss": 0.8685,
      "step": 535580
    },
    {
      "epoch": 1.8771094218212787,
      "grad_norm": 2.796875,
      "learning_rate": 2.0794359162078033e-05,
      "loss": 0.811,
      "step": 535590
    },
    {
      "epoch": 1.8771444693281745,
      "grad_norm": 2.859375,
      "learning_rate": 2.079371013341433e-05,
      "loss": 0.8552,
      "step": 535600
    },
    {
      "epoch": 1.8771795168350698,
      "grad_norm": 2.6875,
      "learning_rate": 2.079306110475063e-05,
      "loss": 0.7938,
      "step": 535610
    },
    {
      "epoch": 1.8772145643419655,
      "grad_norm": 3.046875,
      "learning_rate": 2.0792412076086927e-05,
      "loss": 0.8305,
      "step": 535620
    },
    {
      "epoch": 1.877249611848861,
      "grad_norm": 2.890625,
      "learning_rate": 2.0791763047423225e-05,
      "loss": 0.8373,
      "step": 535630
    },
    {
      "epoch": 1.8772846593557566,
      "grad_norm": 3.09375,
      "learning_rate": 2.0791114018759527e-05,
      "loss": 0.811,
      "step": 535640
    },
    {
      "epoch": 1.8773197068626524,
      "grad_norm": 2.953125,
      "learning_rate": 2.0790464990095825e-05,
      "loss": 0.8719,
      "step": 535650
    },
    {
      "epoch": 1.877354754369548,
      "grad_norm": 3.546875,
      "learning_rate": 2.0789815961432123e-05,
      "loss": 0.9298,
      "step": 535660
    },
    {
      "epoch": 1.8773898018764434,
      "grad_norm": 2.921875,
      "learning_rate": 2.078916693276842e-05,
      "loss": 0.7823,
      "step": 535670
    },
    {
      "epoch": 1.8774248493833392,
      "grad_norm": 3.234375,
      "learning_rate": 2.078851790410472e-05,
      "loss": 0.9291,
      "step": 535680
    },
    {
      "epoch": 1.8774598968902347,
      "grad_norm": 2.3125,
      "learning_rate": 2.0787868875441017e-05,
      "loss": 0.7294,
      "step": 535690
    },
    {
      "epoch": 1.8774949443971303,
      "grad_norm": 4.375,
      "learning_rate": 2.0787219846777315e-05,
      "loss": 0.8303,
      "step": 535700
    },
    {
      "epoch": 1.877529991904026,
      "grad_norm": 2.5,
      "learning_rate": 2.0786570818113613e-05,
      "loss": 0.8398,
      "step": 535710
    },
    {
      "epoch": 1.8775650394109213,
      "grad_norm": 3.015625,
      "learning_rate": 2.078592178944991e-05,
      "loss": 0.7736,
      "step": 535720
    },
    {
      "epoch": 1.877600086917817,
      "grad_norm": 2.875,
      "learning_rate": 2.078527276078621e-05,
      "loss": 0.7891,
      "step": 535730
    },
    {
      "epoch": 1.8776351344247129,
      "grad_norm": 2.984375,
      "learning_rate": 2.0784623732122507e-05,
      "loss": 0.8163,
      "step": 535740
    },
    {
      "epoch": 1.8776701819316082,
      "grad_norm": 2.875,
      "learning_rate": 2.0783974703458805e-05,
      "loss": 0.8014,
      "step": 535750
    },
    {
      "epoch": 1.877705229438504,
      "grad_norm": 2.53125,
      "learning_rate": 2.0783325674795103e-05,
      "loss": 0.7915,
      "step": 535760
    },
    {
      "epoch": 1.8777402769453995,
      "grad_norm": 3.25,
      "learning_rate": 2.07826766461314e-05,
      "loss": 0.7832,
      "step": 535770
    },
    {
      "epoch": 1.877775324452295,
      "grad_norm": 3.015625,
      "learning_rate": 2.07820276174677e-05,
      "loss": 0.7236,
      "step": 535780
    },
    {
      "epoch": 1.8778103719591908,
      "grad_norm": 2.875,
      "learning_rate": 2.0781378588803997e-05,
      "loss": 0.8965,
      "step": 535790
    },
    {
      "epoch": 1.8778454194660863,
      "grad_norm": 2.9375,
      "learning_rate": 2.0780729560140295e-05,
      "loss": 0.7949,
      "step": 535800
    },
    {
      "epoch": 1.8778804669729818,
      "grad_norm": 3.125,
      "learning_rate": 2.0780080531476593e-05,
      "loss": 0.846,
      "step": 535810
    },
    {
      "epoch": 1.8779155144798776,
      "grad_norm": 3.125,
      "learning_rate": 2.077943150281289e-05,
      "loss": 0.8853,
      "step": 535820
    },
    {
      "epoch": 1.8779505619867731,
      "grad_norm": 2.96875,
      "learning_rate": 2.077878247414919e-05,
      "loss": 0.7894,
      "step": 535830
    },
    {
      "epoch": 1.8779856094936687,
      "grad_norm": 2.796875,
      "learning_rate": 2.0778133445485487e-05,
      "loss": 0.8611,
      "step": 535840
    },
    {
      "epoch": 1.8780206570005644,
      "grad_norm": 3.0625,
      "learning_rate": 2.0777484416821785e-05,
      "loss": 0.8359,
      "step": 535850
    },
    {
      "epoch": 1.8780557045074597,
      "grad_norm": 3.1875,
      "learning_rate": 2.0776835388158083e-05,
      "loss": 0.8548,
      "step": 535860
    },
    {
      "epoch": 1.8780907520143555,
      "grad_norm": 2.984375,
      "learning_rate": 2.077618635949438e-05,
      "loss": 0.8524,
      "step": 535870
    },
    {
      "epoch": 1.878125799521251,
      "grad_norm": 3.03125,
      "learning_rate": 2.077553733083068e-05,
      "loss": 0.9141,
      "step": 535880
    },
    {
      "epoch": 1.8781608470281466,
      "grad_norm": 2.734375,
      "learning_rate": 2.077488830216698e-05,
      "loss": 0.8306,
      "step": 535890
    },
    {
      "epoch": 1.8781958945350423,
      "grad_norm": 3.21875,
      "learning_rate": 2.0774239273503278e-05,
      "loss": 0.8256,
      "step": 535900
    },
    {
      "epoch": 1.8782309420419379,
      "grad_norm": 3.03125,
      "learning_rate": 2.0773590244839576e-05,
      "loss": 0.8237,
      "step": 535910
    },
    {
      "epoch": 1.8782659895488334,
      "grad_norm": 3.4375,
      "learning_rate": 2.0772941216175874e-05,
      "loss": 0.8262,
      "step": 535920
    },
    {
      "epoch": 1.8783010370557292,
      "grad_norm": 3.078125,
      "learning_rate": 2.0772292187512172e-05,
      "loss": 0.8092,
      "step": 535930
    },
    {
      "epoch": 1.8783360845626247,
      "grad_norm": 2.875,
      "learning_rate": 2.077164315884847e-05,
      "loss": 0.8062,
      "step": 535940
    },
    {
      "epoch": 1.8783711320695202,
      "grad_norm": 2.90625,
      "learning_rate": 2.0770994130184768e-05,
      "loss": 0.8337,
      "step": 535950
    },
    {
      "epoch": 1.878406179576416,
      "grad_norm": 2.921875,
      "learning_rate": 2.0770345101521066e-05,
      "loss": 0.8414,
      "step": 535960
    },
    {
      "epoch": 1.8784412270833113,
      "grad_norm": 3.0,
      "learning_rate": 2.076969607285736e-05,
      "loss": 0.8783,
      "step": 535970
    },
    {
      "epoch": 1.878476274590207,
      "grad_norm": 2.75,
      "learning_rate": 2.076904704419366e-05,
      "loss": 0.8477,
      "step": 535980
    },
    {
      "epoch": 1.8785113220971026,
      "grad_norm": 2.59375,
      "learning_rate": 2.0768398015529957e-05,
      "loss": 0.7999,
      "step": 535990
    },
    {
      "epoch": 1.8785463696039981,
      "grad_norm": 2.640625,
      "learning_rate": 2.0767748986866255e-05,
      "loss": 0.7351,
      "step": 536000
    },
    {
      "epoch": 1.8785814171108939,
      "grad_norm": 2.828125,
      "learning_rate": 2.0767099958202556e-05,
      "loss": 0.7406,
      "step": 536010
    },
    {
      "epoch": 1.8786164646177894,
      "grad_norm": 2.4375,
      "learning_rate": 2.0766450929538854e-05,
      "loss": 0.8966,
      "step": 536020
    },
    {
      "epoch": 1.878651512124685,
      "grad_norm": 2.921875,
      "learning_rate": 2.0765801900875152e-05,
      "loss": 0.7388,
      "step": 536030
    },
    {
      "epoch": 1.8786865596315807,
      "grad_norm": 2.59375,
      "learning_rate": 2.076515287221145e-05,
      "loss": 0.8891,
      "step": 536040
    },
    {
      "epoch": 1.8787216071384762,
      "grad_norm": 2.640625,
      "learning_rate": 2.0764503843547748e-05,
      "loss": 0.8058,
      "step": 536050
    },
    {
      "epoch": 1.8787566546453718,
      "grad_norm": 3.15625,
      "learning_rate": 2.0763854814884046e-05,
      "loss": 0.8443,
      "step": 536060
    },
    {
      "epoch": 1.8787917021522675,
      "grad_norm": 3.328125,
      "learning_rate": 2.0763205786220344e-05,
      "loss": 0.8797,
      "step": 536070
    },
    {
      "epoch": 1.8788267496591629,
      "grad_norm": 2.796875,
      "learning_rate": 2.0762556757556642e-05,
      "loss": 0.8832,
      "step": 536080
    },
    {
      "epoch": 1.8788617971660586,
      "grad_norm": 2.5,
      "learning_rate": 2.076190772889294e-05,
      "loss": 0.8284,
      "step": 536090
    },
    {
      "epoch": 1.8788968446729541,
      "grad_norm": 3.4375,
      "learning_rate": 2.0761258700229238e-05,
      "loss": 0.8945,
      "step": 536100
    },
    {
      "epoch": 1.8789318921798497,
      "grad_norm": 2.96875,
      "learning_rate": 2.0760609671565536e-05,
      "loss": 0.7621,
      "step": 536110
    },
    {
      "epoch": 1.8789669396867454,
      "grad_norm": 2.671875,
      "learning_rate": 2.0759960642901834e-05,
      "loss": 0.833,
      "step": 536120
    },
    {
      "epoch": 1.879001987193641,
      "grad_norm": 2.875,
      "learning_rate": 2.0759311614238132e-05,
      "loss": 0.8422,
      "step": 536130
    },
    {
      "epoch": 1.8790370347005365,
      "grad_norm": 2.9375,
      "learning_rate": 2.075866258557443e-05,
      "loss": 0.8294,
      "step": 536140
    },
    {
      "epoch": 1.8790720822074323,
      "grad_norm": 3.3125,
      "learning_rate": 2.0758013556910728e-05,
      "loss": 0.7676,
      "step": 536150
    },
    {
      "epoch": 1.8791071297143278,
      "grad_norm": 2.734375,
      "learning_rate": 2.0757364528247026e-05,
      "loss": 0.7404,
      "step": 536160
    },
    {
      "epoch": 1.8791421772212233,
      "grad_norm": 3.28125,
      "learning_rate": 2.0756715499583324e-05,
      "loss": 0.7887,
      "step": 536170
    },
    {
      "epoch": 1.879177224728119,
      "grad_norm": 2.890625,
      "learning_rate": 2.0756066470919622e-05,
      "loss": 0.8212,
      "step": 536180
    },
    {
      "epoch": 1.8792122722350144,
      "grad_norm": 3.109375,
      "learning_rate": 2.075541744225592e-05,
      "loss": 0.8405,
      "step": 536190
    },
    {
      "epoch": 1.8792473197419102,
      "grad_norm": 2.734375,
      "learning_rate": 2.0754768413592218e-05,
      "loss": 0.8883,
      "step": 536200
    },
    {
      "epoch": 1.8792823672488057,
      "grad_norm": 2.765625,
      "learning_rate": 2.0754119384928516e-05,
      "loss": 0.8951,
      "step": 536210
    },
    {
      "epoch": 1.8793174147557012,
      "grad_norm": 2.328125,
      "learning_rate": 2.0753470356264814e-05,
      "loss": 0.7634,
      "step": 536220
    },
    {
      "epoch": 1.879352462262597,
      "grad_norm": 2.5625,
      "learning_rate": 2.0752821327601112e-05,
      "loss": 0.8771,
      "step": 536230
    },
    {
      "epoch": 1.8793875097694925,
      "grad_norm": 2.65625,
      "learning_rate": 2.075217229893741e-05,
      "loss": 0.834,
      "step": 536240
    },
    {
      "epoch": 1.879422557276388,
      "grad_norm": 2.859375,
      "learning_rate": 2.0751523270273708e-05,
      "loss": 0.8187,
      "step": 536250
    },
    {
      "epoch": 1.8794576047832838,
      "grad_norm": 3.03125,
      "learning_rate": 2.075087424161001e-05,
      "loss": 0.8341,
      "step": 536260
    },
    {
      "epoch": 1.8794926522901794,
      "grad_norm": 3.34375,
      "learning_rate": 2.0750225212946307e-05,
      "loss": 0.9019,
      "step": 536270
    },
    {
      "epoch": 1.879527699797075,
      "grad_norm": 2.875,
      "learning_rate": 2.0749576184282605e-05,
      "loss": 0.8561,
      "step": 536280
    },
    {
      "epoch": 1.8795627473039707,
      "grad_norm": 3.03125,
      "learning_rate": 2.0748927155618903e-05,
      "loss": 0.744,
      "step": 536290
    },
    {
      "epoch": 1.879597794810866,
      "grad_norm": 2.671875,
      "learning_rate": 2.07482781269552e-05,
      "loss": 0.8392,
      "step": 536300
    },
    {
      "epoch": 1.8796328423177617,
      "grad_norm": 3.875,
      "learning_rate": 2.07476290982915e-05,
      "loss": 0.8187,
      "step": 536310
    },
    {
      "epoch": 1.8796678898246573,
      "grad_norm": 2.734375,
      "learning_rate": 2.0746980069627797e-05,
      "loss": 0.835,
      "step": 536320
    },
    {
      "epoch": 1.8797029373315528,
      "grad_norm": 3.140625,
      "learning_rate": 2.0746331040964095e-05,
      "loss": 0.7084,
      "step": 536330
    },
    {
      "epoch": 1.8797379848384486,
      "grad_norm": 3.0,
      "learning_rate": 2.074568201230039e-05,
      "loss": 0.922,
      "step": 536340
    },
    {
      "epoch": 1.879773032345344,
      "grad_norm": 2.859375,
      "learning_rate": 2.0745032983636688e-05,
      "loss": 0.8316,
      "step": 536350
    },
    {
      "epoch": 1.8798080798522396,
      "grad_norm": 3.28125,
      "learning_rate": 2.0744383954972986e-05,
      "loss": 0.9033,
      "step": 536360
    },
    {
      "epoch": 1.8798431273591354,
      "grad_norm": 3.359375,
      "learning_rate": 2.0743734926309287e-05,
      "loss": 0.9074,
      "step": 536370
    },
    {
      "epoch": 1.879878174866031,
      "grad_norm": 2.96875,
      "learning_rate": 2.0743085897645585e-05,
      "loss": 0.8551,
      "step": 536380
    },
    {
      "epoch": 1.8799132223729265,
      "grad_norm": 2.765625,
      "learning_rate": 2.0742436868981883e-05,
      "loss": 0.8109,
      "step": 536390
    },
    {
      "epoch": 1.8799482698798222,
      "grad_norm": 3.0625,
      "learning_rate": 2.074178784031818e-05,
      "loss": 0.8019,
      "step": 536400
    },
    {
      "epoch": 1.8799833173867175,
      "grad_norm": 2.703125,
      "learning_rate": 2.074113881165448e-05,
      "loss": 0.8337,
      "step": 536410
    },
    {
      "epoch": 1.8800183648936133,
      "grad_norm": 2.65625,
      "learning_rate": 2.0740489782990777e-05,
      "loss": 0.8829,
      "step": 536420
    },
    {
      "epoch": 1.880053412400509,
      "grad_norm": 3.046875,
      "learning_rate": 2.0739840754327075e-05,
      "loss": 0.8296,
      "step": 536430
    },
    {
      "epoch": 1.8800884599074044,
      "grad_norm": 3.078125,
      "learning_rate": 2.0739191725663373e-05,
      "loss": 0.7325,
      "step": 536440
    },
    {
      "epoch": 1.8801235074143001,
      "grad_norm": 2.78125,
      "learning_rate": 2.073854269699967e-05,
      "loss": 0.8189,
      "step": 536450
    },
    {
      "epoch": 1.8801585549211957,
      "grad_norm": 3.171875,
      "learning_rate": 2.073789366833597e-05,
      "loss": 0.8704,
      "step": 536460
    },
    {
      "epoch": 1.8801936024280912,
      "grad_norm": 2.984375,
      "learning_rate": 2.0737244639672267e-05,
      "loss": 0.8551,
      "step": 536470
    },
    {
      "epoch": 1.880228649934987,
      "grad_norm": 3.203125,
      "learning_rate": 2.0736595611008565e-05,
      "loss": 0.9673,
      "step": 536480
    },
    {
      "epoch": 1.8802636974418825,
      "grad_norm": 2.9375,
      "learning_rate": 2.0735946582344863e-05,
      "loss": 0.8242,
      "step": 536490
    },
    {
      "epoch": 1.880298744948778,
      "grad_norm": 2.78125,
      "learning_rate": 2.073529755368116e-05,
      "loss": 0.8303,
      "step": 536500
    },
    {
      "epoch": 1.8803337924556738,
      "grad_norm": 3.0625,
      "learning_rate": 2.0734648525017463e-05,
      "loss": 0.7914,
      "step": 536510
    },
    {
      "epoch": 1.8803688399625693,
      "grad_norm": 2.59375,
      "learning_rate": 2.073399949635376e-05,
      "loss": 0.7641,
      "step": 536520
    },
    {
      "epoch": 1.8804038874694649,
      "grad_norm": 2.890625,
      "learning_rate": 2.0733350467690055e-05,
      "loss": 0.7859,
      "step": 536530
    },
    {
      "epoch": 1.8804389349763606,
      "grad_norm": 2.8125,
      "learning_rate": 2.0732701439026353e-05,
      "loss": 0.7718,
      "step": 536540
    },
    {
      "epoch": 1.880473982483256,
      "grad_norm": 2.90625,
      "learning_rate": 2.073205241036265e-05,
      "loss": 0.8527,
      "step": 536550
    },
    {
      "epoch": 1.8805090299901517,
      "grad_norm": 2.5,
      "learning_rate": 2.073140338169895e-05,
      "loss": 0.7176,
      "step": 536560
    },
    {
      "epoch": 1.8805440774970472,
      "grad_norm": 2.8125,
      "learning_rate": 2.0730754353035247e-05,
      "loss": 0.7982,
      "step": 536570
    },
    {
      "epoch": 1.8805791250039428,
      "grad_norm": 2.375,
      "learning_rate": 2.0730105324371545e-05,
      "loss": 0.7423,
      "step": 536580
    },
    {
      "epoch": 1.8806141725108385,
      "grad_norm": 2.921875,
      "learning_rate": 2.0729456295707843e-05,
      "loss": 0.7906,
      "step": 536590
    },
    {
      "epoch": 1.880649220017734,
      "grad_norm": 3.21875,
      "learning_rate": 2.072880726704414e-05,
      "loss": 0.86,
      "step": 536600
    },
    {
      "epoch": 1.8806842675246296,
      "grad_norm": 2.578125,
      "learning_rate": 2.072815823838044e-05,
      "loss": 0.8295,
      "step": 536610
    },
    {
      "epoch": 1.8807193150315253,
      "grad_norm": 2.765625,
      "learning_rate": 2.0727509209716737e-05,
      "loss": 0.8623,
      "step": 536620
    },
    {
      "epoch": 1.8807543625384209,
      "grad_norm": 2.796875,
      "learning_rate": 2.072686018105304e-05,
      "loss": 0.8116,
      "step": 536630
    },
    {
      "epoch": 1.8807894100453164,
      "grad_norm": 2.59375,
      "learning_rate": 2.0726211152389337e-05,
      "loss": 0.8159,
      "step": 536640
    },
    {
      "epoch": 1.8808244575522122,
      "grad_norm": 3.296875,
      "learning_rate": 2.0725562123725635e-05,
      "loss": 0.8264,
      "step": 536650
    },
    {
      "epoch": 1.8808595050591075,
      "grad_norm": 2.859375,
      "learning_rate": 2.0724913095061933e-05,
      "loss": 0.8723,
      "step": 536660
    },
    {
      "epoch": 1.8808945525660032,
      "grad_norm": 3.0625,
      "learning_rate": 2.072426406639823e-05,
      "loss": 0.9353,
      "step": 536670
    },
    {
      "epoch": 1.8809296000728988,
      "grad_norm": 3.09375,
      "learning_rate": 2.072361503773453e-05,
      "loss": 0.8275,
      "step": 536680
    },
    {
      "epoch": 1.8809646475797943,
      "grad_norm": 2.875,
      "learning_rate": 2.0722966009070827e-05,
      "loss": 0.7754,
      "step": 536690
    },
    {
      "epoch": 1.88099969508669,
      "grad_norm": 3.0,
      "learning_rate": 2.0722316980407125e-05,
      "loss": 0.8238,
      "step": 536700
    },
    {
      "epoch": 1.8810347425935856,
      "grad_norm": 2.515625,
      "learning_rate": 2.0721667951743423e-05,
      "loss": 0.8113,
      "step": 536710
    },
    {
      "epoch": 1.8810697901004811,
      "grad_norm": 3.078125,
      "learning_rate": 2.0721018923079717e-05,
      "loss": 0.8409,
      "step": 536720
    },
    {
      "epoch": 1.881104837607377,
      "grad_norm": 2.84375,
      "learning_rate": 2.0720369894416015e-05,
      "loss": 0.8487,
      "step": 536730
    },
    {
      "epoch": 1.8811398851142724,
      "grad_norm": 2.9375,
      "learning_rate": 2.0719720865752317e-05,
      "loss": 0.8191,
      "step": 536740
    },
    {
      "epoch": 1.881174932621168,
      "grad_norm": 3.078125,
      "learning_rate": 2.0719071837088615e-05,
      "loss": 0.8664,
      "step": 536750
    },
    {
      "epoch": 1.8812099801280637,
      "grad_norm": 2.75,
      "learning_rate": 2.0718422808424913e-05,
      "loss": 0.7965,
      "step": 536760
    },
    {
      "epoch": 1.881245027634959,
      "grad_norm": 3.109375,
      "learning_rate": 2.071777377976121e-05,
      "loss": 0.8889,
      "step": 536770
    },
    {
      "epoch": 1.8812800751418548,
      "grad_norm": 2.5625,
      "learning_rate": 2.071712475109751e-05,
      "loss": 0.8301,
      "step": 536780
    },
    {
      "epoch": 1.8813151226487503,
      "grad_norm": 2.828125,
      "learning_rate": 2.0716475722433807e-05,
      "loss": 0.8178,
      "step": 536790
    },
    {
      "epoch": 1.8813501701556459,
      "grad_norm": 2.8125,
      "learning_rate": 2.0715826693770105e-05,
      "loss": 0.8656,
      "step": 536800
    },
    {
      "epoch": 1.8813852176625416,
      "grad_norm": 2.90625,
      "learning_rate": 2.0715177665106403e-05,
      "loss": 0.8514,
      "step": 536810
    },
    {
      "epoch": 1.8814202651694372,
      "grad_norm": 3.171875,
      "learning_rate": 2.07145286364427e-05,
      "loss": 0.7775,
      "step": 536820
    },
    {
      "epoch": 1.8814553126763327,
      "grad_norm": 2.875,
      "learning_rate": 2.0713879607779e-05,
      "loss": 0.8176,
      "step": 536830
    },
    {
      "epoch": 1.8814903601832285,
      "grad_norm": 2.640625,
      "learning_rate": 2.0713230579115297e-05,
      "loss": 0.7738,
      "step": 536840
    },
    {
      "epoch": 1.881525407690124,
      "grad_norm": 3.015625,
      "learning_rate": 2.0712581550451595e-05,
      "loss": 0.8577,
      "step": 536850
    },
    {
      "epoch": 1.8815604551970195,
      "grad_norm": 3.03125,
      "learning_rate": 2.0711932521787893e-05,
      "loss": 0.8562,
      "step": 536860
    },
    {
      "epoch": 1.8815955027039153,
      "grad_norm": 2.578125,
      "learning_rate": 2.071128349312419e-05,
      "loss": 0.8048,
      "step": 536870
    },
    {
      "epoch": 1.8816305502108106,
      "grad_norm": 2.78125,
      "learning_rate": 2.0710634464460492e-05,
      "loss": 0.7856,
      "step": 536880
    },
    {
      "epoch": 1.8816655977177064,
      "grad_norm": 2.71875,
      "learning_rate": 2.070998543579679e-05,
      "loss": 0.7854,
      "step": 536890
    },
    {
      "epoch": 1.881700645224602,
      "grad_norm": 2.8125,
      "learning_rate": 2.0709336407133088e-05,
      "loss": 0.7927,
      "step": 536900
    },
    {
      "epoch": 1.8817356927314974,
      "grad_norm": 2.75,
      "learning_rate": 2.0708687378469383e-05,
      "loss": 0.8289,
      "step": 536910
    },
    {
      "epoch": 1.8817707402383932,
      "grad_norm": 3.296875,
      "learning_rate": 2.070803834980568e-05,
      "loss": 0.8312,
      "step": 536920
    },
    {
      "epoch": 1.8818057877452887,
      "grad_norm": 3.015625,
      "learning_rate": 2.070738932114198e-05,
      "loss": 0.795,
      "step": 536930
    },
    {
      "epoch": 1.8818408352521843,
      "grad_norm": 2.703125,
      "learning_rate": 2.0706740292478277e-05,
      "loss": 0.8932,
      "step": 536940
    },
    {
      "epoch": 1.88187588275908,
      "grad_norm": 2.75,
      "learning_rate": 2.0706091263814575e-05,
      "loss": 0.8897,
      "step": 536950
    },
    {
      "epoch": 1.8819109302659756,
      "grad_norm": 3.03125,
      "learning_rate": 2.0705442235150873e-05,
      "loss": 0.8069,
      "step": 536960
    },
    {
      "epoch": 1.881945977772871,
      "grad_norm": 2.96875,
      "learning_rate": 2.070479320648717e-05,
      "loss": 0.8117,
      "step": 536970
    },
    {
      "epoch": 1.8819810252797669,
      "grad_norm": 2.953125,
      "learning_rate": 2.070414417782347e-05,
      "loss": 0.7944,
      "step": 536980
    },
    {
      "epoch": 1.8820160727866622,
      "grad_norm": 3.046875,
      "learning_rate": 2.070349514915977e-05,
      "loss": 0.8254,
      "step": 536990
    },
    {
      "epoch": 1.882051120293558,
      "grad_norm": 2.921875,
      "learning_rate": 2.0702846120496068e-05,
      "loss": 0.8128,
      "step": 537000
    },
    {
      "epoch": 1.8820861678004537,
      "grad_norm": 2.671875,
      "learning_rate": 2.0702197091832366e-05,
      "loss": 0.9122,
      "step": 537010
    },
    {
      "epoch": 1.882121215307349,
      "grad_norm": 3.078125,
      "learning_rate": 2.0701548063168664e-05,
      "loss": 0.8387,
      "step": 537020
    },
    {
      "epoch": 1.8821562628142448,
      "grad_norm": 2.71875,
      "learning_rate": 2.0700899034504962e-05,
      "loss": 0.8203,
      "step": 537030
    },
    {
      "epoch": 1.8821913103211403,
      "grad_norm": 2.59375,
      "learning_rate": 2.070025000584126e-05,
      "loss": 0.7709,
      "step": 537040
    },
    {
      "epoch": 1.8822263578280358,
      "grad_norm": 2.828125,
      "learning_rate": 2.0699600977177558e-05,
      "loss": 0.8108,
      "step": 537050
    },
    {
      "epoch": 1.8822614053349316,
      "grad_norm": 2.75,
      "learning_rate": 2.0698951948513856e-05,
      "loss": 0.8041,
      "step": 537060
    },
    {
      "epoch": 1.8822964528418271,
      "grad_norm": 2.9375,
      "learning_rate": 2.0698302919850154e-05,
      "loss": 0.7913,
      "step": 537070
    },
    {
      "epoch": 1.8823315003487227,
      "grad_norm": 2.703125,
      "learning_rate": 2.0697653891186452e-05,
      "loss": 0.8823,
      "step": 537080
    },
    {
      "epoch": 1.8823665478556184,
      "grad_norm": 3.015625,
      "learning_rate": 2.0697004862522747e-05,
      "loss": 0.8863,
      "step": 537090
    },
    {
      "epoch": 1.8824015953625137,
      "grad_norm": 2.84375,
      "learning_rate": 2.0696355833859045e-05,
      "loss": 0.8094,
      "step": 537100
    },
    {
      "epoch": 1.8824366428694095,
      "grad_norm": 2.890625,
      "learning_rate": 2.0695706805195346e-05,
      "loss": 0.9077,
      "step": 537110
    },
    {
      "epoch": 1.8824716903763052,
      "grad_norm": 2.578125,
      "learning_rate": 2.0695057776531644e-05,
      "loss": 0.7679,
      "step": 537120
    },
    {
      "epoch": 1.8825067378832006,
      "grad_norm": 2.96875,
      "learning_rate": 2.0694408747867942e-05,
      "loss": 0.8781,
      "step": 537130
    },
    {
      "epoch": 1.8825417853900963,
      "grad_norm": 2.59375,
      "learning_rate": 2.069375971920424e-05,
      "loss": 0.7677,
      "step": 537140
    },
    {
      "epoch": 1.8825768328969918,
      "grad_norm": 2.71875,
      "learning_rate": 2.0693110690540538e-05,
      "loss": 0.7724,
      "step": 537150
    },
    {
      "epoch": 1.8826118804038874,
      "grad_norm": 2.984375,
      "learning_rate": 2.0692461661876836e-05,
      "loss": 0.8553,
      "step": 537160
    },
    {
      "epoch": 1.8826469279107831,
      "grad_norm": 3.03125,
      "learning_rate": 2.0691812633213134e-05,
      "loss": 0.8228,
      "step": 537170
    },
    {
      "epoch": 1.8826819754176787,
      "grad_norm": 3.15625,
      "learning_rate": 2.0691163604549432e-05,
      "loss": 0.9058,
      "step": 537180
    },
    {
      "epoch": 1.8827170229245742,
      "grad_norm": 3.1875,
      "learning_rate": 2.069051457588573e-05,
      "loss": 0.8058,
      "step": 537190
    },
    {
      "epoch": 1.88275207043147,
      "grad_norm": 2.96875,
      "learning_rate": 2.0689865547222028e-05,
      "loss": 0.8654,
      "step": 537200
    },
    {
      "epoch": 1.8827871179383655,
      "grad_norm": 2.71875,
      "learning_rate": 2.0689216518558326e-05,
      "loss": 0.8282,
      "step": 537210
    },
    {
      "epoch": 1.882822165445261,
      "grad_norm": 2.734375,
      "learning_rate": 2.0688567489894624e-05,
      "loss": 0.8323,
      "step": 537220
    },
    {
      "epoch": 1.8828572129521568,
      "grad_norm": 2.96875,
      "learning_rate": 2.0687918461230922e-05,
      "loss": 0.8454,
      "step": 537230
    },
    {
      "epoch": 1.8828922604590521,
      "grad_norm": 3.4375,
      "learning_rate": 2.068726943256722e-05,
      "loss": 0.7924,
      "step": 537240
    },
    {
      "epoch": 1.8829273079659479,
      "grad_norm": 3.125,
      "learning_rate": 2.068662040390352e-05,
      "loss": 0.8214,
      "step": 537250
    },
    {
      "epoch": 1.8829623554728434,
      "grad_norm": 2.578125,
      "learning_rate": 2.068597137523982e-05,
      "loss": 0.7987,
      "step": 537260
    },
    {
      "epoch": 1.882997402979739,
      "grad_norm": 3.125,
      "learning_rate": 2.0685322346576117e-05,
      "loss": 0.7494,
      "step": 537270
    },
    {
      "epoch": 1.8830324504866347,
      "grad_norm": 2.765625,
      "learning_rate": 2.0684673317912412e-05,
      "loss": 0.7921,
      "step": 537280
    },
    {
      "epoch": 1.8830674979935302,
      "grad_norm": 2.8125,
      "learning_rate": 2.068402428924871e-05,
      "loss": 0.8439,
      "step": 537290
    },
    {
      "epoch": 1.8831025455004258,
      "grad_norm": 2.8125,
      "learning_rate": 2.0683375260585008e-05,
      "loss": 0.7805,
      "step": 537300
    },
    {
      "epoch": 1.8831375930073215,
      "grad_norm": 2.875,
      "learning_rate": 2.0682726231921306e-05,
      "loss": 0.8598,
      "step": 537310
    },
    {
      "epoch": 1.883172640514217,
      "grad_norm": 2.765625,
      "learning_rate": 2.0682077203257604e-05,
      "loss": 0.7954,
      "step": 537320
    },
    {
      "epoch": 1.8832076880211126,
      "grad_norm": 3.078125,
      "learning_rate": 2.0681428174593902e-05,
      "loss": 0.8683,
      "step": 537330
    },
    {
      "epoch": 1.8832427355280084,
      "grad_norm": 2.796875,
      "learning_rate": 2.06807791459302e-05,
      "loss": 0.8503,
      "step": 537340
    },
    {
      "epoch": 1.8832777830349037,
      "grad_norm": 2.78125,
      "learning_rate": 2.0680130117266498e-05,
      "loss": 0.837,
      "step": 537350
    },
    {
      "epoch": 1.8833128305417994,
      "grad_norm": 3.1875,
      "learning_rate": 2.06794810886028e-05,
      "loss": 0.8417,
      "step": 537360
    },
    {
      "epoch": 1.883347878048695,
      "grad_norm": 3.015625,
      "learning_rate": 2.0678832059939097e-05,
      "loss": 0.7981,
      "step": 537370
    },
    {
      "epoch": 1.8833829255555905,
      "grad_norm": 2.734375,
      "learning_rate": 2.0678183031275395e-05,
      "loss": 0.7809,
      "step": 537380
    },
    {
      "epoch": 1.8834179730624863,
      "grad_norm": 3.109375,
      "learning_rate": 2.0677534002611693e-05,
      "loss": 0.7737,
      "step": 537390
    },
    {
      "epoch": 1.8834530205693818,
      "grad_norm": 2.953125,
      "learning_rate": 2.067688497394799e-05,
      "loss": 0.7594,
      "step": 537400
    },
    {
      "epoch": 1.8834880680762773,
      "grad_norm": 2.890625,
      "learning_rate": 2.067623594528429e-05,
      "loss": 0.7871,
      "step": 537410
    },
    {
      "epoch": 1.883523115583173,
      "grad_norm": 3.171875,
      "learning_rate": 2.0675586916620587e-05,
      "loss": 0.7722,
      "step": 537420
    },
    {
      "epoch": 1.8835581630900686,
      "grad_norm": 2.734375,
      "learning_rate": 2.0674937887956885e-05,
      "loss": 0.7235,
      "step": 537430
    },
    {
      "epoch": 1.8835932105969642,
      "grad_norm": 2.71875,
      "learning_rate": 2.0674288859293183e-05,
      "loss": 0.8368,
      "step": 537440
    },
    {
      "epoch": 1.88362825810386,
      "grad_norm": 2.75,
      "learning_rate": 2.067363983062948e-05,
      "loss": 0.8666,
      "step": 537450
    },
    {
      "epoch": 1.8836633056107552,
      "grad_norm": 3.25,
      "learning_rate": 2.067299080196578e-05,
      "loss": 0.8323,
      "step": 537460
    },
    {
      "epoch": 1.883698353117651,
      "grad_norm": 2.921875,
      "learning_rate": 2.0672341773302077e-05,
      "loss": 0.9143,
      "step": 537470
    },
    {
      "epoch": 1.8837334006245465,
      "grad_norm": 2.703125,
      "learning_rate": 2.0671692744638375e-05,
      "loss": 0.7969,
      "step": 537480
    },
    {
      "epoch": 1.883768448131442,
      "grad_norm": 2.65625,
      "learning_rate": 2.0671043715974673e-05,
      "loss": 0.8185,
      "step": 537490
    },
    {
      "epoch": 1.8838034956383378,
      "grad_norm": 2.96875,
      "learning_rate": 2.067039468731097e-05,
      "loss": 0.9201,
      "step": 537500
    },
    {
      "epoch": 1.8838385431452334,
      "grad_norm": 2.6875,
      "learning_rate": 2.066974565864727e-05,
      "loss": 0.8537,
      "step": 537510
    },
    {
      "epoch": 1.883873590652129,
      "grad_norm": 3.0,
      "learning_rate": 2.0669096629983567e-05,
      "loss": 0.8239,
      "step": 537520
    },
    {
      "epoch": 1.8839086381590247,
      "grad_norm": 3.125,
      "learning_rate": 2.0668447601319865e-05,
      "loss": 0.9212,
      "step": 537530
    },
    {
      "epoch": 1.8839436856659202,
      "grad_norm": 2.75,
      "learning_rate": 2.0667798572656163e-05,
      "loss": 0.9268,
      "step": 537540
    },
    {
      "epoch": 1.8839787331728157,
      "grad_norm": 3.203125,
      "learning_rate": 2.066714954399246e-05,
      "loss": 0.8394,
      "step": 537550
    },
    {
      "epoch": 1.8840137806797115,
      "grad_norm": 2.9375,
      "learning_rate": 2.066650051532876e-05,
      "loss": 0.8508,
      "step": 537560
    },
    {
      "epoch": 1.8840488281866068,
      "grad_norm": 2.953125,
      "learning_rate": 2.0665851486665057e-05,
      "loss": 0.8326,
      "step": 537570
    },
    {
      "epoch": 1.8840838756935026,
      "grad_norm": 2.84375,
      "learning_rate": 2.0665202458001355e-05,
      "loss": 0.8338,
      "step": 537580
    },
    {
      "epoch": 1.884118923200398,
      "grad_norm": 3.09375,
      "learning_rate": 2.0664553429337653e-05,
      "loss": 0.8909,
      "step": 537590
    },
    {
      "epoch": 1.8841539707072936,
      "grad_norm": 2.78125,
      "learning_rate": 2.066390440067395e-05,
      "loss": 0.8647,
      "step": 537600
    },
    {
      "epoch": 1.8841890182141894,
      "grad_norm": 2.546875,
      "learning_rate": 2.0663255372010253e-05,
      "loss": 0.817,
      "step": 537610
    },
    {
      "epoch": 1.884224065721085,
      "grad_norm": 3.0625,
      "learning_rate": 2.066260634334655e-05,
      "loss": 0.9343,
      "step": 537620
    },
    {
      "epoch": 1.8842591132279805,
      "grad_norm": 3.390625,
      "learning_rate": 2.066195731468285e-05,
      "loss": 0.9174,
      "step": 537630
    },
    {
      "epoch": 1.8842941607348762,
      "grad_norm": 2.8125,
      "learning_rate": 2.0661308286019147e-05,
      "loss": 0.8173,
      "step": 537640
    },
    {
      "epoch": 1.8843292082417717,
      "grad_norm": 2.765625,
      "learning_rate": 2.0660659257355445e-05,
      "loss": 0.8255,
      "step": 537650
    },
    {
      "epoch": 1.8843642557486673,
      "grad_norm": 3.09375,
      "learning_rate": 2.066001022869174e-05,
      "loss": 0.8386,
      "step": 537660
    },
    {
      "epoch": 1.884399303255563,
      "grad_norm": 2.875,
      "learning_rate": 2.0659361200028037e-05,
      "loss": 0.8488,
      "step": 537670
    },
    {
      "epoch": 1.8844343507624584,
      "grad_norm": 2.75,
      "learning_rate": 2.0658712171364335e-05,
      "loss": 0.7924,
      "step": 537680
    },
    {
      "epoch": 1.8844693982693541,
      "grad_norm": 2.71875,
      "learning_rate": 2.0658063142700633e-05,
      "loss": 0.8584,
      "step": 537690
    },
    {
      "epoch": 1.8845044457762499,
      "grad_norm": 2.875,
      "learning_rate": 2.065741411403693e-05,
      "loss": 0.869,
      "step": 537700
    },
    {
      "epoch": 1.8845394932831452,
      "grad_norm": 2.828125,
      "learning_rate": 2.065676508537323e-05,
      "loss": 0.8308,
      "step": 537710
    },
    {
      "epoch": 1.884574540790041,
      "grad_norm": 2.984375,
      "learning_rate": 2.0656116056709527e-05,
      "loss": 0.8199,
      "step": 537720
    },
    {
      "epoch": 1.8846095882969365,
      "grad_norm": 3.015625,
      "learning_rate": 2.065546702804583e-05,
      "loss": 0.842,
      "step": 537730
    },
    {
      "epoch": 1.884644635803832,
      "grad_norm": 3.1875,
      "learning_rate": 2.0654817999382127e-05,
      "loss": 0.7949,
      "step": 537740
    },
    {
      "epoch": 1.8846796833107278,
      "grad_norm": 2.90625,
      "learning_rate": 2.0654168970718425e-05,
      "loss": 0.8488,
      "step": 537750
    },
    {
      "epoch": 1.8847147308176233,
      "grad_norm": 3.09375,
      "learning_rate": 2.0653519942054723e-05,
      "loss": 0.8289,
      "step": 537760
    },
    {
      "epoch": 1.8847497783245188,
      "grad_norm": 3.0,
      "learning_rate": 2.065287091339102e-05,
      "loss": 0.8356,
      "step": 537770
    },
    {
      "epoch": 1.8847848258314146,
      "grad_norm": 2.96875,
      "learning_rate": 2.065222188472732e-05,
      "loss": 0.8814,
      "step": 537780
    },
    {
      "epoch": 1.88481987333831,
      "grad_norm": 2.78125,
      "learning_rate": 2.0651572856063617e-05,
      "loss": 0.8052,
      "step": 537790
    },
    {
      "epoch": 1.8848549208452057,
      "grad_norm": 2.640625,
      "learning_rate": 2.0650923827399915e-05,
      "loss": 0.7539,
      "step": 537800
    },
    {
      "epoch": 1.8848899683521014,
      "grad_norm": 2.890625,
      "learning_rate": 2.0650274798736213e-05,
      "loss": 0.795,
      "step": 537810
    },
    {
      "epoch": 1.8849250158589967,
      "grad_norm": 3.03125,
      "learning_rate": 2.064962577007251e-05,
      "loss": 0.8431,
      "step": 537820
    },
    {
      "epoch": 1.8849600633658925,
      "grad_norm": 3.171875,
      "learning_rate": 2.064897674140881e-05,
      "loss": 0.8441,
      "step": 537830
    },
    {
      "epoch": 1.884995110872788,
      "grad_norm": 3.65625,
      "learning_rate": 2.0648327712745107e-05,
      "loss": 0.8443,
      "step": 537840
    },
    {
      "epoch": 1.8850301583796836,
      "grad_norm": 2.90625,
      "learning_rate": 2.0647678684081405e-05,
      "loss": 0.8602,
      "step": 537850
    },
    {
      "epoch": 1.8850652058865793,
      "grad_norm": 3.125,
      "learning_rate": 2.0647029655417703e-05,
      "loss": 0.9381,
      "step": 537860
    },
    {
      "epoch": 1.8851002533934749,
      "grad_norm": 2.71875,
      "learning_rate": 2.0646380626754e-05,
      "loss": 0.8021,
      "step": 537870
    },
    {
      "epoch": 1.8851353009003704,
      "grad_norm": 3.25,
      "learning_rate": 2.06457315980903e-05,
      "loss": 0.8395,
      "step": 537880
    },
    {
      "epoch": 1.8851703484072662,
      "grad_norm": 2.828125,
      "learning_rate": 2.0645082569426597e-05,
      "loss": 0.9093,
      "step": 537890
    },
    {
      "epoch": 1.8852053959141617,
      "grad_norm": 2.609375,
      "learning_rate": 2.0644433540762895e-05,
      "loss": 0.845,
      "step": 537900
    },
    {
      "epoch": 1.8852404434210572,
      "grad_norm": 2.890625,
      "learning_rate": 2.0643784512099193e-05,
      "loss": 0.8718,
      "step": 537910
    },
    {
      "epoch": 1.885275490927953,
      "grad_norm": 2.859375,
      "learning_rate": 2.064313548343549e-05,
      "loss": 0.7701,
      "step": 537920
    },
    {
      "epoch": 1.8853105384348483,
      "grad_norm": 3.265625,
      "learning_rate": 2.064248645477179e-05,
      "loss": 0.9292,
      "step": 537930
    },
    {
      "epoch": 1.885345585941744,
      "grad_norm": 3.359375,
      "learning_rate": 2.0641837426108087e-05,
      "loss": 0.9026,
      "step": 537940
    },
    {
      "epoch": 1.8853806334486396,
      "grad_norm": 2.765625,
      "learning_rate": 2.0641188397444385e-05,
      "loss": 0.7251,
      "step": 537950
    },
    {
      "epoch": 1.8854156809555351,
      "grad_norm": 2.40625,
      "learning_rate": 2.0640539368780683e-05,
      "loss": 0.93,
      "step": 537960
    },
    {
      "epoch": 1.885450728462431,
      "grad_norm": 2.875,
      "learning_rate": 2.063989034011698e-05,
      "loss": 0.8118,
      "step": 537970
    },
    {
      "epoch": 1.8854857759693264,
      "grad_norm": 3.28125,
      "learning_rate": 2.0639241311453282e-05,
      "loss": 0.7787,
      "step": 537980
    },
    {
      "epoch": 1.885520823476222,
      "grad_norm": 2.9375,
      "learning_rate": 2.063859228278958e-05,
      "loss": 0.8252,
      "step": 537990
    },
    {
      "epoch": 1.8855558709831177,
      "grad_norm": 2.765625,
      "learning_rate": 2.0637943254125878e-05,
      "loss": 0.9132,
      "step": 538000
    },
    {
      "epoch": 1.8855909184900133,
      "grad_norm": 2.796875,
      "learning_rate": 2.0637294225462176e-05,
      "loss": 0.8229,
      "step": 538010
    },
    {
      "epoch": 1.8856259659969088,
      "grad_norm": 2.671875,
      "learning_rate": 2.0636645196798474e-05,
      "loss": 0.7327,
      "step": 538020
    },
    {
      "epoch": 1.8856610135038046,
      "grad_norm": 2.96875,
      "learning_rate": 2.0635996168134772e-05,
      "loss": 0.8349,
      "step": 538030
    },
    {
      "epoch": 1.8856960610106999,
      "grad_norm": 2.921875,
      "learning_rate": 2.0635347139471067e-05,
      "loss": 0.8486,
      "step": 538040
    },
    {
      "epoch": 1.8857311085175956,
      "grad_norm": 3.328125,
      "learning_rate": 2.0634698110807365e-05,
      "loss": 0.8057,
      "step": 538050
    },
    {
      "epoch": 1.8857661560244912,
      "grad_norm": 3.265625,
      "learning_rate": 2.0634049082143663e-05,
      "loss": 0.7899,
      "step": 538060
    },
    {
      "epoch": 1.8858012035313867,
      "grad_norm": 2.671875,
      "learning_rate": 2.063340005347996e-05,
      "loss": 0.8871,
      "step": 538070
    },
    {
      "epoch": 1.8858362510382825,
      "grad_norm": 3.125,
      "learning_rate": 2.063275102481626e-05,
      "loss": 0.8246,
      "step": 538080
    },
    {
      "epoch": 1.885871298545178,
      "grad_norm": 3.03125,
      "learning_rate": 2.063210199615256e-05,
      "loss": 0.9231,
      "step": 538090
    },
    {
      "epoch": 1.8859063460520735,
      "grad_norm": 2.65625,
      "learning_rate": 2.0631452967488858e-05,
      "loss": 0.8002,
      "step": 538100
    },
    {
      "epoch": 1.8859413935589693,
      "grad_norm": 2.953125,
      "learning_rate": 2.0630803938825156e-05,
      "loss": 0.7042,
      "step": 538110
    },
    {
      "epoch": 1.8859764410658648,
      "grad_norm": 2.875,
      "learning_rate": 2.0630154910161454e-05,
      "loss": 0.8532,
      "step": 538120
    },
    {
      "epoch": 1.8860114885727604,
      "grad_norm": 2.921875,
      "learning_rate": 2.0629505881497752e-05,
      "loss": 0.7747,
      "step": 538130
    },
    {
      "epoch": 1.886046536079656,
      "grad_norm": 3.453125,
      "learning_rate": 2.062885685283405e-05,
      "loss": 0.7731,
      "step": 538140
    },
    {
      "epoch": 1.8860815835865514,
      "grad_norm": 2.640625,
      "learning_rate": 2.0628207824170348e-05,
      "loss": 0.833,
      "step": 538150
    },
    {
      "epoch": 1.8861166310934472,
      "grad_norm": 2.859375,
      "learning_rate": 2.0627558795506646e-05,
      "loss": 0.8178,
      "step": 538160
    },
    {
      "epoch": 1.8861516786003427,
      "grad_norm": 2.59375,
      "learning_rate": 2.0626909766842944e-05,
      "loss": 0.8082,
      "step": 538170
    },
    {
      "epoch": 1.8861867261072383,
      "grad_norm": 2.65625,
      "learning_rate": 2.0626260738179242e-05,
      "loss": 0.885,
      "step": 538180
    },
    {
      "epoch": 1.886221773614134,
      "grad_norm": 2.921875,
      "learning_rate": 2.062561170951554e-05,
      "loss": 0.8365,
      "step": 538190
    },
    {
      "epoch": 1.8862568211210295,
      "grad_norm": 3.0,
      "learning_rate": 2.0624962680851838e-05,
      "loss": 0.9253,
      "step": 538200
    },
    {
      "epoch": 1.886291868627925,
      "grad_norm": 3.265625,
      "learning_rate": 2.0624313652188136e-05,
      "loss": 0.7541,
      "step": 538210
    },
    {
      "epoch": 1.8863269161348208,
      "grad_norm": 2.71875,
      "learning_rate": 2.0623664623524434e-05,
      "loss": 0.832,
      "step": 538220
    },
    {
      "epoch": 1.8863619636417164,
      "grad_norm": 2.515625,
      "learning_rate": 2.0623015594860732e-05,
      "loss": 0.8212,
      "step": 538230
    },
    {
      "epoch": 1.886397011148612,
      "grad_norm": 2.890625,
      "learning_rate": 2.062236656619703e-05,
      "loss": 0.9693,
      "step": 538240
    },
    {
      "epoch": 1.8864320586555077,
      "grad_norm": 2.71875,
      "learning_rate": 2.0621717537533328e-05,
      "loss": 0.8362,
      "step": 538250
    },
    {
      "epoch": 1.886467106162403,
      "grad_norm": 3.6875,
      "learning_rate": 2.0621068508869626e-05,
      "loss": 0.8336,
      "step": 538260
    },
    {
      "epoch": 1.8865021536692987,
      "grad_norm": 3.03125,
      "learning_rate": 2.0620419480205924e-05,
      "loss": 0.8532,
      "step": 538270
    },
    {
      "epoch": 1.8865372011761943,
      "grad_norm": 2.9375,
      "learning_rate": 2.0619770451542222e-05,
      "loss": 0.862,
      "step": 538280
    },
    {
      "epoch": 1.8865722486830898,
      "grad_norm": 2.75,
      "learning_rate": 2.061912142287852e-05,
      "loss": 0.7609,
      "step": 538290
    },
    {
      "epoch": 1.8866072961899856,
      "grad_norm": 2.78125,
      "learning_rate": 2.0618472394214818e-05,
      "loss": 0.8714,
      "step": 538300
    },
    {
      "epoch": 1.886642343696881,
      "grad_norm": 2.515625,
      "learning_rate": 2.0617823365551116e-05,
      "loss": 0.7445,
      "step": 538310
    },
    {
      "epoch": 1.8866773912037766,
      "grad_norm": 2.9375,
      "learning_rate": 2.0617174336887414e-05,
      "loss": 0.7915,
      "step": 538320
    },
    {
      "epoch": 1.8867124387106724,
      "grad_norm": 3.171875,
      "learning_rate": 2.0616525308223712e-05,
      "loss": 0.905,
      "step": 538330
    },
    {
      "epoch": 1.886747486217568,
      "grad_norm": 2.609375,
      "learning_rate": 2.061587627956001e-05,
      "loss": 0.8132,
      "step": 538340
    },
    {
      "epoch": 1.8867825337244635,
      "grad_norm": 3.3125,
      "learning_rate": 2.061522725089631e-05,
      "loss": 0.877,
      "step": 538350
    },
    {
      "epoch": 1.8868175812313592,
      "grad_norm": 2.859375,
      "learning_rate": 2.061457822223261e-05,
      "loss": 0.798,
      "step": 538360
    },
    {
      "epoch": 1.8868526287382545,
      "grad_norm": 2.609375,
      "learning_rate": 2.0613929193568908e-05,
      "loss": 0.7122,
      "step": 538370
    },
    {
      "epoch": 1.8868876762451503,
      "grad_norm": 2.90625,
      "learning_rate": 2.0613280164905206e-05,
      "loss": 0.8602,
      "step": 538380
    },
    {
      "epoch": 1.886922723752046,
      "grad_norm": 3.375,
      "learning_rate": 2.0612631136241504e-05,
      "loss": 0.8206,
      "step": 538390
    },
    {
      "epoch": 1.8869577712589414,
      "grad_norm": 3.46875,
      "learning_rate": 2.06119821075778e-05,
      "loss": 0.76,
      "step": 538400
    },
    {
      "epoch": 1.8869928187658371,
      "grad_norm": 2.78125,
      "learning_rate": 2.0611333078914096e-05,
      "loss": 0.7548,
      "step": 538410
    },
    {
      "epoch": 1.8870278662727327,
      "grad_norm": 2.859375,
      "learning_rate": 2.0610684050250394e-05,
      "loss": 0.8433,
      "step": 538420
    },
    {
      "epoch": 1.8870629137796282,
      "grad_norm": 2.96875,
      "learning_rate": 2.0610035021586692e-05,
      "loss": 0.8905,
      "step": 538430
    },
    {
      "epoch": 1.887097961286524,
      "grad_norm": 2.5625,
      "learning_rate": 2.060938599292299e-05,
      "loss": 0.8614,
      "step": 538440
    },
    {
      "epoch": 1.8871330087934195,
      "grad_norm": 2.828125,
      "learning_rate": 2.0608736964259288e-05,
      "loss": 0.9141,
      "step": 538450
    },
    {
      "epoch": 1.887168056300315,
      "grad_norm": 2.8125,
      "learning_rate": 2.060808793559559e-05,
      "loss": 0.8369,
      "step": 538460
    },
    {
      "epoch": 1.8872031038072108,
      "grad_norm": 3.0625,
      "learning_rate": 2.0607438906931888e-05,
      "loss": 0.7377,
      "step": 538470
    },
    {
      "epoch": 1.887238151314106,
      "grad_norm": 2.703125,
      "learning_rate": 2.0606789878268186e-05,
      "loss": 0.804,
      "step": 538480
    },
    {
      "epoch": 1.8872731988210019,
      "grad_norm": 3.0625,
      "learning_rate": 2.0606140849604484e-05,
      "loss": 0.8328,
      "step": 538490
    },
    {
      "epoch": 1.8873082463278976,
      "grad_norm": 2.359375,
      "learning_rate": 2.060549182094078e-05,
      "loss": 0.8028,
      "step": 538500
    },
    {
      "epoch": 1.887343293834793,
      "grad_norm": 2.828125,
      "learning_rate": 2.060484279227708e-05,
      "loss": 0.8259,
      "step": 538510
    },
    {
      "epoch": 1.8873783413416887,
      "grad_norm": 2.890625,
      "learning_rate": 2.0604193763613378e-05,
      "loss": 0.826,
      "step": 538520
    },
    {
      "epoch": 1.8874133888485842,
      "grad_norm": 3.34375,
      "learning_rate": 2.0603544734949676e-05,
      "loss": 0.865,
      "step": 538530
    },
    {
      "epoch": 1.8874484363554798,
      "grad_norm": 2.875,
      "learning_rate": 2.0602895706285974e-05,
      "loss": 0.723,
      "step": 538540
    },
    {
      "epoch": 1.8874834838623755,
      "grad_norm": 2.796875,
      "learning_rate": 2.060224667762227e-05,
      "loss": 0.8564,
      "step": 538550
    },
    {
      "epoch": 1.887518531369271,
      "grad_norm": 2.65625,
      "learning_rate": 2.060159764895857e-05,
      "loss": 0.8548,
      "step": 538560
    },
    {
      "epoch": 1.8875535788761666,
      "grad_norm": 3.171875,
      "learning_rate": 2.0600948620294868e-05,
      "loss": 0.852,
      "step": 538570
    },
    {
      "epoch": 1.8875886263830624,
      "grad_norm": 3.671875,
      "learning_rate": 2.0600299591631166e-05,
      "loss": 0.8356,
      "step": 538580
    },
    {
      "epoch": 1.8876236738899579,
      "grad_norm": 3.15625,
      "learning_rate": 2.0599650562967464e-05,
      "loss": 0.7483,
      "step": 538590
    },
    {
      "epoch": 1.8876587213968534,
      "grad_norm": 2.75,
      "learning_rate": 2.059900153430376e-05,
      "loss": 0.7625,
      "step": 538600
    },
    {
      "epoch": 1.8876937689037492,
      "grad_norm": 2.6875,
      "learning_rate": 2.059835250564006e-05,
      "loss": 0.8559,
      "step": 538610
    },
    {
      "epoch": 1.8877288164106445,
      "grad_norm": 2.828125,
      "learning_rate": 2.0597703476976358e-05,
      "loss": 0.7493,
      "step": 538620
    },
    {
      "epoch": 1.8877638639175403,
      "grad_norm": 2.671875,
      "learning_rate": 2.0597054448312656e-05,
      "loss": 0.8655,
      "step": 538630
    },
    {
      "epoch": 1.8877989114244358,
      "grad_norm": 2.734375,
      "learning_rate": 2.0596405419648954e-05,
      "loss": 0.8115,
      "step": 538640
    },
    {
      "epoch": 1.8878339589313313,
      "grad_norm": 2.53125,
      "learning_rate": 2.059575639098525e-05,
      "loss": 0.7608,
      "step": 538650
    },
    {
      "epoch": 1.887869006438227,
      "grad_norm": 3.296875,
      "learning_rate": 2.059510736232155e-05,
      "loss": 0.8057,
      "step": 538660
    },
    {
      "epoch": 1.8879040539451226,
      "grad_norm": 3.453125,
      "learning_rate": 2.0594458333657848e-05,
      "loss": 0.8503,
      "step": 538670
    },
    {
      "epoch": 1.8879391014520182,
      "grad_norm": 2.75,
      "learning_rate": 2.0593809304994146e-05,
      "loss": 0.8716,
      "step": 538680
    },
    {
      "epoch": 1.887974148958914,
      "grad_norm": 3.09375,
      "learning_rate": 2.0593160276330444e-05,
      "loss": 0.8464,
      "step": 538690
    },
    {
      "epoch": 1.8880091964658094,
      "grad_norm": 2.890625,
      "learning_rate": 2.059251124766674e-05,
      "loss": 0.8169,
      "step": 538700
    },
    {
      "epoch": 1.888044243972705,
      "grad_norm": 2.890625,
      "learning_rate": 2.0591862219003043e-05,
      "loss": 0.8421,
      "step": 538710
    },
    {
      "epoch": 1.8880792914796007,
      "grad_norm": 2.9375,
      "learning_rate": 2.059121319033934e-05,
      "loss": 0.8541,
      "step": 538720
    },
    {
      "epoch": 1.888114338986496,
      "grad_norm": 3.203125,
      "learning_rate": 2.059056416167564e-05,
      "loss": 0.7934,
      "step": 538730
    },
    {
      "epoch": 1.8881493864933918,
      "grad_norm": 2.859375,
      "learning_rate": 2.0589915133011937e-05,
      "loss": 0.7932,
      "step": 538740
    },
    {
      "epoch": 1.8881844340002873,
      "grad_norm": 2.84375,
      "learning_rate": 2.0589266104348235e-05,
      "loss": 0.7927,
      "step": 538750
    },
    {
      "epoch": 1.8882194815071829,
      "grad_norm": 2.515625,
      "learning_rate": 2.0588617075684533e-05,
      "loss": 0.7924,
      "step": 538760
    },
    {
      "epoch": 1.8882545290140786,
      "grad_norm": 2.984375,
      "learning_rate": 2.058796804702083e-05,
      "loss": 0.8668,
      "step": 538770
    },
    {
      "epoch": 1.8882895765209742,
      "grad_norm": 3.0,
      "learning_rate": 2.058731901835713e-05,
      "loss": 0.7812,
      "step": 538780
    },
    {
      "epoch": 1.8883246240278697,
      "grad_norm": 2.9375,
      "learning_rate": 2.0586669989693424e-05,
      "loss": 0.7438,
      "step": 538790
    },
    {
      "epoch": 1.8883596715347655,
      "grad_norm": 3.0,
      "learning_rate": 2.058602096102972e-05,
      "loss": 0.8043,
      "step": 538800
    },
    {
      "epoch": 1.888394719041661,
      "grad_norm": 2.609375,
      "learning_rate": 2.058537193236602e-05,
      "loss": 0.7147,
      "step": 538810
    },
    {
      "epoch": 1.8884297665485565,
      "grad_norm": 2.65625,
      "learning_rate": 2.0584722903702318e-05,
      "loss": 0.7986,
      "step": 538820
    },
    {
      "epoch": 1.8884648140554523,
      "grad_norm": 3.21875,
      "learning_rate": 2.058407387503862e-05,
      "loss": 0.8399,
      "step": 538830
    },
    {
      "epoch": 1.8884998615623476,
      "grad_norm": 3.453125,
      "learning_rate": 2.0583424846374917e-05,
      "loss": 0.8879,
      "step": 538840
    },
    {
      "epoch": 1.8885349090692434,
      "grad_norm": 2.609375,
      "learning_rate": 2.0582775817711215e-05,
      "loss": 0.7269,
      "step": 538850
    },
    {
      "epoch": 1.888569956576139,
      "grad_norm": 2.984375,
      "learning_rate": 2.0582126789047513e-05,
      "loss": 0.8479,
      "step": 538860
    },
    {
      "epoch": 1.8886050040830344,
      "grad_norm": 3.296875,
      "learning_rate": 2.058147776038381e-05,
      "loss": 0.9247,
      "step": 538870
    },
    {
      "epoch": 1.8886400515899302,
      "grad_norm": 2.8125,
      "learning_rate": 2.058082873172011e-05,
      "loss": 0.8962,
      "step": 538880
    },
    {
      "epoch": 1.8886750990968257,
      "grad_norm": 3.125,
      "learning_rate": 2.0580179703056407e-05,
      "loss": 0.8901,
      "step": 538890
    },
    {
      "epoch": 1.8887101466037213,
      "grad_norm": 3.375,
      "learning_rate": 2.0579530674392705e-05,
      "loss": 0.8953,
      "step": 538900
    },
    {
      "epoch": 1.888745194110617,
      "grad_norm": 2.8125,
      "learning_rate": 2.0578881645729003e-05,
      "loss": 0.7808,
      "step": 538910
    },
    {
      "epoch": 1.8887802416175126,
      "grad_norm": 2.46875,
      "learning_rate": 2.05782326170653e-05,
      "loss": 0.8098,
      "step": 538920
    },
    {
      "epoch": 1.888815289124408,
      "grad_norm": 2.796875,
      "learning_rate": 2.05775835884016e-05,
      "loss": 0.8312,
      "step": 538930
    },
    {
      "epoch": 1.8888503366313039,
      "grad_norm": 2.96875,
      "learning_rate": 2.0576934559737897e-05,
      "loss": 0.7753,
      "step": 538940
    },
    {
      "epoch": 1.8888853841381992,
      "grad_norm": 2.96875,
      "learning_rate": 2.0576285531074195e-05,
      "loss": 0.8437,
      "step": 538950
    },
    {
      "epoch": 1.888920431645095,
      "grad_norm": 3.09375,
      "learning_rate": 2.0575636502410496e-05,
      "loss": 0.8335,
      "step": 538960
    },
    {
      "epoch": 1.8889554791519905,
      "grad_norm": 3.1875,
      "learning_rate": 2.0574987473746794e-05,
      "loss": 0.8916,
      "step": 538970
    },
    {
      "epoch": 1.888990526658886,
      "grad_norm": 2.6875,
      "learning_rate": 2.057433844508309e-05,
      "loss": 0.861,
      "step": 538980
    },
    {
      "epoch": 1.8890255741657818,
      "grad_norm": 3.125,
      "learning_rate": 2.0573689416419387e-05,
      "loss": 0.8633,
      "step": 538990
    },
    {
      "epoch": 1.8890606216726773,
      "grad_norm": 3.234375,
      "learning_rate": 2.0573040387755685e-05,
      "loss": 0.8545,
      "step": 539000
    },
    {
      "epoch": 1.8890956691795728,
      "grad_norm": 2.46875,
      "learning_rate": 2.0572391359091983e-05,
      "loss": 0.846,
      "step": 539010
    },
    {
      "epoch": 1.8891307166864686,
      "grad_norm": 2.703125,
      "learning_rate": 2.057174233042828e-05,
      "loss": 0.759,
      "step": 539020
    },
    {
      "epoch": 1.8891657641933641,
      "grad_norm": 3.21875,
      "learning_rate": 2.057109330176458e-05,
      "loss": 0.7981,
      "step": 539030
    },
    {
      "epoch": 1.8892008117002597,
      "grad_norm": 3.21875,
      "learning_rate": 2.0570444273100877e-05,
      "loss": 0.9443,
      "step": 539040
    },
    {
      "epoch": 1.8892358592071554,
      "grad_norm": 3.078125,
      "learning_rate": 2.0569795244437175e-05,
      "loss": 0.7795,
      "step": 539050
    },
    {
      "epoch": 1.8892709067140507,
      "grad_norm": 2.859375,
      "learning_rate": 2.0569146215773473e-05,
      "loss": 0.8442,
      "step": 539060
    },
    {
      "epoch": 1.8893059542209465,
      "grad_norm": 2.71875,
      "learning_rate": 2.056849718710977e-05,
      "loss": 0.7836,
      "step": 539070
    },
    {
      "epoch": 1.8893410017278423,
      "grad_norm": 3.109375,
      "learning_rate": 2.0567848158446072e-05,
      "loss": 0.8422,
      "step": 539080
    },
    {
      "epoch": 1.8893760492347376,
      "grad_norm": 2.90625,
      "learning_rate": 2.056719912978237e-05,
      "loss": 0.8643,
      "step": 539090
    },
    {
      "epoch": 1.8894110967416333,
      "grad_norm": 2.546875,
      "learning_rate": 2.056655010111867e-05,
      "loss": 0.8176,
      "step": 539100
    },
    {
      "epoch": 1.8894461442485289,
      "grad_norm": 3.5625,
      "learning_rate": 2.0565901072454966e-05,
      "loss": 0.8742,
      "step": 539110
    },
    {
      "epoch": 1.8894811917554244,
      "grad_norm": 2.75,
      "learning_rate": 2.0565252043791264e-05,
      "loss": 0.9738,
      "step": 539120
    },
    {
      "epoch": 1.8895162392623202,
      "grad_norm": 3.78125,
      "learning_rate": 2.0564603015127562e-05,
      "loss": 0.7926,
      "step": 539130
    },
    {
      "epoch": 1.8895512867692157,
      "grad_norm": 3.125,
      "learning_rate": 2.056395398646386e-05,
      "loss": 0.8802,
      "step": 539140
    },
    {
      "epoch": 1.8895863342761112,
      "grad_norm": 3.5625,
      "learning_rate": 2.056330495780016e-05,
      "loss": 0.9282,
      "step": 539150
    },
    {
      "epoch": 1.889621381783007,
      "grad_norm": 2.78125,
      "learning_rate": 2.0562655929136453e-05,
      "loss": 0.7991,
      "step": 539160
    },
    {
      "epoch": 1.8896564292899025,
      "grad_norm": 3.296875,
      "learning_rate": 2.056200690047275e-05,
      "loss": 0.8124,
      "step": 539170
    },
    {
      "epoch": 1.889691476796798,
      "grad_norm": 3.34375,
      "learning_rate": 2.056135787180905e-05,
      "loss": 0.7794,
      "step": 539180
    },
    {
      "epoch": 1.8897265243036938,
      "grad_norm": 2.84375,
      "learning_rate": 2.056070884314535e-05,
      "loss": 0.8354,
      "step": 539190
    },
    {
      "epoch": 1.8897615718105891,
      "grad_norm": 2.4375,
      "learning_rate": 2.056005981448165e-05,
      "loss": 0.6661,
      "step": 539200
    },
    {
      "epoch": 1.8897966193174849,
      "grad_norm": 3.34375,
      "learning_rate": 2.0559410785817946e-05,
      "loss": 0.8805,
      "step": 539210
    },
    {
      "epoch": 1.8898316668243804,
      "grad_norm": 2.890625,
      "learning_rate": 2.0558761757154244e-05,
      "loss": 0.9079,
      "step": 539220
    },
    {
      "epoch": 1.889866714331276,
      "grad_norm": 3.109375,
      "learning_rate": 2.0558112728490542e-05,
      "loss": 0.798,
      "step": 539230
    },
    {
      "epoch": 1.8899017618381717,
      "grad_norm": 2.390625,
      "learning_rate": 2.055746369982684e-05,
      "loss": 0.7825,
      "step": 539240
    },
    {
      "epoch": 1.8899368093450672,
      "grad_norm": 2.921875,
      "learning_rate": 2.055681467116314e-05,
      "loss": 0.8498,
      "step": 539250
    },
    {
      "epoch": 1.8899718568519628,
      "grad_norm": 3.015625,
      "learning_rate": 2.0556165642499436e-05,
      "loss": 0.853,
      "step": 539260
    },
    {
      "epoch": 1.8900069043588585,
      "grad_norm": 2.984375,
      "learning_rate": 2.0555516613835734e-05,
      "loss": 0.8634,
      "step": 539270
    },
    {
      "epoch": 1.890041951865754,
      "grad_norm": 3.0625,
      "learning_rate": 2.0554867585172032e-05,
      "loss": 0.8352,
      "step": 539280
    },
    {
      "epoch": 1.8900769993726496,
      "grad_norm": 2.5,
      "learning_rate": 2.055421855650833e-05,
      "loss": 0.8246,
      "step": 539290
    },
    {
      "epoch": 1.8901120468795454,
      "grad_norm": 3.34375,
      "learning_rate": 2.055356952784463e-05,
      "loss": 0.853,
      "step": 539300
    },
    {
      "epoch": 1.8901470943864407,
      "grad_norm": 2.640625,
      "learning_rate": 2.0552920499180926e-05,
      "loss": 0.6863,
      "step": 539310
    },
    {
      "epoch": 1.8901821418933364,
      "grad_norm": 3.609375,
      "learning_rate": 2.0552271470517224e-05,
      "loss": 0.8996,
      "step": 539320
    },
    {
      "epoch": 1.890217189400232,
      "grad_norm": 2.59375,
      "learning_rate": 2.0551622441853526e-05,
      "loss": 0.8343,
      "step": 539330
    },
    {
      "epoch": 1.8902522369071275,
      "grad_norm": 2.8125,
      "learning_rate": 2.0550973413189824e-05,
      "loss": 0.8021,
      "step": 539340
    },
    {
      "epoch": 1.8902872844140233,
      "grad_norm": 2.859375,
      "learning_rate": 2.055032438452612e-05,
      "loss": 0.8698,
      "step": 539350
    },
    {
      "epoch": 1.8903223319209188,
      "grad_norm": 2.703125,
      "learning_rate": 2.0549675355862416e-05,
      "loss": 0.8244,
      "step": 539360
    },
    {
      "epoch": 1.8903573794278143,
      "grad_norm": 3.28125,
      "learning_rate": 2.0549026327198714e-05,
      "loss": 0.8753,
      "step": 539370
    },
    {
      "epoch": 1.89039242693471,
      "grad_norm": 3.09375,
      "learning_rate": 2.0548377298535012e-05,
      "loss": 0.788,
      "step": 539380
    },
    {
      "epoch": 1.8904274744416056,
      "grad_norm": 3.140625,
      "learning_rate": 2.054772826987131e-05,
      "loss": 0.8128,
      "step": 539390
    },
    {
      "epoch": 1.8904625219485012,
      "grad_norm": 3.015625,
      "learning_rate": 2.054707924120761e-05,
      "loss": 0.8493,
      "step": 539400
    },
    {
      "epoch": 1.890497569455397,
      "grad_norm": 2.828125,
      "learning_rate": 2.0546430212543906e-05,
      "loss": 0.88,
      "step": 539410
    },
    {
      "epoch": 1.8905326169622922,
      "grad_norm": 3.0,
      "learning_rate": 2.0545781183880204e-05,
      "loss": 0.8579,
      "step": 539420
    },
    {
      "epoch": 1.890567664469188,
      "grad_norm": 2.734375,
      "learning_rate": 2.0545132155216502e-05,
      "loss": 0.8165,
      "step": 539430
    },
    {
      "epoch": 1.8906027119760835,
      "grad_norm": 2.53125,
      "learning_rate": 2.0544483126552804e-05,
      "loss": 0.7935,
      "step": 539440
    },
    {
      "epoch": 1.890637759482979,
      "grad_norm": 2.59375,
      "learning_rate": 2.0543834097889102e-05,
      "loss": 0.8669,
      "step": 539450
    },
    {
      "epoch": 1.8906728069898748,
      "grad_norm": 3.140625,
      "learning_rate": 2.05431850692254e-05,
      "loss": 0.8663,
      "step": 539460
    },
    {
      "epoch": 1.8907078544967704,
      "grad_norm": 3.25,
      "learning_rate": 2.0542536040561698e-05,
      "loss": 0.8496,
      "step": 539470
    },
    {
      "epoch": 1.890742902003666,
      "grad_norm": 3.4375,
      "learning_rate": 2.0541887011897996e-05,
      "loss": 0.7659,
      "step": 539480
    },
    {
      "epoch": 1.8907779495105617,
      "grad_norm": 2.828125,
      "learning_rate": 2.0541237983234294e-05,
      "loss": 0.9005,
      "step": 539490
    },
    {
      "epoch": 1.8908129970174572,
      "grad_norm": 2.65625,
      "learning_rate": 2.0540588954570592e-05,
      "loss": 0.7627,
      "step": 539500
    },
    {
      "epoch": 1.8908480445243527,
      "grad_norm": 2.734375,
      "learning_rate": 2.053993992590689e-05,
      "loss": 0.7864,
      "step": 539510
    },
    {
      "epoch": 1.8908830920312485,
      "grad_norm": 2.703125,
      "learning_rate": 2.0539290897243188e-05,
      "loss": 0.8365,
      "step": 539520
    },
    {
      "epoch": 1.8909181395381438,
      "grad_norm": 2.96875,
      "learning_rate": 2.0538641868579486e-05,
      "loss": 0.8251,
      "step": 539530
    },
    {
      "epoch": 1.8909531870450396,
      "grad_norm": 3.234375,
      "learning_rate": 2.053799283991578e-05,
      "loss": 0.9018,
      "step": 539540
    },
    {
      "epoch": 1.890988234551935,
      "grad_norm": 2.875,
      "learning_rate": 2.053734381125208e-05,
      "loss": 0.8487,
      "step": 539550
    },
    {
      "epoch": 1.8910232820588306,
      "grad_norm": 2.890625,
      "learning_rate": 2.053669478258838e-05,
      "loss": 0.9348,
      "step": 539560
    },
    {
      "epoch": 1.8910583295657264,
      "grad_norm": 3.0625,
      "learning_rate": 2.0536045753924678e-05,
      "loss": 0.7649,
      "step": 539570
    },
    {
      "epoch": 1.891093377072622,
      "grad_norm": 3.0,
      "learning_rate": 2.0535396725260976e-05,
      "loss": 0.8031,
      "step": 539580
    },
    {
      "epoch": 1.8911284245795175,
      "grad_norm": 2.90625,
      "learning_rate": 2.0534747696597274e-05,
      "loss": 0.8286,
      "step": 539590
    },
    {
      "epoch": 1.8911634720864132,
      "grad_norm": 2.625,
      "learning_rate": 2.0534098667933572e-05,
      "loss": 0.7656,
      "step": 539600
    },
    {
      "epoch": 1.8911985195933088,
      "grad_norm": 2.6875,
      "learning_rate": 2.053344963926987e-05,
      "loss": 0.8675,
      "step": 539610
    },
    {
      "epoch": 1.8912335671002043,
      "grad_norm": 2.640625,
      "learning_rate": 2.0532800610606168e-05,
      "loss": 0.77,
      "step": 539620
    },
    {
      "epoch": 1.8912686146071,
      "grad_norm": 3.078125,
      "learning_rate": 2.0532151581942466e-05,
      "loss": 0.8163,
      "step": 539630
    },
    {
      "epoch": 1.8913036621139954,
      "grad_norm": 2.96875,
      "learning_rate": 2.0531502553278764e-05,
      "loss": 0.8534,
      "step": 539640
    },
    {
      "epoch": 1.8913387096208911,
      "grad_norm": 2.515625,
      "learning_rate": 2.0530853524615062e-05,
      "loss": 0.7497,
      "step": 539650
    },
    {
      "epoch": 1.8913737571277869,
      "grad_norm": 3.5,
      "learning_rate": 2.053020449595136e-05,
      "loss": 0.7696,
      "step": 539660
    },
    {
      "epoch": 1.8914088046346822,
      "grad_norm": 3.375,
      "learning_rate": 2.0529555467287658e-05,
      "loss": 0.826,
      "step": 539670
    },
    {
      "epoch": 1.891443852141578,
      "grad_norm": 3.0,
      "learning_rate": 2.0528906438623956e-05,
      "loss": 0.7697,
      "step": 539680
    },
    {
      "epoch": 1.8914788996484735,
      "grad_norm": 3.53125,
      "learning_rate": 2.0528257409960254e-05,
      "loss": 0.8578,
      "step": 539690
    },
    {
      "epoch": 1.891513947155369,
      "grad_norm": 4.46875,
      "learning_rate": 2.0527608381296555e-05,
      "loss": 0.8448,
      "step": 539700
    },
    {
      "epoch": 1.8915489946622648,
      "grad_norm": 3.109375,
      "learning_rate": 2.0526959352632853e-05,
      "loss": 0.905,
      "step": 539710
    },
    {
      "epoch": 1.8915840421691603,
      "grad_norm": 2.796875,
      "learning_rate": 2.052631032396915e-05,
      "loss": 0.7789,
      "step": 539720
    },
    {
      "epoch": 1.8916190896760559,
      "grad_norm": 2.90625,
      "learning_rate": 2.0525661295305446e-05,
      "loss": 0.8728,
      "step": 539730
    },
    {
      "epoch": 1.8916541371829516,
      "grad_norm": 2.9375,
      "learning_rate": 2.0525012266641744e-05,
      "loss": 0.8556,
      "step": 539740
    },
    {
      "epoch": 1.891689184689847,
      "grad_norm": 2.953125,
      "learning_rate": 2.0524363237978042e-05,
      "loss": 0.8527,
      "step": 539750
    },
    {
      "epoch": 1.8917242321967427,
      "grad_norm": 3.15625,
      "learning_rate": 2.052371420931434e-05,
      "loss": 0.7775,
      "step": 539760
    },
    {
      "epoch": 1.8917592797036384,
      "grad_norm": 2.828125,
      "learning_rate": 2.0523065180650638e-05,
      "loss": 0.8924,
      "step": 539770
    },
    {
      "epoch": 1.8917943272105338,
      "grad_norm": 2.6875,
      "learning_rate": 2.0522416151986936e-05,
      "loss": 0.8399,
      "step": 539780
    },
    {
      "epoch": 1.8918293747174295,
      "grad_norm": 2.84375,
      "learning_rate": 2.0521767123323234e-05,
      "loss": 0.8727,
      "step": 539790
    },
    {
      "epoch": 1.891864422224325,
      "grad_norm": 2.546875,
      "learning_rate": 2.0521118094659532e-05,
      "loss": 0.8054,
      "step": 539800
    },
    {
      "epoch": 1.8918994697312206,
      "grad_norm": 2.71875,
      "learning_rate": 2.0520469065995833e-05,
      "loss": 0.758,
      "step": 539810
    },
    {
      "epoch": 1.8919345172381163,
      "grad_norm": 2.5625,
      "learning_rate": 2.051982003733213e-05,
      "loss": 0.8711,
      "step": 539820
    },
    {
      "epoch": 1.8919695647450119,
      "grad_norm": 2.953125,
      "learning_rate": 2.051917100866843e-05,
      "loss": 0.7691,
      "step": 539830
    },
    {
      "epoch": 1.8920046122519074,
      "grad_norm": 2.328125,
      "learning_rate": 2.0518521980004727e-05,
      "loss": 0.8139,
      "step": 539840
    },
    {
      "epoch": 1.8920396597588032,
      "grad_norm": 2.953125,
      "learning_rate": 2.0517872951341025e-05,
      "loss": 0.8624,
      "step": 539850
    },
    {
      "epoch": 1.8920747072656987,
      "grad_norm": 2.625,
      "learning_rate": 2.0517223922677323e-05,
      "loss": 0.8057,
      "step": 539860
    },
    {
      "epoch": 1.8921097547725942,
      "grad_norm": 2.78125,
      "learning_rate": 2.051657489401362e-05,
      "loss": 0.95,
      "step": 539870
    },
    {
      "epoch": 1.89214480227949,
      "grad_norm": 3.171875,
      "learning_rate": 2.051592586534992e-05,
      "loss": 0.8122,
      "step": 539880
    },
    {
      "epoch": 1.8921798497863853,
      "grad_norm": 3.078125,
      "learning_rate": 2.0515276836686217e-05,
      "loss": 0.7519,
      "step": 539890
    },
    {
      "epoch": 1.892214897293281,
      "grad_norm": 3.171875,
      "learning_rate": 2.0514627808022515e-05,
      "loss": 0.7535,
      "step": 539900
    },
    {
      "epoch": 1.8922499448001766,
      "grad_norm": 2.953125,
      "learning_rate": 2.0513978779358813e-05,
      "loss": 0.8833,
      "step": 539910
    },
    {
      "epoch": 1.8922849923070721,
      "grad_norm": 2.8125,
      "learning_rate": 2.0513329750695108e-05,
      "loss": 0.8257,
      "step": 539920
    },
    {
      "epoch": 1.892320039813968,
      "grad_norm": 2.9375,
      "learning_rate": 2.051268072203141e-05,
      "loss": 0.8177,
      "step": 539930
    },
    {
      "epoch": 1.8923550873208634,
      "grad_norm": 2.734375,
      "learning_rate": 2.0512031693367707e-05,
      "loss": 0.8794,
      "step": 539940
    },
    {
      "epoch": 1.892390134827759,
      "grad_norm": 3.015625,
      "learning_rate": 2.0511382664704005e-05,
      "loss": 0.7646,
      "step": 539950
    },
    {
      "epoch": 1.8924251823346547,
      "grad_norm": 2.9375,
      "learning_rate": 2.0510733636040303e-05,
      "loss": 0.8458,
      "step": 539960
    },
    {
      "epoch": 1.8924602298415503,
      "grad_norm": 2.84375,
      "learning_rate": 2.05100846073766e-05,
      "loss": 0.8579,
      "step": 539970
    },
    {
      "epoch": 1.8924952773484458,
      "grad_norm": 3.015625,
      "learning_rate": 2.05094355787129e-05,
      "loss": 0.7383,
      "step": 539980
    },
    {
      "epoch": 1.8925303248553416,
      "grad_norm": 2.609375,
      "learning_rate": 2.0508786550049197e-05,
      "loss": 0.7685,
      "step": 539990
    },
    {
      "epoch": 1.8925653723622369,
      "grad_norm": 2.78125,
      "learning_rate": 2.0508137521385495e-05,
      "loss": 0.808,
      "step": 540000
    },
    {
      "epoch": 1.8925653723622369,
      "eval_loss": 0.7756980657577515,
      "eval_runtime": 555.3126,
      "eval_samples_per_second": 685.084,
      "eval_steps_per_second": 57.09,
      "step": 540000
    },
    {
      "epoch": 1.8926004198691326,
      "grad_norm": 2.828125,
      "learning_rate": 2.0507488492721793e-05,
      "loss": 0.8017,
      "step": 540010
    },
    {
      "epoch": 1.8926354673760282,
      "grad_norm": 2.9375,
      "learning_rate": 2.050683946405809e-05,
      "loss": 0.8242,
      "step": 540020
    },
    {
      "epoch": 1.8926705148829237,
      "grad_norm": 2.921875,
      "learning_rate": 2.050619043539439e-05,
      "loss": 0.7941,
      "step": 540030
    },
    {
      "epoch": 1.8927055623898195,
      "grad_norm": 3.59375,
      "learning_rate": 2.0505541406730687e-05,
      "loss": 0.8532,
      "step": 540040
    },
    {
      "epoch": 1.892740609896715,
      "grad_norm": 3.09375,
      "learning_rate": 2.0504892378066985e-05,
      "loss": 0.8168,
      "step": 540050
    },
    {
      "epoch": 1.8927756574036105,
      "grad_norm": 2.734375,
      "learning_rate": 2.0504243349403287e-05,
      "loss": 0.7406,
      "step": 540060
    },
    {
      "epoch": 1.8928107049105063,
      "grad_norm": 2.578125,
      "learning_rate": 2.0503594320739585e-05,
      "loss": 0.7771,
      "step": 540070
    },
    {
      "epoch": 1.8928457524174018,
      "grad_norm": 3.171875,
      "learning_rate": 2.0502945292075883e-05,
      "loss": 0.8982,
      "step": 540080
    },
    {
      "epoch": 1.8928807999242974,
      "grad_norm": 2.078125,
      "learning_rate": 2.050229626341218e-05,
      "loss": 0.756,
      "step": 540090
    },
    {
      "epoch": 1.8929158474311931,
      "grad_norm": 2.90625,
      "learning_rate": 2.0501647234748475e-05,
      "loss": 0.8238,
      "step": 540100
    },
    {
      "epoch": 1.8929508949380884,
      "grad_norm": 3.15625,
      "learning_rate": 2.0500998206084773e-05,
      "loss": 0.8383,
      "step": 540110
    },
    {
      "epoch": 1.8929859424449842,
      "grad_norm": 2.859375,
      "learning_rate": 2.050034917742107e-05,
      "loss": 0.7517,
      "step": 540120
    },
    {
      "epoch": 1.8930209899518797,
      "grad_norm": 2.640625,
      "learning_rate": 2.049970014875737e-05,
      "loss": 0.8592,
      "step": 540130
    },
    {
      "epoch": 1.8930560374587753,
      "grad_norm": 2.71875,
      "learning_rate": 2.0499051120093667e-05,
      "loss": 0.8177,
      "step": 540140
    },
    {
      "epoch": 1.893091084965671,
      "grad_norm": 3.265625,
      "learning_rate": 2.0498402091429965e-05,
      "loss": 0.8864,
      "step": 540150
    },
    {
      "epoch": 1.8931261324725666,
      "grad_norm": 2.765625,
      "learning_rate": 2.0497753062766263e-05,
      "loss": 0.8439,
      "step": 540160
    },
    {
      "epoch": 1.893161179979462,
      "grad_norm": 3.484375,
      "learning_rate": 2.049710403410256e-05,
      "loss": 0.8611,
      "step": 540170
    },
    {
      "epoch": 1.8931962274863579,
      "grad_norm": 2.3125,
      "learning_rate": 2.0496455005438863e-05,
      "loss": 0.7233,
      "step": 540180
    },
    {
      "epoch": 1.8932312749932534,
      "grad_norm": 3.03125,
      "learning_rate": 2.049580597677516e-05,
      "loss": 0.8663,
      "step": 540190
    },
    {
      "epoch": 1.893266322500149,
      "grad_norm": 3.171875,
      "learning_rate": 2.049515694811146e-05,
      "loss": 0.7561,
      "step": 540200
    },
    {
      "epoch": 1.8933013700070447,
      "grad_norm": 3.328125,
      "learning_rate": 2.0494507919447757e-05,
      "loss": 0.8306,
      "step": 540210
    },
    {
      "epoch": 1.89333641751394,
      "grad_norm": 2.328125,
      "learning_rate": 2.0493858890784055e-05,
      "loss": 0.8362,
      "step": 540220
    },
    {
      "epoch": 1.8933714650208358,
      "grad_norm": 2.734375,
      "learning_rate": 2.0493209862120353e-05,
      "loss": 0.871,
      "step": 540230
    },
    {
      "epoch": 1.8934065125277313,
      "grad_norm": 3.53125,
      "learning_rate": 2.049256083345665e-05,
      "loss": 0.8376,
      "step": 540240
    },
    {
      "epoch": 1.8934415600346268,
      "grad_norm": 2.984375,
      "learning_rate": 2.049191180479295e-05,
      "loss": 0.8431,
      "step": 540250
    },
    {
      "epoch": 1.8934766075415226,
      "grad_norm": 2.671875,
      "learning_rate": 2.0491262776129247e-05,
      "loss": 0.781,
      "step": 540260
    },
    {
      "epoch": 1.8935116550484181,
      "grad_norm": 3.375,
      "learning_rate": 2.0490613747465545e-05,
      "loss": 0.8323,
      "step": 540270
    },
    {
      "epoch": 1.8935467025553137,
      "grad_norm": 2.59375,
      "learning_rate": 2.0489964718801843e-05,
      "loss": 0.7583,
      "step": 540280
    },
    {
      "epoch": 1.8935817500622094,
      "grad_norm": 3.328125,
      "learning_rate": 2.048931569013814e-05,
      "loss": 0.8688,
      "step": 540290
    },
    {
      "epoch": 1.893616797569105,
      "grad_norm": 2.9375,
      "learning_rate": 2.048866666147444e-05,
      "loss": 0.831,
      "step": 540300
    },
    {
      "epoch": 1.8936518450760005,
      "grad_norm": 3.34375,
      "learning_rate": 2.0488017632810737e-05,
      "loss": 0.7881,
      "step": 540310
    },
    {
      "epoch": 1.8936868925828962,
      "grad_norm": 2.796875,
      "learning_rate": 2.0487368604147035e-05,
      "loss": 0.8772,
      "step": 540320
    },
    {
      "epoch": 1.8937219400897916,
      "grad_norm": 2.890625,
      "learning_rate": 2.0486719575483333e-05,
      "loss": 0.7233,
      "step": 540330
    },
    {
      "epoch": 1.8937569875966873,
      "grad_norm": 3.109375,
      "learning_rate": 2.048607054681963e-05,
      "loss": 0.776,
      "step": 540340
    },
    {
      "epoch": 1.893792035103583,
      "grad_norm": 2.765625,
      "learning_rate": 2.048542151815593e-05,
      "loss": 0.828,
      "step": 540350
    },
    {
      "epoch": 1.8938270826104784,
      "grad_norm": 2.703125,
      "learning_rate": 2.0484772489492227e-05,
      "loss": 0.7299,
      "step": 540360
    },
    {
      "epoch": 1.8938621301173741,
      "grad_norm": 2.78125,
      "learning_rate": 2.0484123460828525e-05,
      "loss": 0.8306,
      "step": 540370
    },
    {
      "epoch": 1.8938971776242697,
      "grad_norm": 2.765625,
      "learning_rate": 2.0483474432164823e-05,
      "loss": 0.7554,
      "step": 540380
    },
    {
      "epoch": 1.8939322251311652,
      "grad_norm": 2.796875,
      "learning_rate": 2.048282540350112e-05,
      "loss": 0.776,
      "step": 540390
    },
    {
      "epoch": 1.893967272638061,
      "grad_norm": 2.703125,
      "learning_rate": 2.048217637483742e-05,
      "loss": 0.875,
      "step": 540400
    },
    {
      "epoch": 1.8940023201449565,
      "grad_norm": 2.78125,
      "learning_rate": 2.0481527346173717e-05,
      "loss": 0.8039,
      "step": 540410
    },
    {
      "epoch": 1.894037367651852,
      "grad_norm": 2.609375,
      "learning_rate": 2.0480878317510015e-05,
      "loss": 0.8799,
      "step": 540420
    },
    {
      "epoch": 1.8940724151587478,
      "grad_norm": 2.59375,
      "learning_rate": 2.0480229288846316e-05,
      "loss": 0.7902,
      "step": 540430
    },
    {
      "epoch": 1.8941074626656431,
      "grad_norm": 3.09375,
      "learning_rate": 2.0479580260182614e-05,
      "loss": 0.7657,
      "step": 540440
    },
    {
      "epoch": 1.8941425101725389,
      "grad_norm": 3.15625,
      "learning_rate": 2.0478931231518912e-05,
      "loss": 0.8252,
      "step": 540450
    },
    {
      "epoch": 1.8941775576794346,
      "grad_norm": 3.640625,
      "learning_rate": 2.047828220285521e-05,
      "loss": 0.7953,
      "step": 540460
    },
    {
      "epoch": 1.89421260518633,
      "grad_norm": 3.046875,
      "learning_rate": 2.0477633174191508e-05,
      "loss": 0.8309,
      "step": 540470
    },
    {
      "epoch": 1.8942476526932257,
      "grad_norm": 2.671875,
      "learning_rate": 2.0476984145527803e-05,
      "loss": 0.7898,
      "step": 540480
    },
    {
      "epoch": 1.8942827002001212,
      "grad_norm": 2.828125,
      "learning_rate": 2.04763351168641e-05,
      "loss": 0.7501,
      "step": 540490
    },
    {
      "epoch": 1.8943177477070168,
      "grad_norm": 2.578125,
      "learning_rate": 2.04756860882004e-05,
      "loss": 0.7667,
      "step": 540500
    },
    {
      "epoch": 1.8943527952139125,
      "grad_norm": 3.28125,
      "learning_rate": 2.0475037059536697e-05,
      "loss": 0.7717,
      "step": 540510
    },
    {
      "epoch": 1.894387842720808,
      "grad_norm": 2.75,
      "learning_rate": 2.0474388030872995e-05,
      "loss": 0.8441,
      "step": 540520
    },
    {
      "epoch": 1.8944228902277036,
      "grad_norm": 2.65625,
      "learning_rate": 2.0473739002209293e-05,
      "loss": 0.8616,
      "step": 540530
    },
    {
      "epoch": 1.8944579377345994,
      "grad_norm": 2.734375,
      "learning_rate": 2.0473089973545594e-05,
      "loss": 0.7492,
      "step": 540540
    },
    {
      "epoch": 1.894492985241495,
      "grad_norm": 2.515625,
      "learning_rate": 2.0472440944881892e-05,
      "loss": 0.7641,
      "step": 540550
    },
    {
      "epoch": 1.8945280327483904,
      "grad_norm": 3.078125,
      "learning_rate": 2.047179191621819e-05,
      "loss": 0.8762,
      "step": 540560
    },
    {
      "epoch": 1.8945630802552862,
      "grad_norm": 2.625,
      "learning_rate": 2.0471142887554488e-05,
      "loss": 0.8508,
      "step": 540570
    },
    {
      "epoch": 1.8945981277621815,
      "grad_norm": 3.171875,
      "learning_rate": 2.0470493858890786e-05,
      "loss": 0.8464,
      "step": 540580
    },
    {
      "epoch": 1.8946331752690773,
      "grad_norm": 2.828125,
      "learning_rate": 2.0469844830227084e-05,
      "loss": 0.8485,
      "step": 540590
    },
    {
      "epoch": 1.8946682227759728,
      "grad_norm": 3.1875,
      "learning_rate": 2.0469195801563382e-05,
      "loss": 0.8265,
      "step": 540600
    },
    {
      "epoch": 1.8947032702828683,
      "grad_norm": 3.0,
      "learning_rate": 2.046854677289968e-05,
      "loss": 0.8228,
      "step": 540610
    },
    {
      "epoch": 1.894738317789764,
      "grad_norm": 2.75,
      "learning_rate": 2.0467897744235978e-05,
      "loss": 0.8132,
      "step": 540620
    },
    {
      "epoch": 1.8947733652966596,
      "grad_norm": 2.703125,
      "learning_rate": 2.0467248715572276e-05,
      "loss": 0.8401,
      "step": 540630
    },
    {
      "epoch": 1.8948084128035552,
      "grad_norm": 2.5,
      "learning_rate": 2.0466599686908574e-05,
      "loss": 0.8377,
      "step": 540640
    },
    {
      "epoch": 1.894843460310451,
      "grad_norm": 3.25,
      "learning_rate": 2.0465950658244872e-05,
      "loss": 0.807,
      "step": 540650
    },
    {
      "epoch": 1.8948785078173465,
      "grad_norm": 2.671875,
      "learning_rate": 2.046530162958117e-05,
      "loss": 0.742,
      "step": 540660
    },
    {
      "epoch": 1.894913555324242,
      "grad_norm": 3.34375,
      "learning_rate": 2.0464652600917468e-05,
      "loss": 0.9146,
      "step": 540670
    },
    {
      "epoch": 1.8949486028311378,
      "grad_norm": 2.828125,
      "learning_rate": 2.0464003572253766e-05,
      "loss": 0.8195,
      "step": 540680
    },
    {
      "epoch": 1.894983650338033,
      "grad_norm": 2.515625,
      "learning_rate": 2.0463354543590064e-05,
      "loss": 0.7752,
      "step": 540690
    },
    {
      "epoch": 1.8950186978449288,
      "grad_norm": 2.890625,
      "learning_rate": 2.0462705514926362e-05,
      "loss": 0.7708,
      "step": 540700
    },
    {
      "epoch": 1.8950537453518244,
      "grad_norm": 3.109375,
      "learning_rate": 2.046205648626266e-05,
      "loss": 0.8186,
      "step": 540710
    },
    {
      "epoch": 1.89508879285872,
      "grad_norm": 2.90625,
      "learning_rate": 2.0461407457598958e-05,
      "loss": 0.8232,
      "step": 540720
    },
    {
      "epoch": 1.8951238403656157,
      "grad_norm": 2.8125,
      "learning_rate": 2.0460758428935256e-05,
      "loss": 0.8887,
      "step": 540730
    },
    {
      "epoch": 1.8951588878725112,
      "grad_norm": 2.890625,
      "learning_rate": 2.0460109400271554e-05,
      "loss": 0.79,
      "step": 540740
    },
    {
      "epoch": 1.8951939353794067,
      "grad_norm": 2.6875,
      "learning_rate": 2.0459460371607852e-05,
      "loss": 0.7318,
      "step": 540750
    },
    {
      "epoch": 1.8952289828863025,
      "grad_norm": 2.90625,
      "learning_rate": 2.045881134294415e-05,
      "loss": 0.8185,
      "step": 540760
    },
    {
      "epoch": 1.895264030393198,
      "grad_norm": 3.421875,
      "learning_rate": 2.0458162314280448e-05,
      "loss": 0.8014,
      "step": 540770
    },
    {
      "epoch": 1.8952990779000936,
      "grad_norm": 3.03125,
      "learning_rate": 2.0457513285616746e-05,
      "loss": 0.8347,
      "step": 540780
    },
    {
      "epoch": 1.8953341254069893,
      "grad_norm": 3.015625,
      "learning_rate": 2.0456864256953044e-05,
      "loss": 0.8913,
      "step": 540790
    },
    {
      "epoch": 1.8953691729138846,
      "grad_norm": 3.203125,
      "learning_rate": 2.0456215228289345e-05,
      "loss": 0.8203,
      "step": 540800
    },
    {
      "epoch": 1.8954042204207804,
      "grad_norm": 2.90625,
      "learning_rate": 2.0455566199625643e-05,
      "loss": 0.8337,
      "step": 540810
    },
    {
      "epoch": 1.895439267927676,
      "grad_norm": 2.84375,
      "learning_rate": 2.045491717096194e-05,
      "loss": 0.78,
      "step": 540820
    },
    {
      "epoch": 1.8954743154345715,
      "grad_norm": 2.828125,
      "learning_rate": 2.045426814229824e-05,
      "loss": 0.808,
      "step": 540830
    },
    {
      "epoch": 1.8955093629414672,
      "grad_norm": 2.96875,
      "learning_rate": 2.0453619113634537e-05,
      "loss": 0.9273,
      "step": 540840
    },
    {
      "epoch": 1.8955444104483627,
      "grad_norm": 2.484375,
      "learning_rate": 2.0452970084970835e-05,
      "loss": 0.7324,
      "step": 540850
    },
    {
      "epoch": 1.8955794579552583,
      "grad_norm": 2.6875,
      "learning_rate": 2.045232105630713e-05,
      "loss": 0.776,
      "step": 540860
    },
    {
      "epoch": 1.895614505462154,
      "grad_norm": 2.953125,
      "learning_rate": 2.0451672027643428e-05,
      "loss": 0.8256,
      "step": 540870
    },
    {
      "epoch": 1.8956495529690496,
      "grad_norm": 2.75,
      "learning_rate": 2.0451022998979726e-05,
      "loss": 0.8194,
      "step": 540880
    },
    {
      "epoch": 1.8956846004759451,
      "grad_norm": 3.6875,
      "learning_rate": 2.0450373970316024e-05,
      "loss": 0.8889,
      "step": 540890
    },
    {
      "epoch": 1.8957196479828409,
      "grad_norm": 2.75,
      "learning_rate": 2.0449724941652322e-05,
      "loss": 0.8903,
      "step": 540900
    },
    {
      "epoch": 1.8957546954897362,
      "grad_norm": 3.375,
      "learning_rate": 2.0449075912988623e-05,
      "loss": 0.8378,
      "step": 540910
    },
    {
      "epoch": 1.895789742996632,
      "grad_norm": 2.8125,
      "learning_rate": 2.044842688432492e-05,
      "loss": 0.8676,
      "step": 540920
    },
    {
      "epoch": 1.8958247905035275,
      "grad_norm": 2.609375,
      "learning_rate": 2.044777785566122e-05,
      "loss": 0.8188,
      "step": 540930
    },
    {
      "epoch": 1.895859838010423,
      "grad_norm": 2.8125,
      "learning_rate": 2.0447128826997517e-05,
      "loss": 0.798,
      "step": 540940
    },
    {
      "epoch": 1.8958948855173188,
      "grad_norm": 3.546875,
      "learning_rate": 2.0446479798333815e-05,
      "loss": 0.7974,
      "step": 540950
    },
    {
      "epoch": 1.8959299330242143,
      "grad_norm": 2.65625,
      "learning_rate": 2.0445830769670113e-05,
      "loss": 0.659,
      "step": 540960
    },
    {
      "epoch": 1.8959649805311098,
      "grad_norm": 2.9375,
      "learning_rate": 2.044518174100641e-05,
      "loss": 0.8222,
      "step": 540970
    },
    {
      "epoch": 1.8960000280380056,
      "grad_norm": 2.953125,
      "learning_rate": 2.044453271234271e-05,
      "loss": 0.7505,
      "step": 540980
    },
    {
      "epoch": 1.8960350755449011,
      "grad_norm": 2.90625,
      "learning_rate": 2.0443883683679007e-05,
      "loss": 0.8665,
      "step": 540990
    },
    {
      "epoch": 1.8960701230517967,
      "grad_norm": 2.84375,
      "learning_rate": 2.0443234655015305e-05,
      "loss": 0.7519,
      "step": 541000
    },
    {
      "epoch": 1.8961051705586924,
      "grad_norm": 3.09375,
      "learning_rate": 2.0442585626351603e-05,
      "loss": 0.7604,
      "step": 541010
    },
    {
      "epoch": 1.8961402180655877,
      "grad_norm": 3.03125,
      "learning_rate": 2.04419365976879e-05,
      "loss": 0.754,
      "step": 541020
    },
    {
      "epoch": 1.8961752655724835,
      "grad_norm": 2.859375,
      "learning_rate": 2.04412875690242e-05,
      "loss": 0.8292,
      "step": 541030
    },
    {
      "epoch": 1.8962103130793793,
      "grad_norm": 2.9375,
      "learning_rate": 2.0440638540360497e-05,
      "loss": 0.8682,
      "step": 541040
    },
    {
      "epoch": 1.8962453605862746,
      "grad_norm": 2.375,
      "learning_rate": 2.0439989511696795e-05,
      "loss": 0.7748,
      "step": 541050
    },
    {
      "epoch": 1.8962804080931703,
      "grad_norm": 3.0,
      "learning_rate": 2.0439340483033093e-05,
      "loss": 0.8061,
      "step": 541060
    },
    {
      "epoch": 1.8963154556000659,
      "grad_norm": 2.734375,
      "learning_rate": 2.043869145436939e-05,
      "loss": 0.785,
      "step": 541070
    },
    {
      "epoch": 1.8963505031069614,
      "grad_norm": 2.984375,
      "learning_rate": 2.043804242570569e-05,
      "loss": 0.8274,
      "step": 541080
    },
    {
      "epoch": 1.8963855506138572,
      "grad_norm": 2.96875,
      "learning_rate": 2.0437393397041987e-05,
      "loss": 0.8239,
      "step": 541090
    },
    {
      "epoch": 1.8964205981207527,
      "grad_norm": 2.625,
      "learning_rate": 2.0436744368378285e-05,
      "loss": 0.8275,
      "step": 541100
    },
    {
      "epoch": 1.8964556456276482,
      "grad_norm": 2.734375,
      "learning_rate": 2.0436095339714583e-05,
      "loss": 0.8105,
      "step": 541110
    },
    {
      "epoch": 1.896490693134544,
      "grad_norm": 3.046875,
      "learning_rate": 2.043544631105088e-05,
      "loss": 0.7715,
      "step": 541120
    },
    {
      "epoch": 1.8965257406414393,
      "grad_norm": 2.34375,
      "learning_rate": 2.043479728238718e-05,
      "loss": 0.7624,
      "step": 541130
    },
    {
      "epoch": 1.896560788148335,
      "grad_norm": 2.09375,
      "learning_rate": 2.0434148253723477e-05,
      "loss": 0.821,
      "step": 541140
    },
    {
      "epoch": 1.8965958356552308,
      "grad_norm": 3.015625,
      "learning_rate": 2.0433499225059775e-05,
      "loss": 0.84,
      "step": 541150
    },
    {
      "epoch": 1.8966308831621261,
      "grad_norm": 2.640625,
      "learning_rate": 2.0432850196396077e-05,
      "loss": 0.8333,
      "step": 541160
    },
    {
      "epoch": 1.896665930669022,
      "grad_norm": 3.328125,
      "learning_rate": 2.0432201167732375e-05,
      "loss": 0.891,
      "step": 541170
    },
    {
      "epoch": 1.8967009781759174,
      "grad_norm": 2.90625,
      "learning_rate": 2.0431552139068673e-05,
      "loss": 0.7512,
      "step": 541180
    },
    {
      "epoch": 1.896736025682813,
      "grad_norm": 2.90625,
      "learning_rate": 2.043090311040497e-05,
      "loss": 0.8839,
      "step": 541190
    },
    {
      "epoch": 1.8967710731897087,
      "grad_norm": 3.015625,
      "learning_rate": 2.043025408174127e-05,
      "loss": 0.8016,
      "step": 541200
    },
    {
      "epoch": 1.8968061206966043,
      "grad_norm": 3.296875,
      "learning_rate": 2.0429605053077567e-05,
      "loss": 0.8515,
      "step": 541210
    },
    {
      "epoch": 1.8968411682034998,
      "grad_norm": 2.8125,
      "learning_rate": 2.0428956024413865e-05,
      "loss": 0.9059,
      "step": 541220
    },
    {
      "epoch": 1.8968762157103956,
      "grad_norm": 2.625,
      "learning_rate": 2.042830699575016e-05,
      "loss": 0.8461,
      "step": 541230
    },
    {
      "epoch": 1.896911263217291,
      "grad_norm": 2.921875,
      "learning_rate": 2.0427657967086457e-05,
      "loss": 0.834,
      "step": 541240
    },
    {
      "epoch": 1.8969463107241866,
      "grad_norm": 2.828125,
      "learning_rate": 2.0427008938422755e-05,
      "loss": 0.8844,
      "step": 541250
    },
    {
      "epoch": 1.8969813582310824,
      "grad_norm": 3.109375,
      "learning_rate": 2.0426359909759053e-05,
      "loss": 0.7943,
      "step": 541260
    },
    {
      "epoch": 1.8970164057379777,
      "grad_norm": 3.140625,
      "learning_rate": 2.042571088109535e-05,
      "loss": 0.818,
      "step": 541270
    },
    {
      "epoch": 1.8970514532448735,
      "grad_norm": 2.9375,
      "learning_rate": 2.0425061852431653e-05,
      "loss": 0.8221,
      "step": 541280
    },
    {
      "epoch": 1.897086500751769,
      "grad_norm": 2.609375,
      "learning_rate": 2.042441282376795e-05,
      "loss": 0.8358,
      "step": 541290
    },
    {
      "epoch": 1.8971215482586645,
      "grad_norm": 2.578125,
      "learning_rate": 2.042376379510425e-05,
      "loss": 0.8699,
      "step": 541300
    },
    {
      "epoch": 1.8971565957655603,
      "grad_norm": 3.15625,
      "learning_rate": 2.0423114766440547e-05,
      "loss": 0.8534,
      "step": 541310
    },
    {
      "epoch": 1.8971916432724558,
      "grad_norm": 2.859375,
      "learning_rate": 2.0422465737776845e-05,
      "loss": 0.7902,
      "step": 541320
    },
    {
      "epoch": 1.8972266907793514,
      "grad_norm": 2.6875,
      "learning_rate": 2.0421816709113143e-05,
      "loss": 0.8431,
      "step": 541330
    },
    {
      "epoch": 1.8972617382862471,
      "grad_norm": 2.8125,
      "learning_rate": 2.042116768044944e-05,
      "loss": 0.8013,
      "step": 541340
    },
    {
      "epoch": 1.8972967857931426,
      "grad_norm": 3.125,
      "learning_rate": 2.042051865178574e-05,
      "loss": 0.8585,
      "step": 541350
    },
    {
      "epoch": 1.8973318333000382,
      "grad_norm": 2.8125,
      "learning_rate": 2.0419869623122037e-05,
      "loss": 0.8918,
      "step": 541360
    },
    {
      "epoch": 1.897366880806934,
      "grad_norm": 2.640625,
      "learning_rate": 2.0419220594458335e-05,
      "loss": 0.7779,
      "step": 541370
    },
    {
      "epoch": 1.8974019283138293,
      "grad_norm": 3.015625,
      "learning_rate": 2.0418571565794633e-05,
      "loss": 0.8054,
      "step": 541380
    },
    {
      "epoch": 1.897436975820725,
      "grad_norm": 2.703125,
      "learning_rate": 2.041792253713093e-05,
      "loss": 0.8581,
      "step": 541390
    },
    {
      "epoch": 1.8974720233276205,
      "grad_norm": 2.734375,
      "learning_rate": 2.041727350846723e-05,
      "loss": 0.8754,
      "step": 541400
    },
    {
      "epoch": 1.897507070834516,
      "grad_norm": 3.359375,
      "learning_rate": 2.0416624479803527e-05,
      "loss": 0.8456,
      "step": 541410
    },
    {
      "epoch": 1.8975421183414118,
      "grad_norm": 3.046875,
      "learning_rate": 2.0415975451139825e-05,
      "loss": 0.8439,
      "step": 541420
    },
    {
      "epoch": 1.8975771658483074,
      "grad_norm": 2.453125,
      "learning_rate": 2.0415326422476123e-05,
      "loss": 0.7737,
      "step": 541430
    },
    {
      "epoch": 1.897612213355203,
      "grad_norm": 2.71875,
      "learning_rate": 2.041467739381242e-05,
      "loss": 0.8212,
      "step": 541440
    },
    {
      "epoch": 1.8976472608620987,
      "grad_norm": 2.609375,
      "learning_rate": 2.041402836514872e-05,
      "loss": 0.8215,
      "step": 541450
    },
    {
      "epoch": 1.8976823083689942,
      "grad_norm": 2.859375,
      "learning_rate": 2.0413379336485017e-05,
      "loss": 0.8682,
      "step": 541460
    },
    {
      "epoch": 1.8977173558758897,
      "grad_norm": 3.453125,
      "learning_rate": 2.0412730307821315e-05,
      "loss": 0.8491,
      "step": 541470
    },
    {
      "epoch": 1.8977524033827855,
      "grad_norm": 4.5,
      "learning_rate": 2.0412081279157613e-05,
      "loss": 0.8197,
      "step": 541480
    },
    {
      "epoch": 1.8977874508896808,
      "grad_norm": 2.515625,
      "learning_rate": 2.041143225049391e-05,
      "loss": 0.7739,
      "step": 541490
    },
    {
      "epoch": 1.8978224983965766,
      "grad_norm": 3.5625,
      "learning_rate": 2.041078322183021e-05,
      "loss": 0.805,
      "step": 541500
    },
    {
      "epoch": 1.897857545903472,
      "grad_norm": 2.765625,
      "learning_rate": 2.0410134193166507e-05,
      "loss": 0.8159,
      "step": 541510
    },
    {
      "epoch": 1.8978925934103676,
      "grad_norm": 2.546875,
      "learning_rate": 2.0409485164502805e-05,
      "loss": 0.7889,
      "step": 541520
    },
    {
      "epoch": 1.8979276409172634,
      "grad_norm": 3.03125,
      "learning_rate": 2.0408836135839106e-05,
      "loss": 0.753,
      "step": 541530
    },
    {
      "epoch": 1.897962688424159,
      "grad_norm": 2.640625,
      "learning_rate": 2.0408187107175404e-05,
      "loss": 0.7731,
      "step": 541540
    },
    {
      "epoch": 1.8979977359310545,
      "grad_norm": 2.765625,
      "learning_rate": 2.0407538078511702e-05,
      "loss": 0.7859,
      "step": 541550
    },
    {
      "epoch": 1.8980327834379502,
      "grad_norm": 2.671875,
      "learning_rate": 2.0406889049848e-05,
      "loss": 0.8201,
      "step": 541560
    },
    {
      "epoch": 1.8980678309448458,
      "grad_norm": 3.015625,
      "learning_rate": 2.0406240021184298e-05,
      "loss": 0.8491,
      "step": 541570
    },
    {
      "epoch": 1.8981028784517413,
      "grad_norm": 2.90625,
      "learning_rate": 2.0405590992520596e-05,
      "loss": 0.8235,
      "step": 541580
    },
    {
      "epoch": 1.898137925958637,
      "grad_norm": 3.140625,
      "learning_rate": 2.0404941963856894e-05,
      "loss": 0.7338,
      "step": 541590
    },
    {
      "epoch": 1.8981729734655324,
      "grad_norm": 2.671875,
      "learning_rate": 2.0404292935193192e-05,
      "loss": 0.7538,
      "step": 541600
    },
    {
      "epoch": 1.8982080209724281,
      "grad_norm": 2.703125,
      "learning_rate": 2.0403643906529487e-05,
      "loss": 0.8143,
      "step": 541610
    },
    {
      "epoch": 1.8982430684793237,
      "grad_norm": 2.984375,
      "learning_rate": 2.0402994877865785e-05,
      "loss": 0.9073,
      "step": 541620
    },
    {
      "epoch": 1.8982781159862192,
      "grad_norm": 2.640625,
      "learning_rate": 2.0402345849202083e-05,
      "loss": 0.8729,
      "step": 541630
    },
    {
      "epoch": 1.898313163493115,
      "grad_norm": 2.90625,
      "learning_rate": 2.0401696820538384e-05,
      "loss": 0.8331,
      "step": 541640
    },
    {
      "epoch": 1.8983482110000105,
      "grad_norm": 3.09375,
      "learning_rate": 2.0401047791874682e-05,
      "loss": 0.8598,
      "step": 541650
    },
    {
      "epoch": 1.898383258506906,
      "grad_norm": 2.875,
      "learning_rate": 2.040039876321098e-05,
      "loss": 0.8184,
      "step": 541660
    },
    {
      "epoch": 1.8984183060138018,
      "grad_norm": 3.5,
      "learning_rate": 2.0399749734547278e-05,
      "loss": 0.9072,
      "step": 541670
    },
    {
      "epoch": 1.8984533535206973,
      "grad_norm": 3.40625,
      "learning_rate": 2.0399100705883576e-05,
      "loss": 0.8276,
      "step": 541680
    },
    {
      "epoch": 1.8984884010275929,
      "grad_norm": 3.09375,
      "learning_rate": 2.0398451677219874e-05,
      "loss": 0.8269,
      "step": 541690
    },
    {
      "epoch": 1.8985234485344886,
      "grad_norm": 2.96875,
      "learning_rate": 2.0397802648556172e-05,
      "loss": 0.8818,
      "step": 541700
    },
    {
      "epoch": 1.898558496041384,
      "grad_norm": 3.40625,
      "learning_rate": 2.039715361989247e-05,
      "loss": 0.818,
      "step": 541710
    },
    {
      "epoch": 1.8985935435482797,
      "grad_norm": 3.09375,
      "learning_rate": 2.0396504591228768e-05,
      "loss": 0.7872,
      "step": 541720
    },
    {
      "epoch": 1.8986285910551755,
      "grad_norm": 2.625,
      "learning_rate": 2.0395855562565066e-05,
      "loss": 0.8133,
      "step": 541730
    },
    {
      "epoch": 1.8986636385620708,
      "grad_norm": 2.484375,
      "learning_rate": 2.0395206533901364e-05,
      "loss": 0.8456,
      "step": 541740
    },
    {
      "epoch": 1.8986986860689665,
      "grad_norm": 3.0625,
      "learning_rate": 2.0394557505237662e-05,
      "loss": 0.8067,
      "step": 541750
    },
    {
      "epoch": 1.898733733575862,
      "grad_norm": 2.875,
      "learning_rate": 2.039390847657396e-05,
      "loss": 0.8574,
      "step": 541760
    },
    {
      "epoch": 1.8987687810827576,
      "grad_norm": 2.8125,
      "learning_rate": 2.0393259447910258e-05,
      "loss": 0.8495,
      "step": 541770
    },
    {
      "epoch": 1.8988038285896534,
      "grad_norm": 2.90625,
      "learning_rate": 2.039261041924656e-05,
      "loss": 0.7947,
      "step": 541780
    },
    {
      "epoch": 1.8988388760965489,
      "grad_norm": 3.296875,
      "learning_rate": 2.0391961390582857e-05,
      "loss": 0.8161,
      "step": 541790
    },
    {
      "epoch": 1.8988739236034444,
      "grad_norm": 3.171875,
      "learning_rate": 2.0391312361919152e-05,
      "loss": 0.8449,
      "step": 541800
    },
    {
      "epoch": 1.8989089711103402,
      "grad_norm": 3.171875,
      "learning_rate": 2.039066333325545e-05,
      "loss": 0.7256,
      "step": 541810
    },
    {
      "epoch": 1.8989440186172357,
      "grad_norm": 3.234375,
      "learning_rate": 2.0390014304591748e-05,
      "loss": 0.9284,
      "step": 541820
    },
    {
      "epoch": 1.8989790661241313,
      "grad_norm": 2.5625,
      "learning_rate": 2.0389365275928046e-05,
      "loss": 0.7998,
      "step": 541830
    },
    {
      "epoch": 1.899014113631027,
      "grad_norm": 3.140625,
      "learning_rate": 2.0388716247264344e-05,
      "loss": 0.8197,
      "step": 541840
    },
    {
      "epoch": 1.8990491611379223,
      "grad_norm": 2.90625,
      "learning_rate": 2.0388067218600642e-05,
      "loss": 0.8507,
      "step": 541850
    },
    {
      "epoch": 1.899084208644818,
      "grad_norm": 2.671875,
      "learning_rate": 2.038741818993694e-05,
      "loss": 0.8136,
      "step": 541860
    },
    {
      "epoch": 1.8991192561517136,
      "grad_norm": 3.078125,
      "learning_rate": 2.0386769161273238e-05,
      "loss": 0.8035,
      "step": 541870
    },
    {
      "epoch": 1.8991543036586092,
      "grad_norm": 2.921875,
      "learning_rate": 2.0386120132609536e-05,
      "loss": 0.8044,
      "step": 541880
    },
    {
      "epoch": 1.899189351165505,
      "grad_norm": 2.59375,
      "learning_rate": 2.0385471103945834e-05,
      "loss": 0.7574,
      "step": 541890
    },
    {
      "epoch": 1.8992243986724004,
      "grad_norm": 2.796875,
      "learning_rate": 2.0384822075282135e-05,
      "loss": 0.7688,
      "step": 541900
    },
    {
      "epoch": 1.899259446179296,
      "grad_norm": 2.828125,
      "learning_rate": 2.0384173046618433e-05,
      "loss": 0.853,
      "step": 541910
    },
    {
      "epoch": 1.8992944936861917,
      "grad_norm": 3.203125,
      "learning_rate": 2.038352401795473e-05,
      "loss": 0.7164,
      "step": 541920
    },
    {
      "epoch": 1.8993295411930873,
      "grad_norm": 3.0,
      "learning_rate": 2.038287498929103e-05,
      "loss": 0.7923,
      "step": 541930
    },
    {
      "epoch": 1.8993645886999828,
      "grad_norm": 2.984375,
      "learning_rate": 2.0382225960627327e-05,
      "loss": 0.7933,
      "step": 541940
    },
    {
      "epoch": 1.8993996362068786,
      "grad_norm": 3.0625,
      "learning_rate": 2.0381576931963625e-05,
      "loss": 0.8963,
      "step": 541950
    },
    {
      "epoch": 1.8994346837137739,
      "grad_norm": 2.796875,
      "learning_rate": 2.0380927903299923e-05,
      "loss": 0.8272,
      "step": 541960
    },
    {
      "epoch": 1.8994697312206696,
      "grad_norm": 2.734375,
      "learning_rate": 2.038027887463622e-05,
      "loss": 0.8231,
      "step": 541970
    },
    {
      "epoch": 1.8995047787275652,
      "grad_norm": 3.046875,
      "learning_rate": 2.0379629845972516e-05,
      "loss": 0.8737,
      "step": 541980
    },
    {
      "epoch": 1.8995398262344607,
      "grad_norm": 2.921875,
      "learning_rate": 2.0378980817308814e-05,
      "loss": 0.8057,
      "step": 541990
    },
    {
      "epoch": 1.8995748737413565,
      "grad_norm": 3.125,
      "learning_rate": 2.0378331788645112e-05,
      "loss": 0.837,
      "step": 542000
    },
    {
      "epoch": 1.899609921248252,
      "grad_norm": 3.078125,
      "learning_rate": 2.0377682759981413e-05,
      "loss": 0.8646,
      "step": 542010
    },
    {
      "epoch": 1.8996449687551475,
      "grad_norm": 2.8125,
      "learning_rate": 2.037703373131771e-05,
      "loss": 0.8677,
      "step": 542020
    },
    {
      "epoch": 1.8996800162620433,
      "grad_norm": 3.046875,
      "learning_rate": 2.037638470265401e-05,
      "loss": 0.7566,
      "step": 542030
    },
    {
      "epoch": 1.8997150637689388,
      "grad_norm": 3.03125,
      "learning_rate": 2.0375735673990307e-05,
      "loss": 0.8937,
      "step": 542040
    },
    {
      "epoch": 1.8997501112758344,
      "grad_norm": 2.671875,
      "learning_rate": 2.0375086645326605e-05,
      "loss": 0.76,
      "step": 542050
    },
    {
      "epoch": 1.8997851587827301,
      "grad_norm": 2.953125,
      "learning_rate": 2.0374437616662903e-05,
      "loss": 0.8763,
      "step": 542060
    },
    {
      "epoch": 1.8998202062896254,
      "grad_norm": 2.671875,
      "learning_rate": 2.03737885879992e-05,
      "loss": 0.8412,
      "step": 542070
    },
    {
      "epoch": 1.8998552537965212,
      "grad_norm": 2.640625,
      "learning_rate": 2.03731395593355e-05,
      "loss": 0.7698,
      "step": 542080
    },
    {
      "epoch": 1.8998903013034167,
      "grad_norm": 2.578125,
      "learning_rate": 2.0372490530671797e-05,
      "loss": 0.893,
      "step": 542090
    },
    {
      "epoch": 1.8999253488103123,
      "grad_norm": 2.5,
      "learning_rate": 2.0371841502008095e-05,
      "loss": 0.8279,
      "step": 542100
    },
    {
      "epoch": 1.899960396317208,
      "grad_norm": 3.0,
      "learning_rate": 2.0371192473344393e-05,
      "loss": 0.8064,
      "step": 542110
    },
    {
      "epoch": 1.8999954438241036,
      "grad_norm": 2.96875,
      "learning_rate": 2.037054344468069e-05,
      "loss": 0.7836,
      "step": 542120
    },
    {
      "epoch": 1.900030491330999,
      "grad_norm": 3.046875,
      "learning_rate": 2.036989441601699e-05,
      "loss": 0.8172,
      "step": 542130
    },
    {
      "epoch": 1.9000655388378949,
      "grad_norm": 3.5625,
      "learning_rate": 2.0369245387353287e-05,
      "loss": 0.8907,
      "step": 542140
    },
    {
      "epoch": 1.9001005863447904,
      "grad_norm": 2.515625,
      "learning_rate": 2.036859635868959e-05,
      "loss": 0.8313,
      "step": 542150
    },
    {
      "epoch": 1.900135633851686,
      "grad_norm": 3.34375,
      "learning_rate": 2.0367947330025887e-05,
      "loss": 0.8014,
      "step": 542160
    },
    {
      "epoch": 1.9001706813585817,
      "grad_norm": 3.125,
      "learning_rate": 2.036729830136218e-05,
      "loss": 0.8063,
      "step": 542170
    },
    {
      "epoch": 1.900205728865477,
      "grad_norm": 2.75,
      "learning_rate": 2.036664927269848e-05,
      "loss": 0.901,
      "step": 542180
    },
    {
      "epoch": 1.9002407763723728,
      "grad_norm": 3.296875,
      "learning_rate": 2.0366000244034777e-05,
      "loss": 0.8331,
      "step": 542190
    },
    {
      "epoch": 1.9002758238792683,
      "grad_norm": 2.984375,
      "learning_rate": 2.0365351215371075e-05,
      "loss": 0.8607,
      "step": 542200
    },
    {
      "epoch": 1.9003108713861638,
      "grad_norm": 3.46875,
      "learning_rate": 2.0364702186707373e-05,
      "loss": 0.8851,
      "step": 542210
    },
    {
      "epoch": 1.9003459188930596,
      "grad_norm": 2.9375,
      "learning_rate": 2.036405315804367e-05,
      "loss": 0.8694,
      "step": 542220
    },
    {
      "epoch": 1.9003809663999551,
      "grad_norm": 2.984375,
      "learning_rate": 2.036340412937997e-05,
      "loss": 0.7106,
      "step": 542230
    },
    {
      "epoch": 1.9004160139068507,
      "grad_norm": 2.890625,
      "learning_rate": 2.0362755100716267e-05,
      "loss": 0.8826,
      "step": 542240
    },
    {
      "epoch": 1.9004510614137464,
      "grad_norm": 3.328125,
      "learning_rate": 2.0362106072052565e-05,
      "loss": 0.8554,
      "step": 542250
    },
    {
      "epoch": 1.900486108920642,
      "grad_norm": 2.75,
      "learning_rate": 2.0361457043388867e-05,
      "loss": 0.8074,
      "step": 542260
    },
    {
      "epoch": 1.9005211564275375,
      "grad_norm": 2.828125,
      "learning_rate": 2.0360808014725165e-05,
      "loss": 0.8785,
      "step": 542270
    },
    {
      "epoch": 1.9005562039344333,
      "grad_norm": 3.109375,
      "learning_rate": 2.0360158986061463e-05,
      "loss": 0.8109,
      "step": 542280
    },
    {
      "epoch": 1.9005912514413286,
      "grad_norm": 3.109375,
      "learning_rate": 2.035950995739776e-05,
      "loss": 0.8717,
      "step": 542290
    },
    {
      "epoch": 1.9006262989482243,
      "grad_norm": 2.984375,
      "learning_rate": 2.035886092873406e-05,
      "loss": 0.8213,
      "step": 542300
    },
    {
      "epoch": 1.9006613464551199,
      "grad_norm": 2.859375,
      "learning_rate": 2.0358211900070357e-05,
      "loss": 0.8797,
      "step": 542310
    },
    {
      "epoch": 1.9006963939620154,
      "grad_norm": 2.921875,
      "learning_rate": 2.0357562871406655e-05,
      "loss": 0.7919,
      "step": 542320
    },
    {
      "epoch": 1.9007314414689112,
      "grad_norm": 3.375,
      "learning_rate": 2.0356913842742953e-05,
      "loss": 0.7885,
      "step": 542330
    },
    {
      "epoch": 1.9007664889758067,
      "grad_norm": 2.40625,
      "learning_rate": 2.035626481407925e-05,
      "loss": 0.7253,
      "step": 542340
    },
    {
      "epoch": 1.9008015364827022,
      "grad_norm": 3.265625,
      "learning_rate": 2.035561578541555e-05,
      "loss": 0.8666,
      "step": 542350
    },
    {
      "epoch": 1.900836583989598,
      "grad_norm": 3.0625,
      "learning_rate": 2.0354966756751843e-05,
      "loss": 0.8202,
      "step": 542360
    },
    {
      "epoch": 1.9008716314964935,
      "grad_norm": 2.796875,
      "learning_rate": 2.035431772808814e-05,
      "loss": 0.808,
      "step": 542370
    },
    {
      "epoch": 1.900906679003389,
      "grad_norm": 3.125,
      "learning_rate": 2.0353668699424443e-05,
      "loss": 0.817,
      "step": 542380
    },
    {
      "epoch": 1.9009417265102848,
      "grad_norm": 2.6875,
      "learning_rate": 2.035301967076074e-05,
      "loss": 0.837,
      "step": 542390
    },
    {
      "epoch": 1.9009767740171801,
      "grad_norm": 2.625,
      "learning_rate": 2.035237064209704e-05,
      "loss": 0.7886,
      "step": 542400
    },
    {
      "epoch": 1.9010118215240759,
      "grad_norm": 2.75,
      "learning_rate": 2.0351721613433337e-05,
      "loss": 0.7865,
      "step": 542410
    },
    {
      "epoch": 1.9010468690309716,
      "grad_norm": 2.890625,
      "learning_rate": 2.0351072584769635e-05,
      "loss": 0.9033,
      "step": 542420
    },
    {
      "epoch": 1.901081916537867,
      "grad_norm": 2.953125,
      "learning_rate": 2.0350423556105933e-05,
      "loss": 0.8584,
      "step": 542430
    },
    {
      "epoch": 1.9011169640447627,
      "grad_norm": 2.953125,
      "learning_rate": 2.034977452744223e-05,
      "loss": 0.825,
      "step": 542440
    },
    {
      "epoch": 1.9011520115516582,
      "grad_norm": 2.765625,
      "learning_rate": 2.034912549877853e-05,
      "loss": 0.7501,
      "step": 542450
    },
    {
      "epoch": 1.9011870590585538,
      "grad_norm": 2.65625,
      "learning_rate": 2.0348476470114827e-05,
      "loss": 0.8768,
      "step": 542460
    },
    {
      "epoch": 1.9012221065654495,
      "grad_norm": 3.09375,
      "learning_rate": 2.0347827441451125e-05,
      "loss": 0.8599,
      "step": 542470
    },
    {
      "epoch": 1.901257154072345,
      "grad_norm": 2.796875,
      "learning_rate": 2.0347178412787423e-05,
      "loss": 0.7677,
      "step": 542480
    },
    {
      "epoch": 1.9012922015792406,
      "grad_norm": 2.96875,
      "learning_rate": 2.034652938412372e-05,
      "loss": 0.8531,
      "step": 542490
    },
    {
      "epoch": 1.9013272490861364,
      "grad_norm": 2.46875,
      "learning_rate": 2.034588035546002e-05,
      "loss": 0.7983,
      "step": 542500
    },
    {
      "epoch": 1.901362296593032,
      "grad_norm": 2.8125,
      "learning_rate": 2.0345231326796317e-05,
      "loss": 0.8597,
      "step": 542510
    },
    {
      "epoch": 1.9013973440999274,
      "grad_norm": 2.734375,
      "learning_rate": 2.0344582298132618e-05,
      "loss": 0.8039,
      "step": 542520
    },
    {
      "epoch": 1.9014323916068232,
      "grad_norm": 3.09375,
      "learning_rate": 2.0343933269468916e-05,
      "loss": 0.8061,
      "step": 542530
    },
    {
      "epoch": 1.9014674391137185,
      "grad_norm": 2.625,
      "learning_rate": 2.0343284240805214e-05,
      "loss": 0.8019,
      "step": 542540
    },
    {
      "epoch": 1.9015024866206143,
      "grad_norm": 2.96875,
      "learning_rate": 2.034263521214151e-05,
      "loss": 0.8788,
      "step": 542550
    },
    {
      "epoch": 1.9015375341275098,
      "grad_norm": 3.046875,
      "learning_rate": 2.0341986183477807e-05,
      "loss": 0.7919,
      "step": 542560
    },
    {
      "epoch": 1.9015725816344053,
      "grad_norm": 3.0,
      "learning_rate": 2.0341337154814105e-05,
      "loss": 0.8016,
      "step": 542570
    },
    {
      "epoch": 1.901607629141301,
      "grad_norm": 2.546875,
      "learning_rate": 2.0340688126150403e-05,
      "loss": 0.8379,
      "step": 542580
    },
    {
      "epoch": 1.9016426766481966,
      "grad_norm": 2.84375,
      "learning_rate": 2.03400390974867e-05,
      "loss": 0.7915,
      "step": 542590
    },
    {
      "epoch": 1.9016777241550922,
      "grad_norm": 2.890625,
      "learning_rate": 2.0339390068823e-05,
      "loss": 0.8745,
      "step": 542600
    },
    {
      "epoch": 1.901712771661988,
      "grad_norm": 2.96875,
      "learning_rate": 2.0338741040159297e-05,
      "loss": 0.8276,
      "step": 542610
    },
    {
      "epoch": 1.9017478191688835,
      "grad_norm": 2.828125,
      "learning_rate": 2.0338092011495595e-05,
      "loss": 0.7815,
      "step": 542620
    },
    {
      "epoch": 1.901782866675779,
      "grad_norm": 2.90625,
      "learning_rate": 2.0337442982831896e-05,
      "loss": 0.8432,
      "step": 542630
    },
    {
      "epoch": 1.9018179141826748,
      "grad_norm": 2.8125,
      "learning_rate": 2.0336793954168194e-05,
      "loss": 0.8715,
      "step": 542640
    },
    {
      "epoch": 1.90185296168957,
      "grad_norm": 2.609375,
      "learning_rate": 2.0336144925504492e-05,
      "loss": 0.729,
      "step": 542650
    },
    {
      "epoch": 1.9018880091964658,
      "grad_norm": 2.546875,
      "learning_rate": 2.033549589684079e-05,
      "loss": 0.8273,
      "step": 542660
    },
    {
      "epoch": 1.9019230567033614,
      "grad_norm": 2.8125,
      "learning_rate": 2.0334846868177088e-05,
      "loss": 0.8695,
      "step": 542670
    },
    {
      "epoch": 1.901958104210257,
      "grad_norm": 2.4375,
      "learning_rate": 2.0334197839513386e-05,
      "loss": 0.7607,
      "step": 542680
    },
    {
      "epoch": 1.9019931517171527,
      "grad_norm": 2.84375,
      "learning_rate": 2.0333548810849684e-05,
      "loss": 0.8059,
      "step": 542690
    },
    {
      "epoch": 1.9020281992240482,
      "grad_norm": 3.0625,
      "learning_rate": 2.0332899782185982e-05,
      "loss": 0.8178,
      "step": 542700
    },
    {
      "epoch": 1.9020632467309437,
      "grad_norm": 2.890625,
      "learning_rate": 2.033225075352228e-05,
      "loss": 0.7627,
      "step": 542710
    },
    {
      "epoch": 1.9020982942378395,
      "grad_norm": 2.421875,
      "learning_rate": 2.0331601724858578e-05,
      "loss": 0.8541,
      "step": 542720
    },
    {
      "epoch": 1.902133341744735,
      "grad_norm": 2.75,
      "learning_rate": 2.0330952696194876e-05,
      "loss": 0.8894,
      "step": 542730
    },
    {
      "epoch": 1.9021683892516306,
      "grad_norm": 2.953125,
      "learning_rate": 2.0330303667531174e-05,
      "loss": 0.8442,
      "step": 542740
    },
    {
      "epoch": 1.9022034367585263,
      "grad_norm": 2.984375,
      "learning_rate": 2.0329654638867472e-05,
      "loss": 0.8607,
      "step": 542750
    },
    {
      "epoch": 1.9022384842654216,
      "grad_norm": 2.546875,
      "learning_rate": 2.032900561020377e-05,
      "loss": 0.8408,
      "step": 542760
    },
    {
      "epoch": 1.9022735317723174,
      "grad_norm": 2.578125,
      "learning_rate": 2.0328356581540068e-05,
      "loss": 0.7862,
      "step": 542770
    },
    {
      "epoch": 1.902308579279213,
      "grad_norm": 2.515625,
      "learning_rate": 2.0327707552876366e-05,
      "loss": 0.8104,
      "step": 542780
    },
    {
      "epoch": 1.9023436267861085,
      "grad_norm": 2.734375,
      "learning_rate": 2.0327058524212664e-05,
      "loss": 0.7653,
      "step": 542790
    },
    {
      "epoch": 1.9023786742930042,
      "grad_norm": 2.796875,
      "learning_rate": 2.0326409495548962e-05,
      "loss": 0.8428,
      "step": 542800
    },
    {
      "epoch": 1.9024137217998998,
      "grad_norm": 2.90625,
      "learning_rate": 2.032576046688526e-05,
      "loss": 0.8197,
      "step": 542810
    },
    {
      "epoch": 1.9024487693067953,
      "grad_norm": 2.859375,
      "learning_rate": 2.0325111438221558e-05,
      "loss": 0.789,
      "step": 542820
    },
    {
      "epoch": 1.902483816813691,
      "grad_norm": 3.109375,
      "learning_rate": 2.0324462409557856e-05,
      "loss": 0.8242,
      "step": 542830
    },
    {
      "epoch": 1.9025188643205866,
      "grad_norm": 3.328125,
      "learning_rate": 2.0323813380894154e-05,
      "loss": 0.7523,
      "step": 542840
    },
    {
      "epoch": 1.9025539118274821,
      "grad_norm": 3.03125,
      "learning_rate": 2.0323164352230452e-05,
      "loss": 0.8967,
      "step": 542850
    },
    {
      "epoch": 1.9025889593343779,
      "grad_norm": 2.640625,
      "learning_rate": 2.032251532356675e-05,
      "loss": 0.7834,
      "step": 542860
    },
    {
      "epoch": 1.9026240068412732,
      "grad_norm": 2.71875,
      "learning_rate": 2.0321866294903048e-05,
      "loss": 0.7791,
      "step": 542870
    },
    {
      "epoch": 1.902659054348169,
      "grad_norm": 2.859375,
      "learning_rate": 2.032121726623935e-05,
      "loss": 0.8507,
      "step": 542880
    },
    {
      "epoch": 1.9026941018550645,
      "grad_norm": 2.671875,
      "learning_rate": 2.0320568237575648e-05,
      "loss": 0.8126,
      "step": 542890
    },
    {
      "epoch": 1.90272914936196,
      "grad_norm": 2.796875,
      "learning_rate": 2.0319919208911946e-05,
      "loss": 0.8874,
      "step": 542900
    },
    {
      "epoch": 1.9027641968688558,
      "grad_norm": 3.0625,
      "learning_rate": 2.0319270180248244e-05,
      "loss": 0.8808,
      "step": 542910
    },
    {
      "epoch": 1.9027992443757513,
      "grad_norm": 3.046875,
      "learning_rate": 2.031862115158454e-05,
      "loss": 0.7913,
      "step": 542920
    },
    {
      "epoch": 1.9028342918826469,
      "grad_norm": 3.359375,
      "learning_rate": 2.0317972122920836e-05,
      "loss": 0.811,
      "step": 542930
    },
    {
      "epoch": 1.9028693393895426,
      "grad_norm": 3.25,
      "learning_rate": 2.0317323094257134e-05,
      "loss": 0.8578,
      "step": 542940
    },
    {
      "epoch": 1.9029043868964381,
      "grad_norm": 2.96875,
      "learning_rate": 2.0316674065593432e-05,
      "loss": 0.7371,
      "step": 542950
    },
    {
      "epoch": 1.9029394344033337,
      "grad_norm": 2.4375,
      "learning_rate": 2.031602503692973e-05,
      "loss": 0.781,
      "step": 542960
    },
    {
      "epoch": 1.9029744819102294,
      "grad_norm": 2.796875,
      "learning_rate": 2.0315376008266028e-05,
      "loss": 0.8419,
      "step": 542970
    },
    {
      "epoch": 1.9030095294171248,
      "grad_norm": 4.28125,
      "learning_rate": 2.0314726979602326e-05,
      "loss": 0.8939,
      "step": 542980
    },
    {
      "epoch": 1.9030445769240205,
      "grad_norm": 2.109375,
      "learning_rate": 2.0314077950938624e-05,
      "loss": 0.7631,
      "step": 542990
    },
    {
      "epoch": 1.9030796244309163,
      "grad_norm": 2.96875,
      "learning_rate": 2.0313428922274926e-05,
      "loss": 0.8464,
      "step": 543000
    },
    {
      "epoch": 1.9031146719378116,
      "grad_norm": 3.140625,
      "learning_rate": 2.0312779893611224e-05,
      "loss": 0.9017,
      "step": 543010
    },
    {
      "epoch": 1.9031497194447073,
      "grad_norm": 3.25,
      "learning_rate": 2.031213086494752e-05,
      "loss": 0.9338,
      "step": 543020
    },
    {
      "epoch": 1.9031847669516029,
      "grad_norm": 3.265625,
      "learning_rate": 2.031148183628382e-05,
      "loss": 0.8339,
      "step": 543030
    },
    {
      "epoch": 1.9032198144584984,
      "grad_norm": 3.015625,
      "learning_rate": 2.0310832807620118e-05,
      "loss": 0.7987,
      "step": 543040
    },
    {
      "epoch": 1.9032548619653942,
      "grad_norm": 2.609375,
      "learning_rate": 2.0310183778956416e-05,
      "loss": 0.8151,
      "step": 543050
    },
    {
      "epoch": 1.9032899094722897,
      "grad_norm": 2.984375,
      "learning_rate": 2.0309534750292714e-05,
      "loss": 0.818,
      "step": 543060
    },
    {
      "epoch": 1.9033249569791852,
      "grad_norm": 3.203125,
      "learning_rate": 2.030888572162901e-05,
      "loss": 0.874,
      "step": 543070
    },
    {
      "epoch": 1.903360004486081,
      "grad_norm": 2.53125,
      "learning_rate": 2.030823669296531e-05,
      "loss": 0.6726,
      "step": 543080
    },
    {
      "epoch": 1.9033950519929763,
      "grad_norm": 2.546875,
      "learning_rate": 2.0307587664301608e-05,
      "loss": 0.7669,
      "step": 543090
    },
    {
      "epoch": 1.903430099499872,
      "grad_norm": 2.703125,
      "learning_rate": 2.0306938635637906e-05,
      "loss": 0.7886,
      "step": 543100
    },
    {
      "epoch": 1.9034651470067678,
      "grad_norm": 3.03125,
      "learning_rate": 2.0306289606974204e-05,
      "loss": 0.7975,
      "step": 543110
    },
    {
      "epoch": 1.9035001945136631,
      "grad_norm": 2.734375,
      "learning_rate": 2.03056405783105e-05,
      "loss": 0.7913,
      "step": 543120
    },
    {
      "epoch": 1.903535242020559,
      "grad_norm": 3.046875,
      "learning_rate": 2.03049915496468e-05,
      "loss": 0.7761,
      "step": 543130
    },
    {
      "epoch": 1.9035702895274544,
      "grad_norm": 3.34375,
      "learning_rate": 2.0304342520983098e-05,
      "loss": 0.8372,
      "step": 543140
    },
    {
      "epoch": 1.90360533703435,
      "grad_norm": 2.671875,
      "learning_rate": 2.0303693492319396e-05,
      "loss": 0.751,
      "step": 543150
    },
    {
      "epoch": 1.9036403845412457,
      "grad_norm": 2.8125,
      "learning_rate": 2.0303044463655694e-05,
      "loss": 0.8444,
      "step": 543160
    },
    {
      "epoch": 1.9036754320481413,
      "grad_norm": 2.609375,
      "learning_rate": 2.030239543499199e-05,
      "loss": 0.7763,
      "step": 543170
    },
    {
      "epoch": 1.9037104795550368,
      "grad_norm": 2.71875,
      "learning_rate": 2.030174640632829e-05,
      "loss": 0.8661,
      "step": 543180
    },
    {
      "epoch": 1.9037455270619326,
      "grad_norm": 3.25,
      "learning_rate": 2.0301097377664588e-05,
      "loss": 0.8394,
      "step": 543190
    },
    {
      "epoch": 1.903780574568828,
      "grad_norm": 2.265625,
      "learning_rate": 2.0300448349000886e-05,
      "loss": 0.7543,
      "step": 543200
    },
    {
      "epoch": 1.9038156220757236,
      "grad_norm": 2.84375,
      "learning_rate": 2.0299799320337184e-05,
      "loss": 0.7858,
      "step": 543210
    },
    {
      "epoch": 1.9038506695826194,
      "grad_norm": 2.96875,
      "learning_rate": 2.029915029167348e-05,
      "loss": 0.8345,
      "step": 543220
    },
    {
      "epoch": 1.9038857170895147,
      "grad_norm": 2.765625,
      "learning_rate": 2.029850126300978e-05,
      "loss": 0.7306,
      "step": 543230
    },
    {
      "epoch": 1.9039207645964105,
      "grad_norm": 2.71875,
      "learning_rate": 2.0297852234346078e-05,
      "loss": 0.795,
      "step": 543240
    },
    {
      "epoch": 1.903955812103306,
      "grad_norm": 3.015625,
      "learning_rate": 2.029720320568238e-05,
      "loss": 0.8066,
      "step": 543250
    },
    {
      "epoch": 1.9039908596102015,
      "grad_norm": 2.96875,
      "learning_rate": 2.0296554177018677e-05,
      "loss": 0.8548,
      "step": 543260
    },
    {
      "epoch": 1.9040259071170973,
      "grad_norm": 2.796875,
      "learning_rate": 2.0295905148354975e-05,
      "loss": 0.8334,
      "step": 543270
    },
    {
      "epoch": 1.9040609546239928,
      "grad_norm": 2.984375,
      "learning_rate": 2.0295256119691273e-05,
      "loss": 0.8392,
      "step": 543280
    },
    {
      "epoch": 1.9040960021308884,
      "grad_norm": 2.9375,
      "learning_rate": 2.029460709102757e-05,
      "loss": 0.8115,
      "step": 543290
    },
    {
      "epoch": 1.9041310496377841,
      "grad_norm": 2.625,
      "learning_rate": 2.0293958062363866e-05,
      "loss": 0.7802,
      "step": 543300
    },
    {
      "epoch": 1.9041660971446797,
      "grad_norm": 3.09375,
      "learning_rate": 2.0293309033700164e-05,
      "loss": 0.853,
      "step": 543310
    },
    {
      "epoch": 1.9042011446515752,
      "grad_norm": 2.59375,
      "learning_rate": 2.029266000503646e-05,
      "loss": 0.8375,
      "step": 543320
    },
    {
      "epoch": 1.904236192158471,
      "grad_norm": 2.84375,
      "learning_rate": 2.029201097637276e-05,
      "loss": 0.7622,
      "step": 543330
    },
    {
      "epoch": 1.9042712396653663,
      "grad_norm": 2.65625,
      "learning_rate": 2.0291361947709058e-05,
      "loss": 0.8979,
      "step": 543340
    },
    {
      "epoch": 1.904306287172262,
      "grad_norm": 2.421875,
      "learning_rate": 2.0290712919045356e-05,
      "loss": 0.8247,
      "step": 543350
    },
    {
      "epoch": 1.9043413346791576,
      "grad_norm": 2.890625,
      "learning_rate": 2.0290063890381657e-05,
      "loss": 0.8104,
      "step": 543360
    },
    {
      "epoch": 1.904376382186053,
      "grad_norm": 2.703125,
      "learning_rate": 2.0289414861717955e-05,
      "loss": 0.8958,
      "step": 543370
    },
    {
      "epoch": 1.9044114296929489,
      "grad_norm": 3.265625,
      "learning_rate": 2.0288765833054253e-05,
      "loss": 0.8667,
      "step": 543380
    },
    {
      "epoch": 1.9044464771998444,
      "grad_norm": 2.671875,
      "learning_rate": 2.028811680439055e-05,
      "loss": 0.8382,
      "step": 543390
    },
    {
      "epoch": 1.90448152470674,
      "grad_norm": 2.9375,
      "learning_rate": 2.028746777572685e-05,
      "loss": 0.8199,
      "step": 543400
    },
    {
      "epoch": 1.9045165722136357,
      "grad_norm": 3.171875,
      "learning_rate": 2.0286818747063147e-05,
      "loss": 0.8326,
      "step": 543410
    },
    {
      "epoch": 1.9045516197205312,
      "grad_norm": 2.578125,
      "learning_rate": 2.0286169718399445e-05,
      "loss": 0.8111,
      "step": 543420
    },
    {
      "epoch": 1.9045866672274268,
      "grad_norm": 2.796875,
      "learning_rate": 2.0285520689735743e-05,
      "loss": 0.8203,
      "step": 543430
    },
    {
      "epoch": 1.9046217147343225,
      "grad_norm": 2.453125,
      "learning_rate": 2.028487166107204e-05,
      "loss": 0.8317,
      "step": 543440
    },
    {
      "epoch": 1.9046567622412178,
      "grad_norm": 2.34375,
      "learning_rate": 2.028422263240834e-05,
      "loss": 0.8005,
      "step": 543450
    },
    {
      "epoch": 1.9046918097481136,
      "grad_norm": 3.203125,
      "learning_rate": 2.0283573603744637e-05,
      "loss": 0.8779,
      "step": 543460
    },
    {
      "epoch": 1.9047268572550091,
      "grad_norm": 3.203125,
      "learning_rate": 2.0282924575080935e-05,
      "loss": 0.8473,
      "step": 543470
    },
    {
      "epoch": 1.9047619047619047,
      "grad_norm": 2.640625,
      "learning_rate": 2.0282275546417233e-05,
      "loss": 0.7752,
      "step": 543480
    },
    {
      "epoch": 1.9047969522688004,
      "grad_norm": 2.953125,
      "learning_rate": 2.028162651775353e-05,
      "loss": 0.7312,
      "step": 543490
    },
    {
      "epoch": 1.904831999775696,
      "grad_norm": 2.90625,
      "learning_rate": 2.028097748908983e-05,
      "loss": 0.8542,
      "step": 543500
    },
    {
      "epoch": 1.9048670472825915,
      "grad_norm": 3.4375,
      "learning_rate": 2.0280328460426127e-05,
      "loss": 0.8661,
      "step": 543510
    },
    {
      "epoch": 1.9049020947894872,
      "grad_norm": 3.40625,
      "learning_rate": 2.0279679431762425e-05,
      "loss": 0.7753,
      "step": 543520
    },
    {
      "epoch": 1.9049371422963828,
      "grad_norm": 3.640625,
      "learning_rate": 2.0279030403098723e-05,
      "loss": 0.8201,
      "step": 543530
    },
    {
      "epoch": 1.9049721898032783,
      "grad_norm": 3.375,
      "learning_rate": 2.027838137443502e-05,
      "loss": 0.7574,
      "step": 543540
    },
    {
      "epoch": 1.905007237310174,
      "grad_norm": 3.109375,
      "learning_rate": 2.027773234577132e-05,
      "loss": 0.7784,
      "step": 543550
    },
    {
      "epoch": 1.9050422848170694,
      "grad_norm": 3.0625,
      "learning_rate": 2.0277083317107617e-05,
      "loss": 0.7573,
      "step": 543560
    },
    {
      "epoch": 1.9050773323239651,
      "grad_norm": 2.734375,
      "learning_rate": 2.0276434288443915e-05,
      "loss": 0.7603,
      "step": 543570
    },
    {
      "epoch": 1.9051123798308607,
      "grad_norm": 2.921875,
      "learning_rate": 2.0275785259780213e-05,
      "loss": 0.7978,
      "step": 543580
    },
    {
      "epoch": 1.9051474273377562,
      "grad_norm": 2.890625,
      "learning_rate": 2.027513623111651e-05,
      "loss": 0.7414,
      "step": 543590
    },
    {
      "epoch": 1.905182474844652,
      "grad_norm": 3.015625,
      "learning_rate": 2.027448720245281e-05,
      "loss": 0.8043,
      "step": 543600
    },
    {
      "epoch": 1.9052175223515475,
      "grad_norm": 2.921875,
      "learning_rate": 2.0273838173789107e-05,
      "loss": 0.8388,
      "step": 543610
    },
    {
      "epoch": 1.905252569858443,
      "grad_norm": 2.84375,
      "learning_rate": 2.027318914512541e-05,
      "loss": 0.8281,
      "step": 543620
    },
    {
      "epoch": 1.9052876173653388,
      "grad_norm": 2.875,
      "learning_rate": 2.0272540116461706e-05,
      "loss": 0.8034,
      "step": 543630
    },
    {
      "epoch": 1.9053226648722343,
      "grad_norm": 2.625,
      "learning_rate": 2.0271891087798004e-05,
      "loss": 0.8177,
      "step": 543640
    },
    {
      "epoch": 1.9053577123791299,
      "grad_norm": 3.234375,
      "learning_rate": 2.0271242059134302e-05,
      "loss": 0.8564,
      "step": 543650
    },
    {
      "epoch": 1.9053927598860256,
      "grad_norm": 2.484375,
      "learning_rate": 2.02705930304706e-05,
      "loss": 0.7822,
      "step": 543660
    },
    {
      "epoch": 1.905427807392921,
      "grad_norm": 2.546875,
      "learning_rate": 2.02699440018069e-05,
      "loss": 0.864,
      "step": 543670
    },
    {
      "epoch": 1.9054628548998167,
      "grad_norm": 2.3125,
      "learning_rate": 2.0269294973143193e-05,
      "loss": 0.8635,
      "step": 543680
    },
    {
      "epoch": 1.9054979024067125,
      "grad_norm": 2.5625,
      "learning_rate": 2.026864594447949e-05,
      "loss": 0.8142,
      "step": 543690
    },
    {
      "epoch": 1.9055329499136078,
      "grad_norm": 3.171875,
      "learning_rate": 2.026799691581579e-05,
      "loss": 0.8848,
      "step": 543700
    },
    {
      "epoch": 1.9055679974205035,
      "grad_norm": 3.0,
      "learning_rate": 2.0267347887152087e-05,
      "loss": 0.8276,
      "step": 543710
    },
    {
      "epoch": 1.905603044927399,
      "grad_norm": 2.546875,
      "learning_rate": 2.0266698858488385e-05,
      "loss": 0.8683,
      "step": 543720
    },
    {
      "epoch": 1.9056380924342946,
      "grad_norm": 3.34375,
      "learning_rate": 2.0266049829824686e-05,
      "loss": 0.8462,
      "step": 543730
    },
    {
      "epoch": 1.9056731399411904,
      "grad_norm": 2.796875,
      "learning_rate": 2.0265400801160984e-05,
      "loss": 0.7843,
      "step": 543740
    },
    {
      "epoch": 1.905708187448086,
      "grad_norm": 3.03125,
      "learning_rate": 2.0264751772497282e-05,
      "loss": 0.8734,
      "step": 543750
    },
    {
      "epoch": 1.9057432349549814,
      "grad_norm": 3.0,
      "learning_rate": 2.026410274383358e-05,
      "loss": 0.7251,
      "step": 543760
    },
    {
      "epoch": 1.9057782824618772,
      "grad_norm": 2.859375,
      "learning_rate": 2.026345371516988e-05,
      "loss": 0.8688,
      "step": 543770
    },
    {
      "epoch": 1.9058133299687725,
      "grad_norm": 2.71875,
      "learning_rate": 2.0262804686506176e-05,
      "loss": 0.8664,
      "step": 543780
    },
    {
      "epoch": 1.9058483774756683,
      "grad_norm": 3.359375,
      "learning_rate": 2.0262155657842474e-05,
      "loss": 0.9012,
      "step": 543790
    },
    {
      "epoch": 1.905883424982564,
      "grad_norm": 3.046875,
      "learning_rate": 2.0261506629178772e-05,
      "loss": 0.8506,
      "step": 543800
    },
    {
      "epoch": 1.9059184724894593,
      "grad_norm": 3.125,
      "learning_rate": 2.026085760051507e-05,
      "loss": 0.8309,
      "step": 543810
    },
    {
      "epoch": 1.905953519996355,
      "grad_norm": 2.671875,
      "learning_rate": 2.026020857185137e-05,
      "loss": 0.8936,
      "step": 543820
    },
    {
      "epoch": 1.9059885675032506,
      "grad_norm": 2.796875,
      "learning_rate": 2.0259559543187666e-05,
      "loss": 0.7717,
      "step": 543830
    },
    {
      "epoch": 1.9060236150101462,
      "grad_norm": 2.75,
      "learning_rate": 2.0258910514523964e-05,
      "loss": 0.8617,
      "step": 543840
    },
    {
      "epoch": 1.906058662517042,
      "grad_norm": 2.734375,
      "learning_rate": 2.0258261485860262e-05,
      "loss": 0.7808,
      "step": 543850
    },
    {
      "epoch": 1.9060937100239375,
      "grad_norm": 2.8125,
      "learning_rate": 2.025761245719656e-05,
      "loss": 0.8678,
      "step": 543860
    },
    {
      "epoch": 1.906128757530833,
      "grad_norm": 2.875,
      "learning_rate": 2.025696342853286e-05,
      "loss": 0.8252,
      "step": 543870
    },
    {
      "epoch": 1.9061638050377288,
      "grad_norm": 2.703125,
      "learning_rate": 2.0256314399869156e-05,
      "loss": 0.7999,
      "step": 543880
    },
    {
      "epoch": 1.9061988525446243,
      "grad_norm": 3.0625,
      "learning_rate": 2.0255665371205454e-05,
      "loss": 0.8163,
      "step": 543890
    },
    {
      "epoch": 1.9062339000515198,
      "grad_norm": 2.484375,
      "learning_rate": 2.0255016342541752e-05,
      "loss": 0.8241,
      "step": 543900
    },
    {
      "epoch": 1.9062689475584156,
      "grad_norm": 3.09375,
      "learning_rate": 2.025436731387805e-05,
      "loss": 0.8265,
      "step": 543910
    },
    {
      "epoch": 1.906303995065311,
      "grad_norm": 2.515625,
      "learning_rate": 2.025371828521435e-05,
      "loss": 0.8591,
      "step": 543920
    },
    {
      "epoch": 1.9063390425722067,
      "grad_norm": 2.59375,
      "learning_rate": 2.0253069256550646e-05,
      "loss": 0.7853,
      "step": 543930
    },
    {
      "epoch": 1.9063740900791022,
      "grad_norm": 3.09375,
      "learning_rate": 2.0252420227886944e-05,
      "loss": 0.832,
      "step": 543940
    },
    {
      "epoch": 1.9064091375859977,
      "grad_norm": 2.59375,
      "learning_rate": 2.0251771199223242e-05,
      "loss": 0.8152,
      "step": 543950
    },
    {
      "epoch": 1.9064441850928935,
      "grad_norm": 3.84375,
      "learning_rate": 2.025112217055954e-05,
      "loss": 0.8496,
      "step": 543960
    },
    {
      "epoch": 1.906479232599789,
      "grad_norm": 3.28125,
      "learning_rate": 2.025047314189584e-05,
      "loss": 0.8052,
      "step": 543970
    },
    {
      "epoch": 1.9065142801066846,
      "grad_norm": 3.09375,
      "learning_rate": 2.024982411323214e-05,
      "loss": 0.7884,
      "step": 543980
    },
    {
      "epoch": 1.9065493276135803,
      "grad_norm": 2.515625,
      "learning_rate": 2.0249175084568438e-05,
      "loss": 0.8398,
      "step": 543990
    },
    {
      "epoch": 1.9065843751204758,
      "grad_norm": 3.234375,
      "learning_rate": 2.0248526055904736e-05,
      "loss": 0.8283,
      "step": 544000
    },
    {
      "epoch": 1.9066194226273714,
      "grad_norm": 2.640625,
      "learning_rate": 2.0247877027241034e-05,
      "loss": 0.7685,
      "step": 544010
    },
    {
      "epoch": 1.9066544701342671,
      "grad_norm": 2.5625,
      "learning_rate": 2.0247227998577332e-05,
      "loss": 0.8762,
      "step": 544020
    },
    {
      "epoch": 1.9066895176411625,
      "grad_norm": 3.21875,
      "learning_rate": 2.024657896991363e-05,
      "loss": 0.8515,
      "step": 544030
    },
    {
      "epoch": 1.9067245651480582,
      "grad_norm": 3.421875,
      "learning_rate": 2.0245929941249928e-05,
      "loss": 0.8609,
      "step": 544040
    },
    {
      "epoch": 1.9067596126549538,
      "grad_norm": 3.25,
      "learning_rate": 2.0245280912586222e-05,
      "loss": 0.8098,
      "step": 544050
    },
    {
      "epoch": 1.9067946601618493,
      "grad_norm": 2.734375,
      "learning_rate": 2.024463188392252e-05,
      "loss": 0.8389,
      "step": 544060
    },
    {
      "epoch": 1.906829707668745,
      "grad_norm": 2.96875,
      "learning_rate": 2.024398285525882e-05,
      "loss": 0.7891,
      "step": 544070
    },
    {
      "epoch": 1.9068647551756406,
      "grad_norm": 2.8125,
      "learning_rate": 2.0243333826595116e-05,
      "loss": 0.8765,
      "step": 544080
    },
    {
      "epoch": 1.9068998026825361,
      "grad_norm": 3.109375,
      "learning_rate": 2.0242684797931414e-05,
      "loss": 0.7894,
      "step": 544090
    },
    {
      "epoch": 1.9069348501894319,
      "grad_norm": 2.78125,
      "learning_rate": 2.0242035769267716e-05,
      "loss": 0.8461,
      "step": 544100
    },
    {
      "epoch": 1.9069698976963274,
      "grad_norm": 3.140625,
      "learning_rate": 2.0241386740604014e-05,
      "loss": 0.8065,
      "step": 544110
    },
    {
      "epoch": 1.907004945203223,
      "grad_norm": 3.0625,
      "learning_rate": 2.0240737711940312e-05,
      "loss": 0.8762,
      "step": 544120
    },
    {
      "epoch": 1.9070399927101187,
      "grad_norm": 3.09375,
      "learning_rate": 2.024008868327661e-05,
      "loss": 0.8272,
      "step": 544130
    },
    {
      "epoch": 1.907075040217014,
      "grad_norm": 3.078125,
      "learning_rate": 2.0239439654612908e-05,
      "loss": 0.8726,
      "step": 544140
    },
    {
      "epoch": 1.9071100877239098,
      "grad_norm": 2.8125,
      "learning_rate": 2.0238790625949206e-05,
      "loss": 0.798,
      "step": 544150
    },
    {
      "epoch": 1.9071451352308053,
      "grad_norm": 2.734375,
      "learning_rate": 2.0238141597285504e-05,
      "loss": 0.8372,
      "step": 544160
    },
    {
      "epoch": 1.9071801827377008,
      "grad_norm": 2.515625,
      "learning_rate": 2.0237492568621802e-05,
      "loss": 0.7845,
      "step": 544170
    },
    {
      "epoch": 1.9072152302445966,
      "grad_norm": 3.078125,
      "learning_rate": 2.02368435399581e-05,
      "loss": 0.792,
      "step": 544180
    },
    {
      "epoch": 1.9072502777514921,
      "grad_norm": 2.53125,
      "learning_rate": 2.0236194511294398e-05,
      "loss": 0.7572,
      "step": 544190
    },
    {
      "epoch": 1.9072853252583877,
      "grad_norm": 3.0,
      "learning_rate": 2.0235545482630696e-05,
      "loss": 0.8062,
      "step": 544200
    },
    {
      "epoch": 1.9073203727652834,
      "grad_norm": 3.109375,
      "learning_rate": 2.0234896453966994e-05,
      "loss": 0.795,
      "step": 544210
    },
    {
      "epoch": 1.907355420272179,
      "grad_norm": 2.890625,
      "learning_rate": 2.0234247425303292e-05,
      "loss": 0.8022,
      "step": 544220
    },
    {
      "epoch": 1.9073904677790745,
      "grad_norm": 2.859375,
      "learning_rate": 2.0233598396639593e-05,
      "loss": 0.8744,
      "step": 544230
    },
    {
      "epoch": 1.9074255152859703,
      "grad_norm": 2.6875,
      "learning_rate": 2.0232949367975888e-05,
      "loss": 0.8198,
      "step": 544240
    },
    {
      "epoch": 1.9074605627928656,
      "grad_norm": 3.109375,
      "learning_rate": 2.0232300339312186e-05,
      "loss": 0.7877,
      "step": 544250
    },
    {
      "epoch": 1.9074956102997613,
      "grad_norm": 2.984375,
      "learning_rate": 2.0231651310648484e-05,
      "loss": 0.7564,
      "step": 544260
    },
    {
      "epoch": 1.9075306578066569,
      "grad_norm": 2.96875,
      "learning_rate": 2.0231002281984782e-05,
      "loss": 0.79,
      "step": 544270
    },
    {
      "epoch": 1.9075657053135524,
      "grad_norm": 2.828125,
      "learning_rate": 2.023035325332108e-05,
      "loss": 0.7868,
      "step": 544280
    },
    {
      "epoch": 1.9076007528204482,
      "grad_norm": 3.125,
      "learning_rate": 2.0229704224657378e-05,
      "loss": 0.8532,
      "step": 544290
    },
    {
      "epoch": 1.9076358003273437,
      "grad_norm": 2.734375,
      "learning_rate": 2.0229055195993676e-05,
      "loss": 0.8327,
      "step": 544300
    },
    {
      "epoch": 1.9076708478342392,
      "grad_norm": 2.828125,
      "learning_rate": 2.0228406167329974e-05,
      "loss": 0.8307,
      "step": 544310
    },
    {
      "epoch": 1.907705895341135,
      "grad_norm": 3.078125,
      "learning_rate": 2.0227757138666272e-05,
      "loss": 0.8111,
      "step": 544320
    },
    {
      "epoch": 1.9077409428480305,
      "grad_norm": 2.375,
      "learning_rate": 2.022710811000257e-05,
      "loss": 0.7466,
      "step": 544330
    },
    {
      "epoch": 1.907775990354926,
      "grad_norm": 2.90625,
      "learning_rate": 2.0226459081338868e-05,
      "loss": 0.8205,
      "step": 544340
    },
    {
      "epoch": 1.9078110378618218,
      "grad_norm": 3.25,
      "learning_rate": 2.022581005267517e-05,
      "loss": 0.8234,
      "step": 544350
    },
    {
      "epoch": 1.9078460853687171,
      "grad_norm": 2.921875,
      "learning_rate": 2.0225161024011467e-05,
      "loss": 0.8124,
      "step": 544360
    },
    {
      "epoch": 1.907881132875613,
      "grad_norm": 3.265625,
      "learning_rate": 2.0224511995347765e-05,
      "loss": 0.7927,
      "step": 544370
    },
    {
      "epoch": 1.9079161803825087,
      "grad_norm": 2.890625,
      "learning_rate": 2.0223862966684063e-05,
      "loss": 0.8187,
      "step": 544380
    },
    {
      "epoch": 1.907951227889404,
      "grad_norm": 2.890625,
      "learning_rate": 2.022321393802036e-05,
      "loss": 0.8325,
      "step": 544390
    },
    {
      "epoch": 1.9079862753962997,
      "grad_norm": 3.171875,
      "learning_rate": 2.022256490935666e-05,
      "loss": 0.8291,
      "step": 544400
    },
    {
      "epoch": 1.9080213229031953,
      "grad_norm": 2.890625,
      "learning_rate": 2.0221915880692957e-05,
      "loss": 0.8056,
      "step": 544410
    },
    {
      "epoch": 1.9080563704100908,
      "grad_norm": 3.28125,
      "learning_rate": 2.0221266852029255e-05,
      "loss": 0.84,
      "step": 544420
    },
    {
      "epoch": 1.9080914179169866,
      "grad_norm": 2.90625,
      "learning_rate": 2.022061782336555e-05,
      "loss": 0.7607,
      "step": 544430
    },
    {
      "epoch": 1.908126465423882,
      "grad_norm": 3.09375,
      "learning_rate": 2.0219968794701848e-05,
      "loss": 0.8178,
      "step": 544440
    },
    {
      "epoch": 1.9081615129307776,
      "grad_norm": 2.265625,
      "learning_rate": 2.0219319766038146e-05,
      "loss": 0.8049,
      "step": 544450
    },
    {
      "epoch": 1.9081965604376734,
      "grad_norm": 3.09375,
      "learning_rate": 2.0218670737374447e-05,
      "loss": 0.8893,
      "step": 544460
    },
    {
      "epoch": 1.9082316079445687,
      "grad_norm": 3.15625,
      "learning_rate": 2.0218021708710745e-05,
      "loss": 0.8621,
      "step": 544470
    },
    {
      "epoch": 1.9082666554514645,
      "grad_norm": 2.859375,
      "learning_rate": 2.0217372680047043e-05,
      "loss": 0.8638,
      "step": 544480
    },
    {
      "epoch": 1.9083017029583602,
      "grad_norm": 2.65625,
      "learning_rate": 2.021672365138334e-05,
      "loss": 0.7626,
      "step": 544490
    },
    {
      "epoch": 1.9083367504652555,
      "grad_norm": 2.890625,
      "learning_rate": 2.021607462271964e-05,
      "loss": 0.7981,
      "step": 544500
    },
    {
      "epoch": 1.9083717979721513,
      "grad_norm": 3.375,
      "learning_rate": 2.0215425594055937e-05,
      "loss": 0.8475,
      "step": 544510
    },
    {
      "epoch": 1.9084068454790468,
      "grad_norm": 2.859375,
      "learning_rate": 2.0214776565392235e-05,
      "loss": 0.7484,
      "step": 544520
    },
    {
      "epoch": 1.9084418929859424,
      "grad_norm": 3.078125,
      "learning_rate": 2.0214127536728533e-05,
      "loss": 0.8415,
      "step": 544530
    },
    {
      "epoch": 1.9084769404928381,
      "grad_norm": 2.796875,
      "learning_rate": 2.021347850806483e-05,
      "loss": 0.8416,
      "step": 544540
    },
    {
      "epoch": 1.9085119879997337,
      "grad_norm": 3.09375,
      "learning_rate": 2.021282947940113e-05,
      "loss": 0.8608,
      "step": 544550
    },
    {
      "epoch": 1.9085470355066292,
      "grad_norm": 2.890625,
      "learning_rate": 2.0212180450737427e-05,
      "loss": 0.8235,
      "step": 544560
    },
    {
      "epoch": 1.908582083013525,
      "grad_norm": 3.171875,
      "learning_rate": 2.0211531422073725e-05,
      "loss": 0.8804,
      "step": 544570
    },
    {
      "epoch": 1.9086171305204205,
      "grad_norm": 2.78125,
      "learning_rate": 2.0210882393410023e-05,
      "loss": 0.8134,
      "step": 544580
    },
    {
      "epoch": 1.908652178027316,
      "grad_norm": 2.734375,
      "learning_rate": 2.021023336474632e-05,
      "loss": 0.8467,
      "step": 544590
    },
    {
      "epoch": 1.9086872255342118,
      "grad_norm": 2.671875,
      "learning_rate": 2.0209584336082623e-05,
      "loss": 0.7114,
      "step": 544600
    },
    {
      "epoch": 1.908722273041107,
      "grad_norm": 2.65625,
      "learning_rate": 2.020893530741892e-05,
      "loss": 0.814,
      "step": 544610
    },
    {
      "epoch": 1.9087573205480028,
      "grad_norm": 2.390625,
      "learning_rate": 2.0208286278755215e-05,
      "loss": 0.7739,
      "step": 544620
    },
    {
      "epoch": 1.9087923680548984,
      "grad_norm": 2.921875,
      "learning_rate": 2.0207637250091513e-05,
      "loss": 0.7745,
      "step": 544630
    },
    {
      "epoch": 1.908827415561794,
      "grad_norm": 2.515625,
      "learning_rate": 2.020698822142781e-05,
      "loss": 0.8975,
      "step": 544640
    },
    {
      "epoch": 1.9088624630686897,
      "grad_norm": 2.734375,
      "learning_rate": 2.020633919276411e-05,
      "loss": 0.8046,
      "step": 544650
    },
    {
      "epoch": 1.9088975105755852,
      "grad_norm": 2.734375,
      "learning_rate": 2.0205690164100407e-05,
      "loss": 0.8022,
      "step": 544660
    },
    {
      "epoch": 1.9089325580824807,
      "grad_norm": 3.109375,
      "learning_rate": 2.0205041135436705e-05,
      "loss": 0.8448,
      "step": 544670
    },
    {
      "epoch": 1.9089676055893765,
      "grad_norm": 3.015625,
      "learning_rate": 2.0204392106773003e-05,
      "loss": 0.8201,
      "step": 544680
    },
    {
      "epoch": 1.909002653096272,
      "grad_norm": 2.84375,
      "learning_rate": 2.02037430781093e-05,
      "loss": 0.8492,
      "step": 544690
    },
    {
      "epoch": 1.9090377006031676,
      "grad_norm": 2.796875,
      "learning_rate": 2.02030940494456e-05,
      "loss": 0.8516,
      "step": 544700
    },
    {
      "epoch": 1.9090727481100633,
      "grad_norm": 2.875,
      "learning_rate": 2.02024450207819e-05,
      "loss": 0.8089,
      "step": 544710
    },
    {
      "epoch": 1.9091077956169586,
      "grad_norm": 2.921875,
      "learning_rate": 2.02017959921182e-05,
      "loss": 0.8463,
      "step": 544720
    },
    {
      "epoch": 1.9091428431238544,
      "grad_norm": 3.28125,
      "learning_rate": 2.0201146963454497e-05,
      "loss": 0.8199,
      "step": 544730
    },
    {
      "epoch": 1.90917789063075,
      "grad_norm": 2.890625,
      "learning_rate": 2.0200497934790795e-05,
      "loss": 0.7757,
      "step": 544740
    },
    {
      "epoch": 1.9092129381376455,
      "grad_norm": 2.953125,
      "learning_rate": 2.0199848906127093e-05,
      "loss": 0.8062,
      "step": 544750
    },
    {
      "epoch": 1.9092479856445412,
      "grad_norm": 3.046875,
      "learning_rate": 2.019919987746339e-05,
      "loss": 0.8197,
      "step": 544760
    },
    {
      "epoch": 1.9092830331514368,
      "grad_norm": 3.0,
      "learning_rate": 2.019855084879969e-05,
      "loss": 0.8438,
      "step": 544770
    },
    {
      "epoch": 1.9093180806583323,
      "grad_norm": 3.421875,
      "learning_rate": 2.0197901820135987e-05,
      "loss": 0.8029,
      "step": 544780
    },
    {
      "epoch": 1.909353128165228,
      "grad_norm": 2.90625,
      "learning_rate": 2.0197252791472285e-05,
      "loss": 0.7233,
      "step": 544790
    },
    {
      "epoch": 1.9093881756721236,
      "grad_norm": 3.140625,
      "learning_rate": 2.0196603762808583e-05,
      "loss": 0.8152,
      "step": 544800
    },
    {
      "epoch": 1.9094232231790191,
      "grad_norm": 3.421875,
      "learning_rate": 2.0195954734144877e-05,
      "loss": 0.754,
      "step": 544810
    },
    {
      "epoch": 1.909458270685915,
      "grad_norm": 2.5625,
      "learning_rate": 2.0195305705481175e-05,
      "loss": 0.8541,
      "step": 544820
    },
    {
      "epoch": 1.9094933181928102,
      "grad_norm": 2.59375,
      "learning_rate": 2.0194656676817477e-05,
      "loss": 0.84,
      "step": 544830
    },
    {
      "epoch": 1.909528365699706,
      "grad_norm": 3.1875,
      "learning_rate": 2.0194007648153775e-05,
      "loss": 0.8869,
      "step": 544840
    },
    {
      "epoch": 1.9095634132066015,
      "grad_norm": 2.9375,
      "learning_rate": 2.0193358619490073e-05,
      "loss": 0.842,
      "step": 544850
    },
    {
      "epoch": 1.909598460713497,
      "grad_norm": 2.875,
      "learning_rate": 2.019270959082637e-05,
      "loss": 0.7618,
      "step": 544860
    },
    {
      "epoch": 1.9096335082203928,
      "grad_norm": 3.3125,
      "learning_rate": 2.019206056216267e-05,
      "loss": 0.9076,
      "step": 544870
    },
    {
      "epoch": 1.9096685557272883,
      "grad_norm": 3.109375,
      "learning_rate": 2.0191411533498967e-05,
      "loss": 0.8498,
      "step": 544880
    },
    {
      "epoch": 1.9097036032341839,
      "grad_norm": 2.90625,
      "learning_rate": 2.0190762504835265e-05,
      "loss": 0.8329,
      "step": 544890
    },
    {
      "epoch": 1.9097386507410796,
      "grad_norm": 3.03125,
      "learning_rate": 2.0190113476171563e-05,
      "loss": 0.8354,
      "step": 544900
    },
    {
      "epoch": 1.9097736982479752,
      "grad_norm": 3.546875,
      "learning_rate": 2.018946444750786e-05,
      "loss": 0.9056,
      "step": 544910
    },
    {
      "epoch": 1.9098087457548707,
      "grad_norm": 3.0,
      "learning_rate": 2.018881541884416e-05,
      "loss": 0.8381,
      "step": 544920
    },
    {
      "epoch": 1.9098437932617665,
      "grad_norm": 2.890625,
      "learning_rate": 2.0188166390180457e-05,
      "loss": 0.8371,
      "step": 544930
    },
    {
      "epoch": 1.9098788407686618,
      "grad_norm": 3.046875,
      "learning_rate": 2.0187517361516755e-05,
      "loss": 0.7622,
      "step": 544940
    },
    {
      "epoch": 1.9099138882755575,
      "grad_norm": 3.078125,
      "learning_rate": 2.0186868332853053e-05,
      "loss": 0.8051,
      "step": 544950
    },
    {
      "epoch": 1.909948935782453,
      "grad_norm": 2.859375,
      "learning_rate": 2.018621930418935e-05,
      "loss": 0.7253,
      "step": 544960
    },
    {
      "epoch": 1.9099839832893486,
      "grad_norm": 3.140625,
      "learning_rate": 2.0185570275525652e-05,
      "loss": 0.8451,
      "step": 544970
    },
    {
      "epoch": 1.9100190307962444,
      "grad_norm": 3.0625,
      "learning_rate": 2.018492124686195e-05,
      "loss": 0.944,
      "step": 544980
    },
    {
      "epoch": 1.91005407830314,
      "grad_norm": 2.4375,
      "learning_rate": 2.0184272218198245e-05,
      "loss": 0.7797,
      "step": 544990
    },
    {
      "epoch": 1.9100891258100354,
      "grad_norm": 3.34375,
      "learning_rate": 2.0183623189534543e-05,
      "loss": 0.8639,
      "step": 545000
    },
    {
      "epoch": 1.9100891258100354,
      "eval_loss": 0.7731403708457947,
      "eval_runtime": 562.8448,
      "eval_samples_per_second": 675.916,
      "eval_steps_per_second": 56.326,
      "step": 545000
    },
    {
      "epoch": 1.9101241733169312,
      "grad_norm": 2.4375,
      "learning_rate": 2.018297416087084e-05,
      "loss": 0.806,
      "step": 545010
    },
    {
      "epoch": 1.9101592208238267,
      "grad_norm": 2.796875,
      "learning_rate": 2.018232513220714e-05,
      "loss": 0.859,
      "step": 545020
    },
    {
      "epoch": 1.9101942683307223,
      "grad_norm": 3.09375,
      "learning_rate": 2.0181676103543437e-05,
      "loss": 0.7851,
      "step": 545030
    },
    {
      "epoch": 1.910229315837618,
      "grad_norm": 2.734375,
      "learning_rate": 2.0181027074879735e-05,
      "loss": 0.8568,
      "step": 545040
    },
    {
      "epoch": 1.9102643633445133,
      "grad_norm": 2.546875,
      "learning_rate": 2.0180378046216033e-05,
      "loss": 0.7644,
      "step": 545050
    },
    {
      "epoch": 1.910299410851409,
      "grad_norm": 3.25,
      "learning_rate": 2.017972901755233e-05,
      "loss": 0.8365,
      "step": 545060
    },
    {
      "epoch": 1.9103344583583048,
      "grad_norm": 2.75,
      "learning_rate": 2.017907998888863e-05,
      "loss": 0.85,
      "step": 545070
    },
    {
      "epoch": 1.9103695058652002,
      "grad_norm": 3.265625,
      "learning_rate": 2.017843096022493e-05,
      "loss": 0.7905,
      "step": 545080
    },
    {
      "epoch": 1.910404553372096,
      "grad_norm": 2.796875,
      "learning_rate": 2.0177781931561228e-05,
      "loss": 0.7911,
      "step": 545090
    },
    {
      "epoch": 1.9104396008789915,
      "grad_norm": 2.625,
      "learning_rate": 2.0177132902897526e-05,
      "loss": 0.7847,
      "step": 545100
    },
    {
      "epoch": 1.910474648385887,
      "grad_norm": 2.96875,
      "learning_rate": 2.0176483874233824e-05,
      "loss": 0.8976,
      "step": 545110
    },
    {
      "epoch": 1.9105096958927827,
      "grad_norm": 2.875,
      "learning_rate": 2.0175834845570122e-05,
      "loss": 0.8475,
      "step": 545120
    },
    {
      "epoch": 1.9105447433996783,
      "grad_norm": 3.046875,
      "learning_rate": 2.017518581690642e-05,
      "loss": 0.8008,
      "step": 545130
    },
    {
      "epoch": 1.9105797909065738,
      "grad_norm": 2.546875,
      "learning_rate": 2.0174536788242718e-05,
      "loss": 0.8923,
      "step": 545140
    },
    {
      "epoch": 1.9106148384134696,
      "grad_norm": 2.8125,
      "learning_rate": 2.0173887759579016e-05,
      "loss": 0.8379,
      "step": 545150
    },
    {
      "epoch": 1.910649885920365,
      "grad_norm": 3.171875,
      "learning_rate": 2.0173238730915314e-05,
      "loss": 0.8713,
      "step": 545160
    },
    {
      "epoch": 1.9106849334272606,
      "grad_norm": 2.75,
      "learning_rate": 2.0172589702251612e-05,
      "loss": 0.8431,
      "step": 545170
    },
    {
      "epoch": 1.9107199809341564,
      "grad_norm": 2.609375,
      "learning_rate": 2.0171940673587907e-05,
      "loss": 0.8114,
      "step": 545180
    },
    {
      "epoch": 1.9107550284410517,
      "grad_norm": 2.90625,
      "learning_rate": 2.0171291644924208e-05,
      "loss": 0.8463,
      "step": 545190
    },
    {
      "epoch": 1.9107900759479475,
      "grad_norm": 3.21875,
      "learning_rate": 2.0170642616260506e-05,
      "loss": 0.9109,
      "step": 545200
    },
    {
      "epoch": 1.910825123454843,
      "grad_norm": 2.625,
      "learning_rate": 2.0169993587596804e-05,
      "loss": 0.8285,
      "step": 545210
    },
    {
      "epoch": 1.9108601709617385,
      "grad_norm": 2.984375,
      "learning_rate": 2.0169344558933102e-05,
      "loss": 0.9003,
      "step": 545220
    },
    {
      "epoch": 1.9108952184686343,
      "grad_norm": 2.734375,
      "learning_rate": 2.01686955302694e-05,
      "loss": 0.8494,
      "step": 545230
    },
    {
      "epoch": 1.9109302659755298,
      "grad_norm": 2.703125,
      "learning_rate": 2.0168046501605698e-05,
      "loss": 0.7041,
      "step": 545240
    },
    {
      "epoch": 1.9109653134824254,
      "grad_norm": 3.28125,
      "learning_rate": 2.0167397472941996e-05,
      "loss": 0.8495,
      "step": 545250
    },
    {
      "epoch": 1.9110003609893211,
      "grad_norm": 3.109375,
      "learning_rate": 2.0166748444278294e-05,
      "loss": 0.8815,
      "step": 545260
    },
    {
      "epoch": 1.9110354084962167,
      "grad_norm": 3.046875,
      "learning_rate": 2.0166099415614592e-05,
      "loss": 0.7959,
      "step": 545270
    },
    {
      "epoch": 1.9110704560031122,
      "grad_norm": 2.765625,
      "learning_rate": 2.016545038695089e-05,
      "loss": 0.9673,
      "step": 545280
    },
    {
      "epoch": 1.911105503510008,
      "grad_norm": 2.921875,
      "learning_rate": 2.0164801358287188e-05,
      "loss": 0.7432,
      "step": 545290
    },
    {
      "epoch": 1.9111405510169033,
      "grad_norm": 2.359375,
      "learning_rate": 2.0164152329623486e-05,
      "loss": 0.8397,
      "step": 545300
    },
    {
      "epoch": 1.911175598523799,
      "grad_norm": 3.6875,
      "learning_rate": 2.0163503300959784e-05,
      "loss": 0.8865,
      "step": 545310
    },
    {
      "epoch": 1.9112106460306946,
      "grad_norm": 2.46875,
      "learning_rate": 2.0162854272296082e-05,
      "loss": 0.8425,
      "step": 545320
    },
    {
      "epoch": 1.91124569353759,
      "grad_norm": 3.171875,
      "learning_rate": 2.0162205243632383e-05,
      "loss": 0.9062,
      "step": 545330
    },
    {
      "epoch": 1.9112807410444859,
      "grad_norm": 2.5,
      "learning_rate": 2.016155621496868e-05,
      "loss": 0.8227,
      "step": 545340
    },
    {
      "epoch": 1.9113157885513814,
      "grad_norm": 2.65625,
      "learning_rate": 2.016090718630498e-05,
      "loss": 0.8575,
      "step": 545350
    },
    {
      "epoch": 1.911350836058277,
      "grad_norm": 3.09375,
      "learning_rate": 2.0160258157641277e-05,
      "loss": 0.8485,
      "step": 545360
    },
    {
      "epoch": 1.9113858835651727,
      "grad_norm": 3.046875,
      "learning_rate": 2.0159609128977572e-05,
      "loss": 0.8531,
      "step": 545370
    },
    {
      "epoch": 1.9114209310720682,
      "grad_norm": 3.0,
      "learning_rate": 2.015896010031387e-05,
      "loss": 0.7972,
      "step": 545380
    },
    {
      "epoch": 1.9114559785789638,
      "grad_norm": 2.796875,
      "learning_rate": 2.0158311071650168e-05,
      "loss": 0.8297,
      "step": 545390
    },
    {
      "epoch": 1.9114910260858595,
      "grad_norm": 3.0625,
      "learning_rate": 2.0157662042986466e-05,
      "loss": 0.8215,
      "step": 545400
    },
    {
      "epoch": 1.9115260735927548,
      "grad_norm": 2.671875,
      "learning_rate": 2.0157013014322764e-05,
      "loss": 0.6993,
      "step": 545410
    },
    {
      "epoch": 1.9115611210996506,
      "grad_norm": 3.09375,
      "learning_rate": 2.0156363985659062e-05,
      "loss": 0.7917,
      "step": 545420
    },
    {
      "epoch": 1.9115961686065461,
      "grad_norm": 3.25,
      "learning_rate": 2.015571495699536e-05,
      "loss": 0.8372,
      "step": 545430
    },
    {
      "epoch": 1.9116312161134417,
      "grad_norm": 2.734375,
      "learning_rate": 2.0155065928331658e-05,
      "loss": 0.8208,
      "step": 545440
    },
    {
      "epoch": 1.9116662636203374,
      "grad_norm": 2.8125,
      "learning_rate": 2.015441689966796e-05,
      "loss": 0.8327,
      "step": 545450
    },
    {
      "epoch": 1.911701311127233,
      "grad_norm": 2.796875,
      "learning_rate": 2.0153767871004257e-05,
      "loss": 0.8884,
      "step": 545460
    },
    {
      "epoch": 1.9117363586341285,
      "grad_norm": 2.890625,
      "learning_rate": 2.0153118842340555e-05,
      "loss": 0.8037,
      "step": 545470
    },
    {
      "epoch": 1.9117714061410243,
      "grad_norm": 3.203125,
      "learning_rate": 2.0152469813676853e-05,
      "loss": 0.8741,
      "step": 545480
    },
    {
      "epoch": 1.9118064536479198,
      "grad_norm": 2.796875,
      "learning_rate": 2.015182078501315e-05,
      "loss": 0.8663,
      "step": 545490
    },
    {
      "epoch": 1.9118415011548153,
      "grad_norm": 2.515625,
      "learning_rate": 2.015117175634945e-05,
      "loss": 0.8605,
      "step": 545500
    },
    {
      "epoch": 1.911876548661711,
      "grad_norm": 2.625,
      "learning_rate": 2.0150522727685747e-05,
      "loss": 0.8717,
      "step": 545510
    },
    {
      "epoch": 1.9119115961686064,
      "grad_norm": 2.5625,
      "learning_rate": 2.0149873699022045e-05,
      "loss": 0.8146,
      "step": 545520
    },
    {
      "epoch": 1.9119466436755022,
      "grad_norm": 3.109375,
      "learning_rate": 2.0149224670358343e-05,
      "loss": 0.8049,
      "step": 545530
    },
    {
      "epoch": 1.9119816911823977,
      "grad_norm": 2.9375,
      "learning_rate": 2.014857564169464e-05,
      "loss": 0.8858,
      "step": 545540
    },
    {
      "epoch": 1.9120167386892932,
      "grad_norm": 2.578125,
      "learning_rate": 2.014792661303094e-05,
      "loss": 0.7904,
      "step": 545550
    },
    {
      "epoch": 1.912051786196189,
      "grad_norm": 3.09375,
      "learning_rate": 2.0147277584367237e-05,
      "loss": 0.7975,
      "step": 545560
    },
    {
      "epoch": 1.9120868337030845,
      "grad_norm": 3.109375,
      "learning_rate": 2.0146628555703535e-05,
      "loss": 0.8783,
      "step": 545570
    },
    {
      "epoch": 1.91212188120998,
      "grad_norm": 2.4375,
      "learning_rate": 2.0145979527039833e-05,
      "loss": 0.8004,
      "step": 545580
    },
    {
      "epoch": 1.9121569287168758,
      "grad_norm": 3.1875,
      "learning_rate": 2.014533049837613e-05,
      "loss": 0.8416,
      "step": 545590
    },
    {
      "epoch": 1.9121919762237714,
      "grad_norm": 3.125,
      "learning_rate": 2.014468146971243e-05,
      "loss": 0.8344,
      "step": 545600
    },
    {
      "epoch": 1.9122270237306669,
      "grad_norm": 2.734375,
      "learning_rate": 2.0144032441048727e-05,
      "loss": 0.8507,
      "step": 545610
    },
    {
      "epoch": 1.9122620712375626,
      "grad_norm": 3.09375,
      "learning_rate": 2.0143383412385025e-05,
      "loss": 0.8183,
      "step": 545620
    },
    {
      "epoch": 1.912297118744458,
      "grad_norm": 3.296875,
      "learning_rate": 2.0142734383721323e-05,
      "loss": 0.8345,
      "step": 545630
    },
    {
      "epoch": 1.9123321662513537,
      "grad_norm": 3.375,
      "learning_rate": 2.014208535505762e-05,
      "loss": 0.8792,
      "step": 545640
    },
    {
      "epoch": 1.9123672137582495,
      "grad_norm": 2.71875,
      "learning_rate": 2.014143632639392e-05,
      "loss": 0.8522,
      "step": 545650
    },
    {
      "epoch": 1.9124022612651448,
      "grad_norm": 2.859375,
      "learning_rate": 2.0140787297730217e-05,
      "loss": 0.8647,
      "step": 545660
    },
    {
      "epoch": 1.9124373087720405,
      "grad_norm": 2.90625,
      "learning_rate": 2.0140138269066515e-05,
      "loss": 0.8339,
      "step": 545670
    },
    {
      "epoch": 1.912472356278936,
      "grad_norm": 3.25,
      "learning_rate": 2.0139489240402813e-05,
      "loss": 0.8849,
      "step": 545680
    },
    {
      "epoch": 1.9125074037858316,
      "grad_norm": 2.90625,
      "learning_rate": 2.013884021173911e-05,
      "loss": 0.8321,
      "step": 545690
    },
    {
      "epoch": 1.9125424512927274,
      "grad_norm": 3.234375,
      "learning_rate": 2.0138191183075413e-05,
      "loss": 0.8001,
      "step": 545700
    },
    {
      "epoch": 1.912577498799623,
      "grad_norm": 3.546875,
      "learning_rate": 2.013754215441171e-05,
      "loss": 0.7817,
      "step": 545710
    },
    {
      "epoch": 1.9126125463065184,
      "grad_norm": 2.90625,
      "learning_rate": 2.013689312574801e-05,
      "loss": 0.8189,
      "step": 545720
    },
    {
      "epoch": 1.9126475938134142,
      "grad_norm": 2.65625,
      "learning_rate": 2.0136244097084307e-05,
      "loss": 0.8414,
      "step": 545730
    },
    {
      "epoch": 1.9126826413203095,
      "grad_norm": 3.0,
      "learning_rate": 2.0135595068420605e-05,
      "loss": 0.8709,
      "step": 545740
    },
    {
      "epoch": 1.9127176888272053,
      "grad_norm": 3.09375,
      "learning_rate": 2.01349460397569e-05,
      "loss": 0.8551,
      "step": 545750
    },
    {
      "epoch": 1.912752736334101,
      "grad_norm": 2.75,
      "learning_rate": 2.0134297011093197e-05,
      "loss": 0.8712,
      "step": 545760
    },
    {
      "epoch": 1.9127877838409963,
      "grad_norm": 3.140625,
      "learning_rate": 2.0133647982429495e-05,
      "loss": 0.8152,
      "step": 545770
    },
    {
      "epoch": 1.912822831347892,
      "grad_norm": 2.53125,
      "learning_rate": 2.0132998953765793e-05,
      "loss": 0.7456,
      "step": 545780
    },
    {
      "epoch": 1.9128578788547876,
      "grad_norm": 3.234375,
      "learning_rate": 2.013234992510209e-05,
      "loss": 0.9561,
      "step": 545790
    },
    {
      "epoch": 1.9128929263616832,
      "grad_norm": 2.640625,
      "learning_rate": 2.013170089643839e-05,
      "loss": 0.8891,
      "step": 545800
    },
    {
      "epoch": 1.912927973868579,
      "grad_norm": 3.109375,
      "learning_rate": 2.013105186777469e-05,
      "loss": 0.8786,
      "step": 545810
    },
    {
      "epoch": 1.9129630213754745,
      "grad_norm": 2.015625,
      "learning_rate": 2.013040283911099e-05,
      "loss": 0.7859,
      "step": 545820
    },
    {
      "epoch": 1.91299806888237,
      "grad_norm": 2.984375,
      "learning_rate": 2.0129753810447287e-05,
      "loss": 0.811,
      "step": 545830
    },
    {
      "epoch": 1.9130331163892658,
      "grad_norm": 2.890625,
      "learning_rate": 2.0129104781783585e-05,
      "loss": 0.8773,
      "step": 545840
    },
    {
      "epoch": 1.9130681638961613,
      "grad_norm": 2.859375,
      "learning_rate": 2.0128455753119883e-05,
      "loss": 0.8293,
      "step": 545850
    },
    {
      "epoch": 1.9131032114030568,
      "grad_norm": 2.5625,
      "learning_rate": 2.012780672445618e-05,
      "loss": 0.9286,
      "step": 545860
    },
    {
      "epoch": 1.9131382589099526,
      "grad_norm": 2.6875,
      "learning_rate": 2.012715769579248e-05,
      "loss": 0.7841,
      "step": 545870
    },
    {
      "epoch": 1.913173306416848,
      "grad_norm": 3.046875,
      "learning_rate": 2.0126508667128777e-05,
      "loss": 0.8084,
      "step": 545880
    },
    {
      "epoch": 1.9132083539237437,
      "grad_norm": 3.15625,
      "learning_rate": 2.0125859638465075e-05,
      "loss": 0.8149,
      "step": 545890
    },
    {
      "epoch": 1.9132434014306392,
      "grad_norm": 2.5625,
      "learning_rate": 2.0125210609801373e-05,
      "loss": 0.8004,
      "step": 545900
    },
    {
      "epoch": 1.9132784489375347,
      "grad_norm": 2.65625,
      "learning_rate": 2.012456158113767e-05,
      "loss": 0.7366,
      "step": 545910
    },
    {
      "epoch": 1.9133134964444305,
      "grad_norm": 2.9375,
      "learning_rate": 2.012391255247397e-05,
      "loss": 0.7723,
      "step": 545920
    },
    {
      "epoch": 1.913348543951326,
      "grad_norm": 3.28125,
      "learning_rate": 2.0123263523810267e-05,
      "loss": 0.7794,
      "step": 545930
    },
    {
      "epoch": 1.9133835914582216,
      "grad_norm": 2.984375,
      "learning_rate": 2.0122614495146565e-05,
      "loss": 0.8426,
      "step": 545940
    },
    {
      "epoch": 1.9134186389651173,
      "grad_norm": 3.34375,
      "learning_rate": 2.0121965466482863e-05,
      "loss": 0.8686,
      "step": 545950
    },
    {
      "epoch": 1.9134536864720129,
      "grad_norm": 2.828125,
      "learning_rate": 2.012131643781916e-05,
      "loss": 0.7775,
      "step": 545960
    },
    {
      "epoch": 1.9134887339789084,
      "grad_norm": 2.75,
      "learning_rate": 2.012066740915546e-05,
      "loss": 0.7899,
      "step": 545970
    },
    {
      "epoch": 1.9135237814858042,
      "grad_norm": 2.828125,
      "learning_rate": 2.0120018380491757e-05,
      "loss": 0.8139,
      "step": 545980
    },
    {
      "epoch": 1.9135588289926995,
      "grad_norm": 2.75,
      "learning_rate": 2.0119369351828055e-05,
      "loss": 0.8222,
      "step": 545990
    },
    {
      "epoch": 1.9135938764995952,
      "grad_norm": 3.265625,
      "learning_rate": 2.0118720323164353e-05,
      "loss": 0.7396,
      "step": 546000
    },
    {
      "epoch": 1.9136289240064908,
      "grad_norm": 3.0,
      "learning_rate": 2.011807129450065e-05,
      "loss": 0.8523,
      "step": 546010
    },
    {
      "epoch": 1.9136639715133863,
      "grad_norm": 2.625,
      "learning_rate": 2.011742226583695e-05,
      "loss": 0.8021,
      "step": 546020
    },
    {
      "epoch": 1.913699019020282,
      "grad_norm": 3.0,
      "learning_rate": 2.0116773237173247e-05,
      "loss": 0.7663,
      "step": 546030
    },
    {
      "epoch": 1.9137340665271776,
      "grad_norm": 3.015625,
      "learning_rate": 2.0116124208509545e-05,
      "loss": 0.8714,
      "step": 546040
    },
    {
      "epoch": 1.9137691140340731,
      "grad_norm": 3.171875,
      "learning_rate": 2.0115475179845843e-05,
      "loss": 0.8835,
      "step": 546050
    },
    {
      "epoch": 1.9138041615409689,
      "grad_norm": 3.0625,
      "learning_rate": 2.011482615118214e-05,
      "loss": 0.7664,
      "step": 546060
    },
    {
      "epoch": 1.9138392090478644,
      "grad_norm": 2.5625,
      "learning_rate": 2.0114177122518442e-05,
      "loss": 0.8072,
      "step": 546070
    },
    {
      "epoch": 1.91387425655476,
      "grad_norm": 2.875,
      "learning_rate": 2.011352809385474e-05,
      "loss": 0.815,
      "step": 546080
    },
    {
      "epoch": 1.9139093040616557,
      "grad_norm": 3.34375,
      "learning_rate": 2.0112879065191038e-05,
      "loss": 0.8442,
      "step": 546090
    },
    {
      "epoch": 1.913944351568551,
      "grad_norm": 2.765625,
      "learning_rate": 2.0112230036527336e-05,
      "loss": 0.8215,
      "step": 546100
    },
    {
      "epoch": 1.9139793990754468,
      "grad_norm": 2.421875,
      "learning_rate": 2.0111581007863634e-05,
      "loss": 0.8047,
      "step": 546110
    },
    {
      "epoch": 1.9140144465823423,
      "grad_norm": 2.515625,
      "learning_rate": 2.011093197919993e-05,
      "loss": 0.8669,
      "step": 546120
    },
    {
      "epoch": 1.9140494940892379,
      "grad_norm": 2.90625,
      "learning_rate": 2.0110282950536227e-05,
      "loss": 0.8726,
      "step": 546130
    },
    {
      "epoch": 1.9140845415961336,
      "grad_norm": 3.125,
      "learning_rate": 2.0109633921872525e-05,
      "loss": 0.8795,
      "step": 546140
    },
    {
      "epoch": 1.9141195891030292,
      "grad_norm": 2.953125,
      "learning_rate": 2.0108984893208823e-05,
      "loss": 0.8519,
      "step": 546150
    },
    {
      "epoch": 1.9141546366099247,
      "grad_norm": 3.171875,
      "learning_rate": 2.010833586454512e-05,
      "loss": 0.8439,
      "step": 546160
    },
    {
      "epoch": 1.9141896841168204,
      "grad_norm": 2.890625,
      "learning_rate": 2.010768683588142e-05,
      "loss": 0.8662,
      "step": 546170
    },
    {
      "epoch": 1.914224731623716,
      "grad_norm": 3.234375,
      "learning_rate": 2.010703780721772e-05,
      "loss": 0.7675,
      "step": 546180
    },
    {
      "epoch": 1.9142597791306115,
      "grad_norm": 2.9375,
      "learning_rate": 2.0106388778554018e-05,
      "loss": 0.8484,
      "step": 546190
    },
    {
      "epoch": 1.9142948266375073,
      "grad_norm": 3.203125,
      "learning_rate": 2.0105739749890316e-05,
      "loss": 0.7872,
      "step": 546200
    },
    {
      "epoch": 1.9143298741444026,
      "grad_norm": 3.171875,
      "learning_rate": 2.0105090721226614e-05,
      "loss": 0.7676,
      "step": 546210
    },
    {
      "epoch": 1.9143649216512983,
      "grad_norm": 2.96875,
      "learning_rate": 2.0104441692562912e-05,
      "loss": 0.7884,
      "step": 546220
    },
    {
      "epoch": 1.9143999691581939,
      "grad_norm": 3.140625,
      "learning_rate": 2.010379266389921e-05,
      "loss": 0.7974,
      "step": 546230
    },
    {
      "epoch": 1.9144350166650894,
      "grad_norm": 3.03125,
      "learning_rate": 2.0103143635235508e-05,
      "loss": 0.8502,
      "step": 546240
    },
    {
      "epoch": 1.9144700641719852,
      "grad_norm": 2.921875,
      "learning_rate": 2.0102494606571806e-05,
      "loss": 0.7533,
      "step": 546250
    },
    {
      "epoch": 1.9145051116788807,
      "grad_norm": 2.765625,
      "learning_rate": 2.0101845577908104e-05,
      "loss": 0.8677,
      "step": 546260
    },
    {
      "epoch": 1.9145401591857762,
      "grad_norm": 2.578125,
      "learning_rate": 2.0101196549244402e-05,
      "loss": 0.7649,
      "step": 546270
    },
    {
      "epoch": 1.914575206692672,
      "grad_norm": 3.0,
      "learning_rate": 2.01005475205807e-05,
      "loss": 0.8601,
      "step": 546280
    },
    {
      "epoch": 1.9146102541995675,
      "grad_norm": 3.328125,
      "learning_rate": 2.0099898491916998e-05,
      "loss": 0.8141,
      "step": 546290
    },
    {
      "epoch": 1.914645301706463,
      "grad_norm": 3.09375,
      "learning_rate": 2.0099249463253296e-05,
      "loss": 0.8512,
      "step": 546300
    },
    {
      "epoch": 1.9146803492133588,
      "grad_norm": 3.078125,
      "learning_rate": 2.0098600434589594e-05,
      "loss": 0.8183,
      "step": 546310
    },
    {
      "epoch": 1.9147153967202541,
      "grad_norm": 2.5625,
      "learning_rate": 2.0097951405925892e-05,
      "loss": 0.7858,
      "step": 546320
    },
    {
      "epoch": 1.91475044422715,
      "grad_norm": 2.25,
      "learning_rate": 2.009730237726219e-05,
      "loss": 0.7313,
      "step": 546330
    },
    {
      "epoch": 1.9147854917340457,
      "grad_norm": 2.640625,
      "learning_rate": 2.0096653348598488e-05,
      "loss": 0.7335,
      "step": 546340
    },
    {
      "epoch": 1.914820539240941,
      "grad_norm": 2.578125,
      "learning_rate": 2.0096004319934786e-05,
      "loss": 0.8138,
      "step": 546350
    },
    {
      "epoch": 1.9148555867478367,
      "grad_norm": 2.703125,
      "learning_rate": 2.0095355291271084e-05,
      "loss": 0.8651,
      "step": 546360
    },
    {
      "epoch": 1.9148906342547323,
      "grad_norm": 2.8125,
      "learning_rate": 2.0094706262607382e-05,
      "loss": 0.8545,
      "step": 546370
    },
    {
      "epoch": 1.9149256817616278,
      "grad_norm": 2.859375,
      "learning_rate": 2.009405723394368e-05,
      "loss": 0.7651,
      "step": 546380
    },
    {
      "epoch": 1.9149607292685236,
      "grad_norm": 2.515625,
      "learning_rate": 2.0093408205279978e-05,
      "loss": 0.8389,
      "step": 546390
    },
    {
      "epoch": 1.914995776775419,
      "grad_norm": 3.109375,
      "learning_rate": 2.0092759176616276e-05,
      "loss": 0.8607,
      "step": 546400
    },
    {
      "epoch": 1.9150308242823146,
      "grad_norm": 3.34375,
      "learning_rate": 2.0092110147952574e-05,
      "loss": 0.7702,
      "step": 546410
    },
    {
      "epoch": 1.9150658717892104,
      "grad_norm": 3.46875,
      "learning_rate": 2.0091461119288872e-05,
      "loss": 0.7917,
      "step": 546420
    },
    {
      "epoch": 1.9151009192961057,
      "grad_norm": 2.9375,
      "learning_rate": 2.0090812090625173e-05,
      "loss": 0.8395,
      "step": 546430
    },
    {
      "epoch": 1.9151359668030015,
      "grad_norm": 2.640625,
      "learning_rate": 2.009016306196147e-05,
      "loss": 0.7199,
      "step": 546440
    },
    {
      "epoch": 1.9151710143098972,
      "grad_norm": 3.234375,
      "learning_rate": 2.008951403329777e-05,
      "loss": 0.874,
      "step": 546450
    },
    {
      "epoch": 1.9152060618167925,
      "grad_norm": 3.0625,
      "learning_rate": 2.0088865004634067e-05,
      "loss": 0.8789,
      "step": 546460
    },
    {
      "epoch": 1.9152411093236883,
      "grad_norm": 3.125,
      "learning_rate": 2.0088215975970365e-05,
      "loss": 0.8456,
      "step": 546470
    },
    {
      "epoch": 1.9152761568305838,
      "grad_norm": 2.640625,
      "learning_rate": 2.0087566947306663e-05,
      "loss": 0.7733,
      "step": 546480
    },
    {
      "epoch": 1.9153112043374794,
      "grad_norm": 3.109375,
      "learning_rate": 2.008691791864296e-05,
      "loss": 0.8446,
      "step": 546490
    },
    {
      "epoch": 1.9153462518443751,
      "grad_norm": 2.96875,
      "learning_rate": 2.0086268889979256e-05,
      "loss": 0.8617,
      "step": 546500
    },
    {
      "epoch": 1.9153812993512707,
      "grad_norm": 2.703125,
      "learning_rate": 2.0085619861315554e-05,
      "loss": 0.8723,
      "step": 546510
    },
    {
      "epoch": 1.9154163468581662,
      "grad_norm": 2.671875,
      "learning_rate": 2.0084970832651852e-05,
      "loss": 0.8563,
      "step": 546520
    },
    {
      "epoch": 1.915451394365062,
      "grad_norm": 2.921875,
      "learning_rate": 2.008432180398815e-05,
      "loss": 0.8369,
      "step": 546530
    },
    {
      "epoch": 1.9154864418719575,
      "grad_norm": 2.921875,
      "learning_rate": 2.0083672775324448e-05,
      "loss": 0.8639,
      "step": 546540
    },
    {
      "epoch": 1.915521489378853,
      "grad_norm": 2.9375,
      "learning_rate": 2.008302374666075e-05,
      "loss": 0.8238,
      "step": 546550
    },
    {
      "epoch": 1.9155565368857488,
      "grad_norm": 3.015625,
      "learning_rate": 2.0082374717997047e-05,
      "loss": 0.8495,
      "step": 546560
    },
    {
      "epoch": 1.915591584392644,
      "grad_norm": 2.59375,
      "learning_rate": 2.0081725689333345e-05,
      "loss": 0.8131,
      "step": 546570
    },
    {
      "epoch": 1.9156266318995399,
      "grad_norm": 2.5625,
      "learning_rate": 2.0081076660669643e-05,
      "loss": 0.7725,
      "step": 546580
    },
    {
      "epoch": 1.9156616794064354,
      "grad_norm": 3.375,
      "learning_rate": 2.008042763200594e-05,
      "loss": 0.7979,
      "step": 546590
    },
    {
      "epoch": 1.915696726913331,
      "grad_norm": 2.578125,
      "learning_rate": 2.007977860334224e-05,
      "loss": 0.8643,
      "step": 546600
    },
    {
      "epoch": 1.9157317744202267,
      "grad_norm": 3.15625,
      "learning_rate": 2.0079129574678537e-05,
      "loss": 0.8564,
      "step": 546610
    },
    {
      "epoch": 1.9157668219271222,
      "grad_norm": 2.953125,
      "learning_rate": 2.0078480546014835e-05,
      "loss": 0.7669,
      "step": 546620
    },
    {
      "epoch": 1.9158018694340178,
      "grad_norm": 2.9375,
      "learning_rate": 2.0077831517351133e-05,
      "loss": 0.8507,
      "step": 546630
    },
    {
      "epoch": 1.9158369169409135,
      "grad_norm": 2.703125,
      "learning_rate": 2.007718248868743e-05,
      "loss": 0.8435,
      "step": 546640
    },
    {
      "epoch": 1.915871964447809,
      "grad_norm": 2.859375,
      "learning_rate": 2.007653346002373e-05,
      "loss": 0.8514,
      "step": 546650
    },
    {
      "epoch": 1.9159070119547046,
      "grad_norm": 3.109375,
      "learning_rate": 2.0075884431360027e-05,
      "loss": 0.7854,
      "step": 546660
    },
    {
      "epoch": 1.9159420594616003,
      "grad_norm": 2.859375,
      "learning_rate": 2.0075235402696325e-05,
      "loss": 0.7957,
      "step": 546670
    },
    {
      "epoch": 1.9159771069684957,
      "grad_norm": 3.0,
      "learning_rate": 2.0074586374032623e-05,
      "loss": 0.7999,
      "step": 546680
    },
    {
      "epoch": 1.9160121544753914,
      "grad_norm": 3.015625,
      "learning_rate": 2.007393734536892e-05,
      "loss": 0.8171,
      "step": 546690
    },
    {
      "epoch": 1.916047201982287,
      "grad_norm": 2.65625,
      "learning_rate": 2.007328831670522e-05,
      "loss": 0.844,
      "step": 546700
    },
    {
      "epoch": 1.9160822494891825,
      "grad_norm": 2.515625,
      "learning_rate": 2.0072639288041517e-05,
      "loss": 0.7923,
      "step": 546710
    },
    {
      "epoch": 1.9161172969960782,
      "grad_norm": 3.46875,
      "learning_rate": 2.0071990259377815e-05,
      "loss": 0.8324,
      "step": 546720
    },
    {
      "epoch": 1.9161523445029738,
      "grad_norm": 2.984375,
      "learning_rate": 2.0071341230714113e-05,
      "loss": 0.8516,
      "step": 546730
    },
    {
      "epoch": 1.9161873920098693,
      "grad_norm": 2.859375,
      "learning_rate": 2.007069220205041e-05,
      "loss": 0.8069,
      "step": 546740
    },
    {
      "epoch": 1.916222439516765,
      "grad_norm": 3.078125,
      "learning_rate": 2.007004317338671e-05,
      "loss": 0.8328,
      "step": 546750
    },
    {
      "epoch": 1.9162574870236606,
      "grad_norm": 2.96875,
      "learning_rate": 2.0069394144723007e-05,
      "loss": 0.8052,
      "step": 546760
    },
    {
      "epoch": 1.9162925345305561,
      "grad_norm": 3.140625,
      "learning_rate": 2.0068745116059305e-05,
      "loss": 0.7942,
      "step": 546770
    },
    {
      "epoch": 1.916327582037452,
      "grad_norm": 2.921875,
      "learning_rate": 2.0068096087395603e-05,
      "loss": 0.7703,
      "step": 546780
    },
    {
      "epoch": 1.9163626295443472,
      "grad_norm": 2.71875,
      "learning_rate": 2.00674470587319e-05,
      "loss": 0.9067,
      "step": 546790
    },
    {
      "epoch": 1.916397677051243,
      "grad_norm": 3.1875,
      "learning_rate": 2.0066798030068203e-05,
      "loss": 0.8948,
      "step": 546800
    },
    {
      "epoch": 1.9164327245581385,
      "grad_norm": 2.5625,
      "learning_rate": 2.00661490014045e-05,
      "loss": 0.8599,
      "step": 546810
    },
    {
      "epoch": 1.916467772065034,
      "grad_norm": 3.09375,
      "learning_rate": 2.00654999727408e-05,
      "loss": 0.8588,
      "step": 546820
    },
    {
      "epoch": 1.9165028195719298,
      "grad_norm": 2.609375,
      "learning_rate": 2.0064850944077097e-05,
      "loss": 0.8035,
      "step": 546830
    },
    {
      "epoch": 1.9165378670788253,
      "grad_norm": 3.9375,
      "learning_rate": 2.0064201915413395e-05,
      "loss": 0.7829,
      "step": 546840
    },
    {
      "epoch": 1.9165729145857209,
      "grad_norm": 2.96875,
      "learning_rate": 2.0063552886749693e-05,
      "loss": 0.8822,
      "step": 546850
    },
    {
      "epoch": 1.9166079620926166,
      "grad_norm": 2.84375,
      "learning_rate": 2.006290385808599e-05,
      "loss": 0.8635,
      "step": 546860
    },
    {
      "epoch": 1.9166430095995122,
      "grad_norm": 3.265625,
      "learning_rate": 2.0062254829422285e-05,
      "loss": 0.8178,
      "step": 546870
    },
    {
      "epoch": 1.9166780571064077,
      "grad_norm": 3.140625,
      "learning_rate": 2.0061605800758583e-05,
      "loss": 0.7899,
      "step": 546880
    },
    {
      "epoch": 1.9167131046133035,
      "grad_norm": 2.796875,
      "learning_rate": 2.006095677209488e-05,
      "loss": 0.8751,
      "step": 546890
    },
    {
      "epoch": 1.9167481521201988,
      "grad_norm": 3.03125,
      "learning_rate": 2.006030774343118e-05,
      "loss": 0.8022,
      "step": 546900
    },
    {
      "epoch": 1.9167831996270945,
      "grad_norm": 3.046875,
      "learning_rate": 2.005965871476748e-05,
      "loss": 0.9125,
      "step": 546910
    },
    {
      "epoch": 1.91681824713399,
      "grad_norm": 2.90625,
      "learning_rate": 2.005900968610378e-05,
      "loss": 0.7671,
      "step": 546920
    },
    {
      "epoch": 1.9168532946408856,
      "grad_norm": 3.0,
      "learning_rate": 2.0058360657440077e-05,
      "loss": 0.8283,
      "step": 546930
    },
    {
      "epoch": 1.9168883421477814,
      "grad_norm": 3.015625,
      "learning_rate": 2.0057711628776375e-05,
      "loss": 0.8233,
      "step": 546940
    },
    {
      "epoch": 1.916923389654677,
      "grad_norm": 2.890625,
      "learning_rate": 2.0057062600112673e-05,
      "loss": 0.8778,
      "step": 546950
    },
    {
      "epoch": 1.9169584371615724,
      "grad_norm": 3.0,
      "learning_rate": 2.005641357144897e-05,
      "loss": 0.8317,
      "step": 546960
    },
    {
      "epoch": 1.9169934846684682,
      "grad_norm": 2.765625,
      "learning_rate": 2.005576454278527e-05,
      "loss": 0.8981,
      "step": 546970
    },
    {
      "epoch": 1.9170285321753637,
      "grad_norm": 2.578125,
      "learning_rate": 2.0055115514121567e-05,
      "loss": 0.8118,
      "step": 546980
    },
    {
      "epoch": 1.9170635796822593,
      "grad_norm": 2.75,
      "learning_rate": 2.0054466485457865e-05,
      "loss": 0.8062,
      "step": 546990
    },
    {
      "epoch": 1.917098627189155,
      "grad_norm": 3.015625,
      "learning_rate": 2.0053817456794163e-05,
      "loss": 0.8437,
      "step": 547000
    },
    {
      "epoch": 1.9171336746960503,
      "grad_norm": 3.0625,
      "learning_rate": 2.005316842813046e-05,
      "loss": 0.7745,
      "step": 547010
    },
    {
      "epoch": 1.917168722202946,
      "grad_norm": 2.8125,
      "learning_rate": 2.005251939946676e-05,
      "loss": 0.8445,
      "step": 547020
    },
    {
      "epoch": 1.9172037697098419,
      "grad_norm": 2.34375,
      "learning_rate": 2.0051870370803057e-05,
      "loss": 0.7764,
      "step": 547030
    },
    {
      "epoch": 1.9172388172167372,
      "grad_norm": 3.109375,
      "learning_rate": 2.0051221342139355e-05,
      "loss": 0.8255,
      "step": 547040
    },
    {
      "epoch": 1.917273864723633,
      "grad_norm": 3.0625,
      "learning_rate": 2.0050572313475656e-05,
      "loss": 0.8443,
      "step": 547050
    },
    {
      "epoch": 1.9173089122305285,
      "grad_norm": 3.015625,
      "learning_rate": 2.004992328481195e-05,
      "loss": 0.7822,
      "step": 547060
    },
    {
      "epoch": 1.917343959737424,
      "grad_norm": 4.46875,
      "learning_rate": 2.004927425614825e-05,
      "loss": 0.8602,
      "step": 547070
    },
    {
      "epoch": 1.9173790072443198,
      "grad_norm": 2.6875,
      "learning_rate": 2.0048625227484547e-05,
      "loss": 0.9034,
      "step": 547080
    },
    {
      "epoch": 1.9174140547512153,
      "grad_norm": 3.25,
      "learning_rate": 2.0047976198820845e-05,
      "loss": 0.8766,
      "step": 547090
    },
    {
      "epoch": 1.9174491022581108,
      "grad_norm": 3.25,
      "learning_rate": 2.0047327170157143e-05,
      "loss": 0.8072,
      "step": 547100
    },
    {
      "epoch": 1.9174841497650066,
      "grad_norm": 3.140625,
      "learning_rate": 2.004667814149344e-05,
      "loss": 0.8311,
      "step": 547110
    },
    {
      "epoch": 1.917519197271902,
      "grad_norm": 2.90625,
      "learning_rate": 2.004602911282974e-05,
      "loss": 0.7374,
      "step": 547120
    },
    {
      "epoch": 1.9175542447787977,
      "grad_norm": 3.109375,
      "learning_rate": 2.0045380084166037e-05,
      "loss": 0.8085,
      "step": 547130
    },
    {
      "epoch": 1.9175892922856934,
      "grad_norm": 2.71875,
      "learning_rate": 2.0044731055502335e-05,
      "loss": 0.8518,
      "step": 547140
    },
    {
      "epoch": 1.9176243397925887,
      "grad_norm": 3.40625,
      "learning_rate": 2.0044082026838633e-05,
      "loss": 0.8597,
      "step": 547150
    },
    {
      "epoch": 1.9176593872994845,
      "grad_norm": 2.65625,
      "learning_rate": 2.004343299817493e-05,
      "loss": 0.7777,
      "step": 547160
    },
    {
      "epoch": 1.91769443480638,
      "grad_norm": 3.046875,
      "learning_rate": 2.0042783969511232e-05,
      "loss": 0.9286,
      "step": 547170
    },
    {
      "epoch": 1.9177294823132756,
      "grad_norm": 2.765625,
      "learning_rate": 2.004213494084753e-05,
      "loss": 0.7595,
      "step": 547180
    },
    {
      "epoch": 1.9177645298201713,
      "grad_norm": 2.984375,
      "learning_rate": 2.0041485912183828e-05,
      "loss": 0.8853,
      "step": 547190
    },
    {
      "epoch": 1.9177995773270669,
      "grad_norm": 3.625,
      "learning_rate": 2.0040836883520126e-05,
      "loss": 0.8737,
      "step": 547200
    },
    {
      "epoch": 1.9178346248339624,
      "grad_norm": 3.171875,
      "learning_rate": 2.0040187854856424e-05,
      "loss": 0.7798,
      "step": 547210
    },
    {
      "epoch": 1.9178696723408581,
      "grad_norm": 2.65625,
      "learning_rate": 2.0039538826192722e-05,
      "loss": 0.8817,
      "step": 547220
    },
    {
      "epoch": 1.9179047198477537,
      "grad_norm": 2.953125,
      "learning_rate": 2.003888979752902e-05,
      "loss": 0.8099,
      "step": 547230
    },
    {
      "epoch": 1.9179397673546492,
      "grad_norm": 2.921875,
      "learning_rate": 2.0038240768865318e-05,
      "loss": 0.7947,
      "step": 547240
    },
    {
      "epoch": 1.917974814861545,
      "grad_norm": 2.921875,
      "learning_rate": 2.0037591740201613e-05,
      "loss": 0.8469,
      "step": 547250
    },
    {
      "epoch": 1.9180098623684403,
      "grad_norm": 2.765625,
      "learning_rate": 2.003694271153791e-05,
      "loss": 0.8298,
      "step": 547260
    },
    {
      "epoch": 1.918044909875336,
      "grad_norm": 2.75,
      "learning_rate": 2.003629368287421e-05,
      "loss": 0.8454,
      "step": 547270
    },
    {
      "epoch": 1.9180799573822316,
      "grad_norm": 3.234375,
      "learning_rate": 2.003564465421051e-05,
      "loss": 0.8657,
      "step": 547280
    },
    {
      "epoch": 1.9181150048891271,
      "grad_norm": 3.46875,
      "learning_rate": 2.0034995625546808e-05,
      "loss": 0.7771,
      "step": 547290
    },
    {
      "epoch": 1.9181500523960229,
      "grad_norm": 2.953125,
      "learning_rate": 2.0034346596883106e-05,
      "loss": 0.8392,
      "step": 547300
    },
    {
      "epoch": 1.9181850999029184,
      "grad_norm": 2.90625,
      "learning_rate": 2.0033697568219404e-05,
      "loss": 0.8663,
      "step": 547310
    },
    {
      "epoch": 1.918220147409814,
      "grad_norm": 3.03125,
      "learning_rate": 2.0033048539555702e-05,
      "loss": 0.8787,
      "step": 547320
    },
    {
      "epoch": 1.9182551949167097,
      "grad_norm": 2.5625,
      "learning_rate": 2.0032399510892e-05,
      "loss": 0.781,
      "step": 547330
    },
    {
      "epoch": 1.9182902424236052,
      "grad_norm": 3.125,
      "learning_rate": 2.0031750482228298e-05,
      "loss": 0.8046,
      "step": 547340
    },
    {
      "epoch": 1.9183252899305008,
      "grad_norm": 3.171875,
      "learning_rate": 2.0031101453564596e-05,
      "loss": 0.8468,
      "step": 547350
    },
    {
      "epoch": 1.9183603374373965,
      "grad_norm": 3.296875,
      "learning_rate": 2.0030452424900894e-05,
      "loss": 0.8722,
      "step": 547360
    },
    {
      "epoch": 1.9183953849442918,
      "grad_norm": 3.296875,
      "learning_rate": 2.0029803396237192e-05,
      "loss": 0.8446,
      "step": 547370
    },
    {
      "epoch": 1.9184304324511876,
      "grad_norm": 2.921875,
      "learning_rate": 2.002915436757349e-05,
      "loss": 0.7002,
      "step": 547380
    },
    {
      "epoch": 1.9184654799580831,
      "grad_norm": 3.328125,
      "learning_rate": 2.0028505338909788e-05,
      "loss": 0.82,
      "step": 547390
    },
    {
      "epoch": 1.9185005274649787,
      "grad_norm": 2.953125,
      "learning_rate": 2.0027856310246086e-05,
      "loss": 0.8185,
      "step": 547400
    },
    {
      "epoch": 1.9185355749718744,
      "grad_norm": 2.953125,
      "learning_rate": 2.0027207281582384e-05,
      "loss": 0.7622,
      "step": 547410
    },
    {
      "epoch": 1.91857062247877,
      "grad_norm": 2.765625,
      "learning_rate": 2.0026558252918686e-05,
      "loss": 0.7826,
      "step": 547420
    },
    {
      "epoch": 1.9186056699856655,
      "grad_norm": 3.09375,
      "learning_rate": 2.0025909224254984e-05,
      "loss": 0.8006,
      "step": 547430
    },
    {
      "epoch": 1.9186407174925613,
      "grad_norm": 2.859375,
      "learning_rate": 2.0025260195591278e-05,
      "loss": 0.8587,
      "step": 547440
    },
    {
      "epoch": 1.9186757649994568,
      "grad_norm": 2.9375,
      "learning_rate": 2.0024611166927576e-05,
      "loss": 0.8059,
      "step": 547450
    },
    {
      "epoch": 1.9187108125063523,
      "grad_norm": 2.796875,
      "learning_rate": 2.0023962138263874e-05,
      "loss": 0.82,
      "step": 547460
    },
    {
      "epoch": 1.918745860013248,
      "grad_norm": 2.984375,
      "learning_rate": 2.0023313109600172e-05,
      "loss": 0.8574,
      "step": 547470
    },
    {
      "epoch": 1.9187809075201434,
      "grad_norm": 3.125,
      "learning_rate": 2.002266408093647e-05,
      "loss": 0.8381,
      "step": 547480
    },
    {
      "epoch": 1.9188159550270392,
      "grad_norm": 3.015625,
      "learning_rate": 2.0022015052272768e-05,
      "loss": 0.8513,
      "step": 547490
    },
    {
      "epoch": 1.9188510025339347,
      "grad_norm": 2.671875,
      "learning_rate": 2.0021366023609066e-05,
      "loss": 0.7841,
      "step": 547500
    },
    {
      "epoch": 1.9188860500408302,
      "grad_norm": 2.890625,
      "learning_rate": 2.0020716994945364e-05,
      "loss": 0.8333,
      "step": 547510
    },
    {
      "epoch": 1.918921097547726,
      "grad_norm": 2.609375,
      "learning_rate": 2.0020067966281662e-05,
      "loss": 0.7479,
      "step": 547520
    },
    {
      "epoch": 1.9189561450546215,
      "grad_norm": 3.328125,
      "learning_rate": 2.0019418937617964e-05,
      "loss": 0.8107,
      "step": 547530
    },
    {
      "epoch": 1.918991192561517,
      "grad_norm": 3.015625,
      "learning_rate": 2.001876990895426e-05,
      "loss": 0.7677,
      "step": 547540
    },
    {
      "epoch": 1.9190262400684128,
      "grad_norm": 3.0,
      "learning_rate": 2.001812088029056e-05,
      "loss": 0.839,
      "step": 547550
    },
    {
      "epoch": 1.9190612875753084,
      "grad_norm": 3.140625,
      "learning_rate": 2.0017471851626858e-05,
      "loss": 0.8329,
      "step": 547560
    },
    {
      "epoch": 1.919096335082204,
      "grad_norm": 2.671875,
      "learning_rate": 2.0016822822963156e-05,
      "loss": 0.93,
      "step": 547570
    },
    {
      "epoch": 1.9191313825890997,
      "grad_norm": 2.5,
      "learning_rate": 2.0016173794299454e-05,
      "loss": 0.8612,
      "step": 547580
    },
    {
      "epoch": 1.919166430095995,
      "grad_norm": 3.015625,
      "learning_rate": 2.001552476563575e-05,
      "loss": 0.8598,
      "step": 547590
    },
    {
      "epoch": 1.9192014776028907,
      "grad_norm": 3.0625,
      "learning_rate": 2.001487573697205e-05,
      "loss": 0.9113,
      "step": 547600
    },
    {
      "epoch": 1.9192365251097863,
      "grad_norm": 2.859375,
      "learning_rate": 2.0014226708308348e-05,
      "loss": 0.9438,
      "step": 547610
    },
    {
      "epoch": 1.9192715726166818,
      "grad_norm": 2.578125,
      "learning_rate": 2.0013577679644646e-05,
      "loss": 0.8278,
      "step": 547620
    },
    {
      "epoch": 1.9193066201235776,
      "grad_norm": 2.984375,
      "learning_rate": 2.001292865098094e-05,
      "loss": 0.8939,
      "step": 547630
    },
    {
      "epoch": 1.919341667630473,
      "grad_norm": 2.984375,
      "learning_rate": 2.0012279622317238e-05,
      "loss": 0.8391,
      "step": 547640
    },
    {
      "epoch": 1.9193767151373686,
      "grad_norm": 3.03125,
      "learning_rate": 2.001163059365354e-05,
      "loss": 0.8627,
      "step": 547650
    },
    {
      "epoch": 1.9194117626442644,
      "grad_norm": 3.09375,
      "learning_rate": 2.0010981564989838e-05,
      "loss": 0.7878,
      "step": 547660
    },
    {
      "epoch": 1.91944681015116,
      "grad_norm": 2.890625,
      "learning_rate": 2.0010332536326136e-05,
      "loss": 0.8407,
      "step": 547670
    },
    {
      "epoch": 1.9194818576580555,
      "grad_norm": 2.859375,
      "learning_rate": 2.0009683507662434e-05,
      "loss": 0.8764,
      "step": 547680
    },
    {
      "epoch": 1.9195169051649512,
      "grad_norm": 2.609375,
      "learning_rate": 2.000903447899873e-05,
      "loss": 0.8345,
      "step": 547690
    },
    {
      "epoch": 1.9195519526718465,
      "grad_norm": 2.4375,
      "learning_rate": 2.000838545033503e-05,
      "loss": 0.7944,
      "step": 547700
    },
    {
      "epoch": 1.9195870001787423,
      "grad_norm": 2.875,
      "learning_rate": 2.0007736421671328e-05,
      "loss": 0.8267,
      "step": 547710
    },
    {
      "epoch": 1.919622047685638,
      "grad_norm": 2.828125,
      "learning_rate": 2.0007087393007626e-05,
      "loss": 0.8838,
      "step": 547720
    },
    {
      "epoch": 1.9196570951925334,
      "grad_norm": 2.71875,
      "learning_rate": 2.0006438364343924e-05,
      "loss": 0.7432,
      "step": 547730
    },
    {
      "epoch": 1.9196921426994291,
      "grad_norm": 3.578125,
      "learning_rate": 2.000578933568022e-05,
      "loss": 0.8713,
      "step": 547740
    },
    {
      "epoch": 1.9197271902063247,
      "grad_norm": 2.734375,
      "learning_rate": 2.000514030701652e-05,
      "loss": 0.779,
      "step": 547750
    },
    {
      "epoch": 1.9197622377132202,
      "grad_norm": 3.359375,
      "learning_rate": 2.0004491278352818e-05,
      "loss": 0.8974,
      "step": 547760
    },
    {
      "epoch": 1.919797285220116,
      "grad_norm": 2.53125,
      "learning_rate": 2.0003842249689116e-05,
      "loss": 0.7739,
      "step": 547770
    },
    {
      "epoch": 1.9198323327270115,
      "grad_norm": 3.28125,
      "learning_rate": 2.0003193221025414e-05,
      "loss": 0.8751,
      "step": 547780
    },
    {
      "epoch": 1.919867380233907,
      "grad_norm": 3.125,
      "learning_rate": 2.0002544192361715e-05,
      "loss": 0.873,
      "step": 547790
    },
    {
      "epoch": 1.9199024277408028,
      "grad_norm": 2.703125,
      "learning_rate": 2.0001895163698013e-05,
      "loss": 0.8291,
      "step": 547800
    },
    {
      "epoch": 1.919937475247698,
      "grad_norm": 2.375,
      "learning_rate": 2.000124613503431e-05,
      "loss": 0.7494,
      "step": 547810
    },
    {
      "epoch": 1.9199725227545938,
      "grad_norm": 2.578125,
      "learning_rate": 2.0000597106370606e-05,
      "loss": 0.7875,
      "step": 547820
    },
    {
      "epoch": 1.9200075702614896,
      "grad_norm": 2.75,
      "learning_rate": 1.9999948077706904e-05,
      "loss": 0.7629,
      "step": 547830
    },
    {
      "epoch": 1.920042617768385,
      "grad_norm": 2.859375,
      "learning_rate": 1.99992990490432e-05,
      "loss": 0.8291,
      "step": 547840
    },
    {
      "epoch": 1.9200776652752807,
      "grad_norm": 2.609375,
      "learning_rate": 1.99986500203795e-05,
      "loss": 0.7955,
      "step": 547850
    },
    {
      "epoch": 1.9201127127821762,
      "grad_norm": 3.03125,
      "learning_rate": 1.9998000991715798e-05,
      "loss": 0.9027,
      "step": 547860
    },
    {
      "epoch": 1.9201477602890717,
      "grad_norm": 2.828125,
      "learning_rate": 1.9997351963052096e-05,
      "loss": 0.8071,
      "step": 547870
    },
    {
      "epoch": 1.9201828077959675,
      "grad_norm": 2.5625,
      "learning_rate": 1.9996702934388394e-05,
      "loss": 0.8463,
      "step": 547880
    },
    {
      "epoch": 1.920217855302863,
      "grad_norm": 2.96875,
      "learning_rate": 1.999605390572469e-05,
      "loss": 0.8404,
      "step": 547890
    },
    {
      "epoch": 1.9202529028097586,
      "grad_norm": 3.3125,
      "learning_rate": 1.9995404877060993e-05,
      "loss": 0.9318,
      "step": 547900
    },
    {
      "epoch": 1.9202879503166543,
      "grad_norm": 2.953125,
      "learning_rate": 1.999475584839729e-05,
      "loss": 0.8989,
      "step": 547910
    },
    {
      "epoch": 1.9203229978235499,
      "grad_norm": 3.140625,
      "learning_rate": 1.999410681973359e-05,
      "loss": 0.8729,
      "step": 547920
    },
    {
      "epoch": 1.9203580453304454,
      "grad_norm": 2.703125,
      "learning_rate": 1.9993457791069887e-05,
      "loss": 0.8165,
      "step": 547930
    },
    {
      "epoch": 1.9203930928373412,
      "grad_norm": 3.078125,
      "learning_rate": 1.9992808762406185e-05,
      "loss": 0.8849,
      "step": 547940
    },
    {
      "epoch": 1.9204281403442365,
      "grad_norm": 2.515625,
      "learning_rate": 1.9992159733742483e-05,
      "loss": 0.8246,
      "step": 547950
    },
    {
      "epoch": 1.9204631878511322,
      "grad_norm": 3.296875,
      "learning_rate": 1.999151070507878e-05,
      "loss": 0.871,
      "step": 547960
    },
    {
      "epoch": 1.9204982353580278,
      "grad_norm": 2.9375,
      "learning_rate": 1.999086167641508e-05,
      "loss": 0.8373,
      "step": 547970
    },
    {
      "epoch": 1.9205332828649233,
      "grad_norm": 2.84375,
      "learning_rate": 1.9990212647751377e-05,
      "loss": 0.9142,
      "step": 547980
    },
    {
      "epoch": 1.920568330371819,
      "grad_norm": 3.109375,
      "learning_rate": 1.9989563619087675e-05,
      "loss": 0.8463,
      "step": 547990
    },
    {
      "epoch": 1.9206033778787146,
      "grad_norm": 2.640625,
      "learning_rate": 1.998891459042397e-05,
      "loss": 0.854,
      "step": 548000
    },
    {
      "epoch": 1.9206384253856101,
      "grad_norm": 2.609375,
      "learning_rate": 1.998826556176027e-05,
      "loss": 0.8884,
      "step": 548010
    },
    {
      "epoch": 1.920673472892506,
      "grad_norm": 2.515625,
      "learning_rate": 1.998761653309657e-05,
      "loss": 0.8097,
      "step": 548020
    },
    {
      "epoch": 1.9207085203994014,
      "grad_norm": 2.796875,
      "learning_rate": 1.9986967504432867e-05,
      "loss": 0.8214,
      "step": 548030
    },
    {
      "epoch": 1.920743567906297,
      "grad_norm": 3.0625,
      "learning_rate": 1.9986318475769165e-05,
      "loss": 0.8111,
      "step": 548040
    },
    {
      "epoch": 1.9207786154131927,
      "grad_norm": 3.4375,
      "learning_rate": 1.9985669447105463e-05,
      "loss": 0.8078,
      "step": 548050
    },
    {
      "epoch": 1.920813662920088,
      "grad_norm": 2.796875,
      "learning_rate": 1.998502041844176e-05,
      "loss": 0.8537,
      "step": 548060
    },
    {
      "epoch": 1.9208487104269838,
      "grad_norm": 2.65625,
      "learning_rate": 1.998437138977806e-05,
      "loss": 0.787,
      "step": 548070
    },
    {
      "epoch": 1.9208837579338793,
      "grad_norm": 2.8125,
      "learning_rate": 1.9983722361114357e-05,
      "loss": 0.8513,
      "step": 548080
    },
    {
      "epoch": 1.9209188054407749,
      "grad_norm": 2.609375,
      "learning_rate": 1.9983073332450655e-05,
      "loss": 0.8234,
      "step": 548090
    },
    {
      "epoch": 1.9209538529476706,
      "grad_norm": 3.0625,
      "learning_rate": 1.9982424303786953e-05,
      "loss": 0.8404,
      "step": 548100
    },
    {
      "epoch": 1.9209889004545662,
      "grad_norm": 3.53125,
      "learning_rate": 1.998177527512325e-05,
      "loss": 0.8441,
      "step": 548110
    },
    {
      "epoch": 1.9210239479614617,
      "grad_norm": 2.828125,
      "learning_rate": 1.998112624645955e-05,
      "loss": 0.8058,
      "step": 548120
    },
    {
      "epoch": 1.9210589954683575,
      "grad_norm": 2.78125,
      "learning_rate": 1.9980477217795847e-05,
      "loss": 0.8146,
      "step": 548130
    },
    {
      "epoch": 1.921094042975253,
      "grad_norm": 2.890625,
      "learning_rate": 1.9979828189132145e-05,
      "loss": 0.764,
      "step": 548140
    },
    {
      "epoch": 1.9211290904821485,
      "grad_norm": 3.40625,
      "learning_rate": 1.9979179160468446e-05,
      "loss": 0.8194,
      "step": 548150
    },
    {
      "epoch": 1.9211641379890443,
      "grad_norm": 3.25,
      "learning_rate": 1.9978530131804744e-05,
      "loss": 0.855,
      "step": 548160
    },
    {
      "epoch": 1.9211991854959396,
      "grad_norm": 2.703125,
      "learning_rate": 1.9977881103141042e-05,
      "loss": 0.8403,
      "step": 548170
    },
    {
      "epoch": 1.9212342330028354,
      "grad_norm": 2.921875,
      "learning_rate": 1.997723207447734e-05,
      "loss": 0.7706,
      "step": 548180
    },
    {
      "epoch": 1.921269280509731,
      "grad_norm": 2.640625,
      "learning_rate": 1.9976583045813635e-05,
      "loss": 0.8643,
      "step": 548190
    },
    {
      "epoch": 1.9213043280166264,
      "grad_norm": 2.546875,
      "learning_rate": 1.9975934017149933e-05,
      "loss": 0.8551,
      "step": 548200
    },
    {
      "epoch": 1.9213393755235222,
      "grad_norm": 2.609375,
      "learning_rate": 1.997528498848623e-05,
      "loss": 0.8865,
      "step": 548210
    },
    {
      "epoch": 1.9213744230304177,
      "grad_norm": 3.125,
      "learning_rate": 1.997463595982253e-05,
      "loss": 0.9213,
      "step": 548220
    },
    {
      "epoch": 1.9214094705373133,
      "grad_norm": 2.65625,
      "learning_rate": 1.9973986931158827e-05,
      "loss": 0.862,
      "step": 548230
    },
    {
      "epoch": 1.921444518044209,
      "grad_norm": 2.890625,
      "learning_rate": 1.9973337902495125e-05,
      "loss": 0.8612,
      "step": 548240
    },
    {
      "epoch": 1.9214795655511046,
      "grad_norm": 2.75,
      "learning_rate": 1.9972688873831423e-05,
      "loss": 0.7585,
      "step": 548250
    },
    {
      "epoch": 1.921514613058,
      "grad_norm": 2.953125,
      "learning_rate": 1.997203984516772e-05,
      "loss": 0.8235,
      "step": 548260
    },
    {
      "epoch": 1.9215496605648958,
      "grad_norm": 2.734375,
      "learning_rate": 1.9971390816504022e-05,
      "loss": 0.814,
      "step": 548270
    },
    {
      "epoch": 1.9215847080717912,
      "grad_norm": 2.640625,
      "learning_rate": 1.997074178784032e-05,
      "loss": 0.807,
      "step": 548280
    },
    {
      "epoch": 1.921619755578687,
      "grad_norm": 2.8125,
      "learning_rate": 1.997009275917662e-05,
      "loss": 0.8381,
      "step": 548290
    },
    {
      "epoch": 1.9216548030855825,
      "grad_norm": 2.765625,
      "learning_rate": 1.9969443730512916e-05,
      "loss": 0.8149,
      "step": 548300
    },
    {
      "epoch": 1.921689850592478,
      "grad_norm": 3.0,
      "learning_rate": 1.9968794701849214e-05,
      "loss": 0.8479,
      "step": 548310
    },
    {
      "epoch": 1.9217248980993737,
      "grad_norm": 2.890625,
      "learning_rate": 1.9968145673185512e-05,
      "loss": 0.7816,
      "step": 548320
    },
    {
      "epoch": 1.9217599456062693,
      "grad_norm": 3.046875,
      "learning_rate": 1.996749664452181e-05,
      "loss": 0.8606,
      "step": 548330
    },
    {
      "epoch": 1.9217949931131648,
      "grad_norm": 2.46875,
      "learning_rate": 1.996684761585811e-05,
      "loss": 0.7472,
      "step": 548340
    },
    {
      "epoch": 1.9218300406200606,
      "grad_norm": 2.859375,
      "learning_rate": 1.9966198587194406e-05,
      "loss": 0.7439,
      "step": 548350
    },
    {
      "epoch": 1.9218650881269561,
      "grad_norm": 3.765625,
      "learning_rate": 1.9965549558530704e-05,
      "loss": 0.8641,
      "step": 548360
    },
    {
      "epoch": 1.9219001356338516,
      "grad_norm": 2.75,
      "learning_rate": 1.9964900529867002e-05,
      "loss": 0.8688,
      "step": 548370
    },
    {
      "epoch": 1.9219351831407474,
      "grad_norm": 2.796875,
      "learning_rate": 1.99642515012033e-05,
      "loss": 0.8131,
      "step": 548380
    },
    {
      "epoch": 1.9219702306476427,
      "grad_norm": 2.859375,
      "learning_rate": 1.99636024725396e-05,
      "loss": 0.8552,
      "step": 548390
    },
    {
      "epoch": 1.9220052781545385,
      "grad_norm": 2.9375,
      "learning_rate": 1.9962953443875896e-05,
      "loss": 0.816,
      "step": 548400
    },
    {
      "epoch": 1.9220403256614342,
      "grad_norm": 3.09375,
      "learning_rate": 1.9962304415212194e-05,
      "loss": 0.7608,
      "step": 548410
    },
    {
      "epoch": 1.9220753731683295,
      "grad_norm": 2.96875,
      "learning_rate": 1.9961655386548492e-05,
      "loss": 0.8865,
      "step": 548420
    },
    {
      "epoch": 1.9221104206752253,
      "grad_norm": 3.25,
      "learning_rate": 1.996100635788479e-05,
      "loss": 0.8706,
      "step": 548430
    },
    {
      "epoch": 1.9221454681821208,
      "grad_norm": 3.078125,
      "learning_rate": 1.996035732922109e-05,
      "loss": 0.8934,
      "step": 548440
    },
    {
      "epoch": 1.9221805156890164,
      "grad_norm": 3.296875,
      "learning_rate": 1.9959708300557386e-05,
      "loss": 0.797,
      "step": 548450
    },
    {
      "epoch": 1.9222155631959121,
      "grad_norm": 2.859375,
      "learning_rate": 1.9959059271893684e-05,
      "loss": 0.869,
      "step": 548460
    },
    {
      "epoch": 1.9222506107028077,
      "grad_norm": 2.859375,
      "learning_rate": 1.9958410243229982e-05,
      "loss": 0.8086,
      "step": 548470
    },
    {
      "epoch": 1.9222856582097032,
      "grad_norm": 2.71875,
      "learning_rate": 1.995776121456628e-05,
      "loss": 0.8365,
      "step": 548480
    },
    {
      "epoch": 1.922320705716599,
      "grad_norm": 3.0625,
      "learning_rate": 1.995711218590258e-05,
      "loss": 0.836,
      "step": 548490
    },
    {
      "epoch": 1.9223557532234945,
      "grad_norm": 3.0625,
      "learning_rate": 1.9956463157238876e-05,
      "loss": 0.7985,
      "step": 548500
    },
    {
      "epoch": 1.92239080073039,
      "grad_norm": 2.890625,
      "learning_rate": 1.9955814128575174e-05,
      "loss": 0.8037,
      "step": 548510
    },
    {
      "epoch": 1.9224258482372858,
      "grad_norm": 2.953125,
      "learning_rate": 1.9955165099911476e-05,
      "loss": 0.8055,
      "step": 548520
    },
    {
      "epoch": 1.922460895744181,
      "grad_norm": 2.953125,
      "learning_rate": 1.9954516071247774e-05,
      "loss": 0.8457,
      "step": 548530
    },
    {
      "epoch": 1.9224959432510769,
      "grad_norm": 2.78125,
      "learning_rate": 1.9953867042584072e-05,
      "loss": 0.8067,
      "step": 548540
    },
    {
      "epoch": 1.9225309907579724,
      "grad_norm": 2.609375,
      "learning_rate": 1.995321801392037e-05,
      "loss": 0.7294,
      "step": 548550
    },
    {
      "epoch": 1.922566038264868,
      "grad_norm": 2.734375,
      "learning_rate": 1.9952568985256668e-05,
      "loss": 0.847,
      "step": 548560
    },
    {
      "epoch": 1.9226010857717637,
      "grad_norm": 2.859375,
      "learning_rate": 1.9951919956592962e-05,
      "loss": 0.8138,
      "step": 548570
    },
    {
      "epoch": 1.9226361332786592,
      "grad_norm": 2.46875,
      "learning_rate": 1.995127092792926e-05,
      "loss": 0.8913,
      "step": 548580
    },
    {
      "epoch": 1.9226711807855548,
      "grad_norm": 2.9375,
      "learning_rate": 1.995062189926556e-05,
      "loss": 0.8599,
      "step": 548590
    },
    {
      "epoch": 1.9227062282924505,
      "grad_norm": 3.015625,
      "learning_rate": 1.9949972870601856e-05,
      "loss": 0.8016,
      "step": 548600
    },
    {
      "epoch": 1.922741275799346,
      "grad_norm": 3.1875,
      "learning_rate": 1.9949323841938154e-05,
      "loss": 0.8234,
      "step": 548610
    },
    {
      "epoch": 1.9227763233062416,
      "grad_norm": 2.53125,
      "learning_rate": 1.9948674813274452e-05,
      "loss": 0.7302,
      "step": 548620
    },
    {
      "epoch": 1.9228113708131374,
      "grad_norm": 3.6875,
      "learning_rate": 1.9948025784610754e-05,
      "loss": 0.9107,
      "step": 548630
    },
    {
      "epoch": 1.9228464183200327,
      "grad_norm": 3.0,
      "learning_rate": 1.9947376755947052e-05,
      "loss": 0.8937,
      "step": 548640
    },
    {
      "epoch": 1.9228814658269284,
      "grad_norm": 4.8125,
      "learning_rate": 1.994672772728335e-05,
      "loss": 0.8094,
      "step": 548650
    },
    {
      "epoch": 1.922916513333824,
      "grad_norm": 3.40625,
      "learning_rate": 1.9946078698619648e-05,
      "loss": 0.8613,
      "step": 548660
    },
    {
      "epoch": 1.9229515608407195,
      "grad_norm": 2.875,
      "learning_rate": 1.9945429669955946e-05,
      "loss": 0.9058,
      "step": 548670
    },
    {
      "epoch": 1.9229866083476153,
      "grad_norm": 2.890625,
      "learning_rate": 1.9944780641292244e-05,
      "loss": 0.7922,
      "step": 548680
    },
    {
      "epoch": 1.9230216558545108,
      "grad_norm": 2.703125,
      "learning_rate": 1.9944131612628542e-05,
      "loss": 0.777,
      "step": 548690
    },
    {
      "epoch": 1.9230567033614063,
      "grad_norm": 2.40625,
      "learning_rate": 1.994348258396484e-05,
      "loss": 0.8373,
      "step": 548700
    },
    {
      "epoch": 1.923091750868302,
      "grad_norm": 3.375,
      "learning_rate": 1.9942833555301138e-05,
      "loss": 0.8366,
      "step": 548710
    },
    {
      "epoch": 1.9231267983751976,
      "grad_norm": 2.6875,
      "learning_rate": 1.9942184526637436e-05,
      "loss": 0.7487,
      "step": 548720
    },
    {
      "epoch": 1.9231618458820932,
      "grad_norm": 2.6875,
      "learning_rate": 1.9941535497973734e-05,
      "loss": 0.7989,
      "step": 548730
    },
    {
      "epoch": 1.923196893388989,
      "grad_norm": 2.609375,
      "learning_rate": 1.9940886469310032e-05,
      "loss": 0.7611,
      "step": 548740
    },
    {
      "epoch": 1.9232319408958842,
      "grad_norm": 2.890625,
      "learning_rate": 1.994023744064633e-05,
      "loss": 0.7519,
      "step": 548750
    },
    {
      "epoch": 1.92326698840278,
      "grad_norm": 3.265625,
      "learning_rate": 1.9939588411982628e-05,
      "loss": 0.8676,
      "step": 548760
    },
    {
      "epoch": 1.9233020359096755,
      "grad_norm": 2.859375,
      "learning_rate": 1.9938939383318926e-05,
      "loss": 0.8604,
      "step": 548770
    },
    {
      "epoch": 1.923337083416571,
      "grad_norm": 3.078125,
      "learning_rate": 1.9938290354655224e-05,
      "loss": 0.7912,
      "step": 548780
    },
    {
      "epoch": 1.9233721309234668,
      "grad_norm": 3.53125,
      "learning_rate": 1.9937641325991522e-05,
      "loss": 0.9187,
      "step": 548790
    },
    {
      "epoch": 1.9234071784303624,
      "grad_norm": 3.046875,
      "learning_rate": 1.993699229732782e-05,
      "loss": 0.9081,
      "step": 548800
    },
    {
      "epoch": 1.9234422259372579,
      "grad_norm": 2.828125,
      "learning_rate": 1.9936343268664118e-05,
      "loss": 0.9404,
      "step": 548810
    },
    {
      "epoch": 1.9234772734441536,
      "grad_norm": 2.515625,
      "learning_rate": 1.9935694240000416e-05,
      "loss": 0.7015,
      "step": 548820
    },
    {
      "epoch": 1.9235123209510492,
      "grad_norm": 2.734375,
      "learning_rate": 1.9935045211336714e-05,
      "loss": 0.801,
      "step": 548830
    },
    {
      "epoch": 1.9235473684579447,
      "grad_norm": 2.5625,
      "learning_rate": 1.9934396182673012e-05,
      "loss": 0.7867,
      "step": 548840
    },
    {
      "epoch": 1.9235824159648405,
      "grad_norm": 2.75,
      "learning_rate": 1.993374715400931e-05,
      "loss": 0.803,
      "step": 548850
    },
    {
      "epoch": 1.9236174634717358,
      "grad_norm": 2.796875,
      "learning_rate": 1.9933098125345608e-05,
      "loss": 0.8348,
      "step": 548860
    },
    {
      "epoch": 1.9236525109786315,
      "grad_norm": 2.734375,
      "learning_rate": 1.9932449096681906e-05,
      "loss": 0.8014,
      "step": 548870
    },
    {
      "epoch": 1.923687558485527,
      "grad_norm": 3.15625,
      "learning_rate": 1.9931800068018207e-05,
      "loss": 0.8212,
      "step": 548880
    },
    {
      "epoch": 1.9237226059924226,
      "grad_norm": 3.203125,
      "learning_rate": 1.9931151039354505e-05,
      "loss": 0.7998,
      "step": 548890
    },
    {
      "epoch": 1.9237576534993184,
      "grad_norm": 2.515625,
      "learning_rate": 1.9930502010690803e-05,
      "loss": 0.7703,
      "step": 548900
    },
    {
      "epoch": 1.923792701006214,
      "grad_norm": 2.421875,
      "learning_rate": 1.99298529820271e-05,
      "loss": 0.7909,
      "step": 548910
    },
    {
      "epoch": 1.9238277485131094,
      "grad_norm": 2.78125,
      "learning_rate": 1.99292039533634e-05,
      "loss": 0.8423,
      "step": 548920
    },
    {
      "epoch": 1.9238627960200052,
      "grad_norm": 3.375,
      "learning_rate": 1.9928554924699697e-05,
      "loss": 0.7758,
      "step": 548930
    },
    {
      "epoch": 1.9238978435269007,
      "grad_norm": 3.171875,
      "learning_rate": 1.9927905896035992e-05,
      "loss": 0.8339,
      "step": 548940
    },
    {
      "epoch": 1.9239328910337963,
      "grad_norm": 3.375,
      "learning_rate": 1.992725686737229e-05,
      "loss": 0.7532,
      "step": 548950
    },
    {
      "epoch": 1.923967938540692,
      "grad_norm": 3.1875,
      "learning_rate": 1.9926607838708588e-05,
      "loss": 0.803,
      "step": 548960
    },
    {
      "epoch": 1.9240029860475873,
      "grad_norm": 3.171875,
      "learning_rate": 1.9925958810044886e-05,
      "loss": 0.8746,
      "step": 548970
    },
    {
      "epoch": 1.924038033554483,
      "grad_norm": 2.796875,
      "learning_rate": 1.9925309781381184e-05,
      "loss": 0.8323,
      "step": 548980
    },
    {
      "epoch": 1.9240730810613789,
      "grad_norm": 3.25,
      "learning_rate": 1.9924660752717482e-05,
      "loss": 0.8898,
      "step": 548990
    },
    {
      "epoch": 1.9241081285682742,
      "grad_norm": 2.75,
      "learning_rate": 1.9924011724053783e-05,
      "loss": 0.8419,
      "step": 549000
    },
    {
      "epoch": 1.92414317607517,
      "grad_norm": 3.046875,
      "learning_rate": 1.992336269539008e-05,
      "loss": 0.8036,
      "step": 549010
    },
    {
      "epoch": 1.9241782235820655,
      "grad_norm": 2.796875,
      "learning_rate": 1.992271366672638e-05,
      "loss": 0.8019,
      "step": 549020
    },
    {
      "epoch": 1.924213271088961,
      "grad_norm": 3.53125,
      "learning_rate": 1.9922064638062677e-05,
      "loss": 0.8933,
      "step": 549030
    },
    {
      "epoch": 1.9242483185958568,
      "grad_norm": 2.71875,
      "learning_rate": 1.9921415609398975e-05,
      "loss": 0.8058,
      "step": 549040
    },
    {
      "epoch": 1.9242833661027523,
      "grad_norm": 2.53125,
      "learning_rate": 1.9920766580735273e-05,
      "loss": 0.7985,
      "step": 549050
    },
    {
      "epoch": 1.9243184136096478,
      "grad_norm": 3.078125,
      "learning_rate": 1.992011755207157e-05,
      "loss": 0.8263,
      "step": 549060
    },
    {
      "epoch": 1.9243534611165436,
      "grad_norm": 3.25,
      "learning_rate": 1.991946852340787e-05,
      "loss": 0.8629,
      "step": 549070
    },
    {
      "epoch": 1.924388508623439,
      "grad_norm": 2.859375,
      "learning_rate": 1.9918819494744167e-05,
      "loss": 0.8554,
      "step": 549080
    },
    {
      "epoch": 1.9244235561303347,
      "grad_norm": 2.6875,
      "learning_rate": 1.9918170466080465e-05,
      "loss": 0.8119,
      "step": 549090
    },
    {
      "epoch": 1.9244586036372304,
      "grad_norm": 2.828125,
      "learning_rate": 1.9917521437416763e-05,
      "loss": 0.8414,
      "step": 549100
    },
    {
      "epoch": 1.9244936511441257,
      "grad_norm": 3.0,
      "learning_rate": 1.991687240875306e-05,
      "loss": 0.7904,
      "step": 549110
    },
    {
      "epoch": 1.9245286986510215,
      "grad_norm": 2.953125,
      "learning_rate": 1.991622338008936e-05,
      "loss": 0.7559,
      "step": 549120
    },
    {
      "epoch": 1.924563746157917,
      "grad_norm": 2.953125,
      "learning_rate": 1.9915574351425657e-05,
      "loss": 0.7686,
      "step": 549130
    },
    {
      "epoch": 1.9245987936648126,
      "grad_norm": 3.078125,
      "learning_rate": 1.9914925322761955e-05,
      "loss": 0.8195,
      "step": 549140
    },
    {
      "epoch": 1.9246338411717083,
      "grad_norm": 2.8125,
      "learning_rate": 1.9914276294098253e-05,
      "loss": 0.7267,
      "step": 549150
    },
    {
      "epoch": 1.9246688886786039,
      "grad_norm": 2.765625,
      "learning_rate": 1.991362726543455e-05,
      "loss": 0.7942,
      "step": 549160
    },
    {
      "epoch": 1.9247039361854994,
      "grad_norm": 2.65625,
      "learning_rate": 1.991297823677085e-05,
      "loss": 0.7983,
      "step": 549170
    },
    {
      "epoch": 1.9247389836923952,
      "grad_norm": 2.328125,
      "learning_rate": 1.9912329208107147e-05,
      "loss": 0.7961,
      "step": 549180
    },
    {
      "epoch": 1.9247740311992907,
      "grad_norm": 3.0625,
      "learning_rate": 1.9911680179443445e-05,
      "loss": 0.8167,
      "step": 549190
    },
    {
      "epoch": 1.9248090787061862,
      "grad_norm": 2.890625,
      "learning_rate": 1.9911031150779743e-05,
      "loss": 0.8106,
      "step": 549200
    },
    {
      "epoch": 1.924844126213082,
      "grad_norm": 2.84375,
      "learning_rate": 1.991038212211604e-05,
      "loss": 0.8176,
      "step": 549210
    },
    {
      "epoch": 1.9248791737199773,
      "grad_norm": 2.609375,
      "learning_rate": 1.990973309345234e-05,
      "loss": 0.9009,
      "step": 549220
    },
    {
      "epoch": 1.924914221226873,
      "grad_norm": 2.875,
      "learning_rate": 1.9909084064788637e-05,
      "loss": 0.8193,
      "step": 549230
    },
    {
      "epoch": 1.9249492687337686,
      "grad_norm": 2.984375,
      "learning_rate": 1.9908435036124935e-05,
      "loss": 0.8298,
      "step": 549240
    },
    {
      "epoch": 1.9249843162406641,
      "grad_norm": 2.8125,
      "learning_rate": 1.9907786007461237e-05,
      "loss": 0.7752,
      "step": 549250
    },
    {
      "epoch": 1.9250193637475599,
      "grad_norm": 2.921875,
      "learning_rate": 1.9907136978797535e-05,
      "loss": 0.7981,
      "step": 549260
    },
    {
      "epoch": 1.9250544112544554,
      "grad_norm": 2.78125,
      "learning_rate": 1.9906487950133833e-05,
      "loss": 0.8209,
      "step": 549270
    },
    {
      "epoch": 1.925089458761351,
      "grad_norm": 2.6875,
      "learning_rate": 1.990583892147013e-05,
      "loss": 0.8088,
      "step": 549280
    },
    {
      "epoch": 1.9251245062682467,
      "grad_norm": 3.125,
      "learning_rate": 1.990518989280643e-05,
      "loss": 0.7741,
      "step": 549290
    },
    {
      "epoch": 1.9251595537751423,
      "grad_norm": 2.984375,
      "learning_rate": 1.9904540864142727e-05,
      "loss": 0.8095,
      "step": 549300
    },
    {
      "epoch": 1.9251946012820378,
      "grad_norm": 3.234375,
      "learning_rate": 1.9903891835479025e-05,
      "loss": 0.8928,
      "step": 549310
    },
    {
      "epoch": 1.9252296487889335,
      "grad_norm": 2.953125,
      "learning_rate": 1.990324280681532e-05,
      "loss": 0.8522,
      "step": 549320
    },
    {
      "epoch": 1.9252646962958289,
      "grad_norm": 2.890625,
      "learning_rate": 1.9902593778151617e-05,
      "loss": 0.7585,
      "step": 549330
    },
    {
      "epoch": 1.9252997438027246,
      "grad_norm": 3.046875,
      "learning_rate": 1.9901944749487915e-05,
      "loss": 0.9088,
      "step": 549340
    },
    {
      "epoch": 1.9253347913096202,
      "grad_norm": 2.9375,
      "learning_rate": 1.9901295720824213e-05,
      "loss": 0.8491,
      "step": 549350
    },
    {
      "epoch": 1.9253698388165157,
      "grad_norm": 2.703125,
      "learning_rate": 1.990064669216051e-05,
      "loss": 0.7561,
      "step": 549360
    },
    {
      "epoch": 1.9254048863234114,
      "grad_norm": 2.6875,
      "learning_rate": 1.9899997663496813e-05,
      "loss": 0.8293,
      "step": 549370
    },
    {
      "epoch": 1.925439933830307,
      "grad_norm": 3.21875,
      "learning_rate": 1.989934863483311e-05,
      "loss": 0.8522,
      "step": 549380
    },
    {
      "epoch": 1.9254749813372025,
      "grad_norm": 2.765625,
      "learning_rate": 1.989869960616941e-05,
      "loss": 0.8591,
      "step": 549390
    },
    {
      "epoch": 1.9255100288440983,
      "grad_norm": 3.078125,
      "learning_rate": 1.9898050577505707e-05,
      "loss": 0.7992,
      "step": 549400
    },
    {
      "epoch": 1.9255450763509938,
      "grad_norm": 3.359375,
      "learning_rate": 1.9897401548842005e-05,
      "loss": 0.7905,
      "step": 549410
    },
    {
      "epoch": 1.9255801238578893,
      "grad_norm": 2.546875,
      "learning_rate": 1.9896752520178303e-05,
      "loss": 0.8418,
      "step": 549420
    },
    {
      "epoch": 1.925615171364785,
      "grad_norm": 2.796875,
      "learning_rate": 1.98961034915146e-05,
      "loss": 0.8497,
      "step": 549430
    },
    {
      "epoch": 1.9256502188716804,
      "grad_norm": 3.171875,
      "learning_rate": 1.98954544628509e-05,
      "loss": 0.8211,
      "step": 549440
    },
    {
      "epoch": 1.9256852663785762,
      "grad_norm": 3.1875,
      "learning_rate": 1.9894805434187197e-05,
      "loss": 0.8276,
      "step": 549450
    },
    {
      "epoch": 1.9257203138854717,
      "grad_norm": 3.09375,
      "learning_rate": 1.9894156405523495e-05,
      "loss": 0.7932,
      "step": 549460
    },
    {
      "epoch": 1.9257553613923672,
      "grad_norm": 2.375,
      "learning_rate": 1.9893507376859793e-05,
      "loss": 0.8602,
      "step": 549470
    },
    {
      "epoch": 1.925790408899263,
      "grad_norm": 2.75,
      "learning_rate": 1.989285834819609e-05,
      "loss": 0.8341,
      "step": 549480
    },
    {
      "epoch": 1.9258254564061585,
      "grad_norm": 3.078125,
      "learning_rate": 1.989220931953239e-05,
      "loss": 0.8383,
      "step": 549490
    },
    {
      "epoch": 1.925860503913054,
      "grad_norm": 3.328125,
      "learning_rate": 1.989156029086869e-05,
      "loss": 0.8322,
      "step": 549500
    },
    {
      "epoch": 1.9258955514199498,
      "grad_norm": 3.171875,
      "learning_rate": 1.9890911262204985e-05,
      "loss": 0.7855,
      "step": 549510
    },
    {
      "epoch": 1.9259305989268454,
      "grad_norm": 2.734375,
      "learning_rate": 1.9890262233541283e-05,
      "loss": 0.788,
      "step": 549520
    },
    {
      "epoch": 1.925965646433741,
      "grad_norm": 2.765625,
      "learning_rate": 1.988961320487758e-05,
      "loss": 0.8457,
      "step": 549530
    },
    {
      "epoch": 1.9260006939406367,
      "grad_norm": 2.546875,
      "learning_rate": 1.988896417621388e-05,
      "loss": 0.8778,
      "step": 549540
    },
    {
      "epoch": 1.926035741447532,
      "grad_norm": 2.984375,
      "learning_rate": 1.9888315147550177e-05,
      "loss": 0.8591,
      "step": 549550
    },
    {
      "epoch": 1.9260707889544277,
      "grad_norm": 3.03125,
      "learning_rate": 1.9887666118886475e-05,
      "loss": 0.8515,
      "step": 549560
    },
    {
      "epoch": 1.9261058364613233,
      "grad_norm": 2.921875,
      "learning_rate": 1.9887017090222773e-05,
      "loss": 0.8585,
      "step": 549570
    },
    {
      "epoch": 1.9261408839682188,
      "grad_norm": 2.828125,
      "learning_rate": 1.988636806155907e-05,
      "loss": 0.8208,
      "step": 549580
    },
    {
      "epoch": 1.9261759314751146,
      "grad_norm": 2.953125,
      "learning_rate": 1.988571903289537e-05,
      "loss": 0.86,
      "step": 549590
    },
    {
      "epoch": 1.92621097898201,
      "grad_norm": 2.796875,
      "learning_rate": 1.9885070004231667e-05,
      "loss": 0.7791,
      "step": 549600
    },
    {
      "epoch": 1.9262460264889056,
      "grad_norm": 2.90625,
      "learning_rate": 1.9884420975567965e-05,
      "loss": 0.8057,
      "step": 549610
    },
    {
      "epoch": 1.9262810739958014,
      "grad_norm": 2.78125,
      "learning_rate": 1.9883771946904266e-05,
      "loss": 0.8021,
      "step": 549620
    },
    {
      "epoch": 1.926316121502697,
      "grad_norm": 3.203125,
      "learning_rate": 1.9883122918240564e-05,
      "loss": 0.8426,
      "step": 549630
    },
    {
      "epoch": 1.9263511690095925,
      "grad_norm": 2.671875,
      "learning_rate": 1.9882473889576862e-05,
      "loss": 0.8284,
      "step": 549640
    },
    {
      "epoch": 1.9263862165164882,
      "grad_norm": 2.8125,
      "learning_rate": 1.988182486091316e-05,
      "loss": 0.8744,
      "step": 549650
    },
    {
      "epoch": 1.9264212640233835,
      "grad_norm": 2.40625,
      "learning_rate": 1.9881175832249458e-05,
      "loss": 0.8204,
      "step": 549660
    },
    {
      "epoch": 1.9264563115302793,
      "grad_norm": 2.71875,
      "learning_rate": 1.9880526803585756e-05,
      "loss": 0.8158,
      "step": 549670
    },
    {
      "epoch": 1.926491359037175,
      "grad_norm": 2.9375,
      "learning_rate": 1.9879877774922054e-05,
      "loss": 0.8612,
      "step": 549680
    },
    {
      "epoch": 1.9265264065440704,
      "grad_norm": 2.75,
      "learning_rate": 1.9879228746258352e-05,
      "loss": 0.8298,
      "step": 549690
    },
    {
      "epoch": 1.9265614540509661,
      "grad_norm": 2.671875,
      "learning_rate": 1.9878579717594646e-05,
      "loss": 0.7521,
      "step": 549700
    },
    {
      "epoch": 1.9265965015578617,
      "grad_norm": 2.734375,
      "learning_rate": 1.9877930688930944e-05,
      "loss": 0.8458,
      "step": 549710
    },
    {
      "epoch": 1.9266315490647572,
      "grad_norm": 2.96875,
      "learning_rate": 1.9877281660267242e-05,
      "loss": 0.8042,
      "step": 549720
    },
    {
      "epoch": 1.926666596571653,
      "grad_norm": 2.921875,
      "learning_rate": 1.9876632631603544e-05,
      "loss": 0.9219,
      "step": 549730
    },
    {
      "epoch": 1.9267016440785485,
      "grad_norm": 3.28125,
      "learning_rate": 1.9875983602939842e-05,
      "loss": 0.8389,
      "step": 549740
    },
    {
      "epoch": 1.926736691585444,
      "grad_norm": 3.078125,
      "learning_rate": 1.987533457427614e-05,
      "loss": 0.9032,
      "step": 549750
    },
    {
      "epoch": 1.9267717390923398,
      "grad_norm": 2.234375,
      "learning_rate": 1.9874685545612438e-05,
      "loss": 0.7478,
      "step": 549760
    },
    {
      "epoch": 1.926806786599235,
      "grad_norm": 2.546875,
      "learning_rate": 1.9874036516948736e-05,
      "loss": 0.7733,
      "step": 549770
    },
    {
      "epoch": 1.9268418341061309,
      "grad_norm": 3.15625,
      "learning_rate": 1.9873387488285034e-05,
      "loss": 0.8471,
      "step": 549780
    },
    {
      "epoch": 1.9268768816130266,
      "grad_norm": 3.078125,
      "learning_rate": 1.9872738459621332e-05,
      "loss": 0.8731,
      "step": 549790
    },
    {
      "epoch": 1.926911929119922,
      "grad_norm": 2.78125,
      "learning_rate": 1.987208943095763e-05,
      "loss": 0.8034,
      "step": 549800
    },
    {
      "epoch": 1.9269469766268177,
      "grad_norm": 2.703125,
      "learning_rate": 1.9871440402293928e-05,
      "loss": 0.8266,
      "step": 549810
    },
    {
      "epoch": 1.9269820241337132,
      "grad_norm": 3.03125,
      "learning_rate": 1.9870791373630226e-05,
      "loss": 0.83,
      "step": 549820
    },
    {
      "epoch": 1.9270170716406088,
      "grad_norm": 3.28125,
      "learning_rate": 1.9870142344966524e-05,
      "loss": 0.7647,
      "step": 549830
    },
    {
      "epoch": 1.9270521191475045,
      "grad_norm": 3.453125,
      "learning_rate": 1.9869493316302822e-05,
      "loss": 0.885,
      "step": 549840
    },
    {
      "epoch": 1.9270871666544,
      "grad_norm": 3.265625,
      "learning_rate": 1.986884428763912e-05,
      "loss": 0.9496,
      "step": 549850
    },
    {
      "epoch": 1.9271222141612956,
      "grad_norm": 2.578125,
      "learning_rate": 1.9868195258975418e-05,
      "loss": 0.8275,
      "step": 549860
    },
    {
      "epoch": 1.9271572616681913,
      "grad_norm": 2.8125,
      "learning_rate": 1.986754623031172e-05,
      "loss": 0.7664,
      "step": 549870
    },
    {
      "epoch": 1.9271923091750869,
      "grad_norm": 2.34375,
      "learning_rate": 1.9866897201648014e-05,
      "loss": 0.8076,
      "step": 549880
    },
    {
      "epoch": 1.9272273566819824,
      "grad_norm": 2.9375,
      "learning_rate": 1.9866248172984312e-05,
      "loss": 0.869,
      "step": 549890
    },
    {
      "epoch": 1.9272624041888782,
      "grad_norm": 3.265625,
      "learning_rate": 1.986559914432061e-05,
      "loss": 0.808,
      "step": 549900
    },
    {
      "epoch": 1.9272974516957735,
      "grad_norm": 3.015625,
      "learning_rate": 1.9864950115656908e-05,
      "loss": 0.891,
      "step": 549910
    },
    {
      "epoch": 1.9273324992026692,
      "grad_norm": 3.03125,
      "learning_rate": 1.9864301086993206e-05,
      "loss": 0.7805,
      "step": 549920
    },
    {
      "epoch": 1.9273675467095648,
      "grad_norm": 2.78125,
      "learning_rate": 1.9863652058329504e-05,
      "loss": 0.8902,
      "step": 549930
    },
    {
      "epoch": 1.9274025942164603,
      "grad_norm": 2.59375,
      "learning_rate": 1.9863003029665802e-05,
      "loss": 0.8068,
      "step": 549940
    },
    {
      "epoch": 1.927437641723356,
      "grad_norm": 2.65625,
      "learning_rate": 1.98623540010021e-05,
      "loss": 0.7677,
      "step": 549950
    },
    {
      "epoch": 1.9274726892302516,
      "grad_norm": 3.140625,
      "learning_rate": 1.9861704972338398e-05,
      "loss": 0.7841,
      "step": 549960
    },
    {
      "epoch": 1.9275077367371471,
      "grad_norm": 2.671875,
      "learning_rate": 1.9861055943674696e-05,
      "loss": 0.7963,
      "step": 549970
    },
    {
      "epoch": 1.927542784244043,
      "grad_norm": 2.8125,
      "learning_rate": 1.9860406915010997e-05,
      "loss": 0.763,
      "step": 549980
    },
    {
      "epoch": 1.9275778317509384,
      "grad_norm": 3.484375,
      "learning_rate": 1.9859757886347295e-05,
      "loss": 0.854,
      "step": 549990
    },
    {
      "epoch": 1.927612879257834,
      "grad_norm": 3.359375,
      "learning_rate": 1.9859108857683593e-05,
      "loss": 0.8542,
      "step": 550000
    },
    {
      "epoch": 1.927612879257834,
      "eval_loss": 0.7725487351417542,
      "eval_runtime": 564.2362,
      "eval_samples_per_second": 674.25,
      "eval_steps_per_second": 56.187,
      "step": 550000
    },
    {
      "epoch": 1.9276479267647297,
      "grad_norm": 2.859375,
      "learning_rate": 1.985845982901989e-05,
      "loss": 0.8072,
      "step": 550010
    },
    {
      "epoch": 1.927682974271625,
      "grad_norm": 3.484375,
      "learning_rate": 1.985781080035619e-05,
      "loss": 0.8284,
      "step": 550020
    },
    {
      "epoch": 1.9277180217785208,
      "grad_norm": 2.296875,
      "learning_rate": 1.9857161771692487e-05,
      "loss": 0.7878,
      "step": 550030
    },
    {
      "epoch": 1.9277530692854163,
      "grad_norm": 2.828125,
      "learning_rate": 1.9856512743028785e-05,
      "loss": 0.8144,
      "step": 550040
    },
    {
      "epoch": 1.9277881167923119,
      "grad_norm": 3.015625,
      "learning_rate": 1.9855863714365083e-05,
      "loss": 0.8084,
      "step": 550050
    },
    {
      "epoch": 1.9278231642992076,
      "grad_norm": 2.765625,
      "learning_rate": 1.985521468570138e-05,
      "loss": 0.8071,
      "step": 550060
    },
    {
      "epoch": 1.9278582118061032,
      "grad_norm": 2.359375,
      "learning_rate": 1.9854565657037676e-05,
      "loss": 0.7999,
      "step": 550070
    },
    {
      "epoch": 1.9278932593129987,
      "grad_norm": 2.453125,
      "learning_rate": 1.9853916628373974e-05,
      "loss": 0.822,
      "step": 550080
    },
    {
      "epoch": 1.9279283068198945,
      "grad_norm": 3.1875,
      "learning_rate": 1.9853267599710272e-05,
      "loss": 0.8192,
      "step": 550090
    },
    {
      "epoch": 1.92796335432679,
      "grad_norm": 3.03125,
      "learning_rate": 1.9852618571046573e-05,
      "loss": 0.8557,
      "step": 550100
    },
    {
      "epoch": 1.9279984018336855,
      "grad_norm": 3.40625,
      "learning_rate": 1.985196954238287e-05,
      "loss": 0.8357,
      "step": 550110
    },
    {
      "epoch": 1.9280334493405813,
      "grad_norm": 2.703125,
      "learning_rate": 1.985132051371917e-05,
      "loss": 0.9106,
      "step": 550120
    },
    {
      "epoch": 1.9280684968474766,
      "grad_norm": 2.671875,
      "learning_rate": 1.9850671485055467e-05,
      "loss": 0.8021,
      "step": 550130
    },
    {
      "epoch": 1.9281035443543724,
      "grad_norm": 2.9375,
      "learning_rate": 1.9850022456391765e-05,
      "loss": 0.8131,
      "step": 550140
    },
    {
      "epoch": 1.928138591861268,
      "grad_norm": 3.234375,
      "learning_rate": 1.9849373427728063e-05,
      "loss": 0.8673,
      "step": 550150
    },
    {
      "epoch": 1.9281736393681634,
      "grad_norm": 2.90625,
      "learning_rate": 1.984872439906436e-05,
      "loss": 0.7999,
      "step": 550160
    },
    {
      "epoch": 1.9282086868750592,
      "grad_norm": 2.703125,
      "learning_rate": 1.984807537040066e-05,
      "loss": 0.7702,
      "step": 550170
    },
    {
      "epoch": 1.9282437343819547,
      "grad_norm": 2.828125,
      "learning_rate": 1.9847426341736957e-05,
      "loss": 0.8406,
      "step": 550180
    },
    {
      "epoch": 1.9282787818888503,
      "grad_norm": 2.671875,
      "learning_rate": 1.9846777313073255e-05,
      "loss": 0.7727,
      "step": 550190
    },
    {
      "epoch": 1.928313829395746,
      "grad_norm": 3.125,
      "learning_rate": 1.9846128284409553e-05,
      "loss": 0.8149,
      "step": 550200
    },
    {
      "epoch": 1.9283488769026416,
      "grad_norm": 2.890625,
      "learning_rate": 1.984547925574585e-05,
      "loss": 0.7907,
      "step": 550210
    },
    {
      "epoch": 1.928383924409537,
      "grad_norm": 2.78125,
      "learning_rate": 1.984483022708215e-05,
      "loss": 0.886,
      "step": 550220
    },
    {
      "epoch": 1.9284189719164329,
      "grad_norm": 2.8125,
      "learning_rate": 1.9844181198418447e-05,
      "loss": 0.8669,
      "step": 550230
    },
    {
      "epoch": 1.9284540194233282,
      "grad_norm": 2.90625,
      "learning_rate": 1.984353216975475e-05,
      "loss": 0.8837,
      "step": 550240
    },
    {
      "epoch": 1.928489066930224,
      "grad_norm": 2.5625,
      "learning_rate": 1.9842883141091047e-05,
      "loss": 0.7929,
      "step": 550250
    },
    {
      "epoch": 1.9285241144371195,
      "grad_norm": 3.1875,
      "learning_rate": 1.984223411242734e-05,
      "loss": 0.7901,
      "step": 550260
    },
    {
      "epoch": 1.928559161944015,
      "grad_norm": 3.125,
      "learning_rate": 1.984158508376364e-05,
      "loss": 0.7835,
      "step": 550270
    },
    {
      "epoch": 1.9285942094509108,
      "grad_norm": 3.1875,
      "learning_rate": 1.9840936055099937e-05,
      "loss": 0.8682,
      "step": 550280
    },
    {
      "epoch": 1.9286292569578063,
      "grad_norm": 2.890625,
      "learning_rate": 1.9840287026436235e-05,
      "loss": 0.8866,
      "step": 550290
    },
    {
      "epoch": 1.9286643044647018,
      "grad_norm": 3.15625,
      "learning_rate": 1.9839637997772533e-05,
      "loss": 0.8468,
      "step": 550300
    },
    {
      "epoch": 1.9286993519715976,
      "grad_norm": 2.671875,
      "learning_rate": 1.983898896910883e-05,
      "loss": 0.7683,
      "step": 550310
    },
    {
      "epoch": 1.9287343994784931,
      "grad_norm": 2.78125,
      "learning_rate": 1.983833994044513e-05,
      "loss": 0.8537,
      "step": 550320
    },
    {
      "epoch": 1.9287694469853887,
      "grad_norm": 2.515625,
      "learning_rate": 1.9837690911781427e-05,
      "loss": 0.7669,
      "step": 550330
    },
    {
      "epoch": 1.9288044944922844,
      "grad_norm": 3.640625,
      "learning_rate": 1.9837041883117725e-05,
      "loss": 0.8507,
      "step": 550340
    },
    {
      "epoch": 1.9288395419991797,
      "grad_norm": 2.546875,
      "learning_rate": 1.9836392854454027e-05,
      "loss": 0.7626,
      "step": 550350
    },
    {
      "epoch": 1.9288745895060755,
      "grad_norm": 2.984375,
      "learning_rate": 1.9835743825790325e-05,
      "loss": 0.7849,
      "step": 550360
    },
    {
      "epoch": 1.9289096370129712,
      "grad_norm": 3.109375,
      "learning_rate": 1.9835094797126623e-05,
      "loss": 0.7692,
      "step": 550370
    },
    {
      "epoch": 1.9289446845198666,
      "grad_norm": 2.90625,
      "learning_rate": 1.983444576846292e-05,
      "loss": 0.7618,
      "step": 550380
    },
    {
      "epoch": 1.9289797320267623,
      "grad_norm": 3.25,
      "learning_rate": 1.983379673979922e-05,
      "loss": 0.7289,
      "step": 550390
    },
    {
      "epoch": 1.9290147795336579,
      "grad_norm": 2.671875,
      "learning_rate": 1.9833147711135517e-05,
      "loss": 0.8046,
      "step": 550400
    },
    {
      "epoch": 1.9290498270405534,
      "grad_norm": 2.296875,
      "learning_rate": 1.9832498682471815e-05,
      "loss": 0.7843,
      "step": 550410
    },
    {
      "epoch": 1.9290848745474491,
      "grad_norm": 2.875,
      "learning_rate": 1.9831849653808113e-05,
      "loss": 0.8387,
      "step": 550420
    },
    {
      "epoch": 1.9291199220543447,
      "grad_norm": 3.078125,
      "learning_rate": 1.983120062514441e-05,
      "loss": 0.8725,
      "step": 550430
    },
    {
      "epoch": 1.9291549695612402,
      "grad_norm": 3.0,
      "learning_rate": 1.983055159648071e-05,
      "loss": 0.8358,
      "step": 550440
    },
    {
      "epoch": 1.929190017068136,
      "grad_norm": 3.1875,
      "learning_rate": 1.9829902567817003e-05,
      "loss": 0.8839,
      "step": 550450
    },
    {
      "epoch": 1.9292250645750313,
      "grad_norm": 2.734375,
      "learning_rate": 1.9829253539153305e-05,
      "loss": 0.752,
      "step": 550460
    },
    {
      "epoch": 1.929260112081927,
      "grad_norm": 3.0625,
      "learning_rate": 1.9828604510489603e-05,
      "loss": 0.8186,
      "step": 550470
    },
    {
      "epoch": 1.9292951595888228,
      "grad_norm": 3.046875,
      "learning_rate": 1.98279554818259e-05,
      "loss": 0.8797,
      "step": 550480
    },
    {
      "epoch": 1.9293302070957181,
      "grad_norm": 2.84375,
      "learning_rate": 1.98273064531622e-05,
      "loss": 0.8262,
      "step": 550490
    },
    {
      "epoch": 1.9293652546026139,
      "grad_norm": 2.953125,
      "learning_rate": 1.9826657424498497e-05,
      "loss": 0.8757,
      "step": 550500
    },
    {
      "epoch": 1.9294003021095094,
      "grad_norm": 2.84375,
      "learning_rate": 1.9826008395834795e-05,
      "loss": 0.7477,
      "step": 550510
    },
    {
      "epoch": 1.929435349616405,
      "grad_norm": 3.046875,
      "learning_rate": 1.9825359367171093e-05,
      "loss": 0.7697,
      "step": 550520
    },
    {
      "epoch": 1.9294703971233007,
      "grad_norm": 2.765625,
      "learning_rate": 1.982471033850739e-05,
      "loss": 0.8227,
      "step": 550530
    },
    {
      "epoch": 1.9295054446301962,
      "grad_norm": 2.875,
      "learning_rate": 1.982406130984369e-05,
      "loss": 0.7541,
      "step": 550540
    },
    {
      "epoch": 1.9295404921370918,
      "grad_norm": 2.765625,
      "learning_rate": 1.9823412281179987e-05,
      "loss": 0.8498,
      "step": 550550
    },
    {
      "epoch": 1.9295755396439875,
      "grad_norm": 2.9375,
      "learning_rate": 1.9822763252516285e-05,
      "loss": 0.872,
      "step": 550560
    },
    {
      "epoch": 1.929610587150883,
      "grad_norm": 2.625,
      "learning_rate": 1.9822114223852583e-05,
      "loss": 0.8081,
      "step": 550570
    },
    {
      "epoch": 1.9296456346577786,
      "grad_norm": 3.203125,
      "learning_rate": 1.982146519518888e-05,
      "loss": 0.7557,
      "step": 550580
    },
    {
      "epoch": 1.9296806821646744,
      "grad_norm": 2.875,
      "learning_rate": 1.982081616652518e-05,
      "loss": 0.7747,
      "step": 550590
    },
    {
      "epoch": 1.9297157296715697,
      "grad_norm": 2.890625,
      "learning_rate": 1.982016713786148e-05,
      "loss": 0.7973,
      "step": 550600
    },
    {
      "epoch": 1.9297507771784654,
      "grad_norm": 2.890625,
      "learning_rate": 1.9819518109197778e-05,
      "loss": 0.8785,
      "step": 550610
    },
    {
      "epoch": 1.929785824685361,
      "grad_norm": 2.625,
      "learning_rate": 1.9818869080534076e-05,
      "loss": 0.7309,
      "step": 550620
    },
    {
      "epoch": 1.9298208721922565,
      "grad_norm": 2.6875,
      "learning_rate": 1.9818220051870374e-05,
      "loss": 0.8232,
      "step": 550630
    },
    {
      "epoch": 1.9298559196991523,
      "grad_norm": 2.78125,
      "learning_rate": 1.981757102320667e-05,
      "loss": 0.7755,
      "step": 550640
    },
    {
      "epoch": 1.9298909672060478,
      "grad_norm": 2.984375,
      "learning_rate": 1.9816921994542967e-05,
      "loss": 0.7703,
      "step": 550650
    },
    {
      "epoch": 1.9299260147129433,
      "grad_norm": 2.203125,
      "learning_rate": 1.9816272965879265e-05,
      "loss": 0.7501,
      "step": 550660
    },
    {
      "epoch": 1.929961062219839,
      "grad_norm": 2.9375,
      "learning_rate": 1.9815623937215563e-05,
      "loss": 0.8565,
      "step": 550670
    },
    {
      "epoch": 1.9299961097267346,
      "grad_norm": 3.15625,
      "learning_rate": 1.981497490855186e-05,
      "loss": 0.8483,
      "step": 550680
    },
    {
      "epoch": 1.9300311572336302,
      "grad_norm": 2.578125,
      "learning_rate": 1.981432587988816e-05,
      "loss": 0.8395,
      "step": 550690
    },
    {
      "epoch": 1.930066204740526,
      "grad_norm": 3.046875,
      "learning_rate": 1.9813676851224457e-05,
      "loss": 0.7839,
      "step": 550700
    },
    {
      "epoch": 1.9301012522474212,
      "grad_norm": 2.328125,
      "learning_rate": 1.9813027822560755e-05,
      "loss": 0.8118,
      "step": 550710
    },
    {
      "epoch": 1.930136299754317,
      "grad_norm": 2.703125,
      "learning_rate": 1.9812378793897056e-05,
      "loss": 0.8745,
      "step": 550720
    },
    {
      "epoch": 1.9301713472612125,
      "grad_norm": 2.65625,
      "learning_rate": 1.9811729765233354e-05,
      "loss": 0.7958,
      "step": 550730
    },
    {
      "epoch": 1.930206394768108,
      "grad_norm": 2.8125,
      "learning_rate": 1.9811080736569652e-05,
      "loss": 0.8166,
      "step": 550740
    },
    {
      "epoch": 1.9302414422750038,
      "grad_norm": 2.5,
      "learning_rate": 1.981043170790595e-05,
      "loss": 0.8011,
      "step": 550750
    },
    {
      "epoch": 1.9302764897818994,
      "grad_norm": 2.484375,
      "learning_rate": 1.9809782679242248e-05,
      "loss": 0.8449,
      "step": 550760
    },
    {
      "epoch": 1.930311537288795,
      "grad_norm": 2.875,
      "learning_rate": 1.9809133650578546e-05,
      "loss": 0.8122,
      "step": 550770
    },
    {
      "epoch": 1.9303465847956907,
      "grad_norm": 3.25,
      "learning_rate": 1.9808484621914844e-05,
      "loss": 0.8403,
      "step": 550780
    },
    {
      "epoch": 1.9303816323025862,
      "grad_norm": 2.65625,
      "learning_rate": 1.9807835593251142e-05,
      "loss": 0.7865,
      "step": 550790
    },
    {
      "epoch": 1.9304166798094817,
      "grad_norm": 3.1875,
      "learning_rate": 1.980718656458744e-05,
      "loss": 0.8228,
      "step": 550800
    },
    {
      "epoch": 1.9304517273163775,
      "grad_norm": 2.671875,
      "learning_rate": 1.9806537535923738e-05,
      "loss": 0.7277,
      "step": 550810
    },
    {
      "epoch": 1.9304867748232728,
      "grad_norm": 2.546875,
      "learning_rate": 1.9805888507260033e-05,
      "loss": 0.772,
      "step": 550820
    },
    {
      "epoch": 1.9305218223301686,
      "grad_norm": 2.765625,
      "learning_rate": 1.9805239478596334e-05,
      "loss": 0.7426,
      "step": 550830
    },
    {
      "epoch": 1.930556869837064,
      "grad_norm": 2.21875,
      "learning_rate": 1.9804590449932632e-05,
      "loss": 0.7852,
      "step": 550840
    },
    {
      "epoch": 1.9305919173439596,
      "grad_norm": 2.984375,
      "learning_rate": 1.980394142126893e-05,
      "loss": 0.7819,
      "step": 550850
    },
    {
      "epoch": 1.9306269648508554,
      "grad_norm": 2.984375,
      "learning_rate": 1.9803292392605228e-05,
      "loss": 0.9043,
      "step": 550860
    },
    {
      "epoch": 1.930662012357751,
      "grad_norm": 2.703125,
      "learning_rate": 1.9802643363941526e-05,
      "loss": 0.8298,
      "step": 550870
    },
    {
      "epoch": 1.9306970598646465,
      "grad_norm": 3.09375,
      "learning_rate": 1.9801994335277824e-05,
      "loss": 0.9013,
      "step": 550880
    },
    {
      "epoch": 1.9307321073715422,
      "grad_norm": 2.90625,
      "learning_rate": 1.9801345306614122e-05,
      "loss": 0.893,
      "step": 550890
    },
    {
      "epoch": 1.9307671548784378,
      "grad_norm": 3.203125,
      "learning_rate": 1.980069627795042e-05,
      "loss": 0.8886,
      "step": 550900
    },
    {
      "epoch": 1.9308022023853333,
      "grad_norm": 2.515625,
      "learning_rate": 1.9800047249286718e-05,
      "loss": 0.7981,
      "step": 550910
    },
    {
      "epoch": 1.930837249892229,
      "grad_norm": 2.984375,
      "learning_rate": 1.9799398220623016e-05,
      "loss": 0.8204,
      "step": 550920
    },
    {
      "epoch": 1.9308722973991244,
      "grad_norm": 4.0,
      "learning_rate": 1.9798749191959314e-05,
      "loss": 0.9235,
      "step": 550930
    },
    {
      "epoch": 1.9309073449060201,
      "grad_norm": 2.546875,
      "learning_rate": 1.9798100163295612e-05,
      "loss": 0.8067,
      "step": 550940
    },
    {
      "epoch": 1.9309423924129157,
      "grad_norm": 2.875,
      "learning_rate": 1.979745113463191e-05,
      "loss": 0.8105,
      "step": 550950
    },
    {
      "epoch": 1.9309774399198112,
      "grad_norm": 3.625,
      "learning_rate": 1.9796802105968208e-05,
      "loss": 0.8201,
      "step": 550960
    },
    {
      "epoch": 1.931012487426707,
      "grad_norm": 2.671875,
      "learning_rate": 1.979615307730451e-05,
      "loss": 0.7781,
      "step": 550970
    },
    {
      "epoch": 1.9310475349336025,
      "grad_norm": 3.15625,
      "learning_rate": 1.9795504048640807e-05,
      "loss": 0.8245,
      "step": 550980
    },
    {
      "epoch": 1.931082582440498,
      "grad_norm": 3.140625,
      "learning_rate": 1.9794855019977105e-05,
      "loss": 0.8768,
      "step": 550990
    },
    {
      "epoch": 1.9311176299473938,
      "grad_norm": 3.234375,
      "learning_rate": 1.9794205991313403e-05,
      "loss": 0.7877,
      "step": 551000
    },
    {
      "epoch": 1.9311526774542893,
      "grad_norm": 2.765625,
      "learning_rate": 1.9793556962649698e-05,
      "loss": 0.9091,
      "step": 551010
    },
    {
      "epoch": 1.9311877249611848,
      "grad_norm": 3.140625,
      "learning_rate": 1.9792907933985996e-05,
      "loss": 0.8303,
      "step": 551020
    },
    {
      "epoch": 1.9312227724680806,
      "grad_norm": 2.84375,
      "learning_rate": 1.9792258905322294e-05,
      "loss": 0.8433,
      "step": 551030
    },
    {
      "epoch": 1.931257819974976,
      "grad_norm": 2.5,
      "learning_rate": 1.9791609876658592e-05,
      "loss": 0.7251,
      "step": 551040
    },
    {
      "epoch": 1.9312928674818717,
      "grad_norm": 2.859375,
      "learning_rate": 1.979096084799489e-05,
      "loss": 0.7915,
      "step": 551050
    },
    {
      "epoch": 1.9313279149887674,
      "grad_norm": 2.8125,
      "learning_rate": 1.9790311819331188e-05,
      "loss": 0.8526,
      "step": 551060
    },
    {
      "epoch": 1.9313629624956627,
      "grad_norm": 2.546875,
      "learning_rate": 1.9789662790667486e-05,
      "loss": 0.7963,
      "step": 551070
    },
    {
      "epoch": 1.9313980100025585,
      "grad_norm": 2.921875,
      "learning_rate": 1.9789013762003787e-05,
      "loss": 0.8155,
      "step": 551080
    },
    {
      "epoch": 1.931433057509454,
      "grad_norm": 3.515625,
      "learning_rate": 1.9788364733340085e-05,
      "loss": 0.9319,
      "step": 551090
    },
    {
      "epoch": 1.9314681050163496,
      "grad_norm": 3.203125,
      "learning_rate": 1.9787715704676383e-05,
      "loss": 0.846,
      "step": 551100
    },
    {
      "epoch": 1.9315031525232453,
      "grad_norm": 3.03125,
      "learning_rate": 1.978706667601268e-05,
      "loss": 0.7877,
      "step": 551110
    },
    {
      "epoch": 1.9315382000301409,
      "grad_norm": 2.890625,
      "learning_rate": 1.978641764734898e-05,
      "loss": 0.8147,
      "step": 551120
    },
    {
      "epoch": 1.9315732475370364,
      "grad_norm": 2.859375,
      "learning_rate": 1.9785768618685277e-05,
      "loss": 0.7653,
      "step": 551130
    },
    {
      "epoch": 1.9316082950439322,
      "grad_norm": 2.609375,
      "learning_rate": 1.9785119590021575e-05,
      "loss": 0.8268,
      "step": 551140
    },
    {
      "epoch": 1.9316433425508277,
      "grad_norm": 3.0,
      "learning_rate": 1.9784470561357873e-05,
      "loss": 0.7969,
      "step": 551150
    },
    {
      "epoch": 1.9316783900577232,
      "grad_norm": 3.09375,
      "learning_rate": 1.978382153269417e-05,
      "loss": 0.8137,
      "step": 551160
    },
    {
      "epoch": 1.931713437564619,
      "grad_norm": 2.609375,
      "learning_rate": 1.978317250403047e-05,
      "loss": 0.7937,
      "step": 551170
    },
    {
      "epoch": 1.9317484850715143,
      "grad_norm": 2.296875,
      "learning_rate": 1.9782523475366767e-05,
      "loss": 0.7857,
      "step": 551180
    },
    {
      "epoch": 1.93178353257841,
      "grad_norm": 2.921875,
      "learning_rate": 1.9781874446703065e-05,
      "loss": 0.7621,
      "step": 551190
    },
    {
      "epoch": 1.9318185800853056,
      "grad_norm": 3.421875,
      "learning_rate": 1.9781225418039363e-05,
      "loss": 0.8374,
      "step": 551200
    },
    {
      "epoch": 1.9318536275922011,
      "grad_norm": 2.734375,
      "learning_rate": 1.978057638937566e-05,
      "loss": 0.7595,
      "step": 551210
    },
    {
      "epoch": 1.931888675099097,
      "grad_norm": 2.765625,
      "learning_rate": 1.977992736071196e-05,
      "loss": 0.7834,
      "step": 551220
    },
    {
      "epoch": 1.9319237226059924,
      "grad_norm": 3.03125,
      "learning_rate": 1.9779278332048257e-05,
      "loss": 0.8026,
      "step": 551230
    },
    {
      "epoch": 1.931958770112888,
      "grad_norm": 2.9375,
      "learning_rate": 1.9778629303384555e-05,
      "loss": 0.8104,
      "step": 551240
    },
    {
      "epoch": 1.9319938176197837,
      "grad_norm": 2.671875,
      "learning_rate": 1.9777980274720853e-05,
      "loss": 0.7763,
      "step": 551250
    },
    {
      "epoch": 1.9320288651266793,
      "grad_norm": 2.90625,
      "learning_rate": 1.977733124605715e-05,
      "loss": 0.8027,
      "step": 551260
    },
    {
      "epoch": 1.9320639126335748,
      "grad_norm": 2.890625,
      "learning_rate": 1.977668221739345e-05,
      "loss": 0.8093,
      "step": 551270
    },
    {
      "epoch": 1.9320989601404706,
      "grad_norm": 2.78125,
      "learning_rate": 1.9776033188729747e-05,
      "loss": 0.8113,
      "step": 551280
    },
    {
      "epoch": 1.9321340076473659,
      "grad_norm": 2.65625,
      "learning_rate": 1.9775384160066045e-05,
      "loss": 0.77,
      "step": 551290
    },
    {
      "epoch": 1.9321690551542616,
      "grad_norm": 3.140625,
      "learning_rate": 1.9774735131402343e-05,
      "loss": 0.766,
      "step": 551300
    },
    {
      "epoch": 1.9322041026611572,
      "grad_norm": 2.8125,
      "learning_rate": 1.977408610273864e-05,
      "loss": 0.8758,
      "step": 551310
    },
    {
      "epoch": 1.9322391501680527,
      "grad_norm": 2.71875,
      "learning_rate": 1.977343707407494e-05,
      "loss": 0.831,
      "step": 551320
    },
    {
      "epoch": 1.9322741976749485,
      "grad_norm": 2.765625,
      "learning_rate": 1.9772788045411237e-05,
      "loss": 0.841,
      "step": 551330
    },
    {
      "epoch": 1.932309245181844,
      "grad_norm": 2.875,
      "learning_rate": 1.977213901674754e-05,
      "loss": 0.8488,
      "step": 551340
    },
    {
      "epoch": 1.9323442926887395,
      "grad_norm": 3.25,
      "learning_rate": 1.9771489988083837e-05,
      "loss": 0.8408,
      "step": 551350
    },
    {
      "epoch": 1.9323793401956353,
      "grad_norm": 3.28125,
      "learning_rate": 1.9770840959420135e-05,
      "loss": 0.8321,
      "step": 551360
    },
    {
      "epoch": 1.9324143877025308,
      "grad_norm": 2.984375,
      "learning_rate": 1.9770191930756433e-05,
      "loss": 0.8254,
      "step": 551370
    },
    {
      "epoch": 1.9324494352094264,
      "grad_norm": 2.828125,
      "learning_rate": 1.976954290209273e-05,
      "loss": 0.8241,
      "step": 551380
    },
    {
      "epoch": 1.9324844827163221,
      "grad_norm": 2.640625,
      "learning_rate": 1.9768893873429025e-05,
      "loss": 0.7536,
      "step": 551390
    },
    {
      "epoch": 1.9325195302232174,
      "grad_norm": 2.90625,
      "learning_rate": 1.9768244844765323e-05,
      "loss": 0.8097,
      "step": 551400
    },
    {
      "epoch": 1.9325545777301132,
      "grad_norm": 2.5625,
      "learning_rate": 1.976759581610162e-05,
      "loss": 0.8208,
      "step": 551410
    },
    {
      "epoch": 1.9325896252370087,
      "grad_norm": 2.546875,
      "learning_rate": 1.976694678743792e-05,
      "loss": 0.8344,
      "step": 551420
    },
    {
      "epoch": 1.9326246727439043,
      "grad_norm": 3.296875,
      "learning_rate": 1.9766297758774217e-05,
      "loss": 0.859,
      "step": 551430
    },
    {
      "epoch": 1.9326597202508,
      "grad_norm": 2.625,
      "learning_rate": 1.9765648730110515e-05,
      "loss": 0.9004,
      "step": 551440
    },
    {
      "epoch": 1.9326947677576956,
      "grad_norm": 2.84375,
      "learning_rate": 1.9764999701446817e-05,
      "loss": 0.9472,
      "step": 551450
    },
    {
      "epoch": 1.932729815264591,
      "grad_norm": 2.84375,
      "learning_rate": 1.9764350672783115e-05,
      "loss": 0.7814,
      "step": 551460
    },
    {
      "epoch": 1.9327648627714868,
      "grad_norm": 3.140625,
      "learning_rate": 1.9763701644119413e-05,
      "loss": 0.832,
      "step": 551470
    },
    {
      "epoch": 1.9327999102783824,
      "grad_norm": 2.828125,
      "learning_rate": 1.976305261545571e-05,
      "loss": 0.8748,
      "step": 551480
    },
    {
      "epoch": 1.932834957785278,
      "grad_norm": 3.015625,
      "learning_rate": 1.976240358679201e-05,
      "loss": 0.7829,
      "step": 551490
    },
    {
      "epoch": 1.9328700052921737,
      "grad_norm": 2.859375,
      "learning_rate": 1.9761754558128307e-05,
      "loss": 0.8395,
      "step": 551500
    },
    {
      "epoch": 1.932905052799069,
      "grad_norm": 2.984375,
      "learning_rate": 1.9761105529464605e-05,
      "loss": 0.8266,
      "step": 551510
    },
    {
      "epoch": 1.9329401003059647,
      "grad_norm": 2.875,
      "learning_rate": 1.9760456500800903e-05,
      "loss": 0.8879,
      "step": 551520
    },
    {
      "epoch": 1.9329751478128603,
      "grad_norm": 2.796875,
      "learning_rate": 1.97598074721372e-05,
      "loss": 0.843,
      "step": 551530
    },
    {
      "epoch": 1.9330101953197558,
      "grad_norm": 2.71875,
      "learning_rate": 1.97591584434735e-05,
      "loss": 0.8277,
      "step": 551540
    },
    {
      "epoch": 1.9330452428266516,
      "grad_norm": 2.921875,
      "learning_rate": 1.9758509414809797e-05,
      "loss": 0.7467,
      "step": 551550
    },
    {
      "epoch": 1.9330802903335471,
      "grad_norm": 2.875,
      "learning_rate": 1.9757860386146095e-05,
      "loss": 0.7828,
      "step": 551560
    },
    {
      "epoch": 1.9331153378404426,
      "grad_norm": 3.1875,
      "learning_rate": 1.9757211357482393e-05,
      "loss": 0.9235,
      "step": 551570
    },
    {
      "epoch": 1.9331503853473384,
      "grad_norm": 2.65625,
      "learning_rate": 1.975656232881869e-05,
      "loss": 0.8528,
      "step": 551580
    },
    {
      "epoch": 1.933185432854234,
      "grad_norm": 2.671875,
      "learning_rate": 1.975591330015499e-05,
      "loss": 0.852,
      "step": 551590
    },
    {
      "epoch": 1.9332204803611295,
      "grad_norm": 2.53125,
      "learning_rate": 1.9755264271491287e-05,
      "loss": 0.7956,
      "step": 551600
    },
    {
      "epoch": 1.9332555278680252,
      "grad_norm": 3.125,
      "learning_rate": 1.9754615242827585e-05,
      "loss": 0.845,
      "step": 551610
    },
    {
      "epoch": 1.9332905753749206,
      "grad_norm": 2.5625,
      "learning_rate": 1.9753966214163883e-05,
      "loss": 0.8357,
      "step": 551620
    },
    {
      "epoch": 1.9333256228818163,
      "grad_norm": 2.828125,
      "learning_rate": 1.975331718550018e-05,
      "loss": 0.8047,
      "step": 551630
    },
    {
      "epoch": 1.9333606703887118,
      "grad_norm": 2.71875,
      "learning_rate": 1.975266815683648e-05,
      "loss": 0.8182,
      "step": 551640
    },
    {
      "epoch": 1.9333957178956074,
      "grad_norm": 2.703125,
      "learning_rate": 1.9752019128172777e-05,
      "loss": 0.7692,
      "step": 551650
    },
    {
      "epoch": 1.9334307654025031,
      "grad_norm": 2.359375,
      "learning_rate": 1.9751370099509075e-05,
      "loss": 0.7751,
      "step": 551660
    },
    {
      "epoch": 1.9334658129093987,
      "grad_norm": 2.828125,
      "learning_rate": 1.9750721070845373e-05,
      "loss": 0.7868,
      "step": 551670
    },
    {
      "epoch": 1.9335008604162942,
      "grad_norm": 3.21875,
      "learning_rate": 1.975007204218167e-05,
      "loss": 0.807,
      "step": 551680
    },
    {
      "epoch": 1.93353590792319,
      "grad_norm": 2.734375,
      "learning_rate": 1.974942301351797e-05,
      "loss": 0.8435,
      "step": 551690
    },
    {
      "epoch": 1.9335709554300855,
      "grad_norm": 2.609375,
      "learning_rate": 1.974877398485427e-05,
      "loss": 0.8033,
      "step": 551700
    },
    {
      "epoch": 1.933606002936981,
      "grad_norm": 2.40625,
      "learning_rate": 1.9748124956190568e-05,
      "loss": 0.7626,
      "step": 551710
    },
    {
      "epoch": 1.9336410504438768,
      "grad_norm": 2.546875,
      "learning_rate": 1.9747475927526866e-05,
      "loss": 0.8026,
      "step": 551720
    },
    {
      "epoch": 1.933676097950772,
      "grad_norm": 2.96875,
      "learning_rate": 1.9746826898863164e-05,
      "loss": 0.8718,
      "step": 551730
    },
    {
      "epoch": 1.9337111454576679,
      "grad_norm": 3.328125,
      "learning_rate": 1.9746177870199462e-05,
      "loss": 0.7965,
      "step": 551740
    },
    {
      "epoch": 1.9337461929645636,
      "grad_norm": 2.46875,
      "learning_rate": 1.974552884153576e-05,
      "loss": 0.8651,
      "step": 551750
    },
    {
      "epoch": 1.933781240471459,
      "grad_norm": 3.1875,
      "learning_rate": 1.9744879812872058e-05,
      "loss": 0.8424,
      "step": 551760
    },
    {
      "epoch": 1.9338162879783547,
      "grad_norm": 3.03125,
      "learning_rate": 1.9744230784208353e-05,
      "loss": 0.8783,
      "step": 551770
    },
    {
      "epoch": 1.9338513354852502,
      "grad_norm": 3.5,
      "learning_rate": 1.974358175554465e-05,
      "loss": 0.8817,
      "step": 551780
    },
    {
      "epoch": 1.9338863829921458,
      "grad_norm": 3.296875,
      "learning_rate": 1.974293272688095e-05,
      "loss": 0.821,
      "step": 551790
    },
    {
      "epoch": 1.9339214304990415,
      "grad_norm": 2.484375,
      "learning_rate": 1.9742283698217247e-05,
      "loss": 0.8961,
      "step": 551800
    },
    {
      "epoch": 1.933956478005937,
      "grad_norm": 2.75,
      "learning_rate": 1.9741634669553545e-05,
      "loss": 0.7939,
      "step": 551810
    },
    {
      "epoch": 1.9339915255128326,
      "grad_norm": 3.34375,
      "learning_rate": 1.9740985640889846e-05,
      "loss": 0.8133,
      "step": 551820
    },
    {
      "epoch": 1.9340265730197284,
      "grad_norm": 3.0,
      "learning_rate": 1.9740336612226144e-05,
      "loss": 0.8458,
      "step": 551830
    },
    {
      "epoch": 1.934061620526624,
      "grad_norm": 2.59375,
      "learning_rate": 1.9739687583562442e-05,
      "loss": 0.9221,
      "step": 551840
    },
    {
      "epoch": 1.9340966680335194,
      "grad_norm": 2.328125,
      "learning_rate": 1.973903855489874e-05,
      "loss": 0.881,
      "step": 551850
    },
    {
      "epoch": 1.9341317155404152,
      "grad_norm": 2.375,
      "learning_rate": 1.9738389526235038e-05,
      "loss": 0.8443,
      "step": 551860
    },
    {
      "epoch": 1.9341667630473105,
      "grad_norm": 3.453125,
      "learning_rate": 1.9737740497571336e-05,
      "loss": 0.8109,
      "step": 551870
    },
    {
      "epoch": 1.9342018105542063,
      "grad_norm": 2.765625,
      "learning_rate": 1.9737091468907634e-05,
      "loss": 0.758,
      "step": 551880
    },
    {
      "epoch": 1.9342368580611018,
      "grad_norm": 2.890625,
      "learning_rate": 1.9736442440243932e-05,
      "loss": 0.8029,
      "step": 551890
    },
    {
      "epoch": 1.9342719055679973,
      "grad_norm": 3.0625,
      "learning_rate": 1.973579341158023e-05,
      "loss": 0.7885,
      "step": 551900
    },
    {
      "epoch": 1.934306953074893,
      "grad_norm": 2.90625,
      "learning_rate": 1.9735144382916528e-05,
      "loss": 0.7928,
      "step": 551910
    },
    {
      "epoch": 1.9343420005817886,
      "grad_norm": 3.203125,
      "learning_rate": 1.9734495354252826e-05,
      "loss": 0.8979,
      "step": 551920
    },
    {
      "epoch": 1.9343770480886842,
      "grad_norm": 2.671875,
      "learning_rate": 1.9733846325589124e-05,
      "loss": 0.9132,
      "step": 551930
    },
    {
      "epoch": 1.93441209559558,
      "grad_norm": 2.859375,
      "learning_rate": 1.9733197296925422e-05,
      "loss": 0.8752,
      "step": 551940
    },
    {
      "epoch": 1.9344471431024755,
      "grad_norm": 3.171875,
      "learning_rate": 1.973254826826172e-05,
      "loss": 0.856,
      "step": 551950
    },
    {
      "epoch": 1.934482190609371,
      "grad_norm": 2.65625,
      "learning_rate": 1.9731899239598018e-05,
      "loss": 0.8219,
      "step": 551960
    },
    {
      "epoch": 1.9345172381162667,
      "grad_norm": 2.40625,
      "learning_rate": 1.9731250210934316e-05,
      "loss": 0.8299,
      "step": 551970
    },
    {
      "epoch": 1.934552285623162,
      "grad_norm": 2.796875,
      "learning_rate": 1.9730601182270614e-05,
      "loss": 0.8172,
      "step": 551980
    },
    {
      "epoch": 1.9345873331300578,
      "grad_norm": 2.515625,
      "learning_rate": 1.9729952153606912e-05,
      "loss": 0.7866,
      "step": 551990
    },
    {
      "epoch": 1.9346223806369534,
      "grad_norm": 2.984375,
      "learning_rate": 1.972930312494321e-05,
      "loss": 0.8608,
      "step": 552000
    },
    {
      "epoch": 1.934657428143849,
      "grad_norm": 2.75,
      "learning_rate": 1.9728654096279508e-05,
      "loss": 0.8297,
      "step": 552010
    },
    {
      "epoch": 1.9346924756507446,
      "grad_norm": 2.984375,
      "learning_rate": 1.9728005067615806e-05,
      "loss": 0.8083,
      "step": 552020
    },
    {
      "epoch": 1.9347275231576402,
      "grad_norm": 2.546875,
      "learning_rate": 1.9727356038952104e-05,
      "loss": 0.8098,
      "step": 552030
    },
    {
      "epoch": 1.9347625706645357,
      "grad_norm": 2.78125,
      "learning_rate": 1.9726707010288402e-05,
      "loss": 0.8725,
      "step": 552040
    },
    {
      "epoch": 1.9347976181714315,
      "grad_norm": 2.703125,
      "learning_rate": 1.97260579816247e-05,
      "loss": 0.8173,
      "step": 552050
    },
    {
      "epoch": 1.934832665678327,
      "grad_norm": 3.015625,
      "learning_rate": 1.9725408952960998e-05,
      "loss": 0.8711,
      "step": 552060
    },
    {
      "epoch": 1.9348677131852225,
      "grad_norm": 2.96875,
      "learning_rate": 1.97247599242973e-05,
      "loss": 0.7564,
      "step": 552070
    },
    {
      "epoch": 1.9349027606921183,
      "grad_norm": 3.0625,
      "learning_rate": 1.9724110895633598e-05,
      "loss": 0.8625,
      "step": 552080
    },
    {
      "epoch": 1.9349378081990136,
      "grad_norm": 3.09375,
      "learning_rate": 1.9723461866969896e-05,
      "loss": 0.91,
      "step": 552090
    },
    {
      "epoch": 1.9349728557059094,
      "grad_norm": 2.609375,
      "learning_rate": 1.9722812838306194e-05,
      "loss": 0.8281,
      "step": 552100
    },
    {
      "epoch": 1.935007903212805,
      "grad_norm": 2.71875,
      "learning_rate": 1.972216380964249e-05,
      "loss": 0.8981,
      "step": 552110
    },
    {
      "epoch": 1.9350429507197004,
      "grad_norm": 3.140625,
      "learning_rate": 1.972151478097879e-05,
      "loss": 0.7751,
      "step": 552120
    },
    {
      "epoch": 1.9350779982265962,
      "grad_norm": 3.046875,
      "learning_rate": 1.9720865752315088e-05,
      "loss": 0.8626,
      "step": 552130
    },
    {
      "epoch": 1.9351130457334917,
      "grad_norm": 2.640625,
      "learning_rate": 1.9720216723651382e-05,
      "loss": 0.8467,
      "step": 552140
    },
    {
      "epoch": 1.9351480932403873,
      "grad_norm": 3.109375,
      "learning_rate": 1.971956769498768e-05,
      "loss": 0.8479,
      "step": 552150
    },
    {
      "epoch": 1.935183140747283,
      "grad_norm": 3.234375,
      "learning_rate": 1.9718918666323978e-05,
      "loss": 0.9166,
      "step": 552160
    },
    {
      "epoch": 1.9352181882541786,
      "grad_norm": 3.21875,
      "learning_rate": 1.9718269637660276e-05,
      "loss": 0.8946,
      "step": 552170
    },
    {
      "epoch": 1.935253235761074,
      "grad_norm": 3.0,
      "learning_rate": 1.9717620608996578e-05,
      "loss": 0.8047,
      "step": 552180
    },
    {
      "epoch": 1.9352882832679699,
      "grad_norm": 2.9375,
      "learning_rate": 1.9716971580332876e-05,
      "loss": 0.8645,
      "step": 552190
    },
    {
      "epoch": 1.9353233307748652,
      "grad_norm": 3.25,
      "learning_rate": 1.9716322551669174e-05,
      "loss": 0.7754,
      "step": 552200
    },
    {
      "epoch": 1.935358378281761,
      "grad_norm": 3.234375,
      "learning_rate": 1.971567352300547e-05,
      "loss": 0.8396,
      "step": 552210
    },
    {
      "epoch": 1.9353934257886565,
      "grad_norm": 2.59375,
      "learning_rate": 1.971502449434177e-05,
      "loss": 0.8402,
      "step": 552220
    },
    {
      "epoch": 1.935428473295552,
      "grad_norm": 3.0,
      "learning_rate": 1.9714375465678068e-05,
      "loss": 0.8076,
      "step": 552230
    },
    {
      "epoch": 1.9354635208024478,
      "grad_norm": 2.84375,
      "learning_rate": 1.9713726437014366e-05,
      "loss": 0.8384,
      "step": 552240
    },
    {
      "epoch": 1.9354985683093433,
      "grad_norm": 2.859375,
      "learning_rate": 1.9713077408350664e-05,
      "loss": 0.79,
      "step": 552250
    },
    {
      "epoch": 1.9355336158162388,
      "grad_norm": 2.96875,
      "learning_rate": 1.971242837968696e-05,
      "loss": 0.8088,
      "step": 552260
    },
    {
      "epoch": 1.9355686633231346,
      "grad_norm": 3.59375,
      "learning_rate": 1.971177935102326e-05,
      "loss": 0.8594,
      "step": 552270
    },
    {
      "epoch": 1.9356037108300301,
      "grad_norm": 2.890625,
      "learning_rate": 1.9711130322359558e-05,
      "loss": 0.8597,
      "step": 552280
    },
    {
      "epoch": 1.9356387583369257,
      "grad_norm": 2.359375,
      "learning_rate": 1.9710481293695856e-05,
      "loss": 0.8242,
      "step": 552290
    },
    {
      "epoch": 1.9356738058438214,
      "grad_norm": 3.125,
      "learning_rate": 1.9709832265032154e-05,
      "loss": 0.8524,
      "step": 552300
    },
    {
      "epoch": 1.9357088533507167,
      "grad_norm": 2.953125,
      "learning_rate": 1.970918323636845e-05,
      "loss": 0.7765,
      "step": 552310
    },
    {
      "epoch": 1.9357439008576125,
      "grad_norm": 2.84375,
      "learning_rate": 1.9708534207704753e-05,
      "loss": 0.8215,
      "step": 552320
    },
    {
      "epoch": 1.9357789483645083,
      "grad_norm": 3.109375,
      "learning_rate": 1.9707885179041048e-05,
      "loss": 0.7792,
      "step": 552330
    },
    {
      "epoch": 1.9358139958714036,
      "grad_norm": 2.875,
      "learning_rate": 1.9707236150377346e-05,
      "loss": 0.812,
      "step": 552340
    },
    {
      "epoch": 1.9358490433782993,
      "grad_norm": 3.140625,
      "learning_rate": 1.9706587121713644e-05,
      "loss": 0.8114,
      "step": 552350
    },
    {
      "epoch": 1.9358840908851949,
      "grad_norm": 2.53125,
      "learning_rate": 1.970593809304994e-05,
      "loss": 0.7984,
      "step": 552360
    },
    {
      "epoch": 1.9359191383920904,
      "grad_norm": 3.078125,
      "learning_rate": 1.970528906438624e-05,
      "loss": 0.9078,
      "step": 552370
    },
    {
      "epoch": 1.9359541858989862,
      "grad_norm": 3.015625,
      "learning_rate": 1.9704640035722538e-05,
      "loss": 0.863,
      "step": 552380
    },
    {
      "epoch": 1.9359892334058817,
      "grad_norm": 3.1875,
      "learning_rate": 1.9703991007058836e-05,
      "loss": 0.8796,
      "step": 552390
    },
    {
      "epoch": 1.9360242809127772,
      "grad_norm": 2.984375,
      "learning_rate": 1.9703341978395134e-05,
      "loss": 0.8218,
      "step": 552400
    },
    {
      "epoch": 1.936059328419673,
      "grad_norm": 2.921875,
      "learning_rate": 1.970269294973143e-05,
      "loss": 0.8441,
      "step": 552410
    },
    {
      "epoch": 1.9360943759265683,
      "grad_norm": 3.15625,
      "learning_rate": 1.970204392106773e-05,
      "loss": 0.8841,
      "step": 552420
    },
    {
      "epoch": 1.936129423433464,
      "grad_norm": 2.90625,
      "learning_rate": 1.9701394892404028e-05,
      "loss": 0.882,
      "step": 552430
    },
    {
      "epoch": 1.9361644709403598,
      "grad_norm": 2.5625,
      "learning_rate": 1.970074586374033e-05,
      "loss": 0.734,
      "step": 552440
    },
    {
      "epoch": 1.9361995184472551,
      "grad_norm": 3.140625,
      "learning_rate": 1.9700096835076627e-05,
      "loss": 0.8506,
      "step": 552450
    },
    {
      "epoch": 1.9362345659541509,
      "grad_norm": 2.921875,
      "learning_rate": 1.9699447806412925e-05,
      "loss": 0.7412,
      "step": 552460
    },
    {
      "epoch": 1.9362696134610464,
      "grad_norm": 2.78125,
      "learning_rate": 1.9698798777749223e-05,
      "loss": 0.7697,
      "step": 552470
    },
    {
      "epoch": 1.936304660967942,
      "grad_norm": 2.953125,
      "learning_rate": 1.969814974908552e-05,
      "loss": 0.8704,
      "step": 552480
    },
    {
      "epoch": 1.9363397084748377,
      "grad_norm": 3.125,
      "learning_rate": 1.969750072042182e-05,
      "loss": 0.7703,
      "step": 552490
    },
    {
      "epoch": 1.9363747559817333,
      "grad_norm": 2.75,
      "learning_rate": 1.9696851691758117e-05,
      "loss": 0.8206,
      "step": 552500
    },
    {
      "epoch": 1.9364098034886288,
      "grad_norm": 2.640625,
      "learning_rate": 1.9696202663094415e-05,
      "loss": 0.7779,
      "step": 552510
    },
    {
      "epoch": 1.9364448509955245,
      "grad_norm": 2.890625,
      "learning_rate": 1.969555363443071e-05,
      "loss": 0.8743,
      "step": 552520
    },
    {
      "epoch": 1.93647989850242,
      "grad_norm": 3.15625,
      "learning_rate": 1.9694904605767008e-05,
      "loss": 0.8307,
      "step": 552530
    },
    {
      "epoch": 1.9365149460093156,
      "grad_norm": 2.046875,
      "learning_rate": 1.9694255577103306e-05,
      "loss": 0.7446,
      "step": 552540
    },
    {
      "epoch": 1.9365499935162114,
      "grad_norm": 2.609375,
      "learning_rate": 1.9693606548439607e-05,
      "loss": 0.8494,
      "step": 552550
    },
    {
      "epoch": 1.9365850410231067,
      "grad_norm": 3.15625,
      "learning_rate": 1.9692957519775905e-05,
      "loss": 0.7368,
      "step": 552560
    },
    {
      "epoch": 1.9366200885300024,
      "grad_norm": 2.640625,
      "learning_rate": 1.9692308491112203e-05,
      "loss": 0.8936,
      "step": 552570
    },
    {
      "epoch": 1.936655136036898,
      "grad_norm": 2.84375,
      "learning_rate": 1.96916594624485e-05,
      "loss": 0.8933,
      "step": 552580
    },
    {
      "epoch": 1.9366901835437935,
      "grad_norm": 2.953125,
      "learning_rate": 1.96910104337848e-05,
      "loss": 0.7922,
      "step": 552590
    },
    {
      "epoch": 1.9367252310506893,
      "grad_norm": 2.859375,
      "learning_rate": 1.9690361405121097e-05,
      "loss": 0.7183,
      "step": 552600
    },
    {
      "epoch": 1.9367602785575848,
      "grad_norm": 3.21875,
      "learning_rate": 1.9689712376457395e-05,
      "loss": 0.7714,
      "step": 552610
    },
    {
      "epoch": 1.9367953260644803,
      "grad_norm": 2.515625,
      "learning_rate": 1.9689063347793693e-05,
      "loss": 0.8149,
      "step": 552620
    },
    {
      "epoch": 1.936830373571376,
      "grad_norm": 2.359375,
      "learning_rate": 1.968841431912999e-05,
      "loss": 0.8428,
      "step": 552630
    },
    {
      "epoch": 1.9368654210782716,
      "grad_norm": 2.8125,
      "learning_rate": 1.968776529046629e-05,
      "loss": 0.8023,
      "step": 552640
    },
    {
      "epoch": 1.9369004685851672,
      "grad_norm": 2.75,
      "learning_rate": 1.9687116261802587e-05,
      "loss": 0.8658,
      "step": 552650
    },
    {
      "epoch": 1.936935516092063,
      "grad_norm": 2.9375,
      "learning_rate": 1.9686467233138885e-05,
      "loss": 0.8843,
      "step": 552660
    },
    {
      "epoch": 1.9369705635989583,
      "grad_norm": 3.3125,
      "learning_rate": 1.9685818204475183e-05,
      "loss": 0.9503,
      "step": 552670
    },
    {
      "epoch": 1.937005611105854,
      "grad_norm": 3.1875,
      "learning_rate": 1.968516917581148e-05,
      "loss": 0.8992,
      "step": 552680
    },
    {
      "epoch": 1.9370406586127495,
      "grad_norm": 2.8125,
      "learning_rate": 1.9684520147147782e-05,
      "loss": 0.7687,
      "step": 552690
    },
    {
      "epoch": 1.937075706119645,
      "grad_norm": 3.171875,
      "learning_rate": 1.968387111848408e-05,
      "loss": 0.857,
      "step": 552700
    },
    {
      "epoch": 1.9371107536265408,
      "grad_norm": 2.84375,
      "learning_rate": 1.9683222089820375e-05,
      "loss": 0.7982,
      "step": 552710
    },
    {
      "epoch": 1.9371458011334364,
      "grad_norm": 2.953125,
      "learning_rate": 1.9682573061156673e-05,
      "loss": 0.8122,
      "step": 552720
    },
    {
      "epoch": 1.937180848640332,
      "grad_norm": 2.734375,
      "learning_rate": 1.968192403249297e-05,
      "loss": 0.8467,
      "step": 552730
    },
    {
      "epoch": 1.9372158961472277,
      "grad_norm": 2.609375,
      "learning_rate": 1.968127500382927e-05,
      "loss": 0.7544,
      "step": 552740
    },
    {
      "epoch": 1.9372509436541232,
      "grad_norm": 2.4375,
      "learning_rate": 1.9680625975165567e-05,
      "loss": 0.7402,
      "step": 552750
    },
    {
      "epoch": 1.9372859911610187,
      "grad_norm": 2.796875,
      "learning_rate": 1.9679976946501865e-05,
      "loss": 0.8555,
      "step": 552760
    },
    {
      "epoch": 1.9373210386679145,
      "grad_norm": 2.765625,
      "learning_rate": 1.9679327917838163e-05,
      "loss": 0.9106,
      "step": 552770
    },
    {
      "epoch": 1.9373560861748098,
      "grad_norm": 2.90625,
      "learning_rate": 1.967867888917446e-05,
      "loss": 0.8547,
      "step": 552780
    },
    {
      "epoch": 1.9373911336817056,
      "grad_norm": 3.078125,
      "learning_rate": 1.967802986051076e-05,
      "loss": 0.817,
      "step": 552790
    },
    {
      "epoch": 1.937426181188601,
      "grad_norm": 2.875,
      "learning_rate": 1.967738083184706e-05,
      "loss": 0.7472,
      "step": 552800
    },
    {
      "epoch": 1.9374612286954966,
      "grad_norm": 2.3125,
      "learning_rate": 1.967673180318336e-05,
      "loss": 0.7961,
      "step": 552810
    },
    {
      "epoch": 1.9374962762023924,
      "grad_norm": 3.15625,
      "learning_rate": 1.9676082774519656e-05,
      "loss": 0.7608,
      "step": 552820
    },
    {
      "epoch": 1.937531323709288,
      "grad_norm": 2.953125,
      "learning_rate": 1.9675433745855954e-05,
      "loss": 0.7002,
      "step": 552830
    },
    {
      "epoch": 1.9375663712161835,
      "grad_norm": 2.78125,
      "learning_rate": 1.9674784717192252e-05,
      "loss": 0.8366,
      "step": 552840
    },
    {
      "epoch": 1.9376014187230792,
      "grad_norm": 2.84375,
      "learning_rate": 1.967413568852855e-05,
      "loss": 0.8443,
      "step": 552850
    },
    {
      "epoch": 1.9376364662299748,
      "grad_norm": 2.953125,
      "learning_rate": 1.967348665986485e-05,
      "loss": 0.8278,
      "step": 552860
    },
    {
      "epoch": 1.9376715137368703,
      "grad_norm": 2.6875,
      "learning_rate": 1.9672837631201146e-05,
      "loss": 0.7839,
      "step": 552870
    },
    {
      "epoch": 1.937706561243766,
      "grad_norm": 2.984375,
      "learning_rate": 1.9672188602537444e-05,
      "loss": 0.8591,
      "step": 552880
    },
    {
      "epoch": 1.9377416087506614,
      "grad_norm": 2.984375,
      "learning_rate": 1.967153957387374e-05,
      "loss": 0.9269,
      "step": 552890
    },
    {
      "epoch": 1.9377766562575571,
      "grad_norm": 3.015625,
      "learning_rate": 1.9670890545210037e-05,
      "loss": 0.9279,
      "step": 552900
    },
    {
      "epoch": 1.9378117037644527,
      "grad_norm": 3.140625,
      "learning_rate": 1.9670241516546335e-05,
      "loss": 0.905,
      "step": 552910
    },
    {
      "epoch": 1.9378467512713482,
      "grad_norm": 3.234375,
      "learning_rate": 1.9669592487882636e-05,
      "loss": 0.8385,
      "step": 552920
    },
    {
      "epoch": 1.937881798778244,
      "grad_norm": 2.453125,
      "learning_rate": 1.9668943459218934e-05,
      "loss": 0.8112,
      "step": 552930
    },
    {
      "epoch": 1.9379168462851395,
      "grad_norm": 2.78125,
      "learning_rate": 1.9668294430555232e-05,
      "loss": 0.7895,
      "step": 552940
    },
    {
      "epoch": 1.937951893792035,
      "grad_norm": 2.984375,
      "learning_rate": 1.966764540189153e-05,
      "loss": 0.8459,
      "step": 552950
    },
    {
      "epoch": 1.9379869412989308,
      "grad_norm": 2.515625,
      "learning_rate": 1.966699637322783e-05,
      "loss": 0.7365,
      "step": 552960
    },
    {
      "epoch": 1.9380219888058263,
      "grad_norm": 2.46875,
      "learning_rate": 1.9666347344564126e-05,
      "loss": 0.8107,
      "step": 552970
    },
    {
      "epoch": 1.9380570363127219,
      "grad_norm": 2.765625,
      "learning_rate": 1.9665698315900424e-05,
      "loss": 0.8359,
      "step": 552980
    },
    {
      "epoch": 1.9380920838196176,
      "grad_norm": 2.90625,
      "learning_rate": 1.9665049287236722e-05,
      "loss": 0.7858,
      "step": 552990
    },
    {
      "epoch": 1.938127131326513,
      "grad_norm": 3.265625,
      "learning_rate": 1.966440025857302e-05,
      "loss": 0.8485,
      "step": 553000
    },
    {
      "epoch": 1.9381621788334087,
      "grad_norm": 2.734375,
      "learning_rate": 1.966375122990932e-05,
      "loss": 0.831,
      "step": 553010
    },
    {
      "epoch": 1.9381972263403044,
      "grad_norm": 3.609375,
      "learning_rate": 1.9663102201245616e-05,
      "loss": 0.8817,
      "step": 553020
    },
    {
      "epoch": 1.9382322738471998,
      "grad_norm": 2.90625,
      "learning_rate": 1.9662453172581914e-05,
      "loss": 0.8399,
      "step": 553030
    },
    {
      "epoch": 1.9382673213540955,
      "grad_norm": 3.046875,
      "learning_rate": 1.9661804143918212e-05,
      "loss": 0.8605,
      "step": 553040
    },
    {
      "epoch": 1.938302368860991,
      "grad_norm": 3.40625,
      "learning_rate": 1.966115511525451e-05,
      "loss": 0.9231,
      "step": 553050
    },
    {
      "epoch": 1.9383374163678866,
      "grad_norm": 3.078125,
      "learning_rate": 1.9660506086590812e-05,
      "loss": 0.8374,
      "step": 553060
    },
    {
      "epoch": 1.9383724638747823,
      "grad_norm": 3.0,
      "learning_rate": 1.965985705792711e-05,
      "loss": 0.8671,
      "step": 553070
    },
    {
      "epoch": 1.9384075113816779,
      "grad_norm": 2.546875,
      "learning_rate": 1.9659208029263404e-05,
      "loss": 0.7906,
      "step": 553080
    },
    {
      "epoch": 1.9384425588885734,
      "grad_norm": 3.046875,
      "learning_rate": 1.9658559000599702e-05,
      "loss": 0.8562,
      "step": 553090
    },
    {
      "epoch": 1.9384776063954692,
      "grad_norm": 2.421875,
      "learning_rate": 1.9657909971936e-05,
      "loss": 0.7785,
      "step": 553100
    },
    {
      "epoch": 1.9385126539023645,
      "grad_norm": 2.8125,
      "learning_rate": 1.96572609432723e-05,
      "loss": 0.8144,
      "step": 553110
    },
    {
      "epoch": 1.9385477014092602,
      "grad_norm": 2.390625,
      "learning_rate": 1.9656611914608596e-05,
      "loss": 0.827,
      "step": 553120
    },
    {
      "epoch": 1.938582748916156,
      "grad_norm": 2.78125,
      "learning_rate": 1.9655962885944894e-05,
      "loss": 0.8319,
      "step": 553130
    },
    {
      "epoch": 1.9386177964230513,
      "grad_norm": 2.859375,
      "learning_rate": 1.9655313857281192e-05,
      "loss": 0.8424,
      "step": 553140
    },
    {
      "epoch": 1.938652843929947,
      "grad_norm": 2.453125,
      "learning_rate": 1.965466482861749e-05,
      "loss": 0.7543,
      "step": 553150
    },
    {
      "epoch": 1.9386878914368426,
      "grad_norm": 2.90625,
      "learning_rate": 1.965401579995379e-05,
      "loss": 0.8717,
      "step": 553160
    },
    {
      "epoch": 1.9387229389437381,
      "grad_norm": 2.703125,
      "learning_rate": 1.965336677129009e-05,
      "loss": 0.8262,
      "step": 553170
    },
    {
      "epoch": 1.938757986450634,
      "grad_norm": 3.078125,
      "learning_rate": 1.9652717742626388e-05,
      "loss": 0.8347,
      "step": 553180
    },
    {
      "epoch": 1.9387930339575294,
      "grad_norm": 2.8125,
      "learning_rate": 1.9652068713962686e-05,
      "loss": 0.8564,
      "step": 553190
    },
    {
      "epoch": 1.938828081464425,
      "grad_norm": 2.875,
      "learning_rate": 1.9651419685298984e-05,
      "loss": 0.8335,
      "step": 553200
    },
    {
      "epoch": 1.9388631289713207,
      "grad_norm": 2.84375,
      "learning_rate": 1.9650770656635282e-05,
      "loss": 0.8384,
      "step": 553210
    },
    {
      "epoch": 1.9388981764782163,
      "grad_norm": 2.875,
      "learning_rate": 1.965012162797158e-05,
      "loss": 0.8085,
      "step": 553220
    },
    {
      "epoch": 1.9389332239851118,
      "grad_norm": 2.890625,
      "learning_rate": 1.9649472599307878e-05,
      "loss": 0.8851,
      "step": 553230
    },
    {
      "epoch": 1.9389682714920076,
      "grad_norm": 2.234375,
      "learning_rate": 1.9648823570644176e-05,
      "loss": 0.7331,
      "step": 553240
    },
    {
      "epoch": 1.9390033189989029,
      "grad_norm": 2.78125,
      "learning_rate": 1.9648174541980474e-05,
      "loss": 0.7354,
      "step": 553250
    },
    {
      "epoch": 1.9390383665057986,
      "grad_norm": 2.921875,
      "learning_rate": 1.9647525513316772e-05,
      "loss": 0.8054,
      "step": 553260
    },
    {
      "epoch": 1.9390734140126942,
      "grad_norm": 3.5625,
      "learning_rate": 1.9646876484653066e-05,
      "loss": 0.779,
      "step": 553270
    },
    {
      "epoch": 1.9391084615195897,
      "grad_norm": 2.859375,
      "learning_rate": 1.9646227455989368e-05,
      "loss": 0.7969,
      "step": 553280
    },
    {
      "epoch": 1.9391435090264855,
      "grad_norm": 2.953125,
      "learning_rate": 1.9645578427325666e-05,
      "loss": 0.7889,
      "step": 553290
    },
    {
      "epoch": 1.939178556533381,
      "grad_norm": 2.71875,
      "learning_rate": 1.9644929398661964e-05,
      "loss": 0.8053,
      "step": 553300
    },
    {
      "epoch": 1.9392136040402765,
      "grad_norm": 2.578125,
      "learning_rate": 1.9644280369998262e-05,
      "loss": 0.8344,
      "step": 553310
    },
    {
      "epoch": 1.9392486515471723,
      "grad_norm": 3.1875,
      "learning_rate": 1.964363134133456e-05,
      "loss": 0.8372,
      "step": 553320
    },
    {
      "epoch": 1.9392836990540678,
      "grad_norm": 3.015625,
      "learning_rate": 1.9642982312670858e-05,
      "loss": 0.7516,
      "step": 553330
    },
    {
      "epoch": 1.9393187465609634,
      "grad_norm": 2.453125,
      "learning_rate": 1.9642333284007156e-05,
      "loss": 0.8065,
      "step": 553340
    },
    {
      "epoch": 1.9393537940678591,
      "grad_norm": 2.90625,
      "learning_rate": 1.9641684255343454e-05,
      "loss": 0.8142,
      "step": 553350
    },
    {
      "epoch": 1.9393888415747544,
      "grad_norm": 3.0625,
      "learning_rate": 1.9641035226679752e-05,
      "loss": 0.8647,
      "step": 553360
    },
    {
      "epoch": 1.9394238890816502,
      "grad_norm": 2.953125,
      "learning_rate": 1.964038619801605e-05,
      "loss": 0.766,
      "step": 553370
    },
    {
      "epoch": 1.9394589365885457,
      "grad_norm": 2.671875,
      "learning_rate": 1.9639737169352348e-05,
      "loss": 0.843,
      "step": 553380
    },
    {
      "epoch": 1.9394939840954413,
      "grad_norm": 3.296875,
      "learning_rate": 1.9639088140688646e-05,
      "loss": 0.8369,
      "step": 553390
    },
    {
      "epoch": 1.939529031602337,
      "grad_norm": 2.796875,
      "learning_rate": 1.9638439112024944e-05,
      "loss": 0.8097,
      "step": 553400
    },
    {
      "epoch": 1.9395640791092326,
      "grad_norm": 3.15625,
      "learning_rate": 1.9637790083361242e-05,
      "loss": 0.8325,
      "step": 553410
    },
    {
      "epoch": 1.939599126616128,
      "grad_norm": 3.03125,
      "learning_rate": 1.9637141054697543e-05,
      "loss": 0.9038,
      "step": 553420
    },
    {
      "epoch": 1.9396341741230239,
      "grad_norm": 2.34375,
      "learning_rate": 1.963649202603384e-05,
      "loss": 0.72,
      "step": 553430
    },
    {
      "epoch": 1.9396692216299194,
      "grad_norm": 2.796875,
      "learning_rate": 1.963584299737014e-05,
      "loss": 0.7794,
      "step": 553440
    },
    {
      "epoch": 1.939704269136815,
      "grad_norm": 2.390625,
      "learning_rate": 1.9635193968706437e-05,
      "loss": 0.8417,
      "step": 553450
    },
    {
      "epoch": 1.9397393166437107,
      "grad_norm": 2.71875,
      "learning_rate": 1.9634544940042732e-05,
      "loss": 0.9266,
      "step": 553460
    },
    {
      "epoch": 1.939774364150606,
      "grad_norm": 2.734375,
      "learning_rate": 1.963389591137903e-05,
      "loss": 0.7688,
      "step": 553470
    },
    {
      "epoch": 1.9398094116575018,
      "grad_norm": 2.921875,
      "learning_rate": 1.9633246882715328e-05,
      "loss": 0.8234,
      "step": 553480
    },
    {
      "epoch": 1.9398444591643973,
      "grad_norm": 2.484375,
      "learning_rate": 1.9632597854051626e-05,
      "loss": 0.7909,
      "step": 553490
    },
    {
      "epoch": 1.9398795066712928,
      "grad_norm": 3.421875,
      "learning_rate": 1.9631948825387924e-05,
      "loss": 0.818,
      "step": 553500
    },
    {
      "epoch": 1.9399145541781886,
      "grad_norm": 2.734375,
      "learning_rate": 1.9631299796724222e-05,
      "loss": 0.8125,
      "step": 553510
    },
    {
      "epoch": 1.9399496016850841,
      "grad_norm": 3.015625,
      "learning_rate": 1.963065076806052e-05,
      "loss": 0.8884,
      "step": 553520
    },
    {
      "epoch": 1.9399846491919797,
      "grad_norm": 2.96875,
      "learning_rate": 1.9630001739396818e-05,
      "loss": 0.8011,
      "step": 553530
    },
    {
      "epoch": 1.9400196966988754,
      "grad_norm": 2.78125,
      "learning_rate": 1.962935271073312e-05,
      "loss": 0.8752,
      "step": 553540
    },
    {
      "epoch": 1.940054744205771,
      "grad_norm": 2.578125,
      "learning_rate": 1.9628703682069417e-05,
      "loss": 0.8017,
      "step": 553550
    },
    {
      "epoch": 1.9400897917126665,
      "grad_norm": 2.65625,
      "learning_rate": 1.9628054653405715e-05,
      "loss": 0.8754,
      "step": 553560
    },
    {
      "epoch": 1.9401248392195622,
      "grad_norm": 2.625,
      "learning_rate": 1.9627405624742013e-05,
      "loss": 0.8439,
      "step": 553570
    },
    {
      "epoch": 1.9401598867264576,
      "grad_norm": 3.0625,
      "learning_rate": 1.962675659607831e-05,
      "loss": 0.795,
      "step": 553580
    },
    {
      "epoch": 1.9401949342333533,
      "grad_norm": 2.859375,
      "learning_rate": 1.962610756741461e-05,
      "loss": 0.9051,
      "step": 553590
    },
    {
      "epoch": 1.9402299817402489,
      "grad_norm": 3.0,
      "learning_rate": 1.9625458538750907e-05,
      "loss": 0.8538,
      "step": 553600
    },
    {
      "epoch": 1.9402650292471444,
      "grad_norm": 2.75,
      "learning_rate": 1.9624809510087205e-05,
      "loss": 0.886,
      "step": 553610
    },
    {
      "epoch": 1.9403000767540401,
      "grad_norm": 3.0,
      "learning_rate": 1.9624160481423503e-05,
      "loss": 0.8342,
      "step": 553620
    },
    {
      "epoch": 1.9403351242609357,
      "grad_norm": 2.71875,
      "learning_rate": 1.96235114527598e-05,
      "loss": 0.7266,
      "step": 553630
    },
    {
      "epoch": 1.9403701717678312,
      "grad_norm": 2.734375,
      "learning_rate": 1.96228624240961e-05,
      "loss": 0.8014,
      "step": 553640
    },
    {
      "epoch": 1.940405219274727,
      "grad_norm": 2.65625,
      "learning_rate": 1.9622213395432397e-05,
      "loss": 0.7997,
      "step": 553650
    },
    {
      "epoch": 1.9404402667816225,
      "grad_norm": 3.109375,
      "learning_rate": 1.9621564366768695e-05,
      "loss": 0.7994,
      "step": 553660
    },
    {
      "epoch": 1.940475314288518,
      "grad_norm": 2.671875,
      "learning_rate": 1.9620915338104993e-05,
      "loss": 0.7412,
      "step": 553670
    },
    {
      "epoch": 1.9405103617954138,
      "grad_norm": 3.171875,
      "learning_rate": 1.962026630944129e-05,
      "loss": 0.7776,
      "step": 553680
    },
    {
      "epoch": 1.9405454093023091,
      "grad_norm": 3.15625,
      "learning_rate": 1.961961728077759e-05,
      "loss": 0.8286,
      "step": 553690
    },
    {
      "epoch": 1.9405804568092049,
      "grad_norm": 2.53125,
      "learning_rate": 1.9618968252113887e-05,
      "loss": 0.8792,
      "step": 553700
    },
    {
      "epoch": 1.9406155043161006,
      "grad_norm": 2.859375,
      "learning_rate": 1.9618319223450185e-05,
      "loss": 0.8181,
      "step": 553710
    },
    {
      "epoch": 1.940650551822996,
      "grad_norm": 2.875,
      "learning_rate": 1.9617670194786483e-05,
      "loss": 0.8319,
      "step": 553720
    },
    {
      "epoch": 1.9406855993298917,
      "grad_norm": 3.78125,
      "learning_rate": 1.961702116612278e-05,
      "loss": 0.8058,
      "step": 553730
    },
    {
      "epoch": 1.9407206468367872,
      "grad_norm": 2.59375,
      "learning_rate": 1.961637213745908e-05,
      "loss": 0.8495,
      "step": 553740
    },
    {
      "epoch": 1.9407556943436828,
      "grad_norm": 2.859375,
      "learning_rate": 1.9615723108795377e-05,
      "loss": 0.7824,
      "step": 553750
    },
    {
      "epoch": 1.9407907418505785,
      "grad_norm": 3.03125,
      "learning_rate": 1.9615074080131675e-05,
      "loss": 0.8042,
      "step": 553760
    },
    {
      "epoch": 1.940825789357474,
      "grad_norm": 2.890625,
      "learning_rate": 1.9614425051467973e-05,
      "loss": 0.7543,
      "step": 553770
    },
    {
      "epoch": 1.9408608368643696,
      "grad_norm": 2.984375,
      "learning_rate": 1.961377602280427e-05,
      "loss": 0.7838,
      "step": 553780
    },
    {
      "epoch": 1.9408958843712654,
      "grad_norm": 3.203125,
      "learning_rate": 1.9613126994140573e-05,
      "loss": 0.8587,
      "step": 553790
    },
    {
      "epoch": 1.9409309318781607,
      "grad_norm": 3.21875,
      "learning_rate": 1.961247796547687e-05,
      "loss": 0.894,
      "step": 553800
    },
    {
      "epoch": 1.9409659793850564,
      "grad_norm": 3.078125,
      "learning_rate": 1.961182893681317e-05,
      "loss": 0.871,
      "step": 553810
    },
    {
      "epoch": 1.9410010268919522,
      "grad_norm": 2.28125,
      "learning_rate": 1.9611179908149467e-05,
      "loss": 0.8198,
      "step": 553820
    },
    {
      "epoch": 1.9410360743988475,
      "grad_norm": 2.828125,
      "learning_rate": 1.961053087948576e-05,
      "loss": 0.8434,
      "step": 553830
    },
    {
      "epoch": 1.9410711219057433,
      "grad_norm": 2.90625,
      "learning_rate": 1.960988185082206e-05,
      "loss": 0.8631,
      "step": 553840
    },
    {
      "epoch": 1.9411061694126388,
      "grad_norm": 2.640625,
      "learning_rate": 1.9609232822158357e-05,
      "loss": 0.7767,
      "step": 553850
    },
    {
      "epoch": 1.9411412169195343,
      "grad_norm": 2.8125,
      "learning_rate": 1.9608583793494655e-05,
      "loss": 0.8866,
      "step": 553860
    },
    {
      "epoch": 1.94117626442643,
      "grad_norm": 2.671875,
      "learning_rate": 1.9607934764830953e-05,
      "loss": 0.8277,
      "step": 553870
    },
    {
      "epoch": 1.9412113119333256,
      "grad_norm": 2.453125,
      "learning_rate": 1.960728573616725e-05,
      "loss": 0.8254,
      "step": 553880
    },
    {
      "epoch": 1.9412463594402212,
      "grad_norm": 2.703125,
      "learning_rate": 1.960663670750355e-05,
      "loss": 0.8044,
      "step": 553890
    },
    {
      "epoch": 1.941281406947117,
      "grad_norm": 3.046875,
      "learning_rate": 1.960598767883985e-05,
      "loss": 0.8986,
      "step": 553900
    },
    {
      "epoch": 1.9413164544540125,
      "grad_norm": 3.171875,
      "learning_rate": 1.960533865017615e-05,
      "loss": 0.8714,
      "step": 553910
    },
    {
      "epoch": 1.941351501960908,
      "grad_norm": 2.984375,
      "learning_rate": 1.9604689621512447e-05,
      "loss": 0.7725,
      "step": 553920
    },
    {
      "epoch": 1.9413865494678038,
      "grad_norm": 2.578125,
      "learning_rate": 1.9604040592848745e-05,
      "loss": 0.8112,
      "step": 553930
    },
    {
      "epoch": 1.941421596974699,
      "grad_norm": 2.484375,
      "learning_rate": 1.9603391564185043e-05,
      "loss": 0.897,
      "step": 553940
    },
    {
      "epoch": 1.9414566444815948,
      "grad_norm": 2.6875,
      "learning_rate": 1.960274253552134e-05,
      "loss": 0.7942,
      "step": 553950
    },
    {
      "epoch": 1.9414916919884904,
      "grad_norm": 3.234375,
      "learning_rate": 1.960209350685764e-05,
      "loss": 0.9264,
      "step": 553960
    },
    {
      "epoch": 1.941526739495386,
      "grad_norm": 2.390625,
      "learning_rate": 1.9601444478193937e-05,
      "loss": 0.7673,
      "step": 553970
    },
    {
      "epoch": 1.9415617870022817,
      "grad_norm": 2.859375,
      "learning_rate": 1.9600795449530235e-05,
      "loss": 0.8411,
      "step": 553980
    },
    {
      "epoch": 1.9415968345091772,
      "grad_norm": 2.6875,
      "learning_rate": 1.9600146420866533e-05,
      "loss": 0.7859,
      "step": 553990
    },
    {
      "epoch": 1.9416318820160727,
      "grad_norm": 3.125,
      "learning_rate": 1.959949739220283e-05,
      "loss": 0.8533,
      "step": 554000
    },
    {
      "epoch": 1.9416669295229685,
      "grad_norm": 2.484375,
      "learning_rate": 1.959884836353913e-05,
      "loss": 0.7611,
      "step": 554010
    },
    {
      "epoch": 1.941701977029864,
      "grad_norm": 3.265625,
      "learning_rate": 1.9598199334875427e-05,
      "loss": 0.8967,
      "step": 554020
    },
    {
      "epoch": 1.9417370245367596,
      "grad_norm": 2.734375,
      "learning_rate": 1.9597550306211725e-05,
      "loss": 0.8519,
      "step": 554030
    },
    {
      "epoch": 1.9417720720436553,
      "grad_norm": 2.84375,
      "learning_rate": 1.9596901277548023e-05,
      "loss": 0.8664,
      "step": 554040
    },
    {
      "epoch": 1.9418071195505506,
      "grad_norm": 3.234375,
      "learning_rate": 1.959625224888432e-05,
      "loss": 0.8364,
      "step": 554050
    },
    {
      "epoch": 1.9418421670574464,
      "grad_norm": 2.953125,
      "learning_rate": 1.959560322022062e-05,
      "loss": 0.8226,
      "step": 554060
    },
    {
      "epoch": 1.941877214564342,
      "grad_norm": 3.109375,
      "learning_rate": 1.9594954191556916e-05,
      "loss": 0.7857,
      "step": 554070
    },
    {
      "epoch": 1.9419122620712375,
      "grad_norm": 2.640625,
      "learning_rate": 1.9594305162893214e-05,
      "loss": 0.81,
      "step": 554080
    },
    {
      "epoch": 1.9419473095781332,
      "grad_norm": 3.078125,
      "learning_rate": 1.9593656134229512e-05,
      "loss": 0.8093,
      "step": 554090
    },
    {
      "epoch": 1.9419823570850288,
      "grad_norm": 2.90625,
      "learning_rate": 1.959300710556581e-05,
      "loss": 0.8062,
      "step": 554100
    },
    {
      "epoch": 1.9420174045919243,
      "grad_norm": 2.734375,
      "learning_rate": 1.959235807690211e-05,
      "loss": 0.8062,
      "step": 554110
    },
    {
      "epoch": 1.94205245209882,
      "grad_norm": 2.78125,
      "learning_rate": 1.9591709048238406e-05,
      "loss": 0.8078,
      "step": 554120
    },
    {
      "epoch": 1.9420874996057156,
      "grad_norm": 2.671875,
      "learning_rate": 1.9591060019574704e-05,
      "loss": 0.8006,
      "step": 554130
    },
    {
      "epoch": 1.9421225471126111,
      "grad_norm": 2.859375,
      "learning_rate": 1.9590410990911002e-05,
      "loss": 0.8926,
      "step": 554140
    },
    {
      "epoch": 1.9421575946195069,
      "grad_norm": 2.90625,
      "learning_rate": 1.9589761962247304e-05,
      "loss": 0.7676,
      "step": 554150
    },
    {
      "epoch": 1.9421926421264022,
      "grad_norm": 2.796875,
      "learning_rate": 1.9589112933583602e-05,
      "loss": 0.8251,
      "step": 554160
    },
    {
      "epoch": 1.942227689633298,
      "grad_norm": 3.125,
      "learning_rate": 1.95884639049199e-05,
      "loss": 0.7918,
      "step": 554170
    },
    {
      "epoch": 1.9422627371401935,
      "grad_norm": 2.71875,
      "learning_rate": 1.9587814876256198e-05,
      "loss": 0.7872,
      "step": 554180
    },
    {
      "epoch": 1.942297784647089,
      "grad_norm": 2.65625,
      "learning_rate": 1.9587165847592496e-05,
      "loss": 0.7847,
      "step": 554190
    },
    {
      "epoch": 1.9423328321539848,
      "grad_norm": 2.5,
      "learning_rate": 1.9586516818928794e-05,
      "loss": 0.7492,
      "step": 554200
    },
    {
      "epoch": 1.9423678796608803,
      "grad_norm": 3.421875,
      "learning_rate": 1.958586779026509e-05,
      "loss": 0.7803,
      "step": 554210
    },
    {
      "epoch": 1.9424029271677759,
      "grad_norm": 2.78125,
      "learning_rate": 1.9585218761601386e-05,
      "loss": 0.8339,
      "step": 554220
    },
    {
      "epoch": 1.9424379746746716,
      "grad_norm": 3.0,
      "learning_rate": 1.9584569732937684e-05,
      "loss": 0.8626,
      "step": 554230
    },
    {
      "epoch": 1.9424730221815671,
      "grad_norm": 3.171875,
      "learning_rate": 1.9583920704273982e-05,
      "loss": 0.8433,
      "step": 554240
    },
    {
      "epoch": 1.9425080696884627,
      "grad_norm": 2.828125,
      "learning_rate": 1.958327167561028e-05,
      "loss": 0.7828,
      "step": 554250
    },
    {
      "epoch": 1.9425431171953584,
      "grad_norm": 4.03125,
      "learning_rate": 1.958262264694658e-05,
      "loss": 0.8742,
      "step": 554260
    },
    {
      "epoch": 1.9425781647022538,
      "grad_norm": 2.75,
      "learning_rate": 1.958197361828288e-05,
      "loss": 0.8355,
      "step": 554270
    },
    {
      "epoch": 1.9426132122091495,
      "grad_norm": 2.765625,
      "learning_rate": 1.9581324589619178e-05,
      "loss": 0.8222,
      "step": 554280
    },
    {
      "epoch": 1.942648259716045,
      "grad_norm": 2.71875,
      "learning_rate": 1.9580675560955476e-05,
      "loss": 0.8505,
      "step": 554290
    },
    {
      "epoch": 1.9426833072229406,
      "grad_norm": 2.4375,
      "learning_rate": 1.9580026532291774e-05,
      "loss": 0.7923,
      "step": 554300
    },
    {
      "epoch": 1.9427183547298363,
      "grad_norm": 2.828125,
      "learning_rate": 1.9579377503628072e-05,
      "loss": 0.7335,
      "step": 554310
    },
    {
      "epoch": 1.9427534022367319,
      "grad_norm": 3.03125,
      "learning_rate": 1.957872847496437e-05,
      "loss": 0.7805,
      "step": 554320
    },
    {
      "epoch": 1.9427884497436274,
      "grad_norm": 2.671875,
      "learning_rate": 1.9578079446300668e-05,
      "loss": 0.8426,
      "step": 554330
    },
    {
      "epoch": 1.9428234972505232,
      "grad_norm": 3.03125,
      "learning_rate": 1.9577430417636966e-05,
      "loss": 0.8338,
      "step": 554340
    },
    {
      "epoch": 1.9428585447574187,
      "grad_norm": 2.890625,
      "learning_rate": 1.9576781388973264e-05,
      "loss": 0.784,
      "step": 554350
    },
    {
      "epoch": 1.9428935922643142,
      "grad_norm": 2.921875,
      "learning_rate": 1.9576132360309562e-05,
      "loss": 0.8954,
      "step": 554360
    },
    {
      "epoch": 1.94292863977121,
      "grad_norm": 2.890625,
      "learning_rate": 1.957548333164586e-05,
      "loss": 0.7914,
      "step": 554370
    },
    {
      "epoch": 1.9429636872781053,
      "grad_norm": 2.75,
      "learning_rate": 1.9574834302982158e-05,
      "loss": 0.7987,
      "step": 554380
    },
    {
      "epoch": 1.942998734785001,
      "grad_norm": 2.84375,
      "learning_rate": 1.9574185274318456e-05,
      "loss": 0.8078,
      "step": 554390
    },
    {
      "epoch": 1.9430337822918968,
      "grad_norm": 2.703125,
      "learning_rate": 1.9573536245654754e-05,
      "loss": 0.8569,
      "step": 554400
    },
    {
      "epoch": 1.9430688297987921,
      "grad_norm": 3.03125,
      "learning_rate": 1.9572887216991052e-05,
      "loss": 0.8479,
      "step": 554410
    },
    {
      "epoch": 1.943103877305688,
      "grad_norm": 2.765625,
      "learning_rate": 1.957223818832735e-05,
      "loss": 0.7801,
      "step": 554420
    },
    {
      "epoch": 1.9431389248125834,
      "grad_norm": 3.640625,
      "learning_rate": 1.9571589159663648e-05,
      "loss": 0.8927,
      "step": 554430
    },
    {
      "epoch": 1.943173972319479,
      "grad_norm": 3.09375,
      "learning_rate": 1.9570940130999946e-05,
      "loss": 0.7559,
      "step": 554440
    },
    {
      "epoch": 1.9432090198263747,
      "grad_norm": 2.6875,
      "learning_rate": 1.9570291102336244e-05,
      "loss": 0.7446,
      "step": 554450
    },
    {
      "epoch": 1.9432440673332703,
      "grad_norm": 3.1875,
      "learning_rate": 1.9569642073672542e-05,
      "loss": 0.9072,
      "step": 554460
    },
    {
      "epoch": 1.9432791148401658,
      "grad_norm": 2.25,
      "learning_rate": 1.956899304500884e-05,
      "loss": 0.7751,
      "step": 554470
    },
    {
      "epoch": 1.9433141623470616,
      "grad_norm": 2.921875,
      "learning_rate": 1.9568344016345138e-05,
      "loss": 0.7531,
      "step": 554480
    },
    {
      "epoch": 1.943349209853957,
      "grad_norm": 3.15625,
      "learning_rate": 1.9567694987681436e-05,
      "loss": 0.8562,
      "step": 554490
    },
    {
      "epoch": 1.9433842573608526,
      "grad_norm": 3.109375,
      "learning_rate": 1.9567045959017734e-05,
      "loss": 0.853,
      "step": 554500
    },
    {
      "epoch": 1.9434193048677484,
      "grad_norm": 3.171875,
      "learning_rate": 1.9566396930354032e-05,
      "loss": 0.8174,
      "step": 554510
    },
    {
      "epoch": 1.9434543523746437,
      "grad_norm": 3.140625,
      "learning_rate": 1.9565747901690333e-05,
      "loss": 0.925,
      "step": 554520
    },
    {
      "epoch": 1.9434893998815395,
      "grad_norm": 2.59375,
      "learning_rate": 1.956509887302663e-05,
      "loss": 0.7632,
      "step": 554530
    },
    {
      "epoch": 1.943524447388435,
      "grad_norm": 3.0625,
      "learning_rate": 1.956444984436293e-05,
      "loss": 0.8893,
      "step": 554540
    },
    {
      "epoch": 1.9435594948953305,
      "grad_norm": 3.15625,
      "learning_rate": 1.9563800815699227e-05,
      "loss": 0.8829,
      "step": 554550
    },
    {
      "epoch": 1.9435945424022263,
      "grad_norm": 2.921875,
      "learning_rate": 1.9563151787035525e-05,
      "loss": 0.7757,
      "step": 554560
    },
    {
      "epoch": 1.9436295899091218,
      "grad_norm": 2.875,
      "learning_rate": 1.9562502758371823e-05,
      "loss": 0.7471,
      "step": 554570
    },
    {
      "epoch": 1.9436646374160174,
      "grad_norm": 2.515625,
      "learning_rate": 1.956185372970812e-05,
      "loss": 0.7641,
      "step": 554580
    },
    {
      "epoch": 1.9436996849229131,
      "grad_norm": 2.609375,
      "learning_rate": 1.9561204701044416e-05,
      "loss": 0.7538,
      "step": 554590
    },
    {
      "epoch": 1.9437347324298087,
      "grad_norm": 2.75,
      "learning_rate": 1.9560555672380714e-05,
      "loss": 0.8628,
      "step": 554600
    },
    {
      "epoch": 1.9437697799367042,
      "grad_norm": 3.21875,
      "learning_rate": 1.9559906643717012e-05,
      "loss": 0.9106,
      "step": 554610
    },
    {
      "epoch": 1.9438048274436,
      "grad_norm": 3.0,
      "learning_rate": 1.955925761505331e-05,
      "loss": 0.6974,
      "step": 554620
    },
    {
      "epoch": 1.9438398749504953,
      "grad_norm": 2.859375,
      "learning_rate": 1.955860858638961e-05,
      "loss": 0.8025,
      "step": 554630
    },
    {
      "epoch": 1.943874922457391,
      "grad_norm": 2.78125,
      "learning_rate": 1.955795955772591e-05,
      "loss": 0.8103,
      "step": 554640
    },
    {
      "epoch": 1.9439099699642866,
      "grad_norm": 2.734375,
      "learning_rate": 1.9557310529062207e-05,
      "loss": 0.8454,
      "step": 554650
    },
    {
      "epoch": 1.943945017471182,
      "grad_norm": 3.28125,
      "learning_rate": 1.9556661500398505e-05,
      "loss": 0.8232,
      "step": 554660
    },
    {
      "epoch": 1.9439800649780778,
      "grad_norm": 2.953125,
      "learning_rate": 1.9556012471734803e-05,
      "loss": 0.7525,
      "step": 554670
    },
    {
      "epoch": 1.9440151124849734,
      "grad_norm": 2.78125,
      "learning_rate": 1.95553634430711e-05,
      "loss": 0.7779,
      "step": 554680
    },
    {
      "epoch": 1.944050159991869,
      "grad_norm": 2.578125,
      "learning_rate": 1.95547144144074e-05,
      "loss": 0.7256,
      "step": 554690
    },
    {
      "epoch": 1.9440852074987647,
      "grad_norm": 3.1875,
      "learning_rate": 1.9554065385743697e-05,
      "loss": 0.8182,
      "step": 554700
    },
    {
      "epoch": 1.9441202550056602,
      "grad_norm": 2.765625,
      "learning_rate": 1.9553416357079995e-05,
      "loss": 0.7588,
      "step": 554710
    },
    {
      "epoch": 1.9441553025125557,
      "grad_norm": 3.25,
      "learning_rate": 1.9552767328416293e-05,
      "loss": 0.8728,
      "step": 554720
    },
    {
      "epoch": 1.9441903500194515,
      "grad_norm": 2.828125,
      "learning_rate": 1.955211829975259e-05,
      "loss": 0.8281,
      "step": 554730
    },
    {
      "epoch": 1.9442253975263468,
      "grad_norm": 2.75,
      "learning_rate": 1.955146927108889e-05,
      "loss": 0.7253,
      "step": 554740
    },
    {
      "epoch": 1.9442604450332426,
      "grad_norm": 2.953125,
      "learning_rate": 1.9550820242425187e-05,
      "loss": 0.7563,
      "step": 554750
    },
    {
      "epoch": 1.9442954925401381,
      "grad_norm": 3.0625,
      "learning_rate": 1.9550171213761485e-05,
      "loss": 0.8237,
      "step": 554760
    },
    {
      "epoch": 1.9443305400470337,
      "grad_norm": 3.46875,
      "learning_rate": 1.9549522185097783e-05,
      "loss": 0.854,
      "step": 554770
    },
    {
      "epoch": 1.9443655875539294,
      "grad_norm": 2.765625,
      "learning_rate": 1.954887315643408e-05,
      "loss": 0.7437,
      "step": 554780
    },
    {
      "epoch": 1.944400635060825,
      "grad_norm": 2.890625,
      "learning_rate": 1.954822412777038e-05,
      "loss": 0.8073,
      "step": 554790
    },
    {
      "epoch": 1.9444356825677205,
      "grad_norm": 3.453125,
      "learning_rate": 1.9547575099106677e-05,
      "loss": 0.8144,
      "step": 554800
    },
    {
      "epoch": 1.9444707300746162,
      "grad_norm": 3.203125,
      "learning_rate": 1.9546926070442975e-05,
      "loss": 0.8743,
      "step": 554810
    },
    {
      "epoch": 1.9445057775815118,
      "grad_norm": 2.828125,
      "learning_rate": 1.9546277041779273e-05,
      "loss": 0.7884,
      "step": 554820
    },
    {
      "epoch": 1.9445408250884073,
      "grad_norm": 2.65625,
      "learning_rate": 1.954562801311557e-05,
      "loss": 0.8576,
      "step": 554830
    },
    {
      "epoch": 1.944575872595303,
      "grad_norm": 3.140625,
      "learning_rate": 1.954497898445187e-05,
      "loss": 0.7648,
      "step": 554840
    },
    {
      "epoch": 1.9446109201021984,
      "grad_norm": 2.890625,
      "learning_rate": 1.9544329955788167e-05,
      "loss": 0.8203,
      "step": 554850
    },
    {
      "epoch": 1.9446459676090941,
      "grad_norm": 2.671875,
      "learning_rate": 1.9543680927124465e-05,
      "loss": 0.816,
      "step": 554860
    },
    {
      "epoch": 1.9446810151159897,
      "grad_norm": 2.75,
      "learning_rate": 1.9543031898460763e-05,
      "loss": 0.8153,
      "step": 554870
    },
    {
      "epoch": 1.9447160626228852,
      "grad_norm": 3.328125,
      "learning_rate": 1.954238286979706e-05,
      "loss": 0.8735,
      "step": 554880
    },
    {
      "epoch": 1.944751110129781,
      "grad_norm": 3.1875,
      "learning_rate": 1.9541733841133363e-05,
      "loss": 0.8751,
      "step": 554890
    },
    {
      "epoch": 1.9447861576366765,
      "grad_norm": 2.984375,
      "learning_rate": 1.954108481246966e-05,
      "loss": 0.8327,
      "step": 554900
    },
    {
      "epoch": 1.944821205143572,
      "grad_norm": 3.03125,
      "learning_rate": 1.954043578380596e-05,
      "loss": 0.8035,
      "step": 554910
    },
    {
      "epoch": 1.9448562526504678,
      "grad_norm": 2.8125,
      "learning_rate": 1.9539786755142257e-05,
      "loss": 0.7955,
      "step": 554920
    },
    {
      "epoch": 1.9448913001573633,
      "grad_norm": 2.640625,
      "learning_rate": 1.9539137726478555e-05,
      "loss": 0.8684,
      "step": 554930
    },
    {
      "epoch": 1.9449263476642589,
      "grad_norm": 3.078125,
      "learning_rate": 1.9538488697814853e-05,
      "loss": 0.8091,
      "step": 554940
    },
    {
      "epoch": 1.9449613951711546,
      "grad_norm": 2.46875,
      "learning_rate": 1.953783966915115e-05,
      "loss": 0.7587,
      "step": 554950
    },
    {
      "epoch": 1.94499644267805,
      "grad_norm": 3.09375,
      "learning_rate": 1.9537190640487445e-05,
      "loss": 0.7902,
      "step": 554960
    },
    {
      "epoch": 1.9450314901849457,
      "grad_norm": 2.671875,
      "learning_rate": 1.9536541611823743e-05,
      "loss": 0.8211,
      "step": 554970
    },
    {
      "epoch": 1.9450665376918415,
      "grad_norm": 2.875,
      "learning_rate": 1.953589258316004e-05,
      "loss": 0.961,
      "step": 554980
    },
    {
      "epoch": 1.9451015851987368,
      "grad_norm": 3.328125,
      "learning_rate": 1.953524355449634e-05,
      "loss": 0.8587,
      "step": 554990
    },
    {
      "epoch": 1.9451366327056325,
      "grad_norm": 3.078125,
      "learning_rate": 1.953459452583264e-05,
      "loss": 0.8062,
      "step": 555000
    },
    {
      "epoch": 1.9451366327056325,
      "eval_loss": 0.7724341154098511,
      "eval_runtime": 552.2874,
      "eval_samples_per_second": 688.837,
      "eval_steps_per_second": 57.403,
      "step": 555000
    },
    {
      "epoch": 1.945171680212528,
      "grad_norm": 3.046875,
      "learning_rate": 1.953394549716894e-05,
      "loss": 0.8345,
      "step": 555010
    },
    {
      "epoch": 1.9452067277194236,
      "grad_norm": 2.78125,
      "learning_rate": 1.9533296468505237e-05,
      "loss": 0.8706,
      "step": 555020
    },
    {
      "epoch": 1.9452417752263194,
      "grad_norm": 2.78125,
      "learning_rate": 1.9532647439841535e-05,
      "loss": 0.8432,
      "step": 555030
    },
    {
      "epoch": 1.945276822733215,
      "grad_norm": 2.828125,
      "learning_rate": 1.9531998411177833e-05,
      "loss": 0.7615,
      "step": 555040
    },
    {
      "epoch": 1.9453118702401104,
      "grad_norm": 2.71875,
      "learning_rate": 1.953134938251413e-05,
      "loss": 0.8104,
      "step": 555050
    },
    {
      "epoch": 1.9453469177470062,
      "grad_norm": 2.96875,
      "learning_rate": 1.953070035385043e-05,
      "loss": 0.9005,
      "step": 555060
    },
    {
      "epoch": 1.9453819652539015,
      "grad_norm": 3.046875,
      "learning_rate": 1.9530051325186727e-05,
      "loss": 0.8878,
      "step": 555070
    },
    {
      "epoch": 1.9454170127607973,
      "grad_norm": 3.03125,
      "learning_rate": 1.9529402296523025e-05,
      "loss": 0.784,
      "step": 555080
    },
    {
      "epoch": 1.945452060267693,
      "grad_norm": 3.0,
      "learning_rate": 1.9528753267859323e-05,
      "loss": 0.8129,
      "step": 555090
    },
    {
      "epoch": 1.9454871077745883,
      "grad_norm": 2.828125,
      "learning_rate": 1.952810423919562e-05,
      "loss": 0.8559,
      "step": 555100
    },
    {
      "epoch": 1.945522155281484,
      "grad_norm": 2.921875,
      "learning_rate": 1.952745521053192e-05,
      "loss": 0.8263,
      "step": 555110
    },
    {
      "epoch": 1.9455572027883796,
      "grad_norm": 2.734375,
      "learning_rate": 1.9526806181868217e-05,
      "loss": 0.8346,
      "step": 555120
    },
    {
      "epoch": 1.9455922502952752,
      "grad_norm": 2.515625,
      "learning_rate": 1.9526157153204515e-05,
      "loss": 0.7896,
      "step": 555130
    },
    {
      "epoch": 1.945627297802171,
      "grad_norm": 2.96875,
      "learning_rate": 1.9525508124540816e-05,
      "loss": 0.8046,
      "step": 555140
    },
    {
      "epoch": 1.9456623453090665,
      "grad_norm": 2.96875,
      "learning_rate": 1.952485909587711e-05,
      "loss": 0.8407,
      "step": 555150
    },
    {
      "epoch": 1.945697392815962,
      "grad_norm": 3.015625,
      "learning_rate": 1.952421006721341e-05,
      "loss": 0.8088,
      "step": 555160
    },
    {
      "epoch": 1.9457324403228577,
      "grad_norm": 3.03125,
      "learning_rate": 1.9523561038549707e-05,
      "loss": 0.8174,
      "step": 555170
    },
    {
      "epoch": 1.9457674878297533,
      "grad_norm": 3.28125,
      "learning_rate": 1.9522912009886005e-05,
      "loss": 0.8497,
      "step": 555180
    },
    {
      "epoch": 1.9458025353366488,
      "grad_norm": 2.484375,
      "learning_rate": 1.9522262981222303e-05,
      "loss": 0.7253,
      "step": 555190
    },
    {
      "epoch": 1.9458375828435446,
      "grad_norm": 2.921875,
      "learning_rate": 1.95216139525586e-05,
      "loss": 0.808,
      "step": 555200
    },
    {
      "epoch": 1.94587263035044,
      "grad_norm": 2.875,
      "learning_rate": 1.95209649238949e-05,
      "loss": 0.8092,
      "step": 555210
    },
    {
      "epoch": 1.9459076778573356,
      "grad_norm": 2.84375,
      "learning_rate": 1.9520315895231197e-05,
      "loss": 0.8199,
      "step": 555220
    },
    {
      "epoch": 1.9459427253642312,
      "grad_norm": 2.890625,
      "learning_rate": 1.9519666866567495e-05,
      "loss": 0.7127,
      "step": 555230
    },
    {
      "epoch": 1.9459777728711267,
      "grad_norm": 2.859375,
      "learning_rate": 1.9519017837903793e-05,
      "loss": 0.7492,
      "step": 555240
    },
    {
      "epoch": 1.9460128203780225,
      "grad_norm": 3.34375,
      "learning_rate": 1.9518368809240094e-05,
      "loss": 0.8414,
      "step": 555250
    },
    {
      "epoch": 1.946047867884918,
      "grad_norm": 3.140625,
      "learning_rate": 1.9517719780576392e-05,
      "loss": 0.8711,
      "step": 555260
    },
    {
      "epoch": 1.9460829153918136,
      "grad_norm": 3.125,
      "learning_rate": 1.951707075191269e-05,
      "loss": 0.7664,
      "step": 555270
    },
    {
      "epoch": 1.9461179628987093,
      "grad_norm": 3.015625,
      "learning_rate": 1.9516421723248988e-05,
      "loss": 0.8607,
      "step": 555280
    },
    {
      "epoch": 1.9461530104056048,
      "grad_norm": 3.0,
      "learning_rate": 1.9515772694585286e-05,
      "loss": 0.7928,
      "step": 555290
    },
    {
      "epoch": 1.9461880579125004,
      "grad_norm": 2.59375,
      "learning_rate": 1.9515123665921584e-05,
      "loss": 0.8268,
      "step": 555300
    },
    {
      "epoch": 1.9462231054193961,
      "grad_norm": 2.796875,
      "learning_rate": 1.9514474637257882e-05,
      "loss": 0.8351,
      "step": 555310
    },
    {
      "epoch": 1.9462581529262915,
      "grad_norm": 3.078125,
      "learning_rate": 1.951382560859418e-05,
      "loss": 0.754,
      "step": 555320
    },
    {
      "epoch": 1.9462932004331872,
      "grad_norm": 2.65625,
      "learning_rate": 1.9513176579930478e-05,
      "loss": 0.846,
      "step": 555330
    },
    {
      "epoch": 1.9463282479400827,
      "grad_norm": 3.421875,
      "learning_rate": 1.9512527551266773e-05,
      "loss": 0.8325,
      "step": 555340
    },
    {
      "epoch": 1.9463632954469783,
      "grad_norm": 2.78125,
      "learning_rate": 1.951187852260307e-05,
      "loss": 0.7814,
      "step": 555350
    },
    {
      "epoch": 1.946398342953874,
      "grad_norm": 2.71875,
      "learning_rate": 1.951122949393937e-05,
      "loss": 0.8811,
      "step": 555360
    },
    {
      "epoch": 1.9464333904607696,
      "grad_norm": 2.703125,
      "learning_rate": 1.951058046527567e-05,
      "loss": 0.8809,
      "step": 555370
    },
    {
      "epoch": 1.946468437967665,
      "grad_norm": 2.890625,
      "learning_rate": 1.9509931436611968e-05,
      "loss": 0.8452,
      "step": 555380
    },
    {
      "epoch": 1.9465034854745609,
      "grad_norm": 2.84375,
      "learning_rate": 1.9509282407948266e-05,
      "loss": 0.8493,
      "step": 555390
    },
    {
      "epoch": 1.9465385329814564,
      "grad_norm": 2.75,
      "learning_rate": 1.9508633379284564e-05,
      "loss": 0.7748,
      "step": 555400
    },
    {
      "epoch": 1.946573580488352,
      "grad_norm": 2.5625,
      "learning_rate": 1.9507984350620862e-05,
      "loss": 0.7499,
      "step": 555410
    },
    {
      "epoch": 1.9466086279952477,
      "grad_norm": 3.4375,
      "learning_rate": 1.950733532195716e-05,
      "loss": 0.8154,
      "step": 555420
    },
    {
      "epoch": 1.946643675502143,
      "grad_norm": 2.703125,
      "learning_rate": 1.9506686293293458e-05,
      "loss": 0.842,
      "step": 555430
    },
    {
      "epoch": 1.9466787230090388,
      "grad_norm": 3.484375,
      "learning_rate": 1.9506037264629756e-05,
      "loss": 0.7657,
      "step": 555440
    },
    {
      "epoch": 1.9467137705159343,
      "grad_norm": 2.796875,
      "learning_rate": 1.9505388235966054e-05,
      "loss": 0.7669,
      "step": 555450
    },
    {
      "epoch": 1.9467488180228298,
      "grad_norm": 2.671875,
      "learning_rate": 1.9504739207302352e-05,
      "loss": 0.8036,
      "step": 555460
    },
    {
      "epoch": 1.9467838655297256,
      "grad_norm": 2.953125,
      "learning_rate": 1.950409017863865e-05,
      "loss": 0.8735,
      "step": 555470
    },
    {
      "epoch": 1.9468189130366211,
      "grad_norm": 2.875,
      "learning_rate": 1.9503441149974948e-05,
      "loss": 0.7907,
      "step": 555480
    },
    {
      "epoch": 1.9468539605435167,
      "grad_norm": 3.109375,
      "learning_rate": 1.9502792121311246e-05,
      "loss": 0.8878,
      "step": 555490
    },
    {
      "epoch": 1.9468890080504124,
      "grad_norm": 2.90625,
      "learning_rate": 1.9502143092647544e-05,
      "loss": 0.8681,
      "step": 555500
    },
    {
      "epoch": 1.946924055557308,
      "grad_norm": 2.796875,
      "learning_rate": 1.9501494063983845e-05,
      "loss": 0.8684,
      "step": 555510
    },
    {
      "epoch": 1.9469591030642035,
      "grad_norm": 3.25,
      "learning_rate": 1.9500845035320143e-05,
      "loss": 0.8548,
      "step": 555520
    },
    {
      "epoch": 1.9469941505710993,
      "grad_norm": 2.984375,
      "learning_rate": 1.9500196006656438e-05,
      "loss": 0.8843,
      "step": 555530
    },
    {
      "epoch": 1.9470291980779946,
      "grad_norm": 2.921875,
      "learning_rate": 1.9499546977992736e-05,
      "loss": 0.7497,
      "step": 555540
    },
    {
      "epoch": 1.9470642455848903,
      "grad_norm": 2.765625,
      "learning_rate": 1.9498897949329034e-05,
      "loss": 0.8286,
      "step": 555550
    },
    {
      "epoch": 1.9470992930917859,
      "grad_norm": 2.84375,
      "learning_rate": 1.9498248920665332e-05,
      "loss": 0.8474,
      "step": 555560
    },
    {
      "epoch": 1.9471343405986814,
      "grad_norm": 2.6875,
      "learning_rate": 1.949759989200163e-05,
      "loss": 0.9493,
      "step": 555570
    },
    {
      "epoch": 1.9471693881055772,
      "grad_norm": 2.859375,
      "learning_rate": 1.9496950863337928e-05,
      "loss": 0.8117,
      "step": 555580
    },
    {
      "epoch": 1.9472044356124727,
      "grad_norm": 3.140625,
      "learning_rate": 1.9496301834674226e-05,
      "loss": 0.8456,
      "step": 555590
    },
    {
      "epoch": 1.9472394831193682,
      "grad_norm": 3.03125,
      "learning_rate": 1.9495652806010524e-05,
      "loss": 0.8774,
      "step": 555600
    },
    {
      "epoch": 1.947274530626264,
      "grad_norm": 2.59375,
      "learning_rate": 1.9495003777346822e-05,
      "loss": 0.8266,
      "step": 555610
    },
    {
      "epoch": 1.9473095781331595,
      "grad_norm": 3.15625,
      "learning_rate": 1.9494354748683123e-05,
      "loss": 0.8816,
      "step": 555620
    },
    {
      "epoch": 1.947344625640055,
      "grad_norm": 3.03125,
      "learning_rate": 1.949370572001942e-05,
      "loss": 0.8219,
      "step": 555630
    },
    {
      "epoch": 1.9473796731469508,
      "grad_norm": 2.8125,
      "learning_rate": 1.949305669135572e-05,
      "loss": 0.8385,
      "step": 555640
    },
    {
      "epoch": 1.9474147206538461,
      "grad_norm": 2.515625,
      "learning_rate": 1.9492407662692017e-05,
      "loss": 0.8915,
      "step": 555650
    },
    {
      "epoch": 1.947449768160742,
      "grad_norm": 2.640625,
      "learning_rate": 1.9491758634028315e-05,
      "loss": 0.8586,
      "step": 555660
    },
    {
      "epoch": 1.9474848156676376,
      "grad_norm": 2.765625,
      "learning_rate": 1.9491109605364613e-05,
      "loss": 0.8848,
      "step": 555670
    },
    {
      "epoch": 1.947519863174533,
      "grad_norm": 2.859375,
      "learning_rate": 1.949046057670091e-05,
      "loss": 0.8641,
      "step": 555680
    },
    {
      "epoch": 1.9475549106814287,
      "grad_norm": 3.03125,
      "learning_rate": 1.948981154803721e-05,
      "loss": 0.7761,
      "step": 555690
    },
    {
      "epoch": 1.9475899581883243,
      "grad_norm": 2.640625,
      "learning_rate": 1.9489162519373507e-05,
      "loss": 0.8328,
      "step": 555700
    },
    {
      "epoch": 1.9476250056952198,
      "grad_norm": 2.875,
      "learning_rate": 1.9488513490709802e-05,
      "loss": 0.848,
      "step": 555710
    },
    {
      "epoch": 1.9476600532021155,
      "grad_norm": 3.28125,
      "learning_rate": 1.94878644620461e-05,
      "loss": 0.8275,
      "step": 555720
    },
    {
      "epoch": 1.947695100709011,
      "grad_norm": 2.515625,
      "learning_rate": 1.94872154333824e-05,
      "loss": 0.7956,
      "step": 555730
    },
    {
      "epoch": 1.9477301482159066,
      "grad_norm": 2.5,
      "learning_rate": 1.94865664047187e-05,
      "loss": 0.816,
      "step": 555740
    },
    {
      "epoch": 1.9477651957228024,
      "grad_norm": 2.609375,
      "learning_rate": 1.9485917376054997e-05,
      "loss": 0.762,
      "step": 555750
    },
    {
      "epoch": 1.9478002432296977,
      "grad_norm": 2.828125,
      "learning_rate": 1.9485268347391295e-05,
      "loss": 0.8577,
      "step": 555760
    },
    {
      "epoch": 1.9478352907365934,
      "grad_norm": 2.890625,
      "learning_rate": 1.9484619318727593e-05,
      "loss": 0.8144,
      "step": 555770
    },
    {
      "epoch": 1.9478703382434892,
      "grad_norm": 2.984375,
      "learning_rate": 1.948397029006389e-05,
      "loss": 0.8778,
      "step": 555780
    },
    {
      "epoch": 1.9479053857503845,
      "grad_norm": 3.0625,
      "learning_rate": 1.948332126140019e-05,
      "loss": 0.8315,
      "step": 555790
    },
    {
      "epoch": 1.9479404332572803,
      "grad_norm": 2.71875,
      "learning_rate": 1.9482672232736487e-05,
      "loss": 0.7884,
      "step": 555800
    },
    {
      "epoch": 1.9479754807641758,
      "grad_norm": 3.59375,
      "learning_rate": 1.9482023204072785e-05,
      "loss": 0.893,
      "step": 555810
    },
    {
      "epoch": 1.9480105282710714,
      "grad_norm": 3.125,
      "learning_rate": 1.9481374175409083e-05,
      "loss": 0.8345,
      "step": 555820
    },
    {
      "epoch": 1.948045575777967,
      "grad_norm": 3.171875,
      "learning_rate": 1.948072514674538e-05,
      "loss": 0.8044,
      "step": 555830
    },
    {
      "epoch": 1.9480806232848626,
      "grad_norm": 2.9375,
      "learning_rate": 1.948007611808168e-05,
      "loss": 0.857,
      "step": 555840
    },
    {
      "epoch": 1.9481156707917582,
      "grad_norm": 2.828125,
      "learning_rate": 1.9479427089417977e-05,
      "loss": 0.7996,
      "step": 555850
    },
    {
      "epoch": 1.948150718298654,
      "grad_norm": 3.0,
      "learning_rate": 1.9478778060754275e-05,
      "loss": 0.8794,
      "step": 555860
    },
    {
      "epoch": 1.9481857658055495,
      "grad_norm": 2.765625,
      "learning_rate": 1.9478129032090577e-05,
      "loss": 0.8194,
      "step": 555870
    },
    {
      "epoch": 1.948220813312445,
      "grad_norm": 3.046875,
      "learning_rate": 1.9477480003426875e-05,
      "loss": 0.8372,
      "step": 555880
    },
    {
      "epoch": 1.9482558608193408,
      "grad_norm": 2.9375,
      "learning_rate": 1.9476830974763173e-05,
      "loss": 0.844,
      "step": 555890
    },
    {
      "epoch": 1.948290908326236,
      "grad_norm": 2.65625,
      "learning_rate": 1.9476181946099467e-05,
      "loss": 0.7535,
      "step": 555900
    },
    {
      "epoch": 1.9483259558331318,
      "grad_norm": 2.78125,
      "learning_rate": 1.9475532917435765e-05,
      "loss": 0.835,
      "step": 555910
    },
    {
      "epoch": 1.9483610033400274,
      "grad_norm": 3.21875,
      "learning_rate": 1.9474883888772063e-05,
      "loss": 0.7822,
      "step": 555920
    },
    {
      "epoch": 1.948396050846923,
      "grad_norm": 3.0,
      "learning_rate": 1.947423486010836e-05,
      "loss": 0.8624,
      "step": 555930
    },
    {
      "epoch": 1.9484310983538187,
      "grad_norm": 2.921875,
      "learning_rate": 1.947358583144466e-05,
      "loss": 0.8325,
      "step": 555940
    },
    {
      "epoch": 1.9484661458607142,
      "grad_norm": 2.875,
      "learning_rate": 1.9472936802780957e-05,
      "loss": 0.8087,
      "step": 555950
    },
    {
      "epoch": 1.9485011933676097,
      "grad_norm": 2.96875,
      "learning_rate": 1.9472287774117255e-05,
      "loss": 0.8661,
      "step": 555960
    },
    {
      "epoch": 1.9485362408745055,
      "grad_norm": 3.265625,
      "learning_rate": 1.9471638745453553e-05,
      "loss": 0.8647,
      "step": 555970
    },
    {
      "epoch": 1.948571288381401,
      "grad_norm": 3.59375,
      "learning_rate": 1.947098971678985e-05,
      "loss": 0.8423,
      "step": 555980
    },
    {
      "epoch": 1.9486063358882966,
      "grad_norm": 2.453125,
      "learning_rate": 1.9470340688126153e-05,
      "loss": 0.7217,
      "step": 555990
    },
    {
      "epoch": 1.9486413833951923,
      "grad_norm": 3.3125,
      "learning_rate": 1.946969165946245e-05,
      "loss": 0.8105,
      "step": 556000
    },
    {
      "epoch": 1.9486764309020876,
      "grad_norm": 3.0,
      "learning_rate": 1.946904263079875e-05,
      "loss": 0.8044,
      "step": 556010
    },
    {
      "epoch": 1.9487114784089834,
      "grad_norm": 3.25,
      "learning_rate": 1.9468393602135047e-05,
      "loss": 0.8453,
      "step": 556020
    },
    {
      "epoch": 1.948746525915879,
      "grad_norm": 3.234375,
      "learning_rate": 1.9467744573471345e-05,
      "loss": 0.9521,
      "step": 556030
    },
    {
      "epoch": 1.9487815734227745,
      "grad_norm": 2.796875,
      "learning_rate": 1.9467095544807643e-05,
      "loss": 0.8382,
      "step": 556040
    },
    {
      "epoch": 1.9488166209296702,
      "grad_norm": 2.671875,
      "learning_rate": 1.946644651614394e-05,
      "loss": 0.8036,
      "step": 556050
    },
    {
      "epoch": 1.9488516684365658,
      "grad_norm": 3.0,
      "learning_rate": 1.946579748748024e-05,
      "loss": 0.7073,
      "step": 556060
    },
    {
      "epoch": 1.9488867159434613,
      "grad_norm": 3.140625,
      "learning_rate": 1.9465148458816537e-05,
      "loss": 0.8376,
      "step": 556070
    },
    {
      "epoch": 1.948921763450357,
      "grad_norm": 2.765625,
      "learning_rate": 1.9464499430152835e-05,
      "loss": 0.7963,
      "step": 556080
    },
    {
      "epoch": 1.9489568109572526,
      "grad_norm": 2.734375,
      "learning_rate": 1.946385040148913e-05,
      "loss": 0.8017,
      "step": 556090
    },
    {
      "epoch": 1.9489918584641481,
      "grad_norm": 3.25,
      "learning_rate": 1.946320137282543e-05,
      "loss": 0.7339,
      "step": 556100
    },
    {
      "epoch": 1.9490269059710439,
      "grad_norm": 2.453125,
      "learning_rate": 1.946255234416173e-05,
      "loss": 0.7682,
      "step": 556110
    },
    {
      "epoch": 1.9490619534779392,
      "grad_norm": 3.921875,
      "learning_rate": 1.9461903315498027e-05,
      "loss": 0.8433,
      "step": 556120
    },
    {
      "epoch": 1.949097000984835,
      "grad_norm": 2.90625,
      "learning_rate": 1.9461254286834325e-05,
      "loss": 0.8678,
      "step": 556130
    },
    {
      "epoch": 1.9491320484917305,
      "grad_norm": 3.078125,
      "learning_rate": 1.9460605258170623e-05,
      "loss": 0.8106,
      "step": 556140
    },
    {
      "epoch": 1.949167095998626,
      "grad_norm": 3.140625,
      "learning_rate": 1.945995622950692e-05,
      "loss": 0.7962,
      "step": 556150
    },
    {
      "epoch": 1.9492021435055218,
      "grad_norm": 3.546875,
      "learning_rate": 1.945930720084322e-05,
      "loss": 0.8013,
      "step": 556160
    },
    {
      "epoch": 1.9492371910124173,
      "grad_norm": 3.03125,
      "learning_rate": 1.9458658172179517e-05,
      "loss": 0.875,
      "step": 556170
    },
    {
      "epoch": 1.9492722385193129,
      "grad_norm": 3.171875,
      "learning_rate": 1.9458009143515815e-05,
      "loss": 0.8302,
      "step": 556180
    },
    {
      "epoch": 1.9493072860262086,
      "grad_norm": 2.890625,
      "learning_rate": 1.9457360114852113e-05,
      "loss": 0.8516,
      "step": 556190
    },
    {
      "epoch": 1.9493423335331042,
      "grad_norm": 3.1875,
      "learning_rate": 1.945671108618841e-05,
      "loss": 0.8642,
      "step": 556200
    },
    {
      "epoch": 1.9493773810399997,
      "grad_norm": 3.4375,
      "learning_rate": 1.945606205752471e-05,
      "loss": 0.806,
      "step": 556210
    },
    {
      "epoch": 1.9494124285468954,
      "grad_norm": 3.21875,
      "learning_rate": 1.9455413028861007e-05,
      "loss": 0.7478,
      "step": 556220
    },
    {
      "epoch": 1.9494474760537908,
      "grad_norm": 3.03125,
      "learning_rate": 1.9454764000197305e-05,
      "loss": 0.7901,
      "step": 556230
    },
    {
      "epoch": 1.9494825235606865,
      "grad_norm": 3.234375,
      "learning_rate": 1.9454114971533606e-05,
      "loss": 0.7859,
      "step": 556240
    },
    {
      "epoch": 1.949517571067582,
      "grad_norm": 3.125,
      "learning_rate": 1.9453465942869904e-05,
      "loss": 0.8129,
      "step": 556250
    },
    {
      "epoch": 1.9495526185744776,
      "grad_norm": 2.90625,
      "learning_rate": 1.9452816914206202e-05,
      "loss": 0.7791,
      "step": 556260
    },
    {
      "epoch": 1.9495876660813733,
      "grad_norm": 3.0625,
      "learning_rate": 1.94521678855425e-05,
      "loss": 0.7826,
      "step": 556270
    },
    {
      "epoch": 1.9496227135882689,
      "grad_norm": 3.09375,
      "learning_rate": 1.9451518856878795e-05,
      "loss": 0.8858,
      "step": 556280
    },
    {
      "epoch": 1.9496577610951644,
      "grad_norm": 2.78125,
      "learning_rate": 1.9450869828215093e-05,
      "loss": 0.7724,
      "step": 556290
    },
    {
      "epoch": 1.9496928086020602,
      "grad_norm": 2.703125,
      "learning_rate": 1.945022079955139e-05,
      "loss": 0.7733,
      "step": 556300
    },
    {
      "epoch": 1.9497278561089557,
      "grad_norm": 3.125,
      "learning_rate": 1.944957177088769e-05,
      "loss": 0.8156,
      "step": 556310
    },
    {
      "epoch": 1.9497629036158513,
      "grad_norm": 2.984375,
      "learning_rate": 1.9448922742223987e-05,
      "loss": 0.9211,
      "step": 556320
    },
    {
      "epoch": 1.949797951122747,
      "grad_norm": 3.0,
      "learning_rate": 1.9448273713560285e-05,
      "loss": 0.8209,
      "step": 556330
    },
    {
      "epoch": 1.9498329986296423,
      "grad_norm": 3.15625,
      "learning_rate": 1.9447624684896583e-05,
      "loss": 0.8218,
      "step": 556340
    },
    {
      "epoch": 1.949868046136538,
      "grad_norm": 3.15625,
      "learning_rate": 1.9446975656232884e-05,
      "loss": 0.8558,
      "step": 556350
    },
    {
      "epoch": 1.9499030936434338,
      "grad_norm": 3.078125,
      "learning_rate": 1.9446326627569182e-05,
      "loss": 0.7608,
      "step": 556360
    },
    {
      "epoch": 1.9499381411503292,
      "grad_norm": 2.984375,
      "learning_rate": 1.944567759890548e-05,
      "loss": 0.7767,
      "step": 556370
    },
    {
      "epoch": 1.949973188657225,
      "grad_norm": 3.546875,
      "learning_rate": 1.9445028570241778e-05,
      "loss": 0.7962,
      "step": 556380
    },
    {
      "epoch": 1.9500082361641204,
      "grad_norm": 3.03125,
      "learning_rate": 1.9444379541578076e-05,
      "loss": 0.8079,
      "step": 556390
    },
    {
      "epoch": 1.950043283671016,
      "grad_norm": 2.640625,
      "learning_rate": 1.9443730512914374e-05,
      "loss": 0.8201,
      "step": 556400
    },
    {
      "epoch": 1.9500783311779117,
      "grad_norm": 3.046875,
      "learning_rate": 1.9443081484250672e-05,
      "loss": 0.7832,
      "step": 556410
    },
    {
      "epoch": 1.9501133786848073,
      "grad_norm": 2.828125,
      "learning_rate": 1.944243245558697e-05,
      "loss": 0.8401,
      "step": 556420
    },
    {
      "epoch": 1.9501484261917028,
      "grad_norm": 2.84375,
      "learning_rate": 1.9441783426923268e-05,
      "loss": 0.7993,
      "step": 556430
    },
    {
      "epoch": 1.9501834736985986,
      "grad_norm": 3.0625,
      "learning_rate": 1.9441134398259566e-05,
      "loss": 0.8067,
      "step": 556440
    },
    {
      "epoch": 1.9502185212054939,
      "grad_norm": 3.15625,
      "learning_rate": 1.9440485369595864e-05,
      "loss": 0.7745,
      "step": 556450
    },
    {
      "epoch": 1.9502535687123896,
      "grad_norm": 2.796875,
      "learning_rate": 1.9439836340932162e-05,
      "loss": 0.8143,
      "step": 556460
    },
    {
      "epoch": 1.9502886162192854,
      "grad_norm": 2.703125,
      "learning_rate": 1.943918731226846e-05,
      "loss": 0.8754,
      "step": 556470
    },
    {
      "epoch": 1.9503236637261807,
      "grad_norm": 3.09375,
      "learning_rate": 1.9438538283604758e-05,
      "loss": 0.8442,
      "step": 556480
    },
    {
      "epoch": 1.9503587112330765,
      "grad_norm": 3.15625,
      "learning_rate": 1.9437889254941056e-05,
      "loss": 0.7768,
      "step": 556490
    },
    {
      "epoch": 1.950393758739972,
      "grad_norm": 2.65625,
      "learning_rate": 1.9437240226277354e-05,
      "loss": 0.8145,
      "step": 556500
    },
    {
      "epoch": 1.9504288062468675,
      "grad_norm": 3.03125,
      "learning_rate": 1.9436591197613652e-05,
      "loss": 0.8776,
      "step": 556510
    },
    {
      "epoch": 1.9504638537537633,
      "grad_norm": 3.140625,
      "learning_rate": 1.943594216894995e-05,
      "loss": 0.8951,
      "step": 556520
    },
    {
      "epoch": 1.9504989012606588,
      "grad_norm": 2.734375,
      "learning_rate": 1.9435293140286248e-05,
      "loss": 0.9181,
      "step": 556530
    },
    {
      "epoch": 1.9505339487675544,
      "grad_norm": 2.75,
      "learning_rate": 1.9434644111622546e-05,
      "loss": 0.7933,
      "step": 556540
    },
    {
      "epoch": 1.9505689962744501,
      "grad_norm": 2.671875,
      "learning_rate": 1.9433995082958844e-05,
      "loss": 0.7471,
      "step": 556550
    },
    {
      "epoch": 1.9506040437813457,
      "grad_norm": 2.640625,
      "learning_rate": 1.9433346054295142e-05,
      "loss": 0.7627,
      "step": 556560
    },
    {
      "epoch": 1.9506390912882412,
      "grad_norm": 2.96875,
      "learning_rate": 1.943269702563144e-05,
      "loss": 0.8236,
      "step": 556570
    },
    {
      "epoch": 1.950674138795137,
      "grad_norm": 3.046875,
      "learning_rate": 1.9432047996967738e-05,
      "loss": 0.9031,
      "step": 556580
    },
    {
      "epoch": 1.9507091863020323,
      "grad_norm": 2.90625,
      "learning_rate": 1.9431398968304036e-05,
      "loss": 0.8803,
      "step": 556590
    },
    {
      "epoch": 1.950744233808928,
      "grad_norm": 2.265625,
      "learning_rate": 1.9430749939640334e-05,
      "loss": 0.8383,
      "step": 556600
    },
    {
      "epoch": 1.9507792813158236,
      "grad_norm": 3.015625,
      "learning_rate": 1.9430100910976636e-05,
      "loss": 0.744,
      "step": 556610
    },
    {
      "epoch": 1.950814328822719,
      "grad_norm": 3.125,
      "learning_rate": 1.9429451882312934e-05,
      "loss": 0.9028,
      "step": 556620
    },
    {
      "epoch": 1.9508493763296149,
      "grad_norm": 2.890625,
      "learning_rate": 1.942880285364923e-05,
      "loss": 0.8206,
      "step": 556630
    },
    {
      "epoch": 1.9508844238365104,
      "grad_norm": 2.859375,
      "learning_rate": 1.942815382498553e-05,
      "loss": 0.796,
      "step": 556640
    },
    {
      "epoch": 1.950919471343406,
      "grad_norm": 3.125,
      "learning_rate": 1.9427504796321828e-05,
      "loss": 0.877,
      "step": 556650
    },
    {
      "epoch": 1.9509545188503017,
      "grad_norm": 2.8125,
      "learning_rate": 1.9426855767658122e-05,
      "loss": 0.7455,
      "step": 556660
    },
    {
      "epoch": 1.9509895663571972,
      "grad_norm": 2.953125,
      "learning_rate": 1.942620673899442e-05,
      "loss": 0.8342,
      "step": 556670
    },
    {
      "epoch": 1.9510246138640928,
      "grad_norm": 2.640625,
      "learning_rate": 1.9425557710330718e-05,
      "loss": 0.8587,
      "step": 556680
    },
    {
      "epoch": 1.9510596613709885,
      "grad_norm": 3.234375,
      "learning_rate": 1.9424908681667016e-05,
      "loss": 0.8912,
      "step": 556690
    },
    {
      "epoch": 1.9510947088778838,
      "grad_norm": 3.203125,
      "learning_rate": 1.9424259653003314e-05,
      "loss": 0.8284,
      "step": 556700
    },
    {
      "epoch": 1.9511297563847796,
      "grad_norm": 2.640625,
      "learning_rate": 1.9423610624339612e-05,
      "loss": 0.8001,
      "step": 556710
    },
    {
      "epoch": 1.9511648038916751,
      "grad_norm": 2.4375,
      "learning_rate": 1.9422961595675914e-05,
      "loss": 0.7523,
      "step": 556720
    },
    {
      "epoch": 1.9511998513985707,
      "grad_norm": 3.171875,
      "learning_rate": 1.942231256701221e-05,
      "loss": 0.8223,
      "step": 556730
    },
    {
      "epoch": 1.9512348989054664,
      "grad_norm": 2.9375,
      "learning_rate": 1.942166353834851e-05,
      "loss": 0.7744,
      "step": 556740
    },
    {
      "epoch": 1.951269946412362,
      "grad_norm": 2.921875,
      "learning_rate": 1.9421014509684808e-05,
      "loss": 0.8084,
      "step": 556750
    },
    {
      "epoch": 1.9513049939192575,
      "grad_norm": 2.3125,
      "learning_rate": 1.9420365481021106e-05,
      "loss": 0.8041,
      "step": 556760
    },
    {
      "epoch": 1.9513400414261532,
      "grad_norm": 3.03125,
      "learning_rate": 1.9419716452357404e-05,
      "loss": 0.9041,
      "step": 556770
    },
    {
      "epoch": 1.9513750889330488,
      "grad_norm": 2.90625,
      "learning_rate": 1.94190674236937e-05,
      "loss": 0.8475,
      "step": 556780
    },
    {
      "epoch": 1.9514101364399443,
      "grad_norm": 2.765625,
      "learning_rate": 1.941841839503e-05,
      "loss": 0.8728,
      "step": 556790
    },
    {
      "epoch": 1.95144518394684,
      "grad_norm": 3.171875,
      "learning_rate": 1.9417769366366298e-05,
      "loss": 0.7333,
      "step": 556800
    },
    {
      "epoch": 1.9514802314537354,
      "grad_norm": 3.0625,
      "learning_rate": 1.9417120337702596e-05,
      "loss": 0.8542,
      "step": 556810
    },
    {
      "epoch": 1.9515152789606312,
      "grad_norm": 2.484375,
      "learning_rate": 1.9416471309038894e-05,
      "loss": 0.7383,
      "step": 556820
    },
    {
      "epoch": 1.9515503264675267,
      "grad_norm": 3.03125,
      "learning_rate": 1.941582228037519e-05,
      "loss": 0.8236,
      "step": 556830
    },
    {
      "epoch": 1.9515853739744222,
      "grad_norm": 2.96875,
      "learning_rate": 1.941517325171149e-05,
      "loss": 0.8552,
      "step": 556840
    },
    {
      "epoch": 1.951620421481318,
      "grad_norm": 3.03125,
      "learning_rate": 1.9414524223047788e-05,
      "loss": 0.7969,
      "step": 556850
    },
    {
      "epoch": 1.9516554689882135,
      "grad_norm": 2.84375,
      "learning_rate": 1.9413875194384086e-05,
      "loss": 0.8961,
      "step": 556860
    },
    {
      "epoch": 1.951690516495109,
      "grad_norm": 3.375,
      "learning_rate": 1.9413226165720384e-05,
      "loss": 0.9252,
      "step": 556870
    },
    {
      "epoch": 1.9517255640020048,
      "grad_norm": 2.984375,
      "learning_rate": 1.941257713705668e-05,
      "loss": 0.8345,
      "step": 556880
    },
    {
      "epoch": 1.9517606115089003,
      "grad_norm": 2.75,
      "learning_rate": 1.941192810839298e-05,
      "loss": 0.8152,
      "step": 556890
    },
    {
      "epoch": 1.9517956590157959,
      "grad_norm": 2.890625,
      "learning_rate": 1.9411279079729278e-05,
      "loss": 0.8079,
      "step": 556900
    },
    {
      "epoch": 1.9518307065226916,
      "grad_norm": 2.984375,
      "learning_rate": 1.9410630051065576e-05,
      "loss": 0.8674,
      "step": 556910
    },
    {
      "epoch": 1.951865754029587,
      "grad_norm": 2.671875,
      "learning_rate": 1.9409981022401874e-05,
      "loss": 0.7787,
      "step": 556920
    },
    {
      "epoch": 1.9519008015364827,
      "grad_norm": 3.046875,
      "learning_rate": 1.940933199373817e-05,
      "loss": 0.8119,
      "step": 556930
    },
    {
      "epoch": 1.9519358490433782,
      "grad_norm": 2.96875,
      "learning_rate": 1.940868296507447e-05,
      "loss": 0.8424,
      "step": 556940
    },
    {
      "epoch": 1.9519708965502738,
      "grad_norm": 2.703125,
      "learning_rate": 1.9408033936410768e-05,
      "loss": 0.835,
      "step": 556950
    },
    {
      "epoch": 1.9520059440571695,
      "grad_norm": 2.65625,
      "learning_rate": 1.9407384907747066e-05,
      "loss": 0.8314,
      "step": 556960
    },
    {
      "epoch": 1.952040991564065,
      "grad_norm": 2.71875,
      "learning_rate": 1.9406735879083367e-05,
      "loss": 0.783,
      "step": 556970
    },
    {
      "epoch": 1.9520760390709606,
      "grad_norm": 3.015625,
      "learning_rate": 1.9406086850419665e-05,
      "loss": 0.945,
      "step": 556980
    },
    {
      "epoch": 1.9521110865778564,
      "grad_norm": 3.125,
      "learning_rate": 1.9405437821755963e-05,
      "loss": 0.7887,
      "step": 556990
    },
    {
      "epoch": 1.952146134084752,
      "grad_norm": 3.015625,
      "learning_rate": 1.940478879309226e-05,
      "loss": 0.7858,
      "step": 557000
    },
    {
      "epoch": 1.9521811815916474,
      "grad_norm": 2.734375,
      "learning_rate": 1.940413976442856e-05,
      "loss": 0.8839,
      "step": 557010
    },
    {
      "epoch": 1.9522162290985432,
      "grad_norm": 2.734375,
      "learning_rate": 1.9403490735764857e-05,
      "loss": 0.7674,
      "step": 557020
    },
    {
      "epoch": 1.9522512766054385,
      "grad_norm": 2.703125,
      "learning_rate": 1.940284170710115e-05,
      "loss": 0.8653,
      "step": 557030
    },
    {
      "epoch": 1.9522863241123343,
      "grad_norm": 2.84375,
      "learning_rate": 1.940219267843745e-05,
      "loss": 0.8655,
      "step": 557040
    },
    {
      "epoch": 1.95232137161923,
      "grad_norm": 2.734375,
      "learning_rate": 1.9401543649773748e-05,
      "loss": 0.8,
      "step": 557050
    },
    {
      "epoch": 1.9523564191261253,
      "grad_norm": 2.484375,
      "learning_rate": 1.9400894621110046e-05,
      "loss": 0.7421,
      "step": 557060
    },
    {
      "epoch": 1.952391466633021,
      "grad_norm": 3.3125,
      "learning_rate": 1.9400245592446344e-05,
      "loss": 0.7783,
      "step": 557070
    },
    {
      "epoch": 1.9524265141399166,
      "grad_norm": 2.75,
      "learning_rate": 1.939959656378264e-05,
      "loss": 0.899,
      "step": 557080
    },
    {
      "epoch": 1.9524615616468122,
      "grad_norm": 2.734375,
      "learning_rate": 1.9398947535118943e-05,
      "loss": 0.7703,
      "step": 557090
    },
    {
      "epoch": 1.952496609153708,
      "grad_norm": 2.640625,
      "learning_rate": 1.939829850645524e-05,
      "loss": 0.8958,
      "step": 557100
    },
    {
      "epoch": 1.9525316566606035,
      "grad_norm": 2.734375,
      "learning_rate": 1.939764947779154e-05,
      "loss": 0.7634,
      "step": 557110
    },
    {
      "epoch": 1.952566704167499,
      "grad_norm": 3.21875,
      "learning_rate": 1.9397000449127837e-05,
      "loss": 0.8384,
      "step": 557120
    },
    {
      "epoch": 1.9526017516743948,
      "grad_norm": 2.953125,
      "learning_rate": 1.9396351420464135e-05,
      "loss": 0.7565,
      "step": 557130
    },
    {
      "epoch": 1.95263679918129,
      "grad_norm": 2.46875,
      "learning_rate": 1.9395702391800433e-05,
      "loss": 0.7338,
      "step": 557140
    },
    {
      "epoch": 1.9526718466881858,
      "grad_norm": 3.296875,
      "learning_rate": 1.939505336313673e-05,
      "loss": 0.8508,
      "step": 557150
    },
    {
      "epoch": 1.9527068941950816,
      "grad_norm": 2.8125,
      "learning_rate": 1.939440433447303e-05,
      "loss": 0.7626,
      "step": 557160
    },
    {
      "epoch": 1.952741941701977,
      "grad_norm": 2.703125,
      "learning_rate": 1.9393755305809327e-05,
      "loss": 0.788,
      "step": 557170
    },
    {
      "epoch": 1.9527769892088727,
      "grad_norm": 3.171875,
      "learning_rate": 1.9393106277145625e-05,
      "loss": 0.8156,
      "step": 557180
    },
    {
      "epoch": 1.9528120367157682,
      "grad_norm": 2.765625,
      "learning_rate": 1.9392457248481923e-05,
      "loss": 0.8184,
      "step": 557190
    },
    {
      "epoch": 1.9528470842226637,
      "grad_norm": 2.765625,
      "learning_rate": 1.939180821981822e-05,
      "loss": 0.8402,
      "step": 557200
    },
    {
      "epoch": 1.9528821317295595,
      "grad_norm": 2.71875,
      "learning_rate": 1.939115919115452e-05,
      "loss": 0.7824,
      "step": 557210
    },
    {
      "epoch": 1.952917179236455,
      "grad_norm": 2.96875,
      "learning_rate": 1.9390510162490817e-05,
      "loss": 0.7803,
      "step": 557220
    },
    {
      "epoch": 1.9529522267433506,
      "grad_norm": 3.15625,
      "learning_rate": 1.9389861133827115e-05,
      "loss": 0.8075,
      "step": 557230
    },
    {
      "epoch": 1.9529872742502463,
      "grad_norm": 2.9375,
      "learning_rate": 1.9389212105163413e-05,
      "loss": 0.8178,
      "step": 557240
    },
    {
      "epoch": 1.9530223217571419,
      "grad_norm": 3.421875,
      "learning_rate": 1.938856307649971e-05,
      "loss": 0.8116,
      "step": 557250
    },
    {
      "epoch": 1.9530573692640374,
      "grad_norm": 3.5,
      "learning_rate": 1.938791404783601e-05,
      "loss": 0.8463,
      "step": 557260
    },
    {
      "epoch": 1.9530924167709331,
      "grad_norm": 2.6875,
      "learning_rate": 1.9387265019172307e-05,
      "loss": 0.8459,
      "step": 557270
    },
    {
      "epoch": 1.9531274642778285,
      "grad_norm": 2.90625,
      "learning_rate": 1.9386615990508605e-05,
      "loss": 0.866,
      "step": 557280
    },
    {
      "epoch": 1.9531625117847242,
      "grad_norm": 3.375,
      "learning_rate": 1.9385966961844903e-05,
      "loss": 0.8678,
      "step": 557290
    },
    {
      "epoch": 1.9531975592916198,
      "grad_norm": 2.796875,
      "learning_rate": 1.93853179331812e-05,
      "loss": 0.8317,
      "step": 557300
    },
    {
      "epoch": 1.9532326067985153,
      "grad_norm": 2.765625,
      "learning_rate": 1.93846689045175e-05,
      "loss": 0.8658,
      "step": 557310
    },
    {
      "epoch": 1.953267654305411,
      "grad_norm": 3.09375,
      "learning_rate": 1.9384019875853797e-05,
      "loss": 0.7833,
      "step": 557320
    },
    {
      "epoch": 1.9533027018123066,
      "grad_norm": 2.421875,
      "learning_rate": 1.9383370847190095e-05,
      "loss": 0.8343,
      "step": 557330
    },
    {
      "epoch": 1.9533377493192021,
      "grad_norm": 3.046875,
      "learning_rate": 1.9382721818526396e-05,
      "loss": 0.8284,
      "step": 557340
    },
    {
      "epoch": 1.9533727968260979,
      "grad_norm": 3.03125,
      "learning_rate": 1.9382072789862694e-05,
      "loss": 0.923,
      "step": 557350
    },
    {
      "epoch": 1.9534078443329934,
      "grad_norm": 2.984375,
      "learning_rate": 1.9381423761198992e-05,
      "loss": 0.7556,
      "step": 557360
    },
    {
      "epoch": 1.953442891839889,
      "grad_norm": 3.4375,
      "learning_rate": 1.938077473253529e-05,
      "loss": 0.934,
      "step": 557370
    },
    {
      "epoch": 1.9534779393467847,
      "grad_norm": 3.15625,
      "learning_rate": 1.938012570387159e-05,
      "loss": 0.895,
      "step": 557380
    },
    {
      "epoch": 1.95351298685368,
      "grad_norm": 2.828125,
      "learning_rate": 1.9379476675207886e-05,
      "loss": 0.8387,
      "step": 557390
    },
    {
      "epoch": 1.9535480343605758,
      "grad_norm": 3.203125,
      "learning_rate": 1.9378827646544184e-05,
      "loss": 0.8809,
      "step": 557400
    },
    {
      "epoch": 1.9535830818674713,
      "grad_norm": 2.8125,
      "learning_rate": 1.937817861788048e-05,
      "loss": 0.8172,
      "step": 557410
    },
    {
      "epoch": 1.9536181293743669,
      "grad_norm": 2.546875,
      "learning_rate": 1.9377529589216777e-05,
      "loss": 0.7993,
      "step": 557420
    },
    {
      "epoch": 1.9536531768812626,
      "grad_norm": 2.6875,
      "learning_rate": 1.9376880560553075e-05,
      "loss": 0.8638,
      "step": 557430
    },
    {
      "epoch": 1.9536882243881581,
      "grad_norm": 2.875,
      "learning_rate": 1.9376231531889373e-05,
      "loss": 0.8551,
      "step": 557440
    },
    {
      "epoch": 1.9537232718950537,
      "grad_norm": 2.265625,
      "learning_rate": 1.9375582503225674e-05,
      "loss": 0.9046,
      "step": 557450
    },
    {
      "epoch": 1.9537583194019494,
      "grad_norm": 3.40625,
      "learning_rate": 1.9374933474561972e-05,
      "loss": 0.8376,
      "step": 557460
    },
    {
      "epoch": 1.953793366908845,
      "grad_norm": 2.890625,
      "learning_rate": 1.937428444589827e-05,
      "loss": 0.7639,
      "step": 557470
    },
    {
      "epoch": 1.9538284144157405,
      "grad_norm": 2.578125,
      "learning_rate": 1.937363541723457e-05,
      "loss": 0.7457,
      "step": 557480
    },
    {
      "epoch": 1.9538634619226363,
      "grad_norm": 2.921875,
      "learning_rate": 1.9372986388570866e-05,
      "loss": 0.8618,
      "step": 557490
    },
    {
      "epoch": 1.9538985094295316,
      "grad_norm": 3.078125,
      "learning_rate": 1.9372337359907164e-05,
      "loss": 0.8729,
      "step": 557500
    },
    {
      "epoch": 1.9539335569364273,
      "grad_norm": 2.578125,
      "learning_rate": 1.9371688331243462e-05,
      "loss": 0.8445,
      "step": 557510
    },
    {
      "epoch": 1.9539686044433229,
      "grad_norm": 2.6875,
      "learning_rate": 1.937103930257976e-05,
      "loss": 0.7787,
      "step": 557520
    },
    {
      "epoch": 1.9540036519502184,
      "grad_norm": 2.5625,
      "learning_rate": 1.937039027391606e-05,
      "loss": 0.896,
      "step": 557530
    },
    {
      "epoch": 1.9540386994571142,
      "grad_norm": 2.90625,
      "learning_rate": 1.9369741245252356e-05,
      "loss": 0.8587,
      "step": 557540
    },
    {
      "epoch": 1.9540737469640097,
      "grad_norm": 2.890625,
      "learning_rate": 1.9369092216588654e-05,
      "loss": 0.7417,
      "step": 557550
    },
    {
      "epoch": 1.9541087944709052,
      "grad_norm": 3.21875,
      "learning_rate": 1.9368443187924952e-05,
      "loss": 0.7971,
      "step": 557560
    },
    {
      "epoch": 1.954143841977801,
      "grad_norm": 2.796875,
      "learning_rate": 1.936779415926125e-05,
      "loss": 0.8043,
      "step": 557570
    },
    {
      "epoch": 1.9541788894846965,
      "grad_norm": 2.984375,
      "learning_rate": 1.936714513059755e-05,
      "loss": 0.8729,
      "step": 557580
    },
    {
      "epoch": 1.954213936991592,
      "grad_norm": 2.890625,
      "learning_rate": 1.936649610193385e-05,
      "loss": 0.8391,
      "step": 557590
    },
    {
      "epoch": 1.9542489844984878,
      "grad_norm": 2.8125,
      "learning_rate": 1.9365847073270144e-05,
      "loss": 0.8096,
      "step": 557600
    },
    {
      "epoch": 1.9542840320053831,
      "grad_norm": 3.34375,
      "learning_rate": 1.9365198044606442e-05,
      "loss": 0.8399,
      "step": 557610
    },
    {
      "epoch": 1.954319079512279,
      "grad_norm": 3.25,
      "learning_rate": 1.936454901594274e-05,
      "loss": 0.8748,
      "step": 557620
    },
    {
      "epoch": 1.9543541270191744,
      "grad_norm": 3.03125,
      "learning_rate": 1.936389998727904e-05,
      "loss": 0.8674,
      "step": 557630
    },
    {
      "epoch": 1.95438917452607,
      "grad_norm": 2.859375,
      "learning_rate": 1.9363250958615336e-05,
      "loss": 0.8709,
      "step": 557640
    },
    {
      "epoch": 1.9544242220329657,
      "grad_norm": 3.46875,
      "learning_rate": 1.9362601929951634e-05,
      "loss": 0.802,
      "step": 557650
    },
    {
      "epoch": 1.9544592695398613,
      "grad_norm": 3.109375,
      "learning_rate": 1.9361952901287932e-05,
      "loss": 0.8382,
      "step": 557660
    },
    {
      "epoch": 1.9544943170467568,
      "grad_norm": 3.8125,
      "learning_rate": 1.936130387262423e-05,
      "loss": 0.8161,
      "step": 557670
    },
    {
      "epoch": 1.9545293645536526,
      "grad_norm": 2.671875,
      "learning_rate": 1.936065484396053e-05,
      "loss": 0.8215,
      "step": 557680
    },
    {
      "epoch": 1.954564412060548,
      "grad_norm": 2.609375,
      "learning_rate": 1.9360005815296826e-05,
      "loss": 0.7528,
      "step": 557690
    },
    {
      "epoch": 1.9545994595674436,
      "grad_norm": 3.125,
      "learning_rate": 1.9359356786633124e-05,
      "loss": 0.771,
      "step": 557700
    },
    {
      "epoch": 1.9546345070743394,
      "grad_norm": 2.5625,
      "learning_rate": 1.9358707757969426e-05,
      "loss": 0.8121,
      "step": 557710
    },
    {
      "epoch": 1.9546695545812347,
      "grad_norm": 2.78125,
      "learning_rate": 1.9358058729305724e-05,
      "loss": 0.8154,
      "step": 557720
    },
    {
      "epoch": 1.9547046020881305,
      "grad_norm": 3.265625,
      "learning_rate": 1.9357409700642022e-05,
      "loss": 0.8098,
      "step": 557730
    },
    {
      "epoch": 1.9547396495950262,
      "grad_norm": 3.03125,
      "learning_rate": 1.935676067197832e-05,
      "loss": 0.8937,
      "step": 557740
    },
    {
      "epoch": 1.9547746971019215,
      "grad_norm": 2.640625,
      "learning_rate": 1.9356111643314618e-05,
      "loss": 0.7157,
      "step": 557750
    },
    {
      "epoch": 1.9548097446088173,
      "grad_norm": 3.046875,
      "learning_rate": 1.9355462614650916e-05,
      "loss": 0.8055,
      "step": 557760
    },
    {
      "epoch": 1.9548447921157128,
      "grad_norm": 3.34375,
      "learning_rate": 1.9354813585987214e-05,
      "loss": 0.8264,
      "step": 557770
    },
    {
      "epoch": 1.9548798396226084,
      "grad_norm": 3.109375,
      "learning_rate": 1.935416455732351e-05,
      "loss": 0.8441,
      "step": 557780
    },
    {
      "epoch": 1.9549148871295041,
      "grad_norm": 2.59375,
      "learning_rate": 1.9353515528659806e-05,
      "loss": 0.7734,
      "step": 557790
    },
    {
      "epoch": 1.9549499346363997,
      "grad_norm": 2.9375,
      "learning_rate": 1.9352866499996104e-05,
      "loss": 0.8152,
      "step": 557800
    },
    {
      "epoch": 1.9549849821432952,
      "grad_norm": 2.8125,
      "learning_rate": 1.9352217471332402e-05,
      "loss": 0.8302,
      "step": 557810
    },
    {
      "epoch": 1.955020029650191,
      "grad_norm": 2.953125,
      "learning_rate": 1.9351568442668704e-05,
      "loss": 0.8177,
      "step": 557820
    },
    {
      "epoch": 1.9550550771570865,
      "grad_norm": 2.6875,
      "learning_rate": 1.9350919414005002e-05,
      "loss": 0.8121,
      "step": 557830
    },
    {
      "epoch": 1.955090124663982,
      "grad_norm": 2.8125,
      "learning_rate": 1.93502703853413e-05,
      "loss": 0.8008,
      "step": 557840
    },
    {
      "epoch": 1.9551251721708778,
      "grad_norm": 3.453125,
      "learning_rate": 1.9349621356677598e-05,
      "loss": 0.8858,
      "step": 557850
    },
    {
      "epoch": 1.955160219677773,
      "grad_norm": 2.828125,
      "learning_rate": 1.9348972328013896e-05,
      "loss": 0.7964,
      "step": 557860
    },
    {
      "epoch": 1.9551952671846689,
      "grad_norm": 3.15625,
      "learning_rate": 1.9348323299350194e-05,
      "loss": 0.8088,
      "step": 557870
    },
    {
      "epoch": 1.9552303146915644,
      "grad_norm": 3.140625,
      "learning_rate": 1.9347674270686492e-05,
      "loss": 0.8293,
      "step": 557880
    },
    {
      "epoch": 1.95526536219846,
      "grad_norm": 2.78125,
      "learning_rate": 1.934702524202279e-05,
      "loss": 0.8161,
      "step": 557890
    },
    {
      "epoch": 1.9553004097053557,
      "grad_norm": 2.625,
      "learning_rate": 1.9346376213359088e-05,
      "loss": 0.8235,
      "step": 557900
    },
    {
      "epoch": 1.9553354572122512,
      "grad_norm": 2.75,
      "learning_rate": 1.9345727184695386e-05,
      "loss": 0.8035,
      "step": 557910
    },
    {
      "epoch": 1.9553705047191468,
      "grad_norm": 3.3125,
      "learning_rate": 1.9345078156031684e-05,
      "loss": 0.7701,
      "step": 557920
    },
    {
      "epoch": 1.9554055522260425,
      "grad_norm": 2.875,
      "learning_rate": 1.9344429127367982e-05,
      "loss": 0.8481,
      "step": 557930
    },
    {
      "epoch": 1.955440599732938,
      "grad_norm": 2.953125,
      "learning_rate": 1.934378009870428e-05,
      "loss": 0.885,
      "step": 557940
    },
    {
      "epoch": 1.9554756472398336,
      "grad_norm": 2.453125,
      "learning_rate": 1.9343131070040578e-05,
      "loss": 0.9236,
      "step": 557950
    },
    {
      "epoch": 1.9555106947467293,
      "grad_norm": 2.671875,
      "learning_rate": 1.934248204137688e-05,
      "loss": 0.7973,
      "step": 557960
    },
    {
      "epoch": 1.9555457422536247,
      "grad_norm": 2.78125,
      "learning_rate": 1.9341833012713174e-05,
      "loss": 0.8508,
      "step": 557970
    },
    {
      "epoch": 1.9555807897605204,
      "grad_norm": 3.0625,
      "learning_rate": 1.9341183984049472e-05,
      "loss": 0.8454,
      "step": 557980
    },
    {
      "epoch": 1.955615837267416,
      "grad_norm": 2.75,
      "learning_rate": 1.934053495538577e-05,
      "loss": 0.8384,
      "step": 557990
    },
    {
      "epoch": 1.9556508847743115,
      "grad_norm": 2.8125,
      "learning_rate": 1.9339885926722068e-05,
      "loss": 0.8173,
      "step": 558000
    },
    {
      "epoch": 1.9556859322812072,
      "grad_norm": 2.6875,
      "learning_rate": 1.9339236898058366e-05,
      "loss": 0.8484,
      "step": 558010
    },
    {
      "epoch": 1.9557209797881028,
      "grad_norm": 2.40625,
      "learning_rate": 1.9338587869394664e-05,
      "loss": 0.7415,
      "step": 558020
    },
    {
      "epoch": 1.9557560272949983,
      "grad_norm": 3.03125,
      "learning_rate": 1.9337938840730962e-05,
      "loss": 0.8593,
      "step": 558030
    },
    {
      "epoch": 1.955791074801894,
      "grad_norm": 3.4375,
      "learning_rate": 1.933728981206726e-05,
      "loss": 0.8541,
      "step": 558040
    },
    {
      "epoch": 1.9558261223087896,
      "grad_norm": 2.6875,
      "learning_rate": 1.9336640783403558e-05,
      "loss": 0.7977,
      "step": 558050
    },
    {
      "epoch": 1.9558611698156851,
      "grad_norm": 3.15625,
      "learning_rate": 1.9335991754739856e-05,
      "loss": 0.8179,
      "step": 558060
    },
    {
      "epoch": 1.955896217322581,
      "grad_norm": 2.78125,
      "learning_rate": 1.9335342726076157e-05,
      "loss": 0.8944,
      "step": 558070
    },
    {
      "epoch": 1.9559312648294762,
      "grad_norm": 2.703125,
      "learning_rate": 1.9334693697412455e-05,
      "loss": 0.752,
      "step": 558080
    },
    {
      "epoch": 1.955966312336372,
      "grad_norm": 3.234375,
      "learning_rate": 1.9334044668748753e-05,
      "loss": 0.7458,
      "step": 558090
    },
    {
      "epoch": 1.9560013598432675,
      "grad_norm": 2.640625,
      "learning_rate": 1.933339564008505e-05,
      "loss": 0.7798,
      "step": 558100
    },
    {
      "epoch": 1.956036407350163,
      "grad_norm": 3.046875,
      "learning_rate": 1.933274661142135e-05,
      "loss": 0.8591,
      "step": 558110
    },
    {
      "epoch": 1.9560714548570588,
      "grad_norm": 3.296875,
      "learning_rate": 1.9332097582757647e-05,
      "loss": 0.9077,
      "step": 558120
    },
    {
      "epoch": 1.9561065023639543,
      "grad_norm": 3.0,
      "learning_rate": 1.9331448554093945e-05,
      "loss": 0.8606,
      "step": 558130
    },
    {
      "epoch": 1.9561415498708499,
      "grad_norm": 2.953125,
      "learning_rate": 1.9330799525430243e-05,
      "loss": 0.7894,
      "step": 558140
    },
    {
      "epoch": 1.9561765973777456,
      "grad_norm": 2.515625,
      "learning_rate": 1.933015049676654e-05,
      "loss": 0.7947,
      "step": 558150
    },
    {
      "epoch": 1.9562116448846412,
      "grad_norm": 2.90625,
      "learning_rate": 1.9329501468102836e-05,
      "loss": 0.8306,
      "step": 558160
    },
    {
      "epoch": 1.9562466923915367,
      "grad_norm": 2.53125,
      "learning_rate": 1.9328852439439134e-05,
      "loss": 0.8235,
      "step": 558170
    },
    {
      "epoch": 1.9562817398984325,
      "grad_norm": 2.609375,
      "learning_rate": 1.9328203410775432e-05,
      "loss": 0.7738,
      "step": 558180
    },
    {
      "epoch": 1.9563167874053278,
      "grad_norm": 2.96875,
      "learning_rate": 1.9327554382111733e-05,
      "loss": 0.7533,
      "step": 558190
    },
    {
      "epoch": 1.9563518349122235,
      "grad_norm": 2.765625,
      "learning_rate": 1.932690535344803e-05,
      "loss": 0.7764,
      "step": 558200
    },
    {
      "epoch": 1.956386882419119,
      "grad_norm": 2.9375,
      "learning_rate": 1.932625632478433e-05,
      "loss": 0.8478,
      "step": 558210
    },
    {
      "epoch": 1.9564219299260146,
      "grad_norm": 2.359375,
      "learning_rate": 1.9325607296120627e-05,
      "loss": 0.867,
      "step": 558220
    },
    {
      "epoch": 1.9564569774329104,
      "grad_norm": 2.640625,
      "learning_rate": 1.9324958267456925e-05,
      "loss": 0.8254,
      "step": 558230
    },
    {
      "epoch": 1.956492024939806,
      "grad_norm": 2.765625,
      "learning_rate": 1.9324309238793223e-05,
      "loss": 0.8913,
      "step": 558240
    },
    {
      "epoch": 1.9565270724467014,
      "grad_norm": 2.4375,
      "learning_rate": 1.932366021012952e-05,
      "loss": 0.7413,
      "step": 558250
    },
    {
      "epoch": 1.9565621199535972,
      "grad_norm": 2.65625,
      "learning_rate": 1.932301118146582e-05,
      "loss": 0.8085,
      "step": 558260
    },
    {
      "epoch": 1.9565971674604927,
      "grad_norm": 3.203125,
      "learning_rate": 1.9322362152802117e-05,
      "loss": 0.8381,
      "step": 558270
    },
    {
      "epoch": 1.9566322149673883,
      "grad_norm": 2.625,
      "learning_rate": 1.9321713124138415e-05,
      "loss": 0.7577,
      "step": 558280
    },
    {
      "epoch": 1.956667262474284,
      "grad_norm": 2.953125,
      "learning_rate": 1.9321064095474713e-05,
      "loss": 0.8716,
      "step": 558290
    },
    {
      "epoch": 1.9567023099811793,
      "grad_norm": 2.65625,
      "learning_rate": 1.932041506681101e-05,
      "loss": 0.819,
      "step": 558300
    },
    {
      "epoch": 1.956737357488075,
      "grad_norm": 3.109375,
      "learning_rate": 1.931976603814731e-05,
      "loss": 0.7593,
      "step": 558310
    },
    {
      "epoch": 1.9567724049949708,
      "grad_norm": 2.5,
      "learning_rate": 1.931911700948361e-05,
      "loss": 0.8742,
      "step": 558320
    },
    {
      "epoch": 1.9568074525018662,
      "grad_norm": 3.0625,
      "learning_rate": 1.931846798081991e-05,
      "loss": 0.8213,
      "step": 558330
    },
    {
      "epoch": 1.956842500008762,
      "grad_norm": 3.03125,
      "learning_rate": 1.9317818952156207e-05,
      "loss": 0.8786,
      "step": 558340
    },
    {
      "epoch": 1.9568775475156575,
      "grad_norm": 2.9375,
      "learning_rate": 1.93171699234925e-05,
      "loss": 0.8943,
      "step": 558350
    },
    {
      "epoch": 1.956912595022553,
      "grad_norm": 2.984375,
      "learning_rate": 1.93165208948288e-05,
      "loss": 0.8243,
      "step": 558360
    },
    {
      "epoch": 1.9569476425294487,
      "grad_norm": 2.78125,
      "learning_rate": 1.9315871866165097e-05,
      "loss": 0.8186,
      "step": 558370
    },
    {
      "epoch": 1.9569826900363443,
      "grad_norm": 3.03125,
      "learning_rate": 1.9315222837501395e-05,
      "loss": 0.7997,
      "step": 558380
    },
    {
      "epoch": 1.9570177375432398,
      "grad_norm": 2.84375,
      "learning_rate": 1.9314573808837693e-05,
      "loss": 0.7825,
      "step": 558390
    },
    {
      "epoch": 1.9570527850501356,
      "grad_norm": 2.609375,
      "learning_rate": 1.931392478017399e-05,
      "loss": 0.7991,
      "step": 558400
    },
    {
      "epoch": 1.957087832557031,
      "grad_norm": 3.078125,
      "learning_rate": 1.931327575151029e-05,
      "loss": 0.8147,
      "step": 558410
    },
    {
      "epoch": 1.9571228800639267,
      "grad_norm": 3.03125,
      "learning_rate": 1.9312626722846587e-05,
      "loss": 0.7232,
      "step": 558420
    },
    {
      "epoch": 1.9571579275708224,
      "grad_norm": 2.890625,
      "learning_rate": 1.9311977694182885e-05,
      "loss": 0.8148,
      "step": 558430
    },
    {
      "epoch": 1.9571929750777177,
      "grad_norm": 2.640625,
      "learning_rate": 1.9311328665519187e-05,
      "loss": 0.8102,
      "step": 558440
    },
    {
      "epoch": 1.9572280225846135,
      "grad_norm": 2.796875,
      "learning_rate": 1.9310679636855484e-05,
      "loss": 0.8575,
      "step": 558450
    },
    {
      "epoch": 1.957263070091509,
      "grad_norm": 3.09375,
      "learning_rate": 1.9310030608191782e-05,
      "loss": 0.851,
      "step": 558460
    },
    {
      "epoch": 1.9572981175984046,
      "grad_norm": 2.875,
      "learning_rate": 1.930938157952808e-05,
      "loss": 0.78,
      "step": 558470
    },
    {
      "epoch": 1.9573331651053003,
      "grad_norm": 2.84375,
      "learning_rate": 1.930873255086438e-05,
      "loss": 0.7923,
      "step": 558480
    },
    {
      "epoch": 1.9573682126121958,
      "grad_norm": 2.984375,
      "learning_rate": 1.9308083522200676e-05,
      "loss": 0.8567,
      "step": 558490
    },
    {
      "epoch": 1.9574032601190914,
      "grad_norm": 3.140625,
      "learning_rate": 1.9307434493536974e-05,
      "loss": 0.81,
      "step": 558500
    },
    {
      "epoch": 1.9574383076259871,
      "grad_norm": 2.796875,
      "learning_rate": 1.9306785464873272e-05,
      "loss": 0.8323,
      "step": 558510
    },
    {
      "epoch": 1.9574733551328827,
      "grad_norm": 2.953125,
      "learning_rate": 1.930613643620957e-05,
      "loss": 0.8547,
      "step": 558520
    },
    {
      "epoch": 1.9575084026397782,
      "grad_norm": 3.171875,
      "learning_rate": 1.930548740754587e-05,
      "loss": 0.8433,
      "step": 558530
    },
    {
      "epoch": 1.957543450146674,
      "grad_norm": 3.34375,
      "learning_rate": 1.9304838378882163e-05,
      "loss": 0.8123,
      "step": 558540
    },
    {
      "epoch": 1.9575784976535693,
      "grad_norm": 3.234375,
      "learning_rate": 1.9304189350218464e-05,
      "loss": 0.8515,
      "step": 558550
    },
    {
      "epoch": 1.957613545160465,
      "grad_norm": 3.0625,
      "learning_rate": 1.9303540321554762e-05,
      "loss": 0.7863,
      "step": 558560
    },
    {
      "epoch": 1.9576485926673606,
      "grad_norm": 3.28125,
      "learning_rate": 1.930289129289106e-05,
      "loss": 0.9502,
      "step": 558570
    },
    {
      "epoch": 1.9576836401742561,
      "grad_norm": 3.09375,
      "learning_rate": 1.930224226422736e-05,
      "loss": 0.8658,
      "step": 558580
    },
    {
      "epoch": 1.9577186876811519,
      "grad_norm": 3.359375,
      "learning_rate": 1.9301593235563656e-05,
      "loss": 0.8255,
      "step": 558590
    },
    {
      "epoch": 1.9577537351880474,
      "grad_norm": 3.03125,
      "learning_rate": 1.9300944206899954e-05,
      "loss": 0.8967,
      "step": 558600
    },
    {
      "epoch": 1.957788782694943,
      "grad_norm": 3.015625,
      "learning_rate": 1.9300295178236252e-05,
      "loss": 0.8495,
      "step": 558610
    },
    {
      "epoch": 1.9578238302018387,
      "grad_norm": 2.8125,
      "learning_rate": 1.929964614957255e-05,
      "loss": 0.7481,
      "step": 558620
    },
    {
      "epoch": 1.9578588777087342,
      "grad_norm": 2.828125,
      "learning_rate": 1.929899712090885e-05,
      "loss": 0.8762,
      "step": 558630
    },
    {
      "epoch": 1.9578939252156298,
      "grad_norm": 2.828125,
      "learning_rate": 1.9298348092245146e-05,
      "loss": 0.8687,
      "step": 558640
    },
    {
      "epoch": 1.9579289727225255,
      "grad_norm": 2.875,
      "learning_rate": 1.9297699063581444e-05,
      "loss": 0.8301,
      "step": 558650
    },
    {
      "epoch": 1.9579640202294208,
      "grad_norm": 2.828125,
      "learning_rate": 1.9297050034917742e-05,
      "loss": 0.8514,
      "step": 558660
    },
    {
      "epoch": 1.9579990677363166,
      "grad_norm": 3.21875,
      "learning_rate": 1.929640100625404e-05,
      "loss": 0.866,
      "step": 558670
    },
    {
      "epoch": 1.9580341152432121,
      "grad_norm": 2.90625,
      "learning_rate": 1.929575197759034e-05,
      "loss": 0.8162,
      "step": 558680
    },
    {
      "epoch": 1.9580691627501077,
      "grad_norm": 2.9375,
      "learning_rate": 1.929510294892664e-05,
      "loss": 0.8617,
      "step": 558690
    },
    {
      "epoch": 1.9581042102570034,
      "grad_norm": 2.875,
      "learning_rate": 1.9294453920262938e-05,
      "loss": 0.8355,
      "step": 558700
    },
    {
      "epoch": 1.958139257763899,
      "grad_norm": 3.40625,
      "learning_rate": 1.9293804891599236e-05,
      "loss": 0.7938,
      "step": 558710
    },
    {
      "epoch": 1.9581743052707945,
      "grad_norm": 2.84375,
      "learning_rate": 1.929315586293553e-05,
      "loss": 0.9382,
      "step": 558720
    },
    {
      "epoch": 1.9582093527776903,
      "grad_norm": 2.515625,
      "learning_rate": 1.929250683427183e-05,
      "loss": 0.822,
      "step": 558730
    },
    {
      "epoch": 1.9582444002845858,
      "grad_norm": 2.75,
      "learning_rate": 1.9291857805608126e-05,
      "loss": 0.8011,
      "step": 558740
    },
    {
      "epoch": 1.9582794477914813,
      "grad_norm": 2.984375,
      "learning_rate": 1.9291208776944424e-05,
      "loss": 0.7844,
      "step": 558750
    },
    {
      "epoch": 1.958314495298377,
      "grad_norm": 3.390625,
      "learning_rate": 1.9290559748280722e-05,
      "loss": 0.7828,
      "step": 558760
    },
    {
      "epoch": 1.9583495428052724,
      "grad_norm": 2.796875,
      "learning_rate": 1.928991071961702e-05,
      "loss": 0.8845,
      "step": 558770
    },
    {
      "epoch": 1.9583845903121682,
      "grad_norm": 2.53125,
      "learning_rate": 1.928926169095332e-05,
      "loss": 0.8276,
      "step": 558780
    },
    {
      "epoch": 1.9584196378190637,
      "grad_norm": 2.734375,
      "learning_rate": 1.9288612662289616e-05,
      "loss": 0.8075,
      "step": 558790
    },
    {
      "epoch": 1.9584546853259592,
      "grad_norm": 2.953125,
      "learning_rate": 1.9287963633625914e-05,
      "loss": 0.9024,
      "step": 558800
    },
    {
      "epoch": 1.958489732832855,
      "grad_norm": 2.90625,
      "learning_rate": 1.9287314604962216e-05,
      "loss": 0.8159,
      "step": 558810
    },
    {
      "epoch": 1.9585247803397505,
      "grad_norm": 2.984375,
      "learning_rate": 1.9286665576298514e-05,
      "loss": 0.8494,
      "step": 558820
    },
    {
      "epoch": 1.958559827846646,
      "grad_norm": 2.71875,
      "learning_rate": 1.9286016547634812e-05,
      "loss": 0.842,
      "step": 558830
    },
    {
      "epoch": 1.9585948753535418,
      "grad_norm": 3.0625,
      "learning_rate": 1.928536751897111e-05,
      "loss": 0.7258,
      "step": 558840
    },
    {
      "epoch": 1.9586299228604374,
      "grad_norm": 2.78125,
      "learning_rate": 1.9284718490307408e-05,
      "loss": 0.7791,
      "step": 558850
    },
    {
      "epoch": 1.958664970367333,
      "grad_norm": 2.15625,
      "learning_rate": 1.9284069461643706e-05,
      "loss": 0.7859,
      "step": 558860
    },
    {
      "epoch": 1.9587000178742286,
      "grad_norm": 3.296875,
      "learning_rate": 1.9283420432980004e-05,
      "loss": 0.8996,
      "step": 558870
    },
    {
      "epoch": 1.958735065381124,
      "grad_norm": 3.15625,
      "learning_rate": 1.9282771404316302e-05,
      "loss": 0.9032,
      "step": 558880
    },
    {
      "epoch": 1.9587701128880197,
      "grad_norm": 3.0625,
      "learning_rate": 1.92821223756526e-05,
      "loss": 0.8565,
      "step": 558890
    },
    {
      "epoch": 1.9588051603949153,
      "grad_norm": 2.796875,
      "learning_rate": 1.9281473346988898e-05,
      "loss": 0.8443,
      "step": 558900
    },
    {
      "epoch": 1.9588402079018108,
      "grad_norm": 2.828125,
      "learning_rate": 1.9280824318325192e-05,
      "loss": 0.8091,
      "step": 558910
    },
    {
      "epoch": 1.9588752554087066,
      "grad_norm": 2.8125,
      "learning_rate": 1.9280175289661494e-05,
      "loss": 0.8345,
      "step": 558920
    },
    {
      "epoch": 1.958910302915602,
      "grad_norm": 2.84375,
      "learning_rate": 1.9279526260997792e-05,
      "loss": 0.9236,
      "step": 558930
    },
    {
      "epoch": 1.9589453504224976,
      "grad_norm": 2.859375,
      "learning_rate": 1.927887723233409e-05,
      "loss": 0.8471,
      "step": 558940
    },
    {
      "epoch": 1.9589803979293934,
      "grad_norm": 3.59375,
      "learning_rate": 1.9278228203670388e-05,
      "loss": 0.945,
      "step": 558950
    },
    {
      "epoch": 1.959015445436289,
      "grad_norm": 2.5,
      "learning_rate": 1.9277579175006686e-05,
      "loss": 0.8898,
      "step": 558960
    },
    {
      "epoch": 1.9590504929431845,
      "grad_norm": 2.78125,
      "learning_rate": 1.9276930146342984e-05,
      "loss": 0.8741,
      "step": 558970
    },
    {
      "epoch": 1.9590855404500802,
      "grad_norm": 2.71875,
      "learning_rate": 1.9276281117679282e-05,
      "loss": 0.9197,
      "step": 558980
    },
    {
      "epoch": 1.9591205879569755,
      "grad_norm": 3.046875,
      "learning_rate": 1.927563208901558e-05,
      "loss": 0.8119,
      "step": 558990
    },
    {
      "epoch": 1.9591556354638713,
      "grad_norm": 2.671875,
      "learning_rate": 1.9274983060351878e-05,
      "loss": 0.7559,
      "step": 559000
    },
    {
      "epoch": 1.959190682970767,
      "grad_norm": 2.328125,
      "learning_rate": 1.9274334031688176e-05,
      "loss": 0.8625,
      "step": 559010
    },
    {
      "epoch": 1.9592257304776624,
      "grad_norm": 3.96875,
      "learning_rate": 1.9273685003024474e-05,
      "loss": 0.8572,
      "step": 559020
    },
    {
      "epoch": 1.959260777984558,
      "grad_norm": 2.71875,
      "learning_rate": 1.9273035974360772e-05,
      "loss": 0.8111,
      "step": 559030
    },
    {
      "epoch": 1.9592958254914536,
      "grad_norm": 3.15625,
      "learning_rate": 1.927238694569707e-05,
      "loss": 0.9016,
      "step": 559040
    },
    {
      "epoch": 1.9593308729983492,
      "grad_norm": 3.296875,
      "learning_rate": 1.9271737917033368e-05,
      "loss": 0.8409,
      "step": 559050
    },
    {
      "epoch": 1.959365920505245,
      "grad_norm": 2.8125,
      "learning_rate": 1.927108888836967e-05,
      "loss": 0.7461,
      "step": 559060
    },
    {
      "epoch": 1.9594009680121405,
      "grad_norm": 2.796875,
      "learning_rate": 1.9270439859705967e-05,
      "loss": 0.8145,
      "step": 559070
    },
    {
      "epoch": 1.959436015519036,
      "grad_norm": 3.015625,
      "learning_rate": 1.9269790831042265e-05,
      "loss": 0.8295,
      "step": 559080
    },
    {
      "epoch": 1.9594710630259318,
      "grad_norm": 2.96875,
      "learning_rate": 1.9269141802378563e-05,
      "loss": 0.7477,
      "step": 559090
    },
    {
      "epoch": 1.959506110532827,
      "grad_norm": 2.96875,
      "learning_rate": 1.9268492773714858e-05,
      "loss": 0.8274,
      "step": 559100
    },
    {
      "epoch": 1.9595411580397228,
      "grad_norm": 2.90625,
      "learning_rate": 1.9267843745051156e-05,
      "loss": 0.7956,
      "step": 559110
    },
    {
      "epoch": 1.9595762055466186,
      "grad_norm": 3.109375,
      "learning_rate": 1.9267194716387454e-05,
      "loss": 0.8776,
      "step": 559120
    },
    {
      "epoch": 1.959611253053514,
      "grad_norm": 2.625,
      "learning_rate": 1.9266545687723752e-05,
      "loss": 0.8323,
      "step": 559130
    },
    {
      "epoch": 1.9596463005604097,
      "grad_norm": 2.78125,
      "learning_rate": 1.926589665906005e-05,
      "loss": 0.8033,
      "step": 559140
    },
    {
      "epoch": 1.9596813480673052,
      "grad_norm": 2.3125,
      "learning_rate": 1.9265247630396348e-05,
      "loss": 0.8329,
      "step": 559150
    },
    {
      "epoch": 1.9597163955742007,
      "grad_norm": 3.3125,
      "learning_rate": 1.9264598601732646e-05,
      "loss": 0.9071,
      "step": 559160
    },
    {
      "epoch": 1.9597514430810965,
      "grad_norm": 3.125,
      "learning_rate": 1.9263949573068947e-05,
      "loss": 0.7912,
      "step": 559170
    },
    {
      "epoch": 1.959786490587992,
      "grad_norm": 2.890625,
      "learning_rate": 1.9263300544405245e-05,
      "loss": 0.682,
      "step": 559180
    },
    {
      "epoch": 1.9598215380948876,
      "grad_norm": 3.25,
      "learning_rate": 1.9262651515741543e-05,
      "loss": 0.9208,
      "step": 559190
    },
    {
      "epoch": 1.9598565856017833,
      "grad_norm": 2.921875,
      "learning_rate": 1.926200248707784e-05,
      "loss": 0.9227,
      "step": 559200
    },
    {
      "epoch": 1.9598916331086789,
      "grad_norm": 3.796875,
      "learning_rate": 1.926135345841414e-05,
      "loss": 0.8058,
      "step": 559210
    },
    {
      "epoch": 1.9599266806155744,
      "grad_norm": 2.859375,
      "learning_rate": 1.9260704429750437e-05,
      "loss": 0.7893,
      "step": 559220
    },
    {
      "epoch": 1.9599617281224702,
      "grad_norm": 3.15625,
      "learning_rate": 1.9260055401086735e-05,
      "loss": 0.7528,
      "step": 559230
    },
    {
      "epoch": 1.9599967756293655,
      "grad_norm": 2.921875,
      "learning_rate": 1.9259406372423033e-05,
      "loss": 0.8717,
      "step": 559240
    },
    {
      "epoch": 1.9600318231362612,
      "grad_norm": 2.796875,
      "learning_rate": 1.925875734375933e-05,
      "loss": 0.7585,
      "step": 559250
    },
    {
      "epoch": 1.9600668706431568,
      "grad_norm": 3.3125,
      "learning_rate": 1.925810831509563e-05,
      "loss": 0.8299,
      "step": 559260
    },
    {
      "epoch": 1.9601019181500523,
      "grad_norm": 2.3125,
      "learning_rate": 1.9257459286431927e-05,
      "loss": 0.8354,
      "step": 559270
    },
    {
      "epoch": 1.960136965656948,
      "grad_norm": 2.921875,
      "learning_rate": 1.9256810257768225e-05,
      "loss": 0.8582,
      "step": 559280
    },
    {
      "epoch": 1.9601720131638436,
      "grad_norm": 3.21875,
      "learning_rate": 1.9256161229104523e-05,
      "loss": 0.7619,
      "step": 559290
    },
    {
      "epoch": 1.9602070606707391,
      "grad_norm": 2.875,
      "learning_rate": 1.925551220044082e-05,
      "loss": 0.8859,
      "step": 559300
    },
    {
      "epoch": 1.960242108177635,
      "grad_norm": 3.078125,
      "learning_rate": 1.925486317177712e-05,
      "loss": 0.9021,
      "step": 559310
    },
    {
      "epoch": 1.9602771556845304,
      "grad_norm": 2.890625,
      "learning_rate": 1.9254214143113417e-05,
      "loss": 0.8556,
      "step": 559320
    },
    {
      "epoch": 1.960312203191426,
      "grad_norm": 2.5625,
      "learning_rate": 1.9253565114449715e-05,
      "loss": 0.7768,
      "step": 559330
    },
    {
      "epoch": 1.9603472506983217,
      "grad_norm": 3.078125,
      "learning_rate": 1.9252916085786013e-05,
      "loss": 0.8403,
      "step": 559340
    },
    {
      "epoch": 1.960382298205217,
      "grad_norm": 2.84375,
      "learning_rate": 1.925226705712231e-05,
      "loss": 0.8733,
      "step": 559350
    },
    {
      "epoch": 1.9604173457121128,
      "grad_norm": 3.109375,
      "learning_rate": 1.925161802845861e-05,
      "loss": 0.7723,
      "step": 559360
    },
    {
      "epoch": 1.9604523932190083,
      "grad_norm": 2.515625,
      "learning_rate": 1.9250968999794907e-05,
      "loss": 0.8895,
      "step": 559370
    },
    {
      "epoch": 1.9604874407259039,
      "grad_norm": 3.0,
      "learning_rate": 1.9250319971131205e-05,
      "loss": 0.87,
      "step": 559380
    },
    {
      "epoch": 1.9605224882327996,
      "grad_norm": 2.609375,
      "learning_rate": 1.9249670942467503e-05,
      "loss": 0.7171,
      "step": 559390
    },
    {
      "epoch": 1.9605575357396952,
      "grad_norm": 2.578125,
      "learning_rate": 1.92490219138038e-05,
      "loss": 0.821,
      "step": 559400
    },
    {
      "epoch": 1.9605925832465907,
      "grad_norm": 2.875,
      "learning_rate": 1.92483728851401e-05,
      "loss": 0.7722,
      "step": 559410
    },
    {
      "epoch": 1.9606276307534865,
      "grad_norm": 2.96875,
      "learning_rate": 1.92477238564764e-05,
      "loss": 0.8408,
      "step": 559420
    },
    {
      "epoch": 1.960662678260382,
      "grad_norm": 3.265625,
      "learning_rate": 1.92470748278127e-05,
      "loss": 0.8774,
      "step": 559430
    },
    {
      "epoch": 1.9606977257672775,
      "grad_norm": 3.0,
      "learning_rate": 1.9246425799148997e-05,
      "loss": 0.8905,
      "step": 559440
    },
    {
      "epoch": 1.9607327732741733,
      "grad_norm": 2.5625,
      "learning_rate": 1.9245776770485295e-05,
      "loss": 0.8091,
      "step": 559450
    },
    {
      "epoch": 1.9607678207810686,
      "grad_norm": 2.125,
      "learning_rate": 1.9245127741821593e-05,
      "loss": 0.7901,
      "step": 559460
    },
    {
      "epoch": 1.9608028682879644,
      "grad_norm": 3.1875,
      "learning_rate": 1.924447871315789e-05,
      "loss": 0.8083,
      "step": 559470
    },
    {
      "epoch": 1.9608379157948599,
      "grad_norm": 2.5,
      "learning_rate": 1.9243829684494185e-05,
      "loss": 0.7627,
      "step": 559480
    },
    {
      "epoch": 1.9608729633017554,
      "grad_norm": 2.71875,
      "learning_rate": 1.9243180655830483e-05,
      "loss": 0.8367,
      "step": 559490
    },
    {
      "epoch": 1.9609080108086512,
      "grad_norm": 3.09375,
      "learning_rate": 1.924253162716678e-05,
      "loss": 0.8161,
      "step": 559500
    },
    {
      "epoch": 1.9609430583155467,
      "grad_norm": 2.953125,
      "learning_rate": 1.924188259850308e-05,
      "loss": 0.7566,
      "step": 559510
    },
    {
      "epoch": 1.9609781058224423,
      "grad_norm": 2.640625,
      "learning_rate": 1.9241233569839377e-05,
      "loss": 0.787,
      "step": 559520
    },
    {
      "epoch": 1.961013153329338,
      "grad_norm": 3.140625,
      "learning_rate": 1.9240584541175675e-05,
      "loss": 0.8027,
      "step": 559530
    },
    {
      "epoch": 1.9610482008362335,
      "grad_norm": 2.875,
      "learning_rate": 1.9239935512511977e-05,
      "loss": 0.803,
      "step": 559540
    },
    {
      "epoch": 1.961083248343129,
      "grad_norm": 2.8125,
      "learning_rate": 1.9239286483848275e-05,
      "loss": 0.8144,
      "step": 559550
    },
    {
      "epoch": 1.9611182958500248,
      "grad_norm": 2.484375,
      "learning_rate": 1.9238637455184573e-05,
      "loss": 0.7991,
      "step": 559560
    },
    {
      "epoch": 1.9611533433569202,
      "grad_norm": 3.03125,
      "learning_rate": 1.923798842652087e-05,
      "loss": 0.8954,
      "step": 559570
    },
    {
      "epoch": 1.961188390863816,
      "grad_norm": 3.0625,
      "learning_rate": 1.923733939785717e-05,
      "loss": 0.803,
      "step": 559580
    },
    {
      "epoch": 1.9612234383707114,
      "grad_norm": 3.1875,
      "learning_rate": 1.9236690369193467e-05,
      "loss": 0.8387,
      "step": 559590
    },
    {
      "epoch": 1.961258485877607,
      "grad_norm": 2.84375,
      "learning_rate": 1.9236041340529765e-05,
      "loss": 0.8181,
      "step": 559600
    },
    {
      "epoch": 1.9612935333845027,
      "grad_norm": 2.9375,
      "learning_rate": 1.9235392311866063e-05,
      "loss": 0.8328,
      "step": 559610
    },
    {
      "epoch": 1.9613285808913983,
      "grad_norm": 3.171875,
      "learning_rate": 1.923474328320236e-05,
      "loss": 0.8383,
      "step": 559620
    },
    {
      "epoch": 1.9613636283982938,
      "grad_norm": 2.546875,
      "learning_rate": 1.923409425453866e-05,
      "loss": 0.8275,
      "step": 559630
    },
    {
      "epoch": 1.9613986759051896,
      "grad_norm": 2.796875,
      "learning_rate": 1.9233445225874957e-05,
      "loss": 0.8238,
      "step": 559640
    },
    {
      "epoch": 1.961433723412085,
      "grad_norm": 2.9375,
      "learning_rate": 1.9232796197211255e-05,
      "loss": 0.8075,
      "step": 559650
    },
    {
      "epoch": 1.9614687709189806,
      "grad_norm": 2.984375,
      "learning_rate": 1.9232147168547553e-05,
      "loss": 0.776,
      "step": 559660
    },
    {
      "epoch": 1.9615038184258764,
      "grad_norm": 3.046875,
      "learning_rate": 1.923149813988385e-05,
      "loss": 0.822,
      "step": 559670
    },
    {
      "epoch": 1.9615388659327717,
      "grad_norm": 2.71875,
      "learning_rate": 1.923084911122015e-05,
      "loss": 0.8881,
      "step": 559680
    },
    {
      "epoch": 1.9615739134396675,
      "grad_norm": 3.109375,
      "learning_rate": 1.9230200082556447e-05,
      "loss": 0.8844,
      "step": 559690
    },
    {
      "epoch": 1.9616089609465632,
      "grad_norm": 2.953125,
      "learning_rate": 1.9229551053892745e-05,
      "loss": 0.8255,
      "step": 559700
    },
    {
      "epoch": 1.9616440084534585,
      "grad_norm": 2.8125,
      "learning_rate": 1.9228902025229043e-05,
      "loss": 0.804,
      "step": 559710
    },
    {
      "epoch": 1.9616790559603543,
      "grad_norm": 2.875,
      "learning_rate": 1.922825299656534e-05,
      "loss": 0.8288,
      "step": 559720
    },
    {
      "epoch": 1.9617141034672498,
      "grad_norm": 2.90625,
      "learning_rate": 1.922760396790164e-05,
      "loss": 0.8208,
      "step": 559730
    },
    {
      "epoch": 1.9617491509741454,
      "grad_norm": 2.8125,
      "learning_rate": 1.9226954939237937e-05,
      "loss": 0.7711,
      "step": 559740
    },
    {
      "epoch": 1.9617841984810411,
      "grad_norm": 3.25,
      "learning_rate": 1.9226305910574235e-05,
      "loss": 0.7878,
      "step": 559750
    },
    {
      "epoch": 1.9618192459879367,
      "grad_norm": 2.890625,
      "learning_rate": 1.9225656881910533e-05,
      "loss": 0.7733,
      "step": 559760
    },
    {
      "epoch": 1.9618542934948322,
      "grad_norm": 2.9375,
      "learning_rate": 1.922500785324683e-05,
      "loss": 0.853,
      "step": 559770
    },
    {
      "epoch": 1.961889341001728,
      "grad_norm": 3.3125,
      "learning_rate": 1.922435882458313e-05,
      "loss": 0.8005,
      "step": 559780
    },
    {
      "epoch": 1.9619243885086233,
      "grad_norm": 3.203125,
      "learning_rate": 1.922370979591943e-05,
      "loss": 0.7736,
      "step": 559790
    },
    {
      "epoch": 1.961959436015519,
      "grad_norm": 2.9375,
      "learning_rate": 1.9223060767255728e-05,
      "loss": 0.8445,
      "step": 559800
    },
    {
      "epoch": 1.9619944835224148,
      "grad_norm": 2.59375,
      "learning_rate": 1.9222411738592026e-05,
      "loss": 0.9248,
      "step": 559810
    },
    {
      "epoch": 1.96202953102931,
      "grad_norm": 2.703125,
      "learning_rate": 1.9221762709928324e-05,
      "loss": 0.8482,
      "step": 559820
    },
    {
      "epoch": 1.9620645785362059,
      "grad_norm": 2.8125,
      "learning_rate": 1.9221113681264622e-05,
      "loss": 0.9319,
      "step": 559830
    },
    {
      "epoch": 1.9620996260431014,
      "grad_norm": 2.984375,
      "learning_rate": 1.922046465260092e-05,
      "loss": 0.7877,
      "step": 559840
    },
    {
      "epoch": 1.962134673549997,
      "grad_norm": 2.421875,
      "learning_rate": 1.9219815623937215e-05,
      "loss": 0.8198,
      "step": 559850
    },
    {
      "epoch": 1.9621697210568927,
      "grad_norm": 2.6875,
      "learning_rate": 1.9219166595273513e-05,
      "loss": 0.7765,
      "step": 559860
    },
    {
      "epoch": 1.9622047685637882,
      "grad_norm": 3.171875,
      "learning_rate": 1.921851756660981e-05,
      "loss": 0.8474,
      "step": 559870
    },
    {
      "epoch": 1.9622398160706838,
      "grad_norm": 2.6875,
      "learning_rate": 1.921786853794611e-05,
      "loss": 0.8048,
      "step": 559880
    },
    {
      "epoch": 1.9622748635775795,
      "grad_norm": 3.265625,
      "learning_rate": 1.9217219509282407e-05,
      "loss": 0.7822,
      "step": 559890
    },
    {
      "epoch": 1.962309911084475,
      "grad_norm": 2.5625,
      "learning_rate": 1.9216570480618708e-05,
      "loss": 0.8121,
      "step": 559900
    },
    {
      "epoch": 1.9623449585913706,
      "grad_norm": 2.8125,
      "learning_rate": 1.9215921451955006e-05,
      "loss": 0.8903,
      "step": 559910
    },
    {
      "epoch": 1.9623800060982663,
      "grad_norm": 2.984375,
      "learning_rate": 1.9215272423291304e-05,
      "loss": 0.7707,
      "step": 559920
    },
    {
      "epoch": 1.9624150536051617,
      "grad_norm": 2.265625,
      "learning_rate": 1.9214623394627602e-05,
      "loss": 0.825,
      "step": 559930
    },
    {
      "epoch": 1.9624501011120574,
      "grad_norm": 3.09375,
      "learning_rate": 1.92139743659639e-05,
      "loss": 0.845,
      "step": 559940
    },
    {
      "epoch": 1.962485148618953,
      "grad_norm": 2.5,
      "learning_rate": 1.9213325337300198e-05,
      "loss": 0.8966,
      "step": 559950
    },
    {
      "epoch": 1.9625201961258485,
      "grad_norm": 2.984375,
      "learning_rate": 1.9212676308636496e-05,
      "loss": 0.7568,
      "step": 559960
    },
    {
      "epoch": 1.9625552436327443,
      "grad_norm": 2.875,
      "learning_rate": 1.9212027279972794e-05,
      "loss": 0.9089,
      "step": 559970
    },
    {
      "epoch": 1.9625902911396398,
      "grad_norm": 3.21875,
      "learning_rate": 1.9211378251309092e-05,
      "loss": 0.8361,
      "step": 559980
    },
    {
      "epoch": 1.9626253386465353,
      "grad_norm": 2.28125,
      "learning_rate": 1.921072922264539e-05,
      "loss": 0.8154,
      "step": 559990
    },
    {
      "epoch": 1.962660386153431,
      "grad_norm": 3.546875,
      "learning_rate": 1.9210080193981688e-05,
      "loss": 0.8107,
      "step": 560000
    },
    {
      "epoch": 1.962660386153431,
      "eval_loss": 0.7721700668334961,
      "eval_runtime": 555.6648,
      "eval_samples_per_second": 684.65,
      "eval_steps_per_second": 57.054,
      "step": 560000
    },
    {
      "epoch": 1.9626954336603266,
      "grad_norm": 3.09375,
      "learning_rate": 1.9209431165317986e-05,
      "loss": 0.8574,
      "step": 560010
    },
    {
      "epoch": 1.9627304811672222,
      "grad_norm": 2.75,
      "learning_rate": 1.9208782136654284e-05,
      "loss": 0.8968,
      "step": 560020
    },
    {
      "epoch": 1.962765528674118,
      "grad_norm": 3.28125,
      "learning_rate": 1.9208133107990582e-05,
      "loss": 0.8183,
      "step": 560030
    },
    {
      "epoch": 1.9628005761810132,
      "grad_norm": 2.71875,
      "learning_rate": 1.920748407932688e-05,
      "loss": 0.814,
      "step": 560040
    },
    {
      "epoch": 1.962835623687909,
      "grad_norm": 3.046875,
      "learning_rate": 1.9206835050663178e-05,
      "loss": 0.8395,
      "step": 560050
    },
    {
      "epoch": 1.9628706711948045,
      "grad_norm": 2.71875,
      "learning_rate": 1.9206186021999476e-05,
      "loss": 0.8029,
      "step": 560060
    },
    {
      "epoch": 1.9629057187017,
      "grad_norm": 2.9375,
      "learning_rate": 1.9205536993335774e-05,
      "loss": 0.8543,
      "step": 560070
    },
    {
      "epoch": 1.9629407662085958,
      "grad_norm": 3.21875,
      "learning_rate": 1.9204887964672072e-05,
      "loss": 0.8286,
      "step": 560080
    },
    {
      "epoch": 1.9629758137154913,
      "grad_norm": 2.84375,
      "learning_rate": 1.920423893600837e-05,
      "loss": 0.8336,
      "step": 560090
    },
    {
      "epoch": 1.9630108612223869,
      "grad_norm": 3.171875,
      "learning_rate": 1.9203589907344668e-05,
      "loss": 0.8826,
      "step": 560100
    },
    {
      "epoch": 1.9630459087292826,
      "grad_norm": 2.8125,
      "learning_rate": 1.9202940878680966e-05,
      "loss": 0.7746,
      "step": 560110
    },
    {
      "epoch": 1.9630809562361782,
      "grad_norm": 2.828125,
      "learning_rate": 1.9202291850017264e-05,
      "loss": 0.9097,
      "step": 560120
    },
    {
      "epoch": 1.9631160037430737,
      "grad_norm": 2.703125,
      "learning_rate": 1.9201642821353562e-05,
      "loss": 0.7587,
      "step": 560130
    },
    {
      "epoch": 1.9631510512499695,
      "grad_norm": 3.421875,
      "learning_rate": 1.920099379268986e-05,
      "loss": 0.8414,
      "step": 560140
    },
    {
      "epoch": 1.9631860987568648,
      "grad_norm": 3.09375,
      "learning_rate": 1.9200344764026158e-05,
      "loss": 0.8718,
      "step": 560150
    },
    {
      "epoch": 1.9632211462637605,
      "grad_norm": 3.0,
      "learning_rate": 1.919969573536246e-05,
      "loss": 0.8386,
      "step": 560160
    },
    {
      "epoch": 1.963256193770656,
      "grad_norm": 2.359375,
      "learning_rate": 1.9199046706698757e-05,
      "loss": 0.8826,
      "step": 560170
    },
    {
      "epoch": 1.9632912412775516,
      "grad_norm": 3.921875,
      "learning_rate": 1.9198397678035055e-05,
      "loss": 0.8903,
      "step": 560180
    },
    {
      "epoch": 1.9633262887844474,
      "grad_norm": 3.109375,
      "learning_rate": 1.9197748649371353e-05,
      "loss": 0.7864,
      "step": 560190
    },
    {
      "epoch": 1.963361336291343,
      "grad_norm": 2.78125,
      "learning_rate": 1.919709962070765e-05,
      "loss": 0.9083,
      "step": 560200
    },
    {
      "epoch": 1.9633963837982384,
      "grad_norm": 2.90625,
      "learning_rate": 1.919645059204395e-05,
      "loss": 0.8938,
      "step": 560210
    },
    {
      "epoch": 1.9634314313051342,
      "grad_norm": 2.859375,
      "learning_rate": 1.9195801563380247e-05,
      "loss": 0.8142,
      "step": 560220
    },
    {
      "epoch": 1.9634664788120297,
      "grad_norm": 2.609375,
      "learning_rate": 1.9195152534716542e-05,
      "loss": 0.7872,
      "step": 560230
    },
    {
      "epoch": 1.9635015263189253,
      "grad_norm": 2.875,
      "learning_rate": 1.919450350605284e-05,
      "loss": 0.7885,
      "step": 560240
    },
    {
      "epoch": 1.963536573825821,
      "grad_norm": 2.953125,
      "learning_rate": 1.9193854477389138e-05,
      "loss": 0.8375,
      "step": 560250
    },
    {
      "epoch": 1.9635716213327163,
      "grad_norm": 3.015625,
      "learning_rate": 1.9193205448725436e-05,
      "loss": 0.7868,
      "step": 560260
    },
    {
      "epoch": 1.963606668839612,
      "grad_norm": 2.953125,
      "learning_rate": 1.9192556420061737e-05,
      "loss": 0.8378,
      "step": 560270
    },
    {
      "epoch": 1.9636417163465076,
      "grad_norm": 2.828125,
      "learning_rate": 1.9191907391398035e-05,
      "loss": 0.8156,
      "step": 560280
    },
    {
      "epoch": 1.9636767638534032,
      "grad_norm": 2.640625,
      "learning_rate": 1.9191258362734333e-05,
      "loss": 0.8864,
      "step": 560290
    },
    {
      "epoch": 1.963711811360299,
      "grad_norm": 2.96875,
      "learning_rate": 1.919060933407063e-05,
      "loss": 0.874,
      "step": 560300
    },
    {
      "epoch": 1.9637468588671945,
      "grad_norm": 2.609375,
      "learning_rate": 1.918996030540693e-05,
      "loss": 0.7456,
      "step": 560310
    },
    {
      "epoch": 1.96378190637409,
      "grad_norm": 3.078125,
      "learning_rate": 1.9189311276743227e-05,
      "loss": 0.854,
      "step": 560320
    },
    {
      "epoch": 1.9638169538809858,
      "grad_norm": 2.75,
      "learning_rate": 1.9188662248079525e-05,
      "loss": 0.8479,
      "step": 560330
    },
    {
      "epoch": 1.9638520013878813,
      "grad_norm": 3.0,
      "learning_rate": 1.9188013219415823e-05,
      "loss": 0.8445,
      "step": 560340
    },
    {
      "epoch": 1.9638870488947768,
      "grad_norm": 3.0,
      "learning_rate": 1.918736419075212e-05,
      "loss": 0.7335,
      "step": 560350
    },
    {
      "epoch": 1.9639220964016726,
      "grad_norm": 3.0,
      "learning_rate": 1.918671516208842e-05,
      "loss": 0.8388,
      "step": 560360
    },
    {
      "epoch": 1.963957143908568,
      "grad_norm": 2.765625,
      "learning_rate": 1.9186066133424717e-05,
      "loss": 0.7383,
      "step": 560370
    },
    {
      "epoch": 1.9639921914154637,
      "grad_norm": 3.375,
      "learning_rate": 1.9185417104761015e-05,
      "loss": 0.8439,
      "step": 560380
    },
    {
      "epoch": 1.9640272389223594,
      "grad_norm": 2.671875,
      "learning_rate": 1.9184768076097313e-05,
      "loss": 0.867,
      "step": 560390
    },
    {
      "epoch": 1.9640622864292547,
      "grad_norm": 2.78125,
      "learning_rate": 1.918411904743361e-05,
      "loss": 0.7931,
      "step": 560400
    },
    {
      "epoch": 1.9640973339361505,
      "grad_norm": 2.703125,
      "learning_rate": 1.9183470018769913e-05,
      "loss": 0.8809,
      "step": 560410
    },
    {
      "epoch": 1.964132381443046,
      "grad_norm": 2.828125,
      "learning_rate": 1.9182820990106207e-05,
      "loss": 0.7428,
      "step": 560420
    },
    {
      "epoch": 1.9641674289499416,
      "grad_norm": 3.15625,
      "learning_rate": 1.9182171961442505e-05,
      "loss": 0.7678,
      "step": 560430
    },
    {
      "epoch": 1.9642024764568373,
      "grad_norm": 3.171875,
      "learning_rate": 1.9181522932778803e-05,
      "loss": 0.8537,
      "step": 560440
    },
    {
      "epoch": 1.9642375239637329,
      "grad_norm": 3.984375,
      "learning_rate": 1.91808739041151e-05,
      "loss": 0.8475,
      "step": 560450
    },
    {
      "epoch": 1.9642725714706284,
      "grad_norm": 2.640625,
      "learning_rate": 1.91802248754514e-05,
      "loss": 0.8361,
      "step": 560460
    },
    {
      "epoch": 1.9643076189775242,
      "grad_norm": 3.046875,
      "learning_rate": 1.9179575846787697e-05,
      "loss": 0.9081,
      "step": 560470
    },
    {
      "epoch": 1.9643426664844197,
      "grad_norm": 2.515625,
      "learning_rate": 1.9178926818123995e-05,
      "loss": 0.7672,
      "step": 560480
    },
    {
      "epoch": 1.9643777139913152,
      "grad_norm": 3.0625,
      "learning_rate": 1.9178277789460293e-05,
      "loss": 0.8045,
      "step": 560490
    },
    {
      "epoch": 1.964412761498211,
      "grad_norm": 2.5625,
      "learning_rate": 1.917762876079659e-05,
      "loss": 0.8551,
      "step": 560500
    },
    {
      "epoch": 1.9644478090051063,
      "grad_norm": 2.796875,
      "learning_rate": 1.917697973213289e-05,
      "loss": 0.8664,
      "step": 560510
    },
    {
      "epoch": 1.964482856512002,
      "grad_norm": 3.015625,
      "learning_rate": 1.917633070346919e-05,
      "loss": 0.8719,
      "step": 560520
    },
    {
      "epoch": 1.9645179040188976,
      "grad_norm": 2.5625,
      "learning_rate": 1.917568167480549e-05,
      "loss": 0.8346,
      "step": 560530
    },
    {
      "epoch": 1.9645529515257931,
      "grad_norm": 2.734375,
      "learning_rate": 1.9175032646141787e-05,
      "loss": 0.8169,
      "step": 560540
    },
    {
      "epoch": 1.9645879990326889,
      "grad_norm": 2.6875,
      "learning_rate": 1.9174383617478085e-05,
      "loss": 0.8385,
      "step": 560550
    },
    {
      "epoch": 1.9646230465395844,
      "grad_norm": 3.34375,
      "learning_rate": 1.9173734588814383e-05,
      "loss": 0.8448,
      "step": 560560
    },
    {
      "epoch": 1.96465809404648,
      "grad_norm": 2.734375,
      "learning_rate": 1.917308556015068e-05,
      "loss": 0.7409,
      "step": 560570
    },
    {
      "epoch": 1.9646931415533757,
      "grad_norm": 3.140625,
      "learning_rate": 1.917243653148698e-05,
      "loss": 0.8257,
      "step": 560580
    },
    {
      "epoch": 1.9647281890602712,
      "grad_norm": 2.609375,
      "learning_rate": 1.9171787502823277e-05,
      "loss": 0.8366,
      "step": 560590
    },
    {
      "epoch": 1.9647632365671668,
      "grad_norm": 3.046875,
      "learning_rate": 1.917113847415957e-05,
      "loss": 0.8558,
      "step": 560600
    },
    {
      "epoch": 1.9647982840740625,
      "grad_norm": 2.640625,
      "learning_rate": 1.917048944549587e-05,
      "loss": 0.8067,
      "step": 560610
    },
    {
      "epoch": 1.9648333315809579,
      "grad_norm": 2.984375,
      "learning_rate": 1.9169840416832167e-05,
      "loss": 0.8151,
      "step": 560620
    },
    {
      "epoch": 1.9648683790878536,
      "grad_norm": 3.625,
      "learning_rate": 1.9169191388168465e-05,
      "loss": 0.7844,
      "step": 560630
    },
    {
      "epoch": 1.9649034265947491,
      "grad_norm": 2.953125,
      "learning_rate": 1.9168542359504767e-05,
      "loss": 0.8615,
      "step": 560640
    },
    {
      "epoch": 1.9649384741016447,
      "grad_norm": 2.828125,
      "learning_rate": 1.9167893330841065e-05,
      "loss": 0.7645,
      "step": 560650
    },
    {
      "epoch": 1.9649735216085404,
      "grad_norm": 3.015625,
      "learning_rate": 1.9167244302177363e-05,
      "loss": 0.8756,
      "step": 560660
    },
    {
      "epoch": 1.965008569115436,
      "grad_norm": 2.40625,
      "learning_rate": 1.916659527351366e-05,
      "loss": 0.8148,
      "step": 560670
    },
    {
      "epoch": 1.9650436166223315,
      "grad_norm": 2.421875,
      "learning_rate": 1.916594624484996e-05,
      "loss": 0.8766,
      "step": 560680
    },
    {
      "epoch": 1.9650786641292273,
      "grad_norm": 3.25,
      "learning_rate": 1.9165297216186257e-05,
      "loss": 0.8623,
      "step": 560690
    },
    {
      "epoch": 1.9651137116361228,
      "grad_norm": 2.75,
      "learning_rate": 1.9164648187522555e-05,
      "loss": 0.8161,
      "step": 560700
    },
    {
      "epoch": 1.9651487591430183,
      "grad_norm": 3.390625,
      "learning_rate": 1.9163999158858853e-05,
      "loss": 0.7893,
      "step": 560710
    },
    {
      "epoch": 1.965183806649914,
      "grad_norm": 3.015625,
      "learning_rate": 1.916335013019515e-05,
      "loss": 0.783,
      "step": 560720
    },
    {
      "epoch": 1.9652188541568094,
      "grad_norm": 2.75,
      "learning_rate": 1.916270110153145e-05,
      "loss": 0.7649,
      "step": 560730
    },
    {
      "epoch": 1.9652539016637052,
      "grad_norm": 3.328125,
      "learning_rate": 1.9162052072867747e-05,
      "loss": 0.8938,
      "step": 560740
    },
    {
      "epoch": 1.9652889491706007,
      "grad_norm": 2.96875,
      "learning_rate": 1.9161403044204045e-05,
      "loss": 0.8628,
      "step": 560750
    },
    {
      "epoch": 1.9653239966774962,
      "grad_norm": 3.265625,
      "learning_rate": 1.9160754015540343e-05,
      "loss": 0.856,
      "step": 560760
    },
    {
      "epoch": 1.965359044184392,
      "grad_norm": 2.890625,
      "learning_rate": 1.916010498687664e-05,
      "loss": 0.8344,
      "step": 560770
    },
    {
      "epoch": 1.9653940916912875,
      "grad_norm": 3.34375,
      "learning_rate": 1.9159455958212942e-05,
      "loss": 0.8344,
      "step": 560780
    },
    {
      "epoch": 1.965429139198183,
      "grad_norm": 2.625,
      "learning_rate": 1.9158806929549237e-05,
      "loss": 0.784,
      "step": 560790
    },
    {
      "epoch": 1.9654641867050788,
      "grad_norm": 2.640625,
      "learning_rate": 1.9158157900885535e-05,
      "loss": 0.7937,
      "step": 560800
    },
    {
      "epoch": 1.9654992342119744,
      "grad_norm": 3.3125,
      "learning_rate": 1.9157508872221833e-05,
      "loss": 0.785,
      "step": 560810
    },
    {
      "epoch": 1.96553428171887,
      "grad_norm": 3.046875,
      "learning_rate": 1.915685984355813e-05,
      "loss": 0.8049,
      "step": 560820
    },
    {
      "epoch": 1.9655693292257657,
      "grad_norm": 2.5625,
      "learning_rate": 1.915621081489443e-05,
      "loss": 0.8523,
      "step": 560830
    },
    {
      "epoch": 1.965604376732661,
      "grad_norm": 2.84375,
      "learning_rate": 1.9155561786230727e-05,
      "loss": 0.8421,
      "step": 560840
    },
    {
      "epoch": 1.9656394242395567,
      "grad_norm": 2.953125,
      "learning_rate": 1.9154912757567025e-05,
      "loss": 0.8575,
      "step": 560850
    },
    {
      "epoch": 1.9656744717464523,
      "grad_norm": 2.9375,
      "learning_rate": 1.9154263728903323e-05,
      "loss": 0.8419,
      "step": 560860
    },
    {
      "epoch": 1.9657095192533478,
      "grad_norm": 3.046875,
      "learning_rate": 1.915361470023962e-05,
      "loss": 0.7843,
      "step": 560870
    },
    {
      "epoch": 1.9657445667602436,
      "grad_norm": 3.125,
      "learning_rate": 1.915296567157592e-05,
      "loss": 0.8162,
      "step": 560880
    },
    {
      "epoch": 1.965779614267139,
      "grad_norm": 2.828125,
      "learning_rate": 1.915231664291222e-05,
      "loss": 0.7995,
      "step": 560890
    },
    {
      "epoch": 1.9658146617740346,
      "grad_norm": 3.328125,
      "learning_rate": 1.9151667614248518e-05,
      "loss": 0.8169,
      "step": 560900
    },
    {
      "epoch": 1.9658497092809304,
      "grad_norm": 2.609375,
      "learning_rate": 1.9151018585584816e-05,
      "loss": 0.7887,
      "step": 560910
    },
    {
      "epoch": 1.965884756787826,
      "grad_norm": 3.015625,
      "learning_rate": 1.9150369556921114e-05,
      "loss": 0.8127,
      "step": 560920
    },
    {
      "epoch": 1.9659198042947215,
      "grad_norm": 2.65625,
      "learning_rate": 1.9149720528257412e-05,
      "loss": 0.7806,
      "step": 560930
    },
    {
      "epoch": 1.9659548518016172,
      "grad_norm": 2.703125,
      "learning_rate": 1.914907149959371e-05,
      "loss": 0.8249,
      "step": 560940
    },
    {
      "epoch": 1.9659898993085125,
      "grad_norm": 2.65625,
      "learning_rate": 1.9148422470930008e-05,
      "loss": 0.8639,
      "step": 560950
    },
    {
      "epoch": 1.9660249468154083,
      "grad_norm": 2.875,
      "learning_rate": 1.9147773442266306e-05,
      "loss": 0.8019,
      "step": 560960
    },
    {
      "epoch": 1.9660599943223038,
      "grad_norm": 2.921875,
      "learning_rate": 1.9147124413602604e-05,
      "loss": 0.8263,
      "step": 560970
    },
    {
      "epoch": 1.9660950418291994,
      "grad_norm": 2.734375,
      "learning_rate": 1.91464753849389e-05,
      "loss": 0.8461,
      "step": 560980
    },
    {
      "epoch": 1.9661300893360951,
      "grad_norm": 2.9375,
      "learning_rate": 1.9145826356275197e-05,
      "loss": 0.8421,
      "step": 560990
    },
    {
      "epoch": 1.9661651368429907,
      "grad_norm": 2.890625,
      "learning_rate": 1.9145177327611498e-05,
      "loss": 0.8017,
      "step": 561000
    },
    {
      "epoch": 1.9662001843498862,
      "grad_norm": 2.75,
      "learning_rate": 1.9144528298947796e-05,
      "loss": 0.7776,
      "step": 561010
    },
    {
      "epoch": 1.966235231856782,
      "grad_norm": 2.609375,
      "learning_rate": 1.9143879270284094e-05,
      "loss": 0.829,
      "step": 561020
    },
    {
      "epoch": 1.9662702793636775,
      "grad_norm": 2.609375,
      "learning_rate": 1.9143230241620392e-05,
      "loss": 0.8052,
      "step": 561030
    },
    {
      "epoch": 1.966305326870573,
      "grad_norm": 3.0,
      "learning_rate": 1.914258121295669e-05,
      "loss": 0.7972,
      "step": 561040
    },
    {
      "epoch": 1.9663403743774688,
      "grad_norm": 2.59375,
      "learning_rate": 1.9141932184292988e-05,
      "loss": 0.8392,
      "step": 561050
    },
    {
      "epoch": 1.966375421884364,
      "grad_norm": 3.046875,
      "learning_rate": 1.9141283155629286e-05,
      "loss": 0.8361,
      "step": 561060
    },
    {
      "epoch": 1.9664104693912599,
      "grad_norm": 3.21875,
      "learning_rate": 1.9140634126965584e-05,
      "loss": 0.7857,
      "step": 561070
    },
    {
      "epoch": 1.9664455168981556,
      "grad_norm": 3.046875,
      "learning_rate": 1.9139985098301882e-05,
      "loss": 0.7687,
      "step": 561080
    },
    {
      "epoch": 1.966480564405051,
      "grad_norm": 2.9375,
      "learning_rate": 1.913933606963818e-05,
      "loss": 0.8081,
      "step": 561090
    },
    {
      "epoch": 1.9665156119119467,
      "grad_norm": 2.890625,
      "learning_rate": 1.9138687040974478e-05,
      "loss": 0.7348,
      "step": 561100
    },
    {
      "epoch": 1.9665506594188422,
      "grad_norm": 2.65625,
      "learning_rate": 1.9138038012310776e-05,
      "loss": 0.7652,
      "step": 561110
    },
    {
      "epoch": 1.9665857069257378,
      "grad_norm": 3.28125,
      "learning_rate": 1.9137388983647074e-05,
      "loss": 0.842,
      "step": 561120
    },
    {
      "epoch": 1.9666207544326335,
      "grad_norm": 3.109375,
      "learning_rate": 1.9136739954983372e-05,
      "loss": 0.8617,
      "step": 561130
    },
    {
      "epoch": 1.966655801939529,
      "grad_norm": 2.6875,
      "learning_rate": 1.9136090926319674e-05,
      "loss": 0.8926,
      "step": 561140
    },
    {
      "epoch": 1.9666908494464246,
      "grad_norm": 3.03125,
      "learning_rate": 1.913544189765597e-05,
      "loss": 0.8323,
      "step": 561150
    },
    {
      "epoch": 1.9667258969533203,
      "grad_norm": 3.28125,
      "learning_rate": 1.913479286899227e-05,
      "loss": 0.828,
      "step": 561160
    },
    {
      "epoch": 1.9667609444602159,
      "grad_norm": 2.96875,
      "learning_rate": 1.9134143840328564e-05,
      "loss": 0.8833,
      "step": 561170
    },
    {
      "epoch": 1.9667959919671114,
      "grad_norm": 2.578125,
      "learning_rate": 1.9133494811664862e-05,
      "loss": 0.7748,
      "step": 561180
    },
    {
      "epoch": 1.9668310394740072,
      "grad_norm": 3.125,
      "learning_rate": 1.913284578300116e-05,
      "loss": 0.8076,
      "step": 561190
    },
    {
      "epoch": 1.9668660869809025,
      "grad_norm": 3.015625,
      "learning_rate": 1.9132196754337458e-05,
      "loss": 0.7854,
      "step": 561200
    },
    {
      "epoch": 1.9669011344877982,
      "grad_norm": 3.046875,
      "learning_rate": 1.9131547725673756e-05,
      "loss": 0.8488,
      "step": 561210
    },
    {
      "epoch": 1.9669361819946938,
      "grad_norm": 2.6875,
      "learning_rate": 1.9130898697010054e-05,
      "loss": 0.8268,
      "step": 561220
    },
    {
      "epoch": 1.9669712295015893,
      "grad_norm": 3.15625,
      "learning_rate": 1.9130249668346352e-05,
      "loss": 0.7831,
      "step": 561230
    },
    {
      "epoch": 1.967006277008485,
      "grad_norm": 2.703125,
      "learning_rate": 1.912960063968265e-05,
      "loss": 0.803,
      "step": 561240
    },
    {
      "epoch": 1.9670413245153806,
      "grad_norm": 2.953125,
      "learning_rate": 1.9128951611018948e-05,
      "loss": 0.8885,
      "step": 561250
    },
    {
      "epoch": 1.9670763720222761,
      "grad_norm": 2.921875,
      "learning_rate": 1.912830258235525e-05,
      "loss": 0.7127,
      "step": 561260
    },
    {
      "epoch": 1.967111419529172,
      "grad_norm": 2.78125,
      "learning_rate": 1.9127653553691548e-05,
      "loss": 0.8378,
      "step": 561270
    },
    {
      "epoch": 1.9671464670360674,
      "grad_norm": 3.171875,
      "learning_rate": 1.9127004525027846e-05,
      "loss": 0.8643,
      "step": 561280
    },
    {
      "epoch": 1.967181514542963,
      "grad_norm": 2.375,
      "learning_rate": 1.9126355496364144e-05,
      "loss": 0.7996,
      "step": 561290
    },
    {
      "epoch": 1.9672165620498587,
      "grad_norm": 2.75,
      "learning_rate": 1.912570646770044e-05,
      "loss": 0.865,
      "step": 561300
    },
    {
      "epoch": 1.967251609556754,
      "grad_norm": 3.203125,
      "learning_rate": 1.912505743903674e-05,
      "loss": 0.785,
      "step": 561310
    },
    {
      "epoch": 1.9672866570636498,
      "grad_norm": 2.859375,
      "learning_rate": 1.9124408410373038e-05,
      "loss": 0.7852,
      "step": 561320
    },
    {
      "epoch": 1.9673217045705453,
      "grad_norm": 3.0,
      "learning_rate": 1.9123759381709336e-05,
      "loss": 0.8721,
      "step": 561330
    },
    {
      "epoch": 1.9673567520774409,
      "grad_norm": 2.484375,
      "learning_rate": 1.9123110353045634e-05,
      "loss": 0.7695,
      "step": 561340
    },
    {
      "epoch": 1.9673917995843366,
      "grad_norm": 2.9375,
      "learning_rate": 1.912246132438193e-05,
      "loss": 0.8007,
      "step": 561350
    },
    {
      "epoch": 1.9674268470912322,
      "grad_norm": 2.671875,
      "learning_rate": 1.9121812295718226e-05,
      "loss": 0.8487,
      "step": 561360
    },
    {
      "epoch": 1.9674618945981277,
      "grad_norm": 2.984375,
      "learning_rate": 1.9121163267054528e-05,
      "loss": 0.8331,
      "step": 561370
    },
    {
      "epoch": 1.9674969421050235,
      "grad_norm": 3.0625,
      "learning_rate": 1.9120514238390826e-05,
      "loss": 0.7842,
      "step": 561380
    },
    {
      "epoch": 1.967531989611919,
      "grad_norm": 2.9375,
      "learning_rate": 1.9119865209727124e-05,
      "loss": 0.8001,
      "step": 561390
    },
    {
      "epoch": 1.9675670371188145,
      "grad_norm": 3.390625,
      "learning_rate": 1.911921618106342e-05,
      "loss": 0.8601,
      "step": 561400
    },
    {
      "epoch": 1.9676020846257103,
      "grad_norm": 2.859375,
      "learning_rate": 1.911856715239972e-05,
      "loss": 0.8961,
      "step": 561410
    },
    {
      "epoch": 1.9676371321326056,
      "grad_norm": 2.5625,
      "learning_rate": 1.9117918123736018e-05,
      "loss": 0.7921,
      "step": 561420
    },
    {
      "epoch": 1.9676721796395014,
      "grad_norm": 2.84375,
      "learning_rate": 1.9117269095072316e-05,
      "loss": 0.8223,
      "step": 561430
    },
    {
      "epoch": 1.967707227146397,
      "grad_norm": 2.359375,
      "learning_rate": 1.9116620066408614e-05,
      "loss": 0.778,
      "step": 561440
    },
    {
      "epoch": 1.9677422746532924,
      "grad_norm": 2.203125,
      "learning_rate": 1.911597103774491e-05,
      "loss": 0.7848,
      "step": 561450
    },
    {
      "epoch": 1.9677773221601882,
      "grad_norm": 2.609375,
      "learning_rate": 1.911532200908121e-05,
      "loss": 0.8267,
      "step": 561460
    },
    {
      "epoch": 1.9678123696670837,
      "grad_norm": 2.6875,
      "learning_rate": 1.9114672980417508e-05,
      "loss": 0.7487,
      "step": 561470
    },
    {
      "epoch": 1.9678474171739793,
      "grad_norm": 2.875,
      "learning_rate": 1.9114023951753806e-05,
      "loss": 0.8298,
      "step": 561480
    },
    {
      "epoch": 1.967882464680875,
      "grad_norm": 2.859375,
      "learning_rate": 1.9113374923090104e-05,
      "loss": 0.857,
      "step": 561490
    },
    {
      "epoch": 1.9679175121877706,
      "grad_norm": 2.953125,
      "learning_rate": 1.91127258944264e-05,
      "loss": 0.8092,
      "step": 561500
    },
    {
      "epoch": 1.967952559694666,
      "grad_norm": 2.4375,
      "learning_rate": 1.9112076865762703e-05,
      "loss": 0.8412,
      "step": 561510
    },
    {
      "epoch": 1.9679876072015619,
      "grad_norm": 3.28125,
      "learning_rate": 1.9111427837099e-05,
      "loss": 0.7962,
      "step": 561520
    },
    {
      "epoch": 1.9680226547084572,
      "grad_norm": 3.1875,
      "learning_rate": 1.91107788084353e-05,
      "loss": 0.7181,
      "step": 561530
    },
    {
      "epoch": 1.968057702215353,
      "grad_norm": 2.859375,
      "learning_rate": 1.9110129779771597e-05,
      "loss": 0.7745,
      "step": 561540
    },
    {
      "epoch": 1.9680927497222485,
      "grad_norm": 2.9375,
      "learning_rate": 1.910948075110789e-05,
      "loss": 0.7201,
      "step": 561550
    },
    {
      "epoch": 1.968127797229144,
      "grad_norm": 2.65625,
      "learning_rate": 1.910883172244419e-05,
      "loss": 0.7827,
      "step": 561560
    },
    {
      "epoch": 1.9681628447360398,
      "grad_norm": 2.765625,
      "learning_rate": 1.9108182693780488e-05,
      "loss": 0.8064,
      "step": 561570
    },
    {
      "epoch": 1.9681978922429353,
      "grad_norm": 3.046875,
      "learning_rate": 1.9107533665116786e-05,
      "loss": 0.7995,
      "step": 561580
    },
    {
      "epoch": 1.9682329397498308,
      "grad_norm": 2.96875,
      "learning_rate": 1.9106884636453084e-05,
      "loss": 0.8106,
      "step": 561590
    },
    {
      "epoch": 1.9682679872567266,
      "grad_norm": 2.890625,
      "learning_rate": 1.910623560778938e-05,
      "loss": 0.8198,
      "step": 561600
    },
    {
      "epoch": 1.9683030347636221,
      "grad_norm": 2.78125,
      "learning_rate": 1.910558657912568e-05,
      "loss": 0.823,
      "step": 561610
    },
    {
      "epoch": 1.9683380822705177,
      "grad_norm": 2.53125,
      "learning_rate": 1.910493755046198e-05,
      "loss": 0.9186,
      "step": 561620
    },
    {
      "epoch": 1.9683731297774134,
      "grad_norm": 2.765625,
      "learning_rate": 1.910428852179828e-05,
      "loss": 0.7979,
      "step": 561630
    },
    {
      "epoch": 1.9684081772843087,
      "grad_norm": 2.8125,
      "learning_rate": 1.9103639493134577e-05,
      "loss": 0.8056,
      "step": 561640
    },
    {
      "epoch": 1.9684432247912045,
      "grad_norm": 3.03125,
      "learning_rate": 1.9102990464470875e-05,
      "loss": 0.8925,
      "step": 561650
    },
    {
      "epoch": 1.9684782722981002,
      "grad_norm": 3.421875,
      "learning_rate": 1.9102341435807173e-05,
      "loss": 0.8259,
      "step": 561660
    },
    {
      "epoch": 1.9685133198049956,
      "grad_norm": 2.625,
      "learning_rate": 1.910169240714347e-05,
      "loss": 0.7983,
      "step": 561670
    },
    {
      "epoch": 1.9685483673118913,
      "grad_norm": 2.921875,
      "learning_rate": 1.910104337847977e-05,
      "loss": 0.8361,
      "step": 561680
    },
    {
      "epoch": 1.9685834148187868,
      "grad_norm": 3.28125,
      "learning_rate": 1.9100394349816067e-05,
      "loss": 0.8117,
      "step": 561690
    },
    {
      "epoch": 1.9686184623256824,
      "grad_norm": 3.265625,
      "learning_rate": 1.9099745321152365e-05,
      "loss": 0.8345,
      "step": 561700
    },
    {
      "epoch": 1.9686535098325781,
      "grad_norm": 2.96875,
      "learning_rate": 1.9099096292488663e-05,
      "loss": 0.7994,
      "step": 561710
    },
    {
      "epoch": 1.9686885573394737,
      "grad_norm": 3.75,
      "learning_rate": 1.909844726382496e-05,
      "loss": 0.8289,
      "step": 561720
    },
    {
      "epoch": 1.9687236048463692,
      "grad_norm": 3.203125,
      "learning_rate": 1.9097798235161256e-05,
      "loss": 0.8407,
      "step": 561730
    },
    {
      "epoch": 1.968758652353265,
      "grad_norm": 2.515625,
      "learning_rate": 1.9097149206497557e-05,
      "loss": 0.9153,
      "step": 561740
    },
    {
      "epoch": 1.9687936998601603,
      "grad_norm": 2.65625,
      "learning_rate": 1.9096500177833855e-05,
      "loss": 0.8181,
      "step": 561750
    },
    {
      "epoch": 1.968828747367056,
      "grad_norm": 2.703125,
      "learning_rate": 1.9095851149170153e-05,
      "loss": 0.7945,
      "step": 561760
    },
    {
      "epoch": 1.9688637948739518,
      "grad_norm": 3.171875,
      "learning_rate": 1.909520212050645e-05,
      "loss": 0.855,
      "step": 561770
    },
    {
      "epoch": 1.9688988423808471,
      "grad_norm": 2.78125,
      "learning_rate": 1.909455309184275e-05,
      "loss": 0.8834,
      "step": 561780
    },
    {
      "epoch": 1.9689338898877429,
      "grad_norm": 3.0625,
      "learning_rate": 1.9093904063179047e-05,
      "loss": 0.7868,
      "step": 561790
    },
    {
      "epoch": 1.9689689373946384,
      "grad_norm": 3.375,
      "learning_rate": 1.9093255034515345e-05,
      "loss": 0.8763,
      "step": 561800
    },
    {
      "epoch": 1.969003984901534,
      "grad_norm": 2.828125,
      "learning_rate": 1.9092606005851643e-05,
      "loss": 0.7615,
      "step": 561810
    },
    {
      "epoch": 1.9690390324084297,
      "grad_norm": 2.953125,
      "learning_rate": 1.909195697718794e-05,
      "loss": 0.7947,
      "step": 561820
    },
    {
      "epoch": 1.9690740799153252,
      "grad_norm": 2.828125,
      "learning_rate": 1.909130794852424e-05,
      "loss": 0.8239,
      "step": 561830
    },
    {
      "epoch": 1.9691091274222208,
      "grad_norm": 3.5,
      "learning_rate": 1.9090658919860537e-05,
      "loss": 0.856,
      "step": 561840
    },
    {
      "epoch": 1.9691441749291165,
      "grad_norm": 2.75,
      "learning_rate": 1.9090009891196835e-05,
      "loss": 0.8428,
      "step": 561850
    },
    {
      "epoch": 1.969179222436012,
      "grad_norm": 2.859375,
      "learning_rate": 1.9089360862533133e-05,
      "loss": 0.8255,
      "step": 561860
    },
    {
      "epoch": 1.9692142699429076,
      "grad_norm": 2.9375,
      "learning_rate": 1.908871183386943e-05,
      "loss": 0.8102,
      "step": 561870
    },
    {
      "epoch": 1.9692493174498034,
      "grad_norm": 3.09375,
      "learning_rate": 1.9088062805205732e-05,
      "loss": 0.86,
      "step": 561880
    },
    {
      "epoch": 1.9692843649566987,
      "grad_norm": 3.484375,
      "learning_rate": 1.908741377654203e-05,
      "loss": 0.8649,
      "step": 561890
    },
    {
      "epoch": 1.9693194124635944,
      "grad_norm": 2.796875,
      "learning_rate": 1.908676474787833e-05,
      "loss": 0.8381,
      "step": 561900
    },
    {
      "epoch": 1.96935445997049,
      "grad_norm": 3.140625,
      "learning_rate": 1.9086115719214626e-05,
      "loss": 0.9145,
      "step": 561910
    },
    {
      "epoch": 1.9693895074773855,
      "grad_norm": 2.953125,
      "learning_rate": 1.908546669055092e-05,
      "loss": 0.9279,
      "step": 561920
    },
    {
      "epoch": 1.9694245549842813,
      "grad_norm": 2.96875,
      "learning_rate": 1.908481766188722e-05,
      "loss": 0.8774,
      "step": 561930
    },
    {
      "epoch": 1.9694596024911768,
      "grad_norm": 3.0,
      "learning_rate": 1.9084168633223517e-05,
      "loss": 0.8293,
      "step": 561940
    },
    {
      "epoch": 1.9694946499980723,
      "grad_norm": 3.0625,
      "learning_rate": 1.9083519604559815e-05,
      "loss": 0.8808,
      "step": 561950
    },
    {
      "epoch": 1.969529697504968,
      "grad_norm": 3.046875,
      "learning_rate": 1.9082870575896113e-05,
      "loss": 0.7711,
      "step": 561960
    },
    {
      "epoch": 1.9695647450118636,
      "grad_norm": 3.046875,
      "learning_rate": 1.908222154723241e-05,
      "loss": 0.7858,
      "step": 561970
    },
    {
      "epoch": 1.9695997925187592,
      "grad_norm": 2.65625,
      "learning_rate": 1.908157251856871e-05,
      "loss": 0.6765,
      "step": 561980
    },
    {
      "epoch": 1.969634840025655,
      "grad_norm": 2.484375,
      "learning_rate": 1.908092348990501e-05,
      "loss": 0.7576,
      "step": 561990
    },
    {
      "epoch": 1.9696698875325502,
      "grad_norm": 2.84375,
      "learning_rate": 1.908027446124131e-05,
      "loss": 0.7996,
      "step": 562000
    },
    {
      "epoch": 1.969704935039446,
      "grad_norm": 2.96875,
      "learning_rate": 1.9079625432577606e-05,
      "loss": 0.7995,
      "step": 562010
    },
    {
      "epoch": 1.9697399825463415,
      "grad_norm": 3.21875,
      "learning_rate": 1.9078976403913904e-05,
      "loss": 0.7425,
      "step": 562020
    },
    {
      "epoch": 1.969775030053237,
      "grad_norm": 2.8125,
      "learning_rate": 1.9078327375250202e-05,
      "loss": 0.8065,
      "step": 562030
    },
    {
      "epoch": 1.9698100775601328,
      "grad_norm": 2.765625,
      "learning_rate": 1.90776783465865e-05,
      "loss": 0.8189,
      "step": 562040
    },
    {
      "epoch": 1.9698451250670284,
      "grad_norm": 3.140625,
      "learning_rate": 1.90770293179228e-05,
      "loss": 0.8423,
      "step": 562050
    },
    {
      "epoch": 1.969880172573924,
      "grad_norm": 2.640625,
      "learning_rate": 1.9076380289259096e-05,
      "loss": 0.7974,
      "step": 562060
    },
    {
      "epoch": 1.9699152200808197,
      "grad_norm": 2.578125,
      "learning_rate": 1.9075731260595394e-05,
      "loss": 0.8384,
      "step": 562070
    },
    {
      "epoch": 1.9699502675877152,
      "grad_norm": 3.28125,
      "learning_rate": 1.9075082231931692e-05,
      "loss": 0.7894,
      "step": 562080
    },
    {
      "epoch": 1.9699853150946107,
      "grad_norm": 2.765625,
      "learning_rate": 1.907443320326799e-05,
      "loss": 0.8255,
      "step": 562090
    },
    {
      "epoch": 1.9700203626015065,
      "grad_norm": 2.828125,
      "learning_rate": 1.907378417460429e-05,
      "loss": 0.8571,
      "step": 562100
    },
    {
      "epoch": 1.9700554101084018,
      "grad_norm": 2.71875,
      "learning_rate": 1.9073135145940586e-05,
      "loss": 0.763,
      "step": 562110
    },
    {
      "epoch": 1.9700904576152976,
      "grad_norm": 2.96875,
      "learning_rate": 1.9072486117276884e-05,
      "loss": 0.7924,
      "step": 562120
    },
    {
      "epoch": 1.970125505122193,
      "grad_norm": 3.703125,
      "learning_rate": 1.9071837088613182e-05,
      "loss": 0.9656,
      "step": 562130
    },
    {
      "epoch": 1.9701605526290886,
      "grad_norm": 3.0,
      "learning_rate": 1.907118805994948e-05,
      "loss": 0.8051,
      "step": 562140
    },
    {
      "epoch": 1.9701956001359844,
      "grad_norm": 2.734375,
      "learning_rate": 1.907053903128578e-05,
      "loss": 0.7903,
      "step": 562150
    },
    {
      "epoch": 1.97023064764288,
      "grad_norm": 3.25,
      "learning_rate": 1.9069890002622076e-05,
      "loss": 0.8355,
      "step": 562160
    },
    {
      "epoch": 1.9702656951497755,
      "grad_norm": 3.1875,
      "learning_rate": 1.9069240973958374e-05,
      "loss": 0.8462,
      "step": 562170
    },
    {
      "epoch": 1.9703007426566712,
      "grad_norm": 3.109375,
      "learning_rate": 1.9068591945294672e-05,
      "loss": 0.833,
      "step": 562180
    },
    {
      "epoch": 1.9703357901635667,
      "grad_norm": 2.265625,
      "learning_rate": 1.906794291663097e-05,
      "loss": 0.8155,
      "step": 562190
    },
    {
      "epoch": 1.9703708376704623,
      "grad_norm": 2.984375,
      "learning_rate": 1.906729388796727e-05,
      "loss": 0.7991,
      "step": 562200
    },
    {
      "epoch": 1.970405885177358,
      "grad_norm": 2.890625,
      "learning_rate": 1.9066644859303566e-05,
      "loss": 0.7597,
      "step": 562210
    },
    {
      "epoch": 1.9704409326842534,
      "grad_norm": 3.078125,
      "learning_rate": 1.9065995830639864e-05,
      "loss": 0.7849,
      "step": 562220
    },
    {
      "epoch": 1.9704759801911491,
      "grad_norm": 2.8125,
      "learning_rate": 1.9065346801976162e-05,
      "loss": 0.8303,
      "step": 562230
    },
    {
      "epoch": 1.9705110276980446,
      "grad_norm": 2.640625,
      "learning_rate": 1.9064697773312464e-05,
      "loss": 0.8194,
      "step": 562240
    },
    {
      "epoch": 1.9705460752049402,
      "grad_norm": 2.75,
      "learning_rate": 1.9064048744648762e-05,
      "loss": 0.8317,
      "step": 562250
    },
    {
      "epoch": 1.970581122711836,
      "grad_norm": 3.09375,
      "learning_rate": 1.906339971598506e-05,
      "loss": 0.7593,
      "step": 562260
    },
    {
      "epoch": 1.9706161702187315,
      "grad_norm": 2.71875,
      "learning_rate": 1.9062750687321358e-05,
      "loss": 0.7915,
      "step": 562270
    },
    {
      "epoch": 1.970651217725627,
      "grad_norm": 2.640625,
      "learning_rate": 1.9062101658657656e-05,
      "loss": 0.8707,
      "step": 562280
    },
    {
      "epoch": 1.9706862652325228,
      "grad_norm": 3.171875,
      "learning_rate": 1.9061452629993954e-05,
      "loss": 0.8502,
      "step": 562290
    },
    {
      "epoch": 1.9707213127394183,
      "grad_norm": 2.734375,
      "learning_rate": 1.906080360133025e-05,
      "loss": 0.807,
      "step": 562300
    },
    {
      "epoch": 1.9707563602463138,
      "grad_norm": 3.125,
      "learning_rate": 1.9060154572666546e-05,
      "loss": 0.8473,
      "step": 562310
    },
    {
      "epoch": 1.9707914077532096,
      "grad_norm": 2.671875,
      "learning_rate": 1.9059505544002844e-05,
      "loss": 0.7929,
      "step": 562320
    },
    {
      "epoch": 1.970826455260105,
      "grad_norm": 3.0,
      "learning_rate": 1.9058856515339142e-05,
      "loss": 0.861,
      "step": 562330
    },
    {
      "epoch": 1.9708615027670007,
      "grad_norm": 3.28125,
      "learning_rate": 1.905820748667544e-05,
      "loss": 0.8608,
      "step": 562340
    },
    {
      "epoch": 1.9708965502738964,
      "grad_norm": 2.828125,
      "learning_rate": 1.905755845801174e-05,
      "loss": 0.8288,
      "step": 562350
    },
    {
      "epoch": 1.9709315977807917,
      "grad_norm": 2.9375,
      "learning_rate": 1.905690942934804e-05,
      "loss": 0.7763,
      "step": 562360
    },
    {
      "epoch": 1.9709666452876875,
      "grad_norm": 2.921875,
      "learning_rate": 1.9056260400684338e-05,
      "loss": 0.8488,
      "step": 562370
    },
    {
      "epoch": 1.971001692794583,
      "grad_norm": 2.671875,
      "learning_rate": 1.9055611372020636e-05,
      "loss": 0.8205,
      "step": 562380
    },
    {
      "epoch": 1.9710367403014786,
      "grad_norm": 3.15625,
      "learning_rate": 1.9054962343356934e-05,
      "loss": 0.8082,
      "step": 562390
    },
    {
      "epoch": 1.9710717878083743,
      "grad_norm": 3.359375,
      "learning_rate": 1.9054313314693232e-05,
      "loss": 0.8127,
      "step": 562400
    },
    {
      "epoch": 1.9711068353152699,
      "grad_norm": 2.328125,
      "learning_rate": 1.905366428602953e-05,
      "loss": 0.7614,
      "step": 562410
    },
    {
      "epoch": 1.9711418828221654,
      "grad_norm": 3.125,
      "learning_rate": 1.9053015257365828e-05,
      "loss": 0.8083,
      "step": 562420
    },
    {
      "epoch": 1.9711769303290612,
      "grad_norm": 3.046875,
      "learning_rate": 1.9052366228702126e-05,
      "loss": 0.771,
      "step": 562430
    },
    {
      "epoch": 1.9712119778359565,
      "grad_norm": 2.78125,
      "learning_rate": 1.9051717200038424e-05,
      "loss": 0.8059,
      "step": 562440
    },
    {
      "epoch": 1.9712470253428522,
      "grad_norm": 2.796875,
      "learning_rate": 1.9051068171374722e-05,
      "loss": 0.8449,
      "step": 562450
    },
    {
      "epoch": 1.971282072849748,
      "grad_norm": 3.078125,
      "learning_rate": 1.905041914271102e-05,
      "loss": 0.7715,
      "step": 562460
    },
    {
      "epoch": 1.9713171203566433,
      "grad_norm": 3.375,
      "learning_rate": 1.9049770114047318e-05,
      "loss": 0.7658,
      "step": 562470
    },
    {
      "epoch": 1.971352167863539,
      "grad_norm": 3.078125,
      "learning_rate": 1.9049121085383616e-05,
      "loss": 0.8076,
      "step": 562480
    },
    {
      "epoch": 1.9713872153704346,
      "grad_norm": 2.75,
      "learning_rate": 1.9048472056719914e-05,
      "loss": 0.8969,
      "step": 562490
    },
    {
      "epoch": 1.9714222628773301,
      "grad_norm": 3.359375,
      "learning_rate": 1.9047823028056212e-05,
      "loss": 0.8754,
      "step": 562500
    },
    {
      "epoch": 1.971457310384226,
      "grad_norm": 3.09375,
      "learning_rate": 1.904717399939251e-05,
      "loss": 0.8029,
      "step": 562510
    },
    {
      "epoch": 1.9714923578911214,
      "grad_norm": 3.578125,
      "learning_rate": 1.9046524970728808e-05,
      "loss": 0.901,
      "step": 562520
    },
    {
      "epoch": 1.971527405398017,
      "grad_norm": 2.484375,
      "learning_rate": 1.9045875942065106e-05,
      "loss": 0.7952,
      "step": 562530
    },
    {
      "epoch": 1.9715624529049127,
      "grad_norm": 2.328125,
      "learning_rate": 1.9045226913401404e-05,
      "loss": 0.7727,
      "step": 562540
    },
    {
      "epoch": 1.9715975004118083,
      "grad_norm": 2.65625,
      "learning_rate": 1.9044577884737702e-05,
      "loss": 0.8602,
      "step": 562550
    },
    {
      "epoch": 1.9716325479187038,
      "grad_norm": 3.09375,
      "learning_rate": 1.9043928856074e-05,
      "loss": 0.8325,
      "step": 562560
    },
    {
      "epoch": 1.9716675954255996,
      "grad_norm": 2.515625,
      "learning_rate": 1.9043279827410298e-05,
      "loss": 0.8086,
      "step": 562570
    },
    {
      "epoch": 1.9717026429324949,
      "grad_norm": 2.796875,
      "learning_rate": 1.9042630798746596e-05,
      "loss": 0.8417,
      "step": 562580
    },
    {
      "epoch": 1.9717376904393906,
      "grad_norm": 2.796875,
      "learning_rate": 1.9041981770082894e-05,
      "loss": 0.8106,
      "step": 562590
    },
    {
      "epoch": 1.9717727379462862,
      "grad_norm": 2.578125,
      "learning_rate": 1.9041332741419192e-05,
      "loss": 0.8432,
      "step": 562600
    },
    {
      "epoch": 1.9718077854531817,
      "grad_norm": 3.0,
      "learning_rate": 1.9040683712755493e-05,
      "loss": 0.8518,
      "step": 562610
    },
    {
      "epoch": 1.9718428329600775,
      "grad_norm": 2.734375,
      "learning_rate": 1.904003468409179e-05,
      "loss": 0.8273,
      "step": 562620
    },
    {
      "epoch": 1.971877880466973,
      "grad_norm": 2.4375,
      "learning_rate": 1.903938565542809e-05,
      "loss": 0.8056,
      "step": 562630
    },
    {
      "epoch": 1.9719129279738685,
      "grad_norm": 2.75,
      "learning_rate": 1.9038736626764387e-05,
      "loss": 0.756,
      "step": 562640
    },
    {
      "epoch": 1.9719479754807643,
      "grad_norm": 2.515625,
      "learning_rate": 1.9038087598100685e-05,
      "loss": 0.8665,
      "step": 562650
    },
    {
      "epoch": 1.9719830229876598,
      "grad_norm": 3.0,
      "learning_rate": 1.9037438569436983e-05,
      "loss": 0.8273,
      "step": 562660
    },
    {
      "epoch": 1.9720180704945554,
      "grad_norm": 2.6875,
      "learning_rate": 1.9036789540773278e-05,
      "loss": 0.7839,
      "step": 562670
    },
    {
      "epoch": 1.972053118001451,
      "grad_norm": 3.328125,
      "learning_rate": 1.9036140512109576e-05,
      "loss": 0.8109,
      "step": 562680
    },
    {
      "epoch": 1.9720881655083464,
      "grad_norm": 3.1875,
      "learning_rate": 1.9035491483445874e-05,
      "loss": 0.8001,
      "step": 562690
    },
    {
      "epoch": 1.9721232130152422,
      "grad_norm": 3.0625,
      "learning_rate": 1.9034842454782172e-05,
      "loss": 0.8535,
      "step": 562700
    },
    {
      "epoch": 1.9721582605221377,
      "grad_norm": 2.734375,
      "learning_rate": 1.903419342611847e-05,
      "loss": 0.8015,
      "step": 562710
    },
    {
      "epoch": 1.9721933080290333,
      "grad_norm": 2.78125,
      "learning_rate": 1.903354439745477e-05,
      "loss": 0.8567,
      "step": 562720
    },
    {
      "epoch": 1.972228355535929,
      "grad_norm": 3.171875,
      "learning_rate": 1.903289536879107e-05,
      "loss": 0.7481,
      "step": 562730
    },
    {
      "epoch": 1.9722634030428245,
      "grad_norm": 2.90625,
      "learning_rate": 1.9032246340127367e-05,
      "loss": 0.817,
      "step": 562740
    },
    {
      "epoch": 1.97229845054972,
      "grad_norm": 2.859375,
      "learning_rate": 1.9031597311463665e-05,
      "loss": 0.8249,
      "step": 562750
    },
    {
      "epoch": 1.9723334980566158,
      "grad_norm": 4.0625,
      "learning_rate": 1.9030948282799963e-05,
      "loss": 0.81,
      "step": 562760
    },
    {
      "epoch": 1.9723685455635114,
      "grad_norm": 2.6875,
      "learning_rate": 1.903029925413626e-05,
      "loss": 0.8182,
      "step": 562770
    },
    {
      "epoch": 1.972403593070407,
      "grad_norm": 3.171875,
      "learning_rate": 1.902965022547256e-05,
      "loss": 0.8681,
      "step": 562780
    },
    {
      "epoch": 1.9724386405773027,
      "grad_norm": 2.8125,
      "learning_rate": 1.9029001196808857e-05,
      "loss": 0.8629,
      "step": 562790
    },
    {
      "epoch": 1.972473688084198,
      "grad_norm": 3.4375,
      "learning_rate": 1.9028352168145155e-05,
      "loss": 0.8632,
      "step": 562800
    },
    {
      "epoch": 1.9725087355910937,
      "grad_norm": 2.859375,
      "learning_rate": 1.9027703139481453e-05,
      "loss": 0.8462,
      "step": 562810
    },
    {
      "epoch": 1.9725437830979893,
      "grad_norm": 2.34375,
      "learning_rate": 1.902705411081775e-05,
      "loss": 0.8489,
      "step": 562820
    },
    {
      "epoch": 1.9725788306048848,
      "grad_norm": 2.484375,
      "learning_rate": 1.902640508215405e-05,
      "loss": 0.7755,
      "step": 562830
    },
    {
      "epoch": 1.9726138781117806,
      "grad_norm": 2.4375,
      "learning_rate": 1.9025756053490347e-05,
      "loss": 0.8132,
      "step": 562840
    },
    {
      "epoch": 1.972648925618676,
      "grad_norm": 3.3125,
      "learning_rate": 1.9025107024826645e-05,
      "loss": 0.7892,
      "step": 562850
    },
    {
      "epoch": 1.9726839731255716,
      "grad_norm": 2.796875,
      "learning_rate": 1.9024457996162943e-05,
      "loss": 0.8016,
      "step": 562860
    },
    {
      "epoch": 1.9727190206324674,
      "grad_norm": 2.78125,
      "learning_rate": 1.902380896749924e-05,
      "loss": 0.8082,
      "step": 562870
    },
    {
      "epoch": 1.972754068139363,
      "grad_norm": 3.0625,
      "learning_rate": 1.902315993883554e-05,
      "loss": 0.8119,
      "step": 562880
    },
    {
      "epoch": 1.9727891156462585,
      "grad_norm": 2.65625,
      "learning_rate": 1.9022510910171837e-05,
      "loss": 0.815,
      "step": 562890
    },
    {
      "epoch": 1.9728241631531542,
      "grad_norm": 3.203125,
      "learning_rate": 1.9021861881508135e-05,
      "loss": 0.8348,
      "step": 562900
    },
    {
      "epoch": 1.9728592106600495,
      "grad_norm": 2.515625,
      "learning_rate": 1.9021212852844433e-05,
      "loss": 0.7615,
      "step": 562910
    },
    {
      "epoch": 1.9728942581669453,
      "grad_norm": 3.296875,
      "learning_rate": 1.902056382418073e-05,
      "loss": 0.9054,
      "step": 562920
    },
    {
      "epoch": 1.9729293056738408,
      "grad_norm": 2.84375,
      "learning_rate": 1.901991479551703e-05,
      "loss": 0.7918,
      "step": 562930
    },
    {
      "epoch": 1.9729643531807364,
      "grad_norm": 2.953125,
      "learning_rate": 1.9019265766853327e-05,
      "loss": 0.8805,
      "step": 562940
    },
    {
      "epoch": 1.9729994006876321,
      "grad_norm": 2.59375,
      "learning_rate": 1.9018616738189625e-05,
      "loss": 0.7174,
      "step": 562950
    },
    {
      "epoch": 1.9730344481945277,
      "grad_norm": 2.609375,
      "learning_rate": 1.9017967709525923e-05,
      "loss": 0.8367,
      "step": 562960
    },
    {
      "epoch": 1.9730694957014232,
      "grad_norm": 2.90625,
      "learning_rate": 1.901731868086222e-05,
      "loss": 0.8056,
      "step": 562970
    },
    {
      "epoch": 1.973104543208319,
      "grad_norm": 3.078125,
      "learning_rate": 1.9016669652198522e-05,
      "loss": 0.8292,
      "step": 562980
    },
    {
      "epoch": 1.9731395907152145,
      "grad_norm": 3.25,
      "learning_rate": 1.901602062353482e-05,
      "loss": 0.7937,
      "step": 562990
    },
    {
      "epoch": 1.97317463822211,
      "grad_norm": 2.71875,
      "learning_rate": 1.901537159487112e-05,
      "loss": 0.7783,
      "step": 563000
    },
    {
      "epoch": 1.9732096857290058,
      "grad_norm": 2.875,
      "learning_rate": 1.9014722566207416e-05,
      "loss": 0.8396,
      "step": 563010
    },
    {
      "epoch": 1.973244733235901,
      "grad_norm": 2.984375,
      "learning_rate": 1.9014073537543714e-05,
      "loss": 0.8076,
      "step": 563020
    },
    {
      "epoch": 1.9732797807427969,
      "grad_norm": 3.03125,
      "learning_rate": 1.9013424508880012e-05,
      "loss": 0.8914,
      "step": 563030
    },
    {
      "epoch": 1.9733148282496926,
      "grad_norm": 2.546875,
      "learning_rate": 1.901277548021631e-05,
      "loss": 0.7646,
      "step": 563040
    },
    {
      "epoch": 1.973349875756588,
      "grad_norm": 3.1875,
      "learning_rate": 1.9012126451552605e-05,
      "loss": 0.7819,
      "step": 563050
    },
    {
      "epoch": 1.9733849232634837,
      "grad_norm": 4.0625,
      "learning_rate": 1.9011477422888903e-05,
      "loss": 0.7637,
      "step": 563060
    },
    {
      "epoch": 1.9734199707703792,
      "grad_norm": 2.4375,
      "learning_rate": 1.90108283942252e-05,
      "loss": 0.8314,
      "step": 563070
    },
    {
      "epoch": 1.9734550182772748,
      "grad_norm": 3.1875,
      "learning_rate": 1.90101793655615e-05,
      "loss": 0.8603,
      "step": 563080
    },
    {
      "epoch": 1.9734900657841705,
      "grad_norm": 2.5625,
      "learning_rate": 1.90095303368978e-05,
      "loss": 0.7621,
      "step": 563090
    },
    {
      "epoch": 1.973525113291066,
      "grad_norm": 2.75,
      "learning_rate": 1.90088813082341e-05,
      "loss": 0.7818,
      "step": 563100
    },
    {
      "epoch": 1.9735601607979616,
      "grad_norm": 2.5625,
      "learning_rate": 1.9008232279570396e-05,
      "loss": 0.8179,
      "step": 563110
    },
    {
      "epoch": 1.9735952083048574,
      "grad_norm": 2.4375,
      "learning_rate": 1.9007583250906694e-05,
      "loss": 0.8188,
      "step": 563120
    },
    {
      "epoch": 1.9736302558117527,
      "grad_norm": 2.859375,
      "learning_rate": 1.9006934222242992e-05,
      "loss": 0.9297,
      "step": 563130
    },
    {
      "epoch": 1.9736653033186484,
      "grad_norm": 3.125,
      "learning_rate": 1.900628519357929e-05,
      "loss": 0.8795,
      "step": 563140
    },
    {
      "epoch": 1.9737003508255442,
      "grad_norm": 3.421875,
      "learning_rate": 1.900563616491559e-05,
      "loss": 0.8084,
      "step": 563150
    },
    {
      "epoch": 1.9737353983324395,
      "grad_norm": 2.828125,
      "learning_rate": 1.9004987136251886e-05,
      "loss": 0.85,
      "step": 563160
    },
    {
      "epoch": 1.9737704458393353,
      "grad_norm": 3.03125,
      "learning_rate": 1.9004338107588184e-05,
      "loss": 0.876,
      "step": 563170
    },
    {
      "epoch": 1.9738054933462308,
      "grad_norm": 2.796875,
      "learning_rate": 1.9003689078924482e-05,
      "loss": 0.7543,
      "step": 563180
    },
    {
      "epoch": 1.9738405408531263,
      "grad_norm": 2.609375,
      "learning_rate": 1.900304005026078e-05,
      "loss": 0.7951,
      "step": 563190
    },
    {
      "epoch": 1.973875588360022,
      "grad_norm": 2.546875,
      "learning_rate": 1.900239102159708e-05,
      "loss": 0.8086,
      "step": 563200
    },
    {
      "epoch": 1.9739106358669176,
      "grad_norm": 3.0,
      "learning_rate": 1.9001741992933376e-05,
      "loss": 0.8164,
      "step": 563210
    },
    {
      "epoch": 1.9739456833738132,
      "grad_norm": 3.078125,
      "learning_rate": 1.9001092964269674e-05,
      "loss": 0.7948,
      "step": 563220
    },
    {
      "epoch": 1.973980730880709,
      "grad_norm": 3.15625,
      "learning_rate": 1.9000443935605976e-05,
      "loss": 0.8259,
      "step": 563230
    },
    {
      "epoch": 1.9740157783876044,
      "grad_norm": 2.796875,
      "learning_rate": 1.899979490694227e-05,
      "loss": 0.8707,
      "step": 563240
    },
    {
      "epoch": 1.9740508258945,
      "grad_norm": 3.21875,
      "learning_rate": 1.899914587827857e-05,
      "loss": 0.8649,
      "step": 563250
    },
    {
      "epoch": 1.9740858734013957,
      "grad_norm": 3.671875,
      "learning_rate": 1.8998496849614866e-05,
      "loss": 0.8461,
      "step": 563260
    },
    {
      "epoch": 1.974120920908291,
      "grad_norm": 3.015625,
      "learning_rate": 1.8997847820951164e-05,
      "loss": 0.8424,
      "step": 563270
    },
    {
      "epoch": 1.9741559684151868,
      "grad_norm": 2.53125,
      "learning_rate": 1.8997198792287462e-05,
      "loss": 0.841,
      "step": 563280
    },
    {
      "epoch": 1.9741910159220823,
      "grad_norm": 2.859375,
      "learning_rate": 1.899654976362376e-05,
      "loss": 0.7446,
      "step": 563290
    },
    {
      "epoch": 1.9742260634289779,
      "grad_norm": 3.34375,
      "learning_rate": 1.899590073496006e-05,
      "loss": 0.9183,
      "step": 563300
    },
    {
      "epoch": 1.9742611109358736,
      "grad_norm": 2.96875,
      "learning_rate": 1.8995251706296356e-05,
      "loss": 0.8257,
      "step": 563310
    },
    {
      "epoch": 1.9742961584427692,
      "grad_norm": 3.3125,
      "learning_rate": 1.8994602677632654e-05,
      "loss": 0.7588,
      "step": 563320
    },
    {
      "epoch": 1.9743312059496647,
      "grad_norm": 3.28125,
      "learning_rate": 1.8993953648968952e-05,
      "loss": 0.9244,
      "step": 563330
    },
    {
      "epoch": 1.9743662534565605,
      "grad_norm": 2.984375,
      "learning_rate": 1.8993304620305254e-05,
      "loss": 0.8163,
      "step": 563340
    },
    {
      "epoch": 1.974401300963456,
      "grad_norm": 2.9375,
      "learning_rate": 1.8992655591641552e-05,
      "loss": 0.8547,
      "step": 563350
    },
    {
      "epoch": 1.9744363484703515,
      "grad_norm": 2.671875,
      "learning_rate": 1.899200656297785e-05,
      "loss": 0.7814,
      "step": 563360
    },
    {
      "epoch": 1.9744713959772473,
      "grad_norm": 2.609375,
      "learning_rate": 1.8991357534314148e-05,
      "loss": 0.8403,
      "step": 563370
    },
    {
      "epoch": 1.9745064434841426,
      "grad_norm": 2.765625,
      "learning_rate": 1.8990708505650446e-05,
      "loss": 0.8124,
      "step": 563380
    },
    {
      "epoch": 1.9745414909910384,
      "grad_norm": 2.765625,
      "learning_rate": 1.8990059476986744e-05,
      "loss": 0.7929,
      "step": 563390
    },
    {
      "epoch": 1.974576538497934,
      "grad_norm": 2.796875,
      "learning_rate": 1.8989410448323042e-05,
      "loss": 0.8409,
      "step": 563400
    },
    {
      "epoch": 1.9746115860048294,
      "grad_norm": 3.03125,
      "learning_rate": 1.898876141965934e-05,
      "loss": 0.7448,
      "step": 563410
    },
    {
      "epoch": 1.9746466335117252,
      "grad_norm": 3.015625,
      "learning_rate": 1.8988112390995638e-05,
      "loss": 0.8895,
      "step": 563420
    },
    {
      "epoch": 1.9746816810186207,
      "grad_norm": 3.5,
      "learning_rate": 1.8987463362331932e-05,
      "loss": 0.8304,
      "step": 563430
    },
    {
      "epoch": 1.9747167285255163,
      "grad_norm": 3.0625,
      "learning_rate": 1.898681433366823e-05,
      "loss": 0.9138,
      "step": 563440
    },
    {
      "epoch": 1.974751776032412,
      "grad_norm": 2.828125,
      "learning_rate": 1.898616530500453e-05,
      "loss": 0.7717,
      "step": 563450
    },
    {
      "epoch": 1.9747868235393076,
      "grad_norm": 2.84375,
      "learning_rate": 1.898551627634083e-05,
      "loss": 0.8125,
      "step": 563460
    },
    {
      "epoch": 1.974821871046203,
      "grad_norm": 3.265625,
      "learning_rate": 1.8984867247677128e-05,
      "loss": 0.8885,
      "step": 563470
    },
    {
      "epoch": 1.9748569185530989,
      "grad_norm": 2.71875,
      "learning_rate": 1.8984218219013426e-05,
      "loss": 0.8951,
      "step": 563480
    },
    {
      "epoch": 1.9748919660599942,
      "grad_norm": 3.046875,
      "learning_rate": 1.8983569190349724e-05,
      "loss": 0.8558,
      "step": 563490
    },
    {
      "epoch": 1.97492701356689,
      "grad_norm": 2.609375,
      "learning_rate": 1.8982920161686022e-05,
      "loss": 0.8506,
      "step": 563500
    },
    {
      "epoch": 1.9749620610737855,
      "grad_norm": 2.890625,
      "learning_rate": 1.898227113302232e-05,
      "loss": 0.9139,
      "step": 563510
    },
    {
      "epoch": 1.974997108580681,
      "grad_norm": 2.734375,
      "learning_rate": 1.8981622104358618e-05,
      "loss": 0.8132,
      "step": 563520
    },
    {
      "epoch": 1.9750321560875768,
      "grad_norm": 2.953125,
      "learning_rate": 1.8980973075694916e-05,
      "loss": 0.8198,
      "step": 563530
    },
    {
      "epoch": 1.9750672035944723,
      "grad_norm": 2.578125,
      "learning_rate": 1.8980324047031214e-05,
      "loss": 0.8323,
      "step": 563540
    },
    {
      "epoch": 1.9751022511013678,
      "grad_norm": 2.6875,
      "learning_rate": 1.8979675018367512e-05,
      "loss": 0.8206,
      "step": 563550
    },
    {
      "epoch": 1.9751372986082636,
      "grad_norm": 2.921875,
      "learning_rate": 1.897902598970381e-05,
      "loss": 0.7496,
      "step": 563560
    },
    {
      "epoch": 1.9751723461151591,
      "grad_norm": 2.890625,
      "learning_rate": 1.8978376961040108e-05,
      "loss": 0.9061,
      "step": 563570
    },
    {
      "epoch": 1.9752073936220547,
      "grad_norm": 2.484375,
      "learning_rate": 1.8977727932376406e-05,
      "loss": 0.78,
      "step": 563580
    },
    {
      "epoch": 1.9752424411289504,
      "grad_norm": 2.515625,
      "learning_rate": 1.8977078903712707e-05,
      "loss": 0.719,
      "step": 563590
    },
    {
      "epoch": 1.9752774886358457,
      "grad_norm": 2.796875,
      "learning_rate": 1.8976429875049005e-05,
      "loss": 0.852,
      "step": 563600
    },
    {
      "epoch": 1.9753125361427415,
      "grad_norm": 2.71875,
      "learning_rate": 1.89757808463853e-05,
      "loss": 0.7759,
      "step": 563610
    },
    {
      "epoch": 1.975347583649637,
      "grad_norm": 2.78125,
      "learning_rate": 1.8975131817721598e-05,
      "loss": 0.9047,
      "step": 563620
    },
    {
      "epoch": 1.9753826311565326,
      "grad_norm": 2.5,
      "learning_rate": 1.8974482789057896e-05,
      "loss": 0.863,
      "step": 563630
    },
    {
      "epoch": 1.9754176786634283,
      "grad_norm": 2.6875,
      "learning_rate": 1.8973833760394194e-05,
      "loss": 0.7778,
      "step": 563640
    },
    {
      "epoch": 1.9754527261703239,
      "grad_norm": 3.890625,
      "learning_rate": 1.8973184731730492e-05,
      "loss": 0.7899,
      "step": 563650
    },
    {
      "epoch": 1.9754877736772194,
      "grad_norm": 2.78125,
      "learning_rate": 1.897253570306679e-05,
      "loss": 0.8309,
      "step": 563660
    },
    {
      "epoch": 1.9755228211841152,
      "grad_norm": 2.6875,
      "learning_rate": 1.8971886674403088e-05,
      "loss": 0.8263,
      "step": 563670
    },
    {
      "epoch": 1.9755578686910107,
      "grad_norm": 2.984375,
      "learning_rate": 1.8971237645739386e-05,
      "loss": 0.9201,
      "step": 563680
    },
    {
      "epoch": 1.9755929161979062,
      "grad_norm": 2.78125,
      "learning_rate": 1.8970588617075684e-05,
      "loss": 0.8097,
      "step": 563690
    },
    {
      "epoch": 1.975627963704802,
      "grad_norm": 2.625,
      "learning_rate": 1.8969939588411982e-05,
      "loss": 0.8393,
      "step": 563700
    },
    {
      "epoch": 1.9756630112116973,
      "grad_norm": 3.046875,
      "learning_rate": 1.8969290559748283e-05,
      "loss": 0.8855,
      "step": 563710
    },
    {
      "epoch": 1.975698058718593,
      "grad_norm": 2.890625,
      "learning_rate": 1.896864153108458e-05,
      "loss": 0.8051,
      "step": 563720
    },
    {
      "epoch": 1.9757331062254888,
      "grad_norm": 3.6875,
      "learning_rate": 1.896799250242088e-05,
      "loss": 0.7881,
      "step": 563730
    },
    {
      "epoch": 1.9757681537323841,
      "grad_norm": 2.90625,
      "learning_rate": 1.8967343473757177e-05,
      "loss": 0.9001,
      "step": 563740
    },
    {
      "epoch": 1.9758032012392799,
      "grad_norm": 2.953125,
      "learning_rate": 1.8966694445093475e-05,
      "loss": 0.7693,
      "step": 563750
    },
    {
      "epoch": 1.9758382487461754,
      "grad_norm": 3.28125,
      "learning_rate": 1.8966045416429773e-05,
      "loss": 0.8457,
      "step": 563760
    },
    {
      "epoch": 1.975873296253071,
      "grad_norm": 2.546875,
      "learning_rate": 1.896539638776607e-05,
      "loss": 0.8081,
      "step": 563770
    },
    {
      "epoch": 1.9759083437599667,
      "grad_norm": 3.03125,
      "learning_rate": 1.896474735910237e-05,
      "loss": 0.8776,
      "step": 563780
    },
    {
      "epoch": 1.9759433912668622,
      "grad_norm": 2.9375,
      "learning_rate": 1.8964098330438667e-05,
      "loss": 0.8297,
      "step": 563790
    },
    {
      "epoch": 1.9759784387737578,
      "grad_norm": 2.734375,
      "learning_rate": 1.8963449301774962e-05,
      "loss": 0.82,
      "step": 563800
    },
    {
      "epoch": 1.9760134862806535,
      "grad_norm": 2.9375,
      "learning_rate": 1.896280027311126e-05,
      "loss": 0.9149,
      "step": 563810
    },
    {
      "epoch": 1.976048533787549,
      "grad_norm": 3.046875,
      "learning_rate": 1.896215124444756e-05,
      "loss": 0.8362,
      "step": 563820
    },
    {
      "epoch": 1.9760835812944446,
      "grad_norm": 3.015625,
      "learning_rate": 1.896150221578386e-05,
      "loss": 0.8319,
      "step": 563830
    },
    {
      "epoch": 1.9761186288013404,
      "grad_norm": 3.0625,
      "learning_rate": 1.8960853187120157e-05,
      "loss": 0.7812,
      "step": 563840
    },
    {
      "epoch": 1.9761536763082357,
      "grad_norm": 2.453125,
      "learning_rate": 1.8960204158456455e-05,
      "loss": 0.72,
      "step": 563850
    },
    {
      "epoch": 1.9761887238151314,
      "grad_norm": 2.78125,
      "learning_rate": 1.8959555129792753e-05,
      "loss": 0.7658,
      "step": 563860
    },
    {
      "epoch": 1.976223771322027,
      "grad_norm": 2.40625,
      "learning_rate": 1.895890610112905e-05,
      "loss": 0.8615,
      "step": 563870
    },
    {
      "epoch": 1.9762588188289225,
      "grad_norm": 2.890625,
      "learning_rate": 1.895825707246535e-05,
      "loss": 0.8212,
      "step": 563880
    },
    {
      "epoch": 1.9762938663358183,
      "grad_norm": 2.828125,
      "learning_rate": 1.8957608043801647e-05,
      "loss": 0.8338,
      "step": 563890
    },
    {
      "epoch": 1.9763289138427138,
      "grad_norm": 2.84375,
      "learning_rate": 1.8956959015137945e-05,
      "loss": 0.8611,
      "step": 563900
    },
    {
      "epoch": 1.9763639613496093,
      "grad_norm": 2.828125,
      "learning_rate": 1.8956309986474243e-05,
      "loss": 0.8411,
      "step": 563910
    },
    {
      "epoch": 1.976399008856505,
      "grad_norm": 3.15625,
      "learning_rate": 1.895566095781054e-05,
      "loss": 0.893,
      "step": 563920
    },
    {
      "epoch": 1.9764340563634006,
      "grad_norm": 2.765625,
      "learning_rate": 1.895501192914684e-05,
      "loss": 0.7952,
      "step": 563930
    },
    {
      "epoch": 1.9764691038702962,
      "grad_norm": 2.734375,
      "learning_rate": 1.8954362900483137e-05,
      "loss": 0.7868,
      "step": 563940
    },
    {
      "epoch": 1.976504151377192,
      "grad_norm": 3.0,
      "learning_rate": 1.8953713871819435e-05,
      "loss": 0.8477,
      "step": 563950
    },
    {
      "epoch": 1.9765391988840872,
      "grad_norm": 3.015625,
      "learning_rate": 1.8953064843155737e-05,
      "loss": 0.7698,
      "step": 563960
    },
    {
      "epoch": 1.976574246390983,
      "grad_norm": 2.953125,
      "learning_rate": 1.8952415814492035e-05,
      "loss": 0.8718,
      "step": 563970
    },
    {
      "epoch": 1.9766092938978785,
      "grad_norm": 2.71875,
      "learning_rate": 1.8951766785828333e-05,
      "loss": 0.8048,
      "step": 563980
    },
    {
      "epoch": 1.976644341404774,
      "grad_norm": 2.46875,
      "learning_rate": 1.8951117757164627e-05,
      "loss": 0.7571,
      "step": 563990
    },
    {
      "epoch": 1.9766793889116698,
      "grad_norm": 3.0,
      "learning_rate": 1.8950468728500925e-05,
      "loss": 0.7994,
      "step": 564000
    },
    {
      "epoch": 1.9767144364185654,
      "grad_norm": 2.84375,
      "learning_rate": 1.8949819699837223e-05,
      "loss": 0.773,
      "step": 564010
    },
    {
      "epoch": 1.976749483925461,
      "grad_norm": 3.1875,
      "learning_rate": 1.894917067117352e-05,
      "loss": 0.7665,
      "step": 564020
    },
    {
      "epoch": 1.9767845314323567,
      "grad_norm": 2.984375,
      "learning_rate": 1.894852164250982e-05,
      "loss": 0.7943,
      "step": 564030
    },
    {
      "epoch": 1.9768195789392522,
      "grad_norm": 2.84375,
      "learning_rate": 1.8947872613846117e-05,
      "loss": 0.8219,
      "step": 564040
    },
    {
      "epoch": 1.9768546264461477,
      "grad_norm": 2.71875,
      "learning_rate": 1.8947223585182415e-05,
      "loss": 0.7563,
      "step": 564050
    },
    {
      "epoch": 1.9768896739530435,
      "grad_norm": 2.984375,
      "learning_rate": 1.8946574556518713e-05,
      "loss": 0.8453,
      "step": 564060
    },
    {
      "epoch": 1.9769247214599388,
      "grad_norm": 2.859375,
      "learning_rate": 1.894592552785501e-05,
      "loss": 0.8625,
      "step": 564070
    },
    {
      "epoch": 1.9769597689668346,
      "grad_norm": 2.8125,
      "learning_rate": 1.8945276499191313e-05,
      "loss": 0.8401,
      "step": 564080
    },
    {
      "epoch": 1.97699481647373,
      "grad_norm": 2.796875,
      "learning_rate": 1.894462747052761e-05,
      "loss": 0.8263,
      "step": 564090
    },
    {
      "epoch": 1.9770298639806256,
      "grad_norm": 2.765625,
      "learning_rate": 1.894397844186391e-05,
      "loss": 0.855,
      "step": 564100
    },
    {
      "epoch": 1.9770649114875214,
      "grad_norm": 2.859375,
      "learning_rate": 1.8943329413200207e-05,
      "loss": 0.8672,
      "step": 564110
    },
    {
      "epoch": 1.977099958994417,
      "grad_norm": 3.015625,
      "learning_rate": 1.8942680384536505e-05,
      "loss": 0.8789,
      "step": 564120
    },
    {
      "epoch": 1.9771350065013125,
      "grad_norm": 2.953125,
      "learning_rate": 1.8942031355872803e-05,
      "loss": 0.8522,
      "step": 564130
    },
    {
      "epoch": 1.9771700540082082,
      "grad_norm": 2.828125,
      "learning_rate": 1.89413823272091e-05,
      "loss": 0.7471,
      "step": 564140
    },
    {
      "epoch": 1.9772051015151038,
      "grad_norm": 3.0,
      "learning_rate": 1.89407332985454e-05,
      "loss": 0.8169,
      "step": 564150
    },
    {
      "epoch": 1.9772401490219993,
      "grad_norm": 3.28125,
      "learning_rate": 1.8940084269881697e-05,
      "loss": 0.8644,
      "step": 564160
    },
    {
      "epoch": 1.977275196528895,
      "grad_norm": 2.8125,
      "learning_rate": 1.8939435241217995e-05,
      "loss": 0.783,
      "step": 564170
    },
    {
      "epoch": 1.9773102440357904,
      "grad_norm": 3.203125,
      "learning_rate": 1.893878621255429e-05,
      "loss": 0.8592,
      "step": 564180
    },
    {
      "epoch": 1.9773452915426861,
      "grad_norm": 2.6875,
      "learning_rate": 1.893813718389059e-05,
      "loss": 0.8819,
      "step": 564190
    },
    {
      "epoch": 1.9773803390495817,
      "grad_norm": 2.59375,
      "learning_rate": 1.893748815522689e-05,
      "loss": 0.7386,
      "step": 564200
    },
    {
      "epoch": 1.9774153865564772,
      "grad_norm": 2.359375,
      "learning_rate": 1.8936839126563187e-05,
      "loss": 0.7345,
      "step": 564210
    },
    {
      "epoch": 1.977450434063373,
      "grad_norm": 2.859375,
      "learning_rate": 1.8936190097899485e-05,
      "loss": 0.7685,
      "step": 564220
    },
    {
      "epoch": 1.9774854815702685,
      "grad_norm": 2.640625,
      "learning_rate": 1.8935541069235783e-05,
      "loss": 0.8173,
      "step": 564230
    },
    {
      "epoch": 1.977520529077164,
      "grad_norm": 2.875,
      "learning_rate": 1.893489204057208e-05,
      "loss": 0.9115,
      "step": 564240
    },
    {
      "epoch": 1.9775555765840598,
      "grad_norm": 3.0625,
      "learning_rate": 1.893424301190838e-05,
      "loss": 0.885,
      "step": 564250
    },
    {
      "epoch": 1.9775906240909553,
      "grad_norm": 3.40625,
      "learning_rate": 1.8933593983244677e-05,
      "loss": 0.8978,
      "step": 564260
    },
    {
      "epoch": 1.9776256715978509,
      "grad_norm": 2.90625,
      "learning_rate": 1.8932944954580975e-05,
      "loss": 0.8064,
      "step": 564270
    },
    {
      "epoch": 1.9776607191047466,
      "grad_norm": 2.734375,
      "learning_rate": 1.8932295925917273e-05,
      "loss": 0.785,
      "step": 564280
    },
    {
      "epoch": 1.977695766611642,
      "grad_norm": 3.03125,
      "learning_rate": 1.893164689725357e-05,
      "loss": 0.9086,
      "step": 564290
    },
    {
      "epoch": 1.9777308141185377,
      "grad_norm": 2.5,
      "learning_rate": 1.893099786858987e-05,
      "loss": 0.7928,
      "step": 564300
    },
    {
      "epoch": 1.9777658616254334,
      "grad_norm": 2.90625,
      "learning_rate": 1.8930348839926167e-05,
      "loss": 0.877,
      "step": 564310
    },
    {
      "epoch": 1.9778009091323288,
      "grad_norm": 2.734375,
      "learning_rate": 1.8929699811262465e-05,
      "loss": 0.813,
      "step": 564320
    },
    {
      "epoch": 1.9778359566392245,
      "grad_norm": 3.234375,
      "learning_rate": 1.8929050782598766e-05,
      "loss": 0.8297,
      "step": 564330
    },
    {
      "epoch": 1.97787100414612,
      "grad_norm": 3.21875,
      "learning_rate": 1.8928401753935064e-05,
      "loss": 0.7822,
      "step": 564340
    },
    {
      "epoch": 1.9779060516530156,
      "grad_norm": 2.40625,
      "learning_rate": 1.8927752725271362e-05,
      "loss": 0.7998,
      "step": 564350
    },
    {
      "epoch": 1.9779410991599113,
      "grad_norm": 2.75,
      "learning_rate": 1.892710369660766e-05,
      "loss": 0.6898,
      "step": 564360
    },
    {
      "epoch": 1.9779761466668069,
      "grad_norm": 3.046875,
      "learning_rate": 1.8926454667943955e-05,
      "loss": 0.8247,
      "step": 564370
    },
    {
      "epoch": 1.9780111941737024,
      "grad_norm": 2.546875,
      "learning_rate": 1.8925805639280253e-05,
      "loss": 0.7826,
      "step": 564380
    },
    {
      "epoch": 1.9780462416805982,
      "grad_norm": 3.078125,
      "learning_rate": 1.892515661061655e-05,
      "loss": 0.8501,
      "step": 564390
    },
    {
      "epoch": 1.9780812891874935,
      "grad_norm": 2.640625,
      "learning_rate": 1.892450758195285e-05,
      "loss": 0.8491,
      "step": 564400
    },
    {
      "epoch": 1.9781163366943892,
      "grad_norm": 2.875,
      "learning_rate": 1.8923858553289147e-05,
      "loss": 0.7727,
      "step": 564410
    },
    {
      "epoch": 1.978151384201285,
      "grad_norm": 3.828125,
      "learning_rate": 1.8923209524625445e-05,
      "loss": 0.8563,
      "step": 564420
    },
    {
      "epoch": 1.9781864317081803,
      "grad_norm": 2.921875,
      "learning_rate": 1.8922560495961743e-05,
      "loss": 0.9085,
      "step": 564430
    },
    {
      "epoch": 1.978221479215076,
      "grad_norm": 2.84375,
      "learning_rate": 1.8921911467298044e-05,
      "loss": 0.8997,
      "step": 564440
    },
    {
      "epoch": 1.9782565267219716,
      "grad_norm": 2.734375,
      "learning_rate": 1.8921262438634342e-05,
      "loss": 0.7732,
      "step": 564450
    },
    {
      "epoch": 1.9782915742288671,
      "grad_norm": 3.015625,
      "learning_rate": 1.892061340997064e-05,
      "loss": 0.9069,
      "step": 564460
    },
    {
      "epoch": 1.978326621735763,
      "grad_norm": 2.984375,
      "learning_rate": 1.8919964381306938e-05,
      "loss": 0.802,
      "step": 564470
    },
    {
      "epoch": 1.9783616692426584,
      "grad_norm": 2.765625,
      "learning_rate": 1.8919315352643236e-05,
      "loss": 0.8662,
      "step": 564480
    },
    {
      "epoch": 1.978396716749554,
      "grad_norm": 2.78125,
      "learning_rate": 1.8918666323979534e-05,
      "loss": 0.7909,
      "step": 564490
    },
    {
      "epoch": 1.9784317642564497,
      "grad_norm": 2.765625,
      "learning_rate": 1.8918017295315832e-05,
      "loss": 0.8669,
      "step": 564500
    },
    {
      "epoch": 1.9784668117633453,
      "grad_norm": 2.828125,
      "learning_rate": 1.891736826665213e-05,
      "loss": 0.7096,
      "step": 564510
    },
    {
      "epoch": 1.9785018592702408,
      "grad_norm": 2.921875,
      "learning_rate": 1.8916719237988428e-05,
      "loss": 0.8265,
      "step": 564520
    },
    {
      "epoch": 1.9785369067771366,
      "grad_norm": 2.71875,
      "learning_rate": 1.8916070209324726e-05,
      "loss": 0.8348,
      "step": 564530
    },
    {
      "epoch": 1.9785719542840319,
      "grad_norm": 3.125,
      "learning_rate": 1.8915421180661024e-05,
      "loss": 0.8574,
      "step": 564540
    },
    {
      "epoch": 1.9786070017909276,
      "grad_norm": 2.859375,
      "learning_rate": 1.891477215199732e-05,
      "loss": 0.7244,
      "step": 564550
    },
    {
      "epoch": 1.9786420492978232,
      "grad_norm": 3.140625,
      "learning_rate": 1.891412312333362e-05,
      "loss": 0.8154,
      "step": 564560
    },
    {
      "epoch": 1.9786770968047187,
      "grad_norm": 2.65625,
      "learning_rate": 1.8913474094669918e-05,
      "loss": 0.781,
      "step": 564570
    },
    {
      "epoch": 1.9787121443116145,
      "grad_norm": 4.09375,
      "learning_rate": 1.8912825066006216e-05,
      "loss": 0.8128,
      "step": 564580
    },
    {
      "epoch": 1.97874719181851,
      "grad_norm": 3.15625,
      "learning_rate": 1.8912176037342514e-05,
      "loss": 0.7707,
      "step": 564590
    },
    {
      "epoch": 1.9787822393254055,
      "grad_norm": 2.90625,
      "learning_rate": 1.8911527008678812e-05,
      "loss": 0.8848,
      "step": 564600
    },
    {
      "epoch": 1.9788172868323013,
      "grad_norm": 2.84375,
      "learning_rate": 1.891087798001511e-05,
      "loss": 0.8707,
      "step": 564610
    },
    {
      "epoch": 1.9788523343391968,
      "grad_norm": 2.84375,
      "learning_rate": 1.8910228951351408e-05,
      "loss": 0.8761,
      "step": 564620
    },
    {
      "epoch": 1.9788873818460924,
      "grad_norm": 2.96875,
      "learning_rate": 1.8909579922687706e-05,
      "loss": 0.8762,
      "step": 564630
    },
    {
      "epoch": 1.9789224293529881,
      "grad_norm": 3.453125,
      "learning_rate": 1.8908930894024004e-05,
      "loss": 0.8701,
      "step": 564640
    },
    {
      "epoch": 1.9789574768598834,
      "grad_norm": 2.34375,
      "learning_rate": 1.8908281865360302e-05,
      "loss": 0.8059,
      "step": 564650
    },
    {
      "epoch": 1.9789925243667792,
      "grad_norm": 2.40625,
      "learning_rate": 1.89076328366966e-05,
      "loss": 0.7833,
      "step": 564660
    },
    {
      "epoch": 1.9790275718736747,
      "grad_norm": 2.96875,
      "learning_rate": 1.8906983808032898e-05,
      "loss": 0.7445,
      "step": 564670
    },
    {
      "epoch": 1.9790626193805703,
      "grad_norm": 2.65625,
      "learning_rate": 1.8906334779369196e-05,
      "loss": 0.8369,
      "step": 564680
    },
    {
      "epoch": 1.979097666887466,
      "grad_norm": 3.0625,
      "learning_rate": 1.8905685750705497e-05,
      "loss": 0.8083,
      "step": 564690
    },
    {
      "epoch": 1.9791327143943616,
      "grad_norm": 2.703125,
      "learning_rate": 1.8905036722041795e-05,
      "loss": 0.81,
      "step": 564700
    },
    {
      "epoch": 1.979167761901257,
      "grad_norm": 2.84375,
      "learning_rate": 1.8904387693378093e-05,
      "loss": 0.7817,
      "step": 564710
    },
    {
      "epoch": 1.9792028094081529,
      "grad_norm": 3.0,
      "learning_rate": 1.890373866471439e-05,
      "loss": 0.8623,
      "step": 564720
    },
    {
      "epoch": 1.9792378569150484,
      "grad_norm": 3.3125,
      "learning_rate": 1.890308963605069e-05,
      "loss": 0.8393,
      "step": 564730
    },
    {
      "epoch": 1.979272904421944,
      "grad_norm": 2.859375,
      "learning_rate": 1.8902440607386984e-05,
      "loss": 0.8588,
      "step": 564740
    },
    {
      "epoch": 1.9793079519288397,
      "grad_norm": 3.515625,
      "learning_rate": 1.8901791578723282e-05,
      "loss": 0.745,
      "step": 564750
    },
    {
      "epoch": 1.979342999435735,
      "grad_norm": 2.828125,
      "learning_rate": 1.890114255005958e-05,
      "loss": 0.7912,
      "step": 564760
    },
    {
      "epoch": 1.9793780469426308,
      "grad_norm": 3.53125,
      "learning_rate": 1.8900493521395878e-05,
      "loss": 0.8549,
      "step": 564770
    },
    {
      "epoch": 1.9794130944495263,
      "grad_norm": 2.625,
      "learning_rate": 1.8899844492732176e-05,
      "loss": 0.8126,
      "step": 564780
    },
    {
      "epoch": 1.9794481419564218,
      "grad_norm": 2.75,
      "learning_rate": 1.8899195464068474e-05,
      "loss": 0.7733,
      "step": 564790
    },
    {
      "epoch": 1.9794831894633176,
      "grad_norm": 3.046875,
      "learning_rate": 1.8898546435404772e-05,
      "loss": 0.7713,
      "step": 564800
    },
    {
      "epoch": 1.9795182369702131,
      "grad_norm": 3.0,
      "learning_rate": 1.8897897406741073e-05,
      "loss": 0.802,
      "step": 564810
    },
    {
      "epoch": 1.9795532844771087,
      "grad_norm": 2.96875,
      "learning_rate": 1.889724837807737e-05,
      "loss": 0.7968,
      "step": 564820
    },
    {
      "epoch": 1.9795883319840044,
      "grad_norm": 2.703125,
      "learning_rate": 1.889659934941367e-05,
      "loss": 0.8305,
      "step": 564830
    },
    {
      "epoch": 1.9796233794909,
      "grad_norm": 2.734375,
      "learning_rate": 1.8895950320749967e-05,
      "loss": 0.8258,
      "step": 564840
    },
    {
      "epoch": 1.9796584269977955,
      "grad_norm": 2.84375,
      "learning_rate": 1.8895301292086265e-05,
      "loss": 0.7632,
      "step": 564850
    },
    {
      "epoch": 1.9796934745046912,
      "grad_norm": 2.765625,
      "learning_rate": 1.8894652263422563e-05,
      "loss": 0.7701,
      "step": 564860
    },
    {
      "epoch": 1.9797285220115866,
      "grad_norm": 3.34375,
      "learning_rate": 1.889400323475886e-05,
      "loss": 0.8502,
      "step": 564870
    },
    {
      "epoch": 1.9797635695184823,
      "grad_norm": 2.734375,
      "learning_rate": 1.889335420609516e-05,
      "loss": 0.8935,
      "step": 564880
    },
    {
      "epoch": 1.9797986170253778,
      "grad_norm": 2.671875,
      "learning_rate": 1.8892705177431457e-05,
      "loss": 0.7412,
      "step": 564890
    },
    {
      "epoch": 1.9798336645322734,
      "grad_norm": 2.515625,
      "learning_rate": 1.8892056148767755e-05,
      "loss": 0.8343,
      "step": 564900
    },
    {
      "epoch": 1.9798687120391691,
      "grad_norm": 2.203125,
      "learning_rate": 1.8891407120104053e-05,
      "loss": 0.875,
      "step": 564910
    },
    {
      "epoch": 1.9799037595460647,
      "grad_norm": 2.953125,
      "learning_rate": 1.889075809144035e-05,
      "loss": 0.82,
      "step": 564920
    },
    {
      "epoch": 1.9799388070529602,
      "grad_norm": 2.9375,
      "learning_rate": 1.889010906277665e-05,
      "loss": 0.8492,
      "step": 564930
    },
    {
      "epoch": 1.979973854559856,
      "grad_norm": 2.890625,
      "learning_rate": 1.8889460034112947e-05,
      "loss": 0.8214,
      "step": 564940
    },
    {
      "epoch": 1.9800089020667515,
      "grad_norm": 2.875,
      "learning_rate": 1.8888811005449245e-05,
      "loss": 0.8548,
      "step": 564950
    },
    {
      "epoch": 1.980043949573647,
      "grad_norm": 2.703125,
      "learning_rate": 1.8888161976785543e-05,
      "loss": 0.8831,
      "step": 564960
    },
    {
      "epoch": 1.9800789970805428,
      "grad_norm": 3.0,
      "learning_rate": 1.888751294812184e-05,
      "loss": 0.8264,
      "step": 564970
    },
    {
      "epoch": 1.9801140445874381,
      "grad_norm": 3.015625,
      "learning_rate": 1.888686391945814e-05,
      "loss": 0.825,
      "step": 564980
    },
    {
      "epoch": 1.9801490920943339,
      "grad_norm": 2.984375,
      "learning_rate": 1.8886214890794437e-05,
      "loss": 0.8921,
      "step": 564990
    },
    {
      "epoch": 1.9801841396012296,
      "grad_norm": 2.421875,
      "learning_rate": 1.8885565862130735e-05,
      "loss": 0.7999,
      "step": 565000
    },
    {
      "epoch": 1.9801841396012296,
      "eval_loss": 0.7710798978805542,
      "eval_runtime": 561.492,
      "eval_samples_per_second": 677.545,
      "eval_steps_per_second": 56.462,
      "step": 565000
    },
    {
      "epoch": 1.980219187108125,
      "grad_norm": 3.1875,
      "learning_rate": 1.8884916833467033e-05,
      "loss": 0.8534,
      "step": 565010
    },
    {
      "epoch": 1.9802542346150207,
      "grad_norm": 2.9375,
      "learning_rate": 1.888426780480333e-05,
      "loss": 0.7441,
      "step": 565020
    },
    {
      "epoch": 1.9802892821219162,
      "grad_norm": 3.078125,
      "learning_rate": 1.888361877613963e-05,
      "loss": 0.8194,
      "step": 565030
    },
    {
      "epoch": 1.9803243296288118,
      "grad_norm": 2.828125,
      "learning_rate": 1.8882969747475927e-05,
      "loss": 0.9078,
      "step": 565040
    },
    {
      "epoch": 1.9803593771357075,
      "grad_norm": 2.78125,
      "learning_rate": 1.8882320718812225e-05,
      "loss": 0.8094,
      "step": 565050
    },
    {
      "epoch": 1.980394424642603,
      "grad_norm": 2.375,
      "learning_rate": 1.8881671690148527e-05,
      "loss": 0.8402,
      "step": 565060
    },
    {
      "epoch": 1.9804294721494986,
      "grad_norm": 2.890625,
      "learning_rate": 1.8881022661484825e-05,
      "loss": 0.8103,
      "step": 565070
    },
    {
      "epoch": 1.9804645196563944,
      "grad_norm": 2.96875,
      "learning_rate": 1.8880373632821123e-05,
      "loss": 0.8513,
      "step": 565080
    },
    {
      "epoch": 1.9804995671632897,
      "grad_norm": 2.28125,
      "learning_rate": 1.887972460415742e-05,
      "loss": 0.7971,
      "step": 565090
    },
    {
      "epoch": 1.9805346146701854,
      "grad_norm": 2.75,
      "learning_rate": 1.887907557549372e-05,
      "loss": 0.8899,
      "step": 565100
    },
    {
      "epoch": 1.9805696621770812,
      "grad_norm": 2.921875,
      "learning_rate": 1.8878426546830017e-05,
      "loss": 0.8639,
      "step": 565110
    },
    {
      "epoch": 1.9806047096839765,
      "grad_norm": 2.96875,
      "learning_rate": 1.887777751816631e-05,
      "loss": 0.9158,
      "step": 565120
    },
    {
      "epoch": 1.9806397571908723,
      "grad_norm": 2.53125,
      "learning_rate": 1.887712848950261e-05,
      "loss": 0.8085,
      "step": 565130
    },
    {
      "epoch": 1.9806748046977678,
      "grad_norm": 2.609375,
      "learning_rate": 1.8876479460838907e-05,
      "loss": 0.7673,
      "step": 565140
    },
    {
      "epoch": 1.9807098522046633,
      "grad_norm": 2.546875,
      "learning_rate": 1.8875830432175205e-05,
      "loss": 0.9255,
      "step": 565150
    },
    {
      "epoch": 1.980744899711559,
      "grad_norm": 2.953125,
      "learning_rate": 1.8875181403511503e-05,
      "loss": 0.8252,
      "step": 565160
    },
    {
      "epoch": 1.9807799472184546,
      "grad_norm": 2.828125,
      "learning_rate": 1.8874532374847805e-05,
      "loss": 0.7894,
      "step": 565170
    },
    {
      "epoch": 1.9808149947253502,
      "grad_norm": 2.65625,
      "learning_rate": 1.8873883346184103e-05,
      "loss": 0.8111,
      "step": 565180
    },
    {
      "epoch": 1.980850042232246,
      "grad_norm": 2.46875,
      "learning_rate": 1.88732343175204e-05,
      "loss": 0.7855,
      "step": 565190
    },
    {
      "epoch": 1.9808850897391415,
      "grad_norm": 3.0,
      "learning_rate": 1.88725852888567e-05,
      "loss": 0.8058,
      "step": 565200
    },
    {
      "epoch": 1.980920137246037,
      "grad_norm": 3.40625,
      "learning_rate": 1.8871936260192997e-05,
      "loss": 0.8575,
      "step": 565210
    },
    {
      "epoch": 1.9809551847529328,
      "grad_norm": 3.015625,
      "learning_rate": 1.8871287231529295e-05,
      "loss": 0.8199,
      "step": 565220
    },
    {
      "epoch": 1.980990232259828,
      "grad_norm": 3.359375,
      "learning_rate": 1.8870638202865593e-05,
      "loss": 0.8067,
      "step": 565230
    },
    {
      "epoch": 1.9810252797667238,
      "grad_norm": 2.84375,
      "learning_rate": 1.886998917420189e-05,
      "loss": 0.8049,
      "step": 565240
    },
    {
      "epoch": 1.9810603272736194,
      "grad_norm": 2.578125,
      "learning_rate": 1.886934014553819e-05,
      "loss": 0.8133,
      "step": 565250
    },
    {
      "epoch": 1.981095374780515,
      "grad_norm": 2.90625,
      "learning_rate": 1.8868691116874487e-05,
      "loss": 0.7919,
      "step": 565260
    },
    {
      "epoch": 1.9811304222874107,
      "grad_norm": 5.15625,
      "learning_rate": 1.8868042088210785e-05,
      "loss": 0.8622,
      "step": 565270
    },
    {
      "epoch": 1.9811654697943062,
      "grad_norm": 2.859375,
      "learning_rate": 1.8867393059547083e-05,
      "loss": 0.8024,
      "step": 565280
    },
    {
      "epoch": 1.9812005173012017,
      "grad_norm": 2.96875,
      "learning_rate": 1.886674403088338e-05,
      "loss": 0.8431,
      "step": 565290
    },
    {
      "epoch": 1.9812355648080975,
      "grad_norm": 3.109375,
      "learning_rate": 1.886609500221968e-05,
      "loss": 0.8112,
      "step": 565300
    },
    {
      "epoch": 1.981270612314993,
      "grad_norm": 2.796875,
      "learning_rate": 1.8865445973555977e-05,
      "loss": 0.7759,
      "step": 565310
    },
    {
      "epoch": 1.9813056598218886,
      "grad_norm": 3.046875,
      "learning_rate": 1.8864796944892275e-05,
      "loss": 0.8978,
      "step": 565320
    },
    {
      "epoch": 1.9813407073287843,
      "grad_norm": 2.765625,
      "learning_rate": 1.8864147916228573e-05,
      "loss": 0.8538,
      "step": 565330
    },
    {
      "epoch": 1.9813757548356796,
      "grad_norm": 2.875,
      "learning_rate": 1.886349888756487e-05,
      "loss": 0.8126,
      "step": 565340
    },
    {
      "epoch": 1.9814108023425754,
      "grad_norm": 2.890625,
      "learning_rate": 1.886284985890117e-05,
      "loss": 0.7728,
      "step": 565350
    },
    {
      "epoch": 1.981445849849471,
      "grad_norm": 2.859375,
      "learning_rate": 1.8862200830237467e-05,
      "loss": 0.7496,
      "step": 565360
    },
    {
      "epoch": 1.9814808973563665,
      "grad_norm": 3.234375,
      "learning_rate": 1.8861551801573765e-05,
      "loss": 0.86,
      "step": 565370
    },
    {
      "epoch": 1.9815159448632622,
      "grad_norm": 3.15625,
      "learning_rate": 1.8860902772910063e-05,
      "loss": 0.7928,
      "step": 565380
    },
    {
      "epoch": 1.9815509923701577,
      "grad_norm": 2.8125,
      "learning_rate": 1.886025374424636e-05,
      "loss": 0.8241,
      "step": 565390
    },
    {
      "epoch": 1.9815860398770533,
      "grad_norm": 2.984375,
      "learning_rate": 1.885960471558266e-05,
      "loss": 0.8151,
      "step": 565400
    },
    {
      "epoch": 1.981621087383949,
      "grad_norm": 3.390625,
      "learning_rate": 1.8858955686918957e-05,
      "loss": 0.8209,
      "step": 565410
    },
    {
      "epoch": 1.9816561348908446,
      "grad_norm": 3.0625,
      "learning_rate": 1.8858306658255255e-05,
      "loss": 0.8072,
      "step": 565420
    },
    {
      "epoch": 1.9816911823977401,
      "grad_norm": 3.203125,
      "learning_rate": 1.8857657629591556e-05,
      "loss": 0.9076,
      "step": 565430
    },
    {
      "epoch": 1.9817262299046359,
      "grad_norm": 2.875,
      "learning_rate": 1.8857008600927854e-05,
      "loss": 0.7808,
      "step": 565440
    },
    {
      "epoch": 1.9817612774115312,
      "grad_norm": 3.125,
      "learning_rate": 1.8856359572264152e-05,
      "loss": 0.796,
      "step": 565450
    },
    {
      "epoch": 1.981796324918427,
      "grad_norm": 2.71875,
      "learning_rate": 1.885571054360045e-05,
      "loss": 0.827,
      "step": 565460
    },
    {
      "epoch": 1.9818313724253225,
      "grad_norm": 2.6875,
      "learning_rate": 1.8855061514936748e-05,
      "loss": 0.7817,
      "step": 565470
    },
    {
      "epoch": 1.981866419932218,
      "grad_norm": 2.625,
      "learning_rate": 1.8854412486273046e-05,
      "loss": 0.7414,
      "step": 565480
    },
    {
      "epoch": 1.9819014674391138,
      "grad_norm": 3.171875,
      "learning_rate": 1.8853763457609344e-05,
      "loss": 0.7701,
      "step": 565490
    },
    {
      "epoch": 1.9819365149460093,
      "grad_norm": 2.96875,
      "learning_rate": 1.885311442894564e-05,
      "loss": 0.7953,
      "step": 565500
    },
    {
      "epoch": 1.9819715624529048,
      "grad_norm": 3.234375,
      "learning_rate": 1.8852465400281937e-05,
      "loss": 0.8483,
      "step": 565510
    },
    {
      "epoch": 1.9820066099598006,
      "grad_norm": 2.984375,
      "learning_rate": 1.8851816371618235e-05,
      "loss": 0.823,
      "step": 565520
    },
    {
      "epoch": 1.9820416574666961,
      "grad_norm": 2.921875,
      "learning_rate": 1.8851167342954533e-05,
      "loss": 0.8241,
      "step": 565530
    },
    {
      "epoch": 1.9820767049735917,
      "grad_norm": 2.625,
      "learning_rate": 1.8850518314290834e-05,
      "loss": 0.8971,
      "step": 565540
    },
    {
      "epoch": 1.9821117524804874,
      "grad_norm": 2.875,
      "learning_rate": 1.8849869285627132e-05,
      "loss": 0.8457,
      "step": 565550
    },
    {
      "epoch": 1.9821467999873827,
      "grad_norm": 3.0625,
      "learning_rate": 1.884922025696343e-05,
      "loss": 0.7445,
      "step": 565560
    },
    {
      "epoch": 1.9821818474942785,
      "grad_norm": 3.453125,
      "learning_rate": 1.8848571228299728e-05,
      "loss": 0.8564,
      "step": 565570
    },
    {
      "epoch": 1.982216895001174,
      "grad_norm": 2.5625,
      "learning_rate": 1.8847922199636026e-05,
      "loss": 0.7784,
      "step": 565580
    },
    {
      "epoch": 1.9822519425080696,
      "grad_norm": 2.953125,
      "learning_rate": 1.8847273170972324e-05,
      "loss": 0.7576,
      "step": 565590
    },
    {
      "epoch": 1.9822869900149653,
      "grad_norm": 2.90625,
      "learning_rate": 1.8846624142308622e-05,
      "loss": 0.8247,
      "step": 565600
    },
    {
      "epoch": 1.9823220375218609,
      "grad_norm": 2.90625,
      "learning_rate": 1.884597511364492e-05,
      "loss": 0.7808,
      "step": 565610
    },
    {
      "epoch": 1.9823570850287564,
      "grad_norm": 2.578125,
      "learning_rate": 1.8845326084981218e-05,
      "loss": 0.8292,
      "step": 565620
    },
    {
      "epoch": 1.9823921325356522,
      "grad_norm": 2.578125,
      "learning_rate": 1.8844677056317516e-05,
      "loss": 0.8071,
      "step": 565630
    },
    {
      "epoch": 1.9824271800425477,
      "grad_norm": 3.078125,
      "learning_rate": 1.8844028027653814e-05,
      "loss": 0.8252,
      "step": 565640
    },
    {
      "epoch": 1.9824622275494432,
      "grad_norm": 3.25,
      "learning_rate": 1.8843378998990112e-05,
      "loss": 0.7481,
      "step": 565650
    },
    {
      "epoch": 1.982497275056339,
      "grad_norm": 3.21875,
      "learning_rate": 1.884272997032641e-05,
      "loss": 0.7612,
      "step": 565660
    },
    {
      "epoch": 1.9825323225632343,
      "grad_norm": 3.171875,
      "learning_rate": 1.8842080941662708e-05,
      "loss": 0.8607,
      "step": 565670
    },
    {
      "epoch": 1.98256737007013,
      "grad_norm": 2.75,
      "learning_rate": 1.8841431912999006e-05,
      "loss": 0.7657,
      "step": 565680
    },
    {
      "epoch": 1.9826024175770258,
      "grad_norm": 2.828125,
      "learning_rate": 1.8840782884335304e-05,
      "loss": 0.7898,
      "step": 565690
    },
    {
      "epoch": 1.9826374650839211,
      "grad_norm": 3.0625,
      "learning_rate": 1.8840133855671602e-05,
      "loss": 0.8699,
      "step": 565700
    },
    {
      "epoch": 1.982672512590817,
      "grad_norm": 2.75,
      "learning_rate": 1.88394848270079e-05,
      "loss": 0.7455,
      "step": 565710
    },
    {
      "epoch": 1.9827075600977124,
      "grad_norm": 2.53125,
      "learning_rate": 1.8838835798344198e-05,
      "loss": 0.8891,
      "step": 565720
    },
    {
      "epoch": 1.982742607604608,
      "grad_norm": 3.109375,
      "learning_rate": 1.8838186769680496e-05,
      "loss": 0.7971,
      "step": 565730
    },
    {
      "epoch": 1.9827776551115037,
      "grad_norm": 3.28125,
      "learning_rate": 1.8837537741016794e-05,
      "loss": 0.8116,
      "step": 565740
    },
    {
      "epoch": 1.9828127026183993,
      "grad_norm": 2.578125,
      "learning_rate": 1.8836888712353092e-05,
      "loss": 0.7421,
      "step": 565750
    },
    {
      "epoch": 1.9828477501252948,
      "grad_norm": 3.078125,
      "learning_rate": 1.883623968368939e-05,
      "loss": 0.805,
      "step": 565760
    },
    {
      "epoch": 1.9828827976321906,
      "grad_norm": 2.5625,
      "learning_rate": 1.8835590655025688e-05,
      "loss": 0.8979,
      "step": 565770
    },
    {
      "epoch": 1.9829178451390859,
      "grad_norm": 2.90625,
      "learning_rate": 1.8834941626361986e-05,
      "loss": 0.8431,
      "step": 565780
    },
    {
      "epoch": 1.9829528926459816,
      "grad_norm": 2.640625,
      "learning_rate": 1.8834292597698288e-05,
      "loss": 0.7959,
      "step": 565790
    },
    {
      "epoch": 1.9829879401528774,
      "grad_norm": 2.75,
      "learning_rate": 1.8833643569034586e-05,
      "loss": 0.7665,
      "step": 565800
    },
    {
      "epoch": 1.9830229876597727,
      "grad_norm": 2.5625,
      "learning_rate": 1.8832994540370884e-05,
      "loss": 0.7556,
      "step": 565810
    },
    {
      "epoch": 1.9830580351666685,
      "grad_norm": 3.015625,
      "learning_rate": 1.883234551170718e-05,
      "loss": 0.8133,
      "step": 565820
    },
    {
      "epoch": 1.983093082673564,
      "grad_norm": 2.59375,
      "learning_rate": 1.883169648304348e-05,
      "loss": 0.7513,
      "step": 565830
    },
    {
      "epoch": 1.9831281301804595,
      "grad_norm": 2.5625,
      "learning_rate": 1.8831047454379778e-05,
      "loss": 0.8123,
      "step": 565840
    },
    {
      "epoch": 1.9831631776873553,
      "grad_norm": 3.203125,
      "learning_rate": 1.8830398425716076e-05,
      "loss": 0.8746,
      "step": 565850
    },
    {
      "epoch": 1.9831982251942508,
      "grad_norm": 2.40625,
      "learning_rate": 1.8829749397052374e-05,
      "loss": 0.7666,
      "step": 565860
    },
    {
      "epoch": 1.9832332727011464,
      "grad_norm": 3.046875,
      "learning_rate": 1.8829100368388668e-05,
      "loss": 0.8855,
      "step": 565870
    },
    {
      "epoch": 1.9832683202080421,
      "grad_norm": 3.3125,
      "learning_rate": 1.8828451339724966e-05,
      "loss": 0.7802,
      "step": 565880
    },
    {
      "epoch": 1.9833033677149376,
      "grad_norm": 2.78125,
      "learning_rate": 1.8827802311061264e-05,
      "loss": 0.818,
      "step": 565890
    },
    {
      "epoch": 1.9833384152218332,
      "grad_norm": 2.875,
      "learning_rate": 1.8827153282397562e-05,
      "loss": 0.7619,
      "step": 565900
    },
    {
      "epoch": 1.983373462728729,
      "grad_norm": 2.4375,
      "learning_rate": 1.8826504253733864e-05,
      "loss": 0.7943,
      "step": 565910
    },
    {
      "epoch": 1.9834085102356243,
      "grad_norm": 2.828125,
      "learning_rate": 1.882585522507016e-05,
      "loss": 0.7806,
      "step": 565920
    },
    {
      "epoch": 1.98344355774252,
      "grad_norm": 2.734375,
      "learning_rate": 1.882520619640646e-05,
      "loss": 0.8481,
      "step": 565930
    },
    {
      "epoch": 1.9834786052494155,
      "grad_norm": 2.96875,
      "learning_rate": 1.8824557167742758e-05,
      "loss": 0.8321,
      "step": 565940
    },
    {
      "epoch": 1.983513652756311,
      "grad_norm": 2.765625,
      "learning_rate": 1.8823908139079056e-05,
      "loss": 0.8155,
      "step": 565950
    },
    {
      "epoch": 1.9835487002632068,
      "grad_norm": 2.65625,
      "learning_rate": 1.8823259110415354e-05,
      "loss": 0.8851,
      "step": 565960
    },
    {
      "epoch": 1.9835837477701024,
      "grad_norm": 2.921875,
      "learning_rate": 1.882261008175165e-05,
      "loss": 0.8386,
      "step": 565970
    },
    {
      "epoch": 1.983618795276998,
      "grad_norm": 3.265625,
      "learning_rate": 1.882196105308795e-05,
      "loss": 0.7814,
      "step": 565980
    },
    {
      "epoch": 1.9836538427838937,
      "grad_norm": 3.125,
      "learning_rate": 1.8821312024424248e-05,
      "loss": 0.8751,
      "step": 565990
    },
    {
      "epoch": 1.9836888902907892,
      "grad_norm": 2.65625,
      "learning_rate": 1.8820662995760546e-05,
      "loss": 0.7816,
      "step": 566000
    },
    {
      "epoch": 1.9837239377976847,
      "grad_norm": 3.109375,
      "learning_rate": 1.8820013967096844e-05,
      "loss": 0.7637,
      "step": 566010
    },
    {
      "epoch": 1.9837589853045805,
      "grad_norm": 3.09375,
      "learning_rate": 1.881936493843314e-05,
      "loss": 0.7773,
      "step": 566020
    },
    {
      "epoch": 1.9837940328114758,
      "grad_norm": 3.046875,
      "learning_rate": 1.881871590976944e-05,
      "loss": 0.7383,
      "step": 566030
    },
    {
      "epoch": 1.9838290803183716,
      "grad_norm": 3.046875,
      "learning_rate": 1.8818066881105738e-05,
      "loss": 0.8238,
      "step": 566040
    },
    {
      "epoch": 1.983864127825267,
      "grad_norm": 2.96875,
      "learning_rate": 1.881741785244204e-05,
      "loss": 0.8546,
      "step": 566050
    },
    {
      "epoch": 1.9838991753321626,
      "grad_norm": 2.96875,
      "learning_rate": 1.8816768823778334e-05,
      "loss": 0.8925,
      "step": 566060
    },
    {
      "epoch": 1.9839342228390584,
      "grad_norm": 2.890625,
      "learning_rate": 1.881611979511463e-05,
      "loss": 0.8659,
      "step": 566070
    },
    {
      "epoch": 1.983969270345954,
      "grad_norm": 3.140625,
      "learning_rate": 1.881547076645093e-05,
      "loss": 0.8384,
      "step": 566080
    },
    {
      "epoch": 1.9840043178528495,
      "grad_norm": 2.765625,
      "learning_rate": 1.8814821737787228e-05,
      "loss": 0.774,
      "step": 566090
    },
    {
      "epoch": 1.9840393653597452,
      "grad_norm": 2.46875,
      "learning_rate": 1.8814172709123526e-05,
      "loss": 0.872,
      "step": 566100
    },
    {
      "epoch": 1.9840744128666408,
      "grad_norm": 2.453125,
      "learning_rate": 1.8813523680459824e-05,
      "loss": 0.7766,
      "step": 566110
    },
    {
      "epoch": 1.9841094603735363,
      "grad_norm": 2.71875,
      "learning_rate": 1.881287465179612e-05,
      "loss": 0.809,
      "step": 566120
    },
    {
      "epoch": 1.984144507880432,
      "grad_norm": 2.875,
      "learning_rate": 1.881222562313242e-05,
      "loss": 0.7482,
      "step": 566130
    },
    {
      "epoch": 1.9841795553873274,
      "grad_norm": 2.703125,
      "learning_rate": 1.8811576594468718e-05,
      "loss": 0.7562,
      "step": 566140
    },
    {
      "epoch": 1.9842146028942231,
      "grad_norm": 3.15625,
      "learning_rate": 1.8810927565805016e-05,
      "loss": 0.8399,
      "step": 566150
    },
    {
      "epoch": 1.9842496504011187,
      "grad_norm": 2.90625,
      "learning_rate": 1.8810278537141317e-05,
      "loss": 0.8127,
      "step": 566160
    },
    {
      "epoch": 1.9842846979080142,
      "grad_norm": 3.25,
      "learning_rate": 1.8809629508477615e-05,
      "loss": 0.9189,
      "step": 566170
    },
    {
      "epoch": 1.98431974541491,
      "grad_norm": 3.546875,
      "learning_rate": 1.8808980479813913e-05,
      "loss": 0.8592,
      "step": 566180
    },
    {
      "epoch": 1.9843547929218055,
      "grad_norm": 3.046875,
      "learning_rate": 1.880833145115021e-05,
      "loss": 0.8657,
      "step": 566190
    },
    {
      "epoch": 1.984389840428701,
      "grad_norm": 2.484375,
      "learning_rate": 1.880768242248651e-05,
      "loss": 0.8044,
      "step": 566200
    },
    {
      "epoch": 1.9844248879355968,
      "grad_norm": 3.0,
      "learning_rate": 1.8807033393822807e-05,
      "loss": 0.8633,
      "step": 566210
    },
    {
      "epoch": 1.9844599354424923,
      "grad_norm": 2.875,
      "learning_rate": 1.8806384365159105e-05,
      "loss": 0.8461,
      "step": 566220
    },
    {
      "epoch": 1.9844949829493879,
      "grad_norm": 2.84375,
      "learning_rate": 1.8805735336495403e-05,
      "loss": 0.88,
      "step": 566230
    },
    {
      "epoch": 1.9845300304562836,
      "grad_norm": 2.390625,
      "learning_rate": 1.88050863078317e-05,
      "loss": 0.8168,
      "step": 566240
    },
    {
      "epoch": 1.984565077963179,
      "grad_norm": 2.828125,
      "learning_rate": 1.8804437279167996e-05,
      "loss": 0.7674,
      "step": 566250
    },
    {
      "epoch": 1.9846001254700747,
      "grad_norm": 3.03125,
      "learning_rate": 1.8803788250504294e-05,
      "loss": 0.7871,
      "step": 566260
    },
    {
      "epoch": 1.9846351729769702,
      "grad_norm": 3.15625,
      "learning_rate": 1.8803139221840595e-05,
      "loss": 0.7882,
      "step": 566270
    },
    {
      "epoch": 1.9846702204838658,
      "grad_norm": 2.78125,
      "learning_rate": 1.8802490193176893e-05,
      "loss": 0.8712,
      "step": 566280
    },
    {
      "epoch": 1.9847052679907615,
      "grad_norm": 2.796875,
      "learning_rate": 1.880184116451319e-05,
      "loss": 0.8545,
      "step": 566290
    },
    {
      "epoch": 1.984740315497657,
      "grad_norm": 3.359375,
      "learning_rate": 1.880119213584949e-05,
      "loss": 0.9081,
      "step": 566300
    },
    {
      "epoch": 1.9847753630045526,
      "grad_norm": 2.578125,
      "learning_rate": 1.8800543107185787e-05,
      "loss": 0.8194,
      "step": 566310
    },
    {
      "epoch": 1.9848104105114484,
      "grad_norm": 3.046875,
      "learning_rate": 1.8799894078522085e-05,
      "loss": 0.8102,
      "step": 566320
    },
    {
      "epoch": 1.9848454580183439,
      "grad_norm": 3.09375,
      "learning_rate": 1.8799245049858383e-05,
      "loss": 0.8479,
      "step": 566330
    },
    {
      "epoch": 1.9848805055252394,
      "grad_norm": 2.703125,
      "learning_rate": 1.879859602119468e-05,
      "loss": 0.8081,
      "step": 566340
    },
    {
      "epoch": 1.9849155530321352,
      "grad_norm": 3.109375,
      "learning_rate": 1.879794699253098e-05,
      "loss": 0.8225,
      "step": 566350
    },
    {
      "epoch": 1.9849506005390305,
      "grad_norm": 2.234375,
      "learning_rate": 1.8797297963867277e-05,
      "loss": 0.7733,
      "step": 566360
    },
    {
      "epoch": 1.9849856480459263,
      "grad_norm": 2.703125,
      "learning_rate": 1.8796648935203575e-05,
      "loss": 0.8789,
      "step": 566370
    },
    {
      "epoch": 1.985020695552822,
      "grad_norm": 2.953125,
      "learning_rate": 1.8795999906539873e-05,
      "loss": 0.8057,
      "step": 566380
    },
    {
      "epoch": 1.9850557430597173,
      "grad_norm": 2.890625,
      "learning_rate": 1.879535087787617e-05,
      "loss": 0.8215,
      "step": 566390
    },
    {
      "epoch": 1.985090790566613,
      "grad_norm": 2.890625,
      "learning_rate": 1.879470184921247e-05,
      "loss": 0.8698,
      "step": 566400
    },
    {
      "epoch": 1.9851258380735086,
      "grad_norm": 2.640625,
      "learning_rate": 1.879405282054877e-05,
      "loss": 0.7923,
      "step": 566410
    },
    {
      "epoch": 1.9851608855804042,
      "grad_norm": 2.671875,
      "learning_rate": 1.879340379188507e-05,
      "loss": 0.7696,
      "step": 566420
    },
    {
      "epoch": 1.9851959330873,
      "grad_norm": 3.25,
      "learning_rate": 1.8792754763221366e-05,
      "loss": 0.8489,
      "step": 566430
    },
    {
      "epoch": 1.9852309805941954,
      "grad_norm": 3.140625,
      "learning_rate": 1.879210573455766e-05,
      "loss": 0.8264,
      "step": 566440
    },
    {
      "epoch": 1.985266028101091,
      "grad_norm": 2.828125,
      "learning_rate": 1.879145670589396e-05,
      "loss": 0.8054,
      "step": 566450
    },
    {
      "epoch": 1.9853010756079867,
      "grad_norm": 2.671875,
      "learning_rate": 1.8790807677230257e-05,
      "loss": 0.7515,
      "step": 566460
    },
    {
      "epoch": 1.985336123114882,
      "grad_norm": 2.640625,
      "learning_rate": 1.8790158648566555e-05,
      "loss": 0.8443,
      "step": 566470
    },
    {
      "epoch": 1.9853711706217778,
      "grad_norm": 2.71875,
      "learning_rate": 1.8789509619902853e-05,
      "loss": 0.8487,
      "step": 566480
    },
    {
      "epoch": 1.9854062181286736,
      "grad_norm": 2.921875,
      "learning_rate": 1.878886059123915e-05,
      "loss": 0.8671,
      "step": 566490
    },
    {
      "epoch": 1.9854412656355689,
      "grad_norm": 2.4375,
      "learning_rate": 1.878821156257545e-05,
      "loss": 0.8231,
      "step": 566500
    },
    {
      "epoch": 1.9854763131424646,
      "grad_norm": 3.0625,
      "learning_rate": 1.8787562533911747e-05,
      "loss": 0.8793,
      "step": 566510
    },
    {
      "epoch": 1.9855113606493602,
      "grad_norm": 3.046875,
      "learning_rate": 1.8786913505248045e-05,
      "loss": 0.8693,
      "step": 566520
    },
    {
      "epoch": 1.9855464081562557,
      "grad_norm": 2.953125,
      "learning_rate": 1.8786264476584346e-05,
      "loss": 0.8455,
      "step": 566530
    },
    {
      "epoch": 1.9855814556631515,
      "grad_norm": 3.453125,
      "learning_rate": 1.8785615447920644e-05,
      "loss": 0.8558,
      "step": 566540
    },
    {
      "epoch": 1.985616503170047,
      "grad_norm": 2.875,
      "learning_rate": 1.8784966419256942e-05,
      "loss": 0.8678,
      "step": 566550
    },
    {
      "epoch": 1.9856515506769425,
      "grad_norm": 3.09375,
      "learning_rate": 1.878431739059324e-05,
      "loss": 0.7991,
      "step": 566560
    },
    {
      "epoch": 1.9856865981838383,
      "grad_norm": 3.15625,
      "learning_rate": 1.878366836192954e-05,
      "loss": 0.8404,
      "step": 566570
    },
    {
      "epoch": 1.9857216456907338,
      "grad_norm": 2.875,
      "learning_rate": 1.8783019333265836e-05,
      "loss": 0.8546,
      "step": 566580
    },
    {
      "epoch": 1.9857566931976294,
      "grad_norm": 3.015625,
      "learning_rate": 1.8782370304602134e-05,
      "loss": 0.7689,
      "step": 566590
    },
    {
      "epoch": 1.9857917407045251,
      "grad_norm": 3.15625,
      "learning_rate": 1.8781721275938432e-05,
      "loss": 0.7707,
      "step": 566600
    },
    {
      "epoch": 1.9858267882114204,
      "grad_norm": 3.359375,
      "learning_rate": 1.878107224727473e-05,
      "loss": 0.8432,
      "step": 566610
    },
    {
      "epoch": 1.9858618357183162,
      "grad_norm": 2.96875,
      "learning_rate": 1.8780423218611025e-05,
      "loss": 0.7926,
      "step": 566620
    },
    {
      "epoch": 1.9858968832252117,
      "grad_norm": 2.921875,
      "learning_rate": 1.8779774189947323e-05,
      "loss": 0.8352,
      "step": 566630
    },
    {
      "epoch": 1.9859319307321073,
      "grad_norm": 2.75,
      "learning_rate": 1.8779125161283624e-05,
      "loss": 0.8078,
      "step": 566640
    },
    {
      "epoch": 1.985966978239003,
      "grad_norm": 2.890625,
      "learning_rate": 1.8778476132619922e-05,
      "loss": 0.7507,
      "step": 566650
    },
    {
      "epoch": 1.9860020257458986,
      "grad_norm": 3.171875,
      "learning_rate": 1.877782710395622e-05,
      "loss": 0.8116,
      "step": 566660
    },
    {
      "epoch": 1.986037073252794,
      "grad_norm": 3.140625,
      "learning_rate": 1.877717807529252e-05,
      "loss": 0.8106,
      "step": 566670
    },
    {
      "epoch": 1.9860721207596899,
      "grad_norm": 2.921875,
      "learning_rate": 1.8776529046628816e-05,
      "loss": 0.8696,
      "step": 566680
    },
    {
      "epoch": 1.9861071682665854,
      "grad_norm": 2.78125,
      "learning_rate": 1.8775880017965114e-05,
      "loss": 0.803,
      "step": 566690
    },
    {
      "epoch": 1.986142215773481,
      "grad_norm": 2.8125,
      "learning_rate": 1.8775230989301412e-05,
      "loss": 0.8094,
      "step": 566700
    },
    {
      "epoch": 1.9861772632803767,
      "grad_norm": 3.09375,
      "learning_rate": 1.877458196063771e-05,
      "loss": 0.7907,
      "step": 566710
    },
    {
      "epoch": 1.986212310787272,
      "grad_norm": 2.65625,
      "learning_rate": 1.877393293197401e-05,
      "loss": 0.8022,
      "step": 566720
    },
    {
      "epoch": 1.9862473582941678,
      "grad_norm": 2.6875,
      "learning_rate": 1.8773283903310306e-05,
      "loss": 0.8535,
      "step": 566730
    },
    {
      "epoch": 1.9862824058010633,
      "grad_norm": 3.046875,
      "learning_rate": 1.8772634874646604e-05,
      "loss": 0.7944,
      "step": 566740
    },
    {
      "epoch": 1.9863174533079588,
      "grad_norm": 2.859375,
      "learning_rate": 1.8771985845982902e-05,
      "loss": 0.7783,
      "step": 566750
    },
    {
      "epoch": 1.9863525008148546,
      "grad_norm": 2.953125,
      "learning_rate": 1.87713368173192e-05,
      "loss": 0.8097,
      "step": 566760
    },
    {
      "epoch": 1.9863875483217501,
      "grad_norm": 3.0625,
      "learning_rate": 1.87706877886555e-05,
      "loss": 0.7833,
      "step": 566770
    },
    {
      "epoch": 1.9864225958286457,
      "grad_norm": 3.0625,
      "learning_rate": 1.87700387599918e-05,
      "loss": 0.8241,
      "step": 566780
    },
    {
      "epoch": 1.9864576433355414,
      "grad_norm": 2.859375,
      "learning_rate": 1.8769389731328098e-05,
      "loss": 0.8566,
      "step": 566790
    },
    {
      "epoch": 1.986492690842437,
      "grad_norm": 2.5,
      "learning_rate": 1.8768740702664396e-05,
      "loss": 0.791,
      "step": 566800
    },
    {
      "epoch": 1.9865277383493325,
      "grad_norm": 3.0,
      "learning_rate": 1.876809167400069e-05,
      "loss": 0.8271,
      "step": 566810
    },
    {
      "epoch": 1.9865627858562283,
      "grad_norm": 2.90625,
      "learning_rate": 1.876744264533699e-05,
      "loss": 0.8272,
      "step": 566820
    },
    {
      "epoch": 1.9865978333631236,
      "grad_norm": 2.765625,
      "learning_rate": 1.8766793616673286e-05,
      "loss": 0.8148,
      "step": 566830
    },
    {
      "epoch": 1.9866328808700193,
      "grad_norm": 2.234375,
      "learning_rate": 1.8766144588009584e-05,
      "loss": 0.7322,
      "step": 566840
    },
    {
      "epoch": 1.9866679283769149,
      "grad_norm": 2.609375,
      "learning_rate": 1.8765495559345882e-05,
      "loss": 0.8233,
      "step": 566850
    },
    {
      "epoch": 1.9867029758838104,
      "grad_norm": 2.59375,
      "learning_rate": 1.876484653068218e-05,
      "loss": 0.7363,
      "step": 566860
    },
    {
      "epoch": 1.9867380233907062,
      "grad_norm": 2.765625,
      "learning_rate": 1.876419750201848e-05,
      "loss": 0.8517,
      "step": 566870
    },
    {
      "epoch": 1.9867730708976017,
      "grad_norm": 3.484375,
      "learning_rate": 1.8763548473354776e-05,
      "loss": 0.8019,
      "step": 566880
    },
    {
      "epoch": 1.9868081184044972,
      "grad_norm": 2.9375,
      "learning_rate": 1.8762899444691078e-05,
      "loss": 0.7636,
      "step": 566890
    },
    {
      "epoch": 1.986843165911393,
      "grad_norm": 2.8125,
      "learning_rate": 1.8762250416027376e-05,
      "loss": 0.8408,
      "step": 566900
    },
    {
      "epoch": 1.9868782134182885,
      "grad_norm": 3.0625,
      "learning_rate": 1.8761601387363674e-05,
      "loss": 0.8316,
      "step": 566910
    },
    {
      "epoch": 1.986913260925184,
      "grad_norm": 2.78125,
      "learning_rate": 1.8760952358699972e-05,
      "loss": 0.855,
      "step": 566920
    },
    {
      "epoch": 1.9869483084320798,
      "grad_norm": 2.75,
      "learning_rate": 1.876030333003627e-05,
      "loss": 0.8288,
      "step": 566930
    },
    {
      "epoch": 1.9869833559389751,
      "grad_norm": 3.03125,
      "learning_rate": 1.8759654301372568e-05,
      "loss": 0.8841,
      "step": 566940
    },
    {
      "epoch": 1.9870184034458709,
      "grad_norm": 3.0,
      "learning_rate": 1.8759005272708866e-05,
      "loss": 0.8183,
      "step": 566950
    },
    {
      "epoch": 1.9870534509527664,
      "grad_norm": 2.90625,
      "learning_rate": 1.8758356244045164e-05,
      "loss": 0.8602,
      "step": 566960
    },
    {
      "epoch": 1.987088498459662,
      "grad_norm": 2.640625,
      "learning_rate": 1.8757707215381462e-05,
      "loss": 0.7354,
      "step": 566970
    },
    {
      "epoch": 1.9871235459665577,
      "grad_norm": 2.625,
      "learning_rate": 1.875705818671776e-05,
      "loss": 0.7956,
      "step": 566980
    },
    {
      "epoch": 1.9871585934734532,
      "grad_norm": 3.046875,
      "learning_rate": 1.8756409158054058e-05,
      "loss": 0.8035,
      "step": 566990
    },
    {
      "epoch": 1.9871936409803488,
      "grad_norm": 2.921875,
      "learning_rate": 1.8755760129390352e-05,
      "loss": 0.7648,
      "step": 567000
    },
    {
      "epoch": 1.9872286884872445,
      "grad_norm": 3.1875,
      "learning_rate": 1.8755111100726654e-05,
      "loss": 0.9403,
      "step": 567010
    },
    {
      "epoch": 1.98726373599414,
      "grad_norm": 2.921875,
      "learning_rate": 1.8754462072062952e-05,
      "loss": 0.8346,
      "step": 567020
    },
    {
      "epoch": 1.9872987835010356,
      "grad_norm": 3.09375,
      "learning_rate": 1.875381304339925e-05,
      "loss": 0.8351,
      "step": 567030
    },
    {
      "epoch": 1.9873338310079314,
      "grad_norm": 2.6875,
      "learning_rate": 1.8753164014735548e-05,
      "loss": 0.7395,
      "step": 567040
    },
    {
      "epoch": 1.9873688785148267,
      "grad_norm": 2.984375,
      "learning_rate": 1.8752514986071846e-05,
      "loss": 0.8432,
      "step": 567050
    },
    {
      "epoch": 1.9874039260217224,
      "grad_norm": 3.0,
      "learning_rate": 1.8751865957408144e-05,
      "loss": 0.8387,
      "step": 567060
    },
    {
      "epoch": 1.9874389735286182,
      "grad_norm": 2.875,
      "learning_rate": 1.8751216928744442e-05,
      "loss": 0.8441,
      "step": 567070
    },
    {
      "epoch": 1.9874740210355135,
      "grad_norm": 2.953125,
      "learning_rate": 1.875056790008074e-05,
      "loss": 0.8179,
      "step": 567080
    },
    {
      "epoch": 1.9875090685424093,
      "grad_norm": 3.125,
      "learning_rate": 1.8749918871417038e-05,
      "loss": 0.8218,
      "step": 567090
    },
    {
      "epoch": 1.9875441160493048,
      "grad_norm": 2.625,
      "learning_rate": 1.8749269842753336e-05,
      "loss": 0.8269,
      "step": 567100
    },
    {
      "epoch": 1.9875791635562003,
      "grad_norm": 2.484375,
      "learning_rate": 1.8748620814089634e-05,
      "loss": 0.7995,
      "step": 567110
    },
    {
      "epoch": 1.987614211063096,
      "grad_norm": 2.859375,
      "learning_rate": 1.8747971785425932e-05,
      "loss": 0.82,
      "step": 567120
    },
    {
      "epoch": 1.9876492585699916,
      "grad_norm": 2.984375,
      "learning_rate": 1.874732275676223e-05,
      "loss": 0.8148,
      "step": 567130
    },
    {
      "epoch": 1.9876843060768872,
      "grad_norm": 2.9375,
      "learning_rate": 1.8746673728098528e-05,
      "loss": 0.8134,
      "step": 567140
    },
    {
      "epoch": 1.987719353583783,
      "grad_norm": 3.359375,
      "learning_rate": 1.874602469943483e-05,
      "loss": 0.8379,
      "step": 567150
    },
    {
      "epoch": 1.9877544010906785,
      "grad_norm": 3.265625,
      "learning_rate": 1.8745375670771127e-05,
      "loss": 0.8728,
      "step": 567160
    },
    {
      "epoch": 1.987789448597574,
      "grad_norm": 3.4375,
      "learning_rate": 1.8744726642107425e-05,
      "loss": 0.8361,
      "step": 567170
    },
    {
      "epoch": 1.9878244961044698,
      "grad_norm": 3.015625,
      "learning_rate": 1.8744077613443723e-05,
      "loss": 0.8864,
      "step": 567180
    },
    {
      "epoch": 1.987859543611365,
      "grad_norm": 3.046875,
      "learning_rate": 1.8743428584780018e-05,
      "loss": 0.7358,
      "step": 567190
    },
    {
      "epoch": 1.9878945911182608,
      "grad_norm": 2.9375,
      "learning_rate": 1.8742779556116316e-05,
      "loss": 0.7492,
      "step": 567200
    },
    {
      "epoch": 1.9879296386251564,
      "grad_norm": 3.015625,
      "learning_rate": 1.8742130527452614e-05,
      "loss": 0.7848,
      "step": 567210
    },
    {
      "epoch": 1.987964686132052,
      "grad_norm": 2.4375,
      "learning_rate": 1.8741481498788912e-05,
      "loss": 0.7904,
      "step": 567220
    },
    {
      "epoch": 1.9879997336389477,
      "grad_norm": 2.953125,
      "learning_rate": 1.874083247012521e-05,
      "loss": 0.8069,
      "step": 567230
    },
    {
      "epoch": 1.9880347811458432,
      "grad_norm": 3.09375,
      "learning_rate": 1.8740183441461508e-05,
      "loss": 0.8512,
      "step": 567240
    },
    {
      "epoch": 1.9880698286527387,
      "grad_norm": 2.75,
      "learning_rate": 1.8739534412797806e-05,
      "loss": 0.7613,
      "step": 567250
    },
    {
      "epoch": 1.9881048761596345,
      "grad_norm": 2.765625,
      "learning_rate": 1.8738885384134107e-05,
      "loss": 0.7927,
      "step": 567260
    },
    {
      "epoch": 1.98813992366653,
      "grad_norm": 3.171875,
      "learning_rate": 1.8738236355470405e-05,
      "loss": 0.7741,
      "step": 567270
    },
    {
      "epoch": 1.9881749711734256,
      "grad_norm": 3.15625,
      "learning_rate": 1.8737587326806703e-05,
      "loss": 0.84,
      "step": 567280
    },
    {
      "epoch": 1.9882100186803213,
      "grad_norm": 3.046875,
      "learning_rate": 1.8736938298143e-05,
      "loss": 0.815,
      "step": 567290
    },
    {
      "epoch": 1.9882450661872166,
      "grad_norm": 2.71875,
      "learning_rate": 1.87362892694793e-05,
      "loss": 0.8602,
      "step": 567300
    },
    {
      "epoch": 1.9882801136941124,
      "grad_norm": 3.125,
      "learning_rate": 1.8735640240815597e-05,
      "loss": 0.8301,
      "step": 567310
    },
    {
      "epoch": 1.988315161201008,
      "grad_norm": 3.03125,
      "learning_rate": 1.8734991212151895e-05,
      "loss": 0.8374,
      "step": 567320
    },
    {
      "epoch": 1.9883502087079035,
      "grad_norm": 2.84375,
      "learning_rate": 1.8734342183488193e-05,
      "loss": 0.7925,
      "step": 567330
    },
    {
      "epoch": 1.9883852562147992,
      "grad_norm": 2.75,
      "learning_rate": 1.873369315482449e-05,
      "loss": 0.8337,
      "step": 567340
    },
    {
      "epoch": 1.9884203037216948,
      "grad_norm": 2.6875,
      "learning_rate": 1.873304412616079e-05,
      "loss": 0.8692,
      "step": 567350
    },
    {
      "epoch": 1.9884553512285903,
      "grad_norm": 2.640625,
      "learning_rate": 1.8732395097497087e-05,
      "loss": 0.8374,
      "step": 567360
    },
    {
      "epoch": 1.988490398735486,
      "grad_norm": 3.03125,
      "learning_rate": 1.8731746068833385e-05,
      "loss": 0.9221,
      "step": 567370
    },
    {
      "epoch": 1.9885254462423816,
      "grad_norm": 2.703125,
      "learning_rate": 1.8731097040169683e-05,
      "loss": 0.9195,
      "step": 567380
    },
    {
      "epoch": 1.9885604937492771,
      "grad_norm": 3.046875,
      "learning_rate": 1.873044801150598e-05,
      "loss": 0.8378,
      "step": 567390
    },
    {
      "epoch": 1.9885955412561729,
      "grad_norm": 2.921875,
      "learning_rate": 1.872979898284228e-05,
      "loss": 0.85,
      "step": 567400
    },
    {
      "epoch": 1.9886305887630682,
      "grad_norm": 3.0625,
      "learning_rate": 1.8729149954178577e-05,
      "loss": 0.8187,
      "step": 567410
    },
    {
      "epoch": 1.988665636269964,
      "grad_norm": 2.734375,
      "learning_rate": 1.8728500925514875e-05,
      "loss": 0.8858,
      "step": 567420
    },
    {
      "epoch": 1.9887006837768595,
      "grad_norm": 3.21875,
      "learning_rate": 1.8727851896851173e-05,
      "loss": 0.8072,
      "step": 567430
    },
    {
      "epoch": 1.988735731283755,
      "grad_norm": 3.28125,
      "learning_rate": 1.872720286818747e-05,
      "loss": 0.7872,
      "step": 567440
    },
    {
      "epoch": 1.9887707787906508,
      "grad_norm": 3.203125,
      "learning_rate": 1.872655383952377e-05,
      "loss": 0.8058,
      "step": 567450
    },
    {
      "epoch": 1.9888058262975463,
      "grad_norm": 2.859375,
      "learning_rate": 1.8725904810860067e-05,
      "loss": 0.9092,
      "step": 567460
    },
    {
      "epoch": 1.9888408738044419,
      "grad_norm": 2.875,
      "learning_rate": 1.8725255782196365e-05,
      "loss": 0.8293,
      "step": 567470
    },
    {
      "epoch": 1.9888759213113376,
      "grad_norm": 2.140625,
      "learning_rate": 1.8724606753532663e-05,
      "loss": 0.7998,
      "step": 567480
    },
    {
      "epoch": 1.9889109688182331,
      "grad_norm": 2.546875,
      "learning_rate": 1.872395772486896e-05,
      "loss": 0.7439,
      "step": 567490
    },
    {
      "epoch": 1.9889460163251287,
      "grad_norm": 2.859375,
      "learning_rate": 1.872330869620526e-05,
      "loss": 0.7363,
      "step": 567500
    },
    {
      "epoch": 1.9889810638320244,
      "grad_norm": 3.28125,
      "learning_rate": 1.872265966754156e-05,
      "loss": 0.8477,
      "step": 567510
    },
    {
      "epoch": 1.9890161113389198,
      "grad_norm": 2.96875,
      "learning_rate": 1.872201063887786e-05,
      "loss": 0.9284,
      "step": 567520
    },
    {
      "epoch": 1.9890511588458155,
      "grad_norm": 2.703125,
      "learning_rate": 1.8721361610214156e-05,
      "loss": 0.8201,
      "step": 567530
    },
    {
      "epoch": 1.989086206352711,
      "grad_norm": 2.859375,
      "learning_rate": 1.8720712581550454e-05,
      "loss": 0.8005,
      "step": 567540
    },
    {
      "epoch": 1.9891212538596066,
      "grad_norm": 2.6875,
      "learning_rate": 1.8720063552886752e-05,
      "loss": 0.9251,
      "step": 567550
    },
    {
      "epoch": 1.9891563013665023,
      "grad_norm": 3.125,
      "learning_rate": 1.8719414524223047e-05,
      "loss": 0.7907,
      "step": 567560
    },
    {
      "epoch": 1.9891913488733979,
      "grad_norm": 3.296875,
      "learning_rate": 1.8718765495559345e-05,
      "loss": 0.7878,
      "step": 567570
    },
    {
      "epoch": 1.9892263963802934,
      "grad_norm": 2.828125,
      "learning_rate": 1.8718116466895643e-05,
      "loss": 0.7984,
      "step": 567580
    },
    {
      "epoch": 1.9892614438871892,
      "grad_norm": 3.046875,
      "learning_rate": 1.871746743823194e-05,
      "loss": 0.7746,
      "step": 567590
    },
    {
      "epoch": 1.9892964913940847,
      "grad_norm": 3.015625,
      "learning_rate": 1.871681840956824e-05,
      "loss": 0.8258,
      "step": 567600
    },
    {
      "epoch": 1.9893315389009802,
      "grad_norm": 3.109375,
      "learning_rate": 1.8716169380904537e-05,
      "loss": 0.7708,
      "step": 567610
    },
    {
      "epoch": 1.989366586407876,
      "grad_norm": 3.015625,
      "learning_rate": 1.8715520352240835e-05,
      "loss": 0.817,
      "step": 567620
    },
    {
      "epoch": 1.9894016339147713,
      "grad_norm": 3.046875,
      "learning_rate": 1.8714871323577136e-05,
      "loss": 0.8211,
      "step": 567630
    },
    {
      "epoch": 1.989436681421667,
      "grad_norm": 3.1875,
      "learning_rate": 1.8714222294913434e-05,
      "loss": 0.852,
      "step": 567640
    },
    {
      "epoch": 1.9894717289285628,
      "grad_norm": 2.453125,
      "learning_rate": 1.8713573266249732e-05,
      "loss": 0.7772,
      "step": 567650
    },
    {
      "epoch": 1.9895067764354581,
      "grad_norm": 2.921875,
      "learning_rate": 1.871292423758603e-05,
      "loss": 0.8922,
      "step": 567660
    },
    {
      "epoch": 1.989541823942354,
      "grad_norm": 2.640625,
      "learning_rate": 1.871227520892233e-05,
      "loss": 0.8247,
      "step": 567670
    },
    {
      "epoch": 1.9895768714492494,
      "grad_norm": 3.046875,
      "learning_rate": 1.8711626180258626e-05,
      "loss": 0.7632,
      "step": 567680
    },
    {
      "epoch": 1.989611918956145,
      "grad_norm": 2.59375,
      "learning_rate": 1.8710977151594924e-05,
      "loss": 0.7881,
      "step": 567690
    },
    {
      "epoch": 1.9896469664630407,
      "grad_norm": 3.046875,
      "learning_rate": 1.8710328122931222e-05,
      "loss": 0.8172,
      "step": 567700
    },
    {
      "epoch": 1.9896820139699363,
      "grad_norm": 2.734375,
      "learning_rate": 1.870967909426752e-05,
      "loss": 0.8333,
      "step": 567710
    },
    {
      "epoch": 1.9897170614768318,
      "grad_norm": 2.96875,
      "learning_rate": 1.870903006560382e-05,
      "loss": 0.8817,
      "step": 567720
    },
    {
      "epoch": 1.9897521089837276,
      "grad_norm": 2.96875,
      "learning_rate": 1.8708381036940116e-05,
      "loss": 0.886,
      "step": 567730
    },
    {
      "epoch": 1.9897871564906229,
      "grad_norm": 2.890625,
      "learning_rate": 1.8707732008276414e-05,
      "loss": 0.898,
      "step": 567740
    },
    {
      "epoch": 1.9898222039975186,
      "grad_norm": 2.921875,
      "learning_rate": 1.8707082979612712e-05,
      "loss": 0.8076,
      "step": 567750
    },
    {
      "epoch": 1.9898572515044144,
      "grad_norm": 3.0,
      "learning_rate": 1.870643395094901e-05,
      "loss": 0.7783,
      "step": 567760
    },
    {
      "epoch": 1.9898922990113097,
      "grad_norm": 2.671875,
      "learning_rate": 1.870578492228531e-05,
      "loss": 0.814,
      "step": 567770
    },
    {
      "epoch": 1.9899273465182055,
      "grad_norm": 2.59375,
      "learning_rate": 1.8705135893621606e-05,
      "loss": 0.8061,
      "step": 567780
    },
    {
      "epoch": 1.989962394025101,
      "grad_norm": 3.03125,
      "learning_rate": 1.8704486864957904e-05,
      "loss": 0.7322,
      "step": 567790
    },
    {
      "epoch": 1.9899974415319965,
      "grad_norm": 3.0,
      "learning_rate": 1.8703837836294202e-05,
      "loss": 0.778,
      "step": 567800
    },
    {
      "epoch": 1.9900324890388923,
      "grad_norm": 2.65625,
      "learning_rate": 1.87031888076305e-05,
      "loss": 0.7562,
      "step": 567810
    },
    {
      "epoch": 1.9900675365457878,
      "grad_norm": 2.640625,
      "learning_rate": 1.87025397789668e-05,
      "loss": 0.8151,
      "step": 567820
    },
    {
      "epoch": 1.9901025840526834,
      "grad_norm": 3.1875,
      "learning_rate": 1.8701890750303096e-05,
      "loss": 0.861,
      "step": 567830
    },
    {
      "epoch": 1.9901376315595791,
      "grad_norm": 2.609375,
      "learning_rate": 1.8701241721639394e-05,
      "loss": 0.8695,
      "step": 567840
    },
    {
      "epoch": 1.9901726790664747,
      "grad_norm": 3.15625,
      "learning_rate": 1.8700592692975692e-05,
      "loss": 0.8289,
      "step": 567850
    },
    {
      "epoch": 1.9902077265733702,
      "grad_norm": 2.75,
      "learning_rate": 1.869994366431199e-05,
      "loss": 0.7995,
      "step": 567860
    },
    {
      "epoch": 1.990242774080266,
      "grad_norm": 2.90625,
      "learning_rate": 1.869929463564829e-05,
      "loss": 0.777,
      "step": 567870
    },
    {
      "epoch": 1.9902778215871613,
      "grad_norm": 2.96875,
      "learning_rate": 1.869864560698459e-05,
      "loss": 0.8644,
      "step": 567880
    },
    {
      "epoch": 1.990312869094057,
      "grad_norm": 3.046875,
      "learning_rate": 1.8697996578320888e-05,
      "loss": 0.8161,
      "step": 567890
    },
    {
      "epoch": 1.9903479166009526,
      "grad_norm": 3.109375,
      "learning_rate": 1.8697347549657186e-05,
      "loss": 0.8299,
      "step": 567900
    },
    {
      "epoch": 1.990382964107848,
      "grad_norm": 2.359375,
      "learning_rate": 1.8696698520993484e-05,
      "loss": 0.8637,
      "step": 567910
    },
    {
      "epoch": 1.9904180116147439,
      "grad_norm": 3.390625,
      "learning_rate": 1.8696049492329782e-05,
      "loss": 0.896,
      "step": 567920
    },
    {
      "epoch": 1.9904530591216394,
      "grad_norm": 2.234375,
      "learning_rate": 1.869540046366608e-05,
      "loss": 0.7911,
      "step": 567930
    },
    {
      "epoch": 1.990488106628535,
      "grad_norm": 3.140625,
      "learning_rate": 1.8694751435002374e-05,
      "loss": 0.9065,
      "step": 567940
    },
    {
      "epoch": 1.9905231541354307,
      "grad_norm": 2.71875,
      "learning_rate": 1.8694102406338672e-05,
      "loss": 0.7936,
      "step": 567950
    },
    {
      "epoch": 1.9905582016423262,
      "grad_norm": 2.671875,
      "learning_rate": 1.869345337767497e-05,
      "loss": 0.8589,
      "step": 567960
    },
    {
      "epoch": 1.9905932491492218,
      "grad_norm": 3.015625,
      "learning_rate": 1.869280434901127e-05,
      "loss": 0.9301,
      "step": 567970
    },
    {
      "epoch": 1.9906282966561175,
      "grad_norm": 3.21875,
      "learning_rate": 1.8692155320347566e-05,
      "loss": 0.8616,
      "step": 567980
    },
    {
      "epoch": 1.9906633441630128,
      "grad_norm": 2.515625,
      "learning_rate": 1.8691506291683868e-05,
      "loss": 0.8658,
      "step": 567990
    },
    {
      "epoch": 1.9906983916699086,
      "grad_norm": 2.859375,
      "learning_rate": 1.8690857263020166e-05,
      "loss": 0.8135,
      "step": 568000
    },
    {
      "epoch": 1.9907334391768041,
      "grad_norm": 2.734375,
      "learning_rate": 1.8690208234356464e-05,
      "loss": 0.8447,
      "step": 568010
    },
    {
      "epoch": 1.9907684866836997,
      "grad_norm": 2.890625,
      "learning_rate": 1.8689559205692762e-05,
      "loss": 0.7999,
      "step": 568020
    },
    {
      "epoch": 1.9908035341905954,
      "grad_norm": 2.859375,
      "learning_rate": 1.868891017702906e-05,
      "loss": 0.7034,
      "step": 568030
    },
    {
      "epoch": 1.990838581697491,
      "grad_norm": 2.328125,
      "learning_rate": 1.8688261148365358e-05,
      "loss": 0.7834,
      "step": 568040
    },
    {
      "epoch": 1.9908736292043865,
      "grad_norm": 2.859375,
      "learning_rate": 1.8687612119701656e-05,
      "loss": 0.8086,
      "step": 568050
    },
    {
      "epoch": 1.9909086767112822,
      "grad_norm": 4.28125,
      "learning_rate": 1.8686963091037954e-05,
      "loss": 0.8179,
      "step": 568060
    },
    {
      "epoch": 1.9909437242181778,
      "grad_norm": 2.921875,
      "learning_rate": 1.8686314062374252e-05,
      "loss": 0.7962,
      "step": 568070
    },
    {
      "epoch": 1.9909787717250733,
      "grad_norm": 2.84375,
      "learning_rate": 1.868566503371055e-05,
      "loss": 0.8592,
      "step": 568080
    },
    {
      "epoch": 1.991013819231969,
      "grad_norm": 3.21875,
      "learning_rate": 1.8685016005046848e-05,
      "loss": 0.825,
      "step": 568090
    },
    {
      "epoch": 1.9910488667388644,
      "grad_norm": 2.796875,
      "learning_rate": 1.8684366976383146e-05,
      "loss": 0.8025,
      "step": 568100
    },
    {
      "epoch": 1.9910839142457601,
      "grad_norm": 3.0625,
      "learning_rate": 1.8683717947719444e-05,
      "loss": 0.8054,
      "step": 568110
    },
    {
      "epoch": 1.9911189617526557,
      "grad_norm": 2.84375,
      "learning_rate": 1.8683068919055742e-05,
      "loss": 0.8034,
      "step": 568120
    },
    {
      "epoch": 1.9911540092595512,
      "grad_norm": 2.453125,
      "learning_rate": 1.868241989039204e-05,
      "loss": 0.8131,
      "step": 568130
    },
    {
      "epoch": 1.991189056766447,
      "grad_norm": 3.0625,
      "learning_rate": 1.8681770861728338e-05,
      "loss": 0.8593,
      "step": 568140
    },
    {
      "epoch": 1.9912241042733425,
      "grad_norm": 2.421875,
      "learning_rate": 1.8681121833064636e-05,
      "loss": 0.9196,
      "step": 568150
    },
    {
      "epoch": 1.991259151780238,
      "grad_norm": 3.03125,
      "learning_rate": 1.8680472804400934e-05,
      "loss": 0.8481,
      "step": 568160
    },
    {
      "epoch": 1.9912941992871338,
      "grad_norm": 2.90625,
      "learning_rate": 1.8679823775737232e-05,
      "loss": 0.8233,
      "step": 568170
    },
    {
      "epoch": 1.9913292467940293,
      "grad_norm": 2.84375,
      "learning_rate": 1.867917474707353e-05,
      "loss": 0.8291,
      "step": 568180
    },
    {
      "epoch": 1.9913642943009249,
      "grad_norm": 2.796875,
      "learning_rate": 1.8678525718409828e-05,
      "loss": 0.7847,
      "step": 568190
    },
    {
      "epoch": 1.9913993418078206,
      "grad_norm": 2.84375,
      "learning_rate": 1.8677876689746126e-05,
      "loss": 0.8141,
      "step": 568200
    },
    {
      "epoch": 1.991434389314716,
      "grad_norm": 2.6875,
      "learning_rate": 1.8677227661082424e-05,
      "loss": 0.8193,
      "step": 568210
    },
    {
      "epoch": 1.9914694368216117,
      "grad_norm": 2.859375,
      "learning_rate": 1.8676578632418722e-05,
      "loss": 0.8137,
      "step": 568220
    },
    {
      "epoch": 1.9915044843285072,
      "grad_norm": 2.859375,
      "learning_rate": 1.867592960375502e-05,
      "loss": 0.8066,
      "step": 568230
    },
    {
      "epoch": 1.9915395318354028,
      "grad_norm": 3.15625,
      "learning_rate": 1.8675280575091318e-05,
      "loss": 0.8439,
      "step": 568240
    },
    {
      "epoch": 1.9915745793422985,
      "grad_norm": 2.75,
      "learning_rate": 1.867463154642762e-05,
      "loss": 0.7657,
      "step": 568250
    },
    {
      "epoch": 1.991609626849194,
      "grad_norm": 3.015625,
      "learning_rate": 1.8673982517763917e-05,
      "loss": 0.8745,
      "step": 568260
    },
    {
      "epoch": 1.9916446743560896,
      "grad_norm": 2.6875,
      "learning_rate": 1.8673333489100215e-05,
      "loss": 0.8713,
      "step": 568270
    },
    {
      "epoch": 1.9916797218629854,
      "grad_norm": 2.59375,
      "learning_rate": 1.8672684460436513e-05,
      "loss": 0.8379,
      "step": 568280
    },
    {
      "epoch": 1.991714769369881,
      "grad_norm": 3.265625,
      "learning_rate": 1.867203543177281e-05,
      "loss": 0.81,
      "step": 568290
    },
    {
      "epoch": 1.9917498168767764,
      "grad_norm": 3.09375,
      "learning_rate": 1.867138640310911e-05,
      "loss": 0.9083,
      "step": 568300
    },
    {
      "epoch": 1.9917848643836722,
      "grad_norm": 2.828125,
      "learning_rate": 1.8670737374445407e-05,
      "loss": 0.8112,
      "step": 568310
    },
    {
      "epoch": 1.9918199118905675,
      "grad_norm": 2.8125,
      "learning_rate": 1.8670088345781702e-05,
      "loss": 0.8962,
      "step": 568320
    },
    {
      "epoch": 1.9918549593974633,
      "grad_norm": 3.78125,
      "learning_rate": 1.8669439317118e-05,
      "loss": 0.7926,
      "step": 568330
    },
    {
      "epoch": 1.991890006904359,
      "grad_norm": 2.34375,
      "learning_rate": 1.8668790288454298e-05,
      "loss": 0.8397,
      "step": 568340
    },
    {
      "epoch": 1.9919250544112543,
      "grad_norm": 2.703125,
      "learning_rate": 1.8668141259790596e-05,
      "loss": 0.8332,
      "step": 568350
    },
    {
      "epoch": 1.99196010191815,
      "grad_norm": 2.6875,
      "learning_rate": 1.8667492231126897e-05,
      "loss": 0.7841,
      "step": 568360
    },
    {
      "epoch": 1.9919951494250456,
      "grad_norm": 2.953125,
      "learning_rate": 1.8666843202463195e-05,
      "loss": 0.8594,
      "step": 568370
    },
    {
      "epoch": 1.9920301969319412,
      "grad_norm": 2.5625,
      "learning_rate": 1.8666194173799493e-05,
      "loss": 0.7848,
      "step": 568380
    },
    {
      "epoch": 1.992065244438837,
      "grad_norm": 3.1875,
      "learning_rate": 1.866554514513579e-05,
      "loss": 0.8465,
      "step": 568390
    },
    {
      "epoch": 1.9921002919457325,
      "grad_norm": 3.3125,
      "learning_rate": 1.866489611647209e-05,
      "loss": 0.7517,
      "step": 568400
    },
    {
      "epoch": 1.992135339452628,
      "grad_norm": 2.890625,
      "learning_rate": 1.8664247087808387e-05,
      "loss": 0.8256,
      "step": 568410
    },
    {
      "epoch": 1.9921703869595238,
      "grad_norm": 3.046875,
      "learning_rate": 1.8663598059144685e-05,
      "loss": 0.7784,
      "step": 568420
    },
    {
      "epoch": 1.992205434466419,
      "grad_norm": 2.703125,
      "learning_rate": 1.8662949030480983e-05,
      "loss": 0.8032,
      "step": 568430
    },
    {
      "epoch": 1.9922404819733148,
      "grad_norm": 2.59375,
      "learning_rate": 1.866230000181728e-05,
      "loss": 0.8238,
      "step": 568440
    },
    {
      "epoch": 1.9922755294802106,
      "grad_norm": 3.046875,
      "learning_rate": 1.866165097315358e-05,
      "loss": 0.8605,
      "step": 568450
    },
    {
      "epoch": 1.992310576987106,
      "grad_norm": 2.875,
      "learning_rate": 1.8661001944489877e-05,
      "loss": 0.7822,
      "step": 568460
    },
    {
      "epoch": 1.9923456244940017,
      "grad_norm": 2.828125,
      "learning_rate": 1.8660352915826175e-05,
      "loss": 0.8021,
      "step": 568470
    },
    {
      "epoch": 1.9923806720008972,
      "grad_norm": 2.90625,
      "learning_rate": 1.8659703887162473e-05,
      "loss": 0.7874,
      "step": 568480
    },
    {
      "epoch": 1.9924157195077927,
      "grad_norm": 2.6875,
      "learning_rate": 1.865905485849877e-05,
      "loss": 0.8691,
      "step": 568490
    },
    {
      "epoch": 1.9924507670146885,
      "grad_norm": 2.8125,
      "learning_rate": 1.865840582983507e-05,
      "loss": 0.8584,
      "step": 568500
    },
    {
      "epoch": 1.992485814521584,
      "grad_norm": 2.71875,
      "learning_rate": 1.8657756801171367e-05,
      "loss": 0.7642,
      "step": 568510
    },
    {
      "epoch": 1.9925208620284796,
      "grad_norm": 3.234375,
      "learning_rate": 1.8657107772507665e-05,
      "loss": 0.8079,
      "step": 568520
    },
    {
      "epoch": 1.9925559095353753,
      "grad_norm": 2.90625,
      "learning_rate": 1.8656458743843963e-05,
      "loss": 0.766,
      "step": 568530
    },
    {
      "epoch": 1.9925909570422708,
      "grad_norm": 3.171875,
      "learning_rate": 1.865580971518026e-05,
      "loss": 0.8641,
      "step": 568540
    },
    {
      "epoch": 1.9926260045491664,
      "grad_norm": 2.625,
      "learning_rate": 1.865516068651656e-05,
      "loss": 0.7525,
      "step": 568550
    },
    {
      "epoch": 1.9926610520560621,
      "grad_norm": 2.609375,
      "learning_rate": 1.8654511657852857e-05,
      "loss": 0.7778,
      "step": 568560
    },
    {
      "epoch": 1.9926960995629575,
      "grad_norm": 2.9375,
      "learning_rate": 1.8653862629189155e-05,
      "loss": 0.8406,
      "step": 568570
    },
    {
      "epoch": 1.9927311470698532,
      "grad_norm": 2.859375,
      "learning_rate": 1.8653213600525453e-05,
      "loss": 0.8361,
      "step": 568580
    },
    {
      "epoch": 1.9927661945767488,
      "grad_norm": 3.25,
      "learning_rate": 1.865256457186175e-05,
      "loss": 0.8118,
      "step": 568590
    },
    {
      "epoch": 1.9928012420836443,
      "grad_norm": 2.84375,
      "learning_rate": 1.865191554319805e-05,
      "loss": 0.8016,
      "step": 568600
    },
    {
      "epoch": 1.99283628959054,
      "grad_norm": 2.953125,
      "learning_rate": 1.865126651453435e-05,
      "loss": 0.8675,
      "step": 568610
    },
    {
      "epoch": 1.9928713370974356,
      "grad_norm": 2.890625,
      "learning_rate": 1.865061748587065e-05,
      "loss": 0.7502,
      "step": 568620
    },
    {
      "epoch": 1.9929063846043311,
      "grad_norm": 2.671875,
      "learning_rate": 1.8649968457206947e-05,
      "loss": 0.7812,
      "step": 568630
    },
    {
      "epoch": 1.9929414321112269,
      "grad_norm": 3.3125,
      "learning_rate": 1.8649319428543245e-05,
      "loss": 0.8115,
      "step": 568640
    },
    {
      "epoch": 1.9929764796181224,
      "grad_norm": 2.71875,
      "learning_rate": 1.8648670399879543e-05,
      "loss": 0.8098,
      "step": 568650
    },
    {
      "epoch": 1.993011527125018,
      "grad_norm": 3.15625,
      "learning_rate": 1.864802137121584e-05,
      "loss": 0.7877,
      "step": 568660
    },
    {
      "epoch": 1.9930465746319137,
      "grad_norm": 3.015625,
      "learning_rate": 1.864737234255214e-05,
      "loss": 0.8643,
      "step": 568670
    },
    {
      "epoch": 1.993081622138809,
      "grad_norm": 3.484375,
      "learning_rate": 1.8646723313888437e-05,
      "loss": 0.8808,
      "step": 568680
    },
    {
      "epoch": 1.9931166696457048,
      "grad_norm": 2.453125,
      "learning_rate": 1.864607428522473e-05,
      "loss": 0.8218,
      "step": 568690
    },
    {
      "epoch": 1.9931517171526003,
      "grad_norm": 3.09375,
      "learning_rate": 1.864542525656103e-05,
      "loss": 0.8088,
      "step": 568700
    },
    {
      "epoch": 1.9931867646594958,
      "grad_norm": 3.0625,
      "learning_rate": 1.8644776227897327e-05,
      "loss": 0.7539,
      "step": 568710
    },
    {
      "epoch": 1.9932218121663916,
      "grad_norm": 2.8125,
      "learning_rate": 1.8644127199233625e-05,
      "loss": 0.8356,
      "step": 568720
    },
    {
      "epoch": 1.9932568596732871,
      "grad_norm": 3.0,
      "learning_rate": 1.8643478170569927e-05,
      "loss": 0.8108,
      "step": 568730
    },
    {
      "epoch": 1.9932919071801827,
      "grad_norm": 2.90625,
      "learning_rate": 1.8642829141906225e-05,
      "loss": 0.8494,
      "step": 568740
    },
    {
      "epoch": 1.9933269546870784,
      "grad_norm": 2.609375,
      "learning_rate": 1.8642180113242523e-05,
      "loss": 0.8556,
      "step": 568750
    },
    {
      "epoch": 1.993362002193974,
      "grad_norm": 2.875,
      "learning_rate": 1.864153108457882e-05,
      "loss": 0.8284,
      "step": 568760
    },
    {
      "epoch": 1.9933970497008695,
      "grad_norm": 2.484375,
      "learning_rate": 1.864088205591512e-05,
      "loss": 0.8067,
      "step": 568770
    },
    {
      "epoch": 1.9934320972077653,
      "grad_norm": 3.078125,
      "learning_rate": 1.8640233027251417e-05,
      "loss": 0.7819,
      "step": 568780
    },
    {
      "epoch": 1.9934671447146606,
      "grad_norm": 2.359375,
      "learning_rate": 1.8639583998587715e-05,
      "loss": 0.8079,
      "step": 568790
    },
    {
      "epoch": 1.9935021922215563,
      "grad_norm": 2.8125,
      "learning_rate": 1.8638934969924013e-05,
      "loss": 0.8047,
      "step": 568800
    },
    {
      "epoch": 1.9935372397284519,
      "grad_norm": 2.765625,
      "learning_rate": 1.863828594126031e-05,
      "loss": 0.8336,
      "step": 568810
    },
    {
      "epoch": 1.9935722872353474,
      "grad_norm": 2.84375,
      "learning_rate": 1.863763691259661e-05,
      "loss": 0.7805,
      "step": 568820
    },
    {
      "epoch": 1.9936073347422432,
      "grad_norm": 3.125,
      "learning_rate": 1.8636987883932907e-05,
      "loss": 0.7948,
      "step": 568830
    },
    {
      "epoch": 1.9936423822491387,
      "grad_norm": 2.984375,
      "learning_rate": 1.8636338855269205e-05,
      "loss": 0.8071,
      "step": 568840
    },
    {
      "epoch": 1.9936774297560342,
      "grad_norm": 2.890625,
      "learning_rate": 1.8635689826605503e-05,
      "loss": 0.7838,
      "step": 568850
    },
    {
      "epoch": 1.99371247726293,
      "grad_norm": 3.453125,
      "learning_rate": 1.8635040797941804e-05,
      "loss": 0.9502,
      "step": 568860
    },
    {
      "epoch": 1.9937475247698255,
      "grad_norm": 3.078125,
      "learning_rate": 1.8634391769278102e-05,
      "loss": 0.8421,
      "step": 568870
    },
    {
      "epoch": 1.993782572276721,
      "grad_norm": 2.703125,
      "learning_rate": 1.8633742740614397e-05,
      "loss": 0.885,
      "step": 568880
    },
    {
      "epoch": 1.9938176197836168,
      "grad_norm": 2.875,
      "learning_rate": 1.8633093711950695e-05,
      "loss": 0.8489,
      "step": 568890
    },
    {
      "epoch": 1.9938526672905121,
      "grad_norm": 3.125,
      "learning_rate": 1.8632444683286993e-05,
      "loss": 0.7903,
      "step": 568900
    },
    {
      "epoch": 1.993887714797408,
      "grad_norm": 2.796875,
      "learning_rate": 1.863179565462329e-05,
      "loss": 0.7537,
      "step": 568910
    },
    {
      "epoch": 1.9939227623043034,
      "grad_norm": 2.859375,
      "learning_rate": 1.863114662595959e-05,
      "loss": 0.7611,
      "step": 568920
    },
    {
      "epoch": 1.993957809811199,
      "grad_norm": 3.453125,
      "learning_rate": 1.8630497597295887e-05,
      "loss": 0.8521,
      "step": 568930
    },
    {
      "epoch": 1.9939928573180947,
      "grad_norm": 2.875,
      "learning_rate": 1.8629848568632185e-05,
      "loss": 0.9079,
      "step": 568940
    },
    {
      "epoch": 1.9940279048249903,
      "grad_norm": 2.65625,
      "learning_rate": 1.8629199539968483e-05,
      "loss": 0.7792,
      "step": 568950
    },
    {
      "epoch": 1.9940629523318858,
      "grad_norm": 2.84375,
      "learning_rate": 1.862855051130478e-05,
      "loss": 0.8291,
      "step": 568960
    },
    {
      "epoch": 1.9940979998387816,
      "grad_norm": 3.125,
      "learning_rate": 1.862790148264108e-05,
      "loss": 0.7967,
      "step": 568970
    },
    {
      "epoch": 1.994133047345677,
      "grad_norm": 3.0625,
      "learning_rate": 1.862725245397738e-05,
      "loss": 0.7227,
      "step": 568980
    },
    {
      "epoch": 1.9941680948525726,
      "grad_norm": 3.03125,
      "learning_rate": 1.8626603425313678e-05,
      "loss": 0.8569,
      "step": 568990
    },
    {
      "epoch": 1.9942031423594684,
      "grad_norm": 2.546875,
      "learning_rate": 1.8625954396649976e-05,
      "loss": 0.7791,
      "step": 569000
    },
    {
      "epoch": 1.9942381898663637,
      "grad_norm": 3.5,
      "learning_rate": 1.8625305367986274e-05,
      "loss": 0.7891,
      "step": 569010
    },
    {
      "epoch": 1.9942732373732595,
      "grad_norm": 2.734375,
      "learning_rate": 1.8624656339322572e-05,
      "loss": 0.7585,
      "step": 569020
    },
    {
      "epoch": 1.9943082848801552,
      "grad_norm": 2.9375,
      "learning_rate": 1.862400731065887e-05,
      "loss": 0.8189,
      "step": 569030
    },
    {
      "epoch": 1.9943433323870505,
      "grad_norm": 2.90625,
      "learning_rate": 1.8623358281995168e-05,
      "loss": 0.8119,
      "step": 569040
    },
    {
      "epoch": 1.9943783798939463,
      "grad_norm": 3.0625,
      "learning_rate": 1.8622709253331466e-05,
      "loss": 0.8469,
      "step": 569050
    },
    {
      "epoch": 1.9944134274008418,
      "grad_norm": 2.546875,
      "learning_rate": 1.8622060224667764e-05,
      "loss": 0.8733,
      "step": 569060
    },
    {
      "epoch": 1.9944484749077374,
      "grad_norm": 2.859375,
      "learning_rate": 1.862141119600406e-05,
      "loss": 0.7799,
      "step": 569070
    },
    {
      "epoch": 1.9944835224146331,
      "grad_norm": 2.609375,
      "learning_rate": 1.8620762167340357e-05,
      "loss": 0.8074,
      "step": 569080
    },
    {
      "epoch": 1.9945185699215287,
      "grad_norm": 2.46875,
      "learning_rate": 1.8620113138676658e-05,
      "loss": 0.8548,
      "step": 569090
    },
    {
      "epoch": 1.9945536174284242,
      "grad_norm": 3.3125,
      "learning_rate": 1.8619464110012956e-05,
      "loss": 0.9081,
      "step": 569100
    },
    {
      "epoch": 1.99458866493532,
      "grad_norm": 3.125,
      "learning_rate": 1.8618815081349254e-05,
      "loss": 0.8564,
      "step": 569110
    },
    {
      "epoch": 1.9946237124422153,
      "grad_norm": 3.46875,
      "learning_rate": 1.8618166052685552e-05,
      "loss": 0.8201,
      "step": 569120
    },
    {
      "epoch": 1.994658759949111,
      "grad_norm": 3.03125,
      "learning_rate": 1.861751702402185e-05,
      "loss": 0.8786,
      "step": 569130
    },
    {
      "epoch": 1.9946938074560068,
      "grad_norm": 3.0625,
      "learning_rate": 1.8616867995358148e-05,
      "loss": 0.7376,
      "step": 569140
    },
    {
      "epoch": 1.994728854962902,
      "grad_norm": 3.078125,
      "learning_rate": 1.8616218966694446e-05,
      "loss": 0.884,
      "step": 569150
    },
    {
      "epoch": 1.9947639024697978,
      "grad_norm": 2.875,
      "learning_rate": 1.8615569938030744e-05,
      "loss": 0.8592,
      "step": 569160
    },
    {
      "epoch": 1.9947989499766934,
      "grad_norm": 3.171875,
      "learning_rate": 1.8614920909367042e-05,
      "loss": 0.8398,
      "step": 569170
    },
    {
      "epoch": 1.994833997483589,
      "grad_norm": 3.109375,
      "learning_rate": 1.861427188070334e-05,
      "loss": 0.7518,
      "step": 569180
    },
    {
      "epoch": 1.9948690449904847,
      "grad_norm": 2.5625,
      "learning_rate": 1.8613622852039638e-05,
      "loss": 0.812,
      "step": 569190
    },
    {
      "epoch": 1.9949040924973802,
      "grad_norm": 3.046875,
      "learning_rate": 1.8612973823375936e-05,
      "loss": 0.8553,
      "step": 569200
    },
    {
      "epoch": 1.9949391400042757,
      "grad_norm": 2.703125,
      "learning_rate": 1.8612324794712234e-05,
      "loss": 0.7831,
      "step": 569210
    },
    {
      "epoch": 1.9949741875111715,
      "grad_norm": 3.09375,
      "learning_rate": 1.8611675766048532e-05,
      "loss": 0.7761,
      "step": 569220
    },
    {
      "epoch": 1.995009235018067,
      "grad_norm": 2.84375,
      "learning_rate": 1.8611026737384833e-05,
      "loss": 0.789,
      "step": 569230
    },
    {
      "epoch": 1.9950442825249626,
      "grad_norm": 2.65625,
      "learning_rate": 1.861037770872113e-05,
      "loss": 0.7676,
      "step": 569240
    },
    {
      "epoch": 1.9950793300318583,
      "grad_norm": 3.140625,
      "learning_rate": 1.860972868005743e-05,
      "loss": 0.7548,
      "step": 569250
    },
    {
      "epoch": 1.9951143775387536,
      "grad_norm": 3.03125,
      "learning_rate": 1.8609079651393724e-05,
      "loss": 0.7664,
      "step": 569260
    },
    {
      "epoch": 1.9951494250456494,
      "grad_norm": 3.0,
      "learning_rate": 1.8608430622730022e-05,
      "loss": 0.8434,
      "step": 569270
    },
    {
      "epoch": 1.995184472552545,
      "grad_norm": 3.0,
      "learning_rate": 1.860778159406632e-05,
      "loss": 0.8411,
      "step": 569280
    },
    {
      "epoch": 1.9952195200594405,
      "grad_norm": 2.71875,
      "learning_rate": 1.8607132565402618e-05,
      "loss": 0.884,
      "step": 569290
    },
    {
      "epoch": 1.9952545675663362,
      "grad_norm": 2.859375,
      "learning_rate": 1.8606483536738916e-05,
      "loss": 0.8367,
      "step": 569300
    },
    {
      "epoch": 1.9952896150732318,
      "grad_norm": 2.765625,
      "learning_rate": 1.8605834508075214e-05,
      "loss": 0.8257,
      "step": 569310
    },
    {
      "epoch": 1.9953246625801273,
      "grad_norm": 2.609375,
      "learning_rate": 1.8605185479411512e-05,
      "loss": 0.9098,
      "step": 569320
    },
    {
      "epoch": 1.995359710087023,
      "grad_norm": 3.015625,
      "learning_rate": 1.860453645074781e-05,
      "loss": 0.8112,
      "step": 569330
    },
    {
      "epoch": 1.9953947575939186,
      "grad_norm": 2.875,
      "learning_rate": 1.860388742208411e-05,
      "loss": 0.8009,
      "step": 569340
    },
    {
      "epoch": 1.9954298051008141,
      "grad_norm": 2.546875,
      "learning_rate": 1.860323839342041e-05,
      "loss": 0.7931,
      "step": 569350
    },
    {
      "epoch": 1.99546485260771,
      "grad_norm": 2.84375,
      "learning_rate": 1.8602589364756707e-05,
      "loss": 0.8835,
      "step": 569360
    },
    {
      "epoch": 1.9954999001146052,
      "grad_norm": 3.0,
      "learning_rate": 1.8601940336093005e-05,
      "loss": 0.8486,
      "step": 569370
    },
    {
      "epoch": 1.995534947621501,
      "grad_norm": 3.0625,
      "learning_rate": 1.8601291307429303e-05,
      "loss": 0.8577,
      "step": 569380
    },
    {
      "epoch": 1.9955699951283965,
      "grad_norm": 3.328125,
      "learning_rate": 1.86006422787656e-05,
      "loss": 0.8685,
      "step": 569390
    },
    {
      "epoch": 1.995605042635292,
      "grad_norm": 2.734375,
      "learning_rate": 1.85999932501019e-05,
      "loss": 0.8858,
      "step": 569400
    },
    {
      "epoch": 1.9956400901421878,
      "grad_norm": 2.875,
      "learning_rate": 1.8599344221438197e-05,
      "loss": 0.8144,
      "step": 569410
    },
    {
      "epoch": 1.9956751376490833,
      "grad_norm": 3.328125,
      "learning_rate": 1.8598695192774495e-05,
      "loss": 0.8315,
      "step": 569420
    },
    {
      "epoch": 1.9957101851559789,
      "grad_norm": 2.875,
      "learning_rate": 1.8598046164110793e-05,
      "loss": 0.8435,
      "step": 569430
    },
    {
      "epoch": 1.9957452326628746,
      "grad_norm": 2.6875,
      "learning_rate": 1.8597397135447088e-05,
      "loss": 0.8016,
      "step": 569440
    },
    {
      "epoch": 1.9957802801697702,
      "grad_norm": 2.90625,
      "learning_rate": 1.8596748106783386e-05,
      "loss": 0.8063,
      "step": 569450
    },
    {
      "epoch": 1.9958153276766657,
      "grad_norm": 2.921875,
      "learning_rate": 1.8596099078119687e-05,
      "loss": 0.896,
      "step": 569460
    },
    {
      "epoch": 1.9958503751835615,
      "grad_norm": 2.6875,
      "learning_rate": 1.8595450049455985e-05,
      "loss": 0.7964,
      "step": 569470
    },
    {
      "epoch": 1.9958854226904568,
      "grad_norm": 2.828125,
      "learning_rate": 1.8594801020792283e-05,
      "loss": 0.829,
      "step": 569480
    },
    {
      "epoch": 1.9959204701973525,
      "grad_norm": 3.0,
      "learning_rate": 1.859415199212858e-05,
      "loss": 0.7257,
      "step": 569490
    },
    {
      "epoch": 1.995955517704248,
      "grad_norm": 2.6875,
      "learning_rate": 1.859350296346488e-05,
      "loss": 0.7847,
      "step": 569500
    },
    {
      "epoch": 1.9959905652111436,
      "grad_norm": 3.125,
      "learning_rate": 1.8592853934801177e-05,
      "loss": 0.8937,
      "step": 569510
    },
    {
      "epoch": 1.9960256127180394,
      "grad_norm": 2.765625,
      "learning_rate": 1.8592204906137475e-05,
      "loss": 0.785,
      "step": 569520
    },
    {
      "epoch": 1.996060660224935,
      "grad_norm": 3.0625,
      "learning_rate": 1.8591555877473773e-05,
      "loss": 0.7875,
      "step": 569530
    },
    {
      "epoch": 1.9960957077318304,
      "grad_norm": 2.796875,
      "learning_rate": 1.859090684881007e-05,
      "loss": 0.7906,
      "step": 569540
    },
    {
      "epoch": 1.9961307552387262,
      "grad_norm": 3.234375,
      "learning_rate": 1.859025782014637e-05,
      "loss": 0.8209,
      "step": 569550
    },
    {
      "epoch": 1.9961658027456217,
      "grad_norm": 3.125,
      "learning_rate": 1.8589608791482667e-05,
      "loss": 0.7987,
      "step": 569560
    },
    {
      "epoch": 1.9962008502525173,
      "grad_norm": 3.03125,
      "learning_rate": 1.8588959762818965e-05,
      "loss": 0.8649,
      "step": 569570
    },
    {
      "epoch": 1.996235897759413,
      "grad_norm": 2.96875,
      "learning_rate": 1.8588310734155263e-05,
      "loss": 0.8462,
      "step": 569580
    },
    {
      "epoch": 1.9962709452663083,
      "grad_norm": 3.5,
      "learning_rate": 1.858766170549156e-05,
      "loss": 0.7818,
      "step": 569590
    },
    {
      "epoch": 1.996305992773204,
      "grad_norm": 2.71875,
      "learning_rate": 1.8587012676827863e-05,
      "loss": 0.8459,
      "step": 569600
    },
    {
      "epoch": 1.9963410402800996,
      "grad_norm": 2.859375,
      "learning_rate": 1.858636364816416e-05,
      "loss": 0.8749,
      "step": 569610
    },
    {
      "epoch": 1.9963760877869952,
      "grad_norm": 2.46875,
      "learning_rate": 1.858571461950046e-05,
      "loss": 0.8377,
      "step": 569620
    },
    {
      "epoch": 1.996411135293891,
      "grad_norm": 2.859375,
      "learning_rate": 1.8585065590836753e-05,
      "loss": 0.7764,
      "step": 569630
    },
    {
      "epoch": 1.9964461828007865,
      "grad_norm": 2.640625,
      "learning_rate": 1.858441656217305e-05,
      "loss": 0.8056,
      "step": 569640
    },
    {
      "epoch": 1.996481230307682,
      "grad_norm": 2.234375,
      "learning_rate": 1.858376753350935e-05,
      "loss": 0.7107,
      "step": 569650
    },
    {
      "epoch": 1.9965162778145777,
      "grad_norm": 2.828125,
      "learning_rate": 1.8583118504845647e-05,
      "loss": 0.8225,
      "step": 569660
    },
    {
      "epoch": 1.9965513253214733,
      "grad_norm": 3.015625,
      "learning_rate": 1.8582469476181945e-05,
      "loss": 0.8466,
      "step": 569670
    },
    {
      "epoch": 1.9965863728283688,
      "grad_norm": 3.0625,
      "learning_rate": 1.8581820447518243e-05,
      "loss": 0.8313,
      "step": 569680
    },
    {
      "epoch": 1.9966214203352646,
      "grad_norm": 2.984375,
      "learning_rate": 1.858117141885454e-05,
      "loss": 0.8593,
      "step": 569690
    },
    {
      "epoch": 1.9966564678421599,
      "grad_norm": 3.109375,
      "learning_rate": 1.858052239019084e-05,
      "loss": 0.7862,
      "step": 569700
    },
    {
      "epoch": 1.9966915153490556,
      "grad_norm": 2.796875,
      "learning_rate": 1.857987336152714e-05,
      "loss": 0.8437,
      "step": 569710
    },
    {
      "epoch": 1.9967265628559514,
      "grad_norm": 3.203125,
      "learning_rate": 1.857922433286344e-05,
      "loss": 0.8251,
      "step": 569720
    },
    {
      "epoch": 1.9967616103628467,
      "grad_norm": 3.03125,
      "learning_rate": 1.8578575304199737e-05,
      "loss": 0.7905,
      "step": 569730
    },
    {
      "epoch": 1.9967966578697425,
      "grad_norm": 2.859375,
      "learning_rate": 1.8577926275536035e-05,
      "loss": 0.7783,
      "step": 569740
    },
    {
      "epoch": 1.996831705376638,
      "grad_norm": 2.796875,
      "learning_rate": 1.8577277246872333e-05,
      "loss": 0.8578,
      "step": 569750
    },
    {
      "epoch": 1.9968667528835335,
      "grad_norm": 2.875,
      "learning_rate": 1.857662821820863e-05,
      "loss": 0.783,
      "step": 569760
    },
    {
      "epoch": 1.9969018003904293,
      "grad_norm": 3.25,
      "learning_rate": 1.857597918954493e-05,
      "loss": 0.7803,
      "step": 569770
    },
    {
      "epoch": 1.9969368478973248,
      "grad_norm": 3.0625,
      "learning_rate": 1.8575330160881227e-05,
      "loss": 0.7905,
      "step": 569780
    },
    {
      "epoch": 1.9969718954042204,
      "grad_norm": 2.71875,
      "learning_rate": 1.8574681132217525e-05,
      "loss": 0.8713,
      "step": 569790
    },
    {
      "epoch": 1.9970069429111161,
      "grad_norm": 3.140625,
      "learning_rate": 1.8574032103553823e-05,
      "loss": 0.8629,
      "step": 569800
    },
    {
      "epoch": 1.9970419904180117,
      "grad_norm": 3.21875,
      "learning_rate": 1.857338307489012e-05,
      "loss": 0.9078,
      "step": 569810
    },
    {
      "epoch": 1.9970770379249072,
      "grad_norm": 3.3125,
      "learning_rate": 1.8572734046226415e-05,
      "loss": 0.7463,
      "step": 569820
    },
    {
      "epoch": 1.997112085431803,
      "grad_norm": 2.953125,
      "learning_rate": 1.8572085017562717e-05,
      "loss": 0.8157,
      "step": 569830
    },
    {
      "epoch": 1.9971471329386983,
      "grad_norm": 2.859375,
      "learning_rate": 1.8571435988899015e-05,
      "loss": 0.7456,
      "step": 569840
    },
    {
      "epoch": 1.997182180445594,
      "grad_norm": 2.625,
      "learning_rate": 1.8570786960235313e-05,
      "loss": 0.8676,
      "step": 569850
    },
    {
      "epoch": 1.9972172279524896,
      "grad_norm": 2.484375,
      "learning_rate": 1.857013793157161e-05,
      "loss": 0.8206,
      "step": 569860
    },
    {
      "epoch": 1.997252275459385,
      "grad_norm": 3.4375,
      "learning_rate": 1.856948890290791e-05,
      "loss": 0.8849,
      "step": 569870
    },
    {
      "epoch": 1.9972873229662809,
      "grad_norm": 2.828125,
      "learning_rate": 1.8568839874244207e-05,
      "loss": 0.8105,
      "step": 569880
    },
    {
      "epoch": 1.9973223704731764,
      "grad_norm": 3.203125,
      "learning_rate": 1.8568190845580505e-05,
      "loss": 0.8157,
      "step": 569890
    },
    {
      "epoch": 1.997357417980072,
      "grad_norm": 3.0,
      "learning_rate": 1.8567541816916803e-05,
      "loss": 0.881,
      "step": 569900
    },
    {
      "epoch": 1.9973924654869677,
      "grad_norm": 2.546875,
      "learning_rate": 1.85668927882531e-05,
      "loss": 0.7717,
      "step": 569910
    },
    {
      "epoch": 1.9974275129938632,
      "grad_norm": 2.671875,
      "learning_rate": 1.85662437595894e-05,
      "loss": 0.8457,
      "step": 569920
    },
    {
      "epoch": 1.9974625605007588,
      "grad_norm": 2.609375,
      "learning_rate": 1.8565594730925697e-05,
      "loss": 0.8449,
      "step": 569930
    },
    {
      "epoch": 1.9974976080076545,
      "grad_norm": 2.78125,
      "learning_rate": 1.8564945702261995e-05,
      "loss": 0.8508,
      "step": 569940
    },
    {
      "epoch": 1.9975326555145498,
      "grad_norm": 2.5625,
      "learning_rate": 1.8564296673598293e-05,
      "loss": 0.8183,
      "step": 569950
    },
    {
      "epoch": 1.9975677030214456,
      "grad_norm": 2.8125,
      "learning_rate": 1.8563647644934594e-05,
      "loss": 0.9042,
      "step": 569960
    },
    {
      "epoch": 1.9976027505283411,
      "grad_norm": 2.78125,
      "learning_rate": 1.8562998616270892e-05,
      "loss": 0.7834,
      "step": 569970
    },
    {
      "epoch": 1.9976377980352367,
      "grad_norm": 3.21875,
      "learning_rate": 1.856234958760719e-05,
      "loss": 0.8879,
      "step": 569980
    },
    {
      "epoch": 1.9976728455421324,
      "grad_norm": 2.46875,
      "learning_rate": 1.8561700558943488e-05,
      "loss": 0.7792,
      "step": 569990
    },
    {
      "epoch": 1.997707893049028,
      "grad_norm": 2.890625,
      "learning_rate": 1.8561051530279786e-05,
      "loss": 0.8291,
      "step": 570000
    },
    {
      "epoch": 1.997707893049028,
      "eval_loss": 0.7716218829154968,
      "eval_runtime": 566.6179,
      "eval_samples_per_second": 671.415,
      "eval_steps_per_second": 55.951,
      "step": 570000
    },
    {
      "epoch": 1.9977429405559235,
      "grad_norm": 2.609375,
      "learning_rate": 1.856040250161608e-05,
      "loss": 0.8007,
      "step": 570010
    },
    {
      "epoch": 1.9977779880628193,
      "grad_norm": 3.015625,
      "learning_rate": 1.855975347295238e-05,
      "loss": 0.7976,
      "step": 570020
    },
    {
      "epoch": 1.9978130355697148,
      "grad_norm": 2.59375,
      "learning_rate": 1.8559104444288677e-05,
      "loss": 0.7662,
      "step": 570030
    },
    {
      "epoch": 1.9978480830766103,
      "grad_norm": 3.078125,
      "learning_rate": 1.8558455415624975e-05,
      "loss": 0.8005,
      "step": 570040
    },
    {
      "epoch": 1.997883130583506,
      "grad_norm": 3.0625,
      "learning_rate": 1.8557806386961273e-05,
      "loss": 0.7771,
      "step": 570050
    },
    {
      "epoch": 1.9979181780904014,
      "grad_norm": 2.671875,
      "learning_rate": 1.855715735829757e-05,
      "loss": 0.7266,
      "step": 570060
    },
    {
      "epoch": 1.9979532255972972,
      "grad_norm": 3.015625,
      "learning_rate": 1.855650832963387e-05,
      "loss": 0.9133,
      "step": 570070
    },
    {
      "epoch": 1.9979882731041927,
      "grad_norm": 2.875,
      "learning_rate": 1.855585930097017e-05,
      "loss": 0.8818,
      "step": 570080
    },
    {
      "epoch": 1.9980233206110882,
      "grad_norm": 2.65625,
      "learning_rate": 1.8555210272306468e-05,
      "loss": 0.895,
      "step": 570090
    },
    {
      "epoch": 1.998058368117984,
      "grad_norm": 3.21875,
      "learning_rate": 1.8554561243642766e-05,
      "loss": 0.8589,
      "step": 570100
    },
    {
      "epoch": 1.9980934156248795,
      "grad_norm": 2.859375,
      "learning_rate": 1.8553912214979064e-05,
      "loss": 0.7721,
      "step": 570110
    },
    {
      "epoch": 1.998128463131775,
      "grad_norm": 3.15625,
      "learning_rate": 1.8553263186315362e-05,
      "loss": 0.8058,
      "step": 570120
    },
    {
      "epoch": 1.9981635106386708,
      "grad_norm": 2.875,
      "learning_rate": 1.855261415765166e-05,
      "loss": 0.765,
      "step": 570130
    },
    {
      "epoch": 1.9981985581455664,
      "grad_norm": 2.75,
      "learning_rate": 1.8551965128987958e-05,
      "loss": 0.7795,
      "step": 570140
    },
    {
      "epoch": 1.9982336056524619,
      "grad_norm": 2.75,
      "learning_rate": 1.8551316100324256e-05,
      "loss": 0.7795,
      "step": 570150
    },
    {
      "epoch": 1.9982686531593576,
      "grad_norm": 3.546875,
      "learning_rate": 1.8550667071660554e-05,
      "loss": 0.8943,
      "step": 570160
    },
    {
      "epoch": 1.998303700666253,
      "grad_norm": 2.828125,
      "learning_rate": 1.8550018042996852e-05,
      "loss": 0.9171,
      "step": 570170
    },
    {
      "epoch": 1.9983387481731487,
      "grad_norm": 2.828125,
      "learning_rate": 1.854936901433315e-05,
      "loss": 0.7719,
      "step": 570180
    },
    {
      "epoch": 1.9983737956800443,
      "grad_norm": 2.984375,
      "learning_rate": 1.8548719985669448e-05,
      "loss": 0.8265,
      "step": 570190
    },
    {
      "epoch": 1.9984088431869398,
      "grad_norm": 2.65625,
      "learning_rate": 1.8548070957005746e-05,
      "loss": 0.7888,
      "step": 570200
    },
    {
      "epoch": 1.9984438906938355,
      "grad_norm": 2.8125,
      "learning_rate": 1.8547421928342044e-05,
      "loss": 0.8111,
      "step": 570210
    },
    {
      "epoch": 1.998478938200731,
      "grad_norm": 3.359375,
      "learning_rate": 1.8546772899678342e-05,
      "loss": 0.7736,
      "step": 570220
    },
    {
      "epoch": 1.9985139857076266,
      "grad_norm": 2.890625,
      "learning_rate": 1.854612387101464e-05,
      "loss": 0.8013,
      "step": 570230
    },
    {
      "epoch": 1.9985490332145224,
      "grad_norm": 2.921875,
      "learning_rate": 1.8545474842350938e-05,
      "loss": 0.8473,
      "step": 570240
    },
    {
      "epoch": 1.998584080721418,
      "grad_norm": 2.765625,
      "learning_rate": 1.8544825813687236e-05,
      "loss": 0.8323,
      "step": 570250
    },
    {
      "epoch": 1.9986191282283134,
      "grad_norm": 3.15625,
      "learning_rate": 1.8544176785023534e-05,
      "loss": 0.8166,
      "step": 570260
    },
    {
      "epoch": 1.9986541757352092,
      "grad_norm": 2.796875,
      "learning_rate": 1.8543527756359832e-05,
      "loss": 0.8357,
      "step": 570270
    },
    {
      "epoch": 1.9986892232421045,
      "grad_norm": 3.15625,
      "learning_rate": 1.854287872769613e-05,
      "loss": 0.8515,
      "step": 570280
    },
    {
      "epoch": 1.9987242707490003,
      "grad_norm": 2.625,
      "learning_rate": 1.8542229699032428e-05,
      "loss": 0.8239,
      "step": 570290
    },
    {
      "epoch": 1.9987593182558958,
      "grad_norm": 2.78125,
      "learning_rate": 1.8541580670368726e-05,
      "loss": 0.819,
      "step": 570300
    },
    {
      "epoch": 1.9987943657627913,
      "grad_norm": 3.546875,
      "learning_rate": 1.8540931641705024e-05,
      "loss": 0.8081,
      "step": 570310
    },
    {
      "epoch": 1.998829413269687,
      "grad_norm": 2.8125,
      "learning_rate": 1.8540282613041322e-05,
      "loss": 0.8409,
      "step": 570320
    },
    {
      "epoch": 1.9988644607765826,
      "grad_norm": 3.515625,
      "learning_rate": 1.8539633584377624e-05,
      "loss": 0.8456,
      "step": 570330
    },
    {
      "epoch": 1.9988995082834782,
      "grad_norm": 3.171875,
      "learning_rate": 1.853898455571392e-05,
      "loss": 0.7505,
      "step": 570340
    },
    {
      "epoch": 1.998934555790374,
      "grad_norm": 3.265625,
      "learning_rate": 1.853833552705022e-05,
      "loss": 0.8279,
      "step": 570350
    },
    {
      "epoch": 1.9989696032972695,
      "grad_norm": 2.75,
      "learning_rate": 1.8537686498386518e-05,
      "loss": 0.8493,
      "step": 570360
    },
    {
      "epoch": 1.999004650804165,
      "grad_norm": 2.765625,
      "learning_rate": 1.8537037469722816e-05,
      "loss": 0.8399,
      "step": 570370
    },
    {
      "epoch": 1.9990396983110608,
      "grad_norm": 2.8125,
      "learning_rate": 1.8536388441059114e-05,
      "loss": 0.8125,
      "step": 570380
    },
    {
      "epoch": 1.999074745817956,
      "grad_norm": 3.078125,
      "learning_rate": 1.8535739412395408e-05,
      "loss": 0.8078,
      "step": 570390
    },
    {
      "epoch": 1.9991097933248518,
      "grad_norm": 3.203125,
      "learning_rate": 1.8535090383731706e-05,
      "loss": 0.9015,
      "step": 570400
    },
    {
      "epoch": 1.9991448408317476,
      "grad_norm": 2.5625,
      "learning_rate": 1.8534441355068004e-05,
      "loss": 0.7777,
      "step": 570410
    },
    {
      "epoch": 1.999179888338643,
      "grad_norm": 2.96875,
      "learning_rate": 1.8533792326404302e-05,
      "loss": 0.8057,
      "step": 570420
    },
    {
      "epoch": 1.9992149358455387,
      "grad_norm": 3.484375,
      "learning_rate": 1.85331432977406e-05,
      "loss": 0.7869,
      "step": 570430
    },
    {
      "epoch": 1.9992499833524342,
      "grad_norm": 3.0,
      "learning_rate": 1.85324942690769e-05,
      "loss": 0.812,
      "step": 570440
    },
    {
      "epoch": 1.9992850308593297,
      "grad_norm": 2.671875,
      "learning_rate": 1.85318452404132e-05,
      "loss": 0.8233,
      "step": 570450
    },
    {
      "epoch": 1.9993200783662255,
      "grad_norm": 3.078125,
      "learning_rate": 1.8531196211749498e-05,
      "loss": 0.8215,
      "step": 570460
    },
    {
      "epoch": 1.999355125873121,
      "grad_norm": 2.90625,
      "learning_rate": 1.8530547183085796e-05,
      "loss": 0.7872,
      "step": 570470
    },
    {
      "epoch": 1.9993901733800166,
      "grad_norm": 2.734375,
      "learning_rate": 1.8529898154422094e-05,
      "loss": 0.8438,
      "step": 570480
    },
    {
      "epoch": 1.9994252208869123,
      "grad_norm": 2.671875,
      "learning_rate": 1.852924912575839e-05,
      "loss": 0.7563,
      "step": 570490
    },
    {
      "epoch": 1.9994602683938079,
      "grad_norm": 2.71875,
      "learning_rate": 1.852860009709469e-05,
      "loss": 0.7942,
      "step": 570500
    },
    {
      "epoch": 1.9994953159007034,
      "grad_norm": 2.59375,
      "learning_rate": 1.8527951068430988e-05,
      "loss": 0.7569,
      "step": 570510
    },
    {
      "epoch": 1.9995303634075992,
      "grad_norm": 3.0,
      "learning_rate": 1.8527302039767286e-05,
      "loss": 0.7205,
      "step": 570520
    },
    {
      "epoch": 1.9995654109144945,
      "grad_norm": 3.578125,
      "learning_rate": 1.8526653011103584e-05,
      "loss": 0.7911,
      "step": 570530
    },
    {
      "epoch": 1.9996004584213902,
      "grad_norm": 2.59375,
      "learning_rate": 1.852600398243988e-05,
      "loss": 0.8104,
      "step": 570540
    },
    {
      "epoch": 1.9996355059282858,
      "grad_norm": 2.8125,
      "learning_rate": 1.852535495377618e-05,
      "loss": 0.8683,
      "step": 570550
    },
    {
      "epoch": 1.9996705534351813,
      "grad_norm": 2.6875,
      "learning_rate": 1.8524705925112478e-05,
      "loss": 0.8347,
      "step": 570560
    },
    {
      "epoch": 1.999705600942077,
      "grad_norm": 2.8125,
      "learning_rate": 1.8524056896448776e-05,
      "loss": 0.8131,
      "step": 570570
    },
    {
      "epoch": 1.9997406484489726,
      "grad_norm": 2.75,
      "learning_rate": 1.8523407867785074e-05,
      "loss": 0.85,
      "step": 570580
    },
    {
      "epoch": 1.9997756959558681,
      "grad_norm": 2.796875,
      "learning_rate": 1.852275883912137e-05,
      "loss": 0.8133,
      "step": 570590
    },
    {
      "epoch": 1.9998107434627639,
      "grad_norm": 2.671875,
      "learning_rate": 1.852210981045767e-05,
      "loss": 0.8164,
      "step": 570600
    },
    {
      "epoch": 1.9998457909696594,
      "grad_norm": 3.09375,
      "learning_rate": 1.8521460781793968e-05,
      "loss": 0.7918,
      "step": 570610
    },
    {
      "epoch": 1.999880838476555,
      "grad_norm": 2.546875,
      "learning_rate": 1.8520811753130266e-05,
      "loss": 0.8396,
      "step": 570620
    },
    {
      "epoch": 1.9999158859834507,
      "grad_norm": 2.5,
      "learning_rate": 1.8520162724466564e-05,
      "loss": 0.7922,
      "step": 570630
    },
    {
      "epoch": 1.999950933490346,
      "grad_norm": 2.9375,
      "learning_rate": 1.851951369580286e-05,
      "loss": 0.7575,
      "step": 570640
    },
    {
      "epoch": 1.9999859809972418,
      "grad_norm": 3.296875,
      "learning_rate": 1.851886466713916e-05,
      "loss": 0.8214,
      "step": 570650
    },
    {
      "epoch": 2.0000210285041375,
      "grad_norm": 2.734375,
      "learning_rate": 1.8518215638475458e-05,
      "loss": 0.7484,
      "step": 570660
    },
    {
      "epoch": 2.000056076011033,
      "grad_norm": 2.71875,
      "learning_rate": 1.8517566609811756e-05,
      "loss": 0.7183,
      "step": 570670
    },
    {
      "epoch": 2.0000911235179286,
      "grad_norm": 2.8125,
      "learning_rate": 1.8516917581148054e-05,
      "loss": 0.8122,
      "step": 570680
    },
    {
      "epoch": 2.000126171024824,
      "grad_norm": 2.984375,
      "learning_rate": 1.851626855248435e-05,
      "loss": 0.902,
      "step": 570690
    },
    {
      "epoch": 2.0001612185317197,
      "grad_norm": 2.640625,
      "learning_rate": 1.8515619523820653e-05,
      "loss": 0.8476,
      "step": 570700
    },
    {
      "epoch": 2.0001962660386154,
      "grad_norm": 2.625,
      "learning_rate": 1.851497049515695e-05,
      "loss": 0.7116,
      "step": 570710
    },
    {
      "epoch": 2.0002313135455108,
      "grad_norm": 3.015625,
      "learning_rate": 1.851432146649325e-05,
      "loss": 0.8027,
      "step": 570720
    },
    {
      "epoch": 2.0002663610524065,
      "grad_norm": 2.6875,
      "learning_rate": 1.8513672437829547e-05,
      "loss": 0.8241,
      "step": 570730
    },
    {
      "epoch": 2.0003014085593023,
      "grad_norm": 2.34375,
      "learning_rate": 1.8513023409165845e-05,
      "loss": 0.8606,
      "step": 570740
    },
    {
      "epoch": 2.0003364560661976,
      "grad_norm": 3.203125,
      "learning_rate": 1.8512374380502143e-05,
      "loss": 0.7994,
      "step": 570750
    },
    {
      "epoch": 2.0003715035730933,
      "grad_norm": 2.515625,
      "learning_rate": 1.8511725351838438e-05,
      "loss": 0.8345,
      "step": 570760
    },
    {
      "epoch": 2.000406551079989,
      "grad_norm": 2.796875,
      "learning_rate": 1.8511076323174736e-05,
      "loss": 0.8455,
      "step": 570770
    },
    {
      "epoch": 2.0004415985868844,
      "grad_norm": 3.375,
      "learning_rate": 1.8510427294511034e-05,
      "loss": 0.8903,
      "step": 570780
    },
    {
      "epoch": 2.00047664609378,
      "grad_norm": 3.015625,
      "learning_rate": 1.850977826584733e-05,
      "loss": 0.7964,
      "step": 570790
    },
    {
      "epoch": 2.000511693600676,
      "grad_norm": 2.78125,
      "learning_rate": 1.850912923718363e-05,
      "loss": 0.8193,
      "step": 570800
    },
    {
      "epoch": 2.0005467411075712,
      "grad_norm": 3.03125,
      "learning_rate": 1.850848020851993e-05,
      "loss": 0.8578,
      "step": 570810
    },
    {
      "epoch": 2.000581788614467,
      "grad_norm": 2.484375,
      "learning_rate": 1.850783117985623e-05,
      "loss": 0.7399,
      "step": 570820
    },
    {
      "epoch": 2.0006168361213623,
      "grad_norm": 3.015625,
      "learning_rate": 1.8507182151192527e-05,
      "loss": 0.797,
      "step": 570830
    },
    {
      "epoch": 2.000651883628258,
      "grad_norm": 3.21875,
      "learning_rate": 1.8506533122528825e-05,
      "loss": 0.7988,
      "step": 570840
    },
    {
      "epoch": 2.000686931135154,
      "grad_norm": 3.203125,
      "learning_rate": 1.8505884093865123e-05,
      "loss": 0.7846,
      "step": 570850
    },
    {
      "epoch": 2.000721978642049,
      "grad_norm": 3.015625,
      "learning_rate": 1.850523506520142e-05,
      "loss": 0.726,
      "step": 570860
    },
    {
      "epoch": 2.000757026148945,
      "grad_norm": 2.9375,
      "learning_rate": 1.850458603653772e-05,
      "loss": 0.858,
      "step": 570870
    },
    {
      "epoch": 2.0007920736558407,
      "grad_norm": 3.0625,
      "learning_rate": 1.8503937007874017e-05,
      "loss": 0.8098,
      "step": 570880
    },
    {
      "epoch": 2.000827121162736,
      "grad_norm": 3.09375,
      "learning_rate": 1.8503287979210315e-05,
      "loss": 0.7572,
      "step": 570890
    },
    {
      "epoch": 2.0008621686696317,
      "grad_norm": 2.765625,
      "learning_rate": 1.8502638950546613e-05,
      "loss": 0.7739,
      "step": 570900
    },
    {
      "epoch": 2.0008972161765275,
      "grad_norm": 3.15625,
      "learning_rate": 1.850198992188291e-05,
      "loss": 0.8214,
      "step": 570910
    },
    {
      "epoch": 2.000932263683423,
      "grad_norm": 2.9375,
      "learning_rate": 1.850134089321921e-05,
      "loss": 0.8203,
      "step": 570920
    },
    {
      "epoch": 2.0009673111903186,
      "grad_norm": 2.828125,
      "learning_rate": 1.8500691864555507e-05,
      "loss": 0.8343,
      "step": 570930
    },
    {
      "epoch": 2.001002358697214,
      "grad_norm": 3.3125,
      "learning_rate": 1.8500042835891805e-05,
      "loss": 0.8297,
      "step": 570940
    },
    {
      "epoch": 2.0010374062041096,
      "grad_norm": 2.90625,
      "learning_rate": 1.8499393807228103e-05,
      "loss": 0.7866,
      "step": 570950
    },
    {
      "epoch": 2.0010724537110054,
      "grad_norm": 2.828125,
      "learning_rate": 1.84987447785644e-05,
      "loss": 0.8006,
      "step": 570960
    },
    {
      "epoch": 2.0011075012179007,
      "grad_norm": 2.71875,
      "learning_rate": 1.84980957499007e-05,
      "loss": 0.8748,
      "step": 570970
    },
    {
      "epoch": 2.0011425487247965,
      "grad_norm": 3.21875,
      "learning_rate": 1.8497446721236997e-05,
      "loss": 0.8013,
      "step": 570980
    },
    {
      "epoch": 2.0011775962316922,
      "grad_norm": 2.921875,
      "learning_rate": 1.8496797692573295e-05,
      "loss": 0.7258,
      "step": 570990
    },
    {
      "epoch": 2.0012126437385875,
      "grad_norm": 2.90625,
      "learning_rate": 1.8496148663909593e-05,
      "loss": 0.8474,
      "step": 571000
    },
    {
      "epoch": 2.0012476912454833,
      "grad_norm": 2.65625,
      "learning_rate": 1.849549963524589e-05,
      "loss": 0.7567,
      "step": 571010
    },
    {
      "epoch": 2.001282738752379,
      "grad_norm": 3.140625,
      "learning_rate": 1.849485060658219e-05,
      "loss": 0.7868,
      "step": 571020
    },
    {
      "epoch": 2.0013177862592744,
      "grad_norm": 3.03125,
      "learning_rate": 1.8494201577918487e-05,
      "loss": 0.78,
      "step": 571030
    },
    {
      "epoch": 2.00135283376617,
      "grad_norm": 2.671875,
      "learning_rate": 1.8493552549254785e-05,
      "loss": 0.7836,
      "step": 571040
    },
    {
      "epoch": 2.0013878812730654,
      "grad_norm": 3.140625,
      "learning_rate": 1.8492903520591083e-05,
      "loss": 0.8124,
      "step": 571050
    },
    {
      "epoch": 2.001422928779961,
      "grad_norm": 3.09375,
      "learning_rate": 1.8492254491927384e-05,
      "loss": 0.8381,
      "step": 571060
    },
    {
      "epoch": 2.001457976286857,
      "grad_norm": 2.53125,
      "learning_rate": 1.8491605463263682e-05,
      "loss": 0.787,
      "step": 571070
    },
    {
      "epoch": 2.0014930237937523,
      "grad_norm": 2.78125,
      "learning_rate": 1.849095643459998e-05,
      "loss": 0.8319,
      "step": 571080
    },
    {
      "epoch": 2.001528071300648,
      "grad_norm": 2.90625,
      "learning_rate": 1.849030740593628e-05,
      "loss": 0.7245,
      "step": 571090
    },
    {
      "epoch": 2.001563118807544,
      "grad_norm": 3.015625,
      "learning_rate": 1.8489658377272576e-05,
      "loss": 0.8144,
      "step": 571100
    },
    {
      "epoch": 2.001598166314439,
      "grad_norm": 2.59375,
      "learning_rate": 1.8489009348608874e-05,
      "loss": 0.7635,
      "step": 571110
    },
    {
      "epoch": 2.001633213821335,
      "grad_norm": 2.8125,
      "learning_rate": 1.8488360319945172e-05,
      "loss": 0.816,
      "step": 571120
    },
    {
      "epoch": 2.0016682613282306,
      "grad_norm": 3.171875,
      "learning_rate": 1.848771129128147e-05,
      "loss": 0.8536,
      "step": 571130
    },
    {
      "epoch": 2.001703308835126,
      "grad_norm": 2.984375,
      "learning_rate": 1.8487062262617765e-05,
      "loss": 0.8024,
      "step": 571140
    },
    {
      "epoch": 2.0017383563420217,
      "grad_norm": 3.0625,
      "learning_rate": 1.8486413233954063e-05,
      "loss": 0.8151,
      "step": 571150
    },
    {
      "epoch": 2.001773403848917,
      "grad_norm": 2.8125,
      "learning_rate": 1.848576420529036e-05,
      "loss": 0.7907,
      "step": 571160
    },
    {
      "epoch": 2.0018084513558128,
      "grad_norm": 2.203125,
      "learning_rate": 1.848511517662666e-05,
      "loss": 0.7882,
      "step": 571170
    },
    {
      "epoch": 2.0018434988627085,
      "grad_norm": 2.703125,
      "learning_rate": 1.848446614796296e-05,
      "loss": 0.907,
      "step": 571180
    },
    {
      "epoch": 2.001878546369604,
      "grad_norm": 2.8125,
      "learning_rate": 1.848381711929926e-05,
      "loss": 0.8517,
      "step": 571190
    },
    {
      "epoch": 2.0019135938764996,
      "grad_norm": 3.078125,
      "learning_rate": 1.8483168090635556e-05,
      "loss": 0.9138,
      "step": 571200
    },
    {
      "epoch": 2.0019486413833953,
      "grad_norm": 3.21875,
      "learning_rate": 1.8482519061971854e-05,
      "loss": 0.8717,
      "step": 571210
    },
    {
      "epoch": 2.0019836888902907,
      "grad_norm": 2.90625,
      "learning_rate": 1.8481870033308152e-05,
      "loss": 0.8574,
      "step": 571220
    },
    {
      "epoch": 2.0020187363971864,
      "grad_norm": 2.5625,
      "learning_rate": 1.848122100464445e-05,
      "loss": 0.8655,
      "step": 571230
    },
    {
      "epoch": 2.002053783904082,
      "grad_norm": 2.8125,
      "learning_rate": 1.848057197598075e-05,
      "loss": 0.7789,
      "step": 571240
    },
    {
      "epoch": 2.0020888314109775,
      "grad_norm": 2.765625,
      "learning_rate": 1.8479922947317046e-05,
      "loss": 0.8211,
      "step": 571250
    },
    {
      "epoch": 2.0021238789178732,
      "grad_norm": 2.984375,
      "learning_rate": 1.8479273918653344e-05,
      "loss": 0.8249,
      "step": 571260
    },
    {
      "epoch": 2.0021589264247686,
      "grad_norm": 2.75,
      "learning_rate": 1.8478624889989642e-05,
      "loss": 0.7928,
      "step": 571270
    },
    {
      "epoch": 2.0021939739316643,
      "grad_norm": 2.703125,
      "learning_rate": 1.847797586132594e-05,
      "loss": 0.7861,
      "step": 571280
    },
    {
      "epoch": 2.00222902143856,
      "grad_norm": 3.34375,
      "learning_rate": 1.847732683266224e-05,
      "loss": 0.8395,
      "step": 571290
    },
    {
      "epoch": 2.0022640689454554,
      "grad_norm": 2.953125,
      "learning_rate": 1.8476677803998536e-05,
      "loss": 0.7437,
      "step": 571300
    },
    {
      "epoch": 2.002299116452351,
      "grad_norm": 2.75,
      "learning_rate": 1.8476028775334834e-05,
      "loss": 0.8235,
      "step": 571310
    },
    {
      "epoch": 2.002334163959247,
      "grad_norm": 2.390625,
      "learning_rate": 1.8475379746671136e-05,
      "loss": 0.7894,
      "step": 571320
    },
    {
      "epoch": 2.002369211466142,
      "grad_norm": 2.71875,
      "learning_rate": 1.847473071800743e-05,
      "loss": 0.8129,
      "step": 571330
    },
    {
      "epoch": 2.002404258973038,
      "grad_norm": 3.234375,
      "learning_rate": 1.847408168934373e-05,
      "loss": 0.7807,
      "step": 571340
    },
    {
      "epoch": 2.0024393064799337,
      "grad_norm": 2.53125,
      "learning_rate": 1.8473432660680026e-05,
      "loss": 0.8783,
      "step": 571350
    },
    {
      "epoch": 2.002474353986829,
      "grad_norm": 2.9375,
      "learning_rate": 1.8472783632016324e-05,
      "loss": 0.8555,
      "step": 571360
    },
    {
      "epoch": 2.002509401493725,
      "grad_norm": 3.125,
      "learning_rate": 1.8472134603352622e-05,
      "loss": 0.8032,
      "step": 571370
    },
    {
      "epoch": 2.00254444900062,
      "grad_norm": 2.953125,
      "learning_rate": 1.847148557468892e-05,
      "loss": 0.8055,
      "step": 571380
    },
    {
      "epoch": 2.002579496507516,
      "grad_norm": 2.890625,
      "learning_rate": 1.847083654602522e-05,
      "loss": 0.7865,
      "step": 571390
    },
    {
      "epoch": 2.0026145440144116,
      "grad_norm": 3.203125,
      "learning_rate": 1.8470187517361516e-05,
      "loss": 0.9263,
      "step": 571400
    },
    {
      "epoch": 2.002649591521307,
      "grad_norm": 3.34375,
      "learning_rate": 1.8469538488697814e-05,
      "loss": 0.829,
      "step": 571410
    },
    {
      "epoch": 2.0026846390282027,
      "grad_norm": 3.15625,
      "learning_rate": 1.8468889460034112e-05,
      "loss": 0.7383,
      "step": 571420
    },
    {
      "epoch": 2.0027196865350985,
      "grad_norm": 2.5625,
      "learning_rate": 1.8468240431370414e-05,
      "loss": 0.775,
      "step": 571430
    },
    {
      "epoch": 2.002754734041994,
      "grad_norm": 2.859375,
      "learning_rate": 1.8467591402706712e-05,
      "loss": 0.8873,
      "step": 571440
    },
    {
      "epoch": 2.0027897815488895,
      "grad_norm": 2.953125,
      "learning_rate": 1.846694237404301e-05,
      "loss": 0.8005,
      "step": 571450
    },
    {
      "epoch": 2.0028248290557853,
      "grad_norm": 3.03125,
      "learning_rate": 1.8466293345379308e-05,
      "loss": 0.7923,
      "step": 571460
    },
    {
      "epoch": 2.0028598765626806,
      "grad_norm": 2.796875,
      "learning_rate": 1.8465644316715606e-05,
      "loss": 0.773,
      "step": 571470
    },
    {
      "epoch": 2.0028949240695764,
      "grad_norm": 2.890625,
      "learning_rate": 1.8464995288051904e-05,
      "loss": 0.8816,
      "step": 571480
    },
    {
      "epoch": 2.002929971576472,
      "grad_norm": 2.9375,
      "learning_rate": 1.8464346259388202e-05,
      "loss": 0.7968,
      "step": 571490
    },
    {
      "epoch": 2.0029650190833674,
      "grad_norm": 3.03125,
      "learning_rate": 1.84636972307245e-05,
      "loss": 0.8369,
      "step": 571500
    },
    {
      "epoch": 2.003000066590263,
      "grad_norm": 2.53125,
      "learning_rate": 1.8463048202060794e-05,
      "loss": 0.8073,
      "step": 571510
    },
    {
      "epoch": 2.0030351140971585,
      "grad_norm": 2.59375,
      "learning_rate": 1.8462399173397092e-05,
      "loss": 0.8412,
      "step": 571520
    },
    {
      "epoch": 2.0030701616040543,
      "grad_norm": 2.8125,
      "learning_rate": 1.846175014473339e-05,
      "loss": 0.8031,
      "step": 571530
    },
    {
      "epoch": 2.00310520911095,
      "grad_norm": 3.0,
      "learning_rate": 1.8461101116069692e-05,
      "loss": 0.8452,
      "step": 571540
    },
    {
      "epoch": 2.0031402566178453,
      "grad_norm": 2.671875,
      "learning_rate": 1.846045208740599e-05,
      "loss": 0.7906,
      "step": 571550
    },
    {
      "epoch": 2.003175304124741,
      "grad_norm": 2.75,
      "learning_rate": 1.8459803058742288e-05,
      "loss": 0.8039,
      "step": 571560
    },
    {
      "epoch": 2.003210351631637,
      "grad_norm": 2.984375,
      "learning_rate": 1.8459154030078586e-05,
      "loss": 0.7801,
      "step": 571570
    },
    {
      "epoch": 2.003245399138532,
      "grad_norm": 3.296875,
      "learning_rate": 1.8458505001414884e-05,
      "loss": 0.8758,
      "step": 571580
    },
    {
      "epoch": 2.003280446645428,
      "grad_norm": 3.234375,
      "learning_rate": 1.8457855972751182e-05,
      "loss": 0.7673,
      "step": 571590
    },
    {
      "epoch": 2.0033154941523237,
      "grad_norm": 3.21875,
      "learning_rate": 1.845720694408748e-05,
      "loss": 0.7917,
      "step": 571600
    },
    {
      "epoch": 2.003350541659219,
      "grad_norm": 2.8125,
      "learning_rate": 1.8456557915423778e-05,
      "loss": 0.9153,
      "step": 571610
    },
    {
      "epoch": 2.0033855891661148,
      "grad_norm": 2.6875,
      "learning_rate": 1.8455908886760076e-05,
      "loss": 0.7809,
      "step": 571620
    },
    {
      "epoch": 2.00342063667301,
      "grad_norm": 2.796875,
      "learning_rate": 1.8455259858096374e-05,
      "loss": 0.8595,
      "step": 571630
    },
    {
      "epoch": 2.003455684179906,
      "grad_norm": 3.046875,
      "learning_rate": 1.8454610829432672e-05,
      "loss": 0.8458,
      "step": 571640
    },
    {
      "epoch": 2.0034907316868016,
      "grad_norm": 2.921875,
      "learning_rate": 1.845396180076897e-05,
      "loss": 0.765,
      "step": 571650
    },
    {
      "epoch": 2.003525779193697,
      "grad_norm": 3.28125,
      "learning_rate": 1.8453312772105268e-05,
      "loss": 0.8077,
      "step": 571660
    },
    {
      "epoch": 2.0035608267005927,
      "grad_norm": 2.765625,
      "learning_rate": 1.8452663743441566e-05,
      "loss": 0.8753,
      "step": 571670
    },
    {
      "epoch": 2.0035958742074884,
      "grad_norm": 2.421875,
      "learning_rate": 1.8452014714777867e-05,
      "loss": 0.7595,
      "step": 571680
    },
    {
      "epoch": 2.0036309217143837,
      "grad_norm": 2.71875,
      "learning_rate": 1.8451365686114165e-05,
      "loss": 0.822,
      "step": 571690
    },
    {
      "epoch": 2.0036659692212795,
      "grad_norm": 2.9375,
      "learning_rate": 1.845071665745046e-05,
      "loss": 0.8094,
      "step": 571700
    },
    {
      "epoch": 2.0037010167281752,
      "grad_norm": 2.703125,
      "learning_rate": 1.8450067628786758e-05,
      "loss": 0.7938,
      "step": 571710
    },
    {
      "epoch": 2.0037360642350706,
      "grad_norm": 3.28125,
      "learning_rate": 1.8449418600123056e-05,
      "loss": 0.8118,
      "step": 571720
    },
    {
      "epoch": 2.0037711117419663,
      "grad_norm": 2.703125,
      "learning_rate": 1.8448769571459354e-05,
      "loss": 0.7642,
      "step": 571730
    },
    {
      "epoch": 2.0038061592488616,
      "grad_norm": 2.625,
      "learning_rate": 1.8448120542795652e-05,
      "loss": 0.8385,
      "step": 571740
    },
    {
      "epoch": 2.0038412067557574,
      "grad_norm": 2.78125,
      "learning_rate": 1.844747151413195e-05,
      "loss": 0.7928,
      "step": 571750
    },
    {
      "epoch": 2.003876254262653,
      "grad_norm": 2.796875,
      "learning_rate": 1.8446822485468248e-05,
      "loss": 0.8752,
      "step": 571760
    },
    {
      "epoch": 2.0039113017695485,
      "grad_norm": 3.296875,
      "learning_rate": 1.8446173456804546e-05,
      "loss": 0.8839,
      "step": 571770
    },
    {
      "epoch": 2.003946349276444,
      "grad_norm": 3.234375,
      "learning_rate": 1.8445524428140844e-05,
      "loss": 0.7542,
      "step": 571780
    },
    {
      "epoch": 2.00398139678334,
      "grad_norm": 3.078125,
      "learning_rate": 1.8444875399477142e-05,
      "loss": 0.8524,
      "step": 571790
    },
    {
      "epoch": 2.0040164442902353,
      "grad_norm": 2.6875,
      "learning_rate": 1.8444226370813443e-05,
      "loss": 0.8674,
      "step": 571800
    },
    {
      "epoch": 2.004051491797131,
      "grad_norm": 2.84375,
      "learning_rate": 1.844357734214974e-05,
      "loss": 0.8,
      "step": 571810
    },
    {
      "epoch": 2.004086539304027,
      "grad_norm": 2.671875,
      "learning_rate": 1.844292831348604e-05,
      "loss": 0.8205,
      "step": 571820
    },
    {
      "epoch": 2.004121586810922,
      "grad_norm": 3.15625,
      "learning_rate": 1.8442279284822337e-05,
      "loss": 0.8058,
      "step": 571830
    },
    {
      "epoch": 2.004156634317818,
      "grad_norm": 3.28125,
      "learning_rate": 1.8441630256158635e-05,
      "loss": 0.8591,
      "step": 571840
    },
    {
      "epoch": 2.004191681824713,
      "grad_norm": 2.53125,
      "learning_rate": 1.8440981227494933e-05,
      "loss": 0.7762,
      "step": 571850
    },
    {
      "epoch": 2.004226729331609,
      "grad_norm": 2.515625,
      "learning_rate": 1.844033219883123e-05,
      "loss": 0.8067,
      "step": 571860
    },
    {
      "epoch": 2.0042617768385047,
      "grad_norm": 3.0625,
      "learning_rate": 1.843968317016753e-05,
      "loss": 0.7414,
      "step": 571870
    },
    {
      "epoch": 2.0042968243454,
      "grad_norm": 2.609375,
      "learning_rate": 1.8439034141503827e-05,
      "loss": 0.7589,
      "step": 571880
    },
    {
      "epoch": 2.0043318718522958,
      "grad_norm": 4.59375,
      "learning_rate": 1.8438385112840122e-05,
      "loss": 0.7676,
      "step": 571890
    },
    {
      "epoch": 2.0043669193591915,
      "grad_norm": 3.078125,
      "learning_rate": 1.843773608417642e-05,
      "loss": 0.8712,
      "step": 571900
    },
    {
      "epoch": 2.004401966866087,
      "grad_norm": 2.671875,
      "learning_rate": 1.843708705551272e-05,
      "loss": 0.8523,
      "step": 571910
    },
    {
      "epoch": 2.0044370143729826,
      "grad_norm": 2.734375,
      "learning_rate": 1.843643802684902e-05,
      "loss": 0.7889,
      "step": 571920
    },
    {
      "epoch": 2.0044720618798784,
      "grad_norm": 2.40625,
      "learning_rate": 1.8435788998185317e-05,
      "loss": 0.8121,
      "step": 571930
    },
    {
      "epoch": 2.0045071093867737,
      "grad_norm": 2.984375,
      "learning_rate": 1.8435139969521615e-05,
      "loss": 0.8527,
      "step": 571940
    },
    {
      "epoch": 2.0045421568936694,
      "grad_norm": 2.421875,
      "learning_rate": 1.8434490940857913e-05,
      "loss": 0.7808,
      "step": 571950
    },
    {
      "epoch": 2.0045772044005647,
      "grad_norm": 2.421875,
      "learning_rate": 1.843384191219421e-05,
      "loss": 0.8094,
      "step": 571960
    },
    {
      "epoch": 2.0046122519074605,
      "grad_norm": 2.640625,
      "learning_rate": 1.843319288353051e-05,
      "loss": 0.7827,
      "step": 571970
    },
    {
      "epoch": 2.0046472994143563,
      "grad_norm": 3.140625,
      "learning_rate": 1.8432543854866807e-05,
      "loss": 0.8509,
      "step": 571980
    },
    {
      "epoch": 2.0046823469212516,
      "grad_norm": 3.6875,
      "learning_rate": 1.8431894826203105e-05,
      "loss": 0.8167,
      "step": 571990
    },
    {
      "epoch": 2.0047173944281473,
      "grad_norm": 2.984375,
      "learning_rate": 1.8431245797539403e-05,
      "loss": 0.7992,
      "step": 572000
    },
    {
      "epoch": 2.004752441935043,
      "grad_norm": 2.75,
      "learning_rate": 1.84305967688757e-05,
      "loss": 0.7525,
      "step": 572010
    },
    {
      "epoch": 2.0047874894419384,
      "grad_norm": 2.859375,
      "learning_rate": 1.8429947740212e-05,
      "loss": 0.8231,
      "step": 572020
    },
    {
      "epoch": 2.004822536948834,
      "grad_norm": 2.8125,
      "learning_rate": 1.8429298711548297e-05,
      "loss": 0.86,
      "step": 572030
    },
    {
      "epoch": 2.00485758445573,
      "grad_norm": 2.71875,
      "learning_rate": 1.8428649682884595e-05,
      "loss": 0.8169,
      "step": 572040
    },
    {
      "epoch": 2.0048926319626252,
      "grad_norm": 2.53125,
      "learning_rate": 1.8428000654220896e-05,
      "loss": 0.8143,
      "step": 572050
    },
    {
      "epoch": 2.004927679469521,
      "grad_norm": 3.015625,
      "learning_rate": 1.8427351625557194e-05,
      "loss": 0.8354,
      "step": 572060
    },
    {
      "epoch": 2.0049627269764168,
      "grad_norm": 3.484375,
      "learning_rate": 1.8426702596893492e-05,
      "loss": 0.8372,
      "step": 572070
    },
    {
      "epoch": 2.004997774483312,
      "grad_norm": 2.984375,
      "learning_rate": 1.8426053568229787e-05,
      "loss": 0.8846,
      "step": 572080
    },
    {
      "epoch": 2.005032821990208,
      "grad_norm": 3.34375,
      "learning_rate": 1.8425404539566085e-05,
      "loss": 0.8013,
      "step": 572090
    },
    {
      "epoch": 2.005067869497103,
      "grad_norm": 2.796875,
      "learning_rate": 1.8424755510902383e-05,
      "loss": 0.7935,
      "step": 572100
    },
    {
      "epoch": 2.005102917003999,
      "grad_norm": 2.953125,
      "learning_rate": 1.842410648223868e-05,
      "loss": 0.9256,
      "step": 572110
    },
    {
      "epoch": 2.0051379645108947,
      "grad_norm": 2.75,
      "learning_rate": 1.842345745357498e-05,
      "loss": 0.8349,
      "step": 572120
    },
    {
      "epoch": 2.00517301201779,
      "grad_norm": 3.0,
      "learning_rate": 1.8422808424911277e-05,
      "loss": 0.9009,
      "step": 572130
    },
    {
      "epoch": 2.0052080595246857,
      "grad_norm": 2.71875,
      "learning_rate": 1.8422159396247575e-05,
      "loss": 0.8122,
      "step": 572140
    },
    {
      "epoch": 2.0052431070315815,
      "grad_norm": 3.53125,
      "learning_rate": 1.8421510367583873e-05,
      "loss": 0.7919,
      "step": 572150
    },
    {
      "epoch": 2.005278154538477,
      "grad_norm": 3.109375,
      "learning_rate": 1.8420861338920174e-05,
      "loss": 0.9183,
      "step": 572160
    },
    {
      "epoch": 2.0053132020453726,
      "grad_norm": 3.046875,
      "learning_rate": 1.8420212310256472e-05,
      "loss": 0.8138,
      "step": 572170
    },
    {
      "epoch": 2.0053482495522683,
      "grad_norm": 2.8125,
      "learning_rate": 1.841956328159277e-05,
      "loss": 0.9385,
      "step": 572180
    },
    {
      "epoch": 2.0053832970591636,
      "grad_norm": 2.78125,
      "learning_rate": 1.841891425292907e-05,
      "loss": 0.7801,
      "step": 572190
    },
    {
      "epoch": 2.0054183445660594,
      "grad_norm": 2.65625,
      "learning_rate": 1.8418265224265366e-05,
      "loss": 0.8578,
      "step": 572200
    },
    {
      "epoch": 2.0054533920729547,
      "grad_norm": 2.859375,
      "learning_rate": 1.8417616195601664e-05,
      "loss": 0.8818,
      "step": 572210
    },
    {
      "epoch": 2.0054884395798505,
      "grad_norm": 3.03125,
      "learning_rate": 1.8416967166937962e-05,
      "loss": 0.8454,
      "step": 572220
    },
    {
      "epoch": 2.005523487086746,
      "grad_norm": 3.234375,
      "learning_rate": 1.841631813827426e-05,
      "loss": 0.8387,
      "step": 572230
    },
    {
      "epoch": 2.0055585345936415,
      "grad_norm": 3.046875,
      "learning_rate": 1.841566910961056e-05,
      "loss": 0.847,
      "step": 572240
    },
    {
      "epoch": 2.0055935821005373,
      "grad_norm": 2.8125,
      "learning_rate": 1.8415020080946856e-05,
      "loss": 0.8827,
      "step": 572250
    },
    {
      "epoch": 2.005628629607433,
      "grad_norm": 2.6875,
      "learning_rate": 1.8414371052283154e-05,
      "loss": 0.8139,
      "step": 572260
    },
    {
      "epoch": 2.0056636771143284,
      "grad_norm": 2.703125,
      "learning_rate": 1.841372202361945e-05,
      "loss": 0.8548,
      "step": 572270
    },
    {
      "epoch": 2.005698724621224,
      "grad_norm": 3.015625,
      "learning_rate": 1.841307299495575e-05,
      "loss": 0.8649,
      "step": 572280
    },
    {
      "epoch": 2.00573377212812,
      "grad_norm": 2.984375,
      "learning_rate": 1.841242396629205e-05,
      "loss": 0.8633,
      "step": 572290
    },
    {
      "epoch": 2.005768819635015,
      "grad_norm": 3.0625,
      "learning_rate": 1.8411774937628346e-05,
      "loss": 0.8674,
      "step": 572300
    },
    {
      "epoch": 2.005803867141911,
      "grad_norm": 2.75,
      "learning_rate": 1.8411125908964644e-05,
      "loss": 0.8149,
      "step": 572310
    },
    {
      "epoch": 2.0058389146488063,
      "grad_norm": 3.03125,
      "learning_rate": 1.8410476880300942e-05,
      "loss": 0.8135,
      "step": 572320
    },
    {
      "epoch": 2.005873962155702,
      "grad_norm": 2.671875,
      "learning_rate": 1.840982785163724e-05,
      "loss": 0.8231,
      "step": 572330
    },
    {
      "epoch": 2.0059090096625978,
      "grad_norm": 3.015625,
      "learning_rate": 1.840917882297354e-05,
      "loss": 0.7721,
      "step": 572340
    },
    {
      "epoch": 2.005944057169493,
      "grad_norm": 2.84375,
      "learning_rate": 1.8408529794309836e-05,
      "loss": 0.726,
      "step": 572350
    },
    {
      "epoch": 2.005979104676389,
      "grad_norm": 3.328125,
      "learning_rate": 1.8407880765646134e-05,
      "loss": 0.8585,
      "step": 572360
    },
    {
      "epoch": 2.0060141521832846,
      "grad_norm": 2.59375,
      "learning_rate": 1.8407231736982432e-05,
      "loss": 0.8125,
      "step": 572370
    },
    {
      "epoch": 2.00604919969018,
      "grad_norm": 2.5625,
      "learning_rate": 1.840658270831873e-05,
      "loss": 0.8371,
      "step": 572380
    },
    {
      "epoch": 2.0060842471970757,
      "grad_norm": 2.359375,
      "learning_rate": 1.840593367965503e-05,
      "loss": 0.7424,
      "step": 572390
    },
    {
      "epoch": 2.0061192947039714,
      "grad_norm": 2.765625,
      "learning_rate": 1.8405284650991326e-05,
      "loss": 0.7961,
      "step": 572400
    },
    {
      "epoch": 2.0061543422108667,
      "grad_norm": 2.5625,
      "learning_rate": 1.8404635622327624e-05,
      "loss": 0.7119,
      "step": 572410
    },
    {
      "epoch": 2.0061893897177625,
      "grad_norm": 3.0625,
      "learning_rate": 1.8403986593663926e-05,
      "loss": 0.7671,
      "step": 572420
    },
    {
      "epoch": 2.006224437224658,
      "grad_norm": 2.71875,
      "learning_rate": 1.8403337565000224e-05,
      "loss": 0.8127,
      "step": 572430
    },
    {
      "epoch": 2.0062594847315536,
      "grad_norm": 2.703125,
      "learning_rate": 1.8402688536336522e-05,
      "loss": 0.8408,
      "step": 572440
    },
    {
      "epoch": 2.0062945322384493,
      "grad_norm": 3.0,
      "learning_rate": 1.8402039507672816e-05,
      "loss": 0.7981,
      "step": 572450
    },
    {
      "epoch": 2.0063295797453446,
      "grad_norm": 2.984375,
      "learning_rate": 1.8401390479009114e-05,
      "loss": 0.751,
      "step": 572460
    },
    {
      "epoch": 2.0063646272522404,
      "grad_norm": 2.46875,
      "learning_rate": 1.8400741450345412e-05,
      "loss": 0.8043,
      "step": 572470
    },
    {
      "epoch": 2.006399674759136,
      "grad_norm": 2.859375,
      "learning_rate": 1.840009242168171e-05,
      "loss": 0.8019,
      "step": 572480
    },
    {
      "epoch": 2.0064347222660315,
      "grad_norm": 3.203125,
      "learning_rate": 1.839944339301801e-05,
      "loss": 0.8398,
      "step": 572490
    },
    {
      "epoch": 2.0064697697729272,
      "grad_norm": 2.421875,
      "learning_rate": 1.8398794364354306e-05,
      "loss": 0.8175,
      "step": 572500
    },
    {
      "epoch": 2.006504817279823,
      "grad_norm": 3.078125,
      "learning_rate": 1.8398145335690604e-05,
      "loss": 0.7816,
      "step": 572510
    },
    {
      "epoch": 2.0065398647867183,
      "grad_norm": 2.203125,
      "learning_rate": 1.8397496307026902e-05,
      "loss": 0.7284,
      "step": 572520
    },
    {
      "epoch": 2.006574912293614,
      "grad_norm": 3.109375,
      "learning_rate": 1.8396847278363204e-05,
      "loss": 0.7434,
      "step": 572530
    },
    {
      "epoch": 2.0066099598005094,
      "grad_norm": 2.609375,
      "learning_rate": 1.8396198249699502e-05,
      "loss": 0.8121,
      "step": 572540
    },
    {
      "epoch": 2.006645007307405,
      "grad_norm": 3.078125,
      "learning_rate": 1.83955492210358e-05,
      "loss": 0.9374,
      "step": 572550
    },
    {
      "epoch": 2.006680054814301,
      "grad_norm": 3.015625,
      "learning_rate": 1.8394900192372098e-05,
      "loss": 0.8917,
      "step": 572560
    },
    {
      "epoch": 2.006715102321196,
      "grad_norm": 3.1875,
      "learning_rate": 1.8394251163708396e-05,
      "loss": 0.8005,
      "step": 572570
    },
    {
      "epoch": 2.006750149828092,
      "grad_norm": 2.609375,
      "learning_rate": 1.8393602135044694e-05,
      "loss": 0.8738,
      "step": 572580
    },
    {
      "epoch": 2.0067851973349877,
      "grad_norm": 3.046875,
      "learning_rate": 1.8392953106380992e-05,
      "loss": 0.7811,
      "step": 572590
    },
    {
      "epoch": 2.006820244841883,
      "grad_norm": 3.140625,
      "learning_rate": 1.839230407771729e-05,
      "loss": 0.7877,
      "step": 572600
    },
    {
      "epoch": 2.006855292348779,
      "grad_norm": 2.65625,
      "learning_rate": 1.8391655049053588e-05,
      "loss": 0.8739,
      "step": 572610
    },
    {
      "epoch": 2.0068903398556746,
      "grad_norm": 2.828125,
      "learning_rate": 1.8391006020389886e-05,
      "loss": 0.8131,
      "step": 572620
    },
    {
      "epoch": 2.00692538736257,
      "grad_norm": 2.96875,
      "learning_rate": 1.8390356991726184e-05,
      "loss": 0.8451,
      "step": 572630
    },
    {
      "epoch": 2.0069604348694656,
      "grad_norm": 2.90625,
      "learning_rate": 1.8389707963062482e-05,
      "loss": 0.8053,
      "step": 572640
    },
    {
      "epoch": 2.006995482376361,
      "grad_norm": 2.890625,
      "learning_rate": 1.838905893439878e-05,
      "loss": 0.8454,
      "step": 572650
    },
    {
      "epoch": 2.0070305298832567,
      "grad_norm": 2.5,
      "learning_rate": 1.8388409905735078e-05,
      "loss": 0.778,
      "step": 572660
    },
    {
      "epoch": 2.0070655773901525,
      "grad_norm": 3.15625,
      "learning_rate": 1.8387760877071376e-05,
      "loss": 0.8052,
      "step": 572670
    },
    {
      "epoch": 2.0071006248970478,
      "grad_norm": 2.75,
      "learning_rate": 1.8387111848407674e-05,
      "loss": 0.7628,
      "step": 572680
    },
    {
      "epoch": 2.0071356724039435,
      "grad_norm": 3.34375,
      "learning_rate": 1.8386462819743972e-05,
      "loss": 0.7853,
      "step": 572690
    },
    {
      "epoch": 2.0071707199108393,
      "grad_norm": 2.921875,
      "learning_rate": 1.838581379108027e-05,
      "loss": 0.779,
      "step": 572700
    },
    {
      "epoch": 2.0072057674177346,
      "grad_norm": 3.0,
      "learning_rate": 1.8385164762416568e-05,
      "loss": 0.8181,
      "step": 572710
    },
    {
      "epoch": 2.0072408149246304,
      "grad_norm": 2.984375,
      "learning_rate": 1.8384515733752866e-05,
      "loss": 0.8483,
      "step": 572720
    },
    {
      "epoch": 2.007275862431526,
      "grad_norm": 3.140625,
      "learning_rate": 1.8383866705089164e-05,
      "loss": 0.9053,
      "step": 572730
    },
    {
      "epoch": 2.0073109099384214,
      "grad_norm": 2.96875,
      "learning_rate": 1.8383217676425462e-05,
      "loss": 0.787,
      "step": 572740
    },
    {
      "epoch": 2.007345957445317,
      "grad_norm": 2.640625,
      "learning_rate": 1.838256864776176e-05,
      "loss": 0.784,
      "step": 572750
    },
    {
      "epoch": 2.007381004952213,
      "grad_norm": 2.640625,
      "learning_rate": 1.8381919619098058e-05,
      "loss": 0.7596,
      "step": 572760
    },
    {
      "epoch": 2.0074160524591083,
      "grad_norm": 3.34375,
      "learning_rate": 1.8381270590434356e-05,
      "loss": 0.8406,
      "step": 572770
    },
    {
      "epoch": 2.007451099966004,
      "grad_norm": 2.53125,
      "learning_rate": 1.8380621561770657e-05,
      "loss": 0.7443,
      "step": 572780
    },
    {
      "epoch": 2.0074861474728993,
      "grad_norm": 2.96875,
      "learning_rate": 1.8379972533106955e-05,
      "loss": 0.8928,
      "step": 572790
    },
    {
      "epoch": 2.007521194979795,
      "grad_norm": 3.25,
      "learning_rate": 1.8379323504443253e-05,
      "loss": 0.8028,
      "step": 572800
    },
    {
      "epoch": 2.007556242486691,
      "grad_norm": 2.96875,
      "learning_rate": 1.837867447577955e-05,
      "loss": 0.8286,
      "step": 572810
    },
    {
      "epoch": 2.007591289993586,
      "grad_norm": 2.671875,
      "learning_rate": 1.837802544711585e-05,
      "loss": 0.817,
      "step": 572820
    },
    {
      "epoch": 2.007626337500482,
      "grad_norm": 2.171875,
      "learning_rate": 1.8377376418452144e-05,
      "loss": 0.756,
      "step": 572830
    },
    {
      "epoch": 2.0076613850073777,
      "grad_norm": 3.140625,
      "learning_rate": 1.8376727389788442e-05,
      "loss": 0.7681,
      "step": 572840
    },
    {
      "epoch": 2.007696432514273,
      "grad_norm": 3.140625,
      "learning_rate": 1.837607836112474e-05,
      "loss": 0.8331,
      "step": 572850
    },
    {
      "epoch": 2.0077314800211687,
      "grad_norm": 3.09375,
      "learning_rate": 1.8375429332461038e-05,
      "loss": 0.796,
      "step": 572860
    },
    {
      "epoch": 2.0077665275280645,
      "grad_norm": 3.15625,
      "learning_rate": 1.8374780303797336e-05,
      "loss": 0.8261,
      "step": 572870
    },
    {
      "epoch": 2.00780157503496,
      "grad_norm": 3.203125,
      "learning_rate": 1.8374131275133634e-05,
      "loss": 0.812,
      "step": 572880
    },
    {
      "epoch": 2.0078366225418556,
      "grad_norm": 3.28125,
      "learning_rate": 1.8373482246469932e-05,
      "loss": 0.8102,
      "step": 572890
    },
    {
      "epoch": 2.007871670048751,
      "grad_norm": 2.859375,
      "learning_rate": 1.8372833217806233e-05,
      "loss": 0.8319,
      "step": 572900
    },
    {
      "epoch": 2.0079067175556466,
      "grad_norm": 2.609375,
      "learning_rate": 1.837218418914253e-05,
      "loss": 0.7463,
      "step": 572910
    },
    {
      "epoch": 2.0079417650625424,
      "grad_norm": 2.734375,
      "learning_rate": 1.837153516047883e-05,
      "loss": 0.8433,
      "step": 572920
    },
    {
      "epoch": 2.0079768125694377,
      "grad_norm": 2.78125,
      "learning_rate": 1.8370886131815127e-05,
      "loss": 0.8263,
      "step": 572930
    },
    {
      "epoch": 2.0080118600763335,
      "grad_norm": 2.40625,
      "learning_rate": 1.8370237103151425e-05,
      "loss": 0.7183,
      "step": 572940
    },
    {
      "epoch": 2.0080469075832292,
      "grad_norm": 3.28125,
      "learning_rate": 1.8369588074487723e-05,
      "loss": 0.8206,
      "step": 572950
    },
    {
      "epoch": 2.0080819550901245,
      "grad_norm": 2.921875,
      "learning_rate": 1.836893904582402e-05,
      "loss": 0.7433,
      "step": 572960
    },
    {
      "epoch": 2.0081170025970203,
      "grad_norm": 2.5625,
      "learning_rate": 1.836829001716032e-05,
      "loss": 0.8487,
      "step": 572970
    },
    {
      "epoch": 2.008152050103916,
      "grad_norm": 3.03125,
      "learning_rate": 1.8367640988496617e-05,
      "loss": 0.8266,
      "step": 572980
    },
    {
      "epoch": 2.0081870976108114,
      "grad_norm": 2.890625,
      "learning_rate": 1.8366991959832915e-05,
      "loss": 0.7289,
      "step": 572990
    },
    {
      "epoch": 2.008222145117707,
      "grad_norm": 3.15625,
      "learning_rate": 1.8366342931169213e-05,
      "loss": 0.8217,
      "step": 573000
    },
    {
      "epoch": 2.0082571926246024,
      "grad_norm": 2.984375,
      "learning_rate": 1.836569390250551e-05,
      "loss": 0.8925,
      "step": 573010
    },
    {
      "epoch": 2.008292240131498,
      "grad_norm": 2.578125,
      "learning_rate": 1.836504487384181e-05,
      "loss": 0.7928,
      "step": 573020
    },
    {
      "epoch": 2.008327287638394,
      "grad_norm": 3.625,
      "learning_rate": 1.8364395845178107e-05,
      "loss": 0.851,
      "step": 573030
    },
    {
      "epoch": 2.0083623351452893,
      "grad_norm": 3.0,
      "learning_rate": 1.8363746816514405e-05,
      "loss": 0.7885,
      "step": 573040
    },
    {
      "epoch": 2.008397382652185,
      "grad_norm": 2.453125,
      "learning_rate": 1.8363097787850703e-05,
      "loss": 0.7934,
      "step": 573050
    },
    {
      "epoch": 2.008432430159081,
      "grad_norm": 3.828125,
      "learning_rate": 1.8362448759187e-05,
      "loss": 0.7777,
      "step": 573060
    },
    {
      "epoch": 2.008467477665976,
      "grad_norm": 2.984375,
      "learning_rate": 1.83617997305233e-05,
      "loss": 0.7963,
      "step": 573070
    },
    {
      "epoch": 2.008502525172872,
      "grad_norm": 2.984375,
      "learning_rate": 1.8361150701859597e-05,
      "loss": 0.7989,
      "step": 573080
    },
    {
      "epoch": 2.0085375726797676,
      "grad_norm": 3.109375,
      "learning_rate": 1.8360501673195895e-05,
      "loss": 0.8684,
      "step": 573090
    },
    {
      "epoch": 2.008572620186663,
      "grad_norm": 2.875,
      "learning_rate": 1.8359852644532193e-05,
      "loss": 0.8651,
      "step": 573100
    },
    {
      "epoch": 2.0086076676935587,
      "grad_norm": 3.109375,
      "learning_rate": 1.835920361586849e-05,
      "loss": 0.8143,
      "step": 573110
    },
    {
      "epoch": 2.008642715200454,
      "grad_norm": 3.421875,
      "learning_rate": 1.835855458720479e-05,
      "loss": 0.8041,
      "step": 573120
    },
    {
      "epoch": 2.0086777627073498,
      "grad_norm": 2.578125,
      "learning_rate": 1.8357905558541087e-05,
      "loss": 0.7707,
      "step": 573130
    },
    {
      "epoch": 2.0087128102142455,
      "grad_norm": 2.875,
      "learning_rate": 1.8357256529877385e-05,
      "loss": 0.8634,
      "step": 573140
    },
    {
      "epoch": 2.008747857721141,
      "grad_norm": 3.1875,
      "learning_rate": 1.8356607501213687e-05,
      "loss": 0.8193,
      "step": 573150
    },
    {
      "epoch": 2.0087829052280366,
      "grad_norm": 2.734375,
      "learning_rate": 1.8355958472549985e-05,
      "loss": 0.8036,
      "step": 573160
    },
    {
      "epoch": 2.0088179527349324,
      "grad_norm": 2.96875,
      "learning_rate": 1.8355309443886283e-05,
      "loss": 0.8731,
      "step": 573170
    },
    {
      "epoch": 2.0088530002418277,
      "grad_norm": 3.109375,
      "learning_rate": 1.835466041522258e-05,
      "loss": 0.8008,
      "step": 573180
    },
    {
      "epoch": 2.0088880477487234,
      "grad_norm": 3.078125,
      "learning_rate": 1.835401138655888e-05,
      "loss": 0.7913,
      "step": 573190
    },
    {
      "epoch": 2.008923095255619,
      "grad_norm": 2.890625,
      "learning_rate": 1.8353362357895177e-05,
      "loss": 0.7247,
      "step": 573200
    },
    {
      "epoch": 2.0089581427625145,
      "grad_norm": 2.828125,
      "learning_rate": 1.835271332923147e-05,
      "loss": 0.8685,
      "step": 573210
    },
    {
      "epoch": 2.0089931902694103,
      "grad_norm": 2.953125,
      "learning_rate": 1.835206430056777e-05,
      "loss": 0.8474,
      "step": 573220
    },
    {
      "epoch": 2.0090282377763056,
      "grad_norm": 2.96875,
      "learning_rate": 1.8351415271904067e-05,
      "loss": 0.7733,
      "step": 573230
    },
    {
      "epoch": 2.0090632852832013,
      "grad_norm": 2.9375,
      "learning_rate": 1.8350766243240365e-05,
      "loss": 0.8209,
      "step": 573240
    },
    {
      "epoch": 2.009098332790097,
      "grad_norm": 3.28125,
      "learning_rate": 1.8350117214576663e-05,
      "loss": 0.7512,
      "step": 573250
    },
    {
      "epoch": 2.0091333802969924,
      "grad_norm": 2.390625,
      "learning_rate": 1.8349468185912965e-05,
      "loss": 0.7669,
      "step": 573260
    },
    {
      "epoch": 2.009168427803888,
      "grad_norm": 2.59375,
      "learning_rate": 1.8348819157249263e-05,
      "loss": 0.8147,
      "step": 573270
    },
    {
      "epoch": 2.009203475310784,
      "grad_norm": 2.90625,
      "learning_rate": 1.834817012858556e-05,
      "loss": 0.8262,
      "step": 573280
    },
    {
      "epoch": 2.0092385228176792,
      "grad_norm": 2.796875,
      "learning_rate": 1.834752109992186e-05,
      "loss": 0.7669,
      "step": 573290
    },
    {
      "epoch": 2.009273570324575,
      "grad_norm": 2.9375,
      "learning_rate": 1.8346872071258157e-05,
      "loss": 0.845,
      "step": 573300
    },
    {
      "epoch": 2.0093086178314707,
      "grad_norm": 2.953125,
      "learning_rate": 1.8346223042594455e-05,
      "loss": 0.7029,
      "step": 573310
    },
    {
      "epoch": 2.009343665338366,
      "grad_norm": 2.765625,
      "learning_rate": 1.8345574013930753e-05,
      "loss": 0.8379,
      "step": 573320
    },
    {
      "epoch": 2.009378712845262,
      "grad_norm": 2.890625,
      "learning_rate": 1.834492498526705e-05,
      "loss": 0.9356,
      "step": 573330
    },
    {
      "epoch": 2.009413760352157,
      "grad_norm": 2.796875,
      "learning_rate": 1.834427595660335e-05,
      "loss": 0.8418,
      "step": 573340
    },
    {
      "epoch": 2.009448807859053,
      "grad_norm": 3.3125,
      "learning_rate": 1.8343626927939647e-05,
      "loss": 0.8139,
      "step": 573350
    },
    {
      "epoch": 2.0094838553659486,
      "grad_norm": 3.0625,
      "learning_rate": 1.8342977899275945e-05,
      "loss": 0.7611,
      "step": 573360
    },
    {
      "epoch": 2.009518902872844,
      "grad_norm": 2.390625,
      "learning_rate": 1.8342328870612243e-05,
      "loss": 0.8518,
      "step": 573370
    },
    {
      "epoch": 2.0095539503797397,
      "grad_norm": 3.015625,
      "learning_rate": 1.834167984194854e-05,
      "loss": 0.7784,
      "step": 573380
    },
    {
      "epoch": 2.0095889978866355,
      "grad_norm": 3.1875,
      "learning_rate": 1.834103081328484e-05,
      "loss": 0.8116,
      "step": 573390
    },
    {
      "epoch": 2.009624045393531,
      "grad_norm": 2.75,
      "learning_rate": 1.8340381784621137e-05,
      "loss": 0.8124,
      "step": 573400
    },
    {
      "epoch": 2.0096590929004265,
      "grad_norm": 2.953125,
      "learning_rate": 1.8339732755957435e-05,
      "loss": 0.8611,
      "step": 573410
    },
    {
      "epoch": 2.0096941404073223,
      "grad_norm": 2.796875,
      "learning_rate": 1.8339083727293733e-05,
      "loss": 0.877,
      "step": 573420
    },
    {
      "epoch": 2.0097291879142176,
      "grad_norm": 3.03125,
      "learning_rate": 1.833843469863003e-05,
      "loss": 0.8861,
      "step": 573430
    },
    {
      "epoch": 2.0097642354211134,
      "grad_norm": 2.53125,
      "learning_rate": 1.833778566996633e-05,
      "loss": 0.7776,
      "step": 573440
    },
    {
      "epoch": 2.009799282928009,
      "grad_norm": 3.34375,
      "learning_rate": 1.8337136641302627e-05,
      "loss": 0.8735,
      "step": 573450
    },
    {
      "epoch": 2.0098343304349044,
      "grad_norm": 3.171875,
      "learning_rate": 1.8336487612638925e-05,
      "loss": 0.9334,
      "step": 573460
    },
    {
      "epoch": 2.0098693779418,
      "grad_norm": 2.625,
      "learning_rate": 1.8335838583975223e-05,
      "loss": 0.7367,
      "step": 573470
    },
    {
      "epoch": 2.0099044254486955,
      "grad_norm": 2.734375,
      "learning_rate": 1.833518955531152e-05,
      "loss": 0.8048,
      "step": 573480
    },
    {
      "epoch": 2.0099394729555913,
      "grad_norm": 3.109375,
      "learning_rate": 1.833454052664782e-05,
      "loss": 0.7975,
      "step": 573490
    },
    {
      "epoch": 2.009974520462487,
      "grad_norm": 2.578125,
      "learning_rate": 1.8333891497984117e-05,
      "loss": 0.9092,
      "step": 573500
    },
    {
      "epoch": 2.0100095679693823,
      "grad_norm": 2.984375,
      "learning_rate": 1.8333242469320415e-05,
      "loss": 0.7889,
      "step": 573510
    },
    {
      "epoch": 2.010044615476278,
      "grad_norm": 2.96875,
      "learning_rate": 1.8332593440656716e-05,
      "loss": 0.8511,
      "step": 573520
    },
    {
      "epoch": 2.010079662983174,
      "grad_norm": 2.78125,
      "learning_rate": 1.8331944411993014e-05,
      "loss": 0.8519,
      "step": 573530
    },
    {
      "epoch": 2.010114710490069,
      "grad_norm": 2.78125,
      "learning_rate": 1.8331295383329312e-05,
      "loss": 0.8014,
      "step": 573540
    },
    {
      "epoch": 2.010149757996965,
      "grad_norm": 2.5625,
      "learning_rate": 1.833064635466561e-05,
      "loss": 0.7492,
      "step": 573550
    },
    {
      "epoch": 2.0101848055038607,
      "grad_norm": 3.03125,
      "learning_rate": 1.8329997326001908e-05,
      "loss": 0.7868,
      "step": 573560
    },
    {
      "epoch": 2.010219853010756,
      "grad_norm": 3.109375,
      "learning_rate": 1.8329348297338206e-05,
      "loss": 0.8632,
      "step": 573570
    },
    {
      "epoch": 2.0102549005176518,
      "grad_norm": 2.71875,
      "learning_rate": 1.83286992686745e-05,
      "loss": 0.7671,
      "step": 573580
    },
    {
      "epoch": 2.010289948024547,
      "grad_norm": 3.359375,
      "learning_rate": 1.83280502400108e-05,
      "loss": 0.8557,
      "step": 573590
    },
    {
      "epoch": 2.010324995531443,
      "grad_norm": 2.90625,
      "learning_rate": 1.8327401211347097e-05,
      "loss": 0.7912,
      "step": 573600
    },
    {
      "epoch": 2.0103600430383386,
      "grad_norm": 3.0625,
      "learning_rate": 1.8326752182683395e-05,
      "loss": 0.9129,
      "step": 573610
    },
    {
      "epoch": 2.010395090545234,
      "grad_norm": 2.859375,
      "learning_rate": 1.8326103154019693e-05,
      "loss": 0.8332,
      "step": 573620
    },
    {
      "epoch": 2.0104301380521297,
      "grad_norm": 3.140625,
      "learning_rate": 1.8325454125355994e-05,
      "loss": 0.7132,
      "step": 573630
    },
    {
      "epoch": 2.0104651855590254,
      "grad_norm": 2.671875,
      "learning_rate": 1.8324805096692292e-05,
      "loss": 0.8598,
      "step": 573640
    },
    {
      "epoch": 2.0105002330659207,
      "grad_norm": 2.640625,
      "learning_rate": 1.832415606802859e-05,
      "loss": 0.8688,
      "step": 573650
    },
    {
      "epoch": 2.0105352805728165,
      "grad_norm": 2.640625,
      "learning_rate": 1.8323507039364888e-05,
      "loss": 0.8606,
      "step": 573660
    },
    {
      "epoch": 2.0105703280797123,
      "grad_norm": 2.703125,
      "learning_rate": 1.8322858010701186e-05,
      "loss": 0.7729,
      "step": 573670
    },
    {
      "epoch": 2.0106053755866076,
      "grad_norm": 2.984375,
      "learning_rate": 1.8322208982037484e-05,
      "loss": 0.8998,
      "step": 573680
    },
    {
      "epoch": 2.0106404230935033,
      "grad_norm": 2.8125,
      "learning_rate": 1.8321559953373782e-05,
      "loss": 0.8415,
      "step": 573690
    },
    {
      "epoch": 2.0106754706003986,
      "grad_norm": 3.0,
      "learning_rate": 1.832091092471008e-05,
      "loss": 0.7841,
      "step": 573700
    },
    {
      "epoch": 2.0107105181072944,
      "grad_norm": 3.140625,
      "learning_rate": 1.8320261896046378e-05,
      "loss": 0.8069,
      "step": 573710
    },
    {
      "epoch": 2.01074556561419,
      "grad_norm": 3.109375,
      "learning_rate": 1.8319612867382676e-05,
      "loss": 0.8452,
      "step": 573720
    },
    {
      "epoch": 2.0107806131210855,
      "grad_norm": 2.390625,
      "learning_rate": 1.8318963838718974e-05,
      "loss": 0.8534,
      "step": 573730
    },
    {
      "epoch": 2.0108156606279812,
      "grad_norm": 2.609375,
      "learning_rate": 1.8318314810055272e-05,
      "loss": 0.8542,
      "step": 573740
    },
    {
      "epoch": 2.010850708134877,
      "grad_norm": 2.859375,
      "learning_rate": 1.831766578139157e-05,
      "loss": 0.7221,
      "step": 573750
    },
    {
      "epoch": 2.0108857556417723,
      "grad_norm": 3.078125,
      "learning_rate": 1.8317016752727868e-05,
      "loss": 0.8906,
      "step": 573760
    },
    {
      "epoch": 2.010920803148668,
      "grad_norm": 3.0625,
      "learning_rate": 1.8316367724064166e-05,
      "loss": 0.8001,
      "step": 573770
    },
    {
      "epoch": 2.010955850655564,
      "grad_norm": 3.4375,
      "learning_rate": 1.8315718695400464e-05,
      "loss": 0.9409,
      "step": 573780
    },
    {
      "epoch": 2.010990898162459,
      "grad_norm": 3.28125,
      "learning_rate": 1.8315069666736762e-05,
      "loss": 0.9169,
      "step": 573790
    },
    {
      "epoch": 2.011025945669355,
      "grad_norm": 2.671875,
      "learning_rate": 1.831442063807306e-05,
      "loss": 0.7805,
      "step": 573800
    },
    {
      "epoch": 2.01106099317625,
      "grad_norm": 3.15625,
      "learning_rate": 1.8313771609409358e-05,
      "loss": 0.7517,
      "step": 573810
    },
    {
      "epoch": 2.011096040683146,
      "grad_norm": 2.84375,
      "learning_rate": 1.8313122580745656e-05,
      "loss": 0.9127,
      "step": 573820
    },
    {
      "epoch": 2.0111310881900417,
      "grad_norm": 3.03125,
      "learning_rate": 1.8312473552081954e-05,
      "loss": 0.8728,
      "step": 573830
    },
    {
      "epoch": 2.011166135696937,
      "grad_norm": 3.15625,
      "learning_rate": 1.8311824523418252e-05,
      "loss": 0.868,
      "step": 573840
    },
    {
      "epoch": 2.011201183203833,
      "grad_norm": 2.703125,
      "learning_rate": 1.831117549475455e-05,
      "loss": 0.8196,
      "step": 573850
    },
    {
      "epoch": 2.0112362307107285,
      "grad_norm": 2.671875,
      "learning_rate": 1.8310526466090848e-05,
      "loss": 0.8045,
      "step": 573860
    },
    {
      "epoch": 2.011271278217624,
      "grad_norm": 2.96875,
      "learning_rate": 1.8309877437427146e-05,
      "loss": 0.7693,
      "step": 573870
    },
    {
      "epoch": 2.0113063257245196,
      "grad_norm": 3.1875,
      "learning_rate": 1.8309228408763447e-05,
      "loss": 0.8554,
      "step": 573880
    },
    {
      "epoch": 2.0113413732314154,
      "grad_norm": 3.125,
      "learning_rate": 1.8308579380099745e-05,
      "loss": 0.8262,
      "step": 573890
    },
    {
      "epoch": 2.0113764207383107,
      "grad_norm": 2.734375,
      "learning_rate": 1.8307930351436043e-05,
      "loss": 0.7593,
      "step": 573900
    },
    {
      "epoch": 2.0114114682452064,
      "grad_norm": 2.828125,
      "learning_rate": 1.830728132277234e-05,
      "loss": 0.7799,
      "step": 573910
    },
    {
      "epoch": 2.0114465157521018,
      "grad_norm": 3.046875,
      "learning_rate": 1.830663229410864e-05,
      "loss": 0.8446,
      "step": 573920
    },
    {
      "epoch": 2.0114815632589975,
      "grad_norm": 3.21875,
      "learning_rate": 1.8305983265444937e-05,
      "loss": 0.8288,
      "step": 573930
    },
    {
      "epoch": 2.0115166107658933,
      "grad_norm": 2.71875,
      "learning_rate": 1.8305334236781235e-05,
      "loss": 0.8356,
      "step": 573940
    },
    {
      "epoch": 2.0115516582727886,
      "grad_norm": 3.0625,
      "learning_rate": 1.8304685208117533e-05,
      "loss": 0.8146,
      "step": 573950
    },
    {
      "epoch": 2.0115867057796843,
      "grad_norm": 2.609375,
      "learning_rate": 1.8304036179453828e-05,
      "loss": 0.7565,
      "step": 573960
    },
    {
      "epoch": 2.01162175328658,
      "grad_norm": 2.625,
      "learning_rate": 1.8303387150790126e-05,
      "loss": 0.74,
      "step": 573970
    },
    {
      "epoch": 2.0116568007934754,
      "grad_norm": 2.921875,
      "learning_rate": 1.8302738122126424e-05,
      "loss": 0.716,
      "step": 573980
    },
    {
      "epoch": 2.011691848300371,
      "grad_norm": 3.28125,
      "learning_rate": 1.8302089093462722e-05,
      "loss": 0.8407,
      "step": 573990
    },
    {
      "epoch": 2.011726895807267,
      "grad_norm": 2.984375,
      "learning_rate": 1.8301440064799023e-05,
      "loss": 0.7996,
      "step": 574000
    },
    {
      "epoch": 2.0117619433141622,
      "grad_norm": 3.0,
      "learning_rate": 1.830079103613532e-05,
      "loss": 0.8246,
      "step": 574010
    },
    {
      "epoch": 2.011796990821058,
      "grad_norm": 2.90625,
      "learning_rate": 1.830014200747162e-05,
      "loss": 0.8739,
      "step": 574020
    },
    {
      "epoch": 2.0118320383279533,
      "grad_norm": 2.546875,
      "learning_rate": 1.8299492978807917e-05,
      "loss": 0.7674,
      "step": 574030
    },
    {
      "epoch": 2.011867085834849,
      "grad_norm": 3.140625,
      "learning_rate": 1.8298843950144215e-05,
      "loss": 0.762,
      "step": 574040
    },
    {
      "epoch": 2.011902133341745,
      "grad_norm": 3.109375,
      "learning_rate": 1.8298194921480513e-05,
      "loss": 0.7869,
      "step": 574050
    },
    {
      "epoch": 2.01193718084864,
      "grad_norm": 3.1875,
      "learning_rate": 1.829754589281681e-05,
      "loss": 0.8799,
      "step": 574060
    },
    {
      "epoch": 2.011972228355536,
      "grad_norm": 3.078125,
      "learning_rate": 1.829689686415311e-05,
      "loss": 0.8377,
      "step": 574070
    },
    {
      "epoch": 2.0120072758624317,
      "grad_norm": 3.0625,
      "learning_rate": 1.8296247835489407e-05,
      "loss": 0.7889,
      "step": 574080
    },
    {
      "epoch": 2.012042323369327,
      "grad_norm": 2.671875,
      "learning_rate": 1.8295598806825705e-05,
      "loss": 0.8235,
      "step": 574090
    },
    {
      "epoch": 2.0120773708762227,
      "grad_norm": 3.0,
      "learning_rate": 1.8294949778162003e-05,
      "loss": 0.8178,
      "step": 574100
    },
    {
      "epoch": 2.0121124183831185,
      "grad_norm": 2.609375,
      "learning_rate": 1.82943007494983e-05,
      "loss": 0.808,
      "step": 574110
    },
    {
      "epoch": 2.012147465890014,
      "grad_norm": 3.046875,
      "learning_rate": 1.82936517208346e-05,
      "loss": 0.8318,
      "step": 574120
    },
    {
      "epoch": 2.0121825133969096,
      "grad_norm": 3.46875,
      "learning_rate": 1.82930026921709e-05,
      "loss": 0.8446,
      "step": 574130
    },
    {
      "epoch": 2.0122175609038053,
      "grad_norm": 2.640625,
      "learning_rate": 1.82923536635072e-05,
      "loss": 0.7181,
      "step": 574140
    },
    {
      "epoch": 2.0122526084107006,
      "grad_norm": 2.71875,
      "learning_rate": 1.8291704634843493e-05,
      "loss": 0.7696,
      "step": 574150
    },
    {
      "epoch": 2.0122876559175964,
      "grad_norm": 2.453125,
      "learning_rate": 1.829105560617979e-05,
      "loss": 0.8828,
      "step": 574160
    },
    {
      "epoch": 2.0123227034244917,
      "grad_norm": 2.765625,
      "learning_rate": 1.829040657751609e-05,
      "loss": 0.7183,
      "step": 574170
    },
    {
      "epoch": 2.0123577509313875,
      "grad_norm": 2.65625,
      "learning_rate": 1.8289757548852387e-05,
      "loss": 0.8679,
      "step": 574180
    },
    {
      "epoch": 2.0123927984382832,
      "grad_norm": 2.921875,
      "learning_rate": 1.8289108520188685e-05,
      "loss": 0.8576,
      "step": 574190
    },
    {
      "epoch": 2.0124278459451785,
      "grad_norm": 2.875,
      "learning_rate": 1.8288459491524983e-05,
      "loss": 0.7171,
      "step": 574200
    },
    {
      "epoch": 2.0124628934520743,
      "grad_norm": 2.515625,
      "learning_rate": 1.828781046286128e-05,
      "loss": 0.7825,
      "step": 574210
    },
    {
      "epoch": 2.01249794095897,
      "grad_norm": 3.15625,
      "learning_rate": 1.828716143419758e-05,
      "loss": 0.7966,
      "step": 574220
    },
    {
      "epoch": 2.0125329884658654,
      "grad_norm": 3.0625,
      "learning_rate": 1.8286512405533877e-05,
      "loss": 0.8213,
      "step": 574230
    },
    {
      "epoch": 2.012568035972761,
      "grad_norm": 2.953125,
      "learning_rate": 1.8285863376870175e-05,
      "loss": 0.7711,
      "step": 574240
    },
    {
      "epoch": 2.012603083479657,
      "grad_norm": 3.125,
      "learning_rate": 1.8285214348206477e-05,
      "loss": 0.8074,
      "step": 574250
    },
    {
      "epoch": 2.012638130986552,
      "grad_norm": 2.734375,
      "learning_rate": 1.8284565319542775e-05,
      "loss": 0.8622,
      "step": 574260
    },
    {
      "epoch": 2.012673178493448,
      "grad_norm": 2.984375,
      "learning_rate": 1.8283916290879073e-05,
      "loss": 0.7878,
      "step": 574270
    },
    {
      "epoch": 2.0127082260003433,
      "grad_norm": 2.890625,
      "learning_rate": 1.828326726221537e-05,
      "loss": 0.7875,
      "step": 574280
    },
    {
      "epoch": 2.012743273507239,
      "grad_norm": 2.71875,
      "learning_rate": 1.828261823355167e-05,
      "loss": 0.8204,
      "step": 574290
    },
    {
      "epoch": 2.012778321014135,
      "grad_norm": 3.03125,
      "learning_rate": 1.8281969204887967e-05,
      "loss": 0.7917,
      "step": 574300
    },
    {
      "epoch": 2.01281336852103,
      "grad_norm": 2.984375,
      "learning_rate": 1.8281320176224265e-05,
      "loss": 0.8392,
      "step": 574310
    },
    {
      "epoch": 2.012848416027926,
      "grad_norm": 3.078125,
      "learning_rate": 1.8280671147560563e-05,
      "loss": 0.8901,
      "step": 574320
    },
    {
      "epoch": 2.0128834635348216,
      "grad_norm": 2.6875,
      "learning_rate": 1.8280022118896857e-05,
      "loss": 0.8624,
      "step": 574330
    },
    {
      "epoch": 2.012918511041717,
      "grad_norm": 2.65625,
      "learning_rate": 1.8279373090233155e-05,
      "loss": 0.8002,
      "step": 574340
    },
    {
      "epoch": 2.0129535585486127,
      "grad_norm": 2.84375,
      "learning_rate": 1.8278724061569453e-05,
      "loss": 0.8157,
      "step": 574350
    },
    {
      "epoch": 2.0129886060555084,
      "grad_norm": 2.53125,
      "learning_rate": 1.8278075032905755e-05,
      "loss": 0.8205,
      "step": 574360
    },
    {
      "epoch": 2.0130236535624038,
      "grad_norm": 2.84375,
      "learning_rate": 1.8277426004242053e-05,
      "loss": 0.8132,
      "step": 574370
    },
    {
      "epoch": 2.0130587010692995,
      "grad_norm": 3.09375,
      "learning_rate": 1.827677697557835e-05,
      "loss": 0.8967,
      "step": 574380
    },
    {
      "epoch": 2.013093748576195,
      "grad_norm": 3.03125,
      "learning_rate": 1.827612794691465e-05,
      "loss": 0.7798,
      "step": 574390
    },
    {
      "epoch": 2.0131287960830906,
      "grad_norm": 3.078125,
      "learning_rate": 1.8275478918250947e-05,
      "loss": 0.8454,
      "step": 574400
    },
    {
      "epoch": 2.0131638435899863,
      "grad_norm": 3.21875,
      "learning_rate": 1.8274829889587245e-05,
      "loss": 0.944,
      "step": 574410
    },
    {
      "epoch": 2.0131988910968817,
      "grad_norm": 1.875,
      "learning_rate": 1.8274180860923543e-05,
      "loss": 0.7968,
      "step": 574420
    },
    {
      "epoch": 2.0132339386037774,
      "grad_norm": 2.765625,
      "learning_rate": 1.827353183225984e-05,
      "loss": 0.8202,
      "step": 574430
    },
    {
      "epoch": 2.013268986110673,
      "grad_norm": 3.0,
      "learning_rate": 1.827288280359614e-05,
      "loss": 0.7682,
      "step": 574440
    },
    {
      "epoch": 2.0133040336175685,
      "grad_norm": 2.5625,
      "learning_rate": 1.8272233774932437e-05,
      "loss": 0.8317,
      "step": 574450
    },
    {
      "epoch": 2.0133390811244642,
      "grad_norm": 3.140625,
      "learning_rate": 1.8271584746268735e-05,
      "loss": 0.8615,
      "step": 574460
    },
    {
      "epoch": 2.01337412863136,
      "grad_norm": 2.921875,
      "learning_rate": 1.8270935717605033e-05,
      "loss": 0.7241,
      "step": 574470
    },
    {
      "epoch": 2.0134091761382553,
      "grad_norm": 2.75,
      "learning_rate": 1.827028668894133e-05,
      "loss": 0.7943,
      "step": 574480
    },
    {
      "epoch": 2.013444223645151,
      "grad_norm": 2.671875,
      "learning_rate": 1.826963766027763e-05,
      "loss": 0.8453,
      "step": 574490
    },
    {
      "epoch": 2.0134792711520464,
      "grad_norm": 2.796875,
      "learning_rate": 1.826898863161393e-05,
      "loss": 0.7122,
      "step": 574500
    },
    {
      "epoch": 2.013514318658942,
      "grad_norm": 2.625,
      "learning_rate": 1.8268339602950228e-05,
      "loss": 0.8023,
      "step": 574510
    },
    {
      "epoch": 2.013549366165838,
      "grad_norm": 3.15625,
      "learning_rate": 1.8267690574286523e-05,
      "loss": 0.8246,
      "step": 574520
    },
    {
      "epoch": 2.013584413672733,
      "grad_norm": 2.703125,
      "learning_rate": 1.826704154562282e-05,
      "loss": 0.8395,
      "step": 574530
    },
    {
      "epoch": 2.013619461179629,
      "grad_norm": 3.015625,
      "learning_rate": 1.826639251695912e-05,
      "loss": 0.7875,
      "step": 574540
    },
    {
      "epoch": 2.0136545086865247,
      "grad_norm": 3.265625,
      "learning_rate": 1.8265743488295417e-05,
      "loss": 0.8708,
      "step": 574550
    },
    {
      "epoch": 2.01368955619342,
      "grad_norm": 3.296875,
      "learning_rate": 1.8265094459631715e-05,
      "loss": 0.8345,
      "step": 574560
    },
    {
      "epoch": 2.013724603700316,
      "grad_norm": 2.875,
      "learning_rate": 1.8264445430968013e-05,
      "loss": 0.7573,
      "step": 574570
    },
    {
      "epoch": 2.0137596512072116,
      "grad_norm": 2.78125,
      "learning_rate": 1.826379640230431e-05,
      "loss": 0.8152,
      "step": 574580
    },
    {
      "epoch": 2.013794698714107,
      "grad_norm": 2.6875,
      "learning_rate": 1.826314737364061e-05,
      "loss": 0.8556,
      "step": 574590
    },
    {
      "epoch": 2.0138297462210026,
      "grad_norm": 2.9375,
      "learning_rate": 1.8262498344976907e-05,
      "loss": 0.7784,
      "step": 574600
    },
    {
      "epoch": 2.013864793727898,
      "grad_norm": 2.59375,
      "learning_rate": 1.8261849316313208e-05,
      "loss": 0.8533,
      "step": 574610
    },
    {
      "epoch": 2.0138998412347937,
      "grad_norm": 2.765625,
      "learning_rate": 1.8261200287649506e-05,
      "loss": 0.8514,
      "step": 574620
    },
    {
      "epoch": 2.0139348887416895,
      "grad_norm": 3.015625,
      "learning_rate": 1.8260551258985804e-05,
      "loss": 0.9189,
      "step": 574630
    },
    {
      "epoch": 2.013969936248585,
      "grad_norm": 2.84375,
      "learning_rate": 1.8259902230322102e-05,
      "loss": 0.8072,
      "step": 574640
    },
    {
      "epoch": 2.0140049837554805,
      "grad_norm": 3.078125,
      "learning_rate": 1.82592532016584e-05,
      "loss": 0.7737,
      "step": 574650
    },
    {
      "epoch": 2.0140400312623763,
      "grad_norm": 2.5625,
      "learning_rate": 1.8258604172994698e-05,
      "loss": 0.7783,
      "step": 574660
    },
    {
      "epoch": 2.0140750787692716,
      "grad_norm": 2.546875,
      "learning_rate": 1.8257955144330996e-05,
      "loss": 0.7876,
      "step": 574670
    },
    {
      "epoch": 2.0141101262761674,
      "grad_norm": 2.921875,
      "learning_rate": 1.8257306115667294e-05,
      "loss": 0.7816,
      "step": 574680
    },
    {
      "epoch": 2.014145173783063,
      "grad_norm": 3.421875,
      "learning_rate": 1.8256657087003592e-05,
      "loss": 0.8012,
      "step": 574690
    },
    {
      "epoch": 2.0141802212899584,
      "grad_norm": 3.015625,
      "learning_rate": 1.825600805833989e-05,
      "loss": 0.8347,
      "step": 574700
    },
    {
      "epoch": 2.014215268796854,
      "grad_norm": 2.921875,
      "learning_rate": 1.8255359029676185e-05,
      "loss": 0.7597,
      "step": 574710
    },
    {
      "epoch": 2.01425031630375,
      "grad_norm": 2.5,
      "learning_rate": 1.8254710001012483e-05,
      "loss": 0.7567,
      "step": 574720
    },
    {
      "epoch": 2.0142853638106453,
      "grad_norm": 3.453125,
      "learning_rate": 1.8254060972348784e-05,
      "loss": 0.8953,
      "step": 574730
    },
    {
      "epoch": 2.014320411317541,
      "grad_norm": 2.703125,
      "learning_rate": 1.8253411943685082e-05,
      "loss": 0.8067,
      "step": 574740
    },
    {
      "epoch": 2.0143554588244363,
      "grad_norm": 2.8125,
      "learning_rate": 1.825276291502138e-05,
      "loss": 0.8018,
      "step": 574750
    },
    {
      "epoch": 2.014390506331332,
      "grad_norm": 3.1875,
      "learning_rate": 1.8252113886357678e-05,
      "loss": 0.8083,
      "step": 574760
    },
    {
      "epoch": 2.014425553838228,
      "grad_norm": 2.5,
      "learning_rate": 1.8251464857693976e-05,
      "loss": 0.8098,
      "step": 574770
    },
    {
      "epoch": 2.014460601345123,
      "grad_norm": 3.046875,
      "learning_rate": 1.8250815829030274e-05,
      "loss": 0.8936,
      "step": 574780
    },
    {
      "epoch": 2.014495648852019,
      "grad_norm": 2.734375,
      "learning_rate": 1.8250166800366572e-05,
      "loss": 0.8352,
      "step": 574790
    },
    {
      "epoch": 2.0145306963589147,
      "grad_norm": 3.1875,
      "learning_rate": 1.824951777170287e-05,
      "loss": 0.9125,
      "step": 574800
    },
    {
      "epoch": 2.01456574386581,
      "grad_norm": 3.03125,
      "learning_rate": 1.8248868743039168e-05,
      "loss": 0.8622,
      "step": 574810
    },
    {
      "epoch": 2.0146007913727058,
      "grad_norm": 2.921875,
      "learning_rate": 1.8248219714375466e-05,
      "loss": 0.8155,
      "step": 574820
    },
    {
      "epoch": 2.0146358388796015,
      "grad_norm": 3.03125,
      "learning_rate": 1.8247570685711764e-05,
      "loss": 0.7459,
      "step": 574830
    },
    {
      "epoch": 2.014670886386497,
      "grad_norm": 2.984375,
      "learning_rate": 1.8246921657048062e-05,
      "loss": 0.804,
      "step": 574840
    },
    {
      "epoch": 2.0147059338933926,
      "grad_norm": 2.828125,
      "learning_rate": 1.824627262838436e-05,
      "loss": 0.8264,
      "step": 574850
    },
    {
      "epoch": 2.014740981400288,
      "grad_norm": 2.859375,
      "learning_rate": 1.8245623599720658e-05,
      "loss": 0.9455,
      "step": 574860
    },
    {
      "epoch": 2.0147760289071837,
      "grad_norm": 2.953125,
      "learning_rate": 1.824497457105696e-05,
      "loss": 0.8076,
      "step": 574870
    },
    {
      "epoch": 2.0148110764140794,
      "grad_norm": 2.46875,
      "learning_rate": 1.8244325542393258e-05,
      "loss": 0.7692,
      "step": 574880
    },
    {
      "epoch": 2.0148461239209747,
      "grad_norm": 2.75,
      "learning_rate": 1.8243676513729556e-05,
      "loss": 0.8085,
      "step": 574890
    },
    {
      "epoch": 2.0148811714278705,
      "grad_norm": 2.609375,
      "learning_rate": 1.824302748506585e-05,
      "loss": 0.872,
      "step": 574900
    },
    {
      "epoch": 2.0149162189347662,
      "grad_norm": 3.09375,
      "learning_rate": 1.8242378456402148e-05,
      "loss": 0.7736,
      "step": 574910
    },
    {
      "epoch": 2.0149512664416616,
      "grad_norm": 2.875,
      "learning_rate": 1.8241729427738446e-05,
      "loss": 0.9086,
      "step": 574920
    },
    {
      "epoch": 2.0149863139485573,
      "grad_norm": 2.984375,
      "learning_rate": 1.8241080399074744e-05,
      "loss": 0.8319,
      "step": 574930
    },
    {
      "epoch": 2.015021361455453,
      "grad_norm": 2.921875,
      "learning_rate": 1.8240431370411042e-05,
      "loss": 0.8143,
      "step": 574940
    },
    {
      "epoch": 2.0150564089623484,
      "grad_norm": 2.4375,
      "learning_rate": 1.823978234174734e-05,
      "loss": 0.782,
      "step": 574950
    },
    {
      "epoch": 2.015091456469244,
      "grad_norm": 2.8125,
      "learning_rate": 1.8239133313083638e-05,
      "loss": 0.8223,
      "step": 574960
    },
    {
      "epoch": 2.0151265039761395,
      "grad_norm": 3.21875,
      "learning_rate": 1.8238484284419936e-05,
      "loss": 0.7866,
      "step": 574970
    },
    {
      "epoch": 2.015161551483035,
      "grad_norm": 2.984375,
      "learning_rate": 1.8237835255756238e-05,
      "loss": 0.883,
      "step": 574980
    },
    {
      "epoch": 2.015196598989931,
      "grad_norm": 3.140625,
      "learning_rate": 1.8237186227092536e-05,
      "loss": 0.8689,
      "step": 574990
    },
    {
      "epoch": 2.0152316464968263,
      "grad_norm": 3.0625,
      "learning_rate": 1.8236537198428834e-05,
      "loss": 0.7943,
      "step": 575000
    },
    {
      "epoch": 2.0152316464968263,
      "eval_loss": 0.7698532342910767,
      "eval_runtime": 562.5953,
      "eval_samples_per_second": 676.216,
      "eval_steps_per_second": 56.351,
      "step": 575000
    },
    {
      "epoch": 2.015266694003722,
      "grad_norm": 2.90625,
      "learning_rate": 1.823588816976513e-05,
      "loss": 0.9308,
      "step": 575010
    },
    {
      "epoch": 2.015301741510618,
      "grad_norm": 2.859375,
      "learning_rate": 1.823523914110143e-05,
      "loss": 0.8094,
      "step": 575020
    },
    {
      "epoch": 2.015336789017513,
      "grad_norm": 2.96875,
      "learning_rate": 1.8234590112437728e-05,
      "loss": 0.855,
      "step": 575030
    },
    {
      "epoch": 2.015371836524409,
      "grad_norm": 3.234375,
      "learning_rate": 1.8233941083774026e-05,
      "loss": 0.9022,
      "step": 575040
    },
    {
      "epoch": 2.0154068840313046,
      "grad_norm": 2.484375,
      "learning_rate": 1.8233292055110324e-05,
      "loss": 0.7414,
      "step": 575050
    },
    {
      "epoch": 2.0154419315382,
      "grad_norm": 3.15625,
      "learning_rate": 1.823264302644662e-05,
      "loss": 0.847,
      "step": 575060
    },
    {
      "epoch": 2.0154769790450957,
      "grad_norm": 2.796875,
      "learning_rate": 1.823199399778292e-05,
      "loss": 0.9026,
      "step": 575070
    },
    {
      "epoch": 2.015512026551991,
      "grad_norm": 3.046875,
      "learning_rate": 1.8231344969119218e-05,
      "loss": 0.8187,
      "step": 575080
    },
    {
      "epoch": 2.015547074058887,
      "grad_norm": 2.65625,
      "learning_rate": 1.8230695940455516e-05,
      "loss": 0.8151,
      "step": 575090
    },
    {
      "epoch": 2.0155821215657825,
      "grad_norm": 3.234375,
      "learning_rate": 1.8230046911791814e-05,
      "loss": 0.8689,
      "step": 575100
    },
    {
      "epoch": 2.015617169072678,
      "grad_norm": 2.765625,
      "learning_rate": 1.822939788312811e-05,
      "loss": 0.8601,
      "step": 575110
    },
    {
      "epoch": 2.0156522165795736,
      "grad_norm": 2.921875,
      "learning_rate": 1.822874885446441e-05,
      "loss": 0.7845,
      "step": 575120
    },
    {
      "epoch": 2.0156872640864694,
      "grad_norm": 2.859375,
      "learning_rate": 1.8228099825800708e-05,
      "loss": 0.8374,
      "step": 575130
    },
    {
      "epoch": 2.0157223115933647,
      "grad_norm": 2.90625,
      "learning_rate": 1.8227450797137006e-05,
      "loss": 0.785,
      "step": 575140
    },
    {
      "epoch": 2.0157573591002604,
      "grad_norm": 2.734375,
      "learning_rate": 1.8226801768473304e-05,
      "loss": 0.7729,
      "step": 575150
    },
    {
      "epoch": 2.015792406607156,
      "grad_norm": 2.59375,
      "learning_rate": 1.82261527398096e-05,
      "loss": 0.7928,
      "step": 575160
    },
    {
      "epoch": 2.0158274541140515,
      "grad_norm": 2.734375,
      "learning_rate": 1.82255037111459e-05,
      "loss": 0.7578,
      "step": 575170
    },
    {
      "epoch": 2.0158625016209473,
      "grad_norm": 2.703125,
      "learning_rate": 1.8224854682482198e-05,
      "loss": 0.8038,
      "step": 575180
    },
    {
      "epoch": 2.0158975491278426,
      "grad_norm": 3.359375,
      "learning_rate": 1.8224205653818496e-05,
      "loss": 0.8239,
      "step": 575190
    },
    {
      "epoch": 2.0159325966347383,
      "grad_norm": 2.828125,
      "learning_rate": 1.8223556625154794e-05,
      "loss": 0.8364,
      "step": 575200
    },
    {
      "epoch": 2.015967644141634,
      "grad_norm": 3.21875,
      "learning_rate": 1.822290759649109e-05,
      "loss": 0.7733,
      "step": 575210
    },
    {
      "epoch": 2.0160026916485294,
      "grad_norm": 3.40625,
      "learning_rate": 1.822225856782739e-05,
      "loss": 0.8102,
      "step": 575220
    },
    {
      "epoch": 2.016037739155425,
      "grad_norm": 3.015625,
      "learning_rate": 1.822160953916369e-05,
      "loss": 0.8189,
      "step": 575230
    },
    {
      "epoch": 2.016072786662321,
      "grad_norm": 2.5625,
      "learning_rate": 1.822096051049999e-05,
      "loss": 0.8249,
      "step": 575240
    },
    {
      "epoch": 2.0161078341692162,
      "grad_norm": 2.5625,
      "learning_rate": 1.8220311481836287e-05,
      "loss": 0.7324,
      "step": 575250
    },
    {
      "epoch": 2.016142881676112,
      "grad_norm": 2.859375,
      "learning_rate": 1.8219662453172585e-05,
      "loss": 0.8077,
      "step": 575260
    },
    {
      "epoch": 2.0161779291830078,
      "grad_norm": 2.65625,
      "learning_rate": 1.8219013424508883e-05,
      "loss": 0.7897,
      "step": 575270
    },
    {
      "epoch": 2.016212976689903,
      "grad_norm": 2.703125,
      "learning_rate": 1.8218364395845178e-05,
      "loss": 0.8518,
      "step": 575280
    },
    {
      "epoch": 2.016248024196799,
      "grad_norm": 2.46875,
      "learning_rate": 1.8217715367181476e-05,
      "loss": 0.854,
      "step": 575290
    },
    {
      "epoch": 2.016283071703694,
      "grad_norm": 3.09375,
      "learning_rate": 1.8217066338517774e-05,
      "loss": 0.8095,
      "step": 575300
    },
    {
      "epoch": 2.01631811921059,
      "grad_norm": 2.6875,
      "learning_rate": 1.821641730985407e-05,
      "loss": 0.7588,
      "step": 575310
    },
    {
      "epoch": 2.0163531667174857,
      "grad_norm": 2.5,
      "learning_rate": 1.821576828119037e-05,
      "loss": 0.8001,
      "step": 575320
    },
    {
      "epoch": 2.016388214224381,
      "grad_norm": 3.25,
      "learning_rate": 1.8215119252526668e-05,
      "loss": 0.9037,
      "step": 575330
    },
    {
      "epoch": 2.0164232617312767,
      "grad_norm": 2.609375,
      "learning_rate": 1.8214470223862966e-05,
      "loss": 0.8737,
      "step": 575340
    },
    {
      "epoch": 2.0164583092381725,
      "grad_norm": 2.75,
      "learning_rate": 1.8213821195199267e-05,
      "loss": 0.8682,
      "step": 575350
    },
    {
      "epoch": 2.016493356745068,
      "grad_norm": 3.015625,
      "learning_rate": 1.8213172166535565e-05,
      "loss": 0.7731,
      "step": 575360
    },
    {
      "epoch": 2.0165284042519636,
      "grad_norm": 2.671875,
      "learning_rate": 1.8212523137871863e-05,
      "loss": 0.7634,
      "step": 575370
    },
    {
      "epoch": 2.0165634517588593,
      "grad_norm": 3.09375,
      "learning_rate": 1.821187410920816e-05,
      "loss": 0.8407,
      "step": 575380
    },
    {
      "epoch": 2.0165984992657546,
      "grad_norm": 3.484375,
      "learning_rate": 1.821122508054446e-05,
      "loss": 0.8665,
      "step": 575390
    },
    {
      "epoch": 2.0166335467726504,
      "grad_norm": 2.953125,
      "learning_rate": 1.8210576051880757e-05,
      "loss": 0.8663,
      "step": 575400
    },
    {
      "epoch": 2.0166685942795457,
      "grad_norm": 2.984375,
      "learning_rate": 1.8209927023217055e-05,
      "loss": 0.7929,
      "step": 575410
    },
    {
      "epoch": 2.0167036417864415,
      "grad_norm": 2.625,
      "learning_rate": 1.8209277994553353e-05,
      "loss": 0.7921,
      "step": 575420
    },
    {
      "epoch": 2.016738689293337,
      "grad_norm": 2.765625,
      "learning_rate": 1.820862896588965e-05,
      "loss": 0.8022,
      "step": 575430
    },
    {
      "epoch": 2.0167737368002325,
      "grad_norm": 2.765625,
      "learning_rate": 1.820797993722595e-05,
      "loss": 0.8802,
      "step": 575440
    },
    {
      "epoch": 2.0168087843071283,
      "grad_norm": 2.671875,
      "learning_rate": 1.8207330908562247e-05,
      "loss": 0.783,
      "step": 575450
    },
    {
      "epoch": 2.016843831814024,
      "grad_norm": 3.0625,
      "learning_rate": 1.8206681879898545e-05,
      "loss": 0.8174,
      "step": 575460
    },
    {
      "epoch": 2.0168788793209194,
      "grad_norm": 3.03125,
      "learning_rate": 1.8206032851234843e-05,
      "loss": 0.8496,
      "step": 575470
    },
    {
      "epoch": 2.016913926827815,
      "grad_norm": 2.59375,
      "learning_rate": 1.820538382257114e-05,
      "loss": 0.841,
      "step": 575480
    },
    {
      "epoch": 2.016948974334711,
      "grad_norm": 3.203125,
      "learning_rate": 1.820473479390744e-05,
      "loss": 0.7873,
      "step": 575490
    },
    {
      "epoch": 2.016984021841606,
      "grad_norm": 3.0,
      "learning_rate": 1.8204085765243737e-05,
      "loss": 0.8794,
      "step": 575500
    },
    {
      "epoch": 2.017019069348502,
      "grad_norm": 2.921875,
      "learning_rate": 1.8203436736580035e-05,
      "loss": 0.7625,
      "step": 575510
    },
    {
      "epoch": 2.0170541168553977,
      "grad_norm": 2.75,
      "learning_rate": 1.8202787707916333e-05,
      "loss": 0.8331,
      "step": 575520
    },
    {
      "epoch": 2.017089164362293,
      "grad_norm": 2.96875,
      "learning_rate": 1.820213867925263e-05,
      "loss": 0.7655,
      "step": 575530
    },
    {
      "epoch": 2.0171242118691888,
      "grad_norm": 3.125,
      "learning_rate": 1.820148965058893e-05,
      "loss": 0.7695,
      "step": 575540
    },
    {
      "epoch": 2.017159259376084,
      "grad_norm": 2.859375,
      "learning_rate": 1.8200840621925227e-05,
      "loss": 0.8592,
      "step": 575550
    },
    {
      "epoch": 2.01719430688298,
      "grad_norm": 2.84375,
      "learning_rate": 1.8200191593261525e-05,
      "loss": 0.7671,
      "step": 575560
    },
    {
      "epoch": 2.0172293543898756,
      "grad_norm": 3.0,
      "learning_rate": 1.8199542564597823e-05,
      "loss": 0.8571,
      "step": 575570
    },
    {
      "epoch": 2.017264401896771,
      "grad_norm": 2.6875,
      "learning_rate": 1.819889353593412e-05,
      "loss": 0.8255,
      "step": 575580
    },
    {
      "epoch": 2.0172994494036667,
      "grad_norm": 2.96875,
      "learning_rate": 1.819824450727042e-05,
      "loss": 0.8303,
      "step": 575590
    },
    {
      "epoch": 2.0173344969105624,
      "grad_norm": 3.15625,
      "learning_rate": 1.819759547860672e-05,
      "loss": 0.8376,
      "step": 575600
    },
    {
      "epoch": 2.0173695444174577,
      "grad_norm": 2.671875,
      "learning_rate": 1.819694644994302e-05,
      "loss": 0.7596,
      "step": 575610
    },
    {
      "epoch": 2.0174045919243535,
      "grad_norm": 2.90625,
      "learning_rate": 1.8196297421279316e-05,
      "loss": 0.7662,
      "step": 575620
    },
    {
      "epoch": 2.0174396394312493,
      "grad_norm": 2.734375,
      "learning_rate": 1.8195648392615614e-05,
      "loss": 0.7518,
      "step": 575630
    },
    {
      "epoch": 2.0174746869381446,
      "grad_norm": 2.703125,
      "learning_rate": 1.8194999363951912e-05,
      "loss": 0.8819,
      "step": 575640
    },
    {
      "epoch": 2.0175097344450403,
      "grad_norm": 2.90625,
      "learning_rate": 1.8194350335288207e-05,
      "loss": 0.7478,
      "step": 575650
    },
    {
      "epoch": 2.0175447819519357,
      "grad_norm": 3.109375,
      "learning_rate": 1.8193701306624505e-05,
      "loss": 0.7802,
      "step": 575660
    },
    {
      "epoch": 2.0175798294588314,
      "grad_norm": 2.75,
      "learning_rate": 1.8193052277960803e-05,
      "loss": 0.8113,
      "step": 575670
    },
    {
      "epoch": 2.017614876965727,
      "grad_norm": 3.109375,
      "learning_rate": 1.81924032492971e-05,
      "loss": 0.929,
      "step": 575680
    },
    {
      "epoch": 2.0176499244726225,
      "grad_norm": 3.3125,
      "learning_rate": 1.81917542206334e-05,
      "loss": 0.8962,
      "step": 575690
    },
    {
      "epoch": 2.0176849719795182,
      "grad_norm": 3.171875,
      "learning_rate": 1.8191105191969697e-05,
      "loss": 0.8505,
      "step": 575700
    },
    {
      "epoch": 2.017720019486414,
      "grad_norm": 2.796875,
      "learning_rate": 1.8190456163306e-05,
      "loss": 0.7311,
      "step": 575710
    },
    {
      "epoch": 2.0177550669933093,
      "grad_norm": 3.171875,
      "learning_rate": 1.8189807134642296e-05,
      "loss": 0.8658,
      "step": 575720
    },
    {
      "epoch": 2.017790114500205,
      "grad_norm": 2.796875,
      "learning_rate": 1.8189158105978594e-05,
      "loss": 0.8588,
      "step": 575730
    },
    {
      "epoch": 2.017825162007101,
      "grad_norm": 2.9375,
      "learning_rate": 1.8188509077314892e-05,
      "loss": 0.8096,
      "step": 575740
    },
    {
      "epoch": 2.017860209513996,
      "grad_norm": 2.984375,
      "learning_rate": 1.818786004865119e-05,
      "loss": 0.8515,
      "step": 575750
    },
    {
      "epoch": 2.017895257020892,
      "grad_norm": 3.09375,
      "learning_rate": 1.818721101998749e-05,
      "loss": 0.8728,
      "step": 575760
    },
    {
      "epoch": 2.017930304527787,
      "grad_norm": 3.078125,
      "learning_rate": 1.8186561991323786e-05,
      "loss": 0.8047,
      "step": 575770
    },
    {
      "epoch": 2.017965352034683,
      "grad_norm": 3.015625,
      "learning_rate": 1.8185912962660084e-05,
      "loss": 0.7878,
      "step": 575780
    },
    {
      "epoch": 2.0180003995415787,
      "grad_norm": 3.09375,
      "learning_rate": 1.8185263933996382e-05,
      "loss": 0.8071,
      "step": 575790
    },
    {
      "epoch": 2.018035447048474,
      "grad_norm": 2.625,
      "learning_rate": 1.818461490533268e-05,
      "loss": 0.7819,
      "step": 575800
    },
    {
      "epoch": 2.01807049455537,
      "grad_norm": 3.609375,
      "learning_rate": 1.818396587666898e-05,
      "loss": 0.768,
      "step": 575810
    },
    {
      "epoch": 2.0181055420622656,
      "grad_norm": 2.5625,
      "learning_rate": 1.8183316848005276e-05,
      "loss": 0.785,
      "step": 575820
    },
    {
      "epoch": 2.018140589569161,
      "grad_norm": 2.78125,
      "learning_rate": 1.8182667819341574e-05,
      "loss": 0.82,
      "step": 575830
    },
    {
      "epoch": 2.0181756370760566,
      "grad_norm": 3.046875,
      "learning_rate": 1.8182018790677872e-05,
      "loss": 0.8668,
      "step": 575840
    },
    {
      "epoch": 2.0182106845829524,
      "grad_norm": 2.453125,
      "learning_rate": 1.818136976201417e-05,
      "loss": 0.7775,
      "step": 575850
    },
    {
      "epoch": 2.0182457320898477,
      "grad_norm": 2.53125,
      "learning_rate": 1.818072073335047e-05,
      "loss": 0.8216,
      "step": 575860
    },
    {
      "epoch": 2.0182807795967435,
      "grad_norm": 3.125,
      "learning_rate": 1.8180071704686766e-05,
      "loss": 0.8222,
      "step": 575870
    },
    {
      "epoch": 2.0183158271036388,
      "grad_norm": 3.5625,
      "learning_rate": 1.8179422676023064e-05,
      "loss": 0.9026,
      "step": 575880
    },
    {
      "epoch": 2.0183508746105345,
      "grad_norm": 3.09375,
      "learning_rate": 1.8178773647359362e-05,
      "loss": 0.8758,
      "step": 575890
    },
    {
      "epoch": 2.0183859221174303,
      "grad_norm": 2.734375,
      "learning_rate": 1.817812461869566e-05,
      "loss": 0.7967,
      "step": 575900
    },
    {
      "epoch": 2.0184209696243256,
      "grad_norm": 2.796875,
      "learning_rate": 1.817747559003196e-05,
      "loss": 0.8344,
      "step": 575910
    },
    {
      "epoch": 2.0184560171312214,
      "grad_norm": 2.984375,
      "learning_rate": 1.8176826561368256e-05,
      "loss": 0.7674,
      "step": 575920
    },
    {
      "epoch": 2.018491064638117,
      "grad_norm": 2.640625,
      "learning_rate": 1.8176177532704554e-05,
      "loss": 0.7905,
      "step": 575930
    },
    {
      "epoch": 2.0185261121450124,
      "grad_norm": 2.984375,
      "learning_rate": 1.8175528504040852e-05,
      "loss": 0.8292,
      "step": 575940
    },
    {
      "epoch": 2.018561159651908,
      "grad_norm": 2.703125,
      "learning_rate": 1.817487947537715e-05,
      "loss": 0.85,
      "step": 575950
    },
    {
      "epoch": 2.018596207158804,
      "grad_norm": 2.90625,
      "learning_rate": 1.817423044671345e-05,
      "loss": 0.7402,
      "step": 575960
    },
    {
      "epoch": 2.0186312546656993,
      "grad_norm": 2.9375,
      "learning_rate": 1.817358141804975e-05,
      "loss": 0.8888,
      "step": 575970
    },
    {
      "epoch": 2.018666302172595,
      "grad_norm": 2.84375,
      "learning_rate": 1.8172932389386048e-05,
      "loss": 0.7803,
      "step": 575980
    },
    {
      "epoch": 2.0187013496794903,
      "grad_norm": 2.671875,
      "learning_rate": 1.8172283360722346e-05,
      "loss": 0.7955,
      "step": 575990
    },
    {
      "epoch": 2.018736397186386,
      "grad_norm": 2.984375,
      "learning_rate": 1.8171634332058644e-05,
      "loss": 0.8789,
      "step": 576000
    },
    {
      "epoch": 2.018771444693282,
      "grad_norm": 3.015625,
      "learning_rate": 1.8170985303394942e-05,
      "loss": 0.8459,
      "step": 576010
    },
    {
      "epoch": 2.018806492200177,
      "grad_norm": 2.828125,
      "learning_rate": 1.817033627473124e-05,
      "loss": 0.7952,
      "step": 576020
    },
    {
      "epoch": 2.018841539707073,
      "grad_norm": 2.953125,
      "learning_rate": 1.8169687246067534e-05,
      "loss": 0.8358,
      "step": 576030
    },
    {
      "epoch": 2.0188765872139687,
      "grad_norm": 2.890625,
      "learning_rate": 1.8169038217403832e-05,
      "loss": 0.8607,
      "step": 576040
    },
    {
      "epoch": 2.018911634720864,
      "grad_norm": 3.03125,
      "learning_rate": 1.816838918874013e-05,
      "loss": 0.8084,
      "step": 576050
    },
    {
      "epoch": 2.0189466822277597,
      "grad_norm": 2.9375,
      "learning_rate": 1.816774016007643e-05,
      "loss": 0.7983,
      "step": 576060
    },
    {
      "epoch": 2.0189817297346555,
      "grad_norm": 2.96875,
      "learning_rate": 1.8167091131412726e-05,
      "loss": 0.7539,
      "step": 576070
    },
    {
      "epoch": 2.019016777241551,
      "grad_norm": 2.75,
      "learning_rate": 1.8166442102749028e-05,
      "loss": 0.8422,
      "step": 576080
    },
    {
      "epoch": 2.0190518247484466,
      "grad_norm": 3.03125,
      "learning_rate": 1.8165793074085326e-05,
      "loss": 0.9257,
      "step": 576090
    },
    {
      "epoch": 2.0190868722553423,
      "grad_norm": 2.75,
      "learning_rate": 1.8165144045421624e-05,
      "loss": 0.8378,
      "step": 576100
    },
    {
      "epoch": 2.0191219197622376,
      "grad_norm": 3.171875,
      "learning_rate": 1.8164495016757922e-05,
      "loss": 0.7941,
      "step": 576110
    },
    {
      "epoch": 2.0191569672691334,
      "grad_norm": 2.484375,
      "learning_rate": 1.816384598809422e-05,
      "loss": 0.765,
      "step": 576120
    },
    {
      "epoch": 2.0191920147760287,
      "grad_norm": 3.109375,
      "learning_rate": 1.8163196959430518e-05,
      "loss": 0.8004,
      "step": 576130
    },
    {
      "epoch": 2.0192270622829245,
      "grad_norm": 3.46875,
      "learning_rate": 1.8162547930766816e-05,
      "loss": 0.8348,
      "step": 576140
    },
    {
      "epoch": 2.0192621097898202,
      "grad_norm": 2.765625,
      "learning_rate": 1.8161898902103114e-05,
      "loss": 0.7992,
      "step": 576150
    },
    {
      "epoch": 2.0192971572967156,
      "grad_norm": 2.671875,
      "learning_rate": 1.8161249873439412e-05,
      "loss": 0.792,
      "step": 576160
    },
    {
      "epoch": 2.0193322048036113,
      "grad_norm": 3.09375,
      "learning_rate": 1.816060084477571e-05,
      "loss": 0.8379,
      "step": 576170
    },
    {
      "epoch": 2.019367252310507,
      "grad_norm": 3.09375,
      "learning_rate": 1.8159951816112008e-05,
      "loss": 0.7687,
      "step": 576180
    },
    {
      "epoch": 2.0194022998174024,
      "grad_norm": 3.078125,
      "learning_rate": 1.8159302787448306e-05,
      "loss": 0.7637,
      "step": 576190
    },
    {
      "epoch": 2.019437347324298,
      "grad_norm": 2.625,
      "learning_rate": 1.8158653758784604e-05,
      "loss": 0.7561,
      "step": 576200
    },
    {
      "epoch": 2.019472394831194,
      "grad_norm": 3.203125,
      "learning_rate": 1.8158004730120902e-05,
      "loss": 0.8598,
      "step": 576210
    },
    {
      "epoch": 2.019507442338089,
      "grad_norm": 2.65625,
      "learning_rate": 1.81573557014572e-05,
      "loss": 0.7177,
      "step": 576220
    },
    {
      "epoch": 2.019542489844985,
      "grad_norm": 2.546875,
      "learning_rate": 1.8156706672793498e-05,
      "loss": 0.8271,
      "step": 576230
    },
    {
      "epoch": 2.0195775373518803,
      "grad_norm": 3.203125,
      "learning_rate": 1.8156057644129796e-05,
      "loss": 0.8574,
      "step": 576240
    },
    {
      "epoch": 2.019612584858776,
      "grad_norm": 3.4375,
      "learning_rate": 1.8155408615466094e-05,
      "loss": 0.795,
      "step": 576250
    },
    {
      "epoch": 2.019647632365672,
      "grad_norm": 2.359375,
      "learning_rate": 1.8154759586802392e-05,
      "loss": 0.8473,
      "step": 576260
    },
    {
      "epoch": 2.019682679872567,
      "grad_norm": 3.375,
      "learning_rate": 1.815411055813869e-05,
      "loss": 0.8646,
      "step": 576270
    },
    {
      "epoch": 2.019717727379463,
      "grad_norm": 2.78125,
      "learning_rate": 1.8153461529474988e-05,
      "loss": 0.745,
      "step": 576280
    },
    {
      "epoch": 2.0197527748863586,
      "grad_norm": 2.71875,
      "learning_rate": 1.8152812500811286e-05,
      "loss": 0.8412,
      "step": 576290
    },
    {
      "epoch": 2.019787822393254,
      "grad_norm": 3.0625,
      "learning_rate": 1.8152163472147584e-05,
      "loss": 0.7796,
      "step": 576300
    },
    {
      "epoch": 2.0198228699001497,
      "grad_norm": 2.65625,
      "learning_rate": 1.8151514443483882e-05,
      "loss": 0.7933,
      "step": 576310
    },
    {
      "epoch": 2.0198579174070455,
      "grad_norm": 3.265625,
      "learning_rate": 1.815086541482018e-05,
      "loss": 0.9306,
      "step": 576320
    },
    {
      "epoch": 2.0198929649139408,
      "grad_norm": 3.109375,
      "learning_rate": 1.815021638615648e-05,
      "loss": 0.7692,
      "step": 576330
    },
    {
      "epoch": 2.0199280124208365,
      "grad_norm": 2.796875,
      "learning_rate": 1.814956735749278e-05,
      "loss": 0.7547,
      "step": 576340
    },
    {
      "epoch": 2.019963059927732,
      "grad_norm": 2.75,
      "learning_rate": 1.8148918328829077e-05,
      "loss": 0.7948,
      "step": 576350
    },
    {
      "epoch": 2.0199981074346276,
      "grad_norm": 3.21875,
      "learning_rate": 1.8148269300165375e-05,
      "loss": 0.8157,
      "step": 576360
    },
    {
      "epoch": 2.0200331549415234,
      "grad_norm": 2.625,
      "learning_rate": 1.8147620271501673e-05,
      "loss": 0.7742,
      "step": 576370
    },
    {
      "epoch": 2.0200682024484187,
      "grad_norm": 2.734375,
      "learning_rate": 1.814697124283797e-05,
      "loss": 0.7712,
      "step": 576380
    },
    {
      "epoch": 2.0201032499553144,
      "grad_norm": 3.078125,
      "learning_rate": 1.814632221417427e-05,
      "loss": 0.8148,
      "step": 576390
    },
    {
      "epoch": 2.02013829746221,
      "grad_norm": 3.1875,
      "learning_rate": 1.8145673185510564e-05,
      "loss": 0.8275,
      "step": 576400
    },
    {
      "epoch": 2.0201733449691055,
      "grad_norm": 2.59375,
      "learning_rate": 1.8145024156846862e-05,
      "loss": 0.8712,
      "step": 576410
    },
    {
      "epoch": 2.0202083924760013,
      "grad_norm": 3.75,
      "learning_rate": 1.814437512818316e-05,
      "loss": 0.8326,
      "step": 576420
    },
    {
      "epoch": 2.020243439982897,
      "grad_norm": 2.921875,
      "learning_rate": 1.8143726099519458e-05,
      "loss": 0.7988,
      "step": 576430
    },
    {
      "epoch": 2.0202784874897923,
      "grad_norm": 3.09375,
      "learning_rate": 1.8143077070855756e-05,
      "loss": 0.8548,
      "step": 576440
    },
    {
      "epoch": 2.020313534996688,
      "grad_norm": 2.640625,
      "learning_rate": 1.8142428042192057e-05,
      "loss": 0.739,
      "step": 576450
    },
    {
      "epoch": 2.0203485825035834,
      "grad_norm": 3.0625,
      "learning_rate": 1.8141779013528355e-05,
      "loss": 0.7902,
      "step": 576460
    },
    {
      "epoch": 2.020383630010479,
      "grad_norm": 2.6875,
      "learning_rate": 1.8141129984864653e-05,
      "loss": 0.7912,
      "step": 576470
    },
    {
      "epoch": 2.020418677517375,
      "grad_norm": 3.515625,
      "learning_rate": 1.814048095620095e-05,
      "loss": 0.8277,
      "step": 576480
    },
    {
      "epoch": 2.0204537250242702,
      "grad_norm": 3.15625,
      "learning_rate": 1.813983192753725e-05,
      "loss": 0.8115,
      "step": 576490
    },
    {
      "epoch": 2.020488772531166,
      "grad_norm": 3.203125,
      "learning_rate": 1.8139182898873547e-05,
      "loss": 0.8792,
      "step": 576500
    },
    {
      "epoch": 2.0205238200380617,
      "grad_norm": 2.546875,
      "learning_rate": 1.8138533870209845e-05,
      "loss": 0.8258,
      "step": 576510
    },
    {
      "epoch": 2.020558867544957,
      "grad_norm": 3.046875,
      "learning_rate": 1.8137884841546143e-05,
      "loss": 0.8585,
      "step": 576520
    },
    {
      "epoch": 2.020593915051853,
      "grad_norm": 2.546875,
      "learning_rate": 1.813723581288244e-05,
      "loss": 0.7505,
      "step": 576530
    },
    {
      "epoch": 2.0206289625587486,
      "grad_norm": 2.796875,
      "learning_rate": 1.813658678421874e-05,
      "loss": 0.8909,
      "step": 576540
    },
    {
      "epoch": 2.020664010065644,
      "grad_norm": 2.9375,
      "learning_rate": 1.8135937755555037e-05,
      "loss": 0.7901,
      "step": 576550
    },
    {
      "epoch": 2.0206990575725396,
      "grad_norm": 3.25,
      "learning_rate": 1.8135288726891335e-05,
      "loss": 0.8804,
      "step": 576560
    },
    {
      "epoch": 2.020734105079435,
      "grad_norm": 2.921875,
      "learning_rate": 1.8134639698227633e-05,
      "loss": 0.7978,
      "step": 576570
    },
    {
      "epoch": 2.0207691525863307,
      "grad_norm": 3.203125,
      "learning_rate": 1.813399066956393e-05,
      "loss": 0.8706,
      "step": 576580
    },
    {
      "epoch": 2.0208042000932265,
      "grad_norm": 2.84375,
      "learning_rate": 1.813334164090023e-05,
      "loss": 0.8741,
      "step": 576590
    },
    {
      "epoch": 2.020839247600122,
      "grad_norm": 3.078125,
      "learning_rate": 1.8132692612236527e-05,
      "loss": 0.7889,
      "step": 576600
    },
    {
      "epoch": 2.0208742951070175,
      "grad_norm": 3.21875,
      "learning_rate": 1.8132043583572825e-05,
      "loss": 0.7496,
      "step": 576610
    },
    {
      "epoch": 2.0209093426139133,
      "grad_norm": 2.578125,
      "learning_rate": 1.8131394554909123e-05,
      "loss": 0.8094,
      "step": 576620
    },
    {
      "epoch": 2.0209443901208086,
      "grad_norm": 3.203125,
      "learning_rate": 1.813074552624542e-05,
      "loss": 0.8134,
      "step": 576630
    },
    {
      "epoch": 2.0209794376277044,
      "grad_norm": 2.4375,
      "learning_rate": 1.813009649758172e-05,
      "loss": 0.8162,
      "step": 576640
    },
    {
      "epoch": 2.0210144851346,
      "grad_norm": 2.90625,
      "learning_rate": 1.8129447468918017e-05,
      "loss": 0.9058,
      "step": 576650
    },
    {
      "epoch": 2.0210495326414954,
      "grad_norm": 2.5,
      "learning_rate": 1.8128798440254315e-05,
      "loss": 0.7532,
      "step": 576660
    },
    {
      "epoch": 2.021084580148391,
      "grad_norm": 2.8125,
      "learning_rate": 1.8128149411590613e-05,
      "loss": 0.8014,
      "step": 576670
    },
    {
      "epoch": 2.0211196276552865,
      "grad_norm": 2.625,
      "learning_rate": 1.812750038292691e-05,
      "loss": 0.7347,
      "step": 576680
    },
    {
      "epoch": 2.0211546751621823,
      "grad_norm": 2.359375,
      "learning_rate": 1.812685135426321e-05,
      "loss": 0.7556,
      "step": 576690
    },
    {
      "epoch": 2.021189722669078,
      "grad_norm": 3.171875,
      "learning_rate": 1.812620232559951e-05,
      "loss": 0.8252,
      "step": 576700
    },
    {
      "epoch": 2.0212247701759734,
      "grad_norm": 2.921875,
      "learning_rate": 1.812555329693581e-05,
      "loss": 0.8125,
      "step": 576710
    },
    {
      "epoch": 2.021259817682869,
      "grad_norm": 2.96875,
      "learning_rate": 1.8124904268272106e-05,
      "loss": 0.7823,
      "step": 576720
    },
    {
      "epoch": 2.021294865189765,
      "grad_norm": 2.734375,
      "learning_rate": 1.8124255239608404e-05,
      "loss": 0.7373,
      "step": 576730
    },
    {
      "epoch": 2.02132991269666,
      "grad_norm": 2.90625,
      "learning_rate": 1.8123606210944702e-05,
      "loss": 0.8421,
      "step": 576740
    },
    {
      "epoch": 2.021364960203556,
      "grad_norm": 2.9375,
      "learning_rate": 1.8122957182281e-05,
      "loss": 0.8774,
      "step": 576750
    },
    {
      "epoch": 2.0214000077104517,
      "grad_norm": 3.03125,
      "learning_rate": 1.81223081536173e-05,
      "loss": 0.8373,
      "step": 576760
    },
    {
      "epoch": 2.021435055217347,
      "grad_norm": 2.859375,
      "learning_rate": 1.8121659124953596e-05,
      "loss": 0.7828,
      "step": 576770
    },
    {
      "epoch": 2.0214701027242428,
      "grad_norm": 3.15625,
      "learning_rate": 1.812101009628989e-05,
      "loss": 0.8614,
      "step": 576780
    },
    {
      "epoch": 2.0215051502311385,
      "grad_norm": 2.8125,
      "learning_rate": 1.812036106762619e-05,
      "loss": 0.8141,
      "step": 576790
    },
    {
      "epoch": 2.021540197738034,
      "grad_norm": 2.75,
      "learning_rate": 1.8119712038962487e-05,
      "loss": 0.7843,
      "step": 576800
    },
    {
      "epoch": 2.0215752452449296,
      "grad_norm": 2.671875,
      "learning_rate": 1.811906301029879e-05,
      "loss": 0.7906,
      "step": 576810
    },
    {
      "epoch": 2.021610292751825,
      "grad_norm": 2.75,
      "learning_rate": 1.8118413981635086e-05,
      "loss": 0.7338,
      "step": 576820
    },
    {
      "epoch": 2.0216453402587207,
      "grad_norm": 2.9375,
      "learning_rate": 1.8117764952971384e-05,
      "loss": 0.772,
      "step": 576830
    },
    {
      "epoch": 2.0216803877656164,
      "grad_norm": 2.765625,
      "learning_rate": 1.8117115924307682e-05,
      "loss": 0.8046,
      "step": 576840
    },
    {
      "epoch": 2.0217154352725117,
      "grad_norm": 2.4375,
      "learning_rate": 1.811646689564398e-05,
      "loss": 0.7693,
      "step": 576850
    },
    {
      "epoch": 2.0217504827794075,
      "grad_norm": 2.921875,
      "learning_rate": 1.811581786698028e-05,
      "loss": 0.8373,
      "step": 576860
    },
    {
      "epoch": 2.0217855302863033,
      "grad_norm": 3.046875,
      "learning_rate": 1.8115168838316576e-05,
      "loss": 0.8467,
      "step": 576870
    },
    {
      "epoch": 2.0218205777931986,
      "grad_norm": 3.296875,
      "learning_rate": 1.8114519809652874e-05,
      "loss": 0.8049,
      "step": 576880
    },
    {
      "epoch": 2.0218556253000943,
      "grad_norm": 3.453125,
      "learning_rate": 1.8113870780989172e-05,
      "loss": 0.7687,
      "step": 576890
    },
    {
      "epoch": 2.02189067280699,
      "grad_norm": 2.90625,
      "learning_rate": 1.811322175232547e-05,
      "loss": 0.7884,
      "step": 576900
    },
    {
      "epoch": 2.0219257203138854,
      "grad_norm": 3.125,
      "learning_rate": 1.811257272366177e-05,
      "loss": 0.7711,
      "step": 576910
    },
    {
      "epoch": 2.021960767820781,
      "grad_norm": 2.71875,
      "learning_rate": 1.8111923694998066e-05,
      "loss": 0.742,
      "step": 576920
    },
    {
      "epoch": 2.0219958153276765,
      "grad_norm": 2.921875,
      "learning_rate": 1.8111274666334364e-05,
      "loss": 0.8616,
      "step": 576930
    },
    {
      "epoch": 2.0220308628345722,
      "grad_norm": 3.078125,
      "learning_rate": 1.8110625637670662e-05,
      "loss": 0.879,
      "step": 576940
    },
    {
      "epoch": 2.022065910341468,
      "grad_norm": 2.84375,
      "learning_rate": 1.8109976609006964e-05,
      "loss": 0.8014,
      "step": 576950
    },
    {
      "epoch": 2.0221009578483633,
      "grad_norm": 3.03125,
      "learning_rate": 1.8109327580343262e-05,
      "loss": 0.7852,
      "step": 576960
    },
    {
      "epoch": 2.022136005355259,
      "grad_norm": 2.78125,
      "learning_rate": 1.8108678551679556e-05,
      "loss": 0.86,
      "step": 576970
    },
    {
      "epoch": 2.022171052862155,
      "grad_norm": 2.578125,
      "learning_rate": 1.8108029523015854e-05,
      "loss": 0.9046,
      "step": 576980
    },
    {
      "epoch": 2.02220610036905,
      "grad_norm": 2.515625,
      "learning_rate": 1.8107380494352152e-05,
      "loss": 0.7438,
      "step": 576990
    },
    {
      "epoch": 2.022241147875946,
      "grad_norm": 3.3125,
      "learning_rate": 1.810673146568845e-05,
      "loss": 0.8845,
      "step": 577000
    },
    {
      "epoch": 2.0222761953828416,
      "grad_norm": 2.8125,
      "learning_rate": 1.810608243702475e-05,
      "loss": 0.8184,
      "step": 577010
    },
    {
      "epoch": 2.022311242889737,
      "grad_norm": 3.078125,
      "learning_rate": 1.8105433408361046e-05,
      "loss": 0.8503,
      "step": 577020
    },
    {
      "epoch": 2.0223462903966327,
      "grad_norm": 2.8125,
      "learning_rate": 1.8104784379697344e-05,
      "loss": 0.9013,
      "step": 577030
    },
    {
      "epoch": 2.022381337903528,
      "grad_norm": 2.640625,
      "learning_rate": 1.8104135351033642e-05,
      "loss": 0.7272,
      "step": 577040
    },
    {
      "epoch": 2.022416385410424,
      "grad_norm": 3.125,
      "learning_rate": 1.810348632236994e-05,
      "loss": 0.6917,
      "step": 577050
    },
    {
      "epoch": 2.0224514329173195,
      "grad_norm": 2.6875,
      "learning_rate": 1.810283729370624e-05,
      "loss": 0.7872,
      "step": 577060
    },
    {
      "epoch": 2.022486480424215,
      "grad_norm": 3.015625,
      "learning_rate": 1.810218826504254e-05,
      "loss": 0.8689,
      "step": 577070
    },
    {
      "epoch": 2.0225215279311106,
      "grad_norm": 2.734375,
      "learning_rate": 1.8101539236378838e-05,
      "loss": 0.7852,
      "step": 577080
    },
    {
      "epoch": 2.0225565754380064,
      "grad_norm": 2.9375,
      "learning_rate": 1.8100890207715136e-05,
      "loss": 0.8122,
      "step": 577090
    },
    {
      "epoch": 2.0225916229449017,
      "grad_norm": 2.75,
      "learning_rate": 1.8100241179051434e-05,
      "loss": 0.8007,
      "step": 577100
    },
    {
      "epoch": 2.0226266704517974,
      "grad_norm": 3.046875,
      "learning_rate": 1.8099592150387732e-05,
      "loss": 0.7688,
      "step": 577110
    },
    {
      "epoch": 2.022661717958693,
      "grad_norm": 2.859375,
      "learning_rate": 1.809894312172403e-05,
      "loss": 0.9105,
      "step": 577120
    },
    {
      "epoch": 2.0226967654655885,
      "grad_norm": 2.953125,
      "learning_rate": 1.8098294093060328e-05,
      "loss": 0.8064,
      "step": 577130
    },
    {
      "epoch": 2.0227318129724843,
      "grad_norm": 3.0625,
      "learning_rate": 1.8097645064396626e-05,
      "loss": 0.9418,
      "step": 577140
    },
    {
      "epoch": 2.0227668604793796,
      "grad_norm": 3.140625,
      "learning_rate": 1.8096996035732924e-05,
      "loss": 0.8079,
      "step": 577150
    },
    {
      "epoch": 2.0228019079862753,
      "grad_norm": 3.015625,
      "learning_rate": 1.809634700706922e-05,
      "loss": 0.8387,
      "step": 577160
    },
    {
      "epoch": 2.022836955493171,
      "grad_norm": 2.734375,
      "learning_rate": 1.8095697978405516e-05,
      "loss": 0.89,
      "step": 577170
    },
    {
      "epoch": 2.0228720030000664,
      "grad_norm": 3.046875,
      "learning_rate": 1.8095048949741818e-05,
      "loss": 0.7931,
      "step": 577180
    },
    {
      "epoch": 2.022907050506962,
      "grad_norm": 2.953125,
      "learning_rate": 1.8094399921078116e-05,
      "loss": 0.8361,
      "step": 577190
    },
    {
      "epoch": 2.022942098013858,
      "grad_norm": 3.0,
      "learning_rate": 1.8093750892414414e-05,
      "loss": 0.7927,
      "step": 577200
    },
    {
      "epoch": 2.0229771455207533,
      "grad_norm": 2.84375,
      "learning_rate": 1.8093101863750712e-05,
      "loss": 0.9034,
      "step": 577210
    },
    {
      "epoch": 2.023012193027649,
      "grad_norm": 2.796875,
      "learning_rate": 1.809245283508701e-05,
      "loss": 0.6704,
      "step": 577220
    },
    {
      "epoch": 2.0230472405345448,
      "grad_norm": 3.21875,
      "learning_rate": 1.8091803806423308e-05,
      "loss": 0.8594,
      "step": 577230
    },
    {
      "epoch": 2.02308228804144,
      "grad_norm": 2.9375,
      "learning_rate": 1.8091154777759606e-05,
      "loss": 0.8287,
      "step": 577240
    },
    {
      "epoch": 2.023117335548336,
      "grad_norm": 2.890625,
      "learning_rate": 1.8090505749095904e-05,
      "loss": 0.8245,
      "step": 577250
    },
    {
      "epoch": 2.023152383055231,
      "grad_norm": 2.90625,
      "learning_rate": 1.8089856720432202e-05,
      "loss": 0.7289,
      "step": 577260
    },
    {
      "epoch": 2.023187430562127,
      "grad_norm": 3.109375,
      "learning_rate": 1.80892076917685e-05,
      "loss": 0.913,
      "step": 577270
    },
    {
      "epoch": 2.0232224780690227,
      "grad_norm": 2.734375,
      "learning_rate": 1.8088558663104798e-05,
      "loss": 0.855,
      "step": 577280
    },
    {
      "epoch": 2.023257525575918,
      "grad_norm": 2.765625,
      "learning_rate": 1.8087909634441096e-05,
      "loss": 0.8171,
      "step": 577290
    },
    {
      "epoch": 2.0232925730828137,
      "grad_norm": 3.1875,
      "learning_rate": 1.8087260605777394e-05,
      "loss": 0.7637,
      "step": 577300
    },
    {
      "epoch": 2.0233276205897095,
      "grad_norm": 2.421875,
      "learning_rate": 1.8086611577113692e-05,
      "loss": 0.8284,
      "step": 577310
    },
    {
      "epoch": 2.023362668096605,
      "grad_norm": 2.734375,
      "learning_rate": 1.8085962548449993e-05,
      "loss": 0.7892,
      "step": 577320
    },
    {
      "epoch": 2.0233977156035006,
      "grad_norm": 3.03125,
      "learning_rate": 1.808531351978629e-05,
      "loss": 0.8454,
      "step": 577330
    },
    {
      "epoch": 2.0234327631103963,
      "grad_norm": 3.09375,
      "learning_rate": 1.8084664491122586e-05,
      "loss": 0.8567,
      "step": 577340
    },
    {
      "epoch": 2.0234678106172916,
      "grad_norm": 2.703125,
      "learning_rate": 1.8084015462458884e-05,
      "loss": 0.8463,
      "step": 577350
    },
    {
      "epoch": 2.0235028581241874,
      "grad_norm": 3.15625,
      "learning_rate": 1.8083366433795182e-05,
      "loss": 0.7774,
      "step": 577360
    },
    {
      "epoch": 2.023537905631083,
      "grad_norm": 2.671875,
      "learning_rate": 1.808271740513148e-05,
      "loss": 0.8281,
      "step": 577370
    },
    {
      "epoch": 2.0235729531379785,
      "grad_norm": 3.0625,
      "learning_rate": 1.8082068376467778e-05,
      "loss": 0.7534,
      "step": 577380
    },
    {
      "epoch": 2.0236080006448742,
      "grad_norm": 2.828125,
      "learning_rate": 1.8081419347804076e-05,
      "loss": 0.8315,
      "step": 577390
    },
    {
      "epoch": 2.0236430481517695,
      "grad_norm": 2.96875,
      "learning_rate": 1.8080770319140374e-05,
      "loss": 0.7816,
      "step": 577400
    },
    {
      "epoch": 2.0236780956586653,
      "grad_norm": 3.0,
      "learning_rate": 1.8080121290476672e-05,
      "loss": 0.8034,
      "step": 577410
    },
    {
      "epoch": 2.023713143165561,
      "grad_norm": 2.6875,
      "learning_rate": 1.807947226181297e-05,
      "loss": 0.7563,
      "step": 577420
    },
    {
      "epoch": 2.0237481906724564,
      "grad_norm": 2.75,
      "learning_rate": 1.807882323314927e-05,
      "loss": 0.7614,
      "step": 577430
    },
    {
      "epoch": 2.023783238179352,
      "grad_norm": 3.609375,
      "learning_rate": 1.807817420448557e-05,
      "loss": 0.9393,
      "step": 577440
    },
    {
      "epoch": 2.023818285686248,
      "grad_norm": 3.03125,
      "learning_rate": 1.8077525175821867e-05,
      "loss": 0.8059,
      "step": 577450
    },
    {
      "epoch": 2.023853333193143,
      "grad_norm": 3.03125,
      "learning_rate": 1.8076876147158165e-05,
      "loss": 0.773,
      "step": 577460
    },
    {
      "epoch": 2.023888380700039,
      "grad_norm": 3.046875,
      "learning_rate": 1.8076227118494463e-05,
      "loss": 0.7152,
      "step": 577470
    },
    {
      "epoch": 2.0239234282069347,
      "grad_norm": 2.859375,
      "learning_rate": 1.807557808983076e-05,
      "loss": 0.8702,
      "step": 577480
    },
    {
      "epoch": 2.02395847571383,
      "grad_norm": 2.703125,
      "learning_rate": 1.807492906116706e-05,
      "loss": 0.7914,
      "step": 577490
    },
    {
      "epoch": 2.023993523220726,
      "grad_norm": 3.296875,
      "learning_rate": 1.8074280032503357e-05,
      "loss": 0.9421,
      "step": 577500
    },
    {
      "epoch": 2.024028570727621,
      "grad_norm": 2.921875,
      "learning_rate": 1.8073631003839655e-05,
      "loss": 0.8485,
      "step": 577510
    },
    {
      "epoch": 2.024063618234517,
      "grad_norm": 2.515625,
      "learning_rate": 1.8072981975175953e-05,
      "loss": 0.7559,
      "step": 577520
    },
    {
      "epoch": 2.0240986657414126,
      "grad_norm": 3.203125,
      "learning_rate": 1.8072332946512248e-05,
      "loss": 0.8168,
      "step": 577530
    },
    {
      "epoch": 2.024133713248308,
      "grad_norm": 3.046875,
      "learning_rate": 1.8071683917848546e-05,
      "loss": 0.8028,
      "step": 577540
    },
    {
      "epoch": 2.0241687607552037,
      "grad_norm": 2.71875,
      "learning_rate": 1.8071034889184847e-05,
      "loss": 0.8517,
      "step": 577550
    },
    {
      "epoch": 2.0242038082620994,
      "grad_norm": 2.9375,
      "learning_rate": 1.8070385860521145e-05,
      "loss": 0.8026,
      "step": 577560
    },
    {
      "epoch": 2.0242388557689948,
      "grad_norm": 3.15625,
      "learning_rate": 1.8069736831857443e-05,
      "loss": 0.8847,
      "step": 577570
    },
    {
      "epoch": 2.0242739032758905,
      "grad_norm": 2.984375,
      "learning_rate": 1.806908780319374e-05,
      "loss": 0.7903,
      "step": 577580
    },
    {
      "epoch": 2.0243089507827863,
      "grad_norm": 2.96875,
      "learning_rate": 1.806843877453004e-05,
      "loss": 0.784,
      "step": 577590
    },
    {
      "epoch": 2.0243439982896816,
      "grad_norm": 2.703125,
      "learning_rate": 1.8067789745866337e-05,
      "loss": 0.804,
      "step": 577600
    },
    {
      "epoch": 2.0243790457965773,
      "grad_norm": 2.578125,
      "learning_rate": 1.8067140717202635e-05,
      "loss": 0.7581,
      "step": 577610
    },
    {
      "epoch": 2.0244140933034727,
      "grad_norm": 2.875,
      "learning_rate": 1.8066491688538933e-05,
      "loss": 0.8642,
      "step": 577620
    },
    {
      "epoch": 2.0244491408103684,
      "grad_norm": 2.828125,
      "learning_rate": 1.806584265987523e-05,
      "loss": 0.8571,
      "step": 577630
    },
    {
      "epoch": 2.024484188317264,
      "grad_norm": 3.03125,
      "learning_rate": 1.806519363121153e-05,
      "loss": 0.848,
      "step": 577640
    },
    {
      "epoch": 2.0245192358241595,
      "grad_norm": 2.734375,
      "learning_rate": 1.8064544602547827e-05,
      "loss": 0.8185,
      "step": 577650
    },
    {
      "epoch": 2.0245542833310552,
      "grad_norm": 2.703125,
      "learning_rate": 1.8063895573884125e-05,
      "loss": 0.7596,
      "step": 577660
    },
    {
      "epoch": 2.024589330837951,
      "grad_norm": 3.0,
      "learning_rate": 1.8063246545220423e-05,
      "loss": 0.7644,
      "step": 577670
    },
    {
      "epoch": 2.0246243783448463,
      "grad_norm": 2.78125,
      "learning_rate": 1.806259751655672e-05,
      "loss": 0.8353,
      "step": 577680
    },
    {
      "epoch": 2.024659425851742,
      "grad_norm": 2.953125,
      "learning_rate": 1.8061948487893023e-05,
      "loss": 0.8535,
      "step": 577690
    },
    {
      "epoch": 2.024694473358638,
      "grad_norm": 2.625,
      "learning_rate": 1.806129945922932e-05,
      "loss": 0.82,
      "step": 577700
    },
    {
      "epoch": 2.024729520865533,
      "grad_norm": 3.078125,
      "learning_rate": 1.806065043056562e-05,
      "loss": 0.9004,
      "step": 577710
    },
    {
      "epoch": 2.024764568372429,
      "grad_norm": 2.859375,
      "learning_rate": 1.8060001401901913e-05,
      "loss": 0.8176,
      "step": 577720
    },
    {
      "epoch": 2.024799615879324,
      "grad_norm": 2.625,
      "learning_rate": 1.805935237323821e-05,
      "loss": 0.713,
      "step": 577730
    },
    {
      "epoch": 2.02483466338622,
      "grad_norm": 2.828125,
      "learning_rate": 1.805870334457451e-05,
      "loss": 0.7992,
      "step": 577740
    },
    {
      "epoch": 2.0248697108931157,
      "grad_norm": 3.015625,
      "learning_rate": 1.8058054315910807e-05,
      "loss": 0.8296,
      "step": 577750
    },
    {
      "epoch": 2.024904758400011,
      "grad_norm": 2.5625,
      "learning_rate": 1.8057405287247105e-05,
      "loss": 0.752,
      "step": 577760
    },
    {
      "epoch": 2.024939805906907,
      "grad_norm": 2.390625,
      "learning_rate": 1.8056756258583403e-05,
      "loss": 0.835,
      "step": 577770
    },
    {
      "epoch": 2.0249748534138026,
      "grad_norm": 2.859375,
      "learning_rate": 1.80561072299197e-05,
      "loss": 0.857,
      "step": 577780
    },
    {
      "epoch": 2.025009900920698,
      "grad_norm": 2.484375,
      "learning_rate": 1.8055458201256e-05,
      "loss": 0.8486,
      "step": 577790
    },
    {
      "epoch": 2.0250449484275936,
      "grad_norm": 2.8125,
      "learning_rate": 1.80548091725923e-05,
      "loss": 0.7722,
      "step": 577800
    },
    {
      "epoch": 2.0250799959344894,
      "grad_norm": 3.296875,
      "learning_rate": 1.80541601439286e-05,
      "loss": 0.8814,
      "step": 577810
    },
    {
      "epoch": 2.0251150434413847,
      "grad_norm": 2.828125,
      "learning_rate": 1.8053511115264897e-05,
      "loss": 0.8408,
      "step": 577820
    },
    {
      "epoch": 2.0251500909482805,
      "grad_norm": 2.609375,
      "learning_rate": 1.8052862086601195e-05,
      "loss": 0.8261,
      "step": 577830
    },
    {
      "epoch": 2.025185138455176,
      "grad_norm": 2.765625,
      "learning_rate": 1.8052213057937493e-05,
      "loss": 0.8247,
      "step": 577840
    },
    {
      "epoch": 2.0252201859620715,
      "grad_norm": 2.703125,
      "learning_rate": 1.805156402927379e-05,
      "loss": 0.8637,
      "step": 577850
    },
    {
      "epoch": 2.0252552334689673,
      "grad_norm": 3.015625,
      "learning_rate": 1.805091500061009e-05,
      "loss": 0.8625,
      "step": 577860
    },
    {
      "epoch": 2.0252902809758626,
      "grad_norm": 2.46875,
      "learning_rate": 1.8050265971946387e-05,
      "loss": 0.7397,
      "step": 577870
    },
    {
      "epoch": 2.0253253284827584,
      "grad_norm": 3.265625,
      "learning_rate": 1.8049616943282685e-05,
      "loss": 0.8775,
      "step": 577880
    },
    {
      "epoch": 2.025360375989654,
      "grad_norm": 2.84375,
      "learning_rate": 1.8048967914618983e-05,
      "loss": 0.8366,
      "step": 577890
    },
    {
      "epoch": 2.0253954234965494,
      "grad_norm": 3.171875,
      "learning_rate": 1.804831888595528e-05,
      "loss": 0.8484,
      "step": 577900
    },
    {
      "epoch": 2.025430471003445,
      "grad_norm": 2.984375,
      "learning_rate": 1.804766985729158e-05,
      "loss": 0.8669,
      "step": 577910
    },
    {
      "epoch": 2.025465518510341,
      "grad_norm": 2.625,
      "learning_rate": 1.8047020828627877e-05,
      "loss": 0.785,
      "step": 577920
    },
    {
      "epoch": 2.0255005660172363,
      "grad_norm": 2.765625,
      "learning_rate": 1.8046371799964175e-05,
      "loss": 0.7729,
      "step": 577930
    },
    {
      "epoch": 2.025535613524132,
      "grad_norm": 3.234375,
      "learning_rate": 1.8045722771300473e-05,
      "loss": 0.8729,
      "step": 577940
    },
    {
      "epoch": 2.0255706610310273,
      "grad_norm": 2.984375,
      "learning_rate": 1.804507374263677e-05,
      "loss": 0.7591,
      "step": 577950
    },
    {
      "epoch": 2.025605708537923,
      "grad_norm": 2.8125,
      "learning_rate": 1.804442471397307e-05,
      "loss": 0.804,
      "step": 577960
    },
    {
      "epoch": 2.025640756044819,
      "grad_norm": 2.671875,
      "learning_rate": 1.8043775685309367e-05,
      "loss": 0.7715,
      "step": 577970
    },
    {
      "epoch": 2.025675803551714,
      "grad_norm": 3.1875,
      "learning_rate": 1.8043126656645665e-05,
      "loss": 0.7535,
      "step": 577980
    },
    {
      "epoch": 2.02571085105861,
      "grad_norm": 2.921875,
      "learning_rate": 1.8042477627981963e-05,
      "loss": 0.903,
      "step": 577990
    },
    {
      "epoch": 2.0257458985655057,
      "grad_norm": 2.765625,
      "learning_rate": 1.804182859931826e-05,
      "loss": 0.8619,
      "step": 578000
    },
    {
      "epoch": 2.025780946072401,
      "grad_norm": 2.640625,
      "learning_rate": 1.804117957065456e-05,
      "loss": 0.8504,
      "step": 578010
    },
    {
      "epoch": 2.0258159935792968,
      "grad_norm": 2.546875,
      "learning_rate": 1.8040530541990857e-05,
      "loss": 0.7638,
      "step": 578020
    },
    {
      "epoch": 2.0258510410861925,
      "grad_norm": 2.734375,
      "learning_rate": 1.8039881513327155e-05,
      "loss": 0.8536,
      "step": 578030
    },
    {
      "epoch": 2.025886088593088,
      "grad_norm": 3.078125,
      "learning_rate": 1.8039232484663453e-05,
      "loss": 0.8676,
      "step": 578040
    },
    {
      "epoch": 2.0259211360999836,
      "grad_norm": 2.875,
      "learning_rate": 1.8038583455999754e-05,
      "loss": 0.8159,
      "step": 578050
    },
    {
      "epoch": 2.025956183606879,
      "grad_norm": 3.0,
      "learning_rate": 1.8037934427336052e-05,
      "loss": 0.7901,
      "step": 578060
    },
    {
      "epoch": 2.0259912311137747,
      "grad_norm": 3.03125,
      "learning_rate": 1.803728539867235e-05,
      "loss": 0.8647,
      "step": 578070
    },
    {
      "epoch": 2.0260262786206704,
      "grad_norm": 2.65625,
      "learning_rate": 1.8036636370008648e-05,
      "loss": 0.7561,
      "step": 578080
    },
    {
      "epoch": 2.0260613261275657,
      "grad_norm": 2.890625,
      "learning_rate": 1.8035987341344946e-05,
      "loss": 0.8641,
      "step": 578090
    },
    {
      "epoch": 2.0260963736344615,
      "grad_norm": 2.859375,
      "learning_rate": 1.803533831268124e-05,
      "loss": 0.7815,
      "step": 578100
    },
    {
      "epoch": 2.0261314211413572,
      "grad_norm": 2.4375,
      "learning_rate": 1.803468928401754e-05,
      "loss": 0.8028,
      "step": 578110
    },
    {
      "epoch": 2.0261664686482526,
      "grad_norm": 3.03125,
      "learning_rate": 1.8034040255353837e-05,
      "loss": 0.7495,
      "step": 578120
    },
    {
      "epoch": 2.0262015161551483,
      "grad_norm": 3.046875,
      "learning_rate": 1.8033391226690135e-05,
      "loss": 0.8246,
      "step": 578130
    },
    {
      "epoch": 2.026236563662044,
      "grad_norm": 2.75,
      "learning_rate": 1.8032742198026433e-05,
      "loss": 0.8686,
      "step": 578140
    },
    {
      "epoch": 2.0262716111689394,
      "grad_norm": 3.125,
      "learning_rate": 1.803209316936273e-05,
      "loss": 0.809,
      "step": 578150
    },
    {
      "epoch": 2.026306658675835,
      "grad_norm": 3.03125,
      "learning_rate": 1.803144414069903e-05,
      "loss": 0.8648,
      "step": 578160
    },
    {
      "epoch": 2.026341706182731,
      "grad_norm": 2.515625,
      "learning_rate": 1.803079511203533e-05,
      "loss": 0.8479,
      "step": 578170
    },
    {
      "epoch": 2.026376753689626,
      "grad_norm": 3.046875,
      "learning_rate": 1.8030146083371628e-05,
      "loss": 0.8789,
      "step": 578180
    },
    {
      "epoch": 2.026411801196522,
      "grad_norm": 3.265625,
      "learning_rate": 1.8029497054707926e-05,
      "loss": 0.8404,
      "step": 578190
    },
    {
      "epoch": 2.0264468487034173,
      "grad_norm": 3.125,
      "learning_rate": 1.8028848026044224e-05,
      "loss": 0.83,
      "step": 578200
    },
    {
      "epoch": 2.026481896210313,
      "grad_norm": 3.015625,
      "learning_rate": 1.8028198997380522e-05,
      "loss": 0.7326,
      "step": 578210
    },
    {
      "epoch": 2.026516943717209,
      "grad_norm": 3.015625,
      "learning_rate": 1.802754996871682e-05,
      "loss": 0.8659,
      "step": 578220
    },
    {
      "epoch": 2.026551991224104,
      "grad_norm": 3.5,
      "learning_rate": 1.8026900940053118e-05,
      "loss": 0.8958,
      "step": 578230
    },
    {
      "epoch": 2.026587038731,
      "grad_norm": 2.953125,
      "learning_rate": 1.8026251911389416e-05,
      "loss": 0.7863,
      "step": 578240
    },
    {
      "epoch": 2.0266220862378956,
      "grad_norm": 2.96875,
      "learning_rate": 1.8025602882725714e-05,
      "loss": 0.9364,
      "step": 578250
    },
    {
      "epoch": 2.026657133744791,
      "grad_norm": 2.65625,
      "learning_rate": 1.8024953854062012e-05,
      "loss": 0.7888,
      "step": 578260
    },
    {
      "epoch": 2.0266921812516867,
      "grad_norm": 2.828125,
      "learning_rate": 1.802430482539831e-05,
      "loss": 0.7941,
      "step": 578270
    },
    {
      "epoch": 2.0267272287585825,
      "grad_norm": 2.828125,
      "learning_rate": 1.8023655796734608e-05,
      "loss": 0.7948,
      "step": 578280
    },
    {
      "epoch": 2.026762276265478,
      "grad_norm": 2.625,
      "learning_rate": 1.8023006768070906e-05,
      "loss": 0.8113,
      "step": 578290
    },
    {
      "epoch": 2.0267973237723735,
      "grad_norm": 2.9375,
      "learning_rate": 1.8022357739407204e-05,
      "loss": 0.777,
      "step": 578300
    },
    {
      "epoch": 2.026832371279269,
      "grad_norm": 3.375,
      "learning_rate": 1.8021708710743502e-05,
      "loss": 0.73,
      "step": 578310
    },
    {
      "epoch": 2.0268674187861646,
      "grad_norm": 3.171875,
      "learning_rate": 1.80210596820798e-05,
      "loss": 0.8838,
      "step": 578320
    },
    {
      "epoch": 2.0269024662930604,
      "grad_norm": 3.5,
      "learning_rate": 1.8020410653416098e-05,
      "loss": 0.863,
      "step": 578330
    },
    {
      "epoch": 2.0269375137999557,
      "grad_norm": 2.859375,
      "learning_rate": 1.8019761624752396e-05,
      "loss": 0.8693,
      "step": 578340
    },
    {
      "epoch": 2.0269725613068514,
      "grad_norm": 2.921875,
      "learning_rate": 1.8019112596088694e-05,
      "loss": 0.8532,
      "step": 578350
    },
    {
      "epoch": 2.027007608813747,
      "grad_norm": 2.75,
      "learning_rate": 1.8018463567424992e-05,
      "loss": 0.7991,
      "step": 578360
    },
    {
      "epoch": 2.0270426563206425,
      "grad_norm": 3.21875,
      "learning_rate": 1.801781453876129e-05,
      "loss": 0.8164,
      "step": 578370
    },
    {
      "epoch": 2.0270777038275383,
      "grad_norm": 3.21875,
      "learning_rate": 1.8017165510097588e-05,
      "loss": 0.8252,
      "step": 578380
    },
    {
      "epoch": 2.027112751334434,
      "grad_norm": 2.46875,
      "learning_rate": 1.8016516481433886e-05,
      "loss": 0.8145,
      "step": 578390
    },
    {
      "epoch": 2.0271477988413293,
      "grad_norm": 2.703125,
      "learning_rate": 1.8015867452770184e-05,
      "loss": 0.7738,
      "step": 578400
    },
    {
      "epoch": 2.027182846348225,
      "grad_norm": 2.671875,
      "learning_rate": 1.8015218424106482e-05,
      "loss": 0.8056,
      "step": 578410
    },
    {
      "epoch": 2.0272178938551204,
      "grad_norm": 2.921875,
      "learning_rate": 1.8014569395442783e-05,
      "loss": 0.7756,
      "step": 578420
    },
    {
      "epoch": 2.027252941362016,
      "grad_norm": 3.125,
      "learning_rate": 1.801392036677908e-05,
      "loss": 0.8339,
      "step": 578430
    },
    {
      "epoch": 2.027287988868912,
      "grad_norm": 3.046875,
      "learning_rate": 1.801327133811538e-05,
      "loss": 0.8121,
      "step": 578440
    },
    {
      "epoch": 2.0273230363758072,
      "grad_norm": 2.9375,
      "learning_rate": 1.8012622309451677e-05,
      "loss": 0.7663,
      "step": 578450
    },
    {
      "epoch": 2.027358083882703,
      "grad_norm": 2.90625,
      "learning_rate": 1.8011973280787975e-05,
      "loss": 0.7444,
      "step": 578460
    },
    {
      "epoch": 2.0273931313895988,
      "grad_norm": 2.53125,
      "learning_rate": 1.801132425212427e-05,
      "loss": 0.7751,
      "step": 578470
    },
    {
      "epoch": 2.027428178896494,
      "grad_norm": 2.40625,
      "learning_rate": 1.8010675223460568e-05,
      "loss": 0.8334,
      "step": 578480
    },
    {
      "epoch": 2.02746322640339,
      "grad_norm": 2.703125,
      "learning_rate": 1.8010026194796866e-05,
      "loss": 0.7756,
      "step": 578490
    },
    {
      "epoch": 2.0274982739102856,
      "grad_norm": 3.234375,
      "learning_rate": 1.8009377166133164e-05,
      "loss": 0.8096,
      "step": 578500
    },
    {
      "epoch": 2.027533321417181,
      "grad_norm": 2.78125,
      "learning_rate": 1.8008728137469462e-05,
      "loss": 0.8008,
      "step": 578510
    },
    {
      "epoch": 2.0275683689240767,
      "grad_norm": 2.921875,
      "learning_rate": 1.800807910880576e-05,
      "loss": 0.8447,
      "step": 578520
    },
    {
      "epoch": 2.027603416430972,
      "grad_norm": 3.125,
      "learning_rate": 1.800743008014206e-05,
      "loss": 0.9298,
      "step": 578530
    },
    {
      "epoch": 2.0276384639378677,
      "grad_norm": 2.84375,
      "learning_rate": 1.800678105147836e-05,
      "loss": 0.8329,
      "step": 578540
    },
    {
      "epoch": 2.0276735114447635,
      "grad_norm": 3.125,
      "learning_rate": 1.8006132022814657e-05,
      "loss": 0.8714,
      "step": 578550
    },
    {
      "epoch": 2.027708558951659,
      "grad_norm": 3.15625,
      "learning_rate": 1.8005482994150955e-05,
      "loss": 0.8338,
      "step": 578560
    },
    {
      "epoch": 2.0277436064585546,
      "grad_norm": 2.90625,
      "learning_rate": 1.8004833965487253e-05,
      "loss": 0.8703,
      "step": 578570
    },
    {
      "epoch": 2.0277786539654503,
      "grad_norm": 2.578125,
      "learning_rate": 1.800418493682355e-05,
      "loss": 0.823,
      "step": 578580
    },
    {
      "epoch": 2.0278137014723456,
      "grad_norm": 3.0,
      "learning_rate": 1.800353590815985e-05,
      "loss": 0.8407,
      "step": 578590
    },
    {
      "epoch": 2.0278487489792414,
      "grad_norm": 3.046875,
      "learning_rate": 1.8002886879496147e-05,
      "loss": 0.8266,
      "step": 578600
    },
    {
      "epoch": 2.027883796486137,
      "grad_norm": 3.359375,
      "learning_rate": 1.8002237850832445e-05,
      "loss": 0.8537,
      "step": 578610
    },
    {
      "epoch": 2.0279188439930325,
      "grad_norm": 2.75,
      "learning_rate": 1.8001588822168743e-05,
      "loss": 0.8415,
      "step": 578620
    },
    {
      "epoch": 2.027953891499928,
      "grad_norm": 2.90625,
      "learning_rate": 1.800093979350504e-05,
      "loss": 0.7671,
      "step": 578630
    },
    {
      "epoch": 2.0279889390068235,
      "grad_norm": 3.109375,
      "learning_rate": 1.800029076484134e-05,
      "loss": 0.775,
      "step": 578640
    },
    {
      "epoch": 2.0280239865137193,
      "grad_norm": 3.328125,
      "learning_rate": 1.7999641736177637e-05,
      "loss": 0.9164,
      "step": 578650
    },
    {
      "epoch": 2.028059034020615,
      "grad_norm": 2.75,
      "learning_rate": 1.7998992707513935e-05,
      "loss": 0.8372,
      "step": 578660
    },
    {
      "epoch": 2.0280940815275104,
      "grad_norm": 2.53125,
      "learning_rate": 1.7998343678850233e-05,
      "loss": 0.7789,
      "step": 578670
    },
    {
      "epoch": 2.028129129034406,
      "grad_norm": 3.0,
      "learning_rate": 1.799769465018653e-05,
      "loss": 0.8038,
      "step": 578680
    },
    {
      "epoch": 2.028164176541302,
      "grad_norm": 3.015625,
      "learning_rate": 1.799704562152283e-05,
      "loss": 0.8067,
      "step": 578690
    },
    {
      "epoch": 2.028199224048197,
      "grad_norm": 3.078125,
      "learning_rate": 1.7996396592859127e-05,
      "loss": 0.9223,
      "step": 578700
    },
    {
      "epoch": 2.028234271555093,
      "grad_norm": 2.765625,
      "learning_rate": 1.7995747564195425e-05,
      "loss": 0.8318,
      "step": 578710
    },
    {
      "epoch": 2.0282693190619887,
      "grad_norm": 3.0,
      "learning_rate": 1.7995098535531723e-05,
      "loss": 0.8583,
      "step": 578720
    },
    {
      "epoch": 2.028304366568884,
      "grad_norm": 2.84375,
      "learning_rate": 1.799444950686802e-05,
      "loss": 0.8851,
      "step": 578730
    },
    {
      "epoch": 2.02833941407578,
      "grad_norm": 3.21875,
      "learning_rate": 1.799380047820432e-05,
      "loss": 0.9144,
      "step": 578740
    },
    {
      "epoch": 2.0283744615826755,
      "grad_norm": 2.828125,
      "learning_rate": 1.7993151449540617e-05,
      "loss": 0.8089,
      "step": 578750
    },
    {
      "epoch": 2.028409509089571,
      "grad_norm": 3.5625,
      "learning_rate": 1.7992502420876915e-05,
      "loss": 0.7665,
      "step": 578760
    },
    {
      "epoch": 2.0284445565964666,
      "grad_norm": 2.921875,
      "learning_rate": 1.7991853392213213e-05,
      "loss": 0.7967,
      "step": 578770
    },
    {
      "epoch": 2.028479604103362,
      "grad_norm": 2.796875,
      "learning_rate": 1.7991204363549515e-05,
      "loss": 0.8271,
      "step": 578780
    },
    {
      "epoch": 2.0285146516102577,
      "grad_norm": 2.609375,
      "learning_rate": 1.7990555334885813e-05,
      "loss": 0.8735,
      "step": 578790
    },
    {
      "epoch": 2.0285496991171534,
      "grad_norm": 2.703125,
      "learning_rate": 1.798990630622211e-05,
      "loss": 0.8244,
      "step": 578800
    },
    {
      "epoch": 2.0285847466240488,
      "grad_norm": 2.671875,
      "learning_rate": 1.798925727755841e-05,
      "loss": 0.7665,
      "step": 578810
    },
    {
      "epoch": 2.0286197941309445,
      "grad_norm": 3.296875,
      "learning_rate": 1.7988608248894707e-05,
      "loss": 0.8325,
      "step": 578820
    },
    {
      "epoch": 2.0286548416378403,
      "grad_norm": 2.96875,
      "learning_rate": 1.7987959220231005e-05,
      "loss": 0.8718,
      "step": 578830
    },
    {
      "epoch": 2.0286898891447356,
      "grad_norm": 3.03125,
      "learning_rate": 1.7987310191567303e-05,
      "loss": 0.8028,
      "step": 578840
    },
    {
      "epoch": 2.0287249366516313,
      "grad_norm": 2.640625,
      "learning_rate": 1.7986661162903597e-05,
      "loss": 0.7966,
      "step": 578850
    },
    {
      "epoch": 2.028759984158527,
      "grad_norm": 3.046875,
      "learning_rate": 1.7986012134239895e-05,
      "loss": 0.8459,
      "step": 578860
    },
    {
      "epoch": 2.0287950316654224,
      "grad_norm": 3.078125,
      "learning_rate": 1.7985363105576193e-05,
      "loss": 0.7661,
      "step": 578870
    },
    {
      "epoch": 2.028830079172318,
      "grad_norm": 3.390625,
      "learning_rate": 1.798471407691249e-05,
      "loss": 0.8408,
      "step": 578880
    },
    {
      "epoch": 2.0288651266792135,
      "grad_norm": 3.265625,
      "learning_rate": 1.798406504824879e-05,
      "loss": 0.8425,
      "step": 578890
    },
    {
      "epoch": 2.0289001741861092,
      "grad_norm": 3.109375,
      "learning_rate": 1.798341601958509e-05,
      "loss": 0.7854,
      "step": 578900
    },
    {
      "epoch": 2.028935221693005,
      "grad_norm": 2.90625,
      "learning_rate": 1.798276699092139e-05,
      "loss": 0.7739,
      "step": 578910
    },
    {
      "epoch": 2.0289702691999003,
      "grad_norm": 2.640625,
      "learning_rate": 1.7982117962257687e-05,
      "loss": 0.8519,
      "step": 578920
    },
    {
      "epoch": 2.029005316706796,
      "grad_norm": 3.296875,
      "learning_rate": 1.7981468933593985e-05,
      "loss": 0.8838,
      "step": 578930
    },
    {
      "epoch": 2.029040364213692,
      "grad_norm": 2.796875,
      "learning_rate": 1.7980819904930283e-05,
      "loss": 0.7955,
      "step": 578940
    },
    {
      "epoch": 2.029075411720587,
      "grad_norm": 2.921875,
      "learning_rate": 1.798017087626658e-05,
      "loss": 0.8322,
      "step": 578950
    },
    {
      "epoch": 2.029110459227483,
      "grad_norm": 2.5625,
      "learning_rate": 1.797952184760288e-05,
      "loss": 0.7329,
      "step": 578960
    },
    {
      "epoch": 2.0291455067343787,
      "grad_norm": 2.703125,
      "learning_rate": 1.7978872818939177e-05,
      "loss": 0.7246,
      "step": 578970
    },
    {
      "epoch": 2.029180554241274,
      "grad_norm": 3.203125,
      "learning_rate": 1.7978223790275475e-05,
      "loss": 0.9081,
      "step": 578980
    },
    {
      "epoch": 2.0292156017481697,
      "grad_norm": 3.046875,
      "learning_rate": 1.7977574761611773e-05,
      "loss": 0.8422,
      "step": 578990
    },
    {
      "epoch": 2.029250649255065,
      "grad_norm": 2.6875,
      "learning_rate": 1.797692573294807e-05,
      "loss": 0.864,
      "step": 579000
    },
    {
      "epoch": 2.029285696761961,
      "grad_norm": 2.90625,
      "learning_rate": 1.797627670428437e-05,
      "loss": 0.8425,
      "step": 579010
    },
    {
      "epoch": 2.0293207442688566,
      "grad_norm": 2.546875,
      "learning_rate": 1.7975627675620667e-05,
      "loss": 0.7815,
      "step": 579020
    },
    {
      "epoch": 2.029355791775752,
      "grad_norm": 3.328125,
      "learning_rate": 1.7974978646956965e-05,
      "loss": 0.773,
      "step": 579030
    },
    {
      "epoch": 2.0293908392826476,
      "grad_norm": 2.765625,
      "learning_rate": 1.7974329618293263e-05,
      "loss": 0.8179,
      "step": 579040
    },
    {
      "epoch": 2.0294258867895434,
      "grad_norm": 2.875,
      "learning_rate": 1.797368058962956e-05,
      "loss": 0.7635,
      "step": 579050
    },
    {
      "epoch": 2.0294609342964387,
      "grad_norm": 2.453125,
      "learning_rate": 1.797303156096586e-05,
      "loss": 0.9299,
      "step": 579060
    },
    {
      "epoch": 2.0294959818033345,
      "grad_norm": 2.734375,
      "learning_rate": 1.7972382532302157e-05,
      "loss": 0.8261,
      "step": 579070
    },
    {
      "epoch": 2.02953102931023,
      "grad_norm": 2.90625,
      "learning_rate": 1.7971733503638455e-05,
      "loss": 0.8358,
      "step": 579080
    },
    {
      "epoch": 2.0295660768171255,
      "grad_norm": 2.484375,
      "learning_rate": 1.7971084474974753e-05,
      "loss": 0.8553,
      "step": 579090
    },
    {
      "epoch": 2.0296011243240213,
      "grad_norm": 2.890625,
      "learning_rate": 1.797043544631105e-05,
      "loss": 0.8454,
      "step": 579100
    },
    {
      "epoch": 2.0296361718309166,
      "grad_norm": 2.734375,
      "learning_rate": 1.796978641764735e-05,
      "loss": 0.8014,
      "step": 579110
    },
    {
      "epoch": 2.0296712193378124,
      "grad_norm": 2.8125,
      "learning_rate": 1.7969137388983647e-05,
      "loss": 0.78,
      "step": 579120
    },
    {
      "epoch": 2.029706266844708,
      "grad_norm": 3.34375,
      "learning_rate": 1.7968488360319945e-05,
      "loss": 0.8002,
      "step": 579130
    },
    {
      "epoch": 2.0297413143516034,
      "grad_norm": 3.0,
      "learning_rate": 1.7967839331656243e-05,
      "loss": 0.9059,
      "step": 579140
    },
    {
      "epoch": 2.029776361858499,
      "grad_norm": 3.09375,
      "learning_rate": 1.7967190302992544e-05,
      "loss": 0.8011,
      "step": 579150
    },
    {
      "epoch": 2.029811409365395,
      "grad_norm": 2.953125,
      "learning_rate": 1.7966541274328842e-05,
      "loss": 0.8431,
      "step": 579160
    },
    {
      "epoch": 2.0298464568722903,
      "grad_norm": 2.75,
      "learning_rate": 1.796589224566514e-05,
      "loss": 0.7459,
      "step": 579170
    },
    {
      "epoch": 2.029881504379186,
      "grad_norm": 2.875,
      "learning_rate": 1.7965243217001438e-05,
      "loss": 0.8157,
      "step": 579180
    },
    {
      "epoch": 2.0299165518860818,
      "grad_norm": 2.765625,
      "learning_rate": 1.7964594188337736e-05,
      "loss": 0.7734,
      "step": 579190
    },
    {
      "epoch": 2.029951599392977,
      "grad_norm": 3.453125,
      "learning_rate": 1.7963945159674034e-05,
      "loss": 0.8826,
      "step": 579200
    },
    {
      "epoch": 2.029986646899873,
      "grad_norm": 2.921875,
      "learning_rate": 1.7963296131010332e-05,
      "loss": 0.8262,
      "step": 579210
    },
    {
      "epoch": 2.030021694406768,
      "grad_norm": 2.828125,
      "learning_rate": 1.796264710234663e-05,
      "loss": 0.8753,
      "step": 579220
    },
    {
      "epoch": 2.030056741913664,
      "grad_norm": 2.796875,
      "learning_rate": 1.7961998073682925e-05,
      "loss": 0.8131,
      "step": 579230
    },
    {
      "epoch": 2.0300917894205597,
      "grad_norm": 2.890625,
      "learning_rate": 1.7961349045019223e-05,
      "loss": 0.8028,
      "step": 579240
    },
    {
      "epoch": 2.030126836927455,
      "grad_norm": 2.75,
      "learning_rate": 1.796070001635552e-05,
      "loss": 0.8685,
      "step": 579250
    },
    {
      "epoch": 2.0301618844343507,
      "grad_norm": 3.140625,
      "learning_rate": 1.796005098769182e-05,
      "loss": 0.8092,
      "step": 579260
    },
    {
      "epoch": 2.0301969319412465,
      "grad_norm": 2.875,
      "learning_rate": 1.795940195902812e-05,
      "loss": 0.7905,
      "step": 579270
    },
    {
      "epoch": 2.030231979448142,
      "grad_norm": 3.15625,
      "learning_rate": 1.7958752930364418e-05,
      "loss": 0.7779,
      "step": 579280
    },
    {
      "epoch": 2.0302670269550376,
      "grad_norm": 3.171875,
      "learning_rate": 1.7958103901700716e-05,
      "loss": 0.8309,
      "step": 579290
    },
    {
      "epoch": 2.0303020744619333,
      "grad_norm": 2.875,
      "learning_rate": 1.7957454873037014e-05,
      "loss": 0.7749,
      "step": 579300
    },
    {
      "epoch": 2.0303371219688287,
      "grad_norm": 2.59375,
      "learning_rate": 1.7956805844373312e-05,
      "loss": 0.7386,
      "step": 579310
    },
    {
      "epoch": 2.0303721694757244,
      "grad_norm": 3.171875,
      "learning_rate": 1.795615681570961e-05,
      "loss": 0.8005,
      "step": 579320
    },
    {
      "epoch": 2.0304072169826197,
      "grad_norm": 2.890625,
      "learning_rate": 1.7955507787045908e-05,
      "loss": 0.7909,
      "step": 579330
    },
    {
      "epoch": 2.0304422644895155,
      "grad_norm": 2.890625,
      "learning_rate": 1.7954858758382206e-05,
      "loss": 0.8915,
      "step": 579340
    },
    {
      "epoch": 2.0304773119964112,
      "grad_norm": 2.609375,
      "learning_rate": 1.7954209729718504e-05,
      "loss": 0.8283,
      "step": 579350
    },
    {
      "epoch": 2.0305123595033066,
      "grad_norm": 2.359375,
      "learning_rate": 1.7953560701054802e-05,
      "loss": 0.7857,
      "step": 579360
    },
    {
      "epoch": 2.0305474070102023,
      "grad_norm": 2.5,
      "learning_rate": 1.79529116723911e-05,
      "loss": 0.8556,
      "step": 579370
    },
    {
      "epoch": 2.030582454517098,
      "grad_norm": 3.46875,
      "learning_rate": 1.7952262643727398e-05,
      "loss": 0.9146,
      "step": 579380
    },
    {
      "epoch": 2.0306175020239934,
      "grad_norm": 2.453125,
      "learning_rate": 1.7951613615063696e-05,
      "loss": 0.8233,
      "step": 579390
    },
    {
      "epoch": 2.030652549530889,
      "grad_norm": 2.921875,
      "learning_rate": 1.7950964586399998e-05,
      "loss": 0.8609,
      "step": 579400
    },
    {
      "epoch": 2.030687597037785,
      "grad_norm": 3.234375,
      "learning_rate": 1.7950315557736292e-05,
      "loss": 0.7706,
      "step": 579410
    },
    {
      "epoch": 2.03072264454468,
      "grad_norm": 3.1875,
      "learning_rate": 1.794966652907259e-05,
      "loss": 0.7327,
      "step": 579420
    },
    {
      "epoch": 2.030757692051576,
      "grad_norm": 2.953125,
      "learning_rate": 1.7949017500408888e-05,
      "loss": 0.8222,
      "step": 579430
    },
    {
      "epoch": 2.0307927395584717,
      "grad_norm": 2.6875,
      "learning_rate": 1.7948368471745186e-05,
      "loss": 0.7958,
      "step": 579440
    },
    {
      "epoch": 2.030827787065367,
      "grad_norm": 2.65625,
      "learning_rate": 1.7947719443081484e-05,
      "loss": 0.7864,
      "step": 579450
    },
    {
      "epoch": 2.030862834572263,
      "grad_norm": 2.59375,
      "learning_rate": 1.7947070414417782e-05,
      "loss": 0.8274,
      "step": 579460
    },
    {
      "epoch": 2.030897882079158,
      "grad_norm": 3.078125,
      "learning_rate": 1.794642138575408e-05,
      "loss": 0.8124,
      "step": 579470
    },
    {
      "epoch": 2.030932929586054,
      "grad_norm": 3.0,
      "learning_rate": 1.7945772357090378e-05,
      "loss": 0.8043,
      "step": 579480
    },
    {
      "epoch": 2.0309679770929496,
      "grad_norm": 2.859375,
      "learning_rate": 1.7945123328426676e-05,
      "loss": 0.7491,
      "step": 579490
    },
    {
      "epoch": 2.031003024599845,
      "grad_norm": 2.421875,
      "learning_rate": 1.7944474299762974e-05,
      "loss": 0.7777,
      "step": 579500
    },
    {
      "epoch": 2.0310380721067407,
      "grad_norm": 3.109375,
      "learning_rate": 1.7943825271099272e-05,
      "loss": 0.8291,
      "step": 579510
    },
    {
      "epoch": 2.0310731196136365,
      "grad_norm": 3.1875,
      "learning_rate": 1.7943176242435574e-05,
      "loss": 0.8044,
      "step": 579520
    },
    {
      "epoch": 2.0311081671205318,
      "grad_norm": 2.96875,
      "learning_rate": 1.794252721377187e-05,
      "loss": 0.8239,
      "step": 579530
    },
    {
      "epoch": 2.0311432146274275,
      "grad_norm": 3.234375,
      "learning_rate": 1.794187818510817e-05,
      "loss": 0.8752,
      "step": 579540
    },
    {
      "epoch": 2.0311782621343233,
      "grad_norm": 2.65625,
      "learning_rate": 1.7941229156444468e-05,
      "loss": 0.8063,
      "step": 579550
    },
    {
      "epoch": 2.0312133096412186,
      "grad_norm": 2.921875,
      "learning_rate": 1.7940580127780766e-05,
      "loss": 0.8239,
      "step": 579560
    },
    {
      "epoch": 2.0312483571481144,
      "grad_norm": 2.890625,
      "learning_rate": 1.7939931099117064e-05,
      "loss": 0.8376,
      "step": 579570
    },
    {
      "epoch": 2.0312834046550097,
      "grad_norm": 2.703125,
      "learning_rate": 1.793928207045336e-05,
      "loss": 0.7904,
      "step": 579580
    },
    {
      "epoch": 2.0313184521619054,
      "grad_norm": 2.921875,
      "learning_rate": 1.793863304178966e-05,
      "loss": 0.8813,
      "step": 579590
    },
    {
      "epoch": 2.031353499668801,
      "grad_norm": 3.046875,
      "learning_rate": 1.7937984013125954e-05,
      "loss": 0.8436,
      "step": 579600
    },
    {
      "epoch": 2.0313885471756965,
      "grad_norm": 3.25,
      "learning_rate": 1.7937334984462252e-05,
      "loss": 0.8331,
      "step": 579610
    },
    {
      "epoch": 2.0314235946825923,
      "grad_norm": 2.65625,
      "learning_rate": 1.793668595579855e-05,
      "loss": 0.8155,
      "step": 579620
    },
    {
      "epoch": 2.031458642189488,
      "grad_norm": 3.15625,
      "learning_rate": 1.793603692713485e-05,
      "loss": 0.8192,
      "step": 579630
    },
    {
      "epoch": 2.0314936896963833,
      "grad_norm": 3.15625,
      "learning_rate": 1.793538789847115e-05,
      "loss": 0.7908,
      "step": 579640
    },
    {
      "epoch": 2.031528737203279,
      "grad_norm": 3.09375,
      "learning_rate": 1.7934738869807448e-05,
      "loss": 0.7831,
      "step": 579650
    },
    {
      "epoch": 2.031563784710175,
      "grad_norm": 2.5625,
      "learning_rate": 1.7934089841143746e-05,
      "loss": 0.8222,
      "step": 579660
    },
    {
      "epoch": 2.03159883221707,
      "grad_norm": 2.6875,
      "learning_rate": 1.7933440812480044e-05,
      "loss": 0.8385,
      "step": 579670
    },
    {
      "epoch": 2.031633879723966,
      "grad_norm": 3.265625,
      "learning_rate": 1.793279178381634e-05,
      "loss": 0.8561,
      "step": 579680
    },
    {
      "epoch": 2.0316689272308612,
      "grad_norm": 2.953125,
      "learning_rate": 1.793214275515264e-05,
      "loss": 0.7922,
      "step": 579690
    },
    {
      "epoch": 2.031703974737757,
      "grad_norm": 2.75,
      "learning_rate": 1.7931493726488938e-05,
      "loss": 0.8088,
      "step": 579700
    },
    {
      "epoch": 2.0317390222446527,
      "grad_norm": 3.078125,
      "learning_rate": 1.7930844697825236e-05,
      "loss": 0.7889,
      "step": 579710
    },
    {
      "epoch": 2.031774069751548,
      "grad_norm": 3.0625,
      "learning_rate": 1.7930195669161534e-05,
      "loss": 0.8517,
      "step": 579720
    },
    {
      "epoch": 2.031809117258444,
      "grad_norm": 2.3125,
      "learning_rate": 1.792954664049783e-05,
      "loss": 0.7803,
      "step": 579730
    },
    {
      "epoch": 2.0318441647653396,
      "grad_norm": 3.109375,
      "learning_rate": 1.792889761183413e-05,
      "loss": 0.8541,
      "step": 579740
    },
    {
      "epoch": 2.031879212272235,
      "grad_norm": 3.015625,
      "learning_rate": 1.7928248583170428e-05,
      "loss": 0.9026,
      "step": 579750
    },
    {
      "epoch": 2.0319142597791306,
      "grad_norm": 2.734375,
      "learning_rate": 1.7927599554506726e-05,
      "loss": 0.8682,
      "step": 579760
    },
    {
      "epoch": 2.0319493072860264,
      "grad_norm": 2.984375,
      "learning_rate": 1.7926950525843027e-05,
      "loss": 0.7973,
      "step": 579770
    },
    {
      "epoch": 2.0319843547929217,
      "grad_norm": 2.890625,
      "learning_rate": 1.7926301497179325e-05,
      "loss": 0.8544,
      "step": 579780
    },
    {
      "epoch": 2.0320194022998175,
      "grad_norm": 2.71875,
      "learning_rate": 1.792565246851562e-05,
      "loss": 0.8259,
      "step": 579790
    },
    {
      "epoch": 2.032054449806713,
      "grad_norm": 2.75,
      "learning_rate": 1.7925003439851918e-05,
      "loss": 0.8161,
      "step": 579800
    },
    {
      "epoch": 2.0320894973136086,
      "grad_norm": 2.859375,
      "learning_rate": 1.7924354411188216e-05,
      "loss": 0.8496,
      "step": 579810
    },
    {
      "epoch": 2.0321245448205043,
      "grad_norm": 2.953125,
      "learning_rate": 1.7923705382524514e-05,
      "loss": 0.8299,
      "step": 579820
    },
    {
      "epoch": 2.0321595923273996,
      "grad_norm": 3.265625,
      "learning_rate": 1.792305635386081e-05,
      "loss": 0.8437,
      "step": 579830
    },
    {
      "epoch": 2.0321946398342954,
      "grad_norm": 2.6875,
      "learning_rate": 1.792240732519711e-05,
      "loss": 0.8775,
      "step": 579840
    },
    {
      "epoch": 2.032229687341191,
      "grad_norm": 2.765625,
      "learning_rate": 1.7921758296533408e-05,
      "loss": 0.7959,
      "step": 579850
    },
    {
      "epoch": 2.0322647348480865,
      "grad_norm": 3.140625,
      "learning_rate": 1.7921109267869706e-05,
      "loss": 0.7969,
      "step": 579860
    },
    {
      "epoch": 2.032299782354982,
      "grad_norm": 2.59375,
      "learning_rate": 1.7920460239206004e-05,
      "loss": 0.8038,
      "step": 579870
    },
    {
      "epoch": 2.032334829861878,
      "grad_norm": 3.296875,
      "learning_rate": 1.7919811210542305e-05,
      "loss": 0.7939,
      "step": 579880
    },
    {
      "epoch": 2.0323698773687733,
      "grad_norm": 3.03125,
      "learning_rate": 1.7919162181878603e-05,
      "loss": 0.82,
      "step": 579890
    },
    {
      "epoch": 2.032404924875669,
      "grad_norm": 2.484375,
      "learning_rate": 1.79185131532149e-05,
      "loss": 0.7169,
      "step": 579900
    },
    {
      "epoch": 2.0324399723825644,
      "grad_norm": 2.859375,
      "learning_rate": 1.79178641245512e-05,
      "loss": 0.8561,
      "step": 579910
    },
    {
      "epoch": 2.03247501988946,
      "grad_norm": 3.109375,
      "learning_rate": 1.7917215095887497e-05,
      "loss": 0.7537,
      "step": 579920
    },
    {
      "epoch": 2.032510067396356,
      "grad_norm": 2.96875,
      "learning_rate": 1.7916566067223795e-05,
      "loss": 0.8382,
      "step": 579930
    },
    {
      "epoch": 2.032545114903251,
      "grad_norm": 3.34375,
      "learning_rate": 1.7915917038560093e-05,
      "loss": 0.8321,
      "step": 579940
    },
    {
      "epoch": 2.032580162410147,
      "grad_norm": 3.0,
      "learning_rate": 1.791526800989639e-05,
      "loss": 0.8176,
      "step": 579950
    },
    {
      "epoch": 2.0326152099170427,
      "grad_norm": 2.828125,
      "learning_rate": 1.791461898123269e-05,
      "loss": 0.8876,
      "step": 579960
    },
    {
      "epoch": 2.032650257423938,
      "grad_norm": 2.515625,
      "learning_rate": 1.7913969952568987e-05,
      "loss": 0.8195,
      "step": 579970
    },
    {
      "epoch": 2.0326853049308338,
      "grad_norm": 2.859375,
      "learning_rate": 1.791332092390528e-05,
      "loss": 0.8058,
      "step": 579980
    },
    {
      "epoch": 2.0327203524377295,
      "grad_norm": 2.65625,
      "learning_rate": 1.791267189524158e-05,
      "loss": 0.789,
      "step": 579990
    },
    {
      "epoch": 2.032755399944625,
      "grad_norm": 2.90625,
      "learning_rate": 1.791202286657788e-05,
      "loss": 0.8328,
      "step": 580000
    },
    {
      "epoch": 2.032755399944625,
      "eval_loss": 0.7699409127235413,
      "eval_runtime": 553.9161,
      "eval_samples_per_second": 686.812,
      "eval_steps_per_second": 57.234,
      "step": 580000
    },
    {
      "epoch": 2.0327904474515206,
      "grad_norm": 3.0625,
      "learning_rate": 1.791137383791418e-05,
      "loss": 0.8276,
      "step": 580010
    },
    {
      "epoch": 2.0328254949584164,
      "grad_norm": 3.125,
      "learning_rate": 1.7910724809250477e-05,
      "loss": 0.8471,
      "step": 580020
    },
    {
      "epoch": 2.0328605424653117,
      "grad_norm": 2.796875,
      "learning_rate": 1.7910075780586775e-05,
      "loss": 0.8556,
      "step": 580030
    },
    {
      "epoch": 2.0328955899722074,
      "grad_norm": 2.578125,
      "learning_rate": 1.7909426751923073e-05,
      "loss": 0.8003,
      "step": 580040
    },
    {
      "epoch": 2.0329306374791027,
      "grad_norm": 2.890625,
      "learning_rate": 1.790877772325937e-05,
      "loss": 0.8061,
      "step": 580050
    },
    {
      "epoch": 2.0329656849859985,
      "grad_norm": 2.875,
      "learning_rate": 1.790812869459567e-05,
      "loss": 0.828,
      "step": 580060
    },
    {
      "epoch": 2.0330007324928943,
      "grad_norm": 3.21875,
      "learning_rate": 1.7907479665931967e-05,
      "loss": 0.9029,
      "step": 580070
    },
    {
      "epoch": 2.0330357799997896,
      "grad_norm": 3.0,
      "learning_rate": 1.7906830637268265e-05,
      "loss": 0.824,
      "step": 580080
    },
    {
      "epoch": 2.0330708275066853,
      "grad_norm": 2.953125,
      "learning_rate": 1.7906181608604563e-05,
      "loss": 0.8166,
      "step": 580090
    },
    {
      "epoch": 2.033105875013581,
      "grad_norm": 3.046875,
      "learning_rate": 1.790553257994086e-05,
      "loss": 0.8595,
      "step": 580100
    },
    {
      "epoch": 2.0331409225204764,
      "grad_norm": 2.703125,
      "learning_rate": 1.790488355127716e-05,
      "loss": 0.8241,
      "step": 580110
    },
    {
      "epoch": 2.033175970027372,
      "grad_norm": 2.75,
      "learning_rate": 1.7904234522613457e-05,
      "loss": 0.7974,
      "step": 580120
    },
    {
      "epoch": 2.033211017534268,
      "grad_norm": 2.90625,
      "learning_rate": 1.7903585493949755e-05,
      "loss": 0.7284,
      "step": 580130
    },
    {
      "epoch": 2.0332460650411632,
      "grad_norm": 3.0,
      "learning_rate": 1.7902936465286056e-05,
      "loss": 0.8505,
      "step": 580140
    },
    {
      "epoch": 2.033281112548059,
      "grad_norm": 2.875,
      "learning_rate": 1.7902287436622354e-05,
      "loss": 0.8454,
      "step": 580150
    },
    {
      "epoch": 2.0333161600549543,
      "grad_norm": 3.0,
      "learning_rate": 1.7901638407958652e-05,
      "loss": 0.8947,
      "step": 580160
    },
    {
      "epoch": 2.03335120756185,
      "grad_norm": 2.96875,
      "learning_rate": 1.7900989379294947e-05,
      "loss": 0.9207,
      "step": 580170
    },
    {
      "epoch": 2.033386255068746,
      "grad_norm": 2.6875,
      "learning_rate": 1.7900340350631245e-05,
      "loss": 0.8201,
      "step": 580180
    },
    {
      "epoch": 2.033421302575641,
      "grad_norm": 2.9375,
      "learning_rate": 1.7899691321967543e-05,
      "loss": 0.8681,
      "step": 580190
    },
    {
      "epoch": 2.033456350082537,
      "grad_norm": 3.078125,
      "learning_rate": 1.789904229330384e-05,
      "loss": 0.813,
      "step": 580200
    },
    {
      "epoch": 2.0334913975894326,
      "grad_norm": 3.0625,
      "learning_rate": 1.789839326464014e-05,
      "loss": 0.783,
      "step": 580210
    },
    {
      "epoch": 2.033526445096328,
      "grad_norm": 3.0625,
      "learning_rate": 1.7897744235976437e-05,
      "loss": 0.8576,
      "step": 580220
    },
    {
      "epoch": 2.0335614926032237,
      "grad_norm": 2.984375,
      "learning_rate": 1.7897095207312735e-05,
      "loss": 0.7607,
      "step": 580230
    },
    {
      "epoch": 2.0335965401101195,
      "grad_norm": 2.734375,
      "learning_rate": 1.7896446178649033e-05,
      "loss": 0.7338,
      "step": 580240
    },
    {
      "epoch": 2.033631587617015,
      "grad_norm": 2.65625,
      "learning_rate": 1.7895797149985334e-05,
      "loss": 0.841,
      "step": 580250
    },
    {
      "epoch": 2.0336666351239105,
      "grad_norm": 2.359375,
      "learning_rate": 1.7895148121321632e-05,
      "loss": 0.7677,
      "step": 580260
    },
    {
      "epoch": 2.033701682630806,
      "grad_norm": 2.609375,
      "learning_rate": 1.789449909265793e-05,
      "loss": 0.8452,
      "step": 580270
    },
    {
      "epoch": 2.0337367301377016,
      "grad_norm": 2.78125,
      "learning_rate": 1.789385006399423e-05,
      "loss": 0.7885,
      "step": 580280
    },
    {
      "epoch": 2.0337717776445974,
      "grad_norm": 2.609375,
      "learning_rate": 1.7893201035330526e-05,
      "loss": 0.7594,
      "step": 580290
    },
    {
      "epoch": 2.0338068251514927,
      "grad_norm": 2.96875,
      "learning_rate": 1.7892552006666824e-05,
      "loss": 0.8022,
      "step": 580300
    },
    {
      "epoch": 2.0338418726583884,
      "grad_norm": 3.6875,
      "learning_rate": 1.7891902978003122e-05,
      "loss": 0.8263,
      "step": 580310
    },
    {
      "epoch": 2.033876920165284,
      "grad_norm": 2.890625,
      "learning_rate": 1.789125394933942e-05,
      "loss": 0.861,
      "step": 580320
    },
    {
      "epoch": 2.0339119676721795,
      "grad_norm": 2.625,
      "learning_rate": 1.789060492067572e-05,
      "loss": 0.8067,
      "step": 580330
    },
    {
      "epoch": 2.0339470151790753,
      "grad_norm": 2.765625,
      "learning_rate": 1.7889955892012016e-05,
      "loss": 0.8308,
      "step": 580340
    },
    {
      "epoch": 2.033982062685971,
      "grad_norm": 3.078125,
      "learning_rate": 1.788930686334831e-05,
      "loss": 0.8152,
      "step": 580350
    },
    {
      "epoch": 2.0340171101928664,
      "grad_norm": 3.3125,
      "learning_rate": 1.7888657834684612e-05,
      "loss": 0.8689,
      "step": 580360
    },
    {
      "epoch": 2.034052157699762,
      "grad_norm": 3.34375,
      "learning_rate": 1.788800880602091e-05,
      "loss": 0.8317,
      "step": 580370
    },
    {
      "epoch": 2.0340872052066574,
      "grad_norm": 3.390625,
      "learning_rate": 1.788735977735721e-05,
      "loss": 0.8669,
      "step": 580380
    },
    {
      "epoch": 2.034122252713553,
      "grad_norm": 3.25,
      "learning_rate": 1.7886710748693506e-05,
      "loss": 0.7242,
      "step": 580390
    },
    {
      "epoch": 2.034157300220449,
      "grad_norm": 3.171875,
      "learning_rate": 1.7886061720029804e-05,
      "loss": 0.8413,
      "step": 580400
    },
    {
      "epoch": 2.0341923477273443,
      "grad_norm": 2.671875,
      "learning_rate": 1.7885412691366102e-05,
      "loss": 0.8629,
      "step": 580410
    },
    {
      "epoch": 2.03422739523424,
      "grad_norm": 3.046875,
      "learning_rate": 1.78847636627024e-05,
      "loss": 0.9013,
      "step": 580420
    },
    {
      "epoch": 2.0342624427411358,
      "grad_norm": 3.015625,
      "learning_rate": 1.78841146340387e-05,
      "loss": 0.8406,
      "step": 580430
    },
    {
      "epoch": 2.034297490248031,
      "grad_norm": 3.609375,
      "learning_rate": 1.7883465605374996e-05,
      "loss": 0.8253,
      "step": 580440
    },
    {
      "epoch": 2.034332537754927,
      "grad_norm": 3.203125,
      "learning_rate": 1.7882816576711294e-05,
      "loss": 0.845,
      "step": 580450
    },
    {
      "epoch": 2.0343675852618226,
      "grad_norm": 2.71875,
      "learning_rate": 1.7882167548047592e-05,
      "loss": 0.8664,
      "step": 580460
    },
    {
      "epoch": 2.034402632768718,
      "grad_norm": 2.78125,
      "learning_rate": 1.788151851938389e-05,
      "loss": 0.8772,
      "step": 580470
    },
    {
      "epoch": 2.0344376802756137,
      "grad_norm": 2.5625,
      "learning_rate": 1.788086949072019e-05,
      "loss": 0.86,
      "step": 580480
    },
    {
      "epoch": 2.034472727782509,
      "grad_norm": 2.8125,
      "learning_rate": 1.7880220462056486e-05,
      "loss": 0.8136,
      "step": 580490
    },
    {
      "epoch": 2.0345077752894047,
      "grad_norm": 2.703125,
      "learning_rate": 1.7879571433392788e-05,
      "loss": 0.7953,
      "step": 580500
    },
    {
      "epoch": 2.0345428227963005,
      "grad_norm": 2.9375,
      "learning_rate": 1.7878922404729086e-05,
      "loss": 0.8152,
      "step": 580510
    },
    {
      "epoch": 2.034577870303196,
      "grad_norm": 3.25,
      "learning_rate": 1.7878273376065384e-05,
      "loss": 0.7916,
      "step": 580520
    },
    {
      "epoch": 2.0346129178100916,
      "grad_norm": 3.140625,
      "learning_rate": 1.7877624347401682e-05,
      "loss": 0.9231,
      "step": 580530
    },
    {
      "epoch": 2.0346479653169873,
      "grad_norm": 3.296875,
      "learning_rate": 1.7876975318737976e-05,
      "loss": 0.8446,
      "step": 580540
    },
    {
      "epoch": 2.0346830128238826,
      "grad_norm": 2.796875,
      "learning_rate": 1.7876326290074274e-05,
      "loss": 0.8355,
      "step": 580550
    },
    {
      "epoch": 2.0347180603307784,
      "grad_norm": 2.703125,
      "learning_rate": 1.7875677261410572e-05,
      "loss": 0.8705,
      "step": 580560
    },
    {
      "epoch": 2.034753107837674,
      "grad_norm": 2.40625,
      "learning_rate": 1.787502823274687e-05,
      "loss": 0.8596,
      "step": 580570
    },
    {
      "epoch": 2.0347881553445695,
      "grad_norm": 3.21875,
      "learning_rate": 1.787437920408317e-05,
      "loss": 0.7559,
      "step": 580580
    },
    {
      "epoch": 2.0348232028514652,
      "grad_norm": 3.1875,
      "learning_rate": 1.7873730175419466e-05,
      "loss": 0.7599,
      "step": 580590
    },
    {
      "epoch": 2.0348582503583605,
      "grad_norm": 3.125,
      "learning_rate": 1.7873081146755764e-05,
      "loss": 0.8249,
      "step": 580600
    },
    {
      "epoch": 2.0348932978652563,
      "grad_norm": 2.953125,
      "learning_rate": 1.7872432118092062e-05,
      "loss": 0.806,
      "step": 580610
    },
    {
      "epoch": 2.034928345372152,
      "grad_norm": 2.796875,
      "learning_rate": 1.7871783089428364e-05,
      "loss": 0.8465,
      "step": 580620
    },
    {
      "epoch": 2.0349633928790474,
      "grad_norm": 2.40625,
      "learning_rate": 1.7871134060764662e-05,
      "loss": 0.7973,
      "step": 580630
    },
    {
      "epoch": 2.034998440385943,
      "grad_norm": 3.203125,
      "learning_rate": 1.787048503210096e-05,
      "loss": 0.8727,
      "step": 580640
    },
    {
      "epoch": 2.035033487892839,
      "grad_norm": 3.09375,
      "learning_rate": 1.7869836003437258e-05,
      "loss": 0.7572,
      "step": 580650
    },
    {
      "epoch": 2.035068535399734,
      "grad_norm": 3.28125,
      "learning_rate": 1.7869186974773556e-05,
      "loss": 0.8035,
      "step": 580660
    },
    {
      "epoch": 2.03510358290663,
      "grad_norm": 2.84375,
      "learning_rate": 1.7868537946109854e-05,
      "loss": 0.7654,
      "step": 580670
    },
    {
      "epoch": 2.0351386304135257,
      "grad_norm": 3.078125,
      "learning_rate": 1.7867888917446152e-05,
      "loss": 0.821,
      "step": 580680
    },
    {
      "epoch": 2.035173677920421,
      "grad_norm": 2.890625,
      "learning_rate": 1.786723988878245e-05,
      "loss": 0.8198,
      "step": 580690
    },
    {
      "epoch": 2.035208725427317,
      "grad_norm": 2.78125,
      "learning_rate": 1.7866590860118748e-05,
      "loss": 0.8158,
      "step": 580700
    },
    {
      "epoch": 2.035243772934212,
      "grad_norm": 2.65625,
      "learning_rate": 1.7865941831455046e-05,
      "loss": 0.8354,
      "step": 580710
    },
    {
      "epoch": 2.035278820441108,
      "grad_norm": 2.671875,
      "learning_rate": 1.7865292802791344e-05,
      "loss": 0.8153,
      "step": 580720
    },
    {
      "epoch": 2.0353138679480036,
      "grad_norm": 2.84375,
      "learning_rate": 1.7864643774127642e-05,
      "loss": 0.8537,
      "step": 580730
    },
    {
      "epoch": 2.035348915454899,
      "grad_norm": 3.171875,
      "learning_rate": 1.786399474546394e-05,
      "loss": 0.8843,
      "step": 580740
    },
    {
      "epoch": 2.0353839629617947,
      "grad_norm": 2.609375,
      "learning_rate": 1.7863345716800238e-05,
      "loss": 0.79,
      "step": 580750
    },
    {
      "epoch": 2.0354190104686904,
      "grad_norm": 3.1875,
      "learning_rate": 1.7862696688136536e-05,
      "loss": 0.7551,
      "step": 580760
    },
    {
      "epoch": 2.0354540579755858,
      "grad_norm": 2.859375,
      "learning_rate": 1.7862047659472834e-05,
      "loss": 0.8124,
      "step": 580770
    },
    {
      "epoch": 2.0354891054824815,
      "grad_norm": 3.125,
      "learning_rate": 1.7861398630809132e-05,
      "loss": 0.8439,
      "step": 580780
    },
    {
      "epoch": 2.0355241529893773,
      "grad_norm": 2.96875,
      "learning_rate": 1.786074960214543e-05,
      "loss": 0.8047,
      "step": 580790
    },
    {
      "epoch": 2.0355592004962726,
      "grad_norm": 2.9375,
      "learning_rate": 1.7860100573481728e-05,
      "loss": 0.7742,
      "step": 580800
    },
    {
      "epoch": 2.0355942480031683,
      "grad_norm": 3.0,
      "learning_rate": 1.7859451544818026e-05,
      "loss": 0.773,
      "step": 580810
    },
    {
      "epoch": 2.035629295510064,
      "grad_norm": 2.71875,
      "learning_rate": 1.7858802516154324e-05,
      "loss": 0.7368,
      "step": 580820
    },
    {
      "epoch": 2.0356643430169594,
      "grad_norm": 2.859375,
      "learning_rate": 1.7858153487490622e-05,
      "loss": 0.7576,
      "step": 580830
    },
    {
      "epoch": 2.035699390523855,
      "grad_norm": 2.921875,
      "learning_rate": 1.785750445882692e-05,
      "loss": 0.7877,
      "step": 580840
    },
    {
      "epoch": 2.0357344380307505,
      "grad_norm": 2.625,
      "learning_rate": 1.7856855430163218e-05,
      "loss": 0.9067,
      "step": 580850
    },
    {
      "epoch": 2.0357694855376463,
      "grad_norm": 3.4375,
      "learning_rate": 1.7856206401499516e-05,
      "loss": 0.8063,
      "step": 580860
    },
    {
      "epoch": 2.035804533044542,
      "grad_norm": 3.15625,
      "learning_rate": 1.7855557372835817e-05,
      "loss": 0.8769,
      "step": 580870
    },
    {
      "epoch": 2.0358395805514373,
      "grad_norm": 2.6875,
      "learning_rate": 1.7854908344172115e-05,
      "loss": 0.8153,
      "step": 580880
    },
    {
      "epoch": 2.035874628058333,
      "grad_norm": 2.71875,
      "learning_rate": 1.7854259315508413e-05,
      "loss": 0.8008,
      "step": 580890
    },
    {
      "epoch": 2.035909675565229,
      "grad_norm": 2.78125,
      "learning_rate": 1.785361028684471e-05,
      "loss": 0.8611,
      "step": 580900
    },
    {
      "epoch": 2.035944723072124,
      "grad_norm": 3.515625,
      "learning_rate": 1.785296125818101e-05,
      "loss": 0.8491,
      "step": 580910
    },
    {
      "epoch": 2.03597977057902,
      "grad_norm": 3.203125,
      "learning_rate": 1.7852312229517304e-05,
      "loss": 0.771,
      "step": 580920
    },
    {
      "epoch": 2.0360148180859157,
      "grad_norm": 2.921875,
      "learning_rate": 1.7851663200853602e-05,
      "loss": 0.7785,
      "step": 580930
    },
    {
      "epoch": 2.036049865592811,
      "grad_norm": 2.859375,
      "learning_rate": 1.78510141721899e-05,
      "loss": 0.8268,
      "step": 580940
    },
    {
      "epoch": 2.0360849130997067,
      "grad_norm": 2.921875,
      "learning_rate": 1.7850365143526198e-05,
      "loss": 0.8699,
      "step": 580950
    },
    {
      "epoch": 2.036119960606602,
      "grad_norm": 2.484375,
      "learning_rate": 1.7849716114862496e-05,
      "loss": 0.7277,
      "step": 580960
    },
    {
      "epoch": 2.036155008113498,
      "grad_norm": 3.015625,
      "learning_rate": 1.7849067086198794e-05,
      "loss": 0.8849,
      "step": 580970
    },
    {
      "epoch": 2.0361900556203936,
      "grad_norm": 2.90625,
      "learning_rate": 1.7848418057535095e-05,
      "loss": 0.7682,
      "step": 580980
    },
    {
      "epoch": 2.036225103127289,
      "grad_norm": 2.453125,
      "learning_rate": 1.7847769028871393e-05,
      "loss": 0.7384,
      "step": 580990
    },
    {
      "epoch": 2.0362601506341846,
      "grad_norm": 2.984375,
      "learning_rate": 1.784712000020769e-05,
      "loss": 0.7685,
      "step": 581000
    },
    {
      "epoch": 2.0362951981410804,
      "grad_norm": 3.046875,
      "learning_rate": 1.784647097154399e-05,
      "loss": 0.8597,
      "step": 581010
    },
    {
      "epoch": 2.0363302456479757,
      "grad_norm": 2.875,
      "learning_rate": 1.7845821942880287e-05,
      "loss": 0.8132,
      "step": 581020
    },
    {
      "epoch": 2.0363652931548715,
      "grad_norm": 3.078125,
      "learning_rate": 1.7845172914216585e-05,
      "loss": 0.7473,
      "step": 581030
    },
    {
      "epoch": 2.0364003406617672,
      "grad_norm": 3.0,
      "learning_rate": 1.7844523885552883e-05,
      "loss": 0.7194,
      "step": 581040
    },
    {
      "epoch": 2.0364353881686625,
      "grad_norm": 3.203125,
      "learning_rate": 1.784387485688918e-05,
      "loss": 0.8654,
      "step": 581050
    },
    {
      "epoch": 2.0364704356755583,
      "grad_norm": 2.96875,
      "learning_rate": 1.784322582822548e-05,
      "loss": 0.8174,
      "step": 581060
    },
    {
      "epoch": 2.0365054831824536,
      "grad_norm": 2.875,
      "learning_rate": 1.7842576799561777e-05,
      "loss": 0.7641,
      "step": 581070
    },
    {
      "epoch": 2.0365405306893494,
      "grad_norm": 2.765625,
      "learning_rate": 1.7841927770898075e-05,
      "loss": 0.7189,
      "step": 581080
    },
    {
      "epoch": 2.036575578196245,
      "grad_norm": 2.546875,
      "learning_rate": 1.7841278742234373e-05,
      "loss": 0.8681,
      "step": 581090
    },
    {
      "epoch": 2.0366106257031404,
      "grad_norm": 3.03125,
      "learning_rate": 1.784062971357067e-05,
      "loss": 0.8233,
      "step": 581100
    },
    {
      "epoch": 2.036645673210036,
      "grad_norm": 2.953125,
      "learning_rate": 1.783998068490697e-05,
      "loss": 0.8089,
      "step": 581110
    },
    {
      "epoch": 2.036680720716932,
      "grad_norm": 2.75,
      "learning_rate": 1.7839331656243267e-05,
      "loss": 0.8838,
      "step": 581120
    },
    {
      "epoch": 2.0367157682238273,
      "grad_norm": 3.03125,
      "learning_rate": 1.7838682627579565e-05,
      "loss": 0.8286,
      "step": 581130
    },
    {
      "epoch": 2.036750815730723,
      "grad_norm": 3.0625,
      "learning_rate": 1.7838033598915863e-05,
      "loss": 0.8867,
      "step": 581140
    },
    {
      "epoch": 2.036785863237619,
      "grad_norm": 3.296875,
      "learning_rate": 1.783738457025216e-05,
      "loss": 0.8278,
      "step": 581150
    },
    {
      "epoch": 2.036820910744514,
      "grad_norm": 2.703125,
      "learning_rate": 1.783673554158846e-05,
      "loss": 0.7942,
      "step": 581160
    },
    {
      "epoch": 2.03685595825141,
      "grad_norm": 3.234375,
      "learning_rate": 1.7836086512924757e-05,
      "loss": 0.7655,
      "step": 581170
    },
    {
      "epoch": 2.036891005758305,
      "grad_norm": 2.65625,
      "learning_rate": 1.7835437484261055e-05,
      "loss": 0.85,
      "step": 581180
    },
    {
      "epoch": 2.036926053265201,
      "grad_norm": 2.90625,
      "learning_rate": 1.7834788455597353e-05,
      "loss": 0.8571,
      "step": 581190
    },
    {
      "epoch": 2.0369611007720967,
      "grad_norm": 2.921875,
      "learning_rate": 1.783413942693365e-05,
      "loss": 0.83,
      "step": 581200
    },
    {
      "epoch": 2.036996148278992,
      "grad_norm": 3.046875,
      "learning_rate": 1.783349039826995e-05,
      "loss": 0.781,
      "step": 581210
    },
    {
      "epoch": 2.0370311957858878,
      "grad_norm": 2.6875,
      "learning_rate": 1.7832841369606247e-05,
      "loss": 0.765,
      "step": 581220
    },
    {
      "epoch": 2.0370662432927835,
      "grad_norm": 2.828125,
      "learning_rate": 1.7832192340942545e-05,
      "loss": 0.8942,
      "step": 581230
    },
    {
      "epoch": 2.037101290799679,
      "grad_norm": 2.546875,
      "learning_rate": 1.7831543312278846e-05,
      "loss": 0.7616,
      "step": 581240
    },
    {
      "epoch": 2.0371363383065746,
      "grad_norm": 2.453125,
      "learning_rate": 1.7830894283615144e-05,
      "loss": 0.7698,
      "step": 581250
    },
    {
      "epoch": 2.0371713858134703,
      "grad_norm": 3.015625,
      "learning_rate": 1.7830245254951442e-05,
      "loss": 0.8085,
      "step": 581260
    },
    {
      "epoch": 2.0372064333203657,
      "grad_norm": 3.171875,
      "learning_rate": 1.782959622628774e-05,
      "loss": 0.7785,
      "step": 581270
    },
    {
      "epoch": 2.0372414808272614,
      "grad_norm": 2.5,
      "learning_rate": 1.782894719762404e-05,
      "loss": 0.8304,
      "step": 581280
    },
    {
      "epoch": 2.0372765283341567,
      "grad_norm": 2.1875,
      "learning_rate": 1.7828298168960333e-05,
      "loss": 0.7989,
      "step": 581290
    },
    {
      "epoch": 2.0373115758410525,
      "grad_norm": 3.125,
      "learning_rate": 1.782764914029663e-05,
      "loss": 0.7892,
      "step": 581300
    },
    {
      "epoch": 2.0373466233479482,
      "grad_norm": 2.75,
      "learning_rate": 1.782700011163293e-05,
      "loss": 0.8014,
      "step": 581310
    },
    {
      "epoch": 2.0373816708548436,
      "grad_norm": 3.046875,
      "learning_rate": 1.7826351082969227e-05,
      "loss": 0.8498,
      "step": 581320
    },
    {
      "epoch": 2.0374167183617393,
      "grad_norm": 2.75,
      "learning_rate": 1.7825702054305525e-05,
      "loss": 0.8914,
      "step": 581330
    },
    {
      "epoch": 2.037451765868635,
      "grad_norm": 2.8125,
      "learning_rate": 1.7825053025641823e-05,
      "loss": 0.9148,
      "step": 581340
    },
    {
      "epoch": 2.0374868133755304,
      "grad_norm": 2.84375,
      "learning_rate": 1.7824403996978124e-05,
      "loss": 0.8803,
      "step": 581350
    },
    {
      "epoch": 2.037521860882426,
      "grad_norm": 3.328125,
      "learning_rate": 1.7823754968314422e-05,
      "loss": 0.8174,
      "step": 581360
    },
    {
      "epoch": 2.037556908389322,
      "grad_norm": 3.015625,
      "learning_rate": 1.782310593965072e-05,
      "loss": 0.7936,
      "step": 581370
    },
    {
      "epoch": 2.037591955896217,
      "grad_norm": 2.953125,
      "learning_rate": 1.782245691098702e-05,
      "loss": 0.8045,
      "step": 581380
    },
    {
      "epoch": 2.037627003403113,
      "grad_norm": 3.03125,
      "learning_rate": 1.7821807882323316e-05,
      "loss": 0.7561,
      "step": 581390
    },
    {
      "epoch": 2.0376620509100087,
      "grad_norm": 3.203125,
      "learning_rate": 1.7821158853659614e-05,
      "loss": 0.8702,
      "step": 581400
    },
    {
      "epoch": 2.037697098416904,
      "grad_norm": 3.234375,
      "learning_rate": 1.7820509824995912e-05,
      "loss": 0.8657,
      "step": 581410
    },
    {
      "epoch": 2.0377321459238,
      "grad_norm": 3.1875,
      "learning_rate": 1.781986079633221e-05,
      "loss": 0.7579,
      "step": 581420
    },
    {
      "epoch": 2.037767193430695,
      "grad_norm": 3.265625,
      "learning_rate": 1.781921176766851e-05,
      "loss": 0.8712,
      "step": 581430
    },
    {
      "epoch": 2.037802240937591,
      "grad_norm": 3.25,
      "learning_rate": 1.7818562739004806e-05,
      "loss": 0.8806,
      "step": 581440
    },
    {
      "epoch": 2.0378372884444866,
      "grad_norm": 2.78125,
      "learning_rate": 1.7817913710341104e-05,
      "loss": 0.8319,
      "step": 581450
    },
    {
      "epoch": 2.037872335951382,
      "grad_norm": 2.84375,
      "learning_rate": 1.7817264681677402e-05,
      "loss": 0.8069,
      "step": 581460
    },
    {
      "epoch": 2.0379073834582777,
      "grad_norm": 2.625,
      "learning_rate": 1.78166156530137e-05,
      "loss": 0.7007,
      "step": 581470
    },
    {
      "epoch": 2.0379424309651735,
      "grad_norm": 2.765625,
      "learning_rate": 1.781596662435e-05,
      "loss": 0.8247,
      "step": 581480
    },
    {
      "epoch": 2.037977478472069,
      "grad_norm": 2.515625,
      "learning_rate": 1.7815317595686296e-05,
      "loss": 0.8301,
      "step": 581490
    },
    {
      "epoch": 2.0380125259789645,
      "grad_norm": 3.453125,
      "learning_rate": 1.7814668567022594e-05,
      "loss": 0.8208,
      "step": 581500
    },
    {
      "epoch": 2.0380475734858603,
      "grad_norm": 2.515625,
      "learning_rate": 1.7814019538358892e-05,
      "loss": 0.7688,
      "step": 581510
    },
    {
      "epoch": 2.0380826209927556,
      "grad_norm": 2.921875,
      "learning_rate": 1.781337050969519e-05,
      "loss": 0.8577,
      "step": 581520
    },
    {
      "epoch": 2.0381176684996514,
      "grad_norm": 2.734375,
      "learning_rate": 1.781272148103149e-05,
      "loss": 0.8095,
      "step": 581530
    },
    {
      "epoch": 2.0381527160065467,
      "grad_norm": 2.671875,
      "learning_rate": 1.7812072452367786e-05,
      "loss": 0.7597,
      "step": 581540
    },
    {
      "epoch": 2.0381877635134424,
      "grad_norm": 3.359375,
      "learning_rate": 1.7811423423704084e-05,
      "loss": 0.9003,
      "step": 581550
    },
    {
      "epoch": 2.038222811020338,
      "grad_norm": 2.953125,
      "learning_rate": 1.7810774395040382e-05,
      "loss": 0.786,
      "step": 581560
    },
    {
      "epoch": 2.0382578585272335,
      "grad_norm": 2.84375,
      "learning_rate": 1.781012536637668e-05,
      "loss": 0.7791,
      "step": 581570
    },
    {
      "epoch": 2.0382929060341293,
      "grad_norm": 2.84375,
      "learning_rate": 1.780947633771298e-05,
      "loss": 0.7613,
      "step": 581580
    },
    {
      "epoch": 2.038327953541025,
      "grad_norm": 2.59375,
      "learning_rate": 1.7808827309049276e-05,
      "loss": 0.8282,
      "step": 581590
    },
    {
      "epoch": 2.0383630010479203,
      "grad_norm": 2.796875,
      "learning_rate": 1.7808178280385578e-05,
      "loss": 0.7831,
      "step": 581600
    },
    {
      "epoch": 2.038398048554816,
      "grad_norm": 2.671875,
      "learning_rate": 1.7807529251721876e-05,
      "loss": 0.7316,
      "step": 581610
    },
    {
      "epoch": 2.038433096061712,
      "grad_norm": 3.109375,
      "learning_rate": 1.7806880223058174e-05,
      "loss": 0.8945,
      "step": 581620
    },
    {
      "epoch": 2.038468143568607,
      "grad_norm": 3.3125,
      "learning_rate": 1.7806231194394472e-05,
      "loss": 0.8962,
      "step": 581630
    },
    {
      "epoch": 2.038503191075503,
      "grad_norm": 3.0,
      "learning_rate": 1.780558216573077e-05,
      "loss": 0.8614,
      "step": 581640
    },
    {
      "epoch": 2.0385382385823982,
      "grad_norm": 2.78125,
      "learning_rate": 1.7804933137067068e-05,
      "loss": 0.7661,
      "step": 581650
    },
    {
      "epoch": 2.038573286089294,
      "grad_norm": 2.765625,
      "learning_rate": 1.7804284108403366e-05,
      "loss": 0.7806,
      "step": 581660
    },
    {
      "epoch": 2.0386083335961898,
      "grad_norm": 2.59375,
      "learning_rate": 1.780363507973966e-05,
      "loss": 0.8653,
      "step": 581670
    },
    {
      "epoch": 2.038643381103085,
      "grad_norm": 3.21875,
      "learning_rate": 1.780298605107596e-05,
      "loss": 0.8388,
      "step": 581680
    },
    {
      "epoch": 2.038678428609981,
      "grad_norm": 3.15625,
      "learning_rate": 1.7802337022412256e-05,
      "loss": 0.9749,
      "step": 581690
    },
    {
      "epoch": 2.0387134761168766,
      "grad_norm": 2.6875,
      "learning_rate": 1.7801687993748554e-05,
      "loss": 0.8693,
      "step": 581700
    },
    {
      "epoch": 2.038748523623772,
      "grad_norm": 2.984375,
      "learning_rate": 1.7801038965084852e-05,
      "loss": 0.7972,
      "step": 581710
    },
    {
      "epoch": 2.0387835711306677,
      "grad_norm": 2.921875,
      "learning_rate": 1.7800389936421154e-05,
      "loss": 0.8073,
      "step": 581720
    },
    {
      "epoch": 2.0388186186375634,
      "grad_norm": 3.1875,
      "learning_rate": 1.7799740907757452e-05,
      "loss": 0.8551,
      "step": 581730
    },
    {
      "epoch": 2.0388536661444587,
      "grad_norm": 2.640625,
      "learning_rate": 1.779909187909375e-05,
      "loss": 0.7109,
      "step": 581740
    },
    {
      "epoch": 2.0388887136513545,
      "grad_norm": 2.484375,
      "learning_rate": 1.7798442850430048e-05,
      "loss": 0.8804,
      "step": 581750
    },
    {
      "epoch": 2.03892376115825,
      "grad_norm": 3.125,
      "learning_rate": 1.7797793821766346e-05,
      "loss": 0.7639,
      "step": 581760
    },
    {
      "epoch": 2.0389588086651456,
      "grad_norm": 2.96875,
      "learning_rate": 1.7797144793102644e-05,
      "loss": 0.7407,
      "step": 581770
    },
    {
      "epoch": 2.0389938561720413,
      "grad_norm": 3.34375,
      "learning_rate": 1.7796495764438942e-05,
      "loss": 0.7831,
      "step": 581780
    },
    {
      "epoch": 2.0390289036789366,
      "grad_norm": 2.40625,
      "learning_rate": 1.779584673577524e-05,
      "loss": 0.8865,
      "step": 581790
    },
    {
      "epoch": 2.0390639511858324,
      "grad_norm": 2.671875,
      "learning_rate": 1.7795197707111538e-05,
      "loss": 0.8283,
      "step": 581800
    },
    {
      "epoch": 2.039098998692728,
      "grad_norm": 2.3125,
      "learning_rate": 1.7794548678447836e-05,
      "loss": 0.8113,
      "step": 581810
    },
    {
      "epoch": 2.0391340461996235,
      "grad_norm": 2.890625,
      "learning_rate": 1.7793899649784134e-05,
      "loss": 0.8816,
      "step": 581820
    },
    {
      "epoch": 2.039169093706519,
      "grad_norm": 3.1875,
      "learning_rate": 1.7793250621120432e-05,
      "loss": 0.8086,
      "step": 581830
    },
    {
      "epoch": 2.039204141213415,
      "grad_norm": 3.03125,
      "learning_rate": 1.779260159245673e-05,
      "loss": 0.7711,
      "step": 581840
    },
    {
      "epoch": 2.0392391887203103,
      "grad_norm": 2.828125,
      "learning_rate": 1.7791952563793028e-05,
      "loss": 0.8744,
      "step": 581850
    },
    {
      "epoch": 2.039274236227206,
      "grad_norm": 2.734375,
      "learning_rate": 1.7791303535129326e-05,
      "loss": 0.8495,
      "step": 581860
    },
    {
      "epoch": 2.0393092837341014,
      "grad_norm": 2.765625,
      "learning_rate": 1.7790654506465624e-05,
      "loss": 0.8089,
      "step": 581870
    },
    {
      "epoch": 2.039344331240997,
      "grad_norm": 2.65625,
      "learning_rate": 1.7790005477801922e-05,
      "loss": 0.8054,
      "step": 581880
    },
    {
      "epoch": 2.039379378747893,
      "grad_norm": 3.359375,
      "learning_rate": 1.778935644913822e-05,
      "loss": 0.8544,
      "step": 581890
    },
    {
      "epoch": 2.039414426254788,
      "grad_norm": 2.765625,
      "learning_rate": 1.7788707420474518e-05,
      "loss": 0.7809,
      "step": 581900
    },
    {
      "epoch": 2.039449473761684,
      "grad_norm": 3.125,
      "learning_rate": 1.7788058391810816e-05,
      "loss": 0.8008,
      "step": 581910
    },
    {
      "epoch": 2.0394845212685797,
      "grad_norm": 3.21875,
      "learning_rate": 1.7787409363147114e-05,
      "loss": 0.7503,
      "step": 581920
    },
    {
      "epoch": 2.039519568775475,
      "grad_norm": 3.125,
      "learning_rate": 1.7786760334483412e-05,
      "loss": 0.777,
      "step": 581930
    },
    {
      "epoch": 2.039554616282371,
      "grad_norm": 3.328125,
      "learning_rate": 1.778611130581971e-05,
      "loss": 0.7799,
      "step": 581940
    },
    {
      "epoch": 2.0395896637892665,
      "grad_norm": 3.0,
      "learning_rate": 1.7785462277156008e-05,
      "loss": 0.9042,
      "step": 581950
    },
    {
      "epoch": 2.039624711296162,
      "grad_norm": 3.390625,
      "learning_rate": 1.7784813248492306e-05,
      "loss": 1.0218,
      "step": 581960
    },
    {
      "epoch": 2.0396597588030576,
      "grad_norm": 3.015625,
      "learning_rate": 1.7784164219828607e-05,
      "loss": 0.7791,
      "step": 581970
    },
    {
      "epoch": 2.039694806309953,
      "grad_norm": 3.234375,
      "learning_rate": 1.7783515191164905e-05,
      "loss": 0.8345,
      "step": 581980
    },
    {
      "epoch": 2.0397298538168487,
      "grad_norm": 2.890625,
      "learning_rate": 1.7782866162501203e-05,
      "loss": 0.9161,
      "step": 581990
    },
    {
      "epoch": 2.0397649013237444,
      "grad_norm": 3.03125,
      "learning_rate": 1.77822171338375e-05,
      "loss": 0.8928,
      "step": 582000
    },
    {
      "epoch": 2.0397999488306398,
      "grad_norm": 2.46875,
      "learning_rate": 1.77815681051738e-05,
      "loss": 0.8334,
      "step": 582010
    },
    {
      "epoch": 2.0398349963375355,
      "grad_norm": 3.296875,
      "learning_rate": 1.7780919076510097e-05,
      "loss": 0.7819,
      "step": 582020
    },
    {
      "epoch": 2.0398700438444313,
      "grad_norm": 2.859375,
      "learning_rate": 1.7780270047846395e-05,
      "loss": 0.7739,
      "step": 582030
    },
    {
      "epoch": 2.0399050913513266,
      "grad_norm": 3.40625,
      "learning_rate": 1.7779621019182693e-05,
      "loss": 0.8213,
      "step": 582040
    },
    {
      "epoch": 2.0399401388582223,
      "grad_norm": 2.953125,
      "learning_rate": 1.7778971990518988e-05,
      "loss": 0.8542,
      "step": 582050
    },
    {
      "epoch": 2.039975186365118,
      "grad_norm": 3.03125,
      "learning_rate": 1.7778322961855286e-05,
      "loss": 0.825,
      "step": 582060
    },
    {
      "epoch": 2.0400102338720134,
      "grad_norm": 2.640625,
      "learning_rate": 1.7777673933191584e-05,
      "loss": 0.8136,
      "step": 582070
    },
    {
      "epoch": 2.040045281378909,
      "grad_norm": 3.0625,
      "learning_rate": 1.7777024904527885e-05,
      "loss": 0.8318,
      "step": 582080
    },
    {
      "epoch": 2.0400803288858045,
      "grad_norm": 2.859375,
      "learning_rate": 1.7776375875864183e-05,
      "loss": 0.8175,
      "step": 582090
    },
    {
      "epoch": 2.0401153763927002,
      "grad_norm": 2.90625,
      "learning_rate": 1.777572684720048e-05,
      "loss": 0.771,
      "step": 582100
    },
    {
      "epoch": 2.040150423899596,
      "grad_norm": 2.953125,
      "learning_rate": 1.777507781853678e-05,
      "loss": 0.7958,
      "step": 582110
    },
    {
      "epoch": 2.0401854714064913,
      "grad_norm": 3.34375,
      "learning_rate": 1.7774428789873077e-05,
      "loss": 0.8464,
      "step": 582120
    },
    {
      "epoch": 2.040220518913387,
      "grad_norm": 2.6875,
      "learning_rate": 1.7773779761209375e-05,
      "loss": 0.8733,
      "step": 582130
    },
    {
      "epoch": 2.040255566420283,
      "grad_norm": 3.171875,
      "learning_rate": 1.7773130732545673e-05,
      "loss": 0.8335,
      "step": 582140
    },
    {
      "epoch": 2.040290613927178,
      "grad_norm": 2.734375,
      "learning_rate": 1.777248170388197e-05,
      "loss": 0.7675,
      "step": 582150
    },
    {
      "epoch": 2.040325661434074,
      "grad_norm": 3.09375,
      "learning_rate": 1.777183267521827e-05,
      "loss": 0.8639,
      "step": 582160
    },
    {
      "epoch": 2.0403607089409697,
      "grad_norm": 2.65625,
      "learning_rate": 1.7771183646554567e-05,
      "loss": 0.8445,
      "step": 582170
    },
    {
      "epoch": 2.040395756447865,
      "grad_norm": 3.015625,
      "learning_rate": 1.7770534617890865e-05,
      "loss": 0.8954,
      "step": 582180
    },
    {
      "epoch": 2.0404308039547607,
      "grad_norm": 2.796875,
      "learning_rate": 1.7769885589227163e-05,
      "loss": 0.8445,
      "step": 582190
    },
    {
      "epoch": 2.0404658514616565,
      "grad_norm": 2.90625,
      "learning_rate": 1.776923656056346e-05,
      "loss": 0.8412,
      "step": 582200
    },
    {
      "epoch": 2.040500898968552,
      "grad_norm": 2.75,
      "learning_rate": 1.776858753189976e-05,
      "loss": 0.7805,
      "step": 582210
    },
    {
      "epoch": 2.0405359464754476,
      "grad_norm": 2.75,
      "learning_rate": 1.776793850323606e-05,
      "loss": 0.8801,
      "step": 582220
    },
    {
      "epoch": 2.040570993982343,
      "grad_norm": 2.953125,
      "learning_rate": 1.7767289474572355e-05,
      "loss": 0.8681,
      "step": 582230
    },
    {
      "epoch": 2.0406060414892386,
      "grad_norm": 3.609375,
      "learning_rate": 1.7766640445908653e-05,
      "loss": 0.7875,
      "step": 582240
    },
    {
      "epoch": 2.0406410889961344,
      "grad_norm": 3.140625,
      "learning_rate": 1.776599141724495e-05,
      "loss": 0.7977,
      "step": 582250
    },
    {
      "epoch": 2.0406761365030297,
      "grad_norm": 2.703125,
      "learning_rate": 1.776534238858125e-05,
      "loss": 0.8993,
      "step": 582260
    },
    {
      "epoch": 2.0407111840099255,
      "grad_norm": 3.421875,
      "learning_rate": 1.7764693359917547e-05,
      "loss": 0.8741,
      "step": 582270
    },
    {
      "epoch": 2.040746231516821,
      "grad_norm": 3.140625,
      "learning_rate": 1.7764044331253845e-05,
      "loss": 0.8524,
      "step": 582280
    },
    {
      "epoch": 2.0407812790237165,
      "grad_norm": 2.734375,
      "learning_rate": 1.7763395302590143e-05,
      "loss": 0.8065,
      "step": 582290
    },
    {
      "epoch": 2.0408163265306123,
      "grad_norm": 3.109375,
      "learning_rate": 1.776274627392644e-05,
      "loss": 0.8548,
      "step": 582300
    },
    {
      "epoch": 2.040851374037508,
      "grad_norm": 2.65625,
      "learning_rate": 1.776209724526274e-05,
      "loss": 0.8191,
      "step": 582310
    },
    {
      "epoch": 2.0408864215444034,
      "grad_norm": 3.171875,
      "learning_rate": 1.7761448216599037e-05,
      "loss": 0.9051,
      "step": 582320
    },
    {
      "epoch": 2.040921469051299,
      "grad_norm": 2.953125,
      "learning_rate": 1.7760799187935335e-05,
      "loss": 0.7761,
      "step": 582330
    },
    {
      "epoch": 2.0409565165581944,
      "grad_norm": 3.203125,
      "learning_rate": 1.7760150159271637e-05,
      "loss": 0.7964,
      "step": 582340
    },
    {
      "epoch": 2.04099156406509,
      "grad_norm": 2.78125,
      "learning_rate": 1.7759501130607935e-05,
      "loss": 0.7871,
      "step": 582350
    },
    {
      "epoch": 2.041026611571986,
      "grad_norm": 2.6875,
      "learning_rate": 1.7758852101944233e-05,
      "loss": 0.7958,
      "step": 582360
    },
    {
      "epoch": 2.0410616590788813,
      "grad_norm": 3.15625,
      "learning_rate": 1.775820307328053e-05,
      "loss": 0.831,
      "step": 582370
    },
    {
      "epoch": 2.041096706585777,
      "grad_norm": 2.25,
      "learning_rate": 1.775755404461683e-05,
      "loss": 0.8396,
      "step": 582380
    },
    {
      "epoch": 2.041131754092673,
      "grad_norm": 3.046875,
      "learning_rate": 1.7756905015953127e-05,
      "loss": 0.8261,
      "step": 582390
    },
    {
      "epoch": 2.041166801599568,
      "grad_norm": 2.765625,
      "learning_rate": 1.7756255987289425e-05,
      "loss": 0.7679,
      "step": 582400
    },
    {
      "epoch": 2.041201849106464,
      "grad_norm": 2.9375,
      "learning_rate": 1.7755606958625723e-05,
      "loss": 0.9253,
      "step": 582410
    },
    {
      "epoch": 2.0412368966133596,
      "grad_norm": 2.40625,
      "learning_rate": 1.7754957929962017e-05,
      "loss": 0.8145,
      "step": 582420
    },
    {
      "epoch": 2.041271944120255,
      "grad_norm": 2.828125,
      "learning_rate": 1.7754308901298315e-05,
      "loss": 0.8306,
      "step": 582430
    },
    {
      "epoch": 2.0413069916271507,
      "grad_norm": 2.796875,
      "learning_rate": 1.7753659872634613e-05,
      "loss": 0.8102,
      "step": 582440
    },
    {
      "epoch": 2.041342039134046,
      "grad_norm": 2.765625,
      "learning_rate": 1.7753010843970915e-05,
      "loss": 0.8409,
      "step": 582450
    },
    {
      "epoch": 2.0413770866409418,
      "grad_norm": 4.375,
      "learning_rate": 1.7752361815307213e-05,
      "loss": 0.843,
      "step": 582460
    },
    {
      "epoch": 2.0414121341478375,
      "grad_norm": 3.296875,
      "learning_rate": 1.775171278664351e-05,
      "loss": 0.8934,
      "step": 582470
    },
    {
      "epoch": 2.041447181654733,
      "grad_norm": 3.0,
      "learning_rate": 1.775106375797981e-05,
      "loss": 0.7245,
      "step": 582480
    },
    {
      "epoch": 2.0414822291616286,
      "grad_norm": 3.296875,
      "learning_rate": 1.7750414729316107e-05,
      "loss": 0.8639,
      "step": 582490
    },
    {
      "epoch": 2.0415172766685243,
      "grad_norm": 2.65625,
      "learning_rate": 1.7749765700652405e-05,
      "loss": 0.8179,
      "step": 582500
    },
    {
      "epoch": 2.0415523241754197,
      "grad_norm": 3.21875,
      "learning_rate": 1.7749116671988703e-05,
      "loss": 0.8125,
      "step": 582510
    },
    {
      "epoch": 2.0415873716823154,
      "grad_norm": 3.140625,
      "learning_rate": 1.7748467643325e-05,
      "loss": 0.8823,
      "step": 582520
    },
    {
      "epoch": 2.041622419189211,
      "grad_norm": 2.53125,
      "learning_rate": 1.77478186146613e-05,
      "loss": 0.8442,
      "step": 582530
    },
    {
      "epoch": 2.0416574666961065,
      "grad_norm": 2.984375,
      "learning_rate": 1.7747169585997597e-05,
      "loss": 0.8365,
      "step": 582540
    },
    {
      "epoch": 2.0416925142030022,
      "grad_norm": 2.75,
      "learning_rate": 1.7746520557333895e-05,
      "loss": 0.913,
      "step": 582550
    },
    {
      "epoch": 2.0417275617098976,
      "grad_norm": 3.171875,
      "learning_rate": 1.7745871528670193e-05,
      "loss": 0.7988,
      "step": 582560
    },
    {
      "epoch": 2.0417626092167933,
      "grad_norm": 2.734375,
      "learning_rate": 1.774522250000649e-05,
      "loss": 0.7509,
      "step": 582570
    },
    {
      "epoch": 2.041797656723689,
      "grad_norm": 2.578125,
      "learning_rate": 1.774457347134279e-05,
      "loss": 0.7829,
      "step": 582580
    },
    {
      "epoch": 2.0418327042305844,
      "grad_norm": 2.84375,
      "learning_rate": 1.774392444267909e-05,
      "loss": 0.906,
      "step": 582590
    },
    {
      "epoch": 2.04186775173748,
      "grad_norm": 3.171875,
      "learning_rate": 1.7743275414015388e-05,
      "loss": 0.8912,
      "step": 582600
    },
    {
      "epoch": 2.041902799244376,
      "grad_norm": 2.984375,
      "learning_rate": 1.7742626385351683e-05,
      "loss": 0.7921,
      "step": 582610
    },
    {
      "epoch": 2.041937846751271,
      "grad_norm": 3.0625,
      "learning_rate": 1.774197735668798e-05,
      "loss": 0.7098,
      "step": 582620
    },
    {
      "epoch": 2.041972894258167,
      "grad_norm": 2.984375,
      "learning_rate": 1.774132832802428e-05,
      "loss": 0.8535,
      "step": 582630
    },
    {
      "epoch": 2.0420079417650627,
      "grad_norm": 3.28125,
      "learning_rate": 1.7740679299360577e-05,
      "loss": 0.883,
      "step": 582640
    },
    {
      "epoch": 2.042042989271958,
      "grad_norm": 2.96875,
      "learning_rate": 1.7740030270696875e-05,
      "loss": 0.845,
      "step": 582650
    },
    {
      "epoch": 2.042078036778854,
      "grad_norm": 2.796875,
      "learning_rate": 1.7739381242033173e-05,
      "loss": 0.7934,
      "step": 582660
    },
    {
      "epoch": 2.0421130842857496,
      "grad_norm": 2.8125,
      "learning_rate": 1.773873221336947e-05,
      "loss": 0.7941,
      "step": 582670
    },
    {
      "epoch": 2.042148131792645,
      "grad_norm": 2.875,
      "learning_rate": 1.773808318470577e-05,
      "loss": 0.8908,
      "step": 582680
    },
    {
      "epoch": 2.0421831792995406,
      "grad_norm": 2.921875,
      "learning_rate": 1.7737434156042067e-05,
      "loss": 0.7743,
      "step": 582690
    },
    {
      "epoch": 2.042218226806436,
      "grad_norm": 2.34375,
      "learning_rate": 1.7736785127378368e-05,
      "loss": 0.8088,
      "step": 582700
    },
    {
      "epoch": 2.0422532743133317,
      "grad_norm": 3.34375,
      "learning_rate": 1.7736136098714666e-05,
      "loss": 0.908,
      "step": 582710
    },
    {
      "epoch": 2.0422883218202275,
      "grad_norm": 2.515625,
      "learning_rate": 1.7735487070050964e-05,
      "loss": 0.8532,
      "step": 582720
    },
    {
      "epoch": 2.0423233693271228,
      "grad_norm": 3.015625,
      "learning_rate": 1.7734838041387262e-05,
      "loss": 0.8805,
      "step": 582730
    },
    {
      "epoch": 2.0423584168340185,
      "grad_norm": 3.375,
      "learning_rate": 1.773418901272356e-05,
      "loss": 0.8356,
      "step": 582740
    },
    {
      "epoch": 2.0423934643409143,
      "grad_norm": 2.6875,
      "learning_rate": 1.7733539984059858e-05,
      "loss": 0.8342,
      "step": 582750
    },
    {
      "epoch": 2.0424285118478096,
      "grad_norm": 2.921875,
      "learning_rate": 1.7732890955396156e-05,
      "loss": 0.767,
      "step": 582760
    },
    {
      "epoch": 2.0424635593547054,
      "grad_norm": 2.53125,
      "learning_rate": 1.7732241926732454e-05,
      "loss": 0.7937,
      "step": 582770
    },
    {
      "epoch": 2.042498606861601,
      "grad_norm": 2.984375,
      "learning_rate": 1.7731592898068752e-05,
      "loss": 0.8143,
      "step": 582780
    },
    {
      "epoch": 2.0425336543684964,
      "grad_norm": 3.46875,
      "learning_rate": 1.773094386940505e-05,
      "loss": 0.8714,
      "step": 582790
    },
    {
      "epoch": 2.042568701875392,
      "grad_norm": 2.859375,
      "learning_rate": 1.7730294840741345e-05,
      "loss": 0.7989,
      "step": 582800
    },
    {
      "epoch": 2.0426037493822875,
      "grad_norm": 2.84375,
      "learning_rate": 1.7729645812077643e-05,
      "loss": 0.7555,
      "step": 582810
    },
    {
      "epoch": 2.0426387968891833,
      "grad_norm": 2.375,
      "learning_rate": 1.7728996783413944e-05,
      "loss": 0.7875,
      "step": 582820
    },
    {
      "epoch": 2.042673844396079,
      "grad_norm": 2.90625,
      "learning_rate": 1.7728347754750242e-05,
      "loss": 0.8042,
      "step": 582830
    },
    {
      "epoch": 2.0427088919029743,
      "grad_norm": 2.609375,
      "learning_rate": 1.772769872608654e-05,
      "loss": 0.8543,
      "step": 582840
    },
    {
      "epoch": 2.04274393940987,
      "grad_norm": 2.390625,
      "learning_rate": 1.7727049697422838e-05,
      "loss": 0.7681,
      "step": 582850
    },
    {
      "epoch": 2.042778986916766,
      "grad_norm": 2.859375,
      "learning_rate": 1.7726400668759136e-05,
      "loss": 0.7537,
      "step": 582860
    },
    {
      "epoch": 2.042814034423661,
      "grad_norm": 2.75,
      "learning_rate": 1.7725751640095434e-05,
      "loss": 0.763,
      "step": 582870
    },
    {
      "epoch": 2.042849081930557,
      "grad_norm": 2.921875,
      "learning_rate": 1.7725102611431732e-05,
      "loss": 0.8218,
      "step": 582880
    },
    {
      "epoch": 2.0428841294374527,
      "grad_norm": 2.890625,
      "learning_rate": 1.772445358276803e-05,
      "loss": 0.8022,
      "step": 582890
    },
    {
      "epoch": 2.042919176944348,
      "grad_norm": 2.671875,
      "learning_rate": 1.7723804554104328e-05,
      "loss": 0.8421,
      "step": 582900
    },
    {
      "epoch": 2.0429542244512437,
      "grad_norm": 2.953125,
      "learning_rate": 1.7723155525440626e-05,
      "loss": 0.8816,
      "step": 582910
    },
    {
      "epoch": 2.042989271958139,
      "grad_norm": 3.03125,
      "learning_rate": 1.7722506496776924e-05,
      "loss": 0.7879,
      "step": 582920
    },
    {
      "epoch": 2.043024319465035,
      "grad_norm": 2.4375,
      "learning_rate": 1.7721857468113222e-05,
      "loss": 0.8393,
      "step": 582930
    },
    {
      "epoch": 2.0430593669719306,
      "grad_norm": 2.828125,
      "learning_rate": 1.772120843944952e-05,
      "loss": 0.7527,
      "step": 582940
    },
    {
      "epoch": 2.043094414478826,
      "grad_norm": 3.140625,
      "learning_rate": 1.7720559410785818e-05,
      "loss": 0.8705,
      "step": 582950
    },
    {
      "epoch": 2.0431294619857217,
      "grad_norm": 2.9375,
      "learning_rate": 1.771991038212212e-05,
      "loss": 0.8218,
      "step": 582960
    },
    {
      "epoch": 2.0431645094926174,
      "grad_norm": 3.015625,
      "learning_rate": 1.7719261353458417e-05,
      "loss": 0.8319,
      "step": 582970
    },
    {
      "epoch": 2.0431995569995127,
      "grad_norm": 3.125,
      "learning_rate": 1.7718612324794715e-05,
      "loss": 0.8127,
      "step": 582980
    },
    {
      "epoch": 2.0432346045064085,
      "grad_norm": 2.84375,
      "learning_rate": 1.771796329613101e-05,
      "loss": 0.7885,
      "step": 582990
    },
    {
      "epoch": 2.0432696520133042,
      "grad_norm": 2.5625,
      "learning_rate": 1.7717314267467308e-05,
      "loss": 0.8566,
      "step": 583000
    },
    {
      "epoch": 2.0433046995201996,
      "grad_norm": 2.671875,
      "learning_rate": 1.7716665238803606e-05,
      "loss": 0.8737,
      "step": 583010
    },
    {
      "epoch": 2.0433397470270953,
      "grad_norm": 3.390625,
      "learning_rate": 1.7716016210139904e-05,
      "loss": 0.8228,
      "step": 583020
    },
    {
      "epoch": 2.0433747945339906,
      "grad_norm": 2.9375,
      "learning_rate": 1.7715367181476202e-05,
      "loss": 0.8447,
      "step": 583030
    },
    {
      "epoch": 2.0434098420408864,
      "grad_norm": 2.9375,
      "learning_rate": 1.77147181528125e-05,
      "loss": 0.8546,
      "step": 583040
    },
    {
      "epoch": 2.043444889547782,
      "grad_norm": 3.125,
      "learning_rate": 1.7714069124148798e-05,
      "loss": 0.7737,
      "step": 583050
    },
    {
      "epoch": 2.0434799370546775,
      "grad_norm": 3.3125,
      "learning_rate": 1.7713420095485096e-05,
      "loss": 0.8708,
      "step": 583060
    },
    {
      "epoch": 2.043514984561573,
      "grad_norm": 3.359375,
      "learning_rate": 1.7712771066821397e-05,
      "loss": 0.8401,
      "step": 583070
    },
    {
      "epoch": 2.043550032068469,
      "grad_norm": 3.03125,
      "learning_rate": 1.7712122038157695e-05,
      "loss": 0.8773,
      "step": 583080
    },
    {
      "epoch": 2.0435850795753643,
      "grad_norm": 2.5,
      "learning_rate": 1.7711473009493993e-05,
      "loss": 0.7348,
      "step": 583090
    },
    {
      "epoch": 2.04362012708226,
      "grad_norm": 3.109375,
      "learning_rate": 1.771082398083029e-05,
      "loss": 0.8783,
      "step": 583100
    },
    {
      "epoch": 2.043655174589156,
      "grad_norm": 2.75,
      "learning_rate": 1.771017495216659e-05,
      "loss": 0.8514,
      "step": 583110
    },
    {
      "epoch": 2.043690222096051,
      "grad_norm": 2.984375,
      "learning_rate": 1.7709525923502887e-05,
      "loss": 0.7162,
      "step": 583120
    },
    {
      "epoch": 2.043725269602947,
      "grad_norm": 2.75,
      "learning_rate": 1.7708876894839185e-05,
      "loss": 0.8442,
      "step": 583130
    },
    {
      "epoch": 2.043760317109842,
      "grad_norm": 3.5625,
      "learning_rate": 1.7708227866175483e-05,
      "loss": 0.831,
      "step": 583140
    },
    {
      "epoch": 2.043795364616738,
      "grad_norm": 2.921875,
      "learning_rate": 1.770757883751178e-05,
      "loss": 0.7297,
      "step": 583150
    },
    {
      "epoch": 2.0438304121236337,
      "grad_norm": 2.828125,
      "learning_rate": 1.770692980884808e-05,
      "loss": 0.8045,
      "step": 583160
    },
    {
      "epoch": 2.043865459630529,
      "grad_norm": 2.796875,
      "learning_rate": 1.7706280780184374e-05,
      "loss": 0.9283,
      "step": 583170
    },
    {
      "epoch": 2.0439005071374248,
      "grad_norm": 2.765625,
      "learning_rate": 1.7705631751520675e-05,
      "loss": 0.8503,
      "step": 583180
    },
    {
      "epoch": 2.0439355546443205,
      "grad_norm": 3.125,
      "learning_rate": 1.7704982722856973e-05,
      "loss": 0.7573,
      "step": 583190
    },
    {
      "epoch": 2.043970602151216,
      "grad_norm": 3.21875,
      "learning_rate": 1.770433369419327e-05,
      "loss": 0.8096,
      "step": 583200
    },
    {
      "epoch": 2.0440056496581116,
      "grad_norm": 3.140625,
      "learning_rate": 1.770368466552957e-05,
      "loss": 0.8153,
      "step": 583210
    },
    {
      "epoch": 2.0440406971650074,
      "grad_norm": 3.265625,
      "learning_rate": 1.7703035636865867e-05,
      "loss": 0.8467,
      "step": 583220
    },
    {
      "epoch": 2.0440757446719027,
      "grad_norm": 2.21875,
      "learning_rate": 1.7702386608202165e-05,
      "loss": 0.8093,
      "step": 583230
    },
    {
      "epoch": 2.0441107921787984,
      "grad_norm": 2.71875,
      "learning_rate": 1.7701737579538463e-05,
      "loss": 0.8841,
      "step": 583240
    },
    {
      "epoch": 2.0441458396856937,
      "grad_norm": 3.0,
      "learning_rate": 1.770108855087476e-05,
      "loss": 0.828,
      "step": 583250
    },
    {
      "epoch": 2.0441808871925895,
      "grad_norm": 2.828125,
      "learning_rate": 1.770043952221106e-05,
      "loss": 0.8048,
      "step": 583260
    },
    {
      "epoch": 2.0442159346994853,
      "grad_norm": 3.171875,
      "learning_rate": 1.7699790493547357e-05,
      "loss": 0.8591,
      "step": 583270
    },
    {
      "epoch": 2.0442509822063806,
      "grad_norm": 2.5625,
      "learning_rate": 1.7699141464883655e-05,
      "loss": 0.7808,
      "step": 583280
    },
    {
      "epoch": 2.0442860297132763,
      "grad_norm": 3.203125,
      "learning_rate": 1.7698492436219953e-05,
      "loss": 0.7429,
      "step": 583290
    },
    {
      "epoch": 2.044321077220172,
      "grad_norm": 2.859375,
      "learning_rate": 1.769784340755625e-05,
      "loss": 0.7657,
      "step": 583300
    },
    {
      "epoch": 2.0443561247270674,
      "grad_norm": 2.8125,
      "learning_rate": 1.769719437889255e-05,
      "loss": 0.7798,
      "step": 583310
    },
    {
      "epoch": 2.044391172233963,
      "grad_norm": 3.203125,
      "learning_rate": 1.769654535022885e-05,
      "loss": 0.7843,
      "step": 583320
    },
    {
      "epoch": 2.044426219740859,
      "grad_norm": 3.125,
      "learning_rate": 1.769589632156515e-05,
      "loss": 0.8683,
      "step": 583330
    },
    {
      "epoch": 2.0444612672477542,
      "grad_norm": 3.203125,
      "learning_rate": 1.7695247292901447e-05,
      "loss": 0.9135,
      "step": 583340
    },
    {
      "epoch": 2.04449631475465,
      "grad_norm": 3.109375,
      "learning_rate": 1.7694598264237745e-05,
      "loss": 0.845,
      "step": 583350
    },
    {
      "epoch": 2.0445313622615453,
      "grad_norm": 3.359375,
      "learning_rate": 1.769394923557404e-05,
      "loss": 0.8994,
      "step": 583360
    },
    {
      "epoch": 2.044566409768441,
      "grad_norm": 3.140625,
      "learning_rate": 1.7693300206910337e-05,
      "loss": 0.8291,
      "step": 583370
    },
    {
      "epoch": 2.044601457275337,
      "grad_norm": 2.453125,
      "learning_rate": 1.7692651178246635e-05,
      "loss": 0.7065,
      "step": 583380
    },
    {
      "epoch": 2.044636504782232,
      "grad_norm": 2.640625,
      "learning_rate": 1.7692002149582933e-05,
      "loss": 0.832,
      "step": 583390
    },
    {
      "epoch": 2.044671552289128,
      "grad_norm": 2.734375,
      "learning_rate": 1.769135312091923e-05,
      "loss": 0.8337,
      "step": 583400
    },
    {
      "epoch": 2.0447065997960236,
      "grad_norm": 2.828125,
      "learning_rate": 1.769070409225553e-05,
      "loss": 0.8245,
      "step": 583410
    },
    {
      "epoch": 2.044741647302919,
      "grad_norm": 2.5,
      "learning_rate": 1.7690055063591827e-05,
      "loss": 0.8665,
      "step": 583420
    },
    {
      "epoch": 2.0447766948098147,
      "grad_norm": 2.859375,
      "learning_rate": 1.7689406034928125e-05,
      "loss": 0.8119,
      "step": 583430
    },
    {
      "epoch": 2.0448117423167105,
      "grad_norm": 2.671875,
      "learning_rate": 1.7688757006264427e-05,
      "loss": 0.7471,
      "step": 583440
    },
    {
      "epoch": 2.044846789823606,
      "grad_norm": 3.03125,
      "learning_rate": 1.7688107977600725e-05,
      "loss": 0.8797,
      "step": 583450
    },
    {
      "epoch": 2.0448818373305016,
      "grad_norm": 2.953125,
      "learning_rate": 1.7687458948937023e-05,
      "loss": 0.7378,
      "step": 583460
    },
    {
      "epoch": 2.0449168848373973,
      "grad_norm": 2.484375,
      "learning_rate": 1.768680992027332e-05,
      "loss": 0.7585,
      "step": 583470
    },
    {
      "epoch": 2.0449519323442926,
      "grad_norm": 2.90625,
      "learning_rate": 1.768616089160962e-05,
      "loss": 0.89,
      "step": 583480
    },
    {
      "epoch": 2.0449869798511884,
      "grad_norm": 3.171875,
      "learning_rate": 1.7685511862945917e-05,
      "loss": 0.7339,
      "step": 583490
    },
    {
      "epoch": 2.0450220273580837,
      "grad_norm": 2.96875,
      "learning_rate": 1.7684862834282215e-05,
      "loss": 0.7867,
      "step": 583500
    },
    {
      "epoch": 2.0450570748649795,
      "grad_norm": 3.140625,
      "learning_rate": 1.7684213805618513e-05,
      "loss": 0.768,
      "step": 583510
    },
    {
      "epoch": 2.045092122371875,
      "grad_norm": 2.96875,
      "learning_rate": 1.768356477695481e-05,
      "loss": 0.8704,
      "step": 583520
    },
    {
      "epoch": 2.0451271698787705,
      "grad_norm": 2.765625,
      "learning_rate": 1.768291574829111e-05,
      "loss": 0.8684,
      "step": 583530
    },
    {
      "epoch": 2.0451622173856663,
      "grad_norm": 2.578125,
      "learning_rate": 1.7682266719627407e-05,
      "loss": 0.7966,
      "step": 583540
    },
    {
      "epoch": 2.045197264892562,
      "grad_norm": 3.234375,
      "learning_rate": 1.7681617690963705e-05,
      "loss": 0.7598,
      "step": 583550
    },
    {
      "epoch": 2.0452323123994574,
      "grad_norm": 2.859375,
      "learning_rate": 1.7680968662300003e-05,
      "loss": 0.8045,
      "step": 583560
    },
    {
      "epoch": 2.045267359906353,
      "grad_norm": 2.75,
      "learning_rate": 1.76803196336363e-05,
      "loss": 0.8172,
      "step": 583570
    },
    {
      "epoch": 2.045302407413249,
      "grad_norm": 2.953125,
      "learning_rate": 1.76796706049726e-05,
      "loss": 0.7881,
      "step": 583580
    },
    {
      "epoch": 2.045337454920144,
      "grad_norm": 2.59375,
      "learning_rate": 1.7679021576308897e-05,
      "loss": 0.8706,
      "step": 583590
    },
    {
      "epoch": 2.04537250242704,
      "grad_norm": 3.21875,
      "learning_rate": 1.7678372547645195e-05,
      "loss": 0.8335,
      "step": 583600
    },
    {
      "epoch": 2.0454075499339353,
      "grad_norm": 2.671875,
      "learning_rate": 1.7677723518981493e-05,
      "loss": 0.8137,
      "step": 583610
    },
    {
      "epoch": 2.045442597440831,
      "grad_norm": 3.4375,
      "learning_rate": 1.767707449031779e-05,
      "loss": 0.8273,
      "step": 583620
    },
    {
      "epoch": 2.0454776449477268,
      "grad_norm": 3.109375,
      "learning_rate": 1.767642546165409e-05,
      "loss": 0.8348,
      "step": 583630
    },
    {
      "epoch": 2.045512692454622,
      "grad_norm": 2.71875,
      "learning_rate": 1.7675776432990387e-05,
      "loss": 0.8508,
      "step": 583640
    },
    {
      "epoch": 2.045547739961518,
      "grad_norm": 3.625,
      "learning_rate": 1.7675127404326685e-05,
      "loss": 0.8017,
      "step": 583650
    },
    {
      "epoch": 2.0455827874684136,
      "grad_norm": 3.375,
      "learning_rate": 1.7674478375662983e-05,
      "loss": 0.8874,
      "step": 583660
    },
    {
      "epoch": 2.045617834975309,
      "grad_norm": 3.078125,
      "learning_rate": 1.767382934699928e-05,
      "loss": 0.8957,
      "step": 583670
    },
    {
      "epoch": 2.0456528824822047,
      "grad_norm": 3.390625,
      "learning_rate": 1.767318031833558e-05,
      "loss": 0.8941,
      "step": 583680
    },
    {
      "epoch": 2.0456879299891004,
      "grad_norm": 3.484375,
      "learning_rate": 1.767253128967188e-05,
      "loss": 0.8246,
      "step": 583690
    },
    {
      "epoch": 2.0457229774959957,
      "grad_norm": 3.03125,
      "learning_rate": 1.7671882261008178e-05,
      "loss": 0.7646,
      "step": 583700
    },
    {
      "epoch": 2.0457580250028915,
      "grad_norm": 3.015625,
      "learning_rate": 1.7671233232344476e-05,
      "loss": 0.8015,
      "step": 583710
    },
    {
      "epoch": 2.045793072509787,
      "grad_norm": 3.046875,
      "learning_rate": 1.7670584203680774e-05,
      "loss": 0.7806,
      "step": 583720
    },
    {
      "epoch": 2.0458281200166826,
      "grad_norm": 2.9375,
      "learning_rate": 1.7669935175017072e-05,
      "loss": 0.8164,
      "step": 583730
    },
    {
      "epoch": 2.0458631675235783,
      "grad_norm": 2.875,
      "learning_rate": 1.7669286146353367e-05,
      "loss": 0.8361,
      "step": 583740
    },
    {
      "epoch": 2.0458982150304736,
      "grad_norm": 2.96875,
      "learning_rate": 1.7668637117689665e-05,
      "loss": 0.8517,
      "step": 583750
    },
    {
      "epoch": 2.0459332625373694,
      "grad_norm": 2.75,
      "learning_rate": 1.7667988089025963e-05,
      "loss": 0.7994,
      "step": 583760
    },
    {
      "epoch": 2.045968310044265,
      "grad_norm": 2.6875,
      "learning_rate": 1.766733906036226e-05,
      "loss": 0.8798,
      "step": 583770
    },
    {
      "epoch": 2.0460033575511605,
      "grad_norm": 2.71875,
      "learning_rate": 1.766669003169856e-05,
      "loss": 0.7623,
      "step": 583780
    },
    {
      "epoch": 2.0460384050580562,
      "grad_norm": 2.578125,
      "learning_rate": 1.7666041003034857e-05,
      "loss": 0.8236,
      "step": 583790
    },
    {
      "epoch": 2.046073452564952,
      "grad_norm": 2.90625,
      "learning_rate": 1.7665391974371158e-05,
      "loss": 0.7317,
      "step": 583800
    },
    {
      "epoch": 2.0461085000718473,
      "grad_norm": 3.078125,
      "learning_rate": 1.7664742945707456e-05,
      "loss": 0.8801,
      "step": 583810
    },
    {
      "epoch": 2.046143547578743,
      "grad_norm": 2.8125,
      "learning_rate": 1.7664093917043754e-05,
      "loss": 0.764,
      "step": 583820
    },
    {
      "epoch": 2.0461785950856384,
      "grad_norm": 2.765625,
      "learning_rate": 1.7663444888380052e-05,
      "loss": 0.8007,
      "step": 583830
    },
    {
      "epoch": 2.046213642592534,
      "grad_norm": 2.859375,
      "learning_rate": 1.766279585971635e-05,
      "loss": 0.84,
      "step": 583840
    },
    {
      "epoch": 2.04624869009943,
      "grad_norm": 2.484375,
      "learning_rate": 1.7662146831052648e-05,
      "loss": 0.7928,
      "step": 583850
    },
    {
      "epoch": 2.046283737606325,
      "grad_norm": 3.0,
      "learning_rate": 1.7661497802388946e-05,
      "loss": 0.8234,
      "step": 583860
    },
    {
      "epoch": 2.046318785113221,
      "grad_norm": 2.75,
      "learning_rate": 1.7660848773725244e-05,
      "loss": 0.7296,
      "step": 583870
    },
    {
      "epoch": 2.0463538326201167,
      "grad_norm": 3.203125,
      "learning_rate": 1.7660199745061542e-05,
      "loss": 0.8146,
      "step": 583880
    },
    {
      "epoch": 2.046388880127012,
      "grad_norm": 2.9375,
      "learning_rate": 1.765955071639784e-05,
      "loss": 0.8034,
      "step": 583890
    },
    {
      "epoch": 2.046423927633908,
      "grad_norm": 3.0,
      "learning_rate": 1.7658901687734138e-05,
      "loss": 0.8619,
      "step": 583900
    },
    {
      "epoch": 2.0464589751408035,
      "grad_norm": 2.9375,
      "learning_rate": 1.7658252659070436e-05,
      "loss": 0.9202,
      "step": 583910
    },
    {
      "epoch": 2.046494022647699,
      "grad_norm": 2.71875,
      "learning_rate": 1.7657603630406734e-05,
      "loss": 0.7729,
      "step": 583920
    },
    {
      "epoch": 2.0465290701545946,
      "grad_norm": 2.75,
      "learning_rate": 1.7656954601743032e-05,
      "loss": 0.9527,
      "step": 583930
    },
    {
      "epoch": 2.04656411766149,
      "grad_norm": 2.765625,
      "learning_rate": 1.765630557307933e-05,
      "loss": 0.7793,
      "step": 583940
    },
    {
      "epoch": 2.0465991651683857,
      "grad_norm": 3.375,
      "learning_rate": 1.7655656544415628e-05,
      "loss": 0.8687,
      "step": 583950
    },
    {
      "epoch": 2.0466342126752815,
      "grad_norm": 2.75,
      "learning_rate": 1.7655007515751926e-05,
      "loss": 0.8389,
      "step": 583960
    },
    {
      "epoch": 2.0466692601821768,
      "grad_norm": 2.984375,
      "learning_rate": 1.7654358487088224e-05,
      "loss": 0.852,
      "step": 583970
    },
    {
      "epoch": 2.0467043076890725,
      "grad_norm": 2.90625,
      "learning_rate": 1.7653709458424522e-05,
      "loss": 0.904,
      "step": 583980
    },
    {
      "epoch": 2.0467393551959683,
      "grad_norm": 2.328125,
      "learning_rate": 1.765306042976082e-05,
      "loss": 0.855,
      "step": 583990
    },
    {
      "epoch": 2.0467744027028636,
      "grad_norm": 2.984375,
      "learning_rate": 1.7652411401097118e-05,
      "loss": 0.9001,
      "step": 584000
    },
    {
      "epoch": 2.0468094502097594,
      "grad_norm": 2.984375,
      "learning_rate": 1.7651762372433416e-05,
      "loss": 0.8593,
      "step": 584010
    },
    {
      "epoch": 2.046844497716655,
      "grad_norm": 2.84375,
      "learning_rate": 1.7651113343769714e-05,
      "loss": 0.7679,
      "step": 584020
    },
    {
      "epoch": 2.0468795452235504,
      "grad_norm": 2.671875,
      "learning_rate": 1.7650464315106012e-05,
      "loss": 0.8675,
      "step": 584030
    },
    {
      "epoch": 2.046914592730446,
      "grad_norm": 2.734375,
      "learning_rate": 1.764981528644231e-05,
      "loss": 0.717,
      "step": 584040
    },
    {
      "epoch": 2.046949640237342,
      "grad_norm": 2.78125,
      "learning_rate": 1.764916625777861e-05,
      "loss": 0.743,
      "step": 584050
    },
    {
      "epoch": 2.0469846877442373,
      "grad_norm": 1.9453125,
      "learning_rate": 1.764851722911491e-05,
      "loss": 0.8057,
      "step": 584060
    },
    {
      "epoch": 2.047019735251133,
      "grad_norm": 2.671875,
      "learning_rate": 1.7647868200451208e-05,
      "loss": 0.8117,
      "step": 584070
    },
    {
      "epoch": 2.0470547827580283,
      "grad_norm": 2.65625,
      "learning_rate": 1.7647219171787506e-05,
      "loss": 0.7919,
      "step": 584080
    },
    {
      "epoch": 2.047089830264924,
      "grad_norm": 3.078125,
      "learning_rate": 1.7646570143123804e-05,
      "loss": 0.8375,
      "step": 584090
    },
    {
      "epoch": 2.04712487777182,
      "grad_norm": 2.84375,
      "learning_rate": 1.76459211144601e-05,
      "loss": 0.8654,
      "step": 584100
    },
    {
      "epoch": 2.047159925278715,
      "grad_norm": 3.359375,
      "learning_rate": 1.76452720857964e-05,
      "loss": 0.8487,
      "step": 584110
    },
    {
      "epoch": 2.047194972785611,
      "grad_norm": 2.8125,
      "learning_rate": 1.7644623057132694e-05,
      "loss": 0.8587,
      "step": 584120
    },
    {
      "epoch": 2.0472300202925067,
      "grad_norm": 3.296875,
      "learning_rate": 1.7643974028468992e-05,
      "loss": 0.8534,
      "step": 584130
    },
    {
      "epoch": 2.047265067799402,
      "grad_norm": 3.015625,
      "learning_rate": 1.764332499980529e-05,
      "loss": 0.8147,
      "step": 584140
    },
    {
      "epoch": 2.0473001153062977,
      "grad_norm": 3.265625,
      "learning_rate": 1.7642675971141588e-05,
      "loss": 0.7974,
      "step": 584150
    },
    {
      "epoch": 2.0473351628131935,
      "grad_norm": 2.59375,
      "learning_rate": 1.7642026942477886e-05,
      "loss": 0.8672,
      "step": 584160
    },
    {
      "epoch": 2.047370210320089,
      "grad_norm": 2.59375,
      "learning_rate": 1.7641377913814188e-05,
      "loss": 0.8025,
      "step": 584170
    },
    {
      "epoch": 2.0474052578269846,
      "grad_norm": 2.796875,
      "learning_rate": 1.7640728885150486e-05,
      "loss": 0.8765,
      "step": 584180
    },
    {
      "epoch": 2.04744030533388,
      "grad_norm": 3.265625,
      "learning_rate": 1.7640079856486784e-05,
      "loss": 0.8654,
      "step": 584190
    },
    {
      "epoch": 2.0474753528407756,
      "grad_norm": 3.421875,
      "learning_rate": 1.763943082782308e-05,
      "loss": 0.7587,
      "step": 584200
    },
    {
      "epoch": 2.0475104003476714,
      "grad_norm": 2.640625,
      "learning_rate": 1.763878179915938e-05,
      "loss": 0.8129,
      "step": 584210
    },
    {
      "epoch": 2.0475454478545667,
      "grad_norm": 3.625,
      "learning_rate": 1.7638132770495678e-05,
      "loss": 0.8681,
      "step": 584220
    },
    {
      "epoch": 2.0475804953614625,
      "grad_norm": 2.890625,
      "learning_rate": 1.7637483741831976e-05,
      "loss": 0.7977,
      "step": 584230
    },
    {
      "epoch": 2.0476155428683582,
      "grad_norm": 3.390625,
      "learning_rate": 1.7636834713168274e-05,
      "loss": 0.857,
      "step": 584240
    },
    {
      "epoch": 2.0476505903752535,
      "grad_norm": 3.328125,
      "learning_rate": 1.763618568450457e-05,
      "loss": 0.7659,
      "step": 584250
    },
    {
      "epoch": 2.0476856378821493,
      "grad_norm": 2.734375,
      "learning_rate": 1.763553665584087e-05,
      "loss": 0.8004,
      "step": 584260
    },
    {
      "epoch": 2.047720685389045,
      "grad_norm": 3.0625,
      "learning_rate": 1.7634887627177168e-05,
      "loss": 0.8958,
      "step": 584270
    },
    {
      "epoch": 2.0477557328959404,
      "grad_norm": 2.78125,
      "learning_rate": 1.7634238598513466e-05,
      "loss": 0.8229,
      "step": 584280
    },
    {
      "epoch": 2.047790780402836,
      "grad_norm": 2.59375,
      "learning_rate": 1.7633589569849764e-05,
      "loss": 0.8115,
      "step": 584290
    },
    {
      "epoch": 2.0478258279097314,
      "grad_norm": 2.921875,
      "learning_rate": 1.763294054118606e-05,
      "loss": 0.7681,
      "step": 584300
    },
    {
      "epoch": 2.047860875416627,
      "grad_norm": 2.890625,
      "learning_rate": 1.763229151252236e-05,
      "loss": 0.7818,
      "step": 584310
    },
    {
      "epoch": 2.047895922923523,
      "grad_norm": 3.140625,
      "learning_rate": 1.7631642483858658e-05,
      "loss": 0.8236,
      "step": 584320
    },
    {
      "epoch": 2.0479309704304183,
      "grad_norm": 2.5625,
      "learning_rate": 1.7630993455194956e-05,
      "loss": 0.7929,
      "step": 584330
    },
    {
      "epoch": 2.047966017937314,
      "grad_norm": 2.921875,
      "learning_rate": 1.7630344426531254e-05,
      "loss": 0.8596,
      "step": 584340
    },
    {
      "epoch": 2.04800106544421,
      "grad_norm": 2.6875,
      "learning_rate": 1.762969539786755e-05,
      "loss": 0.7881,
      "step": 584350
    },
    {
      "epoch": 2.048036112951105,
      "grad_norm": 2.40625,
      "learning_rate": 1.762904636920385e-05,
      "loss": 0.8289,
      "step": 584360
    },
    {
      "epoch": 2.048071160458001,
      "grad_norm": 2.953125,
      "learning_rate": 1.7628397340540148e-05,
      "loss": 0.8405,
      "step": 584370
    },
    {
      "epoch": 2.0481062079648966,
      "grad_norm": 2.8125,
      "learning_rate": 1.7627748311876446e-05,
      "loss": 0.832,
      "step": 584380
    },
    {
      "epoch": 2.048141255471792,
      "grad_norm": 2.875,
      "learning_rate": 1.7627099283212744e-05,
      "loss": 0.7995,
      "step": 584390
    },
    {
      "epoch": 2.0481763029786877,
      "grad_norm": 2.390625,
      "learning_rate": 1.762645025454904e-05,
      "loss": 0.7669,
      "step": 584400
    },
    {
      "epoch": 2.048211350485583,
      "grad_norm": 2.8125,
      "learning_rate": 1.762580122588534e-05,
      "loss": 0.81,
      "step": 584410
    },
    {
      "epoch": 2.0482463979924788,
      "grad_norm": 2.75,
      "learning_rate": 1.762515219722164e-05,
      "loss": 0.7926,
      "step": 584420
    },
    {
      "epoch": 2.0482814454993745,
      "grad_norm": 3.40625,
      "learning_rate": 1.762450316855794e-05,
      "loss": 0.9427,
      "step": 584430
    },
    {
      "epoch": 2.04831649300627,
      "grad_norm": 3.09375,
      "learning_rate": 1.7623854139894237e-05,
      "loss": 0.8058,
      "step": 584440
    },
    {
      "epoch": 2.0483515405131656,
      "grad_norm": 3.328125,
      "learning_rate": 1.7623205111230535e-05,
      "loss": 0.8111,
      "step": 584450
    },
    {
      "epoch": 2.0483865880200613,
      "grad_norm": 2.8125,
      "learning_rate": 1.7622556082566833e-05,
      "loss": 0.7826,
      "step": 584460
    },
    {
      "epoch": 2.0484216355269567,
      "grad_norm": 2.828125,
      "learning_rate": 1.762190705390313e-05,
      "loss": 0.8072,
      "step": 584470
    },
    {
      "epoch": 2.0484566830338524,
      "grad_norm": 3.296875,
      "learning_rate": 1.762125802523943e-05,
      "loss": 0.7863,
      "step": 584480
    },
    {
      "epoch": 2.048491730540748,
      "grad_norm": 3.171875,
      "learning_rate": 1.7620608996575724e-05,
      "loss": 0.8285,
      "step": 584490
    },
    {
      "epoch": 2.0485267780476435,
      "grad_norm": 2.375,
      "learning_rate": 1.761995996791202e-05,
      "loss": 0.8508,
      "step": 584500
    },
    {
      "epoch": 2.0485618255545393,
      "grad_norm": 2.75,
      "learning_rate": 1.761931093924832e-05,
      "loss": 0.8893,
      "step": 584510
    },
    {
      "epoch": 2.0485968730614346,
      "grad_norm": 2.703125,
      "learning_rate": 1.7618661910584618e-05,
      "loss": 0.7717,
      "step": 584520
    },
    {
      "epoch": 2.0486319205683303,
      "grad_norm": 2.796875,
      "learning_rate": 1.761801288192092e-05,
      "loss": 0.83,
      "step": 584530
    },
    {
      "epoch": 2.048666968075226,
      "grad_norm": 2.953125,
      "learning_rate": 1.7617363853257217e-05,
      "loss": 0.8306,
      "step": 584540
    },
    {
      "epoch": 2.0487020155821214,
      "grad_norm": 2.578125,
      "learning_rate": 1.7616714824593515e-05,
      "loss": 0.8162,
      "step": 584550
    },
    {
      "epoch": 2.048737063089017,
      "grad_norm": 3.015625,
      "learning_rate": 1.7616065795929813e-05,
      "loss": 0.8783,
      "step": 584560
    },
    {
      "epoch": 2.048772110595913,
      "grad_norm": 3.0,
      "learning_rate": 1.761541676726611e-05,
      "loss": 0.7693,
      "step": 584570
    },
    {
      "epoch": 2.0488071581028082,
      "grad_norm": 3.109375,
      "learning_rate": 1.761476773860241e-05,
      "loss": 0.7719,
      "step": 584580
    },
    {
      "epoch": 2.048842205609704,
      "grad_norm": 3.03125,
      "learning_rate": 1.7614118709938707e-05,
      "loss": 0.8587,
      "step": 584590
    },
    {
      "epoch": 2.0488772531165997,
      "grad_norm": 2.90625,
      "learning_rate": 1.7613469681275005e-05,
      "loss": 0.7744,
      "step": 584600
    },
    {
      "epoch": 2.048912300623495,
      "grad_norm": 2.578125,
      "learning_rate": 1.7612820652611303e-05,
      "loss": 0.8198,
      "step": 584610
    },
    {
      "epoch": 2.048947348130391,
      "grad_norm": 3.265625,
      "learning_rate": 1.76121716239476e-05,
      "loss": 0.8702,
      "step": 584620
    },
    {
      "epoch": 2.048982395637286,
      "grad_norm": 3.0625,
      "learning_rate": 1.76115225952839e-05,
      "loss": 0.7812,
      "step": 584630
    },
    {
      "epoch": 2.049017443144182,
      "grad_norm": 2.734375,
      "learning_rate": 1.7610873566620197e-05,
      "loss": 0.8365,
      "step": 584640
    },
    {
      "epoch": 2.0490524906510776,
      "grad_norm": 2.640625,
      "learning_rate": 1.7610224537956495e-05,
      "loss": 0.8718,
      "step": 584650
    },
    {
      "epoch": 2.049087538157973,
      "grad_norm": 2.96875,
      "learning_rate": 1.7609575509292793e-05,
      "loss": 0.8299,
      "step": 584660
    },
    {
      "epoch": 2.0491225856648687,
      "grad_norm": 3.21875,
      "learning_rate": 1.7608926480629094e-05,
      "loss": 0.8956,
      "step": 584670
    },
    {
      "epoch": 2.0491576331717645,
      "grad_norm": 3.03125,
      "learning_rate": 1.760827745196539e-05,
      "loss": 0.9075,
      "step": 584680
    },
    {
      "epoch": 2.04919268067866,
      "grad_norm": 2.890625,
      "learning_rate": 1.7607628423301687e-05,
      "loss": 0.7557,
      "step": 584690
    },
    {
      "epoch": 2.0492277281855555,
      "grad_norm": 3.421875,
      "learning_rate": 1.7606979394637985e-05,
      "loss": 0.8454,
      "step": 584700
    },
    {
      "epoch": 2.0492627756924513,
      "grad_norm": 2.90625,
      "learning_rate": 1.7606330365974283e-05,
      "loss": 0.8315,
      "step": 584710
    },
    {
      "epoch": 2.0492978231993466,
      "grad_norm": 2.546875,
      "learning_rate": 1.760568133731058e-05,
      "loss": 0.7983,
      "step": 584720
    },
    {
      "epoch": 2.0493328707062424,
      "grad_norm": 2.71875,
      "learning_rate": 1.760503230864688e-05,
      "loss": 0.8297,
      "step": 584730
    },
    {
      "epoch": 2.0493679182131377,
      "grad_norm": 2.5625,
      "learning_rate": 1.7604383279983177e-05,
      "loss": 0.863,
      "step": 584740
    },
    {
      "epoch": 2.0494029657200334,
      "grad_norm": 2.375,
      "learning_rate": 1.7603734251319475e-05,
      "loss": 0.8385,
      "step": 584750
    },
    {
      "epoch": 2.049438013226929,
      "grad_norm": 3.4375,
      "learning_rate": 1.7603085222655773e-05,
      "loss": 0.8886,
      "step": 584760
    },
    {
      "epoch": 2.0494730607338245,
      "grad_norm": 2.65625,
      "learning_rate": 1.760243619399207e-05,
      "loss": 0.7865,
      "step": 584770
    },
    {
      "epoch": 2.0495081082407203,
      "grad_norm": 2.859375,
      "learning_rate": 1.760178716532837e-05,
      "loss": 0.8083,
      "step": 584780
    },
    {
      "epoch": 2.049543155747616,
      "grad_norm": 2.515625,
      "learning_rate": 1.760113813666467e-05,
      "loss": 0.8256,
      "step": 584790
    },
    {
      "epoch": 2.0495782032545113,
      "grad_norm": 2.96875,
      "learning_rate": 1.760048910800097e-05,
      "loss": 0.8459,
      "step": 584800
    },
    {
      "epoch": 2.049613250761407,
      "grad_norm": 2.921875,
      "learning_rate": 1.7599840079337266e-05,
      "loss": 0.814,
      "step": 584810
    },
    {
      "epoch": 2.049648298268303,
      "grad_norm": 2.890625,
      "learning_rate": 1.7599191050673564e-05,
      "loss": 0.854,
      "step": 584820
    },
    {
      "epoch": 2.049683345775198,
      "grad_norm": 2.5625,
      "learning_rate": 1.7598542022009862e-05,
      "loss": 0.8144,
      "step": 584830
    },
    {
      "epoch": 2.049718393282094,
      "grad_norm": 2.78125,
      "learning_rate": 1.759789299334616e-05,
      "loss": 0.7925,
      "step": 584840
    },
    {
      "epoch": 2.0497534407889897,
      "grad_norm": 2.9375,
      "learning_rate": 1.759724396468246e-05,
      "loss": 0.8445,
      "step": 584850
    },
    {
      "epoch": 2.049788488295885,
      "grad_norm": 3.125,
      "learning_rate": 1.7596594936018756e-05,
      "loss": 0.8853,
      "step": 584860
    },
    {
      "epoch": 2.0498235358027808,
      "grad_norm": 3.234375,
      "learning_rate": 1.759594590735505e-05,
      "loss": 0.8477,
      "step": 584870
    },
    {
      "epoch": 2.049858583309676,
      "grad_norm": 3.1875,
      "learning_rate": 1.759529687869135e-05,
      "loss": 0.8558,
      "step": 584880
    },
    {
      "epoch": 2.049893630816572,
      "grad_norm": 2.6875,
      "learning_rate": 1.7594647850027647e-05,
      "loss": 0.8604,
      "step": 584890
    },
    {
      "epoch": 2.0499286783234676,
      "grad_norm": 2.859375,
      "learning_rate": 1.759399882136395e-05,
      "loss": 0.824,
      "step": 584900
    },
    {
      "epoch": 2.049963725830363,
      "grad_norm": 3.0,
      "learning_rate": 1.7593349792700246e-05,
      "loss": 0.7556,
      "step": 584910
    },
    {
      "epoch": 2.0499987733372587,
      "grad_norm": 3.625,
      "learning_rate": 1.7592700764036544e-05,
      "loss": 0.8263,
      "step": 584920
    },
    {
      "epoch": 2.0500338208441544,
      "grad_norm": 2.859375,
      "learning_rate": 1.7592051735372842e-05,
      "loss": 0.801,
      "step": 584930
    },
    {
      "epoch": 2.0500688683510497,
      "grad_norm": 2.984375,
      "learning_rate": 1.759140270670914e-05,
      "loss": 0.771,
      "step": 584940
    },
    {
      "epoch": 2.0501039158579455,
      "grad_norm": 3.234375,
      "learning_rate": 1.759075367804544e-05,
      "loss": 0.7587,
      "step": 584950
    },
    {
      "epoch": 2.0501389633648412,
      "grad_norm": 2.765625,
      "learning_rate": 1.7590104649381736e-05,
      "loss": 0.8168,
      "step": 584960
    },
    {
      "epoch": 2.0501740108717366,
      "grad_norm": 2.671875,
      "learning_rate": 1.7589455620718034e-05,
      "loss": 0.7248,
      "step": 584970
    },
    {
      "epoch": 2.0502090583786323,
      "grad_norm": 2.9375,
      "learning_rate": 1.7588806592054332e-05,
      "loss": 0.7826,
      "step": 584980
    },
    {
      "epoch": 2.0502441058855276,
      "grad_norm": 2.734375,
      "learning_rate": 1.758815756339063e-05,
      "loss": 0.7827,
      "step": 584990
    },
    {
      "epoch": 2.0502791533924234,
      "grad_norm": 2.6875,
      "learning_rate": 1.758750853472693e-05,
      "loss": 0.8257,
      "step": 585000
    },
    {
      "epoch": 2.0502791533924234,
      "eval_loss": 0.7692489624023438,
      "eval_runtime": 552.9006,
      "eval_samples_per_second": 688.073,
      "eval_steps_per_second": 57.339,
      "step": 585000
    },
    {
      "epoch": 2.050314200899319,
      "grad_norm": 3.125,
      "learning_rate": 1.7586859506063226e-05,
      "loss": 0.8376,
      "step": 585010
    },
    {
      "epoch": 2.0503492484062145,
      "grad_norm": 3.265625,
      "learning_rate": 1.7586210477399524e-05,
      "loss": 0.8183,
      "step": 585020
    },
    {
      "epoch": 2.05038429591311,
      "grad_norm": 2.6875,
      "learning_rate": 1.7585561448735822e-05,
      "loss": 0.8427,
      "step": 585030
    },
    {
      "epoch": 2.050419343420006,
      "grad_norm": 3.109375,
      "learning_rate": 1.7584912420072124e-05,
      "loss": 0.902,
      "step": 585040
    },
    {
      "epoch": 2.0504543909269013,
      "grad_norm": 2.859375,
      "learning_rate": 1.7584263391408422e-05,
      "loss": 0.8382,
      "step": 585050
    },
    {
      "epoch": 2.050489438433797,
      "grad_norm": 3.046875,
      "learning_rate": 1.7583614362744716e-05,
      "loss": 0.7898,
      "step": 585060
    },
    {
      "epoch": 2.050524485940693,
      "grad_norm": 2.734375,
      "learning_rate": 1.7582965334081014e-05,
      "loss": 0.8528,
      "step": 585070
    },
    {
      "epoch": 2.050559533447588,
      "grad_norm": 2.890625,
      "learning_rate": 1.7582316305417312e-05,
      "loss": 0.8561,
      "step": 585080
    },
    {
      "epoch": 2.050594580954484,
      "grad_norm": 2.75,
      "learning_rate": 1.758166727675361e-05,
      "loss": 0.8542,
      "step": 585090
    },
    {
      "epoch": 2.050629628461379,
      "grad_norm": 2.5625,
      "learning_rate": 1.758101824808991e-05,
      "loss": 0.815,
      "step": 585100
    },
    {
      "epoch": 2.050664675968275,
      "grad_norm": 3.0,
      "learning_rate": 1.7580369219426206e-05,
      "loss": 0.7625,
      "step": 585110
    },
    {
      "epoch": 2.0506997234751707,
      "grad_norm": 2.6875,
      "learning_rate": 1.7579720190762504e-05,
      "loss": 0.8169,
      "step": 585120
    },
    {
      "epoch": 2.050734770982066,
      "grad_norm": 2.9375,
      "learning_rate": 1.7579071162098802e-05,
      "loss": 0.8441,
      "step": 585130
    },
    {
      "epoch": 2.050769818488962,
      "grad_norm": 3.046875,
      "learning_rate": 1.75784221334351e-05,
      "loss": 0.8172,
      "step": 585140
    },
    {
      "epoch": 2.0508048659958575,
      "grad_norm": 3.140625,
      "learning_rate": 1.7577773104771402e-05,
      "loss": 0.8016,
      "step": 585150
    },
    {
      "epoch": 2.050839913502753,
      "grad_norm": 2.78125,
      "learning_rate": 1.75771240761077e-05,
      "loss": 0.791,
      "step": 585160
    },
    {
      "epoch": 2.0508749610096486,
      "grad_norm": 2.84375,
      "learning_rate": 1.7576475047443998e-05,
      "loss": 0.8434,
      "step": 585170
    },
    {
      "epoch": 2.0509100085165444,
      "grad_norm": 3.328125,
      "learning_rate": 1.7575826018780296e-05,
      "loss": 0.9058,
      "step": 585180
    },
    {
      "epoch": 2.0509450560234397,
      "grad_norm": 2.953125,
      "learning_rate": 1.7575176990116594e-05,
      "loss": 0.7872,
      "step": 585190
    },
    {
      "epoch": 2.0509801035303354,
      "grad_norm": 2.6875,
      "learning_rate": 1.7574527961452892e-05,
      "loss": 0.7993,
      "step": 585200
    },
    {
      "epoch": 2.0510151510372308,
      "grad_norm": 2.984375,
      "learning_rate": 1.757387893278919e-05,
      "loss": 0.8142,
      "step": 585210
    },
    {
      "epoch": 2.0510501985441265,
      "grad_norm": 2.8125,
      "learning_rate": 1.7573229904125488e-05,
      "loss": 0.8039,
      "step": 585220
    },
    {
      "epoch": 2.0510852460510223,
      "grad_norm": 2.5625,
      "learning_rate": 1.7572580875461786e-05,
      "loss": 0.8097,
      "step": 585230
    },
    {
      "epoch": 2.0511202935579176,
      "grad_norm": 2.71875,
      "learning_rate": 1.757193184679808e-05,
      "loss": 0.804,
      "step": 585240
    },
    {
      "epoch": 2.0511553410648133,
      "grad_norm": 2.671875,
      "learning_rate": 1.757128281813438e-05,
      "loss": 0.8891,
      "step": 585250
    },
    {
      "epoch": 2.051190388571709,
      "grad_norm": 2.75,
      "learning_rate": 1.7570633789470676e-05,
      "loss": 0.8323,
      "step": 585260
    },
    {
      "epoch": 2.0512254360786044,
      "grad_norm": 3.015625,
      "learning_rate": 1.7569984760806978e-05,
      "loss": 0.8678,
      "step": 585270
    },
    {
      "epoch": 2.0512604835855,
      "grad_norm": 3.109375,
      "learning_rate": 1.7569335732143276e-05,
      "loss": 0.8795,
      "step": 585280
    },
    {
      "epoch": 2.051295531092396,
      "grad_norm": 2.359375,
      "learning_rate": 1.7568686703479574e-05,
      "loss": 0.817,
      "step": 585290
    },
    {
      "epoch": 2.0513305785992912,
      "grad_norm": 2.765625,
      "learning_rate": 1.7568037674815872e-05,
      "loss": 0.7726,
      "step": 585300
    },
    {
      "epoch": 2.051365626106187,
      "grad_norm": 2.875,
      "learning_rate": 1.756738864615217e-05,
      "loss": 0.8619,
      "step": 585310
    },
    {
      "epoch": 2.0514006736130823,
      "grad_norm": 2.96875,
      "learning_rate": 1.7566739617488468e-05,
      "loss": 0.833,
      "step": 585320
    },
    {
      "epoch": 2.051435721119978,
      "grad_norm": 3.15625,
      "learning_rate": 1.7566090588824766e-05,
      "loss": 0.8871,
      "step": 585330
    },
    {
      "epoch": 2.051470768626874,
      "grad_norm": 2.734375,
      "learning_rate": 1.7565441560161064e-05,
      "loss": 0.7883,
      "step": 585340
    },
    {
      "epoch": 2.051505816133769,
      "grad_norm": 2.5625,
      "learning_rate": 1.7564792531497362e-05,
      "loss": 0.7421,
      "step": 585350
    },
    {
      "epoch": 2.051540863640665,
      "grad_norm": 2.734375,
      "learning_rate": 1.756414350283366e-05,
      "loss": 0.8787,
      "step": 585360
    },
    {
      "epoch": 2.0515759111475607,
      "grad_norm": 3.046875,
      "learning_rate": 1.7563494474169958e-05,
      "loss": 0.8187,
      "step": 585370
    },
    {
      "epoch": 2.051610958654456,
      "grad_norm": 2.3125,
      "learning_rate": 1.7562845445506256e-05,
      "loss": 0.7987,
      "step": 585380
    },
    {
      "epoch": 2.0516460061613517,
      "grad_norm": 3.328125,
      "learning_rate": 1.7562196416842554e-05,
      "loss": 0.8148,
      "step": 585390
    },
    {
      "epoch": 2.0516810536682475,
      "grad_norm": 2.859375,
      "learning_rate": 1.7561547388178852e-05,
      "loss": 0.8154,
      "step": 585400
    },
    {
      "epoch": 2.051716101175143,
      "grad_norm": 2.984375,
      "learning_rate": 1.7560898359515153e-05,
      "loss": 0.8283,
      "step": 585410
    },
    {
      "epoch": 2.0517511486820386,
      "grad_norm": 2.421875,
      "learning_rate": 1.756024933085145e-05,
      "loss": 0.7866,
      "step": 585420
    },
    {
      "epoch": 2.0517861961889343,
      "grad_norm": 3.21875,
      "learning_rate": 1.7559600302187746e-05,
      "loss": 0.8503,
      "step": 585430
    },
    {
      "epoch": 2.0518212436958296,
      "grad_norm": 2.53125,
      "learning_rate": 1.7558951273524044e-05,
      "loss": 0.7749,
      "step": 585440
    },
    {
      "epoch": 2.0518562912027254,
      "grad_norm": 2.96875,
      "learning_rate": 1.7558302244860342e-05,
      "loss": 0.8393,
      "step": 585450
    },
    {
      "epoch": 2.0518913387096207,
      "grad_norm": 3.1875,
      "learning_rate": 1.755765321619664e-05,
      "loss": 0.766,
      "step": 585460
    },
    {
      "epoch": 2.0519263862165165,
      "grad_norm": 2.921875,
      "learning_rate": 1.7557004187532938e-05,
      "loss": 0.7774,
      "step": 585470
    },
    {
      "epoch": 2.051961433723412,
      "grad_norm": 3.015625,
      "learning_rate": 1.7556355158869236e-05,
      "loss": 0.8644,
      "step": 585480
    },
    {
      "epoch": 2.0519964812303075,
      "grad_norm": 2.71875,
      "learning_rate": 1.7555706130205534e-05,
      "loss": 0.8336,
      "step": 585490
    },
    {
      "epoch": 2.0520315287372033,
      "grad_norm": 2.78125,
      "learning_rate": 1.7555057101541832e-05,
      "loss": 0.8211,
      "step": 585500
    },
    {
      "epoch": 2.052066576244099,
      "grad_norm": 2.59375,
      "learning_rate": 1.755440807287813e-05,
      "loss": 0.7977,
      "step": 585510
    },
    {
      "epoch": 2.0521016237509944,
      "grad_norm": 3.03125,
      "learning_rate": 1.755375904421443e-05,
      "loss": 0.7843,
      "step": 585520
    },
    {
      "epoch": 2.05213667125789,
      "grad_norm": 3.21875,
      "learning_rate": 1.755311001555073e-05,
      "loss": 0.7735,
      "step": 585530
    },
    {
      "epoch": 2.052171718764786,
      "grad_norm": 2.71875,
      "learning_rate": 1.7552460986887027e-05,
      "loss": 0.8249,
      "step": 585540
    },
    {
      "epoch": 2.052206766271681,
      "grad_norm": 2.953125,
      "learning_rate": 1.7551811958223325e-05,
      "loss": 0.8096,
      "step": 585550
    },
    {
      "epoch": 2.052241813778577,
      "grad_norm": 3.21875,
      "learning_rate": 1.7551162929559623e-05,
      "loss": 0.8119,
      "step": 585560
    },
    {
      "epoch": 2.0522768612854723,
      "grad_norm": 3.015625,
      "learning_rate": 1.755051390089592e-05,
      "loss": 0.8417,
      "step": 585570
    },
    {
      "epoch": 2.052311908792368,
      "grad_norm": 2.921875,
      "learning_rate": 1.754986487223222e-05,
      "loss": 0.7487,
      "step": 585580
    },
    {
      "epoch": 2.052346956299264,
      "grad_norm": 2.9375,
      "learning_rate": 1.7549215843568517e-05,
      "loss": 0.8519,
      "step": 585590
    },
    {
      "epoch": 2.052382003806159,
      "grad_norm": 2.71875,
      "learning_rate": 1.7548566814904815e-05,
      "loss": 0.7876,
      "step": 585600
    },
    {
      "epoch": 2.052417051313055,
      "grad_norm": 3.21875,
      "learning_rate": 1.7547917786241113e-05,
      "loss": 0.8499,
      "step": 585610
    },
    {
      "epoch": 2.0524520988199506,
      "grad_norm": 2.59375,
      "learning_rate": 1.7547268757577408e-05,
      "loss": 0.7688,
      "step": 585620
    },
    {
      "epoch": 2.052487146326846,
      "grad_norm": 2.828125,
      "learning_rate": 1.754661972891371e-05,
      "loss": 0.8429,
      "step": 585630
    },
    {
      "epoch": 2.0525221938337417,
      "grad_norm": 2.671875,
      "learning_rate": 1.7545970700250007e-05,
      "loss": 0.8017,
      "step": 585640
    },
    {
      "epoch": 2.0525572413406374,
      "grad_norm": 2.796875,
      "learning_rate": 1.7545321671586305e-05,
      "loss": 0.8342,
      "step": 585650
    },
    {
      "epoch": 2.0525922888475328,
      "grad_norm": 3.015625,
      "learning_rate": 1.7544672642922603e-05,
      "loss": 0.8177,
      "step": 585660
    },
    {
      "epoch": 2.0526273363544285,
      "grad_norm": 2.953125,
      "learning_rate": 1.75440236142589e-05,
      "loss": 0.8408,
      "step": 585670
    },
    {
      "epoch": 2.052662383861324,
      "grad_norm": 3.125,
      "learning_rate": 1.75433745855952e-05,
      "loss": 0.838,
      "step": 585680
    },
    {
      "epoch": 2.0526974313682196,
      "grad_norm": 2.859375,
      "learning_rate": 1.7542725556931497e-05,
      "loss": 0.8609,
      "step": 585690
    },
    {
      "epoch": 2.0527324788751153,
      "grad_norm": 2.921875,
      "learning_rate": 1.7542076528267795e-05,
      "loss": 0.8377,
      "step": 585700
    },
    {
      "epoch": 2.0527675263820107,
      "grad_norm": 2.890625,
      "learning_rate": 1.7541427499604093e-05,
      "loss": 0.8395,
      "step": 585710
    },
    {
      "epoch": 2.0528025738889064,
      "grad_norm": 2.703125,
      "learning_rate": 1.754077847094039e-05,
      "loss": 0.833,
      "step": 585720
    },
    {
      "epoch": 2.052837621395802,
      "grad_norm": 2.90625,
      "learning_rate": 1.754012944227669e-05,
      "loss": 0.8221,
      "step": 585730
    },
    {
      "epoch": 2.0528726689026975,
      "grad_norm": 3.296875,
      "learning_rate": 1.7539480413612987e-05,
      "loss": 0.7703,
      "step": 585740
    },
    {
      "epoch": 2.0529077164095932,
      "grad_norm": 3.390625,
      "learning_rate": 1.7538831384949285e-05,
      "loss": 0.8595,
      "step": 585750
    },
    {
      "epoch": 2.052942763916489,
      "grad_norm": 2.90625,
      "learning_rate": 1.7538182356285583e-05,
      "loss": 0.9299,
      "step": 585760
    },
    {
      "epoch": 2.0529778114233843,
      "grad_norm": 2.59375,
      "learning_rate": 1.7537533327621884e-05,
      "loss": 0.8055,
      "step": 585770
    },
    {
      "epoch": 2.05301285893028,
      "grad_norm": 3.171875,
      "learning_rate": 1.7536884298958182e-05,
      "loss": 0.7254,
      "step": 585780
    },
    {
      "epoch": 2.0530479064371754,
      "grad_norm": 2.8125,
      "learning_rate": 1.753623527029448e-05,
      "loss": 0.7879,
      "step": 585790
    },
    {
      "epoch": 2.053082953944071,
      "grad_norm": 2.921875,
      "learning_rate": 1.753558624163078e-05,
      "loss": 0.8546,
      "step": 585800
    },
    {
      "epoch": 2.053118001450967,
      "grad_norm": 3.015625,
      "learning_rate": 1.7534937212967073e-05,
      "loss": 0.7422,
      "step": 585810
    },
    {
      "epoch": 2.053153048957862,
      "grad_norm": 3.171875,
      "learning_rate": 1.753428818430337e-05,
      "loss": 0.8391,
      "step": 585820
    },
    {
      "epoch": 2.053188096464758,
      "grad_norm": 3.140625,
      "learning_rate": 1.753363915563967e-05,
      "loss": 0.8057,
      "step": 585830
    },
    {
      "epoch": 2.0532231439716537,
      "grad_norm": 3.03125,
      "learning_rate": 1.7532990126975967e-05,
      "loss": 0.8624,
      "step": 585840
    },
    {
      "epoch": 2.053258191478549,
      "grad_norm": 2.671875,
      "learning_rate": 1.7532341098312265e-05,
      "loss": 0.7737,
      "step": 585850
    },
    {
      "epoch": 2.053293238985445,
      "grad_norm": 2.421875,
      "learning_rate": 1.7531692069648563e-05,
      "loss": 0.8077,
      "step": 585860
    },
    {
      "epoch": 2.0533282864923406,
      "grad_norm": 2.625,
      "learning_rate": 1.753104304098486e-05,
      "loss": 0.7417,
      "step": 585870
    },
    {
      "epoch": 2.053363333999236,
      "grad_norm": 2.734375,
      "learning_rate": 1.753039401232116e-05,
      "loss": 0.7392,
      "step": 585880
    },
    {
      "epoch": 2.0533983815061316,
      "grad_norm": 3.078125,
      "learning_rate": 1.752974498365746e-05,
      "loss": 0.8306,
      "step": 585890
    },
    {
      "epoch": 2.053433429013027,
      "grad_norm": 2.40625,
      "learning_rate": 1.752909595499376e-05,
      "loss": 0.7862,
      "step": 585900
    },
    {
      "epoch": 2.0534684765199227,
      "grad_norm": 3.125,
      "learning_rate": 1.7528446926330056e-05,
      "loss": 0.8369,
      "step": 585910
    },
    {
      "epoch": 2.0535035240268185,
      "grad_norm": 2.484375,
      "learning_rate": 1.7527797897666354e-05,
      "loss": 0.7594,
      "step": 585920
    },
    {
      "epoch": 2.0535385715337138,
      "grad_norm": 2.953125,
      "learning_rate": 1.7527148869002652e-05,
      "loss": 0.7418,
      "step": 585930
    },
    {
      "epoch": 2.0535736190406095,
      "grad_norm": 2.5,
      "learning_rate": 1.752649984033895e-05,
      "loss": 0.8048,
      "step": 585940
    },
    {
      "epoch": 2.0536086665475053,
      "grad_norm": 3.109375,
      "learning_rate": 1.752585081167525e-05,
      "loss": 0.8338,
      "step": 585950
    },
    {
      "epoch": 2.0536437140544006,
      "grad_norm": 2.921875,
      "learning_rate": 1.7525201783011546e-05,
      "loss": 0.8033,
      "step": 585960
    },
    {
      "epoch": 2.0536787615612964,
      "grad_norm": 2.71875,
      "learning_rate": 1.7524552754347844e-05,
      "loss": 0.8612,
      "step": 585970
    },
    {
      "epoch": 2.053713809068192,
      "grad_norm": 2.71875,
      "learning_rate": 1.7523903725684142e-05,
      "loss": 0.8448,
      "step": 585980
    },
    {
      "epoch": 2.0537488565750874,
      "grad_norm": 3.125,
      "learning_rate": 1.752325469702044e-05,
      "loss": 0.916,
      "step": 585990
    },
    {
      "epoch": 2.053783904081983,
      "grad_norm": 3.125,
      "learning_rate": 1.752260566835674e-05,
      "loss": 0.8749,
      "step": 586000
    },
    {
      "epoch": 2.0538189515888785,
      "grad_norm": 2.53125,
      "learning_rate": 1.7521956639693036e-05,
      "loss": 0.8646,
      "step": 586010
    },
    {
      "epoch": 2.0538539990957743,
      "grad_norm": 4.15625,
      "learning_rate": 1.7521307611029334e-05,
      "loss": 0.8306,
      "step": 586020
    },
    {
      "epoch": 2.05388904660267,
      "grad_norm": 3.234375,
      "learning_rate": 1.7520658582365632e-05,
      "loss": 0.8301,
      "step": 586030
    },
    {
      "epoch": 2.0539240941095653,
      "grad_norm": 2.8125,
      "learning_rate": 1.752000955370193e-05,
      "loss": 0.9023,
      "step": 586040
    },
    {
      "epoch": 2.053959141616461,
      "grad_norm": 2.59375,
      "learning_rate": 1.751936052503823e-05,
      "loss": 0.8413,
      "step": 586050
    },
    {
      "epoch": 2.053994189123357,
      "grad_norm": 2.90625,
      "learning_rate": 1.7518711496374526e-05,
      "loss": 0.78,
      "step": 586060
    },
    {
      "epoch": 2.054029236630252,
      "grad_norm": 2.515625,
      "learning_rate": 1.7518062467710824e-05,
      "loss": 0.7087,
      "step": 586070
    },
    {
      "epoch": 2.054064284137148,
      "grad_norm": 2.71875,
      "learning_rate": 1.7517413439047122e-05,
      "loss": 0.7458,
      "step": 586080
    },
    {
      "epoch": 2.0540993316440437,
      "grad_norm": 2.859375,
      "learning_rate": 1.751676441038342e-05,
      "loss": 0.7402,
      "step": 586090
    },
    {
      "epoch": 2.054134379150939,
      "grad_norm": 3.125,
      "learning_rate": 1.751611538171972e-05,
      "loss": 0.8562,
      "step": 586100
    },
    {
      "epoch": 2.0541694266578348,
      "grad_norm": 3.5,
      "learning_rate": 1.7515466353056016e-05,
      "loss": 0.8605,
      "step": 586110
    },
    {
      "epoch": 2.0542044741647305,
      "grad_norm": 2.5,
      "learning_rate": 1.7514817324392314e-05,
      "loss": 0.8172,
      "step": 586120
    },
    {
      "epoch": 2.054239521671626,
      "grad_norm": 3.328125,
      "learning_rate": 1.7514168295728612e-05,
      "loss": 0.9047,
      "step": 586130
    },
    {
      "epoch": 2.0542745691785216,
      "grad_norm": 2.828125,
      "learning_rate": 1.7513519267064914e-05,
      "loss": 0.7942,
      "step": 586140
    },
    {
      "epoch": 2.054309616685417,
      "grad_norm": 3.109375,
      "learning_rate": 1.7512870238401212e-05,
      "loss": 0.9093,
      "step": 586150
    },
    {
      "epoch": 2.0543446641923127,
      "grad_norm": 2.71875,
      "learning_rate": 1.751222120973751e-05,
      "loss": 0.836,
      "step": 586160
    },
    {
      "epoch": 2.0543797116992084,
      "grad_norm": 2.671875,
      "learning_rate": 1.7511572181073808e-05,
      "loss": 0.7284,
      "step": 586170
    },
    {
      "epoch": 2.0544147592061037,
      "grad_norm": 2.71875,
      "learning_rate": 1.7510923152410102e-05,
      "loss": 0.7679,
      "step": 586180
    },
    {
      "epoch": 2.0544498067129995,
      "grad_norm": 2.59375,
      "learning_rate": 1.75102741237464e-05,
      "loss": 0.8275,
      "step": 586190
    },
    {
      "epoch": 2.0544848542198952,
      "grad_norm": 2.671875,
      "learning_rate": 1.75096250950827e-05,
      "loss": 0.7821,
      "step": 586200
    },
    {
      "epoch": 2.0545199017267906,
      "grad_norm": 3.09375,
      "learning_rate": 1.7508976066418996e-05,
      "loss": 0.8088,
      "step": 586210
    },
    {
      "epoch": 2.0545549492336863,
      "grad_norm": 3.265625,
      "learning_rate": 1.7508327037755294e-05,
      "loss": 0.752,
      "step": 586220
    },
    {
      "epoch": 2.054589996740582,
      "grad_norm": 3.171875,
      "learning_rate": 1.7507678009091592e-05,
      "loss": 0.8791,
      "step": 586230
    },
    {
      "epoch": 2.0546250442474774,
      "grad_norm": 3.125,
      "learning_rate": 1.750702898042789e-05,
      "loss": 0.8534,
      "step": 586240
    },
    {
      "epoch": 2.054660091754373,
      "grad_norm": 2.8125,
      "learning_rate": 1.7506379951764192e-05,
      "loss": 0.7677,
      "step": 586250
    },
    {
      "epoch": 2.0546951392612685,
      "grad_norm": 2.78125,
      "learning_rate": 1.750573092310049e-05,
      "loss": 0.8127,
      "step": 586260
    },
    {
      "epoch": 2.054730186768164,
      "grad_norm": 2.65625,
      "learning_rate": 1.7505081894436788e-05,
      "loss": 0.8168,
      "step": 586270
    },
    {
      "epoch": 2.05476523427506,
      "grad_norm": 2.765625,
      "learning_rate": 1.7504432865773086e-05,
      "loss": 0.7139,
      "step": 586280
    },
    {
      "epoch": 2.0548002817819553,
      "grad_norm": 2.90625,
      "learning_rate": 1.7503783837109384e-05,
      "loss": 0.775,
      "step": 586290
    },
    {
      "epoch": 2.054835329288851,
      "grad_norm": 2.921875,
      "learning_rate": 1.7503134808445682e-05,
      "loss": 0.8556,
      "step": 586300
    },
    {
      "epoch": 2.054870376795747,
      "grad_norm": 3.703125,
      "learning_rate": 1.750248577978198e-05,
      "loss": 0.8093,
      "step": 586310
    },
    {
      "epoch": 2.054905424302642,
      "grad_norm": 2.734375,
      "learning_rate": 1.7501836751118278e-05,
      "loss": 0.8732,
      "step": 586320
    },
    {
      "epoch": 2.054940471809538,
      "grad_norm": 2.90625,
      "learning_rate": 1.7501187722454576e-05,
      "loss": 0.8471,
      "step": 586330
    },
    {
      "epoch": 2.0549755193164336,
      "grad_norm": 2.984375,
      "learning_rate": 1.7500538693790874e-05,
      "loss": 0.7564,
      "step": 586340
    },
    {
      "epoch": 2.055010566823329,
      "grad_norm": 2.796875,
      "learning_rate": 1.7499889665127172e-05,
      "loss": 0.7835,
      "step": 586350
    },
    {
      "epoch": 2.0550456143302247,
      "grad_norm": 2.90625,
      "learning_rate": 1.749924063646347e-05,
      "loss": 0.7792,
      "step": 586360
    },
    {
      "epoch": 2.05508066183712,
      "grad_norm": 2.6875,
      "learning_rate": 1.7498591607799768e-05,
      "loss": 0.8927,
      "step": 586370
    },
    {
      "epoch": 2.0551157093440158,
      "grad_norm": 2.828125,
      "learning_rate": 1.7497942579136066e-05,
      "loss": 0.8121,
      "step": 586380
    },
    {
      "epoch": 2.0551507568509115,
      "grad_norm": 2.71875,
      "learning_rate": 1.7497293550472364e-05,
      "loss": 0.7664,
      "step": 586390
    },
    {
      "epoch": 2.055185804357807,
      "grad_norm": 3.03125,
      "learning_rate": 1.7496644521808662e-05,
      "loss": 0.8005,
      "step": 586400
    },
    {
      "epoch": 2.0552208518647026,
      "grad_norm": 2.390625,
      "learning_rate": 1.749599549314496e-05,
      "loss": 0.7823,
      "step": 586410
    },
    {
      "epoch": 2.0552558993715984,
      "grad_norm": 2.671875,
      "learning_rate": 1.7495346464481258e-05,
      "loss": 0.8017,
      "step": 586420
    },
    {
      "epoch": 2.0552909468784937,
      "grad_norm": 2.875,
      "learning_rate": 1.7494697435817556e-05,
      "loss": 0.8049,
      "step": 586430
    },
    {
      "epoch": 2.0553259943853894,
      "grad_norm": 2.703125,
      "learning_rate": 1.7494048407153854e-05,
      "loss": 0.804,
      "step": 586440
    },
    {
      "epoch": 2.055361041892285,
      "grad_norm": 2.984375,
      "learning_rate": 1.7493399378490152e-05,
      "loss": 0.8676,
      "step": 586450
    },
    {
      "epoch": 2.0553960893991805,
      "grad_norm": 2.53125,
      "learning_rate": 1.749275034982645e-05,
      "loss": 0.8386,
      "step": 586460
    },
    {
      "epoch": 2.0554311369060763,
      "grad_norm": 2.859375,
      "learning_rate": 1.7492101321162748e-05,
      "loss": 0.8071,
      "step": 586470
    },
    {
      "epoch": 2.0554661844129716,
      "grad_norm": 2.75,
      "learning_rate": 1.7491452292499046e-05,
      "loss": 0.8253,
      "step": 586480
    },
    {
      "epoch": 2.0555012319198673,
      "grad_norm": 2.546875,
      "learning_rate": 1.7490803263835344e-05,
      "loss": 0.7783,
      "step": 586490
    },
    {
      "epoch": 2.055536279426763,
      "grad_norm": 2.625,
      "learning_rate": 1.7490154235171642e-05,
      "loss": 0.7448,
      "step": 586500
    },
    {
      "epoch": 2.0555713269336584,
      "grad_norm": 2.890625,
      "learning_rate": 1.7489505206507943e-05,
      "loss": 0.8043,
      "step": 586510
    },
    {
      "epoch": 2.055606374440554,
      "grad_norm": 2.96875,
      "learning_rate": 1.748885617784424e-05,
      "loss": 0.8293,
      "step": 586520
    },
    {
      "epoch": 2.05564142194745,
      "grad_norm": 3.09375,
      "learning_rate": 1.748820714918054e-05,
      "loss": 0.8464,
      "step": 586530
    },
    {
      "epoch": 2.0556764694543452,
      "grad_norm": 3.390625,
      "learning_rate": 1.7487558120516837e-05,
      "loss": 0.8529,
      "step": 586540
    },
    {
      "epoch": 2.055711516961241,
      "grad_norm": 2.90625,
      "learning_rate": 1.7486909091853135e-05,
      "loss": 0.7976,
      "step": 586550
    },
    {
      "epoch": 2.0557465644681368,
      "grad_norm": 2.9375,
      "learning_rate": 1.748626006318943e-05,
      "loss": 0.8238,
      "step": 586560
    },
    {
      "epoch": 2.055781611975032,
      "grad_norm": 2.484375,
      "learning_rate": 1.7485611034525728e-05,
      "loss": 0.7844,
      "step": 586570
    },
    {
      "epoch": 2.055816659481928,
      "grad_norm": 2.984375,
      "learning_rate": 1.7484962005862026e-05,
      "loss": 0.7499,
      "step": 586580
    },
    {
      "epoch": 2.055851706988823,
      "grad_norm": 3.046875,
      "learning_rate": 1.7484312977198324e-05,
      "loss": 0.9037,
      "step": 586590
    },
    {
      "epoch": 2.055886754495719,
      "grad_norm": 3.265625,
      "learning_rate": 1.7483663948534622e-05,
      "loss": 0.9133,
      "step": 586600
    },
    {
      "epoch": 2.0559218020026147,
      "grad_norm": 3.03125,
      "learning_rate": 1.748301491987092e-05,
      "loss": 0.81,
      "step": 586610
    },
    {
      "epoch": 2.05595684950951,
      "grad_norm": 2.46875,
      "learning_rate": 1.748236589120722e-05,
      "loss": 0.8256,
      "step": 586620
    },
    {
      "epoch": 2.0559918970164057,
      "grad_norm": 2.640625,
      "learning_rate": 1.748171686254352e-05,
      "loss": 0.8167,
      "step": 586630
    },
    {
      "epoch": 2.0560269445233015,
      "grad_norm": 2.921875,
      "learning_rate": 1.7481067833879817e-05,
      "loss": 0.8109,
      "step": 586640
    },
    {
      "epoch": 2.056061992030197,
      "grad_norm": 2.6875,
      "learning_rate": 1.7480418805216115e-05,
      "loss": 0.7746,
      "step": 586650
    },
    {
      "epoch": 2.0560970395370926,
      "grad_norm": 2.84375,
      "learning_rate": 1.7479769776552413e-05,
      "loss": 0.7814,
      "step": 586660
    },
    {
      "epoch": 2.0561320870439883,
      "grad_norm": 3.078125,
      "learning_rate": 1.747912074788871e-05,
      "loss": 0.81,
      "step": 586670
    },
    {
      "epoch": 2.0561671345508836,
      "grad_norm": 3.390625,
      "learning_rate": 1.747847171922501e-05,
      "loss": 0.8611,
      "step": 586680
    },
    {
      "epoch": 2.0562021820577794,
      "grad_norm": 3.046875,
      "learning_rate": 1.7477822690561307e-05,
      "loss": 0.8158,
      "step": 586690
    },
    {
      "epoch": 2.056237229564675,
      "grad_norm": 2.90625,
      "learning_rate": 1.7477173661897605e-05,
      "loss": 0.8232,
      "step": 586700
    },
    {
      "epoch": 2.0562722770715705,
      "grad_norm": 2.8125,
      "learning_rate": 1.7476524633233903e-05,
      "loss": 0.8431,
      "step": 586710
    },
    {
      "epoch": 2.056307324578466,
      "grad_norm": 2.703125,
      "learning_rate": 1.74758756045702e-05,
      "loss": 0.7547,
      "step": 586720
    },
    {
      "epoch": 2.0563423720853615,
      "grad_norm": 2.953125,
      "learning_rate": 1.74752265759065e-05,
      "loss": 0.9302,
      "step": 586730
    },
    {
      "epoch": 2.0563774195922573,
      "grad_norm": 3.125,
      "learning_rate": 1.7474577547242797e-05,
      "loss": 0.8887,
      "step": 586740
    },
    {
      "epoch": 2.056412467099153,
      "grad_norm": 2.828125,
      "learning_rate": 1.7473928518579095e-05,
      "loss": 0.8419,
      "step": 586750
    },
    {
      "epoch": 2.0564475146060484,
      "grad_norm": 2.96875,
      "learning_rate": 1.7473279489915393e-05,
      "loss": 0.9015,
      "step": 586760
    },
    {
      "epoch": 2.056482562112944,
      "grad_norm": 2.890625,
      "learning_rate": 1.747263046125169e-05,
      "loss": 0.7545,
      "step": 586770
    },
    {
      "epoch": 2.05651760961984,
      "grad_norm": 2.640625,
      "learning_rate": 1.747198143258799e-05,
      "loss": 0.7528,
      "step": 586780
    },
    {
      "epoch": 2.056552657126735,
      "grad_norm": 2.578125,
      "learning_rate": 1.7471332403924287e-05,
      "loss": 0.8058,
      "step": 586790
    },
    {
      "epoch": 2.056587704633631,
      "grad_norm": 2.4375,
      "learning_rate": 1.7470683375260585e-05,
      "loss": 0.8171,
      "step": 586800
    },
    {
      "epoch": 2.0566227521405267,
      "grad_norm": 3.421875,
      "learning_rate": 1.7470034346596883e-05,
      "loss": 0.8726,
      "step": 586810
    },
    {
      "epoch": 2.056657799647422,
      "grad_norm": 2.890625,
      "learning_rate": 1.746938531793318e-05,
      "loss": 0.8499,
      "step": 586820
    },
    {
      "epoch": 2.0566928471543178,
      "grad_norm": 2.609375,
      "learning_rate": 1.746873628926948e-05,
      "loss": 0.8298,
      "step": 586830
    },
    {
      "epoch": 2.056727894661213,
      "grad_norm": 2.734375,
      "learning_rate": 1.7468087260605777e-05,
      "loss": 0.8236,
      "step": 586840
    },
    {
      "epoch": 2.056762942168109,
      "grad_norm": 3.296875,
      "learning_rate": 1.7467438231942075e-05,
      "loss": 0.9104,
      "step": 586850
    },
    {
      "epoch": 2.0567979896750046,
      "grad_norm": 3.265625,
      "learning_rate": 1.7466789203278373e-05,
      "loss": 0.8442,
      "step": 586860
    },
    {
      "epoch": 2.0568330371819,
      "grad_norm": 3.1875,
      "learning_rate": 1.7466140174614675e-05,
      "loss": 0.8505,
      "step": 586870
    },
    {
      "epoch": 2.0568680846887957,
      "grad_norm": 3.25,
      "learning_rate": 1.7465491145950973e-05,
      "loss": 0.8398,
      "step": 586880
    },
    {
      "epoch": 2.0569031321956914,
      "grad_norm": 2.84375,
      "learning_rate": 1.746484211728727e-05,
      "loss": 0.7389,
      "step": 586890
    },
    {
      "epoch": 2.0569381797025867,
      "grad_norm": 2.84375,
      "learning_rate": 1.746419308862357e-05,
      "loss": 0.8561,
      "step": 586900
    },
    {
      "epoch": 2.0569732272094825,
      "grad_norm": 3.015625,
      "learning_rate": 1.7463544059959867e-05,
      "loss": 0.8199,
      "step": 586910
    },
    {
      "epoch": 2.0570082747163783,
      "grad_norm": 2.765625,
      "learning_rate": 1.7462895031296165e-05,
      "loss": 0.7481,
      "step": 586920
    },
    {
      "epoch": 2.0570433222232736,
      "grad_norm": 2.9375,
      "learning_rate": 1.7462246002632463e-05,
      "loss": 0.8715,
      "step": 586930
    },
    {
      "epoch": 2.0570783697301693,
      "grad_norm": 3.015625,
      "learning_rate": 1.7461596973968757e-05,
      "loss": 0.8217,
      "step": 586940
    },
    {
      "epoch": 2.0571134172370646,
      "grad_norm": 3.015625,
      "learning_rate": 1.7460947945305055e-05,
      "loss": 0.7827,
      "step": 586950
    },
    {
      "epoch": 2.0571484647439604,
      "grad_norm": 3.09375,
      "learning_rate": 1.7460298916641353e-05,
      "loss": 0.8692,
      "step": 586960
    },
    {
      "epoch": 2.057183512250856,
      "grad_norm": 2.84375,
      "learning_rate": 1.745964988797765e-05,
      "loss": 0.8091,
      "step": 586970
    },
    {
      "epoch": 2.0572185597577515,
      "grad_norm": 3.0,
      "learning_rate": 1.745900085931395e-05,
      "loss": 0.7129,
      "step": 586980
    },
    {
      "epoch": 2.0572536072646472,
      "grad_norm": 2.859375,
      "learning_rate": 1.745835183065025e-05,
      "loss": 0.8797,
      "step": 586990
    },
    {
      "epoch": 2.057288654771543,
      "grad_norm": 2.671875,
      "learning_rate": 1.745770280198655e-05,
      "loss": 0.7385,
      "step": 587000
    },
    {
      "epoch": 2.0573237022784383,
      "grad_norm": 3.1875,
      "learning_rate": 1.7457053773322847e-05,
      "loss": 0.7951,
      "step": 587010
    },
    {
      "epoch": 2.057358749785334,
      "grad_norm": 2.625,
      "learning_rate": 1.7456404744659145e-05,
      "loss": 0.9197,
      "step": 587020
    },
    {
      "epoch": 2.05739379729223,
      "grad_norm": 2.765625,
      "learning_rate": 1.7455755715995443e-05,
      "loss": 0.8094,
      "step": 587030
    },
    {
      "epoch": 2.057428844799125,
      "grad_norm": 2.96875,
      "learning_rate": 1.745510668733174e-05,
      "loss": 0.7969,
      "step": 587040
    },
    {
      "epoch": 2.057463892306021,
      "grad_norm": 3.109375,
      "learning_rate": 1.745445765866804e-05,
      "loss": 0.7906,
      "step": 587050
    },
    {
      "epoch": 2.057498939812916,
      "grad_norm": 3.015625,
      "learning_rate": 1.7453808630004337e-05,
      "loss": 0.7838,
      "step": 587060
    },
    {
      "epoch": 2.057533987319812,
      "grad_norm": 2.78125,
      "learning_rate": 1.7453159601340635e-05,
      "loss": 0.7904,
      "step": 587070
    },
    {
      "epoch": 2.0575690348267077,
      "grad_norm": 2.828125,
      "learning_rate": 1.7452510572676933e-05,
      "loss": 0.8946,
      "step": 587080
    },
    {
      "epoch": 2.057604082333603,
      "grad_norm": 2.640625,
      "learning_rate": 1.745186154401323e-05,
      "loss": 0.7743,
      "step": 587090
    },
    {
      "epoch": 2.057639129840499,
      "grad_norm": 2.796875,
      "learning_rate": 1.745121251534953e-05,
      "loss": 0.8601,
      "step": 587100
    },
    {
      "epoch": 2.0576741773473946,
      "grad_norm": 2.953125,
      "learning_rate": 1.7450563486685827e-05,
      "loss": 0.8014,
      "step": 587110
    },
    {
      "epoch": 2.05770922485429,
      "grad_norm": 2.71875,
      "learning_rate": 1.7449914458022125e-05,
      "loss": 0.795,
      "step": 587120
    },
    {
      "epoch": 2.0577442723611856,
      "grad_norm": 3.015625,
      "learning_rate": 1.7449265429358423e-05,
      "loss": 0.8149,
      "step": 587130
    },
    {
      "epoch": 2.0577793198680814,
      "grad_norm": 3.03125,
      "learning_rate": 1.744861640069472e-05,
      "loss": 0.7491,
      "step": 587140
    },
    {
      "epoch": 2.0578143673749767,
      "grad_norm": 3.15625,
      "learning_rate": 1.744796737203102e-05,
      "loss": 0.8122,
      "step": 587150
    },
    {
      "epoch": 2.0578494148818725,
      "grad_norm": 2.515625,
      "learning_rate": 1.7447318343367317e-05,
      "loss": 0.7862,
      "step": 587160
    },
    {
      "epoch": 2.0578844623887678,
      "grad_norm": 2.78125,
      "learning_rate": 1.7446669314703615e-05,
      "loss": 0.8431,
      "step": 587170
    },
    {
      "epoch": 2.0579195098956635,
      "grad_norm": 2.828125,
      "learning_rate": 1.7446020286039913e-05,
      "loss": 0.801,
      "step": 587180
    },
    {
      "epoch": 2.0579545574025593,
      "grad_norm": 2.921875,
      "learning_rate": 1.744537125737621e-05,
      "loss": 0.9107,
      "step": 587190
    },
    {
      "epoch": 2.0579896049094546,
      "grad_norm": 2.75,
      "learning_rate": 1.744472222871251e-05,
      "loss": 0.8871,
      "step": 587200
    },
    {
      "epoch": 2.0580246524163504,
      "grad_norm": 2.609375,
      "learning_rate": 1.7444073200048807e-05,
      "loss": 0.8583,
      "step": 587210
    },
    {
      "epoch": 2.058059699923246,
      "grad_norm": 3.125,
      "learning_rate": 1.7443424171385105e-05,
      "loss": 0.9041,
      "step": 587220
    },
    {
      "epoch": 2.0580947474301414,
      "grad_norm": 3.0625,
      "learning_rate": 1.7442775142721403e-05,
      "loss": 0.8881,
      "step": 587230
    },
    {
      "epoch": 2.058129794937037,
      "grad_norm": 4.3125,
      "learning_rate": 1.7442126114057704e-05,
      "loss": 0.7808,
      "step": 587240
    },
    {
      "epoch": 2.058164842443933,
      "grad_norm": 2.875,
      "learning_rate": 1.7441477085394002e-05,
      "loss": 0.8328,
      "step": 587250
    },
    {
      "epoch": 2.0581998899508283,
      "grad_norm": 2.65625,
      "learning_rate": 1.74408280567303e-05,
      "loss": 0.7645,
      "step": 587260
    },
    {
      "epoch": 2.058234937457724,
      "grad_norm": 2.765625,
      "learning_rate": 1.7440179028066598e-05,
      "loss": 0.8599,
      "step": 587270
    },
    {
      "epoch": 2.0582699849646193,
      "grad_norm": 2.765625,
      "learning_rate": 1.7439529999402896e-05,
      "loss": 0.8116,
      "step": 587280
    },
    {
      "epoch": 2.058305032471515,
      "grad_norm": 2.53125,
      "learning_rate": 1.7438880970739194e-05,
      "loss": 0.7534,
      "step": 587290
    },
    {
      "epoch": 2.058340079978411,
      "grad_norm": 3.078125,
      "learning_rate": 1.7438231942075492e-05,
      "loss": 0.7484,
      "step": 587300
    },
    {
      "epoch": 2.058375127485306,
      "grad_norm": 2.921875,
      "learning_rate": 1.7437582913411787e-05,
      "loss": 0.8589,
      "step": 587310
    },
    {
      "epoch": 2.058410174992202,
      "grad_norm": 3.21875,
      "learning_rate": 1.7436933884748085e-05,
      "loss": 0.8017,
      "step": 587320
    },
    {
      "epoch": 2.0584452224990977,
      "grad_norm": 3.265625,
      "learning_rate": 1.7436284856084383e-05,
      "loss": 0.7826,
      "step": 587330
    },
    {
      "epoch": 2.058480270005993,
      "grad_norm": 2.8125,
      "learning_rate": 1.743563582742068e-05,
      "loss": 0.7728,
      "step": 587340
    },
    {
      "epoch": 2.0585153175128887,
      "grad_norm": 2.75,
      "learning_rate": 1.7434986798756982e-05,
      "loss": 0.8281,
      "step": 587350
    },
    {
      "epoch": 2.0585503650197845,
      "grad_norm": 2.953125,
      "learning_rate": 1.743433777009328e-05,
      "loss": 0.6964,
      "step": 587360
    },
    {
      "epoch": 2.05858541252668,
      "grad_norm": 2.953125,
      "learning_rate": 1.7433688741429578e-05,
      "loss": 0.7932,
      "step": 587370
    },
    {
      "epoch": 2.0586204600335756,
      "grad_norm": 2.84375,
      "learning_rate": 1.7433039712765876e-05,
      "loss": 0.8467,
      "step": 587380
    },
    {
      "epoch": 2.058655507540471,
      "grad_norm": 2.90625,
      "learning_rate": 1.7432390684102174e-05,
      "loss": 0.8221,
      "step": 587390
    },
    {
      "epoch": 2.0586905550473666,
      "grad_norm": 2.65625,
      "learning_rate": 1.7431741655438472e-05,
      "loss": 0.827,
      "step": 587400
    },
    {
      "epoch": 2.0587256025542624,
      "grad_norm": 2.8125,
      "learning_rate": 1.743109262677477e-05,
      "loss": 0.7728,
      "step": 587410
    },
    {
      "epoch": 2.0587606500611577,
      "grad_norm": 2.390625,
      "learning_rate": 1.7430443598111068e-05,
      "loss": 0.8176,
      "step": 587420
    },
    {
      "epoch": 2.0587956975680535,
      "grad_norm": 3.15625,
      "learning_rate": 1.7429794569447366e-05,
      "loss": 0.7721,
      "step": 587430
    },
    {
      "epoch": 2.0588307450749492,
      "grad_norm": 2.8125,
      "learning_rate": 1.7429145540783664e-05,
      "loss": 0.7829,
      "step": 587440
    },
    {
      "epoch": 2.0588657925818445,
      "grad_norm": 2.984375,
      "learning_rate": 1.7428496512119962e-05,
      "loss": 0.771,
      "step": 587450
    },
    {
      "epoch": 2.0589008400887403,
      "grad_norm": 2.796875,
      "learning_rate": 1.742784748345626e-05,
      "loss": 0.756,
      "step": 587460
    },
    {
      "epoch": 2.058935887595636,
      "grad_norm": 2.5625,
      "learning_rate": 1.7427198454792558e-05,
      "loss": 0.8835,
      "step": 587470
    },
    {
      "epoch": 2.0589709351025314,
      "grad_norm": 2.984375,
      "learning_rate": 1.7426549426128856e-05,
      "loss": 0.7983,
      "step": 587480
    },
    {
      "epoch": 2.059005982609427,
      "grad_norm": 2.8125,
      "learning_rate": 1.7425900397465157e-05,
      "loss": 0.8259,
      "step": 587490
    },
    {
      "epoch": 2.059041030116323,
      "grad_norm": 2.703125,
      "learning_rate": 1.7425251368801452e-05,
      "loss": 0.8491,
      "step": 587500
    },
    {
      "epoch": 2.059076077623218,
      "grad_norm": 2.765625,
      "learning_rate": 1.742460234013775e-05,
      "loss": 0.7744,
      "step": 587510
    },
    {
      "epoch": 2.059111125130114,
      "grad_norm": 2.90625,
      "learning_rate": 1.7423953311474048e-05,
      "loss": 0.8168,
      "step": 587520
    },
    {
      "epoch": 2.0591461726370093,
      "grad_norm": 2.828125,
      "learning_rate": 1.7423304282810346e-05,
      "loss": 0.7578,
      "step": 587530
    },
    {
      "epoch": 2.059181220143905,
      "grad_norm": 3.265625,
      "learning_rate": 1.7422655254146644e-05,
      "loss": 0.8647,
      "step": 587540
    },
    {
      "epoch": 2.059216267650801,
      "grad_norm": 3.171875,
      "learning_rate": 1.7422006225482942e-05,
      "loss": 0.8264,
      "step": 587550
    },
    {
      "epoch": 2.059251315157696,
      "grad_norm": 2.734375,
      "learning_rate": 1.742135719681924e-05,
      "loss": 0.7146,
      "step": 587560
    },
    {
      "epoch": 2.059286362664592,
      "grad_norm": 3.3125,
      "learning_rate": 1.7420708168155538e-05,
      "loss": 0.8964,
      "step": 587570
    },
    {
      "epoch": 2.0593214101714876,
      "grad_norm": 2.921875,
      "learning_rate": 1.7420059139491836e-05,
      "loss": 0.7693,
      "step": 587580
    },
    {
      "epoch": 2.059356457678383,
      "grad_norm": 2.71875,
      "learning_rate": 1.7419410110828134e-05,
      "loss": 0.8303,
      "step": 587590
    },
    {
      "epoch": 2.0593915051852787,
      "grad_norm": 2.890625,
      "learning_rate": 1.7418761082164432e-05,
      "loss": 0.9041,
      "step": 587600
    },
    {
      "epoch": 2.0594265526921745,
      "grad_norm": 3.09375,
      "learning_rate": 1.7418112053500733e-05,
      "loss": 0.8068,
      "step": 587610
    },
    {
      "epoch": 2.0594616001990698,
      "grad_norm": 3.09375,
      "learning_rate": 1.741746302483703e-05,
      "loss": 0.9114,
      "step": 587620
    },
    {
      "epoch": 2.0594966477059655,
      "grad_norm": 2.625,
      "learning_rate": 1.741681399617333e-05,
      "loss": 0.8621,
      "step": 587630
    },
    {
      "epoch": 2.059531695212861,
      "grad_norm": 2.75,
      "learning_rate": 1.7416164967509627e-05,
      "loss": 0.7443,
      "step": 587640
    },
    {
      "epoch": 2.0595667427197566,
      "grad_norm": 3.234375,
      "learning_rate": 1.7415515938845925e-05,
      "loss": 0.9161,
      "step": 587650
    },
    {
      "epoch": 2.0596017902266524,
      "grad_norm": 3.0,
      "learning_rate": 1.7414866910182223e-05,
      "loss": 0.7327,
      "step": 587660
    },
    {
      "epoch": 2.0596368377335477,
      "grad_norm": 3.21875,
      "learning_rate": 1.741421788151852e-05,
      "loss": 0.7974,
      "step": 587670
    },
    {
      "epoch": 2.0596718852404434,
      "grad_norm": 3.0625,
      "learning_rate": 1.741356885285482e-05,
      "loss": 0.8414,
      "step": 587680
    },
    {
      "epoch": 2.059706932747339,
      "grad_norm": 3.109375,
      "learning_rate": 1.7412919824191114e-05,
      "loss": 0.85,
      "step": 587690
    },
    {
      "epoch": 2.0597419802542345,
      "grad_norm": 2.75,
      "learning_rate": 1.7412270795527412e-05,
      "loss": 0.8868,
      "step": 587700
    },
    {
      "epoch": 2.0597770277611303,
      "grad_norm": 2.71875,
      "learning_rate": 1.741162176686371e-05,
      "loss": 0.7514,
      "step": 587710
    },
    {
      "epoch": 2.059812075268026,
      "grad_norm": 2.953125,
      "learning_rate": 1.741097273820001e-05,
      "loss": 0.826,
      "step": 587720
    },
    {
      "epoch": 2.0598471227749213,
      "grad_norm": 3.203125,
      "learning_rate": 1.741032370953631e-05,
      "loss": 0.7713,
      "step": 587730
    },
    {
      "epoch": 2.059882170281817,
      "grad_norm": 3.09375,
      "learning_rate": 1.7409674680872607e-05,
      "loss": 0.7711,
      "step": 587740
    },
    {
      "epoch": 2.0599172177887124,
      "grad_norm": 2.953125,
      "learning_rate": 1.7409025652208905e-05,
      "loss": 0.8585,
      "step": 587750
    },
    {
      "epoch": 2.059952265295608,
      "grad_norm": 2.390625,
      "learning_rate": 1.7408376623545203e-05,
      "loss": 0.8503,
      "step": 587760
    },
    {
      "epoch": 2.059987312802504,
      "grad_norm": 2.5625,
      "learning_rate": 1.74077275948815e-05,
      "loss": 0.6888,
      "step": 587770
    },
    {
      "epoch": 2.0600223603093992,
      "grad_norm": 3.171875,
      "learning_rate": 1.74070785662178e-05,
      "loss": 0.842,
      "step": 587780
    },
    {
      "epoch": 2.060057407816295,
      "grad_norm": 2.78125,
      "learning_rate": 1.7406429537554097e-05,
      "loss": 0.8754,
      "step": 587790
    },
    {
      "epoch": 2.0600924553231907,
      "grad_norm": 3.203125,
      "learning_rate": 1.7405780508890395e-05,
      "loss": 0.9399,
      "step": 587800
    },
    {
      "epoch": 2.060127502830086,
      "grad_norm": 3.234375,
      "learning_rate": 1.7405131480226693e-05,
      "loss": 0.903,
      "step": 587810
    },
    {
      "epoch": 2.060162550336982,
      "grad_norm": 2.828125,
      "learning_rate": 1.740448245156299e-05,
      "loss": 0.8361,
      "step": 587820
    },
    {
      "epoch": 2.0601975978438776,
      "grad_norm": 2.75,
      "learning_rate": 1.740383342289929e-05,
      "loss": 0.898,
      "step": 587830
    },
    {
      "epoch": 2.060232645350773,
      "grad_norm": 3.1875,
      "learning_rate": 1.7403184394235587e-05,
      "loss": 0.7993,
      "step": 587840
    },
    {
      "epoch": 2.0602676928576686,
      "grad_norm": 3.09375,
      "learning_rate": 1.7402535365571885e-05,
      "loss": 0.8799,
      "step": 587850
    },
    {
      "epoch": 2.060302740364564,
      "grad_norm": 3.109375,
      "learning_rate": 1.7401886336908187e-05,
      "loss": 0.9076,
      "step": 587860
    },
    {
      "epoch": 2.0603377878714597,
      "grad_norm": 3.203125,
      "learning_rate": 1.7401237308244485e-05,
      "loss": 0.8393,
      "step": 587870
    },
    {
      "epoch": 2.0603728353783555,
      "grad_norm": 2.796875,
      "learning_rate": 1.740058827958078e-05,
      "loss": 0.802,
      "step": 587880
    },
    {
      "epoch": 2.060407882885251,
      "grad_norm": 2.453125,
      "learning_rate": 1.7399939250917077e-05,
      "loss": 0.875,
      "step": 587890
    },
    {
      "epoch": 2.0604429303921465,
      "grad_norm": 2.40625,
      "learning_rate": 1.7399290222253375e-05,
      "loss": 0.8179,
      "step": 587900
    },
    {
      "epoch": 2.0604779778990423,
      "grad_norm": 2.546875,
      "learning_rate": 1.7398641193589673e-05,
      "loss": 0.7717,
      "step": 587910
    },
    {
      "epoch": 2.0605130254059376,
      "grad_norm": 2.65625,
      "learning_rate": 1.739799216492597e-05,
      "loss": 0.7861,
      "step": 587920
    },
    {
      "epoch": 2.0605480729128334,
      "grad_norm": 2.921875,
      "learning_rate": 1.739734313626227e-05,
      "loss": 0.7927,
      "step": 587930
    },
    {
      "epoch": 2.060583120419729,
      "grad_norm": 2.21875,
      "learning_rate": 1.7396694107598567e-05,
      "loss": 0.8606,
      "step": 587940
    },
    {
      "epoch": 2.0606181679266244,
      "grad_norm": 2.953125,
      "learning_rate": 1.7396045078934865e-05,
      "loss": 0.8483,
      "step": 587950
    },
    {
      "epoch": 2.06065321543352,
      "grad_norm": 2.765625,
      "learning_rate": 1.7395396050271163e-05,
      "loss": 0.9351,
      "step": 587960
    },
    {
      "epoch": 2.0606882629404155,
      "grad_norm": 3.265625,
      "learning_rate": 1.7394747021607465e-05,
      "loss": 0.8251,
      "step": 587970
    },
    {
      "epoch": 2.0607233104473113,
      "grad_norm": 2.796875,
      "learning_rate": 1.7394097992943763e-05,
      "loss": 0.8058,
      "step": 587980
    },
    {
      "epoch": 2.060758357954207,
      "grad_norm": 2.953125,
      "learning_rate": 1.739344896428006e-05,
      "loss": 0.7979,
      "step": 587990
    },
    {
      "epoch": 2.0607934054611023,
      "grad_norm": 3.0,
      "learning_rate": 1.739279993561636e-05,
      "loss": 0.7796,
      "step": 588000
    },
    {
      "epoch": 2.060828452967998,
      "grad_norm": 3.046875,
      "learning_rate": 1.7392150906952657e-05,
      "loss": 0.8867,
      "step": 588010
    },
    {
      "epoch": 2.060863500474894,
      "grad_norm": 2.59375,
      "learning_rate": 1.7391501878288955e-05,
      "loss": 0.7942,
      "step": 588020
    },
    {
      "epoch": 2.060898547981789,
      "grad_norm": 2.546875,
      "learning_rate": 1.7390852849625253e-05,
      "loss": 0.8009,
      "step": 588030
    },
    {
      "epoch": 2.060933595488685,
      "grad_norm": 2.671875,
      "learning_rate": 1.739020382096155e-05,
      "loss": 0.8088,
      "step": 588040
    },
    {
      "epoch": 2.0609686429955807,
      "grad_norm": 3.25,
      "learning_rate": 1.738955479229785e-05,
      "loss": 0.8861,
      "step": 588050
    },
    {
      "epoch": 2.061003690502476,
      "grad_norm": 2.890625,
      "learning_rate": 1.7388905763634143e-05,
      "loss": 0.798,
      "step": 588060
    },
    {
      "epoch": 2.0610387380093718,
      "grad_norm": 3.109375,
      "learning_rate": 1.738825673497044e-05,
      "loss": 0.8182,
      "step": 588070
    },
    {
      "epoch": 2.0610737855162675,
      "grad_norm": 2.625,
      "learning_rate": 1.738760770630674e-05,
      "loss": 0.7864,
      "step": 588080
    },
    {
      "epoch": 2.061108833023163,
      "grad_norm": 3.140625,
      "learning_rate": 1.738695867764304e-05,
      "loss": 0.8204,
      "step": 588090
    },
    {
      "epoch": 2.0611438805300586,
      "grad_norm": 3.109375,
      "learning_rate": 1.738630964897934e-05,
      "loss": 0.884,
      "step": 588100
    },
    {
      "epoch": 2.061178928036954,
      "grad_norm": 2.96875,
      "learning_rate": 1.7385660620315637e-05,
      "loss": 0.8146,
      "step": 588110
    },
    {
      "epoch": 2.0612139755438497,
      "grad_norm": 2.65625,
      "learning_rate": 1.7385011591651935e-05,
      "loss": 0.8226,
      "step": 588120
    },
    {
      "epoch": 2.0612490230507454,
      "grad_norm": 3.21875,
      "learning_rate": 1.7384362562988233e-05,
      "loss": 0.8739,
      "step": 588130
    },
    {
      "epoch": 2.0612840705576407,
      "grad_norm": 2.90625,
      "learning_rate": 1.738371353432453e-05,
      "loss": 0.742,
      "step": 588140
    },
    {
      "epoch": 2.0613191180645365,
      "grad_norm": 2.953125,
      "learning_rate": 1.738306450566083e-05,
      "loss": 0.897,
      "step": 588150
    },
    {
      "epoch": 2.0613541655714323,
      "grad_norm": 3.328125,
      "learning_rate": 1.7382415476997127e-05,
      "loss": 0.8815,
      "step": 588160
    },
    {
      "epoch": 2.0613892130783276,
      "grad_norm": 3.046875,
      "learning_rate": 1.7381766448333425e-05,
      "loss": 0.8072,
      "step": 588170
    },
    {
      "epoch": 2.0614242605852233,
      "grad_norm": 3.1875,
      "learning_rate": 1.7381117419669723e-05,
      "loss": 0.9495,
      "step": 588180
    },
    {
      "epoch": 2.061459308092119,
      "grad_norm": 2.671875,
      "learning_rate": 1.738046839100602e-05,
      "loss": 0.7847,
      "step": 588190
    },
    {
      "epoch": 2.0614943555990144,
      "grad_norm": 2.890625,
      "learning_rate": 1.737981936234232e-05,
      "loss": 0.8045,
      "step": 588200
    },
    {
      "epoch": 2.06152940310591,
      "grad_norm": 2.765625,
      "learning_rate": 1.7379170333678617e-05,
      "loss": 0.7691,
      "step": 588210
    },
    {
      "epoch": 2.0615644506128055,
      "grad_norm": 3.015625,
      "learning_rate": 1.7378521305014918e-05,
      "loss": 0.7783,
      "step": 588220
    },
    {
      "epoch": 2.0615994981197012,
      "grad_norm": 2.96875,
      "learning_rate": 1.7377872276351216e-05,
      "loss": 0.834,
      "step": 588230
    },
    {
      "epoch": 2.061634545626597,
      "grad_norm": 3.15625,
      "learning_rate": 1.7377223247687514e-05,
      "loss": 0.8126,
      "step": 588240
    },
    {
      "epoch": 2.0616695931334923,
      "grad_norm": 2.734375,
      "learning_rate": 1.737657421902381e-05,
      "loss": 0.862,
      "step": 588250
    },
    {
      "epoch": 2.061704640640388,
      "grad_norm": 3.09375,
      "learning_rate": 1.7375925190360107e-05,
      "loss": 0.8486,
      "step": 588260
    },
    {
      "epoch": 2.061739688147284,
      "grad_norm": 2.65625,
      "learning_rate": 1.7375276161696405e-05,
      "loss": 0.849,
      "step": 588270
    },
    {
      "epoch": 2.061774735654179,
      "grad_norm": 3.140625,
      "learning_rate": 1.7374627133032703e-05,
      "loss": 0.8614,
      "step": 588280
    },
    {
      "epoch": 2.061809783161075,
      "grad_norm": 2.796875,
      "learning_rate": 1.7373978104369e-05,
      "loss": 0.7745,
      "step": 588290
    },
    {
      "epoch": 2.0618448306679706,
      "grad_norm": 2.890625,
      "learning_rate": 1.73733290757053e-05,
      "loss": 0.7331,
      "step": 588300
    },
    {
      "epoch": 2.061879878174866,
      "grad_norm": 2.625,
      "learning_rate": 1.7372680047041597e-05,
      "loss": 0.7745,
      "step": 588310
    },
    {
      "epoch": 2.0619149256817617,
      "grad_norm": 2.40625,
      "learning_rate": 1.7372031018377895e-05,
      "loss": 0.7809,
      "step": 588320
    },
    {
      "epoch": 2.061949973188657,
      "grad_norm": 2.71875,
      "learning_rate": 1.7371381989714193e-05,
      "loss": 0.7532,
      "step": 588330
    },
    {
      "epoch": 2.061985020695553,
      "grad_norm": 2.671875,
      "learning_rate": 1.7370732961050494e-05,
      "loss": 0.7628,
      "step": 588340
    },
    {
      "epoch": 2.0620200682024485,
      "grad_norm": 2.765625,
      "learning_rate": 1.7370083932386792e-05,
      "loss": 0.8083,
      "step": 588350
    },
    {
      "epoch": 2.062055115709344,
      "grad_norm": 3.265625,
      "learning_rate": 1.736943490372309e-05,
      "loss": 0.7984,
      "step": 588360
    },
    {
      "epoch": 2.0620901632162396,
      "grad_norm": 2.703125,
      "learning_rate": 1.7368785875059388e-05,
      "loss": 0.8249,
      "step": 588370
    },
    {
      "epoch": 2.0621252107231354,
      "grad_norm": 2.625,
      "learning_rate": 1.7368136846395686e-05,
      "loss": 0.8576,
      "step": 588380
    },
    {
      "epoch": 2.0621602582300307,
      "grad_norm": 2.828125,
      "learning_rate": 1.7367487817731984e-05,
      "loss": 0.8805,
      "step": 588390
    },
    {
      "epoch": 2.0621953057369264,
      "grad_norm": 2.859375,
      "learning_rate": 1.7366838789068282e-05,
      "loss": 0.8692,
      "step": 588400
    },
    {
      "epoch": 2.062230353243822,
      "grad_norm": 3.296875,
      "learning_rate": 1.736618976040458e-05,
      "loss": 0.8202,
      "step": 588410
    },
    {
      "epoch": 2.0622654007507175,
      "grad_norm": 2.953125,
      "learning_rate": 1.7365540731740878e-05,
      "loss": 0.7947,
      "step": 588420
    },
    {
      "epoch": 2.0623004482576133,
      "grad_norm": 2.71875,
      "learning_rate": 1.7364891703077176e-05,
      "loss": 0.9024,
      "step": 588430
    },
    {
      "epoch": 2.0623354957645086,
      "grad_norm": 3.125,
      "learning_rate": 1.736424267441347e-05,
      "loss": 0.7778,
      "step": 588440
    },
    {
      "epoch": 2.0623705432714043,
      "grad_norm": 2.625,
      "learning_rate": 1.7363593645749772e-05,
      "loss": 0.9302,
      "step": 588450
    },
    {
      "epoch": 2.0624055907783,
      "grad_norm": 2.546875,
      "learning_rate": 1.736294461708607e-05,
      "loss": 0.8436,
      "step": 588460
    },
    {
      "epoch": 2.0624406382851954,
      "grad_norm": 2.4375,
      "learning_rate": 1.7362295588422368e-05,
      "loss": 0.7212,
      "step": 588470
    },
    {
      "epoch": 2.062475685792091,
      "grad_norm": 2.890625,
      "learning_rate": 1.7361646559758666e-05,
      "loss": 0.7849,
      "step": 588480
    },
    {
      "epoch": 2.062510733298987,
      "grad_norm": 2.5625,
      "learning_rate": 1.7360997531094964e-05,
      "loss": 0.8303,
      "step": 588490
    },
    {
      "epoch": 2.0625457808058822,
      "grad_norm": 3.0,
      "learning_rate": 1.7360348502431262e-05,
      "loss": 0.9301,
      "step": 588500
    },
    {
      "epoch": 2.062580828312778,
      "grad_norm": 3.0625,
      "learning_rate": 1.735969947376756e-05,
      "loss": 0.8938,
      "step": 588510
    },
    {
      "epoch": 2.0626158758196738,
      "grad_norm": 2.609375,
      "learning_rate": 1.7359050445103858e-05,
      "loss": 0.8077,
      "step": 588520
    },
    {
      "epoch": 2.062650923326569,
      "grad_norm": 3.359375,
      "learning_rate": 1.7358401416440156e-05,
      "loss": 0.795,
      "step": 588530
    },
    {
      "epoch": 2.062685970833465,
      "grad_norm": 2.921875,
      "learning_rate": 1.7357752387776454e-05,
      "loss": 0.7908,
      "step": 588540
    },
    {
      "epoch": 2.06272101834036,
      "grad_norm": 3.09375,
      "learning_rate": 1.7357103359112752e-05,
      "loss": 0.871,
      "step": 588550
    },
    {
      "epoch": 2.062756065847256,
      "grad_norm": 3.125,
      "learning_rate": 1.735645433044905e-05,
      "loss": 0.793,
      "step": 588560
    },
    {
      "epoch": 2.0627911133541517,
      "grad_norm": 2.71875,
      "learning_rate": 1.7355805301785348e-05,
      "loss": 0.7661,
      "step": 588570
    },
    {
      "epoch": 2.062826160861047,
      "grad_norm": 2.84375,
      "learning_rate": 1.7355156273121646e-05,
      "loss": 0.8113,
      "step": 588580
    },
    {
      "epoch": 2.0628612083679427,
      "grad_norm": 2.75,
      "learning_rate": 1.7354507244457948e-05,
      "loss": 0.7577,
      "step": 588590
    },
    {
      "epoch": 2.0628962558748385,
      "grad_norm": 3.3125,
      "learning_rate": 1.7353858215794246e-05,
      "loss": 0.8774,
      "step": 588600
    },
    {
      "epoch": 2.062931303381734,
      "grad_norm": 3.03125,
      "learning_rate": 1.7353209187130544e-05,
      "loss": 0.8195,
      "step": 588610
    },
    {
      "epoch": 2.0629663508886296,
      "grad_norm": 3.234375,
      "learning_rate": 1.735256015846684e-05,
      "loss": 0.8166,
      "step": 588620
    },
    {
      "epoch": 2.0630013983955253,
      "grad_norm": 2.953125,
      "learning_rate": 1.7351911129803136e-05,
      "loss": 0.7911,
      "step": 588630
    },
    {
      "epoch": 2.0630364459024206,
      "grad_norm": 3.09375,
      "learning_rate": 1.7351262101139434e-05,
      "loss": 0.771,
      "step": 588640
    },
    {
      "epoch": 2.0630714934093164,
      "grad_norm": 3.46875,
      "learning_rate": 1.7350613072475732e-05,
      "loss": 0.8647,
      "step": 588650
    },
    {
      "epoch": 2.0631065409162117,
      "grad_norm": 2.828125,
      "learning_rate": 1.734996404381203e-05,
      "loss": 0.7958,
      "step": 588660
    },
    {
      "epoch": 2.0631415884231075,
      "grad_norm": 2.515625,
      "learning_rate": 1.7349315015148328e-05,
      "loss": 0.7681,
      "step": 588670
    },
    {
      "epoch": 2.0631766359300032,
      "grad_norm": 3.0625,
      "learning_rate": 1.7348665986484626e-05,
      "loss": 0.8112,
      "step": 588680
    },
    {
      "epoch": 2.0632116834368985,
      "grad_norm": 2.546875,
      "learning_rate": 1.7348016957820924e-05,
      "loss": 0.8202,
      "step": 588690
    },
    {
      "epoch": 2.0632467309437943,
      "grad_norm": 2.375,
      "learning_rate": 1.7347367929157222e-05,
      "loss": 0.8565,
      "step": 588700
    },
    {
      "epoch": 2.06328177845069,
      "grad_norm": 2.640625,
      "learning_rate": 1.7346718900493524e-05,
      "loss": 0.8692,
      "step": 588710
    },
    {
      "epoch": 2.0633168259575854,
      "grad_norm": 3.640625,
      "learning_rate": 1.734606987182982e-05,
      "loss": 0.8929,
      "step": 588720
    },
    {
      "epoch": 2.063351873464481,
      "grad_norm": 2.96875,
      "learning_rate": 1.734542084316612e-05,
      "loss": 0.89,
      "step": 588730
    },
    {
      "epoch": 2.063386920971377,
      "grad_norm": 2.796875,
      "learning_rate": 1.7344771814502418e-05,
      "loss": 0.8628,
      "step": 588740
    },
    {
      "epoch": 2.063421968478272,
      "grad_norm": 2.9375,
      "learning_rate": 1.7344122785838716e-05,
      "loss": 0.8523,
      "step": 588750
    },
    {
      "epoch": 2.063457015985168,
      "grad_norm": 2.640625,
      "learning_rate": 1.7343473757175014e-05,
      "loss": 0.8845,
      "step": 588760
    },
    {
      "epoch": 2.0634920634920633,
      "grad_norm": 2.859375,
      "learning_rate": 1.734282472851131e-05,
      "loss": 0.8256,
      "step": 588770
    },
    {
      "epoch": 2.063527110998959,
      "grad_norm": 2.828125,
      "learning_rate": 1.734217569984761e-05,
      "loss": 0.8183,
      "step": 588780
    },
    {
      "epoch": 2.063562158505855,
      "grad_norm": 3.0625,
      "learning_rate": 1.7341526671183908e-05,
      "loss": 0.872,
      "step": 588790
    },
    {
      "epoch": 2.06359720601275,
      "grad_norm": 2.53125,
      "learning_rate": 1.7340877642520206e-05,
      "loss": 0.7697,
      "step": 588800
    },
    {
      "epoch": 2.063632253519646,
      "grad_norm": 2.984375,
      "learning_rate": 1.7340228613856504e-05,
      "loss": 0.8644,
      "step": 588810
    },
    {
      "epoch": 2.0636673010265416,
      "grad_norm": 2.984375,
      "learning_rate": 1.73395795851928e-05,
      "loss": 0.8455,
      "step": 588820
    },
    {
      "epoch": 2.063702348533437,
      "grad_norm": 2.9375,
      "learning_rate": 1.73389305565291e-05,
      "loss": 0.8318,
      "step": 588830
    },
    {
      "epoch": 2.0637373960403327,
      "grad_norm": 2.859375,
      "learning_rate": 1.7338281527865398e-05,
      "loss": 0.7475,
      "step": 588840
    },
    {
      "epoch": 2.0637724435472284,
      "grad_norm": 2.9375,
      "learning_rate": 1.7337632499201696e-05,
      "loss": 0.8331,
      "step": 588850
    },
    {
      "epoch": 2.0638074910541238,
      "grad_norm": 3.03125,
      "learning_rate": 1.7336983470537994e-05,
      "loss": 0.8604,
      "step": 588860
    },
    {
      "epoch": 2.0638425385610195,
      "grad_norm": 3.078125,
      "learning_rate": 1.733633444187429e-05,
      "loss": 0.8956,
      "step": 588870
    },
    {
      "epoch": 2.0638775860679153,
      "grad_norm": 2.859375,
      "learning_rate": 1.733568541321059e-05,
      "loss": 0.7923,
      "step": 588880
    },
    {
      "epoch": 2.0639126335748106,
      "grad_norm": 2.609375,
      "learning_rate": 1.7335036384546888e-05,
      "loss": 0.6975,
      "step": 588890
    },
    {
      "epoch": 2.0639476810817063,
      "grad_norm": 3.03125,
      "learning_rate": 1.7334387355883186e-05,
      "loss": 0.8157,
      "step": 588900
    },
    {
      "epoch": 2.0639827285886017,
      "grad_norm": 2.890625,
      "learning_rate": 1.7333738327219484e-05,
      "loss": 0.8921,
      "step": 588910
    },
    {
      "epoch": 2.0640177760954974,
      "grad_norm": 3.015625,
      "learning_rate": 1.733308929855578e-05,
      "loss": 0.829,
      "step": 588920
    },
    {
      "epoch": 2.064052823602393,
      "grad_norm": 3.0625,
      "learning_rate": 1.733244026989208e-05,
      "loss": 0.8052,
      "step": 588930
    },
    {
      "epoch": 2.0640878711092885,
      "grad_norm": 2.984375,
      "learning_rate": 1.7331791241228378e-05,
      "loss": 0.8317,
      "step": 588940
    },
    {
      "epoch": 2.0641229186161842,
      "grad_norm": 3.09375,
      "learning_rate": 1.7331142212564676e-05,
      "loss": 0.812,
      "step": 588950
    },
    {
      "epoch": 2.06415796612308,
      "grad_norm": 3.171875,
      "learning_rate": 1.7330493183900977e-05,
      "loss": 0.7811,
      "step": 588960
    },
    {
      "epoch": 2.0641930136299753,
      "grad_norm": 2.859375,
      "learning_rate": 1.7329844155237275e-05,
      "loss": 0.8653,
      "step": 588970
    },
    {
      "epoch": 2.064228061136871,
      "grad_norm": 3.078125,
      "learning_rate": 1.7329195126573573e-05,
      "loss": 0.8859,
      "step": 588980
    },
    {
      "epoch": 2.064263108643767,
      "grad_norm": 2.515625,
      "learning_rate": 1.732854609790987e-05,
      "loss": 0.7858,
      "step": 588990
    },
    {
      "epoch": 2.064298156150662,
      "grad_norm": 2.875,
      "learning_rate": 1.732789706924617e-05,
      "loss": 0.8077,
      "step": 589000
    },
    {
      "epoch": 2.064333203657558,
      "grad_norm": 2.984375,
      "learning_rate": 1.7327248040582464e-05,
      "loss": 0.7719,
      "step": 589010
    },
    {
      "epoch": 2.064368251164453,
      "grad_norm": 3.046875,
      "learning_rate": 1.732659901191876e-05,
      "loss": 0.828,
      "step": 589020
    },
    {
      "epoch": 2.064403298671349,
      "grad_norm": 2.546875,
      "learning_rate": 1.732594998325506e-05,
      "loss": 0.7079,
      "step": 589030
    },
    {
      "epoch": 2.0644383461782447,
      "grad_norm": 2.5625,
      "learning_rate": 1.7325300954591358e-05,
      "loss": 0.7823,
      "step": 589040
    },
    {
      "epoch": 2.06447339368514,
      "grad_norm": 2.90625,
      "learning_rate": 1.7324651925927656e-05,
      "loss": 0.8421,
      "step": 589050
    },
    {
      "epoch": 2.064508441192036,
      "grad_norm": 2.78125,
      "learning_rate": 1.7324002897263954e-05,
      "loss": 0.7828,
      "step": 589060
    },
    {
      "epoch": 2.0645434886989316,
      "grad_norm": 2.984375,
      "learning_rate": 1.7323353868600255e-05,
      "loss": 0.8888,
      "step": 589070
    },
    {
      "epoch": 2.064578536205827,
      "grad_norm": 2.765625,
      "learning_rate": 1.7322704839936553e-05,
      "loss": 0.8744,
      "step": 589080
    },
    {
      "epoch": 2.0646135837127226,
      "grad_norm": 3.5625,
      "learning_rate": 1.732205581127285e-05,
      "loss": 0.8062,
      "step": 589090
    },
    {
      "epoch": 2.0646486312196184,
      "grad_norm": 2.359375,
      "learning_rate": 1.732140678260915e-05,
      "loss": 0.7193,
      "step": 589100
    },
    {
      "epoch": 2.0646836787265137,
      "grad_norm": 2.734375,
      "learning_rate": 1.7320757753945447e-05,
      "loss": 0.8402,
      "step": 589110
    },
    {
      "epoch": 2.0647187262334095,
      "grad_norm": 3.03125,
      "learning_rate": 1.7320108725281745e-05,
      "loss": 0.7783,
      "step": 589120
    },
    {
      "epoch": 2.0647537737403048,
      "grad_norm": 3.125,
      "learning_rate": 1.7319459696618043e-05,
      "loss": 0.8444,
      "step": 589130
    },
    {
      "epoch": 2.0647888212472005,
      "grad_norm": 2.59375,
      "learning_rate": 1.731881066795434e-05,
      "loss": 0.7967,
      "step": 589140
    },
    {
      "epoch": 2.0648238687540963,
      "grad_norm": 3.25,
      "learning_rate": 1.731816163929064e-05,
      "loss": 0.8484,
      "step": 589150
    },
    {
      "epoch": 2.0648589162609916,
      "grad_norm": 2.21875,
      "learning_rate": 1.7317512610626937e-05,
      "loss": 0.7495,
      "step": 589160
    },
    {
      "epoch": 2.0648939637678874,
      "grad_norm": 3.03125,
      "learning_rate": 1.7316863581963235e-05,
      "loss": 0.8844,
      "step": 589170
    },
    {
      "epoch": 2.064929011274783,
      "grad_norm": 3.421875,
      "learning_rate": 1.7316214553299533e-05,
      "loss": 0.8043,
      "step": 589180
    },
    {
      "epoch": 2.0649640587816784,
      "grad_norm": 3.0,
      "learning_rate": 1.731556552463583e-05,
      "loss": 0.8007,
      "step": 589190
    },
    {
      "epoch": 2.064999106288574,
      "grad_norm": 2.9375,
      "learning_rate": 1.731491649597213e-05,
      "loss": 0.8195,
      "step": 589200
    },
    {
      "epoch": 2.06503415379547,
      "grad_norm": 3.03125,
      "learning_rate": 1.7314267467308427e-05,
      "loss": 0.7257,
      "step": 589210
    },
    {
      "epoch": 2.0650692013023653,
      "grad_norm": 2.828125,
      "learning_rate": 1.7313618438644725e-05,
      "loss": 0.8439,
      "step": 589220
    },
    {
      "epoch": 2.065104248809261,
      "grad_norm": 2.984375,
      "learning_rate": 1.7312969409981023e-05,
      "loss": 0.8371,
      "step": 589230
    },
    {
      "epoch": 2.0651392963161563,
      "grad_norm": 2.984375,
      "learning_rate": 1.731232038131732e-05,
      "loss": 0.8196,
      "step": 589240
    },
    {
      "epoch": 2.065174343823052,
      "grad_norm": 3.234375,
      "learning_rate": 1.731167135265362e-05,
      "loss": 0.8485,
      "step": 589250
    },
    {
      "epoch": 2.065209391329948,
      "grad_norm": 2.828125,
      "learning_rate": 1.7311022323989917e-05,
      "loss": 0.8466,
      "step": 589260
    },
    {
      "epoch": 2.065244438836843,
      "grad_norm": 2.59375,
      "learning_rate": 1.7310373295326215e-05,
      "loss": 0.862,
      "step": 589270
    },
    {
      "epoch": 2.065279486343739,
      "grad_norm": 2.515625,
      "learning_rate": 1.7309724266662513e-05,
      "loss": 0.8475,
      "step": 589280
    },
    {
      "epoch": 2.0653145338506347,
      "grad_norm": 2.34375,
      "learning_rate": 1.730907523799881e-05,
      "loss": 0.808,
      "step": 589290
    },
    {
      "epoch": 2.06534958135753,
      "grad_norm": 2.78125,
      "learning_rate": 1.730842620933511e-05,
      "loss": 0.8564,
      "step": 589300
    },
    {
      "epoch": 2.0653846288644258,
      "grad_norm": 3.046875,
      "learning_rate": 1.7307777180671407e-05,
      "loss": 0.8517,
      "step": 589310
    },
    {
      "epoch": 2.0654196763713215,
      "grad_norm": 2.8125,
      "learning_rate": 1.730712815200771e-05,
      "loss": 0.9294,
      "step": 589320
    },
    {
      "epoch": 2.065454723878217,
      "grad_norm": 3.390625,
      "learning_rate": 1.7306479123344006e-05,
      "loss": 0.8589,
      "step": 589330
    },
    {
      "epoch": 2.0654897713851126,
      "grad_norm": 2.640625,
      "learning_rate": 1.7305830094680304e-05,
      "loss": 0.8366,
      "step": 589340
    },
    {
      "epoch": 2.0655248188920083,
      "grad_norm": 3.171875,
      "learning_rate": 1.7305181066016602e-05,
      "loss": 0.761,
      "step": 589350
    },
    {
      "epoch": 2.0655598663989037,
      "grad_norm": 2.765625,
      "learning_rate": 1.73045320373529e-05,
      "loss": 0.8184,
      "step": 589360
    },
    {
      "epoch": 2.0655949139057994,
      "grad_norm": 2.796875,
      "learning_rate": 1.73038830086892e-05,
      "loss": 0.8152,
      "step": 589370
    },
    {
      "epoch": 2.0656299614126947,
      "grad_norm": 3.359375,
      "learning_rate": 1.7303233980025493e-05,
      "loss": 0.8995,
      "step": 589380
    },
    {
      "epoch": 2.0656650089195905,
      "grad_norm": 2.515625,
      "learning_rate": 1.730258495136179e-05,
      "loss": 0.8942,
      "step": 589390
    },
    {
      "epoch": 2.0657000564264862,
      "grad_norm": 3.03125,
      "learning_rate": 1.730193592269809e-05,
      "loss": 0.8061,
      "step": 589400
    },
    {
      "epoch": 2.0657351039333816,
      "grad_norm": 3.046875,
      "learning_rate": 1.7301286894034387e-05,
      "loss": 0.8333,
      "step": 589410
    },
    {
      "epoch": 2.0657701514402773,
      "grad_norm": 2.703125,
      "learning_rate": 1.7300637865370685e-05,
      "loss": 0.8984,
      "step": 589420
    },
    {
      "epoch": 2.065805198947173,
      "grad_norm": 2.40625,
      "learning_rate": 1.7299988836706983e-05,
      "loss": 0.7934,
      "step": 589430
    },
    {
      "epoch": 2.0658402464540684,
      "grad_norm": 3.0,
      "learning_rate": 1.7299339808043284e-05,
      "loss": 0.8332,
      "step": 589440
    },
    {
      "epoch": 2.065875293960964,
      "grad_norm": 2.859375,
      "learning_rate": 1.7298690779379582e-05,
      "loss": 0.7352,
      "step": 589450
    },
    {
      "epoch": 2.06591034146786,
      "grad_norm": 2.859375,
      "learning_rate": 1.729804175071588e-05,
      "loss": 0.8066,
      "step": 589460
    },
    {
      "epoch": 2.065945388974755,
      "grad_norm": 3.078125,
      "learning_rate": 1.729739272205218e-05,
      "loss": 0.7502,
      "step": 589470
    },
    {
      "epoch": 2.065980436481651,
      "grad_norm": 2.75,
      "learning_rate": 1.7296743693388476e-05,
      "loss": 0.7408,
      "step": 589480
    },
    {
      "epoch": 2.0660154839885463,
      "grad_norm": 3.453125,
      "learning_rate": 1.7296094664724774e-05,
      "loss": 0.9475,
      "step": 589490
    },
    {
      "epoch": 2.066050531495442,
      "grad_norm": 3.125,
      "learning_rate": 1.7295445636061072e-05,
      "loss": 0.8705,
      "step": 589500
    },
    {
      "epoch": 2.066085579002338,
      "grad_norm": 2.546875,
      "learning_rate": 1.729479660739737e-05,
      "loss": 0.838,
      "step": 589510
    },
    {
      "epoch": 2.066120626509233,
      "grad_norm": 3.234375,
      "learning_rate": 1.729414757873367e-05,
      "loss": 0.8347,
      "step": 589520
    },
    {
      "epoch": 2.066155674016129,
      "grad_norm": 2.71875,
      "learning_rate": 1.7293498550069966e-05,
      "loss": 0.8491,
      "step": 589530
    },
    {
      "epoch": 2.0661907215230246,
      "grad_norm": 3.0,
      "learning_rate": 1.7292849521406264e-05,
      "loss": 0.856,
      "step": 589540
    },
    {
      "epoch": 2.06622576902992,
      "grad_norm": 2.8125,
      "learning_rate": 1.7292200492742562e-05,
      "loss": 0.8083,
      "step": 589550
    },
    {
      "epoch": 2.0662608165368157,
      "grad_norm": 2.984375,
      "learning_rate": 1.729155146407886e-05,
      "loss": 0.9667,
      "step": 589560
    },
    {
      "epoch": 2.0662958640437115,
      "grad_norm": 3.59375,
      "learning_rate": 1.729090243541516e-05,
      "loss": 0.7652,
      "step": 589570
    },
    {
      "epoch": 2.0663309115506068,
      "grad_norm": 2.78125,
      "learning_rate": 1.7290253406751456e-05,
      "loss": 0.8206,
      "step": 589580
    },
    {
      "epoch": 2.0663659590575025,
      "grad_norm": 3.03125,
      "learning_rate": 1.7289604378087754e-05,
      "loss": 0.8404,
      "step": 589590
    },
    {
      "epoch": 2.066401006564398,
      "grad_norm": 3.078125,
      "learning_rate": 1.7288955349424052e-05,
      "loss": 0.7873,
      "step": 589600
    },
    {
      "epoch": 2.0664360540712936,
      "grad_norm": 2.203125,
      "learning_rate": 1.728830632076035e-05,
      "loss": 0.7921,
      "step": 589610
    },
    {
      "epoch": 2.0664711015781894,
      "grad_norm": 3.4375,
      "learning_rate": 1.728765729209665e-05,
      "loss": 0.7975,
      "step": 589620
    },
    {
      "epoch": 2.0665061490850847,
      "grad_norm": 2.703125,
      "learning_rate": 1.7287008263432946e-05,
      "loss": 0.843,
      "step": 589630
    },
    {
      "epoch": 2.0665411965919804,
      "grad_norm": 2.484375,
      "learning_rate": 1.7286359234769244e-05,
      "loss": 0.8228,
      "step": 589640
    },
    {
      "epoch": 2.066576244098876,
      "grad_norm": 2.859375,
      "learning_rate": 1.7285710206105542e-05,
      "loss": 0.8129,
      "step": 589650
    },
    {
      "epoch": 2.0666112916057715,
      "grad_norm": 3.09375,
      "learning_rate": 1.728506117744184e-05,
      "loss": 0.7953,
      "step": 589660
    },
    {
      "epoch": 2.0666463391126673,
      "grad_norm": 2.609375,
      "learning_rate": 1.728441214877814e-05,
      "loss": 0.7999,
      "step": 589670
    },
    {
      "epoch": 2.066681386619563,
      "grad_norm": 2.703125,
      "learning_rate": 1.7283763120114436e-05,
      "loss": 0.8348,
      "step": 589680
    },
    {
      "epoch": 2.0667164341264583,
      "grad_norm": 2.96875,
      "learning_rate": 1.7283114091450738e-05,
      "loss": 0.7976,
      "step": 589690
    },
    {
      "epoch": 2.066751481633354,
      "grad_norm": 3.25,
      "learning_rate": 1.7282465062787036e-05,
      "loss": 0.9248,
      "step": 589700
    },
    {
      "epoch": 2.0667865291402494,
      "grad_norm": 2.84375,
      "learning_rate": 1.7281816034123334e-05,
      "loss": 0.8391,
      "step": 589710
    },
    {
      "epoch": 2.066821576647145,
      "grad_norm": 3.265625,
      "learning_rate": 1.7281167005459632e-05,
      "loss": 0.8559,
      "step": 589720
    },
    {
      "epoch": 2.066856624154041,
      "grad_norm": 3.1875,
      "learning_rate": 1.728051797679593e-05,
      "loss": 0.778,
      "step": 589730
    },
    {
      "epoch": 2.0668916716609362,
      "grad_norm": 2.9375,
      "learning_rate": 1.7279868948132228e-05,
      "loss": 0.813,
      "step": 589740
    },
    {
      "epoch": 2.066926719167832,
      "grad_norm": 2.78125,
      "learning_rate": 1.7279219919468526e-05,
      "loss": 0.7984,
      "step": 589750
    },
    {
      "epoch": 2.0669617666747278,
      "grad_norm": 2.53125,
      "learning_rate": 1.727857089080482e-05,
      "loss": 0.8162,
      "step": 589760
    },
    {
      "epoch": 2.066996814181623,
      "grad_norm": 3.25,
      "learning_rate": 1.727792186214112e-05,
      "loss": 0.8394,
      "step": 589770
    },
    {
      "epoch": 2.067031861688519,
      "grad_norm": 2.828125,
      "learning_rate": 1.7277272833477416e-05,
      "loss": 0.8593,
      "step": 589780
    },
    {
      "epoch": 2.0670669091954146,
      "grad_norm": 2.84375,
      "learning_rate": 1.7276623804813714e-05,
      "loss": 0.8163,
      "step": 589790
    },
    {
      "epoch": 2.06710195670231,
      "grad_norm": 2.65625,
      "learning_rate": 1.7275974776150016e-05,
      "loss": 0.8842,
      "step": 589800
    },
    {
      "epoch": 2.0671370042092057,
      "grad_norm": 2.9375,
      "learning_rate": 1.7275325747486314e-05,
      "loss": 0.8823,
      "step": 589810
    },
    {
      "epoch": 2.067172051716101,
      "grad_norm": 3.28125,
      "learning_rate": 1.7274676718822612e-05,
      "loss": 0.8047,
      "step": 589820
    },
    {
      "epoch": 2.0672070992229967,
      "grad_norm": 2.859375,
      "learning_rate": 1.727402769015891e-05,
      "loss": 0.7534,
      "step": 589830
    },
    {
      "epoch": 2.0672421467298925,
      "grad_norm": 2.484375,
      "learning_rate": 1.7273378661495208e-05,
      "loss": 0.6996,
      "step": 589840
    },
    {
      "epoch": 2.067277194236788,
      "grad_norm": 3.03125,
      "learning_rate": 1.7272729632831506e-05,
      "loss": 0.8729,
      "step": 589850
    },
    {
      "epoch": 2.0673122417436836,
      "grad_norm": 2.921875,
      "learning_rate": 1.7272080604167804e-05,
      "loss": 0.7695,
      "step": 589860
    },
    {
      "epoch": 2.0673472892505793,
      "grad_norm": 2.953125,
      "learning_rate": 1.7271431575504102e-05,
      "loss": 0.7717,
      "step": 589870
    },
    {
      "epoch": 2.0673823367574746,
      "grad_norm": 3.171875,
      "learning_rate": 1.72707825468404e-05,
      "loss": 0.8631,
      "step": 589880
    },
    {
      "epoch": 2.0674173842643704,
      "grad_norm": 2.765625,
      "learning_rate": 1.7270133518176698e-05,
      "loss": 0.7853,
      "step": 589890
    },
    {
      "epoch": 2.067452431771266,
      "grad_norm": 2.734375,
      "learning_rate": 1.7269484489512996e-05,
      "loss": 0.7727,
      "step": 589900
    },
    {
      "epoch": 2.0674874792781615,
      "grad_norm": 2.796875,
      "learning_rate": 1.7268835460849294e-05,
      "loss": 0.8148,
      "step": 589910
    },
    {
      "epoch": 2.067522526785057,
      "grad_norm": 2.3125,
      "learning_rate": 1.7268186432185592e-05,
      "loss": 0.8478,
      "step": 589920
    },
    {
      "epoch": 2.0675575742919525,
      "grad_norm": 2.890625,
      "learning_rate": 1.726753740352189e-05,
      "loss": 0.808,
      "step": 589930
    },
    {
      "epoch": 2.0675926217988483,
      "grad_norm": 3.0,
      "learning_rate": 1.726688837485819e-05,
      "loss": 0.8716,
      "step": 589940
    },
    {
      "epoch": 2.067627669305744,
      "grad_norm": 2.65625,
      "learning_rate": 1.7266239346194486e-05,
      "loss": 0.7876,
      "step": 589950
    },
    {
      "epoch": 2.0676627168126394,
      "grad_norm": 3.265625,
      "learning_rate": 1.7265590317530784e-05,
      "loss": 0.8719,
      "step": 589960
    },
    {
      "epoch": 2.067697764319535,
      "grad_norm": 3.3125,
      "learning_rate": 1.7264941288867082e-05,
      "loss": 0.885,
      "step": 589970
    },
    {
      "epoch": 2.067732811826431,
      "grad_norm": 2.953125,
      "learning_rate": 1.726429226020338e-05,
      "loss": 0.7592,
      "step": 589980
    },
    {
      "epoch": 2.067767859333326,
      "grad_norm": 3.03125,
      "learning_rate": 1.7263643231539678e-05,
      "loss": 0.8068,
      "step": 589990
    },
    {
      "epoch": 2.067802906840222,
      "grad_norm": 3.1875,
      "learning_rate": 1.7262994202875976e-05,
      "loss": 0.8972,
      "step": 590000
    },
    {
      "epoch": 2.067802906840222,
      "eval_loss": 0.768334150314331,
      "eval_runtime": 549.8206,
      "eval_samples_per_second": 691.927,
      "eval_steps_per_second": 57.661,
      "step": 590000
    },
    {
      "epoch": 2.0678379543471177,
      "grad_norm": 2.8125,
      "learning_rate": 1.7262345174212274e-05,
      "loss": 0.7985,
      "step": 590010
    },
    {
      "epoch": 2.067873001854013,
      "grad_norm": 2.546875,
      "learning_rate": 1.7261696145548572e-05,
      "loss": 0.8115,
      "step": 590020
    },
    {
      "epoch": 2.0679080493609088,
      "grad_norm": 2.96875,
      "learning_rate": 1.726104711688487e-05,
      "loss": 0.8602,
      "step": 590030
    },
    {
      "epoch": 2.067943096867804,
      "grad_norm": 2.890625,
      "learning_rate": 1.7260398088221168e-05,
      "loss": 0.768,
      "step": 590040
    },
    {
      "epoch": 2.0679781443747,
      "grad_norm": 2.75,
      "learning_rate": 1.7259749059557466e-05,
      "loss": 0.7558,
      "step": 590050
    },
    {
      "epoch": 2.0680131918815956,
      "grad_norm": 3.453125,
      "learning_rate": 1.7259100030893767e-05,
      "loss": 0.8686,
      "step": 590060
    },
    {
      "epoch": 2.068048239388491,
      "grad_norm": 2.625,
      "learning_rate": 1.7258451002230065e-05,
      "loss": 0.7922,
      "step": 590070
    },
    {
      "epoch": 2.0680832868953867,
      "grad_norm": 2.609375,
      "learning_rate": 1.7257801973566363e-05,
      "loss": 0.8525,
      "step": 590080
    },
    {
      "epoch": 2.0681183344022824,
      "grad_norm": 2.90625,
      "learning_rate": 1.725715294490266e-05,
      "loss": 0.7811,
      "step": 590090
    },
    {
      "epoch": 2.0681533819091777,
      "grad_norm": 3.421875,
      "learning_rate": 1.725650391623896e-05,
      "loss": 0.8049,
      "step": 590100
    },
    {
      "epoch": 2.0681884294160735,
      "grad_norm": 2.8125,
      "learning_rate": 1.7255854887575257e-05,
      "loss": 0.8387,
      "step": 590110
    },
    {
      "epoch": 2.0682234769229693,
      "grad_norm": 3.140625,
      "learning_rate": 1.7255205858911555e-05,
      "loss": 0.8976,
      "step": 590120
    },
    {
      "epoch": 2.0682585244298646,
      "grad_norm": 2.546875,
      "learning_rate": 1.725455683024785e-05,
      "loss": 0.6697,
      "step": 590130
    },
    {
      "epoch": 2.0682935719367603,
      "grad_norm": 3.34375,
      "learning_rate": 1.7253907801584148e-05,
      "loss": 0.726,
      "step": 590140
    },
    {
      "epoch": 2.0683286194436556,
      "grad_norm": 2.578125,
      "learning_rate": 1.7253258772920446e-05,
      "loss": 0.8268,
      "step": 590150
    },
    {
      "epoch": 2.0683636669505514,
      "grad_norm": 2.921875,
      "learning_rate": 1.7252609744256744e-05,
      "loss": 0.8355,
      "step": 590160
    },
    {
      "epoch": 2.068398714457447,
      "grad_norm": 2.8125,
      "learning_rate": 1.7251960715593045e-05,
      "loss": 0.7922,
      "step": 590170
    },
    {
      "epoch": 2.0684337619643425,
      "grad_norm": 2.796875,
      "learning_rate": 1.7251311686929343e-05,
      "loss": 0.7417,
      "step": 590180
    },
    {
      "epoch": 2.0684688094712382,
      "grad_norm": 3.40625,
      "learning_rate": 1.725066265826564e-05,
      "loss": 0.8655,
      "step": 590190
    },
    {
      "epoch": 2.068503856978134,
      "grad_norm": 2.8125,
      "learning_rate": 1.725001362960194e-05,
      "loss": 0.8422,
      "step": 590200
    },
    {
      "epoch": 2.0685389044850293,
      "grad_norm": 2.671875,
      "learning_rate": 1.7249364600938237e-05,
      "loss": 0.7941,
      "step": 590210
    },
    {
      "epoch": 2.068573951991925,
      "grad_norm": 2.6875,
      "learning_rate": 1.7248715572274535e-05,
      "loss": 0.8522,
      "step": 590220
    },
    {
      "epoch": 2.068608999498821,
      "grad_norm": 2.921875,
      "learning_rate": 1.7248066543610833e-05,
      "loss": 0.7666,
      "step": 590230
    },
    {
      "epoch": 2.068644047005716,
      "grad_norm": 3.296875,
      "learning_rate": 1.724741751494713e-05,
      "loss": 0.7773,
      "step": 590240
    },
    {
      "epoch": 2.068679094512612,
      "grad_norm": 2.828125,
      "learning_rate": 1.724676848628343e-05,
      "loss": 0.7373,
      "step": 590250
    },
    {
      "epoch": 2.0687141420195077,
      "grad_norm": 2.5,
      "learning_rate": 1.7246119457619727e-05,
      "loss": 0.8148,
      "step": 590260
    },
    {
      "epoch": 2.068749189526403,
      "grad_norm": 2.890625,
      "learning_rate": 1.7245470428956025e-05,
      "loss": 0.8508,
      "step": 590270
    },
    {
      "epoch": 2.0687842370332987,
      "grad_norm": 3.328125,
      "learning_rate": 1.7244821400292323e-05,
      "loss": 0.8673,
      "step": 590280
    },
    {
      "epoch": 2.068819284540194,
      "grad_norm": 2.609375,
      "learning_rate": 1.724417237162862e-05,
      "loss": 0.7408,
      "step": 590290
    },
    {
      "epoch": 2.06885433204709,
      "grad_norm": 3.625,
      "learning_rate": 1.724352334296492e-05,
      "loss": 0.829,
      "step": 590300
    },
    {
      "epoch": 2.0688893795539856,
      "grad_norm": 2.8125,
      "learning_rate": 1.724287431430122e-05,
      "loss": 0.8362,
      "step": 590310
    },
    {
      "epoch": 2.068924427060881,
      "grad_norm": 2.6875,
      "learning_rate": 1.7242225285637515e-05,
      "loss": 0.7755,
      "step": 590320
    },
    {
      "epoch": 2.0689594745677766,
      "grad_norm": 2.828125,
      "learning_rate": 1.7241576256973813e-05,
      "loss": 0.82,
      "step": 590330
    },
    {
      "epoch": 2.0689945220746724,
      "grad_norm": 3.109375,
      "learning_rate": 1.724092722831011e-05,
      "loss": 0.8532,
      "step": 590340
    },
    {
      "epoch": 2.0690295695815677,
      "grad_norm": 2.828125,
      "learning_rate": 1.724027819964641e-05,
      "loss": 0.8719,
      "step": 590350
    },
    {
      "epoch": 2.0690646170884635,
      "grad_norm": 2.890625,
      "learning_rate": 1.7239629170982707e-05,
      "loss": 0.7659,
      "step": 590360
    },
    {
      "epoch": 2.069099664595359,
      "grad_norm": 3.59375,
      "learning_rate": 1.7238980142319005e-05,
      "loss": 0.8558,
      "step": 590370
    },
    {
      "epoch": 2.0691347121022545,
      "grad_norm": 2.8125,
      "learning_rate": 1.7238331113655303e-05,
      "loss": 0.8125,
      "step": 590380
    },
    {
      "epoch": 2.0691697596091503,
      "grad_norm": 2.765625,
      "learning_rate": 1.72376820849916e-05,
      "loss": 0.7503,
      "step": 590390
    },
    {
      "epoch": 2.0692048071160456,
      "grad_norm": 2.953125,
      "learning_rate": 1.72370330563279e-05,
      "loss": 0.7362,
      "step": 590400
    },
    {
      "epoch": 2.0692398546229414,
      "grad_norm": 2.859375,
      "learning_rate": 1.7236384027664197e-05,
      "loss": 0.8501,
      "step": 590410
    },
    {
      "epoch": 2.069274902129837,
      "grad_norm": 3.359375,
      "learning_rate": 1.72357349990005e-05,
      "loss": 0.8264,
      "step": 590420
    },
    {
      "epoch": 2.0693099496367324,
      "grad_norm": 3.078125,
      "learning_rate": 1.7235085970336796e-05,
      "loss": 0.8638,
      "step": 590430
    },
    {
      "epoch": 2.069344997143628,
      "grad_norm": 2.734375,
      "learning_rate": 1.7234436941673094e-05,
      "loss": 0.7965,
      "step": 590440
    },
    {
      "epoch": 2.069380044650524,
      "grad_norm": 2.984375,
      "learning_rate": 1.7233787913009392e-05,
      "loss": 0.8333,
      "step": 590450
    },
    {
      "epoch": 2.0694150921574193,
      "grad_norm": 3.0,
      "learning_rate": 1.723313888434569e-05,
      "loss": 0.7167,
      "step": 590460
    },
    {
      "epoch": 2.069450139664315,
      "grad_norm": 2.890625,
      "learning_rate": 1.723248985568199e-05,
      "loss": 0.7914,
      "step": 590470
    },
    {
      "epoch": 2.0694851871712108,
      "grad_norm": 2.921875,
      "learning_rate": 1.7231840827018286e-05,
      "loss": 0.8139,
      "step": 590480
    },
    {
      "epoch": 2.069520234678106,
      "grad_norm": 2.703125,
      "learning_rate": 1.7231191798354584e-05,
      "loss": 0.8614,
      "step": 590490
    },
    {
      "epoch": 2.069555282185002,
      "grad_norm": 2.75,
      "learning_rate": 1.7230542769690882e-05,
      "loss": 0.8335,
      "step": 590500
    },
    {
      "epoch": 2.069590329691897,
      "grad_norm": 2.390625,
      "learning_rate": 1.7229893741027177e-05,
      "loss": 0.6835,
      "step": 590510
    },
    {
      "epoch": 2.069625377198793,
      "grad_norm": 3.03125,
      "learning_rate": 1.7229244712363475e-05,
      "loss": 0.8778,
      "step": 590520
    },
    {
      "epoch": 2.0696604247056887,
      "grad_norm": 3.171875,
      "learning_rate": 1.7228595683699773e-05,
      "loss": 0.8454,
      "step": 590530
    },
    {
      "epoch": 2.069695472212584,
      "grad_norm": 2.796875,
      "learning_rate": 1.7227946655036074e-05,
      "loss": 0.831,
      "step": 590540
    },
    {
      "epoch": 2.0697305197194797,
      "grad_norm": 3.078125,
      "learning_rate": 1.7227297626372372e-05,
      "loss": 0.7921,
      "step": 590550
    },
    {
      "epoch": 2.0697655672263755,
      "grad_norm": 3.21875,
      "learning_rate": 1.722664859770867e-05,
      "loss": 0.7708,
      "step": 590560
    },
    {
      "epoch": 2.069800614733271,
      "grad_norm": 2.84375,
      "learning_rate": 1.722599956904497e-05,
      "loss": 0.8565,
      "step": 590570
    },
    {
      "epoch": 2.0698356622401666,
      "grad_norm": 2.96875,
      "learning_rate": 1.7225350540381266e-05,
      "loss": 0.8005,
      "step": 590580
    },
    {
      "epoch": 2.0698707097470623,
      "grad_norm": 3.484375,
      "learning_rate": 1.7224701511717564e-05,
      "loss": 0.8528,
      "step": 590590
    },
    {
      "epoch": 2.0699057572539576,
      "grad_norm": 3.234375,
      "learning_rate": 1.7224052483053862e-05,
      "loss": 0.8265,
      "step": 590600
    },
    {
      "epoch": 2.0699408047608534,
      "grad_norm": 2.96875,
      "learning_rate": 1.722340345439016e-05,
      "loss": 0.9426,
      "step": 590610
    },
    {
      "epoch": 2.069975852267749,
      "grad_norm": 3.328125,
      "learning_rate": 1.722275442572646e-05,
      "loss": 0.8574,
      "step": 590620
    },
    {
      "epoch": 2.0700108997746445,
      "grad_norm": 2.78125,
      "learning_rate": 1.7222105397062756e-05,
      "loss": 0.8055,
      "step": 590630
    },
    {
      "epoch": 2.0700459472815402,
      "grad_norm": 3.390625,
      "learning_rate": 1.7221456368399054e-05,
      "loss": 0.8142,
      "step": 590640
    },
    {
      "epoch": 2.0700809947884355,
      "grad_norm": 2.71875,
      "learning_rate": 1.7220807339735352e-05,
      "loss": 0.844,
      "step": 590650
    },
    {
      "epoch": 2.0701160422953313,
      "grad_norm": 2.515625,
      "learning_rate": 1.722015831107165e-05,
      "loss": 0.7325,
      "step": 590660
    },
    {
      "epoch": 2.070151089802227,
      "grad_norm": 3.03125,
      "learning_rate": 1.721950928240795e-05,
      "loss": 0.7741,
      "step": 590670
    },
    {
      "epoch": 2.0701861373091224,
      "grad_norm": 2.875,
      "learning_rate": 1.721886025374425e-05,
      "loss": 0.8533,
      "step": 590680
    },
    {
      "epoch": 2.070221184816018,
      "grad_norm": 2.484375,
      "learning_rate": 1.7218211225080548e-05,
      "loss": 0.7991,
      "step": 590690
    },
    {
      "epoch": 2.070256232322914,
      "grad_norm": 2.859375,
      "learning_rate": 1.7217562196416842e-05,
      "loss": 0.8434,
      "step": 590700
    },
    {
      "epoch": 2.070291279829809,
      "grad_norm": 2.984375,
      "learning_rate": 1.721691316775314e-05,
      "loss": 0.8382,
      "step": 590710
    },
    {
      "epoch": 2.070326327336705,
      "grad_norm": 2.78125,
      "learning_rate": 1.721626413908944e-05,
      "loss": 0.7681,
      "step": 590720
    },
    {
      "epoch": 2.0703613748436007,
      "grad_norm": 3.109375,
      "learning_rate": 1.7215615110425736e-05,
      "loss": 0.7991,
      "step": 590730
    },
    {
      "epoch": 2.070396422350496,
      "grad_norm": 2.84375,
      "learning_rate": 1.7214966081762034e-05,
      "loss": 0.7823,
      "step": 590740
    },
    {
      "epoch": 2.070431469857392,
      "grad_norm": 2.90625,
      "learning_rate": 1.7214317053098332e-05,
      "loss": 0.6953,
      "step": 590750
    },
    {
      "epoch": 2.070466517364287,
      "grad_norm": 3.046875,
      "learning_rate": 1.721366802443463e-05,
      "loss": 0.7628,
      "step": 590760
    },
    {
      "epoch": 2.070501564871183,
      "grad_norm": 2.625,
      "learning_rate": 1.721301899577093e-05,
      "loss": 0.8056,
      "step": 590770
    },
    {
      "epoch": 2.0705366123780786,
      "grad_norm": 2.703125,
      "learning_rate": 1.7212369967107226e-05,
      "loss": 0.795,
      "step": 590780
    },
    {
      "epoch": 2.070571659884974,
      "grad_norm": 2.796875,
      "learning_rate": 1.7211720938443528e-05,
      "loss": 0.7904,
      "step": 590790
    },
    {
      "epoch": 2.0706067073918697,
      "grad_norm": 3.171875,
      "learning_rate": 1.7211071909779826e-05,
      "loss": 0.8362,
      "step": 590800
    },
    {
      "epoch": 2.0706417548987655,
      "grad_norm": 3.296875,
      "learning_rate": 1.7210422881116124e-05,
      "loss": 0.8206,
      "step": 590810
    },
    {
      "epoch": 2.0706768024056608,
      "grad_norm": 2.203125,
      "learning_rate": 1.7209773852452422e-05,
      "loss": 0.7652,
      "step": 590820
    },
    {
      "epoch": 2.0707118499125565,
      "grad_norm": 3.03125,
      "learning_rate": 1.720912482378872e-05,
      "loss": 0.8163,
      "step": 590830
    },
    {
      "epoch": 2.0707468974194523,
      "grad_norm": 2.765625,
      "learning_rate": 1.7208475795125018e-05,
      "loss": 0.8413,
      "step": 590840
    },
    {
      "epoch": 2.0707819449263476,
      "grad_norm": 2.4375,
      "learning_rate": 1.7207826766461316e-05,
      "loss": 0.7777,
      "step": 590850
    },
    {
      "epoch": 2.0708169924332434,
      "grad_norm": 3.25,
      "learning_rate": 1.7207177737797614e-05,
      "loss": 0.7232,
      "step": 590860
    },
    {
      "epoch": 2.0708520399401387,
      "grad_norm": 3.171875,
      "learning_rate": 1.7206528709133912e-05,
      "loss": 0.8927,
      "step": 590870
    },
    {
      "epoch": 2.0708870874470344,
      "grad_norm": 3.46875,
      "learning_rate": 1.720587968047021e-05,
      "loss": 0.8399,
      "step": 590880
    },
    {
      "epoch": 2.07092213495393,
      "grad_norm": 3.140625,
      "learning_rate": 1.7205230651806504e-05,
      "loss": 0.8688,
      "step": 590890
    },
    {
      "epoch": 2.0709571824608255,
      "grad_norm": 2.484375,
      "learning_rate": 1.7204581623142806e-05,
      "loss": 0.756,
      "step": 590900
    },
    {
      "epoch": 2.0709922299677213,
      "grad_norm": 2.5,
      "learning_rate": 1.7203932594479104e-05,
      "loss": 0.8071,
      "step": 590910
    },
    {
      "epoch": 2.071027277474617,
      "grad_norm": 2.828125,
      "learning_rate": 1.7203283565815402e-05,
      "loss": 0.841,
      "step": 590920
    },
    {
      "epoch": 2.0710623249815123,
      "grad_norm": 3.203125,
      "learning_rate": 1.72026345371517e-05,
      "loss": 0.8451,
      "step": 590930
    },
    {
      "epoch": 2.071097372488408,
      "grad_norm": 2.796875,
      "learning_rate": 1.7201985508487998e-05,
      "loss": 0.817,
      "step": 590940
    },
    {
      "epoch": 2.071132419995304,
      "grad_norm": 2.984375,
      "learning_rate": 1.7201336479824296e-05,
      "loss": 0.7888,
      "step": 590950
    },
    {
      "epoch": 2.071167467502199,
      "grad_norm": 2.578125,
      "learning_rate": 1.7200687451160594e-05,
      "loss": 0.7396,
      "step": 590960
    },
    {
      "epoch": 2.071202515009095,
      "grad_norm": 2.71875,
      "learning_rate": 1.7200038422496892e-05,
      "loss": 0.7777,
      "step": 590970
    },
    {
      "epoch": 2.0712375625159902,
      "grad_norm": 2.796875,
      "learning_rate": 1.719938939383319e-05,
      "loss": 0.809,
      "step": 590980
    },
    {
      "epoch": 2.071272610022886,
      "grad_norm": 3.5,
      "learning_rate": 1.7198740365169488e-05,
      "loss": 0.9002,
      "step": 590990
    },
    {
      "epoch": 2.0713076575297817,
      "grad_norm": 3.09375,
      "learning_rate": 1.7198091336505786e-05,
      "loss": 0.8289,
      "step": 591000
    },
    {
      "epoch": 2.071342705036677,
      "grad_norm": 2.625,
      "learning_rate": 1.7197442307842084e-05,
      "loss": 0.8146,
      "step": 591010
    },
    {
      "epoch": 2.071377752543573,
      "grad_norm": 2.671875,
      "learning_rate": 1.7196793279178382e-05,
      "loss": 0.7928,
      "step": 591020
    },
    {
      "epoch": 2.0714128000504686,
      "grad_norm": 3.015625,
      "learning_rate": 1.719614425051468e-05,
      "loss": 0.8772,
      "step": 591030
    },
    {
      "epoch": 2.071447847557364,
      "grad_norm": 2.546875,
      "learning_rate": 1.719549522185098e-05,
      "loss": 0.8136,
      "step": 591040
    },
    {
      "epoch": 2.0714828950642596,
      "grad_norm": 3.359375,
      "learning_rate": 1.719484619318728e-05,
      "loss": 0.8357,
      "step": 591050
    },
    {
      "epoch": 2.0715179425711554,
      "grad_norm": 3.015625,
      "learning_rate": 1.7194197164523577e-05,
      "loss": 0.8044,
      "step": 591060
    },
    {
      "epoch": 2.0715529900780507,
      "grad_norm": 2.65625,
      "learning_rate": 1.7193548135859872e-05,
      "loss": 0.871,
      "step": 591070
    },
    {
      "epoch": 2.0715880375849465,
      "grad_norm": 2.90625,
      "learning_rate": 1.719289910719617e-05,
      "loss": 0.8233,
      "step": 591080
    },
    {
      "epoch": 2.071623085091842,
      "grad_norm": 3.015625,
      "learning_rate": 1.7192250078532468e-05,
      "loss": 0.7857,
      "step": 591090
    },
    {
      "epoch": 2.0716581325987375,
      "grad_norm": 3.03125,
      "learning_rate": 1.7191601049868766e-05,
      "loss": 0.846,
      "step": 591100
    },
    {
      "epoch": 2.0716931801056333,
      "grad_norm": 3.03125,
      "learning_rate": 1.7190952021205064e-05,
      "loss": 0.8482,
      "step": 591110
    },
    {
      "epoch": 2.0717282276125286,
      "grad_norm": 2.90625,
      "learning_rate": 1.7190302992541362e-05,
      "loss": 0.8078,
      "step": 591120
    },
    {
      "epoch": 2.0717632751194244,
      "grad_norm": 2.9375,
      "learning_rate": 1.718965396387766e-05,
      "loss": 0.7828,
      "step": 591130
    },
    {
      "epoch": 2.07179832262632,
      "grad_norm": 4.15625,
      "learning_rate": 1.7189004935213958e-05,
      "loss": 0.7958,
      "step": 591140
    },
    {
      "epoch": 2.0718333701332154,
      "grad_norm": 2.9375,
      "learning_rate": 1.7188355906550256e-05,
      "loss": 0.7472,
      "step": 591150
    },
    {
      "epoch": 2.071868417640111,
      "grad_norm": 2.75,
      "learning_rate": 1.7187706877886557e-05,
      "loss": 0.6753,
      "step": 591160
    },
    {
      "epoch": 2.071903465147007,
      "grad_norm": 2.9375,
      "learning_rate": 1.7187057849222855e-05,
      "loss": 0.7876,
      "step": 591170
    },
    {
      "epoch": 2.0719385126539023,
      "grad_norm": 2.71875,
      "learning_rate": 1.7186408820559153e-05,
      "loss": 0.8193,
      "step": 591180
    },
    {
      "epoch": 2.071973560160798,
      "grad_norm": 3.15625,
      "learning_rate": 1.718575979189545e-05,
      "loss": 0.7786,
      "step": 591190
    },
    {
      "epoch": 2.0720086076676933,
      "grad_norm": 3.40625,
      "learning_rate": 1.718511076323175e-05,
      "loss": 0.8115,
      "step": 591200
    },
    {
      "epoch": 2.072043655174589,
      "grad_norm": 2.671875,
      "learning_rate": 1.7184461734568047e-05,
      "loss": 0.8262,
      "step": 591210
    },
    {
      "epoch": 2.072078702681485,
      "grad_norm": 3.109375,
      "learning_rate": 1.7183812705904345e-05,
      "loss": 0.9271,
      "step": 591220
    },
    {
      "epoch": 2.07211375018838,
      "grad_norm": 3.03125,
      "learning_rate": 1.7183163677240643e-05,
      "loss": 0.8119,
      "step": 591230
    },
    {
      "epoch": 2.072148797695276,
      "grad_norm": 2.921875,
      "learning_rate": 1.718251464857694e-05,
      "loss": 0.8534,
      "step": 591240
    },
    {
      "epoch": 2.0721838452021717,
      "grad_norm": 2.6875,
      "learning_rate": 1.718186561991324e-05,
      "loss": 0.7845,
      "step": 591250
    },
    {
      "epoch": 2.072218892709067,
      "grad_norm": 3.546875,
      "learning_rate": 1.7181216591249534e-05,
      "loss": 0.7669,
      "step": 591260
    },
    {
      "epoch": 2.0722539402159628,
      "grad_norm": 3.046875,
      "learning_rate": 1.7180567562585835e-05,
      "loss": 0.8974,
      "step": 591270
    },
    {
      "epoch": 2.0722889877228585,
      "grad_norm": 2.734375,
      "learning_rate": 1.7179918533922133e-05,
      "loss": 0.804,
      "step": 591280
    },
    {
      "epoch": 2.072324035229754,
      "grad_norm": 2.890625,
      "learning_rate": 1.717926950525843e-05,
      "loss": 0.8578,
      "step": 591290
    },
    {
      "epoch": 2.0723590827366496,
      "grad_norm": 2.984375,
      "learning_rate": 1.717862047659473e-05,
      "loss": 0.8129,
      "step": 591300
    },
    {
      "epoch": 2.072394130243545,
      "grad_norm": 2.96875,
      "learning_rate": 1.7177971447931027e-05,
      "loss": 0.7291,
      "step": 591310
    },
    {
      "epoch": 2.0724291777504407,
      "grad_norm": 2.3125,
      "learning_rate": 1.7177322419267325e-05,
      "loss": 0.8339,
      "step": 591320
    },
    {
      "epoch": 2.0724642252573364,
      "grad_norm": 3.625,
      "learning_rate": 1.7176673390603623e-05,
      "loss": 0.8156,
      "step": 591330
    },
    {
      "epoch": 2.0724992727642317,
      "grad_norm": 2.765625,
      "learning_rate": 1.717602436193992e-05,
      "loss": 0.7194,
      "step": 591340
    },
    {
      "epoch": 2.0725343202711275,
      "grad_norm": 3.0625,
      "learning_rate": 1.717537533327622e-05,
      "loss": 0.8818,
      "step": 591350
    },
    {
      "epoch": 2.0725693677780233,
      "grad_norm": 2.9375,
      "learning_rate": 1.7174726304612517e-05,
      "loss": 0.8006,
      "step": 591360
    },
    {
      "epoch": 2.0726044152849186,
      "grad_norm": 2.8125,
      "learning_rate": 1.7174077275948815e-05,
      "loss": 0.7732,
      "step": 591370
    },
    {
      "epoch": 2.0726394627918143,
      "grad_norm": 2.9375,
      "learning_rate": 1.7173428247285113e-05,
      "loss": 0.8048,
      "step": 591380
    },
    {
      "epoch": 2.07267451029871,
      "grad_norm": 2.625,
      "learning_rate": 1.717277921862141e-05,
      "loss": 0.7834,
      "step": 591390
    },
    {
      "epoch": 2.0727095578056054,
      "grad_norm": 2.84375,
      "learning_rate": 1.717213018995771e-05,
      "loss": 0.8228,
      "step": 591400
    },
    {
      "epoch": 2.072744605312501,
      "grad_norm": 2.859375,
      "learning_rate": 1.717148116129401e-05,
      "loss": 0.7421,
      "step": 591410
    },
    {
      "epoch": 2.0727796528193965,
      "grad_norm": 2.9375,
      "learning_rate": 1.717083213263031e-05,
      "loss": 0.891,
      "step": 591420
    },
    {
      "epoch": 2.0728147003262922,
      "grad_norm": 2.84375,
      "learning_rate": 1.7170183103966607e-05,
      "loss": 0.8001,
      "step": 591430
    },
    {
      "epoch": 2.072849747833188,
      "grad_norm": 2.953125,
      "learning_rate": 1.7169534075302905e-05,
      "loss": 0.7527,
      "step": 591440
    },
    {
      "epoch": 2.0728847953400833,
      "grad_norm": 2.78125,
      "learning_rate": 1.71688850466392e-05,
      "loss": 0.8052,
      "step": 591450
    },
    {
      "epoch": 2.072919842846979,
      "grad_norm": 3.140625,
      "learning_rate": 1.7168236017975497e-05,
      "loss": 0.785,
      "step": 591460
    },
    {
      "epoch": 2.072954890353875,
      "grad_norm": 3.109375,
      "learning_rate": 1.7167586989311795e-05,
      "loss": 0.9167,
      "step": 591470
    },
    {
      "epoch": 2.07298993786077,
      "grad_norm": 3.03125,
      "learning_rate": 1.7166937960648093e-05,
      "loss": 0.8368,
      "step": 591480
    },
    {
      "epoch": 2.073024985367666,
      "grad_norm": 2.5625,
      "learning_rate": 1.716628893198439e-05,
      "loss": 0.7027,
      "step": 591490
    },
    {
      "epoch": 2.0730600328745616,
      "grad_norm": 2.859375,
      "learning_rate": 1.716563990332069e-05,
      "loss": 0.8289,
      "step": 591500
    },
    {
      "epoch": 2.073095080381457,
      "grad_norm": 3.3125,
      "learning_rate": 1.7164990874656987e-05,
      "loss": 0.8542,
      "step": 591510
    },
    {
      "epoch": 2.0731301278883527,
      "grad_norm": 2.921875,
      "learning_rate": 1.716434184599329e-05,
      "loss": 0.7495,
      "step": 591520
    },
    {
      "epoch": 2.0731651753952485,
      "grad_norm": 3.171875,
      "learning_rate": 1.7163692817329587e-05,
      "loss": 0.7499,
      "step": 591530
    },
    {
      "epoch": 2.073200222902144,
      "grad_norm": 2.8125,
      "learning_rate": 1.7163043788665885e-05,
      "loss": 0.8299,
      "step": 591540
    },
    {
      "epoch": 2.0732352704090395,
      "grad_norm": 2.953125,
      "learning_rate": 1.7162394760002183e-05,
      "loss": 0.8281,
      "step": 591550
    },
    {
      "epoch": 2.073270317915935,
      "grad_norm": 3.125,
      "learning_rate": 1.716174573133848e-05,
      "loss": 0.8534,
      "step": 591560
    },
    {
      "epoch": 2.0733053654228306,
      "grad_norm": 3.15625,
      "learning_rate": 1.716109670267478e-05,
      "loss": 0.8335,
      "step": 591570
    },
    {
      "epoch": 2.0733404129297264,
      "grad_norm": 3.40625,
      "learning_rate": 1.7160447674011077e-05,
      "loss": 0.8685,
      "step": 591580
    },
    {
      "epoch": 2.0733754604366217,
      "grad_norm": 3.171875,
      "learning_rate": 1.7159798645347375e-05,
      "loss": 0.846,
      "step": 591590
    },
    {
      "epoch": 2.0734105079435174,
      "grad_norm": 2.5,
      "learning_rate": 1.7159149616683673e-05,
      "loss": 0.8258,
      "step": 591600
    },
    {
      "epoch": 2.073445555450413,
      "grad_norm": 3.046875,
      "learning_rate": 1.715850058801997e-05,
      "loss": 0.8644,
      "step": 591610
    },
    {
      "epoch": 2.0734806029573085,
      "grad_norm": 2.921875,
      "learning_rate": 1.715785155935627e-05,
      "loss": 0.8317,
      "step": 591620
    },
    {
      "epoch": 2.0735156504642043,
      "grad_norm": 3.1875,
      "learning_rate": 1.7157202530692567e-05,
      "loss": 0.7317,
      "step": 591630
    },
    {
      "epoch": 2.0735506979711,
      "grad_norm": 2.796875,
      "learning_rate": 1.7156553502028865e-05,
      "loss": 0.8214,
      "step": 591640
    },
    {
      "epoch": 2.0735857454779953,
      "grad_norm": 2.6875,
      "learning_rate": 1.7155904473365163e-05,
      "loss": 0.8191,
      "step": 591650
    },
    {
      "epoch": 2.073620792984891,
      "grad_norm": 2.765625,
      "learning_rate": 1.715525544470146e-05,
      "loss": 0.8064,
      "step": 591660
    },
    {
      "epoch": 2.0736558404917864,
      "grad_norm": 3.28125,
      "learning_rate": 1.715460641603776e-05,
      "loss": 0.82,
      "step": 591670
    },
    {
      "epoch": 2.073690887998682,
      "grad_norm": 3.25,
      "learning_rate": 1.7153957387374057e-05,
      "loss": 0.7781,
      "step": 591680
    },
    {
      "epoch": 2.073725935505578,
      "grad_norm": 3.0625,
      "learning_rate": 1.7153308358710355e-05,
      "loss": 0.7657,
      "step": 591690
    },
    {
      "epoch": 2.0737609830124732,
      "grad_norm": 2.546875,
      "learning_rate": 1.7152659330046653e-05,
      "loss": 0.809,
      "step": 591700
    },
    {
      "epoch": 2.073796030519369,
      "grad_norm": 2.875,
      "learning_rate": 1.715201030138295e-05,
      "loss": 0.7864,
      "step": 591710
    },
    {
      "epoch": 2.0738310780262648,
      "grad_norm": 3.0,
      "learning_rate": 1.715136127271925e-05,
      "loss": 0.7941,
      "step": 591720
    },
    {
      "epoch": 2.07386612553316,
      "grad_norm": 2.515625,
      "learning_rate": 1.7150712244055547e-05,
      "loss": 0.7828,
      "step": 591730
    },
    {
      "epoch": 2.073901173040056,
      "grad_norm": 3.046875,
      "learning_rate": 1.7150063215391845e-05,
      "loss": 0.8165,
      "step": 591740
    },
    {
      "epoch": 2.0739362205469516,
      "grad_norm": 3.1875,
      "learning_rate": 1.7149414186728143e-05,
      "loss": 0.8849,
      "step": 591750
    },
    {
      "epoch": 2.073971268053847,
      "grad_norm": 2.8125,
      "learning_rate": 1.714876515806444e-05,
      "loss": 0.8047,
      "step": 591760
    },
    {
      "epoch": 2.0740063155607427,
      "grad_norm": 2.6875,
      "learning_rate": 1.714811612940074e-05,
      "loss": 0.7799,
      "step": 591770
    },
    {
      "epoch": 2.074041363067638,
      "grad_norm": 3.25,
      "learning_rate": 1.714746710073704e-05,
      "loss": 0.8502,
      "step": 591780
    },
    {
      "epoch": 2.0740764105745337,
      "grad_norm": 2.546875,
      "learning_rate": 1.7146818072073338e-05,
      "loss": 0.7834,
      "step": 591790
    },
    {
      "epoch": 2.0741114580814295,
      "grad_norm": 2.84375,
      "learning_rate": 1.7146169043409636e-05,
      "loss": 0.8219,
      "step": 591800
    },
    {
      "epoch": 2.074146505588325,
      "grad_norm": 3.171875,
      "learning_rate": 1.7145520014745934e-05,
      "loss": 0.8627,
      "step": 591810
    },
    {
      "epoch": 2.0741815530952206,
      "grad_norm": 3.25,
      "learning_rate": 1.7144870986082232e-05,
      "loss": 0.8602,
      "step": 591820
    },
    {
      "epoch": 2.0742166006021163,
      "grad_norm": 3.125,
      "learning_rate": 1.7144221957418527e-05,
      "loss": 0.7811,
      "step": 591830
    },
    {
      "epoch": 2.0742516481090116,
      "grad_norm": 3.078125,
      "learning_rate": 1.7143572928754825e-05,
      "loss": 0.8197,
      "step": 591840
    },
    {
      "epoch": 2.0742866956159074,
      "grad_norm": 2.890625,
      "learning_rate": 1.7142923900091123e-05,
      "loss": 0.8134,
      "step": 591850
    },
    {
      "epoch": 2.074321743122803,
      "grad_norm": 2.5,
      "learning_rate": 1.714227487142742e-05,
      "loss": 0.8176,
      "step": 591860
    },
    {
      "epoch": 2.0743567906296985,
      "grad_norm": 2.625,
      "learning_rate": 1.714162584276372e-05,
      "loss": 0.8852,
      "step": 591870
    },
    {
      "epoch": 2.0743918381365942,
      "grad_norm": 2.515625,
      "learning_rate": 1.7140976814100017e-05,
      "loss": 0.7664,
      "step": 591880
    },
    {
      "epoch": 2.0744268856434895,
      "grad_norm": 3.015625,
      "learning_rate": 1.7140327785436318e-05,
      "loss": 0.85,
      "step": 591890
    },
    {
      "epoch": 2.0744619331503853,
      "grad_norm": 2.625,
      "learning_rate": 1.7139678756772616e-05,
      "loss": 0.7996,
      "step": 591900
    },
    {
      "epoch": 2.074496980657281,
      "grad_norm": 2.9375,
      "learning_rate": 1.7139029728108914e-05,
      "loss": 0.7411,
      "step": 591910
    },
    {
      "epoch": 2.0745320281641764,
      "grad_norm": 2.984375,
      "learning_rate": 1.7138380699445212e-05,
      "loss": 0.856,
      "step": 591920
    },
    {
      "epoch": 2.074567075671072,
      "grad_norm": 3.046875,
      "learning_rate": 1.713773167078151e-05,
      "loss": 0.8765,
      "step": 591930
    },
    {
      "epoch": 2.074602123177968,
      "grad_norm": 3.328125,
      "learning_rate": 1.7137082642117808e-05,
      "loss": 0.8264,
      "step": 591940
    },
    {
      "epoch": 2.074637170684863,
      "grad_norm": 3.125,
      "learning_rate": 1.7136433613454106e-05,
      "loss": 0.801,
      "step": 591950
    },
    {
      "epoch": 2.074672218191759,
      "grad_norm": 2.3125,
      "learning_rate": 1.7135784584790404e-05,
      "loss": 0.8636,
      "step": 591960
    },
    {
      "epoch": 2.0747072656986547,
      "grad_norm": 2.890625,
      "learning_rate": 1.7135135556126702e-05,
      "loss": 0.8468,
      "step": 591970
    },
    {
      "epoch": 2.07474231320555,
      "grad_norm": 2.890625,
      "learning_rate": 1.7134486527463e-05,
      "loss": 0.7832,
      "step": 591980
    },
    {
      "epoch": 2.074777360712446,
      "grad_norm": 3.125,
      "learning_rate": 1.7133837498799298e-05,
      "loss": 0.8722,
      "step": 591990
    },
    {
      "epoch": 2.0748124082193415,
      "grad_norm": 2.703125,
      "learning_rate": 1.7133188470135596e-05,
      "loss": 0.8085,
      "step": 592000
    },
    {
      "epoch": 2.074847455726237,
      "grad_norm": 2.78125,
      "learning_rate": 1.7132539441471894e-05,
      "loss": 0.846,
      "step": 592010
    },
    {
      "epoch": 2.0748825032331326,
      "grad_norm": 3.234375,
      "learning_rate": 1.7131890412808192e-05,
      "loss": 0.8794,
      "step": 592020
    },
    {
      "epoch": 2.074917550740028,
      "grad_norm": 3.234375,
      "learning_rate": 1.713124138414449e-05,
      "loss": 0.8535,
      "step": 592030
    },
    {
      "epoch": 2.0749525982469237,
      "grad_norm": 2.84375,
      "learning_rate": 1.7130592355480788e-05,
      "loss": 0.8307,
      "step": 592040
    },
    {
      "epoch": 2.0749876457538194,
      "grad_norm": 2.5625,
      "learning_rate": 1.7129943326817086e-05,
      "loss": 0.8241,
      "step": 592050
    },
    {
      "epoch": 2.0750226932607148,
      "grad_norm": 2.8125,
      "learning_rate": 1.7129294298153384e-05,
      "loss": 0.8676,
      "step": 592060
    },
    {
      "epoch": 2.0750577407676105,
      "grad_norm": 3.484375,
      "learning_rate": 1.7128645269489682e-05,
      "loss": 0.9343,
      "step": 592070
    },
    {
      "epoch": 2.0750927882745063,
      "grad_norm": 2.859375,
      "learning_rate": 1.712799624082598e-05,
      "loss": 0.804,
      "step": 592080
    },
    {
      "epoch": 2.0751278357814016,
      "grad_norm": 2.640625,
      "learning_rate": 1.7127347212162278e-05,
      "loss": 0.7553,
      "step": 592090
    },
    {
      "epoch": 2.0751628832882973,
      "grad_norm": 2.890625,
      "learning_rate": 1.7126698183498576e-05,
      "loss": 0.7161,
      "step": 592100
    },
    {
      "epoch": 2.075197930795193,
      "grad_norm": 2.796875,
      "learning_rate": 1.7126049154834874e-05,
      "loss": 0.8435,
      "step": 592110
    },
    {
      "epoch": 2.0752329783020884,
      "grad_norm": 3.0625,
      "learning_rate": 1.7125400126171172e-05,
      "loss": 0.8361,
      "step": 592120
    },
    {
      "epoch": 2.075268025808984,
      "grad_norm": 2.453125,
      "learning_rate": 1.712475109750747e-05,
      "loss": 0.8534,
      "step": 592130
    },
    {
      "epoch": 2.0753030733158795,
      "grad_norm": 2.5625,
      "learning_rate": 1.712410206884377e-05,
      "loss": 0.7897,
      "step": 592140
    },
    {
      "epoch": 2.0753381208227752,
      "grad_norm": 2.984375,
      "learning_rate": 1.712345304018007e-05,
      "loss": 0.8359,
      "step": 592150
    },
    {
      "epoch": 2.075373168329671,
      "grad_norm": 3.0,
      "learning_rate": 1.7122804011516367e-05,
      "loss": 0.8295,
      "step": 592160
    },
    {
      "epoch": 2.0754082158365663,
      "grad_norm": 3.078125,
      "learning_rate": 1.7122154982852665e-05,
      "loss": 0.7987,
      "step": 592170
    },
    {
      "epoch": 2.075443263343462,
      "grad_norm": 3.03125,
      "learning_rate": 1.7121505954188963e-05,
      "loss": 0.8068,
      "step": 592180
    },
    {
      "epoch": 2.075478310850358,
      "grad_norm": 2.609375,
      "learning_rate": 1.712085692552526e-05,
      "loss": 0.7595,
      "step": 592190
    },
    {
      "epoch": 2.075513358357253,
      "grad_norm": 3.140625,
      "learning_rate": 1.7120207896861556e-05,
      "loss": 0.836,
      "step": 592200
    },
    {
      "epoch": 2.075548405864149,
      "grad_norm": 2.953125,
      "learning_rate": 1.7119558868197854e-05,
      "loss": 0.8985,
      "step": 592210
    },
    {
      "epoch": 2.0755834533710447,
      "grad_norm": 2.9375,
      "learning_rate": 1.7118909839534152e-05,
      "loss": 0.9231,
      "step": 592220
    },
    {
      "epoch": 2.07561850087794,
      "grad_norm": 3.3125,
      "learning_rate": 1.711826081087045e-05,
      "loss": 0.8824,
      "step": 592230
    },
    {
      "epoch": 2.0756535483848357,
      "grad_norm": 2.953125,
      "learning_rate": 1.7117611782206748e-05,
      "loss": 0.7519,
      "step": 592240
    },
    {
      "epoch": 2.075688595891731,
      "grad_norm": 3.359375,
      "learning_rate": 1.7116962753543046e-05,
      "loss": 0.8344,
      "step": 592250
    },
    {
      "epoch": 2.075723643398627,
      "grad_norm": 2.921875,
      "learning_rate": 1.7116313724879347e-05,
      "loss": 0.7794,
      "step": 592260
    },
    {
      "epoch": 2.0757586909055226,
      "grad_norm": 2.609375,
      "learning_rate": 1.7115664696215645e-05,
      "loss": 0.8252,
      "step": 592270
    },
    {
      "epoch": 2.075793738412418,
      "grad_norm": 2.359375,
      "learning_rate": 1.7115015667551943e-05,
      "loss": 0.7249,
      "step": 592280
    },
    {
      "epoch": 2.0758287859193136,
      "grad_norm": 2.71875,
      "learning_rate": 1.711436663888824e-05,
      "loss": 0.7656,
      "step": 592290
    },
    {
      "epoch": 2.0758638334262094,
      "grad_norm": 2.578125,
      "learning_rate": 1.711371761022454e-05,
      "loss": 0.8014,
      "step": 592300
    },
    {
      "epoch": 2.0758988809331047,
      "grad_norm": 3.71875,
      "learning_rate": 1.7113068581560837e-05,
      "loss": 0.8302,
      "step": 592310
    },
    {
      "epoch": 2.0759339284400005,
      "grad_norm": 2.625,
      "learning_rate": 1.7112419552897135e-05,
      "loss": 0.8449,
      "step": 592320
    },
    {
      "epoch": 2.0759689759468962,
      "grad_norm": 3.0,
      "learning_rate": 1.7111770524233433e-05,
      "loss": 0.813,
      "step": 592330
    },
    {
      "epoch": 2.0760040234537915,
      "grad_norm": 2.84375,
      "learning_rate": 1.711112149556973e-05,
      "loss": 0.7589,
      "step": 592340
    },
    {
      "epoch": 2.0760390709606873,
      "grad_norm": 2.640625,
      "learning_rate": 1.711047246690603e-05,
      "loss": 0.8522,
      "step": 592350
    },
    {
      "epoch": 2.0760741184675826,
      "grad_norm": 2.765625,
      "learning_rate": 1.7109823438242327e-05,
      "loss": 0.8335,
      "step": 592360
    },
    {
      "epoch": 2.0761091659744784,
      "grad_norm": 2.859375,
      "learning_rate": 1.7109174409578625e-05,
      "loss": 0.8361,
      "step": 592370
    },
    {
      "epoch": 2.076144213481374,
      "grad_norm": 2.5625,
      "learning_rate": 1.7108525380914923e-05,
      "loss": 0.7747,
      "step": 592380
    },
    {
      "epoch": 2.0761792609882694,
      "grad_norm": 3.296875,
      "learning_rate": 1.710787635225122e-05,
      "loss": 0.7612,
      "step": 592390
    },
    {
      "epoch": 2.076214308495165,
      "grad_norm": 2.65625,
      "learning_rate": 1.710722732358752e-05,
      "loss": 0.7841,
      "step": 592400
    },
    {
      "epoch": 2.076249356002061,
      "grad_norm": 2.828125,
      "learning_rate": 1.7106578294923817e-05,
      "loss": 0.7637,
      "step": 592410
    },
    {
      "epoch": 2.0762844035089563,
      "grad_norm": 3.640625,
      "learning_rate": 1.7105929266260115e-05,
      "loss": 0.8492,
      "step": 592420
    },
    {
      "epoch": 2.076319451015852,
      "grad_norm": 2.859375,
      "learning_rate": 1.7105280237596413e-05,
      "loss": 0.8668,
      "step": 592430
    },
    {
      "epoch": 2.076354498522748,
      "grad_norm": 2.796875,
      "learning_rate": 1.710463120893271e-05,
      "loss": 0.768,
      "step": 592440
    },
    {
      "epoch": 2.076389546029643,
      "grad_norm": 2.984375,
      "learning_rate": 1.710398218026901e-05,
      "loss": 0.7744,
      "step": 592450
    },
    {
      "epoch": 2.076424593536539,
      "grad_norm": 3.515625,
      "learning_rate": 1.7103333151605307e-05,
      "loss": 0.8616,
      "step": 592460
    },
    {
      "epoch": 2.076459641043434,
      "grad_norm": 2.65625,
      "learning_rate": 1.7102684122941605e-05,
      "loss": 0.835,
      "step": 592470
    },
    {
      "epoch": 2.07649468855033,
      "grad_norm": 2.71875,
      "learning_rate": 1.7102035094277903e-05,
      "loss": 0.8215,
      "step": 592480
    },
    {
      "epoch": 2.0765297360572257,
      "grad_norm": 2.859375,
      "learning_rate": 1.71013860656142e-05,
      "loss": 0.7781,
      "step": 592490
    },
    {
      "epoch": 2.076564783564121,
      "grad_norm": 3.171875,
      "learning_rate": 1.71007370369505e-05,
      "loss": 0.8174,
      "step": 592500
    },
    {
      "epoch": 2.0765998310710168,
      "grad_norm": 2.984375,
      "learning_rate": 1.71000880082868e-05,
      "loss": 0.8169,
      "step": 592510
    },
    {
      "epoch": 2.0766348785779125,
      "grad_norm": 3.15625,
      "learning_rate": 1.70994389796231e-05,
      "loss": 0.8012,
      "step": 592520
    },
    {
      "epoch": 2.076669926084808,
      "grad_norm": 2.59375,
      "learning_rate": 1.7098789950959397e-05,
      "loss": 0.81,
      "step": 592530
    },
    {
      "epoch": 2.0767049735917036,
      "grad_norm": 2.96875,
      "learning_rate": 1.7098140922295695e-05,
      "loss": 0.8583,
      "step": 592540
    },
    {
      "epoch": 2.0767400210985993,
      "grad_norm": 2.453125,
      "learning_rate": 1.7097491893631993e-05,
      "loss": 0.8533,
      "step": 592550
    },
    {
      "epoch": 2.0767750686054947,
      "grad_norm": 2.953125,
      "learning_rate": 1.709684286496829e-05,
      "loss": 0.7766,
      "step": 592560
    },
    {
      "epoch": 2.0768101161123904,
      "grad_norm": 2.734375,
      "learning_rate": 1.709619383630459e-05,
      "loss": 0.745,
      "step": 592570
    },
    {
      "epoch": 2.0768451636192857,
      "grad_norm": 2.71875,
      "learning_rate": 1.7095544807640883e-05,
      "loss": 0.7801,
      "step": 592580
    },
    {
      "epoch": 2.0768802111261815,
      "grad_norm": 2.71875,
      "learning_rate": 1.709489577897718e-05,
      "loss": 0.7948,
      "step": 592590
    },
    {
      "epoch": 2.0769152586330772,
      "grad_norm": 2.671875,
      "learning_rate": 1.709424675031348e-05,
      "loss": 0.8636,
      "step": 592600
    },
    {
      "epoch": 2.0769503061399726,
      "grad_norm": 2.828125,
      "learning_rate": 1.7093597721649777e-05,
      "loss": 0.7198,
      "step": 592610
    },
    {
      "epoch": 2.0769853536468683,
      "grad_norm": 3.125,
      "learning_rate": 1.709294869298608e-05,
      "loss": 0.7575,
      "step": 592620
    },
    {
      "epoch": 2.077020401153764,
      "grad_norm": 3.265625,
      "learning_rate": 1.7092299664322377e-05,
      "loss": 0.9164,
      "step": 592630
    },
    {
      "epoch": 2.0770554486606594,
      "grad_norm": 3.125,
      "learning_rate": 1.7091650635658675e-05,
      "loss": 0.9102,
      "step": 592640
    },
    {
      "epoch": 2.077090496167555,
      "grad_norm": 2.703125,
      "learning_rate": 1.7091001606994973e-05,
      "loss": 0.8179,
      "step": 592650
    },
    {
      "epoch": 2.077125543674451,
      "grad_norm": 3.015625,
      "learning_rate": 1.709035257833127e-05,
      "loss": 0.855,
      "step": 592660
    },
    {
      "epoch": 2.077160591181346,
      "grad_norm": 2.765625,
      "learning_rate": 1.708970354966757e-05,
      "loss": 0.8109,
      "step": 592670
    },
    {
      "epoch": 2.077195638688242,
      "grad_norm": 2.640625,
      "learning_rate": 1.7089054521003867e-05,
      "loss": 0.8162,
      "step": 592680
    },
    {
      "epoch": 2.0772306861951373,
      "grad_norm": 2.578125,
      "learning_rate": 1.7088405492340165e-05,
      "loss": 0.8079,
      "step": 592690
    },
    {
      "epoch": 2.077265733702033,
      "grad_norm": 2.765625,
      "learning_rate": 1.7087756463676463e-05,
      "loss": 0.8755,
      "step": 592700
    },
    {
      "epoch": 2.077300781208929,
      "grad_norm": 2.96875,
      "learning_rate": 1.708710743501276e-05,
      "loss": 0.8964,
      "step": 592710
    },
    {
      "epoch": 2.077335828715824,
      "grad_norm": 2.84375,
      "learning_rate": 1.708645840634906e-05,
      "loss": 0.9502,
      "step": 592720
    },
    {
      "epoch": 2.07737087622272,
      "grad_norm": 2.859375,
      "learning_rate": 1.7085809377685357e-05,
      "loss": 0.8096,
      "step": 592730
    },
    {
      "epoch": 2.0774059237296156,
      "grad_norm": 3.03125,
      "learning_rate": 1.7085160349021655e-05,
      "loss": 0.9024,
      "step": 592740
    },
    {
      "epoch": 2.077440971236511,
      "grad_norm": 2.65625,
      "learning_rate": 1.7084511320357953e-05,
      "loss": 0.7937,
      "step": 592750
    },
    {
      "epoch": 2.0774760187434067,
      "grad_norm": 2.828125,
      "learning_rate": 1.7083862291694254e-05,
      "loss": 0.869,
      "step": 592760
    },
    {
      "epoch": 2.0775110662503025,
      "grad_norm": 2.578125,
      "learning_rate": 1.708321326303055e-05,
      "loss": 0.7536,
      "step": 592770
    },
    {
      "epoch": 2.0775461137571978,
      "grad_norm": 3.0,
      "learning_rate": 1.7082564234366847e-05,
      "loss": 0.8828,
      "step": 592780
    },
    {
      "epoch": 2.0775811612640935,
      "grad_norm": 2.6875,
      "learning_rate": 1.7081915205703145e-05,
      "loss": 0.8262,
      "step": 592790
    },
    {
      "epoch": 2.077616208770989,
      "grad_norm": 3.65625,
      "learning_rate": 1.7081266177039443e-05,
      "loss": 0.9366,
      "step": 592800
    },
    {
      "epoch": 2.0776512562778846,
      "grad_norm": 2.78125,
      "learning_rate": 1.708061714837574e-05,
      "loss": 0.8136,
      "step": 592810
    },
    {
      "epoch": 2.0776863037847804,
      "grad_norm": 3.046875,
      "learning_rate": 1.707996811971204e-05,
      "loss": 0.8335,
      "step": 592820
    },
    {
      "epoch": 2.0777213512916757,
      "grad_norm": 2.9375,
      "learning_rate": 1.7079319091048337e-05,
      "loss": 0.8619,
      "step": 592830
    },
    {
      "epoch": 2.0777563987985714,
      "grad_norm": 3.0,
      "learning_rate": 1.7078670062384635e-05,
      "loss": 0.9245,
      "step": 592840
    },
    {
      "epoch": 2.077791446305467,
      "grad_norm": 3.15625,
      "learning_rate": 1.7078021033720933e-05,
      "loss": 0.7758,
      "step": 592850
    },
    {
      "epoch": 2.0778264938123625,
      "grad_norm": 2.8125,
      "learning_rate": 1.707737200505723e-05,
      "loss": 0.8227,
      "step": 592860
    },
    {
      "epoch": 2.0778615413192583,
      "grad_norm": 3.25,
      "learning_rate": 1.707672297639353e-05,
      "loss": 0.8075,
      "step": 592870
    },
    {
      "epoch": 2.077896588826154,
      "grad_norm": 2.84375,
      "learning_rate": 1.707607394772983e-05,
      "loss": 0.7955,
      "step": 592880
    },
    {
      "epoch": 2.0779316363330493,
      "grad_norm": 2.765625,
      "learning_rate": 1.7075424919066128e-05,
      "loss": 0.8072,
      "step": 592890
    },
    {
      "epoch": 2.077966683839945,
      "grad_norm": 2.890625,
      "learning_rate": 1.7074775890402426e-05,
      "loss": 0.8127,
      "step": 592900
    },
    {
      "epoch": 2.078001731346841,
      "grad_norm": 2.84375,
      "learning_rate": 1.7074126861738724e-05,
      "loss": 0.836,
      "step": 592910
    },
    {
      "epoch": 2.078036778853736,
      "grad_norm": 3.5,
      "learning_rate": 1.7073477833075022e-05,
      "loss": 0.8316,
      "step": 592920
    },
    {
      "epoch": 2.078071826360632,
      "grad_norm": 2.875,
      "learning_rate": 1.707282880441132e-05,
      "loss": 0.8319,
      "step": 592930
    },
    {
      "epoch": 2.0781068738675272,
      "grad_norm": 2.625,
      "learning_rate": 1.7072179775747618e-05,
      "loss": 0.8044,
      "step": 592940
    },
    {
      "epoch": 2.078141921374423,
      "grad_norm": 2.59375,
      "learning_rate": 1.7071530747083913e-05,
      "loss": 0.7872,
      "step": 592950
    },
    {
      "epoch": 2.0781769688813188,
      "grad_norm": 2.6875,
      "learning_rate": 1.707088171842021e-05,
      "loss": 0.8423,
      "step": 592960
    },
    {
      "epoch": 2.078212016388214,
      "grad_norm": 3.03125,
      "learning_rate": 1.707023268975651e-05,
      "loss": 0.7786,
      "step": 592970
    },
    {
      "epoch": 2.07824706389511,
      "grad_norm": 2.5,
      "learning_rate": 1.7069583661092807e-05,
      "loss": 0.8284,
      "step": 592980
    },
    {
      "epoch": 2.0782821114020056,
      "grad_norm": 2.90625,
      "learning_rate": 1.7068934632429108e-05,
      "loss": 0.7966,
      "step": 592990
    },
    {
      "epoch": 2.078317158908901,
      "grad_norm": 2.484375,
      "learning_rate": 1.7068285603765406e-05,
      "loss": 0.8943,
      "step": 593000
    },
    {
      "epoch": 2.0783522064157967,
      "grad_norm": 2.703125,
      "learning_rate": 1.7067636575101704e-05,
      "loss": 0.7661,
      "step": 593010
    },
    {
      "epoch": 2.0783872539226924,
      "grad_norm": 2.59375,
      "learning_rate": 1.7066987546438002e-05,
      "loss": 0.7501,
      "step": 593020
    },
    {
      "epoch": 2.0784223014295877,
      "grad_norm": 2.578125,
      "learning_rate": 1.70663385177743e-05,
      "loss": 0.8135,
      "step": 593030
    },
    {
      "epoch": 2.0784573489364835,
      "grad_norm": 2.984375,
      "learning_rate": 1.7065689489110598e-05,
      "loss": 0.7979,
      "step": 593040
    },
    {
      "epoch": 2.078492396443379,
      "grad_norm": 3.5,
      "learning_rate": 1.7065040460446896e-05,
      "loss": 0.8036,
      "step": 593050
    },
    {
      "epoch": 2.0785274439502746,
      "grad_norm": 3.265625,
      "learning_rate": 1.7064391431783194e-05,
      "loss": 0.869,
      "step": 593060
    },
    {
      "epoch": 2.0785624914571703,
      "grad_norm": 3.078125,
      "learning_rate": 1.7063742403119492e-05,
      "loss": 0.8436,
      "step": 593070
    },
    {
      "epoch": 2.0785975389640656,
      "grad_norm": 2.59375,
      "learning_rate": 1.706309337445579e-05,
      "loss": 0.7614,
      "step": 593080
    },
    {
      "epoch": 2.0786325864709614,
      "grad_norm": 3.109375,
      "learning_rate": 1.7062444345792088e-05,
      "loss": 0.8363,
      "step": 593090
    },
    {
      "epoch": 2.078667633977857,
      "grad_norm": 2.796875,
      "learning_rate": 1.7061795317128386e-05,
      "loss": 0.741,
      "step": 593100
    },
    {
      "epoch": 2.0787026814847525,
      "grad_norm": 3.28125,
      "learning_rate": 1.7061146288464684e-05,
      "loss": 0.8531,
      "step": 593110
    },
    {
      "epoch": 2.078737728991648,
      "grad_norm": 2.5,
      "learning_rate": 1.7060497259800982e-05,
      "loss": 0.7636,
      "step": 593120
    },
    {
      "epoch": 2.078772776498544,
      "grad_norm": 3.0625,
      "learning_rate": 1.7059848231137284e-05,
      "loss": 0.7364,
      "step": 593130
    },
    {
      "epoch": 2.0788078240054393,
      "grad_norm": 3.109375,
      "learning_rate": 1.7059199202473578e-05,
      "loss": 0.8054,
      "step": 593140
    },
    {
      "epoch": 2.078842871512335,
      "grad_norm": 2.8125,
      "learning_rate": 1.7058550173809876e-05,
      "loss": 0.8382,
      "step": 593150
    },
    {
      "epoch": 2.0788779190192304,
      "grad_norm": 2.78125,
      "learning_rate": 1.7057901145146174e-05,
      "loss": 0.8171,
      "step": 593160
    },
    {
      "epoch": 2.078912966526126,
      "grad_norm": 2.828125,
      "learning_rate": 1.7057252116482472e-05,
      "loss": 0.7973,
      "step": 593170
    },
    {
      "epoch": 2.078948014033022,
      "grad_norm": 3.28125,
      "learning_rate": 1.705660308781877e-05,
      "loss": 0.824,
      "step": 593180
    },
    {
      "epoch": 2.078983061539917,
      "grad_norm": 3.25,
      "learning_rate": 1.7055954059155068e-05,
      "loss": 0.8806,
      "step": 593190
    },
    {
      "epoch": 2.079018109046813,
      "grad_norm": 3.03125,
      "learning_rate": 1.7055305030491366e-05,
      "loss": 0.7972,
      "step": 593200
    },
    {
      "epoch": 2.0790531565537087,
      "grad_norm": 2.953125,
      "learning_rate": 1.7054656001827664e-05,
      "loss": 0.7675,
      "step": 593210
    },
    {
      "epoch": 2.079088204060604,
      "grad_norm": 2.953125,
      "learning_rate": 1.7054006973163962e-05,
      "loss": 0.8281,
      "step": 593220
    },
    {
      "epoch": 2.0791232515674998,
      "grad_norm": 2.9375,
      "learning_rate": 1.705335794450026e-05,
      "loss": 0.7672,
      "step": 593230
    },
    {
      "epoch": 2.0791582990743955,
      "grad_norm": 3.296875,
      "learning_rate": 1.705270891583656e-05,
      "loss": 0.8839,
      "step": 593240
    },
    {
      "epoch": 2.079193346581291,
      "grad_norm": 2.625,
      "learning_rate": 1.705205988717286e-05,
      "loss": 0.8335,
      "step": 593250
    },
    {
      "epoch": 2.0792283940881866,
      "grad_norm": 2.78125,
      "learning_rate": 1.7051410858509158e-05,
      "loss": 0.79,
      "step": 593260
    },
    {
      "epoch": 2.0792634415950824,
      "grad_norm": 2.84375,
      "learning_rate": 1.7050761829845456e-05,
      "loss": 0.8529,
      "step": 593270
    },
    {
      "epoch": 2.0792984891019777,
      "grad_norm": 3.0,
      "learning_rate": 1.7050112801181754e-05,
      "loss": 0.8246,
      "step": 593280
    },
    {
      "epoch": 2.0793335366088734,
      "grad_norm": 2.515625,
      "learning_rate": 1.704946377251805e-05,
      "loss": 0.8217,
      "step": 593290
    },
    {
      "epoch": 2.0793685841157687,
      "grad_norm": 2.890625,
      "learning_rate": 1.704881474385435e-05,
      "loss": 0.7579,
      "step": 593300
    },
    {
      "epoch": 2.0794036316226645,
      "grad_norm": 3.109375,
      "learning_rate": 1.7048165715190648e-05,
      "loss": 0.88,
      "step": 593310
    },
    {
      "epoch": 2.0794386791295603,
      "grad_norm": 2.875,
      "learning_rate": 1.7047516686526946e-05,
      "loss": 0.9269,
      "step": 593320
    },
    {
      "epoch": 2.0794737266364556,
      "grad_norm": 3.078125,
      "learning_rate": 1.704686765786324e-05,
      "loss": 0.7923,
      "step": 593330
    },
    {
      "epoch": 2.0795087741433513,
      "grad_norm": 2.71875,
      "learning_rate": 1.7046218629199538e-05,
      "loss": 0.8448,
      "step": 593340
    },
    {
      "epoch": 2.079543821650247,
      "grad_norm": 3.140625,
      "learning_rate": 1.7045569600535836e-05,
      "loss": 0.853,
      "step": 593350
    },
    {
      "epoch": 2.0795788691571424,
      "grad_norm": 3.0625,
      "learning_rate": 1.7044920571872138e-05,
      "loss": 0.8482,
      "step": 593360
    },
    {
      "epoch": 2.079613916664038,
      "grad_norm": 2.8125,
      "learning_rate": 1.7044271543208436e-05,
      "loss": 0.8143,
      "step": 593370
    },
    {
      "epoch": 2.079648964170934,
      "grad_norm": 2.828125,
      "learning_rate": 1.7043622514544734e-05,
      "loss": 0.7247,
      "step": 593380
    },
    {
      "epoch": 2.0796840116778292,
      "grad_norm": 3.34375,
      "learning_rate": 1.704297348588103e-05,
      "loss": 0.7643,
      "step": 593390
    },
    {
      "epoch": 2.079719059184725,
      "grad_norm": 3.109375,
      "learning_rate": 1.704232445721733e-05,
      "loss": 0.791,
      "step": 593400
    },
    {
      "epoch": 2.0797541066916203,
      "grad_norm": 2.875,
      "learning_rate": 1.7041675428553628e-05,
      "loss": 0.8966,
      "step": 593410
    },
    {
      "epoch": 2.079789154198516,
      "grad_norm": 3.15625,
      "learning_rate": 1.7041026399889926e-05,
      "loss": 0.852,
      "step": 593420
    },
    {
      "epoch": 2.079824201705412,
      "grad_norm": 2.71875,
      "learning_rate": 1.7040377371226224e-05,
      "loss": 0.7652,
      "step": 593430
    },
    {
      "epoch": 2.079859249212307,
      "grad_norm": 2.953125,
      "learning_rate": 1.703972834256252e-05,
      "loss": 0.7895,
      "step": 593440
    },
    {
      "epoch": 2.079894296719203,
      "grad_norm": 2.890625,
      "learning_rate": 1.703907931389882e-05,
      "loss": 0.7942,
      "step": 593450
    },
    {
      "epoch": 2.0799293442260987,
      "grad_norm": 2.65625,
      "learning_rate": 1.7038430285235118e-05,
      "loss": 0.7863,
      "step": 593460
    },
    {
      "epoch": 2.079964391732994,
      "grad_norm": 3.09375,
      "learning_rate": 1.7037781256571416e-05,
      "loss": 0.7352,
      "step": 593470
    },
    {
      "epoch": 2.0799994392398897,
      "grad_norm": 3.171875,
      "learning_rate": 1.7037132227907714e-05,
      "loss": 0.8322,
      "step": 593480
    },
    {
      "epoch": 2.0800344867467855,
      "grad_norm": 2.640625,
      "learning_rate": 1.7036483199244015e-05,
      "loss": 0.7912,
      "step": 593490
    },
    {
      "epoch": 2.080069534253681,
      "grad_norm": 3.09375,
      "learning_rate": 1.7035834170580313e-05,
      "loss": 0.7908,
      "step": 593500
    },
    {
      "epoch": 2.0801045817605766,
      "grad_norm": 3.03125,
      "learning_rate": 1.703518514191661e-05,
      "loss": 0.7726,
      "step": 593510
    },
    {
      "epoch": 2.080139629267472,
      "grad_norm": 2.921875,
      "learning_rate": 1.7034536113252906e-05,
      "loss": 0.828,
      "step": 593520
    },
    {
      "epoch": 2.0801746767743676,
      "grad_norm": 3.046875,
      "learning_rate": 1.7033887084589204e-05,
      "loss": 0.8394,
      "step": 593530
    },
    {
      "epoch": 2.0802097242812634,
      "grad_norm": 2.75,
      "learning_rate": 1.70332380559255e-05,
      "loss": 0.7821,
      "step": 593540
    },
    {
      "epoch": 2.0802447717881587,
      "grad_norm": 3.046875,
      "learning_rate": 1.70325890272618e-05,
      "loss": 0.8096,
      "step": 593550
    },
    {
      "epoch": 2.0802798192950545,
      "grad_norm": 2.484375,
      "learning_rate": 1.7031939998598098e-05,
      "loss": 0.7651,
      "step": 593560
    },
    {
      "epoch": 2.08031486680195,
      "grad_norm": 3.171875,
      "learning_rate": 1.7031290969934396e-05,
      "loss": 0.8233,
      "step": 593570
    },
    {
      "epoch": 2.0803499143088455,
      "grad_norm": 2.703125,
      "learning_rate": 1.7030641941270694e-05,
      "loss": 0.7737,
      "step": 593580
    },
    {
      "epoch": 2.0803849618157413,
      "grad_norm": 2.5625,
      "learning_rate": 1.702999291260699e-05,
      "loss": 0.8818,
      "step": 593590
    },
    {
      "epoch": 2.080420009322637,
      "grad_norm": 3.1875,
      "learning_rate": 1.702934388394329e-05,
      "loss": 0.9252,
      "step": 593600
    },
    {
      "epoch": 2.0804550568295324,
      "grad_norm": 2.734375,
      "learning_rate": 1.702869485527959e-05,
      "loss": 0.8484,
      "step": 593610
    },
    {
      "epoch": 2.080490104336428,
      "grad_norm": 2.9375,
      "learning_rate": 1.702804582661589e-05,
      "loss": 0.9089,
      "step": 593620
    },
    {
      "epoch": 2.0805251518433234,
      "grad_norm": 2.765625,
      "learning_rate": 1.7027396797952187e-05,
      "loss": 0.7214,
      "step": 593630
    },
    {
      "epoch": 2.080560199350219,
      "grad_norm": 3.265625,
      "learning_rate": 1.7026747769288485e-05,
      "loss": 0.8414,
      "step": 593640
    },
    {
      "epoch": 2.080595246857115,
      "grad_norm": 2.84375,
      "learning_rate": 1.7026098740624783e-05,
      "loss": 0.831,
      "step": 593650
    },
    {
      "epoch": 2.0806302943640103,
      "grad_norm": 3.03125,
      "learning_rate": 1.702544971196108e-05,
      "loss": 0.8523,
      "step": 593660
    },
    {
      "epoch": 2.080665341870906,
      "grad_norm": 3.0625,
      "learning_rate": 1.702480068329738e-05,
      "loss": 0.7934,
      "step": 593670
    },
    {
      "epoch": 2.0807003893778018,
      "grad_norm": 2.890625,
      "learning_rate": 1.7024151654633677e-05,
      "loss": 0.7316,
      "step": 593680
    },
    {
      "epoch": 2.080735436884697,
      "grad_norm": 2.40625,
      "learning_rate": 1.7023502625969975e-05,
      "loss": 0.7676,
      "step": 593690
    },
    {
      "epoch": 2.080770484391593,
      "grad_norm": 2.96875,
      "learning_rate": 1.7022853597306273e-05,
      "loss": 0.8034,
      "step": 593700
    },
    {
      "epoch": 2.0808055318984886,
      "grad_norm": 3.359375,
      "learning_rate": 1.7022204568642568e-05,
      "loss": 0.8788,
      "step": 593710
    },
    {
      "epoch": 2.080840579405384,
      "grad_norm": 2.640625,
      "learning_rate": 1.702155553997887e-05,
      "loss": 0.8635,
      "step": 593720
    },
    {
      "epoch": 2.0808756269122797,
      "grad_norm": 3.390625,
      "learning_rate": 1.7020906511315167e-05,
      "loss": 0.82,
      "step": 593730
    },
    {
      "epoch": 2.080910674419175,
      "grad_norm": 3.828125,
      "learning_rate": 1.7020257482651465e-05,
      "loss": 0.8117,
      "step": 593740
    },
    {
      "epoch": 2.0809457219260707,
      "grad_norm": 3.53125,
      "learning_rate": 1.7019608453987763e-05,
      "loss": 0.731,
      "step": 593750
    },
    {
      "epoch": 2.0809807694329665,
      "grad_norm": 2.640625,
      "learning_rate": 1.701895942532406e-05,
      "loss": 0.8483,
      "step": 593760
    },
    {
      "epoch": 2.081015816939862,
      "grad_norm": 3.171875,
      "learning_rate": 1.701831039666036e-05,
      "loss": 0.8329,
      "step": 593770
    },
    {
      "epoch": 2.0810508644467576,
      "grad_norm": 2.90625,
      "learning_rate": 1.7017661367996657e-05,
      "loss": 0.8008,
      "step": 593780
    },
    {
      "epoch": 2.0810859119536533,
      "grad_norm": 2.84375,
      "learning_rate": 1.7017012339332955e-05,
      "loss": 0.7145,
      "step": 593790
    },
    {
      "epoch": 2.0811209594605486,
      "grad_norm": 2.8125,
      "learning_rate": 1.7016363310669253e-05,
      "loss": 0.8363,
      "step": 593800
    },
    {
      "epoch": 2.0811560069674444,
      "grad_norm": 2.703125,
      "learning_rate": 1.701571428200555e-05,
      "loss": 0.8641,
      "step": 593810
    },
    {
      "epoch": 2.08119105447434,
      "grad_norm": 3.03125,
      "learning_rate": 1.701506525334185e-05,
      "loss": 0.8209,
      "step": 593820
    },
    {
      "epoch": 2.0812261019812355,
      "grad_norm": 3.0,
      "learning_rate": 1.7014416224678147e-05,
      "loss": 0.8101,
      "step": 593830
    },
    {
      "epoch": 2.0812611494881312,
      "grad_norm": 2.84375,
      "learning_rate": 1.7013767196014445e-05,
      "loss": 0.7771,
      "step": 593840
    },
    {
      "epoch": 2.0812961969950265,
      "grad_norm": 2.890625,
      "learning_rate": 1.7013118167350743e-05,
      "loss": 0.8889,
      "step": 593850
    },
    {
      "epoch": 2.0813312445019223,
      "grad_norm": 2.4375,
      "learning_rate": 1.7012469138687044e-05,
      "loss": 0.7738,
      "step": 593860
    },
    {
      "epoch": 2.081366292008818,
      "grad_norm": 3.109375,
      "learning_rate": 1.7011820110023342e-05,
      "loss": 0.7616,
      "step": 593870
    },
    {
      "epoch": 2.0814013395157134,
      "grad_norm": 3.171875,
      "learning_rate": 1.701117108135964e-05,
      "loss": 0.8538,
      "step": 593880
    },
    {
      "epoch": 2.081436387022609,
      "grad_norm": 3.234375,
      "learning_rate": 1.701052205269594e-05,
      "loss": 0.8398,
      "step": 593890
    },
    {
      "epoch": 2.081471434529505,
      "grad_norm": 2.671875,
      "learning_rate": 1.7009873024032233e-05,
      "loss": 0.7636,
      "step": 593900
    },
    {
      "epoch": 2.0815064820364,
      "grad_norm": 3.484375,
      "learning_rate": 1.700922399536853e-05,
      "loss": 0.8274,
      "step": 593910
    },
    {
      "epoch": 2.081541529543296,
      "grad_norm": 2.953125,
      "learning_rate": 1.700857496670483e-05,
      "loss": 0.7986,
      "step": 593920
    },
    {
      "epoch": 2.0815765770501917,
      "grad_norm": 3.0,
      "learning_rate": 1.7007925938041127e-05,
      "loss": 0.8037,
      "step": 593930
    },
    {
      "epoch": 2.081611624557087,
      "grad_norm": 3.328125,
      "learning_rate": 1.7007276909377425e-05,
      "loss": 0.8683,
      "step": 593940
    },
    {
      "epoch": 2.081646672063983,
      "grad_norm": 2.71875,
      "learning_rate": 1.7006627880713723e-05,
      "loss": 0.7964,
      "step": 593950
    },
    {
      "epoch": 2.081681719570878,
      "grad_norm": 3.171875,
      "learning_rate": 1.700597885205002e-05,
      "loss": 0.8531,
      "step": 593960
    },
    {
      "epoch": 2.081716767077774,
      "grad_norm": 3.015625,
      "learning_rate": 1.700532982338632e-05,
      "loss": 0.8882,
      "step": 593970
    },
    {
      "epoch": 2.0817518145846696,
      "grad_norm": 2.9375,
      "learning_rate": 1.700468079472262e-05,
      "loss": 0.8193,
      "step": 593980
    },
    {
      "epoch": 2.081786862091565,
      "grad_norm": 3.40625,
      "learning_rate": 1.700403176605892e-05,
      "loss": 0.7679,
      "step": 593990
    },
    {
      "epoch": 2.0818219095984607,
      "grad_norm": 2.8125,
      "learning_rate": 1.7003382737395216e-05,
      "loss": 0.806,
      "step": 594000
    },
    {
      "epoch": 2.0818569571053565,
      "grad_norm": 2.546875,
      "learning_rate": 1.7002733708731514e-05,
      "loss": 0.7548,
      "step": 594010
    },
    {
      "epoch": 2.0818920046122518,
      "grad_norm": 3.515625,
      "learning_rate": 1.7002084680067812e-05,
      "loss": 0.8216,
      "step": 594020
    },
    {
      "epoch": 2.0819270521191475,
      "grad_norm": 2.9375,
      "learning_rate": 1.700143565140411e-05,
      "loss": 0.7461,
      "step": 594030
    },
    {
      "epoch": 2.0819620996260433,
      "grad_norm": 3.1875,
      "learning_rate": 1.700078662274041e-05,
      "loss": 0.8403,
      "step": 594040
    },
    {
      "epoch": 2.0819971471329386,
      "grad_norm": 2.828125,
      "learning_rate": 1.7000137594076706e-05,
      "loss": 0.8037,
      "step": 594050
    },
    {
      "epoch": 2.0820321946398344,
      "grad_norm": 2.484375,
      "learning_rate": 1.6999488565413004e-05,
      "loss": 0.8262,
      "step": 594060
    },
    {
      "epoch": 2.0820672421467297,
      "grad_norm": 2.921875,
      "learning_rate": 1.6998839536749302e-05,
      "loss": 0.7591,
      "step": 594070
    },
    {
      "epoch": 2.0821022896536254,
      "grad_norm": 2.953125,
      "learning_rate": 1.6998190508085597e-05,
      "loss": 0.897,
      "step": 594080
    },
    {
      "epoch": 2.082137337160521,
      "grad_norm": 2.6875,
      "learning_rate": 1.69975414794219e-05,
      "loss": 0.7523,
      "step": 594090
    },
    {
      "epoch": 2.0821723846674165,
      "grad_norm": 2.6875,
      "learning_rate": 1.6996892450758196e-05,
      "loss": 0.8267,
      "step": 594100
    },
    {
      "epoch": 2.0822074321743123,
      "grad_norm": 3.125,
      "learning_rate": 1.6996243422094494e-05,
      "loss": 0.8216,
      "step": 594110
    },
    {
      "epoch": 2.082242479681208,
      "grad_norm": 2.890625,
      "learning_rate": 1.6995594393430792e-05,
      "loss": 0.8552,
      "step": 594120
    },
    {
      "epoch": 2.0822775271881033,
      "grad_norm": 2.953125,
      "learning_rate": 1.699494536476709e-05,
      "loss": 0.8318,
      "step": 594130
    },
    {
      "epoch": 2.082312574694999,
      "grad_norm": 3.09375,
      "learning_rate": 1.699429633610339e-05,
      "loss": 0.8066,
      "step": 594140
    },
    {
      "epoch": 2.082347622201895,
      "grad_norm": 2.390625,
      "learning_rate": 1.6993647307439686e-05,
      "loss": 0.7973,
      "step": 594150
    },
    {
      "epoch": 2.08238266970879,
      "grad_norm": 3.0,
      "learning_rate": 1.6992998278775984e-05,
      "loss": 0.83,
      "step": 594160
    },
    {
      "epoch": 2.082417717215686,
      "grad_norm": 2.78125,
      "learning_rate": 1.6992349250112282e-05,
      "loss": 0.8129,
      "step": 594170
    },
    {
      "epoch": 2.0824527647225817,
      "grad_norm": 2.921875,
      "learning_rate": 1.699170022144858e-05,
      "loss": 0.7427,
      "step": 594180
    },
    {
      "epoch": 2.082487812229477,
      "grad_norm": 2.9375,
      "learning_rate": 1.699105119278488e-05,
      "loss": 0.7712,
      "step": 594190
    },
    {
      "epoch": 2.0825228597363727,
      "grad_norm": 2.828125,
      "learning_rate": 1.6990402164121176e-05,
      "loss": 0.7729,
      "step": 594200
    },
    {
      "epoch": 2.082557907243268,
      "grad_norm": 3.078125,
      "learning_rate": 1.6989753135457474e-05,
      "loss": 0.8581,
      "step": 594210
    },
    {
      "epoch": 2.082592954750164,
      "grad_norm": 2.9375,
      "learning_rate": 1.6989104106793772e-05,
      "loss": 0.8864,
      "step": 594220
    },
    {
      "epoch": 2.0826280022570596,
      "grad_norm": 3.109375,
      "learning_rate": 1.6988455078130074e-05,
      "loss": 0.8052,
      "step": 594230
    },
    {
      "epoch": 2.082663049763955,
      "grad_norm": 2.96875,
      "learning_rate": 1.6987806049466372e-05,
      "loss": 0.8764,
      "step": 594240
    },
    {
      "epoch": 2.0826980972708506,
      "grad_norm": 2.75,
      "learning_rate": 1.698715702080267e-05,
      "loss": 0.759,
      "step": 594250
    },
    {
      "epoch": 2.0827331447777464,
      "grad_norm": 3.125,
      "learning_rate": 1.6986507992138968e-05,
      "loss": 0.786,
      "step": 594260
    },
    {
      "epoch": 2.0827681922846417,
      "grad_norm": 2.703125,
      "learning_rate": 1.6985858963475262e-05,
      "loss": 0.7437,
      "step": 594270
    },
    {
      "epoch": 2.0828032397915375,
      "grad_norm": 2.640625,
      "learning_rate": 1.698520993481156e-05,
      "loss": 0.7977,
      "step": 594280
    },
    {
      "epoch": 2.0828382872984332,
      "grad_norm": 2.984375,
      "learning_rate": 1.698456090614786e-05,
      "loss": 0.8282,
      "step": 594290
    },
    {
      "epoch": 2.0828733348053285,
      "grad_norm": 3.0,
      "learning_rate": 1.6983911877484156e-05,
      "loss": 0.8164,
      "step": 594300
    },
    {
      "epoch": 2.0829083823122243,
      "grad_norm": 2.734375,
      "learning_rate": 1.6983262848820454e-05,
      "loss": 0.8465,
      "step": 594310
    },
    {
      "epoch": 2.0829434298191196,
      "grad_norm": 3.140625,
      "learning_rate": 1.6982613820156752e-05,
      "loss": 0.816,
      "step": 594320
    },
    {
      "epoch": 2.0829784773260154,
      "grad_norm": 2.984375,
      "learning_rate": 1.698196479149305e-05,
      "loss": 0.7584,
      "step": 594330
    },
    {
      "epoch": 2.083013524832911,
      "grad_norm": 2.359375,
      "learning_rate": 1.6981315762829352e-05,
      "loss": 0.7901,
      "step": 594340
    },
    {
      "epoch": 2.0830485723398064,
      "grad_norm": 2.59375,
      "learning_rate": 1.698066673416565e-05,
      "loss": 0.8456,
      "step": 594350
    },
    {
      "epoch": 2.083083619846702,
      "grad_norm": 2.765625,
      "learning_rate": 1.6980017705501948e-05,
      "loss": 0.806,
      "step": 594360
    },
    {
      "epoch": 2.083118667353598,
      "grad_norm": 2.796875,
      "learning_rate": 1.6979368676838246e-05,
      "loss": 0.8032,
      "step": 594370
    },
    {
      "epoch": 2.0831537148604933,
      "grad_norm": 3.328125,
      "learning_rate": 1.6978719648174544e-05,
      "loss": 0.8734,
      "step": 594380
    },
    {
      "epoch": 2.083188762367389,
      "grad_norm": 3.0,
      "learning_rate": 1.6978070619510842e-05,
      "loss": 0.8548,
      "step": 594390
    },
    {
      "epoch": 2.083223809874285,
      "grad_norm": 3.0,
      "learning_rate": 1.697742159084714e-05,
      "loss": 0.8227,
      "step": 594400
    },
    {
      "epoch": 2.08325885738118,
      "grad_norm": 2.890625,
      "learning_rate": 1.6976772562183438e-05,
      "loss": 0.7824,
      "step": 594410
    },
    {
      "epoch": 2.083293904888076,
      "grad_norm": 2.5625,
      "learning_rate": 1.6976123533519736e-05,
      "loss": 0.8101,
      "step": 594420
    },
    {
      "epoch": 2.083328952394971,
      "grad_norm": 2.34375,
      "learning_rate": 1.6975474504856034e-05,
      "loss": 0.7778,
      "step": 594430
    },
    {
      "epoch": 2.083363999901867,
      "grad_norm": 3.390625,
      "learning_rate": 1.6974825476192332e-05,
      "loss": 0.7544,
      "step": 594440
    },
    {
      "epoch": 2.0833990474087627,
      "grad_norm": 3.34375,
      "learning_rate": 1.697417644752863e-05,
      "loss": 0.8195,
      "step": 594450
    },
    {
      "epoch": 2.083434094915658,
      "grad_norm": 2.96875,
      "learning_rate": 1.6973527418864928e-05,
      "loss": 0.8175,
      "step": 594460
    },
    {
      "epoch": 2.0834691424225538,
      "grad_norm": 3.0625,
      "learning_rate": 1.6972878390201226e-05,
      "loss": 0.8135,
      "step": 594470
    },
    {
      "epoch": 2.0835041899294495,
      "grad_norm": 2.953125,
      "learning_rate": 1.6972229361537524e-05,
      "loss": 0.8382,
      "step": 594480
    },
    {
      "epoch": 2.083539237436345,
      "grad_norm": 2.6875,
      "learning_rate": 1.6971580332873822e-05,
      "loss": 0.8763,
      "step": 594490
    },
    {
      "epoch": 2.0835742849432406,
      "grad_norm": 3.015625,
      "learning_rate": 1.697093130421012e-05,
      "loss": 0.8639,
      "step": 594500
    },
    {
      "epoch": 2.0836093324501364,
      "grad_norm": 2.671875,
      "learning_rate": 1.6970282275546418e-05,
      "loss": 0.7786,
      "step": 594510
    },
    {
      "epoch": 2.0836443799570317,
      "grad_norm": 2.71875,
      "learning_rate": 1.6969633246882716e-05,
      "loss": 0.7818,
      "step": 594520
    },
    {
      "epoch": 2.0836794274639274,
      "grad_norm": 2.984375,
      "learning_rate": 1.6968984218219014e-05,
      "loss": 0.8606,
      "step": 594530
    },
    {
      "epoch": 2.0837144749708227,
      "grad_norm": 2.515625,
      "learning_rate": 1.6968335189555312e-05,
      "loss": 0.8639,
      "step": 594540
    },
    {
      "epoch": 2.0837495224777185,
      "grad_norm": 2.578125,
      "learning_rate": 1.696768616089161e-05,
      "loss": 0.7843,
      "step": 594550
    },
    {
      "epoch": 2.0837845699846143,
      "grad_norm": 2.59375,
      "learning_rate": 1.6967037132227908e-05,
      "loss": 0.8172,
      "step": 594560
    },
    {
      "epoch": 2.0838196174915096,
      "grad_norm": 2.96875,
      "learning_rate": 1.6966388103564206e-05,
      "loss": 0.844,
      "step": 594570
    },
    {
      "epoch": 2.0838546649984053,
      "grad_norm": 2.65625,
      "learning_rate": 1.6965739074900504e-05,
      "loss": 0.8207,
      "step": 594580
    },
    {
      "epoch": 2.083889712505301,
      "grad_norm": 2.875,
      "learning_rate": 1.6965090046236805e-05,
      "loss": 0.8448,
      "step": 594590
    },
    {
      "epoch": 2.0839247600121964,
      "grad_norm": 2.796875,
      "learning_rate": 1.6964441017573103e-05,
      "loss": 0.7533,
      "step": 594600
    },
    {
      "epoch": 2.083959807519092,
      "grad_norm": 2.671875,
      "learning_rate": 1.69637919889094e-05,
      "loss": 0.828,
      "step": 594610
    },
    {
      "epoch": 2.083994855025988,
      "grad_norm": 3.40625,
      "learning_rate": 1.69631429602457e-05,
      "loss": 0.8306,
      "step": 594620
    },
    {
      "epoch": 2.0840299025328832,
      "grad_norm": 3.46875,
      "learning_rate": 1.6962493931581997e-05,
      "loss": 0.7884,
      "step": 594630
    },
    {
      "epoch": 2.084064950039779,
      "grad_norm": 3.328125,
      "learning_rate": 1.6961844902918295e-05,
      "loss": 0.8728,
      "step": 594640
    },
    {
      "epoch": 2.0840999975466747,
      "grad_norm": 3.015625,
      "learning_rate": 1.696119587425459e-05,
      "loss": 0.7549,
      "step": 594650
    },
    {
      "epoch": 2.08413504505357,
      "grad_norm": 3.0,
      "learning_rate": 1.6960546845590888e-05,
      "loss": 0.8264,
      "step": 594660
    },
    {
      "epoch": 2.084170092560466,
      "grad_norm": 2.84375,
      "learning_rate": 1.6959897816927186e-05,
      "loss": 0.8165,
      "step": 594670
    },
    {
      "epoch": 2.084205140067361,
      "grad_norm": 2.953125,
      "learning_rate": 1.6959248788263484e-05,
      "loss": 0.9049,
      "step": 594680
    },
    {
      "epoch": 2.084240187574257,
      "grad_norm": 3.0625,
      "learning_rate": 1.6958599759599782e-05,
      "loss": 0.8432,
      "step": 594690
    },
    {
      "epoch": 2.0842752350811526,
      "grad_norm": 3.140625,
      "learning_rate": 1.695795073093608e-05,
      "loss": 0.7975,
      "step": 594700
    },
    {
      "epoch": 2.084310282588048,
      "grad_norm": 2.578125,
      "learning_rate": 1.695730170227238e-05,
      "loss": 0.7439,
      "step": 594710
    },
    {
      "epoch": 2.0843453300949437,
      "grad_norm": 2.21875,
      "learning_rate": 1.695665267360868e-05,
      "loss": 0.7507,
      "step": 594720
    },
    {
      "epoch": 2.0843803776018395,
      "grad_norm": 2.921875,
      "learning_rate": 1.6956003644944977e-05,
      "loss": 0.8109,
      "step": 594730
    },
    {
      "epoch": 2.084415425108735,
      "grad_norm": 3.09375,
      "learning_rate": 1.6955354616281275e-05,
      "loss": 0.8103,
      "step": 594740
    },
    {
      "epoch": 2.0844504726156305,
      "grad_norm": 2.921875,
      "learning_rate": 1.6954705587617573e-05,
      "loss": 0.7897,
      "step": 594750
    },
    {
      "epoch": 2.0844855201225263,
      "grad_norm": 2.421875,
      "learning_rate": 1.695405655895387e-05,
      "loss": 0.8445,
      "step": 594760
    },
    {
      "epoch": 2.0845205676294216,
      "grad_norm": 2.953125,
      "learning_rate": 1.695340753029017e-05,
      "loss": 0.8208,
      "step": 594770
    },
    {
      "epoch": 2.0845556151363174,
      "grad_norm": 2.8125,
      "learning_rate": 1.6952758501626467e-05,
      "loss": 0.8991,
      "step": 594780
    },
    {
      "epoch": 2.0845906626432127,
      "grad_norm": 3.203125,
      "learning_rate": 1.6952109472962765e-05,
      "loss": 0.8693,
      "step": 594790
    },
    {
      "epoch": 2.0846257101501084,
      "grad_norm": 3.015625,
      "learning_rate": 1.6951460444299063e-05,
      "loss": 0.828,
      "step": 594800
    },
    {
      "epoch": 2.084660757657004,
      "grad_norm": 2.484375,
      "learning_rate": 1.695081141563536e-05,
      "loss": 0.8503,
      "step": 594810
    },
    {
      "epoch": 2.0846958051638995,
      "grad_norm": 3.140625,
      "learning_rate": 1.695016238697166e-05,
      "loss": 0.8017,
      "step": 594820
    },
    {
      "epoch": 2.0847308526707953,
      "grad_norm": 3.140625,
      "learning_rate": 1.6949513358307957e-05,
      "loss": 0.776,
      "step": 594830
    },
    {
      "epoch": 2.084765900177691,
      "grad_norm": 3.09375,
      "learning_rate": 1.6948864329644255e-05,
      "loss": 0.8545,
      "step": 594840
    },
    {
      "epoch": 2.0848009476845863,
      "grad_norm": 3.03125,
      "learning_rate": 1.6948215300980553e-05,
      "loss": 0.8534,
      "step": 594850
    },
    {
      "epoch": 2.084835995191482,
      "grad_norm": 2.71875,
      "learning_rate": 1.694756627231685e-05,
      "loss": 0.8287,
      "step": 594860
    },
    {
      "epoch": 2.084871042698378,
      "grad_norm": 2.921875,
      "learning_rate": 1.694691724365315e-05,
      "loss": 0.7431,
      "step": 594870
    },
    {
      "epoch": 2.084906090205273,
      "grad_norm": 2.5,
      "learning_rate": 1.6946268214989447e-05,
      "loss": 0.7897,
      "step": 594880
    },
    {
      "epoch": 2.084941137712169,
      "grad_norm": 2.859375,
      "learning_rate": 1.6945619186325745e-05,
      "loss": 0.9091,
      "step": 594890
    },
    {
      "epoch": 2.0849761852190642,
      "grad_norm": 3.1875,
      "learning_rate": 1.6944970157662043e-05,
      "loss": 0.7842,
      "step": 594900
    },
    {
      "epoch": 2.08501123272596,
      "grad_norm": 2.640625,
      "learning_rate": 1.694432112899834e-05,
      "loss": 0.8514,
      "step": 594910
    },
    {
      "epoch": 2.0850462802328558,
      "grad_norm": 2.875,
      "learning_rate": 1.694367210033464e-05,
      "loss": 0.852,
      "step": 594920
    },
    {
      "epoch": 2.085081327739751,
      "grad_norm": 2.671875,
      "learning_rate": 1.6943023071670937e-05,
      "loss": 0.7386,
      "step": 594930
    },
    {
      "epoch": 2.085116375246647,
      "grad_norm": 3.015625,
      "learning_rate": 1.6942374043007235e-05,
      "loss": 0.7951,
      "step": 594940
    },
    {
      "epoch": 2.0851514227535426,
      "grad_norm": 2.65625,
      "learning_rate": 1.6941725014343533e-05,
      "loss": 0.8249,
      "step": 594950
    },
    {
      "epoch": 2.085186470260438,
      "grad_norm": 2.9375,
      "learning_rate": 1.6941075985679834e-05,
      "loss": 0.7636,
      "step": 594960
    },
    {
      "epoch": 2.0852215177673337,
      "grad_norm": 2.5625,
      "learning_rate": 1.6940426957016132e-05,
      "loss": 0.8261,
      "step": 594970
    },
    {
      "epoch": 2.0852565652742294,
      "grad_norm": 3.609375,
      "learning_rate": 1.693977792835243e-05,
      "loss": 0.8069,
      "step": 594980
    },
    {
      "epoch": 2.0852916127811247,
      "grad_norm": 2.984375,
      "learning_rate": 1.693912889968873e-05,
      "loss": 0.8071,
      "step": 594990
    },
    {
      "epoch": 2.0853266602880205,
      "grad_norm": 2.8125,
      "learning_rate": 1.6938479871025026e-05,
      "loss": 0.7858,
      "step": 595000
    },
    {
      "epoch": 2.0853266602880205,
      "eval_loss": 0.7690289616584778,
      "eval_runtime": 554.2828,
      "eval_samples_per_second": 686.357,
      "eval_steps_per_second": 57.196,
      "step": 595000
    },
    {
      "epoch": 2.085361707794916,
      "grad_norm": 3.53125,
      "learning_rate": 1.6937830842361324e-05,
      "loss": 0.7431,
      "step": 595010
    },
    {
      "epoch": 2.0853967553018116,
      "grad_norm": 2.734375,
      "learning_rate": 1.693718181369762e-05,
      "loss": 0.7149,
      "step": 595020
    },
    {
      "epoch": 2.0854318028087073,
      "grad_norm": 3.0625,
      "learning_rate": 1.6936532785033917e-05,
      "loss": 0.8377,
      "step": 595030
    },
    {
      "epoch": 2.0854668503156026,
      "grad_norm": 2.671875,
      "learning_rate": 1.6935883756370215e-05,
      "loss": 0.9202,
      "step": 595040
    },
    {
      "epoch": 2.0855018978224984,
      "grad_norm": 3.0625,
      "learning_rate": 1.6935234727706513e-05,
      "loss": 0.7808,
      "step": 595050
    },
    {
      "epoch": 2.085536945329394,
      "grad_norm": 2.953125,
      "learning_rate": 1.693458569904281e-05,
      "loss": 0.9317,
      "step": 595060
    },
    {
      "epoch": 2.0855719928362895,
      "grad_norm": 3.09375,
      "learning_rate": 1.6933936670379112e-05,
      "loss": 0.7592,
      "step": 595070
    },
    {
      "epoch": 2.0856070403431852,
      "grad_norm": 3.25,
      "learning_rate": 1.693328764171541e-05,
      "loss": 0.8283,
      "step": 595080
    },
    {
      "epoch": 2.085642087850081,
      "grad_norm": 2.390625,
      "learning_rate": 1.693263861305171e-05,
      "loss": 0.8207,
      "step": 595090
    },
    {
      "epoch": 2.0856771353569763,
      "grad_norm": 3.234375,
      "learning_rate": 1.6931989584388006e-05,
      "loss": 0.7698,
      "step": 595100
    },
    {
      "epoch": 2.085712182863872,
      "grad_norm": 2.78125,
      "learning_rate": 1.6931340555724304e-05,
      "loss": 0.8453,
      "step": 595110
    },
    {
      "epoch": 2.0857472303707674,
      "grad_norm": 3.28125,
      "learning_rate": 1.6930691527060602e-05,
      "loss": 0.8374,
      "step": 595120
    },
    {
      "epoch": 2.085782277877663,
      "grad_norm": 2.765625,
      "learning_rate": 1.69300424983969e-05,
      "loss": 0.7422,
      "step": 595130
    },
    {
      "epoch": 2.085817325384559,
      "grad_norm": 2.71875,
      "learning_rate": 1.69293934697332e-05,
      "loss": 0.8411,
      "step": 595140
    },
    {
      "epoch": 2.085852372891454,
      "grad_norm": 2.984375,
      "learning_rate": 1.6928744441069496e-05,
      "loss": 0.8499,
      "step": 595150
    },
    {
      "epoch": 2.08588742039835,
      "grad_norm": 3.359375,
      "learning_rate": 1.6928095412405794e-05,
      "loss": 0.8424,
      "step": 595160
    },
    {
      "epoch": 2.0859224679052457,
      "grad_norm": 3.671875,
      "learning_rate": 1.6927446383742092e-05,
      "loss": 0.7404,
      "step": 595170
    },
    {
      "epoch": 2.085957515412141,
      "grad_norm": 3.625,
      "learning_rate": 1.692679735507839e-05,
      "loss": 0.7888,
      "step": 595180
    },
    {
      "epoch": 2.085992562919037,
      "grad_norm": 3.25,
      "learning_rate": 1.692614832641469e-05,
      "loss": 0.7865,
      "step": 595190
    },
    {
      "epoch": 2.0860276104259325,
      "grad_norm": 2.8125,
      "learning_rate": 1.6925499297750986e-05,
      "loss": 0.745,
      "step": 595200
    },
    {
      "epoch": 2.086062657932828,
      "grad_norm": 2.640625,
      "learning_rate": 1.6924850269087284e-05,
      "loss": 0.7474,
      "step": 595210
    },
    {
      "epoch": 2.0860977054397236,
      "grad_norm": 3.453125,
      "learning_rate": 1.6924201240423582e-05,
      "loss": 0.8319,
      "step": 595220
    },
    {
      "epoch": 2.086132752946619,
      "grad_norm": 2.578125,
      "learning_rate": 1.692355221175988e-05,
      "loss": 0.7982,
      "step": 595230
    },
    {
      "epoch": 2.0861678004535147,
      "grad_norm": 2.859375,
      "learning_rate": 1.692290318309618e-05,
      "loss": 0.8322,
      "step": 595240
    },
    {
      "epoch": 2.0862028479604104,
      "grad_norm": 2.953125,
      "learning_rate": 1.6922254154432476e-05,
      "loss": 0.8397,
      "step": 595250
    },
    {
      "epoch": 2.0862378954673058,
      "grad_norm": 2.46875,
      "learning_rate": 1.6921605125768774e-05,
      "loss": 0.7114,
      "step": 595260
    },
    {
      "epoch": 2.0862729429742015,
      "grad_norm": 2.65625,
      "learning_rate": 1.6920956097105072e-05,
      "loss": 0.8409,
      "step": 595270
    },
    {
      "epoch": 2.0863079904810973,
      "grad_norm": 2.890625,
      "learning_rate": 1.692030706844137e-05,
      "loss": 0.9337,
      "step": 595280
    },
    {
      "epoch": 2.0863430379879926,
      "grad_norm": 3.09375,
      "learning_rate": 1.691965803977767e-05,
      "loss": 0.7963,
      "step": 595290
    },
    {
      "epoch": 2.0863780854948883,
      "grad_norm": 2.84375,
      "learning_rate": 1.6919009011113966e-05,
      "loss": 0.8114,
      "step": 595300
    },
    {
      "epoch": 2.086413133001784,
      "grad_norm": 3.09375,
      "learning_rate": 1.6918359982450264e-05,
      "loss": 0.8216,
      "step": 595310
    },
    {
      "epoch": 2.0864481805086794,
      "grad_norm": 2.796875,
      "learning_rate": 1.6917710953786562e-05,
      "loss": 0.8405,
      "step": 595320
    },
    {
      "epoch": 2.086483228015575,
      "grad_norm": 2.90625,
      "learning_rate": 1.6917061925122864e-05,
      "loss": 0.7856,
      "step": 595330
    },
    {
      "epoch": 2.0865182755224705,
      "grad_norm": 3.09375,
      "learning_rate": 1.6916412896459162e-05,
      "loss": 0.8334,
      "step": 595340
    },
    {
      "epoch": 2.0865533230293662,
      "grad_norm": 2.75,
      "learning_rate": 1.691576386779546e-05,
      "loss": 0.844,
      "step": 595350
    },
    {
      "epoch": 2.086588370536262,
      "grad_norm": 2.953125,
      "learning_rate": 1.6915114839131758e-05,
      "loss": 0.8848,
      "step": 595360
    },
    {
      "epoch": 2.0866234180431573,
      "grad_norm": 2.96875,
      "learning_rate": 1.6914465810468056e-05,
      "loss": 0.7447,
      "step": 595370
    },
    {
      "epoch": 2.086658465550053,
      "grad_norm": 2.40625,
      "learning_rate": 1.6913816781804354e-05,
      "loss": 0.7754,
      "step": 595380
    },
    {
      "epoch": 2.086693513056949,
      "grad_norm": 3.171875,
      "learning_rate": 1.6913167753140652e-05,
      "loss": 0.8117,
      "step": 595390
    },
    {
      "epoch": 2.086728560563844,
      "grad_norm": 2.578125,
      "learning_rate": 1.6912518724476946e-05,
      "loss": 0.7377,
      "step": 595400
    },
    {
      "epoch": 2.08676360807074,
      "grad_norm": 2.609375,
      "learning_rate": 1.6911869695813244e-05,
      "loss": 0.8472,
      "step": 595410
    },
    {
      "epoch": 2.0867986555776357,
      "grad_norm": 3.046875,
      "learning_rate": 1.6911220667149542e-05,
      "loss": 0.8827,
      "step": 595420
    },
    {
      "epoch": 2.086833703084531,
      "grad_norm": 2.765625,
      "learning_rate": 1.691057163848584e-05,
      "loss": 0.75,
      "step": 595430
    },
    {
      "epoch": 2.0868687505914267,
      "grad_norm": 2.671875,
      "learning_rate": 1.6909922609822142e-05,
      "loss": 0.7794,
      "step": 595440
    },
    {
      "epoch": 2.086903798098322,
      "grad_norm": 2.953125,
      "learning_rate": 1.690927358115844e-05,
      "loss": 0.8211,
      "step": 595450
    },
    {
      "epoch": 2.086938845605218,
      "grad_norm": 3.140625,
      "learning_rate": 1.6908624552494738e-05,
      "loss": 0.8788,
      "step": 595460
    },
    {
      "epoch": 2.0869738931121136,
      "grad_norm": 3.5625,
      "learning_rate": 1.6907975523831036e-05,
      "loss": 0.7126,
      "step": 595470
    },
    {
      "epoch": 2.087008940619009,
      "grad_norm": 3.125,
      "learning_rate": 1.6907326495167334e-05,
      "loss": 0.9351,
      "step": 595480
    },
    {
      "epoch": 2.0870439881259046,
      "grad_norm": 2.90625,
      "learning_rate": 1.6906677466503632e-05,
      "loss": 0.7772,
      "step": 595490
    },
    {
      "epoch": 2.0870790356328004,
      "grad_norm": 3.046875,
      "learning_rate": 1.690602843783993e-05,
      "loss": 0.873,
      "step": 595500
    },
    {
      "epoch": 2.0871140831396957,
      "grad_norm": 3.640625,
      "learning_rate": 1.6905379409176228e-05,
      "loss": 0.8421,
      "step": 595510
    },
    {
      "epoch": 2.0871491306465915,
      "grad_norm": 2.859375,
      "learning_rate": 1.6904730380512526e-05,
      "loss": 0.8204,
      "step": 595520
    },
    {
      "epoch": 2.0871841781534872,
      "grad_norm": 2.96875,
      "learning_rate": 1.6904081351848824e-05,
      "loss": 0.815,
      "step": 595530
    },
    {
      "epoch": 2.0872192256603825,
      "grad_norm": 2.53125,
      "learning_rate": 1.6903432323185122e-05,
      "loss": 0.78,
      "step": 595540
    },
    {
      "epoch": 2.0872542731672783,
      "grad_norm": 3.125,
      "learning_rate": 1.690278329452142e-05,
      "loss": 0.8026,
      "step": 595550
    },
    {
      "epoch": 2.087289320674174,
      "grad_norm": 2.9375,
      "learning_rate": 1.6902134265857718e-05,
      "loss": 0.924,
      "step": 595560
    },
    {
      "epoch": 2.0873243681810694,
      "grad_norm": 3.328125,
      "learning_rate": 1.6901485237194016e-05,
      "loss": 0.7554,
      "step": 595570
    },
    {
      "epoch": 2.087359415687965,
      "grad_norm": 2.859375,
      "learning_rate": 1.6900836208530317e-05,
      "loss": 0.8249,
      "step": 595580
    },
    {
      "epoch": 2.0873944631948604,
      "grad_norm": 3.0,
      "learning_rate": 1.6900187179866612e-05,
      "loss": 0.8188,
      "step": 595590
    },
    {
      "epoch": 2.087429510701756,
      "grad_norm": 2.859375,
      "learning_rate": 1.689953815120291e-05,
      "loss": 0.7922,
      "step": 595600
    },
    {
      "epoch": 2.087464558208652,
      "grad_norm": 2.84375,
      "learning_rate": 1.6898889122539208e-05,
      "loss": 0.7744,
      "step": 595610
    },
    {
      "epoch": 2.0874996057155473,
      "grad_norm": 2.953125,
      "learning_rate": 1.6898240093875506e-05,
      "loss": 0.8898,
      "step": 595620
    },
    {
      "epoch": 2.087534653222443,
      "grad_norm": 2.828125,
      "learning_rate": 1.6897591065211804e-05,
      "loss": 0.8368,
      "step": 595630
    },
    {
      "epoch": 2.087569700729339,
      "grad_norm": 2.96875,
      "learning_rate": 1.6896942036548102e-05,
      "loss": 0.8096,
      "step": 595640
    },
    {
      "epoch": 2.087604748236234,
      "grad_norm": 2.640625,
      "learning_rate": 1.68962930078844e-05,
      "loss": 0.8102,
      "step": 595650
    },
    {
      "epoch": 2.08763979574313,
      "grad_norm": 2.640625,
      "learning_rate": 1.6895643979220698e-05,
      "loss": 0.837,
      "step": 595660
    },
    {
      "epoch": 2.0876748432500256,
      "grad_norm": 3.3125,
      "learning_rate": 1.6894994950556996e-05,
      "loss": 0.8235,
      "step": 595670
    },
    {
      "epoch": 2.087709890756921,
      "grad_norm": 2.90625,
      "learning_rate": 1.6894345921893294e-05,
      "loss": 0.7775,
      "step": 595680
    },
    {
      "epoch": 2.0877449382638167,
      "grad_norm": 2.1875,
      "learning_rate": 1.6893696893229595e-05,
      "loss": 0.833,
      "step": 595690
    },
    {
      "epoch": 2.087779985770712,
      "grad_norm": 2.8125,
      "learning_rate": 1.6893047864565893e-05,
      "loss": 0.8413,
      "step": 595700
    },
    {
      "epoch": 2.0878150332776078,
      "grad_norm": 2.6875,
      "learning_rate": 1.689239883590219e-05,
      "loss": 0.7857,
      "step": 595710
    },
    {
      "epoch": 2.0878500807845035,
      "grad_norm": 2.734375,
      "learning_rate": 1.689174980723849e-05,
      "loss": 0.8249,
      "step": 595720
    },
    {
      "epoch": 2.087885128291399,
      "grad_norm": 3.34375,
      "learning_rate": 1.6891100778574787e-05,
      "loss": 0.8941,
      "step": 595730
    },
    {
      "epoch": 2.0879201757982946,
      "grad_norm": 3.46875,
      "learning_rate": 1.6890451749911085e-05,
      "loss": 0.848,
      "step": 595740
    },
    {
      "epoch": 2.0879552233051903,
      "grad_norm": 2.609375,
      "learning_rate": 1.6889802721247383e-05,
      "loss": 0.8191,
      "step": 595750
    },
    {
      "epoch": 2.0879902708120857,
      "grad_norm": 3.265625,
      "learning_rate": 1.688915369258368e-05,
      "loss": 0.8323,
      "step": 595760
    },
    {
      "epoch": 2.0880253183189814,
      "grad_norm": 2.734375,
      "learning_rate": 1.688850466391998e-05,
      "loss": 0.9271,
      "step": 595770
    },
    {
      "epoch": 2.088060365825877,
      "grad_norm": 2.8125,
      "learning_rate": 1.6887855635256274e-05,
      "loss": 0.8107,
      "step": 595780
    },
    {
      "epoch": 2.0880954133327725,
      "grad_norm": 2.640625,
      "learning_rate": 1.6887206606592572e-05,
      "loss": 0.8866,
      "step": 595790
    },
    {
      "epoch": 2.0881304608396682,
      "grad_norm": 2.546875,
      "learning_rate": 1.688655757792887e-05,
      "loss": 0.8659,
      "step": 595800
    },
    {
      "epoch": 2.0881655083465636,
      "grad_norm": 2.953125,
      "learning_rate": 1.688590854926517e-05,
      "loss": 0.8535,
      "step": 595810
    },
    {
      "epoch": 2.0882005558534593,
      "grad_norm": 2.484375,
      "learning_rate": 1.688525952060147e-05,
      "loss": 0.8542,
      "step": 595820
    },
    {
      "epoch": 2.088235603360355,
      "grad_norm": 2.453125,
      "learning_rate": 1.6884610491937767e-05,
      "loss": 0.8368,
      "step": 595830
    },
    {
      "epoch": 2.0882706508672504,
      "grad_norm": 2.609375,
      "learning_rate": 1.6883961463274065e-05,
      "loss": 0.8475,
      "step": 595840
    },
    {
      "epoch": 2.088305698374146,
      "grad_norm": 2.96875,
      "learning_rate": 1.6883312434610363e-05,
      "loss": 0.7753,
      "step": 595850
    },
    {
      "epoch": 2.088340745881042,
      "grad_norm": 2.90625,
      "learning_rate": 1.688266340594666e-05,
      "loss": 0.9245,
      "step": 595860
    },
    {
      "epoch": 2.088375793387937,
      "grad_norm": 2.640625,
      "learning_rate": 1.688201437728296e-05,
      "loss": 0.8045,
      "step": 595870
    },
    {
      "epoch": 2.088410840894833,
      "grad_norm": 2.828125,
      "learning_rate": 1.6881365348619257e-05,
      "loss": 0.8457,
      "step": 595880
    },
    {
      "epoch": 2.0884458884017287,
      "grad_norm": 2.734375,
      "learning_rate": 1.6880716319955555e-05,
      "loss": 0.7952,
      "step": 595890
    },
    {
      "epoch": 2.088480935908624,
      "grad_norm": 2.625,
      "learning_rate": 1.6880067291291853e-05,
      "loss": 0.836,
      "step": 595900
    },
    {
      "epoch": 2.08851598341552,
      "grad_norm": 3.09375,
      "learning_rate": 1.687941826262815e-05,
      "loss": 0.8605,
      "step": 595910
    },
    {
      "epoch": 2.088551030922415,
      "grad_norm": 2.640625,
      "learning_rate": 1.687876923396445e-05,
      "loss": 0.7501,
      "step": 595920
    },
    {
      "epoch": 2.088586078429311,
      "grad_norm": 2.984375,
      "learning_rate": 1.6878120205300747e-05,
      "loss": 0.8147,
      "step": 595930
    },
    {
      "epoch": 2.0886211259362066,
      "grad_norm": 3.109375,
      "learning_rate": 1.6877471176637045e-05,
      "loss": 0.797,
      "step": 595940
    },
    {
      "epoch": 2.088656173443102,
      "grad_norm": 2.640625,
      "learning_rate": 1.6876822147973347e-05,
      "loss": 0.7602,
      "step": 595950
    },
    {
      "epoch": 2.0886912209499977,
      "grad_norm": 3.046875,
      "learning_rate": 1.687617311930964e-05,
      "loss": 0.8031,
      "step": 595960
    },
    {
      "epoch": 2.0887262684568935,
      "grad_norm": 2.9375,
      "learning_rate": 1.687552409064594e-05,
      "loss": 0.7527,
      "step": 595970
    },
    {
      "epoch": 2.088761315963789,
      "grad_norm": 2.90625,
      "learning_rate": 1.6874875061982237e-05,
      "loss": 0.8142,
      "step": 595980
    },
    {
      "epoch": 2.0887963634706845,
      "grad_norm": 2.765625,
      "learning_rate": 1.6874226033318535e-05,
      "loss": 0.8326,
      "step": 595990
    },
    {
      "epoch": 2.0888314109775803,
      "grad_norm": 3.21875,
      "learning_rate": 1.6873577004654833e-05,
      "loss": 0.8344,
      "step": 596000
    },
    {
      "epoch": 2.0888664584844756,
      "grad_norm": 3.359375,
      "learning_rate": 1.687292797599113e-05,
      "loss": 0.824,
      "step": 596010
    },
    {
      "epoch": 2.0889015059913714,
      "grad_norm": 2.5625,
      "learning_rate": 1.687227894732743e-05,
      "loss": 0.782,
      "step": 596020
    },
    {
      "epoch": 2.088936553498267,
      "grad_norm": 2.609375,
      "learning_rate": 1.6871629918663727e-05,
      "loss": 0.7891,
      "step": 596030
    },
    {
      "epoch": 2.0889716010051624,
      "grad_norm": 3.015625,
      "learning_rate": 1.6870980890000025e-05,
      "loss": 0.75,
      "step": 596040
    },
    {
      "epoch": 2.089006648512058,
      "grad_norm": 2.828125,
      "learning_rate": 1.6870331861336323e-05,
      "loss": 0.8574,
      "step": 596050
    },
    {
      "epoch": 2.0890416960189535,
      "grad_norm": 2.703125,
      "learning_rate": 1.6869682832672625e-05,
      "loss": 0.8036,
      "step": 596060
    },
    {
      "epoch": 2.0890767435258493,
      "grad_norm": 3.234375,
      "learning_rate": 1.6869033804008923e-05,
      "loss": 0.8264,
      "step": 596070
    },
    {
      "epoch": 2.089111791032745,
      "grad_norm": 3.0,
      "learning_rate": 1.686838477534522e-05,
      "loss": 0.8623,
      "step": 596080
    },
    {
      "epoch": 2.0891468385396403,
      "grad_norm": 2.875,
      "learning_rate": 1.686773574668152e-05,
      "loss": 0.8057,
      "step": 596090
    },
    {
      "epoch": 2.089181886046536,
      "grad_norm": 2.953125,
      "learning_rate": 1.6867086718017817e-05,
      "loss": 0.9027,
      "step": 596100
    },
    {
      "epoch": 2.089216933553432,
      "grad_norm": 3.21875,
      "learning_rate": 1.6866437689354115e-05,
      "loss": 0.8377,
      "step": 596110
    },
    {
      "epoch": 2.089251981060327,
      "grad_norm": 2.640625,
      "learning_rate": 1.6865788660690413e-05,
      "loss": 0.8192,
      "step": 596120
    },
    {
      "epoch": 2.089287028567223,
      "grad_norm": 2.96875,
      "learning_rate": 1.686513963202671e-05,
      "loss": 0.776,
      "step": 596130
    },
    {
      "epoch": 2.0893220760741187,
      "grad_norm": 2.5625,
      "learning_rate": 1.686449060336301e-05,
      "loss": 0.7983,
      "step": 596140
    },
    {
      "epoch": 2.089357123581014,
      "grad_norm": 3.296875,
      "learning_rate": 1.6863841574699303e-05,
      "loss": 0.9001,
      "step": 596150
    },
    {
      "epoch": 2.0893921710879098,
      "grad_norm": 3.09375,
      "learning_rate": 1.68631925460356e-05,
      "loss": 0.836,
      "step": 596160
    },
    {
      "epoch": 2.089427218594805,
      "grad_norm": 2.6875,
      "learning_rate": 1.6862543517371903e-05,
      "loss": 0.7653,
      "step": 596170
    },
    {
      "epoch": 2.089462266101701,
      "grad_norm": 3.125,
      "learning_rate": 1.68618944887082e-05,
      "loss": 0.8108,
      "step": 596180
    },
    {
      "epoch": 2.0894973136085966,
      "grad_norm": 2.953125,
      "learning_rate": 1.68612454600445e-05,
      "loss": 0.7866,
      "step": 596190
    },
    {
      "epoch": 2.089532361115492,
      "grad_norm": 2.84375,
      "learning_rate": 1.6860596431380797e-05,
      "loss": 0.7727,
      "step": 596200
    },
    {
      "epoch": 2.0895674086223877,
      "grad_norm": 3.234375,
      "learning_rate": 1.6859947402717095e-05,
      "loss": 0.7822,
      "step": 596210
    },
    {
      "epoch": 2.0896024561292834,
      "grad_norm": 2.421875,
      "learning_rate": 1.6859298374053393e-05,
      "loss": 0.7935,
      "step": 596220
    },
    {
      "epoch": 2.0896375036361787,
      "grad_norm": 2.96875,
      "learning_rate": 1.685864934538969e-05,
      "loss": 0.7025,
      "step": 596230
    },
    {
      "epoch": 2.0896725511430745,
      "grad_norm": 3.1875,
      "learning_rate": 1.685800031672599e-05,
      "loss": 0.8047,
      "step": 596240
    },
    {
      "epoch": 2.0897075986499702,
      "grad_norm": 3.359375,
      "learning_rate": 1.6857351288062287e-05,
      "loss": 0.8913,
      "step": 596250
    },
    {
      "epoch": 2.0897426461568656,
      "grad_norm": 3.421875,
      "learning_rate": 1.6856702259398585e-05,
      "loss": 0.8228,
      "step": 596260
    },
    {
      "epoch": 2.0897776936637613,
      "grad_norm": 3.15625,
      "learning_rate": 1.6856053230734883e-05,
      "loss": 0.8489,
      "step": 596270
    },
    {
      "epoch": 2.0898127411706566,
      "grad_norm": 2.859375,
      "learning_rate": 1.685540420207118e-05,
      "loss": 0.8455,
      "step": 596280
    },
    {
      "epoch": 2.0898477886775524,
      "grad_norm": 3.09375,
      "learning_rate": 1.685475517340748e-05,
      "loss": 0.8223,
      "step": 596290
    },
    {
      "epoch": 2.089882836184448,
      "grad_norm": 2.59375,
      "learning_rate": 1.6854106144743777e-05,
      "loss": 0.784,
      "step": 596300
    },
    {
      "epoch": 2.0899178836913435,
      "grad_norm": 3.1875,
      "learning_rate": 1.6853457116080078e-05,
      "loss": 0.8797,
      "step": 596310
    },
    {
      "epoch": 2.089952931198239,
      "grad_norm": 2.546875,
      "learning_rate": 1.6852808087416376e-05,
      "loss": 0.7799,
      "step": 596320
    },
    {
      "epoch": 2.089987978705135,
      "grad_norm": 3.140625,
      "learning_rate": 1.6852159058752674e-05,
      "loss": 0.8467,
      "step": 596330
    },
    {
      "epoch": 2.0900230262120303,
      "grad_norm": 3.015625,
      "learning_rate": 1.685151003008897e-05,
      "loss": 0.8394,
      "step": 596340
    },
    {
      "epoch": 2.090058073718926,
      "grad_norm": 3.125,
      "learning_rate": 1.6850861001425267e-05,
      "loss": 0.8975,
      "step": 596350
    },
    {
      "epoch": 2.090093121225822,
      "grad_norm": 2.71875,
      "learning_rate": 1.6850211972761565e-05,
      "loss": 0.8626,
      "step": 596360
    },
    {
      "epoch": 2.090128168732717,
      "grad_norm": 2.671875,
      "learning_rate": 1.6849562944097863e-05,
      "loss": 0.7832,
      "step": 596370
    },
    {
      "epoch": 2.090163216239613,
      "grad_norm": 3.0,
      "learning_rate": 1.684891391543416e-05,
      "loss": 0.7866,
      "step": 596380
    },
    {
      "epoch": 2.090198263746508,
      "grad_norm": 2.859375,
      "learning_rate": 1.684826488677046e-05,
      "loss": 0.8481,
      "step": 596390
    },
    {
      "epoch": 2.090233311253404,
      "grad_norm": 2.84375,
      "learning_rate": 1.6847615858106757e-05,
      "loss": 0.8914,
      "step": 596400
    },
    {
      "epoch": 2.0902683587602997,
      "grad_norm": 3.0,
      "learning_rate": 1.6846966829443055e-05,
      "loss": 0.7951,
      "step": 596410
    },
    {
      "epoch": 2.090303406267195,
      "grad_norm": 3.0,
      "learning_rate": 1.6846317800779353e-05,
      "loss": 0.7315,
      "step": 596420
    },
    {
      "epoch": 2.0903384537740908,
      "grad_norm": 2.90625,
      "learning_rate": 1.6845668772115654e-05,
      "loss": 0.8371,
      "step": 596430
    },
    {
      "epoch": 2.0903735012809865,
      "grad_norm": 3.421875,
      "learning_rate": 1.6845019743451952e-05,
      "loss": 0.8184,
      "step": 596440
    },
    {
      "epoch": 2.090408548787882,
      "grad_norm": 3.390625,
      "learning_rate": 1.684437071478825e-05,
      "loss": 0.8311,
      "step": 596450
    },
    {
      "epoch": 2.0904435962947776,
      "grad_norm": 3.015625,
      "learning_rate": 1.6843721686124548e-05,
      "loss": 0.8735,
      "step": 596460
    },
    {
      "epoch": 2.0904786438016734,
      "grad_norm": 3.34375,
      "learning_rate": 1.6843072657460846e-05,
      "loss": 0.8577,
      "step": 596470
    },
    {
      "epoch": 2.0905136913085687,
      "grad_norm": 2.796875,
      "learning_rate": 1.6842423628797144e-05,
      "loss": 0.8062,
      "step": 596480
    },
    {
      "epoch": 2.0905487388154644,
      "grad_norm": 2.53125,
      "learning_rate": 1.6841774600133442e-05,
      "loss": 0.711,
      "step": 596490
    },
    {
      "epoch": 2.0905837863223597,
      "grad_norm": 3.125,
      "learning_rate": 1.684112557146974e-05,
      "loss": 0.7983,
      "step": 596500
    },
    {
      "epoch": 2.0906188338292555,
      "grad_norm": 2.84375,
      "learning_rate": 1.6840476542806038e-05,
      "loss": 0.8482,
      "step": 596510
    },
    {
      "epoch": 2.0906538813361513,
      "grad_norm": 2.984375,
      "learning_rate": 1.6839827514142336e-05,
      "loss": 0.8804,
      "step": 596520
    },
    {
      "epoch": 2.0906889288430466,
      "grad_norm": 3.03125,
      "learning_rate": 1.683917848547863e-05,
      "loss": 0.8453,
      "step": 596530
    },
    {
      "epoch": 2.0907239763499423,
      "grad_norm": 2.984375,
      "learning_rate": 1.6838529456814932e-05,
      "loss": 0.8024,
      "step": 596540
    },
    {
      "epoch": 2.090759023856838,
      "grad_norm": 2.734375,
      "learning_rate": 1.683788042815123e-05,
      "loss": 0.7879,
      "step": 596550
    },
    {
      "epoch": 2.0907940713637334,
      "grad_norm": 2.5625,
      "learning_rate": 1.6837231399487528e-05,
      "loss": 0.8357,
      "step": 596560
    },
    {
      "epoch": 2.090829118870629,
      "grad_norm": 3.0,
      "learning_rate": 1.6836582370823826e-05,
      "loss": 0.7951,
      "step": 596570
    },
    {
      "epoch": 2.090864166377525,
      "grad_norm": 3.4375,
      "learning_rate": 1.6835933342160124e-05,
      "loss": 0.8344,
      "step": 596580
    },
    {
      "epoch": 2.0908992138844202,
      "grad_norm": 2.375,
      "learning_rate": 1.6835284313496422e-05,
      "loss": 0.7286,
      "step": 596590
    },
    {
      "epoch": 2.090934261391316,
      "grad_norm": 2.53125,
      "learning_rate": 1.683463528483272e-05,
      "loss": 0.7957,
      "step": 596600
    },
    {
      "epoch": 2.0909693088982113,
      "grad_norm": 2.796875,
      "learning_rate": 1.6833986256169018e-05,
      "loss": 0.7979,
      "step": 596610
    },
    {
      "epoch": 2.091004356405107,
      "grad_norm": 2.75,
      "learning_rate": 1.6833337227505316e-05,
      "loss": 0.8322,
      "step": 596620
    },
    {
      "epoch": 2.091039403912003,
      "grad_norm": 2.578125,
      "learning_rate": 1.6832688198841614e-05,
      "loss": 0.8056,
      "step": 596630
    },
    {
      "epoch": 2.091074451418898,
      "grad_norm": 3.5,
      "learning_rate": 1.6832039170177912e-05,
      "loss": 0.9027,
      "step": 596640
    },
    {
      "epoch": 2.091109498925794,
      "grad_norm": 3.28125,
      "learning_rate": 1.683139014151421e-05,
      "loss": 0.7867,
      "step": 596650
    },
    {
      "epoch": 2.0911445464326897,
      "grad_norm": 3.296875,
      "learning_rate": 1.6830741112850508e-05,
      "loss": 0.7554,
      "step": 596660
    },
    {
      "epoch": 2.091179593939585,
      "grad_norm": 3.140625,
      "learning_rate": 1.6830092084186806e-05,
      "loss": 0.8557,
      "step": 596670
    },
    {
      "epoch": 2.0912146414464807,
      "grad_norm": 3.03125,
      "learning_rate": 1.6829443055523107e-05,
      "loss": 0.833,
      "step": 596680
    },
    {
      "epoch": 2.0912496889533765,
      "grad_norm": 2.8125,
      "learning_rate": 1.6828794026859405e-05,
      "loss": 0.7543,
      "step": 596690
    },
    {
      "epoch": 2.091284736460272,
      "grad_norm": 2.84375,
      "learning_rate": 1.6828144998195703e-05,
      "loss": 0.8154,
      "step": 596700
    },
    {
      "epoch": 2.0913197839671676,
      "grad_norm": 2.640625,
      "learning_rate": 1.6827495969532e-05,
      "loss": 0.8502,
      "step": 596710
    },
    {
      "epoch": 2.091354831474063,
      "grad_norm": 3.21875,
      "learning_rate": 1.6826846940868296e-05,
      "loss": 0.7757,
      "step": 596720
    },
    {
      "epoch": 2.0913898789809586,
      "grad_norm": 2.59375,
      "learning_rate": 1.6826197912204594e-05,
      "loss": 0.8023,
      "step": 596730
    },
    {
      "epoch": 2.0914249264878544,
      "grad_norm": 3.4375,
      "learning_rate": 1.6825548883540892e-05,
      "loss": 0.7989,
      "step": 596740
    },
    {
      "epoch": 2.0914599739947497,
      "grad_norm": 3.3125,
      "learning_rate": 1.682489985487719e-05,
      "loss": 0.8366,
      "step": 596750
    },
    {
      "epoch": 2.0914950215016455,
      "grad_norm": 2.921875,
      "learning_rate": 1.6824250826213488e-05,
      "loss": 0.806,
      "step": 596760
    },
    {
      "epoch": 2.091530069008541,
      "grad_norm": 2.46875,
      "learning_rate": 1.6823601797549786e-05,
      "loss": 0.8124,
      "step": 596770
    },
    {
      "epoch": 2.0915651165154365,
      "grad_norm": 2.921875,
      "learning_rate": 1.6822952768886084e-05,
      "loss": 0.766,
      "step": 596780
    },
    {
      "epoch": 2.0916001640223323,
      "grad_norm": 2.78125,
      "learning_rate": 1.6822303740222385e-05,
      "loss": 0.7308,
      "step": 596790
    },
    {
      "epoch": 2.091635211529228,
      "grad_norm": 3.15625,
      "learning_rate": 1.6821654711558683e-05,
      "loss": 0.9107,
      "step": 596800
    },
    {
      "epoch": 2.0916702590361234,
      "grad_norm": 3.03125,
      "learning_rate": 1.682100568289498e-05,
      "loss": 0.7755,
      "step": 596810
    },
    {
      "epoch": 2.091705306543019,
      "grad_norm": 2.65625,
      "learning_rate": 1.682035665423128e-05,
      "loss": 0.8644,
      "step": 596820
    },
    {
      "epoch": 2.091740354049915,
      "grad_norm": 2.75,
      "learning_rate": 1.6819707625567577e-05,
      "loss": 0.826,
      "step": 596830
    },
    {
      "epoch": 2.09177540155681,
      "grad_norm": 2.796875,
      "learning_rate": 1.6819058596903875e-05,
      "loss": 0.7308,
      "step": 596840
    },
    {
      "epoch": 2.091810449063706,
      "grad_norm": 2.734375,
      "learning_rate": 1.6818409568240173e-05,
      "loss": 0.8542,
      "step": 596850
    },
    {
      "epoch": 2.0918454965706013,
      "grad_norm": 2.859375,
      "learning_rate": 1.681776053957647e-05,
      "loss": 0.7421,
      "step": 596860
    },
    {
      "epoch": 2.091880544077497,
      "grad_norm": 2.5,
      "learning_rate": 1.681711151091277e-05,
      "loss": 0.7524,
      "step": 596870
    },
    {
      "epoch": 2.0919155915843928,
      "grad_norm": 2.625,
      "learning_rate": 1.6816462482249067e-05,
      "loss": 0.7656,
      "step": 596880
    },
    {
      "epoch": 2.091950639091288,
      "grad_norm": 3.109375,
      "learning_rate": 1.6815813453585365e-05,
      "loss": 0.7291,
      "step": 596890
    },
    {
      "epoch": 2.091985686598184,
      "grad_norm": 2.953125,
      "learning_rate": 1.681516442492166e-05,
      "loss": 0.8009,
      "step": 596900
    },
    {
      "epoch": 2.0920207341050796,
      "grad_norm": 3.109375,
      "learning_rate": 1.681451539625796e-05,
      "loss": 0.8493,
      "step": 596910
    },
    {
      "epoch": 2.092055781611975,
      "grad_norm": 2.890625,
      "learning_rate": 1.681386636759426e-05,
      "loss": 0.8287,
      "step": 596920
    },
    {
      "epoch": 2.0920908291188707,
      "grad_norm": 3.140625,
      "learning_rate": 1.6813217338930557e-05,
      "loss": 0.7581,
      "step": 596930
    },
    {
      "epoch": 2.0921258766257664,
      "grad_norm": 3.046875,
      "learning_rate": 1.6812568310266855e-05,
      "loss": 0.7661,
      "step": 596940
    },
    {
      "epoch": 2.0921609241326617,
      "grad_norm": 3.359375,
      "learning_rate": 1.6811919281603153e-05,
      "loss": 0.8649,
      "step": 596950
    },
    {
      "epoch": 2.0921959716395575,
      "grad_norm": 2.625,
      "learning_rate": 1.681127025293945e-05,
      "loss": 0.7961,
      "step": 596960
    },
    {
      "epoch": 2.092231019146453,
      "grad_norm": 2.640625,
      "learning_rate": 1.681062122427575e-05,
      "loss": 0.8601,
      "step": 596970
    },
    {
      "epoch": 2.0922660666533486,
      "grad_norm": 2.78125,
      "learning_rate": 1.6809972195612047e-05,
      "loss": 0.7999,
      "step": 596980
    },
    {
      "epoch": 2.0923011141602443,
      "grad_norm": 3.0625,
      "learning_rate": 1.6809323166948345e-05,
      "loss": 0.8126,
      "step": 596990
    },
    {
      "epoch": 2.0923361616671396,
      "grad_norm": 3.046875,
      "learning_rate": 1.6808674138284643e-05,
      "loss": 0.8124,
      "step": 597000
    },
    {
      "epoch": 2.0923712091740354,
      "grad_norm": 2.515625,
      "learning_rate": 1.680802510962094e-05,
      "loss": 0.7672,
      "step": 597010
    },
    {
      "epoch": 2.092406256680931,
      "grad_norm": 2.75,
      "learning_rate": 1.680737608095724e-05,
      "loss": 0.7525,
      "step": 597020
    },
    {
      "epoch": 2.0924413041878265,
      "grad_norm": 2.859375,
      "learning_rate": 1.6806727052293537e-05,
      "loss": 0.8513,
      "step": 597030
    },
    {
      "epoch": 2.0924763516947222,
      "grad_norm": 2.8125,
      "learning_rate": 1.6806078023629835e-05,
      "loss": 0.772,
      "step": 597040
    },
    {
      "epoch": 2.092511399201618,
      "grad_norm": 2.578125,
      "learning_rate": 1.6805428994966137e-05,
      "loss": 0.8741,
      "step": 597050
    },
    {
      "epoch": 2.0925464467085133,
      "grad_norm": 3.0,
      "learning_rate": 1.6804779966302435e-05,
      "loss": 0.8173,
      "step": 597060
    },
    {
      "epoch": 2.092581494215409,
      "grad_norm": 2.640625,
      "learning_rate": 1.6804130937638733e-05,
      "loss": 0.8428,
      "step": 597070
    },
    {
      "epoch": 2.0926165417223044,
      "grad_norm": 2.40625,
      "learning_rate": 1.680348190897503e-05,
      "loss": 0.8776,
      "step": 597080
    },
    {
      "epoch": 2.0926515892292,
      "grad_norm": 2.84375,
      "learning_rate": 1.6802832880311325e-05,
      "loss": 0.8419,
      "step": 597090
    },
    {
      "epoch": 2.092686636736096,
      "grad_norm": 3.0625,
      "learning_rate": 1.6802183851647623e-05,
      "loss": 0.8077,
      "step": 597100
    },
    {
      "epoch": 2.092721684242991,
      "grad_norm": 3.671875,
      "learning_rate": 1.680153482298392e-05,
      "loss": 0.8335,
      "step": 597110
    },
    {
      "epoch": 2.092756731749887,
      "grad_norm": 2.78125,
      "learning_rate": 1.680088579432022e-05,
      "loss": 0.7659,
      "step": 597120
    },
    {
      "epoch": 2.0927917792567827,
      "grad_norm": 2.421875,
      "learning_rate": 1.6800236765656517e-05,
      "loss": 0.7917,
      "step": 597130
    },
    {
      "epoch": 2.092826826763678,
      "grad_norm": 2.75,
      "learning_rate": 1.6799587736992815e-05,
      "loss": 0.7451,
      "step": 597140
    },
    {
      "epoch": 2.092861874270574,
      "grad_norm": 3.125,
      "learning_rate": 1.6798938708329113e-05,
      "loss": 0.8415,
      "step": 597150
    },
    {
      "epoch": 2.0928969217774696,
      "grad_norm": 2.765625,
      "learning_rate": 1.6798289679665415e-05,
      "loss": 0.8428,
      "step": 597160
    },
    {
      "epoch": 2.092931969284365,
      "grad_norm": 3.125,
      "learning_rate": 1.6797640651001713e-05,
      "loss": 0.8031,
      "step": 597170
    },
    {
      "epoch": 2.0929670167912606,
      "grad_norm": 3.140625,
      "learning_rate": 1.679699162233801e-05,
      "loss": 0.86,
      "step": 597180
    },
    {
      "epoch": 2.093002064298156,
      "grad_norm": 3.140625,
      "learning_rate": 1.679634259367431e-05,
      "loss": 0.9008,
      "step": 597190
    },
    {
      "epoch": 2.0930371118050517,
      "grad_norm": 3.140625,
      "learning_rate": 1.6795693565010607e-05,
      "loss": 0.8749,
      "step": 597200
    },
    {
      "epoch": 2.0930721593119475,
      "grad_norm": 3.0625,
      "learning_rate": 1.6795044536346905e-05,
      "loss": 0.8233,
      "step": 597210
    },
    {
      "epoch": 2.0931072068188428,
      "grad_norm": 2.515625,
      "learning_rate": 1.6794395507683203e-05,
      "loss": 0.8534,
      "step": 597220
    },
    {
      "epoch": 2.0931422543257385,
      "grad_norm": 3.125,
      "learning_rate": 1.67937464790195e-05,
      "loss": 0.7685,
      "step": 597230
    },
    {
      "epoch": 2.0931773018326343,
      "grad_norm": 2.8125,
      "learning_rate": 1.67930974503558e-05,
      "loss": 0.8452,
      "step": 597240
    },
    {
      "epoch": 2.0932123493395296,
      "grad_norm": 2.890625,
      "learning_rate": 1.6792448421692097e-05,
      "loss": 0.8341,
      "step": 597250
    },
    {
      "epoch": 2.0932473968464254,
      "grad_norm": 3.15625,
      "learning_rate": 1.6791799393028395e-05,
      "loss": 0.9169,
      "step": 597260
    },
    {
      "epoch": 2.093282444353321,
      "grad_norm": 3.171875,
      "learning_rate": 1.6791150364364693e-05,
      "loss": 0.8516,
      "step": 597270
    },
    {
      "epoch": 2.0933174918602164,
      "grad_norm": 3.265625,
      "learning_rate": 1.679050133570099e-05,
      "loss": 0.8336,
      "step": 597280
    },
    {
      "epoch": 2.093352539367112,
      "grad_norm": 3.03125,
      "learning_rate": 1.678985230703729e-05,
      "loss": 0.8441,
      "step": 597290
    },
    {
      "epoch": 2.093387586874008,
      "grad_norm": 3.09375,
      "learning_rate": 1.6789203278373587e-05,
      "loss": 0.8556,
      "step": 597300
    },
    {
      "epoch": 2.0934226343809033,
      "grad_norm": 3.15625,
      "learning_rate": 1.6788554249709885e-05,
      "loss": 0.8101,
      "step": 597310
    },
    {
      "epoch": 2.093457681887799,
      "grad_norm": 2.53125,
      "learning_rate": 1.6787905221046183e-05,
      "loss": 0.8103,
      "step": 597320
    },
    {
      "epoch": 2.0934927293946943,
      "grad_norm": 2.71875,
      "learning_rate": 1.678725619238248e-05,
      "loss": 0.7832,
      "step": 597330
    },
    {
      "epoch": 2.09352777690159,
      "grad_norm": 2.625,
      "learning_rate": 1.678660716371878e-05,
      "loss": 0.7569,
      "step": 597340
    },
    {
      "epoch": 2.093562824408486,
      "grad_norm": 2.46875,
      "learning_rate": 1.6785958135055077e-05,
      "loss": 0.8264,
      "step": 597350
    },
    {
      "epoch": 2.093597871915381,
      "grad_norm": 2.625,
      "learning_rate": 1.6785309106391375e-05,
      "loss": 0.6664,
      "step": 597360
    },
    {
      "epoch": 2.093632919422277,
      "grad_norm": 3.0,
      "learning_rate": 1.6784660077727673e-05,
      "loss": 0.7138,
      "step": 597370
    },
    {
      "epoch": 2.0936679669291727,
      "grad_norm": 2.921875,
      "learning_rate": 1.678401104906397e-05,
      "loss": 0.8334,
      "step": 597380
    },
    {
      "epoch": 2.093703014436068,
      "grad_norm": 3.28125,
      "learning_rate": 1.678336202040027e-05,
      "loss": 0.8307,
      "step": 597390
    },
    {
      "epoch": 2.0937380619429637,
      "grad_norm": 2.8125,
      "learning_rate": 1.6782712991736567e-05,
      "loss": 0.8378,
      "step": 597400
    },
    {
      "epoch": 2.0937731094498595,
      "grad_norm": 2.859375,
      "learning_rate": 1.6782063963072868e-05,
      "loss": 0.7489,
      "step": 597410
    },
    {
      "epoch": 2.093808156956755,
      "grad_norm": 3.15625,
      "learning_rate": 1.6781414934409166e-05,
      "loss": 0.9181,
      "step": 597420
    },
    {
      "epoch": 2.0938432044636506,
      "grad_norm": 2.578125,
      "learning_rate": 1.6780765905745464e-05,
      "loss": 0.7767,
      "step": 597430
    },
    {
      "epoch": 2.093878251970546,
      "grad_norm": 2.4375,
      "learning_rate": 1.6780116877081762e-05,
      "loss": 0.78,
      "step": 597440
    },
    {
      "epoch": 2.0939132994774416,
      "grad_norm": 3.140625,
      "learning_rate": 1.677946784841806e-05,
      "loss": 0.7958,
      "step": 597450
    },
    {
      "epoch": 2.0939483469843374,
      "grad_norm": 2.71875,
      "learning_rate": 1.6778818819754358e-05,
      "loss": 0.8111,
      "step": 597460
    },
    {
      "epoch": 2.0939833944912327,
      "grad_norm": 2.84375,
      "learning_rate": 1.6778169791090653e-05,
      "loss": 0.7805,
      "step": 597470
    },
    {
      "epoch": 2.0940184419981285,
      "grad_norm": 3.40625,
      "learning_rate": 1.677752076242695e-05,
      "loss": 0.8235,
      "step": 597480
    },
    {
      "epoch": 2.0940534895050242,
      "grad_norm": 2.703125,
      "learning_rate": 1.677687173376325e-05,
      "loss": 0.7792,
      "step": 597490
    },
    {
      "epoch": 2.0940885370119195,
      "grad_norm": 2.703125,
      "learning_rate": 1.6776222705099547e-05,
      "loss": 0.8299,
      "step": 597500
    },
    {
      "epoch": 2.0941235845188153,
      "grad_norm": 2.828125,
      "learning_rate": 1.6775573676435845e-05,
      "loss": 0.8186,
      "step": 597510
    },
    {
      "epoch": 2.094158632025711,
      "grad_norm": 2.96875,
      "learning_rate": 1.6774924647772143e-05,
      "loss": 0.7721,
      "step": 597520
    },
    {
      "epoch": 2.0941936795326064,
      "grad_norm": 3.09375,
      "learning_rate": 1.6774275619108444e-05,
      "loss": 0.855,
      "step": 597530
    },
    {
      "epoch": 2.094228727039502,
      "grad_norm": 2.71875,
      "learning_rate": 1.6773626590444742e-05,
      "loss": 0.7731,
      "step": 597540
    },
    {
      "epoch": 2.0942637745463974,
      "grad_norm": 2.484375,
      "learning_rate": 1.677297756178104e-05,
      "loss": 0.744,
      "step": 597550
    },
    {
      "epoch": 2.094298822053293,
      "grad_norm": 3.3125,
      "learning_rate": 1.6772328533117338e-05,
      "loss": 0.818,
      "step": 597560
    },
    {
      "epoch": 2.094333869560189,
      "grad_norm": 2.6875,
      "learning_rate": 1.6771679504453636e-05,
      "loss": 0.7713,
      "step": 597570
    },
    {
      "epoch": 2.0943689170670843,
      "grad_norm": 3.40625,
      "learning_rate": 1.6771030475789934e-05,
      "loss": 0.7529,
      "step": 597580
    },
    {
      "epoch": 2.09440396457398,
      "grad_norm": 2.765625,
      "learning_rate": 1.6770381447126232e-05,
      "loss": 0.7698,
      "step": 597590
    },
    {
      "epoch": 2.094439012080876,
      "grad_norm": 3.015625,
      "learning_rate": 1.676973241846253e-05,
      "loss": 0.873,
      "step": 597600
    },
    {
      "epoch": 2.094474059587771,
      "grad_norm": 2.75,
      "learning_rate": 1.6769083389798828e-05,
      "loss": 0.8296,
      "step": 597610
    },
    {
      "epoch": 2.094509107094667,
      "grad_norm": 2.859375,
      "learning_rate": 1.6768434361135126e-05,
      "loss": 0.7455,
      "step": 597620
    },
    {
      "epoch": 2.0945441546015626,
      "grad_norm": 2.484375,
      "learning_rate": 1.6767785332471424e-05,
      "loss": 0.7677,
      "step": 597630
    },
    {
      "epoch": 2.094579202108458,
      "grad_norm": 3.203125,
      "learning_rate": 1.6767136303807722e-05,
      "loss": 0.8527,
      "step": 597640
    },
    {
      "epoch": 2.0946142496153537,
      "grad_norm": 2.6875,
      "learning_rate": 1.676648727514402e-05,
      "loss": 0.7855,
      "step": 597650
    },
    {
      "epoch": 2.094649297122249,
      "grad_norm": 3.046875,
      "learning_rate": 1.6765838246480318e-05,
      "loss": 0.8495,
      "step": 597660
    },
    {
      "epoch": 2.0946843446291448,
      "grad_norm": 2.453125,
      "learning_rate": 1.6765189217816616e-05,
      "loss": 0.7883,
      "step": 597670
    },
    {
      "epoch": 2.0947193921360405,
      "grad_norm": 2.59375,
      "learning_rate": 1.6764540189152914e-05,
      "loss": 0.7787,
      "step": 597680
    },
    {
      "epoch": 2.094754439642936,
      "grad_norm": 3.03125,
      "learning_rate": 1.6763891160489212e-05,
      "loss": 0.8783,
      "step": 597690
    },
    {
      "epoch": 2.0947894871498316,
      "grad_norm": 2.5625,
      "learning_rate": 1.676324213182551e-05,
      "loss": 0.8166,
      "step": 597700
    },
    {
      "epoch": 2.0948245346567274,
      "grad_norm": 3.0,
      "learning_rate": 1.6762593103161808e-05,
      "loss": 0.8218,
      "step": 597710
    },
    {
      "epoch": 2.0948595821636227,
      "grad_norm": 2.59375,
      "learning_rate": 1.6761944074498106e-05,
      "loss": 0.8362,
      "step": 597720
    },
    {
      "epoch": 2.0948946296705184,
      "grad_norm": 3.09375,
      "learning_rate": 1.6761295045834404e-05,
      "loss": 0.8759,
      "step": 597730
    },
    {
      "epoch": 2.094929677177414,
      "grad_norm": 2.984375,
      "learning_rate": 1.6760646017170702e-05,
      "loss": 0.8036,
      "step": 597740
    },
    {
      "epoch": 2.0949647246843095,
      "grad_norm": 2.65625,
      "learning_rate": 1.6759996988507e-05,
      "loss": 0.7967,
      "step": 597750
    },
    {
      "epoch": 2.0949997721912053,
      "grad_norm": 2.890625,
      "learning_rate": 1.6759347959843298e-05,
      "loss": 0.892,
      "step": 597760
    },
    {
      "epoch": 2.0950348196981006,
      "grad_norm": 2.859375,
      "learning_rate": 1.6758698931179596e-05,
      "loss": 0.7638,
      "step": 597770
    },
    {
      "epoch": 2.0950698672049963,
      "grad_norm": 2.59375,
      "learning_rate": 1.6758049902515898e-05,
      "loss": 0.855,
      "step": 597780
    },
    {
      "epoch": 2.095104914711892,
      "grad_norm": 3.25,
      "learning_rate": 1.6757400873852196e-05,
      "loss": 0.8506,
      "step": 597790
    },
    {
      "epoch": 2.0951399622187874,
      "grad_norm": 2.859375,
      "learning_rate": 1.6756751845188494e-05,
      "loss": 0.8621,
      "step": 597800
    },
    {
      "epoch": 2.095175009725683,
      "grad_norm": 2.71875,
      "learning_rate": 1.675610281652479e-05,
      "loss": 0.8483,
      "step": 597810
    },
    {
      "epoch": 2.095210057232579,
      "grad_norm": 2.703125,
      "learning_rate": 1.675545378786109e-05,
      "loss": 0.7294,
      "step": 597820
    },
    {
      "epoch": 2.0952451047394742,
      "grad_norm": 2.375,
      "learning_rate": 1.6754804759197388e-05,
      "loss": 0.9161,
      "step": 597830
    },
    {
      "epoch": 2.09528015224637,
      "grad_norm": 2.515625,
      "learning_rate": 1.6754155730533686e-05,
      "loss": 0.8364,
      "step": 597840
    },
    {
      "epoch": 2.0953151997532657,
      "grad_norm": 2.984375,
      "learning_rate": 1.675350670186998e-05,
      "loss": 0.7915,
      "step": 597850
    },
    {
      "epoch": 2.095350247260161,
      "grad_norm": 3.203125,
      "learning_rate": 1.6752857673206278e-05,
      "loss": 0.8805,
      "step": 597860
    },
    {
      "epoch": 2.095385294767057,
      "grad_norm": 3.078125,
      "learning_rate": 1.6752208644542576e-05,
      "loss": 0.8844,
      "step": 597870
    },
    {
      "epoch": 2.095420342273952,
      "grad_norm": 3.078125,
      "learning_rate": 1.6751559615878874e-05,
      "loss": 0.793,
      "step": 597880
    },
    {
      "epoch": 2.095455389780848,
      "grad_norm": 2.984375,
      "learning_rate": 1.6750910587215176e-05,
      "loss": 0.8491,
      "step": 597890
    },
    {
      "epoch": 2.0954904372877436,
      "grad_norm": 3.28125,
      "learning_rate": 1.6750261558551474e-05,
      "loss": 0.71,
      "step": 597900
    },
    {
      "epoch": 2.095525484794639,
      "grad_norm": 3.203125,
      "learning_rate": 1.674961252988777e-05,
      "loss": 0.8443,
      "step": 597910
    },
    {
      "epoch": 2.0955605323015347,
      "grad_norm": 2.78125,
      "learning_rate": 1.674896350122407e-05,
      "loss": 0.7974,
      "step": 597920
    },
    {
      "epoch": 2.0955955798084305,
      "grad_norm": 3.171875,
      "learning_rate": 1.6748314472560368e-05,
      "loss": 0.7599,
      "step": 597930
    },
    {
      "epoch": 2.095630627315326,
      "grad_norm": 3.03125,
      "learning_rate": 1.6747665443896666e-05,
      "loss": 0.8313,
      "step": 597940
    },
    {
      "epoch": 2.0956656748222215,
      "grad_norm": 2.984375,
      "learning_rate": 1.6747016415232964e-05,
      "loss": 0.8922,
      "step": 597950
    },
    {
      "epoch": 2.0957007223291173,
      "grad_norm": 2.640625,
      "learning_rate": 1.674636738656926e-05,
      "loss": 0.8231,
      "step": 597960
    },
    {
      "epoch": 2.0957357698360126,
      "grad_norm": 3.6875,
      "learning_rate": 1.674571835790556e-05,
      "loss": 0.8587,
      "step": 597970
    },
    {
      "epoch": 2.0957708173429084,
      "grad_norm": 3.71875,
      "learning_rate": 1.6745069329241858e-05,
      "loss": 0.832,
      "step": 597980
    },
    {
      "epoch": 2.0958058648498037,
      "grad_norm": 3.5,
      "learning_rate": 1.6744420300578156e-05,
      "loss": 0.8016,
      "step": 597990
    },
    {
      "epoch": 2.0958409123566994,
      "grad_norm": 2.625,
      "learning_rate": 1.6743771271914454e-05,
      "loss": 0.8643,
      "step": 598000
    },
    {
      "epoch": 2.095875959863595,
      "grad_norm": 2.546875,
      "learning_rate": 1.674312224325075e-05,
      "loss": 0.73,
      "step": 598010
    },
    {
      "epoch": 2.0959110073704905,
      "grad_norm": 3.296875,
      "learning_rate": 1.674247321458705e-05,
      "loss": 0.7515,
      "step": 598020
    },
    {
      "epoch": 2.0959460548773863,
      "grad_norm": 2.578125,
      "learning_rate": 1.6741824185923348e-05,
      "loss": 0.7864,
      "step": 598030
    },
    {
      "epoch": 2.095981102384282,
      "grad_norm": 2.65625,
      "learning_rate": 1.6741175157259646e-05,
      "loss": 0.7911,
      "step": 598040
    },
    {
      "epoch": 2.0960161498911773,
      "grad_norm": 2.796875,
      "learning_rate": 1.6740526128595944e-05,
      "loss": 0.7421,
      "step": 598050
    },
    {
      "epoch": 2.096051197398073,
      "grad_norm": 2.671875,
      "learning_rate": 1.673987709993224e-05,
      "loss": 0.8643,
      "step": 598060
    },
    {
      "epoch": 2.096086244904969,
      "grad_norm": 4.3125,
      "learning_rate": 1.673922807126854e-05,
      "loss": 0.8768,
      "step": 598070
    },
    {
      "epoch": 2.096121292411864,
      "grad_norm": 2.375,
      "learning_rate": 1.6738579042604838e-05,
      "loss": 0.8453,
      "step": 598080
    },
    {
      "epoch": 2.09615633991876,
      "grad_norm": 3.78125,
      "learning_rate": 1.6737930013941136e-05,
      "loss": 0.794,
      "step": 598090
    },
    {
      "epoch": 2.0961913874256553,
      "grad_norm": 2.765625,
      "learning_rate": 1.6737280985277434e-05,
      "loss": 0.8288,
      "step": 598100
    },
    {
      "epoch": 2.096226434932551,
      "grad_norm": 2.828125,
      "learning_rate": 1.673663195661373e-05,
      "loss": 0.8735,
      "step": 598110
    },
    {
      "epoch": 2.0962614824394468,
      "grad_norm": 2.546875,
      "learning_rate": 1.673598292795003e-05,
      "loss": 0.8068,
      "step": 598120
    },
    {
      "epoch": 2.096296529946342,
      "grad_norm": 3.3125,
      "learning_rate": 1.6735333899286328e-05,
      "loss": 0.8481,
      "step": 598130
    },
    {
      "epoch": 2.096331577453238,
      "grad_norm": 2.765625,
      "learning_rate": 1.6734684870622626e-05,
      "loss": 0.8326,
      "step": 598140
    },
    {
      "epoch": 2.0963666249601336,
      "grad_norm": 3.0625,
      "learning_rate": 1.6734035841958927e-05,
      "loss": 0.7923,
      "step": 598150
    },
    {
      "epoch": 2.096401672467029,
      "grad_norm": 3.28125,
      "learning_rate": 1.6733386813295225e-05,
      "loss": 0.8427,
      "step": 598160
    },
    {
      "epoch": 2.0964367199739247,
      "grad_norm": 2.6875,
      "learning_rate": 1.6732737784631523e-05,
      "loss": 0.7876,
      "step": 598170
    },
    {
      "epoch": 2.0964717674808204,
      "grad_norm": 2.78125,
      "learning_rate": 1.673208875596782e-05,
      "loss": 0.7531,
      "step": 598180
    },
    {
      "epoch": 2.0965068149877157,
      "grad_norm": 3.0,
      "learning_rate": 1.673143972730412e-05,
      "loss": 0.8529,
      "step": 598190
    },
    {
      "epoch": 2.0965418624946115,
      "grad_norm": 2.21875,
      "learning_rate": 1.6730790698640417e-05,
      "loss": 0.802,
      "step": 598200
    },
    {
      "epoch": 2.0965769100015073,
      "grad_norm": 3.09375,
      "learning_rate": 1.6730141669976715e-05,
      "loss": 0.8677,
      "step": 598210
    },
    {
      "epoch": 2.0966119575084026,
      "grad_norm": 3.109375,
      "learning_rate": 1.672949264131301e-05,
      "loss": 0.7883,
      "step": 598220
    },
    {
      "epoch": 2.0966470050152983,
      "grad_norm": 2.703125,
      "learning_rate": 1.6728843612649308e-05,
      "loss": 0.8175,
      "step": 598230
    },
    {
      "epoch": 2.0966820525221936,
      "grad_norm": 2.5,
      "learning_rate": 1.6728194583985606e-05,
      "loss": 0.8408,
      "step": 598240
    },
    {
      "epoch": 2.0967171000290894,
      "grad_norm": 2.515625,
      "learning_rate": 1.6727545555321904e-05,
      "loss": 0.7546,
      "step": 598250
    },
    {
      "epoch": 2.096752147535985,
      "grad_norm": 2.890625,
      "learning_rate": 1.6726896526658205e-05,
      "loss": 0.8625,
      "step": 598260
    },
    {
      "epoch": 2.0967871950428805,
      "grad_norm": 2.65625,
      "learning_rate": 1.6726247497994503e-05,
      "loss": 0.7435,
      "step": 598270
    },
    {
      "epoch": 2.0968222425497762,
      "grad_norm": 2.984375,
      "learning_rate": 1.67255984693308e-05,
      "loss": 0.8507,
      "step": 598280
    },
    {
      "epoch": 2.096857290056672,
      "grad_norm": 3.0,
      "learning_rate": 1.67249494406671e-05,
      "loss": 0.8272,
      "step": 598290
    },
    {
      "epoch": 2.0968923375635673,
      "grad_norm": 3.28125,
      "learning_rate": 1.6724300412003397e-05,
      "loss": 0.7674,
      "step": 598300
    },
    {
      "epoch": 2.096927385070463,
      "grad_norm": 2.875,
      "learning_rate": 1.6723651383339695e-05,
      "loss": 0.6961,
      "step": 598310
    },
    {
      "epoch": 2.096962432577359,
      "grad_norm": 2.90625,
      "learning_rate": 1.6723002354675993e-05,
      "loss": 0.8171,
      "step": 598320
    },
    {
      "epoch": 2.096997480084254,
      "grad_norm": 2.515625,
      "learning_rate": 1.672235332601229e-05,
      "loss": 0.7395,
      "step": 598330
    },
    {
      "epoch": 2.09703252759115,
      "grad_norm": 2.625,
      "learning_rate": 1.672170429734859e-05,
      "loss": 0.8207,
      "step": 598340
    },
    {
      "epoch": 2.097067575098045,
      "grad_norm": 2.953125,
      "learning_rate": 1.6721055268684887e-05,
      "loss": 0.7889,
      "step": 598350
    },
    {
      "epoch": 2.097102622604941,
      "grad_norm": 2.984375,
      "learning_rate": 1.6720406240021185e-05,
      "loss": 0.8009,
      "step": 598360
    },
    {
      "epoch": 2.0971376701118367,
      "grad_norm": 2.640625,
      "learning_rate": 1.6719757211357483e-05,
      "loss": 0.7928,
      "step": 598370
    },
    {
      "epoch": 2.097172717618732,
      "grad_norm": 3.171875,
      "learning_rate": 1.671910818269378e-05,
      "loss": 0.8292,
      "step": 598380
    },
    {
      "epoch": 2.097207765125628,
      "grad_norm": 3.125,
      "learning_rate": 1.671845915403008e-05,
      "loss": 0.7981,
      "step": 598390
    },
    {
      "epoch": 2.0972428126325235,
      "grad_norm": 3.171875,
      "learning_rate": 1.671781012536638e-05,
      "loss": 0.8033,
      "step": 598400
    },
    {
      "epoch": 2.097277860139419,
      "grad_norm": 3.046875,
      "learning_rate": 1.6717161096702675e-05,
      "loss": 0.7831,
      "step": 598410
    },
    {
      "epoch": 2.0973129076463146,
      "grad_norm": 2.796875,
      "learning_rate": 1.6716512068038973e-05,
      "loss": 0.7926,
      "step": 598420
    },
    {
      "epoch": 2.0973479551532104,
      "grad_norm": 2.671875,
      "learning_rate": 1.671586303937527e-05,
      "loss": 0.8186,
      "step": 598430
    },
    {
      "epoch": 2.0973830026601057,
      "grad_norm": 2.546875,
      "learning_rate": 1.671521401071157e-05,
      "loss": 0.7944,
      "step": 598440
    },
    {
      "epoch": 2.0974180501670014,
      "grad_norm": 3.390625,
      "learning_rate": 1.6714564982047867e-05,
      "loss": 0.8516,
      "step": 598450
    },
    {
      "epoch": 2.0974530976738968,
      "grad_norm": 2.9375,
      "learning_rate": 1.6713915953384165e-05,
      "loss": 0.7738,
      "step": 598460
    },
    {
      "epoch": 2.0974881451807925,
      "grad_norm": 2.578125,
      "learning_rate": 1.6713266924720463e-05,
      "loss": 0.8409,
      "step": 598470
    },
    {
      "epoch": 2.0975231926876883,
      "grad_norm": 2.90625,
      "learning_rate": 1.671261789605676e-05,
      "loss": 0.8136,
      "step": 598480
    },
    {
      "epoch": 2.0975582401945836,
      "grad_norm": 2.984375,
      "learning_rate": 1.671196886739306e-05,
      "loss": 0.8435,
      "step": 598490
    },
    {
      "epoch": 2.0975932877014793,
      "grad_norm": 3.328125,
      "learning_rate": 1.6711319838729357e-05,
      "loss": 0.7779,
      "step": 598500
    },
    {
      "epoch": 2.097628335208375,
      "grad_norm": 2.859375,
      "learning_rate": 1.671067081006566e-05,
      "loss": 0.8313,
      "step": 598510
    },
    {
      "epoch": 2.0976633827152704,
      "grad_norm": 2.953125,
      "learning_rate": 1.6710021781401956e-05,
      "loss": 0.8195,
      "step": 598520
    },
    {
      "epoch": 2.097698430222166,
      "grad_norm": 2.96875,
      "learning_rate": 1.6709372752738254e-05,
      "loss": 0.7896,
      "step": 598530
    },
    {
      "epoch": 2.097733477729062,
      "grad_norm": 2.390625,
      "learning_rate": 1.6708723724074552e-05,
      "loss": 0.8197,
      "step": 598540
    },
    {
      "epoch": 2.0977685252359572,
      "grad_norm": 2.734375,
      "learning_rate": 1.670807469541085e-05,
      "loss": 0.8367,
      "step": 598550
    },
    {
      "epoch": 2.097803572742853,
      "grad_norm": 2.890625,
      "learning_rate": 1.670742566674715e-05,
      "loss": 0.7664,
      "step": 598560
    },
    {
      "epoch": 2.0978386202497483,
      "grad_norm": 3.3125,
      "learning_rate": 1.6706776638083446e-05,
      "loss": 0.6936,
      "step": 598570
    },
    {
      "epoch": 2.097873667756644,
      "grad_norm": 3.34375,
      "learning_rate": 1.6706127609419744e-05,
      "loss": 0.786,
      "step": 598580
    },
    {
      "epoch": 2.09790871526354,
      "grad_norm": 3.046875,
      "learning_rate": 1.6705478580756042e-05,
      "loss": 0.8379,
      "step": 598590
    },
    {
      "epoch": 2.097943762770435,
      "grad_norm": 2.8125,
      "learning_rate": 1.6704829552092337e-05,
      "loss": 0.8531,
      "step": 598600
    },
    {
      "epoch": 2.097978810277331,
      "grad_norm": 3.65625,
      "learning_rate": 1.6704180523428635e-05,
      "loss": 0.8599,
      "step": 598610
    },
    {
      "epoch": 2.0980138577842267,
      "grad_norm": 2.8125,
      "learning_rate": 1.6703531494764933e-05,
      "loss": 0.805,
      "step": 598620
    },
    {
      "epoch": 2.098048905291122,
      "grad_norm": 2.984375,
      "learning_rate": 1.6702882466101234e-05,
      "loss": 0.7943,
      "step": 598630
    },
    {
      "epoch": 2.0980839527980177,
      "grad_norm": 2.453125,
      "learning_rate": 1.6702233437437532e-05,
      "loss": 0.7524,
      "step": 598640
    },
    {
      "epoch": 2.0981190003049135,
      "grad_norm": 2.328125,
      "learning_rate": 1.670158440877383e-05,
      "loss": 0.8024,
      "step": 598650
    },
    {
      "epoch": 2.098154047811809,
      "grad_norm": 2.75,
      "learning_rate": 1.670093538011013e-05,
      "loss": 0.7523,
      "step": 598660
    },
    {
      "epoch": 2.0981890953187046,
      "grad_norm": 2.984375,
      "learning_rate": 1.6700286351446426e-05,
      "loss": 0.8294,
      "step": 598670
    },
    {
      "epoch": 2.0982241428256003,
      "grad_norm": 3.140625,
      "learning_rate": 1.6699637322782724e-05,
      "loss": 0.8188,
      "step": 598680
    },
    {
      "epoch": 2.0982591903324956,
      "grad_norm": 3.25,
      "learning_rate": 1.6698988294119022e-05,
      "loss": 0.7732,
      "step": 598690
    },
    {
      "epoch": 2.0982942378393914,
      "grad_norm": 3.0,
      "learning_rate": 1.669833926545532e-05,
      "loss": 0.7824,
      "step": 598700
    },
    {
      "epoch": 2.0983292853462867,
      "grad_norm": 3.046875,
      "learning_rate": 1.669769023679162e-05,
      "loss": 0.8237,
      "step": 598710
    },
    {
      "epoch": 2.0983643328531825,
      "grad_norm": 2.609375,
      "learning_rate": 1.6697041208127916e-05,
      "loss": 0.7273,
      "step": 598720
    },
    {
      "epoch": 2.0983993803600782,
      "grad_norm": 3.25,
      "learning_rate": 1.6696392179464214e-05,
      "loss": 0.7811,
      "step": 598730
    },
    {
      "epoch": 2.0984344278669735,
      "grad_norm": 3.25,
      "learning_rate": 1.6695743150800512e-05,
      "loss": 0.7695,
      "step": 598740
    },
    {
      "epoch": 2.0984694753738693,
      "grad_norm": 3.09375,
      "learning_rate": 1.669509412213681e-05,
      "loss": 0.8324,
      "step": 598750
    },
    {
      "epoch": 2.098504522880765,
      "grad_norm": 3.015625,
      "learning_rate": 1.6694445093473112e-05,
      "loss": 0.9111,
      "step": 598760
    },
    {
      "epoch": 2.0985395703876604,
      "grad_norm": 3.078125,
      "learning_rate": 1.669379606480941e-05,
      "loss": 0.7857,
      "step": 598770
    },
    {
      "epoch": 2.098574617894556,
      "grad_norm": 3.171875,
      "learning_rate": 1.6693147036145708e-05,
      "loss": 0.815,
      "step": 598780
    },
    {
      "epoch": 2.098609665401452,
      "grad_norm": 2.78125,
      "learning_rate": 1.6692498007482002e-05,
      "loss": 0.8454,
      "step": 598790
    },
    {
      "epoch": 2.098644712908347,
      "grad_norm": 2.625,
      "learning_rate": 1.66918489788183e-05,
      "loss": 0.881,
      "step": 598800
    },
    {
      "epoch": 2.098679760415243,
      "grad_norm": 3.0625,
      "learning_rate": 1.66911999501546e-05,
      "loss": 0.7976,
      "step": 598810
    },
    {
      "epoch": 2.0987148079221383,
      "grad_norm": 2.796875,
      "learning_rate": 1.6690550921490896e-05,
      "loss": 0.805,
      "step": 598820
    },
    {
      "epoch": 2.098749855429034,
      "grad_norm": 2.65625,
      "learning_rate": 1.6689901892827194e-05,
      "loss": 0.8211,
      "step": 598830
    },
    {
      "epoch": 2.09878490293593,
      "grad_norm": 2.78125,
      "learning_rate": 1.6689252864163492e-05,
      "loss": 0.7757,
      "step": 598840
    },
    {
      "epoch": 2.098819950442825,
      "grad_norm": 3.0625,
      "learning_rate": 1.668860383549979e-05,
      "loss": 0.8527,
      "step": 598850
    },
    {
      "epoch": 2.098854997949721,
      "grad_norm": 2.703125,
      "learning_rate": 1.668795480683609e-05,
      "loss": 0.7535,
      "step": 598860
    },
    {
      "epoch": 2.0988900454566166,
      "grad_norm": 2.75,
      "learning_rate": 1.6687305778172386e-05,
      "loss": 0.8197,
      "step": 598870
    },
    {
      "epoch": 2.098925092963512,
      "grad_norm": 2.765625,
      "learning_rate": 1.6686656749508688e-05,
      "loss": 0.7883,
      "step": 598880
    },
    {
      "epoch": 2.0989601404704077,
      "grad_norm": 2.859375,
      "learning_rate": 1.6686007720844986e-05,
      "loss": 0.7494,
      "step": 598890
    },
    {
      "epoch": 2.0989951879773034,
      "grad_norm": 2.9375,
      "learning_rate": 1.6685358692181284e-05,
      "loss": 0.8218,
      "step": 598900
    },
    {
      "epoch": 2.0990302354841988,
      "grad_norm": 3.0625,
      "learning_rate": 1.6684709663517582e-05,
      "loss": 0.7677,
      "step": 598910
    },
    {
      "epoch": 2.0990652829910945,
      "grad_norm": 2.90625,
      "learning_rate": 1.668406063485388e-05,
      "loss": 0.8547,
      "step": 598920
    },
    {
      "epoch": 2.09910033049799,
      "grad_norm": 2.71875,
      "learning_rate": 1.6683411606190178e-05,
      "loss": 0.8175,
      "step": 598930
    },
    {
      "epoch": 2.0991353780048856,
      "grad_norm": 2.859375,
      "learning_rate": 1.6682762577526476e-05,
      "loss": 0.8171,
      "step": 598940
    },
    {
      "epoch": 2.0991704255117813,
      "grad_norm": 3.015625,
      "learning_rate": 1.6682113548862774e-05,
      "loss": 0.8008,
      "step": 598950
    },
    {
      "epoch": 2.0992054730186767,
      "grad_norm": 2.515625,
      "learning_rate": 1.6681464520199072e-05,
      "loss": 0.738,
      "step": 598960
    },
    {
      "epoch": 2.0992405205255724,
      "grad_norm": 3.234375,
      "learning_rate": 1.6680815491535366e-05,
      "loss": 0.8563,
      "step": 598970
    },
    {
      "epoch": 2.099275568032468,
      "grad_norm": 2.703125,
      "learning_rate": 1.6680166462871664e-05,
      "loss": 0.8121,
      "step": 598980
    },
    {
      "epoch": 2.0993106155393635,
      "grad_norm": 2.984375,
      "learning_rate": 1.6679517434207966e-05,
      "loss": 0.8015,
      "step": 598990
    },
    {
      "epoch": 2.0993456630462592,
      "grad_norm": 3.390625,
      "learning_rate": 1.6678868405544264e-05,
      "loss": 0.8017,
      "step": 599000
    },
    {
      "epoch": 2.099380710553155,
      "grad_norm": 2.875,
      "learning_rate": 1.6678219376880562e-05,
      "loss": 0.7958,
      "step": 599010
    },
    {
      "epoch": 2.0994157580600503,
      "grad_norm": 2.6875,
      "learning_rate": 1.667757034821686e-05,
      "loss": 0.8864,
      "step": 599020
    },
    {
      "epoch": 2.099450805566946,
      "grad_norm": 2.828125,
      "learning_rate": 1.6676921319553158e-05,
      "loss": 0.8434,
      "step": 599030
    },
    {
      "epoch": 2.0994858530738414,
      "grad_norm": 2.515625,
      "learning_rate": 1.6676272290889456e-05,
      "loss": 0.7507,
      "step": 599040
    },
    {
      "epoch": 2.099520900580737,
      "grad_norm": 3.125,
      "learning_rate": 1.6675623262225754e-05,
      "loss": 0.7985,
      "step": 599050
    },
    {
      "epoch": 2.099555948087633,
      "grad_norm": 2.828125,
      "learning_rate": 1.6674974233562052e-05,
      "loss": 0.8233,
      "step": 599060
    },
    {
      "epoch": 2.099590995594528,
      "grad_norm": 3.078125,
      "learning_rate": 1.667432520489835e-05,
      "loss": 0.9283,
      "step": 599070
    },
    {
      "epoch": 2.099626043101424,
      "grad_norm": 2.9375,
      "learning_rate": 1.6673676176234648e-05,
      "loss": 0.8373,
      "step": 599080
    },
    {
      "epoch": 2.0996610906083197,
      "grad_norm": 3.0,
      "learning_rate": 1.6673027147570946e-05,
      "loss": 0.8551,
      "step": 599090
    },
    {
      "epoch": 2.099696138115215,
      "grad_norm": 3.015625,
      "learning_rate": 1.6672378118907244e-05,
      "loss": 0.7779,
      "step": 599100
    },
    {
      "epoch": 2.099731185622111,
      "grad_norm": 3.15625,
      "learning_rate": 1.6671729090243542e-05,
      "loss": 0.8856,
      "step": 599110
    },
    {
      "epoch": 2.0997662331290066,
      "grad_norm": 3.234375,
      "learning_rate": 1.667108006157984e-05,
      "loss": 0.8396,
      "step": 599120
    },
    {
      "epoch": 2.099801280635902,
      "grad_norm": 3.390625,
      "learning_rate": 1.667043103291614e-05,
      "loss": 0.7957,
      "step": 599130
    },
    {
      "epoch": 2.0998363281427976,
      "grad_norm": 3.140625,
      "learning_rate": 1.666978200425244e-05,
      "loss": 0.8027,
      "step": 599140
    },
    {
      "epoch": 2.099871375649693,
      "grad_norm": 3.09375,
      "learning_rate": 1.6669132975588737e-05,
      "loss": 0.7999,
      "step": 599150
    },
    {
      "epoch": 2.0999064231565887,
      "grad_norm": 2.875,
      "learning_rate": 1.6668483946925032e-05,
      "loss": 0.6813,
      "step": 599160
    },
    {
      "epoch": 2.0999414706634845,
      "grad_norm": 2.953125,
      "learning_rate": 1.666783491826133e-05,
      "loss": 0.9331,
      "step": 599170
    },
    {
      "epoch": 2.09997651817038,
      "grad_norm": 2.796875,
      "learning_rate": 1.6667185889597628e-05,
      "loss": 0.8622,
      "step": 599180
    },
    {
      "epoch": 2.1000115656772755,
      "grad_norm": 2.734375,
      "learning_rate": 1.6666536860933926e-05,
      "loss": 0.7683,
      "step": 599190
    },
    {
      "epoch": 2.1000466131841713,
      "grad_norm": 2.984375,
      "learning_rate": 1.6665887832270224e-05,
      "loss": 0.7923,
      "step": 599200
    },
    {
      "epoch": 2.1000816606910666,
      "grad_norm": 2.84375,
      "learning_rate": 1.6665238803606522e-05,
      "loss": 0.8267,
      "step": 599210
    },
    {
      "epoch": 2.1001167081979624,
      "grad_norm": 2.46875,
      "learning_rate": 1.666458977494282e-05,
      "loss": 0.875,
      "step": 599220
    },
    {
      "epoch": 2.100151755704858,
      "grad_norm": 3.15625,
      "learning_rate": 1.6663940746279118e-05,
      "loss": 0.9186,
      "step": 599230
    },
    {
      "epoch": 2.1001868032117534,
      "grad_norm": 2.6875,
      "learning_rate": 1.666329171761542e-05,
      "loss": 0.8045,
      "step": 599240
    },
    {
      "epoch": 2.100221850718649,
      "grad_norm": 3.03125,
      "learning_rate": 1.6662642688951717e-05,
      "loss": 0.8165,
      "step": 599250
    },
    {
      "epoch": 2.1002568982255445,
      "grad_norm": 2.375,
      "learning_rate": 1.6661993660288015e-05,
      "loss": 0.8322,
      "step": 599260
    },
    {
      "epoch": 2.1002919457324403,
      "grad_norm": 2.96875,
      "learning_rate": 1.6661344631624313e-05,
      "loss": 0.8678,
      "step": 599270
    },
    {
      "epoch": 2.100326993239336,
      "grad_norm": 2.75,
      "learning_rate": 1.666069560296061e-05,
      "loss": 0.8636,
      "step": 599280
    },
    {
      "epoch": 2.1003620407462313,
      "grad_norm": 2.71875,
      "learning_rate": 1.666004657429691e-05,
      "loss": 0.8078,
      "step": 599290
    },
    {
      "epoch": 2.100397088253127,
      "grad_norm": 3.28125,
      "learning_rate": 1.6659397545633207e-05,
      "loss": 0.837,
      "step": 599300
    },
    {
      "epoch": 2.100432135760023,
      "grad_norm": 3.09375,
      "learning_rate": 1.6658748516969505e-05,
      "loss": 0.7619,
      "step": 599310
    },
    {
      "epoch": 2.100467183266918,
      "grad_norm": 3.125,
      "learning_rate": 1.6658099488305803e-05,
      "loss": 0.8978,
      "step": 599320
    },
    {
      "epoch": 2.100502230773814,
      "grad_norm": 2.40625,
      "learning_rate": 1.66574504596421e-05,
      "loss": 0.7165,
      "step": 599330
    },
    {
      "epoch": 2.1005372782807097,
      "grad_norm": 3.109375,
      "learning_rate": 1.66568014309784e-05,
      "loss": 0.7801,
      "step": 599340
    },
    {
      "epoch": 2.100572325787605,
      "grad_norm": 3.03125,
      "learning_rate": 1.6656152402314694e-05,
      "loss": 0.8888,
      "step": 599350
    },
    {
      "epoch": 2.1006073732945008,
      "grad_norm": 2.96875,
      "learning_rate": 1.6655503373650995e-05,
      "loss": 0.8243,
      "step": 599360
    },
    {
      "epoch": 2.100642420801396,
      "grad_norm": 2.484375,
      "learning_rate": 1.6654854344987293e-05,
      "loss": 0.8038,
      "step": 599370
    },
    {
      "epoch": 2.100677468308292,
      "grad_norm": 3.25,
      "learning_rate": 1.665420531632359e-05,
      "loss": 0.8272,
      "step": 599380
    },
    {
      "epoch": 2.1007125158151876,
      "grad_norm": 3.09375,
      "learning_rate": 1.665355628765989e-05,
      "loss": 0.8252,
      "step": 599390
    },
    {
      "epoch": 2.100747563322083,
      "grad_norm": 3.09375,
      "learning_rate": 1.6652907258996187e-05,
      "loss": 0.8436,
      "step": 599400
    },
    {
      "epoch": 2.1007826108289787,
      "grad_norm": 2.65625,
      "learning_rate": 1.6652258230332485e-05,
      "loss": 0.7011,
      "step": 599410
    },
    {
      "epoch": 2.1008176583358744,
      "grad_norm": 3.109375,
      "learning_rate": 1.6651609201668783e-05,
      "loss": 0.7844,
      "step": 599420
    },
    {
      "epoch": 2.1008527058427697,
      "grad_norm": 2.859375,
      "learning_rate": 1.665096017300508e-05,
      "loss": 0.8365,
      "step": 599430
    },
    {
      "epoch": 2.1008877533496655,
      "grad_norm": 2.890625,
      "learning_rate": 1.665031114434138e-05,
      "loss": 0.8436,
      "step": 599440
    },
    {
      "epoch": 2.1009228008565612,
      "grad_norm": 2.921875,
      "learning_rate": 1.6649662115677677e-05,
      "loss": 0.7689,
      "step": 599450
    },
    {
      "epoch": 2.1009578483634566,
      "grad_norm": 2.78125,
      "learning_rate": 1.6649013087013975e-05,
      "loss": 0.819,
      "step": 599460
    },
    {
      "epoch": 2.1009928958703523,
      "grad_norm": 2.734375,
      "learning_rate": 1.6648364058350273e-05,
      "loss": 0.8455,
      "step": 599470
    },
    {
      "epoch": 2.1010279433772476,
      "grad_norm": 2.84375,
      "learning_rate": 1.664771502968657e-05,
      "loss": 0.8041,
      "step": 599480
    },
    {
      "epoch": 2.1010629908841434,
      "grad_norm": 3.234375,
      "learning_rate": 1.664706600102287e-05,
      "loss": 0.8043,
      "step": 599490
    },
    {
      "epoch": 2.101098038391039,
      "grad_norm": 3.171875,
      "learning_rate": 1.664641697235917e-05,
      "loss": 0.8676,
      "step": 599500
    },
    {
      "epoch": 2.1011330858979345,
      "grad_norm": 2.859375,
      "learning_rate": 1.664576794369547e-05,
      "loss": 0.9584,
      "step": 599510
    },
    {
      "epoch": 2.10116813340483,
      "grad_norm": 2.8125,
      "learning_rate": 1.6645118915031766e-05,
      "loss": 0.8095,
      "step": 599520
    },
    {
      "epoch": 2.101203180911726,
      "grad_norm": 3.203125,
      "learning_rate": 1.6644469886368064e-05,
      "loss": 0.7832,
      "step": 599530
    },
    {
      "epoch": 2.1012382284186213,
      "grad_norm": 2.9375,
      "learning_rate": 1.664382085770436e-05,
      "loss": 0.868,
      "step": 599540
    },
    {
      "epoch": 2.101273275925517,
      "grad_norm": 2.46875,
      "learning_rate": 1.6643171829040657e-05,
      "loss": 0.8236,
      "step": 599550
    },
    {
      "epoch": 2.101308323432413,
      "grad_norm": 2.71875,
      "learning_rate": 1.6642522800376955e-05,
      "loss": 0.8451,
      "step": 599560
    },
    {
      "epoch": 2.101343370939308,
      "grad_norm": 3.015625,
      "learning_rate": 1.6641873771713253e-05,
      "loss": 0.8329,
      "step": 599570
    },
    {
      "epoch": 2.101378418446204,
      "grad_norm": 2.765625,
      "learning_rate": 1.664122474304955e-05,
      "loss": 0.8219,
      "step": 599580
    },
    {
      "epoch": 2.1014134659530996,
      "grad_norm": 2.8125,
      "learning_rate": 1.664057571438585e-05,
      "loss": 0.8391,
      "step": 599590
    },
    {
      "epoch": 2.101448513459995,
      "grad_norm": 2.984375,
      "learning_rate": 1.6639926685722147e-05,
      "loss": 0.888,
      "step": 599600
    },
    {
      "epoch": 2.1014835609668907,
      "grad_norm": 2.59375,
      "learning_rate": 1.663927765705845e-05,
      "loss": 0.8326,
      "step": 599610
    },
    {
      "epoch": 2.101518608473786,
      "grad_norm": 2.734375,
      "learning_rate": 1.6638628628394746e-05,
      "loss": 0.7924,
      "step": 599620
    },
    {
      "epoch": 2.101553655980682,
      "grad_norm": 3.203125,
      "learning_rate": 1.6637979599731044e-05,
      "loss": 0.8252,
      "step": 599630
    },
    {
      "epoch": 2.1015887034875775,
      "grad_norm": 3.0625,
      "learning_rate": 1.6637330571067342e-05,
      "loss": 0.8173,
      "step": 599640
    },
    {
      "epoch": 2.101623750994473,
      "grad_norm": 2.96875,
      "learning_rate": 1.663668154240364e-05,
      "loss": 0.918,
      "step": 599650
    },
    {
      "epoch": 2.1016587985013686,
      "grad_norm": 2.84375,
      "learning_rate": 1.663603251373994e-05,
      "loss": 0.8103,
      "step": 599660
    },
    {
      "epoch": 2.1016938460082644,
      "grad_norm": 2.921875,
      "learning_rate": 1.6635383485076236e-05,
      "loss": 0.7767,
      "step": 599670
    },
    {
      "epoch": 2.1017288935151597,
      "grad_norm": 2.734375,
      "learning_rate": 1.6634734456412534e-05,
      "loss": 0.8401,
      "step": 599680
    },
    {
      "epoch": 2.1017639410220554,
      "grad_norm": 2.65625,
      "learning_rate": 1.6634085427748832e-05,
      "loss": 0.7707,
      "step": 599690
    },
    {
      "epoch": 2.101798988528951,
      "grad_norm": 3.390625,
      "learning_rate": 1.663343639908513e-05,
      "loss": 0.8262,
      "step": 599700
    },
    {
      "epoch": 2.1018340360358465,
      "grad_norm": 2.921875,
      "learning_rate": 1.663278737042143e-05,
      "loss": 0.7829,
      "step": 599710
    },
    {
      "epoch": 2.1018690835427423,
      "grad_norm": 3.375,
      "learning_rate": 1.6632138341757726e-05,
      "loss": 0.8508,
      "step": 599720
    },
    {
      "epoch": 2.1019041310496376,
      "grad_norm": 3.203125,
      "learning_rate": 1.6631489313094024e-05,
      "loss": 0.8311,
      "step": 599730
    },
    {
      "epoch": 2.1019391785565333,
      "grad_norm": 2.875,
      "learning_rate": 1.6630840284430322e-05,
      "loss": 0.853,
      "step": 599740
    },
    {
      "epoch": 2.101974226063429,
      "grad_norm": 3.25,
      "learning_rate": 1.663019125576662e-05,
      "loss": 0.7627,
      "step": 599750
    },
    {
      "epoch": 2.1020092735703244,
      "grad_norm": 2.6875,
      "learning_rate": 1.662954222710292e-05,
      "loss": 0.8321,
      "step": 599760
    },
    {
      "epoch": 2.10204432107722,
      "grad_norm": 2.640625,
      "learning_rate": 1.6628893198439216e-05,
      "loss": 0.8484,
      "step": 599770
    },
    {
      "epoch": 2.102079368584116,
      "grad_norm": 2.8125,
      "learning_rate": 1.6628244169775514e-05,
      "loss": 0.8439,
      "step": 599780
    },
    {
      "epoch": 2.1021144160910112,
      "grad_norm": 3.15625,
      "learning_rate": 1.6627595141111812e-05,
      "loss": 0.8617,
      "step": 599790
    },
    {
      "epoch": 2.102149463597907,
      "grad_norm": 2.734375,
      "learning_rate": 1.662694611244811e-05,
      "loss": 0.8011,
      "step": 599800
    },
    {
      "epoch": 2.1021845111048028,
      "grad_norm": 2.765625,
      "learning_rate": 1.662629708378441e-05,
      "loss": 0.7867,
      "step": 599810
    },
    {
      "epoch": 2.102219558611698,
      "grad_norm": 2.4375,
      "learning_rate": 1.6625648055120706e-05,
      "loss": 0.8288,
      "step": 599820
    },
    {
      "epoch": 2.102254606118594,
      "grad_norm": 2.75,
      "learning_rate": 1.6624999026457004e-05,
      "loss": 0.9004,
      "step": 599830
    },
    {
      "epoch": 2.102289653625489,
      "grad_norm": 2.796875,
      "learning_rate": 1.6624349997793302e-05,
      "loss": 0.789,
      "step": 599840
    },
    {
      "epoch": 2.102324701132385,
      "grad_norm": 3.03125,
      "learning_rate": 1.66237009691296e-05,
      "loss": 0.777,
      "step": 599850
    },
    {
      "epoch": 2.1023597486392807,
      "grad_norm": 2.6875,
      "learning_rate": 1.6623051940465902e-05,
      "loss": 0.7917,
      "step": 599860
    },
    {
      "epoch": 2.102394796146176,
      "grad_norm": 3.0,
      "learning_rate": 1.66224029118022e-05,
      "loss": 0.7559,
      "step": 599870
    },
    {
      "epoch": 2.1024298436530717,
      "grad_norm": 2.421875,
      "learning_rate": 1.6621753883138498e-05,
      "loss": 0.7803,
      "step": 599880
    },
    {
      "epoch": 2.1024648911599675,
      "grad_norm": 3.4375,
      "learning_rate": 1.6621104854474796e-05,
      "loss": 0.7587,
      "step": 599890
    },
    {
      "epoch": 2.102499938666863,
      "grad_norm": 3.078125,
      "learning_rate": 1.6620455825811094e-05,
      "loss": 0.8868,
      "step": 599900
    },
    {
      "epoch": 2.1025349861737586,
      "grad_norm": 2.890625,
      "learning_rate": 1.661980679714739e-05,
      "loss": 0.7975,
      "step": 599910
    },
    {
      "epoch": 2.1025700336806543,
      "grad_norm": 3.3125,
      "learning_rate": 1.6619157768483686e-05,
      "loss": 0.8129,
      "step": 599920
    },
    {
      "epoch": 2.1026050811875496,
      "grad_norm": 2.765625,
      "learning_rate": 1.6618508739819984e-05,
      "loss": 0.7871,
      "step": 599930
    },
    {
      "epoch": 2.1026401286944454,
      "grad_norm": 3.109375,
      "learning_rate": 1.6617859711156282e-05,
      "loss": 0.8396,
      "step": 599940
    },
    {
      "epoch": 2.102675176201341,
      "grad_norm": 2.9375,
      "learning_rate": 1.661721068249258e-05,
      "loss": 0.7581,
      "step": 599950
    },
    {
      "epoch": 2.1027102237082365,
      "grad_norm": 2.890625,
      "learning_rate": 1.661656165382888e-05,
      "loss": 0.8575,
      "step": 599960
    },
    {
      "epoch": 2.102745271215132,
      "grad_norm": 3.078125,
      "learning_rate": 1.6615912625165176e-05,
      "loss": 0.7695,
      "step": 599970
    },
    {
      "epoch": 2.1027803187220275,
      "grad_norm": 2.578125,
      "learning_rate": 1.6615263596501478e-05,
      "loss": 0.8462,
      "step": 599980
    },
    {
      "epoch": 2.1028153662289233,
      "grad_norm": 2.546875,
      "learning_rate": 1.6614614567837776e-05,
      "loss": 0.8386,
      "step": 599990
    },
    {
      "epoch": 2.102850413735819,
      "grad_norm": 3.21875,
      "learning_rate": 1.6613965539174074e-05,
      "loss": 0.8471,
      "step": 600000
    },
    {
      "epoch": 2.102850413735819,
      "eval_loss": 0.76625657081604,
      "eval_runtime": 556.6869,
      "eval_samples_per_second": 683.393,
      "eval_steps_per_second": 56.949,
      "step": 600000
    },
    {
      "epoch": 2.1028854612427144,
      "grad_norm": 2.921875,
      "learning_rate": 1.6613316510510372e-05,
      "loss": 0.8793,
      "step": 600010
    },
    {
      "epoch": 2.10292050874961,
      "grad_norm": 2.640625,
      "learning_rate": 1.661266748184667e-05,
      "loss": 0.8261,
      "step": 600020
    },
    {
      "epoch": 2.102955556256506,
      "grad_norm": 3.40625,
      "learning_rate": 1.6612018453182968e-05,
      "loss": 0.8574,
      "step": 600030
    },
    {
      "epoch": 2.102990603763401,
      "grad_norm": 2.796875,
      "learning_rate": 1.6611369424519266e-05,
      "loss": 0.8496,
      "step": 600040
    },
    {
      "epoch": 2.103025651270297,
      "grad_norm": 3.046875,
      "learning_rate": 1.6610720395855564e-05,
      "loss": 0.8715,
      "step": 600050
    },
    {
      "epoch": 2.1030606987771927,
      "grad_norm": 2.75,
      "learning_rate": 1.6610071367191862e-05,
      "loss": 0.7766,
      "step": 600060
    },
    {
      "epoch": 2.103095746284088,
      "grad_norm": 3.109375,
      "learning_rate": 1.660942233852816e-05,
      "loss": 0.8158,
      "step": 600070
    },
    {
      "epoch": 2.1031307937909838,
      "grad_norm": 2.828125,
      "learning_rate": 1.6608773309864458e-05,
      "loss": 0.8434,
      "step": 600080
    },
    {
      "epoch": 2.103165841297879,
      "grad_norm": 2.609375,
      "learning_rate": 1.6608124281200756e-05,
      "loss": 0.7854,
      "step": 600090
    },
    {
      "epoch": 2.103200888804775,
      "grad_norm": 3.140625,
      "learning_rate": 1.6607475252537054e-05,
      "loss": 0.9001,
      "step": 600100
    },
    {
      "epoch": 2.1032359363116706,
      "grad_norm": 2.703125,
      "learning_rate": 1.6606826223873352e-05,
      "loss": 0.8412,
      "step": 600110
    },
    {
      "epoch": 2.103270983818566,
      "grad_norm": 2.71875,
      "learning_rate": 1.660617719520965e-05,
      "loss": 0.7774,
      "step": 600120
    },
    {
      "epoch": 2.1033060313254617,
      "grad_norm": 2.84375,
      "learning_rate": 1.6605528166545948e-05,
      "loss": 0.8218,
      "step": 600130
    },
    {
      "epoch": 2.1033410788323574,
      "grad_norm": 2.5,
      "learning_rate": 1.6604879137882246e-05,
      "loss": 0.8061,
      "step": 600140
    },
    {
      "epoch": 2.1033761263392527,
      "grad_norm": 3.15625,
      "learning_rate": 1.6604230109218544e-05,
      "loss": 0.9026,
      "step": 600150
    },
    {
      "epoch": 2.1034111738461485,
      "grad_norm": 2.53125,
      "learning_rate": 1.6603581080554842e-05,
      "loss": 0.8009,
      "step": 600160
    },
    {
      "epoch": 2.1034462213530443,
      "grad_norm": 3.125,
      "learning_rate": 1.660293205189114e-05,
      "loss": 0.8338,
      "step": 600170
    },
    {
      "epoch": 2.1034812688599396,
      "grad_norm": 2.75,
      "learning_rate": 1.6602283023227438e-05,
      "loss": 0.8315,
      "step": 600180
    },
    {
      "epoch": 2.1035163163668353,
      "grad_norm": 2.609375,
      "learning_rate": 1.6601633994563736e-05,
      "loss": 0.78,
      "step": 600190
    },
    {
      "epoch": 2.1035513638737307,
      "grad_norm": 3.296875,
      "learning_rate": 1.6600984965900034e-05,
      "loss": 0.8412,
      "step": 600200
    },
    {
      "epoch": 2.1035864113806264,
      "grad_norm": 3.03125,
      "learning_rate": 1.6600335937236332e-05,
      "loss": 0.8827,
      "step": 600210
    },
    {
      "epoch": 2.103621458887522,
      "grad_norm": 3.03125,
      "learning_rate": 1.659968690857263e-05,
      "loss": 0.7809,
      "step": 600220
    },
    {
      "epoch": 2.1036565063944175,
      "grad_norm": 2.578125,
      "learning_rate": 1.659903787990893e-05,
      "loss": 0.8056,
      "step": 600230
    },
    {
      "epoch": 2.1036915539013132,
      "grad_norm": 2.953125,
      "learning_rate": 1.659838885124523e-05,
      "loss": 0.8151,
      "step": 600240
    },
    {
      "epoch": 2.103726601408209,
      "grad_norm": 2.859375,
      "learning_rate": 1.6597739822581527e-05,
      "loss": 0.8031,
      "step": 600250
    },
    {
      "epoch": 2.1037616489151043,
      "grad_norm": 2.671875,
      "learning_rate": 1.6597090793917825e-05,
      "loss": 0.8665,
      "step": 600260
    },
    {
      "epoch": 2.103796696422,
      "grad_norm": 2.703125,
      "learning_rate": 1.6596441765254123e-05,
      "loss": 0.8425,
      "step": 600270
    },
    {
      "epoch": 2.103831743928896,
      "grad_norm": 3.25,
      "learning_rate": 1.659579273659042e-05,
      "loss": 0.7985,
      "step": 600280
    },
    {
      "epoch": 2.103866791435791,
      "grad_norm": 2.75,
      "learning_rate": 1.6595143707926716e-05,
      "loss": 0.8151,
      "step": 600290
    },
    {
      "epoch": 2.103901838942687,
      "grad_norm": 3.296875,
      "learning_rate": 1.6594494679263014e-05,
      "loss": 0.8699,
      "step": 600300
    },
    {
      "epoch": 2.103936886449582,
      "grad_norm": 2.8125,
      "learning_rate": 1.6593845650599312e-05,
      "loss": 0.7875,
      "step": 600310
    },
    {
      "epoch": 2.103971933956478,
      "grad_norm": 2.4375,
      "learning_rate": 1.659319662193561e-05,
      "loss": 0.71,
      "step": 600320
    },
    {
      "epoch": 2.1040069814633737,
      "grad_norm": 2.84375,
      "learning_rate": 1.6592547593271908e-05,
      "loss": 0.8157,
      "step": 600330
    },
    {
      "epoch": 2.104042028970269,
      "grad_norm": 3.296875,
      "learning_rate": 1.659189856460821e-05,
      "loss": 0.9077,
      "step": 600340
    },
    {
      "epoch": 2.104077076477165,
      "grad_norm": 3.3125,
      "learning_rate": 1.6591249535944507e-05,
      "loss": 0.8052,
      "step": 600350
    },
    {
      "epoch": 2.1041121239840606,
      "grad_norm": 3.28125,
      "learning_rate": 1.6590600507280805e-05,
      "loss": 0.7772,
      "step": 600360
    },
    {
      "epoch": 2.104147171490956,
      "grad_norm": 3.078125,
      "learning_rate": 1.6589951478617103e-05,
      "loss": 0.8361,
      "step": 600370
    },
    {
      "epoch": 2.1041822189978516,
      "grad_norm": 3.125,
      "learning_rate": 1.65893024499534e-05,
      "loss": 0.7963,
      "step": 600380
    },
    {
      "epoch": 2.1042172665047474,
      "grad_norm": 2.796875,
      "learning_rate": 1.65886534212897e-05,
      "loss": 0.7136,
      "step": 600390
    },
    {
      "epoch": 2.1042523140116427,
      "grad_norm": 2.53125,
      "learning_rate": 1.6588004392625997e-05,
      "loss": 0.8363,
      "step": 600400
    },
    {
      "epoch": 2.1042873615185385,
      "grad_norm": 2.90625,
      "learning_rate": 1.6587355363962295e-05,
      "loss": 0.9146,
      "step": 600410
    },
    {
      "epoch": 2.1043224090254338,
      "grad_norm": 2.921875,
      "learning_rate": 1.6586706335298593e-05,
      "loss": 0.8572,
      "step": 600420
    },
    {
      "epoch": 2.1043574565323295,
      "grad_norm": 2.859375,
      "learning_rate": 1.658605730663489e-05,
      "loss": 0.7305,
      "step": 600430
    },
    {
      "epoch": 2.1043925040392253,
      "grad_norm": 2.96875,
      "learning_rate": 1.658540827797119e-05,
      "loss": 0.9023,
      "step": 600440
    },
    {
      "epoch": 2.1044275515461206,
      "grad_norm": 3.015625,
      "learning_rate": 1.6584759249307487e-05,
      "loss": 0.8008,
      "step": 600450
    },
    {
      "epoch": 2.1044625990530164,
      "grad_norm": 2.75,
      "learning_rate": 1.6584110220643785e-05,
      "loss": 0.7817,
      "step": 600460
    },
    {
      "epoch": 2.104497646559912,
      "grad_norm": 2.296875,
      "learning_rate": 1.6583461191980083e-05,
      "loss": 0.7915,
      "step": 600470
    },
    {
      "epoch": 2.1045326940668074,
      "grad_norm": 2.953125,
      "learning_rate": 1.658281216331638e-05,
      "loss": 0.8506,
      "step": 600480
    },
    {
      "epoch": 2.104567741573703,
      "grad_norm": 2.8125,
      "learning_rate": 1.658216313465268e-05,
      "loss": 0.7893,
      "step": 600490
    },
    {
      "epoch": 2.104602789080599,
      "grad_norm": 3.265625,
      "learning_rate": 1.6581514105988977e-05,
      "loss": 0.8332,
      "step": 600500
    },
    {
      "epoch": 2.1046378365874943,
      "grad_norm": 2.703125,
      "learning_rate": 1.6580865077325275e-05,
      "loss": 0.7713,
      "step": 600510
    },
    {
      "epoch": 2.10467288409439,
      "grad_norm": 2.75,
      "learning_rate": 1.6580216048661573e-05,
      "loss": 0.8184,
      "step": 600520
    },
    {
      "epoch": 2.1047079316012853,
      "grad_norm": 2.65625,
      "learning_rate": 1.657956701999787e-05,
      "loss": 0.8142,
      "step": 600530
    },
    {
      "epoch": 2.104742979108181,
      "grad_norm": 2.875,
      "learning_rate": 1.657891799133417e-05,
      "loss": 0.8085,
      "step": 600540
    },
    {
      "epoch": 2.104778026615077,
      "grad_norm": 3.09375,
      "learning_rate": 1.6578268962670467e-05,
      "loss": 0.8679,
      "step": 600550
    },
    {
      "epoch": 2.104813074121972,
      "grad_norm": 2.875,
      "learning_rate": 1.6577619934006765e-05,
      "loss": 0.8758,
      "step": 600560
    },
    {
      "epoch": 2.104848121628868,
      "grad_norm": 2.5,
      "learning_rate": 1.6576970905343063e-05,
      "loss": 0.734,
      "step": 600570
    },
    {
      "epoch": 2.1048831691357637,
      "grad_norm": 2.703125,
      "learning_rate": 1.657632187667936e-05,
      "loss": 0.8223,
      "step": 600580
    },
    {
      "epoch": 2.104918216642659,
      "grad_norm": 3.015625,
      "learning_rate": 1.657567284801566e-05,
      "loss": 0.8501,
      "step": 600590
    },
    {
      "epoch": 2.1049532641495547,
      "grad_norm": 2.765625,
      "learning_rate": 1.657502381935196e-05,
      "loss": 0.8488,
      "step": 600600
    },
    {
      "epoch": 2.1049883116564505,
      "grad_norm": 3.0,
      "learning_rate": 1.657437479068826e-05,
      "loss": 0.8035,
      "step": 600610
    },
    {
      "epoch": 2.105023359163346,
      "grad_norm": 2.640625,
      "learning_rate": 1.6573725762024557e-05,
      "loss": 0.8062,
      "step": 600620
    },
    {
      "epoch": 2.1050584066702416,
      "grad_norm": 2.734375,
      "learning_rate": 1.6573076733360855e-05,
      "loss": 0.8143,
      "step": 600630
    },
    {
      "epoch": 2.105093454177137,
      "grad_norm": 3.078125,
      "learning_rate": 1.6572427704697153e-05,
      "loss": 0.9135,
      "step": 600640
    },
    {
      "epoch": 2.1051285016840326,
      "grad_norm": 3.375,
      "learning_rate": 1.657177867603345e-05,
      "loss": 0.8295,
      "step": 600650
    },
    {
      "epoch": 2.1051635491909284,
      "grad_norm": 3.21875,
      "learning_rate": 1.657112964736975e-05,
      "loss": 0.8069,
      "step": 600660
    },
    {
      "epoch": 2.1051985966978237,
      "grad_norm": 3.09375,
      "learning_rate": 1.6570480618706043e-05,
      "loss": 0.8804,
      "step": 600670
    },
    {
      "epoch": 2.1052336442047195,
      "grad_norm": 2.640625,
      "learning_rate": 1.656983159004234e-05,
      "loss": 0.7473,
      "step": 600680
    },
    {
      "epoch": 2.1052686917116152,
      "grad_norm": 2.921875,
      "learning_rate": 1.656918256137864e-05,
      "loss": 0.7668,
      "step": 600690
    },
    {
      "epoch": 2.1053037392185106,
      "grad_norm": 2.421875,
      "learning_rate": 1.6568533532714937e-05,
      "loss": 0.8623,
      "step": 600700
    },
    {
      "epoch": 2.1053387867254063,
      "grad_norm": 3.265625,
      "learning_rate": 1.656788450405124e-05,
      "loss": 0.9187,
      "step": 600710
    },
    {
      "epoch": 2.105373834232302,
      "grad_norm": 2.609375,
      "learning_rate": 1.6567235475387537e-05,
      "loss": 0.8172,
      "step": 600720
    },
    {
      "epoch": 2.1054088817391974,
      "grad_norm": 2.984375,
      "learning_rate": 1.6566586446723835e-05,
      "loss": 0.7791,
      "step": 600730
    },
    {
      "epoch": 2.105443929246093,
      "grad_norm": 2.5,
      "learning_rate": 1.6565937418060133e-05,
      "loss": 0.7781,
      "step": 600740
    },
    {
      "epoch": 2.1054789767529885,
      "grad_norm": 2.671875,
      "learning_rate": 1.656528838939643e-05,
      "loss": 0.8806,
      "step": 600750
    },
    {
      "epoch": 2.105514024259884,
      "grad_norm": 2.828125,
      "learning_rate": 1.656463936073273e-05,
      "loss": 0.8217,
      "step": 600760
    },
    {
      "epoch": 2.10554907176678,
      "grad_norm": 3.0,
      "learning_rate": 1.6563990332069027e-05,
      "loss": 0.7536,
      "step": 600770
    },
    {
      "epoch": 2.1055841192736753,
      "grad_norm": 2.796875,
      "learning_rate": 1.6563341303405325e-05,
      "loss": 0.836,
      "step": 600780
    },
    {
      "epoch": 2.105619166780571,
      "grad_norm": 3.234375,
      "learning_rate": 1.6562692274741623e-05,
      "loss": 0.7775,
      "step": 600790
    },
    {
      "epoch": 2.105654214287467,
      "grad_norm": 2.984375,
      "learning_rate": 1.656204324607792e-05,
      "loss": 0.8404,
      "step": 600800
    },
    {
      "epoch": 2.105689261794362,
      "grad_norm": 2.359375,
      "learning_rate": 1.656139421741422e-05,
      "loss": 0.7071,
      "step": 600810
    },
    {
      "epoch": 2.105724309301258,
      "grad_norm": 2.890625,
      "learning_rate": 1.6560745188750517e-05,
      "loss": 0.7958,
      "step": 600820
    },
    {
      "epoch": 2.1057593568081536,
      "grad_norm": 2.75,
      "learning_rate": 1.6560096160086815e-05,
      "loss": 0.8455,
      "step": 600830
    },
    {
      "epoch": 2.105794404315049,
      "grad_norm": 2.8125,
      "learning_rate": 1.6559447131423113e-05,
      "loss": 0.8007,
      "step": 600840
    },
    {
      "epoch": 2.1058294518219447,
      "grad_norm": 2.796875,
      "learning_rate": 1.655879810275941e-05,
      "loss": 0.8315,
      "step": 600850
    },
    {
      "epoch": 2.1058644993288405,
      "grad_norm": 2.59375,
      "learning_rate": 1.655814907409571e-05,
      "loss": 0.7688,
      "step": 600860
    },
    {
      "epoch": 2.1058995468357358,
      "grad_norm": 2.8125,
      "learning_rate": 1.6557500045432007e-05,
      "loss": 0.8727,
      "step": 600870
    },
    {
      "epoch": 2.1059345943426315,
      "grad_norm": 3.359375,
      "learning_rate": 1.6556851016768305e-05,
      "loss": 0.8725,
      "step": 600880
    },
    {
      "epoch": 2.105969641849527,
      "grad_norm": 3.125,
      "learning_rate": 1.6556201988104603e-05,
      "loss": 0.8359,
      "step": 600890
    },
    {
      "epoch": 2.1060046893564226,
      "grad_norm": 2.6875,
      "learning_rate": 1.65555529594409e-05,
      "loss": 0.7333,
      "step": 600900
    },
    {
      "epoch": 2.1060397368633184,
      "grad_norm": 3.25,
      "learning_rate": 1.65549039307772e-05,
      "loss": 0.7592,
      "step": 600910
    },
    {
      "epoch": 2.1060747843702137,
      "grad_norm": 2.53125,
      "learning_rate": 1.6554254902113497e-05,
      "loss": 0.8157,
      "step": 600920
    },
    {
      "epoch": 2.1061098318771094,
      "grad_norm": 2.828125,
      "learning_rate": 1.6553605873449795e-05,
      "loss": 0.7919,
      "step": 600930
    },
    {
      "epoch": 2.106144879384005,
      "grad_norm": 2.515625,
      "learning_rate": 1.6552956844786093e-05,
      "loss": 0.7952,
      "step": 600940
    },
    {
      "epoch": 2.1061799268909005,
      "grad_norm": 3.0,
      "learning_rate": 1.655230781612239e-05,
      "loss": 0.8257,
      "step": 600950
    },
    {
      "epoch": 2.1062149743977963,
      "grad_norm": 2.734375,
      "learning_rate": 1.6551658787458692e-05,
      "loss": 0.8215,
      "step": 600960
    },
    {
      "epoch": 2.106250021904692,
      "grad_norm": 3.296875,
      "learning_rate": 1.655100975879499e-05,
      "loss": 0.8242,
      "step": 600970
    },
    {
      "epoch": 2.1062850694115873,
      "grad_norm": 3.15625,
      "learning_rate": 1.6550360730131288e-05,
      "loss": 0.8577,
      "step": 600980
    },
    {
      "epoch": 2.106320116918483,
      "grad_norm": 2.75,
      "learning_rate": 1.6549711701467586e-05,
      "loss": 0.8274,
      "step": 600990
    },
    {
      "epoch": 2.1063551644253784,
      "grad_norm": 3.046875,
      "learning_rate": 1.6549062672803884e-05,
      "loss": 0.8067,
      "step": 601000
    },
    {
      "epoch": 2.106390211932274,
      "grad_norm": 2.953125,
      "learning_rate": 1.6548413644140182e-05,
      "loss": 0.8503,
      "step": 601010
    },
    {
      "epoch": 2.10642525943917,
      "grad_norm": 2.765625,
      "learning_rate": 1.654776461547648e-05,
      "loss": 0.7728,
      "step": 601020
    },
    {
      "epoch": 2.1064603069460652,
      "grad_norm": 2.75,
      "learning_rate": 1.6547115586812778e-05,
      "loss": 0.8289,
      "step": 601030
    },
    {
      "epoch": 2.106495354452961,
      "grad_norm": 2.640625,
      "learning_rate": 1.6546466558149073e-05,
      "loss": 0.7234,
      "step": 601040
    },
    {
      "epoch": 2.1065304019598567,
      "grad_norm": 3.09375,
      "learning_rate": 1.654581752948537e-05,
      "loss": 0.8163,
      "step": 601050
    },
    {
      "epoch": 2.106565449466752,
      "grad_norm": 2.84375,
      "learning_rate": 1.654516850082167e-05,
      "loss": 0.8102,
      "step": 601060
    },
    {
      "epoch": 2.106600496973648,
      "grad_norm": 2.828125,
      "learning_rate": 1.6544519472157967e-05,
      "loss": 0.7856,
      "step": 601070
    },
    {
      "epoch": 2.1066355444805436,
      "grad_norm": 2.90625,
      "learning_rate": 1.6543870443494268e-05,
      "loss": 0.8046,
      "step": 601080
    },
    {
      "epoch": 2.106670591987439,
      "grad_norm": 2.859375,
      "learning_rate": 1.6543221414830566e-05,
      "loss": 0.8238,
      "step": 601090
    },
    {
      "epoch": 2.1067056394943346,
      "grad_norm": 2.71875,
      "learning_rate": 1.6542572386166864e-05,
      "loss": 0.7741,
      "step": 601100
    },
    {
      "epoch": 2.10674068700123,
      "grad_norm": 2.859375,
      "learning_rate": 1.6541923357503162e-05,
      "loss": 0.7388,
      "step": 601110
    },
    {
      "epoch": 2.1067757345081257,
      "grad_norm": 3.125,
      "learning_rate": 1.654127432883946e-05,
      "loss": 0.7899,
      "step": 601120
    },
    {
      "epoch": 2.1068107820150215,
      "grad_norm": 3.171875,
      "learning_rate": 1.6540625300175758e-05,
      "loss": 0.9264,
      "step": 601130
    },
    {
      "epoch": 2.106845829521917,
      "grad_norm": 2.828125,
      "learning_rate": 1.6539976271512056e-05,
      "loss": 0.8525,
      "step": 601140
    },
    {
      "epoch": 2.1068808770288125,
      "grad_norm": 3.09375,
      "learning_rate": 1.6539327242848354e-05,
      "loss": 0.7631,
      "step": 601150
    },
    {
      "epoch": 2.1069159245357083,
      "grad_norm": 3.234375,
      "learning_rate": 1.6538678214184652e-05,
      "loss": 0.8619,
      "step": 601160
    },
    {
      "epoch": 2.1069509720426036,
      "grad_norm": 2.890625,
      "learning_rate": 1.653802918552095e-05,
      "loss": 0.7692,
      "step": 601170
    },
    {
      "epoch": 2.1069860195494994,
      "grad_norm": 2.984375,
      "learning_rate": 1.6537380156857248e-05,
      "loss": 0.814,
      "step": 601180
    },
    {
      "epoch": 2.107021067056395,
      "grad_norm": 2.859375,
      "learning_rate": 1.6536731128193546e-05,
      "loss": 0.8043,
      "step": 601190
    },
    {
      "epoch": 2.1070561145632904,
      "grad_norm": 3.0625,
      "learning_rate": 1.6536082099529844e-05,
      "loss": 0.8445,
      "step": 601200
    },
    {
      "epoch": 2.107091162070186,
      "grad_norm": 2.828125,
      "learning_rate": 1.6535433070866142e-05,
      "loss": 0.8707,
      "step": 601210
    },
    {
      "epoch": 2.1071262095770815,
      "grad_norm": 3.15625,
      "learning_rate": 1.6534784042202443e-05,
      "loss": 0.857,
      "step": 601220
    },
    {
      "epoch": 2.1071612570839773,
      "grad_norm": 2.625,
      "learning_rate": 1.6534135013538738e-05,
      "loss": 0.8233,
      "step": 601230
    },
    {
      "epoch": 2.107196304590873,
      "grad_norm": 3.34375,
      "learning_rate": 1.6533485984875036e-05,
      "loss": 0.8677,
      "step": 601240
    },
    {
      "epoch": 2.1072313520977684,
      "grad_norm": 2.890625,
      "learning_rate": 1.6532836956211334e-05,
      "loss": 0.8258,
      "step": 601250
    },
    {
      "epoch": 2.107266399604664,
      "grad_norm": 3.234375,
      "learning_rate": 1.6532187927547632e-05,
      "loss": 0.8473,
      "step": 601260
    },
    {
      "epoch": 2.10730144711156,
      "grad_norm": 3.078125,
      "learning_rate": 1.653153889888393e-05,
      "loss": 0.7436,
      "step": 601270
    },
    {
      "epoch": 2.107336494618455,
      "grad_norm": 2.53125,
      "learning_rate": 1.6530889870220228e-05,
      "loss": 0.7942,
      "step": 601280
    },
    {
      "epoch": 2.107371542125351,
      "grad_norm": 2.703125,
      "learning_rate": 1.6530240841556526e-05,
      "loss": 0.7242,
      "step": 601290
    },
    {
      "epoch": 2.1074065896322467,
      "grad_norm": 2.671875,
      "learning_rate": 1.6529591812892824e-05,
      "loss": 0.7702,
      "step": 601300
    },
    {
      "epoch": 2.107441637139142,
      "grad_norm": 2.90625,
      "learning_rate": 1.6528942784229122e-05,
      "loss": 0.7644,
      "step": 601310
    },
    {
      "epoch": 2.1074766846460378,
      "grad_norm": 3.1875,
      "learning_rate": 1.652829375556542e-05,
      "loss": 0.815,
      "step": 601320
    },
    {
      "epoch": 2.1075117321529335,
      "grad_norm": 2.3125,
      "learning_rate": 1.652764472690172e-05,
      "loss": 0.8151,
      "step": 601330
    },
    {
      "epoch": 2.107546779659829,
      "grad_norm": 2.859375,
      "learning_rate": 1.652699569823802e-05,
      "loss": 0.8488,
      "step": 601340
    },
    {
      "epoch": 2.1075818271667246,
      "grad_norm": 2.625,
      "learning_rate": 1.6526346669574317e-05,
      "loss": 0.7798,
      "step": 601350
    },
    {
      "epoch": 2.10761687467362,
      "grad_norm": 2.53125,
      "learning_rate": 1.6525697640910615e-05,
      "loss": 0.794,
      "step": 601360
    },
    {
      "epoch": 2.1076519221805157,
      "grad_norm": 3.125,
      "learning_rate": 1.6525048612246913e-05,
      "loss": 0.8089,
      "step": 601370
    },
    {
      "epoch": 2.1076869696874114,
      "grad_norm": 3.109375,
      "learning_rate": 1.652439958358321e-05,
      "loss": 0.8522,
      "step": 601380
    },
    {
      "epoch": 2.1077220171943067,
      "grad_norm": 2.625,
      "learning_rate": 1.652375055491951e-05,
      "loss": 0.7843,
      "step": 601390
    },
    {
      "epoch": 2.1077570647012025,
      "grad_norm": 3.28125,
      "learning_rate": 1.6523101526255807e-05,
      "loss": 0.8745,
      "step": 601400
    },
    {
      "epoch": 2.1077921122080983,
      "grad_norm": 2.75,
      "learning_rate": 1.6522452497592105e-05,
      "loss": 0.8124,
      "step": 601410
    },
    {
      "epoch": 2.1078271597149936,
      "grad_norm": 3.078125,
      "learning_rate": 1.65218034689284e-05,
      "loss": 0.8568,
      "step": 601420
    },
    {
      "epoch": 2.1078622072218893,
      "grad_norm": 2.75,
      "learning_rate": 1.6521154440264698e-05,
      "loss": 0.8335,
      "step": 601430
    },
    {
      "epoch": 2.107897254728785,
      "grad_norm": 2.71875,
      "learning_rate": 1.6520505411601e-05,
      "loss": 0.7859,
      "step": 601440
    },
    {
      "epoch": 2.1079323022356804,
      "grad_norm": 2.90625,
      "learning_rate": 1.6519856382937297e-05,
      "loss": 0.8137,
      "step": 601450
    },
    {
      "epoch": 2.107967349742576,
      "grad_norm": 3.140625,
      "learning_rate": 1.6519207354273595e-05,
      "loss": 0.774,
      "step": 601460
    },
    {
      "epoch": 2.1080023972494715,
      "grad_norm": 3.46875,
      "learning_rate": 1.6518558325609893e-05,
      "loss": 0.8528,
      "step": 601470
    },
    {
      "epoch": 2.1080374447563672,
      "grad_norm": 3.109375,
      "learning_rate": 1.651790929694619e-05,
      "loss": 0.7898,
      "step": 601480
    },
    {
      "epoch": 2.108072492263263,
      "grad_norm": 3.140625,
      "learning_rate": 1.651726026828249e-05,
      "loss": 0.8214,
      "step": 601490
    },
    {
      "epoch": 2.1081075397701583,
      "grad_norm": 3.421875,
      "learning_rate": 1.6516611239618787e-05,
      "loss": 0.8098,
      "step": 601500
    },
    {
      "epoch": 2.108142587277054,
      "grad_norm": 2.9375,
      "learning_rate": 1.6515962210955085e-05,
      "loss": 0.8359,
      "step": 601510
    },
    {
      "epoch": 2.10817763478395,
      "grad_norm": 2.984375,
      "learning_rate": 1.6515313182291383e-05,
      "loss": 0.7909,
      "step": 601520
    },
    {
      "epoch": 2.108212682290845,
      "grad_norm": 3.03125,
      "learning_rate": 1.651466415362768e-05,
      "loss": 0.8164,
      "step": 601530
    },
    {
      "epoch": 2.108247729797741,
      "grad_norm": 2.703125,
      "learning_rate": 1.651401512496398e-05,
      "loss": 0.8158,
      "step": 601540
    },
    {
      "epoch": 2.1082827773046366,
      "grad_norm": 2.78125,
      "learning_rate": 1.6513366096300277e-05,
      "loss": 0.7962,
      "step": 601550
    },
    {
      "epoch": 2.108317824811532,
      "grad_norm": 2.921875,
      "learning_rate": 1.6512717067636575e-05,
      "loss": 0.7946,
      "step": 601560
    },
    {
      "epoch": 2.1083528723184277,
      "grad_norm": 2.984375,
      "learning_rate": 1.6512068038972873e-05,
      "loss": 0.8762,
      "step": 601570
    },
    {
      "epoch": 2.108387919825323,
      "grad_norm": 3.109375,
      "learning_rate": 1.6511419010309175e-05,
      "loss": 0.8733,
      "step": 601580
    },
    {
      "epoch": 2.108422967332219,
      "grad_norm": 2.828125,
      "learning_rate": 1.6510769981645473e-05,
      "loss": 0.8391,
      "step": 601590
    },
    {
      "epoch": 2.1084580148391145,
      "grad_norm": 3.28125,
      "learning_rate": 1.651012095298177e-05,
      "loss": 0.8897,
      "step": 601600
    },
    {
      "epoch": 2.10849306234601,
      "grad_norm": 3.171875,
      "learning_rate": 1.6509471924318065e-05,
      "loss": 0.7922,
      "step": 601610
    },
    {
      "epoch": 2.1085281098529056,
      "grad_norm": 2.9375,
      "learning_rate": 1.6508822895654363e-05,
      "loss": 0.804,
      "step": 601620
    },
    {
      "epoch": 2.1085631573598014,
      "grad_norm": 2.5625,
      "learning_rate": 1.650817386699066e-05,
      "loss": 0.9007,
      "step": 601630
    },
    {
      "epoch": 2.1085982048666967,
      "grad_norm": 3.109375,
      "learning_rate": 1.650752483832696e-05,
      "loss": 0.7715,
      "step": 601640
    },
    {
      "epoch": 2.1086332523735924,
      "grad_norm": 2.796875,
      "learning_rate": 1.6506875809663257e-05,
      "loss": 0.8143,
      "step": 601650
    },
    {
      "epoch": 2.108668299880488,
      "grad_norm": 2.890625,
      "learning_rate": 1.6506226780999555e-05,
      "loss": 0.7563,
      "step": 601660
    },
    {
      "epoch": 2.1087033473873835,
      "grad_norm": 2.8125,
      "learning_rate": 1.6505577752335853e-05,
      "loss": 0.8532,
      "step": 601670
    },
    {
      "epoch": 2.1087383948942793,
      "grad_norm": 3.03125,
      "learning_rate": 1.650492872367215e-05,
      "loss": 0.7815,
      "step": 601680
    },
    {
      "epoch": 2.1087734424011746,
      "grad_norm": 2.96875,
      "learning_rate": 1.650427969500845e-05,
      "loss": 0.8306,
      "step": 601690
    },
    {
      "epoch": 2.1088084899080703,
      "grad_norm": 2.78125,
      "learning_rate": 1.650363066634475e-05,
      "loss": 0.7706,
      "step": 601700
    },
    {
      "epoch": 2.108843537414966,
      "grad_norm": 3.421875,
      "learning_rate": 1.650298163768105e-05,
      "loss": 0.8264,
      "step": 601710
    },
    {
      "epoch": 2.1088785849218614,
      "grad_norm": 2.5,
      "learning_rate": 1.6502332609017347e-05,
      "loss": 0.8161,
      "step": 601720
    },
    {
      "epoch": 2.108913632428757,
      "grad_norm": 3.4375,
      "learning_rate": 1.6501683580353645e-05,
      "loss": 0.8222,
      "step": 601730
    },
    {
      "epoch": 2.108948679935653,
      "grad_norm": 3.8125,
      "learning_rate": 1.6501034551689943e-05,
      "loss": 0.8351,
      "step": 601740
    },
    {
      "epoch": 2.1089837274425483,
      "grad_norm": 2.765625,
      "learning_rate": 1.650038552302624e-05,
      "loss": 0.7952,
      "step": 601750
    },
    {
      "epoch": 2.109018774949444,
      "grad_norm": 3.125,
      "learning_rate": 1.649973649436254e-05,
      "loss": 0.8139,
      "step": 601760
    },
    {
      "epoch": 2.1090538224563398,
      "grad_norm": 3.078125,
      "learning_rate": 1.6499087465698837e-05,
      "loss": 0.8801,
      "step": 601770
    },
    {
      "epoch": 2.109088869963235,
      "grad_norm": 2.5,
      "learning_rate": 1.6498438437035135e-05,
      "loss": 0.7876,
      "step": 601780
    },
    {
      "epoch": 2.109123917470131,
      "grad_norm": 3.3125,
      "learning_rate": 1.649778940837143e-05,
      "loss": 0.7791,
      "step": 601790
    },
    {
      "epoch": 2.109158964977026,
      "grad_norm": 2.21875,
      "learning_rate": 1.6497140379707727e-05,
      "loss": 0.771,
      "step": 601800
    },
    {
      "epoch": 2.109194012483922,
      "grad_norm": 2.625,
      "learning_rate": 1.649649135104403e-05,
      "loss": 0.77,
      "step": 601810
    },
    {
      "epoch": 2.1092290599908177,
      "grad_norm": 2.5,
      "learning_rate": 1.6495842322380327e-05,
      "loss": 0.8346,
      "step": 601820
    },
    {
      "epoch": 2.109264107497713,
      "grad_norm": 3.109375,
      "learning_rate": 1.6495193293716625e-05,
      "loss": 0.795,
      "step": 601830
    },
    {
      "epoch": 2.1092991550046087,
      "grad_norm": 3.0,
      "learning_rate": 1.6494544265052923e-05,
      "loss": 0.8666,
      "step": 601840
    },
    {
      "epoch": 2.1093342025115045,
      "grad_norm": 3.125,
      "learning_rate": 1.649389523638922e-05,
      "loss": 0.8506,
      "step": 601850
    },
    {
      "epoch": 2.1093692500184,
      "grad_norm": 2.453125,
      "learning_rate": 1.649324620772552e-05,
      "loss": 0.8203,
      "step": 601860
    },
    {
      "epoch": 2.1094042975252956,
      "grad_norm": 2.921875,
      "learning_rate": 1.6492597179061817e-05,
      "loss": 0.8289,
      "step": 601870
    },
    {
      "epoch": 2.1094393450321913,
      "grad_norm": 2.890625,
      "learning_rate": 1.6491948150398115e-05,
      "loss": 0.7822,
      "step": 601880
    },
    {
      "epoch": 2.1094743925390866,
      "grad_norm": 2.609375,
      "learning_rate": 1.6491299121734413e-05,
      "loss": 0.7596,
      "step": 601890
    },
    {
      "epoch": 2.1095094400459824,
      "grad_norm": 3.265625,
      "learning_rate": 1.649065009307071e-05,
      "loss": 0.8661,
      "step": 601900
    },
    {
      "epoch": 2.1095444875528777,
      "grad_norm": 2.9375,
      "learning_rate": 1.649000106440701e-05,
      "loss": 0.7243,
      "step": 601910
    },
    {
      "epoch": 2.1095795350597735,
      "grad_norm": 2.671875,
      "learning_rate": 1.6489352035743307e-05,
      "loss": 0.7432,
      "step": 601920
    },
    {
      "epoch": 2.1096145825666692,
      "grad_norm": 2.9375,
      "learning_rate": 1.6488703007079605e-05,
      "loss": 0.8473,
      "step": 601930
    },
    {
      "epoch": 2.1096496300735645,
      "grad_norm": 2.640625,
      "learning_rate": 1.6488053978415903e-05,
      "loss": 0.7361,
      "step": 601940
    },
    {
      "epoch": 2.1096846775804603,
      "grad_norm": 3.1875,
      "learning_rate": 1.6487404949752204e-05,
      "loss": 0.7824,
      "step": 601950
    },
    {
      "epoch": 2.109719725087356,
      "grad_norm": 3.046875,
      "learning_rate": 1.6486755921088502e-05,
      "loss": 0.8584,
      "step": 601960
    },
    {
      "epoch": 2.1097547725942514,
      "grad_norm": 3.375,
      "learning_rate": 1.64861068924248e-05,
      "loss": 0.8336,
      "step": 601970
    },
    {
      "epoch": 2.109789820101147,
      "grad_norm": 2.703125,
      "learning_rate": 1.6485457863761095e-05,
      "loss": 0.6146,
      "step": 601980
    },
    {
      "epoch": 2.109824867608043,
      "grad_norm": 3.109375,
      "learning_rate": 1.6484808835097393e-05,
      "loss": 0.7883,
      "step": 601990
    },
    {
      "epoch": 2.109859915114938,
      "grad_norm": 2.71875,
      "learning_rate": 1.648415980643369e-05,
      "loss": 0.7763,
      "step": 602000
    },
    {
      "epoch": 2.109894962621834,
      "grad_norm": 3.015625,
      "learning_rate": 1.648351077776999e-05,
      "loss": 0.75,
      "step": 602010
    },
    {
      "epoch": 2.1099300101287293,
      "grad_norm": 3.03125,
      "learning_rate": 1.6482861749106287e-05,
      "loss": 0.8653,
      "step": 602020
    },
    {
      "epoch": 2.109965057635625,
      "grad_norm": 2.578125,
      "learning_rate": 1.6482212720442585e-05,
      "loss": 0.8292,
      "step": 602030
    },
    {
      "epoch": 2.110000105142521,
      "grad_norm": 3.15625,
      "learning_rate": 1.6481563691778883e-05,
      "loss": 0.8781,
      "step": 602040
    },
    {
      "epoch": 2.110035152649416,
      "grad_norm": 2.796875,
      "learning_rate": 1.648091466311518e-05,
      "loss": 0.8206,
      "step": 602050
    },
    {
      "epoch": 2.110070200156312,
      "grad_norm": 2.96875,
      "learning_rate": 1.6480265634451482e-05,
      "loss": 0.8611,
      "step": 602060
    },
    {
      "epoch": 2.1101052476632076,
      "grad_norm": 2.734375,
      "learning_rate": 1.647961660578778e-05,
      "loss": 0.8857,
      "step": 602070
    },
    {
      "epoch": 2.110140295170103,
      "grad_norm": 3.015625,
      "learning_rate": 1.6478967577124078e-05,
      "loss": 0.7509,
      "step": 602080
    },
    {
      "epoch": 2.1101753426769987,
      "grad_norm": 3.0625,
      "learning_rate": 1.6478318548460376e-05,
      "loss": 0.8052,
      "step": 602090
    },
    {
      "epoch": 2.1102103901838944,
      "grad_norm": 2.46875,
      "learning_rate": 1.6477669519796674e-05,
      "loss": 0.7867,
      "step": 602100
    },
    {
      "epoch": 2.1102454376907898,
      "grad_norm": 2.890625,
      "learning_rate": 1.6477020491132972e-05,
      "loss": 0.8491,
      "step": 602110
    },
    {
      "epoch": 2.1102804851976855,
      "grad_norm": 3.046875,
      "learning_rate": 1.647637146246927e-05,
      "loss": 0.854,
      "step": 602120
    },
    {
      "epoch": 2.110315532704581,
      "grad_norm": 3.203125,
      "learning_rate": 1.6475722433805568e-05,
      "loss": 0.8821,
      "step": 602130
    },
    {
      "epoch": 2.1103505802114766,
      "grad_norm": 3.203125,
      "learning_rate": 1.6475073405141866e-05,
      "loss": 0.7991,
      "step": 602140
    },
    {
      "epoch": 2.1103856277183723,
      "grad_norm": 3.0625,
      "learning_rate": 1.6474424376478164e-05,
      "loss": 0.8192,
      "step": 602150
    },
    {
      "epoch": 2.1104206752252677,
      "grad_norm": 2.953125,
      "learning_rate": 1.6473775347814462e-05,
      "loss": 0.8401,
      "step": 602160
    },
    {
      "epoch": 2.1104557227321634,
      "grad_norm": 3.328125,
      "learning_rate": 1.6473126319150757e-05,
      "loss": 0.8538,
      "step": 602170
    },
    {
      "epoch": 2.110490770239059,
      "grad_norm": 2.921875,
      "learning_rate": 1.6472477290487058e-05,
      "loss": 0.8855,
      "step": 602180
    },
    {
      "epoch": 2.1105258177459545,
      "grad_norm": 3.0625,
      "learning_rate": 1.6471828261823356e-05,
      "loss": 0.8652,
      "step": 602190
    },
    {
      "epoch": 2.1105608652528502,
      "grad_norm": 3.5625,
      "learning_rate": 1.6471179233159654e-05,
      "loss": 0.8716,
      "step": 602200
    },
    {
      "epoch": 2.110595912759746,
      "grad_norm": 2.953125,
      "learning_rate": 1.6470530204495952e-05,
      "loss": 0.8724,
      "step": 602210
    },
    {
      "epoch": 2.1106309602666413,
      "grad_norm": 2.984375,
      "learning_rate": 1.646988117583225e-05,
      "loss": 0.8191,
      "step": 602220
    },
    {
      "epoch": 2.110666007773537,
      "grad_norm": 2.703125,
      "learning_rate": 1.6469232147168548e-05,
      "loss": 0.7651,
      "step": 602230
    },
    {
      "epoch": 2.110701055280433,
      "grad_norm": 2.5625,
      "learning_rate": 1.6468583118504846e-05,
      "loss": 0.8313,
      "step": 602240
    },
    {
      "epoch": 2.110736102787328,
      "grad_norm": 3.03125,
      "learning_rate": 1.6467934089841144e-05,
      "loss": 0.7722,
      "step": 602250
    },
    {
      "epoch": 2.110771150294224,
      "grad_norm": 2.984375,
      "learning_rate": 1.6467285061177442e-05,
      "loss": 0.8982,
      "step": 602260
    },
    {
      "epoch": 2.110806197801119,
      "grad_norm": 3.03125,
      "learning_rate": 1.646663603251374e-05,
      "loss": 0.8082,
      "step": 602270
    },
    {
      "epoch": 2.110841245308015,
      "grad_norm": 2.734375,
      "learning_rate": 1.6465987003850038e-05,
      "loss": 0.8164,
      "step": 602280
    },
    {
      "epoch": 2.1108762928149107,
      "grad_norm": 3.078125,
      "learning_rate": 1.6465337975186336e-05,
      "loss": 0.8252,
      "step": 602290
    },
    {
      "epoch": 2.110911340321806,
      "grad_norm": 3.109375,
      "learning_rate": 1.6464688946522634e-05,
      "loss": 0.7781,
      "step": 602300
    },
    {
      "epoch": 2.110946387828702,
      "grad_norm": 3.265625,
      "learning_rate": 1.6464039917858932e-05,
      "loss": 0.9224,
      "step": 602310
    },
    {
      "epoch": 2.1109814353355976,
      "grad_norm": 3.40625,
      "learning_rate": 1.6463390889195234e-05,
      "loss": 0.7725,
      "step": 602320
    },
    {
      "epoch": 2.111016482842493,
      "grad_norm": 3.109375,
      "learning_rate": 1.646274186053153e-05,
      "loss": 0.902,
      "step": 602330
    },
    {
      "epoch": 2.1110515303493886,
      "grad_norm": 3.109375,
      "learning_rate": 1.646209283186783e-05,
      "loss": 0.7772,
      "step": 602340
    },
    {
      "epoch": 2.1110865778562844,
      "grad_norm": 2.640625,
      "learning_rate": 1.6461443803204128e-05,
      "loss": 0.764,
      "step": 602350
    },
    {
      "epoch": 2.1111216253631797,
      "grad_norm": 3.046875,
      "learning_rate": 1.6460794774540422e-05,
      "loss": 0.8576,
      "step": 602360
    },
    {
      "epoch": 2.1111566728700755,
      "grad_norm": 3.25,
      "learning_rate": 1.646014574587672e-05,
      "loss": 0.7574,
      "step": 602370
    },
    {
      "epoch": 2.111191720376971,
      "grad_norm": 2.625,
      "learning_rate": 1.6459496717213018e-05,
      "loss": 0.8907,
      "step": 602380
    },
    {
      "epoch": 2.1112267678838665,
      "grad_norm": 2.734375,
      "learning_rate": 1.6458847688549316e-05,
      "loss": 0.8496,
      "step": 602390
    },
    {
      "epoch": 2.1112618153907623,
      "grad_norm": 2.671875,
      "learning_rate": 1.6458198659885614e-05,
      "loss": 0.8058,
      "step": 602400
    },
    {
      "epoch": 2.1112968628976576,
      "grad_norm": 2.828125,
      "learning_rate": 1.6457549631221912e-05,
      "loss": 0.7781,
      "step": 602410
    },
    {
      "epoch": 2.1113319104045534,
      "grad_norm": 3.09375,
      "learning_rate": 1.645690060255821e-05,
      "loss": 0.7971,
      "step": 602420
    },
    {
      "epoch": 2.111366957911449,
      "grad_norm": 2.90625,
      "learning_rate": 1.645625157389451e-05,
      "loss": 0.7997,
      "step": 602430
    },
    {
      "epoch": 2.1114020054183444,
      "grad_norm": 3.140625,
      "learning_rate": 1.645560254523081e-05,
      "loss": 0.942,
      "step": 602440
    },
    {
      "epoch": 2.11143705292524,
      "grad_norm": 2.703125,
      "learning_rate": 1.6454953516567108e-05,
      "loss": 0.8358,
      "step": 602450
    },
    {
      "epoch": 2.111472100432136,
      "grad_norm": 3.125,
      "learning_rate": 1.6454304487903406e-05,
      "loss": 0.8646,
      "step": 602460
    },
    {
      "epoch": 2.1115071479390313,
      "grad_norm": 2.90625,
      "learning_rate": 1.6453655459239704e-05,
      "loss": 0.8462,
      "step": 602470
    },
    {
      "epoch": 2.111542195445927,
      "grad_norm": 3.125,
      "learning_rate": 1.6453006430576e-05,
      "loss": 0.8788,
      "step": 602480
    },
    {
      "epoch": 2.1115772429528223,
      "grad_norm": 3.15625,
      "learning_rate": 1.64523574019123e-05,
      "loss": 0.9066,
      "step": 602490
    },
    {
      "epoch": 2.111612290459718,
      "grad_norm": 3.171875,
      "learning_rate": 1.6451708373248598e-05,
      "loss": 0.7774,
      "step": 602500
    },
    {
      "epoch": 2.111647337966614,
      "grad_norm": 3.15625,
      "learning_rate": 1.6451059344584896e-05,
      "loss": 0.8348,
      "step": 602510
    },
    {
      "epoch": 2.111682385473509,
      "grad_norm": 2.859375,
      "learning_rate": 1.6450410315921194e-05,
      "loss": 0.7287,
      "step": 602520
    },
    {
      "epoch": 2.111717432980405,
      "grad_norm": 3.375,
      "learning_rate": 1.644976128725749e-05,
      "loss": 0.8559,
      "step": 602530
    },
    {
      "epoch": 2.1117524804873007,
      "grad_norm": 2.90625,
      "learning_rate": 1.644911225859379e-05,
      "loss": 0.8875,
      "step": 602540
    },
    {
      "epoch": 2.111787527994196,
      "grad_norm": 2.921875,
      "learning_rate": 1.6448463229930088e-05,
      "loss": 0.8433,
      "step": 602550
    },
    {
      "epoch": 2.1118225755010918,
      "grad_norm": 3.015625,
      "learning_rate": 1.6447814201266386e-05,
      "loss": 0.8842,
      "step": 602560
    },
    {
      "epoch": 2.1118576230079875,
      "grad_norm": 3.203125,
      "learning_rate": 1.6447165172602684e-05,
      "loss": 0.9464,
      "step": 602570
    },
    {
      "epoch": 2.111892670514883,
      "grad_norm": 2.984375,
      "learning_rate": 1.644651614393898e-05,
      "loss": 0.8687,
      "step": 602580
    },
    {
      "epoch": 2.1119277180217786,
      "grad_norm": 2.75,
      "learning_rate": 1.644586711527528e-05,
      "loss": 0.8321,
      "step": 602590
    },
    {
      "epoch": 2.1119627655286743,
      "grad_norm": 2.984375,
      "learning_rate": 1.6445218086611578e-05,
      "loss": 0.8111,
      "step": 602600
    },
    {
      "epoch": 2.1119978130355697,
      "grad_norm": 2.828125,
      "learning_rate": 1.6444569057947876e-05,
      "loss": 0.8256,
      "step": 602610
    },
    {
      "epoch": 2.1120328605424654,
      "grad_norm": 3.28125,
      "learning_rate": 1.6443920029284174e-05,
      "loss": 0.8792,
      "step": 602620
    },
    {
      "epoch": 2.1120679080493607,
      "grad_norm": 3.21875,
      "learning_rate": 1.644327100062047e-05,
      "loss": 0.8024,
      "step": 602630
    },
    {
      "epoch": 2.1121029555562565,
      "grad_norm": 3.078125,
      "learning_rate": 1.644262197195677e-05,
      "loss": 0.8196,
      "step": 602640
    },
    {
      "epoch": 2.1121380030631522,
      "grad_norm": 3.0,
      "learning_rate": 1.6441972943293068e-05,
      "loss": 0.8841,
      "step": 602650
    },
    {
      "epoch": 2.1121730505700476,
      "grad_norm": 3.078125,
      "learning_rate": 1.6441323914629366e-05,
      "loss": 0.7521,
      "step": 602660
    },
    {
      "epoch": 2.1122080980769433,
      "grad_norm": 2.984375,
      "learning_rate": 1.6440674885965664e-05,
      "loss": 0.7979,
      "step": 602670
    },
    {
      "epoch": 2.112243145583839,
      "grad_norm": 2.5625,
      "learning_rate": 1.6440025857301965e-05,
      "loss": 0.7712,
      "step": 602680
    },
    {
      "epoch": 2.1122781930907344,
      "grad_norm": 2.796875,
      "learning_rate": 1.6439376828638263e-05,
      "loss": 0.8466,
      "step": 602690
    },
    {
      "epoch": 2.11231324059763,
      "grad_norm": 3.015625,
      "learning_rate": 1.643872779997456e-05,
      "loss": 0.8663,
      "step": 602700
    },
    {
      "epoch": 2.112348288104526,
      "grad_norm": 2.625,
      "learning_rate": 1.643807877131086e-05,
      "loss": 0.8026,
      "step": 602710
    },
    {
      "epoch": 2.112383335611421,
      "grad_norm": 3.390625,
      "learning_rate": 1.6437429742647157e-05,
      "loss": 0.8469,
      "step": 602720
    },
    {
      "epoch": 2.112418383118317,
      "grad_norm": 2.890625,
      "learning_rate": 1.6436780713983455e-05,
      "loss": 0.8104,
      "step": 602730
    },
    {
      "epoch": 2.1124534306252123,
      "grad_norm": 3.328125,
      "learning_rate": 1.643613168531975e-05,
      "loss": 0.8416,
      "step": 602740
    },
    {
      "epoch": 2.112488478132108,
      "grad_norm": 3.015625,
      "learning_rate": 1.6435482656656048e-05,
      "loss": 0.804,
      "step": 602750
    },
    {
      "epoch": 2.112523525639004,
      "grad_norm": 2.71875,
      "learning_rate": 1.6434833627992346e-05,
      "loss": 0.7492,
      "step": 602760
    },
    {
      "epoch": 2.112558573145899,
      "grad_norm": 3.375,
      "learning_rate": 1.6434184599328644e-05,
      "loss": 0.8247,
      "step": 602770
    },
    {
      "epoch": 2.112593620652795,
      "grad_norm": 3.078125,
      "learning_rate": 1.643353557066494e-05,
      "loss": 0.8568,
      "step": 602780
    },
    {
      "epoch": 2.1126286681596906,
      "grad_norm": 2.953125,
      "learning_rate": 1.643288654200124e-05,
      "loss": 0.8114,
      "step": 602790
    },
    {
      "epoch": 2.112663715666586,
      "grad_norm": 3.296875,
      "learning_rate": 1.643223751333754e-05,
      "loss": 0.9093,
      "step": 602800
    },
    {
      "epoch": 2.1126987631734817,
      "grad_norm": 2.53125,
      "learning_rate": 1.643158848467384e-05,
      "loss": 0.7794,
      "step": 602810
    },
    {
      "epoch": 2.1127338106803775,
      "grad_norm": 3.171875,
      "learning_rate": 1.6430939456010137e-05,
      "loss": 0.827,
      "step": 602820
    },
    {
      "epoch": 2.112768858187273,
      "grad_norm": 3.046875,
      "learning_rate": 1.6430290427346435e-05,
      "loss": 0.7518,
      "step": 602830
    },
    {
      "epoch": 2.1128039056941685,
      "grad_norm": 3.125,
      "learning_rate": 1.6429641398682733e-05,
      "loss": 0.8975,
      "step": 602840
    },
    {
      "epoch": 2.112838953201064,
      "grad_norm": 3.078125,
      "learning_rate": 1.642899237001903e-05,
      "loss": 0.8645,
      "step": 602850
    },
    {
      "epoch": 2.1128740007079596,
      "grad_norm": 3.0,
      "learning_rate": 1.642834334135533e-05,
      "loss": 0.8619,
      "step": 602860
    },
    {
      "epoch": 2.1129090482148554,
      "grad_norm": 3.171875,
      "learning_rate": 1.6427694312691627e-05,
      "loss": 0.8686,
      "step": 602870
    },
    {
      "epoch": 2.1129440957217507,
      "grad_norm": 3.078125,
      "learning_rate": 1.6427045284027925e-05,
      "loss": 0.8234,
      "step": 602880
    },
    {
      "epoch": 2.1129791432286464,
      "grad_norm": 2.71875,
      "learning_rate": 1.6426396255364223e-05,
      "loss": 0.9192,
      "step": 602890
    },
    {
      "epoch": 2.113014190735542,
      "grad_norm": 3.203125,
      "learning_rate": 1.642574722670052e-05,
      "loss": 0.8322,
      "step": 602900
    },
    {
      "epoch": 2.1130492382424375,
      "grad_norm": 2.921875,
      "learning_rate": 1.642509819803682e-05,
      "loss": 0.8379,
      "step": 602910
    },
    {
      "epoch": 2.1130842857493333,
      "grad_norm": 3.09375,
      "learning_rate": 1.6424449169373117e-05,
      "loss": 0.8165,
      "step": 602920
    },
    {
      "epoch": 2.113119333256229,
      "grad_norm": 2.828125,
      "learning_rate": 1.6423800140709415e-05,
      "loss": 0.7609,
      "step": 602930
    },
    {
      "epoch": 2.1131543807631243,
      "grad_norm": 2.890625,
      "learning_rate": 1.6423151112045713e-05,
      "loss": 0.7835,
      "step": 602940
    },
    {
      "epoch": 2.11318942827002,
      "grad_norm": 3.265625,
      "learning_rate": 1.642250208338201e-05,
      "loss": 0.8692,
      "step": 602950
    },
    {
      "epoch": 2.1132244757769154,
      "grad_norm": 2.84375,
      "learning_rate": 1.642185305471831e-05,
      "loss": 0.8295,
      "step": 602960
    },
    {
      "epoch": 2.113259523283811,
      "grad_norm": 3.375,
      "learning_rate": 1.6421204026054607e-05,
      "loss": 0.8608,
      "step": 602970
    },
    {
      "epoch": 2.113294570790707,
      "grad_norm": 2.640625,
      "learning_rate": 1.6420554997390905e-05,
      "loss": 0.6587,
      "step": 602980
    },
    {
      "epoch": 2.1133296182976022,
      "grad_norm": 2.625,
      "learning_rate": 1.6419905968727203e-05,
      "loss": 0.8644,
      "step": 602990
    },
    {
      "epoch": 2.113364665804498,
      "grad_norm": 3.25,
      "learning_rate": 1.64192569400635e-05,
      "loss": 0.8354,
      "step": 603000
    },
    {
      "epoch": 2.1133997133113938,
      "grad_norm": 3.28125,
      "learning_rate": 1.64186079113998e-05,
      "loss": 0.8612,
      "step": 603010
    },
    {
      "epoch": 2.113434760818289,
      "grad_norm": 2.828125,
      "learning_rate": 1.6417958882736097e-05,
      "loss": 0.8201,
      "step": 603020
    },
    {
      "epoch": 2.113469808325185,
      "grad_norm": 3.03125,
      "learning_rate": 1.6417309854072395e-05,
      "loss": 0.7975,
      "step": 603030
    },
    {
      "epoch": 2.1135048558320806,
      "grad_norm": 2.875,
      "learning_rate": 1.6416660825408693e-05,
      "loss": 0.8199,
      "step": 603040
    },
    {
      "epoch": 2.113539903338976,
      "grad_norm": 3.09375,
      "learning_rate": 1.6416011796744994e-05,
      "loss": 0.808,
      "step": 603050
    },
    {
      "epoch": 2.1135749508458717,
      "grad_norm": 2.640625,
      "learning_rate": 1.6415362768081292e-05,
      "loss": 0.8284,
      "step": 603060
    },
    {
      "epoch": 2.113609998352767,
      "grad_norm": 2.59375,
      "learning_rate": 1.641471373941759e-05,
      "loss": 0.8268,
      "step": 603070
    },
    {
      "epoch": 2.1136450458596627,
      "grad_norm": 2.421875,
      "learning_rate": 1.641406471075389e-05,
      "loss": 0.8192,
      "step": 603080
    },
    {
      "epoch": 2.1136800933665585,
      "grad_norm": 3.046875,
      "learning_rate": 1.6413415682090186e-05,
      "loss": 0.8508,
      "step": 603090
    },
    {
      "epoch": 2.113715140873454,
      "grad_norm": 2.8125,
      "learning_rate": 1.6412766653426484e-05,
      "loss": 0.8222,
      "step": 603100
    },
    {
      "epoch": 2.1137501883803496,
      "grad_norm": 2.515625,
      "learning_rate": 1.641211762476278e-05,
      "loss": 0.8048,
      "step": 603110
    },
    {
      "epoch": 2.1137852358872453,
      "grad_norm": 2.4375,
      "learning_rate": 1.6411468596099077e-05,
      "loss": 0.8309,
      "step": 603120
    },
    {
      "epoch": 2.1138202833941406,
      "grad_norm": 3.171875,
      "learning_rate": 1.6410819567435375e-05,
      "loss": 0.7838,
      "step": 603130
    },
    {
      "epoch": 2.1138553309010364,
      "grad_norm": 3.09375,
      "learning_rate": 1.6410170538771673e-05,
      "loss": 0.8313,
      "step": 603140
    },
    {
      "epoch": 2.113890378407932,
      "grad_norm": 3.265625,
      "learning_rate": 1.640952151010797e-05,
      "loss": 0.8356,
      "step": 603150
    },
    {
      "epoch": 2.1139254259148275,
      "grad_norm": 2.953125,
      "learning_rate": 1.6408872481444272e-05,
      "loss": 0.7558,
      "step": 603160
    },
    {
      "epoch": 2.113960473421723,
      "grad_norm": 2.515625,
      "learning_rate": 1.640822345278057e-05,
      "loss": 0.8262,
      "step": 603170
    },
    {
      "epoch": 2.1139955209286185,
      "grad_norm": 2.765625,
      "learning_rate": 1.640757442411687e-05,
      "loss": 0.8131,
      "step": 603180
    },
    {
      "epoch": 2.1140305684355143,
      "grad_norm": 2.6875,
      "learning_rate": 1.6406925395453166e-05,
      "loss": 0.7749,
      "step": 603190
    },
    {
      "epoch": 2.11406561594241,
      "grad_norm": 2.65625,
      "learning_rate": 1.6406276366789464e-05,
      "loss": 0.8119,
      "step": 603200
    },
    {
      "epoch": 2.1141006634493054,
      "grad_norm": 2.359375,
      "learning_rate": 1.6405627338125762e-05,
      "loss": 0.7295,
      "step": 603210
    },
    {
      "epoch": 2.114135710956201,
      "grad_norm": 2.703125,
      "learning_rate": 1.640497830946206e-05,
      "loss": 0.7802,
      "step": 603220
    },
    {
      "epoch": 2.114170758463097,
      "grad_norm": 3.046875,
      "learning_rate": 1.640432928079836e-05,
      "loss": 0.7788,
      "step": 603230
    },
    {
      "epoch": 2.114205805969992,
      "grad_norm": 2.484375,
      "learning_rate": 1.6403680252134656e-05,
      "loss": 0.7862,
      "step": 603240
    },
    {
      "epoch": 2.114240853476888,
      "grad_norm": 2.734375,
      "learning_rate": 1.6403031223470954e-05,
      "loss": 0.9013,
      "step": 603250
    },
    {
      "epoch": 2.1142759009837837,
      "grad_norm": 2.90625,
      "learning_rate": 1.6402382194807252e-05,
      "loss": 0.8132,
      "step": 603260
    },
    {
      "epoch": 2.114310948490679,
      "grad_norm": 3.421875,
      "learning_rate": 1.640173316614355e-05,
      "loss": 0.8943,
      "step": 603270
    },
    {
      "epoch": 2.114345995997575,
      "grad_norm": 2.828125,
      "learning_rate": 1.640108413747985e-05,
      "loss": 0.8239,
      "step": 603280
    },
    {
      "epoch": 2.11438104350447,
      "grad_norm": 2.734375,
      "learning_rate": 1.6400435108816146e-05,
      "loss": 0.7418,
      "step": 603290
    },
    {
      "epoch": 2.114416091011366,
      "grad_norm": 3.5625,
      "learning_rate": 1.6399786080152444e-05,
      "loss": 0.8259,
      "step": 603300
    },
    {
      "epoch": 2.1144511385182616,
      "grad_norm": 2.859375,
      "learning_rate": 1.6399137051488742e-05,
      "loss": 0.777,
      "step": 603310
    },
    {
      "epoch": 2.114486186025157,
      "grad_norm": 3.25,
      "learning_rate": 1.639848802282504e-05,
      "loss": 0.8742,
      "step": 603320
    },
    {
      "epoch": 2.1145212335320527,
      "grad_norm": 2.984375,
      "learning_rate": 1.639783899416134e-05,
      "loss": 0.8082,
      "step": 603330
    },
    {
      "epoch": 2.1145562810389484,
      "grad_norm": 3.15625,
      "learning_rate": 1.6397189965497636e-05,
      "loss": 0.9035,
      "step": 603340
    },
    {
      "epoch": 2.1145913285458438,
      "grad_norm": 2.96875,
      "learning_rate": 1.6396540936833934e-05,
      "loss": 0.7985,
      "step": 603350
    },
    {
      "epoch": 2.1146263760527395,
      "grad_norm": 3.125,
      "learning_rate": 1.6395891908170232e-05,
      "loss": 0.8475,
      "step": 603360
    },
    {
      "epoch": 2.1146614235596353,
      "grad_norm": 2.578125,
      "learning_rate": 1.639524287950653e-05,
      "loss": 0.7975,
      "step": 603370
    },
    {
      "epoch": 2.1146964710665306,
      "grad_norm": 3.046875,
      "learning_rate": 1.639459385084283e-05,
      "loss": 0.8143,
      "step": 603380
    },
    {
      "epoch": 2.1147315185734263,
      "grad_norm": 2.765625,
      "learning_rate": 1.6393944822179126e-05,
      "loss": 0.7847,
      "step": 603390
    },
    {
      "epoch": 2.1147665660803217,
      "grad_norm": 3.4375,
      "learning_rate": 1.6393295793515424e-05,
      "loss": 0.8758,
      "step": 603400
    },
    {
      "epoch": 2.1148016135872174,
      "grad_norm": 2.953125,
      "learning_rate": 1.6392646764851722e-05,
      "loss": 0.8392,
      "step": 603410
    },
    {
      "epoch": 2.114836661094113,
      "grad_norm": 3.421875,
      "learning_rate": 1.6391997736188024e-05,
      "loss": 0.8231,
      "step": 603420
    },
    {
      "epoch": 2.1148717086010085,
      "grad_norm": 2.828125,
      "learning_rate": 1.6391348707524322e-05,
      "loss": 0.7813,
      "step": 603430
    },
    {
      "epoch": 2.1149067561079042,
      "grad_norm": 2.578125,
      "learning_rate": 1.639069967886062e-05,
      "loss": 0.8137,
      "step": 603440
    },
    {
      "epoch": 2.1149418036148,
      "grad_norm": 2.515625,
      "learning_rate": 1.6390050650196918e-05,
      "loss": 0.8186,
      "step": 603450
    },
    {
      "epoch": 2.1149768511216953,
      "grad_norm": 2.578125,
      "learning_rate": 1.6389401621533216e-05,
      "loss": 0.8055,
      "step": 603460
    },
    {
      "epoch": 2.115011898628591,
      "grad_norm": 2.328125,
      "learning_rate": 1.6388752592869514e-05,
      "loss": 0.7216,
      "step": 603470
    },
    {
      "epoch": 2.115046946135487,
      "grad_norm": 2.84375,
      "learning_rate": 1.6388103564205812e-05,
      "loss": 0.8398,
      "step": 603480
    },
    {
      "epoch": 2.115081993642382,
      "grad_norm": 3.265625,
      "learning_rate": 1.6387454535542106e-05,
      "loss": 0.7963,
      "step": 603490
    },
    {
      "epoch": 2.115117041149278,
      "grad_norm": 3.03125,
      "learning_rate": 1.6386805506878404e-05,
      "loss": 0.8112,
      "step": 603500
    },
    {
      "epoch": 2.1151520886561737,
      "grad_norm": 2.859375,
      "learning_rate": 1.6386156478214702e-05,
      "loss": 0.8312,
      "step": 603510
    },
    {
      "epoch": 2.115187136163069,
      "grad_norm": 3.21875,
      "learning_rate": 1.6385507449551e-05,
      "loss": 0.7822,
      "step": 603520
    },
    {
      "epoch": 2.1152221836699647,
      "grad_norm": 2.71875,
      "learning_rate": 1.6384858420887302e-05,
      "loss": 0.8391,
      "step": 603530
    },
    {
      "epoch": 2.11525723117686,
      "grad_norm": 3.0,
      "learning_rate": 1.63842093922236e-05,
      "loss": 0.8084,
      "step": 603540
    },
    {
      "epoch": 2.115292278683756,
      "grad_norm": 2.75,
      "learning_rate": 1.6383560363559898e-05,
      "loss": 0.7816,
      "step": 603550
    },
    {
      "epoch": 2.1153273261906516,
      "grad_norm": 3.4375,
      "learning_rate": 1.6382911334896196e-05,
      "loss": 0.8331,
      "step": 603560
    },
    {
      "epoch": 2.115362373697547,
      "grad_norm": 3.390625,
      "learning_rate": 1.6382262306232494e-05,
      "loss": 0.8444,
      "step": 603570
    },
    {
      "epoch": 2.1153974212044426,
      "grad_norm": 3.125,
      "learning_rate": 1.6381613277568792e-05,
      "loss": 0.8435,
      "step": 603580
    },
    {
      "epoch": 2.1154324687113384,
      "grad_norm": 2.8125,
      "learning_rate": 1.638096424890509e-05,
      "loss": 0.7522,
      "step": 603590
    },
    {
      "epoch": 2.1154675162182337,
      "grad_norm": 2.453125,
      "learning_rate": 1.6380315220241388e-05,
      "loss": 0.7282,
      "step": 603600
    },
    {
      "epoch": 2.1155025637251295,
      "grad_norm": 3.15625,
      "learning_rate": 1.6379666191577686e-05,
      "loss": 0.8591,
      "step": 603610
    },
    {
      "epoch": 2.115537611232025,
      "grad_norm": 3.078125,
      "learning_rate": 1.6379017162913984e-05,
      "loss": 0.8289,
      "step": 603620
    },
    {
      "epoch": 2.1155726587389205,
      "grad_norm": 2.96875,
      "learning_rate": 1.6378368134250282e-05,
      "loss": 0.8246,
      "step": 603630
    },
    {
      "epoch": 2.1156077062458163,
      "grad_norm": 2.796875,
      "learning_rate": 1.637771910558658e-05,
      "loss": 0.8109,
      "step": 603640
    },
    {
      "epoch": 2.1156427537527116,
      "grad_norm": 2.5,
      "learning_rate": 1.6377070076922878e-05,
      "loss": 0.7944,
      "step": 603650
    },
    {
      "epoch": 2.1156778012596074,
      "grad_norm": 2.8125,
      "learning_rate": 1.6376421048259176e-05,
      "loss": 0.7686,
      "step": 603660
    },
    {
      "epoch": 2.115712848766503,
      "grad_norm": 2.921875,
      "learning_rate": 1.6375772019595477e-05,
      "loss": 0.8706,
      "step": 603670
    },
    {
      "epoch": 2.1157478962733984,
      "grad_norm": 2.765625,
      "learning_rate": 1.6375122990931772e-05,
      "loss": 0.7589,
      "step": 603680
    },
    {
      "epoch": 2.115782943780294,
      "grad_norm": 3.09375,
      "learning_rate": 1.637447396226807e-05,
      "loss": 0.8398,
      "step": 603690
    },
    {
      "epoch": 2.11581799128719,
      "grad_norm": 3.625,
      "learning_rate": 1.6373824933604368e-05,
      "loss": 0.8872,
      "step": 603700
    },
    {
      "epoch": 2.1158530387940853,
      "grad_norm": 2.796875,
      "learning_rate": 1.6373175904940666e-05,
      "loss": 0.8026,
      "step": 603710
    },
    {
      "epoch": 2.115888086300981,
      "grad_norm": 3.265625,
      "learning_rate": 1.6372526876276964e-05,
      "loss": 0.8205,
      "step": 603720
    },
    {
      "epoch": 2.1159231338078768,
      "grad_norm": 3.078125,
      "learning_rate": 1.6371877847613262e-05,
      "loss": 0.7932,
      "step": 603730
    },
    {
      "epoch": 2.115958181314772,
      "grad_norm": 2.90625,
      "learning_rate": 1.637122881894956e-05,
      "loss": 0.8334,
      "step": 603740
    },
    {
      "epoch": 2.115993228821668,
      "grad_norm": 3.140625,
      "learning_rate": 1.6370579790285858e-05,
      "loss": 0.8464,
      "step": 603750
    },
    {
      "epoch": 2.116028276328563,
      "grad_norm": 3.03125,
      "learning_rate": 1.6369930761622156e-05,
      "loss": 0.8433,
      "step": 603760
    },
    {
      "epoch": 2.116063323835459,
      "grad_norm": 3.046875,
      "learning_rate": 1.6369281732958454e-05,
      "loss": 0.8636,
      "step": 603770
    },
    {
      "epoch": 2.1160983713423547,
      "grad_norm": 3.28125,
      "learning_rate": 1.6368632704294755e-05,
      "loss": 0.8393,
      "step": 603780
    },
    {
      "epoch": 2.11613341884925,
      "grad_norm": 3.59375,
      "learning_rate": 1.6367983675631053e-05,
      "loss": 0.8988,
      "step": 603790
    },
    {
      "epoch": 2.1161684663561457,
      "grad_norm": 2.921875,
      "learning_rate": 1.636733464696735e-05,
      "loss": 0.8953,
      "step": 603800
    },
    {
      "epoch": 2.1162035138630415,
      "grad_norm": 2.84375,
      "learning_rate": 1.636668561830365e-05,
      "loss": 0.7528,
      "step": 603810
    },
    {
      "epoch": 2.116238561369937,
      "grad_norm": 3.109375,
      "learning_rate": 1.6366036589639947e-05,
      "loss": 0.8062,
      "step": 603820
    },
    {
      "epoch": 2.1162736088768326,
      "grad_norm": 2.796875,
      "learning_rate": 1.6365387560976245e-05,
      "loss": 0.7434,
      "step": 603830
    },
    {
      "epoch": 2.1163086563837283,
      "grad_norm": 2.953125,
      "learning_rate": 1.6364738532312543e-05,
      "loss": 0.8038,
      "step": 603840
    },
    {
      "epoch": 2.1163437038906237,
      "grad_norm": 3.125,
      "learning_rate": 1.636408950364884e-05,
      "loss": 0.8307,
      "step": 603850
    },
    {
      "epoch": 2.1163787513975194,
      "grad_norm": 2.984375,
      "learning_rate": 1.6363440474985136e-05,
      "loss": 0.8268,
      "step": 603860
    },
    {
      "epoch": 2.1164137989044147,
      "grad_norm": 2.65625,
      "learning_rate": 1.6362791446321434e-05,
      "loss": 0.7856,
      "step": 603870
    },
    {
      "epoch": 2.1164488464113105,
      "grad_norm": 3.359375,
      "learning_rate": 1.6362142417657732e-05,
      "loss": 0.7571,
      "step": 603880
    },
    {
      "epoch": 2.1164838939182062,
      "grad_norm": 3.0625,
      "learning_rate": 1.636149338899403e-05,
      "loss": 0.8776,
      "step": 603890
    },
    {
      "epoch": 2.1165189414251016,
      "grad_norm": 3.28125,
      "learning_rate": 1.636084436033033e-05,
      "loss": 0.8419,
      "step": 603900
    },
    {
      "epoch": 2.1165539889319973,
      "grad_norm": 2.75,
      "learning_rate": 1.636019533166663e-05,
      "loss": 0.8172,
      "step": 603910
    },
    {
      "epoch": 2.116589036438893,
      "grad_norm": 3.015625,
      "learning_rate": 1.6359546303002927e-05,
      "loss": 0.8483,
      "step": 603920
    },
    {
      "epoch": 2.1166240839457884,
      "grad_norm": 3.15625,
      "learning_rate": 1.6358897274339225e-05,
      "loss": 0.8341,
      "step": 603930
    },
    {
      "epoch": 2.116659131452684,
      "grad_norm": 2.546875,
      "learning_rate": 1.6358248245675523e-05,
      "loss": 0.8188,
      "step": 603940
    },
    {
      "epoch": 2.11669417895958,
      "grad_norm": 2.734375,
      "learning_rate": 1.635759921701182e-05,
      "loss": 0.7923,
      "step": 603950
    },
    {
      "epoch": 2.116729226466475,
      "grad_norm": 3.046875,
      "learning_rate": 1.635695018834812e-05,
      "loss": 0.8026,
      "step": 603960
    },
    {
      "epoch": 2.116764273973371,
      "grad_norm": 3.09375,
      "learning_rate": 1.6356301159684417e-05,
      "loss": 0.8529,
      "step": 603970
    },
    {
      "epoch": 2.1167993214802667,
      "grad_norm": 3.015625,
      "learning_rate": 1.6355652131020715e-05,
      "loss": 0.7297,
      "step": 603980
    },
    {
      "epoch": 2.116834368987162,
      "grad_norm": 3.046875,
      "learning_rate": 1.6355003102357013e-05,
      "loss": 0.7697,
      "step": 603990
    },
    {
      "epoch": 2.116869416494058,
      "grad_norm": 2.953125,
      "learning_rate": 1.635435407369331e-05,
      "loss": 0.8197,
      "step": 604000
    },
    {
      "epoch": 2.116904464000953,
      "grad_norm": 2.75,
      "learning_rate": 1.635370504502961e-05,
      "loss": 0.8618,
      "step": 604010
    },
    {
      "epoch": 2.116939511507849,
      "grad_norm": 2.9375,
      "learning_rate": 1.6353056016365907e-05,
      "loss": 0.7558,
      "step": 604020
    },
    {
      "epoch": 2.1169745590147446,
      "grad_norm": 3.328125,
      "learning_rate": 1.635240698770221e-05,
      "loss": 0.7889,
      "step": 604030
    },
    {
      "epoch": 2.11700960652164,
      "grad_norm": 3.0,
      "learning_rate": 1.6351757959038506e-05,
      "loss": 0.8104,
      "step": 604040
    },
    {
      "epoch": 2.1170446540285357,
      "grad_norm": 2.78125,
      "learning_rate": 1.63511089303748e-05,
      "loss": 0.8107,
      "step": 604050
    },
    {
      "epoch": 2.1170797015354315,
      "grad_norm": 3.015625,
      "learning_rate": 1.63504599017111e-05,
      "loss": 0.861,
      "step": 604060
    },
    {
      "epoch": 2.1171147490423268,
      "grad_norm": 3.328125,
      "learning_rate": 1.6349810873047397e-05,
      "loss": 0.7891,
      "step": 604070
    },
    {
      "epoch": 2.1171497965492225,
      "grad_norm": 3.359375,
      "learning_rate": 1.6349161844383695e-05,
      "loss": 0.7697,
      "step": 604080
    },
    {
      "epoch": 2.1171848440561183,
      "grad_norm": 2.671875,
      "learning_rate": 1.6348512815719993e-05,
      "loss": 0.8731,
      "step": 604090
    },
    {
      "epoch": 2.1172198915630136,
      "grad_norm": 2.875,
      "learning_rate": 1.634786378705629e-05,
      "loss": 0.8102,
      "step": 604100
    },
    {
      "epoch": 2.1172549390699094,
      "grad_norm": 2.765625,
      "learning_rate": 1.634721475839259e-05,
      "loss": 0.7354,
      "step": 604110
    },
    {
      "epoch": 2.1172899865768047,
      "grad_norm": 3.0625,
      "learning_rate": 1.6346565729728887e-05,
      "loss": 0.8104,
      "step": 604120
    },
    {
      "epoch": 2.1173250340837004,
      "grad_norm": 3.03125,
      "learning_rate": 1.6345916701065185e-05,
      "loss": 0.8433,
      "step": 604130
    },
    {
      "epoch": 2.117360081590596,
      "grad_norm": 2.921875,
      "learning_rate": 1.6345267672401483e-05,
      "loss": 0.8174,
      "step": 604140
    },
    {
      "epoch": 2.1173951290974915,
      "grad_norm": 3.171875,
      "learning_rate": 1.6344618643737784e-05,
      "loss": 0.8132,
      "step": 604150
    },
    {
      "epoch": 2.1174301766043873,
      "grad_norm": 3.265625,
      "learning_rate": 1.6343969615074082e-05,
      "loss": 0.8518,
      "step": 604160
    },
    {
      "epoch": 2.117465224111283,
      "grad_norm": 3.046875,
      "learning_rate": 1.634332058641038e-05,
      "loss": 0.754,
      "step": 604170
    },
    {
      "epoch": 2.1175002716181783,
      "grad_norm": 2.875,
      "learning_rate": 1.634267155774668e-05,
      "loss": 0.8722,
      "step": 604180
    },
    {
      "epoch": 2.117535319125074,
      "grad_norm": 2.53125,
      "learning_rate": 1.6342022529082976e-05,
      "loss": 0.7829,
      "step": 604190
    },
    {
      "epoch": 2.11757036663197,
      "grad_norm": 2.875,
      "learning_rate": 1.6341373500419274e-05,
      "loss": 0.7833,
      "step": 604200
    },
    {
      "epoch": 2.117605414138865,
      "grad_norm": 2.671875,
      "learning_rate": 1.6340724471755572e-05,
      "loss": 0.7786,
      "step": 604210
    },
    {
      "epoch": 2.117640461645761,
      "grad_norm": 2.578125,
      "learning_rate": 1.634007544309187e-05,
      "loss": 0.7841,
      "step": 604220
    },
    {
      "epoch": 2.1176755091526562,
      "grad_norm": 2.765625,
      "learning_rate": 1.633942641442817e-05,
      "loss": 0.8286,
      "step": 604230
    },
    {
      "epoch": 2.117710556659552,
      "grad_norm": 2.8125,
      "learning_rate": 1.6338777385764463e-05,
      "loss": 0.8936,
      "step": 604240
    },
    {
      "epoch": 2.1177456041664477,
      "grad_norm": 2.890625,
      "learning_rate": 1.633812835710076e-05,
      "loss": 0.798,
      "step": 604250
    },
    {
      "epoch": 2.117780651673343,
      "grad_norm": 2.875,
      "learning_rate": 1.6337479328437062e-05,
      "loss": 0.797,
      "step": 604260
    },
    {
      "epoch": 2.117815699180239,
      "grad_norm": 2.796875,
      "learning_rate": 1.633683029977336e-05,
      "loss": 0.8136,
      "step": 604270
    },
    {
      "epoch": 2.1178507466871346,
      "grad_norm": 2.640625,
      "learning_rate": 1.633618127110966e-05,
      "loss": 0.765,
      "step": 604280
    },
    {
      "epoch": 2.11788579419403,
      "grad_norm": 3.140625,
      "learning_rate": 1.6335532242445956e-05,
      "loss": 0.8235,
      "step": 604290
    },
    {
      "epoch": 2.1179208417009256,
      "grad_norm": 2.96875,
      "learning_rate": 1.6334883213782254e-05,
      "loss": 0.8586,
      "step": 604300
    },
    {
      "epoch": 2.1179558892078214,
      "grad_norm": 2.828125,
      "learning_rate": 1.6334234185118552e-05,
      "loss": 0.8248,
      "step": 604310
    },
    {
      "epoch": 2.1179909367147167,
      "grad_norm": 3.140625,
      "learning_rate": 1.633358515645485e-05,
      "loss": 0.8322,
      "step": 604320
    },
    {
      "epoch": 2.1180259842216125,
      "grad_norm": 2.890625,
      "learning_rate": 1.633293612779115e-05,
      "loss": 0.7328,
      "step": 604330
    },
    {
      "epoch": 2.118061031728508,
      "grad_norm": 3.0,
      "learning_rate": 1.6332287099127446e-05,
      "loss": 0.7594,
      "step": 604340
    },
    {
      "epoch": 2.1180960792354036,
      "grad_norm": 2.65625,
      "learning_rate": 1.6331638070463744e-05,
      "loss": 0.8203,
      "step": 604350
    },
    {
      "epoch": 2.1181311267422993,
      "grad_norm": 2.703125,
      "learning_rate": 1.6330989041800042e-05,
      "loss": 0.8497,
      "step": 604360
    },
    {
      "epoch": 2.1181661742491946,
      "grad_norm": 3.015625,
      "learning_rate": 1.633034001313634e-05,
      "loss": 0.8355,
      "step": 604370
    },
    {
      "epoch": 2.1182012217560904,
      "grad_norm": 2.6875,
      "learning_rate": 1.632969098447264e-05,
      "loss": 0.8358,
      "step": 604380
    },
    {
      "epoch": 2.118236269262986,
      "grad_norm": 2.640625,
      "learning_rate": 1.6329041955808936e-05,
      "loss": 0.8237,
      "step": 604390
    },
    {
      "epoch": 2.1182713167698815,
      "grad_norm": 3.3125,
      "learning_rate": 1.6328392927145238e-05,
      "loss": 0.8242,
      "step": 604400
    },
    {
      "epoch": 2.118306364276777,
      "grad_norm": 2.765625,
      "learning_rate": 1.6327743898481536e-05,
      "loss": 0.8311,
      "step": 604410
    },
    {
      "epoch": 2.118341411783673,
      "grad_norm": 3.015625,
      "learning_rate": 1.6327094869817834e-05,
      "loss": 0.8329,
      "step": 604420
    },
    {
      "epoch": 2.1183764592905683,
      "grad_norm": 2.484375,
      "learning_rate": 1.632644584115413e-05,
      "loss": 0.8039,
      "step": 604430
    },
    {
      "epoch": 2.118411506797464,
      "grad_norm": 3.109375,
      "learning_rate": 1.6325796812490426e-05,
      "loss": 0.8721,
      "step": 604440
    },
    {
      "epoch": 2.1184465543043594,
      "grad_norm": 3.0625,
      "learning_rate": 1.6325147783826724e-05,
      "loss": 0.7546,
      "step": 604450
    },
    {
      "epoch": 2.118481601811255,
      "grad_norm": 3.03125,
      "learning_rate": 1.6324498755163022e-05,
      "loss": 0.7989,
      "step": 604460
    },
    {
      "epoch": 2.118516649318151,
      "grad_norm": 3.28125,
      "learning_rate": 1.632384972649932e-05,
      "loss": 0.7641,
      "step": 604470
    },
    {
      "epoch": 2.118551696825046,
      "grad_norm": 2.734375,
      "learning_rate": 1.632320069783562e-05,
      "loss": 0.8349,
      "step": 604480
    },
    {
      "epoch": 2.118586744331942,
      "grad_norm": 2.9375,
      "learning_rate": 1.6322551669171916e-05,
      "loss": 0.8186,
      "step": 604490
    },
    {
      "epoch": 2.1186217918388377,
      "grad_norm": 2.859375,
      "learning_rate": 1.6321902640508214e-05,
      "loss": 0.7878,
      "step": 604500
    },
    {
      "epoch": 2.118656839345733,
      "grad_norm": 3.546875,
      "learning_rate": 1.6321253611844516e-05,
      "loss": 0.8611,
      "step": 604510
    },
    {
      "epoch": 2.1186918868526288,
      "grad_norm": 2.953125,
      "learning_rate": 1.6320604583180814e-05,
      "loss": 0.8342,
      "step": 604520
    },
    {
      "epoch": 2.1187269343595245,
      "grad_norm": 3.25,
      "learning_rate": 1.6319955554517112e-05,
      "loss": 0.8284,
      "step": 604530
    },
    {
      "epoch": 2.11876198186642,
      "grad_norm": 2.625,
      "learning_rate": 1.631930652585341e-05,
      "loss": 0.846,
      "step": 604540
    },
    {
      "epoch": 2.1187970293733156,
      "grad_norm": 3.40625,
      "learning_rate": 1.6318657497189708e-05,
      "loss": 0.7896,
      "step": 604550
    },
    {
      "epoch": 2.118832076880211,
      "grad_norm": 2.59375,
      "learning_rate": 1.6318008468526006e-05,
      "loss": 0.82,
      "step": 604560
    },
    {
      "epoch": 2.1188671243871067,
      "grad_norm": 3.234375,
      "learning_rate": 1.6317359439862304e-05,
      "loss": 0.8027,
      "step": 604570
    },
    {
      "epoch": 2.1189021718940024,
      "grad_norm": 3.46875,
      "learning_rate": 1.6316710411198602e-05,
      "loss": 0.8297,
      "step": 604580
    },
    {
      "epoch": 2.1189372194008977,
      "grad_norm": 2.796875,
      "learning_rate": 1.63160613825349e-05,
      "loss": 0.773,
      "step": 604590
    },
    {
      "epoch": 2.1189722669077935,
      "grad_norm": 3.078125,
      "learning_rate": 1.6315412353871198e-05,
      "loss": 0.8585,
      "step": 604600
    },
    {
      "epoch": 2.1190073144146893,
      "grad_norm": 2.734375,
      "learning_rate": 1.6314763325207496e-05,
      "loss": 0.8088,
      "step": 604610
    },
    {
      "epoch": 2.1190423619215846,
      "grad_norm": 2.78125,
      "learning_rate": 1.631411429654379e-05,
      "loss": 0.7489,
      "step": 604620
    },
    {
      "epoch": 2.1190774094284803,
      "grad_norm": 2.53125,
      "learning_rate": 1.6313465267880092e-05,
      "loss": 0.7986,
      "step": 604630
    },
    {
      "epoch": 2.119112456935376,
      "grad_norm": 3.109375,
      "learning_rate": 1.631281623921639e-05,
      "loss": 0.7229,
      "step": 604640
    },
    {
      "epoch": 2.1191475044422714,
      "grad_norm": 2.5625,
      "learning_rate": 1.6312167210552688e-05,
      "loss": 0.794,
      "step": 604650
    },
    {
      "epoch": 2.119182551949167,
      "grad_norm": 2.71875,
      "learning_rate": 1.6311518181888986e-05,
      "loss": 0.7776,
      "step": 604660
    },
    {
      "epoch": 2.1192175994560625,
      "grad_norm": 2.609375,
      "learning_rate": 1.6310869153225284e-05,
      "loss": 0.7662,
      "step": 604670
    },
    {
      "epoch": 2.1192526469629582,
      "grad_norm": 3.671875,
      "learning_rate": 1.6310220124561582e-05,
      "loss": 0.7856,
      "step": 604680
    },
    {
      "epoch": 2.119287694469854,
      "grad_norm": 3.0625,
      "learning_rate": 1.630957109589788e-05,
      "loss": 0.8545,
      "step": 604690
    },
    {
      "epoch": 2.1193227419767493,
      "grad_norm": 2.75,
      "learning_rate": 1.6308922067234178e-05,
      "loss": 0.8128,
      "step": 604700
    },
    {
      "epoch": 2.119357789483645,
      "grad_norm": 2.71875,
      "learning_rate": 1.6308273038570476e-05,
      "loss": 0.8008,
      "step": 604710
    },
    {
      "epoch": 2.119392836990541,
      "grad_norm": 3.28125,
      "learning_rate": 1.6307624009906774e-05,
      "loss": 0.8419,
      "step": 604720
    },
    {
      "epoch": 2.119427884497436,
      "grad_norm": 2.90625,
      "learning_rate": 1.6306974981243072e-05,
      "loss": 0.7771,
      "step": 604730
    },
    {
      "epoch": 2.119462932004332,
      "grad_norm": 3.28125,
      "learning_rate": 1.630632595257937e-05,
      "loss": 0.8486,
      "step": 604740
    },
    {
      "epoch": 2.1194979795112276,
      "grad_norm": 3.0625,
      "learning_rate": 1.6305676923915668e-05,
      "loss": 0.8301,
      "step": 604750
    },
    {
      "epoch": 2.119533027018123,
      "grad_norm": 2.859375,
      "learning_rate": 1.6305027895251966e-05,
      "loss": 0.7151,
      "step": 604760
    },
    {
      "epoch": 2.1195680745250187,
      "grad_norm": 3.09375,
      "learning_rate": 1.6304378866588267e-05,
      "loss": 0.8296,
      "step": 604770
    },
    {
      "epoch": 2.119603122031914,
      "grad_norm": 2.765625,
      "learning_rate": 1.6303729837924565e-05,
      "loss": 0.8537,
      "step": 604780
    },
    {
      "epoch": 2.11963816953881,
      "grad_norm": 2.796875,
      "learning_rate": 1.6303080809260863e-05,
      "loss": 0.8069,
      "step": 604790
    },
    {
      "epoch": 2.1196732170457055,
      "grad_norm": 2.703125,
      "learning_rate": 1.6302431780597158e-05,
      "loss": 0.7834,
      "step": 604800
    },
    {
      "epoch": 2.119708264552601,
      "grad_norm": 2.796875,
      "learning_rate": 1.6301782751933456e-05,
      "loss": 0.678,
      "step": 604810
    },
    {
      "epoch": 2.1197433120594966,
      "grad_norm": 2.84375,
      "learning_rate": 1.6301133723269754e-05,
      "loss": 0.8892,
      "step": 604820
    },
    {
      "epoch": 2.1197783595663924,
      "grad_norm": 3.09375,
      "learning_rate": 1.6300484694606052e-05,
      "loss": 0.8586,
      "step": 604830
    },
    {
      "epoch": 2.1198134070732877,
      "grad_norm": 3.171875,
      "learning_rate": 1.629983566594235e-05,
      "loss": 0.8469,
      "step": 604840
    },
    {
      "epoch": 2.1198484545801834,
      "grad_norm": 2.546875,
      "learning_rate": 1.6299186637278648e-05,
      "loss": 0.8363,
      "step": 604850
    },
    {
      "epoch": 2.119883502087079,
      "grad_norm": 2.84375,
      "learning_rate": 1.6298537608614946e-05,
      "loss": 0.8006,
      "step": 604860
    },
    {
      "epoch": 2.1199185495939745,
      "grad_norm": 2.9375,
      "learning_rate": 1.6297888579951244e-05,
      "loss": 0.8476,
      "step": 604870
    },
    {
      "epoch": 2.1199535971008703,
      "grad_norm": 2.515625,
      "learning_rate": 1.6297239551287545e-05,
      "loss": 0.7807,
      "step": 604880
    },
    {
      "epoch": 2.119988644607766,
      "grad_norm": 3.125,
      "learning_rate": 1.6296590522623843e-05,
      "loss": 0.8317,
      "step": 604890
    },
    {
      "epoch": 2.1200236921146614,
      "grad_norm": 3.109375,
      "learning_rate": 1.629594149396014e-05,
      "loss": 0.7975,
      "step": 604900
    },
    {
      "epoch": 2.120058739621557,
      "grad_norm": 3.0625,
      "learning_rate": 1.629529246529644e-05,
      "loss": 0.862,
      "step": 604910
    },
    {
      "epoch": 2.1200937871284524,
      "grad_norm": 3.09375,
      "learning_rate": 1.6294643436632737e-05,
      "loss": 0.7991,
      "step": 604920
    },
    {
      "epoch": 2.120128834635348,
      "grad_norm": 3.34375,
      "learning_rate": 1.6293994407969035e-05,
      "loss": 0.9049,
      "step": 604930
    },
    {
      "epoch": 2.120163882142244,
      "grad_norm": 3.0625,
      "learning_rate": 1.6293345379305333e-05,
      "loss": 0.7928,
      "step": 604940
    },
    {
      "epoch": 2.1201989296491393,
      "grad_norm": 2.734375,
      "learning_rate": 1.629269635064163e-05,
      "loss": 0.8132,
      "step": 604950
    },
    {
      "epoch": 2.120233977156035,
      "grad_norm": 3.03125,
      "learning_rate": 1.629204732197793e-05,
      "loss": 0.8206,
      "step": 604960
    },
    {
      "epoch": 2.1202690246629308,
      "grad_norm": 2.875,
      "learning_rate": 1.6291398293314227e-05,
      "loss": 0.7767,
      "step": 604970
    },
    {
      "epoch": 2.120304072169826,
      "grad_norm": 3.0,
      "learning_rate": 1.6290749264650525e-05,
      "loss": 0.7791,
      "step": 604980
    },
    {
      "epoch": 2.120339119676722,
      "grad_norm": 2.5625,
      "learning_rate": 1.6290100235986823e-05,
      "loss": 0.7131,
      "step": 604990
    },
    {
      "epoch": 2.1203741671836176,
      "grad_norm": 2.609375,
      "learning_rate": 1.628945120732312e-05,
      "loss": 0.8138,
      "step": 605000
    },
    {
      "epoch": 2.1203741671836176,
      "eval_loss": 0.7674710750579834,
      "eval_runtime": 562.4287,
      "eval_samples_per_second": 676.416,
      "eval_steps_per_second": 56.368,
      "step": 605000
    },
    {
      "epoch": 2.120409214690513,
      "grad_norm": 3.0625,
      "learning_rate": 1.628880217865942e-05,
      "loss": 0.8357,
      "step": 605010
    },
    {
      "epoch": 2.1204442621974087,
      "grad_norm": 2.90625,
      "learning_rate": 1.6288153149995717e-05,
      "loss": 0.769,
      "step": 605020
    },
    {
      "epoch": 2.120479309704304,
      "grad_norm": 2.796875,
      "learning_rate": 1.6287504121332015e-05,
      "loss": 0.8057,
      "step": 605030
    },
    {
      "epoch": 2.1205143572111997,
      "grad_norm": 2.46875,
      "learning_rate": 1.6286855092668313e-05,
      "loss": 0.8467,
      "step": 605040
    },
    {
      "epoch": 2.1205494047180955,
      "grad_norm": 2.421875,
      "learning_rate": 1.628620606400461e-05,
      "loss": 0.7398,
      "step": 605050
    },
    {
      "epoch": 2.120584452224991,
      "grad_norm": 2.8125,
      "learning_rate": 1.628555703534091e-05,
      "loss": 0.7991,
      "step": 605060
    },
    {
      "epoch": 2.1206194997318866,
      "grad_norm": 2.4375,
      "learning_rate": 1.6284908006677207e-05,
      "loss": 0.843,
      "step": 605070
    },
    {
      "epoch": 2.1206545472387823,
      "grad_norm": 3.09375,
      "learning_rate": 1.6284258978013505e-05,
      "loss": 0.7535,
      "step": 605080
    },
    {
      "epoch": 2.1206895947456776,
      "grad_norm": 2.765625,
      "learning_rate": 1.6283609949349803e-05,
      "loss": 0.8852,
      "step": 605090
    },
    {
      "epoch": 2.1207246422525734,
      "grad_norm": 2.953125,
      "learning_rate": 1.62829609206861e-05,
      "loss": 0.8078,
      "step": 605100
    },
    {
      "epoch": 2.120759689759469,
      "grad_norm": 2.875,
      "learning_rate": 1.62823118920224e-05,
      "loss": 0.8245,
      "step": 605110
    },
    {
      "epoch": 2.1207947372663645,
      "grad_norm": 2.734375,
      "learning_rate": 1.6281662863358697e-05,
      "loss": 0.8618,
      "step": 605120
    },
    {
      "epoch": 2.1208297847732602,
      "grad_norm": 2.421875,
      "learning_rate": 1.6281013834695e-05,
      "loss": 0.8258,
      "step": 605130
    },
    {
      "epoch": 2.1208648322801555,
      "grad_norm": 2.828125,
      "learning_rate": 1.6280364806031297e-05,
      "loss": 0.7878,
      "step": 605140
    },
    {
      "epoch": 2.1208998797870513,
      "grad_norm": 3.078125,
      "learning_rate": 1.6279715777367595e-05,
      "loss": 0.8606,
      "step": 605150
    },
    {
      "epoch": 2.120934927293947,
      "grad_norm": 2.765625,
      "learning_rate": 1.6279066748703893e-05,
      "loss": 0.8317,
      "step": 605160
    },
    {
      "epoch": 2.1209699748008424,
      "grad_norm": 3.21875,
      "learning_rate": 1.627841772004019e-05,
      "loss": 0.8461,
      "step": 605170
    },
    {
      "epoch": 2.121005022307738,
      "grad_norm": 3.140625,
      "learning_rate": 1.6277768691376485e-05,
      "loss": 0.894,
      "step": 605180
    },
    {
      "epoch": 2.121040069814634,
      "grad_norm": 2.578125,
      "learning_rate": 1.6277119662712783e-05,
      "loss": 0.77,
      "step": 605190
    },
    {
      "epoch": 2.121075117321529,
      "grad_norm": 3.21875,
      "learning_rate": 1.627647063404908e-05,
      "loss": 0.8214,
      "step": 605200
    },
    {
      "epoch": 2.121110164828425,
      "grad_norm": 2.96875,
      "learning_rate": 1.627582160538538e-05,
      "loss": 0.8185,
      "step": 605210
    },
    {
      "epoch": 2.1211452123353207,
      "grad_norm": 2.96875,
      "learning_rate": 1.6275172576721677e-05,
      "loss": 0.7854,
      "step": 605220
    },
    {
      "epoch": 2.121180259842216,
      "grad_norm": 2.796875,
      "learning_rate": 1.6274523548057975e-05,
      "loss": 0.8258,
      "step": 605230
    },
    {
      "epoch": 2.121215307349112,
      "grad_norm": 2.953125,
      "learning_rate": 1.6273874519394273e-05,
      "loss": 0.8181,
      "step": 605240
    },
    {
      "epoch": 2.121250354856007,
      "grad_norm": 3.0,
      "learning_rate": 1.6273225490730575e-05,
      "loss": 0.8385,
      "step": 605250
    },
    {
      "epoch": 2.121285402362903,
      "grad_norm": 3.171875,
      "learning_rate": 1.6272576462066873e-05,
      "loss": 0.7787,
      "step": 605260
    },
    {
      "epoch": 2.1213204498697986,
      "grad_norm": 3.0625,
      "learning_rate": 1.627192743340317e-05,
      "loss": 0.8262,
      "step": 605270
    },
    {
      "epoch": 2.121355497376694,
      "grad_norm": 3.34375,
      "learning_rate": 1.627127840473947e-05,
      "loss": 0.7854,
      "step": 605280
    },
    {
      "epoch": 2.1213905448835897,
      "grad_norm": 2.6875,
      "learning_rate": 1.6270629376075767e-05,
      "loss": 0.8147,
      "step": 605290
    },
    {
      "epoch": 2.1214255923904854,
      "grad_norm": 2.625,
      "learning_rate": 1.6269980347412065e-05,
      "loss": 0.7979,
      "step": 605300
    },
    {
      "epoch": 2.1214606398973808,
      "grad_norm": 2.96875,
      "learning_rate": 1.6269331318748363e-05,
      "loss": 0.8404,
      "step": 605310
    },
    {
      "epoch": 2.1214956874042765,
      "grad_norm": 3.234375,
      "learning_rate": 1.626868229008466e-05,
      "loss": 0.8264,
      "step": 605320
    },
    {
      "epoch": 2.1215307349111723,
      "grad_norm": 3.21875,
      "learning_rate": 1.626803326142096e-05,
      "loss": 0.7918,
      "step": 605330
    },
    {
      "epoch": 2.1215657824180676,
      "grad_norm": 2.484375,
      "learning_rate": 1.6267384232757257e-05,
      "loss": 0.7824,
      "step": 605340
    },
    {
      "epoch": 2.1216008299249633,
      "grad_norm": 2.921875,
      "learning_rate": 1.6266735204093555e-05,
      "loss": 0.7954,
      "step": 605350
    },
    {
      "epoch": 2.121635877431859,
      "grad_norm": 2.8125,
      "learning_rate": 1.6266086175429853e-05,
      "loss": 0.842,
      "step": 605360
    },
    {
      "epoch": 2.1216709249387544,
      "grad_norm": 2.75,
      "learning_rate": 1.626543714676615e-05,
      "loss": 0.7549,
      "step": 605370
    },
    {
      "epoch": 2.12170597244565,
      "grad_norm": 3.0,
      "learning_rate": 1.626478811810245e-05,
      "loss": 0.8605,
      "step": 605380
    },
    {
      "epoch": 2.1217410199525455,
      "grad_norm": 3.015625,
      "learning_rate": 1.6264139089438747e-05,
      "loss": 0.8639,
      "step": 605390
    },
    {
      "epoch": 2.1217760674594413,
      "grad_norm": 3.09375,
      "learning_rate": 1.6263490060775045e-05,
      "loss": 0.8301,
      "step": 605400
    },
    {
      "epoch": 2.121811114966337,
      "grad_norm": 2.75,
      "learning_rate": 1.6262841032111343e-05,
      "loss": 0.8391,
      "step": 605410
    },
    {
      "epoch": 2.1218461624732323,
      "grad_norm": 2.5,
      "learning_rate": 1.626219200344764e-05,
      "loss": 0.7821,
      "step": 605420
    },
    {
      "epoch": 2.121881209980128,
      "grad_norm": 2.703125,
      "learning_rate": 1.626154297478394e-05,
      "loss": 0.8244,
      "step": 605430
    },
    {
      "epoch": 2.121916257487024,
      "grad_norm": 2.84375,
      "learning_rate": 1.6260893946120237e-05,
      "loss": 0.8416,
      "step": 605440
    },
    {
      "epoch": 2.121951304993919,
      "grad_norm": 2.53125,
      "learning_rate": 1.6260244917456535e-05,
      "loss": 0.8491,
      "step": 605450
    },
    {
      "epoch": 2.121986352500815,
      "grad_norm": 3.109375,
      "learning_rate": 1.6259595888792833e-05,
      "loss": 0.8536,
      "step": 605460
    },
    {
      "epoch": 2.1220214000077107,
      "grad_norm": 3.171875,
      "learning_rate": 1.625894686012913e-05,
      "loss": 0.8933,
      "step": 605470
    },
    {
      "epoch": 2.122056447514606,
      "grad_norm": 2.8125,
      "learning_rate": 1.625829783146543e-05,
      "loss": 0.7656,
      "step": 605480
    },
    {
      "epoch": 2.1220914950215017,
      "grad_norm": 3.375,
      "learning_rate": 1.6257648802801727e-05,
      "loss": 0.8189,
      "step": 605490
    },
    {
      "epoch": 2.122126542528397,
      "grad_norm": 2.75,
      "learning_rate": 1.6256999774138028e-05,
      "loss": 0.8742,
      "step": 605500
    },
    {
      "epoch": 2.122161590035293,
      "grad_norm": 2.765625,
      "learning_rate": 1.6256350745474326e-05,
      "loss": 0.7647,
      "step": 605510
    },
    {
      "epoch": 2.1221966375421886,
      "grad_norm": 2.40625,
      "learning_rate": 1.6255701716810624e-05,
      "loss": 0.7959,
      "step": 605520
    },
    {
      "epoch": 2.122231685049084,
      "grad_norm": 3.296875,
      "learning_rate": 1.6255052688146922e-05,
      "loss": 0.8317,
      "step": 605530
    },
    {
      "epoch": 2.1222667325559796,
      "grad_norm": 3.09375,
      "learning_rate": 1.625440365948322e-05,
      "loss": 0.7823,
      "step": 605540
    },
    {
      "epoch": 2.1223017800628754,
      "grad_norm": 2.6875,
      "learning_rate": 1.6253754630819518e-05,
      "loss": 0.8526,
      "step": 605550
    },
    {
      "epoch": 2.1223368275697707,
      "grad_norm": 2.5,
      "learning_rate": 1.6253105602155813e-05,
      "loss": 0.7918,
      "step": 605560
    },
    {
      "epoch": 2.1223718750766665,
      "grad_norm": 2.875,
      "learning_rate": 1.625245657349211e-05,
      "loss": 0.7907,
      "step": 605570
    },
    {
      "epoch": 2.1224069225835622,
      "grad_norm": 3.0,
      "learning_rate": 1.625180754482841e-05,
      "loss": 0.8257,
      "step": 605580
    },
    {
      "epoch": 2.1224419700904575,
      "grad_norm": 3.203125,
      "learning_rate": 1.6251158516164707e-05,
      "loss": 0.8765,
      "step": 605590
    },
    {
      "epoch": 2.1224770175973533,
      "grad_norm": 2.71875,
      "learning_rate": 1.6250509487501005e-05,
      "loss": 0.8366,
      "step": 605600
    },
    {
      "epoch": 2.1225120651042486,
      "grad_norm": 2.9375,
      "learning_rate": 1.6249860458837306e-05,
      "loss": 0.8815,
      "step": 605610
    },
    {
      "epoch": 2.1225471126111444,
      "grad_norm": 2.609375,
      "learning_rate": 1.6249211430173604e-05,
      "loss": 0.8166,
      "step": 605620
    },
    {
      "epoch": 2.12258216011804,
      "grad_norm": 3.140625,
      "learning_rate": 1.6248562401509902e-05,
      "loss": 0.8105,
      "step": 605630
    },
    {
      "epoch": 2.1226172076249354,
      "grad_norm": 3.203125,
      "learning_rate": 1.62479133728462e-05,
      "loss": 0.8859,
      "step": 605640
    },
    {
      "epoch": 2.122652255131831,
      "grad_norm": 3.1875,
      "learning_rate": 1.6247264344182498e-05,
      "loss": 0.8267,
      "step": 605650
    },
    {
      "epoch": 2.122687302638727,
      "grad_norm": 2.984375,
      "learning_rate": 1.6246615315518796e-05,
      "loss": 0.8311,
      "step": 605660
    },
    {
      "epoch": 2.1227223501456223,
      "grad_norm": 2.9375,
      "learning_rate": 1.6245966286855094e-05,
      "loss": 0.7205,
      "step": 605670
    },
    {
      "epoch": 2.122757397652518,
      "grad_norm": 2.96875,
      "learning_rate": 1.6245317258191392e-05,
      "loss": 0.8049,
      "step": 605680
    },
    {
      "epoch": 2.122792445159414,
      "grad_norm": 3.140625,
      "learning_rate": 1.624466822952769e-05,
      "loss": 0.8633,
      "step": 605690
    },
    {
      "epoch": 2.122827492666309,
      "grad_norm": 3.0,
      "learning_rate": 1.6244019200863988e-05,
      "loss": 0.7903,
      "step": 605700
    },
    {
      "epoch": 2.122862540173205,
      "grad_norm": 2.671875,
      "learning_rate": 1.6243370172200286e-05,
      "loss": 0.7416,
      "step": 605710
    },
    {
      "epoch": 2.1228975876801,
      "grad_norm": 3.0,
      "learning_rate": 1.6242721143536584e-05,
      "loss": 0.763,
      "step": 605720
    },
    {
      "epoch": 2.122932635186996,
      "grad_norm": 2.890625,
      "learning_rate": 1.6242072114872882e-05,
      "loss": 0.8014,
      "step": 605730
    },
    {
      "epoch": 2.1229676826938917,
      "grad_norm": 3.09375,
      "learning_rate": 1.624142308620918e-05,
      "loss": 0.8306,
      "step": 605740
    },
    {
      "epoch": 2.123002730200787,
      "grad_norm": 2.625,
      "learning_rate": 1.6240774057545478e-05,
      "loss": 0.753,
      "step": 605750
    },
    {
      "epoch": 2.1230377777076828,
      "grad_norm": 2.875,
      "learning_rate": 1.6240125028881776e-05,
      "loss": 0.8171,
      "step": 605760
    },
    {
      "epoch": 2.1230728252145785,
      "grad_norm": 3.34375,
      "learning_rate": 1.6239476000218074e-05,
      "loss": 0.8827,
      "step": 605770
    },
    {
      "epoch": 2.123107872721474,
      "grad_norm": 3.09375,
      "learning_rate": 1.6238826971554372e-05,
      "loss": 0.7184,
      "step": 605780
    },
    {
      "epoch": 2.1231429202283696,
      "grad_norm": 2.671875,
      "learning_rate": 1.623817794289067e-05,
      "loss": 0.82,
      "step": 605790
    },
    {
      "epoch": 2.1231779677352653,
      "grad_norm": 3.03125,
      "learning_rate": 1.6237528914226968e-05,
      "loss": 0.8261,
      "step": 605800
    },
    {
      "epoch": 2.1232130152421607,
      "grad_norm": 2.9375,
      "learning_rate": 1.6236879885563266e-05,
      "loss": 0.7731,
      "step": 605810
    },
    {
      "epoch": 2.1232480627490564,
      "grad_norm": 2.984375,
      "learning_rate": 1.6236230856899564e-05,
      "loss": 0.8477,
      "step": 605820
    },
    {
      "epoch": 2.1232831102559517,
      "grad_norm": 2.734375,
      "learning_rate": 1.6235581828235862e-05,
      "loss": 0.8018,
      "step": 605830
    },
    {
      "epoch": 2.1233181577628475,
      "grad_norm": 2.6875,
      "learning_rate": 1.623493279957216e-05,
      "loss": 0.8331,
      "step": 605840
    },
    {
      "epoch": 2.1233532052697432,
      "grad_norm": 3.109375,
      "learning_rate": 1.6234283770908458e-05,
      "loss": 0.8766,
      "step": 605850
    },
    {
      "epoch": 2.1233882527766386,
      "grad_norm": 3.640625,
      "learning_rate": 1.6233634742244756e-05,
      "loss": 0.8403,
      "step": 605860
    },
    {
      "epoch": 2.1234233002835343,
      "grad_norm": 3.21875,
      "learning_rate": 1.6232985713581057e-05,
      "loss": 0.7385,
      "step": 605870
    },
    {
      "epoch": 2.12345834779043,
      "grad_norm": 2.96875,
      "learning_rate": 1.6232336684917355e-05,
      "loss": 0.7957,
      "step": 605880
    },
    {
      "epoch": 2.1234933952973254,
      "grad_norm": 2.546875,
      "learning_rate": 1.6231687656253653e-05,
      "loss": 0.7203,
      "step": 605890
    },
    {
      "epoch": 2.123528442804221,
      "grad_norm": 2.90625,
      "learning_rate": 1.623103862758995e-05,
      "loss": 0.7711,
      "step": 605900
    },
    {
      "epoch": 2.123563490311117,
      "grad_norm": 2.78125,
      "learning_rate": 1.623038959892625e-05,
      "loss": 0.8352,
      "step": 605910
    },
    {
      "epoch": 2.123598537818012,
      "grad_norm": 3.03125,
      "learning_rate": 1.6229740570262547e-05,
      "loss": 0.733,
      "step": 605920
    },
    {
      "epoch": 2.123633585324908,
      "grad_norm": 2.4375,
      "learning_rate": 1.6229091541598842e-05,
      "loss": 0.7392,
      "step": 605930
    },
    {
      "epoch": 2.1236686328318033,
      "grad_norm": 2.703125,
      "learning_rate": 1.622844251293514e-05,
      "loss": 0.8163,
      "step": 605940
    },
    {
      "epoch": 2.123703680338699,
      "grad_norm": 2.6875,
      "learning_rate": 1.6227793484271438e-05,
      "loss": 0.8406,
      "step": 605950
    },
    {
      "epoch": 2.123738727845595,
      "grad_norm": 2.671875,
      "learning_rate": 1.6227144455607736e-05,
      "loss": 0.798,
      "step": 605960
    },
    {
      "epoch": 2.12377377535249,
      "grad_norm": 2.703125,
      "learning_rate": 1.6226495426944034e-05,
      "loss": 0.8032,
      "step": 605970
    },
    {
      "epoch": 2.123808822859386,
      "grad_norm": 3.0625,
      "learning_rate": 1.6225846398280335e-05,
      "loss": 0.8934,
      "step": 605980
    },
    {
      "epoch": 2.1238438703662816,
      "grad_norm": 2.9375,
      "learning_rate": 1.6225197369616633e-05,
      "loss": 0.8269,
      "step": 605990
    },
    {
      "epoch": 2.123878917873177,
      "grad_norm": 3.046875,
      "learning_rate": 1.622454834095293e-05,
      "loss": 0.7965,
      "step": 606000
    },
    {
      "epoch": 2.1239139653800727,
      "grad_norm": 2.984375,
      "learning_rate": 1.622389931228923e-05,
      "loss": 0.7661,
      "step": 606010
    },
    {
      "epoch": 2.1239490128869685,
      "grad_norm": 2.96875,
      "learning_rate": 1.6223250283625527e-05,
      "loss": 0.7862,
      "step": 606020
    },
    {
      "epoch": 2.123984060393864,
      "grad_norm": 2.8125,
      "learning_rate": 1.6222601254961825e-05,
      "loss": 0.8748,
      "step": 606030
    },
    {
      "epoch": 2.1240191079007595,
      "grad_norm": 2.359375,
      "learning_rate": 1.6221952226298123e-05,
      "loss": 0.7961,
      "step": 606040
    },
    {
      "epoch": 2.124054155407655,
      "grad_norm": 2.640625,
      "learning_rate": 1.622130319763442e-05,
      "loss": 0.7834,
      "step": 606050
    },
    {
      "epoch": 2.1240892029145506,
      "grad_norm": 3.203125,
      "learning_rate": 1.622065416897072e-05,
      "loss": 0.8636,
      "step": 606060
    },
    {
      "epoch": 2.1241242504214464,
      "grad_norm": 3.15625,
      "learning_rate": 1.6220005140307017e-05,
      "loss": 0.814,
      "step": 606070
    },
    {
      "epoch": 2.1241592979283417,
      "grad_norm": 3.046875,
      "learning_rate": 1.6219356111643315e-05,
      "loss": 0.7978,
      "step": 606080
    },
    {
      "epoch": 2.1241943454352374,
      "grad_norm": 3.125,
      "learning_rate": 1.6218707082979613e-05,
      "loss": 0.8285,
      "step": 606090
    },
    {
      "epoch": 2.124229392942133,
      "grad_norm": 2.8125,
      "learning_rate": 1.621805805431591e-05,
      "loss": 0.852,
      "step": 606100
    },
    {
      "epoch": 2.1242644404490285,
      "grad_norm": 3.5,
      "learning_rate": 1.621740902565221e-05,
      "loss": 0.8691,
      "step": 606110
    },
    {
      "epoch": 2.1242994879559243,
      "grad_norm": 3.421875,
      "learning_rate": 1.6216759996988507e-05,
      "loss": 0.824,
      "step": 606120
    },
    {
      "epoch": 2.12433453546282,
      "grad_norm": 2.890625,
      "learning_rate": 1.6216110968324805e-05,
      "loss": 0.7522,
      "step": 606130
    },
    {
      "epoch": 2.1243695829697153,
      "grad_norm": 3.375,
      "learning_rate": 1.6215461939661103e-05,
      "loss": 0.8616,
      "step": 606140
    },
    {
      "epoch": 2.124404630476611,
      "grad_norm": 2.75,
      "learning_rate": 1.62148129109974e-05,
      "loss": 0.8258,
      "step": 606150
    },
    {
      "epoch": 2.124439677983507,
      "grad_norm": 2.421875,
      "learning_rate": 1.62141638823337e-05,
      "loss": 0.7414,
      "step": 606160
    },
    {
      "epoch": 2.124474725490402,
      "grad_norm": 2.953125,
      "learning_rate": 1.6213514853669997e-05,
      "loss": 0.7708,
      "step": 606170
    },
    {
      "epoch": 2.124509772997298,
      "grad_norm": 3.125,
      "learning_rate": 1.6212865825006295e-05,
      "loss": 0.8073,
      "step": 606180
    },
    {
      "epoch": 2.1245448205041932,
      "grad_norm": 2.75,
      "learning_rate": 1.6212216796342593e-05,
      "loss": 0.8834,
      "step": 606190
    },
    {
      "epoch": 2.124579868011089,
      "grad_norm": 3.15625,
      "learning_rate": 1.621156776767889e-05,
      "loss": 0.7922,
      "step": 606200
    },
    {
      "epoch": 2.1246149155179848,
      "grad_norm": 3.40625,
      "learning_rate": 1.621091873901519e-05,
      "loss": 0.7951,
      "step": 606210
    },
    {
      "epoch": 2.12464996302488,
      "grad_norm": 3.21875,
      "learning_rate": 1.6210269710351487e-05,
      "loss": 0.7857,
      "step": 606220
    },
    {
      "epoch": 2.124685010531776,
      "grad_norm": 2.765625,
      "learning_rate": 1.620962068168779e-05,
      "loss": 0.7784,
      "step": 606230
    },
    {
      "epoch": 2.1247200580386716,
      "grad_norm": 2.75,
      "learning_rate": 1.6208971653024087e-05,
      "loss": 0.8385,
      "step": 606240
    },
    {
      "epoch": 2.124755105545567,
      "grad_norm": 3.09375,
      "learning_rate": 1.6208322624360385e-05,
      "loss": 0.7608,
      "step": 606250
    },
    {
      "epoch": 2.1247901530524627,
      "grad_norm": 3.390625,
      "learning_rate": 1.6207673595696683e-05,
      "loss": 0.8389,
      "step": 606260
    },
    {
      "epoch": 2.1248252005593584,
      "grad_norm": 3.25,
      "learning_rate": 1.620702456703298e-05,
      "loss": 0.8106,
      "step": 606270
    },
    {
      "epoch": 2.1248602480662537,
      "grad_norm": 2.90625,
      "learning_rate": 1.620637553836928e-05,
      "loss": 0.9375,
      "step": 606280
    },
    {
      "epoch": 2.1248952955731495,
      "grad_norm": 2.90625,
      "learning_rate": 1.6205726509705577e-05,
      "loss": 0.8386,
      "step": 606290
    },
    {
      "epoch": 2.124930343080045,
      "grad_norm": 2.796875,
      "learning_rate": 1.6205077481041875e-05,
      "loss": 0.7079,
      "step": 606300
    },
    {
      "epoch": 2.1249653905869406,
      "grad_norm": 2.84375,
      "learning_rate": 1.620442845237817e-05,
      "loss": 0.8426,
      "step": 606310
    },
    {
      "epoch": 2.1250004380938363,
      "grad_norm": 2.90625,
      "learning_rate": 1.6203779423714467e-05,
      "loss": 0.7415,
      "step": 606320
    },
    {
      "epoch": 2.1250354856007316,
      "grad_norm": 2.8125,
      "learning_rate": 1.6203130395050765e-05,
      "loss": 0.7652,
      "step": 606330
    },
    {
      "epoch": 2.1250705331076274,
      "grad_norm": 3.0,
      "learning_rate": 1.6202481366387063e-05,
      "loss": 0.7851,
      "step": 606340
    },
    {
      "epoch": 2.125105580614523,
      "grad_norm": 2.796875,
      "learning_rate": 1.6201832337723365e-05,
      "loss": 0.8249,
      "step": 606350
    },
    {
      "epoch": 2.1251406281214185,
      "grad_norm": 3.09375,
      "learning_rate": 1.6201183309059663e-05,
      "loss": 0.8492,
      "step": 606360
    },
    {
      "epoch": 2.125175675628314,
      "grad_norm": 3.0625,
      "learning_rate": 1.620053428039596e-05,
      "loss": 0.8455,
      "step": 606370
    },
    {
      "epoch": 2.12521072313521,
      "grad_norm": 2.75,
      "learning_rate": 1.619988525173226e-05,
      "loss": 0.8178,
      "step": 606380
    },
    {
      "epoch": 2.1252457706421053,
      "grad_norm": 2.890625,
      "learning_rate": 1.6199236223068557e-05,
      "loss": 0.8753,
      "step": 606390
    },
    {
      "epoch": 2.125280818149001,
      "grad_norm": 2.71875,
      "learning_rate": 1.6198587194404855e-05,
      "loss": 0.785,
      "step": 606400
    },
    {
      "epoch": 2.1253158656558964,
      "grad_norm": 3.171875,
      "learning_rate": 1.6197938165741153e-05,
      "loss": 0.8235,
      "step": 606410
    },
    {
      "epoch": 2.125350913162792,
      "grad_norm": 3.0625,
      "learning_rate": 1.619728913707745e-05,
      "loss": 0.9043,
      "step": 606420
    },
    {
      "epoch": 2.125385960669688,
      "grad_norm": 2.921875,
      "learning_rate": 1.619664010841375e-05,
      "loss": 0.7689,
      "step": 606430
    },
    {
      "epoch": 2.125421008176583,
      "grad_norm": 2.984375,
      "learning_rate": 1.6195991079750047e-05,
      "loss": 0.8242,
      "step": 606440
    },
    {
      "epoch": 2.125456055683479,
      "grad_norm": 3.0625,
      "learning_rate": 1.6195342051086345e-05,
      "loss": 0.7968,
      "step": 606450
    },
    {
      "epoch": 2.1254911031903747,
      "grad_norm": 2.59375,
      "learning_rate": 1.6194693022422643e-05,
      "loss": 0.8197,
      "step": 606460
    },
    {
      "epoch": 2.12552615069727,
      "grad_norm": 2.84375,
      "learning_rate": 1.619404399375894e-05,
      "loss": 0.772,
      "step": 606470
    },
    {
      "epoch": 2.125561198204166,
      "grad_norm": 3.1875,
      "learning_rate": 1.619339496509524e-05,
      "loss": 0.8392,
      "step": 606480
    },
    {
      "epoch": 2.1255962457110615,
      "grad_norm": 2.828125,
      "learning_rate": 1.619274593643154e-05,
      "loss": 0.806,
      "step": 606490
    },
    {
      "epoch": 2.125631293217957,
      "grad_norm": 2.28125,
      "learning_rate": 1.6192096907767835e-05,
      "loss": 0.8352,
      "step": 606500
    },
    {
      "epoch": 2.1256663407248526,
      "grad_norm": 2.65625,
      "learning_rate": 1.6191447879104133e-05,
      "loss": 0.7661,
      "step": 606510
    },
    {
      "epoch": 2.1257013882317484,
      "grad_norm": 2.5,
      "learning_rate": 1.619079885044043e-05,
      "loss": 0.7559,
      "step": 606520
    },
    {
      "epoch": 2.1257364357386437,
      "grad_norm": 3.0625,
      "learning_rate": 1.619014982177673e-05,
      "loss": 0.8352,
      "step": 606530
    },
    {
      "epoch": 2.1257714832455394,
      "grad_norm": 3.015625,
      "learning_rate": 1.6189500793113027e-05,
      "loss": 0.7913,
      "step": 606540
    },
    {
      "epoch": 2.1258065307524348,
      "grad_norm": 2.640625,
      "learning_rate": 1.6188851764449325e-05,
      "loss": 0.767,
      "step": 606550
    },
    {
      "epoch": 2.1258415782593305,
      "grad_norm": 3.28125,
      "learning_rate": 1.6188202735785623e-05,
      "loss": 0.7606,
      "step": 606560
    },
    {
      "epoch": 2.1258766257662263,
      "grad_norm": 2.890625,
      "learning_rate": 1.618755370712192e-05,
      "loss": 0.8233,
      "step": 606570
    },
    {
      "epoch": 2.1259116732731216,
      "grad_norm": 3.0625,
      "learning_rate": 1.618690467845822e-05,
      "loss": 0.8187,
      "step": 606580
    },
    {
      "epoch": 2.1259467207800173,
      "grad_norm": 2.8125,
      "learning_rate": 1.6186255649794517e-05,
      "loss": 0.8725,
      "step": 606590
    },
    {
      "epoch": 2.125981768286913,
      "grad_norm": 3.21875,
      "learning_rate": 1.6185606621130818e-05,
      "loss": 0.9123,
      "step": 606600
    },
    {
      "epoch": 2.1260168157938084,
      "grad_norm": 3.0625,
      "learning_rate": 1.6184957592467116e-05,
      "loss": 0.8414,
      "step": 606610
    },
    {
      "epoch": 2.126051863300704,
      "grad_norm": 2.609375,
      "learning_rate": 1.6184308563803414e-05,
      "loss": 0.7646,
      "step": 606620
    },
    {
      "epoch": 2.1260869108076,
      "grad_norm": 2.5625,
      "learning_rate": 1.6183659535139712e-05,
      "loss": 0.8239,
      "step": 606630
    },
    {
      "epoch": 2.1261219583144952,
      "grad_norm": 3.234375,
      "learning_rate": 1.618301050647601e-05,
      "loss": 0.8551,
      "step": 606640
    },
    {
      "epoch": 2.126157005821391,
      "grad_norm": 2.875,
      "learning_rate": 1.6182361477812308e-05,
      "loss": 0.8472,
      "step": 606650
    },
    {
      "epoch": 2.1261920533282863,
      "grad_norm": 3.171875,
      "learning_rate": 1.6181712449148606e-05,
      "loss": 0.7529,
      "step": 606660
    },
    {
      "epoch": 2.126227100835182,
      "grad_norm": 2.65625,
      "learning_rate": 1.6181063420484904e-05,
      "loss": 0.7871,
      "step": 606670
    },
    {
      "epoch": 2.126262148342078,
      "grad_norm": 2.765625,
      "learning_rate": 1.61804143918212e-05,
      "loss": 0.8448,
      "step": 606680
    },
    {
      "epoch": 2.126297195848973,
      "grad_norm": 2.578125,
      "learning_rate": 1.6179765363157497e-05,
      "loss": 0.8417,
      "step": 606690
    },
    {
      "epoch": 2.126332243355869,
      "grad_norm": 3.25,
      "learning_rate": 1.6179116334493795e-05,
      "loss": 0.796,
      "step": 606700
    },
    {
      "epoch": 2.1263672908627647,
      "grad_norm": 2.59375,
      "learning_rate": 1.6178467305830096e-05,
      "loss": 0.831,
      "step": 606710
    },
    {
      "epoch": 2.12640233836966,
      "grad_norm": 2.640625,
      "learning_rate": 1.6177818277166394e-05,
      "loss": 0.7523,
      "step": 606720
    },
    {
      "epoch": 2.1264373858765557,
      "grad_norm": 2.640625,
      "learning_rate": 1.6177169248502692e-05,
      "loss": 0.892,
      "step": 606730
    },
    {
      "epoch": 2.1264724333834515,
      "grad_norm": 2.6875,
      "learning_rate": 1.617652021983899e-05,
      "loss": 0.8449,
      "step": 606740
    },
    {
      "epoch": 2.126507480890347,
      "grad_norm": 3.015625,
      "learning_rate": 1.6175871191175288e-05,
      "loss": 0.8932,
      "step": 606750
    },
    {
      "epoch": 2.1265425283972426,
      "grad_norm": 2.296875,
      "learning_rate": 1.6175222162511586e-05,
      "loss": 0.8513,
      "step": 606760
    },
    {
      "epoch": 2.126577575904138,
      "grad_norm": 2.8125,
      "learning_rate": 1.6174573133847884e-05,
      "loss": 0.7483,
      "step": 606770
    },
    {
      "epoch": 2.1266126234110336,
      "grad_norm": 2.890625,
      "learning_rate": 1.6173924105184182e-05,
      "loss": 0.7989,
      "step": 606780
    },
    {
      "epoch": 2.1266476709179294,
      "grad_norm": 2.390625,
      "learning_rate": 1.617327507652048e-05,
      "loss": 0.7426,
      "step": 606790
    },
    {
      "epoch": 2.1266827184248247,
      "grad_norm": 3.328125,
      "learning_rate": 1.6172626047856778e-05,
      "loss": 0.8399,
      "step": 606800
    },
    {
      "epoch": 2.1267177659317205,
      "grad_norm": 2.953125,
      "learning_rate": 1.6171977019193076e-05,
      "loss": 0.7921,
      "step": 606810
    },
    {
      "epoch": 2.126752813438616,
      "grad_norm": 2.875,
      "learning_rate": 1.6171327990529374e-05,
      "loss": 0.8582,
      "step": 606820
    },
    {
      "epoch": 2.1267878609455115,
      "grad_norm": 3.234375,
      "learning_rate": 1.6170678961865672e-05,
      "loss": 0.8342,
      "step": 606830
    },
    {
      "epoch": 2.1268229084524073,
      "grad_norm": 3.078125,
      "learning_rate": 1.617002993320197e-05,
      "loss": 0.9149,
      "step": 606840
    },
    {
      "epoch": 2.126857955959303,
      "grad_norm": 2.9375,
      "learning_rate": 1.616938090453827e-05,
      "loss": 0.8345,
      "step": 606850
    },
    {
      "epoch": 2.1268930034661984,
      "grad_norm": 2.84375,
      "learning_rate": 1.616873187587457e-05,
      "loss": 0.7947,
      "step": 606860
    },
    {
      "epoch": 2.126928050973094,
      "grad_norm": 2.84375,
      "learning_rate": 1.6168082847210864e-05,
      "loss": 0.7711,
      "step": 606870
    },
    {
      "epoch": 2.1269630984799894,
      "grad_norm": 2.984375,
      "learning_rate": 1.6167433818547162e-05,
      "loss": 0.8009,
      "step": 606880
    },
    {
      "epoch": 2.126998145986885,
      "grad_norm": 2.953125,
      "learning_rate": 1.616678478988346e-05,
      "loss": 0.8306,
      "step": 606890
    },
    {
      "epoch": 2.127033193493781,
      "grad_norm": 2.953125,
      "learning_rate": 1.6166135761219758e-05,
      "loss": 0.8269,
      "step": 606900
    },
    {
      "epoch": 2.1270682410006763,
      "grad_norm": 2.46875,
      "learning_rate": 1.6165486732556056e-05,
      "loss": 0.8099,
      "step": 606910
    },
    {
      "epoch": 2.127103288507572,
      "grad_norm": 2.71875,
      "learning_rate": 1.6164837703892354e-05,
      "loss": 0.8386,
      "step": 606920
    },
    {
      "epoch": 2.127138336014468,
      "grad_norm": 3.28125,
      "learning_rate": 1.6164188675228652e-05,
      "loss": 0.7919,
      "step": 606930
    },
    {
      "epoch": 2.127173383521363,
      "grad_norm": 2.703125,
      "learning_rate": 1.616353964656495e-05,
      "loss": 0.7655,
      "step": 606940
    },
    {
      "epoch": 2.127208431028259,
      "grad_norm": 2.96875,
      "learning_rate": 1.6162890617901248e-05,
      "loss": 0.7922,
      "step": 606950
    },
    {
      "epoch": 2.1272434785351546,
      "grad_norm": 3.234375,
      "learning_rate": 1.6162241589237546e-05,
      "loss": 0.849,
      "step": 606960
    },
    {
      "epoch": 2.12727852604205,
      "grad_norm": 2.84375,
      "learning_rate": 1.6161592560573848e-05,
      "loss": 0.8375,
      "step": 606970
    },
    {
      "epoch": 2.1273135735489457,
      "grad_norm": 2.65625,
      "learning_rate": 1.6160943531910146e-05,
      "loss": 0.7929,
      "step": 606980
    },
    {
      "epoch": 2.127348621055841,
      "grad_norm": 2.859375,
      "learning_rate": 1.6160294503246444e-05,
      "loss": 0.8145,
      "step": 606990
    },
    {
      "epoch": 2.1273836685627368,
      "grad_norm": 3.25,
      "learning_rate": 1.615964547458274e-05,
      "loss": 0.8048,
      "step": 607000
    },
    {
      "epoch": 2.1274187160696325,
      "grad_norm": 3.015625,
      "learning_rate": 1.615899644591904e-05,
      "loss": 0.8287,
      "step": 607010
    },
    {
      "epoch": 2.127453763576528,
      "grad_norm": 3.015625,
      "learning_rate": 1.6158347417255338e-05,
      "loss": 0.7864,
      "step": 607020
    },
    {
      "epoch": 2.1274888110834236,
      "grad_norm": 2.9375,
      "learning_rate": 1.6157698388591636e-05,
      "loss": 0.8672,
      "step": 607030
    },
    {
      "epoch": 2.1275238585903193,
      "grad_norm": 2.59375,
      "learning_rate": 1.6157049359927934e-05,
      "loss": 0.7755,
      "step": 607040
    },
    {
      "epoch": 2.1275589060972147,
      "grad_norm": 3.078125,
      "learning_rate": 1.615640033126423e-05,
      "loss": 0.8551,
      "step": 607050
    },
    {
      "epoch": 2.1275939536041104,
      "grad_norm": 2.953125,
      "learning_rate": 1.6155751302600526e-05,
      "loss": 0.8687,
      "step": 607060
    },
    {
      "epoch": 2.127629001111006,
      "grad_norm": 3.015625,
      "learning_rate": 1.6155102273936824e-05,
      "loss": 0.855,
      "step": 607070
    },
    {
      "epoch": 2.1276640486179015,
      "grad_norm": 2.71875,
      "learning_rate": 1.6154453245273126e-05,
      "loss": 0.8317,
      "step": 607080
    },
    {
      "epoch": 2.1276990961247972,
      "grad_norm": 3.03125,
      "learning_rate": 1.6153804216609424e-05,
      "loss": 0.8647,
      "step": 607090
    },
    {
      "epoch": 2.1277341436316926,
      "grad_norm": 2.75,
      "learning_rate": 1.615315518794572e-05,
      "loss": 0.7658,
      "step": 607100
    },
    {
      "epoch": 2.1277691911385883,
      "grad_norm": 2.703125,
      "learning_rate": 1.615250615928202e-05,
      "loss": 0.7392,
      "step": 607110
    },
    {
      "epoch": 2.127804238645484,
      "grad_norm": 2.875,
      "learning_rate": 1.6151857130618318e-05,
      "loss": 0.81,
      "step": 607120
    },
    {
      "epoch": 2.1278392861523794,
      "grad_norm": 3.171875,
      "learning_rate": 1.6151208101954616e-05,
      "loss": 0.8741,
      "step": 607130
    },
    {
      "epoch": 2.127874333659275,
      "grad_norm": 2.921875,
      "learning_rate": 1.6150559073290914e-05,
      "loss": 0.8595,
      "step": 607140
    },
    {
      "epoch": 2.127909381166171,
      "grad_norm": 2.90625,
      "learning_rate": 1.614991004462721e-05,
      "loss": 0.8385,
      "step": 607150
    },
    {
      "epoch": 2.127944428673066,
      "grad_norm": 2.59375,
      "learning_rate": 1.614926101596351e-05,
      "loss": 0.8366,
      "step": 607160
    },
    {
      "epoch": 2.127979476179962,
      "grad_norm": 3.015625,
      "learning_rate": 1.6148611987299808e-05,
      "loss": 0.7721,
      "step": 607170
    },
    {
      "epoch": 2.1280145236868577,
      "grad_norm": 2.65625,
      "learning_rate": 1.6147962958636106e-05,
      "loss": 0.8612,
      "step": 607180
    },
    {
      "epoch": 2.128049571193753,
      "grad_norm": 3.125,
      "learning_rate": 1.6147313929972404e-05,
      "loss": 0.8273,
      "step": 607190
    },
    {
      "epoch": 2.128084618700649,
      "grad_norm": 3.0625,
      "learning_rate": 1.61466649013087e-05,
      "loss": 0.7873,
      "step": 607200
    },
    {
      "epoch": 2.128119666207544,
      "grad_norm": 2.6875,
      "learning_rate": 1.6146015872645e-05,
      "loss": 0.8219,
      "step": 607210
    },
    {
      "epoch": 2.12815471371444,
      "grad_norm": 3.140625,
      "learning_rate": 1.61453668439813e-05,
      "loss": 0.7592,
      "step": 607220
    },
    {
      "epoch": 2.1281897612213356,
      "grad_norm": 3.40625,
      "learning_rate": 1.61447178153176e-05,
      "loss": 0.8502,
      "step": 607230
    },
    {
      "epoch": 2.128224808728231,
      "grad_norm": 2.8125,
      "learning_rate": 1.6144068786653897e-05,
      "loss": 0.7825,
      "step": 607240
    },
    {
      "epoch": 2.1282598562351267,
      "grad_norm": 3.125,
      "learning_rate": 1.614341975799019e-05,
      "loss": 0.8607,
      "step": 607250
    },
    {
      "epoch": 2.1282949037420225,
      "grad_norm": 3.140625,
      "learning_rate": 1.614277072932649e-05,
      "loss": 0.8108,
      "step": 607260
    },
    {
      "epoch": 2.1283299512489178,
      "grad_norm": 2.953125,
      "learning_rate": 1.6142121700662788e-05,
      "loss": 0.8448,
      "step": 607270
    },
    {
      "epoch": 2.1283649987558135,
      "grad_norm": 2.78125,
      "learning_rate": 1.6141472671999086e-05,
      "loss": 0.7663,
      "step": 607280
    },
    {
      "epoch": 2.1284000462627093,
      "grad_norm": 2.765625,
      "learning_rate": 1.6140823643335384e-05,
      "loss": 0.7744,
      "step": 607290
    },
    {
      "epoch": 2.1284350937696046,
      "grad_norm": 3.109375,
      "learning_rate": 1.614017461467168e-05,
      "loss": 0.8203,
      "step": 607300
    },
    {
      "epoch": 2.1284701412765004,
      "grad_norm": 3.03125,
      "learning_rate": 1.613952558600798e-05,
      "loss": 0.8389,
      "step": 607310
    },
    {
      "epoch": 2.1285051887833957,
      "grad_norm": 2.796875,
      "learning_rate": 1.6138876557344278e-05,
      "loss": 0.8289,
      "step": 607320
    },
    {
      "epoch": 2.1285402362902914,
      "grad_norm": 2.9375,
      "learning_rate": 1.613822752868058e-05,
      "loss": 0.7608,
      "step": 607330
    },
    {
      "epoch": 2.128575283797187,
      "grad_norm": 2.5,
      "learning_rate": 1.6137578500016877e-05,
      "loss": 0.7865,
      "step": 607340
    },
    {
      "epoch": 2.1286103313040825,
      "grad_norm": 3.46875,
      "learning_rate": 1.6136929471353175e-05,
      "loss": 0.8095,
      "step": 607350
    },
    {
      "epoch": 2.1286453788109783,
      "grad_norm": 2.96875,
      "learning_rate": 1.6136280442689473e-05,
      "loss": 0.7718,
      "step": 607360
    },
    {
      "epoch": 2.128680426317874,
      "grad_norm": 3.03125,
      "learning_rate": 1.613563141402577e-05,
      "loss": 0.806,
      "step": 607370
    },
    {
      "epoch": 2.1287154738247693,
      "grad_norm": 2.875,
      "learning_rate": 1.613498238536207e-05,
      "loss": 0.8286,
      "step": 607380
    },
    {
      "epoch": 2.128750521331665,
      "grad_norm": 2.8125,
      "learning_rate": 1.6134333356698367e-05,
      "loss": 0.87,
      "step": 607390
    },
    {
      "epoch": 2.128785568838561,
      "grad_norm": 3.078125,
      "learning_rate": 1.6133684328034665e-05,
      "loss": 0.7828,
      "step": 607400
    },
    {
      "epoch": 2.128820616345456,
      "grad_norm": 3.015625,
      "learning_rate": 1.6133035299370963e-05,
      "loss": 0.7575,
      "step": 607410
    },
    {
      "epoch": 2.128855663852352,
      "grad_norm": 3.140625,
      "learning_rate": 1.613238627070726e-05,
      "loss": 0.8689,
      "step": 607420
    },
    {
      "epoch": 2.1288907113592472,
      "grad_norm": 3.109375,
      "learning_rate": 1.613173724204356e-05,
      "loss": 0.8588,
      "step": 607430
    },
    {
      "epoch": 2.128925758866143,
      "grad_norm": 2.609375,
      "learning_rate": 1.6131088213379854e-05,
      "loss": 0.7327,
      "step": 607440
    },
    {
      "epoch": 2.1289608063730387,
      "grad_norm": 2.875,
      "learning_rate": 1.6130439184716155e-05,
      "loss": 0.7539,
      "step": 607450
    },
    {
      "epoch": 2.128995853879934,
      "grad_norm": 3.109375,
      "learning_rate": 1.6129790156052453e-05,
      "loss": 0.8548,
      "step": 607460
    },
    {
      "epoch": 2.12903090138683,
      "grad_norm": 2.703125,
      "learning_rate": 1.612914112738875e-05,
      "loss": 0.7939,
      "step": 607470
    },
    {
      "epoch": 2.1290659488937256,
      "grad_norm": 2.921875,
      "learning_rate": 1.612849209872505e-05,
      "loss": 0.8862,
      "step": 607480
    },
    {
      "epoch": 2.129100996400621,
      "grad_norm": 2.9375,
      "learning_rate": 1.6127843070061347e-05,
      "loss": 0.7993,
      "step": 607490
    },
    {
      "epoch": 2.1291360439075167,
      "grad_norm": 2.921875,
      "learning_rate": 1.6127194041397645e-05,
      "loss": 0.7964,
      "step": 607500
    },
    {
      "epoch": 2.1291710914144124,
      "grad_norm": 2.984375,
      "learning_rate": 1.6126545012733943e-05,
      "loss": 0.8866,
      "step": 607510
    },
    {
      "epoch": 2.1292061389213077,
      "grad_norm": 2.3125,
      "learning_rate": 1.612589598407024e-05,
      "loss": 0.8368,
      "step": 607520
    },
    {
      "epoch": 2.1292411864282035,
      "grad_norm": 2.796875,
      "learning_rate": 1.612524695540654e-05,
      "loss": 0.9095,
      "step": 607530
    },
    {
      "epoch": 2.129276233935099,
      "grad_norm": 2.859375,
      "learning_rate": 1.6124597926742837e-05,
      "loss": 0.7947,
      "step": 607540
    },
    {
      "epoch": 2.1293112814419946,
      "grad_norm": 3.140625,
      "learning_rate": 1.6123948898079135e-05,
      "loss": 0.7803,
      "step": 607550
    },
    {
      "epoch": 2.1293463289488903,
      "grad_norm": 2.578125,
      "learning_rate": 1.6123299869415433e-05,
      "loss": 0.8114,
      "step": 607560
    },
    {
      "epoch": 2.1293813764557856,
      "grad_norm": 2.609375,
      "learning_rate": 1.612265084075173e-05,
      "loss": 0.8264,
      "step": 607570
    },
    {
      "epoch": 2.1294164239626814,
      "grad_norm": 2.4375,
      "learning_rate": 1.612200181208803e-05,
      "loss": 0.8738,
      "step": 607580
    },
    {
      "epoch": 2.129451471469577,
      "grad_norm": 2.875,
      "learning_rate": 1.612135278342433e-05,
      "loss": 0.8166,
      "step": 607590
    },
    {
      "epoch": 2.1294865189764725,
      "grad_norm": 2.765625,
      "learning_rate": 1.612070375476063e-05,
      "loss": 0.7382,
      "step": 607600
    },
    {
      "epoch": 2.129521566483368,
      "grad_norm": 2.90625,
      "learning_rate": 1.6120054726096926e-05,
      "loss": 0.7221,
      "step": 607610
    },
    {
      "epoch": 2.129556613990264,
      "grad_norm": 3.40625,
      "learning_rate": 1.6119405697433224e-05,
      "loss": 0.8648,
      "step": 607620
    },
    {
      "epoch": 2.1295916614971593,
      "grad_norm": 2.421875,
      "learning_rate": 1.611875666876952e-05,
      "loss": 0.7903,
      "step": 607630
    },
    {
      "epoch": 2.129626709004055,
      "grad_norm": 2.71875,
      "learning_rate": 1.6118107640105817e-05,
      "loss": 0.7346,
      "step": 607640
    },
    {
      "epoch": 2.129661756510951,
      "grad_norm": 2.640625,
      "learning_rate": 1.6117458611442115e-05,
      "loss": 0.7406,
      "step": 607650
    },
    {
      "epoch": 2.129696804017846,
      "grad_norm": 2.734375,
      "learning_rate": 1.6116809582778413e-05,
      "loss": 0.84,
      "step": 607660
    },
    {
      "epoch": 2.129731851524742,
      "grad_norm": 2.84375,
      "learning_rate": 1.611616055411471e-05,
      "loss": 0.8399,
      "step": 607670
    },
    {
      "epoch": 2.129766899031637,
      "grad_norm": 2.890625,
      "learning_rate": 1.611551152545101e-05,
      "loss": 0.8104,
      "step": 607680
    },
    {
      "epoch": 2.129801946538533,
      "grad_norm": 3.0,
      "learning_rate": 1.6114862496787307e-05,
      "loss": 0.8207,
      "step": 607690
    },
    {
      "epoch": 2.1298369940454287,
      "grad_norm": 2.859375,
      "learning_rate": 1.611421346812361e-05,
      "loss": 0.879,
      "step": 607700
    },
    {
      "epoch": 2.129872041552324,
      "grad_norm": 2.84375,
      "learning_rate": 1.6113564439459906e-05,
      "loss": 0.8386,
      "step": 607710
    },
    {
      "epoch": 2.1299070890592198,
      "grad_norm": 2.59375,
      "learning_rate": 1.6112915410796204e-05,
      "loss": 0.7902,
      "step": 607720
    },
    {
      "epoch": 2.1299421365661155,
      "grad_norm": 2.59375,
      "learning_rate": 1.6112266382132502e-05,
      "loss": 0.7687,
      "step": 607730
    },
    {
      "epoch": 2.129977184073011,
      "grad_norm": 2.5625,
      "learning_rate": 1.61116173534688e-05,
      "loss": 0.7236,
      "step": 607740
    },
    {
      "epoch": 2.1300122315799066,
      "grad_norm": 3.15625,
      "learning_rate": 1.61109683248051e-05,
      "loss": 0.7225,
      "step": 607750
    },
    {
      "epoch": 2.1300472790868024,
      "grad_norm": 3.21875,
      "learning_rate": 1.6110319296141396e-05,
      "loss": 0.79,
      "step": 607760
    },
    {
      "epoch": 2.1300823265936977,
      "grad_norm": 2.546875,
      "learning_rate": 1.6109670267477694e-05,
      "loss": 0.8006,
      "step": 607770
    },
    {
      "epoch": 2.1301173741005934,
      "grad_norm": 2.671875,
      "learning_rate": 1.6109021238813992e-05,
      "loss": 0.8685,
      "step": 607780
    },
    {
      "epoch": 2.1301524216074887,
      "grad_norm": 2.515625,
      "learning_rate": 1.610837221015029e-05,
      "loss": 0.8291,
      "step": 607790
    },
    {
      "epoch": 2.1301874691143845,
      "grad_norm": 2.96875,
      "learning_rate": 1.610772318148659e-05,
      "loss": 0.8257,
      "step": 607800
    },
    {
      "epoch": 2.1302225166212803,
      "grad_norm": 3.421875,
      "learning_rate": 1.6107074152822886e-05,
      "loss": 0.8252,
      "step": 607810
    },
    {
      "epoch": 2.1302575641281756,
      "grad_norm": 3.09375,
      "learning_rate": 1.6106425124159184e-05,
      "loss": 0.8282,
      "step": 607820
    },
    {
      "epoch": 2.1302926116350713,
      "grad_norm": 2.453125,
      "learning_rate": 1.6105776095495482e-05,
      "loss": 0.6772,
      "step": 607830
    },
    {
      "epoch": 2.130327659141967,
      "grad_norm": 2.9375,
      "learning_rate": 1.610512706683178e-05,
      "loss": 0.8257,
      "step": 607840
    },
    {
      "epoch": 2.1303627066488624,
      "grad_norm": 3.171875,
      "learning_rate": 1.610447803816808e-05,
      "loss": 0.8235,
      "step": 607850
    },
    {
      "epoch": 2.130397754155758,
      "grad_norm": 2.9375,
      "learning_rate": 1.6103829009504376e-05,
      "loss": 0.812,
      "step": 607860
    },
    {
      "epoch": 2.130432801662654,
      "grad_norm": 2.8125,
      "learning_rate": 1.6103179980840674e-05,
      "loss": 0.8031,
      "step": 607870
    },
    {
      "epoch": 2.1304678491695492,
      "grad_norm": 3.15625,
      "learning_rate": 1.6102530952176972e-05,
      "loss": 0.8506,
      "step": 607880
    },
    {
      "epoch": 2.130502896676445,
      "grad_norm": 2.5,
      "learning_rate": 1.610188192351327e-05,
      "loss": 0.7672,
      "step": 607890
    },
    {
      "epoch": 2.1305379441833407,
      "grad_norm": 3.015625,
      "learning_rate": 1.610123289484957e-05,
      "loss": 0.8311,
      "step": 607900
    },
    {
      "epoch": 2.130572991690236,
      "grad_norm": 2.65625,
      "learning_rate": 1.6100583866185866e-05,
      "loss": 0.7153,
      "step": 607910
    },
    {
      "epoch": 2.130608039197132,
      "grad_norm": 3.171875,
      "learning_rate": 1.6099934837522164e-05,
      "loss": 0.8379,
      "step": 607920
    },
    {
      "epoch": 2.130643086704027,
      "grad_norm": 3.171875,
      "learning_rate": 1.6099285808858462e-05,
      "loss": 0.8123,
      "step": 607930
    },
    {
      "epoch": 2.130678134210923,
      "grad_norm": 2.390625,
      "learning_rate": 1.609863678019476e-05,
      "loss": 0.7808,
      "step": 607940
    },
    {
      "epoch": 2.1307131817178186,
      "grad_norm": 3.859375,
      "learning_rate": 1.6097987751531062e-05,
      "loss": 0.7856,
      "step": 607950
    },
    {
      "epoch": 2.130748229224714,
      "grad_norm": 2.65625,
      "learning_rate": 1.609733872286736e-05,
      "loss": 0.7913,
      "step": 607960
    },
    {
      "epoch": 2.1307832767316097,
      "grad_norm": 2.328125,
      "learning_rate": 1.6096689694203658e-05,
      "loss": 0.819,
      "step": 607970
    },
    {
      "epoch": 2.1308183242385055,
      "grad_norm": 2.734375,
      "learning_rate": 1.6096040665539956e-05,
      "loss": 0.8123,
      "step": 607980
    },
    {
      "epoch": 2.130853371745401,
      "grad_norm": 3.03125,
      "learning_rate": 1.6095391636876254e-05,
      "loss": 0.8388,
      "step": 607990
    },
    {
      "epoch": 2.1308884192522966,
      "grad_norm": 2.625,
      "learning_rate": 1.609474260821255e-05,
      "loss": 0.8133,
      "step": 608000
    },
    {
      "epoch": 2.1309234667591923,
      "grad_norm": 2.84375,
      "learning_rate": 1.6094093579548846e-05,
      "loss": 0.895,
      "step": 608010
    },
    {
      "epoch": 2.1309585142660876,
      "grad_norm": 2.78125,
      "learning_rate": 1.6093444550885144e-05,
      "loss": 0.7813,
      "step": 608020
    },
    {
      "epoch": 2.1309935617729834,
      "grad_norm": 2.71875,
      "learning_rate": 1.6092795522221442e-05,
      "loss": 0.8252,
      "step": 608030
    },
    {
      "epoch": 2.1310286092798787,
      "grad_norm": 3.015625,
      "learning_rate": 1.609214649355774e-05,
      "loss": 0.8064,
      "step": 608040
    },
    {
      "epoch": 2.1310636567867745,
      "grad_norm": 3.34375,
      "learning_rate": 1.609149746489404e-05,
      "loss": 0.8115,
      "step": 608050
    },
    {
      "epoch": 2.13109870429367,
      "grad_norm": 3.0,
      "learning_rate": 1.6090848436230336e-05,
      "loss": 0.8417,
      "step": 608060
    },
    {
      "epoch": 2.1311337518005655,
      "grad_norm": 2.96875,
      "learning_rate": 1.6090199407566638e-05,
      "loss": 0.9059,
      "step": 608070
    },
    {
      "epoch": 2.1311687993074613,
      "grad_norm": 2.921875,
      "learning_rate": 1.6089550378902936e-05,
      "loss": 0.8263,
      "step": 608080
    },
    {
      "epoch": 2.131203846814357,
      "grad_norm": 3.203125,
      "learning_rate": 1.6088901350239234e-05,
      "loss": 0.7802,
      "step": 608090
    },
    {
      "epoch": 2.1312388943212524,
      "grad_norm": 2.96875,
      "learning_rate": 1.6088252321575532e-05,
      "loss": 0.8399,
      "step": 608100
    },
    {
      "epoch": 2.131273941828148,
      "grad_norm": 3.046875,
      "learning_rate": 1.608760329291183e-05,
      "loss": 0.7296,
      "step": 608110
    },
    {
      "epoch": 2.131308989335044,
      "grad_norm": 3.359375,
      "learning_rate": 1.6086954264248128e-05,
      "loss": 0.8648,
      "step": 608120
    },
    {
      "epoch": 2.131344036841939,
      "grad_norm": 2.9375,
      "learning_rate": 1.6086305235584426e-05,
      "loss": 0.8473,
      "step": 608130
    },
    {
      "epoch": 2.131379084348835,
      "grad_norm": 2.96875,
      "learning_rate": 1.6085656206920724e-05,
      "loss": 0.7624,
      "step": 608140
    },
    {
      "epoch": 2.1314141318557303,
      "grad_norm": 3.046875,
      "learning_rate": 1.6085007178257022e-05,
      "loss": 0.8635,
      "step": 608150
    },
    {
      "epoch": 2.131449179362626,
      "grad_norm": 2.78125,
      "learning_rate": 1.608435814959332e-05,
      "loss": 0.734,
      "step": 608160
    },
    {
      "epoch": 2.1314842268695218,
      "grad_norm": 2.71875,
      "learning_rate": 1.6083709120929618e-05,
      "loss": 0.8258,
      "step": 608170
    },
    {
      "epoch": 2.131519274376417,
      "grad_norm": 2.9375,
      "learning_rate": 1.6083060092265916e-05,
      "loss": 0.8936,
      "step": 608180
    },
    {
      "epoch": 2.131554321883313,
      "grad_norm": 2.84375,
      "learning_rate": 1.6082411063602214e-05,
      "loss": 0.7772,
      "step": 608190
    },
    {
      "epoch": 2.1315893693902086,
      "grad_norm": 2.671875,
      "learning_rate": 1.6081762034938512e-05,
      "loss": 0.8286,
      "step": 608200
    },
    {
      "epoch": 2.131624416897104,
      "grad_norm": 2.96875,
      "learning_rate": 1.608111300627481e-05,
      "loss": 0.7823,
      "step": 608210
    },
    {
      "epoch": 2.1316594644039997,
      "grad_norm": 2.75,
      "learning_rate": 1.6080463977611108e-05,
      "loss": 0.8124,
      "step": 608220
    },
    {
      "epoch": 2.1316945119108954,
      "grad_norm": 2.765625,
      "learning_rate": 1.6079814948947406e-05,
      "loss": 0.8501,
      "step": 608230
    },
    {
      "epoch": 2.1317295594177907,
      "grad_norm": 2.875,
      "learning_rate": 1.6079165920283704e-05,
      "loss": 0.7662,
      "step": 608240
    },
    {
      "epoch": 2.1317646069246865,
      "grad_norm": 2.921875,
      "learning_rate": 1.6078516891620002e-05,
      "loss": 0.7986,
      "step": 608250
    },
    {
      "epoch": 2.131799654431582,
      "grad_norm": 3.03125,
      "learning_rate": 1.60778678629563e-05,
      "loss": 0.7592,
      "step": 608260
    },
    {
      "epoch": 2.1318347019384776,
      "grad_norm": 2.84375,
      "learning_rate": 1.6077218834292598e-05,
      "loss": 0.8032,
      "step": 608270
    },
    {
      "epoch": 2.1318697494453733,
      "grad_norm": 3.0,
      "learning_rate": 1.6076569805628896e-05,
      "loss": 0.8101,
      "step": 608280
    },
    {
      "epoch": 2.1319047969522686,
      "grad_norm": 2.921875,
      "learning_rate": 1.6075920776965194e-05,
      "loss": 0.7483,
      "step": 608290
    },
    {
      "epoch": 2.1319398444591644,
      "grad_norm": 2.9375,
      "learning_rate": 1.6075271748301492e-05,
      "loss": 0.7736,
      "step": 608300
    },
    {
      "epoch": 2.13197489196606,
      "grad_norm": 2.59375,
      "learning_rate": 1.607462271963779e-05,
      "loss": 0.8039,
      "step": 608310
    },
    {
      "epoch": 2.1320099394729555,
      "grad_norm": 2.828125,
      "learning_rate": 1.607397369097409e-05,
      "loss": 0.7988,
      "step": 608320
    },
    {
      "epoch": 2.1320449869798512,
      "grad_norm": 3.09375,
      "learning_rate": 1.607332466231039e-05,
      "loss": 0.8467,
      "step": 608330
    },
    {
      "epoch": 2.132080034486747,
      "grad_norm": 2.65625,
      "learning_rate": 1.6072675633646687e-05,
      "loss": 0.7603,
      "step": 608340
    },
    {
      "epoch": 2.1321150819936423,
      "grad_norm": 3.5,
      "learning_rate": 1.6072026604982985e-05,
      "loss": 0.8556,
      "step": 608350
    },
    {
      "epoch": 2.132150129500538,
      "grad_norm": 2.90625,
      "learning_rate": 1.6071377576319283e-05,
      "loss": 0.79,
      "step": 608360
    },
    {
      "epoch": 2.1321851770074334,
      "grad_norm": 2.5625,
      "learning_rate": 1.607072854765558e-05,
      "loss": 0.9026,
      "step": 608370
    },
    {
      "epoch": 2.132220224514329,
      "grad_norm": 2.9375,
      "learning_rate": 1.6070079518991876e-05,
      "loss": 0.7564,
      "step": 608380
    },
    {
      "epoch": 2.132255272021225,
      "grad_norm": 3.390625,
      "learning_rate": 1.6069430490328174e-05,
      "loss": 0.8256,
      "step": 608390
    },
    {
      "epoch": 2.13229031952812,
      "grad_norm": 3.0,
      "learning_rate": 1.6068781461664472e-05,
      "loss": 0.8,
      "step": 608400
    },
    {
      "epoch": 2.132325367035016,
      "grad_norm": 3.09375,
      "learning_rate": 1.606813243300077e-05,
      "loss": 0.7492,
      "step": 608410
    },
    {
      "epoch": 2.1323604145419117,
      "grad_norm": 3.03125,
      "learning_rate": 1.6067483404337068e-05,
      "loss": 0.7764,
      "step": 608420
    },
    {
      "epoch": 2.132395462048807,
      "grad_norm": 3.078125,
      "learning_rate": 1.606683437567337e-05,
      "loss": 0.8334,
      "step": 608430
    },
    {
      "epoch": 2.132430509555703,
      "grad_norm": 2.515625,
      "learning_rate": 1.6066185347009667e-05,
      "loss": 0.8324,
      "step": 608440
    },
    {
      "epoch": 2.1324655570625985,
      "grad_norm": 2.765625,
      "learning_rate": 1.6065536318345965e-05,
      "loss": 0.7829,
      "step": 608450
    },
    {
      "epoch": 2.132500604569494,
      "grad_norm": 2.8125,
      "learning_rate": 1.6064887289682263e-05,
      "loss": 0.8017,
      "step": 608460
    },
    {
      "epoch": 2.1325356520763896,
      "grad_norm": 2.609375,
      "learning_rate": 1.606423826101856e-05,
      "loss": 0.77,
      "step": 608470
    },
    {
      "epoch": 2.132570699583285,
      "grad_norm": 2.953125,
      "learning_rate": 1.606358923235486e-05,
      "loss": 0.8406,
      "step": 608480
    },
    {
      "epoch": 2.1326057470901807,
      "grad_norm": 2.953125,
      "learning_rate": 1.6062940203691157e-05,
      "loss": 0.871,
      "step": 608490
    },
    {
      "epoch": 2.1326407945970765,
      "grad_norm": 2.953125,
      "learning_rate": 1.6062291175027455e-05,
      "loss": 0.8145,
      "step": 608500
    },
    {
      "epoch": 2.1326758421039718,
      "grad_norm": 2.640625,
      "learning_rate": 1.6061642146363753e-05,
      "loss": 0.8013,
      "step": 608510
    },
    {
      "epoch": 2.1327108896108675,
      "grad_norm": 2.328125,
      "learning_rate": 1.606099311770005e-05,
      "loss": 0.8192,
      "step": 608520
    },
    {
      "epoch": 2.1327459371177633,
      "grad_norm": 3.03125,
      "learning_rate": 1.606034408903635e-05,
      "loss": 0.8677,
      "step": 608530
    },
    {
      "epoch": 2.1327809846246586,
      "grad_norm": 2.78125,
      "learning_rate": 1.6059695060372647e-05,
      "loss": 0.8085,
      "step": 608540
    },
    {
      "epoch": 2.1328160321315544,
      "grad_norm": 2.84375,
      "learning_rate": 1.6059046031708945e-05,
      "loss": 0.8514,
      "step": 608550
    },
    {
      "epoch": 2.13285107963845,
      "grad_norm": 3.0,
      "learning_rate": 1.6058397003045243e-05,
      "loss": 0.7751,
      "step": 608560
    },
    {
      "epoch": 2.1328861271453454,
      "grad_norm": 3.96875,
      "learning_rate": 1.605774797438154e-05,
      "loss": 0.8673,
      "step": 608570
    },
    {
      "epoch": 2.132921174652241,
      "grad_norm": 3.046875,
      "learning_rate": 1.605709894571784e-05,
      "loss": 0.7652,
      "step": 608580
    },
    {
      "epoch": 2.1329562221591365,
      "grad_norm": 2.515625,
      "learning_rate": 1.6056449917054137e-05,
      "loss": 0.808,
      "step": 608590
    },
    {
      "epoch": 2.1329912696660323,
      "grad_norm": 2.796875,
      "learning_rate": 1.6055800888390435e-05,
      "loss": 0.7835,
      "step": 608600
    },
    {
      "epoch": 2.133026317172928,
      "grad_norm": 2.765625,
      "learning_rate": 1.6055151859726733e-05,
      "loss": 0.8332,
      "step": 608610
    },
    {
      "epoch": 2.1330613646798233,
      "grad_norm": 2.765625,
      "learning_rate": 1.605450283106303e-05,
      "loss": 0.7975,
      "step": 608620
    },
    {
      "epoch": 2.133096412186719,
      "grad_norm": 3.046875,
      "learning_rate": 1.605385380239933e-05,
      "loss": 0.7969,
      "step": 608630
    },
    {
      "epoch": 2.133131459693615,
      "grad_norm": 3.15625,
      "learning_rate": 1.6053204773735627e-05,
      "loss": 0.8107,
      "step": 608640
    },
    {
      "epoch": 2.13316650720051,
      "grad_norm": 2.5,
      "learning_rate": 1.6052555745071925e-05,
      "loss": 0.8123,
      "step": 608650
    },
    {
      "epoch": 2.133201554707406,
      "grad_norm": 2.859375,
      "learning_rate": 1.6051906716408223e-05,
      "loss": 0.8275,
      "step": 608660
    },
    {
      "epoch": 2.1332366022143017,
      "grad_norm": 3.171875,
      "learning_rate": 1.605125768774452e-05,
      "loss": 0.8473,
      "step": 608670
    },
    {
      "epoch": 2.133271649721197,
      "grad_norm": 3.25,
      "learning_rate": 1.6050608659080822e-05,
      "loss": 0.7938,
      "step": 608680
    },
    {
      "epoch": 2.1333066972280927,
      "grad_norm": 3.09375,
      "learning_rate": 1.604995963041712e-05,
      "loss": 0.8653,
      "step": 608690
    },
    {
      "epoch": 2.133341744734988,
      "grad_norm": 3.15625,
      "learning_rate": 1.604931060175342e-05,
      "loss": 0.8691,
      "step": 608700
    },
    {
      "epoch": 2.133376792241884,
      "grad_norm": 2.96875,
      "learning_rate": 1.6048661573089716e-05,
      "loss": 0.8348,
      "step": 608710
    },
    {
      "epoch": 2.1334118397487796,
      "grad_norm": 2.953125,
      "learning_rate": 1.6048012544426014e-05,
      "loss": 0.9505,
      "step": 608720
    },
    {
      "epoch": 2.133446887255675,
      "grad_norm": 2.796875,
      "learning_rate": 1.6047363515762312e-05,
      "loss": 0.8418,
      "step": 608730
    },
    {
      "epoch": 2.1334819347625706,
      "grad_norm": 3.296875,
      "learning_rate": 1.604671448709861e-05,
      "loss": 0.7511,
      "step": 608740
    },
    {
      "epoch": 2.1335169822694664,
      "grad_norm": 3.40625,
      "learning_rate": 1.6046065458434905e-05,
      "loss": 0.8285,
      "step": 608750
    },
    {
      "epoch": 2.1335520297763617,
      "grad_norm": 2.59375,
      "learning_rate": 1.6045416429771203e-05,
      "loss": 0.8031,
      "step": 608760
    },
    {
      "epoch": 2.1335870772832575,
      "grad_norm": 3.296875,
      "learning_rate": 1.60447674011075e-05,
      "loss": 0.8492,
      "step": 608770
    },
    {
      "epoch": 2.1336221247901532,
      "grad_norm": 2.59375,
      "learning_rate": 1.60441183724438e-05,
      "loss": 0.8149,
      "step": 608780
    },
    {
      "epoch": 2.1336571722970485,
      "grad_norm": 2.84375,
      "learning_rate": 1.6043469343780097e-05,
      "loss": 0.8167,
      "step": 608790
    },
    {
      "epoch": 2.1336922198039443,
      "grad_norm": 2.453125,
      "learning_rate": 1.60428203151164e-05,
      "loss": 0.7426,
      "step": 608800
    },
    {
      "epoch": 2.1337272673108396,
      "grad_norm": 2.796875,
      "learning_rate": 1.6042171286452696e-05,
      "loss": 0.753,
      "step": 608810
    },
    {
      "epoch": 2.1337623148177354,
      "grad_norm": 2.875,
      "learning_rate": 1.6041522257788994e-05,
      "loss": 0.8873,
      "step": 608820
    },
    {
      "epoch": 2.133797362324631,
      "grad_norm": 3.140625,
      "learning_rate": 1.6040873229125292e-05,
      "loss": 0.8076,
      "step": 608830
    },
    {
      "epoch": 2.1338324098315264,
      "grad_norm": 3.0,
      "learning_rate": 1.604022420046159e-05,
      "loss": 0.8864,
      "step": 608840
    },
    {
      "epoch": 2.133867457338422,
      "grad_norm": 2.59375,
      "learning_rate": 1.603957517179789e-05,
      "loss": 0.7961,
      "step": 608850
    },
    {
      "epoch": 2.133902504845318,
      "grad_norm": 2.625,
      "learning_rate": 1.6038926143134186e-05,
      "loss": 0.7558,
      "step": 608860
    },
    {
      "epoch": 2.1339375523522133,
      "grad_norm": 2.8125,
      "learning_rate": 1.6038277114470484e-05,
      "loss": 0.8283,
      "step": 608870
    },
    {
      "epoch": 2.133972599859109,
      "grad_norm": 3.078125,
      "learning_rate": 1.6037628085806782e-05,
      "loss": 0.8747,
      "step": 608880
    },
    {
      "epoch": 2.134007647366005,
      "grad_norm": 2.6875,
      "learning_rate": 1.603697905714308e-05,
      "loss": 0.8356,
      "step": 608890
    },
    {
      "epoch": 2.1340426948729,
      "grad_norm": 3.25,
      "learning_rate": 1.603633002847938e-05,
      "loss": 0.826,
      "step": 608900
    },
    {
      "epoch": 2.134077742379796,
      "grad_norm": 3.109375,
      "learning_rate": 1.6035680999815676e-05,
      "loss": 0.7343,
      "step": 608910
    },
    {
      "epoch": 2.134112789886691,
      "grad_norm": 3.015625,
      "learning_rate": 1.6035031971151974e-05,
      "loss": 0.8423,
      "step": 608920
    },
    {
      "epoch": 2.134147837393587,
      "grad_norm": 3.03125,
      "learning_rate": 1.6034382942488272e-05,
      "loss": 0.8527,
      "step": 608930
    },
    {
      "epoch": 2.1341828849004827,
      "grad_norm": 3.0625,
      "learning_rate": 1.603373391382457e-05,
      "loss": 0.8199,
      "step": 608940
    },
    {
      "epoch": 2.134217932407378,
      "grad_norm": 2.953125,
      "learning_rate": 1.603308488516087e-05,
      "loss": 0.823,
      "step": 608950
    },
    {
      "epoch": 2.1342529799142738,
      "grad_norm": 3.09375,
      "learning_rate": 1.6032435856497166e-05,
      "loss": 0.8553,
      "step": 608960
    },
    {
      "epoch": 2.1342880274211695,
      "grad_norm": 2.59375,
      "learning_rate": 1.6031786827833464e-05,
      "loss": 0.7806,
      "step": 608970
    },
    {
      "epoch": 2.134323074928065,
      "grad_norm": 3.296875,
      "learning_rate": 1.6031137799169762e-05,
      "loss": 0.8237,
      "step": 608980
    },
    {
      "epoch": 2.1343581224349606,
      "grad_norm": 2.515625,
      "learning_rate": 1.603048877050606e-05,
      "loss": 0.8405,
      "step": 608990
    },
    {
      "epoch": 2.1343931699418563,
      "grad_norm": 3.3125,
      "learning_rate": 1.602983974184236e-05,
      "loss": 0.8327,
      "step": 609000
    },
    {
      "epoch": 2.1344282174487517,
      "grad_norm": 3.0,
      "learning_rate": 1.6029190713178656e-05,
      "loss": 0.8234,
      "step": 609010
    },
    {
      "epoch": 2.1344632649556474,
      "grad_norm": 2.875,
      "learning_rate": 1.6028541684514954e-05,
      "loss": 0.8622,
      "step": 609020
    },
    {
      "epoch": 2.134498312462543,
      "grad_norm": 2.703125,
      "learning_rate": 1.6027892655851252e-05,
      "loss": 0.8089,
      "step": 609030
    },
    {
      "epoch": 2.1345333599694385,
      "grad_norm": 3.140625,
      "learning_rate": 1.602724362718755e-05,
      "loss": 0.8458,
      "step": 609040
    },
    {
      "epoch": 2.1345684074763343,
      "grad_norm": 2.953125,
      "learning_rate": 1.6026594598523852e-05,
      "loss": 0.8172,
      "step": 609050
    },
    {
      "epoch": 2.1346034549832296,
      "grad_norm": 3.21875,
      "learning_rate": 1.602594556986015e-05,
      "loss": 0.8164,
      "step": 609060
    },
    {
      "epoch": 2.1346385024901253,
      "grad_norm": 2.96875,
      "learning_rate": 1.6025296541196448e-05,
      "loss": 0.9342,
      "step": 609070
    },
    {
      "epoch": 2.134673549997021,
      "grad_norm": 3.09375,
      "learning_rate": 1.6024647512532746e-05,
      "loss": 0.8105,
      "step": 609080
    },
    {
      "epoch": 2.1347085975039164,
      "grad_norm": 2.859375,
      "learning_rate": 1.6023998483869044e-05,
      "loss": 0.7706,
      "step": 609090
    },
    {
      "epoch": 2.134743645010812,
      "grad_norm": 2.75,
      "learning_rate": 1.6023349455205342e-05,
      "loss": 0.7234,
      "step": 609100
    },
    {
      "epoch": 2.134778692517708,
      "grad_norm": 3.28125,
      "learning_rate": 1.602270042654164e-05,
      "loss": 0.7424,
      "step": 609110
    },
    {
      "epoch": 2.1348137400246032,
      "grad_norm": 2.671875,
      "learning_rate": 1.6022051397877938e-05,
      "loss": 0.8199,
      "step": 609120
    },
    {
      "epoch": 2.134848787531499,
      "grad_norm": 2.875,
      "learning_rate": 1.6021402369214232e-05,
      "loss": 0.7696,
      "step": 609130
    },
    {
      "epoch": 2.1348838350383947,
      "grad_norm": 2.859375,
      "learning_rate": 1.602075334055053e-05,
      "loss": 0.8151,
      "step": 609140
    },
    {
      "epoch": 2.13491888254529,
      "grad_norm": 2.84375,
      "learning_rate": 1.602010431188683e-05,
      "loss": 0.7617,
      "step": 609150
    },
    {
      "epoch": 2.134953930052186,
      "grad_norm": 3.109375,
      "learning_rate": 1.6019455283223126e-05,
      "loss": 0.8367,
      "step": 609160
    },
    {
      "epoch": 2.1349889775590816,
      "grad_norm": 2.890625,
      "learning_rate": 1.6018806254559428e-05,
      "loss": 0.8183,
      "step": 609170
    },
    {
      "epoch": 2.135024025065977,
      "grad_norm": 2.71875,
      "learning_rate": 1.6018157225895726e-05,
      "loss": 0.8508,
      "step": 609180
    },
    {
      "epoch": 2.1350590725728726,
      "grad_norm": 2.65625,
      "learning_rate": 1.6017508197232024e-05,
      "loss": 0.7584,
      "step": 609190
    },
    {
      "epoch": 2.135094120079768,
      "grad_norm": 3.453125,
      "learning_rate": 1.6016859168568322e-05,
      "loss": 0.8166,
      "step": 609200
    },
    {
      "epoch": 2.1351291675866637,
      "grad_norm": 3.296875,
      "learning_rate": 1.601621013990462e-05,
      "loss": 0.9338,
      "step": 609210
    },
    {
      "epoch": 2.1351642150935595,
      "grad_norm": 3.015625,
      "learning_rate": 1.6015561111240918e-05,
      "loss": 0.9348,
      "step": 609220
    },
    {
      "epoch": 2.135199262600455,
      "grad_norm": 3.171875,
      "learning_rate": 1.6014912082577216e-05,
      "loss": 0.7938,
      "step": 609230
    },
    {
      "epoch": 2.1352343101073505,
      "grad_norm": 2.71875,
      "learning_rate": 1.6014263053913514e-05,
      "loss": 0.7862,
      "step": 609240
    },
    {
      "epoch": 2.1352693576142463,
      "grad_norm": 3.078125,
      "learning_rate": 1.6013614025249812e-05,
      "loss": 0.8707,
      "step": 609250
    },
    {
      "epoch": 2.1353044051211416,
      "grad_norm": 2.875,
      "learning_rate": 1.601296499658611e-05,
      "loss": 0.7947,
      "step": 609260
    },
    {
      "epoch": 2.1353394526280374,
      "grad_norm": 2.9375,
      "learning_rate": 1.6012315967922408e-05,
      "loss": 0.8435,
      "step": 609270
    },
    {
      "epoch": 2.135374500134933,
      "grad_norm": 2.3125,
      "learning_rate": 1.6011666939258706e-05,
      "loss": 0.8406,
      "step": 609280
    },
    {
      "epoch": 2.1354095476418284,
      "grad_norm": 2.671875,
      "learning_rate": 1.6011017910595004e-05,
      "loss": 0.7613,
      "step": 609290
    },
    {
      "epoch": 2.135444595148724,
      "grad_norm": 2.703125,
      "learning_rate": 1.6010368881931305e-05,
      "loss": 0.7909,
      "step": 609300
    },
    {
      "epoch": 2.1354796426556195,
      "grad_norm": 3.109375,
      "learning_rate": 1.6009719853267603e-05,
      "loss": 0.8006,
      "step": 609310
    },
    {
      "epoch": 2.1355146901625153,
      "grad_norm": 2.75,
      "learning_rate": 1.6009070824603898e-05,
      "loss": 0.8563,
      "step": 609320
    },
    {
      "epoch": 2.135549737669411,
      "grad_norm": 2.6875,
      "learning_rate": 1.6008421795940196e-05,
      "loss": 0.8326,
      "step": 609330
    },
    {
      "epoch": 2.1355847851763063,
      "grad_norm": 2.8125,
      "learning_rate": 1.6007772767276494e-05,
      "loss": 0.8475,
      "step": 609340
    },
    {
      "epoch": 2.135619832683202,
      "grad_norm": 2.984375,
      "learning_rate": 1.6007123738612792e-05,
      "loss": 0.8555,
      "step": 609350
    },
    {
      "epoch": 2.135654880190098,
      "grad_norm": 2.90625,
      "learning_rate": 1.600647470994909e-05,
      "loss": 0.8296,
      "step": 609360
    },
    {
      "epoch": 2.135689927696993,
      "grad_norm": 2.734375,
      "learning_rate": 1.6005825681285388e-05,
      "loss": 0.8208,
      "step": 609370
    },
    {
      "epoch": 2.135724975203889,
      "grad_norm": 2.796875,
      "learning_rate": 1.6005176652621686e-05,
      "loss": 0.8237,
      "step": 609380
    },
    {
      "epoch": 2.1357600227107847,
      "grad_norm": 2.78125,
      "learning_rate": 1.6004527623957984e-05,
      "loss": 0.7617,
      "step": 609390
    },
    {
      "epoch": 2.13579507021768,
      "grad_norm": 3.234375,
      "learning_rate": 1.6003878595294282e-05,
      "loss": 0.8727,
      "step": 609400
    },
    {
      "epoch": 2.1358301177245758,
      "grad_norm": 3.140625,
      "learning_rate": 1.600322956663058e-05,
      "loss": 0.8519,
      "step": 609410
    },
    {
      "epoch": 2.135865165231471,
      "grad_norm": 2.703125,
      "learning_rate": 1.600258053796688e-05,
      "loss": 0.7961,
      "step": 609420
    },
    {
      "epoch": 2.135900212738367,
      "grad_norm": 3.265625,
      "learning_rate": 1.600193150930318e-05,
      "loss": 0.7539,
      "step": 609430
    },
    {
      "epoch": 2.1359352602452626,
      "grad_norm": 3.015625,
      "learning_rate": 1.6001282480639477e-05,
      "loss": 0.8244,
      "step": 609440
    },
    {
      "epoch": 2.135970307752158,
      "grad_norm": 2.828125,
      "learning_rate": 1.6000633451975775e-05,
      "loss": 0.8506,
      "step": 609450
    },
    {
      "epoch": 2.1360053552590537,
      "grad_norm": 2.953125,
      "learning_rate": 1.5999984423312073e-05,
      "loss": 0.7975,
      "step": 609460
    },
    {
      "epoch": 2.1360404027659494,
      "grad_norm": 2.90625,
      "learning_rate": 1.599933539464837e-05,
      "loss": 0.8849,
      "step": 609470
    },
    {
      "epoch": 2.1360754502728447,
      "grad_norm": 3.4375,
      "learning_rate": 1.599868636598467e-05,
      "loss": 0.8807,
      "step": 609480
    },
    {
      "epoch": 2.1361104977797405,
      "grad_norm": 3.0625,
      "learning_rate": 1.5998037337320967e-05,
      "loss": 0.7797,
      "step": 609490
    },
    {
      "epoch": 2.1361455452866362,
      "grad_norm": 3.125,
      "learning_rate": 1.5997388308657265e-05,
      "loss": 0.8151,
      "step": 609500
    },
    {
      "epoch": 2.1361805927935316,
      "grad_norm": 2.609375,
      "learning_rate": 1.599673927999356e-05,
      "loss": 0.7918,
      "step": 609510
    },
    {
      "epoch": 2.1362156403004273,
      "grad_norm": 2.75,
      "learning_rate": 1.5996090251329858e-05,
      "loss": 0.858,
      "step": 609520
    },
    {
      "epoch": 2.1362506878073226,
      "grad_norm": 3.25,
      "learning_rate": 1.599544122266616e-05,
      "loss": 0.8997,
      "step": 609530
    },
    {
      "epoch": 2.1362857353142184,
      "grad_norm": 2.78125,
      "learning_rate": 1.5994792194002457e-05,
      "loss": 0.8824,
      "step": 609540
    },
    {
      "epoch": 2.136320782821114,
      "grad_norm": 3.03125,
      "learning_rate": 1.5994143165338755e-05,
      "loss": 0.8813,
      "step": 609550
    },
    {
      "epoch": 2.1363558303280095,
      "grad_norm": 3.03125,
      "learning_rate": 1.5993494136675053e-05,
      "loss": 0.759,
      "step": 609560
    },
    {
      "epoch": 2.136390877834905,
      "grad_norm": 2.75,
      "learning_rate": 1.599284510801135e-05,
      "loss": 0.8256,
      "step": 609570
    },
    {
      "epoch": 2.136425925341801,
      "grad_norm": 2.875,
      "learning_rate": 1.599219607934765e-05,
      "loss": 0.8191,
      "step": 609580
    },
    {
      "epoch": 2.1364609728486963,
      "grad_norm": 2.8125,
      "learning_rate": 1.5991547050683947e-05,
      "loss": 0.7575,
      "step": 609590
    },
    {
      "epoch": 2.136496020355592,
      "grad_norm": 3.265625,
      "learning_rate": 1.5990898022020245e-05,
      "loss": 0.8503,
      "step": 609600
    },
    {
      "epoch": 2.136531067862488,
      "grad_norm": 2.921875,
      "learning_rate": 1.5990248993356543e-05,
      "loss": 0.808,
      "step": 609610
    },
    {
      "epoch": 2.136566115369383,
      "grad_norm": 2.875,
      "learning_rate": 1.598959996469284e-05,
      "loss": 0.7791,
      "step": 609620
    },
    {
      "epoch": 2.136601162876279,
      "grad_norm": 2.90625,
      "learning_rate": 1.598895093602914e-05,
      "loss": 0.8523,
      "step": 609630
    },
    {
      "epoch": 2.136636210383174,
      "grad_norm": 3.328125,
      "learning_rate": 1.5988301907365437e-05,
      "loss": 0.8889,
      "step": 609640
    },
    {
      "epoch": 2.13667125789007,
      "grad_norm": 2.859375,
      "learning_rate": 1.5987652878701735e-05,
      "loss": 0.835,
      "step": 609650
    },
    {
      "epoch": 2.1367063053969657,
      "grad_norm": 3.09375,
      "learning_rate": 1.5987003850038033e-05,
      "loss": 0.7816,
      "step": 609660
    },
    {
      "epoch": 2.136741352903861,
      "grad_norm": 2.28125,
      "learning_rate": 1.5986354821374335e-05,
      "loss": 0.8473,
      "step": 609670
    },
    {
      "epoch": 2.136776400410757,
      "grad_norm": 2.546875,
      "learning_rate": 1.5985705792710633e-05,
      "loss": 0.7712,
      "step": 609680
    },
    {
      "epoch": 2.1368114479176525,
      "grad_norm": 2.703125,
      "learning_rate": 1.5985056764046927e-05,
      "loss": 0.767,
      "step": 609690
    },
    {
      "epoch": 2.136846495424548,
      "grad_norm": 2.640625,
      "learning_rate": 1.5984407735383225e-05,
      "loss": 0.7176,
      "step": 609700
    },
    {
      "epoch": 2.1368815429314436,
      "grad_norm": 2.71875,
      "learning_rate": 1.5983758706719523e-05,
      "loss": 0.7624,
      "step": 609710
    },
    {
      "epoch": 2.1369165904383394,
      "grad_norm": 3.15625,
      "learning_rate": 1.598310967805582e-05,
      "loss": 0.8358,
      "step": 609720
    },
    {
      "epoch": 2.1369516379452347,
      "grad_norm": 2.640625,
      "learning_rate": 1.598246064939212e-05,
      "loss": 0.8878,
      "step": 609730
    },
    {
      "epoch": 2.1369866854521304,
      "grad_norm": 3.03125,
      "learning_rate": 1.5981811620728417e-05,
      "loss": 0.9199,
      "step": 609740
    },
    {
      "epoch": 2.1370217329590258,
      "grad_norm": 3.109375,
      "learning_rate": 1.5981162592064715e-05,
      "loss": 0.7969,
      "step": 609750
    },
    {
      "epoch": 2.1370567804659215,
      "grad_norm": 3.109375,
      "learning_rate": 1.5980513563401013e-05,
      "loss": 0.8613,
      "step": 609760
    },
    {
      "epoch": 2.1370918279728173,
      "grad_norm": 3.203125,
      "learning_rate": 1.597986453473731e-05,
      "loss": 0.7827,
      "step": 609770
    },
    {
      "epoch": 2.1371268754797126,
      "grad_norm": 3.25,
      "learning_rate": 1.5979215506073613e-05,
      "loss": 0.8369,
      "step": 609780
    },
    {
      "epoch": 2.1371619229866083,
      "grad_norm": 2.765625,
      "learning_rate": 1.597856647740991e-05,
      "loss": 0.8582,
      "step": 609790
    },
    {
      "epoch": 2.137196970493504,
      "grad_norm": 2.75,
      "learning_rate": 1.597791744874621e-05,
      "loss": 0.8161,
      "step": 609800
    },
    {
      "epoch": 2.1372320180003994,
      "grad_norm": 2.53125,
      "learning_rate": 1.5977268420082507e-05,
      "loss": 0.7946,
      "step": 609810
    },
    {
      "epoch": 2.137267065507295,
      "grad_norm": 2.859375,
      "learning_rate": 1.5976619391418805e-05,
      "loss": 0.7093,
      "step": 609820
    },
    {
      "epoch": 2.137302113014191,
      "grad_norm": 3.203125,
      "learning_rate": 1.5975970362755103e-05,
      "loss": 0.8548,
      "step": 609830
    },
    {
      "epoch": 2.1373371605210862,
      "grad_norm": 3.140625,
      "learning_rate": 1.59753213340914e-05,
      "loss": 0.7297,
      "step": 609840
    },
    {
      "epoch": 2.137372208027982,
      "grad_norm": 2.734375,
      "learning_rate": 1.59746723054277e-05,
      "loss": 0.7513,
      "step": 609850
    },
    {
      "epoch": 2.1374072555348773,
      "grad_norm": 2.9375,
      "learning_rate": 1.5974023276763997e-05,
      "loss": 0.7537,
      "step": 609860
    },
    {
      "epoch": 2.137442303041773,
      "grad_norm": 2.75,
      "learning_rate": 1.5973374248100295e-05,
      "loss": 0.8225,
      "step": 609870
    },
    {
      "epoch": 2.137477350548669,
      "grad_norm": 3.171875,
      "learning_rate": 1.597272521943659e-05,
      "loss": 0.8593,
      "step": 609880
    },
    {
      "epoch": 2.137512398055564,
      "grad_norm": 2.875,
      "learning_rate": 1.5972076190772887e-05,
      "loss": 0.8379,
      "step": 609890
    },
    {
      "epoch": 2.13754744556246,
      "grad_norm": 2.625,
      "learning_rate": 1.597142716210919e-05,
      "loss": 0.823,
      "step": 609900
    },
    {
      "epoch": 2.1375824930693557,
      "grad_norm": 2.796875,
      "learning_rate": 1.5970778133445487e-05,
      "loss": 0.8037,
      "step": 609910
    },
    {
      "epoch": 2.137617540576251,
      "grad_norm": 3.125,
      "learning_rate": 1.5970129104781785e-05,
      "loss": 0.7398,
      "step": 609920
    },
    {
      "epoch": 2.1376525880831467,
      "grad_norm": 3.046875,
      "learning_rate": 1.5969480076118083e-05,
      "loss": 0.8119,
      "step": 609930
    },
    {
      "epoch": 2.1376876355900425,
      "grad_norm": 3.171875,
      "learning_rate": 1.596883104745438e-05,
      "loss": 0.8396,
      "step": 609940
    },
    {
      "epoch": 2.137722683096938,
      "grad_norm": 2.515625,
      "learning_rate": 1.596818201879068e-05,
      "loss": 0.7809,
      "step": 609950
    },
    {
      "epoch": 2.1377577306038336,
      "grad_norm": 2.890625,
      "learning_rate": 1.5967532990126977e-05,
      "loss": 0.8839,
      "step": 609960
    },
    {
      "epoch": 2.137792778110729,
      "grad_norm": 2.875,
      "learning_rate": 1.5966883961463275e-05,
      "loss": 0.832,
      "step": 609970
    },
    {
      "epoch": 2.1378278256176246,
      "grad_norm": 3.234375,
      "learning_rate": 1.5966234932799573e-05,
      "loss": 0.7791,
      "step": 609980
    },
    {
      "epoch": 2.1378628731245204,
      "grad_norm": 2.828125,
      "learning_rate": 1.596558590413587e-05,
      "loss": 0.8623,
      "step": 609990
    },
    {
      "epoch": 2.1378979206314157,
      "grad_norm": 3.625,
      "learning_rate": 1.596493687547217e-05,
      "loss": 0.7522,
      "step": 610000
    },
    {
      "epoch": 2.1378979206314157,
      "eval_loss": 0.7661474943161011,
      "eval_runtime": 565.897,
      "eval_samples_per_second": 672.271,
      "eval_steps_per_second": 56.023,
      "step": 610000
    },
    {
      "epoch": 2.1379329681383115,
      "grad_norm": 3.3125,
      "learning_rate": 1.5964287846808467e-05,
      "loss": 0.7978,
      "step": 610010
    },
    {
      "epoch": 2.137968015645207,
      "grad_norm": 3.28125,
      "learning_rate": 1.5963638818144765e-05,
      "loss": 0.9029,
      "step": 610020
    },
    {
      "epoch": 2.1380030631521025,
      "grad_norm": 3.09375,
      "learning_rate": 1.5962989789481063e-05,
      "loss": 0.7852,
      "step": 610030
    },
    {
      "epoch": 2.1380381106589983,
      "grad_norm": 2.59375,
      "learning_rate": 1.5962340760817364e-05,
      "loss": 0.7588,
      "step": 610040
    },
    {
      "epoch": 2.138073158165894,
      "grad_norm": 2.828125,
      "learning_rate": 1.5961691732153662e-05,
      "loss": 0.795,
      "step": 610050
    },
    {
      "epoch": 2.1381082056727894,
      "grad_norm": 2.75,
      "learning_rate": 1.596104270348996e-05,
      "loss": 0.7449,
      "step": 610060
    },
    {
      "epoch": 2.138143253179685,
      "grad_norm": 3.03125,
      "learning_rate": 1.5960393674826255e-05,
      "loss": 0.808,
      "step": 610070
    },
    {
      "epoch": 2.1381783006865804,
      "grad_norm": 3.0625,
      "learning_rate": 1.5959744646162553e-05,
      "loss": 0.8002,
      "step": 610080
    },
    {
      "epoch": 2.138213348193476,
      "grad_norm": 2.671875,
      "learning_rate": 1.595909561749885e-05,
      "loss": 0.8108,
      "step": 610090
    },
    {
      "epoch": 2.138248395700372,
      "grad_norm": 3.265625,
      "learning_rate": 1.595844658883515e-05,
      "loss": 0.8629,
      "step": 610100
    },
    {
      "epoch": 2.1382834432072673,
      "grad_norm": 3.171875,
      "learning_rate": 1.5957797560171447e-05,
      "loss": 0.9341,
      "step": 610110
    },
    {
      "epoch": 2.138318490714163,
      "grad_norm": 3.09375,
      "learning_rate": 1.5957148531507745e-05,
      "loss": 0.8113,
      "step": 610120
    },
    {
      "epoch": 2.138353538221059,
      "grad_norm": 2.6875,
      "learning_rate": 1.5956499502844043e-05,
      "loss": 0.8082,
      "step": 610130
    },
    {
      "epoch": 2.138388585727954,
      "grad_norm": 3.15625,
      "learning_rate": 1.595585047418034e-05,
      "loss": 0.7906,
      "step": 610140
    },
    {
      "epoch": 2.13842363323485,
      "grad_norm": 3.1875,
      "learning_rate": 1.5955201445516642e-05,
      "loss": 0.8362,
      "step": 610150
    },
    {
      "epoch": 2.1384586807417456,
      "grad_norm": 3.125,
      "learning_rate": 1.595455241685294e-05,
      "loss": 0.8962,
      "step": 610160
    },
    {
      "epoch": 2.138493728248641,
      "grad_norm": 2.53125,
      "learning_rate": 1.5953903388189238e-05,
      "loss": 0.8483,
      "step": 610170
    },
    {
      "epoch": 2.1385287757555367,
      "grad_norm": 2.859375,
      "learning_rate": 1.5953254359525536e-05,
      "loss": 0.799,
      "step": 610180
    },
    {
      "epoch": 2.138563823262432,
      "grad_norm": 3.046875,
      "learning_rate": 1.5952605330861834e-05,
      "loss": 0.8522,
      "step": 610190
    },
    {
      "epoch": 2.1385988707693278,
      "grad_norm": 2.546875,
      "learning_rate": 1.5951956302198132e-05,
      "loss": 0.8555,
      "step": 610200
    },
    {
      "epoch": 2.1386339182762235,
      "grad_norm": 2.78125,
      "learning_rate": 1.595130727353443e-05,
      "loss": 0.772,
      "step": 610210
    },
    {
      "epoch": 2.138668965783119,
      "grad_norm": 3.046875,
      "learning_rate": 1.5950658244870728e-05,
      "loss": 0.827,
      "step": 610220
    },
    {
      "epoch": 2.1387040132900146,
      "grad_norm": 3.0625,
      "learning_rate": 1.5950009216207026e-05,
      "loss": 0.8146,
      "step": 610230
    },
    {
      "epoch": 2.1387390607969103,
      "grad_norm": 3.03125,
      "learning_rate": 1.5949360187543324e-05,
      "loss": 0.7684,
      "step": 610240
    },
    {
      "epoch": 2.1387741083038057,
      "grad_norm": 2.65625,
      "learning_rate": 1.5948711158879622e-05,
      "loss": 0.7244,
      "step": 610250
    },
    {
      "epoch": 2.1388091558107014,
      "grad_norm": 3.46875,
      "learning_rate": 1.594806213021592e-05,
      "loss": 0.8852,
      "step": 610260
    },
    {
      "epoch": 2.138844203317597,
      "grad_norm": 2.640625,
      "learning_rate": 1.5947413101552218e-05,
      "loss": 0.8632,
      "step": 610270
    },
    {
      "epoch": 2.1388792508244925,
      "grad_norm": 2.765625,
      "learning_rate": 1.5946764072888516e-05,
      "loss": 0.7927,
      "step": 610280
    },
    {
      "epoch": 2.1389142983313882,
      "grad_norm": 3.53125,
      "learning_rate": 1.5946115044224814e-05,
      "loss": 0.861,
      "step": 610290
    },
    {
      "epoch": 2.138949345838284,
      "grad_norm": 3.03125,
      "learning_rate": 1.5945466015561112e-05,
      "loss": 0.7972,
      "step": 610300
    },
    {
      "epoch": 2.1389843933451793,
      "grad_norm": 3.21875,
      "learning_rate": 1.594481698689741e-05,
      "loss": 0.8679,
      "step": 610310
    },
    {
      "epoch": 2.139019440852075,
      "grad_norm": 2.8125,
      "learning_rate": 1.5944167958233708e-05,
      "loss": 0.8519,
      "step": 610320
    },
    {
      "epoch": 2.1390544883589704,
      "grad_norm": 2.6875,
      "learning_rate": 1.5943518929570006e-05,
      "loss": 0.7572,
      "step": 610330
    },
    {
      "epoch": 2.139089535865866,
      "grad_norm": 2.46875,
      "learning_rate": 1.5942869900906304e-05,
      "loss": 0.8444,
      "step": 610340
    },
    {
      "epoch": 2.139124583372762,
      "grad_norm": 3.140625,
      "learning_rate": 1.5942220872242602e-05,
      "loss": 0.8066,
      "step": 610350
    },
    {
      "epoch": 2.139159630879657,
      "grad_norm": 2.875,
      "learning_rate": 1.59415718435789e-05,
      "loss": 0.8342,
      "step": 610360
    },
    {
      "epoch": 2.139194678386553,
      "grad_norm": 3.625,
      "learning_rate": 1.5940922814915198e-05,
      "loss": 0.7939,
      "step": 610370
    },
    {
      "epoch": 2.1392297258934487,
      "grad_norm": 2.375,
      "learning_rate": 1.5940273786251496e-05,
      "loss": 0.7173,
      "step": 610380
    },
    {
      "epoch": 2.139264773400344,
      "grad_norm": 2.78125,
      "learning_rate": 1.5939624757587794e-05,
      "loss": 0.8444,
      "step": 610390
    },
    {
      "epoch": 2.13929982090724,
      "grad_norm": 3.03125,
      "learning_rate": 1.5938975728924095e-05,
      "loss": 0.8465,
      "step": 610400
    },
    {
      "epoch": 2.1393348684141356,
      "grad_norm": 2.421875,
      "learning_rate": 1.5938326700260393e-05,
      "loss": 0.8318,
      "step": 610410
    },
    {
      "epoch": 2.139369915921031,
      "grad_norm": 2.703125,
      "learning_rate": 1.593767767159669e-05,
      "loss": 0.8246,
      "step": 610420
    },
    {
      "epoch": 2.1394049634279266,
      "grad_norm": 2.96875,
      "learning_rate": 1.593702864293299e-05,
      "loss": 0.7566,
      "step": 610430
    },
    {
      "epoch": 2.139440010934822,
      "grad_norm": 3.828125,
      "learning_rate": 1.5936379614269287e-05,
      "loss": 0.7881,
      "step": 610440
    },
    {
      "epoch": 2.1394750584417177,
      "grad_norm": 3.0,
      "learning_rate": 1.5935730585605582e-05,
      "loss": 0.7314,
      "step": 610450
    },
    {
      "epoch": 2.1395101059486135,
      "grad_norm": 2.703125,
      "learning_rate": 1.593508155694188e-05,
      "loss": 0.7827,
      "step": 610460
    },
    {
      "epoch": 2.1395451534555088,
      "grad_norm": 3.09375,
      "learning_rate": 1.5934432528278178e-05,
      "loss": 0.8355,
      "step": 610470
    },
    {
      "epoch": 2.1395802009624045,
      "grad_norm": 2.96875,
      "learning_rate": 1.5933783499614476e-05,
      "loss": 0.8332,
      "step": 610480
    },
    {
      "epoch": 2.1396152484693003,
      "grad_norm": 3.03125,
      "learning_rate": 1.5933134470950774e-05,
      "loss": 0.8044,
      "step": 610490
    },
    {
      "epoch": 2.1396502959761956,
      "grad_norm": 2.703125,
      "learning_rate": 1.5932485442287072e-05,
      "loss": 0.8191,
      "step": 610500
    },
    {
      "epoch": 2.1396853434830914,
      "grad_norm": 2.703125,
      "learning_rate": 1.593183641362337e-05,
      "loss": 0.7888,
      "step": 610510
    },
    {
      "epoch": 2.139720390989987,
      "grad_norm": 2.5625,
      "learning_rate": 1.593118738495967e-05,
      "loss": 0.8398,
      "step": 610520
    },
    {
      "epoch": 2.1397554384968824,
      "grad_norm": 2.890625,
      "learning_rate": 1.593053835629597e-05,
      "loss": 0.7628,
      "step": 610530
    },
    {
      "epoch": 2.139790486003778,
      "grad_norm": 3.046875,
      "learning_rate": 1.5929889327632267e-05,
      "loss": 0.7834,
      "step": 610540
    },
    {
      "epoch": 2.139825533510674,
      "grad_norm": 3.625,
      "learning_rate": 1.5929240298968565e-05,
      "loss": 0.8946,
      "step": 610550
    },
    {
      "epoch": 2.1398605810175693,
      "grad_norm": 2.40625,
      "learning_rate": 1.5928591270304863e-05,
      "loss": 0.964,
      "step": 610560
    },
    {
      "epoch": 2.139895628524465,
      "grad_norm": 2.6875,
      "learning_rate": 1.592794224164116e-05,
      "loss": 0.8145,
      "step": 610570
    },
    {
      "epoch": 2.1399306760313603,
      "grad_norm": 2.71875,
      "learning_rate": 1.592729321297746e-05,
      "loss": 0.7501,
      "step": 610580
    },
    {
      "epoch": 2.139965723538256,
      "grad_norm": 3.015625,
      "learning_rate": 1.5926644184313757e-05,
      "loss": 0.7817,
      "step": 610590
    },
    {
      "epoch": 2.140000771045152,
      "grad_norm": 3.0,
      "learning_rate": 1.5925995155650055e-05,
      "loss": 0.7603,
      "step": 610600
    },
    {
      "epoch": 2.140035818552047,
      "grad_norm": 3.3125,
      "learning_rate": 1.5925346126986353e-05,
      "loss": 0.7699,
      "step": 610610
    },
    {
      "epoch": 2.140070866058943,
      "grad_norm": 2.765625,
      "learning_rate": 1.592469709832265e-05,
      "loss": 0.8193,
      "step": 610620
    },
    {
      "epoch": 2.1401059135658387,
      "grad_norm": 3.09375,
      "learning_rate": 1.592404806965895e-05,
      "loss": 0.8219,
      "step": 610630
    },
    {
      "epoch": 2.140140961072734,
      "grad_norm": 2.90625,
      "learning_rate": 1.5923399040995247e-05,
      "loss": 0.7927,
      "step": 610640
    },
    {
      "epoch": 2.1401760085796298,
      "grad_norm": 3.21875,
      "learning_rate": 1.5922750012331545e-05,
      "loss": 0.8445,
      "step": 610650
    },
    {
      "epoch": 2.1402110560865255,
      "grad_norm": 3.125,
      "learning_rate": 1.5922100983667843e-05,
      "loss": 0.8471,
      "step": 610660
    },
    {
      "epoch": 2.140246103593421,
      "grad_norm": 3.09375,
      "learning_rate": 1.592145195500414e-05,
      "loss": 0.7728,
      "step": 610670
    },
    {
      "epoch": 2.1402811511003166,
      "grad_norm": 3.453125,
      "learning_rate": 1.592080292634044e-05,
      "loss": 0.8327,
      "step": 610680
    },
    {
      "epoch": 2.140316198607212,
      "grad_norm": 3.171875,
      "learning_rate": 1.5920153897676737e-05,
      "loss": 0.8116,
      "step": 610690
    },
    {
      "epoch": 2.1403512461141077,
      "grad_norm": 3.15625,
      "learning_rate": 1.5919504869013035e-05,
      "loss": 0.8269,
      "step": 610700
    },
    {
      "epoch": 2.1403862936210034,
      "grad_norm": 5.5,
      "learning_rate": 1.5918855840349333e-05,
      "loss": 0.8179,
      "step": 610710
    },
    {
      "epoch": 2.1404213411278987,
      "grad_norm": 3.375,
      "learning_rate": 1.591820681168563e-05,
      "loss": 0.8033,
      "step": 610720
    },
    {
      "epoch": 2.1404563886347945,
      "grad_norm": 3.515625,
      "learning_rate": 1.591755778302193e-05,
      "loss": 0.8542,
      "step": 610730
    },
    {
      "epoch": 2.1404914361416902,
      "grad_norm": 3.25,
      "learning_rate": 1.5916908754358227e-05,
      "loss": 0.9425,
      "step": 610740
    },
    {
      "epoch": 2.1405264836485856,
      "grad_norm": 2.921875,
      "learning_rate": 1.5916259725694525e-05,
      "loss": 0.7814,
      "step": 610750
    },
    {
      "epoch": 2.1405615311554813,
      "grad_norm": 2.6875,
      "learning_rate": 1.5915610697030823e-05,
      "loss": 0.7757,
      "step": 610760
    },
    {
      "epoch": 2.140596578662377,
      "grad_norm": 3.171875,
      "learning_rate": 1.5914961668367125e-05,
      "loss": 0.8039,
      "step": 610770
    },
    {
      "epoch": 2.1406316261692724,
      "grad_norm": 3.03125,
      "learning_rate": 1.5914312639703423e-05,
      "loss": 0.8287,
      "step": 610780
    },
    {
      "epoch": 2.140666673676168,
      "grad_norm": 2.578125,
      "learning_rate": 1.591366361103972e-05,
      "loss": 0.7931,
      "step": 610790
    },
    {
      "epoch": 2.1407017211830635,
      "grad_norm": 2.921875,
      "learning_rate": 1.591301458237602e-05,
      "loss": 0.8205,
      "step": 610800
    },
    {
      "epoch": 2.140736768689959,
      "grad_norm": 2.453125,
      "learning_rate": 1.5912365553712317e-05,
      "loss": 0.8337,
      "step": 610810
    },
    {
      "epoch": 2.140771816196855,
      "grad_norm": 2.203125,
      "learning_rate": 1.591171652504861e-05,
      "loss": 0.7933,
      "step": 610820
    },
    {
      "epoch": 2.1408068637037503,
      "grad_norm": 2.578125,
      "learning_rate": 1.591106749638491e-05,
      "loss": 0.8373,
      "step": 610830
    },
    {
      "epoch": 2.140841911210646,
      "grad_norm": 2.984375,
      "learning_rate": 1.5910418467721207e-05,
      "loss": 0.7872,
      "step": 610840
    },
    {
      "epoch": 2.140876958717542,
      "grad_norm": 2.84375,
      "learning_rate": 1.5909769439057505e-05,
      "loss": 0.8678,
      "step": 610850
    },
    {
      "epoch": 2.140912006224437,
      "grad_norm": 2.984375,
      "learning_rate": 1.5909120410393803e-05,
      "loss": 0.8722,
      "step": 610860
    },
    {
      "epoch": 2.140947053731333,
      "grad_norm": 3.0,
      "learning_rate": 1.59084713817301e-05,
      "loss": 0.8347,
      "step": 610870
    },
    {
      "epoch": 2.1409821012382286,
      "grad_norm": 2.484375,
      "learning_rate": 1.5907822353066403e-05,
      "loss": 0.8044,
      "step": 610880
    },
    {
      "epoch": 2.141017148745124,
      "grad_norm": 2.609375,
      "learning_rate": 1.59071733244027e-05,
      "loss": 0.8179,
      "step": 610890
    },
    {
      "epoch": 2.1410521962520197,
      "grad_norm": 3.0625,
      "learning_rate": 1.5906524295739e-05,
      "loss": 0.8567,
      "step": 610900
    },
    {
      "epoch": 2.141087243758915,
      "grad_norm": 2.625,
      "learning_rate": 1.5905875267075297e-05,
      "loss": 0.7976,
      "step": 610910
    },
    {
      "epoch": 2.1411222912658108,
      "grad_norm": 2.671875,
      "learning_rate": 1.5905226238411595e-05,
      "loss": 0.8032,
      "step": 610920
    },
    {
      "epoch": 2.1411573387727065,
      "grad_norm": 2.578125,
      "learning_rate": 1.5904577209747893e-05,
      "loss": 0.8485,
      "step": 610930
    },
    {
      "epoch": 2.141192386279602,
      "grad_norm": 2.765625,
      "learning_rate": 1.590392818108419e-05,
      "loss": 0.7393,
      "step": 610940
    },
    {
      "epoch": 2.1412274337864976,
      "grad_norm": 3.125,
      "learning_rate": 1.590327915242049e-05,
      "loss": 0.7655,
      "step": 610950
    },
    {
      "epoch": 2.1412624812933934,
      "grad_norm": 3.046875,
      "learning_rate": 1.5902630123756787e-05,
      "loss": 0.7412,
      "step": 610960
    },
    {
      "epoch": 2.1412975288002887,
      "grad_norm": 2.921875,
      "learning_rate": 1.5901981095093085e-05,
      "loss": 0.7952,
      "step": 610970
    },
    {
      "epoch": 2.1413325763071844,
      "grad_norm": 3.140625,
      "learning_rate": 1.5901332066429383e-05,
      "loss": 0.8057,
      "step": 610980
    },
    {
      "epoch": 2.14136762381408,
      "grad_norm": 3.0625,
      "learning_rate": 1.590068303776568e-05,
      "loss": 0.8924,
      "step": 610990
    },
    {
      "epoch": 2.1414026713209755,
      "grad_norm": 2.796875,
      "learning_rate": 1.590003400910198e-05,
      "loss": 0.8356,
      "step": 611000
    },
    {
      "epoch": 2.1414377188278713,
      "grad_norm": 2.578125,
      "learning_rate": 1.5899384980438277e-05,
      "loss": 0.7653,
      "step": 611010
    },
    {
      "epoch": 2.1414727663347666,
      "grad_norm": 2.78125,
      "learning_rate": 1.5898735951774575e-05,
      "loss": 0.7539,
      "step": 611020
    },
    {
      "epoch": 2.1415078138416623,
      "grad_norm": 3.34375,
      "learning_rate": 1.5898086923110873e-05,
      "loss": 0.8917,
      "step": 611030
    },
    {
      "epoch": 2.141542861348558,
      "grad_norm": 2.84375,
      "learning_rate": 1.589743789444717e-05,
      "loss": 0.753,
      "step": 611040
    },
    {
      "epoch": 2.1415779088554534,
      "grad_norm": 2.84375,
      "learning_rate": 1.589678886578347e-05,
      "loss": 0.8064,
      "step": 611050
    },
    {
      "epoch": 2.141612956362349,
      "grad_norm": 2.546875,
      "learning_rate": 1.5896139837119767e-05,
      "loss": 0.7853,
      "step": 611060
    },
    {
      "epoch": 2.141648003869245,
      "grad_norm": 3.125,
      "learning_rate": 1.5895490808456065e-05,
      "loss": 0.9092,
      "step": 611070
    },
    {
      "epoch": 2.1416830513761402,
      "grad_norm": 2.859375,
      "learning_rate": 1.5894841779792363e-05,
      "loss": 0.8334,
      "step": 611080
    },
    {
      "epoch": 2.141718098883036,
      "grad_norm": 3.0625,
      "learning_rate": 1.589419275112866e-05,
      "loss": 0.849,
      "step": 611090
    },
    {
      "epoch": 2.1417531463899318,
      "grad_norm": 3.25,
      "learning_rate": 1.589354372246496e-05,
      "loss": 0.8352,
      "step": 611100
    },
    {
      "epoch": 2.141788193896827,
      "grad_norm": 2.640625,
      "learning_rate": 1.5892894693801257e-05,
      "loss": 0.7428,
      "step": 611110
    },
    {
      "epoch": 2.141823241403723,
      "grad_norm": 3.15625,
      "learning_rate": 1.5892245665137555e-05,
      "loss": 0.8093,
      "step": 611120
    },
    {
      "epoch": 2.141858288910618,
      "grad_norm": 2.296875,
      "learning_rate": 1.5891596636473853e-05,
      "loss": 0.8319,
      "step": 611130
    },
    {
      "epoch": 2.141893336417514,
      "grad_norm": 2.640625,
      "learning_rate": 1.5890947607810154e-05,
      "loss": 0.8415,
      "step": 611140
    },
    {
      "epoch": 2.1419283839244097,
      "grad_norm": 2.765625,
      "learning_rate": 1.5890298579146452e-05,
      "loss": 0.7794,
      "step": 611150
    },
    {
      "epoch": 2.141963431431305,
      "grad_norm": 3.21875,
      "learning_rate": 1.588964955048275e-05,
      "loss": 0.8665,
      "step": 611160
    },
    {
      "epoch": 2.1419984789382007,
      "grad_norm": 2.390625,
      "learning_rate": 1.5889000521819048e-05,
      "loss": 0.8305,
      "step": 611170
    },
    {
      "epoch": 2.1420335264450965,
      "grad_norm": 2.84375,
      "learning_rate": 1.5888351493155346e-05,
      "loss": 0.7856,
      "step": 611180
    },
    {
      "epoch": 2.142068573951992,
      "grad_norm": 3.109375,
      "learning_rate": 1.5887702464491644e-05,
      "loss": 0.8516,
      "step": 611190
    },
    {
      "epoch": 2.1421036214588876,
      "grad_norm": 2.90625,
      "learning_rate": 1.588705343582794e-05,
      "loss": 0.8546,
      "step": 611200
    },
    {
      "epoch": 2.1421386689657833,
      "grad_norm": 3.109375,
      "learning_rate": 1.5886404407164237e-05,
      "loss": 0.8857,
      "step": 611210
    },
    {
      "epoch": 2.1421737164726786,
      "grad_norm": 3.1875,
      "learning_rate": 1.5885755378500535e-05,
      "loss": 0.7586,
      "step": 611220
    },
    {
      "epoch": 2.1422087639795744,
      "grad_norm": 3.21875,
      "learning_rate": 1.5885106349836833e-05,
      "loss": 0.8028,
      "step": 611230
    },
    {
      "epoch": 2.1422438114864697,
      "grad_norm": 3.09375,
      "learning_rate": 1.588445732117313e-05,
      "loss": 0.8216,
      "step": 611240
    },
    {
      "epoch": 2.1422788589933655,
      "grad_norm": 3.1875,
      "learning_rate": 1.5883808292509432e-05,
      "loss": 0.8821,
      "step": 611250
    },
    {
      "epoch": 2.142313906500261,
      "grad_norm": 2.546875,
      "learning_rate": 1.588315926384573e-05,
      "loss": 0.8081,
      "step": 611260
    },
    {
      "epoch": 2.1423489540071565,
      "grad_norm": 2.265625,
      "learning_rate": 1.5882510235182028e-05,
      "loss": 0.8166,
      "step": 611270
    },
    {
      "epoch": 2.1423840015140523,
      "grad_norm": 2.953125,
      "learning_rate": 1.5881861206518326e-05,
      "loss": 0.8272,
      "step": 611280
    },
    {
      "epoch": 2.142419049020948,
      "grad_norm": 3.4375,
      "learning_rate": 1.5881212177854624e-05,
      "loss": 0.8412,
      "step": 611290
    },
    {
      "epoch": 2.1424540965278434,
      "grad_norm": 2.515625,
      "learning_rate": 1.5880563149190922e-05,
      "loss": 0.7424,
      "step": 611300
    },
    {
      "epoch": 2.142489144034739,
      "grad_norm": 2.703125,
      "learning_rate": 1.587991412052722e-05,
      "loss": 0.838,
      "step": 611310
    },
    {
      "epoch": 2.142524191541635,
      "grad_norm": 2.703125,
      "learning_rate": 1.5879265091863518e-05,
      "loss": 0.77,
      "step": 611320
    },
    {
      "epoch": 2.14255923904853,
      "grad_norm": 2.875,
      "learning_rate": 1.5878616063199816e-05,
      "loss": 0.8388,
      "step": 611330
    },
    {
      "epoch": 2.142594286555426,
      "grad_norm": 2.875,
      "learning_rate": 1.5877967034536114e-05,
      "loss": 0.8857,
      "step": 611340
    },
    {
      "epoch": 2.1426293340623213,
      "grad_norm": 2.890625,
      "learning_rate": 1.5877318005872412e-05,
      "loss": 0.7748,
      "step": 611350
    },
    {
      "epoch": 2.142664381569217,
      "grad_norm": 2.75,
      "learning_rate": 1.587666897720871e-05,
      "loss": 0.7343,
      "step": 611360
    },
    {
      "epoch": 2.1426994290761128,
      "grad_norm": 3.015625,
      "learning_rate": 1.5876019948545008e-05,
      "loss": 0.8319,
      "step": 611370
    },
    {
      "epoch": 2.142734476583008,
      "grad_norm": 2.734375,
      "learning_rate": 1.5875370919881306e-05,
      "loss": 0.8034,
      "step": 611380
    },
    {
      "epoch": 2.142769524089904,
      "grad_norm": 3.109375,
      "learning_rate": 1.5874721891217604e-05,
      "loss": 0.8021,
      "step": 611390
    },
    {
      "epoch": 2.1428045715967996,
      "grad_norm": 3.265625,
      "learning_rate": 1.5874072862553902e-05,
      "loss": 0.7662,
      "step": 611400
    },
    {
      "epoch": 2.142839619103695,
      "grad_norm": 3.21875,
      "learning_rate": 1.58734238338902e-05,
      "loss": 0.8886,
      "step": 611410
    },
    {
      "epoch": 2.1428746666105907,
      "grad_norm": 3.25,
      "learning_rate": 1.5872774805226498e-05,
      "loss": 0.7982,
      "step": 611420
    },
    {
      "epoch": 2.1429097141174864,
      "grad_norm": 3.171875,
      "learning_rate": 1.5872125776562796e-05,
      "loss": 0.842,
      "step": 611430
    },
    {
      "epoch": 2.1429447616243817,
      "grad_norm": 2.609375,
      "learning_rate": 1.5871476747899094e-05,
      "loss": 0.9349,
      "step": 611440
    },
    {
      "epoch": 2.1429798091312775,
      "grad_norm": 2.4375,
      "learning_rate": 1.5870827719235392e-05,
      "loss": 0.8434,
      "step": 611450
    },
    {
      "epoch": 2.143014856638173,
      "grad_norm": 2.390625,
      "learning_rate": 1.587017869057169e-05,
      "loss": 0.8206,
      "step": 611460
    },
    {
      "epoch": 2.1430499041450686,
      "grad_norm": 3.03125,
      "learning_rate": 1.5869529661907988e-05,
      "loss": 0.8711,
      "step": 611470
    },
    {
      "epoch": 2.1430849516519643,
      "grad_norm": 2.90625,
      "learning_rate": 1.5868880633244286e-05,
      "loss": 0.8315,
      "step": 611480
    },
    {
      "epoch": 2.1431199991588596,
      "grad_norm": 2.921875,
      "learning_rate": 1.5868231604580584e-05,
      "loss": 0.7661,
      "step": 611490
    },
    {
      "epoch": 2.1431550466657554,
      "grad_norm": 2.640625,
      "learning_rate": 1.5867582575916886e-05,
      "loss": 0.8135,
      "step": 611500
    },
    {
      "epoch": 2.143190094172651,
      "grad_norm": 3.28125,
      "learning_rate": 1.5866933547253184e-05,
      "loss": 0.9271,
      "step": 611510
    },
    {
      "epoch": 2.1432251416795465,
      "grad_norm": 2.8125,
      "learning_rate": 1.586628451858948e-05,
      "loss": 0.8539,
      "step": 611520
    },
    {
      "epoch": 2.1432601891864422,
      "grad_norm": 3.03125,
      "learning_rate": 1.586563548992578e-05,
      "loss": 0.7831,
      "step": 611530
    },
    {
      "epoch": 2.143295236693338,
      "grad_norm": 3.015625,
      "learning_rate": 1.5864986461262078e-05,
      "loss": 0.8144,
      "step": 611540
    },
    {
      "epoch": 2.1433302842002333,
      "grad_norm": 3.125,
      "learning_rate": 1.5864337432598376e-05,
      "loss": 0.7884,
      "step": 611550
    },
    {
      "epoch": 2.143365331707129,
      "grad_norm": 2.59375,
      "learning_rate": 1.5863688403934674e-05,
      "loss": 0.7588,
      "step": 611560
    },
    {
      "epoch": 2.1434003792140244,
      "grad_norm": 3.421875,
      "learning_rate": 1.586303937527097e-05,
      "loss": 0.9046,
      "step": 611570
    },
    {
      "epoch": 2.14343542672092,
      "grad_norm": 3.546875,
      "learning_rate": 1.5862390346607266e-05,
      "loss": 0.7852,
      "step": 611580
    },
    {
      "epoch": 2.143470474227816,
      "grad_norm": 3.0,
      "learning_rate": 1.5861741317943564e-05,
      "loss": 0.8125,
      "step": 611590
    },
    {
      "epoch": 2.143505521734711,
      "grad_norm": 2.8125,
      "learning_rate": 1.5861092289279862e-05,
      "loss": 0.8027,
      "step": 611600
    },
    {
      "epoch": 2.143540569241607,
      "grad_norm": 3.078125,
      "learning_rate": 1.586044326061616e-05,
      "loss": 0.8798,
      "step": 611610
    },
    {
      "epoch": 2.1435756167485027,
      "grad_norm": 2.6875,
      "learning_rate": 1.585979423195246e-05,
      "loss": 0.808,
      "step": 611620
    },
    {
      "epoch": 2.143610664255398,
      "grad_norm": 2.953125,
      "learning_rate": 1.585914520328876e-05,
      "loss": 0.7635,
      "step": 611630
    },
    {
      "epoch": 2.143645711762294,
      "grad_norm": 2.6875,
      "learning_rate": 1.5858496174625058e-05,
      "loss": 0.7827,
      "step": 611640
    },
    {
      "epoch": 2.1436807592691896,
      "grad_norm": 2.6875,
      "learning_rate": 1.5857847145961356e-05,
      "loss": 0.8516,
      "step": 611650
    },
    {
      "epoch": 2.143715806776085,
      "grad_norm": 2.859375,
      "learning_rate": 1.5857198117297654e-05,
      "loss": 0.8488,
      "step": 611660
    },
    {
      "epoch": 2.1437508542829806,
      "grad_norm": 2.359375,
      "learning_rate": 1.585654908863395e-05,
      "loss": 0.7959,
      "step": 611670
    },
    {
      "epoch": 2.1437859017898764,
      "grad_norm": 2.40625,
      "learning_rate": 1.585590005997025e-05,
      "loss": 0.8563,
      "step": 611680
    },
    {
      "epoch": 2.1438209492967717,
      "grad_norm": 2.6875,
      "learning_rate": 1.5855251031306548e-05,
      "loss": 0.8148,
      "step": 611690
    },
    {
      "epoch": 2.1438559968036675,
      "grad_norm": 2.890625,
      "learning_rate": 1.5854602002642846e-05,
      "loss": 0.8655,
      "step": 611700
    },
    {
      "epoch": 2.1438910443105628,
      "grad_norm": 2.703125,
      "learning_rate": 1.5853952973979144e-05,
      "loss": 0.805,
      "step": 611710
    },
    {
      "epoch": 2.1439260918174585,
      "grad_norm": 2.90625,
      "learning_rate": 1.585330394531544e-05,
      "loss": 0.7848,
      "step": 611720
    },
    {
      "epoch": 2.1439611393243543,
      "grad_norm": 2.8125,
      "learning_rate": 1.585265491665174e-05,
      "loss": 0.7606,
      "step": 611730
    },
    {
      "epoch": 2.1439961868312496,
      "grad_norm": 3.046875,
      "learning_rate": 1.5852005887988038e-05,
      "loss": 0.8387,
      "step": 611740
    },
    {
      "epoch": 2.1440312343381454,
      "grad_norm": 2.671875,
      "learning_rate": 1.5851356859324336e-05,
      "loss": 0.8175,
      "step": 611750
    },
    {
      "epoch": 2.144066281845041,
      "grad_norm": 3.578125,
      "learning_rate": 1.5850707830660634e-05,
      "loss": 0.7899,
      "step": 611760
    },
    {
      "epoch": 2.1441013293519364,
      "grad_norm": 2.5625,
      "learning_rate": 1.585005880199693e-05,
      "loss": 0.8217,
      "step": 611770
    },
    {
      "epoch": 2.144136376858832,
      "grad_norm": 3.296875,
      "learning_rate": 1.584940977333323e-05,
      "loss": 0.788,
      "step": 611780
    },
    {
      "epoch": 2.144171424365728,
      "grad_norm": 3.109375,
      "learning_rate": 1.5848760744669528e-05,
      "loss": 0.7863,
      "step": 611790
    },
    {
      "epoch": 2.1442064718726233,
      "grad_norm": 2.765625,
      "learning_rate": 1.5848111716005826e-05,
      "loss": 0.7577,
      "step": 611800
    },
    {
      "epoch": 2.144241519379519,
      "grad_norm": 3.046875,
      "learning_rate": 1.5847462687342124e-05,
      "loss": 0.8609,
      "step": 611810
    },
    {
      "epoch": 2.1442765668864148,
      "grad_norm": 3.15625,
      "learning_rate": 1.584681365867842e-05,
      "loss": 0.8656,
      "step": 611820
    },
    {
      "epoch": 2.14431161439331,
      "grad_norm": 3.0,
      "learning_rate": 1.584616463001472e-05,
      "loss": 0.7449,
      "step": 611830
    },
    {
      "epoch": 2.144346661900206,
      "grad_norm": 2.6875,
      "learning_rate": 1.5845515601351018e-05,
      "loss": 0.7807,
      "step": 611840
    },
    {
      "epoch": 2.144381709407101,
      "grad_norm": 2.578125,
      "learning_rate": 1.5844866572687316e-05,
      "loss": 0.7946,
      "step": 611850
    },
    {
      "epoch": 2.144416756913997,
      "grad_norm": 2.75,
      "learning_rate": 1.5844217544023614e-05,
      "loss": 0.8129,
      "step": 611860
    },
    {
      "epoch": 2.1444518044208927,
      "grad_norm": 2.703125,
      "learning_rate": 1.5843568515359915e-05,
      "loss": 0.8344,
      "step": 611870
    },
    {
      "epoch": 2.144486851927788,
      "grad_norm": 2.984375,
      "learning_rate": 1.5842919486696213e-05,
      "loss": 0.8069,
      "step": 611880
    },
    {
      "epoch": 2.1445218994346837,
      "grad_norm": 2.890625,
      "learning_rate": 1.584227045803251e-05,
      "loss": 0.8304,
      "step": 611890
    },
    {
      "epoch": 2.1445569469415795,
      "grad_norm": 2.71875,
      "learning_rate": 1.584162142936881e-05,
      "loss": 0.8777,
      "step": 611900
    },
    {
      "epoch": 2.144591994448475,
      "grad_norm": 2.640625,
      "learning_rate": 1.5840972400705107e-05,
      "loss": 0.8005,
      "step": 611910
    },
    {
      "epoch": 2.1446270419553706,
      "grad_norm": 2.75,
      "learning_rate": 1.5840323372041405e-05,
      "loss": 0.8243,
      "step": 611920
    },
    {
      "epoch": 2.1446620894622663,
      "grad_norm": 3.625,
      "learning_rate": 1.5839674343377703e-05,
      "loss": 0.9021,
      "step": 611930
    },
    {
      "epoch": 2.1446971369691616,
      "grad_norm": 2.734375,
      "learning_rate": 1.5839025314714e-05,
      "loss": 0.8626,
      "step": 611940
    },
    {
      "epoch": 2.1447321844760574,
      "grad_norm": 2.828125,
      "learning_rate": 1.5838376286050296e-05,
      "loss": 0.8344,
      "step": 611950
    },
    {
      "epoch": 2.1447672319829527,
      "grad_norm": 3.3125,
      "learning_rate": 1.5837727257386594e-05,
      "loss": 0.8624,
      "step": 611960
    },
    {
      "epoch": 2.1448022794898485,
      "grad_norm": 2.90625,
      "learning_rate": 1.583707822872289e-05,
      "loss": 0.7899,
      "step": 611970
    },
    {
      "epoch": 2.1448373269967442,
      "grad_norm": 3.03125,
      "learning_rate": 1.5836429200059193e-05,
      "loss": 0.7882,
      "step": 611980
    },
    {
      "epoch": 2.1448723745036395,
      "grad_norm": 3.125,
      "learning_rate": 1.583578017139549e-05,
      "loss": 0.8247,
      "step": 611990
    },
    {
      "epoch": 2.1449074220105353,
      "grad_norm": 2.90625,
      "learning_rate": 1.583513114273179e-05,
      "loss": 0.7945,
      "step": 612000
    },
    {
      "epoch": 2.144942469517431,
      "grad_norm": 2.875,
      "learning_rate": 1.5834482114068087e-05,
      "loss": 0.8681,
      "step": 612010
    },
    {
      "epoch": 2.1449775170243264,
      "grad_norm": 2.828125,
      "learning_rate": 1.5833833085404385e-05,
      "loss": 0.7706,
      "step": 612020
    },
    {
      "epoch": 2.145012564531222,
      "grad_norm": 2.875,
      "learning_rate": 1.5833184056740683e-05,
      "loss": 0.8915,
      "step": 612030
    },
    {
      "epoch": 2.145047612038118,
      "grad_norm": 2.71875,
      "learning_rate": 1.583253502807698e-05,
      "loss": 0.7783,
      "step": 612040
    },
    {
      "epoch": 2.145082659545013,
      "grad_norm": 2.921875,
      "learning_rate": 1.583188599941328e-05,
      "loss": 0.7833,
      "step": 612050
    },
    {
      "epoch": 2.145117707051909,
      "grad_norm": 3.0,
      "learning_rate": 1.5831236970749577e-05,
      "loss": 0.7693,
      "step": 612060
    },
    {
      "epoch": 2.1451527545588043,
      "grad_norm": 2.78125,
      "learning_rate": 1.5830587942085875e-05,
      "loss": 0.7146,
      "step": 612070
    },
    {
      "epoch": 2.1451878020657,
      "grad_norm": 2.796875,
      "learning_rate": 1.5829938913422173e-05,
      "loss": 0.783,
      "step": 612080
    },
    {
      "epoch": 2.145222849572596,
      "grad_norm": 2.9375,
      "learning_rate": 1.582928988475847e-05,
      "loss": 0.9043,
      "step": 612090
    },
    {
      "epoch": 2.145257897079491,
      "grad_norm": 2.4375,
      "learning_rate": 1.582864085609477e-05,
      "loss": 0.7594,
      "step": 612100
    },
    {
      "epoch": 2.145292944586387,
      "grad_norm": 3.0625,
      "learning_rate": 1.5827991827431067e-05,
      "loss": 0.7972,
      "step": 612110
    },
    {
      "epoch": 2.1453279920932826,
      "grad_norm": 2.609375,
      "learning_rate": 1.582734279876737e-05,
      "loss": 0.7618,
      "step": 612120
    },
    {
      "epoch": 2.145363039600178,
      "grad_norm": 2.859375,
      "learning_rate": 1.5826693770103666e-05,
      "loss": 0.833,
      "step": 612130
    },
    {
      "epoch": 2.1453980871070737,
      "grad_norm": 2.28125,
      "learning_rate": 1.582604474143996e-05,
      "loss": 0.833,
      "step": 612140
    },
    {
      "epoch": 2.1454331346139695,
      "grad_norm": 2.71875,
      "learning_rate": 1.582539571277626e-05,
      "loss": 0.7913,
      "step": 612150
    },
    {
      "epoch": 2.1454681821208648,
      "grad_norm": 2.3125,
      "learning_rate": 1.5824746684112557e-05,
      "loss": 0.7889,
      "step": 612160
    },
    {
      "epoch": 2.1455032296277605,
      "grad_norm": 3.28125,
      "learning_rate": 1.5824097655448855e-05,
      "loss": 0.7736,
      "step": 612170
    },
    {
      "epoch": 2.145538277134656,
      "grad_norm": 3.1875,
      "learning_rate": 1.5823448626785153e-05,
      "loss": 0.7887,
      "step": 612180
    },
    {
      "epoch": 2.1455733246415516,
      "grad_norm": 2.890625,
      "learning_rate": 1.582279959812145e-05,
      "loss": 0.793,
      "step": 612190
    },
    {
      "epoch": 2.1456083721484474,
      "grad_norm": 2.453125,
      "learning_rate": 1.582215056945775e-05,
      "loss": 0.7878,
      "step": 612200
    },
    {
      "epoch": 2.1456434196553427,
      "grad_norm": 2.484375,
      "learning_rate": 1.5821501540794047e-05,
      "loss": 0.7773,
      "step": 612210
    },
    {
      "epoch": 2.1456784671622384,
      "grad_norm": 2.734375,
      "learning_rate": 1.5820852512130345e-05,
      "loss": 0.7571,
      "step": 612220
    },
    {
      "epoch": 2.145713514669134,
      "grad_norm": 3.203125,
      "learning_rate": 1.5820203483466643e-05,
      "loss": 0.802,
      "step": 612230
    },
    {
      "epoch": 2.1457485621760295,
      "grad_norm": 2.71875,
      "learning_rate": 1.5819554454802944e-05,
      "loss": 0.917,
      "step": 612240
    },
    {
      "epoch": 2.1457836096829253,
      "grad_norm": 2.890625,
      "learning_rate": 1.5818905426139242e-05,
      "loss": 0.8205,
      "step": 612250
    },
    {
      "epoch": 2.145818657189821,
      "grad_norm": 2.671875,
      "learning_rate": 1.581825639747554e-05,
      "loss": 0.7762,
      "step": 612260
    },
    {
      "epoch": 2.1458537046967163,
      "grad_norm": 2.84375,
      "learning_rate": 1.581760736881184e-05,
      "loss": 0.8123,
      "step": 612270
    },
    {
      "epoch": 2.145888752203612,
      "grad_norm": 2.90625,
      "learning_rate": 1.5816958340148136e-05,
      "loss": 0.8556,
      "step": 612280
    },
    {
      "epoch": 2.1459237997105074,
      "grad_norm": 2.828125,
      "learning_rate": 1.5816309311484434e-05,
      "loss": 0.8236,
      "step": 612290
    },
    {
      "epoch": 2.145958847217403,
      "grad_norm": 2.828125,
      "learning_rate": 1.5815660282820732e-05,
      "loss": 0.7922,
      "step": 612300
    },
    {
      "epoch": 2.145993894724299,
      "grad_norm": 3.15625,
      "learning_rate": 1.581501125415703e-05,
      "loss": 0.791,
      "step": 612310
    },
    {
      "epoch": 2.1460289422311942,
      "grad_norm": 3.109375,
      "learning_rate": 1.581436222549333e-05,
      "loss": 0.7354,
      "step": 612320
    },
    {
      "epoch": 2.14606398973809,
      "grad_norm": 2.359375,
      "learning_rate": 1.5813713196829623e-05,
      "loss": 0.82,
      "step": 612330
    },
    {
      "epoch": 2.1460990372449857,
      "grad_norm": 2.453125,
      "learning_rate": 1.581306416816592e-05,
      "loss": 0.756,
      "step": 612340
    },
    {
      "epoch": 2.146134084751881,
      "grad_norm": 2.453125,
      "learning_rate": 1.5812415139502222e-05,
      "loss": 0.7474,
      "step": 612350
    },
    {
      "epoch": 2.146169132258777,
      "grad_norm": 2.859375,
      "learning_rate": 1.581176611083852e-05,
      "loss": 0.8325,
      "step": 612360
    },
    {
      "epoch": 2.1462041797656726,
      "grad_norm": 2.890625,
      "learning_rate": 1.581111708217482e-05,
      "loss": 0.8343,
      "step": 612370
    },
    {
      "epoch": 2.146239227272568,
      "grad_norm": 2.921875,
      "learning_rate": 1.5810468053511116e-05,
      "loss": 0.9093,
      "step": 612380
    },
    {
      "epoch": 2.1462742747794636,
      "grad_norm": 2.703125,
      "learning_rate": 1.5809819024847414e-05,
      "loss": 0.7874,
      "step": 612390
    },
    {
      "epoch": 2.146309322286359,
      "grad_norm": 3.1875,
      "learning_rate": 1.5809169996183712e-05,
      "loss": 0.7866,
      "step": 612400
    },
    {
      "epoch": 2.1463443697932547,
      "grad_norm": 3.328125,
      "learning_rate": 1.580852096752001e-05,
      "loss": 0.8494,
      "step": 612410
    },
    {
      "epoch": 2.1463794173001505,
      "grad_norm": 3.40625,
      "learning_rate": 1.580787193885631e-05,
      "loss": 0.8103,
      "step": 612420
    },
    {
      "epoch": 2.146414464807046,
      "grad_norm": 2.828125,
      "learning_rate": 1.5807222910192606e-05,
      "loss": 0.8316,
      "step": 612430
    },
    {
      "epoch": 2.1464495123139415,
      "grad_norm": 2.828125,
      "learning_rate": 1.5806573881528904e-05,
      "loss": 0.7797,
      "step": 612440
    },
    {
      "epoch": 2.1464845598208373,
      "grad_norm": 2.84375,
      "learning_rate": 1.5805924852865202e-05,
      "loss": 0.8318,
      "step": 612450
    },
    {
      "epoch": 2.1465196073277326,
      "grad_norm": 3.078125,
      "learning_rate": 1.58052758242015e-05,
      "loss": 0.8618,
      "step": 612460
    },
    {
      "epoch": 2.1465546548346284,
      "grad_norm": 2.6875,
      "learning_rate": 1.58046267955378e-05,
      "loss": 0.7346,
      "step": 612470
    },
    {
      "epoch": 2.146589702341524,
      "grad_norm": 3.03125,
      "learning_rate": 1.5803977766874096e-05,
      "loss": 0.8161,
      "step": 612480
    },
    {
      "epoch": 2.1466247498484194,
      "grad_norm": 2.796875,
      "learning_rate": 1.5803328738210398e-05,
      "loss": 0.8248,
      "step": 612490
    },
    {
      "epoch": 2.146659797355315,
      "grad_norm": 3.28125,
      "learning_rate": 1.5802679709546696e-05,
      "loss": 0.8527,
      "step": 612500
    },
    {
      "epoch": 2.1466948448622105,
      "grad_norm": 2.9375,
      "learning_rate": 1.5802030680882994e-05,
      "loss": 0.9023,
      "step": 612510
    },
    {
      "epoch": 2.1467298923691063,
      "grad_norm": 2.828125,
      "learning_rate": 1.580138165221929e-05,
      "loss": 0.859,
      "step": 612520
    },
    {
      "epoch": 2.146764939876002,
      "grad_norm": 3.0,
      "learning_rate": 1.5800732623555586e-05,
      "loss": 0.861,
      "step": 612530
    },
    {
      "epoch": 2.1467999873828973,
      "grad_norm": 3.15625,
      "learning_rate": 1.5800083594891884e-05,
      "loss": 0.8529,
      "step": 612540
    },
    {
      "epoch": 2.146835034889793,
      "grad_norm": 3.203125,
      "learning_rate": 1.5799434566228182e-05,
      "loss": 0.811,
      "step": 612550
    },
    {
      "epoch": 2.146870082396689,
      "grad_norm": 3.296875,
      "learning_rate": 1.579878553756448e-05,
      "loss": 0.8487,
      "step": 612560
    },
    {
      "epoch": 2.146905129903584,
      "grad_norm": 2.28125,
      "learning_rate": 1.579813650890078e-05,
      "loss": 0.8299,
      "step": 612570
    },
    {
      "epoch": 2.14694017741048,
      "grad_norm": 2.5,
      "learning_rate": 1.5797487480237076e-05,
      "loss": 0.79,
      "step": 612580
    },
    {
      "epoch": 2.1469752249173757,
      "grad_norm": 2.84375,
      "learning_rate": 1.5796838451573374e-05,
      "loss": 0.806,
      "step": 612590
    },
    {
      "epoch": 2.147010272424271,
      "grad_norm": 2.796875,
      "learning_rate": 1.5796189422909676e-05,
      "loss": 0.8239,
      "step": 612600
    },
    {
      "epoch": 2.1470453199311668,
      "grad_norm": 3.046875,
      "learning_rate": 1.5795540394245974e-05,
      "loss": 0.8153,
      "step": 612610
    },
    {
      "epoch": 2.147080367438062,
      "grad_norm": 2.59375,
      "learning_rate": 1.5794891365582272e-05,
      "loss": 0.8249,
      "step": 612620
    },
    {
      "epoch": 2.147115414944958,
      "grad_norm": 3.0625,
      "learning_rate": 1.579424233691857e-05,
      "loss": 0.7335,
      "step": 612630
    },
    {
      "epoch": 2.1471504624518536,
      "grad_norm": 2.71875,
      "learning_rate": 1.5793593308254868e-05,
      "loss": 0.906,
      "step": 612640
    },
    {
      "epoch": 2.147185509958749,
      "grad_norm": 2.484375,
      "learning_rate": 1.5792944279591166e-05,
      "loss": 0.8125,
      "step": 612650
    },
    {
      "epoch": 2.1472205574656447,
      "grad_norm": 3.578125,
      "learning_rate": 1.5792295250927464e-05,
      "loss": 0.8935,
      "step": 612660
    },
    {
      "epoch": 2.1472556049725404,
      "grad_norm": 2.703125,
      "learning_rate": 1.5791646222263762e-05,
      "loss": 0.7493,
      "step": 612670
    },
    {
      "epoch": 2.1472906524794357,
      "grad_norm": 2.765625,
      "learning_rate": 1.579099719360006e-05,
      "loss": 0.7823,
      "step": 612680
    },
    {
      "epoch": 2.1473256999863315,
      "grad_norm": 2.75,
      "learning_rate": 1.5790348164936358e-05,
      "loss": 0.8146,
      "step": 612690
    },
    {
      "epoch": 2.1473607474932273,
      "grad_norm": 2.640625,
      "learning_rate": 1.5789699136272652e-05,
      "loss": 0.8151,
      "step": 612700
    },
    {
      "epoch": 2.1473957950001226,
      "grad_norm": 2.921875,
      "learning_rate": 1.578905010760895e-05,
      "loss": 0.848,
      "step": 612710
    },
    {
      "epoch": 2.1474308425070183,
      "grad_norm": 3.40625,
      "learning_rate": 1.5788401078945252e-05,
      "loss": 0.8121,
      "step": 612720
    },
    {
      "epoch": 2.1474658900139136,
      "grad_norm": 2.84375,
      "learning_rate": 1.578775205028155e-05,
      "loss": 0.8501,
      "step": 612730
    },
    {
      "epoch": 2.1475009375208094,
      "grad_norm": 2.703125,
      "learning_rate": 1.5787103021617848e-05,
      "loss": 0.8006,
      "step": 612740
    },
    {
      "epoch": 2.147535985027705,
      "grad_norm": 3.203125,
      "learning_rate": 1.5786453992954146e-05,
      "loss": 0.8293,
      "step": 612750
    },
    {
      "epoch": 2.1475710325346005,
      "grad_norm": 3.15625,
      "learning_rate": 1.5785804964290444e-05,
      "loss": 0.8346,
      "step": 612760
    },
    {
      "epoch": 2.1476060800414962,
      "grad_norm": 2.5625,
      "learning_rate": 1.5785155935626742e-05,
      "loss": 0.7992,
      "step": 612770
    },
    {
      "epoch": 2.147641127548392,
      "grad_norm": 2.5625,
      "learning_rate": 1.578450690696304e-05,
      "loss": 0.8856,
      "step": 612780
    },
    {
      "epoch": 2.1476761750552873,
      "grad_norm": 2.484375,
      "learning_rate": 1.5783857878299338e-05,
      "loss": 0.7281,
      "step": 612790
    },
    {
      "epoch": 2.147711222562183,
      "grad_norm": 3.34375,
      "learning_rate": 1.5783208849635636e-05,
      "loss": 0.8913,
      "step": 612800
    },
    {
      "epoch": 2.147746270069079,
      "grad_norm": 2.609375,
      "learning_rate": 1.5782559820971934e-05,
      "loss": 0.8492,
      "step": 612810
    },
    {
      "epoch": 2.147781317575974,
      "grad_norm": 2.921875,
      "learning_rate": 1.5781910792308232e-05,
      "loss": 0.7889,
      "step": 612820
    },
    {
      "epoch": 2.14781636508287,
      "grad_norm": 2.828125,
      "learning_rate": 1.578126176364453e-05,
      "loss": 0.9083,
      "step": 612830
    },
    {
      "epoch": 2.147851412589765,
      "grad_norm": 3.03125,
      "learning_rate": 1.5780612734980828e-05,
      "loss": 0.8868,
      "step": 612840
    },
    {
      "epoch": 2.147886460096661,
      "grad_norm": 2.890625,
      "learning_rate": 1.5779963706317126e-05,
      "loss": 0.8276,
      "step": 612850
    },
    {
      "epoch": 2.1479215076035567,
      "grad_norm": 2.796875,
      "learning_rate": 1.5779314677653427e-05,
      "loss": 0.8494,
      "step": 612860
    },
    {
      "epoch": 2.147956555110452,
      "grad_norm": 3.390625,
      "learning_rate": 1.5778665648989725e-05,
      "loss": 0.8989,
      "step": 612870
    },
    {
      "epoch": 2.147991602617348,
      "grad_norm": 3.59375,
      "learning_rate": 1.5778016620326023e-05,
      "loss": 0.86,
      "step": 612880
    },
    {
      "epoch": 2.1480266501242435,
      "grad_norm": 2.6875,
      "learning_rate": 1.5777367591662318e-05,
      "loss": 0.7054,
      "step": 612890
    },
    {
      "epoch": 2.148061697631139,
      "grad_norm": 2.734375,
      "learning_rate": 1.5776718562998616e-05,
      "loss": 0.8034,
      "step": 612900
    },
    {
      "epoch": 2.1480967451380346,
      "grad_norm": 3.046875,
      "learning_rate": 1.5776069534334914e-05,
      "loss": 0.7881,
      "step": 612910
    },
    {
      "epoch": 2.1481317926449304,
      "grad_norm": 3.234375,
      "learning_rate": 1.5775420505671212e-05,
      "loss": 0.8031,
      "step": 612920
    },
    {
      "epoch": 2.1481668401518257,
      "grad_norm": 3.0,
      "learning_rate": 1.577477147700751e-05,
      "loss": 0.8128,
      "step": 612930
    },
    {
      "epoch": 2.1482018876587214,
      "grad_norm": 3.109375,
      "learning_rate": 1.5774122448343808e-05,
      "loss": 0.8929,
      "step": 612940
    },
    {
      "epoch": 2.148236935165617,
      "grad_norm": 3.109375,
      "learning_rate": 1.5773473419680106e-05,
      "loss": 0.7915,
      "step": 612950
    },
    {
      "epoch": 2.1482719826725125,
      "grad_norm": 2.96875,
      "learning_rate": 1.5772824391016404e-05,
      "loss": 0.7788,
      "step": 612960
    },
    {
      "epoch": 2.1483070301794083,
      "grad_norm": 2.515625,
      "learning_rate": 1.5772175362352705e-05,
      "loss": 0.8609,
      "step": 612970
    },
    {
      "epoch": 2.1483420776863036,
      "grad_norm": 2.703125,
      "learning_rate": 1.5771526333689003e-05,
      "loss": 0.7703,
      "step": 612980
    },
    {
      "epoch": 2.1483771251931993,
      "grad_norm": 3.03125,
      "learning_rate": 1.57708773050253e-05,
      "loss": 0.7844,
      "step": 612990
    },
    {
      "epoch": 2.148412172700095,
      "grad_norm": 3.109375,
      "learning_rate": 1.57702282763616e-05,
      "loss": 0.762,
      "step": 613000
    },
    {
      "epoch": 2.1484472202069904,
      "grad_norm": 2.828125,
      "learning_rate": 1.5769579247697897e-05,
      "loss": 0.8317,
      "step": 613010
    },
    {
      "epoch": 2.148482267713886,
      "grad_norm": 2.75,
      "learning_rate": 1.5768930219034195e-05,
      "loss": 0.8249,
      "step": 613020
    },
    {
      "epoch": 2.148517315220782,
      "grad_norm": 3.359375,
      "learning_rate": 1.5768281190370493e-05,
      "loss": 0.8872,
      "step": 613030
    },
    {
      "epoch": 2.1485523627276772,
      "grad_norm": 2.90625,
      "learning_rate": 1.576763216170679e-05,
      "loss": 0.8271,
      "step": 613040
    },
    {
      "epoch": 2.148587410234573,
      "grad_norm": 3.203125,
      "learning_rate": 1.576698313304309e-05,
      "loss": 0.8918,
      "step": 613050
    },
    {
      "epoch": 2.1486224577414688,
      "grad_norm": 2.671875,
      "learning_rate": 1.5766334104379387e-05,
      "loss": 0.8553,
      "step": 613060
    },
    {
      "epoch": 2.148657505248364,
      "grad_norm": 2.8125,
      "learning_rate": 1.5765685075715685e-05,
      "loss": 0.8123,
      "step": 613070
    },
    {
      "epoch": 2.14869255275526,
      "grad_norm": 3.078125,
      "learning_rate": 1.5765036047051983e-05,
      "loss": 0.7134,
      "step": 613080
    },
    {
      "epoch": 2.148727600262155,
      "grad_norm": 2.265625,
      "learning_rate": 1.576438701838828e-05,
      "loss": 0.7725,
      "step": 613090
    },
    {
      "epoch": 2.148762647769051,
      "grad_norm": 3.125,
      "learning_rate": 1.576373798972458e-05,
      "loss": 0.8438,
      "step": 613100
    },
    {
      "epoch": 2.1487976952759467,
      "grad_norm": 2.90625,
      "learning_rate": 1.5763088961060877e-05,
      "loss": 0.7629,
      "step": 613110
    },
    {
      "epoch": 2.148832742782842,
      "grad_norm": 2.546875,
      "learning_rate": 1.5762439932397175e-05,
      "loss": 0.8125,
      "step": 613120
    },
    {
      "epoch": 2.1488677902897377,
      "grad_norm": 2.875,
      "learning_rate": 1.5761790903733473e-05,
      "loss": 0.8286,
      "step": 613130
    },
    {
      "epoch": 2.1489028377966335,
      "grad_norm": 2.765625,
      "learning_rate": 1.576114187506977e-05,
      "loss": 0.8107,
      "step": 613140
    },
    {
      "epoch": 2.148937885303529,
      "grad_norm": 2.78125,
      "learning_rate": 1.576049284640607e-05,
      "loss": 0.8099,
      "step": 613150
    },
    {
      "epoch": 2.1489729328104246,
      "grad_norm": 2.9375,
      "learning_rate": 1.5759843817742367e-05,
      "loss": 0.8378,
      "step": 613160
    },
    {
      "epoch": 2.1490079803173203,
      "grad_norm": 2.96875,
      "learning_rate": 1.5759194789078665e-05,
      "loss": 0.8607,
      "step": 613170
    },
    {
      "epoch": 2.1490430278242156,
      "grad_norm": 2.90625,
      "learning_rate": 1.5758545760414963e-05,
      "loss": 0.877,
      "step": 613180
    },
    {
      "epoch": 2.1490780753311114,
      "grad_norm": 3.171875,
      "learning_rate": 1.575789673175126e-05,
      "loss": 0.7947,
      "step": 613190
    },
    {
      "epoch": 2.149113122838007,
      "grad_norm": 3.21875,
      "learning_rate": 1.575724770308756e-05,
      "loss": 0.7999,
      "step": 613200
    },
    {
      "epoch": 2.1491481703449025,
      "grad_norm": 2.90625,
      "learning_rate": 1.5756598674423857e-05,
      "loss": 0.7757,
      "step": 613210
    },
    {
      "epoch": 2.1491832178517982,
      "grad_norm": 3.03125,
      "learning_rate": 1.575594964576016e-05,
      "loss": 0.7699,
      "step": 613220
    },
    {
      "epoch": 2.1492182653586935,
      "grad_norm": 2.296875,
      "learning_rate": 1.5755300617096456e-05,
      "loss": 0.8412,
      "step": 613230
    },
    {
      "epoch": 2.1492533128655893,
      "grad_norm": 3.34375,
      "learning_rate": 1.5754651588432754e-05,
      "loss": 0.7341,
      "step": 613240
    },
    {
      "epoch": 2.149288360372485,
      "grad_norm": 3.0,
      "learning_rate": 1.5754002559769052e-05,
      "loss": 0.7738,
      "step": 613250
    },
    {
      "epoch": 2.1493234078793804,
      "grad_norm": 3.078125,
      "learning_rate": 1.575335353110535e-05,
      "loss": 0.8329,
      "step": 613260
    },
    {
      "epoch": 2.149358455386276,
      "grad_norm": 2.9375,
      "learning_rate": 1.5752704502441645e-05,
      "loss": 0.8288,
      "step": 613270
    },
    {
      "epoch": 2.149393502893172,
      "grad_norm": 2.765625,
      "learning_rate": 1.5752055473777943e-05,
      "loss": 0.8754,
      "step": 613280
    },
    {
      "epoch": 2.149428550400067,
      "grad_norm": 3.1875,
      "learning_rate": 1.575140644511424e-05,
      "loss": 0.7689,
      "step": 613290
    },
    {
      "epoch": 2.149463597906963,
      "grad_norm": 2.59375,
      "learning_rate": 1.575075741645054e-05,
      "loss": 0.7229,
      "step": 613300
    },
    {
      "epoch": 2.1494986454138587,
      "grad_norm": 3.046875,
      "learning_rate": 1.5750108387786837e-05,
      "loss": 0.8399,
      "step": 613310
    },
    {
      "epoch": 2.149533692920754,
      "grad_norm": 3.046875,
      "learning_rate": 1.5749459359123135e-05,
      "loss": 0.8306,
      "step": 613320
    },
    {
      "epoch": 2.14956874042765,
      "grad_norm": 2.921875,
      "learning_rate": 1.5748810330459433e-05,
      "loss": 0.8005,
      "step": 613330
    },
    {
      "epoch": 2.149603787934545,
      "grad_norm": 2.75,
      "learning_rate": 1.5748161301795734e-05,
      "loss": 0.7867,
      "step": 613340
    },
    {
      "epoch": 2.149638835441441,
      "grad_norm": 3.09375,
      "learning_rate": 1.5747512273132032e-05,
      "loss": 0.8334,
      "step": 613350
    },
    {
      "epoch": 2.1496738829483366,
      "grad_norm": 3.109375,
      "learning_rate": 1.574686324446833e-05,
      "loss": 0.7925,
      "step": 613360
    },
    {
      "epoch": 2.149708930455232,
      "grad_norm": 2.6875,
      "learning_rate": 1.574621421580463e-05,
      "loss": 0.8456,
      "step": 613370
    },
    {
      "epoch": 2.1497439779621277,
      "grad_norm": 2.96875,
      "learning_rate": 1.5745565187140926e-05,
      "loss": 0.8308,
      "step": 613380
    },
    {
      "epoch": 2.1497790254690234,
      "grad_norm": 2.21875,
      "learning_rate": 1.5744916158477224e-05,
      "loss": 0.834,
      "step": 613390
    },
    {
      "epoch": 2.1498140729759188,
      "grad_norm": 3.3125,
      "learning_rate": 1.5744267129813522e-05,
      "loss": 0.8335,
      "step": 613400
    },
    {
      "epoch": 2.1498491204828145,
      "grad_norm": 3.03125,
      "learning_rate": 1.574361810114982e-05,
      "loss": 0.8037,
      "step": 613410
    },
    {
      "epoch": 2.1498841679897103,
      "grad_norm": 2.671875,
      "learning_rate": 1.574296907248612e-05,
      "loss": 0.8613,
      "step": 613420
    },
    {
      "epoch": 2.1499192154966056,
      "grad_norm": 2.6875,
      "learning_rate": 1.5742320043822416e-05,
      "loss": 0.7712,
      "step": 613430
    },
    {
      "epoch": 2.1499542630035013,
      "grad_norm": 2.9375,
      "learning_rate": 1.5741671015158714e-05,
      "loss": 0.8088,
      "step": 613440
    },
    {
      "epoch": 2.1499893105103967,
      "grad_norm": 2.578125,
      "learning_rate": 1.5741021986495012e-05,
      "loss": 0.7461,
      "step": 613450
    },
    {
      "epoch": 2.1500243580172924,
      "grad_norm": 3.21875,
      "learning_rate": 1.574037295783131e-05,
      "loss": 0.793,
      "step": 613460
    },
    {
      "epoch": 2.150059405524188,
      "grad_norm": 2.890625,
      "learning_rate": 1.573972392916761e-05,
      "loss": 0.7532,
      "step": 613470
    },
    {
      "epoch": 2.1500944530310835,
      "grad_norm": 2.78125,
      "learning_rate": 1.5739074900503906e-05,
      "loss": 0.7533,
      "step": 613480
    },
    {
      "epoch": 2.1501295005379792,
      "grad_norm": 3.1875,
      "learning_rate": 1.5738425871840204e-05,
      "loss": 0.8087,
      "step": 613490
    },
    {
      "epoch": 2.150164548044875,
      "grad_norm": 3.484375,
      "learning_rate": 1.5737776843176502e-05,
      "loss": 0.8729,
      "step": 613500
    },
    {
      "epoch": 2.1501995955517703,
      "grad_norm": 3.09375,
      "learning_rate": 1.57371278145128e-05,
      "loss": 0.7949,
      "step": 613510
    },
    {
      "epoch": 2.150234643058666,
      "grad_norm": 2.59375,
      "learning_rate": 1.57364787858491e-05,
      "loss": 0.728,
      "step": 613520
    },
    {
      "epoch": 2.150269690565562,
      "grad_norm": 2.875,
      "learning_rate": 1.5735829757185396e-05,
      "loss": 0.849,
      "step": 613530
    },
    {
      "epoch": 2.150304738072457,
      "grad_norm": 2.9375,
      "learning_rate": 1.5735180728521694e-05,
      "loss": 0.7321,
      "step": 613540
    },
    {
      "epoch": 2.150339785579353,
      "grad_norm": 2.90625,
      "learning_rate": 1.5734531699857992e-05,
      "loss": 0.8038,
      "step": 613550
    },
    {
      "epoch": 2.150374833086248,
      "grad_norm": 2.578125,
      "learning_rate": 1.573388267119429e-05,
      "loss": 0.8324,
      "step": 613560
    },
    {
      "epoch": 2.150409880593144,
      "grad_norm": 2.90625,
      "learning_rate": 1.573323364253059e-05,
      "loss": 0.7592,
      "step": 613570
    },
    {
      "epoch": 2.1504449281000397,
      "grad_norm": 3.421875,
      "learning_rate": 1.5732584613866886e-05,
      "loss": 0.7434,
      "step": 613580
    },
    {
      "epoch": 2.150479975606935,
      "grad_norm": 2.546875,
      "learning_rate": 1.5731935585203188e-05,
      "loss": 0.9343,
      "step": 613590
    },
    {
      "epoch": 2.150515023113831,
      "grad_norm": 3.40625,
      "learning_rate": 1.5731286556539486e-05,
      "loss": 0.7636,
      "step": 613600
    },
    {
      "epoch": 2.1505500706207266,
      "grad_norm": 2.59375,
      "learning_rate": 1.5730637527875784e-05,
      "loss": 0.8375,
      "step": 613610
    },
    {
      "epoch": 2.150585118127622,
      "grad_norm": 3.046875,
      "learning_rate": 1.5729988499212082e-05,
      "loss": 0.7529,
      "step": 613620
    },
    {
      "epoch": 2.1506201656345176,
      "grad_norm": 2.828125,
      "learning_rate": 1.572933947054838e-05,
      "loss": 0.8328,
      "step": 613630
    },
    {
      "epoch": 2.1506552131414134,
      "grad_norm": 3.765625,
      "learning_rate": 1.5728690441884674e-05,
      "loss": 0.8543,
      "step": 613640
    },
    {
      "epoch": 2.1506902606483087,
      "grad_norm": 2.8125,
      "learning_rate": 1.5728041413220972e-05,
      "loss": 0.8089,
      "step": 613650
    },
    {
      "epoch": 2.1507253081552045,
      "grad_norm": 3.1875,
      "learning_rate": 1.572739238455727e-05,
      "loss": 0.822,
      "step": 613660
    },
    {
      "epoch": 2.1507603556620998,
      "grad_norm": 3.546875,
      "learning_rate": 1.572674335589357e-05,
      "loss": 0.8853,
      "step": 613670
    },
    {
      "epoch": 2.1507954031689955,
      "grad_norm": 3.15625,
      "learning_rate": 1.5726094327229866e-05,
      "loss": 0.885,
      "step": 613680
    },
    {
      "epoch": 2.1508304506758913,
      "grad_norm": 2.859375,
      "learning_rate": 1.5725445298566164e-05,
      "loss": 0.7617,
      "step": 613690
    },
    {
      "epoch": 2.1508654981827866,
      "grad_norm": 3.09375,
      "learning_rate": 1.5724796269902466e-05,
      "loss": 0.7979,
      "step": 613700
    },
    {
      "epoch": 2.1509005456896824,
      "grad_norm": 2.984375,
      "learning_rate": 1.5724147241238764e-05,
      "loss": 0.7526,
      "step": 613710
    },
    {
      "epoch": 2.150935593196578,
      "grad_norm": 2.75,
      "learning_rate": 1.5723498212575062e-05,
      "loss": 0.7854,
      "step": 613720
    },
    {
      "epoch": 2.1509706407034734,
      "grad_norm": 2.5,
      "learning_rate": 1.572284918391136e-05,
      "loss": 0.772,
      "step": 613730
    },
    {
      "epoch": 2.151005688210369,
      "grad_norm": 3.0625,
      "learning_rate": 1.5722200155247658e-05,
      "loss": 0.8189,
      "step": 613740
    },
    {
      "epoch": 2.151040735717265,
      "grad_norm": 2.390625,
      "learning_rate": 1.5721551126583956e-05,
      "loss": 0.8148,
      "step": 613750
    },
    {
      "epoch": 2.1510757832241603,
      "grad_norm": 2.65625,
      "learning_rate": 1.5720902097920254e-05,
      "loss": 0.8483,
      "step": 613760
    },
    {
      "epoch": 2.151110830731056,
      "grad_norm": 3.109375,
      "learning_rate": 1.5720253069256552e-05,
      "loss": 0.8098,
      "step": 613770
    },
    {
      "epoch": 2.1511458782379513,
      "grad_norm": 3.03125,
      "learning_rate": 1.571960404059285e-05,
      "loss": 0.773,
      "step": 613780
    },
    {
      "epoch": 2.151180925744847,
      "grad_norm": 2.953125,
      "learning_rate": 1.5718955011929148e-05,
      "loss": 0.807,
      "step": 613790
    },
    {
      "epoch": 2.151215973251743,
      "grad_norm": 3.3125,
      "learning_rate": 1.5718305983265446e-05,
      "loss": 0.7657,
      "step": 613800
    },
    {
      "epoch": 2.151251020758638,
      "grad_norm": 2.546875,
      "learning_rate": 1.5717656954601744e-05,
      "loss": 0.7407,
      "step": 613810
    },
    {
      "epoch": 2.151286068265534,
      "grad_norm": 3.0,
      "learning_rate": 1.5717007925938042e-05,
      "loss": 0.8254,
      "step": 613820
    },
    {
      "epoch": 2.1513211157724297,
      "grad_norm": 3.328125,
      "learning_rate": 1.571635889727434e-05,
      "loss": 0.8763,
      "step": 613830
    },
    {
      "epoch": 2.151356163279325,
      "grad_norm": 3.03125,
      "learning_rate": 1.5715709868610638e-05,
      "loss": 0.8117,
      "step": 613840
    },
    {
      "epoch": 2.1513912107862208,
      "grad_norm": 3.234375,
      "learning_rate": 1.5715060839946936e-05,
      "loss": 0.8225,
      "step": 613850
    },
    {
      "epoch": 2.1514262582931165,
      "grad_norm": 2.796875,
      "learning_rate": 1.5714411811283234e-05,
      "loss": 0.7798,
      "step": 613860
    },
    {
      "epoch": 2.151461305800012,
      "grad_norm": 2.90625,
      "learning_rate": 1.5713762782619532e-05,
      "loss": 0.8299,
      "step": 613870
    },
    {
      "epoch": 2.1514963533069076,
      "grad_norm": 2.5625,
      "learning_rate": 1.571311375395583e-05,
      "loss": 0.8108,
      "step": 613880
    },
    {
      "epoch": 2.151531400813803,
      "grad_norm": 2.75,
      "learning_rate": 1.5712464725292128e-05,
      "loss": 0.8226,
      "step": 613890
    },
    {
      "epoch": 2.1515664483206987,
      "grad_norm": 2.71875,
      "learning_rate": 1.5711815696628426e-05,
      "loss": 0.8848,
      "step": 613900
    },
    {
      "epoch": 2.1516014958275944,
      "grad_norm": 3.1875,
      "learning_rate": 1.5711166667964724e-05,
      "loss": 0.8729,
      "step": 613910
    },
    {
      "epoch": 2.1516365433344897,
      "grad_norm": 3.484375,
      "learning_rate": 1.5710517639301022e-05,
      "loss": 0.7968,
      "step": 613920
    },
    {
      "epoch": 2.1516715908413855,
      "grad_norm": 2.765625,
      "learning_rate": 1.570986861063732e-05,
      "loss": 0.7493,
      "step": 613930
    },
    {
      "epoch": 2.1517066383482812,
      "grad_norm": 3.203125,
      "learning_rate": 1.5709219581973618e-05,
      "loss": 0.8959,
      "step": 613940
    },
    {
      "epoch": 2.1517416858551766,
      "grad_norm": 2.90625,
      "learning_rate": 1.570857055330992e-05,
      "loss": 0.7673,
      "step": 613950
    },
    {
      "epoch": 2.1517767333620723,
      "grad_norm": 2.828125,
      "learning_rate": 1.5707921524646217e-05,
      "loss": 0.8437,
      "step": 613960
    },
    {
      "epoch": 2.151811780868968,
      "grad_norm": 2.40625,
      "learning_rate": 1.5707272495982515e-05,
      "loss": 0.812,
      "step": 613970
    },
    {
      "epoch": 2.1518468283758634,
      "grad_norm": 2.90625,
      "learning_rate": 1.5706623467318813e-05,
      "loss": 0.9058,
      "step": 613980
    },
    {
      "epoch": 2.151881875882759,
      "grad_norm": 2.984375,
      "learning_rate": 1.570597443865511e-05,
      "loss": 0.8281,
      "step": 613990
    },
    {
      "epoch": 2.1519169233896545,
      "grad_norm": 2.78125,
      "learning_rate": 1.570532540999141e-05,
      "loss": 0.8474,
      "step": 614000
    },
    {
      "epoch": 2.15195197089655,
      "grad_norm": 2.609375,
      "learning_rate": 1.5704676381327707e-05,
      "loss": 0.7922,
      "step": 614010
    },
    {
      "epoch": 2.151987018403446,
      "grad_norm": 2.6875,
      "learning_rate": 1.5704027352664002e-05,
      "loss": 0.8127,
      "step": 614020
    },
    {
      "epoch": 2.1520220659103413,
      "grad_norm": 2.78125,
      "learning_rate": 1.57033783240003e-05,
      "loss": 0.8084,
      "step": 614030
    },
    {
      "epoch": 2.152057113417237,
      "grad_norm": 3.203125,
      "learning_rate": 1.5702729295336598e-05,
      "loss": 0.8708,
      "step": 614040
    },
    {
      "epoch": 2.152092160924133,
      "grad_norm": 3.0,
      "learning_rate": 1.5702080266672896e-05,
      "loss": 0.8484,
      "step": 614050
    },
    {
      "epoch": 2.152127208431028,
      "grad_norm": 2.984375,
      "learning_rate": 1.5701431238009194e-05,
      "loss": 0.7652,
      "step": 614060
    },
    {
      "epoch": 2.152162255937924,
      "grad_norm": 3.15625,
      "learning_rate": 1.5700782209345495e-05,
      "loss": 0.8241,
      "step": 614070
    },
    {
      "epoch": 2.1521973034448196,
      "grad_norm": 2.671875,
      "learning_rate": 1.5700133180681793e-05,
      "loss": 0.8607,
      "step": 614080
    },
    {
      "epoch": 2.152232350951715,
      "grad_norm": 2.625,
      "learning_rate": 1.569948415201809e-05,
      "loss": 0.7711,
      "step": 614090
    },
    {
      "epoch": 2.1522673984586107,
      "grad_norm": 2.546875,
      "learning_rate": 1.569883512335439e-05,
      "loss": 0.8235,
      "step": 614100
    },
    {
      "epoch": 2.152302445965506,
      "grad_norm": 2.5,
      "learning_rate": 1.5698186094690687e-05,
      "loss": 0.7985,
      "step": 614110
    },
    {
      "epoch": 2.1523374934724018,
      "grad_norm": 2.8125,
      "learning_rate": 1.5697537066026985e-05,
      "loss": 0.7878,
      "step": 614120
    },
    {
      "epoch": 2.1523725409792975,
      "grad_norm": 3.390625,
      "learning_rate": 1.5696888037363283e-05,
      "loss": 0.8471,
      "step": 614130
    },
    {
      "epoch": 2.152407588486193,
      "grad_norm": 2.984375,
      "learning_rate": 1.569623900869958e-05,
      "loss": 0.8012,
      "step": 614140
    },
    {
      "epoch": 2.1524426359930886,
      "grad_norm": 2.953125,
      "learning_rate": 1.569558998003588e-05,
      "loss": 0.7251,
      "step": 614150
    },
    {
      "epoch": 2.1524776834999844,
      "grad_norm": 3.265625,
      "learning_rate": 1.5694940951372177e-05,
      "loss": 0.8159,
      "step": 614160
    },
    {
      "epoch": 2.1525127310068797,
      "grad_norm": 3.296875,
      "learning_rate": 1.5694291922708475e-05,
      "loss": 0.8068,
      "step": 614170
    },
    {
      "epoch": 2.1525477785137754,
      "grad_norm": 2.9375,
      "learning_rate": 1.5693642894044773e-05,
      "loss": 0.7831,
      "step": 614180
    },
    {
      "epoch": 2.152582826020671,
      "grad_norm": 2.84375,
      "learning_rate": 1.569299386538107e-05,
      "loss": 0.8361,
      "step": 614190
    },
    {
      "epoch": 2.1526178735275665,
      "grad_norm": 2.953125,
      "learning_rate": 1.569234483671737e-05,
      "loss": 0.8524,
      "step": 614200
    },
    {
      "epoch": 2.1526529210344623,
      "grad_norm": 2.671875,
      "learning_rate": 1.5691695808053667e-05,
      "loss": 0.7964,
      "step": 614210
    },
    {
      "epoch": 2.1526879685413576,
      "grad_norm": 2.828125,
      "learning_rate": 1.5691046779389965e-05,
      "loss": 0.8809,
      "step": 614220
    },
    {
      "epoch": 2.1527230160482533,
      "grad_norm": 2.9375,
      "learning_rate": 1.5690397750726263e-05,
      "loss": 0.7899,
      "step": 614230
    },
    {
      "epoch": 2.152758063555149,
      "grad_norm": 2.90625,
      "learning_rate": 1.568974872206256e-05,
      "loss": 0.7389,
      "step": 614240
    },
    {
      "epoch": 2.1527931110620444,
      "grad_norm": 2.984375,
      "learning_rate": 1.568909969339886e-05,
      "loss": 0.8298,
      "step": 614250
    },
    {
      "epoch": 2.15282815856894,
      "grad_norm": 3.15625,
      "learning_rate": 1.5688450664735157e-05,
      "loss": 0.7946,
      "step": 614260
    },
    {
      "epoch": 2.152863206075836,
      "grad_norm": 2.59375,
      "learning_rate": 1.5687801636071455e-05,
      "loss": 0.8117,
      "step": 614270
    },
    {
      "epoch": 2.1528982535827312,
      "grad_norm": 2.96875,
      "learning_rate": 1.5687152607407753e-05,
      "loss": 0.7569,
      "step": 614280
    },
    {
      "epoch": 2.152933301089627,
      "grad_norm": 2.71875,
      "learning_rate": 1.568650357874405e-05,
      "loss": 0.7991,
      "step": 614290
    },
    {
      "epoch": 2.1529683485965228,
      "grad_norm": 2.6875,
      "learning_rate": 1.568585455008035e-05,
      "loss": 0.797,
      "step": 614300
    },
    {
      "epoch": 2.153003396103418,
      "grad_norm": 3.015625,
      "learning_rate": 1.5685205521416647e-05,
      "loss": 0.8487,
      "step": 614310
    },
    {
      "epoch": 2.153038443610314,
      "grad_norm": 3.609375,
      "learning_rate": 1.568455649275295e-05,
      "loss": 0.7924,
      "step": 614320
    },
    {
      "epoch": 2.1530734911172096,
      "grad_norm": 2.609375,
      "learning_rate": 1.5683907464089247e-05,
      "loss": 0.7626,
      "step": 614330
    },
    {
      "epoch": 2.153108538624105,
      "grad_norm": 2.53125,
      "learning_rate": 1.5683258435425545e-05,
      "loss": 0.7593,
      "step": 614340
    },
    {
      "epoch": 2.1531435861310007,
      "grad_norm": 3.0625,
      "learning_rate": 1.5682609406761843e-05,
      "loss": 0.7901,
      "step": 614350
    },
    {
      "epoch": 2.153178633637896,
      "grad_norm": 2.6875,
      "learning_rate": 1.568196037809814e-05,
      "loss": 0.8653,
      "step": 614360
    },
    {
      "epoch": 2.1532136811447917,
      "grad_norm": 3.078125,
      "learning_rate": 1.568131134943444e-05,
      "loss": 0.9295,
      "step": 614370
    },
    {
      "epoch": 2.1532487286516875,
      "grad_norm": 2.71875,
      "learning_rate": 1.5680662320770737e-05,
      "loss": 0.8411,
      "step": 614380
    },
    {
      "epoch": 2.153283776158583,
      "grad_norm": 2.90625,
      "learning_rate": 1.5680013292107035e-05,
      "loss": 0.7646,
      "step": 614390
    },
    {
      "epoch": 2.1533188236654786,
      "grad_norm": 2.953125,
      "learning_rate": 1.567936426344333e-05,
      "loss": 0.8769,
      "step": 614400
    },
    {
      "epoch": 2.1533538711723743,
      "grad_norm": 3.375,
      "learning_rate": 1.5678715234779627e-05,
      "loss": 0.8592,
      "step": 614410
    },
    {
      "epoch": 2.1533889186792696,
      "grad_norm": 3.265625,
      "learning_rate": 1.5678066206115925e-05,
      "loss": 0.7933,
      "step": 614420
    },
    {
      "epoch": 2.1534239661861654,
      "grad_norm": 2.984375,
      "learning_rate": 1.5677417177452227e-05,
      "loss": 0.8317,
      "step": 614430
    },
    {
      "epoch": 2.153459013693061,
      "grad_norm": 2.453125,
      "learning_rate": 1.5676768148788525e-05,
      "loss": 0.7721,
      "step": 614440
    },
    {
      "epoch": 2.1534940611999565,
      "grad_norm": 2.640625,
      "learning_rate": 1.5676119120124823e-05,
      "loss": 0.7825,
      "step": 614450
    },
    {
      "epoch": 2.153529108706852,
      "grad_norm": 2.96875,
      "learning_rate": 1.567547009146112e-05,
      "loss": 0.7808,
      "step": 614460
    },
    {
      "epoch": 2.1535641562137475,
      "grad_norm": 2.75,
      "learning_rate": 1.567482106279742e-05,
      "loss": 0.7671,
      "step": 614470
    },
    {
      "epoch": 2.1535992037206433,
      "grad_norm": 3.5625,
      "learning_rate": 1.5674172034133717e-05,
      "loss": 0.8418,
      "step": 614480
    },
    {
      "epoch": 2.153634251227539,
      "grad_norm": 3.046875,
      "learning_rate": 1.5673523005470015e-05,
      "loss": 0.7775,
      "step": 614490
    },
    {
      "epoch": 2.1536692987344344,
      "grad_norm": 2.96875,
      "learning_rate": 1.5672873976806313e-05,
      "loss": 0.7522,
      "step": 614500
    },
    {
      "epoch": 2.15370434624133,
      "grad_norm": 2.921875,
      "learning_rate": 1.567222494814261e-05,
      "loss": 0.788,
      "step": 614510
    },
    {
      "epoch": 2.153739393748226,
      "grad_norm": 3.015625,
      "learning_rate": 1.567157591947891e-05,
      "loss": 0.8556,
      "step": 614520
    },
    {
      "epoch": 2.153774441255121,
      "grad_norm": 2.828125,
      "learning_rate": 1.5670926890815207e-05,
      "loss": 0.8961,
      "step": 614530
    },
    {
      "epoch": 2.153809488762017,
      "grad_norm": 3.484375,
      "learning_rate": 1.5670277862151505e-05,
      "loss": 0.9245,
      "step": 614540
    },
    {
      "epoch": 2.1538445362689127,
      "grad_norm": 3.0,
      "learning_rate": 1.5669628833487803e-05,
      "loss": 0.7454,
      "step": 614550
    },
    {
      "epoch": 2.153879583775808,
      "grad_norm": 2.5625,
      "learning_rate": 1.56689798048241e-05,
      "loss": 0.8101,
      "step": 614560
    },
    {
      "epoch": 2.1539146312827038,
      "grad_norm": 2.90625,
      "learning_rate": 1.5668330776160402e-05,
      "loss": 0.7822,
      "step": 614570
    },
    {
      "epoch": 2.1539496787895995,
      "grad_norm": 2.90625,
      "learning_rate": 1.5667681747496697e-05,
      "loss": 0.7957,
      "step": 614580
    },
    {
      "epoch": 2.153984726296495,
      "grad_norm": 2.921875,
      "learning_rate": 1.5667032718832995e-05,
      "loss": 0.8359,
      "step": 614590
    },
    {
      "epoch": 2.1540197738033906,
      "grad_norm": 2.671875,
      "learning_rate": 1.5666383690169293e-05,
      "loss": 0.7618,
      "step": 614600
    },
    {
      "epoch": 2.154054821310286,
      "grad_norm": 2.765625,
      "learning_rate": 1.566573466150559e-05,
      "loss": 0.865,
      "step": 614610
    },
    {
      "epoch": 2.1540898688171817,
      "grad_norm": 2.765625,
      "learning_rate": 1.566508563284189e-05,
      "loss": 0.8382,
      "step": 614620
    },
    {
      "epoch": 2.1541249163240774,
      "grad_norm": 3.96875,
      "learning_rate": 1.5664436604178187e-05,
      "loss": 0.7828,
      "step": 614630
    },
    {
      "epoch": 2.1541599638309727,
      "grad_norm": 3.15625,
      "learning_rate": 1.5663787575514485e-05,
      "loss": 0.8571,
      "step": 614640
    },
    {
      "epoch": 2.1541950113378685,
      "grad_norm": 2.53125,
      "learning_rate": 1.5663138546850783e-05,
      "loss": 0.8802,
      "step": 614650
    },
    {
      "epoch": 2.1542300588447643,
      "grad_norm": 3.03125,
      "learning_rate": 1.566248951818708e-05,
      "loss": 0.8288,
      "step": 614660
    },
    {
      "epoch": 2.1542651063516596,
      "grad_norm": 2.71875,
      "learning_rate": 1.566184048952338e-05,
      "loss": 0.7822,
      "step": 614670
    },
    {
      "epoch": 2.1543001538585553,
      "grad_norm": 3.21875,
      "learning_rate": 1.5661191460859677e-05,
      "loss": 0.7593,
      "step": 614680
    },
    {
      "epoch": 2.154335201365451,
      "grad_norm": 2.96875,
      "learning_rate": 1.5660542432195978e-05,
      "loss": 0.8069,
      "step": 614690
    },
    {
      "epoch": 2.1543702488723464,
      "grad_norm": 2.90625,
      "learning_rate": 1.5659893403532276e-05,
      "loss": 0.7547,
      "step": 614700
    },
    {
      "epoch": 2.154405296379242,
      "grad_norm": 3.0,
      "learning_rate": 1.5659244374868574e-05,
      "loss": 0.7817,
      "step": 614710
    },
    {
      "epoch": 2.1544403438861375,
      "grad_norm": 2.859375,
      "learning_rate": 1.5658595346204872e-05,
      "loss": 0.8433,
      "step": 614720
    },
    {
      "epoch": 2.1544753913930332,
      "grad_norm": 2.625,
      "learning_rate": 1.565794631754117e-05,
      "loss": 0.7742,
      "step": 614730
    },
    {
      "epoch": 2.154510438899929,
      "grad_norm": 3.40625,
      "learning_rate": 1.5657297288877468e-05,
      "loss": 0.8449,
      "step": 614740
    },
    {
      "epoch": 2.1545454864068243,
      "grad_norm": 2.890625,
      "learning_rate": 1.5656648260213766e-05,
      "loss": 0.8427,
      "step": 614750
    },
    {
      "epoch": 2.15458053391372,
      "grad_norm": 2.90625,
      "learning_rate": 1.5655999231550064e-05,
      "loss": 0.8345,
      "step": 614760
    },
    {
      "epoch": 2.154615581420616,
      "grad_norm": 3.75,
      "learning_rate": 1.565535020288636e-05,
      "loss": 0.8454,
      "step": 614770
    },
    {
      "epoch": 2.154650628927511,
      "grad_norm": 2.734375,
      "learning_rate": 1.5654701174222657e-05,
      "loss": 0.7955,
      "step": 614780
    },
    {
      "epoch": 2.154685676434407,
      "grad_norm": 3.078125,
      "learning_rate": 1.5654052145558955e-05,
      "loss": 0.7922,
      "step": 614790
    },
    {
      "epoch": 2.1547207239413027,
      "grad_norm": 3.125,
      "learning_rate": 1.5653403116895256e-05,
      "loss": 0.78,
      "step": 614800
    },
    {
      "epoch": 2.154755771448198,
      "grad_norm": 3.359375,
      "learning_rate": 1.5652754088231554e-05,
      "loss": 0.915,
      "step": 614810
    },
    {
      "epoch": 2.1547908189550937,
      "grad_norm": 2.71875,
      "learning_rate": 1.5652105059567852e-05,
      "loss": 0.7529,
      "step": 614820
    },
    {
      "epoch": 2.154825866461989,
      "grad_norm": 2.671875,
      "learning_rate": 1.565145603090415e-05,
      "loss": 0.8278,
      "step": 614830
    },
    {
      "epoch": 2.154860913968885,
      "grad_norm": 2.390625,
      "learning_rate": 1.5650807002240448e-05,
      "loss": 0.8778,
      "step": 614840
    },
    {
      "epoch": 2.1548959614757806,
      "grad_norm": 3.0,
      "learning_rate": 1.5650157973576746e-05,
      "loss": 0.7515,
      "step": 614850
    },
    {
      "epoch": 2.154931008982676,
      "grad_norm": 2.59375,
      "learning_rate": 1.5649508944913044e-05,
      "loss": 0.8439,
      "step": 614860
    },
    {
      "epoch": 2.1549660564895716,
      "grad_norm": 3.03125,
      "learning_rate": 1.5648859916249342e-05,
      "loss": 0.8422,
      "step": 614870
    },
    {
      "epoch": 2.1550011039964674,
      "grad_norm": 3.21875,
      "learning_rate": 1.564821088758564e-05,
      "loss": 0.7401,
      "step": 614880
    },
    {
      "epoch": 2.1550361515033627,
      "grad_norm": 3.03125,
      "learning_rate": 1.5647561858921938e-05,
      "loss": 0.7897,
      "step": 614890
    },
    {
      "epoch": 2.1550711990102585,
      "grad_norm": 2.4375,
      "learning_rate": 1.5646912830258236e-05,
      "loss": 0.8613,
      "step": 614900
    },
    {
      "epoch": 2.155106246517154,
      "grad_norm": 3.0,
      "learning_rate": 1.5646263801594534e-05,
      "loss": 0.7747,
      "step": 614910
    },
    {
      "epoch": 2.1551412940240495,
      "grad_norm": 2.40625,
      "learning_rate": 1.5645614772930832e-05,
      "loss": 0.7402,
      "step": 614920
    },
    {
      "epoch": 2.1551763415309453,
      "grad_norm": 2.953125,
      "learning_rate": 1.564496574426713e-05,
      "loss": 0.8177,
      "step": 614930
    },
    {
      "epoch": 2.1552113890378406,
      "grad_norm": 3.078125,
      "learning_rate": 1.564431671560343e-05,
      "loss": 0.9523,
      "step": 614940
    },
    {
      "epoch": 2.1552464365447364,
      "grad_norm": 2.921875,
      "learning_rate": 1.564366768693973e-05,
      "loss": 0.8861,
      "step": 614950
    },
    {
      "epoch": 2.155281484051632,
      "grad_norm": 3.125,
      "learning_rate": 1.5643018658276024e-05,
      "loss": 0.7552,
      "step": 614960
    },
    {
      "epoch": 2.1553165315585274,
      "grad_norm": 2.84375,
      "learning_rate": 1.5642369629612322e-05,
      "loss": 0.8806,
      "step": 614970
    },
    {
      "epoch": 2.155351579065423,
      "grad_norm": 2.6875,
      "learning_rate": 1.564172060094862e-05,
      "loss": 0.8087,
      "step": 614980
    },
    {
      "epoch": 2.155386626572319,
      "grad_norm": 2.703125,
      "learning_rate": 1.5641071572284918e-05,
      "loss": 0.758,
      "step": 614990
    },
    {
      "epoch": 2.1554216740792143,
      "grad_norm": 2.609375,
      "learning_rate": 1.5640422543621216e-05,
      "loss": 0.8491,
      "step": 615000
    },
    {
      "epoch": 2.1554216740792143,
      "eval_loss": 0.7650275826454163,
      "eval_runtime": 563.4715,
      "eval_samples_per_second": 675.165,
      "eval_steps_per_second": 56.264,
      "step": 615000
    },
    {
      "epoch": 2.15545672158611,
      "grad_norm": 2.515625,
      "learning_rate": 1.5639773514957514e-05,
      "loss": 0.7917,
      "step": 615010
    },
    {
      "epoch": 2.1554917690930058,
      "grad_norm": 3.1875,
      "learning_rate": 1.5639124486293812e-05,
      "loss": 0.8491,
      "step": 615020
    },
    {
      "epoch": 2.155526816599901,
      "grad_norm": 2.0625,
      "learning_rate": 1.563847545763011e-05,
      "loss": 0.8038,
      "step": 615030
    },
    {
      "epoch": 2.155561864106797,
      "grad_norm": 2.796875,
      "learning_rate": 1.5637826428966408e-05,
      "loss": 0.8455,
      "step": 615040
    },
    {
      "epoch": 2.155596911613692,
      "grad_norm": 2.359375,
      "learning_rate": 1.563717740030271e-05,
      "loss": 0.7592,
      "step": 615050
    },
    {
      "epoch": 2.155631959120588,
      "grad_norm": 3.203125,
      "learning_rate": 1.5636528371639007e-05,
      "loss": 0.8298,
      "step": 615060
    },
    {
      "epoch": 2.1556670066274837,
      "grad_norm": 2.296875,
      "learning_rate": 1.5635879342975305e-05,
      "loss": 0.7719,
      "step": 615070
    },
    {
      "epoch": 2.155702054134379,
      "grad_norm": 2.65625,
      "learning_rate": 1.5635230314311603e-05,
      "loss": 0.7601,
      "step": 615080
    },
    {
      "epoch": 2.1557371016412747,
      "grad_norm": 3.0,
      "learning_rate": 1.56345812856479e-05,
      "loss": 0.7986,
      "step": 615090
    },
    {
      "epoch": 2.1557721491481705,
      "grad_norm": 2.578125,
      "learning_rate": 1.56339322569842e-05,
      "loss": 0.7604,
      "step": 615100
    },
    {
      "epoch": 2.155807196655066,
      "grad_norm": 2.875,
      "learning_rate": 1.5633283228320497e-05,
      "loss": 0.8431,
      "step": 615110
    },
    {
      "epoch": 2.1558422441619616,
      "grad_norm": 3.171875,
      "learning_rate": 1.5632634199656795e-05,
      "loss": 0.8485,
      "step": 615120
    },
    {
      "epoch": 2.1558772916688573,
      "grad_norm": 3.90625,
      "learning_rate": 1.5631985170993093e-05,
      "loss": 0.8087,
      "step": 615130
    },
    {
      "epoch": 2.1559123391757526,
      "grad_norm": 2.96875,
      "learning_rate": 1.563133614232939e-05,
      "loss": 0.79,
      "step": 615140
    },
    {
      "epoch": 2.1559473866826484,
      "grad_norm": 2.796875,
      "learning_rate": 1.5630687113665686e-05,
      "loss": 0.8443,
      "step": 615150
    },
    {
      "epoch": 2.1559824341895437,
      "grad_norm": 2.71875,
      "learning_rate": 1.5630038085001984e-05,
      "loss": 0.8772,
      "step": 615160
    },
    {
      "epoch": 2.1560174816964395,
      "grad_norm": 2.765625,
      "learning_rate": 1.5629389056338285e-05,
      "loss": 0.8464,
      "step": 615170
    },
    {
      "epoch": 2.1560525292033352,
      "grad_norm": 3.3125,
      "learning_rate": 1.5628740027674583e-05,
      "loss": 0.8583,
      "step": 615180
    },
    {
      "epoch": 2.1560875767102305,
      "grad_norm": 3.265625,
      "learning_rate": 1.562809099901088e-05,
      "loss": 0.8577,
      "step": 615190
    },
    {
      "epoch": 2.1561226242171263,
      "grad_norm": 3.28125,
      "learning_rate": 1.562744197034718e-05,
      "loss": 0.7515,
      "step": 615200
    },
    {
      "epoch": 2.156157671724022,
      "grad_norm": 2.796875,
      "learning_rate": 1.5626792941683477e-05,
      "loss": 0.87,
      "step": 615210
    },
    {
      "epoch": 2.1561927192309174,
      "grad_norm": 2.71875,
      "learning_rate": 1.5626143913019775e-05,
      "loss": 0.7979,
      "step": 615220
    },
    {
      "epoch": 2.156227766737813,
      "grad_norm": 3.46875,
      "learning_rate": 1.5625494884356073e-05,
      "loss": 0.834,
      "step": 615230
    },
    {
      "epoch": 2.156262814244709,
      "grad_norm": 2.75,
      "learning_rate": 1.562484585569237e-05,
      "loss": 0.8122,
      "step": 615240
    },
    {
      "epoch": 2.156297861751604,
      "grad_norm": 2.640625,
      "learning_rate": 1.562419682702867e-05,
      "loss": 0.8321,
      "step": 615250
    },
    {
      "epoch": 2.1563329092585,
      "grad_norm": 2.65625,
      "learning_rate": 1.5623547798364967e-05,
      "loss": 0.8119,
      "step": 615260
    },
    {
      "epoch": 2.1563679567653953,
      "grad_norm": 2.734375,
      "learning_rate": 1.5622898769701265e-05,
      "loss": 0.7906,
      "step": 615270
    },
    {
      "epoch": 2.156403004272291,
      "grad_norm": 2.78125,
      "learning_rate": 1.5622249741037563e-05,
      "loss": 0.8292,
      "step": 615280
    },
    {
      "epoch": 2.156438051779187,
      "grad_norm": 2.8125,
      "learning_rate": 1.562160071237386e-05,
      "loss": 0.7911,
      "step": 615290
    },
    {
      "epoch": 2.156473099286082,
      "grad_norm": 2.734375,
      "learning_rate": 1.562095168371016e-05,
      "loss": 0.8018,
      "step": 615300
    },
    {
      "epoch": 2.156508146792978,
      "grad_norm": 2.578125,
      "learning_rate": 1.562030265504646e-05,
      "loss": 0.8643,
      "step": 615310
    },
    {
      "epoch": 2.1565431942998736,
      "grad_norm": 2.6875,
      "learning_rate": 1.561965362638276e-05,
      "loss": 0.8042,
      "step": 615320
    },
    {
      "epoch": 2.156578241806769,
      "grad_norm": 3.125,
      "learning_rate": 1.5619004597719057e-05,
      "loss": 0.7514,
      "step": 615330
    },
    {
      "epoch": 2.1566132893136647,
      "grad_norm": 2.796875,
      "learning_rate": 1.561835556905535e-05,
      "loss": 0.7415,
      "step": 615340
    },
    {
      "epoch": 2.1566483368205605,
      "grad_norm": 3.15625,
      "learning_rate": 1.561770654039165e-05,
      "loss": 0.8233,
      "step": 615350
    },
    {
      "epoch": 2.1566833843274558,
      "grad_norm": 2.796875,
      "learning_rate": 1.5617057511727947e-05,
      "loss": 0.7876,
      "step": 615360
    },
    {
      "epoch": 2.1567184318343515,
      "grad_norm": 3.328125,
      "learning_rate": 1.5616408483064245e-05,
      "loss": 0.8212,
      "step": 615370
    },
    {
      "epoch": 2.156753479341247,
      "grad_norm": 2.6875,
      "learning_rate": 1.5615759454400543e-05,
      "loss": 0.8272,
      "step": 615380
    },
    {
      "epoch": 2.1567885268481426,
      "grad_norm": 3.046875,
      "learning_rate": 1.561511042573684e-05,
      "loss": 0.7525,
      "step": 615390
    },
    {
      "epoch": 2.1568235743550384,
      "grad_norm": 2.96875,
      "learning_rate": 1.561446139707314e-05,
      "loss": 0.7901,
      "step": 615400
    },
    {
      "epoch": 2.1568586218619337,
      "grad_norm": 2.796875,
      "learning_rate": 1.5613812368409437e-05,
      "loss": 0.7396,
      "step": 615410
    },
    {
      "epoch": 2.1568936693688294,
      "grad_norm": 3.328125,
      "learning_rate": 1.561316333974574e-05,
      "loss": 0.8254,
      "step": 615420
    },
    {
      "epoch": 2.156928716875725,
      "grad_norm": 2.40625,
      "learning_rate": 1.5612514311082037e-05,
      "loss": 0.7662,
      "step": 615430
    },
    {
      "epoch": 2.1569637643826205,
      "grad_norm": 4.0625,
      "learning_rate": 1.5611865282418335e-05,
      "loss": 0.8337,
      "step": 615440
    },
    {
      "epoch": 2.1569988118895163,
      "grad_norm": 2.859375,
      "learning_rate": 1.5611216253754633e-05,
      "loss": 0.7907,
      "step": 615450
    },
    {
      "epoch": 2.157033859396412,
      "grad_norm": 2.875,
      "learning_rate": 1.561056722509093e-05,
      "loss": 0.7733,
      "step": 615460
    },
    {
      "epoch": 2.1570689069033073,
      "grad_norm": 3.1875,
      "learning_rate": 1.560991819642723e-05,
      "loss": 0.7422,
      "step": 615470
    },
    {
      "epoch": 2.157103954410203,
      "grad_norm": 2.84375,
      "learning_rate": 1.5609269167763527e-05,
      "loss": 0.9195,
      "step": 615480
    },
    {
      "epoch": 2.1571390019170984,
      "grad_norm": 3.0,
      "learning_rate": 1.5608620139099825e-05,
      "loss": 0.8773,
      "step": 615490
    },
    {
      "epoch": 2.157174049423994,
      "grad_norm": 2.640625,
      "learning_rate": 1.5607971110436123e-05,
      "loss": 0.84,
      "step": 615500
    },
    {
      "epoch": 2.15720909693089,
      "grad_norm": 2.875,
      "learning_rate": 1.560732208177242e-05,
      "loss": 0.8684,
      "step": 615510
    },
    {
      "epoch": 2.1572441444377852,
      "grad_norm": 2.953125,
      "learning_rate": 1.5606673053108715e-05,
      "loss": 0.7703,
      "step": 615520
    },
    {
      "epoch": 2.157279191944681,
      "grad_norm": 3.09375,
      "learning_rate": 1.5606024024445017e-05,
      "loss": 0.7375,
      "step": 615530
    },
    {
      "epoch": 2.1573142394515767,
      "grad_norm": 3.265625,
      "learning_rate": 1.5605374995781315e-05,
      "loss": 0.8292,
      "step": 615540
    },
    {
      "epoch": 2.157349286958472,
      "grad_norm": 2.75,
      "learning_rate": 1.5604725967117613e-05,
      "loss": 0.7998,
      "step": 615550
    },
    {
      "epoch": 2.157384334465368,
      "grad_norm": 3.0,
      "learning_rate": 1.560407693845391e-05,
      "loss": 0.8333,
      "step": 615560
    },
    {
      "epoch": 2.1574193819722636,
      "grad_norm": 3.5625,
      "learning_rate": 1.560342790979021e-05,
      "loss": 0.7753,
      "step": 615570
    },
    {
      "epoch": 2.157454429479159,
      "grad_norm": 2.796875,
      "learning_rate": 1.5602778881126507e-05,
      "loss": 0.7348,
      "step": 615580
    },
    {
      "epoch": 2.1574894769860546,
      "grad_norm": 2.921875,
      "learning_rate": 1.5602129852462805e-05,
      "loss": 0.7055,
      "step": 615590
    },
    {
      "epoch": 2.1575245244929504,
      "grad_norm": 3.078125,
      "learning_rate": 1.5601480823799103e-05,
      "loss": 0.8701,
      "step": 615600
    },
    {
      "epoch": 2.1575595719998457,
      "grad_norm": 3.09375,
      "learning_rate": 1.56008317951354e-05,
      "loss": 0.8689,
      "step": 615610
    },
    {
      "epoch": 2.1575946195067415,
      "grad_norm": 2.65625,
      "learning_rate": 1.56001827664717e-05,
      "loss": 0.7926,
      "step": 615620
    },
    {
      "epoch": 2.157629667013637,
      "grad_norm": 2.578125,
      "learning_rate": 1.5599533737807997e-05,
      "loss": 0.7375,
      "step": 615630
    },
    {
      "epoch": 2.1576647145205325,
      "grad_norm": 3.21875,
      "learning_rate": 1.5598884709144295e-05,
      "loss": 0.8713,
      "step": 615640
    },
    {
      "epoch": 2.1576997620274283,
      "grad_norm": 2.921875,
      "learning_rate": 1.5598235680480593e-05,
      "loss": 0.8005,
      "step": 615650
    },
    {
      "epoch": 2.1577348095343236,
      "grad_norm": 3.515625,
      "learning_rate": 1.559758665181689e-05,
      "loss": 0.8473,
      "step": 615660
    },
    {
      "epoch": 2.1577698570412194,
      "grad_norm": 2.734375,
      "learning_rate": 1.5596937623153192e-05,
      "loss": 0.8282,
      "step": 615670
    },
    {
      "epoch": 2.157804904548115,
      "grad_norm": 3.234375,
      "learning_rate": 1.559628859448949e-05,
      "loss": 0.828,
      "step": 615680
    },
    {
      "epoch": 2.1578399520550104,
      "grad_norm": 2.953125,
      "learning_rate": 1.5595639565825788e-05,
      "loss": 0.8513,
      "step": 615690
    },
    {
      "epoch": 2.157874999561906,
      "grad_norm": 2.84375,
      "learning_rate": 1.5594990537162086e-05,
      "loss": 0.8106,
      "step": 615700
    },
    {
      "epoch": 2.157910047068802,
      "grad_norm": 2.953125,
      "learning_rate": 1.559434150849838e-05,
      "loss": 0.7508,
      "step": 615710
    },
    {
      "epoch": 2.1579450945756973,
      "grad_norm": 2.828125,
      "learning_rate": 1.559369247983468e-05,
      "loss": 0.7702,
      "step": 615720
    },
    {
      "epoch": 2.157980142082593,
      "grad_norm": 2.84375,
      "learning_rate": 1.5593043451170977e-05,
      "loss": 0.8043,
      "step": 615730
    },
    {
      "epoch": 2.1580151895894883,
      "grad_norm": 2.4375,
      "learning_rate": 1.5592394422507275e-05,
      "loss": 0.7915,
      "step": 615740
    },
    {
      "epoch": 2.158050237096384,
      "grad_norm": 2.90625,
      "learning_rate": 1.5591745393843573e-05,
      "loss": 0.8383,
      "step": 615750
    },
    {
      "epoch": 2.15808528460328,
      "grad_norm": 2.609375,
      "learning_rate": 1.559109636517987e-05,
      "loss": 0.8181,
      "step": 615760
    },
    {
      "epoch": 2.158120332110175,
      "grad_norm": 2.390625,
      "learning_rate": 1.559044733651617e-05,
      "loss": 0.6985,
      "step": 615770
    },
    {
      "epoch": 2.158155379617071,
      "grad_norm": 3.265625,
      "learning_rate": 1.5589798307852467e-05,
      "loss": 0.9254,
      "step": 615780
    },
    {
      "epoch": 2.1581904271239667,
      "grad_norm": 3.21875,
      "learning_rate": 1.5589149279188768e-05,
      "loss": 0.8442,
      "step": 615790
    },
    {
      "epoch": 2.158225474630862,
      "grad_norm": 2.984375,
      "learning_rate": 1.5588500250525066e-05,
      "loss": 0.8494,
      "step": 615800
    },
    {
      "epoch": 2.1582605221377578,
      "grad_norm": 2.984375,
      "learning_rate": 1.5587851221861364e-05,
      "loss": 0.8302,
      "step": 615810
    },
    {
      "epoch": 2.1582955696446535,
      "grad_norm": 3.0,
      "learning_rate": 1.5587202193197662e-05,
      "loss": 0.8934,
      "step": 615820
    },
    {
      "epoch": 2.158330617151549,
      "grad_norm": 3.546875,
      "learning_rate": 1.558655316453396e-05,
      "loss": 0.8433,
      "step": 615830
    },
    {
      "epoch": 2.1583656646584446,
      "grad_norm": 2.921875,
      "learning_rate": 1.5585904135870258e-05,
      "loss": 0.8285,
      "step": 615840
    },
    {
      "epoch": 2.1584007121653404,
      "grad_norm": 3.046875,
      "learning_rate": 1.5585255107206556e-05,
      "loss": 0.7996,
      "step": 615850
    },
    {
      "epoch": 2.1584357596722357,
      "grad_norm": 2.890625,
      "learning_rate": 1.5584606078542854e-05,
      "loss": 0.756,
      "step": 615860
    },
    {
      "epoch": 2.1584708071791314,
      "grad_norm": 2.84375,
      "learning_rate": 1.5583957049879152e-05,
      "loss": 0.826,
      "step": 615870
    },
    {
      "epoch": 2.1585058546860267,
      "grad_norm": 3.0,
      "learning_rate": 1.558330802121545e-05,
      "loss": 0.8965,
      "step": 615880
    },
    {
      "epoch": 2.1585409021929225,
      "grad_norm": 2.734375,
      "learning_rate": 1.5582658992551748e-05,
      "loss": 0.7902,
      "step": 615890
    },
    {
      "epoch": 2.1585759496998183,
      "grad_norm": 3.015625,
      "learning_rate": 1.5582009963888046e-05,
      "loss": 0.8691,
      "step": 615900
    },
    {
      "epoch": 2.1586109972067136,
      "grad_norm": 2.796875,
      "learning_rate": 1.5581360935224344e-05,
      "loss": 0.8264,
      "step": 615910
    },
    {
      "epoch": 2.1586460447136093,
      "grad_norm": 2.90625,
      "learning_rate": 1.5580711906560642e-05,
      "loss": 0.8189,
      "step": 615920
    },
    {
      "epoch": 2.158681092220505,
      "grad_norm": 2.796875,
      "learning_rate": 1.558006287789694e-05,
      "loss": 0.7786,
      "step": 615930
    },
    {
      "epoch": 2.1587161397274004,
      "grad_norm": 3.03125,
      "learning_rate": 1.5579413849233238e-05,
      "loss": 0.8565,
      "step": 615940
    },
    {
      "epoch": 2.158751187234296,
      "grad_norm": 3.078125,
      "learning_rate": 1.5578764820569536e-05,
      "loss": 0.8021,
      "step": 615950
    },
    {
      "epoch": 2.158786234741192,
      "grad_norm": 3.078125,
      "learning_rate": 1.5578115791905834e-05,
      "loss": 0.8009,
      "step": 615960
    },
    {
      "epoch": 2.1588212822480872,
      "grad_norm": 2.609375,
      "learning_rate": 1.5577466763242132e-05,
      "loss": 0.7318,
      "step": 615970
    },
    {
      "epoch": 2.158856329754983,
      "grad_norm": 2.53125,
      "learning_rate": 1.557681773457843e-05,
      "loss": 0.6875,
      "step": 615980
    },
    {
      "epoch": 2.1588913772618783,
      "grad_norm": 2.5625,
      "learning_rate": 1.5576168705914728e-05,
      "loss": 0.7457,
      "step": 615990
    },
    {
      "epoch": 2.158926424768774,
      "grad_norm": 2.90625,
      "learning_rate": 1.5575519677251026e-05,
      "loss": 0.8405,
      "step": 616000
    },
    {
      "epoch": 2.15896147227567,
      "grad_norm": 2.625,
      "learning_rate": 1.5574870648587324e-05,
      "loss": 0.8272,
      "step": 616010
    },
    {
      "epoch": 2.158996519782565,
      "grad_norm": 3.125,
      "learning_rate": 1.5574221619923622e-05,
      "loss": 0.8325,
      "step": 616020
    },
    {
      "epoch": 2.159031567289461,
      "grad_norm": 2.84375,
      "learning_rate": 1.557357259125992e-05,
      "loss": 0.8067,
      "step": 616030
    },
    {
      "epoch": 2.1590666147963566,
      "grad_norm": 2.625,
      "learning_rate": 1.557292356259622e-05,
      "loss": 0.8142,
      "step": 616040
    },
    {
      "epoch": 2.159101662303252,
      "grad_norm": 2.875,
      "learning_rate": 1.557227453393252e-05,
      "loss": 0.907,
      "step": 616050
    },
    {
      "epoch": 2.1591367098101477,
      "grad_norm": 2.28125,
      "learning_rate": 1.5571625505268818e-05,
      "loss": 0.7567,
      "step": 616060
    },
    {
      "epoch": 2.1591717573170435,
      "grad_norm": 3.5,
      "learning_rate": 1.5570976476605116e-05,
      "loss": 0.8462,
      "step": 616070
    },
    {
      "epoch": 2.159206804823939,
      "grad_norm": 2.9375,
      "learning_rate": 1.5570327447941414e-05,
      "loss": 0.7846,
      "step": 616080
    },
    {
      "epoch": 2.1592418523308345,
      "grad_norm": 2.75,
      "learning_rate": 1.5569678419277708e-05,
      "loss": 0.8548,
      "step": 616090
    },
    {
      "epoch": 2.15927689983773,
      "grad_norm": 3.265625,
      "learning_rate": 1.5569029390614006e-05,
      "loss": 0.8545,
      "step": 616100
    },
    {
      "epoch": 2.1593119473446256,
      "grad_norm": 2.4375,
      "learning_rate": 1.5568380361950304e-05,
      "loss": 0.8326,
      "step": 616110
    },
    {
      "epoch": 2.1593469948515214,
      "grad_norm": 2.734375,
      "learning_rate": 1.5567731333286602e-05,
      "loss": 0.8212,
      "step": 616120
    },
    {
      "epoch": 2.1593820423584167,
      "grad_norm": 3.21875,
      "learning_rate": 1.55670823046229e-05,
      "loss": 0.7702,
      "step": 616130
    },
    {
      "epoch": 2.1594170898653124,
      "grad_norm": 2.421875,
      "learning_rate": 1.5566433275959198e-05,
      "loss": 0.7362,
      "step": 616140
    },
    {
      "epoch": 2.159452137372208,
      "grad_norm": 3.15625,
      "learning_rate": 1.55657842472955e-05,
      "loss": 0.7468,
      "step": 616150
    },
    {
      "epoch": 2.1594871848791035,
      "grad_norm": 3.203125,
      "learning_rate": 1.5565135218631798e-05,
      "loss": 0.8008,
      "step": 616160
    },
    {
      "epoch": 2.1595222323859993,
      "grad_norm": 2.765625,
      "learning_rate": 1.5564486189968096e-05,
      "loss": 0.8123,
      "step": 616170
    },
    {
      "epoch": 2.159557279892895,
      "grad_norm": 3.359375,
      "learning_rate": 1.5563837161304394e-05,
      "loss": 0.8458,
      "step": 616180
    },
    {
      "epoch": 2.1595923273997903,
      "grad_norm": 3.4375,
      "learning_rate": 1.556318813264069e-05,
      "loss": 0.863,
      "step": 616190
    },
    {
      "epoch": 2.159627374906686,
      "grad_norm": 3.0625,
      "learning_rate": 1.556253910397699e-05,
      "loss": 0.8103,
      "step": 616200
    },
    {
      "epoch": 2.1596624224135814,
      "grad_norm": 3.046875,
      "learning_rate": 1.5561890075313288e-05,
      "loss": 0.7911,
      "step": 616210
    },
    {
      "epoch": 2.159697469920477,
      "grad_norm": 3.140625,
      "learning_rate": 1.5561241046649586e-05,
      "loss": 0.9438,
      "step": 616220
    },
    {
      "epoch": 2.159732517427373,
      "grad_norm": 3.234375,
      "learning_rate": 1.5560592017985884e-05,
      "loss": 0.8329,
      "step": 616230
    },
    {
      "epoch": 2.1597675649342682,
      "grad_norm": 3.453125,
      "learning_rate": 1.555994298932218e-05,
      "loss": 0.8582,
      "step": 616240
    },
    {
      "epoch": 2.159802612441164,
      "grad_norm": 2.546875,
      "learning_rate": 1.555929396065848e-05,
      "loss": 0.8331,
      "step": 616250
    },
    {
      "epoch": 2.1598376599480598,
      "grad_norm": 3.53125,
      "learning_rate": 1.5558644931994778e-05,
      "loss": 0.7834,
      "step": 616260
    },
    {
      "epoch": 2.159872707454955,
      "grad_norm": 3.546875,
      "learning_rate": 1.5557995903331076e-05,
      "loss": 0.8812,
      "step": 616270
    },
    {
      "epoch": 2.159907754961851,
      "grad_norm": 2.890625,
      "learning_rate": 1.5557346874667374e-05,
      "loss": 0.7595,
      "step": 616280
    },
    {
      "epoch": 2.1599428024687466,
      "grad_norm": 2.921875,
      "learning_rate": 1.555669784600367e-05,
      "loss": 0.8121,
      "step": 616290
    },
    {
      "epoch": 2.159977849975642,
      "grad_norm": 2.96875,
      "learning_rate": 1.555604881733997e-05,
      "loss": 0.7996,
      "step": 616300
    },
    {
      "epoch": 2.1600128974825377,
      "grad_norm": 2.703125,
      "learning_rate": 1.5555399788676268e-05,
      "loss": 0.8199,
      "step": 616310
    },
    {
      "epoch": 2.160047944989433,
      "grad_norm": 2.921875,
      "learning_rate": 1.5554750760012566e-05,
      "loss": 0.793,
      "step": 616320
    },
    {
      "epoch": 2.1600829924963287,
      "grad_norm": 2.734375,
      "learning_rate": 1.5554101731348864e-05,
      "loss": 0.8684,
      "step": 616330
    },
    {
      "epoch": 2.1601180400032245,
      "grad_norm": 2.40625,
      "learning_rate": 1.555345270268516e-05,
      "loss": 0.8241,
      "step": 616340
    },
    {
      "epoch": 2.16015308751012,
      "grad_norm": 3.09375,
      "learning_rate": 1.555280367402146e-05,
      "loss": 0.8533,
      "step": 616350
    },
    {
      "epoch": 2.1601881350170156,
      "grad_norm": 2.78125,
      "learning_rate": 1.5552154645357758e-05,
      "loss": 0.7939,
      "step": 616360
    },
    {
      "epoch": 2.1602231825239113,
      "grad_norm": 2.765625,
      "learning_rate": 1.5551505616694056e-05,
      "loss": 0.7965,
      "step": 616370
    },
    {
      "epoch": 2.1602582300308066,
      "grad_norm": 2.90625,
      "learning_rate": 1.5550856588030354e-05,
      "loss": 0.8846,
      "step": 616380
    },
    {
      "epoch": 2.1602932775377024,
      "grad_norm": 3.0625,
      "learning_rate": 1.555020755936665e-05,
      "loss": 0.8002,
      "step": 616390
    },
    {
      "epoch": 2.160328325044598,
      "grad_norm": 3.015625,
      "learning_rate": 1.554955853070295e-05,
      "loss": 0.8815,
      "step": 616400
    },
    {
      "epoch": 2.1603633725514935,
      "grad_norm": 2.859375,
      "learning_rate": 1.554890950203925e-05,
      "loss": 0.7503,
      "step": 616410
    },
    {
      "epoch": 2.1603984200583892,
      "grad_norm": 2.921875,
      "learning_rate": 1.554826047337555e-05,
      "loss": 0.798,
      "step": 616420
    },
    {
      "epoch": 2.1604334675652845,
      "grad_norm": 2.640625,
      "learning_rate": 1.5547611444711847e-05,
      "loss": 0.8585,
      "step": 616430
    },
    {
      "epoch": 2.1604685150721803,
      "grad_norm": 3.0625,
      "learning_rate": 1.5546962416048145e-05,
      "loss": 0.7929,
      "step": 616440
    },
    {
      "epoch": 2.160503562579076,
      "grad_norm": 3.171875,
      "learning_rate": 1.5546313387384443e-05,
      "loss": 0.8187,
      "step": 616450
    },
    {
      "epoch": 2.1605386100859714,
      "grad_norm": 2.71875,
      "learning_rate": 1.554566435872074e-05,
      "loss": 0.7345,
      "step": 616460
    },
    {
      "epoch": 2.160573657592867,
      "grad_norm": 2.890625,
      "learning_rate": 1.5545015330057036e-05,
      "loss": 0.8611,
      "step": 616470
    },
    {
      "epoch": 2.160608705099763,
      "grad_norm": 2.515625,
      "learning_rate": 1.5544366301393334e-05,
      "loss": 0.7161,
      "step": 616480
    },
    {
      "epoch": 2.160643752606658,
      "grad_norm": 2.953125,
      "learning_rate": 1.554371727272963e-05,
      "loss": 0.8061,
      "step": 616490
    },
    {
      "epoch": 2.160678800113554,
      "grad_norm": 2.921875,
      "learning_rate": 1.554306824406593e-05,
      "loss": 0.7802,
      "step": 616500
    },
    {
      "epoch": 2.1607138476204497,
      "grad_norm": 2.859375,
      "learning_rate": 1.5542419215402228e-05,
      "loss": 0.7657,
      "step": 616510
    },
    {
      "epoch": 2.160748895127345,
      "grad_norm": 2.6875,
      "learning_rate": 1.554177018673853e-05,
      "loss": 0.8228,
      "step": 616520
    },
    {
      "epoch": 2.160783942634241,
      "grad_norm": 3.0,
      "learning_rate": 1.5541121158074827e-05,
      "loss": 0.7835,
      "step": 616530
    },
    {
      "epoch": 2.160818990141136,
      "grad_norm": 2.640625,
      "learning_rate": 1.5540472129411125e-05,
      "loss": 0.8163,
      "step": 616540
    },
    {
      "epoch": 2.160854037648032,
      "grad_norm": 2.78125,
      "learning_rate": 1.5539823100747423e-05,
      "loss": 0.7703,
      "step": 616550
    },
    {
      "epoch": 2.1608890851549276,
      "grad_norm": 2.65625,
      "learning_rate": 1.553917407208372e-05,
      "loss": 0.8428,
      "step": 616560
    },
    {
      "epoch": 2.160924132661823,
      "grad_norm": 3.109375,
      "learning_rate": 1.553852504342002e-05,
      "loss": 0.8763,
      "step": 616570
    },
    {
      "epoch": 2.1609591801687187,
      "grad_norm": 3.03125,
      "learning_rate": 1.5537876014756317e-05,
      "loss": 0.8131,
      "step": 616580
    },
    {
      "epoch": 2.1609942276756144,
      "grad_norm": 2.765625,
      "learning_rate": 1.5537226986092615e-05,
      "loss": 0.8305,
      "step": 616590
    },
    {
      "epoch": 2.1610292751825098,
      "grad_norm": 2.375,
      "learning_rate": 1.5536577957428913e-05,
      "loss": 0.7929,
      "step": 616600
    },
    {
      "epoch": 2.1610643226894055,
      "grad_norm": 3.125,
      "learning_rate": 1.553592892876521e-05,
      "loss": 0.8256,
      "step": 616610
    },
    {
      "epoch": 2.1610993701963013,
      "grad_norm": 2.78125,
      "learning_rate": 1.553527990010151e-05,
      "loss": 0.8267,
      "step": 616620
    },
    {
      "epoch": 2.1611344177031966,
      "grad_norm": 3.046875,
      "learning_rate": 1.5534630871437807e-05,
      "loss": 0.8534,
      "step": 616630
    },
    {
      "epoch": 2.1611694652100923,
      "grad_norm": 2.828125,
      "learning_rate": 1.5533981842774105e-05,
      "loss": 0.7641,
      "step": 616640
    },
    {
      "epoch": 2.1612045127169877,
      "grad_norm": 2.78125,
      "learning_rate": 1.5533332814110403e-05,
      "loss": 0.7621,
      "step": 616650
    },
    {
      "epoch": 2.1612395602238834,
      "grad_norm": 2.5,
      "learning_rate": 1.55326837854467e-05,
      "loss": 0.7678,
      "step": 616660
    },
    {
      "epoch": 2.161274607730779,
      "grad_norm": 3.265625,
      "learning_rate": 1.5532034756783e-05,
      "loss": 0.8207,
      "step": 616670
    },
    {
      "epoch": 2.1613096552376745,
      "grad_norm": 3.15625,
      "learning_rate": 1.5531385728119297e-05,
      "loss": 0.8703,
      "step": 616680
    },
    {
      "epoch": 2.1613447027445702,
      "grad_norm": 2.765625,
      "learning_rate": 1.5530736699455595e-05,
      "loss": 0.7946,
      "step": 616690
    },
    {
      "epoch": 2.161379750251466,
      "grad_norm": 3.265625,
      "learning_rate": 1.5530087670791893e-05,
      "loss": 0.8198,
      "step": 616700
    },
    {
      "epoch": 2.1614147977583613,
      "grad_norm": 2.53125,
      "learning_rate": 1.552943864212819e-05,
      "loss": 0.8611,
      "step": 616710
    },
    {
      "epoch": 2.161449845265257,
      "grad_norm": 2.59375,
      "learning_rate": 1.552878961346449e-05,
      "loss": 0.8665,
      "step": 616720
    },
    {
      "epoch": 2.161484892772153,
      "grad_norm": 2.734375,
      "learning_rate": 1.5528140584800787e-05,
      "loss": 0.8203,
      "step": 616730
    },
    {
      "epoch": 2.161519940279048,
      "grad_norm": 2.953125,
      "learning_rate": 1.5527491556137085e-05,
      "loss": 0.8019,
      "step": 616740
    },
    {
      "epoch": 2.161554987785944,
      "grad_norm": 2.828125,
      "learning_rate": 1.5526842527473383e-05,
      "loss": 0.8173,
      "step": 616750
    },
    {
      "epoch": 2.161590035292839,
      "grad_norm": 2.484375,
      "learning_rate": 1.552619349880968e-05,
      "loss": 0.7614,
      "step": 616760
    },
    {
      "epoch": 2.161625082799735,
      "grad_norm": 2.875,
      "learning_rate": 1.5525544470145982e-05,
      "loss": 0.7823,
      "step": 616770
    },
    {
      "epoch": 2.1616601303066307,
      "grad_norm": 3.0,
      "learning_rate": 1.552489544148228e-05,
      "loss": 0.8087,
      "step": 616780
    },
    {
      "epoch": 2.161695177813526,
      "grad_norm": 2.75,
      "learning_rate": 1.552424641281858e-05,
      "loss": 0.8109,
      "step": 616790
    },
    {
      "epoch": 2.161730225320422,
      "grad_norm": 2.46875,
      "learning_rate": 1.5523597384154876e-05,
      "loss": 0.7296,
      "step": 616800
    },
    {
      "epoch": 2.1617652728273176,
      "grad_norm": 3.046875,
      "learning_rate": 1.5522948355491174e-05,
      "loss": 0.8236,
      "step": 616810
    },
    {
      "epoch": 2.161800320334213,
      "grad_norm": 2.6875,
      "learning_rate": 1.5522299326827472e-05,
      "loss": 0.8004,
      "step": 616820
    },
    {
      "epoch": 2.1618353678411086,
      "grad_norm": 2.84375,
      "learning_rate": 1.552165029816377e-05,
      "loss": 0.7807,
      "step": 616830
    },
    {
      "epoch": 2.1618704153480044,
      "grad_norm": 2.8125,
      "learning_rate": 1.5521001269500065e-05,
      "loss": 0.8322,
      "step": 616840
    },
    {
      "epoch": 2.1619054628548997,
      "grad_norm": 3.4375,
      "learning_rate": 1.5520352240836363e-05,
      "loss": 0.8951,
      "step": 616850
    },
    {
      "epoch": 2.1619405103617955,
      "grad_norm": 2.75,
      "learning_rate": 1.551970321217266e-05,
      "loss": 0.7224,
      "step": 616860
    },
    {
      "epoch": 2.161975557868691,
      "grad_norm": 2.671875,
      "learning_rate": 1.551905418350896e-05,
      "loss": 0.7752,
      "step": 616870
    },
    {
      "epoch": 2.1620106053755865,
      "grad_norm": 2.984375,
      "learning_rate": 1.5518405154845257e-05,
      "loss": 0.7974,
      "step": 616880
    },
    {
      "epoch": 2.1620456528824823,
      "grad_norm": 2.5625,
      "learning_rate": 1.551775612618156e-05,
      "loss": 0.762,
      "step": 616890
    },
    {
      "epoch": 2.1620807003893776,
      "grad_norm": 2.4375,
      "learning_rate": 1.5517107097517856e-05,
      "loss": 0.7601,
      "step": 616900
    },
    {
      "epoch": 2.1621157478962734,
      "grad_norm": 2.703125,
      "learning_rate": 1.5516458068854154e-05,
      "loss": 0.7767,
      "step": 616910
    },
    {
      "epoch": 2.162150795403169,
      "grad_norm": 3.046875,
      "learning_rate": 1.5515809040190452e-05,
      "loss": 0.8137,
      "step": 616920
    },
    {
      "epoch": 2.1621858429100644,
      "grad_norm": 2.4375,
      "learning_rate": 1.551516001152675e-05,
      "loss": 0.7732,
      "step": 616930
    },
    {
      "epoch": 2.16222089041696,
      "grad_norm": 2.765625,
      "learning_rate": 1.551451098286305e-05,
      "loss": 0.7466,
      "step": 616940
    },
    {
      "epoch": 2.162255937923856,
      "grad_norm": 2.859375,
      "learning_rate": 1.5513861954199346e-05,
      "loss": 0.7207,
      "step": 616950
    },
    {
      "epoch": 2.1622909854307513,
      "grad_norm": 2.265625,
      "learning_rate": 1.5513212925535644e-05,
      "loss": 0.7882,
      "step": 616960
    },
    {
      "epoch": 2.162326032937647,
      "grad_norm": 3.40625,
      "learning_rate": 1.5512563896871942e-05,
      "loss": 0.7953,
      "step": 616970
    },
    {
      "epoch": 2.162361080444543,
      "grad_norm": 2.828125,
      "learning_rate": 1.551191486820824e-05,
      "loss": 0.8542,
      "step": 616980
    },
    {
      "epoch": 2.162396127951438,
      "grad_norm": 3.03125,
      "learning_rate": 1.551126583954454e-05,
      "loss": 0.8593,
      "step": 616990
    },
    {
      "epoch": 2.162431175458334,
      "grad_norm": 2.90625,
      "learning_rate": 1.5510616810880836e-05,
      "loss": 0.7468,
      "step": 617000
    },
    {
      "epoch": 2.162466222965229,
      "grad_norm": 3.078125,
      "learning_rate": 1.5509967782217134e-05,
      "loss": 0.8157,
      "step": 617010
    },
    {
      "epoch": 2.162501270472125,
      "grad_norm": 2.796875,
      "learning_rate": 1.5509318753553432e-05,
      "loss": 0.8277,
      "step": 617020
    },
    {
      "epoch": 2.1625363179790207,
      "grad_norm": 2.671875,
      "learning_rate": 1.550866972488973e-05,
      "loss": 0.7954,
      "step": 617030
    },
    {
      "epoch": 2.162571365485916,
      "grad_norm": 3.0,
      "learning_rate": 1.550802069622603e-05,
      "loss": 0.8494,
      "step": 617040
    },
    {
      "epoch": 2.1626064129928118,
      "grad_norm": 3.078125,
      "learning_rate": 1.5507371667562326e-05,
      "loss": 0.7834,
      "step": 617050
    },
    {
      "epoch": 2.1626414604997075,
      "grad_norm": 3.390625,
      "learning_rate": 1.5506722638898624e-05,
      "loss": 0.813,
      "step": 617060
    },
    {
      "epoch": 2.162676508006603,
      "grad_norm": 2.53125,
      "learning_rate": 1.5506073610234922e-05,
      "loss": 0.8422,
      "step": 617070
    },
    {
      "epoch": 2.1627115555134986,
      "grad_norm": 2.921875,
      "learning_rate": 1.550542458157122e-05,
      "loss": 0.8481,
      "step": 617080
    },
    {
      "epoch": 2.1627466030203943,
      "grad_norm": 3.265625,
      "learning_rate": 1.550477555290752e-05,
      "loss": 0.7962,
      "step": 617090
    },
    {
      "epoch": 2.1627816505272897,
      "grad_norm": 2.828125,
      "learning_rate": 1.5504126524243816e-05,
      "loss": 0.8885,
      "step": 617100
    },
    {
      "epoch": 2.1628166980341854,
      "grad_norm": 3.046875,
      "learning_rate": 1.5503477495580114e-05,
      "loss": 0.8254,
      "step": 617110
    },
    {
      "epoch": 2.1628517455410807,
      "grad_norm": 3.328125,
      "learning_rate": 1.5502828466916412e-05,
      "loss": 0.8408,
      "step": 617120
    },
    {
      "epoch": 2.1628867930479765,
      "grad_norm": 2.921875,
      "learning_rate": 1.550217943825271e-05,
      "loss": 0.8252,
      "step": 617130
    },
    {
      "epoch": 2.1629218405548722,
      "grad_norm": 2.75,
      "learning_rate": 1.5501530409589012e-05,
      "loss": 0.8111,
      "step": 617140
    },
    {
      "epoch": 2.1629568880617676,
      "grad_norm": 2.953125,
      "learning_rate": 1.550088138092531e-05,
      "loss": 0.7778,
      "step": 617150
    },
    {
      "epoch": 2.1629919355686633,
      "grad_norm": 2.53125,
      "learning_rate": 1.5500232352261608e-05,
      "loss": 0.859,
      "step": 617160
    },
    {
      "epoch": 2.163026983075559,
      "grad_norm": 2.6875,
      "learning_rate": 1.5499583323597906e-05,
      "loss": 0.7664,
      "step": 617170
    },
    {
      "epoch": 2.1630620305824544,
      "grad_norm": 2.953125,
      "learning_rate": 1.5498934294934204e-05,
      "loss": 0.7836,
      "step": 617180
    },
    {
      "epoch": 2.16309707808935,
      "grad_norm": 2.859375,
      "learning_rate": 1.5498285266270502e-05,
      "loss": 0.7394,
      "step": 617190
    },
    {
      "epoch": 2.163132125596246,
      "grad_norm": 3.359375,
      "learning_rate": 1.54976362376068e-05,
      "loss": 0.8778,
      "step": 617200
    },
    {
      "epoch": 2.163167173103141,
      "grad_norm": 3.09375,
      "learning_rate": 1.5496987208943098e-05,
      "loss": 0.8158,
      "step": 617210
    },
    {
      "epoch": 2.163202220610037,
      "grad_norm": 2.671875,
      "learning_rate": 1.5496338180279392e-05,
      "loss": 0.7896,
      "step": 617220
    },
    {
      "epoch": 2.1632372681169327,
      "grad_norm": 3.8125,
      "learning_rate": 1.549568915161569e-05,
      "loss": 0.7869,
      "step": 617230
    },
    {
      "epoch": 2.163272315623828,
      "grad_norm": 2.453125,
      "learning_rate": 1.549504012295199e-05,
      "loss": 0.7328,
      "step": 617240
    },
    {
      "epoch": 2.163307363130724,
      "grad_norm": 3.171875,
      "learning_rate": 1.549439109428829e-05,
      "loss": 0.8575,
      "step": 617250
    },
    {
      "epoch": 2.163342410637619,
      "grad_norm": 2.8125,
      "learning_rate": 1.5493742065624588e-05,
      "loss": 0.7712,
      "step": 617260
    },
    {
      "epoch": 2.163377458144515,
      "grad_norm": 3.25,
      "learning_rate": 1.5493093036960886e-05,
      "loss": 0.7957,
      "step": 617270
    },
    {
      "epoch": 2.1634125056514106,
      "grad_norm": 2.9375,
      "learning_rate": 1.5492444008297184e-05,
      "loss": 0.794,
      "step": 617280
    },
    {
      "epoch": 2.163447553158306,
      "grad_norm": 2.8125,
      "learning_rate": 1.5491794979633482e-05,
      "loss": 0.8249,
      "step": 617290
    },
    {
      "epoch": 2.1634826006652017,
      "grad_norm": 2.65625,
      "learning_rate": 1.549114595096978e-05,
      "loss": 0.7677,
      "step": 617300
    },
    {
      "epoch": 2.1635176481720975,
      "grad_norm": 2.828125,
      "learning_rate": 1.5490496922306078e-05,
      "loss": 0.8229,
      "step": 617310
    },
    {
      "epoch": 2.1635526956789928,
      "grad_norm": 3.140625,
      "learning_rate": 1.5489847893642376e-05,
      "loss": 0.812,
      "step": 617320
    },
    {
      "epoch": 2.1635877431858885,
      "grad_norm": 2.765625,
      "learning_rate": 1.5489198864978674e-05,
      "loss": 0.8296,
      "step": 617330
    },
    {
      "epoch": 2.1636227906927843,
      "grad_norm": 2.875,
      "learning_rate": 1.5488549836314972e-05,
      "loss": 0.8126,
      "step": 617340
    },
    {
      "epoch": 2.1636578381996796,
      "grad_norm": 3.078125,
      "learning_rate": 1.548790080765127e-05,
      "loss": 0.835,
      "step": 617350
    },
    {
      "epoch": 2.1636928857065754,
      "grad_norm": 2.609375,
      "learning_rate": 1.5487251778987568e-05,
      "loss": 0.7523,
      "step": 617360
    },
    {
      "epoch": 2.1637279332134707,
      "grad_norm": 2.9375,
      "learning_rate": 1.5486602750323866e-05,
      "loss": 0.8125,
      "step": 617370
    },
    {
      "epoch": 2.1637629807203664,
      "grad_norm": 2.71875,
      "learning_rate": 1.5485953721660164e-05,
      "loss": 0.7601,
      "step": 617380
    },
    {
      "epoch": 2.163798028227262,
      "grad_norm": 3.171875,
      "learning_rate": 1.5485304692996465e-05,
      "loss": 0.7954,
      "step": 617390
    },
    {
      "epoch": 2.1638330757341575,
      "grad_norm": 2.90625,
      "learning_rate": 1.5484655664332763e-05,
      "loss": 0.7902,
      "step": 617400
    },
    {
      "epoch": 2.1638681232410533,
      "grad_norm": 2.6875,
      "learning_rate": 1.5484006635669058e-05,
      "loss": 0.804,
      "step": 617410
    },
    {
      "epoch": 2.163903170747949,
      "grad_norm": 2.4375,
      "learning_rate": 1.5483357607005356e-05,
      "loss": 0.8384,
      "step": 617420
    },
    {
      "epoch": 2.1639382182548443,
      "grad_norm": 3.0625,
      "learning_rate": 1.5482708578341654e-05,
      "loss": 0.8232,
      "step": 617430
    },
    {
      "epoch": 2.16397326576174,
      "grad_norm": 3.5,
      "learning_rate": 1.5482059549677952e-05,
      "loss": 0.8073,
      "step": 617440
    },
    {
      "epoch": 2.164008313268636,
      "grad_norm": 3.109375,
      "learning_rate": 1.548141052101425e-05,
      "loss": 0.7971,
      "step": 617450
    },
    {
      "epoch": 2.164043360775531,
      "grad_norm": 2.34375,
      "learning_rate": 1.5480761492350548e-05,
      "loss": 0.778,
      "step": 617460
    },
    {
      "epoch": 2.164078408282427,
      "grad_norm": 2.8125,
      "learning_rate": 1.5480112463686846e-05,
      "loss": 0.8223,
      "step": 617470
    },
    {
      "epoch": 2.1641134557893222,
      "grad_norm": 3.109375,
      "learning_rate": 1.5479463435023144e-05,
      "loss": 0.7448,
      "step": 617480
    },
    {
      "epoch": 2.164148503296218,
      "grad_norm": 2.875,
      "learning_rate": 1.5478814406359442e-05,
      "loss": 0.7583,
      "step": 617490
    },
    {
      "epoch": 2.1641835508031138,
      "grad_norm": 3.09375,
      "learning_rate": 1.547816537769574e-05,
      "loss": 0.9495,
      "step": 617500
    },
    {
      "epoch": 2.164218598310009,
      "grad_norm": 3.109375,
      "learning_rate": 1.547751634903204e-05,
      "loss": 0.8136,
      "step": 617510
    },
    {
      "epoch": 2.164253645816905,
      "grad_norm": 3.484375,
      "learning_rate": 1.547686732036834e-05,
      "loss": 0.8251,
      "step": 617520
    },
    {
      "epoch": 2.1642886933238006,
      "grad_norm": 3.359375,
      "learning_rate": 1.5476218291704637e-05,
      "loss": 0.8997,
      "step": 617530
    },
    {
      "epoch": 2.164323740830696,
      "grad_norm": 2.703125,
      "learning_rate": 1.5475569263040935e-05,
      "loss": 0.7758,
      "step": 617540
    },
    {
      "epoch": 2.1643587883375917,
      "grad_norm": 3.09375,
      "learning_rate": 1.5474920234377233e-05,
      "loss": 0.8536,
      "step": 617550
    },
    {
      "epoch": 2.1643938358444874,
      "grad_norm": 3.0,
      "learning_rate": 1.547427120571353e-05,
      "loss": 0.763,
      "step": 617560
    },
    {
      "epoch": 2.1644288833513827,
      "grad_norm": 2.84375,
      "learning_rate": 1.547362217704983e-05,
      "loss": 0.7393,
      "step": 617570
    },
    {
      "epoch": 2.1644639308582785,
      "grad_norm": 2.875,
      "learning_rate": 1.5472973148386127e-05,
      "loss": 0.8163,
      "step": 617580
    },
    {
      "epoch": 2.164498978365174,
      "grad_norm": 2.90625,
      "learning_rate": 1.5472324119722422e-05,
      "loss": 0.8802,
      "step": 617590
    },
    {
      "epoch": 2.1645340258720696,
      "grad_norm": 2.359375,
      "learning_rate": 1.547167509105872e-05,
      "loss": 0.7996,
      "step": 617600
    },
    {
      "epoch": 2.1645690733789653,
      "grad_norm": 3.40625,
      "learning_rate": 1.5471026062395018e-05,
      "loss": 0.7932,
      "step": 617610
    },
    {
      "epoch": 2.1646041208858606,
      "grad_norm": 3.234375,
      "learning_rate": 1.547037703373132e-05,
      "loss": 0.8633,
      "step": 617620
    },
    {
      "epoch": 2.1646391683927564,
      "grad_norm": 3.328125,
      "learning_rate": 1.5469728005067617e-05,
      "loss": 0.9402,
      "step": 617630
    },
    {
      "epoch": 2.164674215899652,
      "grad_norm": 2.625,
      "learning_rate": 1.5469078976403915e-05,
      "loss": 0.7807,
      "step": 617640
    },
    {
      "epoch": 2.1647092634065475,
      "grad_norm": 3.421875,
      "learning_rate": 1.5468429947740213e-05,
      "loss": 0.8591,
      "step": 617650
    },
    {
      "epoch": 2.164744310913443,
      "grad_norm": 2.640625,
      "learning_rate": 1.546778091907651e-05,
      "loss": 0.8052,
      "step": 617660
    },
    {
      "epoch": 2.164779358420339,
      "grad_norm": 3.015625,
      "learning_rate": 1.546713189041281e-05,
      "loss": 0.721,
      "step": 617670
    },
    {
      "epoch": 2.1648144059272343,
      "grad_norm": 2.859375,
      "learning_rate": 1.5466482861749107e-05,
      "loss": 0.8191,
      "step": 617680
    },
    {
      "epoch": 2.16484945343413,
      "grad_norm": 2.796875,
      "learning_rate": 1.5465833833085405e-05,
      "loss": 0.764,
      "step": 617690
    },
    {
      "epoch": 2.1648845009410254,
      "grad_norm": 3.328125,
      "learning_rate": 1.5465184804421703e-05,
      "loss": 0.8075,
      "step": 617700
    },
    {
      "epoch": 2.164919548447921,
      "grad_norm": 2.78125,
      "learning_rate": 1.5464535775758e-05,
      "loss": 0.8637,
      "step": 617710
    },
    {
      "epoch": 2.164954595954817,
      "grad_norm": 3.15625,
      "learning_rate": 1.54638867470943e-05,
      "loss": 0.9062,
      "step": 617720
    },
    {
      "epoch": 2.164989643461712,
      "grad_norm": 2.65625,
      "learning_rate": 1.5463237718430597e-05,
      "loss": 0.7564,
      "step": 617730
    },
    {
      "epoch": 2.165024690968608,
      "grad_norm": 2.734375,
      "learning_rate": 1.5462588689766895e-05,
      "loss": 0.7882,
      "step": 617740
    },
    {
      "epoch": 2.1650597384755037,
      "grad_norm": 2.703125,
      "learning_rate": 1.5461939661103193e-05,
      "loss": 0.8215,
      "step": 617750
    },
    {
      "epoch": 2.165094785982399,
      "grad_norm": 2.84375,
      "learning_rate": 1.5461290632439494e-05,
      "loss": 0.8287,
      "step": 617760
    },
    {
      "epoch": 2.1651298334892948,
      "grad_norm": 3.3125,
      "learning_rate": 1.5460641603775792e-05,
      "loss": 0.9144,
      "step": 617770
    },
    {
      "epoch": 2.1651648809961905,
      "grad_norm": 3.03125,
      "learning_rate": 1.5459992575112087e-05,
      "loss": 0.863,
      "step": 617780
    },
    {
      "epoch": 2.165199928503086,
      "grad_norm": 2.859375,
      "learning_rate": 1.5459343546448385e-05,
      "loss": 0.8589,
      "step": 617790
    },
    {
      "epoch": 2.1652349760099816,
      "grad_norm": 2.90625,
      "learning_rate": 1.5458694517784683e-05,
      "loss": 0.7838,
      "step": 617800
    },
    {
      "epoch": 2.165270023516877,
      "grad_norm": 3.046875,
      "learning_rate": 1.545804548912098e-05,
      "loss": 0.8393,
      "step": 617810
    },
    {
      "epoch": 2.1653050710237727,
      "grad_norm": 2.46875,
      "learning_rate": 1.545739646045728e-05,
      "loss": 0.7205,
      "step": 617820
    },
    {
      "epoch": 2.1653401185306684,
      "grad_norm": 3.0,
      "learning_rate": 1.5456747431793577e-05,
      "loss": 0.8162,
      "step": 617830
    },
    {
      "epoch": 2.1653751660375637,
      "grad_norm": 2.875,
      "learning_rate": 1.5456098403129875e-05,
      "loss": 0.8478,
      "step": 617840
    },
    {
      "epoch": 2.1654102135444595,
      "grad_norm": 2.875,
      "learning_rate": 1.5455449374466173e-05,
      "loss": 0.8032,
      "step": 617850
    },
    {
      "epoch": 2.1654452610513553,
      "grad_norm": 3.09375,
      "learning_rate": 1.545480034580247e-05,
      "loss": 0.8759,
      "step": 617860
    },
    {
      "epoch": 2.1654803085582506,
      "grad_norm": 2.984375,
      "learning_rate": 1.5454151317138772e-05,
      "loss": 0.8007,
      "step": 617870
    },
    {
      "epoch": 2.1655153560651463,
      "grad_norm": 3.125,
      "learning_rate": 1.545350228847507e-05,
      "loss": 0.8025,
      "step": 617880
    },
    {
      "epoch": 2.165550403572042,
      "grad_norm": 3.015625,
      "learning_rate": 1.545285325981137e-05,
      "loss": 0.8253,
      "step": 617890
    },
    {
      "epoch": 2.1655854510789374,
      "grad_norm": 3.109375,
      "learning_rate": 1.5452204231147666e-05,
      "loss": 0.7861,
      "step": 617900
    },
    {
      "epoch": 2.165620498585833,
      "grad_norm": 2.875,
      "learning_rate": 1.5451555202483964e-05,
      "loss": 0.7964,
      "step": 617910
    },
    {
      "epoch": 2.1656555460927285,
      "grad_norm": 2.640625,
      "learning_rate": 1.5450906173820262e-05,
      "loss": 0.7635,
      "step": 617920
    },
    {
      "epoch": 2.1656905935996242,
      "grad_norm": 3.234375,
      "learning_rate": 1.545025714515656e-05,
      "loss": 0.8628,
      "step": 617930
    },
    {
      "epoch": 2.16572564110652,
      "grad_norm": 2.53125,
      "learning_rate": 1.544960811649286e-05,
      "loss": 0.7809,
      "step": 617940
    },
    {
      "epoch": 2.1657606886134153,
      "grad_norm": 2.40625,
      "learning_rate": 1.5448959087829156e-05,
      "loss": 0.8162,
      "step": 617950
    },
    {
      "epoch": 2.165795736120311,
      "grad_norm": 2.734375,
      "learning_rate": 1.5448310059165454e-05,
      "loss": 0.8569,
      "step": 617960
    },
    {
      "epoch": 2.165830783627207,
      "grad_norm": 3.1875,
      "learning_rate": 1.544766103050175e-05,
      "loss": 0.7972,
      "step": 617970
    },
    {
      "epoch": 2.165865831134102,
      "grad_norm": 2.484375,
      "learning_rate": 1.5447012001838047e-05,
      "loss": 0.8903,
      "step": 617980
    },
    {
      "epoch": 2.165900878640998,
      "grad_norm": 2.921875,
      "learning_rate": 1.544636297317435e-05,
      "loss": 0.8212,
      "step": 617990
    },
    {
      "epoch": 2.1659359261478937,
      "grad_norm": 2.796875,
      "learning_rate": 1.5445713944510646e-05,
      "loss": 0.7559,
      "step": 618000
    },
    {
      "epoch": 2.165970973654789,
      "grad_norm": 2.421875,
      "learning_rate": 1.5445064915846944e-05,
      "loss": 0.7893,
      "step": 618010
    },
    {
      "epoch": 2.1660060211616847,
      "grad_norm": 2.9375,
      "learning_rate": 1.5444415887183242e-05,
      "loss": 0.8443,
      "step": 618020
    },
    {
      "epoch": 2.16604106866858,
      "grad_norm": 3.3125,
      "learning_rate": 1.544376685851954e-05,
      "loss": 0.794,
      "step": 618030
    },
    {
      "epoch": 2.166076116175476,
      "grad_norm": 2.875,
      "learning_rate": 1.544311782985584e-05,
      "loss": 0.7775,
      "step": 618040
    },
    {
      "epoch": 2.1661111636823716,
      "grad_norm": 2.765625,
      "learning_rate": 1.5442468801192136e-05,
      "loss": 0.9443,
      "step": 618050
    },
    {
      "epoch": 2.166146211189267,
      "grad_norm": 2.953125,
      "learning_rate": 1.5441819772528434e-05,
      "loss": 0.8457,
      "step": 618060
    },
    {
      "epoch": 2.1661812586961626,
      "grad_norm": 3.390625,
      "learning_rate": 1.5441170743864732e-05,
      "loss": 0.8034,
      "step": 618070
    },
    {
      "epoch": 2.1662163062030584,
      "grad_norm": 3.03125,
      "learning_rate": 1.544052171520103e-05,
      "loss": 0.7797,
      "step": 618080
    },
    {
      "epoch": 2.1662513537099537,
      "grad_norm": 3.21875,
      "learning_rate": 1.543987268653733e-05,
      "loss": 0.8024,
      "step": 618090
    },
    {
      "epoch": 2.1662864012168495,
      "grad_norm": 2.734375,
      "learning_rate": 1.5439223657873626e-05,
      "loss": 0.7767,
      "step": 618100
    },
    {
      "epoch": 2.166321448723745,
      "grad_norm": 3.140625,
      "learning_rate": 1.5438574629209924e-05,
      "loss": 0.8215,
      "step": 618110
    },
    {
      "epoch": 2.1663564962306405,
      "grad_norm": 3.28125,
      "learning_rate": 1.5437925600546222e-05,
      "loss": 0.7216,
      "step": 618120
    },
    {
      "epoch": 2.1663915437375363,
      "grad_norm": 2.75,
      "learning_rate": 1.5437276571882524e-05,
      "loss": 0.8499,
      "step": 618130
    },
    {
      "epoch": 2.1664265912444316,
      "grad_norm": 2.890625,
      "learning_rate": 1.5436627543218822e-05,
      "loss": 0.839,
      "step": 618140
    },
    {
      "epoch": 2.1664616387513274,
      "grad_norm": 3.046875,
      "learning_rate": 1.543597851455512e-05,
      "loss": 0.7802,
      "step": 618150
    },
    {
      "epoch": 2.166496686258223,
      "grad_norm": 2.65625,
      "learning_rate": 1.5435329485891414e-05,
      "loss": 0.8918,
      "step": 618160
    },
    {
      "epoch": 2.1665317337651184,
      "grad_norm": 3.03125,
      "learning_rate": 1.5434680457227712e-05,
      "loss": 0.8366,
      "step": 618170
    },
    {
      "epoch": 2.166566781272014,
      "grad_norm": 2.796875,
      "learning_rate": 1.543403142856401e-05,
      "loss": 0.8253,
      "step": 618180
    },
    {
      "epoch": 2.16660182877891,
      "grad_norm": 3.1875,
      "learning_rate": 1.543338239990031e-05,
      "loss": 0.8159,
      "step": 618190
    },
    {
      "epoch": 2.1666368762858053,
      "grad_norm": 3.390625,
      "learning_rate": 1.5432733371236606e-05,
      "loss": 0.8464,
      "step": 618200
    },
    {
      "epoch": 2.166671923792701,
      "grad_norm": 3.25,
      "learning_rate": 1.5432084342572904e-05,
      "loss": 0.7687,
      "step": 618210
    },
    {
      "epoch": 2.1667069712995968,
      "grad_norm": 2.8125,
      "learning_rate": 1.5431435313909202e-05,
      "loss": 0.7994,
      "step": 618220
    },
    {
      "epoch": 2.166742018806492,
      "grad_norm": 2.9375,
      "learning_rate": 1.54307862852455e-05,
      "loss": 0.7769,
      "step": 618230
    },
    {
      "epoch": 2.166777066313388,
      "grad_norm": 3.03125,
      "learning_rate": 1.5430137256581802e-05,
      "loss": 0.7898,
      "step": 618240
    },
    {
      "epoch": 2.166812113820283,
      "grad_norm": 2.390625,
      "learning_rate": 1.54294882279181e-05,
      "loss": 0.8477,
      "step": 618250
    },
    {
      "epoch": 2.166847161327179,
      "grad_norm": 3.078125,
      "learning_rate": 1.5428839199254398e-05,
      "loss": 0.9024,
      "step": 618260
    },
    {
      "epoch": 2.1668822088340747,
      "grad_norm": 2.578125,
      "learning_rate": 1.5428190170590696e-05,
      "loss": 0.8183,
      "step": 618270
    },
    {
      "epoch": 2.16691725634097,
      "grad_norm": 2.71875,
      "learning_rate": 1.5427541141926994e-05,
      "loss": 0.7197,
      "step": 618280
    },
    {
      "epoch": 2.1669523038478657,
      "grad_norm": 3.140625,
      "learning_rate": 1.5426892113263292e-05,
      "loss": 0.9353,
      "step": 618290
    },
    {
      "epoch": 2.1669873513547615,
      "grad_norm": 3.40625,
      "learning_rate": 1.542624308459959e-05,
      "loss": 0.8015,
      "step": 618300
    },
    {
      "epoch": 2.167022398861657,
      "grad_norm": 2.609375,
      "learning_rate": 1.5425594055935888e-05,
      "loss": 0.8533,
      "step": 618310
    },
    {
      "epoch": 2.1670574463685526,
      "grad_norm": 3.0,
      "learning_rate": 1.5424945027272186e-05,
      "loss": 0.8341,
      "step": 618320
    },
    {
      "epoch": 2.1670924938754483,
      "grad_norm": 2.6875,
      "learning_rate": 1.5424295998608484e-05,
      "loss": 0.7933,
      "step": 618330
    },
    {
      "epoch": 2.1671275413823436,
      "grad_norm": 3.125,
      "learning_rate": 1.5423646969944782e-05,
      "loss": 0.8419,
      "step": 618340
    },
    {
      "epoch": 2.1671625888892394,
      "grad_norm": 2.515625,
      "learning_rate": 1.542299794128108e-05,
      "loss": 0.8617,
      "step": 618350
    },
    {
      "epoch": 2.167197636396135,
      "grad_norm": 2.484375,
      "learning_rate": 1.5422348912617378e-05,
      "loss": 0.7999,
      "step": 618360
    },
    {
      "epoch": 2.1672326839030305,
      "grad_norm": 2.5,
      "learning_rate": 1.5421699883953676e-05,
      "loss": 0.7825,
      "step": 618370
    },
    {
      "epoch": 2.1672677314099262,
      "grad_norm": 2.9375,
      "learning_rate": 1.5421050855289974e-05,
      "loss": 0.7568,
      "step": 618380
    },
    {
      "epoch": 2.1673027789168215,
      "grad_norm": 3.1875,
      "learning_rate": 1.5420401826626272e-05,
      "loss": 0.8512,
      "step": 618390
    },
    {
      "epoch": 2.1673378264237173,
      "grad_norm": 2.71875,
      "learning_rate": 1.541975279796257e-05,
      "loss": 0.8372,
      "step": 618400
    },
    {
      "epoch": 2.167372873930613,
      "grad_norm": 2.75,
      "learning_rate": 1.5419103769298868e-05,
      "loss": 0.864,
      "step": 618410
    },
    {
      "epoch": 2.1674079214375084,
      "grad_norm": 3.34375,
      "learning_rate": 1.5418454740635166e-05,
      "loss": 0.7405,
      "step": 618420
    },
    {
      "epoch": 2.167442968944404,
      "grad_norm": 2.890625,
      "learning_rate": 1.5417805711971464e-05,
      "loss": 0.8512,
      "step": 618430
    },
    {
      "epoch": 2.1674780164513,
      "grad_norm": 3.09375,
      "learning_rate": 1.5417156683307762e-05,
      "loss": 0.7547,
      "step": 618440
    },
    {
      "epoch": 2.167513063958195,
      "grad_norm": 3.4375,
      "learning_rate": 1.541650765464406e-05,
      "loss": 0.9019,
      "step": 618450
    },
    {
      "epoch": 2.167548111465091,
      "grad_norm": 2.75,
      "learning_rate": 1.5415858625980358e-05,
      "loss": 0.8194,
      "step": 618460
    },
    {
      "epoch": 2.1675831589719867,
      "grad_norm": 2.578125,
      "learning_rate": 1.5415209597316656e-05,
      "loss": 0.8041,
      "step": 618470
    },
    {
      "epoch": 2.167618206478882,
      "grad_norm": 2.71875,
      "learning_rate": 1.5414560568652954e-05,
      "loss": 0.7423,
      "step": 618480
    },
    {
      "epoch": 2.167653253985778,
      "grad_norm": 3.15625,
      "learning_rate": 1.5413911539989255e-05,
      "loss": 0.8498,
      "step": 618490
    },
    {
      "epoch": 2.1676883014926736,
      "grad_norm": 2.875,
      "learning_rate": 1.5413262511325553e-05,
      "loss": 0.7973,
      "step": 618500
    },
    {
      "epoch": 2.167723348999569,
      "grad_norm": 2.984375,
      "learning_rate": 1.541261348266185e-05,
      "loss": 0.7987,
      "step": 618510
    },
    {
      "epoch": 2.1677583965064646,
      "grad_norm": 2.890625,
      "learning_rate": 1.541196445399815e-05,
      "loss": 0.8241,
      "step": 618520
    },
    {
      "epoch": 2.16779344401336,
      "grad_norm": 3.203125,
      "learning_rate": 1.5411315425334444e-05,
      "loss": 0.9269,
      "step": 618530
    },
    {
      "epoch": 2.1678284915202557,
      "grad_norm": 3.09375,
      "learning_rate": 1.5410666396670742e-05,
      "loss": 0.881,
      "step": 618540
    },
    {
      "epoch": 2.1678635390271515,
      "grad_norm": 2.671875,
      "learning_rate": 1.541001736800704e-05,
      "loss": 0.746,
      "step": 618550
    },
    {
      "epoch": 2.1678985865340468,
      "grad_norm": 3.078125,
      "learning_rate": 1.5409368339343338e-05,
      "loss": 0.8927,
      "step": 618560
    },
    {
      "epoch": 2.1679336340409425,
      "grad_norm": 2.359375,
      "learning_rate": 1.5408719310679636e-05,
      "loss": 0.7433,
      "step": 618570
    },
    {
      "epoch": 2.1679686815478383,
      "grad_norm": 3.0625,
      "learning_rate": 1.5408070282015934e-05,
      "loss": 0.8743,
      "step": 618580
    },
    {
      "epoch": 2.1680037290547336,
      "grad_norm": 3.046875,
      "learning_rate": 1.5407421253352232e-05,
      "loss": 0.8901,
      "step": 618590
    },
    {
      "epoch": 2.1680387765616294,
      "grad_norm": 2.671875,
      "learning_rate": 1.540677222468853e-05,
      "loss": 0.8449,
      "step": 618600
    },
    {
      "epoch": 2.168073824068525,
      "grad_norm": 2.625,
      "learning_rate": 1.540612319602483e-05,
      "loss": 0.8158,
      "step": 618610
    },
    {
      "epoch": 2.1681088715754204,
      "grad_norm": 3.0,
      "learning_rate": 1.540547416736113e-05,
      "loss": 0.7691,
      "step": 618620
    },
    {
      "epoch": 2.168143919082316,
      "grad_norm": 2.78125,
      "learning_rate": 1.5404825138697427e-05,
      "loss": 0.8423,
      "step": 618630
    },
    {
      "epoch": 2.1681789665892115,
      "grad_norm": 2.921875,
      "learning_rate": 1.5404176110033725e-05,
      "loss": 0.837,
      "step": 618640
    },
    {
      "epoch": 2.1682140140961073,
      "grad_norm": 2.65625,
      "learning_rate": 1.5403527081370023e-05,
      "loss": 0.7457,
      "step": 618650
    },
    {
      "epoch": 2.168249061603003,
      "grad_norm": 3.0,
      "learning_rate": 1.540287805270632e-05,
      "loss": 0.8155,
      "step": 618660
    },
    {
      "epoch": 2.1682841091098983,
      "grad_norm": 3.171875,
      "learning_rate": 1.540222902404262e-05,
      "loss": 0.8865,
      "step": 618670
    },
    {
      "epoch": 2.168319156616794,
      "grad_norm": 3.609375,
      "learning_rate": 1.5401579995378917e-05,
      "loss": 0.7997,
      "step": 618680
    },
    {
      "epoch": 2.16835420412369,
      "grad_norm": 2.8125,
      "learning_rate": 1.5400930966715215e-05,
      "loss": 0.8082,
      "step": 618690
    },
    {
      "epoch": 2.168389251630585,
      "grad_norm": 2.71875,
      "learning_rate": 1.5400281938051513e-05,
      "loss": 0.85,
      "step": 618700
    },
    {
      "epoch": 2.168424299137481,
      "grad_norm": 2.765625,
      "learning_rate": 1.539963290938781e-05,
      "loss": 0.7734,
      "step": 618710
    },
    {
      "epoch": 2.1684593466443767,
      "grad_norm": 2.890625,
      "learning_rate": 1.539898388072411e-05,
      "loss": 0.8775,
      "step": 618720
    },
    {
      "epoch": 2.168494394151272,
      "grad_norm": 3.28125,
      "learning_rate": 1.5398334852060407e-05,
      "loss": 0.8353,
      "step": 618730
    },
    {
      "epoch": 2.1685294416581677,
      "grad_norm": 3.25,
      "learning_rate": 1.5397685823396705e-05,
      "loss": 0.808,
      "step": 618740
    },
    {
      "epoch": 2.168564489165063,
      "grad_norm": 2.75,
      "learning_rate": 1.5397036794733003e-05,
      "loss": 0.775,
      "step": 618750
    },
    {
      "epoch": 2.168599536671959,
      "grad_norm": 3.015625,
      "learning_rate": 1.53963877660693e-05,
      "loss": 0.8672,
      "step": 618760
    },
    {
      "epoch": 2.1686345841788546,
      "grad_norm": 2.703125,
      "learning_rate": 1.53957387374056e-05,
      "loss": 0.822,
      "step": 618770
    },
    {
      "epoch": 2.16866963168575,
      "grad_norm": 3.03125,
      "learning_rate": 1.5395089708741897e-05,
      "loss": 0.7678,
      "step": 618780
    },
    {
      "epoch": 2.1687046791926456,
      "grad_norm": 2.921875,
      "learning_rate": 1.5394440680078195e-05,
      "loss": 0.8501,
      "step": 618790
    },
    {
      "epoch": 2.1687397266995414,
      "grad_norm": 3.015625,
      "learning_rate": 1.5393791651414493e-05,
      "loss": 0.8498,
      "step": 618800
    },
    {
      "epoch": 2.1687747742064367,
      "grad_norm": 2.90625,
      "learning_rate": 1.539314262275079e-05,
      "loss": 0.8421,
      "step": 618810
    },
    {
      "epoch": 2.1688098217133325,
      "grad_norm": 2.609375,
      "learning_rate": 1.539249359408709e-05,
      "loss": 0.83,
      "step": 618820
    },
    {
      "epoch": 2.1688448692202282,
      "grad_norm": 2.921875,
      "learning_rate": 1.5391844565423387e-05,
      "loss": 0.7821,
      "step": 618830
    },
    {
      "epoch": 2.1688799167271235,
      "grad_norm": 3.0625,
      "learning_rate": 1.5391195536759685e-05,
      "loss": 0.8015,
      "step": 618840
    },
    {
      "epoch": 2.1689149642340193,
      "grad_norm": 3.46875,
      "learning_rate": 1.5390546508095983e-05,
      "loss": 0.7793,
      "step": 618850
    },
    {
      "epoch": 2.1689500117409146,
      "grad_norm": 3.609375,
      "learning_rate": 1.5389897479432285e-05,
      "loss": 0.8611,
      "step": 618860
    },
    {
      "epoch": 2.1689850592478104,
      "grad_norm": 2.28125,
      "learning_rate": 1.5389248450768583e-05,
      "loss": 0.8477,
      "step": 618870
    },
    {
      "epoch": 2.169020106754706,
      "grad_norm": 3.0625,
      "learning_rate": 1.538859942210488e-05,
      "loss": 0.7785,
      "step": 618880
    },
    {
      "epoch": 2.1690551542616014,
      "grad_norm": 2.953125,
      "learning_rate": 1.538795039344118e-05,
      "loss": 0.834,
      "step": 618890
    },
    {
      "epoch": 2.169090201768497,
      "grad_norm": 3.484375,
      "learning_rate": 1.5387301364777477e-05,
      "loss": 0.8581,
      "step": 618900
    },
    {
      "epoch": 2.169125249275393,
      "grad_norm": 2.984375,
      "learning_rate": 1.538665233611377e-05,
      "loss": 0.8445,
      "step": 618910
    },
    {
      "epoch": 2.1691602967822883,
      "grad_norm": 3.046875,
      "learning_rate": 1.538600330745007e-05,
      "loss": 0.837,
      "step": 618920
    },
    {
      "epoch": 2.169195344289184,
      "grad_norm": 3.203125,
      "learning_rate": 1.5385354278786367e-05,
      "loss": 0.879,
      "step": 618930
    },
    {
      "epoch": 2.16923039179608,
      "grad_norm": 2.796875,
      "learning_rate": 1.5384705250122665e-05,
      "loss": 0.8505,
      "step": 618940
    },
    {
      "epoch": 2.169265439302975,
      "grad_norm": 3.671875,
      "learning_rate": 1.5384056221458963e-05,
      "loss": 0.8003,
      "step": 618950
    },
    {
      "epoch": 2.169300486809871,
      "grad_norm": 2.828125,
      "learning_rate": 1.538340719279526e-05,
      "loss": 0.8318,
      "step": 618960
    },
    {
      "epoch": 2.169335534316766,
      "grad_norm": 3.3125,
      "learning_rate": 1.5382758164131563e-05,
      "loss": 0.8499,
      "step": 618970
    },
    {
      "epoch": 2.169370581823662,
      "grad_norm": 2.96875,
      "learning_rate": 1.538210913546786e-05,
      "loss": 0.7478,
      "step": 618980
    },
    {
      "epoch": 2.1694056293305577,
      "grad_norm": 2.890625,
      "learning_rate": 1.538146010680416e-05,
      "loss": 0.8475,
      "step": 618990
    },
    {
      "epoch": 2.169440676837453,
      "grad_norm": 2.859375,
      "learning_rate": 1.5380811078140457e-05,
      "loss": 0.8066,
      "step": 619000
    },
    {
      "epoch": 2.1694757243443488,
      "grad_norm": 2.703125,
      "learning_rate": 1.5380162049476755e-05,
      "loss": 0.7914,
      "step": 619010
    },
    {
      "epoch": 2.1695107718512445,
      "grad_norm": 3.3125,
      "learning_rate": 1.5379513020813053e-05,
      "loss": 0.7578,
      "step": 619020
    },
    {
      "epoch": 2.16954581935814,
      "grad_norm": 2.84375,
      "learning_rate": 1.537886399214935e-05,
      "loss": 0.8115,
      "step": 619030
    },
    {
      "epoch": 2.1695808668650356,
      "grad_norm": 2.890625,
      "learning_rate": 1.537821496348565e-05,
      "loss": 0.872,
      "step": 619040
    },
    {
      "epoch": 2.1696159143719314,
      "grad_norm": 3.546875,
      "learning_rate": 1.5377565934821947e-05,
      "loss": 0.8526,
      "step": 619050
    },
    {
      "epoch": 2.1696509618788267,
      "grad_norm": 2.734375,
      "learning_rate": 1.5376916906158245e-05,
      "loss": 0.8413,
      "step": 619060
    },
    {
      "epoch": 2.1696860093857224,
      "grad_norm": 2.9375,
      "learning_rate": 1.5376267877494543e-05,
      "loss": 0.8359,
      "step": 619070
    },
    {
      "epoch": 2.1697210568926177,
      "grad_norm": 2.78125,
      "learning_rate": 1.537561884883084e-05,
      "loss": 0.779,
      "step": 619080
    },
    {
      "epoch": 2.1697561043995135,
      "grad_norm": 3.15625,
      "learning_rate": 1.537496982016714e-05,
      "loss": 0.8468,
      "step": 619090
    },
    {
      "epoch": 2.1697911519064093,
      "grad_norm": 2.828125,
      "learning_rate": 1.5374320791503437e-05,
      "loss": 0.7898,
      "step": 619100
    },
    {
      "epoch": 2.1698261994133046,
      "grad_norm": 2.40625,
      "learning_rate": 1.5373671762839735e-05,
      "loss": 0.7585,
      "step": 619110
    },
    {
      "epoch": 2.1698612469202003,
      "grad_norm": 3.65625,
      "learning_rate": 1.5373022734176033e-05,
      "loss": 0.8408,
      "step": 619120
    },
    {
      "epoch": 2.169896294427096,
      "grad_norm": 3.046875,
      "learning_rate": 1.537237370551233e-05,
      "loss": 0.887,
      "step": 619130
    },
    {
      "epoch": 2.1699313419339914,
      "grad_norm": 3.21875,
      "learning_rate": 1.537172467684863e-05,
      "loss": 0.7723,
      "step": 619140
    },
    {
      "epoch": 2.169966389440887,
      "grad_norm": 2.515625,
      "learning_rate": 1.5371075648184927e-05,
      "loss": 0.7864,
      "step": 619150
    },
    {
      "epoch": 2.170001436947783,
      "grad_norm": 2.4375,
      "learning_rate": 1.5370426619521225e-05,
      "loss": 0.7443,
      "step": 619160
    },
    {
      "epoch": 2.1700364844546782,
      "grad_norm": 2.421875,
      "learning_rate": 1.5369777590857523e-05,
      "loss": 0.796,
      "step": 619170
    },
    {
      "epoch": 2.170071531961574,
      "grad_norm": 2.578125,
      "learning_rate": 1.536912856219382e-05,
      "loss": 0.7799,
      "step": 619180
    },
    {
      "epoch": 2.1701065794684693,
      "grad_norm": 2.765625,
      "learning_rate": 1.536847953353012e-05,
      "loss": 0.7942,
      "step": 619190
    },
    {
      "epoch": 2.170141626975365,
      "grad_norm": 2.984375,
      "learning_rate": 1.5367830504866417e-05,
      "loss": 0.8667,
      "step": 619200
    },
    {
      "epoch": 2.170176674482261,
      "grad_norm": 2.75,
      "learning_rate": 1.5367181476202715e-05,
      "loss": 0.835,
      "step": 619210
    },
    {
      "epoch": 2.170211721989156,
      "grad_norm": 2.84375,
      "learning_rate": 1.5366532447539016e-05,
      "loss": 0.7684,
      "step": 619220
    },
    {
      "epoch": 2.170246769496052,
      "grad_norm": 3.265625,
      "learning_rate": 1.5365883418875314e-05,
      "loss": 0.859,
      "step": 619230
    },
    {
      "epoch": 2.1702818170029476,
      "grad_norm": 2.640625,
      "learning_rate": 1.5365234390211612e-05,
      "loss": 0.8246,
      "step": 619240
    },
    {
      "epoch": 2.170316864509843,
      "grad_norm": 2.640625,
      "learning_rate": 1.536458536154791e-05,
      "loss": 0.8903,
      "step": 619250
    },
    {
      "epoch": 2.1703519120167387,
      "grad_norm": 2.90625,
      "learning_rate": 1.5363936332884208e-05,
      "loss": 0.7521,
      "step": 619260
    },
    {
      "epoch": 2.1703869595236345,
      "grad_norm": 3.390625,
      "learning_rate": 1.5363287304220506e-05,
      "loss": 0.8616,
      "step": 619270
    },
    {
      "epoch": 2.17042200703053,
      "grad_norm": 3.03125,
      "learning_rate": 1.5362638275556804e-05,
      "loss": 0.8575,
      "step": 619280
    },
    {
      "epoch": 2.1704570545374255,
      "grad_norm": 3.078125,
      "learning_rate": 1.53619892468931e-05,
      "loss": 0.8871,
      "step": 619290
    },
    {
      "epoch": 2.170492102044321,
      "grad_norm": 3.078125,
      "learning_rate": 1.5361340218229397e-05,
      "loss": 0.7988,
      "step": 619300
    },
    {
      "epoch": 2.1705271495512166,
      "grad_norm": 3.0,
      "learning_rate": 1.5360691189565695e-05,
      "loss": 0.8911,
      "step": 619310
    },
    {
      "epoch": 2.1705621970581124,
      "grad_norm": 2.46875,
      "learning_rate": 1.5360042160901993e-05,
      "loss": 0.7625,
      "step": 619320
    },
    {
      "epoch": 2.1705972445650077,
      "grad_norm": 3.265625,
      "learning_rate": 1.535939313223829e-05,
      "loss": 0.8163,
      "step": 619330
    },
    {
      "epoch": 2.1706322920719034,
      "grad_norm": 3.0,
      "learning_rate": 1.5358744103574592e-05,
      "loss": 0.7549,
      "step": 619340
    },
    {
      "epoch": 2.170667339578799,
      "grad_norm": 2.90625,
      "learning_rate": 1.535809507491089e-05,
      "loss": 0.7953,
      "step": 619350
    },
    {
      "epoch": 2.1707023870856945,
      "grad_norm": 3.0,
      "learning_rate": 1.5357446046247188e-05,
      "loss": 0.8188,
      "step": 619360
    },
    {
      "epoch": 2.1707374345925903,
      "grad_norm": 3.21875,
      "learning_rate": 1.5356797017583486e-05,
      "loss": 0.8603,
      "step": 619370
    },
    {
      "epoch": 2.170772482099486,
      "grad_norm": 2.96875,
      "learning_rate": 1.5356147988919784e-05,
      "loss": 0.7343,
      "step": 619380
    },
    {
      "epoch": 2.1708075296063813,
      "grad_norm": 2.828125,
      "learning_rate": 1.5355498960256082e-05,
      "loss": 0.9031,
      "step": 619390
    },
    {
      "epoch": 2.170842577113277,
      "grad_norm": 3.375,
      "learning_rate": 1.535484993159238e-05,
      "loss": 0.8255,
      "step": 619400
    },
    {
      "epoch": 2.1708776246201724,
      "grad_norm": 2.5,
      "learning_rate": 1.5354200902928678e-05,
      "loss": 0.7993,
      "step": 619410
    },
    {
      "epoch": 2.170912672127068,
      "grad_norm": 3.0,
      "learning_rate": 1.5353551874264976e-05,
      "loss": 0.8017,
      "step": 619420
    },
    {
      "epoch": 2.170947719633964,
      "grad_norm": 2.609375,
      "learning_rate": 1.5352902845601274e-05,
      "loss": 0.8461,
      "step": 619430
    },
    {
      "epoch": 2.1709827671408592,
      "grad_norm": 2.828125,
      "learning_rate": 1.5352253816937572e-05,
      "loss": 0.8019,
      "step": 619440
    },
    {
      "epoch": 2.171017814647755,
      "grad_norm": 2.6875,
      "learning_rate": 1.535160478827387e-05,
      "loss": 0.7944,
      "step": 619450
    },
    {
      "epoch": 2.1710528621546508,
      "grad_norm": 2.578125,
      "learning_rate": 1.5350955759610168e-05,
      "loss": 0.7813,
      "step": 619460
    },
    {
      "epoch": 2.171087909661546,
      "grad_norm": 2.671875,
      "learning_rate": 1.5350306730946466e-05,
      "loss": 0.7325,
      "step": 619470
    },
    {
      "epoch": 2.171122957168442,
      "grad_norm": 2.921875,
      "learning_rate": 1.5349657702282764e-05,
      "loss": 0.8006,
      "step": 619480
    },
    {
      "epoch": 2.1711580046753376,
      "grad_norm": 2.625,
      "learning_rate": 1.5349008673619062e-05,
      "loss": 0.8143,
      "step": 619490
    },
    {
      "epoch": 2.171193052182233,
      "grad_norm": 3.28125,
      "learning_rate": 1.534835964495536e-05,
      "loss": 0.8144,
      "step": 619500
    },
    {
      "epoch": 2.1712280996891287,
      "grad_norm": 2.734375,
      "learning_rate": 1.5347710616291658e-05,
      "loss": 0.7866,
      "step": 619510
    },
    {
      "epoch": 2.171263147196024,
      "grad_norm": 2.8125,
      "learning_rate": 1.5347061587627956e-05,
      "loss": 0.7722,
      "step": 619520
    },
    {
      "epoch": 2.1712981947029197,
      "grad_norm": 2.671875,
      "learning_rate": 1.5346412558964254e-05,
      "loss": 0.7775,
      "step": 619530
    },
    {
      "epoch": 2.1713332422098155,
      "grad_norm": 3.09375,
      "learning_rate": 1.5345763530300552e-05,
      "loss": 0.7689,
      "step": 619540
    },
    {
      "epoch": 2.171368289716711,
      "grad_norm": 3.40625,
      "learning_rate": 1.534511450163685e-05,
      "loss": 0.8029,
      "step": 619550
    },
    {
      "epoch": 2.1714033372236066,
      "grad_norm": 3.3125,
      "learning_rate": 1.5344465472973148e-05,
      "loss": 0.8114,
      "step": 619560
    },
    {
      "epoch": 2.1714383847305023,
      "grad_norm": 2.84375,
      "learning_rate": 1.5343816444309446e-05,
      "loss": 0.8519,
      "step": 619570
    },
    {
      "epoch": 2.1714734322373976,
      "grad_norm": 3.796875,
      "learning_rate": 1.5343167415645744e-05,
      "loss": 0.8684,
      "step": 619580
    },
    {
      "epoch": 2.1715084797442934,
      "grad_norm": 2.796875,
      "learning_rate": 1.5342518386982045e-05,
      "loss": 0.8612,
      "step": 619590
    },
    {
      "epoch": 2.171543527251189,
      "grad_norm": 2.984375,
      "learning_rate": 1.5341869358318343e-05,
      "loss": 0.8212,
      "step": 619600
    },
    {
      "epoch": 2.1715785747580845,
      "grad_norm": 2.875,
      "learning_rate": 1.534122032965464e-05,
      "loss": 0.8354,
      "step": 619610
    },
    {
      "epoch": 2.1716136222649802,
      "grad_norm": 2.5625,
      "learning_rate": 1.534057130099094e-05,
      "loss": 0.7889,
      "step": 619620
    },
    {
      "epoch": 2.171648669771876,
      "grad_norm": 2.390625,
      "learning_rate": 1.5339922272327237e-05,
      "loss": 0.7867,
      "step": 619630
    },
    {
      "epoch": 2.1716837172787713,
      "grad_norm": 2.859375,
      "learning_rate": 1.5339273243663535e-05,
      "loss": 0.8512,
      "step": 619640
    },
    {
      "epoch": 2.171718764785667,
      "grad_norm": 2.578125,
      "learning_rate": 1.5338624214999833e-05,
      "loss": 0.8531,
      "step": 619650
    },
    {
      "epoch": 2.1717538122925624,
      "grad_norm": 2.671875,
      "learning_rate": 1.5337975186336128e-05,
      "loss": 0.7987,
      "step": 619660
    },
    {
      "epoch": 2.171788859799458,
      "grad_norm": 2.78125,
      "learning_rate": 1.5337326157672426e-05,
      "loss": 0.8901,
      "step": 619670
    },
    {
      "epoch": 2.171823907306354,
      "grad_norm": 2.8125,
      "learning_rate": 1.5336677129008724e-05,
      "loss": 0.7789,
      "step": 619680
    },
    {
      "epoch": 2.171858954813249,
      "grad_norm": 3.171875,
      "learning_rate": 1.5336028100345022e-05,
      "loss": 0.8398,
      "step": 619690
    },
    {
      "epoch": 2.171894002320145,
      "grad_norm": 3.0,
      "learning_rate": 1.5335379071681323e-05,
      "loss": 0.7574,
      "step": 619700
    },
    {
      "epoch": 2.1719290498270407,
      "grad_norm": 3.015625,
      "learning_rate": 1.533473004301762e-05,
      "loss": 0.8406,
      "step": 619710
    },
    {
      "epoch": 2.171964097333936,
      "grad_norm": 2.671875,
      "learning_rate": 1.533408101435392e-05,
      "loss": 0.9584,
      "step": 619720
    },
    {
      "epoch": 2.171999144840832,
      "grad_norm": 3.078125,
      "learning_rate": 1.5333431985690217e-05,
      "loss": 0.8025,
      "step": 619730
    },
    {
      "epoch": 2.1720341923477275,
      "grad_norm": 3.078125,
      "learning_rate": 1.5332782957026515e-05,
      "loss": 0.7932,
      "step": 619740
    },
    {
      "epoch": 2.172069239854623,
      "grad_norm": 2.59375,
      "learning_rate": 1.5332133928362813e-05,
      "loss": 0.8405,
      "step": 619750
    },
    {
      "epoch": 2.1721042873615186,
      "grad_norm": 3.171875,
      "learning_rate": 1.533148489969911e-05,
      "loss": 0.8232,
      "step": 619760
    },
    {
      "epoch": 2.172139334868414,
      "grad_norm": 2.671875,
      "learning_rate": 1.533083587103541e-05,
      "loss": 0.8038,
      "step": 619770
    },
    {
      "epoch": 2.1721743823753097,
      "grad_norm": 2.671875,
      "learning_rate": 1.5330186842371707e-05,
      "loss": 0.8775,
      "step": 619780
    },
    {
      "epoch": 2.1722094298822054,
      "grad_norm": 2.609375,
      "learning_rate": 1.5329537813708005e-05,
      "loss": 0.7628,
      "step": 619790
    },
    {
      "epoch": 2.1722444773891008,
      "grad_norm": 3.609375,
      "learning_rate": 1.5328888785044303e-05,
      "loss": 0.8478,
      "step": 619800
    },
    {
      "epoch": 2.1722795248959965,
      "grad_norm": 2.765625,
      "learning_rate": 1.53282397563806e-05,
      "loss": 0.869,
      "step": 619810
    },
    {
      "epoch": 2.1723145724028923,
      "grad_norm": 3.1875,
      "learning_rate": 1.53275907277169e-05,
      "loss": 0.8033,
      "step": 619820
    },
    {
      "epoch": 2.1723496199097876,
      "grad_norm": 3.3125,
      "learning_rate": 1.5326941699053197e-05,
      "loss": 0.8043,
      "step": 619830
    },
    {
      "epoch": 2.1723846674166833,
      "grad_norm": 2.796875,
      "learning_rate": 1.53262926703895e-05,
      "loss": 0.7891,
      "step": 619840
    },
    {
      "epoch": 2.172419714923579,
      "grad_norm": 3.203125,
      "learning_rate": 1.5325643641725793e-05,
      "loss": 0.846,
      "step": 619850
    },
    {
      "epoch": 2.1724547624304744,
      "grad_norm": 2.859375,
      "learning_rate": 1.532499461306209e-05,
      "loss": 0.7859,
      "step": 619860
    },
    {
      "epoch": 2.17248980993737,
      "grad_norm": 3.171875,
      "learning_rate": 1.532434558439839e-05,
      "loss": 0.8087,
      "step": 619870
    },
    {
      "epoch": 2.172524857444266,
      "grad_norm": 3.015625,
      "learning_rate": 1.5323696555734687e-05,
      "loss": 0.8751,
      "step": 619880
    },
    {
      "epoch": 2.1725599049511612,
      "grad_norm": 3.03125,
      "learning_rate": 1.5323047527070985e-05,
      "loss": 0.8364,
      "step": 619890
    },
    {
      "epoch": 2.172594952458057,
      "grad_norm": 2.921875,
      "learning_rate": 1.5322398498407283e-05,
      "loss": 0.846,
      "step": 619900
    },
    {
      "epoch": 2.1726299999649523,
      "grad_norm": 2.734375,
      "learning_rate": 1.532174946974358e-05,
      "loss": 0.7982,
      "step": 619910
    },
    {
      "epoch": 2.172665047471848,
      "grad_norm": 2.765625,
      "learning_rate": 1.532110044107988e-05,
      "loss": 0.809,
      "step": 619920
    },
    {
      "epoch": 2.172700094978744,
      "grad_norm": 2.90625,
      "learning_rate": 1.5320451412416177e-05,
      "loss": 0.8144,
      "step": 619930
    },
    {
      "epoch": 2.172735142485639,
      "grad_norm": 3.328125,
      "learning_rate": 1.5319802383752475e-05,
      "loss": 0.8547,
      "step": 619940
    },
    {
      "epoch": 2.172770189992535,
      "grad_norm": 2.546875,
      "learning_rate": 1.5319153355088773e-05,
      "loss": 0.8231,
      "step": 619950
    },
    {
      "epoch": 2.1728052374994307,
      "grad_norm": 2.953125,
      "learning_rate": 1.5318504326425075e-05,
      "loss": 0.8572,
      "step": 619960
    },
    {
      "epoch": 2.172840285006326,
      "grad_norm": 2.25,
      "learning_rate": 1.5317855297761373e-05,
      "loss": 0.7467,
      "step": 619970
    },
    {
      "epoch": 2.1728753325132217,
      "grad_norm": 3.265625,
      "learning_rate": 1.531720626909767e-05,
      "loss": 0.8739,
      "step": 619980
    },
    {
      "epoch": 2.1729103800201175,
      "grad_norm": 2.96875,
      "learning_rate": 1.531655724043397e-05,
      "loss": 0.7456,
      "step": 619990
    },
    {
      "epoch": 2.172945427527013,
      "grad_norm": 2.65625,
      "learning_rate": 1.5315908211770267e-05,
      "loss": 0.8126,
      "step": 620000
    },
    {
      "epoch": 2.172945427527013,
      "eval_loss": 0.7636868357658386,
      "eval_runtime": 563.7773,
      "eval_samples_per_second": 674.798,
      "eval_steps_per_second": 56.233,
      "step": 620000
    },
    {
      "epoch": 2.1729804750339086,
      "grad_norm": 2.515625,
      "learning_rate": 1.5315259183106565e-05,
      "loss": 0.7403,
      "step": 620010
    },
    {
      "epoch": 2.173015522540804,
      "grad_norm": 3.234375,
      "learning_rate": 1.5314610154442863e-05,
      "loss": 0.9035,
      "step": 620020
    },
    {
      "epoch": 2.1730505700476996,
      "grad_norm": 2.828125,
      "learning_rate": 1.531396112577916e-05,
      "loss": 0.8375,
      "step": 620030
    },
    {
      "epoch": 2.1730856175545954,
      "grad_norm": 2.875,
      "learning_rate": 1.5313312097115455e-05,
      "loss": 0.7227,
      "step": 620040
    },
    {
      "epoch": 2.1731206650614907,
      "grad_norm": 2.96875,
      "learning_rate": 1.5312663068451753e-05,
      "loss": 0.8244,
      "step": 620050
    },
    {
      "epoch": 2.1731557125683865,
      "grad_norm": 2.78125,
      "learning_rate": 1.531201403978805e-05,
      "loss": 0.7652,
      "step": 620060
    },
    {
      "epoch": 2.1731907600752822,
      "grad_norm": 2.90625,
      "learning_rate": 1.5311365011124353e-05,
      "loss": 0.8381,
      "step": 620070
    },
    {
      "epoch": 2.1732258075821775,
      "grad_norm": 3.03125,
      "learning_rate": 1.531071598246065e-05,
      "loss": 0.8637,
      "step": 620080
    },
    {
      "epoch": 2.1732608550890733,
      "grad_norm": 3.015625,
      "learning_rate": 1.531006695379695e-05,
      "loss": 0.7329,
      "step": 620090
    },
    {
      "epoch": 2.173295902595969,
      "grad_norm": 3.078125,
      "learning_rate": 1.5309417925133247e-05,
      "loss": 0.7652,
      "step": 620100
    },
    {
      "epoch": 2.1733309501028644,
      "grad_norm": 2.6875,
      "learning_rate": 1.5308768896469545e-05,
      "loss": 0.8345,
      "step": 620110
    },
    {
      "epoch": 2.17336599760976,
      "grad_norm": 2.578125,
      "learning_rate": 1.5308119867805843e-05,
      "loss": 0.794,
      "step": 620120
    },
    {
      "epoch": 2.1734010451166554,
      "grad_norm": 2.65625,
      "learning_rate": 1.530747083914214e-05,
      "loss": 0.8239,
      "step": 620130
    },
    {
      "epoch": 2.173436092623551,
      "grad_norm": 2.796875,
      "learning_rate": 1.530682181047844e-05,
      "loss": 0.7906,
      "step": 620140
    },
    {
      "epoch": 2.173471140130447,
      "grad_norm": 2.75,
      "learning_rate": 1.5306172781814737e-05,
      "loss": 0.8205,
      "step": 620150
    },
    {
      "epoch": 2.1735061876373423,
      "grad_norm": 2.78125,
      "learning_rate": 1.5305523753151035e-05,
      "loss": 0.8326,
      "step": 620160
    },
    {
      "epoch": 2.173541235144238,
      "grad_norm": 3.25,
      "learning_rate": 1.5304874724487333e-05,
      "loss": 0.8816,
      "step": 620170
    },
    {
      "epoch": 2.173576282651134,
      "grad_norm": 3.234375,
      "learning_rate": 1.530422569582363e-05,
      "loss": 0.8957,
      "step": 620180
    },
    {
      "epoch": 2.173611330158029,
      "grad_norm": 2.765625,
      "learning_rate": 1.530357666715993e-05,
      "loss": 0.8287,
      "step": 620190
    },
    {
      "epoch": 2.173646377664925,
      "grad_norm": 3.109375,
      "learning_rate": 1.5302927638496227e-05,
      "loss": 0.903,
      "step": 620200
    },
    {
      "epoch": 2.1736814251718206,
      "grad_norm": 2.640625,
      "learning_rate": 1.5302278609832528e-05,
      "loss": 0.7974,
      "step": 620210
    },
    {
      "epoch": 2.173716472678716,
      "grad_norm": 2.6875,
      "learning_rate": 1.5301629581168826e-05,
      "loss": 0.8485,
      "step": 620220
    },
    {
      "epoch": 2.1737515201856117,
      "grad_norm": 2.609375,
      "learning_rate": 1.530098055250512e-05,
      "loss": 0.8167,
      "step": 620230
    },
    {
      "epoch": 2.173786567692507,
      "grad_norm": 3.453125,
      "learning_rate": 1.530033152384142e-05,
      "loss": 0.8029,
      "step": 620240
    },
    {
      "epoch": 2.1738216151994028,
      "grad_norm": 2.8125,
      "learning_rate": 1.5299682495177717e-05,
      "loss": 0.7454,
      "step": 620250
    },
    {
      "epoch": 2.1738566627062985,
      "grad_norm": 3.1875,
      "learning_rate": 1.5299033466514015e-05,
      "loss": 0.8109,
      "step": 620260
    },
    {
      "epoch": 2.173891710213194,
      "grad_norm": 2.921875,
      "learning_rate": 1.5298384437850313e-05,
      "loss": 0.8597,
      "step": 620270
    },
    {
      "epoch": 2.1739267577200896,
      "grad_norm": 2.921875,
      "learning_rate": 1.529773540918661e-05,
      "loss": 0.7245,
      "step": 620280
    },
    {
      "epoch": 2.1739618052269853,
      "grad_norm": 2.84375,
      "learning_rate": 1.529708638052291e-05,
      "loss": 0.8388,
      "step": 620290
    },
    {
      "epoch": 2.1739968527338807,
      "grad_norm": 2.6875,
      "learning_rate": 1.5296437351859207e-05,
      "loss": 0.7685,
      "step": 620300
    },
    {
      "epoch": 2.1740319002407764,
      "grad_norm": 2.65625,
      "learning_rate": 1.5295788323195505e-05,
      "loss": 0.7468,
      "step": 620310
    },
    {
      "epoch": 2.174066947747672,
      "grad_norm": 2.8125,
      "learning_rate": 1.5295139294531806e-05,
      "loss": 0.8149,
      "step": 620320
    },
    {
      "epoch": 2.1741019952545675,
      "grad_norm": 3.328125,
      "learning_rate": 1.5294490265868104e-05,
      "loss": 0.8545,
      "step": 620330
    },
    {
      "epoch": 2.1741370427614632,
      "grad_norm": 3.0,
      "learning_rate": 1.5293841237204402e-05,
      "loss": 0.8264,
      "step": 620340
    },
    {
      "epoch": 2.1741720902683586,
      "grad_norm": 3.078125,
      "learning_rate": 1.52931922085407e-05,
      "loss": 0.8681,
      "step": 620350
    },
    {
      "epoch": 2.1742071377752543,
      "grad_norm": 2.953125,
      "learning_rate": 1.5292543179876998e-05,
      "loss": 0.9217,
      "step": 620360
    },
    {
      "epoch": 2.17424218528215,
      "grad_norm": 2.984375,
      "learning_rate": 1.5291894151213296e-05,
      "loss": 0.8277,
      "step": 620370
    },
    {
      "epoch": 2.1742772327890454,
      "grad_norm": 3.09375,
      "learning_rate": 1.5291245122549594e-05,
      "loss": 0.8871,
      "step": 620380
    },
    {
      "epoch": 2.174312280295941,
      "grad_norm": 2.6875,
      "learning_rate": 1.5290596093885892e-05,
      "loss": 0.8116,
      "step": 620390
    },
    {
      "epoch": 2.174347327802837,
      "grad_norm": 2.5,
      "learning_rate": 1.528994706522219e-05,
      "loss": 0.853,
      "step": 620400
    },
    {
      "epoch": 2.174382375309732,
      "grad_norm": 3.078125,
      "learning_rate": 1.5289298036558485e-05,
      "loss": 0.8822,
      "step": 620410
    },
    {
      "epoch": 2.174417422816628,
      "grad_norm": 2.546875,
      "learning_rate": 1.5288649007894783e-05,
      "loss": 0.7847,
      "step": 620420
    },
    {
      "epoch": 2.1744524703235237,
      "grad_norm": 2.578125,
      "learning_rate": 1.528799997923108e-05,
      "loss": 0.8748,
      "step": 620430
    },
    {
      "epoch": 2.174487517830419,
      "grad_norm": 3.34375,
      "learning_rate": 1.5287350950567382e-05,
      "loss": 0.8462,
      "step": 620440
    },
    {
      "epoch": 2.174522565337315,
      "grad_norm": 2.65625,
      "learning_rate": 1.528670192190368e-05,
      "loss": 0.8262,
      "step": 620450
    },
    {
      "epoch": 2.17455761284421,
      "grad_norm": 3.125,
      "learning_rate": 1.5286052893239978e-05,
      "loss": 0.7174,
      "step": 620460
    },
    {
      "epoch": 2.174592660351106,
      "grad_norm": 2.859375,
      "learning_rate": 1.5285403864576276e-05,
      "loss": 0.8646,
      "step": 620470
    },
    {
      "epoch": 2.1746277078580016,
      "grad_norm": 2.890625,
      "learning_rate": 1.5284754835912574e-05,
      "loss": 0.8233,
      "step": 620480
    },
    {
      "epoch": 2.174662755364897,
      "grad_norm": 3.125,
      "learning_rate": 1.5284105807248872e-05,
      "loss": 0.8376,
      "step": 620490
    },
    {
      "epoch": 2.1746978028717927,
      "grad_norm": 2.453125,
      "learning_rate": 1.528345677858517e-05,
      "loss": 0.8613,
      "step": 620500
    },
    {
      "epoch": 2.1747328503786885,
      "grad_norm": 2.8125,
      "learning_rate": 1.5282807749921468e-05,
      "loss": 0.8003,
      "step": 620510
    },
    {
      "epoch": 2.174767897885584,
      "grad_norm": 2.75,
      "learning_rate": 1.5282158721257766e-05,
      "loss": 0.7669,
      "step": 620520
    },
    {
      "epoch": 2.1748029453924795,
      "grad_norm": 3.3125,
      "learning_rate": 1.5281509692594064e-05,
      "loss": 0.8328,
      "step": 620530
    },
    {
      "epoch": 2.1748379928993753,
      "grad_norm": 3.265625,
      "learning_rate": 1.5280860663930362e-05,
      "loss": 0.7757,
      "step": 620540
    },
    {
      "epoch": 2.1748730404062706,
      "grad_norm": 2.40625,
      "learning_rate": 1.528021163526666e-05,
      "loss": 0.8627,
      "step": 620550
    },
    {
      "epoch": 2.1749080879131664,
      "grad_norm": 3.0,
      "learning_rate": 1.5279562606602958e-05,
      "loss": 0.8001,
      "step": 620560
    },
    {
      "epoch": 2.1749431354200617,
      "grad_norm": 3.234375,
      "learning_rate": 1.5278913577939256e-05,
      "loss": 0.8433,
      "step": 620570
    },
    {
      "epoch": 2.1749781829269574,
      "grad_norm": 3.03125,
      "learning_rate": 1.5278264549275558e-05,
      "loss": 0.8673,
      "step": 620580
    },
    {
      "epoch": 2.175013230433853,
      "grad_norm": 3.296875,
      "learning_rate": 1.5277615520611856e-05,
      "loss": 0.757,
      "step": 620590
    },
    {
      "epoch": 2.1750482779407485,
      "grad_norm": 3.21875,
      "learning_rate": 1.527696649194815e-05,
      "loss": 0.8239,
      "step": 620600
    },
    {
      "epoch": 2.1750833254476443,
      "grad_norm": 2.796875,
      "learning_rate": 1.5276317463284448e-05,
      "loss": 0.8746,
      "step": 620610
    },
    {
      "epoch": 2.17511837295454,
      "grad_norm": 2.625,
      "learning_rate": 1.5275668434620746e-05,
      "loss": 0.758,
      "step": 620620
    },
    {
      "epoch": 2.1751534204614353,
      "grad_norm": 2.671875,
      "learning_rate": 1.5275019405957044e-05,
      "loss": 0.8858,
      "step": 620630
    },
    {
      "epoch": 2.175188467968331,
      "grad_norm": 3.265625,
      "learning_rate": 1.5274370377293342e-05,
      "loss": 0.9296,
      "step": 620640
    },
    {
      "epoch": 2.175223515475227,
      "grad_norm": 3.265625,
      "learning_rate": 1.527372134862964e-05,
      "loss": 0.8306,
      "step": 620650
    },
    {
      "epoch": 2.175258562982122,
      "grad_norm": 3.171875,
      "learning_rate": 1.5273072319965938e-05,
      "loss": 0.8054,
      "step": 620660
    },
    {
      "epoch": 2.175293610489018,
      "grad_norm": 2.734375,
      "learning_rate": 1.5272423291302236e-05,
      "loss": 0.7647,
      "step": 620670
    },
    {
      "epoch": 2.1753286579959132,
      "grad_norm": 2.84375,
      "learning_rate": 1.5271774262638534e-05,
      "loss": 0.8354,
      "step": 620680
    },
    {
      "epoch": 2.175363705502809,
      "grad_norm": 2.9375,
      "learning_rate": 1.5271125233974836e-05,
      "loss": 0.8371,
      "step": 620690
    },
    {
      "epoch": 2.1753987530097048,
      "grad_norm": 2.96875,
      "learning_rate": 1.5270476205311134e-05,
      "loss": 0.8803,
      "step": 620700
    },
    {
      "epoch": 2.1754338005166,
      "grad_norm": 2.796875,
      "learning_rate": 1.526982717664743e-05,
      "loss": 0.8344,
      "step": 620710
    },
    {
      "epoch": 2.175468848023496,
      "grad_norm": 2.890625,
      "learning_rate": 1.526917814798373e-05,
      "loss": 0.8557,
      "step": 620720
    },
    {
      "epoch": 2.1755038955303916,
      "grad_norm": 2.796875,
      "learning_rate": 1.5268529119320028e-05,
      "loss": 0.8722,
      "step": 620730
    },
    {
      "epoch": 2.175538943037287,
      "grad_norm": 2.953125,
      "learning_rate": 1.5267880090656326e-05,
      "loss": 0.8728,
      "step": 620740
    },
    {
      "epoch": 2.1755739905441827,
      "grad_norm": 2.71875,
      "learning_rate": 1.5267231061992624e-05,
      "loss": 0.7992,
      "step": 620750
    },
    {
      "epoch": 2.1756090380510784,
      "grad_norm": 2.78125,
      "learning_rate": 1.526658203332892e-05,
      "loss": 0.7928,
      "step": 620760
    },
    {
      "epoch": 2.1756440855579737,
      "grad_norm": 3.03125,
      "learning_rate": 1.526593300466522e-05,
      "loss": 0.8375,
      "step": 620770
    },
    {
      "epoch": 2.1756791330648695,
      "grad_norm": 2.421875,
      "learning_rate": 1.5265283976001518e-05,
      "loss": 0.7702,
      "step": 620780
    },
    {
      "epoch": 2.175714180571765,
      "grad_norm": 3.03125,
      "learning_rate": 1.5264634947337812e-05,
      "loss": 0.8401,
      "step": 620790
    },
    {
      "epoch": 2.1757492280786606,
      "grad_norm": 2.625,
      "learning_rate": 1.5263985918674114e-05,
      "loss": 0.7757,
      "step": 620800
    },
    {
      "epoch": 2.1757842755855563,
      "grad_norm": 2.796875,
      "learning_rate": 1.526333689001041e-05,
      "loss": 0.7679,
      "step": 620810
    },
    {
      "epoch": 2.1758193230924516,
      "grad_norm": 3.390625,
      "learning_rate": 1.526268786134671e-05,
      "loss": 0.8512,
      "step": 620820
    },
    {
      "epoch": 2.1758543705993474,
      "grad_norm": 3.015625,
      "learning_rate": 1.5262038832683008e-05,
      "loss": 0.8007,
      "step": 620830
    },
    {
      "epoch": 2.175889418106243,
      "grad_norm": 3.03125,
      "learning_rate": 1.5261389804019306e-05,
      "loss": 0.8091,
      "step": 620840
    },
    {
      "epoch": 2.1759244656131385,
      "grad_norm": 3.171875,
      "learning_rate": 1.5260740775355604e-05,
      "loss": 0.8268,
      "step": 620850
    },
    {
      "epoch": 2.175959513120034,
      "grad_norm": 2.859375,
      "learning_rate": 1.52600917466919e-05,
      "loss": 0.7578,
      "step": 620860
    },
    {
      "epoch": 2.17599456062693,
      "grad_norm": 2.703125,
      "learning_rate": 1.52594427180282e-05,
      "loss": 0.7644,
      "step": 620870
    },
    {
      "epoch": 2.1760296081338253,
      "grad_norm": 2.984375,
      "learning_rate": 1.5258793689364498e-05,
      "loss": 0.8128,
      "step": 620880
    },
    {
      "epoch": 2.176064655640721,
      "grad_norm": 3.015625,
      "learning_rate": 1.5258144660700796e-05,
      "loss": 0.8102,
      "step": 620890
    },
    {
      "epoch": 2.1760997031476164,
      "grad_norm": 3.296875,
      "learning_rate": 1.5257495632037094e-05,
      "loss": 0.8559,
      "step": 620900
    },
    {
      "epoch": 2.176134750654512,
      "grad_norm": 2.5625,
      "learning_rate": 1.5256846603373393e-05,
      "loss": 0.8221,
      "step": 620910
    },
    {
      "epoch": 2.176169798161408,
      "grad_norm": 2.953125,
      "learning_rate": 1.5256197574709691e-05,
      "loss": 0.7814,
      "step": 620920
    },
    {
      "epoch": 2.176204845668303,
      "grad_norm": 3.0,
      "learning_rate": 1.525554854604599e-05,
      "loss": 0.7861,
      "step": 620930
    },
    {
      "epoch": 2.176239893175199,
      "grad_norm": 2.6875,
      "learning_rate": 1.5254899517382287e-05,
      "loss": 0.7884,
      "step": 620940
    },
    {
      "epoch": 2.1762749406820947,
      "grad_norm": 2.671875,
      "learning_rate": 1.5254250488718585e-05,
      "loss": 0.7701,
      "step": 620950
    },
    {
      "epoch": 2.17630998818899,
      "grad_norm": 3.0625,
      "learning_rate": 1.5253601460054883e-05,
      "loss": 0.7281,
      "step": 620960
    },
    {
      "epoch": 2.1763450356958858,
      "grad_norm": 3.171875,
      "learning_rate": 1.5252952431391181e-05,
      "loss": 0.7461,
      "step": 620970
    },
    {
      "epoch": 2.1763800832027815,
      "grad_norm": 2.546875,
      "learning_rate": 1.5252303402727478e-05,
      "loss": 0.791,
      "step": 620980
    },
    {
      "epoch": 2.176415130709677,
      "grad_norm": 2.90625,
      "learning_rate": 1.5251654374063776e-05,
      "loss": 0.7126,
      "step": 620990
    },
    {
      "epoch": 2.1764501782165726,
      "grad_norm": 2.640625,
      "learning_rate": 1.5251005345400074e-05,
      "loss": 0.8499,
      "step": 621000
    },
    {
      "epoch": 2.1764852257234684,
      "grad_norm": 3.296875,
      "learning_rate": 1.5250356316736372e-05,
      "loss": 0.7921,
      "step": 621010
    },
    {
      "epoch": 2.1765202732303637,
      "grad_norm": 2.84375,
      "learning_rate": 1.5249707288072671e-05,
      "loss": 0.8366,
      "step": 621020
    },
    {
      "epoch": 2.1765553207372594,
      "grad_norm": 2.734375,
      "learning_rate": 1.524905825940897e-05,
      "loss": 0.8203,
      "step": 621030
    },
    {
      "epoch": 2.1765903682441547,
      "grad_norm": 2.96875,
      "learning_rate": 1.5248409230745267e-05,
      "loss": 0.7848,
      "step": 621040
    },
    {
      "epoch": 2.1766254157510505,
      "grad_norm": 3.125,
      "learning_rate": 1.5247760202081565e-05,
      "loss": 0.8936,
      "step": 621050
    },
    {
      "epoch": 2.1766604632579463,
      "grad_norm": 3.171875,
      "learning_rate": 1.5247111173417863e-05,
      "loss": 0.8678,
      "step": 621060
    },
    {
      "epoch": 2.1766955107648416,
      "grad_norm": 2.359375,
      "learning_rate": 1.5246462144754161e-05,
      "loss": 0.7914,
      "step": 621070
    },
    {
      "epoch": 2.1767305582717373,
      "grad_norm": 2.5,
      "learning_rate": 1.524581311609046e-05,
      "loss": 0.8416,
      "step": 621080
    },
    {
      "epoch": 2.176765605778633,
      "grad_norm": 2.765625,
      "learning_rate": 1.5245164087426759e-05,
      "loss": 0.8455,
      "step": 621090
    },
    {
      "epoch": 2.1768006532855284,
      "grad_norm": 2.59375,
      "learning_rate": 1.5244515058763057e-05,
      "loss": 0.8371,
      "step": 621100
    },
    {
      "epoch": 2.176835700792424,
      "grad_norm": 2.78125,
      "learning_rate": 1.5243866030099355e-05,
      "loss": 0.8182,
      "step": 621110
    },
    {
      "epoch": 2.17687074829932,
      "grad_norm": 3.234375,
      "learning_rate": 1.5243217001435653e-05,
      "loss": 0.8036,
      "step": 621120
    },
    {
      "epoch": 2.1769057958062152,
      "grad_norm": 2.734375,
      "learning_rate": 1.5242567972771951e-05,
      "loss": 0.8026,
      "step": 621130
    },
    {
      "epoch": 2.176940843313111,
      "grad_norm": 2.9375,
      "learning_rate": 1.5241918944108249e-05,
      "loss": 0.8109,
      "step": 621140
    },
    {
      "epoch": 2.1769758908200068,
      "grad_norm": 3.03125,
      "learning_rate": 1.5241269915444547e-05,
      "loss": 0.8242,
      "step": 621150
    },
    {
      "epoch": 2.177010938326902,
      "grad_norm": 3.328125,
      "learning_rate": 1.5240620886780847e-05,
      "loss": 0.8063,
      "step": 621160
    },
    {
      "epoch": 2.177045985833798,
      "grad_norm": 2.984375,
      "learning_rate": 1.5239971858117141e-05,
      "loss": 0.8476,
      "step": 621170
    },
    {
      "epoch": 2.177081033340693,
      "grad_norm": 2.484375,
      "learning_rate": 1.523932282945344e-05,
      "loss": 0.8262,
      "step": 621180
    },
    {
      "epoch": 2.177116080847589,
      "grad_norm": 2.953125,
      "learning_rate": 1.5238673800789737e-05,
      "loss": 0.7973,
      "step": 621190
    },
    {
      "epoch": 2.1771511283544847,
      "grad_norm": 2.671875,
      "learning_rate": 1.5238024772126035e-05,
      "loss": 0.8582,
      "step": 621200
    },
    {
      "epoch": 2.17718617586138,
      "grad_norm": 2.953125,
      "learning_rate": 1.5237375743462335e-05,
      "loss": 0.8956,
      "step": 621210
    },
    {
      "epoch": 2.1772212233682757,
      "grad_norm": 2.90625,
      "learning_rate": 1.5236726714798633e-05,
      "loss": 0.7474,
      "step": 621220
    },
    {
      "epoch": 2.1772562708751715,
      "grad_norm": 2.71875,
      "learning_rate": 1.5236077686134931e-05,
      "loss": 0.8748,
      "step": 621230
    },
    {
      "epoch": 2.177291318382067,
      "grad_norm": 2.671875,
      "learning_rate": 1.5235428657471229e-05,
      "loss": 0.7545,
      "step": 621240
    },
    {
      "epoch": 2.1773263658889626,
      "grad_norm": 2.875,
      "learning_rate": 1.5234779628807527e-05,
      "loss": 0.8839,
      "step": 621250
    },
    {
      "epoch": 2.1773614133958583,
      "grad_norm": 3.140625,
      "learning_rate": 1.5234130600143825e-05,
      "loss": 0.8318,
      "step": 621260
    },
    {
      "epoch": 2.1773964609027536,
      "grad_norm": 3.28125,
      "learning_rate": 1.5233481571480125e-05,
      "loss": 0.8046,
      "step": 621270
    },
    {
      "epoch": 2.1774315084096494,
      "grad_norm": 2.734375,
      "learning_rate": 1.5232832542816423e-05,
      "loss": 0.8012,
      "step": 621280
    },
    {
      "epoch": 2.1774665559165447,
      "grad_norm": 3.046875,
      "learning_rate": 1.523218351415272e-05,
      "loss": 0.8584,
      "step": 621290
    },
    {
      "epoch": 2.1775016034234405,
      "grad_norm": 2.578125,
      "learning_rate": 1.5231534485489019e-05,
      "loss": 0.8083,
      "step": 621300
    },
    {
      "epoch": 2.177536650930336,
      "grad_norm": 2.734375,
      "learning_rate": 1.5230885456825317e-05,
      "loss": 0.7894,
      "step": 621310
    },
    {
      "epoch": 2.1775716984372315,
      "grad_norm": 2.640625,
      "learning_rate": 1.5230236428161615e-05,
      "loss": 0.7188,
      "step": 621320
    },
    {
      "epoch": 2.1776067459441273,
      "grad_norm": 2.703125,
      "learning_rate": 1.5229587399497913e-05,
      "loss": 0.814,
      "step": 621330
    },
    {
      "epoch": 2.177641793451023,
      "grad_norm": 2.75,
      "learning_rate": 1.5228938370834212e-05,
      "loss": 0.8237,
      "step": 621340
    },
    {
      "epoch": 2.1776768409579184,
      "grad_norm": 3.0,
      "learning_rate": 1.522828934217051e-05,
      "loss": 0.7619,
      "step": 621350
    },
    {
      "epoch": 2.177711888464814,
      "grad_norm": 2.640625,
      "learning_rate": 1.5227640313506805e-05,
      "loss": 0.796,
      "step": 621360
    },
    {
      "epoch": 2.17774693597171,
      "grad_norm": 3.109375,
      "learning_rate": 1.5226991284843103e-05,
      "loss": 0.7592,
      "step": 621370
    },
    {
      "epoch": 2.177781983478605,
      "grad_norm": 2.796875,
      "learning_rate": 1.5226342256179401e-05,
      "loss": 0.8631,
      "step": 621380
    },
    {
      "epoch": 2.177817030985501,
      "grad_norm": 2.75,
      "learning_rate": 1.52256932275157e-05,
      "loss": 0.7747,
      "step": 621390
    },
    {
      "epoch": 2.1778520784923963,
      "grad_norm": 2.734375,
      "learning_rate": 1.5225044198851999e-05,
      "loss": 0.8806,
      "step": 621400
    },
    {
      "epoch": 2.177887125999292,
      "grad_norm": 2.921875,
      "learning_rate": 1.5224395170188297e-05,
      "loss": 0.8385,
      "step": 621410
    },
    {
      "epoch": 2.1779221735061878,
      "grad_norm": 3.265625,
      "learning_rate": 1.5223746141524595e-05,
      "loss": 0.8456,
      "step": 621420
    },
    {
      "epoch": 2.177957221013083,
      "grad_norm": 2.765625,
      "learning_rate": 1.5223097112860893e-05,
      "loss": 0.7743,
      "step": 621430
    },
    {
      "epoch": 2.177992268519979,
      "grad_norm": 2.90625,
      "learning_rate": 1.522244808419719e-05,
      "loss": 0.8,
      "step": 621440
    },
    {
      "epoch": 2.1780273160268746,
      "grad_norm": 2.96875,
      "learning_rate": 1.5221799055533489e-05,
      "loss": 0.823,
      "step": 621450
    },
    {
      "epoch": 2.17806236353377,
      "grad_norm": 3.171875,
      "learning_rate": 1.5221150026869788e-05,
      "loss": 0.8743,
      "step": 621460
    },
    {
      "epoch": 2.1780974110406657,
      "grad_norm": 2.984375,
      "learning_rate": 1.5220500998206086e-05,
      "loss": 0.8304,
      "step": 621470
    },
    {
      "epoch": 2.1781324585475614,
      "grad_norm": 2.578125,
      "learning_rate": 1.5219851969542384e-05,
      "loss": 0.8581,
      "step": 621480
    },
    {
      "epoch": 2.1781675060544567,
      "grad_norm": 3.265625,
      "learning_rate": 1.5219202940878682e-05,
      "loss": 0.8025,
      "step": 621490
    },
    {
      "epoch": 2.1782025535613525,
      "grad_norm": 2.75,
      "learning_rate": 1.521855391221498e-05,
      "loss": 0.8611,
      "step": 621500
    },
    {
      "epoch": 2.178237601068248,
      "grad_norm": 3.234375,
      "learning_rate": 1.5217904883551278e-05,
      "loss": 0.7865,
      "step": 621510
    },
    {
      "epoch": 2.1782726485751436,
      "grad_norm": 3.203125,
      "learning_rate": 1.5217255854887576e-05,
      "loss": 0.738,
      "step": 621520
    },
    {
      "epoch": 2.1783076960820393,
      "grad_norm": 2.640625,
      "learning_rate": 1.5216606826223876e-05,
      "loss": 0.7642,
      "step": 621530
    },
    {
      "epoch": 2.1783427435889346,
      "grad_norm": 2.96875,
      "learning_rate": 1.521595779756017e-05,
      "loss": 0.7264,
      "step": 621540
    },
    {
      "epoch": 2.1783777910958304,
      "grad_norm": 2.53125,
      "learning_rate": 1.5215308768896469e-05,
      "loss": 0.769,
      "step": 621550
    },
    {
      "epoch": 2.178412838602726,
      "grad_norm": 2.9375,
      "learning_rate": 1.5214659740232767e-05,
      "loss": 0.768,
      "step": 621560
    },
    {
      "epoch": 2.1784478861096215,
      "grad_norm": 2.78125,
      "learning_rate": 1.5214010711569066e-05,
      "loss": 0.811,
      "step": 621570
    },
    {
      "epoch": 2.1784829336165172,
      "grad_norm": 2.5,
      "learning_rate": 1.5213361682905364e-05,
      "loss": 0.8243,
      "step": 621580
    },
    {
      "epoch": 2.178517981123413,
      "grad_norm": 2.34375,
      "learning_rate": 1.5212712654241662e-05,
      "loss": 0.7995,
      "step": 621590
    },
    {
      "epoch": 2.1785530286303083,
      "grad_norm": 2.921875,
      "learning_rate": 1.521206362557796e-05,
      "loss": 0.8315,
      "step": 621600
    },
    {
      "epoch": 2.178588076137204,
      "grad_norm": 3.484375,
      "learning_rate": 1.5211414596914258e-05,
      "loss": 0.834,
      "step": 621610
    },
    {
      "epoch": 2.1786231236440994,
      "grad_norm": 2.5,
      "learning_rate": 1.5210765568250556e-05,
      "loss": 0.8133,
      "step": 621620
    },
    {
      "epoch": 2.178658171150995,
      "grad_norm": 2.828125,
      "learning_rate": 1.5210116539586854e-05,
      "loss": 0.7774,
      "step": 621630
    },
    {
      "epoch": 2.178693218657891,
      "grad_norm": 2.953125,
      "learning_rate": 1.5209467510923154e-05,
      "loss": 0.8066,
      "step": 621640
    },
    {
      "epoch": 2.178728266164786,
      "grad_norm": 2.984375,
      "learning_rate": 1.5208818482259452e-05,
      "loss": 0.8133,
      "step": 621650
    },
    {
      "epoch": 2.178763313671682,
      "grad_norm": 2.875,
      "learning_rate": 1.520816945359575e-05,
      "loss": 0.7708,
      "step": 621660
    },
    {
      "epoch": 2.1787983611785777,
      "grad_norm": 3.0625,
      "learning_rate": 1.5207520424932048e-05,
      "loss": 0.824,
      "step": 621670
    },
    {
      "epoch": 2.178833408685473,
      "grad_norm": 3.265625,
      "learning_rate": 1.5206871396268346e-05,
      "loss": 0.811,
      "step": 621680
    },
    {
      "epoch": 2.178868456192369,
      "grad_norm": 2.765625,
      "learning_rate": 1.5206222367604644e-05,
      "loss": 0.8749,
      "step": 621690
    },
    {
      "epoch": 2.1789035036992646,
      "grad_norm": 3.03125,
      "learning_rate": 1.5205573338940942e-05,
      "loss": 0.9066,
      "step": 621700
    },
    {
      "epoch": 2.17893855120616,
      "grad_norm": 3.1875,
      "learning_rate": 1.5204924310277242e-05,
      "loss": 0.8117,
      "step": 621710
    },
    {
      "epoch": 2.1789735987130556,
      "grad_norm": 3.125,
      "learning_rate": 1.520427528161354e-05,
      "loss": 0.8235,
      "step": 621720
    },
    {
      "epoch": 2.179008646219951,
      "grad_norm": 2.78125,
      "learning_rate": 1.5203626252949834e-05,
      "loss": 0.8085,
      "step": 621730
    },
    {
      "epoch": 2.1790436937268467,
      "grad_norm": 3.125,
      "learning_rate": 1.5202977224286132e-05,
      "loss": 0.7897,
      "step": 621740
    },
    {
      "epoch": 2.1790787412337425,
      "grad_norm": 3.28125,
      "learning_rate": 1.520232819562243e-05,
      "loss": 0.8178,
      "step": 621750
    },
    {
      "epoch": 2.1791137887406378,
      "grad_norm": 2.65625,
      "learning_rate": 1.520167916695873e-05,
      "loss": 0.8267,
      "step": 621760
    },
    {
      "epoch": 2.1791488362475335,
      "grad_norm": 2.46875,
      "learning_rate": 1.5201030138295028e-05,
      "loss": 0.7647,
      "step": 621770
    },
    {
      "epoch": 2.1791838837544293,
      "grad_norm": 3.125,
      "learning_rate": 1.5200381109631326e-05,
      "loss": 0.8178,
      "step": 621780
    },
    {
      "epoch": 2.1792189312613246,
      "grad_norm": 2.65625,
      "learning_rate": 1.5199732080967624e-05,
      "loss": 0.8144,
      "step": 621790
    },
    {
      "epoch": 2.1792539787682204,
      "grad_norm": 3.3125,
      "learning_rate": 1.5199083052303922e-05,
      "loss": 0.792,
      "step": 621800
    },
    {
      "epoch": 2.179289026275116,
      "grad_norm": 2.875,
      "learning_rate": 1.519843402364022e-05,
      "loss": 0.7665,
      "step": 621810
    },
    {
      "epoch": 2.1793240737820114,
      "grad_norm": 2.421875,
      "learning_rate": 1.519778499497652e-05,
      "loss": 0.7363,
      "step": 621820
    },
    {
      "epoch": 2.179359121288907,
      "grad_norm": 2.828125,
      "learning_rate": 1.5197135966312818e-05,
      "loss": 0.6721,
      "step": 621830
    },
    {
      "epoch": 2.1793941687958025,
      "grad_norm": 2.578125,
      "learning_rate": 1.5196486937649116e-05,
      "loss": 0.7711,
      "step": 621840
    },
    {
      "epoch": 2.1794292163026983,
      "grad_norm": 2.921875,
      "learning_rate": 1.5195837908985414e-05,
      "loss": 0.8073,
      "step": 621850
    },
    {
      "epoch": 2.179464263809594,
      "grad_norm": 2.75,
      "learning_rate": 1.5195188880321712e-05,
      "loss": 0.7882,
      "step": 621860
    },
    {
      "epoch": 2.1794993113164893,
      "grad_norm": 2.71875,
      "learning_rate": 1.519453985165801e-05,
      "loss": 0.7948,
      "step": 621870
    },
    {
      "epoch": 2.179534358823385,
      "grad_norm": 2.84375,
      "learning_rate": 1.5193890822994308e-05,
      "loss": 0.8599,
      "step": 621880
    },
    {
      "epoch": 2.179569406330281,
      "grad_norm": 3.28125,
      "learning_rate": 1.5193241794330607e-05,
      "loss": 0.8617,
      "step": 621890
    },
    {
      "epoch": 2.179604453837176,
      "grad_norm": 2.875,
      "learning_rate": 1.5192592765666905e-05,
      "loss": 0.8603,
      "step": 621900
    },
    {
      "epoch": 2.179639501344072,
      "grad_norm": 2.71875,
      "learning_rate": 1.5191943737003203e-05,
      "loss": 0.8091,
      "step": 621910
    },
    {
      "epoch": 2.1796745488509677,
      "grad_norm": 2.96875,
      "learning_rate": 1.5191294708339498e-05,
      "loss": 0.7966,
      "step": 621920
    },
    {
      "epoch": 2.179709596357863,
      "grad_norm": 3.0625,
      "learning_rate": 1.5190645679675796e-05,
      "loss": 0.8354,
      "step": 621930
    },
    {
      "epoch": 2.1797446438647587,
      "grad_norm": 2.796875,
      "learning_rate": 1.5189996651012096e-05,
      "loss": 0.7445,
      "step": 621940
    },
    {
      "epoch": 2.179779691371654,
      "grad_norm": 3.015625,
      "learning_rate": 1.5189347622348394e-05,
      "loss": 0.8445,
      "step": 621950
    },
    {
      "epoch": 2.17981473887855,
      "grad_norm": 3.421875,
      "learning_rate": 1.5188698593684692e-05,
      "loss": 0.8194,
      "step": 621960
    },
    {
      "epoch": 2.1798497863854456,
      "grad_norm": 2.859375,
      "learning_rate": 1.518804956502099e-05,
      "loss": 0.8476,
      "step": 621970
    },
    {
      "epoch": 2.179884833892341,
      "grad_norm": 3.125,
      "learning_rate": 1.5187400536357288e-05,
      "loss": 0.7741,
      "step": 621980
    },
    {
      "epoch": 2.1799198813992366,
      "grad_norm": 2.703125,
      "learning_rate": 1.5186751507693586e-05,
      "loss": 0.793,
      "step": 621990
    },
    {
      "epoch": 2.1799549289061324,
      "grad_norm": 2.71875,
      "learning_rate": 1.5186102479029884e-05,
      "loss": 0.8695,
      "step": 622000
    },
    {
      "epoch": 2.1799899764130277,
      "grad_norm": 3.0,
      "learning_rate": 1.5185453450366183e-05,
      "loss": 0.7003,
      "step": 622010
    },
    {
      "epoch": 2.1800250239199235,
      "grad_norm": 2.75,
      "learning_rate": 1.5184804421702481e-05,
      "loss": 0.8194,
      "step": 622020
    },
    {
      "epoch": 2.1800600714268192,
      "grad_norm": 3.109375,
      "learning_rate": 1.518415539303878e-05,
      "loss": 0.8213,
      "step": 622030
    },
    {
      "epoch": 2.1800951189337145,
      "grad_norm": 2.765625,
      "learning_rate": 1.5183506364375077e-05,
      "loss": 0.7745,
      "step": 622040
    },
    {
      "epoch": 2.1801301664406103,
      "grad_norm": 2.703125,
      "learning_rate": 1.5182857335711375e-05,
      "loss": 0.9236,
      "step": 622050
    },
    {
      "epoch": 2.1801652139475056,
      "grad_norm": 3.046875,
      "learning_rate": 1.5182208307047673e-05,
      "loss": 0.8805,
      "step": 622060
    },
    {
      "epoch": 2.1802002614544014,
      "grad_norm": 3.125,
      "learning_rate": 1.5181559278383971e-05,
      "loss": 0.7647,
      "step": 622070
    },
    {
      "epoch": 2.180235308961297,
      "grad_norm": 2.75,
      "learning_rate": 1.5180910249720271e-05,
      "loss": 0.8536,
      "step": 622080
    },
    {
      "epoch": 2.1802703564681924,
      "grad_norm": 2.8125,
      "learning_rate": 1.5180261221056569e-05,
      "loss": 0.7944,
      "step": 622090
    },
    {
      "epoch": 2.180305403975088,
      "grad_norm": 3.25,
      "learning_rate": 1.5179612192392867e-05,
      "loss": 0.8705,
      "step": 622100
    },
    {
      "epoch": 2.180340451481984,
      "grad_norm": 3.296875,
      "learning_rate": 1.5178963163729162e-05,
      "loss": 0.7706,
      "step": 622110
    },
    {
      "epoch": 2.1803754989888793,
      "grad_norm": 3.34375,
      "learning_rate": 1.5178314135065461e-05,
      "loss": 0.8504,
      "step": 622120
    },
    {
      "epoch": 2.180410546495775,
      "grad_norm": 3.09375,
      "learning_rate": 1.517766510640176e-05,
      "loss": 0.8718,
      "step": 622130
    },
    {
      "epoch": 2.180445594002671,
      "grad_norm": 2.46875,
      "learning_rate": 1.5177016077738057e-05,
      "loss": 0.7917,
      "step": 622140
    },
    {
      "epoch": 2.180480641509566,
      "grad_norm": 2.890625,
      "learning_rate": 1.5176367049074355e-05,
      "loss": 0.8018,
      "step": 622150
    },
    {
      "epoch": 2.180515689016462,
      "grad_norm": 2.640625,
      "learning_rate": 1.5175718020410653e-05,
      "loss": 0.8225,
      "step": 622160
    },
    {
      "epoch": 2.180550736523357,
      "grad_norm": 2.625,
      "learning_rate": 1.5175068991746951e-05,
      "loss": 0.8114,
      "step": 622170
    },
    {
      "epoch": 2.180585784030253,
      "grad_norm": 2.671875,
      "learning_rate": 1.517441996308325e-05,
      "loss": 0.8759,
      "step": 622180
    },
    {
      "epoch": 2.1806208315371487,
      "grad_norm": 2.796875,
      "learning_rate": 1.5173770934419549e-05,
      "loss": 0.803,
      "step": 622190
    },
    {
      "epoch": 2.180655879044044,
      "grad_norm": 3.546875,
      "learning_rate": 1.5173121905755847e-05,
      "loss": 0.8122,
      "step": 622200
    },
    {
      "epoch": 2.1806909265509398,
      "grad_norm": 3.15625,
      "learning_rate": 1.5172472877092145e-05,
      "loss": 0.8775,
      "step": 622210
    },
    {
      "epoch": 2.1807259740578355,
      "grad_norm": 2.390625,
      "learning_rate": 1.5171823848428443e-05,
      "loss": 0.8521,
      "step": 622220
    },
    {
      "epoch": 2.180761021564731,
      "grad_norm": 3.4375,
      "learning_rate": 1.5171174819764741e-05,
      "loss": 0.7502,
      "step": 622230
    },
    {
      "epoch": 2.1807960690716266,
      "grad_norm": 2.78125,
      "learning_rate": 1.5170525791101039e-05,
      "loss": 0.7523,
      "step": 622240
    },
    {
      "epoch": 2.1808311165785224,
      "grad_norm": 3.140625,
      "learning_rate": 1.5169876762437337e-05,
      "loss": 0.8227,
      "step": 622250
    },
    {
      "epoch": 2.1808661640854177,
      "grad_norm": 2.84375,
      "learning_rate": 1.5169227733773637e-05,
      "loss": 0.6897,
      "step": 622260
    },
    {
      "epoch": 2.1809012115923134,
      "grad_norm": 2.6875,
      "learning_rate": 1.5168578705109935e-05,
      "loss": 0.846,
      "step": 622270
    },
    {
      "epoch": 2.180936259099209,
      "grad_norm": 2.65625,
      "learning_rate": 1.5167929676446233e-05,
      "loss": 0.8482,
      "step": 622280
    },
    {
      "epoch": 2.1809713066061045,
      "grad_norm": 2.921875,
      "learning_rate": 1.516728064778253e-05,
      "loss": 0.8406,
      "step": 622290
    },
    {
      "epoch": 2.1810063541130003,
      "grad_norm": 2.984375,
      "learning_rate": 1.5166631619118827e-05,
      "loss": 0.8014,
      "step": 622300
    },
    {
      "epoch": 2.1810414016198956,
      "grad_norm": 2.890625,
      "learning_rate": 1.5165982590455125e-05,
      "loss": 0.8534,
      "step": 622310
    },
    {
      "epoch": 2.1810764491267913,
      "grad_norm": 3.140625,
      "learning_rate": 1.5165333561791423e-05,
      "loss": 0.7636,
      "step": 622320
    },
    {
      "epoch": 2.181111496633687,
      "grad_norm": 3.40625,
      "learning_rate": 1.5164684533127721e-05,
      "loss": 0.8213,
      "step": 622330
    },
    {
      "epoch": 2.1811465441405824,
      "grad_norm": 2.609375,
      "learning_rate": 1.5164035504464019e-05,
      "loss": 0.8003,
      "step": 622340
    },
    {
      "epoch": 2.181181591647478,
      "grad_norm": 3.171875,
      "learning_rate": 1.5163386475800317e-05,
      "loss": 0.7236,
      "step": 622350
    },
    {
      "epoch": 2.181216639154374,
      "grad_norm": 2.984375,
      "learning_rate": 1.5162737447136615e-05,
      "loss": 0.8715,
      "step": 622360
    },
    {
      "epoch": 2.1812516866612692,
      "grad_norm": 3.03125,
      "learning_rate": 1.5162088418472915e-05,
      "loss": 0.7758,
      "step": 622370
    },
    {
      "epoch": 2.181286734168165,
      "grad_norm": 3.0,
      "learning_rate": 1.5161439389809213e-05,
      "loss": 0.7652,
      "step": 622380
    },
    {
      "epoch": 2.1813217816750607,
      "grad_norm": 3.140625,
      "learning_rate": 1.516079036114551e-05,
      "loss": 0.8629,
      "step": 622390
    },
    {
      "epoch": 2.181356829181956,
      "grad_norm": 2.765625,
      "learning_rate": 1.5160141332481809e-05,
      "loss": 0.7982,
      "step": 622400
    },
    {
      "epoch": 2.181391876688852,
      "grad_norm": 2.875,
      "learning_rate": 1.5159492303818107e-05,
      "loss": 0.8285,
      "step": 622410
    },
    {
      "epoch": 2.181426924195747,
      "grad_norm": 3.203125,
      "learning_rate": 1.5158843275154405e-05,
      "loss": 0.8839,
      "step": 622420
    },
    {
      "epoch": 2.181461971702643,
      "grad_norm": 2.890625,
      "learning_rate": 1.5158194246490703e-05,
      "loss": 0.8607,
      "step": 622430
    },
    {
      "epoch": 2.1814970192095386,
      "grad_norm": 3.0,
      "learning_rate": 1.5157545217827002e-05,
      "loss": 0.8608,
      "step": 622440
    },
    {
      "epoch": 2.181532066716434,
      "grad_norm": 2.90625,
      "learning_rate": 1.51568961891633e-05,
      "loss": 0.8192,
      "step": 622450
    },
    {
      "epoch": 2.1815671142233297,
      "grad_norm": 2.546875,
      "learning_rate": 1.5156247160499598e-05,
      "loss": 0.8205,
      "step": 622460
    },
    {
      "epoch": 2.1816021617302255,
      "grad_norm": 3.125,
      "learning_rate": 1.5155598131835896e-05,
      "loss": 0.8428,
      "step": 622470
    },
    {
      "epoch": 2.181637209237121,
      "grad_norm": 2.453125,
      "learning_rate": 1.5154949103172191e-05,
      "loss": 0.8372,
      "step": 622480
    },
    {
      "epoch": 2.1816722567440165,
      "grad_norm": 2.875,
      "learning_rate": 1.515430007450849e-05,
      "loss": 0.7985,
      "step": 622490
    },
    {
      "epoch": 2.1817073042509123,
      "grad_norm": 3.359375,
      "learning_rate": 1.5153651045844789e-05,
      "loss": 0.8177,
      "step": 622500
    },
    {
      "epoch": 2.1817423517578076,
      "grad_norm": 2.796875,
      "learning_rate": 1.5153002017181087e-05,
      "loss": 0.796,
      "step": 622510
    },
    {
      "epoch": 2.1817773992647034,
      "grad_norm": 3.203125,
      "learning_rate": 1.5152352988517385e-05,
      "loss": 0.8583,
      "step": 622520
    },
    {
      "epoch": 2.181812446771599,
      "grad_norm": 2.9375,
      "learning_rate": 1.5151703959853683e-05,
      "loss": 0.7807,
      "step": 622530
    },
    {
      "epoch": 2.1818474942784944,
      "grad_norm": 3.0625,
      "learning_rate": 1.515105493118998e-05,
      "loss": 0.8003,
      "step": 622540
    },
    {
      "epoch": 2.18188254178539,
      "grad_norm": 2.578125,
      "learning_rate": 1.5150405902526279e-05,
      "loss": 0.8174,
      "step": 622550
    },
    {
      "epoch": 2.1819175892922855,
      "grad_norm": 2.859375,
      "learning_rate": 1.5149756873862578e-05,
      "loss": 0.862,
      "step": 622560
    },
    {
      "epoch": 2.1819526367991813,
      "grad_norm": 3.203125,
      "learning_rate": 1.5149107845198876e-05,
      "loss": 0.8756,
      "step": 622570
    },
    {
      "epoch": 2.181987684306077,
      "grad_norm": 2.796875,
      "learning_rate": 1.5148458816535174e-05,
      "loss": 0.8459,
      "step": 622580
    },
    {
      "epoch": 2.1820227318129723,
      "grad_norm": 2.484375,
      "learning_rate": 1.5147809787871472e-05,
      "loss": 0.7963,
      "step": 622590
    },
    {
      "epoch": 2.182057779319868,
      "grad_norm": 2.421875,
      "learning_rate": 1.514716075920777e-05,
      "loss": 0.7701,
      "step": 622600
    },
    {
      "epoch": 2.182092826826764,
      "grad_norm": 2.671875,
      "learning_rate": 1.5146511730544068e-05,
      "loss": 0.908,
      "step": 622610
    },
    {
      "epoch": 2.182127874333659,
      "grad_norm": 2.890625,
      "learning_rate": 1.5145862701880366e-05,
      "loss": 0.7524,
      "step": 622620
    },
    {
      "epoch": 2.182162921840555,
      "grad_norm": 3.125,
      "learning_rate": 1.5145213673216666e-05,
      "loss": 0.8231,
      "step": 622630
    },
    {
      "epoch": 2.1821979693474507,
      "grad_norm": 3.203125,
      "learning_rate": 1.5144564644552964e-05,
      "loss": 0.7903,
      "step": 622640
    },
    {
      "epoch": 2.182233016854346,
      "grad_norm": 3.078125,
      "learning_rate": 1.5143915615889262e-05,
      "loss": 0.8357,
      "step": 622650
    },
    {
      "epoch": 2.1822680643612418,
      "grad_norm": 3.1875,
      "learning_rate": 1.514326658722556e-05,
      "loss": 0.9037,
      "step": 622660
    },
    {
      "epoch": 2.182303111868137,
      "grad_norm": 2.828125,
      "learning_rate": 1.5142617558561856e-05,
      "loss": 0.7605,
      "step": 622670
    },
    {
      "epoch": 2.182338159375033,
      "grad_norm": 2.796875,
      "learning_rate": 1.5141968529898154e-05,
      "loss": 0.8495,
      "step": 622680
    },
    {
      "epoch": 2.1823732068819286,
      "grad_norm": 3.328125,
      "learning_rate": 1.5141319501234452e-05,
      "loss": 0.7745,
      "step": 622690
    },
    {
      "epoch": 2.182408254388824,
      "grad_norm": 2.796875,
      "learning_rate": 1.514067047257075e-05,
      "loss": 0.8257,
      "step": 622700
    },
    {
      "epoch": 2.1824433018957197,
      "grad_norm": 2.578125,
      "learning_rate": 1.5140021443907048e-05,
      "loss": 0.8702,
      "step": 622710
    },
    {
      "epoch": 2.1824783494026154,
      "grad_norm": 2.71875,
      "learning_rate": 1.5139372415243346e-05,
      "loss": 0.7912,
      "step": 622720
    },
    {
      "epoch": 2.1825133969095107,
      "grad_norm": 3.078125,
      "learning_rate": 1.5138723386579644e-05,
      "loss": 0.791,
      "step": 622730
    },
    {
      "epoch": 2.1825484444164065,
      "grad_norm": 3.0,
      "learning_rate": 1.5138074357915944e-05,
      "loss": 0.8521,
      "step": 622740
    },
    {
      "epoch": 2.1825834919233023,
      "grad_norm": 3.125,
      "learning_rate": 1.5137425329252242e-05,
      "loss": 0.8364,
      "step": 622750
    },
    {
      "epoch": 2.1826185394301976,
      "grad_norm": 3.21875,
      "learning_rate": 1.513677630058854e-05,
      "loss": 0.8275,
      "step": 622760
    },
    {
      "epoch": 2.1826535869370933,
      "grad_norm": 3.03125,
      "learning_rate": 1.5136127271924838e-05,
      "loss": 0.866,
      "step": 622770
    },
    {
      "epoch": 2.1826886344439886,
      "grad_norm": 2.515625,
      "learning_rate": 1.5135478243261136e-05,
      "loss": 0.7538,
      "step": 622780
    },
    {
      "epoch": 2.1827236819508844,
      "grad_norm": 3.0,
      "learning_rate": 1.5134829214597434e-05,
      "loss": 0.8179,
      "step": 622790
    },
    {
      "epoch": 2.18275872945778,
      "grad_norm": 3.09375,
      "learning_rate": 1.5134180185933732e-05,
      "loss": 0.8066,
      "step": 622800
    },
    {
      "epoch": 2.1827937769646755,
      "grad_norm": 3.0625,
      "learning_rate": 1.5133531157270032e-05,
      "loss": 0.8303,
      "step": 622810
    },
    {
      "epoch": 2.1828288244715712,
      "grad_norm": 3.328125,
      "learning_rate": 1.513288212860633e-05,
      "loss": 0.8378,
      "step": 622820
    },
    {
      "epoch": 2.182863871978467,
      "grad_norm": 4.0,
      "learning_rate": 1.5132233099942628e-05,
      "loss": 0.8186,
      "step": 622830
    },
    {
      "epoch": 2.1828989194853623,
      "grad_norm": 2.703125,
      "learning_rate": 1.5131584071278926e-05,
      "loss": 0.784,
      "step": 622840
    },
    {
      "epoch": 2.182933966992258,
      "grad_norm": 2.859375,
      "learning_rate": 1.5130935042615224e-05,
      "loss": 0.7783,
      "step": 622850
    },
    {
      "epoch": 2.182969014499154,
      "grad_norm": 3.15625,
      "learning_rate": 1.513028601395152e-05,
      "loss": 0.7131,
      "step": 622860
    },
    {
      "epoch": 2.183004062006049,
      "grad_norm": 3.046875,
      "learning_rate": 1.5129636985287818e-05,
      "loss": 0.8153,
      "step": 622870
    },
    {
      "epoch": 2.183039109512945,
      "grad_norm": 2.859375,
      "learning_rate": 1.5128987956624116e-05,
      "loss": 0.797,
      "step": 622880
    },
    {
      "epoch": 2.18307415701984,
      "grad_norm": 2.625,
      "learning_rate": 1.5128338927960414e-05,
      "loss": 0.8446,
      "step": 622890
    },
    {
      "epoch": 2.183109204526736,
      "grad_norm": 3.0,
      "learning_rate": 1.5127689899296712e-05,
      "loss": 0.8944,
      "step": 622900
    },
    {
      "epoch": 2.1831442520336317,
      "grad_norm": 3.109375,
      "learning_rate": 1.512704087063301e-05,
      "loss": 0.9136,
      "step": 622910
    },
    {
      "epoch": 2.183179299540527,
      "grad_norm": 2.609375,
      "learning_rate": 1.512639184196931e-05,
      "loss": 0.8396,
      "step": 622920
    },
    {
      "epoch": 2.183214347047423,
      "grad_norm": 2.375,
      "learning_rate": 1.5125742813305608e-05,
      "loss": 0.7828,
      "step": 622930
    },
    {
      "epoch": 2.1832493945543185,
      "grad_norm": 2.75,
      "learning_rate": 1.5125093784641906e-05,
      "loss": 0.8118,
      "step": 622940
    },
    {
      "epoch": 2.183284442061214,
      "grad_norm": 3.296875,
      "learning_rate": 1.5124444755978204e-05,
      "loss": 0.8932,
      "step": 622950
    },
    {
      "epoch": 2.1833194895681096,
      "grad_norm": 2.96875,
      "learning_rate": 1.5123795727314502e-05,
      "loss": 0.8103,
      "step": 622960
    },
    {
      "epoch": 2.1833545370750054,
      "grad_norm": 2.5625,
      "learning_rate": 1.51231466986508e-05,
      "loss": 0.8066,
      "step": 622970
    },
    {
      "epoch": 2.1833895845819007,
      "grad_norm": 2.953125,
      "learning_rate": 1.5122497669987098e-05,
      "loss": 0.7571,
      "step": 622980
    },
    {
      "epoch": 2.1834246320887964,
      "grad_norm": 3.0,
      "learning_rate": 1.5121848641323398e-05,
      "loss": 0.789,
      "step": 622990
    },
    {
      "epoch": 2.1834596795956918,
      "grad_norm": 3.171875,
      "learning_rate": 1.5121199612659696e-05,
      "loss": 0.8748,
      "step": 623000
    },
    {
      "epoch": 2.1834947271025875,
      "grad_norm": 2.796875,
      "learning_rate": 1.5120550583995994e-05,
      "loss": 0.7868,
      "step": 623010
    },
    {
      "epoch": 2.1835297746094833,
      "grad_norm": 3.71875,
      "learning_rate": 1.5119901555332292e-05,
      "loss": 0.793,
      "step": 623020
    },
    {
      "epoch": 2.1835648221163786,
      "grad_norm": 2.9375,
      "learning_rate": 1.511925252666859e-05,
      "loss": 0.8571,
      "step": 623030
    },
    {
      "epoch": 2.1835998696232743,
      "grad_norm": 3.046875,
      "learning_rate": 1.5118603498004888e-05,
      "loss": 0.8415,
      "step": 623040
    },
    {
      "epoch": 2.18363491713017,
      "grad_norm": 3.0,
      "learning_rate": 1.5117954469341184e-05,
      "loss": 0.859,
      "step": 623050
    },
    {
      "epoch": 2.1836699646370654,
      "grad_norm": 3.203125,
      "learning_rate": 1.5117305440677482e-05,
      "loss": 0.8477,
      "step": 623060
    },
    {
      "epoch": 2.183705012143961,
      "grad_norm": 2.296875,
      "learning_rate": 1.511665641201378e-05,
      "loss": 0.7404,
      "step": 623070
    },
    {
      "epoch": 2.183740059650857,
      "grad_norm": 2.8125,
      "learning_rate": 1.5116007383350078e-05,
      "loss": 0.7731,
      "step": 623080
    },
    {
      "epoch": 2.1837751071577522,
      "grad_norm": 2.9375,
      "learning_rate": 1.5115358354686376e-05,
      "loss": 0.8223,
      "step": 623090
    },
    {
      "epoch": 2.183810154664648,
      "grad_norm": 2.671875,
      "learning_rate": 1.5114709326022674e-05,
      "loss": 0.7597,
      "step": 623100
    },
    {
      "epoch": 2.1838452021715433,
      "grad_norm": 2.859375,
      "learning_rate": 1.5114060297358974e-05,
      "loss": 0.7684,
      "step": 623110
    },
    {
      "epoch": 2.183880249678439,
      "grad_norm": 3.171875,
      "learning_rate": 1.5113411268695272e-05,
      "loss": 0.7549,
      "step": 623120
    },
    {
      "epoch": 2.183915297185335,
      "grad_norm": 3.03125,
      "learning_rate": 1.511276224003157e-05,
      "loss": 0.8967,
      "step": 623130
    },
    {
      "epoch": 2.18395034469223,
      "grad_norm": 3.0625,
      "learning_rate": 1.5112113211367868e-05,
      "loss": 0.7908,
      "step": 623140
    },
    {
      "epoch": 2.183985392199126,
      "grad_norm": 3.296875,
      "learning_rate": 1.5111464182704166e-05,
      "loss": 0.8571,
      "step": 623150
    },
    {
      "epoch": 2.1840204397060217,
      "grad_norm": 3.03125,
      "learning_rate": 1.5110815154040464e-05,
      "loss": 0.8168,
      "step": 623160
    },
    {
      "epoch": 2.184055487212917,
      "grad_norm": 2.96875,
      "learning_rate": 1.5110166125376762e-05,
      "loss": 0.7531,
      "step": 623170
    },
    {
      "epoch": 2.1840905347198127,
      "grad_norm": 2.75,
      "learning_rate": 1.5109517096713061e-05,
      "loss": 0.7936,
      "step": 623180
    },
    {
      "epoch": 2.1841255822267085,
      "grad_norm": 2.546875,
      "learning_rate": 1.510886806804936e-05,
      "loss": 0.7644,
      "step": 623190
    },
    {
      "epoch": 2.184160629733604,
      "grad_norm": 3.21875,
      "learning_rate": 1.5108219039385657e-05,
      "loss": 0.9346,
      "step": 623200
    },
    {
      "epoch": 2.1841956772404996,
      "grad_norm": 3.1875,
      "learning_rate": 1.5107570010721955e-05,
      "loss": 0.8635,
      "step": 623210
    },
    {
      "epoch": 2.184230724747395,
      "grad_norm": 2.453125,
      "learning_rate": 1.5106920982058253e-05,
      "loss": 0.7405,
      "step": 623220
    },
    {
      "epoch": 2.1842657722542906,
      "grad_norm": 2.609375,
      "learning_rate": 1.5106271953394551e-05,
      "loss": 0.7427,
      "step": 623230
    },
    {
      "epoch": 2.1843008197611864,
      "grad_norm": 3.03125,
      "learning_rate": 1.5105622924730848e-05,
      "loss": 0.8041,
      "step": 623240
    },
    {
      "epoch": 2.1843358672680817,
      "grad_norm": 2.859375,
      "learning_rate": 1.5104973896067146e-05,
      "loss": 0.7818,
      "step": 623250
    },
    {
      "epoch": 2.1843709147749775,
      "grad_norm": 2.6875,
      "learning_rate": 1.5104324867403444e-05,
      "loss": 0.7902,
      "step": 623260
    },
    {
      "epoch": 2.1844059622818732,
      "grad_norm": 2.703125,
      "learning_rate": 1.5103675838739742e-05,
      "loss": 0.7715,
      "step": 623270
    },
    {
      "epoch": 2.1844410097887685,
      "grad_norm": 2.5625,
      "learning_rate": 1.510302681007604e-05,
      "loss": 0.7348,
      "step": 623280
    },
    {
      "epoch": 2.1844760572956643,
      "grad_norm": 2.59375,
      "learning_rate": 1.510237778141234e-05,
      "loss": 0.8309,
      "step": 623290
    },
    {
      "epoch": 2.18451110480256,
      "grad_norm": 2.859375,
      "learning_rate": 1.5101728752748637e-05,
      "loss": 0.8266,
      "step": 623300
    },
    {
      "epoch": 2.1845461523094554,
      "grad_norm": 2.640625,
      "learning_rate": 1.5101079724084935e-05,
      "loss": 0.8694,
      "step": 623310
    },
    {
      "epoch": 2.184581199816351,
      "grad_norm": 2.78125,
      "learning_rate": 1.5100430695421233e-05,
      "loss": 0.8338,
      "step": 623320
    },
    {
      "epoch": 2.1846162473232464,
      "grad_norm": 3.015625,
      "learning_rate": 1.5099781666757531e-05,
      "loss": 0.8465,
      "step": 623330
    },
    {
      "epoch": 2.184651294830142,
      "grad_norm": 2.828125,
      "learning_rate": 1.509913263809383e-05,
      "loss": 0.7944,
      "step": 623340
    },
    {
      "epoch": 2.184686342337038,
      "grad_norm": 3.015625,
      "learning_rate": 1.5098483609430127e-05,
      "loss": 0.8556,
      "step": 623350
    },
    {
      "epoch": 2.1847213898439333,
      "grad_norm": 3.0,
      "learning_rate": 1.5097834580766427e-05,
      "loss": 0.8223,
      "step": 623360
    },
    {
      "epoch": 2.184756437350829,
      "grad_norm": 2.875,
      "learning_rate": 1.5097185552102725e-05,
      "loss": 0.7809,
      "step": 623370
    },
    {
      "epoch": 2.184791484857725,
      "grad_norm": 2.671875,
      "learning_rate": 1.5096536523439023e-05,
      "loss": 0.7996,
      "step": 623380
    },
    {
      "epoch": 2.18482653236462,
      "grad_norm": 3.015625,
      "learning_rate": 1.5095887494775321e-05,
      "loss": 0.804,
      "step": 623390
    },
    {
      "epoch": 2.184861579871516,
      "grad_norm": 2.484375,
      "learning_rate": 1.5095238466111619e-05,
      "loss": 0.8388,
      "step": 623400
    },
    {
      "epoch": 2.1848966273784116,
      "grad_norm": 3.0,
      "learning_rate": 1.5094589437447917e-05,
      "loss": 0.8136,
      "step": 623410
    },
    {
      "epoch": 2.184931674885307,
      "grad_norm": 3.09375,
      "learning_rate": 1.5093940408784213e-05,
      "loss": 0.8529,
      "step": 623420
    },
    {
      "epoch": 2.1849667223922027,
      "grad_norm": 2.796875,
      "learning_rate": 1.5093291380120511e-05,
      "loss": 0.7728,
      "step": 623430
    },
    {
      "epoch": 2.185001769899098,
      "grad_norm": 3.015625,
      "learning_rate": 1.509264235145681e-05,
      "loss": 0.868,
      "step": 623440
    },
    {
      "epoch": 2.1850368174059938,
      "grad_norm": 3.203125,
      "learning_rate": 1.5091993322793107e-05,
      "loss": 0.7481,
      "step": 623450
    },
    {
      "epoch": 2.1850718649128895,
      "grad_norm": 2.859375,
      "learning_rate": 1.5091344294129405e-05,
      "loss": 0.8856,
      "step": 623460
    },
    {
      "epoch": 2.185106912419785,
      "grad_norm": 3.078125,
      "learning_rate": 1.5090695265465705e-05,
      "loss": 0.8264,
      "step": 623470
    },
    {
      "epoch": 2.1851419599266806,
      "grad_norm": 2.6875,
      "learning_rate": 1.5090046236802003e-05,
      "loss": 0.8588,
      "step": 623480
    },
    {
      "epoch": 2.1851770074335763,
      "grad_norm": 3.21875,
      "learning_rate": 1.5089397208138301e-05,
      "loss": 0.8006,
      "step": 623490
    },
    {
      "epoch": 2.1852120549404717,
      "grad_norm": 3.59375,
      "learning_rate": 1.5088748179474599e-05,
      "loss": 0.7767,
      "step": 623500
    },
    {
      "epoch": 2.1852471024473674,
      "grad_norm": 2.625,
      "learning_rate": 1.5088099150810897e-05,
      "loss": 0.7764,
      "step": 623510
    },
    {
      "epoch": 2.185282149954263,
      "grad_norm": 2.765625,
      "learning_rate": 1.5087450122147195e-05,
      "loss": 0.9031,
      "step": 623520
    },
    {
      "epoch": 2.1853171974611585,
      "grad_norm": 3.171875,
      "learning_rate": 1.5086801093483493e-05,
      "loss": 0.9518,
      "step": 623530
    },
    {
      "epoch": 2.1853522449680542,
      "grad_norm": 2.953125,
      "learning_rate": 1.5086152064819793e-05,
      "loss": 0.8801,
      "step": 623540
    },
    {
      "epoch": 2.1853872924749496,
      "grad_norm": 2.75,
      "learning_rate": 1.508550303615609e-05,
      "loss": 0.748,
      "step": 623550
    },
    {
      "epoch": 2.1854223399818453,
      "grad_norm": 2.59375,
      "learning_rate": 1.5084854007492389e-05,
      "loss": 0.7589,
      "step": 623560
    },
    {
      "epoch": 2.185457387488741,
      "grad_norm": 2.75,
      "learning_rate": 1.5084204978828687e-05,
      "loss": 0.8698,
      "step": 623570
    },
    {
      "epoch": 2.1854924349956364,
      "grad_norm": 2.46875,
      "learning_rate": 1.5083555950164985e-05,
      "loss": 0.6987,
      "step": 623580
    },
    {
      "epoch": 2.185527482502532,
      "grad_norm": 3.203125,
      "learning_rate": 1.5082906921501283e-05,
      "loss": 0.7617,
      "step": 623590
    },
    {
      "epoch": 2.185562530009428,
      "grad_norm": 3.28125,
      "learning_rate": 1.508225789283758e-05,
      "loss": 0.906,
      "step": 623600
    },
    {
      "epoch": 2.185597577516323,
      "grad_norm": 3.078125,
      "learning_rate": 1.5081608864173877e-05,
      "loss": 0.7654,
      "step": 623610
    },
    {
      "epoch": 2.185632625023219,
      "grad_norm": 2.828125,
      "learning_rate": 1.5080959835510175e-05,
      "loss": 0.833,
      "step": 623620
    },
    {
      "epoch": 2.1856676725301147,
      "grad_norm": 2.359375,
      "learning_rate": 1.5080310806846473e-05,
      "loss": 0.7795,
      "step": 623630
    },
    {
      "epoch": 2.18570272003701,
      "grad_norm": 2.9375,
      "learning_rate": 1.5079661778182771e-05,
      "loss": 0.8665,
      "step": 623640
    },
    {
      "epoch": 2.185737767543906,
      "grad_norm": 2.953125,
      "learning_rate": 1.5079012749519069e-05,
      "loss": 0.7921,
      "step": 623650
    },
    {
      "epoch": 2.1857728150508016,
      "grad_norm": 3.15625,
      "learning_rate": 1.5078363720855369e-05,
      "loss": 0.7986,
      "step": 623660
    },
    {
      "epoch": 2.185807862557697,
      "grad_norm": 3.015625,
      "learning_rate": 1.5077714692191667e-05,
      "loss": 0.8986,
      "step": 623670
    },
    {
      "epoch": 2.1858429100645926,
      "grad_norm": 3.28125,
      "learning_rate": 1.5077065663527965e-05,
      "loss": 0.7605,
      "step": 623680
    },
    {
      "epoch": 2.185877957571488,
      "grad_norm": 2.84375,
      "learning_rate": 1.5076416634864263e-05,
      "loss": 0.8148,
      "step": 623690
    },
    {
      "epoch": 2.1859130050783837,
      "grad_norm": 3.546875,
      "learning_rate": 1.507576760620056e-05,
      "loss": 0.8407,
      "step": 623700
    },
    {
      "epoch": 2.1859480525852795,
      "grad_norm": 2.734375,
      "learning_rate": 1.5075118577536859e-05,
      "loss": 0.8341,
      "step": 623710
    },
    {
      "epoch": 2.185983100092175,
      "grad_norm": 2.890625,
      "learning_rate": 1.5074469548873157e-05,
      "loss": 0.7634,
      "step": 623720
    },
    {
      "epoch": 2.1860181475990705,
      "grad_norm": 2.9375,
      "learning_rate": 1.5073820520209456e-05,
      "loss": 0.7545,
      "step": 623730
    },
    {
      "epoch": 2.1860531951059663,
      "grad_norm": 2.625,
      "learning_rate": 1.5073171491545754e-05,
      "loss": 0.8821,
      "step": 623740
    },
    {
      "epoch": 2.1860882426128616,
      "grad_norm": 3.34375,
      "learning_rate": 1.5072522462882052e-05,
      "loss": 0.8421,
      "step": 623750
    },
    {
      "epoch": 2.1861232901197574,
      "grad_norm": 2.875,
      "learning_rate": 1.507187343421835e-05,
      "loss": 0.8176,
      "step": 623760
    },
    {
      "epoch": 2.186158337626653,
      "grad_norm": 3.203125,
      "learning_rate": 1.5071224405554648e-05,
      "loss": 0.8218,
      "step": 623770
    },
    {
      "epoch": 2.1861933851335484,
      "grad_norm": 3.15625,
      "learning_rate": 1.5070575376890946e-05,
      "loss": 0.8014,
      "step": 623780
    },
    {
      "epoch": 2.186228432640444,
      "grad_norm": 2.765625,
      "learning_rate": 1.5069926348227244e-05,
      "loss": 0.8772,
      "step": 623790
    },
    {
      "epoch": 2.18626348014734,
      "grad_norm": 2.578125,
      "learning_rate": 1.506927731956354e-05,
      "loss": 0.842,
      "step": 623800
    },
    {
      "epoch": 2.1862985276542353,
      "grad_norm": 2.5625,
      "learning_rate": 1.5068628290899839e-05,
      "loss": 0.7756,
      "step": 623810
    },
    {
      "epoch": 2.186333575161131,
      "grad_norm": 3.109375,
      "learning_rate": 1.5067979262236137e-05,
      "loss": 0.8248,
      "step": 623820
    },
    {
      "epoch": 2.1863686226680263,
      "grad_norm": 2.8125,
      "learning_rate": 1.5067330233572435e-05,
      "loss": 0.7324,
      "step": 623830
    },
    {
      "epoch": 2.186403670174922,
      "grad_norm": 3.3125,
      "learning_rate": 1.5066681204908734e-05,
      "loss": 0.8869,
      "step": 623840
    },
    {
      "epoch": 2.186438717681818,
      "grad_norm": 3.28125,
      "learning_rate": 1.5066032176245032e-05,
      "loss": 0.842,
      "step": 623850
    },
    {
      "epoch": 2.186473765188713,
      "grad_norm": 2.625,
      "learning_rate": 1.506538314758133e-05,
      "loss": 0.8455,
      "step": 623860
    },
    {
      "epoch": 2.186508812695609,
      "grad_norm": 2.703125,
      "learning_rate": 1.5064734118917628e-05,
      "loss": 0.7651,
      "step": 623870
    },
    {
      "epoch": 2.1865438602025047,
      "grad_norm": 3.078125,
      "learning_rate": 1.5064085090253926e-05,
      "loss": 0.8138,
      "step": 623880
    },
    {
      "epoch": 2.1865789077094,
      "grad_norm": 2.65625,
      "learning_rate": 1.5063436061590224e-05,
      "loss": 0.7722,
      "step": 623890
    },
    {
      "epoch": 2.1866139552162958,
      "grad_norm": 2.78125,
      "learning_rate": 1.5062787032926522e-05,
      "loss": 0.892,
      "step": 623900
    },
    {
      "epoch": 2.1866490027231915,
      "grad_norm": 2.484375,
      "learning_rate": 1.5062138004262822e-05,
      "loss": 0.7189,
      "step": 623910
    },
    {
      "epoch": 2.186684050230087,
      "grad_norm": 3.375,
      "learning_rate": 1.506148897559912e-05,
      "loss": 0.7752,
      "step": 623920
    },
    {
      "epoch": 2.1867190977369826,
      "grad_norm": 2.84375,
      "learning_rate": 1.5060839946935418e-05,
      "loss": 0.8244,
      "step": 623930
    },
    {
      "epoch": 2.186754145243878,
      "grad_norm": 2.71875,
      "learning_rate": 1.5060190918271716e-05,
      "loss": 0.8669,
      "step": 623940
    },
    {
      "epoch": 2.1867891927507737,
      "grad_norm": 2.859375,
      "learning_rate": 1.5059541889608014e-05,
      "loss": 0.6997,
      "step": 623950
    },
    {
      "epoch": 2.1868242402576694,
      "grad_norm": 2.796875,
      "learning_rate": 1.5058892860944312e-05,
      "loss": 0.7349,
      "step": 623960
    },
    {
      "epoch": 2.1868592877645647,
      "grad_norm": 3.34375,
      "learning_rate": 1.505824383228061e-05,
      "loss": 0.8264,
      "step": 623970
    },
    {
      "epoch": 2.1868943352714605,
      "grad_norm": 3.15625,
      "learning_rate": 1.505759480361691e-05,
      "loss": 0.81,
      "step": 623980
    },
    {
      "epoch": 2.1869293827783562,
      "grad_norm": 2.8125,
      "learning_rate": 1.5056945774953204e-05,
      "loss": 0.832,
      "step": 623990
    },
    {
      "epoch": 2.1869644302852516,
      "grad_norm": 2.90625,
      "learning_rate": 1.5056296746289502e-05,
      "loss": 0.874,
      "step": 624000
    },
    {
      "epoch": 2.1869994777921473,
      "grad_norm": 2.9375,
      "learning_rate": 1.50556477176258e-05,
      "loss": 0.8049,
      "step": 624010
    },
    {
      "epoch": 2.187034525299043,
      "grad_norm": 2.84375,
      "learning_rate": 1.50549986889621e-05,
      "loss": 0.7753,
      "step": 624020
    },
    {
      "epoch": 2.1870695728059384,
      "grad_norm": 2.828125,
      "learning_rate": 1.5054349660298398e-05,
      "loss": 0.8209,
      "step": 624030
    },
    {
      "epoch": 2.187104620312834,
      "grad_norm": 2.65625,
      "learning_rate": 1.5053700631634696e-05,
      "loss": 0.8029,
      "step": 624040
    },
    {
      "epoch": 2.1871396678197295,
      "grad_norm": 2.90625,
      "learning_rate": 1.5053051602970994e-05,
      "loss": 0.7233,
      "step": 624050
    },
    {
      "epoch": 2.187174715326625,
      "grad_norm": 3.234375,
      "learning_rate": 1.5052402574307292e-05,
      "loss": 0.7803,
      "step": 624060
    },
    {
      "epoch": 2.187209762833521,
      "grad_norm": 2.984375,
      "learning_rate": 1.505175354564359e-05,
      "loss": 0.8592,
      "step": 624070
    },
    {
      "epoch": 2.1872448103404163,
      "grad_norm": 3.234375,
      "learning_rate": 1.5051104516979888e-05,
      "loss": 0.8619,
      "step": 624080
    },
    {
      "epoch": 2.187279857847312,
      "grad_norm": 2.90625,
      "learning_rate": 1.5050455488316188e-05,
      "loss": 0.8582,
      "step": 624090
    },
    {
      "epoch": 2.187314905354208,
      "grad_norm": 3.078125,
      "learning_rate": 1.5049806459652486e-05,
      "loss": 0.801,
      "step": 624100
    },
    {
      "epoch": 2.187349952861103,
      "grad_norm": 3.0,
      "learning_rate": 1.5049157430988784e-05,
      "loss": 0.8309,
      "step": 624110
    },
    {
      "epoch": 2.187385000367999,
      "grad_norm": 2.765625,
      "learning_rate": 1.5048508402325082e-05,
      "loss": 0.8206,
      "step": 624120
    },
    {
      "epoch": 2.1874200478748946,
      "grad_norm": 2.703125,
      "learning_rate": 1.504785937366138e-05,
      "loss": 0.8198,
      "step": 624130
    },
    {
      "epoch": 2.18745509538179,
      "grad_norm": 2.640625,
      "learning_rate": 1.5047210344997678e-05,
      "loss": 0.7799,
      "step": 624140
    },
    {
      "epoch": 2.1874901428886857,
      "grad_norm": 2.84375,
      "learning_rate": 1.5046561316333976e-05,
      "loss": 0.8549,
      "step": 624150
    },
    {
      "epoch": 2.187525190395581,
      "grad_norm": 3.09375,
      "learning_rate": 1.5045912287670275e-05,
      "loss": 0.8461,
      "step": 624160
    },
    {
      "epoch": 2.187560237902477,
      "grad_norm": 3.3125,
      "learning_rate": 1.5045263259006573e-05,
      "loss": 0.8429,
      "step": 624170
    },
    {
      "epoch": 2.1875952854093725,
      "grad_norm": 2.984375,
      "learning_rate": 1.5044614230342868e-05,
      "loss": 0.8007,
      "step": 624180
    },
    {
      "epoch": 2.187630332916268,
      "grad_norm": 3.15625,
      "learning_rate": 1.5043965201679166e-05,
      "loss": 0.7977,
      "step": 624190
    },
    {
      "epoch": 2.1876653804231636,
      "grad_norm": 2.671875,
      "learning_rate": 1.5043316173015464e-05,
      "loss": 0.7185,
      "step": 624200
    },
    {
      "epoch": 2.1877004279300594,
      "grad_norm": 3.546875,
      "learning_rate": 1.5042667144351764e-05,
      "loss": 0.8355,
      "step": 624210
    },
    {
      "epoch": 2.1877354754369547,
      "grad_norm": 3.375,
      "learning_rate": 1.5042018115688062e-05,
      "loss": 0.7942,
      "step": 624220
    },
    {
      "epoch": 2.1877705229438504,
      "grad_norm": 2.84375,
      "learning_rate": 1.504136908702436e-05,
      "loss": 0.7393,
      "step": 624230
    },
    {
      "epoch": 2.187805570450746,
      "grad_norm": 2.34375,
      "learning_rate": 1.5040720058360658e-05,
      "loss": 0.856,
      "step": 624240
    },
    {
      "epoch": 2.1878406179576415,
      "grad_norm": 2.703125,
      "learning_rate": 1.5040071029696956e-05,
      "loss": 0.8736,
      "step": 624250
    },
    {
      "epoch": 2.1878756654645373,
      "grad_norm": 2.875,
      "learning_rate": 1.5039422001033254e-05,
      "loss": 0.7671,
      "step": 624260
    },
    {
      "epoch": 2.1879107129714326,
      "grad_norm": 3.09375,
      "learning_rate": 1.5038772972369552e-05,
      "loss": 0.7563,
      "step": 624270
    },
    {
      "epoch": 2.1879457604783283,
      "grad_norm": 2.5625,
      "learning_rate": 1.5038123943705851e-05,
      "loss": 0.7594,
      "step": 624280
    },
    {
      "epoch": 2.187980807985224,
      "grad_norm": 3.0625,
      "learning_rate": 1.503747491504215e-05,
      "loss": 0.8474,
      "step": 624290
    },
    {
      "epoch": 2.1880158554921194,
      "grad_norm": 2.625,
      "learning_rate": 1.5036825886378447e-05,
      "loss": 0.7811,
      "step": 624300
    },
    {
      "epoch": 2.188050902999015,
      "grad_norm": 2.546875,
      "learning_rate": 1.5036176857714745e-05,
      "loss": 0.9011,
      "step": 624310
    },
    {
      "epoch": 2.188085950505911,
      "grad_norm": 2.546875,
      "learning_rate": 1.5035527829051043e-05,
      "loss": 0.7712,
      "step": 624320
    },
    {
      "epoch": 2.1881209980128062,
      "grad_norm": 3.15625,
      "learning_rate": 1.5034878800387341e-05,
      "loss": 0.8268,
      "step": 624330
    },
    {
      "epoch": 2.188156045519702,
      "grad_norm": 3.15625,
      "learning_rate": 1.503422977172364e-05,
      "loss": 0.8521,
      "step": 624340
    },
    {
      "epoch": 2.1881910930265978,
      "grad_norm": 2.859375,
      "learning_rate": 1.5033580743059939e-05,
      "loss": 0.7577,
      "step": 624350
    },
    {
      "epoch": 2.188226140533493,
      "grad_norm": 2.59375,
      "learning_rate": 1.5032931714396234e-05,
      "loss": 0.8067,
      "step": 624360
    },
    {
      "epoch": 2.188261188040389,
      "grad_norm": 2.953125,
      "learning_rate": 1.5032282685732532e-05,
      "loss": 0.8341,
      "step": 624370
    },
    {
      "epoch": 2.188296235547284,
      "grad_norm": 3.015625,
      "learning_rate": 1.503163365706883e-05,
      "loss": 0.975,
      "step": 624380
    },
    {
      "epoch": 2.18833128305418,
      "grad_norm": 3.15625,
      "learning_rate": 1.503098462840513e-05,
      "loss": 0.798,
      "step": 624390
    },
    {
      "epoch": 2.1883663305610757,
      "grad_norm": 3.015625,
      "learning_rate": 1.5030335599741427e-05,
      "loss": 0.8521,
      "step": 624400
    },
    {
      "epoch": 2.188401378067971,
      "grad_norm": 2.890625,
      "learning_rate": 1.5029686571077725e-05,
      "loss": 0.8606,
      "step": 624410
    },
    {
      "epoch": 2.1884364255748667,
      "grad_norm": 3.109375,
      "learning_rate": 1.5029037542414023e-05,
      "loss": 0.8048,
      "step": 624420
    },
    {
      "epoch": 2.1884714730817625,
      "grad_norm": 2.5625,
      "learning_rate": 1.5028388513750321e-05,
      "loss": 0.7814,
      "step": 624430
    },
    {
      "epoch": 2.188506520588658,
      "grad_norm": 2.484375,
      "learning_rate": 1.502773948508662e-05,
      "loss": 0.7602,
      "step": 624440
    },
    {
      "epoch": 2.1885415680955536,
      "grad_norm": 3.21875,
      "learning_rate": 1.5027090456422917e-05,
      "loss": 0.8042,
      "step": 624450
    },
    {
      "epoch": 2.1885766156024493,
      "grad_norm": 3.109375,
      "learning_rate": 1.5026441427759217e-05,
      "loss": 0.8387,
      "step": 624460
    },
    {
      "epoch": 2.1886116631093446,
      "grad_norm": 2.90625,
      "learning_rate": 1.5025792399095515e-05,
      "loss": 0.8381,
      "step": 624470
    },
    {
      "epoch": 2.1886467106162404,
      "grad_norm": 2.921875,
      "learning_rate": 1.5025143370431813e-05,
      "loss": 0.8199,
      "step": 624480
    },
    {
      "epoch": 2.1886817581231357,
      "grad_norm": 2.5,
      "learning_rate": 1.5024494341768111e-05,
      "loss": 0.7336,
      "step": 624490
    },
    {
      "epoch": 2.1887168056300315,
      "grad_norm": 2.5,
      "learning_rate": 1.5023845313104409e-05,
      "loss": 0.868,
      "step": 624500
    },
    {
      "epoch": 2.188751853136927,
      "grad_norm": 3.296875,
      "learning_rate": 1.5023196284440707e-05,
      "loss": 0.779,
      "step": 624510
    },
    {
      "epoch": 2.1887869006438225,
      "grad_norm": 3.015625,
      "learning_rate": 1.5022547255777005e-05,
      "loss": 0.8472,
      "step": 624520
    },
    {
      "epoch": 2.1888219481507183,
      "grad_norm": 2.984375,
      "learning_rate": 1.5021898227113305e-05,
      "loss": 0.8027,
      "step": 624530
    },
    {
      "epoch": 2.188856995657614,
      "grad_norm": 3.25,
      "learning_rate": 1.5021249198449603e-05,
      "loss": 0.8577,
      "step": 624540
    },
    {
      "epoch": 2.1888920431645094,
      "grad_norm": 3.4375,
      "learning_rate": 1.5020600169785897e-05,
      "loss": 0.8437,
      "step": 624550
    },
    {
      "epoch": 2.188927090671405,
      "grad_norm": 3.03125,
      "learning_rate": 1.5019951141122195e-05,
      "loss": 0.8282,
      "step": 624560
    },
    {
      "epoch": 2.188962138178301,
      "grad_norm": 2.609375,
      "learning_rate": 1.5019302112458495e-05,
      "loss": 0.8357,
      "step": 624570
    },
    {
      "epoch": 2.188997185685196,
      "grad_norm": 3.0,
      "learning_rate": 1.5018653083794793e-05,
      "loss": 0.722,
      "step": 624580
    },
    {
      "epoch": 2.189032233192092,
      "grad_norm": 2.515625,
      "learning_rate": 1.5018004055131091e-05,
      "loss": 0.846,
      "step": 624590
    },
    {
      "epoch": 2.1890672806989873,
      "grad_norm": 2.75,
      "learning_rate": 1.5017355026467389e-05,
      "loss": 0.7235,
      "step": 624600
    },
    {
      "epoch": 2.189102328205883,
      "grad_norm": 3.0625,
      "learning_rate": 1.5016705997803687e-05,
      "loss": 0.804,
      "step": 624610
    },
    {
      "epoch": 2.1891373757127788,
      "grad_norm": 3.515625,
      "learning_rate": 1.5016056969139985e-05,
      "loss": 0.8235,
      "step": 624620
    },
    {
      "epoch": 2.189172423219674,
      "grad_norm": 3.140625,
      "learning_rate": 1.5015407940476283e-05,
      "loss": 0.8478,
      "step": 624630
    },
    {
      "epoch": 2.18920747072657,
      "grad_norm": 3.21875,
      "learning_rate": 1.5014758911812583e-05,
      "loss": 0.7289,
      "step": 624640
    },
    {
      "epoch": 2.1892425182334656,
      "grad_norm": 3.09375,
      "learning_rate": 1.501410988314888e-05,
      "loss": 0.8953,
      "step": 624650
    },
    {
      "epoch": 2.189277565740361,
      "grad_norm": 2.765625,
      "learning_rate": 1.5013460854485179e-05,
      "loss": 0.749,
      "step": 624660
    },
    {
      "epoch": 2.1893126132472567,
      "grad_norm": 2.90625,
      "learning_rate": 1.5012811825821477e-05,
      "loss": 0.8127,
      "step": 624670
    },
    {
      "epoch": 2.1893476607541524,
      "grad_norm": 3.25,
      "learning_rate": 1.5012162797157775e-05,
      "loss": 0.8198,
      "step": 624680
    },
    {
      "epoch": 2.1893827082610477,
      "grad_norm": 2.75,
      "learning_rate": 1.5011513768494073e-05,
      "loss": 0.7408,
      "step": 624690
    },
    {
      "epoch": 2.1894177557679435,
      "grad_norm": 2.796875,
      "learning_rate": 1.501086473983037e-05,
      "loss": 0.7804,
      "step": 624700
    },
    {
      "epoch": 2.189452803274839,
      "grad_norm": 2.765625,
      "learning_rate": 1.501021571116667e-05,
      "loss": 0.742,
      "step": 624710
    },
    {
      "epoch": 2.1894878507817346,
      "grad_norm": 2.65625,
      "learning_rate": 1.5009566682502968e-05,
      "loss": 0.7631,
      "step": 624720
    },
    {
      "epoch": 2.1895228982886303,
      "grad_norm": 3.0,
      "learning_rate": 1.5008917653839266e-05,
      "loss": 0.8551,
      "step": 624730
    },
    {
      "epoch": 2.1895579457955257,
      "grad_norm": 2.921875,
      "learning_rate": 1.5008268625175561e-05,
      "loss": 0.7482,
      "step": 624740
    },
    {
      "epoch": 2.1895929933024214,
      "grad_norm": 3.0625,
      "learning_rate": 1.5007619596511859e-05,
      "loss": 0.8324,
      "step": 624750
    },
    {
      "epoch": 2.189628040809317,
      "grad_norm": 3.1875,
      "learning_rate": 1.5006970567848159e-05,
      "loss": 0.8058,
      "step": 624760
    },
    {
      "epoch": 2.1896630883162125,
      "grad_norm": 2.671875,
      "learning_rate": 1.5006321539184457e-05,
      "loss": 0.7985,
      "step": 624770
    },
    {
      "epoch": 2.1896981358231082,
      "grad_norm": 2.9375,
      "learning_rate": 1.5005672510520755e-05,
      "loss": 0.8238,
      "step": 624780
    },
    {
      "epoch": 2.189733183330004,
      "grad_norm": 2.875,
      "learning_rate": 1.5005023481857053e-05,
      "loss": 0.765,
      "step": 624790
    },
    {
      "epoch": 2.1897682308368993,
      "grad_norm": 3.1875,
      "learning_rate": 1.500437445319335e-05,
      "loss": 0.8064,
      "step": 624800
    },
    {
      "epoch": 2.189803278343795,
      "grad_norm": 2.578125,
      "learning_rate": 1.5003725424529649e-05,
      "loss": 0.8958,
      "step": 624810
    },
    {
      "epoch": 2.1898383258506904,
      "grad_norm": 2.328125,
      "learning_rate": 1.5003076395865947e-05,
      "loss": 0.7816,
      "step": 624820
    },
    {
      "epoch": 2.189873373357586,
      "grad_norm": 2.6875,
      "learning_rate": 1.5002427367202246e-05,
      "loss": 0.7859,
      "step": 624830
    },
    {
      "epoch": 2.189908420864482,
      "grad_norm": 2.71875,
      "learning_rate": 1.5001778338538544e-05,
      "loss": 0.8567,
      "step": 624840
    },
    {
      "epoch": 2.189943468371377,
      "grad_norm": 2.84375,
      "learning_rate": 1.5001129309874842e-05,
      "loss": 0.8825,
      "step": 624850
    },
    {
      "epoch": 2.189978515878273,
      "grad_norm": 3.1875,
      "learning_rate": 1.500048028121114e-05,
      "loss": 0.8372,
      "step": 624860
    },
    {
      "epoch": 2.1900135633851687,
      "grad_norm": 2.890625,
      "learning_rate": 1.4999831252547438e-05,
      "loss": 0.7799,
      "step": 624870
    },
    {
      "epoch": 2.190048610892064,
      "grad_norm": 3.109375,
      "learning_rate": 1.4999182223883736e-05,
      "loss": 0.8308,
      "step": 624880
    },
    {
      "epoch": 2.19008365839896,
      "grad_norm": 3.078125,
      "learning_rate": 1.4998533195220034e-05,
      "loss": 0.8167,
      "step": 624890
    },
    {
      "epoch": 2.1901187059058556,
      "grad_norm": 2.625,
      "learning_rate": 1.4997884166556334e-05,
      "loss": 0.8311,
      "step": 624900
    },
    {
      "epoch": 2.190153753412751,
      "grad_norm": 2.671875,
      "learning_rate": 1.4997235137892632e-05,
      "loss": 0.7719,
      "step": 624910
    },
    {
      "epoch": 2.1901888009196466,
      "grad_norm": 2.75,
      "learning_rate": 1.499658610922893e-05,
      "loss": 0.8166,
      "step": 624920
    },
    {
      "epoch": 2.1902238484265424,
      "grad_norm": 3.0,
      "learning_rate": 1.4995937080565225e-05,
      "loss": 0.8451,
      "step": 624930
    },
    {
      "epoch": 2.1902588959334377,
      "grad_norm": 2.6875,
      "learning_rate": 1.4995288051901524e-05,
      "loss": 0.866,
      "step": 624940
    },
    {
      "epoch": 2.1902939434403335,
      "grad_norm": 3.109375,
      "learning_rate": 1.4994639023237822e-05,
      "loss": 0.8383,
      "step": 624950
    },
    {
      "epoch": 2.1903289909472288,
      "grad_norm": 2.71875,
      "learning_rate": 1.499398999457412e-05,
      "loss": 0.8485,
      "step": 624960
    },
    {
      "epoch": 2.1903640384541245,
      "grad_norm": 3.03125,
      "learning_rate": 1.4993340965910418e-05,
      "loss": 0.8838,
      "step": 624970
    },
    {
      "epoch": 2.1903990859610203,
      "grad_norm": 2.8125,
      "learning_rate": 1.4992691937246716e-05,
      "loss": 0.8643,
      "step": 624980
    },
    {
      "epoch": 2.1904341334679156,
      "grad_norm": 3.453125,
      "learning_rate": 1.4992042908583014e-05,
      "loss": 0.7627,
      "step": 624990
    },
    {
      "epoch": 2.1904691809748114,
      "grad_norm": 3.046875,
      "learning_rate": 1.4991393879919312e-05,
      "loss": 0.8531,
      "step": 625000
    },
    {
      "epoch": 2.1904691809748114,
      "eval_loss": 0.764288604259491,
      "eval_runtime": 552.2897,
      "eval_samples_per_second": 688.834,
      "eval_steps_per_second": 57.403,
      "step": 625000
    },
    {
      "epoch": 2.190504228481707,
      "grad_norm": 2.703125,
      "learning_rate": 1.4990744851255612e-05,
      "loss": 0.7602,
      "step": 625010
    },
    {
      "epoch": 2.1905392759886024,
      "grad_norm": 3.015625,
      "learning_rate": 1.499009582259191e-05,
      "loss": 0.8783,
      "step": 625020
    },
    {
      "epoch": 2.190574323495498,
      "grad_norm": 2.734375,
      "learning_rate": 1.4989446793928208e-05,
      "loss": 0.8485,
      "step": 625030
    },
    {
      "epoch": 2.190609371002394,
      "grad_norm": 2.546875,
      "learning_rate": 1.4988797765264506e-05,
      "loss": 0.8725,
      "step": 625040
    },
    {
      "epoch": 2.1906444185092893,
      "grad_norm": 3.203125,
      "learning_rate": 1.4988148736600804e-05,
      "loss": 0.7244,
      "step": 625050
    },
    {
      "epoch": 2.190679466016185,
      "grad_norm": 3.359375,
      "learning_rate": 1.4987499707937102e-05,
      "loss": 0.8326,
      "step": 625060
    },
    {
      "epoch": 2.1907145135230803,
      "grad_norm": 3.046875,
      "learning_rate": 1.49868506792734e-05,
      "loss": 0.8554,
      "step": 625070
    },
    {
      "epoch": 2.190749561029976,
      "grad_norm": 2.90625,
      "learning_rate": 1.49862016506097e-05,
      "loss": 0.738,
      "step": 625080
    },
    {
      "epoch": 2.190784608536872,
      "grad_norm": 2.9375,
      "learning_rate": 1.4985552621945998e-05,
      "loss": 0.8306,
      "step": 625090
    },
    {
      "epoch": 2.190819656043767,
      "grad_norm": 3.03125,
      "learning_rate": 1.4984903593282296e-05,
      "loss": 0.7828,
      "step": 625100
    },
    {
      "epoch": 2.190854703550663,
      "grad_norm": 2.5,
      "learning_rate": 1.4984254564618594e-05,
      "loss": 0.7668,
      "step": 625110
    },
    {
      "epoch": 2.1908897510575587,
      "grad_norm": 2.546875,
      "learning_rate": 1.498360553595489e-05,
      "loss": 0.7431,
      "step": 625120
    },
    {
      "epoch": 2.190924798564454,
      "grad_norm": 2.703125,
      "learning_rate": 1.4982956507291188e-05,
      "loss": 0.8324,
      "step": 625130
    },
    {
      "epoch": 2.1909598460713497,
      "grad_norm": 3.21875,
      "learning_rate": 1.4982307478627486e-05,
      "loss": 0.8143,
      "step": 625140
    },
    {
      "epoch": 2.1909948935782455,
      "grad_norm": 3.171875,
      "learning_rate": 1.4981658449963784e-05,
      "loss": 0.8786,
      "step": 625150
    },
    {
      "epoch": 2.191029941085141,
      "grad_norm": 3.703125,
      "learning_rate": 1.4981009421300082e-05,
      "loss": 0.8639,
      "step": 625160
    },
    {
      "epoch": 2.1910649885920366,
      "grad_norm": 2.65625,
      "learning_rate": 1.498036039263638e-05,
      "loss": 0.8917,
      "step": 625170
    },
    {
      "epoch": 2.1911000360989323,
      "grad_norm": 3.296875,
      "learning_rate": 1.4979711363972678e-05,
      "loss": 0.8229,
      "step": 625180
    },
    {
      "epoch": 2.1911350836058276,
      "grad_norm": 2.6875,
      "learning_rate": 1.4979062335308978e-05,
      "loss": 0.8503,
      "step": 625190
    },
    {
      "epoch": 2.1911701311127234,
      "grad_norm": 2.703125,
      "learning_rate": 1.4978413306645276e-05,
      "loss": 0.7814,
      "step": 625200
    },
    {
      "epoch": 2.1912051786196187,
      "grad_norm": 2.96875,
      "learning_rate": 1.4977764277981574e-05,
      "loss": 0.8491,
      "step": 625210
    },
    {
      "epoch": 2.1912402261265145,
      "grad_norm": 3.734375,
      "learning_rate": 1.4977115249317872e-05,
      "loss": 0.8737,
      "step": 625220
    },
    {
      "epoch": 2.1912752736334102,
      "grad_norm": 2.421875,
      "learning_rate": 1.497646622065417e-05,
      "loss": 0.7898,
      "step": 625230
    },
    {
      "epoch": 2.1913103211403056,
      "grad_norm": 3.328125,
      "learning_rate": 1.4975817191990468e-05,
      "loss": 0.7766,
      "step": 625240
    },
    {
      "epoch": 2.1913453686472013,
      "grad_norm": 2.671875,
      "learning_rate": 1.4975168163326766e-05,
      "loss": 0.7039,
      "step": 625250
    },
    {
      "epoch": 2.191380416154097,
      "grad_norm": 3.4375,
      "learning_rate": 1.4974519134663066e-05,
      "loss": 0.842,
      "step": 625260
    },
    {
      "epoch": 2.1914154636609924,
      "grad_norm": 2.984375,
      "learning_rate": 1.4973870105999364e-05,
      "loss": 0.8048,
      "step": 625270
    },
    {
      "epoch": 2.191450511167888,
      "grad_norm": 3.0,
      "learning_rate": 1.4973221077335662e-05,
      "loss": 0.8028,
      "step": 625280
    },
    {
      "epoch": 2.191485558674784,
      "grad_norm": 2.921875,
      "learning_rate": 1.497257204867196e-05,
      "loss": 0.8767,
      "step": 625290
    },
    {
      "epoch": 2.191520606181679,
      "grad_norm": 2.828125,
      "learning_rate": 1.4971923020008254e-05,
      "loss": 0.7777,
      "step": 625300
    },
    {
      "epoch": 2.191555653688575,
      "grad_norm": 3.109375,
      "learning_rate": 1.4971273991344554e-05,
      "loss": 0.8559,
      "step": 625310
    },
    {
      "epoch": 2.1915907011954703,
      "grad_norm": 3.234375,
      "learning_rate": 1.4970624962680852e-05,
      "loss": 0.8879,
      "step": 625320
    },
    {
      "epoch": 2.191625748702366,
      "grad_norm": 2.6875,
      "learning_rate": 1.496997593401715e-05,
      "loss": 0.8581,
      "step": 625330
    },
    {
      "epoch": 2.191660796209262,
      "grad_norm": 2.828125,
      "learning_rate": 1.4969326905353448e-05,
      "loss": 0.7743,
      "step": 625340
    },
    {
      "epoch": 2.191695843716157,
      "grad_norm": 3.234375,
      "learning_rate": 1.4968677876689746e-05,
      "loss": 0.8009,
      "step": 625350
    },
    {
      "epoch": 2.191730891223053,
      "grad_norm": 3.125,
      "learning_rate": 1.4968028848026044e-05,
      "loss": 0.7577,
      "step": 625360
    },
    {
      "epoch": 2.1917659387299486,
      "grad_norm": 2.9375,
      "learning_rate": 1.4967379819362342e-05,
      "loss": 0.8401,
      "step": 625370
    },
    {
      "epoch": 2.191800986236844,
      "grad_norm": 2.796875,
      "learning_rate": 1.4966730790698642e-05,
      "loss": 0.8414,
      "step": 625380
    },
    {
      "epoch": 2.1918360337437397,
      "grad_norm": 3.09375,
      "learning_rate": 1.496608176203494e-05,
      "loss": 0.8726,
      "step": 625390
    },
    {
      "epoch": 2.1918710812506355,
      "grad_norm": 2.9375,
      "learning_rate": 1.4965432733371238e-05,
      "loss": 0.7867,
      "step": 625400
    },
    {
      "epoch": 2.1919061287575308,
      "grad_norm": 3.28125,
      "learning_rate": 1.4964783704707536e-05,
      "loss": 0.7924,
      "step": 625410
    },
    {
      "epoch": 2.1919411762644265,
      "grad_norm": 3.078125,
      "learning_rate": 1.4964134676043834e-05,
      "loss": 0.8876,
      "step": 625420
    },
    {
      "epoch": 2.191976223771322,
      "grad_norm": 3.015625,
      "learning_rate": 1.4963485647380132e-05,
      "loss": 0.7833,
      "step": 625430
    },
    {
      "epoch": 2.1920112712782176,
      "grad_norm": 2.96875,
      "learning_rate": 1.496283661871643e-05,
      "loss": 0.7498,
      "step": 625440
    },
    {
      "epoch": 2.1920463187851134,
      "grad_norm": 2.703125,
      "learning_rate": 1.496218759005273e-05,
      "loss": 0.8069,
      "step": 625450
    },
    {
      "epoch": 2.1920813662920087,
      "grad_norm": 3.0625,
      "learning_rate": 1.4961538561389027e-05,
      "loss": 0.8544,
      "step": 625460
    },
    {
      "epoch": 2.1921164137989044,
      "grad_norm": 2.28125,
      "learning_rate": 1.4960889532725325e-05,
      "loss": 0.8015,
      "step": 625470
    },
    {
      "epoch": 2.1921514613058,
      "grad_norm": 2.984375,
      "learning_rate": 1.4960240504061623e-05,
      "loss": 0.9002,
      "step": 625480
    },
    {
      "epoch": 2.1921865088126955,
      "grad_norm": 2.890625,
      "learning_rate": 1.495959147539792e-05,
      "loss": 0.8477,
      "step": 625490
    },
    {
      "epoch": 2.1922215563195913,
      "grad_norm": 3.3125,
      "learning_rate": 1.4958942446734218e-05,
      "loss": 0.798,
      "step": 625500
    },
    {
      "epoch": 2.192256603826487,
      "grad_norm": 3.0,
      "learning_rate": 1.4958293418070516e-05,
      "loss": 0.7249,
      "step": 625510
    },
    {
      "epoch": 2.1922916513333823,
      "grad_norm": 2.796875,
      "learning_rate": 1.4957644389406814e-05,
      "loss": 0.8273,
      "step": 625520
    },
    {
      "epoch": 2.192326698840278,
      "grad_norm": 3.0625,
      "learning_rate": 1.4956995360743112e-05,
      "loss": 0.8362,
      "step": 625530
    },
    {
      "epoch": 2.1923617463471734,
      "grad_norm": 3.203125,
      "learning_rate": 1.495634633207941e-05,
      "loss": 0.8344,
      "step": 625540
    },
    {
      "epoch": 2.192396793854069,
      "grad_norm": 2.453125,
      "learning_rate": 1.4955697303415708e-05,
      "loss": 0.8513,
      "step": 625550
    },
    {
      "epoch": 2.192431841360965,
      "grad_norm": 2.640625,
      "learning_rate": 1.4955048274752007e-05,
      "loss": 0.8459,
      "step": 625560
    },
    {
      "epoch": 2.1924668888678602,
      "grad_norm": 2.734375,
      "learning_rate": 1.4954399246088305e-05,
      "loss": 0.8153,
      "step": 625570
    },
    {
      "epoch": 2.192501936374756,
      "grad_norm": 3.3125,
      "learning_rate": 1.4953750217424603e-05,
      "loss": 0.8456,
      "step": 625580
    },
    {
      "epoch": 2.1925369838816517,
      "grad_norm": 3.171875,
      "learning_rate": 1.4953101188760901e-05,
      "loss": 0.771,
      "step": 625590
    },
    {
      "epoch": 2.192572031388547,
      "grad_norm": 2.953125,
      "learning_rate": 1.49524521600972e-05,
      "loss": 0.7431,
      "step": 625600
    },
    {
      "epoch": 2.192607078895443,
      "grad_norm": 3.15625,
      "learning_rate": 1.4951803131433497e-05,
      "loss": 0.8093,
      "step": 625610
    },
    {
      "epoch": 2.1926421264023386,
      "grad_norm": 3.46875,
      "learning_rate": 1.4951154102769795e-05,
      "loss": 0.7993,
      "step": 625620
    },
    {
      "epoch": 2.192677173909234,
      "grad_norm": 2.421875,
      "learning_rate": 1.4950505074106095e-05,
      "loss": 0.8157,
      "step": 625630
    },
    {
      "epoch": 2.1927122214161296,
      "grad_norm": 3.328125,
      "learning_rate": 1.4949856045442393e-05,
      "loss": 0.8385,
      "step": 625640
    },
    {
      "epoch": 2.192747268923025,
      "grad_norm": 3.203125,
      "learning_rate": 1.4949207016778691e-05,
      "loss": 0.8281,
      "step": 625650
    },
    {
      "epoch": 2.1927823164299207,
      "grad_norm": 2.375,
      "learning_rate": 1.4948557988114989e-05,
      "loss": 0.7536,
      "step": 625660
    },
    {
      "epoch": 2.1928173639368165,
      "grad_norm": 3.140625,
      "learning_rate": 1.4947908959451287e-05,
      "loss": 0.8434,
      "step": 625670
    },
    {
      "epoch": 2.192852411443712,
      "grad_norm": 2.921875,
      "learning_rate": 1.4947259930787583e-05,
      "loss": 0.8348,
      "step": 625680
    },
    {
      "epoch": 2.1928874589506075,
      "grad_norm": 3.234375,
      "learning_rate": 1.4946610902123881e-05,
      "loss": 0.8398,
      "step": 625690
    },
    {
      "epoch": 2.1929225064575033,
      "grad_norm": 2.953125,
      "learning_rate": 1.494596187346018e-05,
      "loss": 0.839,
      "step": 625700
    },
    {
      "epoch": 2.1929575539643986,
      "grad_norm": 2.8125,
      "learning_rate": 1.4945312844796477e-05,
      "loss": 0.7952,
      "step": 625710
    },
    {
      "epoch": 2.1929926014712944,
      "grad_norm": 2.515625,
      "learning_rate": 1.4944663816132775e-05,
      "loss": 0.8699,
      "step": 625720
    },
    {
      "epoch": 2.19302764897819,
      "grad_norm": 2.625,
      "learning_rate": 1.4944014787469073e-05,
      "loss": 0.7904,
      "step": 625730
    },
    {
      "epoch": 2.1930626964850854,
      "grad_norm": 2.8125,
      "learning_rate": 1.4943365758805373e-05,
      "loss": 0.8904,
      "step": 625740
    },
    {
      "epoch": 2.193097743991981,
      "grad_norm": 2.96875,
      "learning_rate": 1.4942716730141671e-05,
      "loss": 0.885,
      "step": 625750
    },
    {
      "epoch": 2.1931327914988765,
      "grad_norm": 3.53125,
      "learning_rate": 1.4942067701477969e-05,
      "loss": 0.8271,
      "step": 625760
    },
    {
      "epoch": 2.1931678390057723,
      "grad_norm": 2.953125,
      "learning_rate": 1.4941418672814267e-05,
      "loss": 0.776,
      "step": 625770
    },
    {
      "epoch": 2.193202886512668,
      "grad_norm": 3.21875,
      "learning_rate": 1.4940769644150565e-05,
      "loss": 0.8357,
      "step": 625780
    },
    {
      "epoch": 2.1932379340195634,
      "grad_norm": 3.078125,
      "learning_rate": 1.4940120615486863e-05,
      "loss": 0.8691,
      "step": 625790
    },
    {
      "epoch": 2.193272981526459,
      "grad_norm": 3.125,
      "learning_rate": 1.4939471586823161e-05,
      "loss": 0.8434,
      "step": 625800
    },
    {
      "epoch": 2.193308029033355,
      "grad_norm": 3.078125,
      "learning_rate": 1.493882255815946e-05,
      "loss": 0.8403,
      "step": 625810
    },
    {
      "epoch": 2.19334307654025,
      "grad_norm": 2.828125,
      "learning_rate": 1.4938173529495759e-05,
      "loss": 0.8596,
      "step": 625820
    },
    {
      "epoch": 2.193378124047146,
      "grad_norm": 3.1875,
      "learning_rate": 1.4937524500832057e-05,
      "loss": 0.7845,
      "step": 625830
    },
    {
      "epoch": 2.1934131715540417,
      "grad_norm": 2.734375,
      "learning_rate": 1.4936875472168355e-05,
      "loss": 0.9441,
      "step": 625840
    },
    {
      "epoch": 2.193448219060937,
      "grad_norm": 3.015625,
      "learning_rate": 1.4936226443504653e-05,
      "loss": 0.878,
      "step": 625850
    },
    {
      "epoch": 2.1934832665678328,
      "grad_norm": 2.5625,
      "learning_rate": 1.493557741484095e-05,
      "loss": 0.79,
      "step": 625860
    },
    {
      "epoch": 2.193518314074728,
      "grad_norm": 2.9375,
      "learning_rate": 1.4934928386177247e-05,
      "loss": 0.8896,
      "step": 625870
    },
    {
      "epoch": 2.193553361581624,
      "grad_norm": 3.328125,
      "learning_rate": 1.4934279357513545e-05,
      "loss": 0.8396,
      "step": 625880
    },
    {
      "epoch": 2.1935884090885196,
      "grad_norm": 3.203125,
      "learning_rate": 1.4933630328849843e-05,
      "loss": 0.8265,
      "step": 625890
    },
    {
      "epoch": 2.193623456595415,
      "grad_norm": 3.03125,
      "learning_rate": 1.4932981300186141e-05,
      "loss": 0.8407,
      "step": 625900
    },
    {
      "epoch": 2.1936585041023107,
      "grad_norm": 3.015625,
      "learning_rate": 1.4932332271522439e-05,
      "loss": 0.8875,
      "step": 625910
    },
    {
      "epoch": 2.1936935516092064,
      "grad_norm": 2.84375,
      "learning_rate": 1.4931683242858737e-05,
      "loss": 0.7694,
      "step": 625920
    },
    {
      "epoch": 2.1937285991161017,
      "grad_norm": 3.0625,
      "learning_rate": 1.4931034214195037e-05,
      "loss": 0.7482,
      "step": 625930
    },
    {
      "epoch": 2.1937636466229975,
      "grad_norm": 2.734375,
      "learning_rate": 1.4930385185531335e-05,
      "loss": 0.8155,
      "step": 625940
    },
    {
      "epoch": 2.1937986941298933,
      "grad_norm": 2.90625,
      "learning_rate": 1.4929736156867633e-05,
      "loss": 0.8807,
      "step": 625950
    },
    {
      "epoch": 2.1938337416367886,
      "grad_norm": 3.15625,
      "learning_rate": 1.492908712820393e-05,
      "loss": 0.8775,
      "step": 625960
    },
    {
      "epoch": 2.1938687891436843,
      "grad_norm": 3.109375,
      "learning_rate": 1.4928438099540229e-05,
      "loss": 0.7998,
      "step": 625970
    },
    {
      "epoch": 2.1939038366505796,
      "grad_norm": 2.78125,
      "learning_rate": 1.4927789070876527e-05,
      "loss": 0.8452,
      "step": 625980
    },
    {
      "epoch": 2.1939388841574754,
      "grad_norm": 3.0,
      "learning_rate": 1.4927140042212826e-05,
      "loss": 0.9028,
      "step": 625990
    },
    {
      "epoch": 2.193973931664371,
      "grad_norm": 2.890625,
      "learning_rate": 1.4926491013549124e-05,
      "loss": 0.7297,
      "step": 626000
    },
    {
      "epoch": 2.1940089791712665,
      "grad_norm": 2.796875,
      "learning_rate": 1.4925841984885422e-05,
      "loss": 0.8665,
      "step": 626010
    },
    {
      "epoch": 2.1940440266781622,
      "grad_norm": 3.15625,
      "learning_rate": 1.492519295622172e-05,
      "loss": 0.8211,
      "step": 626020
    },
    {
      "epoch": 2.194079074185058,
      "grad_norm": 3.15625,
      "learning_rate": 1.4924543927558018e-05,
      "loss": 0.8365,
      "step": 626030
    },
    {
      "epoch": 2.1941141216919533,
      "grad_norm": 3.0,
      "learning_rate": 1.4923894898894316e-05,
      "loss": 0.8646,
      "step": 626040
    },
    {
      "epoch": 2.194149169198849,
      "grad_norm": 2.546875,
      "learning_rate": 1.4923245870230614e-05,
      "loss": 0.8752,
      "step": 626050
    },
    {
      "epoch": 2.194184216705745,
      "grad_norm": 2.890625,
      "learning_rate": 1.492259684156691e-05,
      "loss": 0.8007,
      "step": 626060
    },
    {
      "epoch": 2.19421926421264,
      "grad_norm": 2.90625,
      "learning_rate": 1.4921947812903209e-05,
      "loss": 0.7903,
      "step": 626070
    },
    {
      "epoch": 2.194254311719536,
      "grad_norm": 2.234375,
      "learning_rate": 1.4921298784239507e-05,
      "loss": 0.8101,
      "step": 626080
    },
    {
      "epoch": 2.194289359226431,
      "grad_norm": 3.0,
      "learning_rate": 1.4920649755575805e-05,
      "loss": 0.8784,
      "step": 626090
    },
    {
      "epoch": 2.194324406733327,
      "grad_norm": 2.9375,
      "learning_rate": 1.4920000726912103e-05,
      "loss": 0.9223,
      "step": 626100
    },
    {
      "epoch": 2.1943594542402227,
      "grad_norm": 3.0,
      "learning_rate": 1.4919351698248402e-05,
      "loss": 0.8466,
      "step": 626110
    },
    {
      "epoch": 2.194394501747118,
      "grad_norm": 3.46875,
      "learning_rate": 1.49187026695847e-05,
      "loss": 0.8811,
      "step": 626120
    },
    {
      "epoch": 2.194429549254014,
      "grad_norm": 3.171875,
      "learning_rate": 1.4918053640920998e-05,
      "loss": 0.7684,
      "step": 626130
    },
    {
      "epoch": 2.1944645967609095,
      "grad_norm": 3.03125,
      "learning_rate": 1.4917404612257296e-05,
      "loss": 0.8089,
      "step": 626140
    },
    {
      "epoch": 2.194499644267805,
      "grad_norm": 2.90625,
      "learning_rate": 1.4916755583593594e-05,
      "loss": 0.7972,
      "step": 626150
    },
    {
      "epoch": 2.1945346917747006,
      "grad_norm": 2.703125,
      "learning_rate": 1.4916106554929892e-05,
      "loss": 0.7881,
      "step": 626160
    },
    {
      "epoch": 2.1945697392815964,
      "grad_norm": 3.28125,
      "learning_rate": 1.491545752626619e-05,
      "loss": 0.7775,
      "step": 626170
    },
    {
      "epoch": 2.1946047867884917,
      "grad_norm": 2.8125,
      "learning_rate": 1.491480849760249e-05,
      "loss": 0.8444,
      "step": 626180
    },
    {
      "epoch": 2.1946398342953874,
      "grad_norm": 2.578125,
      "learning_rate": 1.4914159468938788e-05,
      "loss": 0.7841,
      "step": 626190
    },
    {
      "epoch": 2.1946748818022828,
      "grad_norm": 3.671875,
      "learning_rate": 1.4913510440275086e-05,
      "loss": 0.7473,
      "step": 626200
    },
    {
      "epoch": 2.1947099293091785,
      "grad_norm": 2.984375,
      "learning_rate": 1.4912861411611384e-05,
      "loss": 0.8002,
      "step": 626210
    },
    {
      "epoch": 2.1947449768160743,
      "grad_norm": 3.03125,
      "learning_rate": 1.4912212382947682e-05,
      "loss": 0.8193,
      "step": 626220
    },
    {
      "epoch": 2.1947800243229696,
      "grad_norm": 2.71875,
      "learning_rate": 1.491156335428398e-05,
      "loss": 0.7565,
      "step": 626230
    },
    {
      "epoch": 2.1948150718298653,
      "grad_norm": 3.03125,
      "learning_rate": 1.4910914325620278e-05,
      "loss": 0.8055,
      "step": 626240
    },
    {
      "epoch": 2.194850119336761,
      "grad_norm": 2.59375,
      "learning_rate": 1.4910265296956574e-05,
      "loss": 0.8276,
      "step": 626250
    },
    {
      "epoch": 2.1948851668436564,
      "grad_norm": 2.59375,
      "learning_rate": 1.4909616268292872e-05,
      "loss": 0.7879,
      "step": 626260
    },
    {
      "epoch": 2.194920214350552,
      "grad_norm": 2.875,
      "learning_rate": 1.490896723962917e-05,
      "loss": 0.8713,
      "step": 626270
    },
    {
      "epoch": 2.194955261857448,
      "grad_norm": 3.25,
      "learning_rate": 1.4908318210965468e-05,
      "loss": 0.821,
      "step": 626280
    },
    {
      "epoch": 2.1949903093643433,
      "grad_norm": 2.5625,
      "learning_rate": 1.4907669182301768e-05,
      "loss": 0.7936,
      "step": 626290
    },
    {
      "epoch": 2.195025356871239,
      "grad_norm": 3.15625,
      "learning_rate": 1.4907020153638066e-05,
      "loss": 0.804,
      "step": 626300
    },
    {
      "epoch": 2.1950604043781348,
      "grad_norm": 2.90625,
      "learning_rate": 1.4906371124974364e-05,
      "loss": 0.832,
      "step": 626310
    },
    {
      "epoch": 2.19509545188503,
      "grad_norm": 3.125,
      "learning_rate": 1.4905722096310662e-05,
      "loss": 0.8171,
      "step": 626320
    },
    {
      "epoch": 2.195130499391926,
      "grad_norm": 2.890625,
      "learning_rate": 1.490507306764696e-05,
      "loss": 0.8596,
      "step": 626330
    },
    {
      "epoch": 2.195165546898821,
      "grad_norm": 2.46875,
      "learning_rate": 1.4904424038983258e-05,
      "loss": 0.7259,
      "step": 626340
    },
    {
      "epoch": 2.195200594405717,
      "grad_norm": 3.125,
      "learning_rate": 1.4903775010319556e-05,
      "loss": 0.8624,
      "step": 626350
    },
    {
      "epoch": 2.1952356419126127,
      "grad_norm": 3.140625,
      "learning_rate": 1.4903125981655856e-05,
      "loss": 0.8536,
      "step": 626360
    },
    {
      "epoch": 2.195270689419508,
      "grad_norm": 3.203125,
      "learning_rate": 1.4902476952992154e-05,
      "loss": 0.7385,
      "step": 626370
    },
    {
      "epoch": 2.1953057369264037,
      "grad_norm": 2.65625,
      "learning_rate": 1.4901827924328452e-05,
      "loss": 0.809,
      "step": 626380
    },
    {
      "epoch": 2.1953407844332995,
      "grad_norm": 2.984375,
      "learning_rate": 1.490117889566475e-05,
      "loss": 0.84,
      "step": 626390
    },
    {
      "epoch": 2.195375831940195,
      "grad_norm": 2.984375,
      "learning_rate": 1.4900529867001048e-05,
      "loss": 0.8084,
      "step": 626400
    },
    {
      "epoch": 2.1954108794470906,
      "grad_norm": 3.203125,
      "learning_rate": 1.4899880838337346e-05,
      "loss": 0.7823,
      "step": 626410
    },
    {
      "epoch": 2.1954459269539863,
      "grad_norm": 2.59375,
      "learning_rate": 1.4899231809673644e-05,
      "loss": 0.8751,
      "step": 626420
    },
    {
      "epoch": 2.1954809744608816,
      "grad_norm": 2.78125,
      "learning_rate": 1.489858278100994e-05,
      "loss": 0.7804,
      "step": 626430
    },
    {
      "epoch": 2.1955160219677774,
      "grad_norm": 3.0625,
      "learning_rate": 1.4897933752346238e-05,
      "loss": 0.7977,
      "step": 626440
    },
    {
      "epoch": 2.1955510694746727,
      "grad_norm": 3.296875,
      "learning_rate": 1.4897284723682536e-05,
      "loss": 0.8224,
      "step": 626450
    },
    {
      "epoch": 2.1955861169815685,
      "grad_norm": 2.921875,
      "learning_rate": 1.4896635695018834e-05,
      "loss": 0.8575,
      "step": 626460
    },
    {
      "epoch": 2.1956211644884642,
      "grad_norm": 3.328125,
      "learning_rate": 1.4895986666355132e-05,
      "loss": 0.888,
      "step": 626470
    },
    {
      "epoch": 2.1956562119953595,
      "grad_norm": 2.859375,
      "learning_rate": 1.4895337637691432e-05,
      "loss": 0.8741,
      "step": 626480
    },
    {
      "epoch": 2.1956912595022553,
      "grad_norm": 2.59375,
      "learning_rate": 1.489468860902773e-05,
      "loss": 0.8376,
      "step": 626490
    },
    {
      "epoch": 2.195726307009151,
      "grad_norm": 3.125,
      "learning_rate": 1.4894039580364028e-05,
      "loss": 0.8353,
      "step": 626500
    },
    {
      "epoch": 2.1957613545160464,
      "grad_norm": 2.890625,
      "learning_rate": 1.4893390551700326e-05,
      "loss": 0.8593,
      "step": 626510
    },
    {
      "epoch": 2.195796402022942,
      "grad_norm": 2.9375,
      "learning_rate": 1.4892741523036624e-05,
      "loss": 0.8019,
      "step": 626520
    },
    {
      "epoch": 2.195831449529838,
      "grad_norm": 3.140625,
      "learning_rate": 1.4892092494372922e-05,
      "loss": 0.8342,
      "step": 626530
    },
    {
      "epoch": 2.195866497036733,
      "grad_norm": 3.0625,
      "learning_rate": 1.4891443465709221e-05,
      "loss": 0.8395,
      "step": 626540
    },
    {
      "epoch": 2.195901544543629,
      "grad_norm": 3.078125,
      "learning_rate": 1.489079443704552e-05,
      "loss": 0.8391,
      "step": 626550
    },
    {
      "epoch": 2.1959365920505247,
      "grad_norm": 2.640625,
      "learning_rate": 1.4890145408381817e-05,
      "loss": 0.7411,
      "step": 626560
    },
    {
      "epoch": 2.19597163955742,
      "grad_norm": 3.21875,
      "learning_rate": 1.4889496379718115e-05,
      "loss": 0.7947,
      "step": 626570
    },
    {
      "epoch": 2.196006687064316,
      "grad_norm": 2.984375,
      "learning_rate": 1.4888847351054413e-05,
      "loss": 0.7326,
      "step": 626580
    },
    {
      "epoch": 2.196041734571211,
      "grad_norm": 2.78125,
      "learning_rate": 1.4888198322390711e-05,
      "loss": 0.9035,
      "step": 626590
    },
    {
      "epoch": 2.196076782078107,
      "grad_norm": 3.328125,
      "learning_rate": 1.488754929372701e-05,
      "loss": 0.8223,
      "step": 626600
    },
    {
      "epoch": 2.1961118295850026,
      "grad_norm": 2.59375,
      "learning_rate": 1.4886900265063309e-05,
      "loss": 0.7424,
      "step": 626610
    },
    {
      "epoch": 2.196146877091898,
      "grad_norm": 2.90625,
      "learning_rate": 1.4886251236399604e-05,
      "loss": 0.8303,
      "step": 626620
    },
    {
      "epoch": 2.1961819245987937,
      "grad_norm": 2.9375,
      "learning_rate": 1.4885602207735902e-05,
      "loss": 0.8555,
      "step": 626630
    },
    {
      "epoch": 2.1962169721056894,
      "grad_norm": 3.0,
      "learning_rate": 1.48849531790722e-05,
      "loss": 0.8543,
      "step": 626640
    },
    {
      "epoch": 2.1962520196125848,
      "grad_norm": 2.546875,
      "learning_rate": 1.4884304150408498e-05,
      "loss": 0.8084,
      "step": 626650
    },
    {
      "epoch": 2.1962870671194805,
      "grad_norm": 3.28125,
      "learning_rate": 1.4883655121744797e-05,
      "loss": 0.8065,
      "step": 626660
    },
    {
      "epoch": 2.1963221146263763,
      "grad_norm": 3.171875,
      "learning_rate": 1.4883006093081095e-05,
      "loss": 0.8936,
      "step": 626670
    },
    {
      "epoch": 2.1963571621332716,
      "grad_norm": 3.125,
      "learning_rate": 1.4882357064417393e-05,
      "loss": 0.7867,
      "step": 626680
    },
    {
      "epoch": 2.1963922096401673,
      "grad_norm": 2.9375,
      "learning_rate": 1.4881708035753691e-05,
      "loss": 0.8639,
      "step": 626690
    },
    {
      "epoch": 2.1964272571470627,
      "grad_norm": 2.65625,
      "learning_rate": 1.488105900708999e-05,
      "loss": 0.7474,
      "step": 626700
    },
    {
      "epoch": 2.1964623046539584,
      "grad_norm": 2.453125,
      "learning_rate": 1.4880409978426287e-05,
      "loss": 0.718,
      "step": 626710
    },
    {
      "epoch": 2.196497352160854,
      "grad_norm": 2.859375,
      "learning_rate": 1.4879760949762585e-05,
      "loss": 0.8046,
      "step": 626720
    },
    {
      "epoch": 2.1965323996677495,
      "grad_norm": 3.3125,
      "learning_rate": 1.4879111921098885e-05,
      "loss": 0.8524,
      "step": 626730
    },
    {
      "epoch": 2.1965674471746452,
      "grad_norm": 2.953125,
      "learning_rate": 1.4878462892435183e-05,
      "loss": 0.7662,
      "step": 626740
    },
    {
      "epoch": 2.196602494681541,
      "grad_norm": 2.734375,
      "learning_rate": 1.4877813863771481e-05,
      "loss": 0.78,
      "step": 626750
    },
    {
      "epoch": 2.1966375421884363,
      "grad_norm": 2.8125,
      "learning_rate": 1.4877164835107779e-05,
      "loss": 0.8228,
      "step": 626760
    },
    {
      "epoch": 2.196672589695332,
      "grad_norm": 3.015625,
      "learning_rate": 1.4876515806444077e-05,
      "loss": 0.8141,
      "step": 626770
    },
    {
      "epoch": 2.196707637202228,
      "grad_norm": 3.203125,
      "learning_rate": 1.4875866777780375e-05,
      "loss": 0.8173,
      "step": 626780
    },
    {
      "epoch": 2.196742684709123,
      "grad_norm": 3.0625,
      "learning_rate": 1.4875217749116673e-05,
      "loss": 0.7942,
      "step": 626790
    },
    {
      "epoch": 2.196777732216019,
      "grad_norm": 2.359375,
      "learning_rate": 1.4874568720452973e-05,
      "loss": 0.7677,
      "step": 626800
    },
    {
      "epoch": 2.196812779722914,
      "grad_norm": 2.828125,
      "learning_rate": 1.4873919691789267e-05,
      "loss": 0.8073,
      "step": 626810
    },
    {
      "epoch": 2.19684782722981,
      "grad_norm": 2.671875,
      "learning_rate": 1.4873270663125565e-05,
      "loss": 0.7835,
      "step": 626820
    },
    {
      "epoch": 2.1968828747367057,
      "grad_norm": 2.96875,
      "learning_rate": 1.4872621634461863e-05,
      "loss": 0.7551,
      "step": 626830
    },
    {
      "epoch": 2.196917922243601,
      "grad_norm": 2.5,
      "learning_rate": 1.4871972605798163e-05,
      "loss": 0.7353,
      "step": 626840
    },
    {
      "epoch": 2.196952969750497,
      "grad_norm": 2.890625,
      "learning_rate": 1.4871323577134461e-05,
      "loss": 0.7974,
      "step": 626850
    },
    {
      "epoch": 2.1969880172573926,
      "grad_norm": 2.4375,
      "learning_rate": 1.4870674548470759e-05,
      "loss": 0.7735,
      "step": 626860
    },
    {
      "epoch": 2.197023064764288,
      "grad_norm": 2.75,
      "learning_rate": 1.4870025519807057e-05,
      "loss": 0.7368,
      "step": 626870
    },
    {
      "epoch": 2.1970581122711836,
      "grad_norm": 3.34375,
      "learning_rate": 1.4869376491143355e-05,
      "loss": 0.8403,
      "step": 626880
    },
    {
      "epoch": 2.1970931597780794,
      "grad_norm": 3.046875,
      "learning_rate": 1.4868727462479653e-05,
      "loss": 0.872,
      "step": 626890
    },
    {
      "epoch": 2.1971282072849747,
      "grad_norm": 2.765625,
      "learning_rate": 1.4868078433815951e-05,
      "loss": 0.8003,
      "step": 626900
    },
    {
      "epoch": 2.1971632547918705,
      "grad_norm": 2.96875,
      "learning_rate": 1.486742940515225e-05,
      "loss": 0.862,
      "step": 626910
    },
    {
      "epoch": 2.197198302298766,
      "grad_norm": 3.28125,
      "learning_rate": 1.4866780376488549e-05,
      "loss": 0.7958,
      "step": 626920
    },
    {
      "epoch": 2.1972333498056615,
      "grad_norm": 3.21875,
      "learning_rate": 1.4866131347824847e-05,
      "loss": 0.8204,
      "step": 626930
    },
    {
      "epoch": 2.1972683973125573,
      "grad_norm": 2.921875,
      "learning_rate": 1.4865482319161145e-05,
      "loss": 0.8137,
      "step": 626940
    },
    {
      "epoch": 2.1973034448194526,
      "grad_norm": 3.125,
      "learning_rate": 1.4864833290497443e-05,
      "loss": 0.7926,
      "step": 626950
    },
    {
      "epoch": 2.1973384923263484,
      "grad_norm": 3.046875,
      "learning_rate": 1.486418426183374e-05,
      "loss": 0.9104,
      "step": 626960
    },
    {
      "epoch": 2.197373539833244,
      "grad_norm": 2.65625,
      "learning_rate": 1.4863535233170039e-05,
      "loss": 0.8591,
      "step": 626970
    },
    {
      "epoch": 2.1974085873401394,
      "grad_norm": 2.953125,
      "learning_rate": 1.4862886204506338e-05,
      "loss": 0.7851,
      "step": 626980
    },
    {
      "epoch": 2.197443634847035,
      "grad_norm": 2.9375,
      "learning_rate": 1.4862237175842636e-05,
      "loss": 0.8196,
      "step": 626990
    },
    {
      "epoch": 2.197478682353931,
      "grad_norm": 2.96875,
      "learning_rate": 1.4861588147178931e-05,
      "loss": 0.729,
      "step": 627000
    },
    {
      "epoch": 2.1975137298608263,
      "grad_norm": 3.1875,
      "learning_rate": 1.4860939118515229e-05,
      "loss": 0.8029,
      "step": 627010
    },
    {
      "epoch": 2.197548777367722,
      "grad_norm": 3.046875,
      "learning_rate": 1.4860290089851529e-05,
      "loss": 0.8065,
      "step": 627020
    },
    {
      "epoch": 2.1975838248746173,
      "grad_norm": 3.28125,
      "learning_rate": 1.4859641061187827e-05,
      "loss": 0.7708,
      "step": 627030
    },
    {
      "epoch": 2.197618872381513,
      "grad_norm": 2.90625,
      "learning_rate": 1.4858992032524125e-05,
      "loss": 0.8206,
      "step": 627040
    },
    {
      "epoch": 2.197653919888409,
      "grad_norm": 2.640625,
      "learning_rate": 1.4858343003860423e-05,
      "loss": 0.7883,
      "step": 627050
    },
    {
      "epoch": 2.197688967395304,
      "grad_norm": 3.0625,
      "learning_rate": 1.485769397519672e-05,
      "loss": 0.8228,
      "step": 627060
    },
    {
      "epoch": 2.1977240149022,
      "grad_norm": 3.171875,
      "learning_rate": 1.4857044946533019e-05,
      "loss": 0.8595,
      "step": 627070
    },
    {
      "epoch": 2.1977590624090957,
      "grad_norm": 2.765625,
      "learning_rate": 1.4856395917869317e-05,
      "loss": 0.7186,
      "step": 627080
    },
    {
      "epoch": 2.197794109915991,
      "grad_norm": 3.328125,
      "learning_rate": 1.4855746889205616e-05,
      "loss": 0.8482,
      "step": 627090
    },
    {
      "epoch": 2.1978291574228868,
      "grad_norm": 3.125,
      "learning_rate": 1.4855097860541914e-05,
      "loss": 0.7516,
      "step": 627100
    },
    {
      "epoch": 2.1978642049297825,
      "grad_norm": 3.046875,
      "learning_rate": 1.4854448831878212e-05,
      "loss": 0.8586,
      "step": 627110
    },
    {
      "epoch": 2.197899252436678,
      "grad_norm": 3.203125,
      "learning_rate": 1.485379980321451e-05,
      "loss": 0.8773,
      "step": 627120
    },
    {
      "epoch": 2.1979342999435736,
      "grad_norm": 2.390625,
      "learning_rate": 1.4853150774550808e-05,
      "loss": 0.7592,
      "step": 627130
    },
    {
      "epoch": 2.197969347450469,
      "grad_norm": 3.015625,
      "learning_rate": 1.4852501745887106e-05,
      "loss": 0.7876,
      "step": 627140
    },
    {
      "epoch": 2.1980043949573647,
      "grad_norm": 2.78125,
      "learning_rate": 1.4851852717223404e-05,
      "loss": 0.828,
      "step": 627150
    },
    {
      "epoch": 2.1980394424642604,
      "grad_norm": 2.8125,
      "learning_rate": 1.4851203688559704e-05,
      "loss": 0.8163,
      "step": 627160
    },
    {
      "epoch": 2.1980744899711557,
      "grad_norm": 3.234375,
      "learning_rate": 1.4850554659896002e-05,
      "loss": 0.7916,
      "step": 627170
    },
    {
      "epoch": 2.1981095374780515,
      "grad_norm": 2.5,
      "learning_rate": 1.48499056312323e-05,
      "loss": 0.8302,
      "step": 627180
    },
    {
      "epoch": 2.1981445849849472,
      "grad_norm": 2.875,
      "learning_rate": 1.4849256602568595e-05,
      "loss": 0.7825,
      "step": 627190
    },
    {
      "epoch": 2.1981796324918426,
      "grad_norm": 2.765625,
      "learning_rate": 1.4848607573904893e-05,
      "loss": 0.8162,
      "step": 627200
    },
    {
      "epoch": 2.1982146799987383,
      "grad_norm": 2.796875,
      "learning_rate": 1.4847958545241192e-05,
      "loss": 0.7855,
      "step": 627210
    },
    {
      "epoch": 2.198249727505634,
      "grad_norm": 3.078125,
      "learning_rate": 1.484730951657749e-05,
      "loss": 0.8786,
      "step": 627220
    },
    {
      "epoch": 2.1982847750125294,
      "grad_norm": 2.890625,
      "learning_rate": 1.4846660487913788e-05,
      "loss": 0.8029,
      "step": 627230
    },
    {
      "epoch": 2.198319822519425,
      "grad_norm": 3.21875,
      "learning_rate": 1.4846011459250086e-05,
      "loss": 0.8523,
      "step": 627240
    },
    {
      "epoch": 2.1983548700263205,
      "grad_norm": 3.046875,
      "learning_rate": 1.4845362430586384e-05,
      "loss": 0.8321,
      "step": 627250
    },
    {
      "epoch": 2.198389917533216,
      "grad_norm": 2.84375,
      "learning_rate": 1.4844713401922682e-05,
      "loss": 0.7775,
      "step": 627260
    },
    {
      "epoch": 2.198424965040112,
      "grad_norm": 2.9375,
      "learning_rate": 1.484406437325898e-05,
      "loss": 0.842,
      "step": 627270
    },
    {
      "epoch": 2.1984600125470073,
      "grad_norm": 2.421875,
      "learning_rate": 1.484341534459528e-05,
      "loss": 0.8674,
      "step": 627280
    },
    {
      "epoch": 2.198495060053903,
      "grad_norm": 2.984375,
      "learning_rate": 1.4842766315931578e-05,
      "loss": 0.8056,
      "step": 627290
    },
    {
      "epoch": 2.198530107560799,
      "grad_norm": 3.234375,
      "learning_rate": 1.4842117287267876e-05,
      "loss": 0.8075,
      "step": 627300
    },
    {
      "epoch": 2.198565155067694,
      "grad_norm": 3.671875,
      "learning_rate": 1.4841468258604174e-05,
      "loss": 0.8471,
      "step": 627310
    },
    {
      "epoch": 2.19860020257459,
      "grad_norm": 3.234375,
      "learning_rate": 1.4840819229940472e-05,
      "loss": 0.7954,
      "step": 627320
    },
    {
      "epoch": 2.1986352500814856,
      "grad_norm": 2.53125,
      "learning_rate": 1.484017020127677e-05,
      "loss": 0.8151,
      "step": 627330
    },
    {
      "epoch": 2.198670297588381,
      "grad_norm": 2.875,
      "learning_rate": 1.4839521172613068e-05,
      "loss": 0.8347,
      "step": 627340
    },
    {
      "epoch": 2.1987053450952767,
      "grad_norm": 2.625,
      "learning_rate": 1.4838872143949368e-05,
      "loss": 0.8014,
      "step": 627350
    },
    {
      "epoch": 2.198740392602172,
      "grad_norm": 2.859375,
      "learning_rate": 1.4838223115285666e-05,
      "loss": 0.7703,
      "step": 627360
    },
    {
      "epoch": 2.198775440109068,
      "grad_norm": 3.15625,
      "learning_rate": 1.483757408662196e-05,
      "loss": 0.8349,
      "step": 627370
    },
    {
      "epoch": 2.1988104876159635,
      "grad_norm": 2.890625,
      "learning_rate": 1.4836925057958258e-05,
      "loss": 0.8265,
      "step": 627380
    },
    {
      "epoch": 2.198845535122859,
      "grad_norm": 3.421875,
      "learning_rate": 1.4836276029294558e-05,
      "loss": 0.8892,
      "step": 627390
    },
    {
      "epoch": 2.1988805826297546,
      "grad_norm": 2.6875,
      "learning_rate": 1.4835627000630856e-05,
      "loss": 0.7704,
      "step": 627400
    },
    {
      "epoch": 2.1989156301366504,
      "grad_norm": 2.8125,
      "learning_rate": 1.4834977971967154e-05,
      "loss": 0.8433,
      "step": 627410
    },
    {
      "epoch": 2.1989506776435457,
      "grad_norm": 2.921875,
      "learning_rate": 1.4834328943303452e-05,
      "loss": 0.8787,
      "step": 627420
    },
    {
      "epoch": 2.1989857251504414,
      "grad_norm": 3.203125,
      "learning_rate": 1.483367991463975e-05,
      "loss": 0.77,
      "step": 627430
    },
    {
      "epoch": 2.199020772657337,
      "grad_norm": 2.8125,
      "learning_rate": 1.4833030885976048e-05,
      "loss": 0.7683,
      "step": 627440
    },
    {
      "epoch": 2.1990558201642325,
      "grad_norm": 3.46875,
      "learning_rate": 1.4832381857312346e-05,
      "loss": 0.8338,
      "step": 627450
    },
    {
      "epoch": 2.1990908676711283,
      "grad_norm": 2.953125,
      "learning_rate": 1.4831732828648646e-05,
      "loss": 0.7956,
      "step": 627460
    },
    {
      "epoch": 2.1991259151780236,
      "grad_norm": 3.1875,
      "learning_rate": 1.4831083799984944e-05,
      "loss": 0.7653,
      "step": 627470
    },
    {
      "epoch": 2.1991609626849193,
      "grad_norm": 2.671875,
      "learning_rate": 1.4830434771321242e-05,
      "loss": 0.8165,
      "step": 627480
    },
    {
      "epoch": 2.199196010191815,
      "grad_norm": 3.03125,
      "learning_rate": 1.482978574265754e-05,
      "loss": 0.8553,
      "step": 627490
    },
    {
      "epoch": 2.1992310576987104,
      "grad_norm": 2.734375,
      "learning_rate": 1.4829136713993838e-05,
      "loss": 0.7641,
      "step": 627500
    },
    {
      "epoch": 2.199266105205606,
      "grad_norm": 3.03125,
      "learning_rate": 1.4828487685330136e-05,
      "loss": 0.8371,
      "step": 627510
    },
    {
      "epoch": 2.199301152712502,
      "grad_norm": 3.0625,
      "learning_rate": 1.4827838656666434e-05,
      "loss": 0.7938,
      "step": 627520
    },
    {
      "epoch": 2.1993362002193972,
      "grad_norm": 2.890625,
      "learning_rate": 1.4827189628002734e-05,
      "loss": 0.8094,
      "step": 627530
    },
    {
      "epoch": 2.199371247726293,
      "grad_norm": 3.0625,
      "learning_rate": 1.4826540599339032e-05,
      "loss": 0.8899,
      "step": 627540
    },
    {
      "epoch": 2.1994062952331888,
      "grad_norm": 2.734375,
      "learning_rate": 1.482589157067533e-05,
      "loss": 0.8089,
      "step": 627550
    },
    {
      "epoch": 2.199441342740084,
      "grad_norm": 2.765625,
      "learning_rate": 1.4825242542011624e-05,
      "loss": 0.7958,
      "step": 627560
    },
    {
      "epoch": 2.19947639024698,
      "grad_norm": 3.140625,
      "learning_rate": 1.4824593513347924e-05,
      "loss": 0.8173,
      "step": 627570
    },
    {
      "epoch": 2.199511437753875,
      "grad_norm": 3.15625,
      "learning_rate": 1.4823944484684222e-05,
      "loss": 0.7953,
      "step": 627580
    },
    {
      "epoch": 2.199546485260771,
      "grad_norm": 3.109375,
      "learning_rate": 1.482329545602052e-05,
      "loss": 0.7995,
      "step": 627590
    },
    {
      "epoch": 2.1995815327676667,
      "grad_norm": 3.21875,
      "learning_rate": 1.4822646427356818e-05,
      "loss": 0.8652,
      "step": 627600
    },
    {
      "epoch": 2.199616580274562,
      "grad_norm": 2.875,
      "learning_rate": 1.4821997398693116e-05,
      "loss": 0.7193,
      "step": 627610
    },
    {
      "epoch": 2.1996516277814577,
      "grad_norm": 2.703125,
      "learning_rate": 1.4821348370029414e-05,
      "loss": 0.8358,
      "step": 627620
    },
    {
      "epoch": 2.1996866752883535,
      "grad_norm": 2.765625,
      "learning_rate": 1.4820699341365712e-05,
      "loss": 0.7274,
      "step": 627630
    },
    {
      "epoch": 2.199721722795249,
      "grad_norm": 3.25,
      "learning_rate": 1.4820050312702012e-05,
      "loss": 0.8079,
      "step": 627640
    },
    {
      "epoch": 2.1997567703021446,
      "grad_norm": 3.25,
      "learning_rate": 1.481940128403831e-05,
      "loss": 0.801,
      "step": 627650
    },
    {
      "epoch": 2.1997918178090403,
      "grad_norm": 2.390625,
      "learning_rate": 1.4818752255374608e-05,
      "loss": 0.7493,
      "step": 627660
    },
    {
      "epoch": 2.1998268653159356,
      "grad_norm": 2.921875,
      "learning_rate": 1.4818103226710906e-05,
      "loss": 0.8633,
      "step": 627670
    },
    {
      "epoch": 2.1998619128228314,
      "grad_norm": 2.984375,
      "learning_rate": 1.4817454198047204e-05,
      "loss": 0.8534,
      "step": 627680
    },
    {
      "epoch": 2.199896960329727,
      "grad_norm": 2.734375,
      "learning_rate": 1.4816805169383502e-05,
      "loss": 0.8707,
      "step": 627690
    },
    {
      "epoch": 2.1999320078366225,
      "grad_norm": 2.78125,
      "learning_rate": 1.48161561407198e-05,
      "loss": 0.8451,
      "step": 627700
    },
    {
      "epoch": 2.199967055343518,
      "grad_norm": 2.78125,
      "learning_rate": 1.48155071120561e-05,
      "loss": 0.7793,
      "step": 627710
    },
    {
      "epoch": 2.2000021028504135,
      "grad_norm": 3.0625,
      "learning_rate": 1.4814858083392397e-05,
      "loss": 0.8793,
      "step": 627720
    },
    {
      "epoch": 2.2000371503573093,
      "grad_norm": 2.703125,
      "learning_rate": 1.4814209054728695e-05,
      "loss": 0.8677,
      "step": 627730
    },
    {
      "epoch": 2.200072197864205,
      "grad_norm": 3.140625,
      "learning_rate": 1.4813560026064993e-05,
      "loss": 0.7954,
      "step": 627740
    },
    {
      "epoch": 2.2001072453711004,
      "grad_norm": 3.1875,
      "learning_rate": 1.4812910997401288e-05,
      "loss": 0.856,
      "step": 627750
    },
    {
      "epoch": 2.200142292877996,
      "grad_norm": 2.765625,
      "learning_rate": 1.4812261968737588e-05,
      "loss": 0.8373,
      "step": 627760
    },
    {
      "epoch": 2.200177340384892,
      "grad_norm": 3.0625,
      "learning_rate": 1.4811612940073886e-05,
      "loss": 0.813,
      "step": 627770
    },
    {
      "epoch": 2.200212387891787,
      "grad_norm": 2.65625,
      "learning_rate": 1.4810963911410184e-05,
      "loss": 0.8099,
      "step": 627780
    },
    {
      "epoch": 2.200247435398683,
      "grad_norm": 3.25,
      "learning_rate": 1.4810314882746482e-05,
      "loss": 0.7976,
      "step": 627790
    },
    {
      "epoch": 2.2002824829055787,
      "grad_norm": 2.578125,
      "learning_rate": 1.480966585408278e-05,
      "loss": 0.6811,
      "step": 627800
    },
    {
      "epoch": 2.200317530412474,
      "grad_norm": 3.078125,
      "learning_rate": 1.4809016825419078e-05,
      "loss": 0.8346,
      "step": 627810
    },
    {
      "epoch": 2.20035257791937,
      "grad_norm": 2.75,
      "learning_rate": 1.4808367796755376e-05,
      "loss": 0.7486,
      "step": 627820
    },
    {
      "epoch": 2.2003876254262655,
      "grad_norm": 3.21875,
      "learning_rate": 1.4807718768091675e-05,
      "loss": 0.7943,
      "step": 627830
    },
    {
      "epoch": 2.200422672933161,
      "grad_norm": 3.03125,
      "learning_rate": 1.4807069739427973e-05,
      "loss": 0.7958,
      "step": 627840
    },
    {
      "epoch": 2.2004577204400566,
      "grad_norm": 2.875,
      "learning_rate": 1.4806420710764271e-05,
      "loss": 0.7437,
      "step": 627850
    },
    {
      "epoch": 2.200492767946952,
      "grad_norm": 2.9375,
      "learning_rate": 1.480577168210057e-05,
      "loss": 0.8683,
      "step": 627860
    },
    {
      "epoch": 2.2005278154538477,
      "grad_norm": 2.671875,
      "learning_rate": 1.4805122653436867e-05,
      "loss": 0.7607,
      "step": 627870
    },
    {
      "epoch": 2.2005628629607434,
      "grad_norm": 2.515625,
      "learning_rate": 1.4804473624773165e-05,
      "loss": 0.8096,
      "step": 627880
    },
    {
      "epoch": 2.2005979104676388,
      "grad_norm": 2.65625,
      "learning_rate": 1.4803824596109463e-05,
      "loss": 0.7269,
      "step": 627890
    },
    {
      "epoch": 2.2006329579745345,
      "grad_norm": 2.859375,
      "learning_rate": 1.4803175567445763e-05,
      "loss": 0.8338,
      "step": 627900
    },
    {
      "epoch": 2.2006680054814303,
      "grad_norm": 2.8125,
      "learning_rate": 1.4802526538782061e-05,
      "loss": 0.8602,
      "step": 627910
    },
    {
      "epoch": 2.2007030529883256,
      "grad_norm": 2.65625,
      "learning_rate": 1.4801877510118359e-05,
      "loss": 0.8672,
      "step": 627920
    },
    {
      "epoch": 2.2007381004952213,
      "grad_norm": 2.734375,
      "learning_rate": 1.4801228481454657e-05,
      "loss": 0.8362,
      "step": 627930
    },
    {
      "epoch": 2.200773148002117,
      "grad_norm": 2.5625,
      "learning_rate": 1.4800579452790953e-05,
      "loss": 0.8469,
      "step": 627940
    },
    {
      "epoch": 2.2008081955090124,
      "grad_norm": 2.1875,
      "learning_rate": 1.4799930424127251e-05,
      "loss": 0.8136,
      "step": 627950
    },
    {
      "epoch": 2.200843243015908,
      "grad_norm": 2.640625,
      "learning_rate": 1.479928139546355e-05,
      "loss": 0.906,
      "step": 627960
    },
    {
      "epoch": 2.2008782905228035,
      "grad_norm": 3.09375,
      "learning_rate": 1.4798632366799847e-05,
      "loss": 0.8275,
      "step": 627970
    },
    {
      "epoch": 2.2009133380296992,
      "grad_norm": 2.546875,
      "learning_rate": 1.4797983338136145e-05,
      "loss": 0.78,
      "step": 627980
    },
    {
      "epoch": 2.200948385536595,
      "grad_norm": 2.78125,
      "learning_rate": 1.4797334309472443e-05,
      "loss": 0.785,
      "step": 627990
    },
    {
      "epoch": 2.2009834330434903,
      "grad_norm": 2.421875,
      "learning_rate": 1.4796685280808741e-05,
      "loss": 0.8428,
      "step": 628000
    },
    {
      "epoch": 2.201018480550386,
      "grad_norm": 2.421875,
      "learning_rate": 1.4796036252145041e-05,
      "loss": 0.8464,
      "step": 628010
    },
    {
      "epoch": 2.201053528057282,
      "grad_norm": 2.609375,
      "learning_rate": 1.4795387223481339e-05,
      "loss": 0.8537,
      "step": 628020
    },
    {
      "epoch": 2.201088575564177,
      "grad_norm": 3.015625,
      "learning_rate": 1.4794738194817637e-05,
      "loss": 0.7807,
      "step": 628030
    },
    {
      "epoch": 2.201123623071073,
      "grad_norm": 2.953125,
      "learning_rate": 1.4794089166153935e-05,
      "loss": 0.8039,
      "step": 628040
    },
    {
      "epoch": 2.2011586705779687,
      "grad_norm": 3.140625,
      "learning_rate": 1.4793440137490233e-05,
      "loss": 0.8034,
      "step": 628050
    },
    {
      "epoch": 2.201193718084864,
      "grad_norm": 2.859375,
      "learning_rate": 1.4792791108826531e-05,
      "loss": 0.7844,
      "step": 628060
    },
    {
      "epoch": 2.2012287655917597,
      "grad_norm": 3.078125,
      "learning_rate": 1.4792142080162829e-05,
      "loss": 0.7721,
      "step": 628070
    },
    {
      "epoch": 2.201263813098655,
      "grad_norm": 2.796875,
      "learning_rate": 1.4791493051499129e-05,
      "loss": 0.7572,
      "step": 628080
    },
    {
      "epoch": 2.201298860605551,
      "grad_norm": 2.703125,
      "learning_rate": 1.4790844022835427e-05,
      "loss": 0.7696,
      "step": 628090
    },
    {
      "epoch": 2.2013339081124466,
      "grad_norm": 3.453125,
      "learning_rate": 1.4790194994171725e-05,
      "loss": 0.742,
      "step": 628100
    },
    {
      "epoch": 2.201368955619342,
      "grad_norm": 2.59375,
      "learning_rate": 1.4789545965508023e-05,
      "loss": 0.7772,
      "step": 628110
    },
    {
      "epoch": 2.2014040031262376,
      "grad_norm": 2.984375,
      "learning_rate": 1.478889693684432e-05,
      "loss": 0.7542,
      "step": 628120
    },
    {
      "epoch": 2.2014390506331334,
      "grad_norm": 3.3125,
      "learning_rate": 1.4788247908180617e-05,
      "loss": 0.8287,
      "step": 628130
    },
    {
      "epoch": 2.2014740981400287,
      "grad_norm": 3.0625,
      "learning_rate": 1.4787598879516915e-05,
      "loss": 0.87,
      "step": 628140
    },
    {
      "epoch": 2.2015091456469245,
      "grad_norm": 3.3125,
      "learning_rate": 1.4786949850853213e-05,
      "loss": 0.8641,
      "step": 628150
    },
    {
      "epoch": 2.20154419315382,
      "grad_norm": 3.046875,
      "learning_rate": 1.4786300822189511e-05,
      "loss": 0.8288,
      "step": 628160
    },
    {
      "epoch": 2.2015792406607155,
      "grad_norm": 2.90625,
      "learning_rate": 1.4785651793525809e-05,
      "loss": 0.7817,
      "step": 628170
    },
    {
      "epoch": 2.2016142881676113,
      "grad_norm": 2.78125,
      "learning_rate": 1.4785002764862107e-05,
      "loss": 0.7864,
      "step": 628180
    },
    {
      "epoch": 2.2016493356745066,
      "grad_norm": 3.109375,
      "learning_rate": 1.4784353736198407e-05,
      "loss": 0.783,
      "step": 628190
    },
    {
      "epoch": 2.2016843831814024,
      "grad_norm": 2.890625,
      "learning_rate": 1.4783704707534705e-05,
      "loss": 0.8301,
      "step": 628200
    },
    {
      "epoch": 2.201719430688298,
      "grad_norm": 3.4375,
      "learning_rate": 1.4783055678871003e-05,
      "loss": 0.7783,
      "step": 628210
    },
    {
      "epoch": 2.2017544781951934,
      "grad_norm": 2.796875,
      "learning_rate": 1.47824066502073e-05,
      "loss": 0.8055,
      "step": 628220
    },
    {
      "epoch": 2.201789525702089,
      "grad_norm": 2.859375,
      "learning_rate": 1.4781757621543599e-05,
      "loss": 0.7966,
      "step": 628230
    },
    {
      "epoch": 2.201824573208985,
      "grad_norm": 3.03125,
      "learning_rate": 1.4781108592879897e-05,
      "loss": 0.9085,
      "step": 628240
    },
    {
      "epoch": 2.2018596207158803,
      "grad_norm": 2.703125,
      "learning_rate": 1.4780459564216195e-05,
      "loss": 0.7599,
      "step": 628250
    },
    {
      "epoch": 2.201894668222776,
      "grad_norm": 2.984375,
      "learning_rate": 1.4779810535552494e-05,
      "loss": 0.8396,
      "step": 628260
    },
    {
      "epoch": 2.2019297157296718,
      "grad_norm": 2.390625,
      "learning_rate": 1.4779161506888792e-05,
      "loss": 0.7664,
      "step": 628270
    },
    {
      "epoch": 2.201964763236567,
      "grad_norm": 2.515625,
      "learning_rate": 1.477851247822509e-05,
      "loss": 0.7886,
      "step": 628280
    },
    {
      "epoch": 2.201999810743463,
      "grad_norm": 3.328125,
      "learning_rate": 1.4777863449561388e-05,
      "loss": 0.8006,
      "step": 628290
    },
    {
      "epoch": 2.202034858250358,
      "grad_norm": 3.015625,
      "learning_rate": 1.4777214420897686e-05,
      "loss": 0.7425,
      "step": 628300
    },
    {
      "epoch": 2.202069905757254,
      "grad_norm": 2.78125,
      "learning_rate": 1.4776565392233983e-05,
      "loss": 0.7937,
      "step": 628310
    },
    {
      "epoch": 2.2021049532641497,
      "grad_norm": 3.578125,
      "learning_rate": 1.477591636357028e-05,
      "loss": 0.869,
      "step": 628320
    },
    {
      "epoch": 2.202140000771045,
      "grad_norm": 3.203125,
      "learning_rate": 1.4775267334906579e-05,
      "loss": 0.8273,
      "step": 628330
    },
    {
      "epoch": 2.2021750482779407,
      "grad_norm": 3.3125,
      "learning_rate": 1.4774618306242877e-05,
      "loss": 0.8211,
      "step": 628340
    },
    {
      "epoch": 2.2022100957848365,
      "grad_norm": 3.046875,
      "learning_rate": 1.4773969277579175e-05,
      "loss": 0.919,
      "step": 628350
    },
    {
      "epoch": 2.202245143291732,
      "grad_norm": 2.984375,
      "learning_rate": 1.4773320248915473e-05,
      "loss": 0.7947,
      "step": 628360
    },
    {
      "epoch": 2.2022801907986276,
      "grad_norm": 2.6875,
      "learning_rate": 1.477267122025177e-05,
      "loss": 0.7304,
      "step": 628370
    },
    {
      "epoch": 2.2023152383055233,
      "grad_norm": 2.734375,
      "learning_rate": 1.477202219158807e-05,
      "loss": 0.8083,
      "step": 628380
    },
    {
      "epoch": 2.2023502858124187,
      "grad_norm": 2.671875,
      "learning_rate": 1.4771373162924368e-05,
      "loss": 0.8069,
      "step": 628390
    },
    {
      "epoch": 2.2023853333193144,
      "grad_norm": 2.5625,
      "learning_rate": 1.4770724134260666e-05,
      "loss": 0.7482,
      "step": 628400
    },
    {
      "epoch": 2.2024203808262097,
      "grad_norm": 2.609375,
      "learning_rate": 1.4770075105596964e-05,
      "loss": 0.7645,
      "step": 628410
    },
    {
      "epoch": 2.2024554283331055,
      "grad_norm": 3.109375,
      "learning_rate": 1.4769426076933262e-05,
      "loss": 0.8157,
      "step": 628420
    },
    {
      "epoch": 2.2024904758400012,
      "grad_norm": 2.734375,
      "learning_rate": 1.476877704826956e-05,
      "loss": 0.8266,
      "step": 628430
    },
    {
      "epoch": 2.2025255233468966,
      "grad_norm": 2.5625,
      "learning_rate": 1.4768128019605858e-05,
      "loss": 0.8398,
      "step": 628440
    },
    {
      "epoch": 2.2025605708537923,
      "grad_norm": 2.984375,
      "learning_rate": 1.4767478990942158e-05,
      "loss": 0.778,
      "step": 628450
    },
    {
      "epoch": 2.202595618360688,
      "grad_norm": 3.21875,
      "learning_rate": 1.4766829962278456e-05,
      "loss": 0.7899,
      "step": 628460
    },
    {
      "epoch": 2.2026306658675834,
      "grad_norm": 2.96875,
      "learning_rate": 1.4766180933614754e-05,
      "loss": 0.867,
      "step": 628470
    },
    {
      "epoch": 2.202665713374479,
      "grad_norm": 2.890625,
      "learning_rate": 1.4765531904951052e-05,
      "loss": 0.8291,
      "step": 628480
    },
    {
      "epoch": 2.202700760881375,
      "grad_norm": 2.875,
      "learning_rate": 1.476488287628735e-05,
      "loss": 0.836,
      "step": 628490
    },
    {
      "epoch": 2.20273580838827,
      "grad_norm": 2.453125,
      "learning_rate": 1.4764233847623646e-05,
      "loss": 0.8495,
      "step": 628500
    },
    {
      "epoch": 2.202770855895166,
      "grad_norm": 2.875,
      "learning_rate": 1.4763584818959944e-05,
      "loss": 0.7624,
      "step": 628510
    },
    {
      "epoch": 2.2028059034020613,
      "grad_norm": 2.71875,
      "learning_rate": 1.4762935790296242e-05,
      "loss": 0.8135,
      "step": 628520
    },
    {
      "epoch": 2.202840950908957,
      "grad_norm": 2.515625,
      "learning_rate": 1.476228676163254e-05,
      "loss": 0.8168,
      "step": 628530
    },
    {
      "epoch": 2.202875998415853,
      "grad_norm": 3.203125,
      "learning_rate": 1.4761637732968838e-05,
      "loss": 0.8752,
      "step": 628540
    },
    {
      "epoch": 2.202911045922748,
      "grad_norm": 3.015625,
      "learning_rate": 1.4760988704305136e-05,
      "loss": 0.8313,
      "step": 628550
    },
    {
      "epoch": 2.202946093429644,
      "grad_norm": 3.375,
      "learning_rate": 1.4760339675641436e-05,
      "loss": 0.8057,
      "step": 628560
    },
    {
      "epoch": 2.2029811409365396,
      "grad_norm": 2.90625,
      "learning_rate": 1.4759690646977734e-05,
      "loss": 0.822,
      "step": 628570
    },
    {
      "epoch": 2.203016188443435,
      "grad_norm": 2.921875,
      "learning_rate": 1.4759041618314032e-05,
      "loss": 0.911,
      "step": 628580
    },
    {
      "epoch": 2.2030512359503307,
      "grad_norm": 2.8125,
      "learning_rate": 1.475839258965033e-05,
      "loss": 0.797,
      "step": 628590
    },
    {
      "epoch": 2.2030862834572265,
      "grad_norm": 2.4375,
      "learning_rate": 1.4757743560986628e-05,
      "loss": 0.7726,
      "step": 628600
    },
    {
      "epoch": 2.2031213309641218,
      "grad_norm": 2.90625,
      "learning_rate": 1.4757094532322926e-05,
      "loss": 0.806,
      "step": 628610
    },
    {
      "epoch": 2.2031563784710175,
      "grad_norm": 2.859375,
      "learning_rate": 1.4756445503659224e-05,
      "loss": 0.7913,
      "step": 628620
    },
    {
      "epoch": 2.203191425977913,
      "grad_norm": 2.796875,
      "learning_rate": 1.4755796474995524e-05,
      "loss": 0.8978,
      "step": 628630
    },
    {
      "epoch": 2.2032264734848086,
      "grad_norm": 2.96875,
      "learning_rate": 1.4755147446331822e-05,
      "loss": 0.8166,
      "step": 628640
    },
    {
      "epoch": 2.2032615209917044,
      "grad_norm": 2.78125,
      "learning_rate": 1.475449841766812e-05,
      "loss": 0.8357,
      "step": 628650
    },
    {
      "epoch": 2.2032965684985997,
      "grad_norm": 3.46875,
      "learning_rate": 1.4753849389004418e-05,
      "loss": 0.8132,
      "step": 628660
    },
    {
      "epoch": 2.2033316160054954,
      "grad_norm": 3.21875,
      "learning_rate": 1.4753200360340716e-05,
      "loss": 0.8163,
      "step": 628670
    },
    {
      "epoch": 2.203366663512391,
      "grad_norm": 2.84375,
      "learning_rate": 1.4752551331677014e-05,
      "loss": 0.8956,
      "step": 628680
    },
    {
      "epoch": 2.2034017110192865,
      "grad_norm": 3.4375,
      "learning_rate": 1.475190230301331e-05,
      "loss": 0.8128,
      "step": 628690
    },
    {
      "epoch": 2.2034367585261823,
      "grad_norm": 2.734375,
      "learning_rate": 1.4751253274349608e-05,
      "loss": 0.8488,
      "step": 628700
    },
    {
      "epoch": 2.203471806033078,
      "grad_norm": 2.734375,
      "learning_rate": 1.4750604245685906e-05,
      "loss": 0.7947,
      "step": 628710
    },
    {
      "epoch": 2.2035068535399733,
      "grad_norm": 2.8125,
      "learning_rate": 1.4749955217022204e-05,
      "loss": 0.7033,
      "step": 628720
    },
    {
      "epoch": 2.203541901046869,
      "grad_norm": 2.90625,
      "learning_rate": 1.4749306188358502e-05,
      "loss": 0.7366,
      "step": 628730
    },
    {
      "epoch": 2.2035769485537644,
      "grad_norm": 2.625,
      "learning_rate": 1.4748657159694802e-05,
      "loss": 0.8356,
      "step": 628740
    },
    {
      "epoch": 2.20361199606066,
      "grad_norm": 2.890625,
      "learning_rate": 1.47480081310311e-05,
      "loss": 0.8021,
      "step": 628750
    },
    {
      "epoch": 2.203647043567556,
      "grad_norm": 2.796875,
      "learning_rate": 1.4747359102367398e-05,
      "loss": 0.8265,
      "step": 628760
    },
    {
      "epoch": 2.2036820910744512,
      "grad_norm": 2.640625,
      "learning_rate": 1.4746710073703696e-05,
      "loss": 0.8162,
      "step": 628770
    },
    {
      "epoch": 2.203717138581347,
      "grad_norm": 2.984375,
      "learning_rate": 1.4746061045039994e-05,
      "loss": 0.8117,
      "step": 628780
    },
    {
      "epoch": 2.2037521860882427,
      "grad_norm": 3.140625,
      "learning_rate": 1.4745412016376292e-05,
      "loss": 0.7565,
      "step": 628790
    },
    {
      "epoch": 2.203787233595138,
      "grad_norm": 2.78125,
      "learning_rate": 1.474476298771259e-05,
      "loss": 0.7875,
      "step": 628800
    },
    {
      "epoch": 2.203822281102034,
      "grad_norm": 2.828125,
      "learning_rate": 1.474411395904889e-05,
      "loss": 0.8058,
      "step": 628810
    },
    {
      "epoch": 2.2038573286089296,
      "grad_norm": 3.40625,
      "learning_rate": 1.4743464930385187e-05,
      "loss": 0.8108,
      "step": 628820
    },
    {
      "epoch": 2.203892376115825,
      "grad_norm": 2.875,
      "learning_rate": 1.4742815901721485e-05,
      "loss": 0.742,
      "step": 628830
    },
    {
      "epoch": 2.2039274236227206,
      "grad_norm": 2.640625,
      "learning_rate": 1.4742166873057783e-05,
      "loss": 0.8358,
      "step": 628840
    },
    {
      "epoch": 2.203962471129616,
      "grad_norm": 3.390625,
      "learning_rate": 1.4741517844394081e-05,
      "loss": 0.9514,
      "step": 628850
    },
    {
      "epoch": 2.2039975186365117,
      "grad_norm": 2.9375,
      "learning_rate": 1.474086881573038e-05,
      "loss": 0.8618,
      "step": 628860
    },
    {
      "epoch": 2.2040325661434075,
      "grad_norm": 3.171875,
      "learning_rate": 1.4740219787066677e-05,
      "loss": 0.7792,
      "step": 628870
    },
    {
      "epoch": 2.204067613650303,
      "grad_norm": 3.390625,
      "learning_rate": 1.4739570758402974e-05,
      "loss": 0.907,
      "step": 628880
    },
    {
      "epoch": 2.2041026611571986,
      "grad_norm": 2.5625,
      "learning_rate": 1.4738921729739272e-05,
      "loss": 0.7756,
      "step": 628890
    },
    {
      "epoch": 2.2041377086640943,
      "grad_norm": 2.78125,
      "learning_rate": 1.473827270107557e-05,
      "loss": 0.8126,
      "step": 628900
    },
    {
      "epoch": 2.2041727561709896,
      "grad_norm": 2.6875,
      "learning_rate": 1.4737623672411868e-05,
      "loss": 0.8006,
      "step": 628910
    },
    {
      "epoch": 2.2042078036778854,
      "grad_norm": 3.015625,
      "learning_rate": 1.4736974643748166e-05,
      "loss": 0.8359,
      "step": 628920
    },
    {
      "epoch": 2.204242851184781,
      "grad_norm": 3.140625,
      "learning_rate": 1.4736325615084465e-05,
      "loss": 0.8474,
      "step": 628930
    },
    {
      "epoch": 2.2042778986916765,
      "grad_norm": 2.59375,
      "learning_rate": 1.4735676586420763e-05,
      "loss": 0.8178,
      "step": 628940
    },
    {
      "epoch": 2.204312946198572,
      "grad_norm": 2.75,
      "learning_rate": 1.4735027557757061e-05,
      "loss": 0.8128,
      "step": 628950
    },
    {
      "epoch": 2.204347993705468,
      "grad_norm": 2.84375,
      "learning_rate": 1.473437852909336e-05,
      "loss": 0.7287,
      "step": 628960
    },
    {
      "epoch": 2.2043830412123633,
      "grad_norm": 2.8125,
      "learning_rate": 1.4733729500429657e-05,
      "loss": 0.8444,
      "step": 628970
    },
    {
      "epoch": 2.204418088719259,
      "grad_norm": 2.890625,
      "learning_rate": 1.4733080471765955e-05,
      "loss": 0.8932,
      "step": 628980
    },
    {
      "epoch": 2.2044531362261544,
      "grad_norm": 2.640625,
      "learning_rate": 1.4732431443102253e-05,
      "loss": 0.8826,
      "step": 628990
    },
    {
      "epoch": 2.20448818373305,
      "grad_norm": 2.828125,
      "learning_rate": 1.4731782414438553e-05,
      "loss": 0.8693,
      "step": 629000
    },
    {
      "epoch": 2.204523231239946,
      "grad_norm": 2.78125,
      "learning_rate": 1.4731133385774851e-05,
      "loss": 0.8286,
      "step": 629010
    },
    {
      "epoch": 2.204558278746841,
      "grad_norm": 2.546875,
      "learning_rate": 1.4730484357111149e-05,
      "loss": 0.7622,
      "step": 629020
    },
    {
      "epoch": 2.204593326253737,
      "grad_norm": 3.1875,
      "learning_rate": 1.4729835328447447e-05,
      "loss": 0.7972,
      "step": 629030
    },
    {
      "epoch": 2.2046283737606327,
      "grad_norm": 2.828125,
      "learning_rate": 1.4729186299783745e-05,
      "loss": 0.7488,
      "step": 629040
    },
    {
      "epoch": 2.204663421267528,
      "grad_norm": 3.015625,
      "learning_rate": 1.4728537271120043e-05,
      "loss": 0.8401,
      "step": 629050
    },
    {
      "epoch": 2.2046984687744238,
      "grad_norm": 2.953125,
      "learning_rate": 1.4727888242456341e-05,
      "loss": 0.8237,
      "step": 629060
    },
    {
      "epoch": 2.2047335162813195,
      "grad_norm": 2.765625,
      "learning_rate": 1.4727239213792637e-05,
      "loss": 0.7503,
      "step": 629070
    },
    {
      "epoch": 2.204768563788215,
      "grad_norm": 2.6875,
      "learning_rate": 1.4726590185128935e-05,
      "loss": 0.837,
      "step": 629080
    },
    {
      "epoch": 2.2048036112951106,
      "grad_norm": 3.109375,
      "learning_rate": 1.4725941156465233e-05,
      "loss": 0.8694,
      "step": 629090
    },
    {
      "epoch": 2.204838658802006,
      "grad_norm": 2.84375,
      "learning_rate": 1.4725292127801531e-05,
      "loss": 0.7635,
      "step": 629100
    },
    {
      "epoch": 2.2048737063089017,
      "grad_norm": 2.6875,
      "learning_rate": 1.4724643099137831e-05,
      "loss": 0.8036,
      "step": 629110
    },
    {
      "epoch": 2.2049087538157974,
      "grad_norm": 3.0,
      "learning_rate": 1.4723994070474129e-05,
      "loss": 0.8053,
      "step": 629120
    },
    {
      "epoch": 2.2049438013226927,
      "grad_norm": 2.828125,
      "learning_rate": 1.4723345041810427e-05,
      "loss": 0.7688,
      "step": 629130
    },
    {
      "epoch": 2.2049788488295885,
      "grad_norm": 2.953125,
      "learning_rate": 1.4722696013146725e-05,
      "loss": 0.8211,
      "step": 629140
    },
    {
      "epoch": 2.2050138963364843,
      "grad_norm": 3.0625,
      "learning_rate": 1.4722046984483023e-05,
      "loss": 0.7987,
      "step": 629150
    },
    {
      "epoch": 2.2050489438433796,
      "grad_norm": 2.84375,
      "learning_rate": 1.4721397955819321e-05,
      "loss": 0.7631,
      "step": 629160
    },
    {
      "epoch": 2.2050839913502753,
      "grad_norm": 2.59375,
      "learning_rate": 1.4720748927155619e-05,
      "loss": 0.8398,
      "step": 629170
    },
    {
      "epoch": 2.205119038857171,
      "grad_norm": 2.84375,
      "learning_rate": 1.4720099898491919e-05,
      "loss": 0.8642,
      "step": 629180
    },
    {
      "epoch": 2.2051540863640664,
      "grad_norm": 3.0625,
      "learning_rate": 1.4719450869828217e-05,
      "loss": 0.7547,
      "step": 629190
    },
    {
      "epoch": 2.205189133870962,
      "grad_norm": 3.015625,
      "learning_rate": 1.4718801841164515e-05,
      "loss": 0.8255,
      "step": 629200
    },
    {
      "epoch": 2.205224181377858,
      "grad_norm": 2.875,
      "learning_rate": 1.4718152812500813e-05,
      "loss": 0.789,
      "step": 629210
    },
    {
      "epoch": 2.2052592288847532,
      "grad_norm": 2.828125,
      "learning_rate": 1.471750378383711e-05,
      "loss": 0.7838,
      "step": 629220
    },
    {
      "epoch": 2.205294276391649,
      "grad_norm": 2.875,
      "learning_rate": 1.4716854755173409e-05,
      "loss": 0.7908,
      "step": 629230
    },
    {
      "epoch": 2.2053293238985443,
      "grad_norm": 2.765625,
      "learning_rate": 1.4716205726509707e-05,
      "loss": 0.8105,
      "step": 629240
    },
    {
      "epoch": 2.20536437140544,
      "grad_norm": 3.34375,
      "learning_rate": 1.4715556697846003e-05,
      "loss": 0.8682,
      "step": 629250
    },
    {
      "epoch": 2.205399418912336,
      "grad_norm": 2.9375,
      "learning_rate": 1.4714907669182301e-05,
      "loss": 0.8286,
      "step": 629260
    },
    {
      "epoch": 2.205434466419231,
      "grad_norm": 2.671875,
      "learning_rate": 1.4714258640518599e-05,
      "loss": 0.7648,
      "step": 629270
    },
    {
      "epoch": 2.205469513926127,
      "grad_norm": 2.984375,
      "learning_rate": 1.4713609611854897e-05,
      "loss": 0.7676,
      "step": 629280
    },
    {
      "epoch": 2.2055045614330226,
      "grad_norm": 3.03125,
      "learning_rate": 1.4712960583191197e-05,
      "loss": 0.8228,
      "step": 629290
    },
    {
      "epoch": 2.205539608939918,
      "grad_norm": 2.984375,
      "learning_rate": 1.4712311554527495e-05,
      "loss": 0.8492,
      "step": 629300
    },
    {
      "epoch": 2.2055746564468137,
      "grad_norm": 2.9375,
      "learning_rate": 1.4711662525863793e-05,
      "loss": 0.8481,
      "step": 629310
    },
    {
      "epoch": 2.2056097039537095,
      "grad_norm": 3.140625,
      "learning_rate": 1.471101349720009e-05,
      "loss": 0.8358,
      "step": 629320
    },
    {
      "epoch": 2.205644751460605,
      "grad_norm": 3.375,
      "learning_rate": 1.4710364468536389e-05,
      "loss": 0.8163,
      "step": 629330
    },
    {
      "epoch": 2.2056797989675005,
      "grad_norm": 3.09375,
      "learning_rate": 1.4709715439872687e-05,
      "loss": 0.7403,
      "step": 629340
    },
    {
      "epoch": 2.205714846474396,
      "grad_norm": 2.75,
      "learning_rate": 1.4709066411208985e-05,
      "loss": 0.835,
      "step": 629350
    },
    {
      "epoch": 2.2057498939812916,
      "grad_norm": 3.3125,
      "learning_rate": 1.4708417382545284e-05,
      "loss": 0.8197,
      "step": 629360
    },
    {
      "epoch": 2.2057849414881874,
      "grad_norm": 2.796875,
      "learning_rate": 1.4707768353881582e-05,
      "loss": 0.7804,
      "step": 629370
    },
    {
      "epoch": 2.2058199889950827,
      "grad_norm": 3.25,
      "learning_rate": 1.470711932521788e-05,
      "loss": 0.7975,
      "step": 629380
    },
    {
      "epoch": 2.2058550365019785,
      "grad_norm": 2.484375,
      "learning_rate": 1.4706470296554178e-05,
      "loss": 0.8258,
      "step": 629390
    },
    {
      "epoch": 2.205890084008874,
      "grad_norm": 2.921875,
      "learning_rate": 1.4705821267890476e-05,
      "loss": 0.8152,
      "step": 629400
    },
    {
      "epoch": 2.2059251315157695,
      "grad_norm": 2.703125,
      "learning_rate": 1.4705172239226774e-05,
      "loss": 0.7312,
      "step": 629410
    },
    {
      "epoch": 2.2059601790226653,
      "grad_norm": 3.046875,
      "learning_rate": 1.4704523210563072e-05,
      "loss": 0.7406,
      "step": 629420
    },
    {
      "epoch": 2.205995226529561,
      "grad_norm": 3.03125,
      "learning_rate": 1.4703874181899372e-05,
      "loss": 0.8712,
      "step": 629430
    },
    {
      "epoch": 2.2060302740364564,
      "grad_norm": 2.796875,
      "learning_rate": 1.4703225153235667e-05,
      "loss": 0.844,
      "step": 629440
    },
    {
      "epoch": 2.206065321543352,
      "grad_norm": 2.796875,
      "learning_rate": 1.4702576124571965e-05,
      "loss": 0.8203,
      "step": 629450
    },
    {
      "epoch": 2.2061003690502474,
      "grad_norm": 3.296875,
      "learning_rate": 1.4701927095908263e-05,
      "loss": 0.8182,
      "step": 629460
    },
    {
      "epoch": 2.206135416557143,
      "grad_norm": 3.015625,
      "learning_rate": 1.470127806724456e-05,
      "loss": 0.7436,
      "step": 629470
    },
    {
      "epoch": 2.206170464064039,
      "grad_norm": 2.734375,
      "learning_rate": 1.470062903858086e-05,
      "loss": 0.7755,
      "step": 629480
    },
    {
      "epoch": 2.2062055115709343,
      "grad_norm": 2.96875,
      "learning_rate": 1.4699980009917158e-05,
      "loss": 0.7661,
      "step": 629490
    },
    {
      "epoch": 2.20624055907783,
      "grad_norm": 2.59375,
      "learning_rate": 1.4699330981253456e-05,
      "loss": 0.8249,
      "step": 629500
    },
    {
      "epoch": 2.2062756065847258,
      "grad_norm": 3.0,
      "learning_rate": 1.4698681952589754e-05,
      "loss": 0.8775,
      "step": 629510
    },
    {
      "epoch": 2.206310654091621,
      "grad_norm": 2.71875,
      "learning_rate": 1.4698032923926052e-05,
      "loss": 0.8375,
      "step": 629520
    },
    {
      "epoch": 2.206345701598517,
      "grad_norm": 2.421875,
      "learning_rate": 1.469738389526235e-05,
      "loss": 0.8244,
      "step": 629530
    },
    {
      "epoch": 2.2063807491054126,
      "grad_norm": 2.734375,
      "learning_rate": 1.4696734866598648e-05,
      "loss": 0.7917,
      "step": 629540
    },
    {
      "epoch": 2.206415796612308,
      "grad_norm": 3.125,
      "learning_rate": 1.4696085837934948e-05,
      "loss": 0.8166,
      "step": 629550
    },
    {
      "epoch": 2.2064508441192037,
      "grad_norm": 3.125,
      "learning_rate": 1.4695436809271246e-05,
      "loss": 0.8439,
      "step": 629560
    },
    {
      "epoch": 2.206485891626099,
      "grad_norm": 3.171875,
      "learning_rate": 1.4694787780607544e-05,
      "loss": 0.8561,
      "step": 629570
    },
    {
      "epoch": 2.2065209391329947,
      "grad_norm": 3.078125,
      "learning_rate": 1.4694138751943842e-05,
      "loss": 0.7825,
      "step": 629580
    },
    {
      "epoch": 2.2065559866398905,
      "grad_norm": 3.359375,
      "learning_rate": 1.469348972328014e-05,
      "loss": 0.9135,
      "step": 629590
    },
    {
      "epoch": 2.206591034146786,
      "grad_norm": 3.234375,
      "learning_rate": 1.4692840694616438e-05,
      "loss": 0.8144,
      "step": 629600
    },
    {
      "epoch": 2.2066260816536816,
      "grad_norm": 3.296875,
      "learning_rate": 1.4692191665952736e-05,
      "loss": 0.8273,
      "step": 629610
    },
    {
      "epoch": 2.2066611291605773,
      "grad_norm": 3.015625,
      "learning_rate": 1.4691542637289036e-05,
      "loss": 0.8564,
      "step": 629620
    },
    {
      "epoch": 2.2066961766674726,
      "grad_norm": 2.75,
      "learning_rate": 1.469089360862533e-05,
      "loss": 0.8261,
      "step": 629630
    },
    {
      "epoch": 2.2067312241743684,
      "grad_norm": 3.53125,
      "learning_rate": 1.4690244579961628e-05,
      "loss": 0.808,
      "step": 629640
    },
    {
      "epoch": 2.206766271681264,
      "grad_norm": 2.84375,
      "learning_rate": 1.4689595551297926e-05,
      "loss": 0.743,
      "step": 629650
    },
    {
      "epoch": 2.2068013191881595,
      "grad_norm": 3.109375,
      "learning_rate": 1.4688946522634226e-05,
      "loss": 0.8116,
      "step": 629660
    },
    {
      "epoch": 2.2068363666950552,
      "grad_norm": 2.75,
      "learning_rate": 1.4688297493970524e-05,
      "loss": 0.7776,
      "step": 629670
    },
    {
      "epoch": 2.2068714142019505,
      "grad_norm": 3.171875,
      "learning_rate": 1.4687648465306822e-05,
      "loss": 0.8929,
      "step": 629680
    },
    {
      "epoch": 2.2069064617088463,
      "grad_norm": 2.78125,
      "learning_rate": 1.468699943664312e-05,
      "loss": 0.8039,
      "step": 629690
    },
    {
      "epoch": 2.206941509215742,
      "grad_norm": 2.71875,
      "learning_rate": 1.4686350407979418e-05,
      "loss": 0.7919,
      "step": 629700
    },
    {
      "epoch": 2.2069765567226374,
      "grad_norm": 2.953125,
      "learning_rate": 1.4685701379315716e-05,
      "loss": 0.8234,
      "step": 629710
    },
    {
      "epoch": 2.207011604229533,
      "grad_norm": 3.0625,
      "learning_rate": 1.4685052350652014e-05,
      "loss": 0.8274,
      "step": 629720
    },
    {
      "epoch": 2.207046651736429,
      "grad_norm": 2.859375,
      "learning_rate": 1.4684403321988314e-05,
      "loss": 0.7985,
      "step": 629730
    },
    {
      "epoch": 2.207081699243324,
      "grad_norm": 3.1875,
      "learning_rate": 1.4683754293324612e-05,
      "loss": 0.8228,
      "step": 629740
    },
    {
      "epoch": 2.20711674675022,
      "grad_norm": 2.984375,
      "learning_rate": 1.468310526466091e-05,
      "loss": 0.8103,
      "step": 629750
    },
    {
      "epoch": 2.2071517942571157,
      "grad_norm": 2.90625,
      "learning_rate": 1.4682456235997208e-05,
      "loss": 0.8922,
      "step": 629760
    },
    {
      "epoch": 2.207186841764011,
      "grad_norm": 2.984375,
      "learning_rate": 1.4681807207333506e-05,
      "loss": 0.8756,
      "step": 629770
    },
    {
      "epoch": 2.207221889270907,
      "grad_norm": 3.125,
      "learning_rate": 1.4681158178669804e-05,
      "loss": 0.7767,
      "step": 629780
    },
    {
      "epoch": 2.207256936777802,
      "grad_norm": 2.671875,
      "learning_rate": 1.4680509150006102e-05,
      "loss": 0.7632,
      "step": 629790
    },
    {
      "epoch": 2.207291984284698,
      "grad_norm": 2.796875,
      "learning_rate": 1.4679860121342402e-05,
      "loss": 0.7785,
      "step": 629800
    },
    {
      "epoch": 2.2073270317915936,
      "grad_norm": 2.78125,
      "learning_rate": 1.46792110926787e-05,
      "loss": 0.8086,
      "step": 629810
    },
    {
      "epoch": 2.207362079298489,
      "grad_norm": 2.828125,
      "learning_rate": 1.4678562064014994e-05,
      "loss": 0.8149,
      "step": 629820
    },
    {
      "epoch": 2.2073971268053847,
      "grad_norm": 3.328125,
      "learning_rate": 1.4677913035351292e-05,
      "loss": 0.8661,
      "step": 629830
    },
    {
      "epoch": 2.2074321743122804,
      "grad_norm": 3.265625,
      "learning_rate": 1.4677264006687592e-05,
      "loss": 0.7723,
      "step": 629840
    },
    {
      "epoch": 2.2074672218191758,
      "grad_norm": 3.0,
      "learning_rate": 1.467661497802389e-05,
      "loss": 0.7808,
      "step": 629850
    },
    {
      "epoch": 2.2075022693260715,
      "grad_norm": 3.46875,
      "learning_rate": 1.4675965949360188e-05,
      "loss": 0.9104,
      "step": 629860
    },
    {
      "epoch": 2.2075373168329673,
      "grad_norm": 2.890625,
      "learning_rate": 1.4675316920696486e-05,
      "loss": 0.79,
      "step": 629870
    },
    {
      "epoch": 2.2075723643398626,
      "grad_norm": 2.953125,
      "learning_rate": 1.4674667892032784e-05,
      "loss": 0.8833,
      "step": 629880
    },
    {
      "epoch": 2.2076074118467583,
      "grad_norm": 2.9375,
      "learning_rate": 1.4674018863369082e-05,
      "loss": 0.8321,
      "step": 629890
    },
    {
      "epoch": 2.2076424593536537,
      "grad_norm": 3.125,
      "learning_rate": 1.467336983470538e-05,
      "loss": 0.835,
      "step": 629900
    },
    {
      "epoch": 2.2076775068605494,
      "grad_norm": 2.9375,
      "learning_rate": 1.467272080604168e-05,
      "loss": 0.789,
      "step": 629910
    },
    {
      "epoch": 2.207712554367445,
      "grad_norm": 2.890625,
      "learning_rate": 1.4672071777377978e-05,
      "loss": 0.8735,
      "step": 629920
    },
    {
      "epoch": 2.2077476018743405,
      "grad_norm": 2.796875,
      "learning_rate": 1.4671422748714276e-05,
      "loss": 0.7804,
      "step": 629930
    },
    {
      "epoch": 2.2077826493812363,
      "grad_norm": 2.75,
      "learning_rate": 1.4670773720050574e-05,
      "loss": 0.8637,
      "step": 629940
    },
    {
      "epoch": 2.207817696888132,
      "grad_norm": 3.09375,
      "learning_rate": 1.4670124691386872e-05,
      "loss": 0.8168,
      "step": 629950
    },
    {
      "epoch": 2.2078527443950273,
      "grad_norm": 2.765625,
      "learning_rate": 1.466947566272317e-05,
      "loss": 0.8579,
      "step": 629960
    },
    {
      "epoch": 2.207887791901923,
      "grad_norm": 3.078125,
      "learning_rate": 1.4668826634059468e-05,
      "loss": 0.8163,
      "step": 629970
    },
    {
      "epoch": 2.207922839408819,
      "grad_norm": 2.90625,
      "learning_rate": 1.4668177605395767e-05,
      "loss": 0.7843,
      "step": 629980
    },
    {
      "epoch": 2.207957886915714,
      "grad_norm": 2.734375,
      "learning_rate": 1.4667528576732065e-05,
      "loss": 0.8472,
      "step": 629990
    },
    {
      "epoch": 2.20799293442261,
      "grad_norm": 2.9375,
      "learning_rate": 1.4666879548068363e-05,
      "loss": 0.8375,
      "step": 630000
    },
    {
      "epoch": 2.20799293442261,
      "eval_loss": 0.7637956142425537,
      "eval_runtime": 556.5133,
      "eval_samples_per_second": 683.606,
      "eval_steps_per_second": 56.967,
      "step": 630000
    },
    {
      "epoch": 2.2080279819295052,
      "grad_norm": 3.03125,
      "learning_rate": 1.4666230519404658e-05,
      "loss": 0.8195,
      "step": 630010
    },
    {
      "epoch": 2.208063029436401,
      "grad_norm": 2.703125,
      "learning_rate": 1.4665581490740956e-05,
      "loss": 0.8858,
      "step": 630020
    },
    {
      "epoch": 2.2080980769432967,
      "grad_norm": 2.28125,
      "learning_rate": 1.4664932462077256e-05,
      "loss": 0.765,
      "step": 630030
    },
    {
      "epoch": 2.208133124450192,
      "grad_norm": 2.75,
      "learning_rate": 1.4664283433413554e-05,
      "loss": 0.8039,
      "step": 630040
    },
    {
      "epoch": 2.208168171957088,
      "grad_norm": 2.5625,
      "learning_rate": 1.4663634404749852e-05,
      "loss": 0.7461,
      "step": 630050
    },
    {
      "epoch": 2.2082032194639836,
      "grad_norm": 2.4375,
      "learning_rate": 1.466298537608615e-05,
      "loss": 0.8064,
      "step": 630060
    },
    {
      "epoch": 2.208238266970879,
      "grad_norm": 2.796875,
      "learning_rate": 1.4662336347422448e-05,
      "loss": 0.8181,
      "step": 630070
    },
    {
      "epoch": 2.2082733144777746,
      "grad_norm": 3.078125,
      "learning_rate": 1.4661687318758746e-05,
      "loss": 0.8439,
      "step": 630080
    },
    {
      "epoch": 2.2083083619846704,
      "grad_norm": 2.453125,
      "learning_rate": 1.4661038290095044e-05,
      "loss": 0.8399,
      "step": 630090
    },
    {
      "epoch": 2.2083434094915657,
      "grad_norm": 2.875,
      "learning_rate": 1.4660389261431343e-05,
      "loss": 0.8285,
      "step": 630100
    },
    {
      "epoch": 2.2083784569984615,
      "grad_norm": 3.234375,
      "learning_rate": 1.4659740232767641e-05,
      "loss": 0.8474,
      "step": 630110
    },
    {
      "epoch": 2.208413504505357,
      "grad_norm": 3.859375,
      "learning_rate": 1.465909120410394e-05,
      "loss": 0.7492,
      "step": 630120
    },
    {
      "epoch": 2.2084485520122525,
      "grad_norm": 2.890625,
      "learning_rate": 1.4658442175440237e-05,
      "loss": 0.8736,
      "step": 630130
    },
    {
      "epoch": 2.2084835995191483,
      "grad_norm": 2.75,
      "learning_rate": 1.4657793146776535e-05,
      "loss": 0.7784,
      "step": 630140
    },
    {
      "epoch": 2.2085186470260436,
      "grad_norm": 2.53125,
      "learning_rate": 1.4657144118112833e-05,
      "loss": 0.8354,
      "step": 630150
    },
    {
      "epoch": 2.2085536945329394,
      "grad_norm": 2.6875,
      "learning_rate": 1.4656495089449131e-05,
      "loss": 0.8073,
      "step": 630160
    },
    {
      "epoch": 2.208588742039835,
      "grad_norm": 2.828125,
      "learning_rate": 1.4655846060785431e-05,
      "loss": 0.8151,
      "step": 630170
    },
    {
      "epoch": 2.2086237895467304,
      "grad_norm": 3.171875,
      "learning_rate": 1.4655197032121729e-05,
      "loss": 0.8166,
      "step": 630180
    },
    {
      "epoch": 2.208658837053626,
      "grad_norm": 2.875,
      "learning_rate": 1.4654548003458027e-05,
      "loss": 0.8502,
      "step": 630190
    },
    {
      "epoch": 2.208693884560522,
      "grad_norm": 2.625,
      "learning_rate": 1.4653898974794322e-05,
      "loss": 0.8616,
      "step": 630200
    },
    {
      "epoch": 2.2087289320674173,
      "grad_norm": 3.03125,
      "learning_rate": 1.4653249946130621e-05,
      "loss": 0.8057,
      "step": 630210
    },
    {
      "epoch": 2.208763979574313,
      "grad_norm": 2.828125,
      "learning_rate": 1.465260091746692e-05,
      "loss": 0.7535,
      "step": 630220
    },
    {
      "epoch": 2.2087990270812083,
      "grad_norm": 2.734375,
      "learning_rate": 1.4651951888803217e-05,
      "loss": 0.7749,
      "step": 630230
    },
    {
      "epoch": 2.208834074588104,
      "grad_norm": 3.25,
      "learning_rate": 1.4651302860139515e-05,
      "loss": 0.8129,
      "step": 630240
    },
    {
      "epoch": 2.208869122095,
      "grad_norm": 2.8125,
      "learning_rate": 1.4650653831475813e-05,
      "loss": 0.837,
      "step": 630250
    },
    {
      "epoch": 2.208904169601895,
      "grad_norm": 2.78125,
      "learning_rate": 1.4650004802812111e-05,
      "loss": 0.8437,
      "step": 630260
    },
    {
      "epoch": 2.208939217108791,
      "grad_norm": 2.796875,
      "learning_rate": 1.464935577414841e-05,
      "loss": 0.7896,
      "step": 630270
    },
    {
      "epoch": 2.2089742646156867,
      "grad_norm": 2.203125,
      "learning_rate": 1.4648706745484709e-05,
      "loss": 0.7846,
      "step": 630280
    },
    {
      "epoch": 2.209009312122582,
      "grad_norm": 2.8125,
      "learning_rate": 1.4648057716821007e-05,
      "loss": 0.7913,
      "step": 630290
    },
    {
      "epoch": 2.2090443596294778,
      "grad_norm": 3.109375,
      "learning_rate": 1.4647408688157305e-05,
      "loss": 0.8218,
      "step": 630300
    },
    {
      "epoch": 2.2090794071363735,
      "grad_norm": 2.953125,
      "learning_rate": 1.4646759659493603e-05,
      "loss": 0.8458,
      "step": 630310
    },
    {
      "epoch": 2.209114454643269,
      "grad_norm": 3.1875,
      "learning_rate": 1.4646110630829901e-05,
      "loss": 0.8427,
      "step": 630320
    },
    {
      "epoch": 2.2091495021501646,
      "grad_norm": 2.59375,
      "learning_rate": 1.4645461602166199e-05,
      "loss": 0.7886,
      "step": 630330
    },
    {
      "epoch": 2.2091845496570603,
      "grad_norm": 3.109375,
      "learning_rate": 1.4644812573502497e-05,
      "loss": 0.7954,
      "step": 630340
    },
    {
      "epoch": 2.2092195971639557,
      "grad_norm": 3.0625,
      "learning_rate": 1.4644163544838797e-05,
      "loss": 0.8056,
      "step": 630350
    },
    {
      "epoch": 2.2092546446708514,
      "grad_norm": 3.09375,
      "learning_rate": 1.4643514516175095e-05,
      "loss": 0.8515,
      "step": 630360
    },
    {
      "epoch": 2.2092896921777467,
      "grad_norm": 3.109375,
      "learning_rate": 1.4642865487511393e-05,
      "loss": 0.788,
      "step": 630370
    },
    {
      "epoch": 2.2093247396846425,
      "grad_norm": 2.90625,
      "learning_rate": 1.4642216458847687e-05,
      "loss": 0.7382,
      "step": 630380
    },
    {
      "epoch": 2.2093597871915382,
      "grad_norm": 3.375,
      "learning_rate": 1.4641567430183987e-05,
      "loss": 0.8272,
      "step": 630390
    },
    {
      "epoch": 2.2093948346984336,
      "grad_norm": 2.71875,
      "learning_rate": 1.4640918401520285e-05,
      "loss": 0.8203,
      "step": 630400
    },
    {
      "epoch": 2.2094298822053293,
      "grad_norm": 2.65625,
      "learning_rate": 1.4640269372856583e-05,
      "loss": 0.7902,
      "step": 630410
    },
    {
      "epoch": 2.209464929712225,
      "grad_norm": 3.265625,
      "learning_rate": 1.4639620344192881e-05,
      "loss": 0.8865,
      "step": 630420
    },
    {
      "epoch": 2.2094999772191204,
      "grad_norm": 2.75,
      "learning_rate": 1.4638971315529179e-05,
      "loss": 0.7939,
      "step": 630430
    },
    {
      "epoch": 2.209535024726016,
      "grad_norm": 3.15625,
      "learning_rate": 1.4638322286865477e-05,
      "loss": 0.8021,
      "step": 630440
    },
    {
      "epoch": 2.209570072232912,
      "grad_norm": 2.515625,
      "learning_rate": 1.4637673258201775e-05,
      "loss": 0.8402,
      "step": 630450
    },
    {
      "epoch": 2.209605119739807,
      "grad_norm": 3.296875,
      "learning_rate": 1.4637024229538075e-05,
      "loss": 0.8229,
      "step": 630460
    },
    {
      "epoch": 2.209640167246703,
      "grad_norm": 2.625,
      "learning_rate": 1.4636375200874373e-05,
      "loss": 0.762,
      "step": 630470
    },
    {
      "epoch": 2.2096752147535987,
      "grad_norm": 2.859375,
      "learning_rate": 1.463572617221067e-05,
      "loss": 0.9144,
      "step": 630480
    },
    {
      "epoch": 2.209710262260494,
      "grad_norm": 3.5,
      "learning_rate": 1.4635077143546969e-05,
      "loss": 0.8596,
      "step": 630490
    },
    {
      "epoch": 2.20974530976739,
      "grad_norm": 2.765625,
      "learning_rate": 1.4634428114883267e-05,
      "loss": 0.9174,
      "step": 630500
    },
    {
      "epoch": 2.209780357274285,
      "grad_norm": 3.25,
      "learning_rate": 1.4633779086219565e-05,
      "loss": 0.8504,
      "step": 630510
    },
    {
      "epoch": 2.209815404781181,
      "grad_norm": 3.4375,
      "learning_rate": 1.4633130057555863e-05,
      "loss": 0.7442,
      "step": 630520
    },
    {
      "epoch": 2.2098504522880766,
      "grad_norm": 2.703125,
      "learning_rate": 1.4632481028892162e-05,
      "loss": 0.9362,
      "step": 630530
    },
    {
      "epoch": 2.209885499794972,
      "grad_norm": 2.671875,
      "learning_rate": 1.463183200022846e-05,
      "loss": 0.8292,
      "step": 630540
    },
    {
      "epoch": 2.2099205473018677,
      "grad_norm": 2.625,
      "learning_rate": 1.4631182971564758e-05,
      "loss": 0.7224,
      "step": 630550
    },
    {
      "epoch": 2.2099555948087635,
      "grad_norm": 2.65625,
      "learning_rate": 1.4630533942901056e-05,
      "loss": 0.7688,
      "step": 630560
    },
    {
      "epoch": 2.209990642315659,
      "grad_norm": 3.0,
      "learning_rate": 1.4629884914237351e-05,
      "loss": 0.7916,
      "step": 630570
    },
    {
      "epoch": 2.2100256898225545,
      "grad_norm": 2.96875,
      "learning_rate": 1.462923588557365e-05,
      "loss": 0.8805,
      "step": 630580
    },
    {
      "epoch": 2.2100607373294503,
      "grad_norm": 2.71875,
      "learning_rate": 1.4628586856909949e-05,
      "loss": 0.7866,
      "step": 630590
    },
    {
      "epoch": 2.2100957848363456,
      "grad_norm": 2.703125,
      "learning_rate": 1.4627937828246247e-05,
      "loss": 0.7816,
      "step": 630600
    },
    {
      "epoch": 2.2101308323432414,
      "grad_norm": 3.28125,
      "learning_rate": 1.4627288799582545e-05,
      "loss": 0.827,
      "step": 630610
    },
    {
      "epoch": 2.2101658798501367,
      "grad_norm": 2.484375,
      "learning_rate": 1.4626639770918843e-05,
      "loss": 0.7582,
      "step": 630620
    },
    {
      "epoch": 2.2102009273570324,
      "grad_norm": 3.046875,
      "learning_rate": 1.462599074225514e-05,
      "loss": 0.9118,
      "step": 630630
    },
    {
      "epoch": 2.210235974863928,
      "grad_norm": 2.609375,
      "learning_rate": 1.4625341713591439e-05,
      "loss": 0.8794,
      "step": 630640
    },
    {
      "epoch": 2.2102710223708235,
      "grad_norm": 2.75,
      "learning_rate": 1.4624692684927738e-05,
      "loss": 0.7499,
      "step": 630650
    },
    {
      "epoch": 2.2103060698777193,
      "grad_norm": 2.640625,
      "learning_rate": 1.4624043656264036e-05,
      "loss": 0.8272,
      "step": 630660
    },
    {
      "epoch": 2.210341117384615,
      "grad_norm": 2.890625,
      "learning_rate": 1.4623394627600334e-05,
      "loss": 0.8985,
      "step": 630670
    },
    {
      "epoch": 2.2103761648915103,
      "grad_norm": 3.03125,
      "learning_rate": 1.4622745598936632e-05,
      "loss": 0.7537,
      "step": 630680
    },
    {
      "epoch": 2.210411212398406,
      "grad_norm": 2.828125,
      "learning_rate": 1.462209657027293e-05,
      "loss": 0.8368,
      "step": 630690
    },
    {
      "epoch": 2.210446259905302,
      "grad_norm": 2.875,
      "learning_rate": 1.4621447541609228e-05,
      "loss": 0.7585,
      "step": 630700
    },
    {
      "epoch": 2.210481307412197,
      "grad_norm": 3.046875,
      "learning_rate": 1.4620798512945526e-05,
      "loss": 0.7956,
      "step": 630710
    },
    {
      "epoch": 2.210516354919093,
      "grad_norm": 2.890625,
      "learning_rate": 1.4620149484281826e-05,
      "loss": 0.7924,
      "step": 630720
    },
    {
      "epoch": 2.2105514024259882,
      "grad_norm": 2.90625,
      "learning_rate": 1.4619500455618124e-05,
      "loss": 0.7207,
      "step": 630730
    },
    {
      "epoch": 2.210586449932884,
      "grad_norm": 2.546875,
      "learning_rate": 1.4618851426954422e-05,
      "loss": 0.8647,
      "step": 630740
    },
    {
      "epoch": 2.2106214974397798,
      "grad_norm": 2.703125,
      "learning_rate": 1.461820239829072e-05,
      "loss": 0.7472,
      "step": 630750
    },
    {
      "epoch": 2.210656544946675,
      "grad_norm": 3.421875,
      "learning_rate": 1.4617553369627016e-05,
      "loss": 0.7783,
      "step": 630760
    },
    {
      "epoch": 2.210691592453571,
      "grad_norm": 3.296875,
      "learning_rate": 1.4616904340963314e-05,
      "loss": 0.8344,
      "step": 630770
    },
    {
      "epoch": 2.2107266399604666,
      "grad_norm": 2.734375,
      "learning_rate": 1.4616255312299612e-05,
      "loss": 0.7643,
      "step": 630780
    },
    {
      "epoch": 2.210761687467362,
      "grad_norm": 2.390625,
      "learning_rate": 1.461560628363591e-05,
      "loss": 0.8332,
      "step": 630790
    },
    {
      "epoch": 2.2107967349742577,
      "grad_norm": 2.625,
      "learning_rate": 1.4614957254972208e-05,
      "loss": 0.9268,
      "step": 630800
    },
    {
      "epoch": 2.2108317824811534,
      "grad_norm": 2.890625,
      "learning_rate": 1.4614308226308506e-05,
      "loss": 0.7914,
      "step": 630810
    },
    {
      "epoch": 2.2108668299880487,
      "grad_norm": 2.828125,
      "learning_rate": 1.4613659197644804e-05,
      "loss": 0.7265,
      "step": 630820
    },
    {
      "epoch": 2.2109018774949445,
      "grad_norm": 2.875,
      "learning_rate": 1.4613010168981104e-05,
      "loss": 0.8308,
      "step": 630830
    },
    {
      "epoch": 2.21093692500184,
      "grad_norm": 3.34375,
      "learning_rate": 1.4612361140317402e-05,
      "loss": 0.8031,
      "step": 630840
    },
    {
      "epoch": 2.2109719725087356,
      "grad_norm": 3.1875,
      "learning_rate": 1.46117121116537e-05,
      "loss": 0.9195,
      "step": 630850
    },
    {
      "epoch": 2.2110070200156313,
      "grad_norm": 2.5625,
      "learning_rate": 1.4611063082989998e-05,
      "loss": 0.7937,
      "step": 630860
    },
    {
      "epoch": 2.2110420675225266,
      "grad_norm": 3.09375,
      "learning_rate": 1.4610414054326296e-05,
      "loss": 0.8943,
      "step": 630870
    },
    {
      "epoch": 2.2110771150294224,
      "grad_norm": 2.640625,
      "learning_rate": 1.4609765025662594e-05,
      "loss": 0.7606,
      "step": 630880
    },
    {
      "epoch": 2.211112162536318,
      "grad_norm": 2.625,
      "learning_rate": 1.4609115996998892e-05,
      "loss": 0.7955,
      "step": 630890
    },
    {
      "epoch": 2.2111472100432135,
      "grad_norm": 3.109375,
      "learning_rate": 1.4608466968335192e-05,
      "loss": 0.802,
      "step": 630900
    },
    {
      "epoch": 2.211182257550109,
      "grad_norm": 3.1875,
      "learning_rate": 1.460781793967149e-05,
      "loss": 0.7836,
      "step": 630910
    },
    {
      "epoch": 2.211217305057005,
      "grad_norm": 2.953125,
      "learning_rate": 1.4607168911007788e-05,
      "loss": 0.9004,
      "step": 630920
    },
    {
      "epoch": 2.2112523525639003,
      "grad_norm": 3.109375,
      "learning_rate": 1.4606519882344086e-05,
      "loss": 0.8121,
      "step": 630930
    },
    {
      "epoch": 2.211287400070796,
      "grad_norm": 2.84375,
      "learning_rate": 1.4605870853680384e-05,
      "loss": 0.7438,
      "step": 630940
    },
    {
      "epoch": 2.2113224475776914,
      "grad_norm": 3.0,
      "learning_rate": 1.460522182501668e-05,
      "loss": 0.7625,
      "step": 630950
    },
    {
      "epoch": 2.211357495084587,
      "grad_norm": 3.09375,
      "learning_rate": 1.4604572796352978e-05,
      "loss": 0.8115,
      "step": 630960
    },
    {
      "epoch": 2.211392542591483,
      "grad_norm": 2.984375,
      "learning_rate": 1.4603923767689276e-05,
      "loss": 0.8352,
      "step": 630970
    },
    {
      "epoch": 2.211427590098378,
      "grad_norm": 2.875,
      "learning_rate": 1.4603274739025574e-05,
      "loss": 0.804,
      "step": 630980
    },
    {
      "epoch": 2.211462637605274,
      "grad_norm": 2.625,
      "learning_rate": 1.4602625710361872e-05,
      "loss": 0.7775,
      "step": 630990
    },
    {
      "epoch": 2.2114976851121697,
      "grad_norm": 2.703125,
      "learning_rate": 1.460197668169817e-05,
      "loss": 0.7883,
      "step": 631000
    },
    {
      "epoch": 2.211532732619065,
      "grad_norm": 2.875,
      "learning_rate": 1.460132765303447e-05,
      "loss": 0.8632,
      "step": 631010
    },
    {
      "epoch": 2.211567780125961,
      "grad_norm": 3.1875,
      "learning_rate": 1.4600678624370768e-05,
      "loss": 0.8012,
      "step": 631020
    },
    {
      "epoch": 2.2116028276328565,
      "grad_norm": 3.328125,
      "learning_rate": 1.4600029595707066e-05,
      "loss": 0.7837,
      "step": 631030
    },
    {
      "epoch": 2.211637875139752,
      "grad_norm": 2.90625,
      "learning_rate": 1.4599380567043364e-05,
      "loss": 0.8028,
      "step": 631040
    },
    {
      "epoch": 2.2116729226466476,
      "grad_norm": 3.171875,
      "learning_rate": 1.4598731538379662e-05,
      "loss": 0.8972,
      "step": 631050
    },
    {
      "epoch": 2.211707970153543,
      "grad_norm": 2.875,
      "learning_rate": 1.459808250971596e-05,
      "loss": 0.8603,
      "step": 631060
    },
    {
      "epoch": 2.2117430176604387,
      "grad_norm": 2.828125,
      "learning_rate": 1.4597433481052258e-05,
      "loss": 0.8306,
      "step": 631070
    },
    {
      "epoch": 2.2117780651673344,
      "grad_norm": 2.828125,
      "learning_rate": 1.4596784452388557e-05,
      "loss": 0.8401,
      "step": 631080
    },
    {
      "epoch": 2.2118131126742298,
      "grad_norm": 3.046875,
      "learning_rate": 1.4596135423724855e-05,
      "loss": 0.8241,
      "step": 631090
    },
    {
      "epoch": 2.2118481601811255,
      "grad_norm": 2.53125,
      "learning_rate": 1.4595486395061153e-05,
      "loss": 0.8191,
      "step": 631100
    },
    {
      "epoch": 2.2118832076880213,
      "grad_norm": 3.015625,
      "learning_rate": 1.4594837366397451e-05,
      "loss": 0.807,
      "step": 631110
    },
    {
      "epoch": 2.2119182551949166,
      "grad_norm": 2.84375,
      "learning_rate": 1.459418833773375e-05,
      "loss": 0.8645,
      "step": 631120
    },
    {
      "epoch": 2.2119533027018123,
      "grad_norm": 2.984375,
      "learning_rate": 1.4593539309070047e-05,
      "loss": 0.8417,
      "step": 631130
    },
    {
      "epoch": 2.211988350208708,
      "grad_norm": 2.859375,
      "learning_rate": 1.4592890280406344e-05,
      "loss": 0.8458,
      "step": 631140
    },
    {
      "epoch": 2.2120233977156034,
      "grad_norm": 2.59375,
      "learning_rate": 1.4592241251742642e-05,
      "loss": 0.8683,
      "step": 631150
    },
    {
      "epoch": 2.212058445222499,
      "grad_norm": 2.546875,
      "learning_rate": 1.459159222307894e-05,
      "loss": 0.8065,
      "step": 631160
    },
    {
      "epoch": 2.2120934927293945,
      "grad_norm": 3.03125,
      "learning_rate": 1.4590943194415238e-05,
      "loss": 0.7757,
      "step": 631170
    },
    {
      "epoch": 2.2121285402362902,
      "grad_norm": 2.734375,
      "learning_rate": 1.4590294165751536e-05,
      "loss": 0.8623,
      "step": 631180
    },
    {
      "epoch": 2.212163587743186,
      "grad_norm": 2.828125,
      "learning_rate": 1.4589645137087834e-05,
      "loss": 0.8055,
      "step": 631190
    },
    {
      "epoch": 2.2121986352500813,
      "grad_norm": 2.921875,
      "learning_rate": 1.4588996108424133e-05,
      "loss": 0.822,
      "step": 631200
    },
    {
      "epoch": 2.212233682756977,
      "grad_norm": 3.0,
      "learning_rate": 1.4588347079760431e-05,
      "loss": 0.8025,
      "step": 631210
    },
    {
      "epoch": 2.212268730263873,
      "grad_norm": 2.890625,
      "learning_rate": 1.458769805109673e-05,
      "loss": 0.7258,
      "step": 631220
    },
    {
      "epoch": 2.212303777770768,
      "grad_norm": 2.25,
      "learning_rate": 1.4587049022433027e-05,
      "loss": 0.8046,
      "step": 631230
    },
    {
      "epoch": 2.212338825277664,
      "grad_norm": 3.1875,
      "learning_rate": 1.4586399993769325e-05,
      "loss": 0.7684,
      "step": 631240
    },
    {
      "epoch": 2.2123738727845597,
      "grad_norm": 3.5,
      "learning_rate": 1.4585750965105623e-05,
      "loss": 0.8397,
      "step": 631250
    },
    {
      "epoch": 2.212408920291455,
      "grad_norm": 2.578125,
      "learning_rate": 1.4585101936441923e-05,
      "loss": 0.7394,
      "step": 631260
    },
    {
      "epoch": 2.2124439677983507,
      "grad_norm": 2.578125,
      "learning_rate": 1.4584452907778221e-05,
      "loss": 0.814,
      "step": 631270
    },
    {
      "epoch": 2.212479015305246,
      "grad_norm": 3.0625,
      "learning_rate": 1.4583803879114519e-05,
      "loss": 0.8942,
      "step": 631280
    },
    {
      "epoch": 2.212514062812142,
      "grad_norm": 2.515625,
      "learning_rate": 1.4583154850450817e-05,
      "loss": 0.9137,
      "step": 631290
    },
    {
      "epoch": 2.2125491103190376,
      "grad_norm": 2.8125,
      "learning_rate": 1.4582505821787115e-05,
      "loss": 0.7915,
      "step": 631300
    },
    {
      "epoch": 2.212584157825933,
      "grad_norm": 2.921875,
      "learning_rate": 1.4581856793123413e-05,
      "loss": 0.7901,
      "step": 631310
    },
    {
      "epoch": 2.2126192053328286,
      "grad_norm": 3.46875,
      "learning_rate": 1.458120776445971e-05,
      "loss": 0.8769,
      "step": 631320
    },
    {
      "epoch": 2.2126542528397244,
      "grad_norm": 2.875,
      "learning_rate": 1.4580558735796007e-05,
      "loss": 0.8164,
      "step": 631330
    },
    {
      "epoch": 2.2126893003466197,
      "grad_norm": 3.4375,
      "learning_rate": 1.4579909707132305e-05,
      "loss": 0.7903,
      "step": 631340
    },
    {
      "epoch": 2.2127243478535155,
      "grad_norm": 3.078125,
      "learning_rate": 1.4579260678468603e-05,
      "loss": 0.7839,
      "step": 631350
    },
    {
      "epoch": 2.212759395360411,
      "grad_norm": 2.703125,
      "learning_rate": 1.4578611649804901e-05,
      "loss": 0.8845,
      "step": 631360
    },
    {
      "epoch": 2.2127944428673065,
      "grad_norm": 3.046875,
      "learning_rate": 1.45779626211412e-05,
      "loss": 0.8149,
      "step": 631370
    },
    {
      "epoch": 2.2128294903742023,
      "grad_norm": 3.328125,
      "learning_rate": 1.4577313592477499e-05,
      "loss": 0.8212,
      "step": 631380
    },
    {
      "epoch": 2.2128645378810976,
      "grad_norm": 3.296875,
      "learning_rate": 1.4576664563813797e-05,
      "loss": 0.8085,
      "step": 631390
    },
    {
      "epoch": 2.2128995853879934,
      "grad_norm": 2.984375,
      "learning_rate": 1.4576015535150095e-05,
      "loss": 0.8075,
      "step": 631400
    },
    {
      "epoch": 2.212934632894889,
      "grad_norm": 2.96875,
      "learning_rate": 1.4575366506486393e-05,
      "loss": 0.8336,
      "step": 631410
    },
    {
      "epoch": 2.2129696804017844,
      "grad_norm": 3.203125,
      "learning_rate": 1.4574717477822691e-05,
      "loss": 0.8414,
      "step": 631420
    },
    {
      "epoch": 2.21300472790868,
      "grad_norm": 2.734375,
      "learning_rate": 1.4574068449158989e-05,
      "loss": 0.7869,
      "step": 631430
    },
    {
      "epoch": 2.213039775415576,
      "grad_norm": 2.9375,
      "learning_rate": 1.4573419420495287e-05,
      "loss": 0.8589,
      "step": 631440
    },
    {
      "epoch": 2.2130748229224713,
      "grad_norm": 2.765625,
      "learning_rate": 1.4572770391831587e-05,
      "loss": 0.8582,
      "step": 631450
    },
    {
      "epoch": 2.213109870429367,
      "grad_norm": 3.25,
      "learning_rate": 1.4572121363167885e-05,
      "loss": 0.8314,
      "step": 631460
    },
    {
      "epoch": 2.213144917936263,
      "grad_norm": 2.984375,
      "learning_rate": 1.4571472334504183e-05,
      "loss": 0.8393,
      "step": 631470
    },
    {
      "epoch": 2.213179965443158,
      "grad_norm": 3.03125,
      "learning_rate": 1.457082330584048e-05,
      "loss": 0.7885,
      "step": 631480
    },
    {
      "epoch": 2.213215012950054,
      "grad_norm": 2.921875,
      "learning_rate": 1.4570174277176779e-05,
      "loss": 0.8921,
      "step": 631490
    },
    {
      "epoch": 2.213250060456949,
      "grad_norm": 3.234375,
      "learning_rate": 1.4569525248513077e-05,
      "loss": 0.8953,
      "step": 631500
    },
    {
      "epoch": 2.213285107963845,
      "grad_norm": 3.0,
      "learning_rate": 1.4568876219849373e-05,
      "loss": 0.7339,
      "step": 631510
    },
    {
      "epoch": 2.2133201554707407,
      "grad_norm": 2.921875,
      "learning_rate": 1.4568227191185671e-05,
      "loss": 0.7701,
      "step": 631520
    },
    {
      "epoch": 2.213355202977636,
      "grad_norm": 3.3125,
      "learning_rate": 1.4567578162521969e-05,
      "loss": 0.7919,
      "step": 631530
    },
    {
      "epoch": 2.2133902504845318,
      "grad_norm": 3.0,
      "learning_rate": 1.4566929133858267e-05,
      "loss": 0.8759,
      "step": 631540
    },
    {
      "epoch": 2.2134252979914275,
      "grad_norm": 4.34375,
      "learning_rate": 1.4566280105194565e-05,
      "loss": 0.8301,
      "step": 631550
    },
    {
      "epoch": 2.213460345498323,
      "grad_norm": 2.71875,
      "learning_rate": 1.4565631076530865e-05,
      "loss": 0.7934,
      "step": 631560
    },
    {
      "epoch": 2.2134953930052186,
      "grad_norm": 2.875,
      "learning_rate": 1.4564982047867163e-05,
      "loss": 0.8152,
      "step": 631570
    },
    {
      "epoch": 2.2135304405121143,
      "grad_norm": 2.65625,
      "learning_rate": 1.456433301920346e-05,
      "loss": 0.7773,
      "step": 631580
    },
    {
      "epoch": 2.2135654880190097,
      "grad_norm": 3.296875,
      "learning_rate": 1.4563683990539759e-05,
      "loss": 0.806,
      "step": 631590
    },
    {
      "epoch": 2.2136005355259054,
      "grad_norm": 2.609375,
      "learning_rate": 1.4563034961876057e-05,
      "loss": 0.7715,
      "step": 631600
    },
    {
      "epoch": 2.213635583032801,
      "grad_norm": 3.015625,
      "learning_rate": 1.4562385933212355e-05,
      "loss": 0.7747,
      "step": 631610
    },
    {
      "epoch": 2.2136706305396965,
      "grad_norm": 2.515625,
      "learning_rate": 1.4561736904548653e-05,
      "loss": 0.7791,
      "step": 631620
    },
    {
      "epoch": 2.2137056780465922,
      "grad_norm": 3.03125,
      "learning_rate": 1.4561087875884952e-05,
      "loss": 0.7298,
      "step": 631630
    },
    {
      "epoch": 2.2137407255534876,
      "grad_norm": 2.25,
      "learning_rate": 1.456043884722125e-05,
      "loss": 0.8946,
      "step": 631640
    },
    {
      "epoch": 2.2137757730603833,
      "grad_norm": 3.0,
      "learning_rate": 1.4559789818557548e-05,
      "loss": 0.8342,
      "step": 631650
    },
    {
      "epoch": 2.213810820567279,
      "grad_norm": 3.015625,
      "learning_rate": 1.4559140789893846e-05,
      "loss": 0.8369,
      "step": 631660
    },
    {
      "epoch": 2.2138458680741744,
      "grad_norm": 3.03125,
      "learning_rate": 1.4558491761230144e-05,
      "loss": 0.7983,
      "step": 631670
    },
    {
      "epoch": 2.21388091558107,
      "grad_norm": 2.515625,
      "learning_rate": 1.4557842732566442e-05,
      "loss": 0.7862,
      "step": 631680
    },
    {
      "epoch": 2.213915963087966,
      "grad_norm": 3.015625,
      "learning_rate": 1.455719370390274e-05,
      "loss": 0.8797,
      "step": 631690
    },
    {
      "epoch": 2.213951010594861,
      "grad_norm": 2.84375,
      "learning_rate": 1.4556544675239037e-05,
      "loss": 0.7502,
      "step": 631700
    },
    {
      "epoch": 2.213986058101757,
      "grad_norm": 2.890625,
      "learning_rate": 1.4555895646575335e-05,
      "loss": 0.8217,
      "step": 631710
    },
    {
      "epoch": 2.2140211056086527,
      "grad_norm": 3.109375,
      "learning_rate": 1.4555246617911633e-05,
      "loss": 0.8342,
      "step": 631720
    },
    {
      "epoch": 2.214056153115548,
      "grad_norm": 2.46875,
      "learning_rate": 1.455459758924793e-05,
      "loss": 0.7994,
      "step": 631730
    },
    {
      "epoch": 2.214091200622444,
      "grad_norm": 2.828125,
      "learning_rate": 1.4553948560584229e-05,
      "loss": 0.8727,
      "step": 631740
    },
    {
      "epoch": 2.214126248129339,
      "grad_norm": 2.828125,
      "learning_rate": 1.4553299531920528e-05,
      "loss": 0.7807,
      "step": 631750
    },
    {
      "epoch": 2.214161295636235,
      "grad_norm": 2.640625,
      "learning_rate": 1.4552650503256826e-05,
      "loss": 0.7713,
      "step": 631760
    },
    {
      "epoch": 2.2141963431431306,
      "grad_norm": 3.09375,
      "learning_rate": 1.4552001474593124e-05,
      "loss": 0.8373,
      "step": 631770
    },
    {
      "epoch": 2.214231390650026,
      "grad_norm": 2.84375,
      "learning_rate": 1.4551352445929422e-05,
      "loss": 0.792,
      "step": 631780
    },
    {
      "epoch": 2.2142664381569217,
      "grad_norm": 2.9375,
      "learning_rate": 1.455070341726572e-05,
      "loss": 0.8234,
      "step": 631790
    },
    {
      "epoch": 2.2143014856638175,
      "grad_norm": 3.234375,
      "learning_rate": 1.4550054388602018e-05,
      "loss": 0.8826,
      "step": 631800
    },
    {
      "epoch": 2.2143365331707128,
      "grad_norm": 2.921875,
      "learning_rate": 1.4549405359938318e-05,
      "loss": 0.8603,
      "step": 631810
    },
    {
      "epoch": 2.2143715806776085,
      "grad_norm": 2.921875,
      "learning_rate": 1.4548756331274616e-05,
      "loss": 0.7972,
      "step": 631820
    },
    {
      "epoch": 2.2144066281845043,
      "grad_norm": 2.734375,
      "learning_rate": 1.4548107302610914e-05,
      "loss": 0.7782,
      "step": 631830
    },
    {
      "epoch": 2.2144416756913996,
      "grad_norm": 3.203125,
      "learning_rate": 1.4547458273947212e-05,
      "loss": 0.8439,
      "step": 631840
    },
    {
      "epoch": 2.2144767231982954,
      "grad_norm": 2.84375,
      "learning_rate": 1.454680924528351e-05,
      "loss": 0.8316,
      "step": 631850
    },
    {
      "epoch": 2.214511770705191,
      "grad_norm": 3.09375,
      "learning_rate": 1.4546160216619808e-05,
      "loss": 0.892,
      "step": 631860
    },
    {
      "epoch": 2.2145468182120864,
      "grad_norm": 2.921875,
      "learning_rate": 1.4545511187956106e-05,
      "loss": 0.8446,
      "step": 631870
    },
    {
      "epoch": 2.214581865718982,
      "grad_norm": 2.5625,
      "learning_rate": 1.4544862159292406e-05,
      "loss": 0.8095,
      "step": 631880
    },
    {
      "epoch": 2.2146169132258775,
      "grad_norm": 2.90625,
      "learning_rate": 1.45442131306287e-05,
      "loss": 0.7425,
      "step": 631890
    },
    {
      "epoch": 2.2146519607327733,
      "grad_norm": 2.40625,
      "learning_rate": 1.4543564101964998e-05,
      "loss": 0.833,
      "step": 631900
    },
    {
      "epoch": 2.214687008239669,
      "grad_norm": 3.03125,
      "learning_rate": 1.4542915073301296e-05,
      "loss": 0.7586,
      "step": 631910
    },
    {
      "epoch": 2.2147220557465643,
      "grad_norm": 2.71875,
      "learning_rate": 1.4542266044637594e-05,
      "loss": 0.8424,
      "step": 631920
    },
    {
      "epoch": 2.21475710325346,
      "grad_norm": 2.8125,
      "learning_rate": 1.4541617015973894e-05,
      "loss": 0.7994,
      "step": 631930
    },
    {
      "epoch": 2.214792150760356,
      "grad_norm": 2.921875,
      "learning_rate": 1.4540967987310192e-05,
      "loss": 0.8581,
      "step": 631940
    },
    {
      "epoch": 2.214827198267251,
      "grad_norm": 2.9375,
      "learning_rate": 1.454031895864649e-05,
      "loss": 0.8695,
      "step": 631950
    },
    {
      "epoch": 2.214862245774147,
      "grad_norm": 2.734375,
      "learning_rate": 1.4539669929982788e-05,
      "loss": 0.9125,
      "step": 631960
    },
    {
      "epoch": 2.2148972932810427,
      "grad_norm": 3.109375,
      "learning_rate": 1.4539020901319086e-05,
      "loss": 0.855,
      "step": 631970
    },
    {
      "epoch": 2.214932340787938,
      "grad_norm": 3.265625,
      "learning_rate": 1.4538371872655384e-05,
      "loss": 0.9062,
      "step": 631980
    },
    {
      "epoch": 2.2149673882948337,
      "grad_norm": 2.625,
      "learning_rate": 1.4537722843991682e-05,
      "loss": 0.8774,
      "step": 631990
    },
    {
      "epoch": 2.215002435801729,
      "grad_norm": 2.953125,
      "learning_rate": 1.4537073815327982e-05,
      "loss": 0.7877,
      "step": 632000
    },
    {
      "epoch": 2.215037483308625,
      "grad_norm": 3.0625,
      "learning_rate": 1.453642478666428e-05,
      "loss": 0.8128,
      "step": 632010
    },
    {
      "epoch": 2.2150725308155206,
      "grad_norm": 2.609375,
      "learning_rate": 1.4535775758000578e-05,
      "loss": 0.8226,
      "step": 632020
    },
    {
      "epoch": 2.215107578322416,
      "grad_norm": 2.703125,
      "learning_rate": 1.4535126729336876e-05,
      "loss": 0.7528,
      "step": 632030
    },
    {
      "epoch": 2.2151426258293117,
      "grad_norm": 3.125,
      "learning_rate": 1.4534477700673174e-05,
      "loss": 0.8638,
      "step": 632040
    },
    {
      "epoch": 2.2151776733362074,
      "grad_norm": 2.6875,
      "learning_rate": 1.4533828672009472e-05,
      "loss": 0.7604,
      "step": 632050
    },
    {
      "epoch": 2.2152127208431027,
      "grad_norm": 3.28125,
      "learning_rate": 1.453317964334577e-05,
      "loss": 0.8368,
      "step": 632060
    },
    {
      "epoch": 2.2152477683499985,
      "grad_norm": 3.140625,
      "learning_rate": 1.453253061468207e-05,
      "loss": 0.7955,
      "step": 632070
    },
    {
      "epoch": 2.2152828158568942,
      "grad_norm": 2.46875,
      "learning_rate": 1.4531881586018364e-05,
      "loss": 0.7963,
      "step": 632080
    },
    {
      "epoch": 2.2153178633637896,
      "grad_norm": 2.78125,
      "learning_rate": 1.4531232557354662e-05,
      "loss": 0.7962,
      "step": 632090
    },
    {
      "epoch": 2.2153529108706853,
      "grad_norm": 3.0,
      "learning_rate": 1.453058352869096e-05,
      "loss": 0.769,
      "step": 632100
    },
    {
      "epoch": 2.2153879583775806,
      "grad_norm": 3.265625,
      "learning_rate": 1.452993450002726e-05,
      "loss": 0.857,
      "step": 632110
    },
    {
      "epoch": 2.2154230058844764,
      "grad_norm": 2.90625,
      "learning_rate": 1.4529285471363558e-05,
      "loss": 0.7951,
      "step": 632120
    },
    {
      "epoch": 2.215458053391372,
      "grad_norm": 3.0,
      "learning_rate": 1.4528636442699856e-05,
      "loss": 0.8191,
      "step": 632130
    },
    {
      "epoch": 2.2154931008982675,
      "grad_norm": 2.90625,
      "learning_rate": 1.4527987414036154e-05,
      "loss": 0.8655,
      "step": 632140
    },
    {
      "epoch": 2.215528148405163,
      "grad_norm": 3.03125,
      "learning_rate": 1.4527338385372452e-05,
      "loss": 0.8746,
      "step": 632150
    },
    {
      "epoch": 2.215563195912059,
      "grad_norm": 3.0,
      "learning_rate": 1.452668935670875e-05,
      "loss": 0.7581,
      "step": 632160
    },
    {
      "epoch": 2.2155982434189543,
      "grad_norm": 3.234375,
      "learning_rate": 1.4526040328045048e-05,
      "loss": 0.7728,
      "step": 632170
    },
    {
      "epoch": 2.21563329092585,
      "grad_norm": 2.828125,
      "learning_rate": 1.4525391299381348e-05,
      "loss": 0.811,
      "step": 632180
    },
    {
      "epoch": 2.215668338432746,
      "grad_norm": 3.28125,
      "learning_rate": 1.4524742270717646e-05,
      "loss": 0.8411,
      "step": 632190
    },
    {
      "epoch": 2.215703385939641,
      "grad_norm": 3.125,
      "learning_rate": 1.4524093242053944e-05,
      "loss": 0.8409,
      "step": 632200
    },
    {
      "epoch": 2.215738433446537,
      "grad_norm": 2.875,
      "learning_rate": 1.4523444213390242e-05,
      "loss": 0.7787,
      "step": 632210
    },
    {
      "epoch": 2.215773480953432,
      "grad_norm": 2.96875,
      "learning_rate": 1.452279518472654e-05,
      "loss": 0.8094,
      "step": 632220
    },
    {
      "epoch": 2.215808528460328,
      "grad_norm": 2.75,
      "learning_rate": 1.4522146156062838e-05,
      "loss": 0.8467,
      "step": 632230
    },
    {
      "epoch": 2.2158435759672237,
      "grad_norm": 2.921875,
      "learning_rate": 1.4521497127399136e-05,
      "loss": 0.7512,
      "step": 632240
    },
    {
      "epoch": 2.215878623474119,
      "grad_norm": 2.875,
      "learning_rate": 1.4520848098735435e-05,
      "loss": 0.8218,
      "step": 632250
    },
    {
      "epoch": 2.2159136709810148,
      "grad_norm": 2.59375,
      "learning_rate": 1.452019907007173e-05,
      "loss": 0.7708,
      "step": 632260
    },
    {
      "epoch": 2.2159487184879105,
      "grad_norm": 2.796875,
      "learning_rate": 1.4519550041408028e-05,
      "loss": 0.7414,
      "step": 632270
    },
    {
      "epoch": 2.215983765994806,
      "grad_norm": 2.78125,
      "learning_rate": 1.4518901012744326e-05,
      "loss": 0.8609,
      "step": 632280
    },
    {
      "epoch": 2.2160188135017016,
      "grad_norm": 2.40625,
      "learning_rate": 1.4518251984080626e-05,
      "loss": 0.7867,
      "step": 632290
    },
    {
      "epoch": 2.2160538610085974,
      "grad_norm": 2.875,
      "learning_rate": 1.4517602955416924e-05,
      "loss": 0.7508,
      "step": 632300
    },
    {
      "epoch": 2.2160889085154927,
      "grad_norm": 3.171875,
      "learning_rate": 1.4516953926753222e-05,
      "loss": 0.848,
      "step": 632310
    },
    {
      "epoch": 2.2161239560223884,
      "grad_norm": 2.65625,
      "learning_rate": 1.451630489808952e-05,
      "loss": 0.8515,
      "step": 632320
    },
    {
      "epoch": 2.2161590035292837,
      "grad_norm": 2.796875,
      "learning_rate": 1.4515655869425818e-05,
      "loss": 0.6968,
      "step": 632330
    },
    {
      "epoch": 2.2161940510361795,
      "grad_norm": 2.453125,
      "learning_rate": 1.4515006840762116e-05,
      "loss": 0.8807,
      "step": 632340
    },
    {
      "epoch": 2.2162290985430753,
      "grad_norm": 2.96875,
      "learning_rate": 1.4514357812098414e-05,
      "loss": 0.7952,
      "step": 632350
    },
    {
      "epoch": 2.2162641460499706,
      "grad_norm": 2.875,
      "learning_rate": 1.4513708783434713e-05,
      "loss": 0.7629,
      "step": 632360
    },
    {
      "epoch": 2.2162991935568663,
      "grad_norm": 2.90625,
      "learning_rate": 1.4513059754771011e-05,
      "loss": 0.7749,
      "step": 632370
    },
    {
      "epoch": 2.216334241063762,
      "grad_norm": 3.140625,
      "learning_rate": 1.451241072610731e-05,
      "loss": 0.8264,
      "step": 632380
    },
    {
      "epoch": 2.2163692885706574,
      "grad_norm": 3.0,
      "learning_rate": 1.4511761697443607e-05,
      "loss": 0.9125,
      "step": 632390
    },
    {
      "epoch": 2.216404336077553,
      "grad_norm": 3.125,
      "learning_rate": 1.4511112668779905e-05,
      "loss": 0.8613,
      "step": 632400
    },
    {
      "epoch": 2.216439383584449,
      "grad_norm": 2.671875,
      "learning_rate": 1.4510463640116203e-05,
      "loss": 0.8353,
      "step": 632410
    },
    {
      "epoch": 2.2164744310913442,
      "grad_norm": 2.734375,
      "learning_rate": 1.4509814611452501e-05,
      "loss": 0.8046,
      "step": 632420
    },
    {
      "epoch": 2.21650947859824,
      "grad_norm": 3.140625,
      "learning_rate": 1.4509165582788801e-05,
      "loss": 0.8438,
      "step": 632430
    },
    {
      "epoch": 2.2165445261051353,
      "grad_norm": 2.953125,
      "learning_rate": 1.4508516554125099e-05,
      "loss": 0.8013,
      "step": 632440
    },
    {
      "epoch": 2.216579573612031,
      "grad_norm": 3.3125,
      "learning_rate": 1.4507867525461394e-05,
      "loss": 0.8168,
      "step": 632450
    },
    {
      "epoch": 2.216614621118927,
      "grad_norm": 2.75,
      "learning_rate": 1.4507218496797692e-05,
      "loss": 0.7444,
      "step": 632460
    },
    {
      "epoch": 2.216649668625822,
      "grad_norm": 2.578125,
      "learning_rate": 1.450656946813399e-05,
      "loss": 0.7654,
      "step": 632470
    },
    {
      "epoch": 2.216684716132718,
      "grad_norm": 2.765625,
      "learning_rate": 1.450592043947029e-05,
      "loss": 0.7834,
      "step": 632480
    },
    {
      "epoch": 2.2167197636396136,
      "grad_norm": 3.21875,
      "learning_rate": 1.4505271410806587e-05,
      "loss": 0.841,
      "step": 632490
    },
    {
      "epoch": 2.216754811146509,
      "grad_norm": 2.84375,
      "learning_rate": 1.4504622382142885e-05,
      "loss": 0.7842,
      "step": 632500
    },
    {
      "epoch": 2.2167898586534047,
      "grad_norm": 2.59375,
      "learning_rate": 1.4503973353479183e-05,
      "loss": 0.7527,
      "step": 632510
    },
    {
      "epoch": 2.2168249061603005,
      "grad_norm": 2.96875,
      "learning_rate": 1.4503324324815481e-05,
      "loss": 0.7979,
      "step": 632520
    },
    {
      "epoch": 2.216859953667196,
      "grad_norm": 2.859375,
      "learning_rate": 1.450267529615178e-05,
      "loss": 0.831,
      "step": 632530
    },
    {
      "epoch": 2.2168950011740916,
      "grad_norm": 2.921875,
      "learning_rate": 1.4502026267488077e-05,
      "loss": 0.7967,
      "step": 632540
    },
    {
      "epoch": 2.216930048680987,
      "grad_norm": 2.671875,
      "learning_rate": 1.4501377238824377e-05,
      "loss": 0.8047,
      "step": 632550
    },
    {
      "epoch": 2.2169650961878826,
      "grad_norm": 3.28125,
      "learning_rate": 1.4500728210160675e-05,
      "loss": 0.8094,
      "step": 632560
    },
    {
      "epoch": 2.2170001436947784,
      "grad_norm": 3.21875,
      "learning_rate": 1.4500079181496973e-05,
      "loss": 0.7765,
      "step": 632570
    },
    {
      "epoch": 2.2170351912016737,
      "grad_norm": 3.140625,
      "learning_rate": 1.4499430152833271e-05,
      "loss": 0.7767,
      "step": 632580
    },
    {
      "epoch": 2.2170702387085695,
      "grad_norm": 2.546875,
      "learning_rate": 1.4498781124169569e-05,
      "loss": 0.7692,
      "step": 632590
    },
    {
      "epoch": 2.217105286215465,
      "grad_norm": 2.9375,
      "learning_rate": 1.4498132095505867e-05,
      "loss": 0.8601,
      "step": 632600
    },
    {
      "epoch": 2.2171403337223605,
      "grad_norm": 2.828125,
      "learning_rate": 1.4497483066842165e-05,
      "loss": 0.7535,
      "step": 632610
    },
    {
      "epoch": 2.2171753812292563,
      "grad_norm": 2.875,
      "learning_rate": 1.4496834038178465e-05,
      "loss": 0.7807,
      "step": 632620
    },
    {
      "epoch": 2.217210428736152,
      "grad_norm": 2.703125,
      "learning_rate": 1.4496185009514763e-05,
      "loss": 0.8904,
      "step": 632630
    },
    {
      "epoch": 2.2172454762430474,
      "grad_norm": 2.90625,
      "learning_rate": 1.4495535980851057e-05,
      "loss": 0.784,
      "step": 632640
    },
    {
      "epoch": 2.217280523749943,
      "grad_norm": 2.75,
      "learning_rate": 1.4494886952187355e-05,
      "loss": 0.8085,
      "step": 632650
    },
    {
      "epoch": 2.2173155712568384,
      "grad_norm": 2.625,
      "learning_rate": 1.4494237923523655e-05,
      "loss": 0.807,
      "step": 632660
    },
    {
      "epoch": 2.217350618763734,
      "grad_norm": 2.90625,
      "learning_rate": 1.4493588894859953e-05,
      "loss": 0.8154,
      "step": 632670
    },
    {
      "epoch": 2.21738566627063,
      "grad_norm": 3.21875,
      "learning_rate": 1.4492939866196251e-05,
      "loss": 0.6958,
      "step": 632680
    },
    {
      "epoch": 2.2174207137775253,
      "grad_norm": 3.25,
      "learning_rate": 1.4492290837532549e-05,
      "loss": 0.8332,
      "step": 632690
    },
    {
      "epoch": 2.217455761284421,
      "grad_norm": 3.0,
      "learning_rate": 1.4491641808868847e-05,
      "loss": 0.8746,
      "step": 632700
    },
    {
      "epoch": 2.2174908087913168,
      "grad_norm": 2.6875,
      "learning_rate": 1.4490992780205145e-05,
      "loss": 0.7614,
      "step": 632710
    },
    {
      "epoch": 2.217525856298212,
      "grad_norm": 2.6875,
      "learning_rate": 1.4490343751541443e-05,
      "loss": 0.8439,
      "step": 632720
    },
    {
      "epoch": 2.217560903805108,
      "grad_norm": 2.578125,
      "learning_rate": 1.4489694722877743e-05,
      "loss": 0.7234,
      "step": 632730
    },
    {
      "epoch": 2.2175959513120036,
      "grad_norm": 3.203125,
      "learning_rate": 1.448904569421404e-05,
      "loss": 0.7662,
      "step": 632740
    },
    {
      "epoch": 2.217630998818899,
      "grad_norm": 3.0,
      "learning_rate": 1.4488396665550339e-05,
      "loss": 0.8693,
      "step": 632750
    },
    {
      "epoch": 2.2176660463257947,
      "grad_norm": 2.8125,
      "learning_rate": 1.4487747636886637e-05,
      "loss": 0.835,
      "step": 632760
    },
    {
      "epoch": 2.21770109383269,
      "grad_norm": 9.6875,
      "learning_rate": 1.4487098608222935e-05,
      "loss": 0.7876,
      "step": 632770
    },
    {
      "epoch": 2.2177361413395857,
      "grad_norm": 3.125,
      "learning_rate": 1.4486449579559233e-05,
      "loss": 0.8083,
      "step": 632780
    },
    {
      "epoch": 2.2177711888464815,
      "grad_norm": 2.796875,
      "learning_rate": 1.448580055089553e-05,
      "loss": 0.7921,
      "step": 632790
    },
    {
      "epoch": 2.217806236353377,
      "grad_norm": 2.296875,
      "learning_rate": 1.448515152223183e-05,
      "loss": 0.8448,
      "step": 632800
    },
    {
      "epoch": 2.2178412838602726,
      "grad_norm": 3.265625,
      "learning_rate": 1.4484502493568128e-05,
      "loss": 0.8826,
      "step": 632810
    },
    {
      "epoch": 2.2178763313671683,
      "grad_norm": 3.046875,
      "learning_rate": 1.4483853464904426e-05,
      "loss": 0.9185,
      "step": 632820
    },
    {
      "epoch": 2.2179113788740636,
      "grad_norm": 2.609375,
      "learning_rate": 1.4483204436240721e-05,
      "loss": 0.7807,
      "step": 632830
    },
    {
      "epoch": 2.2179464263809594,
      "grad_norm": 2.921875,
      "learning_rate": 1.448255540757702e-05,
      "loss": 0.903,
      "step": 632840
    },
    {
      "epoch": 2.217981473887855,
      "grad_norm": 3.21875,
      "learning_rate": 1.4481906378913319e-05,
      "loss": 0.7703,
      "step": 632850
    },
    {
      "epoch": 2.2180165213947505,
      "grad_norm": 3.09375,
      "learning_rate": 1.4481257350249617e-05,
      "loss": 0.9209,
      "step": 632860
    },
    {
      "epoch": 2.2180515689016462,
      "grad_norm": 3.078125,
      "learning_rate": 1.4480608321585915e-05,
      "loss": 0.7929,
      "step": 632870
    },
    {
      "epoch": 2.2180866164085415,
      "grad_norm": 2.875,
      "learning_rate": 1.4479959292922213e-05,
      "loss": 0.8096,
      "step": 632880
    },
    {
      "epoch": 2.2181216639154373,
      "grad_norm": 2.9375,
      "learning_rate": 1.447931026425851e-05,
      "loss": 0.7841,
      "step": 632890
    },
    {
      "epoch": 2.218156711422333,
      "grad_norm": 2.953125,
      "learning_rate": 1.4478661235594809e-05,
      "loss": 0.7944,
      "step": 632900
    },
    {
      "epoch": 2.2181917589292284,
      "grad_norm": 3.0,
      "learning_rate": 1.4478012206931108e-05,
      "loss": 0.8203,
      "step": 632910
    },
    {
      "epoch": 2.218226806436124,
      "grad_norm": 3.3125,
      "learning_rate": 1.4477363178267406e-05,
      "loss": 0.9241,
      "step": 632920
    },
    {
      "epoch": 2.21826185394302,
      "grad_norm": 2.640625,
      "learning_rate": 1.4476714149603704e-05,
      "loss": 0.8419,
      "step": 632930
    },
    {
      "epoch": 2.218296901449915,
      "grad_norm": 3.140625,
      "learning_rate": 1.4476065120940002e-05,
      "loss": 0.835,
      "step": 632940
    },
    {
      "epoch": 2.218331948956811,
      "grad_norm": 2.546875,
      "learning_rate": 1.44754160922763e-05,
      "loss": 0.77,
      "step": 632950
    },
    {
      "epoch": 2.2183669964637067,
      "grad_norm": 2.453125,
      "learning_rate": 1.4474767063612598e-05,
      "loss": 0.803,
      "step": 632960
    },
    {
      "epoch": 2.218402043970602,
      "grad_norm": 2.84375,
      "learning_rate": 1.4474118034948896e-05,
      "loss": 0.8259,
      "step": 632970
    },
    {
      "epoch": 2.218437091477498,
      "grad_norm": 2.390625,
      "learning_rate": 1.4473469006285196e-05,
      "loss": 0.7756,
      "step": 632980
    },
    {
      "epoch": 2.2184721389843935,
      "grad_norm": 2.671875,
      "learning_rate": 1.4472819977621494e-05,
      "loss": 0.7442,
      "step": 632990
    },
    {
      "epoch": 2.218507186491289,
      "grad_norm": 2.953125,
      "learning_rate": 1.4472170948957792e-05,
      "loss": 0.7825,
      "step": 633000
    },
    {
      "epoch": 2.2185422339981846,
      "grad_norm": 2.421875,
      "learning_rate": 1.447152192029409e-05,
      "loss": 0.855,
      "step": 633010
    },
    {
      "epoch": 2.21857728150508,
      "grad_norm": 3.125,
      "learning_rate": 1.4470872891630385e-05,
      "loss": 0.8541,
      "step": 633020
    },
    {
      "epoch": 2.2186123290119757,
      "grad_norm": 2.640625,
      "learning_rate": 1.4470223862966684e-05,
      "loss": 0.8272,
      "step": 633030
    },
    {
      "epoch": 2.2186473765188715,
      "grad_norm": 2.859375,
      "learning_rate": 1.4469574834302982e-05,
      "loss": 0.7758,
      "step": 633040
    },
    {
      "epoch": 2.2186824240257668,
      "grad_norm": 2.84375,
      "learning_rate": 1.446892580563928e-05,
      "loss": 0.886,
      "step": 633050
    },
    {
      "epoch": 2.2187174715326625,
      "grad_norm": 2.75,
      "learning_rate": 1.4468276776975578e-05,
      "loss": 0.7325,
      "step": 633060
    },
    {
      "epoch": 2.2187525190395583,
      "grad_norm": 2.765625,
      "learning_rate": 1.4467627748311876e-05,
      "loss": 0.8526,
      "step": 633070
    },
    {
      "epoch": 2.2187875665464536,
      "grad_norm": 3.1875,
      "learning_rate": 1.4466978719648174e-05,
      "loss": 0.8201,
      "step": 633080
    },
    {
      "epoch": 2.2188226140533494,
      "grad_norm": 2.765625,
      "learning_rate": 1.4466329690984472e-05,
      "loss": 0.7769,
      "step": 633090
    },
    {
      "epoch": 2.218857661560245,
      "grad_norm": 3.171875,
      "learning_rate": 1.4465680662320772e-05,
      "loss": 0.7832,
      "step": 633100
    },
    {
      "epoch": 2.2188927090671404,
      "grad_norm": 3.265625,
      "learning_rate": 1.446503163365707e-05,
      "loss": 0.8625,
      "step": 633110
    },
    {
      "epoch": 2.218927756574036,
      "grad_norm": 2.890625,
      "learning_rate": 1.4464382604993368e-05,
      "loss": 0.8209,
      "step": 633120
    },
    {
      "epoch": 2.218962804080932,
      "grad_norm": 2.5,
      "learning_rate": 1.4463733576329666e-05,
      "loss": 0.8557,
      "step": 633130
    },
    {
      "epoch": 2.2189978515878273,
      "grad_norm": 2.796875,
      "learning_rate": 1.4463084547665964e-05,
      "loss": 0.8502,
      "step": 633140
    },
    {
      "epoch": 2.219032899094723,
      "grad_norm": 3.125,
      "learning_rate": 1.4462435519002262e-05,
      "loss": 0.7447,
      "step": 633150
    },
    {
      "epoch": 2.2190679466016183,
      "grad_norm": 3.125,
      "learning_rate": 1.446178649033856e-05,
      "loss": 0.7732,
      "step": 633160
    },
    {
      "epoch": 2.219102994108514,
      "grad_norm": 3.015625,
      "learning_rate": 1.446113746167486e-05,
      "loss": 0.7668,
      "step": 633170
    },
    {
      "epoch": 2.21913804161541,
      "grad_norm": 2.875,
      "learning_rate": 1.4460488433011158e-05,
      "loss": 0.7855,
      "step": 633180
    },
    {
      "epoch": 2.219173089122305,
      "grad_norm": 2.953125,
      "learning_rate": 1.4459839404347456e-05,
      "loss": 0.8716,
      "step": 633190
    },
    {
      "epoch": 2.219208136629201,
      "grad_norm": 2.953125,
      "learning_rate": 1.445919037568375e-05,
      "loss": 0.7568,
      "step": 633200
    },
    {
      "epoch": 2.2192431841360967,
      "grad_norm": 3.015625,
      "learning_rate": 1.445854134702005e-05,
      "loss": 0.8409,
      "step": 633210
    },
    {
      "epoch": 2.219278231642992,
      "grad_norm": 2.765625,
      "learning_rate": 1.4457892318356348e-05,
      "loss": 0.8134,
      "step": 633220
    },
    {
      "epoch": 2.2193132791498877,
      "grad_norm": 2.765625,
      "learning_rate": 1.4457243289692646e-05,
      "loss": 0.8024,
      "step": 633230
    },
    {
      "epoch": 2.2193483266567835,
      "grad_norm": 2.828125,
      "learning_rate": 1.4456594261028944e-05,
      "loss": 0.8043,
      "step": 633240
    },
    {
      "epoch": 2.219383374163679,
      "grad_norm": 2.5,
      "learning_rate": 1.4455945232365242e-05,
      "loss": 0.8575,
      "step": 633250
    },
    {
      "epoch": 2.2194184216705746,
      "grad_norm": 3.125,
      "learning_rate": 1.445529620370154e-05,
      "loss": 0.7662,
      "step": 633260
    },
    {
      "epoch": 2.21945346917747,
      "grad_norm": 2.65625,
      "learning_rate": 1.4454647175037838e-05,
      "loss": 0.7717,
      "step": 633270
    },
    {
      "epoch": 2.2194885166843656,
      "grad_norm": 3.53125,
      "learning_rate": 1.4453998146374138e-05,
      "loss": 0.838,
      "step": 633280
    },
    {
      "epoch": 2.2195235641912614,
      "grad_norm": 3.453125,
      "learning_rate": 1.4453349117710436e-05,
      "loss": 0.8307,
      "step": 633290
    },
    {
      "epoch": 2.2195586116981567,
      "grad_norm": 2.796875,
      "learning_rate": 1.4452700089046734e-05,
      "loss": 0.8083,
      "step": 633300
    },
    {
      "epoch": 2.2195936592050525,
      "grad_norm": 3.15625,
      "learning_rate": 1.4452051060383032e-05,
      "loss": 0.7714,
      "step": 633310
    },
    {
      "epoch": 2.2196287067119482,
      "grad_norm": 3.234375,
      "learning_rate": 1.445140203171933e-05,
      "loss": 0.8303,
      "step": 633320
    },
    {
      "epoch": 2.2196637542188435,
      "grad_norm": 3.390625,
      "learning_rate": 1.4450753003055628e-05,
      "loss": 0.7632,
      "step": 633330
    },
    {
      "epoch": 2.2196988017257393,
      "grad_norm": 3.03125,
      "learning_rate": 1.4450103974391926e-05,
      "loss": 0.8372,
      "step": 633340
    },
    {
      "epoch": 2.219733849232635,
      "grad_norm": 2.890625,
      "learning_rate": 1.4449454945728225e-05,
      "loss": 0.8445,
      "step": 633350
    },
    {
      "epoch": 2.2197688967395304,
      "grad_norm": 3.328125,
      "learning_rate": 1.4448805917064523e-05,
      "loss": 0.8947,
      "step": 633360
    },
    {
      "epoch": 2.219803944246426,
      "grad_norm": 2.515625,
      "learning_rate": 1.4448156888400821e-05,
      "loss": 0.7539,
      "step": 633370
    },
    {
      "epoch": 2.2198389917533214,
      "grad_norm": 2.84375,
      "learning_rate": 1.444750785973712e-05,
      "loss": 0.8103,
      "step": 633380
    },
    {
      "epoch": 2.219874039260217,
      "grad_norm": 2.984375,
      "learning_rate": 1.4446858831073416e-05,
      "loss": 0.7877,
      "step": 633390
    },
    {
      "epoch": 2.219909086767113,
      "grad_norm": 3.140625,
      "learning_rate": 1.4446209802409714e-05,
      "loss": 0.8541,
      "step": 633400
    },
    {
      "epoch": 2.2199441342740083,
      "grad_norm": 3.203125,
      "learning_rate": 1.4445560773746012e-05,
      "loss": 0.839,
      "step": 633410
    },
    {
      "epoch": 2.219979181780904,
      "grad_norm": 3.046875,
      "learning_rate": 1.444491174508231e-05,
      "loss": 0.8347,
      "step": 633420
    },
    {
      "epoch": 2.2200142292878,
      "grad_norm": 2.890625,
      "learning_rate": 1.4444262716418608e-05,
      "loss": 0.7239,
      "step": 633430
    },
    {
      "epoch": 2.220049276794695,
      "grad_norm": 2.984375,
      "learning_rate": 1.4443613687754906e-05,
      "loss": 0.7304,
      "step": 633440
    },
    {
      "epoch": 2.220084324301591,
      "grad_norm": 2.6875,
      "learning_rate": 1.4442964659091204e-05,
      "loss": 0.8407,
      "step": 633450
    },
    {
      "epoch": 2.2201193718084866,
      "grad_norm": 3.21875,
      "learning_rate": 1.4442315630427503e-05,
      "loss": 0.8859,
      "step": 633460
    },
    {
      "epoch": 2.220154419315382,
      "grad_norm": 3.1875,
      "learning_rate": 1.4441666601763801e-05,
      "loss": 0.9057,
      "step": 633470
    },
    {
      "epoch": 2.2201894668222777,
      "grad_norm": 2.9375,
      "learning_rate": 1.44410175731001e-05,
      "loss": 0.8192,
      "step": 633480
    },
    {
      "epoch": 2.220224514329173,
      "grad_norm": 3.046875,
      "learning_rate": 1.4440368544436397e-05,
      "loss": 0.8084,
      "step": 633490
    },
    {
      "epoch": 2.2202595618360688,
      "grad_norm": 3.046875,
      "learning_rate": 1.4439719515772695e-05,
      "loss": 0.8534,
      "step": 633500
    },
    {
      "epoch": 2.2202946093429645,
      "grad_norm": 2.96875,
      "learning_rate": 1.4439070487108993e-05,
      "loss": 0.8615,
      "step": 633510
    },
    {
      "epoch": 2.22032965684986,
      "grad_norm": 3.1875,
      "learning_rate": 1.4438421458445291e-05,
      "loss": 0.8581,
      "step": 633520
    },
    {
      "epoch": 2.2203647043567556,
      "grad_norm": 3.09375,
      "learning_rate": 1.4437772429781591e-05,
      "loss": 0.7987,
      "step": 633530
    },
    {
      "epoch": 2.2203997518636513,
      "grad_norm": 3.125,
      "learning_rate": 1.4437123401117889e-05,
      "loss": 0.8114,
      "step": 633540
    },
    {
      "epoch": 2.2204347993705467,
      "grad_norm": 2.96875,
      "learning_rate": 1.4436474372454187e-05,
      "loss": 0.7994,
      "step": 633550
    },
    {
      "epoch": 2.2204698468774424,
      "grad_norm": 2.703125,
      "learning_rate": 1.4435825343790485e-05,
      "loss": 0.7494,
      "step": 633560
    },
    {
      "epoch": 2.220504894384338,
      "grad_norm": 3.15625,
      "learning_rate": 1.4435176315126783e-05,
      "loss": 0.7533,
      "step": 633570
    },
    {
      "epoch": 2.2205399418912335,
      "grad_norm": 3.140625,
      "learning_rate": 1.443452728646308e-05,
      "loss": 0.8478,
      "step": 633580
    },
    {
      "epoch": 2.2205749893981293,
      "grad_norm": 2.5,
      "learning_rate": 1.4433878257799377e-05,
      "loss": 0.8714,
      "step": 633590
    },
    {
      "epoch": 2.2206100369050246,
      "grad_norm": 2.859375,
      "learning_rate": 1.4433229229135675e-05,
      "loss": 0.8195,
      "step": 633600
    },
    {
      "epoch": 2.2206450844119203,
      "grad_norm": 2.796875,
      "learning_rate": 1.4432580200471973e-05,
      "loss": 0.844,
      "step": 633610
    },
    {
      "epoch": 2.220680131918816,
      "grad_norm": 2.90625,
      "learning_rate": 1.4431931171808271e-05,
      "loss": 0.8976,
      "step": 633620
    },
    {
      "epoch": 2.2207151794257114,
      "grad_norm": 2.734375,
      "learning_rate": 1.443128214314457e-05,
      "loss": 0.8395,
      "step": 633630
    },
    {
      "epoch": 2.220750226932607,
      "grad_norm": 2.734375,
      "learning_rate": 1.4430633114480867e-05,
      "loss": 0.7826,
      "step": 633640
    },
    {
      "epoch": 2.220785274439503,
      "grad_norm": 2.6875,
      "learning_rate": 1.4429984085817167e-05,
      "loss": 0.8391,
      "step": 633650
    },
    {
      "epoch": 2.2208203219463982,
      "grad_norm": 3.390625,
      "learning_rate": 1.4429335057153465e-05,
      "loss": 0.7842,
      "step": 633660
    },
    {
      "epoch": 2.220855369453294,
      "grad_norm": 3.1875,
      "learning_rate": 1.4428686028489763e-05,
      "loss": 0.821,
      "step": 633670
    },
    {
      "epoch": 2.2208904169601897,
      "grad_norm": 2.03125,
      "learning_rate": 1.4428036999826061e-05,
      "loss": 0.7778,
      "step": 633680
    },
    {
      "epoch": 2.220925464467085,
      "grad_norm": 2.703125,
      "learning_rate": 1.4427387971162359e-05,
      "loss": 0.8127,
      "step": 633690
    },
    {
      "epoch": 2.220960511973981,
      "grad_norm": 2.875,
      "learning_rate": 1.4426738942498657e-05,
      "loss": 0.8516,
      "step": 633700
    },
    {
      "epoch": 2.220995559480876,
      "grad_norm": 3.15625,
      "learning_rate": 1.4426089913834955e-05,
      "loss": 0.8297,
      "step": 633710
    },
    {
      "epoch": 2.221030606987772,
      "grad_norm": 2.359375,
      "learning_rate": 1.4425440885171255e-05,
      "loss": 0.7301,
      "step": 633720
    },
    {
      "epoch": 2.2210656544946676,
      "grad_norm": 3.1875,
      "learning_rate": 1.4424791856507553e-05,
      "loss": 0.7567,
      "step": 633730
    },
    {
      "epoch": 2.221100702001563,
      "grad_norm": 3.203125,
      "learning_rate": 1.442414282784385e-05,
      "loss": 0.926,
      "step": 633740
    },
    {
      "epoch": 2.2211357495084587,
      "grad_norm": 2.953125,
      "learning_rate": 1.4423493799180149e-05,
      "loss": 0.7633,
      "step": 633750
    },
    {
      "epoch": 2.2211707970153545,
      "grad_norm": 3.03125,
      "learning_rate": 1.4422844770516447e-05,
      "loss": 0.8253,
      "step": 633760
    },
    {
      "epoch": 2.22120584452225,
      "grad_norm": 3.046875,
      "learning_rate": 1.4422195741852743e-05,
      "loss": 0.8038,
      "step": 633770
    },
    {
      "epoch": 2.2212408920291455,
      "grad_norm": 2.484375,
      "learning_rate": 1.4421546713189041e-05,
      "loss": 0.7649,
      "step": 633780
    },
    {
      "epoch": 2.2212759395360413,
      "grad_norm": 2.609375,
      "learning_rate": 1.4420897684525339e-05,
      "loss": 0.7673,
      "step": 633790
    },
    {
      "epoch": 2.2213109870429366,
      "grad_norm": 3.046875,
      "learning_rate": 1.4420248655861637e-05,
      "loss": 0.7632,
      "step": 633800
    },
    {
      "epoch": 2.2213460345498324,
      "grad_norm": 3.296875,
      "learning_rate": 1.4419599627197935e-05,
      "loss": 0.7285,
      "step": 633810
    },
    {
      "epoch": 2.2213810820567277,
      "grad_norm": 2.703125,
      "learning_rate": 1.4418950598534233e-05,
      "loss": 0.7928,
      "step": 633820
    },
    {
      "epoch": 2.2214161295636234,
      "grad_norm": 2.96875,
      "learning_rate": 1.4418301569870533e-05,
      "loss": 0.7365,
      "step": 633830
    },
    {
      "epoch": 2.221451177070519,
      "grad_norm": 3.359375,
      "learning_rate": 1.441765254120683e-05,
      "loss": 0.7877,
      "step": 633840
    },
    {
      "epoch": 2.2214862245774145,
      "grad_norm": 2.953125,
      "learning_rate": 1.4417003512543129e-05,
      "loss": 0.8601,
      "step": 633850
    },
    {
      "epoch": 2.2215212720843103,
      "grad_norm": 3.0,
      "learning_rate": 1.4416354483879427e-05,
      "loss": 0.8173,
      "step": 633860
    },
    {
      "epoch": 2.221556319591206,
      "grad_norm": 2.8125,
      "learning_rate": 1.4415705455215725e-05,
      "loss": 0.7722,
      "step": 633870
    },
    {
      "epoch": 2.2215913670981013,
      "grad_norm": 3.140625,
      "learning_rate": 1.4415056426552023e-05,
      "loss": 0.8239,
      "step": 633880
    },
    {
      "epoch": 2.221626414604997,
      "grad_norm": 2.84375,
      "learning_rate": 1.441440739788832e-05,
      "loss": 0.8271,
      "step": 633890
    },
    {
      "epoch": 2.221661462111893,
      "grad_norm": 2.8125,
      "learning_rate": 1.441375836922462e-05,
      "loss": 0.8623,
      "step": 633900
    },
    {
      "epoch": 2.221696509618788,
      "grad_norm": 2.625,
      "learning_rate": 1.4413109340560918e-05,
      "loss": 0.8041,
      "step": 633910
    },
    {
      "epoch": 2.221731557125684,
      "grad_norm": 2.859375,
      "learning_rate": 1.4412460311897216e-05,
      "loss": 0.8441,
      "step": 633920
    },
    {
      "epoch": 2.2217666046325792,
      "grad_norm": 2.921875,
      "learning_rate": 1.4411811283233514e-05,
      "loss": 0.7831,
      "step": 633930
    },
    {
      "epoch": 2.221801652139475,
      "grad_norm": 2.9375,
      "learning_rate": 1.4411162254569812e-05,
      "loss": 0.797,
      "step": 633940
    },
    {
      "epoch": 2.2218366996463708,
      "grad_norm": 3.296875,
      "learning_rate": 1.441051322590611e-05,
      "loss": 0.8442,
      "step": 633950
    },
    {
      "epoch": 2.221871747153266,
      "grad_norm": 2.5625,
      "learning_rate": 1.4409864197242407e-05,
      "loss": 0.8347,
      "step": 633960
    },
    {
      "epoch": 2.221906794660162,
      "grad_norm": 2.53125,
      "learning_rate": 1.4409215168578705e-05,
      "loss": 0.764,
      "step": 633970
    },
    {
      "epoch": 2.2219418421670576,
      "grad_norm": 2.71875,
      "learning_rate": 1.4408566139915003e-05,
      "loss": 0.8246,
      "step": 633980
    },
    {
      "epoch": 2.221976889673953,
      "grad_norm": 2.78125,
      "learning_rate": 1.44079171112513e-05,
      "loss": 0.8257,
      "step": 633990
    },
    {
      "epoch": 2.2220119371808487,
      "grad_norm": 2.5625,
      "learning_rate": 1.4407268082587599e-05,
      "loss": 0.7839,
      "step": 634000
    },
    {
      "epoch": 2.2220469846877444,
      "grad_norm": 2.65625,
      "learning_rate": 1.4406619053923898e-05,
      "loss": 0.7839,
      "step": 634010
    },
    {
      "epoch": 2.2220820321946397,
      "grad_norm": 3.265625,
      "learning_rate": 1.4405970025260196e-05,
      "loss": 0.8166,
      "step": 634020
    },
    {
      "epoch": 2.2221170797015355,
      "grad_norm": 3.296875,
      "learning_rate": 1.4405320996596494e-05,
      "loss": 0.8466,
      "step": 634030
    },
    {
      "epoch": 2.222152127208431,
      "grad_norm": 2.859375,
      "learning_rate": 1.4404671967932792e-05,
      "loss": 0.8338,
      "step": 634040
    },
    {
      "epoch": 2.2221871747153266,
      "grad_norm": 3.15625,
      "learning_rate": 1.440402293926909e-05,
      "loss": 0.7552,
      "step": 634050
    },
    {
      "epoch": 2.2222222222222223,
      "grad_norm": 2.6875,
      "learning_rate": 1.4403373910605388e-05,
      "loss": 0.8187,
      "step": 634060
    },
    {
      "epoch": 2.2222572697291176,
      "grad_norm": 2.609375,
      "learning_rate": 1.4402724881941686e-05,
      "loss": 0.7882,
      "step": 634070
    },
    {
      "epoch": 2.2222923172360134,
      "grad_norm": 3.421875,
      "learning_rate": 1.4402075853277986e-05,
      "loss": 0.8397,
      "step": 634080
    },
    {
      "epoch": 2.222327364742909,
      "grad_norm": 3.03125,
      "learning_rate": 1.4401426824614284e-05,
      "loss": 0.86,
      "step": 634090
    },
    {
      "epoch": 2.2223624122498045,
      "grad_norm": 2.765625,
      "learning_rate": 1.4400777795950582e-05,
      "loss": 0.8113,
      "step": 634100
    },
    {
      "epoch": 2.2223974597567,
      "grad_norm": 2.828125,
      "learning_rate": 1.440012876728688e-05,
      "loss": 0.7713,
      "step": 634110
    },
    {
      "epoch": 2.222432507263596,
      "grad_norm": 2.890625,
      "learning_rate": 1.4399479738623178e-05,
      "loss": 0.7472,
      "step": 634120
    },
    {
      "epoch": 2.2224675547704913,
      "grad_norm": 3.25,
      "learning_rate": 1.4398830709959476e-05,
      "loss": 0.7956,
      "step": 634130
    },
    {
      "epoch": 2.222502602277387,
      "grad_norm": 2.78125,
      "learning_rate": 1.4398181681295772e-05,
      "loss": 0.8339,
      "step": 634140
    },
    {
      "epoch": 2.2225376497842824,
      "grad_norm": 2.359375,
      "learning_rate": 1.439753265263207e-05,
      "loss": 0.7828,
      "step": 634150
    },
    {
      "epoch": 2.222572697291178,
      "grad_norm": 2.78125,
      "learning_rate": 1.4396883623968368e-05,
      "loss": 0.8249,
      "step": 634160
    },
    {
      "epoch": 2.222607744798074,
      "grad_norm": 3.046875,
      "learning_rate": 1.4396234595304666e-05,
      "loss": 0.8405,
      "step": 634170
    },
    {
      "epoch": 2.222642792304969,
      "grad_norm": 2.96875,
      "learning_rate": 1.4395585566640964e-05,
      "loss": 0.7975,
      "step": 634180
    },
    {
      "epoch": 2.222677839811865,
      "grad_norm": 2.90625,
      "learning_rate": 1.4394936537977262e-05,
      "loss": 0.8572,
      "step": 634190
    },
    {
      "epoch": 2.2227128873187607,
      "grad_norm": 3.34375,
      "learning_rate": 1.4394287509313562e-05,
      "loss": 0.8247,
      "step": 634200
    },
    {
      "epoch": 2.222747934825656,
      "grad_norm": 3.203125,
      "learning_rate": 1.439363848064986e-05,
      "loss": 0.8648,
      "step": 634210
    },
    {
      "epoch": 2.222782982332552,
      "grad_norm": 2.71875,
      "learning_rate": 1.4392989451986158e-05,
      "loss": 0.8562,
      "step": 634220
    },
    {
      "epoch": 2.2228180298394475,
      "grad_norm": 3.34375,
      "learning_rate": 1.4392340423322456e-05,
      "loss": 0.8457,
      "step": 634230
    },
    {
      "epoch": 2.222853077346343,
      "grad_norm": 2.71875,
      "learning_rate": 1.4391691394658754e-05,
      "loss": 0.7949,
      "step": 634240
    },
    {
      "epoch": 2.2228881248532386,
      "grad_norm": 2.640625,
      "learning_rate": 1.4391042365995052e-05,
      "loss": 0.785,
      "step": 634250
    },
    {
      "epoch": 2.2229231723601344,
      "grad_norm": 2.953125,
      "learning_rate": 1.439039333733135e-05,
      "loss": 0.798,
      "step": 634260
    },
    {
      "epoch": 2.2229582198670297,
      "grad_norm": 3.046875,
      "learning_rate": 1.438974430866765e-05,
      "loss": 0.7505,
      "step": 634270
    },
    {
      "epoch": 2.2229932673739254,
      "grad_norm": 2.59375,
      "learning_rate": 1.4389095280003948e-05,
      "loss": 0.8496,
      "step": 634280
    },
    {
      "epoch": 2.2230283148808208,
      "grad_norm": 2.96875,
      "learning_rate": 1.4388446251340246e-05,
      "loss": 0.805,
      "step": 634290
    },
    {
      "epoch": 2.2230633623877165,
      "grad_norm": 2.828125,
      "learning_rate": 1.4387797222676544e-05,
      "loss": 0.8073,
      "step": 634300
    },
    {
      "epoch": 2.2230984098946123,
      "grad_norm": 2.671875,
      "learning_rate": 1.4387148194012842e-05,
      "loss": 0.7547,
      "step": 634310
    },
    {
      "epoch": 2.2231334574015076,
      "grad_norm": 3.125,
      "learning_rate": 1.438649916534914e-05,
      "loss": 0.7996,
      "step": 634320
    },
    {
      "epoch": 2.2231685049084033,
      "grad_norm": 2.859375,
      "learning_rate": 1.4385850136685436e-05,
      "loss": 0.8017,
      "step": 634330
    },
    {
      "epoch": 2.223203552415299,
      "grad_norm": 2.890625,
      "learning_rate": 1.4385201108021734e-05,
      "loss": 0.8844,
      "step": 634340
    },
    {
      "epoch": 2.2232385999221944,
      "grad_norm": 2.984375,
      "learning_rate": 1.4384552079358032e-05,
      "loss": 0.8264,
      "step": 634350
    },
    {
      "epoch": 2.22327364742909,
      "grad_norm": 3.0,
      "learning_rate": 1.438390305069433e-05,
      "loss": 0.7662,
      "step": 634360
    },
    {
      "epoch": 2.223308694935986,
      "grad_norm": 3.234375,
      "learning_rate": 1.4383254022030628e-05,
      "loss": 0.8438,
      "step": 634370
    },
    {
      "epoch": 2.2233437424428812,
      "grad_norm": 3.0,
      "learning_rate": 1.4382604993366928e-05,
      "loss": 0.9094,
      "step": 634380
    },
    {
      "epoch": 2.223378789949777,
      "grad_norm": 2.96875,
      "learning_rate": 1.4381955964703226e-05,
      "loss": 0.7765,
      "step": 634390
    },
    {
      "epoch": 2.2234138374566723,
      "grad_norm": 2.78125,
      "learning_rate": 1.4381306936039524e-05,
      "loss": 0.8087,
      "step": 634400
    },
    {
      "epoch": 2.223448884963568,
      "grad_norm": 2.890625,
      "learning_rate": 1.4380657907375822e-05,
      "loss": 0.7724,
      "step": 634410
    },
    {
      "epoch": 2.223483932470464,
      "grad_norm": 2.859375,
      "learning_rate": 1.438000887871212e-05,
      "loss": 0.895,
      "step": 634420
    },
    {
      "epoch": 2.223518979977359,
      "grad_norm": 2.890625,
      "learning_rate": 1.4379359850048418e-05,
      "loss": 0.7911,
      "step": 634430
    },
    {
      "epoch": 2.223554027484255,
      "grad_norm": 2.921875,
      "learning_rate": 1.4378710821384716e-05,
      "loss": 0.8549,
      "step": 634440
    },
    {
      "epoch": 2.2235890749911507,
      "grad_norm": 3.046875,
      "learning_rate": 1.4378061792721016e-05,
      "loss": 0.8528,
      "step": 634450
    },
    {
      "epoch": 2.223624122498046,
      "grad_norm": 2.984375,
      "learning_rate": 1.4377412764057314e-05,
      "loss": 0.8156,
      "step": 634460
    },
    {
      "epoch": 2.2236591700049417,
      "grad_norm": 2.703125,
      "learning_rate": 1.4376763735393612e-05,
      "loss": 0.7675,
      "step": 634470
    },
    {
      "epoch": 2.2236942175118375,
      "grad_norm": 2.859375,
      "learning_rate": 1.437611470672991e-05,
      "loss": 0.8119,
      "step": 634480
    },
    {
      "epoch": 2.223729265018733,
      "grad_norm": 2.609375,
      "learning_rate": 1.4375465678066208e-05,
      "loss": 0.8355,
      "step": 634490
    },
    {
      "epoch": 2.2237643125256286,
      "grad_norm": 2.484375,
      "learning_rate": 1.4374816649402506e-05,
      "loss": 0.7672,
      "step": 634500
    },
    {
      "epoch": 2.2237993600325243,
      "grad_norm": 2.9375,
      "learning_rate": 1.4374167620738804e-05,
      "loss": 0.8104,
      "step": 634510
    },
    {
      "epoch": 2.2238344075394196,
      "grad_norm": 3.296875,
      "learning_rate": 1.43735185920751e-05,
      "loss": 0.7808,
      "step": 634520
    },
    {
      "epoch": 2.2238694550463154,
      "grad_norm": 3.171875,
      "learning_rate": 1.4372869563411398e-05,
      "loss": 0.8305,
      "step": 634530
    },
    {
      "epoch": 2.2239045025532107,
      "grad_norm": 2.875,
      "learning_rate": 1.4372220534747696e-05,
      "loss": 0.7894,
      "step": 634540
    },
    {
      "epoch": 2.2239395500601065,
      "grad_norm": 2.96875,
      "learning_rate": 1.4371571506083994e-05,
      "loss": 0.8119,
      "step": 634550
    },
    {
      "epoch": 2.223974597567002,
      "grad_norm": 3.34375,
      "learning_rate": 1.4370922477420294e-05,
      "loss": 0.8798,
      "step": 634560
    },
    {
      "epoch": 2.2240096450738975,
      "grad_norm": 3.3125,
      "learning_rate": 1.4370273448756592e-05,
      "loss": 0.7885,
      "step": 634570
    },
    {
      "epoch": 2.2240446925807933,
      "grad_norm": 2.453125,
      "learning_rate": 1.436962442009289e-05,
      "loss": 0.8231,
      "step": 634580
    },
    {
      "epoch": 2.224079740087689,
      "grad_norm": 3.0625,
      "learning_rate": 1.4368975391429188e-05,
      "loss": 0.8664,
      "step": 634590
    },
    {
      "epoch": 2.2241147875945844,
      "grad_norm": 2.9375,
      "learning_rate": 1.4368326362765486e-05,
      "loss": 0.7257,
      "step": 634600
    },
    {
      "epoch": 2.22414983510148,
      "grad_norm": 3.109375,
      "learning_rate": 1.4367677334101784e-05,
      "loss": 0.7831,
      "step": 634610
    },
    {
      "epoch": 2.224184882608376,
      "grad_norm": 2.859375,
      "learning_rate": 1.4367028305438082e-05,
      "loss": 0.8263,
      "step": 634620
    },
    {
      "epoch": 2.224219930115271,
      "grad_norm": 3.078125,
      "learning_rate": 1.4366379276774381e-05,
      "loss": 0.7966,
      "step": 634630
    },
    {
      "epoch": 2.224254977622167,
      "grad_norm": 3.171875,
      "learning_rate": 1.436573024811068e-05,
      "loss": 0.8008,
      "step": 634640
    },
    {
      "epoch": 2.2242900251290623,
      "grad_norm": 2.9375,
      "learning_rate": 1.4365081219446977e-05,
      "loss": 0.781,
      "step": 634650
    },
    {
      "epoch": 2.224325072635958,
      "grad_norm": 2.796875,
      "learning_rate": 1.4364432190783275e-05,
      "loss": 0.785,
      "step": 634660
    },
    {
      "epoch": 2.224360120142854,
      "grad_norm": 3.015625,
      "learning_rate": 1.4363783162119573e-05,
      "loss": 0.7881,
      "step": 634670
    },
    {
      "epoch": 2.224395167649749,
      "grad_norm": 3.46875,
      "learning_rate": 1.4363134133455871e-05,
      "loss": 0.8177,
      "step": 634680
    },
    {
      "epoch": 2.224430215156645,
      "grad_norm": 3.40625,
      "learning_rate": 1.436248510479217e-05,
      "loss": 0.7839,
      "step": 634690
    },
    {
      "epoch": 2.2244652626635406,
      "grad_norm": 3.4375,
      "learning_rate": 1.4361836076128469e-05,
      "loss": 0.9117,
      "step": 634700
    },
    {
      "epoch": 2.224500310170436,
      "grad_norm": 2.75,
      "learning_rate": 1.4361187047464764e-05,
      "loss": 0.8005,
      "step": 634710
    },
    {
      "epoch": 2.2245353576773317,
      "grad_norm": 2.453125,
      "learning_rate": 1.4360538018801062e-05,
      "loss": 0.7992,
      "step": 634720
    },
    {
      "epoch": 2.2245704051842274,
      "grad_norm": 3.15625,
      "learning_rate": 1.435988899013736e-05,
      "loss": 0.8251,
      "step": 634730
    },
    {
      "epoch": 2.2246054526911228,
      "grad_norm": 3.0625,
      "learning_rate": 1.4359239961473658e-05,
      "loss": 0.8049,
      "step": 634740
    },
    {
      "epoch": 2.2246405001980185,
      "grad_norm": 2.90625,
      "learning_rate": 1.4358590932809957e-05,
      "loss": 0.8112,
      "step": 634750
    },
    {
      "epoch": 2.224675547704914,
      "grad_norm": 2.671875,
      "learning_rate": 1.4357941904146255e-05,
      "loss": 0.8499,
      "step": 634760
    },
    {
      "epoch": 2.2247105952118096,
      "grad_norm": 2.875,
      "learning_rate": 1.4357292875482553e-05,
      "loss": 0.8016,
      "step": 634770
    },
    {
      "epoch": 2.2247456427187053,
      "grad_norm": 2.828125,
      "learning_rate": 1.4356643846818851e-05,
      "loss": 0.8069,
      "step": 634780
    },
    {
      "epoch": 2.2247806902256007,
      "grad_norm": 2.859375,
      "learning_rate": 1.435599481815515e-05,
      "loss": 0.7867,
      "step": 634790
    },
    {
      "epoch": 2.2248157377324964,
      "grad_norm": 3.0,
      "learning_rate": 1.4355345789491447e-05,
      "loss": 0.8697,
      "step": 634800
    },
    {
      "epoch": 2.224850785239392,
      "grad_norm": 3.03125,
      "learning_rate": 1.4354696760827745e-05,
      "loss": 0.8139,
      "step": 634810
    },
    {
      "epoch": 2.2248858327462875,
      "grad_norm": 2.640625,
      "learning_rate": 1.4354047732164045e-05,
      "loss": 0.7554,
      "step": 634820
    },
    {
      "epoch": 2.2249208802531832,
      "grad_norm": 2.9375,
      "learning_rate": 1.4353398703500343e-05,
      "loss": 0.758,
      "step": 634830
    },
    {
      "epoch": 2.224955927760079,
      "grad_norm": 2.75,
      "learning_rate": 1.4352749674836641e-05,
      "loss": 0.8322,
      "step": 634840
    },
    {
      "epoch": 2.2249909752669743,
      "grad_norm": 3.21875,
      "learning_rate": 1.4352100646172939e-05,
      "loss": 0.7664,
      "step": 634850
    },
    {
      "epoch": 2.22502602277387,
      "grad_norm": 3.0625,
      "learning_rate": 1.4351451617509237e-05,
      "loss": 0.8032,
      "step": 634860
    },
    {
      "epoch": 2.2250610702807654,
      "grad_norm": 3.0625,
      "learning_rate": 1.4350802588845535e-05,
      "loss": 0.8276,
      "step": 634870
    },
    {
      "epoch": 2.225096117787661,
      "grad_norm": 2.890625,
      "learning_rate": 1.4350153560181833e-05,
      "loss": 0.737,
      "step": 634880
    },
    {
      "epoch": 2.225131165294557,
      "grad_norm": 2.46875,
      "learning_rate": 1.4349504531518133e-05,
      "loss": 0.891,
      "step": 634890
    },
    {
      "epoch": 2.225166212801452,
      "grad_norm": 2.640625,
      "learning_rate": 1.4348855502854427e-05,
      "loss": 0.7601,
      "step": 634900
    },
    {
      "epoch": 2.225201260308348,
      "grad_norm": 3.1875,
      "learning_rate": 1.4348206474190725e-05,
      "loss": 0.7865,
      "step": 634910
    },
    {
      "epoch": 2.2252363078152437,
      "grad_norm": 3.0,
      "learning_rate": 1.4347557445527023e-05,
      "loss": 0.9235,
      "step": 634920
    },
    {
      "epoch": 2.225271355322139,
      "grad_norm": 2.921875,
      "learning_rate": 1.4346908416863323e-05,
      "loss": 0.8367,
      "step": 634930
    },
    {
      "epoch": 2.225306402829035,
      "grad_norm": 2.71875,
      "learning_rate": 1.4346259388199621e-05,
      "loss": 0.8629,
      "step": 634940
    },
    {
      "epoch": 2.2253414503359306,
      "grad_norm": 2.71875,
      "learning_rate": 1.4345610359535919e-05,
      "loss": 0.8357,
      "step": 634950
    },
    {
      "epoch": 2.225376497842826,
      "grad_norm": 3.5625,
      "learning_rate": 1.4344961330872217e-05,
      "loss": 0.8249,
      "step": 634960
    },
    {
      "epoch": 2.2254115453497216,
      "grad_norm": 3.09375,
      "learning_rate": 1.4344312302208515e-05,
      "loss": 0.783,
      "step": 634970
    },
    {
      "epoch": 2.225446592856617,
      "grad_norm": 2.640625,
      "learning_rate": 1.4343663273544813e-05,
      "loss": 0.8845,
      "step": 634980
    },
    {
      "epoch": 2.2254816403635127,
      "grad_norm": 2.46875,
      "learning_rate": 1.4343014244881111e-05,
      "loss": 0.7816,
      "step": 634990
    },
    {
      "epoch": 2.2255166878704085,
      "grad_norm": 3.453125,
      "learning_rate": 1.434236521621741e-05,
      "loss": 0.8287,
      "step": 635000
    },
    {
      "epoch": 2.2255166878704085,
      "eval_loss": 0.7633633017539978,
      "eval_runtime": 554.2449,
      "eval_samples_per_second": 686.404,
      "eval_steps_per_second": 57.2,
      "step": 635000
    },
    {
      "epoch": 2.2255517353773038,
      "grad_norm": 3.234375,
      "learning_rate": 1.4341716187553709e-05,
      "loss": 0.8267,
      "step": 635010
    },
    {
      "epoch": 2.2255867828841995,
      "grad_norm": 2.890625,
      "learning_rate": 1.4341067158890007e-05,
      "loss": 0.8487,
      "step": 635020
    },
    {
      "epoch": 2.2256218303910953,
      "grad_norm": 3.171875,
      "learning_rate": 1.4340418130226305e-05,
      "loss": 0.76,
      "step": 635030
    },
    {
      "epoch": 2.2256568778979906,
      "grad_norm": 2.6875,
      "learning_rate": 1.4339769101562603e-05,
      "loss": 0.8689,
      "step": 635040
    },
    {
      "epoch": 2.2256919254048864,
      "grad_norm": 3.03125,
      "learning_rate": 1.43391200728989e-05,
      "loss": 0.8034,
      "step": 635050
    },
    {
      "epoch": 2.225726972911782,
      "grad_norm": 2.71875,
      "learning_rate": 1.4338471044235199e-05,
      "loss": 0.8546,
      "step": 635060
    },
    {
      "epoch": 2.2257620204186774,
      "grad_norm": 2.859375,
      "learning_rate": 1.4337822015571498e-05,
      "loss": 0.8336,
      "step": 635070
    },
    {
      "epoch": 2.225797067925573,
      "grad_norm": 2.515625,
      "learning_rate": 1.4337172986907796e-05,
      "loss": 0.8024,
      "step": 635080
    },
    {
      "epoch": 2.2258321154324685,
      "grad_norm": 3.25,
      "learning_rate": 1.4336523958244091e-05,
      "loss": 0.7612,
      "step": 635090
    },
    {
      "epoch": 2.2258671629393643,
      "grad_norm": 3.171875,
      "learning_rate": 1.4335874929580389e-05,
      "loss": 0.8491,
      "step": 635100
    },
    {
      "epoch": 2.22590221044626,
      "grad_norm": 2.5625,
      "learning_rate": 1.4335225900916689e-05,
      "loss": 0.6993,
      "step": 635110
    },
    {
      "epoch": 2.2259372579531553,
      "grad_norm": 3.203125,
      "learning_rate": 1.4334576872252987e-05,
      "loss": 0.8286,
      "step": 635120
    },
    {
      "epoch": 2.225972305460051,
      "grad_norm": 2.796875,
      "learning_rate": 1.4333927843589285e-05,
      "loss": 0.8347,
      "step": 635130
    },
    {
      "epoch": 2.226007352966947,
      "grad_norm": 2.6875,
      "learning_rate": 1.4333278814925583e-05,
      "loss": 0.8067,
      "step": 635140
    },
    {
      "epoch": 2.226042400473842,
      "grad_norm": 2.828125,
      "learning_rate": 1.433262978626188e-05,
      "loss": 0.7953,
      "step": 635150
    },
    {
      "epoch": 2.226077447980738,
      "grad_norm": 3.1875,
      "learning_rate": 1.4331980757598179e-05,
      "loss": 0.8016,
      "step": 635160
    },
    {
      "epoch": 2.2261124954876337,
      "grad_norm": 2.609375,
      "learning_rate": 1.4331331728934477e-05,
      "loss": 0.7616,
      "step": 635170
    },
    {
      "epoch": 2.226147542994529,
      "grad_norm": 2.75,
      "learning_rate": 1.4330682700270776e-05,
      "loss": 0.7887,
      "step": 635180
    },
    {
      "epoch": 2.2261825905014248,
      "grad_norm": 2.953125,
      "learning_rate": 1.4330033671607074e-05,
      "loss": 0.8232,
      "step": 635190
    },
    {
      "epoch": 2.22621763800832,
      "grad_norm": 2.578125,
      "learning_rate": 1.4329384642943372e-05,
      "loss": 0.7735,
      "step": 635200
    },
    {
      "epoch": 2.226252685515216,
      "grad_norm": 2.8125,
      "learning_rate": 1.432873561427967e-05,
      "loss": 0.8477,
      "step": 635210
    },
    {
      "epoch": 2.2262877330221116,
      "grad_norm": 3.0625,
      "learning_rate": 1.4328086585615968e-05,
      "loss": 0.8341,
      "step": 635220
    },
    {
      "epoch": 2.226322780529007,
      "grad_norm": 2.984375,
      "learning_rate": 1.4327437556952266e-05,
      "loss": 0.8346,
      "step": 635230
    },
    {
      "epoch": 2.2263578280359027,
      "grad_norm": 2.734375,
      "learning_rate": 1.4326788528288564e-05,
      "loss": 0.8132,
      "step": 635240
    },
    {
      "epoch": 2.2263928755427984,
      "grad_norm": 2.578125,
      "learning_rate": 1.4326139499624864e-05,
      "loss": 0.7839,
      "step": 635250
    },
    {
      "epoch": 2.2264279230496937,
      "grad_norm": 3.1875,
      "learning_rate": 1.4325490470961162e-05,
      "loss": 0.6778,
      "step": 635260
    },
    {
      "epoch": 2.2264629705565895,
      "grad_norm": 3.203125,
      "learning_rate": 1.4324841442297457e-05,
      "loss": 0.869,
      "step": 635270
    },
    {
      "epoch": 2.2264980180634852,
      "grad_norm": 2.984375,
      "learning_rate": 1.4324192413633755e-05,
      "loss": 0.8383,
      "step": 635280
    },
    {
      "epoch": 2.2265330655703806,
      "grad_norm": 2.546875,
      "learning_rate": 1.4323543384970053e-05,
      "loss": 0.8441,
      "step": 635290
    },
    {
      "epoch": 2.2265681130772763,
      "grad_norm": 3.125,
      "learning_rate": 1.4322894356306352e-05,
      "loss": 0.8265,
      "step": 635300
    },
    {
      "epoch": 2.2266031605841716,
      "grad_norm": 3.15625,
      "learning_rate": 1.432224532764265e-05,
      "loss": 0.8237,
      "step": 635310
    },
    {
      "epoch": 2.2266382080910674,
      "grad_norm": 2.640625,
      "learning_rate": 1.4321596298978948e-05,
      "loss": 0.7916,
      "step": 635320
    },
    {
      "epoch": 2.226673255597963,
      "grad_norm": 2.953125,
      "learning_rate": 1.4320947270315246e-05,
      "loss": 0.8398,
      "step": 635330
    },
    {
      "epoch": 2.2267083031048585,
      "grad_norm": 3.5625,
      "learning_rate": 1.4320298241651544e-05,
      "loss": 0.8931,
      "step": 635340
    },
    {
      "epoch": 2.226743350611754,
      "grad_norm": 2.796875,
      "learning_rate": 1.4319649212987842e-05,
      "loss": 0.8564,
      "step": 635350
    },
    {
      "epoch": 2.22677839811865,
      "grad_norm": 2.96875,
      "learning_rate": 1.431900018432414e-05,
      "loss": 0.7836,
      "step": 635360
    },
    {
      "epoch": 2.2268134456255453,
      "grad_norm": 3.109375,
      "learning_rate": 1.431835115566044e-05,
      "loss": 0.8523,
      "step": 635370
    },
    {
      "epoch": 2.226848493132441,
      "grad_norm": 3.03125,
      "learning_rate": 1.4317702126996738e-05,
      "loss": 0.8447,
      "step": 635380
    },
    {
      "epoch": 2.226883540639337,
      "grad_norm": 2.765625,
      "learning_rate": 1.4317053098333036e-05,
      "loss": 0.752,
      "step": 635390
    },
    {
      "epoch": 2.226918588146232,
      "grad_norm": 2.8125,
      "learning_rate": 1.4316404069669334e-05,
      "loss": 0.8381,
      "step": 635400
    },
    {
      "epoch": 2.226953635653128,
      "grad_norm": 2.84375,
      "learning_rate": 1.4315755041005632e-05,
      "loss": 0.8753,
      "step": 635410
    },
    {
      "epoch": 2.226988683160023,
      "grad_norm": 2.78125,
      "learning_rate": 1.431510601234193e-05,
      "loss": 0.8806,
      "step": 635420
    },
    {
      "epoch": 2.227023730666919,
      "grad_norm": 2.59375,
      "learning_rate": 1.4314456983678228e-05,
      "loss": 0.7441,
      "step": 635430
    },
    {
      "epoch": 2.2270587781738147,
      "grad_norm": 2.984375,
      "learning_rate": 1.4313807955014528e-05,
      "loss": 0.8912,
      "step": 635440
    },
    {
      "epoch": 2.22709382568071,
      "grad_norm": 3.046875,
      "learning_rate": 1.4313158926350826e-05,
      "loss": 0.8053,
      "step": 635450
    },
    {
      "epoch": 2.2271288731876058,
      "grad_norm": 2.765625,
      "learning_rate": 1.431250989768712e-05,
      "loss": 0.8751,
      "step": 635460
    },
    {
      "epoch": 2.2271639206945015,
      "grad_norm": 2.921875,
      "learning_rate": 1.4311860869023418e-05,
      "loss": 0.8085,
      "step": 635470
    },
    {
      "epoch": 2.227198968201397,
      "grad_norm": 3.21875,
      "learning_rate": 1.4311211840359718e-05,
      "loss": 0.8015,
      "step": 635480
    },
    {
      "epoch": 2.2272340157082926,
      "grad_norm": 3.125,
      "learning_rate": 1.4310562811696016e-05,
      "loss": 0.8361,
      "step": 635490
    },
    {
      "epoch": 2.2272690632151884,
      "grad_norm": 2.875,
      "learning_rate": 1.4309913783032314e-05,
      "loss": 0.7951,
      "step": 635500
    },
    {
      "epoch": 2.2273041107220837,
      "grad_norm": 2.578125,
      "learning_rate": 1.4309264754368612e-05,
      "loss": 0.7774,
      "step": 635510
    },
    {
      "epoch": 2.2273391582289794,
      "grad_norm": 2.75,
      "learning_rate": 1.430861572570491e-05,
      "loss": 0.8791,
      "step": 635520
    },
    {
      "epoch": 2.2273742057358747,
      "grad_norm": 3.015625,
      "learning_rate": 1.4307966697041208e-05,
      "loss": 0.8215,
      "step": 635530
    },
    {
      "epoch": 2.2274092532427705,
      "grad_norm": 3.015625,
      "learning_rate": 1.4307317668377506e-05,
      "loss": 0.814,
      "step": 635540
    },
    {
      "epoch": 2.2274443007496663,
      "grad_norm": 2.703125,
      "learning_rate": 1.4306668639713806e-05,
      "loss": 0.8073,
      "step": 635550
    },
    {
      "epoch": 2.2274793482565616,
      "grad_norm": 3.328125,
      "learning_rate": 1.4306019611050104e-05,
      "loss": 0.8565,
      "step": 635560
    },
    {
      "epoch": 2.2275143957634573,
      "grad_norm": 2.6875,
      "learning_rate": 1.4305370582386402e-05,
      "loss": 0.8187,
      "step": 635570
    },
    {
      "epoch": 2.227549443270353,
      "grad_norm": 3.296875,
      "learning_rate": 1.43047215537227e-05,
      "loss": 0.877,
      "step": 635580
    },
    {
      "epoch": 2.2275844907772484,
      "grad_norm": 2.59375,
      "learning_rate": 1.4304072525058998e-05,
      "loss": 0.8352,
      "step": 635590
    },
    {
      "epoch": 2.227619538284144,
      "grad_norm": 2.578125,
      "learning_rate": 1.4303423496395296e-05,
      "loss": 0.8334,
      "step": 635600
    },
    {
      "epoch": 2.22765458579104,
      "grad_norm": 2.953125,
      "learning_rate": 1.4302774467731594e-05,
      "loss": 0.7897,
      "step": 635610
    },
    {
      "epoch": 2.2276896332979352,
      "grad_norm": 2.578125,
      "learning_rate": 1.4302125439067893e-05,
      "loss": 0.7469,
      "step": 635620
    },
    {
      "epoch": 2.227724680804831,
      "grad_norm": 3.25,
      "learning_rate": 1.4301476410404191e-05,
      "loss": 0.8391,
      "step": 635630
    },
    {
      "epoch": 2.2277597283117268,
      "grad_norm": 3.0625,
      "learning_rate": 1.430082738174049e-05,
      "loss": 0.7267,
      "step": 635640
    },
    {
      "epoch": 2.227794775818622,
      "grad_norm": 3.203125,
      "learning_rate": 1.4300178353076784e-05,
      "loss": 0.7833,
      "step": 635650
    },
    {
      "epoch": 2.227829823325518,
      "grad_norm": 3.125,
      "learning_rate": 1.4299529324413084e-05,
      "loss": 0.7801,
      "step": 635660
    },
    {
      "epoch": 2.227864870832413,
      "grad_norm": 3.0,
      "learning_rate": 1.4298880295749382e-05,
      "loss": 0.8307,
      "step": 635670
    },
    {
      "epoch": 2.227899918339309,
      "grad_norm": 3.15625,
      "learning_rate": 1.429823126708568e-05,
      "loss": 0.8041,
      "step": 635680
    },
    {
      "epoch": 2.2279349658462047,
      "grad_norm": 2.890625,
      "learning_rate": 1.4297582238421978e-05,
      "loss": 0.7478,
      "step": 635690
    },
    {
      "epoch": 2.2279700133531,
      "grad_norm": 3.328125,
      "learning_rate": 1.4296933209758276e-05,
      "loss": 0.7868,
      "step": 635700
    },
    {
      "epoch": 2.2280050608599957,
      "grad_norm": 3.171875,
      "learning_rate": 1.4296284181094574e-05,
      "loss": 0.7862,
      "step": 635710
    },
    {
      "epoch": 2.2280401083668915,
      "grad_norm": 2.828125,
      "learning_rate": 1.4295635152430872e-05,
      "loss": 0.7379,
      "step": 635720
    },
    {
      "epoch": 2.228075155873787,
      "grad_norm": 2.578125,
      "learning_rate": 1.4294986123767171e-05,
      "loss": 0.8048,
      "step": 635730
    },
    {
      "epoch": 2.2281102033806826,
      "grad_norm": 3.109375,
      "learning_rate": 1.429433709510347e-05,
      "loss": 0.7536,
      "step": 635740
    },
    {
      "epoch": 2.2281452508875783,
      "grad_norm": 3.359375,
      "learning_rate": 1.4293688066439767e-05,
      "loss": 0.9042,
      "step": 635750
    },
    {
      "epoch": 2.2281802983944736,
      "grad_norm": 2.8125,
      "learning_rate": 1.4293039037776065e-05,
      "loss": 0.7963,
      "step": 635760
    },
    {
      "epoch": 2.2282153459013694,
      "grad_norm": 2.96875,
      "learning_rate": 1.4292390009112363e-05,
      "loss": 0.8417,
      "step": 635770
    },
    {
      "epoch": 2.2282503934082647,
      "grad_norm": 2.40625,
      "learning_rate": 1.4291740980448661e-05,
      "loss": 0.8107,
      "step": 635780
    },
    {
      "epoch": 2.2282854409151605,
      "grad_norm": 3.125,
      "learning_rate": 1.429109195178496e-05,
      "loss": 0.7293,
      "step": 635790
    },
    {
      "epoch": 2.228320488422056,
      "grad_norm": 3.0625,
      "learning_rate": 1.4290442923121259e-05,
      "loss": 0.7632,
      "step": 635800
    },
    {
      "epoch": 2.2283555359289515,
      "grad_norm": 3.15625,
      "learning_rate": 1.4289793894457557e-05,
      "loss": 0.8161,
      "step": 635810
    },
    {
      "epoch": 2.2283905834358473,
      "grad_norm": 2.6875,
      "learning_rate": 1.4289144865793855e-05,
      "loss": 0.8412,
      "step": 635820
    },
    {
      "epoch": 2.228425630942743,
      "grad_norm": 3.015625,
      "learning_rate": 1.4288495837130153e-05,
      "loss": 0.7418,
      "step": 635830
    },
    {
      "epoch": 2.2284606784496384,
      "grad_norm": 3.5625,
      "learning_rate": 1.4287846808466448e-05,
      "loss": 0.8191,
      "step": 635840
    },
    {
      "epoch": 2.228495725956534,
      "grad_norm": 3.46875,
      "learning_rate": 1.4287197779802747e-05,
      "loss": 0.7563,
      "step": 635850
    },
    {
      "epoch": 2.22853077346343,
      "grad_norm": 2.96875,
      "learning_rate": 1.4286548751139045e-05,
      "loss": 0.8678,
      "step": 635860
    },
    {
      "epoch": 2.228565820970325,
      "grad_norm": 2.65625,
      "learning_rate": 1.4285899722475343e-05,
      "loss": 0.8174,
      "step": 635870
    },
    {
      "epoch": 2.228600868477221,
      "grad_norm": 2.765625,
      "learning_rate": 1.4285250693811641e-05,
      "loss": 0.8688,
      "step": 635880
    },
    {
      "epoch": 2.2286359159841167,
      "grad_norm": 3.109375,
      "learning_rate": 1.428460166514794e-05,
      "loss": 0.8488,
      "step": 635890
    },
    {
      "epoch": 2.228670963491012,
      "grad_norm": 2.859375,
      "learning_rate": 1.4283952636484237e-05,
      "loss": 0.8531,
      "step": 635900
    },
    {
      "epoch": 2.2287060109979078,
      "grad_norm": 3.140625,
      "learning_rate": 1.4283303607820535e-05,
      "loss": 0.7215,
      "step": 635910
    },
    {
      "epoch": 2.228741058504803,
      "grad_norm": 3.0625,
      "learning_rate": 1.4282654579156835e-05,
      "loss": 0.7977,
      "step": 635920
    },
    {
      "epoch": 2.228776106011699,
      "grad_norm": 2.640625,
      "learning_rate": 1.4282005550493133e-05,
      "loss": 0.7746,
      "step": 635930
    },
    {
      "epoch": 2.2288111535185946,
      "grad_norm": 2.828125,
      "learning_rate": 1.4281356521829431e-05,
      "loss": 0.8627,
      "step": 635940
    },
    {
      "epoch": 2.22884620102549,
      "grad_norm": 3.109375,
      "learning_rate": 1.4280707493165729e-05,
      "loss": 0.8243,
      "step": 635950
    },
    {
      "epoch": 2.2288812485323857,
      "grad_norm": 2.890625,
      "learning_rate": 1.4280058464502027e-05,
      "loss": 0.7553,
      "step": 635960
    },
    {
      "epoch": 2.2289162960392814,
      "grad_norm": 2.984375,
      "learning_rate": 1.4279409435838325e-05,
      "loss": 0.7924,
      "step": 635970
    },
    {
      "epoch": 2.2289513435461767,
      "grad_norm": 2.59375,
      "learning_rate": 1.4278760407174625e-05,
      "loss": 0.8369,
      "step": 635980
    },
    {
      "epoch": 2.2289863910530725,
      "grad_norm": 3.265625,
      "learning_rate": 1.4278111378510923e-05,
      "loss": 0.8729,
      "step": 635990
    },
    {
      "epoch": 2.2290214385599683,
      "grad_norm": 2.5625,
      "learning_rate": 1.427746234984722e-05,
      "loss": 0.8094,
      "step": 636000
    },
    {
      "epoch": 2.2290564860668636,
      "grad_norm": 3.015625,
      "learning_rate": 1.4276813321183519e-05,
      "loss": 0.8201,
      "step": 636010
    },
    {
      "epoch": 2.2290915335737593,
      "grad_norm": 3.109375,
      "learning_rate": 1.4276164292519817e-05,
      "loss": 0.8145,
      "step": 636020
    },
    {
      "epoch": 2.2291265810806546,
      "grad_norm": 2.578125,
      "learning_rate": 1.4275515263856113e-05,
      "loss": 0.7104,
      "step": 636030
    },
    {
      "epoch": 2.2291616285875504,
      "grad_norm": 2.15625,
      "learning_rate": 1.4274866235192411e-05,
      "loss": 0.8004,
      "step": 636040
    },
    {
      "epoch": 2.229196676094446,
      "grad_norm": 2.859375,
      "learning_rate": 1.4274217206528709e-05,
      "loss": 0.8645,
      "step": 636050
    },
    {
      "epoch": 2.2292317236013415,
      "grad_norm": 3.40625,
      "learning_rate": 1.4273568177865007e-05,
      "loss": 0.8164,
      "step": 636060
    },
    {
      "epoch": 2.2292667711082372,
      "grad_norm": 2.859375,
      "learning_rate": 1.4272919149201305e-05,
      "loss": 0.8386,
      "step": 636070
    },
    {
      "epoch": 2.229301818615133,
      "grad_norm": 3.09375,
      "learning_rate": 1.4272270120537603e-05,
      "loss": 0.8034,
      "step": 636080
    },
    {
      "epoch": 2.2293368661220283,
      "grad_norm": 3.28125,
      "learning_rate": 1.4271621091873901e-05,
      "loss": 0.8517,
      "step": 636090
    },
    {
      "epoch": 2.229371913628924,
      "grad_norm": 3.0,
      "learning_rate": 1.42709720632102e-05,
      "loss": 0.8415,
      "step": 636100
    },
    {
      "epoch": 2.22940696113582,
      "grad_norm": 2.90625,
      "learning_rate": 1.4270323034546499e-05,
      "loss": 0.9092,
      "step": 636110
    },
    {
      "epoch": 2.229442008642715,
      "grad_norm": 2.984375,
      "learning_rate": 1.4269674005882797e-05,
      "loss": 0.8292,
      "step": 636120
    },
    {
      "epoch": 2.229477056149611,
      "grad_norm": 2.921875,
      "learning_rate": 1.4269024977219095e-05,
      "loss": 0.8375,
      "step": 636130
    },
    {
      "epoch": 2.229512103656506,
      "grad_norm": 2.875,
      "learning_rate": 1.4268375948555393e-05,
      "loss": 0.7888,
      "step": 636140
    },
    {
      "epoch": 2.229547151163402,
      "grad_norm": 2.640625,
      "learning_rate": 1.426772691989169e-05,
      "loss": 0.8398,
      "step": 636150
    },
    {
      "epoch": 2.2295821986702977,
      "grad_norm": 3.421875,
      "learning_rate": 1.4267077891227989e-05,
      "loss": 0.8374,
      "step": 636160
    },
    {
      "epoch": 2.229617246177193,
      "grad_norm": 2.953125,
      "learning_rate": 1.4266428862564288e-05,
      "loss": 0.8034,
      "step": 636170
    },
    {
      "epoch": 2.229652293684089,
      "grad_norm": 3.40625,
      "learning_rate": 1.4265779833900586e-05,
      "loss": 0.7192,
      "step": 636180
    },
    {
      "epoch": 2.2296873411909846,
      "grad_norm": 2.8125,
      "learning_rate": 1.4265130805236884e-05,
      "loss": 0.8044,
      "step": 636190
    },
    {
      "epoch": 2.22972238869788,
      "grad_norm": 3.046875,
      "learning_rate": 1.4264481776573182e-05,
      "loss": 0.7943,
      "step": 636200
    },
    {
      "epoch": 2.2297574362047756,
      "grad_norm": 2.796875,
      "learning_rate": 1.4263832747909479e-05,
      "loss": 0.7768,
      "step": 636210
    },
    {
      "epoch": 2.2297924837116714,
      "grad_norm": 2.84375,
      "learning_rate": 1.4263183719245777e-05,
      "loss": 0.81,
      "step": 636220
    },
    {
      "epoch": 2.2298275312185667,
      "grad_norm": 3.25,
      "learning_rate": 1.4262534690582075e-05,
      "loss": 0.8301,
      "step": 636230
    },
    {
      "epoch": 2.2298625787254625,
      "grad_norm": 2.921875,
      "learning_rate": 1.4261885661918373e-05,
      "loss": 0.8394,
      "step": 636240
    },
    {
      "epoch": 2.2298976262323578,
      "grad_norm": 3.140625,
      "learning_rate": 1.426123663325467e-05,
      "loss": 0.8001,
      "step": 636250
    },
    {
      "epoch": 2.2299326737392535,
      "grad_norm": 2.71875,
      "learning_rate": 1.4260587604590969e-05,
      "loss": 0.8376,
      "step": 636260
    },
    {
      "epoch": 2.2299677212461493,
      "grad_norm": 3.0,
      "learning_rate": 1.4259938575927267e-05,
      "loss": 0.829,
      "step": 636270
    },
    {
      "epoch": 2.2300027687530446,
      "grad_norm": 2.90625,
      "learning_rate": 1.4259289547263566e-05,
      "loss": 0.8277,
      "step": 636280
    },
    {
      "epoch": 2.2300378162599404,
      "grad_norm": 3.1875,
      "learning_rate": 1.4258640518599864e-05,
      "loss": 0.8418,
      "step": 636290
    },
    {
      "epoch": 2.230072863766836,
      "grad_norm": 2.59375,
      "learning_rate": 1.4257991489936162e-05,
      "loss": 0.8371,
      "step": 636300
    },
    {
      "epoch": 2.2301079112737314,
      "grad_norm": 3.4375,
      "learning_rate": 1.425734246127246e-05,
      "loss": 0.8174,
      "step": 636310
    },
    {
      "epoch": 2.230142958780627,
      "grad_norm": 2.8125,
      "learning_rate": 1.4256693432608758e-05,
      "loss": 0.7446,
      "step": 636320
    },
    {
      "epoch": 2.230178006287523,
      "grad_norm": 2.75,
      "learning_rate": 1.4256044403945056e-05,
      "loss": 0.8293,
      "step": 636330
    },
    {
      "epoch": 2.2302130537944183,
      "grad_norm": 3.28125,
      "learning_rate": 1.4255395375281354e-05,
      "loss": 0.8403,
      "step": 636340
    },
    {
      "epoch": 2.230248101301314,
      "grad_norm": 2.75,
      "learning_rate": 1.4254746346617654e-05,
      "loss": 0.8621,
      "step": 636350
    },
    {
      "epoch": 2.2302831488082093,
      "grad_norm": 3.25,
      "learning_rate": 1.4254097317953952e-05,
      "loss": 0.8715,
      "step": 636360
    },
    {
      "epoch": 2.230318196315105,
      "grad_norm": 3.078125,
      "learning_rate": 1.425344828929025e-05,
      "loss": 0.7576,
      "step": 636370
    },
    {
      "epoch": 2.230353243822001,
      "grad_norm": 2.71875,
      "learning_rate": 1.4252799260626548e-05,
      "loss": 0.7528,
      "step": 636380
    },
    {
      "epoch": 2.230388291328896,
      "grad_norm": 3.625,
      "learning_rate": 1.4252150231962846e-05,
      "loss": 0.833,
      "step": 636390
    },
    {
      "epoch": 2.230423338835792,
      "grad_norm": 3.484375,
      "learning_rate": 1.4251501203299142e-05,
      "loss": 0.8966,
      "step": 636400
    },
    {
      "epoch": 2.2304583863426877,
      "grad_norm": 3.015625,
      "learning_rate": 1.425085217463544e-05,
      "loss": 0.8207,
      "step": 636410
    },
    {
      "epoch": 2.230493433849583,
      "grad_norm": 3.296875,
      "learning_rate": 1.4250203145971738e-05,
      "loss": 0.8712,
      "step": 636420
    },
    {
      "epoch": 2.2305284813564787,
      "grad_norm": 2.796875,
      "learning_rate": 1.4249554117308036e-05,
      "loss": 0.7272,
      "step": 636430
    },
    {
      "epoch": 2.2305635288633745,
      "grad_norm": 3.3125,
      "learning_rate": 1.4248905088644334e-05,
      "loss": 0.8019,
      "step": 636440
    },
    {
      "epoch": 2.23059857637027,
      "grad_norm": 2.6875,
      "learning_rate": 1.4248256059980632e-05,
      "loss": 0.8611,
      "step": 636450
    },
    {
      "epoch": 2.2306336238771656,
      "grad_norm": 2.921875,
      "learning_rate": 1.424760703131693e-05,
      "loss": 0.7781,
      "step": 636460
    },
    {
      "epoch": 2.230668671384061,
      "grad_norm": 2.59375,
      "learning_rate": 1.424695800265323e-05,
      "loss": 0.7867,
      "step": 636470
    },
    {
      "epoch": 2.2307037188909566,
      "grad_norm": 2.796875,
      "learning_rate": 1.4246308973989528e-05,
      "loss": 0.8181,
      "step": 636480
    },
    {
      "epoch": 2.2307387663978524,
      "grad_norm": 2.859375,
      "learning_rate": 1.4245659945325826e-05,
      "loss": 0.8542,
      "step": 636490
    },
    {
      "epoch": 2.2307738139047477,
      "grad_norm": 2.921875,
      "learning_rate": 1.4245010916662124e-05,
      "loss": 0.8166,
      "step": 636500
    },
    {
      "epoch": 2.2308088614116435,
      "grad_norm": 2.109375,
      "learning_rate": 1.4244361887998422e-05,
      "loss": 0.8013,
      "step": 636510
    },
    {
      "epoch": 2.2308439089185392,
      "grad_norm": 2.453125,
      "learning_rate": 1.424371285933472e-05,
      "loss": 0.8263,
      "step": 636520
    },
    {
      "epoch": 2.2308789564254345,
      "grad_norm": 2.859375,
      "learning_rate": 1.424306383067102e-05,
      "loss": 0.7595,
      "step": 636530
    },
    {
      "epoch": 2.2309140039323303,
      "grad_norm": 2.59375,
      "learning_rate": 1.4242414802007318e-05,
      "loss": 0.7113,
      "step": 636540
    },
    {
      "epoch": 2.230949051439226,
      "grad_norm": 3.40625,
      "learning_rate": 1.4241765773343616e-05,
      "loss": 0.8844,
      "step": 636550
    },
    {
      "epoch": 2.2309840989461214,
      "grad_norm": 2.671875,
      "learning_rate": 1.4241116744679914e-05,
      "loss": 0.8368,
      "step": 636560
    },
    {
      "epoch": 2.231019146453017,
      "grad_norm": 3.4375,
      "learning_rate": 1.4240467716016212e-05,
      "loss": 0.8043,
      "step": 636570
    },
    {
      "epoch": 2.2310541939599124,
      "grad_norm": 2.671875,
      "learning_rate": 1.423981868735251e-05,
      "loss": 0.6956,
      "step": 636580
    },
    {
      "epoch": 2.231089241466808,
      "grad_norm": 2.703125,
      "learning_rate": 1.4239169658688806e-05,
      "loss": 0.8247,
      "step": 636590
    },
    {
      "epoch": 2.231124288973704,
      "grad_norm": 2.984375,
      "learning_rate": 1.4238520630025104e-05,
      "loss": 0.8934,
      "step": 636600
    },
    {
      "epoch": 2.2311593364805993,
      "grad_norm": 3.09375,
      "learning_rate": 1.4237871601361402e-05,
      "loss": 0.7903,
      "step": 636610
    },
    {
      "epoch": 2.231194383987495,
      "grad_norm": 2.609375,
      "learning_rate": 1.42372225726977e-05,
      "loss": 0.8252,
      "step": 636620
    },
    {
      "epoch": 2.231229431494391,
      "grad_norm": 2.71875,
      "learning_rate": 1.4236573544033998e-05,
      "loss": 0.8616,
      "step": 636630
    },
    {
      "epoch": 2.231264479001286,
      "grad_norm": 3.109375,
      "learning_rate": 1.4235924515370296e-05,
      "loss": 0.7957,
      "step": 636640
    },
    {
      "epoch": 2.231299526508182,
      "grad_norm": 2.921875,
      "learning_rate": 1.4235275486706596e-05,
      "loss": 0.7771,
      "step": 636650
    },
    {
      "epoch": 2.2313345740150776,
      "grad_norm": 3.296875,
      "learning_rate": 1.4234626458042894e-05,
      "loss": 0.8971,
      "step": 636660
    },
    {
      "epoch": 2.231369621521973,
      "grad_norm": 2.9375,
      "learning_rate": 1.4233977429379192e-05,
      "loss": 0.8281,
      "step": 636670
    },
    {
      "epoch": 2.2314046690288687,
      "grad_norm": 2.828125,
      "learning_rate": 1.423332840071549e-05,
      "loss": 0.8082,
      "step": 636680
    },
    {
      "epoch": 2.231439716535764,
      "grad_norm": 2.59375,
      "learning_rate": 1.4232679372051788e-05,
      "loss": 0.7733,
      "step": 636690
    },
    {
      "epoch": 2.2314747640426598,
      "grad_norm": 3.390625,
      "learning_rate": 1.4232030343388086e-05,
      "loss": 0.8067,
      "step": 636700
    },
    {
      "epoch": 2.2315098115495555,
      "grad_norm": 2.8125,
      "learning_rate": 1.4231381314724384e-05,
      "loss": 0.735,
      "step": 636710
    },
    {
      "epoch": 2.231544859056451,
      "grad_norm": 3.0625,
      "learning_rate": 1.4230732286060684e-05,
      "loss": 0.894,
      "step": 636720
    },
    {
      "epoch": 2.2315799065633466,
      "grad_norm": 2.921875,
      "learning_rate": 1.4230083257396982e-05,
      "loss": 0.7972,
      "step": 636730
    },
    {
      "epoch": 2.2316149540702424,
      "grad_norm": 3.03125,
      "learning_rate": 1.422943422873328e-05,
      "loss": 0.8042,
      "step": 636740
    },
    {
      "epoch": 2.2316500015771377,
      "grad_norm": 2.96875,
      "learning_rate": 1.4228785200069578e-05,
      "loss": 0.7836,
      "step": 636750
    },
    {
      "epoch": 2.2316850490840334,
      "grad_norm": 2.296875,
      "learning_rate": 1.4228136171405876e-05,
      "loss": 0.7689,
      "step": 636760
    },
    {
      "epoch": 2.231720096590929,
      "grad_norm": 2.96875,
      "learning_rate": 1.4227487142742174e-05,
      "loss": 0.8652,
      "step": 636770
    },
    {
      "epoch": 2.2317551440978245,
      "grad_norm": 3.078125,
      "learning_rate": 1.422683811407847e-05,
      "loss": 0.7904,
      "step": 636780
    },
    {
      "epoch": 2.2317901916047203,
      "grad_norm": 2.390625,
      "learning_rate": 1.4226189085414768e-05,
      "loss": 0.8234,
      "step": 636790
    },
    {
      "epoch": 2.2318252391116156,
      "grad_norm": 3.0625,
      "learning_rate": 1.4225540056751066e-05,
      "loss": 0.7232,
      "step": 636800
    },
    {
      "epoch": 2.2318602866185113,
      "grad_norm": 3.03125,
      "learning_rate": 1.4224891028087364e-05,
      "loss": 0.7803,
      "step": 636810
    },
    {
      "epoch": 2.231895334125407,
      "grad_norm": 3.09375,
      "learning_rate": 1.4224241999423662e-05,
      "loss": 0.8298,
      "step": 636820
    },
    {
      "epoch": 2.2319303816323024,
      "grad_norm": 3.171875,
      "learning_rate": 1.4223592970759962e-05,
      "loss": 0.9139,
      "step": 636830
    },
    {
      "epoch": 2.231965429139198,
      "grad_norm": 2.453125,
      "learning_rate": 1.422294394209626e-05,
      "loss": 0.7483,
      "step": 636840
    },
    {
      "epoch": 2.232000476646094,
      "grad_norm": 2.640625,
      "learning_rate": 1.4222294913432558e-05,
      "loss": 0.7341,
      "step": 636850
    },
    {
      "epoch": 2.2320355241529892,
      "grad_norm": 2.796875,
      "learning_rate": 1.4221645884768856e-05,
      "loss": 0.8837,
      "step": 636860
    },
    {
      "epoch": 2.232070571659885,
      "grad_norm": 2.78125,
      "learning_rate": 1.4220996856105154e-05,
      "loss": 0.8169,
      "step": 636870
    },
    {
      "epoch": 2.2321056191667807,
      "grad_norm": 2.609375,
      "learning_rate": 1.4220347827441452e-05,
      "loss": 0.7897,
      "step": 636880
    },
    {
      "epoch": 2.232140666673676,
      "grad_norm": 2.734375,
      "learning_rate": 1.421969879877775e-05,
      "loss": 0.8309,
      "step": 636890
    },
    {
      "epoch": 2.232175714180572,
      "grad_norm": 2.53125,
      "learning_rate": 1.421904977011405e-05,
      "loss": 0.7961,
      "step": 636900
    },
    {
      "epoch": 2.232210761687467,
      "grad_norm": 2.90625,
      "learning_rate": 1.4218400741450347e-05,
      "loss": 0.7856,
      "step": 636910
    },
    {
      "epoch": 2.232245809194363,
      "grad_norm": 2.8125,
      "learning_rate": 1.4217751712786645e-05,
      "loss": 0.7383,
      "step": 636920
    },
    {
      "epoch": 2.2322808567012586,
      "grad_norm": 3.15625,
      "learning_rate": 1.4217102684122943e-05,
      "loss": 0.8341,
      "step": 636930
    },
    {
      "epoch": 2.232315904208154,
      "grad_norm": 2.4375,
      "learning_rate": 1.4216453655459241e-05,
      "loss": 0.7633,
      "step": 636940
    },
    {
      "epoch": 2.2323509517150497,
      "grad_norm": 2.578125,
      "learning_rate": 1.421580462679554e-05,
      "loss": 0.8499,
      "step": 636950
    },
    {
      "epoch": 2.2323859992219455,
      "grad_norm": 2.3125,
      "learning_rate": 1.4215155598131837e-05,
      "loss": 0.851,
      "step": 636960
    },
    {
      "epoch": 2.232421046728841,
      "grad_norm": 2.859375,
      "learning_rate": 1.4214506569468134e-05,
      "loss": 0.8496,
      "step": 636970
    },
    {
      "epoch": 2.2324560942357365,
      "grad_norm": 2.890625,
      "learning_rate": 1.4213857540804432e-05,
      "loss": 0.7425,
      "step": 636980
    },
    {
      "epoch": 2.2324911417426323,
      "grad_norm": 2.984375,
      "learning_rate": 1.421320851214073e-05,
      "loss": 0.8784,
      "step": 636990
    },
    {
      "epoch": 2.2325261892495276,
      "grad_norm": 2.546875,
      "learning_rate": 1.4212559483477028e-05,
      "loss": 0.7702,
      "step": 637000
    },
    {
      "epoch": 2.2325612367564234,
      "grad_norm": 2.78125,
      "learning_rate": 1.4211910454813327e-05,
      "loss": 0.6789,
      "step": 637010
    },
    {
      "epoch": 2.232596284263319,
      "grad_norm": 3.40625,
      "learning_rate": 1.4211261426149625e-05,
      "loss": 0.8311,
      "step": 637020
    },
    {
      "epoch": 2.2326313317702144,
      "grad_norm": 3.078125,
      "learning_rate": 1.4210612397485923e-05,
      "loss": 0.8574,
      "step": 637030
    },
    {
      "epoch": 2.23266637927711,
      "grad_norm": 2.96875,
      "learning_rate": 1.4209963368822221e-05,
      "loss": 0.8191,
      "step": 637040
    },
    {
      "epoch": 2.2327014267840055,
      "grad_norm": 2.53125,
      "learning_rate": 1.420931434015852e-05,
      "loss": 0.8403,
      "step": 637050
    },
    {
      "epoch": 2.2327364742909013,
      "grad_norm": 2.953125,
      "learning_rate": 1.4208665311494817e-05,
      "loss": 0.763,
      "step": 637060
    },
    {
      "epoch": 2.232771521797797,
      "grad_norm": 3.03125,
      "learning_rate": 1.4208016282831115e-05,
      "loss": 0.8221,
      "step": 637070
    },
    {
      "epoch": 2.2328065693046923,
      "grad_norm": 3.328125,
      "learning_rate": 1.4207367254167415e-05,
      "loss": 0.7772,
      "step": 637080
    },
    {
      "epoch": 2.232841616811588,
      "grad_norm": 2.5,
      "learning_rate": 1.4206718225503713e-05,
      "loss": 0.8035,
      "step": 637090
    },
    {
      "epoch": 2.232876664318484,
      "grad_norm": 3.109375,
      "learning_rate": 1.4206069196840011e-05,
      "loss": 0.8294,
      "step": 637100
    },
    {
      "epoch": 2.232911711825379,
      "grad_norm": 3.0,
      "learning_rate": 1.4205420168176309e-05,
      "loss": 0.8127,
      "step": 637110
    },
    {
      "epoch": 2.232946759332275,
      "grad_norm": 2.625,
      "learning_rate": 1.4204771139512607e-05,
      "loss": 0.7748,
      "step": 637120
    },
    {
      "epoch": 2.2329818068391707,
      "grad_norm": 2.8125,
      "learning_rate": 1.4204122110848905e-05,
      "loss": 0.8787,
      "step": 637130
    },
    {
      "epoch": 2.233016854346066,
      "grad_norm": 2.8125,
      "learning_rate": 1.4203473082185203e-05,
      "loss": 0.7515,
      "step": 637140
    },
    {
      "epoch": 2.2330519018529618,
      "grad_norm": 3.0,
      "learning_rate": 1.42028240535215e-05,
      "loss": 0.8322,
      "step": 637150
    },
    {
      "epoch": 2.2330869493598575,
      "grad_norm": 3.375,
      "learning_rate": 1.4202175024857797e-05,
      "loss": 0.7405,
      "step": 637160
    },
    {
      "epoch": 2.233121996866753,
      "grad_norm": 2.734375,
      "learning_rate": 1.4201525996194095e-05,
      "loss": 0.8323,
      "step": 637170
    },
    {
      "epoch": 2.2331570443736486,
      "grad_norm": 2.828125,
      "learning_rate": 1.4200876967530393e-05,
      "loss": 0.783,
      "step": 637180
    },
    {
      "epoch": 2.233192091880544,
      "grad_norm": 3.1875,
      "learning_rate": 1.4200227938866691e-05,
      "loss": 0.8139,
      "step": 637190
    },
    {
      "epoch": 2.2332271393874397,
      "grad_norm": 3.0625,
      "learning_rate": 1.4199578910202991e-05,
      "loss": 0.825,
      "step": 637200
    },
    {
      "epoch": 2.2332621868943354,
      "grad_norm": 2.84375,
      "learning_rate": 1.4198929881539289e-05,
      "loss": 0.9555,
      "step": 637210
    },
    {
      "epoch": 2.2332972344012307,
      "grad_norm": 3.1875,
      "learning_rate": 1.4198280852875587e-05,
      "loss": 0.7805,
      "step": 637220
    },
    {
      "epoch": 2.2333322819081265,
      "grad_norm": 2.765625,
      "learning_rate": 1.4197631824211885e-05,
      "loss": 0.8166,
      "step": 637230
    },
    {
      "epoch": 2.2333673294150223,
      "grad_norm": 3.109375,
      "learning_rate": 1.4196982795548183e-05,
      "loss": 0.8449,
      "step": 637240
    },
    {
      "epoch": 2.2334023769219176,
      "grad_norm": 2.921875,
      "learning_rate": 1.4196333766884481e-05,
      "loss": 0.7452,
      "step": 637250
    },
    {
      "epoch": 2.2334374244288133,
      "grad_norm": 2.796875,
      "learning_rate": 1.4195684738220779e-05,
      "loss": 0.8293,
      "step": 637260
    },
    {
      "epoch": 2.233472471935709,
      "grad_norm": 3.21875,
      "learning_rate": 1.4195035709557079e-05,
      "loss": 0.8536,
      "step": 637270
    },
    {
      "epoch": 2.2335075194426044,
      "grad_norm": 2.6875,
      "learning_rate": 1.4194386680893377e-05,
      "loss": 0.8402,
      "step": 637280
    },
    {
      "epoch": 2.2335425669495,
      "grad_norm": 2.921875,
      "learning_rate": 1.4193737652229675e-05,
      "loss": 0.8506,
      "step": 637290
    },
    {
      "epoch": 2.2335776144563955,
      "grad_norm": 2.640625,
      "learning_rate": 1.4193088623565973e-05,
      "loss": 0.7711,
      "step": 637300
    },
    {
      "epoch": 2.2336126619632912,
      "grad_norm": 2.6875,
      "learning_rate": 1.419243959490227e-05,
      "loss": 0.7878,
      "step": 637310
    },
    {
      "epoch": 2.233647709470187,
      "grad_norm": 3.140625,
      "learning_rate": 1.4191790566238569e-05,
      "loss": 0.7868,
      "step": 637320
    },
    {
      "epoch": 2.2336827569770823,
      "grad_norm": 2.984375,
      "learning_rate": 1.4191141537574867e-05,
      "loss": 0.7272,
      "step": 637330
    },
    {
      "epoch": 2.233717804483978,
      "grad_norm": 3.046875,
      "learning_rate": 1.4190492508911163e-05,
      "loss": 0.7813,
      "step": 637340
    },
    {
      "epoch": 2.233752851990874,
      "grad_norm": 3.125,
      "learning_rate": 1.4189843480247461e-05,
      "loss": 0.8278,
      "step": 637350
    },
    {
      "epoch": 2.233787899497769,
      "grad_norm": 2.609375,
      "learning_rate": 1.4189194451583759e-05,
      "loss": 0.797,
      "step": 637360
    },
    {
      "epoch": 2.233822947004665,
      "grad_norm": 3.171875,
      "learning_rate": 1.4188545422920057e-05,
      "loss": 0.7311,
      "step": 637370
    },
    {
      "epoch": 2.2338579945115606,
      "grad_norm": 2.96875,
      "learning_rate": 1.4187896394256357e-05,
      "loss": 0.8414,
      "step": 637380
    },
    {
      "epoch": 2.233893042018456,
      "grad_norm": 2.734375,
      "learning_rate": 1.4187247365592655e-05,
      "loss": 0.8278,
      "step": 637390
    },
    {
      "epoch": 2.2339280895253517,
      "grad_norm": 2.953125,
      "learning_rate": 1.4186598336928953e-05,
      "loss": 0.8086,
      "step": 637400
    },
    {
      "epoch": 2.233963137032247,
      "grad_norm": 3.046875,
      "learning_rate": 1.418594930826525e-05,
      "loss": 0.7707,
      "step": 637410
    },
    {
      "epoch": 2.233998184539143,
      "grad_norm": 2.734375,
      "learning_rate": 1.4185300279601549e-05,
      "loss": 0.7556,
      "step": 637420
    },
    {
      "epoch": 2.2340332320460385,
      "grad_norm": 2.984375,
      "learning_rate": 1.4184651250937847e-05,
      "loss": 0.8728,
      "step": 637430
    },
    {
      "epoch": 2.234068279552934,
      "grad_norm": 2.8125,
      "learning_rate": 1.4184002222274145e-05,
      "loss": 0.7839,
      "step": 637440
    },
    {
      "epoch": 2.2341033270598296,
      "grad_norm": 2.546875,
      "learning_rate": 1.4183353193610444e-05,
      "loss": 0.8332,
      "step": 637450
    },
    {
      "epoch": 2.2341383745667254,
      "grad_norm": 3.03125,
      "learning_rate": 1.4182704164946742e-05,
      "loss": 0.7856,
      "step": 637460
    },
    {
      "epoch": 2.2341734220736207,
      "grad_norm": 2.546875,
      "learning_rate": 1.418205513628304e-05,
      "loss": 0.7721,
      "step": 637470
    },
    {
      "epoch": 2.2342084695805164,
      "grad_norm": 2.84375,
      "learning_rate": 1.4181406107619338e-05,
      "loss": 0.7969,
      "step": 637480
    },
    {
      "epoch": 2.234243517087412,
      "grad_norm": 3.234375,
      "learning_rate": 1.4180757078955636e-05,
      "loss": 0.8936,
      "step": 637490
    },
    {
      "epoch": 2.2342785645943075,
      "grad_norm": 2.765625,
      "learning_rate": 1.4180108050291934e-05,
      "loss": 0.8235,
      "step": 637500
    },
    {
      "epoch": 2.2343136121012033,
      "grad_norm": 3.015625,
      "learning_rate": 1.4179459021628232e-05,
      "loss": 0.7754,
      "step": 637510
    },
    {
      "epoch": 2.2343486596080986,
      "grad_norm": 2.703125,
      "learning_rate": 1.4178809992964532e-05,
      "loss": 0.8616,
      "step": 637520
    },
    {
      "epoch": 2.2343837071149943,
      "grad_norm": 2.96875,
      "learning_rate": 1.4178160964300827e-05,
      "loss": 0.8415,
      "step": 637530
    },
    {
      "epoch": 2.23441875462189,
      "grad_norm": 3.3125,
      "learning_rate": 1.4177511935637125e-05,
      "loss": 0.8644,
      "step": 637540
    },
    {
      "epoch": 2.2344538021287854,
      "grad_norm": 2.578125,
      "learning_rate": 1.4176862906973423e-05,
      "loss": 0.8306,
      "step": 637550
    },
    {
      "epoch": 2.234488849635681,
      "grad_norm": 2.640625,
      "learning_rate": 1.4176213878309722e-05,
      "loss": 0.8412,
      "step": 637560
    },
    {
      "epoch": 2.234523897142577,
      "grad_norm": 3.03125,
      "learning_rate": 1.417556484964602e-05,
      "loss": 0.8203,
      "step": 637570
    },
    {
      "epoch": 2.2345589446494722,
      "grad_norm": 2.90625,
      "learning_rate": 1.4174915820982318e-05,
      "loss": 0.8176,
      "step": 637580
    },
    {
      "epoch": 2.234593992156368,
      "grad_norm": 2.859375,
      "learning_rate": 1.4174266792318616e-05,
      "loss": 0.7775,
      "step": 637590
    },
    {
      "epoch": 2.2346290396632638,
      "grad_norm": 2.5625,
      "learning_rate": 1.4173617763654914e-05,
      "loss": 0.7833,
      "step": 637600
    },
    {
      "epoch": 2.234664087170159,
      "grad_norm": 3.109375,
      "learning_rate": 1.4172968734991212e-05,
      "loss": 0.8304,
      "step": 637610
    },
    {
      "epoch": 2.234699134677055,
      "grad_norm": 2.703125,
      "learning_rate": 1.417231970632751e-05,
      "loss": 0.7852,
      "step": 637620
    },
    {
      "epoch": 2.23473418218395,
      "grad_norm": 3.078125,
      "learning_rate": 1.417167067766381e-05,
      "loss": 0.7974,
      "step": 637630
    },
    {
      "epoch": 2.234769229690846,
      "grad_norm": 2.640625,
      "learning_rate": 1.4171021649000108e-05,
      "loss": 0.875,
      "step": 637640
    },
    {
      "epoch": 2.2348042771977417,
      "grad_norm": 3.09375,
      "learning_rate": 1.4170372620336406e-05,
      "loss": 0.8297,
      "step": 637650
    },
    {
      "epoch": 2.234839324704637,
      "grad_norm": 2.390625,
      "learning_rate": 1.4169723591672704e-05,
      "loss": 0.8427,
      "step": 637660
    },
    {
      "epoch": 2.2348743722115327,
      "grad_norm": 3.296875,
      "learning_rate": 1.4169074563009002e-05,
      "loss": 0.8135,
      "step": 637670
    },
    {
      "epoch": 2.2349094197184285,
      "grad_norm": 2.734375,
      "learning_rate": 1.41684255343453e-05,
      "loss": 0.8626,
      "step": 637680
    },
    {
      "epoch": 2.234944467225324,
      "grad_norm": 3.109375,
      "learning_rate": 1.4167776505681598e-05,
      "loss": 0.803,
      "step": 637690
    },
    {
      "epoch": 2.2349795147322196,
      "grad_norm": 2.796875,
      "learning_rate": 1.4167127477017898e-05,
      "loss": 0.8408,
      "step": 637700
    },
    {
      "epoch": 2.2350145622391153,
      "grad_norm": 2.5625,
      "learning_rate": 1.4166478448354196e-05,
      "loss": 0.824,
      "step": 637710
    },
    {
      "epoch": 2.2350496097460106,
      "grad_norm": 2.78125,
      "learning_rate": 1.416582941969049e-05,
      "loss": 0.8134,
      "step": 637720
    },
    {
      "epoch": 2.2350846572529064,
      "grad_norm": 2.90625,
      "learning_rate": 1.4165180391026788e-05,
      "loss": 0.9058,
      "step": 637730
    },
    {
      "epoch": 2.2351197047598017,
      "grad_norm": 3.359375,
      "learning_rate": 1.4164531362363086e-05,
      "loss": 0.8586,
      "step": 637740
    },
    {
      "epoch": 2.2351547522666975,
      "grad_norm": 3.125,
      "learning_rate": 1.4163882333699386e-05,
      "loss": 0.8711,
      "step": 637750
    },
    {
      "epoch": 2.2351897997735932,
      "grad_norm": 3.0625,
      "learning_rate": 1.4163233305035684e-05,
      "loss": 0.8553,
      "step": 637760
    },
    {
      "epoch": 2.2352248472804885,
      "grad_norm": 2.875,
      "learning_rate": 1.4162584276371982e-05,
      "loss": 0.8814,
      "step": 637770
    },
    {
      "epoch": 2.2352598947873843,
      "grad_norm": 2.421875,
      "learning_rate": 1.416193524770828e-05,
      "loss": 0.7938,
      "step": 637780
    },
    {
      "epoch": 2.23529494229428,
      "grad_norm": 2.453125,
      "learning_rate": 1.4161286219044578e-05,
      "loss": 0.8288,
      "step": 637790
    },
    {
      "epoch": 2.2353299898011754,
      "grad_norm": 2.703125,
      "learning_rate": 1.4160637190380876e-05,
      "loss": 0.8773,
      "step": 637800
    },
    {
      "epoch": 2.235365037308071,
      "grad_norm": 2.59375,
      "learning_rate": 1.4159988161717174e-05,
      "loss": 0.7342,
      "step": 637810
    },
    {
      "epoch": 2.235400084814967,
      "grad_norm": 2.984375,
      "learning_rate": 1.4159339133053474e-05,
      "loss": 0.7814,
      "step": 637820
    },
    {
      "epoch": 2.235435132321862,
      "grad_norm": 2.796875,
      "learning_rate": 1.4158690104389772e-05,
      "loss": 0.874,
      "step": 637830
    },
    {
      "epoch": 2.235470179828758,
      "grad_norm": 2.71875,
      "learning_rate": 1.415804107572607e-05,
      "loss": 0.8523,
      "step": 637840
    },
    {
      "epoch": 2.2355052273356533,
      "grad_norm": 2.796875,
      "learning_rate": 1.4157392047062368e-05,
      "loss": 0.8158,
      "step": 637850
    },
    {
      "epoch": 2.235540274842549,
      "grad_norm": 2.671875,
      "learning_rate": 1.4156743018398666e-05,
      "loss": 0.8368,
      "step": 637860
    },
    {
      "epoch": 2.235575322349445,
      "grad_norm": 2.796875,
      "learning_rate": 1.4156093989734964e-05,
      "loss": 0.7953,
      "step": 637870
    },
    {
      "epoch": 2.23561036985634,
      "grad_norm": 2.828125,
      "learning_rate": 1.4155444961071262e-05,
      "loss": 0.8094,
      "step": 637880
    },
    {
      "epoch": 2.235645417363236,
      "grad_norm": 2.953125,
      "learning_rate": 1.4154795932407561e-05,
      "loss": 0.7649,
      "step": 637890
    },
    {
      "epoch": 2.2356804648701316,
      "grad_norm": 3.46875,
      "learning_rate": 1.415414690374386e-05,
      "loss": 0.8153,
      "step": 637900
    },
    {
      "epoch": 2.235715512377027,
      "grad_norm": 2.890625,
      "learning_rate": 1.4153497875080154e-05,
      "loss": 0.7833,
      "step": 637910
    },
    {
      "epoch": 2.2357505598839227,
      "grad_norm": 2.875,
      "learning_rate": 1.4152848846416452e-05,
      "loss": 0.7795,
      "step": 637920
    },
    {
      "epoch": 2.2357856073908184,
      "grad_norm": 2.890625,
      "learning_rate": 1.4152199817752752e-05,
      "loss": 0.892,
      "step": 637930
    },
    {
      "epoch": 2.2358206548977138,
      "grad_norm": 3.328125,
      "learning_rate": 1.415155078908905e-05,
      "loss": 0.958,
      "step": 637940
    },
    {
      "epoch": 2.2358557024046095,
      "grad_norm": 3.046875,
      "learning_rate": 1.4150901760425348e-05,
      "loss": 0.7679,
      "step": 637950
    },
    {
      "epoch": 2.235890749911505,
      "grad_norm": 2.421875,
      "learning_rate": 1.4150252731761646e-05,
      "loss": 0.8172,
      "step": 637960
    },
    {
      "epoch": 2.2359257974184006,
      "grad_norm": 2.53125,
      "learning_rate": 1.4149603703097944e-05,
      "loss": 0.724,
      "step": 637970
    },
    {
      "epoch": 2.2359608449252963,
      "grad_norm": 3.03125,
      "learning_rate": 1.4148954674434242e-05,
      "loss": 0.8151,
      "step": 637980
    },
    {
      "epoch": 2.2359958924321917,
      "grad_norm": 2.96875,
      "learning_rate": 1.414830564577054e-05,
      "loss": 0.7532,
      "step": 637990
    },
    {
      "epoch": 2.2360309399390874,
      "grad_norm": 2.921875,
      "learning_rate": 1.414765661710684e-05,
      "loss": 0.7792,
      "step": 638000
    },
    {
      "epoch": 2.236065987445983,
      "grad_norm": 2.859375,
      "learning_rate": 1.4147007588443137e-05,
      "loss": 0.7612,
      "step": 638010
    },
    {
      "epoch": 2.2361010349528785,
      "grad_norm": 3.234375,
      "learning_rate": 1.4146358559779435e-05,
      "loss": 0.833,
      "step": 638020
    },
    {
      "epoch": 2.2361360824597742,
      "grad_norm": 3.171875,
      "learning_rate": 1.4145709531115733e-05,
      "loss": 0.7144,
      "step": 638030
    },
    {
      "epoch": 2.23617112996667,
      "grad_norm": 3.109375,
      "learning_rate": 1.4145060502452031e-05,
      "loss": 0.8202,
      "step": 638040
    },
    {
      "epoch": 2.2362061774735653,
      "grad_norm": 2.9375,
      "learning_rate": 1.414441147378833e-05,
      "loss": 0.7498,
      "step": 638050
    },
    {
      "epoch": 2.236241224980461,
      "grad_norm": 2.71875,
      "learning_rate": 1.4143762445124627e-05,
      "loss": 0.8992,
      "step": 638060
    },
    {
      "epoch": 2.2362762724873564,
      "grad_norm": 3.046875,
      "learning_rate": 1.4143113416460927e-05,
      "loss": 0.9334,
      "step": 638070
    },
    {
      "epoch": 2.236311319994252,
      "grad_norm": 2.984375,
      "learning_rate": 1.4142464387797225e-05,
      "loss": 0.77,
      "step": 638080
    },
    {
      "epoch": 2.236346367501148,
      "grad_norm": 3.078125,
      "learning_rate": 1.414181535913352e-05,
      "loss": 0.8719,
      "step": 638090
    },
    {
      "epoch": 2.236381415008043,
      "grad_norm": 2.984375,
      "learning_rate": 1.4141166330469818e-05,
      "loss": 0.8012,
      "step": 638100
    },
    {
      "epoch": 2.236416462514939,
      "grad_norm": 2.53125,
      "learning_rate": 1.4140517301806117e-05,
      "loss": 0.8257,
      "step": 638110
    },
    {
      "epoch": 2.2364515100218347,
      "grad_norm": 2.734375,
      "learning_rate": 1.4139868273142415e-05,
      "loss": 0.7874,
      "step": 638120
    },
    {
      "epoch": 2.23648655752873,
      "grad_norm": 3.140625,
      "learning_rate": 1.4139219244478713e-05,
      "loss": 0.8231,
      "step": 638130
    },
    {
      "epoch": 2.236521605035626,
      "grad_norm": 2.875,
      "learning_rate": 1.4138570215815011e-05,
      "loss": 0.8094,
      "step": 638140
    },
    {
      "epoch": 2.2365566525425216,
      "grad_norm": 3.09375,
      "learning_rate": 1.413792118715131e-05,
      "loss": 0.7984,
      "step": 638150
    },
    {
      "epoch": 2.236591700049417,
      "grad_norm": 2.609375,
      "learning_rate": 1.4137272158487607e-05,
      "loss": 0.7161,
      "step": 638160
    },
    {
      "epoch": 2.2366267475563126,
      "grad_norm": 2.84375,
      "learning_rate": 1.4136623129823905e-05,
      "loss": 0.84,
      "step": 638170
    },
    {
      "epoch": 2.236661795063208,
      "grad_norm": 3.015625,
      "learning_rate": 1.4135974101160205e-05,
      "loss": 0.8554,
      "step": 638180
    },
    {
      "epoch": 2.2366968425701037,
      "grad_norm": 3.21875,
      "learning_rate": 1.4135325072496503e-05,
      "loss": 0.8689,
      "step": 638190
    },
    {
      "epoch": 2.2367318900769995,
      "grad_norm": 3.25,
      "learning_rate": 1.4134676043832801e-05,
      "loss": 0.8362,
      "step": 638200
    },
    {
      "epoch": 2.2367669375838948,
      "grad_norm": 2.375,
      "learning_rate": 1.4134027015169099e-05,
      "loss": 0.7347,
      "step": 638210
    },
    {
      "epoch": 2.2368019850907905,
      "grad_norm": 2.734375,
      "learning_rate": 1.4133377986505397e-05,
      "loss": 0.79,
      "step": 638220
    },
    {
      "epoch": 2.2368370325976863,
      "grad_norm": 3.21875,
      "learning_rate": 1.4132728957841695e-05,
      "loss": 0.9076,
      "step": 638230
    },
    {
      "epoch": 2.2368720801045816,
      "grad_norm": 2.96875,
      "learning_rate": 1.4132079929177993e-05,
      "loss": 0.7537,
      "step": 638240
    },
    {
      "epoch": 2.2369071276114774,
      "grad_norm": 3.03125,
      "learning_rate": 1.4131430900514293e-05,
      "loss": 0.8382,
      "step": 638250
    },
    {
      "epoch": 2.236942175118373,
      "grad_norm": 2.46875,
      "learning_rate": 1.413078187185059e-05,
      "loss": 0.8035,
      "step": 638260
    },
    {
      "epoch": 2.2369772226252684,
      "grad_norm": 2.65625,
      "learning_rate": 1.4130132843186889e-05,
      "loss": 0.8043,
      "step": 638270
    },
    {
      "epoch": 2.237012270132164,
      "grad_norm": 3.421875,
      "learning_rate": 1.4129483814523183e-05,
      "loss": 0.8246,
      "step": 638280
    },
    {
      "epoch": 2.23704731763906,
      "grad_norm": 3.015625,
      "learning_rate": 1.4128834785859481e-05,
      "loss": 0.8311,
      "step": 638290
    },
    {
      "epoch": 2.2370823651459553,
      "grad_norm": 3.015625,
      "learning_rate": 1.4128185757195781e-05,
      "loss": 0.8178,
      "step": 638300
    },
    {
      "epoch": 2.237117412652851,
      "grad_norm": 2.875,
      "learning_rate": 1.4127536728532079e-05,
      "loss": 0.8134,
      "step": 638310
    },
    {
      "epoch": 2.2371524601597463,
      "grad_norm": 2.828125,
      "learning_rate": 1.4126887699868377e-05,
      "loss": 0.9061,
      "step": 638320
    },
    {
      "epoch": 2.237187507666642,
      "grad_norm": 3.4375,
      "learning_rate": 1.4126238671204675e-05,
      "loss": 0.8234,
      "step": 638330
    },
    {
      "epoch": 2.237222555173538,
      "grad_norm": 2.828125,
      "learning_rate": 1.4125589642540973e-05,
      "loss": 0.7673,
      "step": 638340
    },
    {
      "epoch": 2.237257602680433,
      "grad_norm": 3.046875,
      "learning_rate": 1.4124940613877271e-05,
      "loss": 0.8189,
      "step": 638350
    },
    {
      "epoch": 2.237292650187329,
      "grad_norm": 3.0,
      "learning_rate": 1.4124291585213569e-05,
      "loss": 0.8585,
      "step": 638360
    },
    {
      "epoch": 2.2373276976942247,
      "grad_norm": 2.953125,
      "learning_rate": 1.4123642556549869e-05,
      "loss": 0.8185,
      "step": 638370
    },
    {
      "epoch": 2.23736274520112,
      "grad_norm": 2.796875,
      "learning_rate": 1.4122993527886167e-05,
      "loss": 0.8124,
      "step": 638380
    },
    {
      "epoch": 2.2373977927080158,
      "grad_norm": 2.765625,
      "learning_rate": 1.4122344499222465e-05,
      "loss": 0.8817,
      "step": 638390
    },
    {
      "epoch": 2.2374328402149115,
      "grad_norm": 3.171875,
      "learning_rate": 1.4121695470558763e-05,
      "loss": 0.7771,
      "step": 638400
    },
    {
      "epoch": 2.237467887721807,
      "grad_norm": 3.125,
      "learning_rate": 1.412104644189506e-05,
      "loss": 0.7492,
      "step": 638410
    },
    {
      "epoch": 2.2375029352287026,
      "grad_norm": 2.65625,
      "learning_rate": 1.4120397413231359e-05,
      "loss": 0.7895,
      "step": 638420
    },
    {
      "epoch": 2.237537982735598,
      "grad_norm": 3.09375,
      "learning_rate": 1.4119748384567657e-05,
      "loss": 0.7955,
      "step": 638430
    },
    {
      "epoch": 2.2375730302424937,
      "grad_norm": 2.765625,
      "learning_rate": 1.4119099355903956e-05,
      "loss": 0.812,
      "step": 638440
    },
    {
      "epoch": 2.2376080777493894,
      "grad_norm": 3.21875,
      "learning_rate": 1.4118450327240254e-05,
      "loss": 0.8861,
      "step": 638450
    },
    {
      "epoch": 2.2376431252562847,
      "grad_norm": 3.015625,
      "learning_rate": 1.4117801298576552e-05,
      "loss": 0.8686,
      "step": 638460
    },
    {
      "epoch": 2.2376781727631805,
      "grad_norm": 3.515625,
      "learning_rate": 1.4117152269912847e-05,
      "loss": 0.7445,
      "step": 638470
    },
    {
      "epoch": 2.2377132202700762,
      "grad_norm": 3.03125,
      "learning_rate": 1.4116503241249147e-05,
      "loss": 0.8654,
      "step": 638480
    },
    {
      "epoch": 2.2377482677769716,
      "grad_norm": 2.890625,
      "learning_rate": 1.4115854212585445e-05,
      "loss": 0.8691,
      "step": 638490
    },
    {
      "epoch": 2.2377833152838673,
      "grad_norm": 3.03125,
      "learning_rate": 1.4115205183921743e-05,
      "loss": 0.7953,
      "step": 638500
    },
    {
      "epoch": 2.237818362790763,
      "grad_norm": 2.8125,
      "learning_rate": 1.411455615525804e-05,
      "loss": 0.7881,
      "step": 638510
    },
    {
      "epoch": 2.2378534102976584,
      "grad_norm": 3.1875,
      "learning_rate": 1.4113907126594339e-05,
      "loss": 0.7967,
      "step": 638520
    },
    {
      "epoch": 2.237888457804554,
      "grad_norm": 2.953125,
      "learning_rate": 1.4113258097930637e-05,
      "loss": 0.8231,
      "step": 638530
    },
    {
      "epoch": 2.23792350531145,
      "grad_norm": 2.5625,
      "learning_rate": 1.4112609069266935e-05,
      "loss": 0.8088,
      "step": 638540
    },
    {
      "epoch": 2.237958552818345,
      "grad_norm": 2.71875,
      "learning_rate": 1.4111960040603234e-05,
      "loss": 0.8301,
      "step": 638550
    },
    {
      "epoch": 2.237993600325241,
      "grad_norm": 2.625,
      "learning_rate": 1.4111311011939532e-05,
      "loss": 0.7781,
      "step": 638560
    },
    {
      "epoch": 2.2380286478321363,
      "grad_norm": 2.90625,
      "learning_rate": 1.411066198327583e-05,
      "loss": 0.7814,
      "step": 638570
    },
    {
      "epoch": 2.238063695339032,
      "grad_norm": 3.171875,
      "learning_rate": 1.4110012954612128e-05,
      "loss": 0.8961,
      "step": 638580
    },
    {
      "epoch": 2.238098742845928,
      "grad_norm": 3.125,
      "learning_rate": 1.4109363925948426e-05,
      "loss": 0.763,
      "step": 638590
    },
    {
      "epoch": 2.238133790352823,
      "grad_norm": 3.265625,
      "learning_rate": 1.4108714897284724e-05,
      "loss": 0.7855,
      "step": 638600
    },
    {
      "epoch": 2.238168837859719,
      "grad_norm": 3.421875,
      "learning_rate": 1.4108065868621022e-05,
      "loss": 0.8221,
      "step": 638610
    },
    {
      "epoch": 2.2382038853666146,
      "grad_norm": 3.15625,
      "learning_rate": 1.4107416839957322e-05,
      "loss": 0.8304,
      "step": 638620
    },
    {
      "epoch": 2.23823893287351,
      "grad_norm": 2.578125,
      "learning_rate": 1.410676781129362e-05,
      "loss": 0.8019,
      "step": 638630
    },
    {
      "epoch": 2.2382739803804057,
      "grad_norm": 2.640625,
      "learning_rate": 1.4106118782629918e-05,
      "loss": 0.7634,
      "step": 638640
    },
    {
      "epoch": 2.2383090278873015,
      "grad_norm": 3.140625,
      "learning_rate": 1.4105469753966216e-05,
      "loss": 0.7676,
      "step": 638650
    },
    {
      "epoch": 2.2383440753941968,
      "grad_norm": 3.3125,
      "learning_rate": 1.4104820725302512e-05,
      "loss": 0.8229,
      "step": 638660
    },
    {
      "epoch": 2.2383791229010925,
      "grad_norm": 3.015625,
      "learning_rate": 1.410417169663881e-05,
      "loss": 0.8223,
      "step": 638670
    },
    {
      "epoch": 2.238414170407988,
      "grad_norm": 2.09375,
      "learning_rate": 1.4103522667975108e-05,
      "loss": 0.646,
      "step": 638680
    },
    {
      "epoch": 2.2384492179148836,
      "grad_norm": 3.0,
      "learning_rate": 1.4102873639311406e-05,
      "loss": 0.8568,
      "step": 638690
    },
    {
      "epoch": 2.2384842654217794,
      "grad_norm": 3.015625,
      "learning_rate": 1.4102224610647704e-05,
      "loss": 0.849,
      "step": 638700
    },
    {
      "epoch": 2.2385193129286747,
      "grad_norm": 2.671875,
      "learning_rate": 1.4101575581984002e-05,
      "loss": 0.7866,
      "step": 638710
    },
    {
      "epoch": 2.2385543604355704,
      "grad_norm": 2.9375,
      "learning_rate": 1.41009265533203e-05,
      "loss": 0.7904,
      "step": 638720
    },
    {
      "epoch": 2.238589407942466,
      "grad_norm": 3.296875,
      "learning_rate": 1.41002775246566e-05,
      "loss": 0.7956,
      "step": 638730
    },
    {
      "epoch": 2.2386244554493615,
      "grad_norm": 2.9375,
      "learning_rate": 1.4099628495992898e-05,
      "loss": 0.832,
      "step": 638740
    },
    {
      "epoch": 2.2386595029562573,
      "grad_norm": 2.703125,
      "learning_rate": 1.4098979467329196e-05,
      "loss": 0.8795,
      "step": 638750
    },
    {
      "epoch": 2.238694550463153,
      "grad_norm": 2.859375,
      "learning_rate": 1.4098330438665494e-05,
      "loss": 0.7866,
      "step": 638760
    },
    {
      "epoch": 2.2387295979700483,
      "grad_norm": 2.671875,
      "learning_rate": 1.4097681410001792e-05,
      "loss": 0.8021,
      "step": 638770
    },
    {
      "epoch": 2.238764645476944,
      "grad_norm": 3.265625,
      "learning_rate": 1.409703238133809e-05,
      "loss": 0.7959,
      "step": 638780
    },
    {
      "epoch": 2.2387996929838394,
      "grad_norm": 2.84375,
      "learning_rate": 1.4096383352674388e-05,
      "loss": 0.7641,
      "step": 638790
    },
    {
      "epoch": 2.238834740490735,
      "grad_norm": 3.0625,
      "learning_rate": 1.4095734324010688e-05,
      "loss": 0.7572,
      "step": 638800
    },
    {
      "epoch": 2.238869787997631,
      "grad_norm": 2.90625,
      "learning_rate": 1.4095085295346986e-05,
      "loss": 0.9559,
      "step": 638810
    },
    {
      "epoch": 2.2389048355045262,
      "grad_norm": 2.734375,
      "learning_rate": 1.4094436266683284e-05,
      "loss": 0.817,
      "step": 638820
    },
    {
      "epoch": 2.238939883011422,
      "grad_norm": 2.59375,
      "learning_rate": 1.4093787238019582e-05,
      "loss": 0.9066,
      "step": 638830
    },
    {
      "epoch": 2.2389749305183178,
      "grad_norm": 2.75,
      "learning_rate": 1.409313820935588e-05,
      "loss": 0.831,
      "step": 638840
    },
    {
      "epoch": 2.239009978025213,
      "grad_norm": 2.6875,
      "learning_rate": 1.4092489180692176e-05,
      "loss": 0.7526,
      "step": 638850
    },
    {
      "epoch": 2.239045025532109,
      "grad_norm": 2.921875,
      "learning_rate": 1.4091840152028474e-05,
      "loss": 0.8883,
      "step": 638860
    },
    {
      "epoch": 2.2390800730390046,
      "grad_norm": 3.34375,
      "learning_rate": 1.4091191123364772e-05,
      "loss": 0.8454,
      "step": 638870
    },
    {
      "epoch": 2.2391151205459,
      "grad_norm": 2.9375,
      "learning_rate": 1.409054209470107e-05,
      "loss": 0.7849,
      "step": 638880
    },
    {
      "epoch": 2.2391501680527957,
      "grad_norm": 2.515625,
      "learning_rate": 1.4089893066037368e-05,
      "loss": 0.8115,
      "step": 638890
    },
    {
      "epoch": 2.239185215559691,
      "grad_norm": 2.78125,
      "learning_rate": 1.4089244037373666e-05,
      "loss": 0.7578,
      "step": 638900
    },
    {
      "epoch": 2.2392202630665867,
      "grad_norm": 2.828125,
      "learning_rate": 1.4088595008709964e-05,
      "loss": 0.7527,
      "step": 638910
    },
    {
      "epoch": 2.2392553105734825,
      "grad_norm": 2.890625,
      "learning_rate": 1.4087945980046264e-05,
      "loss": 0.7786,
      "step": 638920
    },
    {
      "epoch": 2.239290358080378,
      "grad_norm": 3.296875,
      "learning_rate": 1.4087296951382562e-05,
      "loss": 0.8702,
      "step": 638930
    },
    {
      "epoch": 2.2393254055872736,
      "grad_norm": 2.734375,
      "learning_rate": 1.408664792271886e-05,
      "loss": 0.7843,
      "step": 638940
    },
    {
      "epoch": 2.2393604530941693,
      "grad_norm": 3.0625,
      "learning_rate": 1.4085998894055158e-05,
      "loss": 0.8741,
      "step": 638950
    },
    {
      "epoch": 2.2393955006010646,
      "grad_norm": 3.140625,
      "learning_rate": 1.4085349865391456e-05,
      "loss": 0.8371,
      "step": 638960
    },
    {
      "epoch": 2.2394305481079604,
      "grad_norm": 3.34375,
      "learning_rate": 1.4084700836727754e-05,
      "loss": 0.8833,
      "step": 638970
    },
    {
      "epoch": 2.239465595614856,
      "grad_norm": 2.765625,
      "learning_rate": 1.4084051808064052e-05,
      "loss": 0.7843,
      "step": 638980
    },
    {
      "epoch": 2.2395006431217515,
      "grad_norm": 2.859375,
      "learning_rate": 1.4083402779400352e-05,
      "loss": 0.8747,
      "step": 638990
    },
    {
      "epoch": 2.239535690628647,
      "grad_norm": 2.78125,
      "learning_rate": 1.408275375073665e-05,
      "loss": 0.7395,
      "step": 639000
    },
    {
      "epoch": 2.2395707381355425,
      "grad_norm": 3.046875,
      "learning_rate": 1.4082104722072948e-05,
      "loss": 0.955,
      "step": 639010
    },
    {
      "epoch": 2.2396057856424383,
      "grad_norm": 3.203125,
      "learning_rate": 1.4081455693409246e-05,
      "loss": 0.8057,
      "step": 639020
    },
    {
      "epoch": 2.239640833149334,
      "grad_norm": 2.875,
      "learning_rate": 1.4080806664745542e-05,
      "loss": 0.8485,
      "step": 639030
    },
    {
      "epoch": 2.2396758806562294,
      "grad_norm": 2.984375,
      "learning_rate": 1.408015763608184e-05,
      "loss": 0.8397,
      "step": 639040
    },
    {
      "epoch": 2.239710928163125,
      "grad_norm": 2.609375,
      "learning_rate": 1.4079508607418138e-05,
      "loss": 0.8138,
      "step": 639050
    },
    {
      "epoch": 2.239745975670021,
      "grad_norm": 2.71875,
      "learning_rate": 1.4078859578754436e-05,
      "loss": 0.874,
      "step": 639060
    },
    {
      "epoch": 2.239781023176916,
      "grad_norm": 3.0,
      "learning_rate": 1.4078210550090734e-05,
      "loss": 0.8177,
      "step": 639070
    },
    {
      "epoch": 2.239816070683812,
      "grad_norm": 2.984375,
      "learning_rate": 1.4077561521427032e-05,
      "loss": 0.7945,
      "step": 639080
    },
    {
      "epoch": 2.2398511181907077,
      "grad_norm": 2.734375,
      "learning_rate": 1.407691249276333e-05,
      "loss": 0.8672,
      "step": 639090
    },
    {
      "epoch": 2.239886165697603,
      "grad_norm": 3.328125,
      "learning_rate": 1.407626346409963e-05,
      "loss": 0.8718,
      "step": 639100
    },
    {
      "epoch": 2.2399212132044988,
      "grad_norm": 2.796875,
      "learning_rate": 1.4075614435435928e-05,
      "loss": 0.7989,
      "step": 639110
    },
    {
      "epoch": 2.239956260711394,
      "grad_norm": 3.28125,
      "learning_rate": 1.4074965406772226e-05,
      "loss": 0.9139,
      "step": 639120
    },
    {
      "epoch": 2.23999130821829,
      "grad_norm": 3.234375,
      "learning_rate": 1.4074316378108524e-05,
      "loss": 0.7928,
      "step": 639130
    },
    {
      "epoch": 2.2400263557251856,
      "grad_norm": 2.421875,
      "learning_rate": 1.4073667349444822e-05,
      "loss": 0.7708,
      "step": 639140
    },
    {
      "epoch": 2.240061403232081,
      "grad_norm": 3.109375,
      "learning_rate": 1.407301832078112e-05,
      "loss": 0.7899,
      "step": 639150
    },
    {
      "epoch": 2.2400964507389767,
      "grad_norm": 2.921875,
      "learning_rate": 1.4072369292117418e-05,
      "loss": 0.8185,
      "step": 639160
    },
    {
      "epoch": 2.2401314982458724,
      "grad_norm": 3.0,
      "learning_rate": 1.4071720263453717e-05,
      "loss": 0.7395,
      "step": 639170
    },
    {
      "epoch": 2.2401665457527677,
      "grad_norm": 2.796875,
      "learning_rate": 1.4071071234790015e-05,
      "loss": 0.8659,
      "step": 639180
    },
    {
      "epoch": 2.2402015932596635,
      "grad_norm": 2.75,
      "learning_rate": 1.4070422206126313e-05,
      "loss": 0.8437,
      "step": 639190
    },
    {
      "epoch": 2.2402366407665593,
      "grad_norm": 3.6875,
      "learning_rate": 1.4069773177462611e-05,
      "loss": 0.7562,
      "step": 639200
    },
    {
      "epoch": 2.2402716882734546,
      "grad_norm": 2.8125,
      "learning_rate": 1.406912414879891e-05,
      "loss": 0.7407,
      "step": 639210
    },
    {
      "epoch": 2.2403067357803503,
      "grad_norm": 2.484375,
      "learning_rate": 1.4068475120135206e-05,
      "loss": 0.7734,
      "step": 639220
    },
    {
      "epoch": 2.2403417832872456,
      "grad_norm": 3.25,
      "learning_rate": 1.4067826091471504e-05,
      "loss": 0.7795,
      "step": 639230
    },
    {
      "epoch": 2.2403768307941414,
      "grad_norm": 3.25,
      "learning_rate": 1.4067177062807802e-05,
      "loss": 0.7882,
      "step": 639240
    },
    {
      "epoch": 2.240411878301037,
      "grad_norm": 3.0625,
      "learning_rate": 1.40665280341441e-05,
      "loss": 0.7593,
      "step": 639250
    },
    {
      "epoch": 2.2404469258079325,
      "grad_norm": 3.015625,
      "learning_rate": 1.4065879005480398e-05,
      "loss": 0.8665,
      "step": 639260
    },
    {
      "epoch": 2.2404819733148282,
      "grad_norm": 2.96875,
      "learning_rate": 1.4065229976816696e-05,
      "loss": 0.8797,
      "step": 639270
    },
    {
      "epoch": 2.240517020821724,
      "grad_norm": 2.953125,
      "learning_rate": 1.4064580948152995e-05,
      "loss": 0.8177,
      "step": 639280
    },
    {
      "epoch": 2.2405520683286193,
      "grad_norm": 2.84375,
      "learning_rate": 1.4063931919489293e-05,
      "loss": 0.7587,
      "step": 639290
    },
    {
      "epoch": 2.240587115835515,
      "grad_norm": 2.90625,
      "learning_rate": 1.4063282890825591e-05,
      "loss": 0.8422,
      "step": 639300
    },
    {
      "epoch": 2.240622163342411,
      "grad_norm": 2.875,
      "learning_rate": 1.406263386216189e-05,
      "loss": 0.7953,
      "step": 639310
    },
    {
      "epoch": 2.240657210849306,
      "grad_norm": 2.6875,
      "learning_rate": 1.4061984833498187e-05,
      "loss": 0.7761,
      "step": 639320
    },
    {
      "epoch": 2.240692258356202,
      "grad_norm": 2.75,
      "learning_rate": 1.4061335804834485e-05,
      "loss": 0.838,
      "step": 639330
    },
    {
      "epoch": 2.240727305863097,
      "grad_norm": 3.0,
      "learning_rate": 1.4060686776170783e-05,
      "loss": 0.8439,
      "step": 639340
    },
    {
      "epoch": 2.240762353369993,
      "grad_norm": 3.1875,
      "learning_rate": 1.4060037747507083e-05,
      "loss": 0.7856,
      "step": 639350
    },
    {
      "epoch": 2.2407974008768887,
      "grad_norm": 2.765625,
      "learning_rate": 1.4059388718843381e-05,
      "loss": 0.8314,
      "step": 639360
    },
    {
      "epoch": 2.240832448383784,
      "grad_norm": 2.984375,
      "learning_rate": 1.4058739690179679e-05,
      "loss": 0.8344,
      "step": 639370
    },
    {
      "epoch": 2.24086749589068,
      "grad_norm": 2.40625,
      "learning_rate": 1.4058090661515977e-05,
      "loss": 0.7508,
      "step": 639380
    },
    {
      "epoch": 2.2409025433975756,
      "grad_norm": 3.671875,
      "learning_rate": 1.4057441632852275e-05,
      "loss": 0.8189,
      "step": 639390
    },
    {
      "epoch": 2.240937590904471,
      "grad_norm": 2.875,
      "learning_rate": 1.4056792604188573e-05,
      "loss": 0.7568,
      "step": 639400
    },
    {
      "epoch": 2.2409726384113666,
      "grad_norm": 2.796875,
      "learning_rate": 1.405614357552487e-05,
      "loss": 0.7951,
      "step": 639410
    },
    {
      "epoch": 2.2410076859182624,
      "grad_norm": 3.109375,
      "learning_rate": 1.4055494546861167e-05,
      "loss": 0.8146,
      "step": 639420
    },
    {
      "epoch": 2.2410427334251577,
      "grad_norm": 2.8125,
      "learning_rate": 1.4054845518197465e-05,
      "loss": 0.7869,
      "step": 639430
    },
    {
      "epoch": 2.2410777809320535,
      "grad_norm": 2.921875,
      "learning_rate": 1.4054196489533763e-05,
      "loss": 0.7715,
      "step": 639440
    },
    {
      "epoch": 2.2411128284389488,
      "grad_norm": 2.390625,
      "learning_rate": 1.4053547460870061e-05,
      "loss": 0.8065,
      "step": 639450
    },
    {
      "epoch": 2.2411478759458445,
      "grad_norm": 2.953125,
      "learning_rate": 1.405289843220636e-05,
      "loss": 0.7524,
      "step": 639460
    },
    {
      "epoch": 2.2411829234527403,
      "grad_norm": 2.796875,
      "learning_rate": 1.4052249403542659e-05,
      "loss": 0.8469,
      "step": 639470
    },
    {
      "epoch": 2.2412179709596356,
      "grad_norm": 2.5,
      "learning_rate": 1.4051600374878957e-05,
      "loss": 0.7841,
      "step": 639480
    },
    {
      "epoch": 2.2412530184665314,
      "grad_norm": 2.796875,
      "learning_rate": 1.4050951346215255e-05,
      "loss": 0.8309,
      "step": 639490
    },
    {
      "epoch": 2.241288065973427,
      "grad_norm": 2.609375,
      "learning_rate": 1.4050302317551553e-05,
      "loss": 0.8267,
      "step": 639500
    },
    {
      "epoch": 2.2413231134803224,
      "grad_norm": 2.921875,
      "learning_rate": 1.4049653288887851e-05,
      "loss": 0.7289,
      "step": 639510
    },
    {
      "epoch": 2.241358160987218,
      "grad_norm": 2.65625,
      "learning_rate": 1.4049004260224149e-05,
      "loss": 0.8127,
      "step": 639520
    },
    {
      "epoch": 2.241393208494114,
      "grad_norm": 2.5,
      "learning_rate": 1.4048355231560447e-05,
      "loss": 0.8354,
      "step": 639530
    },
    {
      "epoch": 2.2414282560010093,
      "grad_norm": 3.234375,
      "learning_rate": 1.4047706202896747e-05,
      "loss": 0.7741,
      "step": 639540
    },
    {
      "epoch": 2.241463303507905,
      "grad_norm": 3.234375,
      "learning_rate": 1.4047057174233045e-05,
      "loss": 0.8985,
      "step": 639550
    },
    {
      "epoch": 2.2414983510148003,
      "grad_norm": 2.828125,
      "learning_rate": 1.4046408145569343e-05,
      "loss": 0.8295,
      "step": 639560
    },
    {
      "epoch": 2.241533398521696,
      "grad_norm": 2.484375,
      "learning_rate": 1.404575911690564e-05,
      "loss": 0.8443,
      "step": 639570
    },
    {
      "epoch": 2.241568446028592,
      "grad_norm": 3.140625,
      "learning_rate": 1.4045110088241939e-05,
      "loss": 0.8011,
      "step": 639580
    },
    {
      "epoch": 2.241603493535487,
      "grad_norm": 2.90625,
      "learning_rate": 1.4044461059578237e-05,
      "loss": 0.8642,
      "step": 639590
    },
    {
      "epoch": 2.241638541042383,
      "grad_norm": 2.5,
      "learning_rate": 1.4043812030914533e-05,
      "loss": 0.7565,
      "step": 639600
    },
    {
      "epoch": 2.2416735885492787,
      "grad_norm": 2.984375,
      "learning_rate": 1.4043163002250831e-05,
      "loss": 0.7823,
      "step": 639610
    },
    {
      "epoch": 2.241708636056174,
      "grad_norm": 2.59375,
      "learning_rate": 1.4042513973587129e-05,
      "loss": 0.7045,
      "step": 639620
    },
    {
      "epoch": 2.2417436835630697,
      "grad_norm": 2.515625,
      "learning_rate": 1.4041864944923427e-05,
      "loss": 0.7475,
      "step": 639630
    },
    {
      "epoch": 2.2417787310699655,
      "grad_norm": 2.578125,
      "learning_rate": 1.4041215916259725e-05,
      "loss": 0.8868,
      "step": 639640
    },
    {
      "epoch": 2.241813778576861,
      "grad_norm": 3.046875,
      "learning_rate": 1.4040566887596025e-05,
      "loss": 0.7541,
      "step": 639650
    },
    {
      "epoch": 2.2418488260837566,
      "grad_norm": 3.625,
      "learning_rate": 1.4039917858932323e-05,
      "loss": 0.7977,
      "step": 639660
    },
    {
      "epoch": 2.2418838735906523,
      "grad_norm": 3.140625,
      "learning_rate": 1.403926883026862e-05,
      "loss": 0.7174,
      "step": 639670
    },
    {
      "epoch": 2.2419189210975476,
      "grad_norm": 2.765625,
      "learning_rate": 1.4038619801604919e-05,
      "loss": 0.7345,
      "step": 639680
    },
    {
      "epoch": 2.2419539686044434,
      "grad_norm": 2.828125,
      "learning_rate": 1.4037970772941217e-05,
      "loss": 0.8279,
      "step": 639690
    },
    {
      "epoch": 2.2419890161113387,
      "grad_norm": 2.65625,
      "learning_rate": 1.4037321744277515e-05,
      "loss": 0.7486,
      "step": 639700
    },
    {
      "epoch": 2.2420240636182345,
      "grad_norm": 2.8125,
      "learning_rate": 1.4036672715613813e-05,
      "loss": 0.8952,
      "step": 639710
    },
    {
      "epoch": 2.2420591111251302,
      "grad_norm": 2.75,
      "learning_rate": 1.4036023686950112e-05,
      "loss": 0.838,
      "step": 639720
    },
    {
      "epoch": 2.2420941586320255,
      "grad_norm": 2.796875,
      "learning_rate": 1.403537465828641e-05,
      "loss": 0.7903,
      "step": 639730
    },
    {
      "epoch": 2.2421292061389213,
      "grad_norm": 3.28125,
      "learning_rate": 1.4034725629622708e-05,
      "loss": 0.8375,
      "step": 639740
    },
    {
      "epoch": 2.242164253645817,
      "grad_norm": 3.046875,
      "learning_rate": 1.4034076600959006e-05,
      "loss": 0.822,
      "step": 639750
    },
    {
      "epoch": 2.2421993011527124,
      "grad_norm": 3.140625,
      "learning_rate": 1.4033427572295304e-05,
      "loss": 0.8225,
      "step": 639760
    },
    {
      "epoch": 2.242234348659608,
      "grad_norm": 2.625,
      "learning_rate": 1.4032778543631602e-05,
      "loss": 0.8056,
      "step": 639770
    },
    {
      "epoch": 2.242269396166504,
      "grad_norm": 2.375,
      "learning_rate": 1.40321295149679e-05,
      "loss": 0.7928,
      "step": 639780
    },
    {
      "epoch": 2.242304443673399,
      "grad_norm": 2.65625,
      "learning_rate": 1.4031480486304197e-05,
      "loss": 0.8335,
      "step": 639790
    },
    {
      "epoch": 2.242339491180295,
      "grad_norm": 3.3125,
      "learning_rate": 1.4030831457640495e-05,
      "loss": 0.849,
      "step": 639800
    },
    {
      "epoch": 2.2423745386871907,
      "grad_norm": 3.1875,
      "learning_rate": 1.4030182428976793e-05,
      "loss": 0.8346,
      "step": 639810
    },
    {
      "epoch": 2.242409586194086,
      "grad_norm": 2.109375,
      "learning_rate": 1.402953340031309e-05,
      "loss": 0.8628,
      "step": 639820
    },
    {
      "epoch": 2.242444633700982,
      "grad_norm": 3.078125,
      "learning_rate": 1.402888437164939e-05,
      "loss": 0.8746,
      "step": 639830
    },
    {
      "epoch": 2.242479681207877,
      "grad_norm": 2.734375,
      "learning_rate": 1.4028235342985688e-05,
      "loss": 0.7497,
      "step": 639840
    },
    {
      "epoch": 2.242514728714773,
      "grad_norm": 2.640625,
      "learning_rate": 1.4027586314321986e-05,
      "loss": 0.7333,
      "step": 639850
    },
    {
      "epoch": 2.2425497762216686,
      "grad_norm": 3.078125,
      "learning_rate": 1.4026937285658284e-05,
      "loss": 0.7982,
      "step": 639860
    },
    {
      "epoch": 2.242584823728564,
      "grad_norm": 2.84375,
      "learning_rate": 1.4026288256994582e-05,
      "loss": 0.7484,
      "step": 639870
    },
    {
      "epoch": 2.2426198712354597,
      "grad_norm": 2.796875,
      "learning_rate": 1.402563922833088e-05,
      "loss": 0.7183,
      "step": 639880
    },
    {
      "epoch": 2.2426549187423555,
      "grad_norm": 2.6875,
      "learning_rate": 1.4024990199667178e-05,
      "loss": 0.8865,
      "step": 639890
    },
    {
      "epoch": 2.2426899662492508,
      "grad_norm": 2.3125,
      "learning_rate": 1.4024341171003478e-05,
      "loss": 0.8177,
      "step": 639900
    },
    {
      "epoch": 2.2427250137561465,
      "grad_norm": 2.796875,
      "learning_rate": 1.4023692142339776e-05,
      "loss": 0.83,
      "step": 639910
    },
    {
      "epoch": 2.2427600612630423,
      "grad_norm": 2.8125,
      "learning_rate": 1.4023043113676074e-05,
      "loss": 0.7946,
      "step": 639920
    },
    {
      "epoch": 2.2427951087699376,
      "grad_norm": 2.59375,
      "learning_rate": 1.4022394085012372e-05,
      "loss": 0.8113,
      "step": 639930
    },
    {
      "epoch": 2.2428301562768334,
      "grad_norm": 3.109375,
      "learning_rate": 1.402174505634867e-05,
      "loss": 0.8526,
      "step": 639940
    },
    {
      "epoch": 2.2428652037837287,
      "grad_norm": 3.234375,
      "learning_rate": 1.4021096027684968e-05,
      "loss": 0.7779,
      "step": 639950
    },
    {
      "epoch": 2.2429002512906244,
      "grad_norm": 4.28125,
      "learning_rate": 1.4020446999021266e-05,
      "loss": 0.786,
      "step": 639960
    },
    {
      "epoch": 2.24293529879752,
      "grad_norm": 3.0625,
      "learning_rate": 1.4019797970357566e-05,
      "loss": 0.8045,
      "step": 639970
    },
    {
      "epoch": 2.2429703463044155,
      "grad_norm": 2.9375,
      "learning_rate": 1.401914894169386e-05,
      "loss": 0.8385,
      "step": 639980
    },
    {
      "epoch": 2.2430053938113113,
      "grad_norm": 2.78125,
      "learning_rate": 1.4018499913030158e-05,
      "loss": 0.806,
      "step": 639990
    },
    {
      "epoch": 2.243040441318207,
      "grad_norm": 2.765625,
      "learning_rate": 1.4017850884366456e-05,
      "loss": 0.8622,
      "step": 640000
    },
    {
      "epoch": 2.243040441318207,
      "eval_loss": 0.7632808685302734,
      "eval_runtime": 555.8745,
      "eval_samples_per_second": 684.392,
      "eval_steps_per_second": 57.033,
      "step": 640000
    },
    {
      "epoch": 2.2430754888251023,
      "grad_norm": 2.96875,
      "learning_rate": 1.4017201855702754e-05,
      "loss": 0.7568,
      "step": 640010
    },
    {
      "epoch": 2.243110536331998,
      "grad_norm": 3.234375,
      "learning_rate": 1.4016552827039054e-05,
      "loss": 0.8842,
      "step": 640020
    },
    {
      "epoch": 2.243145583838894,
      "grad_norm": 3.140625,
      "learning_rate": 1.4015903798375352e-05,
      "loss": 0.7939,
      "step": 640030
    },
    {
      "epoch": 2.243180631345789,
      "grad_norm": 3.140625,
      "learning_rate": 1.401525476971165e-05,
      "loss": 0.8515,
      "step": 640040
    },
    {
      "epoch": 2.243215678852685,
      "grad_norm": 2.96875,
      "learning_rate": 1.4014605741047948e-05,
      "loss": 0.8079,
      "step": 640050
    },
    {
      "epoch": 2.2432507263595802,
      "grad_norm": 2.671875,
      "learning_rate": 1.4013956712384246e-05,
      "loss": 0.8081,
      "step": 640060
    },
    {
      "epoch": 2.243285773866476,
      "grad_norm": 3.171875,
      "learning_rate": 1.4013307683720544e-05,
      "loss": 0.8481,
      "step": 640070
    },
    {
      "epoch": 2.2433208213733717,
      "grad_norm": 3.28125,
      "learning_rate": 1.4012658655056842e-05,
      "loss": 0.8333,
      "step": 640080
    },
    {
      "epoch": 2.243355868880267,
      "grad_norm": 2.921875,
      "learning_rate": 1.4012009626393142e-05,
      "loss": 0.8476,
      "step": 640090
    },
    {
      "epoch": 2.243390916387163,
      "grad_norm": 3.375,
      "learning_rate": 1.401136059772944e-05,
      "loss": 0.8008,
      "step": 640100
    },
    {
      "epoch": 2.2434259638940586,
      "grad_norm": 3.265625,
      "learning_rate": 1.4010711569065738e-05,
      "loss": 0.7893,
      "step": 640110
    },
    {
      "epoch": 2.243461011400954,
      "grad_norm": 2.65625,
      "learning_rate": 1.4010062540402036e-05,
      "loss": 0.7282,
      "step": 640120
    },
    {
      "epoch": 2.2434960589078496,
      "grad_norm": 3.15625,
      "learning_rate": 1.4009413511738334e-05,
      "loss": 0.821,
      "step": 640130
    },
    {
      "epoch": 2.2435311064147454,
      "grad_norm": 2.84375,
      "learning_rate": 1.4008764483074632e-05,
      "loss": 0.8316,
      "step": 640140
    },
    {
      "epoch": 2.2435661539216407,
      "grad_norm": 3.09375,
      "learning_rate": 1.400811545441093e-05,
      "loss": 0.7758,
      "step": 640150
    },
    {
      "epoch": 2.2436012014285365,
      "grad_norm": 3.109375,
      "learning_rate": 1.4007466425747226e-05,
      "loss": 0.8499,
      "step": 640160
    },
    {
      "epoch": 2.243636248935432,
      "grad_norm": 2.84375,
      "learning_rate": 1.4006817397083524e-05,
      "loss": 0.8539,
      "step": 640170
    },
    {
      "epoch": 2.2436712964423275,
      "grad_norm": 3.28125,
      "learning_rate": 1.4006168368419822e-05,
      "loss": 0.8591,
      "step": 640180
    },
    {
      "epoch": 2.2437063439492233,
      "grad_norm": 3.140625,
      "learning_rate": 1.400551933975612e-05,
      "loss": 0.8226,
      "step": 640190
    },
    {
      "epoch": 2.2437413914561186,
      "grad_norm": 2.875,
      "learning_rate": 1.400487031109242e-05,
      "loss": 0.8301,
      "step": 640200
    },
    {
      "epoch": 2.2437764389630144,
      "grad_norm": 2.65625,
      "learning_rate": 1.4004221282428718e-05,
      "loss": 0.7973,
      "step": 640210
    },
    {
      "epoch": 2.24381148646991,
      "grad_norm": 3.0625,
      "learning_rate": 1.4003572253765016e-05,
      "loss": 0.7583,
      "step": 640220
    },
    {
      "epoch": 2.2438465339768054,
      "grad_norm": 2.96875,
      "learning_rate": 1.4002923225101314e-05,
      "loss": 0.8449,
      "step": 640230
    },
    {
      "epoch": 2.243881581483701,
      "grad_norm": 3.3125,
      "learning_rate": 1.4002274196437612e-05,
      "loss": 0.8737,
      "step": 640240
    },
    {
      "epoch": 2.243916628990597,
      "grad_norm": 3.15625,
      "learning_rate": 1.400162516777391e-05,
      "loss": 0.758,
      "step": 640250
    },
    {
      "epoch": 2.2439516764974923,
      "grad_norm": 2.5625,
      "learning_rate": 1.4000976139110208e-05,
      "loss": 0.8233,
      "step": 640260
    },
    {
      "epoch": 2.243986724004388,
      "grad_norm": 2.875,
      "learning_rate": 1.4000327110446507e-05,
      "loss": 0.8412,
      "step": 640270
    },
    {
      "epoch": 2.2440217715112833,
      "grad_norm": 2.6875,
      "learning_rate": 1.3999678081782805e-05,
      "loss": 0.7804,
      "step": 640280
    },
    {
      "epoch": 2.244056819018179,
      "grad_norm": 3.140625,
      "learning_rate": 1.3999029053119103e-05,
      "loss": 0.7871,
      "step": 640290
    },
    {
      "epoch": 2.244091866525075,
      "grad_norm": 2.453125,
      "learning_rate": 1.3998380024455401e-05,
      "loss": 0.7523,
      "step": 640300
    },
    {
      "epoch": 2.24412691403197,
      "grad_norm": 3.0,
      "learning_rate": 1.39977309957917e-05,
      "loss": 0.8492,
      "step": 640310
    },
    {
      "epoch": 2.244161961538866,
      "grad_norm": 2.828125,
      "learning_rate": 1.3997081967127997e-05,
      "loss": 0.8296,
      "step": 640320
    },
    {
      "epoch": 2.2441970090457617,
      "grad_norm": 3.046875,
      "learning_rate": 1.3996432938464295e-05,
      "loss": 0.8152,
      "step": 640330
    },
    {
      "epoch": 2.244232056552657,
      "grad_norm": 3.359375,
      "learning_rate": 1.3995783909800595e-05,
      "loss": 0.8274,
      "step": 640340
    },
    {
      "epoch": 2.2442671040595528,
      "grad_norm": 3.40625,
      "learning_rate": 1.399513488113689e-05,
      "loss": 0.8605,
      "step": 640350
    },
    {
      "epoch": 2.2443021515664485,
      "grad_norm": 2.484375,
      "learning_rate": 1.3994485852473188e-05,
      "loss": 0.7358,
      "step": 640360
    },
    {
      "epoch": 2.244337199073344,
      "grad_norm": 2.34375,
      "learning_rate": 1.3993836823809486e-05,
      "loss": 0.8506,
      "step": 640370
    },
    {
      "epoch": 2.2443722465802396,
      "grad_norm": 2.46875,
      "learning_rate": 1.3993187795145785e-05,
      "loss": 0.8116,
      "step": 640380
    },
    {
      "epoch": 2.244407294087135,
      "grad_norm": 3.296875,
      "learning_rate": 1.3992538766482083e-05,
      "loss": 0.8232,
      "step": 640390
    },
    {
      "epoch": 2.2444423415940307,
      "grad_norm": 2.453125,
      "learning_rate": 1.3991889737818381e-05,
      "loss": 0.8114,
      "step": 640400
    },
    {
      "epoch": 2.2444773891009264,
      "grad_norm": 3.0625,
      "learning_rate": 1.399124070915468e-05,
      "loss": 0.8212,
      "step": 640410
    },
    {
      "epoch": 2.2445124366078217,
      "grad_norm": 3.0625,
      "learning_rate": 1.3990591680490977e-05,
      "loss": 0.8433,
      "step": 640420
    },
    {
      "epoch": 2.2445474841147175,
      "grad_norm": 4.59375,
      "learning_rate": 1.3989942651827275e-05,
      "loss": 0.7537,
      "step": 640430
    },
    {
      "epoch": 2.2445825316216133,
      "grad_norm": 2.921875,
      "learning_rate": 1.3989293623163573e-05,
      "loss": 0.8135,
      "step": 640440
    },
    {
      "epoch": 2.2446175791285086,
      "grad_norm": 2.90625,
      "learning_rate": 1.3988644594499873e-05,
      "loss": 0.8378,
      "step": 640450
    },
    {
      "epoch": 2.2446526266354043,
      "grad_norm": 3.125,
      "learning_rate": 1.3987995565836171e-05,
      "loss": 0.8339,
      "step": 640460
    },
    {
      "epoch": 2.2446876741423,
      "grad_norm": 3.046875,
      "learning_rate": 1.3987346537172469e-05,
      "loss": 0.8614,
      "step": 640470
    },
    {
      "epoch": 2.2447227216491954,
      "grad_norm": 2.65625,
      "learning_rate": 1.3986697508508767e-05,
      "loss": 0.7909,
      "step": 640480
    },
    {
      "epoch": 2.244757769156091,
      "grad_norm": 3.125,
      "learning_rate": 1.3986048479845065e-05,
      "loss": 0.793,
      "step": 640490
    },
    {
      "epoch": 2.2447928166629865,
      "grad_norm": 3.390625,
      "learning_rate": 1.3985399451181363e-05,
      "loss": 0.8174,
      "step": 640500
    },
    {
      "epoch": 2.2448278641698822,
      "grad_norm": 2.78125,
      "learning_rate": 1.3984750422517661e-05,
      "loss": 0.7755,
      "step": 640510
    },
    {
      "epoch": 2.244862911676778,
      "grad_norm": 3.890625,
      "learning_rate": 1.398410139385396e-05,
      "loss": 0.783,
      "step": 640520
    },
    {
      "epoch": 2.2448979591836733,
      "grad_norm": 3.375,
      "learning_rate": 1.3983452365190259e-05,
      "loss": 0.8808,
      "step": 640530
    },
    {
      "epoch": 2.244933006690569,
      "grad_norm": 3.0625,
      "learning_rate": 1.3982803336526553e-05,
      "loss": 0.709,
      "step": 640540
    },
    {
      "epoch": 2.244968054197465,
      "grad_norm": 3.421875,
      "learning_rate": 1.3982154307862851e-05,
      "loss": 0.7947,
      "step": 640550
    },
    {
      "epoch": 2.24500310170436,
      "grad_norm": 2.84375,
      "learning_rate": 1.398150527919915e-05,
      "loss": 0.7566,
      "step": 640560
    },
    {
      "epoch": 2.245038149211256,
      "grad_norm": 3.078125,
      "learning_rate": 1.3980856250535449e-05,
      "loss": 0.7529,
      "step": 640570
    },
    {
      "epoch": 2.2450731967181516,
      "grad_norm": 2.828125,
      "learning_rate": 1.3980207221871747e-05,
      "loss": 0.7889,
      "step": 640580
    },
    {
      "epoch": 2.245108244225047,
      "grad_norm": 3.296875,
      "learning_rate": 1.3979558193208045e-05,
      "loss": 0.8475,
      "step": 640590
    },
    {
      "epoch": 2.2451432917319427,
      "grad_norm": 3.21875,
      "learning_rate": 1.3978909164544343e-05,
      "loss": 0.7619,
      "step": 640600
    },
    {
      "epoch": 2.245178339238838,
      "grad_norm": 3.046875,
      "learning_rate": 1.3978260135880641e-05,
      "loss": 0.8036,
      "step": 640610
    },
    {
      "epoch": 2.245213386745734,
      "grad_norm": 2.609375,
      "learning_rate": 1.3977611107216939e-05,
      "loss": 0.8242,
      "step": 640620
    },
    {
      "epoch": 2.2452484342526295,
      "grad_norm": 2.765625,
      "learning_rate": 1.3976962078553237e-05,
      "loss": 0.9159,
      "step": 640630
    },
    {
      "epoch": 2.245283481759525,
      "grad_norm": 3.078125,
      "learning_rate": 1.3976313049889537e-05,
      "loss": 0.858,
      "step": 640640
    },
    {
      "epoch": 2.2453185292664206,
      "grad_norm": 2.90625,
      "learning_rate": 1.3975664021225835e-05,
      "loss": 0.8059,
      "step": 640650
    },
    {
      "epoch": 2.2453535767733164,
      "grad_norm": 3.03125,
      "learning_rate": 1.3975014992562133e-05,
      "loss": 0.8291,
      "step": 640660
    },
    {
      "epoch": 2.2453886242802117,
      "grad_norm": 3.203125,
      "learning_rate": 1.397436596389843e-05,
      "loss": 0.8176,
      "step": 640670
    },
    {
      "epoch": 2.2454236717871074,
      "grad_norm": 3.125,
      "learning_rate": 1.3973716935234729e-05,
      "loss": 0.8364,
      "step": 640680
    },
    {
      "epoch": 2.245458719294003,
      "grad_norm": 3.15625,
      "learning_rate": 1.3973067906571027e-05,
      "loss": 0.9047,
      "step": 640690
    },
    {
      "epoch": 2.2454937668008985,
      "grad_norm": 3.09375,
      "learning_rate": 1.3972418877907326e-05,
      "loss": 0.8591,
      "step": 640700
    },
    {
      "epoch": 2.2455288143077943,
      "grad_norm": 2.359375,
      "learning_rate": 1.3971769849243624e-05,
      "loss": 0.7189,
      "step": 640710
    },
    {
      "epoch": 2.2455638618146896,
      "grad_norm": 3.5,
      "learning_rate": 1.3971120820579922e-05,
      "loss": 0.7822,
      "step": 640720
    },
    {
      "epoch": 2.2455989093215853,
      "grad_norm": 3.125,
      "learning_rate": 1.3970471791916217e-05,
      "loss": 0.8012,
      "step": 640730
    },
    {
      "epoch": 2.245633956828481,
      "grad_norm": 3.296875,
      "learning_rate": 1.3969822763252515e-05,
      "loss": 0.8572,
      "step": 640740
    },
    {
      "epoch": 2.2456690043353764,
      "grad_norm": 2.40625,
      "learning_rate": 1.3969173734588815e-05,
      "loss": 0.8056,
      "step": 640750
    },
    {
      "epoch": 2.245704051842272,
      "grad_norm": 2.953125,
      "learning_rate": 1.3968524705925113e-05,
      "loss": 0.8039,
      "step": 640760
    },
    {
      "epoch": 2.245739099349168,
      "grad_norm": 2.953125,
      "learning_rate": 1.396787567726141e-05,
      "loss": 0.7826,
      "step": 640770
    },
    {
      "epoch": 2.2457741468560632,
      "grad_norm": 2.59375,
      "learning_rate": 1.3967226648597709e-05,
      "loss": 0.7735,
      "step": 640780
    },
    {
      "epoch": 2.245809194362959,
      "grad_norm": 3.03125,
      "learning_rate": 1.3966577619934007e-05,
      "loss": 0.7214,
      "step": 640790
    },
    {
      "epoch": 2.2458442418698548,
      "grad_norm": 2.828125,
      "learning_rate": 1.3965928591270305e-05,
      "loss": 0.7786,
      "step": 640800
    },
    {
      "epoch": 2.24587928937675,
      "grad_norm": 3.515625,
      "learning_rate": 1.3965279562606603e-05,
      "loss": 0.884,
      "step": 640810
    },
    {
      "epoch": 2.245914336883646,
      "grad_norm": 3.21875,
      "learning_rate": 1.3964630533942902e-05,
      "loss": 0.7552,
      "step": 640820
    },
    {
      "epoch": 2.245949384390541,
      "grad_norm": 2.984375,
      "learning_rate": 1.39639815052792e-05,
      "loss": 0.8541,
      "step": 640830
    },
    {
      "epoch": 2.245984431897437,
      "grad_norm": 2.65625,
      "learning_rate": 1.3963332476615498e-05,
      "loss": 0.8298,
      "step": 640840
    },
    {
      "epoch": 2.2460194794043327,
      "grad_norm": 2.890625,
      "learning_rate": 1.3962683447951796e-05,
      "loss": 0.8086,
      "step": 640850
    },
    {
      "epoch": 2.246054526911228,
      "grad_norm": 2.90625,
      "learning_rate": 1.3962034419288094e-05,
      "loss": 0.8033,
      "step": 640860
    },
    {
      "epoch": 2.2460895744181237,
      "grad_norm": 3.015625,
      "learning_rate": 1.3961385390624392e-05,
      "loss": 0.9022,
      "step": 640870
    },
    {
      "epoch": 2.2461246219250195,
      "grad_norm": 2.921875,
      "learning_rate": 1.396073636196069e-05,
      "loss": 0.7979,
      "step": 640880
    },
    {
      "epoch": 2.246159669431915,
      "grad_norm": 2.90625,
      "learning_rate": 1.396008733329699e-05,
      "loss": 0.8111,
      "step": 640890
    },
    {
      "epoch": 2.2461947169388106,
      "grad_norm": 3.0625,
      "learning_rate": 1.3959438304633288e-05,
      "loss": 0.7579,
      "step": 640900
    },
    {
      "epoch": 2.2462297644457063,
      "grad_norm": 2.84375,
      "learning_rate": 1.3958789275969586e-05,
      "loss": 0.7269,
      "step": 640910
    },
    {
      "epoch": 2.2462648119526016,
      "grad_norm": 2.671875,
      "learning_rate": 1.395814024730588e-05,
      "loss": 0.8327,
      "step": 640920
    },
    {
      "epoch": 2.2462998594594974,
      "grad_norm": 3.546875,
      "learning_rate": 1.395749121864218e-05,
      "loss": 0.8226,
      "step": 640930
    },
    {
      "epoch": 2.246334906966393,
      "grad_norm": 3.046875,
      "learning_rate": 1.3956842189978478e-05,
      "loss": 0.8193,
      "step": 640940
    },
    {
      "epoch": 2.2463699544732885,
      "grad_norm": 2.96875,
      "learning_rate": 1.3956193161314776e-05,
      "loss": 0.7513,
      "step": 640950
    },
    {
      "epoch": 2.2464050019801842,
      "grad_norm": 3.109375,
      "learning_rate": 1.3955544132651074e-05,
      "loss": 0.8217,
      "step": 640960
    },
    {
      "epoch": 2.2464400494870795,
      "grad_norm": 2.8125,
      "learning_rate": 1.3954895103987372e-05,
      "loss": 0.7409,
      "step": 640970
    },
    {
      "epoch": 2.2464750969939753,
      "grad_norm": 3.0625,
      "learning_rate": 1.395424607532367e-05,
      "loss": 0.7993,
      "step": 640980
    },
    {
      "epoch": 2.246510144500871,
      "grad_norm": 3.4375,
      "learning_rate": 1.3953597046659968e-05,
      "loss": 0.8109,
      "step": 640990
    },
    {
      "epoch": 2.2465451920077664,
      "grad_norm": 2.890625,
      "learning_rate": 1.3952948017996268e-05,
      "loss": 0.977,
      "step": 641000
    },
    {
      "epoch": 2.246580239514662,
      "grad_norm": 2.59375,
      "learning_rate": 1.3952298989332566e-05,
      "loss": 0.813,
      "step": 641010
    },
    {
      "epoch": 2.246615287021558,
      "grad_norm": 2.703125,
      "learning_rate": 1.3951649960668864e-05,
      "loss": 0.7821,
      "step": 641020
    },
    {
      "epoch": 2.246650334528453,
      "grad_norm": 2.421875,
      "learning_rate": 1.3951000932005162e-05,
      "loss": 0.7351,
      "step": 641030
    },
    {
      "epoch": 2.246685382035349,
      "grad_norm": 2.75,
      "learning_rate": 1.395035190334146e-05,
      "loss": 0.7552,
      "step": 641040
    },
    {
      "epoch": 2.2467204295422447,
      "grad_norm": 3.140625,
      "learning_rate": 1.3949702874677758e-05,
      "loss": 0.8622,
      "step": 641050
    },
    {
      "epoch": 2.24675547704914,
      "grad_norm": 2.890625,
      "learning_rate": 1.3949053846014056e-05,
      "loss": 0.7961,
      "step": 641060
    },
    {
      "epoch": 2.246790524556036,
      "grad_norm": 3.0,
      "learning_rate": 1.3948404817350356e-05,
      "loss": 0.8136,
      "step": 641070
    },
    {
      "epoch": 2.246825572062931,
      "grad_norm": 3.25,
      "learning_rate": 1.3947755788686654e-05,
      "loss": 0.8143,
      "step": 641080
    },
    {
      "epoch": 2.246860619569827,
      "grad_norm": 2.859375,
      "learning_rate": 1.3947106760022952e-05,
      "loss": 0.8359,
      "step": 641090
    },
    {
      "epoch": 2.2468956670767226,
      "grad_norm": 3.125,
      "learning_rate": 1.3946457731359246e-05,
      "loss": 0.8304,
      "step": 641100
    },
    {
      "epoch": 2.246930714583618,
      "grad_norm": 2.375,
      "learning_rate": 1.3945808702695544e-05,
      "loss": 0.8339,
      "step": 641110
    },
    {
      "epoch": 2.2469657620905137,
      "grad_norm": 2.828125,
      "learning_rate": 1.3945159674031844e-05,
      "loss": 0.8311,
      "step": 641120
    },
    {
      "epoch": 2.2470008095974094,
      "grad_norm": 3.046875,
      "learning_rate": 1.3944510645368142e-05,
      "loss": 0.7338,
      "step": 641130
    },
    {
      "epoch": 2.2470358571043048,
      "grad_norm": 2.578125,
      "learning_rate": 1.394386161670444e-05,
      "loss": 0.7776,
      "step": 641140
    },
    {
      "epoch": 2.2470709046112005,
      "grad_norm": 3.09375,
      "learning_rate": 1.3943212588040738e-05,
      "loss": 0.8109,
      "step": 641150
    },
    {
      "epoch": 2.2471059521180963,
      "grad_norm": 2.90625,
      "learning_rate": 1.3942563559377036e-05,
      "loss": 0.8027,
      "step": 641160
    },
    {
      "epoch": 2.2471409996249916,
      "grad_norm": 2.71875,
      "learning_rate": 1.3941914530713334e-05,
      "loss": 0.8064,
      "step": 641170
    },
    {
      "epoch": 2.2471760471318873,
      "grad_norm": 2.78125,
      "learning_rate": 1.3941265502049632e-05,
      "loss": 0.7416,
      "step": 641180
    },
    {
      "epoch": 2.247211094638783,
      "grad_norm": 2.796875,
      "learning_rate": 1.3940616473385932e-05,
      "loss": 0.9093,
      "step": 641190
    },
    {
      "epoch": 2.2472461421456784,
      "grad_norm": 3.34375,
      "learning_rate": 1.393996744472223e-05,
      "loss": 0.8094,
      "step": 641200
    },
    {
      "epoch": 2.247281189652574,
      "grad_norm": 2.6875,
      "learning_rate": 1.3939318416058528e-05,
      "loss": 0.8444,
      "step": 641210
    },
    {
      "epoch": 2.2473162371594695,
      "grad_norm": 2.359375,
      "learning_rate": 1.3938669387394826e-05,
      "loss": 0.8309,
      "step": 641220
    },
    {
      "epoch": 2.2473512846663652,
      "grad_norm": 2.59375,
      "learning_rate": 1.3938020358731124e-05,
      "loss": 0.8492,
      "step": 641230
    },
    {
      "epoch": 2.247386332173261,
      "grad_norm": 3.0625,
      "learning_rate": 1.3937371330067422e-05,
      "loss": 0.7877,
      "step": 641240
    },
    {
      "epoch": 2.2474213796801563,
      "grad_norm": 2.859375,
      "learning_rate": 1.3936722301403722e-05,
      "loss": 0.874,
      "step": 641250
    },
    {
      "epoch": 2.247456427187052,
      "grad_norm": 3.265625,
      "learning_rate": 1.393607327274002e-05,
      "loss": 0.7998,
      "step": 641260
    },
    {
      "epoch": 2.247491474693948,
      "grad_norm": 2.8125,
      "learning_rate": 1.3935424244076318e-05,
      "loss": 0.8198,
      "step": 641270
    },
    {
      "epoch": 2.247526522200843,
      "grad_norm": 2.59375,
      "learning_rate": 1.3934775215412616e-05,
      "loss": 0.7859,
      "step": 641280
    },
    {
      "epoch": 2.247561569707739,
      "grad_norm": 3.15625,
      "learning_rate": 1.393412618674891e-05,
      "loss": 0.8736,
      "step": 641290
    },
    {
      "epoch": 2.2475966172146347,
      "grad_norm": 3.015625,
      "learning_rate": 1.393347715808521e-05,
      "loss": 0.7613,
      "step": 641300
    },
    {
      "epoch": 2.24763166472153,
      "grad_norm": 2.578125,
      "learning_rate": 1.3932828129421508e-05,
      "loss": 0.8325,
      "step": 641310
    },
    {
      "epoch": 2.2476667122284257,
      "grad_norm": 2.984375,
      "learning_rate": 1.3932179100757806e-05,
      "loss": 0.8159,
      "step": 641320
    },
    {
      "epoch": 2.247701759735321,
      "grad_norm": 2.703125,
      "learning_rate": 1.3931530072094104e-05,
      "loss": 0.7865,
      "step": 641330
    },
    {
      "epoch": 2.247736807242217,
      "grad_norm": 3.375,
      "learning_rate": 1.3930881043430402e-05,
      "loss": 0.805,
      "step": 641340
    },
    {
      "epoch": 2.2477718547491126,
      "grad_norm": 3.078125,
      "learning_rate": 1.39302320147667e-05,
      "loss": 0.7575,
      "step": 641350
    },
    {
      "epoch": 2.247806902256008,
      "grad_norm": 3.1875,
      "learning_rate": 1.3929582986102998e-05,
      "loss": 0.7726,
      "step": 641360
    },
    {
      "epoch": 2.2478419497629036,
      "grad_norm": 3.078125,
      "learning_rate": 1.3928933957439298e-05,
      "loss": 0.8248,
      "step": 641370
    },
    {
      "epoch": 2.2478769972697994,
      "grad_norm": 2.546875,
      "learning_rate": 1.3928284928775596e-05,
      "loss": 0.7237,
      "step": 641380
    },
    {
      "epoch": 2.2479120447766947,
      "grad_norm": 2.53125,
      "learning_rate": 1.3927635900111894e-05,
      "loss": 0.724,
      "step": 641390
    },
    {
      "epoch": 2.2479470922835905,
      "grad_norm": 2.9375,
      "learning_rate": 1.3926986871448192e-05,
      "loss": 0.7911,
      "step": 641400
    },
    {
      "epoch": 2.2479821397904862,
      "grad_norm": 3.15625,
      "learning_rate": 1.392633784278449e-05,
      "loss": 0.8495,
      "step": 641410
    },
    {
      "epoch": 2.2480171872973815,
      "grad_norm": 2.671875,
      "learning_rate": 1.3925688814120788e-05,
      "loss": 0.8267,
      "step": 641420
    },
    {
      "epoch": 2.2480522348042773,
      "grad_norm": 2.6875,
      "learning_rate": 1.3925039785457086e-05,
      "loss": 0.8257,
      "step": 641430
    },
    {
      "epoch": 2.2480872823111726,
      "grad_norm": 3.03125,
      "learning_rate": 1.3924390756793385e-05,
      "loss": 0.7699,
      "step": 641440
    },
    {
      "epoch": 2.2481223298180684,
      "grad_norm": 2.84375,
      "learning_rate": 1.3923741728129683e-05,
      "loss": 0.8182,
      "step": 641450
    },
    {
      "epoch": 2.248157377324964,
      "grad_norm": 2.921875,
      "learning_rate": 1.3923092699465981e-05,
      "loss": 0.7198,
      "step": 641460
    },
    {
      "epoch": 2.2481924248318594,
      "grad_norm": 2.859375,
      "learning_rate": 1.392244367080228e-05,
      "loss": 0.7643,
      "step": 641470
    },
    {
      "epoch": 2.248227472338755,
      "grad_norm": 3.234375,
      "learning_rate": 1.3921794642138576e-05,
      "loss": 0.8114,
      "step": 641480
    },
    {
      "epoch": 2.248262519845651,
      "grad_norm": 3.015625,
      "learning_rate": 1.3921145613474874e-05,
      "loss": 0.8681,
      "step": 641490
    },
    {
      "epoch": 2.2482975673525463,
      "grad_norm": 3.109375,
      "learning_rate": 1.3920496584811172e-05,
      "loss": 0.7931,
      "step": 641500
    },
    {
      "epoch": 2.248332614859442,
      "grad_norm": 2.875,
      "learning_rate": 1.391984755614747e-05,
      "loss": 0.7761,
      "step": 641510
    },
    {
      "epoch": 2.248367662366338,
      "grad_norm": 2.875,
      "learning_rate": 1.3919198527483768e-05,
      "loss": 0.8871,
      "step": 641520
    },
    {
      "epoch": 2.248402709873233,
      "grad_norm": 2.953125,
      "learning_rate": 1.3918549498820066e-05,
      "loss": 0.8131,
      "step": 641530
    },
    {
      "epoch": 2.248437757380129,
      "grad_norm": 3.328125,
      "learning_rate": 1.3917900470156364e-05,
      "loss": 0.8617,
      "step": 641540
    },
    {
      "epoch": 2.248472804887024,
      "grad_norm": 2.625,
      "learning_rate": 1.3917251441492663e-05,
      "loss": 0.8168,
      "step": 641550
    },
    {
      "epoch": 2.24850785239392,
      "grad_norm": 2.703125,
      "learning_rate": 1.3916602412828961e-05,
      "loss": 0.7607,
      "step": 641560
    },
    {
      "epoch": 2.2485428999008157,
      "grad_norm": 3.109375,
      "learning_rate": 1.391595338416526e-05,
      "loss": 0.8405,
      "step": 641570
    },
    {
      "epoch": 2.248577947407711,
      "grad_norm": 3.53125,
      "learning_rate": 1.3915304355501557e-05,
      "loss": 0.8592,
      "step": 641580
    },
    {
      "epoch": 2.2486129949146068,
      "grad_norm": 2.828125,
      "learning_rate": 1.3914655326837855e-05,
      "loss": 0.7907,
      "step": 641590
    },
    {
      "epoch": 2.2486480424215025,
      "grad_norm": 2.8125,
      "learning_rate": 1.3914006298174153e-05,
      "loss": 0.8731,
      "step": 641600
    },
    {
      "epoch": 2.248683089928398,
      "grad_norm": 3.203125,
      "learning_rate": 1.3913357269510451e-05,
      "loss": 0.7676,
      "step": 641610
    },
    {
      "epoch": 2.2487181374352936,
      "grad_norm": 2.8125,
      "learning_rate": 1.3912708240846751e-05,
      "loss": 0.8263,
      "step": 641620
    },
    {
      "epoch": 2.2487531849421893,
      "grad_norm": 2.875,
      "learning_rate": 1.3912059212183049e-05,
      "loss": 0.8443,
      "step": 641630
    },
    {
      "epoch": 2.2487882324490847,
      "grad_norm": 3.03125,
      "learning_rate": 1.3911410183519347e-05,
      "loss": 0.8482,
      "step": 641640
    },
    {
      "epoch": 2.2488232799559804,
      "grad_norm": 3.0625,
      "learning_rate": 1.3910761154855645e-05,
      "loss": 0.8444,
      "step": 641650
    },
    {
      "epoch": 2.2488583274628757,
      "grad_norm": 2.734375,
      "learning_rate": 1.3910112126191943e-05,
      "loss": 0.7619,
      "step": 641660
    },
    {
      "epoch": 2.2488933749697715,
      "grad_norm": 3.09375,
      "learning_rate": 1.390946309752824e-05,
      "loss": 0.8055,
      "step": 641670
    },
    {
      "epoch": 2.2489284224766672,
      "grad_norm": 3.1875,
      "learning_rate": 1.3908814068864537e-05,
      "loss": 0.8656,
      "step": 641680
    },
    {
      "epoch": 2.2489634699835626,
      "grad_norm": 2.953125,
      "learning_rate": 1.3908165040200835e-05,
      "loss": 0.7916,
      "step": 641690
    },
    {
      "epoch": 2.2489985174904583,
      "grad_norm": 2.984375,
      "learning_rate": 1.3907516011537133e-05,
      "loss": 0.754,
      "step": 641700
    },
    {
      "epoch": 2.249033564997354,
      "grad_norm": 3.03125,
      "learning_rate": 1.3906866982873431e-05,
      "loss": 0.849,
      "step": 641710
    },
    {
      "epoch": 2.2490686125042494,
      "grad_norm": 3.09375,
      "learning_rate": 1.390621795420973e-05,
      "loss": 0.8021,
      "step": 641720
    },
    {
      "epoch": 2.249103660011145,
      "grad_norm": 3.390625,
      "learning_rate": 1.3905568925546029e-05,
      "loss": 0.845,
      "step": 641730
    },
    {
      "epoch": 2.249138707518041,
      "grad_norm": 3.0,
      "learning_rate": 1.3904919896882327e-05,
      "loss": 0.7761,
      "step": 641740
    },
    {
      "epoch": 2.249173755024936,
      "grad_norm": 2.875,
      "learning_rate": 1.3904270868218625e-05,
      "loss": 0.7986,
      "step": 641750
    },
    {
      "epoch": 2.249208802531832,
      "grad_norm": 2.578125,
      "learning_rate": 1.3903621839554923e-05,
      "loss": 0.7826,
      "step": 641760
    },
    {
      "epoch": 2.2492438500387273,
      "grad_norm": 2.984375,
      "learning_rate": 1.3902972810891221e-05,
      "loss": 0.7518,
      "step": 641770
    },
    {
      "epoch": 2.249278897545623,
      "grad_norm": 3.3125,
      "learning_rate": 1.3902323782227519e-05,
      "loss": 0.8513,
      "step": 641780
    },
    {
      "epoch": 2.249313945052519,
      "grad_norm": 3.0,
      "learning_rate": 1.3901674753563817e-05,
      "loss": 0.9182,
      "step": 641790
    },
    {
      "epoch": 2.249348992559414,
      "grad_norm": 2.953125,
      "learning_rate": 1.3901025724900117e-05,
      "loss": 0.8206,
      "step": 641800
    },
    {
      "epoch": 2.24938404006631,
      "grad_norm": 3.125,
      "learning_rate": 1.3900376696236415e-05,
      "loss": 0.8374,
      "step": 641810
    },
    {
      "epoch": 2.2494190875732056,
      "grad_norm": 2.796875,
      "learning_rate": 1.3899727667572713e-05,
      "loss": 0.9025,
      "step": 641820
    },
    {
      "epoch": 2.249454135080101,
      "grad_norm": 2.90625,
      "learning_rate": 1.389907863890901e-05,
      "loss": 0.7798,
      "step": 641830
    },
    {
      "epoch": 2.2494891825869967,
      "grad_norm": 3.1875,
      "learning_rate": 1.3898429610245309e-05,
      "loss": 0.8326,
      "step": 641840
    },
    {
      "epoch": 2.2495242300938925,
      "grad_norm": 2.78125,
      "learning_rate": 1.3897780581581607e-05,
      "loss": 0.8285,
      "step": 641850
    },
    {
      "epoch": 2.2495592776007878,
      "grad_norm": 2.53125,
      "learning_rate": 1.3897131552917903e-05,
      "loss": 0.7134,
      "step": 641860
    },
    {
      "epoch": 2.2495943251076835,
      "grad_norm": 3.203125,
      "learning_rate": 1.3896482524254201e-05,
      "loss": 0.7937,
      "step": 641870
    },
    {
      "epoch": 2.249629372614579,
      "grad_norm": 3.125,
      "learning_rate": 1.3895833495590499e-05,
      "loss": 0.8359,
      "step": 641880
    },
    {
      "epoch": 2.2496644201214746,
      "grad_norm": 3.046875,
      "learning_rate": 1.3895184466926797e-05,
      "loss": 0.8435,
      "step": 641890
    },
    {
      "epoch": 2.2496994676283704,
      "grad_norm": 2.84375,
      "learning_rate": 1.3894535438263095e-05,
      "loss": 0.7361,
      "step": 641900
    },
    {
      "epoch": 2.2497345151352657,
      "grad_norm": 2.921875,
      "learning_rate": 1.3893886409599393e-05,
      "loss": 0.8258,
      "step": 641910
    },
    {
      "epoch": 2.2497695626421614,
      "grad_norm": 2.8125,
      "learning_rate": 1.3893237380935693e-05,
      "loss": 0.774,
      "step": 641920
    },
    {
      "epoch": 2.249804610149057,
      "grad_norm": 2.421875,
      "learning_rate": 1.389258835227199e-05,
      "loss": 0.7305,
      "step": 641930
    },
    {
      "epoch": 2.2498396576559525,
      "grad_norm": 2.890625,
      "learning_rate": 1.3891939323608289e-05,
      "loss": 0.8092,
      "step": 641940
    },
    {
      "epoch": 2.2498747051628483,
      "grad_norm": 2.796875,
      "learning_rate": 1.3891290294944587e-05,
      "loss": 0.8068,
      "step": 641950
    },
    {
      "epoch": 2.249909752669744,
      "grad_norm": 2.84375,
      "learning_rate": 1.3890641266280885e-05,
      "loss": 0.8372,
      "step": 641960
    },
    {
      "epoch": 2.2499448001766393,
      "grad_norm": 3.296875,
      "learning_rate": 1.3889992237617183e-05,
      "loss": 0.8194,
      "step": 641970
    },
    {
      "epoch": 2.249979847683535,
      "grad_norm": 2.75,
      "learning_rate": 1.388934320895348e-05,
      "loss": 0.7585,
      "step": 641980
    },
    {
      "epoch": 2.2500148951904304,
      "grad_norm": 2.8125,
      "learning_rate": 1.388869418028978e-05,
      "loss": 0.8222,
      "step": 641990
    },
    {
      "epoch": 2.250049942697326,
      "grad_norm": 2.828125,
      "learning_rate": 1.3888045151626078e-05,
      "loss": 0.8306,
      "step": 642000
    },
    {
      "epoch": 2.250084990204222,
      "grad_norm": 2.734375,
      "learning_rate": 1.3887396122962376e-05,
      "loss": 0.8161,
      "step": 642010
    },
    {
      "epoch": 2.2501200377111172,
      "grad_norm": 3.265625,
      "learning_rate": 1.3886747094298674e-05,
      "loss": 0.8302,
      "step": 642020
    },
    {
      "epoch": 2.250155085218013,
      "grad_norm": 3.109375,
      "learning_rate": 1.3886098065634972e-05,
      "loss": 0.7988,
      "step": 642030
    },
    {
      "epoch": 2.2501901327249088,
      "grad_norm": 2.921875,
      "learning_rate": 1.3885449036971269e-05,
      "loss": 0.8442,
      "step": 642040
    },
    {
      "epoch": 2.250225180231804,
      "grad_norm": 2.890625,
      "learning_rate": 1.3884800008307567e-05,
      "loss": 0.7975,
      "step": 642050
    },
    {
      "epoch": 2.2502602277387,
      "grad_norm": 2.765625,
      "learning_rate": 1.3884150979643865e-05,
      "loss": 0.7772,
      "step": 642060
    },
    {
      "epoch": 2.2502952752455956,
      "grad_norm": 2.5,
      "learning_rate": 1.3883501950980163e-05,
      "loss": 0.8152,
      "step": 642070
    },
    {
      "epoch": 2.250330322752491,
      "grad_norm": 2.875,
      "learning_rate": 1.388285292231646e-05,
      "loss": 0.8939,
      "step": 642080
    },
    {
      "epoch": 2.2503653702593867,
      "grad_norm": 2.796875,
      "learning_rate": 1.3882203893652759e-05,
      "loss": 0.9029,
      "step": 642090
    },
    {
      "epoch": 2.250400417766282,
      "grad_norm": 2.765625,
      "learning_rate": 1.3881554864989058e-05,
      "loss": 0.7783,
      "step": 642100
    },
    {
      "epoch": 2.2504354652731777,
      "grad_norm": 2.359375,
      "learning_rate": 1.3880905836325356e-05,
      "loss": 0.7381,
      "step": 642110
    },
    {
      "epoch": 2.2504705127800735,
      "grad_norm": 3.109375,
      "learning_rate": 1.3880256807661654e-05,
      "loss": 0.8097,
      "step": 642120
    },
    {
      "epoch": 2.250505560286969,
      "grad_norm": 3.0,
      "learning_rate": 1.3879607778997952e-05,
      "loss": 0.7845,
      "step": 642130
    },
    {
      "epoch": 2.2505406077938646,
      "grad_norm": 2.578125,
      "learning_rate": 1.387895875033425e-05,
      "loss": 0.8608,
      "step": 642140
    },
    {
      "epoch": 2.2505756553007603,
      "grad_norm": 2.46875,
      "learning_rate": 1.3878309721670548e-05,
      "loss": 0.7651,
      "step": 642150
    },
    {
      "epoch": 2.2506107028076556,
      "grad_norm": 3.234375,
      "learning_rate": 1.3877660693006846e-05,
      "loss": 0.8067,
      "step": 642160
    },
    {
      "epoch": 2.2506457503145514,
      "grad_norm": 3.109375,
      "learning_rate": 1.3877011664343146e-05,
      "loss": 0.826,
      "step": 642170
    },
    {
      "epoch": 2.250680797821447,
      "grad_norm": 3.15625,
      "learning_rate": 1.3876362635679444e-05,
      "loss": 0.7697,
      "step": 642180
    },
    {
      "epoch": 2.2507158453283425,
      "grad_norm": 3.0,
      "learning_rate": 1.3875713607015742e-05,
      "loss": 0.7967,
      "step": 642190
    },
    {
      "epoch": 2.250750892835238,
      "grad_norm": 2.640625,
      "learning_rate": 1.387506457835204e-05,
      "loss": 0.9056,
      "step": 642200
    },
    {
      "epoch": 2.2507859403421335,
      "grad_norm": 2.921875,
      "learning_rate": 1.3874415549688338e-05,
      "loss": 0.7688,
      "step": 642210
    },
    {
      "epoch": 2.2508209878490293,
      "grad_norm": 2.625,
      "learning_rate": 1.3873766521024636e-05,
      "loss": 0.7808,
      "step": 642220
    },
    {
      "epoch": 2.250856035355925,
      "grad_norm": 2.921875,
      "learning_rate": 1.3873117492360932e-05,
      "loss": 0.8323,
      "step": 642230
    },
    {
      "epoch": 2.250891082862821,
      "grad_norm": 2.953125,
      "learning_rate": 1.387246846369723e-05,
      "loss": 0.8306,
      "step": 642240
    },
    {
      "epoch": 2.250926130369716,
      "grad_norm": 2.625,
      "learning_rate": 1.3871819435033528e-05,
      "loss": 0.81,
      "step": 642250
    },
    {
      "epoch": 2.250961177876612,
      "grad_norm": 2.875,
      "learning_rate": 1.3871170406369826e-05,
      "loss": 0.8984,
      "step": 642260
    },
    {
      "epoch": 2.250996225383507,
      "grad_norm": 2.75,
      "learning_rate": 1.3870521377706124e-05,
      "loss": 0.795,
      "step": 642270
    },
    {
      "epoch": 2.251031272890403,
      "grad_norm": 3.25,
      "learning_rate": 1.3869872349042424e-05,
      "loss": 0.7274,
      "step": 642280
    },
    {
      "epoch": 2.2510663203972987,
      "grad_norm": 2.90625,
      "learning_rate": 1.3869223320378722e-05,
      "loss": 0.7643,
      "step": 642290
    },
    {
      "epoch": 2.251101367904194,
      "grad_norm": 2.859375,
      "learning_rate": 1.386857429171502e-05,
      "loss": 0.8124,
      "step": 642300
    },
    {
      "epoch": 2.2511364154110898,
      "grad_norm": 3.328125,
      "learning_rate": 1.3867925263051318e-05,
      "loss": 0.8683,
      "step": 642310
    },
    {
      "epoch": 2.251171462917985,
      "grad_norm": 3.296875,
      "learning_rate": 1.3867276234387616e-05,
      "loss": 0.7723,
      "step": 642320
    },
    {
      "epoch": 2.251206510424881,
      "grad_norm": 2.640625,
      "learning_rate": 1.3866627205723914e-05,
      "loss": 0.8296,
      "step": 642330
    },
    {
      "epoch": 2.2512415579317766,
      "grad_norm": 2.671875,
      "learning_rate": 1.3865978177060212e-05,
      "loss": 0.7161,
      "step": 642340
    },
    {
      "epoch": 2.2512766054386724,
      "grad_norm": 2.75,
      "learning_rate": 1.3865329148396512e-05,
      "loss": 0.7713,
      "step": 642350
    },
    {
      "epoch": 2.2513116529455677,
      "grad_norm": 2.796875,
      "learning_rate": 1.386468011973281e-05,
      "loss": 0.7561,
      "step": 642360
    },
    {
      "epoch": 2.2513467004524634,
      "grad_norm": 3.234375,
      "learning_rate": 1.3864031091069108e-05,
      "loss": 0.8728,
      "step": 642370
    },
    {
      "epoch": 2.2513817479593587,
      "grad_norm": 2.84375,
      "learning_rate": 1.3863382062405406e-05,
      "loss": 0.8139,
      "step": 642380
    },
    {
      "epoch": 2.2514167954662545,
      "grad_norm": 3.0,
      "learning_rate": 1.3862733033741704e-05,
      "loss": 0.8587,
      "step": 642390
    },
    {
      "epoch": 2.2514518429731503,
      "grad_norm": 2.796875,
      "learning_rate": 1.3862084005078002e-05,
      "loss": 0.8007,
      "step": 642400
    },
    {
      "epoch": 2.2514868904800456,
      "grad_norm": 2.9375,
      "learning_rate": 1.38614349764143e-05,
      "loss": 0.8342,
      "step": 642410
    },
    {
      "epoch": 2.2515219379869413,
      "grad_norm": 2.890625,
      "learning_rate": 1.3860785947750596e-05,
      "loss": 0.8172,
      "step": 642420
    },
    {
      "epoch": 2.2515569854938366,
      "grad_norm": 2.78125,
      "learning_rate": 1.3860136919086894e-05,
      "loss": 0.7793,
      "step": 642430
    },
    {
      "epoch": 2.2515920330007324,
      "grad_norm": 3.140625,
      "learning_rate": 1.3859487890423192e-05,
      "loss": 0.806,
      "step": 642440
    },
    {
      "epoch": 2.251627080507628,
      "grad_norm": 2.515625,
      "learning_rate": 1.385883886175949e-05,
      "loss": 0.8235,
      "step": 642450
    },
    {
      "epoch": 2.251662128014524,
      "grad_norm": 2.96875,
      "learning_rate": 1.3858189833095788e-05,
      "loss": 0.7361,
      "step": 642460
    },
    {
      "epoch": 2.2516971755214192,
      "grad_norm": 2.796875,
      "learning_rate": 1.3857540804432088e-05,
      "loss": 0.8192,
      "step": 642470
    },
    {
      "epoch": 2.251732223028315,
      "grad_norm": 2.71875,
      "learning_rate": 1.3856891775768386e-05,
      "loss": 0.8378,
      "step": 642480
    },
    {
      "epoch": 2.2517672705352103,
      "grad_norm": 2.828125,
      "learning_rate": 1.3856242747104684e-05,
      "loss": 0.8504,
      "step": 642490
    },
    {
      "epoch": 2.251802318042106,
      "grad_norm": 2.890625,
      "learning_rate": 1.3855593718440982e-05,
      "loss": 0.7867,
      "step": 642500
    },
    {
      "epoch": 2.251837365549002,
      "grad_norm": 3.0,
      "learning_rate": 1.385494468977728e-05,
      "loss": 0.9254,
      "step": 642510
    },
    {
      "epoch": 2.251872413055897,
      "grad_norm": 2.78125,
      "learning_rate": 1.3854295661113578e-05,
      "loss": 0.7629,
      "step": 642520
    },
    {
      "epoch": 2.251907460562793,
      "grad_norm": 2.890625,
      "learning_rate": 1.3853646632449876e-05,
      "loss": 0.8029,
      "step": 642530
    },
    {
      "epoch": 2.2519425080696887,
      "grad_norm": 2.65625,
      "learning_rate": 1.3852997603786175e-05,
      "loss": 0.8667,
      "step": 642540
    },
    {
      "epoch": 2.251977555576584,
      "grad_norm": 2.703125,
      "learning_rate": 1.3852348575122473e-05,
      "loss": 0.8446,
      "step": 642550
    },
    {
      "epoch": 2.2520126030834797,
      "grad_norm": 2.453125,
      "learning_rate": 1.3851699546458771e-05,
      "loss": 0.7704,
      "step": 642560
    },
    {
      "epoch": 2.2520476505903755,
      "grad_norm": 3.265625,
      "learning_rate": 1.385105051779507e-05,
      "loss": 0.7017,
      "step": 642570
    },
    {
      "epoch": 2.252082698097271,
      "grad_norm": 2.703125,
      "learning_rate": 1.3850401489131367e-05,
      "loss": 0.8041,
      "step": 642580
    },
    {
      "epoch": 2.2521177456041666,
      "grad_norm": 3.734375,
      "learning_rate": 1.3849752460467665e-05,
      "loss": 0.8393,
      "step": 642590
    },
    {
      "epoch": 2.252152793111062,
      "grad_norm": 2.890625,
      "learning_rate": 1.3849103431803963e-05,
      "loss": 0.8517,
      "step": 642600
    },
    {
      "epoch": 2.2521878406179576,
      "grad_norm": 2.578125,
      "learning_rate": 1.384845440314026e-05,
      "loss": 0.8152,
      "step": 642610
    },
    {
      "epoch": 2.2522228881248534,
      "grad_norm": 2.71875,
      "learning_rate": 1.3847805374476558e-05,
      "loss": 0.7686,
      "step": 642620
    },
    {
      "epoch": 2.2522579356317487,
      "grad_norm": 2.875,
      "learning_rate": 1.3847156345812856e-05,
      "loss": 0.7631,
      "step": 642630
    },
    {
      "epoch": 2.2522929831386445,
      "grad_norm": 3.390625,
      "learning_rate": 1.3846507317149154e-05,
      "loss": 0.7908,
      "step": 642640
    },
    {
      "epoch": 2.25232803064554,
      "grad_norm": 2.546875,
      "learning_rate": 1.3845858288485453e-05,
      "loss": 0.807,
      "step": 642650
    },
    {
      "epoch": 2.2523630781524355,
      "grad_norm": 2.8125,
      "learning_rate": 1.3845209259821751e-05,
      "loss": 0.7756,
      "step": 642660
    },
    {
      "epoch": 2.2523981256593313,
      "grad_norm": 2.578125,
      "learning_rate": 1.384456023115805e-05,
      "loss": 0.7581,
      "step": 642670
    },
    {
      "epoch": 2.252433173166227,
      "grad_norm": 3.09375,
      "learning_rate": 1.3843911202494347e-05,
      "loss": 0.7784,
      "step": 642680
    },
    {
      "epoch": 2.2524682206731224,
      "grad_norm": 2.65625,
      "learning_rate": 1.3843262173830645e-05,
      "loss": 0.8307,
      "step": 642690
    },
    {
      "epoch": 2.252503268180018,
      "grad_norm": 3.046875,
      "learning_rate": 1.3842613145166943e-05,
      "loss": 0.8285,
      "step": 642700
    },
    {
      "epoch": 2.2525383156869134,
      "grad_norm": 2.8125,
      "learning_rate": 1.3841964116503241e-05,
      "loss": 0.8564,
      "step": 642710
    },
    {
      "epoch": 2.252573363193809,
      "grad_norm": 2.984375,
      "learning_rate": 1.3841315087839541e-05,
      "loss": 0.7219,
      "step": 642720
    },
    {
      "epoch": 2.252608410700705,
      "grad_norm": 3.015625,
      "learning_rate": 1.3840666059175839e-05,
      "loss": 0.8554,
      "step": 642730
    },
    {
      "epoch": 2.2526434582076003,
      "grad_norm": 3.1875,
      "learning_rate": 1.3840017030512137e-05,
      "loss": 0.7438,
      "step": 642740
    },
    {
      "epoch": 2.252678505714496,
      "grad_norm": 3.125,
      "learning_rate": 1.3839368001848435e-05,
      "loss": 0.8632,
      "step": 642750
    },
    {
      "epoch": 2.2527135532213918,
      "grad_norm": 3.015625,
      "learning_rate": 1.3838718973184733e-05,
      "loss": 0.7632,
      "step": 642760
    },
    {
      "epoch": 2.252748600728287,
      "grad_norm": 2.984375,
      "learning_rate": 1.3838069944521031e-05,
      "loss": 0.757,
      "step": 642770
    },
    {
      "epoch": 2.252783648235183,
      "grad_norm": 2.921875,
      "learning_rate": 1.3837420915857329e-05,
      "loss": 0.7998,
      "step": 642780
    },
    {
      "epoch": 2.2528186957420786,
      "grad_norm": 3.171875,
      "learning_rate": 1.3836771887193629e-05,
      "loss": 0.7912,
      "step": 642790
    },
    {
      "epoch": 2.252853743248974,
      "grad_norm": 2.671875,
      "learning_rate": 1.3836122858529923e-05,
      "loss": 0.8255,
      "step": 642800
    },
    {
      "epoch": 2.2528887907558697,
      "grad_norm": 3.265625,
      "learning_rate": 1.3835473829866221e-05,
      "loss": 0.8157,
      "step": 642810
    },
    {
      "epoch": 2.252923838262765,
      "grad_norm": 3.0625,
      "learning_rate": 1.383482480120252e-05,
      "loss": 0.8099,
      "step": 642820
    },
    {
      "epoch": 2.2529588857696607,
      "grad_norm": 2.546875,
      "learning_rate": 1.3834175772538819e-05,
      "loss": 0.7965,
      "step": 642830
    },
    {
      "epoch": 2.2529939332765565,
      "grad_norm": 3.109375,
      "learning_rate": 1.3833526743875117e-05,
      "loss": 0.8394,
      "step": 642840
    },
    {
      "epoch": 2.253028980783452,
      "grad_norm": 2.953125,
      "learning_rate": 1.3832877715211415e-05,
      "loss": 0.9319,
      "step": 642850
    },
    {
      "epoch": 2.2530640282903476,
      "grad_norm": 2.84375,
      "learning_rate": 1.3832228686547713e-05,
      "loss": 0.7573,
      "step": 642860
    },
    {
      "epoch": 2.2530990757972433,
      "grad_norm": 2.515625,
      "learning_rate": 1.3831579657884011e-05,
      "loss": 0.7215,
      "step": 642870
    },
    {
      "epoch": 2.2531341233041386,
      "grad_norm": 2.96875,
      "learning_rate": 1.3830930629220309e-05,
      "loss": 0.8601,
      "step": 642880
    },
    {
      "epoch": 2.2531691708110344,
      "grad_norm": 3.109375,
      "learning_rate": 1.3830281600556607e-05,
      "loss": 0.7885,
      "step": 642890
    },
    {
      "epoch": 2.25320421831793,
      "grad_norm": 2.515625,
      "learning_rate": 1.3829632571892907e-05,
      "loss": 0.8267,
      "step": 642900
    },
    {
      "epoch": 2.2532392658248255,
      "grad_norm": 3.4375,
      "learning_rate": 1.3828983543229205e-05,
      "loss": 0.7668,
      "step": 642910
    },
    {
      "epoch": 2.2532743133317212,
      "grad_norm": 2.71875,
      "learning_rate": 1.3828334514565503e-05,
      "loss": 0.7792,
      "step": 642920
    },
    {
      "epoch": 2.2533093608386165,
      "grad_norm": 2.46875,
      "learning_rate": 1.38276854859018e-05,
      "loss": 0.8224,
      "step": 642930
    },
    {
      "epoch": 2.2533444083455123,
      "grad_norm": 2.953125,
      "learning_rate": 1.3827036457238099e-05,
      "loss": 0.8935,
      "step": 642940
    },
    {
      "epoch": 2.253379455852408,
      "grad_norm": 3.0,
      "learning_rate": 1.3826387428574397e-05,
      "loss": 0.8719,
      "step": 642950
    },
    {
      "epoch": 2.2534145033593034,
      "grad_norm": 2.578125,
      "learning_rate": 1.3825738399910695e-05,
      "loss": 0.8081,
      "step": 642960
    },
    {
      "epoch": 2.253449550866199,
      "grad_norm": 2.90625,
      "learning_rate": 1.3825089371246994e-05,
      "loss": 0.7427,
      "step": 642970
    },
    {
      "epoch": 2.253484598373095,
      "grad_norm": 2.71875,
      "learning_rate": 1.3824440342583289e-05,
      "loss": 0.7291,
      "step": 642980
    },
    {
      "epoch": 2.25351964587999,
      "grad_norm": 3.484375,
      "learning_rate": 1.3823791313919587e-05,
      "loss": 0.8492,
      "step": 642990
    },
    {
      "epoch": 2.253554693386886,
      "grad_norm": 2.703125,
      "learning_rate": 1.3823142285255885e-05,
      "loss": 0.8769,
      "step": 643000
    },
    {
      "epoch": 2.2535897408937817,
      "grad_norm": 2.203125,
      "learning_rate": 1.3822493256592183e-05,
      "loss": 0.7958,
      "step": 643010
    },
    {
      "epoch": 2.253624788400677,
      "grad_norm": 2.515625,
      "learning_rate": 1.3821844227928483e-05,
      "loss": 0.8353,
      "step": 643020
    },
    {
      "epoch": 2.253659835907573,
      "grad_norm": 3.09375,
      "learning_rate": 1.382119519926478e-05,
      "loss": 0.7585,
      "step": 643030
    },
    {
      "epoch": 2.253694883414468,
      "grad_norm": 2.734375,
      "learning_rate": 1.3820546170601079e-05,
      "loss": 0.8342,
      "step": 643040
    },
    {
      "epoch": 2.253729930921364,
      "grad_norm": 2.890625,
      "learning_rate": 1.3819897141937377e-05,
      "loss": 0.7439,
      "step": 643050
    },
    {
      "epoch": 2.2537649784282596,
      "grad_norm": 2.796875,
      "learning_rate": 1.3819248113273675e-05,
      "loss": 0.7771,
      "step": 643060
    },
    {
      "epoch": 2.253800025935155,
      "grad_norm": 2.484375,
      "learning_rate": 1.3818599084609973e-05,
      "loss": 0.6816,
      "step": 643070
    },
    {
      "epoch": 2.2538350734420507,
      "grad_norm": 3.1875,
      "learning_rate": 1.381795005594627e-05,
      "loss": 0.7377,
      "step": 643080
    },
    {
      "epoch": 2.2538701209489465,
      "grad_norm": 3.046875,
      "learning_rate": 1.381730102728257e-05,
      "loss": 0.8388,
      "step": 643090
    },
    {
      "epoch": 2.2539051684558418,
      "grad_norm": 2.96875,
      "learning_rate": 1.3816651998618868e-05,
      "loss": 0.8373,
      "step": 643100
    },
    {
      "epoch": 2.2539402159627375,
      "grad_norm": 2.90625,
      "learning_rate": 1.3816002969955166e-05,
      "loss": 0.7844,
      "step": 643110
    },
    {
      "epoch": 2.2539752634696333,
      "grad_norm": 2.828125,
      "learning_rate": 1.3815353941291464e-05,
      "loss": 0.8588,
      "step": 643120
    },
    {
      "epoch": 2.2540103109765286,
      "grad_norm": 3.15625,
      "learning_rate": 1.3814704912627762e-05,
      "loss": 0.8064,
      "step": 643130
    },
    {
      "epoch": 2.2540453584834244,
      "grad_norm": 2.59375,
      "learning_rate": 1.381405588396406e-05,
      "loss": 0.805,
      "step": 643140
    },
    {
      "epoch": 2.2540804059903197,
      "grad_norm": 2.578125,
      "learning_rate": 1.3813406855300358e-05,
      "loss": 0.7374,
      "step": 643150
    },
    {
      "epoch": 2.2541154534972154,
      "grad_norm": 3.328125,
      "learning_rate": 1.3812757826636658e-05,
      "loss": 0.8827,
      "step": 643160
    },
    {
      "epoch": 2.254150501004111,
      "grad_norm": 2.921875,
      "learning_rate": 1.3812108797972953e-05,
      "loss": 0.7277,
      "step": 643170
    },
    {
      "epoch": 2.2541855485110065,
      "grad_norm": 3.109375,
      "learning_rate": 1.381145976930925e-05,
      "loss": 0.7555,
      "step": 643180
    },
    {
      "epoch": 2.2542205960179023,
      "grad_norm": 2.921875,
      "learning_rate": 1.3810810740645549e-05,
      "loss": 0.8549,
      "step": 643190
    },
    {
      "epoch": 2.254255643524798,
      "grad_norm": 3.328125,
      "learning_rate": 1.3810161711981848e-05,
      "loss": 0.8695,
      "step": 643200
    },
    {
      "epoch": 2.2542906910316933,
      "grad_norm": 2.84375,
      "learning_rate": 1.3809512683318146e-05,
      "loss": 0.8307,
      "step": 643210
    },
    {
      "epoch": 2.254325738538589,
      "grad_norm": 2.703125,
      "learning_rate": 1.3808863654654444e-05,
      "loss": 0.7923,
      "step": 643220
    },
    {
      "epoch": 2.254360786045485,
      "grad_norm": 2.8125,
      "learning_rate": 1.3808214625990742e-05,
      "loss": 0.7982,
      "step": 643230
    },
    {
      "epoch": 2.25439583355238,
      "grad_norm": 3.28125,
      "learning_rate": 1.380756559732704e-05,
      "loss": 0.7785,
      "step": 643240
    },
    {
      "epoch": 2.254430881059276,
      "grad_norm": 2.640625,
      "learning_rate": 1.3806916568663338e-05,
      "loss": 0.8678,
      "step": 643250
    },
    {
      "epoch": 2.2544659285661712,
      "grad_norm": 2.953125,
      "learning_rate": 1.3806267539999636e-05,
      "loss": 0.7995,
      "step": 643260
    },
    {
      "epoch": 2.254500976073067,
      "grad_norm": 2.828125,
      "learning_rate": 1.3805618511335936e-05,
      "loss": 0.7274,
      "step": 643270
    },
    {
      "epoch": 2.2545360235799627,
      "grad_norm": 3.046875,
      "learning_rate": 1.3804969482672234e-05,
      "loss": 0.8606,
      "step": 643280
    },
    {
      "epoch": 2.254571071086858,
      "grad_norm": 3.015625,
      "learning_rate": 1.3804320454008532e-05,
      "loss": 0.822,
      "step": 643290
    },
    {
      "epoch": 2.254606118593754,
      "grad_norm": 2.96875,
      "learning_rate": 1.380367142534483e-05,
      "loss": 0.8402,
      "step": 643300
    },
    {
      "epoch": 2.2546411661006496,
      "grad_norm": 2.84375,
      "learning_rate": 1.3803022396681128e-05,
      "loss": 0.8088,
      "step": 643310
    },
    {
      "epoch": 2.254676213607545,
      "grad_norm": 2.953125,
      "learning_rate": 1.3802373368017426e-05,
      "loss": 0.8531,
      "step": 643320
    },
    {
      "epoch": 2.2547112611144406,
      "grad_norm": 2.90625,
      "learning_rate": 1.3801724339353724e-05,
      "loss": 0.803,
      "step": 643330
    },
    {
      "epoch": 2.2547463086213364,
      "grad_norm": 3.40625,
      "learning_rate": 1.3801075310690024e-05,
      "loss": 0.8087,
      "step": 643340
    },
    {
      "epoch": 2.2547813561282317,
      "grad_norm": 3.0625,
      "learning_rate": 1.3800426282026322e-05,
      "loss": 0.8232,
      "step": 643350
    },
    {
      "epoch": 2.2548164036351275,
      "grad_norm": 2.953125,
      "learning_rate": 1.3799777253362616e-05,
      "loss": 0.8685,
      "step": 643360
    },
    {
      "epoch": 2.254851451142023,
      "grad_norm": 3.28125,
      "learning_rate": 1.3799128224698914e-05,
      "loss": 0.8218,
      "step": 643370
    },
    {
      "epoch": 2.2548864986489185,
      "grad_norm": 2.875,
      "learning_rate": 1.3798479196035214e-05,
      "loss": 0.8105,
      "step": 643380
    },
    {
      "epoch": 2.2549215461558143,
      "grad_norm": 2.84375,
      "learning_rate": 1.3797830167371512e-05,
      "loss": 0.8717,
      "step": 643390
    },
    {
      "epoch": 2.2549565936627096,
      "grad_norm": 3.046875,
      "learning_rate": 1.379718113870781e-05,
      "loss": 0.7668,
      "step": 643400
    },
    {
      "epoch": 2.2549916411696054,
      "grad_norm": 2.859375,
      "learning_rate": 1.3796532110044108e-05,
      "loss": 0.769,
      "step": 643410
    },
    {
      "epoch": 2.255026688676501,
      "grad_norm": 2.671875,
      "learning_rate": 1.3795883081380406e-05,
      "loss": 0.7925,
      "step": 643420
    },
    {
      "epoch": 2.2550617361833964,
      "grad_norm": 2.75,
      "learning_rate": 1.3795234052716704e-05,
      "loss": 0.834,
      "step": 643430
    },
    {
      "epoch": 2.255096783690292,
      "grad_norm": 2.75,
      "learning_rate": 1.3794585024053002e-05,
      "loss": 0.8575,
      "step": 643440
    },
    {
      "epoch": 2.255131831197188,
      "grad_norm": 3.09375,
      "learning_rate": 1.3793935995389302e-05,
      "loss": 0.8683,
      "step": 643450
    },
    {
      "epoch": 2.2551668787040833,
      "grad_norm": 2.984375,
      "learning_rate": 1.37932869667256e-05,
      "loss": 0.7097,
      "step": 643460
    },
    {
      "epoch": 2.255201926210979,
      "grad_norm": 2.765625,
      "learning_rate": 1.3792637938061898e-05,
      "loss": 0.859,
      "step": 643470
    },
    {
      "epoch": 2.2552369737178743,
      "grad_norm": 2.640625,
      "learning_rate": 1.3791988909398196e-05,
      "loss": 0.7522,
      "step": 643480
    },
    {
      "epoch": 2.25527202122477,
      "grad_norm": 3.34375,
      "learning_rate": 1.3791339880734494e-05,
      "loss": 0.8215,
      "step": 643490
    },
    {
      "epoch": 2.255307068731666,
      "grad_norm": 3.1875,
      "learning_rate": 1.3790690852070792e-05,
      "loss": 0.8279,
      "step": 643500
    },
    {
      "epoch": 2.255342116238561,
      "grad_norm": 3.015625,
      "learning_rate": 1.379004182340709e-05,
      "loss": 0.7712,
      "step": 643510
    },
    {
      "epoch": 2.255377163745457,
      "grad_norm": 2.828125,
      "learning_rate": 1.378939279474339e-05,
      "loss": 0.7808,
      "step": 643520
    },
    {
      "epoch": 2.2554122112523527,
      "grad_norm": 2.84375,
      "learning_rate": 1.3788743766079688e-05,
      "loss": 0.7753,
      "step": 643530
    },
    {
      "epoch": 2.255447258759248,
      "grad_norm": 3.421875,
      "learning_rate": 1.3788094737415986e-05,
      "loss": 0.8368,
      "step": 643540
    },
    {
      "epoch": 2.2554823062661438,
      "grad_norm": 3.140625,
      "learning_rate": 1.378744570875228e-05,
      "loss": 0.8116,
      "step": 643550
    },
    {
      "epoch": 2.2555173537730395,
      "grad_norm": 2.78125,
      "learning_rate": 1.3786796680088578e-05,
      "loss": 0.8687,
      "step": 643560
    },
    {
      "epoch": 2.255552401279935,
      "grad_norm": 2.953125,
      "learning_rate": 1.3786147651424878e-05,
      "loss": 0.7741,
      "step": 643570
    },
    {
      "epoch": 2.2555874487868306,
      "grad_norm": 2.625,
      "learning_rate": 1.3785498622761176e-05,
      "loss": 0.849,
      "step": 643580
    },
    {
      "epoch": 2.255622496293726,
      "grad_norm": 3.0625,
      "learning_rate": 1.3784849594097474e-05,
      "loss": 0.7792,
      "step": 643590
    },
    {
      "epoch": 2.2556575438006217,
      "grad_norm": 2.875,
      "learning_rate": 1.3784200565433772e-05,
      "loss": 0.7414,
      "step": 643600
    },
    {
      "epoch": 2.2556925913075174,
      "grad_norm": 3.015625,
      "learning_rate": 1.378355153677007e-05,
      "loss": 0.8653,
      "step": 643610
    },
    {
      "epoch": 2.255727638814413,
      "grad_norm": 2.953125,
      "learning_rate": 1.3782902508106368e-05,
      "loss": 0.8185,
      "step": 643620
    },
    {
      "epoch": 2.2557626863213085,
      "grad_norm": 3.78125,
      "learning_rate": 1.3782253479442666e-05,
      "loss": 0.7682,
      "step": 643630
    },
    {
      "epoch": 2.2557977338282043,
      "grad_norm": 3.140625,
      "learning_rate": 1.3781604450778966e-05,
      "loss": 0.8239,
      "step": 643640
    },
    {
      "epoch": 2.2558327813350996,
      "grad_norm": 3.296875,
      "learning_rate": 1.3780955422115264e-05,
      "loss": 0.7773,
      "step": 643650
    },
    {
      "epoch": 2.2558678288419953,
      "grad_norm": 2.703125,
      "learning_rate": 1.3780306393451562e-05,
      "loss": 0.8111,
      "step": 643660
    },
    {
      "epoch": 2.255902876348891,
      "grad_norm": 2.5625,
      "learning_rate": 1.377965736478786e-05,
      "loss": 0.7955,
      "step": 643670
    },
    {
      "epoch": 2.2559379238557864,
      "grad_norm": 2.5625,
      "learning_rate": 1.3779008336124158e-05,
      "loss": 0.7845,
      "step": 643680
    },
    {
      "epoch": 2.255972971362682,
      "grad_norm": 3.171875,
      "learning_rate": 1.3778359307460456e-05,
      "loss": 0.7805,
      "step": 643690
    },
    {
      "epoch": 2.2560080188695775,
      "grad_norm": 3.15625,
      "learning_rate": 1.3777710278796754e-05,
      "loss": 0.8063,
      "step": 643700
    },
    {
      "epoch": 2.2560430663764732,
      "grad_norm": 3.5625,
      "learning_rate": 1.3777061250133053e-05,
      "loss": 0.85,
      "step": 643710
    },
    {
      "epoch": 2.256078113883369,
      "grad_norm": 3.125,
      "learning_rate": 1.3776412221469351e-05,
      "loss": 0.7586,
      "step": 643720
    },
    {
      "epoch": 2.2561131613902647,
      "grad_norm": 2.9375,
      "learning_rate": 1.377576319280565e-05,
      "loss": 0.7696,
      "step": 643730
    },
    {
      "epoch": 2.25614820889716,
      "grad_norm": 3.484375,
      "learning_rate": 1.3775114164141944e-05,
      "loss": 0.8171,
      "step": 643740
    },
    {
      "epoch": 2.256183256404056,
      "grad_norm": 2.6875,
      "learning_rate": 1.3774465135478244e-05,
      "loss": 0.7774,
      "step": 643750
    },
    {
      "epoch": 2.256218303910951,
      "grad_norm": 3.015625,
      "learning_rate": 1.3773816106814542e-05,
      "loss": 0.789,
      "step": 643760
    },
    {
      "epoch": 2.256253351417847,
      "grad_norm": 2.703125,
      "learning_rate": 1.377316707815084e-05,
      "loss": 0.7671,
      "step": 643770
    },
    {
      "epoch": 2.2562883989247426,
      "grad_norm": 2.78125,
      "learning_rate": 1.3772518049487138e-05,
      "loss": 0.794,
      "step": 643780
    },
    {
      "epoch": 2.256323446431638,
      "grad_norm": 2.8125,
      "learning_rate": 1.3771869020823436e-05,
      "loss": 0.8115,
      "step": 643790
    },
    {
      "epoch": 2.2563584939385337,
      "grad_norm": 2.609375,
      "learning_rate": 1.3771219992159734e-05,
      "loss": 0.8421,
      "step": 643800
    },
    {
      "epoch": 2.2563935414454295,
      "grad_norm": 3.15625,
      "learning_rate": 1.3770570963496032e-05,
      "loss": 0.7831,
      "step": 643810
    },
    {
      "epoch": 2.256428588952325,
      "grad_norm": 2.515625,
      "learning_rate": 1.3769921934832331e-05,
      "loss": 0.8216,
      "step": 643820
    },
    {
      "epoch": 2.2564636364592205,
      "grad_norm": 2.703125,
      "learning_rate": 1.376927290616863e-05,
      "loss": 0.8095,
      "step": 643830
    },
    {
      "epoch": 2.2564986839661163,
      "grad_norm": 3.21875,
      "learning_rate": 1.3768623877504927e-05,
      "loss": 0.8101,
      "step": 643840
    },
    {
      "epoch": 2.2565337314730116,
      "grad_norm": 2.890625,
      "learning_rate": 1.3767974848841225e-05,
      "loss": 0.8334,
      "step": 643850
    },
    {
      "epoch": 2.2565687789799074,
      "grad_norm": 3.03125,
      "learning_rate": 1.3767325820177523e-05,
      "loss": 0.7164,
      "step": 643860
    },
    {
      "epoch": 2.2566038264868027,
      "grad_norm": 3.0,
      "learning_rate": 1.3766676791513821e-05,
      "loss": 0.9299,
      "step": 643870
    },
    {
      "epoch": 2.2566388739936984,
      "grad_norm": 3.21875,
      "learning_rate": 1.376602776285012e-05,
      "loss": 0.8466,
      "step": 643880
    },
    {
      "epoch": 2.256673921500594,
      "grad_norm": 2.875,
      "learning_rate": 1.3765378734186419e-05,
      "loss": 0.7063,
      "step": 643890
    },
    {
      "epoch": 2.2567089690074895,
      "grad_norm": 3.296875,
      "learning_rate": 1.3764729705522717e-05,
      "loss": 0.8043,
      "step": 643900
    },
    {
      "epoch": 2.2567440165143853,
      "grad_norm": 3.140625,
      "learning_rate": 1.3764080676859015e-05,
      "loss": 0.799,
      "step": 643910
    },
    {
      "epoch": 2.256779064021281,
      "grad_norm": 3.078125,
      "learning_rate": 1.3763431648195313e-05,
      "loss": 0.845,
      "step": 643920
    },
    {
      "epoch": 2.2568141115281763,
      "grad_norm": 2.921875,
      "learning_rate": 1.376278261953161e-05,
      "loss": 0.8232,
      "step": 643930
    },
    {
      "epoch": 2.256849159035072,
      "grad_norm": 2.703125,
      "learning_rate": 1.3762133590867907e-05,
      "loss": 0.8496,
      "step": 643940
    },
    {
      "epoch": 2.256884206541968,
      "grad_norm": 3.125,
      "learning_rate": 1.3761484562204205e-05,
      "loss": 0.801,
      "step": 643950
    },
    {
      "epoch": 2.256919254048863,
      "grad_norm": 2.953125,
      "learning_rate": 1.3760835533540503e-05,
      "loss": 0.813,
      "step": 643960
    },
    {
      "epoch": 2.256954301555759,
      "grad_norm": 2.796875,
      "learning_rate": 1.3760186504876801e-05,
      "loss": 0.8228,
      "step": 643970
    },
    {
      "epoch": 2.2569893490626542,
      "grad_norm": 2.65625,
      "learning_rate": 1.37595374762131e-05,
      "loss": 0.8281,
      "step": 643980
    },
    {
      "epoch": 2.25702439656955,
      "grad_norm": 3.265625,
      "learning_rate": 1.3758888447549397e-05,
      "loss": 0.7471,
      "step": 643990
    },
    {
      "epoch": 2.2570594440764458,
      "grad_norm": 3.09375,
      "learning_rate": 1.3758239418885697e-05,
      "loss": 0.7899,
      "step": 644000
    },
    {
      "epoch": 2.257094491583341,
      "grad_norm": 3.078125,
      "learning_rate": 1.3757590390221995e-05,
      "loss": 0.822,
      "step": 644010
    },
    {
      "epoch": 2.257129539090237,
      "grad_norm": 3.09375,
      "learning_rate": 1.3756941361558293e-05,
      "loss": 0.8015,
      "step": 644020
    },
    {
      "epoch": 2.2571645865971326,
      "grad_norm": 3.234375,
      "learning_rate": 1.3756292332894591e-05,
      "loss": 0.7937,
      "step": 644030
    },
    {
      "epoch": 2.257199634104028,
      "grad_norm": 2.84375,
      "learning_rate": 1.3755643304230889e-05,
      "loss": 0.7353,
      "step": 644040
    },
    {
      "epoch": 2.2572346816109237,
      "grad_norm": 2.90625,
      "learning_rate": 1.3754994275567187e-05,
      "loss": 0.8157,
      "step": 644050
    },
    {
      "epoch": 2.2572697291178194,
      "grad_norm": 2.953125,
      "learning_rate": 1.3754345246903485e-05,
      "loss": 0.8044,
      "step": 644060
    },
    {
      "epoch": 2.2573047766247147,
      "grad_norm": 2.90625,
      "learning_rate": 1.3753696218239785e-05,
      "loss": 0.8496,
      "step": 644070
    },
    {
      "epoch": 2.2573398241316105,
      "grad_norm": 3.4375,
      "learning_rate": 1.3753047189576083e-05,
      "loss": 0.7926,
      "step": 644080
    },
    {
      "epoch": 2.257374871638506,
      "grad_norm": 3.0625,
      "learning_rate": 1.375239816091238e-05,
      "loss": 0.8273,
      "step": 644090
    },
    {
      "epoch": 2.2574099191454016,
      "grad_norm": 2.9375,
      "learning_rate": 1.3751749132248679e-05,
      "loss": 0.8829,
      "step": 644100
    },
    {
      "epoch": 2.2574449666522973,
      "grad_norm": 2.59375,
      "learning_rate": 1.3751100103584973e-05,
      "loss": 0.7381,
      "step": 644110
    },
    {
      "epoch": 2.2574800141591926,
      "grad_norm": 2.96875,
      "learning_rate": 1.3750451074921273e-05,
      "loss": 0.8465,
      "step": 644120
    },
    {
      "epoch": 2.2575150616660884,
      "grad_norm": 2.96875,
      "learning_rate": 1.3749802046257571e-05,
      "loss": 0.7947,
      "step": 644130
    },
    {
      "epoch": 2.257550109172984,
      "grad_norm": 2.546875,
      "learning_rate": 1.3749153017593869e-05,
      "loss": 0.8047,
      "step": 644140
    },
    {
      "epoch": 2.2575851566798795,
      "grad_norm": 3.1875,
      "learning_rate": 1.3748503988930167e-05,
      "loss": 0.8024,
      "step": 644150
    },
    {
      "epoch": 2.2576202041867752,
      "grad_norm": 2.625,
      "learning_rate": 1.3747854960266465e-05,
      "loss": 0.8178,
      "step": 644160
    },
    {
      "epoch": 2.257655251693671,
      "grad_norm": 2.75,
      "learning_rate": 1.3747205931602763e-05,
      "loss": 0.8365,
      "step": 644170
    },
    {
      "epoch": 2.2576902992005663,
      "grad_norm": 2.78125,
      "learning_rate": 1.3746556902939061e-05,
      "loss": 0.7141,
      "step": 644180
    },
    {
      "epoch": 2.257725346707462,
      "grad_norm": 2.765625,
      "learning_rate": 1.374590787427536e-05,
      "loss": 0.8553,
      "step": 644190
    },
    {
      "epoch": 2.2577603942143574,
      "grad_norm": 3.109375,
      "learning_rate": 1.3745258845611659e-05,
      "loss": 0.8607,
      "step": 644200
    },
    {
      "epoch": 2.257795441721253,
      "grad_norm": 2.828125,
      "learning_rate": 1.3744609816947957e-05,
      "loss": 0.891,
      "step": 644210
    },
    {
      "epoch": 2.257830489228149,
      "grad_norm": 3.328125,
      "learning_rate": 1.3743960788284255e-05,
      "loss": 0.9553,
      "step": 644220
    },
    {
      "epoch": 2.257865536735044,
      "grad_norm": 2.875,
      "learning_rate": 1.3743311759620553e-05,
      "loss": 0.8595,
      "step": 644230
    },
    {
      "epoch": 2.25790058424194,
      "grad_norm": 3.234375,
      "learning_rate": 1.374266273095685e-05,
      "loss": 0.7216,
      "step": 644240
    },
    {
      "epoch": 2.2579356317488357,
      "grad_norm": 3.09375,
      "learning_rate": 1.3742013702293149e-05,
      "loss": 0.8015,
      "step": 644250
    },
    {
      "epoch": 2.257970679255731,
      "grad_norm": 3.25,
      "learning_rate": 1.3741364673629448e-05,
      "loss": 0.8185,
      "step": 644260
    },
    {
      "epoch": 2.258005726762627,
      "grad_norm": 2.65625,
      "learning_rate": 1.3740715644965746e-05,
      "loss": 0.8555,
      "step": 644270
    },
    {
      "epoch": 2.2580407742695225,
      "grad_norm": 2.71875,
      "learning_rate": 1.3740066616302044e-05,
      "loss": 0.851,
      "step": 644280
    },
    {
      "epoch": 2.258075821776418,
      "grad_norm": 3.0625,
      "learning_rate": 1.3739417587638342e-05,
      "loss": 0.798,
      "step": 644290
    },
    {
      "epoch": 2.2581108692833136,
      "grad_norm": 2.953125,
      "learning_rate": 1.3738768558974639e-05,
      "loss": 0.8475,
      "step": 644300
    },
    {
      "epoch": 2.258145916790209,
      "grad_norm": 2.765625,
      "learning_rate": 1.3738119530310937e-05,
      "loss": 0.8501,
      "step": 644310
    },
    {
      "epoch": 2.2581809642971047,
      "grad_norm": 2.75,
      "learning_rate": 1.3737470501647235e-05,
      "loss": 0.8456,
      "step": 644320
    },
    {
      "epoch": 2.2582160118040004,
      "grad_norm": 2.71875,
      "learning_rate": 1.3736821472983533e-05,
      "loss": 0.7988,
      "step": 644330
    },
    {
      "epoch": 2.2582510593108958,
      "grad_norm": 3.21875,
      "learning_rate": 1.373617244431983e-05,
      "loss": 0.9238,
      "step": 644340
    },
    {
      "epoch": 2.2582861068177915,
      "grad_norm": 2.71875,
      "learning_rate": 1.3735523415656129e-05,
      "loss": 0.8655,
      "step": 644350
    },
    {
      "epoch": 2.2583211543246873,
      "grad_norm": 2.859375,
      "learning_rate": 1.3734874386992427e-05,
      "loss": 0.8334,
      "step": 644360
    },
    {
      "epoch": 2.2583562018315826,
      "grad_norm": 2.609375,
      "learning_rate": 1.3734225358328726e-05,
      "loss": 0.8247,
      "step": 644370
    },
    {
      "epoch": 2.2583912493384783,
      "grad_norm": 2.515625,
      "learning_rate": 1.3733576329665024e-05,
      "loss": 0.7253,
      "step": 644380
    },
    {
      "epoch": 2.258426296845374,
      "grad_norm": 2.6875,
      "learning_rate": 1.3732927301001322e-05,
      "loss": 0.7525,
      "step": 644390
    },
    {
      "epoch": 2.2584613443522694,
      "grad_norm": 2.671875,
      "learning_rate": 1.373227827233762e-05,
      "loss": 0.7995,
      "step": 644400
    },
    {
      "epoch": 2.258496391859165,
      "grad_norm": 2.671875,
      "learning_rate": 1.3731629243673918e-05,
      "loss": 0.8573,
      "step": 644410
    },
    {
      "epoch": 2.2585314393660605,
      "grad_norm": 2.78125,
      "learning_rate": 1.3730980215010216e-05,
      "loss": 0.8841,
      "step": 644420
    },
    {
      "epoch": 2.2585664868729562,
      "grad_norm": 2.453125,
      "learning_rate": 1.3730331186346514e-05,
      "loss": 0.8697,
      "step": 644430
    },
    {
      "epoch": 2.258601534379852,
      "grad_norm": 2.828125,
      "learning_rate": 1.3729682157682814e-05,
      "loss": 0.7461,
      "step": 644440
    },
    {
      "epoch": 2.2586365818867473,
      "grad_norm": 2.953125,
      "learning_rate": 1.3729033129019112e-05,
      "loss": 0.7905,
      "step": 644450
    },
    {
      "epoch": 2.258671629393643,
      "grad_norm": 2.671875,
      "learning_rate": 1.372838410035541e-05,
      "loss": 0.8679,
      "step": 644460
    },
    {
      "epoch": 2.258706676900539,
      "grad_norm": 2.734375,
      "learning_rate": 1.3727735071691708e-05,
      "loss": 0.8138,
      "step": 644470
    },
    {
      "epoch": 2.258741724407434,
      "grad_norm": 2.734375,
      "learning_rate": 1.3727086043028006e-05,
      "loss": 0.8424,
      "step": 644480
    },
    {
      "epoch": 2.25877677191433,
      "grad_norm": 2.28125,
      "learning_rate": 1.3726437014364302e-05,
      "loss": 0.7988,
      "step": 644490
    },
    {
      "epoch": 2.2588118194212257,
      "grad_norm": 2.84375,
      "learning_rate": 1.37257879857006e-05,
      "loss": 0.7891,
      "step": 644500
    },
    {
      "epoch": 2.258846866928121,
      "grad_norm": 2.984375,
      "learning_rate": 1.3725138957036898e-05,
      "loss": 0.8466,
      "step": 644510
    },
    {
      "epoch": 2.2588819144350167,
      "grad_norm": 2.984375,
      "learning_rate": 1.3724489928373196e-05,
      "loss": 0.7828,
      "step": 644520
    },
    {
      "epoch": 2.258916961941912,
      "grad_norm": 3.015625,
      "learning_rate": 1.3723840899709494e-05,
      "loss": 0.8581,
      "step": 644530
    },
    {
      "epoch": 2.258952009448808,
      "grad_norm": 2.9375,
      "learning_rate": 1.3723191871045792e-05,
      "loss": 0.8497,
      "step": 644540
    },
    {
      "epoch": 2.2589870569557036,
      "grad_norm": 3.09375,
      "learning_rate": 1.3722542842382092e-05,
      "loss": 0.835,
      "step": 644550
    },
    {
      "epoch": 2.259022104462599,
      "grad_norm": 2.8125,
      "learning_rate": 1.372189381371839e-05,
      "loss": 0.843,
      "step": 644560
    },
    {
      "epoch": 2.2590571519694946,
      "grad_norm": 2.453125,
      "learning_rate": 1.3721244785054688e-05,
      "loss": 0.721,
      "step": 644570
    },
    {
      "epoch": 2.2590921994763904,
      "grad_norm": 2.734375,
      "learning_rate": 1.3720595756390986e-05,
      "loss": 0.7497,
      "step": 644580
    },
    {
      "epoch": 2.2591272469832857,
      "grad_norm": 2.90625,
      "learning_rate": 1.3719946727727284e-05,
      "loss": 0.7675,
      "step": 644590
    },
    {
      "epoch": 2.2591622944901815,
      "grad_norm": 2.796875,
      "learning_rate": 1.3719297699063582e-05,
      "loss": 0.862,
      "step": 644600
    },
    {
      "epoch": 2.2591973419970772,
      "grad_norm": 3.046875,
      "learning_rate": 1.371864867039988e-05,
      "loss": 0.7639,
      "step": 644610
    },
    {
      "epoch": 2.2592323895039725,
      "grad_norm": 2.96875,
      "learning_rate": 1.371799964173618e-05,
      "loss": 0.858,
      "step": 644620
    },
    {
      "epoch": 2.2592674370108683,
      "grad_norm": 2.421875,
      "learning_rate": 1.3717350613072478e-05,
      "loss": 0.7501,
      "step": 644630
    },
    {
      "epoch": 2.2593024845177636,
      "grad_norm": 3.234375,
      "learning_rate": 1.3716701584408776e-05,
      "loss": 0.7437,
      "step": 644640
    },
    {
      "epoch": 2.2593375320246594,
      "grad_norm": 2.671875,
      "learning_rate": 1.3716052555745074e-05,
      "loss": 0.8253,
      "step": 644650
    },
    {
      "epoch": 2.259372579531555,
      "grad_norm": 2.890625,
      "learning_rate": 1.3715403527081372e-05,
      "loss": 0.8608,
      "step": 644660
    },
    {
      "epoch": 2.2594076270384504,
      "grad_norm": 3.1875,
      "learning_rate": 1.371475449841767e-05,
      "loss": 0.8574,
      "step": 644670
    },
    {
      "epoch": 2.259442674545346,
      "grad_norm": 3.125,
      "learning_rate": 1.3714105469753966e-05,
      "loss": 0.8899,
      "step": 644680
    },
    {
      "epoch": 2.259477722052242,
      "grad_norm": 3.265625,
      "learning_rate": 1.3713456441090264e-05,
      "loss": 0.7895,
      "step": 644690
    },
    {
      "epoch": 2.2595127695591373,
      "grad_norm": 2.75,
      "learning_rate": 1.3712807412426562e-05,
      "loss": 0.7534,
      "step": 644700
    },
    {
      "epoch": 2.259547817066033,
      "grad_norm": 2.796875,
      "learning_rate": 1.371215838376286e-05,
      "loss": 0.7869,
      "step": 644710
    },
    {
      "epoch": 2.259582864572929,
      "grad_norm": 2.125,
      "learning_rate": 1.3711509355099158e-05,
      "loss": 0.8995,
      "step": 644720
    },
    {
      "epoch": 2.259617912079824,
      "grad_norm": 2.5625,
      "learning_rate": 1.3710860326435456e-05,
      "loss": 0.8058,
      "step": 644730
    },
    {
      "epoch": 2.25965295958672,
      "grad_norm": 3.140625,
      "learning_rate": 1.3710211297771756e-05,
      "loss": 0.7546,
      "step": 644740
    },
    {
      "epoch": 2.259688007093615,
      "grad_norm": 3.296875,
      "learning_rate": 1.3709562269108054e-05,
      "loss": 0.7852,
      "step": 644750
    },
    {
      "epoch": 2.259723054600511,
      "grad_norm": 2.703125,
      "learning_rate": 1.3708913240444352e-05,
      "loss": 0.8291,
      "step": 644760
    },
    {
      "epoch": 2.2597581021074067,
      "grad_norm": 2.8125,
      "learning_rate": 1.370826421178065e-05,
      "loss": 0.7534,
      "step": 644770
    },
    {
      "epoch": 2.259793149614302,
      "grad_norm": 3.046875,
      "learning_rate": 1.3707615183116948e-05,
      "loss": 0.7218,
      "step": 644780
    },
    {
      "epoch": 2.2598281971211978,
      "grad_norm": 2.84375,
      "learning_rate": 1.3706966154453246e-05,
      "loss": 0.7838,
      "step": 644790
    },
    {
      "epoch": 2.2598632446280935,
      "grad_norm": 2.65625,
      "learning_rate": 1.3706317125789544e-05,
      "loss": 0.8537,
      "step": 644800
    },
    {
      "epoch": 2.259898292134989,
      "grad_norm": 3.46875,
      "learning_rate": 1.3705668097125843e-05,
      "loss": 0.8801,
      "step": 644810
    },
    {
      "epoch": 2.2599333396418846,
      "grad_norm": 2.8125,
      "learning_rate": 1.3705019068462141e-05,
      "loss": 0.8313,
      "step": 644820
    },
    {
      "epoch": 2.2599683871487803,
      "grad_norm": 2.828125,
      "learning_rate": 1.370437003979844e-05,
      "loss": 0.7953,
      "step": 644830
    },
    {
      "epoch": 2.2600034346556757,
      "grad_norm": 2.84375,
      "learning_rate": 1.3703721011134737e-05,
      "loss": 0.8525,
      "step": 644840
    },
    {
      "epoch": 2.2600384821625714,
      "grad_norm": 2.9375,
      "learning_rate": 1.3703071982471035e-05,
      "loss": 0.8681,
      "step": 644850
    },
    {
      "epoch": 2.2600735296694667,
      "grad_norm": 2.640625,
      "learning_rate": 1.3702422953807333e-05,
      "loss": 0.8464,
      "step": 644860
    },
    {
      "epoch": 2.2601085771763625,
      "grad_norm": 2.640625,
      "learning_rate": 1.370177392514363e-05,
      "loss": 0.7797,
      "step": 644870
    },
    {
      "epoch": 2.2601436246832582,
      "grad_norm": 2.765625,
      "learning_rate": 1.3701124896479928e-05,
      "loss": 0.9008,
      "step": 644880
    },
    {
      "epoch": 2.2601786721901536,
      "grad_norm": 2.90625,
      "learning_rate": 1.3700475867816226e-05,
      "loss": 0.8408,
      "step": 644890
    },
    {
      "epoch": 2.2602137196970493,
      "grad_norm": 2.59375,
      "learning_rate": 1.3699826839152524e-05,
      "loss": 0.9165,
      "step": 644900
    },
    {
      "epoch": 2.260248767203945,
      "grad_norm": 2.78125,
      "learning_rate": 1.3699177810488822e-05,
      "loss": 0.768,
      "step": 644910
    },
    {
      "epoch": 2.2602838147108404,
      "grad_norm": 2.875,
      "learning_rate": 1.3698528781825121e-05,
      "loss": 0.8629,
      "step": 644920
    },
    {
      "epoch": 2.260318862217736,
      "grad_norm": 2.671875,
      "learning_rate": 1.369787975316142e-05,
      "loss": 0.8163,
      "step": 644930
    },
    {
      "epoch": 2.260353909724632,
      "grad_norm": 2.4375,
      "learning_rate": 1.3697230724497717e-05,
      "loss": 0.7697,
      "step": 644940
    },
    {
      "epoch": 2.260388957231527,
      "grad_norm": 2.640625,
      "learning_rate": 1.3696581695834015e-05,
      "loss": 0.7983,
      "step": 644950
    },
    {
      "epoch": 2.260424004738423,
      "grad_norm": 3.328125,
      "learning_rate": 1.3695932667170313e-05,
      "loss": 0.9013,
      "step": 644960
    },
    {
      "epoch": 2.2604590522453183,
      "grad_norm": 2.921875,
      "learning_rate": 1.3695283638506611e-05,
      "loss": 0.8285,
      "step": 644970
    },
    {
      "epoch": 2.260494099752214,
      "grad_norm": 2.671875,
      "learning_rate": 1.369463460984291e-05,
      "loss": 0.7906,
      "step": 644980
    },
    {
      "epoch": 2.26052914725911,
      "grad_norm": 2.859375,
      "learning_rate": 1.3693985581179209e-05,
      "loss": 0.7262,
      "step": 644990
    },
    {
      "epoch": 2.2605641947660056,
      "grad_norm": 3.078125,
      "learning_rate": 1.3693336552515507e-05,
      "loss": 0.7968,
      "step": 645000
    },
    {
      "epoch": 2.2605641947660056,
      "eval_loss": 0.7634396553039551,
      "eval_runtime": 552.2834,
      "eval_samples_per_second": 688.842,
      "eval_steps_per_second": 57.403,
      "step": 645000
    },
    {
      "epoch": 2.260599242272901,
      "grad_norm": 2.546875,
      "learning_rate": 1.3692687523851805e-05,
      "loss": 0.7259,
      "step": 645010
    },
    {
      "epoch": 2.2606342897797966,
      "grad_norm": 2.96875,
      "learning_rate": 1.3692038495188103e-05,
      "loss": 0.8304,
      "step": 645020
    },
    {
      "epoch": 2.260669337286692,
      "grad_norm": 2.734375,
      "learning_rate": 1.3691389466524401e-05,
      "loss": 0.796,
      "step": 645030
    },
    {
      "epoch": 2.2607043847935877,
      "grad_norm": 2.765625,
      "learning_rate": 1.3690740437860699e-05,
      "loss": 0.8459,
      "step": 645040
    },
    {
      "epoch": 2.2607394323004835,
      "grad_norm": 2.375,
      "learning_rate": 1.3690091409196995e-05,
      "loss": 0.81,
      "step": 645050
    },
    {
      "epoch": 2.260774479807379,
      "grad_norm": 3.046875,
      "learning_rate": 1.3689442380533293e-05,
      "loss": 0.7861,
      "step": 645060
    },
    {
      "epoch": 2.2608095273142745,
      "grad_norm": 2.578125,
      "learning_rate": 1.3688793351869591e-05,
      "loss": 0.7425,
      "step": 645070
    },
    {
      "epoch": 2.26084457482117,
      "grad_norm": 3.125,
      "learning_rate": 1.368814432320589e-05,
      "loss": 0.8624,
      "step": 645080
    },
    {
      "epoch": 2.2608796223280656,
      "grad_norm": 3.265625,
      "learning_rate": 1.3687495294542187e-05,
      "loss": 0.8352,
      "step": 645090
    },
    {
      "epoch": 2.2609146698349614,
      "grad_norm": 2.328125,
      "learning_rate": 1.3686846265878487e-05,
      "loss": 0.8419,
      "step": 645100
    },
    {
      "epoch": 2.260949717341857,
      "grad_norm": 3.265625,
      "learning_rate": 1.3686197237214785e-05,
      "loss": 0.8358,
      "step": 645110
    },
    {
      "epoch": 2.2609847648487524,
      "grad_norm": 3.40625,
      "learning_rate": 1.3685548208551083e-05,
      "loss": 0.9161,
      "step": 645120
    },
    {
      "epoch": 2.261019812355648,
      "grad_norm": 3.078125,
      "learning_rate": 1.3684899179887381e-05,
      "loss": 0.8444,
      "step": 645130
    },
    {
      "epoch": 2.2610548598625435,
      "grad_norm": 3.515625,
      "learning_rate": 1.3684250151223679e-05,
      "loss": 0.8335,
      "step": 645140
    },
    {
      "epoch": 2.2610899073694393,
      "grad_norm": 2.9375,
      "learning_rate": 1.3683601122559977e-05,
      "loss": 0.8194,
      "step": 645150
    },
    {
      "epoch": 2.261124954876335,
      "grad_norm": 2.703125,
      "learning_rate": 1.3682952093896275e-05,
      "loss": 0.8939,
      "step": 645160
    },
    {
      "epoch": 2.2611600023832303,
      "grad_norm": 2.84375,
      "learning_rate": 1.3682303065232575e-05,
      "loss": 0.7895,
      "step": 645170
    },
    {
      "epoch": 2.261195049890126,
      "grad_norm": 3.125,
      "learning_rate": 1.3681654036568873e-05,
      "loss": 0.8274,
      "step": 645180
    },
    {
      "epoch": 2.261230097397022,
      "grad_norm": 2.984375,
      "learning_rate": 1.368100500790517e-05,
      "loss": 0.8253,
      "step": 645190
    },
    {
      "epoch": 2.261265144903917,
      "grad_norm": 2.90625,
      "learning_rate": 1.3680355979241469e-05,
      "loss": 0.8417,
      "step": 645200
    },
    {
      "epoch": 2.261300192410813,
      "grad_norm": 2.84375,
      "learning_rate": 1.3679706950577767e-05,
      "loss": 0.7867,
      "step": 645210
    },
    {
      "epoch": 2.2613352399177087,
      "grad_norm": 2.40625,
      "learning_rate": 1.3679057921914065e-05,
      "loss": 0.7037,
      "step": 645220
    },
    {
      "epoch": 2.261370287424604,
      "grad_norm": 2.75,
      "learning_rate": 1.3678408893250363e-05,
      "loss": 0.8709,
      "step": 645230
    },
    {
      "epoch": 2.2614053349314998,
      "grad_norm": 3.53125,
      "learning_rate": 1.3677759864586659e-05,
      "loss": 0.8432,
      "step": 645240
    },
    {
      "epoch": 2.261440382438395,
      "grad_norm": 3.03125,
      "learning_rate": 1.3677110835922957e-05,
      "loss": 0.8432,
      "step": 645250
    },
    {
      "epoch": 2.261475429945291,
      "grad_norm": 2.734375,
      "learning_rate": 1.3676461807259255e-05,
      "loss": 0.8555,
      "step": 645260
    },
    {
      "epoch": 2.2615104774521866,
      "grad_norm": 3.15625,
      "learning_rate": 1.3675812778595553e-05,
      "loss": 0.8565,
      "step": 645270
    },
    {
      "epoch": 2.261545524959082,
      "grad_norm": 2.40625,
      "learning_rate": 1.3675163749931851e-05,
      "loss": 0.8115,
      "step": 645280
    },
    {
      "epoch": 2.2615805724659777,
      "grad_norm": 3.171875,
      "learning_rate": 1.367451472126815e-05,
      "loss": 0.8043,
      "step": 645290
    },
    {
      "epoch": 2.2616156199728734,
      "grad_norm": 2.984375,
      "learning_rate": 1.3673865692604449e-05,
      "loss": 0.7782,
      "step": 645300
    },
    {
      "epoch": 2.2616506674797687,
      "grad_norm": 2.4375,
      "learning_rate": 1.3673216663940747e-05,
      "loss": 0.8235,
      "step": 645310
    },
    {
      "epoch": 2.2616857149866645,
      "grad_norm": 3.171875,
      "learning_rate": 1.3672567635277045e-05,
      "loss": 0.8404,
      "step": 645320
    },
    {
      "epoch": 2.2617207624935602,
      "grad_norm": 2.671875,
      "learning_rate": 1.3671918606613343e-05,
      "loss": 0.8742,
      "step": 645330
    },
    {
      "epoch": 2.2617558100004556,
      "grad_norm": 2.859375,
      "learning_rate": 1.367126957794964e-05,
      "loss": 0.8225,
      "step": 645340
    },
    {
      "epoch": 2.2617908575073513,
      "grad_norm": 2.453125,
      "learning_rate": 1.3670620549285939e-05,
      "loss": 0.7128,
      "step": 645350
    },
    {
      "epoch": 2.2618259050142466,
      "grad_norm": 2.390625,
      "learning_rate": 1.3669971520622238e-05,
      "loss": 0.8146,
      "step": 645360
    },
    {
      "epoch": 2.2618609525211424,
      "grad_norm": 2.84375,
      "learning_rate": 1.3669322491958536e-05,
      "loss": 0.8306,
      "step": 645370
    },
    {
      "epoch": 2.261896000028038,
      "grad_norm": 2.5625,
      "learning_rate": 1.3668673463294834e-05,
      "loss": 0.829,
      "step": 645380
    },
    {
      "epoch": 2.2619310475349335,
      "grad_norm": 2.75,
      "learning_rate": 1.3668024434631132e-05,
      "loss": 0.7966,
      "step": 645390
    },
    {
      "epoch": 2.261966095041829,
      "grad_norm": 2.359375,
      "learning_rate": 1.366737540596743e-05,
      "loss": 0.8269,
      "step": 645400
    },
    {
      "epoch": 2.262001142548725,
      "grad_norm": 3.078125,
      "learning_rate": 1.3666726377303728e-05,
      "loss": 0.7525,
      "step": 645410
    },
    {
      "epoch": 2.2620361900556203,
      "grad_norm": 2.765625,
      "learning_rate": 1.3666077348640028e-05,
      "loss": 0.7365,
      "step": 645420
    },
    {
      "epoch": 2.262071237562516,
      "grad_norm": 2.5,
      "learning_rate": 1.3665428319976323e-05,
      "loss": 0.8778,
      "step": 645430
    },
    {
      "epoch": 2.262106285069412,
      "grad_norm": 2.921875,
      "learning_rate": 1.366477929131262e-05,
      "loss": 0.7854,
      "step": 645440
    },
    {
      "epoch": 2.262141332576307,
      "grad_norm": 2.953125,
      "learning_rate": 1.3664130262648919e-05,
      "loss": 0.7791,
      "step": 645450
    },
    {
      "epoch": 2.262176380083203,
      "grad_norm": 2.8125,
      "learning_rate": 1.3663481233985217e-05,
      "loss": 0.8233,
      "step": 645460
    },
    {
      "epoch": 2.262211427590098,
      "grad_norm": 2.6875,
      "learning_rate": 1.3662832205321516e-05,
      "loss": 0.7739,
      "step": 645470
    },
    {
      "epoch": 2.262246475096994,
      "grad_norm": 2.984375,
      "learning_rate": 1.3662183176657814e-05,
      "loss": 0.8357,
      "step": 645480
    },
    {
      "epoch": 2.2622815226038897,
      "grad_norm": 3.140625,
      "learning_rate": 1.3661534147994112e-05,
      "loss": 0.76,
      "step": 645490
    },
    {
      "epoch": 2.262316570110785,
      "grad_norm": 2.875,
      "learning_rate": 1.366088511933041e-05,
      "loss": 0.7191,
      "step": 645500
    },
    {
      "epoch": 2.2623516176176808,
      "grad_norm": 2.8125,
      "learning_rate": 1.3660236090666708e-05,
      "loss": 0.8179,
      "step": 645510
    },
    {
      "epoch": 2.2623866651245765,
      "grad_norm": 3.09375,
      "learning_rate": 1.3659587062003006e-05,
      "loss": 0.8334,
      "step": 645520
    },
    {
      "epoch": 2.262421712631472,
      "grad_norm": 3.1875,
      "learning_rate": 1.3658938033339304e-05,
      "loss": 0.8211,
      "step": 645530
    },
    {
      "epoch": 2.2624567601383676,
      "grad_norm": 2.78125,
      "learning_rate": 1.3658289004675604e-05,
      "loss": 0.8311,
      "step": 645540
    },
    {
      "epoch": 2.2624918076452634,
      "grad_norm": 2.921875,
      "learning_rate": 1.3657639976011902e-05,
      "loss": 0.8207,
      "step": 645550
    },
    {
      "epoch": 2.2625268551521587,
      "grad_norm": 3.078125,
      "learning_rate": 1.36569909473482e-05,
      "loss": 0.8246,
      "step": 645560
    },
    {
      "epoch": 2.2625619026590544,
      "grad_norm": 2.71875,
      "learning_rate": 1.3656341918684498e-05,
      "loss": 0.7248,
      "step": 645570
    },
    {
      "epoch": 2.2625969501659497,
      "grad_norm": 2.75,
      "learning_rate": 1.3655692890020796e-05,
      "loss": 0.8142,
      "step": 645580
    },
    {
      "epoch": 2.2626319976728455,
      "grad_norm": 3.1875,
      "learning_rate": 1.3655043861357094e-05,
      "loss": 0.8302,
      "step": 645590
    },
    {
      "epoch": 2.2626670451797413,
      "grad_norm": 3.34375,
      "learning_rate": 1.3654394832693392e-05,
      "loss": 0.905,
      "step": 645600
    },
    {
      "epoch": 2.2627020926866366,
      "grad_norm": 3.8125,
      "learning_rate": 1.3653745804029692e-05,
      "loss": 0.8024,
      "step": 645610
    },
    {
      "epoch": 2.2627371401935323,
      "grad_norm": 3.140625,
      "learning_rate": 1.3653096775365986e-05,
      "loss": 0.8382,
      "step": 645620
    },
    {
      "epoch": 2.262772187700428,
      "grad_norm": 3.296875,
      "learning_rate": 1.3652447746702284e-05,
      "loss": 0.8138,
      "step": 645630
    },
    {
      "epoch": 2.2628072352073234,
      "grad_norm": 3.15625,
      "learning_rate": 1.3651798718038582e-05,
      "loss": 0.7861,
      "step": 645640
    },
    {
      "epoch": 2.262842282714219,
      "grad_norm": 2.953125,
      "learning_rate": 1.3651149689374882e-05,
      "loss": 0.7741,
      "step": 645650
    },
    {
      "epoch": 2.262877330221115,
      "grad_norm": 2.859375,
      "learning_rate": 1.365050066071118e-05,
      "loss": 0.803,
      "step": 645660
    },
    {
      "epoch": 2.2629123777280102,
      "grad_norm": 2.671875,
      "learning_rate": 1.3649851632047478e-05,
      "loss": 0.845,
      "step": 645670
    },
    {
      "epoch": 2.262947425234906,
      "grad_norm": 3.234375,
      "learning_rate": 1.3649202603383776e-05,
      "loss": 0.8024,
      "step": 645680
    },
    {
      "epoch": 2.2629824727418013,
      "grad_norm": 2.71875,
      "learning_rate": 1.3648553574720074e-05,
      "loss": 0.7756,
      "step": 645690
    },
    {
      "epoch": 2.263017520248697,
      "grad_norm": 2.921875,
      "learning_rate": 1.3647904546056372e-05,
      "loss": 0.7558,
      "step": 645700
    },
    {
      "epoch": 2.263052567755593,
      "grad_norm": 2.9375,
      "learning_rate": 1.364725551739267e-05,
      "loss": 0.7822,
      "step": 645710
    },
    {
      "epoch": 2.263087615262488,
      "grad_norm": 3.25,
      "learning_rate": 1.364660648872897e-05,
      "loss": 0.8582,
      "step": 645720
    },
    {
      "epoch": 2.263122662769384,
      "grad_norm": 2.59375,
      "learning_rate": 1.3645957460065268e-05,
      "loss": 0.7512,
      "step": 645730
    },
    {
      "epoch": 2.2631577102762797,
      "grad_norm": 3.015625,
      "learning_rate": 1.3645308431401566e-05,
      "loss": 0.7857,
      "step": 645740
    },
    {
      "epoch": 2.263192757783175,
      "grad_norm": 3.015625,
      "learning_rate": 1.3644659402737864e-05,
      "loss": 0.7728,
      "step": 645750
    },
    {
      "epoch": 2.2632278052900707,
      "grad_norm": 3.671875,
      "learning_rate": 1.3644010374074162e-05,
      "loss": 0.8344,
      "step": 645760
    },
    {
      "epoch": 2.2632628527969665,
      "grad_norm": 3.265625,
      "learning_rate": 1.364336134541046e-05,
      "loss": 0.8762,
      "step": 645770
    },
    {
      "epoch": 2.263297900303862,
      "grad_norm": 2.890625,
      "learning_rate": 1.3642712316746758e-05,
      "loss": 0.8343,
      "step": 645780
    },
    {
      "epoch": 2.2633329478107576,
      "grad_norm": 2.75,
      "learning_rate": 1.3642063288083058e-05,
      "loss": 0.8111,
      "step": 645790
    },
    {
      "epoch": 2.263367995317653,
      "grad_norm": 2.78125,
      "learning_rate": 1.3641414259419356e-05,
      "loss": 0.794,
      "step": 645800
    },
    {
      "epoch": 2.2634030428245486,
      "grad_norm": 2.828125,
      "learning_rate": 1.364076523075565e-05,
      "loss": 0.7807,
      "step": 645810
    },
    {
      "epoch": 2.2634380903314444,
      "grad_norm": 3.109375,
      "learning_rate": 1.3640116202091948e-05,
      "loss": 0.8027,
      "step": 645820
    },
    {
      "epoch": 2.2634731378383397,
      "grad_norm": 3.046875,
      "learning_rate": 1.3639467173428246e-05,
      "loss": 0.7981,
      "step": 645830
    },
    {
      "epoch": 2.2635081853452355,
      "grad_norm": 3.140625,
      "learning_rate": 1.3638818144764546e-05,
      "loss": 0.8696,
      "step": 645840
    },
    {
      "epoch": 2.263543232852131,
      "grad_norm": 2.765625,
      "learning_rate": 1.3638169116100844e-05,
      "loss": 0.8042,
      "step": 645850
    },
    {
      "epoch": 2.2635782803590265,
      "grad_norm": 3.125,
      "learning_rate": 1.3637520087437142e-05,
      "loss": 0.7674,
      "step": 645860
    },
    {
      "epoch": 2.2636133278659223,
      "grad_norm": 3.578125,
      "learning_rate": 1.363687105877344e-05,
      "loss": 0.8293,
      "step": 645870
    },
    {
      "epoch": 2.263648375372818,
      "grad_norm": 3.234375,
      "learning_rate": 1.3636222030109738e-05,
      "loss": 0.7917,
      "step": 645880
    },
    {
      "epoch": 2.2636834228797134,
      "grad_norm": 3.015625,
      "learning_rate": 1.3635573001446036e-05,
      "loss": 0.7817,
      "step": 645890
    },
    {
      "epoch": 2.263718470386609,
      "grad_norm": 2.671875,
      "learning_rate": 1.3634923972782334e-05,
      "loss": 0.7709,
      "step": 645900
    },
    {
      "epoch": 2.2637535178935044,
      "grad_norm": 3.265625,
      "learning_rate": 1.3634274944118634e-05,
      "loss": 0.8872,
      "step": 645910
    },
    {
      "epoch": 2.2637885654004,
      "grad_norm": 3.21875,
      "learning_rate": 1.3633625915454932e-05,
      "loss": 0.8222,
      "step": 645920
    },
    {
      "epoch": 2.263823612907296,
      "grad_norm": 3.1875,
      "learning_rate": 1.363297688679123e-05,
      "loss": 0.8697,
      "step": 645930
    },
    {
      "epoch": 2.2638586604141913,
      "grad_norm": 2.953125,
      "learning_rate": 1.3632327858127528e-05,
      "loss": 0.8643,
      "step": 645940
    },
    {
      "epoch": 2.263893707921087,
      "grad_norm": 2.671875,
      "learning_rate": 1.3631678829463826e-05,
      "loss": 0.8257,
      "step": 645950
    },
    {
      "epoch": 2.2639287554279828,
      "grad_norm": 2.90625,
      "learning_rate": 1.3631029800800124e-05,
      "loss": 0.7939,
      "step": 645960
    },
    {
      "epoch": 2.263963802934878,
      "grad_norm": 3.328125,
      "learning_rate": 1.3630380772136423e-05,
      "loss": 0.7839,
      "step": 645970
    },
    {
      "epoch": 2.263998850441774,
      "grad_norm": 3.25,
      "learning_rate": 1.3629731743472721e-05,
      "loss": 0.8542,
      "step": 645980
    },
    {
      "epoch": 2.2640338979486696,
      "grad_norm": 2.78125,
      "learning_rate": 1.3629082714809016e-05,
      "loss": 0.8362,
      "step": 645990
    },
    {
      "epoch": 2.264068945455565,
      "grad_norm": 3.25,
      "learning_rate": 1.3628433686145314e-05,
      "loss": 0.8162,
      "step": 646000
    },
    {
      "epoch": 2.2641039929624607,
      "grad_norm": 2.984375,
      "learning_rate": 1.3627784657481612e-05,
      "loss": 0.8487,
      "step": 646010
    },
    {
      "epoch": 2.264139040469356,
      "grad_norm": 2.828125,
      "learning_rate": 1.3627135628817912e-05,
      "loss": 0.7787,
      "step": 646020
    },
    {
      "epoch": 2.2641740879762517,
      "grad_norm": 3.5625,
      "learning_rate": 1.362648660015421e-05,
      "loss": 0.8167,
      "step": 646030
    },
    {
      "epoch": 2.2642091354831475,
      "grad_norm": 2.8125,
      "learning_rate": 1.3625837571490508e-05,
      "loss": 0.8206,
      "step": 646040
    },
    {
      "epoch": 2.264244182990043,
      "grad_norm": 2.71875,
      "learning_rate": 1.3625188542826806e-05,
      "loss": 0.7636,
      "step": 646050
    },
    {
      "epoch": 2.2642792304969386,
      "grad_norm": 3.640625,
      "learning_rate": 1.3624539514163104e-05,
      "loss": 0.8416,
      "step": 646060
    },
    {
      "epoch": 2.2643142780038343,
      "grad_norm": 2.78125,
      "learning_rate": 1.3623890485499402e-05,
      "loss": 0.7593,
      "step": 646070
    },
    {
      "epoch": 2.2643493255107296,
      "grad_norm": 2.78125,
      "learning_rate": 1.36232414568357e-05,
      "loss": 0.811,
      "step": 646080
    },
    {
      "epoch": 2.2643843730176254,
      "grad_norm": 3.0,
      "learning_rate": 1.3622592428172e-05,
      "loss": 0.8699,
      "step": 646090
    },
    {
      "epoch": 2.264419420524521,
      "grad_norm": 2.390625,
      "learning_rate": 1.3621943399508297e-05,
      "loss": 0.7543,
      "step": 646100
    },
    {
      "epoch": 2.2644544680314165,
      "grad_norm": 2.53125,
      "learning_rate": 1.3621294370844595e-05,
      "loss": 0.8492,
      "step": 646110
    },
    {
      "epoch": 2.2644895155383122,
      "grad_norm": 2.65625,
      "learning_rate": 1.3620645342180893e-05,
      "loss": 0.7681,
      "step": 646120
    },
    {
      "epoch": 2.2645245630452076,
      "grad_norm": 2.546875,
      "learning_rate": 1.3619996313517191e-05,
      "loss": 0.8355,
      "step": 646130
    },
    {
      "epoch": 2.2645596105521033,
      "grad_norm": 3.53125,
      "learning_rate": 1.361934728485349e-05,
      "loss": 0.8479,
      "step": 646140
    },
    {
      "epoch": 2.264594658058999,
      "grad_norm": 3.3125,
      "learning_rate": 1.3618698256189787e-05,
      "loss": 0.8246,
      "step": 646150
    },
    {
      "epoch": 2.2646297055658944,
      "grad_norm": 2.703125,
      "learning_rate": 1.3618049227526087e-05,
      "loss": 0.8268,
      "step": 646160
    },
    {
      "epoch": 2.26466475307279,
      "grad_norm": 2.609375,
      "learning_rate": 1.3617400198862385e-05,
      "loss": 0.8243,
      "step": 646170
    },
    {
      "epoch": 2.264699800579686,
      "grad_norm": 2.796875,
      "learning_rate": 1.361675117019868e-05,
      "loss": 0.7675,
      "step": 646180
    },
    {
      "epoch": 2.264734848086581,
      "grad_norm": 3.453125,
      "learning_rate": 1.3616102141534978e-05,
      "loss": 0.8732,
      "step": 646190
    },
    {
      "epoch": 2.264769895593477,
      "grad_norm": 3.046875,
      "learning_rate": 1.3615453112871277e-05,
      "loss": 0.8149,
      "step": 646200
    },
    {
      "epoch": 2.2648049431003727,
      "grad_norm": 2.6875,
      "learning_rate": 1.3614804084207575e-05,
      "loss": 0.8077,
      "step": 646210
    },
    {
      "epoch": 2.264839990607268,
      "grad_norm": 3.15625,
      "learning_rate": 1.3614155055543873e-05,
      "loss": 0.8453,
      "step": 646220
    },
    {
      "epoch": 2.264875038114164,
      "grad_norm": 2.875,
      "learning_rate": 1.3613506026880171e-05,
      "loss": 0.8361,
      "step": 646230
    },
    {
      "epoch": 2.264910085621059,
      "grad_norm": 3.0,
      "learning_rate": 1.361285699821647e-05,
      "loss": 0.8217,
      "step": 646240
    },
    {
      "epoch": 2.264945133127955,
      "grad_norm": 2.8125,
      "learning_rate": 1.3612207969552767e-05,
      "loss": 0.799,
      "step": 646250
    },
    {
      "epoch": 2.2649801806348506,
      "grad_norm": 2.9375,
      "learning_rate": 1.3611558940889065e-05,
      "loss": 0.8053,
      "step": 646260
    },
    {
      "epoch": 2.2650152281417464,
      "grad_norm": 3.015625,
      "learning_rate": 1.3610909912225365e-05,
      "loss": 0.8255,
      "step": 646270
    },
    {
      "epoch": 2.2650502756486417,
      "grad_norm": 2.609375,
      "learning_rate": 1.3610260883561663e-05,
      "loss": 0.8198,
      "step": 646280
    },
    {
      "epoch": 2.2650853231555375,
      "grad_norm": 3.015625,
      "learning_rate": 1.3609611854897961e-05,
      "loss": 0.853,
      "step": 646290
    },
    {
      "epoch": 2.2651203706624328,
      "grad_norm": 2.96875,
      "learning_rate": 1.3608962826234259e-05,
      "loss": 0.7558,
      "step": 646300
    },
    {
      "epoch": 2.2651554181693285,
      "grad_norm": 2.875,
      "learning_rate": 1.3608313797570557e-05,
      "loss": 0.8605,
      "step": 646310
    },
    {
      "epoch": 2.2651904656762243,
      "grad_norm": 2.9375,
      "learning_rate": 1.3607664768906855e-05,
      "loss": 0.8249,
      "step": 646320
    },
    {
      "epoch": 2.2652255131831196,
      "grad_norm": 2.84375,
      "learning_rate": 1.3607015740243153e-05,
      "loss": 0.8725,
      "step": 646330
    },
    {
      "epoch": 2.2652605606900154,
      "grad_norm": 3.21875,
      "learning_rate": 1.3606366711579453e-05,
      "loss": 0.8884,
      "step": 646340
    },
    {
      "epoch": 2.2652956081969107,
      "grad_norm": 3.203125,
      "learning_rate": 1.360571768291575e-05,
      "loss": 0.7837,
      "step": 646350
    },
    {
      "epoch": 2.2653306557038064,
      "grad_norm": 3.15625,
      "learning_rate": 1.3605068654252049e-05,
      "loss": 0.824,
      "step": 646360
    },
    {
      "epoch": 2.265365703210702,
      "grad_norm": 2.84375,
      "learning_rate": 1.3604419625588343e-05,
      "loss": 0.8065,
      "step": 646370
    },
    {
      "epoch": 2.265400750717598,
      "grad_norm": 2.625,
      "learning_rate": 1.3603770596924641e-05,
      "loss": 0.7938,
      "step": 646380
    },
    {
      "epoch": 2.2654357982244933,
      "grad_norm": 3.0625,
      "learning_rate": 1.3603121568260941e-05,
      "loss": 0.7934,
      "step": 646390
    },
    {
      "epoch": 2.265470845731389,
      "grad_norm": 2.46875,
      "learning_rate": 1.3602472539597239e-05,
      "loss": 0.7266,
      "step": 646400
    },
    {
      "epoch": 2.2655058932382843,
      "grad_norm": 2.71875,
      "learning_rate": 1.3601823510933537e-05,
      "loss": 0.8394,
      "step": 646410
    },
    {
      "epoch": 2.26554094074518,
      "grad_norm": 2.703125,
      "learning_rate": 1.3601174482269835e-05,
      "loss": 0.8087,
      "step": 646420
    },
    {
      "epoch": 2.265575988252076,
      "grad_norm": 3.046875,
      "learning_rate": 1.3600525453606133e-05,
      "loss": 0.7708,
      "step": 646430
    },
    {
      "epoch": 2.265611035758971,
      "grad_norm": 2.921875,
      "learning_rate": 1.3599876424942431e-05,
      "loss": 0.8158,
      "step": 646440
    },
    {
      "epoch": 2.265646083265867,
      "grad_norm": 2.859375,
      "learning_rate": 1.359922739627873e-05,
      "loss": 0.8756,
      "step": 646450
    },
    {
      "epoch": 2.2656811307727622,
      "grad_norm": 3.046875,
      "learning_rate": 1.3598578367615029e-05,
      "loss": 0.7322,
      "step": 646460
    },
    {
      "epoch": 2.265716178279658,
      "grad_norm": 3.03125,
      "learning_rate": 1.3597929338951327e-05,
      "loss": 0.8593,
      "step": 646470
    },
    {
      "epoch": 2.2657512257865537,
      "grad_norm": 2.40625,
      "learning_rate": 1.3597280310287625e-05,
      "loss": 0.7552,
      "step": 646480
    },
    {
      "epoch": 2.2657862732934495,
      "grad_norm": 2.734375,
      "learning_rate": 1.3596631281623923e-05,
      "loss": 0.8521,
      "step": 646490
    },
    {
      "epoch": 2.265821320800345,
      "grad_norm": 2.875,
      "learning_rate": 1.359598225296022e-05,
      "loss": 0.8458,
      "step": 646500
    },
    {
      "epoch": 2.2658563683072406,
      "grad_norm": 3.421875,
      "learning_rate": 1.3595333224296519e-05,
      "loss": 0.8725,
      "step": 646510
    },
    {
      "epoch": 2.265891415814136,
      "grad_norm": 3.015625,
      "learning_rate": 1.3594684195632818e-05,
      "loss": 0.7966,
      "step": 646520
    },
    {
      "epoch": 2.2659264633210316,
      "grad_norm": 3.078125,
      "learning_rate": 1.3594035166969116e-05,
      "loss": 0.7988,
      "step": 646530
    },
    {
      "epoch": 2.2659615108279274,
      "grad_norm": 2.625,
      "learning_rate": 1.3593386138305414e-05,
      "loss": 0.7883,
      "step": 646540
    },
    {
      "epoch": 2.2659965583348227,
      "grad_norm": 2.75,
      "learning_rate": 1.3592737109641712e-05,
      "loss": 0.8651,
      "step": 646550
    },
    {
      "epoch": 2.2660316058417185,
      "grad_norm": 2.9375,
      "learning_rate": 1.3592088080978007e-05,
      "loss": 0.8248,
      "step": 646560
    },
    {
      "epoch": 2.2660666533486142,
      "grad_norm": 2.5625,
      "learning_rate": 1.3591439052314307e-05,
      "loss": 0.8496,
      "step": 646570
    },
    {
      "epoch": 2.2661017008555095,
      "grad_norm": 3.125,
      "learning_rate": 1.3590790023650605e-05,
      "loss": 0.7938,
      "step": 646580
    },
    {
      "epoch": 2.2661367483624053,
      "grad_norm": 3.390625,
      "learning_rate": 1.3590140994986903e-05,
      "loss": 0.862,
      "step": 646590
    },
    {
      "epoch": 2.266171795869301,
      "grad_norm": 3.3125,
      "learning_rate": 1.35894919663232e-05,
      "loss": 0.8615,
      "step": 646600
    },
    {
      "epoch": 2.2662068433761964,
      "grad_norm": 3.484375,
      "learning_rate": 1.3588842937659499e-05,
      "loss": 0.8954,
      "step": 646610
    },
    {
      "epoch": 2.266241890883092,
      "grad_norm": 2.921875,
      "learning_rate": 1.3588193908995797e-05,
      "loss": 0.8065,
      "step": 646620
    },
    {
      "epoch": 2.2662769383899874,
      "grad_norm": 2.75,
      "learning_rate": 1.3587544880332095e-05,
      "loss": 0.7845,
      "step": 646630
    },
    {
      "epoch": 2.266311985896883,
      "grad_norm": 2.84375,
      "learning_rate": 1.3586895851668394e-05,
      "loss": 0.8509,
      "step": 646640
    },
    {
      "epoch": 2.266347033403779,
      "grad_norm": 2.828125,
      "learning_rate": 1.3586246823004692e-05,
      "loss": 0.7258,
      "step": 646650
    },
    {
      "epoch": 2.2663820809106743,
      "grad_norm": 2.78125,
      "learning_rate": 1.358559779434099e-05,
      "loss": 0.7491,
      "step": 646660
    },
    {
      "epoch": 2.26641712841757,
      "grad_norm": 3.015625,
      "learning_rate": 1.3584948765677288e-05,
      "loss": 0.7727,
      "step": 646670
    },
    {
      "epoch": 2.266452175924466,
      "grad_norm": 2.75,
      "learning_rate": 1.3584299737013586e-05,
      "loss": 0.7925,
      "step": 646680
    },
    {
      "epoch": 2.266487223431361,
      "grad_norm": 2.984375,
      "learning_rate": 1.3583650708349884e-05,
      "loss": 0.8473,
      "step": 646690
    },
    {
      "epoch": 2.266522270938257,
      "grad_norm": 3.171875,
      "learning_rate": 1.3583001679686182e-05,
      "loss": 0.8168,
      "step": 646700
    },
    {
      "epoch": 2.2665573184451526,
      "grad_norm": 2.984375,
      "learning_rate": 1.3582352651022482e-05,
      "loss": 0.816,
      "step": 646710
    },
    {
      "epoch": 2.266592365952048,
      "grad_norm": 2.984375,
      "learning_rate": 1.358170362235878e-05,
      "loss": 0.7471,
      "step": 646720
    },
    {
      "epoch": 2.2666274134589437,
      "grad_norm": 2.625,
      "learning_rate": 1.3581054593695078e-05,
      "loss": 0.8555,
      "step": 646730
    },
    {
      "epoch": 2.266662460965839,
      "grad_norm": 3.015625,
      "learning_rate": 1.3580405565031376e-05,
      "loss": 0.729,
      "step": 646740
    },
    {
      "epoch": 2.2666975084727348,
      "grad_norm": 2.78125,
      "learning_rate": 1.3579756536367672e-05,
      "loss": 0.8301,
      "step": 646750
    },
    {
      "epoch": 2.2667325559796305,
      "grad_norm": 3.046875,
      "learning_rate": 1.357910750770397e-05,
      "loss": 0.8963,
      "step": 646760
    },
    {
      "epoch": 2.266767603486526,
      "grad_norm": 3.15625,
      "learning_rate": 1.3578458479040268e-05,
      "loss": 0.9261,
      "step": 646770
    },
    {
      "epoch": 2.2668026509934216,
      "grad_norm": 3.140625,
      "learning_rate": 1.3577809450376566e-05,
      "loss": 0.8328,
      "step": 646780
    },
    {
      "epoch": 2.2668376985003174,
      "grad_norm": 2.953125,
      "learning_rate": 1.3577160421712864e-05,
      "loss": 0.7482,
      "step": 646790
    },
    {
      "epoch": 2.2668727460072127,
      "grad_norm": 2.265625,
      "learning_rate": 1.3576511393049162e-05,
      "loss": 0.765,
      "step": 646800
    },
    {
      "epoch": 2.2669077935141084,
      "grad_norm": 3.15625,
      "learning_rate": 1.357586236438546e-05,
      "loss": 0.8608,
      "step": 646810
    },
    {
      "epoch": 2.266942841021004,
      "grad_norm": 2.71875,
      "learning_rate": 1.357521333572176e-05,
      "loss": 0.836,
      "step": 646820
    },
    {
      "epoch": 2.2669778885278995,
      "grad_norm": 2.703125,
      "learning_rate": 1.3574564307058058e-05,
      "loss": 0.8616,
      "step": 646830
    },
    {
      "epoch": 2.2670129360347953,
      "grad_norm": 2.9375,
      "learning_rate": 1.3573915278394356e-05,
      "loss": 0.8372,
      "step": 646840
    },
    {
      "epoch": 2.2670479835416906,
      "grad_norm": 2.671875,
      "learning_rate": 1.3573266249730654e-05,
      "loss": 0.7452,
      "step": 646850
    },
    {
      "epoch": 2.2670830310485863,
      "grad_norm": 2.671875,
      "learning_rate": 1.3572617221066952e-05,
      "loss": 0.7916,
      "step": 646860
    },
    {
      "epoch": 2.267118078555482,
      "grad_norm": 2.8125,
      "learning_rate": 1.357196819240325e-05,
      "loss": 0.8462,
      "step": 646870
    },
    {
      "epoch": 2.2671531260623774,
      "grad_norm": 3.15625,
      "learning_rate": 1.3571319163739548e-05,
      "loss": 0.7669,
      "step": 646880
    },
    {
      "epoch": 2.267188173569273,
      "grad_norm": 3.015625,
      "learning_rate": 1.3570670135075848e-05,
      "loss": 0.8435,
      "step": 646890
    },
    {
      "epoch": 2.267223221076169,
      "grad_norm": 3.21875,
      "learning_rate": 1.3570021106412146e-05,
      "loss": 0.8344,
      "step": 646900
    },
    {
      "epoch": 2.2672582685830642,
      "grad_norm": 2.703125,
      "learning_rate": 1.3569372077748444e-05,
      "loss": 0.8447,
      "step": 646910
    },
    {
      "epoch": 2.26729331608996,
      "grad_norm": 2.734375,
      "learning_rate": 1.3568723049084742e-05,
      "loss": 0.8219,
      "step": 646920
    },
    {
      "epoch": 2.2673283635968557,
      "grad_norm": 2.4375,
      "learning_rate": 1.3568074020421036e-05,
      "loss": 0.8797,
      "step": 646930
    },
    {
      "epoch": 2.267363411103751,
      "grad_norm": 3.046875,
      "learning_rate": 1.3567424991757336e-05,
      "loss": 0.8739,
      "step": 646940
    },
    {
      "epoch": 2.267398458610647,
      "grad_norm": 2.953125,
      "learning_rate": 1.3566775963093634e-05,
      "loss": 0.7708,
      "step": 646950
    },
    {
      "epoch": 2.267433506117542,
      "grad_norm": 2.875,
      "learning_rate": 1.3566126934429932e-05,
      "loss": 0.8428,
      "step": 646960
    },
    {
      "epoch": 2.267468553624438,
      "grad_norm": 3.15625,
      "learning_rate": 1.356547790576623e-05,
      "loss": 0.8165,
      "step": 646970
    },
    {
      "epoch": 2.2675036011313336,
      "grad_norm": 3.1875,
      "learning_rate": 1.3564828877102528e-05,
      "loss": 0.8784,
      "step": 646980
    },
    {
      "epoch": 2.267538648638229,
      "grad_norm": 2.765625,
      "learning_rate": 1.3564179848438826e-05,
      "loss": 0.7579,
      "step": 646990
    },
    {
      "epoch": 2.2675736961451247,
      "grad_norm": 2.9375,
      "learning_rate": 1.3563530819775126e-05,
      "loss": 0.7926,
      "step": 647000
    },
    {
      "epoch": 2.2676087436520205,
      "grad_norm": 3.046875,
      "learning_rate": 1.3562881791111424e-05,
      "loss": 0.8445,
      "step": 647010
    },
    {
      "epoch": 2.267643791158916,
      "grad_norm": 3.015625,
      "learning_rate": 1.3562232762447722e-05,
      "loss": 0.8374,
      "step": 647020
    },
    {
      "epoch": 2.2676788386658115,
      "grad_norm": 2.75,
      "learning_rate": 1.356158373378402e-05,
      "loss": 0.8577,
      "step": 647030
    },
    {
      "epoch": 2.2677138861727073,
      "grad_norm": 2.796875,
      "learning_rate": 1.3560934705120318e-05,
      "loss": 0.7925,
      "step": 647040
    },
    {
      "epoch": 2.2677489336796026,
      "grad_norm": 2.890625,
      "learning_rate": 1.3560285676456616e-05,
      "loss": 0.737,
      "step": 647050
    },
    {
      "epoch": 2.2677839811864984,
      "grad_norm": 2.75,
      "learning_rate": 1.3559636647792914e-05,
      "loss": 0.7635,
      "step": 647060
    },
    {
      "epoch": 2.2678190286933937,
      "grad_norm": 2.96875,
      "learning_rate": 1.3558987619129213e-05,
      "loss": 0.8124,
      "step": 647070
    },
    {
      "epoch": 2.2678540762002894,
      "grad_norm": 2.71875,
      "learning_rate": 1.3558338590465511e-05,
      "loss": 0.8335,
      "step": 647080
    },
    {
      "epoch": 2.267889123707185,
      "grad_norm": 3.015625,
      "learning_rate": 1.355768956180181e-05,
      "loss": 0.7706,
      "step": 647090
    },
    {
      "epoch": 2.2679241712140805,
      "grad_norm": 2.875,
      "learning_rate": 1.3557040533138107e-05,
      "loss": 0.9039,
      "step": 647100
    },
    {
      "epoch": 2.2679592187209763,
      "grad_norm": 2.9375,
      "learning_rate": 1.3556391504474405e-05,
      "loss": 0.8053,
      "step": 647110
    },
    {
      "epoch": 2.267994266227872,
      "grad_norm": 3.09375,
      "learning_rate": 1.3555742475810702e-05,
      "loss": 0.8517,
      "step": 647120
    },
    {
      "epoch": 2.2680293137347673,
      "grad_norm": 3.265625,
      "learning_rate": 1.3555093447147e-05,
      "loss": 0.8289,
      "step": 647130
    },
    {
      "epoch": 2.268064361241663,
      "grad_norm": 2.96875,
      "learning_rate": 1.3554444418483298e-05,
      "loss": 0.8166,
      "step": 647140
    },
    {
      "epoch": 2.268099408748559,
      "grad_norm": 3.21875,
      "learning_rate": 1.3553795389819596e-05,
      "loss": 0.7978,
      "step": 647150
    },
    {
      "epoch": 2.268134456255454,
      "grad_norm": 2.84375,
      "learning_rate": 1.3553146361155894e-05,
      "loss": 0.7996,
      "step": 647160
    },
    {
      "epoch": 2.26816950376235,
      "grad_norm": 2.875,
      "learning_rate": 1.3552497332492192e-05,
      "loss": 0.7604,
      "step": 647170
    },
    {
      "epoch": 2.2682045512692453,
      "grad_norm": 3.046875,
      "learning_rate": 1.355184830382849e-05,
      "loss": 0.8361,
      "step": 647180
    },
    {
      "epoch": 2.268239598776141,
      "grad_norm": 2.453125,
      "learning_rate": 1.355119927516479e-05,
      "loss": 0.72,
      "step": 647190
    },
    {
      "epoch": 2.2682746462830368,
      "grad_norm": 2.828125,
      "learning_rate": 1.3550550246501087e-05,
      "loss": 0.702,
      "step": 647200
    },
    {
      "epoch": 2.268309693789932,
      "grad_norm": 2.765625,
      "learning_rate": 1.3549901217837385e-05,
      "loss": 0.8244,
      "step": 647210
    },
    {
      "epoch": 2.268344741296828,
      "grad_norm": 3.234375,
      "learning_rate": 1.3549252189173683e-05,
      "loss": 0.883,
      "step": 647220
    },
    {
      "epoch": 2.2683797888037236,
      "grad_norm": 2.625,
      "learning_rate": 1.3548603160509981e-05,
      "loss": 0.7551,
      "step": 647230
    },
    {
      "epoch": 2.268414836310619,
      "grad_norm": 3.171875,
      "learning_rate": 1.354795413184628e-05,
      "loss": 0.844,
      "step": 647240
    },
    {
      "epoch": 2.2684498838175147,
      "grad_norm": 2.875,
      "learning_rate": 1.3547305103182577e-05,
      "loss": 0.879,
      "step": 647250
    },
    {
      "epoch": 2.2684849313244104,
      "grad_norm": 2.84375,
      "learning_rate": 1.3546656074518877e-05,
      "loss": 0.8784,
      "step": 647260
    },
    {
      "epoch": 2.2685199788313057,
      "grad_norm": 2.9375,
      "learning_rate": 1.3546007045855175e-05,
      "loss": 0.8162,
      "step": 647270
    },
    {
      "epoch": 2.2685550263382015,
      "grad_norm": 2.953125,
      "learning_rate": 1.3545358017191473e-05,
      "loss": 0.7626,
      "step": 647280
    },
    {
      "epoch": 2.268590073845097,
      "grad_norm": 2.8125,
      "learning_rate": 1.3544708988527771e-05,
      "loss": 0.8305,
      "step": 647290
    },
    {
      "epoch": 2.2686251213519926,
      "grad_norm": 2.96875,
      "learning_rate": 1.3544059959864069e-05,
      "loss": 0.8711,
      "step": 647300
    },
    {
      "epoch": 2.2686601688588883,
      "grad_norm": 3.0625,
      "learning_rate": 1.3543410931200365e-05,
      "loss": 0.7907,
      "step": 647310
    },
    {
      "epoch": 2.2686952163657836,
      "grad_norm": 2.765625,
      "learning_rate": 1.3542761902536663e-05,
      "loss": 0.72,
      "step": 647320
    },
    {
      "epoch": 2.2687302638726794,
      "grad_norm": 2.921875,
      "learning_rate": 1.3542112873872961e-05,
      "loss": 0.8313,
      "step": 647330
    },
    {
      "epoch": 2.268765311379575,
      "grad_norm": 2.890625,
      "learning_rate": 1.354146384520926e-05,
      "loss": 0.8163,
      "step": 647340
    },
    {
      "epoch": 2.2688003588864705,
      "grad_norm": 3.109375,
      "learning_rate": 1.3540814816545557e-05,
      "loss": 0.838,
      "step": 647350
    },
    {
      "epoch": 2.2688354063933662,
      "grad_norm": 2.4375,
      "learning_rate": 1.3540165787881855e-05,
      "loss": 0.8488,
      "step": 647360
    },
    {
      "epoch": 2.268870453900262,
      "grad_norm": 2.453125,
      "learning_rate": 1.3539516759218155e-05,
      "loss": 0.7572,
      "step": 647370
    },
    {
      "epoch": 2.2689055014071573,
      "grad_norm": 2.734375,
      "learning_rate": 1.3538867730554453e-05,
      "loss": 0.845,
      "step": 647380
    },
    {
      "epoch": 2.268940548914053,
      "grad_norm": 2.875,
      "learning_rate": 1.3538218701890751e-05,
      "loss": 0.7957,
      "step": 647390
    },
    {
      "epoch": 2.2689755964209484,
      "grad_norm": 3.03125,
      "learning_rate": 1.3537569673227049e-05,
      "loss": 0.8732,
      "step": 647400
    },
    {
      "epoch": 2.269010643927844,
      "grad_norm": 3.0625,
      "learning_rate": 1.3536920644563347e-05,
      "loss": 0.824,
      "step": 647410
    },
    {
      "epoch": 2.26904569143474,
      "grad_norm": 2.984375,
      "learning_rate": 1.3536271615899645e-05,
      "loss": 0.7508,
      "step": 647420
    },
    {
      "epoch": 2.269080738941635,
      "grad_norm": 2.640625,
      "learning_rate": 1.3535622587235943e-05,
      "loss": 0.7692,
      "step": 647430
    },
    {
      "epoch": 2.269115786448531,
      "grad_norm": 3.0625,
      "learning_rate": 1.3534973558572243e-05,
      "loss": 0.8255,
      "step": 647440
    },
    {
      "epoch": 2.2691508339554267,
      "grad_norm": 2.671875,
      "learning_rate": 1.353432452990854e-05,
      "loss": 0.8425,
      "step": 647450
    },
    {
      "epoch": 2.269185881462322,
      "grad_norm": 2.46875,
      "learning_rate": 1.3533675501244839e-05,
      "loss": 0.7748,
      "step": 647460
    },
    {
      "epoch": 2.269220928969218,
      "grad_norm": 3.484375,
      "learning_rate": 1.3533026472581137e-05,
      "loss": 0.7753,
      "step": 647470
    },
    {
      "epoch": 2.2692559764761135,
      "grad_norm": 3.046875,
      "learning_rate": 1.3532377443917435e-05,
      "loss": 0.7892,
      "step": 647480
    },
    {
      "epoch": 2.269291023983009,
      "grad_norm": 2.875,
      "learning_rate": 1.3531728415253733e-05,
      "loss": 0.807,
      "step": 647490
    },
    {
      "epoch": 2.2693260714899046,
      "grad_norm": 2.65625,
      "learning_rate": 1.3531079386590029e-05,
      "loss": 0.9148,
      "step": 647500
    },
    {
      "epoch": 2.2693611189968,
      "grad_norm": 3.375,
      "learning_rate": 1.3530430357926327e-05,
      "loss": 0.8851,
      "step": 647510
    },
    {
      "epoch": 2.2693961665036957,
      "grad_norm": 3.03125,
      "learning_rate": 1.3529781329262625e-05,
      "loss": 0.8053,
      "step": 647520
    },
    {
      "epoch": 2.2694312140105914,
      "grad_norm": 2.75,
      "learning_rate": 1.3529132300598923e-05,
      "loss": 0.6922,
      "step": 647530
    },
    {
      "epoch": 2.2694662615174868,
      "grad_norm": 3.265625,
      "learning_rate": 1.3528483271935221e-05,
      "loss": 0.8167,
      "step": 647540
    },
    {
      "epoch": 2.2695013090243825,
      "grad_norm": 2.296875,
      "learning_rate": 1.352783424327152e-05,
      "loss": 0.7197,
      "step": 647550
    },
    {
      "epoch": 2.2695363565312783,
      "grad_norm": 3.234375,
      "learning_rate": 1.3527185214607819e-05,
      "loss": 0.8038,
      "step": 647560
    },
    {
      "epoch": 2.2695714040381736,
      "grad_norm": 2.5625,
      "learning_rate": 1.3526536185944117e-05,
      "loss": 0.7008,
      "step": 647570
    },
    {
      "epoch": 2.2696064515450693,
      "grad_norm": 3.296875,
      "learning_rate": 1.3525887157280415e-05,
      "loss": 0.7747,
      "step": 647580
    },
    {
      "epoch": 2.269641499051965,
      "grad_norm": 2.65625,
      "learning_rate": 1.3525238128616713e-05,
      "loss": 0.7918,
      "step": 647590
    },
    {
      "epoch": 2.2696765465588604,
      "grad_norm": 3.1875,
      "learning_rate": 1.352458909995301e-05,
      "loss": 0.7227,
      "step": 647600
    },
    {
      "epoch": 2.269711594065756,
      "grad_norm": 2.953125,
      "learning_rate": 1.3523940071289309e-05,
      "loss": 0.828,
      "step": 647610
    },
    {
      "epoch": 2.2697466415726515,
      "grad_norm": 2.859375,
      "learning_rate": 1.3523291042625608e-05,
      "loss": 0.8334,
      "step": 647620
    },
    {
      "epoch": 2.2697816890795472,
      "grad_norm": 2.453125,
      "learning_rate": 1.3522642013961906e-05,
      "loss": 0.817,
      "step": 647630
    },
    {
      "epoch": 2.269816736586443,
      "grad_norm": 3.15625,
      "learning_rate": 1.3521992985298204e-05,
      "loss": 0.7663,
      "step": 647640
    },
    {
      "epoch": 2.2698517840933388,
      "grad_norm": 3.046875,
      "learning_rate": 1.3521343956634502e-05,
      "loss": 0.8319,
      "step": 647650
    },
    {
      "epoch": 2.269886831600234,
      "grad_norm": 2.546875,
      "learning_rate": 1.35206949279708e-05,
      "loss": 0.8427,
      "step": 647660
    },
    {
      "epoch": 2.26992187910713,
      "grad_norm": 3.03125,
      "learning_rate": 1.3520045899307098e-05,
      "loss": 0.7803,
      "step": 647670
    },
    {
      "epoch": 2.269956926614025,
      "grad_norm": 2.953125,
      "learning_rate": 1.3519396870643396e-05,
      "loss": 0.768,
      "step": 647680
    },
    {
      "epoch": 2.269991974120921,
      "grad_norm": 3.046875,
      "learning_rate": 1.3518747841979693e-05,
      "loss": 0.7969,
      "step": 647690
    },
    {
      "epoch": 2.2700270216278167,
      "grad_norm": 3.03125,
      "learning_rate": 1.351809881331599e-05,
      "loss": 0.7847,
      "step": 647700
    },
    {
      "epoch": 2.270062069134712,
      "grad_norm": 2.828125,
      "learning_rate": 1.3517449784652289e-05,
      "loss": 0.7962,
      "step": 647710
    },
    {
      "epoch": 2.2700971166416077,
      "grad_norm": 3.140625,
      "learning_rate": 1.3516800755988587e-05,
      "loss": 0.7809,
      "step": 647720
    },
    {
      "epoch": 2.270132164148503,
      "grad_norm": 2.453125,
      "learning_rate": 1.3516151727324885e-05,
      "loss": 0.856,
      "step": 647730
    },
    {
      "epoch": 2.270167211655399,
      "grad_norm": 2.859375,
      "learning_rate": 1.3515502698661184e-05,
      "loss": 0.7984,
      "step": 647740
    },
    {
      "epoch": 2.2702022591622946,
      "grad_norm": 3.28125,
      "learning_rate": 1.3514853669997482e-05,
      "loss": 0.9066,
      "step": 647750
    },
    {
      "epoch": 2.2702373066691903,
      "grad_norm": 2.75,
      "learning_rate": 1.351420464133378e-05,
      "loss": 0.7556,
      "step": 647760
    },
    {
      "epoch": 2.2702723541760856,
      "grad_norm": 4.0625,
      "learning_rate": 1.3513555612670078e-05,
      "loss": 0.8394,
      "step": 647770
    },
    {
      "epoch": 2.2703074016829814,
      "grad_norm": 2.90625,
      "learning_rate": 1.3512906584006376e-05,
      "loss": 0.8671,
      "step": 647780
    },
    {
      "epoch": 2.2703424491898767,
      "grad_norm": 2.5,
      "learning_rate": 1.3512257555342674e-05,
      "loss": 0.8383,
      "step": 647790
    },
    {
      "epoch": 2.2703774966967725,
      "grad_norm": 2.71875,
      "learning_rate": 1.3511608526678972e-05,
      "loss": 0.8014,
      "step": 647800
    },
    {
      "epoch": 2.2704125442036682,
      "grad_norm": 2.796875,
      "learning_rate": 1.3510959498015272e-05,
      "loss": 0.7869,
      "step": 647810
    },
    {
      "epoch": 2.2704475917105635,
      "grad_norm": 3.03125,
      "learning_rate": 1.351031046935157e-05,
      "loss": 0.7969,
      "step": 647820
    },
    {
      "epoch": 2.2704826392174593,
      "grad_norm": 2.78125,
      "learning_rate": 1.3509661440687868e-05,
      "loss": 0.8405,
      "step": 647830
    },
    {
      "epoch": 2.270517686724355,
      "grad_norm": 2.84375,
      "learning_rate": 1.3509012412024166e-05,
      "loss": 0.8427,
      "step": 647840
    },
    {
      "epoch": 2.2705527342312504,
      "grad_norm": 2.734375,
      "learning_rate": 1.3508363383360464e-05,
      "loss": 0.7987,
      "step": 647850
    },
    {
      "epoch": 2.270587781738146,
      "grad_norm": 2.8125,
      "learning_rate": 1.3507714354696762e-05,
      "loss": 0.7945,
      "step": 647860
    },
    {
      "epoch": 2.270622829245042,
      "grad_norm": 3.375,
      "learning_rate": 1.3507065326033058e-05,
      "loss": 0.8989,
      "step": 647870
    },
    {
      "epoch": 2.270657876751937,
      "grad_norm": 2.65625,
      "learning_rate": 1.3506416297369356e-05,
      "loss": 0.8015,
      "step": 647880
    },
    {
      "epoch": 2.270692924258833,
      "grad_norm": 2.953125,
      "learning_rate": 1.3505767268705654e-05,
      "loss": 0.8339,
      "step": 647890
    },
    {
      "epoch": 2.2707279717657283,
      "grad_norm": 3.171875,
      "learning_rate": 1.3505118240041952e-05,
      "loss": 0.8315,
      "step": 647900
    },
    {
      "epoch": 2.270763019272624,
      "grad_norm": 2.546875,
      "learning_rate": 1.350446921137825e-05,
      "loss": 0.7477,
      "step": 647910
    },
    {
      "epoch": 2.27079806677952,
      "grad_norm": 2.796875,
      "learning_rate": 1.350382018271455e-05,
      "loss": 0.7603,
      "step": 647920
    },
    {
      "epoch": 2.270833114286415,
      "grad_norm": 3.078125,
      "learning_rate": 1.3503171154050848e-05,
      "loss": 0.7599,
      "step": 647930
    },
    {
      "epoch": 2.270868161793311,
      "grad_norm": 3.234375,
      "learning_rate": 1.3502522125387146e-05,
      "loss": 0.8067,
      "step": 647940
    },
    {
      "epoch": 2.2709032093002066,
      "grad_norm": 2.921875,
      "learning_rate": 1.3501873096723444e-05,
      "loss": 0.7691,
      "step": 647950
    },
    {
      "epoch": 2.270938256807102,
      "grad_norm": 3.25,
      "learning_rate": 1.3501224068059742e-05,
      "loss": 0.7236,
      "step": 647960
    },
    {
      "epoch": 2.2709733043139977,
      "grad_norm": 3.078125,
      "learning_rate": 1.350057503939604e-05,
      "loss": 0.8539,
      "step": 647970
    },
    {
      "epoch": 2.2710083518208934,
      "grad_norm": 2.953125,
      "learning_rate": 1.3499926010732338e-05,
      "loss": 0.7857,
      "step": 647980
    },
    {
      "epoch": 2.2710433993277888,
      "grad_norm": 3.375,
      "learning_rate": 1.3499276982068638e-05,
      "loss": 0.8382,
      "step": 647990
    },
    {
      "epoch": 2.2710784468346845,
      "grad_norm": 2.796875,
      "learning_rate": 1.3498627953404936e-05,
      "loss": 0.8689,
      "step": 648000
    },
    {
      "epoch": 2.27111349434158,
      "grad_norm": 2.765625,
      "learning_rate": 1.3497978924741234e-05,
      "loss": 0.8396,
      "step": 648010
    },
    {
      "epoch": 2.2711485418484756,
      "grad_norm": 2.3125,
      "learning_rate": 1.3497329896077532e-05,
      "loss": 0.8339,
      "step": 648020
    },
    {
      "epoch": 2.2711835893553713,
      "grad_norm": 2.984375,
      "learning_rate": 1.349668086741383e-05,
      "loss": 0.7717,
      "step": 648030
    },
    {
      "epoch": 2.2712186368622667,
      "grad_norm": 3.125,
      "learning_rate": 1.3496031838750128e-05,
      "loss": 0.8324,
      "step": 648040
    },
    {
      "epoch": 2.2712536843691624,
      "grad_norm": 2.890625,
      "learning_rate": 1.3495382810086426e-05,
      "loss": 0.7954,
      "step": 648050
    },
    {
      "epoch": 2.271288731876058,
      "grad_norm": 3.078125,
      "learning_rate": 1.3494733781422722e-05,
      "loss": 0.7975,
      "step": 648060
    },
    {
      "epoch": 2.2713237793829535,
      "grad_norm": 2.875,
      "learning_rate": 1.349408475275902e-05,
      "loss": 0.8172,
      "step": 648070
    },
    {
      "epoch": 2.2713588268898492,
      "grad_norm": 2.890625,
      "learning_rate": 1.3493435724095318e-05,
      "loss": 0.8186,
      "step": 648080
    },
    {
      "epoch": 2.271393874396745,
      "grad_norm": 2.828125,
      "learning_rate": 1.3492786695431616e-05,
      "loss": 0.7776,
      "step": 648090
    },
    {
      "epoch": 2.2714289219036403,
      "grad_norm": 2.734375,
      "learning_rate": 1.3492137666767916e-05,
      "loss": 0.7846,
      "step": 648100
    },
    {
      "epoch": 2.271463969410536,
      "grad_norm": 2.53125,
      "learning_rate": 1.3491488638104214e-05,
      "loss": 0.8298,
      "step": 648110
    },
    {
      "epoch": 2.2714990169174314,
      "grad_norm": 2.53125,
      "learning_rate": 1.3490839609440512e-05,
      "loss": 0.7833,
      "step": 648120
    },
    {
      "epoch": 2.271534064424327,
      "grad_norm": 3.15625,
      "learning_rate": 1.349019058077681e-05,
      "loss": 0.787,
      "step": 648130
    },
    {
      "epoch": 2.271569111931223,
      "grad_norm": 2.90625,
      "learning_rate": 1.3489541552113108e-05,
      "loss": 0.8163,
      "step": 648140
    },
    {
      "epoch": 2.271604159438118,
      "grad_norm": 2.5625,
      "learning_rate": 1.3488892523449406e-05,
      "loss": 0.7967,
      "step": 648150
    },
    {
      "epoch": 2.271639206945014,
      "grad_norm": 2.875,
      "learning_rate": 1.3488243494785704e-05,
      "loss": 0.8334,
      "step": 648160
    },
    {
      "epoch": 2.2716742544519097,
      "grad_norm": 2.953125,
      "learning_rate": 1.3487594466122004e-05,
      "loss": 0.7918,
      "step": 648170
    },
    {
      "epoch": 2.271709301958805,
      "grad_norm": 2.796875,
      "learning_rate": 1.3486945437458302e-05,
      "loss": 0.7867,
      "step": 648180
    },
    {
      "epoch": 2.271744349465701,
      "grad_norm": 2.546875,
      "learning_rate": 1.34862964087946e-05,
      "loss": 0.7947,
      "step": 648190
    },
    {
      "epoch": 2.2717793969725966,
      "grad_norm": 2.953125,
      "learning_rate": 1.3485647380130898e-05,
      "loss": 0.7287,
      "step": 648200
    },
    {
      "epoch": 2.271814444479492,
      "grad_norm": 3.078125,
      "learning_rate": 1.3484998351467196e-05,
      "loss": 0.9286,
      "step": 648210
    },
    {
      "epoch": 2.2718494919863876,
      "grad_norm": 3.0,
      "learning_rate": 1.3484349322803494e-05,
      "loss": 0.8921,
      "step": 648220
    },
    {
      "epoch": 2.271884539493283,
      "grad_norm": 2.890625,
      "learning_rate": 1.3483700294139792e-05,
      "loss": 0.7699,
      "step": 648230
    },
    {
      "epoch": 2.2719195870001787,
      "grad_norm": 3.0,
      "learning_rate": 1.3483051265476091e-05,
      "loss": 0.8071,
      "step": 648240
    },
    {
      "epoch": 2.2719546345070745,
      "grad_norm": 2.71875,
      "learning_rate": 1.3482402236812386e-05,
      "loss": 0.7605,
      "step": 648250
    },
    {
      "epoch": 2.27198968201397,
      "grad_norm": 3.140625,
      "learning_rate": 1.3481753208148684e-05,
      "loss": 0.7018,
      "step": 648260
    },
    {
      "epoch": 2.2720247295208655,
      "grad_norm": 2.953125,
      "learning_rate": 1.3481104179484982e-05,
      "loss": 0.829,
      "step": 648270
    },
    {
      "epoch": 2.2720597770277613,
      "grad_norm": 3.0,
      "learning_rate": 1.348045515082128e-05,
      "loss": 0.8871,
      "step": 648280
    },
    {
      "epoch": 2.2720948245346566,
      "grad_norm": 2.828125,
      "learning_rate": 1.347980612215758e-05,
      "loss": 0.722,
      "step": 648290
    },
    {
      "epoch": 2.2721298720415524,
      "grad_norm": 2.9375,
      "learning_rate": 1.3479157093493878e-05,
      "loss": 0.8241,
      "step": 648300
    },
    {
      "epoch": 2.272164919548448,
      "grad_norm": 2.640625,
      "learning_rate": 1.3478508064830176e-05,
      "loss": 0.8284,
      "step": 648310
    },
    {
      "epoch": 2.2721999670553434,
      "grad_norm": 2.78125,
      "learning_rate": 1.3477859036166474e-05,
      "loss": 0.775,
      "step": 648320
    },
    {
      "epoch": 2.272235014562239,
      "grad_norm": 3.15625,
      "learning_rate": 1.3477210007502772e-05,
      "loss": 0.7756,
      "step": 648330
    },
    {
      "epoch": 2.2722700620691345,
      "grad_norm": 2.828125,
      "learning_rate": 1.347656097883907e-05,
      "loss": 0.8365,
      "step": 648340
    },
    {
      "epoch": 2.2723051095760303,
      "grad_norm": 2.921875,
      "learning_rate": 1.3475911950175368e-05,
      "loss": 0.8456,
      "step": 648350
    },
    {
      "epoch": 2.272340157082926,
      "grad_norm": 2.953125,
      "learning_rate": 1.3475262921511667e-05,
      "loss": 0.8479,
      "step": 648360
    },
    {
      "epoch": 2.2723752045898213,
      "grad_norm": 2.71875,
      "learning_rate": 1.3474613892847965e-05,
      "loss": 0.7721,
      "step": 648370
    },
    {
      "epoch": 2.272410252096717,
      "grad_norm": 2.6875,
      "learning_rate": 1.3473964864184263e-05,
      "loss": 0.7144,
      "step": 648380
    },
    {
      "epoch": 2.272445299603613,
      "grad_norm": 3.015625,
      "learning_rate": 1.3473315835520561e-05,
      "loss": 0.8281,
      "step": 648390
    },
    {
      "epoch": 2.272480347110508,
      "grad_norm": 3.203125,
      "learning_rate": 1.347266680685686e-05,
      "loss": 0.781,
      "step": 648400
    },
    {
      "epoch": 2.272515394617404,
      "grad_norm": 3.34375,
      "learning_rate": 1.3472017778193157e-05,
      "loss": 0.8468,
      "step": 648410
    },
    {
      "epoch": 2.2725504421242997,
      "grad_norm": 3.03125,
      "learning_rate": 1.3471368749529455e-05,
      "loss": 0.8444,
      "step": 648420
    },
    {
      "epoch": 2.272585489631195,
      "grad_norm": 2.765625,
      "learning_rate": 1.3470719720865755e-05,
      "loss": 0.871,
      "step": 648430
    },
    {
      "epoch": 2.2726205371380908,
      "grad_norm": 3.5625,
      "learning_rate": 1.347007069220205e-05,
      "loss": 0.7805,
      "step": 648440
    },
    {
      "epoch": 2.272655584644986,
      "grad_norm": 2.703125,
      "learning_rate": 1.3469421663538348e-05,
      "loss": 0.7682,
      "step": 648450
    },
    {
      "epoch": 2.272690632151882,
      "grad_norm": 2.921875,
      "learning_rate": 1.3468772634874646e-05,
      "loss": 0.7813,
      "step": 648460
    },
    {
      "epoch": 2.2727256796587776,
      "grad_norm": 2.890625,
      "learning_rate": 1.3468123606210945e-05,
      "loss": 0.732,
      "step": 648470
    },
    {
      "epoch": 2.272760727165673,
      "grad_norm": 2.703125,
      "learning_rate": 1.3467474577547243e-05,
      "loss": 0.7703,
      "step": 648480
    },
    {
      "epoch": 2.2727957746725687,
      "grad_norm": 2.875,
      "learning_rate": 1.3466825548883541e-05,
      "loss": 0.8616,
      "step": 648490
    },
    {
      "epoch": 2.2728308221794644,
      "grad_norm": 2.859375,
      "learning_rate": 1.346617652021984e-05,
      "loss": 0.7636,
      "step": 648500
    },
    {
      "epoch": 2.2728658696863597,
      "grad_norm": 3.09375,
      "learning_rate": 1.3465527491556137e-05,
      "loss": 0.836,
      "step": 648510
    },
    {
      "epoch": 2.2729009171932555,
      "grad_norm": 2.6875,
      "learning_rate": 1.3464878462892435e-05,
      "loss": 0.8322,
      "step": 648520
    },
    {
      "epoch": 2.2729359647001512,
      "grad_norm": 2.921875,
      "learning_rate": 1.3464229434228733e-05,
      "loss": 0.7915,
      "step": 648530
    },
    {
      "epoch": 2.2729710122070466,
      "grad_norm": 3.09375,
      "learning_rate": 1.3463580405565033e-05,
      "loss": 0.8125,
      "step": 648540
    },
    {
      "epoch": 2.2730060597139423,
      "grad_norm": 2.609375,
      "learning_rate": 1.3462931376901331e-05,
      "loss": 0.8088,
      "step": 648550
    },
    {
      "epoch": 2.2730411072208376,
      "grad_norm": 3.0,
      "learning_rate": 1.3462282348237629e-05,
      "loss": 0.8293,
      "step": 648560
    },
    {
      "epoch": 2.2730761547277334,
      "grad_norm": 2.890625,
      "learning_rate": 1.3461633319573927e-05,
      "loss": 0.8065,
      "step": 648570
    },
    {
      "epoch": 2.273111202234629,
      "grad_norm": 3.109375,
      "learning_rate": 1.3460984290910225e-05,
      "loss": 0.8357,
      "step": 648580
    },
    {
      "epoch": 2.2731462497415245,
      "grad_norm": 2.71875,
      "learning_rate": 1.3460335262246523e-05,
      "loss": 0.7681,
      "step": 648590
    },
    {
      "epoch": 2.27318129724842,
      "grad_norm": 3.09375,
      "learning_rate": 1.3459686233582821e-05,
      "loss": 0.869,
      "step": 648600
    },
    {
      "epoch": 2.273216344755316,
      "grad_norm": 3.046875,
      "learning_rate": 1.345903720491912e-05,
      "loss": 0.7788,
      "step": 648610
    },
    {
      "epoch": 2.2732513922622113,
      "grad_norm": 2.859375,
      "learning_rate": 1.3458388176255419e-05,
      "loss": 0.8756,
      "step": 648620
    },
    {
      "epoch": 2.273286439769107,
      "grad_norm": 3.03125,
      "learning_rate": 1.3457739147591713e-05,
      "loss": 0.7895,
      "step": 648630
    },
    {
      "epoch": 2.273321487276003,
      "grad_norm": 2.953125,
      "learning_rate": 1.3457090118928011e-05,
      "loss": 0.7619,
      "step": 648640
    },
    {
      "epoch": 2.273356534782898,
      "grad_norm": 2.828125,
      "learning_rate": 1.3456441090264311e-05,
      "loss": 0.8069,
      "step": 648650
    },
    {
      "epoch": 2.273391582289794,
      "grad_norm": 2.6875,
      "learning_rate": 1.3455792061600609e-05,
      "loss": 0.8143,
      "step": 648660
    },
    {
      "epoch": 2.273426629796689,
      "grad_norm": 2.8125,
      "learning_rate": 1.3455143032936907e-05,
      "loss": 0.8167,
      "step": 648670
    },
    {
      "epoch": 2.273461677303585,
      "grad_norm": 3.203125,
      "learning_rate": 1.3454494004273205e-05,
      "loss": 0.8383,
      "step": 648680
    },
    {
      "epoch": 2.2734967248104807,
      "grad_norm": 2.953125,
      "learning_rate": 1.3453844975609503e-05,
      "loss": 0.9141,
      "step": 648690
    },
    {
      "epoch": 2.273531772317376,
      "grad_norm": 2.921875,
      "learning_rate": 1.3453195946945801e-05,
      "loss": 0.7757,
      "step": 648700
    },
    {
      "epoch": 2.273566819824272,
      "grad_norm": 2.828125,
      "learning_rate": 1.3452546918282099e-05,
      "loss": 0.743,
      "step": 648710
    },
    {
      "epoch": 2.2736018673311675,
      "grad_norm": 3.234375,
      "learning_rate": 1.3451897889618399e-05,
      "loss": 0.7986,
      "step": 648720
    },
    {
      "epoch": 2.273636914838063,
      "grad_norm": 3.03125,
      "learning_rate": 1.3451248860954697e-05,
      "loss": 0.8141,
      "step": 648730
    },
    {
      "epoch": 2.2736719623449586,
      "grad_norm": 2.9375,
      "learning_rate": 1.3450599832290995e-05,
      "loss": 0.8733,
      "step": 648740
    },
    {
      "epoch": 2.2737070098518544,
      "grad_norm": 2.71875,
      "learning_rate": 1.3449950803627293e-05,
      "loss": 0.9003,
      "step": 648750
    },
    {
      "epoch": 2.2737420573587497,
      "grad_norm": 2.609375,
      "learning_rate": 1.344930177496359e-05,
      "loss": 0.8741,
      "step": 648760
    },
    {
      "epoch": 2.2737771048656454,
      "grad_norm": 2.796875,
      "learning_rate": 1.3448652746299889e-05,
      "loss": 0.8217,
      "step": 648770
    },
    {
      "epoch": 2.2738121523725408,
      "grad_norm": 3.328125,
      "learning_rate": 1.3448003717636187e-05,
      "loss": 0.7884,
      "step": 648780
    },
    {
      "epoch": 2.2738471998794365,
      "grad_norm": 2.625,
      "learning_rate": 1.3447354688972486e-05,
      "loss": 0.8285,
      "step": 648790
    },
    {
      "epoch": 2.2738822473863323,
      "grad_norm": 3.15625,
      "learning_rate": 1.3446705660308784e-05,
      "loss": 0.8361,
      "step": 648800
    },
    {
      "epoch": 2.2739172948932276,
      "grad_norm": 2.84375,
      "learning_rate": 1.3446056631645082e-05,
      "loss": 0.8056,
      "step": 648810
    },
    {
      "epoch": 2.2739523424001233,
      "grad_norm": 2.5625,
      "learning_rate": 1.3445407602981377e-05,
      "loss": 0.7934,
      "step": 648820
    },
    {
      "epoch": 2.273987389907019,
      "grad_norm": 2.84375,
      "learning_rate": 1.3444758574317675e-05,
      "loss": 0.8504,
      "step": 648830
    },
    {
      "epoch": 2.2740224374139144,
      "grad_norm": 3.03125,
      "learning_rate": 1.3444109545653975e-05,
      "loss": 0.8288,
      "step": 648840
    },
    {
      "epoch": 2.27405748492081,
      "grad_norm": 3.21875,
      "learning_rate": 1.3443460516990273e-05,
      "loss": 0.8864,
      "step": 648850
    },
    {
      "epoch": 2.274092532427706,
      "grad_norm": 2.953125,
      "learning_rate": 1.344281148832657e-05,
      "loss": 0.8264,
      "step": 648860
    },
    {
      "epoch": 2.2741275799346012,
      "grad_norm": 2.46875,
      "learning_rate": 1.3442162459662869e-05,
      "loss": 0.7865,
      "step": 648870
    },
    {
      "epoch": 2.274162627441497,
      "grad_norm": 3.046875,
      "learning_rate": 1.3441513430999167e-05,
      "loss": 0.781,
      "step": 648880
    },
    {
      "epoch": 2.2741976749483923,
      "grad_norm": 2.609375,
      "learning_rate": 1.3440864402335465e-05,
      "loss": 0.7782,
      "step": 648890
    },
    {
      "epoch": 2.274232722455288,
      "grad_norm": 2.96875,
      "learning_rate": 1.3440215373671763e-05,
      "loss": 0.8168,
      "step": 648900
    },
    {
      "epoch": 2.274267769962184,
      "grad_norm": 3.140625,
      "learning_rate": 1.3439566345008062e-05,
      "loss": 0.8655,
      "step": 648910
    },
    {
      "epoch": 2.2743028174690796,
      "grad_norm": 2.8125,
      "learning_rate": 1.343891731634436e-05,
      "loss": 0.8479,
      "step": 648920
    },
    {
      "epoch": 2.274337864975975,
      "grad_norm": 2.578125,
      "learning_rate": 1.3438268287680658e-05,
      "loss": 0.7803,
      "step": 648930
    },
    {
      "epoch": 2.2743729124828707,
      "grad_norm": 2.890625,
      "learning_rate": 1.3437619259016956e-05,
      "loss": 0.7836,
      "step": 648940
    },
    {
      "epoch": 2.274407959989766,
      "grad_norm": 3.0625,
      "learning_rate": 1.3436970230353254e-05,
      "loss": 0.8278,
      "step": 648950
    },
    {
      "epoch": 2.2744430074966617,
      "grad_norm": 2.65625,
      "learning_rate": 1.3436321201689552e-05,
      "loss": 0.8076,
      "step": 648960
    },
    {
      "epoch": 2.2744780550035575,
      "grad_norm": 3.078125,
      "learning_rate": 1.343567217302585e-05,
      "loss": 0.787,
      "step": 648970
    },
    {
      "epoch": 2.274513102510453,
      "grad_norm": 3.34375,
      "learning_rate": 1.343502314436215e-05,
      "loss": 0.871,
      "step": 648980
    },
    {
      "epoch": 2.2745481500173486,
      "grad_norm": 3.1875,
      "learning_rate": 1.3434374115698448e-05,
      "loss": 0.8455,
      "step": 648990
    },
    {
      "epoch": 2.274583197524244,
      "grad_norm": 3.0625,
      "learning_rate": 1.3433725087034743e-05,
      "loss": 0.745,
      "step": 649000
    },
    {
      "epoch": 2.2746182450311396,
      "grad_norm": 3.203125,
      "learning_rate": 1.343307605837104e-05,
      "loss": 0.8161,
      "step": 649010
    },
    {
      "epoch": 2.2746532925380354,
      "grad_norm": 2.8125,
      "learning_rate": 1.343242702970734e-05,
      "loss": 0.8387,
      "step": 649020
    },
    {
      "epoch": 2.274688340044931,
      "grad_norm": 2.953125,
      "learning_rate": 1.3431778001043638e-05,
      "loss": 0.8118,
      "step": 649030
    },
    {
      "epoch": 2.2747233875518265,
      "grad_norm": 3.4375,
      "learning_rate": 1.3431128972379936e-05,
      "loss": 0.8417,
      "step": 649040
    },
    {
      "epoch": 2.274758435058722,
      "grad_norm": 3.1875,
      "learning_rate": 1.3430479943716234e-05,
      "loss": 0.7704,
      "step": 649050
    },
    {
      "epoch": 2.2747934825656175,
      "grad_norm": 2.6875,
      "learning_rate": 1.3429830915052532e-05,
      "loss": 0.8306,
      "step": 649060
    },
    {
      "epoch": 2.2748285300725133,
      "grad_norm": 2.890625,
      "learning_rate": 1.342918188638883e-05,
      "loss": 0.7356,
      "step": 649070
    },
    {
      "epoch": 2.274863577579409,
      "grad_norm": 2.609375,
      "learning_rate": 1.3428532857725128e-05,
      "loss": 0.7251,
      "step": 649080
    },
    {
      "epoch": 2.2748986250863044,
      "grad_norm": 2.40625,
      "learning_rate": 1.3427883829061428e-05,
      "loss": 0.766,
      "step": 649090
    },
    {
      "epoch": 2.2749336725932,
      "grad_norm": 2.96875,
      "learning_rate": 1.3427234800397726e-05,
      "loss": 0.9028,
      "step": 649100
    },
    {
      "epoch": 2.2749687201000954,
      "grad_norm": 2.5625,
      "learning_rate": 1.3426585771734024e-05,
      "loss": 0.7485,
      "step": 649110
    },
    {
      "epoch": 2.275003767606991,
      "grad_norm": 3.171875,
      "learning_rate": 1.3425936743070322e-05,
      "loss": 0.8712,
      "step": 649120
    },
    {
      "epoch": 2.275038815113887,
      "grad_norm": 2.59375,
      "learning_rate": 1.342528771440662e-05,
      "loss": 0.7984,
      "step": 649130
    },
    {
      "epoch": 2.2750738626207827,
      "grad_norm": 3.328125,
      "learning_rate": 1.3424638685742918e-05,
      "loss": 0.8181,
      "step": 649140
    },
    {
      "epoch": 2.275108910127678,
      "grad_norm": 2.8125,
      "learning_rate": 1.3423989657079216e-05,
      "loss": 0.8072,
      "step": 649150
    },
    {
      "epoch": 2.2751439576345738,
      "grad_norm": 3.28125,
      "learning_rate": 1.3423340628415516e-05,
      "loss": 0.8246,
      "step": 649160
    },
    {
      "epoch": 2.275179005141469,
      "grad_norm": 3.125,
      "learning_rate": 1.3422691599751814e-05,
      "loss": 0.8025,
      "step": 649170
    },
    {
      "epoch": 2.275214052648365,
      "grad_norm": 2.921875,
      "learning_rate": 1.3422042571088112e-05,
      "loss": 0.8134,
      "step": 649180
    },
    {
      "epoch": 2.2752491001552606,
      "grad_norm": 3.28125,
      "learning_rate": 1.3421393542424406e-05,
      "loss": 0.806,
      "step": 649190
    },
    {
      "epoch": 2.275284147662156,
      "grad_norm": 2.90625,
      "learning_rate": 1.3420744513760706e-05,
      "loss": 0.7839,
      "step": 649200
    },
    {
      "epoch": 2.2753191951690517,
      "grad_norm": 2.953125,
      "learning_rate": 1.3420095485097004e-05,
      "loss": 0.8088,
      "step": 649210
    },
    {
      "epoch": 2.2753542426759474,
      "grad_norm": 3.390625,
      "learning_rate": 1.3419446456433302e-05,
      "loss": 0.8418,
      "step": 649220
    },
    {
      "epoch": 2.2753892901828427,
      "grad_norm": 3.109375,
      "learning_rate": 1.34187974277696e-05,
      "loss": 0.8706,
      "step": 649230
    },
    {
      "epoch": 2.2754243376897385,
      "grad_norm": 3.25,
      "learning_rate": 1.3418148399105898e-05,
      "loss": 0.8514,
      "step": 649240
    },
    {
      "epoch": 2.2754593851966343,
      "grad_norm": 3.109375,
      "learning_rate": 1.3417499370442196e-05,
      "loss": 0.8202,
      "step": 649250
    },
    {
      "epoch": 2.2754944327035296,
      "grad_norm": 3.171875,
      "learning_rate": 1.3416850341778494e-05,
      "loss": 0.8117,
      "step": 649260
    },
    {
      "epoch": 2.2755294802104253,
      "grad_norm": 2.484375,
      "learning_rate": 1.3416201313114794e-05,
      "loss": 0.6886,
      "step": 649270
    },
    {
      "epoch": 2.2755645277173207,
      "grad_norm": 2.65625,
      "learning_rate": 1.3415552284451092e-05,
      "loss": 0.8202,
      "step": 649280
    },
    {
      "epoch": 2.2755995752242164,
      "grad_norm": 2.703125,
      "learning_rate": 1.341490325578739e-05,
      "loss": 0.9045,
      "step": 649290
    },
    {
      "epoch": 2.275634622731112,
      "grad_norm": 3.265625,
      "learning_rate": 1.3414254227123688e-05,
      "loss": 0.7902,
      "step": 649300
    },
    {
      "epoch": 2.2756696702380075,
      "grad_norm": 3.5625,
      "learning_rate": 1.3413605198459986e-05,
      "loss": 0.8404,
      "step": 649310
    },
    {
      "epoch": 2.2757047177449032,
      "grad_norm": 3.078125,
      "learning_rate": 1.3412956169796284e-05,
      "loss": 0.8445,
      "step": 649320
    },
    {
      "epoch": 2.275739765251799,
      "grad_norm": 3.015625,
      "learning_rate": 1.3412307141132582e-05,
      "loss": 0.8504,
      "step": 649330
    },
    {
      "epoch": 2.2757748127586943,
      "grad_norm": 2.765625,
      "learning_rate": 1.3411658112468881e-05,
      "loss": 0.8006,
      "step": 649340
    },
    {
      "epoch": 2.27580986026559,
      "grad_norm": 2.796875,
      "learning_rate": 1.341100908380518e-05,
      "loss": 0.7597,
      "step": 649350
    },
    {
      "epoch": 2.275844907772486,
      "grad_norm": 2.90625,
      "learning_rate": 1.3410360055141477e-05,
      "loss": 0.8425,
      "step": 649360
    },
    {
      "epoch": 2.275879955279381,
      "grad_norm": 2.78125,
      "learning_rate": 1.3409711026477775e-05,
      "loss": 0.7772,
      "step": 649370
    },
    {
      "epoch": 2.275915002786277,
      "grad_norm": 2.671875,
      "learning_rate": 1.340906199781407e-05,
      "loss": 0.7723,
      "step": 649380
    },
    {
      "epoch": 2.275950050293172,
      "grad_norm": 2.734375,
      "learning_rate": 1.340841296915037e-05,
      "loss": 0.837,
      "step": 649390
    },
    {
      "epoch": 2.275985097800068,
      "grad_norm": 2.484375,
      "learning_rate": 1.3407763940486668e-05,
      "loss": 0.888,
      "step": 649400
    },
    {
      "epoch": 2.2760201453069637,
      "grad_norm": 3.3125,
      "learning_rate": 1.3407114911822966e-05,
      "loss": 0.8357,
      "step": 649410
    },
    {
      "epoch": 2.276055192813859,
      "grad_norm": 3.609375,
      "learning_rate": 1.3406465883159264e-05,
      "loss": 0.8812,
      "step": 649420
    },
    {
      "epoch": 2.276090240320755,
      "grad_norm": 3.328125,
      "learning_rate": 1.3405816854495562e-05,
      "loss": 0.8278,
      "step": 649430
    },
    {
      "epoch": 2.2761252878276506,
      "grad_norm": 2.953125,
      "learning_rate": 1.340516782583186e-05,
      "loss": 0.7759,
      "step": 649440
    },
    {
      "epoch": 2.276160335334546,
      "grad_norm": 2.5625,
      "learning_rate": 1.3404518797168158e-05,
      "loss": 0.7723,
      "step": 649450
    },
    {
      "epoch": 2.2761953828414416,
      "grad_norm": 2.875,
      "learning_rate": 1.3403869768504457e-05,
      "loss": 0.9058,
      "step": 649460
    },
    {
      "epoch": 2.2762304303483374,
      "grad_norm": 2.984375,
      "learning_rate": 1.3403220739840755e-05,
      "loss": 0.8147,
      "step": 649470
    },
    {
      "epoch": 2.2762654778552327,
      "grad_norm": 3.28125,
      "learning_rate": 1.3402571711177053e-05,
      "loss": 0.9476,
      "step": 649480
    },
    {
      "epoch": 2.2763005253621285,
      "grad_norm": 2.890625,
      "learning_rate": 1.3401922682513351e-05,
      "loss": 0.8377,
      "step": 649490
    },
    {
      "epoch": 2.2763355728690238,
      "grad_norm": 2.46875,
      "learning_rate": 1.340127365384965e-05,
      "loss": 0.7619,
      "step": 649500
    },
    {
      "epoch": 2.2763706203759195,
      "grad_norm": 3.375,
      "learning_rate": 1.3400624625185947e-05,
      "loss": 0.8109,
      "step": 649510
    },
    {
      "epoch": 2.2764056678828153,
      "grad_norm": 2.8125,
      "learning_rate": 1.3399975596522245e-05,
      "loss": 0.7865,
      "step": 649520
    },
    {
      "epoch": 2.2764407153897106,
      "grad_norm": 2.9375,
      "learning_rate": 1.3399326567858545e-05,
      "loss": 0.7584,
      "step": 649530
    },
    {
      "epoch": 2.2764757628966064,
      "grad_norm": 2.984375,
      "learning_rate": 1.3398677539194843e-05,
      "loss": 0.7809,
      "step": 649540
    },
    {
      "epoch": 2.276510810403502,
      "grad_norm": 2.796875,
      "learning_rate": 1.3398028510531141e-05,
      "loss": 0.7299,
      "step": 649550
    },
    {
      "epoch": 2.2765458579103974,
      "grad_norm": 2.8125,
      "learning_rate": 1.3397379481867439e-05,
      "loss": 0.7507,
      "step": 649560
    },
    {
      "epoch": 2.276580905417293,
      "grad_norm": 2.9375,
      "learning_rate": 1.3396730453203735e-05,
      "loss": 0.8605,
      "step": 649570
    },
    {
      "epoch": 2.276615952924189,
      "grad_norm": 2.921875,
      "learning_rate": 1.3396081424540033e-05,
      "loss": 0.8471,
      "step": 649580
    },
    {
      "epoch": 2.2766510004310843,
      "grad_norm": 2.8125,
      "learning_rate": 1.3395432395876331e-05,
      "loss": 0.8394,
      "step": 649590
    },
    {
      "epoch": 2.27668604793798,
      "grad_norm": 2.84375,
      "learning_rate": 1.339478336721263e-05,
      "loss": 0.7669,
      "step": 649600
    },
    {
      "epoch": 2.2767210954448753,
      "grad_norm": 2.875,
      "learning_rate": 1.3394134338548927e-05,
      "loss": 0.8505,
      "step": 649610
    },
    {
      "epoch": 2.276756142951771,
      "grad_norm": 2.984375,
      "learning_rate": 1.3393485309885225e-05,
      "loss": 0.6796,
      "step": 649620
    },
    {
      "epoch": 2.276791190458667,
      "grad_norm": 3.171875,
      "learning_rate": 1.3392836281221523e-05,
      "loss": 0.7589,
      "step": 649630
    },
    {
      "epoch": 2.276826237965562,
      "grad_norm": 2.34375,
      "learning_rate": 1.3392187252557823e-05,
      "loss": 0.7939,
      "step": 649640
    },
    {
      "epoch": 2.276861285472458,
      "grad_norm": 2.78125,
      "learning_rate": 1.3391538223894121e-05,
      "loss": 0.7802,
      "step": 649650
    },
    {
      "epoch": 2.2768963329793537,
      "grad_norm": 2.890625,
      "learning_rate": 1.3390889195230419e-05,
      "loss": 0.8005,
      "step": 649660
    },
    {
      "epoch": 2.276931380486249,
      "grad_norm": 2.921875,
      "learning_rate": 1.3390240166566717e-05,
      "loss": 0.745,
      "step": 649670
    },
    {
      "epoch": 2.2769664279931447,
      "grad_norm": 2.75,
      "learning_rate": 1.3389591137903015e-05,
      "loss": 0.8058,
      "step": 649680
    },
    {
      "epoch": 2.2770014755000405,
      "grad_norm": 2.953125,
      "learning_rate": 1.3388942109239313e-05,
      "loss": 0.8167,
      "step": 649690
    },
    {
      "epoch": 2.277036523006936,
      "grad_norm": 2.609375,
      "learning_rate": 1.3388293080575611e-05,
      "loss": 0.802,
      "step": 649700
    },
    {
      "epoch": 2.2770715705138316,
      "grad_norm": 2.859375,
      "learning_rate": 1.338764405191191e-05,
      "loss": 0.8284,
      "step": 649710
    },
    {
      "epoch": 2.277106618020727,
      "grad_norm": 2.859375,
      "learning_rate": 1.3386995023248209e-05,
      "loss": 0.8416,
      "step": 649720
    },
    {
      "epoch": 2.2771416655276226,
      "grad_norm": 3.3125,
      "learning_rate": 1.3386345994584507e-05,
      "loss": 0.7945,
      "step": 649730
    },
    {
      "epoch": 2.2771767130345184,
      "grad_norm": 2.78125,
      "learning_rate": 1.3385696965920805e-05,
      "loss": 0.8046,
      "step": 649740
    },
    {
      "epoch": 2.2772117605414137,
      "grad_norm": 2.96875,
      "learning_rate": 1.3385047937257103e-05,
      "loss": 0.7985,
      "step": 649750
    },
    {
      "epoch": 2.2772468080483095,
      "grad_norm": 2.828125,
      "learning_rate": 1.3384398908593399e-05,
      "loss": 0.8084,
      "step": 649760
    },
    {
      "epoch": 2.2772818555552052,
      "grad_norm": 2.609375,
      "learning_rate": 1.3383749879929697e-05,
      "loss": 0.7976,
      "step": 649770
    },
    {
      "epoch": 2.2773169030621006,
      "grad_norm": 2.59375,
      "learning_rate": 1.3383100851265995e-05,
      "loss": 0.7687,
      "step": 649780
    },
    {
      "epoch": 2.2773519505689963,
      "grad_norm": 2.984375,
      "learning_rate": 1.3382451822602293e-05,
      "loss": 0.8018,
      "step": 649790
    },
    {
      "epoch": 2.277386998075892,
      "grad_norm": 4.09375,
      "learning_rate": 1.3381802793938591e-05,
      "loss": 0.8776,
      "step": 649800
    },
    {
      "epoch": 2.2774220455827874,
      "grad_norm": 2.515625,
      "learning_rate": 1.3381153765274889e-05,
      "loss": 0.8007,
      "step": 649810
    },
    {
      "epoch": 2.277457093089683,
      "grad_norm": 2.515625,
      "learning_rate": 1.3380504736611189e-05,
      "loss": 0.8258,
      "step": 649820
    },
    {
      "epoch": 2.2774921405965785,
      "grad_norm": 2.59375,
      "learning_rate": 1.3379855707947487e-05,
      "loss": 0.89,
      "step": 649830
    },
    {
      "epoch": 2.277527188103474,
      "grad_norm": 2.703125,
      "learning_rate": 1.3379206679283785e-05,
      "loss": 0.8069,
      "step": 649840
    },
    {
      "epoch": 2.27756223561037,
      "grad_norm": 3.09375,
      "learning_rate": 1.3378557650620083e-05,
      "loss": 0.7708,
      "step": 649850
    },
    {
      "epoch": 2.2775972831172653,
      "grad_norm": 2.5625,
      "learning_rate": 1.337790862195638e-05,
      "loss": 0.7818,
      "step": 649860
    },
    {
      "epoch": 2.277632330624161,
      "grad_norm": 2.71875,
      "learning_rate": 1.3377259593292679e-05,
      "loss": 0.7668,
      "step": 649870
    },
    {
      "epoch": 2.277667378131057,
      "grad_norm": 2.953125,
      "learning_rate": 1.3376610564628977e-05,
      "loss": 0.8542,
      "step": 649880
    },
    {
      "epoch": 2.277702425637952,
      "grad_norm": 3.171875,
      "learning_rate": 1.3375961535965276e-05,
      "loss": 0.8208,
      "step": 649890
    },
    {
      "epoch": 2.277737473144848,
      "grad_norm": 2.921875,
      "learning_rate": 1.3375312507301574e-05,
      "loss": 0.7913,
      "step": 649900
    },
    {
      "epoch": 2.2777725206517436,
      "grad_norm": 3.0625,
      "learning_rate": 1.3374663478637872e-05,
      "loss": 0.7392,
      "step": 649910
    },
    {
      "epoch": 2.277807568158639,
      "grad_norm": 3.328125,
      "learning_rate": 1.337401444997417e-05,
      "loss": 0.8328,
      "step": 649920
    },
    {
      "epoch": 2.2778426156655347,
      "grad_norm": 2.734375,
      "learning_rate": 1.3373365421310468e-05,
      "loss": 0.7551,
      "step": 649930
    },
    {
      "epoch": 2.27787766317243,
      "grad_norm": 2.984375,
      "learning_rate": 1.3372716392646765e-05,
      "loss": 0.7853,
      "step": 649940
    },
    {
      "epoch": 2.2779127106793258,
      "grad_norm": 2.859375,
      "learning_rate": 1.3372067363983063e-05,
      "loss": 0.7874,
      "step": 649950
    },
    {
      "epoch": 2.2779477581862215,
      "grad_norm": 3.234375,
      "learning_rate": 1.337141833531936e-05,
      "loss": 0.844,
      "step": 649960
    },
    {
      "epoch": 2.277982805693117,
      "grad_norm": 2.734375,
      "learning_rate": 1.3370769306655659e-05,
      "loss": 0.7607,
      "step": 649970
    },
    {
      "epoch": 2.2780178532000126,
      "grad_norm": 2.9375,
      "learning_rate": 1.3370120277991957e-05,
      "loss": 0.854,
      "step": 649980
    },
    {
      "epoch": 2.2780529007069084,
      "grad_norm": 2.4375,
      "learning_rate": 1.3369471249328255e-05,
      "loss": 0.8112,
      "step": 649990
    },
    {
      "epoch": 2.2780879482138037,
      "grad_norm": 2.5,
      "learning_rate": 1.3368822220664553e-05,
      "loss": 0.8709,
      "step": 650000
    },
    {
      "epoch": 2.2780879482138037,
      "eval_loss": 0.7619814276695251,
      "eval_runtime": 554.7229,
      "eval_samples_per_second": 685.813,
      "eval_steps_per_second": 57.151,
      "step": 650000
    },
    {
      "epoch": 2.2781229957206994,
      "grad_norm": 2.859375,
      "learning_rate": 1.3368173192000852e-05,
      "loss": 0.7762,
      "step": 650010
    },
    {
      "epoch": 2.278158043227595,
      "grad_norm": 2.625,
      "learning_rate": 1.336752416333715e-05,
      "loss": 0.7896,
      "step": 650020
    },
    {
      "epoch": 2.2781930907344905,
      "grad_norm": 2.890625,
      "learning_rate": 1.3366875134673448e-05,
      "loss": 0.8268,
      "step": 650030
    },
    {
      "epoch": 2.2782281382413863,
      "grad_norm": 2.984375,
      "learning_rate": 1.3366226106009746e-05,
      "loss": 0.767,
      "step": 650040
    },
    {
      "epoch": 2.2782631857482816,
      "grad_norm": 2.546875,
      "learning_rate": 1.3365577077346044e-05,
      "loss": 0.8046,
      "step": 650050
    },
    {
      "epoch": 2.2782982332551773,
      "grad_norm": 3.578125,
      "learning_rate": 1.3364928048682342e-05,
      "loss": 0.8048,
      "step": 650060
    },
    {
      "epoch": 2.278333280762073,
      "grad_norm": 2.453125,
      "learning_rate": 1.336427902001864e-05,
      "loss": 0.8301,
      "step": 650070
    },
    {
      "epoch": 2.2783683282689684,
      "grad_norm": 2.71875,
      "learning_rate": 1.336362999135494e-05,
      "loss": 0.8257,
      "step": 650080
    },
    {
      "epoch": 2.278403375775864,
      "grad_norm": 2.96875,
      "learning_rate": 1.3362980962691238e-05,
      "loss": 0.7614,
      "step": 650090
    },
    {
      "epoch": 2.27843842328276,
      "grad_norm": 2.515625,
      "learning_rate": 1.3362331934027536e-05,
      "loss": 0.8704,
      "step": 650100
    },
    {
      "epoch": 2.2784734707896552,
      "grad_norm": 3.03125,
      "learning_rate": 1.3361682905363834e-05,
      "loss": 0.7787,
      "step": 650110
    },
    {
      "epoch": 2.278508518296551,
      "grad_norm": 2.78125,
      "learning_rate": 1.3361033876700132e-05,
      "loss": 0.8049,
      "step": 650120
    },
    {
      "epoch": 2.2785435658034467,
      "grad_norm": 3.140625,
      "learning_rate": 1.3360384848036428e-05,
      "loss": 0.815,
      "step": 650130
    },
    {
      "epoch": 2.278578613310342,
      "grad_norm": 2.765625,
      "learning_rate": 1.3359735819372726e-05,
      "loss": 0.8431,
      "step": 650140
    },
    {
      "epoch": 2.278613660817238,
      "grad_norm": 2.953125,
      "learning_rate": 1.3359086790709024e-05,
      "loss": 0.7232,
      "step": 650150
    },
    {
      "epoch": 2.278648708324133,
      "grad_norm": 2.8125,
      "learning_rate": 1.3358437762045322e-05,
      "loss": 0.8106,
      "step": 650160
    },
    {
      "epoch": 2.278683755831029,
      "grad_norm": 3.265625,
      "learning_rate": 1.335778873338162e-05,
      "loss": 0.8095,
      "step": 650170
    },
    {
      "epoch": 2.2787188033379246,
      "grad_norm": 2.6875,
      "learning_rate": 1.3357139704717918e-05,
      "loss": 0.6939,
      "step": 650180
    },
    {
      "epoch": 2.27875385084482,
      "grad_norm": 2.453125,
      "learning_rate": 1.3356490676054218e-05,
      "loss": 0.8224,
      "step": 650190
    },
    {
      "epoch": 2.2787888983517157,
      "grad_norm": 2.59375,
      "learning_rate": 1.3355841647390516e-05,
      "loss": 0.8252,
      "step": 650200
    },
    {
      "epoch": 2.2788239458586115,
      "grad_norm": 2.9375,
      "learning_rate": 1.3355192618726814e-05,
      "loss": 0.8089,
      "step": 650210
    },
    {
      "epoch": 2.278858993365507,
      "grad_norm": 3.328125,
      "learning_rate": 1.3354543590063112e-05,
      "loss": 0.8109,
      "step": 650220
    },
    {
      "epoch": 2.2788940408724025,
      "grad_norm": 2.359375,
      "learning_rate": 1.335389456139941e-05,
      "loss": 0.8065,
      "step": 650230
    },
    {
      "epoch": 2.2789290883792983,
      "grad_norm": 3.546875,
      "learning_rate": 1.3353245532735708e-05,
      "loss": 0.8025,
      "step": 650240
    },
    {
      "epoch": 2.2789641358861936,
      "grad_norm": 3.53125,
      "learning_rate": 1.3352596504072006e-05,
      "loss": 0.8168,
      "step": 650250
    },
    {
      "epoch": 2.2789991833930894,
      "grad_norm": 3.0625,
      "learning_rate": 1.3351947475408306e-05,
      "loss": 0.8517,
      "step": 650260
    },
    {
      "epoch": 2.2790342308999847,
      "grad_norm": 2.953125,
      "learning_rate": 1.3351298446744604e-05,
      "loss": 0.8773,
      "step": 650270
    },
    {
      "epoch": 2.2790692784068804,
      "grad_norm": 2.71875,
      "learning_rate": 1.3350649418080902e-05,
      "loss": 0.9682,
      "step": 650280
    },
    {
      "epoch": 2.279104325913776,
      "grad_norm": 2.640625,
      "learning_rate": 1.33500003894172e-05,
      "loss": 0.8408,
      "step": 650290
    },
    {
      "epoch": 2.279139373420672,
      "grad_norm": 3.34375,
      "learning_rate": 1.3349351360753498e-05,
      "loss": 0.8226,
      "step": 650300
    },
    {
      "epoch": 2.2791744209275673,
      "grad_norm": 3.578125,
      "learning_rate": 1.3348702332089796e-05,
      "loss": 0.858,
      "step": 650310
    },
    {
      "epoch": 2.279209468434463,
      "grad_norm": 2.515625,
      "learning_rate": 1.3348053303426092e-05,
      "loss": 0.8497,
      "step": 650320
    },
    {
      "epoch": 2.2792445159413584,
      "grad_norm": 3.0,
      "learning_rate": 1.334740427476239e-05,
      "loss": 0.8011,
      "step": 650330
    },
    {
      "epoch": 2.279279563448254,
      "grad_norm": 3.140625,
      "learning_rate": 1.3346755246098688e-05,
      "loss": 0.7479,
      "step": 650340
    },
    {
      "epoch": 2.27931461095515,
      "grad_norm": 3.0625,
      "learning_rate": 1.3346106217434986e-05,
      "loss": 0.8812,
      "step": 650350
    },
    {
      "epoch": 2.279349658462045,
      "grad_norm": 2.90625,
      "learning_rate": 1.3345457188771284e-05,
      "loss": 0.8504,
      "step": 650360
    },
    {
      "epoch": 2.279384705968941,
      "grad_norm": 2.765625,
      "learning_rate": 1.3344808160107584e-05,
      "loss": 0.7363,
      "step": 650370
    },
    {
      "epoch": 2.2794197534758363,
      "grad_norm": 3.03125,
      "learning_rate": 1.3344159131443882e-05,
      "loss": 0.7247,
      "step": 650380
    },
    {
      "epoch": 2.279454800982732,
      "grad_norm": 2.421875,
      "learning_rate": 1.334351010278018e-05,
      "loss": 0.7144,
      "step": 650390
    },
    {
      "epoch": 2.2794898484896278,
      "grad_norm": 2.84375,
      "learning_rate": 1.3342861074116478e-05,
      "loss": 0.8233,
      "step": 650400
    },
    {
      "epoch": 2.2795248959965235,
      "grad_norm": 3.125,
      "learning_rate": 1.3342212045452776e-05,
      "loss": 0.8299,
      "step": 650410
    },
    {
      "epoch": 2.279559943503419,
      "grad_norm": 2.8125,
      "learning_rate": 1.3341563016789074e-05,
      "loss": 0.7922,
      "step": 650420
    },
    {
      "epoch": 2.2795949910103146,
      "grad_norm": 2.9375,
      "learning_rate": 1.3340913988125372e-05,
      "loss": 0.8392,
      "step": 650430
    },
    {
      "epoch": 2.27963003851721,
      "grad_norm": 3.140625,
      "learning_rate": 1.3340264959461672e-05,
      "loss": 0.7985,
      "step": 650440
    },
    {
      "epoch": 2.2796650860241057,
      "grad_norm": 2.828125,
      "learning_rate": 1.333961593079797e-05,
      "loss": 0.7583,
      "step": 650450
    },
    {
      "epoch": 2.2797001335310014,
      "grad_norm": 2.71875,
      "learning_rate": 1.3338966902134268e-05,
      "loss": 0.799,
      "step": 650460
    },
    {
      "epoch": 2.2797351810378967,
      "grad_norm": 3.0625,
      "learning_rate": 1.3338317873470566e-05,
      "loss": 0.8245,
      "step": 650470
    },
    {
      "epoch": 2.2797702285447925,
      "grad_norm": 2.734375,
      "learning_rate": 1.3337668844806864e-05,
      "loss": 0.7974,
      "step": 650480
    },
    {
      "epoch": 2.2798052760516883,
      "grad_norm": 3.234375,
      "learning_rate": 1.3337019816143162e-05,
      "loss": 0.7571,
      "step": 650490
    },
    {
      "epoch": 2.2798403235585836,
      "grad_norm": 2.59375,
      "learning_rate": 1.333637078747946e-05,
      "loss": 0.8711,
      "step": 650500
    },
    {
      "epoch": 2.2798753710654793,
      "grad_norm": 2.828125,
      "learning_rate": 1.3335721758815756e-05,
      "loss": 0.8254,
      "step": 650510
    },
    {
      "epoch": 2.279910418572375,
      "grad_norm": 2.90625,
      "learning_rate": 1.3335072730152054e-05,
      "loss": 0.8057,
      "step": 650520
    },
    {
      "epoch": 2.2799454660792704,
      "grad_norm": 2.984375,
      "learning_rate": 1.3334423701488352e-05,
      "loss": 0.7937,
      "step": 650530
    },
    {
      "epoch": 2.279980513586166,
      "grad_norm": 2.5625,
      "learning_rate": 1.333377467282465e-05,
      "loss": 0.7871,
      "step": 650540
    },
    {
      "epoch": 2.2800155610930615,
      "grad_norm": 2.78125,
      "learning_rate": 1.3333125644160948e-05,
      "loss": 0.9134,
      "step": 650550
    },
    {
      "epoch": 2.2800506085999572,
      "grad_norm": 3.296875,
      "learning_rate": 1.3332476615497248e-05,
      "loss": 0.9136,
      "step": 650560
    },
    {
      "epoch": 2.280085656106853,
      "grad_norm": 3.328125,
      "learning_rate": 1.3331827586833546e-05,
      "loss": 0.9319,
      "step": 650570
    },
    {
      "epoch": 2.2801207036137483,
      "grad_norm": 2.828125,
      "learning_rate": 1.3331178558169844e-05,
      "loss": 0.7786,
      "step": 650580
    },
    {
      "epoch": 2.280155751120644,
      "grad_norm": 2.59375,
      "learning_rate": 1.3330529529506142e-05,
      "loss": 0.8367,
      "step": 650590
    },
    {
      "epoch": 2.28019079862754,
      "grad_norm": 2.859375,
      "learning_rate": 1.332988050084244e-05,
      "loss": 0.8026,
      "step": 650600
    },
    {
      "epoch": 2.280225846134435,
      "grad_norm": 2.9375,
      "learning_rate": 1.3329231472178738e-05,
      "loss": 0.8243,
      "step": 650610
    },
    {
      "epoch": 2.280260893641331,
      "grad_norm": 2.828125,
      "learning_rate": 1.3328582443515036e-05,
      "loss": 0.8006,
      "step": 650620
    },
    {
      "epoch": 2.2802959411482266,
      "grad_norm": 2.578125,
      "learning_rate": 1.3327933414851335e-05,
      "loss": 0.8477,
      "step": 650630
    },
    {
      "epoch": 2.280330988655122,
      "grad_norm": 3.109375,
      "learning_rate": 1.3327284386187633e-05,
      "loss": 0.764,
      "step": 650640
    },
    {
      "epoch": 2.2803660361620177,
      "grad_norm": 3.03125,
      "learning_rate": 1.3326635357523931e-05,
      "loss": 0.8103,
      "step": 650650
    },
    {
      "epoch": 2.280401083668913,
      "grad_norm": 3.171875,
      "learning_rate": 1.332598632886023e-05,
      "loss": 0.838,
      "step": 650660
    },
    {
      "epoch": 2.280436131175809,
      "grad_norm": 2.828125,
      "learning_rate": 1.3325337300196527e-05,
      "loss": 0.8104,
      "step": 650670
    },
    {
      "epoch": 2.2804711786827045,
      "grad_norm": 2.921875,
      "learning_rate": 1.3324688271532825e-05,
      "loss": 0.7531,
      "step": 650680
    },
    {
      "epoch": 2.2805062261896,
      "grad_norm": 2.796875,
      "learning_rate": 1.3324039242869125e-05,
      "loss": 0.7886,
      "step": 650690
    },
    {
      "epoch": 2.2805412736964956,
      "grad_norm": 2.90625,
      "learning_rate": 1.332339021420542e-05,
      "loss": 0.858,
      "step": 650700
    },
    {
      "epoch": 2.2805763212033914,
      "grad_norm": 2.703125,
      "learning_rate": 1.3322741185541718e-05,
      "loss": 0.887,
      "step": 650710
    },
    {
      "epoch": 2.2806113687102867,
      "grad_norm": 2.90625,
      "learning_rate": 1.3322092156878016e-05,
      "loss": 0.7214,
      "step": 650720
    },
    {
      "epoch": 2.2806464162171824,
      "grad_norm": 3.21875,
      "learning_rate": 1.3321443128214314e-05,
      "loss": 0.7827,
      "step": 650730
    },
    {
      "epoch": 2.280681463724078,
      "grad_norm": 3.203125,
      "learning_rate": 1.3320794099550613e-05,
      "loss": 0.8018,
      "step": 650740
    },
    {
      "epoch": 2.2807165112309735,
      "grad_norm": 3.171875,
      "learning_rate": 1.3320145070886911e-05,
      "loss": 0.8619,
      "step": 650750
    },
    {
      "epoch": 2.2807515587378693,
      "grad_norm": 3.265625,
      "learning_rate": 1.331949604222321e-05,
      "loss": 0.8646,
      "step": 650760
    },
    {
      "epoch": 2.2807866062447646,
      "grad_norm": 2.65625,
      "learning_rate": 1.3318847013559507e-05,
      "loss": 0.8028,
      "step": 650770
    },
    {
      "epoch": 2.2808216537516603,
      "grad_norm": 3.0625,
      "learning_rate": 1.3318197984895805e-05,
      "loss": 0.7788,
      "step": 650780
    },
    {
      "epoch": 2.280856701258556,
      "grad_norm": 3.109375,
      "learning_rate": 1.3317548956232103e-05,
      "loss": 0.7597,
      "step": 650790
    },
    {
      "epoch": 2.2808917487654514,
      "grad_norm": 2.640625,
      "learning_rate": 1.3316899927568401e-05,
      "loss": 0.8757,
      "step": 650800
    },
    {
      "epoch": 2.280926796272347,
      "grad_norm": 2.96875,
      "learning_rate": 1.3316250898904701e-05,
      "loss": 0.8217,
      "step": 650810
    },
    {
      "epoch": 2.280961843779243,
      "grad_norm": 3.109375,
      "learning_rate": 1.3315601870240999e-05,
      "loss": 0.8808,
      "step": 650820
    },
    {
      "epoch": 2.2809968912861383,
      "grad_norm": 2.515625,
      "learning_rate": 1.3314952841577297e-05,
      "loss": 0.764,
      "step": 650830
    },
    {
      "epoch": 2.281031938793034,
      "grad_norm": 3.140625,
      "learning_rate": 1.3314303812913595e-05,
      "loss": 0.7436,
      "step": 650840
    },
    {
      "epoch": 2.2810669862999298,
      "grad_norm": 2.671875,
      "learning_rate": 1.3313654784249893e-05,
      "loss": 0.8024,
      "step": 650850
    },
    {
      "epoch": 2.281102033806825,
      "grad_norm": 2.859375,
      "learning_rate": 1.3313005755586191e-05,
      "loss": 0.829,
      "step": 650860
    },
    {
      "epoch": 2.281137081313721,
      "grad_norm": 3.3125,
      "learning_rate": 1.3312356726922489e-05,
      "loss": 0.7501,
      "step": 650870
    },
    {
      "epoch": 2.281172128820616,
      "grad_norm": 2.28125,
      "learning_rate": 1.3311707698258785e-05,
      "loss": 0.7839,
      "step": 650880
    },
    {
      "epoch": 2.281207176327512,
      "grad_norm": 3.0,
      "learning_rate": 1.3311058669595083e-05,
      "loss": 0.7804,
      "step": 650890
    },
    {
      "epoch": 2.2812422238344077,
      "grad_norm": 3.0,
      "learning_rate": 1.3310409640931381e-05,
      "loss": 0.8266,
      "step": 650900
    },
    {
      "epoch": 2.281277271341303,
      "grad_norm": 3.21875,
      "learning_rate": 1.330976061226768e-05,
      "loss": 0.8141,
      "step": 650910
    },
    {
      "epoch": 2.2813123188481987,
      "grad_norm": 3.15625,
      "learning_rate": 1.3309111583603979e-05,
      "loss": 0.8375,
      "step": 650920
    },
    {
      "epoch": 2.2813473663550945,
      "grad_norm": 3.140625,
      "learning_rate": 1.3308462554940277e-05,
      "loss": 0.792,
      "step": 650930
    },
    {
      "epoch": 2.28138241386199,
      "grad_norm": 2.828125,
      "learning_rate": 1.3307813526276575e-05,
      "loss": 0.8452,
      "step": 650940
    },
    {
      "epoch": 2.2814174613688856,
      "grad_norm": 2.84375,
      "learning_rate": 1.3307164497612873e-05,
      "loss": 0.8656,
      "step": 650950
    },
    {
      "epoch": 2.2814525088757813,
      "grad_norm": 3.09375,
      "learning_rate": 1.3306515468949171e-05,
      "loss": 0.8009,
      "step": 650960
    },
    {
      "epoch": 2.2814875563826766,
      "grad_norm": 2.953125,
      "learning_rate": 1.3305866440285469e-05,
      "loss": 0.8936,
      "step": 650970
    },
    {
      "epoch": 2.2815226038895724,
      "grad_norm": 3.046875,
      "learning_rate": 1.3305217411621767e-05,
      "loss": 0.7621,
      "step": 650980
    },
    {
      "epoch": 2.2815576513964677,
      "grad_norm": 2.859375,
      "learning_rate": 1.3304568382958067e-05,
      "loss": 0.7976,
      "step": 650990
    },
    {
      "epoch": 2.2815926989033635,
      "grad_norm": 2.609375,
      "learning_rate": 1.3303919354294365e-05,
      "loss": 0.7296,
      "step": 651000
    },
    {
      "epoch": 2.2816277464102592,
      "grad_norm": 2.734375,
      "learning_rate": 1.3303270325630663e-05,
      "loss": 0.8528,
      "step": 651010
    },
    {
      "epoch": 2.2816627939171545,
      "grad_norm": 2.953125,
      "learning_rate": 1.330262129696696e-05,
      "loss": 0.7353,
      "step": 651020
    },
    {
      "epoch": 2.2816978414240503,
      "grad_norm": 2.765625,
      "learning_rate": 1.3301972268303259e-05,
      "loss": 0.7465,
      "step": 651030
    },
    {
      "epoch": 2.281732888930946,
      "grad_norm": 3.171875,
      "learning_rate": 1.3301323239639557e-05,
      "loss": 0.8576,
      "step": 651040
    },
    {
      "epoch": 2.2817679364378414,
      "grad_norm": 3.140625,
      "learning_rate": 1.3300674210975855e-05,
      "loss": 0.863,
      "step": 651050
    },
    {
      "epoch": 2.281802983944737,
      "grad_norm": 2.84375,
      "learning_rate": 1.3300025182312154e-05,
      "loss": 0.8209,
      "step": 651060
    },
    {
      "epoch": 2.281838031451633,
      "grad_norm": 2.890625,
      "learning_rate": 1.3299376153648449e-05,
      "loss": 0.9183,
      "step": 651070
    },
    {
      "epoch": 2.281873078958528,
      "grad_norm": 3.078125,
      "learning_rate": 1.3298727124984747e-05,
      "loss": 0.7761,
      "step": 651080
    },
    {
      "epoch": 2.281908126465424,
      "grad_norm": 3.28125,
      "learning_rate": 1.3298078096321045e-05,
      "loss": 0.8272,
      "step": 651090
    },
    {
      "epoch": 2.2819431739723193,
      "grad_norm": 2.703125,
      "learning_rate": 1.3297429067657343e-05,
      "loss": 0.7347,
      "step": 651100
    },
    {
      "epoch": 2.281978221479215,
      "grad_norm": 3.03125,
      "learning_rate": 1.3296780038993643e-05,
      "loss": 0.8881,
      "step": 651110
    },
    {
      "epoch": 2.282013268986111,
      "grad_norm": 2.8125,
      "learning_rate": 1.329613101032994e-05,
      "loss": 0.8915,
      "step": 651120
    },
    {
      "epoch": 2.282048316493006,
      "grad_norm": 2.6875,
      "learning_rate": 1.3295481981666239e-05,
      "loss": 0.8177,
      "step": 651130
    },
    {
      "epoch": 2.282083363999902,
      "grad_norm": 2.859375,
      "learning_rate": 1.3294832953002537e-05,
      "loss": 0.7842,
      "step": 651140
    },
    {
      "epoch": 2.2821184115067976,
      "grad_norm": 3.109375,
      "learning_rate": 1.3294183924338835e-05,
      "loss": 0.7988,
      "step": 651150
    },
    {
      "epoch": 2.282153459013693,
      "grad_norm": 2.5,
      "learning_rate": 1.3293534895675133e-05,
      "loss": 0.7709,
      "step": 651160
    },
    {
      "epoch": 2.2821885065205887,
      "grad_norm": 2.9375,
      "learning_rate": 1.3292885867011432e-05,
      "loss": 0.8384,
      "step": 651170
    },
    {
      "epoch": 2.2822235540274844,
      "grad_norm": 3.09375,
      "learning_rate": 1.329223683834773e-05,
      "loss": 0.8167,
      "step": 651180
    },
    {
      "epoch": 2.2822586015343798,
      "grad_norm": 2.796875,
      "learning_rate": 1.3291587809684028e-05,
      "loss": 0.7836,
      "step": 651190
    },
    {
      "epoch": 2.2822936490412755,
      "grad_norm": 3.03125,
      "learning_rate": 1.3290938781020326e-05,
      "loss": 0.8202,
      "step": 651200
    },
    {
      "epoch": 2.282328696548171,
      "grad_norm": 2.96875,
      "learning_rate": 1.3290289752356624e-05,
      "loss": 0.7606,
      "step": 651210
    },
    {
      "epoch": 2.2823637440550666,
      "grad_norm": 2.9375,
      "learning_rate": 1.3289640723692922e-05,
      "loss": 0.7374,
      "step": 651220
    },
    {
      "epoch": 2.2823987915619623,
      "grad_norm": 2.859375,
      "learning_rate": 1.328899169502922e-05,
      "loss": 0.7911,
      "step": 651230
    },
    {
      "epoch": 2.2824338390688577,
      "grad_norm": 2.53125,
      "learning_rate": 1.328834266636552e-05,
      "loss": 0.9118,
      "step": 651240
    },
    {
      "epoch": 2.2824688865757534,
      "grad_norm": 2.53125,
      "learning_rate": 1.3287693637701818e-05,
      "loss": 0.7468,
      "step": 651250
    },
    {
      "epoch": 2.282503934082649,
      "grad_norm": 2.984375,
      "learning_rate": 1.3287044609038113e-05,
      "loss": 0.8413,
      "step": 651260
    },
    {
      "epoch": 2.2825389815895445,
      "grad_norm": 2.578125,
      "learning_rate": 1.328639558037441e-05,
      "loss": 0.7889,
      "step": 651270
    },
    {
      "epoch": 2.2825740290964402,
      "grad_norm": 2.984375,
      "learning_rate": 1.3285746551710709e-05,
      "loss": 0.8193,
      "step": 651280
    },
    {
      "epoch": 2.282609076603336,
      "grad_norm": 2.984375,
      "learning_rate": 1.3285097523047008e-05,
      "loss": 0.7871,
      "step": 651290
    },
    {
      "epoch": 2.2826441241102313,
      "grad_norm": 2.921875,
      "learning_rate": 1.3284448494383306e-05,
      "loss": 0.7708,
      "step": 651300
    },
    {
      "epoch": 2.282679171617127,
      "grad_norm": 3.0625,
      "learning_rate": 1.3283799465719604e-05,
      "loss": 0.8194,
      "step": 651310
    },
    {
      "epoch": 2.2827142191240224,
      "grad_norm": 2.765625,
      "learning_rate": 1.3283150437055902e-05,
      "loss": 0.8444,
      "step": 651320
    },
    {
      "epoch": 2.282749266630918,
      "grad_norm": 2.984375,
      "learning_rate": 1.32825014083922e-05,
      "loss": 0.8977,
      "step": 651330
    },
    {
      "epoch": 2.282784314137814,
      "grad_norm": 2.203125,
      "learning_rate": 1.3281852379728498e-05,
      "loss": 0.7626,
      "step": 651340
    },
    {
      "epoch": 2.282819361644709,
      "grad_norm": 2.859375,
      "learning_rate": 1.3281203351064796e-05,
      "loss": 0.7995,
      "step": 651350
    },
    {
      "epoch": 2.282854409151605,
      "grad_norm": 3.515625,
      "learning_rate": 1.3280554322401096e-05,
      "loss": 0.8575,
      "step": 651360
    },
    {
      "epoch": 2.2828894566585007,
      "grad_norm": 3.203125,
      "learning_rate": 1.3279905293737394e-05,
      "loss": 0.8099,
      "step": 651370
    },
    {
      "epoch": 2.282924504165396,
      "grad_norm": 2.84375,
      "learning_rate": 1.3279256265073692e-05,
      "loss": 0.8607,
      "step": 651380
    },
    {
      "epoch": 2.282959551672292,
      "grad_norm": 3.15625,
      "learning_rate": 1.327860723640999e-05,
      "loss": 0.8034,
      "step": 651390
    },
    {
      "epoch": 2.2829945991791876,
      "grad_norm": 2.890625,
      "learning_rate": 1.3277958207746288e-05,
      "loss": 0.8464,
      "step": 651400
    },
    {
      "epoch": 2.283029646686083,
      "grad_norm": 3.171875,
      "learning_rate": 1.3277309179082586e-05,
      "loss": 0.915,
      "step": 651410
    },
    {
      "epoch": 2.2830646941929786,
      "grad_norm": 3.171875,
      "learning_rate": 1.3276660150418884e-05,
      "loss": 0.7169,
      "step": 651420
    },
    {
      "epoch": 2.283099741699874,
      "grad_norm": 2.71875,
      "learning_rate": 1.3276011121755184e-05,
      "loss": 0.867,
      "step": 651430
    },
    {
      "epoch": 2.2831347892067697,
      "grad_norm": 2.8125,
      "learning_rate": 1.3275362093091482e-05,
      "loss": 0.7404,
      "step": 651440
    },
    {
      "epoch": 2.2831698367136655,
      "grad_norm": 2.59375,
      "learning_rate": 1.3274713064427776e-05,
      "loss": 0.8182,
      "step": 651450
    },
    {
      "epoch": 2.283204884220561,
      "grad_norm": 2.890625,
      "learning_rate": 1.3274064035764074e-05,
      "loss": 0.8188,
      "step": 651460
    },
    {
      "epoch": 2.2832399317274565,
      "grad_norm": 2.828125,
      "learning_rate": 1.3273415007100374e-05,
      "loss": 0.8589,
      "step": 651470
    },
    {
      "epoch": 2.2832749792343523,
      "grad_norm": 3.171875,
      "learning_rate": 1.3272765978436672e-05,
      "loss": 0.7935,
      "step": 651480
    },
    {
      "epoch": 2.2833100267412476,
      "grad_norm": 2.640625,
      "learning_rate": 1.327211694977297e-05,
      "loss": 0.7116,
      "step": 651490
    },
    {
      "epoch": 2.2833450742481434,
      "grad_norm": 3.234375,
      "learning_rate": 1.3271467921109268e-05,
      "loss": 0.7363,
      "step": 651500
    },
    {
      "epoch": 2.283380121755039,
      "grad_norm": 2.78125,
      "learning_rate": 1.3270818892445566e-05,
      "loss": 0.7688,
      "step": 651510
    },
    {
      "epoch": 2.2834151692619344,
      "grad_norm": 3.3125,
      "learning_rate": 1.3270169863781864e-05,
      "loss": 0.8873,
      "step": 651520
    },
    {
      "epoch": 2.28345021676883,
      "grad_norm": 3.15625,
      "learning_rate": 1.3269520835118162e-05,
      "loss": 0.8714,
      "step": 651530
    },
    {
      "epoch": 2.2834852642757255,
      "grad_norm": 2.765625,
      "learning_rate": 1.3268871806454462e-05,
      "loss": 0.776,
      "step": 651540
    },
    {
      "epoch": 2.2835203117826213,
      "grad_norm": 3.046875,
      "learning_rate": 1.326822277779076e-05,
      "loss": 0.8477,
      "step": 651550
    },
    {
      "epoch": 2.283555359289517,
      "grad_norm": 2.59375,
      "learning_rate": 1.3267573749127058e-05,
      "loss": 0.7414,
      "step": 651560
    },
    {
      "epoch": 2.283590406796413,
      "grad_norm": 3.015625,
      "learning_rate": 1.3266924720463356e-05,
      "loss": 0.7808,
      "step": 651570
    },
    {
      "epoch": 2.283625454303308,
      "grad_norm": 3.046875,
      "learning_rate": 1.3266275691799654e-05,
      "loss": 0.8352,
      "step": 651580
    },
    {
      "epoch": 2.283660501810204,
      "grad_norm": 3.140625,
      "learning_rate": 1.3265626663135952e-05,
      "loss": 0.7896,
      "step": 651590
    },
    {
      "epoch": 2.283695549317099,
      "grad_norm": 3.046875,
      "learning_rate": 1.326497763447225e-05,
      "loss": 0.8504,
      "step": 651600
    },
    {
      "epoch": 2.283730596823995,
      "grad_norm": 2.4375,
      "learning_rate": 1.326432860580855e-05,
      "loss": 0.7579,
      "step": 651610
    },
    {
      "epoch": 2.2837656443308907,
      "grad_norm": 2.59375,
      "learning_rate": 1.3263679577144847e-05,
      "loss": 0.7611,
      "step": 651620
    },
    {
      "epoch": 2.283800691837786,
      "grad_norm": 2.578125,
      "learning_rate": 1.3263030548481145e-05,
      "loss": 0.8332,
      "step": 651630
    },
    {
      "epoch": 2.2838357393446818,
      "grad_norm": 3.046875,
      "learning_rate": 1.326238151981744e-05,
      "loss": 0.81,
      "step": 651640
    },
    {
      "epoch": 2.283870786851577,
      "grad_norm": 2.890625,
      "learning_rate": 1.3261732491153738e-05,
      "loss": 0.9058,
      "step": 651650
    },
    {
      "epoch": 2.283905834358473,
      "grad_norm": 3.046875,
      "learning_rate": 1.3261083462490038e-05,
      "loss": 0.8648,
      "step": 651660
    },
    {
      "epoch": 2.2839408818653686,
      "grad_norm": 2.625,
      "learning_rate": 1.3260434433826336e-05,
      "loss": 0.6932,
      "step": 651670
    },
    {
      "epoch": 2.2839759293722643,
      "grad_norm": 2.953125,
      "learning_rate": 1.3259785405162634e-05,
      "loss": 0.8144,
      "step": 651680
    },
    {
      "epoch": 2.2840109768791597,
      "grad_norm": 3.21875,
      "learning_rate": 1.3259136376498932e-05,
      "loss": 0.8725,
      "step": 651690
    },
    {
      "epoch": 2.2840460243860554,
      "grad_norm": 3.421875,
      "learning_rate": 1.325848734783523e-05,
      "loss": 0.8917,
      "step": 651700
    },
    {
      "epoch": 2.2840810718929507,
      "grad_norm": 3.03125,
      "learning_rate": 1.3257838319171528e-05,
      "loss": 0.8228,
      "step": 651710
    },
    {
      "epoch": 2.2841161193998465,
      "grad_norm": 2.65625,
      "learning_rate": 1.3257189290507827e-05,
      "loss": 0.8372,
      "step": 651720
    },
    {
      "epoch": 2.2841511669067422,
      "grad_norm": 2.734375,
      "learning_rate": 1.3256540261844125e-05,
      "loss": 0.8306,
      "step": 651730
    },
    {
      "epoch": 2.2841862144136376,
      "grad_norm": 2.75,
      "learning_rate": 1.3255891233180423e-05,
      "loss": 0.9127,
      "step": 651740
    },
    {
      "epoch": 2.2842212619205333,
      "grad_norm": 2.515625,
      "learning_rate": 1.3255242204516721e-05,
      "loss": 0.7472,
      "step": 651750
    },
    {
      "epoch": 2.2842563094274286,
      "grad_norm": 2.53125,
      "learning_rate": 1.325459317585302e-05,
      "loss": 0.7753,
      "step": 651760
    },
    {
      "epoch": 2.2842913569343244,
      "grad_norm": 2.796875,
      "learning_rate": 1.3253944147189317e-05,
      "loss": 0.7733,
      "step": 651770
    },
    {
      "epoch": 2.28432640444122,
      "grad_norm": 2.78125,
      "learning_rate": 1.3253295118525615e-05,
      "loss": 0.8507,
      "step": 651780
    },
    {
      "epoch": 2.284361451948116,
      "grad_norm": 3.296875,
      "learning_rate": 1.3252646089861915e-05,
      "loss": 0.8663,
      "step": 651790
    },
    {
      "epoch": 2.284396499455011,
      "grad_norm": 3.015625,
      "learning_rate": 1.3251997061198213e-05,
      "loss": 0.8287,
      "step": 651800
    },
    {
      "epoch": 2.284431546961907,
      "grad_norm": 3.265625,
      "learning_rate": 1.3251348032534511e-05,
      "loss": 0.8383,
      "step": 651810
    },
    {
      "epoch": 2.2844665944688023,
      "grad_norm": 2.734375,
      "learning_rate": 1.3250699003870806e-05,
      "loss": 0.7999,
      "step": 651820
    },
    {
      "epoch": 2.284501641975698,
      "grad_norm": 2.71875,
      "learning_rate": 1.3250049975207104e-05,
      "loss": 0.7612,
      "step": 651830
    },
    {
      "epoch": 2.284536689482594,
      "grad_norm": 2.75,
      "learning_rate": 1.3249400946543403e-05,
      "loss": 0.8377,
      "step": 651840
    },
    {
      "epoch": 2.284571736989489,
      "grad_norm": 3.015625,
      "learning_rate": 1.3248751917879701e-05,
      "loss": 0.7455,
      "step": 651850
    },
    {
      "epoch": 2.284606784496385,
      "grad_norm": 3.453125,
      "learning_rate": 1.3248102889216e-05,
      "loss": 0.8779,
      "step": 651860
    },
    {
      "epoch": 2.2846418320032806,
      "grad_norm": 3.09375,
      "learning_rate": 1.3247453860552297e-05,
      "loss": 0.7209,
      "step": 651870
    },
    {
      "epoch": 2.284676879510176,
      "grad_norm": 2.625,
      "learning_rate": 1.3246804831888595e-05,
      "loss": 0.7139,
      "step": 651880
    },
    {
      "epoch": 2.2847119270170717,
      "grad_norm": 3.265625,
      "learning_rate": 1.3246155803224893e-05,
      "loss": 0.8037,
      "step": 651890
    },
    {
      "epoch": 2.2847469745239675,
      "grad_norm": 2.640625,
      "learning_rate": 1.3245506774561191e-05,
      "loss": 0.8076,
      "step": 651900
    },
    {
      "epoch": 2.284782022030863,
      "grad_norm": 2.6875,
      "learning_rate": 1.3244857745897491e-05,
      "loss": 0.8838,
      "step": 651910
    },
    {
      "epoch": 2.2848170695377585,
      "grad_norm": 2.84375,
      "learning_rate": 1.3244208717233789e-05,
      "loss": 0.8128,
      "step": 651920
    },
    {
      "epoch": 2.284852117044654,
      "grad_norm": 2.828125,
      "learning_rate": 1.3243559688570087e-05,
      "loss": 0.8075,
      "step": 651930
    },
    {
      "epoch": 2.2848871645515496,
      "grad_norm": 2.796875,
      "learning_rate": 1.3242910659906385e-05,
      "loss": 0.8511,
      "step": 651940
    },
    {
      "epoch": 2.2849222120584454,
      "grad_norm": 2.84375,
      "learning_rate": 1.3242261631242683e-05,
      "loss": 0.7895,
      "step": 651950
    },
    {
      "epoch": 2.2849572595653407,
      "grad_norm": 3.4375,
      "learning_rate": 1.3241612602578981e-05,
      "loss": 0.7249,
      "step": 651960
    },
    {
      "epoch": 2.2849923070722364,
      "grad_norm": 3.1875,
      "learning_rate": 1.3240963573915279e-05,
      "loss": 0.7635,
      "step": 651970
    },
    {
      "epoch": 2.285027354579132,
      "grad_norm": 3.3125,
      "learning_rate": 1.3240314545251579e-05,
      "loss": 0.8252,
      "step": 651980
    },
    {
      "epoch": 2.2850624020860275,
      "grad_norm": 2.53125,
      "learning_rate": 1.3239665516587877e-05,
      "loss": 0.7369,
      "step": 651990
    },
    {
      "epoch": 2.2850974495929233,
      "grad_norm": 2.984375,
      "learning_rate": 1.3239016487924175e-05,
      "loss": 0.8417,
      "step": 652000
    },
    {
      "epoch": 2.285132497099819,
      "grad_norm": 2.609375,
      "learning_rate": 1.323836745926047e-05,
      "loss": 0.7978,
      "step": 652010
    },
    {
      "epoch": 2.2851675446067143,
      "grad_norm": 3.0625,
      "learning_rate": 1.3237718430596769e-05,
      "loss": 0.7455,
      "step": 652020
    },
    {
      "epoch": 2.28520259211361,
      "grad_norm": 2.78125,
      "learning_rate": 1.3237069401933067e-05,
      "loss": 0.7498,
      "step": 652030
    },
    {
      "epoch": 2.2852376396205054,
      "grad_norm": 2.765625,
      "learning_rate": 1.3236420373269365e-05,
      "loss": 0.7782,
      "step": 652040
    },
    {
      "epoch": 2.285272687127401,
      "grad_norm": 2.828125,
      "learning_rate": 1.3235771344605663e-05,
      "loss": 0.7913,
      "step": 652050
    },
    {
      "epoch": 2.285307734634297,
      "grad_norm": 2.609375,
      "learning_rate": 1.3235122315941961e-05,
      "loss": 0.8404,
      "step": 652060
    },
    {
      "epoch": 2.2853427821411922,
      "grad_norm": 2.84375,
      "learning_rate": 1.3234473287278259e-05,
      "loss": 0.798,
      "step": 652070
    },
    {
      "epoch": 2.285377829648088,
      "grad_norm": 2.921875,
      "learning_rate": 1.3233824258614557e-05,
      "loss": 0.8413,
      "step": 652080
    },
    {
      "epoch": 2.2854128771549838,
      "grad_norm": 2.40625,
      "learning_rate": 1.3233175229950857e-05,
      "loss": 0.7915,
      "step": 652090
    },
    {
      "epoch": 2.285447924661879,
      "grad_norm": 3.0,
      "learning_rate": 1.3232526201287155e-05,
      "loss": 0.8638,
      "step": 652100
    },
    {
      "epoch": 2.285482972168775,
      "grad_norm": 3.0,
      "learning_rate": 1.3231877172623453e-05,
      "loss": 0.8737,
      "step": 652110
    },
    {
      "epoch": 2.2855180196756706,
      "grad_norm": 2.6875,
      "learning_rate": 1.323122814395975e-05,
      "loss": 0.8202,
      "step": 652120
    },
    {
      "epoch": 2.285553067182566,
      "grad_norm": 2.25,
      "learning_rate": 1.3230579115296049e-05,
      "loss": 0.7501,
      "step": 652130
    },
    {
      "epoch": 2.2855881146894617,
      "grad_norm": 3.328125,
      "learning_rate": 1.3229930086632347e-05,
      "loss": 0.9036,
      "step": 652140
    },
    {
      "epoch": 2.285623162196357,
      "grad_norm": 3.03125,
      "learning_rate": 1.3229281057968645e-05,
      "loss": 0.7398,
      "step": 652150
    },
    {
      "epoch": 2.2856582097032527,
      "grad_norm": 2.8125,
      "learning_rate": 1.3228632029304944e-05,
      "loss": 0.8671,
      "step": 652160
    },
    {
      "epoch": 2.2856932572101485,
      "grad_norm": 3.0625,
      "learning_rate": 1.3227983000641242e-05,
      "loss": 0.8652,
      "step": 652170
    },
    {
      "epoch": 2.285728304717044,
      "grad_norm": 2.4375,
      "learning_rate": 1.322733397197754e-05,
      "loss": 0.8186,
      "step": 652180
    },
    {
      "epoch": 2.2857633522239396,
      "grad_norm": 2.875,
      "learning_rate": 1.3226684943313838e-05,
      "loss": 0.8535,
      "step": 652190
    },
    {
      "epoch": 2.2857983997308353,
      "grad_norm": 3.0625,
      "learning_rate": 1.3226035914650135e-05,
      "loss": 0.8379,
      "step": 652200
    },
    {
      "epoch": 2.2858334472377306,
      "grad_norm": 2.78125,
      "learning_rate": 1.3225386885986433e-05,
      "loss": 0.8566,
      "step": 652210
    },
    {
      "epoch": 2.2858684947446264,
      "grad_norm": 3.34375,
      "learning_rate": 1.322473785732273e-05,
      "loss": 0.872,
      "step": 652220
    },
    {
      "epoch": 2.285903542251522,
      "grad_norm": 2.546875,
      "learning_rate": 1.3224088828659029e-05,
      "loss": 0.7925,
      "step": 652230
    },
    {
      "epoch": 2.2859385897584175,
      "grad_norm": 2.390625,
      "learning_rate": 1.3223439799995327e-05,
      "loss": 0.84,
      "step": 652240
    },
    {
      "epoch": 2.285973637265313,
      "grad_norm": 3.21875,
      "learning_rate": 1.3222790771331625e-05,
      "loss": 0.8168,
      "step": 652250
    },
    {
      "epoch": 2.2860086847722085,
      "grad_norm": 3.046875,
      "learning_rate": 1.3222141742667923e-05,
      "loss": 0.8476,
      "step": 652260
    },
    {
      "epoch": 2.2860437322791043,
      "grad_norm": 3.0625,
      "learning_rate": 1.3221492714004222e-05,
      "loss": 0.8127,
      "step": 652270
    },
    {
      "epoch": 2.286078779786,
      "grad_norm": 2.390625,
      "learning_rate": 1.322084368534052e-05,
      "loss": 0.7839,
      "step": 652280
    },
    {
      "epoch": 2.2861138272928954,
      "grad_norm": 2.796875,
      "learning_rate": 1.3220194656676818e-05,
      "loss": 0.8142,
      "step": 652290
    },
    {
      "epoch": 2.286148874799791,
      "grad_norm": 3.046875,
      "learning_rate": 1.3219545628013116e-05,
      "loss": 0.798,
      "step": 652300
    },
    {
      "epoch": 2.286183922306687,
      "grad_norm": 3.234375,
      "learning_rate": 1.3218896599349414e-05,
      "loss": 0.8794,
      "step": 652310
    },
    {
      "epoch": 2.286218969813582,
      "grad_norm": 2.703125,
      "learning_rate": 1.3218247570685712e-05,
      "loss": 0.7257,
      "step": 652320
    },
    {
      "epoch": 2.286254017320478,
      "grad_norm": 2.90625,
      "learning_rate": 1.321759854202201e-05,
      "loss": 0.789,
      "step": 652330
    },
    {
      "epoch": 2.2862890648273737,
      "grad_norm": 2.359375,
      "learning_rate": 1.321694951335831e-05,
      "loss": 0.8261,
      "step": 652340
    },
    {
      "epoch": 2.286324112334269,
      "grad_norm": 2.859375,
      "learning_rate": 1.3216300484694608e-05,
      "loss": 0.8231,
      "step": 652350
    },
    {
      "epoch": 2.286359159841165,
      "grad_norm": 3.203125,
      "learning_rate": 1.3215651456030906e-05,
      "loss": 0.7868,
      "step": 652360
    },
    {
      "epoch": 2.28639420734806,
      "grad_norm": 2.984375,
      "learning_rate": 1.3215002427367204e-05,
      "loss": 0.8416,
      "step": 652370
    },
    {
      "epoch": 2.286429254854956,
      "grad_norm": 3.21875,
      "learning_rate": 1.3214353398703502e-05,
      "loss": 0.7255,
      "step": 652380
    },
    {
      "epoch": 2.2864643023618516,
      "grad_norm": 3.25,
      "learning_rate": 1.3213704370039798e-05,
      "loss": 0.7754,
      "step": 652390
    },
    {
      "epoch": 2.286499349868747,
      "grad_norm": 3.09375,
      "learning_rate": 1.3213055341376096e-05,
      "loss": 0.8426,
      "step": 652400
    },
    {
      "epoch": 2.2865343973756427,
      "grad_norm": 2.6875,
      "learning_rate": 1.3212406312712394e-05,
      "loss": 0.7436,
      "step": 652410
    },
    {
      "epoch": 2.2865694448825384,
      "grad_norm": 2.609375,
      "learning_rate": 1.3211757284048692e-05,
      "loss": 0.7861,
      "step": 652420
    },
    {
      "epoch": 2.2866044923894338,
      "grad_norm": 2.734375,
      "learning_rate": 1.321110825538499e-05,
      "loss": 0.7408,
      "step": 652430
    },
    {
      "epoch": 2.2866395398963295,
      "grad_norm": 2.515625,
      "learning_rate": 1.3210459226721288e-05,
      "loss": 0.8273,
      "step": 652440
    },
    {
      "epoch": 2.2866745874032253,
      "grad_norm": 3.15625,
      "learning_rate": 1.3209810198057586e-05,
      "loss": 0.7647,
      "step": 652450
    },
    {
      "epoch": 2.2867096349101206,
      "grad_norm": 2.953125,
      "learning_rate": 1.3209161169393886e-05,
      "loss": 0.8021,
      "step": 652460
    },
    {
      "epoch": 2.2867446824170163,
      "grad_norm": 3.015625,
      "learning_rate": 1.3208512140730184e-05,
      "loss": 0.867,
      "step": 652470
    },
    {
      "epoch": 2.2867797299239117,
      "grad_norm": 3.125,
      "learning_rate": 1.3207863112066482e-05,
      "loss": 0.8181,
      "step": 652480
    },
    {
      "epoch": 2.2868147774308074,
      "grad_norm": 3.0625,
      "learning_rate": 1.320721408340278e-05,
      "loss": 0.8124,
      "step": 652490
    },
    {
      "epoch": 2.286849824937703,
      "grad_norm": 3.28125,
      "learning_rate": 1.3206565054739078e-05,
      "loss": 0.7659,
      "step": 652500
    },
    {
      "epoch": 2.2868848724445985,
      "grad_norm": 3.03125,
      "learning_rate": 1.3205916026075376e-05,
      "loss": 0.7734,
      "step": 652510
    },
    {
      "epoch": 2.2869199199514942,
      "grad_norm": 2.75,
      "learning_rate": 1.3205266997411674e-05,
      "loss": 0.7652,
      "step": 652520
    },
    {
      "epoch": 2.28695496745839,
      "grad_norm": 3.0625,
      "learning_rate": 1.3204617968747974e-05,
      "loss": 0.7631,
      "step": 652530
    },
    {
      "epoch": 2.2869900149652853,
      "grad_norm": 2.515625,
      "learning_rate": 1.3203968940084272e-05,
      "loss": 0.8122,
      "step": 652540
    },
    {
      "epoch": 2.287025062472181,
      "grad_norm": 2.9375,
      "learning_rate": 1.320331991142057e-05,
      "loss": 0.8192,
      "step": 652550
    },
    {
      "epoch": 2.287060109979077,
      "grad_norm": 2.515625,
      "learning_rate": 1.3202670882756868e-05,
      "loss": 0.7893,
      "step": 652560
    },
    {
      "epoch": 2.287095157485972,
      "grad_norm": 3.546875,
      "learning_rate": 1.3202021854093166e-05,
      "loss": 0.8858,
      "step": 652570
    },
    {
      "epoch": 2.287130204992868,
      "grad_norm": 3.171875,
      "learning_rate": 1.3201372825429462e-05,
      "loss": 0.806,
      "step": 652580
    },
    {
      "epoch": 2.287165252499763,
      "grad_norm": 2.546875,
      "learning_rate": 1.320072379676576e-05,
      "loss": 0.7684,
      "step": 652590
    },
    {
      "epoch": 2.287200300006659,
      "grad_norm": 2.421875,
      "learning_rate": 1.3200074768102058e-05,
      "loss": 0.8439,
      "step": 652600
    },
    {
      "epoch": 2.2872353475135547,
      "grad_norm": 2.875,
      "learning_rate": 1.3199425739438356e-05,
      "loss": 0.7845,
      "step": 652610
    },
    {
      "epoch": 2.28727039502045,
      "grad_norm": 3.34375,
      "learning_rate": 1.3198776710774654e-05,
      "loss": 0.8616,
      "step": 652620
    },
    {
      "epoch": 2.287305442527346,
      "grad_norm": 3.53125,
      "learning_rate": 1.3198127682110952e-05,
      "loss": 0.7777,
      "step": 652630
    },
    {
      "epoch": 2.2873404900342416,
      "grad_norm": 2.71875,
      "learning_rate": 1.3197478653447252e-05,
      "loss": 0.7943,
      "step": 652640
    },
    {
      "epoch": 2.287375537541137,
      "grad_norm": 2.796875,
      "learning_rate": 1.319682962478355e-05,
      "loss": 0.805,
      "step": 652650
    },
    {
      "epoch": 2.2874105850480326,
      "grad_norm": 2.90625,
      "learning_rate": 1.3196180596119848e-05,
      "loss": 0.7668,
      "step": 652660
    },
    {
      "epoch": 2.2874456325549284,
      "grad_norm": 2.453125,
      "learning_rate": 1.3195531567456146e-05,
      "loss": 0.8078,
      "step": 652670
    },
    {
      "epoch": 2.2874806800618237,
      "grad_norm": 2.578125,
      "learning_rate": 1.3194882538792444e-05,
      "loss": 0.8107,
      "step": 652680
    },
    {
      "epoch": 2.2875157275687195,
      "grad_norm": 3.046875,
      "learning_rate": 1.3194233510128742e-05,
      "loss": 0.8142,
      "step": 652690
    },
    {
      "epoch": 2.2875507750756148,
      "grad_norm": 2.9375,
      "learning_rate": 1.319358448146504e-05,
      "loss": 0.8129,
      "step": 652700
    },
    {
      "epoch": 2.2875858225825105,
      "grad_norm": 2.65625,
      "learning_rate": 1.319293545280134e-05,
      "loss": 0.7527,
      "step": 652710
    },
    {
      "epoch": 2.2876208700894063,
      "grad_norm": 3.0625,
      "learning_rate": 1.3192286424137638e-05,
      "loss": 0.8088,
      "step": 652720
    },
    {
      "epoch": 2.2876559175963016,
      "grad_norm": 3.078125,
      "learning_rate": 1.3191637395473936e-05,
      "loss": 0.8043,
      "step": 652730
    },
    {
      "epoch": 2.2876909651031974,
      "grad_norm": 2.90625,
      "learning_rate": 1.3190988366810234e-05,
      "loss": 0.7592,
      "step": 652740
    },
    {
      "epoch": 2.287726012610093,
      "grad_norm": 3.171875,
      "learning_rate": 1.3190339338146532e-05,
      "loss": 0.8482,
      "step": 652750
    },
    {
      "epoch": 2.2877610601169884,
      "grad_norm": 3.609375,
      "learning_rate": 1.3189690309482828e-05,
      "loss": 0.9507,
      "step": 652760
    },
    {
      "epoch": 2.287796107623884,
      "grad_norm": 2.9375,
      "learning_rate": 1.3189041280819126e-05,
      "loss": 0.798,
      "step": 652770
    },
    {
      "epoch": 2.28783115513078,
      "grad_norm": 2.953125,
      "learning_rate": 1.3188392252155424e-05,
      "loss": 0.8081,
      "step": 652780
    },
    {
      "epoch": 2.2878662026376753,
      "grad_norm": 3.15625,
      "learning_rate": 1.3187743223491722e-05,
      "loss": 0.8198,
      "step": 652790
    },
    {
      "epoch": 2.287901250144571,
      "grad_norm": 2.71875,
      "learning_rate": 1.318709419482802e-05,
      "loss": 0.8168,
      "step": 652800
    },
    {
      "epoch": 2.2879362976514663,
      "grad_norm": 2.671875,
      "learning_rate": 1.3186445166164318e-05,
      "loss": 0.773,
      "step": 652810
    },
    {
      "epoch": 2.287971345158362,
      "grad_norm": 2.359375,
      "learning_rate": 1.3185796137500618e-05,
      "loss": 0.7855,
      "step": 652820
    },
    {
      "epoch": 2.288006392665258,
      "grad_norm": 3.390625,
      "learning_rate": 1.3185147108836916e-05,
      "loss": 0.8329,
      "step": 652830
    },
    {
      "epoch": 2.288041440172153,
      "grad_norm": 3.34375,
      "learning_rate": 1.3184498080173214e-05,
      "loss": 0.8559,
      "step": 652840
    },
    {
      "epoch": 2.288076487679049,
      "grad_norm": 3.09375,
      "learning_rate": 1.3183849051509512e-05,
      "loss": 0.7588,
      "step": 652850
    },
    {
      "epoch": 2.2881115351859447,
      "grad_norm": 2.9375,
      "learning_rate": 1.318320002284581e-05,
      "loss": 0.7253,
      "step": 652860
    },
    {
      "epoch": 2.28814658269284,
      "grad_norm": 3.359375,
      "learning_rate": 1.3182550994182108e-05,
      "loss": 0.8093,
      "step": 652870
    },
    {
      "epoch": 2.2881816301997357,
      "grad_norm": 2.484375,
      "learning_rate": 1.3181901965518406e-05,
      "loss": 0.8211,
      "step": 652880
    },
    {
      "epoch": 2.2882166777066315,
      "grad_norm": 2.90625,
      "learning_rate": 1.3181252936854705e-05,
      "loss": 0.8234,
      "step": 652890
    },
    {
      "epoch": 2.288251725213527,
      "grad_norm": 2.671875,
      "learning_rate": 1.3180603908191003e-05,
      "loss": 0.8446,
      "step": 652900
    },
    {
      "epoch": 2.2882867727204226,
      "grad_norm": 2.953125,
      "learning_rate": 1.3179954879527301e-05,
      "loss": 0.8198,
      "step": 652910
    },
    {
      "epoch": 2.288321820227318,
      "grad_norm": 2.84375,
      "learning_rate": 1.31793058508636e-05,
      "loss": 0.8246,
      "step": 652920
    },
    {
      "epoch": 2.2883568677342137,
      "grad_norm": 2.78125,
      "learning_rate": 1.3178656822199897e-05,
      "loss": 0.7232,
      "step": 652930
    },
    {
      "epoch": 2.2883919152411094,
      "grad_norm": 2.78125,
      "learning_rate": 1.3178007793536195e-05,
      "loss": 0.7968,
      "step": 652940
    },
    {
      "epoch": 2.288426962748005,
      "grad_norm": 3.0,
      "learning_rate": 1.3177358764872492e-05,
      "loss": 0.728,
      "step": 652950
    },
    {
      "epoch": 2.2884620102549005,
      "grad_norm": 2.828125,
      "learning_rate": 1.317670973620879e-05,
      "loss": 0.8679,
      "step": 652960
    },
    {
      "epoch": 2.2884970577617962,
      "grad_norm": 2.453125,
      "learning_rate": 1.3176060707545088e-05,
      "loss": 0.9102,
      "step": 652970
    },
    {
      "epoch": 2.2885321052686916,
      "grad_norm": 3.09375,
      "learning_rate": 1.3175411678881386e-05,
      "loss": 0.8206,
      "step": 652980
    },
    {
      "epoch": 2.2885671527755873,
      "grad_norm": 3.296875,
      "learning_rate": 1.3174762650217684e-05,
      "loss": 0.7963,
      "step": 652990
    },
    {
      "epoch": 2.288602200282483,
      "grad_norm": 2.828125,
      "learning_rate": 1.3174113621553982e-05,
      "loss": 0.8955,
      "step": 653000
    },
    {
      "epoch": 2.2886372477893784,
      "grad_norm": 2.8125,
      "learning_rate": 1.3173464592890281e-05,
      "loss": 0.812,
      "step": 653010
    },
    {
      "epoch": 2.288672295296274,
      "grad_norm": 3.125,
      "learning_rate": 1.317281556422658e-05,
      "loss": 0.8065,
      "step": 653020
    },
    {
      "epoch": 2.2887073428031695,
      "grad_norm": 2.765625,
      "learning_rate": 1.3172166535562877e-05,
      "loss": 0.7624,
      "step": 653030
    },
    {
      "epoch": 2.288742390310065,
      "grad_norm": 3.03125,
      "learning_rate": 1.3171517506899175e-05,
      "loss": 0.8897,
      "step": 653040
    },
    {
      "epoch": 2.288777437816961,
      "grad_norm": 2.953125,
      "learning_rate": 1.3170868478235473e-05,
      "loss": 0.8581,
      "step": 653050
    },
    {
      "epoch": 2.2888124853238567,
      "grad_norm": 2.953125,
      "learning_rate": 1.3170219449571771e-05,
      "loss": 0.8414,
      "step": 653060
    },
    {
      "epoch": 2.288847532830752,
      "grad_norm": 3.328125,
      "learning_rate": 1.316957042090807e-05,
      "loss": 0.8357,
      "step": 653070
    },
    {
      "epoch": 2.288882580337648,
      "grad_norm": 3.265625,
      "learning_rate": 1.3168921392244369e-05,
      "loss": 0.803,
      "step": 653080
    },
    {
      "epoch": 2.288917627844543,
      "grad_norm": 2.796875,
      "learning_rate": 1.3168272363580667e-05,
      "loss": 0.8304,
      "step": 653090
    },
    {
      "epoch": 2.288952675351439,
      "grad_norm": 3.03125,
      "learning_rate": 1.3167623334916965e-05,
      "loss": 0.7485,
      "step": 653100
    },
    {
      "epoch": 2.2889877228583346,
      "grad_norm": 2.578125,
      "learning_rate": 1.3166974306253263e-05,
      "loss": 0.8121,
      "step": 653110
    },
    {
      "epoch": 2.28902277036523,
      "grad_norm": 2.8125,
      "learning_rate": 1.3166325277589561e-05,
      "loss": 0.7856,
      "step": 653120
    },
    {
      "epoch": 2.2890578178721257,
      "grad_norm": 2.984375,
      "learning_rate": 1.3165676248925859e-05,
      "loss": 0.7576,
      "step": 653130
    },
    {
      "epoch": 2.2890928653790215,
      "grad_norm": 2.84375,
      "learning_rate": 1.3165027220262155e-05,
      "loss": 0.7984,
      "step": 653140
    },
    {
      "epoch": 2.2891279128859168,
      "grad_norm": 3.03125,
      "learning_rate": 1.3164378191598453e-05,
      "loss": 0.8445,
      "step": 653150
    },
    {
      "epoch": 2.2891629603928125,
      "grad_norm": 3.375,
      "learning_rate": 1.3163729162934751e-05,
      "loss": 0.8248,
      "step": 653160
    },
    {
      "epoch": 2.2891980078997083,
      "grad_norm": 2.90625,
      "learning_rate": 1.316308013427105e-05,
      "loss": 0.8544,
      "step": 653170
    },
    {
      "epoch": 2.2892330554066036,
      "grad_norm": 2.875,
      "learning_rate": 1.3162431105607347e-05,
      "loss": 0.8659,
      "step": 653180
    },
    {
      "epoch": 2.2892681029134994,
      "grad_norm": 3.0625,
      "learning_rate": 1.3161782076943647e-05,
      "loss": 0.8609,
      "step": 653190
    },
    {
      "epoch": 2.2893031504203947,
      "grad_norm": 3.3125,
      "learning_rate": 1.3161133048279945e-05,
      "loss": 0.7684,
      "step": 653200
    },
    {
      "epoch": 2.2893381979272904,
      "grad_norm": 2.640625,
      "learning_rate": 1.3160484019616243e-05,
      "loss": 0.8351,
      "step": 653210
    },
    {
      "epoch": 2.289373245434186,
      "grad_norm": 2.671875,
      "learning_rate": 1.3159834990952541e-05,
      "loss": 0.7744,
      "step": 653220
    },
    {
      "epoch": 2.2894082929410815,
      "grad_norm": 3.046875,
      "learning_rate": 1.3159185962288839e-05,
      "loss": 0.8193,
      "step": 653230
    },
    {
      "epoch": 2.2894433404479773,
      "grad_norm": 2.5625,
      "learning_rate": 1.3158536933625137e-05,
      "loss": 0.7202,
      "step": 653240
    },
    {
      "epoch": 2.289478387954873,
      "grad_norm": 3.140625,
      "learning_rate": 1.3157887904961435e-05,
      "loss": 0.8037,
      "step": 653250
    },
    {
      "epoch": 2.2895134354617683,
      "grad_norm": 3.28125,
      "learning_rate": 1.3157238876297735e-05,
      "loss": 0.8197,
      "step": 653260
    },
    {
      "epoch": 2.289548482968664,
      "grad_norm": 2.78125,
      "learning_rate": 1.3156589847634033e-05,
      "loss": 0.7551,
      "step": 653270
    },
    {
      "epoch": 2.28958353047556,
      "grad_norm": 3.171875,
      "learning_rate": 1.315594081897033e-05,
      "loss": 0.7608,
      "step": 653280
    },
    {
      "epoch": 2.289618577982455,
      "grad_norm": 2.703125,
      "learning_rate": 1.3155291790306629e-05,
      "loss": 0.7636,
      "step": 653290
    },
    {
      "epoch": 2.289653625489351,
      "grad_norm": 2.984375,
      "learning_rate": 1.3154642761642927e-05,
      "loss": 0.8545,
      "step": 653300
    },
    {
      "epoch": 2.2896886729962462,
      "grad_norm": 3.328125,
      "learning_rate": 1.3153993732979225e-05,
      "loss": 0.728,
      "step": 653310
    },
    {
      "epoch": 2.289723720503142,
      "grad_norm": 2.75,
      "learning_rate": 1.3153344704315523e-05,
      "loss": 0.8048,
      "step": 653320
    },
    {
      "epoch": 2.2897587680100377,
      "grad_norm": 3.59375,
      "learning_rate": 1.3152695675651819e-05,
      "loss": 0.8541,
      "step": 653330
    },
    {
      "epoch": 2.289793815516933,
      "grad_norm": 3.1875,
      "learning_rate": 1.3152046646988117e-05,
      "loss": 0.8439,
      "step": 653340
    },
    {
      "epoch": 2.289828863023829,
      "grad_norm": 3.0625,
      "learning_rate": 1.3151397618324415e-05,
      "loss": 0.788,
      "step": 653350
    },
    {
      "epoch": 2.2898639105307246,
      "grad_norm": 2.6875,
      "learning_rate": 1.3150748589660713e-05,
      "loss": 0.7952,
      "step": 653360
    },
    {
      "epoch": 2.28989895803762,
      "grad_norm": 2.515625,
      "learning_rate": 1.3150099560997013e-05,
      "loss": 0.8311,
      "step": 653370
    },
    {
      "epoch": 2.2899340055445156,
      "grad_norm": 3.0,
      "learning_rate": 1.314945053233331e-05,
      "loss": 0.7521,
      "step": 653380
    },
    {
      "epoch": 2.2899690530514114,
      "grad_norm": 3.390625,
      "learning_rate": 1.3148801503669609e-05,
      "loss": 0.8215,
      "step": 653390
    },
    {
      "epoch": 2.2900041005583067,
      "grad_norm": 3.09375,
      "learning_rate": 1.3148152475005907e-05,
      "loss": 0.7408,
      "step": 653400
    },
    {
      "epoch": 2.2900391480652025,
      "grad_norm": 3.15625,
      "learning_rate": 1.3147503446342205e-05,
      "loss": 0.9035,
      "step": 653410
    },
    {
      "epoch": 2.290074195572098,
      "grad_norm": 2.6875,
      "learning_rate": 1.3146854417678503e-05,
      "loss": 0.8478,
      "step": 653420
    },
    {
      "epoch": 2.2901092430789936,
      "grad_norm": 2.9375,
      "learning_rate": 1.31462053890148e-05,
      "loss": 0.7942,
      "step": 653430
    },
    {
      "epoch": 2.2901442905858893,
      "grad_norm": 2.890625,
      "learning_rate": 1.31455563603511e-05,
      "loss": 0.9414,
      "step": 653440
    },
    {
      "epoch": 2.2901793380927846,
      "grad_norm": 2.90625,
      "learning_rate": 1.3144907331687398e-05,
      "loss": 0.7198,
      "step": 653450
    },
    {
      "epoch": 2.2902143855996804,
      "grad_norm": 3.78125,
      "learning_rate": 1.3144258303023696e-05,
      "loss": 0.7838,
      "step": 653460
    },
    {
      "epoch": 2.290249433106576,
      "grad_norm": 2.890625,
      "learning_rate": 1.3143609274359994e-05,
      "loss": 0.806,
      "step": 653470
    },
    {
      "epoch": 2.2902844806134715,
      "grad_norm": 3.390625,
      "learning_rate": 1.3142960245696292e-05,
      "loss": 0.8527,
      "step": 653480
    },
    {
      "epoch": 2.290319528120367,
      "grad_norm": 2.953125,
      "learning_rate": 1.314231121703259e-05,
      "loss": 0.862,
      "step": 653490
    },
    {
      "epoch": 2.290354575627263,
      "grad_norm": 3.109375,
      "learning_rate": 1.3141662188368888e-05,
      "loss": 0.8495,
      "step": 653500
    },
    {
      "epoch": 2.2903896231341583,
      "grad_norm": 2.6875,
      "learning_rate": 1.3141013159705188e-05,
      "loss": 0.7216,
      "step": 653510
    },
    {
      "epoch": 2.290424670641054,
      "grad_norm": 2.828125,
      "learning_rate": 1.3140364131041483e-05,
      "loss": 0.84,
      "step": 653520
    },
    {
      "epoch": 2.2904597181479494,
      "grad_norm": 2.953125,
      "learning_rate": 1.313971510237778e-05,
      "loss": 0.7362,
      "step": 653530
    },
    {
      "epoch": 2.290494765654845,
      "grad_norm": 2.921875,
      "learning_rate": 1.3139066073714079e-05,
      "loss": 0.7337,
      "step": 653540
    },
    {
      "epoch": 2.290529813161741,
      "grad_norm": 3.375,
      "learning_rate": 1.3138417045050377e-05,
      "loss": 0.7584,
      "step": 653550
    },
    {
      "epoch": 2.290564860668636,
      "grad_norm": 2.734375,
      "learning_rate": 1.3137768016386676e-05,
      "loss": 0.8453,
      "step": 653560
    },
    {
      "epoch": 2.290599908175532,
      "grad_norm": 3.1875,
      "learning_rate": 1.3137118987722974e-05,
      "loss": 0.7922,
      "step": 653570
    },
    {
      "epoch": 2.2906349556824277,
      "grad_norm": 2.9375,
      "learning_rate": 1.3136469959059272e-05,
      "loss": 0.7232,
      "step": 653580
    },
    {
      "epoch": 2.290670003189323,
      "grad_norm": 3.171875,
      "learning_rate": 1.313582093039557e-05,
      "loss": 0.7987,
      "step": 653590
    },
    {
      "epoch": 2.2907050506962188,
      "grad_norm": 2.875,
      "learning_rate": 1.3135171901731868e-05,
      "loss": 0.7585,
      "step": 653600
    },
    {
      "epoch": 2.2907400982031145,
      "grad_norm": 2.4375,
      "learning_rate": 1.3134522873068166e-05,
      "loss": 0.8681,
      "step": 653610
    },
    {
      "epoch": 2.29077514571001,
      "grad_norm": 3.015625,
      "learning_rate": 1.3133873844404464e-05,
      "loss": 0.8014,
      "step": 653620
    },
    {
      "epoch": 2.2908101932169056,
      "grad_norm": 2.546875,
      "learning_rate": 1.3133224815740764e-05,
      "loss": 0.7947,
      "step": 653630
    },
    {
      "epoch": 2.290845240723801,
      "grad_norm": 2.5,
      "learning_rate": 1.3132575787077062e-05,
      "loss": 0.7757,
      "step": 653640
    },
    {
      "epoch": 2.2908802882306967,
      "grad_norm": 2.703125,
      "learning_rate": 1.313192675841336e-05,
      "loss": 0.8367,
      "step": 653650
    },
    {
      "epoch": 2.2909153357375924,
      "grad_norm": 3.046875,
      "learning_rate": 1.3131277729749658e-05,
      "loss": 0.8187,
      "step": 653660
    },
    {
      "epoch": 2.2909503832444877,
      "grad_norm": 2.703125,
      "learning_rate": 1.3130628701085956e-05,
      "loss": 0.9149,
      "step": 653670
    },
    {
      "epoch": 2.2909854307513835,
      "grad_norm": 3.140625,
      "learning_rate": 1.3129979672422254e-05,
      "loss": 0.8703,
      "step": 653680
    },
    {
      "epoch": 2.2910204782582793,
      "grad_norm": 2.671875,
      "learning_rate": 1.3129330643758552e-05,
      "loss": 0.8266,
      "step": 653690
    },
    {
      "epoch": 2.2910555257651746,
      "grad_norm": 3.1875,
      "learning_rate": 1.3128681615094852e-05,
      "loss": 0.8524,
      "step": 653700
    },
    {
      "epoch": 2.2910905732720703,
      "grad_norm": 2.921875,
      "learning_rate": 1.3128032586431146e-05,
      "loss": 0.7858,
      "step": 653710
    },
    {
      "epoch": 2.291125620778966,
      "grad_norm": 3.15625,
      "learning_rate": 1.3127383557767444e-05,
      "loss": 0.7387,
      "step": 653720
    },
    {
      "epoch": 2.2911606682858614,
      "grad_norm": 2.8125,
      "learning_rate": 1.3126734529103742e-05,
      "loss": 0.7966,
      "step": 653730
    },
    {
      "epoch": 2.291195715792757,
      "grad_norm": 3.15625,
      "learning_rate": 1.3126085500440042e-05,
      "loss": 0.8039,
      "step": 653740
    },
    {
      "epoch": 2.2912307632996525,
      "grad_norm": 2.953125,
      "learning_rate": 1.312543647177634e-05,
      "loss": 0.8536,
      "step": 653750
    },
    {
      "epoch": 2.2912658108065482,
      "grad_norm": 2.921875,
      "learning_rate": 1.3124787443112638e-05,
      "loss": 0.8488,
      "step": 653760
    },
    {
      "epoch": 2.291300858313444,
      "grad_norm": 2.8125,
      "learning_rate": 1.3124138414448936e-05,
      "loss": 0.825,
      "step": 653770
    },
    {
      "epoch": 2.2913359058203393,
      "grad_norm": 3.015625,
      "learning_rate": 1.3123489385785234e-05,
      "loss": 0.7415,
      "step": 653780
    },
    {
      "epoch": 2.291370953327235,
      "grad_norm": 2.4375,
      "learning_rate": 1.3122840357121532e-05,
      "loss": 0.779,
      "step": 653790
    },
    {
      "epoch": 2.291406000834131,
      "grad_norm": 2.90625,
      "learning_rate": 1.312219132845783e-05,
      "loss": 0.7803,
      "step": 653800
    },
    {
      "epoch": 2.291441048341026,
      "grad_norm": 2.765625,
      "learning_rate": 1.312154229979413e-05,
      "loss": 0.7602,
      "step": 653810
    },
    {
      "epoch": 2.291476095847922,
      "grad_norm": 2.59375,
      "learning_rate": 1.3120893271130428e-05,
      "loss": 0.7851,
      "step": 653820
    },
    {
      "epoch": 2.2915111433548176,
      "grad_norm": 3.09375,
      "learning_rate": 1.3120244242466726e-05,
      "loss": 0.7918,
      "step": 653830
    },
    {
      "epoch": 2.291546190861713,
      "grad_norm": 2.671875,
      "learning_rate": 1.3119595213803024e-05,
      "loss": 0.8141,
      "step": 653840
    },
    {
      "epoch": 2.2915812383686087,
      "grad_norm": 2.546875,
      "learning_rate": 1.3118946185139322e-05,
      "loss": 0.7989,
      "step": 653850
    },
    {
      "epoch": 2.291616285875504,
      "grad_norm": 3.03125,
      "learning_rate": 1.311829715647562e-05,
      "loss": 0.7855,
      "step": 653860
    },
    {
      "epoch": 2.2916513333824,
      "grad_norm": 3.109375,
      "learning_rate": 1.3117648127811918e-05,
      "loss": 0.829,
      "step": 653870
    },
    {
      "epoch": 2.2916863808892955,
      "grad_norm": 2.796875,
      "learning_rate": 1.3116999099148217e-05,
      "loss": 0.8067,
      "step": 653880
    },
    {
      "epoch": 2.291721428396191,
      "grad_norm": 3.046875,
      "learning_rate": 1.3116350070484512e-05,
      "loss": 0.8566,
      "step": 653890
    },
    {
      "epoch": 2.2917564759030866,
      "grad_norm": 2.890625,
      "learning_rate": 1.311570104182081e-05,
      "loss": 0.7335,
      "step": 653900
    },
    {
      "epoch": 2.2917915234099824,
      "grad_norm": 2.875,
      "learning_rate": 1.3115052013157108e-05,
      "loss": 0.8238,
      "step": 653910
    },
    {
      "epoch": 2.2918265709168777,
      "grad_norm": 3.0625,
      "learning_rate": 1.3114402984493408e-05,
      "loss": 0.802,
      "step": 653920
    },
    {
      "epoch": 2.2918616184237735,
      "grad_norm": 3.328125,
      "learning_rate": 1.3113753955829706e-05,
      "loss": 0.7324,
      "step": 653930
    },
    {
      "epoch": 2.291896665930669,
      "grad_norm": 2.765625,
      "learning_rate": 1.3113104927166004e-05,
      "loss": 0.9061,
      "step": 653940
    },
    {
      "epoch": 2.2919317134375645,
      "grad_norm": 2.515625,
      "learning_rate": 1.3112455898502302e-05,
      "loss": 0.8444,
      "step": 653950
    },
    {
      "epoch": 2.2919667609444603,
      "grad_norm": 3.078125,
      "learning_rate": 1.31118068698386e-05,
      "loss": 0.8631,
      "step": 653960
    },
    {
      "epoch": 2.2920018084513556,
      "grad_norm": 2.890625,
      "learning_rate": 1.3111157841174898e-05,
      "loss": 0.8016,
      "step": 653970
    },
    {
      "epoch": 2.2920368559582514,
      "grad_norm": 2.78125,
      "learning_rate": 1.3110508812511196e-05,
      "loss": 0.8357,
      "step": 653980
    },
    {
      "epoch": 2.292071903465147,
      "grad_norm": 2.796875,
      "learning_rate": 1.3109859783847495e-05,
      "loss": 0.7939,
      "step": 653990
    },
    {
      "epoch": 2.2921069509720424,
      "grad_norm": 2.890625,
      "learning_rate": 1.3109210755183793e-05,
      "loss": 0.807,
      "step": 654000
    },
    {
      "epoch": 2.292141998478938,
      "grad_norm": 3.015625,
      "learning_rate": 1.3108561726520091e-05,
      "loss": 0.817,
      "step": 654010
    },
    {
      "epoch": 2.292177045985834,
      "grad_norm": 2.671875,
      "learning_rate": 1.310791269785639e-05,
      "loss": 0.7851,
      "step": 654020
    },
    {
      "epoch": 2.2922120934927293,
      "grad_norm": 2.25,
      "learning_rate": 1.3107263669192687e-05,
      "loss": 0.8134,
      "step": 654030
    },
    {
      "epoch": 2.292247140999625,
      "grad_norm": 2.65625,
      "learning_rate": 1.3106614640528985e-05,
      "loss": 0.7683,
      "step": 654040
    },
    {
      "epoch": 2.2922821885065208,
      "grad_norm": 3.125,
      "learning_rate": 1.3105965611865283e-05,
      "loss": 0.9047,
      "step": 654050
    },
    {
      "epoch": 2.292317236013416,
      "grad_norm": 2.625,
      "learning_rate": 1.3105316583201583e-05,
      "loss": 0.7983,
      "step": 654060
    },
    {
      "epoch": 2.292352283520312,
      "grad_norm": 2.84375,
      "learning_rate": 1.3104667554537881e-05,
      "loss": 0.8465,
      "step": 654070
    },
    {
      "epoch": 2.292387331027207,
      "grad_norm": 2.625,
      "learning_rate": 1.3104018525874176e-05,
      "loss": 0.7255,
      "step": 654080
    },
    {
      "epoch": 2.292422378534103,
      "grad_norm": 2.671875,
      "learning_rate": 1.3103369497210474e-05,
      "loss": 0.8095,
      "step": 654090
    },
    {
      "epoch": 2.2924574260409987,
      "grad_norm": 2.859375,
      "learning_rate": 1.3102720468546772e-05,
      "loss": 0.8353,
      "step": 654100
    },
    {
      "epoch": 2.292492473547894,
      "grad_norm": 3.421875,
      "learning_rate": 1.3102071439883071e-05,
      "loss": 0.8626,
      "step": 654110
    },
    {
      "epoch": 2.2925275210547897,
      "grad_norm": 3.625,
      "learning_rate": 1.310142241121937e-05,
      "loss": 0.8217,
      "step": 654120
    },
    {
      "epoch": 2.2925625685616855,
      "grad_norm": 2.921875,
      "learning_rate": 1.3100773382555667e-05,
      "loss": 0.8127,
      "step": 654130
    },
    {
      "epoch": 2.292597616068581,
      "grad_norm": 3.0,
      "learning_rate": 1.3100124353891965e-05,
      "loss": 0.8384,
      "step": 654140
    },
    {
      "epoch": 2.2926326635754766,
      "grad_norm": 2.765625,
      "learning_rate": 1.3099475325228263e-05,
      "loss": 0.7974,
      "step": 654150
    },
    {
      "epoch": 2.2926677110823723,
      "grad_norm": 2.78125,
      "learning_rate": 1.3098826296564561e-05,
      "loss": 0.7777,
      "step": 654160
    },
    {
      "epoch": 2.2927027585892676,
      "grad_norm": 2.515625,
      "learning_rate": 1.309817726790086e-05,
      "loss": 0.7598,
      "step": 654170
    },
    {
      "epoch": 2.2927378060961634,
      "grad_norm": 2.796875,
      "learning_rate": 1.3097528239237159e-05,
      "loss": 0.7305,
      "step": 654180
    },
    {
      "epoch": 2.2927728536030587,
      "grad_norm": 2.96875,
      "learning_rate": 1.3096879210573457e-05,
      "loss": 0.8716,
      "step": 654190
    },
    {
      "epoch": 2.2928079011099545,
      "grad_norm": 3.328125,
      "learning_rate": 1.3096230181909755e-05,
      "loss": 0.8499,
      "step": 654200
    },
    {
      "epoch": 2.2928429486168502,
      "grad_norm": 2.703125,
      "learning_rate": 1.3095581153246053e-05,
      "loss": 0.8075,
      "step": 654210
    },
    {
      "epoch": 2.2928779961237455,
      "grad_norm": 3.171875,
      "learning_rate": 1.3094932124582351e-05,
      "loss": 0.8455,
      "step": 654220
    },
    {
      "epoch": 2.2929130436306413,
      "grad_norm": 3.90625,
      "learning_rate": 1.3094283095918649e-05,
      "loss": 0.8602,
      "step": 654230
    },
    {
      "epoch": 2.292948091137537,
      "grad_norm": 2.71875,
      "learning_rate": 1.3093634067254947e-05,
      "loss": 0.7775,
      "step": 654240
    },
    {
      "epoch": 2.2929831386444324,
      "grad_norm": 3.25,
      "learning_rate": 1.3092985038591247e-05,
      "loss": 0.7884,
      "step": 654250
    },
    {
      "epoch": 2.293018186151328,
      "grad_norm": 2.515625,
      "learning_rate": 1.3092336009927545e-05,
      "loss": 0.7935,
      "step": 654260
    },
    {
      "epoch": 2.293053233658224,
      "grad_norm": 3.40625,
      "learning_rate": 1.309168698126384e-05,
      "loss": 0.824,
      "step": 654270
    },
    {
      "epoch": 2.293088281165119,
      "grad_norm": 2.96875,
      "learning_rate": 1.3091037952600137e-05,
      "loss": 0.7508,
      "step": 654280
    },
    {
      "epoch": 2.293123328672015,
      "grad_norm": 3.15625,
      "learning_rate": 1.3090388923936437e-05,
      "loss": 0.8372,
      "step": 654290
    },
    {
      "epoch": 2.2931583761789103,
      "grad_norm": 2.78125,
      "learning_rate": 1.3089739895272735e-05,
      "loss": 0.8052,
      "step": 654300
    },
    {
      "epoch": 2.293193423685806,
      "grad_norm": 2.953125,
      "learning_rate": 1.3089090866609033e-05,
      "loss": 0.8589,
      "step": 654310
    },
    {
      "epoch": 2.293228471192702,
      "grad_norm": 3.34375,
      "learning_rate": 1.3088441837945331e-05,
      "loss": 0.8749,
      "step": 654320
    },
    {
      "epoch": 2.2932635186995975,
      "grad_norm": 3.0625,
      "learning_rate": 1.3087792809281629e-05,
      "loss": 0.8297,
      "step": 654330
    },
    {
      "epoch": 2.293298566206493,
      "grad_norm": 3.015625,
      "learning_rate": 1.3087143780617927e-05,
      "loss": 0.7706,
      "step": 654340
    },
    {
      "epoch": 2.2933336137133886,
      "grad_norm": 2.703125,
      "learning_rate": 1.3086494751954225e-05,
      "loss": 0.8453,
      "step": 654350
    },
    {
      "epoch": 2.293368661220284,
      "grad_norm": 2.65625,
      "learning_rate": 1.3085845723290525e-05,
      "loss": 0.8524,
      "step": 654360
    },
    {
      "epoch": 2.2934037087271797,
      "grad_norm": 3.15625,
      "learning_rate": 1.3085196694626823e-05,
      "loss": 0.8454,
      "step": 654370
    },
    {
      "epoch": 2.2934387562340754,
      "grad_norm": 2.578125,
      "learning_rate": 1.308454766596312e-05,
      "loss": 0.8487,
      "step": 654380
    },
    {
      "epoch": 2.2934738037409708,
      "grad_norm": 3.15625,
      "learning_rate": 1.3083898637299419e-05,
      "loss": 0.791,
      "step": 654390
    },
    {
      "epoch": 2.2935088512478665,
      "grad_norm": 2.75,
      "learning_rate": 1.3083249608635717e-05,
      "loss": 0.7754,
      "step": 654400
    },
    {
      "epoch": 2.293543898754762,
      "grad_norm": 2.359375,
      "learning_rate": 1.3082600579972015e-05,
      "loss": 0.791,
      "step": 654410
    },
    {
      "epoch": 2.2935789462616576,
      "grad_norm": 3.0625,
      "learning_rate": 1.3081951551308313e-05,
      "loss": 0.809,
      "step": 654420
    },
    {
      "epoch": 2.2936139937685533,
      "grad_norm": 2.953125,
      "learning_rate": 1.3081302522644612e-05,
      "loss": 0.8241,
      "step": 654430
    },
    {
      "epoch": 2.293649041275449,
      "grad_norm": 2.46875,
      "learning_rate": 1.308065349398091e-05,
      "loss": 0.8092,
      "step": 654440
    },
    {
      "epoch": 2.2936840887823444,
      "grad_norm": 2.5,
      "learning_rate": 1.3080004465317208e-05,
      "loss": 0.7455,
      "step": 654450
    },
    {
      "epoch": 2.29371913628924,
      "grad_norm": 2.65625,
      "learning_rate": 1.3079355436653503e-05,
      "loss": 0.795,
      "step": 654460
    },
    {
      "epoch": 2.2937541837961355,
      "grad_norm": 2.453125,
      "learning_rate": 1.3078706407989803e-05,
      "loss": 0.7298,
      "step": 654470
    },
    {
      "epoch": 2.2937892313030313,
      "grad_norm": 2.515625,
      "learning_rate": 1.30780573793261e-05,
      "loss": 0.7468,
      "step": 654480
    },
    {
      "epoch": 2.293824278809927,
      "grad_norm": 3.484375,
      "learning_rate": 1.3077408350662399e-05,
      "loss": 0.8166,
      "step": 654490
    },
    {
      "epoch": 2.2938593263168223,
      "grad_norm": 2.953125,
      "learning_rate": 1.3076759321998697e-05,
      "loss": 0.8707,
      "step": 654500
    },
    {
      "epoch": 2.293894373823718,
      "grad_norm": 3.09375,
      "learning_rate": 1.3076110293334995e-05,
      "loss": 0.8638,
      "step": 654510
    },
    {
      "epoch": 2.293929421330614,
      "grad_norm": 3.34375,
      "learning_rate": 1.3075461264671293e-05,
      "loss": 0.767,
      "step": 654520
    },
    {
      "epoch": 2.293964468837509,
      "grad_norm": 2.6875,
      "learning_rate": 1.307481223600759e-05,
      "loss": 0.8698,
      "step": 654530
    },
    {
      "epoch": 2.293999516344405,
      "grad_norm": 2.75,
      "learning_rate": 1.307416320734389e-05,
      "loss": 0.8428,
      "step": 654540
    },
    {
      "epoch": 2.2940345638513007,
      "grad_norm": 3.0625,
      "learning_rate": 1.3073514178680188e-05,
      "loss": 0.8361,
      "step": 654550
    },
    {
      "epoch": 2.294069611358196,
      "grad_norm": 3.46875,
      "learning_rate": 1.3072865150016486e-05,
      "loss": 0.865,
      "step": 654560
    },
    {
      "epoch": 2.2941046588650917,
      "grad_norm": 2.703125,
      "learning_rate": 1.3072216121352784e-05,
      "loss": 0.7959,
      "step": 654570
    },
    {
      "epoch": 2.294139706371987,
      "grad_norm": 3.1875,
      "learning_rate": 1.3071567092689082e-05,
      "loss": 0.7662,
      "step": 654580
    },
    {
      "epoch": 2.294174753878883,
      "grad_norm": 2.625,
      "learning_rate": 1.307091806402538e-05,
      "loss": 0.7766,
      "step": 654590
    },
    {
      "epoch": 2.2942098013857786,
      "grad_norm": 3.078125,
      "learning_rate": 1.3070269035361678e-05,
      "loss": 0.8309,
      "step": 654600
    },
    {
      "epoch": 2.294244848892674,
      "grad_norm": 3.234375,
      "learning_rate": 1.3069620006697978e-05,
      "loss": 0.8053,
      "step": 654610
    },
    {
      "epoch": 2.2942798963995696,
      "grad_norm": 2.5,
      "learning_rate": 1.3068970978034276e-05,
      "loss": 0.7931,
      "step": 654620
    },
    {
      "epoch": 2.2943149439064654,
      "grad_norm": 2.78125,
      "learning_rate": 1.3068321949370574e-05,
      "loss": 0.8302,
      "step": 654630
    },
    {
      "epoch": 2.2943499914133607,
      "grad_norm": 3.15625,
      "learning_rate": 1.3067672920706872e-05,
      "loss": 0.8076,
      "step": 654640
    },
    {
      "epoch": 2.2943850389202565,
      "grad_norm": 2.90625,
      "learning_rate": 1.3067023892043167e-05,
      "loss": 0.7539,
      "step": 654650
    },
    {
      "epoch": 2.2944200864271522,
      "grad_norm": 3.3125,
      "learning_rate": 1.3066374863379466e-05,
      "loss": 0.9007,
      "step": 654660
    },
    {
      "epoch": 2.2944551339340475,
      "grad_norm": 3.234375,
      "learning_rate": 1.3065725834715764e-05,
      "loss": 0.7681,
      "step": 654670
    },
    {
      "epoch": 2.2944901814409433,
      "grad_norm": 3.171875,
      "learning_rate": 1.3065076806052062e-05,
      "loss": 0.8582,
      "step": 654680
    },
    {
      "epoch": 2.2945252289478386,
      "grad_norm": 2.53125,
      "learning_rate": 1.306442777738836e-05,
      "loss": 0.8249,
      "step": 654690
    },
    {
      "epoch": 2.2945602764547344,
      "grad_norm": 2.65625,
      "learning_rate": 1.3063778748724658e-05,
      "loss": 0.8187,
      "step": 654700
    },
    {
      "epoch": 2.29459532396163,
      "grad_norm": 2.84375,
      "learning_rate": 1.3063129720060956e-05,
      "loss": 0.7682,
      "step": 654710
    },
    {
      "epoch": 2.2946303714685254,
      "grad_norm": 2.890625,
      "learning_rate": 1.3062480691397254e-05,
      "loss": 0.8388,
      "step": 654720
    },
    {
      "epoch": 2.294665418975421,
      "grad_norm": 2.921875,
      "learning_rate": 1.3061831662733554e-05,
      "loss": 0.844,
      "step": 654730
    },
    {
      "epoch": 2.294700466482317,
      "grad_norm": 3.46875,
      "learning_rate": 1.3061182634069852e-05,
      "loss": 0.8982,
      "step": 654740
    },
    {
      "epoch": 2.2947355139892123,
      "grad_norm": 2.5625,
      "learning_rate": 1.306053360540615e-05,
      "loss": 0.7,
      "step": 654750
    },
    {
      "epoch": 2.294770561496108,
      "grad_norm": 3.15625,
      "learning_rate": 1.3059884576742448e-05,
      "loss": 0.8954,
      "step": 654760
    },
    {
      "epoch": 2.294805609003004,
      "grad_norm": 2.640625,
      "learning_rate": 1.3059235548078746e-05,
      "loss": 0.7304,
      "step": 654770
    },
    {
      "epoch": 2.294840656509899,
      "grad_norm": 3.21875,
      "learning_rate": 1.3058586519415044e-05,
      "loss": 0.7785,
      "step": 654780
    },
    {
      "epoch": 2.294875704016795,
      "grad_norm": 3.09375,
      "learning_rate": 1.3057937490751342e-05,
      "loss": 0.7825,
      "step": 654790
    },
    {
      "epoch": 2.29491075152369,
      "grad_norm": 3.21875,
      "learning_rate": 1.3057288462087642e-05,
      "loss": 0.7813,
      "step": 654800
    },
    {
      "epoch": 2.294945799030586,
      "grad_norm": 2.6875,
      "learning_rate": 1.305663943342394e-05,
      "loss": 0.8187,
      "step": 654810
    },
    {
      "epoch": 2.2949808465374817,
      "grad_norm": 3.109375,
      "learning_rate": 1.3055990404760238e-05,
      "loss": 0.7741,
      "step": 654820
    },
    {
      "epoch": 2.295015894044377,
      "grad_norm": 3.1875,
      "learning_rate": 1.3055341376096532e-05,
      "loss": 0.7835,
      "step": 654830
    },
    {
      "epoch": 2.2950509415512728,
      "grad_norm": 2.59375,
      "learning_rate": 1.3054692347432832e-05,
      "loss": 0.8183,
      "step": 654840
    },
    {
      "epoch": 2.2950859890581685,
      "grad_norm": 2.921875,
      "learning_rate": 1.305404331876913e-05,
      "loss": 0.7332,
      "step": 654850
    },
    {
      "epoch": 2.295121036565064,
      "grad_norm": 2.640625,
      "learning_rate": 1.3053394290105428e-05,
      "loss": 0.7234,
      "step": 654860
    },
    {
      "epoch": 2.2951560840719596,
      "grad_norm": 2.734375,
      "learning_rate": 1.3052745261441726e-05,
      "loss": 0.7753,
      "step": 654870
    },
    {
      "epoch": 2.2951911315788553,
      "grad_norm": 3.28125,
      "learning_rate": 1.3052096232778024e-05,
      "loss": 0.868,
      "step": 654880
    },
    {
      "epoch": 2.2952261790857507,
      "grad_norm": 2.84375,
      "learning_rate": 1.3051447204114322e-05,
      "loss": 0.7729,
      "step": 654890
    },
    {
      "epoch": 2.2952612265926464,
      "grad_norm": 3.28125,
      "learning_rate": 1.305079817545062e-05,
      "loss": 0.8458,
      "step": 654900
    },
    {
      "epoch": 2.2952962740995417,
      "grad_norm": 2.546875,
      "learning_rate": 1.305014914678692e-05,
      "loss": 0.7852,
      "step": 654910
    },
    {
      "epoch": 2.2953313216064375,
      "grad_norm": 2.953125,
      "learning_rate": 1.3049500118123218e-05,
      "loss": 0.8237,
      "step": 654920
    },
    {
      "epoch": 2.2953663691133332,
      "grad_norm": 3.1875,
      "learning_rate": 1.3048851089459516e-05,
      "loss": 0.8443,
      "step": 654930
    },
    {
      "epoch": 2.2954014166202286,
      "grad_norm": 2.78125,
      "learning_rate": 1.3048202060795814e-05,
      "loss": 0.7942,
      "step": 654940
    },
    {
      "epoch": 2.2954364641271243,
      "grad_norm": 2.71875,
      "learning_rate": 1.3047553032132112e-05,
      "loss": 0.7636,
      "step": 654950
    },
    {
      "epoch": 2.29547151163402,
      "grad_norm": 3.3125,
      "learning_rate": 1.304690400346841e-05,
      "loss": 0.7979,
      "step": 654960
    },
    {
      "epoch": 2.2955065591409154,
      "grad_norm": 2.84375,
      "learning_rate": 1.3046254974804708e-05,
      "loss": 0.7965,
      "step": 654970
    },
    {
      "epoch": 2.295541606647811,
      "grad_norm": 3.046875,
      "learning_rate": 1.3045605946141008e-05,
      "loss": 0.797,
      "step": 654980
    },
    {
      "epoch": 2.295576654154707,
      "grad_norm": 2.921875,
      "learning_rate": 1.3044956917477306e-05,
      "loss": 0.7844,
      "step": 654990
    },
    {
      "epoch": 2.295611701661602,
      "grad_norm": 3.265625,
      "learning_rate": 1.3044307888813604e-05,
      "loss": 0.8852,
      "step": 655000
    },
    {
      "epoch": 2.295611701661602,
      "eval_loss": 0.7619009613990784,
      "eval_runtime": 561.6866,
      "eval_samples_per_second": 677.31,
      "eval_steps_per_second": 56.443,
      "step": 655000
    },
    {
      "epoch": 2.295646749168498,
      "grad_norm": 2.71875,
      "learning_rate": 1.3043658860149902e-05,
      "loss": 0.8079,
      "step": 655010
    },
    {
      "epoch": 2.2956817966753933,
      "grad_norm": 2.75,
      "learning_rate": 1.3043009831486198e-05,
      "loss": 0.806,
      "step": 655020
    },
    {
      "epoch": 2.295716844182289,
      "grad_norm": 2.375,
      "learning_rate": 1.3042360802822496e-05,
      "loss": 0.7565,
      "step": 655030
    },
    {
      "epoch": 2.295751891689185,
      "grad_norm": 2.359375,
      "learning_rate": 1.3041711774158794e-05,
      "loss": 0.7793,
      "step": 655040
    },
    {
      "epoch": 2.29578693919608,
      "grad_norm": 3.078125,
      "learning_rate": 1.3041062745495092e-05,
      "loss": 0.8154,
      "step": 655050
    },
    {
      "epoch": 2.295821986702976,
      "grad_norm": 3.078125,
      "learning_rate": 1.304041371683139e-05,
      "loss": 0.7201,
      "step": 655060
    },
    {
      "epoch": 2.2958570342098716,
      "grad_norm": 3.21875,
      "learning_rate": 1.3039764688167688e-05,
      "loss": 0.7622,
      "step": 655070
    },
    {
      "epoch": 2.295892081716767,
      "grad_norm": 3.015625,
      "learning_rate": 1.3039115659503986e-05,
      "loss": 0.7737,
      "step": 655080
    },
    {
      "epoch": 2.2959271292236627,
      "grad_norm": 2.875,
      "learning_rate": 1.3038466630840286e-05,
      "loss": 0.7706,
      "step": 655090
    },
    {
      "epoch": 2.2959621767305585,
      "grad_norm": 2.890625,
      "learning_rate": 1.3037817602176584e-05,
      "loss": 0.7,
      "step": 655100
    },
    {
      "epoch": 2.295997224237454,
      "grad_norm": 2.984375,
      "learning_rate": 1.3037168573512882e-05,
      "loss": 0.8149,
      "step": 655110
    },
    {
      "epoch": 2.2960322717443495,
      "grad_norm": 3.125,
      "learning_rate": 1.303651954484918e-05,
      "loss": 0.8483,
      "step": 655120
    },
    {
      "epoch": 2.296067319251245,
      "grad_norm": 3.25,
      "learning_rate": 1.3035870516185478e-05,
      "loss": 0.7727,
      "step": 655130
    },
    {
      "epoch": 2.2961023667581406,
      "grad_norm": 2.8125,
      "learning_rate": 1.3035221487521776e-05,
      "loss": 0.7779,
      "step": 655140
    },
    {
      "epoch": 2.2961374142650364,
      "grad_norm": 2.546875,
      "learning_rate": 1.3034572458858074e-05,
      "loss": 0.8218,
      "step": 655150
    },
    {
      "epoch": 2.2961724617719317,
      "grad_norm": 3.4375,
      "learning_rate": 1.3033923430194373e-05,
      "loss": 0.7785,
      "step": 655160
    },
    {
      "epoch": 2.2962075092788274,
      "grad_norm": 2.75,
      "learning_rate": 1.3033274401530671e-05,
      "loss": 0.8093,
      "step": 655170
    },
    {
      "epoch": 2.296242556785723,
      "grad_norm": 3.390625,
      "learning_rate": 1.303262537286697e-05,
      "loss": 0.9121,
      "step": 655180
    },
    {
      "epoch": 2.2962776042926185,
      "grad_norm": 2.9375,
      "learning_rate": 1.3031976344203267e-05,
      "loss": 0.7863,
      "step": 655190
    },
    {
      "epoch": 2.2963126517995143,
      "grad_norm": 2.609375,
      "learning_rate": 1.3031327315539565e-05,
      "loss": 0.7753,
      "step": 655200
    },
    {
      "epoch": 2.29634769930641,
      "grad_norm": 3.046875,
      "learning_rate": 1.3030678286875862e-05,
      "loss": 0.7792,
      "step": 655210
    },
    {
      "epoch": 2.2963827468133053,
      "grad_norm": 2.75,
      "learning_rate": 1.303002925821216e-05,
      "loss": 0.7019,
      "step": 655220
    },
    {
      "epoch": 2.296417794320201,
      "grad_norm": 2.96875,
      "learning_rate": 1.3029380229548458e-05,
      "loss": 0.7201,
      "step": 655230
    },
    {
      "epoch": 2.2964528418270964,
      "grad_norm": 3.125,
      "learning_rate": 1.3028731200884756e-05,
      "loss": 0.8382,
      "step": 655240
    },
    {
      "epoch": 2.296487889333992,
      "grad_norm": 3.59375,
      "learning_rate": 1.3028082172221054e-05,
      "loss": 0.9104,
      "step": 655250
    },
    {
      "epoch": 2.296522936840888,
      "grad_norm": 2.609375,
      "learning_rate": 1.3027433143557352e-05,
      "loss": 0.7659,
      "step": 655260
    },
    {
      "epoch": 2.2965579843477832,
      "grad_norm": 2.546875,
      "learning_rate": 1.302678411489365e-05,
      "loss": 0.8009,
      "step": 655270
    },
    {
      "epoch": 2.296593031854679,
      "grad_norm": 2.8125,
      "learning_rate": 1.302613508622995e-05,
      "loss": 0.8884,
      "step": 655280
    },
    {
      "epoch": 2.2966280793615748,
      "grad_norm": 3.046875,
      "learning_rate": 1.3025486057566247e-05,
      "loss": 0.8472,
      "step": 655290
    },
    {
      "epoch": 2.29666312686847,
      "grad_norm": 2.53125,
      "learning_rate": 1.3024837028902545e-05,
      "loss": 0.848,
      "step": 655300
    },
    {
      "epoch": 2.296698174375366,
      "grad_norm": 3.109375,
      "learning_rate": 1.3024188000238843e-05,
      "loss": 0.9035,
      "step": 655310
    },
    {
      "epoch": 2.2967332218822616,
      "grad_norm": 2.609375,
      "learning_rate": 1.3023538971575141e-05,
      "loss": 0.744,
      "step": 655320
    },
    {
      "epoch": 2.296768269389157,
      "grad_norm": 2.828125,
      "learning_rate": 1.302288994291144e-05,
      "loss": 0.8711,
      "step": 655330
    },
    {
      "epoch": 2.2968033168960527,
      "grad_norm": 3.03125,
      "learning_rate": 1.3022240914247737e-05,
      "loss": 0.8142,
      "step": 655340
    },
    {
      "epoch": 2.296838364402948,
      "grad_norm": 2.65625,
      "learning_rate": 1.3021591885584037e-05,
      "loss": 0.7659,
      "step": 655350
    },
    {
      "epoch": 2.2968734119098437,
      "grad_norm": 2.71875,
      "learning_rate": 1.3020942856920335e-05,
      "loss": 0.8784,
      "step": 655360
    },
    {
      "epoch": 2.2969084594167395,
      "grad_norm": 3.109375,
      "learning_rate": 1.3020293828256633e-05,
      "loss": 0.8294,
      "step": 655370
    },
    {
      "epoch": 2.296943506923635,
      "grad_norm": 3.34375,
      "learning_rate": 1.3019644799592931e-05,
      "loss": 0.8484,
      "step": 655380
    },
    {
      "epoch": 2.2969785544305306,
      "grad_norm": 2.734375,
      "learning_rate": 1.3018995770929229e-05,
      "loss": 0.7845,
      "step": 655390
    },
    {
      "epoch": 2.2970136019374263,
      "grad_norm": 2.328125,
      "learning_rate": 1.3018346742265525e-05,
      "loss": 0.7791,
      "step": 655400
    },
    {
      "epoch": 2.2970486494443216,
      "grad_norm": 2.875,
      "learning_rate": 1.3017697713601823e-05,
      "loss": 0.79,
      "step": 655410
    },
    {
      "epoch": 2.2970836969512174,
      "grad_norm": 3.359375,
      "learning_rate": 1.3017048684938121e-05,
      "loss": 0.8578,
      "step": 655420
    },
    {
      "epoch": 2.297118744458113,
      "grad_norm": 2.78125,
      "learning_rate": 1.301639965627442e-05,
      "loss": 0.8864,
      "step": 655430
    },
    {
      "epoch": 2.2971537919650085,
      "grad_norm": 3.328125,
      "learning_rate": 1.3015750627610717e-05,
      "loss": 0.7329,
      "step": 655440
    },
    {
      "epoch": 2.297188839471904,
      "grad_norm": 2.71875,
      "learning_rate": 1.3015101598947015e-05,
      "loss": 0.7063,
      "step": 655450
    },
    {
      "epoch": 2.2972238869787995,
      "grad_norm": 2.703125,
      "learning_rate": 1.3014452570283315e-05,
      "loss": 0.7493,
      "step": 655460
    },
    {
      "epoch": 2.2972589344856953,
      "grad_norm": 2.96875,
      "learning_rate": 1.3013803541619613e-05,
      "loss": 0.7908,
      "step": 655470
    },
    {
      "epoch": 2.297293981992591,
      "grad_norm": 3.046875,
      "learning_rate": 1.3013154512955911e-05,
      "loss": 0.8328,
      "step": 655480
    },
    {
      "epoch": 2.2973290294994864,
      "grad_norm": 3.109375,
      "learning_rate": 1.3012505484292209e-05,
      "loss": 0.8673,
      "step": 655490
    },
    {
      "epoch": 2.297364077006382,
      "grad_norm": 2.546875,
      "learning_rate": 1.3011856455628507e-05,
      "loss": 0.7275,
      "step": 655500
    },
    {
      "epoch": 2.297399124513278,
      "grad_norm": 2.578125,
      "learning_rate": 1.3011207426964805e-05,
      "loss": 0.76,
      "step": 655510
    },
    {
      "epoch": 2.297434172020173,
      "grad_norm": 2.78125,
      "learning_rate": 1.3010558398301103e-05,
      "loss": 0.7646,
      "step": 655520
    },
    {
      "epoch": 2.297469219527069,
      "grad_norm": 2.71875,
      "learning_rate": 1.3009909369637403e-05,
      "loss": 0.8124,
      "step": 655530
    },
    {
      "epoch": 2.2975042670339647,
      "grad_norm": 2.5625,
      "learning_rate": 1.30092603409737e-05,
      "loss": 0.8056,
      "step": 655540
    },
    {
      "epoch": 2.29753931454086,
      "grad_norm": 2.578125,
      "learning_rate": 1.3008611312309999e-05,
      "loss": 0.7563,
      "step": 655550
    },
    {
      "epoch": 2.297574362047756,
      "grad_norm": 2.96875,
      "learning_rate": 1.3007962283646297e-05,
      "loss": 0.9213,
      "step": 655560
    },
    {
      "epoch": 2.297609409554651,
      "grad_norm": 2.84375,
      "learning_rate": 1.3007313254982595e-05,
      "loss": 0.7955,
      "step": 655570
    },
    {
      "epoch": 2.297644457061547,
      "grad_norm": 2.859375,
      "learning_rate": 1.3006664226318893e-05,
      "loss": 0.7526,
      "step": 655580
    },
    {
      "epoch": 2.2976795045684426,
      "grad_norm": 3.140625,
      "learning_rate": 1.3006015197655189e-05,
      "loss": 0.8278,
      "step": 655590
    },
    {
      "epoch": 2.2977145520753384,
      "grad_norm": 2.828125,
      "learning_rate": 1.3005366168991487e-05,
      "loss": 0.7856,
      "step": 655600
    },
    {
      "epoch": 2.2977495995822337,
      "grad_norm": 2.703125,
      "learning_rate": 1.3004717140327785e-05,
      "loss": 0.7145,
      "step": 655610
    },
    {
      "epoch": 2.2977846470891294,
      "grad_norm": 2.515625,
      "learning_rate": 1.3004068111664083e-05,
      "loss": 0.8216,
      "step": 655620
    },
    {
      "epoch": 2.2978196945960248,
      "grad_norm": 2.890625,
      "learning_rate": 1.3003419083000381e-05,
      "loss": 0.8508,
      "step": 655630
    },
    {
      "epoch": 2.2978547421029205,
      "grad_norm": 3.1875,
      "learning_rate": 1.300277005433668e-05,
      "loss": 0.9045,
      "step": 655640
    },
    {
      "epoch": 2.2978897896098163,
      "grad_norm": 2.96875,
      "learning_rate": 1.3002121025672979e-05,
      "loss": 0.8122,
      "step": 655650
    },
    {
      "epoch": 2.2979248371167116,
      "grad_norm": 3.25,
      "learning_rate": 1.3001471997009277e-05,
      "loss": 0.8384,
      "step": 655660
    },
    {
      "epoch": 2.2979598846236073,
      "grad_norm": 3.3125,
      "learning_rate": 1.3000822968345575e-05,
      "loss": 0.854,
      "step": 655670
    },
    {
      "epoch": 2.2979949321305027,
      "grad_norm": 2.96875,
      "learning_rate": 1.3000173939681873e-05,
      "loss": 0.8713,
      "step": 655680
    },
    {
      "epoch": 2.2980299796373984,
      "grad_norm": 2.6875,
      "learning_rate": 1.299952491101817e-05,
      "loss": 0.8471,
      "step": 655690
    },
    {
      "epoch": 2.298065027144294,
      "grad_norm": 2.9375,
      "learning_rate": 1.2998875882354469e-05,
      "loss": 0.8304,
      "step": 655700
    },
    {
      "epoch": 2.29810007465119,
      "grad_norm": 2.71875,
      "learning_rate": 1.2998226853690768e-05,
      "loss": 0.8036,
      "step": 655710
    },
    {
      "epoch": 2.2981351221580852,
      "grad_norm": 2.875,
      "learning_rate": 1.2997577825027066e-05,
      "loss": 0.7507,
      "step": 655720
    },
    {
      "epoch": 2.298170169664981,
      "grad_norm": 3.15625,
      "learning_rate": 1.2996928796363364e-05,
      "loss": 0.7971,
      "step": 655730
    },
    {
      "epoch": 2.2982052171718763,
      "grad_norm": 2.84375,
      "learning_rate": 1.2996279767699662e-05,
      "loss": 0.8102,
      "step": 655740
    },
    {
      "epoch": 2.298240264678772,
      "grad_norm": 3.171875,
      "learning_rate": 1.299563073903596e-05,
      "loss": 0.8784,
      "step": 655750
    },
    {
      "epoch": 2.298275312185668,
      "grad_norm": 2.90625,
      "learning_rate": 1.2994981710372258e-05,
      "loss": 0.8376,
      "step": 655760
    },
    {
      "epoch": 2.298310359692563,
      "grad_norm": 3.40625,
      "learning_rate": 1.2994332681708555e-05,
      "loss": 0.7934,
      "step": 655770
    },
    {
      "epoch": 2.298345407199459,
      "grad_norm": 3.0625,
      "learning_rate": 1.2993683653044853e-05,
      "loss": 0.8131,
      "step": 655780
    },
    {
      "epoch": 2.298380454706354,
      "grad_norm": 2.359375,
      "learning_rate": 1.299303462438115e-05,
      "loss": 0.8743,
      "step": 655790
    },
    {
      "epoch": 2.29841550221325,
      "grad_norm": 3.09375,
      "learning_rate": 1.2992385595717449e-05,
      "loss": 0.8337,
      "step": 655800
    },
    {
      "epoch": 2.2984505497201457,
      "grad_norm": 3.078125,
      "learning_rate": 1.2991736567053747e-05,
      "loss": 0.8218,
      "step": 655810
    },
    {
      "epoch": 2.2984855972270415,
      "grad_norm": 2.859375,
      "learning_rate": 1.2991087538390045e-05,
      "loss": 0.7452,
      "step": 655820
    },
    {
      "epoch": 2.298520644733937,
      "grad_norm": 2.515625,
      "learning_rate": 1.2990438509726344e-05,
      "loss": 0.7652,
      "step": 655830
    },
    {
      "epoch": 2.2985556922408326,
      "grad_norm": 2.671875,
      "learning_rate": 1.2989789481062642e-05,
      "loss": 0.786,
      "step": 655840
    },
    {
      "epoch": 2.298590739747728,
      "grad_norm": 2.578125,
      "learning_rate": 1.298914045239894e-05,
      "loss": 0.7298,
      "step": 655850
    },
    {
      "epoch": 2.2986257872546236,
      "grad_norm": 2.953125,
      "learning_rate": 1.2988491423735238e-05,
      "loss": 0.7916,
      "step": 655860
    },
    {
      "epoch": 2.2986608347615194,
      "grad_norm": 3.125,
      "learning_rate": 1.2987842395071536e-05,
      "loss": 0.8312,
      "step": 655870
    },
    {
      "epoch": 2.2986958822684147,
      "grad_norm": 2.28125,
      "learning_rate": 1.2987193366407834e-05,
      "loss": 0.7687,
      "step": 655880
    },
    {
      "epoch": 2.2987309297753105,
      "grad_norm": 2.375,
      "learning_rate": 1.2986544337744134e-05,
      "loss": 0.7449,
      "step": 655890
    },
    {
      "epoch": 2.298765977282206,
      "grad_norm": 3.3125,
      "learning_rate": 1.2985895309080432e-05,
      "loss": 0.8343,
      "step": 655900
    },
    {
      "epoch": 2.2988010247891015,
      "grad_norm": 2.59375,
      "learning_rate": 1.298524628041673e-05,
      "loss": 0.7867,
      "step": 655910
    },
    {
      "epoch": 2.2988360722959973,
      "grad_norm": 3.15625,
      "learning_rate": 1.2984597251753028e-05,
      "loss": 0.7981,
      "step": 655920
    },
    {
      "epoch": 2.298871119802893,
      "grad_norm": 3.09375,
      "learning_rate": 1.2983948223089326e-05,
      "loss": 0.809,
      "step": 655930
    },
    {
      "epoch": 2.2989061673097884,
      "grad_norm": 2.8125,
      "learning_rate": 1.2983299194425624e-05,
      "loss": 0.8079,
      "step": 655940
    },
    {
      "epoch": 2.298941214816684,
      "grad_norm": 2.484375,
      "learning_rate": 1.2982650165761922e-05,
      "loss": 0.7726,
      "step": 655950
    },
    {
      "epoch": 2.2989762623235794,
      "grad_norm": 2.8125,
      "learning_rate": 1.2982001137098218e-05,
      "loss": 0.8639,
      "step": 655960
    },
    {
      "epoch": 2.299011309830475,
      "grad_norm": 3.234375,
      "learning_rate": 1.2981352108434516e-05,
      "loss": 0.8122,
      "step": 655970
    },
    {
      "epoch": 2.299046357337371,
      "grad_norm": 2.65625,
      "learning_rate": 1.2980703079770814e-05,
      "loss": 0.8511,
      "step": 655980
    },
    {
      "epoch": 2.2990814048442663,
      "grad_norm": 2.671875,
      "learning_rate": 1.2980054051107112e-05,
      "loss": 0.7862,
      "step": 655990
    },
    {
      "epoch": 2.299116452351162,
      "grad_norm": 2.515625,
      "learning_rate": 1.297940502244341e-05,
      "loss": 0.7246,
      "step": 656000
    },
    {
      "epoch": 2.299151499858058,
      "grad_norm": 3.0,
      "learning_rate": 1.297875599377971e-05,
      "loss": 0.8069,
      "step": 656010
    },
    {
      "epoch": 2.299186547364953,
      "grad_norm": 2.546875,
      "learning_rate": 1.2978106965116008e-05,
      "loss": 0.8366,
      "step": 656020
    },
    {
      "epoch": 2.299221594871849,
      "grad_norm": 3.015625,
      "learning_rate": 1.2977457936452306e-05,
      "loss": 0.8142,
      "step": 656030
    },
    {
      "epoch": 2.2992566423787446,
      "grad_norm": 2.765625,
      "learning_rate": 1.2976808907788604e-05,
      "loss": 0.7592,
      "step": 656040
    },
    {
      "epoch": 2.29929168988564,
      "grad_norm": 2.609375,
      "learning_rate": 1.2976159879124902e-05,
      "loss": 0.7642,
      "step": 656050
    },
    {
      "epoch": 2.2993267373925357,
      "grad_norm": 3.140625,
      "learning_rate": 1.29755108504612e-05,
      "loss": 0.7797,
      "step": 656060
    },
    {
      "epoch": 2.299361784899431,
      "grad_norm": 2.671875,
      "learning_rate": 1.2974861821797498e-05,
      "loss": 0.8507,
      "step": 656070
    },
    {
      "epoch": 2.2993968324063268,
      "grad_norm": 2.84375,
      "learning_rate": 1.2974212793133798e-05,
      "loss": 0.7932,
      "step": 656080
    },
    {
      "epoch": 2.2994318799132225,
      "grad_norm": 2.84375,
      "learning_rate": 1.2973563764470096e-05,
      "loss": 0.7773,
      "step": 656090
    },
    {
      "epoch": 2.299466927420118,
      "grad_norm": 3.03125,
      "learning_rate": 1.2972914735806394e-05,
      "loss": 0.7841,
      "step": 656100
    },
    {
      "epoch": 2.2995019749270136,
      "grad_norm": 2.828125,
      "learning_rate": 1.2972265707142692e-05,
      "loss": 0.7855,
      "step": 656110
    },
    {
      "epoch": 2.2995370224339093,
      "grad_norm": 3.203125,
      "learning_rate": 1.297161667847899e-05,
      "loss": 0.8375,
      "step": 656120
    },
    {
      "epoch": 2.2995720699408047,
      "grad_norm": 2.40625,
      "learning_rate": 1.2970967649815288e-05,
      "loss": 0.7371,
      "step": 656130
    },
    {
      "epoch": 2.2996071174477004,
      "grad_norm": 2.953125,
      "learning_rate": 1.2970318621151586e-05,
      "loss": 0.7993,
      "step": 656140
    },
    {
      "epoch": 2.299642164954596,
      "grad_norm": 3.1875,
      "learning_rate": 1.2969669592487882e-05,
      "loss": 0.8267,
      "step": 656150
    },
    {
      "epoch": 2.2996772124614915,
      "grad_norm": 2.28125,
      "learning_rate": 1.296902056382418e-05,
      "loss": 0.767,
      "step": 656160
    },
    {
      "epoch": 2.2997122599683872,
      "grad_norm": 2.453125,
      "learning_rate": 1.2968371535160478e-05,
      "loss": 0.7418,
      "step": 656170
    },
    {
      "epoch": 2.2997473074752826,
      "grad_norm": 3.0625,
      "learning_rate": 1.2967722506496776e-05,
      "loss": 0.7672,
      "step": 656180
    },
    {
      "epoch": 2.2997823549821783,
      "grad_norm": 3.15625,
      "learning_rate": 1.2967073477833076e-05,
      "loss": 0.8179,
      "step": 656190
    },
    {
      "epoch": 2.299817402489074,
      "grad_norm": 2.96875,
      "learning_rate": 1.2966424449169374e-05,
      "loss": 0.8199,
      "step": 656200
    },
    {
      "epoch": 2.2998524499959694,
      "grad_norm": 2.9375,
      "learning_rate": 1.2965775420505672e-05,
      "loss": 0.8002,
      "step": 656210
    },
    {
      "epoch": 2.299887497502865,
      "grad_norm": 3.3125,
      "learning_rate": 1.296512639184197e-05,
      "loss": 0.7345,
      "step": 656220
    },
    {
      "epoch": 2.299922545009761,
      "grad_norm": 2.59375,
      "learning_rate": 1.2964477363178268e-05,
      "loss": 0.9229,
      "step": 656230
    },
    {
      "epoch": 2.299957592516656,
      "grad_norm": 3.21875,
      "learning_rate": 1.2963828334514566e-05,
      "loss": 0.849,
      "step": 656240
    },
    {
      "epoch": 2.299992640023552,
      "grad_norm": 2.921875,
      "learning_rate": 1.2963179305850864e-05,
      "loss": 0.8326,
      "step": 656250
    },
    {
      "epoch": 2.3000276875304477,
      "grad_norm": 2.828125,
      "learning_rate": 1.2962530277187163e-05,
      "loss": 0.7808,
      "step": 656260
    },
    {
      "epoch": 2.300062735037343,
      "grad_norm": 2.71875,
      "learning_rate": 1.2961881248523461e-05,
      "loss": 0.7647,
      "step": 656270
    },
    {
      "epoch": 2.300097782544239,
      "grad_norm": 3.015625,
      "learning_rate": 1.296123221985976e-05,
      "loss": 0.8022,
      "step": 656280
    },
    {
      "epoch": 2.300132830051134,
      "grad_norm": 3.0625,
      "learning_rate": 1.2960583191196057e-05,
      "loss": 0.8203,
      "step": 656290
    },
    {
      "epoch": 2.30016787755803,
      "grad_norm": 2.5,
      "learning_rate": 1.2959934162532355e-05,
      "loss": 0.7886,
      "step": 656300
    },
    {
      "epoch": 2.3002029250649256,
      "grad_norm": 2.90625,
      "learning_rate": 1.2959285133868653e-05,
      "loss": 0.8899,
      "step": 656310
    },
    {
      "epoch": 2.300237972571821,
      "grad_norm": 3.25,
      "learning_rate": 1.2958636105204951e-05,
      "loss": 0.8447,
      "step": 656320
    },
    {
      "epoch": 2.3002730200787167,
      "grad_norm": 2.875,
      "learning_rate": 1.2957987076541251e-05,
      "loss": 0.7446,
      "step": 656330
    },
    {
      "epoch": 2.3003080675856125,
      "grad_norm": 2.765625,
      "learning_rate": 1.2957338047877546e-05,
      "loss": 0.765,
      "step": 656340
    },
    {
      "epoch": 2.3003431150925078,
      "grad_norm": 3.125,
      "learning_rate": 1.2956689019213844e-05,
      "loss": 0.8089,
      "step": 656350
    },
    {
      "epoch": 2.3003781625994035,
      "grad_norm": 2.828125,
      "learning_rate": 1.2956039990550142e-05,
      "loss": 0.8074,
      "step": 656360
    },
    {
      "epoch": 2.3004132101062993,
      "grad_norm": 3.046875,
      "learning_rate": 1.295539096188644e-05,
      "loss": 0.8339,
      "step": 656370
    },
    {
      "epoch": 2.3004482576131946,
      "grad_norm": 3.046875,
      "learning_rate": 1.295474193322274e-05,
      "loss": 0.8367,
      "step": 656380
    },
    {
      "epoch": 2.3004833051200904,
      "grad_norm": 2.734375,
      "learning_rate": 1.2954092904559037e-05,
      "loss": 0.8205,
      "step": 656390
    },
    {
      "epoch": 2.3005183526269857,
      "grad_norm": 2.71875,
      "learning_rate": 1.2953443875895335e-05,
      "loss": 0.7788,
      "step": 656400
    },
    {
      "epoch": 2.3005534001338814,
      "grad_norm": 3.3125,
      "learning_rate": 1.2952794847231633e-05,
      "loss": 0.8021,
      "step": 656410
    },
    {
      "epoch": 2.300588447640777,
      "grad_norm": 3.21875,
      "learning_rate": 1.2952145818567931e-05,
      "loss": 0.7202,
      "step": 656420
    },
    {
      "epoch": 2.3006234951476725,
      "grad_norm": 2.796875,
      "learning_rate": 1.295149678990423e-05,
      "loss": 0.8355,
      "step": 656430
    },
    {
      "epoch": 2.3006585426545683,
      "grad_norm": 3.046875,
      "learning_rate": 1.2950847761240529e-05,
      "loss": 0.844,
      "step": 656440
    },
    {
      "epoch": 2.300693590161464,
      "grad_norm": 2.90625,
      "learning_rate": 1.2950198732576827e-05,
      "loss": 0.7966,
      "step": 656450
    },
    {
      "epoch": 2.3007286376683593,
      "grad_norm": 3.140625,
      "learning_rate": 1.2949549703913125e-05,
      "loss": 0.8379,
      "step": 656460
    },
    {
      "epoch": 2.300763685175255,
      "grad_norm": 2.75,
      "learning_rate": 1.2948900675249423e-05,
      "loss": 0.8155,
      "step": 656470
    },
    {
      "epoch": 2.300798732682151,
      "grad_norm": 2.890625,
      "learning_rate": 1.2948251646585721e-05,
      "loss": 0.8202,
      "step": 656480
    },
    {
      "epoch": 2.300833780189046,
      "grad_norm": 2.65625,
      "learning_rate": 1.2947602617922019e-05,
      "loss": 0.7741,
      "step": 656490
    },
    {
      "epoch": 2.300868827695942,
      "grad_norm": 2.46875,
      "learning_rate": 1.2946953589258317e-05,
      "loss": 0.8327,
      "step": 656500
    },
    {
      "epoch": 2.3009038752028372,
      "grad_norm": 2.984375,
      "learning_rate": 1.2946304560594617e-05,
      "loss": 0.7995,
      "step": 656510
    },
    {
      "epoch": 2.300938922709733,
      "grad_norm": 2.859375,
      "learning_rate": 1.2945655531930915e-05,
      "loss": 0.8347,
      "step": 656520
    },
    {
      "epoch": 2.3009739702166288,
      "grad_norm": 2.484375,
      "learning_rate": 1.294500650326721e-05,
      "loss": 0.7622,
      "step": 656530
    },
    {
      "epoch": 2.301009017723524,
      "grad_norm": 2.9375,
      "learning_rate": 1.2944357474603507e-05,
      "loss": 0.8248,
      "step": 656540
    },
    {
      "epoch": 2.30104406523042,
      "grad_norm": 2.640625,
      "learning_rate": 1.2943708445939805e-05,
      "loss": 0.78,
      "step": 656550
    },
    {
      "epoch": 2.3010791127373156,
      "grad_norm": 2.828125,
      "learning_rate": 1.2943059417276105e-05,
      "loss": 0.6878,
      "step": 656560
    },
    {
      "epoch": 2.301114160244211,
      "grad_norm": 3.265625,
      "learning_rate": 1.2942410388612403e-05,
      "loss": 0.8371,
      "step": 656570
    },
    {
      "epoch": 2.3011492077511067,
      "grad_norm": 2.890625,
      "learning_rate": 1.2941761359948701e-05,
      "loss": 0.8006,
      "step": 656580
    },
    {
      "epoch": 2.3011842552580024,
      "grad_norm": 2.625,
      "learning_rate": 1.2941112331284999e-05,
      "loss": 0.7848,
      "step": 656590
    },
    {
      "epoch": 2.3012193027648977,
      "grad_norm": 3.078125,
      "learning_rate": 1.2940463302621297e-05,
      "loss": 0.8565,
      "step": 656600
    },
    {
      "epoch": 2.3012543502717935,
      "grad_norm": 2.796875,
      "learning_rate": 1.2939814273957595e-05,
      "loss": 0.7662,
      "step": 656610
    },
    {
      "epoch": 2.301289397778689,
      "grad_norm": 2.703125,
      "learning_rate": 1.2939165245293893e-05,
      "loss": 0.7932,
      "step": 656620
    },
    {
      "epoch": 2.3013244452855846,
      "grad_norm": 3.109375,
      "learning_rate": 1.2938516216630193e-05,
      "loss": 0.8458,
      "step": 656630
    },
    {
      "epoch": 2.3013594927924803,
      "grad_norm": 2.875,
      "learning_rate": 1.293786718796649e-05,
      "loss": 0.7355,
      "step": 656640
    },
    {
      "epoch": 2.3013945402993756,
      "grad_norm": 3.09375,
      "learning_rate": 1.2937218159302789e-05,
      "loss": 0.8465,
      "step": 656650
    },
    {
      "epoch": 2.3014295878062714,
      "grad_norm": 3.171875,
      "learning_rate": 1.2936569130639087e-05,
      "loss": 0.812,
      "step": 656660
    },
    {
      "epoch": 2.301464635313167,
      "grad_norm": 2.609375,
      "learning_rate": 1.2935920101975385e-05,
      "loss": 0.8544,
      "step": 656670
    },
    {
      "epoch": 2.3014996828200625,
      "grad_norm": 2.515625,
      "learning_rate": 1.2935271073311683e-05,
      "loss": 0.8515,
      "step": 656680
    },
    {
      "epoch": 2.301534730326958,
      "grad_norm": 2.796875,
      "learning_rate": 1.293462204464798e-05,
      "loss": 0.8897,
      "step": 656690
    },
    {
      "epoch": 2.301569777833854,
      "grad_norm": 3.1875,
      "learning_rate": 1.293397301598428e-05,
      "loss": 0.7959,
      "step": 656700
    },
    {
      "epoch": 2.3016048253407493,
      "grad_norm": 3.03125,
      "learning_rate": 1.2933323987320575e-05,
      "loss": 0.8203,
      "step": 656710
    },
    {
      "epoch": 2.301639872847645,
      "grad_norm": 3.171875,
      "learning_rate": 1.2932674958656873e-05,
      "loss": 0.8202,
      "step": 656720
    },
    {
      "epoch": 2.3016749203545404,
      "grad_norm": 2.6875,
      "learning_rate": 1.2932025929993171e-05,
      "loss": 0.7836,
      "step": 656730
    },
    {
      "epoch": 2.301709967861436,
      "grad_norm": 2.8125,
      "learning_rate": 1.293137690132947e-05,
      "loss": 0.8077,
      "step": 656740
    },
    {
      "epoch": 2.301745015368332,
      "grad_norm": 2.734375,
      "learning_rate": 1.2930727872665769e-05,
      "loss": 0.8151,
      "step": 656750
    },
    {
      "epoch": 2.301780062875227,
      "grad_norm": 2.578125,
      "learning_rate": 1.2930078844002067e-05,
      "loss": 0.8413,
      "step": 656760
    },
    {
      "epoch": 2.301815110382123,
      "grad_norm": 3.28125,
      "learning_rate": 1.2929429815338365e-05,
      "loss": 0.8174,
      "step": 656770
    },
    {
      "epoch": 2.3018501578890187,
      "grad_norm": 3.265625,
      "learning_rate": 1.2928780786674663e-05,
      "loss": 0.8357,
      "step": 656780
    },
    {
      "epoch": 2.301885205395914,
      "grad_norm": 3.6875,
      "learning_rate": 1.292813175801096e-05,
      "loss": 0.7782,
      "step": 656790
    },
    {
      "epoch": 2.3019202529028098,
      "grad_norm": 3.015625,
      "learning_rate": 1.2927482729347259e-05,
      "loss": 0.9091,
      "step": 656800
    },
    {
      "epoch": 2.3019553004097055,
      "grad_norm": 2.640625,
      "learning_rate": 1.2926833700683558e-05,
      "loss": 0.7776,
      "step": 656810
    },
    {
      "epoch": 2.301990347916601,
      "grad_norm": 3.078125,
      "learning_rate": 1.2926184672019856e-05,
      "loss": 0.8878,
      "step": 656820
    },
    {
      "epoch": 2.3020253954234966,
      "grad_norm": 2.96875,
      "learning_rate": 1.2925535643356154e-05,
      "loss": 0.7911,
      "step": 656830
    },
    {
      "epoch": 2.302060442930392,
      "grad_norm": 2.703125,
      "learning_rate": 1.2924886614692452e-05,
      "loss": 0.8179,
      "step": 656840
    },
    {
      "epoch": 2.3020954904372877,
      "grad_norm": 2.984375,
      "learning_rate": 1.292423758602875e-05,
      "loss": 0.7956,
      "step": 656850
    },
    {
      "epoch": 2.3021305379441834,
      "grad_norm": 3.21875,
      "learning_rate": 1.2923588557365048e-05,
      "loss": 0.8023,
      "step": 656860
    },
    {
      "epoch": 2.3021655854510787,
      "grad_norm": 2.765625,
      "learning_rate": 1.2922939528701346e-05,
      "loss": 0.806,
      "step": 656870
    },
    {
      "epoch": 2.3022006329579745,
      "grad_norm": 2.75,
      "learning_rate": 1.2922290500037646e-05,
      "loss": 0.859,
      "step": 656880
    },
    {
      "epoch": 2.3022356804648703,
      "grad_norm": 2.890625,
      "learning_rate": 1.2921641471373944e-05,
      "loss": 0.8081,
      "step": 656890
    },
    {
      "epoch": 2.3022707279717656,
      "grad_norm": 2.875,
      "learning_rate": 1.2920992442710239e-05,
      "loss": 0.7903,
      "step": 656900
    },
    {
      "epoch": 2.3023057754786613,
      "grad_norm": 2.578125,
      "learning_rate": 1.2920343414046537e-05,
      "loss": 0.7973,
      "step": 656910
    },
    {
      "epoch": 2.302340822985557,
      "grad_norm": 2.71875,
      "learning_rate": 1.2919694385382835e-05,
      "loss": 0.8501,
      "step": 656920
    },
    {
      "epoch": 2.3023758704924524,
      "grad_norm": 2.734375,
      "learning_rate": 1.2919045356719134e-05,
      "loss": 0.7558,
      "step": 656930
    },
    {
      "epoch": 2.302410917999348,
      "grad_norm": 2.453125,
      "learning_rate": 1.2918396328055432e-05,
      "loss": 0.7872,
      "step": 656940
    },
    {
      "epoch": 2.3024459655062435,
      "grad_norm": 2.828125,
      "learning_rate": 1.291774729939173e-05,
      "loss": 0.7975,
      "step": 656950
    },
    {
      "epoch": 2.3024810130131392,
      "grad_norm": 3.046875,
      "learning_rate": 1.2917098270728028e-05,
      "loss": 0.7662,
      "step": 656960
    },
    {
      "epoch": 2.302516060520035,
      "grad_norm": 3.09375,
      "learning_rate": 1.2916449242064326e-05,
      "loss": 0.8215,
      "step": 656970
    },
    {
      "epoch": 2.3025511080269307,
      "grad_norm": 2.859375,
      "learning_rate": 1.2915800213400624e-05,
      "loss": 0.7813,
      "step": 656980
    },
    {
      "epoch": 2.302586155533826,
      "grad_norm": 2.828125,
      "learning_rate": 1.2915151184736924e-05,
      "loss": 0.7986,
      "step": 656990
    },
    {
      "epoch": 2.302621203040722,
      "grad_norm": 2.8125,
      "learning_rate": 1.2914502156073222e-05,
      "loss": 0.7818,
      "step": 657000
    },
    {
      "epoch": 2.302656250547617,
      "grad_norm": 2.953125,
      "learning_rate": 1.291385312740952e-05,
      "loss": 0.7557,
      "step": 657010
    },
    {
      "epoch": 2.302691298054513,
      "grad_norm": 3.03125,
      "learning_rate": 1.2913204098745818e-05,
      "loss": 0.8446,
      "step": 657020
    },
    {
      "epoch": 2.3027263455614086,
      "grad_norm": 2.765625,
      "learning_rate": 1.2912555070082116e-05,
      "loss": 0.7681,
      "step": 657030
    },
    {
      "epoch": 2.302761393068304,
      "grad_norm": 2.90625,
      "learning_rate": 1.2911906041418414e-05,
      "loss": 0.827,
      "step": 657040
    },
    {
      "epoch": 2.3027964405751997,
      "grad_norm": 2.859375,
      "learning_rate": 1.2911257012754712e-05,
      "loss": 0.8726,
      "step": 657050
    },
    {
      "epoch": 2.302831488082095,
      "grad_norm": 3.1875,
      "learning_rate": 1.2910607984091012e-05,
      "loss": 0.7398,
      "step": 657060
    },
    {
      "epoch": 2.302866535588991,
      "grad_norm": 2.40625,
      "learning_rate": 1.290995895542731e-05,
      "loss": 0.7546,
      "step": 657070
    },
    {
      "epoch": 2.3029015830958866,
      "grad_norm": 2.296875,
      "learning_rate": 1.2909309926763608e-05,
      "loss": 0.7438,
      "step": 657080
    },
    {
      "epoch": 2.3029366306027823,
      "grad_norm": 2.890625,
      "learning_rate": 1.2908660898099902e-05,
      "loss": 0.8078,
      "step": 657090
    },
    {
      "epoch": 2.3029716781096776,
      "grad_norm": 2.921875,
      "learning_rate": 1.29080118694362e-05,
      "loss": 0.7723,
      "step": 657100
    },
    {
      "epoch": 2.3030067256165734,
      "grad_norm": 2.875,
      "learning_rate": 1.29073628407725e-05,
      "loss": 0.8224,
      "step": 657110
    },
    {
      "epoch": 2.3030417731234687,
      "grad_norm": 2.96875,
      "learning_rate": 1.2906713812108798e-05,
      "loss": 0.8134,
      "step": 657120
    },
    {
      "epoch": 2.3030768206303645,
      "grad_norm": 2.90625,
      "learning_rate": 1.2906064783445096e-05,
      "loss": 0.7308,
      "step": 657130
    },
    {
      "epoch": 2.30311186813726,
      "grad_norm": 3.59375,
      "learning_rate": 1.2905415754781394e-05,
      "loss": 0.8514,
      "step": 657140
    },
    {
      "epoch": 2.3031469156441555,
      "grad_norm": 2.75,
      "learning_rate": 1.2904766726117692e-05,
      "loss": 0.8261,
      "step": 657150
    },
    {
      "epoch": 2.3031819631510513,
      "grad_norm": 2.4375,
      "learning_rate": 1.290411769745399e-05,
      "loss": 0.7532,
      "step": 657160
    },
    {
      "epoch": 2.303217010657947,
      "grad_norm": 3.0,
      "learning_rate": 1.2903468668790288e-05,
      "loss": 0.8238,
      "step": 657170
    },
    {
      "epoch": 2.3032520581648424,
      "grad_norm": 2.640625,
      "learning_rate": 1.2902819640126588e-05,
      "loss": 0.803,
      "step": 657180
    },
    {
      "epoch": 2.303287105671738,
      "grad_norm": 2.8125,
      "learning_rate": 1.2902170611462886e-05,
      "loss": 0.8262,
      "step": 657190
    },
    {
      "epoch": 2.303322153178634,
      "grad_norm": 2.96875,
      "learning_rate": 1.2901521582799184e-05,
      "loss": 0.7994,
      "step": 657200
    },
    {
      "epoch": 2.303357200685529,
      "grad_norm": 2.625,
      "learning_rate": 1.2900872554135482e-05,
      "loss": 0.7382,
      "step": 657210
    },
    {
      "epoch": 2.303392248192425,
      "grad_norm": 2.84375,
      "learning_rate": 1.290022352547178e-05,
      "loss": 0.7989,
      "step": 657220
    },
    {
      "epoch": 2.3034272956993203,
      "grad_norm": 3.03125,
      "learning_rate": 1.2899574496808078e-05,
      "loss": 0.7607,
      "step": 657230
    },
    {
      "epoch": 2.303462343206216,
      "grad_norm": 3.03125,
      "learning_rate": 1.2898925468144376e-05,
      "loss": 0.8245,
      "step": 657240
    },
    {
      "epoch": 2.3034973907131118,
      "grad_norm": 2.84375,
      "learning_rate": 1.2898276439480676e-05,
      "loss": 0.7978,
      "step": 657250
    },
    {
      "epoch": 2.303532438220007,
      "grad_norm": 3.171875,
      "learning_rate": 1.2897627410816974e-05,
      "loss": 0.8124,
      "step": 657260
    },
    {
      "epoch": 2.303567485726903,
      "grad_norm": 2.640625,
      "learning_rate": 1.2896978382153272e-05,
      "loss": 0.7966,
      "step": 657270
    },
    {
      "epoch": 2.3036025332337986,
      "grad_norm": 2.671875,
      "learning_rate": 1.2896329353489566e-05,
      "loss": 0.7692,
      "step": 657280
    },
    {
      "epoch": 2.303637580740694,
      "grad_norm": 2.9375,
      "learning_rate": 1.2895680324825866e-05,
      "loss": 0.7672,
      "step": 657290
    },
    {
      "epoch": 2.3036726282475897,
      "grad_norm": 2.859375,
      "learning_rate": 1.2895031296162164e-05,
      "loss": 0.8261,
      "step": 657300
    },
    {
      "epoch": 2.3037076757544854,
      "grad_norm": 3.0625,
      "learning_rate": 1.2894382267498462e-05,
      "loss": 0.8504,
      "step": 657310
    },
    {
      "epoch": 2.3037427232613807,
      "grad_norm": 3.25,
      "learning_rate": 1.289373323883476e-05,
      "loss": 0.7747,
      "step": 657320
    },
    {
      "epoch": 2.3037777707682765,
      "grad_norm": 3.40625,
      "learning_rate": 1.2893084210171058e-05,
      "loss": 0.8636,
      "step": 657330
    },
    {
      "epoch": 2.303812818275172,
      "grad_norm": 2.875,
      "learning_rate": 1.2892435181507356e-05,
      "loss": 0.7872,
      "step": 657340
    },
    {
      "epoch": 2.3038478657820676,
      "grad_norm": 2.625,
      "learning_rate": 1.2891786152843654e-05,
      "loss": 0.7351,
      "step": 657350
    },
    {
      "epoch": 2.3038829132889633,
      "grad_norm": 3.21875,
      "learning_rate": 1.2891137124179954e-05,
      "loss": 0.7554,
      "step": 657360
    },
    {
      "epoch": 2.3039179607958586,
      "grad_norm": 3.0,
      "learning_rate": 1.2890488095516252e-05,
      "loss": 0.8339,
      "step": 657370
    },
    {
      "epoch": 2.3039530083027544,
      "grad_norm": 3.09375,
      "learning_rate": 1.288983906685255e-05,
      "loss": 0.9454,
      "step": 657380
    },
    {
      "epoch": 2.30398805580965,
      "grad_norm": 3.40625,
      "learning_rate": 1.2889190038188848e-05,
      "loss": 0.8624,
      "step": 657390
    },
    {
      "epoch": 2.3040231033165455,
      "grad_norm": 3.109375,
      "learning_rate": 1.2888541009525146e-05,
      "loss": 0.6968,
      "step": 657400
    },
    {
      "epoch": 2.3040581508234412,
      "grad_norm": 2.515625,
      "learning_rate": 1.2887891980861444e-05,
      "loss": 0.7619,
      "step": 657410
    },
    {
      "epoch": 2.304093198330337,
      "grad_norm": 2.6875,
      "learning_rate": 1.2887242952197742e-05,
      "loss": 0.7725,
      "step": 657420
    },
    {
      "epoch": 2.3041282458372323,
      "grad_norm": 2.953125,
      "learning_rate": 1.2886593923534041e-05,
      "loss": 0.7657,
      "step": 657430
    },
    {
      "epoch": 2.304163293344128,
      "grad_norm": 3.078125,
      "learning_rate": 1.288594489487034e-05,
      "loss": 0.7719,
      "step": 657440
    },
    {
      "epoch": 2.3041983408510234,
      "grad_norm": 2.71875,
      "learning_rate": 1.2885295866206637e-05,
      "loss": 0.7843,
      "step": 657450
    },
    {
      "epoch": 2.304233388357919,
      "grad_norm": 3.0,
      "learning_rate": 1.2884646837542935e-05,
      "loss": 0.7886,
      "step": 657460
    },
    {
      "epoch": 2.304268435864815,
      "grad_norm": 2.59375,
      "learning_rate": 1.2883997808879232e-05,
      "loss": 0.722,
      "step": 657470
    },
    {
      "epoch": 2.30430348337171,
      "grad_norm": 2.703125,
      "learning_rate": 1.288334878021553e-05,
      "loss": 0.8247,
      "step": 657480
    },
    {
      "epoch": 2.304338530878606,
      "grad_norm": 2.890625,
      "learning_rate": 1.2882699751551828e-05,
      "loss": 0.728,
      "step": 657490
    },
    {
      "epoch": 2.3043735783855017,
      "grad_norm": 2.828125,
      "learning_rate": 1.2882050722888126e-05,
      "loss": 0.7649,
      "step": 657500
    },
    {
      "epoch": 2.304408625892397,
      "grad_norm": 2.921875,
      "learning_rate": 1.2881401694224424e-05,
      "loss": 0.8647,
      "step": 657510
    },
    {
      "epoch": 2.304443673399293,
      "grad_norm": 2.53125,
      "learning_rate": 1.2880752665560722e-05,
      "loss": 0.7636,
      "step": 657520
    },
    {
      "epoch": 2.3044787209061885,
      "grad_norm": 2.84375,
      "learning_rate": 1.288010363689702e-05,
      "loss": 0.8088,
      "step": 657530
    },
    {
      "epoch": 2.304513768413084,
      "grad_norm": 3.546875,
      "learning_rate": 1.287945460823332e-05,
      "loss": 0.8778,
      "step": 657540
    },
    {
      "epoch": 2.3045488159199796,
      "grad_norm": 2.90625,
      "learning_rate": 1.2878805579569617e-05,
      "loss": 0.7479,
      "step": 657550
    },
    {
      "epoch": 2.304583863426875,
      "grad_norm": 2.78125,
      "learning_rate": 1.2878156550905915e-05,
      "loss": 0.7749,
      "step": 657560
    },
    {
      "epoch": 2.3046189109337707,
      "grad_norm": 2.875,
      "learning_rate": 1.2877507522242213e-05,
      "loss": 0.7788,
      "step": 657570
    },
    {
      "epoch": 2.3046539584406665,
      "grad_norm": 2.78125,
      "learning_rate": 1.2876858493578511e-05,
      "loss": 0.8662,
      "step": 657580
    },
    {
      "epoch": 2.3046890059475618,
      "grad_norm": 2.671875,
      "learning_rate": 1.287620946491481e-05,
      "loss": 0.8034,
      "step": 657590
    },
    {
      "epoch": 2.3047240534544575,
      "grad_norm": 2.890625,
      "learning_rate": 1.2875560436251107e-05,
      "loss": 0.8035,
      "step": 657600
    },
    {
      "epoch": 2.3047591009613533,
      "grad_norm": 2.4375,
      "learning_rate": 1.2874911407587407e-05,
      "loss": 0.8762,
      "step": 657610
    },
    {
      "epoch": 2.3047941484682486,
      "grad_norm": 3.203125,
      "learning_rate": 1.2874262378923705e-05,
      "loss": 0.8035,
      "step": 657620
    },
    {
      "epoch": 2.3048291959751444,
      "grad_norm": 2.765625,
      "learning_rate": 1.2873613350260003e-05,
      "loss": 0.7682,
      "step": 657630
    },
    {
      "epoch": 2.30486424348204,
      "grad_norm": 2.78125,
      "learning_rate": 1.2872964321596301e-05,
      "loss": 0.7822,
      "step": 657640
    },
    {
      "epoch": 2.3048992909889354,
      "grad_norm": 3.109375,
      "learning_rate": 1.2872315292932599e-05,
      "loss": 0.8107,
      "step": 657650
    },
    {
      "epoch": 2.304934338495831,
      "grad_norm": 3.015625,
      "learning_rate": 1.2871666264268895e-05,
      "loss": 0.7796,
      "step": 657660
    },
    {
      "epoch": 2.3049693860027265,
      "grad_norm": 3.703125,
      "learning_rate": 1.2871017235605193e-05,
      "loss": 0.7894,
      "step": 657670
    },
    {
      "epoch": 2.3050044335096223,
      "grad_norm": 2.71875,
      "learning_rate": 1.2870368206941491e-05,
      "loss": 0.7789,
      "step": 657680
    },
    {
      "epoch": 2.305039481016518,
      "grad_norm": 2.703125,
      "learning_rate": 1.286971917827779e-05,
      "loss": 0.8188,
      "step": 657690
    },
    {
      "epoch": 2.3050745285234133,
      "grad_norm": 3.125,
      "learning_rate": 1.2869070149614087e-05,
      "loss": 0.8304,
      "step": 657700
    },
    {
      "epoch": 2.305109576030309,
      "grad_norm": 3.46875,
      "learning_rate": 1.2868421120950385e-05,
      "loss": 0.8191,
      "step": 657710
    },
    {
      "epoch": 2.305144623537205,
      "grad_norm": 2.8125,
      "learning_rate": 1.2867772092286683e-05,
      "loss": 0.8433,
      "step": 657720
    },
    {
      "epoch": 2.3051796710441,
      "grad_norm": 2.734375,
      "learning_rate": 1.2867123063622983e-05,
      "loss": 0.747,
      "step": 657730
    },
    {
      "epoch": 2.305214718550996,
      "grad_norm": 2.484375,
      "learning_rate": 1.2866474034959281e-05,
      "loss": 0.7629,
      "step": 657740
    },
    {
      "epoch": 2.3052497660578917,
      "grad_norm": 2.828125,
      "learning_rate": 1.2865825006295579e-05,
      "loss": 0.8497,
      "step": 657750
    },
    {
      "epoch": 2.305284813564787,
      "grad_norm": 2.78125,
      "learning_rate": 1.2865175977631877e-05,
      "loss": 0.8362,
      "step": 657760
    },
    {
      "epoch": 2.3053198610716827,
      "grad_norm": 3.390625,
      "learning_rate": 1.2864526948968175e-05,
      "loss": 0.923,
      "step": 657770
    },
    {
      "epoch": 2.305354908578578,
      "grad_norm": 2.921875,
      "learning_rate": 1.2863877920304473e-05,
      "loss": 0.7859,
      "step": 657780
    },
    {
      "epoch": 2.305389956085474,
      "grad_norm": 2.875,
      "learning_rate": 1.2863228891640771e-05,
      "loss": 0.7878,
      "step": 657790
    },
    {
      "epoch": 2.3054250035923696,
      "grad_norm": 2.546875,
      "learning_rate": 1.286257986297707e-05,
      "loss": 0.7785,
      "step": 657800
    },
    {
      "epoch": 2.305460051099265,
      "grad_norm": 2.734375,
      "learning_rate": 1.2861930834313369e-05,
      "loss": 0.8252,
      "step": 657810
    },
    {
      "epoch": 2.3054950986061606,
      "grad_norm": 2.796875,
      "learning_rate": 1.2861281805649667e-05,
      "loss": 0.7936,
      "step": 657820
    },
    {
      "epoch": 2.3055301461130564,
      "grad_norm": 2.75,
      "learning_rate": 1.2860632776985965e-05,
      "loss": 0.8238,
      "step": 657830
    },
    {
      "epoch": 2.3055651936199517,
      "grad_norm": 3.09375,
      "learning_rate": 1.2859983748322261e-05,
      "loss": 0.7837,
      "step": 657840
    },
    {
      "epoch": 2.3056002411268475,
      "grad_norm": 3.171875,
      "learning_rate": 1.2859334719658559e-05,
      "loss": 0.8262,
      "step": 657850
    },
    {
      "epoch": 2.3056352886337432,
      "grad_norm": 3.359375,
      "learning_rate": 1.2858685690994857e-05,
      "loss": 0.822,
      "step": 657860
    },
    {
      "epoch": 2.3056703361406385,
      "grad_norm": 2.78125,
      "learning_rate": 1.2858036662331155e-05,
      "loss": 0.8092,
      "step": 657870
    },
    {
      "epoch": 2.3057053836475343,
      "grad_norm": 2.5625,
      "learning_rate": 1.2857387633667453e-05,
      "loss": 0.8042,
      "step": 657880
    },
    {
      "epoch": 2.3057404311544296,
      "grad_norm": 3.234375,
      "learning_rate": 1.2856738605003751e-05,
      "loss": 0.8257,
      "step": 657890
    },
    {
      "epoch": 2.3057754786613254,
      "grad_norm": 2.953125,
      "learning_rate": 1.2856089576340049e-05,
      "loss": 0.8105,
      "step": 657900
    },
    {
      "epoch": 2.305810526168221,
      "grad_norm": 3.09375,
      "learning_rate": 1.2855440547676349e-05,
      "loss": 0.8337,
      "step": 657910
    },
    {
      "epoch": 2.3058455736751164,
      "grad_norm": 2.671875,
      "learning_rate": 1.2854791519012647e-05,
      "loss": 0.791,
      "step": 657920
    },
    {
      "epoch": 2.305880621182012,
      "grad_norm": 2.84375,
      "learning_rate": 1.2854142490348945e-05,
      "loss": 0.8504,
      "step": 657930
    },
    {
      "epoch": 2.305915668688908,
      "grad_norm": 3.125,
      "learning_rate": 1.2853493461685243e-05,
      "loss": 0.835,
      "step": 657940
    },
    {
      "epoch": 2.3059507161958033,
      "grad_norm": 2.71875,
      "learning_rate": 1.285284443302154e-05,
      "loss": 0.7457,
      "step": 657950
    },
    {
      "epoch": 2.305985763702699,
      "grad_norm": 3.03125,
      "learning_rate": 1.2852195404357839e-05,
      "loss": 0.7656,
      "step": 657960
    },
    {
      "epoch": 2.306020811209595,
      "grad_norm": 2.6875,
      "learning_rate": 1.2851546375694137e-05,
      "loss": 0.8376,
      "step": 657970
    },
    {
      "epoch": 2.30605585871649,
      "grad_norm": 3.359375,
      "learning_rate": 1.2850897347030436e-05,
      "loss": 0.9096,
      "step": 657980
    },
    {
      "epoch": 2.306090906223386,
      "grad_norm": 3.484375,
      "learning_rate": 1.2850248318366734e-05,
      "loss": 0.9364,
      "step": 657990
    },
    {
      "epoch": 2.306125953730281,
      "grad_norm": 2.90625,
      "learning_rate": 1.2849599289703032e-05,
      "loss": 0.8588,
      "step": 658000
    },
    {
      "epoch": 2.306161001237177,
      "grad_norm": 6.8125,
      "learning_rate": 1.284895026103933e-05,
      "loss": 0.7862,
      "step": 658010
    },
    {
      "epoch": 2.3061960487440727,
      "grad_norm": 3.0,
      "learning_rate": 1.2848301232375628e-05,
      "loss": 0.8514,
      "step": 658020
    },
    {
      "epoch": 2.306231096250968,
      "grad_norm": 2.9375,
      "learning_rate": 1.2847652203711925e-05,
      "loss": 0.714,
      "step": 658030
    },
    {
      "epoch": 2.3062661437578638,
      "grad_norm": 2.828125,
      "learning_rate": 1.2847003175048223e-05,
      "loss": 0.8188,
      "step": 658040
    },
    {
      "epoch": 2.3063011912647595,
      "grad_norm": 2.90625,
      "learning_rate": 1.284635414638452e-05,
      "loss": 0.7816,
      "step": 658050
    },
    {
      "epoch": 2.306336238771655,
      "grad_norm": 2.609375,
      "learning_rate": 1.2845705117720819e-05,
      "loss": 0.7866,
      "step": 658060
    },
    {
      "epoch": 2.3063712862785506,
      "grad_norm": 3.578125,
      "learning_rate": 1.2845056089057117e-05,
      "loss": 0.8301,
      "step": 658070
    },
    {
      "epoch": 2.3064063337854463,
      "grad_norm": 2.859375,
      "learning_rate": 1.2844407060393415e-05,
      "loss": 0.7846,
      "step": 658080
    },
    {
      "epoch": 2.3064413812923417,
      "grad_norm": 2.96875,
      "learning_rate": 1.2843758031729714e-05,
      "loss": 0.7853,
      "step": 658090
    },
    {
      "epoch": 2.3064764287992374,
      "grad_norm": 3.203125,
      "learning_rate": 1.2843109003066012e-05,
      "loss": 0.8409,
      "step": 658100
    },
    {
      "epoch": 2.3065114763061327,
      "grad_norm": 3.15625,
      "learning_rate": 1.284245997440231e-05,
      "loss": 0.866,
      "step": 658110
    },
    {
      "epoch": 2.3065465238130285,
      "grad_norm": 2.875,
      "learning_rate": 1.2841810945738608e-05,
      "loss": 0.7543,
      "step": 658120
    },
    {
      "epoch": 2.3065815713199243,
      "grad_norm": 2.515625,
      "learning_rate": 1.2841161917074906e-05,
      "loss": 0.8605,
      "step": 658130
    },
    {
      "epoch": 2.3066166188268196,
      "grad_norm": 2.828125,
      "learning_rate": 1.2840512888411204e-05,
      "loss": 0.8481,
      "step": 658140
    },
    {
      "epoch": 2.3066516663337153,
      "grad_norm": 3.140625,
      "learning_rate": 1.2839863859747502e-05,
      "loss": 0.912,
      "step": 658150
    },
    {
      "epoch": 2.306686713840611,
      "grad_norm": 2.71875,
      "learning_rate": 1.2839214831083802e-05,
      "loss": 0.7525,
      "step": 658160
    },
    {
      "epoch": 2.3067217613475064,
      "grad_norm": 3.34375,
      "learning_rate": 1.28385658024201e-05,
      "loss": 0.854,
      "step": 658170
    },
    {
      "epoch": 2.306756808854402,
      "grad_norm": 2.984375,
      "learning_rate": 1.2837916773756398e-05,
      "loss": 0.8283,
      "step": 658180
    },
    {
      "epoch": 2.306791856361298,
      "grad_norm": 2.75,
      "learning_rate": 1.2837267745092696e-05,
      "loss": 0.8251,
      "step": 658190
    },
    {
      "epoch": 2.3068269038681932,
      "grad_norm": 2.671875,
      "learning_rate": 1.2836618716428994e-05,
      "loss": 0.7845,
      "step": 658200
    },
    {
      "epoch": 2.306861951375089,
      "grad_norm": 2.65625,
      "learning_rate": 1.2835969687765292e-05,
      "loss": 0.8158,
      "step": 658210
    },
    {
      "epoch": 2.3068969988819843,
      "grad_norm": 3.25,
      "learning_rate": 1.2835320659101588e-05,
      "loss": 0.8373,
      "step": 658220
    },
    {
      "epoch": 2.30693204638888,
      "grad_norm": 2.75,
      "learning_rate": 1.2834671630437886e-05,
      "loss": 0.7558,
      "step": 658230
    },
    {
      "epoch": 2.306967093895776,
      "grad_norm": 3.125,
      "learning_rate": 1.2834022601774184e-05,
      "loss": 0.7952,
      "step": 658240
    },
    {
      "epoch": 2.3070021414026716,
      "grad_norm": 2.90625,
      "learning_rate": 1.2833373573110482e-05,
      "loss": 0.7565,
      "step": 658250
    },
    {
      "epoch": 2.307037188909567,
      "grad_norm": 3.03125,
      "learning_rate": 1.283272454444678e-05,
      "loss": 0.8616,
      "step": 658260
    },
    {
      "epoch": 2.3070722364164626,
      "grad_norm": 2.765625,
      "learning_rate": 1.2832075515783078e-05,
      "loss": 0.8547,
      "step": 658270
    },
    {
      "epoch": 2.307107283923358,
      "grad_norm": 3.4375,
      "learning_rate": 1.2831426487119378e-05,
      "loss": 0.8597,
      "step": 658280
    },
    {
      "epoch": 2.3071423314302537,
      "grad_norm": 2.671875,
      "learning_rate": 1.2830777458455676e-05,
      "loss": 0.8254,
      "step": 658290
    },
    {
      "epoch": 2.3071773789371495,
      "grad_norm": 2.78125,
      "learning_rate": 1.2830128429791974e-05,
      "loss": 0.8254,
      "step": 658300
    },
    {
      "epoch": 2.307212426444045,
      "grad_norm": 2.984375,
      "learning_rate": 1.2829479401128272e-05,
      "loss": 0.7864,
      "step": 658310
    },
    {
      "epoch": 2.3072474739509405,
      "grad_norm": 3.078125,
      "learning_rate": 1.282883037246457e-05,
      "loss": 0.8453,
      "step": 658320
    },
    {
      "epoch": 2.307282521457836,
      "grad_norm": 2.78125,
      "learning_rate": 1.2828181343800868e-05,
      "loss": 0.7912,
      "step": 658330
    },
    {
      "epoch": 2.3073175689647316,
      "grad_norm": 2.90625,
      "learning_rate": 1.2827532315137166e-05,
      "loss": 0.8268,
      "step": 658340
    },
    {
      "epoch": 2.3073526164716274,
      "grad_norm": 2.96875,
      "learning_rate": 1.2826883286473466e-05,
      "loss": 0.8446,
      "step": 658350
    },
    {
      "epoch": 2.307387663978523,
      "grad_norm": 2.71875,
      "learning_rate": 1.2826234257809764e-05,
      "loss": 0.7715,
      "step": 658360
    },
    {
      "epoch": 2.3074227114854184,
      "grad_norm": 2.8125,
      "learning_rate": 1.2825585229146062e-05,
      "loss": 0.7599,
      "step": 658370
    },
    {
      "epoch": 2.307457758992314,
      "grad_norm": 3.109375,
      "learning_rate": 1.282493620048236e-05,
      "loss": 0.7658,
      "step": 658380
    },
    {
      "epoch": 2.3074928064992095,
      "grad_norm": 3.1875,
      "learning_rate": 1.2824287171818658e-05,
      "loss": 0.8123,
      "step": 658390
    },
    {
      "epoch": 2.3075278540061053,
      "grad_norm": 3.25,
      "learning_rate": 1.2823638143154956e-05,
      "loss": 0.8239,
      "step": 658400
    },
    {
      "epoch": 2.307562901513001,
      "grad_norm": 3.078125,
      "learning_rate": 1.2822989114491252e-05,
      "loss": 0.8171,
      "step": 658410
    },
    {
      "epoch": 2.3075979490198963,
      "grad_norm": 2.796875,
      "learning_rate": 1.282234008582755e-05,
      "loss": 0.7646,
      "step": 658420
    },
    {
      "epoch": 2.307632996526792,
      "grad_norm": 3.09375,
      "learning_rate": 1.2821691057163848e-05,
      "loss": 0.8449,
      "step": 658430
    },
    {
      "epoch": 2.3076680440336874,
      "grad_norm": 3.0625,
      "learning_rate": 1.2821042028500146e-05,
      "loss": 0.8904,
      "step": 658440
    },
    {
      "epoch": 2.307703091540583,
      "grad_norm": 3.1875,
      "learning_rate": 1.2820392999836444e-05,
      "loss": 0.8377,
      "step": 658450
    },
    {
      "epoch": 2.307738139047479,
      "grad_norm": 2.578125,
      "learning_rate": 1.2819743971172744e-05,
      "loss": 0.8177,
      "step": 658460
    },
    {
      "epoch": 2.3077731865543747,
      "grad_norm": 2.84375,
      "learning_rate": 1.2819094942509042e-05,
      "loss": 0.8432,
      "step": 658470
    },
    {
      "epoch": 2.30780823406127,
      "grad_norm": 2.828125,
      "learning_rate": 1.281844591384534e-05,
      "loss": 0.8046,
      "step": 658480
    },
    {
      "epoch": 2.3078432815681658,
      "grad_norm": 3.15625,
      "learning_rate": 1.2817796885181638e-05,
      "loss": 0.8689,
      "step": 658490
    },
    {
      "epoch": 2.307878329075061,
      "grad_norm": 2.609375,
      "learning_rate": 1.2817147856517936e-05,
      "loss": 0.8288,
      "step": 658500
    },
    {
      "epoch": 2.307913376581957,
      "grad_norm": 2.984375,
      "learning_rate": 1.2816498827854234e-05,
      "loss": 0.8506,
      "step": 658510
    },
    {
      "epoch": 2.3079484240888526,
      "grad_norm": 2.96875,
      "learning_rate": 1.2815849799190532e-05,
      "loss": 0.8036,
      "step": 658520
    },
    {
      "epoch": 2.307983471595748,
      "grad_norm": 3.375,
      "learning_rate": 1.2815200770526831e-05,
      "loss": 0.8948,
      "step": 658530
    },
    {
      "epoch": 2.3080185191026437,
      "grad_norm": 2.6875,
      "learning_rate": 1.281455174186313e-05,
      "loss": 0.8015,
      "step": 658540
    },
    {
      "epoch": 2.3080535666095394,
      "grad_norm": 2.921875,
      "learning_rate": 1.2813902713199427e-05,
      "loss": 0.7338,
      "step": 658550
    },
    {
      "epoch": 2.3080886141164347,
      "grad_norm": 3.296875,
      "learning_rate": 1.2813253684535725e-05,
      "loss": 0.8115,
      "step": 658560
    },
    {
      "epoch": 2.3081236616233305,
      "grad_norm": 2.75,
      "learning_rate": 1.2812604655872023e-05,
      "loss": 0.8234,
      "step": 658570
    },
    {
      "epoch": 2.3081587091302262,
      "grad_norm": 2.625,
      "learning_rate": 1.2811955627208321e-05,
      "loss": 0.8822,
      "step": 658580
    },
    {
      "epoch": 2.3081937566371216,
      "grad_norm": 3.015625,
      "learning_rate": 1.281130659854462e-05,
      "loss": 0.8363,
      "step": 658590
    },
    {
      "epoch": 2.3082288041440173,
      "grad_norm": 2.96875,
      "learning_rate": 1.2810657569880916e-05,
      "loss": 0.8545,
      "step": 658600
    },
    {
      "epoch": 2.3082638516509126,
      "grad_norm": 2.65625,
      "learning_rate": 1.2810008541217214e-05,
      "loss": 0.7663,
      "step": 658610
    },
    {
      "epoch": 2.3082988991578084,
      "grad_norm": 2.78125,
      "learning_rate": 1.2809359512553512e-05,
      "loss": 0.7635,
      "step": 658620
    },
    {
      "epoch": 2.308333946664704,
      "grad_norm": 3.0,
      "learning_rate": 1.280871048388981e-05,
      "loss": 0.8844,
      "step": 658630
    },
    {
      "epoch": 2.3083689941715995,
      "grad_norm": 2.625,
      "learning_rate": 1.280806145522611e-05,
      "loss": 0.7873,
      "step": 658640
    },
    {
      "epoch": 2.3084040416784952,
      "grad_norm": 3.09375,
      "learning_rate": 1.2807412426562407e-05,
      "loss": 0.8603,
      "step": 658650
    },
    {
      "epoch": 2.308439089185391,
      "grad_norm": 2.921875,
      "learning_rate": 1.2806763397898705e-05,
      "loss": 0.7522,
      "step": 658660
    },
    {
      "epoch": 2.3084741366922863,
      "grad_norm": 3.125,
      "learning_rate": 1.2806114369235003e-05,
      "loss": 0.8177,
      "step": 658670
    },
    {
      "epoch": 2.308509184199182,
      "grad_norm": 2.734375,
      "learning_rate": 1.2805465340571301e-05,
      "loss": 0.8596,
      "step": 658680
    },
    {
      "epoch": 2.308544231706078,
      "grad_norm": 3.125,
      "learning_rate": 1.28048163119076e-05,
      "loss": 0.8562,
      "step": 658690
    },
    {
      "epoch": 2.308579279212973,
      "grad_norm": 2.75,
      "learning_rate": 1.2804167283243897e-05,
      "loss": 0.8054,
      "step": 658700
    },
    {
      "epoch": 2.308614326719869,
      "grad_norm": 2.640625,
      "learning_rate": 1.2803518254580197e-05,
      "loss": 0.7986,
      "step": 658710
    },
    {
      "epoch": 2.308649374226764,
      "grad_norm": 2.6875,
      "learning_rate": 1.2802869225916495e-05,
      "loss": 0.7138,
      "step": 658720
    },
    {
      "epoch": 2.30868442173366,
      "grad_norm": 2.734375,
      "learning_rate": 1.2802220197252793e-05,
      "loss": 0.7846,
      "step": 658730
    },
    {
      "epoch": 2.3087194692405557,
      "grad_norm": 2.84375,
      "learning_rate": 1.2801571168589091e-05,
      "loss": 0.8285,
      "step": 658740
    },
    {
      "epoch": 2.308754516747451,
      "grad_norm": 2.421875,
      "learning_rate": 1.2800922139925389e-05,
      "loss": 0.8,
      "step": 658750
    },
    {
      "epoch": 2.308789564254347,
      "grad_norm": 2.765625,
      "learning_rate": 1.2800273111261687e-05,
      "loss": 0.8044,
      "step": 658760
    },
    {
      "epoch": 2.3088246117612425,
      "grad_norm": 3.1875,
      "learning_rate": 1.2799624082597985e-05,
      "loss": 0.8259,
      "step": 658770
    },
    {
      "epoch": 2.308859659268138,
      "grad_norm": 3.453125,
      "learning_rate": 1.2798975053934281e-05,
      "loss": 0.885,
      "step": 658780
    },
    {
      "epoch": 2.3088947067750336,
      "grad_norm": 2.984375,
      "learning_rate": 1.279832602527058e-05,
      "loss": 0.8862,
      "step": 658790
    },
    {
      "epoch": 2.3089297542819294,
      "grad_norm": 2.5625,
      "learning_rate": 1.2797676996606877e-05,
      "loss": 0.8442,
      "step": 658800
    },
    {
      "epoch": 2.3089648017888247,
      "grad_norm": 2.8125,
      "learning_rate": 1.2797027967943175e-05,
      "loss": 0.7692,
      "step": 658810
    },
    {
      "epoch": 2.3089998492957204,
      "grad_norm": 3.015625,
      "learning_rate": 1.2796378939279473e-05,
      "loss": 0.8224,
      "step": 658820
    },
    {
      "epoch": 2.3090348968026158,
      "grad_norm": 2.71875,
      "learning_rate": 1.2795729910615773e-05,
      "loss": 0.8745,
      "step": 658830
    },
    {
      "epoch": 2.3090699443095115,
      "grad_norm": 2.8125,
      "learning_rate": 1.2795080881952071e-05,
      "loss": 0.7146,
      "step": 658840
    },
    {
      "epoch": 2.3091049918164073,
      "grad_norm": 3.125,
      "learning_rate": 1.2794431853288369e-05,
      "loss": 0.784,
      "step": 658850
    },
    {
      "epoch": 2.3091400393233026,
      "grad_norm": 2.4375,
      "learning_rate": 1.2793782824624667e-05,
      "loss": 0.7468,
      "step": 658860
    },
    {
      "epoch": 2.3091750868301983,
      "grad_norm": 3.4375,
      "learning_rate": 1.2793133795960965e-05,
      "loss": 0.9033,
      "step": 658870
    },
    {
      "epoch": 2.309210134337094,
      "grad_norm": 2.640625,
      "learning_rate": 1.2792484767297263e-05,
      "loss": 0.8303,
      "step": 658880
    },
    {
      "epoch": 2.3092451818439894,
      "grad_norm": 3.015625,
      "learning_rate": 1.2791835738633561e-05,
      "loss": 0.8921,
      "step": 658890
    },
    {
      "epoch": 2.309280229350885,
      "grad_norm": 3.09375,
      "learning_rate": 1.279118670996986e-05,
      "loss": 0.8491,
      "step": 658900
    },
    {
      "epoch": 2.309315276857781,
      "grad_norm": 3.296875,
      "learning_rate": 1.2790537681306159e-05,
      "loss": 0.8363,
      "step": 658910
    },
    {
      "epoch": 2.3093503243646762,
      "grad_norm": 2.875,
      "learning_rate": 1.2789888652642457e-05,
      "loss": 0.8401,
      "step": 658920
    },
    {
      "epoch": 2.309385371871572,
      "grad_norm": 2.625,
      "learning_rate": 1.2789239623978755e-05,
      "loss": 0.7763,
      "step": 658930
    },
    {
      "epoch": 2.3094204193784673,
      "grad_norm": 2.4375,
      "learning_rate": 1.2788590595315053e-05,
      "loss": 0.8506,
      "step": 658940
    },
    {
      "epoch": 2.309455466885363,
      "grad_norm": 2.703125,
      "learning_rate": 1.278794156665135e-05,
      "loss": 0.8085,
      "step": 658950
    },
    {
      "epoch": 2.309490514392259,
      "grad_norm": 2.9375,
      "learning_rate": 1.2787292537987649e-05,
      "loss": 0.8699,
      "step": 658960
    },
    {
      "epoch": 2.309525561899154,
      "grad_norm": 2.921875,
      "learning_rate": 1.2786643509323945e-05,
      "loss": 0.7766,
      "step": 658970
    },
    {
      "epoch": 2.30956060940605,
      "grad_norm": 2.984375,
      "learning_rate": 1.2785994480660243e-05,
      "loss": 0.7774,
      "step": 658980
    },
    {
      "epoch": 2.3095956569129457,
      "grad_norm": 2.90625,
      "learning_rate": 1.2785345451996541e-05,
      "loss": 0.7929,
      "step": 658990
    },
    {
      "epoch": 2.309630704419841,
      "grad_norm": 2.8125,
      "learning_rate": 1.2784696423332839e-05,
      "loss": 0.7491,
      "step": 659000
    },
    {
      "epoch": 2.3096657519267367,
      "grad_norm": 2.6875,
      "learning_rate": 1.2784047394669139e-05,
      "loss": 0.7617,
      "step": 659010
    },
    {
      "epoch": 2.3097007994336325,
      "grad_norm": 3.359375,
      "learning_rate": 1.2783398366005437e-05,
      "loss": 0.8469,
      "step": 659020
    },
    {
      "epoch": 2.309735846940528,
      "grad_norm": 2.8125,
      "learning_rate": 1.2782749337341735e-05,
      "loss": 0.6503,
      "step": 659030
    },
    {
      "epoch": 2.3097708944474236,
      "grad_norm": 2.625,
      "learning_rate": 1.2782100308678033e-05,
      "loss": 0.7581,
      "step": 659040
    },
    {
      "epoch": 2.309805941954319,
      "grad_norm": 3.0,
      "learning_rate": 1.278145128001433e-05,
      "loss": 0.7774,
      "step": 659050
    },
    {
      "epoch": 2.3098409894612146,
      "grad_norm": 2.96875,
      "learning_rate": 1.2780802251350629e-05,
      "loss": 0.8008,
      "step": 659060
    },
    {
      "epoch": 2.3098760369681104,
      "grad_norm": 3.046875,
      "learning_rate": 1.2780153222686927e-05,
      "loss": 0.8313,
      "step": 659070
    },
    {
      "epoch": 2.3099110844750057,
      "grad_norm": 2.578125,
      "learning_rate": 1.2779504194023226e-05,
      "loss": 0.6971,
      "step": 659080
    },
    {
      "epoch": 2.3099461319819015,
      "grad_norm": 3.015625,
      "learning_rate": 1.2778855165359524e-05,
      "loss": 0.8451,
      "step": 659090
    },
    {
      "epoch": 2.309981179488797,
      "grad_norm": 3.25,
      "learning_rate": 1.2778206136695822e-05,
      "loss": 0.8426,
      "step": 659100
    },
    {
      "epoch": 2.3100162269956925,
      "grad_norm": 3.046875,
      "learning_rate": 1.277755710803212e-05,
      "loss": 0.8481,
      "step": 659110
    },
    {
      "epoch": 2.3100512745025883,
      "grad_norm": 2.859375,
      "learning_rate": 1.2776908079368418e-05,
      "loss": 0.7801,
      "step": 659120
    },
    {
      "epoch": 2.310086322009484,
      "grad_norm": 2.90625,
      "learning_rate": 1.2776259050704716e-05,
      "loss": 0.7684,
      "step": 659130
    },
    {
      "epoch": 2.3101213695163794,
      "grad_norm": 2.953125,
      "learning_rate": 1.2775610022041014e-05,
      "loss": 0.7953,
      "step": 659140
    },
    {
      "epoch": 2.310156417023275,
      "grad_norm": 2.8125,
      "learning_rate": 1.2774960993377314e-05,
      "loss": 0.7918,
      "step": 659150
    },
    {
      "epoch": 2.3101914645301704,
      "grad_norm": 3.03125,
      "learning_rate": 1.2774311964713609e-05,
      "loss": 0.8658,
      "step": 659160
    },
    {
      "epoch": 2.310226512037066,
      "grad_norm": 2.84375,
      "learning_rate": 1.2773662936049907e-05,
      "loss": 0.8525,
      "step": 659170
    },
    {
      "epoch": 2.310261559543962,
      "grad_norm": 3.03125,
      "learning_rate": 1.2773013907386205e-05,
      "loss": 0.8872,
      "step": 659180
    },
    {
      "epoch": 2.3102966070508573,
      "grad_norm": 2.78125,
      "learning_rate": 1.2772364878722504e-05,
      "loss": 0.8431,
      "step": 659190
    },
    {
      "epoch": 2.310331654557753,
      "grad_norm": 2.9375,
      "learning_rate": 1.2771715850058802e-05,
      "loss": 0.8615,
      "step": 659200
    },
    {
      "epoch": 2.310366702064649,
      "grad_norm": 2.765625,
      "learning_rate": 1.27710668213951e-05,
      "loss": 0.8081,
      "step": 659210
    },
    {
      "epoch": 2.310401749571544,
      "grad_norm": 3.078125,
      "learning_rate": 1.2770417792731398e-05,
      "loss": 0.9394,
      "step": 659220
    },
    {
      "epoch": 2.31043679707844,
      "grad_norm": 3.03125,
      "learning_rate": 1.2769768764067696e-05,
      "loss": 0.8249,
      "step": 659230
    },
    {
      "epoch": 2.3104718445853356,
      "grad_norm": 3.078125,
      "learning_rate": 1.2769119735403994e-05,
      "loss": 0.8097,
      "step": 659240
    },
    {
      "epoch": 2.310506892092231,
      "grad_norm": 3.03125,
      "learning_rate": 1.2768470706740292e-05,
      "loss": 0.7459,
      "step": 659250
    },
    {
      "epoch": 2.3105419395991267,
      "grad_norm": 3.140625,
      "learning_rate": 1.2767821678076592e-05,
      "loss": 0.8144,
      "step": 659260
    },
    {
      "epoch": 2.310576987106022,
      "grad_norm": 2.9375,
      "learning_rate": 1.276717264941289e-05,
      "loss": 0.7269,
      "step": 659270
    },
    {
      "epoch": 2.3106120346129178,
      "grad_norm": 2.546875,
      "learning_rate": 1.2766523620749188e-05,
      "loss": 0.8182,
      "step": 659280
    },
    {
      "epoch": 2.3106470821198135,
      "grad_norm": 2.734375,
      "learning_rate": 1.2765874592085486e-05,
      "loss": 0.8288,
      "step": 659290
    },
    {
      "epoch": 2.310682129626709,
      "grad_norm": 3.34375,
      "learning_rate": 1.2765225563421784e-05,
      "loss": 0.8343,
      "step": 659300
    },
    {
      "epoch": 2.3107171771336046,
      "grad_norm": 2.859375,
      "learning_rate": 1.2764576534758082e-05,
      "loss": 0.7814,
      "step": 659310
    },
    {
      "epoch": 2.3107522246405003,
      "grad_norm": 3.21875,
      "learning_rate": 1.276392750609438e-05,
      "loss": 0.8727,
      "step": 659320
    },
    {
      "epoch": 2.3107872721473957,
      "grad_norm": 2.75,
      "learning_rate": 1.276327847743068e-05,
      "loss": 0.836,
      "step": 659330
    },
    {
      "epoch": 2.3108223196542914,
      "grad_norm": 3.125,
      "learning_rate": 1.2762629448766978e-05,
      "loss": 0.8138,
      "step": 659340
    },
    {
      "epoch": 2.310857367161187,
      "grad_norm": 2.90625,
      "learning_rate": 1.2761980420103272e-05,
      "loss": 0.738,
      "step": 659350
    },
    {
      "epoch": 2.3108924146680825,
      "grad_norm": 3.171875,
      "learning_rate": 1.276133139143957e-05,
      "loss": 0.8012,
      "step": 659360
    },
    {
      "epoch": 2.3109274621749782,
      "grad_norm": 3.109375,
      "learning_rate": 1.2760682362775868e-05,
      "loss": 0.8927,
      "step": 659370
    },
    {
      "epoch": 2.3109625096818736,
      "grad_norm": 3.21875,
      "learning_rate": 1.2760033334112168e-05,
      "loss": 0.8518,
      "step": 659380
    },
    {
      "epoch": 2.3109975571887693,
      "grad_norm": 2.9375,
      "learning_rate": 1.2759384305448466e-05,
      "loss": 0.9163,
      "step": 659390
    },
    {
      "epoch": 2.311032604695665,
      "grad_norm": 2.3125,
      "learning_rate": 1.2758735276784764e-05,
      "loss": 0.9017,
      "step": 659400
    },
    {
      "epoch": 2.3110676522025604,
      "grad_norm": 2.765625,
      "learning_rate": 1.2758086248121062e-05,
      "loss": 0.7698,
      "step": 659410
    },
    {
      "epoch": 2.311102699709456,
      "grad_norm": 3.171875,
      "learning_rate": 1.275743721945736e-05,
      "loss": 0.8265,
      "step": 659420
    },
    {
      "epoch": 2.311137747216352,
      "grad_norm": 3.15625,
      "learning_rate": 1.2756788190793658e-05,
      "loss": 0.7535,
      "step": 659430
    },
    {
      "epoch": 2.311172794723247,
      "grad_norm": 2.515625,
      "learning_rate": 1.2756139162129956e-05,
      "loss": 0.8113,
      "step": 659440
    },
    {
      "epoch": 2.311207842230143,
      "grad_norm": 3.078125,
      "learning_rate": 1.2755490133466256e-05,
      "loss": 0.8721,
      "step": 659450
    },
    {
      "epoch": 2.3112428897370387,
      "grad_norm": 3.375,
      "learning_rate": 1.2754841104802554e-05,
      "loss": 0.8081,
      "step": 659460
    },
    {
      "epoch": 2.311277937243934,
      "grad_norm": 3.171875,
      "learning_rate": 1.2754192076138852e-05,
      "loss": 0.853,
      "step": 659470
    },
    {
      "epoch": 2.31131298475083,
      "grad_norm": 2.8125,
      "learning_rate": 1.275354304747515e-05,
      "loss": 0.7747,
      "step": 659480
    },
    {
      "epoch": 2.311348032257725,
      "grad_norm": 3.140625,
      "learning_rate": 1.2752894018811448e-05,
      "loss": 0.8028,
      "step": 659490
    },
    {
      "epoch": 2.311383079764621,
      "grad_norm": 2.765625,
      "learning_rate": 1.2752244990147746e-05,
      "loss": 0.8044,
      "step": 659500
    },
    {
      "epoch": 2.3114181272715166,
      "grad_norm": 2.8125,
      "learning_rate": 1.2751595961484044e-05,
      "loss": 0.7742,
      "step": 659510
    },
    {
      "epoch": 2.311453174778412,
      "grad_norm": 2.65625,
      "learning_rate": 1.2750946932820344e-05,
      "loss": 0.8303,
      "step": 659520
    },
    {
      "epoch": 2.3114882222853077,
      "grad_norm": 2.625,
      "learning_rate": 1.2750297904156642e-05,
      "loss": 0.8007,
      "step": 659530
    },
    {
      "epoch": 2.3115232697922035,
      "grad_norm": 2.6875,
      "learning_rate": 1.2749648875492936e-05,
      "loss": 0.8306,
      "step": 659540
    },
    {
      "epoch": 2.3115583172990988,
      "grad_norm": 2.40625,
      "learning_rate": 1.2748999846829234e-05,
      "loss": 0.8251,
      "step": 659550
    },
    {
      "epoch": 2.3115933648059945,
      "grad_norm": 2.8125,
      "learning_rate": 1.2748350818165534e-05,
      "loss": 0.7541,
      "step": 659560
    },
    {
      "epoch": 2.3116284123128903,
      "grad_norm": 2.71875,
      "learning_rate": 1.2747701789501832e-05,
      "loss": 0.791,
      "step": 659570
    },
    {
      "epoch": 2.3116634598197856,
      "grad_norm": 3.015625,
      "learning_rate": 1.274705276083813e-05,
      "loss": 0.8154,
      "step": 659580
    },
    {
      "epoch": 2.3116985073266814,
      "grad_norm": 3.375,
      "learning_rate": 1.2746403732174428e-05,
      "loss": 0.8837,
      "step": 659590
    },
    {
      "epoch": 2.3117335548335767,
      "grad_norm": 2.703125,
      "learning_rate": 1.2745754703510726e-05,
      "loss": 0.742,
      "step": 659600
    },
    {
      "epoch": 2.3117686023404724,
      "grad_norm": 2.640625,
      "learning_rate": 1.2745105674847024e-05,
      "loss": 0.7897,
      "step": 659610
    },
    {
      "epoch": 2.311803649847368,
      "grad_norm": 2.9375,
      "learning_rate": 1.2744456646183322e-05,
      "loss": 0.7237,
      "step": 659620
    },
    {
      "epoch": 2.311838697354264,
      "grad_norm": 2.84375,
      "learning_rate": 1.2743807617519622e-05,
      "loss": 0.8572,
      "step": 659630
    },
    {
      "epoch": 2.3118737448611593,
      "grad_norm": 2.65625,
      "learning_rate": 1.274315858885592e-05,
      "loss": 0.7355,
      "step": 659640
    },
    {
      "epoch": 2.311908792368055,
      "grad_norm": 2.671875,
      "learning_rate": 1.2742509560192218e-05,
      "loss": 0.7953,
      "step": 659650
    },
    {
      "epoch": 2.3119438398749503,
      "grad_norm": 2.78125,
      "learning_rate": 1.2741860531528516e-05,
      "loss": 0.8141,
      "step": 659660
    },
    {
      "epoch": 2.311978887381846,
      "grad_norm": 3.34375,
      "learning_rate": 1.2741211502864814e-05,
      "loss": 0.8225,
      "step": 659670
    },
    {
      "epoch": 2.312013934888742,
      "grad_norm": 3.171875,
      "learning_rate": 1.2740562474201112e-05,
      "loss": 0.8354,
      "step": 659680
    },
    {
      "epoch": 2.312048982395637,
      "grad_norm": 2.78125,
      "learning_rate": 1.273991344553741e-05,
      "loss": 0.8075,
      "step": 659690
    },
    {
      "epoch": 2.312084029902533,
      "grad_norm": 3.0625,
      "learning_rate": 1.273926441687371e-05,
      "loss": 0.8308,
      "step": 659700
    },
    {
      "epoch": 2.3121190774094282,
      "grad_norm": 2.875,
      "learning_rate": 1.2738615388210007e-05,
      "loss": 0.7485,
      "step": 659710
    },
    {
      "epoch": 2.312154124916324,
      "grad_norm": 2.65625,
      "learning_rate": 1.2737966359546302e-05,
      "loss": 0.8221,
      "step": 659720
    },
    {
      "epoch": 2.3121891724232198,
      "grad_norm": 3.09375,
      "learning_rate": 1.27373173308826e-05,
      "loss": 0.8469,
      "step": 659730
    },
    {
      "epoch": 2.3122242199301155,
      "grad_norm": 3.21875,
      "learning_rate": 1.27366683022189e-05,
      "loss": 0.7436,
      "step": 659740
    },
    {
      "epoch": 2.312259267437011,
      "grad_norm": 2.984375,
      "learning_rate": 1.2736019273555198e-05,
      "loss": 0.8795,
      "step": 659750
    },
    {
      "epoch": 2.3122943149439066,
      "grad_norm": 2.96875,
      "learning_rate": 1.2735370244891496e-05,
      "loss": 0.8333,
      "step": 659760
    },
    {
      "epoch": 2.312329362450802,
      "grad_norm": 2.96875,
      "learning_rate": 1.2734721216227794e-05,
      "loss": 0.8487,
      "step": 659770
    },
    {
      "epoch": 2.3123644099576977,
      "grad_norm": 2.96875,
      "learning_rate": 1.2734072187564092e-05,
      "loss": 0.8347,
      "step": 659780
    },
    {
      "epoch": 2.3123994574645934,
      "grad_norm": 3.15625,
      "learning_rate": 1.273342315890039e-05,
      "loss": 0.8243,
      "step": 659790
    },
    {
      "epoch": 2.3124345049714887,
      "grad_norm": 2.75,
      "learning_rate": 1.2732774130236688e-05,
      "loss": 0.7918,
      "step": 659800
    },
    {
      "epoch": 2.3124695524783845,
      "grad_norm": 2.78125,
      "learning_rate": 1.2732125101572987e-05,
      "loss": 0.806,
      "step": 659810
    },
    {
      "epoch": 2.3125045999852802,
      "grad_norm": 2.515625,
      "learning_rate": 1.2731476072909285e-05,
      "loss": 0.7642,
      "step": 659820
    },
    {
      "epoch": 2.3125396474921756,
      "grad_norm": 2.875,
      "learning_rate": 1.2730827044245583e-05,
      "loss": 0.7925,
      "step": 659830
    },
    {
      "epoch": 2.3125746949990713,
      "grad_norm": 3.203125,
      "learning_rate": 1.2730178015581881e-05,
      "loss": 0.7717,
      "step": 659840
    },
    {
      "epoch": 2.312609742505967,
      "grad_norm": 2.6875,
      "learning_rate": 1.272952898691818e-05,
      "loss": 0.7964,
      "step": 659850
    },
    {
      "epoch": 2.3126447900128624,
      "grad_norm": 3.1875,
      "learning_rate": 1.2728879958254477e-05,
      "loss": 0.753,
      "step": 659860
    },
    {
      "epoch": 2.312679837519758,
      "grad_norm": 2.890625,
      "learning_rate": 1.2728230929590775e-05,
      "loss": 0.746,
      "step": 659870
    },
    {
      "epoch": 2.3127148850266535,
      "grad_norm": 2.984375,
      "learning_rate": 1.2727581900927075e-05,
      "loss": 0.8041,
      "step": 659880
    },
    {
      "epoch": 2.312749932533549,
      "grad_norm": 2.671875,
      "learning_rate": 1.2726932872263373e-05,
      "loss": 0.8144,
      "step": 659890
    },
    {
      "epoch": 2.312784980040445,
      "grad_norm": 3.171875,
      "learning_rate": 1.2726283843599671e-05,
      "loss": 0.8477,
      "step": 659900
    },
    {
      "epoch": 2.3128200275473403,
      "grad_norm": 3.203125,
      "learning_rate": 1.2725634814935966e-05,
      "loss": 0.798,
      "step": 659910
    },
    {
      "epoch": 2.312855075054236,
      "grad_norm": 3.046875,
      "learning_rate": 1.2724985786272264e-05,
      "loss": 0.8246,
      "step": 659920
    },
    {
      "epoch": 2.312890122561132,
      "grad_norm": 3.203125,
      "learning_rate": 1.2724336757608563e-05,
      "loss": 0.7643,
      "step": 659930
    },
    {
      "epoch": 2.312925170068027,
      "grad_norm": 2.8125,
      "learning_rate": 1.2723687728944861e-05,
      "loss": 0.8694,
      "step": 659940
    },
    {
      "epoch": 2.312960217574923,
      "grad_norm": 2.84375,
      "learning_rate": 1.272303870028116e-05,
      "loss": 0.906,
      "step": 659950
    },
    {
      "epoch": 2.3129952650818186,
      "grad_norm": 2.65625,
      "learning_rate": 1.2722389671617457e-05,
      "loss": 0.7694,
      "step": 659960
    },
    {
      "epoch": 2.313030312588714,
      "grad_norm": 2.671875,
      "learning_rate": 1.2721740642953755e-05,
      "loss": 0.8277,
      "step": 659970
    },
    {
      "epoch": 2.3130653600956097,
      "grad_norm": 2.484375,
      "learning_rate": 1.2721091614290053e-05,
      "loss": 0.8258,
      "step": 659980
    },
    {
      "epoch": 2.313100407602505,
      "grad_norm": 2.515625,
      "learning_rate": 1.2720442585626351e-05,
      "loss": 0.7655,
      "step": 659990
    },
    {
      "epoch": 2.3131354551094008,
      "grad_norm": 3.21875,
      "learning_rate": 1.2719793556962651e-05,
      "loss": 0.8012,
      "step": 660000
    },
    {
      "epoch": 2.3131354551094008,
      "eval_loss": 0.7608261704444885,
      "eval_runtime": 562.7428,
      "eval_samples_per_second": 676.039,
      "eval_steps_per_second": 56.337,
      "step": 660000
    },
    {
      "epoch": 2.3131705026162965,
      "grad_norm": 2.515625,
      "learning_rate": 1.2719144528298949e-05,
      "loss": 0.943,
      "step": 660010
    },
    {
      "epoch": 2.313205550123192,
      "grad_norm": 2.703125,
      "learning_rate": 1.2718495499635247e-05,
      "loss": 0.7438,
      "step": 660020
    },
    {
      "epoch": 2.3132405976300876,
      "grad_norm": 2.828125,
      "learning_rate": 1.2717846470971545e-05,
      "loss": 0.7806,
      "step": 660030
    },
    {
      "epoch": 2.3132756451369834,
      "grad_norm": 2.875,
      "learning_rate": 1.2717197442307843e-05,
      "loss": 0.7529,
      "step": 660040
    },
    {
      "epoch": 2.3133106926438787,
      "grad_norm": 2.4375,
      "learning_rate": 1.2716548413644141e-05,
      "loss": 0.788,
      "step": 660050
    },
    {
      "epoch": 2.3133457401507744,
      "grad_norm": 3.34375,
      "learning_rate": 1.2715899384980439e-05,
      "loss": 0.8866,
      "step": 660060
    },
    {
      "epoch": 2.31338078765767,
      "grad_norm": 2.890625,
      "learning_rate": 1.2715250356316739e-05,
      "loss": 0.824,
      "step": 660070
    },
    {
      "epoch": 2.3134158351645655,
      "grad_norm": 2.84375,
      "learning_rate": 1.2714601327653037e-05,
      "loss": 0.8303,
      "step": 660080
    },
    {
      "epoch": 2.3134508826714613,
      "grad_norm": 2.671875,
      "learning_rate": 1.2713952298989335e-05,
      "loss": 0.7714,
      "step": 660090
    },
    {
      "epoch": 2.3134859301783566,
      "grad_norm": 2.453125,
      "learning_rate": 1.271330327032563e-05,
      "loss": 0.7862,
      "step": 660100
    },
    {
      "epoch": 2.3135209776852523,
      "grad_norm": 2.875,
      "learning_rate": 1.2712654241661929e-05,
      "loss": 0.8043,
      "step": 660110
    },
    {
      "epoch": 2.313556025192148,
      "grad_norm": 3.015625,
      "learning_rate": 1.2712005212998227e-05,
      "loss": 0.8248,
      "step": 660120
    },
    {
      "epoch": 2.3135910726990434,
      "grad_norm": 2.671875,
      "learning_rate": 1.2711356184334525e-05,
      "loss": 0.7736,
      "step": 660130
    },
    {
      "epoch": 2.313626120205939,
      "grad_norm": 3.21875,
      "learning_rate": 1.2710707155670823e-05,
      "loss": 0.8343,
      "step": 660140
    },
    {
      "epoch": 2.313661167712835,
      "grad_norm": 2.671875,
      "learning_rate": 1.2710058127007121e-05,
      "loss": 0.8015,
      "step": 660150
    },
    {
      "epoch": 2.3136962152197302,
      "grad_norm": 2.875,
      "learning_rate": 1.2709409098343419e-05,
      "loss": 0.8058,
      "step": 660160
    },
    {
      "epoch": 2.313731262726626,
      "grad_norm": 2.9375,
      "learning_rate": 1.2708760069679717e-05,
      "loss": 0.7499,
      "step": 660170
    },
    {
      "epoch": 2.3137663102335218,
      "grad_norm": 2.53125,
      "learning_rate": 1.2708111041016017e-05,
      "loss": 0.8102,
      "step": 660180
    },
    {
      "epoch": 2.313801357740417,
      "grad_norm": 2.390625,
      "learning_rate": 1.2707462012352315e-05,
      "loss": 0.8038,
      "step": 660190
    },
    {
      "epoch": 2.313836405247313,
      "grad_norm": 2.734375,
      "learning_rate": 1.2706812983688613e-05,
      "loss": 0.7611,
      "step": 660200
    },
    {
      "epoch": 2.313871452754208,
      "grad_norm": 2.78125,
      "learning_rate": 1.270616395502491e-05,
      "loss": 0.9061,
      "step": 660210
    },
    {
      "epoch": 2.313906500261104,
      "grad_norm": 3.1875,
      "learning_rate": 1.2705514926361209e-05,
      "loss": 0.8696,
      "step": 660220
    },
    {
      "epoch": 2.3139415477679997,
      "grad_norm": 3.25,
      "learning_rate": 1.2704865897697507e-05,
      "loss": 0.8233,
      "step": 660230
    },
    {
      "epoch": 2.313976595274895,
      "grad_norm": 3.1875,
      "learning_rate": 1.2704216869033805e-05,
      "loss": 0.7913,
      "step": 660240
    },
    {
      "epoch": 2.3140116427817907,
      "grad_norm": 2.921875,
      "learning_rate": 1.2703567840370104e-05,
      "loss": 0.8299,
      "step": 660250
    },
    {
      "epoch": 2.3140466902886865,
      "grad_norm": 3.09375,
      "learning_rate": 1.2702918811706402e-05,
      "loss": 0.8671,
      "step": 660260
    },
    {
      "epoch": 2.314081737795582,
      "grad_norm": 3.046875,
      "learning_rate": 1.27022697830427e-05,
      "loss": 0.7738,
      "step": 660270
    },
    {
      "epoch": 2.3141167853024776,
      "grad_norm": 3.375,
      "learning_rate": 1.2701620754378998e-05,
      "loss": 0.8873,
      "step": 660280
    },
    {
      "epoch": 2.3141518328093733,
      "grad_norm": 2.625,
      "learning_rate": 1.2700971725715295e-05,
      "loss": 0.8471,
      "step": 660290
    },
    {
      "epoch": 2.3141868803162686,
      "grad_norm": 2.71875,
      "learning_rate": 1.2700322697051593e-05,
      "loss": 0.8231,
      "step": 660300
    },
    {
      "epoch": 2.3142219278231644,
      "grad_norm": 2.75,
      "learning_rate": 1.269967366838789e-05,
      "loss": 0.8484,
      "step": 660310
    },
    {
      "epoch": 2.3142569753300597,
      "grad_norm": 2.515625,
      "learning_rate": 1.2699024639724189e-05,
      "loss": 0.7906,
      "step": 660320
    },
    {
      "epoch": 2.3142920228369555,
      "grad_norm": 2.96875,
      "learning_rate": 1.2698375611060487e-05,
      "loss": 0.7872,
      "step": 660330
    },
    {
      "epoch": 2.314327070343851,
      "grad_norm": 2.90625,
      "learning_rate": 1.2697726582396785e-05,
      "loss": 0.8411,
      "step": 660340
    },
    {
      "epoch": 2.3143621178507465,
      "grad_norm": 3.15625,
      "learning_rate": 1.2697077553733083e-05,
      "loss": 0.7379,
      "step": 660350
    },
    {
      "epoch": 2.3143971653576423,
      "grad_norm": 2.71875,
      "learning_rate": 1.2696428525069382e-05,
      "loss": 0.9375,
      "step": 660360
    },
    {
      "epoch": 2.314432212864538,
      "grad_norm": 3.328125,
      "learning_rate": 1.269577949640568e-05,
      "loss": 0.868,
      "step": 660370
    },
    {
      "epoch": 2.3144672603714334,
      "grad_norm": 3.15625,
      "learning_rate": 1.2695130467741978e-05,
      "loss": 0.7558,
      "step": 660380
    },
    {
      "epoch": 2.314502307878329,
      "grad_norm": 3.46875,
      "learning_rate": 1.2694481439078276e-05,
      "loss": 0.803,
      "step": 660390
    },
    {
      "epoch": 2.314537355385225,
      "grad_norm": 3.265625,
      "learning_rate": 1.2693832410414574e-05,
      "loss": 0.803,
      "step": 660400
    },
    {
      "epoch": 2.31457240289212,
      "grad_norm": 2.84375,
      "learning_rate": 1.2693183381750872e-05,
      "loss": 0.7636,
      "step": 660410
    },
    {
      "epoch": 2.314607450399016,
      "grad_norm": 3.0625,
      "learning_rate": 1.269253435308717e-05,
      "loss": 0.7932,
      "step": 660420
    },
    {
      "epoch": 2.3146424979059113,
      "grad_norm": 2.59375,
      "learning_rate": 1.269188532442347e-05,
      "loss": 0.7398,
      "step": 660430
    },
    {
      "epoch": 2.314677545412807,
      "grad_norm": 2.921875,
      "learning_rate": 1.2691236295759768e-05,
      "loss": 0.8057,
      "step": 660440
    },
    {
      "epoch": 2.3147125929197028,
      "grad_norm": 2.53125,
      "learning_rate": 1.2690587267096066e-05,
      "loss": 0.745,
      "step": 660450
    },
    {
      "epoch": 2.314747640426598,
      "grad_norm": 2.890625,
      "learning_rate": 1.2689938238432364e-05,
      "loss": 0.8561,
      "step": 660460
    },
    {
      "epoch": 2.314782687933494,
      "grad_norm": 2.65625,
      "learning_rate": 1.2689289209768662e-05,
      "loss": 0.7956,
      "step": 660470
    },
    {
      "epoch": 2.3148177354403896,
      "grad_norm": 2.578125,
      "learning_rate": 1.2688640181104958e-05,
      "loss": 0.7667,
      "step": 660480
    },
    {
      "epoch": 2.314852782947285,
      "grad_norm": 3.0,
      "learning_rate": 1.2687991152441256e-05,
      "loss": 0.834,
      "step": 660490
    },
    {
      "epoch": 2.3148878304541807,
      "grad_norm": 3.5,
      "learning_rate": 1.2687342123777554e-05,
      "loss": 0.8902,
      "step": 660500
    },
    {
      "epoch": 2.3149228779610764,
      "grad_norm": 3.265625,
      "learning_rate": 1.2686693095113852e-05,
      "loss": 0.8108,
      "step": 660510
    },
    {
      "epoch": 2.3149579254679717,
      "grad_norm": 3.046875,
      "learning_rate": 1.268604406645015e-05,
      "loss": 0.8397,
      "step": 660520
    },
    {
      "epoch": 2.3149929729748675,
      "grad_norm": 3.421875,
      "learning_rate": 1.2685395037786448e-05,
      "loss": 0.8414,
      "step": 660530
    },
    {
      "epoch": 2.315028020481763,
      "grad_norm": 2.9375,
      "learning_rate": 1.2684746009122746e-05,
      "loss": 0.8563,
      "step": 660540
    },
    {
      "epoch": 2.3150630679886586,
      "grad_norm": 3.3125,
      "learning_rate": 1.2684096980459046e-05,
      "loss": 0.8538,
      "step": 660550
    },
    {
      "epoch": 2.3150981154955543,
      "grad_norm": 3.390625,
      "learning_rate": 1.2683447951795344e-05,
      "loss": 0.818,
      "step": 660560
    },
    {
      "epoch": 2.3151331630024496,
      "grad_norm": 3.40625,
      "learning_rate": 1.2682798923131642e-05,
      "loss": 0.8323,
      "step": 660570
    },
    {
      "epoch": 2.3151682105093454,
      "grad_norm": 3.125,
      "learning_rate": 1.268214989446794e-05,
      "loss": 0.8605,
      "step": 660580
    },
    {
      "epoch": 2.315203258016241,
      "grad_norm": 2.6875,
      "learning_rate": 1.2681500865804238e-05,
      "loss": 0.7766,
      "step": 660590
    },
    {
      "epoch": 2.3152383055231365,
      "grad_norm": 3.109375,
      "learning_rate": 1.2680851837140536e-05,
      "loss": 0.7482,
      "step": 660600
    },
    {
      "epoch": 2.3152733530300322,
      "grad_norm": 2.921875,
      "learning_rate": 1.2680202808476834e-05,
      "loss": 0.8458,
      "step": 660610
    },
    {
      "epoch": 2.315308400536928,
      "grad_norm": 2.9375,
      "learning_rate": 1.2679553779813134e-05,
      "loss": 0.8038,
      "step": 660620
    },
    {
      "epoch": 2.3153434480438233,
      "grad_norm": 3.203125,
      "learning_rate": 1.2678904751149432e-05,
      "loss": 0.8295,
      "step": 660630
    },
    {
      "epoch": 2.315378495550719,
      "grad_norm": 2.921875,
      "learning_rate": 1.267825572248573e-05,
      "loss": 0.8378,
      "step": 660640
    },
    {
      "epoch": 2.3154135430576144,
      "grad_norm": 3.171875,
      "learning_rate": 1.2677606693822028e-05,
      "loss": 0.8273,
      "step": 660650
    },
    {
      "epoch": 2.31544859056451,
      "grad_norm": 2.90625,
      "learning_rate": 1.2676957665158324e-05,
      "loss": 0.7831,
      "step": 660660
    },
    {
      "epoch": 2.315483638071406,
      "grad_norm": 3.015625,
      "learning_rate": 1.2676308636494622e-05,
      "loss": 0.8273,
      "step": 660670
    },
    {
      "epoch": 2.315518685578301,
      "grad_norm": 2.609375,
      "learning_rate": 1.267565960783092e-05,
      "loss": 0.8894,
      "step": 660680
    },
    {
      "epoch": 2.315553733085197,
      "grad_norm": 3.1875,
      "learning_rate": 1.2675010579167218e-05,
      "loss": 0.7975,
      "step": 660690
    },
    {
      "epoch": 2.3155887805920927,
      "grad_norm": 2.703125,
      "learning_rate": 1.2674361550503516e-05,
      "loss": 0.7947,
      "step": 660700
    },
    {
      "epoch": 2.315623828098988,
      "grad_norm": 2.828125,
      "learning_rate": 1.2673712521839814e-05,
      "loss": 0.7671,
      "step": 660710
    },
    {
      "epoch": 2.315658875605884,
      "grad_norm": 3.15625,
      "learning_rate": 1.2673063493176112e-05,
      "loss": 0.9129,
      "step": 660720
    },
    {
      "epoch": 2.3156939231127796,
      "grad_norm": 3.359375,
      "learning_rate": 1.2672414464512412e-05,
      "loss": 0.8417,
      "step": 660730
    },
    {
      "epoch": 2.315728970619675,
      "grad_norm": 3.625,
      "learning_rate": 1.267176543584871e-05,
      "loss": 0.804,
      "step": 660740
    },
    {
      "epoch": 2.3157640181265706,
      "grad_norm": 2.953125,
      "learning_rate": 1.2671116407185008e-05,
      "loss": 0.8599,
      "step": 660750
    },
    {
      "epoch": 2.315799065633466,
      "grad_norm": 3.09375,
      "learning_rate": 1.2670467378521306e-05,
      "loss": 0.833,
      "step": 660760
    },
    {
      "epoch": 2.3158341131403617,
      "grad_norm": 2.921875,
      "learning_rate": 1.2669818349857604e-05,
      "loss": 0.7679,
      "step": 660770
    },
    {
      "epoch": 2.3158691606472575,
      "grad_norm": 2.9375,
      "learning_rate": 1.2669169321193902e-05,
      "loss": 0.8272,
      "step": 660780
    },
    {
      "epoch": 2.3159042081541528,
      "grad_norm": 2.59375,
      "learning_rate": 1.26685202925302e-05,
      "loss": 0.7731,
      "step": 660790
    },
    {
      "epoch": 2.3159392556610485,
      "grad_norm": 2.8125,
      "learning_rate": 1.26678712638665e-05,
      "loss": 0.8132,
      "step": 660800
    },
    {
      "epoch": 2.3159743031679443,
      "grad_norm": 2.5,
      "learning_rate": 1.2667222235202797e-05,
      "loss": 0.7926,
      "step": 660810
    },
    {
      "epoch": 2.3160093506748396,
      "grad_norm": 2.265625,
      "learning_rate": 1.2666573206539095e-05,
      "loss": 0.7721,
      "step": 660820
    },
    {
      "epoch": 2.3160443981817354,
      "grad_norm": 3.0,
      "learning_rate": 1.2665924177875393e-05,
      "loss": 0.8404,
      "step": 660830
    },
    {
      "epoch": 2.316079445688631,
      "grad_norm": 2.390625,
      "learning_rate": 1.2665275149211691e-05,
      "loss": 0.7825,
      "step": 660840
    },
    {
      "epoch": 2.3161144931955264,
      "grad_norm": 3.25,
      "learning_rate": 1.2664626120547988e-05,
      "loss": 0.8204,
      "step": 660850
    },
    {
      "epoch": 2.316149540702422,
      "grad_norm": 2.75,
      "learning_rate": 1.2663977091884286e-05,
      "loss": 0.7748,
      "step": 660860
    },
    {
      "epoch": 2.3161845882093175,
      "grad_norm": 2.9375,
      "learning_rate": 1.2663328063220584e-05,
      "loss": 0.8654,
      "step": 660870
    },
    {
      "epoch": 2.3162196357162133,
      "grad_norm": 2.515625,
      "learning_rate": 1.2662679034556882e-05,
      "loss": 0.8259,
      "step": 660880
    },
    {
      "epoch": 2.316254683223109,
      "grad_norm": 3.03125,
      "learning_rate": 1.266203000589318e-05,
      "loss": 0.7639,
      "step": 660890
    },
    {
      "epoch": 2.3162897307300048,
      "grad_norm": 2.859375,
      "learning_rate": 1.2661380977229478e-05,
      "loss": 0.7874,
      "step": 660900
    },
    {
      "epoch": 2.3163247782369,
      "grad_norm": 3.078125,
      "learning_rate": 1.2660731948565777e-05,
      "loss": 0.8647,
      "step": 660910
    },
    {
      "epoch": 2.316359825743796,
      "grad_norm": 3.265625,
      "learning_rate": 1.2660082919902075e-05,
      "loss": 0.8053,
      "step": 660920
    },
    {
      "epoch": 2.316394873250691,
      "grad_norm": 3.25,
      "learning_rate": 1.2659433891238373e-05,
      "loss": 0.8007,
      "step": 660930
    },
    {
      "epoch": 2.316429920757587,
      "grad_norm": 2.578125,
      "learning_rate": 1.2658784862574671e-05,
      "loss": 0.7667,
      "step": 660940
    },
    {
      "epoch": 2.3164649682644827,
      "grad_norm": 2.46875,
      "learning_rate": 1.265813583391097e-05,
      "loss": 0.7389,
      "step": 660950
    },
    {
      "epoch": 2.316500015771378,
      "grad_norm": 3.03125,
      "learning_rate": 1.2657486805247267e-05,
      "loss": 0.7763,
      "step": 660960
    },
    {
      "epoch": 2.3165350632782737,
      "grad_norm": 2.609375,
      "learning_rate": 1.2656837776583565e-05,
      "loss": 0.8213,
      "step": 660970
    },
    {
      "epoch": 2.316570110785169,
      "grad_norm": 3.171875,
      "learning_rate": 1.2656188747919865e-05,
      "loss": 0.8436,
      "step": 660980
    },
    {
      "epoch": 2.316605158292065,
      "grad_norm": 2.859375,
      "learning_rate": 1.2655539719256163e-05,
      "loss": 0.8377,
      "step": 660990
    },
    {
      "epoch": 2.3166402057989606,
      "grad_norm": 2.75,
      "learning_rate": 1.2654890690592461e-05,
      "loss": 0.792,
      "step": 661000
    },
    {
      "epoch": 2.3166752533058563,
      "grad_norm": 2.765625,
      "learning_rate": 1.2654241661928759e-05,
      "loss": 0.7512,
      "step": 661010
    },
    {
      "epoch": 2.3167103008127516,
      "grad_norm": 3.03125,
      "learning_rate": 1.2653592633265057e-05,
      "loss": 0.8285,
      "step": 661020
    },
    {
      "epoch": 2.3167453483196474,
      "grad_norm": 3.1875,
      "learning_rate": 1.2652943604601355e-05,
      "loss": 0.7744,
      "step": 661030
    },
    {
      "epoch": 2.3167803958265427,
      "grad_norm": 2.703125,
      "learning_rate": 1.2652294575937651e-05,
      "loss": 0.8114,
      "step": 661040
    },
    {
      "epoch": 2.3168154433334385,
      "grad_norm": 3.21875,
      "learning_rate": 1.265164554727395e-05,
      "loss": 0.8126,
      "step": 661050
    },
    {
      "epoch": 2.3168504908403342,
      "grad_norm": 2.96875,
      "learning_rate": 1.2650996518610247e-05,
      "loss": 0.8098,
      "step": 661060
    },
    {
      "epoch": 2.3168855383472295,
      "grad_norm": 3.09375,
      "learning_rate": 1.2650347489946545e-05,
      "loss": 0.7897,
      "step": 661070
    },
    {
      "epoch": 2.3169205858541253,
      "grad_norm": 2.375,
      "learning_rate": 1.2649698461282843e-05,
      "loss": 0.8045,
      "step": 661080
    },
    {
      "epoch": 2.3169556333610206,
      "grad_norm": 3.03125,
      "learning_rate": 1.2649049432619141e-05,
      "loss": 0.7958,
      "step": 661090
    },
    {
      "epoch": 2.3169906808679164,
      "grad_norm": 2.75,
      "learning_rate": 1.2648400403955441e-05,
      "loss": 0.9052,
      "step": 661100
    },
    {
      "epoch": 2.317025728374812,
      "grad_norm": 2.609375,
      "learning_rate": 1.2647751375291739e-05,
      "loss": 0.7783,
      "step": 661110
    },
    {
      "epoch": 2.317060775881708,
      "grad_norm": 2.90625,
      "learning_rate": 1.2647102346628037e-05,
      "loss": 0.74,
      "step": 661120
    },
    {
      "epoch": 2.317095823388603,
      "grad_norm": 2.625,
      "learning_rate": 1.2646453317964335e-05,
      "loss": 0.751,
      "step": 661130
    },
    {
      "epoch": 2.317130870895499,
      "grad_norm": 2.65625,
      "learning_rate": 1.2645804289300633e-05,
      "loss": 0.7469,
      "step": 661140
    },
    {
      "epoch": 2.3171659184023943,
      "grad_norm": 2.5625,
      "learning_rate": 1.2645155260636931e-05,
      "loss": 0.8046,
      "step": 661150
    },
    {
      "epoch": 2.31720096590929,
      "grad_norm": 3.15625,
      "learning_rate": 1.264450623197323e-05,
      "loss": 0.8191,
      "step": 661160
    },
    {
      "epoch": 2.317236013416186,
      "grad_norm": 3.015625,
      "learning_rate": 1.2643857203309529e-05,
      "loss": 0.8168,
      "step": 661170
    },
    {
      "epoch": 2.317271060923081,
      "grad_norm": 2.8125,
      "learning_rate": 1.2643208174645827e-05,
      "loss": 0.7824,
      "step": 661180
    },
    {
      "epoch": 2.317306108429977,
      "grad_norm": 2.890625,
      "learning_rate": 1.2642559145982125e-05,
      "loss": 0.7863,
      "step": 661190
    },
    {
      "epoch": 2.3173411559368726,
      "grad_norm": 3.515625,
      "learning_rate": 1.2641910117318423e-05,
      "loss": 0.891,
      "step": 661200
    },
    {
      "epoch": 2.317376203443768,
      "grad_norm": 2.734375,
      "learning_rate": 1.264126108865472e-05,
      "loss": 0.748,
      "step": 661210
    },
    {
      "epoch": 2.3174112509506637,
      "grad_norm": 3.09375,
      "learning_rate": 1.2640612059991019e-05,
      "loss": 0.8035,
      "step": 661220
    },
    {
      "epoch": 2.3174462984575595,
      "grad_norm": 3.078125,
      "learning_rate": 1.2639963031327315e-05,
      "loss": 0.8797,
      "step": 661230
    },
    {
      "epoch": 2.3174813459644548,
      "grad_norm": 2.84375,
      "learning_rate": 1.2639314002663613e-05,
      "loss": 0.7847,
      "step": 661240
    },
    {
      "epoch": 2.3175163934713505,
      "grad_norm": 2.921875,
      "learning_rate": 1.2638664973999911e-05,
      "loss": 0.8047,
      "step": 661250
    },
    {
      "epoch": 2.317551440978246,
      "grad_norm": 2.96875,
      "learning_rate": 1.2638015945336209e-05,
      "loss": 0.8011,
      "step": 661260
    },
    {
      "epoch": 2.3175864884851416,
      "grad_norm": 2.6875,
      "learning_rate": 1.2637366916672507e-05,
      "loss": 0.7935,
      "step": 661270
    },
    {
      "epoch": 2.3176215359920374,
      "grad_norm": 2.875,
      "learning_rate": 1.2636717888008807e-05,
      "loss": 0.7671,
      "step": 661280
    },
    {
      "epoch": 2.3176565834989327,
      "grad_norm": 2.6875,
      "learning_rate": 1.2636068859345105e-05,
      "loss": 0.7952,
      "step": 661290
    },
    {
      "epoch": 2.3176916310058284,
      "grad_norm": 2.609375,
      "learning_rate": 1.2635419830681403e-05,
      "loss": 0.7841,
      "step": 661300
    },
    {
      "epoch": 2.317726678512724,
      "grad_norm": 3.109375,
      "learning_rate": 1.26347708020177e-05,
      "loss": 0.8422,
      "step": 661310
    },
    {
      "epoch": 2.3177617260196195,
      "grad_norm": 3.40625,
      "learning_rate": 1.2634121773353999e-05,
      "loss": 0.8445,
      "step": 661320
    },
    {
      "epoch": 2.3177967735265153,
      "grad_norm": 2.828125,
      "learning_rate": 1.2633472744690297e-05,
      "loss": 0.7876,
      "step": 661330
    },
    {
      "epoch": 2.317831821033411,
      "grad_norm": 2.921875,
      "learning_rate": 1.2632823716026595e-05,
      "loss": 0.8689,
      "step": 661340
    },
    {
      "epoch": 2.3178668685403063,
      "grad_norm": 2.640625,
      "learning_rate": 1.2632174687362894e-05,
      "loss": 0.896,
      "step": 661350
    },
    {
      "epoch": 2.317901916047202,
      "grad_norm": 2.9375,
      "learning_rate": 1.2631525658699192e-05,
      "loss": 0.7009,
      "step": 661360
    },
    {
      "epoch": 2.3179369635540974,
      "grad_norm": 2.96875,
      "learning_rate": 1.263087663003549e-05,
      "loss": 0.833,
      "step": 661370
    },
    {
      "epoch": 2.317972011060993,
      "grad_norm": 3.09375,
      "learning_rate": 1.2630227601371788e-05,
      "loss": 0.7979,
      "step": 661380
    },
    {
      "epoch": 2.318007058567889,
      "grad_norm": 3.171875,
      "learning_rate": 1.2629578572708086e-05,
      "loss": 0.7821,
      "step": 661390
    },
    {
      "epoch": 2.3180421060747842,
      "grad_norm": 2.765625,
      "learning_rate": 1.2628929544044384e-05,
      "loss": 0.8082,
      "step": 661400
    },
    {
      "epoch": 2.31807715358168,
      "grad_norm": 2.953125,
      "learning_rate": 1.2628280515380682e-05,
      "loss": 0.8362,
      "step": 661410
    },
    {
      "epoch": 2.3181122010885757,
      "grad_norm": 2.984375,
      "learning_rate": 1.2627631486716979e-05,
      "loss": 0.8333,
      "step": 661420
    },
    {
      "epoch": 2.318147248595471,
      "grad_norm": 3.140625,
      "learning_rate": 1.2626982458053277e-05,
      "loss": 0.8469,
      "step": 661430
    },
    {
      "epoch": 2.318182296102367,
      "grad_norm": 2.671875,
      "learning_rate": 1.2626333429389575e-05,
      "loss": 0.8109,
      "step": 661440
    },
    {
      "epoch": 2.3182173436092626,
      "grad_norm": 2.796875,
      "learning_rate": 1.2625684400725873e-05,
      "loss": 0.7448,
      "step": 661450
    },
    {
      "epoch": 2.318252391116158,
      "grad_norm": 2.78125,
      "learning_rate": 1.2625035372062172e-05,
      "loss": 0.8619,
      "step": 661460
    },
    {
      "epoch": 2.3182874386230536,
      "grad_norm": 2.734375,
      "learning_rate": 1.262438634339847e-05,
      "loss": 0.7396,
      "step": 661470
    },
    {
      "epoch": 2.318322486129949,
      "grad_norm": 3.578125,
      "learning_rate": 1.2623737314734768e-05,
      "loss": 0.8099,
      "step": 661480
    },
    {
      "epoch": 2.3183575336368447,
      "grad_norm": 3.328125,
      "learning_rate": 1.2623088286071066e-05,
      "loss": 0.7635,
      "step": 661490
    },
    {
      "epoch": 2.3183925811437405,
      "grad_norm": 3.59375,
      "learning_rate": 1.2622439257407364e-05,
      "loss": 0.8728,
      "step": 661500
    },
    {
      "epoch": 2.318427628650636,
      "grad_norm": 3.484375,
      "learning_rate": 1.2621790228743662e-05,
      "loss": 0.8003,
      "step": 661510
    },
    {
      "epoch": 2.3184626761575315,
      "grad_norm": 3.0625,
      "learning_rate": 1.262114120007996e-05,
      "loss": 0.8394,
      "step": 661520
    },
    {
      "epoch": 2.3184977236644273,
      "grad_norm": 2.734375,
      "learning_rate": 1.262049217141626e-05,
      "loss": 0.806,
      "step": 661530
    },
    {
      "epoch": 2.3185327711713226,
      "grad_norm": 2.890625,
      "learning_rate": 1.2619843142752558e-05,
      "loss": 0.8541,
      "step": 661540
    },
    {
      "epoch": 2.3185678186782184,
      "grad_norm": 2.890625,
      "learning_rate": 1.2619194114088856e-05,
      "loss": 0.8937,
      "step": 661550
    },
    {
      "epoch": 2.318602866185114,
      "grad_norm": 2.984375,
      "learning_rate": 1.2618545085425154e-05,
      "loss": 0.8511,
      "step": 661560
    },
    {
      "epoch": 2.3186379136920094,
      "grad_norm": 2.78125,
      "learning_rate": 1.2617896056761452e-05,
      "loss": 0.8035,
      "step": 661570
    },
    {
      "epoch": 2.318672961198905,
      "grad_norm": 2.90625,
      "learning_rate": 1.261724702809775e-05,
      "loss": 0.8171,
      "step": 661580
    },
    {
      "epoch": 2.3187080087058005,
      "grad_norm": 3.328125,
      "learning_rate": 1.2616597999434048e-05,
      "loss": 0.7523,
      "step": 661590
    },
    {
      "epoch": 2.3187430562126963,
      "grad_norm": 2.953125,
      "learning_rate": 1.2615948970770344e-05,
      "loss": 0.7914,
      "step": 661600
    },
    {
      "epoch": 2.318778103719592,
      "grad_norm": 3.15625,
      "learning_rate": 1.2615299942106642e-05,
      "loss": 0.8033,
      "step": 661610
    },
    {
      "epoch": 2.3188131512264873,
      "grad_norm": 2.875,
      "learning_rate": 1.261465091344294e-05,
      "loss": 0.8411,
      "step": 661620
    },
    {
      "epoch": 2.318848198733383,
      "grad_norm": 2.859375,
      "learning_rate": 1.2614001884779238e-05,
      "loss": 0.8624,
      "step": 661630
    },
    {
      "epoch": 2.318883246240279,
      "grad_norm": 3.1875,
      "learning_rate": 1.2613352856115536e-05,
      "loss": 0.8411,
      "step": 661640
    },
    {
      "epoch": 2.318918293747174,
      "grad_norm": 3.0625,
      "learning_rate": 1.2612703827451836e-05,
      "loss": 0.794,
      "step": 661650
    },
    {
      "epoch": 2.31895334125407,
      "grad_norm": 2.59375,
      "learning_rate": 1.2612054798788134e-05,
      "loss": 0.8113,
      "step": 661660
    },
    {
      "epoch": 2.3189883887609657,
      "grad_norm": 2.75,
      "learning_rate": 1.2611405770124432e-05,
      "loss": 0.8083,
      "step": 661670
    },
    {
      "epoch": 2.319023436267861,
      "grad_norm": 2.796875,
      "learning_rate": 1.261075674146073e-05,
      "loss": 0.7993,
      "step": 661680
    },
    {
      "epoch": 2.3190584837747568,
      "grad_norm": 3.0625,
      "learning_rate": 1.2610107712797028e-05,
      "loss": 0.8708,
      "step": 661690
    },
    {
      "epoch": 2.319093531281652,
      "grad_norm": 2.703125,
      "learning_rate": 1.2609458684133326e-05,
      "loss": 0.8207,
      "step": 661700
    },
    {
      "epoch": 2.319128578788548,
      "grad_norm": 2.921875,
      "learning_rate": 1.2608809655469626e-05,
      "loss": 0.866,
      "step": 661710
    },
    {
      "epoch": 2.3191636262954436,
      "grad_norm": 3.0,
      "learning_rate": 1.2608160626805924e-05,
      "loss": 0.8319,
      "step": 661720
    },
    {
      "epoch": 2.319198673802339,
      "grad_norm": 2.96875,
      "learning_rate": 1.2607511598142222e-05,
      "loss": 0.8659,
      "step": 661730
    },
    {
      "epoch": 2.3192337213092347,
      "grad_norm": 2.484375,
      "learning_rate": 1.260686256947852e-05,
      "loss": 0.7961,
      "step": 661740
    },
    {
      "epoch": 2.3192687688161304,
      "grad_norm": 2.734375,
      "learning_rate": 1.2606213540814818e-05,
      "loss": 0.8073,
      "step": 661750
    },
    {
      "epoch": 2.3193038163230257,
      "grad_norm": 3.09375,
      "learning_rate": 1.2605564512151116e-05,
      "loss": 0.8657,
      "step": 661760
    },
    {
      "epoch": 2.3193388638299215,
      "grad_norm": 3.421875,
      "learning_rate": 1.2604915483487414e-05,
      "loss": 0.7935,
      "step": 661770
    },
    {
      "epoch": 2.3193739113368173,
      "grad_norm": 2.921875,
      "learning_rate": 1.2604266454823714e-05,
      "loss": 0.8171,
      "step": 661780
    },
    {
      "epoch": 2.3194089588437126,
      "grad_norm": 2.75,
      "learning_rate": 1.2603617426160008e-05,
      "loss": 0.7782,
      "step": 661790
    },
    {
      "epoch": 2.3194440063506083,
      "grad_norm": 3.125,
      "learning_rate": 1.2602968397496306e-05,
      "loss": 0.7585,
      "step": 661800
    },
    {
      "epoch": 2.3194790538575036,
      "grad_norm": 3.328125,
      "learning_rate": 1.2602319368832604e-05,
      "loss": 0.8599,
      "step": 661810
    },
    {
      "epoch": 2.3195141013643994,
      "grad_norm": 3.59375,
      "learning_rate": 1.2601670340168902e-05,
      "loss": 0.8546,
      "step": 661820
    },
    {
      "epoch": 2.319549148871295,
      "grad_norm": 2.90625,
      "learning_rate": 1.2601021311505202e-05,
      "loss": 0.8007,
      "step": 661830
    },
    {
      "epoch": 2.3195841963781905,
      "grad_norm": 3.0,
      "learning_rate": 1.26003722828415e-05,
      "loss": 0.7898,
      "step": 661840
    },
    {
      "epoch": 2.3196192438850862,
      "grad_norm": 3.171875,
      "learning_rate": 1.2599723254177798e-05,
      "loss": 0.821,
      "step": 661850
    },
    {
      "epoch": 2.319654291391982,
      "grad_norm": 3.3125,
      "learning_rate": 1.2599074225514096e-05,
      "loss": 0.8125,
      "step": 661860
    },
    {
      "epoch": 2.3196893388988773,
      "grad_norm": 3.109375,
      "learning_rate": 1.2598425196850394e-05,
      "loss": 0.8328,
      "step": 661870
    },
    {
      "epoch": 2.319724386405773,
      "grad_norm": 3.0,
      "learning_rate": 1.2597776168186692e-05,
      "loss": 0.8156,
      "step": 661880
    },
    {
      "epoch": 2.319759433912669,
      "grad_norm": 2.703125,
      "learning_rate": 1.259712713952299e-05,
      "loss": 0.7535,
      "step": 661890
    },
    {
      "epoch": 2.319794481419564,
      "grad_norm": 2.6875,
      "learning_rate": 1.259647811085929e-05,
      "loss": 0.8264,
      "step": 661900
    },
    {
      "epoch": 2.31982952892646,
      "grad_norm": 2.796875,
      "learning_rate": 1.2595829082195588e-05,
      "loss": 0.8,
      "step": 661910
    },
    {
      "epoch": 2.319864576433355,
      "grad_norm": 3.15625,
      "learning_rate": 1.2595180053531886e-05,
      "loss": 0.8529,
      "step": 661920
    },
    {
      "epoch": 2.319899623940251,
      "grad_norm": 2.9375,
      "learning_rate": 1.2594531024868184e-05,
      "loss": 0.7622,
      "step": 661930
    },
    {
      "epoch": 2.3199346714471467,
      "grad_norm": 2.859375,
      "learning_rate": 1.2593881996204482e-05,
      "loss": 0.8458,
      "step": 661940
    },
    {
      "epoch": 2.319969718954042,
      "grad_norm": 2.875,
      "learning_rate": 1.259323296754078e-05,
      "loss": 0.7725,
      "step": 661950
    },
    {
      "epoch": 2.320004766460938,
      "grad_norm": 3.03125,
      "learning_rate": 1.2592583938877078e-05,
      "loss": 0.9204,
      "step": 661960
    },
    {
      "epoch": 2.3200398139678335,
      "grad_norm": 3.171875,
      "learning_rate": 1.2591934910213377e-05,
      "loss": 0.7614,
      "step": 661970
    },
    {
      "epoch": 2.320074861474729,
      "grad_norm": 2.4375,
      "learning_rate": 1.2591285881549672e-05,
      "loss": 0.7408,
      "step": 661980
    },
    {
      "epoch": 2.3201099089816246,
      "grad_norm": 3.140625,
      "learning_rate": 1.259063685288597e-05,
      "loss": 0.8082,
      "step": 661990
    },
    {
      "epoch": 2.3201449564885204,
      "grad_norm": 2.84375,
      "learning_rate": 1.2589987824222268e-05,
      "loss": 0.7589,
      "step": 662000
    },
    {
      "epoch": 2.3201800039954157,
      "grad_norm": 2.796875,
      "learning_rate": 1.2589338795558568e-05,
      "loss": 0.8609,
      "step": 662010
    },
    {
      "epoch": 2.3202150515023114,
      "grad_norm": 3.0,
      "learning_rate": 1.2588689766894866e-05,
      "loss": 0.8378,
      "step": 662020
    },
    {
      "epoch": 2.3202500990092068,
      "grad_norm": 2.609375,
      "learning_rate": 1.2588040738231164e-05,
      "loss": 0.8492,
      "step": 662030
    },
    {
      "epoch": 2.3202851465161025,
      "grad_norm": 3.046875,
      "learning_rate": 1.2587391709567462e-05,
      "loss": 0.7983,
      "step": 662040
    },
    {
      "epoch": 2.3203201940229983,
      "grad_norm": 2.703125,
      "learning_rate": 1.258674268090376e-05,
      "loss": 0.8142,
      "step": 662050
    },
    {
      "epoch": 2.3203552415298936,
      "grad_norm": 2.703125,
      "learning_rate": 1.2586093652240058e-05,
      "loss": 0.8151,
      "step": 662060
    },
    {
      "epoch": 2.3203902890367893,
      "grad_norm": 2.890625,
      "learning_rate": 1.2585444623576356e-05,
      "loss": 0.8403,
      "step": 662070
    },
    {
      "epoch": 2.320425336543685,
      "grad_norm": 2.796875,
      "learning_rate": 1.2584795594912655e-05,
      "loss": 0.8358,
      "step": 662080
    },
    {
      "epoch": 2.3204603840505804,
      "grad_norm": 3.234375,
      "learning_rate": 1.2584146566248953e-05,
      "loss": 0.91,
      "step": 662090
    },
    {
      "epoch": 2.320495431557476,
      "grad_norm": 2.9375,
      "learning_rate": 1.2583497537585251e-05,
      "loss": 0.8554,
      "step": 662100
    },
    {
      "epoch": 2.320530479064372,
      "grad_norm": 3.171875,
      "learning_rate": 1.258284850892155e-05,
      "loss": 0.8229,
      "step": 662110
    },
    {
      "epoch": 2.3205655265712672,
      "grad_norm": 2.71875,
      "learning_rate": 1.2582199480257847e-05,
      "loss": 0.7654,
      "step": 662120
    },
    {
      "epoch": 2.320600574078163,
      "grad_norm": 3.0625,
      "learning_rate": 1.2581550451594145e-05,
      "loss": 0.8099,
      "step": 662130
    },
    {
      "epoch": 2.3206356215850583,
      "grad_norm": 2.875,
      "learning_rate": 1.2580901422930443e-05,
      "loss": 0.8241,
      "step": 662140
    },
    {
      "epoch": 2.320670669091954,
      "grad_norm": 2.9375,
      "learning_rate": 1.2580252394266743e-05,
      "loss": 0.8448,
      "step": 662150
    },
    {
      "epoch": 2.32070571659885,
      "grad_norm": 2.828125,
      "learning_rate": 1.2579603365603041e-05,
      "loss": 0.9257,
      "step": 662160
    },
    {
      "epoch": 2.320740764105745,
      "grad_norm": 3.0625,
      "learning_rate": 1.2578954336939336e-05,
      "loss": 0.7647,
      "step": 662170
    },
    {
      "epoch": 2.320775811612641,
      "grad_norm": 2.84375,
      "learning_rate": 1.2578305308275634e-05,
      "loss": 0.8191,
      "step": 662180
    },
    {
      "epoch": 2.3208108591195367,
      "grad_norm": 2.96875,
      "learning_rate": 1.2577656279611933e-05,
      "loss": 0.7485,
      "step": 662190
    },
    {
      "epoch": 2.320845906626432,
      "grad_norm": 2.546875,
      "learning_rate": 1.2577007250948231e-05,
      "loss": 0.793,
      "step": 662200
    },
    {
      "epoch": 2.3208809541333277,
      "grad_norm": 2.59375,
      "learning_rate": 1.257635822228453e-05,
      "loss": 0.8125,
      "step": 662210
    },
    {
      "epoch": 2.3209160016402235,
      "grad_norm": 2.59375,
      "learning_rate": 1.2575709193620827e-05,
      "loss": 0.7557,
      "step": 662220
    },
    {
      "epoch": 2.320951049147119,
      "grad_norm": 3.171875,
      "learning_rate": 1.2575060164957125e-05,
      "loss": 0.8037,
      "step": 662230
    },
    {
      "epoch": 2.3209860966540146,
      "grad_norm": 2.78125,
      "learning_rate": 1.2574411136293423e-05,
      "loss": 0.8377,
      "step": 662240
    },
    {
      "epoch": 2.32102114416091,
      "grad_norm": 2.9375,
      "learning_rate": 1.2573762107629721e-05,
      "loss": 0.799,
      "step": 662250
    },
    {
      "epoch": 2.3210561916678056,
      "grad_norm": 2.859375,
      "learning_rate": 1.2573113078966021e-05,
      "loss": 0.7687,
      "step": 662260
    },
    {
      "epoch": 2.3210912391747014,
      "grad_norm": 2.515625,
      "learning_rate": 1.2572464050302319e-05,
      "loss": 0.82,
      "step": 662270
    },
    {
      "epoch": 2.321126286681597,
      "grad_norm": 3.109375,
      "learning_rate": 1.2571815021638617e-05,
      "loss": 0.7412,
      "step": 662280
    },
    {
      "epoch": 2.3211613341884925,
      "grad_norm": 3.15625,
      "learning_rate": 1.2571165992974915e-05,
      "loss": 0.8773,
      "step": 662290
    },
    {
      "epoch": 2.3211963816953882,
      "grad_norm": 2.78125,
      "learning_rate": 1.2570516964311213e-05,
      "loss": 0.8257,
      "step": 662300
    },
    {
      "epoch": 2.3212314292022835,
      "grad_norm": 2.734375,
      "learning_rate": 1.2569867935647511e-05,
      "loss": 0.8238,
      "step": 662310
    },
    {
      "epoch": 2.3212664767091793,
      "grad_norm": 2.828125,
      "learning_rate": 1.2569218906983809e-05,
      "loss": 0.7528,
      "step": 662320
    },
    {
      "epoch": 2.321301524216075,
      "grad_norm": 2.875,
      "learning_rate": 1.2568569878320109e-05,
      "loss": 0.7512,
      "step": 662330
    },
    {
      "epoch": 2.3213365717229704,
      "grad_norm": 3.453125,
      "learning_rate": 1.2567920849656407e-05,
      "loss": 0.8048,
      "step": 662340
    },
    {
      "epoch": 2.321371619229866,
      "grad_norm": 2.5,
      "learning_rate": 1.2567271820992705e-05,
      "loss": 0.7616,
      "step": 662350
    },
    {
      "epoch": 2.3214066667367614,
      "grad_norm": 2.421875,
      "learning_rate": 1.2566622792329e-05,
      "loss": 0.8468,
      "step": 662360
    },
    {
      "epoch": 2.321441714243657,
      "grad_norm": 3.078125,
      "learning_rate": 1.2565973763665297e-05,
      "loss": 0.7936,
      "step": 662370
    },
    {
      "epoch": 2.321476761750553,
      "grad_norm": 2.609375,
      "learning_rate": 1.2565324735001597e-05,
      "loss": 0.7664,
      "step": 662380
    },
    {
      "epoch": 2.3215118092574487,
      "grad_norm": 3.109375,
      "learning_rate": 1.2564675706337895e-05,
      "loss": 0.7937,
      "step": 662390
    },
    {
      "epoch": 2.321546856764344,
      "grad_norm": 2.953125,
      "learning_rate": 1.2564026677674193e-05,
      "loss": 0.8483,
      "step": 662400
    },
    {
      "epoch": 2.32158190427124,
      "grad_norm": 2.75,
      "learning_rate": 1.2563377649010491e-05,
      "loss": 0.8626,
      "step": 662410
    },
    {
      "epoch": 2.321616951778135,
      "grad_norm": 2.421875,
      "learning_rate": 1.2562728620346789e-05,
      "loss": 0.846,
      "step": 662420
    },
    {
      "epoch": 2.321651999285031,
      "grad_norm": 3.0,
      "learning_rate": 1.2562079591683087e-05,
      "loss": 0.7894,
      "step": 662430
    },
    {
      "epoch": 2.3216870467919266,
      "grad_norm": 3.203125,
      "learning_rate": 1.2561430563019385e-05,
      "loss": 0.7528,
      "step": 662440
    },
    {
      "epoch": 2.321722094298822,
      "grad_norm": 2.75,
      "learning_rate": 1.2560781534355685e-05,
      "loss": 0.8345,
      "step": 662450
    },
    {
      "epoch": 2.3217571418057177,
      "grad_norm": 2.859375,
      "learning_rate": 1.2560132505691983e-05,
      "loss": 0.8155,
      "step": 662460
    },
    {
      "epoch": 2.3217921893126134,
      "grad_norm": 2.59375,
      "learning_rate": 1.255948347702828e-05,
      "loss": 0.8063,
      "step": 662470
    },
    {
      "epoch": 2.3218272368195088,
      "grad_norm": 2.953125,
      "learning_rate": 1.2558834448364579e-05,
      "loss": 0.8359,
      "step": 662480
    },
    {
      "epoch": 2.3218622843264045,
      "grad_norm": 3.359375,
      "learning_rate": 1.2558185419700877e-05,
      "loss": 0.7937,
      "step": 662490
    },
    {
      "epoch": 2.3218973318333003,
      "grad_norm": 2.71875,
      "learning_rate": 1.2557536391037175e-05,
      "loss": 0.8125,
      "step": 662500
    },
    {
      "epoch": 2.3219323793401956,
      "grad_norm": 2.796875,
      "learning_rate": 1.2556887362373473e-05,
      "loss": 0.7757,
      "step": 662510
    },
    {
      "epoch": 2.3219674268470913,
      "grad_norm": 2.75,
      "learning_rate": 1.2556238333709772e-05,
      "loss": 0.8339,
      "step": 662520
    },
    {
      "epoch": 2.3220024743539867,
      "grad_norm": 2.390625,
      "learning_rate": 1.255558930504607e-05,
      "loss": 0.7748,
      "step": 662530
    },
    {
      "epoch": 2.3220375218608824,
      "grad_norm": 3.0,
      "learning_rate": 1.2554940276382368e-05,
      "loss": 0.8303,
      "step": 662540
    },
    {
      "epoch": 2.322072569367778,
      "grad_norm": 2.796875,
      "learning_rate": 1.2554291247718663e-05,
      "loss": 0.7275,
      "step": 662550
    },
    {
      "epoch": 2.3221076168746735,
      "grad_norm": 2.96875,
      "learning_rate": 1.2553642219054963e-05,
      "loss": 0.8821,
      "step": 662560
    },
    {
      "epoch": 2.3221426643815692,
      "grad_norm": 3.109375,
      "learning_rate": 1.255299319039126e-05,
      "loss": 0.812,
      "step": 662570
    },
    {
      "epoch": 2.322177711888465,
      "grad_norm": 2.765625,
      "learning_rate": 1.2552344161727559e-05,
      "loss": 0.8109,
      "step": 662580
    },
    {
      "epoch": 2.3222127593953603,
      "grad_norm": 3.03125,
      "learning_rate": 1.2551695133063857e-05,
      "loss": 0.7846,
      "step": 662590
    },
    {
      "epoch": 2.322247806902256,
      "grad_norm": 3.359375,
      "learning_rate": 1.2551046104400155e-05,
      "loss": 0.8501,
      "step": 662600
    },
    {
      "epoch": 2.322282854409152,
      "grad_norm": 2.953125,
      "learning_rate": 1.2550397075736453e-05,
      "loss": 0.8223,
      "step": 662610
    },
    {
      "epoch": 2.322317901916047,
      "grad_norm": 2.703125,
      "learning_rate": 1.254974804707275e-05,
      "loss": 0.8004,
      "step": 662620
    },
    {
      "epoch": 2.322352949422943,
      "grad_norm": 2.921875,
      "learning_rate": 1.254909901840905e-05,
      "loss": 0.8377,
      "step": 662630
    },
    {
      "epoch": 2.322387996929838,
      "grad_norm": 2.390625,
      "learning_rate": 1.2548449989745348e-05,
      "loss": 0.7288,
      "step": 662640
    },
    {
      "epoch": 2.322423044436734,
      "grad_norm": 2.953125,
      "learning_rate": 1.2547800961081646e-05,
      "loss": 0.7989,
      "step": 662650
    },
    {
      "epoch": 2.3224580919436297,
      "grad_norm": 2.640625,
      "learning_rate": 1.2547151932417944e-05,
      "loss": 0.8577,
      "step": 662660
    },
    {
      "epoch": 2.322493139450525,
      "grad_norm": 2.796875,
      "learning_rate": 1.2546502903754242e-05,
      "loss": 0.8509,
      "step": 662670
    },
    {
      "epoch": 2.322528186957421,
      "grad_norm": 2.78125,
      "learning_rate": 1.254585387509054e-05,
      "loss": 0.769,
      "step": 662680
    },
    {
      "epoch": 2.3225632344643166,
      "grad_norm": 2.734375,
      "learning_rate": 1.2545204846426838e-05,
      "loss": 0.7748,
      "step": 662690
    },
    {
      "epoch": 2.322598281971212,
      "grad_norm": 2.78125,
      "learning_rate": 1.2544555817763138e-05,
      "loss": 0.7865,
      "step": 662700
    },
    {
      "epoch": 2.3226333294781076,
      "grad_norm": 2.890625,
      "learning_rate": 1.2543906789099436e-05,
      "loss": 0.8173,
      "step": 662710
    },
    {
      "epoch": 2.3226683769850034,
      "grad_norm": 2.484375,
      "learning_rate": 1.2543257760435734e-05,
      "loss": 0.7277,
      "step": 662720
    },
    {
      "epoch": 2.3227034244918987,
      "grad_norm": 2.578125,
      "learning_rate": 1.2542608731772029e-05,
      "loss": 0.7862,
      "step": 662730
    },
    {
      "epoch": 2.3227384719987945,
      "grad_norm": 2.546875,
      "learning_rate": 1.2541959703108328e-05,
      "loss": 0.8293,
      "step": 662740
    },
    {
      "epoch": 2.3227735195056898,
      "grad_norm": 2.953125,
      "learning_rate": 1.2541310674444626e-05,
      "loss": 0.8425,
      "step": 662750
    },
    {
      "epoch": 2.3228085670125855,
      "grad_norm": 2.78125,
      "learning_rate": 1.2540661645780924e-05,
      "loss": 0.7612,
      "step": 662760
    },
    {
      "epoch": 2.3228436145194813,
      "grad_norm": 3.40625,
      "learning_rate": 1.2540012617117222e-05,
      "loss": 0.7628,
      "step": 662770
    },
    {
      "epoch": 2.3228786620263766,
      "grad_norm": 2.984375,
      "learning_rate": 1.253936358845352e-05,
      "loss": 0.7519,
      "step": 662780
    },
    {
      "epoch": 2.3229137095332724,
      "grad_norm": 3.109375,
      "learning_rate": 1.2538714559789818e-05,
      "loss": 0.7871,
      "step": 662790
    },
    {
      "epoch": 2.322948757040168,
      "grad_norm": 3.140625,
      "learning_rate": 1.2538065531126116e-05,
      "loss": 0.8367,
      "step": 662800
    },
    {
      "epoch": 2.3229838045470634,
      "grad_norm": 2.546875,
      "learning_rate": 1.2537416502462416e-05,
      "loss": 0.8406,
      "step": 662810
    },
    {
      "epoch": 2.323018852053959,
      "grad_norm": 2.328125,
      "learning_rate": 1.2536767473798714e-05,
      "loss": 0.7765,
      "step": 662820
    },
    {
      "epoch": 2.323053899560855,
      "grad_norm": 2.828125,
      "learning_rate": 1.2536118445135012e-05,
      "loss": 0.8514,
      "step": 662830
    },
    {
      "epoch": 2.3230889470677503,
      "grad_norm": 2.765625,
      "learning_rate": 1.253546941647131e-05,
      "loss": 0.843,
      "step": 662840
    },
    {
      "epoch": 2.323123994574646,
      "grad_norm": 2.921875,
      "learning_rate": 1.2534820387807608e-05,
      "loss": 0.9616,
      "step": 662850
    },
    {
      "epoch": 2.3231590420815413,
      "grad_norm": 2.421875,
      "learning_rate": 1.2534171359143906e-05,
      "loss": 0.6769,
      "step": 662860
    },
    {
      "epoch": 2.323194089588437,
      "grad_norm": 2.828125,
      "learning_rate": 1.2533522330480204e-05,
      "loss": 0.7715,
      "step": 662870
    },
    {
      "epoch": 2.323229137095333,
      "grad_norm": 2.6875,
      "learning_rate": 1.2532873301816504e-05,
      "loss": 0.7599,
      "step": 662880
    },
    {
      "epoch": 2.323264184602228,
      "grad_norm": 2.5,
      "learning_rate": 1.2532224273152802e-05,
      "loss": 0.8594,
      "step": 662890
    },
    {
      "epoch": 2.323299232109124,
      "grad_norm": 2.59375,
      "learning_rate": 1.25315752444891e-05,
      "loss": 0.7432,
      "step": 662900
    },
    {
      "epoch": 2.3233342796160197,
      "grad_norm": 2.5,
      "learning_rate": 1.2530926215825398e-05,
      "loss": 0.6685,
      "step": 662910
    },
    {
      "epoch": 2.323369327122915,
      "grad_norm": 3.0625,
      "learning_rate": 1.2530277187161692e-05,
      "loss": 0.7535,
      "step": 662920
    },
    {
      "epoch": 2.3234043746298108,
      "grad_norm": 3.265625,
      "learning_rate": 1.2529628158497992e-05,
      "loss": 0.8248,
      "step": 662930
    },
    {
      "epoch": 2.3234394221367065,
      "grad_norm": 2.875,
      "learning_rate": 1.252897912983429e-05,
      "loss": 0.8028,
      "step": 662940
    },
    {
      "epoch": 2.323474469643602,
      "grad_norm": 2.84375,
      "learning_rate": 1.2528330101170588e-05,
      "loss": 0.8625,
      "step": 662950
    },
    {
      "epoch": 2.3235095171504976,
      "grad_norm": 3.046875,
      "learning_rate": 1.2527681072506886e-05,
      "loss": 0.8568,
      "step": 662960
    },
    {
      "epoch": 2.323544564657393,
      "grad_norm": 2.75,
      "learning_rate": 1.2527032043843184e-05,
      "loss": 0.813,
      "step": 662970
    },
    {
      "epoch": 2.3235796121642887,
      "grad_norm": 3.046875,
      "learning_rate": 1.2526383015179482e-05,
      "loss": 0.8556,
      "step": 662980
    },
    {
      "epoch": 2.3236146596711844,
      "grad_norm": 2.765625,
      "learning_rate": 1.252573398651578e-05,
      "loss": 0.8692,
      "step": 662990
    },
    {
      "epoch": 2.3236497071780797,
      "grad_norm": 2.8125,
      "learning_rate": 1.252508495785208e-05,
      "loss": 0.8179,
      "step": 663000
    },
    {
      "epoch": 2.3236847546849755,
      "grad_norm": 2.84375,
      "learning_rate": 1.2524435929188378e-05,
      "loss": 0.9027,
      "step": 663010
    },
    {
      "epoch": 2.3237198021918712,
      "grad_norm": 2.734375,
      "learning_rate": 1.2523786900524676e-05,
      "loss": 0.7832,
      "step": 663020
    },
    {
      "epoch": 2.3237548496987666,
      "grad_norm": 2.640625,
      "learning_rate": 1.2523137871860974e-05,
      "loss": 0.8227,
      "step": 663030
    },
    {
      "epoch": 2.3237898972056623,
      "grad_norm": 3.078125,
      "learning_rate": 1.2522488843197272e-05,
      "loss": 0.7629,
      "step": 663040
    },
    {
      "epoch": 2.323824944712558,
      "grad_norm": 2.609375,
      "learning_rate": 1.252183981453357e-05,
      "loss": 0.8439,
      "step": 663050
    },
    {
      "epoch": 2.3238599922194534,
      "grad_norm": 2.765625,
      "learning_rate": 1.2521190785869868e-05,
      "loss": 0.7924,
      "step": 663060
    },
    {
      "epoch": 2.323895039726349,
      "grad_norm": 2.546875,
      "learning_rate": 1.2520541757206167e-05,
      "loss": 0.8937,
      "step": 663070
    },
    {
      "epoch": 2.3239300872332445,
      "grad_norm": 2.921875,
      "learning_rate": 1.2519892728542465e-05,
      "loss": 0.806,
      "step": 663080
    },
    {
      "epoch": 2.32396513474014,
      "grad_norm": 2.59375,
      "learning_rate": 1.2519243699878763e-05,
      "loss": 0.7661,
      "step": 663090
    },
    {
      "epoch": 2.324000182247036,
      "grad_norm": 3.796875,
      "learning_rate": 1.2518594671215061e-05,
      "loss": 0.9121,
      "step": 663100
    },
    {
      "epoch": 2.3240352297539313,
      "grad_norm": 2.953125,
      "learning_rate": 1.2517945642551358e-05,
      "loss": 0.7423,
      "step": 663110
    },
    {
      "epoch": 2.324070277260827,
      "grad_norm": 2.890625,
      "learning_rate": 1.2517296613887656e-05,
      "loss": 0.7262,
      "step": 663120
    },
    {
      "epoch": 2.324105324767723,
      "grad_norm": 2.828125,
      "learning_rate": 1.2516647585223954e-05,
      "loss": 0.8754,
      "step": 663130
    },
    {
      "epoch": 2.324140372274618,
      "grad_norm": 2.8125,
      "learning_rate": 1.2515998556560252e-05,
      "loss": 0.7322,
      "step": 663140
    },
    {
      "epoch": 2.324175419781514,
      "grad_norm": 3.0,
      "learning_rate": 1.251534952789655e-05,
      "loss": 0.8817,
      "step": 663150
    },
    {
      "epoch": 2.3242104672884096,
      "grad_norm": 3.25,
      "learning_rate": 1.2514700499232848e-05,
      "loss": 0.826,
      "step": 663160
    },
    {
      "epoch": 2.324245514795305,
      "grad_norm": 3.421875,
      "learning_rate": 1.2514051470569146e-05,
      "loss": 0.8465,
      "step": 663170
    },
    {
      "epoch": 2.3242805623022007,
      "grad_norm": 2.765625,
      "learning_rate": 1.2513402441905445e-05,
      "loss": 0.7809,
      "step": 663180
    },
    {
      "epoch": 2.324315609809096,
      "grad_norm": 3.015625,
      "learning_rate": 1.2512753413241743e-05,
      "loss": 0.7513,
      "step": 663190
    },
    {
      "epoch": 2.3243506573159918,
      "grad_norm": 2.40625,
      "learning_rate": 1.2512104384578041e-05,
      "loss": 0.8319,
      "step": 663200
    },
    {
      "epoch": 2.3243857048228875,
      "grad_norm": 2.796875,
      "learning_rate": 1.251145535591434e-05,
      "loss": 0.8496,
      "step": 663210
    },
    {
      "epoch": 2.324420752329783,
      "grad_norm": 3.34375,
      "learning_rate": 1.2510806327250637e-05,
      "loss": 0.7949,
      "step": 663220
    },
    {
      "epoch": 2.3244557998366786,
      "grad_norm": 2.90625,
      "learning_rate": 1.2510157298586935e-05,
      "loss": 0.8554,
      "step": 663230
    },
    {
      "epoch": 2.3244908473435744,
      "grad_norm": 3.0625,
      "learning_rate": 1.2509508269923233e-05,
      "loss": 0.7836,
      "step": 663240
    },
    {
      "epoch": 2.3245258948504697,
      "grad_norm": 3.078125,
      "learning_rate": 1.2508859241259533e-05,
      "loss": 0.8551,
      "step": 663250
    },
    {
      "epoch": 2.3245609423573654,
      "grad_norm": 2.515625,
      "learning_rate": 1.2508210212595831e-05,
      "loss": 0.7683,
      "step": 663260
    },
    {
      "epoch": 2.324595989864261,
      "grad_norm": 2.75,
      "learning_rate": 1.2507561183932129e-05,
      "loss": 0.8072,
      "step": 663270
    },
    {
      "epoch": 2.3246310373711565,
      "grad_norm": 3.046875,
      "learning_rate": 1.2506912155268427e-05,
      "loss": 0.7961,
      "step": 663280
    },
    {
      "epoch": 2.3246660848780523,
      "grad_norm": 2.859375,
      "learning_rate": 1.2506263126604725e-05,
      "loss": 0.7605,
      "step": 663290
    },
    {
      "epoch": 2.3247011323849476,
      "grad_norm": 2.5,
      "learning_rate": 1.2505614097941021e-05,
      "loss": 0.751,
      "step": 663300
    },
    {
      "epoch": 2.3247361798918433,
      "grad_norm": 3.09375,
      "learning_rate": 1.250496506927732e-05,
      "loss": 0.8901,
      "step": 663310
    },
    {
      "epoch": 2.324771227398739,
      "grad_norm": 3.1875,
      "learning_rate": 1.2504316040613617e-05,
      "loss": 0.8146,
      "step": 663320
    },
    {
      "epoch": 2.3248062749056344,
      "grad_norm": 3.203125,
      "learning_rate": 1.2503667011949915e-05,
      "loss": 0.8266,
      "step": 663330
    },
    {
      "epoch": 2.32484132241253,
      "grad_norm": 2.71875,
      "learning_rate": 1.2503017983286213e-05,
      "loss": 0.7806,
      "step": 663340
    },
    {
      "epoch": 2.324876369919426,
      "grad_norm": 3.390625,
      "learning_rate": 1.2502368954622511e-05,
      "loss": 0.9303,
      "step": 663350
    },
    {
      "epoch": 2.3249114174263212,
      "grad_norm": 2.90625,
      "learning_rate": 1.2501719925958811e-05,
      "loss": 0.7853,
      "step": 663360
    },
    {
      "epoch": 2.324946464933217,
      "grad_norm": 3.140625,
      "learning_rate": 1.2501070897295109e-05,
      "loss": 0.7811,
      "step": 663370
    },
    {
      "epoch": 2.3249815124401128,
      "grad_norm": 2.84375,
      "learning_rate": 1.2500421868631407e-05,
      "loss": 0.7327,
      "step": 663380
    },
    {
      "epoch": 2.325016559947008,
      "grad_norm": 2.953125,
      "learning_rate": 1.2499772839967705e-05,
      "loss": 0.7823,
      "step": 663390
    },
    {
      "epoch": 2.325051607453904,
      "grad_norm": 3.171875,
      "learning_rate": 1.2499123811304003e-05,
      "loss": 0.8649,
      "step": 663400
    },
    {
      "epoch": 2.325086654960799,
      "grad_norm": 2.234375,
      "learning_rate": 1.2498474782640301e-05,
      "loss": 0.8003,
      "step": 663410
    },
    {
      "epoch": 2.325121702467695,
      "grad_norm": 2.734375,
      "learning_rate": 1.2497825753976599e-05,
      "loss": 0.7702,
      "step": 663420
    },
    {
      "epoch": 2.3251567499745907,
      "grad_norm": 2.859375,
      "learning_rate": 1.2497176725312899e-05,
      "loss": 0.8136,
      "step": 663430
    },
    {
      "epoch": 2.325191797481486,
      "grad_norm": 2.625,
      "learning_rate": 1.2496527696649195e-05,
      "loss": 0.7345,
      "step": 663440
    },
    {
      "epoch": 2.3252268449883817,
      "grad_norm": 2.90625,
      "learning_rate": 1.2495878667985493e-05,
      "loss": 0.7328,
      "step": 663450
    },
    {
      "epoch": 2.3252618924952775,
      "grad_norm": 2.671875,
      "learning_rate": 1.2495229639321791e-05,
      "loss": 0.7966,
      "step": 663460
    },
    {
      "epoch": 2.325296940002173,
      "grad_norm": 3.03125,
      "learning_rate": 1.2494580610658089e-05,
      "loss": 0.8654,
      "step": 663470
    },
    {
      "epoch": 2.3253319875090686,
      "grad_norm": 3.234375,
      "learning_rate": 1.2493931581994387e-05,
      "loss": 0.8421,
      "step": 663480
    },
    {
      "epoch": 2.3253670350159643,
      "grad_norm": 3.03125,
      "learning_rate": 1.2493282553330687e-05,
      "loss": 0.8603,
      "step": 663490
    },
    {
      "epoch": 2.3254020825228596,
      "grad_norm": 2.8125,
      "learning_rate": 1.2492633524666985e-05,
      "loss": 0.8923,
      "step": 663500
    },
    {
      "epoch": 2.3254371300297554,
      "grad_norm": 2.9375,
      "learning_rate": 1.2491984496003283e-05,
      "loss": 0.7578,
      "step": 663510
    },
    {
      "epoch": 2.3254721775366507,
      "grad_norm": 3.09375,
      "learning_rate": 1.249133546733958e-05,
      "loss": 0.8828,
      "step": 663520
    },
    {
      "epoch": 2.3255072250435465,
      "grad_norm": 3.171875,
      "learning_rate": 1.2490686438675877e-05,
      "loss": 0.8201,
      "step": 663530
    },
    {
      "epoch": 2.325542272550442,
      "grad_norm": 3.015625,
      "learning_rate": 1.2490037410012175e-05,
      "loss": 0.816,
      "step": 663540
    },
    {
      "epoch": 2.3255773200573375,
      "grad_norm": 2.609375,
      "learning_rate": 1.2489388381348475e-05,
      "loss": 0.7922,
      "step": 663550
    },
    {
      "epoch": 2.3256123675642333,
      "grad_norm": 2.765625,
      "learning_rate": 1.2488739352684773e-05,
      "loss": 0.8385,
      "step": 663560
    },
    {
      "epoch": 2.325647415071129,
      "grad_norm": 2.84375,
      "learning_rate": 1.248809032402107e-05,
      "loss": 0.7278,
      "step": 663570
    },
    {
      "epoch": 2.3256824625780244,
      "grad_norm": 3.34375,
      "learning_rate": 1.2487441295357369e-05,
      "loss": 0.7529,
      "step": 663580
    },
    {
      "epoch": 2.32571751008492,
      "grad_norm": 2.984375,
      "learning_rate": 1.2486792266693667e-05,
      "loss": 0.8325,
      "step": 663590
    },
    {
      "epoch": 2.325752557591816,
      "grad_norm": 2.703125,
      "learning_rate": 1.2486143238029965e-05,
      "loss": 0.787,
      "step": 663600
    },
    {
      "epoch": 2.325787605098711,
      "grad_norm": 2.65625,
      "learning_rate": 1.2485494209366263e-05,
      "loss": 0.8588,
      "step": 663610
    },
    {
      "epoch": 2.325822652605607,
      "grad_norm": 2.875,
      "learning_rate": 1.2484845180702562e-05,
      "loss": 0.7988,
      "step": 663620
    },
    {
      "epoch": 2.3258577001125023,
      "grad_norm": 3.546875,
      "learning_rate": 1.2484196152038859e-05,
      "loss": 0.8939,
      "step": 663630
    },
    {
      "epoch": 2.325892747619398,
      "grad_norm": 2.765625,
      "learning_rate": 1.2483547123375157e-05,
      "loss": 0.7163,
      "step": 663640
    },
    {
      "epoch": 2.3259277951262938,
      "grad_norm": 2.765625,
      "learning_rate": 1.2482898094711455e-05,
      "loss": 0.82,
      "step": 663650
    },
    {
      "epoch": 2.3259628426331895,
      "grad_norm": 2.640625,
      "learning_rate": 1.2482249066047753e-05,
      "loss": 0.8307,
      "step": 663660
    },
    {
      "epoch": 2.325997890140085,
      "grad_norm": 2.5,
      "learning_rate": 1.2481600037384052e-05,
      "loss": 0.8067,
      "step": 663670
    },
    {
      "epoch": 2.3260329376469806,
      "grad_norm": 2.84375,
      "learning_rate": 1.248095100872035e-05,
      "loss": 0.7417,
      "step": 663680
    },
    {
      "epoch": 2.326067985153876,
      "grad_norm": 2.75,
      "learning_rate": 1.2480301980056648e-05,
      "loss": 0.7978,
      "step": 663690
    },
    {
      "epoch": 2.3261030326607717,
      "grad_norm": 2.8125,
      "learning_rate": 1.2479652951392946e-05,
      "loss": 0.7998,
      "step": 663700
    },
    {
      "epoch": 2.3261380801676674,
      "grad_norm": 2.90625,
      "learning_rate": 1.2479003922729244e-05,
      "loss": 0.7975,
      "step": 663710
    },
    {
      "epoch": 2.3261731276745627,
      "grad_norm": 2.75,
      "learning_rate": 1.247835489406554e-05,
      "loss": 0.8207,
      "step": 663720
    },
    {
      "epoch": 2.3262081751814585,
      "grad_norm": 2.921875,
      "learning_rate": 1.247770586540184e-05,
      "loss": 0.7975,
      "step": 663730
    },
    {
      "epoch": 2.326243222688354,
      "grad_norm": 2.9375,
      "learning_rate": 1.2477056836738138e-05,
      "loss": 0.8521,
      "step": 663740
    },
    {
      "epoch": 2.3262782701952496,
      "grad_norm": 3.5,
      "learning_rate": 1.2476407808074436e-05,
      "loss": 0.7659,
      "step": 663750
    },
    {
      "epoch": 2.3263133177021453,
      "grad_norm": 3.34375,
      "learning_rate": 1.2475758779410734e-05,
      "loss": 0.8401,
      "step": 663760
    },
    {
      "epoch": 2.326348365209041,
      "grad_norm": 2.328125,
      "learning_rate": 1.2475109750747032e-05,
      "loss": 0.83,
      "step": 663770
    },
    {
      "epoch": 2.3263834127159364,
      "grad_norm": 3.171875,
      "learning_rate": 1.247446072208333e-05,
      "loss": 0.8558,
      "step": 663780
    },
    {
      "epoch": 2.326418460222832,
      "grad_norm": 3.375,
      "learning_rate": 1.2473811693419628e-05,
      "loss": 0.8518,
      "step": 663790
    },
    {
      "epoch": 2.3264535077297275,
      "grad_norm": 3.09375,
      "learning_rate": 1.2473162664755928e-05,
      "loss": 0.7886,
      "step": 663800
    },
    {
      "epoch": 2.3264885552366232,
      "grad_norm": 3.046875,
      "learning_rate": 1.2472513636092224e-05,
      "loss": 0.8801,
      "step": 663810
    },
    {
      "epoch": 2.326523602743519,
      "grad_norm": 2.796875,
      "learning_rate": 1.2471864607428522e-05,
      "loss": 0.7383,
      "step": 663820
    },
    {
      "epoch": 2.3265586502504143,
      "grad_norm": 2.953125,
      "learning_rate": 1.247121557876482e-05,
      "loss": 0.6932,
      "step": 663830
    },
    {
      "epoch": 2.32659369775731,
      "grad_norm": 2.859375,
      "learning_rate": 1.2470566550101118e-05,
      "loss": 0.8151,
      "step": 663840
    },
    {
      "epoch": 2.326628745264206,
      "grad_norm": 2.9375,
      "learning_rate": 1.2469917521437416e-05,
      "loss": 0.8041,
      "step": 663850
    },
    {
      "epoch": 2.326663792771101,
      "grad_norm": 3.078125,
      "learning_rate": 1.2469268492773716e-05,
      "loss": 0.8045,
      "step": 663860
    },
    {
      "epoch": 2.326698840277997,
      "grad_norm": 2.546875,
      "learning_rate": 1.2468619464110014e-05,
      "loss": 0.7378,
      "step": 663870
    },
    {
      "epoch": 2.3267338877848927,
      "grad_norm": 2.46875,
      "learning_rate": 1.2467970435446312e-05,
      "loss": 0.7611,
      "step": 663880
    },
    {
      "epoch": 2.326768935291788,
      "grad_norm": 2.875,
      "learning_rate": 1.246732140678261e-05,
      "loss": 0.7342,
      "step": 663890
    },
    {
      "epoch": 2.3268039827986837,
      "grad_norm": 2.796875,
      "learning_rate": 1.2466672378118908e-05,
      "loss": 0.7753,
      "step": 663900
    },
    {
      "epoch": 2.326839030305579,
      "grad_norm": 3.203125,
      "learning_rate": 1.2466023349455206e-05,
      "loss": 0.8408,
      "step": 663910
    },
    {
      "epoch": 2.326874077812475,
      "grad_norm": 2.484375,
      "learning_rate": 1.2465374320791504e-05,
      "loss": 0.7394,
      "step": 663920
    },
    {
      "epoch": 2.3269091253193706,
      "grad_norm": 2.9375,
      "learning_rate": 1.2464725292127802e-05,
      "loss": 0.7799,
      "step": 663930
    },
    {
      "epoch": 2.326944172826266,
      "grad_norm": 2.90625,
      "learning_rate": 1.24640762634641e-05,
      "loss": 0.8221,
      "step": 663940
    },
    {
      "epoch": 2.3269792203331616,
      "grad_norm": 3.03125,
      "learning_rate": 1.2463427234800398e-05,
      "loss": 0.83,
      "step": 663950
    },
    {
      "epoch": 2.3270142678400574,
      "grad_norm": 3.296875,
      "learning_rate": 1.2462778206136696e-05,
      "loss": 0.8366,
      "step": 663960
    },
    {
      "epoch": 2.3270493153469527,
      "grad_norm": 2.546875,
      "learning_rate": 1.2462129177472994e-05,
      "loss": 0.8244,
      "step": 663970
    },
    {
      "epoch": 2.3270843628538485,
      "grad_norm": 3.203125,
      "learning_rate": 1.2461480148809294e-05,
      "loss": 0.7585,
      "step": 663980
    },
    {
      "epoch": 2.327119410360744,
      "grad_norm": 3.21875,
      "learning_rate": 1.2460831120145592e-05,
      "loss": 0.8455,
      "step": 663990
    },
    {
      "epoch": 2.3271544578676395,
      "grad_norm": 2.96875,
      "learning_rate": 1.2460182091481888e-05,
      "loss": 0.8013,
      "step": 664000
    },
    {
      "epoch": 2.3271895053745353,
      "grad_norm": 2.75,
      "learning_rate": 1.2459533062818186e-05,
      "loss": 0.8198,
      "step": 664010
    },
    {
      "epoch": 2.3272245528814306,
      "grad_norm": 2.703125,
      "learning_rate": 1.2458884034154484e-05,
      "loss": 0.6842,
      "step": 664020
    },
    {
      "epoch": 2.3272596003883264,
      "grad_norm": 3.15625,
      "learning_rate": 1.2458235005490782e-05,
      "loss": 0.8198,
      "step": 664030
    },
    {
      "epoch": 2.327294647895222,
      "grad_norm": 3.28125,
      "learning_rate": 1.2457585976827082e-05,
      "loss": 0.7902,
      "step": 664040
    },
    {
      "epoch": 2.3273296954021174,
      "grad_norm": 3.3125,
      "learning_rate": 1.245693694816338e-05,
      "loss": 0.869,
      "step": 664050
    },
    {
      "epoch": 2.327364742909013,
      "grad_norm": 2.984375,
      "learning_rate": 1.2456287919499678e-05,
      "loss": 0.9026,
      "step": 664060
    },
    {
      "epoch": 2.327399790415909,
      "grad_norm": 3.015625,
      "learning_rate": 1.2455638890835976e-05,
      "loss": 0.7966,
      "step": 664070
    },
    {
      "epoch": 2.3274348379228043,
      "grad_norm": 3.03125,
      "learning_rate": 1.2454989862172274e-05,
      "loss": 0.8993,
      "step": 664080
    },
    {
      "epoch": 2.3274698854297,
      "grad_norm": 2.90625,
      "learning_rate": 1.2454340833508572e-05,
      "loss": 0.8455,
      "step": 664090
    },
    {
      "epoch": 2.3275049329365958,
      "grad_norm": 2.765625,
      "learning_rate": 1.245369180484487e-05,
      "loss": 0.8602,
      "step": 664100
    },
    {
      "epoch": 2.327539980443491,
      "grad_norm": 3.0,
      "learning_rate": 1.2453042776181168e-05,
      "loss": 0.8842,
      "step": 664110
    },
    {
      "epoch": 2.327575027950387,
      "grad_norm": 3.078125,
      "learning_rate": 1.2452393747517466e-05,
      "loss": 0.8149,
      "step": 664120
    },
    {
      "epoch": 2.327610075457282,
      "grad_norm": 2.9375,
      "learning_rate": 1.2451744718853764e-05,
      "loss": 0.838,
      "step": 664130
    },
    {
      "epoch": 2.327645122964178,
      "grad_norm": 2.3125,
      "learning_rate": 1.2451095690190062e-05,
      "loss": 0.7639,
      "step": 664140
    },
    {
      "epoch": 2.3276801704710737,
      "grad_norm": 3.125,
      "learning_rate": 1.245044666152636e-05,
      "loss": 0.7961,
      "step": 664150
    },
    {
      "epoch": 2.327715217977969,
      "grad_norm": 2.78125,
      "learning_rate": 1.2449797632862658e-05,
      "loss": 0.8041,
      "step": 664160
    },
    {
      "epoch": 2.3277502654848647,
      "grad_norm": 2.796875,
      "learning_rate": 1.2449148604198958e-05,
      "loss": 0.8532,
      "step": 664170
    },
    {
      "epoch": 2.3277853129917605,
      "grad_norm": 3.046875,
      "learning_rate": 1.2448499575535256e-05,
      "loss": 0.8051,
      "step": 664180
    },
    {
      "epoch": 2.327820360498656,
      "grad_norm": 2.984375,
      "learning_rate": 1.2447850546871552e-05,
      "loss": 0.8424,
      "step": 664190
    },
    {
      "epoch": 2.3278554080055516,
      "grad_norm": 2.96875,
      "learning_rate": 1.244720151820785e-05,
      "loss": 0.8434,
      "step": 664200
    },
    {
      "epoch": 2.3278904555124473,
      "grad_norm": 2.765625,
      "learning_rate": 1.2446552489544148e-05,
      "loss": 0.8255,
      "step": 664210
    },
    {
      "epoch": 2.3279255030193426,
      "grad_norm": 2.96875,
      "learning_rate": 1.2445903460880448e-05,
      "loss": 0.7728,
      "step": 664220
    },
    {
      "epoch": 2.3279605505262384,
      "grad_norm": 2.859375,
      "learning_rate": 1.2445254432216746e-05,
      "loss": 0.7871,
      "step": 664230
    },
    {
      "epoch": 2.3279955980331337,
      "grad_norm": 3.078125,
      "learning_rate": 1.2444605403553044e-05,
      "loss": 0.8475,
      "step": 664240
    },
    {
      "epoch": 2.3280306455400295,
      "grad_norm": 3.328125,
      "learning_rate": 1.2443956374889342e-05,
      "loss": 0.8347,
      "step": 664250
    },
    {
      "epoch": 2.3280656930469252,
      "grad_norm": 2.75,
      "learning_rate": 1.244330734622564e-05,
      "loss": 0.8393,
      "step": 664260
    },
    {
      "epoch": 2.3281007405538205,
      "grad_norm": 3.484375,
      "learning_rate": 1.2442658317561938e-05,
      "loss": 0.7329,
      "step": 664270
    },
    {
      "epoch": 2.3281357880607163,
      "grad_norm": 3.203125,
      "learning_rate": 1.2442009288898236e-05,
      "loss": 0.8236,
      "step": 664280
    },
    {
      "epoch": 2.328170835567612,
      "grad_norm": 3.328125,
      "learning_rate": 1.2441360260234534e-05,
      "loss": 0.8832,
      "step": 664290
    },
    {
      "epoch": 2.3282058830745074,
      "grad_norm": 3.171875,
      "learning_rate": 1.2440711231570832e-05,
      "loss": 0.7782,
      "step": 664300
    },
    {
      "epoch": 2.328240930581403,
      "grad_norm": 2.78125,
      "learning_rate": 1.244006220290713e-05,
      "loss": 0.8577,
      "step": 664310
    },
    {
      "epoch": 2.328275978088299,
      "grad_norm": 2.890625,
      "learning_rate": 1.2439413174243428e-05,
      "loss": 0.7767,
      "step": 664320
    },
    {
      "epoch": 2.328311025595194,
      "grad_norm": 2.53125,
      "learning_rate": 1.2438764145579726e-05,
      "loss": 0.8515,
      "step": 664330
    },
    {
      "epoch": 2.32834607310209,
      "grad_norm": 3.265625,
      "learning_rate": 1.2438115116916024e-05,
      "loss": 0.7614,
      "step": 664340
    },
    {
      "epoch": 2.3283811206089853,
      "grad_norm": 2.953125,
      "learning_rate": 1.2437466088252323e-05,
      "loss": 0.8573,
      "step": 664350
    },
    {
      "epoch": 2.328416168115881,
      "grad_norm": 2.828125,
      "learning_rate": 1.2436817059588621e-05,
      "loss": 0.8455,
      "step": 664360
    },
    {
      "epoch": 2.328451215622777,
      "grad_norm": 2.875,
      "learning_rate": 1.243616803092492e-05,
      "loss": 0.7819,
      "step": 664370
    },
    {
      "epoch": 2.328486263129672,
      "grad_norm": 2.65625,
      "learning_rate": 1.2435519002261216e-05,
      "loss": 0.8348,
      "step": 664380
    },
    {
      "epoch": 2.328521310636568,
      "grad_norm": 2.78125,
      "learning_rate": 1.2434869973597514e-05,
      "loss": 0.8347,
      "step": 664390
    },
    {
      "epoch": 2.3285563581434636,
      "grad_norm": 2.75,
      "learning_rate": 1.2434220944933812e-05,
      "loss": 0.8775,
      "step": 664400
    },
    {
      "epoch": 2.328591405650359,
      "grad_norm": 3.484375,
      "learning_rate": 1.2433571916270111e-05,
      "loss": 0.8968,
      "step": 664410
    },
    {
      "epoch": 2.3286264531572547,
      "grad_norm": 2.859375,
      "learning_rate": 1.243292288760641e-05,
      "loss": 0.8306,
      "step": 664420
    },
    {
      "epoch": 2.3286615006641505,
      "grad_norm": 2.859375,
      "learning_rate": 1.2432273858942707e-05,
      "loss": 0.8422,
      "step": 664430
    },
    {
      "epoch": 2.3286965481710458,
      "grad_norm": 2.890625,
      "learning_rate": 1.2431624830279005e-05,
      "loss": 0.8463,
      "step": 664440
    },
    {
      "epoch": 2.3287315956779415,
      "grad_norm": 2.578125,
      "learning_rate": 1.2430975801615303e-05,
      "loss": 0.7102,
      "step": 664450
    },
    {
      "epoch": 2.328766643184837,
      "grad_norm": 2.90625,
      "learning_rate": 1.2430326772951601e-05,
      "loss": 0.7456,
      "step": 664460
    },
    {
      "epoch": 2.3288016906917326,
      "grad_norm": 3.015625,
      "learning_rate": 1.24296777442879e-05,
      "loss": 0.8642,
      "step": 664470
    },
    {
      "epoch": 2.3288367381986284,
      "grad_norm": 2.84375,
      "learning_rate": 1.2429028715624197e-05,
      "loss": 0.8283,
      "step": 664480
    },
    {
      "epoch": 2.3288717857055237,
      "grad_norm": 2.78125,
      "learning_rate": 1.2428379686960495e-05,
      "loss": 0.8459,
      "step": 664490
    },
    {
      "epoch": 2.3289068332124194,
      "grad_norm": 2.734375,
      "learning_rate": 1.2427730658296793e-05,
      "loss": 0.9145,
      "step": 664500
    },
    {
      "epoch": 2.328941880719315,
      "grad_norm": 2.5625,
      "learning_rate": 1.2427081629633091e-05,
      "loss": 0.7165,
      "step": 664510
    },
    {
      "epoch": 2.3289769282262105,
      "grad_norm": 3.0625,
      "learning_rate": 1.242643260096939e-05,
      "loss": 0.785,
      "step": 664520
    },
    {
      "epoch": 2.3290119757331063,
      "grad_norm": 3.234375,
      "learning_rate": 1.2425783572305689e-05,
      "loss": 0.8517,
      "step": 664530
    },
    {
      "epoch": 2.329047023240002,
      "grad_norm": 3.21875,
      "learning_rate": 1.2425134543641987e-05,
      "loss": 0.8484,
      "step": 664540
    },
    {
      "epoch": 2.3290820707468973,
      "grad_norm": 3.375,
      "learning_rate": 1.2424485514978285e-05,
      "loss": 0.832,
      "step": 664550
    },
    {
      "epoch": 2.329117118253793,
      "grad_norm": 3.15625,
      "learning_rate": 1.2423836486314583e-05,
      "loss": 0.8558,
      "step": 664560
    },
    {
      "epoch": 2.3291521657606884,
      "grad_norm": 3.0,
      "learning_rate": 1.242318745765088e-05,
      "loss": 0.7984,
      "step": 664570
    },
    {
      "epoch": 2.329187213267584,
      "grad_norm": 2.78125,
      "learning_rate": 1.2422538428987177e-05,
      "loss": 0.7644,
      "step": 664580
    },
    {
      "epoch": 2.32922226077448,
      "grad_norm": 3.28125,
      "learning_rate": 1.2421889400323477e-05,
      "loss": 0.8528,
      "step": 664590
    },
    {
      "epoch": 2.3292573082813752,
      "grad_norm": 2.9375,
      "learning_rate": 1.2421240371659775e-05,
      "loss": 0.8429,
      "step": 664600
    },
    {
      "epoch": 2.329292355788271,
      "grad_norm": 3.046875,
      "learning_rate": 1.2420591342996073e-05,
      "loss": 0.7996,
      "step": 664610
    },
    {
      "epoch": 2.3293274032951667,
      "grad_norm": 2.765625,
      "learning_rate": 1.2419942314332371e-05,
      "loss": 0.8466,
      "step": 664620
    },
    {
      "epoch": 2.329362450802062,
      "grad_norm": 2.9375,
      "learning_rate": 1.2419293285668669e-05,
      "loss": 0.823,
      "step": 664630
    },
    {
      "epoch": 2.329397498308958,
      "grad_norm": 2.703125,
      "learning_rate": 1.2418644257004967e-05,
      "loss": 0.8381,
      "step": 664640
    },
    {
      "epoch": 2.3294325458158536,
      "grad_norm": 3.078125,
      "learning_rate": 1.2417995228341265e-05,
      "loss": 0.7908,
      "step": 664650
    },
    {
      "epoch": 2.329467593322749,
      "grad_norm": 2.53125,
      "learning_rate": 1.2417346199677563e-05,
      "loss": 0.8401,
      "step": 664660
    },
    {
      "epoch": 2.3295026408296446,
      "grad_norm": 2.609375,
      "learning_rate": 1.2416697171013861e-05,
      "loss": 0.7912,
      "step": 664670
    },
    {
      "epoch": 2.32953768833654,
      "grad_norm": 2.765625,
      "learning_rate": 1.2416048142350159e-05,
      "loss": 0.884,
      "step": 664680
    },
    {
      "epoch": 2.3295727358434357,
      "grad_norm": 2.921875,
      "learning_rate": 1.2415399113686457e-05,
      "loss": 0.7573,
      "step": 664690
    },
    {
      "epoch": 2.3296077833503315,
      "grad_norm": 2.9375,
      "learning_rate": 1.2414750085022755e-05,
      "loss": 0.7632,
      "step": 664700
    },
    {
      "epoch": 2.329642830857227,
      "grad_norm": 2.671875,
      "learning_rate": 1.2414101056359053e-05,
      "loss": 0.8498,
      "step": 664710
    },
    {
      "epoch": 2.3296778783641225,
      "grad_norm": 3.09375,
      "learning_rate": 1.2413452027695353e-05,
      "loss": 0.8737,
      "step": 664720
    },
    {
      "epoch": 2.3297129258710183,
      "grad_norm": 2.546875,
      "learning_rate": 1.241280299903165e-05,
      "loss": 0.8117,
      "step": 664730
    },
    {
      "epoch": 2.3297479733779136,
      "grad_norm": 3.46875,
      "learning_rate": 1.2412153970367949e-05,
      "loss": 0.8672,
      "step": 664740
    },
    {
      "epoch": 2.3297830208848094,
      "grad_norm": 3.375,
      "learning_rate": 1.2411504941704247e-05,
      "loss": 0.7067,
      "step": 664750
    },
    {
      "epoch": 2.329818068391705,
      "grad_norm": 3.03125,
      "learning_rate": 1.2410855913040543e-05,
      "loss": 0.8149,
      "step": 664760
    },
    {
      "epoch": 2.3298531158986004,
      "grad_norm": 2.609375,
      "learning_rate": 1.2410206884376843e-05,
      "loss": 0.8734,
      "step": 664770
    },
    {
      "epoch": 2.329888163405496,
      "grad_norm": 3.125,
      "learning_rate": 1.240955785571314e-05,
      "loss": 0.8188,
      "step": 664780
    },
    {
      "epoch": 2.3299232109123915,
      "grad_norm": 3.203125,
      "learning_rate": 1.2408908827049439e-05,
      "loss": 0.835,
      "step": 664790
    },
    {
      "epoch": 2.3299582584192873,
      "grad_norm": 2.515625,
      "learning_rate": 1.2408259798385737e-05,
      "loss": 0.7776,
      "step": 664800
    },
    {
      "epoch": 2.329993305926183,
      "grad_norm": 2.953125,
      "learning_rate": 1.2407610769722035e-05,
      "loss": 0.8784,
      "step": 664810
    },
    {
      "epoch": 2.3300283534330783,
      "grad_norm": 3.265625,
      "learning_rate": 1.2406961741058333e-05,
      "loss": 0.7791,
      "step": 664820
    },
    {
      "epoch": 2.330063400939974,
      "grad_norm": 3.234375,
      "learning_rate": 1.240631271239463e-05,
      "loss": 0.7183,
      "step": 664830
    },
    {
      "epoch": 2.33009844844687,
      "grad_norm": 3.03125,
      "learning_rate": 1.240566368373093e-05,
      "loss": 0.8918,
      "step": 664840
    },
    {
      "epoch": 2.330133495953765,
      "grad_norm": 3.203125,
      "learning_rate": 1.2405014655067227e-05,
      "loss": 0.8287,
      "step": 664850
    },
    {
      "epoch": 2.330168543460661,
      "grad_norm": 2.953125,
      "learning_rate": 1.2404365626403525e-05,
      "loss": 0.7751,
      "step": 664860
    },
    {
      "epoch": 2.3302035909675567,
      "grad_norm": 2.953125,
      "learning_rate": 1.2403716597739823e-05,
      "loss": 0.8026,
      "step": 664870
    },
    {
      "epoch": 2.330238638474452,
      "grad_norm": 2.921875,
      "learning_rate": 1.240306756907612e-05,
      "loss": 0.7638,
      "step": 664880
    },
    {
      "epoch": 2.3302736859813478,
      "grad_norm": 3.171875,
      "learning_rate": 1.2402418540412419e-05,
      "loss": 0.8344,
      "step": 664890
    },
    {
      "epoch": 2.330308733488243,
      "grad_norm": 2.734375,
      "learning_rate": 1.2401769511748718e-05,
      "loss": 0.8264,
      "step": 664900
    },
    {
      "epoch": 2.330343780995139,
      "grad_norm": 3.546875,
      "learning_rate": 1.2401120483085016e-05,
      "loss": 0.8044,
      "step": 664910
    },
    {
      "epoch": 2.3303788285020346,
      "grad_norm": 3.40625,
      "learning_rate": 1.2400471454421314e-05,
      "loss": 0.8078,
      "step": 664920
    },
    {
      "epoch": 2.3304138760089304,
      "grad_norm": 2.53125,
      "learning_rate": 1.2399822425757612e-05,
      "loss": 0.7854,
      "step": 664930
    },
    {
      "epoch": 2.3304489235158257,
      "grad_norm": 2.765625,
      "learning_rate": 1.2399173397093909e-05,
      "loss": 0.7487,
      "step": 664940
    },
    {
      "epoch": 2.3304839710227214,
      "grad_norm": 3.453125,
      "learning_rate": 1.2398524368430207e-05,
      "loss": 0.8701,
      "step": 664950
    },
    {
      "epoch": 2.3305190185296167,
      "grad_norm": 3.390625,
      "learning_rate": 1.2397875339766506e-05,
      "loss": 0.8453,
      "step": 664960
    },
    {
      "epoch": 2.3305540660365125,
      "grad_norm": 3.203125,
      "learning_rate": 1.2397226311102804e-05,
      "loss": 0.8156,
      "step": 664970
    },
    {
      "epoch": 2.3305891135434083,
      "grad_norm": 3.265625,
      "learning_rate": 1.2396577282439102e-05,
      "loss": 0.7814,
      "step": 664980
    },
    {
      "epoch": 2.3306241610503036,
      "grad_norm": 3.015625,
      "learning_rate": 1.23959282537754e-05,
      "loss": 0.8165,
      "step": 664990
    },
    {
      "epoch": 2.3306592085571993,
      "grad_norm": 3.140625,
      "learning_rate": 1.2395279225111698e-05,
      "loss": 0.7636,
      "step": 665000
    },
    {
      "epoch": 2.3306592085571993,
      "eval_loss": 0.7611120939254761,
      "eval_runtime": 562.3791,
      "eval_samples_per_second": 676.476,
      "eval_steps_per_second": 56.373,
      "step": 665000
    },
    {
      "epoch": 2.3306942560640946,
      "grad_norm": 2.65625,
      "learning_rate": 1.2394630196447996e-05,
      "loss": 0.7814,
      "step": 665010
    },
    {
      "epoch": 2.3307293035709904,
      "grad_norm": 2.71875,
      "learning_rate": 1.2393981167784294e-05,
      "loss": 0.7552,
      "step": 665020
    },
    {
      "epoch": 2.330764351077886,
      "grad_norm": 2.6875,
      "learning_rate": 1.2393332139120594e-05,
      "loss": 0.7915,
      "step": 665030
    },
    {
      "epoch": 2.330799398584782,
      "grad_norm": 3.0625,
      "learning_rate": 1.239268311045689e-05,
      "loss": 0.8117,
      "step": 665040
    },
    {
      "epoch": 2.3308344460916772,
      "grad_norm": 2.703125,
      "learning_rate": 1.2392034081793188e-05,
      "loss": 0.8391,
      "step": 665050
    },
    {
      "epoch": 2.330869493598573,
      "grad_norm": 2.90625,
      "learning_rate": 1.2391385053129486e-05,
      "loss": 0.8152,
      "step": 665060
    },
    {
      "epoch": 2.3309045411054683,
      "grad_norm": 2.65625,
      "learning_rate": 1.2390736024465784e-05,
      "loss": 0.7927,
      "step": 665070
    },
    {
      "epoch": 2.330939588612364,
      "grad_norm": 3.4375,
      "learning_rate": 1.2390086995802084e-05,
      "loss": 0.7459,
      "step": 665080
    },
    {
      "epoch": 2.33097463611926,
      "grad_norm": 2.90625,
      "learning_rate": 1.2389437967138382e-05,
      "loss": 0.832,
      "step": 665090
    },
    {
      "epoch": 2.331009683626155,
      "grad_norm": 3.515625,
      "learning_rate": 1.238878893847468e-05,
      "loss": 0.7875,
      "step": 665100
    },
    {
      "epoch": 2.331044731133051,
      "grad_norm": 2.984375,
      "learning_rate": 1.2388139909810978e-05,
      "loss": 0.8373,
      "step": 665110
    },
    {
      "epoch": 2.331079778639946,
      "grad_norm": 2.78125,
      "learning_rate": 1.2387490881147276e-05,
      "loss": 0.8359,
      "step": 665120
    },
    {
      "epoch": 2.331114826146842,
      "grad_norm": 2.640625,
      "learning_rate": 1.2386841852483572e-05,
      "loss": 0.8254,
      "step": 665130
    },
    {
      "epoch": 2.3311498736537377,
      "grad_norm": 2.53125,
      "learning_rate": 1.2386192823819872e-05,
      "loss": 0.8218,
      "step": 665140
    },
    {
      "epoch": 2.3311849211606335,
      "grad_norm": 2.9375,
      "learning_rate": 1.238554379515617e-05,
      "loss": 0.8778,
      "step": 665150
    },
    {
      "epoch": 2.331219968667529,
      "grad_norm": 2.75,
      "learning_rate": 1.2384894766492468e-05,
      "loss": 0.7853,
      "step": 665160
    },
    {
      "epoch": 2.3312550161744245,
      "grad_norm": 2.734375,
      "learning_rate": 1.2384245737828766e-05,
      "loss": 0.7329,
      "step": 665170
    },
    {
      "epoch": 2.33129006368132,
      "grad_norm": 3.375,
      "learning_rate": 1.2383596709165064e-05,
      "loss": 0.7858,
      "step": 665180
    },
    {
      "epoch": 2.3313251111882156,
      "grad_norm": 3.15625,
      "learning_rate": 1.2382947680501362e-05,
      "loss": 0.8347,
      "step": 665190
    },
    {
      "epoch": 2.3313601586951114,
      "grad_norm": 3.046875,
      "learning_rate": 1.238229865183766e-05,
      "loss": 0.8088,
      "step": 665200
    },
    {
      "epoch": 2.3313952062020067,
      "grad_norm": 2.71875,
      "learning_rate": 1.238164962317396e-05,
      "loss": 0.804,
      "step": 665210
    },
    {
      "epoch": 2.3314302537089024,
      "grad_norm": 2.796875,
      "learning_rate": 1.2381000594510258e-05,
      "loss": 0.8301,
      "step": 665220
    },
    {
      "epoch": 2.331465301215798,
      "grad_norm": 2.59375,
      "learning_rate": 1.2380351565846554e-05,
      "loss": 0.7654,
      "step": 665230
    },
    {
      "epoch": 2.3315003487226935,
      "grad_norm": 3.03125,
      "learning_rate": 1.2379702537182852e-05,
      "loss": 0.7522,
      "step": 665240
    },
    {
      "epoch": 2.3315353962295893,
      "grad_norm": 2.515625,
      "learning_rate": 1.237905350851915e-05,
      "loss": 0.8435,
      "step": 665250
    },
    {
      "epoch": 2.331570443736485,
      "grad_norm": 3.265625,
      "learning_rate": 1.2378404479855448e-05,
      "loss": 0.8746,
      "step": 665260
    },
    {
      "epoch": 2.3316054912433803,
      "grad_norm": 2.703125,
      "learning_rate": 1.2377755451191748e-05,
      "loss": 0.8085,
      "step": 665270
    },
    {
      "epoch": 2.331640538750276,
      "grad_norm": 2.625,
      "learning_rate": 1.2377106422528046e-05,
      "loss": 0.8115,
      "step": 665280
    },
    {
      "epoch": 2.3316755862571714,
      "grad_norm": 3.109375,
      "learning_rate": 1.2376457393864344e-05,
      "loss": 0.7091,
      "step": 665290
    },
    {
      "epoch": 2.331710633764067,
      "grad_norm": 2.515625,
      "learning_rate": 1.2375808365200642e-05,
      "loss": 0.7762,
      "step": 665300
    },
    {
      "epoch": 2.331745681270963,
      "grad_norm": 2.796875,
      "learning_rate": 1.237515933653694e-05,
      "loss": 0.7461,
      "step": 665310
    },
    {
      "epoch": 2.3317807287778582,
      "grad_norm": 3.015625,
      "learning_rate": 1.2374510307873238e-05,
      "loss": 0.8094,
      "step": 665320
    },
    {
      "epoch": 2.331815776284754,
      "grad_norm": 2.25,
      "learning_rate": 1.2373861279209536e-05,
      "loss": 0.7481,
      "step": 665330
    },
    {
      "epoch": 2.3318508237916498,
      "grad_norm": 2.671875,
      "learning_rate": 1.2373212250545834e-05,
      "loss": 0.8493,
      "step": 665340
    },
    {
      "epoch": 2.331885871298545,
      "grad_norm": 2.75,
      "learning_rate": 1.2372563221882132e-05,
      "loss": 0.7972,
      "step": 665350
    },
    {
      "epoch": 2.331920918805441,
      "grad_norm": 3.5625,
      "learning_rate": 1.237191419321843e-05,
      "loss": 0.8426,
      "step": 665360
    },
    {
      "epoch": 2.3319559663123366,
      "grad_norm": 2.8125,
      "learning_rate": 1.2371265164554728e-05,
      "loss": 0.8026,
      "step": 665370
    },
    {
      "epoch": 2.331991013819232,
      "grad_norm": 2.953125,
      "learning_rate": 1.2370616135891026e-05,
      "loss": 0.8264,
      "step": 665380
    },
    {
      "epoch": 2.3320260613261277,
      "grad_norm": 2.875,
      "learning_rate": 1.2369967107227325e-05,
      "loss": 0.8339,
      "step": 665390
    },
    {
      "epoch": 2.332061108833023,
      "grad_norm": 2.765625,
      "learning_rate": 1.2369318078563623e-05,
      "loss": 0.7663,
      "step": 665400
    },
    {
      "epoch": 2.3320961563399187,
      "grad_norm": 3.15625,
      "learning_rate": 1.236866904989992e-05,
      "loss": 0.9159,
      "step": 665410
    },
    {
      "epoch": 2.3321312038468145,
      "grad_norm": 3.109375,
      "learning_rate": 1.2368020021236218e-05,
      "loss": 0.7031,
      "step": 665420
    },
    {
      "epoch": 2.33216625135371,
      "grad_norm": 2.828125,
      "learning_rate": 1.2367370992572516e-05,
      "loss": 0.7436,
      "step": 665430
    },
    {
      "epoch": 2.3322012988606056,
      "grad_norm": 3.34375,
      "learning_rate": 1.2366721963908814e-05,
      "loss": 0.8763,
      "step": 665440
    },
    {
      "epoch": 2.3322363463675013,
      "grad_norm": 3.90625,
      "learning_rate": 1.2366072935245113e-05,
      "loss": 0.8662,
      "step": 665450
    },
    {
      "epoch": 2.3322713938743966,
      "grad_norm": 2.90625,
      "learning_rate": 1.2365423906581411e-05,
      "loss": 0.8073,
      "step": 665460
    },
    {
      "epoch": 2.3323064413812924,
      "grad_norm": 2.84375,
      "learning_rate": 1.236477487791771e-05,
      "loss": 0.8223,
      "step": 665470
    },
    {
      "epoch": 2.332341488888188,
      "grad_norm": 3.015625,
      "learning_rate": 1.2364125849254007e-05,
      "loss": 0.7747,
      "step": 665480
    },
    {
      "epoch": 2.3323765363950835,
      "grad_norm": 2.65625,
      "learning_rate": 1.2363476820590305e-05,
      "loss": 0.8942,
      "step": 665490
    },
    {
      "epoch": 2.3324115839019792,
      "grad_norm": 2.75,
      "learning_rate": 1.2362827791926603e-05,
      "loss": 0.8123,
      "step": 665500
    },
    {
      "epoch": 2.3324466314088745,
      "grad_norm": 2.875,
      "learning_rate": 1.2362178763262901e-05,
      "loss": 0.7741,
      "step": 665510
    },
    {
      "epoch": 2.3324816789157703,
      "grad_norm": 3.015625,
      "learning_rate": 1.23615297345992e-05,
      "loss": 0.8041,
      "step": 665520
    },
    {
      "epoch": 2.332516726422666,
      "grad_norm": 3.0,
      "learning_rate": 1.2360880705935497e-05,
      "loss": 0.8257,
      "step": 665530
    },
    {
      "epoch": 2.3325517739295614,
      "grad_norm": 2.6875,
      "learning_rate": 1.2360231677271795e-05,
      "loss": 0.8553,
      "step": 665540
    },
    {
      "epoch": 2.332586821436457,
      "grad_norm": 2.875,
      "learning_rate": 1.2359582648608093e-05,
      "loss": 0.8531,
      "step": 665550
    },
    {
      "epoch": 2.332621868943353,
      "grad_norm": 2.828125,
      "learning_rate": 1.2358933619944391e-05,
      "loss": 0.7595,
      "step": 665560
    },
    {
      "epoch": 2.332656916450248,
      "grad_norm": 3.15625,
      "learning_rate": 1.235828459128069e-05,
      "loss": 0.7991,
      "step": 665570
    },
    {
      "epoch": 2.332691963957144,
      "grad_norm": 2.421875,
      "learning_rate": 1.2357635562616989e-05,
      "loss": 0.7564,
      "step": 665580
    },
    {
      "epoch": 2.3327270114640397,
      "grad_norm": 3.0625,
      "learning_rate": 1.2356986533953287e-05,
      "loss": 0.8775,
      "step": 665590
    },
    {
      "epoch": 2.332762058970935,
      "grad_norm": 2.484375,
      "learning_rate": 1.2356337505289583e-05,
      "loss": 0.8074,
      "step": 665600
    },
    {
      "epoch": 2.332797106477831,
      "grad_norm": 3.171875,
      "learning_rate": 1.2355688476625881e-05,
      "loss": 0.8773,
      "step": 665610
    },
    {
      "epoch": 2.332832153984726,
      "grad_norm": 2.984375,
      "learning_rate": 1.235503944796218e-05,
      "loss": 0.8571,
      "step": 665620
    },
    {
      "epoch": 2.332867201491622,
      "grad_norm": 2.875,
      "learning_rate": 1.2354390419298479e-05,
      "loss": 0.8581,
      "step": 665630
    },
    {
      "epoch": 2.3329022489985176,
      "grad_norm": 2.859375,
      "learning_rate": 1.2353741390634777e-05,
      "loss": 0.7465,
      "step": 665640
    },
    {
      "epoch": 2.332937296505413,
      "grad_norm": 2.546875,
      "learning_rate": 1.2353092361971075e-05,
      "loss": 0.8658,
      "step": 665650
    },
    {
      "epoch": 2.3329723440123087,
      "grad_norm": 3.90625,
      "learning_rate": 1.2352443333307373e-05,
      "loss": 0.8082,
      "step": 665660
    },
    {
      "epoch": 2.3330073915192044,
      "grad_norm": 3.078125,
      "learning_rate": 1.2351794304643671e-05,
      "loss": 0.8588,
      "step": 665670
    },
    {
      "epoch": 2.3330424390260998,
      "grad_norm": 2.5,
      "learning_rate": 1.2351145275979969e-05,
      "loss": 0.719,
      "step": 665680
    },
    {
      "epoch": 2.3330774865329955,
      "grad_norm": 2.71875,
      "learning_rate": 1.2350496247316267e-05,
      "loss": 0.8533,
      "step": 665690
    },
    {
      "epoch": 2.3331125340398913,
      "grad_norm": 3.6875,
      "learning_rate": 1.2349847218652565e-05,
      "loss": 0.9138,
      "step": 665700
    },
    {
      "epoch": 2.3331475815467866,
      "grad_norm": 2.609375,
      "learning_rate": 1.2349198189988863e-05,
      "loss": 0.7206,
      "step": 665710
    },
    {
      "epoch": 2.3331826290536823,
      "grad_norm": 3.140625,
      "learning_rate": 1.2348549161325161e-05,
      "loss": 0.8135,
      "step": 665720
    },
    {
      "epoch": 2.3332176765605777,
      "grad_norm": 2.921875,
      "learning_rate": 1.2347900132661459e-05,
      "loss": 0.8774,
      "step": 665730
    },
    {
      "epoch": 2.3332527240674734,
      "grad_norm": 3.03125,
      "learning_rate": 1.2347251103997757e-05,
      "loss": 0.812,
      "step": 665740
    },
    {
      "epoch": 2.333287771574369,
      "grad_norm": 2.984375,
      "learning_rate": 1.2346602075334055e-05,
      "loss": 0.7844,
      "step": 665750
    },
    {
      "epoch": 2.3333228190812645,
      "grad_norm": 3.65625,
      "learning_rate": 1.2345953046670355e-05,
      "loss": 0.795,
      "step": 665760
    },
    {
      "epoch": 2.3333578665881602,
      "grad_norm": 2.90625,
      "learning_rate": 1.2345304018006653e-05,
      "loss": 0.8208,
      "step": 665770
    },
    {
      "epoch": 2.333392914095056,
      "grad_norm": 2.890625,
      "learning_rate": 1.234465498934295e-05,
      "loss": 0.7756,
      "step": 665780
    },
    {
      "epoch": 2.3334279616019513,
      "grad_norm": 3.0625,
      "learning_rate": 1.2344005960679247e-05,
      "loss": 0.7626,
      "step": 665790
    },
    {
      "epoch": 2.333463009108847,
      "grad_norm": 3.359375,
      "learning_rate": 1.2343356932015545e-05,
      "loss": 0.866,
      "step": 665800
    },
    {
      "epoch": 2.333498056615743,
      "grad_norm": 2.921875,
      "learning_rate": 1.2342707903351843e-05,
      "loss": 0.7434,
      "step": 665810
    },
    {
      "epoch": 2.333533104122638,
      "grad_norm": 2.90625,
      "learning_rate": 1.2342058874688143e-05,
      "loss": 0.7684,
      "step": 665820
    },
    {
      "epoch": 2.333568151629534,
      "grad_norm": 3.421875,
      "learning_rate": 1.234140984602444e-05,
      "loss": 0.8736,
      "step": 665830
    },
    {
      "epoch": 2.333603199136429,
      "grad_norm": 2.625,
      "learning_rate": 1.2340760817360739e-05,
      "loss": 0.759,
      "step": 665840
    },
    {
      "epoch": 2.333638246643325,
      "grad_norm": 2.921875,
      "learning_rate": 1.2340111788697037e-05,
      "loss": 0.7353,
      "step": 665850
    },
    {
      "epoch": 2.3336732941502207,
      "grad_norm": 3.296875,
      "learning_rate": 1.2339462760033335e-05,
      "loss": 0.77,
      "step": 665860
    },
    {
      "epoch": 2.333708341657116,
      "grad_norm": 2.984375,
      "learning_rate": 1.2338813731369633e-05,
      "loss": 0.8495,
      "step": 665870
    },
    {
      "epoch": 2.333743389164012,
      "grad_norm": 3.15625,
      "learning_rate": 1.233816470270593e-05,
      "loss": 0.8867,
      "step": 665880
    },
    {
      "epoch": 2.3337784366709076,
      "grad_norm": 3.390625,
      "learning_rate": 1.2337515674042229e-05,
      "loss": 0.8372,
      "step": 665890
    },
    {
      "epoch": 2.333813484177803,
      "grad_norm": 3.0,
      "learning_rate": 1.2336866645378527e-05,
      "loss": 0.8158,
      "step": 665900
    },
    {
      "epoch": 2.3338485316846986,
      "grad_norm": 2.46875,
      "learning_rate": 1.2336217616714825e-05,
      "loss": 0.7679,
      "step": 665910
    },
    {
      "epoch": 2.3338835791915944,
      "grad_norm": 2.953125,
      "learning_rate": 1.2335568588051123e-05,
      "loss": 0.7505,
      "step": 665920
    },
    {
      "epoch": 2.3339186266984897,
      "grad_norm": 2.703125,
      "learning_rate": 1.233491955938742e-05,
      "loss": 0.7782,
      "step": 665930
    },
    {
      "epoch": 2.3339536742053855,
      "grad_norm": 2.9375,
      "learning_rate": 1.233427053072372e-05,
      "loss": 0.8128,
      "step": 665940
    },
    {
      "epoch": 2.333988721712281,
      "grad_norm": 3.0,
      "learning_rate": 1.2333621502060018e-05,
      "loss": 0.7813,
      "step": 665950
    },
    {
      "epoch": 2.3340237692191765,
      "grad_norm": 2.90625,
      "learning_rate": 1.2332972473396316e-05,
      "loss": 0.796,
      "step": 665960
    },
    {
      "epoch": 2.3340588167260723,
      "grad_norm": 2.921875,
      "learning_rate": 1.2332323444732614e-05,
      "loss": 0.8931,
      "step": 665970
    },
    {
      "epoch": 2.3340938642329676,
      "grad_norm": 2.640625,
      "learning_rate": 1.233167441606891e-05,
      "loss": 0.8141,
      "step": 665980
    },
    {
      "epoch": 2.3341289117398634,
      "grad_norm": 2.828125,
      "learning_rate": 1.2331025387405209e-05,
      "loss": 0.8364,
      "step": 665990
    },
    {
      "epoch": 2.334163959246759,
      "grad_norm": 2.9375,
      "learning_rate": 1.2330376358741508e-05,
      "loss": 0.8263,
      "step": 666000
    },
    {
      "epoch": 2.3341990067536544,
      "grad_norm": 3.09375,
      "learning_rate": 1.2329727330077806e-05,
      "loss": 0.7982,
      "step": 666010
    },
    {
      "epoch": 2.33423405426055,
      "grad_norm": 3.578125,
      "learning_rate": 1.2329078301414104e-05,
      "loss": 0.8625,
      "step": 666020
    },
    {
      "epoch": 2.334269101767446,
      "grad_norm": 2.546875,
      "learning_rate": 1.2328429272750402e-05,
      "loss": 0.7517,
      "step": 666030
    },
    {
      "epoch": 2.3343041492743413,
      "grad_norm": 3.34375,
      "learning_rate": 1.23277802440867e-05,
      "loss": 0.8452,
      "step": 666040
    },
    {
      "epoch": 2.334339196781237,
      "grad_norm": 3.203125,
      "learning_rate": 1.2327131215422998e-05,
      "loss": 0.8023,
      "step": 666050
    },
    {
      "epoch": 2.3343742442881323,
      "grad_norm": 2.75,
      "learning_rate": 1.2326482186759296e-05,
      "loss": 0.7927,
      "step": 666060
    },
    {
      "epoch": 2.334409291795028,
      "grad_norm": 2.984375,
      "learning_rate": 1.2325833158095594e-05,
      "loss": 0.8397,
      "step": 666070
    },
    {
      "epoch": 2.334444339301924,
      "grad_norm": 2.40625,
      "learning_rate": 1.2325184129431892e-05,
      "loss": 0.8146,
      "step": 666080
    },
    {
      "epoch": 2.334479386808819,
      "grad_norm": 2.828125,
      "learning_rate": 1.232453510076819e-05,
      "loss": 0.8536,
      "step": 666090
    },
    {
      "epoch": 2.334514434315715,
      "grad_norm": 3.015625,
      "learning_rate": 1.2323886072104488e-05,
      "loss": 0.8189,
      "step": 666100
    },
    {
      "epoch": 2.3345494818226107,
      "grad_norm": 2.96875,
      "learning_rate": 1.2323237043440786e-05,
      "loss": 0.8103,
      "step": 666110
    },
    {
      "epoch": 2.334584529329506,
      "grad_norm": 3.34375,
      "learning_rate": 1.2322588014777086e-05,
      "loss": 0.8375,
      "step": 666120
    },
    {
      "epoch": 2.3346195768364018,
      "grad_norm": 2.984375,
      "learning_rate": 1.2321938986113384e-05,
      "loss": 0.8557,
      "step": 666130
    },
    {
      "epoch": 2.3346546243432975,
      "grad_norm": 2.390625,
      "learning_rate": 1.2321289957449682e-05,
      "loss": 0.743,
      "step": 666140
    },
    {
      "epoch": 2.334689671850193,
      "grad_norm": 2.84375,
      "learning_rate": 1.232064092878598e-05,
      "loss": 0.8147,
      "step": 666150
    },
    {
      "epoch": 2.3347247193570886,
      "grad_norm": 2.921875,
      "learning_rate": 1.2319991900122278e-05,
      "loss": 0.8788,
      "step": 666160
    },
    {
      "epoch": 2.334759766863984,
      "grad_norm": 2.765625,
      "learning_rate": 1.2319342871458574e-05,
      "loss": 0.7383,
      "step": 666170
    },
    {
      "epoch": 2.3347948143708797,
      "grad_norm": 2.609375,
      "learning_rate": 1.2318693842794874e-05,
      "loss": 0.7633,
      "step": 666180
    },
    {
      "epoch": 2.3348298618777754,
      "grad_norm": 3.0625,
      "learning_rate": 1.2318044814131172e-05,
      "loss": 0.7066,
      "step": 666190
    },
    {
      "epoch": 2.3348649093846707,
      "grad_norm": 2.78125,
      "learning_rate": 1.231739578546747e-05,
      "loss": 0.8283,
      "step": 666200
    },
    {
      "epoch": 2.3348999568915665,
      "grad_norm": 2.8125,
      "learning_rate": 1.2316746756803768e-05,
      "loss": 0.8372,
      "step": 666210
    },
    {
      "epoch": 2.3349350043984622,
      "grad_norm": 2.59375,
      "learning_rate": 1.2316097728140066e-05,
      "loss": 0.8336,
      "step": 666220
    },
    {
      "epoch": 2.3349700519053576,
      "grad_norm": 2.53125,
      "learning_rate": 1.2315448699476364e-05,
      "loss": 0.788,
      "step": 666230
    },
    {
      "epoch": 2.3350050994122533,
      "grad_norm": 2.546875,
      "learning_rate": 1.2314799670812662e-05,
      "loss": 0.8621,
      "step": 666240
    },
    {
      "epoch": 2.335040146919149,
      "grad_norm": 3.3125,
      "learning_rate": 1.2314150642148962e-05,
      "loss": 0.8543,
      "step": 666250
    },
    {
      "epoch": 2.3350751944260444,
      "grad_norm": 3.0,
      "learning_rate": 1.2313501613485258e-05,
      "loss": 0.8955,
      "step": 666260
    },
    {
      "epoch": 2.33511024193294,
      "grad_norm": 2.71875,
      "learning_rate": 1.2312852584821556e-05,
      "loss": 0.809,
      "step": 666270
    },
    {
      "epoch": 2.3351452894398355,
      "grad_norm": 2.9375,
      "learning_rate": 1.2312203556157854e-05,
      "loss": 0.8128,
      "step": 666280
    },
    {
      "epoch": 2.335180336946731,
      "grad_norm": 2.40625,
      "learning_rate": 1.2311554527494152e-05,
      "loss": 0.8032,
      "step": 666290
    },
    {
      "epoch": 2.335215384453627,
      "grad_norm": 2.53125,
      "learning_rate": 1.231090549883045e-05,
      "loss": 0.7796,
      "step": 666300
    },
    {
      "epoch": 2.3352504319605227,
      "grad_norm": 3.0,
      "learning_rate": 1.231025647016675e-05,
      "loss": 0.8072,
      "step": 666310
    },
    {
      "epoch": 2.335285479467418,
      "grad_norm": 3.046875,
      "learning_rate": 1.2309607441503048e-05,
      "loss": 0.8711,
      "step": 666320
    },
    {
      "epoch": 2.335320526974314,
      "grad_norm": 2.90625,
      "learning_rate": 1.2308958412839346e-05,
      "loss": 0.8218,
      "step": 666330
    },
    {
      "epoch": 2.335355574481209,
      "grad_norm": 3.03125,
      "learning_rate": 1.2308309384175644e-05,
      "loss": 0.7497,
      "step": 666340
    },
    {
      "epoch": 2.335390621988105,
      "grad_norm": 2.953125,
      "learning_rate": 1.230766035551194e-05,
      "loss": 0.8445,
      "step": 666350
    },
    {
      "epoch": 2.3354256694950006,
      "grad_norm": 3.0625,
      "learning_rate": 1.2307011326848238e-05,
      "loss": 0.8319,
      "step": 666360
    },
    {
      "epoch": 2.335460717001896,
      "grad_norm": 3.015625,
      "learning_rate": 1.2306362298184538e-05,
      "loss": 0.8324,
      "step": 666370
    },
    {
      "epoch": 2.3354957645087917,
      "grad_norm": 3.0,
      "learning_rate": 1.2305713269520836e-05,
      "loss": 0.8489,
      "step": 666380
    },
    {
      "epoch": 2.335530812015687,
      "grad_norm": 3.46875,
      "learning_rate": 1.2305064240857134e-05,
      "loss": 0.8554,
      "step": 666390
    },
    {
      "epoch": 2.3355658595225828,
      "grad_norm": 2.96875,
      "learning_rate": 1.2304415212193432e-05,
      "loss": 0.7459,
      "step": 666400
    },
    {
      "epoch": 2.3356009070294785,
      "grad_norm": 3.078125,
      "learning_rate": 1.230376618352973e-05,
      "loss": 0.8157,
      "step": 666410
    },
    {
      "epoch": 2.3356359545363743,
      "grad_norm": 2.65625,
      "learning_rate": 1.2303117154866028e-05,
      "loss": 0.7905,
      "step": 666420
    },
    {
      "epoch": 2.3356710020432696,
      "grad_norm": 2.9375,
      "learning_rate": 1.2302468126202328e-05,
      "loss": 0.7565,
      "step": 666430
    },
    {
      "epoch": 2.3357060495501654,
      "grad_norm": 3.0,
      "learning_rate": 1.2301819097538626e-05,
      "loss": 0.8256,
      "step": 666440
    },
    {
      "epoch": 2.3357410970570607,
      "grad_norm": 2.859375,
      "learning_rate": 1.2301170068874922e-05,
      "loss": 0.7712,
      "step": 666450
    },
    {
      "epoch": 2.3357761445639564,
      "grad_norm": 2.875,
      "learning_rate": 1.230052104021122e-05,
      "loss": 0.8216,
      "step": 666460
    },
    {
      "epoch": 2.335811192070852,
      "grad_norm": 3.265625,
      "learning_rate": 1.2299872011547518e-05,
      "loss": 0.8203,
      "step": 666470
    },
    {
      "epoch": 2.3358462395777475,
      "grad_norm": 3.0,
      "learning_rate": 1.2299222982883816e-05,
      "loss": 0.8712,
      "step": 666480
    },
    {
      "epoch": 2.3358812870846433,
      "grad_norm": 2.5,
      "learning_rate": 1.2298573954220116e-05,
      "loss": 0.8489,
      "step": 666490
    },
    {
      "epoch": 2.335916334591539,
      "grad_norm": 3.046875,
      "learning_rate": 1.2297924925556414e-05,
      "loss": 0.8133,
      "step": 666500
    },
    {
      "epoch": 2.3359513820984343,
      "grad_norm": 2.5,
      "learning_rate": 1.2297275896892712e-05,
      "loss": 0.839,
      "step": 666510
    },
    {
      "epoch": 2.33598642960533,
      "grad_norm": 2.671875,
      "learning_rate": 1.229662686822901e-05,
      "loss": 0.8779,
      "step": 666520
    },
    {
      "epoch": 2.336021477112226,
      "grad_norm": 3.203125,
      "learning_rate": 1.2295977839565308e-05,
      "loss": 0.8232,
      "step": 666530
    },
    {
      "epoch": 2.336056524619121,
      "grad_norm": 3.1875,
      "learning_rate": 1.2295328810901604e-05,
      "loss": 0.8333,
      "step": 666540
    },
    {
      "epoch": 2.336091572126017,
      "grad_norm": 2.9375,
      "learning_rate": 1.2294679782237904e-05,
      "loss": 0.8091,
      "step": 666550
    },
    {
      "epoch": 2.3361266196329122,
      "grad_norm": 3.140625,
      "learning_rate": 1.2294030753574202e-05,
      "loss": 0.8368,
      "step": 666560
    },
    {
      "epoch": 2.336161667139808,
      "grad_norm": 3.484375,
      "learning_rate": 1.22933817249105e-05,
      "loss": 0.8688,
      "step": 666570
    },
    {
      "epoch": 2.3361967146467038,
      "grad_norm": 2.8125,
      "learning_rate": 1.2292732696246798e-05,
      "loss": 0.8573,
      "step": 666580
    },
    {
      "epoch": 2.336231762153599,
      "grad_norm": 2.4375,
      "learning_rate": 1.2292083667583096e-05,
      "loss": 0.8939,
      "step": 666590
    },
    {
      "epoch": 2.336266809660495,
      "grad_norm": 2.890625,
      "learning_rate": 1.2291434638919394e-05,
      "loss": 0.7371,
      "step": 666600
    },
    {
      "epoch": 2.3363018571673906,
      "grad_norm": 2.90625,
      "learning_rate": 1.2290785610255692e-05,
      "loss": 0.8334,
      "step": 666610
    },
    {
      "epoch": 2.336336904674286,
      "grad_norm": 2.828125,
      "learning_rate": 1.2290136581591991e-05,
      "loss": 0.7948,
      "step": 666620
    },
    {
      "epoch": 2.3363719521811817,
      "grad_norm": 3.34375,
      "learning_rate": 1.228948755292829e-05,
      "loss": 0.8435,
      "step": 666630
    },
    {
      "epoch": 2.3364069996880774,
      "grad_norm": 3.53125,
      "learning_rate": 1.2288838524264586e-05,
      "loss": 0.8361,
      "step": 666640
    },
    {
      "epoch": 2.3364420471949727,
      "grad_norm": 2.90625,
      "learning_rate": 1.2288189495600884e-05,
      "loss": 0.7702,
      "step": 666650
    },
    {
      "epoch": 2.3364770947018685,
      "grad_norm": 3.234375,
      "learning_rate": 1.2287540466937182e-05,
      "loss": 0.788,
      "step": 666660
    },
    {
      "epoch": 2.336512142208764,
      "grad_norm": 3.09375,
      "learning_rate": 1.2286891438273481e-05,
      "loss": 0.8395,
      "step": 666670
    },
    {
      "epoch": 2.3365471897156596,
      "grad_norm": 2.828125,
      "learning_rate": 1.228624240960978e-05,
      "loss": 0.8124,
      "step": 666680
    },
    {
      "epoch": 2.3365822372225553,
      "grad_norm": 2.953125,
      "learning_rate": 1.2285593380946077e-05,
      "loss": 0.767,
      "step": 666690
    },
    {
      "epoch": 2.3366172847294506,
      "grad_norm": 3.21875,
      "learning_rate": 1.2284944352282375e-05,
      "loss": 0.881,
      "step": 666700
    },
    {
      "epoch": 2.3366523322363464,
      "grad_norm": 3.1875,
      "learning_rate": 1.2284295323618673e-05,
      "loss": 0.8499,
      "step": 666710
    },
    {
      "epoch": 2.336687379743242,
      "grad_norm": 2.921875,
      "learning_rate": 1.2283646294954971e-05,
      "loss": 0.7178,
      "step": 666720
    },
    {
      "epoch": 2.3367224272501375,
      "grad_norm": 2.65625,
      "learning_rate": 1.228299726629127e-05,
      "loss": 0.7939,
      "step": 666730
    },
    {
      "epoch": 2.336757474757033,
      "grad_norm": 2.875,
      "learning_rate": 1.2282348237627567e-05,
      "loss": 0.8239,
      "step": 666740
    },
    {
      "epoch": 2.336792522263929,
      "grad_norm": 3.03125,
      "learning_rate": 1.2281699208963865e-05,
      "loss": 0.7794,
      "step": 666750
    },
    {
      "epoch": 2.3368275697708243,
      "grad_norm": 2.4375,
      "learning_rate": 1.2281050180300163e-05,
      "loss": 0.7886,
      "step": 666760
    },
    {
      "epoch": 2.33686261727772,
      "grad_norm": 2.78125,
      "learning_rate": 1.2280401151636461e-05,
      "loss": 0.7351,
      "step": 666770
    },
    {
      "epoch": 2.3368976647846154,
      "grad_norm": 2.875,
      "learning_rate": 1.227975212297276e-05,
      "loss": 0.7857,
      "step": 666780
    },
    {
      "epoch": 2.336932712291511,
      "grad_norm": 3.0,
      "learning_rate": 1.2279103094309057e-05,
      "loss": 0.8113,
      "step": 666790
    },
    {
      "epoch": 2.336967759798407,
      "grad_norm": 2.890625,
      "learning_rate": 1.2278454065645357e-05,
      "loss": 0.7698,
      "step": 666800
    },
    {
      "epoch": 2.337002807305302,
      "grad_norm": 2.859375,
      "learning_rate": 1.2277805036981655e-05,
      "loss": 0.7689,
      "step": 666810
    },
    {
      "epoch": 2.337037854812198,
      "grad_norm": 3.3125,
      "learning_rate": 1.2277156008317951e-05,
      "loss": 0.8143,
      "step": 666820
    },
    {
      "epoch": 2.3370729023190937,
      "grad_norm": 3.03125,
      "learning_rate": 1.227650697965425e-05,
      "loss": 0.8395,
      "step": 666830
    },
    {
      "epoch": 2.337107949825989,
      "grad_norm": 2.96875,
      "learning_rate": 1.2275857950990547e-05,
      "loss": 0.8483,
      "step": 666840
    },
    {
      "epoch": 2.3371429973328848,
      "grad_norm": 3.234375,
      "learning_rate": 1.2275208922326845e-05,
      "loss": 0.8628,
      "step": 666850
    },
    {
      "epoch": 2.3371780448397805,
      "grad_norm": 2.671875,
      "learning_rate": 1.2274559893663145e-05,
      "loss": 0.8387,
      "step": 666860
    },
    {
      "epoch": 2.337213092346676,
      "grad_norm": 2.78125,
      "learning_rate": 1.2273910864999443e-05,
      "loss": 0.8076,
      "step": 666870
    },
    {
      "epoch": 2.3372481398535716,
      "grad_norm": 2.765625,
      "learning_rate": 1.2273261836335741e-05,
      "loss": 0.7729,
      "step": 666880
    },
    {
      "epoch": 2.337283187360467,
      "grad_norm": 2.890625,
      "learning_rate": 1.2272612807672039e-05,
      "loss": 0.7421,
      "step": 666890
    },
    {
      "epoch": 2.3373182348673627,
      "grad_norm": 3.25,
      "learning_rate": 1.2271963779008337e-05,
      "loss": 0.7913,
      "step": 666900
    },
    {
      "epoch": 2.3373532823742584,
      "grad_norm": 2.703125,
      "learning_rate": 1.2271314750344635e-05,
      "loss": 0.798,
      "step": 666910
    },
    {
      "epoch": 2.3373883298811537,
      "grad_norm": 2.921875,
      "learning_rate": 1.2270665721680933e-05,
      "loss": 0.8325,
      "step": 666920
    },
    {
      "epoch": 2.3374233773880495,
      "grad_norm": 2.84375,
      "learning_rate": 1.2270016693017231e-05,
      "loss": 0.8757,
      "step": 666930
    },
    {
      "epoch": 2.3374584248949453,
      "grad_norm": 2.546875,
      "learning_rate": 1.2269367664353529e-05,
      "loss": 0.784,
      "step": 666940
    },
    {
      "epoch": 2.3374934724018406,
      "grad_norm": 2.84375,
      "learning_rate": 1.2268718635689827e-05,
      "loss": 0.7851,
      "step": 666950
    },
    {
      "epoch": 2.3375285199087363,
      "grad_norm": 3.3125,
      "learning_rate": 1.2268069607026125e-05,
      "loss": 0.7415,
      "step": 666960
    },
    {
      "epoch": 2.337563567415632,
      "grad_norm": 3.375,
      "learning_rate": 1.2267420578362423e-05,
      "loss": 0.8664,
      "step": 666970
    },
    {
      "epoch": 2.3375986149225274,
      "grad_norm": 3.125,
      "learning_rate": 1.2266771549698723e-05,
      "loss": 0.8151,
      "step": 666980
    },
    {
      "epoch": 2.337633662429423,
      "grad_norm": 2.828125,
      "learning_rate": 1.226612252103502e-05,
      "loss": 0.8,
      "step": 666990
    },
    {
      "epoch": 2.3376687099363185,
      "grad_norm": 3.09375,
      "learning_rate": 1.2265473492371319e-05,
      "loss": 0.7964,
      "step": 667000
    },
    {
      "epoch": 2.3377037574432142,
      "grad_norm": 3.0,
      "learning_rate": 1.2264824463707615e-05,
      "loss": 0.8761,
      "step": 667010
    },
    {
      "epoch": 2.33773880495011,
      "grad_norm": 3.546875,
      "learning_rate": 1.2264175435043913e-05,
      "loss": 0.8043,
      "step": 667020
    },
    {
      "epoch": 2.3377738524570053,
      "grad_norm": 3.0625,
      "learning_rate": 1.2263526406380211e-05,
      "loss": 0.7408,
      "step": 667030
    },
    {
      "epoch": 2.337808899963901,
      "grad_norm": 3.1875,
      "learning_rate": 1.226287737771651e-05,
      "loss": 0.9008,
      "step": 667040
    },
    {
      "epoch": 2.337843947470797,
      "grad_norm": 3.21875,
      "learning_rate": 1.2262228349052809e-05,
      "loss": 0.8428,
      "step": 667050
    },
    {
      "epoch": 2.337878994977692,
      "grad_norm": 2.875,
      "learning_rate": 1.2261579320389107e-05,
      "loss": 0.8591,
      "step": 667060
    },
    {
      "epoch": 2.337914042484588,
      "grad_norm": 2.890625,
      "learning_rate": 1.2260930291725405e-05,
      "loss": 0.7457,
      "step": 667070
    },
    {
      "epoch": 2.3379490899914837,
      "grad_norm": 3.0625,
      "learning_rate": 1.2260281263061703e-05,
      "loss": 0.8491,
      "step": 667080
    },
    {
      "epoch": 2.337984137498379,
      "grad_norm": 2.953125,
      "learning_rate": 1.2259632234398e-05,
      "loss": 0.8243,
      "step": 667090
    },
    {
      "epoch": 2.3380191850052747,
      "grad_norm": 3.390625,
      "learning_rate": 1.2258983205734299e-05,
      "loss": 0.7881,
      "step": 667100
    },
    {
      "epoch": 2.33805423251217,
      "grad_norm": 2.671875,
      "learning_rate": 1.2258334177070597e-05,
      "loss": 0.8122,
      "step": 667110
    },
    {
      "epoch": 2.338089280019066,
      "grad_norm": 3.015625,
      "learning_rate": 1.2257685148406895e-05,
      "loss": 0.8779,
      "step": 667120
    },
    {
      "epoch": 2.3381243275259616,
      "grad_norm": 2.59375,
      "learning_rate": 1.2257036119743193e-05,
      "loss": 0.7694,
      "step": 667130
    },
    {
      "epoch": 2.338159375032857,
      "grad_norm": 2.796875,
      "learning_rate": 1.225638709107949e-05,
      "loss": 0.7714,
      "step": 667140
    },
    {
      "epoch": 2.3381944225397526,
      "grad_norm": 3.21875,
      "learning_rate": 1.2255738062415789e-05,
      "loss": 0.8007,
      "step": 667150
    },
    {
      "epoch": 2.3382294700466484,
      "grad_norm": 2.875,
      "learning_rate": 1.2255089033752087e-05,
      "loss": 0.8568,
      "step": 667160
    },
    {
      "epoch": 2.3382645175535437,
      "grad_norm": 2.6875,
      "learning_rate": 1.2254440005088386e-05,
      "loss": 0.7393,
      "step": 667170
    },
    {
      "epoch": 2.3382995650604395,
      "grad_norm": 2.734375,
      "learning_rate": 1.2253790976424684e-05,
      "loss": 0.8257,
      "step": 667180
    },
    {
      "epoch": 2.338334612567335,
      "grad_norm": 3.078125,
      "learning_rate": 1.2253141947760982e-05,
      "loss": 0.7687,
      "step": 667190
    },
    {
      "epoch": 2.3383696600742305,
      "grad_norm": 2.984375,
      "learning_rate": 1.2252492919097279e-05,
      "loss": 0.8468,
      "step": 667200
    },
    {
      "epoch": 2.3384047075811263,
      "grad_norm": 2.328125,
      "learning_rate": 1.2251843890433577e-05,
      "loss": 0.8546,
      "step": 667210
    },
    {
      "epoch": 2.3384397550880216,
      "grad_norm": 3.078125,
      "learning_rate": 1.2251194861769876e-05,
      "loss": 0.7474,
      "step": 667220
    },
    {
      "epoch": 2.3384748025949174,
      "grad_norm": 3.09375,
      "learning_rate": 1.2250545833106174e-05,
      "loss": 0.791,
      "step": 667230
    },
    {
      "epoch": 2.338509850101813,
      "grad_norm": 2.671875,
      "learning_rate": 1.2249896804442472e-05,
      "loss": 0.8285,
      "step": 667240
    },
    {
      "epoch": 2.3385448976087084,
      "grad_norm": 2.984375,
      "learning_rate": 1.224924777577877e-05,
      "loss": 0.7426,
      "step": 667250
    },
    {
      "epoch": 2.338579945115604,
      "grad_norm": 3.046875,
      "learning_rate": 1.2248598747115068e-05,
      "loss": 0.7984,
      "step": 667260
    },
    {
      "epoch": 2.3386149926225,
      "grad_norm": 3.515625,
      "learning_rate": 1.2247949718451366e-05,
      "loss": 0.8041,
      "step": 667270
    },
    {
      "epoch": 2.3386500401293953,
      "grad_norm": 3.140625,
      "learning_rate": 1.2247300689787664e-05,
      "loss": 0.8281,
      "step": 667280
    },
    {
      "epoch": 2.338685087636291,
      "grad_norm": 2.71875,
      "learning_rate": 1.2246651661123962e-05,
      "loss": 0.8514,
      "step": 667290
    },
    {
      "epoch": 2.3387201351431868,
      "grad_norm": 2.921875,
      "learning_rate": 1.224600263246026e-05,
      "loss": 0.8392,
      "step": 667300
    },
    {
      "epoch": 2.338755182650082,
      "grad_norm": 3.046875,
      "learning_rate": 1.2245353603796558e-05,
      "loss": 0.8447,
      "step": 667310
    },
    {
      "epoch": 2.338790230156978,
      "grad_norm": 2.75,
      "learning_rate": 1.2244704575132856e-05,
      "loss": 0.7748,
      "step": 667320
    },
    {
      "epoch": 2.338825277663873,
      "grad_norm": 2.859375,
      "learning_rate": 1.2244055546469154e-05,
      "loss": 0.8267,
      "step": 667330
    },
    {
      "epoch": 2.338860325170769,
      "grad_norm": 2.484375,
      "learning_rate": 1.2243406517805452e-05,
      "loss": 0.7843,
      "step": 667340
    },
    {
      "epoch": 2.3388953726776647,
      "grad_norm": 2.515625,
      "learning_rate": 1.2242757489141752e-05,
      "loss": 0.789,
      "step": 667350
    },
    {
      "epoch": 2.33893042018456,
      "grad_norm": 3.109375,
      "learning_rate": 1.224210846047805e-05,
      "loss": 0.7776,
      "step": 667360
    },
    {
      "epoch": 2.3389654676914557,
      "grad_norm": 2.78125,
      "learning_rate": 1.2241459431814348e-05,
      "loss": 0.7252,
      "step": 667370
    },
    {
      "epoch": 2.3390005151983515,
      "grad_norm": 3.359375,
      "learning_rate": 1.2240810403150646e-05,
      "loss": 0.8814,
      "step": 667380
    },
    {
      "epoch": 2.339035562705247,
      "grad_norm": 3.140625,
      "learning_rate": 1.2240161374486942e-05,
      "loss": 0.8278,
      "step": 667390
    },
    {
      "epoch": 2.3390706102121426,
      "grad_norm": 3.03125,
      "learning_rate": 1.223951234582324e-05,
      "loss": 0.8303,
      "step": 667400
    },
    {
      "epoch": 2.3391056577190383,
      "grad_norm": 2.875,
      "learning_rate": 1.223886331715954e-05,
      "loss": 0.8135,
      "step": 667410
    },
    {
      "epoch": 2.3391407052259336,
      "grad_norm": 2.65625,
      "learning_rate": 1.2238214288495838e-05,
      "loss": 0.7524,
      "step": 667420
    },
    {
      "epoch": 2.3391757527328294,
      "grad_norm": 2.515625,
      "learning_rate": 1.2237565259832136e-05,
      "loss": 0.7754,
      "step": 667430
    },
    {
      "epoch": 2.3392108002397247,
      "grad_norm": 2.921875,
      "learning_rate": 1.2236916231168434e-05,
      "loss": 0.7654,
      "step": 667440
    },
    {
      "epoch": 2.3392458477466205,
      "grad_norm": 2.59375,
      "learning_rate": 1.2236267202504732e-05,
      "loss": 0.8653,
      "step": 667450
    },
    {
      "epoch": 2.3392808952535162,
      "grad_norm": 3.1875,
      "learning_rate": 1.223561817384103e-05,
      "loss": 0.7054,
      "step": 667460
    },
    {
      "epoch": 2.3393159427604115,
      "grad_norm": 3.203125,
      "learning_rate": 1.2234969145177328e-05,
      "loss": 0.8809,
      "step": 667470
    },
    {
      "epoch": 2.3393509902673073,
      "grad_norm": 2.921875,
      "learning_rate": 1.2234320116513626e-05,
      "loss": 0.7393,
      "step": 667480
    },
    {
      "epoch": 2.339386037774203,
      "grad_norm": 3.46875,
      "learning_rate": 1.2233671087849924e-05,
      "loss": 0.8434,
      "step": 667490
    },
    {
      "epoch": 2.3394210852810984,
      "grad_norm": 2.765625,
      "learning_rate": 1.2233022059186222e-05,
      "loss": 0.7562,
      "step": 667500
    },
    {
      "epoch": 2.339456132787994,
      "grad_norm": 2.984375,
      "learning_rate": 1.223237303052252e-05,
      "loss": 0.8811,
      "step": 667510
    },
    {
      "epoch": 2.33949118029489,
      "grad_norm": 3.0625,
      "learning_rate": 1.2231724001858818e-05,
      "loss": 0.8929,
      "step": 667520
    },
    {
      "epoch": 2.339526227801785,
      "grad_norm": 2.6875,
      "learning_rate": 1.2231074973195118e-05,
      "loss": 0.8437,
      "step": 667530
    },
    {
      "epoch": 2.339561275308681,
      "grad_norm": 3.296875,
      "learning_rate": 1.2230425944531416e-05,
      "loss": 0.8926,
      "step": 667540
    },
    {
      "epoch": 2.3395963228155763,
      "grad_norm": 2.890625,
      "learning_rate": 1.2229776915867714e-05,
      "loss": 0.7957,
      "step": 667550
    },
    {
      "epoch": 2.339631370322472,
      "grad_norm": 2.625,
      "learning_rate": 1.2229127887204012e-05,
      "loss": 0.7477,
      "step": 667560
    },
    {
      "epoch": 2.339666417829368,
      "grad_norm": 3.328125,
      "learning_rate": 1.222847885854031e-05,
      "loss": 0.8786,
      "step": 667570
    },
    {
      "epoch": 2.3397014653362636,
      "grad_norm": 2.890625,
      "learning_rate": 1.2227829829876606e-05,
      "loss": 0.782,
      "step": 667580
    },
    {
      "epoch": 2.339736512843159,
      "grad_norm": 2.96875,
      "learning_rate": 1.2227180801212906e-05,
      "loss": 0.8695,
      "step": 667590
    },
    {
      "epoch": 2.3397715603500546,
      "grad_norm": 2.375,
      "learning_rate": 1.2226531772549204e-05,
      "loss": 0.7947,
      "step": 667600
    },
    {
      "epoch": 2.33980660785695,
      "grad_norm": 3.21875,
      "learning_rate": 1.2225882743885502e-05,
      "loss": 0.7908,
      "step": 667610
    },
    {
      "epoch": 2.3398416553638457,
      "grad_norm": 3.0,
      "learning_rate": 1.22252337152218e-05,
      "loss": 0.8061,
      "step": 667620
    },
    {
      "epoch": 2.3398767028707415,
      "grad_norm": 2.75,
      "learning_rate": 1.2224584686558098e-05,
      "loss": 0.8513,
      "step": 667630
    },
    {
      "epoch": 2.3399117503776368,
      "grad_norm": 2.765625,
      "learning_rate": 1.2223935657894396e-05,
      "loss": 0.8274,
      "step": 667640
    },
    {
      "epoch": 2.3399467978845325,
      "grad_norm": 2.265625,
      "learning_rate": 1.2223286629230694e-05,
      "loss": 0.7577,
      "step": 667650
    },
    {
      "epoch": 2.339981845391428,
      "grad_norm": 2.9375,
      "learning_rate": 1.2222637600566993e-05,
      "loss": 0.7879,
      "step": 667660
    },
    {
      "epoch": 2.3400168928983236,
      "grad_norm": 2.875,
      "learning_rate": 1.222198857190329e-05,
      "loss": 0.8251,
      "step": 667670
    },
    {
      "epoch": 2.3400519404052194,
      "grad_norm": 2.90625,
      "learning_rate": 1.2221339543239588e-05,
      "loss": 0.8376,
      "step": 667680
    },
    {
      "epoch": 2.340086987912115,
      "grad_norm": 2.578125,
      "learning_rate": 1.2220690514575886e-05,
      "loss": 0.7755,
      "step": 667690
    },
    {
      "epoch": 2.3401220354190104,
      "grad_norm": 3.171875,
      "learning_rate": 1.2220041485912184e-05,
      "loss": 0.8009,
      "step": 667700
    },
    {
      "epoch": 2.340157082925906,
      "grad_norm": 2.859375,
      "learning_rate": 1.2219392457248482e-05,
      "loss": 0.8228,
      "step": 667710
    },
    {
      "epoch": 2.3401921304328015,
      "grad_norm": 3.40625,
      "learning_rate": 1.2218743428584781e-05,
      "loss": 0.8348,
      "step": 667720
    },
    {
      "epoch": 2.3402271779396973,
      "grad_norm": 2.46875,
      "learning_rate": 1.221809439992108e-05,
      "loss": 0.7452,
      "step": 667730
    },
    {
      "epoch": 2.340262225446593,
      "grad_norm": 3.015625,
      "learning_rate": 1.2217445371257377e-05,
      "loss": 0.8839,
      "step": 667740
    },
    {
      "epoch": 2.3402972729534883,
      "grad_norm": 2.796875,
      "learning_rate": 1.2216796342593675e-05,
      "loss": 0.7665,
      "step": 667750
    },
    {
      "epoch": 2.340332320460384,
      "grad_norm": 2.640625,
      "learning_rate": 1.2216147313929972e-05,
      "loss": 0.7238,
      "step": 667760
    },
    {
      "epoch": 2.3403673679672794,
      "grad_norm": 3.265625,
      "learning_rate": 1.2215498285266271e-05,
      "loss": 0.8071,
      "step": 667770
    },
    {
      "epoch": 2.340402415474175,
      "grad_norm": 2.734375,
      "learning_rate": 1.221484925660257e-05,
      "loss": 0.7279,
      "step": 667780
    },
    {
      "epoch": 2.340437462981071,
      "grad_norm": 2.8125,
      "learning_rate": 1.2214200227938867e-05,
      "loss": 0.808,
      "step": 667790
    },
    {
      "epoch": 2.3404725104879667,
      "grad_norm": 3.0625,
      "learning_rate": 1.2213551199275165e-05,
      "loss": 0.768,
      "step": 667800
    },
    {
      "epoch": 2.340507557994862,
      "grad_norm": 2.84375,
      "learning_rate": 1.2212902170611463e-05,
      "loss": 0.7417,
      "step": 667810
    },
    {
      "epoch": 2.3405426055017577,
      "grad_norm": 2.953125,
      "learning_rate": 1.2212253141947761e-05,
      "loss": 0.8512,
      "step": 667820
    },
    {
      "epoch": 2.340577653008653,
      "grad_norm": 2.984375,
      "learning_rate": 1.221160411328406e-05,
      "loss": 0.8524,
      "step": 667830
    },
    {
      "epoch": 2.340612700515549,
      "grad_norm": 2.71875,
      "learning_rate": 1.2210955084620359e-05,
      "loss": 0.8149,
      "step": 667840
    },
    {
      "epoch": 2.3406477480224446,
      "grad_norm": 2.90625,
      "learning_rate": 1.2210306055956657e-05,
      "loss": 0.8616,
      "step": 667850
    },
    {
      "epoch": 2.34068279552934,
      "grad_norm": 2.90625,
      "learning_rate": 1.2209657027292953e-05,
      "loss": 0.8598,
      "step": 667860
    },
    {
      "epoch": 2.3407178430362356,
      "grad_norm": 2.953125,
      "learning_rate": 1.2209007998629251e-05,
      "loss": 0.8612,
      "step": 667870
    },
    {
      "epoch": 2.3407528905431314,
      "grad_norm": 2.9375,
      "learning_rate": 1.220835896996555e-05,
      "loss": 0.8629,
      "step": 667880
    },
    {
      "epoch": 2.3407879380500267,
      "grad_norm": 2.90625,
      "learning_rate": 1.2207709941301847e-05,
      "loss": 0.7699,
      "step": 667890
    },
    {
      "epoch": 2.3408229855569225,
      "grad_norm": 2.671875,
      "learning_rate": 1.2207060912638147e-05,
      "loss": 0.7852,
      "step": 667900
    },
    {
      "epoch": 2.3408580330638182,
      "grad_norm": 2.609375,
      "learning_rate": 1.2206411883974445e-05,
      "loss": 0.8543,
      "step": 667910
    },
    {
      "epoch": 2.3408930805707135,
      "grad_norm": 2.8125,
      "learning_rate": 1.2205762855310743e-05,
      "loss": 0.8118,
      "step": 667920
    },
    {
      "epoch": 2.3409281280776093,
      "grad_norm": 2.609375,
      "learning_rate": 1.2205113826647041e-05,
      "loss": 0.7803,
      "step": 667930
    },
    {
      "epoch": 2.3409631755845046,
      "grad_norm": 3.046875,
      "learning_rate": 1.2204464797983339e-05,
      "loss": 0.8466,
      "step": 667940
    },
    {
      "epoch": 2.3409982230914004,
      "grad_norm": 2.515625,
      "learning_rate": 1.2203815769319635e-05,
      "loss": 0.793,
      "step": 667950
    },
    {
      "epoch": 2.341033270598296,
      "grad_norm": 2.8125,
      "learning_rate": 1.2203166740655935e-05,
      "loss": 0.7304,
      "step": 667960
    },
    {
      "epoch": 2.3410683181051914,
      "grad_norm": 3.4375,
      "learning_rate": 1.2202517711992233e-05,
      "loss": 0.8799,
      "step": 667970
    },
    {
      "epoch": 2.341103365612087,
      "grad_norm": 1.8828125,
      "learning_rate": 1.2201868683328531e-05,
      "loss": 0.73,
      "step": 667980
    },
    {
      "epoch": 2.341138413118983,
      "grad_norm": 2.90625,
      "learning_rate": 1.2201219654664829e-05,
      "loss": 0.8633,
      "step": 667990
    },
    {
      "epoch": 2.3411734606258783,
      "grad_norm": 2.8125,
      "learning_rate": 1.2200570626001127e-05,
      "loss": 0.8192,
      "step": 668000
    },
    {
      "epoch": 2.341208508132774,
      "grad_norm": 3.0625,
      "learning_rate": 1.2199921597337425e-05,
      "loss": 0.819,
      "step": 668010
    },
    {
      "epoch": 2.34124355563967,
      "grad_norm": 2.34375,
      "learning_rate": 1.2199272568673723e-05,
      "loss": 0.7949,
      "step": 668020
    },
    {
      "epoch": 2.341278603146565,
      "grad_norm": 2.546875,
      "learning_rate": 1.2198623540010023e-05,
      "loss": 0.7802,
      "step": 668030
    },
    {
      "epoch": 2.341313650653461,
      "grad_norm": 3.09375,
      "learning_rate": 1.219797451134632e-05,
      "loss": 0.8323,
      "step": 668040
    },
    {
      "epoch": 2.341348698160356,
      "grad_norm": 2.546875,
      "learning_rate": 1.2197325482682617e-05,
      "loss": 0.8243,
      "step": 668050
    },
    {
      "epoch": 2.341383745667252,
      "grad_norm": 2.9375,
      "learning_rate": 1.2196676454018915e-05,
      "loss": 0.829,
      "step": 668060
    },
    {
      "epoch": 2.3414187931741477,
      "grad_norm": 2.8125,
      "learning_rate": 1.2196027425355213e-05,
      "loss": 0.7678,
      "step": 668070
    },
    {
      "epoch": 2.341453840681043,
      "grad_norm": 2.6875,
      "learning_rate": 1.2195378396691513e-05,
      "loss": 0.7731,
      "step": 668080
    },
    {
      "epoch": 2.3414888881879388,
      "grad_norm": 2.953125,
      "learning_rate": 1.219472936802781e-05,
      "loss": 0.8285,
      "step": 668090
    },
    {
      "epoch": 2.3415239356948345,
      "grad_norm": 2.625,
      "learning_rate": 1.2194080339364109e-05,
      "loss": 0.7545,
      "step": 668100
    },
    {
      "epoch": 2.34155898320173,
      "grad_norm": 2.859375,
      "learning_rate": 1.2193431310700407e-05,
      "loss": 0.7839,
      "step": 668110
    },
    {
      "epoch": 2.3415940307086256,
      "grad_norm": 2.84375,
      "learning_rate": 1.2192782282036705e-05,
      "loss": 0.8506,
      "step": 668120
    },
    {
      "epoch": 2.3416290782155214,
      "grad_norm": 3.046875,
      "learning_rate": 1.2192133253373003e-05,
      "loss": 0.8793,
      "step": 668130
    },
    {
      "epoch": 2.3416641257224167,
      "grad_norm": 2.5625,
      "learning_rate": 1.21914842247093e-05,
      "loss": 0.8285,
      "step": 668140
    },
    {
      "epoch": 2.3416991732293124,
      "grad_norm": 2.671875,
      "learning_rate": 1.2190835196045599e-05,
      "loss": 0.7698,
      "step": 668150
    },
    {
      "epoch": 2.3417342207362077,
      "grad_norm": 3.234375,
      "learning_rate": 1.2190186167381897e-05,
      "loss": 0.7709,
      "step": 668160
    },
    {
      "epoch": 2.3417692682431035,
      "grad_norm": 2.96875,
      "learning_rate": 1.2189537138718195e-05,
      "loss": 0.8205,
      "step": 668170
    },
    {
      "epoch": 2.3418043157499993,
      "grad_norm": 3.03125,
      "learning_rate": 1.2188888110054493e-05,
      "loss": 0.8513,
      "step": 668180
    },
    {
      "epoch": 2.3418393632568946,
      "grad_norm": 3.03125,
      "learning_rate": 1.218823908139079e-05,
      "loss": 0.7688,
      "step": 668190
    },
    {
      "epoch": 2.3418744107637903,
      "grad_norm": 3.21875,
      "learning_rate": 1.2187590052727089e-05,
      "loss": 0.8936,
      "step": 668200
    },
    {
      "epoch": 2.341909458270686,
      "grad_norm": 2.765625,
      "learning_rate": 1.2186941024063388e-05,
      "loss": 0.805,
      "step": 668210
    },
    {
      "epoch": 2.3419445057775814,
      "grad_norm": 2.921875,
      "learning_rate": 1.2186291995399686e-05,
      "loss": 0.7832,
      "step": 668220
    },
    {
      "epoch": 2.341979553284477,
      "grad_norm": 2.984375,
      "learning_rate": 1.2185642966735983e-05,
      "loss": 0.8659,
      "step": 668230
    },
    {
      "epoch": 2.342014600791373,
      "grad_norm": 2.859375,
      "learning_rate": 1.218499393807228e-05,
      "loss": 0.7557,
      "step": 668240
    },
    {
      "epoch": 2.3420496482982682,
      "grad_norm": 3.328125,
      "learning_rate": 1.2184344909408579e-05,
      "loss": 0.8607,
      "step": 668250
    },
    {
      "epoch": 2.342084695805164,
      "grad_norm": 3.1875,
      "learning_rate": 1.2183695880744877e-05,
      "loss": 0.7967,
      "step": 668260
    },
    {
      "epoch": 2.3421197433120593,
      "grad_norm": 3.09375,
      "learning_rate": 1.2183046852081176e-05,
      "loss": 0.77,
      "step": 668270
    },
    {
      "epoch": 2.342154790818955,
      "grad_norm": 3.15625,
      "learning_rate": 1.2182397823417474e-05,
      "loss": 0.8374,
      "step": 668280
    },
    {
      "epoch": 2.342189838325851,
      "grad_norm": 3.015625,
      "learning_rate": 1.2181748794753772e-05,
      "loss": 0.8396,
      "step": 668290
    },
    {
      "epoch": 2.342224885832746,
      "grad_norm": 2.59375,
      "learning_rate": 1.218109976609007e-05,
      "loss": 0.8091,
      "step": 668300
    },
    {
      "epoch": 2.342259933339642,
      "grad_norm": 2.78125,
      "learning_rate": 1.2180450737426368e-05,
      "loss": 0.817,
      "step": 668310
    },
    {
      "epoch": 2.3422949808465376,
      "grad_norm": 3.140625,
      "learning_rate": 1.2179801708762666e-05,
      "loss": 0.8728,
      "step": 668320
    },
    {
      "epoch": 2.342330028353433,
      "grad_norm": 3.21875,
      "learning_rate": 1.2179152680098964e-05,
      "loss": 0.7956,
      "step": 668330
    },
    {
      "epoch": 2.3423650758603287,
      "grad_norm": 3.203125,
      "learning_rate": 1.2178503651435262e-05,
      "loss": 0.8746,
      "step": 668340
    },
    {
      "epoch": 2.3424001233672245,
      "grad_norm": 2.9375,
      "learning_rate": 1.217785462277156e-05,
      "loss": 0.7991,
      "step": 668350
    },
    {
      "epoch": 2.34243517087412,
      "grad_norm": 2.5625,
      "learning_rate": 1.2177205594107858e-05,
      "loss": 0.7973,
      "step": 668360
    },
    {
      "epoch": 2.3424702183810155,
      "grad_norm": 3.265625,
      "learning_rate": 1.2176556565444156e-05,
      "loss": 0.7715,
      "step": 668370
    },
    {
      "epoch": 2.342505265887911,
      "grad_norm": 3.21875,
      "learning_rate": 1.2175907536780454e-05,
      "loss": 0.9483,
      "step": 668380
    },
    {
      "epoch": 2.3425403133948066,
      "grad_norm": 2.96875,
      "learning_rate": 1.2175258508116754e-05,
      "loss": 0.8305,
      "step": 668390
    },
    {
      "epoch": 2.3425753609017024,
      "grad_norm": 2.875,
      "learning_rate": 1.2174609479453052e-05,
      "loss": 0.8086,
      "step": 668400
    },
    {
      "epoch": 2.3426104084085977,
      "grad_norm": 2.59375,
      "learning_rate": 1.217396045078935e-05,
      "loss": 0.7775,
      "step": 668410
    },
    {
      "epoch": 2.3426454559154934,
      "grad_norm": 2.9375,
      "learning_rate": 1.2173311422125646e-05,
      "loss": 0.8221,
      "step": 668420
    },
    {
      "epoch": 2.342680503422389,
      "grad_norm": 2.953125,
      "learning_rate": 1.2172662393461944e-05,
      "loss": 0.881,
      "step": 668430
    },
    {
      "epoch": 2.3427155509292845,
      "grad_norm": 2.890625,
      "learning_rate": 1.2172013364798242e-05,
      "loss": 0.7948,
      "step": 668440
    },
    {
      "epoch": 2.3427505984361803,
      "grad_norm": 2.96875,
      "learning_rate": 1.2171364336134542e-05,
      "loss": 0.7372,
      "step": 668450
    },
    {
      "epoch": 2.342785645943076,
      "grad_norm": 2.3125,
      "learning_rate": 1.217071530747084e-05,
      "loss": 0.7635,
      "step": 668460
    },
    {
      "epoch": 2.3428206934499713,
      "grad_norm": 3.453125,
      "learning_rate": 1.2170066278807138e-05,
      "loss": 0.8024,
      "step": 668470
    },
    {
      "epoch": 2.342855740956867,
      "grad_norm": 2.96875,
      "learning_rate": 1.2169417250143436e-05,
      "loss": 0.7953,
      "step": 668480
    },
    {
      "epoch": 2.3428907884637624,
      "grad_norm": 2.796875,
      "learning_rate": 1.2168768221479734e-05,
      "loss": 0.7934,
      "step": 668490
    },
    {
      "epoch": 2.342925835970658,
      "grad_norm": 3.328125,
      "learning_rate": 1.2168119192816032e-05,
      "loss": 0.8783,
      "step": 668500
    },
    {
      "epoch": 2.342960883477554,
      "grad_norm": 3.09375,
      "learning_rate": 1.216747016415233e-05,
      "loss": 0.8491,
      "step": 668510
    },
    {
      "epoch": 2.3429959309844492,
      "grad_norm": 2.859375,
      "learning_rate": 1.2166821135488628e-05,
      "loss": 0.8394,
      "step": 668520
    },
    {
      "epoch": 2.343030978491345,
      "grad_norm": 3.5625,
      "learning_rate": 1.2166172106824926e-05,
      "loss": 0.8085,
      "step": 668530
    },
    {
      "epoch": 2.3430660259982408,
      "grad_norm": 2.875,
      "learning_rate": 1.2165523078161224e-05,
      "loss": 0.813,
      "step": 668540
    },
    {
      "epoch": 2.343101073505136,
      "grad_norm": 2.4375,
      "learning_rate": 1.2164874049497522e-05,
      "loss": 0.6969,
      "step": 668550
    },
    {
      "epoch": 2.343136121012032,
      "grad_norm": 2.859375,
      "learning_rate": 1.216422502083382e-05,
      "loss": 0.8271,
      "step": 668560
    },
    {
      "epoch": 2.3431711685189276,
      "grad_norm": 3.375,
      "learning_rate": 1.2163575992170118e-05,
      "loss": 0.8666,
      "step": 668570
    },
    {
      "epoch": 2.343206216025823,
      "grad_norm": 3.0625,
      "learning_rate": 1.2162926963506418e-05,
      "loss": 0.8127,
      "step": 668580
    },
    {
      "epoch": 2.3432412635327187,
      "grad_norm": 3.75,
      "learning_rate": 1.2162277934842716e-05,
      "loss": 0.7843,
      "step": 668590
    },
    {
      "epoch": 2.343276311039614,
      "grad_norm": 3.140625,
      "learning_rate": 1.2161628906179014e-05,
      "loss": 0.7466,
      "step": 668600
    },
    {
      "epoch": 2.3433113585465097,
      "grad_norm": 2.703125,
      "learning_rate": 1.216097987751531e-05,
      "loss": 0.8132,
      "step": 668610
    },
    {
      "epoch": 2.3433464060534055,
      "grad_norm": 2.9375,
      "learning_rate": 1.2160330848851608e-05,
      "loss": 0.8531,
      "step": 668620
    },
    {
      "epoch": 2.343381453560301,
      "grad_norm": 2.890625,
      "learning_rate": 1.2159681820187908e-05,
      "loss": 0.8439,
      "step": 668630
    },
    {
      "epoch": 2.3434165010671966,
      "grad_norm": 2.640625,
      "learning_rate": 1.2159032791524206e-05,
      "loss": 0.8403,
      "step": 668640
    },
    {
      "epoch": 2.3434515485740923,
      "grad_norm": 2.625,
      "learning_rate": 1.2158383762860504e-05,
      "loss": 0.8728,
      "step": 668650
    },
    {
      "epoch": 2.3434865960809876,
      "grad_norm": 3.5625,
      "learning_rate": 1.2157734734196802e-05,
      "loss": 0.8038,
      "step": 668660
    },
    {
      "epoch": 2.3435216435878834,
      "grad_norm": 2.59375,
      "learning_rate": 1.21570857055331e-05,
      "loss": 0.8159,
      "step": 668670
    },
    {
      "epoch": 2.343556691094779,
      "grad_norm": 2.703125,
      "learning_rate": 1.2156436676869398e-05,
      "loss": 0.8901,
      "step": 668680
    },
    {
      "epoch": 2.3435917386016745,
      "grad_norm": 2.984375,
      "learning_rate": 1.2155787648205696e-05,
      "loss": 0.7913,
      "step": 668690
    },
    {
      "epoch": 2.3436267861085702,
      "grad_norm": 2.703125,
      "learning_rate": 1.2155138619541994e-05,
      "loss": 0.8314,
      "step": 668700
    },
    {
      "epoch": 2.3436618336154655,
      "grad_norm": 3.015625,
      "learning_rate": 1.2154489590878292e-05,
      "loss": 0.897,
      "step": 668710
    },
    {
      "epoch": 2.3436968811223613,
      "grad_norm": 3.390625,
      "learning_rate": 1.215384056221459e-05,
      "loss": 0.8227,
      "step": 668720
    },
    {
      "epoch": 2.343731928629257,
      "grad_norm": 3.0625,
      "learning_rate": 1.2153191533550888e-05,
      "loss": 0.8081,
      "step": 668730
    },
    {
      "epoch": 2.3437669761361524,
      "grad_norm": 2.9375,
      "learning_rate": 1.2152542504887186e-05,
      "loss": 0.7681,
      "step": 668740
    },
    {
      "epoch": 2.343802023643048,
      "grad_norm": 2.953125,
      "learning_rate": 1.2151893476223484e-05,
      "loss": 0.7888,
      "step": 668750
    },
    {
      "epoch": 2.343837071149944,
      "grad_norm": 2.421875,
      "learning_rate": 1.2151244447559784e-05,
      "loss": 0.7483,
      "step": 668760
    },
    {
      "epoch": 2.343872118656839,
      "grad_norm": 2.71875,
      "learning_rate": 1.2150595418896082e-05,
      "loss": 0.8009,
      "step": 668770
    },
    {
      "epoch": 2.343907166163735,
      "grad_norm": 2.375,
      "learning_rate": 1.214994639023238e-05,
      "loss": 0.7628,
      "step": 668780
    },
    {
      "epoch": 2.3439422136706307,
      "grad_norm": 3.015625,
      "learning_rate": 1.2149297361568678e-05,
      "loss": 0.7384,
      "step": 668790
    },
    {
      "epoch": 2.343977261177526,
      "grad_norm": 3.078125,
      "learning_rate": 1.2148648332904974e-05,
      "loss": 0.8574,
      "step": 668800
    },
    {
      "epoch": 2.344012308684422,
      "grad_norm": 2.734375,
      "learning_rate": 1.2147999304241272e-05,
      "loss": 0.8343,
      "step": 668810
    },
    {
      "epoch": 2.344047356191317,
      "grad_norm": 2.828125,
      "learning_rate": 1.2147350275577572e-05,
      "loss": 0.8195,
      "step": 668820
    },
    {
      "epoch": 2.344082403698213,
      "grad_norm": 2.75,
      "learning_rate": 1.214670124691387e-05,
      "loss": 0.8073,
      "step": 668830
    },
    {
      "epoch": 2.3441174512051086,
      "grad_norm": 3.703125,
      "learning_rate": 1.2146052218250168e-05,
      "loss": 0.8434,
      "step": 668840
    },
    {
      "epoch": 2.344152498712004,
      "grad_norm": 2.328125,
      "learning_rate": 1.2145403189586466e-05,
      "loss": 0.8249,
      "step": 668850
    },
    {
      "epoch": 2.3441875462188997,
      "grad_norm": 2.859375,
      "learning_rate": 1.2144754160922764e-05,
      "loss": 0.8306,
      "step": 668860
    },
    {
      "epoch": 2.3442225937257954,
      "grad_norm": 2.5625,
      "learning_rate": 1.2144105132259062e-05,
      "loss": 0.7406,
      "step": 668870
    },
    {
      "epoch": 2.3442576412326908,
      "grad_norm": 2.734375,
      "learning_rate": 1.214345610359536e-05,
      "loss": 0.8406,
      "step": 668880
    },
    {
      "epoch": 2.3442926887395865,
      "grad_norm": 3.109375,
      "learning_rate": 1.2142807074931658e-05,
      "loss": 0.7861,
      "step": 668890
    },
    {
      "epoch": 2.3443277362464823,
      "grad_norm": 2.765625,
      "learning_rate": 1.2142158046267956e-05,
      "loss": 0.9011,
      "step": 668900
    },
    {
      "epoch": 2.3443627837533776,
      "grad_norm": 3.078125,
      "learning_rate": 1.2141509017604254e-05,
      "loss": 0.8228,
      "step": 668910
    },
    {
      "epoch": 2.3443978312602733,
      "grad_norm": 3.046875,
      "learning_rate": 1.2140859988940552e-05,
      "loss": 0.801,
      "step": 668920
    },
    {
      "epoch": 2.3444328787671687,
      "grad_norm": 2.78125,
      "learning_rate": 1.214021096027685e-05,
      "loss": 0.8359,
      "step": 668930
    },
    {
      "epoch": 2.3444679262740644,
      "grad_norm": 2.953125,
      "learning_rate": 1.213956193161315e-05,
      "loss": 0.8117,
      "step": 668940
    },
    {
      "epoch": 2.34450297378096,
      "grad_norm": 2.5625,
      "learning_rate": 1.2138912902949447e-05,
      "loss": 0.8247,
      "step": 668950
    },
    {
      "epoch": 2.344538021287856,
      "grad_norm": 3.09375,
      "learning_rate": 1.2138263874285745e-05,
      "loss": 0.8298,
      "step": 668960
    },
    {
      "epoch": 2.3445730687947512,
      "grad_norm": 2.71875,
      "learning_rate": 1.2137614845622043e-05,
      "loss": 0.8669,
      "step": 668970
    },
    {
      "epoch": 2.344608116301647,
      "grad_norm": 2.78125,
      "learning_rate": 1.2136965816958341e-05,
      "loss": 0.7431,
      "step": 668980
    },
    {
      "epoch": 2.3446431638085423,
      "grad_norm": 2.78125,
      "learning_rate": 1.2136316788294638e-05,
      "loss": 0.8052,
      "step": 668990
    },
    {
      "epoch": 2.344678211315438,
      "grad_norm": 2.859375,
      "learning_rate": 1.2135667759630937e-05,
      "loss": 0.7678,
      "step": 669000
    },
    {
      "epoch": 2.344713258822334,
      "grad_norm": 3.03125,
      "learning_rate": 1.2135018730967235e-05,
      "loss": 0.8076,
      "step": 669010
    },
    {
      "epoch": 2.344748306329229,
      "grad_norm": 2.828125,
      "learning_rate": 1.2134369702303533e-05,
      "loss": 0.8059,
      "step": 669020
    },
    {
      "epoch": 2.344783353836125,
      "grad_norm": 3.0,
      "learning_rate": 1.2133720673639831e-05,
      "loss": 0.8004,
      "step": 669030
    },
    {
      "epoch": 2.34481840134302,
      "grad_norm": 3.03125,
      "learning_rate": 1.213307164497613e-05,
      "loss": 0.8516,
      "step": 669040
    },
    {
      "epoch": 2.344853448849916,
      "grad_norm": 3.03125,
      "learning_rate": 1.2132422616312427e-05,
      "loss": 0.8223,
      "step": 669050
    },
    {
      "epoch": 2.3448884963568117,
      "grad_norm": 3.171875,
      "learning_rate": 1.2131773587648725e-05,
      "loss": 0.7598,
      "step": 669060
    },
    {
      "epoch": 2.3449235438637075,
      "grad_norm": 3.328125,
      "learning_rate": 1.2131124558985025e-05,
      "loss": 0.7904,
      "step": 669070
    },
    {
      "epoch": 2.344958591370603,
      "grad_norm": 2.703125,
      "learning_rate": 1.2130475530321321e-05,
      "loss": 0.7999,
      "step": 669080
    },
    {
      "epoch": 2.3449936388774986,
      "grad_norm": 2.84375,
      "learning_rate": 1.212982650165762e-05,
      "loss": 0.771,
      "step": 669090
    },
    {
      "epoch": 2.345028686384394,
      "grad_norm": 2.96875,
      "learning_rate": 1.2129177472993917e-05,
      "loss": 0.7877,
      "step": 669100
    },
    {
      "epoch": 2.3450637338912896,
      "grad_norm": 3.03125,
      "learning_rate": 1.2128528444330215e-05,
      "loss": 0.796,
      "step": 669110
    },
    {
      "epoch": 2.3450987813981854,
      "grad_norm": 2.375,
      "learning_rate": 1.2127879415666513e-05,
      "loss": 0.7406,
      "step": 669120
    },
    {
      "epoch": 2.3451338289050807,
      "grad_norm": 3.234375,
      "learning_rate": 1.2127230387002813e-05,
      "loss": 0.7942,
      "step": 669130
    },
    {
      "epoch": 2.3451688764119765,
      "grad_norm": 3.5,
      "learning_rate": 1.2126581358339111e-05,
      "loss": 0.9216,
      "step": 669140
    },
    {
      "epoch": 2.3452039239188722,
      "grad_norm": 3.03125,
      "learning_rate": 1.2125932329675409e-05,
      "loss": 0.8208,
      "step": 669150
    },
    {
      "epoch": 2.3452389714257675,
      "grad_norm": 2.875,
      "learning_rate": 1.2125283301011707e-05,
      "loss": 0.7714,
      "step": 669160
    },
    {
      "epoch": 2.3452740189326633,
      "grad_norm": 3.109375,
      "learning_rate": 1.2124634272348005e-05,
      "loss": 0.8415,
      "step": 669170
    },
    {
      "epoch": 2.345309066439559,
      "grad_norm": 2.21875,
      "learning_rate": 1.2123985243684303e-05,
      "loss": 0.7894,
      "step": 669180
    },
    {
      "epoch": 2.3453441139464544,
      "grad_norm": 2.8125,
      "learning_rate": 1.2123336215020601e-05,
      "loss": 0.8012,
      "step": 669190
    },
    {
      "epoch": 2.34537916145335,
      "grad_norm": 3.0,
      "learning_rate": 1.2122687186356899e-05,
      "loss": 0.7454,
      "step": 669200
    },
    {
      "epoch": 2.3454142089602454,
      "grad_norm": 3.046875,
      "learning_rate": 1.2122038157693197e-05,
      "loss": 0.8252,
      "step": 669210
    },
    {
      "epoch": 2.345449256467141,
      "grad_norm": 2.671875,
      "learning_rate": 1.2121389129029495e-05,
      "loss": 0.8029,
      "step": 669220
    },
    {
      "epoch": 2.345484303974037,
      "grad_norm": 2.71875,
      "learning_rate": 1.2120740100365793e-05,
      "loss": 0.81,
      "step": 669230
    },
    {
      "epoch": 2.3455193514809323,
      "grad_norm": 2.84375,
      "learning_rate": 1.2120091071702091e-05,
      "loss": 0.7679,
      "step": 669240
    },
    {
      "epoch": 2.345554398987828,
      "grad_norm": 2.875,
      "learning_rate": 1.211944204303839e-05,
      "loss": 0.8043,
      "step": 669250
    },
    {
      "epoch": 2.345589446494724,
      "grad_norm": 2.625,
      "learning_rate": 1.2118793014374689e-05,
      "loss": 0.783,
      "step": 669260
    },
    {
      "epoch": 2.345624494001619,
      "grad_norm": 3.6875,
      "learning_rate": 1.2118143985710985e-05,
      "loss": 0.8113,
      "step": 669270
    },
    {
      "epoch": 2.345659541508515,
      "grad_norm": 3.0625,
      "learning_rate": 1.2117494957047283e-05,
      "loss": 0.7569,
      "step": 669280
    },
    {
      "epoch": 2.3456945890154106,
      "grad_norm": 2.640625,
      "learning_rate": 1.2116845928383581e-05,
      "loss": 0.827,
      "step": 669290
    },
    {
      "epoch": 2.345729636522306,
      "grad_norm": 2.53125,
      "learning_rate": 1.2116196899719879e-05,
      "loss": 0.828,
      "step": 669300
    },
    {
      "epoch": 2.3457646840292017,
      "grad_norm": 3.453125,
      "learning_rate": 1.2115547871056179e-05,
      "loss": 0.8696,
      "step": 669310
    },
    {
      "epoch": 2.345799731536097,
      "grad_norm": 2.921875,
      "learning_rate": 1.2114898842392477e-05,
      "loss": 0.8578,
      "step": 669320
    },
    {
      "epoch": 2.3458347790429928,
      "grad_norm": 2.8125,
      "learning_rate": 1.2114249813728775e-05,
      "loss": 0.8413,
      "step": 669330
    },
    {
      "epoch": 2.3458698265498885,
      "grad_norm": 2.921875,
      "learning_rate": 1.2113600785065073e-05,
      "loss": 0.7874,
      "step": 669340
    },
    {
      "epoch": 2.345904874056784,
      "grad_norm": 2.5,
      "learning_rate": 1.211295175640137e-05,
      "loss": 0.7744,
      "step": 669350
    },
    {
      "epoch": 2.3459399215636796,
      "grad_norm": 2.5625,
      "learning_rate": 1.2112302727737667e-05,
      "loss": 0.7287,
      "step": 669360
    },
    {
      "epoch": 2.3459749690705753,
      "grad_norm": 2.921875,
      "learning_rate": 1.2111653699073967e-05,
      "loss": 0.8551,
      "step": 669370
    },
    {
      "epoch": 2.3460100165774707,
      "grad_norm": 2.90625,
      "learning_rate": 1.2111004670410265e-05,
      "loss": 0.8486,
      "step": 669380
    },
    {
      "epoch": 2.3460450640843664,
      "grad_norm": 3.34375,
      "learning_rate": 1.2110355641746563e-05,
      "loss": 0.7977,
      "step": 669390
    },
    {
      "epoch": 2.346080111591262,
      "grad_norm": 2.796875,
      "learning_rate": 1.210970661308286e-05,
      "loss": 0.8287,
      "step": 669400
    },
    {
      "epoch": 2.3461151590981575,
      "grad_norm": 3.203125,
      "learning_rate": 1.2109057584419159e-05,
      "loss": 0.7311,
      "step": 669410
    },
    {
      "epoch": 2.3461502066050532,
      "grad_norm": 2.796875,
      "learning_rate": 1.2108408555755457e-05,
      "loss": 0.8733,
      "step": 669420
    },
    {
      "epoch": 2.3461852541119486,
      "grad_norm": 3.15625,
      "learning_rate": 1.2107759527091755e-05,
      "loss": 0.8001,
      "step": 669430
    },
    {
      "epoch": 2.3462203016188443,
      "grad_norm": 3.34375,
      "learning_rate": 1.2107110498428054e-05,
      "loss": 0.8413,
      "step": 669440
    },
    {
      "epoch": 2.34625534912574,
      "grad_norm": 3.203125,
      "learning_rate": 1.2106461469764352e-05,
      "loss": 0.8773,
      "step": 669450
    },
    {
      "epoch": 2.3462903966326354,
      "grad_norm": 2.6875,
      "learning_rate": 1.2105812441100649e-05,
      "loss": 0.7775,
      "step": 669460
    },
    {
      "epoch": 2.346325444139531,
      "grad_norm": 3.28125,
      "learning_rate": 1.2105163412436947e-05,
      "loss": 0.8369,
      "step": 669470
    },
    {
      "epoch": 2.346360491646427,
      "grad_norm": 3.046875,
      "learning_rate": 1.2104514383773245e-05,
      "loss": 0.8318,
      "step": 669480
    },
    {
      "epoch": 2.346395539153322,
      "grad_norm": 3.09375,
      "learning_rate": 1.2103865355109544e-05,
      "loss": 0.778,
      "step": 669490
    },
    {
      "epoch": 2.346430586660218,
      "grad_norm": 3.1875,
      "learning_rate": 1.2103216326445842e-05,
      "loss": 0.7353,
      "step": 669500
    },
    {
      "epoch": 2.3464656341671137,
      "grad_norm": 2.75,
      "learning_rate": 1.210256729778214e-05,
      "loss": 0.8023,
      "step": 669510
    },
    {
      "epoch": 2.346500681674009,
      "grad_norm": 3.140625,
      "learning_rate": 1.2101918269118438e-05,
      "loss": 0.8465,
      "step": 669520
    },
    {
      "epoch": 2.346535729180905,
      "grad_norm": 2.375,
      "learning_rate": 1.2101269240454736e-05,
      "loss": 0.8321,
      "step": 669530
    },
    {
      "epoch": 2.3465707766878,
      "grad_norm": 2.5,
      "learning_rate": 1.2100620211791034e-05,
      "loss": 0.7666,
      "step": 669540
    },
    {
      "epoch": 2.346605824194696,
      "grad_norm": 2.625,
      "learning_rate": 1.2099971183127332e-05,
      "loss": 0.7803,
      "step": 669550
    },
    {
      "epoch": 2.3466408717015916,
      "grad_norm": 3.234375,
      "learning_rate": 1.209932215446363e-05,
      "loss": 0.8092,
      "step": 669560
    },
    {
      "epoch": 2.346675919208487,
      "grad_norm": 3.453125,
      "learning_rate": 1.2098673125799928e-05,
      "loss": 0.8169,
      "step": 669570
    },
    {
      "epoch": 2.3467109667153827,
      "grad_norm": 3.015625,
      "learning_rate": 1.2098024097136226e-05,
      "loss": 0.8184,
      "step": 669580
    },
    {
      "epoch": 2.3467460142222785,
      "grad_norm": 3.484375,
      "learning_rate": 1.2097375068472524e-05,
      "loss": 0.9347,
      "step": 669590
    },
    {
      "epoch": 2.346781061729174,
      "grad_norm": 3.125,
      "learning_rate": 1.2096726039808822e-05,
      "loss": 0.7799,
      "step": 669600
    },
    {
      "epoch": 2.3468161092360695,
      "grad_norm": 2.953125,
      "learning_rate": 1.209607701114512e-05,
      "loss": 0.7514,
      "step": 669610
    },
    {
      "epoch": 2.3468511567429653,
      "grad_norm": 2.5625,
      "learning_rate": 1.209542798248142e-05,
      "loss": 0.7758,
      "step": 669620
    },
    {
      "epoch": 2.3468862042498606,
      "grad_norm": 2.8125,
      "learning_rate": 1.2094778953817718e-05,
      "loss": 0.771,
      "step": 669630
    },
    {
      "epoch": 2.3469212517567564,
      "grad_norm": 3.15625,
      "learning_rate": 1.2094129925154016e-05,
      "loss": 0.7065,
      "step": 669640
    },
    {
      "epoch": 2.3469562992636517,
      "grad_norm": 2.90625,
      "learning_rate": 1.2093480896490312e-05,
      "loss": 0.8285,
      "step": 669650
    },
    {
      "epoch": 2.3469913467705474,
      "grad_norm": 3.078125,
      "learning_rate": 1.209283186782661e-05,
      "loss": 0.8458,
      "step": 669660
    },
    {
      "epoch": 2.347026394277443,
      "grad_norm": 3.125,
      "learning_rate": 1.2092182839162908e-05,
      "loss": 0.8133,
      "step": 669670
    },
    {
      "epoch": 2.3470614417843385,
      "grad_norm": 3.078125,
      "learning_rate": 1.2091533810499208e-05,
      "loss": 0.8838,
      "step": 669680
    },
    {
      "epoch": 2.3470964892912343,
      "grad_norm": 3.03125,
      "learning_rate": 1.2090884781835506e-05,
      "loss": 0.8605,
      "step": 669690
    },
    {
      "epoch": 2.34713153679813,
      "grad_norm": 2.75,
      "learning_rate": 1.2090235753171804e-05,
      "loss": 0.7623,
      "step": 669700
    },
    {
      "epoch": 2.3471665843050253,
      "grad_norm": 2.875,
      "learning_rate": 1.2089586724508102e-05,
      "loss": 0.8358,
      "step": 669710
    },
    {
      "epoch": 2.347201631811921,
      "grad_norm": 3.015625,
      "learning_rate": 1.20889376958444e-05,
      "loss": 0.7956,
      "step": 669720
    },
    {
      "epoch": 2.347236679318817,
      "grad_norm": 2.96875,
      "learning_rate": 1.2088288667180698e-05,
      "loss": 0.8543,
      "step": 669730
    },
    {
      "epoch": 2.347271726825712,
      "grad_norm": 3.140625,
      "learning_rate": 1.2087639638516996e-05,
      "loss": 0.8601,
      "step": 669740
    },
    {
      "epoch": 2.347306774332608,
      "grad_norm": 2.6875,
      "learning_rate": 1.2086990609853294e-05,
      "loss": 0.727,
      "step": 669750
    },
    {
      "epoch": 2.3473418218395032,
      "grad_norm": 2.984375,
      "learning_rate": 1.2086341581189592e-05,
      "loss": 0.7967,
      "step": 669760
    },
    {
      "epoch": 2.347376869346399,
      "grad_norm": 3.109375,
      "learning_rate": 1.208569255252589e-05,
      "loss": 0.8461,
      "step": 669770
    },
    {
      "epoch": 2.3474119168532948,
      "grad_norm": 3.0,
      "learning_rate": 1.2085043523862188e-05,
      "loss": 0.8983,
      "step": 669780
    },
    {
      "epoch": 2.34744696436019,
      "grad_norm": 2.734375,
      "learning_rate": 1.2084394495198486e-05,
      "loss": 0.7691,
      "step": 669790
    },
    {
      "epoch": 2.347482011867086,
      "grad_norm": 3.03125,
      "learning_rate": 1.2083745466534786e-05,
      "loss": 0.7401,
      "step": 669800
    },
    {
      "epoch": 2.3475170593739816,
      "grad_norm": 3.046875,
      "learning_rate": 1.2083096437871084e-05,
      "loss": 0.8002,
      "step": 669810
    },
    {
      "epoch": 2.347552106880877,
      "grad_norm": 2.640625,
      "learning_rate": 1.2082447409207382e-05,
      "loss": 0.7883,
      "step": 669820
    },
    {
      "epoch": 2.3475871543877727,
      "grad_norm": 2.71875,
      "learning_rate": 1.2081798380543678e-05,
      "loss": 0.8183,
      "step": 669830
    },
    {
      "epoch": 2.3476222018946684,
      "grad_norm": 2.96875,
      "learning_rate": 1.2081149351879976e-05,
      "loss": 0.8206,
      "step": 669840
    },
    {
      "epoch": 2.3476572494015637,
      "grad_norm": 2.734375,
      "learning_rate": 1.2080500323216274e-05,
      "loss": 0.8376,
      "step": 669850
    },
    {
      "epoch": 2.3476922969084595,
      "grad_norm": 3.0,
      "learning_rate": 1.2079851294552574e-05,
      "loss": 0.8929,
      "step": 669860
    },
    {
      "epoch": 2.347727344415355,
      "grad_norm": 2.65625,
      "learning_rate": 1.2079202265888872e-05,
      "loss": 0.7375,
      "step": 669870
    },
    {
      "epoch": 2.3477623919222506,
      "grad_norm": 3.484375,
      "learning_rate": 1.207855323722517e-05,
      "loss": 0.8838,
      "step": 669880
    },
    {
      "epoch": 2.3477974394291463,
      "grad_norm": 2.796875,
      "learning_rate": 1.2077904208561468e-05,
      "loss": 0.8814,
      "step": 669890
    },
    {
      "epoch": 2.3478324869360416,
      "grad_norm": 2.78125,
      "learning_rate": 1.2077255179897766e-05,
      "loss": 0.8878,
      "step": 669900
    },
    {
      "epoch": 2.3478675344429374,
      "grad_norm": 3.046875,
      "learning_rate": 1.2076606151234064e-05,
      "loss": 0.7991,
      "step": 669910
    },
    {
      "epoch": 2.347902581949833,
      "grad_norm": 2.828125,
      "learning_rate": 1.2075957122570362e-05,
      "loss": 0.8004,
      "step": 669920
    },
    {
      "epoch": 2.3479376294567285,
      "grad_norm": 2.890625,
      "learning_rate": 1.207530809390666e-05,
      "loss": 0.847,
      "step": 669930
    },
    {
      "epoch": 2.347972676963624,
      "grad_norm": 2.84375,
      "learning_rate": 1.2074659065242958e-05,
      "loss": 0.8687,
      "step": 669940
    },
    {
      "epoch": 2.34800772447052,
      "grad_norm": 3.234375,
      "learning_rate": 1.2074010036579256e-05,
      "loss": 0.8473,
      "step": 669950
    },
    {
      "epoch": 2.3480427719774153,
      "grad_norm": 3.078125,
      "learning_rate": 1.2073361007915554e-05,
      "loss": 0.7538,
      "step": 669960
    },
    {
      "epoch": 2.348077819484311,
      "grad_norm": 3.125,
      "learning_rate": 1.2072711979251852e-05,
      "loss": 0.8159,
      "step": 669970
    },
    {
      "epoch": 2.3481128669912064,
      "grad_norm": 3.109375,
      "learning_rate": 1.207206295058815e-05,
      "loss": 0.9028,
      "step": 669980
    },
    {
      "epoch": 2.348147914498102,
      "grad_norm": 3.046875,
      "learning_rate": 1.207141392192445e-05,
      "loss": 0.7249,
      "step": 669990
    },
    {
      "epoch": 2.348182962004998,
      "grad_norm": 2.34375,
      "learning_rate": 1.2070764893260747e-05,
      "loss": 0.7154,
      "step": 670000
    },
    {
      "epoch": 2.348182962004998,
      "eval_loss": 0.7602644562721252,
      "eval_runtime": 553.7406,
      "eval_samples_per_second": 687.029,
      "eval_steps_per_second": 57.252,
      "step": 670000
    },
    {
      "epoch": 2.348218009511893,
      "grad_norm": 3.09375,
      "learning_rate": 1.2070115864597045e-05,
      "loss": 0.7649,
      "step": 670010
    },
    {
      "epoch": 2.348253057018789,
      "grad_norm": 2.9375,
      "learning_rate": 1.2069466835933342e-05,
      "loss": 0.8586,
      "step": 670020
    },
    {
      "epoch": 2.3482881045256847,
      "grad_norm": 2.75,
      "learning_rate": 1.206881780726964e-05,
      "loss": 0.7652,
      "step": 670030
    },
    {
      "epoch": 2.34832315203258,
      "grad_norm": 2.8125,
      "learning_rate": 1.206816877860594e-05,
      "loss": 0.7964,
      "step": 670040
    },
    {
      "epoch": 2.3483581995394758,
      "grad_norm": 3.109375,
      "learning_rate": 1.2067519749942237e-05,
      "loss": 0.7574,
      "step": 670050
    },
    {
      "epoch": 2.3483932470463715,
      "grad_norm": 2.453125,
      "learning_rate": 1.2066870721278535e-05,
      "loss": 0.8374,
      "step": 670060
    },
    {
      "epoch": 2.348428294553267,
      "grad_norm": 3.140625,
      "learning_rate": 1.2066221692614833e-05,
      "loss": 0.8151,
      "step": 670070
    },
    {
      "epoch": 2.3484633420601626,
      "grad_norm": 3.0625,
      "learning_rate": 1.2065572663951131e-05,
      "loss": 0.7904,
      "step": 670080
    },
    {
      "epoch": 2.348498389567058,
      "grad_norm": 3.09375,
      "learning_rate": 1.206492363528743e-05,
      "loss": 0.7634,
      "step": 670090
    },
    {
      "epoch": 2.3485334370739537,
      "grad_norm": 2.859375,
      "learning_rate": 1.2064274606623727e-05,
      "loss": 0.7645,
      "step": 670100
    },
    {
      "epoch": 2.3485684845808494,
      "grad_norm": 2.640625,
      "learning_rate": 1.2063625577960027e-05,
      "loss": 0.7099,
      "step": 670110
    },
    {
      "epoch": 2.3486035320877447,
      "grad_norm": 2.796875,
      "learning_rate": 1.2062976549296323e-05,
      "loss": 0.8148,
      "step": 670120
    },
    {
      "epoch": 2.3486385795946405,
      "grad_norm": 3.09375,
      "learning_rate": 1.2062327520632621e-05,
      "loss": 0.8226,
      "step": 670130
    },
    {
      "epoch": 2.3486736271015363,
      "grad_norm": 3.234375,
      "learning_rate": 1.206167849196892e-05,
      "loss": 0.7998,
      "step": 670140
    },
    {
      "epoch": 2.3487086746084316,
      "grad_norm": 3.140625,
      "learning_rate": 1.2061029463305217e-05,
      "loss": 0.7404,
      "step": 670150
    },
    {
      "epoch": 2.3487437221153273,
      "grad_norm": 3.25,
      "learning_rate": 1.2060380434641515e-05,
      "loss": 0.7719,
      "step": 670160
    },
    {
      "epoch": 2.348778769622223,
      "grad_norm": 2.65625,
      "learning_rate": 1.2059731405977815e-05,
      "loss": 0.8076,
      "step": 670170
    },
    {
      "epoch": 2.3488138171291184,
      "grad_norm": 3.296875,
      "learning_rate": 1.2059082377314113e-05,
      "loss": 0.7945,
      "step": 670180
    },
    {
      "epoch": 2.348848864636014,
      "grad_norm": 2.53125,
      "learning_rate": 1.2058433348650411e-05,
      "loss": 0.7564,
      "step": 670190
    },
    {
      "epoch": 2.3488839121429095,
      "grad_norm": 2.765625,
      "learning_rate": 1.2057784319986709e-05,
      "loss": 0.8706,
      "step": 670200
    },
    {
      "epoch": 2.3489189596498052,
      "grad_norm": 2.84375,
      "learning_rate": 1.2057135291323005e-05,
      "loss": 0.8341,
      "step": 670210
    },
    {
      "epoch": 2.348954007156701,
      "grad_norm": 3.0,
      "learning_rate": 1.2056486262659303e-05,
      "loss": 0.8561,
      "step": 670220
    },
    {
      "epoch": 2.3489890546635968,
      "grad_norm": 2.59375,
      "learning_rate": 1.2055837233995603e-05,
      "loss": 0.8124,
      "step": 670230
    },
    {
      "epoch": 2.349024102170492,
      "grad_norm": 2.71875,
      "learning_rate": 1.2055188205331901e-05,
      "loss": 0.8469,
      "step": 670240
    },
    {
      "epoch": 2.349059149677388,
      "grad_norm": 2.53125,
      "learning_rate": 1.2054539176668199e-05,
      "loss": 0.8599,
      "step": 670250
    },
    {
      "epoch": 2.349094197184283,
      "grad_norm": 3.484375,
      "learning_rate": 1.2053890148004497e-05,
      "loss": 0.8463,
      "step": 670260
    },
    {
      "epoch": 2.349129244691179,
      "grad_norm": 2.859375,
      "learning_rate": 1.2053241119340795e-05,
      "loss": 0.8611,
      "step": 670270
    },
    {
      "epoch": 2.3491642921980747,
      "grad_norm": 3.171875,
      "learning_rate": 1.2052592090677093e-05,
      "loss": 0.8206,
      "step": 670280
    },
    {
      "epoch": 2.34919933970497,
      "grad_norm": 2.65625,
      "learning_rate": 1.2051943062013391e-05,
      "loss": 0.7708,
      "step": 670290
    },
    {
      "epoch": 2.3492343872118657,
      "grad_norm": 3.140625,
      "learning_rate": 1.2051294033349689e-05,
      "loss": 0.7931,
      "step": 670300
    },
    {
      "epoch": 2.349269434718761,
      "grad_norm": 2.890625,
      "learning_rate": 1.2050645004685987e-05,
      "loss": 0.8276,
      "step": 670310
    },
    {
      "epoch": 2.349304482225657,
      "grad_norm": 2.890625,
      "learning_rate": 1.2049995976022285e-05,
      "loss": 0.9038,
      "step": 670320
    },
    {
      "epoch": 2.3493395297325526,
      "grad_norm": 2.546875,
      "learning_rate": 1.2049346947358583e-05,
      "loss": 0.8347,
      "step": 670330
    },
    {
      "epoch": 2.3493745772394483,
      "grad_norm": 3.078125,
      "learning_rate": 1.2048697918694881e-05,
      "loss": 0.8207,
      "step": 670340
    },
    {
      "epoch": 2.3494096247463436,
      "grad_norm": 3.28125,
      "learning_rate": 1.204804889003118e-05,
      "loss": 0.7146,
      "step": 670350
    },
    {
      "epoch": 2.3494446722532394,
      "grad_norm": 2.8125,
      "learning_rate": 1.2047399861367479e-05,
      "loss": 0.8012,
      "step": 670360
    },
    {
      "epoch": 2.3494797197601347,
      "grad_norm": 3.546875,
      "learning_rate": 1.2046750832703777e-05,
      "loss": 0.8583,
      "step": 670370
    },
    {
      "epoch": 2.3495147672670305,
      "grad_norm": 2.90625,
      "learning_rate": 1.2046101804040075e-05,
      "loss": 0.8741,
      "step": 670380
    },
    {
      "epoch": 2.349549814773926,
      "grad_norm": 2.796875,
      "learning_rate": 1.2045452775376373e-05,
      "loss": 0.8842,
      "step": 670390
    },
    {
      "epoch": 2.3495848622808215,
      "grad_norm": 3.046875,
      "learning_rate": 1.2044803746712669e-05,
      "loss": 0.7444,
      "step": 670400
    },
    {
      "epoch": 2.3496199097877173,
      "grad_norm": 2.8125,
      "learning_rate": 1.2044154718048969e-05,
      "loss": 0.8327,
      "step": 670410
    },
    {
      "epoch": 2.3496549572946126,
      "grad_norm": 3.53125,
      "learning_rate": 1.2043505689385267e-05,
      "loss": 0.7527,
      "step": 670420
    },
    {
      "epoch": 2.3496900048015084,
      "grad_norm": 2.921875,
      "learning_rate": 1.2042856660721565e-05,
      "loss": 0.8097,
      "step": 670430
    },
    {
      "epoch": 2.349725052308404,
      "grad_norm": 2.546875,
      "learning_rate": 1.2042207632057863e-05,
      "loss": 0.8591,
      "step": 670440
    },
    {
      "epoch": 2.3497600998153,
      "grad_norm": 3.046875,
      "learning_rate": 1.204155860339416e-05,
      "loss": 0.7965,
      "step": 670450
    },
    {
      "epoch": 2.349795147322195,
      "grad_norm": 2.8125,
      "learning_rate": 1.2040909574730459e-05,
      "loss": 0.7354,
      "step": 670460
    },
    {
      "epoch": 2.349830194829091,
      "grad_norm": 3.0,
      "learning_rate": 1.2040260546066757e-05,
      "loss": 0.7889,
      "step": 670470
    },
    {
      "epoch": 2.3498652423359863,
      "grad_norm": 2.84375,
      "learning_rate": 1.2039611517403056e-05,
      "loss": 0.8237,
      "step": 670480
    },
    {
      "epoch": 2.349900289842882,
      "grad_norm": 2.859375,
      "learning_rate": 1.2038962488739353e-05,
      "loss": 0.8327,
      "step": 670490
    },
    {
      "epoch": 2.3499353373497778,
      "grad_norm": 3.125,
      "learning_rate": 1.203831346007565e-05,
      "loss": 0.8309,
      "step": 670500
    },
    {
      "epoch": 2.349970384856673,
      "grad_norm": 2.609375,
      "learning_rate": 1.2037664431411949e-05,
      "loss": 0.8363,
      "step": 670510
    },
    {
      "epoch": 2.350005432363569,
      "grad_norm": 2.546875,
      "learning_rate": 1.2037015402748247e-05,
      "loss": 0.8467,
      "step": 670520
    },
    {
      "epoch": 2.3500404798704646,
      "grad_norm": 3.140625,
      "learning_rate": 1.2036366374084545e-05,
      "loss": 0.82,
      "step": 670530
    },
    {
      "epoch": 2.35007552737736,
      "grad_norm": 2.671875,
      "learning_rate": 1.2035717345420844e-05,
      "loss": 0.724,
      "step": 670540
    },
    {
      "epoch": 2.3501105748842557,
      "grad_norm": 3.109375,
      "learning_rate": 1.2035068316757142e-05,
      "loss": 0.7615,
      "step": 670550
    },
    {
      "epoch": 2.3501456223911514,
      "grad_norm": 3.3125,
      "learning_rate": 1.203441928809344e-05,
      "loss": 0.8937,
      "step": 670560
    },
    {
      "epoch": 2.3501806698980467,
      "grad_norm": 2.734375,
      "learning_rate": 1.2033770259429738e-05,
      "loss": 0.8389,
      "step": 670570
    },
    {
      "epoch": 2.3502157174049425,
      "grad_norm": 2.640625,
      "learning_rate": 1.2033121230766036e-05,
      "loss": 0.7701,
      "step": 670580
    },
    {
      "epoch": 2.350250764911838,
      "grad_norm": 2.46875,
      "learning_rate": 1.2032472202102334e-05,
      "loss": 0.7645,
      "step": 670590
    },
    {
      "epoch": 2.3502858124187336,
      "grad_norm": 2.875,
      "learning_rate": 1.2031823173438632e-05,
      "loss": 0.8823,
      "step": 670600
    },
    {
      "epoch": 2.3503208599256293,
      "grad_norm": 3.453125,
      "learning_rate": 1.203117414477493e-05,
      "loss": 0.9002,
      "step": 670610
    },
    {
      "epoch": 2.3503559074325246,
      "grad_norm": 2.4375,
      "learning_rate": 1.2030525116111228e-05,
      "loss": 0.8163,
      "step": 670620
    },
    {
      "epoch": 2.3503909549394204,
      "grad_norm": 2.15625,
      "learning_rate": 1.2029876087447526e-05,
      "loss": 0.8381,
      "step": 670630
    },
    {
      "epoch": 2.350426002446316,
      "grad_norm": 2.890625,
      "learning_rate": 1.2029227058783824e-05,
      "loss": 0.8592,
      "step": 670640
    },
    {
      "epoch": 2.3504610499532115,
      "grad_norm": 3.046875,
      "learning_rate": 1.2028578030120122e-05,
      "loss": 0.7539,
      "step": 670650
    },
    {
      "epoch": 2.3504960974601072,
      "grad_norm": 2.890625,
      "learning_rate": 1.2027929001456422e-05,
      "loss": 0.8112,
      "step": 670660
    },
    {
      "epoch": 2.350531144967003,
      "grad_norm": 2.78125,
      "learning_rate": 1.202727997279272e-05,
      "loss": 0.7589,
      "step": 670670
    },
    {
      "epoch": 2.3505661924738983,
      "grad_norm": 3.171875,
      "learning_rate": 1.2026630944129016e-05,
      "loss": 0.7973,
      "step": 670680
    },
    {
      "epoch": 2.350601239980794,
      "grad_norm": 2.515625,
      "learning_rate": 1.2025981915465314e-05,
      "loss": 0.7737,
      "step": 670690
    },
    {
      "epoch": 2.3506362874876894,
      "grad_norm": 2.921875,
      "learning_rate": 1.2025332886801612e-05,
      "loss": 0.8104,
      "step": 670700
    },
    {
      "epoch": 2.350671334994585,
      "grad_norm": 2.890625,
      "learning_rate": 1.202468385813791e-05,
      "loss": 0.7433,
      "step": 670710
    },
    {
      "epoch": 2.350706382501481,
      "grad_norm": 3.671875,
      "learning_rate": 1.202403482947421e-05,
      "loss": 0.7659,
      "step": 670720
    },
    {
      "epoch": 2.350741430008376,
      "grad_norm": 2.953125,
      "learning_rate": 1.2023385800810508e-05,
      "loss": 0.8516,
      "step": 670730
    },
    {
      "epoch": 2.350776477515272,
      "grad_norm": 2.828125,
      "learning_rate": 1.2022736772146806e-05,
      "loss": 0.8479,
      "step": 670740
    },
    {
      "epoch": 2.3508115250221677,
      "grad_norm": 2.890625,
      "learning_rate": 1.2022087743483104e-05,
      "loss": 0.8292,
      "step": 670750
    },
    {
      "epoch": 2.350846572529063,
      "grad_norm": 2.8125,
      "learning_rate": 1.2021438714819402e-05,
      "loss": 0.7681,
      "step": 670760
    },
    {
      "epoch": 2.350881620035959,
      "grad_norm": 2.609375,
      "learning_rate": 1.2020789686155698e-05,
      "loss": 0.7529,
      "step": 670770
    },
    {
      "epoch": 2.3509166675428546,
      "grad_norm": 3.078125,
      "learning_rate": 1.2020140657491998e-05,
      "loss": 0.8259,
      "step": 670780
    },
    {
      "epoch": 2.35095171504975,
      "grad_norm": 2.9375,
      "learning_rate": 1.2019491628828296e-05,
      "loss": 0.8925,
      "step": 670790
    },
    {
      "epoch": 2.3509867625566456,
      "grad_norm": 3.203125,
      "learning_rate": 1.2018842600164594e-05,
      "loss": 0.8559,
      "step": 670800
    },
    {
      "epoch": 2.351021810063541,
      "grad_norm": 3.046875,
      "learning_rate": 1.2018193571500892e-05,
      "loss": 0.7817,
      "step": 670810
    },
    {
      "epoch": 2.3510568575704367,
      "grad_norm": 2.703125,
      "learning_rate": 1.201754454283719e-05,
      "loss": 0.8415,
      "step": 670820
    },
    {
      "epoch": 2.3510919050773325,
      "grad_norm": 3.3125,
      "learning_rate": 1.2016895514173488e-05,
      "loss": 0.8062,
      "step": 670830
    },
    {
      "epoch": 2.3511269525842278,
      "grad_norm": 2.671875,
      "learning_rate": 1.2016246485509786e-05,
      "loss": 0.8643,
      "step": 670840
    },
    {
      "epoch": 2.3511620000911235,
      "grad_norm": 3.203125,
      "learning_rate": 1.2015597456846086e-05,
      "loss": 0.7356,
      "step": 670850
    },
    {
      "epoch": 2.3511970475980193,
      "grad_norm": 2.59375,
      "learning_rate": 1.2014948428182384e-05,
      "loss": 0.7591,
      "step": 670860
    },
    {
      "epoch": 2.3512320951049146,
      "grad_norm": 2.703125,
      "learning_rate": 1.201429939951868e-05,
      "loss": 0.7606,
      "step": 670870
    },
    {
      "epoch": 2.3512671426118104,
      "grad_norm": 2.609375,
      "learning_rate": 1.2013650370854978e-05,
      "loss": 0.8064,
      "step": 670880
    },
    {
      "epoch": 2.351302190118706,
      "grad_norm": 2.703125,
      "learning_rate": 1.2013001342191276e-05,
      "loss": 0.7681,
      "step": 670890
    },
    {
      "epoch": 2.3513372376256014,
      "grad_norm": 2.625,
      "learning_rate": 1.2012352313527576e-05,
      "loss": 0.7783,
      "step": 670900
    },
    {
      "epoch": 2.351372285132497,
      "grad_norm": 2.78125,
      "learning_rate": 1.2011703284863874e-05,
      "loss": 0.7775,
      "step": 670910
    },
    {
      "epoch": 2.3514073326393925,
      "grad_norm": 2.796875,
      "learning_rate": 1.2011054256200172e-05,
      "loss": 0.7475,
      "step": 670920
    },
    {
      "epoch": 2.3514423801462883,
      "grad_norm": 2.921875,
      "learning_rate": 1.201040522753647e-05,
      "loss": 0.8381,
      "step": 670930
    },
    {
      "epoch": 2.351477427653184,
      "grad_norm": 2.90625,
      "learning_rate": 1.2009756198872768e-05,
      "loss": 0.8249,
      "step": 670940
    },
    {
      "epoch": 2.3515124751600793,
      "grad_norm": 3.390625,
      "learning_rate": 1.2009107170209066e-05,
      "loss": 0.912,
      "step": 670950
    },
    {
      "epoch": 2.351547522666975,
      "grad_norm": 3.3125,
      "learning_rate": 1.2008458141545364e-05,
      "loss": 0.6768,
      "step": 670960
    },
    {
      "epoch": 2.351582570173871,
      "grad_norm": 3.21875,
      "learning_rate": 1.2007809112881662e-05,
      "loss": 0.7926,
      "step": 670970
    },
    {
      "epoch": 2.351617617680766,
      "grad_norm": 3.109375,
      "learning_rate": 1.200716008421796e-05,
      "loss": 0.7217,
      "step": 670980
    },
    {
      "epoch": 2.351652665187662,
      "grad_norm": 3.578125,
      "learning_rate": 1.2006511055554258e-05,
      "loss": 0.796,
      "step": 670990
    },
    {
      "epoch": 2.3516877126945577,
      "grad_norm": 3.03125,
      "learning_rate": 1.2005862026890556e-05,
      "loss": 0.7826,
      "step": 671000
    },
    {
      "epoch": 2.351722760201453,
      "grad_norm": 2.90625,
      "learning_rate": 1.2005212998226854e-05,
      "loss": 0.9209,
      "step": 671010
    },
    {
      "epoch": 2.3517578077083487,
      "grad_norm": 2.984375,
      "learning_rate": 1.2004563969563152e-05,
      "loss": 0.8383,
      "step": 671020
    },
    {
      "epoch": 2.351792855215244,
      "grad_norm": 2.828125,
      "learning_rate": 1.2003914940899452e-05,
      "loss": 0.8671,
      "step": 671030
    },
    {
      "epoch": 2.35182790272214,
      "grad_norm": 2.5,
      "learning_rate": 1.200326591223575e-05,
      "loss": 0.86,
      "step": 671040
    },
    {
      "epoch": 2.3518629502290356,
      "grad_norm": 3.28125,
      "learning_rate": 1.2002616883572048e-05,
      "loss": 0.8098,
      "step": 671050
    },
    {
      "epoch": 2.351897997735931,
      "grad_norm": 2.90625,
      "learning_rate": 1.2001967854908344e-05,
      "loss": 0.8477,
      "step": 671060
    },
    {
      "epoch": 2.3519330452428266,
      "grad_norm": 2.96875,
      "learning_rate": 1.2001318826244642e-05,
      "loss": 0.8705,
      "step": 671070
    },
    {
      "epoch": 2.3519680927497224,
      "grad_norm": 2.796875,
      "learning_rate": 1.200066979758094e-05,
      "loss": 0.7898,
      "step": 671080
    },
    {
      "epoch": 2.3520031402566177,
      "grad_norm": 2.5625,
      "learning_rate": 1.200002076891724e-05,
      "loss": 0.774,
      "step": 671090
    },
    {
      "epoch": 2.3520381877635135,
      "grad_norm": 2.734375,
      "learning_rate": 1.1999371740253538e-05,
      "loss": 0.8043,
      "step": 671100
    },
    {
      "epoch": 2.3520732352704092,
      "grad_norm": 3.078125,
      "learning_rate": 1.1998722711589836e-05,
      "loss": 0.77,
      "step": 671110
    },
    {
      "epoch": 2.3521082827773045,
      "grad_norm": 3.3125,
      "learning_rate": 1.1998073682926134e-05,
      "loss": 0.8078,
      "step": 671120
    },
    {
      "epoch": 2.3521433302842003,
      "grad_norm": 3.21875,
      "learning_rate": 1.1997424654262432e-05,
      "loss": 0.8411,
      "step": 671130
    },
    {
      "epoch": 2.3521783777910956,
      "grad_norm": 2.203125,
      "learning_rate": 1.199677562559873e-05,
      "loss": 0.751,
      "step": 671140
    },
    {
      "epoch": 2.3522134252979914,
      "grad_norm": 3.046875,
      "learning_rate": 1.1996126596935028e-05,
      "loss": 0.8784,
      "step": 671150
    },
    {
      "epoch": 2.352248472804887,
      "grad_norm": 3.015625,
      "learning_rate": 1.1995477568271326e-05,
      "loss": 0.8375,
      "step": 671160
    },
    {
      "epoch": 2.3522835203117824,
      "grad_norm": 2.828125,
      "learning_rate": 1.1994828539607624e-05,
      "loss": 0.826,
      "step": 671170
    },
    {
      "epoch": 2.352318567818678,
      "grad_norm": 2.921875,
      "learning_rate": 1.1994179510943922e-05,
      "loss": 0.9076,
      "step": 671180
    },
    {
      "epoch": 2.352353615325574,
      "grad_norm": 3.21875,
      "learning_rate": 1.199353048228022e-05,
      "loss": 0.904,
      "step": 671190
    },
    {
      "epoch": 2.3523886628324693,
      "grad_norm": 2.671875,
      "learning_rate": 1.1992881453616518e-05,
      "loss": 0.8108,
      "step": 671200
    },
    {
      "epoch": 2.352423710339365,
      "grad_norm": 2.9375,
      "learning_rate": 1.1992232424952817e-05,
      "loss": 0.8984,
      "step": 671210
    },
    {
      "epoch": 2.352458757846261,
      "grad_norm": 3.078125,
      "learning_rate": 1.1991583396289115e-05,
      "loss": 0.848,
      "step": 671220
    },
    {
      "epoch": 2.352493805353156,
      "grad_norm": 3.078125,
      "learning_rate": 1.1990934367625413e-05,
      "loss": 0.7502,
      "step": 671230
    },
    {
      "epoch": 2.352528852860052,
      "grad_norm": 2.84375,
      "learning_rate": 1.199028533896171e-05,
      "loss": 0.7619,
      "step": 671240
    },
    {
      "epoch": 2.352563900366947,
      "grad_norm": 3.28125,
      "learning_rate": 1.1989636310298008e-05,
      "loss": 0.8502,
      "step": 671250
    },
    {
      "epoch": 2.352598947873843,
      "grad_norm": 2.796875,
      "learning_rate": 1.1988987281634306e-05,
      "loss": 0.7887,
      "step": 671260
    },
    {
      "epoch": 2.3526339953807387,
      "grad_norm": 2.59375,
      "learning_rate": 1.1988338252970605e-05,
      "loss": 0.6823,
      "step": 671270
    },
    {
      "epoch": 2.352669042887634,
      "grad_norm": 3.15625,
      "learning_rate": 1.1987689224306903e-05,
      "loss": 0.7952,
      "step": 671280
    },
    {
      "epoch": 2.3527040903945298,
      "grad_norm": 3.265625,
      "learning_rate": 1.1987040195643201e-05,
      "loss": 0.87,
      "step": 671290
    },
    {
      "epoch": 2.3527391379014255,
      "grad_norm": 3.421875,
      "learning_rate": 1.19863911669795e-05,
      "loss": 0.866,
      "step": 671300
    },
    {
      "epoch": 2.352774185408321,
      "grad_norm": 2.96875,
      "learning_rate": 1.1985742138315797e-05,
      "loss": 0.8201,
      "step": 671310
    },
    {
      "epoch": 2.3528092329152166,
      "grad_norm": 2.625,
      "learning_rate": 1.1985093109652095e-05,
      "loss": 0.8104,
      "step": 671320
    },
    {
      "epoch": 2.3528442804221124,
      "grad_norm": 4.75,
      "learning_rate": 1.1984444080988393e-05,
      "loss": 0.802,
      "step": 671330
    },
    {
      "epoch": 2.3528793279290077,
      "grad_norm": 3.171875,
      "learning_rate": 1.1983795052324691e-05,
      "loss": 0.7307,
      "step": 671340
    },
    {
      "epoch": 2.3529143754359034,
      "grad_norm": 3.09375,
      "learning_rate": 1.198314602366099e-05,
      "loss": 0.9483,
      "step": 671350
    },
    {
      "epoch": 2.3529494229427987,
      "grad_norm": 2.90625,
      "learning_rate": 1.1982496994997287e-05,
      "loss": 0.8377,
      "step": 671360
    },
    {
      "epoch": 2.3529844704496945,
      "grad_norm": 2.8125,
      "learning_rate": 1.1981847966333585e-05,
      "loss": 0.8304,
      "step": 671370
    },
    {
      "epoch": 2.3530195179565903,
      "grad_norm": 3.203125,
      "learning_rate": 1.1981198937669883e-05,
      "loss": 0.8419,
      "step": 671380
    },
    {
      "epoch": 2.3530545654634856,
      "grad_norm": 2.953125,
      "learning_rate": 1.1980549909006183e-05,
      "loss": 0.7844,
      "step": 671390
    },
    {
      "epoch": 2.3530896129703813,
      "grad_norm": 2.515625,
      "learning_rate": 1.1979900880342481e-05,
      "loss": 0.7406,
      "step": 671400
    },
    {
      "epoch": 2.353124660477277,
      "grad_norm": 2.5625,
      "learning_rate": 1.1979251851678779e-05,
      "loss": 0.7324,
      "step": 671410
    },
    {
      "epoch": 2.3531597079841724,
      "grad_norm": 3.0625,
      "learning_rate": 1.1978602823015077e-05,
      "loss": 0.8556,
      "step": 671420
    },
    {
      "epoch": 2.353194755491068,
      "grad_norm": 2.765625,
      "learning_rate": 1.1977953794351373e-05,
      "loss": 0.867,
      "step": 671430
    },
    {
      "epoch": 2.353229802997964,
      "grad_norm": 2.734375,
      "learning_rate": 1.1977304765687671e-05,
      "loss": 0.7826,
      "step": 671440
    },
    {
      "epoch": 2.3532648505048592,
      "grad_norm": 2.671875,
      "learning_rate": 1.1976655737023971e-05,
      "loss": 0.8396,
      "step": 671450
    },
    {
      "epoch": 2.353299898011755,
      "grad_norm": 2.625,
      "learning_rate": 1.1976006708360269e-05,
      "loss": 0.7773,
      "step": 671460
    },
    {
      "epoch": 2.3533349455186503,
      "grad_norm": 2.78125,
      "learning_rate": 1.1975357679696567e-05,
      "loss": 0.7889,
      "step": 671470
    },
    {
      "epoch": 2.353369993025546,
      "grad_norm": 2.828125,
      "learning_rate": 1.1974708651032865e-05,
      "loss": 0.7167,
      "step": 671480
    },
    {
      "epoch": 2.353405040532442,
      "grad_norm": 2.578125,
      "learning_rate": 1.1974059622369163e-05,
      "loss": 0.7207,
      "step": 671490
    },
    {
      "epoch": 2.353440088039337,
      "grad_norm": 2.953125,
      "learning_rate": 1.1973410593705461e-05,
      "loss": 0.9086,
      "step": 671500
    },
    {
      "epoch": 2.353475135546233,
      "grad_norm": 2.921875,
      "learning_rate": 1.1972761565041759e-05,
      "loss": 0.8067,
      "step": 671510
    },
    {
      "epoch": 2.3535101830531286,
      "grad_norm": 2.5,
      "learning_rate": 1.1972112536378059e-05,
      "loss": 0.7317,
      "step": 671520
    },
    {
      "epoch": 2.353545230560024,
      "grad_norm": 2.84375,
      "learning_rate": 1.1971463507714355e-05,
      "loss": 0.8531,
      "step": 671530
    },
    {
      "epoch": 2.3535802780669197,
      "grad_norm": 3.21875,
      "learning_rate": 1.1970814479050653e-05,
      "loss": 0.7643,
      "step": 671540
    },
    {
      "epoch": 2.3536153255738155,
      "grad_norm": 3.6875,
      "learning_rate": 1.1970165450386951e-05,
      "loss": 0.8364,
      "step": 671550
    },
    {
      "epoch": 2.353650373080711,
      "grad_norm": 2.921875,
      "learning_rate": 1.1969516421723249e-05,
      "loss": 0.7813,
      "step": 671560
    },
    {
      "epoch": 2.3536854205876065,
      "grad_norm": 3.0,
      "learning_rate": 1.1968867393059547e-05,
      "loss": 0.8267,
      "step": 671570
    },
    {
      "epoch": 2.353720468094502,
      "grad_norm": 2.75,
      "learning_rate": 1.1968218364395847e-05,
      "loss": 0.8286,
      "step": 671580
    },
    {
      "epoch": 2.3537555156013976,
      "grad_norm": 3.359375,
      "learning_rate": 1.1967569335732145e-05,
      "loss": 0.7955,
      "step": 671590
    },
    {
      "epoch": 2.3537905631082934,
      "grad_norm": 2.703125,
      "learning_rate": 1.1966920307068443e-05,
      "loss": 0.8346,
      "step": 671600
    },
    {
      "epoch": 2.353825610615189,
      "grad_norm": 3.078125,
      "learning_rate": 1.196627127840474e-05,
      "loss": 0.8374,
      "step": 671610
    },
    {
      "epoch": 2.3538606581220844,
      "grad_norm": 3.34375,
      "learning_rate": 1.1965622249741037e-05,
      "loss": 0.8004,
      "step": 671620
    },
    {
      "epoch": 2.35389570562898,
      "grad_norm": 3.34375,
      "learning_rate": 1.1964973221077337e-05,
      "loss": 0.8511,
      "step": 671630
    },
    {
      "epoch": 2.3539307531358755,
      "grad_norm": 2.875,
      "learning_rate": 1.1964324192413635e-05,
      "loss": 0.7615,
      "step": 671640
    },
    {
      "epoch": 2.3539658006427713,
      "grad_norm": 2.609375,
      "learning_rate": 1.1963675163749933e-05,
      "loss": 0.8822,
      "step": 671650
    },
    {
      "epoch": 2.354000848149667,
      "grad_norm": 3.140625,
      "learning_rate": 1.196302613508623e-05,
      "loss": 0.8048,
      "step": 671660
    },
    {
      "epoch": 2.3540358956565623,
      "grad_norm": 2.484375,
      "learning_rate": 1.1962377106422529e-05,
      "loss": 0.837,
      "step": 671670
    },
    {
      "epoch": 2.354070943163458,
      "grad_norm": 2.71875,
      "learning_rate": 1.1961728077758827e-05,
      "loss": 0.7671,
      "step": 671680
    },
    {
      "epoch": 2.3541059906703534,
      "grad_norm": 2.90625,
      "learning_rate": 1.1961079049095125e-05,
      "loss": 0.7412,
      "step": 671690
    },
    {
      "epoch": 2.354141038177249,
      "grad_norm": 2.8125,
      "learning_rate": 1.1960430020431424e-05,
      "loss": 0.8328,
      "step": 671700
    },
    {
      "epoch": 2.354176085684145,
      "grad_norm": 2.671875,
      "learning_rate": 1.195978099176772e-05,
      "loss": 0.8204,
      "step": 671710
    },
    {
      "epoch": 2.3542111331910407,
      "grad_norm": 2.953125,
      "learning_rate": 1.1959131963104019e-05,
      "loss": 0.7529,
      "step": 671720
    },
    {
      "epoch": 2.354246180697936,
      "grad_norm": 2.703125,
      "learning_rate": 1.1958482934440317e-05,
      "loss": 0.7993,
      "step": 671730
    },
    {
      "epoch": 2.3542812282048318,
      "grad_norm": 3.3125,
      "learning_rate": 1.1957833905776615e-05,
      "loss": 0.8447,
      "step": 671740
    },
    {
      "epoch": 2.354316275711727,
      "grad_norm": 3.46875,
      "learning_rate": 1.1957184877112913e-05,
      "loss": 0.8674,
      "step": 671750
    },
    {
      "epoch": 2.354351323218623,
      "grad_norm": 3.0625,
      "learning_rate": 1.1956535848449212e-05,
      "loss": 0.7799,
      "step": 671760
    },
    {
      "epoch": 2.3543863707255186,
      "grad_norm": 2.953125,
      "learning_rate": 1.195588681978551e-05,
      "loss": 0.7854,
      "step": 671770
    },
    {
      "epoch": 2.354421418232414,
      "grad_norm": 2.984375,
      "learning_rate": 1.1955237791121808e-05,
      "loss": 0.7805,
      "step": 671780
    },
    {
      "epoch": 2.3544564657393097,
      "grad_norm": 3.03125,
      "learning_rate": 1.1954588762458106e-05,
      "loss": 0.8237,
      "step": 671790
    },
    {
      "epoch": 2.3544915132462054,
      "grad_norm": 2.890625,
      "learning_rate": 1.1953939733794404e-05,
      "loss": 0.8005,
      "step": 671800
    },
    {
      "epoch": 2.3545265607531007,
      "grad_norm": 2.984375,
      "learning_rate": 1.19532907051307e-05,
      "loss": 0.7664,
      "step": 671810
    },
    {
      "epoch": 2.3545616082599965,
      "grad_norm": 2.640625,
      "learning_rate": 1.1952641676467e-05,
      "loss": 0.7934,
      "step": 671820
    },
    {
      "epoch": 2.3545966557668923,
      "grad_norm": 2.4375,
      "learning_rate": 1.1951992647803298e-05,
      "loss": 0.7432,
      "step": 671830
    },
    {
      "epoch": 2.3546317032737876,
      "grad_norm": 3.046875,
      "learning_rate": 1.1951343619139596e-05,
      "loss": 0.799,
      "step": 671840
    },
    {
      "epoch": 2.3546667507806833,
      "grad_norm": 2.96875,
      "learning_rate": 1.1950694590475894e-05,
      "loss": 0.8658,
      "step": 671850
    },
    {
      "epoch": 2.3547017982875786,
      "grad_norm": 2.921875,
      "learning_rate": 1.1950045561812192e-05,
      "loss": 0.8039,
      "step": 671860
    },
    {
      "epoch": 2.3547368457944744,
      "grad_norm": 2.765625,
      "learning_rate": 1.194939653314849e-05,
      "loss": 0.8314,
      "step": 671870
    },
    {
      "epoch": 2.35477189330137,
      "grad_norm": 2.578125,
      "learning_rate": 1.1948747504484788e-05,
      "loss": 0.8063,
      "step": 671880
    },
    {
      "epoch": 2.3548069408082655,
      "grad_norm": 3.0,
      "learning_rate": 1.1948098475821088e-05,
      "loss": 0.8024,
      "step": 671890
    },
    {
      "epoch": 2.3548419883151612,
      "grad_norm": 2.65625,
      "learning_rate": 1.1947449447157384e-05,
      "loss": 0.6896,
      "step": 671900
    },
    {
      "epoch": 2.354877035822057,
      "grad_norm": 3.046875,
      "learning_rate": 1.1946800418493682e-05,
      "loss": 0.8068,
      "step": 671910
    },
    {
      "epoch": 2.3549120833289523,
      "grad_norm": 2.984375,
      "learning_rate": 1.194615138982998e-05,
      "loss": 0.8229,
      "step": 671920
    },
    {
      "epoch": 2.354947130835848,
      "grad_norm": 2.78125,
      "learning_rate": 1.1945502361166278e-05,
      "loss": 0.7403,
      "step": 671930
    },
    {
      "epoch": 2.354982178342744,
      "grad_norm": 2.71875,
      "learning_rate": 1.1944853332502578e-05,
      "loss": 0.8121,
      "step": 671940
    },
    {
      "epoch": 2.355017225849639,
      "grad_norm": 3.109375,
      "learning_rate": 1.1944204303838876e-05,
      "loss": 0.7721,
      "step": 671950
    },
    {
      "epoch": 2.355052273356535,
      "grad_norm": 3.046875,
      "learning_rate": 1.1943555275175174e-05,
      "loss": 0.8619,
      "step": 671960
    },
    {
      "epoch": 2.35508732086343,
      "grad_norm": 2.9375,
      "learning_rate": 1.1942906246511472e-05,
      "loss": 0.869,
      "step": 671970
    },
    {
      "epoch": 2.355122368370326,
      "grad_norm": 3.078125,
      "learning_rate": 1.194225721784777e-05,
      "loss": 0.7941,
      "step": 671980
    },
    {
      "epoch": 2.3551574158772217,
      "grad_norm": 3.109375,
      "learning_rate": 1.1941608189184068e-05,
      "loss": 0.825,
      "step": 671990
    },
    {
      "epoch": 2.355192463384117,
      "grad_norm": 3.046875,
      "learning_rate": 1.1940959160520366e-05,
      "loss": 0.7793,
      "step": 672000
    },
    {
      "epoch": 2.355227510891013,
      "grad_norm": 2.71875,
      "learning_rate": 1.1940310131856664e-05,
      "loss": 0.8742,
      "step": 672010
    },
    {
      "epoch": 2.3552625583979085,
      "grad_norm": 3.0,
      "learning_rate": 1.1939661103192962e-05,
      "loss": 0.8006,
      "step": 672020
    },
    {
      "epoch": 2.355297605904804,
      "grad_norm": 2.5,
      "learning_rate": 1.193901207452926e-05,
      "loss": 0.8325,
      "step": 672030
    },
    {
      "epoch": 2.3553326534116996,
      "grad_norm": 2.796875,
      "learning_rate": 1.1938363045865558e-05,
      "loss": 0.7828,
      "step": 672040
    },
    {
      "epoch": 2.3553677009185954,
      "grad_norm": 3.53125,
      "learning_rate": 1.1937714017201856e-05,
      "loss": 0.7372,
      "step": 672050
    },
    {
      "epoch": 2.3554027484254907,
      "grad_norm": 2.5625,
      "learning_rate": 1.1937064988538154e-05,
      "loss": 0.7835,
      "step": 672060
    },
    {
      "epoch": 2.3554377959323864,
      "grad_norm": 3.34375,
      "learning_rate": 1.1936415959874454e-05,
      "loss": 0.9449,
      "step": 672070
    },
    {
      "epoch": 2.3554728434392818,
      "grad_norm": 2.703125,
      "learning_rate": 1.1935766931210752e-05,
      "loss": 0.7154,
      "step": 672080
    },
    {
      "epoch": 2.3555078909461775,
      "grad_norm": 3.28125,
      "learning_rate": 1.1935117902547048e-05,
      "loss": 0.8051,
      "step": 672090
    },
    {
      "epoch": 2.3555429384530733,
      "grad_norm": 3.25,
      "learning_rate": 1.1934468873883346e-05,
      "loss": 0.8012,
      "step": 672100
    },
    {
      "epoch": 2.3555779859599686,
      "grad_norm": 3.125,
      "learning_rate": 1.1933819845219644e-05,
      "loss": 0.8658,
      "step": 672110
    },
    {
      "epoch": 2.3556130334668643,
      "grad_norm": 2.578125,
      "learning_rate": 1.1933170816555942e-05,
      "loss": 0.782,
      "step": 672120
    },
    {
      "epoch": 2.35564808097376,
      "grad_norm": 2.59375,
      "learning_rate": 1.1932521787892242e-05,
      "loss": 0.8,
      "step": 672130
    },
    {
      "epoch": 2.3556831284806554,
      "grad_norm": 2.6875,
      "learning_rate": 1.193187275922854e-05,
      "loss": 0.8144,
      "step": 672140
    },
    {
      "epoch": 2.355718175987551,
      "grad_norm": 3.375,
      "learning_rate": 1.1931223730564838e-05,
      "loss": 0.8191,
      "step": 672150
    },
    {
      "epoch": 2.355753223494447,
      "grad_norm": 3.015625,
      "learning_rate": 1.1930574701901136e-05,
      "loss": 0.8513,
      "step": 672160
    },
    {
      "epoch": 2.3557882710013422,
      "grad_norm": 2.828125,
      "learning_rate": 1.1929925673237434e-05,
      "loss": 0.8219,
      "step": 672170
    },
    {
      "epoch": 2.355823318508238,
      "grad_norm": 3.125,
      "learning_rate": 1.1929276644573732e-05,
      "loss": 0.7774,
      "step": 672180
    },
    {
      "epoch": 2.3558583660151333,
      "grad_norm": 2.953125,
      "learning_rate": 1.192862761591003e-05,
      "loss": 0.8457,
      "step": 672190
    },
    {
      "epoch": 2.355893413522029,
      "grad_norm": 2.78125,
      "learning_rate": 1.1927978587246328e-05,
      "loss": 0.8298,
      "step": 672200
    },
    {
      "epoch": 2.355928461028925,
      "grad_norm": 2.515625,
      "learning_rate": 1.1927329558582626e-05,
      "loss": 0.7828,
      "step": 672210
    },
    {
      "epoch": 2.35596350853582,
      "grad_norm": 2.921875,
      "learning_rate": 1.1926680529918924e-05,
      "loss": 0.7343,
      "step": 672220
    },
    {
      "epoch": 2.355998556042716,
      "grad_norm": 2.53125,
      "learning_rate": 1.1926031501255222e-05,
      "loss": 0.853,
      "step": 672230
    },
    {
      "epoch": 2.3560336035496117,
      "grad_norm": 3.03125,
      "learning_rate": 1.192538247259152e-05,
      "loss": 0.8541,
      "step": 672240
    },
    {
      "epoch": 2.356068651056507,
      "grad_norm": 2.75,
      "learning_rate": 1.192473344392782e-05,
      "loss": 0.786,
      "step": 672250
    },
    {
      "epoch": 2.3561036985634027,
      "grad_norm": 3.203125,
      "learning_rate": 1.1924084415264117e-05,
      "loss": 0.7591,
      "step": 672260
    },
    {
      "epoch": 2.3561387460702985,
      "grad_norm": 2.671875,
      "learning_rate": 1.1923435386600415e-05,
      "loss": 0.7434,
      "step": 672270
    },
    {
      "epoch": 2.356173793577194,
      "grad_norm": 3.171875,
      "learning_rate": 1.1922786357936712e-05,
      "loss": 0.8664,
      "step": 672280
    },
    {
      "epoch": 2.3562088410840896,
      "grad_norm": 2.484375,
      "learning_rate": 1.192213732927301e-05,
      "loss": 0.8018,
      "step": 672290
    },
    {
      "epoch": 2.356243888590985,
      "grad_norm": 2.515625,
      "learning_rate": 1.1921488300609308e-05,
      "loss": 0.8897,
      "step": 672300
    },
    {
      "epoch": 2.3562789360978806,
      "grad_norm": 2.65625,
      "learning_rate": 1.1920839271945607e-05,
      "loss": 0.7543,
      "step": 672310
    },
    {
      "epoch": 2.3563139836047764,
      "grad_norm": 2.671875,
      "learning_rate": 1.1920190243281905e-05,
      "loss": 0.8186,
      "step": 672320
    },
    {
      "epoch": 2.3563490311116717,
      "grad_norm": 2.859375,
      "learning_rate": 1.1919541214618203e-05,
      "loss": 0.8566,
      "step": 672330
    },
    {
      "epoch": 2.3563840786185675,
      "grad_norm": 2.5625,
      "learning_rate": 1.1918892185954501e-05,
      "loss": 0.7843,
      "step": 672340
    },
    {
      "epoch": 2.3564191261254632,
      "grad_norm": 2.78125,
      "learning_rate": 1.19182431572908e-05,
      "loss": 0.7639,
      "step": 672350
    },
    {
      "epoch": 2.3564541736323585,
      "grad_norm": 2.953125,
      "learning_rate": 1.1917594128627097e-05,
      "loss": 0.8123,
      "step": 672360
    },
    {
      "epoch": 2.3564892211392543,
      "grad_norm": 2.953125,
      "learning_rate": 1.1916945099963395e-05,
      "loss": 0.8709,
      "step": 672370
    },
    {
      "epoch": 2.35652426864615,
      "grad_norm": 3.0,
      "learning_rate": 1.1916296071299693e-05,
      "loss": 0.7764,
      "step": 672380
    },
    {
      "epoch": 2.3565593161530454,
      "grad_norm": 2.890625,
      "learning_rate": 1.1915647042635991e-05,
      "loss": 0.7842,
      "step": 672390
    },
    {
      "epoch": 2.356594363659941,
      "grad_norm": 2.96875,
      "learning_rate": 1.191499801397229e-05,
      "loss": 0.8187,
      "step": 672400
    },
    {
      "epoch": 2.3566294111668364,
      "grad_norm": 2.609375,
      "learning_rate": 1.1914348985308587e-05,
      "loss": 0.7477,
      "step": 672410
    },
    {
      "epoch": 2.356664458673732,
      "grad_norm": 2.75,
      "learning_rate": 1.1913699956644885e-05,
      "loss": 0.8891,
      "step": 672420
    },
    {
      "epoch": 2.356699506180628,
      "grad_norm": 2.875,
      "learning_rate": 1.1913050927981183e-05,
      "loss": 0.8252,
      "step": 672430
    },
    {
      "epoch": 2.3567345536875233,
      "grad_norm": 2.8125,
      "learning_rate": 1.1912401899317483e-05,
      "loss": 0.7627,
      "step": 672440
    },
    {
      "epoch": 2.356769601194419,
      "grad_norm": 3.171875,
      "learning_rate": 1.1911752870653781e-05,
      "loss": 0.8275,
      "step": 672450
    },
    {
      "epoch": 2.356804648701315,
      "grad_norm": 3.296875,
      "learning_rate": 1.1911103841990079e-05,
      "loss": 0.8415,
      "step": 672460
    },
    {
      "epoch": 2.35683969620821,
      "grad_norm": 3.296875,
      "learning_rate": 1.1910454813326375e-05,
      "loss": 0.8564,
      "step": 672470
    },
    {
      "epoch": 2.356874743715106,
      "grad_norm": 3.328125,
      "learning_rate": 1.1909805784662673e-05,
      "loss": 0.8105,
      "step": 672480
    },
    {
      "epoch": 2.3569097912220016,
      "grad_norm": 3.1875,
      "learning_rate": 1.1909156755998973e-05,
      "loss": 0.7806,
      "step": 672490
    },
    {
      "epoch": 2.356944838728897,
      "grad_norm": 2.875,
      "learning_rate": 1.1908507727335271e-05,
      "loss": 0.7494,
      "step": 672500
    },
    {
      "epoch": 2.3569798862357927,
      "grad_norm": 3.015625,
      "learning_rate": 1.1907858698671569e-05,
      "loss": 0.7893,
      "step": 672510
    },
    {
      "epoch": 2.357014933742688,
      "grad_norm": 2.625,
      "learning_rate": 1.1907209670007867e-05,
      "loss": 0.7253,
      "step": 672520
    },
    {
      "epoch": 2.3570499812495838,
      "grad_norm": 2.640625,
      "learning_rate": 1.1906560641344165e-05,
      "loss": 0.815,
      "step": 672530
    },
    {
      "epoch": 2.3570850287564795,
      "grad_norm": 2.3125,
      "learning_rate": 1.1905911612680463e-05,
      "loss": 0.7065,
      "step": 672540
    },
    {
      "epoch": 2.357120076263375,
      "grad_norm": 2.71875,
      "learning_rate": 1.1905262584016761e-05,
      "loss": 0.8181,
      "step": 672550
    },
    {
      "epoch": 2.3571551237702706,
      "grad_norm": 2.515625,
      "learning_rate": 1.1904613555353059e-05,
      "loss": 0.8268,
      "step": 672560
    },
    {
      "epoch": 2.3571901712771663,
      "grad_norm": 3.609375,
      "learning_rate": 1.1903964526689357e-05,
      "loss": 0.7701,
      "step": 672570
    },
    {
      "epoch": 2.3572252187840617,
      "grad_norm": 2.890625,
      "learning_rate": 1.1903315498025655e-05,
      "loss": 0.714,
      "step": 672580
    },
    {
      "epoch": 2.3572602662909574,
      "grad_norm": 2.875,
      "learning_rate": 1.1902666469361953e-05,
      "loss": 0.7469,
      "step": 672590
    },
    {
      "epoch": 2.357295313797853,
      "grad_norm": 3.015625,
      "learning_rate": 1.1902017440698251e-05,
      "loss": 0.7286,
      "step": 672600
    },
    {
      "epoch": 2.3573303613047485,
      "grad_norm": 3.125,
      "learning_rate": 1.1901368412034549e-05,
      "loss": 0.8824,
      "step": 672610
    },
    {
      "epoch": 2.3573654088116442,
      "grad_norm": 2.59375,
      "learning_rate": 1.1900719383370849e-05,
      "loss": 0.7302,
      "step": 672620
    },
    {
      "epoch": 2.3574004563185396,
      "grad_norm": 2.640625,
      "learning_rate": 1.1900070354707147e-05,
      "loss": 0.8104,
      "step": 672630
    },
    {
      "epoch": 2.3574355038254353,
      "grad_norm": 2.859375,
      "learning_rate": 1.1899421326043445e-05,
      "loss": 0.8189,
      "step": 672640
    },
    {
      "epoch": 2.357470551332331,
      "grad_norm": 2.9375,
      "learning_rate": 1.1898772297379741e-05,
      "loss": 0.7341,
      "step": 672650
    },
    {
      "epoch": 2.3575055988392264,
      "grad_norm": 2.859375,
      "learning_rate": 1.1898123268716039e-05,
      "loss": 0.8498,
      "step": 672660
    },
    {
      "epoch": 2.357540646346122,
      "grad_norm": 2.8125,
      "learning_rate": 1.1897474240052337e-05,
      "loss": 0.7884,
      "step": 672670
    },
    {
      "epoch": 2.357575693853018,
      "grad_norm": 3.03125,
      "learning_rate": 1.1896825211388637e-05,
      "loss": 0.8385,
      "step": 672680
    },
    {
      "epoch": 2.357610741359913,
      "grad_norm": 2.84375,
      "learning_rate": 1.1896176182724935e-05,
      "loss": 0.7663,
      "step": 672690
    },
    {
      "epoch": 2.357645788866809,
      "grad_norm": 3.171875,
      "learning_rate": 1.1895527154061233e-05,
      "loss": 0.8162,
      "step": 672700
    },
    {
      "epoch": 2.3576808363737047,
      "grad_norm": 3.453125,
      "learning_rate": 1.189487812539753e-05,
      "loss": 0.7657,
      "step": 672710
    },
    {
      "epoch": 2.3577158838806,
      "grad_norm": 2.671875,
      "learning_rate": 1.1894229096733829e-05,
      "loss": 0.8506,
      "step": 672720
    },
    {
      "epoch": 2.357750931387496,
      "grad_norm": 2.65625,
      "learning_rate": 1.1893580068070127e-05,
      "loss": 0.8105,
      "step": 672730
    },
    {
      "epoch": 2.357785978894391,
      "grad_norm": 2.78125,
      "learning_rate": 1.1892931039406425e-05,
      "loss": 0.759,
      "step": 672740
    },
    {
      "epoch": 2.357821026401287,
      "grad_norm": 2.59375,
      "learning_rate": 1.1892282010742723e-05,
      "loss": 0.7696,
      "step": 672750
    },
    {
      "epoch": 2.3578560739081826,
      "grad_norm": 3.03125,
      "learning_rate": 1.189163298207902e-05,
      "loss": 0.7836,
      "step": 672760
    },
    {
      "epoch": 2.357891121415078,
      "grad_norm": 2.90625,
      "learning_rate": 1.1890983953415319e-05,
      "loss": 0.8118,
      "step": 672770
    },
    {
      "epoch": 2.3579261689219737,
      "grad_norm": 3.328125,
      "learning_rate": 1.1890334924751617e-05,
      "loss": 0.8083,
      "step": 672780
    },
    {
      "epoch": 2.3579612164288695,
      "grad_norm": 2.9375,
      "learning_rate": 1.1889685896087915e-05,
      "loss": 0.8841,
      "step": 672790
    },
    {
      "epoch": 2.357996263935765,
      "grad_norm": 3.078125,
      "learning_rate": 1.1889036867424214e-05,
      "loss": 0.8001,
      "step": 672800
    },
    {
      "epoch": 2.3580313114426605,
      "grad_norm": 2.84375,
      "learning_rate": 1.1888387838760512e-05,
      "loss": 0.8456,
      "step": 672810
    },
    {
      "epoch": 2.3580663589495563,
      "grad_norm": 2.4375,
      "learning_rate": 1.188773881009681e-05,
      "loss": 0.773,
      "step": 672820
    },
    {
      "epoch": 2.3581014064564516,
      "grad_norm": 3.015625,
      "learning_rate": 1.1887089781433108e-05,
      "loss": 0.8701,
      "step": 672830
    },
    {
      "epoch": 2.3581364539633474,
      "grad_norm": 2.71875,
      "learning_rate": 1.1886440752769405e-05,
      "loss": 0.8782,
      "step": 672840
    },
    {
      "epoch": 2.3581715014702427,
      "grad_norm": 3.046875,
      "learning_rate": 1.1885791724105703e-05,
      "loss": 0.7541,
      "step": 672850
    },
    {
      "epoch": 2.3582065489771384,
      "grad_norm": 2.671875,
      "learning_rate": 1.1885142695442002e-05,
      "loss": 0.7658,
      "step": 672860
    },
    {
      "epoch": 2.358241596484034,
      "grad_norm": 2.6875,
      "learning_rate": 1.18844936667783e-05,
      "loss": 0.7896,
      "step": 672870
    },
    {
      "epoch": 2.35827664399093,
      "grad_norm": 2.984375,
      "learning_rate": 1.1883844638114598e-05,
      "loss": 0.8142,
      "step": 672880
    },
    {
      "epoch": 2.3583116914978253,
      "grad_norm": 3.359375,
      "learning_rate": 1.1883195609450896e-05,
      "loss": 0.8449,
      "step": 672890
    },
    {
      "epoch": 2.358346739004721,
      "grad_norm": 2.765625,
      "learning_rate": 1.1882546580787194e-05,
      "loss": 0.7571,
      "step": 672900
    },
    {
      "epoch": 2.3583817865116163,
      "grad_norm": 3.140625,
      "learning_rate": 1.1881897552123492e-05,
      "loss": 0.7698,
      "step": 672910
    },
    {
      "epoch": 2.358416834018512,
      "grad_norm": 3.484375,
      "learning_rate": 1.188124852345979e-05,
      "loss": 0.928,
      "step": 672920
    },
    {
      "epoch": 2.358451881525408,
      "grad_norm": 3.609375,
      "learning_rate": 1.188059949479609e-05,
      "loss": 0.8067,
      "step": 672930
    },
    {
      "epoch": 2.358486929032303,
      "grad_norm": 3.203125,
      "learning_rate": 1.1879950466132386e-05,
      "loss": 0.8163,
      "step": 672940
    },
    {
      "epoch": 2.358521976539199,
      "grad_norm": 3.328125,
      "learning_rate": 1.1879301437468684e-05,
      "loss": 0.7941,
      "step": 672950
    },
    {
      "epoch": 2.3585570240460942,
      "grad_norm": 3.640625,
      "learning_rate": 1.1878652408804982e-05,
      "loss": 0.8557,
      "step": 672960
    },
    {
      "epoch": 2.35859207155299,
      "grad_norm": 3.140625,
      "learning_rate": 1.187800338014128e-05,
      "loss": 0.8588,
      "step": 672970
    },
    {
      "epoch": 2.3586271190598858,
      "grad_norm": 2.703125,
      "learning_rate": 1.1877354351477578e-05,
      "loss": 0.7639,
      "step": 672980
    },
    {
      "epoch": 2.3586621665667815,
      "grad_norm": 2.9375,
      "learning_rate": 1.1876705322813878e-05,
      "loss": 0.7998,
      "step": 672990
    },
    {
      "epoch": 2.358697214073677,
      "grad_norm": 3.109375,
      "learning_rate": 1.1876056294150176e-05,
      "loss": 0.8285,
      "step": 673000
    },
    {
      "epoch": 2.3587322615805726,
      "grad_norm": 2.984375,
      "learning_rate": 1.1875407265486474e-05,
      "loss": 0.8939,
      "step": 673010
    },
    {
      "epoch": 2.358767309087468,
      "grad_norm": 2.875,
      "learning_rate": 1.1874758236822772e-05,
      "loss": 0.8273,
      "step": 673020
    },
    {
      "epoch": 2.3588023565943637,
      "grad_norm": 2.90625,
      "learning_rate": 1.1874109208159068e-05,
      "loss": 0.7407,
      "step": 673030
    },
    {
      "epoch": 2.3588374041012594,
      "grad_norm": 3.234375,
      "learning_rate": 1.1873460179495368e-05,
      "loss": 0.7306,
      "step": 673040
    },
    {
      "epoch": 2.3588724516081547,
      "grad_norm": 2.96875,
      "learning_rate": 1.1872811150831666e-05,
      "loss": 0.8141,
      "step": 673050
    },
    {
      "epoch": 2.3589074991150505,
      "grad_norm": 3.03125,
      "learning_rate": 1.1872162122167964e-05,
      "loss": 0.7259,
      "step": 673060
    },
    {
      "epoch": 2.358942546621946,
      "grad_norm": 3.375,
      "learning_rate": 1.1871513093504262e-05,
      "loss": 0.8648,
      "step": 673070
    },
    {
      "epoch": 2.3589775941288416,
      "grad_norm": 2.90625,
      "learning_rate": 1.187086406484056e-05,
      "loss": 0.8733,
      "step": 673080
    },
    {
      "epoch": 2.3590126416357373,
      "grad_norm": 3.171875,
      "learning_rate": 1.1870215036176858e-05,
      "loss": 0.8248,
      "step": 673090
    },
    {
      "epoch": 2.359047689142633,
      "grad_norm": 2.53125,
      "learning_rate": 1.1869566007513156e-05,
      "loss": 0.809,
      "step": 673100
    },
    {
      "epoch": 2.3590827366495284,
      "grad_norm": 2.890625,
      "learning_rate": 1.1868916978849456e-05,
      "loss": 0.8368,
      "step": 673110
    },
    {
      "epoch": 2.359117784156424,
      "grad_norm": 2.765625,
      "learning_rate": 1.1868267950185752e-05,
      "loss": 0.7455,
      "step": 673120
    },
    {
      "epoch": 2.3591528316633195,
      "grad_norm": 3.09375,
      "learning_rate": 1.186761892152205e-05,
      "loss": 0.7915,
      "step": 673130
    },
    {
      "epoch": 2.359187879170215,
      "grad_norm": 3.15625,
      "learning_rate": 1.1866969892858348e-05,
      "loss": 0.8741,
      "step": 673140
    },
    {
      "epoch": 2.359222926677111,
      "grad_norm": 3.203125,
      "learning_rate": 1.1866320864194646e-05,
      "loss": 0.8021,
      "step": 673150
    },
    {
      "epoch": 2.3592579741840063,
      "grad_norm": 2.703125,
      "learning_rate": 1.1865671835530944e-05,
      "loss": 0.8514,
      "step": 673160
    },
    {
      "epoch": 2.359293021690902,
      "grad_norm": 2.4375,
      "learning_rate": 1.1865022806867244e-05,
      "loss": 0.803,
      "step": 673170
    },
    {
      "epoch": 2.359328069197798,
      "grad_norm": 3.09375,
      "learning_rate": 1.1864373778203542e-05,
      "loss": 0.7706,
      "step": 673180
    },
    {
      "epoch": 2.359363116704693,
      "grad_norm": 2.921875,
      "learning_rate": 1.186372474953984e-05,
      "loss": 0.9186,
      "step": 673190
    },
    {
      "epoch": 2.359398164211589,
      "grad_norm": 3.234375,
      "learning_rate": 1.1863075720876138e-05,
      "loss": 0.8083,
      "step": 673200
    },
    {
      "epoch": 2.3594332117184846,
      "grad_norm": 2.53125,
      "learning_rate": 1.1862426692212436e-05,
      "loss": 0.8378,
      "step": 673210
    },
    {
      "epoch": 2.35946825922538,
      "grad_norm": 3.1875,
      "learning_rate": 1.1861777663548732e-05,
      "loss": 0.8643,
      "step": 673220
    },
    {
      "epoch": 2.3595033067322757,
      "grad_norm": 2.734375,
      "learning_rate": 1.1861128634885032e-05,
      "loss": 0.8275,
      "step": 673230
    },
    {
      "epoch": 2.359538354239171,
      "grad_norm": 3.03125,
      "learning_rate": 1.186047960622133e-05,
      "loss": 0.8057,
      "step": 673240
    },
    {
      "epoch": 2.359573401746067,
      "grad_norm": 3.15625,
      "learning_rate": 1.1859830577557628e-05,
      "loss": 0.7759,
      "step": 673250
    },
    {
      "epoch": 2.3596084492529625,
      "grad_norm": 3.125,
      "learning_rate": 1.1859181548893926e-05,
      "loss": 0.8457,
      "step": 673260
    },
    {
      "epoch": 2.359643496759858,
      "grad_norm": 2.21875,
      "learning_rate": 1.1858532520230224e-05,
      "loss": 0.8383,
      "step": 673270
    },
    {
      "epoch": 2.3596785442667536,
      "grad_norm": 2.34375,
      "learning_rate": 1.1857883491566522e-05,
      "loss": 0.7302,
      "step": 673280
    },
    {
      "epoch": 2.3597135917736494,
      "grad_norm": 2.828125,
      "learning_rate": 1.185723446290282e-05,
      "loss": 0.8176,
      "step": 673290
    },
    {
      "epoch": 2.3597486392805447,
      "grad_norm": 2.828125,
      "learning_rate": 1.185658543423912e-05,
      "loss": 0.7784,
      "step": 673300
    },
    {
      "epoch": 2.3597836867874404,
      "grad_norm": 2.671875,
      "learning_rate": 1.1855936405575416e-05,
      "loss": 0.8291,
      "step": 673310
    },
    {
      "epoch": 2.359818734294336,
      "grad_norm": 2.515625,
      "learning_rate": 1.1855287376911714e-05,
      "loss": 0.7423,
      "step": 673320
    },
    {
      "epoch": 2.3598537818012315,
      "grad_norm": 2.671875,
      "learning_rate": 1.1854638348248012e-05,
      "loss": 0.7754,
      "step": 673330
    },
    {
      "epoch": 2.3598888293081273,
      "grad_norm": 2.59375,
      "learning_rate": 1.185398931958431e-05,
      "loss": 0.7506,
      "step": 673340
    },
    {
      "epoch": 2.3599238768150226,
      "grad_norm": 3.046875,
      "learning_rate": 1.185334029092061e-05,
      "loss": 0.7576,
      "step": 673350
    },
    {
      "epoch": 2.3599589243219183,
      "grad_norm": 2.8125,
      "learning_rate": 1.1852691262256908e-05,
      "loss": 0.8305,
      "step": 673360
    },
    {
      "epoch": 2.359993971828814,
      "grad_norm": 2.625,
      "learning_rate": 1.1852042233593206e-05,
      "loss": 0.9155,
      "step": 673370
    },
    {
      "epoch": 2.3600290193357094,
      "grad_norm": 2.515625,
      "learning_rate": 1.1851393204929504e-05,
      "loss": 0.7472,
      "step": 673380
    },
    {
      "epoch": 2.360064066842605,
      "grad_norm": 3.0,
      "learning_rate": 1.1850744176265802e-05,
      "loss": 0.7992,
      "step": 673390
    },
    {
      "epoch": 2.360099114349501,
      "grad_norm": 2.234375,
      "learning_rate": 1.18500951476021e-05,
      "loss": 0.8196,
      "step": 673400
    },
    {
      "epoch": 2.3601341618563962,
      "grad_norm": 2.59375,
      "learning_rate": 1.1849446118938398e-05,
      "loss": 0.7706,
      "step": 673410
    },
    {
      "epoch": 2.360169209363292,
      "grad_norm": 2.84375,
      "learning_rate": 1.1848797090274696e-05,
      "loss": 0.7531,
      "step": 673420
    },
    {
      "epoch": 2.3602042568701878,
      "grad_norm": 3.546875,
      "learning_rate": 1.1848148061610994e-05,
      "loss": 0.8344,
      "step": 673430
    },
    {
      "epoch": 2.360239304377083,
      "grad_norm": 3.734375,
      "learning_rate": 1.1847499032947292e-05,
      "loss": 0.8101,
      "step": 673440
    },
    {
      "epoch": 2.360274351883979,
      "grad_norm": 2.84375,
      "learning_rate": 1.184685000428359e-05,
      "loss": 0.8419,
      "step": 673450
    },
    {
      "epoch": 2.360309399390874,
      "grad_norm": 2.984375,
      "learning_rate": 1.1846200975619888e-05,
      "loss": 0.7734,
      "step": 673460
    },
    {
      "epoch": 2.36034444689777,
      "grad_norm": 3.75,
      "learning_rate": 1.1845551946956186e-05,
      "loss": 0.8539,
      "step": 673470
    },
    {
      "epoch": 2.3603794944046657,
      "grad_norm": 3.203125,
      "learning_rate": 1.1844902918292485e-05,
      "loss": 0.8514,
      "step": 673480
    },
    {
      "epoch": 2.360414541911561,
      "grad_norm": 2.5625,
      "learning_rate": 1.1844253889628783e-05,
      "loss": 0.7863,
      "step": 673490
    },
    {
      "epoch": 2.3604495894184567,
      "grad_norm": 2.875,
      "learning_rate": 1.184360486096508e-05,
      "loss": 0.8931,
      "step": 673500
    },
    {
      "epoch": 2.3604846369253525,
      "grad_norm": 2.578125,
      "learning_rate": 1.1842955832301378e-05,
      "loss": 0.7864,
      "step": 673510
    },
    {
      "epoch": 2.360519684432248,
      "grad_norm": 2.8125,
      "learning_rate": 1.1842306803637676e-05,
      "loss": 0.7918,
      "step": 673520
    },
    {
      "epoch": 2.3605547319391436,
      "grad_norm": 2.703125,
      "learning_rate": 1.1841657774973974e-05,
      "loss": 0.7377,
      "step": 673530
    },
    {
      "epoch": 2.3605897794460393,
      "grad_norm": 3.375,
      "learning_rate": 1.1841008746310273e-05,
      "loss": 0.8233,
      "step": 673540
    },
    {
      "epoch": 2.3606248269529346,
      "grad_norm": 3.25,
      "learning_rate": 1.1840359717646571e-05,
      "loss": 0.7296,
      "step": 673550
    },
    {
      "epoch": 2.3606598744598304,
      "grad_norm": 2.953125,
      "learning_rate": 1.183971068898287e-05,
      "loss": 0.7845,
      "step": 673560
    },
    {
      "epoch": 2.3606949219667257,
      "grad_norm": 2.8125,
      "learning_rate": 1.1839061660319167e-05,
      "loss": 0.8223,
      "step": 673570
    },
    {
      "epoch": 2.3607299694736215,
      "grad_norm": 3.0,
      "learning_rate": 1.1838412631655465e-05,
      "loss": 0.7649,
      "step": 673580
    },
    {
      "epoch": 2.360765016980517,
      "grad_norm": 2.953125,
      "learning_rate": 1.1837763602991763e-05,
      "loss": 0.8237,
      "step": 673590
    },
    {
      "epoch": 2.3608000644874125,
      "grad_norm": 3.03125,
      "learning_rate": 1.1837114574328061e-05,
      "loss": 0.7679,
      "step": 673600
    },
    {
      "epoch": 2.3608351119943083,
      "grad_norm": 2.453125,
      "learning_rate": 1.183646554566436e-05,
      "loss": 0.7956,
      "step": 673610
    },
    {
      "epoch": 2.360870159501204,
      "grad_norm": 2.5,
      "learning_rate": 1.1835816517000657e-05,
      "loss": 0.7571,
      "step": 673620
    },
    {
      "epoch": 2.3609052070080994,
      "grad_norm": 3.109375,
      "learning_rate": 1.1835167488336955e-05,
      "loss": 0.7865,
      "step": 673630
    },
    {
      "epoch": 2.360940254514995,
      "grad_norm": 2.921875,
      "learning_rate": 1.1834518459673253e-05,
      "loss": 0.8441,
      "step": 673640
    },
    {
      "epoch": 2.360975302021891,
      "grad_norm": 3.15625,
      "learning_rate": 1.1833869431009551e-05,
      "loss": 0.8757,
      "step": 673650
    },
    {
      "epoch": 2.361010349528786,
      "grad_norm": 2.640625,
      "learning_rate": 1.1833220402345851e-05,
      "loss": 0.7224,
      "step": 673660
    },
    {
      "epoch": 2.361045397035682,
      "grad_norm": 2.5,
      "learning_rate": 1.1832571373682149e-05,
      "loss": 0.7736,
      "step": 673670
    },
    {
      "epoch": 2.3610804445425773,
      "grad_norm": 3.0625,
      "learning_rate": 1.1831922345018447e-05,
      "loss": 0.6828,
      "step": 673680
    },
    {
      "epoch": 2.361115492049473,
      "grad_norm": 2.90625,
      "learning_rate": 1.1831273316354743e-05,
      "loss": 0.8684,
      "step": 673690
    },
    {
      "epoch": 2.3611505395563688,
      "grad_norm": 2.953125,
      "learning_rate": 1.1830624287691041e-05,
      "loss": 0.8189,
      "step": 673700
    },
    {
      "epoch": 2.361185587063264,
      "grad_norm": 2.96875,
      "learning_rate": 1.182997525902734e-05,
      "loss": 0.87,
      "step": 673710
    },
    {
      "epoch": 2.36122063457016,
      "grad_norm": 3.3125,
      "learning_rate": 1.1829326230363639e-05,
      "loss": 0.7958,
      "step": 673720
    },
    {
      "epoch": 2.3612556820770556,
      "grad_norm": 2.75,
      "learning_rate": 1.1828677201699937e-05,
      "loss": 0.8552,
      "step": 673730
    },
    {
      "epoch": 2.361290729583951,
      "grad_norm": 2.859375,
      "learning_rate": 1.1828028173036235e-05,
      "loss": 0.8011,
      "step": 673740
    },
    {
      "epoch": 2.3613257770908467,
      "grad_norm": 3.21875,
      "learning_rate": 1.1827379144372533e-05,
      "loss": 0.7626,
      "step": 673750
    },
    {
      "epoch": 2.3613608245977424,
      "grad_norm": 2.765625,
      "learning_rate": 1.1826730115708831e-05,
      "loss": 0.8551,
      "step": 673760
    },
    {
      "epoch": 2.3613958721046377,
      "grad_norm": 2.4375,
      "learning_rate": 1.1826081087045129e-05,
      "loss": 0.8176,
      "step": 673770
    },
    {
      "epoch": 2.3614309196115335,
      "grad_norm": 2.59375,
      "learning_rate": 1.1825432058381427e-05,
      "loss": 0.7321,
      "step": 673780
    },
    {
      "epoch": 2.361465967118429,
      "grad_norm": 2.75,
      "learning_rate": 1.1824783029717725e-05,
      "loss": 0.7422,
      "step": 673790
    },
    {
      "epoch": 2.3615010146253246,
      "grad_norm": 2.828125,
      "learning_rate": 1.1824134001054023e-05,
      "loss": 0.8528,
      "step": 673800
    },
    {
      "epoch": 2.3615360621322203,
      "grad_norm": 2.953125,
      "learning_rate": 1.1823484972390321e-05,
      "loss": 0.7839,
      "step": 673810
    },
    {
      "epoch": 2.3615711096391157,
      "grad_norm": 3.078125,
      "learning_rate": 1.1822835943726619e-05,
      "loss": 0.8006,
      "step": 673820
    },
    {
      "epoch": 2.3616061571460114,
      "grad_norm": 2.234375,
      "learning_rate": 1.1822186915062917e-05,
      "loss": 0.7448,
      "step": 673830
    },
    {
      "epoch": 2.361641204652907,
      "grad_norm": 3.265625,
      "learning_rate": 1.1821537886399215e-05,
      "loss": 0.8153,
      "step": 673840
    },
    {
      "epoch": 2.3616762521598025,
      "grad_norm": 2.765625,
      "learning_rate": 1.1820888857735515e-05,
      "loss": 0.7644,
      "step": 673850
    },
    {
      "epoch": 2.3617112996666982,
      "grad_norm": 2.40625,
      "learning_rate": 1.1820239829071813e-05,
      "loss": 0.7599,
      "step": 673860
    },
    {
      "epoch": 2.361746347173594,
      "grad_norm": 3.09375,
      "learning_rate": 1.181959080040811e-05,
      "loss": 0.8623,
      "step": 673870
    },
    {
      "epoch": 2.3617813946804893,
      "grad_norm": 2.609375,
      "learning_rate": 1.1818941771744407e-05,
      "loss": 0.7205,
      "step": 673880
    },
    {
      "epoch": 2.361816442187385,
      "grad_norm": 3.015625,
      "learning_rate": 1.1818292743080705e-05,
      "loss": 0.8299,
      "step": 673890
    },
    {
      "epoch": 2.3618514896942804,
      "grad_norm": 2.75,
      "learning_rate": 1.1817643714417005e-05,
      "loss": 0.7673,
      "step": 673900
    },
    {
      "epoch": 2.361886537201176,
      "grad_norm": 3.0,
      "learning_rate": 1.1816994685753303e-05,
      "loss": 0.8395,
      "step": 673910
    },
    {
      "epoch": 2.361921584708072,
      "grad_norm": 3.1875,
      "learning_rate": 1.18163456570896e-05,
      "loss": 0.8117,
      "step": 673920
    },
    {
      "epoch": 2.361956632214967,
      "grad_norm": 3.0,
      "learning_rate": 1.1815696628425899e-05,
      "loss": 0.8405,
      "step": 673930
    },
    {
      "epoch": 2.361991679721863,
      "grad_norm": 3.09375,
      "learning_rate": 1.1815047599762197e-05,
      "loss": 0.8611,
      "step": 673940
    },
    {
      "epoch": 2.3620267272287587,
      "grad_norm": 3.3125,
      "learning_rate": 1.1814398571098495e-05,
      "loss": 0.7887,
      "step": 673950
    },
    {
      "epoch": 2.362061774735654,
      "grad_norm": 2.453125,
      "learning_rate": 1.1813749542434793e-05,
      "loss": 0.7532,
      "step": 673960
    },
    {
      "epoch": 2.36209682224255,
      "grad_norm": 2.46875,
      "learning_rate": 1.181310051377109e-05,
      "loss": 0.7879,
      "step": 673970
    },
    {
      "epoch": 2.3621318697494456,
      "grad_norm": 2.65625,
      "learning_rate": 1.1812451485107389e-05,
      "loss": 0.826,
      "step": 673980
    },
    {
      "epoch": 2.362166917256341,
      "grad_norm": 3.125,
      "learning_rate": 1.1811802456443687e-05,
      "loss": 0.8105,
      "step": 673990
    },
    {
      "epoch": 2.3622019647632366,
      "grad_norm": 2.96875,
      "learning_rate": 1.1811153427779985e-05,
      "loss": 0.8063,
      "step": 674000
    },
    {
      "epoch": 2.362237012270132,
      "grad_norm": 2.875,
      "learning_rate": 1.1810504399116283e-05,
      "loss": 0.8092,
      "step": 674010
    },
    {
      "epoch": 2.3622720597770277,
      "grad_norm": 3.0,
      "learning_rate": 1.180985537045258e-05,
      "loss": 0.6753,
      "step": 674020
    },
    {
      "epoch": 2.3623071072839235,
      "grad_norm": 3.15625,
      "learning_rate": 1.180920634178888e-05,
      "loss": 0.8916,
      "step": 674030
    },
    {
      "epoch": 2.3623421547908188,
      "grad_norm": 2.8125,
      "learning_rate": 1.1808557313125178e-05,
      "loss": 0.7331,
      "step": 674040
    },
    {
      "epoch": 2.3623772022977145,
      "grad_norm": 3.421875,
      "learning_rate": 1.1807908284461476e-05,
      "loss": 0.9123,
      "step": 674050
    },
    {
      "epoch": 2.3624122498046103,
      "grad_norm": 2.90625,
      "learning_rate": 1.1807259255797774e-05,
      "loss": 0.8331,
      "step": 674060
    },
    {
      "epoch": 2.3624472973115056,
      "grad_norm": 3.046875,
      "learning_rate": 1.180661022713407e-05,
      "loss": 0.834,
      "step": 674070
    },
    {
      "epoch": 2.3624823448184014,
      "grad_norm": 2.890625,
      "learning_rate": 1.1805961198470369e-05,
      "loss": 0.8388,
      "step": 674080
    },
    {
      "epoch": 2.362517392325297,
      "grad_norm": 2.84375,
      "learning_rate": 1.1805312169806668e-05,
      "loss": 0.7388,
      "step": 674090
    },
    {
      "epoch": 2.3625524398321924,
      "grad_norm": 3.1875,
      "learning_rate": 1.1804663141142966e-05,
      "loss": 0.8078,
      "step": 674100
    },
    {
      "epoch": 2.362587487339088,
      "grad_norm": 2.703125,
      "learning_rate": 1.1804014112479264e-05,
      "loss": 0.7976,
      "step": 674110
    },
    {
      "epoch": 2.3626225348459835,
      "grad_norm": 3.0625,
      "learning_rate": 1.1803365083815562e-05,
      "loss": 0.8937,
      "step": 674120
    },
    {
      "epoch": 2.3626575823528793,
      "grad_norm": 3.109375,
      "learning_rate": 1.180271605515186e-05,
      "loss": 0.8199,
      "step": 674130
    },
    {
      "epoch": 2.362692629859775,
      "grad_norm": 2.953125,
      "learning_rate": 1.1802067026488158e-05,
      "loss": 0.7279,
      "step": 674140
    },
    {
      "epoch": 2.3627276773666703,
      "grad_norm": 2.90625,
      "learning_rate": 1.1801417997824456e-05,
      "loss": 0.7195,
      "step": 674150
    },
    {
      "epoch": 2.362762724873566,
      "grad_norm": 3.0625,
      "learning_rate": 1.1800768969160754e-05,
      "loss": 0.8513,
      "step": 674160
    },
    {
      "epoch": 2.362797772380462,
      "grad_norm": 3.015625,
      "learning_rate": 1.1800119940497052e-05,
      "loss": 0.7462,
      "step": 674170
    },
    {
      "epoch": 2.362832819887357,
      "grad_norm": 3.109375,
      "learning_rate": 1.179947091183335e-05,
      "loss": 0.8062,
      "step": 674180
    },
    {
      "epoch": 2.362867867394253,
      "grad_norm": 2.765625,
      "learning_rate": 1.1798821883169648e-05,
      "loss": 0.8119,
      "step": 674190
    },
    {
      "epoch": 2.3629029149011487,
      "grad_norm": 2.796875,
      "learning_rate": 1.1798172854505946e-05,
      "loss": 0.8202,
      "step": 674200
    },
    {
      "epoch": 2.362937962408044,
      "grad_norm": 3.25,
      "learning_rate": 1.1797523825842246e-05,
      "loss": 0.8451,
      "step": 674210
    },
    {
      "epoch": 2.3629730099149397,
      "grad_norm": 3.3125,
      "learning_rate": 1.1796874797178544e-05,
      "loss": 0.7764,
      "step": 674220
    },
    {
      "epoch": 2.363008057421835,
      "grad_norm": 3.203125,
      "learning_rate": 1.1796225768514842e-05,
      "loss": 0.839,
      "step": 674230
    },
    {
      "epoch": 2.363043104928731,
      "grad_norm": 3.25,
      "learning_rate": 1.179557673985114e-05,
      "loss": 0.9178,
      "step": 674240
    },
    {
      "epoch": 2.3630781524356266,
      "grad_norm": 2.828125,
      "learning_rate": 1.1794927711187436e-05,
      "loss": 0.8003,
      "step": 674250
    },
    {
      "epoch": 2.3631131999425223,
      "grad_norm": 2.734375,
      "learning_rate": 1.1794278682523734e-05,
      "loss": 0.8333,
      "step": 674260
    },
    {
      "epoch": 2.3631482474494176,
      "grad_norm": 3.234375,
      "learning_rate": 1.1793629653860034e-05,
      "loss": 0.7893,
      "step": 674270
    },
    {
      "epoch": 2.3631832949563134,
      "grad_norm": 2.46875,
      "learning_rate": 1.1792980625196332e-05,
      "loss": 0.7711,
      "step": 674280
    },
    {
      "epoch": 2.3632183424632087,
      "grad_norm": 3.03125,
      "learning_rate": 1.179233159653263e-05,
      "loss": 0.835,
      "step": 674290
    },
    {
      "epoch": 2.3632533899701045,
      "grad_norm": 2.625,
      "learning_rate": 1.1791682567868928e-05,
      "loss": 0.8471,
      "step": 674300
    },
    {
      "epoch": 2.3632884374770002,
      "grad_norm": 2.890625,
      "learning_rate": 1.1791033539205226e-05,
      "loss": 0.8474,
      "step": 674310
    },
    {
      "epoch": 2.3633234849838956,
      "grad_norm": 2.671875,
      "learning_rate": 1.1790384510541524e-05,
      "loss": 0.8131,
      "step": 674320
    },
    {
      "epoch": 2.3633585324907913,
      "grad_norm": 2.640625,
      "learning_rate": 1.1789735481877822e-05,
      "loss": 0.7279,
      "step": 674330
    },
    {
      "epoch": 2.3633935799976866,
      "grad_norm": 3.03125,
      "learning_rate": 1.1789086453214122e-05,
      "loss": 0.7527,
      "step": 674340
    },
    {
      "epoch": 2.3634286275045824,
      "grad_norm": 2.9375,
      "learning_rate": 1.1788437424550418e-05,
      "loss": 0.7964,
      "step": 674350
    },
    {
      "epoch": 2.363463675011478,
      "grad_norm": 3.171875,
      "learning_rate": 1.1787788395886716e-05,
      "loss": 0.7283,
      "step": 674360
    },
    {
      "epoch": 2.363498722518374,
      "grad_norm": 2.8125,
      "learning_rate": 1.1787139367223014e-05,
      "loss": 0.8552,
      "step": 674370
    },
    {
      "epoch": 2.363533770025269,
      "grad_norm": 2.9375,
      "learning_rate": 1.1786490338559312e-05,
      "loss": 0.8182,
      "step": 674380
    },
    {
      "epoch": 2.363568817532165,
      "grad_norm": 2.890625,
      "learning_rate": 1.178584130989561e-05,
      "loss": 0.7751,
      "step": 674390
    },
    {
      "epoch": 2.3636038650390603,
      "grad_norm": 2.453125,
      "learning_rate": 1.178519228123191e-05,
      "loss": 0.7243,
      "step": 674400
    },
    {
      "epoch": 2.363638912545956,
      "grad_norm": 2.75,
      "learning_rate": 1.1784543252568208e-05,
      "loss": 0.8285,
      "step": 674410
    },
    {
      "epoch": 2.363673960052852,
      "grad_norm": 3.234375,
      "learning_rate": 1.1783894223904506e-05,
      "loss": 0.8724,
      "step": 674420
    },
    {
      "epoch": 2.363709007559747,
      "grad_norm": 3.53125,
      "learning_rate": 1.1783245195240804e-05,
      "loss": 0.7554,
      "step": 674430
    },
    {
      "epoch": 2.363744055066643,
      "grad_norm": 2.5625,
      "learning_rate": 1.17825961665771e-05,
      "loss": 0.7943,
      "step": 674440
    },
    {
      "epoch": 2.363779102573538,
      "grad_norm": 2.671875,
      "learning_rate": 1.17819471379134e-05,
      "loss": 0.826,
      "step": 674450
    },
    {
      "epoch": 2.363814150080434,
      "grad_norm": 2.90625,
      "learning_rate": 1.1781298109249698e-05,
      "loss": 0.7384,
      "step": 674460
    },
    {
      "epoch": 2.3638491975873297,
      "grad_norm": 2.875,
      "learning_rate": 1.1780649080585996e-05,
      "loss": 0.7699,
      "step": 674470
    },
    {
      "epoch": 2.3638842450942255,
      "grad_norm": 2.65625,
      "learning_rate": 1.1780000051922294e-05,
      "loss": 0.704,
      "step": 674480
    },
    {
      "epoch": 2.3639192926011208,
      "grad_norm": 2.765625,
      "learning_rate": 1.1779351023258592e-05,
      "loss": 0.7395,
      "step": 674490
    },
    {
      "epoch": 2.3639543401080165,
      "grad_norm": 3.046875,
      "learning_rate": 1.177870199459489e-05,
      "loss": 0.7592,
      "step": 674500
    },
    {
      "epoch": 2.363989387614912,
      "grad_norm": 2.875,
      "learning_rate": 1.1778052965931188e-05,
      "loss": 0.798,
      "step": 674510
    },
    {
      "epoch": 2.3640244351218076,
      "grad_norm": 2.671875,
      "learning_rate": 1.1777403937267487e-05,
      "loss": 0.7876,
      "step": 674520
    },
    {
      "epoch": 2.3640594826287034,
      "grad_norm": 3.109375,
      "learning_rate": 1.1776754908603785e-05,
      "loss": 0.8084,
      "step": 674530
    },
    {
      "epoch": 2.3640945301355987,
      "grad_norm": 3.09375,
      "learning_rate": 1.1776105879940082e-05,
      "loss": 0.8594,
      "step": 674540
    },
    {
      "epoch": 2.3641295776424944,
      "grad_norm": 3.015625,
      "learning_rate": 1.177545685127638e-05,
      "loss": 0.8274,
      "step": 674550
    },
    {
      "epoch": 2.36416462514939,
      "grad_norm": 2.46875,
      "learning_rate": 1.1774807822612678e-05,
      "loss": 0.8367,
      "step": 674560
    },
    {
      "epoch": 2.3641996726562855,
      "grad_norm": 2.375,
      "learning_rate": 1.1774158793948976e-05,
      "loss": 0.8443,
      "step": 674570
    },
    {
      "epoch": 2.3642347201631813,
      "grad_norm": 2.90625,
      "learning_rate": 1.1773509765285275e-05,
      "loss": 0.8042,
      "step": 674580
    },
    {
      "epoch": 2.364269767670077,
      "grad_norm": 2.75,
      "learning_rate": 1.1772860736621573e-05,
      "loss": 0.7952,
      "step": 674590
    },
    {
      "epoch": 2.3643048151769723,
      "grad_norm": 2.78125,
      "learning_rate": 1.1772211707957871e-05,
      "loss": 0.8868,
      "step": 674600
    },
    {
      "epoch": 2.364339862683868,
      "grad_norm": 2.875,
      "learning_rate": 1.177156267929417e-05,
      "loss": 0.7158,
      "step": 674610
    },
    {
      "epoch": 2.3643749101907634,
      "grad_norm": 2.984375,
      "learning_rate": 1.1770913650630467e-05,
      "loss": 0.8896,
      "step": 674620
    },
    {
      "epoch": 2.364409957697659,
      "grad_norm": 2.875,
      "learning_rate": 1.1770264621966764e-05,
      "loss": 0.7886,
      "step": 674630
    },
    {
      "epoch": 2.364445005204555,
      "grad_norm": 2.984375,
      "learning_rate": 1.1769615593303063e-05,
      "loss": 0.8068,
      "step": 674640
    },
    {
      "epoch": 2.3644800527114502,
      "grad_norm": 2.671875,
      "learning_rate": 1.1768966564639361e-05,
      "loss": 0.7501,
      "step": 674650
    },
    {
      "epoch": 2.364515100218346,
      "grad_norm": 2.625,
      "learning_rate": 1.176831753597566e-05,
      "loss": 0.7624,
      "step": 674660
    },
    {
      "epoch": 2.3645501477252417,
      "grad_norm": 2.921875,
      "learning_rate": 1.1767668507311957e-05,
      "loss": 0.7851,
      "step": 674670
    },
    {
      "epoch": 2.364585195232137,
      "grad_norm": 2.71875,
      "learning_rate": 1.1767019478648255e-05,
      "loss": 0.8762,
      "step": 674680
    },
    {
      "epoch": 2.364620242739033,
      "grad_norm": 2.59375,
      "learning_rate": 1.1766370449984553e-05,
      "loss": 0.7477,
      "step": 674690
    },
    {
      "epoch": 2.3646552902459286,
      "grad_norm": 2.5625,
      "learning_rate": 1.1765721421320851e-05,
      "loss": 0.7448,
      "step": 674700
    },
    {
      "epoch": 2.364690337752824,
      "grad_norm": 3.1875,
      "learning_rate": 1.1765072392657151e-05,
      "loss": 0.8507,
      "step": 674710
    },
    {
      "epoch": 2.3647253852597196,
      "grad_norm": 2.96875,
      "learning_rate": 1.1764423363993447e-05,
      "loss": 0.755,
      "step": 674720
    },
    {
      "epoch": 2.364760432766615,
      "grad_norm": 2.59375,
      "learning_rate": 1.1763774335329745e-05,
      "loss": 0.7183,
      "step": 674730
    },
    {
      "epoch": 2.3647954802735107,
      "grad_norm": 2.9375,
      "learning_rate": 1.1763125306666043e-05,
      "loss": 0.8904,
      "step": 674740
    },
    {
      "epoch": 2.3648305277804065,
      "grad_norm": 2.859375,
      "learning_rate": 1.1762476278002341e-05,
      "loss": 0.7366,
      "step": 674750
    },
    {
      "epoch": 2.364865575287302,
      "grad_norm": 3.4375,
      "learning_rate": 1.1761827249338641e-05,
      "loss": 0.8508,
      "step": 674760
    },
    {
      "epoch": 2.3649006227941975,
      "grad_norm": 2.90625,
      "learning_rate": 1.1761178220674939e-05,
      "loss": 0.8426,
      "step": 674770
    },
    {
      "epoch": 2.3649356703010933,
      "grad_norm": 2.96875,
      "learning_rate": 1.1760529192011237e-05,
      "loss": 0.8855,
      "step": 674780
    },
    {
      "epoch": 2.3649707178079886,
      "grad_norm": 3.171875,
      "learning_rate": 1.1759880163347535e-05,
      "loss": 0.8023,
      "step": 674790
    },
    {
      "epoch": 2.3650057653148844,
      "grad_norm": 2.515625,
      "learning_rate": 1.1759231134683833e-05,
      "loss": 0.7587,
      "step": 674800
    },
    {
      "epoch": 2.36504081282178,
      "grad_norm": 3.171875,
      "learning_rate": 1.1758582106020131e-05,
      "loss": 0.7185,
      "step": 674810
    },
    {
      "epoch": 2.3650758603286755,
      "grad_norm": 3.3125,
      "learning_rate": 1.1757933077356429e-05,
      "loss": 0.8206,
      "step": 674820
    },
    {
      "epoch": 2.365110907835571,
      "grad_norm": 3.09375,
      "learning_rate": 1.1757284048692727e-05,
      "loss": 0.8406,
      "step": 674830
    },
    {
      "epoch": 2.3651459553424665,
      "grad_norm": 3.015625,
      "learning_rate": 1.1756635020029025e-05,
      "loss": 0.8393,
      "step": 674840
    },
    {
      "epoch": 2.3651810028493623,
      "grad_norm": 2.734375,
      "learning_rate": 1.1755985991365323e-05,
      "loss": 0.7678,
      "step": 674850
    },
    {
      "epoch": 2.365216050356258,
      "grad_norm": 3.015625,
      "learning_rate": 1.1755336962701621e-05,
      "loss": 0.8394,
      "step": 674860
    },
    {
      "epoch": 2.3652510978631534,
      "grad_norm": 2.6875,
      "learning_rate": 1.1754687934037919e-05,
      "loss": 0.7944,
      "step": 674870
    },
    {
      "epoch": 2.365286145370049,
      "grad_norm": 2.984375,
      "learning_rate": 1.1754038905374217e-05,
      "loss": 0.785,
      "step": 674880
    },
    {
      "epoch": 2.365321192876945,
      "grad_norm": 2.609375,
      "learning_rate": 1.1753389876710517e-05,
      "loss": 0.7865,
      "step": 674890
    },
    {
      "epoch": 2.36535624038384,
      "grad_norm": 2.6875,
      "learning_rate": 1.1752740848046815e-05,
      "loss": 0.7837,
      "step": 674900
    },
    {
      "epoch": 2.365391287890736,
      "grad_norm": 2.5625,
      "learning_rate": 1.1752091819383111e-05,
      "loss": 0.832,
      "step": 674910
    },
    {
      "epoch": 2.3654263353976317,
      "grad_norm": 3.015625,
      "learning_rate": 1.1751442790719409e-05,
      "loss": 0.8678,
      "step": 674920
    },
    {
      "epoch": 2.365461382904527,
      "grad_norm": 2.46875,
      "learning_rate": 1.1750793762055707e-05,
      "loss": 0.8422,
      "step": 674930
    },
    {
      "epoch": 2.3654964304114228,
      "grad_norm": 2.96875,
      "learning_rate": 1.1750144733392005e-05,
      "loss": 0.8229,
      "step": 674940
    },
    {
      "epoch": 2.365531477918318,
      "grad_norm": 2.8125,
      "learning_rate": 1.1749495704728305e-05,
      "loss": 0.7557,
      "step": 674950
    },
    {
      "epoch": 2.365566525425214,
      "grad_norm": 2.9375,
      "learning_rate": 1.1748846676064603e-05,
      "loss": 0.7894,
      "step": 674960
    },
    {
      "epoch": 2.3656015729321096,
      "grad_norm": 2.84375,
      "learning_rate": 1.17481976474009e-05,
      "loss": 0.8035,
      "step": 674970
    },
    {
      "epoch": 2.365636620439005,
      "grad_norm": 2.3125,
      "learning_rate": 1.1747548618737199e-05,
      "loss": 0.8173,
      "step": 674980
    },
    {
      "epoch": 2.3656716679459007,
      "grad_norm": 3.046875,
      "learning_rate": 1.1746899590073497e-05,
      "loss": 0.869,
      "step": 674990
    },
    {
      "epoch": 2.3657067154527964,
      "grad_norm": 2.90625,
      "learning_rate": 1.1746250561409795e-05,
      "loss": 0.7963,
      "step": 675000
    },
    {
      "epoch": 2.3657067154527964,
      "eval_loss": 0.7604450583457947,
      "eval_runtime": 555.046,
      "eval_samples_per_second": 685.413,
      "eval_steps_per_second": 57.118,
      "step": 675000
    },
    {
      "epoch": 2.3657417629596917,
      "grad_norm": 3.109375,
      "learning_rate": 1.1745601532746093e-05,
      "loss": 0.8176,
      "step": 675010
    },
    {
      "epoch": 2.3657768104665875,
      "grad_norm": 2.484375,
      "learning_rate": 1.174495250408239e-05,
      "loss": 0.7966,
      "step": 675020
    },
    {
      "epoch": 2.3658118579734833,
      "grad_norm": 2.609375,
      "learning_rate": 1.1744303475418689e-05,
      "loss": 0.8003,
      "step": 675030
    },
    {
      "epoch": 2.3658469054803786,
      "grad_norm": 2.671875,
      "learning_rate": 1.1743654446754987e-05,
      "loss": 0.8116,
      "step": 675040
    },
    {
      "epoch": 2.3658819529872743,
      "grad_norm": 2.46875,
      "learning_rate": 1.1743005418091285e-05,
      "loss": 0.7397,
      "step": 675050
    },
    {
      "epoch": 2.3659170004941696,
      "grad_norm": 2.984375,
      "learning_rate": 1.1742356389427583e-05,
      "loss": 0.8067,
      "step": 675060
    },
    {
      "epoch": 2.3659520480010654,
      "grad_norm": 2.53125,
      "learning_rate": 1.1741707360763882e-05,
      "loss": 0.8393,
      "step": 675070
    },
    {
      "epoch": 2.365987095507961,
      "grad_norm": 2.625,
      "learning_rate": 1.174105833210018e-05,
      "loss": 0.7299,
      "step": 675080
    },
    {
      "epoch": 2.3660221430148565,
      "grad_norm": 2.578125,
      "learning_rate": 1.1740409303436478e-05,
      "loss": 0.8237,
      "step": 675090
    },
    {
      "epoch": 2.3660571905217522,
      "grad_norm": 2.484375,
      "learning_rate": 1.1739760274772775e-05,
      "loss": 0.794,
      "step": 675100
    },
    {
      "epoch": 2.366092238028648,
      "grad_norm": 2.765625,
      "learning_rate": 1.1739111246109073e-05,
      "loss": 0.804,
      "step": 675110
    },
    {
      "epoch": 2.3661272855355433,
      "grad_norm": 3.21875,
      "learning_rate": 1.173846221744537e-05,
      "loss": 0.8181,
      "step": 675120
    },
    {
      "epoch": 2.366162333042439,
      "grad_norm": 3.046875,
      "learning_rate": 1.173781318878167e-05,
      "loss": 0.8939,
      "step": 675130
    },
    {
      "epoch": 2.366197380549335,
      "grad_norm": 3.015625,
      "learning_rate": 1.1737164160117968e-05,
      "loss": 0.7618,
      "step": 675140
    },
    {
      "epoch": 2.36623242805623,
      "grad_norm": 3.125,
      "learning_rate": 1.1736515131454266e-05,
      "loss": 0.8151,
      "step": 675150
    },
    {
      "epoch": 2.366267475563126,
      "grad_norm": 2.703125,
      "learning_rate": 1.1735866102790564e-05,
      "loss": 0.8488,
      "step": 675160
    },
    {
      "epoch": 2.366302523070021,
      "grad_norm": 3.1875,
      "learning_rate": 1.1735217074126862e-05,
      "loss": 0.8027,
      "step": 675170
    },
    {
      "epoch": 2.366337570576917,
      "grad_norm": 3.265625,
      "learning_rate": 1.173456804546316e-05,
      "loss": 0.8116,
      "step": 675180
    },
    {
      "epoch": 2.3663726180838127,
      "grad_norm": 2.734375,
      "learning_rate": 1.1733919016799458e-05,
      "loss": 0.7275,
      "step": 675190
    },
    {
      "epoch": 2.366407665590708,
      "grad_norm": 3.046875,
      "learning_rate": 1.1733269988135756e-05,
      "loss": 0.8453,
      "step": 675200
    },
    {
      "epoch": 2.366442713097604,
      "grad_norm": 3.0625,
      "learning_rate": 1.1732620959472054e-05,
      "loss": 0.8171,
      "step": 675210
    },
    {
      "epoch": 2.3664777606044995,
      "grad_norm": 2.90625,
      "learning_rate": 1.1731971930808352e-05,
      "loss": 0.8246,
      "step": 675220
    },
    {
      "epoch": 2.366512808111395,
      "grad_norm": 3.09375,
      "learning_rate": 1.173132290214465e-05,
      "loss": 0.7973,
      "step": 675230
    },
    {
      "epoch": 2.3665478556182906,
      "grad_norm": 2.78125,
      "learning_rate": 1.1730673873480948e-05,
      "loss": 0.8766,
      "step": 675240
    },
    {
      "epoch": 2.3665829031251864,
      "grad_norm": 2.953125,
      "learning_rate": 1.1730024844817246e-05,
      "loss": 0.862,
      "step": 675250
    },
    {
      "epoch": 2.3666179506320817,
      "grad_norm": 3.171875,
      "learning_rate": 1.1729375816153546e-05,
      "loss": 0.7875,
      "step": 675260
    },
    {
      "epoch": 2.3666529981389774,
      "grad_norm": 3.078125,
      "learning_rate": 1.1728726787489844e-05,
      "loss": 0.8127,
      "step": 675270
    },
    {
      "epoch": 2.3666880456458728,
      "grad_norm": 3.09375,
      "learning_rate": 1.1728077758826142e-05,
      "loss": 0.8463,
      "step": 675280
    },
    {
      "epoch": 2.3667230931527685,
      "grad_norm": 2.4375,
      "learning_rate": 1.1727428730162438e-05,
      "loss": 0.7428,
      "step": 675290
    },
    {
      "epoch": 2.3667581406596643,
      "grad_norm": 3.125,
      "learning_rate": 1.1726779701498736e-05,
      "loss": 0.7694,
      "step": 675300
    },
    {
      "epoch": 2.3667931881665596,
      "grad_norm": 3.0625,
      "learning_rate": 1.1726130672835036e-05,
      "loss": 0.8578,
      "step": 675310
    },
    {
      "epoch": 2.3668282356734553,
      "grad_norm": 3.21875,
      "learning_rate": 1.1725481644171334e-05,
      "loss": 0.7783,
      "step": 675320
    },
    {
      "epoch": 2.366863283180351,
      "grad_norm": 2.6875,
      "learning_rate": 1.1724832615507632e-05,
      "loss": 0.8298,
      "step": 675330
    },
    {
      "epoch": 2.3668983306872464,
      "grad_norm": 3.109375,
      "learning_rate": 1.172418358684393e-05,
      "loss": 0.7971,
      "step": 675340
    },
    {
      "epoch": 2.366933378194142,
      "grad_norm": 2.96875,
      "learning_rate": 1.1723534558180228e-05,
      "loss": 0.8534,
      "step": 675350
    },
    {
      "epoch": 2.366968425701038,
      "grad_norm": 3.109375,
      "learning_rate": 1.1722885529516526e-05,
      "loss": 0.8594,
      "step": 675360
    },
    {
      "epoch": 2.3670034732079333,
      "grad_norm": 2.453125,
      "learning_rate": 1.1722236500852824e-05,
      "loss": 0.8351,
      "step": 675370
    },
    {
      "epoch": 2.367038520714829,
      "grad_norm": 2.96875,
      "learning_rate": 1.1721587472189122e-05,
      "loss": 0.8117,
      "step": 675380
    },
    {
      "epoch": 2.3670735682217243,
      "grad_norm": 2.78125,
      "learning_rate": 1.172093844352542e-05,
      "loss": 0.7632,
      "step": 675390
    },
    {
      "epoch": 2.36710861572862,
      "grad_norm": 3.171875,
      "learning_rate": 1.1720289414861718e-05,
      "loss": 0.7938,
      "step": 675400
    },
    {
      "epoch": 2.367143663235516,
      "grad_norm": 2.8125,
      "learning_rate": 1.1719640386198016e-05,
      "loss": 0.8206,
      "step": 675410
    },
    {
      "epoch": 2.367178710742411,
      "grad_norm": 2.828125,
      "learning_rate": 1.1718991357534314e-05,
      "loss": 0.8339,
      "step": 675420
    },
    {
      "epoch": 2.367213758249307,
      "grad_norm": 2.984375,
      "learning_rate": 1.1718342328870612e-05,
      "loss": 0.8482,
      "step": 675430
    },
    {
      "epoch": 2.3672488057562027,
      "grad_norm": 2.78125,
      "learning_rate": 1.1717693300206912e-05,
      "loss": 0.7997,
      "step": 675440
    },
    {
      "epoch": 2.367283853263098,
      "grad_norm": 3.09375,
      "learning_rate": 1.171704427154321e-05,
      "loss": 0.8098,
      "step": 675450
    },
    {
      "epoch": 2.3673189007699937,
      "grad_norm": 3.0625,
      "learning_rate": 1.1716395242879508e-05,
      "loss": 0.8514,
      "step": 675460
    },
    {
      "epoch": 2.3673539482768895,
      "grad_norm": 2.6875,
      "learning_rate": 1.1715746214215806e-05,
      "loss": 0.7134,
      "step": 675470
    },
    {
      "epoch": 2.367388995783785,
      "grad_norm": 3.109375,
      "learning_rate": 1.1715097185552102e-05,
      "loss": 0.8414,
      "step": 675480
    },
    {
      "epoch": 2.3674240432906806,
      "grad_norm": 2.640625,
      "learning_rate": 1.17144481568884e-05,
      "loss": 0.7294,
      "step": 675490
    },
    {
      "epoch": 2.367459090797576,
      "grad_norm": 2.921875,
      "learning_rate": 1.17137991282247e-05,
      "loss": 0.8285,
      "step": 675500
    },
    {
      "epoch": 2.3674941383044716,
      "grad_norm": 3.078125,
      "learning_rate": 1.1713150099560998e-05,
      "loss": 0.7528,
      "step": 675510
    },
    {
      "epoch": 2.3675291858113674,
      "grad_norm": 3.046875,
      "learning_rate": 1.1712501070897296e-05,
      "loss": 0.747,
      "step": 675520
    },
    {
      "epoch": 2.3675642333182627,
      "grad_norm": 3.21875,
      "learning_rate": 1.1711852042233594e-05,
      "loss": 0.8459,
      "step": 675530
    },
    {
      "epoch": 2.3675992808251585,
      "grad_norm": 2.875,
      "learning_rate": 1.1711203013569892e-05,
      "loss": 0.8671,
      "step": 675540
    },
    {
      "epoch": 2.3676343283320542,
      "grad_norm": 2.765625,
      "learning_rate": 1.171055398490619e-05,
      "loss": 0.7738,
      "step": 675550
    },
    {
      "epoch": 2.3676693758389495,
      "grad_norm": 2.859375,
      "learning_rate": 1.1709904956242488e-05,
      "loss": 0.8326,
      "step": 675560
    },
    {
      "epoch": 2.3677044233458453,
      "grad_norm": 2.703125,
      "learning_rate": 1.1709255927578786e-05,
      "loss": 0.8873,
      "step": 675570
    },
    {
      "epoch": 2.367739470852741,
      "grad_norm": 3.515625,
      "learning_rate": 1.1708606898915084e-05,
      "loss": 0.9025,
      "step": 675580
    },
    {
      "epoch": 2.3677745183596364,
      "grad_norm": 2.890625,
      "learning_rate": 1.1707957870251382e-05,
      "loss": 0.911,
      "step": 675590
    },
    {
      "epoch": 2.367809565866532,
      "grad_norm": 2.453125,
      "learning_rate": 1.170730884158768e-05,
      "loss": 0.7773,
      "step": 675600
    },
    {
      "epoch": 2.3678446133734274,
      "grad_norm": 3.265625,
      "learning_rate": 1.1706659812923978e-05,
      "loss": 0.7507,
      "step": 675610
    },
    {
      "epoch": 2.367879660880323,
      "grad_norm": 2.640625,
      "learning_rate": 1.1706010784260278e-05,
      "loss": 0.8504,
      "step": 675620
    },
    {
      "epoch": 2.367914708387219,
      "grad_norm": 2.625,
      "learning_rate": 1.1705361755596576e-05,
      "loss": 0.8493,
      "step": 675630
    },
    {
      "epoch": 2.3679497558941147,
      "grad_norm": 3.515625,
      "learning_rate": 1.1704712726932874e-05,
      "loss": 0.8436,
      "step": 675640
    },
    {
      "epoch": 2.36798480340101,
      "grad_norm": 2.75,
      "learning_rate": 1.1704063698269172e-05,
      "loss": 0.7152,
      "step": 675650
    },
    {
      "epoch": 2.368019850907906,
      "grad_norm": 2.546875,
      "learning_rate": 1.1703414669605468e-05,
      "loss": 0.7773,
      "step": 675660
    },
    {
      "epoch": 2.368054898414801,
      "grad_norm": 2.78125,
      "learning_rate": 1.1702765640941766e-05,
      "loss": 0.7817,
      "step": 675670
    },
    {
      "epoch": 2.368089945921697,
      "grad_norm": 2.6875,
      "learning_rate": 1.1702116612278066e-05,
      "loss": 0.8277,
      "step": 675680
    },
    {
      "epoch": 2.3681249934285926,
      "grad_norm": 3.328125,
      "learning_rate": 1.1701467583614364e-05,
      "loss": 0.7944,
      "step": 675690
    },
    {
      "epoch": 2.368160040935488,
      "grad_norm": 3.125,
      "learning_rate": 1.1700818554950662e-05,
      "loss": 0.9127,
      "step": 675700
    },
    {
      "epoch": 2.3681950884423837,
      "grad_norm": 2.9375,
      "learning_rate": 1.170016952628696e-05,
      "loss": 0.7616,
      "step": 675710
    },
    {
      "epoch": 2.368230135949279,
      "grad_norm": 3.1875,
      "learning_rate": 1.1699520497623258e-05,
      "loss": 0.9027,
      "step": 675720
    },
    {
      "epoch": 2.3682651834561748,
      "grad_norm": 2.6875,
      "learning_rate": 1.1698871468959556e-05,
      "loss": 0.9157,
      "step": 675730
    },
    {
      "epoch": 2.3683002309630705,
      "grad_norm": 3.015625,
      "learning_rate": 1.1698222440295854e-05,
      "loss": 0.7707,
      "step": 675740
    },
    {
      "epoch": 2.3683352784699663,
      "grad_norm": 3.203125,
      "learning_rate": 1.1697573411632153e-05,
      "loss": 0.8743,
      "step": 675750
    },
    {
      "epoch": 2.3683703259768616,
      "grad_norm": 3.140625,
      "learning_rate": 1.169692438296845e-05,
      "loss": 0.7949,
      "step": 675760
    },
    {
      "epoch": 2.3684053734837573,
      "grad_norm": 3.359375,
      "learning_rate": 1.1696275354304748e-05,
      "loss": 0.8284,
      "step": 675770
    },
    {
      "epoch": 2.3684404209906527,
      "grad_norm": 2.8125,
      "learning_rate": 1.1695626325641046e-05,
      "loss": 0.825,
      "step": 675780
    },
    {
      "epoch": 2.3684754684975484,
      "grad_norm": 2.71875,
      "learning_rate": 1.1694977296977344e-05,
      "loss": 0.8376,
      "step": 675790
    },
    {
      "epoch": 2.368510516004444,
      "grad_norm": 2.65625,
      "learning_rate": 1.1694328268313642e-05,
      "loss": 0.8136,
      "step": 675800
    },
    {
      "epoch": 2.3685455635113395,
      "grad_norm": 3.25,
      "learning_rate": 1.1693679239649941e-05,
      "loss": 0.8294,
      "step": 675810
    },
    {
      "epoch": 2.3685806110182352,
      "grad_norm": 3.234375,
      "learning_rate": 1.169303021098624e-05,
      "loss": 0.8209,
      "step": 675820
    },
    {
      "epoch": 2.368615658525131,
      "grad_norm": 2.796875,
      "learning_rate": 1.1692381182322537e-05,
      "loss": 0.8048,
      "step": 675830
    },
    {
      "epoch": 2.3686507060320263,
      "grad_norm": 2.9375,
      "learning_rate": 1.1691732153658835e-05,
      "loss": 0.848,
      "step": 675840
    },
    {
      "epoch": 2.368685753538922,
      "grad_norm": 3.125,
      "learning_rate": 1.1691083124995132e-05,
      "loss": 0.797,
      "step": 675850
    },
    {
      "epoch": 2.368720801045818,
      "grad_norm": 2.890625,
      "learning_rate": 1.1690434096331431e-05,
      "loss": 0.7246,
      "step": 675860
    },
    {
      "epoch": 2.368755848552713,
      "grad_norm": 2.71875,
      "learning_rate": 1.168978506766773e-05,
      "loss": 0.8006,
      "step": 675870
    },
    {
      "epoch": 2.368790896059609,
      "grad_norm": 2.578125,
      "learning_rate": 1.1689136039004027e-05,
      "loss": 0.7867,
      "step": 675880
    },
    {
      "epoch": 2.368825943566504,
      "grad_norm": 2.6875,
      "learning_rate": 1.1688487010340325e-05,
      "loss": 0.7293,
      "step": 675890
    },
    {
      "epoch": 2.3688609910734,
      "grad_norm": 2.71875,
      "learning_rate": 1.1687837981676623e-05,
      "loss": 0.8286,
      "step": 675900
    },
    {
      "epoch": 2.3688960385802957,
      "grad_norm": 3.0625,
      "learning_rate": 1.1687188953012921e-05,
      "loss": 0.8469,
      "step": 675910
    },
    {
      "epoch": 2.368931086087191,
      "grad_norm": 2.34375,
      "learning_rate": 1.168653992434922e-05,
      "loss": 0.78,
      "step": 675920
    },
    {
      "epoch": 2.368966133594087,
      "grad_norm": 2.640625,
      "learning_rate": 1.1685890895685519e-05,
      "loss": 0.797,
      "step": 675930
    },
    {
      "epoch": 2.3690011811009826,
      "grad_norm": 3.0625,
      "learning_rate": 1.1685241867021817e-05,
      "loss": 0.8499,
      "step": 675940
    },
    {
      "epoch": 2.369036228607878,
      "grad_norm": 3.375,
      "learning_rate": 1.1684592838358113e-05,
      "loss": 0.9501,
      "step": 675950
    },
    {
      "epoch": 2.3690712761147736,
      "grad_norm": 2.296875,
      "learning_rate": 1.1683943809694411e-05,
      "loss": 0.7831,
      "step": 675960
    },
    {
      "epoch": 2.3691063236216694,
      "grad_norm": 3.28125,
      "learning_rate": 1.168329478103071e-05,
      "loss": 0.7989,
      "step": 675970
    },
    {
      "epoch": 2.3691413711285647,
      "grad_norm": 2.515625,
      "learning_rate": 1.1682645752367007e-05,
      "loss": 0.7185,
      "step": 675980
    },
    {
      "epoch": 2.3691764186354605,
      "grad_norm": 3.421875,
      "learning_rate": 1.1681996723703307e-05,
      "loss": 0.7884,
      "step": 675990
    },
    {
      "epoch": 2.369211466142356,
      "grad_norm": 2.75,
      "learning_rate": 1.1681347695039605e-05,
      "loss": 0.7927,
      "step": 676000
    },
    {
      "epoch": 2.3692465136492515,
      "grad_norm": 2.84375,
      "learning_rate": 1.1680698666375903e-05,
      "loss": 0.7776,
      "step": 676010
    },
    {
      "epoch": 2.3692815611561473,
      "grad_norm": 2.75,
      "learning_rate": 1.1680049637712201e-05,
      "loss": 0.7096,
      "step": 676020
    },
    {
      "epoch": 2.3693166086630426,
      "grad_norm": 3.0,
      "learning_rate": 1.1679400609048499e-05,
      "loss": 0.8037,
      "step": 676030
    },
    {
      "epoch": 2.3693516561699384,
      "grad_norm": 3.34375,
      "learning_rate": 1.1678751580384795e-05,
      "loss": 0.8637,
      "step": 676040
    },
    {
      "epoch": 2.369386703676834,
      "grad_norm": 2.875,
      "learning_rate": 1.1678102551721095e-05,
      "loss": 0.7463,
      "step": 676050
    },
    {
      "epoch": 2.3694217511837294,
      "grad_norm": 2.828125,
      "learning_rate": 1.1677453523057393e-05,
      "loss": 0.8177,
      "step": 676060
    },
    {
      "epoch": 2.369456798690625,
      "grad_norm": 3.125,
      "learning_rate": 1.1676804494393691e-05,
      "loss": 0.861,
      "step": 676070
    },
    {
      "epoch": 2.369491846197521,
      "grad_norm": 3.0,
      "learning_rate": 1.1676155465729989e-05,
      "loss": 0.7564,
      "step": 676080
    },
    {
      "epoch": 2.3695268937044163,
      "grad_norm": 2.96875,
      "learning_rate": 1.1675506437066287e-05,
      "loss": 0.8669,
      "step": 676090
    },
    {
      "epoch": 2.369561941211312,
      "grad_norm": 2.796875,
      "learning_rate": 1.1674857408402585e-05,
      "loss": 0.8039,
      "step": 676100
    },
    {
      "epoch": 2.3695969887182073,
      "grad_norm": 3.234375,
      "learning_rate": 1.1674208379738885e-05,
      "loss": 0.8785,
      "step": 676110
    },
    {
      "epoch": 2.369632036225103,
      "grad_norm": 2.90625,
      "learning_rate": 1.1673559351075183e-05,
      "loss": 0.7593,
      "step": 676120
    },
    {
      "epoch": 2.369667083731999,
      "grad_norm": 3.0625,
      "learning_rate": 1.1672910322411479e-05,
      "loss": 0.8109,
      "step": 676130
    },
    {
      "epoch": 2.369702131238894,
      "grad_norm": 2.984375,
      "learning_rate": 1.1672261293747777e-05,
      "loss": 0.8555,
      "step": 676140
    },
    {
      "epoch": 2.36973717874579,
      "grad_norm": 2.78125,
      "learning_rate": 1.1671612265084075e-05,
      "loss": 0.8772,
      "step": 676150
    },
    {
      "epoch": 2.3697722262526857,
      "grad_norm": 2.96875,
      "learning_rate": 1.1670963236420373e-05,
      "loss": 0.8491,
      "step": 676160
    },
    {
      "epoch": 2.369807273759581,
      "grad_norm": 2.734375,
      "learning_rate": 1.1670314207756673e-05,
      "loss": 0.8076,
      "step": 676170
    },
    {
      "epoch": 2.3698423212664768,
      "grad_norm": 2.71875,
      "learning_rate": 1.166966517909297e-05,
      "loss": 0.7337,
      "step": 676180
    },
    {
      "epoch": 2.3698773687733725,
      "grad_norm": 3.109375,
      "learning_rate": 1.1669016150429269e-05,
      "loss": 0.8759,
      "step": 676190
    },
    {
      "epoch": 2.369912416280268,
      "grad_norm": 2.609375,
      "learning_rate": 1.1668367121765567e-05,
      "loss": 0.7868,
      "step": 676200
    },
    {
      "epoch": 2.3699474637871636,
      "grad_norm": 3.015625,
      "learning_rate": 1.1667718093101865e-05,
      "loss": 0.8085,
      "step": 676210
    },
    {
      "epoch": 2.369982511294059,
      "grad_norm": 2.78125,
      "learning_rate": 1.1667069064438163e-05,
      "loss": 0.7662,
      "step": 676220
    },
    {
      "epoch": 2.3700175588009547,
      "grad_norm": 3.15625,
      "learning_rate": 1.166642003577446e-05,
      "loss": 0.8612,
      "step": 676230
    },
    {
      "epoch": 2.3700526063078504,
      "grad_norm": 2.859375,
      "learning_rate": 1.1665771007110759e-05,
      "loss": 0.7916,
      "step": 676240
    },
    {
      "epoch": 2.3700876538147457,
      "grad_norm": 3.0625,
      "learning_rate": 1.1665121978447057e-05,
      "loss": 0.7499,
      "step": 676250
    },
    {
      "epoch": 2.3701227013216415,
      "grad_norm": 3.0625,
      "learning_rate": 1.1664472949783355e-05,
      "loss": 0.86,
      "step": 676260
    },
    {
      "epoch": 2.3701577488285372,
      "grad_norm": 3.046875,
      "learning_rate": 1.1663823921119653e-05,
      "loss": 0.8642,
      "step": 676270
    },
    {
      "epoch": 2.3701927963354326,
      "grad_norm": 3.109375,
      "learning_rate": 1.166317489245595e-05,
      "loss": 0.8165,
      "step": 676280
    },
    {
      "epoch": 2.3702278438423283,
      "grad_norm": 3.203125,
      "learning_rate": 1.1662525863792249e-05,
      "loss": 0.7945,
      "step": 676290
    },
    {
      "epoch": 2.370262891349224,
      "grad_norm": 2.9375,
      "learning_rate": 1.1661876835128548e-05,
      "loss": 0.818,
      "step": 676300
    },
    {
      "epoch": 2.3702979388561194,
      "grad_norm": 2.609375,
      "learning_rate": 1.1661227806464846e-05,
      "loss": 0.8664,
      "step": 676310
    },
    {
      "epoch": 2.370332986363015,
      "grad_norm": 3.203125,
      "learning_rate": 1.1660578777801143e-05,
      "loss": 0.7747,
      "step": 676320
    },
    {
      "epoch": 2.3703680338699105,
      "grad_norm": 2.875,
      "learning_rate": 1.165992974913744e-05,
      "loss": 0.8402,
      "step": 676330
    },
    {
      "epoch": 2.370403081376806,
      "grad_norm": 3.015625,
      "learning_rate": 1.1659280720473739e-05,
      "loss": 0.8118,
      "step": 676340
    },
    {
      "epoch": 2.370438128883702,
      "grad_norm": 2.828125,
      "learning_rate": 1.1658631691810038e-05,
      "loss": 0.824,
      "step": 676350
    },
    {
      "epoch": 2.3704731763905973,
      "grad_norm": 2.875,
      "learning_rate": 1.1657982663146336e-05,
      "loss": 0.8038,
      "step": 676360
    },
    {
      "epoch": 2.370508223897493,
      "grad_norm": 2.96875,
      "learning_rate": 1.1657333634482634e-05,
      "loss": 0.7878,
      "step": 676370
    },
    {
      "epoch": 2.370543271404389,
      "grad_norm": 3.296875,
      "learning_rate": 1.1656684605818932e-05,
      "loss": 0.8608,
      "step": 676380
    },
    {
      "epoch": 2.370578318911284,
      "grad_norm": 2.8125,
      "learning_rate": 1.165603557715523e-05,
      "loss": 0.8764,
      "step": 676390
    },
    {
      "epoch": 2.37061336641818,
      "grad_norm": 3.015625,
      "learning_rate": 1.1655386548491528e-05,
      "loss": 0.9031,
      "step": 676400
    },
    {
      "epoch": 2.3706484139250756,
      "grad_norm": 2.921875,
      "learning_rate": 1.1654737519827826e-05,
      "loss": 0.7727,
      "step": 676410
    },
    {
      "epoch": 2.370683461431971,
      "grad_norm": 2.796875,
      "learning_rate": 1.1654088491164124e-05,
      "loss": 0.7549,
      "step": 676420
    },
    {
      "epoch": 2.3707185089388667,
      "grad_norm": 2.859375,
      "learning_rate": 1.1653439462500422e-05,
      "loss": 0.8181,
      "step": 676430
    },
    {
      "epoch": 2.370753556445762,
      "grad_norm": 2.828125,
      "learning_rate": 1.165279043383672e-05,
      "loss": 0.8072,
      "step": 676440
    },
    {
      "epoch": 2.370788603952658,
      "grad_norm": 3.046875,
      "learning_rate": 1.1652141405173018e-05,
      "loss": 0.851,
      "step": 676450
    },
    {
      "epoch": 2.3708236514595535,
      "grad_norm": 2.71875,
      "learning_rate": 1.1651492376509316e-05,
      "loss": 0.7548,
      "step": 676460
    },
    {
      "epoch": 2.370858698966449,
      "grad_norm": 2.90625,
      "learning_rate": 1.1650843347845614e-05,
      "loss": 0.8499,
      "step": 676470
    },
    {
      "epoch": 2.3708937464733446,
      "grad_norm": 2.515625,
      "learning_rate": 1.1650194319181914e-05,
      "loss": 0.7217,
      "step": 676480
    },
    {
      "epoch": 2.3709287939802404,
      "grad_norm": 3.046875,
      "learning_rate": 1.1649545290518212e-05,
      "loss": 0.8679,
      "step": 676490
    },
    {
      "epoch": 2.3709638414871357,
      "grad_norm": 2.28125,
      "learning_rate": 1.164889626185451e-05,
      "loss": 0.7772,
      "step": 676500
    },
    {
      "epoch": 2.3709988889940314,
      "grad_norm": 2.953125,
      "learning_rate": 1.1648247233190806e-05,
      "loss": 0.8233,
      "step": 676510
    },
    {
      "epoch": 2.371033936500927,
      "grad_norm": 2.890625,
      "learning_rate": 1.1647598204527104e-05,
      "loss": 0.7818,
      "step": 676520
    },
    {
      "epoch": 2.3710689840078225,
      "grad_norm": 2.796875,
      "learning_rate": 1.1646949175863402e-05,
      "loss": 0.8447,
      "step": 676530
    },
    {
      "epoch": 2.3711040315147183,
      "grad_norm": 2.984375,
      "learning_rate": 1.1646300147199702e-05,
      "loss": 0.8461,
      "step": 676540
    },
    {
      "epoch": 2.3711390790216136,
      "grad_norm": 2.984375,
      "learning_rate": 1.1645651118536e-05,
      "loss": 0.7511,
      "step": 676550
    },
    {
      "epoch": 2.3711741265285093,
      "grad_norm": 2.8125,
      "learning_rate": 1.1645002089872298e-05,
      "loss": 0.7784,
      "step": 676560
    },
    {
      "epoch": 2.371209174035405,
      "grad_norm": 3.078125,
      "learning_rate": 1.1644353061208596e-05,
      "loss": 0.8609,
      "step": 676570
    },
    {
      "epoch": 2.3712442215423004,
      "grad_norm": 3.21875,
      "learning_rate": 1.1643704032544894e-05,
      "loss": 0.8279,
      "step": 676580
    },
    {
      "epoch": 2.371279269049196,
      "grad_norm": 2.921875,
      "learning_rate": 1.1643055003881192e-05,
      "loss": 0.8246,
      "step": 676590
    },
    {
      "epoch": 2.371314316556092,
      "grad_norm": 2.53125,
      "learning_rate": 1.164240597521749e-05,
      "loss": 0.814,
      "step": 676600
    },
    {
      "epoch": 2.3713493640629872,
      "grad_norm": 2.84375,
      "learning_rate": 1.1641756946553788e-05,
      "loss": 0.849,
      "step": 676610
    },
    {
      "epoch": 2.371384411569883,
      "grad_norm": 2.671875,
      "learning_rate": 1.1641107917890086e-05,
      "loss": 0.7801,
      "step": 676620
    },
    {
      "epoch": 2.3714194590767788,
      "grad_norm": 2.703125,
      "learning_rate": 1.1640458889226384e-05,
      "loss": 0.8207,
      "step": 676630
    },
    {
      "epoch": 2.371454506583674,
      "grad_norm": 2.71875,
      "learning_rate": 1.1639809860562682e-05,
      "loss": 0.7837,
      "step": 676640
    },
    {
      "epoch": 2.37148955409057,
      "grad_norm": 2.921875,
      "learning_rate": 1.163916083189898e-05,
      "loss": 0.8182,
      "step": 676650
    },
    {
      "epoch": 2.371524601597465,
      "grad_norm": 2.9375,
      "learning_rate": 1.163851180323528e-05,
      "loss": 0.7776,
      "step": 676660
    },
    {
      "epoch": 2.371559649104361,
      "grad_norm": 3.03125,
      "learning_rate": 1.1637862774571578e-05,
      "loss": 0.7993,
      "step": 676670
    },
    {
      "epoch": 2.3715946966112567,
      "grad_norm": 3.125,
      "learning_rate": 1.1637213745907876e-05,
      "loss": 0.7116,
      "step": 676680
    },
    {
      "epoch": 2.371629744118152,
      "grad_norm": 3.046875,
      "learning_rate": 1.1636564717244174e-05,
      "loss": 0.8048,
      "step": 676690
    },
    {
      "epoch": 2.3716647916250477,
      "grad_norm": 3.21875,
      "learning_rate": 1.163591568858047e-05,
      "loss": 0.8105,
      "step": 676700
    },
    {
      "epoch": 2.3716998391319435,
      "grad_norm": 2.53125,
      "learning_rate": 1.1635266659916768e-05,
      "loss": 0.7345,
      "step": 676710
    },
    {
      "epoch": 2.371734886638839,
      "grad_norm": 2.828125,
      "learning_rate": 1.1634617631253068e-05,
      "loss": 0.7437,
      "step": 676720
    },
    {
      "epoch": 2.3717699341457346,
      "grad_norm": 2.78125,
      "learning_rate": 1.1633968602589366e-05,
      "loss": 0.7964,
      "step": 676730
    },
    {
      "epoch": 2.3718049816526303,
      "grad_norm": 3.265625,
      "learning_rate": 1.1633319573925664e-05,
      "loss": 0.8497,
      "step": 676740
    },
    {
      "epoch": 2.3718400291595256,
      "grad_norm": 2.34375,
      "learning_rate": 1.1632670545261962e-05,
      "loss": 0.8187,
      "step": 676750
    },
    {
      "epoch": 2.3718750766664214,
      "grad_norm": 3.265625,
      "learning_rate": 1.163202151659826e-05,
      "loss": 0.8142,
      "step": 676760
    },
    {
      "epoch": 2.3719101241733167,
      "grad_norm": 2.65625,
      "learning_rate": 1.1631372487934558e-05,
      "loss": 0.763,
      "step": 676770
    },
    {
      "epoch": 2.3719451716802125,
      "grad_norm": 3.03125,
      "learning_rate": 1.1630723459270856e-05,
      "loss": 0.7535,
      "step": 676780
    },
    {
      "epoch": 2.371980219187108,
      "grad_norm": 2.90625,
      "learning_rate": 1.1630074430607154e-05,
      "loss": 0.8395,
      "step": 676790
    },
    {
      "epoch": 2.3720152666940035,
      "grad_norm": 2.984375,
      "learning_rate": 1.1629425401943452e-05,
      "loss": 0.8291,
      "step": 676800
    },
    {
      "epoch": 2.3720503142008993,
      "grad_norm": 3.203125,
      "learning_rate": 1.162877637327975e-05,
      "loss": 0.7436,
      "step": 676810
    },
    {
      "epoch": 2.372085361707795,
      "grad_norm": 3.046875,
      "learning_rate": 1.1628127344616048e-05,
      "loss": 0.8931,
      "step": 676820
    },
    {
      "epoch": 2.3721204092146904,
      "grad_norm": 2.984375,
      "learning_rate": 1.1627478315952346e-05,
      "loss": 0.7577,
      "step": 676830
    },
    {
      "epoch": 2.372155456721586,
      "grad_norm": 2.859375,
      "learning_rate": 1.1626829287288644e-05,
      "loss": 0.8184,
      "step": 676840
    },
    {
      "epoch": 2.372190504228482,
      "grad_norm": 2.71875,
      "learning_rate": 1.1626180258624943e-05,
      "loss": 0.7991,
      "step": 676850
    },
    {
      "epoch": 2.372225551735377,
      "grad_norm": 3.015625,
      "learning_rate": 1.1625531229961241e-05,
      "loss": 0.8545,
      "step": 676860
    },
    {
      "epoch": 2.372260599242273,
      "grad_norm": 3.046875,
      "learning_rate": 1.162488220129754e-05,
      "loss": 0.7634,
      "step": 676870
    },
    {
      "epoch": 2.3722956467491683,
      "grad_norm": 2.578125,
      "learning_rate": 1.1624233172633837e-05,
      "loss": 0.7439,
      "step": 676880
    },
    {
      "epoch": 2.372330694256064,
      "grad_norm": 2.78125,
      "learning_rate": 1.1623584143970134e-05,
      "loss": 0.8983,
      "step": 676890
    },
    {
      "epoch": 2.37236574176296,
      "grad_norm": 3.078125,
      "learning_rate": 1.1622935115306433e-05,
      "loss": 0.8151,
      "step": 676900
    },
    {
      "epoch": 2.3724007892698555,
      "grad_norm": 3.171875,
      "learning_rate": 1.1622286086642731e-05,
      "loss": 0.8468,
      "step": 676910
    },
    {
      "epoch": 2.372435836776751,
      "grad_norm": 3.25,
      "learning_rate": 1.162163705797903e-05,
      "loss": 0.8311,
      "step": 676920
    },
    {
      "epoch": 2.3724708842836466,
      "grad_norm": 3.21875,
      "learning_rate": 1.1620988029315327e-05,
      "loss": 0.7936,
      "step": 676930
    },
    {
      "epoch": 2.372505931790542,
      "grad_norm": 2.71875,
      "learning_rate": 1.1620339000651625e-05,
      "loss": 0.6819,
      "step": 676940
    },
    {
      "epoch": 2.3725409792974377,
      "grad_norm": 2.984375,
      "learning_rate": 1.1619689971987923e-05,
      "loss": 0.6989,
      "step": 676950
    },
    {
      "epoch": 2.3725760268043334,
      "grad_norm": 2.875,
      "learning_rate": 1.1619040943324221e-05,
      "loss": 0.8898,
      "step": 676960
    },
    {
      "epoch": 2.3726110743112288,
      "grad_norm": 2.84375,
      "learning_rate": 1.1618391914660521e-05,
      "loss": 0.8419,
      "step": 676970
    },
    {
      "epoch": 2.3726461218181245,
      "grad_norm": 3.25,
      "learning_rate": 1.1617742885996817e-05,
      "loss": 0.8411,
      "step": 676980
    },
    {
      "epoch": 2.37268116932502,
      "grad_norm": 2.828125,
      "learning_rate": 1.1617093857333115e-05,
      "loss": 0.7952,
      "step": 676990
    },
    {
      "epoch": 2.3727162168319156,
      "grad_norm": 2.71875,
      "learning_rate": 1.1616444828669413e-05,
      "loss": 0.8227,
      "step": 677000
    },
    {
      "epoch": 2.3727512643388113,
      "grad_norm": 2.8125,
      "learning_rate": 1.1615795800005711e-05,
      "loss": 0.8163,
      "step": 677010
    },
    {
      "epoch": 2.372786311845707,
      "grad_norm": 2.890625,
      "learning_rate": 1.161514677134201e-05,
      "loss": 0.7214,
      "step": 677020
    },
    {
      "epoch": 2.3728213593526024,
      "grad_norm": 2.8125,
      "learning_rate": 1.1614497742678309e-05,
      "loss": 0.7875,
      "step": 677030
    },
    {
      "epoch": 2.372856406859498,
      "grad_norm": 3.171875,
      "learning_rate": 1.1613848714014607e-05,
      "loss": 0.8109,
      "step": 677040
    },
    {
      "epoch": 2.3728914543663935,
      "grad_norm": 2.859375,
      "learning_rate": 1.1613199685350905e-05,
      "loss": 0.7963,
      "step": 677050
    },
    {
      "epoch": 2.3729265018732892,
      "grad_norm": 2.859375,
      "learning_rate": 1.1612550656687203e-05,
      "loss": 0.7242,
      "step": 677060
    },
    {
      "epoch": 2.372961549380185,
      "grad_norm": 3.015625,
      "learning_rate": 1.16119016280235e-05,
      "loss": 0.7566,
      "step": 677070
    },
    {
      "epoch": 2.3729965968870803,
      "grad_norm": 3.03125,
      "learning_rate": 1.1611252599359797e-05,
      "loss": 0.8115,
      "step": 677080
    },
    {
      "epoch": 2.373031644393976,
      "grad_norm": 2.75,
      "learning_rate": 1.1610603570696097e-05,
      "loss": 0.7875,
      "step": 677090
    },
    {
      "epoch": 2.3730666919008714,
      "grad_norm": 2.15625,
      "learning_rate": 1.1609954542032395e-05,
      "loss": 0.8141,
      "step": 677100
    },
    {
      "epoch": 2.373101739407767,
      "grad_norm": 3.265625,
      "learning_rate": 1.1609305513368693e-05,
      "loss": 0.8422,
      "step": 677110
    },
    {
      "epoch": 2.373136786914663,
      "grad_norm": 2.671875,
      "learning_rate": 1.1608656484704991e-05,
      "loss": 0.8549,
      "step": 677120
    },
    {
      "epoch": 2.3731718344215587,
      "grad_norm": 3.125,
      "learning_rate": 1.1608007456041289e-05,
      "loss": 0.8978,
      "step": 677130
    },
    {
      "epoch": 2.373206881928454,
      "grad_norm": 2.828125,
      "learning_rate": 1.1607358427377587e-05,
      "loss": 0.8539,
      "step": 677140
    },
    {
      "epoch": 2.3732419294353497,
      "grad_norm": 3.21875,
      "learning_rate": 1.1606709398713885e-05,
      "loss": 0.8053,
      "step": 677150
    },
    {
      "epoch": 2.373276976942245,
      "grad_norm": 2.875,
      "learning_rate": 1.1606060370050185e-05,
      "loss": 0.8171,
      "step": 677160
    },
    {
      "epoch": 2.373312024449141,
      "grad_norm": 2.890625,
      "learning_rate": 1.1605411341386481e-05,
      "loss": 0.7665,
      "step": 677170
    },
    {
      "epoch": 2.3733470719560366,
      "grad_norm": 2.96875,
      "learning_rate": 1.1604762312722779e-05,
      "loss": 0.8761,
      "step": 677180
    },
    {
      "epoch": 2.373382119462932,
      "grad_norm": 3.0,
      "learning_rate": 1.1604113284059077e-05,
      "loss": 0.8317,
      "step": 677190
    },
    {
      "epoch": 2.3734171669698276,
      "grad_norm": 2.75,
      "learning_rate": 1.1603464255395375e-05,
      "loss": 0.8262,
      "step": 677200
    },
    {
      "epoch": 2.3734522144767234,
      "grad_norm": 2.6875,
      "learning_rate": 1.1602815226731675e-05,
      "loss": 0.7615,
      "step": 677210
    },
    {
      "epoch": 2.3734872619836187,
      "grad_norm": 2.859375,
      "learning_rate": 1.1602166198067973e-05,
      "loss": 0.8939,
      "step": 677220
    },
    {
      "epoch": 2.3735223094905145,
      "grad_norm": 2.90625,
      "learning_rate": 1.160151716940427e-05,
      "loss": 0.7859,
      "step": 677230
    },
    {
      "epoch": 2.37355735699741,
      "grad_norm": 3.578125,
      "learning_rate": 1.1600868140740569e-05,
      "loss": 0.8496,
      "step": 677240
    },
    {
      "epoch": 2.3735924045043055,
      "grad_norm": 2.828125,
      "learning_rate": 1.1600219112076867e-05,
      "loss": 0.854,
      "step": 677250
    },
    {
      "epoch": 2.3736274520112013,
      "grad_norm": 2.59375,
      "learning_rate": 1.1599570083413163e-05,
      "loss": 0.7639,
      "step": 677260
    },
    {
      "epoch": 2.3736624995180966,
      "grad_norm": 3.140625,
      "learning_rate": 1.1598921054749463e-05,
      "loss": 0.8936,
      "step": 677270
    },
    {
      "epoch": 2.3736975470249924,
      "grad_norm": 2.484375,
      "learning_rate": 1.159827202608576e-05,
      "loss": 0.9074,
      "step": 677280
    },
    {
      "epoch": 2.373732594531888,
      "grad_norm": 2.734375,
      "learning_rate": 1.1597622997422059e-05,
      "loss": 0.756,
      "step": 677290
    },
    {
      "epoch": 2.3737676420387834,
      "grad_norm": 2.734375,
      "learning_rate": 1.1596973968758357e-05,
      "loss": 0.8029,
      "step": 677300
    },
    {
      "epoch": 2.373802689545679,
      "grad_norm": 2.8125,
      "learning_rate": 1.1596324940094655e-05,
      "loss": 0.8787,
      "step": 677310
    },
    {
      "epoch": 2.373837737052575,
      "grad_norm": 2.625,
      "learning_rate": 1.1595675911430953e-05,
      "loss": 0.8068,
      "step": 677320
    },
    {
      "epoch": 2.3738727845594703,
      "grad_norm": 2.96875,
      "learning_rate": 1.159502688276725e-05,
      "loss": 0.8303,
      "step": 677330
    },
    {
      "epoch": 2.373907832066366,
      "grad_norm": 2.75,
      "learning_rate": 1.159437785410355e-05,
      "loss": 0.8338,
      "step": 677340
    },
    {
      "epoch": 2.3739428795732618,
      "grad_norm": 3.140625,
      "learning_rate": 1.1593728825439848e-05,
      "loss": 0.7742,
      "step": 677350
    },
    {
      "epoch": 2.373977927080157,
      "grad_norm": 2.9375,
      "learning_rate": 1.1593079796776145e-05,
      "loss": 0.8313,
      "step": 677360
    },
    {
      "epoch": 2.374012974587053,
      "grad_norm": 2.9375,
      "learning_rate": 1.1592430768112443e-05,
      "loss": 0.8036,
      "step": 677370
    },
    {
      "epoch": 2.374048022093948,
      "grad_norm": 2.765625,
      "learning_rate": 1.159178173944874e-05,
      "loss": 0.7806,
      "step": 677380
    },
    {
      "epoch": 2.374083069600844,
      "grad_norm": 3.203125,
      "learning_rate": 1.1591132710785039e-05,
      "loss": 0.7722,
      "step": 677390
    },
    {
      "epoch": 2.3741181171077397,
      "grad_norm": 2.515625,
      "learning_rate": 1.1590483682121338e-05,
      "loss": 0.7954,
      "step": 677400
    },
    {
      "epoch": 2.374153164614635,
      "grad_norm": 3.078125,
      "learning_rate": 1.1589834653457636e-05,
      "loss": 0.7687,
      "step": 677410
    },
    {
      "epoch": 2.3741882121215307,
      "grad_norm": 3.125,
      "learning_rate": 1.1589185624793934e-05,
      "loss": 0.8362,
      "step": 677420
    },
    {
      "epoch": 2.3742232596284265,
      "grad_norm": 3.15625,
      "learning_rate": 1.1588536596130232e-05,
      "loss": 0.7939,
      "step": 677430
    },
    {
      "epoch": 2.374258307135322,
      "grad_norm": 2.875,
      "learning_rate": 1.158788756746653e-05,
      "loss": 0.8649,
      "step": 677440
    },
    {
      "epoch": 2.3742933546422176,
      "grad_norm": 2.59375,
      "learning_rate": 1.1587238538802828e-05,
      "loss": 0.7765,
      "step": 677450
    },
    {
      "epoch": 2.3743284021491133,
      "grad_norm": 2.96875,
      "learning_rate": 1.1586589510139126e-05,
      "loss": 0.7929,
      "step": 677460
    },
    {
      "epoch": 2.3743634496560087,
      "grad_norm": 3.4375,
      "learning_rate": 1.1585940481475424e-05,
      "loss": 0.8457,
      "step": 677470
    },
    {
      "epoch": 2.3743984971629044,
      "grad_norm": 2.75,
      "learning_rate": 1.1585291452811722e-05,
      "loss": 0.8228,
      "step": 677480
    },
    {
      "epoch": 2.3744335446697997,
      "grad_norm": 2.453125,
      "learning_rate": 1.158464242414802e-05,
      "loss": 0.7711,
      "step": 677490
    },
    {
      "epoch": 2.3744685921766955,
      "grad_norm": 3.015625,
      "learning_rate": 1.1583993395484318e-05,
      "loss": 0.8369,
      "step": 677500
    },
    {
      "epoch": 2.3745036396835912,
      "grad_norm": 2.78125,
      "learning_rate": 1.1583344366820616e-05,
      "loss": 0.7876,
      "step": 677510
    },
    {
      "epoch": 2.3745386871904866,
      "grad_norm": 2.59375,
      "learning_rate": 1.1582695338156916e-05,
      "loss": 0.7689,
      "step": 677520
    },
    {
      "epoch": 2.3745737346973823,
      "grad_norm": 2.828125,
      "learning_rate": 1.1582046309493214e-05,
      "loss": 0.9189,
      "step": 677530
    },
    {
      "epoch": 2.374608782204278,
      "grad_norm": 2.8125,
      "learning_rate": 1.158139728082951e-05,
      "loss": 0.8338,
      "step": 677540
    },
    {
      "epoch": 2.3746438297111734,
      "grad_norm": 2.921875,
      "learning_rate": 1.1580748252165808e-05,
      "loss": 0.7767,
      "step": 677550
    },
    {
      "epoch": 2.374678877218069,
      "grad_norm": 2.59375,
      "learning_rate": 1.1580099223502106e-05,
      "loss": 0.7989,
      "step": 677560
    },
    {
      "epoch": 2.374713924724965,
      "grad_norm": 2.765625,
      "learning_rate": 1.1579450194838404e-05,
      "loss": 0.7759,
      "step": 677570
    },
    {
      "epoch": 2.37474897223186,
      "grad_norm": 2.625,
      "learning_rate": 1.1578801166174704e-05,
      "loss": 0.8027,
      "step": 677580
    },
    {
      "epoch": 2.374784019738756,
      "grad_norm": 3.0,
      "learning_rate": 1.1578152137511002e-05,
      "loss": 0.8455,
      "step": 677590
    },
    {
      "epoch": 2.3748190672456513,
      "grad_norm": 2.453125,
      "learning_rate": 1.15775031088473e-05,
      "loss": 0.7286,
      "step": 677600
    },
    {
      "epoch": 2.374854114752547,
      "grad_norm": 2.5625,
      "learning_rate": 1.1576854080183598e-05,
      "loss": 0.7852,
      "step": 677610
    },
    {
      "epoch": 2.374889162259443,
      "grad_norm": 2.65625,
      "learning_rate": 1.1576205051519896e-05,
      "loss": 0.8134,
      "step": 677620
    },
    {
      "epoch": 2.374924209766338,
      "grad_norm": 2.625,
      "learning_rate": 1.1575556022856194e-05,
      "loss": 0.765,
      "step": 677630
    },
    {
      "epoch": 2.374959257273234,
      "grad_norm": 2.953125,
      "learning_rate": 1.1574906994192492e-05,
      "loss": 0.8102,
      "step": 677640
    },
    {
      "epoch": 2.3749943047801296,
      "grad_norm": 2.890625,
      "learning_rate": 1.157425796552879e-05,
      "loss": 0.7948,
      "step": 677650
    },
    {
      "epoch": 2.375029352287025,
      "grad_norm": 3.0,
      "learning_rate": 1.1573608936865088e-05,
      "loss": 0.8046,
      "step": 677660
    },
    {
      "epoch": 2.3750643997939207,
      "grad_norm": 3.1875,
      "learning_rate": 1.1572959908201386e-05,
      "loss": 0.8745,
      "step": 677670
    },
    {
      "epoch": 2.3750994473008165,
      "grad_norm": 2.828125,
      "learning_rate": 1.1572310879537684e-05,
      "loss": 0.7341,
      "step": 677680
    },
    {
      "epoch": 2.3751344948077118,
      "grad_norm": 2.65625,
      "learning_rate": 1.1571661850873982e-05,
      "loss": 0.7579,
      "step": 677690
    },
    {
      "epoch": 2.3751695423146075,
      "grad_norm": 3.0,
      "learning_rate": 1.157101282221028e-05,
      "loss": 0.7614,
      "step": 677700
    },
    {
      "epoch": 2.375204589821503,
      "grad_norm": 2.921875,
      "learning_rate": 1.157036379354658e-05,
      "loss": 0.7944,
      "step": 677710
    },
    {
      "epoch": 2.3752396373283986,
      "grad_norm": 2.921875,
      "learning_rate": 1.1569714764882878e-05,
      "loss": 0.905,
      "step": 677720
    },
    {
      "epoch": 2.3752746848352944,
      "grad_norm": 2.859375,
      "learning_rate": 1.1569065736219174e-05,
      "loss": 0.7945,
      "step": 677730
    },
    {
      "epoch": 2.3753097323421897,
      "grad_norm": 2.734375,
      "learning_rate": 1.1568416707555472e-05,
      "loss": 0.7595,
      "step": 677740
    },
    {
      "epoch": 2.3753447798490854,
      "grad_norm": 2.453125,
      "learning_rate": 1.156776767889177e-05,
      "loss": 0.7224,
      "step": 677750
    },
    {
      "epoch": 2.375379827355981,
      "grad_norm": 2.59375,
      "learning_rate": 1.156711865022807e-05,
      "loss": 0.7195,
      "step": 677760
    },
    {
      "epoch": 2.3754148748628765,
      "grad_norm": 2.921875,
      "learning_rate": 1.1566469621564368e-05,
      "loss": 0.8744,
      "step": 677770
    },
    {
      "epoch": 2.3754499223697723,
      "grad_norm": 3.21875,
      "learning_rate": 1.1565820592900666e-05,
      "loss": 0.75,
      "step": 677780
    },
    {
      "epoch": 2.375484969876668,
      "grad_norm": 2.765625,
      "learning_rate": 1.1565171564236964e-05,
      "loss": 0.7987,
      "step": 677790
    },
    {
      "epoch": 2.3755200173835633,
      "grad_norm": 3.3125,
      "learning_rate": 1.1564522535573262e-05,
      "loss": 0.8316,
      "step": 677800
    },
    {
      "epoch": 2.375555064890459,
      "grad_norm": 2.984375,
      "learning_rate": 1.156387350690956e-05,
      "loss": 0.8583,
      "step": 677810
    },
    {
      "epoch": 2.3755901123973544,
      "grad_norm": 3.1875,
      "learning_rate": 1.1563224478245858e-05,
      "loss": 0.7695,
      "step": 677820
    },
    {
      "epoch": 2.37562515990425,
      "grad_norm": 2.59375,
      "learning_rate": 1.1562575449582156e-05,
      "loss": 0.8464,
      "step": 677830
    },
    {
      "epoch": 2.375660207411146,
      "grad_norm": 2.75,
      "learning_rate": 1.1561926420918454e-05,
      "loss": 0.7173,
      "step": 677840
    },
    {
      "epoch": 2.3756952549180412,
      "grad_norm": 3.0625,
      "learning_rate": 1.1561277392254752e-05,
      "loss": 0.7875,
      "step": 677850
    },
    {
      "epoch": 2.375730302424937,
      "grad_norm": 2.953125,
      "learning_rate": 1.156062836359105e-05,
      "loss": 0.7999,
      "step": 677860
    },
    {
      "epoch": 2.3757653499318327,
      "grad_norm": 2.8125,
      "learning_rate": 1.1559979334927348e-05,
      "loss": 0.8566,
      "step": 677870
    },
    {
      "epoch": 2.375800397438728,
      "grad_norm": 2.859375,
      "learning_rate": 1.1559330306263646e-05,
      "loss": 0.7459,
      "step": 677880
    },
    {
      "epoch": 2.375835444945624,
      "grad_norm": 3.390625,
      "learning_rate": 1.1558681277599946e-05,
      "loss": 0.8682,
      "step": 677890
    },
    {
      "epoch": 2.3758704924525196,
      "grad_norm": 3.25,
      "learning_rate": 1.1558032248936244e-05,
      "loss": 0.8539,
      "step": 677900
    },
    {
      "epoch": 2.375905539959415,
      "grad_norm": 3.03125,
      "learning_rate": 1.1557383220272542e-05,
      "loss": 0.8453,
      "step": 677910
    },
    {
      "epoch": 2.3759405874663106,
      "grad_norm": 3.0,
      "learning_rate": 1.1556734191608838e-05,
      "loss": 0.7976,
      "step": 677920
    },
    {
      "epoch": 2.375975634973206,
      "grad_norm": 2.84375,
      "learning_rate": 1.1556085162945136e-05,
      "loss": 0.778,
      "step": 677930
    },
    {
      "epoch": 2.3760106824801017,
      "grad_norm": 2.3125,
      "learning_rate": 1.1555436134281434e-05,
      "loss": 0.6929,
      "step": 677940
    },
    {
      "epoch": 2.3760457299869975,
      "grad_norm": 3.078125,
      "learning_rate": 1.1554787105617734e-05,
      "loss": 0.7672,
      "step": 677950
    },
    {
      "epoch": 2.376080777493893,
      "grad_norm": 3.03125,
      "learning_rate": 1.1554138076954032e-05,
      "loss": 0.8498,
      "step": 677960
    },
    {
      "epoch": 2.3761158250007886,
      "grad_norm": 2.890625,
      "learning_rate": 1.155348904829033e-05,
      "loss": 0.78,
      "step": 677970
    },
    {
      "epoch": 2.3761508725076843,
      "grad_norm": 2.8125,
      "learning_rate": 1.1552840019626628e-05,
      "loss": 0.8203,
      "step": 677980
    },
    {
      "epoch": 2.3761859200145796,
      "grad_norm": 2.6875,
      "learning_rate": 1.1552190990962926e-05,
      "loss": 0.8804,
      "step": 677990
    },
    {
      "epoch": 2.3762209675214754,
      "grad_norm": 3.1875,
      "learning_rate": 1.1551541962299224e-05,
      "loss": 0.7541,
      "step": 678000
    },
    {
      "epoch": 2.376256015028371,
      "grad_norm": 2.5625,
      "learning_rate": 1.1550892933635522e-05,
      "loss": 0.8019,
      "step": 678010
    },
    {
      "epoch": 2.3762910625352665,
      "grad_norm": 2.84375,
      "learning_rate": 1.155024390497182e-05,
      "loss": 0.7988,
      "step": 678020
    },
    {
      "epoch": 2.376326110042162,
      "grad_norm": 3.265625,
      "learning_rate": 1.1549594876308118e-05,
      "loss": 0.8288,
      "step": 678030
    },
    {
      "epoch": 2.3763611575490575,
      "grad_norm": 3.21875,
      "learning_rate": 1.1548945847644416e-05,
      "loss": 0.8154,
      "step": 678040
    },
    {
      "epoch": 2.3763962050559533,
      "grad_norm": 2.859375,
      "learning_rate": 1.1548296818980714e-05,
      "loss": 0.7495,
      "step": 678050
    },
    {
      "epoch": 2.376431252562849,
      "grad_norm": 2.75,
      "learning_rate": 1.1547647790317012e-05,
      "loss": 0.8123,
      "step": 678060
    },
    {
      "epoch": 2.3764663000697444,
      "grad_norm": 3.125,
      "learning_rate": 1.1546998761653311e-05,
      "loss": 0.7836,
      "step": 678070
    },
    {
      "epoch": 2.37650134757664,
      "grad_norm": 3.34375,
      "learning_rate": 1.154634973298961e-05,
      "loss": 0.8504,
      "step": 678080
    },
    {
      "epoch": 2.376536395083536,
      "grad_norm": 2.875,
      "learning_rate": 1.1545700704325907e-05,
      "loss": 0.8199,
      "step": 678090
    },
    {
      "epoch": 2.376571442590431,
      "grad_norm": 3.390625,
      "learning_rate": 1.1545051675662205e-05,
      "loss": 0.7952,
      "step": 678100
    },
    {
      "epoch": 2.376606490097327,
      "grad_norm": 3.09375,
      "learning_rate": 1.1544402646998502e-05,
      "loss": 0.7558,
      "step": 678110
    },
    {
      "epoch": 2.3766415376042227,
      "grad_norm": 2.625,
      "learning_rate": 1.15437536183348e-05,
      "loss": 0.8611,
      "step": 678120
    },
    {
      "epoch": 2.376676585111118,
      "grad_norm": 2.796875,
      "learning_rate": 1.15431045896711e-05,
      "loss": 0.7514,
      "step": 678130
    },
    {
      "epoch": 2.3767116326180138,
      "grad_norm": 2.921875,
      "learning_rate": 1.1542455561007397e-05,
      "loss": 0.8305,
      "step": 678140
    },
    {
      "epoch": 2.376746680124909,
      "grad_norm": 2.96875,
      "learning_rate": 1.1541806532343695e-05,
      "loss": 0.7619,
      "step": 678150
    },
    {
      "epoch": 2.376781727631805,
      "grad_norm": 2.796875,
      "learning_rate": 1.1541157503679993e-05,
      "loss": 0.8247,
      "step": 678160
    },
    {
      "epoch": 2.3768167751387006,
      "grad_norm": 3.078125,
      "learning_rate": 1.1540508475016291e-05,
      "loss": 0.7811,
      "step": 678170
    },
    {
      "epoch": 2.376851822645596,
      "grad_norm": 2.90625,
      "learning_rate": 1.153985944635259e-05,
      "loss": 0.7903,
      "step": 678180
    },
    {
      "epoch": 2.3768868701524917,
      "grad_norm": 2.875,
      "learning_rate": 1.1539210417688887e-05,
      "loss": 0.908,
      "step": 678190
    },
    {
      "epoch": 2.3769219176593874,
      "grad_norm": 3.734375,
      "learning_rate": 1.1538561389025185e-05,
      "loss": 0.9071,
      "step": 678200
    },
    {
      "epoch": 2.3769569651662827,
      "grad_norm": 2.703125,
      "learning_rate": 1.1537912360361483e-05,
      "loss": 0.759,
      "step": 678210
    },
    {
      "epoch": 2.3769920126731785,
      "grad_norm": 3.0,
      "learning_rate": 1.1537263331697781e-05,
      "loss": 0.7743,
      "step": 678220
    },
    {
      "epoch": 2.3770270601800743,
      "grad_norm": 2.78125,
      "learning_rate": 1.153661430303408e-05,
      "loss": 0.8158,
      "step": 678230
    },
    {
      "epoch": 2.3770621076869696,
      "grad_norm": 2.890625,
      "learning_rate": 1.1535965274370377e-05,
      "loss": 0.798,
      "step": 678240
    },
    {
      "epoch": 2.3770971551938653,
      "grad_norm": 2.75,
      "learning_rate": 1.1535316245706675e-05,
      "loss": 0.9182,
      "step": 678250
    },
    {
      "epoch": 2.3771322027007606,
      "grad_norm": 2.671875,
      "learning_rate": 1.1534667217042975e-05,
      "loss": 0.8236,
      "step": 678260
    },
    {
      "epoch": 2.3771672502076564,
      "grad_norm": 3.015625,
      "learning_rate": 1.1534018188379273e-05,
      "loss": 0.8127,
      "step": 678270
    },
    {
      "epoch": 2.377202297714552,
      "grad_norm": 3.53125,
      "learning_rate": 1.1533369159715571e-05,
      "loss": 0.8706,
      "step": 678280
    },
    {
      "epoch": 2.377237345221448,
      "grad_norm": 3.046875,
      "learning_rate": 1.1532720131051869e-05,
      "loss": 0.7853,
      "step": 678290
    },
    {
      "epoch": 2.3772723927283432,
      "grad_norm": 2.578125,
      "learning_rate": 1.1532071102388165e-05,
      "loss": 0.7499,
      "step": 678300
    },
    {
      "epoch": 2.377307440235239,
      "grad_norm": 3.34375,
      "learning_rate": 1.1531422073724465e-05,
      "loss": 0.7788,
      "step": 678310
    },
    {
      "epoch": 2.3773424877421343,
      "grad_norm": 2.96875,
      "learning_rate": 1.1530773045060763e-05,
      "loss": 0.8242,
      "step": 678320
    },
    {
      "epoch": 2.37737753524903,
      "grad_norm": 2.59375,
      "learning_rate": 1.1530124016397061e-05,
      "loss": 0.8028,
      "step": 678330
    },
    {
      "epoch": 2.377412582755926,
      "grad_norm": 2.90625,
      "learning_rate": 1.1529474987733359e-05,
      "loss": 0.7275,
      "step": 678340
    },
    {
      "epoch": 2.377447630262821,
      "grad_norm": 3.265625,
      "learning_rate": 1.1528825959069657e-05,
      "loss": 0.817,
      "step": 678350
    },
    {
      "epoch": 2.377482677769717,
      "grad_norm": 3.09375,
      "learning_rate": 1.1528176930405955e-05,
      "loss": 0.7952,
      "step": 678360
    },
    {
      "epoch": 2.377517725276612,
      "grad_norm": 2.90625,
      "learning_rate": 1.1527527901742253e-05,
      "loss": 0.8669,
      "step": 678370
    },
    {
      "epoch": 2.377552772783508,
      "grad_norm": 2.953125,
      "learning_rate": 1.1526878873078553e-05,
      "loss": 0.7495,
      "step": 678380
    },
    {
      "epoch": 2.3775878202904037,
      "grad_norm": 2.796875,
      "learning_rate": 1.1526229844414849e-05,
      "loss": 0.7791,
      "step": 678390
    },
    {
      "epoch": 2.3776228677972995,
      "grad_norm": 3.140625,
      "learning_rate": 1.1525580815751147e-05,
      "loss": 0.7266,
      "step": 678400
    },
    {
      "epoch": 2.377657915304195,
      "grad_norm": 2.875,
      "learning_rate": 1.1524931787087445e-05,
      "loss": 0.7783,
      "step": 678410
    },
    {
      "epoch": 2.3776929628110905,
      "grad_norm": 2.921875,
      "learning_rate": 1.1524282758423743e-05,
      "loss": 0.767,
      "step": 678420
    },
    {
      "epoch": 2.377728010317986,
      "grad_norm": 2.578125,
      "learning_rate": 1.1523633729760041e-05,
      "loss": 0.7824,
      "step": 678430
    },
    {
      "epoch": 2.3777630578248816,
      "grad_norm": 3.328125,
      "learning_rate": 1.152298470109634e-05,
      "loss": 0.8515,
      "step": 678440
    },
    {
      "epoch": 2.3777981053317774,
      "grad_norm": 2.796875,
      "learning_rate": 1.1522335672432639e-05,
      "loss": 0.8247,
      "step": 678450
    },
    {
      "epoch": 2.3778331528386727,
      "grad_norm": 3.03125,
      "learning_rate": 1.1521686643768937e-05,
      "loss": 0.8912,
      "step": 678460
    },
    {
      "epoch": 2.3778682003455685,
      "grad_norm": 2.71875,
      "learning_rate": 1.1521037615105235e-05,
      "loss": 0.7709,
      "step": 678470
    },
    {
      "epoch": 2.377903247852464,
      "grad_norm": 2.828125,
      "learning_rate": 1.1520388586441533e-05,
      "loss": 0.8526,
      "step": 678480
    },
    {
      "epoch": 2.3779382953593595,
      "grad_norm": 3.046875,
      "learning_rate": 1.1519739557777829e-05,
      "loss": 0.888,
      "step": 678490
    },
    {
      "epoch": 2.3779733428662553,
      "grad_norm": 3.21875,
      "learning_rate": 1.1519090529114129e-05,
      "loss": 0.8548,
      "step": 678500
    },
    {
      "epoch": 2.378008390373151,
      "grad_norm": 3.015625,
      "learning_rate": 1.1518441500450427e-05,
      "loss": 0.7756,
      "step": 678510
    },
    {
      "epoch": 2.3780434378800464,
      "grad_norm": 2.65625,
      "learning_rate": 1.1517792471786725e-05,
      "loss": 0.8263,
      "step": 678520
    },
    {
      "epoch": 2.378078485386942,
      "grad_norm": 3.34375,
      "learning_rate": 1.1517143443123023e-05,
      "loss": 0.8207,
      "step": 678530
    },
    {
      "epoch": 2.3781135328938374,
      "grad_norm": 2.8125,
      "learning_rate": 1.151649441445932e-05,
      "loss": 0.7943,
      "step": 678540
    },
    {
      "epoch": 2.378148580400733,
      "grad_norm": 2.65625,
      "learning_rate": 1.1515845385795619e-05,
      "loss": 0.8219,
      "step": 678550
    },
    {
      "epoch": 2.378183627907629,
      "grad_norm": 2.484375,
      "learning_rate": 1.1515196357131917e-05,
      "loss": 0.7378,
      "step": 678560
    },
    {
      "epoch": 2.3782186754145243,
      "grad_norm": 2.609375,
      "learning_rate": 1.1514547328468216e-05,
      "loss": 0.8041,
      "step": 678570
    },
    {
      "epoch": 2.37825372292142,
      "grad_norm": 2.75,
      "learning_rate": 1.1513898299804513e-05,
      "loss": 0.8048,
      "step": 678580
    },
    {
      "epoch": 2.3782887704283158,
      "grad_norm": 2.875,
      "learning_rate": 1.151324927114081e-05,
      "loss": 0.8117,
      "step": 678590
    },
    {
      "epoch": 2.378323817935211,
      "grad_norm": 2.84375,
      "learning_rate": 1.1512600242477109e-05,
      "loss": 0.7494,
      "step": 678600
    },
    {
      "epoch": 2.378358865442107,
      "grad_norm": 2.328125,
      "learning_rate": 1.1511951213813407e-05,
      "loss": 0.723,
      "step": 678610
    },
    {
      "epoch": 2.3783939129490026,
      "grad_norm": 2.6875,
      "learning_rate": 1.1511302185149706e-05,
      "loss": 0.7352,
      "step": 678620
    },
    {
      "epoch": 2.378428960455898,
      "grad_norm": 2.8125,
      "learning_rate": 1.1510653156486004e-05,
      "loss": 0.8568,
      "step": 678630
    },
    {
      "epoch": 2.3784640079627937,
      "grad_norm": 2.71875,
      "learning_rate": 1.1510004127822302e-05,
      "loss": 0.7853,
      "step": 678640
    },
    {
      "epoch": 2.378499055469689,
      "grad_norm": 3.109375,
      "learning_rate": 1.15093550991586e-05,
      "loss": 0.8726,
      "step": 678650
    },
    {
      "epoch": 2.3785341029765847,
      "grad_norm": 2.765625,
      "learning_rate": 1.1508706070494898e-05,
      "loss": 0.8415,
      "step": 678660
    },
    {
      "epoch": 2.3785691504834805,
      "grad_norm": 2.9375,
      "learning_rate": 1.1508057041831195e-05,
      "loss": 0.8471,
      "step": 678670
    },
    {
      "epoch": 2.378604197990376,
      "grad_norm": 2.703125,
      "learning_rate": 1.1507408013167494e-05,
      "loss": 0.934,
      "step": 678680
    },
    {
      "epoch": 2.3786392454972716,
      "grad_norm": 3.171875,
      "learning_rate": 1.1506758984503792e-05,
      "loss": 0.8317,
      "step": 678690
    },
    {
      "epoch": 2.3786742930041673,
      "grad_norm": 2.5625,
      "learning_rate": 1.150610995584009e-05,
      "loss": 0.872,
      "step": 678700
    },
    {
      "epoch": 2.3787093405110626,
      "grad_norm": 3.46875,
      "learning_rate": 1.1505460927176388e-05,
      "loss": 0.8531,
      "step": 678710
    },
    {
      "epoch": 2.3787443880179584,
      "grad_norm": 2.71875,
      "learning_rate": 1.1504811898512686e-05,
      "loss": 0.8004,
      "step": 678720
    },
    {
      "epoch": 2.378779435524854,
      "grad_norm": 3.015625,
      "learning_rate": 1.1504162869848984e-05,
      "loss": 0.89,
      "step": 678730
    },
    {
      "epoch": 2.3788144830317495,
      "grad_norm": 2.71875,
      "learning_rate": 1.1503513841185282e-05,
      "loss": 0.7997,
      "step": 678740
    },
    {
      "epoch": 2.3788495305386452,
      "grad_norm": 2.8125,
      "learning_rate": 1.1502864812521582e-05,
      "loss": 0.8703,
      "step": 678750
    },
    {
      "epoch": 2.3788845780455405,
      "grad_norm": 3.015625,
      "learning_rate": 1.150221578385788e-05,
      "loss": 0.8486,
      "step": 678760
    },
    {
      "epoch": 2.3789196255524363,
      "grad_norm": 2.65625,
      "learning_rate": 1.1501566755194176e-05,
      "loss": 0.7542,
      "step": 678770
    },
    {
      "epoch": 2.378954673059332,
      "grad_norm": 3.421875,
      "learning_rate": 1.1500917726530474e-05,
      "loss": 0.8813,
      "step": 678780
    },
    {
      "epoch": 2.3789897205662274,
      "grad_norm": 2.84375,
      "learning_rate": 1.1500268697866772e-05,
      "loss": 0.8365,
      "step": 678790
    },
    {
      "epoch": 2.379024768073123,
      "grad_norm": 2.859375,
      "learning_rate": 1.149961966920307e-05,
      "loss": 0.8382,
      "step": 678800
    },
    {
      "epoch": 2.379059815580019,
      "grad_norm": 3.03125,
      "learning_rate": 1.149897064053937e-05,
      "loss": 0.8374,
      "step": 678810
    },
    {
      "epoch": 2.379094863086914,
      "grad_norm": 2.921875,
      "learning_rate": 1.1498321611875668e-05,
      "loss": 0.7798,
      "step": 678820
    },
    {
      "epoch": 2.37912991059381,
      "grad_norm": 2.578125,
      "learning_rate": 1.1497672583211966e-05,
      "loss": 0.7876,
      "step": 678830
    },
    {
      "epoch": 2.3791649581007057,
      "grad_norm": 2.78125,
      "learning_rate": 1.1497023554548264e-05,
      "loss": 0.7417,
      "step": 678840
    },
    {
      "epoch": 2.379200005607601,
      "grad_norm": 2.984375,
      "learning_rate": 1.1496374525884562e-05,
      "loss": 0.8159,
      "step": 678850
    },
    {
      "epoch": 2.379235053114497,
      "grad_norm": 3.015625,
      "learning_rate": 1.149572549722086e-05,
      "loss": 0.8056,
      "step": 678860
    },
    {
      "epoch": 2.379270100621392,
      "grad_norm": 2.828125,
      "learning_rate": 1.1495076468557158e-05,
      "loss": 0.858,
      "step": 678870
    },
    {
      "epoch": 2.379305148128288,
      "grad_norm": 2.703125,
      "learning_rate": 1.1494427439893456e-05,
      "loss": 0.7806,
      "step": 678880
    },
    {
      "epoch": 2.3793401956351836,
      "grad_norm": 2.9375,
      "learning_rate": 1.1493778411229754e-05,
      "loss": 0.9041,
      "step": 678890
    },
    {
      "epoch": 2.379375243142079,
      "grad_norm": 2.890625,
      "learning_rate": 1.1493129382566052e-05,
      "loss": 0.7647,
      "step": 678900
    },
    {
      "epoch": 2.3794102906489747,
      "grad_norm": 3.0625,
      "learning_rate": 1.149248035390235e-05,
      "loss": 0.8047,
      "step": 678910
    },
    {
      "epoch": 2.3794453381558704,
      "grad_norm": 3.0625,
      "learning_rate": 1.1491831325238648e-05,
      "loss": 0.8701,
      "step": 678920
    },
    {
      "epoch": 2.3794803856627658,
      "grad_norm": 3.0,
      "learning_rate": 1.1491182296574948e-05,
      "loss": 0.8727,
      "step": 678930
    },
    {
      "epoch": 2.3795154331696615,
      "grad_norm": 2.8125,
      "learning_rate": 1.1490533267911246e-05,
      "loss": 0.8073,
      "step": 678940
    },
    {
      "epoch": 2.3795504806765573,
      "grad_norm": 2.609375,
      "learning_rate": 1.1489884239247544e-05,
      "loss": 0.8022,
      "step": 678950
    },
    {
      "epoch": 2.3795855281834526,
      "grad_norm": 2.71875,
      "learning_rate": 1.148923521058384e-05,
      "loss": 0.7747,
      "step": 678960
    },
    {
      "epoch": 2.3796205756903483,
      "grad_norm": 3.046875,
      "learning_rate": 1.1488586181920138e-05,
      "loss": 0.8938,
      "step": 678970
    },
    {
      "epoch": 2.3796556231972437,
      "grad_norm": 2.390625,
      "learning_rate": 1.1487937153256436e-05,
      "loss": 0.8066,
      "step": 678980
    },
    {
      "epoch": 2.3796906707041394,
      "grad_norm": 3.0,
      "learning_rate": 1.1487288124592736e-05,
      "loss": 0.8344,
      "step": 678990
    },
    {
      "epoch": 2.379725718211035,
      "grad_norm": 2.796875,
      "learning_rate": 1.1486639095929034e-05,
      "loss": 0.7777,
      "step": 679000
    },
    {
      "epoch": 2.3797607657179305,
      "grad_norm": 2.9375,
      "learning_rate": 1.1485990067265332e-05,
      "loss": 0.874,
      "step": 679010
    },
    {
      "epoch": 2.3797958132248263,
      "grad_norm": 2.921875,
      "learning_rate": 1.148534103860163e-05,
      "loss": 0.8468,
      "step": 679020
    },
    {
      "epoch": 2.379830860731722,
      "grad_norm": 3.109375,
      "learning_rate": 1.1484692009937928e-05,
      "loss": 0.8708,
      "step": 679030
    },
    {
      "epoch": 2.3798659082386173,
      "grad_norm": 3.015625,
      "learning_rate": 1.1484042981274226e-05,
      "loss": 0.8009,
      "step": 679040
    },
    {
      "epoch": 2.379900955745513,
      "grad_norm": 2.5625,
      "learning_rate": 1.1483393952610524e-05,
      "loss": 0.7874,
      "step": 679050
    },
    {
      "epoch": 2.379936003252409,
      "grad_norm": 2.96875,
      "learning_rate": 1.1482744923946822e-05,
      "loss": 0.8052,
      "step": 679060
    },
    {
      "epoch": 2.379971050759304,
      "grad_norm": 2.6875,
      "learning_rate": 1.148209589528312e-05,
      "loss": 0.8213,
      "step": 679070
    },
    {
      "epoch": 2.3800060982662,
      "grad_norm": 2.53125,
      "learning_rate": 1.1481446866619418e-05,
      "loss": 0.7374,
      "step": 679080
    },
    {
      "epoch": 2.3800411457730952,
      "grad_norm": 2.859375,
      "learning_rate": 1.1480797837955716e-05,
      "loss": 0.7615,
      "step": 679090
    },
    {
      "epoch": 2.380076193279991,
      "grad_norm": 2.3125,
      "learning_rate": 1.1480148809292014e-05,
      "loss": 0.6856,
      "step": 679100
    },
    {
      "epoch": 2.3801112407868867,
      "grad_norm": 3.28125,
      "learning_rate": 1.1479499780628312e-05,
      "loss": 0.8214,
      "step": 679110
    },
    {
      "epoch": 2.380146288293782,
      "grad_norm": 2.203125,
      "learning_rate": 1.1478850751964611e-05,
      "loss": 0.8412,
      "step": 679120
    },
    {
      "epoch": 2.380181335800678,
      "grad_norm": 2.90625,
      "learning_rate": 1.147820172330091e-05,
      "loss": 0.8046,
      "step": 679130
    },
    {
      "epoch": 2.3802163833075736,
      "grad_norm": 2.75,
      "learning_rate": 1.1477552694637206e-05,
      "loss": 0.8307,
      "step": 679140
    },
    {
      "epoch": 2.380251430814469,
      "grad_norm": 5.46875,
      "learning_rate": 1.1476903665973504e-05,
      "loss": 0.7917,
      "step": 679150
    },
    {
      "epoch": 2.3802864783213646,
      "grad_norm": 3.59375,
      "learning_rate": 1.1476254637309802e-05,
      "loss": 0.8007,
      "step": 679160
    },
    {
      "epoch": 2.3803215258282604,
      "grad_norm": 3.671875,
      "learning_rate": 1.1475605608646101e-05,
      "loss": 0.8608,
      "step": 679170
    },
    {
      "epoch": 2.3803565733351557,
      "grad_norm": 2.78125,
      "learning_rate": 1.14749565799824e-05,
      "loss": 0.7768,
      "step": 679180
    },
    {
      "epoch": 2.3803916208420515,
      "grad_norm": 2.84375,
      "learning_rate": 1.1474307551318697e-05,
      "loss": 0.8687,
      "step": 679190
    },
    {
      "epoch": 2.380426668348947,
      "grad_norm": 3.328125,
      "learning_rate": 1.1473658522654995e-05,
      "loss": 0.8489,
      "step": 679200
    },
    {
      "epoch": 2.3804617158558425,
      "grad_norm": 3.140625,
      "learning_rate": 1.1473009493991293e-05,
      "loss": 0.8565,
      "step": 679210
    },
    {
      "epoch": 2.3804967633627383,
      "grad_norm": 3.0,
      "learning_rate": 1.1472360465327591e-05,
      "loss": 0.7362,
      "step": 679220
    },
    {
      "epoch": 2.3805318108696336,
      "grad_norm": 2.8125,
      "learning_rate": 1.147171143666389e-05,
      "loss": 0.8527,
      "step": 679230
    },
    {
      "epoch": 2.3805668583765294,
      "grad_norm": 3.296875,
      "learning_rate": 1.1471062408000187e-05,
      "loss": 0.8347,
      "step": 679240
    },
    {
      "epoch": 2.380601905883425,
      "grad_norm": 3.390625,
      "learning_rate": 1.1470413379336485e-05,
      "loss": 0.8642,
      "step": 679250
    },
    {
      "epoch": 2.3806369533903204,
      "grad_norm": 2.859375,
      "learning_rate": 1.1469764350672783e-05,
      "loss": 0.7742,
      "step": 679260
    },
    {
      "epoch": 2.380672000897216,
      "grad_norm": 2.84375,
      "learning_rate": 1.1469115322009081e-05,
      "loss": 0.7493,
      "step": 679270
    },
    {
      "epoch": 2.380707048404112,
      "grad_norm": 2.71875,
      "learning_rate": 1.146846629334538e-05,
      "loss": 0.8362,
      "step": 679280
    },
    {
      "epoch": 2.3807420959110073,
      "grad_norm": 2.40625,
      "learning_rate": 1.1467817264681677e-05,
      "loss": 0.808,
      "step": 679290
    },
    {
      "epoch": 2.380777143417903,
      "grad_norm": 2.84375,
      "learning_rate": 1.1467168236017977e-05,
      "loss": 0.8488,
      "step": 679300
    },
    {
      "epoch": 2.3808121909247983,
      "grad_norm": 2.671875,
      "learning_rate": 1.1466519207354275e-05,
      "loss": 0.7919,
      "step": 679310
    },
    {
      "epoch": 2.380847238431694,
      "grad_norm": 3.28125,
      "learning_rate": 1.1465870178690573e-05,
      "loss": 0.9234,
      "step": 679320
    },
    {
      "epoch": 2.38088228593859,
      "grad_norm": 3.171875,
      "learning_rate": 1.146522115002687e-05,
      "loss": 0.8518,
      "step": 679330
    },
    {
      "epoch": 2.380917333445485,
      "grad_norm": 2.765625,
      "learning_rate": 1.1464572121363167e-05,
      "loss": 0.8218,
      "step": 679340
    },
    {
      "epoch": 2.380952380952381,
      "grad_norm": 3.1875,
      "learning_rate": 1.1463923092699465e-05,
      "loss": 0.8773,
      "step": 679350
    },
    {
      "epoch": 2.3809874284592767,
      "grad_norm": 2.921875,
      "learning_rate": 1.1463274064035765e-05,
      "loss": 0.8222,
      "step": 679360
    },
    {
      "epoch": 2.381022475966172,
      "grad_norm": 2.953125,
      "learning_rate": 1.1462625035372063e-05,
      "loss": 0.7682,
      "step": 679370
    },
    {
      "epoch": 2.3810575234730678,
      "grad_norm": 2.9375,
      "learning_rate": 1.1461976006708361e-05,
      "loss": 0.8368,
      "step": 679380
    },
    {
      "epoch": 2.3810925709799635,
      "grad_norm": 2.90625,
      "learning_rate": 1.1461326978044659e-05,
      "loss": 0.8152,
      "step": 679390
    },
    {
      "epoch": 2.381127618486859,
      "grad_norm": 3.46875,
      "learning_rate": 1.1460677949380957e-05,
      "loss": 0.8345,
      "step": 679400
    },
    {
      "epoch": 2.3811626659937546,
      "grad_norm": 3.4375,
      "learning_rate": 1.1460028920717255e-05,
      "loss": 0.7967,
      "step": 679410
    },
    {
      "epoch": 2.38119771350065,
      "grad_norm": 2.5625,
      "learning_rate": 1.1459379892053553e-05,
      "loss": 0.8027,
      "step": 679420
    },
    {
      "epoch": 2.3812327610075457,
      "grad_norm": 2.9375,
      "learning_rate": 1.1458730863389851e-05,
      "loss": 0.8411,
      "step": 679430
    },
    {
      "epoch": 2.3812678085144414,
      "grad_norm": 3.578125,
      "learning_rate": 1.1458081834726149e-05,
      "loss": 0.9035,
      "step": 679440
    },
    {
      "epoch": 2.3813028560213367,
      "grad_norm": 2.828125,
      "learning_rate": 1.1457432806062447e-05,
      "loss": 0.8198,
      "step": 679450
    },
    {
      "epoch": 2.3813379035282325,
      "grad_norm": 3.515625,
      "learning_rate": 1.1456783777398745e-05,
      "loss": 0.8135,
      "step": 679460
    },
    {
      "epoch": 2.3813729510351282,
      "grad_norm": 2.71875,
      "learning_rate": 1.1456134748735043e-05,
      "loss": 0.7837,
      "step": 679470
    },
    {
      "epoch": 2.3814079985420236,
      "grad_norm": 2.84375,
      "learning_rate": 1.1455485720071343e-05,
      "loss": 0.7975,
      "step": 679480
    },
    {
      "epoch": 2.3814430460489193,
      "grad_norm": 2.90625,
      "learning_rate": 1.145483669140764e-05,
      "loss": 0.7826,
      "step": 679490
    },
    {
      "epoch": 2.381478093555815,
      "grad_norm": 2.5625,
      "learning_rate": 1.1454187662743939e-05,
      "loss": 0.7371,
      "step": 679500
    },
    {
      "epoch": 2.3815131410627104,
      "grad_norm": 2.328125,
      "learning_rate": 1.1453538634080237e-05,
      "loss": 0.822,
      "step": 679510
    },
    {
      "epoch": 2.381548188569606,
      "grad_norm": 2.53125,
      "learning_rate": 1.1452889605416533e-05,
      "loss": 0.7002,
      "step": 679520
    },
    {
      "epoch": 2.3815832360765015,
      "grad_norm": 2.421875,
      "learning_rate": 1.1452240576752831e-05,
      "loss": 0.7379,
      "step": 679530
    },
    {
      "epoch": 2.381618283583397,
      "grad_norm": 3.171875,
      "learning_rate": 1.145159154808913e-05,
      "loss": 0.8099,
      "step": 679540
    },
    {
      "epoch": 2.381653331090293,
      "grad_norm": 2.5625,
      "learning_rate": 1.1450942519425429e-05,
      "loss": 0.7723,
      "step": 679550
    },
    {
      "epoch": 2.3816883785971887,
      "grad_norm": 3.640625,
      "learning_rate": 1.1450293490761727e-05,
      "loss": 0.8048,
      "step": 679560
    },
    {
      "epoch": 2.381723426104084,
      "grad_norm": 2.859375,
      "learning_rate": 1.1449644462098025e-05,
      "loss": 0.6971,
      "step": 679570
    },
    {
      "epoch": 2.38175847361098,
      "grad_norm": 3.234375,
      "learning_rate": 1.1448995433434323e-05,
      "loss": 0.7845,
      "step": 679580
    },
    {
      "epoch": 2.381793521117875,
      "grad_norm": 3.3125,
      "learning_rate": 1.144834640477062e-05,
      "loss": 0.8448,
      "step": 679590
    },
    {
      "epoch": 2.381828568624771,
      "grad_norm": 2.8125,
      "learning_rate": 1.1447697376106919e-05,
      "loss": 0.8581,
      "step": 679600
    },
    {
      "epoch": 2.3818636161316666,
      "grad_norm": 2.25,
      "learning_rate": 1.1447048347443217e-05,
      "loss": 0.7744,
      "step": 679610
    },
    {
      "epoch": 2.381898663638562,
      "grad_norm": 2.8125,
      "learning_rate": 1.1446399318779515e-05,
      "loss": 0.8531,
      "step": 679620
    },
    {
      "epoch": 2.3819337111454577,
      "grad_norm": 2.984375,
      "learning_rate": 1.1445750290115813e-05,
      "loss": 0.7798,
      "step": 679630
    },
    {
      "epoch": 2.381968758652353,
      "grad_norm": 2.875,
      "learning_rate": 1.144510126145211e-05,
      "loss": 0.793,
      "step": 679640
    },
    {
      "epoch": 2.382003806159249,
      "grad_norm": 2.546875,
      "learning_rate": 1.1444452232788409e-05,
      "loss": 0.7951,
      "step": 679650
    },
    {
      "epoch": 2.3820388536661445,
      "grad_norm": 2.734375,
      "learning_rate": 1.1443803204124707e-05,
      "loss": 0.7975,
      "step": 679660
    },
    {
      "epoch": 2.3820739011730403,
      "grad_norm": 3.03125,
      "learning_rate": 1.1443154175461006e-05,
      "loss": 0.7808,
      "step": 679670
    },
    {
      "epoch": 2.3821089486799356,
      "grad_norm": 2.65625,
      "learning_rate": 1.1442505146797304e-05,
      "loss": 0.7032,
      "step": 679680
    },
    {
      "epoch": 2.3821439961868314,
      "grad_norm": 2.71875,
      "learning_rate": 1.1441856118133602e-05,
      "loss": 0.7362,
      "step": 679690
    },
    {
      "epoch": 2.3821790436937267,
      "grad_norm": 3.765625,
      "learning_rate": 1.14412070894699e-05,
      "loss": 0.9062,
      "step": 679700
    },
    {
      "epoch": 2.3822140912006224,
      "grad_norm": 2.9375,
      "learning_rate": 1.1440558060806197e-05,
      "loss": 0.7746,
      "step": 679710
    },
    {
      "epoch": 2.382249138707518,
      "grad_norm": 2.59375,
      "learning_rate": 1.1439909032142496e-05,
      "loss": 0.7835,
      "step": 679720
    },
    {
      "epoch": 2.3822841862144135,
      "grad_norm": 3.140625,
      "learning_rate": 1.1439260003478794e-05,
      "loss": 0.7583,
      "step": 679730
    },
    {
      "epoch": 2.3823192337213093,
      "grad_norm": 2.53125,
      "learning_rate": 1.1438610974815092e-05,
      "loss": 0.8192,
      "step": 679740
    },
    {
      "epoch": 2.3823542812282046,
      "grad_norm": 2.65625,
      "learning_rate": 1.143796194615139e-05,
      "loss": 0.6994,
      "step": 679750
    },
    {
      "epoch": 2.3823893287351003,
      "grad_norm": 2.875,
      "learning_rate": 1.1437312917487688e-05,
      "loss": 0.8169,
      "step": 679760
    },
    {
      "epoch": 2.382424376241996,
      "grad_norm": 3.09375,
      "learning_rate": 1.1436663888823986e-05,
      "loss": 0.867,
      "step": 679770
    },
    {
      "epoch": 2.382459423748892,
      "grad_norm": 3.09375,
      "learning_rate": 1.1436014860160284e-05,
      "loss": 0.8397,
      "step": 679780
    },
    {
      "epoch": 2.382494471255787,
      "grad_norm": 3.015625,
      "learning_rate": 1.1435365831496584e-05,
      "loss": 0.7927,
      "step": 679790
    },
    {
      "epoch": 2.382529518762683,
      "grad_norm": 2.765625,
      "learning_rate": 1.143471680283288e-05,
      "loss": 0.7526,
      "step": 679800
    },
    {
      "epoch": 2.3825645662695782,
      "grad_norm": 3.078125,
      "learning_rate": 1.1434067774169178e-05,
      "loss": 0.7928,
      "step": 679810
    },
    {
      "epoch": 2.382599613776474,
      "grad_norm": 3.21875,
      "learning_rate": 1.1433418745505476e-05,
      "loss": 0.7777,
      "step": 679820
    },
    {
      "epoch": 2.3826346612833698,
      "grad_norm": 2.84375,
      "learning_rate": 1.1432769716841774e-05,
      "loss": 0.8061,
      "step": 679830
    },
    {
      "epoch": 2.382669708790265,
      "grad_norm": 2.890625,
      "learning_rate": 1.1432120688178072e-05,
      "loss": 0.7426,
      "step": 679840
    },
    {
      "epoch": 2.382704756297161,
      "grad_norm": 2.640625,
      "learning_rate": 1.1431471659514372e-05,
      "loss": 0.7834,
      "step": 679850
    },
    {
      "epoch": 2.3827398038040566,
      "grad_norm": 2.453125,
      "learning_rate": 1.143082263085067e-05,
      "loss": 0.7967,
      "step": 679860
    },
    {
      "epoch": 2.382774851310952,
      "grad_norm": 2.40625,
      "learning_rate": 1.1430173602186968e-05,
      "loss": 0.7768,
      "step": 679870
    },
    {
      "epoch": 2.3828098988178477,
      "grad_norm": 3.15625,
      "learning_rate": 1.1429524573523266e-05,
      "loss": 0.8064,
      "step": 679880
    },
    {
      "epoch": 2.3828449463247434,
      "grad_norm": 3.3125,
      "learning_rate": 1.1428875544859564e-05,
      "loss": 0.8514,
      "step": 679890
    },
    {
      "epoch": 2.3828799938316387,
      "grad_norm": 2.59375,
      "learning_rate": 1.142822651619586e-05,
      "loss": 0.7516,
      "step": 679900
    },
    {
      "epoch": 2.3829150413385345,
      "grad_norm": 2.125,
      "learning_rate": 1.142757748753216e-05,
      "loss": 0.79,
      "step": 679910
    },
    {
      "epoch": 2.38295008884543,
      "grad_norm": 2.734375,
      "learning_rate": 1.1426928458868458e-05,
      "loss": 0.8863,
      "step": 679920
    },
    {
      "epoch": 2.3829851363523256,
      "grad_norm": 3.046875,
      "learning_rate": 1.1426279430204756e-05,
      "loss": 0.7888,
      "step": 679930
    },
    {
      "epoch": 2.3830201838592213,
      "grad_norm": 3.1875,
      "learning_rate": 1.1425630401541054e-05,
      "loss": 0.8156,
      "step": 679940
    },
    {
      "epoch": 2.3830552313661166,
      "grad_norm": 2.78125,
      "learning_rate": 1.1424981372877352e-05,
      "loss": 0.8056,
      "step": 679950
    },
    {
      "epoch": 2.3830902788730124,
      "grad_norm": 2.484375,
      "learning_rate": 1.142433234421365e-05,
      "loss": 0.8076,
      "step": 679960
    },
    {
      "epoch": 2.383125326379908,
      "grad_norm": 2.65625,
      "learning_rate": 1.1423683315549948e-05,
      "loss": 0.7232,
      "step": 679970
    },
    {
      "epoch": 2.3831603738868035,
      "grad_norm": 2.828125,
      "learning_rate": 1.1423034286886248e-05,
      "loss": 0.8206,
      "step": 679980
    },
    {
      "epoch": 2.383195421393699,
      "grad_norm": 2.96875,
      "learning_rate": 1.1422385258222544e-05,
      "loss": 0.7745,
      "step": 679990
    },
    {
      "epoch": 2.383230468900595,
      "grad_norm": 2.921875,
      "learning_rate": 1.1421736229558842e-05,
      "loss": 0.7948,
      "step": 680000
    },
    {
      "epoch": 2.383230468900595,
      "eval_loss": 0.7601303458213806,
      "eval_runtime": 551.507,
      "eval_samples_per_second": 689.812,
      "eval_steps_per_second": 57.484,
      "step": 680000
    },
    {
      "epoch": 2.3832655164074903,
      "grad_norm": 2.375,
      "learning_rate": 1.142108720089514e-05,
      "loss": 0.7806,
      "step": 680010
    },
    {
      "epoch": 2.383300563914386,
      "grad_norm": 3.265625,
      "learning_rate": 1.1420438172231438e-05,
      "loss": 0.8661,
      "step": 680020
    },
    {
      "epoch": 2.3833356114212814,
      "grad_norm": 2.734375,
      "learning_rate": 1.1419789143567738e-05,
      "loss": 0.7898,
      "step": 680030
    },
    {
      "epoch": 2.383370658928177,
      "grad_norm": 2.890625,
      "learning_rate": 1.1419140114904036e-05,
      "loss": 0.8549,
      "step": 680040
    },
    {
      "epoch": 2.383405706435073,
      "grad_norm": 2.921875,
      "learning_rate": 1.1418491086240334e-05,
      "loss": 0.7949,
      "step": 680050
    },
    {
      "epoch": 2.383440753941968,
      "grad_norm": 3.53125,
      "learning_rate": 1.1417842057576632e-05,
      "loss": 0.7255,
      "step": 680060
    },
    {
      "epoch": 2.383475801448864,
      "grad_norm": 2.71875,
      "learning_rate": 1.141719302891293e-05,
      "loss": 0.8126,
      "step": 680070
    },
    {
      "epoch": 2.3835108489557597,
      "grad_norm": 2.40625,
      "learning_rate": 1.1416544000249226e-05,
      "loss": 0.7933,
      "step": 680080
    },
    {
      "epoch": 2.383545896462655,
      "grad_norm": 2.9375,
      "learning_rate": 1.1415894971585526e-05,
      "loss": 0.8958,
      "step": 680090
    },
    {
      "epoch": 2.383580943969551,
      "grad_norm": 2.203125,
      "learning_rate": 1.1415245942921824e-05,
      "loss": 0.8434,
      "step": 680100
    },
    {
      "epoch": 2.3836159914764465,
      "grad_norm": 3.21875,
      "learning_rate": 1.1414596914258122e-05,
      "loss": 0.803,
      "step": 680110
    },
    {
      "epoch": 2.383651038983342,
      "grad_norm": 2.734375,
      "learning_rate": 1.141394788559442e-05,
      "loss": 0.754,
      "step": 680120
    },
    {
      "epoch": 2.3836860864902376,
      "grad_norm": 3.40625,
      "learning_rate": 1.1413298856930718e-05,
      "loss": 0.7687,
      "step": 680130
    },
    {
      "epoch": 2.383721133997133,
      "grad_norm": 2.578125,
      "learning_rate": 1.1412649828267016e-05,
      "loss": 0.8776,
      "step": 680140
    },
    {
      "epoch": 2.3837561815040287,
      "grad_norm": 2.390625,
      "learning_rate": 1.1412000799603314e-05,
      "loss": 0.7669,
      "step": 680150
    },
    {
      "epoch": 2.3837912290109244,
      "grad_norm": 3.296875,
      "learning_rate": 1.1411351770939614e-05,
      "loss": 0.8572,
      "step": 680160
    },
    {
      "epoch": 2.3838262765178198,
      "grad_norm": 2.90625,
      "learning_rate": 1.1410702742275912e-05,
      "loss": 0.854,
      "step": 680170
    },
    {
      "epoch": 2.3838613240247155,
      "grad_norm": 2.71875,
      "learning_rate": 1.1410053713612208e-05,
      "loss": 0.7483,
      "step": 680180
    },
    {
      "epoch": 2.3838963715316113,
      "grad_norm": 3.09375,
      "learning_rate": 1.1409404684948506e-05,
      "loss": 0.9369,
      "step": 680190
    },
    {
      "epoch": 2.3839314190385066,
      "grad_norm": 2.84375,
      "learning_rate": 1.1408755656284804e-05,
      "loss": 0.8113,
      "step": 680200
    },
    {
      "epoch": 2.3839664665454023,
      "grad_norm": 3.078125,
      "learning_rate": 1.1408106627621102e-05,
      "loss": 0.8134,
      "step": 680210
    },
    {
      "epoch": 2.384001514052298,
      "grad_norm": 2.703125,
      "learning_rate": 1.1407457598957402e-05,
      "loss": 0.7747,
      "step": 680220
    },
    {
      "epoch": 2.3840365615591934,
      "grad_norm": 2.984375,
      "learning_rate": 1.14068085702937e-05,
      "loss": 0.8561,
      "step": 680230
    },
    {
      "epoch": 2.384071609066089,
      "grad_norm": 3.171875,
      "learning_rate": 1.1406159541629998e-05,
      "loss": 0.8225,
      "step": 680240
    },
    {
      "epoch": 2.3841066565729845,
      "grad_norm": 2.90625,
      "learning_rate": 1.1405510512966296e-05,
      "loss": 0.7312,
      "step": 680250
    },
    {
      "epoch": 2.3841417040798802,
      "grad_norm": 2.671875,
      "learning_rate": 1.1404861484302594e-05,
      "loss": 0.8336,
      "step": 680260
    },
    {
      "epoch": 2.384176751586776,
      "grad_norm": 3.1875,
      "learning_rate": 1.1404212455638892e-05,
      "loss": 0.8358,
      "step": 680270
    },
    {
      "epoch": 2.3842117990936713,
      "grad_norm": 2.90625,
      "learning_rate": 1.140356342697519e-05,
      "loss": 0.8646,
      "step": 680280
    },
    {
      "epoch": 2.384246846600567,
      "grad_norm": 2.4375,
      "learning_rate": 1.1402914398311488e-05,
      "loss": 0.7035,
      "step": 680290
    },
    {
      "epoch": 2.384281894107463,
      "grad_norm": 3.171875,
      "learning_rate": 1.1402265369647786e-05,
      "loss": 0.7063,
      "step": 680300
    },
    {
      "epoch": 2.384316941614358,
      "grad_norm": 3.203125,
      "learning_rate": 1.1401616340984084e-05,
      "loss": 0.8639,
      "step": 680310
    },
    {
      "epoch": 2.384351989121254,
      "grad_norm": 2.671875,
      "learning_rate": 1.1400967312320382e-05,
      "loss": 0.8277,
      "step": 680320
    },
    {
      "epoch": 2.3843870366281497,
      "grad_norm": 3.234375,
      "learning_rate": 1.140031828365668e-05,
      "loss": 0.8587,
      "step": 680330
    },
    {
      "epoch": 2.384422084135045,
      "grad_norm": 3.1875,
      "learning_rate": 1.139966925499298e-05,
      "loss": 0.8095,
      "step": 680340
    },
    {
      "epoch": 2.3844571316419407,
      "grad_norm": 3.4375,
      "learning_rate": 1.1399020226329277e-05,
      "loss": 0.8261,
      "step": 680350
    },
    {
      "epoch": 2.384492179148836,
      "grad_norm": 2.953125,
      "learning_rate": 1.1398371197665575e-05,
      "loss": 0.8857,
      "step": 680360
    },
    {
      "epoch": 2.384527226655732,
      "grad_norm": 2.828125,
      "learning_rate": 1.1397722169001872e-05,
      "loss": 0.8567,
      "step": 680370
    },
    {
      "epoch": 2.3845622741626276,
      "grad_norm": 2.84375,
      "learning_rate": 1.139707314033817e-05,
      "loss": 0.7828,
      "step": 680380
    },
    {
      "epoch": 2.384597321669523,
      "grad_norm": 3.03125,
      "learning_rate": 1.1396424111674468e-05,
      "loss": 0.842,
      "step": 680390
    },
    {
      "epoch": 2.3846323691764186,
      "grad_norm": 2.53125,
      "learning_rate": 1.1395775083010767e-05,
      "loss": 0.7896,
      "step": 680400
    },
    {
      "epoch": 2.3846674166833144,
      "grad_norm": 2.578125,
      "learning_rate": 1.1395126054347065e-05,
      "loss": 0.7632,
      "step": 680410
    },
    {
      "epoch": 2.3847024641902097,
      "grad_norm": 2.78125,
      "learning_rate": 1.1394477025683363e-05,
      "loss": 0.7364,
      "step": 680420
    },
    {
      "epoch": 2.3847375116971055,
      "grad_norm": 3.0,
      "learning_rate": 1.1393827997019661e-05,
      "loss": 0.8261,
      "step": 680430
    },
    {
      "epoch": 2.384772559204001,
      "grad_norm": 3.3125,
      "learning_rate": 1.139317896835596e-05,
      "loss": 0.8305,
      "step": 680440
    },
    {
      "epoch": 2.3848076067108965,
      "grad_norm": 3.171875,
      "learning_rate": 1.1392529939692257e-05,
      "loss": 0.7923,
      "step": 680450
    },
    {
      "epoch": 2.3848426542177923,
      "grad_norm": 2.984375,
      "learning_rate": 1.1391880911028555e-05,
      "loss": 0.8776,
      "step": 680460
    },
    {
      "epoch": 2.3848777017246876,
      "grad_norm": 2.859375,
      "learning_rate": 1.1391231882364853e-05,
      "loss": 0.8164,
      "step": 680470
    },
    {
      "epoch": 2.3849127492315834,
      "grad_norm": 2.5625,
      "learning_rate": 1.1390582853701151e-05,
      "loss": 0.7556,
      "step": 680480
    },
    {
      "epoch": 2.384947796738479,
      "grad_norm": 2.625,
      "learning_rate": 1.138993382503745e-05,
      "loss": 0.8452,
      "step": 680490
    },
    {
      "epoch": 2.3849828442453744,
      "grad_norm": 3.234375,
      "learning_rate": 1.1389284796373747e-05,
      "loss": 0.8394,
      "step": 680500
    },
    {
      "epoch": 2.38501789175227,
      "grad_norm": 2.21875,
      "learning_rate": 1.1388635767710045e-05,
      "loss": 0.7851,
      "step": 680510
    },
    {
      "epoch": 2.385052939259166,
      "grad_norm": 2.53125,
      "learning_rate": 1.1387986739046343e-05,
      "loss": 0.8759,
      "step": 680520
    },
    {
      "epoch": 2.3850879867660613,
      "grad_norm": 2.6875,
      "learning_rate": 1.1387337710382643e-05,
      "loss": 0.8203,
      "step": 680530
    },
    {
      "epoch": 2.385123034272957,
      "grad_norm": 2.640625,
      "learning_rate": 1.1386688681718941e-05,
      "loss": 0.8097,
      "step": 680540
    },
    {
      "epoch": 2.385158081779853,
      "grad_norm": 2.609375,
      "learning_rate": 1.1386039653055237e-05,
      "loss": 0.8656,
      "step": 680550
    },
    {
      "epoch": 2.385193129286748,
      "grad_norm": 2.9375,
      "learning_rate": 1.1385390624391535e-05,
      "loss": 0.8034,
      "step": 680560
    },
    {
      "epoch": 2.385228176793644,
      "grad_norm": 2.859375,
      "learning_rate": 1.1384741595727833e-05,
      "loss": 0.8163,
      "step": 680570
    },
    {
      "epoch": 2.385263224300539,
      "grad_norm": 3.546875,
      "learning_rate": 1.1384092567064133e-05,
      "loss": 0.9037,
      "step": 680580
    },
    {
      "epoch": 2.385298271807435,
      "grad_norm": 3.078125,
      "learning_rate": 1.1383443538400431e-05,
      "loss": 0.7579,
      "step": 680590
    },
    {
      "epoch": 2.3853333193143307,
      "grad_norm": 2.859375,
      "learning_rate": 1.1382794509736729e-05,
      "loss": 0.8062,
      "step": 680600
    },
    {
      "epoch": 2.385368366821226,
      "grad_norm": 3.21875,
      "learning_rate": 1.1382145481073027e-05,
      "loss": 0.8031,
      "step": 680610
    },
    {
      "epoch": 2.3854034143281218,
      "grad_norm": 2.921875,
      "learning_rate": 1.1381496452409325e-05,
      "loss": 0.8661,
      "step": 680620
    },
    {
      "epoch": 2.3854384618350175,
      "grad_norm": 3.328125,
      "learning_rate": 1.1380847423745623e-05,
      "loss": 0.8359,
      "step": 680630
    },
    {
      "epoch": 2.385473509341913,
      "grad_norm": 2.84375,
      "learning_rate": 1.1380198395081921e-05,
      "loss": 0.7771,
      "step": 680640
    },
    {
      "epoch": 2.3855085568488086,
      "grad_norm": 2.796875,
      "learning_rate": 1.1379549366418219e-05,
      "loss": 0.8045,
      "step": 680650
    },
    {
      "epoch": 2.3855436043557043,
      "grad_norm": 2.65625,
      "learning_rate": 1.1378900337754517e-05,
      "loss": 0.8379,
      "step": 680660
    },
    {
      "epoch": 2.3855786518625997,
      "grad_norm": 3.0,
      "learning_rate": 1.1378251309090815e-05,
      "loss": 0.8022,
      "step": 680670
    },
    {
      "epoch": 2.3856136993694954,
      "grad_norm": 2.921875,
      "learning_rate": 1.1377602280427113e-05,
      "loss": 0.8042,
      "step": 680680
    },
    {
      "epoch": 2.3856487468763907,
      "grad_norm": 3.546875,
      "learning_rate": 1.1376953251763411e-05,
      "loss": 0.8285,
      "step": 680690
    },
    {
      "epoch": 2.3856837943832865,
      "grad_norm": 2.9375,
      "learning_rate": 1.1376304223099709e-05,
      "loss": 0.8076,
      "step": 680700
    },
    {
      "epoch": 2.3857188418901822,
      "grad_norm": 2.65625,
      "learning_rate": 1.1375655194436009e-05,
      "loss": 0.8233,
      "step": 680710
    },
    {
      "epoch": 2.3857538893970776,
      "grad_norm": 2.5,
      "learning_rate": 1.1375006165772307e-05,
      "loss": 0.8443,
      "step": 680720
    },
    {
      "epoch": 2.3857889369039733,
      "grad_norm": 3.078125,
      "learning_rate": 1.1374357137108605e-05,
      "loss": 0.8117,
      "step": 680730
    },
    {
      "epoch": 2.385823984410869,
      "grad_norm": 2.78125,
      "learning_rate": 1.1373708108444901e-05,
      "loss": 0.7507,
      "step": 680740
    },
    {
      "epoch": 2.3858590319177644,
      "grad_norm": 3.078125,
      "learning_rate": 1.1373059079781199e-05,
      "loss": 0.8283,
      "step": 680750
    },
    {
      "epoch": 2.38589407942466,
      "grad_norm": 2.46875,
      "learning_rate": 1.1372410051117497e-05,
      "loss": 0.7417,
      "step": 680760
    },
    {
      "epoch": 2.385929126931556,
      "grad_norm": 2.953125,
      "learning_rate": 1.1371761022453797e-05,
      "loss": 0.7943,
      "step": 680770
    },
    {
      "epoch": 2.385964174438451,
      "grad_norm": 3.140625,
      "learning_rate": 1.1371111993790095e-05,
      "loss": 0.9314,
      "step": 680780
    },
    {
      "epoch": 2.385999221945347,
      "grad_norm": 2.984375,
      "learning_rate": 1.1370462965126393e-05,
      "loss": 0.87,
      "step": 680790
    },
    {
      "epoch": 2.3860342694522423,
      "grad_norm": 2.9375,
      "learning_rate": 1.136981393646269e-05,
      "loss": 0.7908,
      "step": 680800
    },
    {
      "epoch": 2.386069316959138,
      "grad_norm": 2.671875,
      "learning_rate": 1.1369164907798989e-05,
      "loss": 0.7754,
      "step": 680810
    },
    {
      "epoch": 2.386104364466034,
      "grad_norm": 3.015625,
      "learning_rate": 1.1368515879135287e-05,
      "loss": 0.7616,
      "step": 680820
    },
    {
      "epoch": 2.386139411972929,
      "grad_norm": 2.734375,
      "learning_rate": 1.1367866850471586e-05,
      "loss": 0.7943,
      "step": 680830
    },
    {
      "epoch": 2.386174459479825,
      "grad_norm": 2.609375,
      "learning_rate": 1.1367217821807883e-05,
      "loss": 0.7117,
      "step": 680840
    },
    {
      "epoch": 2.3862095069867206,
      "grad_norm": 2.625,
      "learning_rate": 1.136656879314418e-05,
      "loss": 0.7864,
      "step": 680850
    },
    {
      "epoch": 2.386244554493616,
      "grad_norm": 3.265625,
      "learning_rate": 1.1365919764480479e-05,
      "loss": 0.76,
      "step": 680860
    },
    {
      "epoch": 2.3862796020005117,
      "grad_norm": 3.078125,
      "learning_rate": 1.1365270735816777e-05,
      "loss": 0.787,
      "step": 680870
    },
    {
      "epoch": 2.3863146495074075,
      "grad_norm": 3.046875,
      "learning_rate": 1.1364621707153075e-05,
      "loss": 0.803,
      "step": 680880
    },
    {
      "epoch": 2.3863496970143028,
      "grad_norm": 3.109375,
      "learning_rate": 1.1363972678489374e-05,
      "loss": 0.7852,
      "step": 680890
    },
    {
      "epoch": 2.3863847445211985,
      "grad_norm": 3.296875,
      "learning_rate": 1.1363323649825672e-05,
      "loss": 0.8353,
      "step": 680900
    },
    {
      "epoch": 2.386419792028094,
      "grad_norm": 2.765625,
      "learning_rate": 1.136267462116197e-05,
      "loss": 0.859,
      "step": 680910
    },
    {
      "epoch": 2.3864548395349896,
      "grad_norm": 2.578125,
      "learning_rate": 1.1362025592498268e-05,
      "loss": 0.7408,
      "step": 680920
    },
    {
      "epoch": 2.3864898870418854,
      "grad_norm": 2.765625,
      "learning_rate": 1.1361376563834565e-05,
      "loss": 0.9179,
      "step": 680930
    },
    {
      "epoch": 2.386524934548781,
      "grad_norm": 2.890625,
      "learning_rate": 1.1360727535170863e-05,
      "loss": 0.7701,
      "step": 680940
    },
    {
      "epoch": 2.3865599820556764,
      "grad_norm": 3.140625,
      "learning_rate": 1.1360078506507162e-05,
      "loss": 0.7512,
      "step": 680950
    },
    {
      "epoch": 2.386595029562572,
      "grad_norm": 2.875,
      "learning_rate": 1.135942947784346e-05,
      "loss": 0.8563,
      "step": 680960
    },
    {
      "epoch": 2.3866300770694675,
      "grad_norm": 2.40625,
      "learning_rate": 1.1358780449179758e-05,
      "loss": 0.7736,
      "step": 680970
    },
    {
      "epoch": 2.3866651245763633,
      "grad_norm": 2.6875,
      "learning_rate": 1.1358131420516056e-05,
      "loss": 0.8799,
      "step": 680980
    },
    {
      "epoch": 2.386700172083259,
      "grad_norm": 3.140625,
      "learning_rate": 1.1357482391852354e-05,
      "loss": 0.8821,
      "step": 680990
    },
    {
      "epoch": 2.3867352195901543,
      "grad_norm": 3.109375,
      "learning_rate": 1.1356833363188652e-05,
      "loss": 0.7803,
      "step": 681000
    },
    {
      "epoch": 2.38677026709705,
      "grad_norm": 2.59375,
      "learning_rate": 1.135618433452495e-05,
      "loss": 0.7578,
      "step": 681010
    },
    {
      "epoch": 2.3868053146039454,
      "grad_norm": 2.359375,
      "learning_rate": 1.1355535305861248e-05,
      "loss": 0.7725,
      "step": 681020
    },
    {
      "epoch": 2.386840362110841,
      "grad_norm": 3.25,
      "learning_rate": 1.1354886277197546e-05,
      "loss": 0.8175,
      "step": 681030
    },
    {
      "epoch": 2.386875409617737,
      "grad_norm": 3.328125,
      "learning_rate": 1.1354237248533844e-05,
      "loss": 0.7844,
      "step": 681040
    },
    {
      "epoch": 2.3869104571246327,
      "grad_norm": 3.296875,
      "learning_rate": 1.1353588219870142e-05,
      "loss": 0.8078,
      "step": 681050
    },
    {
      "epoch": 2.386945504631528,
      "grad_norm": 3.0,
      "learning_rate": 1.135293919120644e-05,
      "loss": 0.8066,
      "step": 681060
    },
    {
      "epoch": 2.3869805521384238,
      "grad_norm": 2.9375,
      "learning_rate": 1.135229016254274e-05,
      "loss": 0.7512,
      "step": 681070
    },
    {
      "epoch": 2.387015599645319,
      "grad_norm": 3.09375,
      "learning_rate": 1.1351641133879038e-05,
      "loss": 0.8187,
      "step": 681080
    },
    {
      "epoch": 2.387050647152215,
      "grad_norm": 3.46875,
      "learning_rate": 1.1350992105215336e-05,
      "loss": 0.8079,
      "step": 681090
    },
    {
      "epoch": 2.3870856946591106,
      "grad_norm": 2.8125,
      "learning_rate": 1.1350343076551634e-05,
      "loss": 0.8378,
      "step": 681100
    },
    {
      "epoch": 2.387120742166006,
      "grad_norm": 2.6875,
      "learning_rate": 1.1349694047887932e-05,
      "loss": 0.8461,
      "step": 681110
    },
    {
      "epoch": 2.3871557896729017,
      "grad_norm": 3.09375,
      "learning_rate": 1.1349045019224228e-05,
      "loss": 0.8093,
      "step": 681120
    },
    {
      "epoch": 2.3871908371797974,
      "grad_norm": 3.046875,
      "learning_rate": 1.1348395990560528e-05,
      "loss": 0.8351,
      "step": 681130
    },
    {
      "epoch": 2.3872258846866927,
      "grad_norm": 2.703125,
      "learning_rate": 1.1347746961896826e-05,
      "loss": 0.7968,
      "step": 681140
    },
    {
      "epoch": 2.3872609321935885,
      "grad_norm": 2.90625,
      "learning_rate": 1.1347097933233124e-05,
      "loss": 0.7246,
      "step": 681150
    },
    {
      "epoch": 2.3872959797004842,
      "grad_norm": 2.796875,
      "learning_rate": 1.1346448904569422e-05,
      "loss": 0.899,
      "step": 681160
    },
    {
      "epoch": 2.3873310272073796,
      "grad_norm": 2.828125,
      "learning_rate": 1.134579987590572e-05,
      "loss": 0.8772,
      "step": 681170
    },
    {
      "epoch": 2.3873660747142753,
      "grad_norm": 2.71875,
      "learning_rate": 1.1345150847242018e-05,
      "loss": 0.7296,
      "step": 681180
    },
    {
      "epoch": 2.3874011222211706,
      "grad_norm": 2.78125,
      "learning_rate": 1.1344501818578316e-05,
      "loss": 0.808,
      "step": 681190
    },
    {
      "epoch": 2.3874361697280664,
      "grad_norm": 2.515625,
      "learning_rate": 1.1343852789914616e-05,
      "loss": 0.798,
      "step": 681200
    },
    {
      "epoch": 2.387471217234962,
      "grad_norm": 3.0625,
      "learning_rate": 1.1343203761250912e-05,
      "loss": 0.7565,
      "step": 681210
    },
    {
      "epoch": 2.3875062647418575,
      "grad_norm": 3.34375,
      "learning_rate": 1.134255473258721e-05,
      "loss": 0.7342,
      "step": 681220
    },
    {
      "epoch": 2.387541312248753,
      "grad_norm": 2.859375,
      "learning_rate": 1.1341905703923508e-05,
      "loss": 0.7806,
      "step": 681230
    },
    {
      "epoch": 2.387576359755649,
      "grad_norm": 3.015625,
      "learning_rate": 1.1341256675259806e-05,
      "loss": 0.8312,
      "step": 681240
    },
    {
      "epoch": 2.3876114072625443,
      "grad_norm": 2.375,
      "learning_rate": 1.1340607646596104e-05,
      "loss": 0.7386,
      "step": 681250
    },
    {
      "epoch": 2.38764645476944,
      "grad_norm": 2.859375,
      "learning_rate": 1.1339958617932404e-05,
      "loss": 0.7527,
      "step": 681260
    },
    {
      "epoch": 2.387681502276336,
      "grad_norm": 2.8125,
      "learning_rate": 1.1339309589268702e-05,
      "loss": 0.7931,
      "step": 681270
    },
    {
      "epoch": 2.387716549783231,
      "grad_norm": 2.78125,
      "learning_rate": 1.1338660560605e-05,
      "loss": 0.7937,
      "step": 681280
    },
    {
      "epoch": 2.387751597290127,
      "grad_norm": 3.109375,
      "learning_rate": 1.1338011531941298e-05,
      "loss": 0.7299,
      "step": 681290
    },
    {
      "epoch": 2.387786644797022,
      "grad_norm": 2.828125,
      "learning_rate": 1.1337362503277596e-05,
      "loss": 0.8291,
      "step": 681300
    },
    {
      "epoch": 2.387821692303918,
      "grad_norm": 2.8125,
      "learning_rate": 1.1336713474613892e-05,
      "loss": 0.7732,
      "step": 681310
    },
    {
      "epoch": 2.3878567398108137,
      "grad_norm": 3.0,
      "learning_rate": 1.1336064445950192e-05,
      "loss": 0.8614,
      "step": 681320
    },
    {
      "epoch": 2.387891787317709,
      "grad_norm": 3.234375,
      "learning_rate": 1.133541541728649e-05,
      "loss": 0.735,
      "step": 681330
    },
    {
      "epoch": 2.3879268348246048,
      "grad_norm": 3.375,
      "learning_rate": 1.1334766388622788e-05,
      "loss": 0.8096,
      "step": 681340
    },
    {
      "epoch": 2.3879618823315005,
      "grad_norm": 2.8125,
      "learning_rate": 1.1334117359959086e-05,
      "loss": 0.7684,
      "step": 681350
    },
    {
      "epoch": 2.387996929838396,
      "grad_norm": 3.125,
      "learning_rate": 1.1333468331295384e-05,
      "loss": 0.8375,
      "step": 681360
    },
    {
      "epoch": 2.3880319773452916,
      "grad_norm": 3.25,
      "learning_rate": 1.1332819302631682e-05,
      "loss": 0.8579,
      "step": 681370
    },
    {
      "epoch": 2.3880670248521874,
      "grad_norm": 3.046875,
      "learning_rate": 1.1332170273967981e-05,
      "loss": 0.8355,
      "step": 681380
    },
    {
      "epoch": 2.3881020723590827,
      "grad_norm": 3.109375,
      "learning_rate": 1.133152124530428e-05,
      "loss": 0.8534,
      "step": 681390
    },
    {
      "epoch": 2.3881371198659784,
      "grad_norm": 2.484375,
      "learning_rate": 1.1330872216640576e-05,
      "loss": 0.7556,
      "step": 681400
    },
    {
      "epoch": 2.3881721673728737,
      "grad_norm": 2.78125,
      "learning_rate": 1.1330223187976874e-05,
      "loss": 0.7767,
      "step": 681410
    },
    {
      "epoch": 2.3882072148797695,
      "grad_norm": 3.6875,
      "learning_rate": 1.1329574159313172e-05,
      "loss": 0.8173,
      "step": 681420
    },
    {
      "epoch": 2.3882422623866653,
      "grad_norm": 2.6875,
      "learning_rate": 1.132892513064947e-05,
      "loss": 0.7695,
      "step": 681430
    },
    {
      "epoch": 2.3882773098935606,
      "grad_norm": 3.46875,
      "learning_rate": 1.132827610198577e-05,
      "loss": 0.776,
      "step": 681440
    },
    {
      "epoch": 2.3883123574004563,
      "grad_norm": 2.484375,
      "learning_rate": 1.1327627073322067e-05,
      "loss": 0.8579,
      "step": 681450
    },
    {
      "epoch": 2.388347404907352,
      "grad_norm": 3.125,
      "learning_rate": 1.1326978044658365e-05,
      "loss": 0.8039,
      "step": 681460
    },
    {
      "epoch": 2.3883824524142474,
      "grad_norm": 2.953125,
      "learning_rate": 1.1326329015994663e-05,
      "loss": 0.819,
      "step": 681470
    },
    {
      "epoch": 2.388417499921143,
      "grad_norm": 3.15625,
      "learning_rate": 1.1325679987330961e-05,
      "loss": 0.8336,
      "step": 681480
    },
    {
      "epoch": 2.388452547428039,
      "grad_norm": 2.90625,
      "learning_rate": 1.1325030958667258e-05,
      "loss": 0.8392,
      "step": 681490
    },
    {
      "epoch": 2.3884875949349342,
      "grad_norm": 3.1875,
      "learning_rate": 1.1324381930003557e-05,
      "loss": 0.8569,
      "step": 681500
    },
    {
      "epoch": 2.38852264244183,
      "grad_norm": 2.296875,
      "learning_rate": 1.1323732901339855e-05,
      "loss": 0.7547,
      "step": 681510
    },
    {
      "epoch": 2.3885576899487253,
      "grad_norm": 2.609375,
      "learning_rate": 1.1323083872676153e-05,
      "loss": 0.77,
      "step": 681520
    },
    {
      "epoch": 2.388592737455621,
      "grad_norm": 2.921875,
      "learning_rate": 1.1322434844012451e-05,
      "loss": 0.7559,
      "step": 681530
    },
    {
      "epoch": 2.388627784962517,
      "grad_norm": 2.484375,
      "learning_rate": 1.132178581534875e-05,
      "loss": 0.7763,
      "step": 681540
    },
    {
      "epoch": 2.388662832469412,
      "grad_norm": 2.8125,
      "learning_rate": 1.1321136786685047e-05,
      "loss": 0.8299,
      "step": 681550
    },
    {
      "epoch": 2.388697879976308,
      "grad_norm": 2.875,
      "learning_rate": 1.1320487758021345e-05,
      "loss": 0.8424,
      "step": 681560
    },
    {
      "epoch": 2.3887329274832036,
      "grad_norm": 3.25,
      "learning_rate": 1.1319838729357645e-05,
      "loss": 0.7421,
      "step": 681570
    },
    {
      "epoch": 2.388767974990099,
      "grad_norm": 2.9375,
      "learning_rate": 1.1319189700693943e-05,
      "loss": 0.8821,
      "step": 681580
    },
    {
      "epoch": 2.3888030224969947,
      "grad_norm": 2.90625,
      "learning_rate": 1.131854067203024e-05,
      "loss": 0.795,
      "step": 681590
    },
    {
      "epoch": 2.3888380700038905,
      "grad_norm": 3.0625,
      "learning_rate": 1.1317891643366537e-05,
      "loss": 0.8193,
      "step": 681600
    },
    {
      "epoch": 2.388873117510786,
      "grad_norm": 2.90625,
      "learning_rate": 1.1317242614702835e-05,
      "loss": 0.8877,
      "step": 681610
    },
    {
      "epoch": 2.3889081650176816,
      "grad_norm": 3.0,
      "learning_rate": 1.1316593586039135e-05,
      "loss": 0.7805,
      "step": 681620
    },
    {
      "epoch": 2.388943212524577,
      "grad_norm": 2.4375,
      "learning_rate": 1.1315944557375433e-05,
      "loss": 0.7901,
      "step": 681630
    },
    {
      "epoch": 2.3889782600314726,
      "grad_norm": 3.0625,
      "learning_rate": 1.1315295528711731e-05,
      "loss": 0.8374,
      "step": 681640
    },
    {
      "epoch": 2.3890133075383684,
      "grad_norm": 3.0625,
      "learning_rate": 1.1314646500048029e-05,
      "loss": 0.8374,
      "step": 681650
    },
    {
      "epoch": 2.3890483550452637,
      "grad_norm": 2.734375,
      "learning_rate": 1.1313997471384327e-05,
      "loss": 0.8227,
      "step": 681660
    },
    {
      "epoch": 2.3890834025521595,
      "grad_norm": 2.75,
      "learning_rate": 1.1313348442720625e-05,
      "loss": 0.8107,
      "step": 681670
    },
    {
      "epoch": 2.389118450059055,
      "grad_norm": 2.921875,
      "learning_rate": 1.1312699414056923e-05,
      "loss": 0.8204,
      "step": 681680
    },
    {
      "epoch": 2.3891534975659505,
      "grad_norm": 2.984375,
      "learning_rate": 1.1312050385393221e-05,
      "loss": 0.7904,
      "step": 681690
    },
    {
      "epoch": 2.3891885450728463,
      "grad_norm": 3.046875,
      "learning_rate": 1.1311401356729519e-05,
      "loss": 0.8547,
      "step": 681700
    },
    {
      "epoch": 2.389223592579742,
      "grad_norm": 3.296875,
      "learning_rate": 1.1310752328065817e-05,
      "loss": 0.767,
      "step": 681710
    },
    {
      "epoch": 2.3892586400866374,
      "grad_norm": 2.875,
      "learning_rate": 1.1310103299402115e-05,
      "loss": 0.8075,
      "step": 681720
    },
    {
      "epoch": 2.389293687593533,
      "grad_norm": 3.09375,
      "learning_rate": 1.1309454270738413e-05,
      "loss": 0.7558,
      "step": 681730
    },
    {
      "epoch": 2.3893287351004284,
      "grad_norm": 3.0625,
      "learning_rate": 1.1308805242074711e-05,
      "loss": 0.8325,
      "step": 681740
    },
    {
      "epoch": 2.389363782607324,
      "grad_norm": 3.328125,
      "learning_rate": 1.130815621341101e-05,
      "loss": 0.867,
      "step": 681750
    },
    {
      "epoch": 2.38939883011422,
      "grad_norm": 3.3125,
      "learning_rate": 1.1307507184747309e-05,
      "loss": 0.8295,
      "step": 681760
    },
    {
      "epoch": 2.3894338776211153,
      "grad_norm": 2.71875,
      "learning_rate": 1.1306858156083607e-05,
      "loss": 0.8105,
      "step": 681770
    },
    {
      "epoch": 2.389468925128011,
      "grad_norm": 2.734375,
      "learning_rate": 1.1306209127419903e-05,
      "loss": 0.8176,
      "step": 681780
    },
    {
      "epoch": 2.3895039726349068,
      "grad_norm": 3.15625,
      "learning_rate": 1.1305560098756201e-05,
      "loss": 0.789,
      "step": 681790
    },
    {
      "epoch": 2.389539020141802,
      "grad_norm": 2.65625,
      "learning_rate": 1.1304911070092499e-05,
      "loss": 0.7746,
      "step": 681800
    },
    {
      "epoch": 2.389574067648698,
      "grad_norm": 2.25,
      "learning_rate": 1.1304262041428799e-05,
      "loss": 0.8113,
      "step": 681810
    },
    {
      "epoch": 2.3896091151555936,
      "grad_norm": 2.375,
      "learning_rate": 1.1303613012765097e-05,
      "loss": 0.8353,
      "step": 681820
    },
    {
      "epoch": 2.389644162662489,
      "grad_norm": 2.640625,
      "learning_rate": 1.1302963984101395e-05,
      "loss": 0.8248,
      "step": 681830
    },
    {
      "epoch": 2.3896792101693847,
      "grad_norm": 3.125,
      "learning_rate": 1.1302314955437693e-05,
      "loss": 0.9056,
      "step": 681840
    },
    {
      "epoch": 2.38971425767628,
      "grad_norm": 2.875,
      "learning_rate": 1.130166592677399e-05,
      "loss": 0.7761,
      "step": 681850
    },
    {
      "epoch": 2.3897493051831757,
      "grad_norm": 3.046875,
      "learning_rate": 1.1301016898110289e-05,
      "loss": 0.8166,
      "step": 681860
    },
    {
      "epoch": 2.3897843526900715,
      "grad_norm": 2.78125,
      "learning_rate": 1.1300367869446587e-05,
      "loss": 0.7601,
      "step": 681870
    },
    {
      "epoch": 2.389819400196967,
      "grad_norm": 2.890625,
      "learning_rate": 1.1299718840782885e-05,
      "loss": 0.8455,
      "step": 681880
    },
    {
      "epoch": 2.3898544477038626,
      "grad_norm": 2.3125,
      "learning_rate": 1.1299069812119183e-05,
      "loss": 0.754,
      "step": 681890
    },
    {
      "epoch": 2.3898894952107583,
      "grad_norm": 3.15625,
      "learning_rate": 1.129842078345548e-05,
      "loss": 0.7926,
      "step": 681900
    },
    {
      "epoch": 2.3899245427176536,
      "grad_norm": 3.21875,
      "learning_rate": 1.1297771754791779e-05,
      "loss": 0.8958,
      "step": 681910
    },
    {
      "epoch": 2.3899595902245494,
      "grad_norm": 3.203125,
      "learning_rate": 1.1297122726128077e-05,
      "loss": 0.7616,
      "step": 681920
    },
    {
      "epoch": 2.389994637731445,
      "grad_norm": 3.171875,
      "learning_rate": 1.1296473697464376e-05,
      "loss": 0.8197,
      "step": 681930
    },
    {
      "epoch": 2.3900296852383405,
      "grad_norm": 3.03125,
      "learning_rate": 1.1295824668800674e-05,
      "loss": 0.8635,
      "step": 681940
    },
    {
      "epoch": 2.3900647327452362,
      "grad_norm": 2.65625,
      "learning_rate": 1.1295175640136972e-05,
      "loss": 0.8034,
      "step": 681950
    },
    {
      "epoch": 2.3900997802521315,
      "grad_norm": 2.578125,
      "learning_rate": 1.1294526611473269e-05,
      "loss": 0.8558,
      "step": 681960
    },
    {
      "epoch": 2.3901348277590273,
      "grad_norm": 2.828125,
      "learning_rate": 1.1293877582809567e-05,
      "loss": 0.7723,
      "step": 681970
    },
    {
      "epoch": 2.390169875265923,
      "grad_norm": 3.046875,
      "learning_rate": 1.1293228554145865e-05,
      "loss": 0.7565,
      "step": 681980
    },
    {
      "epoch": 2.3902049227728184,
      "grad_norm": 3.40625,
      "learning_rate": 1.1292579525482164e-05,
      "loss": 0.8176,
      "step": 681990
    },
    {
      "epoch": 2.390239970279714,
      "grad_norm": 2.9375,
      "learning_rate": 1.1291930496818462e-05,
      "loss": 0.8401,
      "step": 682000
    },
    {
      "epoch": 2.39027501778661,
      "grad_norm": 2.90625,
      "learning_rate": 1.129128146815476e-05,
      "loss": 0.8067,
      "step": 682010
    },
    {
      "epoch": 2.390310065293505,
      "grad_norm": 3.40625,
      "learning_rate": 1.1290632439491058e-05,
      "loss": 0.7768,
      "step": 682020
    },
    {
      "epoch": 2.390345112800401,
      "grad_norm": 2.703125,
      "learning_rate": 1.1289983410827356e-05,
      "loss": 0.8754,
      "step": 682030
    },
    {
      "epoch": 2.3903801603072967,
      "grad_norm": 2.953125,
      "learning_rate": 1.1289334382163654e-05,
      "loss": 0.8604,
      "step": 682040
    },
    {
      "epoch": 2.390415207814192,
      "grad_norm": 2.515625,
      "learning_rate": 1.1288685353499952e-05,
      "loss": 0.7612,
      "step": 682050
    },
    {
      "epoch": 2.390450255321088,
      "grad_norm": 2.734375,
      "learning_rate": 1.128803632483625e-05,
      "loss": 0.7665,
      "step": 682060
    },
    {
      "epoch": 2.390485302827983,
      "grad_norm": 2.625,
      "learning_rate": 1.1287387296172548e-05,
      "loss": 0.6957,
      "step": 682070
    },
    {
      "epoch": 2.390520350334879,
      "grad_norm": 2.578125,
      "learning_rate": 1.1286738267508846e-05,
      "loss": 0.8605,
      "step": 682080
    },
    {
      "epoch": 2.3905553978417746,
      "grad_norm": 3.390625,
      "learning_rate": 1.1286089238845144e-05,
      "loss": 0.8294,
      "step": 682090
    },
    {
      "epoch": 2.39059044534867,
      "grad_norm": 2.703125,
      "learning_rate": 1.1285440210181442e-05,
      "loss": 0.7926,
      "step": 682100
    },
    {
      "epoch": 2.3906254928555657,
      "grad_norm": 2.59375,
      "learning_rate": 1.128479118151774e-05,
      "loss": 0.8077,
      "step": 682110
    },
    {
      "epoch": 2.3906605403624615,
      "grad_norm": 3.0,
      "learning_rate": 1.128414215285404e-05,
      "loss": 0.7867,
      "step": 682120
    },
    {
      "epoch": 2.3906955878693568,
      "grad_norm": 2.5,
      "learning_rate": 1.1283493124190338e-05,
      "loss": 0.7443,
      "step": 682130
    },
    {
      "epoch": 2.3907306353762525,
      "grad_norm": 3.328125,
      "learning_rate": 1.1282844095526636e-05,
      "loss": 0.7936,
      "step": 682140
    },
    {
      "epoch": 2.3907656828831483,
      "grad_norm": 3.03125,
      "learning_rate": 1.1282195066862932e-05,
      "loss": 0.8947,
      "step": 682150
    },
    {
      "epoch": 2.3908007303900436,
      "grad_norm": 3.09375,
      "learning_rate": 1.128154603819923e-05,
      "loss": 0.8043,
      "step": 682160
    },
    {
      "epoch": 2.3908357778969394,
      "grad_norm": 2.734375,
      "learning_rate": 1.128089700953553e-05,
      "loss": 0.8197,
      "step": 682170
    },
    {
      "epoch": 2.3908708254038347,
      "grad_norm": 3.0,
      "learning_rate": 1.1280247980871828e-05,
      "loss": 0.757,
      "step": 682180
    },
    {
      "epoch": 2.3909058729107304,
      "grad_norm": 2.546875,
      "learning_rate": 1.1279598952208126e-05,
      "loss": 0.8039,
      "step": 682190
    },
    {
      "epoch": 2.390940920417626,
      "grad_norm": 2.859375,
      "learning_rate": 1.1278949923544424e-05,
      "loss": 0.7942,
      "step": 682200
    },
    {
      "epoch": 2.390975967924522,
      "grad_norm": 2.734375,
      "learning_rate": 1.1278300894880722e-05,
      "loss": 0.7907,
      "step": 682210
    },
    {
      "epoch": 2.3910110154314173,
      "grad_norm": 3.1875,
      "learning_rate": 1.127765186621702e-05,
      "loss": 0.7516,
      "step": 682220
    },
    {
      "epoch": 2.391046062938313,
      "grad_norm": 2.984375,
      "learning_rate": 1.1277002837553318e-05,
      "loss": 0.7743,
      "step": 682230
    },
    {
      "epoch": 2.3910811104452083,
      "grad_norm": 3.328125,
      "learning_rate": 1.1276353808889618e-05,
      "loss": 0.7225,
      "step": 682240
    },
    {
      "epoch": 2.391116157952104,
      "grad_norm": 3.421875,
      "learning_rate": 1.1275704780225914e-05,
      "loss": 0.8159,
      "step": 682250
    },
    {
      "epoch": 2.391151205459,
      "grad_norm": 3.359375,
      "learning_rate": 1.1275055751562212e-05,
      "loss": 0.7862,
      "step": 682260
    },
    {
      "epoch": 2.391186252965895,
      "grad_norm": 2.65625,
      "learning_rate": 1.127440672289851e-05,
      "loss": 0.8381,
      "step": 682270
    },
    {
      "epoch": 2.391221300472791,
      "grad_norm": 2.578125,
      "learning_rate": 1.1273757694234808e-05,
      "loss": 0.7829,
      "step": 682280
    },
    {
      "epoch": 2.3912563479796862,
      "grad_norm": 3.140625,
      "learning_rate": 1.1273108665571106e-05,
      "loss": 0.8653,
      "step": 682290
    },
    {
      "epoch": 2.391291395486582,
      "grad_norm": 2.625,
      "learning_rate": 1.1272459636907406e-05,
      "loss": 0.8084,
      "step": 682300
    },
    {
      "epoch": 2.3913264429934777,
      "grad_norm": 2.953125,
      "learning_rate": 1.1271810608243704e-05,
      "loss": 0.8743,
      "step": 682310
    },
    {
      "epoch": 2.3913614905003735,
      "grad_norm": 2.453125,
      "learning_rate": 1.1271161579580002e-05,
      "loss": 0.801,
      "step": 682320
    },
    {
      "epoch": 2.391396538007269,
      "grad_norm": 2.78125,
      "learning_rate": 1.12705125509163e-05,
      "loss": 0.8802,
      "step": 682330
    },
    {
      "epoch": 2.3914315855141646,
      "grad_norm": 2.734375,
      "learning_rate": 1.1269863522252596e-05,
      "loss": 0.7716,
      "step": 682340
    },
    {
      "epoch": 2.39146663302106,
      "grad_norm": 2.765625,
      "learning_rate": 1.1269214493588894e-05,
      "loss": 0.8179,
      "step": 682350
    },
    {
      "epoch": 2.3915016805279556,
      "grad_norm": 2.9375,
      "learning_rate": 1.1268565464925194e-05,
      "loss": 0.7905,
      "step": 682360
    },
    {
      "epoch": 2.3915367280348514,
      "grad_norm": 2.875,
      "learning_rate": 1.1267916436261492e-05,
      "loss": 0.8826,
      "step": 682370
    },
    {
      "epoch": 2.3915717755417467,
      "grad_norm": 3.0625,
      "learning_rate": 1.126726740759779e-05,
      "loss": 0.817,
      "step": 682380
    },
    {
      "epoch": 2.3916068230486425,
      "grad_norm": 2.828125,
      "learning_rate": 1.1266618378934088e-05,
      "loss": 0.7223,
      "step": 682390
    },
    {
      "epoch": 2.391641870555538,
      "grad_norm": 2.828125,
      "learning_rate": 1.1265969350270386e-05,
      "loss": 0.8397,
      "step": 682400
    },
    {
      "epoch": 2.3916769180624335,
      "grad_norm": 3.03125,
      "learning_rate": 1.1265320321606684e-05,
      "loss": 0.8422,
      "step": 682410
    },
    {
      "epoch": 2.3917119655693293,
      "grad_norm": 3.390625,
      "learning_rate": 1.1264671292942982e-05,
      "loss": 0.8402,
      "step": 682420
    },
    {
      "epoch": 2.391747013076225,
      "grad_norm": 2.65625,
      "learning_rate": 1.126402226427928e-05,
      "loss": 0.8014,
      "step": 682430
    },
    {
      "epoch": 2.3917820605831204,
      "grad_norm": 2.796875,
      "learning_rate": 1.1263373235615578e-05,
      "loss": 0.8343,
      "step": 682440
    },
    {
      "epoch": 2.391817108090016,
      "grad_norm": 2.625,
      "learning_rate": 1.1262724206951876e-05,
      "loss": 0.8241,
      "step": 682450
    },
    {
      "epoch": 2.3918521555969114,
      "grad_norm": 3.015625,
      "learning_rate": 1.1262075178288174e-05,
      "loss": 0.8226,
      "step": 682460
    },
    {
      "epoch": 2.391887203103807,
      "grad_norm": 3.046875,
      "learning_rate": 1.1261426149624472e-05,
      "loss": 0.7961,
      "step": 682470
    },
    {
      "epoch": 2.391922250610703,
      "grad_norm": 3.109375,
      "learning_rate": 1.1260777120960772e-05,
      "loss": 0.8087,
      "step": 682480
    },
    {
      "epoch": 2.3919572981175983,
      "grad_norm": 2.65625,
      "learning_rate": 1.126012809229707e-05,
      "loss": 0.8209,
      "step": 682490
    },
    {
      "epoch": 2.391992345624494,
      "grad_norm": 2.6875,
      "learning_rate": 1.1259479063633368e-05,
      "loss": 0.8314,
      "step": 682500
    },
    {
      "epoch": 2.39202739313139,
      "grad_norm": 2.796875,
      "learning_rate": 1.1258830034969666e-05,
      "loss": 0.8347,
      "step": 682510
    },
    {
      "epoch": 2.392062440638285,
      "grad_norm": 2.71875,
      "learning_rate": 1.1258181006305964e-05,
      "loss": 0.7509,
      "step": 682520
    },
    {
      "epoch": 2.392097488145181,
      "grad_norm": 2.671875,
      "learning_rate": 1.125753197764226e-05,
      "loss": 0.7364,
      "step": 682530
    },
    {
      "epoch": 2.3921325356520766,
      "grad_norm": 2.65625,
      "learning_rate": 1.125688294897856e-05,
      "loss": 0.7641,
      "step": 682540
    },
    {
      "epoch": 2.392167583158972,
      "grad_norm": 2.703125,
      "learning_rate": 1.1256233920314858e-05,
      "loss": 0.8282,
      "step": 682550
    },
    {
      "epoch": 2.3922026306658677,
      "grad_norm": 3.34375,
      "learning_rate": 1.1255584891651156e-05,
      "loss": 0.6939,
      "step": 682560
    },
    {
      "epoch": 2.392237678172763,
      "grad_norm": 2.59375,
      "learning_rate": 1.1254935862987454e-05,
      "loss": 0.8128,
      "step": 682570
    },
    {
      "epoch": 2.3922727256796588,
      "grad_norm": 2.953125,
      "learning_rate": 1.1254286834323752e-05,
      "loss": 0.7766,
      "step": 682580
    },
    {
      "epoch": 2.3923077731865545,
      "grad_norm": 3.203125,
      "learning_rate": 1.125363780566005e-05,
      "loss": 0.7961,
      "step": 682590
    },
    {
      "epoch": 2.39234282069345,
      "grad_norm": 3.1875,
      "learning_rate": 1.1252988776996348e-05,
      "loss": 0.8009,
      "step": 682600
    },
    {
      "epoch": 2.3923778682003456,
      "grad_norm": 3.4375,
      "learning_rate": 1.1252339748332647e-05,
      "loss": 0.8583,
      "step": 682610
    },
    {
      "epoch": 2.3924129157072413,
      "grad_norm": 2.765625,
      "learning_rate": 1.1251690719668944e-05,
      "loss": 0.7803,
      "step": 682620
    },
    {
      "epoch": 2.3924479632141367,
      "grad_norm": 2.890625,
      "learning_rate": 1.1251041691005242e-05,
      "loss": 0.8088,
      "step": 682630
    },
    {
      "epoch": 2.3924830107210324,
      "grad_norm": 2.859375,
      "learning_rate": 1.125039266234154e-05,
      "loss": 0.8196,
      "step": 682640
    },
    {
      "epoch": 2.392518058227928,
      "grad_norm": 2.8125,
      "learning_rate": 1.1249743633677838e-05,
      "loss": 0.7652,
      "step": 682650
    },
    {
      "epoch": 2.3925531057348235,
      "grad_norm": 2.875,
      "learning_rate": 1.1249094605014136e-05,
      "loss": 0.842,
      "step": 682660
    },
    {
      "epoch": 2.3925881532417193,
      "grad_norm": 2.53125,
      "learning_rate": 1.1248445576350435e-05,
      "loss": 0.749,
      "step": 682670
    },
    {
      "epoch": 2.3926232007486146,
      "grad_norm": 3.34375,
      "learning_rate": 1.1247796547686733e-05,
      "loss": 0.8591,
      "step": 682680
    },
    {
      "epoch": 2.3926582482555103,
      "grad_norm": 2.921875,
      "learning_rate": 1.1247147519023031e-05,
      "loss": 0.8,
      "step": 682690
    },
    {
      "epoch": 2.392693295762406,
      "grad_norm": 2.625,
      "learning_rate": 1.124649849035933e-05,
      "loss": 0.7854,
      "step": 682700
    },
    {
      "epoch": 2.3927283432693014,
      "grad_norm": 2.765625,
      "learning_rate": 1.1245849461695627e-05,
      "loss": 0.7131,
      "step": 682710
    },
    {
      "epoch": 2.392763390776197,
      "grad_norm": 2.875,
      "learning_rate": 1.1245200433031925e-05,
      "loss": 0.8697,
      "step": 682720
    },
    {
      "epoch": 2.392798438283093,
      "grad_norm": 3.015625,
      "learning_rate": 1.1244551404368223e-05,
      "loss": 0.7984,
      "step": 682730
    },
    {
      "epoch": 2.3928334857899882,
      "grad_norm": 3.0625,
      "learning_rate": 1.1243902375704521e-05,
      "loss": 0.8081,
      "step": 682740
    },
    {
      "epoch": 2.392868533296884,
      "grad_norm": 2.90625,
      "learning_rate": 1.124325334704082e-05,
      "loss": 0.7683,
      "step": 682750
    },
    {
      "epoch": 2.3929035808037797,
      "grad_norm": 3.359375,
      "learning_rate": 1.1242604318377117e-05,
      "loss": 0.7865,
      "step": 682760
    },
    {
      "epoch": 2.392938628310675,
      "grad_norm": 3.0625,
      "learning_rate": 1.1241955289713415e-05,
      "loss": 0.7722,
      "step": 682770
    },
    {
      "epoch": 2.392973675817571,
      "grad_norm": 3.171875,
      "learning_rate": 1.1241306261049713e-05,
      "loss": 0.8296,
      "step": 682780
    },
    {
      "epoch": 2.393008723324466,
      "grad_norm": 2.625,
      "learning_rate": 1.1240657232386013e-05,
      "loss": 0.7779,
      "step": 682790
    },
    {
      "epoch": 2.393043770831362,
      "grad_norm": 3.078125,
      "learning_rate": 1.1240008203722311e-05,
      "loss": 0.8748,
      "step": 682800
    },
    {
      "epoch": 2.3930788183382576,
      "grad_norm": 2.78125,
      "learning_rate": 1.1239359175058607e-05,
      "loss": 0.8286,
      "step": 682810
    },
    {
      "epoch": 2.393113865845153,
      "grad_norm": 2.640625,
      "learning_rate": 1.1238710146394905e-05,
      "loss": 0.781,
      "step": 682820
    },
    {
      "epoch": 2.3931489133520487,
      "grad_norm": 2.75,
      "learning_rate": 1.1238061117731203e-05,
      "loss": 0.7827,
      "step": 682830
    },
    {
      "epoch": 2.3931839608589445,
      "grad_norm": 2.984375,
      "learning_rate": 1.1237412089067501e-05,
      "loss": 0.9093,
      "step": 682840
    },
    {
      "epoch": 2.39321900836584,
      "grad_norm": 2.984375,
      "learning_rate": 1.1236763060403801e-05,
      "loss": 0.7874,
      "step": 682850
    },
    {
      "epoch": 2.3932540558727355,
      "grad_norm": 2.65625,
      "learning_rate": 1.1236114031740099e-05,
      "loss": 0.7758,
      "step": 682860
    },
    {
      "epoch": 2.3932891033796313,
      "grad_norm": 2.796875,
      "learning_rate": 1.1235465003076397e-05,
      "loss": 0.857,
      "step": 682870
    },
    {
      "epoch": 2.3933241508865266,
      "grad_norm": 2.828125,
      "learning_rate": 1.1234815974412695e-05,
      "loss": 0.7717,
      "step": 682880
    },
    {
      "epoch": 2.3933591983934224,
      "grad_norm": 3.0,
      "learning_rate": 1.1234166945748993e-05,
      "loss": 0.7393,
      "step": 682890
    },
    {
      "epoch": 2.3933942459003177,
      "grad_norm": 2.796875,
      "learning_rate": 1.123351791708529e-05,
      "loss": 0.8036,
      "step": 682900
    },
    {
      "epoch": 2.3934292934072134,
      "grad_norm": 2.8125,
      "learning_rate": 1.1232868888421589e-05,
      "loss": 0.8169,
      "step": 682910
    },
    {
      "epoch": 2.393464340914109,
      "grad_norm": 2.890625,
      "learning_rate": 1.1232219859757887e-05,
      "loss": 0.7713,
      "step": 682920
    },
    {
      "epoch": 2.3934993884210045,
      "grad_norm": 3.078125,
      "learning_rate": 1.1231570831094185e-05,
      "loss": 0.7995,
      "step": 682930
    },
    {
      "epoch": 2.3935344359279003,
      "grad_norm": 3.046875,
      "learning_rate": 1.1230921802430483e-05,
      "loss": 0.8458,
      "step": 682940
    },
    {
      "epoch": 2.393569483434796,
      "grad_norm": 2.671875,
      "learning_rate": 1.1230272773766781e-05,
      "loss": 0.7811,
      "step": 682950
    },
    {
      "epoch": 2.3936045309416913,
      "grad_norm": 3.046875,
      "learning_rate": 1.1229623745103079e-05,
      "loss": 0.7525,
      "step": 682960
    },
    {
      "epoch": 2.393639578448587,
      "grad_norm": 2.640625,
      "learning_rate": 1.1228974716439377e-05,
      "loss": 0.8236,
      "step": 682970
    },
    {
      "epoch": 2.393674625955483,
      "grad_norm": 2.65625,
      "learning_rate": 1.1228325687775677e-05,
      "loss": 0.762,
      "step": 682980
    },
    {
      "epoch": 2.393709673462378,
      "grad_norm": 2.703125,
      "learning_rate": 1.1227676659111975e-05,
      "loss": 0.7785,
      "step": 682990
    },
    {
      "epoch": 2.393744720969274,
      "grad_norm": 3.015625,
      "learning_rate": 1.1227027630448271e-05,
      "loss": 0.8468,
      "step": 683000
    },
    {
      "epoch": 2.3937797684761692,
      "grad_norm": 2.734375,
      "learning_rate": 1.1226378601784569e-05,
      "loss": 0.7678,
      "step": 683010
    },
    {
      "epoch": 2.393814815983065,
      "grad_norm": 2.828125,
      "learning_rate": 1.1225729573120867e-05,
      "loss": 0.807,
      "step": 683020
    },
    {
      "epoch": 2.3938498634899608,
      "grad_norm": 3.078125,
      "learning_rate": 1.1225080544457167e-05,
      "loss": 0.7849,
      "step": 683030
    },
    {
      "epoch": 2.393884910996856,
      "grad_norm": 2.5625,
      "learning_rate": 1.1224431515793465e-05,
      "loss": 0.7681,
      "step": 683040
    },
    {
      "epoch": 2.393919958503752,
      "grad_norm": 3.0625,
      "learning_rate": 1.1223782487129763e-05,
      "loss": 0.8473,
      "step": 683050
    },
    {
      "epoch": 2.3939550060106476,
      "grad_norm": 2.734375,
      "learning_rate": 1.122313345846606e-05,
      "loss": 0.7753,
      "step": 683060
    },
    {
      "epoch": 2.393990053517543,
      "grad_norm": 3.015625,
      "learning_rate": 1.1222484429802359e-05,
      "loss": 0.8786,
      "step": 683070
    },
    {
      "epoch": 2.3940251010244387,
      "grad_norm": 2.8125,
      "learning_rate": 1.1221835401138657e-05,
      "loss": 0.8511,
      "step": 683080
    },
    {
      "epoch": 2.3940601485313344,
      "grad_norm": 2.671875,
      "learning_rate": 1.1221186372474955e-05,
      "loss": 0.7822,
      "step": 683090
    },
    {
      "epoch": 2.3940951960382297,
      "grad_norm": 3.125,
      "learning_rate": 1.1220537343811253e-05,
      "loss": 0.8781,
      "step": 683100
    },
    {
      "epoch": 2.3941302435451255,
      "grad_norm": 3.359375,
      "learning_rate": 1.121988831514755e-05,
      "loss": 0.777,
      "step": 683110
    },
    {
      "epoch": 2.394165291052021,
      "grad_norm": 3.03125,
      "learning_rate": 1.1219239286483849e-05,
      "loss": 0.8539,
      "step": 683120
    },
    {
      "epoch": 2.3942003385589166,
      "grad_norm": 2.640625,
      "learning_rate": 1.1218590257820147e-05,
      "loss": 0.6948,
      "step": 683130
    },
    {
      "epoch": 2.3942353860658123,
      "grad_norm": 2.90625,
      "learning_rate": 1.1217941229156445e-05,
      "loss": 0.8387,
      "step": 683140
    },
    {
      "epoch": 2.3942704335727076,
      "grad_norm": 3.046875,
      "learning_rate": 1.1217292200492743e-05,
      "loss": 0.8403,
      "step": 683150
    },
    {
      "epoch": 2.3943054810796034,
      "grad_norm": 3.171875,
      "learning_rate": 1.1216643171829042e-05,
      "loss": 0.7887,
      "step": 683160
    },
    {
      "epoch": 2.394340528586499,
      "grad_norm": 2.796875,
      "learning_rate": 1.121599414316534e-05,
      "loss": 0.7788,
      "step": 683170
    },
    {
      "epoch": 2.3943755760933945,
      "grad_norm": 3.203125,
      "learning_rate": 1.1215345114501638e-05,
      "loss": 0.7756,
      "step": 683180
    },
    {
      "epoch": 2.3944106236002902,
      "grad_norm": 2.78125,
      "learning_rate": 1.1214696085837935e-05,
      "loss": 0.7826,
      "step": 683190
    },
    {
      "epoch": 2.394445671107186,
      "grad_norm": 2.9375,
      "learning_rate": 1.1214047057174233e-05,
      "loss": 0.7907,
      "step": 683200
    },
    {
      "epoch": 2.3944807186140813,
      "grad_norm": 2.953125,
      "learning_rate": 1.121339802851053e-05,
      "loss": 0.8231,
      "step": 683210
    },
    {
      "epoch": 2.394515766120977,
      "grad_norm": 2.6875,
      "learning_rate": 1.121274899984683e-05,
      "loss": 0.7615,
      "step": 683220
    },
    {
      "epoch": 2.3945508136278724,
      "grad_norm": 2.71875,
      "learning_rate": 1.1212099971183128e-05,
      "loss": 0.7774,
      "step": 683230
    },
    {
      "epoch": 2.394585861134768,
      "grad_norm": 2.671875,
      "learning_rate": 1.1211450942519426e-05,
      "loss": 0.764,
      "step": 683240
    },
    {
      "epoch": 2.394620908641664,
      "grad_norm": 3.09375,
      "learning_rate": 1.1210801913855724e-05,
      "loss": 0.8039,
      "step": 683250
    },
    {
      "epoch": 2.394655956148559,
      "grad_norm": 3.125,
      "learning_rate": 1.1210152885192022e-05,
      "loss": 0.8647,
      "step": 683260
    },
    {
      "epoch": 2.394691003655455,
      "grad_norm": 3.03125,
      "learning_rate": 1.120950385652832e-05,
      "loss": 0.7609,
      "step": 683270
    },
    {
      "epoch": 2.3947260511623507,
      "grad_norm": 2.8125,
      "learning_rate": 1.1208854827864618e-05,
      "loss": 0.8372,
      "step": 683280
    },
    {
      "epoch": 2.394761098669246,
      "grad_norm": 2.984375,
      "learning_rate": 1.1208205799200916e-05,
      "loss": 0.796,
      "step": 683290
    },
    {
      "epoch": 2.394796146176142,
      "grad_norm": 3.1875,
      "learning_rate": 1.1207556770537214e-05,
      "loss": 0.8553,
      "step": 683300
    },
    {
      "epoch": 2.3948311936830375,
      "grad_norm": 2.578125,
      "learning_rate": 1.1206907741873512e-05,
      "loss": 0.8158,
      "step": 683310
    },
    {
      "epoch": 2.394866241189933,
      "grad_norm": 2.8125,
      "learning_rate": 1.120625871320981e-05,
      "loss": 0.8231,
      "step": 683320
    },
    {
      "epoch": 2.3949012886968286,
      "grad_norm": 2.859375,
      "learning_rate": 1.1205609684546108e-05,
      "loss": 0.7768,
      "step": 683330
    },
    {
      "epoch": 2.394936336203724,
      "grad_norm": 2.796875,
      "learning_rate": 1.1204960655882408e-05,
      "loss": 0.7648,
      "step": 683340
    },
    {
      "epoch": 2.3949713837106197,
      "grad_norm": 2.734375,
      "learning_rate": 1.1204311627218706e-05,
      "loss": 0.8241,
      "step": 683350
    },
    {
      "epoch": 2.3950064312175154,
      "grad_norm": 2.640625,
      "learning_rate": 1.1203662598555004e-05,
      "loss": 0.8112,
      "step": 683360
    },
    {
      "epoch": 2.3950414787244108,
      "grad_norm": 2.484375,
      "learning_rate": 1.1203013569891302e-05,
      "loss": 0.7667,
      "step": 683370
    },
    {
      "epoch": 2.3950765262313065,
      "grad_norm": 3.46875,
      "learning_rate": 1.1202364541227598e-05,
      "loss": 0.7838,
      "step": 683380
    },
    {
      "epoch": 2.3951115737382023,
      "grad_norm": 2.78125,
      "learning_rate": 1.1201715512563896e-05,
      "loss": 0.8233,
      "step": 683390
    },
    {
      "epoch": 2.3951466212450976,
      "grad_norm": 2.890625,
      "learning_rate": 1.1201066483900196e-05,
      "loss": 0.8164,
      "step": 683400
    },
    {
      "epoch": 2.3951816687519933,
      "grad_norm": 3.109375,
      "learning_rate": 1.1200417455236494e-05,
      "loss": 0.8514,
      "step": 683410
    },
    {
      "epoch": 2.395216716258889,
      "grad_norm": 2.828125,
      "learning_rate": 1.1199768426572792e-05,
      "loss": 0.7471,
      "step": 683420
    },
    {
      "epoch": 2.3952517637657844,
      "grad_norm": 3.953125,
      "learning_rate": 1.119911939790909e-05,
      "loss": 0.8657,
      "step": 683430
    },
    {
      "epoch": 2.39528681127268,
      "grad_norm": 2.984375,
      "learning_rate": 1.1198470369245388e-05,
      "loss": 0.8555,
      "step": 683440
    },
    {
      "epoch": 2.3953218587795755,
      "grad_norm": 3.125,
      "learning_rate": 1.1197821340581686e-05,
      "loss": 0.8769,
      "step": 683450
    },
    {
      "epoch": 2.3953569062864712,
      "grad_norm": 3.015625,
      "learning_rate": 1.1197172311917984e-05,
      "loss": 0.8648,
      "step": 683460
    },
    {
      "epoch": 2.395391953793367,
      "grad_norm": 2.84375,
      "learning_rate": 1.1196523283254282e-05,
      "loss": 0.8379,
      "step": 683470
    },
    {
      "epoch": 2.3954270013002623,
      "grad_norm": 2.609375,
      "learning_rate": 1.119587425459058e-05,
      "loss": 0.762,
      "step": 683480
    },
    {
      "epoch": 2.395462048807158,
      "grad_norm": 2.84375,
      "learning_rate": 1.1195225225926878e-05,
      "loss": 0.8304,
      "step": 683490
    },
    {
      "epoch": 2.395497096314054,
      "grad_norm": 2.53125,
      "learning_rate": 1.1194576197263176e-05,
      "loss": 0.8799,
      "step": 683500
    },
    {
      "epoch": 2.395532143820949,
      "grad_norm": 3.171875,
      "learning_rate": 1.1193927168599474e-05,
      "loss": 0.8633,
      "step": 683510
    },
    {
      "epoch": 2.395567191327845,
      "grad_norm": 2.828125,
      "learning_rate": 1.1193278139935772e-05,
      "loss": 0.7694,
      "step": 683520
    },
    {
      "epoch": 2.3956022388347407,
      "grad_norm": 3.0,
      "learning_rate": 1.1192629111272072e-05,
      "loss": 0.7925,
      "step": 683530
    },
    {
      "epoch": 2.395637286341636,
      "grad_norm": 3.0,
      "learning_rate": 1.119198008260837e-05,
      "loss": 0.7672,
      "step": 683540
    },
    {
      "epoch": 2.3956723338485317,
      "grad_norm": 3.125,
      "learning_rate": 1.1191331053944668e-05,
      "loss": 0.8639,
      "step": 683550
    },
    {
      "epoch": 2.395707381355427,
      "grad_norm": 2.578125,
      "learning_rate": 1.1190682025280964e-05,
      "loss": 0.7793,
      "step": 683560
    },
    {
      "epoch": 2.395742428862323,
      "grad_norm": 3.359375,
      "learning_rate": 1.1190032996617262e-05,
      "loss": 0.8271,
      "step": 683570
    },
    {
      "epoch": 2.3957774763692186,
      "grad_norm": 3.015625,
      "learning_rate": 1.1189383967953562e-05,
      "loss": 0.8895,
      "step": 683580
    },
    {
      "epoch": 2.3958125238761143,
      "grad_norm": 2.75,
      "learning_rate": 1.118873493928986e-05,
      "loss": 0.8312,
      "step": 683590
    },
    {
      "epoch": 2.3958475713830096,
      "grad_norm": 2.921875,
      "learning_rate": 1.1188085910626158e-05,
      "loss": 0.7897,
      "step": 683600
    },
    {
      "epoch": 2.3958826188899054,
      "grad_norm": 3.125,
      "learning_rate": 1.1187436881962456e-05,
      "loss": 0.8319,
      "step": 683610
    },
    {
      "epoch": 2.3959176663968007,
      "grad_norm": 2.609375,
      "learning_rate": 1.1186787853298754e-05,
      "loss": 0.8951,
      "step": 683620
    },
    {
      "epoch": 2.3959527139036965,
      "grad_norm": 2.359375,
      "learning_rate": 1.1186138824635052e-05,
      "loss": 0.774,
      "step": 683630
    },
    {
      "epoch": 2.395987761410592,
      "grad_norm": 3.3125,
      "learning_rate": 1.118548979597135e-05,
      "loss": 0.7778,
      "step": 683640
    },
    {
      "epoch": 2.3960228089174875,
      "grad_norm": 2.8125,
      "learning_rate": 1.118484076730765e-05,
      "loss": 0.7521,
      "step": 683650
    },
    {
      "epoch": 2.3960578564243833,
      "grad_norm": 2.515625,
      "learning_rate": 1.1184191738643946e-05,
      "loss": 0.7685,
      "step": 683660
    },
    {
      "epoch": 2.3960929039312786,
      "grad_norm": 3.140625,
      "learning_rate": 1.1183542709980244e-05,
      "loss": 0.7791,
      "step": 683670
    },
    {
      "epoch": 2.3961279514381744,
      "grad_norm": 2.5625,
      "learning_rate": 1.1182893681316542e-05,
      "loss": 0.8131,
      "step": 683680
    },
    {
      "epoch": 2.39616299894507,
      "grad_norm": 2.90625,
      "learning_rate": 1.118224465265284e-05,
      "loss": 0.8055,
      "step": 683690
    },
    {
      "epoch": 2.396198046451966,
      "grad_norm": 2.78125,
      "learning_rate": 1.1181595623989138e-05,
      "loss": 0.7688,
      "step": 683700
    },
    {
      "epoch": 2.396233093958861,
      "grad_norm": 2.640625,
      "learning_rate": 1.1180946595325437e-05,
      "loss": 0.7895,
      "step": 683710
    },
    {
      "epoch": 2.396268141465757,
      "grad_norm": 2.890625,
      "learning_rate": 1.1180297566661735e-05,
      "loss": 0.784,
      "step": 683720
    },
    {
      "epoch": 2.3963031889726523,
      "grad_norm": 2.484375,
      "learning_rate": 1.1179648537998033e-05,
      "loss": 0.7661,
      "step": 683730
    },
    {
      "epoch": 2.396338236479548,
      "grad_norm": 3.03125,
      "learning_rate": 1.1178999509334331e-05,
      "loss": 0.7629,
      "step": 683740
    },
    {
      "epoch": 2.396373283986444,
      "grad_norm": 2.921875,
      "learning_rate": 1.1178350480670628e-05,
      "loss": 0.8098,
      "step": 683750
    },
    {
      "epoch": 2.396408331493339,
      "grad_norm": 2.734375,
      "learning_rate": 1.1177701452006926e-05,
      "loss": 0.7995,
      "step": 683760
    },
    {
      "epoch": 2.396443379000235,
      "grad_norm": 3.609375,
      "learning_rate": 1.1177052423343225e-05,
      "loss": 0.8908,
      "step": 683770
    },
    {
      "epoch": 2.39647842650713,
      "grad_norm": 3.046875,
      "learning_rate": 1.1176403394679523e-05,
      "loss": 0.792,
      "step": 683780
    },
    {
      "epoch": 2.396513474014026,
      "grad_norm": 3.109375,
      "learning_rate": 1.1175754366015821e-05,
      "loss": 0.825,
      "step": 683790
    },
    {
      "epoch": 2.3965485215209217,
      "grad_norm": 3.03125,
      "learning_rate": 1.117510533735212e-05,
      "loss": 0.8221,
      "step": 683800
    },
    {
      "epoch": 2.3965835690278174,
      "grad_norm": 3.0625,
      "learning_rate": 1.1174456308688417e-05,
      "loss": 0.8029,
      "step": 683810
    },
    {
      "epoch": 2.3966186165347128,
      "grad_norm": 2.546875,
      "learning_rate": 1.1173807280024715e-05,
      "loss": 0.7572,
      "step": 683820
    },
    {
      "epoch": 2.3966536640416085,
      "grad_norm": 3.125,
      "learning_rate": 1.1173158251361013e-05,
      "loss": 0.843,
      "step": 683830
    },
    {
      "epoch": 2.396688711548504,
      "grad_norm": 2.359375,
      "learning_rate": 1.1172509222697313e-05,
      "loss": 0.8293,
      "step": 683840
    },
    {
      "epoch": 2.3967237590553996,
      "grad_norm": 2.734375,
      "learning_rate": 1.117186019403361e-05,
      "loss": 0.7399,
      "step": 683850
    },
    {
      "epoch": 2.3967588065622953,
      "grad_norm": 3.359375,
      "learning_rate": 1.1171211165369907e-05,
      "loss": 0.8566,
      "step": 683860
    },
    {
      "epoch": 2.3967938540691907,
      "grad_norm": 3.125,
      "learning_rate": 1.1170562136706205e-05,
      "loss": 0.8063,
      "step": 683870
    },
    {
      "epoch": 2.3968289015760864,
      "grad_norm": 2.890625,
      "learning_rate": 1.1169913108042503e-05,
      "loss": 0.7711,
      "step": 683880
    },
    {
      "epoch": 2.396863949082982,
      "grad_norm": 3.015625,
      "learning_rate": 1.1169264079378803e-05,
      "loss": 0.7782,
      "step": 683890
    },
    {
      "epoch": 2.3968989965898775,
      "grad_norm": 2.515625,
      "learning_rate": 1.1168615050715101e-05,
      "loss": 0.8715,
      "step": 683900
    },
    {
      "epoch": 2.3969340440967732,
      "grad_norm": 2.578125,
      "learning_rate": 1.1167966022051399e-05,
      "loss": 0.7552,
      "step": 683910
    },
    {
      "epoch": 2.396969091603669,
      "grad_norm": 3.390625,
      "learning_rate": 1.1167316993387697e-05,
      "loss": 0.766,
      "step": 683920
    },
    {
      "epoch": 2.3970041391105643,
      "grad_norm": 2.84375,
      "learning_rate": 1.1166667964723995e-05,
      "loss": 0.8295,
      "step": 683930
    },
    {
      "epoch": 2.39703918661746,
      "grad_norm": 3.0625,
      "learning_rate": 1.1166018936060291e-05,
      "loss": 0.7978,
      "step": 683940
    },
    {
      "epoch": 2.3970742341243554,
      "grad_norm": 3.046875,
      "learning_rate": 1.1165369907396591e-05,
      "loss": 0.7326,
      "step": 683950
    },
    {
      "epoch": 2.397109281631251,
      "grad_norm": 3.1875,
      "learning_rate": 1.1164720878732889e-05,
      "loss": 0.8545,
      "step": 683960
    },
    {
      "epoch": 2.397144329138147,
      "grad_norm": 3.046875,
      "learning_rate": 1.1164071850069187e-05,
      "loss": 0.8285,
      "step": 683970
    },
    {
      "epoch": 2.397179376645042,
      "grad_norm": 2.5625,
      "learning_rate": 1.1163422821405485e-05,
      "loss": 0.8628,
      "step": 683980
    },
    {
      "epoch": 2.397214424151938,
      "grad_norm": 2.8125,
      "learning_rate": 1.1162773792741783e-05,
      "loss": 0.7718,
      "step": 683990
    },
    {
      "epoch": 2.3972494716588337,
      "grad_norm": 2.203125,
      "learning_rate": 1.1162124764078081e-05,
      "loss": 0.7353,
      "step": 684000
    },
    {
      "epoch": 2.397284519165729,
      "grad_norm": 2.703125,
      "learning_rate": 1.1161475735414379e-05,
      "loss": 0.7541,
      "step": 684010
    },
    {
      "epoch": 2.397319566672625,
      "grad_norm": 3.0,
      "learning_rate": 1.1160826706750679e-05,
      "loss": 0.7679,
      "step": 684020
    },
    {
      "epoch": 2.3973546141795206,
      "grad_norm": 3.46875,
      "learning_rate": 1.1160177678086975e-05,
      "loss": 0.881,
      "step": 684030
    },
    {
      "epoch": 2.397389661686416,
      "grad_norm": 2.84375,
      "learning_rate": 1.1159528649423273e-05,
      "loss": 0.8417,
      "step": 684040
    },
    {
      "epoch": 2.3974247091933116,
      "grad_norm": 2.46875,
      "learning_rate": 1.1158879620759571e-05,
      "loss": 0.7696,
      "step": 684050
    },
    {
      "epoch": 2.397459756700207,
      "grad_norm": 2.84375,
      "learning_rate": 1.1158230592095869e-05,
      "loss": 0.771,
      "step": 684060
    },
    {
      "epoch": 2.3974948042071027,
      "grad_norm": 3.25,
      "learning_rate": 1.1157581563432167e-05,
      "loss": 0.7994,
      "step": 684070
    },
    {
      "epoch": 2.3975298517139985,
      "grad_norm": 2.953125,
      "learning_rate": 1.1156932534768467e-05,
      "loss": 0.7718,
      "step": 684080
    },
    {
      "epoch": 2.3975648992208938,
      "grad_norm": 2.890625,
      "learning_rate": 1.1156283506104765e-05,
      "loss": 0.8147,
      "step": 684090
    },
    {
      "epoch": 2.3975999467277895,
      "grad_norm": 2.4375,
      "learning_rate": 1.1155634477441063e-05,
      "loss": 0.7868,
      "step": 684100
    },
    {
      "epoch": 2.3976349942346853,
      "grad_norm": 2.59375,
      "learning_rate": 1.115498544877736e-05,
      "loss": 0.7214,
      "step": 684110
    },
    {
      "epoch": 2.3976700417415806,
      "grad_norm": 2.921875,
      "learning_rate": 1.1154336420113659e-05,
      "loss": 0.8139,
      "step": 684120
    },
    {
      "epoch": 2.3977050892484764,
      "grad_norm": 2.78125,
      "learning_rate": 1.1153687391449957e-05,
      "loss": 0.7801,
      "step": 684130
    },
    {
      "epoch": 2.397740136755372,
      "grad_norm": 2.5,
      "learning_rate": 1.1153038362786255e-05,
      "loss": 0.8195,
      "step": 684140
    },
    {
      "epoch": 2.3977751842622674,
      "grad_norm": 2.8125,
      "learning_rate": 1.1152389334122553e-05,
      "loss": 0.8439,
      "step": 684150
    },
    {
      "epoch": 2.397810231769163,
      "grad_norm": 3.359375,
      "learning_rate": 1.115174030545885e-05,
      "loss": 0.9354,
      "step": 684160
    },
    {
      "epoch": 2.3978452792760585,
      "grad_norm": 2.921875,
      "learning_rate": 1.1151091276795149e-05,
      "loss": 0.7459,
      "step": 684170
    },
    {
      "epoch": 2.3978803267829543,
      "grad_norm": 2.5625,
      "learning_rate": 1.1150442248131447e-05,
      "loss": 0.8252,
      "step": 684180
    },
    {
      "epoch": 2.39791537428985,
      "grad_norm": 2.609375,
      "learning_rate": 1.1149793219467745e-05,
      "loss": 0.8899,
      "step": 684190
    },
    {
      "epoch": 2.3979504217967453,
      "grad_norm": 2.8125,
      "learning_rate": 1.1149144190804044e-05,
      "loss": 0.7831,
      "step": 684200
    },
    {
      "epoch": 2.397985469303641,
      "grad_norm": 2.4375,
      "learning_rate": 1.1148495162140342e-05,
      "loss": 0.7232,
      "step": 684210
    },
    {
      "epoch": 2.398020516810537,
      "grad_norm": 3.34375,
      "learning_rate": 1.1147846133476639e-05,
      "loss": 0.8704,
      "step": 684220
    },
    {
      "epoch": 2.398055564317432,
      "grad_norm": 2.71875,
      "learning_rate": 1.1147197104812937e-05,
      "loss": 0.7822,
      "step": 684230
    },
    {
      "epoch": 2.398090611824328,
      "grad_norm": 2.859375,
      "learning_rate": 1.1146548076149235e-05,
      "loss": 0.7423,
      "step": 684240
    },
    {
      "epoch": 2.3981256593312237,
      "grad_norm": 3.03125,
      "learning_rate": 1.1145899047485533e-05,
      "loss": 0.8483,
      "step": 684250
    },
    {
      "epoch": 2.398160706838119,
      "grad_norm": 3.65625,
      "learning_rate": 1.1145250018821832e-05,
      "loss": 0.8939,
      "step": 684260
    },
    {
      "epoch": 2.3981957543450148,
      "grad_norm": 3.46875,
      "learning_rate": 1.114460099015813e-05,
      "loss": 0.8383,
      "step": 684270
    },
    {
      "epoch": 2.39823080185191,
      "grad_norm": 3.234375,
      "learning_rate": 1.1143951961494428e-05,
      "loss": 0.769,
      "step": 684280
    },
    {
      "epoch": 2.398265849358806,
      "grad_norm": 3.109375,
      "learning_rate": 1.1143302932830726e-05,
      "loss": 0.7177,
      "step": 684290
    },
    {
      "epoch": 2.3983008968657016,
      "grad_norm": 2.78125,
      "learning_rate": 1.1142653904167024e-05,
      "loss": 0.8783,
      "step": 684300
    },
    {
      "epoch": 2.398335944372597,
      "grad_norm": 2.859375,
      "learning_rate": 1.1142004875503322e-05,
      "loss": 0.8392,
      "step": 684310
    },
    {
      "epoch": 2.3983709918794927,
      "grad_norm": 3.140625,
      "learning_rate": 1.114135584683962e-05,
      "loss": 0.8145,
      "step": 684320
    },
    {
      "epoch": 2.3984060393863884,
      "grad_norm": 2.828125,
      "learning_rate": 1.1140706818175918e-05,
      "loss": 0.7909,
      "step": 684330
    },
    {
      "epoch": 2.3984410868932837,
      "grad_norm": 2.484375,
      "learning_rate": 1.1140057789512216e-05,
      "loss": 0.7465,
      "step": 684340
    },
    {
      "epoch": 2.3984761344001795,
      "grad_norm": 2.703125,
      "learning_rate": 1.1139408760848514e-05,
      "loss": 0.9011,
      "step": 684350
    },
    {
      "epoch": 2.3985111819070752,
      "grad_norm": 3.40625,
      "learning_rate": 1.1138759732184812e-05,
      "loss": 0.759,
      "step": 684360
    },
    {
      "epoch": 2.3985462294139706,
      "grad_norm": 2.953125,
      "learning_rate": 1.113811070352111e-05,
      "loss": 0.8407,
      "step": 684370
    },
    {
      "epoch": 2.3985812769208663,
      "grad_norm": 2.65625,
      "learning_rate": 1.1137461674857408e-05,
      "loss": 0.798,
      "step": 684380
    },
    {
      "epoch": 2.3986163244277616,
      "grad_norm": 3.25,
      "learning_rate": 1.1136812646193708e-05,
      "loss": 0.8663,
      "step": 684390
    },
    {
      "epoch": 2.3986513719346574,
      "grad_norm": 2.65625,
      "learning_rate": 1.1136163617530006e-05,
      "loss": 0.7965,
      "step": 684400
    },
    {
      "epoch": 2.398686419441553,
      "grad_norm": 2.734375,
      "learning_rate": 1.1135514588866302e-05,
      "loss": 0.8768,
      "step": 684410
    },
    {
      "epoch": 2.3987214669484485,
      "grad_norm": 2.875,
      "learning_rate": 1.11348655602026e-05,
      "loss": 0.7956,
      "step": 684420
    },
    {
      "epoch": 2.398756514455344,
      "grad_norm": 3.015625,
      "learning_rate": 1.1134216531538898e-05,
      "loss": 0.7515,
      "step": 684430
    },
    {
      "epoch": 2.39879156196224,
      "grad_norm": 3.09375,
      "learning_rate": 1.1133567502875198e-05,
      "loss": 0.7899,
      "step": 684440
    },
    {
      "epoch": 2.3988266094691353,
      "grad_norm": 3.015625,
      "learning_rate": 1.1132918474211496e-05,
      "loss": 0.8629,
      "step": 684450
    },
    {
      "epoch": 2.398861656976031,
      "grad_norm": 3.71875,
      "learning_rate": 1.1132269445547794e-05,
      "loss": 0.8628,
      "step": 684460
    },
    {
      "epoch": 2.398896704482927,
      "grad_norm": 3.296875,
      "learning_rate": 1.1131620416884092e-05,
      "loss": 0.822,
      "step": 684470
    },
    {
      "epoch": 2.398931751989822,
      "grad_norm": 3.015625,
      "learning_rate": 1.113097138822039e-05,
      "loss": 0.8176,
      "step": 684480
    },
    {
      "epoch": 2.398966799496718,
      "grad_norm": 3.03125,
      "learning_rate": 1.1130322359556688e-05,
      "loss": 0.7479,
      "step": 684490
    },
    {
      "epoch": 2.399001847003613,
      "grad_norm": 2.59375,
      "learning_rate": 1.1129673330892986e-05,
      "loss": 0.8583,
      "step": 684500
    },
    {
      "epoch": 2.399036894510509,
      "grad_norm": 2.890625,
      "learning_rate": 1.1129024302229284e-05,
      "loss": 0.8103,
      "step": 684510
    },
    {
      "epoch": 2.3990719420174047,
      "grad_norm": 2.984375,
      "learning_rate": 1.1128375273565582e-05,
      "loss": 0.8884,
      "step": 684520
    },
    {
      "epoch": 2.3991069895243,
      "grad_norm": 2.421875,
      "learning_rate": 1.112772624490188e-05,
      "loss": 0.8098,
      "step": 684530
    },
    {
      "epoch": 2.3991420370311958,
      "grad_norm": 2.5625,
      "learning_rate": 1.1127077216238178e-05,
      "loss": 0.7843,
      "step": 684540
    },
    {
      "epoch": 2.3991770845380915,
      "grad_norm": 2.65625,
      "learning_rate": 1.1126428187574476e-05,
      "loss": 0.7331,
      "step": 684550
    },
    {
      "epoch": 2.399212132044987,
      "grad_norm": 2.796875,
      "learning_rate": 1.1125779158910774e-05,
      "loss": 0.7595,
      "step": 684560
    },
    {
      "epoch": 2.3992471795518826,
      "grad_norm": 2.78125,
      "learning_rate": 1.1125130130247074e-05,
      "loss": 0.8053,
      "step": 684570
    },
    {
      "epoch": 2.3992822270587784,
      "grad_norm": 3.125,
      "learning_rate": 1.1124481101583372e-05,
      "loss": 0.8301,
      "step": 684580
    },
    {
      "epoch": 2.3993172745656737,
      "grad_norm": 3.109375,
      "learning_rate": 1.112383207291967e-05,
      "loss": 0.8233,
      "step": 684590
    },
    {
      "epoch": 2.3993523220725694,
      "grad_norm": 2.296875,
      "learning_rate": 1.1123183044255966e-05,
      "loss": 0.81,
      "step": 684600
    },
    {
      "epoch": 2.3993873695794647,
      "grad_norm": 2.84375,
      "learning_rate": 1.1122534015592264e-05,
      "loss": 0.7691,
      "step": 684610
    },
    {
      "epoch": 2.3994224170863605,
      "grad_norm": 3.0,
      "learning_rate": 1.1121884986928562e-05,
      "loss": 0.8265,
      "step": 684620
    },
    {
      "epoch": 2.3994574645932563,
      "grad_norm": 3.125,
      "learning_rate": 1.1121235958264862e-05,
      "loss": 0.8385,
      "step": 684630
    },
    {
      "epoch": 2.3994925121001516,
      "grad_norm": 3.203125,
      "learning_rate": 1.112058692960116e-05,
      "loss": 0.8509,
      "step": 684640
    },
    {
      "epoch": 2.3995275596070473,
      "grad_norm": 2.734375,
      "learning_rate": 1.1119937900937458e-05,
      "loss": 0.8045,
      "step": 684650
    },
    {
      "epoch": 2.399562607113943,
      "grad_norm": 2.609375,
      "learning_rate": 1.1119288872273756e-05,
      "loss": 0.7674,
      "step": 684660
    },
    {
      "epoch": 2.3995976546208384,
      "grad_norm": 2.578125,
      "learning_rate": 1.1118639843610054e-05,
      "loss": 0.8079,
      "step": 684670
    },
    {
      "epoch": 2.399632702127734,
      "grad_norm": 3.03125,
      "learning_rate": 1.1117990814946352e-05,
      "loss": 0.9207,
      "step": 684680
    },
    {
      "epoch": 2.39966774963463,
      "grad_norm": 3.203125,
      "learning_rate": 1.111734178628265e-05,
      "loss": 0.8113,
      "step": 684690
    },
    {
      "epoch": 2.3997027971415252,
      "grad_norm": 2.859375,
      "learning_rate": 1.1116692757618948e-05,
      "loss": 0.8958,
      "step": 684700
    },
    {
      "epoch": 2.399737844648421,
      "grad_norm": 2.875,
      "learning_rate": 1.1116043728955246e-05,
      "loss": 0.7838,
      "step": 684710
    },
    {
      "epoch": 2.3997728921553163,
      "grad_norm": 2.890625,
      "learning_rate": 1.1115394700291544e-05,
      "loss": 0.8029,
      "step": 684720
    },
    {
      "epoch": 2.399807939662212,
      "grad_norm": 2.8125,
      "learning_rate": 1.1114745671627842e-05,
      "loss": 0.7585,
      "step": 684730
    },
    {
      "epoch": 2.399842987169108,
      "grad_norm": 2.984375,
      "learning_rate": 1.111409664296414e-05,
      "loss": 0.8517,
      "step": 684740
    },
    {
      "epoch": 2.399878034676003,
      "grad_norm": 3.234375,
      "learning_rate": 1.111344761430044e-05,
      "loss": 0.8523,
      "step": 684750
    },
    {
      "epoch": 2.399913082182899,
      "grad_norm": 2.796875,
      "learning_rate": 1.1112798585636738e-05,
      "loss": 0.8668,
      "step": 684760
    },
    {
      "epoch": 2.3999481296897947,
      "grad_norm": 2.546875,
      "learning_rate": 1.1112149556973036e-05,
      "loss": 0.7582,
      "step": 684770
    },
    {
      "epoch": 2.39998317719669,
      "grad_norm": 2.359375,
      "learning_rate": 1.1111500528309334e-05,
      "loss": 0.8338,
      "step": 684780
    },
    {
      "epoch": 2.4000182247035857,
      "grad_norm": 3.0,
      "learning_rate": 1.111085149964563e-05,
      "loss": 0.7821,
      "step": 684790
    },
    {
      "epoch": 2.4000532722104815,
      "grad_norm": 2.5,
      "learning_rate": 1.1110202470981928e-05,
      "loss": 0.7729,
      "step": 684800
    },
    {
      "epoch": 2.400088319717377,
      "grad_norm": 3.296875,
      "learning_rate": 1.1109553442318228e-05,
      "loss": 0.8189,
      "step": 684810
    },
    {
      "epoch": 2.4001233672242726,
      "grad_norm": 3.0625,
      "learning_rate": 1.1108904413654526e-05,
      "loss": 0.8077,
      "step": 684820
    },
    {
      "epoch": 2.400158414731168,
      "grad_norm": 4.78125,
      "learning_rate": 1.1108255384990824e-05,
      "loss": 0.7893,
      "step": 684830
    },
    {
      "epoch": 2.4001934622380636,
      "grad_norm": 3.015625,
      "learning_rate": 1.1107606356327122e-05,
      "loss": 0.8069,
      "step": 684840
    },
    {
      "epoch": 2.4002285097449594,
      "grad_norm": 2.703125,
      "learning_rate": 1.110695732766342e-05,
      "loss": 0.8662,
      "step": 684850
    },
    {
      "epoch": 2.4002635572518547,
      "grad_norm": 2.90625,
      "learning_rate": 1.1106308298999718e-05,
      "loss": 0.7755,
      "step": 684860
    },
    {
      "epoch": 2.4002986047587505,
      "grad_norm": 2.78125,
      "learning_rate": 1.1105659270336016e-05,
      "loss": 0.8435,
      "step": 684870
    },
    {
      "epoch": 2.400333652265646,
      "grad_norm": 3.4375,
      "learning_rate": 1.1105010241672314e-05,
      "loss": 0.8133,
      "step": 684880
    },
    {
      "epoch": 2.4003686997725415,
      "grad_norm": 2.828125,
      "learning_rate": 1.1104361213008612e-05,
      "loss": 0.8159,
      "step": 684890
    },
    {
      "epoch": 2.4004037472794373,
      "grad_norm": 2.96875,
      "learning_rate": 1.110371218434491e-05,
      "loss": 0.8304,
      "step": 684900
    },
    {
      "epoch": 2.400438794786333,
      "grad_norm": 2.453125,
      "learning_rate": 1.1103063155681208e-05,
      "loss": 0.7578,
      "step": 684910
    },
    {
      "epoch": 2.4004738422932284,
      "grad_norm": 2.609375,
      "learning_rate": 1.1102414127017506e-05,
      "loss": 0.8498,
      "step": 684920
    },
    {
      "epoch": 2.400508889800124,
      "grad_norm": 2.625,
      "learning_rate": 1.1101765098353804e-05,
      "loss": 0.7715,
      "step": 684930
    },
    {
      "epoch": 2.4005439373070194,
      "grad_norm": 2.65625,
      "learning_rate": 1.1101116069690103e-05,
      "loss": 0.7048,
      "step": 684940
    },
    {
      "epoch": 2.400578984813915,
      "grad_norm": 3.015625,
      "learning_rate": 1.1100467041026401e-05,
      "loss": 0.8352,
      "step": 684950
    },
    {
      "epoch": 2.400614032320811,
      "grad_norm": 2.875,
      "learning_rate": 1.10998180123627e-05,
      "loss": 0.856,
      "step": 684960
    },
    {
      "epoch": 2.4006490798277067,
      "grad_norm": 2.625,
      "learning_rate": 1.1099168983698996e-05,
      "loss": 0.8294,
      "step": 684970
    },
    {
      "epoch": 2.400684127334602,
      "grad_norm": 2.515625,
      "learning_rate": 1.1098519955035294e-05,
      "loss": 0.8329,
      "step": 684980
    },
    {
      "epoch": 2.4007191748414978,
      "grad_norm": 2.515625,
      "learning_rate": 1.1097870926371593e-05,
      "loss": 0.7887,
      "step": 684990
    },
    {
      "epoch": 2.400754222348393,
      "grad_norm": 3.28125,
      "learning_rate": 1.1097221897707891e-05,
      "loss": 0.8241,
      "step": 685000
    },
    {
      "epoch": 2.400754222348393,
      "eval_loss": 0.7591322064399719,
      "eval_runtime": 565.2901,
      "eval_samples_per_second": 672.992,
      "eval_steps_per_second": 56.083,
      "step": 685000
    },
    {
      "epoch": 2.400789269855289,
      "grad_norm": 2.90625,
      "learning_rate": 1.109657286904419e-05,
      "loss": 0.851,
      "step": 685010
    },
    {
      "epoch": 2.4008243173621846,
      "grad_norm": 2.53125,
      "learning_rate": 1.1095923840380487e-05,
      "loss": 0.769,
      "step": 685020
    },
    {
      "epoch": 2.40085936486908,
      "grad_norm": 2.546875,
      "learning_rate": 1.1095274811716785e-05,
      "loss": 0.803,
      "step": 685030
    },
    {
      "epoch": 2.4008944123759757,
      "grad_norm": 3.140625,
      "learning_rate": 1.1094625783053083e-05,
      "loss": 0.8599,
      "step": 685040
    },
    {
      "epoch": 2.400929459882871,
      "grad_norm": 2.5625,
      "learning_rate": 1.1093976754389381e-05,
      "loss": 0.8488,
      "step": 685050
    },
    {
      "epoch": 2.4009645073897667,
      "grad_norm": 2.84375,
      "learning_rate": 1.1093327725725681e-05,
      "loss": 0.7691,
      "step": 685060
    },
    {
      "epoch": 2.4009995548966625,
      "grad_norm": 3.1875,
      "learning_rate": 1.1092678697061977e-05,
      "loss": 0.8291,
      "step": 685070
    },
    {
      "epoch": 2.4010346024035583,
      "grad_norm": 2.640625,
      "learning_rate": 1.1092029668398275e-05,
      "loss": 0.8043,
      "step": 685080
    },
    {
      "epoch": 2.4010696499104536,
      "grad_norm": 2.875,
      "learning_rate": 1.1091380639734573e-05,
      "loss": 0.8601,
      "step": 685090
    },
    {
      "epoch": 2.4011046974173493,
      "grad_norm": 3.171875,
      "learning_rate": 1.1090731611070871e-05,
      "loss": 0.8234,
      "step": 685100
    },
    {
      "epoch": 2.4011397449242446,
      "grad_norm": 3.28125,
      "learning_rate": 1.109008258240717e-05,
      "loss": 0.7772,
      "step": 685110
    },
    {
      "epoch": 2.4011747924311404,
      "grad_norm": 2.796875,
      "learning_rate": 1.1089433553743469e-05,
      "loss": 0.8142,
      "step": 685120
    },
    {
      "epoch": 2.401209839938036,
      "grad_norm": 3.15625,
      "learning_rate": 1.1088784525079767e-05,
      "loss": 0.8632,
      "step": 685130
    },
    {
      "epoch": 2.4012448874449315,
      "grad_norm": 2.640625,
      "learning_rate": 1.1088135496416065e-05,
      "loss": 0.8028,
      "step": 685140
    },
    {
      "epoch": 2.4012799349518272,
      "grad_norm": 3.015625,
      "learning_rate": 1.1087486467752363e-05,
      "loss": 0.8004,
      "step": 685150
    },
    {
      "epoch": 2.401314982458723,
      "grad_norm": 2.484375,
      "learning_rate": 1.108683743908866e-05,
      "loss": 0.7729,
      "step": 685160
    },
    {
      "epoch": 2.4013500299656183,
      "grad_norm": 2.78125,
      "learning_rate": 1.1086188410424957e-05,
      "loss": 0.8649,
      "step": 685170
    },
    {
      "epoch": 2.401385077472514,
      "grad_norm": 2.890625,
      "learning_rate": 1.1085539381761257e-05,
      "loss": 0.8467,
      "step": 685180
    },
    {
      "epoch": 2.40142012497941,
      "grad_norm": 2.65625,
      "learning_rate": 1.1084890353097555e-05,
      "loss": 0.7875,
      "step": 685190
    },
    {
      "epoch": 2.401455172486305,
      "grad_norm": 3.296875,
      "learning_rate": 1.1084241324433853e-05,
      "loss": 0.7093,
      "step": 685200
    },
    {
      "epoch": 2.401490219993201,
      "grad_norm": 3.0,
      "learning_rate": 1.1083592295770151e-05,
      "loss": 0.8231,
      "step": 685210
    },
    {
      "epoch": 2.401525267500096,
      "grad_norm": 3.09375,
      "learning_rate": 1.1082943267106449e-05,
      "loss": 0.8112,
      "step": 685220
    },
    {
      "epoch": 2.401560315006992,
      "grad_norm": 3.25,
      "learning_rate": 1.1082294238442747e-05,
      "loss": 0.8307,
      "step": 685230
    },
    {
      "epoch": 2.4015953625138877,
      "grad_norm": 2.921875,
      "learning_rate": 1.1081645209779045e-05,
      "loss": 0.7948,
      "step": 685240
    },
    {
      "epoch": 2.401630410020783,
      "grad_norm": 2.953125,
      "learning_rate": 1.1080996181115345e-05,
      "loss": 0.8245,
      "step": 685250
    },
    {
      "epoch": 2.401665457527679,
      "grad_norm": 2.765625,
      "learning_rate": 1.1080347152451641e-05,
      "loss": 0.84,
      "step": 685260
    },
    {
      "epoch": 2.4017005050345746,
      "grad_norm": 3.5,
      "learning_rate": 1.1079698123787939e-05,
      "loss": 0.8333,
      "step": 685270
    },
    {
      "epoch": 2.40173555254147,
      "grad_norm": 2.890625,
      "learning_rate": 1.1079049095124237e-05,
      "loss": 0.7372,
      "step": 685280
    },
    {
      "epoch": 2.4017706000483656,
      "grad_norm": 3.25,
      "learning_rate": 1.1078400066460535e-05,
      "loss": 0.8248,
      "step": 685290
    },
    {
      "epoch": 2.4018056475552614,
      "grad_norm": 2.96875,
      "learning_rate": 1.1077751037796835e-05,
      "loss": 0.8232,
      "step": 685300
    },
    {
      "epoch": 2.4018406950621567,
      "grad_norm": 2.78125,
      "learning_rate": 1.1077102009133133e-05,
      "loss": 0.7991,
      "step": 685310
    },
    {
      "epoch": 2.4018757425690525,
      "grad_norm": 2.59375,
      "learning_rate": 1.107645298046943e-05,
      "loss": 0.8502,
      "step": 685320
    },
    {
      "epoch": 2.4019107900759478,
      "grad_norm": 2.53125,
      "learning_rate": 1.1075803951805729e-05,
      "loss": 0.7829,
      "step": 685330
    },
    {
      "epoch": 2.4019458375828435,
      "grad_norm": 2.859375,
      "learning_rate": 1.1075154923142027e-05,
      "loss": 0.7945,
      "step": 685340
    },
    {
      "epoch": 2.4019808850897393,
      "grad_norm": 3.015625,
      "learning_rate": 1.1074505894478323e-05,
      "loss": 0.8599,
      "step": 685350
    },
    {
      "epoch": 2.4020159325966346,
      "grad_norm": 2.84375,
      "learning_rate": 1.1073856865814623e-05,
      "loss": 0.8131,
      "step": 685360
    },
    {
      "epoch": 2.4020509801035304,
      "grad_norm": 2.734375,
      "learning_rate": 1.107320783715092e-05,
      "loss": 0.723,
      "step": 685370
    },
    {
      "epoch": 2.402086027610426,
      "grad_norm": 3.828125,
      "learning_rate": 1.1072558808487219e-05,
      "loss": 0.9594,
      "step": 685380
    },
    {
      "epoch": 2.4021210751173214,
      "grad_norm": 3.109375,
      "learning_rate": 1.1071909779823517e-05,
      "loss": 0.7954,
      "step": 685390
    },
    {
      "epoch": 2.402156122624217,
      "grad_norm": 2.8125,
      "learning_rate": 1.1071260751159815e-05,
      "loss": 0.8521,
      "step": 685400
    },
    {
      "epoch": 2.402191170131113,
      "grad_norm": 3.34375,
      "learning_rate": 1.1070611722496113e-05,
      "loss": 0.8689,
      "step": 685410
    },
    {
      "epoch": 2.4022262176380083,
      "grad_norm": 2.65625,
      "learning_rate": 1.106996269383241e-05,
      "loss": 0.831,
      "step": 685420
    },
    {
      "epoch": 2.402261265144904,
      "grad_norm": 2.625,
      "learning_rate": 1.106931366516871e-05,
      "loss": 0.8248,
      "step": 685430
    },
    {
      "epoch": 2.4022963126517993,
      "grad_norm": 3.0,
      "learning_rate": 1.1068664636505007e-05,
      "loss": 0.8079,
      "step": 685440
    },
    {
      "epoch": 2.402331360158695,
      "grad_norm": 3.296875,
      "learning_rate": 1.1068015607841305e-05,
      "loss": 0.7057,
      "step": 685450
    },
    {
      "epoch": 2.402366407665591,
      "grad_norm": 2.71875,
      "learning_rate": 1.1067366579177603e-05,
      "loss": 0.7874,
      "step": 685460
    },
    {
      "epoch": 2.402401455172486,
      "grad_norm": 3.0,
      "learning_rate": 1.10667175505139e-05,
      "loss": 0.8031,
      "step": 685470
    },
    {
      "epoch": 2.402436502679382,
      "grad_norm": 2.890625,
      "learning_rate": 1.1066068521850199e-05,
      "loss": 0.7877,
      "step": 685480
    },
    {
      "epoch": 2.4024715501862777,
      "grad_norm": 2.859375,
      "learning_rate": 1.1065419493186498e-05,
      "loss": 0.766,
      "step": 685490
    },
    {
      "epoch": 2.402506597693173,
      "grad_norm": 2.828125,
      "learning_rate": 1.1064770464522796e-05,
      "loss": 0.7773,
      "step": 685500
    },
    {
      "epoch": 2.4025416452000687,
      "grad_norm": 2.890625,
      "learning_rate": 1.1064121435859094e-05,
      "loss": 0.8039,
      "step": 685510
    },
    {
      "epoch": 2.4025766927069645,
      "grad_norm": 2.984375,
      "learning_rate": 1.1063472407195392e-05,
      "loss": 0.885,
      "step": 685520
    },
    {
      "epoch": 2.40261174021386,
      "grad_norm": 2.953125,
      "learning_rate": 1.106282337853169e-05,
      "loss": 0.8603,
      "step": 685530
    },
    {
      "epoch": 2.4026467877207556,
      "grad_norm": 3.15625,
      "learning_rate": 1.1062174349867988e-05,
      "loss": 0.8127,
      "step": 685540
    },
    {
      "epoch": 2.402681835227651,
      "grad_norm": 3.109375,
      "learning_rate": 1.1061525321204286e-05,
      "loss": 0.7421,
      "step": 685550
    },
    {
      "epoch": 2.4027168827345466,
      "grad_norm": 2.71875,
      "learning_rate": 1.1060876292540584e-05,
      "loss": 0.7533,
      "step": 685560
    },
    {
      "epoch": 2.4027519302414424,
      "grad_norm": 2.875,
      "learning_rate": 1.1060227263876882e-05,
      "loss": 0.8065,
      "step": 685570
    },
    {
      "epoch": 2.4027869777483377,
      "grad_norm": 2.421875,
      "learning_rate": 1.105957823521318e-05,
      "loss": 0.8207,
      "step": 685580
    },
    {
      "epoch": 2.4028220252552335,
      "grad_norm": 2.421875,
      "learning_rate": 1.1058929206549478e-05,
      "loss": 0.7567,
      "step": 685590
    },
    {
      "epoch": 2.4028570727621292,
      "grad_norm": 3.21875,
      "learning_rate": 1.1058280177885776e-05,
      "loss": 0.8175,
      "step": 685600
    },
    {
      "epoch": 2.4028921202690245,
      "grad_norm": 2.578125,
      "learning_rate": 1.1057631149222076e-05,
      "loss": 0.7902,
      "step": 685610
    },
    {
      "epoch": 2.4029271677759203,
      "grad_norm": 3.28125,
      "learning_rate": 1.1056982120558374e-05,
      "loss": 0.8148,
      "step": 685620
    },
    {
      "epoch": 2.402962215282816,
      "grad_norm": 3.359375,
      "learning_rate": 1.105633309189467e-05,
      "loss": 0.814,
      "step": 685630
    },
    {
      "epoch": 2.4029972627897114,
      "grad_norm": 3.25,
      "learning_rate": 1.1055684063230968e-05,
      "loss": 0.8367,
      "step": 685640
    },
    {
      "epoch": 2.403032310296607,
      "grad_norm": 3.109375,
      "learning_rate": 1.1055035034567266e-05,
      "loss": 0.8384,
      "step": 685650
    },
    {
      "epoch": 2.4030673578035024,
      "grad_norm": 2.890625,
      "learning_rate": 1.1054386005903564e-05,
      "loss": 0.7679,
      "step": 685660
    },
    {
      "epoch": 2.403102405310398,
      "grad_norm": 2.671875,
      "learning_rate": 1.1053736977239864e-05,
      "loss": 0.7807,
      "step": 685670
    },
    {
      "epoch": 2.403137452817294,
      "grad_norm": 2.953125,
      "learning_rate": 1.1053087948576162e-05,
      "loss": 0.8201,
      "step": 685680
    },
    {
      "epoch": 2.4031725003241893,
      "grad_norm": 3.09375,
      "learning_rate": 1.105243891991246e-05,
      "loss": 0.8408,
      "step": 685690
    },
    {
      "epoch": 2.403207547831085,
      "grad_norm": 2.8125,
      "learning_rate": 1.1051789891248758e-05,
      "loss": 0.853,
      "step": 685700
    },
    {
      "epoch": 2.403242595337981,
      "grad_norm": 3.015625,
      "learning_rate": 1.1051140862585056e-05,
      "loss": 0.842,
      "step": 685710
    },
    {
      "epoch": 2.403277642844876,
      "grad_norm": 2.65625,
      "learning_rate": 1.1050491833921354e-05,
      "loss": 0.834,
      "step": 685720
    },
    {
      "epoch": 2.403312690351772,
      "grad_norm": 2.53125,
      "learning_rate": 1.1049842805257652e-05,
      "loss": 0.8126,
      "step": 685730
    },
    {
      "epoch": 2.4033477378586676,
      "grad_norm": 3.109375,
      "learning_rate": 1.104919377659395e-05,
      "loss": 0.7938,
      "step": 685740
    },
    {
      "epoch": 2.403382785365563,
      "grad_norm": 3.515625,
      "learning_rate": 1.1048544747930248e-05,
      "loss": 0.8561,
      "step": 685750
    },
    {
      "epoch": 2.4034178328724587,
      "grad_norm": 2.6875,
      "learning_rate": 1.1047895719266546e-05,
      "loss": 0.7612,
      "step": 685760
    },
    {
      "epoch": 2.403452880379354,
      "grad_norm": 2.828125,
      "learning_rate": 1.1047246690602844e-05,
      "loss": 0.7894,
      "step": 685770
    },
    {
      "epoch": 2.4034879278862498,
      "grad_norm": 2.546875,
      "learning_rate": 1.1046597661939142e-05,
      "loss": 0.8143,
      "step": 685780
    },
    {
      "epoch": 2.4035229753931455,
      "grad_norm": 2.484375,
      "learning_rate": 1.104594863327544e-05,
      "loss": 0.7985,
      "step": 685790
    },
    {
      "epoch": 2.403558022900041,
      "grad_norm": 2.953125,
      "learning_rate": 1.104529960461174e-05,
      "loss": 0.8499,
      "step": 685800
    },
    {
      "epoch": 2.4035930704069366,
      "grad_norm": 2.59375,
      "learning_rate": 1.1044650575948038e-05,
      "loss": 0.8758,
      "step": 685810
    },
    {
      "epoch": 2.4036281179138324,
      "grad_norm": 2.8125,
      "learning_rate": 1.1044001547284334e-05,
      "loss": 0.7796,
      "step": 685820
    },
    {
      "epoch": 2.4036631654207277,
      "grad_norm": 3.25,
      "learning_rate": 1.1043352518620632e-05,
      "loss": 0.787,
      "step": 685830
    },
    {
      "epoch": 2.4036982129276234,
      "grad_norm": 2.6875,
      "learning_rate": 1.104270348995693e-05,
      "loss": 0.7958,
      "step": 685840
    },
    {
      "epoch": 2.403733260434519,
      "grad_norm": 2.78125,
      "learning_rate": 1.104205446129323e-05,
      "loss": 0.8073,
      "step": 685850
    },
    {
      "epoch": 2.4037683079414145,
      "grad_norm": 2.90625,
      "learning_rate": 1.1041405432629528e-05,
      "loss": 0.807,
      "step": 685860
    },
    {
      "epoch": 2.4038033554483103,
      "grad_norm": 2.796875,
      "learning_rate": 1.1040756403965826e-05,
      "loss": 0.7467,
      "step": 685870
    },
    {
      "epoch": 2.4038384029552056,
      "grad_norm": 2.84375,
      "learning_rate": 1.1040107375302124e-05,
      "loss": 0.7704,
      "step": 685880
    },
    {
      "epoch": 2.4038734504621013,
      "grad_norm": 2.9375,
      "learning_rate": 1.1039458346638422e-05,
      "loss": 0.7882,
      "step": 685890
    },
    {
      "epoch": 2.403908497968997,
      "grad_norm": 2.65625,
      "learning_rate": 1.103880931797472e-05,
      "loss": 0.8397,
      "step": 685900
    },
    {
      "epoch": 2.4039435454758924,
      "grad_norm": 3.21875,
      "learning_rate": 1.1038160289311018e-05,
      "loss": 0.7844,
      "step": 685910
    },
    {
      "epoch": 2.403978592982788,
      "grad_norm": 2.75,
      "learning_rate": 1.1037511260647316e-05,
      "loss": 0.715,
      "step": 685920
    },
    {
      "epoch": 2.404013640489684,
      "grad_norm": 3.0625,
      "learning_rate": 1.1036862231983614e-05,
      "loss": 0.8467,
      "step": 685930
    },
    {
      "epoch": 2.4040486879965792,
      "grad_norm": 3.453125,
      "learning_rate": 1.1036213203319912e-05,
      "loss": 0.8019,
      "step": 685940
    },
    {
      "epoch": 2.404083735503475,
      "grad_norm": 2.84375,
      "learning_rate": 1.103556417465621e-05,
      "loss": 0.8321,
      "step": 685950
    },
    {
      "epoch": 2.4041187830103707,
      "grad_norm": 2.859375,
      "learning_rate": 1.1034915145992508e-05,
      "loss": 0.8498,
      "step": 685960
    },
    {
      "epoch": 2.404153830517266,
      "grad_norm": 3.140625,
      "learning_rate": 1.1034266117328806e-05,
      "loss": 0.7901,
      "step": 685970
    },
    {
      "epoch": 2.404188878024162,
      "grad_norm": 3.296875,
      "learning_rate": 1.1033617088665105e-05,
      "loss": 0.8321,
      "step": 685980
    },
    {
      "epoch": 2.404223925531057,
      "grad_norm": 2.34375,
      "learning_rate": 1.1032968060001403e-05,
      "loss": 0.7431,
      "step": 685990
    },
    {
      "epoch": 2.404258973037953,
      "grad_norm": 2.609375,
      "learning_rate": 1.1032319031337701e-05,
      "loss": 0.8541,
      "step": 686000
    },
    {
      "epoch": 2.4042940205448486,
      "grad_norm": 3.1875,
      "learning_rate": 1.1031670002673998e-05,
      "loss": 0.8409,
      "step": 686010
    },
    {
      "epoch": 2.404329068051744,
      "grad_norm": 3.390625,
      "learning_rate": 1.1031020974010296e-05,
      "loss": 0.784,
      "step": 686020
    },
    {
      "epoch": 2.4043641155586397,
      "grad_norm": 3.015625,
      "learning_rate": 1.1030371945346594e-05,
      "loss": 0.8406,
      "step": 686030
    },
    {
      "epoch": 2.4043991630655355,
      "grad_norm": 3.0625,
      "learning_rate": 1.1029722916682893e-05,
      "loss": 0.8717,
      "step": 686040
    },
    {
      "epoch": 2.404434210572431,
      "grad_norm": 2.8125,
      "learning_rate": 1.1029073888019191e-05,
      "loss": 0.7435,
      "step": 686050
    },
    {
      "epoch": 2.4044692580793265,
      "grad_norm": 2.75,
      "learning_rate": 1.102842485935549e-05,
      "loss": 0.7704,
      "step": 686060
    },
    {
      "epoch": 2.4045043055862223,
      "grad_norm": 2.890625,
      "learning_rate": 1.1027775830691787e-05,
      "loss": 0.8224,
      "step": 686070
    },
    {
      "epoch": 2.4045393530931176,
      "grad_norm": 3.046875,
      "learning_rate": 1.1027126802028085e-05,
      "loss": 0.7416,
      "step": 686080
    },
    {
      "epoch": 2.4045744006000134,
      "grad_norm": 3.21875,
      "learning_rate": 1.1026477773364383e-05,
      "loss": 0.8288,
      "step": 686090
    },
    {
      "epoch": 2.4046094481069087,
      "grad_norm": 3.015625,
      "learning_rate": 1.1025828744700681e-05,
      "loss": 0.7633,
      "step": 686100
    },
    {
      "epoch": 2.4046444956138044,
      "grad_norm": 2.9375,
      "learning_rate": 1.102517971603698e-05,
      "loss": 0.8479,
      "step": 686110
    },
    {
      "epoch": 2.4046795431207,
      "grad_norm": 2.546875,
      "learning_rate": 1.1024530687373277e-05,
      "loss": 0.8882,
      "step": 686120
    },
    {
      "epoch": 2.4047145906275955,
      "grad_norm": 2.8125,
      "learning_rate": 1.1023881658709575e-05,
      "loss": 0.8204,
      "step": 686130
    },
    {
      "epoch": 2.4047496381344913,
      "grad_norm": 3.265625,
      "learning_rate": 1.1023232630045873e-05,
      "loss": 0.7948,
      "step": 686140
    },
    {
      "epoch": 2.404784685641387,
      "grad_norm": 3.125,
      "learning_rate": 1.1022583601382171e-05,
      "loss": 0.752,
      "step": 686150
    },
    {
      "epoch": 2.4048197331482823,
      "grad_norm": 3.0625,
      "learning_rate": 1.1021934572718471e-05,
      "loss": 0.8854,
      "step": 686160
    },
    {
      "epoch": 2.404854780655178,
      "grad_norm": 2.359375,
      "learning_rate": 1.1021285544054769e-05,
      "loss": 0.7654,
      "step": 686170
    },
    {
      "epoch": 2.404889828162074,
      "grad_norm": 2.890625,
      "learning_rate": 1.1020636515391067e-05,
      "loss": 0.7949,
      "step": 686180
    },
    {
      "epoch": 2.404924875668969,
      "grad_norm": 2.796875,
      "learning_rate": 1.1019987486727365e-05,
      "loss": 0.8351,
      "step": 686190
    },
    {
      "epoch": 2.404959923175865,
      "grad_norm": 2.96875,
      "learning_rate": 1.1019338458063661e-05,
      "loss": 0.8599,
      "step": 686200
    },
    {
      "epoch": 2.4049949706827602,
      "grad_norm": 2.6875,
      "learning_rate": 1.101868942939996e-05,
      "loss": 0.795,
      "step": 686210
    },
    {
      "epoch": 2.405030018189656,
      "grad_norm": 3.078125,
      "learning_rate": 1.1018040400736259e-05,
      "loss": 0.8141,
      "step": 686220
    },
    {
      "epoch": 2.4050650656965518,
      "grad_norm": 2.96875,
      "learning_rate": 1.1017391372072557e-05,
      "loss": 0.8366,
      "step": 686230
    },
    {
      "epoch": 2.4051001132034475,
      "grad_norm": 2.9375,
      "learning_rate": 1.1016742343408855e-05,
      "loss": 0.7648,
      "step": 686240
    },
    {
      "epoch": 2.405135160710343,
      "grad_norm": 2.96875,
      "learning_rate": 1.1016093314745153e-05,
      "loss": 0.8005,
      "step": 686250
    },
    {
      "epoch": 2.4051702082172386,
      "grad_norm": 2.46875,
      "learning_rate": 1.1015444286081451e-05,
      "loss": 0.8199,
      "step": 686260
    },
    {
      "epoch": 2.405205255724134,
      "grad_norm": 2.921875,
      "learning_rate": 1.1014795257417749e-05,
      "loss": 0.7575,
      "step": 686270
    },
    {
      "epoch": 2.4052403032310297,
      "grad_norm": 2.546875,
      "learning_rate": 1.1014146228754047e-05,
      "loss": 0.877,
      "step": 686280
    },
    {
      "epoch": 2.4052753507379254,
      "grad_norm": 2.9375,
      "learning_rate": 1.1013497200090345e-05,
      "loss": 0.7684,
      "step": 686290
    },
    {
      "epoch": 2.4053103982448207,
      "grad_norm": 2.78125,
      "learning_rate": 1.1012848171426643e-05,
      "loss": 0.7916,
      "step": 686300
    },
    {
      "epoch": 2.4053454457517165,
      "grad_norm": 3.1875,
      "learning_rate": 1.1012199142762941e-05,
      "loss": 0.8313,
      "step": 686310
    },
    {
      "epoch": 2.405380493258612,
      "grad_norm": 2.578125,
      "learning_rate": 1.1011550114099239e-05,
      "loss": 0.8759,
      "step": 686320
    },
    {
      "epoch": 2.4054155407655076,
      "grad_norm": 2.859375,
      "learning_rate": 1.1010901085435537e-05,
      "loss": 0.8784,
      "step": 686330
    },
    {
      "epoch": 2.4054505882724033,
      "grad_norm": 2.96875,
      "learning_rate": 1.1010252056771837e-05,
      "loss": 0.7296,
      "step": 686340
    },
    {
      "epoch": 2.405485635779299,
      "grad_norm": 2.546875,
      "learning_rate": 1.1009603028108135e-05,
      "loss": 0.8311,
      "step": 686350
    },
    {
      "epoch": 2.4055206832861944,
      "grad_norm": 3.4375,
      "learning_rate": 1.1008953999444433e-05,
      "loss": 0.8578,
      "step": 686360
    },
    {
      "epoch": 2.40555573079309,
      "grad_norm": 2.796875,
      "learning_rate": 1.100830497078073e-05,
      "loss": 0.8238,
      "step": 686370
    },
    {
      "epoch": 2.4055907782999855,
      "grad_norm": 2.828125,
      "learning_rate": 1.1007655942117027e-05,
      "loss": 0.8264,
      "step": 686380
    },
    {
      "epoch": 2.4056258258068812,
      "grad_norm": 2.421875,
      "learning_rate": 1.1007006913453325e-05,
      "loss": 0.8211,
      "step": 686390
    },
    {
      "epoch": 2.405660873313777,
      "grad_norm": 3.234375,
      "learning_rate": 1.1006357884789625e-05,
      "loss": 0.8398,
      "step": 686400
    },
    {
      "epoch": 2.4056959208206723,
      "grad_norm": 2.75,
      "learning_rate": 1.1005708856125923e-05,
      "loss": 0.7984,
      "step": 686410
    },
    {
      "epoch": 2.405730968327568,
      "grad_norm": 2.796875,
      "learning_rate": 1.100505982746222e-05,
      "loss": 0.8184,
      "step": 686420
    },
    {
      "epoch": 2.4057660158344634,
      "grad_norm": 3.078125,
      "learning_rate": 1.1004410798798519e-05,
      "loss": 0.7137,
      "step": 686430
    },
    {
      "epoch": 2.405801063341359,
      "grad_norm": 3.171875,
      "learning_rate": 1.1003761770134817e-05,
      "loss": 0.7984,
      "step": 686440
    },
    {
      "epoch": 2.405836110848255,
      "grad_norm": 2.953125,
      "learning_rate": 1.1003112741471115e-05,
      "loss": 0.868,
      "step": 686450
    },
    {
      "epoch": 2.4058711583551506,
      "grad_norm": 2.828125,
      "learning_rate": 1.1002463712807413e-05,
      "loss": 0.8253,
      "step": 686460
    },
    {
      "epoch": 2.405906205862046,
      "grad_norm": 3.1875,
      "learning_rate": 1.1001814684143712e-05,
      "loss": 0.8736,
      "step": 686470
    },
    {
      "epoch": 2.4059412533689417,
      "grad_norm": 3.609375,
      "learning_rate": 1.1001165655480009e-05,
      "loss": 0.8752,
      "step": 686480
    },
    {
      "epoch": 2.405976300875837,
      "grad_norm": 2.6875,
      "learning_rate": 1.1000516626816307e-05,
      "loss": 0.8239,
      "step": 686490
    },
    {
      "epoch": 2.406011348382733,
      "grad_norm": 2.90625,
      "learning_rate": 1.0999867598152605e-05,
      "loss": 0.7575,
      "step": 686500
    },
    {
      "epoch": 2.4060463958896285,
      "grad_norm": 2.421875,
      "learning_rate": 1.0999218569488903e-05,
      "loss": 0.7737,
      "step": 686510
    },
    {
      "epoch": 2.406081443396524,
      "grad_norm": 3.703125,
      "learning_rate": 1.09985695408252e-05,
      "loss": 0.7827,
      "step": 686520
    },
    {
      "epoch": 2.4061164909034196,
      "grad_norm": 2.59375,
      "learning_rate": 1.09979205121615e-05,
      "loss": 0.8311,
      "step": 686530
    },
    {
      "epoch": 2.4061515384103154,
      "grad_norm": 3.203125,
      "learning_rate": 1.0997271483497798e-05,
      "loss": 0.9339,
      "step": 686540
    },
    {
      "epoch": 2.4061865859172107,
      "grad_norm": 2.875,
      "learning_rate": 1.0996622454834096e-05,
      "loss": 0.7698,
      "step": 686550
    },
    {
      "epoch": 2.4062216334241064,
      "grad_norm": 2.328125,
      "learning_rate": 1.0995973426170394e-05,
      "loss": 0.8111,
      "step": 686560
    },
    {
      "epoch": 2.406256680931002,
      "grad_norm": 2.46875,
      "learning_rate": 1.099532439750669e-05,
      "loss": 0.8053,
      "step": 686570
    },
    {
      "epoch": 2.4062917284378975,
      "grad_norm": 2.5,
      "learning_rate": 1.099467536884299e-05,
      "loss": 0.7418,
      "step": 686580
    },
    {
      "epoch": 2.4063267759447933,
      "grad_norm": 3.21875,
      "learning_rate": 1.0994026340179288e-05,
      "loss": 0.7804,
      "step": 686590
    },
    {
      "epoch": 2.4063618234516886,
      "grad_norm": 3.375,
      "learning_rate": 1.0993377311515586e-05,
      "loss": 0.8883,
      "step": 686600
    },
    {
      "epoch": 2.4063968709585843,
      "grad_norm": 3.015625,
      "learning_rate": 1.0992728282851884e-05,
      "loss": 0.824,
      "step": 686610
    },
    {
      "epoch": 2.40643191846548,
      "grad_norm": 2.828125,
      "learning_rate": 1.0992079254188182e-05,
      "loss": 0.769,
      "step": 686620
    },
    {
      "epoch": 2.4064669659723754,
      "grad_norm": 2.625,
      "learning_rate": 1.099143022552448e-05,
      "loss": 0.7677,
      "step": 686630
    },
    {
      "epoch": 2.406502013479271,
      "grad_norm": 2.625,
      "learning_rate": 1.0990781196860778e-05,
      "loss": 0.7807,
      "step": 686640
    },
    {
      "epoch": 2.406537060986167,
      "grad_norm": 2.875,
      "learning_rate": 1.0990132168197078e-05,
      "loss": 0.8124,
      "step": 686650
    },
    {
      "epoch": 2.4065721084930622,
      "grad_norm": 2.90625,
      "learning_rate": 1.0989483139533376e-05,
      "loss": 0.7479,
      "step": 686660
    },
    {
      "epoch": 2.406607155999958,
      "grad_norm": 2.859375,
      "learning_rate": 1.0988834110869672e-05,
      "loss": 0.7999,
      "step": 686670
    },
    {
      "epoch": 2.4066422035068538,
      "grad_norm": 3.03125,
      "learning_rate": 1.098818508220597e-05,
      "loss": 0.873,
      "step": 686680
    },
    {
      "epoch": 2.406677251013749,
      "grad_norm": 3.25,
      "learning_rate": 1.0987536053542268e-05,
      "loss": 0.8619,
      "step": 686690
    },
    {
      "epoch": 2.406712298520645,
      "grad_norm": 3.015625,
      "learning_rate": 1.0986887024878566e-05,
      "loss": 0.871,
      "step": 686700
    },
    {
      "epoch": 2.40674734602754,
      "grad_norm": 3.4375,
      "learning_rate": 1.0986237996214866e-05,
      "loss": 0.8534,
      "step": 686710
    },
    {
      "epoch": 2.406782393534436,
      "grad_norm": 2.625,
      "learning_rate": 1.0985588967551164e-05,
      "loss": 0.7889,
      "step": 686720
    },
    {
      "epoch": 2.4068174410413317,
      "grad_norm": 2.84375,
      "learning_rate": 1.0984939938887462e-05,
      "loss": 0.7624,
      "step": 686730
    },
    {
      "epoch": 2.406852488548227,
      "grad_norm": 3.296875,
      "learning_rate": 1.098429091022376e-05,
      "loss": 0.8325,
      "step": 686740
    },
    {
      "epoch": 2.4068875360551227,
      "grad_norm": 3.71875,
      "learning_rate": 1.0983641881560058e-05,
      "loss": 0.8922,
      "step": 686750
    },
    {
      "epoch": 2.4069225835620185,
      "grad_norm": 3.046875,
      "learning_rate": 1.0982992852896354e-05,
      "loss": 0.8604,
      "step": 686760
    },
    {
      "epoch": 2.406957631068914,
      "grad_norm": 3.15625,
      "learning_rate": 1.0982343824232654e-05,
      "loss": 0.8263,
      "step": 686770
    },
    {
      "epoch": 2.4069926785758096,
      "grad_norm": 3.0,
      "learning_rate": 1.0981694795568952e-05,
      "loss": 0.8147,
      "step": 686780
    },
    {
      "epoch": 2.4070277260827053,
      "grad_norm": 2.90625,
      "learning_rate": 1.098104576690525e-05,
      "loss": 0.8597,
      "step": 686790
    },
    {
      "epoch": 2.4070627735896006,
      "grad_norm": 3.109375,
      "learning_rate": 1.0980396738241548e-05,
      "loss": 0.8152,
      "step": 686800
    },
    {
      "epoch": 2.4070978210964964,
      "grad_norm": 2.875,
      "learning_rate": 1.0979747709577846e-05,
      "loss": 0.7976,
      "step": 686810
    },
    {
      "epoch": 2.4071328686033917,
      "grad_norm": 3.21875,
      "learning_rate": 1.0979098680914144e-05,
      "loss": 0.817,
      "step": 686820
    },
    {
      "epoch": 2.4071679161102875,
      "grad_norm": 3.234375,
      "learning_rate": 1.0978449652250442e-05,
      "loss": 0.7858,
      "step": 686830
    },
    {
      "epoch": 2.4072029636171832,
      "grad_norm": 2.78125,
      "learning_rate": 1.0977800623586742e-05,
      "loss": 0.7931,
      "step": 686840
    },
    {
      "epoch": 2.4072380111240785,
      "grad_norm": 3.234375,
      "learning_rate": 1.0977151594923038e-05,
      "loss": 0.8479,
      "step": 686850
    },
    {
      "epoch": 2.4072730586309743,
      "grad_norm": 2.765625,
      "learning_rate": 1.0976502566259336e-05,
      "loss": 0.7558,
      "step": 686860
    },
    {
      "epoch": 2.40730810613787,
      "grad_norm": 2.71875,
      "learning_rate": 1.0975853537595634e-05,
      "loss": 0.8189,
      "step": 686870
    },
    {
      "epoch": 2.4073431536447654,
      "grad_norm": 3.34375,
      "learning_rate": 1.0975204508931932e-05,
      "loss": 0.8528,
      "step": 686880
    },
    {
      "epoch": 2.407378201151661,
      "grad_norm": 2.6875,
      "learning_rate": 1.0974555480268232e-05,
      "loss": 0.7649,
      "step": 686890
    },
    {
      "epoch": 2.407413248658557,
      "grad_norm": 3.078125,
      "learning_rate": 1.097390645160453e-05,
      "loss": 0.8021,
      "step": 686900
    },
    {
      "epoch": 2.407448296165452,
      "grad_norm": 3.078125,
      "learning_rate": 1.0973257422940828e-05,
      "loss": 0.8796,
      "step": 686910
    },
    {
      "epoch": 2.407483343672348,
      "grad_norm": 3.390625,
      "learning_rate": 1.0972608394277126e-05,
      "loss": 0.8463,
      "step": 686920
    },
    {
      "epoch": 2.4075183911792433,
      "grad_norm": 2.84375,
      "learning_rate": 1.0971959365613424e-05,
      "loss": 0.8267,
      "step": 686930
    },
    {
      "epoch": 2.407553438686139,
      "grad_norm": 2.625,
      "learning_rate": 1.0971310336949722e-05,
      "loss": 0.7487,
      "step": 686940
    },
    {
      "epoch": 2.407588486193035,
      "grad_norm": 3.28125,
      "learning_rate": 1.097066130828602e-05,
      "loss": 0.7915,
      "step": 686950
    },
    {
      "epoch": 2.40762353369993,
      "grad_norm": 2.6875,
      "learning_rate": 1.0970012279622318e-05,
      "loss": 0.7996,
      "step": 686960
    },
    {
      "epoch": 2.407658581206826,
      "grad_norm": 2.78125,
      "learning_rate": 1.0969363250958616e-05,
      "loss": 0.8146,
      "step": 686970
    },
    {
      "epoch": 2.4076936287137216,
      "grad_norm": 3.171875,
      "learning_rate": 1.0968714222294914e-05,
      "loss": 0.8242,
      "step": 686980
    },
    {
      "epoch": 2.407728676220617,
      "grad_norm": 2.796875,
      "learning_rate": 1.0968065193631212e-05,
      "loss": 0.9129,
      "step": 686990
    },
    {
      "epoch": 2.4077637237275127,
      "grad_norm": 3.25,
      "learning_rate": 1.096741616496751e-05,
      "loss": 0.8323,
      "step": 687000
    },
    {
      "epoch": 2.4077987712344084,
      "grad_norm": 2.8125,
      "learning_rate": 1.0966767136303808e-05,
      "loss": 0.7762,
      "step": 687010
    },
    {
      "epoch": 2.4078338187413038,
      "grad_norm": 2.9375,
      "learning_rate": 1.0966118107640108e-05,
      "loss": 0.7862,
      "step": 687020
    },
    {
      "epoch": 2.4078688662481995,
      "grad_norm": 2.5,
      "learning_rate": 1.0965469078976406e-05,
      "loss": 0.7249,
      "step": 687030
    },
    {
      "epoch": 2.407903913755095,
      "grad_norm": 3.015625,
      "learning_rate": 1.0964820050312702e-05,
      "loss": 0.8204,
      "step": 687040
    },
    {
      "epoch": 2.4079389612619906,
      "grad_norm": 3.1875,
      "learning_rate": 1.0964171021649e-05,
      "loss": 0.86,
      "step": 687050
    },
    {
      "epoch": 2.4079740087688863,
      "grad_norm": 3.015625,
      "learning_rate": 1.0963521992985298e-05,
      "loss": 0.8008,
      "step": 687060
    },
    {
      "epoch": 2.4080090562757817,
      "grad_norm": 3.015625,
      "learning_rate": 1.0962872964321596e-05,
      "loss": 0.7565,
      "step": 687070
    },
    {
      "epoch": 2.4080441037826774,
      "grad_norm": 3.0,
      "learning_rate": 1.0962223935657896e-05,
      "loss": 0.8212,
      "step": 687080
    },
    {
      "epoch": 2.408079151289573,
      "grad_norm": 3.5,
      "learning_rate": 1.0961574906994194e-05,
      "loss": 0.825,
      "step": 687090
    },
    {
      "epoch": 2.4081141987964685,
      "grad_norm": 3.03125,
      "learning_rate": 1.0960925878330492e-05,
      "loss": 0.8555,
      "step": 687100
    },
    {
      "epoch": 2.4081492463033642,
      "grad_norm": 2.796875,
      "learning_rate": 1.096027684966679e-05,
      "loss": 0.7952,
      "step": 687110
    },
    {
      "epoch": 2.40818429381026,
      "grad_norm": 2.734375,
      "learning_rate": 1.0959627821003088e-05,
      "loss": 0.7366,
      "step": 687120
    },
    {
      "epoch": 2.4082193413171553,
      "grad_norm": 2.921875,
      "learning_rate": 1.0958978792339386e-05,
      "loss": 0.8247,
      "step": 687130
    },
    {
      "epoch": 2.408254388824051,
      "grad_norm": 2.703125,
      "learning_rate": 1.0958329763675684e-05,
      "loss": 0.8289,
      "step": 687140
    },
    {
      "epoch": 2.4082894363309464,
      "grad_norm": 2.921875,
      "learning_rate": 1.0957680735011982e-05,
      "loss": 0.7409,
      "step": 687150
    },
    {
      "epoch": 2.408324483837842,
      "grad_norm": 2.4375,
      "learning_rate": 1.095703170634828e-05,
      "loss": 0.822,
      "step": 687160
    },
    {
      "epoch": 2.408359531344738,
      "grad_norm": 3.0625,
      "learning_rate": 1.0956382677684578e-05,
      "loss": 0.8533,
      "step": 687170
    },
    {
      "epoch": 2.408394578851633,
      "grad_norm": 2.96875,
      "learning_rate": 1.0955733649020876e-05,
      "loss": 0.7909,
      "step": 687180
    },
    {
      "epoch": 2.408429626358529,
      "grad_norm": 3.015625,
      "learning_rate": 1.0955084620357174e-05,
      "loss": 0.7812,
      "step": 687190
    },
    {
      "epoch": 2.4084646738654247,
      "grad_norm": 2.640625,
      "learning_rate": 1.0954435591693473e-05,
      "loss": 0.7988,
      "step": 687200
    },
    {
      "epoch": 2.40849972137232,
      "grad_norm": 2.96875,
      "learning_rate": 1.0953786563029771e-05,
      "loss": 0.8085,
      "step": 687210
    },
    {
      "epoch": 2.408534768879216,
      "grad_norm": 2.90625,
      "learning_rate": 1.095313753436607e-05,
      "loss": 0.787,
      "step": 687220
    },
    {
      "epoch": 2.4085698163861116,
      "grad_norm": 3.21875,
      "learning_rate": 1.0952488505702366e-05,
      "loss": 0.8072,
      "step": 687230
    },
    {
      "epoch": 2.408604863893007,
      "grad_norm": 3.171875,
      "learning_rate": 1.0951839477038664e-05,
      "loss": 0.7929,
      "step": 687240
    },
    {
      "epoch": 2.4086399113999026,
      "grad_norm": 3.015625,
      "learning_rate": 1.0951190448374962e-05,
      "loss": 0.7815,
      "step": 687250
    },
    {
      "epoch": 2.408674958906798,
      "grad_norm": 2.96875,
      "learning_rate": 1.0950541419711261e-05,
      "loss": 0.7635,
      "step": 687260
    },
    {
      "epoch": 2.4087100064136937,
      "grad_norm": 2.65625,
      "learning_rate": 1.094989239104756e-05,
      "loss": 0.8099,
      "step": 687270
    },
    {
      "epoch": 2.4087450539205895,
      "grad_norm": 2.90625,
      "learning_rate": 1.0949243362383857e-05,
      "loss": 0.7115,
      "step": 687280
    },
    {
      "epoch": 2.4087801014274848,
      "grad_norm": 3.09375,
      "learning_rate": 1.0948594333720155e-05,
      "loss": 0.7529,
      "step": 687290
    },
    {
      "epoch": 2.4088151489343805,
      "grad_norm": 2.84375,
      "learning_rate": 1.0947945305056453e-05,
      "loss": 0.8101,
      "step": 687300
    },
    {
      "epoch": 2.4088501964412763,
      "grad_norm": 3.265625,
      "learning_rate": 1.0947296276392751e-05,
      "loss": 0.8009,
      "step": 687310
    },
    {
      "epoch": 2.4088852439481716,
      "grad_norm": 3.3125,
      "learning_rate": 1.094664724772905e-05,
      "loss": 0.8697,
      "step": 687320
    },
    {
      "epoch": 2.4089202914550674,
      "grad_norm": 3.21875,
      "learning_rate": 1.0945998219065347e-05,
      "loss": 0.8594,
      "step": 687330
    },
    {
      "epoch": 2.408955338961963,
      "grad_norm": 3.4375,
      "learning_rate": 1.0945349190401645e-05,
      "loss": 0.8254,
      "step": 687340
    },
    {
      "epoch": 2.4089903864688584,
      "grad_norm": 2.5625,
      "learning_rate": 1.0944700161737943e-05,
      "loss": 0.7507,
      "step": 687350
    },
    {
      "epoch": 2.409025433975754,
      "grad_norm": 3.015625,
      "learning_rate": 1.0944051133074241e-05,
      "loss": 0.8379,
      "step": 687360
    },
    {
      "epoch": 2.4090604814826495,
      "grad_norm": 2.953125,
      "learning_rate": 1.094340210441054e-05,
      "loss": 0.7941,
      "step": 687370
    },
    {
      "epoch": 2.4090955289895453,
      "grad_norm": 3.0,
      "learning_rate": 1.0942753075746837e-05,
      "loss": 0.8148,
      "step": 687380
    },
    {
      "epoch": 2.409130576496441,
      "grad_norm": 2.6875,
      "learning_rate": 1.0942104047083137e-05,
      "loss": 0.7735,
      "step": 687390
    },
    {
      "epoch": 2.4091656240033363,
      "grad_norm": 2.921875,
      "learning_rate": 1.0941455018419435e-05,
      "loss": 0.7609,
      "step": 687400
    },
    {
      "epoch": 2.409200671510232,
      "grad_norm": 3.015625,
      "learning_rate": 1.0940805989755733e-05,
      "loss": 0.7275,
      "step": 687410
    },
    {
      "epoch": 2.409235719017128,
      "grad_norm": 2.375,
      "learning_rate": 1.094015696109203e-05,
      "loss": 0.7424,
      "step": 687420
    },
    {
      "epoch": 2.409270766524023,
      "grad_norm": 3.078125,
      "learning_rate": 1.0939507932428327e-05,
      "loss": 0.8012,
      "step": 687430
    },
    {
      "epoch": 2.409305814030919,
      "grad_norm": 2.671875,
      "learning_rate": 1.0938858903764627e-05,
      "loss": 0.7249,
      "step": 687440
    },
    {
      "epoch": 2.4093408615378147,
      "grad_norm": 2.859375,
      "learning_rate": 1.0938209875100925e-05,
      "loss": 0.7666,
      "step": 687450
    },
    {
      "epoch": 2.40937590904471,
      "grad_norm": 3.234375,
      "learning_rate": 1.0937560846437223e-05,
      "loss": 0.8191,
      "step": 687460
    },
    {
      "epoch": 2.4094109565516058,
      "grad_norm": 2.453125,
      "learning_rate": 1.0936911817773521e-05,
      "loss": 0.8278,
      "step": 687470
    },
    {
      "epoch": 2.409446004058501,
      "grad_norm": 3.015625,
      "learning_rate": 1.0936262789109819e-05,
      "loss": 0.6523,
      "step": 687480
    },
    {
      "epoch": 2.409481051565397,
      "grad_norm": 3.328125,
      "learning_rate": 1.0935613760446117e-05,
      "loss": 0.8269,
      "step": 687490
    },
    {
      "epoch": 2.4095160990722926,
      "grad_norm": 2.96875,
      "learning_rate": 1.0934964731782415e-05,
      "loss": 0.6579,
      "step": 687500
    },
    {
      "epoch": 2.409551146579188,
      "grad_norm": 2.828125,
      "learning_rate": 1.0934315703118713e-05,
      "loss": 0.7943,
      "step": 687510
    },
    {
      "epoch": 2.4095861940860837,
      "grad_norm": 3.265625,
      "learning_rate": 1.0933666674455011e-05,
      "loss": 0.887,
      "step": 687520
    },
    {
      "epoch": 2.4096212415929794,
      "grad_norm": 2.953125,
      "learning_rate": 1.0933017645791309e-05,
      "loss": 0.7495,
      "step": 687530
    },
    {
      "epoch": 2.4096562890998747,
      "grad_norm": 2.859375,
      "learning_rate": 1.0932368617127607e-05,
      "loss": 0.7939,
      "step": 687540
    },
    {
      "epoch": 2.4096913366067705,
      "grad_norm": 2.65625,
      "learning_rate": 1.0931719588463905e-05,
      "loss": 0.8046,
      "step": 687550
    },
    {
      "epoch": 2.4097263841136662,
      "grad_norm": 2.671875,
      "learning_rate": 1.0931070559800203e-05,
      "loss": 0.8272,
      "step": 687560
    },
    {
      "epoch": 2.4097614316205616,
      "grad_norm": 2.875,
      "learning_rate": 1.0930421531136503e-05,
      "loss": 0.822,
      "step": 687570
    },
    {
      "epoch": 2.4097964791274573,
      "grad_norm": 3.109375,
      "learning_rate": 1.09297725024728e-05,
      "loss": 0.7944,
      "step": 687580
    },
    {
      "epoch": 2.4098315266343526,
      "grad_norm": 2.953125,
      "learning_rate": 1.0929123473809099e-05,
      "loss": 0.8007,
      "step": 687590
    },
    {
      "epoch": 2.4098665741412484,
      "grad_norm": 3.015625,
      "learning_rate": 1.0928474445145397e-05,
      "loss": 0.8354,
      "step": 687600
    },
    {
      "epoch": 2.409901621648144,
      "grad_norm": 3.203125,
      "learning_rate": 1.0927825416481693e-05,
      "loss": 0.7794,
      "step": 687610
    },
    {
      "epoch": 2.40993666915504,
      "grad_norm": 2.6875,
      "learning_rate": 1.0927176387817991e-05,
      "loss": 0.845,
      "step": 687620
    },
    {
      "epoch": 2.409971716661935,
      "grad_norm": 3.03125,
      "learning_rate": 1.092652735915429e-05,
      "loss": 0.7889,
      "step": 687630
    },
    {
      "epoch": 2.410006764168831,
      "grad_norm": 2.875,
      "learning_rate": 1.0925878330490589e-05,
      "loss": 0.8049,
      "step": 687640
    },
    {
      "epoch": 2.4100418116757263,
      "grad_norm": 3.015625,
      "learning_rate": 1.0925229301826887e-05,
      "loss": 0.8446,
      "step": 687650
    },
    {
      "epoch": 2.410076859182622,
      "grad_norm": 3.0,
      "learning_rate": 1.0924580273163185e-05,
      "loss": 0.7521,
      "step": 687660
    },
    {
      "epoch": 2.410111906689518,
      "grad_norm": 2.765625,
      "learning_rate": 1.0923931244499483e-05,
      "loss": 0.8106,
      "step": 687670
    },
    {
      "epoch": 2.410146954196413,
      "grad_norm": 2.609375,
      "learning_rate": 1.092328221583578e-05,
      "loss": 0.7417,
      "step": 687680
    },
    {
      "epoch": 2.410182001703309,
      "grad_norm": 3.203125,
      "learning_rate": 1.0922633187172079e-05,
      "loss": 0.8448,
      "step": 687690
    },
    {
      "epoch": 2.410217049210204,
      "grad_norm": 2.90625,
      "learning_rate": 1.0921984158508377e-05,
      "loss": 0.8269,
      "step": 687700
    },
    {
      "epoch": 2.4102520967171,
      "grad_norm": 3.046875,
      "learning_rate": 1.0921335129844675e-05,
      "loss": 0.7462,
      "step": 687710
    },
    {
      "epoch": 2.4102871442239957,
      "grad_norm": 3.734375,
      "learning_rate": 1.0920686101180973e-05,
      "loss": 0.7863,
      "step": 687720
    },
    {
      "epoch": 2.4103221917308915,
      "grad_norm": 3.390625,
      "learning_rate": 1.092003707251727e-05,
      "loss": 0.8253,
      "step": 687730
    },
    {
      "epoch": 2.4103572392377868,
      "grad_norm": 2.84375,
      "learning_rate": 1.0919388043853569e-05,
      "loss": 0.713,
      "step": 687740
    },
    {
      "epoch": 2.4103922867446825,
      "grad_norm": 2.9375,
      "learning_rate": 1.0918739015189868e-05,
      "loss": 0.8633,
      "step": 687750
    },
    {
      "epoch": 2.410427334251578,
      "grad_norm": 3.0,
      "learning_rate": 1.0918089986526166e-05,
      "loss": 0.8382,
      "step": 687760
    },
    {
      "epoch": 2.4104623817584736,
      "grad_norm": 2.5,
      "learning_rate": 1.0917440957862464e-05,
      "loss": 0.7357,
      "step": 687770
    },
    {
      "epoch": 2.4104974292653694,
      "grad_norm": 2.859375,
      "learning_rate": 1.0916791929198762e-05,
      "loss": 0.8177,
      "step": 687780
    },
    {
      "epoch": 2.4105324767722647,
      "grad_norm": 2.734375,
      "learning_rate": 1.091614290053506e-05,
      "loss": 0.8369,
      "step": 687790
    },
    {
      "epoch": 2.4105675242791604,
      "grad_norm": 2.9375,
      "learning_rate": 1.0915493871871357e-05,
      "loss": 0.8496,
      "step": 687800
    },
    {
      "epoch": 2.410602571786056,
      "grad_norm": 2.71875,
      "learning_rate": 1.0914844843207656e-05,
      "loss": 0.7687,
      "step": 687810
    },
    {
      "epoch": 2.4106376192929515,
      "grad_norm": 2.875,
      "learning_rate": 1.0914195814543954e-05,
      "loss": 0.8895,
      "step": 687820
    },
    {
      "epoch": 2.4106726667998473,
      "grad_norm": 3.15625,
      "learning_rate": 1.0913546785880252e-05,
      "loss": 0.8185,
      "step": 687830
    },
    {
      "epoch": 2.410707714306743,
      "grad_norm": 2.84375,
      "learning_rate": 1.091289775721655e-05,
      "loss": 0.827,
      "step": 687840
    },
    {
      "epoch": 2.4107427618136383,
      "grad_norm": 2.859375,
      "learning_rate": 1.0912248728552848e-05,
      "loss": 0.7777,
      "step": 687850
    },
    {
      "epoch": 2.410777809320534,
      "grad_norm": 3.25,
      "learning_rate": 1.0911599699889146e-05,
      "loss": 0.7711,
      "step": 687860
    },
    {
      "epoch": 2.4108128568274294,
      "grad_norm": 3.296875,
      "learning_rate": 1.0910950671225444e-05,
      "loss": 0.8491,
      "step": 687870
    },
    {
      "epoch": 2.410847904334325,
      "grad_norm": 3.328125,
      "learning_rate": 1.0910301642561744e-05,
      "loss": 0.9251,
      "step": 687880
    },
    {
      "epoch": 2.410882951841221,
      "grad_norm": 2.625,
      "learning_rate": 1.090965261389804e-05,
      "loss": 0.8159,
      "step": 687890
    },
    {
      "epoch": 2.4109179993481162,
      "grad_norm": 3.140625,
      "learning_rate": 1.0909003585234338e-05,
      "loss": 0.828,
      "step": 687900
    },
    {
      "epoch": 2.410953046855012,
      "grad_norm": 2.828125,
      "learning_rate": 1.0908354556570636e-05,
      "loss": 0.8289,
      "step": 687910
    },
    {
      "epoch": 2.4109880943619078,
      "grad_norm": 3.203125,
      "learning_rate": 1.0907705527906934e-05,
      "loss": 0.7738,
      "step": 687920
    },
    {
      "epoch": 2.411023141868803,
      "grad_norm": 2.609375,
      "learning_rate": 1.0907056499243232e-05,
      "loss": 0.7636,
      "step": 687930
    },
    {
      "epoch": 2.411058189375699,
      "grad_norm": 3.03125,
      "learning_rate": 1.0906407470579532e-05,
      "loss": 0.8942,
      "step": 687940
    },
    {
      "epoch": 2.4110932368825946,
      "grad_norm": 3.234375,
      "learning_rate": 1.090575844191583e-05,
      "loss": 0.7234,
      "step": 687950
    },
    {
      "epoch": 2.41112828438949,
      "grad_norm": 2.828125,
      "learning_rate": 1.0905109413252128e-05,
      "loss": 0.7695,
      "step": 687960
    },
    {
      "epoch": 2.4111633318963857,
      "grad_norm": 2.875,
      "learning_rate": 1.0904460384588426e-05,
      "loss": 0.7757,
      "step": 687970
    },
    {
      "epoch": 2.411198379403281,
      "grad_norm": 3.1875,
      "learning_rate": 1.0903811355924722e-05,
      "loss": 0.7818,
      "step": 687980
    },
    {
      "epoch": 2.4112334269101767,
      "grad_norm": 2.84375,
      "learning_rate": 1.0903162327261022e-05,
      "loss": 0.7738,
      "step": 687990
    },
    {
      "epoch": 2.4112684744170725,
      "grad_norm": 2.84375,
      "learning_rate": 1.090251329859732e-05,
      "loss": 0.7621,
      "step": 688000
    },
    {
      "epoch": 2.411303521923968,
      "grad_norm": 2.75,
      "learning_rate": 1.0901864269933618e-05,
      "loss": 0.7686,
      "step": 688010
    },
    {
      "epoch": 2.4113385694308636,
      "grad_norm": 3.171875,
      "learning_rate": 1.0901215241269916e-05,
      "loss": 0.8264,
      "step": 688020
    },
    {
      "epoch": 2.4113736169377593,
      "grad_norm": 3.046875,
      "learning_rate": 1.0900566212606214e-05,
      "loss": 0.7589,
      "step": 688030
    },
    {
      "epoch": 2.4114086644446546,
      "grad_norm": 3.0,
      "learning_rate": 1.0899917183942512e-05,
      "loss": 0.7976,
      "step": 688040
    },
    {
      "epoch": 2.4114437119515504,
      "grad_norm": 3.0625,
      "learning_rate": 1.089926815527881e-05,
      "loss": 0.7844,
      "step": 688050
    },
    {
      "epoch": 2.411478759458446,
      "grad_norm": 2.875,
      "learning_rate": 1.089861912661511e-05,
      "loss": 0.8456,
      "step": 688060
    },
    {
      "epoch": 2.4115138069653415,
      "grad_norm": 2.78125,
      "learning_rate": 1.0897970097951408e-05,
      "loss": 0.7898,
      "step": 688070
    },
    {
      "epoch": 2.411548854472237,
      "grad_norm": 2.703125,
      "learning_rate": 1.0897321069287704e-05,
      "loss": 0.8297,
      "step": 688080
    },
    {
      "epoch": 2.4115839019791325,
      "grad_norm": 2.71875,
      "learning_rate": 1.0896672040624002e-05,
      "loss": 0.8753,
      "step": 688090
    },
    {
      "epoch": 2.4116189494860283,
      "grad_norm": 3.234375,
      "learning_rate": 1.08960230119603e-05,
      "loss": 0.842,
      "step": 688100
    },
    {
      "epoch": 2.411653996992924,
      "grad_norm": 3.125,
      "learning_rate": 1.0895373983296598e-05,
      "loss": 0.7446,
      "step": 688110
    },
    {
      "epoch": 2.4116890444998194,
      "grad_norm": 3.03125,
      "learning_rate": 1.0894724954632898e-05,
      "loss": 0.7994,
      "step": 688120
    },
    {
      "epoch": 2.411724092006715,
      "grad_norm": 2.625,
      "learning_rate": 1.0894075925969196e-05,
      "loss": 0.9205,
      "step": 688130
    },
    {
      "epoch": 2.411759139513611,
      "grad_norm": 3.015625,
      "learning_rate": 1.0893426897305494e-05,
      "loss": 0.9095,
      "step": 688140
    },
    {
      "epoch": 2.411794187020506,
      "grad_norm": 2.765625,
      "learning_rate": 1.0892777868641792e-05,
      "loss": 0.797,
      "step": 688150
    },
    {
      "epoch": 2.411829234527402,
      "grad_norm": 3.34375,
      "learning_rate": 1.089212883997809e-05,
      "loss": 0.8584,
      "step": 688160
    },
    {
      "epoch": 2.4118642820342977,
      "grad_norm": 2.90625,
      "learning_rate": 1.0891479811314386e-05,
      "loss": 0.7799,
      "step": 688170
    },
    {
      "epoch": 2.411899329541193,
      "grad_norm": 2.609375,
      "learning_rate": 1.0890830782650686e-05,
      "loss": 0.8674,
      "step": 688180
    },
    {
      "epoch": 2.4119343770480888,
      "grad_norm": 3.0625,
      "learning_rate": 1.0890181753986984e-05,
      "loss": 0.7705,
      "step": 688190
    },
    {
      "epoch": 2.411969424554984,
      "grad_norm": 3.0,
      "learning_rate": 1.0889532725323282e-05,
      "loss": 0.6878,
      "step": 688200
    },
    {
      "epoch": 2.41200447206188,
      "grad_norm": 2.65625,
      "learning_rate": 1.088888369665958e-05,
      "loss": 0.7939,
      "step": 688210
    },
    {
      "epoch": 2.4120395195687756,
      "grad_norm": 3.0,
      "learning_rate": 1.0888234667995878e-05,
      "loss": 0.7955,
      "step": 688220
    },
    {
      "epoch": 2.412074567075671,
      "grad_norm": 3.21875,
      "learning_rate": 1.0887585639332176e-05,
      "loss": 0.7272,
      "step": 688230
    },
    {
      "epoch": 2.4121096145825667,
      "grad_norm": 2.96875,
      "learning_rate": 1.0886936610668474e-05,
      "loss": 0.8301,
      "step": 688240
    },
    {
      "epoch": 2.4121446620894624,
      "grad_norm": 2.5625,
      "learning_rate": 1.0886287582004773e-05,
      "loss": 0.7419,
      "step": 688250
    },
    {
      "epoch": 2.4121797095963577,
      "grad_norm": 3.078125,
      "learning_rate": 1.0885638553341071e-05,
      "loss": 0.8057,
      "step": 688260
    },
    {
      "epoch": 2.4122147571032535,
      "grad_norm": 3.5625,
      "learning_rate": 1.0884989524677368e-05,
      "loss": 0.869,
      "step": 688270
    },
    {
      "epoch": 2.4122498046101493,
      "grad_norm": 2.84375,
      "learning_rate": 1.0884340496013666e-05,
      "loss": 0.8246,
      "step": 688280
    },
    {
      "epoch": 2.4122848521170446,
      "grad_norm": 3.65625,
      "learning_rate": 1.0883691467349964e-05,
      "loss": 0.8448,
      "step": 688290
    },
    {
      "epoch": 2.4123198996239403,
      "grad_norm": 2.890625,
      "learning_rate": 1.0883042438686263e-05,
      "loss": 0.8482,
      "step": 688300
    },
    {
      "epoch": 2.4123549471308356,
      "grad_norm": 3.15625,
      "learning_rate": 1.0882393410022561e-05,
      "loss": 0.8067,
      "step": 688310
    },
    {
      "epoch": 2.4123899946377314,
      "grad_norm": 2.734375,
      "learning_rate": 1.088174438135886e-05,
      "loss": 0.8069,
      "step": 688320
    },
    {
      "epoch": 2.412425042144627,
      "grad_norm": 2.96875,
      "learning_rate": 1.0881095352695157e-05,
      "loss": 0.8242,
      "step": 688330
    },
    {
      "epoch": 2.4124600896515225,
      "grad_norm": 2.875,
      "learning_rate": 1.0880446324031455e-05,
      "loss": 0.8725,
      "step": 688340
    },
    {
      "epoch": 2.4124951371584182,
      "grad_norm": 2.875,
      "learning_rate": 1.0879797295367753e-05,
      "loss": 0.7945,
      "step": 688350
    },
    {
      "epoch": 2.412530184665314,
      "grad_norm": 3.4375,
      "learning_rate": 1.0879148266704051e-05,
      "loss": 0.7893,
      "step": 688360
    },
    {
      "epoch": 2.4125652321722093,
      "grad_norm": 2.828125,
      "learning_rate": 1.087849923804035e-05,
      "loss": 0.8326,
      "step": 688370
    },
    {
      "epoch": 2.412600279679105,
      "grad_norm": 2.890625,
      "learning_rate": 1.0877850209376647e-05,
      "loss": 0.7526,
      "step": 688380
    },
    {
      "epoch": 2.412635327186001,
      "grad_norm": 3.0,
      "learning_rate": 1.0877201180712945e-05,
      "loss": 0.7775,
      "step": 688390
    },
    {
      "epoch": 2.412670374692896,
      "grad_norm": 3.34375,
      "learning_rate": 1.0876552152049243e-05,
      "loss": 0.7767,
      "step": 688400
    },
    {
      "epoch": 2.412705422199792,
      "grad_norm": 2.640625,
      "learning_rate": 1.0875903123385541e-05,
      "loss": 0.8651,
      "step": 688410
    },
    {
      "epoch": 2.412740469706687,
      "grad_norm": 2.265625,
      "learning_rate": 1.087525409472184e-05,
      "loss": 0.8416,
      "step": 688420
    },
    {
      "epoch": 2.412775517213583,
      "grad_norm": 2.921875,
      "learning_rate": 1.0874605066058139e-05,
      "loss": 0.798,
      "step": 688430
    },
    {
      "epoch": 2.4128105647204787,
      "grad_norm": 2.9375,
      "learning_rate": 1.0873956037394437e-05,
      "loss": 0.8866,
      "step": 688440
    },
    {
      "epoch": 2.412845612227374,
      "grad_norm": 2.78125,
      "learning_rate": 1.0873307008730733e-05,
      "loss": 0.8152,
      "step": 688450
    },
    {
      "epoch": 2.41288065973427,
      "grad_norm": 2.1875,
      "learning_rate": 1.0872657980067031e-05,
      "loss": 0.7862,
      "step": 688460
    },
    {
      "epoch": 2.4129157072411656,
      "grad_norm": 3.53125,
      "learning_rate": 1.087200895140333e-05,
      "loss": 0.824,
      "step": 688470
    },
    {
      "epoch": 2.412950754748061,
      "grad_norm": 3.21875,
      "learning_rate": 1.0871359922739627e-05,
      "loss": 0.8472,
      "step": 688480
    },
    {
      "epoch": 2.4129858022549566,
      "grad_norm": 3.359375,
      "learning_rate": 1.0870710894075927e-05,
      "loss": 0.8234,
      "step": 688490
    },
    {
      "epoch": 2.4130208497618524,
      "grad_norm": 3.203125,
      "learning_rate": 1.0870061865412225e-05,
      "loss": 0.738,
      "step": 688500
    },
    {
      "epoch": 2.4130558972687477,
      "grad_norm": 3.078125,
      "learning_rate": 1.0869412836748523e-05,
      "loss": 0.7997,
      "step": 688510
    },
    {
      "epoch": 2.4130909447756435,
      "grad_norm": 3.375,
      "learning_rate": 1.0868763808084821e-05,
      "loss": 0.748,
      "step": 688520
    },
    {
      "epoch": 2.4131259922825388,
      "grad_norm": 2.28125,
      "learning_rate": 1.0868114779421119e-05,
      "loss": 0.7153,
      "step": 688530
    },
    {
      "epoch": 2.4131610397894345,
      "grad_norm": 2.53125,
      "learning_rate": 1.0867465750757417e-05,
      "loss": 0.7545,
      "step": 688540
    },
    {
      "epoch": 2.4131960872963303,
      "grad_norm": 3.109375,
      "learning_rate": 1.0866816722093715e-05,
      "loss": 0.8512,
      "step": 688550
    },
    {
      "epoch": 2.4132311348032256,
      "grad_norm": 2.53125,
      "learning_rate": 1.0866167693430013e-05,
      "loss": 0.7789,
      "step": 688560
    },
    {
      "epoch": 2.4132661823101214,
      "grad_norm": 2.625,
      "learning_rate": 1.0865518664766311e-05,
      "loss": 0.7522,
      "step": 688570
    },
    {
      "epoch": 2.413301229817017,
      "grad_norm": 2.78125,
      "learning_rate": 1.0864869636102609e-05,
      "loss": 0.7795,
      "step": 688580
    },
    {
      "epoch": 2.4133362773239124,
      "grad_norm": 3.171875,
      "learning_rate": 1.0864220607438907e-05,
      "loss": 0.9101,
      "step": 688590
    },
    {
      "epoch": 2.413371324830808,
      "grad_norm": 2.84375,
      "learning_rate": 1.0863571578775205e-05,
      "loss": 0.7113,
      "step": 688600
    },
    {
      "epoch": 2.413406372337704,
      "grad_norm": 2.671875,
      "learning_rate": 1.0862922550111505e-05,
      "loss": 0.7893,
      "step": 688610
    },
    {
      "epoch": 2.4134414198445993,
      "grad_norm": 3.078125,
      "learning_rate": 1.0862273521447803e-05,
      "loss": 0.9065,
      "step": 688620
    },
    {
      "epoch": 2.413476467351495,
      "grad_norm": 2.875,
      "learning_rate": 1.08616244927841e-05,
      "loss": 0.8139,
      "step": 688630
    },
    {
      "epoch": 2.4135115148583903,
      "grad_norm": 2.84375,
      "learning_rate": 1.0860975464120397e-05,
      "loss": 0.7628,
      "step": 688640
    },
    {
      "epoch": 2.413546562365286,
      "grad_norm": 2.8125,
      "learning_rate": 1.0860326435456695e-05,
      "loss": 0.7727,
      "step": 688650
    },
    {
      "epoch": 2.413581609872182,
      "grad_norm": 2.609375,
      "learning_rate": 1.0859677406792993e-05,
      "loss": 0.7685,
      "step": 688660
    },
    {
      "epoch": 2.413616657379077,
      "grad_norm": 2.90625,
      "learning_rate": 1.0859028378129293e-05,
      "loss": 0.8823,
      "step": 688670
    },
    {
      "epoch": 2.413651704885973,
      "grad_norm": 2.953125,
      "learning_rate": 1.085837934946559e-05,
      "loss": 0.8461,
      "step": 688680
    },
    {
      "epoch": 2.4136867523928687,
      "grad_norm": 2.625,
      "learning_rate": 1.0857730320801889e-05,
      "loss": 0.828,
      "step": 688690
    },
    {
      "epoch": 2.413721799899764,
      "grad_norm": 3.140625,
      "learning_rate": 1.0857081292138187e-05,
      "loss": 0.7552,
      "step": 688700
    },
    {
      "epoch": 2.4137568474066597,
      "grad_norm": 2.375,
      "learning_rate": 1.0856432263474485e-05,
      "loss": 0.7733,
      "step": 688710
    },
    {
      "epoch": 2.4137918949135555,
      "grad_norm": 3.5,
      "learning_rate": 1.0855783234810783e-05,
      "loss": 0.8268,
      "step": 688720
    },
    {
      "epoch": 2.413826942420451,
      "grad_norm": 2.578125,
      "learning_rate": 1.085513420614708e-05,
      "loss": 0.8793,
      "step": 688730
    },
    {
      "epoch": 2.4138619899273466,
      "grad_norm": 2.734375,
      "learning_rate": 1.0854485177483379e-05,
      "loss": 0.837,
      "step": 688740
    },
    {
      "epoch": 2.413897037434242,
      "grad_norm": 2.796875,
      "learning_rate": 1.0853836148819677e-05,
      "loss": 0.8633,
      "step": 688750
    },
    {
      "epoch": 2.4139320849411376,
      "grad_norm": 3.1875,
      "learning_rate": 1.0853187120155975e-05,
      "loss": 0.795,
      "step": 688760
    },
    {
      "epoch": 2.4139671324480334,
      "grad_norm": 2.53125,
      "learning_rate": 1.0852538091492273e-05,
      "loss": 0.8079,
      "step": 688770
    },
    {
      "epoch": 2.4140021799549287,
      "grad_norm": 2.828125,
      "learning_rate": 1.085188906282857e-05,
      "loss": 0.8299,
      "step": 688780
    },
    {
      "epoch": 2.4140372274618245,
      "grad_norm": 3.0,
      "learning_rate": 1.0851240034164869e-05,
      "loss": 0.7872,
      "step": 688790
    },
    {
      "epoch": 2.4140722749687202,
      "grad_norm": 3.015625,
      "learning_rate": 1.0850591005501168e-05,
      "loss": 0.7969,
      "step": 688800
    },
    {
      "epoch": 2.4141073224756155,
      "grad_norm": 3.140625,
      "learning_rate": 1.0849941976837466e-05,
      "loss": 0.831,
      "step": 688810
    },
    {
      "epoch": 2.4141423699825113,
      "grad_norm": 3.5625,
      "learning_rate": 1.0849292948173764e-05,
      "loss": 0.8166,
      "step": 688820
    },
    {
      "epoch": 2.414177417489407,
      "grad_norm": 3.265625,
      "learning_rate": 1.084864391951006e-05,
      "loss": 0.8653,
      "step": 688830
    },
    {
      "epoch": 2.4142124649963024,
      "grad_norm": 3.109375,
      "learning_rate": 1.0847994890846359e-05,
      "loss": 0.7665,
      "step": 688840
    },
    {
      "epoch": 2.414247512503198,
      "grad_norm": 2.40625,
      "learning_rate": 1.0847345862182658e-05,
      "loss": 0.7906,
      "step": 688850
    },
    {
      "epoch": 2.4142825600100934,
      "grad_norm": 2.296875,
      "learning_rate": 1.0846696833518956e-05,
      "loss": 0.8402,
      "step": 688860
    },
    {
      "epoch": 2.414317607516989,
      "grad_norm": 3.296875,
      "learning_rate": 1.0846047804855254e-05,
      "loss": 0.782,
      "step": 688870
    },
    {
      "epoch": 2.414352655023885,
      "grad_norm": 2.75,
      "learning_rate": 1.0845398776191552e-05,
      "loss": 0.8771,
      "step": 688880
    },
    {
      "epoch": 2.4143877025307807,
      "grad_norm": 2.328125,
      "learning_rate": 1.084474974752785e-05,
      "loss": 0.7346,
      "step": 688890
    },
    {
      "epoch": 2.414422750037676,
      "grad_norm": 2.921875,
      "learning_rate": 1.0844100718864148e-05,
      "loss": 0.837,
      "step": 688900
    },
    {
      "epoch": 2.414457797544572,
      "grad_norm": 2.53125,
      "learning_rate": 1.0843451690200446e-05,
      "loss": 0.8414,
      "step": 688910
    },
    {
      "epoch": 2.414492845051467,
      "grad_norm": 3.234375,
      "learning_rate": 1.0842802661536744e-05,
      "loss": 0.8601,
      "step": 688920
    },
    {
      "epoch": 2.414527892558363,
      "grad_norm": 3.046875,
      "learning_rate": 1.0842153632873042e-05,
      "loss": 0.8121,
      "step": 688930
    },
    {
      "epoch": 2.4145629400652586,
      "grad_norm": 2.796875,
      "learning_rate": 1.084150460420934e-05,
      "loss": 0.7828,
      "step": 688940
    },
    {
      "epoch": 2.414597987572154,
      "grad_norm": 2.875,
      "learning_rate": 1.0840855575545638e-05,
      "loss": 0.803,
      "step": 688950
    },
    {
      "epoch": 2.4146330350790497,
      "grad_norm": 2.71875,
      "learning_rate": 1.0840206546881936e-05,
      "loss": 0.776,
      "step": 688960
    },
    {
      "epoch": 2.414668082585945,
      "grad_norm": 3.015625,
      "learning_rate": 1.0839557518218234e-05,
      "loss": 0.773,
      "step": 688970
    },
    {
      "epoch": 2.4147031300928408,
      "grad_norm": 2.90625,
      "learning_rate": 1.0838908489554534e-05,
      "loss": 0.829,
      "step": 688980
    },
    {
      "epoch": 2.4147381775997365,
      "grad_norm": 3.0625,
      "learning_rate": 1.0838259460890832e-05,
      "loss": 0.8239,
      "step": 688990
    },
    {
      "epoch": 2.4147732251066323,
      "grad_norm": 2.6875,
      "learning_rate": 1.083761043222713e-05,
      "loss": 0.8196,
      "step": 689000
    },
    {
      "epoch": 2.4148082726135276,
      "grad_norm": 2.90625,
      "learning_rate": 1.0836961403563428e-05,
      "loss": 0.829,
      "step": 689010
    },
    {
      "epoch": 2.4148433201204234,
      "grad_norm": 2.640625,
      "learning_rate": 1.0836312374899724e-05,
      "loss": 0.8491,
      "step": 689020
    },
    {
      "epoch": 2.4148783676273187,
      "grad_norm": 3.03125,
      "learning_rate": 1.0835663346236022e-05,
      "loss": 0.7906,
      "step": 689030
    },
    {
      "epoch": 2.4149134151342144,
      "grad_norm": 2.71875,
      "learning_rate": 1.0835014317572322e-05,
      "loss": 0.7894,
      "step": 689040
    },
    {
      "epoch": 2.41494846264111,
      "grad_norm": 3.171875,
      "learning_rate": 1.083436528890862e-05,
      "loss": 0.866,
      "step": 689050
    },
    {
      "epoch": 2.4149835101480055,
      "grad_norm": 2.90625,
      "learning_rate": 1.0833716260244918e-05,
      "loss": 0.7989,
      "step": 689060
    },
    {
      "epoch": 2.4150185576549013,
      "grad_norm": 2.71875,
      "learning_rate": 1.0833067231581216e-05,
      "loss": 0.7466,
      "step": 689070
    },
    {
      "epoch": 2.4150536051617966,
      "grad_norm": 2.890625,
      "learning_rate": 1.0832418202917514e-05,
      "loss": 0.7987,
      "step": 689080
    },
    {
      "epoch": 2.4150886526686923,
      "grad_norm": 2.84375,
      "learning_rate": 1.0831769174253812e-05,
      "loss": 0.7951,
      "step": 689090
    },
    {
      "epoch": 2.415123700175588,
      "grad_norm": 3.25,
      "learning_rate": 1.083112014559011e-05,
      "loss": 0.7867,
      "step": 689100
    },
    {
      "epoch": 2.415158747682484,
      "grad_norm": 2.8125,
      "learning_rate": 1.0830471116926408e-05,
      "loss": 0.892,
      "step": 689110
    },
    {
      "epoch": 2.415193795189379,
      "grad_norm": 2.90625,
      "learning_rate": 1.0829822088262706e-05,
      "loss": 0.7946,
      "step": 689120
    },
    {
      "epoch": 2.415228842696275,
      "grad_norm": 2.84375,
      "learning_rate": 1.0829173059599004e-05,
      "loss": 0.8793,
      "step": 689130
    },
    {
      "epoch": 2.4152638902031702,
      "grad_norm": 2.921875,
      "learning_rate": 1.0828524030935302e-05,
      "loss": 0.7533,
      "step": 689140
    },
    {
      "epoch": 2.415298937710066,
      "grad_norm": 2.453125,
      "learning_rate": 1.08278750022716e-05,
      "loss": 0.8171,
      "step": 689150
    },
    {
      "epoch": 2.4153339852169617,
      "grad_norm": 3.859375,
      "learning_rate": 1.08272259736079e-05,
      "loss": 0.8388,
      "step": 689160
    },
    {
      "epoch": 2.415369032723857,
      "grad_norm": 2.78125,
      "learning_rate": 1.0826576944944198e-05,
      "loss": 0.7765,
      "step": 689170
    },
    {
      "epoch": 2.415404080230753,
      "grad_norm": 2.875,
      "learning_rate": 1.0825927916280496e-05,
      "loss": 0.7976,
      "step": 689180
    },
    {
      "epoch": 2.4154391277376486,
      "grad_norm": 3.0625,
      "learning_rate": 1.0825278887616794e-05,
      "loss": 0.8,
      "step": 689190
    },
    {
      "epoch": 2.415474175244544,
      "grad_norm": 2.875,
      "learning_rate": 1.0824629858953092e-05,
      "loss": 0.7666,
      "step": 689200
    },
    {
      "epoch": 2.4155092227514396,
      "grad_norm": 2.9375,
      "learning_rate": 1.0823980830289388e-05,
      "loss": 0.8621,
      "step": 689210
    },
    {
      "epoch": 2.4155442702583354,
      "grad_norm": 2.375,
      "learning_rate": 1.0823331801625688e-05,
      "loss": 0.7178,
      "step": 689220
    },
    {
      "epoch": 2.4155793177652307,
      "grad_norm": 3.03125,
      "learning_rate": 1.0822682772961986e-05,
      "loss": 0.7919,
      "step": 689230
    },
    {
      "epoch": 2.4156143652721265,
      "grad_norm": 3.3125,
      "learning_rate": 1.0822033744298284e-05,
      "loss": 0.8184,
      "step": 689240
    },
    {
      "epoch": 2.415649412779022,
      "grad_norm": 3.0625,
      "learning_rate": 1.0821384715634582e-05,
      "loss": 0.7993,
      "step": 689250
    },
    {
      "epoch": 2.4156844602859175,
      "grad_norm": 3.390625,
      "learning_rate": 1.082073568697088e-05,
      "loss": 0.8749,
      "step": 689260
    },
    {
      "epoch": 2.4157195077928133,
      "grad_norm": 2.65625,
      "learning_rate": 1.0820086658307178e-05,
      "loss": 0.8181,
      "step": 689270
    },
    {
      "epoch": 2.4157545552997086,
      "grad_norm": 2.984375,
      "learning_rate": 1.0819437629643476e-05,
      "loss": 0.9621,
      "step": 689280
    },
    {
      "epoch": 2.4157896028066044,
      "grad_norm": 2.640625,
      "learning_rate": 1.0818788600979776e-05,
      "loss": 0.8289,
      "step": 689290
    },
    {
      "epoch": 2.4158246503135,
      "grad_norm": 3.359375,
      "learning_rate": 1.0818139572316072e-05,
      "loss": 0.8798,
      "step": 689300
    },
    {
      "epoch": 2.4158596978203954,
      "grad_norm": 2.265625,
      "learning_rate": 1.081749054365237e-05,
      "loss": 0.776,
      "step": 689310
    },
    {
      "epoch": 2.415894745327291,
      "grad_norm": 3.171875,
      "learning_rate": 1.0816841514988668e-05,
      "loss": 0.9651,
      "step": 689320
    },
    {
      "epoch": 2.415929792834187,
      "grad_norm": 2.5,
      "learning_rate": 1.0816192486324966e-05,
      "loss": 0.7981,
      "step": 689330
    },
    {
      "epoch": 2.4159648403410823,
      "grad_norm": 2.8125,
      "learning_rate": 1.0815543457661264e-05,
      "loss": 0.6988,
      "step": 689340
    },
    {
      "epoch": 2.415999887847978,
      "grad_norm": 2.828125,
      "learning_rate": 1.0814894428997564e-05,
      "loss": 0.8244,
      "step": 689350
    },
    {
      "epoch": 2.4160349353548733,
      "grad_norm": 3.09375,
      "learning_rate": 1.0814245400333862e-05,
      "loss": 0.8163,
      "step": 689360
    },
    {
      "epoch": 2.416069982861769,
      "grad_norm": 2.84375,
      "learning_rate": 1.081359637167016e-05,
      "loss": 0.7951,
      "step": 689370
    },
    {
      "epoch": 2.416105030368665,
      "grad_norm": 2.828125,
      "learning_rate": 1.0812947343006458e-05,
      "loss": 0.7892,
      "step": 689380
    },
    {
      "epoch": 2.41614007787556,
      "grad_norm": 2.96875,
      "learning_rate": 1.0812298314342754e-05,
      "loss": 0.8675,
      "step": 689390
    },
    {
      "epoch": 2.416175125382456,
      "grad_norm": 2.890625,
      "learning_rate": 1.0811649285679054e-05,
      "loss": 0.928,
      "step": 689400
    },
    {
      "epoch": 2.4162101728893517,
      "grad_norm": 2.390625,
      "learning_rate": 1.0811000257015352e-05,
      "loss": 0.8168,
      "step": 689410
    },
    {
      "epoch": 2.416245220396247,
      "grad_norm": 2.828125,
      "learning_rate": 1.081035122835165e-05,
      "loss": 0.8052,
      "step": 689420
    },
    {
      "epoch": 2.4162802679031428,
      "grad_norm": 3.015625,
      "learning_rate": 1.0809702199687948e-05,
      "loss": 0.81,
      "step": 689430
    },
    {
      "epoch": 2.4163153154100385,
      "grad_norm": 3.125,
      "learning_rate": 1.0809053171024246e-05,
      "loss": 0.8127,
      "step": 689440
    },
    {
      "epoch": 2.416350362916934,
      "grad_norm": 2.734375,
      "learning_rate": 1.0808404142360544e-05,
      "loss": 0.7666,
      "step": 689450
    },
    {
      "epoch": 2.4163854104238296,
      "grad_norm": 2.6875,
      "learning_rate": 1.0807755113696842e-05,
      "loss": 0.7863,
      "step": 689460
    },
    {
      "epoch": 2.416420457930725,
      "grad_norm": 3.109375,
      "learning_rate": 1.0807106085033141e-05,
      "loss": 0.8452,
      "step": 689470
    },
    {
      "epoch": 2.4164555054376207,
      "grad_norm": 2.890625,
      "learning_rate": 1.080645705636944e-05,
      "loss": 0.8142,
      "step": 689480
    },
    {
      "epoch": 2.4164905529445164,
      "grad_norm": 2.96875,
      "learning_rate": 1.0805808027705736e-05,
      "loss": 0.8349,
      "step": 689490
    },
    {
      "epoch": 2.4165256004514117,
      "grad_norm": 2.625,
      "learning_rate": 1.0805158999042034e-05,
      "loss": 0.7551,
      "step": 689500
    },
    {
      "epoch": 2.4165606479583075,
      "grad_norm": 2.875,
      "learning_rate": 1.0804509970378332e-05,
      "loss": 0.9193,
      "step": 689510
    },
    {
      "epoch": 2.4165956954652033,
      "grad_norm": 3.046875,
      "learning_rate": 1.080386094171463e-05,
      "loss": 0.7506,
      "step": 689520
    },
    {
      "epoch": 2.4166307429720986,
      "grad_norm": 3.203125,
      "learning_rate": 1.080321191305093e-05,
      "loss": 0.7605,
      "step": 689530
    },
    {
      "epoch": 2.4166657904789943,
      "grad_norm": 2.921875,
      "learning_rate": 1.0802562884387227e-05,
      "loss": 0.8071,
      "step": 689540
    },
    {
      "epoch": 2.41670083798589,
      "grad_norm": 2.59375,
      "learning_rate": 1.0801913855723525e-05,
      "loss": 0.7496,
      "step": 689550
    },
    {
      "epoch": 2.4167358854927854,
      "grad_norm": 2.4375,
      "learning_rate": 1.0801264827059823e-05,
      "loss": 0.8339,
      "step": 689560
    },
    {
      "epoch": 2.416770932999681,
      "grad_norm": 2.859375,
      "learning_rate": 1.0800615798396121e-05,
      "loss": 0.8079,
      "step": 689570
    },
    {
      "epoch": 2.4168059805065765,
      "grad_norm": 3.484375,
      "learning_rate": 1.0799966769732418e-05,
      "loss": 0.8455,
      "step": 689580
    },
    {
      "epoch": 2.4168410280134722,
      "grad_norm": 3.1875,
      "learning_rate": 1.0799317741068717e-05,
      "loss": 0.8072,
      "step": 689590
    },
    {
      "epoch": 2.416876075520368,
      "grad_norm": 2.984375,
      "learning_rate": 1.0798668712405015e-05,
      "loss": 0.7987,
      "step": 689600
    },
    {
      "epoch": 2.4169111230272633,
      "grad_norm": 2.828125,
      "learning_rate": 1.0798019683741313e-05,
      "loss": 0.8077,
      "step": 689610
    },
    {
      "epoch": 2.416946170534159,
      "grad_norm": 2.828125,
      "learning_rate": 1.0797370655077611e-05,
      "loss": 0.8784,
      "step": 689620
    },
    {
      "epoch": 2.416981218041055,
      "grad_norm": 2.515625,
      "learning_rate": 1.079672162641391e-05,
      "loss": 0.8354,
      "step": 689630
    },
    {
      "epoch": 2.41701626554795,
      "grad_norm": 2.953125,
      "learning_rate": 1.0796072597750207e-05,
      "loss": 0.8256,
      "step": 689640
    },
    {
      "epoch": 2.417051313054846,
      "grad_norm": 3.59375,
      "learning_rate": 1.0795423569086505e-05,
      "loss": 0.8407,
      "step": 689650
    },
    {
      "epoch": 2.4170863605617416,
      "grad_norm": 3.1875,
      "learning_rate": 1.0794774540422805e-05,
      "loss": 0.8252,
      "step": 689660
    },
    {
      "epoch": 2.417121408068637,
      "grad_norm": 2.71875,
      "learning_rate": 1.0794125511759103e-05,
      "loss": 0.8621,
      "step": 689670
    },
    {
      "epoch": 2.4171564555755327,
      "grad_norm": 3.3125,
      "learning_rate": 1.07934764830954e-05,
      "loss": 0.8034,
      "step": 689680
    },
    {
      "epoch": 2.417191503082428,
      "grad_norm": 2.78125,
      "learning_rate": 1.0792827454431697e-05,
      "loss": 0.8913,
      "step": 689690
    },
    {
      "epoch": 2.417226550589324,
      "grad_norm": 3.125,
      "learning_rate": 1.0792178425767995e-05,
      "loss": 0.8324,
      "step": 689700
    },
    {
      "epoch": 2.4172615980962195,
      "grad_norm": 2.296875,
      "learning_rate": 1.0791529397104295e-05,
      "loss": 0.845,
      "step": 689710
    },
    {
      "epoch": 2.417296645603115,
      "grad_norm": 3.4375,
      "learning_rate": 1.0790880368440593e-05,
      "loss": 0.7832,
      "step": 689720
    },
    {
      "epoch": 2.4173316931100106,
      "grad_norm": 2.734375,
      "learning_rate": 1.0790231339776891e-05,
      "loss": 0.8305,
      "step": 689730
    },
    {
      "epoch": 2.4173667406169064,
      "grad_norm": 2.828125,
      "learning_rate": 1.0789582311113189e-05,
      "loss": 0.7862,
      "step": 689740
    },
    {
      "epoch": 2.4174017881238017,
      "grad_norm": 2.609375,
      "learning_rate": 1.0788933282449487e-05,
      "loss": 0.7852,
      "step": 689750
    },
    {
      "epoch": 2.4174368356306974,
      "grad_norm": 3.140625,
      "learning_rate": 1.0788284253785785e-05,
      "loss": 0.7743,
      "step": 689760
    },
    {
      "epoch": 2.417471883137593,
      "grad_norm": 2.71875,
      "learning_rate": 1.0787635225122083e-05,
      "loss": 0.7612,
      "step": 689770
    },
    {
      "epoch": 2.4175069306444885,
      "grad_norm": 3.015625,
      "learning_rate": 1.0786986196458381e-05,
      "loss": 0.8371,
      "step": 689780
    },
    {
      "epoch": 2.4175419781513843,
      "grad_norm": 2.890625,
      "learning_rate": 1.0786337167794679e-05,
      "loss": 0.7221,
      "step": 689790
    },
    {
      "epoch": 2.4175770256582796,
      "grad_norm": 2.84375,
      "learning_rate": 1.0785688139130977e-05,
      "loss": 0.7762,
      "step": 689800
    },
    {
      "epoch": 2.4176120731651753,
      "grad_norm": 2.59375,
      "learning_rate": 1.0785039110467275e-05,
      "loss": 0.8531,
      "step": 689810
    },
    {
      "epoch": 2.417647120672071,
      "grad_norm": 2.96875,
      "learning_rate": 1.0784390081803573e-05,
      "loss": 0.7887,
      "step": 689820
    },
    {
      "epoch": 2.4176821681789664,
      "grad_norm": 3.0625,
      "learning_rate": 1.0783741053139871e-05,
      "loss": 0.7956,
      "step": 689830
    },
    {
      "epoch": 2.417717215685862,
      "grad_norm": 2.9375,
      "learning_rate": 1.078309202447617e-05,
      "loss": 0.7347,
      "step": 689840
    },
    {
      "epoch": 2.417752263192758,
      "grad_norm": 3.046875,
      "learning_rate": 1.0782442995812469e-05,
      "loss": 0.7943,
      "step": 689850
    },
    {
      "epoch": 2.4177873106996532,
      "grad_norm": 2.625,
      "learning_rate": 1.0781793967148765e-05,
      "loss": 0.823,
      "step": 689860
    },
    {
      "epoch": 2.417822358206549,
      "grad_norm": 3.0625,
      "learning_rate": 1.0781144938485063e-05,
      "loss": 0.8144,
      "step": 689870
    },
    {
      "epoch": 2.4178574057134448,
      "grad_norm": 2.6875,
      "learning_rate": 1.0780495909821361e-05,
      "loss": 0.7154,
      "step": 689880
    },
    {
      "epoch": 2.41789245322034,
      "grad_norm": 2.71875,
      "learning_rate": 1.0779846881157659e-05,
      "loss": 0.8242,
      "step": 689890
    },
    {
      "epoch": 2.417927500727236,
      "grad_norm": 2.890625,
      "learning_rate": 1.0779197852493959e-05,
      "loss": 0.7929,
      "step": 689900
    },
    {
      "epoch": 2.417962548234131,
      "grad_norm": 2.515625,
      "learning_rate": 1.0778548823830257e-05,
      "loss": 0.6993,
      "step": 689910
    },
    {
      "epoch": 2.417997595741027,
      "grad_norm": 2.859375,
      "learning_rate": 1.0777899795166555e-05,
      "loss": 0.7435,
      "step": 689920
    },
    {
      "epoch": 2.4180326432479227,
      "grad_norm": 3.015625,
      "learning_rate": 1.0777250766502853e-05,
      "loss": 0.81,
      "step": 689930
    },
    {
      "epoch": 2.418067690754818,
      "grad_norm": 3.515625,
      "learning_rate": 1.077660173783915e-05,
      "loss": 0.857,
      "step": 689940
    },
    {
      "epoch": 2.4181027382617137,
      "grad_norm": 2.703125,
      "learning_rate": 1.0775952709175449e-05,
      "loss": 0.7466,
      "step": 689950
    },
    {
      "epoch": 2.4181377857686095,
      "grad_norm": 2.9375,
      "learning_rate": 1.0775303680511747e-05,
      "loss": 0.7697,
      "step": 689960
    },
    {
      "epoch": 2.418172833275505,
      "grad_norm": 2.65625,
      "learning_rate": 1.0774654651848045e-05,
      "loss": 0.8136,
      "step": 689970
    },
    {
      "epoch": 2.4182078807824006,
      "grad_norm": 3.390625,
      "learning_rate": 1.0774005623184343e-05,
      "loss": 0.8432,
      "step": 689980
    },
    {
      "epoch": 2.4182429282892963,
      "grad_norm": 2.828125,
      "learning_rate": 1.077335659452064e-05,
      "loss": 0.8116,
      "step": 689990
    },
    {
      "epoch": 2.4182779757961916,
      "grad_norm": 3.40625,
      "learning_rate": 1.0772707565856939e-05,
      "loss": 0.8036,
      "step": 690000
    },
    {
      "epoch": 2.4182779757961916,
      "eval_loss": 0.7590867877006531,
      "eval_runtime": 551.32,
      "eval_samples_per_second": 690.046,
      "eval_steps_per_second": 57.504,
      "step": 690000
    },
    {
      "epoch": 2.4183130233030874,
      "grad_norm": 2.84375,
      "learning_rate": 1.0772058537193237e-05,
      "loss": 0.7624,
      "step": 690010
    },
    {
      "epoch": 2.4183480708099827,
      "grad_norm": 2.640625,
      "learning_rate": 1.0771409508529536e-05,
      "loss": 0.7604,
      "step": 690020
    },
    {
      "epoch": 2.4183831183168785,
      "grad_norm": 2.625,
      "learning_rate": 1.0770760479865834e-05,
      "loss": 0.8316,
      "step": 690030
    },
    {
      "epoch": 2.4184181658237742,
      "grad_norm": 2.9375,
      "learning_rate": 1.0770111451202132e-05,
      "loss": 0.8081,
      "step": 690040
    },
    {
      "epoch": 2.4184532133306695,
      "grad_norm": 3.0,
      "learning_rate": 1.0769462422538429e-05,
      "loss": 0.778,
      "step": 690050
    },
    {
      "epoch": 2.4184882608375653,
      "grad_norm": 3.265625,
      "learning_rate": 1.0768813393874727e-05,
      "loss": 0.7265,
      "step": 690060
    },
    {
      "epoch": 2.418523308344461,
      "grad_norm": 2.96875,
      "learning_rate": 1.0768164365211025e-05,
      "loss": 0.7932,
      "step": 690070
    },
    {
      "epoch": 2.4185583558513564,
      "grad_norm": 2.984375,
      "learning_rate": 1.0767515336547324e-05,
      "loss": 0.8215,
      "step": 690080
    },
    {
      "epoch": 2.418593403358252,
      "grad_norm": 2.8125,
      "learning_rate": 1.0766866307883622e-05,
      "loss": 0.7838,
      "step": 690090
    },
    {
      "epoch": 2.418628450865148,
      "grad_norm": 2.71875,
      "learning_rate": 1.076621727921992e-05,
      "loss": 0.8152,
      "step": 690100
    },
    {
      "epoch": 2.418663498372043,
      "grad_norm": 3.015625,
      "learning_rate": 1.0765568250556218e-05,
      "loss": 0.8003,
      "step": 690110
    },
    {
      "epoch": 2.418698545878939,
      "grad_norm": 3.46875,
      "learning_rate": 1.0764919221892516e-05,
      "loss": 0.8461,
      "step": 690120
    },
    {
      "epoch": 2.4187335933858343,
      "grad_norm": 2.296875,
      "learning_rate": 1.0764270193228814e-05,
      "loss": 0.6619,
      "step": 690130
    },
    {
      "epoch": 2.41876864089273,
      "grad_norm": 2.8125,
      "learning_rate": 1.0763621164565112e-05,
      "loss": 0.7993,
      "step": 690140
    },
    {
      "epoch": 2.418803688399626,
      "grad_norm": 3.109375,
      "learning_rate": 1.076297213590141e-05,
      "loss": 0.737,
      "step": 690150
    },
    {
      "epoch": 2.418838735906521,
      "grad_norm": 3.078125,
      "learning_rate": 1.0762323107237708e-05,
      "loss": 0.8417,
      "step": 690160
    },
    {
      "epoch": 2.418873783413417,
      "grad_norm": 3.203125,
      "learning_rate": 1.0761674078574006e-05,
      "loss": 0.766,
      "step": 690170
    },
    {
      "epoch": 2.4189088309203126,
      "grad_norm": 3.015625,
      "learning_rate": 1.0761025049910304e-05,
      "loss": 0.7738,
      "step": 690180
    },
    {
      "epoch": 2.418943878427208,
      "grad_norm": 3.203125,
      "learning_rate": 1.0760376021246602e-05,
      "loss": 0.8318,
      "step": 690190
    },
    {
      "epoch": 2.4189789259341037,
      "grad_norm": 3.015625,
      "learning_rate": 1.07597269925829e-05,
      "loss": 0.7539,
      "step": 690200
    },
    {
      "epoch": 2.4190139734409994,
      "grad_norm": 2.96875,
      "learning_rate": 1.07590779639192e-05,
      "loss": 0.7825,
      "step": 690210
    },
    {
      "epoch": 2.4190490209478948,
      "grad_norm": 3.109375,
      "learning_rate": 1.0758428935255498e-05,
      "loss": 0.8012,
      "step": 690220
    },
    {
      "epoch": 2.4190840684547905,
      "grad_norm": 2.96875,
      "learning_rate": 1.0757779906591796e-05,
      "loss": 0.8698,
      "step": 690230
    },
    {
      "epoch": 2.419119115961686,
      "grad_norm": 2.921875,
      "learning_rate": 1.0757130877928092e-05,
      "loss": 0.7916,
      "step": 690240
    },
    {
      "epoch": 2.4191541634685816,
      "grad_norm": 2.546875,
      "learning_rate": 1.075648184926439e-05,
      "loss": 0.7899,
      "step": 690250
    },
    {
      "epoch": 2.4191892109754773,
      "grad_norm": 3.046875,
      "learning_rate": 1.075583282060069e-05,
      "loss": 0.8091,
      "step": 690260
    },
    {
      "epoch": 2.419224258482373,
      "grad_norm": 2.9375,
      "learning_rate": 1.0755183791936988e-05,
      "loss": 0.8061,
      "step": 690270
    },
    {
      "epoch": 2.4192593059892684,
      "grad_norm": 3.234375,
      "learning_rate": 1.0754534763273286e-05,
      "loss": 0.76,
      "step": 690280
    },
    {
      "epoch": 2.419294353496164,
      "grad_norm": 2.609375,
      "learning_rate": 1.0753885734609584e-05,
      "loss": 0.8243,
      "step": 690290
    },
    {
      "epoch": 2.4193294010030595,
      "grad_norm": 5.21875,
      "learning_rate": 1.0753236705945882e-05,
      "loss": 0.8198,
      "step": 690300
    },
    {
      "epoch": 2.4193644485099552,
      "grad_norm": 2.765625,
      "learning_rate": 1.075258767728218e-05,
      "loss": 0.7757,
      "step": 690310
    },
    {
      "epoch": 2.419399496016851,
      "grad_norm": 3.078125,
      "learning_rate": 1.0751938648618478e-05,
      "loss": 0.8616,
      "step": 690320
    },
    {
      "epoch": 2.4194345435237463,
      "grad_norm": 3.03125,
      "learning_rate": 1.0751289619954776e-05,
      "loss": 0.822,
      "step": 690330
    },
    {
      "epoch": 2.419469591030642,
      "grad_norm": 2.640625,
      "learning_rate": 1.0750640591291074e-05,
      "loss": 0.802,
      "step": 690340
    },
    {
      "epoch": 2.4195046385375374,
      "grad_norm": 3.3125,
      "learning_rate": 1.0749991562627372e-05,
      "loss": 0.7906,
      "step": 690350
    },
    {
      "epoch": 2.419539686044433,
      "grad_norm": 2.8125,
      "learning_rate": 1.074934253396367e-05,
      "loss": 0.8224,
      "step": 690360
    },
    {
      "epoch": 2.419574733551329,
      "grad_norm": 2.9375,
      "learning_rate": 1.0748693505299968e-05,
      "loss": 0.887,
      "step": 690370
    },
    {
      "epoch": 2.4196097810582247,
      "grad_norm": 2.375,
      "learning_rate": 1.0748044476636266e-05,
      "loss": 0.7576,
      "step": 690380
    },
    {
      "epoch": 2.41964482856512,
      "grad_norm": 3.03125,
      "learning_rate": 1.0747395447972566e-05,
      "loss": 0.6955,
      "step": 690390
    },
    {
      "epoch": 2.4196798760720157,
      "grad_norm": 2.703125,
      "learning_rate": 1.0746746419308864e-05,
      "loss": 0.7657,
      "step": 690400
    },
    {
      "epoch": 2.419714923578911,
      "grad_norm": 2.828125,
      "learning_rate": 1.0746097390645162e-05,
      "loss": 0.8773,
      "step": 690410
    },
    {
      "epoch": 2.419749971085807,
      "grad_norm": 2.671875,
      "learning_rate": 1.074544836198146e-05,
      "loss": 0.8572,
      "step": 690420
    },
    {
      "epoch": 2.4197850185927026,
      "grad_norm": 2.640625,
      "learning_rate": 1.0744799333317756e-05,
      "loss": 0.8239,
      "step": 690430
    },
    {
      "epoch": 2.419820066099598,
      "grad_norm": 2.578125,
      "learning_rate": 1.0744150304654054e-05,
      "loss": 0.7525,
      "step": 690440
    },
    {
      "epoch": 2.4198551136064936,
      "grad_norm": 2.640625,
      "learning_rate": 1.0743501275990354e-05,
      "loss": 0.7887,
      "step": 690450
    },
    {
      "epoch": 2.4198901611133894,
      "grad_norm": 3.078125,
      "learning_rate": 1.0742852247326652e-05,
      "loss": 0.806,
      "step": 690460
    },
    {
      "epoch": 2.4199252086202847,
      "grad_norm": 2.890625,
      "learning_rate": 1.074220321866295e-05,
      "loss": 0.825,
      "step": 690470
    },
    {
      "epoch": 2.4199602561271805,
      "grad_norm": 2.546875,
      "learning_rate": 1.0741554189999248e-05,
      "loss": 0.7609,
      "step": 690480
    },
    {
      "epoch": 2.4199953036340762,
      "grad_norm": 3.078125,
      "learning_rate": 1.0740905161335546e-05,
      "loss": 0.7807,
      "step": 690490
    },
    {
      "epoch": 2.4200303511409715,
      "grad_norm": 2.953125,
      "learning_rate": 1.0740256132671844e-05,
      "loss": 0.837,
      "step": 690500
    },
    {
      "epoch": 2.4200653986478673,
      "grad_norm": 2.953125,
      "learning_rate": 1.0739607104008142e-05,
      "loss": 0.8121,
      "step": 690510
    },
    {
      "epoch": 2.4201004461547626,
      "grad_norm": 2.828125,
      "learning_rate": 1.073895807534444e-05,
      "loss": 0.8236,
      "step": 690520
    },
    {
      "epoch": 2.4201354936616584,
      "grad_norm": 2.453125,
      "learning_rate": 1.0738309046680738e-05,
      "loss": 0.7846,
      "step": 690530
    },
    {
      "epoch": 2.420170541168554,
      "grad_norm": 3.0625,
      "learning_rate": 1.0737660018017036e-05,
      "loss": 0.8104,
      "step": 690540
    },
    {
      "epoch": 2.4202055886754494,
      "grad_norm": 3.21875,
      "learning_rate": 1.0737010989353334e-05,
      "loss": 0.858,
      "step": 690550
    },
    {
      "epoch": 2.420240636182345,
      "grad_norm": 2.546875,
      "learning_rate": 1.0736361960689632e-05,
      "loss": 0.8158,
      "step": 690560
    },
    {
      "epoch": 2.420275683689241,
      "grad_norm": 3.0,
      "learning_rate": 1.0735712932025931e-05,
      "loss": 0.8187,
      "step": 690570
    },
    {
      "epoch": 2.4203107311961363,
      "grad_norm": 3.046875,
      "learning_rate": 1.073506390336223e-05,
      "loss": 0.8252,
      "step": 690580
    },
    {
      "epoch": 2.420345778703032,
      "grad_norm": 2.875,
      "learning_rate": 1.0734414874698527e-05,
      "loss": 0.8247,
      "step": 690590
    },
    {
      "epoch": 2.420380826209928,
      "grad_norm": 3.234375,
      "learning_rate": 1.0733765846034825e-05,
      "loss": 0.7996,
      "step": 690600
    },
    {
      "epoch": 2.420415873716823,
      "grad_norm": 2.671875,
      "learning_rate": 1.0733116817371123e-05,
      "loss": 0.8064,
      "step": 690610
    },
    {
      "epoch": 2.420450921223719,
      "grad_norm": 3.046875,
      "learning_rate": 1.073246778870742e-05,
      "loss": 0.8304,
      "step": 690620
    },
    {
      "epoch": 2.420485968730614,
      "grad_norm": 7.4375,
      "learning_rate": 1.073181876004372e-05,
      "loss": 0.8293,
      "step": 690630
    },
    {
      "epoch": 2.42052101623751,
      "grad_norm": 2.734375,
      "learning_rate": 1.0731169731380017e-05,
      "loss": 0.8246,
      "step": 690640
    },
    {
      "epoch": 2.4205560637444057,
      "grad_norm": 3.21875,
      "learning_rate": 1.0730520702716315e-05,
      "loss": 0.7843,
      "step": 690650
    },
    {
      "epoch": 2.420591111251301,
      "grad_norm": 3.046875,
      "learning_rate": 1.0729871674052613e-05,
      "loss": 0.847,
      "step": 690660
    },
    {
      "epoch": 2.4206261587581968,
      "grad_norm": 2.734375,
      "learning_rate": 1.0729222645388911e-05,
      "loss": 0.724,
      "step": 690670
    },
    {
      "epoch": 2.4206612062650925,
      "grad_norm": 2.453125,
      "learning_rate": 1.072857361672521e-05,
      "loss": 0.7904,
      "step": 690680
    },
    {
      "epoch": 2.420696253771988,
      "grad_norm": 2.9375,
      "learning_rate": 1.0727924588061507e-05,
      "loss": 0.8369,
      "step": 690690
    },
    {
      "epoch": 2.4207313012788836,
      "grad_norm": 3.1875,
      "learning_rate": 1.0727275559397807e-05,
      "loss": 0.7916,
      "step": 690700
    },
    {
      "epoch": 2.4207663487857793,
      "grad_norm": 2.796875,
      "learning_rate": 1.0726626530734103e-05,
      "loss": 0.8144,
      "step": 690710
    },
    {
      "epoch": 2.4208013962926747,
      "grad_norm": 2.640625,
      "learning_rate": 1.0725977502070401e-05,
      "loss": 0.8157,
      "step": 690720
    },
    {
      "epoch": 2.4208364437995704,
      "grad_norm": 3.375,
      "learning_rate": 1.07253284734067e-05,
      "loss": 0.836,
      "step": 690730
    },
    {
      "epoch": 2.4208714913064657,
      "grad_norm": 2.875,
      "learning_rate": 1.0724679444742997e-05,
      "loss": 0.8498,
      "step": 690740
    },
    {
      "epoch": 2.4209065388133615,
      "grad_norm": 3.046875,
      "learning_rate": 1.0724030416079295e-05,
      "loss": 0.8757,
      "step": 690750
    },
    {
      "epoch": 2.4209415863202572,
      "grad_norm": 3.234375,
      "learning_rate": 1.0723381387415595e-05,
      "loss": 0.791,
      "step": 690760
    },
    {
      "epoch": 2.4209766338271526,
      "grad_norm": 2.5,
      "learning_rate": 1.0722732358751893e-05,
      "loss": 0.8158,
      "step": 690770
    },
    {
      "epoch": 2.4210116813340483,
      "grad_norm": 3.0,
      "learning_rate": 1.0722083330088191e-05,
      "loss": 0.8024,
      "step": 690780
    },
    {
      "epoch": 2.421046728840944,
      "grad_norm": 3.390625,
      "learning_rate": 1.0721434301424489e-05,
      "loss": 0.8317,
      "step": 690790
    },
    {
      "epoch": 2.4210817763478394,
      "grad_norm": 2.78125,
      "learning_rate": 1.0720785272760785e-05,
      "loss": 0.8177,
      "step": 690800
    },
    {
      "epoch": 2.421116823854735,
      "grad_norm": 3.015625,
      "learning_rate": 1.0720136244097085e-05,
      "loss": 0.7853,
      "step": 690810
    },
    {
      "epoch": 2.421151871361631,
      "grad_norm": 2.828125,
      "learning_rate": 1.0719487215433383e-05,
      "loss": 0.8125,
      "step": 690820
    },
    {
      "epoch": 2.421186918868526,
      "grad_norm": 2.71875,
      "learning_rate": 1.0718838186769681e-05,
      "loss": 0.7628,
      "step": 690830
    },
    {
      "epoch": 2.421221966375422,
      "grad_norm": 3.328125,
      "learning_rate": 1.0718189158105979e-05,
      "loss": 0.8444,
      "step": 690840
    },
    {
      "epoch": 2.4212570138823173,
      "grad_norm": 2.6875,
      "learning_rate": 1.0717540129442277e-05,
      "loss": 0.7835,
      "step": 690850
    },
    {
      "epoch": 2.421292061389213,
      "grad_norm": 2.765625,
      "learning_rate": 1.0716891100778575e-05,
      "loss": 0.8996,
      "step": 690860
    },
    {
      "epoch": 2.421327108896109,
      "grad_norm": 3.5625,
      "learning_rate": 1.0716242072114873e-05,
      "loss": 0.8522,
      "step": 690870
    },
    {
      "epoch": 2.421362156403004,
      "grad_norm": 3.34375,
      "learning_rate": 1.0715593043451173e-05,
      "loss": 0.8303,
      "step": 690880
    },
    {
      "epoch": 2.4213972039099,
      "grad_norm": 2.609375,
      "learning_rate": 1.071494401478747e-05,
      "loss": 0.7744,
      "step": 690890
    },
    {
      "epoch": 2.4214322514167956,
      "grad_norm": 2.8125,
      "learning_rate": 1.0714294986123767e-05,
      "loss": 0.7781,
      "step": 690900
    },
    {
      "epoch": 2.421467298923691,
      "grad_norm": 2.953125,
      "learning_rate": 1.0713645957460065e-05,
      "loss": 0.7537,
      "step": 690910
    },
    {
      "epoch": 2.4215023464305867,
      "grad_norm": 2.984375,
      "learning_rate": 1.0712996928796363e-05,
      "loss": 0.7842,
      "step": 690920
    },
    {
      "epoch": 2.4215373939374825,
      "grad_norm": 2.640625,
      "learning_rate": 1.0712347900132661e-05,
      "loss": 0.7477,
      "step": 690930
    },
    {
      "epoch": 2.4215724414443778,
      "grad_norm": 3.078125,
      "learning_rate": 1.071169887146896e-05,
      "loss": 0.8288,
      "step": 690940
    },
    {
      "epoch": 2.4216074889512735,
      "grad_norm": 2.71875,
      "learning_rate": 1.0711049842805259e-05,
      "loss": 0.7876,
      "step": 690950
    },
    {
      "epoch": 2.421642536458169,
      "grad_norm": 3.109375,
      "learning_rate": 1.0710400814141557e-05,
      "loss": 0.7665,
      "step": 690960
    },
    {
      "epoch": 2.4216775839650646,
      "grad_norm": 3.078125,
      "learning_rate": 1.0709751785477855e-05,
      "loss": 0.8749,
      "step": 690970
    },
    {
      "epoch": 2.4217126314719604,
      "grad_norm": 2.5625,
      "learning_rate": 1.0709102756814153e-05,
      "loss": 0.6592,
      "step": 690980
    },
    {
      "epoch": 2.4217476789788557,
      "grad_norm": 2.625,
      "learning_rate": 1.0708453728150449e-05,
      "loss": 0.8111,
      "step": 690990
    },
    {
      "epoch": 2.4217827264857514,
      "grad_norm": 2.484375,
      "learning_rate": 1.0707804699486749e-05,
      "loss": 0.798,
      "step": 691000
    },
    {
      "epoch": 2.421817773992647,
      "grad_norm": 2.78125,
      "learning_rate": 1.0707155670823047e-05,
      "loss": 0.7888,
      "step": 691010
    },
    {
      "epoch": 2.4218528214995425,
      "grad_norm": 4.875,
      "learning_rate": 1.0706506642159345e-05,
      "loss": 0.7438,
      "step": 691020
    },
    {
      "epoch": 2.4218878690064383,
      "grad_norm": 3.0625,
      "learning_rate": 1.0705857613495643e-05,
      "loss": 0.8277,
      "step": 691030
    },
    {
      "epoch": 2.421922916513334,
      "grad_norm": 2.796875,
      "learning_rate": 1.070520858483194e-05,
      "loss": 0.8904,
      "step": 691040
    },
    {
      "epoch": 2.4219579640202293,
      "grad_norm": 3.0625,
      "learning_rate": 1.0704559556168239e-05,
      "loss": 0.8007,
      "step": 691050
    },
    {
      "epoch": 2.421993011527125,
      "grad_norm": 3.21875,
      "learning_rate": 1.0703910527504538e-05,
      "loss": 0.9066,
      "step": 691060
    },
    {
      "epoch": 2.4220280590340204,
      "grad_norm": 2.96875,
      "learning_rate": 1.0703261498840836e-05,
      "loss": 0.8429,
      "step": 691070
    },
    {
      "epoch": 2.422063106540916,
      "grad_norm": 3.3125,
      "learning_rate": 1.0702612470177134e-05,
      "loss": 0.8029,
      "step": 691080
    },
    {
      "epoch": 2.422098154047812,
      "grad_norm": 2.6875,
      "learning_rate": 1.070196344151343e-05,
      "loss": 0.7976,
      "step": 691090
    },
    {
      "epoch": 2.4221332015547072,
      "grad_norm": 2.890625,
      "learning_rate": 1.0701314412849729e-05,
      "loss": 0.8356,
      "step": 691100
    },
    {
      "epoch": 2.422168249061603,
      "grad_norm": 2.90625,
      "learning_rate": 1.0700665384186027e-05,
      "loss": 0.7335,
      "step": 691110
    },
    {
      "epoch": 2.4222032965684988,
      "grad_norm": 3.1875,
      "learning_rate": 1.0700016355522326e-05,
      "loss": 0.8278,
      "step": 691120
    },
    {
      "epoch": 2.422238344075394,
      "grad_norm": 2.859375,
      "learning_rate": 1.0699367326858624e-05,
      "loss": 0.7958,
      "step": 691130
    },
    {
      "epoch": 2.42227339158229,
      "grad_norm": 2.78125,
      "learning_rate": 1.0698718298194922e-05,
      "loss": 0.7487,
      "step": 691140
    },
    {
      "epoch": 2.4223084390891856,
      "grad_norm": 2.625,
      "learning_rate": 1.069806926953122e-05,
      "loss": 0.8426,
      "step": 691150
    },
    {
      "epoch": 2.422343486596081,
      "grad_norm": 2.78125,
      "learning_rate": 1.0697420240867518e-05,
      "loss": 0.8097,
      "step": 691160
    },
    {
      "epoch": 2.4223785341029767,
      "grad_norm": 2.953125,
      "learning_rate": 1.0696771212203816e-05,
      "loss": 0.8527,
      "step": 691170
    },
    {
      "epoch": 2.422413581609872,
      "grad_norm": 2.59375,
      "learning_rate": 1.0696122183540114e-05,
      "loss": 0.7546,
      "step": 691180
    },
    {
      "epoch": 2.4224486291167677,
      "grad_norm": 3.453125,
      "learning_rate": 1.0695473154876412e-05,
      "loss": 0.8579,
      "step": 691190
    },
    {
      "epoch": 2.4224836766236635,
      "grad_norm": 3.25,
      "learning_rate": 1.069482412621271e-05,
      "loss": 0.8011,
      "step": 691200
    },
    {
      "epoch": 2.422518724130559,
      "grad_norm": 2.703125,
      "learning_rate": 1.0694175097549008e-05,
      "loss": 0.8094,
      "step": 691210
    },
    {
      "epoch": 2.4225537716374546,
      "grad_norm": 3.21875,
      "learning_rate": 1.0693526068885306e-05,
      "loss": 0.8013,
      "step": 691220
    },
    {
      "epoch": 2.4225888191443503,
      "grad_norm": 2.828125,
      "learning_rate": 1.0692877040221604e-05,
      "loss": 0.8056,
      "step": 691230
    },
    {
      "epoch": 2.4226238666512456,
      "grad_norm": 2.53125,
      "learning_rate": 1.0692228011557902e-05,
      "loss": 0.7923,
      "step": 691240
    },
    {
      "epoch": 2.4226589141581414,
      "grad_norm": 2.625,
      "learning_rate": 1.0691578982894202e-05,
      "loss": 0.7547,
      "step": 691250
    },
    {
      "epoch": 2.422693961665037,
      "grad_norm": 2.640625,
      "learning_rate": 1.06909299542305e-05,
      "loss": 0.863,
      "step": 691260
    },
    {
      "epoch": 2.4227290091719325,
      "grad_norm": 2.9375,
      "learning_rate": 1.0690280925566796e-05,
      "loss": 0.806,
      "step": 691270
    },
    {
      "epoch": 2.422764056678828,
      "grad_norm": 3.03125,
      "learning_rate": 1.0689631896903094e-05,
      "loss": 0.8784,
      "step": 691280
    },
    {
      "epoch": 2.4227991041857235,
      "grad_norm": 2.921875,
      "learning_rate": 1.0688982868239392e-05,
      "loss": 0.8952,
      "step": 691290
    },
    {
      "epoch": 2.4228341516926193,
      "grad_norm": 2.890625,
      "learning_rate": 1.0688333839575692e-05,
      "loss": 0.8444,
      "step": 691300
    },
    {
      "epoch": 2.422869199199515,
      "grad_norm": 2.65625,
      "learning_rate": 1.068768481091199e-05,
      "loss": 0.7906,
      "step": 691310
    },
    {
      "epoch": 2.4229042467064104,
      "grad_norm": 2.921875,
      "learning_rate": 1.0687035782248288e-05,
      "loss": 0.8166,
      "step": 691320
    },
    {
      "epoch": 2.422939294213306,
      "grad_norm": 3.140625,
      "learning_rate": 1.0686386753584586e-05,
      "loss": 0.8308,
      "step": 691330
    },
    {
      "epoch": 2.422974341720202,
      "grad_norm": 2.796875,
      "learning_rate": 1.0685737724920884e-05,
      "loss": 0.858,
      "step": 691340
    },
    {
      "epoch": 2.423009389227097,
      "grad_norm": 2.78125,
      "learning_rate": 1.0685088696257182e-05,
      "loss": 0.8071,
      "step": 691350
    },
    {
      "epoch": 2.423044436733993,
      "grad_norm": 2.640625,
      "learning_rate": 1.068443966759348e-05,
      "loss": 0.7597,
      "step": 691360
    },
    {
      "epoch": 2.4230794842408887,
      "grad_norm": 2.90625,
      "learning_rate": 1.0683790638929778e-05,
      "loss": 0.8546,
      "step": 691370
    },
    {
      "epoch": 2.423114531747784,
      "grad_norm": 2.890625,
      "learning_rate": 1.0683141610266076e-05,
      "loss": 0.829,
      "step": 691380
    },
    {
      "epoch": 2.4231495792546798,
      "grad_norm": 3.109375,
      "learning_rate": 1.0682492581602374e-05,
      "loss": 0.7904,
      "step": 691390
    },
    {
      "epoch": 2.423184626761575,
      "grad_norm": 3.078125,
      "learning_rate": 1.0681843552938672e-05,
      "loss": 0.7878,
      "step": 691400
    },
    {
      "epoch": 2.423219674268471,
      "grad_norm": 3.203125,
      "learning_rate": 1.068119452427497e-05,
      "loss": 0.8076,
      "step": 691410
    },
    {
      "epoch": 2.4232547217753666,
      "grad_norm": 4.09375,
      "learning_rate": 1.0680545495611268e-05,
      "loss": 0.7738,
      "step": 691420
    },
    {
      "epoch": 2.423289769282262,
      "grad_norm": 2.984375,
      "learning_rate": 1.0679896466947568e-05,
      "loss": 0.7808,
      "step": 691430
    },
    {
      "epoch": 2.4233248167891577,
      "grad_norm": 3.140625,
      "learning_rate": 1.0679247438283866e-05,
      "loss": 0.7997,
      "step": 691440
    },
    {
      "epoch": 2.4233598642960534,
      "grad_norm": 2.9375,
      "learning_rate": 1.0678598409620164e-05,
      "loss": 0.789,
      "step": 691450
    },
    {
      "epoch": 2.4233949118029487,
      "grad_norm": 2.59375,
      "learning_rate": 1.067794938095646e-05,
      "loss": 0.8511,
      "step": 691460
    },
    {
      "epoch": 2.4234299593098445,
      "grad_norm": 3.0,
      "learning_rate": 1.0677300352292758e-05,
      "loss": 0.8134,
      "step": 691470
    },
    {
      "epoch": 2.4234650068167403,
      "grad_norm": 3.3125,
      "learning_rate": 1.0676651323629056e-05,
      "loss": 0.8218,
      "step": 691480
    },
    {
      "epoch": 2.4235000543236356,
      "grad_norm": 2.390625,
      "learning_rate": 1.0676002294965356e-05,
      "loss": 0.7722,
      "step": 691490
    },
    {
      "epoch": 2.4235351018305313,
      "grad_norm": 2.484375,
      "learning_rate": 1.0675353266301654e-05,
      "loss": 0.818,
      "step": 691500
    },
    {
      "epoch": 2.4235701493374266,
      "grad_norm": 3.03125,
      "learning_rate": 1.0674704237637952e-05,
      "loss": 0.871,
      "step": 691510
    },
    {
      "epoch": 2.4236051968443224,
      "grad_norm": 2.578125,
      "learning_rate": 1.067405520897425e-05,
      "loss": 0.7979,
      "step": 691520
    },
    {
      "epoch": 2.423640244351218,
      "grad_norm": 3.484375,
      "learning_rate": 1.0673406180310548e-05,
      "loss": 0.8996,
      "step": 691530
    },
    {
      "epoch": 2.423675291858114,
      "grad_norm": 2.71875,
      "learning_rate": 1.0672757151646846e-05,
      "loss": 0.758,
      "step": 691540
    },
    {
      "epoch": 2.4237103393650092,
      "grad_norm": 2.78125,
      "learning_rate": 1.0672108122983144e-05,
      "loss": 0.8027,
      "step": 691550
    },
    {
      "epoch": 2.423745386871905,
      "grad_norm": 2.515625,
      "learning_rate": 1.0671459094319442e-05,
      "loss": 0.8119,
      "step": 691560
    },
    {
      "epoch": 2.4237804343788003,
      "grad_norm": 3.0,
      "learning_rate": 1.067081006565574e-05,
      "loss": 0.7732,
      "step": 691570
    },
    {
      "epoch": 2.423815481885696,
      "grad_norm": 2.796875,
      "learning_rate": 1.0670161036992038e-05,
      "loss": 0.7836,
      "step": 691580
    },
    {
      "epoch": 2.423850529392592,
      "grad_norm": 2.546875,
      "learning_rate": 1.0669512008328336e-05,
      "loss": 0.8184,
      "step": 691590
    },
    {
      "epoch": 2.423885576899487,
      "grad_norm": 2.9375,
      "learning_rate": 1.0668862979664634e-05,
      "loss": 0.7687,
      "step": 691600
    },
    {
      "epoch": 2.423920624406383,
      "grad_norm": 2.984375,
      "learning_rate": 1.0668213951000934e-05,
      "loss": 0.8296,
      "step": 691610
    },
    {
      "epoch": 2.423955671913278,
      "grad_norm": 2.8125,
      "learning_rate": 1.0667564922337232e-05,
      "loss": 0.8307,
      "step": 691620
    },
    {
      "epoch": 2.423990719420174,
      "grad_norm": 2.8125,
      "learning_rate": 1.066691589367353e-05,
      "loss": 0.7907,
      "step": 691630
    },
    {
      "epoch": 2.4240257669270697,
      "grad_norm": 2.875,
      "learning_rate": 1.0666266865009828e-05,
      "loss": 0.8027,
      "step": 691640
    },
    {
      "epoch": 2.4240608144339655,
      "grad_norm": 2.734375,
      "learning_rate": 1.0665617836346124e-05,
      "loss": 0.7692,
      "step": 691650
    },
    {
      "epoch": 2.424095861940861,
      "grad_norm": 2.453125,
      "learning_rate": 1.0664968807682422e-05,
      "loss": 0.7865,
      "step": 691660
    },
    {
      "epoch": 2.4241309094477566,
      "grad_norm": 3.28125,
      "learning_rate": 1.0664319779018722e-05,
      "loss": 0.8367,
      "step": 691670
    },
    {
      "epoch": 2.424165956954652,
      "grad_norm": 2.75,
      "learning_rate": 1.066367075035502e-05,
      "loss": 0.9019,
      "step": 691680
    },
    {
      "epoch": 2.4242010044615476,
      "grad_norm": 3.171875,
      "learning_rate": 1.0663021721691318e-05,
      "loss": 0.7664,
      "step": 691690
    },
    {
      "epoch": 2.4242360519684434,
      "grad_norm": 2.484375,
      "learning_rate": 1.0662372693027616e-05,
      "loss": 0.8571,
      "step": 691700
    },
    {
      "epoch": 2.4242710994753387,
      "grad_norm": 2.734375,
      "learning_rate": 1.0661723664363914e-05,
      "loss": 0.7791,
      "step": 691710
    },
    {
      "epoch": 2.4243061469822345,
      "grad_norm": 3.0,
      "learning_rate": 1.0661074635700212e-05,
      "loss": 0.7616,
      "step": 691720
    },
    {
      "epoch": 2.4243411944891298,
      "grad_norm": 3.0625,
      "learning_rate": 1.066042560703651e-05,
      "loss": 0.8918,
      "step": 691730
    },
    {
      "epoch": 2.4243762419960255,
      "grad_norm": 3.484375,
      "learning_rate": 1.0659776578372808e-05,
      "loss": 0.7948,
      "step": 691740
    },
    {
      "epoch": 2.4244112895029213,
      "grad_norm": 3.03125,
      "learning_rate": 1.0659127549709106e-05,
      "loss": 0.7615,
      "step": 691750
    },
    {
      "epoch": 2.424446337009817,
      "grad_norm": 2.859375,
      "learning_rate": 1.0658478521045404e-05,
      "loss": 0.8106,
      "step": 691760
    },
    {
      "epoch": 2.4244813845167124,
      "grad_norm": 2.484375,
      "learning_rate": 1.0657829492381702e-05,
      "loss": 0.739,
      "step": 691770
    },
    {
      "epoch": 2.424516432023608,
      "grad_norm": 2.828125,
      "learning_rate": 1.0657180463718e-05,
      "loss": 0.8252,
      "step": 691780
    },
    {
      "epoch": 2.4245514795305034,
      "grad_norm": 2.859375,
      "learning_rate": 1.0656531435054298e-05,
      "loss": 0.7865,
      "step": 691790
    },
    {
      "epoch": 2.424586527037399,
      "grad_norm": 3.234375,
      "learning_rate": 1.0655882406390597e-05,
      "loss": 0.8299,
      "step": 691800
    },
    {
      "epoch": 2.424621574544295,
      "grad_norm": 2.625,
      "learning_rate": 1.0655233377726895e-05,
      "loss": 0.7177,
      "step": 691810
    },
    {
      "epoch": 2.4246566220511903,
      "grad_norm": 2.8125,
      "learning_rate": 1.0654584349063193e-05,
      "loss": 0.756,
      "step": 691820
    },
    {
      "epoch": 2.424691669558086,
      "grad_norm": 3.109375,
      "learning_rate": 1.0653935320399491e-05,
      "loss": 0.8155,
      "step": 691830
    },
    {
      "epoch": 2.4247267170649818,
      "grad_norm": 2.765625,
      "learning_rate": 1.0653286291735788e-05,
      "loss": 0.8468,
      "step": 691840
    },
    {
      "epoch": 2.424761764571877,
      "grad_norm": 2.96875,
      "learning_rate": 1.0652637263072087e-05,
      "loss": 0.7857,
      "step": 691850
    },
    {
      "epoch": 2.424796812078773,
      "grad_norm": 2.640625,
      "learning_rate": 1.0651988234408385e-05,
      "loss": 0.7976,
      "step": 691860
    },
    {
      "epoch": 2.4248318595856686,
      "grad_norm": 3.1875,
      "learning_rate": 1.0651339205744683e-05,
      "loss": 0.8487,
      "step": 691870
    },
    {
      "epoch": 2.424866907092564,
      "grad_norm": 2.828125,
      "learning_rate": 1.0650690177080981e-05,
      "loss": 0.7852,
      "step": 691880
    },
    {
      "epoch": 2.4249019545994597,
      "grad_norm": 2.75,
      "learning_rate": 1.065004114841728e-05,
      "loss": 0.7802,
      "step": 691890
    },
    {
      "epoch": 2.424937002106355,
      "grad_norm": 2.640625,
      "learning_rate": 1.0649392119753577e-05,
      "loss": 0.8473,
      "step": 691900
    },
    {
      "epoch": 2.4249720496132507,
      "grad_norm": 3.0,
      "learning_rate": 1.0648743091089875e-05,
      "loss": 0.7517,
      "step": 691910
    },
    {
      "epoch": 2.4250070971201465,
      "grad_norm": 2.546875,
      "learning_rate": 1.0648094062426175e-05,
      "loss": 0.7647,
      "step": 691920
    },
    {
      "epoch": 2.425042144627042,
      "grad_norm": 3.015625,
      "learning_rate": 1.0647445033762471e-05,
      "loss": 0.8358,
      "step": 691930
    },
    {
      "epoch": 2.4250771921339376,
      "grad_norm": 2.734375,
      "learning_rate": 1.064679600509877e-05,
      "loss": 0.8063,
      "step": 691940
    },
    {
      "epoch": 2.4251122396408333,
      "grad_norm": 3.359375,
      "learning_rate": 1.0646146976435067e-05,
      "loss": 0.8836,
      "step": 691950
    },
    {
      "epoch": 2.4251472871477286,
      "grad_norm": 2.671875,
      "learning_rate": 1.0645497947771365e-05,
      "loss": 0.8555,
      "step": 691960
    },
    {
      "epoch": 2.4251823346546244,
      "grad_norm": 3.171875,
      "learning_rate": 1.0644848919107663e-05,
      "loss": 0.8615,
      "step": 691970
    },
    {
      "epoch": 2.42521738216152,
      "grad_norm": 2.671875,
      "learning_rate": 1.0644199890443963e-05,
      "loss": 0.8674,
      "step": 691980
    },
    {
      "epoch": 2.4252524296684155,
      "grad_norm": 2.71875,
      "learning_rate": 1.0643550861780261e-05,
      "loss": 0.8474,
      "step": 691990
    },
    {
      "epoch": 2.4252874771753112,
      "grad_norm": 3.171875,
      "learning_rate": 1.0642901833116559e-05,
      "loss": 0.7502,
      "step": 692000
    },
    {
      "epoch": 2.4253225246822065,
      "grad_norm": 2.65625,
      "learning_rate": 1.0642252804452857e-05,
      "loss": 0.7418,
      "step": 692010
    },
    {
      "epoch": 2.4253575721891023,
      "grad_norm": 2.75,
      "learning_rate": 1.0641603775789155e-05,
      "loss": 0.8342,
      "step": 692020
    },
    {
      "epoch": 2.425392619695998,
      "grad_norm": 2.796875,
      "learning_rate": 1.0640954747125451e-05,
      "loss": 0.7764,
      "step": 692030
    },
    {
      "epoch": 2.4254276672028934,
      "grad_norm": 2.734375,
      "learning_rate": 1.0640305718461751e-05,
      "loss": 0.7706,
      "step": 692040
    },
    {
      "epoch": 2.425462714709789,
      "grad_norm": 2.84375,
      "learning_rate": 1.0639656689798049e-05,
      "loss": 0.8297,
      "step": 692050
    },
    {
      "epoch": 2.425497762216685,
      "grad_norm": 3.03125,
      "learning_rate": 1.0639007661134347e-05,
      "loss": 0.7297,
      "step": 692060
    },
    {
      "epoch": 2.42553280972358,
      "grad_norm": 3.1875,
      "learning_rate": 1.0638358632470645e-05,
      "loss": 0.7965,
      "step": 692070
    },
    {
      "epoch": 2.425567857230476,
      "grad_norm": 2.4375,
      "learning_rate": 1.0637709603806943e-05,
      "loss": 0.8183,
      "step": 692080
    },
    {
      "epoch": 2.4256029047373717,
      "grad_norm": 2.65625,
      "learning_rate": 1.0637060575143241e-05,
      "loss": 0.8465,
      "step": 692090
    },
    {
      "epoch": 2.425637952244267,
      "grad_norm": 3.109375,
      "learning_rate": 1.0636411546479539e-05,
      "loss": 0.7626,
      "step": 692100
    },
    {
      "epoch": 2.425672999751163,
      "grad_norm": 3.359375,
      "learning_rate": 1.0635762517815839e-05,
      "loss": 0.8325,
      "step": 692110
    },
    {
      "epoch": 2.425708047258058,
      "grad_norm": 2.6875,
      "learning_rate": 1.0635113489152135e-05,
      "loss": 0.8568,
      "step": 692120
    },
    {
      "epoch": 2.425743094764954,
      "grad_norm": 3.234375,
      "learning_rate": 1.0634464460488433e-05,
      "loss": 0.7701,
      "step": 692130
    },
    {
      "epoch": 2.4257781422718496,
      "grad_norm": 3.453125,
      "learning_rate": 1.0633815431824731e-05,
      "loss": 0.9063,
      "step": 692140
    },
    {
      "epoch": 2.425813189778745,
      "grad_norm": 2.796875,
      "learning_rate": 1.0633166403161029e-05,
      "loss": 0.8372,
      "step": 692150
    },
    {
      "epoch": 2.4258482372856407,
      "grad_norm": 2.859375,
      "learning_rate": 1.0632517374497329e-05,
      "loss": 0.7882,
      "step": 692160
    },
    {
      "epoch": 2.4258832847925365,
      "grad_norm": 3.296875,
      "learning_rate": 1.0631868345833627e-05,
      "loss": 0.795,
      "step": 692170
    },
    {
      "epoch": 2.4259183322994318,
      "grad_norm": 2.78125,
      "learning_rate": 1.0631219317169925e-05,
      "loss": 0.8271,
      "step": 692180
    },
    {
      "epoch": 2.4259533798063275,
      "grad_norm": 3.09375,
      "learning_rate": 1.0630570288506223e-05,
      "loss": 0.9166,
      "step": 692190
    },
    {
      "epoch": 2.4259884273132233,
      "grad_norm": 2.703125,
      "learning_rate": 1.062992125984252e-05,
      "loss": 0.818,
      "step": 692200
    },
    {
      "epoch": 2.4260234748201186,
      "grad_norm": 2.796875,
      "learning_rate": 1.0629272231178817e-05,
      "loss": 0.8819,
      "step": 692210
    },
    {
      "epoch": 2.4260585223270144,
      "grad_norm": 3.03125,
      "learning_rate": 1.0628623202515117e-05,
      "loss": 0.81,
      "step": 692220
    },
    {
      "epoch": 2.4260935698339097,
      "grad_norm": 2.609375,
      "learning_rate": 1.0627974173851415e-05,
      "loss": 0.8273,
      "step": 692230
    },
    {
      "epoch": 2.4261286173408054,
      "grad_norm": 2.71875,
      "learning_rate": 1.0627325145187713e-05,
      "loss": 0.821,
      "step": 692240
    },
    {
      "epoch": 2.426163664847701,
      "grad_norm": 2.46875,
      "learning_rate": 1.062667611652401e-05,
      "loss": 0.7025,
      "step": 692250
    },
    {
      "epoch": 2.4261987123545965,
      "grad_norm": 2.578125,
      "learning_rate": 1.0626027087860309e-05,
      "loss": 0.6859,
      "step": 692260
    },
    {
      "epoch": 2.4262337598614923,
      "grad_norm": 2.6875,
      "learning_rate": 1.0625378059196607e-05,
      "loss": 0.7863,
      "step": 692270
    },
    {
      "epoch": 2.426268807368388,
      "grad_norm": 2.890625,
      "learning_rate": 1.0624729030532905e-05,
      "loss": 0.8119,
      "step": 692280
    },
    {
      "epoch": 2.4263038548752833,
      "grad_norm": 3.265625,
      "learning_rate": 1.0624080001869204e-05,
      "loss": 0.7833,
      "step": 692290
    },
    {
      "epoch": 2.426338902382179,
      "grad_norm": 2.546875,
      "learning_rate": 1.0623430973205502e-05,
      "loss": 0.8551,
      "step": 692300
    },
    {
      "epoch": 2.426373949889075,
      "grad_norm": 3.03125,
      "learning_rate": 1.0622781944541799e-05,
      "loss": 0.7795,
      "step": 692310
    },
    {
      "epoch": 2.42640899739597,
      "grad_norm": 3.328125,
      "learning_rate": 1.0622132915878097e-05,
      "loss": 0.8145,
      "step": 692320
    },
    {
      "epoch": 2.426444044902866,
      "grad_norm": 2.53125,
      "learning_rate": 1.0621483887214395e-05,
      "loss": 0.817,
      "step": 692330
    },
    {
      "epoch": 2.4264790924097612,
      "grad_norm": 2.859375,
      "learning_rate": 1.0620834858550693e-05,
      "loss": 0.8799,
      "step": 692340
    },
    {
      "epoch": 2.426514139916657,
      "grad_norm": 3.015625,
      "learning_rate": 1.0620185829886992e-05,
      "loss": 0.7501,
      "step": 692350
    },
    {
      "epoch": 2.4265491874235527,
      "grad_norm": 2.609375,
      "learning_rate": 1.061953680122329e-05,
      "loss": 0.8994,
      "step": 692360
    },
    {
      "epoch": 2.426584234930448,
      "grad_norm": 2.78125,
      "learning_rate": 1.0618887772559588e-05,
      "loss": 0.7869,
      "step": 692370
    },
    {
      "epoch": 2.426619282437344,
      "grad_norm": 2.84375,
      "learning_rate": 1.0618238743895886e-05,
      "loss": 0.7089,
      "step": 692380
    },
    {
      "epoch": 2.4266543299442396,
      "grad_norm": 2.9375,
      "learning_rate": 1.0617589715232184e-05,
      "loss": 0.8021,
      "step": 692390
    },
    {
      "epoch": 2.426689377451135,
      "grad_norm": 3.09375,
      "learning_rate": 1.0616940686568482e-05,
      "loss": 0.8629,
      "step": 692400
    },
    {
      "epoch": 2.4267244249580306,
      "grad_norm": 2.765625,
      "learning_rate": 1.061629165790478e-05,
      "loss": 0.8439,
      "step": 692410
    },
    {
      "epoch": 2.4267594724649264,
      "grad_norm": 2.75,
      "learning_rate": 1.0615642629241078e-05,
      "loss": 0.7251,
      "step": 692420
    },
    {
      "epoch": 2.4267945199718217,
      "grad_norm": 2.53125,
      "learning_rate": 1.0614993600577376e-05,
      "loss": 0.7634,
      "step": 692430
    },
    {
      "epoch": 2.4268295674787175,
      "grad_norm": 2.46875,
      "learning_rate": 1.0614344571913674e-05,
      "loss": 0.8415,
      "step": 692440
    },
    {
      "epoch": 2.426864614985613,
      "grad_norm": 2.703125,
      "learning_rate": 1.0613695543249972e-05,
      "loss": 0.7713,
      "step": 692450
    },
    {
      "epoch": 2.4268996624925085,
      "grad_norm": 2.796875,
      "learning_rate": 1.061304651458627e-05,
      "loss": 0.8381,
      "step": 692460
    },
    {
      "epoch": 2.4269347099994043,
      "grad_norm": 3.40625,
      "learning_rate": 1.061239748592257e-05,
      "loss": 0.8657,
      "step": 692470
    },
    {
      "epoch": 2.4269697575062996,
      "grad_norm": 2.796875,
      "learning_rate": 1.0611748457258868e-05,
      "loss": 0.772,
      "step": 692480
    },
    {
      "epoch": 2.4270048050131954,
      "grad_norm": 3.109375,
      "learning_rate": 1.0611099428595166e-05,
      "loss": 0.8465,
      "step": 692490
    },
    {
      "epoch": 2.427039852520091,
      "grad_norm": 3.28125,
      "learning_rate": 1.0610450399931462e-05,
      "loss": 0.8322,
      "step": 692500
    },
    {
      "epoch": 2.4270749000269864,
      "grad_norm": 2.859375,
      "learning_rate": 1.060980137126776e-05,
      "loss": 0.773,
      "step": 692510
    },
    {
      "epoch": 2.427109947533882,
      "grad_norm": 3.234375,
      "learning_rate": 1.0609152342604058e-05,
      "loss": 0.8715,
      "step": 692520
    },
    {
      "epoch": 2.427144995040778,
      "grad_norm": 2.875,
      "learning_rate": 1.0608503313940358e-05,
      "loss": 0.8209,
      "step": 692530
    },
    {
      "epoch": 2.4271800425476733,
      "grad_norm": 3.234375,
      "learning_rate": 1.0607854285276656e-05,
      "loss": 0.8386,
      "step": 692540
    },
    {
      "epoch": 2.427215090054569,
      "grad_norm": 2.65625,
      "learning_rate": 1.0607205256612954e-05,
      "loss": 0.7653,
      "step": 692550
    },
    {
      "epoch": 2.4272501375614643,
      "grad_norm": 2.765625,
      "learning_rate": 1.0606556227949252e-05,
      "loss": 0.7767,
      "step": 692560
    },
    {
      "epoch": 2.42728518506836,
      "grad_norm": 2.9375,
      "learning_rate": 1.060590719928555e-05,
      "loss": 0.7985,
      "step": 692570
    },
    {
      "epoch": 2.427320232575256,
      "grad_norm": 3.21875,
      "learning_rate": 1.0605258170621848e-05,
      "loss": 0.8342,
      "step": 692580
    },
    {
      "epoch": 2.427355280082151,
      "grad_norm": 2.703125,
      "learning_rate": 1.0604609141958146e-05,
      "loss": 0.7846,
      "step": 692590
    },
    {
      "epoch": 2.427390327589047,
      "grad_norm": 2.734375,
      "learning_rate": 1.0603960113294444e-05,
      "loss": 0.8374,
      "step": 692600
    },
    {
      "epoch": 2.4274253750959427,
      "grad_norm": 2.71875,
      "learning_rate": 1.0603311084630742e-05,
      "loss": 0.839,
      "step": 692610
    },
    {
      "epoch": 2.427460422602838,
      "grad_norm": 3.171875,
      "learning_rate": 1.060266205596704e-05,
      "loss": 0.8633,
      "step": 692620
    },
    {
      "epoch": 2.4274954701097338,
      "grad_norm": 2.8125,
      "learning_rate": 1.0602013027303338e-05,
      "loss": 0.8183,
      "step": 692630
    },
    {
      "epoch": 2.4275305176166295,
      "grad_norm": 2.953125,
      "learning_rate": 1.0601363998639636e-05,
      "loss": 0.7382,
      "step": 692640
    },
    {
      "epoch": 2.427565565123525,
      "grad_norm": 2.734375,
      "learning_rate": 1.0600714969975934e-05,
      "loss": 0.8152,
      "step": 692650
    },
    {
      "epoch": 2.4276006126304206,
      "grad_norm": 2.9375,
      "learning_rate": 1.0600065941312234e-05,
      "loss": 0.7683,
      "step": 692660
    },
    {
      "epoch": 2.427635660137316,
      "grad_norm": 3.1875,
      "learning_rate": 1.0599416912648532e-05,
      "loss": 0.8126,
      "step": 692670
    },
    {
      "epoch": 2.4276707076442117,
      "grad_norm": 2.609375,
      "learning_rate": 1.059876788398483e-05,
      "loss": 0.7769,
      "step": 692680
    },
    {
      "epoch": 2.4277057551511074,
      "grad_norm": 2.828125,
      "learning_rate": 1.0598118855321126e-05,
      "loss": 0.7731,
      "step": 692690
    },
    {
      "epoch": 2.4277408026580027,
      "grad_norm": 3.28125,
      "learning_rate": 1.0597469826657424e-05,
      "loss": 0.9034,
      "step": 692700
    },
    {
      "epoch": 2.4277758501648985,
      "grad_norm": 3.03125,
      "learning_rate": 1.0596820797993724e-05,
      "loss": 0.7739,
      "step": 692710
    },
    {
      "epoch": 2.4278108976717943,
      "grad_norm": 2.84375,
      "learning_rate": 1.0596171769330022e-05,
      "loss": 0.7509,
      "step": 692720
    },
    {
      "epoch": 2.4278459451786896,
      "grad_norm": 2.828125,
      "learning_rate": 1.059552274066632e-05,
      "loss": 0.8062,
      "step": 692730
    },
    {
      "epoch": 2.4278809926855853,
      "grad_norm": 2.5625,
      "learning_rate": 1.0594873712002618e-05,
      "loss": 0.8207,
      "step": 692740
    },
    {
      "epoch": 2.427916040192481,
      "grad_norm": 3.53125,
      "learning_rate": 1.0594224683338916e-05,
      "loss": 0.7777,
      "step": 692750
    },
    {
      "epoch": 2.4279510876993764,
      "grad_norm": 2.890625,
      "learning_rate": 1.0593575654675214e-05,
      "loss": 0.809,
      "step": 692760
    },
    {
      "epoch": 2.427986135206272,
      "grad_norm": 2.953125,
      "learning_rate": 1.0592926626011512e-05,
      "loss": 0.8236,
      "step": 692770
    },
    {
      "epoch": 2.4280211827131675,
      "grad_norm": 3.171875,
      "learning_rate": 1.059227759734781e-05,
      "loss": 0.7494,
      "step": 692780
    },
    {
      "epoch": 2.4280562302200632,
      "grad_norm": 2.453125,
      "learning_rate": 1.0591628568684108e-05,
      "loss": 0.786,
      "step": 692790
    },
    {
      "epoch": 2.428091277726959,
      "grad_norm": 2.921875,
      "learning_rate": 1.0590979540020406e-05,
      "loss": 0.7675,
      "step": 692800
    },
    {
      "epoch": 2.4281263252338543,
      "grad_norm": 2.875,
      "learning_rate": 1.0590330511356704e-05,
      "loss": 0.8385,
      "step": 692810
    },
    {
      "epoch": 2.42816137274075,
      "grad_norm": 2.828125,
      "learning_rate": 1.0589681482693002e-05,
      "loss": 0.7662,
      "step": 692820
    },
    {
      "epoch": 2.428196420247646,
      "grad_norm": 3.5,
      "learning_rate": 1.05890324540293e-05,
      "loss": 0.8982,
      "step": 692830
    },
    {
      "epoch": 2.428231467754541,
      "grad_norm": 3.296875,
      "learning_rate": 1.05883834253656e-05,
      "loss": 0.7948,
      "step": 692840
    },
    {
      "epoch": 2.428266515261437,
      "grad_norm": 3.28125,
      "learning_rate": 1.0587734396701897e-05,
      "loss": 0.8321,
      "step": 692850
    },
    {
      "epoch": 2.4283015627683326,
      "grad_norm": 2.90625,
      "learning_rate": 1.0587085368038195e-05,
      "loss": 0.8332,
      "step": 692860
    },
    {
      "epoch": 2.428336610275228,
      "grad_norm": 2.78125,
      "learning_rate": 1.0586436339374492e-05,
      "loss": 0.8049,
      "step": 692870
    },
    {
      "epoch": 2.4283716577821237,
      "grad_norm": 2.703125,
      "learning_rate": 1.058578731071079e-05,
      "loss": 0.7063,
      "step": 692880
    },
    {
      "epoch": 2.428406705289019,
      "grad_norm": 3.15625,
      "learning_rate": 1.0585138282047088e-05,
      "loss": 0.8912,
      "step": 692890
    },
    {
      "epoch": 2.428441752795915,
      "grad_norm": 2.46875,
      "learning_rate": 1.0584489253383387e-05,
      "loss": 0.8815,
      "step": 692900
    },
    {
      "epoch": 2.4284768003028105,
      "grad_norm": 3.140625,
      "learning_rate": 1.0583840224719685e-05,
      "loss": 0.7896,
      "step": 692910
    },
    {
      "epoch": 2.4285118478097063,
      "grad_norm": 2.96875,
      "learning_rate": 1.0583191196055983e-05,
      "loss": 0.8019,
      "step": 692920
    },
    {
      "epoch": 2.4285468953166016,
      "grad_norm": 2.84375,
      "learning_rate": 1.0582542167392281e-05,
      "loss": 0.7873,
      "step": 692930
    },
    {
      "epoch": 2.4285819428234974,
      "grad_norm": 3.421875,
      "learning_rate": 1.058189313872858e-05,
      "loss": 0.8971,
      "step": 692940
    },
    {
      "epoch": 2.4286169903303927,
      "grad_norm": 2.96875,
      "learning_rate": 1.0581244110064877e-05,
      "loss": 0.8307,
      "step": 692950
    },
    {
      "epoch": 2.4286520378372884,
      "grad_norm": 2.875,
      "learning_rate": 1.0580595081401175e-05,
      "loss": 0.8588,
      "step": 692960
    },
    {
      "epoch": 2.428687085344184,
      "grad_norm": 2.875,
      "learning_rate": 1.0579946052737473e-05,
      "loss": 0.8058,
      "step": 692970
    },
    {
      "epoch": 2.4287221328510795,
      "grad_norm": 2.6875,
      "learning_rate": 1.0579297024073771e-05,
      "loss": 0.8344,
      "step": 692980
    },
    {
      "epoch": 2.4287571803579753,
      "grad_norm": 2.9375,
      "learning_rate": 1.057864799541007e-05,
      "loss": 0.789,
      "step": 692990
    },
    {
      "epoch": 2.4287922278648706,
      "grad_norm": 2.78125,
      "learning_rate": 1.0577998966746367e-05,
      "loss": 0.7672,
      "step": 693000
    },
    {
      "epoch": 2.4288272753717663,
      "grad_norm": 2.96875,
      "learning_rate": 1.0577349938082665e-05,
      "loss": 0.8903,
      "step": 693010
    },
    {
      "epoch": 2.428862322878662,
      "grad_norm": 2.859375,
      "learning_rate": 1.0576700909418965e-05,
      "loss": 0.8083,
      "step": 693020
    },
    {
      "epoch": 2.428897370385558,
      "grad_norm": 3.109375,
      "learning_rate": 1.0576051880755263e-05,
      "loss": 0.8129,
      "step": 693030
    },
    {
      "epoch": 2.428932417892453,
      "grad_norm": 2.90625,
      "learning_rate": 1.0575402852091561e-05,
      "loss": 0.7407,
      "step": 693040
    },
    {
      "epoch": 2.428967465399349,
      "grad_norm": 3.390625,
      "learning_rate": 1.0574753823427859e-05,
      "loss": 0.8521,
      "step": 693050
    },
    {
      "epoch": 2.4290025129062442,
      "grad_norm": 2.921875,
      "learning_rate": 1.0574104794764155e-05,
      "loss": 0.8487,
      "step": 693060
    },
    {
      "epoch": 2.42903756041314,
      "grad_norm": 3.328125,
      "learning_rate": 1.0573455766100453e-05,
      "loss": 0.8118,
      "step": 693070
    },
    {
      "epoch": 2.4290726079200358,
      "grad_norm": 2.984375,
      "learning_rate": 1.0572806737436753e-05,
      "loss": 0.7703,
      "step": 693080
    },
    {
      "epoch": 2.429107655426931,
      "grad_norm": 2.796875,
      "learning_rate": 1.0572157708773051e-05,
      "loss": 0.8304,
      "step": 693090
    },
    {
      "epoch": 2.429142702933827,
      "grad_norm": 2.984375,
      "learning_rate": 1.0571508680109349e-05,
      "loss": 0.7737,
      "step": 693100
    },
    {
      "epoch": 2.429177750440722,
      "grad_norm": 2.640625,
      "learning_rate": 1.0570859651445647e-05,
      "loss": 0.7885,
      "step": 693110
    },
    {
      "epoch": 2.429212797947618,
      "grad_norm": 2.453125,
      "learning_rate": 1.0570210622781945e-05,
      "loss": 0.799,
      "step": 693120
    },
    {
      "epoch": 2.4292478454545137,
      "grad_norm": 3.390625,
      "learning_rate": 1.0569561594118243e-05,
      "loss": 0.8613,
      "step": 693130
    },
    {
      "epoch": 2.4292828929614094,
      "grad_norm": 2.484375,
      "learning_rate": 1.0568912565454541e-05,
      "loss": 0.8579,
      "step": 693140
    },
    {
      "epoch": 2.4293179404683047,
      "grad_norm": 2.625,
      "learning_rate": 1.056826353679084e-05,
      "loss": 0.7162,
      "step": 693150
    },
    {
      "epoch": 2.4293529879752005,
      "grad_norm": 2.828125,
      "learning_rate": 1.0567614508127137e-05,
      "loss": 0.8128,
      "step": 693160
    },
    {
      "epoch": 2.429388035482096,
      "grad_norm": 2.53125,
      "learning_rate": 1.0566965479463435e-05,
      "loss": 0.7445,
      "step": 693170
    },
    {
      "epoch": 2.4294230829889916,
      "grad_norm": 2.875,
      "learning_rate": 1.0566316450799733e-05,
      "loss": 0.8591,
      "step": 693180
    },
    {
      "epoch": 2.4294581304958873,
      "grad_norm": 2.90625,
      "learning_rate": 1.0565667422136031e-05,
      "loss": 0.7287,
      "step": 693190
    },
    {
      "epoch": 2.4294931780027826,
      "grad_norm": 2.859375,
      "learning_rate": 1.0565018393472329e-05,
      "loss": 0.7707,
      "step": 693200
    },
    {
      "epoch": 2.4295282255096784,
      "grad_norm": 2.6875,
      "learning_rate": 1.0564369364808629e-05,
      "loss": 0.7448,
      "step": 693210
    },
    {
      "epoch": 2.429563273016574,
      "grad_norm": 3.296875,
      "learning_rate": 1.0563720336144927e-05,
      "loss": 0.8195,
      "step": 693220
    },
    {
      "epoch": 2.4295983205234695,
      "grad_norm": 3.046875,
      "learning_rate": 1.0563071307481225e-05,
      "loss": 0.824,
      "step": 693230
    },
    {
      "epoch": 2.4296333680303652,
      "grad_norm": 3.265625,
      "learning_rate": 1.0562422278817523e-05,
      "loss": 0.8295,
      "step": 693240
    },
    {
      "epoch": 2.429668415537261,
      "grad_norm": 2.625,
      "learning_rate": 1.0561773250153819e-05,
      "loss": 0.7846,
      "step": 693250
    },
    {
      "epoch": 2.4297034630441563,
      "grad_norm": 3.203125,
      "learning_rate": 1.0561124221490119e-05,
      "loss": 0.8294,
      "step": 693260
    },
    {
      "epoch": 2.429738510551052,
      "grad_norm": 2.84375,
      "learning_rate": 1.0560475192826417e-05,
      "loss": 0.796,
      "step": 693270
    },
    {
      "epoch": 2.4297735580579474,
      "grad_norm": 2.484375,
      "learning_rate": 1.0559826164162715e-05,
      "loss": 0.7643,
      "step": 693280
    },
    {
      "epoch": 2.429808605564843,
      "grad_norm": 3.0,
      "learning_rate": 1.0559177135499013e-05,
      "loss": 0.7723,
      "step": 693290
    },
    {
      "epoch": 2.429843653071739,
      "grad_norm": 2.84375,
      "learning_rate": 1.055852810683531e-05,
      "loss": 0.7853,
      "step": 693300
    },
    {
      "epoch": 2.429878700578634,
      "grad_norm": 2.84375,
      "learning_rate": 1.0557879078171609e-05,
      "loss": 0.7844,
      "step": 693310
    },
    {
      "epoch": 2.42991374808553,
      "grad_norm": 2.65625,
      "learning_rate": 1.0557230049507907e-05,
      "loss": 0.7703,
      "step": 693320
    },
    {
      "epoch": 2.4299487955924257,
      "grad_norm": 2.875,
      "learning_rate": 1.0556581020844206e-05,
      "loss": 0.8243,
      "step": 693330
    },
    {
      "epoch": 2.429983843099321,
      "grad_norm": 2.859375,
      "learning_rate": 1.0555931992180503e-05,
      "loss": 0.845,
      "step": 693340
    },
    {
      "epoch": 2.430018890606217,
      "grad_norm": 3.03125,
      "learning_rate": 1.05552829635168e-05,
      "loss": 0.8405,
      "step": 693350
    },
    {
      "epoch": 2.4300539381131125,
      "grad_norm": 3.15625,
      "learning_rate": 1.0554633934853099e-05,
      "loss": 0.8053,
      "step": 693360
    },
    {
      "epoch": 2.430088985620008,
      "grad_norm": 3.203125,
      "learning_rate": 1.0553984906189397e-05,
      "loss": 0.7932,
      "step": 693370
    },
    {
      "epoch": 2.4301240331269036,
      "grad_norm": 2.703125,
      "learning_rate": 1.0553335877525695e-05,
      "loss": 0.771,
      "step": 693380
    },
    {
      "epoch": 2.430159080633799,
      "grad_norm": 2.4375,
      "learning_rate": 1.0552686848861994e-05,
      "loss": 0.7548,
      "step": 693390
    },
    {
      "epoch": 2.4301941281406947,
      "grad_norm": 2.96875,
      "learning_rate": 1.0552037820198292e-05,
      "loss": 0.8606,
      "step": 693400
    },
    {
      "epoch": 2.4302291756475904,
      "grad_norm": 2.875,
      "learning_rate": 1.055138879153459e-05,
      "loss": 0.7935,
      "step": 693410
    },
    {
      "epoch": 2.4302642231544858,
      "grad_norm": 3.421875,
      "learning_rate": 1.0550739762870888e-05,
      "loss": 0.797,
      "step": 693420
    },
    {
      "epoch": 2.4302992706613815,
      "grad_norm": 2.921875,
      "learning_rate": 1.0550090734207186e-05,
      "loss": 0.761,
      "step": 693430
    },
    {
      "epoch": 2.4303343181682773,
      "grad_norm": 2.78125,
      "learning_rate": 1.0549441705543483e-05,
      "loss": 0.7386,
      "step": 693440
    },
    {
      "epoch": 2.4303693656751726,
      "grad_norm": 2.78125,
      "learning_rate": 1.0548792676879782e-05,
      "loss": 0.8826,
      "step": 693450
    },
    {
      "epoch": 2.4304044131820683,
      "grad_norm": 2.515625,
      "learning_rate": 1.054814364821608e-05,
      "loss": 0.8408,
      "step": 693460
    },
    {
      "epoch": 2.430439460688964,
      "grad_norm": 2.671875,
      "learning_rate": 1.0547494619552378e-05,
      "loss": 0.8078,
      "step": 693470
    },
    {
      "epoch": 2.4304745081958594,
      "grad_norm": 2.859375,
      "learning_rate": 1.0546845590888676e-05,
      "loss": 0.8359,
      "step": 693480
    },
    {
      "epoch": 2.430509555702755,
      "grad_norm": 2.84375,
      "learning_rate": 1.0546196562224974e-05,
      "loss": 0.8409,
      "step": 693490
    },
    {
      "epoch": 2.4305446032096505,
      "grad_norm": 2.765625,
      "learning_rate": 1.0545547533561272e-05,
      "loss": 0.7522,
      "step": 693500
    },
    {
      "epoch": 2.4305796507165462,
      "grad_norm": 3.203125,
      "learning_rate": 1.054489850489757e-05,
      "loss": 0.7755,
      "step": 693510
    },
    {
      "epoch": 2.430614698223442,
      "grad_norm": 2.9375,
      "learning_rate": 1.054424947623387e-05,
      "loss": 0.7874,
      "step": 693520
    },
    {
      "epoch": 2.4306497457303373,
      "grad_norm": 2.484375,
      "learning_rate": 1.0543600447570166e-05,
      "loss": 0.8216,
      "step": 693530
    },
    {
      "epoch": 2.430684793237233,
      "grad_norm": 2.859375,
      "learning_rate": 1.0542951418906464e-05,
      "loss": 0.885,
      "step": 693540
    },
    {
      "epoch": 2.430719840744129,
      "grad_norm": 3.15625,
      "learning_rate": 1.0542302390242762e-05,
      "loss": 0.7673,
      "step": 693550
    },
    {
      "epoch": 2.430754888251024,
      "grad_norm": 2.9375,
      "learning_rate": 1.054165336157906e-05,
      "loss": 0.7904,
      "step": 693560
    },
    {
      "epoch": 2.43078993575792,
      "grad_norm": 3.25,
      "learning_rate": 1.054100433291536e-05,
      "loss": 0.8308,
      "step": 693570
    },
    {
      "epoch": 2.4308249832648157,
      "grad_norm": 3.0,
      "learning_rate": 1.0540355304251658e-05,
      "loss": 0.8709,
      "step": 693580
    },
    {
      "epoch": 2.430860030771711,
      "grad_norm": 2.75,
      "learning_rate": 1.0539706275587956e-05,
      "loss": 0.8141,
      "step": 693590
    },
    {
      "epoch": 2.4308950782786067,
      "grad_norm": 2.625,
      "learning_rate": 1.0539057246924254e-05,
      "loss": 0.7606,
      "step": 693600
    },
    {
      "epoch": 2.430930125785502,
      "grad_norm": 2.515625,
      "learning_rate": 1.0538408218260552e-05,
      "loss": 0.7822,
      "step": 693610
    },
    {
      "epoch": 2.430965173292398,
      "grad_norm": 2.65625,
      "learning_rate": 1.053775918959685e-05,
      "loss": 0.7915,
      "step": 693620
    },
    {
      "epoch": 2.4310002207992936,
      "grad_norm": 3.109375,
      "learning_rate": 1.0537110160933148e-05,
      "loss": 0.734,
      "step": 693630
    },
    {
      "epoch": 2.431035268306189,
      "grad_norm": 2.40625,
      "learning_rate": 1.0536461132269446e-05,
      "loss": 0.764,
      "step": 693640
    },
    {
      "epoch": 2.4310703158130846,
      "grad_norm": 2.859375,
      "learning_rate": 1.0535812103605744e-05,
      "loss": 0.8107,
      "step": 693650
    },
    {
      "epoch": 2.4311053633199804,
      "grad_norm": 3.28125,
      "learning_rate": 1.0535163074942042e-05,
      "loss": 0.8088,
      "step": 693660
    },
    {
      "epoch": 2.4311404108268757,
      "grad_norm": 2.5,
      "learning_rate": 1.053451404627834e-05,
      "loss": 0.8082,
      "step": 693670
    },
    {
      "epoch": 2.4311754583337715,
      "grad_norm": 3.0625,
      "learning_rate": 1.0533865017614638e-05,
      "loss": 0.8261,
      "step": 693680
    },
    {
      "epoch": 2.4312105058406672,
      "grad_norm": 2.515625,
      "learning_rate": 1.0533215988950936e-05,
      "loss": 0.7367,
      "step": 693690
    },
    {
      "epoch": 2.4312455533475625,
      "grad_norm": 2.921875,
      "learning_rate": 1.0532566960287236e-05,
      "loss": 0.8317,
      "step": 693700
    },
    {
      "epoch": 2.4312806008544583,
      "grad_norm": 2.5625,
      "learning_rate": 1.0531917931623534e-05,
      "loss": 0.8684,
      "step": 693710
    },
    {
      "epoch": 2.4313156483613536,
      "grad_norm": 2.609375,
      "learning_rate": 1.053126890295983e-05,
      "loss": 0.8304,
      "step": 693720
    },
    {
      "epoch": 2.4313506958682494,
      "grad_norm": 2.921875,
      "learning_rate": 1.0530619874296128e-05,
      "loss": 0.8793,
      "step": 693730
    },
    {
      "epoch": 2.431385743375145,
      "grad_norm": 2.5625,
      "learning_rate": 1.0529970845632426e-05,
      "loss": 0.7129,
      "step": 693740
    },
    {
      "epoch": 2.4314207908820404,
      "grad_norm": 2.78125,
      "learning_rate": 1.0529321816968724e-05,
      "loss": 0.8572,
      "step": 693750
    },
    {
      "epoch": 2.431455838388936,
      "grad_norm": 3.015625,
      "learning_rate": 1.0528672788305024e-05,
      "loss": 0.8134,
      "step": 693760
    },
    {
      "epoch": 2.431490885895832,
      "grad_norm": 3.234375,
      "learning_rate": 1.0528023759641322e-05,
      "loss": 0.7616,
      "step": 693770
    },
    {
      "epoch": 2.4315259334027273,
      "grad_norm": 2.5625,
      "learning_rate": 1.052737473097762e-05,
      "loss": 0.7749,
      "step": 693780
    },
    {
      "epoch": 2.431560980909623,
      "grad_norm": 3.046875,
      "learning_rate": 1.0526725702313918e-05,
      "loss": 0.8496,
      "step": 693790
    },
    {
      "epoch": 2.431596028416519,
      "grad_norm": 2.6875,
      "learning_rate": 1.0526076673650216e-05,
      "loss": 0.8131,
      "step": 693800
    },
    {
      "epoch": 2.431631075923414,
      "grad_norm": 2.828125,
      "learning_rate": 1.0525427644986514e-05,
      "loss": 0.7799,
      "step": 693810
    },
    {
      "epoch": 2.43166612343031,
      "grad_norm": 3.015625,
      "learning_rate": 1.0524778616322812e-05,
      "loss": 0.7715,
      "step": 693820
    },
    {
      "epoch": 2.431701170937205,
      "grad_norm": 2.984375,
      "learning_rate": 1.052412958765911e-05,
      "loss": 0.772,
      "step": 693830
    },
    {
      "epoch": 2.431736218444101,
      "grad_norm": 2.765625,
      "learning_rate": 1.0523480558995408e-05,
      "loss": 0.8199,
      "step": 693840
    },
    {
      "epoch": 2.4317712659509967,
      "grad_norm": 3.484375,
      "learning_rate": 1.0522831530331706e-05,
      "loss": 0.8534,
      "step": 693850
    },
    {
      "epoch": 2.431806313457892,
      "grad_norm": 2.90625,
      "learning_rate": 1.0522182501668004e-05,
      "loss": 0.7339,
      "step": 693860
    },
    {
      "epoch": 2.4318413609647878,
      "grad_norm": 3.0,
      "learning_rate": 1.0521533473004302e-05,
      "loss": 0.855,
      "step": 693870
    },
    {
      "epoch": 2.4318764084716835,
      "grad_norm": 2.5625,
      "learning_rate": 1.0520884444340602e-05,
      "loss": 0.87,
      "step": 693880
    },
    {
      "epoch": 2.431911455978579,
      "grad_norm": 2.796875,
      "learning_rate": 1.05202354156769e-05,
      "loss": 0.8341,
      "step": 693890
    },
    {
      "epoch": 2.4319465034854746,
      "grad_norm": 3.03125,
      "learning_rate": 1.0519586387013198e-05,
      "loss": 0.771,
      "step": 693900
    },
    {
      "epoch": 2.4319815509923703,
      "grad_norm": 2.671875,
      "learning_rate": 1.0518937358349494e-05,
      "loss": 0.7816,
      "step": 693910
    },
    {
      "epoch": 2.4320165984992657,
      "grad_norm": 2.734375,
      "learning_rate": 1.0518288329685792e-05,
      "loss": 0.8445,
      "step": 693920
    },
    {
      "epoch": 2.4320516460061614,
      "grad_norm": 3.046875,
      "learning_rate": 1.051763930102209e-05,
      "loss": 0.8458,
      "step": 693930
    },
    {
      "epoch": 2.4320866935130567,
      "grad_norm": 3.125,
      "learning_rate": 1.051699027235839e-05,
      "loss": 0.846,
      "step": 693940
    },
    {
      "epoch": 2.4321217410199525,
      "grad_norm": 3.453125,
      "learning_rate": 1.0516341243694688e-05,
      "loss": 0.8422,
      "step": 693950
    },
    {
      "epoch": 2.4321567885268482,
      "grad_norm": 3.015625,
      "learning_rate": 1.0515692215030986e-05,
      "loss": 0.8139,
      "step": 693960
    },
    {
      "epoch": 2.4321918360337436,
      "grad_norm": 3.328125,
      "learning_rate": 1.0515043186367284e-05,
      "loss": 0.783,
      "step": 693970
    },
    {
      "epoch": 2.4322268835406393,
      "grad_norm": 3.078125,
      "learning_rate": 1.0514394157703582e-05,
      "loss": 0.7825,
      "step": 693980
    },
    {
      "epoch": 2.432261931047535,
      "grad_norm": 3.125,
      "learning_rate": 1.051374512903988e-05,
      "loss": 0.8148,
      "step": 693990
    },
    {
      "epoch": 2.4322969785544304,
      "grad_norm": 2.953125,
      "learning_rate": 1.0513096100376178e-05,
      "loss": 0.8222,
      "step": 694000
    },
    {
      "epoch": 2.432332026061326,
      "grad_norm": 2.828125,
      "learning_rate": 1.0512447071712476e-05,
      "loss": 0.8395,
      "step": 694010
    },
    {
      "epoch": 2.432367073568222,
      "grad_norm": 2.734375,
      "learning_rate": 1.0511798043048774e-05,
      "loss": 0.8598,
      "step": 694020
    },
    {
      "epoch": 2.432402121075117,
      "grad_norm": 3.21875,
      "learning_rate": 1.0511149014385072e-05,
      "loss": 0.7732,
      "step": 694030
    },
    {
      "epoch": 2.432437168582013,
      "grad_norm": 2.8125,
      "learning_rate": 1.051049998572137e-05,
      "loss": 0.8188,
      "step": 694040
    },
    {
      "epoch": 2.4324722160889083,
      "grad_norm": 3.09375,
      "learning_rate": 1.0509850957057668e-05,
      "loss": 0.8845,
      "step": 694050
    },
    {
      "epoch": 2.432507263595804,
      "grad_norm": 3.4375,
      "learning_rate": 1.0509201928393966e-05,
      "loss": 0.8806,
      "step": 694060
    },
    {
      "epoch": 2.4325423111027,
      "grad_norm": 3.40625,
      "learning_rate": 1.0508552899730265e-05,
      "loss": 0.8056,
      "step": 694070
    },
    {
      "epoch": 2.432577358609595,
      "grad_norm": 2.953125,
      "learning_rate": 1.0507903871066563e-05,
      "loss": 0.8267,
      "step": 694080
    },
    {
      "epoch": 2.432612406116491,
      "grad_norm": 2.828125,
      "learning_rate": 1.0507254842402861e-05,
      "loss": 0.7694,
      "step": 694090
    },
    {
      "epoch": 2.4326474536233866,
      "grad_norm": 2.796875,
      "learning_rate": 1.0506605813739158e-05,
      "loss": 0.7604,
      "step": 694100
    },
    {
      "epoch": 2.432682501130282,
      "grad_norm": 3.234375,
      "learning_rate": 1.0505956785075456e-05,
      "loss": 0.7854,
      "step": 694110
    },
    {
      "epoch": 2.4327175486371777,
      "grad_norm": 3.0625,
      "learning_rate": 1.0505307756411755e-05,
      "loss": 0.7532,
      "step": 694120
    },
    {
      "epoch": 2.4327525961440735,
      "grad_norm": 2.484375,
      "learning_rate": 1.0504658727748053e-05,
      "loss": 0.8348,
      "step": 694130
    },
    {
      "epoch": 2.432787643650969,
      "grad_norm": 2.65625,
      "learning_rate": 1.0504009699084351e-05,
      "loss": 0.7479,
      "step": 694140
    },
    {
      "epoch": 2.4328226911578645,
      "grad_norm": 2.78125,
      "learning_rate": 1.050336067042065e-05,
      "loss": 0.7182,
      "step": 694150
    },
    {
      "epoch": 2.43285773866476,
      "grad_norm": 2.9375,
      "learning_rate": 1.0502711641756947e-05,
      "loss": 0.7876,
      "step": 694160
    },
    {
      "epoch": 2.4328927861716556,
      "grad_norm": 2.640625,
      "learning_rate": 1.0502062613093245e-05,
      "loss": 0.8156,
      "step": 694170
    },
    {
      "epoch": 2.4329278336785514,
      "grad_norm": 3.09375,
      "learning_rate": 1.0501413584429543e-05,
      "loss": 0.8015,
      "step": 694180
    },
    {
      "epoch": 2.4329628811854467,
      "grad_norm": 3.125,
      "learning_rate": 1.0500764555765841e-05,
      "loss": 0.8263,
      "step": 694190
    },
    {
      "epoch": 2.4329979286923424,
      "grad_norm": 2.515625,
      "learning_rate": 1.050011552710214e-05,
      "loss": 0.8352,
      "step": 694200
    },
    {
      "epoch": 2.433032976199238,
      "grad_norm": 2.875,
      "learning_rate": 1.0499466498438437e-05,
      "loss": 0.9011,
      "step": 694210
    },
    {
      "epoch": 2.4330680237061335,
      "grad_norm": 3.296875,
      "learning_rate": 1.0498817469774735e-05,
      "loss": 0.8943,
      "step": 694220
    },
    {
      "epoch": 2.4331030712130293,
      "grad_norm": 2.9375,
      "learning_rate": 1.0498168441111033e-05,
      "loss": 0.767,
      "step": 694230
    },
    {
      "epoch": 2.433138118719925,
      "grad_norm": 2.75,
      "learning_rate": 1.0497519412447331e-05,
      "loss": 0.8259,
      "step": 694240
    },
    {
      "epoch": 2.4331731662268203,
      "grad_norm": 3.171875,
      "learning_rate": 1.0496870383783631e-05,
      "loss": 0.8484,
      "step": 694250
    },
    {
      "epoch": 2.433208213733716,
      "grad_norm": 3.015625,
      "learning_rate": 1.0496221355119929e-05,
      "loss": 0.7695,
      "step": 694260
    },
    {
      "epoch": 2.4332432612406114,
      "grad_norm": 2.96875,
      "learning_rate": 1.0495572326456227e-05,
      "loss": 0.732,
      "step": 694270
    },
    {
      "epoch": 2.433278308747507,
      "grad_norm": 2.984375,
      "learning_rate": 1.0494923297792523e-05,
      "loss": 0.8186,
      "step": 694280
    },
    {
      "epoch": 2.433313356254403,
      "grad_norm": 2.875,
      "learning_rate": 1.0494274269128821e-05,
      "loss": 0.8274,
      "step": 694290
    },
    {
      "epoch": 2.4333484037612987,
      "grad_norm": 2.625,
      "learning_rate": 1.049362524046512e-05,
      "loss": 0.7653,
      "step": 694300
    },
    {
      "epoch": 2.433383451268194,
      "grad_norm": 3.671875,
      "learning_rate": 1.0492976211801419e-05,
      "loss": 0.8541,
      "step": 694310
    },
    {
      "epoch": 2.4334184987750898,
      "grad_norm": 3.125,
      "learning_rate": 1.0492327183137717e-05,
      "loss": 0.7691,
      "step": 694320
    },
    {
      "epoch": 2.433453546281985,
      "grad_norm": 3.15625,
      "learning_rate": 1.0491678154474015e-05,
      "loss": 0.7531,
      "step": 694330
    },
    {
      "epoch": 2.433488593788881,
      "grad_norm": 2.984375,
      "learning_rate": 1.0491029125810313e-05,
      "loss": 0.7676,
      "step": 694340
    },
    {
      "epoch": 2.4335236412957766,
      "grad_norm": 2.75,
      "learning_rate": 1.0490380097146611e-05,
      "loss": 0.8067,
      "step": 694350
    },
    {
      "epoch": 2.433558688802672,
      "grad_norm": 3.046875,
      "learning_rate": 1.0489731068482909e-05,
      "loss": 0.8052,
      "step": 694360
    },
    {
      "epoch": 2.4335937363095677,
      "grad_norm": 2.796875,
      "learning_rate": 1.0489082039819207e-05,
      "loss": 0.8833,
      "step": 694370
    },
    {
      "epoch": 2.433628783816463,
      "grad_norm": 3.0625,
      "learning_rate": 1.0488433011155505e-05,
      "loss": 0.8425,
      "step": 694380
    },
    {
      "epoch": 2.4336638313233587,
      "grad_norm": 3.125,
      "learning_rate": 1.0487783982491803e-05,
      "loss": 0.8107,
      "step": 694390
    },
    {
      "epoch": 2.4336988788302545,
      "grad_norm": 3.21875,
      "learning_rate": 1.0487134953828101e-05,
      "loss": 0.8253,
      "step": 694400
    },
    {
      "epoch": 2.4337339263371502,
      "grad_norm": 3.03125,
      "learning_rate": 1.0486485925164399e-05,
      "loss": 0.8598,
      "step": 694410
    },
    {
      "epoch": 2.4337689738440456,
      "grad_norm": 3.046875,
      "learning_rate": 1.0485836896500697e-05,
      "loss": 0.8207,
      "step": 694420
    },
    {
      "epoch": 2.4338040213509413,
      "grad_norm": 2.8125,
      "learning_rate": 1.0485187867836997e-05,
      "loss": 0.7666,
      "step": 694430
    },
    {
      "epoch": 2.4338390688578366,
      "grad_norm": 3.09375,
      "learning_rate": 1.0484538839173295e-05,
      "loss": 0.7892,
      "step": 694440
    },
    {
      "epoch": 2.4338741163647324,
      "grad_norm": 3.484375,
      "learning_rate": 1.0483889810509593e-05,
      "loss": 0.8576,
      "step": 694450
    },
    {
      "epoch": 2.433909163871628,
      "grad_norm": 2.359375,
      "learning_rate": 1.048324078184589e-05,
      "loss": 0.7672,
      "step": 694460
    },
    {
      "epoch": 2.4339442113785235,
      "grad_norm": 2.765625,
      "learning_rate": 1.0482591753182187e-05,
      "loss": 0.7707,
      "step": 694470
    },
    {
      "epoch": 2.433979258885419,
      "grad_norm": 2.71875,
      "learning_rate": 1.0481942724518485e-05,
      "loss": 0.7487,
      "step": 694480
    },
    {
      "epoch": 2.434014306392315,
      "grad_norm": 2.71875,
      "learning_rate": 1.0481293695854785e-05,
      "loss": 0.7313,
      "step": 694490
    },
    {
      "epoch": 2.4340493538992103,
      "grad_norm": 3.125,
      "learning_rate": 1.0480644667191083e-05,
      "loss": 0.8268,
      "step": 694500
    },
    {
      "epoch": 2.434084401406106,
      "grad_norm": 3.0625,
      "learning_rate": 1.047999563852738e-05,
      "loss": 0.876,
      "step": 694510
    },
    {
      "epoch": 2.434119448913002,
      "grad_norm": 2.9375,
      "learning_rate": 1.0479346609863679e-05,
      "loss": 0.7623,
      "step": 694520
    },
    {
      "epoch": 2.434154496419897,
      "grad_norm": 2.609375,
      "learning_rate": 1.0478697581199977e-05,
      "loss": 0.7808,
      "step": 694530
    },
    {
      "epoch": 2.434189543926793,
      "grad_norm": 3.109375,
      "learning_rate": 1.0478048552536275e-05,
      "loss": 0.857,
      "step": 694540
    },
    {
      "epoch": 2.434224591433688,
      "grad_norm": 2.671875,
      "learning_rate": 1.0477399523872573e-05,
      "loss": 0.8428,
      "step": 694550
    },
    {
      "epoch": 2.434259638940584,
      "grad_norm": 3.03125,
      "learning_rate": 1.0476750495208872e-05,
      "loss": 0.8161,
      "step": 694560
    },
    {
      "epoch": 2.4342946864474797,
      "grad_norm": 2.796875,
      "learning_rate": 1.0476101466545169e-05,
      "loss": 0.8602,
      "step": 694570
    },
    {
      "epoch": 2.434329733954375,
      "grad_norm": 2.671875,
      "learning_rate": 1.0475452437881467e-05,
      "loss": 0.8948,
      "step": 694580
    },
    {
      "epoch": 2.4343647814612708,
      "grad_norm": 2.703125,
      "learning_rate": 1.0474803409217765e-05,
      "loss": 0.7882,
      "step": 694590
    },
    {
      "epoch": 2.4343998289681665,
      "grad_norm": 2.9375,
      "learning_rate": 1.0474154380554063e-05,
      "loss": 0.7873,
      "step": 694600
    },
    {
      "epoch": 2.434434876475062,
      "grad_norm": 2.703125,
      "learning_rate": 1.047350535189036e-05,
      "loss": 0.8358,
      "step": 694610
    },
    {
      "epoch": 2.4344699239819576,
      "grad_norm": 2.78125,
      "learning_rate": 1.047285632322666e-05,
      "loss": 0.7919,
      "step": 694620
    },
    {
      "epoch": 2.4345049714888534,
      "grad_norm": 3.140625,
      "learning_rate": 1.0472207294562958e-05,
      "loss": 0.8146,
      "step": 694630
    },
    {
      "epoch": 2.4345400189957487,
      "grad_norm": 2.953125,
      "learning_rate": 1.0471558265899256e-05,
      "loss": 0.8477,
      "step": 694640
    },
    {
      "epoch": 2.4345750665026444,
      "grad_norm": 3.125,
      "learning_rate": 1.0470909237235554e-05,
      "loss": 0.8187,
      "step": 694650
    },
    {
      "epoch": 2.4346101140095397,
      "grad_norm": 3.03125,
      "learning_rate": 1.047026020857185e-05,
      "loss": 0.7809,
      "step": 694660
    },
    {
      "epoch": 2.4346451615164355,
      "grad_norm": 2.625,
      "learning_rate": 1.046961117990815e-05,
      "loss": 0.7759,
      "step": 694670
    },
    {
      "epoch": 2.4346802090233313,
      "grad_norm": 3.0625,
      "learning_rate": 1.0468962151244448e-05,
      "loss": 0.8553,
      "step": 694680
    },
    {
      "epoch": 2.4347152565302266,
      "grad_norm": 2.78125,
      "learning_rate": 1.0468313122580746e-05,
      "loss": 0.8545,
      "step": 694690
    },
    {
      "epoch": 2.4347503040371223,
      "grad_norm": 2.8125,
      "learning_rate": 1.0467664093917044e-05,
      "loss": 0.7741,
      "step": 694700
    },
    {
      "epoch": 2.434785351544018,
      "grad_norm": 3.28125,
      "learning_rate": 1.0467015065253342e-05,
      "loss": 0.7579,
      "step": 694710
    },
    {
      "epoch": 2.4348203990509134,
      "grad_norm": 3.171875,
      "learning_rate": 1.046636603658964e-05,
      "loss": 0.8239,
      "step": 694720
    },
    {
      "epoch": 2.434855446557809,
      "grad_norm": 2.90625,
      "learning_rate": 1.0465717007925938e-05,
      "loss": 0.8383,
      "step": 694730
    },
    {
      "epoch": 2.434890494064705,
      "grad_norm": 3.21875,
      "learning_rate": 1.0465067979262238e-05,
      "loss": 0.7815,
      "step": 694740
    },
    {
      "epoch": 2.4349255415716002,
      "grad_norm": 2.84375,
      "learning_rate": 1.0464418950598534e-05,
      "loss": 0.8792,
      "step": 694750
    },
    {
      "epoch": 2.434960589078496,
      "grad_norm": 3.0625,
      "learning_rate": 1.0463769921934832e-05,
      "loss": 0.744,
      "step": 694760
    },
    {
      "epoch": 2.4349956365853913,
      "grad_norm": 3.109375,
      "learning_rate": 1.046312089327113e-05,
      "loss": 0.8068,
      "step": 694770
    },
    {
      "epoch": 2.435030684092287,
      "grad_norm": 2.578125,
      "learning_rate": 1.0462471864607428e-05,
      "loss": 0.7526,
      "step": 694780
    },
    {
      "epoch": 2.435065731599183,
      "grad_norm": 3.078125,
      "learning_rate": 1.0461822835943726e-05,
      "loss": 0.7894,
      "step": 694790
    },
    {
      "epoch": 2.435100779106078,
      "grad_norm": 3.0,
      "learning_rate": 1.0461173807280026e-05,
      "loss": 0.7971,
      "step": 694800
    },
    {
      "epoch": 2.435135826612974,
      "grad_norm": 2.28125,
      "learning_rate": 1.0460524778616324e-05,
      "loss": 0.8218,
      "step": 694810
    },
    {
      "epoch": 2.4351708741198697,
      "grad_norm": 2.765625,
      "learning_rate": 1.0459875749952622e-05,
      "loss": 0.7645,
      "step": 694820
    },
    {
      "epoch": 2.435205921626765,
      "grad_norm": 2.828125,
      "learning_rate": 1.045922672128892e-05,
      "loss": 0.8329,
      "step": 694830
    },
    {
      "epoch": 2.4352409691336607,
      "grad_norm": 2.734375,
      "learning_rate": 1.0458577692625218e-05,
      "loss": 0.7326,
      "step": 694840
    },
    {
      "epoch": 2.4352760166405565,
      "grad_norm": 2.703125,
      "learning_rate": 1.0457928663961514e-05,
      "loss": 0.7855,
      "step": 694850
    },
    {
      "epoch": 2.435311064147452,
      "grad_norm": 2.78125,
      "learning_rate": 1.0457279635297814e-05,
      "loss": 0.8656,
      "step": 694860
    },
    {
      "epoch": 2.4353461116543476,
      "grad_norm": 2.671875,
      "learning_rate": 1.0456630606634112e-05,
      "loss": 0.8971,
      "step": 694870
    },
    {
      "epoch": 2.435381159161243,
      "grad_norm": 3.21875,
      "learning_rate": 1.045598157797041e-05,
      "loss": 0.7883,
      "step": 694880
    },
    {
      "epoch": 2.4354162066681386,
      "grad_norm": 2.90625,
      "learning_rate": 1.0455332549306708e-05,
      "loss": 0.7476,
      "step": 694890
    },
    {
      "epoch": 2.4354512541750344,
      "grad_norm": 2.84375,
      "learning_rate": 1.0454683520643006e-05,
      "loss": 0.8007,
      "step": 694900
    },
    {
      "epoch": 2.4354863016819297,
      "grad_norm": 2.953125,
      "learning_rate": 1.0454034491979304e-05,
      "loss": 0.8289,
      "step": 694910
    },
    {
      "epoch": 2.4355213491888255,
      "grad_norm": 3.703125,
      "learning_rate": 1.0453385463315602e-05,
      "loss": 0.8496,
      "step": 694920
    },
    {
      "epoch": 2.435556396695721,
      "grad_norm": 2.953125,
      "learning_rate": 1.0452736434651902e-05,
      "loss": 0.7682,
      "step": 694930
    },
    {
      "epoch": 2.4355914442026165,
      "grad_norm": 2.703125,
      "learning_rate": 1.0452087405988198e-05,
      "loss": 0.7983,
      "step": 694940
    },
    {
      "epoch": 2.4356264917095123,
      "grad_norm": 3.109375,
      "learning_rate": 1.0451438377324496e-05,
      "loss": 0.7847,
      "step": 694950
    },
    {
      "epoch": 2.435661539216408,
      "grad_norm": 3.359375,
      "learning_rate": 1.0450789348660794e-05,
      "loss": 0.8417,
      "step": 694960
    },
    {
      "epoch": 2.4356965867233034,
      "grad_norm": 2.578125,
      "learning_rate": 1.0450140319997092e-05,
      "loss": 0.7364,
      "step": 694970
    },
    {
      "epoch": 2.435731634230199,
      "grad_norm": 3.140625,
      "learning_rate": 1.0449491291333392e-05,
      "loss": 0.7875,
      "step": 694980
    },
    {
      "epoch": 2.4357666817370944,
      "grad_norm": 2.921875,
      "learning_rate": 1.044884226266969e-05,
      "loss": 0.7896,
      "step": 694990
    },
    {
      "epoch": 2.43580172924399,
      "grad_norm": 3.171875,
      "learning_rate": 1.0448193234005988e-05,
      "loss": 0.7222,
      "step": 695000
    },
    {
      "epoch": 2.43580172924399,
      "eval_loss": 0.7579347491264343,
      "eval_runtime": 554.6689,
      "eval_samples_per_second": 685.879,
      "eval_steps_per_second": 57.157,
      "step": 695000
    },
    {
      "epoch": 2.435836776750886,
      "grad_norm": 3.453125,
      "learning_rate": 1.0447544205342286e-05,
      "loss": 0.7892,
      "step": 695010
    },
    {
      "epoch": 2.4358718242577813,
      "grad_norm": 3.078125,
      "learning_rate": 1.0446895176678584e-05,
      "loss": 0.9236,
      "step": 695020
    },
    {
      "epoch": 2.435906871764677,
      "grad_norm": 2.671875,
      "learning_rate": 1.0446246148014882e-05,
      "loss": 0.767,
      "step": 695030
    },
    {
      "epoch": 2.4359419192715728,
      "grad_norm": 3.046875,
      "learning_rate": 1.044559711935118e-05,
      "loss": 0.8313,
      "step": 695040
    },
    {
      "epoch": 2.435976966778468,
      "grad_norm": 2.703125,
      "learning_rate": 1.0444948090687478e-05,
      "loss": 0.7876,
      "step": 695050
    },
    {
      "epoch": 2.436012014285364,
      "grad_norm": 2.96875,
      "learning_rate": 1.0444299062023776e-05,
      "loss": 0.8015,
      "step": 695060
    },
    {
      "epoch": 2.4360470617922596,
      "grad_norm": 3.359375,
      "learning_rate": 1.0443650033360074e-05,
      "loss": 0.868,
      "step": 695070
    },
    {
      "epoch": 2.436082109299155,
      "grad_norm": 3.171875,
      "learning_rate": 1.0443001004696372e-05,
      "loss": 0.8116,
      "step": 695080
    },
    {
      "epoch": 2.4361171568060507,
      "grad_norm": 2.34375,
      "learning_rate": 1.044235197603267e-05,
      "loss": 0.7814,
      "step": 695090
    },
    {
      "epoch": 2.436152204312946,
      "grad_norm": 2.890625,
      "learning_rate": 1.0441702947368968e-05,
      "loss": 0.838,
      "step": 695100
    },
    {
      "epoch": 2.4361872518198417,
      "grad_norm": 3.328125,
      "learning_rate": 1.0441053918705267e-05,
      "loss": 0.8118,
      "step": 695110
    },
    {
      "epoch": 2.4362222993267375,
      "grad_norm": 3.078125,
      "learning_rate": 1.0440404890041565e-05,
      "loss": 0.8494,
      "step": 695120
    },
    {
      "epoch": 2.436257346833633,
      "grad_norm": 2.90625,
      "learning_rate": 1.0439755861377862e-05,
      "loss": 0.7903,
      "step": 695130
    },
    {
      "epoch": 2.4362923943405286,
      "grad_norm": 2.953125,
      "learning_rate": 1.043910683271416e-05,
      "loss": 0.8665,
      "step": 695140
    },
    {
      "epoch": 2.4363274418474243,
      "grad_norm": 3.46875,
      "learning_rate": 1.0438457804050458e-05,
      "loss": 0.8228,
      "step": 695150
    },
    {
      "epoch": 2.4363624893543196,
      "grad_norm": 2.78125,
      "learning_rate": 1.0437808775386756e-05,
      "loss": 0.8143,
      "step": 695160
    },
    {
      "epoch": 2.4363975368612154,
      "grad_norm": 2.71875,
      "learning_rate": 1.0437159746723055e-05,
      "loss": 0.7292,
      "step": 695170
    },
    {
      "epoch": 2.436432584368111,
      "grad_norm": 3.046875,
      "learning_rate": 1.0436510718059353e-05,
      "loss": 0.8112,
      "step": 695180
    },
    {
      "epoch": 2.4364676318750065,
      "grad_norm": 3.015625,
      "learning_rate": 1.0435861689395651e-05,
      "loss": 0.8944,
      "step": 695190
    },
    {
      "epoch": 2.4365026793819022,
      "grad_norm": 2.59375,
      "learning_rate": 1.043521266073195e-05,
      "loss": 0.718,
      "step": 695200
    },
    {
      "epoch": 2.4365377268887976,
      "grad_norm": 2.984375,
      "learning_rate": 1.0434563632068247e-05,
      "loss": 0.8137,
      "step": 695210
    },
    {
      "epoch": 2.4365727743956933,
      "grad_norm": 2.671875,
      "learning_rate": 1.0433914603404545e-05,
      "loss": 0.7394,
      "step": 695220
    },
    {
      "epoch": 2.436607821902589,
      "grad_norm": 2.390625,
      "learning_rate": 1.0433265574740843e-05,
      "loss": 0.7382,
      "step": 695230
    },
    {
      "epoch": 2.4366428694094844,
      "grad_norm": 2.71875,
      "learning_rate": 1.0432616546077141e-05,
      "loss": 0.8653,
      "step": 695240
    },
    {
      "epoch": 2.43667791691638,
      "grad_norm": 2.90625,
      "learning_rate": 1.043196751741344e-05,
      "loss": 0.7985,
      "step": 695250
    },
    {
      "epoch": 2.436712964423276,
      "grad_norm": 2.890625,
      "learning_rate": 1.0431318488749737e-05,
      "loss": 0.8428,
      "step": 695260
    },
    {
      "epoch": 2.436748011930171,
      "grad_norm": 2.609375,
      "learning_rate": 1.0430669460086035e-05,
      "loss": 0.7775,
      "step": 695270
    },
    {
      "epoch": 2.436783059437067,
      "grad_norm": 2.65625,
      "learning_rate": 1.0430020431422333e-05,
      "loss": 0.7929,
      "step": 695280
    },
    {
      "epoch": 2.4368181069439627,
      "grad_norm": 3.109375,
      "learning_rate": 1.0429371402758633e-05,
      "loss": 0.8751,
      "step": 695290
    },
    {
      "epoch": 2.436853154450858,
      "grad_norm": 2.78125,
      "learning_rate": 1.0428722374094931e-05,
      "loss": 0.8354,
      "step": 695300
    },
    {
      "epoch": 2.436888201957754,
      "grad_norm": 3.171875,
      "learning_rate": 1.0428073345431229e-05,
      "loss": 0.82,
      "step": 695310
    },
    {
      "epoch": 2.436923249464649,
      "grad_norm": 2.90625,
      "learning_rate": 1.0427424316767525e-05,
      "loss": 0.7379,
      "step": 695320
    },
    {
      "epoch": 2.436958296971545,
      "grad_norm": 3.171875,
      "learning_rate": 1.0426775288103823e-05,
      "loss": 0.8078,
      "step": 695330
    },
    {
      "epoch": 2.4369933444784406,
      "grad_norm": 3.078125,
      "learning_rate": 1.0426126259440121e-05,
      "loss": 0.8109,
      "step": 695340
    },
    {
      "epoch": 2.437028391985336,
      "grad_norm": 2.8125,
      "learning_rate": 1.0425477230776421e-05,
      "loss": 0.7538,
      "step": 695350
    },
    {
      "epoch": 2.4370634394922317,
      "grad_norm": 3.3125,
      "learning_rate": 1.0424828202112719e-05,
      "loss": 0.8778,
      "step": 695360
    },
    {
      "epoch": 2.4370984869991275,
      "grad_norm": 3.0625,
      "learning_rate": 1.0424179173449017e-05,
      "loss": 0.8707,
      "step": 695370
    },
    {
      "epoch": 2.4371335345060228,
      "grad_norm": 2.484375,
      "learning_rate": 1.0423530144785315e-05,
      "loss": 0.7969,
      "step": 695380
    },
    {
      "epoch": 2.4371685820129185,
      "grad_norm": 3.3125,
      "learning_rate": 1.0422881116121613e-05,
      "loss": 0.8409,
      "step": 695390
    },
    {
      "epoch": 2.4372036295198143,
      "grad_norm": 2.890625,
      "learning_rate": 1.0422232087457911e-05,
      "loss": 0.754,
      "step": 695400
    },
    {
      "epoch": 2.4372386770267096,
      "grad_norm": 2.609375,
      "learning_rate": 1.0421583058794209e-05,
      "loss": 0.7452,
      "step": 695410
    },
    {
      "epoch": 2.4372737245336054,
      "grad_norm": 3.15625,
      "learning_rate": 1.0420934030130507e-05,
      "loss": 0.7991,
      "step": 695420
    },
    {
      "epoch": 2.4373087720405007,
      "grad_norm": 2.625,
      "learning_rate": 1.0420285001466805e-05,
      "loss": 0.7405,
      "step": 695430
    },
    {
      "epoch": 2.4373438195473964,
      "grad_norm": 2.734375,
      "learning_rate": 1.0419635972803103e-05,
      "loss": 0.8343,
      "step": 695440
    },
    {
      "epoch": 2.437378867054292,
      "grad_norm": 3.0625,
      "learning_rate": 1.0418986944139401e-05,
      "loss": 0.773,
      "step": 695450
    },
    {
      "epoch": 2.4374139145611875,
      "grad_norm": 2.90625,
      "learning_rate": 1.0418337915475699e-05,
      "loss": 0.787,
      "step": 695460
    },
    {
      "epoch": 2.4374489620680833,
      "grad_norm": 3.171875,
      "learning_rate": 1.0417688886811997e-05,
      "loss": 0.776,
      "step": 695470
    },
    {
      "epoch": 2.437484009574979,
      "grad_norm": 3.203125,
      "learning_rate": 1.0417039858148297e-05,
      "loss": 0.8454,
      "step": 695480
    },
    {
      "epoch": 2.4375190570818743,
      "grad_norm": 2.703125,
      "learning_rate": 1.0416390829484595e-05,
      "loss": 0.8161,
      "step": 695490
    },
    {
      "epoch": 2.43755410458877,
      "grad_norm": 3.03125,
      "learning_rate": 1.0415741800820893e-05,
      "loss": 0.7875,
      "step": 695500
    },
    {
      "epoch": 2.437589152095666,
      "grad_norm": 2.828125,
      "learning_rate": 1.0415092772157189e-05,
      "loss": 0.7489,
      "step": 695510
    },
    {
      "epoch": 2.437624199602561,
      "grad_norm": 2.96875,
      "learning_rate": 1.0414443743493487e-05,
      "loss": 0.7416,
      "step": 695520
    },
    {
      "epoch": 2.437659247109457,
      "grad_norm": 3.03125,
      "learning_rate": 1.0413794714829787e-05,
      "loss": 0.7451,
      "step": 695530
    },
    {
      "epoch": 2.4376942946163522,
      "grad_norm": 3.0625,
      "learning_rate": 1.0413145686166085e-05,
      "loss": 0.8874,
      "step": 695540
    },
    {
      "epoch": 2.437729342123248,
      "grad_norm": 2.9375,
      "learning_rate": 1.0412496657502383e-05,
      "loss": 0.8316,
      "step": 695550
    },
    {
      "epoch": 2.4377643896301437,
      "grad_norm": 3.09375,
      "learning_rate": 1.041184762883868e-05,
      "loss": 0.7975,
      "step": 695560
    },
    {
      "epoch": 2.4377994371370395,
      "grad_norm": 2.546875,
      "learning_rate": 1.0411198600174979e-05,
      "loss": 0.8339,
      "step": 695570
    },
    {
      "epoch": 2.437834484643935,
      "grad_norm": 2.921875,
      "learning_rate": 1.0410549571511277e-05,
      "loss": 0.8603,
      "step": 695580
    },
    {
      "epoch": 2.4378695321508306,
      "grad_norm": 2.5625,
      "learning_rate": 1.0409900542847575e-05,
      "loss": 0.7301,
      "step": 695590
    },
    {
      "epoch": 2.437904579657726,
      "grad_norm": 2.78125,
      "learning_rate": 1.0409251514183873e-05,
      "loss": 0.8346,
      "step": 695600
    },
    {
      "epoch": 2.4379396271646216,
      "grad_norm": 3.359375,
      "learning_rate": 1.040860248552017e-05,
      "loss": 0.8312,
      "step": 695610
    },
    {
      "epoch": 2.4379746746715174,
      "grad_norm": 2.671875,
      "learning_rate": 1.0407953456856469e-05,
      "loss": 0.773,
      "step": 695620
    },
    {
      "epoch": 2.4380097221784127,
      "grad_norm": 2.71875,
      "learning_rate": 1.0407304428192767e-05,
      "loss": 0.7906,
      "step": 695630
    },
    {
      "epoch": 2.4380447696853085,
      "grad_norm": 3.4375,
      "learning_rate": 1.0406655399529065e-05,
      "loss": 0.8509,
      "step": 695640
    },
    {
      "epoch": 2.438079817192204,
      "grad_norm": 2.9375,
      "learning_rate": 1.0406006370865363e-05,
      "loss": 0.7933,
      "step": 695650
    },
    {
      "epoch": 2.4381148646990995,
      "grad_norm": 2.890625,
      "learning_rate": 1.0405357342201662e-05,
      "loss": 0.8845,
      "step": 695660
    },
    {
      "epoch": 2.4381499122059953,
      "grad_norm": 2.859375,
      "learning_rate": 1.040470831353796e-05,
      "loss": 0.8624,
      "step": 695670
    },
    {
      "epoch": 2.438184959712891,
      "grad_norm": 2.8125,
      "learning_rate": 1.0404059284874258e-05,
      "loss": 0.8513,
      "step": 695680
    },
    {
      "epoch": 2.4382200072197864,
      "grad_norm": 2.765625,
      "learning_rate": 1.0403410256210555e-05,
      "loss": 0.7288,
      "step": 695690
    },
    {
      "epoch": 2.438255054726682,
      "grad_norm": 3.015625,
      "learning_rate": 1.0402761227546853e-05,
      "loss": 0.8024,
      "step": 695700
    },
    {
      "epoch": 2.4382901022335774,
      "grad_norm": 2.984375,
      "learning_rate": 1.040211219888315e-05,
      "loss": 0.7641,
      "step": 695710
    },
    {
      "epoch": 2.438325149740473,
      "grad_norm": 2.734375,
      "learning_rate": 1.040146317021945e-05,
      "loss": 0.7619,
      "step": 695720
    },
    {
      "epoch": 2.438360197247369,
      "grad_norm": 3.28125,
      "learning_rate": 1.0400814141555748e-05,
      "loss": 0.825,
      "step": 695730
    },
    {
      "epoch": 2.4383952447542643,
      "grad_norm": 3.46875,
      "learning_rate": 1.0400165112892046e-05,
      "loss": 0.7649,
      "step": 695740
    },
    {
      "epoch": 2.43843029226116,
      "grad_norm": 3.21875,
      "learning_rate": 1.0399516084228344e-05,
      "loss": 0.8747,
      "step": 695750
    },
    {
      "epoch": 2.4384653397680554,
      "grad_norm": 2.984375,
      "learning_rate": 1.0398867055564642e-05,
      "loss": 0.8824,
      "step": 695760
    },
    {
      "epoch": 2.438500387274951,
      "grad_norm": 2.671875,
      "learning_rate": 1.039821802690094e-05,
      "loss": 0.7639,
      "step": 695770
    },
    {
      "epoch": 2.438535434781847,
      "grad_norm": 3.03125,
      "learning_rate": 1.039756899823724e-05,
      "loss": 0.8405,
      "step": 695780
    },
    {
      "epoch": 2.4385704822887426,
      "grad_norm": 3.0625,
      "learning_rate": 1.0396919969573536e-05,
      "loss": 0.8635,
      "step": 695790
    },
    {
      "epoch": 2.438605529795638,
      "grad_norm": 3.109375,
      "learning_rate": 1.0396270940909834e-05,
      "loss": 0.8642,
      "step": 695800
    },
    {
      "epoch": 2.4386405773025337,
      "grad_norm": 2.953125,
      "learning_rate": 1.0395621912246132e-05,
      "loss": 0.7886,
      "step": 695810
    },
    {
      "epoch": 2.438675624809429,
      "grad_norm": 2.96875,
      "learning_rate": 1.039497288358243e-05,
      "loss": 0.7918,
      "step": 695820
    },
    {
      "epoch": 2.4387106723163248,
      "grad_norm": 2.984375,
      "learning_rate": 1.0394323854918728e-05,
      "loss": 0.7932,
      "step": 695830
    },
    {
      "epoch": 2.4387457198232205,
      "grad_norm": 3.09375,
      "learning_rate": 1.0393674826255028e-05,
      "loss": 0.7445,
      "step": 695840
    },
    {
      "epoch": 2.438780767330116,
      "grad_norm": 2.859375,
      "learning_rate": 1.0393025797591326e-05,
      "loss": 0.812,
      "step": 695850
    },
    {
      "epoch": 2.4388158148370116,
      "grad_norm": 2.875,
      "learning_rate": 1.0392376768927624e-05,
      "loss": 0.833,
      "step": 695860
    },
    {
      "epoch": 2.4388508623439074,
      "grad_norm": 2.71875,
      "learning_rate": 1.0391727740263922e-05,
      "loss": 0.7953,
      "step": 695870
    },
    {
      "epoch": 2.4388859098508027,
      "grad_norm": 2.859375,
      "learning_rate": 1.0391078711600218e-05,
      "loss": 0.7705,
      "step": 695880
    },
    {
      "epoch": 2.4389209573576984,
      "grad_norm": 2.453125,
      "learning_rate": 1.0390429682936516e-05,
      "loss": 0.7905,
      "step": 695890
    },
    {
      "epoch": 2.438956004864594,
      "grad_norm": 2.78125,
      "learning_rate": 1.0389780654272816e-05,
      "loss": 0.8111,
      "step": 695900
    },
    {
      "epoch": 2.4389910523714895,
      "grad_norm": 2.890625,
      "learning_rate": 1.0389131625609114e-05,
      "loss": 0.821,
      "step": 695910
    },
    {
      "epoch": 2.4390260998783853,
      "grad_norm": 3.625,
      "learning_rate": 1.0388482596945412e-05,
      "loss": 0.7768,
      "step": 695920
    },
    {
      "epoch": 2.4390611473852806,
      "grad_norm": 3.46875,
      "learning_rate": 1.038783356828171e-05,
      "loss": 0.915,
      "step": 695930
    },
    {
      "epoch": 2.4390961948921763,
      "grad_norm": 3.40625,
      "learning_rate": 1.0387184539618008e-05,
      "loss": 0.833,
      "step": 695940
    },
    {
      "epoch": 2.439131242399072,
      "grad_norm": 3.234375,
      "learning_rate": 1.0386535510954306e-05,
      "loss": 0.813,
      "step": 695950
    },
    {
      "epoch": 2.4391662899059674,
      "grad_norm": 2.484375,
      "learning_rate": 1.0385886482290604e-05,
      "loss": 0.7727,
      "step": 695960
    },
    {
      "epoch": 2.439201337412863,
      "grad_norm": 2.84375,
      "learning_rate": 1.0385237453626904e-05,
      "loss": 0.7777,
      "step": 695970
    },
    {
      "epoch": 2.439236384919759,
      "grad_norm": 2.609375,
      "learning_rate": 1.03845884249632e-05,
      "loss": 0.7233,
      "step": 695980
    },
    {
      "epoch": 2.4392714324266542,
      "grad_norm": 3.0625,
      "learning_rate": 1.0383939396299498e-05,
      "loss": 0.8902,
      "step": 695990
    },
    {
      "epoch": 2.43930647993355,
      "grad_norm": 2.796875,
      "learning_rate": 1.0383290367635796e-05,
      "loss": 0.7668,
      "step": 696000
    },
    {
      "epoch": 2.4393415274404457,
      "grad_norm": 2.84375,
      "learning_rate": 1.0382641338972094e-05,
      "loss": 0.7516,
      "step": 696010
    },
    {
      "epoch": 2.439376574947341,
      "grad_norm": 2.6875,
      "learning_rate": 1.0381992310308394e-05,
      "loss": 0.7992,
      "step": 696020
    },
    {
      "epoch": 2.439411622454237,
      "grad_norm": 3.28125,
      "learning_rate": 1.0381343281644692e-05,
      "loss": 0.7291,
      "step": 696030
    },
    {
      "epoch": 2.439446669961132,
      "grad_norm": 2.96875,
      "learning_rate": 1.038069425298099e-05,
      "loss": 0.7897,
      "step": 696040
    },
    {
      "epoch": 2.439481717468028,
      "grad_norm": 3.21875,
      "learning_rate": 1.0380045224317288e-05,
      "loss": 0.7763,
      "step": 696050
    },
    {
      "epoch": 2.4395167649749236,
      "grad_norm": 3.515625,
      "learning_rate": 1.0379396195653586e-05,
      "loss": 0.9009,
      "step": 696060
    },
    {
      "epoch": 2.439551812481819,
      "grad_norm": 3.1875,
      "learning_rate": 1.0378747166989882e-05,
      "loss": 0.8125,
      "step": 696070
    },
    {
      "epoch": 2.4395868599887147,
      "grad_norm": 3.015625,
      "learning_rate": 1.0378098138326182e-05,
      "loss": 0.715,
      "step": 696080
    },
    {
      "epoch": 2.4396219074956105,
      "grad_norm": 2.5625,
      "learning_rate": 1.037744910966248e-05,
      "loss": 0.8288,
      "step": 696090
    },
    {
      "epoch": 2.439656955002506,
      "grad_norm": 2.90625,
      "learning_rate": 1.0376800080998778e-05,
      "loss": 0.7723,
      "step": 696100
    },
    {
      "epoch": 2.4396920025094015,
      "grad_norm": 2.59375,
      "learning_rate": 1.0376151052335076e-05,
      "loss": 0.7646,
      "step": 696110
    },
    {
      "epoch": 2.4397270500162973,
      "grad_norm": 2.953125,
      "learning_rate": 1.0375502023671374e-05,
      "loss": 0.7979,
      "step": 696120
    },
    {
      "epoch": 2.4397620975231926,
      "grad_norm": 3.0625,
      "learning_rate": 1.0374852995007672e-05,
      "loss": 0.8244,
      "step": 696130
    },
    {
      "epoch": 2.4397971450300884,
      "grad_norm": 2.328125,
      "learning_rate": 1.037420396634397e-05,
      "loss": 0.7599,
      "step": 696140
    },
    {
      "epoch": 2.4398321925369837,
      "grad_norm": 2.9375,
      "learning_rate": 1.037355493768027e-05,
      "loss": 0.7689,
      "step": 696150
    },
    {
      "epoch": 2.4398672400438794,
      "grad_norm": 2.78125,
      "learning_rate": 1.0372905909016566e-05,
      "loss": 0.7964,
      "step": 696160
    },
    {
      "epoch": 2.439902287550775,
      "grad_norm": 3.34375,
      "learning_rate": 1.0372256880352864e-05,
      "loss": 0.9073,
      "step": 696170
    },
    {
      "epoch": 2.4399373350576705,
      "grad_norm": 3.109375,
      "learning_rate": 1.0371607851689162e-05,
      "loss": 0.7859,
      "step": 696180
    },
    {
      "epoch": 2.4399723825645663,
      "grad_norm": 3.390625,
      "learning_rate": 1.037095882302546e-05,
      "loss": 0.8096,
      "step": 696190
    },
    {
      "epoch": 2.440007430071462,
      "grad_norm": 2.890625,
      "learning_rate": 1.0370309794361758e-05,
      "loss": 0.8282,
      "step": 696200
    },
    {
      "epoch": 2.4400424775783573,
      "grad_norm": 2.8125,
      "learning_rate": 1.0369660765698058e-05,
      "loss": 0.7419,
      "step": 696210
    },
    {
      "epoch": 2.440077525085253,
      "grad_norm": 2.703125,
      "learning_rate": 1.0369011737034356e-05,
      "loss": 0.806,
      "step": 696220
    },
    {
      "epoch": 2.440112572592149,
      "grad_norm": 2.875,
      "learning_rate": 1.0368362708370654e-05,
      "loss": 0.7602,
      "step": 696230
    },
    {
      "epoch": 2.440147620099044,
      "grad_norm": 2.796875,
      "learning_rate": 1.0367713679706952e-05,
      "loss": 0.8192,
      "step": 696240
    },
    {
      "epoch": 2.44018266760594,
      "grad_norm": 3.015625,
      "learning_rate": 1.036706465104325e-05,
      "loss": 0.8409,
      "step": 696250
    },
    {
      "epoch": 2.4402177151128353,
      "grad_norm": 2.84375,
      "learning_rate": 1.0366415622379546e-05,
      "loss": 0.8748,
      "step": 696260
    },
    {
      "epoch": 2.440252762619731,
      "grad_norm": 3.015625,
      "learning_rate": 1.0365766593715846e-05,
      "loss": 0.8489,
      "step": 696270
    },
    {
      "epoch": 2.4402878101266268,
      "grad_norm": 2.5,
      "learning_rate": 1.0365117565052144e-05,
      "loss": 0.7587,
      "step": 696280
    },
    {
      "epoch": 2.440322857633522,
      "grad_norm": 2.734375,
      "learning_rate": 1.0364468536388442e-05,
      "loss": 0.7913,
      "step": 696290
    },
    {
      "epoch": 2.440357905140418,
      "grad_norm": 2.625,
      "learning_rate": 1.036381950772474e-05,
      "loss": 0.7726,
      "step": 696300
    },
    {
      "epoch": 2.4403929526473136,
      "grad_norm": 2.59375,
      "learning_rate": 1.0363170479061038e-05,
      "loss": 0.72,
      "step": 696310
    },
    {
      "epoch": 2.440428000154209,
      "grad_norm": 3.328125,
      "learning_rate": 1.0362521450397336e-05,
      "loss": 0.9032,
      "step": 696320
    },
    {
      "epoch": 2.4404630476611047,
      "grad_norm": 2.875,
      "learning_rate": 1.0361872421733635e-05,
      "loss": 0.8037,
      "step": 696330
    },
    {
      "epoch": 2.4404980951680004,
      "grad_norm": 2.78125,
      "learning_rate": 1.0361223393069933e-05,
      "loss": 0.7769,
      "step": 696340
    },
    {
      "epoch": 2.4405331426748957,
      "grad_norm": 3.046875,
      "learning_rate": 1.036057436440623e-05,
      "loss": 0.7852,
      "step": 696350
    },
    {
      "epoch": 2.4405681901817915,
      "grad_norm": 2.703125,
      "learning_rate": 1.0359925335742528e-05,
      "loss": 0.8284,
      "step": 696360
    },
    {
      "epoch": 2.440603237688687,
      "grad_norm": 3.171875,
      "learning_rate": 1.0359276307078826e-05,
      "loss": 0.8842,
      "step": 696370
    },
    {
      "epoch": 2.4406382851955826,
      "grad_norm": 2.796875,
      "learning_rate": 1.0358627278415124e-05,
      "loss": 0.7857,
      "step": 696380
    },
    {
      "epoch": 2.4406733327024783,
      "grad_norm": 3.015625,
      "learning_rate": 1.0357978249751423e-05,
      "loss": 0.7926,
      "step": 696390
    },
    {
      "epoch": 2.4407083802093736,
      "grad_norm": 3.296875,
      "learning_rate": 1.0357329221087721e-05,
      "loss": 0.843,
      "step": 696400
    },
    {
      "epoch": 2.4407434277162694,
      "grad_norm": 3.25,
      "learning_rate": 1.035668019242402e-05,
      "loss": 0.8613,
      "step": 696410
    },
    {
      "epoch": 2.440778475223165,
      "grad_norm": 3.078125,
      "learning_rate": 1.0356031163760317e-05,
      "loss": 0.806,
      "step": 696420
    },
    {
      "epoch": 2.4408135227300605,
      "grad_norm": 3.140625,
      "learning_rate": 1.0355382135096615e-05,
      "loss": 0.8527,
      "step": 696430
    },
    {
      "epoch": 2.4408485702369562,
      "grad_norm": 3.109375,
      "learning_rate": 1.0354733106432913e-05,
      "loss": 0.8029,
      "step": 696440
    },
    {
      "epoch": 2.440883617743852,
      "grad_norm": 2.84375,
      "learning_rate": 1.0354084077769211e-05,
      "loss": 0.8335,
      "step": 696450
    },
    {
      "epoch": 2.4409186652507473,
      "grad_norm": 3.28125,
      "learning_rate": 1.035343504910551e-05,
      "loss": 0.7746,
      "step": 696460
    },
    {
      "epoch": 2.440953712757643,
      "grad_norm": 2.75,
      "learning_rate": 1.0352786020441807e-05,
      "loss": 0.8085,
      "step": 696470
    },
    {
      "epoch": 2.4409887602645384,
      "grad_norm": 3.25,
      "learning_rate": 1.0352136991778105e-05,
      "loss": 0.7325,
      "step": 696480
    },
    {
      "epoch": 2.441023807771434,
      "grad_norm": 2.890625,
      "learning_rate": 1.0351487963114403e-05,
      "loss": 0.7888,
      "step": 696490
    },
    {
      "epoch": 2.44105885527833,
      "grad_norm": 2.875,
      "learning_rate": 1.0350838934450701e-05,
      "loss": 0.7686,
      "step": 696500
    },
    {
      "epoch": 2.441093902785225,
      "grad_norm": 2.796875,
      "learning_rate": 1.0350189905787e-05,
      "loss": 0.7944,
      "step": 696510
    },
    {
      "epoch": 2.441128950292121,
      "grad_norm": 2.890625,
      "learning_rate": 1.0349540877123299e-05,
      "loss": 0.7706,
      "step": 696520
    },
    {
      "epoch": 2.4411639977990167,
      "grad_norm": 2.953125,
      "learning_rate": 1.0348891848459597e-05,
      "loss": 0.8424,
      "step": 696530
    },
    {
      "epoch": 2.441199045305912,
      "grad_norm": 3.1875,
      "learning_rate": 1.0348242819795893e-05,
      "loss": 0.7896,
      "step": 696540
    },
    {
      "epoch": 2.441234092812808,
      "grad_norm": 2.5,
      "learning_rate": 1.0347593791132191e-05,
      "loss": 0.7984,
      "step": 696550
    },
    {
      "epoch": 2.4412691403197035,
      "grad_norm": 3.984375,
      "learning_rate": 1.034694476246849e-05,
      "loss": 0.8397,
      "step": 696560
    },
    {
      "epoch": 2.441304187826599,
      "grad_norm": 2.6875,
      "learning_rate": 1.0346295733804789e-05,
      "loss": 0.7663,
      "step": 696570
    },
    {
      "epoch": 2.4413392353334946,
      "grad_norm": 3.046875,
      "learning_rate": 1.0345646705141087e-05,
      "loss": 0.7877,
      "step": 696580
    },
    {
      "epoch": 2.44137428284039,
      "grad_norm": 2.859375,
      "learning_rate": 1.0344997676477385e-05,
      "loss": 0.9095,
      "step": 696590
    },
    {
      "epoch": 2.4414093303472857,
      "grad_norm": 2.8125,
      "learning_rate": 1.0344348647813683e-05,
      "loss": 0.8487,
      "step": 696600
    },
    {
      "epoch": 2.4414443778541814,
      "grad_norm": 2.640625,
      "learning_rate": 1.0343699619149981e-05,
      "loss": 0.7223,
      "step": 696610
    },
    {
      "epoch": 2.4414794253610768,
      "grad_norm": 2.625,
      "learning_rate": 1.0343050590486279e-05,
      "loss": 0.735,
      "step": 696620
    },
    {
      "epoch": 2.4415144728679725,
      "grad_norm": 3.125,
      "learning_rate": 1.0342401561822577e-05,
      "loss": 0.845,
      "step": 696630
    },
    {
      "epoch": 2.4415495203748683,
      "grad_norm": 2.96875,
      "learning_rate": 1.0341752533158875e-05,
      "loss": 0.882,
      "step": 696640
    },
    {
      "epoch": 2.4415845678817636,
      "grad_norm": 2.890625,
      "learning_rate": 1.0341103504495173e-05,
      "loss": 0.8217,
      "step": 696650
    },
    {
      "epoch": 2.4416196153886593,
      "grad_norm": 3.328125,
      "learning_rate": 1.0340454475831471e-05,
      "loss": 0.8169,
      "step": 696660
    },
    {
      "epoch": 2.441654662895555,
      "grad_norm": 3.1875,
      "learning_rate": 1.0339805447167769e-05,
      "loss": 0.7746,
      "step": 696670
    },
    {
      "epoch": 2.4416897104024504,
      "grad_norm": 2.78125,
      "learning_rate": 1.0339156418504067e-05,
      "loss": 0.8022,
      "step": 696680
    },
    {
      "epoch": 2.441724757909346,
      "grad_norm": 2.609375,
      "learning_rate": 1.0338507389840365e-05,
      "loss": 0.7372,
      "step": 696690
    },
    {
      "epoch": 2.4417598054162415,
      "grad_norm": 2.515625,
      "learning_rate": 1.0337858361176665e-05,
      "loss": 0.7988,
      "step": 696700
    },
    {
      "epoch": 2.4417948529231372,
      "grad_norm": 2.484375,
      "learning_rate": 1.0337209332512963e-05,
      "loss": 0.7526,
      "step": 696710
    },
    {
      "epoch": 2.441829900430033,
      "grad_norm": 2.921875,
      "learning_rate": 1.033656030384926e-05,
      "loss": 0.8298,
      "step": 696720
    },
    {
      "epoch": 2.4418649479369283,
      "grad_norm": 2.875,
      "learning_rate": 1.0335911275185557e-05,
      "loss": 0.7628,
      "step": 696730
    },
    {
      "epoch": 2.441899995443824,
      "grad_norm": 3.0,
      "learning_rate": 1.0335262246521855e-05,
      "loss": 0.9055,
      "step": 696740
    },
    {
      "epoch": 2.44193504295072,
      "grad_norm": 2.703125,
      "learning_rate": 1.0334613217858153e-05,
      "loss": 0.9033,
      "step": 696750
    },
    {
      "epoch": 2.441970090457615,
      "grad_norm": 2.578125,
      "learning_rate": 1.0333964189194453e-05,
      "loss": 0.7638,
      "step": 696760
    },
    {
      "epoch": 2.442005137964511,
      "grad_norm": 2.890625,
      "learning_rate": 1.033331516053075e-05,
      "loss": 0.7826,
      "step": 696770
    },
    {
      "epoch": 2.4420401854714067,
      "grad_norm": 3.140625,
      "learning_rate": 1.0332666131867049e-05,
      "loss": 0.8224,
      "step": 696780
    },
    {
      "epoch": 2.442075232978302,
      "grad_norm": 3.15625,
      "learning_rate": 1.0332017103203347e-05,
      "loss": 0.8297,
      "step": 696790
    },
    {
      "epoch": 2.4421102804851977,
      "grad_norm": 2.53125,
      "learning_rate": 1.0331368074539645e-05,
      "loss": 0.8301,
      "step": 696800
    },
    {
      "epoch": 2.442145327992093,
      "grad_norm": 2.9375,
      "learning_rate": 1.0330719045875943e-05,
      "loss": 0.819,
      "step": 696810
    },
    {
      "epoch": 2.442180375498989,
      "grad_norm": 2.859375,
      "learning_rate": 1.033007001721224e-05,
      "loss": 0.7909,
      "step": 696820
    },
    {
      "epoch": 2.4422154230058846,
      "grad_norm": 2.78125,
      "learning_rate": 1.0329420988548539e-05,
      "loss": 0.8944,
      "step": 696830
    },
    {
      "epoch": 2.44225047051278,
      "grad_norm": 2.65625,
      "learning_rate": 1.0328771959884837e-05,
      "loss": 0.7756,
      "step": 696840
    },
    {
      "epoch": 2.4422855180196756,
      "grad_norm": 3.3125,
      "learning_rate": 1.0328122931221135e-05,
      "loss": 0.9185,
      "step": 696850
    },
    {
      "epoch": 2.4423205655265714,
      "grad_norm": 2.859375,
      "learning_rate": 1.0327473902557433e-05,
      "loss": 0.8235,
      "step": 696860
    },
    {
      "epoch": 2.4423556130334667,
      "grad_norm": 3.078125,
      "learning_rate": 1.032682487389373e-05,
      "loss": 0.8473,
      "step": 696870
    },
    {
      "epoch": 2.4423906605403625,
      "grad_norm": 3.25,
      "learning_rate": 1.032617584523003e-05,
      "loss": 0.7736,
      "step": 696880
    },
    {
      "epoch": 2.4424257080472582,
      "grad_norm": 3.328125,
      "learning_rate": 1.0325526816566328e-05,
      "loss": 0.841,
      "step": 696890
    },
    {
      "epoch": 2.4424607555541535,
      "grad_norm": 2.796875,
      "learning_rate": 1.0324877787902626e-05,
      "loss": 0.8848,
      "step": 696900
    },
    {
      "epoch": 2.4424958030610493,
      "grad_norm": 2.765625,
      "learning_rate": 1.0324228759238924e-05,
      "loss": 0.8347,
      "step": 696910
    },
    {
      "epoch": 2.4425308505679446,
      "grad_norm": 2.875,
      "learning_rate": 1.032357973057522e-05,
      "loss": 0.7689,
      "step": 696920
    },
    {
      "epoch": 2.4425658980748404,
      "grad_norm": 2.703125,
      "learning_rate": 1.0322930701911519e-05,
      "loss": 0.7845,
      "step": 696930
    },
    {
      "epoch": 2.442600945581736,
      "grad_norm": 2.828125,
      "learning_rate": 1.0322281673247818e-05,
      "loss": 0.8464,
      "step": 696940
    },
    {
      "epoch": 2.442635993088632,
      "grad_norm": 2.96875,
      "learning_rate": 1.0321632644584116e-05,
      "loss": 0.8163,
      "step": 696950
    },
    {
      "epoch": 2.442671040595527,
      "grad_norm": 2.8125,
      "learning_rate": 1.0320983615920414e-05,
      "loss": 0.8304,
      "step": 696960
    },
    {
      "epoch": 2.442706088102423,
      "grad_norm": 2.640625,
      "learning_rate": 1.0320334587256712e-05,
      "loss": 0.7696,
      "step": 696970
    },
    {
      "epoch": 2.4427411356093183,
      "grad_norm": 3.0,
      "learning_rate": 1.031968555859301e-05,
      "loss": 0.819,
      "step": 696980
    },
    {
      "epoch": 2.442776183116214,
      "grad_norm": 3.171875,
      "learning_rate": 1.0319036529929308e-05,
      "loss": 0.7764,
      "step": 696990
    },
    {
      "epoch": 2.44281123062311,
      "grad_norm": 3.0625,
      "learning_rate": 1.0318387501265606e-05,
      "loss": 0.7473,
      "step": 697000
    },
    {
      "epoch": 2.442846278130005,
      "grad_norm": 2.8125,
      "learning_rate": 1.0317738472601904e-05,
      "loss": 0.8158,
      "step": 697010
    },
    {
      "epoch": 2.442881325636901,
      "grad_norm": 2.765625,
      "learning_rate": 1.0317089443938202e-05,
      "loss": 0.8452,
      "step": 697020
    },
    {
      "epoch": 2.442916373143796,
      "grad_norm": 2.96875,
      "learning_rate": 1.03164404152745e-05,
      "loss": 0.8353,
      "step": 697030
    },
    {
      "epoch": 2.442951420650692,
      "grad_norm": 2.921875,
      "learning_rate": 1.0315791386610798e-05,
      "loss": 0.7786,
      "step": 697040
    },
    {
      "epoch": 2.4429864681575877,
      "grad_norm": 2.96875,
      "learning_rate": 1.0315142357947096e-05,
      "loss": 0.82,
      "step": 697050
    },
    {
      "epoch": 2.4430215156644834,
      "grad_norm": 2.375,
      "learning_rate": 1.0314493329283394e-05,
      "loss": 0.7706,
      "step": 697060
    },
    {
      "epoch": 2.4430565631713788,
      "grad_norm": 2.65625,
      "learning_rate": 1.0313844300619694e-05,
      "loss": 0.8033,
      "step": 697070
    },
    {
      "epoch": 2.4430916106782745,
      "grad_norm": 2.859375,
      "learning_rate": 1.0313195271955992e-05,
      "loss": 0.7963,
      "step": 697080
    },
    {
      "epoch": 2.44312665818517,
      "grad_norm": 3.265625,
      "learning_rate": 1.031254624329229e-05,
      "loss": 0.8824,
      "step": 697090
    },
    {
      "epoch": 2.4431617056920656,
      "grad_norm": 2.90625,
      "learning_rate": 1.0311897214628588e-05,
      "loss": 0.8263,
      "step": 697100
    },
    {
      "epoch": 2.4431967531989613,
      "grad_norm": 3.015625,
      "learning_rate": 1.0311248185964884e-05,
      "loss": 0.8196,
      "step": 697110
    },
    {
      "epoch": 2.4432318007058567,
      "grad_norm": 3.171875,
      "learning_rate": 1.0310599157301184e-05,
      "loss": 0.7474,
      "step": 697120
    },
    {
      "epoch": 2.4432668482127524,
      "grad_norm": 3.109375,
      "learning_rate": 1.0309950128637482e-05,
      "loss": 0.857,
      "step": 697130
    },
    {
      "epoch": 2.443301895719648,
      "grad_norm": 3.3125,
      "learning_rate": 1.030930109997378e-05,
      "loss": 0.7923,
      "step": 697140
    },
    {
      "epoch": 2.4433369432265435,
      "grad_norm": 2.75,
      "learning_rate": 1.0308652071310078e-05,
      "loss": 0.8148,
      "step": 697150
    },
    {
      "epoch": 2.4433719907334392,
      "grad_norm": 3.015625,
      "learning_rate": 1.0308003042646376e-05,
      "loss": 0.8666,
      "step": 697160
    },
    {
      "epoch": 2.443407038240335,
      "grad_norm": 2.828125,
      "learning_rate": 1.0307354013982674e-05,
      "loss": 0.7708,
      "step": 697170
    },
    {
      "epoch": 2.4434420857472303,
      "grad_norm": 2.796875,
      "learning_rate": 1.0306704985318972e-05,
      "loss": 0.8774,
      "step": 697180
    },
    {
      "epoch": 2.443477133254126,
      "grad_norm": 3.1875,
      "learning_rate": 1.0306055956655272e-05,
      "loss": 0.8183,
      "step": 697190
    },
    {
      "epoch": 2.4435121807610214,
      "grad_norm": 3.234375,
      "learning_rate": 1.0305406927991568e-05,
      "loss": 0.7737,
      "step": 697200
    },
    {
      "epoch": 2.443547228267917,
      "grad_norm": 3.03125,
      "learning_rate": 1.0304757899327866e-05,
      "loss": 0.7772,
      "step": 697210
    },
    {
      "epoch": 2.443582275774813,
      "grad_norm": 2.5,
      "learning_rate": 1.0304108870664164e-05,
      "loss": 0.7774,
      "step": 697220
    },
    {
      "epoch": 2.443617323281708,
      "grad_norm": 3.109375,
      "learning_rate": 1.0303459842000462e-05,
      "loss": 0.889,
      "step": 697230
    },
    {
      "epoch": 2.443652370788604,
      "grad_norm": 3.03125,
      "learning_rate": 1.030281081333676e-05,
      "loss": 0.8118,
      "step": 697240
    },
    {
      "epoch": 2.4436874182954997,
      "grad_norm": 2.953125,
      "learning_rate": 1.030216178467306e-05,
      "loss": 0.7743,
      "step": 697250
    },
    {
      "epoch": 2.443722465802395,
      "grad_norm": 2.96875,
      "learning_rate": 1.0301512756009358e-05,
      "loss": 0.8202,
      "step": 697260
    },
    {
      "epoch": 2.443757513309291,
      "grad_norm": 3.03125,
      "learning_rate": 1.0300863727345656e-05,
      "loss": 0.8066,
      "step": 697270
    },
    {
      "epoch": 2.4437925608161866,
      "grad_norm": 3.015625,
      "learning_rate": 1.0300214698681954e-05,
      "loss": 0.814,
      "step": 697280
    },
    {
      "epoch": 2.443827608323082,
      "grad_norm": 2.625,
      "learning_rate": 1.029956567001825e-05,
      "loss": 0.7756,
      "step": 697290
    },
    {
      "epoch": 2.4438626558299776,
      "grad_norm": 3.96875,
      "learning_rate": 1.0298916641354548e-05,
      "loss": 0.8485,
      "step": 697300
    },
    {
      "epoch": 2.443897703336873,
      "grad_norm": 3.109375,
      "learning_rate": 1.0298267612690848e-05,
      "loss": 0.8363,
      "step": 697310
    },
    {
      "epoch": 2.4439327508437687,
      "grad_norm": 2.921875,
      "learning_rate": 1.0297618584027146e-05,
      "loss": 0.8335,
      "step": 697320
    },
    {
      "epoch": 2.4439677983506645,
      "grad_norm": 2.65625,
      "learning_rate": 1.0296969555363444e-05,
      "loss": 0.7878,
      "step": 697330
    },
    {
      "epoch": 2.44400284585756,
      "grad_norm": 2.65625,
      "learning_rate": 1.0296320526699742e-05,
      "loss": 0.8073,
      "step": 697340
    },
    {
      "epoch": 2.4440378933644555,
      "grad_norm": 2.5,
      "learning_rate": 1.029567149803604e-05,
      "loss": 0.7093,
      "step": 697350
    },
    {
      "epoch": 2.4440729408713513,
      "grad_norm": 2.625,
      "learning_rate": 1.0295022469372338e-05,
      "loss": 0.7613,
      "step": 697360
    },
    {
      "epoch": 2.4441079883782466,
      "grad_norm": 3.34375,
      "learning_rate": 1.0294373440708636e-05,
      "loss": 0.8009,
      "step": 697370
    },
    {
      "epoch": 2.4441430358851424,
      "grad_norm": 3.359375,
      "learning_rate": 1.0293724412044935e-05,
      "loss": 0.8239,
      "step": 697380
    },
    {
      "epoch": 2.444178083392038,
      "grad_norm": 3.0,
      "learning_rate": 1.0293075383381232e-05,
      "loss": 0.8191,
      "step": 697390
    },
    {
      "epoch": 2.4442131308989334,
      "grad_norm": 2.515625,
      "learning_rate": 1.029242635471753e-05,
      "loss": 0.8131,
      "step": 697400
    },
    {
      "epoch": 2.444248178405829,
      "grad_norm": 3.375,
      "learning_rate": 1.0291777326053828e-05,
      "loss": 0.8525,
      "step": 697410
    },
    {
      "epoch": 2.4442832259127245,
      "grad_norm": 3.09375,
      "learning_rate": 1.0291128297390126e-05,
      "loss": 0.7901,
      "step": 697420
    },
    {
      "epoch": 2.4443182734196203,
      "grad_norm": 2.84375,
      "learning_rate": 1.0290479268726425e-05,
      "loss": 0.9086,
      "step": 697430
    },
    {
      "epoch": 2.444353320926516,
      "grad_norm": 2.71875,
      "learning_rate": 1.0289830240062723e-05,
      "loss": 0.7361,
      "step": 697440
    },
    {
      "epoch": 2.4443883684334113,
      "grad_norm": 2.890625,
      "learning_rate": 1.0289181211399021e-05,
      "loss": 0.739,
      "step": 697450
    },
    {
      "epoch": 2.444423415940307,
      "grad_norm": 3.0625,
      "learning_rate": 1.028853218273532e-05,
      "loss": 0.7342,
      "step": 697460
    },
    {
      "epoch": 2.444458463447203,
      "grad_norm": 2.953125,
      "learning_rate": 1.0287883154071617e-05,
      "loss": 0.7777,
      "step": 697470
    },
    {
      "epoch": 2.444493510954098,
      "grad_norm": 2.875,
      "learning_rate": 1.0287234125407914e-05,
      "loss": 0.7881,
      "step": 697480
    },
    {
      "epoch": 2.444528558460994,
      "grad_norm": 3.515625,
      "learning_rate": 1.0286585096744213e-05,
      "loss": 0.8141,
      "step": 697490
    },
    {
      "epoch": 2.4445636059678897,
      "grad_norm": 3.34375,
      "learning_rate": 1.0285936068080511e-05,
      "loss": 0.859,
      "step": 697500
    },
    {
      "epoch": 2.444598653474785,
      "grad_norm": 2.84375,
      "learning_rate": 1.028528703941681e-05,
      "loss": 0.8537,
      "step": 697510
    },
    {
      "epoch": 2.4446337009816808,
      "grad_norm": 2.765625,
      "learning_rate": 1.0284638010753107e-05,
      "loss": 0.7984,
      "step": 697520
    },
    {
      "epoch": 2.444668748488576,
      "grad_norm": 2.859375,
      "learning_rate": 1.0283988982089405e-05,
      "loss": 0.8258,
      "step": 697530
    },
    {
      "epoch": 2.444703795995472,
      "grad_norm": 3.328125,
      "learning_rate": 1.0283339953425703e-05,
      "loss": 0.7873,
      "step": 697540
    },
    {
      "epoch": 2.4447388435023676,
      "grad_norm": 2.703125,
      "learning_rate": 1.0282690924762001e-05,
      "loss": 0.7869,
      "step": 697550
    },
    {
      "epoch": 2.444773891009263,
      "grad_norm": 2.75,
      "learning_rate": 1.0282041896098301e-05,
      "loss": 0.7647,
      "step": 697560
    },
    {
      "epoch": 2.4448089385161587,
      "grad_norm": 3.0625,
      "learning_rate": 1.0281392867434599e-05,
      "loss": 0.8471,
      "step": 697570
    },
    {
      "epoch": 2.4448439860230544,
      "grad_norm": 2.828125,
      "learning_rate": 1.0280743838770895e-05,
      "loss": 0.8888,
      "step": 697580
    },
    {
      "epoch": 2.4448790335299497,
      "grad_norm": 3.15625,
      "learning_rate": 1.0280094810107193e-05,
      "loss": 0.7942,
      "step": 697590
    },
    {
      "epoch": 2.4449140810368455,
      "grad_norm": 2.90625,
      "learning_rate": 1.0279445781443491e-05,
      "loss": 0.8647,
      "step": 697600
    },
    {
      "epoch": 2.4449491285437412,
      "grad_norm": 3.1875,
      "learning_rate": 1.027879675277979e-05,
      "loss": 0.8406,
      "step": 697610
    },
    {
      "epoch": 2.4449841760506366,
      "grad_norm": 2.8125,
      "learning_rate": 1.0278147724116089e-05,
      "loss": 0.905,
      "step": 697620
    },
    {
      "epoch": 2.4450192235575323,
      "grad_norm": 3.0625,
      "learning_rate": 1.0277498695452387e-05,
      "loss": 0.8051,
      "step": 697630
    },
    {
      "epoch": 2.4450542710644276,
      "grad_norm": 3.296875,
      "learning_rate": 1.0276849666788685e-05,
      "loss": 0.8092,
      "step": 697640
    },
    {
      "epoch": 2.4450893185713234,
      "grad_norm": 2.90625,
      "learning_rate": 1.0276200638124983e-05,
      "loss": 0.8475,
      "step": 697650
    },
    {
      "epoch": 2.445124366078219,
      "grad_norm": 2.921875,
      "learning_rate": 1.0275551609461281e-05,
      "loss": 0.7563,
      "step": 697660
    },
    {
      "epoch": 2.4451594135851145,
      "grad_norm": 2.9375,
      "learning_rate": 1.0274902580797579e-05,
      "loss": 0.8849,
      "step": 697670
    },
    {
      "epoch": 2.44519446109201,
      "grad_norm": 2.46875,
      "learning_rate": 1.0274253552133877e-05,
      "loss": 0.7892,
      "step": 697680
    },
    {
      "epoch": 2.445229508598906,
      "grad_norm": 2.984375,
      "learning_rate": 1.0273604523470175e-05,
      "loss": 0.8307,
      "step": 697690
    },
    {
      "epoch": 2.4452645561058013,
      "grad_norm": 3.15625,
      "learning_rate": 1.0272955494806473e-05,
      "loss": 0.7576,
      "step": 697700
    },
    {
      "epoch": 2.445299603612697,
      "grad_norm": 2.515625,
      "learning_rate": 1.0272306466142771e-05,
      "loss": 0.7523,
      "step": 697710
    },
    {
      "epoch": 2.445334651119593,
      "grad_norm": 3.015625,
      "learning_rate": 1.0271657437479069e-05,
      "loss": 0.7702,
      "step": 697720
    },
    {
      "epoch": 2.445369698626488,
      "grad_norm": 2.71875,
      "learning_rate": 1.0271008408815367e-05,
      "loss": 0.7745,
      "step": 697730
    },
    {
      "epoch": 2.445404746133384,
      "grad_norm": 2.484375,
      "learning_rate": 1.0270359380151667e-05,
      "loss": 0.7391,
      "step": 697740
    },
    {
      "epoch": 2.445439793640279,
      "grad_norm": 2.53125,
      "learning_rate": 1.0269710351487965e-05,
      "loss": 0.7748,
      "step": 697750
    },
    {
      "epoch": 2.445474841147175,
      "grad_norm": 3.203125,
      "learning_rate": 1.0269061322824261e-05,
      "loss": 0.81,
      "step": 697760
    },
    {
      "epoch": 2.4455098886540707,
      "grad_norm": 2.984375,
      "learning_rate": 1.0268412294160559e-05,
      "loss": 0.7896,
      "step": 697770
    },
    {
      "epoch": 2.445544936160966,
      "grad_norm": 3.015625,
      "learning_rate": 1.0267763265496857e-05,
      "loss": 0.7804,
      "step": 697780
    },
    {
      "epoch": 2.445579983667862,
      "grad_norm": 3.296875,
      "learning_rate": 1.0267114236833155e-05,
      "loss": 0.8828,
      "step": 697790
    },
    {
      "epoch": 2.4456150311747575,
      "grad_norm": 2.90625,
      "learning_rate": 1.0266465208169455e-05,
      "loss": 0.9374,
      "step": 697800
    },
    {
      "epoch": 2.445650078681653,
      "grad_norm": 3.40625,
      "learning_rate": 1.0265816179505753e-05,
      "loss": 0.84,
      "step": 697810
    },
    {
      "epoch": 2.4456851261885486,
      "grad_norm": 2.609375,
      "learning_rate": 1.026516715084205e-05,
      "loss": 0.7963,
      "step": 697820
    },
    {
      "epoch": 2.4457201736954444,
      "grad_norm": 2.890625,
      "learning_rate": 1.0264518122178349e-05,
      "loss": 0.7912,
      "step": 697830
    },
    {
      "epoch": 2.4457552212023397,
      "grad_norm": 3.484375,
      "learning_rate": 1.0263869093514647e-05,
      "loss": 0.8613,
      "step": 697840
    },
    {
      "epoch": 2.4457902687092354,
      "grad_norm": 2.921875,
      "learning_rate": 1.0263220064850945e-05,
      "loss": 0.7085,
      "step": 697850
    },
    {
      "epoch": 2.4458253162161308,
      "grad_norm": 2.9375,
      "learning_rate": 1.0262571036187243e-05,
      "loss": 0.8029,
      "step": 697860
    },
    {
      "epoch": 2.4458603637230265,
      "grad_norm": 2.953125,
      "learning_rate": 1.026192200752354e-05,
      "loss": 0.8206,
      "step": 697870
    },
    {
      "epoch": 2.4458954112299223,
      "grad_norm": 2.71875,
      "learning_rate": 1.0261272978859839e-05,
      "loss": 0.7971,
      "step": 697880
    },
    {
      "epoch": 2.4459304587368176,
      "grad_norm": 2.765625,
      "learning_rate": 1.0260623950196137e-05,
      "loss": 0.7655,
      "step": 697890
    },
    {
      "epoch": 2.4459655062437133,
      "grad_norm": 2.953125,
      "learning_rate": 1.0259974921532435e-05,
      "loss": 0.7992,
      "step": 697900
    },
    {
      "epoch": 2.446000553750609,
      "grad_norm": 2.859375,
      "learning_rate": 1.0259325892868733e-05,
      "loss": 0.7955,
      "step": 697910
    },
    {
      "epoch": 2.4460356012575044,
      "grad_norm": 2.828125,
      "learning_rate": 1.025867686420503e-05,
      "loss": 0.7369,
      "step": 697920
    },
    {
      "epoch": 2.4460706487644,
      "grad_norm": 2.546875,
      "learning_rate": 1.025802783554133e-05,
      "loss": 0.6965,
      "step": 697930
    },
    {
      "epoch": 2.446105696271296,
      "grad_norm": 3.09375,
      "learning_rate": 1.0257378806877628e-05,
      "loss": 0.8633,
      "step": 697940
    },
    {
      "epoch": 2.4461407437781912,
      "grad_norm": 2.859375,
      "learning_rate": 1.0256729778213925e-05,
      "loss": 0.8021,
      "step": 697950
    },
    {
      "epoch": 2.446175791285087,
      "grad_norm": 2.828125,
      "learning_rate": 1.0256080749550223e-05,
      "loss": 0.7848,
      "step": 697960
    },
    {
      "epoch": 2.4462108387919823,
      "grad_norm": 3.09375,
      "learning_rate": 1.025543172088652e-05,
      "loss": 0.8346,
      "step": 697970
    },
    {
      "epoch": 2.446245886298878,
      "grad_norm": 2.703125,
      "learning_rate": 1.025478269222282e-05,
      "loss": 0.8027,
      "step": 697980
    },
    {
      "epoch": 2.446280933805774,
      "grad_norm": 2.890625,
      "learning_rate": 1.0254133663559118e-05,
      "loss": 0.7812,
      "step": 697990
    },
    {
      "epoch": 2.446315981312669,
      "grad_norm": 2.78125,
      "learning_rate": 1.0253484634895416e-05,
      "loss": 0.7433,
      "step": 698000
    },
    {
      "epoch": 2.446351028819565,
      "grad_norm": 2.96875,
      "learning_rate": 1.0252835606231714e-05,
      "loss": 0.8565,
      "step": 698010
    },
    {
      "epoch": 2.4463860763264607,
      "grad_norm": 3.109375,
      "learning_rate": 1.0252186577568012e-05,
      "loss": 0.7451,
      "step": 698020
    },
    {
      "epoch": 2.446421123833356,
      "grad_norm": 2.765625,
      "learning_rate": 1.025153754890431e-05,
      "loss": 0.7641,
      "step": 698030
    },
    {
      "epoch": 2.4464561713402517,
      "grad_norm": 2.90625,
      "learning_rate": 1.0250888520240608e-05,
      "loss": 0.7742,
      "step": 698040
    },
    {
      "epoch": 2.4464912188471475,
      "grad_norm": 2.765625,
      "learning_rate": 1.0250239491576906e-05,
      "loss": 0.7319,
      "step": 698050
    },
    {
      "epoch": 2.446526266354043,
      "grad_norm": 2.984375,
      "learning_rate": 1.0249590462913204e-05,
      "loss": 0.8488,
      "step": 698060
    },
    {
      "epoch": 2.4465613138609386,
      "grad_norm": 3.015625,
      "learning_rate": 1.0248941434249502e-05,
      "loss": 0.8247,
      "step": 698070
    },
    {
      "epoch": 2.446596361367834,
      "grad_norm": 2.875,
      "learning_rate": 1.02482924055858e-05,
      "loss": 0.8824,
      "step": 698080
    },
    {
      "epoch": 2.4466314088747296,
      "grad_norm": 2.859375,
      "learning_rate": 1.0247643376922098e-05,
      "loss": 0.8173,
      "step": 698090
    },
    {
      "epoch": 2.4466664563816254,
      "grad_norm": 2.828125,
      "learning_rate": 1.0246994348258396e-05,
      "loss": 0.9164,
      "step": 698100
    },
    {
      "epoch": 2.4467015038885207,
      "grad_norm": 2.875,
      "learning_rate": 1.0246345319594696e-05,
      "loss": 0.8658,
      "step": 698110
    },
    {
      "epoch": 2.4467365513954165,
      "grad_norm": 2.640625,
      "learning_rate": 1.0245696290930994e-05,
      "loss": 0.8064,
      "step": 698120
    },
    {
      "epoch": 2.446771598902312,
      "grad_norm": 2.515625,
      "learning_rate": 1.0245047262267292e-05,
      "loss": 0.7729,
      "step": 698130
    },
    {
      "epoch": 2.4468066464092075,
      "grad_norm": 2.984375,
      "learning_rate": 1.0244398233603588e-05,
      "loss": 0.8525,
      "step": 698140
    },
    {
      "epoch": 2.4468416939161033,
      "grad_norm": 2.609375,
      "learning_rate": 1.0243749204939886e-05,
      "loss": 0.7768,
      "step": 698150
    },
    {
      "epoch": 2.446876741422999,
      "grad_norm": 2.875,
      "learning_rate": 1.0243100176276184e-05,
      "loss": 0.7657,
      "step": 698160
    },
    {
      "epoch": 2.4469117889298944,
      "grad_norm": 2.84375,
      "learning_rate": 1.0242451147612484e-05,
      "loss": 0.7893,
      "step": 698170
    },
    {
      "epoch": 2.44694683643679,
      "grad_norm": 3.046875,
      "learning_rate": 1.0241802118948782e-05,
      "loss": 0.7964,
      "step": 698180
    },
    {
      "epoch": 2.4469818839436854,
      "grad_norm": 3.3125,
      "learning_rate": 1.024115309028508e-05,
      "loss": 0.7453,
      "step": 698190
    },
    {
      "epoch": 2.447016931450581,
      "grad_norm": 3.171875,
      "learning_rate": 1.0240504061621378e-05,
      "loss": 0.772,
      "step": 698200
    },
    {
      "epoch": 2.447051978957477,
      "grad_norm": 2.96875,
      "learning_rate": 1.0239855032957676e-05,
      "loss": 0.8349,
      "step": 698210
    },
    {
      "epoch": 2.4470870264643727,
      "grad_norm": 2.828125,
      "learning_rate": 1.0239206004293974e-05,
      "loss": 0.8725,
      "step": 698220
    },
    {
      "epoch": 2.447122073971268,
      "grad_norm": 3.359375,
      "learning_rate": 1.0238556975630272e-05,
      "loss": 0.894,
      "step": 698230
    },
    {
      "epoch": 2.4471571214781638,
      "grad_norm": 2.953125,
      "learning_rate": 1.023790794696657e-05,
      "loss": 0.8782,
      "step": 698240
    },
    {
      "epoch": 2.447192168985059,
      "grad_norm": 2.859375,
      "learning_rate": 1.0237258918302868e-05,
      "loss": 0.7883,
      "step": 698250
    },
    {
      "epoch": 2.447227216491955,
      "grad_norm": 2.953125,
      "learning_rate": 1.0236609889639166e-05,
      "loss": 0.8025,
      "step": 698260
    },
    {
      "epoch": 2.4472622639988506,
      "grad_norm": 3.078125,
      "learning_rate": 1.0235960860975464e-05,
      "loss": 0.7989,
      "step": 698270
    },
    {
      "epoch": 2.447297311505746,
      "grad_norm": 2.609375,
      "learning_rate": 1.0235311832311762e-05,
      "loss": 0.7618,
      "step": 698280
    },
    {
      "epoch": 2.4473323590126417,
      "grad_norm": 2.609375,
      "learning_rate": 1.0234662803648062e-05,
      "loss": 0.8811,
      "step": 698290
    },
    {
      "epoch": 2.447367406519537,
      "grad_norm": 2.96875,
      "learning_rate": 1.023401377498436e-05,
      "loss": 0.6899,
      "step": 698300
    },
    {
      "epoch": 2.4474024540264327,
      "grad_norm": 2.65625,
      "learning_rate": 1.0233364746320658e-05,
      "loss": 0.8118,
      "step": 698310
    },
    {
      "epoch": 2.4474375015333285,
      "grad_norm": 3.109375,
      "learning_rate": 1.0232715717656956e-05,
      "loss": 0.827,
      "step": 698320
    },
    {
      "epoch": 2.4474725490402243,
      "grad_norm": 3.046875,
      "learning_rate": 1.0232066688993252e-05,
      "loss": 0.8044,
      "step": 698330
    },
    {
      "epoch": 2.4475075965471196,
      "grad_norm": 2.796875,
      "learning_rate": 1.023141766032955e-05,
      "loss": 0.7391,
      "step": 698340
    },
    {
      "epoch": 2.4475426440540153,
      "grad_norm": 2.9375,
      "learning_rate": 1.023076863166585e-05,
      "loss": 0.7905,
      "step": 698350
    },
    {
      "epoch": 2.4475776915609107,
      "grad_norm": 2.546875,
      "learning_rate": 1.0230119603002148e-05,
      "loss": 0.7208,
      "step": 698360
    },
    {
      "epoch": 2.4476127390678064,
      "grad_norm": 3.046875,
      "learning_rate": 1.0229470574338446e-05,
      "loss": 0.8624,
      "step": 698370
    },
    {
      "epoch": 2.447647786574702,
      "grad_norm": 2.734375,
      "learning_rate": 1.0228821545674744e-05,
      "loss": 0.86,
      "step": 698380
    },
    {
      "epoch": 2.4476828340815975,
      "grad_norm": 2.6875,
      "learning_rate": 1.0228172517011042e-05,
      "loss": 0.7805,
      "step": 698390
    },
    {
      "epoch": 2.4477178815884932,
      "grad_norm": 3.109375,
      "learning_rate": 1.022752348834734e-05,
      "loss": 0.8501,
      "step": 698400
    },
    {
      "epoch": 2.4477529290953886,
      "grad_norm": 2.828125,
      "learning_rate": 1.0226874459683638e-05,
      "loss": 0.745,
      "step": 698410
    },
    {
      "epoch": 2.4477879766022843,
      "grad_norm": 2.96875,
      "learning_rate": 1.0226225431019936e-05,
      "loss": 0.7798,
      "step": 698420
    },
    {
      "epoch": 2.44782302410918,
      "grad_norm": 3.328125,
      "learning_rate": 1.0225576402356234e-05,
      "loss": 0.8085,
      "step": 698430
    },
    {
      "epoch": 2.447858071616076,
      "grad_norm": 3.0,
      "learning_rate": 1.0224927373692532e-05,
      "loss": 0.8205,
      "step": 698440
    },
    {
      "epoch": 2.447893119122971,
      "grad_norm": 2.984375,
      "learning_rate": 1.022427834502883e-05,
      "loss": 0.7676,
      "step": 698450
    },
    {
      "epoch": 2.447928166629867,
      "grad_norm": 2.84375,
      "learning_rate": 1.0223629316365128e-05,
      "loss": 0.7746,
      "step": 698460
    },
    {
      "epoch": 2.447963214136762,
      "grad_norm": 2.546875,
      "learning_rate": 1.0222980287701426e-05,
      "loss": 0.811,
      "step": 698470
    },
    {
      "epoch": 2.447998261643658,
      "grad_norm": 3.125,
      "learning_rate": 1.0222331259037726e-05,
      "loss": 0.856,
      "step": 698480
    },
    {
      "epoch": 2.4480333091505537,
      "grad_norm": 3.09375,
      "learning_rate": 1.0221682230374024e-05,
      "loss": 0.7555,
      "step": 698490
    },
    {
      "epoch": 2.448068356657449,
      "grad_norm": 3.375,
      "learning_rate": 1.0221033201710322e-05,
      "loss": 0.8331,
      "step": 698500
    },
    {
      "epoch": 2.448103404164345,
      "grad_norm": 2.796875,
      "learning_rate": 1.022038417304662e-05,
      "loss": 0.784,
      "step": 698510
    },
    {
      "epoch": 2.4481384516712406,
      "grad_norm": 2.453125,
      "learning_rate": 1.0219735144382916e-05,
      "loss": 0.7565,
      "step": 698520
    },
    {
      "epoch": 2.448173499178136,
      "grad_norm": 2.46875,
      "learning_rate": 1.0219086115719216e-05,
      "loss": 0.8084,
      "step": 698530
    },
    {
      "epoch": 2.4482085466850316,
      "grad_norm": 3.046875,
      "learning_rate": 1.0218437087055514e-05,
      "loss": 0.8585,
      "step": 698540
    },
    {
      "epoch": 2.4482435941919274,
      "grad_norm": 3.015625,
      "learning_rate": 1.0217788058391812e-05,
      "loss": 0.8103,
      "step": 698550
    },
    {
      "epoch": 2.4482786416988227,
      "grad_norm": 2.953125,
      "learning_rate": 1.021713902972811e-05,
      "loss": 0.834,
      "step": 698560
    },
    {
      "epoch": 2.4483136892057185,
      "grad_norm": 3.015625,
      "learning_rate": 1.0216490001064408e-05,
      "loss": 0.7934,
      "step": 698570
    },
    {
      "epoch": 2.4483487367126138,
      "grad_norm": 3.234375,
      "learning_rate": 1.0215840972400706e-05,
      "loss": 0.7721,
      "step": 698580
    },
    {
      "epoch": 2.4483837842195095,
      "grad_norm": 2.5625,
      "learning_rate": 1.0215191943737004e-05,
      "loss": 0.7985,
      "step": 698590
    },
    {
      "epoch": 2.4484188317264053,
      "grad_norm": 2.828125,
      "learning_rate": 1.0214542915073303e-05,
      "loss": 0.8244,
      "step": 698600
    },
    {
      "epoch": 2.4484538792333006,
      "grad_norm": 2.703125,
      "learning_rate": 1.02138938864096e-05,
      "loss": 0.8238,
      "step": 698610
    },
    {
      "epoch": 2.4484889267401964,
      "grad_norm": 2.890625,
      "learning_rate": 1.0213244857745898e-05,
      "loss": 0.825,
      "step": 698620
    },
    {
      "epoch": 2.448523974247092,
      "grad_norm": 2.875,
      "learning_rate": 1.0212595829082196e-05,
      "loss": 0.8049,
      "step": 698630
    },
    {
      "epoch": 2.4485590217539874,
      "grad_norm": 2.5625,
      "learning_rate": 1.0211946800418494e-05,
      "loss": 0.7997,
      "step": 698640
    },
    {
      "epoch": 2.448594069260883,
      "grad_norm": 2.78125,
      "learning_rate": 1.0211297771754792e-05,
      "loss": 0.851,
      "step": 698650
    },
    {
      "epoch": 2.448629116767779,
      "grad_norm": 2.640625,
      "learning_rate": 1.0210648743091091e-05,
      "loss": 0.7887,
      "step": 698660
    },
    {
      "epoch": 2.4486641642746743,
      "grad_norm": 2.71875,
      "learning_rate": 1.020999971442739e-05,
      "loss": 0.8461,
      "step": 698670
    },
    {
      "epoch": 2.44869921178157,
      "grad_norm": 2.5625,
      "learning_rate": 1.0209350685763687e-05,
      "loss": 0.8349,
      "step": 698680
    },
    {
      "epoch": 2.4487342592884653,
      "grad_norm": 3.640625,
      "learning_rate": 1.0208701657099985e-05,
      "loss": 0.8766,
      "step": 698690
    },
    {
      "epoch": 2.448769306795361,
      "grad_norm": 2.671875,
      "learning_rate": 1.0208052628436282e-05,
      "loss": 0.786,
      "step": 698700
    },
    {
      "epoch": 2.448804354302257,
      "grad_norm": 2.9375,
      "learning_rate": 1.020740359977258e-05,
      "loss": 0.7678,
      "step": 698710
    },
    {
      "epoch": 2.448839401809152,
      "grad_norm": 2.75,
      "learning_rate": 1.020675457110888e-05,
      "loss": 0.7646,
      "step": 698720
    },
    {
      "epoch": 2.448874449316048,
      "grad_norm": 3.125,
      "learning_rate": 1.0206105542445177e-05,
      "loss": 0.7711,
      "step": 698730
    },
    {
      "epoch": 2.4489094968229437,
      "grad_norm": 3.015625,
      "learning_rate": 1.0205456513781475e-05,
      "loss": 0.8271,
      "step": 698740
    },
    {
      "epoch": 2.448944544329839,
      "grad_norm": 3.140625,
      "learning_rate": 1.0204807485117773e-05,
      "loss": 0.7114,
      "step": 698750
    },
    {
      "epoch": 2.4489795918367347,
      "grad_norm": 2.9375,
      "learning_rate": 1.0204158456454071e-05,
      "loss": 0.803,
      "step": 698760
    },
    {
      "epoch": 2.4490146393436305,
      "grad_norm": 4.90625,
      "learning_rate": 1.020350942779037e-05,
      "loss": 0.774,
      "step": 698770
    },
    {
      "epoch": 2.449049686850526,
      "grad_norm": 2.609375,
      "learning_rate": 1.0202860399126667e-05,
      "loss": 0.7971,
      "step": 698780
    },
    {
      "epoch": 2.4490847343574216,
      "grad_norm": 2.71875,
      "learning_rate": 1.0202211370462967e-05,
      "loss": 0.8217,
      "step": 698790
    },
    {
      "epoch": 2.449119781864317,
      "grad_norm": 3.46875,
      "learning_rate": 1.0201562341799263e-05,
      "loss": 0.8128,
      "step": 698800
    },
    {
      "epoch": 2.4491548293712126,
      "grad_norm": 3.3125,
      "learning_rate": 1.0200913313135561e-05,
      "loss": 0.9517,
      "step": 698810
    },
    {
      "epoch": 2.4491898768781084,
      "grad_norm": 2.78125,
      "learning_rate": 1.020026428447186e-05,
      "loss": 0.7619,
      "step": 698820
    },
    {
      "epoch": 2.4492249243850037,
      "grad_norm": 2.96875,
      "learning_rate": 1.0199615255808157e-05,
      "loss": 0.789,
      "step": 698830
    },
    {
      "epoch": 2.4492599718918995,
      "grad_norm": 2.796875,
      "learning_rate": 1.0198966227144457e-05,
      "loss": 0.7936,
      "step": 698840
    },
    {
      "epoch": 2.4492950193987952,
      "grad_norm": 3.375,
      "learning_rate": 1.0198317198480755e-05,
      "loss": 0.8539,
      "step": 698850
    },
    {
      "epoch": 2.4493300669056906,
      "grad_norm": 2.71875,
      "learning_rate": 1.0197668169817053e-05,
      "loss": 0.8147,
      "step": 698860
    },
    {
      "epoch": 2.4493651144125863,
      "grad_norm": 3.453125,
      "learning_rate": 1.0197019141153351e-05,
      "loss": 0.7839,
      "step": 698870
    },
    {
      "epoch": 2.449400161919482,
      "grad_norm": 3.109375,
      "learning_rate": 1.0196370112489649e-05,
      "loss": 0.7906,
      "step": 698880
    },
    {
      "epoch": 2.4494352094263774,
      "grad_norm": 3.125,
      "learning_rate": 1.0195721083825945e-05,
      "loss": 0.7689,
      "step": 698890
    },
    {
      "epoch": 2.449470256933273,
      "grad_norm": 2.59375,
      "learning_rate": 1.0195072055162245e-05,
      "loss": 0.7377,
      "step": 698900
    },
    {
      "epoch": 2.4495053044401685,
      "grad_norm": 2.578125,
      "learning_rate": 1.0194423026498543e-05,
      "loss": 0.7567,
      "step": 698910
    },
    {
      "epoch": 2.449540351947064,
      "grad_norm": 2.828125,
      "learning_rate": 1.0193773997834841e-05,
      "loss": 0.8149,
      "step": 698920
    },
    {
      "epoch": 2.44957539945396,
      "grad_norm": 3.265625,
      "learning_rate": 1.0193124969171139e-05,
      "loss": 0.839,
      "step": 698930
    },
    {
      "epoch": 2.4496104469608553,
      "grad_norm": 3.140625,
      "learning_rate": 1.0192475940507437e-05,
      "loss": 0.8254,
      "step": 698940
    },
    {
      "epoch": 2.449645494467751,
      "grad_norm": 2.953125,
      "learning_rate": 1.0191826911843735e-05,
      "loss": 0.8673,
      "step": 698950
    },
    {
      "epoch": 2.449680541974647,
      "grad_norm": 2.90625,
      "learning_rate": 1.0191177883180033e-05,
      "loss": 0.824,
      "step": 698960
    },
    {
      "epoch": 2.449715589481542,
      "grad_norm": 2.671875,
      "learning_rate": 1.0190528854516333e-05,
      "loss": 0.7724,
      "step": 698970
    },
    {
      "epoch": 2.449750636988438,
      "grad_norm": 2.859375,
      "learning_rate": 1.018987982585263e-05,
      "loss": 0.8351,
      "step": 698980
    },
    {
      "epoch": 2.4497856844953336,
      "grad_norm": 3.015625,
      "learning_rate": 1.0189230797188927e-05,
      "loss": 0.8445,
      "step": 698990
    },
    {
      "epoch": 2.449820732002229,
      "grad_norm": 2.96875,
      "learning_rate": 1.0188581768525225e-05,
      "loss": 0.8261,
      "step": 699000
    },
    {
      "epoch": 2.4498557795091247,
      "grad_norm": 2.703125,
      "learning_rate": 1.0187932739861523e-05,
      "loss": 0.7777,
      "step": 699010
    },
    {
      "epoch": 2.44989082701602,
      "grad_norm": 3.09375,
      "learning_rate": 1.0187283711197821e-05,
      "loss": 0.7749,
      "step": 699020
    },
    {
      "epoch": 2.4499258745229158,
      "grad_norm": 3.046875,
      "learning_rate": 1.018663468253412e-05,
      "loss": 0.7962,
      "step": 699030
    },
    {
      "epoch": 2.4499609220298115,
      "grad_norm": 2.96875,
      "learning_rate": 1.0185985653870419e-05,
      "loss": 0.7456,
      "step": 699040
    },
    {
      "epoch": 2.449995969536707,
      "grad_norm": 2.703125,
      "learning_rate": 1.0185336625206717e-05,
      "loss": 0.7607,
      "step": 699050
    },
    {
      "epoch": 2.4500310170436026,
      "grad_norm": 2.71875,
      "learning_rate": 1.0184687596543015e-05,
      "loss": 0.7963,
      "step": 699060
    },
    {
      "epoch": 2.4500660645504984,
      "grad_norm": 2.625,
      "learning_rate": 1.0184038567879313e-05,
      "loss": 0.7236,
      "step": 699070
    },
    {
      "epoch": 2.4501011120573937,
      "grad_norm": 2.96875,
      "learning_rate": 1.018338953921561e-05,
      "loss": 0.8574,
      "step": 699080
    },
    {
      "epoch": 2.4501361595642894,
      "grad_norm": 3.171875,
      "learning_rate": 1.0182740510551909e-05,
      "loss": 0.8503,
      "step": 699090
    },
    {
      "epoch": 2.450171207071185,
      "grad_norm": 3.171875,
      "learning_rate": 1.0182091481888207e-05,
      "loss": 0.7927,
      "step": 699100
    },
    {
      "epoch": 2.4502062545780805,
      "grad_norm": 2.859375,
      "learning_rate": 1.0181442453224505e-05,
      "loss": 0.7951,
      "step": 699110
    },
    {
      "epoch": 2.4502413020849763,
      "grad_norm": 2.96875,
      "learning_rate": 1.0180793424560803e-05,
      "loss": 0.7801,
      "step": 699120
    },
    {
      "epoch": 2.4502763495918716,
      "grad_norm": 2.953125,
      "learning_rate": 1.01801443958971e-05,
      "loss": 0.7988,
      "step": 699130
    },
    {
      "epoch": 2.4503113970987673,
      "grad_norm": 3.015625,
      "learning_rate": 1.0179495367233399e-05,
      "loss": 0.8127,
      "step": 699140
    },
    {
      "epoch": 2.450346444605663,
      "grad_norm": 3.09375,
      "learning_rate": 1.0178846338569698e-05,
      "loss": 0.7455,
      "step": 699150
    },
    {
      "epoch": 2.4503814921125584,
      "grad_norm": 3.0625,
      "learning_rate": 1.0178197309905996e-05,
      "loss": 0.7531,
      "step": 699160
    },
    {
      "epoch": 2.450416539619454,
      "grad_norm": 3.171875,
      "learning_rate": 1.0177548281242293e-05,
      "loss": 0.8097,
      "step": 699170
    },
    {
      "epoch": 2.45045158712635,
      "grad_norm": 2.4375,
      "learning_rate": 1.017689925257859e-05,
      "loss": 0.7903,
      "step": 699180
    },
    {
      "epoch": 2.4504866346332452,
      "grad_norm": 2.828125,
      "learning_rate": 1.0176250223914889e-05,
      "loss": 0.8249,
      "step": 699190
    },
    {
      "epoch": 2.450521682140141,
      "grad_norm": 2.890625,
      "learning_rate": 1.0175601195251187e-05,
      "loss": 0.7044,
      "step": 699200
    },
    {
      "epoch": 2.4505567296470367,
      "grad_norm": 2.65625,
      "learning_rate": 1.0174952166587486e-05,
      "loss": 0.7684,
      "step": 699210
    },
    {
      "epoch": 2.450591777153932,
      "grad_norm": 2.765625,
      "learning_rate": 1.0174303137923784e-05,
      "loss": 0.7692,
      "step": 699220
    },
    {
      "epoch": 2.450626824660828,
      "grad_norm": 3.03125,
      "learning_rate": 1.0173654109260082e-05,
      "loss": 0.9076,
      "step": 699230
    },
    {
      "epoch": 2.450661872167723,
      "grad_norm": 2.3125,
      "learning_rate": 1.017300508059638e-05,
      "loss": 0.737,
      "step": 699240
    },
    {
      "epoch": 2.450696919674619,
      "grad_norm": 3.34375,
      "learning_rate": 1.0172356051932678e-05,
      "loss": 0.8536,
      "step": 699250
    },
    {
      "epoch": 2.4507319671815146,
      "grad_norm": 2.890625,
      "learning_rate": 1.0171707023268976e-05,
      "loss": 0.7937,
      "step": 699260
    },
    {
      "epoch": 2.45076701468841,
      "grad_norm": 2.921875,
      "learning_rate": 1.0171057994605274e-05,
      "loss": 0.7904,
      "step": 699270
    },
    {
      "epoch": 2.4508020621953057,
      "grad_norm": 3.203125,
      "learning_rate": 1.0170408965941572e-05,
      "loss": 0.931,
      "step": 699280
    },
    {
      "epoch": 2.4508371097022015,
      "grad_norm": 2.828125,
      "learning_rate": 1.016975993727787e-05,
      "loss": 0.8533,
      "step": 699290
    },
    {
      "epoch": 2.450872157209097,
      "grad_norm": 3.0,
      "learning_rate": 1.0169110908614168e-05,
      "loss": 0.7948,
      "step": 699300
    },
    {
      "epoch": 2.4509072047159925,
      "grad_norm": 2.734375,
      "learning_rate": 1.0168461879950466e-05,
      "loss": 0.8586,
      "step": 699310
    },
    {
      "epoch": 2.4509422522228883,
      "grad_norm": 3.0,
      "learning_rate": 1.0167812851286764e-05,
      "loss": 0.8086,
      "step": 699320
    },
    {
      "epoch": 2.4509772997297836,
      "grad_norm": 3.171875,
      "learning_rate": 1.0167163822623062e-05,
      "loss": 0.8428,
      "step": 699330
    },
    {
      "epoch": 2.4510123472366794,
      "grad_norm": 2.796875,
      "learning_rate": 1.0166514793959362e-05,
      "loss": 0.7843,
      "step": 699340
    },
    {
      "epoch": 2.4510473947435747,
      "grad_norm": 2.84375,
      "learning_rate": 1.016586576529566e-05,
      "loss": 0.7493,
      "step": 699350
    },
    {
      "epoch": 2.4510824422504705,
      "grad_norm": 2.84375,
      "learning_rate": 1.0165216736631956e-05,
      "loss": 0.8054,
      "step": 699360
    },
    {
      "epoch": 2.451117489757366,
      "grad_norm": 3.140625,
      "learning_rate": 1.0164567707968254e-05,
      "loss": 0.8215,
      "step": 699370
    },
    {
      "epoch": 2.4511525372642615,
      "grad_norm": 2.734375,
      "learning_rate": 1.0163918679304552e-05,
      "loss": 0.7943,
      "step": 699380
    },
    {
      "epoch": 2.4511875847711573,
      "grad_norm": 2.96875,
      "learning_rate": 1.0163269650640852e-05,
      "loss": 0.784,
      "step": 699390
    },
    {
      "epoch": 2.451222632278053,
      "grad_norm": 2.828125,
      "learning_rate": 1.016262062197715e-05,
      "loss": 0.8379,
      "step": 699400
    },
    {
      "epoch": 2.4512576797849484,
      "grad_norm": 3.1875,
      "learning_rate": 1.0161971593313448e-05,
      "loss": 0.7982,
      "step": 699410
    },
    {
      "epoch": 2.451292727291844,
      "grad_norm": 3.0625,
      "learning_rate": 1.0161322564649746e-05,
      "loss": 0.7927,
      "step": 699420
    },
    {
      "epoch": 2.45132777479874,
      "grad_norm": 2.8125,
      "learning_rate": 1.0160673535986044e-05,
      "loss": 0.7899,
      "step": 699430
    },
    {
      "epoch": 2.451362822305635,
      "grad_norm": 2.9375,
      "learning_rate": 1.0160024507322342e-05,
      "loss": 0.8445,
      "step": 699440
    },
    {
      "epoch": 2.451397869812531,
      "grad_norm": 3.0,
      "learning_rate": 1.015937547865864e-05,
      "loss": 0.7912,
      "step": 699450
    },
    {
      "epoch": 2.4514329173194263,
      "grad_norm": 2.953125,
      "learning_rate": 1.0158726449994938e-05,
      "loss": 0.7018,
      "step": 699460
    },
    {
      "epoch": 2.451467964826322,
      "grad_norm": 3.3125,
      "learning_rate": 1.0158077421331236e-05,
      "loss": 0.7973,
      "step": 699470
    },
    {
      "epoch": 2.4515030123332178,
      "grad_norm": 3.484375,
      "learning_rate": 1.0157428392667534e-05,
      "loss": 0.8433,
      "step": 699480
    },
    {
      "epoch": 2.451538059840113,
      "grad_norm": 2.984375,
      "learning_rate": 1.0156779364003832e-05,
      "loss": 0.8621,
      "step": 699490
    },
    {
      "epoch": 2.451573107347009,
      "grad_norm": 2.859375,
      "learning_rate": 1.015613033534013e-05,
      "loss": 0.8124,
      "step": 699500
    },
    {
      "epoch": 2.4516081548539046,
      "grad_norm": 3.34375,
      "learning_rate": 1.0155481306676428e-05,
      "loss": 0.8546,
      "step": 699510
    },
    {
      "epoch": 2.4516432023608,
      "grad_norm": 2.984375,
      "learning_rate": 1.0154832278012728e-05,
      "loss": 0.7924,
      "step": 699520
    },
    {
      "epoch": 2.4516782498676957,
      "grad_norm": 3.140625,
      "learning_rate": 1.0154183249349026e-05,
      "loss": 0.8736,
      "step": 699530
    },
    {
      "epoch": 2.4517132973745914,
      "grad_norm": 2.9375,
      "learning_rate": 1.0153534220685324e-05,
      "loss": 0.7982,
      "step": 699540
    },
    {
      "epoch": 2.4517483448814867,
      "grad_norm": 3.109375,
      "learning_rate": 1.015288519202162e-05,
      "loss": 0.7774,
      "step": 699550
    },
    {
      "epoch": 2.4517833923883825,
      "grad_norm": 2.625,
      "learning_rate": 1.0152236163357918e-05,
      "loss": 0.8188,
      "step": 699560
    },
    {
      "epoch": 2.451818439895278,
      "grad_norm": 3.0625,
      "learning_rate": 1.0151587134694216e-05,
      "loss": 0.8547,
      "step": 699570
    },
    {
      "epoch": 2.4518534874021736,
      "grad_norm": 2.671875,
      "learning_rate": 1.0150938106030516e-05,
      "loss": 0.6791,
      "step": 699580
    },
    {
      "epoch": 2.4518885349090693,
      "grad_norm": 2.609375,
      "learning_rate": 1.0150289077366814e-05,
      "loss": 0.8015,
      "step": 699590
    },
    {
      "epoch": 2.451923582415965,
      "grad_norm": 3.140625,
      "learning_rate": 1.0149640048703112e-05,
      "loss": 0.776,
      "step": 699600
    },
    {
      "epoch": 2.4519586299228604,
      "grad_norm": 2.734375,
      "learning_rate": 1.014899102003941e-05,
      "loss": 0.8369,
      "step": 699610
    },
    {
      "epoch": 2.451993677429756,
      "grad_norm": 3.140625,
      "learning_rate": 1.0148341991375708e-05,
      "loss": 0.7715,
      "step": 699620
    },
    {
      "epoch": 2.4520287249366515,
      "grad_norm": 2.515625,
      "learning_rate": 1.0147692962712006e-05,
      "loss": 0.8805,
      "step": 699630
    },
    {
      "epoch": 2.4520637724435472,
      "grad_norm": 2.625,
      "learning_rate": 1.0147043934048304e-05,
      "loss": 0.7436,
      "step": 699640
    },
    {
      "epoch": 2.452098819950443,
      "grad_norm": 2.734375,
      "learning_rate": 1.0146394905384602e-05,
      "loss": 0.8344,
      "step": 699650
    },
    {
      "epoch": 2.4521338674573383,
      "grad_norm": 2.984375,
      "learning_rate": 1.01457458767209e-05,
      "loss": 0.7486,
      "step": 699660
    },
    {
      "epoch": 2.452168914964234,
      "grad_norm": 2.734375,
      "learning_rate": 1.0145096848057198e-05,
      "loss": 0.7855,
      "step": 699670
    },
    {
      "epoch": 2.4522039624711294,
      "grad_norm": 3.390625,
      "learning_rate": 1.0144447819393496e-05,
      "loss": 0.8534,
      "step": 699680
    },
    {
      "epoch": 2.452239009978025,
      "grad_norm": 3.03125,
      "learning_rate": 1.0143798790729794e-05,
      "loss": 0.8931,
      "step": 699690
    },
    {
      "epoch": 2.452274057484921,
      "grad_norm": 3.296875,
      "learning_rate": 1.0143149762066093e-05,
      "loss": 0.8429,
      "step": 699700
    },
    {
      "epoch": 2.4523091049918166,
      "grad_norm": 2.390625,
      "learning_rate": 1.0142500733402391e-05,
      "loss": 0.7974,
      "step": 699710
    },
    {
      "epoch": 2.452344152498712,
      "grad_norm": 3.015625,
      "learning_rate": 1.014185170473869e-05,
      "loss": 0.8081,
      "step": 699720
    },
    {
      "epoch": 2.4523792000056077,
      "grad_norm": 2.34375,
      "learning_rate": 1.0141202676074987e-05,
      "loss": 0.8305,
      "step": 699730
    },
    {
      "epoch": 2.452414247512503,
      "grad_norm": 3.375,
      "learning_rate": 1.0140553647411284e-05,
      "loss": 0.7161,
      "step": 699740
    },
    {
      "epoch": 2.452449295019399,
      "grad_norm": 2.984375,
      "learning_rate": 1.0139904618747582e-05,
      "loss": 0.7802,
      "step": 699750
    },
    {
      "epoch": 2.4524843425262945,
      "grad_norm": 2.5,
      "learning_rate": 1.0139255590083881e-05,
      "loss": 0.8222,
      "step": 699760
    },
    {
      "epoch": 2.45251939003319,
      "grad_norm": 2.921875,
      "learning_rate": 1.013860656142018e-05,
      "loss": 0.7947,
      "step": 699770
    },
    {
      "epoch": 2.4525544375400856,
      "grad_norm": 2.734375,
      "learning_rate": 1.0137957532756477e-05,
      "loss": 0.8303,
      "step": 699780
    },
    {
      "epoch": 2.4525894850469814,
      "grad_norm": 3.109375,
      "learning_rate": 1.0137308504092775e-05,
      "loss": 0.7791,
      "step": 699790
    },
    {
      "epoch": 2.4526245325538767,
      "grad_norm": 2.859375,
      "learning_rate": 1.0136659475429073e-05,
      "loss": 0.8001,
      "step": 699800
    },
    {
      "epoch": 2.4526595800607724,
      "grad_norm": 2.71875,
      "learning_rate": 1.0136010446765371e-05,
      "loss": 0.7667,
      "step": 699810
    },
    {
      "epoch": 2.452694627567668,
      "grad_norm": 3.203125,
      "learning_rate": 1.013536141810167e-05,
      "loss": 0.7775,
      "step": 699820
    },
    {
      "epoch": 2.4527296750745635,
      "grad_norm": 2.671875,
      "learning_rate": 1.0134712389437967e-05,
      "loss": 0.7946,
      "step": 699830
    },
    {
      "epoch": 2.4527647225814593,
      "grad_norm": 2.296875,
      "learning_rate": 1.0134063360774265e-05,
      "loss": 0.7907,
      "step": 699840
    },
    {
      "epoch": 2.4527997700883546,
      "grad_norm": 3.0,
      "learning_rate": 1.0133414332110563e-05,
      "loss": 0.8502,
      "step": 699850
    },
    {
      "epoch": 2.4528348175952503,
      "grad_norm": 3.0,
      "learning_rate": 1.0132765303446861e-05,
      "loss": 0.8116,
      "step": 699860
    },
    {
      "epoch": 2.452869865102146,
      "grad_norm": 2.953125,
      "learning_rate": 1.013211627478316e-05,
      "loss": 0.8076,
      "step": 699870
    },
    {
      "epoch": 2.4529049126090414,
      "grad_norm": 2.875,
      "learning_rate": 1.0131467246119457e-05,
      "loss": 0.837,
      "step": 699880
    },
    {
      "epoch": 2.452939960115937,
      "grad_norm": 3.453125,
      "learning_rate": 1.0130818217455757e-05,
      "loss": 0.7112,
      "step": 699890
    },
    {
      "epoch": 2.452975007622833,
      "grad_norm": 2.65625,
      "learning_rate": 1.0130169188792055e-05,
      "loss": 0.7562,
      "step": 699900
    },
    {
      "epoch": 2.4530100551297283,
      "grad_norm": 2.796875,
      "learning_rate": 1.0129520160128353e-05,
      "loss": 0.7851,
      "step": 699910
    },
    {
      "epoch": 2.453045102636624,
      "grad_norm": 2.796875,
      "learning_rate": 1.0128871131464651e-05,
      "loss": 0.8587,
      "step": 699920
    },
    {
      "epoch": 2.4530801501435198,
      "grad_norm": 2.953125,
      "learning_rate": 1.0128222102800947e-05,
      "loss": 0.7693,
      "step": 699930
    },
    {
      "epoch": 2.453115197650415,
      "grad_norm": 2.71875,
      "learning_rate": 1.0127573074137247e-05,
      "loss": 0.8068,
      "step": 699940
    },
    {
      "epoch": 2.453150245157311,
      "grad_norm": 3.46875,
      "learning_rate": 1.0126924045473545e-05,
      "loss": 0.8349,
      "step": 699950
    },
    {
      "epoch": 2.453185292664206,
      "grad_norm": 3.171875,
      "learning_rate": 1.0126275016809843e-05,
      "loss": 0.8369,
      "step": 699960
    },
    {
      "epoch": 2.453220340171102,
      "grad_norm": 2.765625,
      "learning_rate": 1.0125625988146141e-05,
      "loss": 0.8603,
      "step": 699970
    },
    {
      "epoch": 2.4532553876779977,
      "grad_norm": 3.203125,
      "learning_rate": 1.0124976959482439e-05,
      "loss": 0.8323,
      "step": 699980
    },
    {
      "epoch": 2.453290435184893,
      "grad_norm": 2.53125,
      "learning_rate": 1.0124327930818737e-05,
      "loss": 0.8051,
      "step": 699990
    },
    {
      "epoch": 2.4533254826917887,
      "grad_norm": 2.8125,
      "learning_rate": 1.0123678902155035e-05,
      "loss": 0.7811,
      "step": 700000
    },
    {
      "epoch": 2.4533254826917887,
      "eval_loss": 0.7583553791046143,
      "eval_runtime": 553.2893,
      "eval_samples_per_second": 687.59,
      "eval_steps_per_second": 57.299,
      "step": 700000
    },
    {
      "epoch": 2.4533605301986845,
      "grad_norm": 3.203125,
      "learning_rate": 1.0123029873491335e-05,
      "loss": 0.8005,
      "step": 700010
    },
    {
      "epoch": 2.45339557770558,
      "grad_norm": 2.5625,
      "learning_rate": 1.0122380844827631e-05,
      "loss": 0.8008,
      "step": 700020
    },
    {
      "epoch": 2.4534306252124756,
      "grad_norm": 3.1875,
      "learning_rate": 1.0121731816163929e-05,
      "loss": 0.8595,
      "step": 700030
    },
    {
      "epoch": 2.4534656727193713,
      "grad_norm": 2.65625,
      "learning_rate": 1.0121082787500227e-05,
      "loss": 0.7675,
      "step": 700040
    },
    {
      "epoch": 2.4535007202262666,
      "grad_norm": 3.1875,
      "learning_rate": 1.0120433758836525e-05,
      "loss": 0.7998,
      "step": 700050
    },
    {
      "epoch": 2.4535357677331624,
      "grad_norm": 3.59375,
      "learning_rate": 1.0119784730172823e-05,
      "loss": 0.8447,
      "step": 700060
    },
    {
      "epoch": 2.4535708152400577,
      "grad_norm": 3.265625,
      "learning_rate": 1.0119135701509123e-05,
      "loss": 0.8002,
      "step": 700070
    },
    {
      "epoch": 2.4536058627469535,
      "grad_norm": 3.40625,
      "learning_rate": 1.011848667284542e-05,
      "loss": 0.8456,
      "step": 700080
    },
    {
      "epoch": 2.4536409102538492,
      "grad_norm": 3.0,
      "learning_rate": 1.0117837644181719e-05,
      "loss": 0.9064,
      "step": 700090
    },
    {
      "epoch": 2.4536759577607445,
      "grad_norm": 2.828125,
      "learning_rate": 1.0117188615518017e-05,
      "loss": 0.7092,
      "step": 700100
    },
    {
      "epoch": 2.4537110052676403,
      "grad_norm": 2.90625,
      "learning_rate": 1.0116539586854313e-05,
      "loss": 0.7176,
      "step": 700110
    },
    {
      "epoch": 2.453746052774536,
      "grad_norm": 2.828125,
      "learning_rate": 1.0115890558190611e-05,
      "loss": 0.7727,
      "step": 700120
    },
    {
      "epoch": 2.4537811002814314,
      "grad_norm": 3.34375,
      "learning_rate": 1.011524152952691e-05,
      "loss": 0.8621,
      "step": 700130
    },
    {
      "epoch": 2.453816147788327,
      "grad_norm": 2.71875,
      "learning_rate": 1.0114592500863209e-05,
      "loss": 0.8346,
      "step": 700140
    },
    {
      "epoch": 2.453851195295223,
      "grad_norm": 3.0625,
      "learning_rate": 1.0113943472199507e-05,
      "loss": 0.8063,
      "step": 700150
    },
    {
      "epoch": 2.453886242802118,
      "grad_norm": 3.65625,
      "learning_rate": 1.0113294443535805e-05,
      "loss": 0.8838,
      "step": 700160
    },
    {
      "epoch": 2.453921290309014,
      "grad_norm": 2.9375,
      "learning_rate": 1.0112645414872103e-05,
      "loss": 0.8225,
      "step": 700170
    },
    {
      "epoch": 2.4539563378159093,
      "grad_norm": 2.875,
      "learning_rate": 1.01119963862084e-05,
      "loss": 0.8042,
      "step": 700180
    },
    {
      "epoch": 2.453991385322805,
      "grad_norm": 3.046875,
      "learning_rate": 1.0111347357544699e-05,
      "loss": 0.8338,
      "step": 700190
    },
    {
      "epoch": 2.454026432829701,
      "grad_norm": 2.625,
      "learning_rate": 1.0110698328880998e-05,
      "loss": 0.7274,
      "step": 700200
    },
    {
      "epoch": 2.454061480336596,
      "grad_norm": 2.890625,
      "learning_rate": 1.0110049300217295e-05,
      "loss": 0.7897,
      "step": 700210
    },
    {
      "epoch": 2.454096527843492,
      "grad_norm": 3.25,
      "learning_rate": 1.0109400271553593e-05,
      "loss": 0.7708,
      "step": 700220
    },
    {
      "epoch": 2.4541315753503876,
      "grad_norm": 2.921875,
      "learning_rate": 1.010875124288989e-05,
      "loss": 0.7619,
      "step": 700230
    },
    {
      "epoch": 2.454166622857283,
      "grad_norm": 2.484375,
      "learning_rate": 1.0108102214226189e-05,
      "loss": 0.7227,
      "step": 700240
    },
    {
      "epoch": 2.4542016703641787,
      "grad_norm": 2.6875,
      "learning_rate": 1.0107453185562488e-05,
      "loss": 0.7969,
      "step": 700250
    },
    {
      "epoch": 2.4542367178710744,
      "grad_norm": 3.015625,
      "learning_rate": 1.0106804156898786e-05,
      "loss": 0.7835,
      "step": 700260
    },
    {
      "epoch": 2.4542717653779698,
      "grad_norm": 2.890625,
      "learning_rate": 1.0106155128235084e-05,
      "loss": 0.7546,
      "step": 700270
    },
    {
      "epoch": 2.4543068128848655,
      "grad_norm": 3.203125,
      "learning_rate": 1.0105506099571382e-05,
      "loss": 0.8323,
      "step": 700280
    },
    {
      "epoch": 2.454341860391761,
      "grad_norm": 2.75,
      "learning_rate": 1.010485707090768e-05,
      "loss": 0.7511,
      "step": 700290
    },
    {
      "epoch": 2.4543769078986566,
      "grad_norm": 3.1875,
      "learning_rate": 1.0104208042243977e-05,
      "loss": 0.8277,
      "step": 700300
    },
    {
      "epoch": 2.4544119554055523,
      "grad_norm": 2.734375,
      "learning_rate": 1.0103559013580276e-05,
      "loss": 0.745,
      "step": 700310
    },
    {
      "epoch": 2.4544470029124477,
      "grad_norm": 3.15625,
      "learning_rate": 1.0102909984916574e-05,
      "loss": 0.7784,
      "step": 700320
    },
    {
      "epoch": 2.4544820504193434,
      "grad_norm": 3.3125,
      "learning_rate": 1.0102260956252872e-05,
      "loss": 0.866,
      "step": 700330
    },
    {
      "epoch": 2.454517097926239,
      "grad_norm": 3.09375,
      "learning_rate": 1.010161192758917e-05,
      "loss": 0.8004,
      "step": 700340
    },
    {
      "epoch": 2.4545521454331345,
      "grad_norm": 2.953125,
      "learning_rate": 1.0100962898925468e-05,
      "loss": 0.8466,
      "step": 700350
    },
    {
      "epoch": 2.4545871929400302,
      "grad_norm": 3.5,
      "learning_rate": 1.0100313870261766e-05,
      "loss": 0.8663,
      "step": 700360
    },
    {
      "epoch": 2.454622240446926,
      "grad_norm": 2.984375,
      "learning_rate": 1.0099664841598064e-05,
      "loss": 0.7286,
      "step": 700370
    },
    {
      "epoch": 2.4546572879538213,
      "grad_norm": 2.75,
      "learning_rate": 1.0099015812934364e-05,
      "loss": 0.8678,
      "step": 700380
    },
    {
      "epoch": 2.454692335460717,
      "grad_norm": 2.78125,
      "learning_rate": 1.0098366784270662e-05,
      "loss": 0.7949,
      "step": 700390
    },
    {
      "epoch": 2.4547273829676124,
      "grad_norm": 3.125,
      "learning_rate": 1.0097717755606958e-05,
      "loss": 0.8116,
      "step": 700400
    },
    {
      "epoch": 2.454762430474508,
      "grad_norm": 3.0,
      "learning_rate": 1.0097068726943256e-05,
      "loss": 0.8645,
      "step": 700410
    },
    {
      "epoch": 2.454797477981404,
      "grad_norm": 2.765625,
      "learning_rate": 1.0096419698279554e-05,
      "loss": 0.7506,
      "step": 700420
    },
    {
      "epoch": 2.454832525488299,
      "grad_norm": 4.125,
      "learning_rate": 1.0095770669615852e-05,
      "loss": 0.7817,
      "step": 700430
    },
    {
      "epoch": 2.454867572995195,
      "grad_norm": 3.125,
      "learning_rate": 1.0095121640952152e-05,
      "loss": 0.8915,
      "step": 700440
    },
    {
      "epoch": 2.4549026205020907,
      "grad_norm": 3.46875,
      "learning_rate": 1.009447261228845e-05,
      "loss": 0.807,
      "step": 700450
    },
    {
      "epoch": 2.454937668008986,
      "grad_norm": 3.265625,
      "learning_rate": 1.0093823583624748e-05,
      "loss": 0.8113,
      "step": 700460
    },
    {
      "epoch": 2.454972715515882,
      "grad_norm": 3.015625,
      "learning_rate": 1.0093174554961046e-05,
      "loss": 0.834,
      "step": 700470
    },
    {
      "epoch": 2.4550077630227776,
      "grad_norm": 2.703125,
      "learning_rate": 1.0092525526297344e-05,
      "loss": 0.7534,
      "step": 700480
    },
    {
      "epoch": 2.455042810529673,
      "grad_norm": 2.21875,
      "learning_rate": 1.0091876497633642e-05,
      "loss": 0.7713,
      "step": 700490
    },
    {
      "epoch": 2.4550778580365686,
      "grad_norm": 2.390625,
      "learning_rate": 1.009122746896994e-05,
      "loss": 0.8039,
      "step": 700500
    },
    {
      "epoch": 2.455112905543464,
      "grad_norm": 3.078125,
      "learning_rate": 1.0090578440306238e-05,
      "loss": 0.8358,
      "step": 700510
    },
    {
      "epoch": 2.4551479530503597,
      "grad_norm": 2.171875,
      "learning_rate": 1.0089929411642536e-05,
      "loss": 0.7134,
      "step": 700520
    },
    {
      "epoch": 2.4551830005572555,
      "grad_norm": 2.640625,
      "learning_rate": 1.0089280382978834e-05,
      "loss": 0.7933,
      "step": 700530
    },
    {
      "epoch": 2.455218048064151,
      "grad_norm": 2.890625,
      "learning_rate": 1.0088631354315132e-05,
      "loss": 0.8518,
      "step": 700540
    },
    {
      "epoch": 2.4552530955710465,
      "grad_norm": 2.546875,
      "learning_rate": 1.008798232565143e-05,
      "loss": 0.7543,
      "step": 700550
    },
    {
      "epoch": 2.4552881430779423,
      "grad_norm": 2.96875,
      "learning_rate": 1.008733329698773e-05,
      "loss": 0.798,
      "step": 700560
    },
    {
      "epoch": 2.4553231905848376,
      "grad_norm": 2.984375,
      "learning_rate": 1.0086684268324028e-05,
      "loss": 0.7853,
      "step": 700570
    },
    {
      "epoch": 2.4553582380917334,
      "grad_norm": 2.75,
      "learning_rate": 1.0086035239660324e-05,
      "loss": 0.8291,
      "step": 700580
    },
    {
      "epoch": 2.455393285598629,
      "grad_norm": 3.359375,
      "learning_rate": 1.0085386210996622e-05,
      "loss": 0.8629,
      "step": 700590
    },
    {
      "epoch": 2.4554283331055244,
      "grad_norm": 2.5625,
      "learning_rate": 1.008473718233292e-05,
      "loss": 0.7778,
      "step": 700600
    },
    {
      "epoch": 2.45546338061242,
      "grad_norm": 3.328125,
      "learning_rate": 1.0084088153669218e-05,
      "loss": 0.8312,
      "step": 700610
    },
    {
      "epoch": 2.4554984281193155,
      "grad_norm": 3.171875,
      "learning_rate": 1.0083439125005518e-05,
      "loss": 0.9019,
      "step": 700620
    },
    {
      "epoch": 2.4555334756262113,
      "grad_norm": 2.8125,
      "learning_rate": 1.0082790096341816e-05,
      "loss": 0.8499,
      "step": 700630
    },
    {
      "epoch": 2.455568523133107,
      "grad_norm": 2.59375,
      "learning_rate": 1.0082141067678114e-05,
      "loss": 0.7769,
      "step": 700640
    },
    {
      "epoch": 2.4556035706400023,
      "grad_norm": 2.84375,
      "learning_rate": 1.0081492039014412e-05,
      "loss": 0.7865,
      "step": 700650
    },
    {
      "epoch": 2.455638618146898,
      "grad_norm": 3.28125,
      "learning_rate": 1.008084301035071e-05,
      "loss": 0.852,
      "step": 700660
    },
    {
      "epoch": 2.455673665653794,
      "grad_norm": 2.890625,
      "learning_rate": 1.0080193981687008e-05,
      "loss": 0.8235,
      "step": 700670
    },
    {
      "epoch": 2.455708713160689,
      "grad_norm": 3.15625,
      "learning_rate": 1.0079544953023306e-05,
      "loss": 0.8066,
      "step": 700680
    },
    {
      "epoch": 2.455743760667585,
      "grad_norm": 2.984375,
      "learning_rate": 1.0078895924359604e-05,
      "loss": 0.7557,
      "step": 700690
    },
    {
      "epoch": 2.4557788081744807,
      "grad_norm": 2.84375,
      "learning_rate": 1.0078246895695902e-05,
      "loss": 0.7834,
      "step": 700700
    },
    {
      "epoch": 2.455813855681376,
      "grad_norm": 2.796875,
      "learning_rate": 1.00775978670322e-05,
      "loss": 0.7625,
      "step": 700710
    },
    {
      "epoch": 2.4558489031882718,
      "grad_norm": 2.59375,
      "learning_rate": 1.0076948838368498e-05,
      "loss": 0.8479,
      "step": 700720
    },
    {
      "epoch": 2.455883950695167,
      "grad_norm": 2.90625,
      "learning_rate": 1.0076299809704796e-05,
      "loss": 0.8155,
      "step": 700730
    },
    {
      "epoch": 2.455918998202063,
      "grad_norm": 2.671875,
      "learning_rate": 1.0075650781041094e-05,
      "loss": 0.8134,
      "step": 700740
    },
    {
      "epoch": 2.4559540457089586,
      "grad_norm": 2.84375,
      "learning_rate": 1.0075001752377394e-05,
      "loss": 0.7497,
      "step": 700750
    },
    {
      "epoch": 2.455989093215854,
      "grad_norm": 2.875,
      "learning_rate": 1.0074352723713692e-05,
      "loss": 0.819,
      "step": 700760
    },
    {
      "epoch": 2.4560241407227497,
      "grad_norm": 3.03125,
      "learning_rate": 1.0073703695049988e-05,
      "loss": 0.8416,
      "step": 700770
    },
    {
      "epoch": 2.4560591882296454,
      "grad_norm": 3.21875,
      "learning_rate": 1.0073054666386286e-05,
      "loss": 0.812,
      "step": 700780
    },
    {
      "epoch": 2.4560942357365407,
      "grad_norm": 2.859375,
      "learning_rate": 1.0072405637722584e-05,
      "loss": 0.7474,
      "step": 700790
    },
    {
      "epoch": 2.4561292832434365,
      "grad_norm": 2.796875,
      "learning_rate": 1.0071756609058884e-05,
      "loss": 0.7892,
      "step": 700800
    },
    {
      "epoch": 2.4561643307503322,
      "grad_norm": 2.8125,
      "learning_rate": 1.0071107580395182e-05,
      "loss": 0.7699,
      "step": 700810
    },
    {
      "epoch": 2.4561993782572276,
      "grad_norm": 3.3125,
      "learning_rate": 1.007045855173148e-05,
      "loss": 0.8169,
      "step": 700820
    },
    {
      "epoch": 2.4562344257641233,
      "grad_norm": 3.265625,
      "learning_rate": 1.0069809523067778e-05,
      "loss": 0.8424,
      "step": 700830
    },
    {
      "epoch": 2.4562694732710186,
      "grad_norm": 3.0625,
      "learning_rate": 1.0069160494404076e-05,
      "loss": 0.858,
      "step": 700840
    },
    {
      "epoch": 2.4563045207779144,
      "grad_norm": 2.703125,
      "learning_rate": 1.0068511465740374e-05,
      "loss": 0.8097,
      "step": 700850
    },
    {
      "epoch": 2.45633956828481,
      "grad_norm": 3.25,
      "learning_rate": 1.0067862437076672e-05,
      "loss": 0.8412,
      "step": 700860
    },
    {
      "epoch": 2.456374615791706,
      "grad_norm": 2.859375,
      "learning_rate": 1.006721340841297e-05,
      "loss": 0.8587,
      "step": 700870
    },
    {
      "epoch": 2.456409663298601,
      "grad_norm": 2.25,
      "learning_rate": 1.0066564379749268e-05,
      "loss": 0.7204,
      "step": 700880
    },
    {
      "epoch": 2.456444710805497,
      "grad_norm": 3.140625,
      "learning_rate": 1.0065915351085566e-05,
      "loss": 0.8488,
      "step": 700890
    },
    {
      "epoch": 2.4564797583123923,
      "grad_norm": 2.671875,
      "learning_rate": 1.0065266322421864e-05,
      "loss": 0.7953,
      "step": 700900
    },
    {
      "epoch": 2.456514805819288,
      "grad_norm": 3.21875,
      "learning_rate": 1.0064617293758162e-05,
      "loss": 0.929,
      "step": 700910
    },
    {
      "epoch": 2.456549853326184,
      "grad_norm": 2.953125,
      "learning_rate": 1.006396826509446e-05,
      "loss": 0.8461,
      "step": 700920
    },
    {
      "epoch": 2.456584900833079,
      "grad_norm": 2.671875,
      "learning_rate": 1.006331923643076e-05,
      "loss": 0.7875,
      "step": 700930
    },
    {
      "epoch": 2.456619948339975,
      "grad_norm": 3.0625,
      "learning_rate": 1.0062670207767057e-05,
      "loss": 0.8624,
      "step": 700940
    },
    {
      "epoch": 2.45665499584687,
      "grad_norm": 2.609375,
      "learning_rate": 1.0062021179103355e-05,
      "loss": 0.8113,
      "step": 700950
    },
    {
      "epoch": 2.456690043353766,
      "grad_norm": 3.234375,
      "learning_rate": 1.0061372150439652e-05,
      "loss": 0.7127,
      "step": 700960
    },
    {
      "epoch": 2.4567250908606617,
      "grad_norm": 3.234375,
      "learning_rate": 1.006072312177595e-05,
      "loss": 0.8215,
      "step": 700970
    },
    {
      "epoch": 2.4567601383675575,
      "grad_norm": 2.65625,
      "learning_rate": 1.0060074093112248e-05,
      "loss": 0.7829,
      "step": 700980
    },
    {
      "epoch": 2.456795185874453,
      "grad_norm": 2.75,
      "learning_rate": 1.0059425064448547e-05,
      "loss": 0.8542,
      "step": 700990
    },
    {
      "epoch": 2.4568302333813485,
      "grad_norm": 3.078125,
      "learning_rate": 1.0058776035784845e-05,
      "loss": 0.8464,
      "step": 701000
    },
    {
      "epoch": 2.456865280888244,
      "grad_norm": 3.21875,
      "learning_rate": 1.0058127007121143e-05,
      "loss": 0.8988,
      "step": 701010
    },
    {
      "epoch": 2.4569003283951396,
      "grad_norm": 2.71875,
      "learning_rate": 1.0057477978457441e-05,
      "loss": 0.8257,
      "step": 701020
    },
    {
      "epoch": 2.4569353759020354,
      "grad_norm": 3.328125,
      "learning_rate": 1.005682894979374e-05,
      "loss": 0.9453,
      "step": 701030
    },
    {
      "epoch": 2.4569704234089307,
      "grad_norm": 2.515625,
      "learning_rate": 1.0056179921130037e-05,
      "loss": 0.7881,
      "step": 701040
    },
    {
      "epoch": 2.4570054709158264,
      "grad_norm": 2.921875,
      "learning_rate": 1.0055530892466335e-05,
      "loss": 0.791,
      "step": 701050
    },
    {
      "epoch": 2.4570405184227218,
      "grad_norm": 3.28125,
      "learning_rate": 1.0054881863802633e-05,
      "loss": 0.7866,
      "step": 701060
    },
    {
      "epoch": 2.4570755659296175,
      "grad_norm": 2.734375,
      "learning_rate": 1.0054232835138931e-05,
      "loss": 0.8183,
      "step": 701070
    },
    {
      "epoch": 2.4571106134365133,
      "grad_norm": 3.203125,
      "learning_rate": 1.005358380647523e-05,
      "loss": 0.8503,
      "step": 701080
    },
    {
      "epoch": 2.457145660943409,
      "grad_norm": 2.953125,
      "learning_rate": 1.0052934777811527e-05,
      "loss": 0.8892,
      "step": 701090
    },
    {
      "epoch": 2.4571807084503043,
      "grad_norm": 3.171875,
      "learning_rate": 1.0052285749147825e-05,
      "loss": 0.8037,
      "step": 701100
    },
    {
      "epoch": 2.4572157559572,
      "grad_norm": 3.078125,
      "learning_rate": 1.0051636720484125e-05,
      "loss": 0.8284,
      "step": 701110
    },
    {
      "epoch": 2.4572508034640954,
      "grad_norm": 2.484375,
      "learning_rate": 1.0050987691820423e-05,
      "loss": 0.8029,
      "step": 701120
    },
    {
      "epoch": 2.457285850970991,
      "grad_norm": 2.8125,
      "learning_rate": 1.0050338663156721e-05,
      "loss": 0.8274,
      "step": 701130
    },
    {
      "epoch": 2.457320898477887,
      "grad_norm": 2.734375,
      "learning_rate": 1.0049689634493019e-05,
      "loss": 0.8609,
      "step": 701140
    },
    {
      "epoch": 2.4573559459847822,
      "grad_norm": 3.046875,
      "learning_rate": 1.0049040605829315e-05,
      "loss": 0.8398,
      "step": 701150
    },
    {
      "epoch": 2.457390993491678,
      "grad_norm": 2.890625,
      "learning_rate": 1.0048391577165613e-05,
      "loss": 0.806,
      "step": 701160
    },
    {
      "epoch": 2.4574260409985738,
      "grad_norm": 2.640625,
      "learning_rate": 1.0047742548501913e-05,
      "loss": 0.8188,
      "step": 701170
    },
    {
      "epoch": 2.457461088505469,
      "grad_norm": 2.9375,
      "learning_rate": 1.0047093519838211e-05,
      "loss": 0.7519,
      "step": 701180
    },
    {
      "epoch": 2.457496136012365,
      "grad_norm": 2.921875,
      "learning_rate": 1.0046444491174509e-05,
      "loss": 0.7608,
      "step": 701190
    },
    {
      "epoch": 2.4575311835192606,
      "grad_norm": 2.671875,
      "learning_rate": 1.0045795462510807e-05,
      "loss": 0.7637,
      "step": 701200
    },
    {
      "epoch": 2.457566231026156,
      "grad_norm": 2.875,
      "learning_rate": 1.0045146433847105e-05,
      "loss": 0.7962,
      "step": 701210
    },
    {
      "epoch": 2.4576012785330517,
      "grad_norm": 2.9375,
      "learning_rate": 1.0044497405183403e-05,
      "loss": 0.8228,
      "step": 701220
    },
    {
      "epoch": 2.457636326039947,
      "grad_norm": 3.078125,
      "learning_rate": 1.0043848376519701e-05,
      "loss": 0.8014,
      "step": 701230
    },
    {
      "epoch": 2.4576713735468427,
      "grad_norm": 3.234375,
      "learning_rate": 1.0043199347855999e-05,
      "loss": 0.7979,
      "step": 701240
    },
    {
      "epoch": 2.4577064210537385,
      "grad_norm": 2.96875,
      "learning_rate": 1.0042550319192297e-05,
      "loss": 0.7561,
      "step": 701250
    },
    {
      "epoch": 2.457741468560634,
      "grad_norm": 2.609375,
      "learning_rate": 1.0041901290528595e-05,
      "loss": 0.8265,
      "step": 701260
    },
    {
      "epoch": 2.4577765160675296,
      "grad_norm": 2.765625,
      "learning_rate": 1.0041252261864893e-05,
      "loss": 0.825,
      "step": 701270
    },
    {
      "epoch": 2.4578115635744253,
      "grad_norm": 2.578125,
      "learning_rate": 1.0040603233201191e-05,
      "loss": 0.8389,
      "step": 701280
    },
    {
      "epoch": 2.4578466110813206,
      "grad_norm": 2.625,
      "learning_rate": 1.003995420453749e-05,
      "loss": 0.7842,
      "step": 701290
    },
    {
      "epoch": 2.4578816585882164,
      "grad_norm": 3.953125,
      "learning_rate": 1.0039305175873789e-05,
      "loss": 0.8817,
      "step": 701300
    },
    {
      "epoch": 2.457916706095112,
      "grad_norm": 2.6875,
      "learning_rate": 1.0038656147210087e-05,
      "loss": 0.7884,
      "step": 701310
    },
    {
      "epoch": 2.4579517536020075,
      "grad_norm": 2.5,
      "learning_rate": 1.0038007118546385e-05,
      "loss": 0.6883,
      "step": 701320
    },
    {
      "epoch": 2.457986801108903,
      "grad_norm": 2.71875,
      "learning_rate": 1.0037358089882683e-05,
      "loss": 0.7986,
      "step": 701330
    },
    {
      "epoch": 2.4580218486157985,
      "grad_norm": 3.375,
      "learning_rate": 1.0036709061218979e-05,
      "loss": 0.7883,
      "step": 701340
    },
    {
      "epoch": 2.4580568961226943,
      "grad_norm": 2.84375,
      "learning_rate": 1.0036060032555279e-05,
      "loss": 0.7364,
      "step": 701350
    },
    {
      "epoch": 2.45809194362959,
      "grad_norm": 3.171875,
      "learning_rate": 1.0035411003891577e-05,
      "loss": 0.7988,
      "step": 701360
    },
    {
      "epoch": 2.4581269911364854,
      "grad_norm": 2.671875,
      "learning_rate": 1.0034761975227875e-05,
      "loss": 0.8316,
      "step": 701370
    },
    {
      "epoch": 2.458162038643381,
      "grad_norm": 2.828125,
      "learning_rate": 1.0034112946564173e-05,
      "loss": 0.8362,
      "step": 701380
    },
    {
      "epoch": 2.458197086150277,
      "grad_norm": 2.640625,
      "learning_rate": 1.003346391790047e-05,
      "loss": 0.7544,
      "step": 701390
    },
    {
      "epoch": 2.458232133657172,
      "grad_norm": 2.71875,
      "learning_rate": 1.0032814889236769e-05,
      "loss": 0.8264,
      "step": 701400
    },
    {
      "epoch": 2.458267181164068,
      "grad_norm": 2.8125,
      "learning_rate": 1.0032165860573067e-05,
      "loss": 0.8702,
      "step": 701410
    },
    {
      "epoch": 2.4583022286709637,
      "grad_norm": 2.796875,
      "learning_rate": 1.0031516831909366e-05,
      "loss": 0.7771,
      "step": 701420
    },
    {
      "epoch": 2.458337276177859,
      "grad_norm": 2.890625,
      "learning_rate": 1.0030867803245663e-05,
      "loss": 0.8144,
      "step": 701430
    },
    {
      "epoch": 2.458372323684755,
      "grad_norm": 2.84375,
      "learning_rate": 1.003021877458196e-05,
      "loss": 0.7837,
      "step": 701440
    },
    {
      "epoch": 2.45840737119165,
      "grad_norm": 2.578125,
      "learning_rate": 1.0029569745918259e-05,
      "loss": 0.8387,
      "step": 701450
    },
    {
      "epoch": 2.458442418698546,
      "grad_norm": 2.953125,
      "learning_rate": 1.0028920717254557e-05,
      "loss": 0.8983,
      "step": 701460
    },
    {
      "epoch": 2.4584774662054416,
      "grad_norm": 2.921875,
      "learning_rate": 1.0028271688590855e-05,
      "loss": 0.8587,
      "step": 701470
    },
    {
      "epoch": 2.458512513712337,
      "grad_norm": 2.65625,
      "learning_rate": 1.0027622659927154e-05,
      "loss": 0.8043,
      "step": 701480
    },
    {
      "epoch": 2.4585475612192327,
      "grad_norm": 2.734375,
      "learning_rate": 1.0026973631263452e-05,
      "loss": 0.8346,
      "step": 701490
    },
    {
      "epoch": 2.4585826087261284,
      "grad_norm": 2.828125,
      "learning_rate": 1.002632460259975e-05,
      "loss": 0.7677,
      "step": 701500
    },
    {
      "epoch": 2.4586176562330238,
      "grad_norm": 2.796875,
      "learning_rate": 1.0025675573936048e-05,
      "loss": 0.7178,
      "step": 701510
    },
    {
      "epoch": 2.4586527037399195,
      "grad_norm": 2.65625,
      "learning_rate": 1.0025026545272346e-05,
      "loss": 0.7478,
      "step": 701520
    },
    {
      "epoch": 2.4586877512468153,
      "grad_norm": 3.015625,
      "learning_rate": 1.0024377516608644e-05,
      "loss": 0.8208,
      "step": 701530
    },
    {
      "epoch": 2.4587227987537106,
      "grad_norm": 2.421875,
      "learning_rate": 1.0023728487944942e-05,
      "loss": 0.7257,
      "step": 701540
    },
    {
      "epoch": 2.4587578462606063,
      "grad_norm": 2.96875,
      "learning_rate": 1.002307945928124e-05,
      "loss": 0.9404,
      "step": 701550
    },
    {
      "epoch": 2.4587928937675017,
      "grad_norm": 3.234375,
      "learning_rate": 1.0022430430617538e-05,
      "loss": 0.9042,
      "step": 701560
    },
    {
      "epoch": 2.4588279412743974,
      "grad_norm": 2.84375,
      "learning_rate": 1.0021781401953836e-05,
      "loss": 0.8393,
      "step": 701570
    },
    {
      "epoch": 2.458862988781293,
      "grad_norm": 2.421875,
      "learning_rate": 1.0021132373290134e-05,
      "loss": 0.8084,
      "step": 701580
    },
    {
      "epoch": 2.4588980362881885,
      "grad_norm": 3.078125,
      "learning_rate": 1.0020483344626432e-05,
      "loss": 0.7954,
      "step": 701590
    },
    {
      "epoch": 2.4589330837950842,
      "grad_norm": 2.90625,
      "learning_rate": 1.0019834315962732e-05,
      "loss": 0.8475,
      "step": 701600
    },
    {
      "epoch": 2.45896813130198,
      "grad_norm": 2.890625,
      "learning_rate": 1.001918528729903e-05,
      "loss": 0.8542,
      "step": 701610
    },
    {
      "epoch": 2.4590031788088753,
      "grad_norm": 3.4375,
      "learning_rate": 1.0018536258635326e-05,
      "loss": 0.7968,
      "step": 701620
    },
    {
      "epoch": 2.459038226315771,
      "grad_norm": 3.015625,
      "learning_rate": 1.0017887229971624e-05,
      "loss": 0.8042,
      "step": 701630
    },
    {
      "epoch": 2.459073273822667,
      "grad_norm": 2.78125,
      "learning_rate": 1.0017238201307922e-05,
      "loss": 0.7854,
      "step": 701640
    },
    {
      "epoch": 2.459108321329562,
      "grad_norm": 3.1875,
      "learning_rate": 1.001658917264422e-05,
      "loss": 0.7487,
      "step": 701650
    },
    {
      "epoch": 2.459143368836458,
      "grad_norm": 3.109375,
      "learning_rate": 1.001594014398052e-05,
      "loss": 0.7853,
      "step": 701660
    },
    {
      "epoch": 2.459178416343353,
      "grad_norm": 2.953125,
      "learning_rate": 1.0015291115316818e-05,
      "loss": 0.8309,
      "step": 701670
    },
    {
      "epoch": 2.459213463850249,
      "grad_norm": 3.328125,
      "learning_rate": 1.0014642086653116e-05,
      "loss": 0.8636,
      "step": 701680
    },
    {
      "epoch": 2.4592485113571447,
      "grad_norm": 2.984375,
      "learning_rate": 1.0013993057989414e-05,
      "loss": 0.7851,
      "step": 701690
    },
    {
      "epoch": 2.45928355886404,
      "grad_norm": 2.921875,
      "learning_rate": 1.0013344029325712e-05,
      "loss": 0.8369,
      "step": 701700
    },
    {
      "epoch": 2.459318606370936,
      "grad_norm": 2.953125,
      "learning_rate": 1.0012695000662008e-05,
      "loss": 0.7807,
      "step": 701710
    },
    {
      "epoch": 2.4593536538778316,
      "grad_norm": 2.921875,
      "learning_rate": 1.0012045971998308e-05,
      "loss": 0.7478,
      "step": 701720
    },
    {
      "epoch": 2.459388701384727,
      "grad_norm": 2.859375,
      "learning_rate": 1.0011396943334606e-05,
      "loss": 0.7967,
      "step": 701730
    },
    {
      "epoch": 2.4594237488916226,
      "grad_norm": 2.9375,
      "learning_rate": 1.0010747914670904e-05,
      "loss": 0.792,
      "step": 701740
    },
    {
      "epoch": 2.4594587963985184,
      "grad_norm": 2.765625,
      "learning_rate": 1.0010098886007202e-05,
      "loss": 0.8024,
      "step": 701750
    },
    {
      "epoch": 2.4594938439054137,
      "grad_norm": 2.75,
      "learning_rate": 1.00094498573435e-05,
      "loss": 0.759,
      "step": 701760
    },
    {
      "epoch": 2.4595288914123095,
      "grad_norm": 3.046875,
      "learning_rate": 1.0008800828679798e-05,
      "loss": 0.8289,
      "step": 701770
    },
    {
      "epoch": 2.4595639389192048,
      "grad_norm": 2.90625,
      "learning_rate": 1.0008151800016096e-05,
      "loss": 0.8033,
      "step": 701780
    },
    {
      "epoch": 2.4595989864261005,
      "grad_norm": 2.96875,
      "learning_rate": 1.0007502771352396e-05,
      "loss": 0.8231,
      "step": 701790
    },
    {
      "epoch": 2.4596340339329963,
      "grad_norm": 3.0,
      "learning_rate": 1.0006853742688694e-05,
      "loss": 0.7552,
      "step": 701800
    },
    {
      "epoch": 2.4596690814398916,
      "grad_norm": 4.125,
      "learning_rate": 1.000620471402499e-05,
      "loss": 0.766,
      "step": 701810
    },
    {
      "epoch": 2.4597041289467874,
      "grad_norm": 3.203125,
      "learning_rate": 1.0005555685361288e-05,
      "loss": 0.8832,
      "step": 701820
    },
    {
      "epoch": 2.459739176453683,
      "grad_norm": 3.140625,
      "learning_rate": 1.0004906656697586e-05,
      "loss": 0.8579,
      "step": 701830
    },
    {
      "epoch": 2.4597742239605784,
      "grad_norm": 3.546875,
      "learning_rate": 1.0004257628033886e-05,
      "loss": 0.8245,
      "step": 701840
    },
    {
      "epoch": 2.459809271467474,
      "grad_norm": 2.53125,
      "learning_rate": 1.0003608599370184e-05,
      "loss": 0.7393,
      "step": 701850
    },
    {
      "epoch": 2.45984431897437,
      "grad_norm": 3.015625,
      "learning_rate": 1.0002959570706482e-05,
      "loss": 0.7948,
      "step": 701860
    },
    {
      "epoch": 2.4598793664812653,
      "grad_norm": 2.875,
      "learning_rate": 1.000231054204278e-05,
      "loss": 0.7097,
      "step": 701870
    },
    {
      "epoch": 2.459914413988161,
      "grad_norm": 2.703125,
      "learning_rate": 1.0001661513379078e-05,
      "loss": 0.8006,
      "step": 701880
    },
    {
      "epoch": 2.4599494614950563,
      "grad_norm": 3.09375,
      "learning_rate": 1.0001012484715376e-05,
      "loss": 0.8084,
      "step": 701890
    },
    {
      "epoch": 2.459984509001952,
      "grad_norm": 3.015625,
      "learning_rate": 1.0000363456051674e-05,
      "loss": 0.8163,
      "step": 701900
    },
    {
      "epoch": 2.460019556508848,
      "grad_norm": 3.40625,
      "learning_rate": 9.999714427387972e-06,
      "loss": 0.8141,
      "step": 701910
    },
    {
      "epoch": 2.460054604015743,
      "grad_norm": 2.609375,
      "learning_rate": 9.99906539872427e-06,
      "loss": 0.7709,
      "step": 701920
    },
    {
      "epoch": 2.460089651522639,
      "grad_norm": 2.375,
      "learning_rate": 9.998416370060568e-06,
      "loss": 0.7051,
      "step": 701930
    },
    {
      "epoch": 2.4601246990295347,
      "grad_norm": 2.609375,
      "learning_rate": 9.997767341396866e-06,
      "loss": 0.7693,
      "step": 701940
    },
    {
      "epoch": 2.46015974653643,
      "grad_norm": 2.75,
      "learning_rate": 9.997118312733164e-06,
      "loss": 0.8011,
      "step": 701950
    },
    {
      "epoch": 2.4601947940433258,
      "grad_norm": 2.859375,
      "learning_rate": 9.996469284069462e-06,
      "loss": 0.7556,
      "step": 701960
    },
    {
      "epoch": 2.4602298415502215,
      "grad_norm": 3.1875,
      "learning_rate": 9.995820255405761e-06,
      "loss": 0.8742,
      "step": 701970
    },
    {
      "epoch": 2.460264889057117,
      "grad_norm": 2.453125,
      "learning_rate": 9.99517122674206e-06,
      "loss": 0.7849,
      "step": 701980
    },
    {
      "epoch": 2.4602999365640126,
      "grad_norm": 2.625,
      "learning_rate": 9.994522198078357e-06,
      "loss": 0.8454,
      "step": 701990
    },
    {
      "epoch": 2.460334984070908,
      "grad_norm": 3.125,
      "learning_rate": 9.993873169414654e-06,
      "loss": 0.84,
      "step": 702000
    },
    {
      "epoch": 2.4603700315778037,
      "grad_norm": 2.828125,
      "learning_rate": 9.993224140750952e-06,
      "loss": 0.769,
      "step": 702010
    },
    {
      "epoch": 2.4604050790846994,
      "grad_norm": 2.890625,
      "learning_rate": 9.99257511208725e-06,
      "loss": 0.7431,
      "step": 702020
    },
    {
      "epoch": 2.4604401265915947,
      "grad_norm": 2.625,
      "learning_rate": 9.99192608342355e-06,
      "loss": 0.7726,
      "step": 702030
    },
    {
      "epoch": 2.4604751740984905,
      "grad_norm": 2.25,
      "learning_rate": 9.991277054759847e-06,
      "loss": 0.7111,
      "step": 702040
    },
    {
      "epoch": 2.4605102216053862,
      "grad_norm": 2.96875,
      "learning_rate": 9.990628026096145e-06,
      "loss": 0.7515,
      "step": 702050
    },
    {
      "epoch": 2.4605452691122816,
      "grad_norm": 3.078125,
      "learning_rate": 9.989978997432443e-06,
      "loss": 0.7747,
      "step": 702060
    },
    {
      "epoch": 2.4605803166191773,
      "grad_norm": 2.625,
      "learning_rate": 9.989329968768741e-06,
      "loss": 0.7664,
      "step": 702070
    },
    {
      "epoch": 2.460615364126073,
      "grad_norm": 2.765625,
      "learning_rate": 9.98868094010504e-06,
      "loss": 0.8689,
      "step": 702080
    },
    {
      "epoch": 2.4606504116329684,
      "grad_norm": 3.265625,
      "learning_rate": 9.988031911441337e-06,
      "loss": 0.7548,
      "step": 702090
    },
    {
      "epoch": 2.460685459139864,
      "grad_norm": 2.875,
      "learning_rate": 9.987382882777635e-06,
      "loss": 0.8386,
      "step": 702100
    },
    {
      "epoch": 2.4607205066467595,
      "grad_norm": 3.0,
      "learning_rate": 9.986733854113933e-06,
      "loss": 0.8665,
      "step": 702110
    },
    {
      "epoch": 2.460755554153655,
      "grad_norm": 2.96875,
      "learning_rate": 9.986084825450231e-06,
      "loss": 0.7751,
      "step": 702120
    },
    {
      "epoch": 2.460790601660551,
      "grad_norm": 3.6875,
      "learning_rate": 9.98543579678653e-06,
      "loss": 0.8759,
      "step": 702130
    },
    {
      "epoch": 2.4608256491674463,
      "grad_norm": 3.453125,
      "learning_rate": 9.984786768122827e-06,
      "loss": 0.802,
      "step": 702140
    },
    {
      "epoch": 2.460860696674342,
      "grad_norm": 2.984375,
      "learning_rate": 9.984137739459127e-06,
      "loss": 0.8371,
      "step": 702150
    },
    {
      "epoch": 2.460895744181238,
      "grad_norm": 3.015625,
      "learning_rate": 9.983488710795425e-06,
      "loss": 0.7863,
      "step": 702160
    },
    {
      "epoch": 2.460930791688133,
      "grad_norm": 3.09375,
      "learning_rate": 9.982839682131723e-06,
      "loss": 0.9137,
      "step": 702170
    },
    {
      "epoch": 2.460965839195029,
      "grad_norm": 2.921875,
      "learning_rate": 9.98219065346802e-06,
      "loss": 0.9021,
      "step": 702180
    },
    {
      "epoch": 2.4610008867019246,
      "grad_norm": 3.359375,
      "learning_rate": 9.981541624804317e-06,
      "loss": 0.842,
      "step": 702190
    },
    {
      "epoch": 2.46103593420882,
      "grad_norm": 3.453125,
      "learning_rate": 9.980892596140615e-06,
      "loss": 0.8761,
      "step": 702200
    },
    {
      "epoch": 2.4610709817157157,
      "grad_norm": 3.015625,
      "learning_rate": 9.980243567476915e-06,
      "loss": 0.7407,
      "step": 702210
    },
    {
      "epoch": 2.461106029222611,
      "grad_norm": 2.734375,
      "learning_rate": 9.979594538813213e-06,
      "loss": 0.8405,
      "step": 702220
    },
    {
      "epoch": 2.4611410767295068,
      "grad_norm": 3.0,
      "learning_rate": 9.978945510149511e-06,
      "loss": 0.763,
      "step": 702230
    },
    {
      "epoch": 2.4611761242364025,
      "grad_norm": 3.078125,
      "learning_rate": 9.978296481485809e-06,
      "loss": 0.8136,
      "step": 702240
    },
    {
      "epoch": 2.4612111717432983,
      "grad_norm": 3.125,
      "learning_rate": 9.977647452822107e-06,
      "loss": 0.8027,
      "step": 702250
    },
    {
      "epoch": 2.4612462192501936,
      "grad_norm": 3.015625,
      "learning_rate": 9.976998424158405e-06,
      "loss": 0.8703,
      "step": 702260
    },
    {
      "epoch": 2.4612812667570894,
      "grad_norm": 3.0,
      "learning_rate": 9.976349395494703e-06,
      "loss": 0.8321,
      "step": 702270
    },
    {
      "epoch": 2.4613163142639847,
      "grad_norm": 2.9375,
      "learning_rate": 9.975700366831001e-06,
      "loss": 0.8279,
      "step": 702280
    },
    {
      "epoch": 2.4613513617708804,
      "grad_norm": 3.109375,
      "learning_rate": 9.975051338167299e-06,
      "loss": 0.7601,
      "step": 702290
    },
    {
      "epoch": 2.461386409277776,
      "grad_norm": 3.15625,
      "learning_rate": 9.974402309503597e-06,
      "loss": 0.821,
      "step": 702300
    },
    {
      "epoch": 2.4614214567846715,
      "grad_norm": 3.125,
      "learning_rate": 9.973753280839895e-06,
      "loss": 0.7869,
      "step": 702310
    },
    {
      "epoch": 2.4614565042915673,
      "grad_norm": 2.828125,
      "learning_rate": 9.973104252176193e-06,
      "loss": 0.8402,
      "step": 702320
    },
    {
      "epoch": 2.4614915517984626,
      "grad_norm": 2.890625,
      "learning_rate": 9.972455223512491e-06,
      "loss": 0.8256,
      "step": 702330
    },
    {
      "epoch": 2.4615265993053583,
      "grad_norm": 2.8125,
      "learning_rate": 9.97180619484879e-06,
      "loss": 0.7853,
      "step": 702340
    },
    {
      "epoch": 2.461561646812254,
      "grad_norm": 2.75,
      "learning_rate": 9.971157166185089e-06,
      "loss": 0.8507,
      "step": 702350
    },
    {
      "epoch": 2.46159669431915,
      "grad_norm": 3.25,
      "learning_rate": 9.970508137521387e-06,
      "loss": 0.7932,
      "step": 702360
    },
    {
      "epoch": 2.461631741826045,
      "grad_norm": 2.9375,
      "learning_rate": 9.969859108857683e-06,
      "loss": 0.8769,
      "step": 702370
    },
    {
      "epoch": 2.461666789332941,
      "grad_norm": 3.484375,
      "learning_rate": 9.969210080193981e-06,
      "loss": 0.8934,
      "step": 702380
    },
    {
      "epoch": 2.4617018368398362,
      "grad_norm": 2.3125,
      "learning_rate": 9.96856105153028e-06,
      "loss": 0.7397,
      "step": 702390
    },
    {
      "epoch": 2.461736884346732,
      "grad_norm": 3.34375,
      "learning_rate": 9.967912022866579e-06,
      "loss": 0.7549,
      "step": 702400
    },
    {
      "epoch": 2.4617719318536277,
      "grad_norm": 3.234375,
      "learning_rate": 9.967262994202877e-06,
      "loss": 0.8006,
      "step": 702410
    },
    {
      "epoch": 2.461806979360523,
      "grad_norm": 2.953125,
      "learning_rate": 9.966613965539175e-06,
      "loss": 0.8414,
      "step": 702420
    },
    {
      "epoch": 2.461842026867419,
      "grad_norm": 2.640625,
      "learning_rate": 9.965964936875473e-06,
      "loss": 0.8587,
      "step": 702430
    },
    {
      "epoch": 2.461877074374314,
      "grad_norm": 2.75,
      "learning_rate": 9.96531590821177e-06,
      "loss": 0.7771,
      "step": 702440
    },
    {
      "epoch": 2.46191212188121,
      "grad_norm": 2.6875,
      "learning_rate": 9.964666879548069e-06,
      "loss": 0.7513,
      "step": 702450
    },
    {
      "epoch": 2.4619471693881056,
      "grad_norm": 2.9375,
      "learning_rate": 9.964017850884368e-06,
      "loss": 0.7775,
      "step": 702460
    },
    {
      "epoch": 2.4619822168950014,
      "grad_norm": 2.8125,
      "learning_rate": 9.963368822220665e-06,
      "loss": 0.7793,
      "step": 702470
    },
    {
      "epoch": 2.4620172644018967,
      "grad_norm": 2.84375,
      "learning_rate": 9.962719793556963e-06,
      "loss": 0.7333,
      "step": 702480
    },
    {
      "epoch": 2.4620523119087925,
      "grad_norm": 3.125,
      "learning_rate": 9.96207076489326e-06,
      "loss": 0.8682,
      "step": 702490
    },
    {
      "epoch": 2.462087359415688,
      "grad_norm": 3.125,
      "learning_rate": 9.961421736229559e-06,
      "loss": 0.8822,
      "step": 702500
    },
    {
      "epoch": 2.4621224069225836,
      "grad_norm": 2.640625,
      "learning_rate": 9.960772707565857e-06,
      "loss": 0.8012,
      "step": 702510
    },
    {
      "epoch": 2.4621574544294793,
      "grad_norm": 2.71875,
      "learning_rate": 9.960123678902156e-06,
      "loss": 0.7264,
      "step": 702520
    },
    {
      "epoch": 2.4621925019363746,
      "grad_norm": 2.90625,
      "learning_rate": 9.959474650238454e-06,
      "loss": 0.8152,
      "step": 702530
    },
    {
      "epoch": 2.4622275494432704,
      "grad_norm": 2.59375,
      "learning_rate": 9.958825621574752e-06,
      "loss": 0.8068,
      "step": 702540
    },
    {
      "epoch": 2.462262596950166,
      "grad_norm": 3.140625,
      "learning_rate": 9.95817659291105e-06,
      "loss": 0.772,
      "step": 702550
    },
    {
      "epoch": 2.4622976444570615,
      "grad_norm": 2.9375,
      "learning_rate": 9.957527564247347e-06,
      "loss": 0.7817,
      "step": 702560
    },
    {
      "epoch": 2.462332691963957,
      "grad_norm": 2.96875,
      "learning_rate": 9.956878535583645e-06,
      "loss": 0.7691,
      "step": 702570
    },
    {
      "epoch": 2.462367739470853,
      "grad_norm": 2.90625,
      "learning_rate": 9.956229506919944e-06,
      "loss": 0.8089,
      "step": 702580
    },
    {
      "epoch": 2.4624027869777483,
      "grad_norm": 3.09375,
      "learning_rate": 9.955580478256242e-06,
      "loss": 0.872,
      "step": 702590
    },
    {
      "epoch": 2.462437834484644,
      "grad_norm": 2.953125,
      "learning_rate": 9.95493144959254e-06,
      "loss": 0.8231,
      "step": 702600
    },
    {
      "epoch": 2.4624728819915394,
      "grad_norm": 2.296875,
      "learning_rate": 9.954282420928838e-06,
      "loss": 0.7383,
      "step": 702610
    },
    {
      "epoch": 2.462507929498435,
      "grad_norm": 2.53125,
      "learning_rate": 9.953633392265136e-06,
      "loss": 0.7418,
      "step": 702620
    },
    {
      "epoch": 2.462542977005331,
      "grad_norm": 3.046875,
      "learning_rate": 9.952984363601434e-06,
      "loss": 0.794,
      "step": 702630
    },
    {
      "epoch": 2.462578024512226,
      "grad_norm": 2.484375,
      "learning_rate": 9.952335334937732e-06,
      "loss": 0.7846,
      "step": 702640
    },
    {
      "epoch": 2.462613072019122,
      "grad_norm": 3.0625,
      "learning_rate": 9.95168630627403e-06,
      "loss": 0.825,
      "step": 702650
    },
    {
      "epoch": 2.4626481195260177,
      "grad_norm": 2.78125,
      "learning_rate": 9.951037277610328e-06,
      "loss": 0.8158,
      "step": 702660
    },
    {
      "epoch": 2.462683167032913,
      "grad_norm": 2.859375,
      "learning_rate": 9.950388248946626e-06,
      "loss": 0.7809,
      "step": 702670
    },
    {
      "epoch": 2.4627182145398088,
      "grad_norm": 3.15625,
      "learning_rate": 9.949739220282924e-06,
      "loss": 0.7895,
      "step": 702680
    },
    {
      "epoch": 2.4627532620467045,
      "grad_norm": 2.796875,
      "learning_rate": 9.949090191619222e-06,
      "loss": 0.8817,
      "step": 702690
    },
    {
      "epoch": 2.4627883095536,
      "grad_norm": 3.328125,
      "learning_rate": 9.948441162955522e-06,
      "loss": 0.8157,
      "step": 702700
    },
    {
      "epoch": 2.4628233570604956,
      "grad_norm": 3.09375,
      "learning_rate": 9.94779213429182e-06,
      "loss": 0.857,
      "step": 702710
    },
    {
      "epoch": 2.462858404567391,
      "grad_norm": 2.828125,
      "learning_rate": 9.947143105628118e-06,
      "loss": 0.8629,
      "step": 702720
    },
    {
      "epoch": 2.4628934520742867,
      "grad_norm": 2.90625,
      "learning_rate": 9.946494076964416e-06,
      "loss": 0.8601,
      "step": 702730
    },
    {
      "epoch": 2.4629284995811824,
      "grad_norm": 3.046875,
      "learning_rate": 9.945845048300714e-06,
      "loss": 0.8512,
      "step": 702740
    },
    {
      "epoch": 2.4629635470880777,
      "grad_norm": 2.875,
      "learning_rate": 9.94519601963701e-06,
      "loss": 0.7492,
      "step": 702750
    },
    {
      "epoch": 2.4629985945949735,
      "grad_norm": 3.140625,
      "learning_rate": 9.94454699097331e-06,
      "loss": 0.8426,
      "step": 702760
    },
    {
      "epoch": 2.4630336421018693,
      "grad_norm": 3.046875,
      "learning_rate": 9.943897962309608e-06,
      "loss": 0.8593,
      "step": 702770
    },
    {
      "epoch": 2.4630686896087646,
      "grad_norm": 2.953125,
      "learning_rate": 9.943248933645906e-06,
      "loss": 0.8156,
      "step": 702780
    },
    {
      "epoch": 2.4631037371156603,
      "grad_norm": 3.359375,
      "learning_rate": 9.942599904982204e-06,
      "loss": 0.8225,
      "step": 702790
    },
    {
      "epoch": 2.463138784622556,
      "grad_norm": 3.375,
      "learning_rate": 9.941950876318502e-06,
      "loss": 0.8524,
      "step": 702800
    },
    {
      "epoch": 2.4631738321294514,
      "grad_norm": 3.078125,
      "learning_rate": 9.9413018476548e-06,
      "loss": 0.8215,
      "step": 702810
    },
    {
      "epoch": 2.463208879636347,
      "grad_norm": 2.984375,
      "learning_rate": 9.940652818991098e-06,
      "loss": 0.8605,
      "step": 702820
    },
    {
      "epoch": 2.4632439271432425,
      "grad_norm": 3.34375,
      "learning_rate": 9.940003790327398e-06,
      "loss": 0.8126,
      "step": 702830
    },
    {
      "epoch": 2.4632789746501382,
      "grad_norm": 3.015625,
      "learning_rate": 9.939354761663694e-06,
      "loss": 0.8125,
      "step": 702840
    },
    {
      "epoch": 2.463314022157034,
      "grad_norm": 2.953125,
      "learning_rate": 9.938705732999992e-06,
      "loss": 0.7826,
      "step": 702850
    },
    {
      "epoch": 2.4633490696639293,
      "grad_norm": 2.890625,
      "learning_rate": 9.93805670433629e-06,
      "loss": 0.7698,
      "step": 702860
    },
    {
      "epoch": 2.463384117170825,
      "grad_norm": 2.9375,
      "learning_rate": 9.937407675672588e-06,
      "loss": 0.787,
      "step": 702870
    },
    {
      "epoch": 2.463419164677721,
      "grad_norm": 2.546875,
      "learning_rate": 9.936758647008886e-06,
      "loss": 0.7619,
      "step": 702880
    },
    {
      "epoch": 2.463454212184616,
      "grad_norm": 3.078125,
      "learning_rate": 9.936109618345186e-06,
      "loss": 0.7386,
      "step": 702890
    },
    {
      "epoch": 2.463489259691512,
      "grad_norm": 3.0625,
      "learning_rate": 9.935460589681484e-06,
      "loss": 0.7893,
      "step": 702900
    },
    {
      "epoch": 2.4635243071984076,
      "grad_norm": 3.15625,
      "learning_rate": 9.934811561017782e-06,
      "loss": 0.7923,
      "step": 702910
    },
    {
      "epoch": 2.463559354705303,
      "grad_norm": 2.546875,
      "learning_rate": 9.93416253235408e-06,
      "loss": 0.7928,
      "step": 702920
    },
    {
      "epoch": 2.4635944022121987,
      "grad_norm": 3.171875,
      "learning_rate": 9.933513503690378e-06,
      "loss": 0.8223,
      "step": 702930
    },
    {
      "epoch": 2.463629449719094,
      "grad_norm": 2.8125,
      "learning_rate": 9.932864475026676e-06,
      "loss": 0.8786,
      "step": 702940
    },
    {
      "epoch": 2.46366449722599,
      "grad_norm": 2.6875,
      "learning_rate": 9.932215446362974e-06,
      "loss": 0.8479,
      "step": 702950
    },
    {
      "epoch": 2.4636995447328855,
      "grad_norm": 3.78125,
      "learning_rate": 9.931566417699272e-06,
      "loss": 0.7902,
      "step": 702960
    },
    {
      "epoch": 2.463734592239781,
      "grad_norm": 3.5,
      "learning_rate": 9.93091738903557e-06,
      "loss": 0.769,
      "step": 702970
    },
    {
      "epoch": 2.4637696397466766,
      "grad_norm": 3.109375,
      "learning_rate": 9.930268360371868e-06,
      "loss": 0.8457,
      "step": 702980
    },
    {
      "epoch": 2.4638046872535724,
      "grad_norm": 2.5625,
      "learning_rate": 9.929619331708166e-06,
      "loss": 0.7642,
      "step": 702990
    },
    {
      "epoch": 2.4638397347604677,
      "grad_norm": 3.03125,
      "learning_rate": 9.928970303044464e-06,
      "loss": 0.7733,
      "step": 703000
    },
    {
      "epoch": 2.4638747822673635,
      "grad_norm": 3.171875,
      "learning_rate": 9.928321274380764e-06,
      "loss": 0.8503,
      "step": 703010
    },
    {
      "epoch": 2.463909829774259,
      "grad_norm": 2.46875,
      "learning_rate": 9.927672245717062e-06,
      "loss": 0.7729,
      "step": 703020
    },
    {
      "epoch": 2.4639448772811545,
      "grad_norm": 2.828125,
      "learning_rate": 9.927023217053358e-06,
      "loss": 0.7811,
      "step": 703030
    },
    {
      "epoch": 2.4639799247880503,
      "grad_norm": 2.96875,
      "learning_rate": 9.926374188389656e-06,
      "loss": 0.7784,
      "step": 703040
    },
    {
      "epoch": 2.4640149722949456,
      "grad_norm": 3.078125,
      "learning_rate": 9.925725159725954e-06,
      "loss": 0.9149,
      "step": 703050
    },
    {
      "epoch": 2.4640500198018414,
      "grad_norm": 2.59375,
      "learning_rate": 9.925076131062252e-06,
      "loss": 0.8262,
      "step": 703060
    },
    {
      "epoch": 2.464085067308737,
      "grad_norm": 2.375,
      "learning_rate": 9.924427102398552e-06,
      "loss": 0.8572,
      "step": 703070
    },
    {
      "epoch": 2.4641201148156324,
      "grad_norm": 3.03125,
      "learning_rate": 9.92377807373485e-06,
      "loss": 0.8589,
      "step": 703080
    },
    {
      "epoch": 2.464155162322528,
      "grad_norm": 3.265625,
      "learning_rate": 9.923129045071148e-06,
      "loss": 0.7537,
      "step": 703090
    },
    {
      "epoch": 2.464190209829424,
      "grad_norm": 2.75,
      "learning_rate": 9.922480016407446e-06,
      "loss": 0.8141,
      "step": 703100
    },
    {
      "epoch": 2.4642252573363193,
      "grad_norm": 2.8125,
      "learning_rate": 9.921830987743744e-06,
      "loss": 0.8595,
      "step": 703110
    },
    {
      "epoch": 2.464260304843215,
      "grad_norm": 2.609375,
      "learning_rate": 9.92118195908004e-06,
      "loss": 0.7827,
      "step": 703120
    },
    {
      "epoch": 2.4642953523501108,
      "grad_norm": 3.421875,
      "learning_rate": 9.92053293041634e-06,
      "loss": 0.7732,
      "step": 703130
    },
    {
      "epoch": 2.464330399857006,
      "grad_norm": 2.703125,
      "learning_rate": 9.919883901752638e-06,
      "loss": 0.7998,
      "step": 703140
    },
    {
      "epoch": 2.464365447363902,
      "grad_norm": 3.078125,
      "learning_rate": 9.919234873088936e-06,
      "loss": 0.8551,
      "step": 703150
    },
    {
      "epoch": 2.464400494870797,
      "grad_norm": 3.125,
      "learning_rate": 9.918585844425234e-06,
      "loss": 0.8414,
      "step": 703160
    },
    {
      "epoch": 2.464435542377693,
      "grad_norm": 2.5,
      "learning_rate": 9.917936815761532e-06,
      "loss": 0.7234,
      "step": 703170
    },
    {
      "epoch": 2.4644705898845887,
      "grad_norm": 3.109375,
      "learning_rate": 9.91728778709783e-06,
      "loss": 0.8242,
      "step": 703180
    },
    {
      "epoch": 2.464505637391484,
      "grad_norm": 3.265625,
      "learning_rate": 9.916638758434128e-06,
      "loss": 0.7719,
      "step": 703190
    },
    {
      "epoch": 2.4645406848983797,
      "grad_norm": 2.765625,
      "learning_rate": 9.915989729770427e-06,
      "loss": 0.743,
      "step": 703200
    },
    {
      "epoch": 2.4645757324052755,
      "grad_norm": 2.53125,
      "learning_rate": 9.915340701106725e-06,
      "loss": 0.8317,
      "step": 703210
    },
    {
      "epoch": 2.464610779912171,
      "grad_norm": 3.625,
      "learning_rate": 9.914691672443022e-06,
      "loss": 0.8646,
      "step": 703220
    },
    {
      "epoch": 2.4646458274190666,
      "grad_norm": 2.96875,
      "learning_rate": 9.91404264377932e-06,
      "loss": 0.8611,
      "step": 703230
    },
    {
      "epoch": 2.4646808749259623,
      "grad_norm": 3.453125,
      "learning_rate": 9.913393615115618e-06,
      "loss": 0.8611,
      "step": 703240
    },
    {
      "epoch": 2.4647159224328576,
      "grad_norm": 2.921875,
      "learning_rate": 9.912744586451917e-06,
      "loss": 0.8137,
      "step": 703250
    },
    {
      "epoch": 2.4647509699397534,
      "grad_norm": 2.484375,
      "learning_rate": 9.912095557788215e-06,
      "loss": 0.8385,
      "step": 703260
    },
    {
      "epoch": 2.4647860174466487,
      "grad_norm": 2.8125,
      "learning_rate": 9.911446529124513e-06,
      "loss": 0.7793,
      "step": 703270
    },
    {
      "epoch": 2.4648210649535445,
      "grad_norm": 3.078125,
      "learning_rate": 9.910797500460811e-06,
      "loss": 0.7785,
      "step": 703280
    },
    {
      "epoch": 2.4648561124604402,
      "grad_norm": 2.78125,
      "learning_rate": 9.91014847179711e-06,
      "loss": 0.8647,
      "step": 703290
    },
    {
      "epoch": 2.4648911599673355,
      "grad_norm": 2.859375,
      "learning_rate": 9.909499443133407e-06,
      "loss": 0.771,
      "step": 703300
    },
    {
      "epoch": 2.4649262074742313,
      "grad_norm": 2.890625,
      "learning_rate": 9.908850414469705e-06,
      "loss": 0.721,
      "step": 703310
    },
    {
      "epoch": 2.464961254981127,
      "grad_norm": 2.609375,
      "learning_rate": 9.908201385806003e-06,
      "loss": 0.7697,
      "step": 703320
    },
    {
      "epoch": 2.4649963024880224,
      "grad_norm": 3.03125,
      "learning_rate": 9.907552357142301e-06,
      "loss": 0.8405,
      "step": 703330
    },
    {
      "epoch": 2.465031349994918,
      "grad_norm": 2.9375,
      "learning_rate": 9.9069033284786e-06,
      "loss": 0.7933,
      "step": 703340
    },
    {
      "epoch": 2.465066397501814,
      "grad_norm": 3.234375,
      "learning_rate": 9.906254299814897e-06,
      "loss": 0.8105,
      "step": 703350
    },
    {
      "epoch": 2.465101445008709,
      "grad_norm": 2.96875,
      "learning_rate": 9.905605271151195e-06,
      "loss": 0.8908,
      "step": 703360
    },
    {
      "epoch": 2.465136492515605,
      "grad_norm": 2.78125,
      "learning_rate": 9.904956242487493e-06,
      "loss": 0.7821,
      "step": 703370
    },
    {
      "epoch": 2.4651715400225003,
      "grad_norm": 3.0,
      "learning_rate": 9.904307213823793e-06,
      "loss": 0.8783,
      "step": 703380
    },
    {
      "epoch": 2.465206587529396,
      "grad_norm": 2.984375,
      "learning_rate": 9.903658185160091e-06,
      "loss": 0.7789,
      "step": 703390
    },
    {
      "epoch": 2.465241635036292,
      "grad_norm": 2.4375,
      "learning_rate": 9.903009156496389e-06,
      "loss": 0.7459,
      "step": 703400
    },
    {
      "epoch": 2.465276682543187,
      "grad_norm": 2.6875,
      "learning_rate": 9.902360127832685e-06,
      "loss": 0.7807,
      "step": 703410
    },
    {
      "epoch": 2.465311730050083,
      "grad_norm": 2.71875,
      "learning_rate": 9.901711099168983e-06,
      "loss": 0.8337,
      "step": 703420
    },
    {
      "epoch": 2.4653467775569786,
      "grad_norm": 2.765625,
      "learning_rate": 9.901062070505281e-06,
      "loss": 0.7651,
      "step": 703430
    },
    {
      "epoch": 2.465381825063874,
      "grad_norm": 2.953125,
      "learning_rate": 9.900413041841581e-06,
      "loss": 0.8806,
      "step": 703440
    },
    {
      "epoch": 2.4654168725707697,
      "grad_norm": 2.90625,
      "learning_rate": 9.899764013177879e-06,
      "loss": 0.8129,
      "step": 703450
    },
    {
      "epoch": 2.4654519200776654,
      "grad_norm": 2.78125,
      "learning_rate": 9.899114984514177e-06,
      "loss": 0.8041,
      "step": 703460
    },
    {
      "epoch": 2.4654869675845608,
      "grad_norm": 2.78125,
      "learning_rate": 9.898465955850475e-06,
      "loss": 0.7521,
      "step": 703470
    },
    {
      "epoch": 2.4655220150914565,
      "grad_norm": 2.875,
      "learning_rate": 9.897816927186773e-06,
      "loss": 0.837,
      "step": 703480
    },
    {
      "epoch": 2.465557062598352,
      "grad_norm": 3.0625,
      "learning_rate": 9.897167898523071e-06,
      "loss": 0.8786,
      "step": 703490
    },
    {
      "epoch": 2.4655921101052476,
      "grad_norm": 2.84375,
      "learning_rate": 9.896518869859369e-06,
      "loss": 0.8349,
      "step": 703500
    },
    {
      "epoch": 2.4656271576121433,
      "grad_norm": 2.65625,
      "learning_rate": 9.895869841195667e-06,
      "loss": 0.7076,
      "step": 703510
    },
    {
      "epoch": 2.4656622051190387,
      "grad_norm": 2.84375,
      "learning_rate": 9.895220812531965e-06,
      "loss": 0.8488,
      "step": 703520
    },
    {
      "epoch": 2.4656972526259344,
      "grad_norm": 2.796875,
      "learning_rate": 9.894571783868263e-06,
      "loss": 0.8184,
      "step": 703530
    },
    {
      "epoch": 2.46573230013283,
      "grad_norm": 2.8125,
      "learning_rate": 9.893922755204561e-06,
      "loss": 0.7916,
      "step": 703540
    },
    {
      "epoch": 2.4657673476397255,
      "grad_norm": 3.265625,
      "learning_rate": 9.893273726540859e-06,
      "loss": 0.7855,
      "step": 703550
    },
    {
      "epoch": 2.4658023951466213,
      "grad_norm": 2.3125,
      "learning_rate": 9.892624697877159e-06,
      "loss": 0.779,
      "step": 703560
    },
    {
      "epoch": 2.465837442653517,
      "grad_norm": 3.0625,
      "learning_rate": 9.891975669213457e-06,
      "loss": 0.833,
      "step": 703570
    },
    {
      "epoch": 2.4658724901604123,
      "grad_norm": 3.09375,
      "learning_rate": 9.891326640549755e-06,
      "loss": 0.7697,
      "step": 703580
    },
    {
      "epoch": 2.465907537667308,
      "grad_norm": 3.234375,
      "learning_rate": 9.890677611886051e-06,
      "loss": 0.7871,
      "step": 703590
    },
    {
      "epoch": 2.4659425851742034,
      "grad_norm": 3.203125,
      "learning_rate": 9.890028583222349e-06,
      "loss": 0.8674,
      "step": 703600
    },
    {
      "epoch": 2.465977632681099,
      "grad_norm": 2.734375,
      "learning_rate": 9.889379554558647e-06,
      "loss": 0.7945,
      "step": 703610
    },
    {
      "epoch": 2.466012680187995,
      "grad_norm": 3.046875,
      "learning_rate": 9.888730525894947e-06,
      "loss": 0.7804,
      "step": 703620
    },
    {
      "epoch": 2.4660477276948907,
      "grad_norm": 2.453125,
      "learning_rate": 9.888081497231245e-06,
      "loss": 0.8122,
      "step": 703630
    },
    {
      "epoch": 2.466082775201786,
      "grad_norm": 3.203125,
      "learning_rate": 9.887432468567543e-06,
      "loss": 0.8047,
      "step": 703640
    },
    {
      "epoch": 2.4661178227086817,
      "grad_norm": 3.03125,
      "learning_rate": 9.88678343990384e-06,
      "loss": 0.788,
      "step": 703650
    },
    {
      "epoch": 2.466152870215577,
      "grad_norm": 2.84375,
      "learning_rate": 9.886134411240139e-06,
      "loss": 0.7253,
      "step": 703660
    },
    {
      "epoch": 2.466187917722473,
      "grad_norm": 3.234375,
      "learning_rate": 9.885485382576437e-06,
      "loss": 0.8376,
      "step": 703670
    },
    {
      "epoch": 2.4662229652293686,
      "grad_norm": 2.953125,
      "learning_rate": 9.884836353912735e-06,
      "loss": 0.7418,
      "step": 703680
    },
    {
      "epoch": 2.466258012736264,
      "grad_norm": 3.1875,
      "learning_rate": 9.884187325249033e-06,
      "loss": 0.8246,
      "step": 703690
    },
    {
      "epoch": 2.4662930602431596,
      "grad_norm": 2.90625,
      "learning_rate": 9.88353829658533e-06,
      "loss": 0.822,
      "step": 703700
    },
    {
      "epoch": 2.466328107750055,
      "grad_norm": 2.328125,
      "learning_rate": 9.882889267921629e-06,
      "loss": 0.8589,
      "step": 703710
    },
    {
      "epoch": 2.4663631552569507,
      "grad_norm": 2.90625,
      "learning_rate": 9.882240239257927e-06,
      "loss": 0.7946,
      "step": 703720
    },
    {
      "epoch": 2.4663982027638465,
      "grad_norm": 3.265625,
      "learning_rate": 9.881591210594225e-06,
      "loss": 0.8116,
      "step": 703730
    },
    {
      "epoch": 2.4664332502707422,
      "grad_norm": 3.0,
      "learning_rate": 9.880942181930523e-06,
      "loss": 0.9054,
      "step": 703740
    },
    {
      "epoch": 2.4664682977776375,
      "grad_norm": 3.796875,
      "learning_rate": 9.880293153266822e-06,
      "loss": 0.7893,
      "step": 703750
    },
    {
      "epoch": 2.4665033452845333,
      "grad_norm": 3.03125,
      "learning_rate": 9.87964412460312e-06,
      "loss": 0.7929,
      "step": 703760
    },
    {
      "epoch": 2.4665383927914286,
      "grad_norm": 2.609375,
      "learning_rate": 9.878995095939418e-06,
      "loss": 0.8019,
      "step": 703770
    },
    {
      "epoch": 2.4665734402983244,
      "grad_norm": 3.203125,
      "learning_rate": 9.878346067275715e-06,
      "loss": 0.8233,
      "step": 703780
    },
    {
      "epoch": 2.46660848780522,
      "grad_norm": 3.203125,
      "learning_rate": 9.877697038612013e-06,
      "loss": 0.7634,
      "step": 703790
    },
    {
      "epoch": 2.4666435353121154,
      "grad_norm": 3.078125,
      "learning_rate": 9.877048009948312e-06,
      "loss": 0.8655,
      "step": 703800
    },
    {
      "epoch": 2.466678582819011,
      "grad_norm": 3.28125,
      "learning_rate": 9.87639898128461e-06,
      "loss": 0.7453,
      "step": 703810
    },
    {
      "epoch": 2.466713630325907,
      "grad_norm": 2.90625,
      "learning_rate": 9.875749952620908e-06,
      "loss": 0.8086,
      "step": 703820
    },
    {
      "epoch": 2.4667486778328023,
      "grad_norm": 4.5,
      "learning_rate": 9.875100923957206e-06,
      "loss": 0.8051,
      "step": 703830
    },
    {
      "epoch": 2.466783725339698,
      "grad_norm": 2.78125,
      "learning_rate": 9.874451895293504e-06,
      "loss": 0.7877,
      "step": 703840
    },
    {
      "epoch": 2.466818772846594,
      "grad_norm": 3.25,
      "learning_rate": 9.873802866629802e-06,
      "loss": 0.8679,
      "step": 703850
    },
    {
      "epoch": 2.466853820353489,
      "grad_norm": 3.0625,
      "learning_rate": 9.8731538379661e-06,
      "loss": 0.8414,
      "step": 703860
    },
    {
      "epoch": 2.466888867860385,
      "grad_norm": 2.59375,
      "learning_rate": 9.8725048093024e-06,
      "loss": 0.7966,
      "step": 703870
    },
    {
      "epoch": 2.46692391536728,
      "grad_norm": 3.03125,
      "learning_rate": 9.871855780638696e-06,
      "loss": 0.7858,
      "step": 703880
    },
    {
      "epoch": 2.466958962874176,
      "grad_norm": 3.140625,
      "learning_rate": 9.871206751974994e-06,
      "loss": 0.772,
      "step": 703890
    },
    {
      "epoch": 2.4669940103810717,
      "grad_norm": 2.859375,
      "learning_rate": 9.870557723311292e-06,
      "loss": 0.8221,
      "step": 703900
    },
    {
      "epoch": 2.467029057887967,
      "grad_norm": 2.5625,
      "learning_rate": 9.86990869464759e-06,
      "loss": 0.7416,
      "step": 703910
    },
    {
      "epoch": 2.4670641053948628,
      "grad_norm": 3.34375,
      "learning_rate": 9.869259665983888e-06,
      "loss": 0.8384,
      "step": 703920
    },
    {
      "epoch": 2.4670991529017585,
      "grad_norm": 3.078125,
      "learning_rate": 9.868610637320188e-06,
      "loss": 0.7983,
      "step": 703930
    },
    {
      "epoch": 2.467134200408654,
      "grad_norm": 2.984375,
      "learning_rate": 9.867961608656486e-06,
      "loss": 0.8048,
      "step": 703940
    },
    {
      "epoch": 2.4671692479155496,
      "grad_norm": 2.859375,
      "learning_rate": 9.867312579992784e-06,
      "loss": 0.8522,
      "step": 703950
    },
    {
      "epoch": 2.4672042954224453,
      "grad_norm": 2.953125,
      "learning_rate": 9.866663551329082e-06,
      "loss": 0.6972,
      "step": 703960
    },
    {
      "epoch": 2.4672393429293407,
      "grad_norm": 2.984375,
      "learning_rate": 9.866014522665378e-06,
      "loss": 0.7957,
      "step": 703970
    },
    {
      "epoch": 2.4672743904362364,
      "grad_norm": 2.640625,
      "learning_rate": 9.865365494001676e-06,
      "loss": 0.7994,
      "step": 703980
    },
    {
      "epoch": 2.4673094379431317,
      "grad_norm": 3.15625,
      "learning_rate": 9.864716465337976e-06,
      "loss": 0.8643,
      "step": 703990
    },
    {
      "epoch": 2.4673444854500275,
      "grad_norm": 3.03125,
      "learning_rate": 9.864067436674274e-06,
      "loss": 0.8188,
      "step": 704000
    },
    {
      "epoch": 2.4673795329569232,
      "grad_norm": 2.703125,
      "learning_rate": 9.863418408010572e-06,
      "loss": 0.7872,
      "step": 704010
    },
    {
      "epoch": 2.4674145804638186,
      "grad_norm": 3.078125,
      "learning_rate": 9.86276937934687e-06,
      "loss": 0.8655,
      "step": 704020
    },
    {
      "epoch": 2.4674496279707143,
      "grad_norm": 2.796875,
      "learning_rate": 9.862120350683168e-06,
      "loss": 0.8365,
      "step": 704030
    },
    {
      "epoch": 2.46748467547761,
      "grad_norm": 2.9375,
      "learning_rate": 9.861471322019466e-06,
      "loss": 0.8187,
      "step": 704040
    },
    {
      "epoch": 2.4675197229845054,
      "grad_norm": 3.25,
      "learning_rate": 9.860822293355764e-06,
      "loss": 0.8106,
      "step": 704050
    },
    {
      "epoch": 2.467554770491401,
      "grad_norm": 2.828125,
      "learning_rate": 9.860173264692062e-06,
      "loss": 0.7874,
      "step": 704060
    },
    {
      "epoch": 2.467589817998297,
      "grad_norm": 2.9375,
      "learning_rate": 9.85952423602836e-06,
      "loss": 0.8086,
      "step": 704070
    },
    {
      "epoch": 2.4676248655051922,
      "grad_norm": 2.65625,
      "learning_rate": 9.858875207364658e-06,
      "loss": 0.7692,
      "step": 704080
    },
    {
      "epoch": 2.467659913012088,
      "grad_norm": 3.453125,
      "learning_rate": 9.858226178700956e-06,
      "loss": 0.8224,
      "step": 704090
    },
    {
      "epoch": 2.4676949605189833,
      "grad_norm": 2.71875,
      "learning_rate": 9.857577150037254e-06,
      "loss": 0.7386,
      "step": 704100
    },
    {
      "epoch": 2.467730008025879,
      "grad_norm": 3.171875,
      "learning_rate": 9.856928121373554e-06,
      "loss": 0.7357,
      "step": 704110
    },
    {
      "epoch": 2.467765055532775,
      "grad_norm": 3.0,
      "learning_rate": 9.856279092709852e-06,
      "loss": 0.7809,
      "step": 704120
    },
    {
      "epoch": 2.46780010303967,
      "grad_norm": 2.890625,
      "learning_rate": 9.85563006404615e-06,
      "loss": 0.7662,
      "step": 704130
    },
    {
      "epoch": 2.467835150546566,
      "grad_norm": 2.734375,
      "learning_rate": 9.854981035382448e-06,
      "loss": 0.7661,
      "step": 704140
    },
    {
      "epoch": 2.4678701980534616,
      "grad_norm": 3.390625,
      "learning_rate": 9.854332006718746e-06,
      "loss": 0.7465,
      "step": 704150
    },
    {
      "epoch": 2.467905245560357,
      "grad_norm": 3.046875,
      "learning_rate": 9.853682978055042e-06,
      "loss": 0.7826,
      "step": 704160
    },
    {
      "epoch": 2.4679402930672527,
      "grad_norm": 3.1875,
      "learning_rate": 9.853033949391342e-06,
      "loss": 0.8593,
      "step": 704170
    },
    {
      "epoch": 2.4679753405741485,
      "grad_norm": 2.703125,
      "learning_rate": 9.85238492072764e-06,
      "loss": 0.8325,
      "step": 704180
    },
    {
      "epoch": 2.468010388081044,
      "grad_norm": 2.65625,
      "learning_rate": 9.851735892063938e-06,
      "loss": 0.8261,
      "step": 704190
    },
    {
      "epoch": 2.4680454355879395,
      "grad_norm": 3.125,
      "learning_rate": 9.851086863400236e-06,
      "loss": 0.7927,
      "step": 704200
    },
    {
      "epoch": 2.468080483094835,
      "grad_norm": 3.0,
      "learning_rate": 9.850437834736534e-06,
      "loss": 0.8013,
      "step": 704210
    },
    {
      "epoch": 2.4681155306017306,
      "grad_norm": 2.421875,
      "learning_rate": 9.849788806072832e-06,
      "loss": 0.8322,
      "step": 704220
    },
    {
      "epoch": 2.4681505781086264,
      "grad_norm": 2.765625,
      "learning_rate": 9.84913977740913e-06,
      "loss": 0.8026,
      "step": 704230
    },
    {
      "epoch": 2.4681856256155217,
      "grad_norm": 2.609375,
      "learning_rate": 9.84849074874543e-06,
      "loss": 0.7687,
      "step": 704240
    },
    {
      "epoch": 2.4682206731224174,
      "grad_norm": 2.328125,
      "learning_rate": 9.847841720081726e-06,
      "loss": 0.8216,
      "step": 704250
    },
    {
      "epoch": 2.468255720629313,
      "grad_norm": 3.5,
      "learning_rate": 9.847192691418024e-06,
      "loss": 0.7826,
      "step": 704260
    },
    {
      "epoch": 2.4682907681362085,
      "grad_norm": 3.09375,
      "learning_rate": 9.846543662754322e-06,
      "loss": 0.7992,
      "step": 704270
    },
    {
      "epoch": 2.4683258156431043,
      "grad_norm": 3.0,
      "learning_rate": 9.84589463409062e-06,
      "loss": 0.8435,
      "step": 704280
    },
    {
      "epoch": 2.46836086315,
      "grad_norm": 2.78125,
      "learning_rate": 9.845245605426918e-06,
      "loss": 0.8027,
      "step": 704290
    },
    {
      "epoch": 2.4683959106568953,
      "grad_norm": 3.25,
      "learning_rate": 9.844596576763217e-06,
      "loss": 0.8037,
      "step": 704300
    },
    {
      "epoch": 2.468430958163791,
      "grad_norm": 2.9375,
      "learning_rate": 9.843947548099515e-06,
      "loss": 0.8615,
      "step": 704310
    },
    {
      "epoch": 2.4684660056706864,
      "grad_norm": 3.578125,
      "learning_rate": 9.843298519435813e-06,
      "loss": 0.794,
      "step": 704320
    },
    {
      "epoch": 2.468501053177582,
      "grad_norm": 2.5625,
      "learning_rate": 9.842649490772111e-06,
      "loss": 0.7393,
      "step": 704330
    },
    {
      "epoch": 2.468536100684478,
      "grad_norm": 2.78125,
      "learning_rate": 9.84200046210841e-06,
      "loss": 0.805,
      "step": 704340
    },
    {
      "epoch": 2.4685711481913732,
      "grad_norm": 2.6875,
      "learning_rate": 9.841351433444707e-06,
      "loss": 0.7565,
      "step": 704350
    },
    {
      "epoch": 2.468606195698269,
      "grad_norm": 2.8125,
      "learning_rate": 9.840702404781005e-06,
      "loss": 0.7746,
      "step": 704360
    },
    {
      "epoch": 2.4686412432051648,
      "grad_norm": 3.046875,
      "learning_rate": 9.840053376117303e-06,
      "loss": 0.8067,
      "step": 704370
    },
    {
      "epoch": 2.46867629071206,
      "grad_norm": 3.234375,
      "learning_rate": 9.839404347453601e-06,
      "loss": 0.8429,
      "step": 704380
    },
    {
      "epoch": 2.468711338218956,
      "grad_norm": 2.484375,
      "learning_rate": 9.8387553187899e-06,
      "loss": 0.7178,
      "step": 704390
    },
    {
      "epoch": 2.4687463857258516,
      "grad_norm": 2.796875,
      "learning_rate": 9.838106290126197e-06,
      "loss": 0.7369,
      "step": 704400
    },
    {
      "epoch": 2.468781433232747,
      "grad_norm": 2.53125,
      "learning_rate": 9.837457261462495e-06,
      "loss": 0.8158,
      "step": 704410
    },
    {
      "epoch": 2.4688164807396427,
      "grad_norm": 2.75,
      "learning_rate": 9.836808232798795e-06,
      "loss": 0.8177,
      "step": 704420
    },
    {
      "epoch": 2.468851528246538,
      "grad_norm": 2.671875,
      "learning_rate": 9.836159204135093e-06,
      "loss": 0.7688,
      "step": 704430
    },
    {
      "epoch": 2.4688865757534337,
      "grad_norm": 3.28125,
      "learning_rate": 9.83551017547139e-06,
      "loss": 0.8288,
      "step": 704440
    },
    {
      "epoch": 2.4689216232603295,
      "grad_norm": 3.109375,
      "learning_rate": 9.834861146807687e-06,
      "loss": 0.7771,
      "step": 704450
    },
    {
      "epoch": 2.468956670767225,
      "grad_norm": 3.46875,
      "learning_rate": 9.834212118143985e-06,
      "loss": 0.8158,
      "step": 704460
    },
    {
      "epoch": 2.4689917182741206,
      "grad_norm": 2.9375,
      "learning_rate": 9.833563089480283e-06,
      "loss": 0.7956,
      "step": 704470
    },
    {
      "epoch": 2.4690267657810163,
      "grad_norm": 3.328125,
      "learning_rate": 9.832914060816583e-06,
      "loss": 0.8175,
      "step": 704480
    },
    {
      "epoch": 2.4690618132879116,
      "grad_norm": 2.671875,
      "learning_rate": 9.832265032152881e-06,
      "loss": 0.8014,
      "step": 704490
    },
    {
      "epoch": 2.4690968607948074,
      "grad_norm": 2.578125,
      "learning_rate": 9.831616003489179e-06,
      "loss": 0.819,
      "step": 704500
    },
    {
      "epoch": 2.469131908301703,
      "grad_norm": 3.15625,
      "learning_rate": 9.830966974825477e-06,
      "loss": 0.8325,
      "step": 704510
    },
    {
      "epoch": 2.4691669558085985,
      "grad_norm": 2.390625,
      "learning_rate": 9.830317946161775e-06,
      "loss": 0.8253,
      "step": 704520
    },
    {
      "epoch": 2.469202003315494,
      "grad_norm": 2.703125,
      "learning_rate": 9.829668917498071e-06,
      "loss": 0.8213,
      "step": 704530
    },
    {
      "epoch": 2.4692370508223895,
      "grad_norm": 3.390625,
      "learning_rate": 9.829019888834371e-06,
      "loss": 0.7955,
      "step": 704540
    },
    {
      "epoch": 2.4692720983292853,
      "grad_norm": 3.359375,
      "learning_rate": 9.828370860170669e-06,
      "loss": 0.829,
      "step": 704550
    },
    {
      "epoch": 2.469307145836181,
      "grad_norm": 2.953125,
      "learning_rate": 9.827721831506967e-06,
      "loss": 0.801,
      "step": 704560
    },
    {
      "epoch": 2.4693421933430764,
      "grad_norm": 2.796875,
      "learning_rate": 9.827072802843265e-06,
      "loss": 0.8015,
      "step": 704570
    },
    {
      "epoch": 2.469377240849972,
      "grad_norm": 2.875,
      "learning_rate": 9.826423774179563e-06,
      "loss": 0.8378,
      "step": 704580
    },
    {
      "epoch": 2.469412288356868,
      "grad_norm": 3.1875,
      "learning_rate": 9.825774745515861e-06,
      "loss": 0.7465,
      "step": 704590
    },
    {
      "epoch": 2.469447335863763,
      "grad_norm": 3.1875,
      "learning_rate": 9.825125716852159e-06,
      "loss": 0.8895,
      "step": 704600
    },
    {
      "epoch": 2.469482383370659,
      "grad_norm": 2.984375,
      "learning_rate": 9.824476688188459e-06,
      "loss": 0.7807,
      "step": 704610
    },
    {
      "epoch": 2.4695174308775547,
      "grad_norm": 3.0625,
      "learning_rate": 9.823827659524757e-06,
      "loss": 0.855,
      "step": 704620
    },
    {
      "epoch": 2.46955247838445,
      "grad_norm": 2.84375,
      "learning_rate": 9.823178630861053e-06,
      "loss": 0.8892,
      "step": 704630
    },
    {
      "epoch": 2.469587525891346,
      "grad_norm": 3.609375,
      "learning_rate": 9.822529602197351e-06,
      "loss": 0.8908,
      "step": 704640
    },
    {
      "epoch": 2.469622573398241,
      "grad_norm": 3.015625,
      "learning_rate": 9.821880573533649e-06,
      "loss": 0.9088,
      "step": 704650
    },
    {
      "epoch": 2.469657620905137,
      "grad_norm": 3.125,
      "learning_rate": 9.821231544869949e-06,
      "loss": 0.885,
      "step": 704660
    },
    {
      "epoch": 2.4696926684120326,
      "grad_norm": 2.921875,
      "learning_rate": 9.820582516206247e-06,
      "loss": 0.8487,
      "step": 704670
    },
    {
      "epoch": 2.469727715918928,
      "grad_norm": 2.828125,
      "learning_rate": 9.819933487542545e-06,
      "loss": 0.8331,
      "step": 704680
    },
    {
      "epoch": 2.4697627634258237,
      "grad_norm": 3.125,
      "learning_rate": 9.819284458878843e-06,
      "loss": 0.7676,
      "step": 704690
    },
    {
      "epoch": 2.4697978109327194,
      "grad_norm": 2.609375,
      "learning_rate": 9.81863543021514e-06,
      "loss": 0.8077,
      "step": 704700
    },
    {
      "epoch": 2.4698328584396148,
      "grad_norm": 2.671875,
      "learning_rate": 9.817986401551439e-06,
      "loss": 0.856,
      "step": 704710
    },
    {
      "epoch": 2.4698679059465105,
      "grad_norm": 3.53125,
      "learning_rate": 9.817337372887737e-06,
      "loss": 0.8451,
      "step": 704720
    },
    {
      "epoch": 2.4699029534534063,
      "grad_norm": 2.65625,
      "learning_rate": 9.816688344224035e-06,
      "loss": 0.79,
      "step": 704730
    },
    {
      "epoch": 2.4699380009603016,
      "grad_norm": 2.671875,
      "learning_rate": 9.816039315560333e-06,
      "loss": 0.7719,
      "step": 704740
    },
    {
      "epoch": 2.4699730484671973,
      "grad_norm": 2.78125,
      "learning_rate": 9.81539028689663e-06,
      "loss": 0.8639,
      "step": 704750
    },
    {
      "epoch": 2.4700080959740927,
      "grad_norm": 2.75,
      "learning_rate": 9.814741258232929e-06,
      "loss": 0.7542,
      "step": 704760
    },
    {
      "epoch": 2.4700431434809884,
      "grad_norm": 2.53125,
      "learning_rate": 9.814092229569227e-06,
      "loss": 0.7684,
      "step": 704770
    },
    {
      "epoch": 2.470078190987884,
      "grad_norm": 3.03125,
      "learning_rate": 9.813443200905525e-06,
      "loss": 0.7853,
      "step": 704780
    },
    {
      "epoch": 2.4701132384947795,
      "grad_norm": 2.609375,
      "learning_rate": 9.812794172241824e-06,
      "loss": 0.8439,
      "step": 704790
    },
    {
      "epoch": 2.4701482860016752,
      "grad_norm": 2.71875,
      "learning_rate": 9.812145143578122e-06,
      "loss": 0.7826,
      "step": 704800
    },
    {
      "epoch": 2.470183333508571,
      "grad_norm": 3.1875,
      "learning_rate": 9.81149611491442e-06,
      "loss": 0.8219,
      "step": 704810
    },
    {
      "epoch": 2.4702183810154663,
      "grad_norm": 2.40625,
      "learning_rate": 9.810847086250717e-06,
      "loss": 0.7599,
      "step": 704820
    },
    {
      "epoch": 2.470253428522362,
      "grad_norm": 3.15625,
      "learning_rate": 9.810198057587015e-06,
      "loss": 0.7499,
      "step": 704830
    },
    {
      "epoch": 2.470288476029258,
      "grad_norm": 3.3125,
      "learning_rate": 9.809549028923313e-06,
      "loss": 0.7977,
      "step": 704840
    },
    {
      "epoch": 2.470323523536153,
      "grad_norm": 3.3125,
      "learning_rate": 9.808900000259612e-06,
      "loss": 0.8768,
      "step": 704850
    },
    {
      "epoch": 2.470358571043049,
      "grad_norm": 2.34375,
      "learning_rate": 9.80825097159591e-06,
      "loss": 0.7858,
      "step": 704860
    },
    {
      "epoch": 2.470393618549944,
      "grad_norm": 2.640625,
      "learning_rate": 9.807601942932208e-06,
      "loss": 0.8396,
      "step": 704870
    },
    {
      "epoch": 2.47042866605684,
      "grad_norm": 2.953125,
      "learning_rate": 9.806952914268506e-06,
      "loss": 0.8354,
      "step": 704880
    },
    {
      "epoch": 2.4704637135637357,
      "grad_norm": 2.96875,
      "learning_rate": 9.806303885604804e-06,
      "loss": 0.8106,
      "step": 704890
    },
    {
      "epoch": 2.4704987610706315,
      "grad_norm": 2.9375,
      "learning_rate": 9.805654856941102e-06,
      "loss": 0.7829,
      "step": 704900
    },
    {
      "epoch": 2.470533808577527,
      "grad_norm": 3.03125,
      "learning_rate": 9.8050058282774e-06,
      "loss": 0.7528,
      "step": 704910
    },
    {
      "epoch": 2.4705688560844226,
      "grad_norm": 2.984375,
      "learning_rate": 9.804356799613698e-06,
      "loss": 0.8494,
      "step": 704920
    },
    {
      "epoch": 2.470603903591318,
      "grad_norm": 2.484375,
      "learning_rate": 9.803707770949996e-06,
      "loss": 0.8314,
      "step": 704930
    },
    {
      "epoch": 2.4706389510982136,
      "grad_norm": 3.015625,
      "learning_rate": 9.803058742286294e-06,
      "loss": 0.8437,
      "step": 704940
    },
    {
      "epoch": 2.4706739986051094,
      "grad_norm": 3.03125,
      "learning_rate": 9.802409713622592e-06,
      "loss": 0.7816,
      "step": 704950
    },
    {
      "epoch": 2.4707090461120047,
      "grad_norm": 3.078125,
      "learning_rate": 9.80176068495889e-06,
      "loss": 0.8077,
      "step": 704960
    },
    {
      "epoch": 2.4707440936189005,
      "grad_norm": 2.578125,
      "learning_rate": 9.80111165629519e-06,
      "loss": 0.7359,
      "step": 704970
    },
    {
      "epoch": 2.4707791411257958,
      "grad_norm": 2.921875,
      "learning_rate": 9.800462627631488e-06,
      "loss": 0.8261,
      "step": 704980
    },
    {
      "epoch": 2.4708141886326915,
      "grad_norm": 3.265625,
      "learning_rate": 9.799813598967786e-06,
      "loss": 0.8421,
      "step": 704990
    },
    {
      "epoch": 2.4708492361395873,
      "grad_norm": 3.328125,
      "learning_rate": 9.799164570304082e-06,
      "loss": 0.8261,
      "step": 705000
    },
    {
      "epoch": 2.4708492361395873,
      "eval_loss": 0.7573106288909912,
      "eval_runtime": 554.403,
      "eval_samples_per_second": 686.208,
      "eval_steps_per_second": 57.184,
      "step": 705000
    },
    {
      "epoch": 2.470884283646483,
      "grad_norm": 2.703125,
      "learning_rate": 9.79851554164038e-06,
      "loss": 0.794,
      "step": 705010
    },
    {
      "epoch": 2.4709193311533784,
      "grad_norm": 2.875,
      "learning_rate": 9.797866512976678e-06,
      "loss": 0.824,
      "step": 705020
    },
    {
      "epoch": 2.470954378660274,
      "grad_norm": 2.84375,
      "learning_rate": 9.797217484312978e-06,
      "loss": 0.8296,
      "step": 705030
    },
    {
      "epoch": 2.4709894261671694,
      "grad_norm": 3.15625,
      "learning_rate": 9.796568455649276e-06,
      "loss": 0.8789,
      "step": 705040
    },
    {
      "epoch": 2.471024473674065,
      "grad_norm": 3.171875,
      "learning_rate": 9.795919426985574e-06,
      "loss": 0.8666,
      "step": 705050
    },
    {
      "epoch": 2.471059521180961,
      "grad_norm": 3.046875,
      "learning_rate": 9.795270398321872e-06,
      "loss": 0.8159,
      "step": 705060
    },
    {
      "epoch": 2.4710945686878563,
      "grad_norm": 2.984375,
      "learning_rate": 9.79462136965817e-06,
      "loss": 0.8372,
      "step": 705070
    },
    {
      "epoch": 2.471129616194752,
      "grad_norm": 3.203125,
      "learning_rate": 9.793972340994468e-06,
      "loss": 0.7893,
      "step": 705080
    },
    {
      "epoch": 2.4711646637016473,
      "grad_norm": 2.640625,
      "learning_rate": 9.793323312330766e-06,
      "loss": 0.8284,
      "step": 705090
    },
    {
      "epoch": 2.471199711208543,
      "grad_norm": 2.953125,
      "learning_rate": 9.792674283667064e-06,
      "loss": 0.821,
      "step": 705100
    },
    {
      "epoch": 2.471234758715439,
      "grad_norm": 2.875,
      "learning_rate": 9.792025255003362e-06,
      "loss": 0.7712,
      "step": 705110
    },
    {
      "epoch": 2.4712698062223346,
      "grad_norm": 2.859375,
      "learning_rate": 9.79137622633966e-06,
      "loss": 0.8556,
      "step": 705120
    },
    {
      "epoch": 2.47130485372923,
      "grad_norm": 3.3125,
      "learning_rate": 9.790727197675958e-06,
      "loss": 0.7742,
      "step": 705130
    },
    {
      "epoch": 2.4713399012361257,
      "grad_norm": 3.109375,
      "learning_rate": 9.790078169012256e-06,
      "loss": 0.8759,
      "step": 705140
    },
    {
      "epoch": 2.471374948743021,
      "grad_norm": 2.875,
      "learning_rate": 9.789429140348554e-06,
      "loss": 0.7871,
      "step": 705150
    },
    {
      "epoch": 2.4714099962499168,
      "grad_norm": 3.265625,
      "learning_rate": 9.788780111684854e-06,
      "loss": 0.7668,
      "step": 705160
    },
    {
      "epoch": 2.4714450437568125,
      "grad_norm": 3.15625,
      "learning_rate": 9.788131083021152e-06,
      "loss": 0.78,
      "step": 705170
    },
    {
      "epoch": 2.471480091263708,
      "grad_norm": 2.9375,
      "learning_rate": 9.78748205435745e-06,
      "loss": 0.799,
      "step": 705180
    },
    {
      "epoch": 2.4715151387706036,
      "grad_norm": 2.875,
      "learning_rate": 9.786833025693746e-06,
      "loss": 0.9065,
      "step": 705190
    },
    {
      "epoch": 2.4715501862774993,
      "grad_norm": 2.71875,
      "learning_rate": 9.786183997030044e-06,
      "loss": 0.8636,
      "step": 705200
    },
    {
      "epoch": 2.4715852337843947,
      "grad_norm": 2.6875,
      "learning_rate": 9.785534968366344e-06,
      "loss": 0.8431,
      "step": 705210
    },
    {
      "epoch": 2.4716202812912904,
      "grad_norm": 2.890625,
      "learning_rate": 9.784885939702642e-06,
      "loss": 0.8921,
      "step": 705220
    },
    {
      "epoch": 2.471655328798186,
      "grad_norm": 2.5625,
      "learning_rate": 9.78423691103894e-06,
      "loss": 0.7491,
      "step": 705230
    },
    {
      "epoch": 2.4716903763050815,
      "grad_norm": 3.046875,
      "learning_rate": 9.783587882375238e-06,
      "loss": 0.7601,
      "step": 705240
    },
    {
      "epoch": 2.4717254238119772,
      "grad_norm": 3.4375,
      "learning_rate": 9.782938853711536e-06,
      "loss": 0.7421,
      "step": 705250
    },
    {
      "epoch": 2.4717604713188726,
      "grad_norm": 2.734375,
      "learning_rate": 9.782289825047834e-06,
      "loss": 0.7561,
      "step": 705260
    },
    {
      "epoch": 2.4717955188257683,
      "grad_norm": 2.75,
      "learning_rate": 9.781640796384132e-06,
      "loss": 0.8219,
      "step": 705270
    },
    {
      "epoch": 2.471830566332664,
      "grad_norm": 2.703125,
      "learning_rate": 9.780991767720432e-06,
      "loss": 0.8278,
      "step": 705280
    },
    {
      "epoch": 2.4718656138395594,
      "grad_norm": 3.09375,
      "learning_rate": 9.780342739056728e-06,
      "loss": 0.7801,
      "step": 705290
    },
    {
      "epoch": 2.471900661346455,
      "grad_norm": 3.03125,
      "learning_rate": 9.779693710393026e-06,
      "loss": 0.763,
      "step": 705300
    },
    {
      "epoch": 2.471935708853351,
      "grad_norm": 3.015625,
      "learning_rate": 9.779044681729324e-06,
      "loss": 0.7817,
      "step": 705310
    },
    {
      "epoch": 2.471970756360246,
      "grad_norm": 2.875,
      "learning_rate": 9.778395653065622e-06,
      "loss": 0.7822,
      "step": 705320
    },
    {
      "epoch": 2.472005803867142,
      "grad_norm": 3.0625,
      "learning_rate": 9.77774662440192e-06,
      "loss": 0.8487,
      "step": 705330
    },
    {
      "epoch": 2.4720408513740377,
      "grad_norm": 3.0,
      "learning_rate": 9.77709759573822e-06,
      "loss": 0.8385,
      "step": 705340
    },
    {
      "epoch": 2.472075898880933,
      "grad_norm": 3.203125,
      "learning_rate": 9.776448567074518e-06,
      "loss": 0.7821,
      "step": 705350
    },
    {
      "epoch": 2.472110946387829,
      "grad_norm": 3.109375,
      "learning_rate": 9.775799538410816e-06,
      "loss": 0.7935,
      "step": 705360
    },
    {
      "epoch": 2.472145993894724,
      "grad_norm": 2.6875,
      "learning_rate": 9.775150509747114e-06,
      "loss": 0.7455,
      "step": 705370
    },
    {
      "epoch": 2.47218104140162,
      "grad_norm": 2.96875,
      "learning_rate": 9.77450148108341e-06,
      "loss": 0.8026,
      "step": 705380
    },
    {
      "epoch": 2.4722160889085156,
      "grad_norm": 3.125,
      "learning_rate": 9.773852452419708e-06,
      "loss": 0.8306,
      "step": 705390
    },
    {
      "epoch": 2.472251136415411,
      "grad_norm": 2.953125,
      "learning_rate": 9.773203423756008e-06,
      "loss": 0.8249,
      "step": 705400
    },
    {
      "epoch": 2.4722861839223067,
      "grad_norm": 3.421875,
      "learning_rate": 9.772554395092306e-06,
      "loss": 0.7642,
      "step": 705410
    },
    {
      "epoch": 2.4723212314292025,
      "grad_norm": 3.046875,
      "learning_rate": 9.771905366428604e-06,
      "loss": 0.8651,
      "step": 705420
    },
    {
      "epoch": 2.4723562789360978,
      "grad_norm": 3.0625,
      "learning_rate": 9.771256337764902e-06,
      "loss": 0.7404,
      "step": 705430
    },
    {
      "epoch": 2.4723913264429935,
      "grad_norm": 2.921875,
      "learning_rate": 9.7706073091012e-06,
      "loss": 0.7408,
      "step": 705440
    },
    {
      "epoch": 2.4724263739498893,
      "grad_norm": 3.671875,
      "learning_rate": 9.769958280437498e-06,
      "loss": 0.8191,
      "step": 705450
    },
    {
      "epoch": 2.4724614214567846,
      "grad_norm": 2.765625,
      "learning_rate": 9.769309251773796e-06,
      "loss": 0.783,
      "step": 705460
    },
    {
      "epoch": 2.4724964689636804,
      "grad_norm": 2.90625,
      "learning_rate": 9.768660223110094e-06,
      "loss": 0.8381,
      "step": 705470
    },
    {
      "epoch": 2.4725315164705757,
      "grad_norm": 2.890625,
      "learning_rate": 9.768011194446392e-06,
      "loss": 0.774,
      "step": 705480
    },
    {
      "epoch": 2.4725665639774714,
      "grad_norm": 2.984375,
      "learning_rate": 9.76736216578269e-06,
      "loss": 0.8114,
      "step": 705490
    },
    {
      "epoch": 2.472601611484367,
      "grad_norm": 2.53125,
      "learning_rate": 9.766713137118988e-06,
      "loss": 0.7941,
      "step": 705500
    },
    {
      "epoch": 2.4726366589912625,
      "grad_norm": 2.875,
      "learning_rate": 9.766064108455286e-06,
      "loss": 0.756,
      "step": 705510
    },
    {
      "epoch": 2.4726717064981583,
      "grad_norm": 2.296875,
      "learning_rate": 9.765415079791585e-06,
      "loss": 0.7581,
      "step": 705520
    },
    {
      "epoch": 2.472706754005054,
      "grad_norm": 2.59375,
      "learning_rate": 9.764766051127883e-06,
      "loss": 0.7428,
      "step": 705530
    },
    {
      "epoch": 2.4727418015119493,
      "grad_norm": 2.8125,
      "learning_rate": 9.764117022464181e-06,
      "loss": 0.8079,
      "step": 705540
    },
    {
      "epoch": 2.472776849018845,
      "grad_norm": 2.84375,
      "learning_rate": 9.76346799380048e-06,
      "loss": 0.8417,
      "step": 705550
    },
    {
      "epoch": 2.472811896525741,
      "grad_norm": 2.78125,
      "learning_rate": 9.762818965136777e-06,
      "loss": 0.8156,
      "step": 705560
    },
    {
      "epoch": 2.472846944032636,
      "grad_norm": 2.8125,
      "learning_rate": 9.762169936473074e-06,
      "loss": 0.7479,
      "step": 705570
    },
    {
      "epoch": 2.472881991539532,
      "grad_norm": 3.1875,
      "learning_rate": 9.761520907809373e-06,
      "loss": 0.8103,
      "step": 705580
    },
    {
      "epoch": 2.4729170390464272,
      "grad_norm": 2.921875,
      "learning_rate": 9.760871879145671e-06,
      "loss": 0.7646,
      "step": 705590
    },
    {
      "epoch": 2.472952086553323,
      "grad_norm": 3.453125,
      "learning_rate": 9.76022285048197e-06,
      "loss": 0.8723,
      "step": 705600
    },
    {
      "epoch": 2.4729871340602188,
      "grad_norm": 3.078125,
      "learning_rate": 9.759573821818267e-06,
      "loss": 0.8017,
      "step": 705610
    },
    {
      "epoch": 2.473022181567114,
      "grad_norm": 2.734375,
      "learning_rate": 9.758924793154565e-06,
      "loss": 0.839,
      "step": 705620
    },
    {
      "epoch": 2.47305722907401,
      "grad_norm": 3.109375,
      "learning_rate": 9.758275764490863e-06,
      "loss": 0.8106,
      "step": 705630
    },
    {
      "epoch": 2.4730922765809056,
      "grad_norm": 2.5,
      "learning_rate": 9.757626735827161e-06,
      "loss": 0.7877,
      "step": 705640
    },
    {
      "epoch": 2.473127324087801,
      "grad_norm": 3.09375,
      "learning_rate": 9.756977707163461e-06,
      "loss": 0.7802,
      "step": 705650
    },
    {
      "epoch": 2.4731623715946967,
      "grad_norm": 3.296875,
      "learning_rate": 9.756328678499757e-06,
      "loss": 0.8566,
      "step": 705660
    },
    {
      "epoch": 2.4731974191015924,
      "grad_norm": 2.78125,
      "learning_rate": 9.755679649836055e-06,
      "loss": 0.7239,
      "step": 705670
    },
    {
      "epoch": 2.4732324666084877,
      "grad_norm": 2.625,
      "learning_rate": 9.755030621172353e-06,
      "loss": 0.7433,
      "step": 705680
    },
    {
      "epoch": 2.4732675141153835,
      "grad_norm": 2.796875,
      "learning_rate": 9.754381592508651e-06,
      "loss": 0.79,
      "step": 705690
    },
    {
      "epoch": 2.473302561622279,
      "grad_norm": 12.0,
      "learning_rate": 9.75373256384495e-06,
      "loss": 0.8309,
      "step": 705700
    },
    {
      "epoch": 2.4733376091291746,
      "grad_norm": 2.8125,
      "learning_rate": 9.753083535181249e-06,
      "loss": 0.7314,
      "step": 705710
    },
    {
      "epoch": 2.4733726566360703,
      "grad_norm": 3.125,
      "learning_rate": 9.752434506517547e-06,
      "loss": 0.7866,
      "step": 705720
    },
    {
      "epoch": 2.4734077041429656,
      "grad_norm": 2.96875,
      "learning_rate": 9.751785477853845e-06,
      "loss": 0.9203,
      "step": 705730
    },
    {
      "epoch": 2.4734427516498614,
      "grad_norm": 3.25,
      "learning_rate": 9.751136449190143e-06,
      "loss": 0.7564,
      "step": 705740
    },
    {
      "epoch": 2.473477799156757,
      "grad_norm": 2.90625,
      "learning_rate": 9.750487420526441e-06,
      "loss": 0.7272,
      "step": 705750
    },
    {
      "epoch": 2.4735128466636525,
      "grad_norm": 3.03125,
      "learning_rate": 9.749838391862739e-06,
      "loss": 0.8315,
      "step": 705760
    },
    {
      "epoch": 2.473547894170548,
      "grad_norm": 3.359375,
      "learning_rate": 9.749189363199037e-06,
      "loss": 0.8311,
      "step": 705770
    },
    {
      "epoch": 2.473582941677444,
      "grad_norm": 2.890625,
      "learning_rate": 9.748540334535335e-06,
      "loss": 0.8457,
      "step": 705780
    },
    {
      "epoch": 2.4736179891843393,
      "grad_norm": 2.96875,
      "learning_rate": 9.747891305871633e-06,
      "loss": 0.7672,
      "step": 705790
    },
    {
      "epoch": 2.473653036691235,
      "grad_norm": 2.453125,
      "learning_rate": 9.747242277207931e-06,
      "loss": 0.6986,
      "step": 705800
    },
    {
      "epoch": 2.4736880841981304,
      "grad_norm": 3.3125,
      "learning_rate": 9.746593248544229e-06,
      "loss": 0.7895,
      "step": 705810
    },
    {
      "epoch": 2.473723131705026,
      "grad_norm": 2.375,
      "learning_rate": 9.745944219880527e-06,
      "loss": 0.7759,
      "step": 705820
    },
    {
      "epoch": 2.473758179211922,
      "grad_norm": 2.875,
      "learning_rate": 9.745295191216827e-06,
      "loss": 0.8636,
      "step": 705830
    },
    {
      "epoch": 2.473793226718817,
      "grad_norm": 2.59375,
      "learning_rate": 9.744646162553125e-06,
      "loss": 0.7842,
      "step": 705840
    },
    {
      "epoch": 2.473828274225713,
      "grad_norm": 3.1875,
      "learning_rate": 9.743997133889421e-06,
      "loss": 0.779,
      "step": 705850
    },
    {
      "epoch": 2.4738633217326087,
      "grad_norm": 3.0625,
      "learning_rate": 9.743348105225719e-06,
      "loss": 0.8028,
      "step": 705860
    },
    {
      "epoch": 2.473898369239504,
      "grad_norm": 2.9375,
      "learning_rate": 9.742699076562017e-06,
      "loss": 0.7495,
      "step": 705870
    },
    {
      "epoch": 2.4739334167463998,
      "grad_norm": 3.0,
      "learning_rate": 9.742050047898315e-06,
      "loss": 0.7613,
      "step": 705880
    },
    {
      "epoch": 2.4739684642532955,
      "grad_norm": 2.828125,
      "learning_rate": 9.741401019234615e-06,
      "loss": 0.7862,
      "step": 705890
    },
    {
      "epoch": 2.474003511760191,
      "grad_norm": 2.625,
      "learning_rate": 9.740751990570913e-06,
      "loss": 0.727,
      "step": 705900
    },
    {
      "epoch": 2.4740385592670866,
      "grad_norm": 2.921875,
      "learning_rate": 9.74010296190721e-06,
      "loss": 0.8148,
      "step": 705910
    },
    {
      "epoch": 2.474073606773982,
      "grad_norm": 3.140625,
      "learning_rate": 9.739453933243509e-06,
      "loss": 0.7704,
      "step": 705920
    },
    {
      "epoch": 2.4741086542808777,
      "grad_norm": 2.53125,
      "learning_rate": 9.738804904579807e-06,
      "loss": 0.7214,
      "step": 705930
    },
    {
      "epoch": 2.4741437017877734,
      "grad_norm": 2.921875,
      "learning_rate": 9.738155875916103e-06,
      "loss": 0.7911,
      "step": 705940
    },
    {
      "epoch": 2.4741787492946687,
      "grad_norm": 2.859375,
      "learning_rate": 9.737506847252403e-06,
      "loss": 0.8443,
      "step": 705950
    },
    {
      "epoch": 2.4742137968015645,
      "grad_norm": 2.96875,
      "learning_rate": 9.7368578185887e-06,
      "loss": 0.8416,
      "step": 705960
    },
    {
      "epoch": 2.4742488443084603,
      "grad_norm": 2.609375,
      "learning_rate": 9.736208789924999e-06,
      "loss": 0.7979,
      "step": 705970
    },
    {
      "epoch": 2.4742838918153556,
      "grad_norm": 2.9375,
      "learning_rate": 9.735559761261297e-06,
      "loss": 0.8509,
      "step": 705980
    },
    {
      "epoch": 2.4743189393222513,
      "grad_norm": 3.078125,
      "learning_rate": 9.734910732597595e-06,
      "loss": 0.841,
      "step": 705990
    },
    {
      "epoch": 2.474353986829147,
      "grad_norm": 3.078125,
      "learning_rate": 9.734261703933893e-06,
      "loss": 0.8324,
      "step": 706000
    },
    {
      "epoch": 2.4743890343360424,
      "grad_norm": 2.640625,
      "learning_rate": 9.733612675270192e-06,
      "loss": 0.8152,
      "step": 706010
    },
    {
      "epoch": 2.474424081842938,
      "grad_norm": 2.75,
      "learning_rate": 9.73296364660649e-06,
      "loss": 0.7344,
      "step": 706020
    },
    {
      "epoch": 2.4744591293498335,
      "grad_norm": 2.625,
      "learning_rate": 9.732314617942788e-06,
      "loss": 0.7962,
      "step": 706030
    },
    {
      "epoch": 2.4744941768567292,
      "grad_norm": 2.84375,
      "learning_rate": 9.731665589279085e-06,
      "loss": 0.8815,
      "step": 706040
    },
    {
      "epoch": 2.474529224363625,
      "grad_norm": 3.1875,
      "learning_rate": 9.731016560615383e-06,
      "loss": 0.7788,
      "step": 706050
    },
    {
      "epoch": 2.4745642718705203,
      "grad_norm": 2.8125,
      "learning_rate": 9.73036753195168e-06,
      "loss": 0.7767,
      "step": 706060
    },
    {
      "epoch": 2.474599319377416,
      "grad_norm": 2.4375,
      "learning_rate": 9.72971850328798e-06,
      "loss": 0.7952,
      "step": 706070
    },
    {
      "epoch": 2.474634366884312,
      "grad_norm": 3.28125,
      "learning_rate": 9.729069474624278e-06,
      "loss": 0.793,
      "step": 706080
    },
    {
      "epoch": 2.474669414391207,
      "grad_norm": 2.875,
      "learning_rate": 9.728420445960576e-06,
      "loss": 0.6691,
      "step": 706090
    },
    {
      "epoch": 2.474704461898103,
      "grad_norm": 2.90625,
      "learning_rate": 9.727771417296874e-06,
      "loss": 0.7644,
      "step": 706100
    },
    {
      "epoch": 2.4747395094049986,
      "grad_norm": 3.15625,
      "learning_rate": 9.727122388633172e-06,
      "loss": 0.7881,
      "step": 706110
    },
    {
      "epoch": 2.474774556911894,
      "grad_norm": 3.390625,
      "learning_rate": 9.72647335996947e-06,
      "loss": 0.8843,
      "step": 706120
    },
    {
      "epoch": 2.4748096044187897,
      "grad_norm": 2.5625,
      "learning_rate": 9.725824331305768e-06,
      "loss": 0.7295,
      "step": 706130
    },
    {
      "epoch": 2.474844651925685,
      "grad_norm": 2.71875,
      "learning_rate": 9.725175302642066e-06,
      "loss": 0.8432,
      "step": 706140
    },
    {
      "epoch": 2.474879699432581,
      "grad_norm": 2.65625,
      "learning_rate": 9.724526273978364e-06,
      "loss": 0.6973,
      "step": 706150
    },
    {
      "epoch": 2.4749147469394766,
      "grad_norm": 2.40625,
      "learning_rate": 9.723877245314662e-06,
      "loss": 0.7776,
      "step": 706160
    },
    {
      "epoch": 2.474949794446372,
      "grad_norm": 2.9375,
      "learning_rate": 9.72322821665096e-06,
      "loss": 0.8715,
      "step": 706170
    },
    {
      "epoch": 2.4749848419532676,
      "grad_norm": 2.671875,
      "learning_rate": 9.722579187987258e-06,
      "loss": 0.8104,
      "step": 706180
    },
    {
      "epoch": 2.4750198894601634,
      "grad_norm": 2.59375,
      "learning_rate": 9.721930159323556e-06,
      "loss": 0.8529,
      "step": 706190
    },
    {
      "epoch": 2.4750549369670587,
      "grad_norm": 3.46875,
      "learning_rate": 9.721281130659856e-06,
      "loss": 0.813,
      "step": 706200
    },
    {
      "epoch": 2.4750899844739545,
      "grad_norm": 3.078125,
      "learning_rate": 9.720632101996154e-06,
      "loss": 0.7282,
      "step": 706210
    },
    {
      "epoch": 2.47512503198085,
      "grad_norm": 2.921875,
      "learning_rate": 9.719983073332452e-06,
      "loss": 0.8688,
      "step": 706220
    },
    {
      "epoch": 2.4751600794877455,
      "grad_norm": 2.9375,
      "learning_rate": 9.719334044668748e-06,
      "loss": 0.7298,
      "step": 706230
    },
    {
      "epoch": 2.4751951269946413,
      "grad_norm": 3.0,
      "learning_rate": 9.718685016005046e-06,
      "loss": 0.7954,
      "step": 706240
    },
    {
      "epoch": 2.4752301745015366,
      "grad_norm": 2.890625,
      "learning_rate": 9.718035987341346e-06,
      "loss": 0.7944,
      "step": 706250
    },
    {
      "epoch": 2.4752652220084324,
      "grad_norm": 3.015625,
      "learning_rate": 9.717386958677644e-06,
      "loss": 0.8535,
      "step": 706260
    },
    {
      "epoch": 2.475300269515328,
      "grad_norm": 3.0,
      "learning_rate": 9.716737930013942e-06,
      "loss": 0.8133,
      "step": 706270
    },
    {
      "epoch": 2.475335317022224,
      "grad_norm": 3.296875,
      "learning_rate": 9.71608890135024e-06,
      "loss": 0.7667,
      "step": 706280
    },
    {
      "epoch": 2.475370364529119,
      "grad_norm": 2.9375,
      "learning_rate": 9.715439872686538e-06,
      "loss": 0.7853,
      "step": 706290
    },
    {
      "epoch": 2.475405412036015,
      "grad_norm": 2.6875,
      "learning_rate": 9.714790844022836e-06,
      "loss": 0.8652,
      "step": 706300
    },
    {
      "epoch": 2.4754404595429103,
      "grad_norm": 3.171875,
      "learning_rate": 9.714141815359134e-06,
      "loss": 0.8509,
      "step": 706310
    },
    {
      "epoch": 2.475475507049806,
      "grad_norm": 2.765625,
      "learning_rate": 9.713492786695432e-06,
      "loss": 0.7934,
      "step": 706320
    },
    {
      "epoch": 2.4755105545567018,
      "grad_norm": 2.484375,
      "learning_rate": 9.71284375803173e-06,
      "loss": 0.7515,
      "step": 706330
    },
    {
      "epoch": 2.475545602063597,
      "grad_norm": 2.75,
      "learning_rate": 9.712194729368028e-06,
      "loss": 0.8053,
      "step": 706340
    },
    {
      "epoch": 2.475580649570493,
      "grad_norm": 2.453125,
      "learning_rate": 9.711545700704326e-06,
      "loss": 0.767,
      "step": 706350
    },
    {
      "epoch": 2.475615697077388,
      "grad_norm": 3.203125,
      "learning_rate": 9.710896672040624e-06,
      "loss": 0.7915,
      "step": 706360
    },
    {
      "epoch": 2.475650744584284,
      "grad_norm": 2.984375,
      "learning_rate": 9.710247643376922e-06,
      "loss": 0.8065,
      "step": 706370
    },
    {
      "epoch": 2.4756857920911797,
      "grad_norm": 2.453125,
      "learning_rate": 9.709598614713222e-06,
      "loss": 0.7621,
      "step": 706380
    },
    {
      "epoch": 2.4757208395980754,
      "grad_norm": 2.59375,
      "learning_rate": 9.70894958604952e-06,
      "loss": 0.8816,
      "step": 706390
    },
    {
      "epoch": 2.4757558871049707,
      "grad_norm": 2.671875,
      "learning_rate": 9.708300557385818e-06,
      "loss": 0.7952,
      "step": 706400
    },
    {
      "epoch": 2.4757909346118665,
      "grad_norm": 3.25,
      "learning_rate": 9.707651528722116e-06,
      "loss": 0.8139,
      "step": 706410
    },
    {
      "epoch": 2.475825982118762,
      "grad_norm": 3.0,
      "learning_rate": 9.707002500058412e-06,
      "loss": 0.7878,
      "step": 706420
    },
    {
      "epoch": 2.4758610296256576,
      "grad_norm": 3.078125,
      "learning_rate": 9.70635347139471e-06,
      "loss": 0.8237,
      "step": 706430
    },
    {
      "epoch": 2.4758960771325533,
      "grad_norm": 2.96875,
      "learning_rate": 9.70570444273101e-06,
      "loss": 0.7571,
      "step": 706440
    },
    {
      "epoch": 2.4759311246394486,
      "grad_norm": 2.65625,
      "learning_rate": 9.705055414067308e-06,
      "loss": 0.7519,
      "step": 706450
    },
    {
      "epoch": 2.4759661721463444,
      "grad_norm": 3.046875,
      "learning_rate": 9.704406385403606e-06,
      "loss": 0.8393,
      "step": 706460
    },
    {
      "epoch": 2.47600121965324,
      "grad_norm": 3.109375,
      "learning_rate": 9.703757356739904e-06,
      "loss": 0.7298,
      "step": 706470
    },
    {
      "epoch": 2.4760362671601355,
      "grad_norm": 3.234375,
      "learning_rate": 9.703108328076202e-06,
      "loss": 0.8474,
      "step": 706480
    },
    {
      "epoch": 2.4760713146670312,
      "grad_norm": 3.0,
      "learning_rate": 9.7024592994125e-06,
      "loss": 0.6977,
      "step": 706490
    },
    {
      "epoch": 2.476106362173927,
      "grad_norm": 2.609375,
      "learning_rate": 9.701810270748798e-06,
      "loss": 0.8442,
      "step": 706500
    },
    {
      "epoch": 2.4761414096808223,
      "grad_norm": 2.75,
      "learning_rate": 9.701161242085096e-06,
      "loss": 0.7114,
      "step": 706510
    },
    {
      "epoch": 2.476176457187718,
      "grad_norm": 2.640625,
      "learning_rate": 9.700512213421394e-06,
      "loss": 0.8097,
      "step": 706520
    },
    {
      "epoch": 2.4762115046946134,
      "grad_norm": 2.828125,
      "learning_rate": 9.699863184757692e-06,
      "loss": 0.7832,
      "step": 706530
    },
    {
      "epoch": 2.476246552201509,
      "grad_norm": 3.390625,
      "learning_rate": 9.69921415609399e-06,
      "loss": 0.8174,
      "step": 706540
    },
    {
      "epoch": 2.476281599708405,
      "grad_norm": 2.96875,
      "learning_rate": 9.698565127430288e-06,
      "loss": 0.7898,
      "step": 706550
    },
    {
      "epoch": 2.4763166472153,
      "grad_norm": 3.328125,
      "learning_rate": 9.697916098766587e-06,
      "loss": 0.8092,
      "step": 706560
    },
    {
      "epoch": 2.476351694722196,
      "grad_norm": 2.96875,
      "learning_rate": 9.697267070102885e-06,
      "loss": 0.8804,
      "step": 706570
    },
    {
      "epoch": 2.4763867422290917,
      "grad_norm": 3.4375,
      "learning_rate": 9.696618041439183e-06,
      "loss": 0.8844,
      "step": 706580
    },
    {
      "epoch": 2.476421789735987,
      "grad_norm": 2.640625,
      "learning_rate": 9.695969012775481e-06,
      "loss": 0.7254,
      "step": 706590
    },
    {
      "epoch": 2.476456837242883,
      "grad_norm": 2.53125,
      "learning_rate": 9.695319984111778e-06,
      "loss": 0.7443,
      "step": 706600
    },
    {
      "epoch": 2.4764918847497785,
      "grad_norm": 2.765625,
      "learning_rate": 9.694670955448076e-06,
      "loss": 0.7663,
      "step": 706610
    },
    {
      "epoch": 2.476526932256674,
      "grad_norm": 2.796875,
      "learning_rate": 9.694021926784375e-06,
      "loss": 0.8087,
      "step": 706620
    },
    {
      "epoch": 2.4765619797635696,
      "grad_norm": 2.828125,
      "learning_rate": 9.693372898120673e-06,
      "loss": 0.7524,
      "step": 706630
    },
    {
      "epoch": 2.476597027270465,
      "grad_norm": 2.984375,
      "learning_rate": 9.692723869456971e-06,
      "loss": 0.8355,
      "step": 706640
    },
    {
      "epoch": 2.4766320747773607,
      "grad_norm": 3.0625,
      "learning_rate": 9.69207484079327e-06,
      "loss": 0.8392,
      "step": 706650
    },
    {
      "epoch": 2.4766671222842565,
      "grad_norm": 2.984375,
      "learning_rate": 9.691425812129567e-06,
      "loss": 0.8635,
      "step": 706660
    },
    {
      "epoch": 2.4767021697911518,
      "grad_norm": 2.640625,
      "learning_rate": 9.690776783465865e-06,
      "loss": 0.8647,
      "step": 706670
    },
    {
      "epoch": 2.4767372172980475,
      "grad_norm": 2.921875,
      "learning_rate": 9.690127754802163e-06,
      "loss": 0.8301,
      "step": 706680
    },
    {
      "epoch": 2.4767722648049433,
      "grad_norm": 2.765625,
      "learning_rate": 9.689478726138463e-06,
      "loss": 0.7953,
      "step": 706690
    },
    {
      "epoch": 2.4768073123118386,
      "grad_norm": 2.828125,
      "learning_rate": 9.68882969747476e-06,
      "loss": 0.8844,
      "step": 706700
    },
    {
      "epoch": 2.4768423598187344,
      "grad_norm": 2.890625,
      "learning_rate": 9.688180668811057e-06,
      "loss": 0.7672,
      "step": 706710
    },
    {
      "epoch": 2.47687740732563,
      "grad_norm": 3.359375,
      "learning_rate": 9.687531640147355e-06,
      "loss": 0.7948,
      "step": 706720
    },
    {
      "epoch": 2.4769124548325254,
      "grad_norm": 3.15625,
      "learning_rate": 9.686882611483653e-06,
      "loss": 0.7952,
      "step": 706730
    },
    {
      "epoch": 2.476947502339421,
      "grad_norm": 2.859375,
      "learning_rate": 9.686233582819951e-06,
      "loss": 0.7791,
      "step": 706740
    },
    {
      "epoch": 2.4769825498463165,
      "grad_norm": 3.296875,
      "learning_rate": 9.685584554156251e-06,
      "loss": 0.8055,
      "step": 706750
    },
    {
      "epoch": 2.4770175973532123,
      "grad_norm": 2.75,
      "learning_rate": 9.684935525492549e-06,
      "loss": 0.7407,
      "step": 706760
    },
    {
      "epoch": 2.477052644860108,
      "grad_norm": 2.78125,
      "learning_rate": 9.684286496828847e-06,
      "loss": 0.849,
      "step": 706770
    },
    {
      "epoch": 2.4770876923670033,
      "grad_norm": 2.984375,
      "learning_rate": 9.683637468165145e-06,
      "loss": 0.7922,
      "step": 706780
    },
    {
      "epoch": 2.477122739873899,
      "grad_norm": 2.8125,
      "learning_rate": 9.682988439501441e-06,
      "loss": 0.7698,
      "step": 706790
    },
    {
      "epoch": 2.477157787380795,
      "grad_norm": 3.234375,
      "learning_rate": 9.682339410837741e-06,
      "loss": 0.7427,
      "step": 706800
    },
    {
      "epoch": 2.47719283488769,
      "grad_norm": 2.515625,
      "learning_rate": 9.681690382174039e-06,
      "loss": 0.7604,
      "step": 706810
    },
    {
      "epoch": 2.477227882394586,
      "grad_norm": 3.109375,
      "learning_rate": 9.681041353510337e-06,
      "loss": 0.8497,
      "step": 706820
    },
    {
      "epoch": 2.4772629299014817,
      "grad_norm": 2.6875,
      "learning_rate": 9.680392324846635e-06,
      "loss": 0.7662,
      "step": 706830
    },
    {
      "epoch": 2.477297977408377,
      "grad_norm": 2.78125,
      "learning_rate": 9.679743296182933e-06,
      "loss": 0.7172,
      "step": 706840
    },
    {
      "epoch": 2.4773330249152727,
      "grad_norm": 2.90625,
      "learning_rate": 9.679094267519231e-06,
      "loss": 0.8837,
      "step": 706850
    },
    {
      "epoch": 2.477368072422168,
      "grad_norm": 2.6875,
      "learning_rate": 9.678445238855529e-06,
      "loss": 0.7391,
      "step": 706860
    },
    {
      "epoch": 2.477403119929064,
      "grad_norm": 2.515625,
      "learning_rate": 9.677796210191829e-06,
      "loss": 0.7761,
      "step": 706870
    },
    {
      "epoch": 2.4774381674359596,
      "grad_norm": 2.875,
      "learning_rate": 9.677147181528127e-06,
      "loss": 0.9204,
      "step": 706880
    },
    {
      "epoch": 2.477473214942855,
      "grad_norm": 3.03125,
      "learning_rate": 9.676498152864423e-06,
      "loss": 0.8291,
      "step": 706890
    },
    {
      "epoch": 2.4775082624497506,
      "grad_norm": 3.296875,
      "learning_rate": 9.675849124200721e-06,
      "loss": 0.7497,
      "step": 706900
    },
    {
      "epoch": 2.4775433099566464,
      "grad_norm": 2.75,
      "learning_rate": 9.675200095537019e-06,
      "loss": 0.7766,
      "step": 706910
    },
    {
      "epoch": 2.4775783574635417,
      "grad_norm": 2.875,
      "learning_rate": 9.674551066873317e-06,
      "loss": 0.7713,
      "step": 706920
    },
    {
      "epoch": 2.4776134049704375,
      "grad_norm": 2.84375,
      "learning_rate": 9.673902038209617e-06,
      "loss": 0.7658,
      "step": 706930
    },
    {
      "epoch": 2.4776484524773332,
      "grad_norm": 3.25,
      "learning_rate": 9.673253009545915e-06,
      "loss": 0.8203,
      "step": 706940
    },
    {
      "epoch": 2.4776834999842285,
      "grad_norm": 2.9375,
      "learning_rate": 9.672603980882213e-06,
      "loss": 0.8022,
      "step": 706950
    },
    {
      "epoch": 2.4777185474911243,
      "grad_norm": 2.8125,
      "learning_rate": 9.67195495221851e-06,
      "loss": 0.754,
      "step": 706960
    },
    {
      "epoch": 2.4777535949980196,
      "grad_norm": 2.609375,
      "learning_rate": 9.671305923554809e-06,
      "loss": 0.7949,
      "step": 706970
    },
    {
      "epoch": 2.4777886425049154,
      "grad_norm": 2.8125,
      "learning_rate": 9.670656894891105e-06,
      "loss": 0.795,
      "step": 706980
    },
    {
      "epoch": 2.477823690011811,
      "grad_norm": 2.796875,
      "learning_rate": 9.670007866227405e-06,
      "loss": 0.7691,
      "step": 706990
    },
    {
      "epoch": 2.4778587375187064,
      "grad_norm": 3.21875,
      "learning_rate": 9.669358837563703e-06,
      "loss": 0.8667,
      "step": 707000
    },
    {
      "epoch": 2.477893785025602,
      "grad_norm": 3.078125,
      "learning_rate": 9.6687098089e-06,
      "loss": 0.8215,
      "step": 707010
    },
    {
      "epoch": 2.477928832532498,
      "grad_norm": 2.40625,
      "learning_rate": 9.668060780236299e-06,
      "loss": 0.7893,
      "step": 707020
    },
    {
      "epoch": 2.4779638800393933,
      "grad_norm": 2.78125,
      "learning_rate": 9.667411751572597e-06,
      "loss": 0.7495,
      "step": 707030
    },
    {
      "epoch": 2.477998927546289,
      "grad_norm": 3.25,
      "learning_rate": 9.666762722908895e-06,
      "loss": 0.7934,
      "step": 707040
    },
    {
      "epoch": 2.478033975053185,
      "grad_norm": 2.5,
      "learning_rate": 9.666113694245193e-06,
      "loss": 0.7558,
      "step": 707050
    },
    {
      "epoch": 2.47806902256008,
      "grad_norm": 3.140625,
      "learning_rate": 9.665464665581492e-06,
      "loss": 0.75,
      "step": 707060
    },
    {
      "epoch": 2.478104070066976,
      "grad_norm": 2.5,
      "learning_rate": 9.664815636917789e-06,
      "loss": 0.8405,
      "step": 707070
    },
    {
      "epoch": 2.478139117573871,
      "grad_norm": 2.875,
      "learning_rate": 9.664166608254087e-06,
      "loss": 0.7855,
      "step": 707080
    },
    {
      "epoch": 2.478174165080767,
      "grad_norm": 2.625,
      "learning_rate": 9.663517579590385e-06,
      "loss": 0.7665,
      "step": 707090
    },
    {
      "epoch": 2.4782092125876627,
      "grad_norm": 2.875,
      "learning_rate": 9.662868550926683e-06,
      "loss": 0.7552,
      "step": 707100
    },
    {
      "epoch": 2.478244260094558,
      "grad_norm": 2.5,
      "learning_rate": 9.662219522262982e-06,
      "loss": 0.7349,
      "step": 707110
    },
    {
      "epoch": 2.4782793076014538,
      "grad_norm": 3.0625,
      "learning_rate": 9.66157049359928e-06,
      "loss": 0.7897,
      "step": 707120
    },
    {
      "epoch": 2.4783143551083495,
      "grad_norm": 2.6875,
      "learning_rate": 9.660921464935578e-06,
      "loss": 0.7953,
      "step": 707130
    },
    {
      "epoch": 2.478349402615245,
      "grad_norm": 2.703125,
      "learning_rate": 9.660272436271876e-06,
      "loss": 0.8414,
      "step": 707140
    },
    {
      "epoch": 2.4783844501221406,
      "grad_norm": 3.09375,
      "learning_rate": 9.659623407608174e-06,
      "loss": 0.8327,
      "step": 707150
    },
    {
      "epoch": 2.4784194976290363,
      "grad_norm": 2.78125,
      "learning_rate": 9.658974378944472e-06,
      "loss": 0.8854,
      "step": 707160
    },
    {
      "epoch": 2.4784545451359317,
      "grad_norm": 3.1875,
      "learning_rate": 9.65832535028077e-06,
      "loss": 0.8614,
      "step": 707170
    },
    {
      "epoch": 2.4784895926428274,
      "grad_norm": 2.90625,
      "learning_rate": 9.657676321617068e-06,
      "loss": 0.8793,
      "step": 707180
    },
    {
      "epoch": 2.4785246401497227,
      "grad_norm": 3.109375,
      "learning_rate": 9.657027292953366e-06,
      "loss": 0.853,
      "step": 707190
    },
    {
      "epoch": 2.4785596876566185,
      "grad_norm": 2.734375,
      "learning_rate": 9.656378264289664e-06,
      "loss": 0.8366,
      "step": 707200
    },
    {
      "epoch": 2.4785947351635143,
      "grad_norm": 3.390625,
      "learning_rate": 9.655729235625962e-06,
      "loss": 0.786,
      "step": 707210
    },
    {
      "epoch": 2.4786297826704096,
      "grad_norm": 3.34375,
      "learning_rate": 9.65508020696226e-06,
      "loss": 0.8442,
      "step": 707220
    },
    {
      "epoch": 2.4786648301773053,
      "grad_norm": 2.65625,
      "learning_rate": 9.654431178298558e-06,
      "loss": 0.8039,
      "step": 707230
    },
    {
      "epoch": 2.478699877684201,
      "grad_norm": 3.015625,
      "learning_rate": 9.653782149634858e-06,
      "loss": 0.7797,
      "step": 707240
    },
    {
      "epoch": 2.4787349251910964,
      "grad_norm": 3.015625,
      "learning_rate": 9.653133120971156e-06,
      "loss": 0.7726,
      "step": 707250
    },
    {
      "epoch": 2.478769972697992,
      "grad_norm": 3.46875,
      "learning_rate": 9.652484092307452e-06,
      "loss": 0.8719,
      "step": 707260
    },
    {
      "epoch": 2.478805020204888,
      "grad_norm": 3.03125,
      "learning_rate": 9.65183506364375e-06,
      "loss": 0.7571,
      "step": 707270
    },
    {
      "epoch": 2.4788400677117832,
      "grad_norm": 2.828125,
      "learning_rate": 9.651186034980048e-06,
      "loss": 0.7568,
      "step": 707280
    },
    {
      "epoch": 2.478875115218679,
      "grad_norm": 3.015625,
      "learning_rate": 9.650537006316346e-06,
      "loss": 0.8344,
      "step": 707290
    },
    {
      "epoch": 2.4789101627255743,
      "grad_norm": 2.6875,
      "learning_rate": 9.649887977652646e-06,
      "loss": 0.7729,
      "step": 707300
    },
    {
      "epoch": 2.47894521023247,
      "grad_norm": 3.140625,
      "learning_rate": 9.649238948988944e-06,
      "loss": 0.8342,
      "step": 707310
    },
    {
      "epoch": 2.478980257739366,
      "grad_norm": 2.296875,
      "learning_rate": 9.648589920325242e-06,
      "loss": 0.8118,
      "step": 707320
    },
    {
      "epoch": 2.479015305246261,
      "grad_norm": 2.765625,
      "learning_rate": 9.64794089166154e-06,
      "loss": 0.8508,
      "step": 707330
    },
    {
      "epoch": 2.479050352753157,
      "grad_norm": 3.046875,
      "learning_rate": 9.647291862997838e-06,
      "loss": 0.7811,
      "step": 707340
    },
    {
      "epoch": 2.4790854002600526,
      "grad_norm": 3.203125,
      "learning_rate": 9.646642834334136e-06,
      "loss": 0.8657,
      "step": 707350
    },
    {
      "epoch": 2.479120447766948,
      "grad_norm": 3.09375,
      "learning_rate": 9.645993805670434e-06,
      "loss": 0.8409,
      "step": 707360
    },
    {
      "epoch": 2.4791554952738437,
      "grad_norm": 3.203125,
      "learning_rate": 9.645344777006732e-06,
      "loss": 0.826,
      "step": 707370
    },
    {
      "epoch": 2.4791905427807395,
      "grad_norm": 3.984375,
      "learning_rate": 9.64469574834303e-06,
      "loss": 0.8204,
      "step": 707380
    },
    {
      "epoch": 2.479225590287635,
      "grad_norm": 2.578125,
      "learning_rate": 9.644046719679328e-06,
      "loss": 0.7851,
      "step": 707390
    },
    {
      "epoch": 2.4792606377945305,
      "grad_norm": 2.828125,
      "learning_rate": 9.643397691015626e-06,
      "loss": 0.7418,
      "step": 707400
    },
    {
      "epoch": 2.479295685301426,
      "grad_norm": 2.296875,
      "learning_rate": 9.642748662351924e-06,
      "loss": 0.7698,
      "step": 707410
    },
    {
      "epoch": 2.4793307328083216,
      "grad_norm": 3.0,
      "learning_rate": 9.642099633688224e-06,
      "loss": 0.7468,
      "step": 707420
    },
    {
      "epoch": 2.4793657803152174,
      "grad_norm": 2.71875,
      "learning_rate": 9.641450605024522e-06,
      "loss": 0.8755,
      "step": 707430
    },
    {
      "epoch": 2.4794008278221127,
      "grad_norm": 2.8125,
      "learning_rate": 9.64080157636082e-06,
      "loss": 0.758,
      "step": 707440
    },
    {
      "epoch": 2.4794358753290084,
      "grad_norm": 2.8125,
      "learning_rate": 9.640152547697116e-06,
      "loss": 0.8626,
      "step": 707450
    },
    {
      "epoch": 2.479470922835904,
      "grad_norm": 2.90625,
      "learning_rate": 9.639503519033414e-06,
      "loss": 0.7903,
      "step": 707460
    },
    {
      "epoch": 2.4795059703427995,
      "grad_norm": 2.546875,
      "learning_rate": 9.638854490369712e-06,
      "loss": 0.8224,
      "step": 707470
    },
    {
      "epoch": 2.4795410178496953,
      "grad_norm": 2.734375,
      "learning_rate": 9.638205461706012e-06,
      "loss": 0.7561,
      "step": 707480
    },
    {
      "epoch": 2.479576065356591,
      "grad_norm": 2.78125,
      "learning_rate": 9.63755643304231e-06,
      "loss": 0.8399,
      "step": 707490
    },
    {
      "epoch": 2.4796111128634863,
      "grad_norm": 2.421875,
      "learning_rate": 9.636907404378608e-06,
      "loss": 0.7192,
      "step": 707500
    },
    {
      "epoch": 2.479646160370382,
      "grad_norm": 3.421875,
      "learning_rate": 9.636258375714906e-06,
      "loss": 0.7847,
      "step": 707510
    },
    {
      "epoch": 2.4796812078772774,
      "grad_norm": 3.203125,
      "learning_rate": 9.635609347051204e-06,
      "loss": 0.7986,
      "step": 707520
    },
    {
      "epoch": 2.479716255384173,
      "grad_norm": 3.34375,
      "learning_rate": 9.634960318387502e-06,
      "loss": 0.8314,
      "step": 707530
    },
    {
      "epoch": 2.479751302891069,
      "grad_norm": 2.875,
      "learning_rate": 9.6343112897238e-06,
      "loss": 0.8284,
      "step": 707540
    },
    {
      "epoch": 2.4797863503979647,
      "grad_norm": 2.53125,
      "learning_rate": 9.633662261060098e-06,
      "loss": 0.7477,
      "step": 707550
    },
    {
      "epoch": 2.47982139790486,
      "grad_norm": 3.25,
      "learning_rate": 9.633013232396396e-06,
      "loss": 0.7931,
      "step": 707560
    },
    {
      "epoch": 2.4798564454117558,
      "grad_norm": 2.59375,
      "learning_rate": 9.632364203732694e-06,
      "loss": 0.8083,
      "step": 707570
    },
    {
      "epoch": 2.479891492918651,
      "grad_norm": 3.0,
      "learning_rate": 9.631715175068992e-06,
      "loss": 0.758,
      "step": 707580
    },
    {
      "epoch": 2.479926540425547,
      "grad_norm": 3.21875,
      "learning_rate": 9.63106614640529e-06,
      "loss": 0.7988,
      "step": 707590
    },
    {
      "epoch": 2.4799615879324426,
      "grad_norm": 2.71875,
      "learning_rate": 9.630417117741588e-06,
      "loss": 0.7628,
      "step": 707600
    },
    {
      "epoch": 2.479996635439338,
      "grad_norm": 2.90625,
      "learning_rate": 9.629768089077888e-06,
      "loss": 0.8326,
      "step": 707610
    },
    {
      "epoch": 2.4800316829462337,
      "grad_norm": 3.1875,
      "learning_rate": 9.629119060414186e-06,
      "loss": 0.8937,
      "step": 707620
    },
    {
      "epoch": 2.480066730453129,
      "grad_norm": 2.9375,
      "learning_rate": 9.628470031750484e-06,
      "loss": 0.8073,
      "step": 707630
    },
    {
      "epoch": 2.4801017779600247,
      "grad_norm": 2.4375,
      "learning_rate": 9.62782100308678e-06,
      "loss": 0.7869,
      "step": 707640
    },
    {
      "epoch": 2.4801368254669205,
      "grad_norm": 3.109375,
      "learning_rate": 9.627171974423078e-06,
      "loss": 0.8185,
      "step": 707650
    },
    {
      "epoch": 2.4801718729738162,
      "grad_norm": 2.78125,
      "learning_rate": 9.626522945759378e-06,
      "loss": 0.8141,
      "step": 707660
    },
    {
      "epoch": 2.4802069204807116,
      "grad_norm": 3.0625,
      "learning_rate": 9.625873917095676e-06,
      "loss": 0.7914,
      "step": 707670
    },
    {
      "epoch": 2.4802419679876073,
      "grad_norm": 2.53125,
      "learning_rate": 9.625224888431974e-06,
      "loss": 0.6817,
      "step": 707680
    },
    {
      "epoch": 2.4802770154945026,
      "grad_norm": 5.375,
      "learning_rate": 9.624575859768272e-06,
      "loss": 0.8343,
      "step": 707690
    },
    {
      "epoch": 2.4803120630013984,
      "grad_norm": 2.921875,
      "learning_rate": 9.62392683110457e-06,
      "loss": 0.8091,
      "step": 707700
    },
    {
      "epoch": 2.480347110508294,
      "grad_norm": 2.8125,
      "learning_rate": 9.623277802440868e-06,
      "loss": 0.7876,
      "step": 707710
    },
    {
      "epoch": 2.4803821580151895,
      "grad_norm": 2.765625,
      "learning_rate": 9.622628773777166e-06,
      "loss": 0.7198,
      "step": 707720
    },
    {
      "epoch": 2.4804172055220852,
      "grad_norm": 2.6875,
      "learning_rate": 9.621979745113464e-06,
      "loss": 0.7333,
      "step": 707730
    },
    {
      "epoch": 2.4804522530289805,
      "grad_norm": 3.0625,
      "learning_rate": 9.621330716449762e-06,
      "loss": 0.8074,
      "step": 707740
    },
    {
      "epoch": 2.4804873005358763,
      "grad_norm": 3.15625,
      "learning_rate": 9.62068168778606e-06,
      "loss": 0.8788,
      "step": 707750
    },
    {
      "epoch": 2.480522348042772,
      "grad_norm": 3.234375,
      "learning_rate": 9.620032659122358e-06,
      "loss": 0.8063,
      "step": 707760
    },
    {
      "epoch": 2.480557395549668,
      "grad_norm": 2.65625,
      "learning_rate": 9.619383630458656e-06,
      "loss": 0.8671,
      "step": 707770
    },
    {
      "epoch": 2.480592443056563,
      "grad_norm": 2.890625,
      "learning_rate": 9.618734601794954e-06,
      "loss": 0.8589,
      "step": 707780
    },
    {
      "epoch": 2.480627490563459,
      "grad_norm": 2.84375,
      "learning_rate": 9.618085573131253e-06,
      "loss": 0.8649,
      "step": 707790
    },
    {
      "epoch": 2.480662538070354,
      "grad_norm": 2.984375,
      "learning_rate": 9.617436544467551e-06,
      "loss": 0.7131,
      "step": 707800
    },
    {
      "epoch": 2.48069758557725,
      "grad_norm": 2.90625,
      "learning_rate": 9.61678751580385e-06,
      "loss": 0.7526,
      "step": 707810
    },
    {
      "epoch": 2.4807326330841457,
      "grad_norm": 3.09375,
      "learning_rate": 9.616138487140147e-06,
      "loss": 0.7746,
      "step": 707820
    },
    {
      "epoch": 2.480767680591041,
      "grad_norm": 3.28125,
      "learning_rate": 9.615489458476444e-06,
      "loss": 0.8616,
      "step": 707830
    },
    {
      "epoch": 2.480802728097937,
      "grad_norm": 2.9375,
      "learning_rate": 9.614840429812742e-06,
      "loss": 0.7872,
      "step": 707840
    },
    {
      "epoch": 2.4808377756048325,
      "grad_norm": 2.75,
      "learning_rate": 9.614191401149041e-06,
      "loss": 0.8297,
      "step": 707850
    },
    {
      "epoch": 2.480872823111728,
      "grad_norm": 2.65625,
      "learning_rate": 9.61354237248534e-06,
      "loss": 0.7491,
      "step": 707860
    },
    {
      "epoch": 2.4809078706186236,
      "grad_norm": 3.03125,
      "learning_rate": 9.612893343821637e-06,
      "loss": 0.7987,
      "step": 707870
    },
    {
      "epoch": 2.4809429181255194,
      "grad_norm": 2.984375,
      "learning_rate": 9.612244315157935e-06,
      "loss": 0.8155,
      "step": 707880
    },
    {
      "epoch": 2.4809779656324147,
      "grad_norm": 2.53125,
      "learning_rate": 9.611595286494233e-06,
      "loss": 0.7767,
      "step": 707890
    },
    {
      "epoch": 2.4810130131393104,
      "grad_norm": 2.703125,
      "learning_rate": 9.610946257830531e-06,
      "loss": 0.7391,
      "step": 707900
    },
    {
      "epoch": 2.4810480606462058,
      "grad_norm": 3.1875,
      "learning_rate": 9.61029722916683e-06,
      "loss": 0.8352,
      "step": 707910
    },
    {
      "epoch": 2.4810831081531015,
      "grad_norm": 2.9375,
      "learning_rate": 9.609648200503127e-06,
      "loss": 0.6733,
      "step": 707920
    },
    {
      "epoch": 2.4811181556599973,
      "grad_norm": 3.125,
      "learning_rate": 9.608999171839425e-06,
      "loss": 0.8502,
      "step": 707930
    },
    {
      "epoch": 2.4811532031668926,
      "grad_norm": 2.921875,
      "learning_rate": 9.608350143175723e-06,
      "loss": 0.8557,
      "step": 707940
    },
    {
      "epoch": 2.4811882506737883,
      "grad_norm": 2.65625,
      "learning_rate": 9.607701114512021e-06,
      "loss": 0.827,
      "step": 707950
    },
    {
      "epoch": 2.481223298180684,
      "grad_norm": 3.4375,
      "learning_rate": 9.60705208584832e-06,
      "loss": 0.8089,
      "step": 707960
    },
    {
      "epoch": 2.4812583456875794,
      "grad_norm": 3.390625,
      "learning_rate": 9.606403057184619e-06,
      "loss": 0.7874,
      "step": 707970
    },
    {
      "epoch": 2.481293393194475,
      "grad_norm": 2.796875,
      "learning_rate": 9.605754028520917e-06,
      "loss": 0.7286,
      "step": 707980
    },
    {
      "epoch": 2.481328440701371,
      "grad_norm": 3.078125,
      "learning_rate": 9.605104999857215e-06,
      "loss": 0.8037,
      "step": 707990
    },
    {
      "epoch": 2.4813634882082662,
      "grad_norm": 3.4375,
      "learning_rate": 9.604455971193513e-06,
      "loss": 0.876,
      "step": 708000
    },
    {
      "epoch": 2.481398535715162,
      "grad_norm": 2.875,
      "learning_rate": 9.60380694252981e-06,
      "loss": 0.7748,
      "step": 708010
    },
    {
      "epoch": 2.4814335832220573,
      "grad_norm": 3.0625,
      "learning_rate": 9.603157913866107e-06,
      "loss": 0.8112,
      "step": 708020
    },
    {
      "epoch": 2.481468630728953,
      "grad_norm": 2.71875,
      "learning_rate": 9.602508885202407e-06,
      "loss": 0.7456,
      "step": 708030
    },
    {
      "epoch": 2.481503678235849,
      "grad_norm": 3.015625,
      "learning_rate": 9.601859856538705e-06,
      "loss": 0.8834,
      "step": 708040
    },
    {
      "epoch": 2.481538725742744,
      "grad_norm": 2.90625,
      "learning_rate": 9.601210827875003e-06,
      "loss": 0.7634,
      "step": 708050
    },
    {
      "epoch": 2.48157377324964,
      "grad_norm": 2.90625,
      "learning_rate": 9.600561799211301e-06,
      "loss": 0.7997,
      "step": 708060
    },
    {
      "epoch": 2.4816088207565357,
      "grad_norm": 2.578125,
      "learning_rate": 9.599912770547599e-06,
      "loss": 0.7753,
      "step": 708070
    },
    {
      "epoch": 2.481643868263431,
      "grad_norm": 2.859375,
      "learning_rate": 9.599263741883897e-06,
      "loss": 0.7943,
      "step": 708080
    },
    {
      "epoch": 2.4816789157703267,
      "grad_norm": 3.359375,
      "learning_rate": 9.598614713220195e-06,
      "loss": 0.8441,
      "step": 708090
    },
    {
      "epoch": 2.4817139632772225,
      "grad_norm": 2.921875,
      "learning_rate": 9.597965684556495e-06,
      "loss": 0.7982,
      "step": 708100
    },
    {
      "epoch": 2.481749010784118,
      "grad_norm": 2.90625,
      "learning_rate": 9.597316655892791e-06,
      "loss": 0.82,
      "step": 708110
    },
    {
      "epoch": 2.4817840582910136,
      "grad_norm": 2.9375,
      "learning_rate": 9.596667627229089e-06,
      "loss": 0.7981,
      "step": 708120
    },
    {
      "epoch": 2.481819105797909,
      "grad_norm": 2.75,
      "learning_rate": 9.596018598565387e-06,
      "loss": 0.7778,
      "step": 708130
    },
    {
      "epoch": 2.4818541533048046,
      "grad_norm": 3.21875,
      "learning_rate": 9.595369569901685e-06,
      "loss": 0.7305,
      "step": 708140
    },
    {
      "epoch": 2.4818892008117004,
      "grad_norm": 3.015625,
      "learning_rate": 9.594720541237983e-06,
      "loss": 0.8023,
      "step": 708150
    },
    {
      "epoch": 2.4819242483185957,
      "grad_norm": 3.140625,
      "learning_rate": 9.594071512574283e-06,
      "loss": 0.7953,
      "step": 708160
    },
    {
      "epoch": 2.4819592958254915,
      "grad_norm": 2.921875,
      "learning_rate": 9.59342248391058e-06,
      "loss": 0.8079,
      "step": 708170
    },
    {
      "epoch": 2.481994343332387,
      "grad_norm": 3.265625,
      "learning_rate": 9.592773455246879e-06,
      "loss": 0.7446,
      "step": 708180
    },
    {
      "epoch": 2.4820293908392825,
      "grad_norm": 2.515625,
      "learning_rate": 9.592124426583177e-06,
      "loss": 0.7673,
      "step": 708190
    },
    {
      "epoch": 2.4820644383461783,
      "grad_norm": 2.890625,
      "learning_rate": 9.591475397919473e-06,
      "loss": 0.8824,
      "step": 708200
    },
    {
      "epoch": 2.482099485853074,
      "grad_norm": 2.875,
      "learning_rate": 9.590826369255773e-06,
      "loss": 0.7676,
      "step": 708210
    },
    {
      "epoch": 2.4821345333599694,
      "grad_norm": 3.25,
      "learning_rate": 9.59017734059207e-06,
      "loss": 0.828,
      "step": 708220
    },
    {
      "epoch": 2.482169580866865,
      "grad_norm": 2.4375,
      "learning_rate": 9.589528311928369e-06,
      "loss": 0.7632,
      "step": 708230
    },
    {
      "epoch": 2.4822046283737604,
      "grad_norm": 2.828125,
      "learning_rate": 9.588879283264667e-06,
      "loss": 0.8018,
      "step": 708240
    },
    {
      "epoch": 2.482239675880656,
      "grad_norm": 2.75,
      "learning_rate": 9.588230254600965e-06,
      "loss": 0.7387,
      "step": 708250
    },
    {
      "epoch": 2.482274723387552,
      "grad_norm": 3.25,
      "learning_rate": 9.587581225937263e-06,
      "loss": 0.8372,
      "step": 708260
    },
    {
      "epoch": 2.4823097708944473,
      "grad_norm": 3.171875,
      "learning_rate": 9.58693219727356e-06,
      "loss": 0.8673,
      "step": 708270
    },
    {
      "epoch": 2.482344818401343,
      "grad_norm": 3.359375,
      "learning_rate": 9.58628316860986e-06,
      "loss": 0.8614,
      "step": 708280
    },
    {
      "epoch": 2.482379865908239,
      "grad_norm": 3.296875,
      "learning_rate": 9.585634139946158e-06,
      "loss": 0.885,
      "step": 708290
    },
    {
      "epoch": 2.482414913415134,
      "grad_norm": 2.796875,
      "learning_rate": 9.584985111282455e-06,
      "loss": 0.7515,
      "step": 708300
    },
    {
      "epoch": 2.48244996092203,
      "grad_norm": 2.84375,
      "learning_rate": 9.584336082618753e-06,
      "loss": 0.7587,
      "step": 708310
    },
    {
      "epoch": 2.4824850084289256,
      "grad_norm": 2.75,
      "learning_rate": 9.58368705395505e-06,
      "loss": 0.7059,
      "step": 708320
    },
    {
      "epoch": 2.482520055935821,
      "grad_norm": 2.921875,
      "learning_rate": 9.583038025291349e-06,
      "loss": 0.8,
      "step": 708330
    },
    {
      "epoch": 2.4825551034427167,
      "grad_norm": 3.421875,
      "learning_rate": 9.582388996627648e-06,
      "loss": 0.7773,
      "step": 708340
    },
    {
      "epoch": 2.482590150949612,
      "grad_norm": 2.421875,
      "learning_rate": 9.581739967963946e-06,
      "loss": 0.9058,
      "step": 708350
    },
    {
      "epoch": 2.4826251984565078,
      "grad_norm": 2.59375,
      "learning_rate": 9.581090939300244e-06,
      "loss": 0.7709,
      "step": 708360
    },
    {
      "epoch": 2.4826602459634035,
      "grad_norm": 2.71875,
      "learning_rate": 9.580441910636542e-06,
      "loss": 0.789,
      "step": 708370
    },
    {
      "epoch": 2.482695293470299,
      "grad_norm": 3.484375,
      "learning_rate": 9.57979288197284e-06,
      "loss": 0.8208,
      "step": 708380
    },
    {
      "epoch": 2.4827303409771946,
      "grad_norm": 2.375,
      "learning_rate": 9.579143853309137e-06,
      "loss": 0.8114,
      "step": 708390
    },
    {
      "epoch": 2.4827653884840903,
      "grad_norm": 2.71875,
      "learning_rate": 9.578494824645436e-06,
      "loss": 0.8173,
      "step": 708400
    },
    {
      "epoch": 2.4828004359909857,
      "grad_norm": 3.21875,
      "learning_rate": 9.577845795981734e-06,
      "loss": 0.8758,
      "step": 708410
    },
    {
      "epoch": 2.4828354834978814,
      "grad_norm": 2.921875,
      "learning_rate": 9.577196767318032e-06,
      "loss": 0.8333,
      "step": 708420
    },
    {
      "epoch": 2.482870531004777,
      "grad_norm": 3.03125,
      "learning_rate": 9.57654773865433e-06,
      "loss": 0.7809,
      "step": 708430
    },
    {
      "epoch": 2.4829055785116725,
      "grad_norm": 2.921875,
      "learning_rate": 9.575898709990628e-06,
      "loss": 0.7894,
      "step": 708440
    },
    {
      "epoch": 2.4829406260185682,
      "grad_norm": 3.046875,
      "learning_rate": 9.575249681326926e-06,
      "loss": 0.8571,
      "step": 708450
    },
    {
      "epoch": 2.4829756735254636,
      "grad_norm": 3.015625,
      "learning_rate": 9.574600652663224e-06,
      "loss": 0.8457,
      "step": 708460
    },
    {
      "epoch": 2.4830107210323593,
      "grad_norm": 3.171875,
      "learning_rate": 9.573951623999524e-06,
      "loss": 0.8505,
      "step": 708470
    },
    {
      "epoch": 2.483045768539255,
      "grad_norm": 2.390625,
      "learning_rate": 9.57330259533582e-06,
      "loss": 0.7995,
      "step": 708480
    },
    {
      "epoch": 2.4830808160461504,
      "grad_norm": 3.03125,
      "learning_rate": 9.572653566672118e-06,
      "loss": 0.8,
      "step": 708490
    },
    {
      "epoch": 2.483115863553046,
      "grad_norm": 2.953125,
      "learning_rate": 9.572004538008416e-06,
      "loss": 0.7063,
      "step": 708500
    },
    {
      "epoch": 2.483150911059942,
      "grad_norm": 2.65625,
      "learning_rate": 9.571355509344714e-06,
      "loss": 0.858,
      "step": 708510
    },
    {
      "epoch": 2.483185958566837,
      "grad_norm": 2.96875,
      "learning_rate": 9.570706480681014e-06,
      "loss": 0.7819,
      "step": 708520
    },
    {
      "epoch": 2.483221006073733,
      "grad_norm": 3.234375,
      "learning_rate": 9.570057452017312e-06,
      "loss": 0.7728,
      "step": 708530
    },
    {
      "epoch": 2.4832560535806287,
      "grad_norm": 2.921875,
      "learning_rate": 9.56940842335361e-06,
      "loss": 0.7699,
      "step": 708540
    },
    {
      "epoch": 2.483291101087524,
      "grad_norm": 2.71875,
      "learning_rate": 9.568759394689908e-06,
      "loss": 0.8051,
      "step": 708550
    },
    {
      "epoch": 2.48332614859442,
      "grad_norm": 2.484375,
      "learning_rate": 9.568110366026206e-06,
      "loss": 0.7671,
      "step": 708560
    },
    {
      "epoch": 2.483361196101315,
      "grad_norm": 3.359375,
      "learning_rate": 9.567461337362504e-06,
      "loss": 0.8075,
      "step": 708570
    },
    {
      "epoch": 2.483396243608211,
      "grad_norm": 2.40625,
      "learning_rate": 9.566812308698802e-06,
      "loss": 0.771,
      "step": 708580
    },
    {
      "epoch": 2.4834312911151066,
      "grad_norm": 2.84375,
      "learning_rate": 9.5661632800351e-06,
      "loss": 0.8611,
      "step": 708590
    },
    {
      "epoch": 2.483466338622002,
      "grad_norm": 2.75,
      "learning_rate": 9.565514251371398e-06,
      "loss": 0.7206,
      "step": 708600
    },
    {
      "epoch": 2.4835013861288977,
      "grad_norm": 2.609375,
      "learning_rate": 9.564865222707696e-06,
      "loss": 0.7797,
      "step": 708610
    },
    {
      "epoch": 2.4835364336357935,
      "grad_norm": 2.75,
      "learning_rate": 9.564216194043994e-06,
      "loss": 0.688,
      "step": 708620
    },
    {
      "epoch": 2.4835714811426888,
      "grad_norm": 3.0625,
      "learning_rate": 9.563567165380292e-06,
      "loss": 0.8391,
      "step": 708630
    },
    {
      "epoch": 2.4836065286495845,
      "grad_norm": 3.0,
      "learning_rate": 9.56291813671659e-06,
      "loss": 0.8231,
      "step": 708640
    },
    {
      "epoch": 2.4836415761564803,
      "grad_norm": 3.203125,
      "learning_rate": 9.56226910805289e-06,
      "loss": 0.8029,
      "step": 708650
    },
    {
      "epoch": 2.4836766236633756,
      "grad_norm": 2.90625,
      "learning_rate": 9.561620079389188e-06,
      "loss": 0.8055,
      "step": 708660
    },
    {
      "epoch": 2.4837116711702714,
      "grad_norm": 2.859375,
      "learning_rate": 9.560971050725484e-06,
      "loss": 0.7771,
      "step": 708670
    },
    {
      "epoch": 2.4837467186771667,
      "grad_norm": 2.71875,
      "learning_rate": 9.560322022061782e-06,
      "loss": 0.8455,
      "step": 708680
    },
    {
      "epoch": 2.4837817661840624,
      "grad_norm": 2.984375,
      "learning_rate": 9.55967299339808e-06,
      "loss": 0.8545,
      "step": 708690
    },
    {
      "epoch": 2.483816813690958,
      "grad_norm": 3.5,
      "learning_rate": 9.559023964734378e-06,
      "loss": 0.8028,
      "step": 708700
    },
    {
      "epoch": 2.4838518611978535,
      "grad_norm": 3.59375,
      "learning_rate": 9.558374936070678e-06,
      "loss": 0.9314,
      "step": 708710
    },
    {
      "epoch": 2.4838869087047493,
      "grad_norm": 2.421875,
      "learning_rate": 9.557725907406976e-06,
      "loss": 0.6899,
      "step": 708720
    },
    {
      "epoch": 2.483921956211645,
      "grad_norm": 3.046875,
      "learning_rate": 9.557076878743274e-06,
      "loss": 0.8544,
      "step": 708730
    },
    {
      "epoch": 2.4839570037185403,
      "grad_norm": 2.984375,
      "learning_rate": 9.556427850079572e-06,
      "loss": 0.7752,
      "step": 708740
    },
    {
      "epoch": 2.483992051225436,
      "grad_norm": 2.546875,
      "learning_rate": 9.55577882141587e-06,
      "loss": 0.7537,
      "step": 708750
    },
    {
      "epoch": 2.484027098732332,
      "grad_norm": 2.65625,
      "learning_rate": 9.555129792752168e-06,
      "loss": 0.8059,
      "step": 708760
    },
    {
      "epoch": 2.484062146239227,
      "grad_norm": 2.828125,
      "learning_rate": 9.554480764088466e-06,
      "loss": 0.8582,
      "step": 708770
    },
    {
      "epoch": 2.484097193746123,
      "grad_norm": 3.140625,
      "learning_rate": 9.553831735424764e-06,
      "loss": 0.762,
      "step": 708780
    },
    {
      "epoch": 2.4841322412530182,
      "grad_norm": 3.203125,
      "learning_rate": 9.553182706761062e-06,
      "loss": 0.8492,
      "step": 708790
    },
    {
      "epoch": 2.484167288759914,
      "grad_norm": 3.234375,
      "learning_rate": 9.55253367809736e-06,
      "loss": 0.7568,
      "step": 708800
    },
    {
      "epoch": 2.4842023362668098,
      "grad_norm": 2.84375,
      "learning_rate": 9.551884649433658e-06,
      "loss": 0.877,
      "step": 708810
    },
    {
      "epoch": 2.484237383773705,
      "grad_norm": 3.046875,
      "learning_rate": 9.551235620769956e-06,
      "loss": 0.7995,
      "step": 708820
    },
    {
      "epoch": 2.484272431280601,
      "grad_norm": 2.890625,
      "learning_rate": 9.550586592106255e-06,
      "loss": 0.7845,
      "step": 708830
    },
    {
      "epoch": 2.4843074787874966,
      "grad_norm": 2.859375,
      "learning_rate": 9.549937563442553e-06,
      "loss": 0.8251,
      "step": 708840
    },
    {
      "epoch": 2.484342526294392,
      "grad_norm": 2.625,
      "learning_rate": 9.549288534778851e-06,
      "loss": 0.7628,
      "step": 708850
    },
    {
      "epoch": 2.4843775738012877,
      "grad_norm": 3.234375,
      "learning_rate": 9.548639506115148e-06,
      "loss": 0.8369,
      "step": 708860
    },
    {
      "epoch": 2.4844126213081834,
      "grad_norm": 3.0,
      "learning_rate": 9.547990477451446e-06,
      "loss": 0.8268,
      "step": 708870
    },
    {
      "epoch": 2.4844476688150787,
      "grad_norm": 2.875,
      "learning_rate": 9.547341448787744e-06,
      "loss": 0.8163,
      "step": 708880
    },
    {
      "epoch": 2.4844827163219745,
      "grad_norm": 2.875,
      "learning_rate": 9.546692420124043e-06,
      "loss": 0.8987,
      "step": 708890
    },
    {
      "epoch": 2.48451776382887,
      "grad_norm": 2.578125,
      "learning_rate": 9.546043391460341e-06,
      "loss": 0.7377,
      "step": 708900
    },
    {
      "epoch": 2.4845528113357656,
      "grad_norm": 3.03125,
      "learning_rate": 9.54539436279664e-06,
      "loss": 0.8652,
      "step": 708910
    },
    {
      "epoch": 2.4845878588426613,
      "grad_norm": 3.171875,
      "learning_rate": 9.544745334132937e-06,
      "loss": 0.7927,
      "step": 708920
    },
    {
      "epoch": 2.484622906349557,
      "grad_norm": 2.859375,
      "learning_rate": 9.544096305469235e-06,
      "loss": 0.7773,
      "step": 708930
    },
    {
      "epoch": 2.4846579538564524,
      "grad_norm": 2.625,
      "learning_rate": 9.543447276805533e-06,
      "loss": 0.6973,
      "step": 708940
    },
    {
      "epoch": 2.484693001363348,
      "grad_norm": 3.078125,
      "learning_rate": 9.542798248141831e-06,
      "loss": 0.855,
      "step": 708950
    },
    {
      "epoch": 2.4847280488702435,
      "grad_norm": 2.765625,
      "learning_rate": 9.54214921947813e-06,
      "loss": 0.8069,
      "step": 708960
    },
    {
      "epoch": 2.484763096377139,
      "grad_norm": 2.40625,
      "learning_rate": 9.541500190814427e-06,
      "loss": 0.8401,
      "step": 708970
    },
    {
      "epoch": 2.484798143884035,
      "grad_norm": 2.96875,
      "learning_rate": 9.540851162150725e-06,
      "loss": 0.7734,
      "step": 708980
    },
    {
      "epoch": 2.4848331913909303,
      "grad_norm": 2.6875,
      "learning_rate": 9.540202133487023e-06,
      "loss": 0.8395,
      "step": 708990
    },
    {
      "epoch": 2.484868238897826,
      "grad_norm": 2.625,
      "learning_rate": 9.539553104823321e-06,
      "loss": 0.8267,
      "step": 709000
    },
    {
      "epoch": 2.4849032864047214,
      "grad_norm": 3.28125,
      "learning_rate": 9.53890407615962e-06,
      "loss": 0.7849,
      "step": 709010
    },
    {
      "epoch": 2.484938333911617,
      "grad_norm": 2.53125,
      "learning_rate": 9.538255047495919e-06,
      "loss": 0.8603,
      "step": 709020
    },
    {
      "epoch": 2.484973381418513,
      "grad_norm": 2.5,
      "learning_rate": 9.537606018832217e-06,
      "loss": 0.8183,
      "step": 709030
    },
    {
      "epoch": 2.4850084289254086,
      "grad_norm": 2.8125,
      "learning_rate": 9.536956990168515e-06,
      "loss": 0.9473,
      "step": 709040
    },
    {
      "epoch": 2.485043476432304,
      "grad_norm": 3.65625,
      "learning_rate": 9.536307961504811e-06,
      "loss": 0.823,
      "step": 709050
    },
    {
      "epoch": 2.4850785239391997,
      "grad_norm": 3.40625,
      "learning_rate": 9.53565893284111e-06,
      "loss": 0.8907,
      "step": 709060
    },
    {
      "epoch": 2.485113571446095,
      "grad_norm": 3.125,
      "learning_rate": 9.535009904177409e-06,
      "loss": 0.791,
      "step": 709070
    },
    {
      "epoch": 2.4851486189529908,
      "grad_norm": 2.6875,
      "learning_rate": 9.534360875513707e-06,
      "loss": 0.7814,
      "step": 709080
    },
    {
      "epoch": 2.4851836664598865,
      "grad_norm": 2.578125,
      "learning_rate": 9.533711846850005e-06,
      "loss": 0.7141,
      "step": 709090
    },
    {
      "epoch": 2.485218713966782,
      "grad_norm": 3.203125,
      "learning_rate": 9.533062818186303e-06,
      "loss": 0.8616,
      "step": 709100
    },
    {
      "epoch": 2.4852537614736776,
      "grad_norm": 2.734375,
      "learning_rate": 9.532413789522601e-06,
      "loss": 0.8445,
      "step": 709110
    },
    {
      "epoch": 2.4852888089805734,
      "grad_norm": 3.3125,
      "learning_rate": 9.531764760858899e-06,
      "loss": 0.8633,
      "step": 709120
    },
    {
      "epoch": 2.4853238564874687,
      "grad_norm": 2.90625,
      "learning_rate": 9.531115732195197e-06,
      "loss": 0.8066,
      "step": 709130
    },
    {
      "epoch": 2.4853589039943644,
      "grad_norm": 2.78125,
      "learning_rate": 9.530466703531495e-06,
      "loss": 0.7468,
      "step": 709140
    },
    {
      "epoch": 2.48539395150126,
      "grad_norm": 3.0,
      "learning_rate": 9.529817674867793e-06,
      "loss": 0.7936,
      "step": 709150
    },
    {
      "epoch": 2.4854289990081555,
      "grad_norm": 2.765625,
      "learning_rate": 9.529168646204091e-06,
      "loss": 0.7393,
      "step": 709160
    },
    {
      "epoch": 2.4854640465150513,
      "grad_norm": 2.96875,
      "learning_rate": 9.528519617540389e-06,
      "loss": 0.8126,
      "step": 709170
    },
    {
      "epoch": 2.4854990940219466,
      "grad_norm": 3.09375,
      "learning_rate": 9.527870588876687e-06,
      "loss": 0.8432,
      "step": 709180
    },
    {
      "epoch": 2.4855341415288423,
      "grad_norm": 3.140625,
      "learning_rate": 9.527221560212985e-06,
      "loss": 0.8272,
      "step": 709190
    },
    {
      "epoch": 2.485569189035738,
      "grad_norm": 2.515625,
      "learning_rate": 9.526572531549285e-06,
      "loss": 0.7935,
      "step": 709200
    },
    {
      "epoch": 2.4856042365426334,
      "grad_norm": 3.0625,
      "learning_rate": 9.525923502885583e-06,
      "loss": 0.7728,
      "step": 709210
    },
    {
      "epoch": 2.485639284049529,
      "grad_norm": 2.796875,
      "learning_rate": 9.52527447422188e-06,
      "loss": 0.8323,
      "step": 709220
    },
    {
      "epoch": 2.485674331556425,
      "grad_norm": 3.15625,
      "learning_rate": 9.524625445558179e-06,
      "loss": 0.8257,
      "step": 709230
    },
    {
      "epoch": 2.4857093790633202,
      "grad_norm": 2.828125,
      "learning_rate": 9.523976416894475e-06,
      "loss": 0.7739,
      "step": 709240
    },
    {
      "epoch": 2.485744426570216,
      "grad_norm": 3.5,
      "learning_rate": 9.523327388230773e-06,
      "loss": 0.8345,
      "step": 709250
    },
    {
      "epoch": 2.4857794740771118,
      "grad_norm": 2.40625,
      "learning_rate": 9.522678359567073e-06,
      "loss": 0.7417,
      "step": 709260
    },
    {
      "epoch": 2.485814521584007,
      "grad_norm": 2.90625,
      "learning_rate": 9.52202933090337e-06,
      "loss": 0.8038,
      "step": 709270
    },
    {
      "epoch": 2.485849569090903,
      "grad_norm": 2.9375,
      "learning_rate": 9.521380302239669e-06,
      "loss": 0.7962,
      "step": 709280
    },
    {
      "epoch": 2.485884616597798,
      "grad_norm": 2.703125,
      "learning_rate": 9.520731273575967e-06,
      "loss": 0.7341,
      "step": 709290
    },
    {
      "epoch": 2.485919664104694,
      "grad_norm": 3.15625,
      "learning_rate": 9.520082244912265e-06,
      "loss": 0.8501,
      "step": 709300
    },
    {
      "epoch": 2.4859547116115897,
      "grad_norm": 3.28125,
      "learning_rate": 9.519433216248563e-06,
      "loss": 0.7535,
      "step": 709310
    },
    {
      "epoch": 2.485989759118485,
      "grad_norm": 2.5625,
      "learning_rate": 9.51878418758486e-06,
      "loss": 0.6947,
      "step": 709320
    },
    {
      "epoch": 2.4860248066253807,
      "grad_norm": 2.375,
      "learning_rate": 9.518135158921159e-06,
      "loss": 0.7456,
      "step": 709330
    },
    {
      "epoch": 2.4860598541322765,
      "grad_norm": 2.84375,
      "learning_rate": 9.517486130257457e-06,
      "loss": 0.9164,
      "step": 709340
    },
    {
      "epoch": 2.486094901639172,
      "grad_norm": 2.828125,
      "learning_rate": 9.516837101593755e-06,
      "loss": 0.8022,
      "step": 709350
    },
    {
      "epoch": 2.4861299491460676,
      "grad_norm": 2.609375,
      "learning_rate": 9.516188072930053e-06,
      "loss": 0.8202,
      "step": 709360
    },
    {
      "epoch": 2.4861649966529633,
      "grad_norm": 2.828125,
      "learning_rate": 9.51553904426635e-06,
      "loss": 0.7886,
      "step": 709370
    },
    {
      "epoch": 2.4862000441598586,
      "grad_norm": 2.609375,
      "learning_rate": 9.51489001560265e-06,
      "loss": 0.7549,
      "step": 709380
    },
    {
      "epoch": 2.4862350916667544,
      "grad_norm": 2.734375,
      "learning_rate": 9.514240986938948e-06,
      "loss": 0.7729,
      "step": 709390
    },
    {
      "epoch": 2.4862701391736497,
      "grad_norm": 2.828125,
      "learning_rate": 9.513591958275246e-06,
      "loss": 0.8551,
      "step": 709400
    },
    {
      "epoch": 2.4863051866805455,
      "grad_norm": 2.84375,
      "learning_rate": 9.512942929611544e-06,
      "loss": 0.8046,
      "step": 709410
    },
    {
      "epoch": 2.486340234187441,
      "grad_norm": 2.59375,
      "learning_rate": 9.51229390094784e-06,
      "loss": 0.8002,
      "step": 709420
    },
    {
      "epoch": 2.4863752816943365,
      "grad_norm": 2.640625,
      "learning_rate": 9.511644872284139e-06,
      "loss": 0.8223,
      "step": 709430
    },
    {
      "epoch": 2.4864103292012323,
      "grad_norm": 3.171875,
      "learning_rate": 9.510995843620438e-06,
      "loss": 0.8208,
      "step": 709440
    },
    {
      "epoch": 2.486445376708128,
      "grad_norm": 2.8125,
      "learning_rate": 9.510346814956736e-06,
      "loss": 0.8146,
      "step": 709450
    },
    {
      "epoch": 2.4864804242150234,
      "grad_norm": 2.734375,
      "learning_rate": 9.509697786293034e-06,
      "loss": 0.8144,
      "step": 709460
    },
    {
      "epoch": 2.486515471721919,
      "grad_norm": 2.53125,
      "learning_rate": 9.509048757629332e-06,
      "loss": 0.8169,
      "step": 709470
    },
    {
      "epoch": 2.486550519228815,
      "grad_norm": 3.0625,
      "learning_rate": 9.50839972896563e-06,
      "loss": 0.8304,
      "step": 709480
    },
    {
      "epoch": 2.48658556673571,
      "grad_norm": 2.859375,
      "learning_rate": 9.507750700301928e-06,
      "loss": 0.767,
      "step": 709490
    },
    {
      "epoch": 2.486620614242606,
      "grad_norm": 2.953125,
      "learning_rate": 9.507101671638226e-06,
      "loss": 0.7541,
      "step": 709500
    },
    {
      "epoch": 2.4866556617495013,
      "grad_norm": 2.421875,
      "learning_rate": 9.506452642974526e-06,
      "loss": 0.7775,
      "step": 709510
    },
    {
      "epoch": 2.486690709256397,
      "grad_norm": 2.828125,
      "learning_rate": 9.505803614310822e-06,
      "loss": 0.7991,
      "step": 709520
    },
    {
      "epoch": 2.4867257567632928,
      "grad_norm": 2.890625,
      "learning_rate": 9.50515458564712e-06,
      "loss": 0.7834,
      "step": 709530
    },
    {
      "epoch": 2.486760804270188,
      "grad_norm": 2.796875,
      "learning_rate": 9.504505556983418e-06,
      "loss": 0.7781,
      "step": 709540
    },
    {
      "epoch": 2.486795851777084,
      "grad_norm": 3.453125,
      "learning_rate": 9.503856528319716e-06,
      "loss": 0.7675,
      "step": 709550
    },
    {
      "epoch": 2.4868308992839796,
      "grad_norm": 3.4375,
      "learning_rate": 9.503207499656014e-06,
      "loss": 0.7632,
      "step": 709560
    },
    {
      "epoch": 2.486865946790875,
      "grad_norm": 3.1875,
      "learning_rate": 9.502558470992314e-06,
      "loss": 0.8202,
      "step": 709570
    },
    {
      "epoch": 2.4869009942977707,
      "grad_norm": 2.8125,
      "learning_rate": 9.501909442328612e-06,
      "loss": 0.8017,
      "step": 709580
    },
    {
      "epoch": 2.4869360418046664,
      "grad_norm": 2.640625,
      "learning_rate": 9.50126041366491e-06,
      "loss": 0.8232,
      "step": 709590
    },
    {
      "epoch": 2.4869710893115617,
      "grad_norm": 2.375,
      "learning_rate": 9.500611385001208e-06,
      "loss": 0.8545,
      "step": 709600
    },
    {
      "epoch": 2.4870061368184575,
      "grad_norm": 2.65625,
      "learning_rate": 9.499962356337504e-06,
      "loss": 0.7774,
      "step": 709610
    },
    {
      "epoch": 2.487041184325353,
      "grad_norm": 2.84375,
      "learning_rate": 9.499313327673804e-06,
      "loss": 0.7941,
      "step": 709620
    },
    {
      "epoch": 2.4870762318322486,
      "grad_norm": 2.8125,
      "learning_rate": 9.498664299010102e-06,
      "loss": 0.7781,
      "step": 709630
    },
    {
      "epoch": 2.4871112793391443,
      "grad_norm": 2.78125,
      "learning_rate": 9.4980152703464e-06,
      "loss": 0.7559,
      "step": 709640
    },
    {
      "epoch": 2.4871463268460396,
      "grad_norm": 2.609375,
      "learning_rate": 9.497366241682698e-06,
      "loss": 0.721,
      "step": 709650
    },
    {
      "epoch": 2.4871813743529354,
      "grad_norm": 2.84375,
      "learning_rate": 9.496717213018996e-06,
      "loss": 0.7513,
      "step": 709660
    },
    {
      "epoch": 2.487216421859831,
      "grad_norm": 2.859375,
      "learning_rate": 9.496068184355294e-06,
      "loss": 0.7864,
      "step": 709670
    },
    {
      "epoch": 2.4872514693667265,
      "grad_norm": 2.890625,
      "learning_rate": 9.495419155691592e-06,
      "loss": 0.7725,
      "step": 709680
    },
    {
      "epoch": 2.4872865168736222,
      "grad_norm": 2.640625,
      "learning_rate": 9.494770127027892e-06,
      "loss": 0.7245,
      "step": 709690
    },
    {
      "epoch": 2.487321564380518,
      "grad_norm": 2.578125,
      "learning_rate": 9.49412109836419e-06,
      "loss": 0.7263,
      "step": 709700
    },
    {
      "epoch": 2.4873566118874133,
      "grad_norm": 2.984375,
      "learning_rate": 9.493472069700486e-06,
      "loss": 0.8658,
      "step": 709710
    },
    {
      "epoch": 2.487391659394309,
      "grad_norm": 3.109375,
      "learning_rate": 9.492823041036784e-06,
      "loss": 0.8268,
      "step": 709720
    },
    {
      "epoch": 2.4874267069012044,
      "grad_norm": 2.625,
      "learning_rate": 9.492174012373082e-06,
      "loss": 0.8337,
      "step": 709730
    },
    {
      "epoch": 2.4874617544081,
      "grad_norm": 3.09375,
      "learning_rate": 9.49152498370938e-06,
      "loss": 0.7662,
      "step": 709740
    },
    {
      "epoch": 2.487496801914996,
      "grad_norm": 2.984375,
      "learning_rate": 9.49087595504568e-06,
      "loss": 0.7884,
      "step": 709750
    },
    {
      "epoch": 2.487531849421891,
      "grad_norm": 2.765625,
      "learning_rate": 9.490226926381978e-06,
      "loss": 0.7984,
      "step": 709760
    },
    {
      "epoch": 2.487566896928787,
      "grad_norm": 3.171875,
      "learning_rate": 9.489577897718276e-06,
      "loss": 0.821,
      "step": 709770
    },
    {
      "epoch": 2.4876019444356827,
      "grad_norm": 3.125,
      "learning_rate": 9.488928869054574e-06,
      "loss": 0.8495,
      "step": 709780
    },
    {
      "epoch": 2.487636991942578,
      "grad_norm": 2.734375,
      "learning_rate": 9.488279840390872e-06,
      "loss": 0.7646,
      "step": 709790
    },
    {
      "epoch": 2.487672039449474,
      "grad_norm": 2.78125,
      "learning_rate": 9.487630811727168e-06,
      "loss": 0.7754,
      "step": 709800
    },
    {
      "epoch": 2.4877070869563696,
      "grad_norm": 3.125,
      "learning_rate": 9.486981783063468e-06,
      "loss": 0.8783,
      "step": 709810
    },
    {
      "epoch": 2.487742134463265,
      "grad_norm": 2.828125,
      "learning_rate": 9.486332754399766e-06,
      "loss": 0.8156,
      "step": 709820
    },
    {
      "epoch": 2.4877771819701606,
      "grad_norm": 2.75,
      "learning_rate": 9.485683725736064e-06,
      "loss": 0.763,
      "step": 709830
    },
    {
      "epoch": 2.487812229477056,
      "grad_norm": 2.96875,
      "learning_rate": 9.485034697072362e-06,
      "loss": 0.9215,
      "step": 709840
    },
    {
      "epoch": 2.4878472769839517,
      "grad_norm": 3.171875,
      "learning_rate": 9.48438566840866e-06,
      "loss": 0.8123,
      "step": 709850
    },
    {
      "epoch": 2.4878823244908475,
      "grad_norm": 2.59375,
      "learning_rate": 9.483736639744958e-06,
      "loss": 0.7876,
      "step": 709860
    },
    {
      "epoch": 2.4879173719977428,
      "grad_norm": 2.65625,
      "learning_rate": 9.483087611081256e-06,
      "loss": 0.752,
      "step": 709870
    },
    {
      "epoch": 2.4879524195046385,
      "grad_norm": 2.765625,
      "learning_rate": 9.482438582417556e-06,
      "loss": 0.794,
      "step": 709880
    },
    {
      "epoch": 2.4879874670115343,
      "grad_norm": 3.0625,
      "learning_rate": 9.481789553753852e-06,
      "loss": 0.8617,
      "step": 709890
    },
    {
      "epoch": 2.4880225145184296,
      "grad_norm": 3.03125,
      "learning_rate": 9.48114052509015e-06,
      "loss": 0.7079,
      "step": 709900
    },
    {
      "epoch": 2.4880575620253254,
      "grad_norm": 2.953125,
      "learning_rate": 9.480491496426448e-06,
      "loss": 0.8176,
      "step": 709910
    },
    {
      "epoch": 2.488092609532221,
      "grad_norm": 7.5,
      "learning_rate": 9.479842467762746e-06,
      "loss": 0.8311,
      "step": 709920
    },
    {
      "epoch": 2.4881276570391164,
      "grad_norm": 3.34375,
      "learning_rate": 9.479193439099046e-06,
      "loss": 0.8476,
      "step": 709930
    },
    {
      "epoch": 2.488162704546012,
      "grad_norm": 2.46875,
      "learning_rate": 9.478544410435344e-06,
      "loss": 0.7734,
      "step": 709940
    },
    {
      "epoch": 2.4881977520529075,
      "grad_norm": 3.25,
      "learning_rate": 9.477895381771642e-06,
      "loss": 0.7902,
      "step": 709950
    },
    {
      "epoch": 2.4882327995598033,
      "grad_norm": 2.640625,
      "learning_rate": 9.47724635310794e-06,
      "loss": 0.7189,
      "step": 709960
    },
    {
      "epoch": 2.488267847066699,
      "grad_norm": 3.046875,
      "learning_rate": 9.476597324444238e-06,
      "loss": 0.8282,
      "step": 709970
    },
    {
      "epoch": 2.4883028945735943,
      "grad_norm": 2.78125,
      "learning_rate": 9.475948295780536e-06,
      "loss": 0.8095,
      "step": 709980
    },
    {
      "epoch": 2.48833794208049,
      "grad_norm": 3.0625,
      "learning_rate": 9.475299267116834e-06,
      "loss": 0.8111,
      "step": 709990
    },
    {
      "epoch": 2.488372989587386,
      "grad_norm": 2.796875,
      "learning_rate": 9.474650238453132e-06,
      "loss": 0.83,
      "step": 710000
    },
    {
      "epoch": 2.488372989587386,
      "eval_loss": 0.7576663494110107,
      "eval_runtime": 552.1312,
      "eval_samples_per_second": 689.032,
      "eval_steps_per_second": 57.419,
      "step": 710000
    },
    {
      "epoch": 2.488408037094281,
      "grad_norm": 2.671875,
      "learning_rate": 9.47400120978943e-06,
      "loss": 0.8177,
      "step": 710010
    },
    {
      "epoch": 2.488443084601177,
      "grad_norm": 2.84375,
      "learning_rate": 9.473352181125728e-06,
      "loss": 0.7871,
      "step": 710020
    },
    {
      "epoch": 2.4884781321080727,
      "grad_norm": 2.875,
      "learning_rate": 9.472703152462026e-06,
      "loss": 0.7173,
      "step": 710030
    },
    {
      "epoch": 2.488513179614968,
      "grad_norm": 2.390625,
      "learning_rate": 9.472054123798324e-06,
      "loss": 0.6777,
      "step": 710040
    },
    {
      "epoch": 2.4885482271218637,
      "grad_norm": 3.046875,
      "learning_rate": 9.471405095134622e-06,
      "loss": 0.8498,
      "step": 710050
    },
    {
      "epoch": 2.488583274628759,
      "grad_norm": 3.28125,
      "learning_rate": 9.470756066470921e-06,
      "loss": 0.8895,
      "step": 710060
    },
    {
      "epoch": 2.488618322135655,
      "grad_norm": 3.234375,
      "learning_rate": 9.47010703780722e-06,
      "loss": 0.8674,
      "step": 710070
    },
    {
      "epoch": 2.4886533696425506,
      "grad_norm": 2.984375,
      "learning_rate": 9.469458009143516e-06,
      "loss": 0.766,
      "step": 710080
    },
    {
      "epoch": 2.488688417149446,
      "grad_norm": 3.015625,
      "learning_rate": 9.468808980479814e-06,
      "loss": 0.7444,
      "step": 710090
    },
    {
      "epoch": 2.4887234646563416,
      "grad_norm": 2.84375,
      "learning_rate": 9.468159951816112e-06,
      "loss": 0.7668,
      "step": 710100
    },
    {
      "epoch": 2.4887585121632374,
      "grad_norm": 3.078125,
      "learning_rate": 9.46751092315241e-06,
      "loss": 0.723,
      "step": 710110
    },
    {
      "epoch": 2.4887935596701327,
      "grad_norm": 2.96875,
      "learning_rate": 9.46686189448871e-06,
      "loss": 0.7313,
      "step": 710120
    },
    {
      "epoch": 2.4888286071770285,
      "grad_norm": 3.015625,
      "learning_rate": 9.466212865825007e-06,
      "loss": 0.7985,
      "step": 710130
    },
    {
      "epoch": 2.4888636546839242,
      "grad_norm": 3.0,
      "learning_rate": 9.465563837161305e-06,
      "loss": 0.8539,
      "step": 710140
    },
    {
      "epoch": 2.4888987021908195,
      "grad_norm": 2.703125,
      "learning_rate": 9.464914808497603e-06,
      "loss": 0.7993,
      "step": 710150
    },
    {
      "epoch": 2.4889337496977153,
      "grad_norm": 2.671875,
      "learning_rate": 9.464265779833901e-06,
      "loss": 0.8428,
      "step": 710160
    },
    {
      "epoch": 2.4889687972046106,
      "grad_norm": 3.09375,
      "learning_rate": 9.4636167511702e-06,
      "loss": 0.7753,
      "step": 710170
    },
    {
      "epoch": 2.4890038447115064,
      "grad_norm": 2.796875,
      "learning_rate": 9.462967722506497e-06,
      "loss": 0.8612,
      "step": 710180
    },
    {
      "epoch": 2.489038892218402,
      "grad_norm": 2.546875,
      "learning_rate": 9.462318693842795e-06,
      "loss": 0.8478,
      "step": 710190
    },
    {
      "epoch": 2.489073939725298,
      "grad_norm": 3.078125,
      "learning_rate": 9.461669665179093e-06,
      "loss": 0.8261,
      "step": 710200
    },
    {
      "epoch": 2.489108987232193,
      "grad_norm": 3.046875,
      "learning_rate": 9.461020636515391e-06,
      "loss": 0.8475,
      "step": 710210
    },
    {
      "epoch": 2.489144034739089,
      "grad_norm": 2.421875,
      "learning_rate": 9.46037160785169e-06,
      "loss": 0.8381,
      "step": 710220
    },
    {
      "epoch": 2.4891790822459843,
      "grad_norm": 2.609375,
      "learning_rate": 9.459722579187987e-06,
      "loss": 0.8167,
      "step": 710230
    },
    {
      "epoch": 2.48921412975288,
      "grad_norm": 3.921875,
      "learning_rate": 9.459073550524287e-06,
      "loss": 0.9019,
      "step": 710240
    },
    {
      "epoch": 2.489249177259776,
      "grad_norm": 3.125,
      "learning_rate": 9.458424521860585e-06,
      "loss": 0.9,
      "step": 710250
    },
    {
      "epoch": 2.489284224766671,
      "grad_norm": 2.796875,
      "learning_rate": 9.457775493196883e-06,
      "loss": 0.8493,
      "step": 710260
    },
    {
      "epoch": 2.489319272273567,
      "grad_norm": 2.78125,
      "learning_rate": 9.45712646453318e-06,
      "loss": 0.7239,
      "step": 710270
    },
    {
      "epoch": 2.489354319780462,
      "grad_norm": 2.34375,
      "learning_rate": 9.456477435869477e-06,
      "loss": 0.7206,
      "step": 710280
    },
    {
      "epoch": 2.489389367287358,
      "grad_norm": 3.03125,
      "learning_rate": 9.455828407205775e-06,
      "loss": 0.7907,
      "step": 710290
    },
    {
      "epoch": 2.4894244147942537,
      "grad_norm": 2.8125,
      "learning_rate": 9.455179378542075e-06,
      "loss": 0.8618,
      "step": 710300
    },
    {
      "epoch": 2.4894594623011495,
      "grad_norm": 2.671875,
      "learning_rate": 9.454530349878373e-06,
      "loss": 0.7724,
      "step": 710310
    },
    {
      "epoch": 2.4894945098080448,
      "grad_norm": 2.75,
      "learning_rate": 9.453881321214671e-06,
      "loss": 0.7655,
      "step": 710320
    },
    {
      "epoch": 2.4895295573149405,
      "grad_norm": 2.796875,
      "learning_rate": 9.453232292550969e-06,
      "loss": 0.8021,
      "step": 710330
    },
    {
      "epoch": 2.489564604821836,
      "grad_norm": 3.046875,
      "learning_rate": 9.452583263887267e-06,
      "loss": 0.816,
      "step": 710340
    },
    {
      "epoch": 2.4895996523287316,
      "grad_norm": 3.34375,
      "learning_rate": 9.451934235223565e-06,
      "loss": 0.8425,
      "step": 710350
    },
    {
      "epoch": 2.4896346998356274,
      "grad_norm": 3.296875,
      "learning_rate": 9.451285206559863e-06,
      "loss": 0.8765,
      "step": 710360
    },
    {
      "epoch": 2.4896697473425227,
      "grad_norm": 3.078125,
      "learning_rate": 9.450636177896161e-06,
      "loss": 0.8871,
      "step": 710370
    },
    {
      "epoch": 2.4897047948494184,
      "grad_norm": 2.640625,
      "learning_rate": 9.449987149232459e-06,
      "loss": 0.8161,
      "step": 710380
    },
    {
      "epoch": 2.4897398423563137,
      "grad_norm": 3.703125,
      "learning_rate": 9.449338120568757e-06,
      "loss": 0.8963,
      "step": 710390
    },
    {
      "epoch": 2.4897748898632095,
      "grad_norm": 2.6875,
      "learning_rate": 9.448689091905055e-06,
      "loss": 0.8649,
      "step": 710400
    },
    {
      "epoch": 2.4898099373701053,
      "grad_norm": 3.09375,
      "learning_rate": 9.448040063241353e-06,
      "loss": 0.8102,
      "step": 710410
    },
    {
      "epoch": 2.489844984877001,
      "grad_norm": 3.15625,
      "learning_rate": 9.447391034577651e-06,
      "loss": 0.7777,
      "step": 710420
    },
    {
      "epoch": 2.4898800323838963,
      "grad_norm": 3.0,
      "learning_rate": 9.44674200591395e-06,
      "loss": 0.8112,
      "step": 710430
    },
    {
      "epoch": 2.489915079890792,
      "grad_norm": 2.875,
      "learning_rate": 9.446092977250249e-06,
      "loss": 0.8506,
      "step": 710440
    },
    {
      "epoch": 2.4899501273976874,
      "grad_norm": 2.875,
      "learning_rate": 9.445443948586547e-06,
      "loss": 0.7698,
      "step": 710450
    },
    {
      "epoch": 2.489985174904583,
      "grad_norm": 3.046875,
      "learning_rate": 9.444794919922843e-06,
      "loss": 0.8119,
      "step": 710460
    },
    {
      "epoch": 2.490020222411479,
      "grad_norm": 3.03125,
      "learning_rate": 9.444145891259141e-06,
      "loss": 0.847,
      "step": 710470
    },
    {
      "epoch": 2.4900552699183742,
      "grad_norm": 3.046875,
      "learning_rate": 9.44349686259544e-06,
      "loss": 0.8379,
      "step": 710480
    },
    {
      "epoch": 2.49009031742527,
      "grad_norm": 3.125,
      "learning_rate": 9.442847833931739e-06,
      "loss": 0.8753,
      "step": 710490
    },
    {
      "epoch": 2.4901253649321657,
      "grad_norm": 2.90625,
      "learning_rate": 9.442198805268037e-06,
      "loss": 0.8766,
      "step": 710500
    },
    {
      "epoch": 2.490160412439061,
      "grad_norm": 2.671875,
      "learning_rate": 9.441549776604335e-06,
      "loss": 0.809,
      "step": 710510
    },
    {
      "epoch": 2.490195459945957,
      "grad_norm": 2.765625,
      "learning_rate": 9.440900747940633e-06,
      "loss": 0.7725,
      "step": 710520
    },
    {
      "epoch": 2.4902305074528526,
      "grad_norm": 2.671875,
      "learning_rate": 9.44025171927693e-06,
      "loss": 0.8094,
      "step": 710530
    },
    {
      "epoch": 2.490265554959748,
      "grad_norm": 3.09375,
      "learning_rate": 9.439602690613229e-06,
      "loss": 0.782,
      "step": 710540
    },
    {
      "epoch": 2.4903006024666436,
      "grad_norm": 2.96875,
      "learning_rate": 9.438953661949527e-06,
      "loss": 0.7963,
      "step": 710550
    },
    {
      "epoch": 2.490335649973539,
      "grad_norm": 3.28125,
      "learning_rate": 9.438304633285825e-06,
      "loss": 0.9326,
      "step": 710560
    },
    {
      "epoch": 2.4903706974804347,
      "grad_norm": 3.390625,
      "learning_rate": 9.437655604622123e-06,
      "loss": 0.8854,
      "step": 710570
    },
    {
      "epoch": 2.4904057449873305,
      "grad_norm": 2.8125,
      "learning_rate": 9.43700657595842e-06,
      "loss": 0.7714,
      "step": 710580
    },
    {
      "epoch": 2.490440792494226,
      "grad_norm": 2.859375,
      "learning_rate": 9.436357547294719e-06,
      "loss": 0.842,
      "step": 710590
    },
    {
      "epoch": 2.4904758400011215,
      "grad_norm": 2.8125,
      "learning_rate": 9.435708518631017e-06,
      "loss": 0.8359,
      "step": 710600
    },
    {
      "epoch": 2.4905108875080173,
      "grad_norm": 3.578125,
      "learning_rate": 9.435059489967316e-06,
      "loss": 0.9253,
      "step": 710610
    },
    {
      "epoch": 2.4905459350149126,
      "grad_norm": 2.75,
      "learning_rate": 9.434410461303614e-06,
      "loss": 0.7977,
      "step": 710620
    },
    {
      "epoch": 2.4905809825218084,
      "grad_norm": 2.9375,
      "learning_rate": 9.433761432639912e-06,
      "loss": 0.8388,
      "step": 710630
    },
    {
      "epoch": 2.490616030028704,
      "grad_norm": 2.5625,
      "learning_rate": 9.43311240397621e-06,
      "loss": 0.8024,
      "step": 710640
    },
    {
      "epoch": 2.4906510775355994,
      "grad_norm": 2.765625,
      "learning_rate": 9.432463375312507e-06,
      "loss": 0.8566,
      "step": 710650
    },
    {
      "epoch": 2.490686125042495,
      "grad_norm": 2.828125,
      "learning_rate": 9.431814346648805e-06,
      "loss": 0.7819,
      "step": 710660
    },
    {
      "epoch": 2.4907211725493905,
      "grad_norm": 3.25,
      "learning_rate": 9.431165317985104e-06,
      "loss": 0.8358,
      "step": 710670
    },
    {
      "epoch": 2.4907562200562863,
      "grad_norm": 3.09375,
      "learning_rate": 9.430516289321402e-06,
      "loss": 0.791,
      "step": 710680
    },
    {
      "epoch": 2.490791267563182,
      "grad_norm": 3.640625,
      "learning_rate": 9.4298672606577e-06,
      "loss": 0.9139,
      "step": 710690
    },
    {
      "epoch": 2.4908263150700773,
      "grad_norm": 2.484375,
      "learning_rate": 9.429218231993998e-06,
      "loss": 0.8387,
      "step": 710700
    },
    {
      "epoch": 2.490861362576973,
      "grad_norm": 3.28125,
      "learning_rate": 9.428569203330296e-06,
      "loss": 0.7721,
      "step": 710710
    },
    {
      "epoch": 2.490896410083869,
      "grad_norm": 2.8125,
      "learning_rate": 9.427920174666594e-06,
      "loss": 0.7735,
      "step": 710720
    },
    {
      "epoch": 2.490931457590764,
      "grad_norm": 3.28125,
      "learning_rate": 9.427271146002894e-06,
      "loss": 0.8018,
      "step": 710730
    },
    {
      "epoch": 2.49096650509766,
      "grad_norm": 2.625,
      "learning_rate": 9.42662211733919e-06,
      "loss": 0.7562,
      "step": 710740
    },
    {
      "epoch": 2.4910015526045557,
      "grad_norm": 2.921875,
      "learning_rate": 9.425973088675488e-06,
      "loss": 0.7525,
      "step": 710750
    },
    {
      "epoch": 2.491036600111451,
      "grad_norm": 3.03125,
      "learning_rate": 9.425324060011786e-06,
      "loss": 0.7956,
      "step": 710760
    },
    {
      "epoch": 2.4910716476183468,
      "grad_norm": 2.921875,
      "learning_rate": 9.424675031348084e-06,
      "loss": 0.8393,
      "step": 710770
    },
    {
      "epoch": 2.491106695125242,
      "grad_norm": 2.921875,
      "learning_rate": 9.424026002684382e-06,
      "loss": 0.8427,
      "step": 710780
    },
    {
      "epoch": 2.491141742632138,
      "grad_norm": 2.8125,
      "learning_rate": 9.423376974020682e-06,
      "loss": 0.7525,
      "step": 710790
    },
    {
      "epoch": 2.4911767901390336,
      "grad_norm": 3.046875,
      "learning_rate": 9.42272794535698e-06,
      "loss": 0.8913,
      "step": 710800
    },
    {
      "epoch": 2.491211837645929,
      "grad_norm": 3.28125,
      "learning_rate": 9.422078916693278e-06,
      "loss": 0.8315,
      "step": 710810
    },
    {
      "epoch": 2.4912468851528247,
      "grad_norm": 2.859375,
      "learning_rate": 9.421429888029576e-06,
      "loss": 0.8745,
      "step": 710820
    },
    {
      "epoch": 2.4912819326597204,
      "grad_norm": 2.984375,
      "learning_rate": 9.420780859365874e-06,
      "loss": 0.6875,
      "step": 710830
    },
    {
      "epoch": 2.4913169801666157,
      "grad_norm": 2.65625,
      "learning_rate": 9.42013183070217e-06,
      "loss": 0.9128,
      "step": 710840
    },
    {
      "epoch": 2.4913520276735115,
      "grad_norm": 3.234375,
      "learning_rate": 9.41948280203847e-06,
      "loss": 0.86,
      "step": 710850
    },
    {
      "epoch": 2.4913870751804073,
      "grad_norm": 2.859375,
      "learning_rate": 9.418833773374768e-06,
      "loss": 0.8284,
      "step": 710860
    },
    {
      "epoch": 2.4914221226873026,
      "grad_norm": 3.046875,
      "learning_rate": 9.418184744711066e-06,
      "loss": 0.7404,
      "step": 710870
    },
    {
      "epoch": 2.4914571701941983,
      "grad_norm": 3.09375,
      "learning_rate": 9.417535716047364e-06,
      "loss": 0.867,
      "step": 710880
    },
    {
      "epoch": 2.4914922177010936,
      "grad_norm": 2.8125,
      "learning_rate": 9.416886687383662e-06,
      "loss": 0.7697,
      "step": 710890
    },
    {
      "epoch": 2.4915272652079894,
      "grad_norm": 2.796875,
      "learning_rate": 9.41623765871996e-06,
      "loss": 0.7676,
      "step": 710900
    },
    {
      "epoch": 2.491562312714885,
      "grad_norm": 2.9375,
      "learning_rate": 9.415588630056258e-06,
      "loss": 0.8786,
      "step": 710910
    },
    {
      "epoch": 2.4915973602217805,
      "grad_norm": 2.78125,
      "learning_rate": 9.414939601392558e-06,
      "loss": 0.7958,
      "step": 710920
    },
    {
      "epoch": 2.4916324077286762,
      "grad_norm": 2.984375,
      "learning_rate": 9.414290572728854e-06,
      "loss": 0.8263,
      "step": 710930
    },
    {
      "epoch": 2.491667455235572,
      "grad_norm": 2.78125,
      "learning_rate": 9.413641544065152e-06,
      "loss": 0.7933,
      "step": 710940
    },
    {
      "epoch": 2.4917025027424673,
      "grad_norm": 2.703125,
      "learning_rate": 9.41299251540145e-06,
      "loss": 0.7434,
      "step": 710950
    },
    {
      "epoch": 2.491737550249363,
      "grad_norm": 3.125,
      "learning_rate": 9.412343486737748e-06,
      "loss": 0.8487,
      "step": 710960
    },
    {
      "epoch": 2.491772597756259,
      "grad_norm": 2.015625,
      "learning_rate": 9.411694458074046e-06,
      "loss": 0.7479,
      "step": 710970
    },
    {
      "epoch": 2.491807645263154,
      "grad_norm": 3.109375,
      "learning_rate": 9.411045429410346e-06,
      "loss": 0.6905,
      "step": 710980
    },
    {
      "epoch": 2.49184269277005,
      "grad_norm": 2.640625,
      "learning_rate": 9.410396400746644e-06,
      "loss": 0.87,
      "step": 710990
    },
    {
      "epoch": 2.491877740276945,
      "grad_norm": 2.71875,
      "learning_rate": 9.409747372082942e-06,
      "loss": 0.7774,
      "step": 711000
    },
    {
      "epoch": 2.491912787783841,
      "grad_norm": 3.140625,
      "learning_rate": 9.40909834341924e-06,
      "loss": 0.8289,
      "step": 711010
    },
    {
      "epoch": 2.4919478352907367,
      "grad_norm": 2.953125,
      "learning_rate": 9.408449314755536e-06,
      "loss": 0.8211,
      "step": 711020
    },
    {
      "epoch": 2.491982882797632,
      "grad_norm": 2.890625,
      "learning_rate": 9.407800286091836e-06,
      "loss": 0.8376,
      "step": 711030
    },
    {
      "epoch": 2.492017930304528,
      "grad_norm": 3.203125,
      "learning_rate": 9.407151257428134e-06,
      "loss": 0.816,
      "step": 711040
    },
    {
      "epoch": 2.4920529778114235,
      "grad_norm": 3.015625,
      "learning_rate": 9.406502228764432e-06,
      "loss": 0.7768,
      "step": 711050
    },
    {
      "epoch": 2.492088025318319,
      "grad_norm": 2.828125,
      "learning_rate": 9.40585320010073e-06,
      "loss": 0.8287,
      "step": 711060
    },
    {
      "epoch": 2.4921230728252146,
      "grad_norm": 2.734375,
      "learning_rate": 9.405204171437028e-06,
      "loss": 0.6375,
      "step": 711070
    },
    {
      "epoch": 2.4921581203321104,
      "grad_norm": 3.25,
      "learning_rate": 9.404555142773326e-06,
      "loss": 0.8688,
      "step": 711080
    },
    {
      "epoch": 2.4921931678390057,
      "grad_norm": 2.96875,
      "learning_rate": 9.403906114109624e-06,
      "loss": 0.8007,
      "step": 711090
    },
    {
      "epoch": 2.4922282153459014,
      "grad_norm": 2.796875,
      "learning_rate": 9.403257085445923e-06,
      "loss": 0.7354,
      "step": 711100
    },
    {
      "epoch": 2.4922632628527968,
      "grad_norm": 3.140625,
      "learning_rate": 9.402608056782221e-06,
      "loss": 0.7994,
      "step": 711110
    },
    {
      "epoch": 2.4922983103596925,
      "grad_norm": 2.796875,
      "learning_rate": 9.401959028118518e-06,
      "loss": 0.7496,
      "step": 711120
    },
    {
      "epoch": 2.4923333578665883,
      "grad_norm": 3.1875,
      "learning_rate": 9.401309999454816e-06,
      "loss": 0.7958,
      "step": 711130
    },
    {
      "epoch": 2.4923684053734836,
      "grad_norm": 2.5625,
      "learning_rate": 9.400660970791114e-06,
      "loss": 0.8557,
      "step": 711140
    },
    {
      "epoch": 2.4924034528803793,
      "grad_norm": 2.875,
      "learning_rate": 9.400011942127412e-06,
      "loss": 0.8883,
      "step": 711150
    },
    {
      "epoch": 2.492438500387275,
      "grad_norm": 2.890625,
      "learning_rate": 9.399362913463711e-06,
      "loss": 0.815,
      "step": 711160
    },
    {
      "epoch": 2.4924735478941704,
      "grad_norm": 2.96875,
      "learning_rate": 9.39871388480001e-06,
      "loss": 0.8203,
      "step": 711170
    },
    {
      "epoch": 2.492508595401066,
      "grad_norm": 3.046875,
      "learning_rate": 9.398064856136307e-06,
      "loss": 0.7921,
      "step": 711180
    },
    {
      "epoch": 2.492543642907962,
      "grad_norm": 2.421875,
      "learning_rate": 9.397415827472605e-06,
      "loss": 0.7874,
      "step": 711190
    },
    {
      "epoch": 2.4925786904148572,
      "grad_norm": 3.03125,
      "learning_rate": 9.396766798808903e-06,
      "loss": 0.7821,
      "step": 711200
    },
    {
      "epoch": 2.492613737921753,
      "grad_norm": 3.359375,
      "learning_rate": 9.3961177701452e-06,
      "loss": 0.7792,
      "step": 711210
    },
    {
      "epoch": 2.4926487854286483,
      "grad_norm": 2.90625,
      "learning_rate": 9.3954687414815e-06,
      "loss": 0.8839,
      "step": 711220
    },
    {
      "epoch": 2.492683832935544,
      "grad_norm": 2.9375,
      "learning_rate": 9.394819712817797e-06,
      "loss": 0.7576,
      "step": 711230
    },
    {
      "epoch": 2.49271888044244,
      "grad_norm": 2.71875,
      "learning_rate": 9.394170684154095e-06,
      "loss": 0.8102,
      "step": 711240
    },
    {
      "epoch": 2.492753927949335,
      "grad_norm": 3.03125,
      "learning_rate": 9.393521655490393e-06,
      "loss": 0.8113,
      "step": 711250
    },
    {
      "epoch": 2.492788975456231,
      "grad_norm": 2.9375,
      "learning_rate": 9.392872626826691e-06,
      "loss": 0.7816,
      "step": 711260
    },
    {
      "epoch": 2.4928240229631267,
      "grad_norm": 2.65625,
      "learning_rate": 9.39222359816299e-06,
      "loss": 0.7942,
      "step": 711270
    },
    {
      "epoch": 2.492859070470022,
      "grad_norm": 3.140625,
      "learning_rate": 9.391574569499289e-06,
      "loss": 0.8207,
      "step": 711280
    },
    {
      "epoch": 2.4928941179769177,
      "grad_norm": 2.609375,
      "learning_rate": 9.390925540835587e-06,
      "loss": 0.7205,
      "step": 711290
    },
    {
      "epoch": 2.4929291654838135,
      "grad_norm": 3.015625,
      "learning_rate": 9.390276512171885e-06,
      "loss": 0.7776,
      "step": 711300
    },
    {
      "epoch": 2.492964212990709,
      "grad_norm": 2.984375,
      "learning_rate": 9.389627483508181e-06,
      "loss": 0.92,
      "step": 711310
    },
    {
      "epoch": 2.4929992604976046,
      "grad_norm": 3.234375,
      "learning_rate": 9.38897845484448e-06,
      "loss": 0.8598,
      "step": 711320
    },
    {
      "epoch": 2.4930343080045,
      "grad_norm": 2.875,
      "learning_rate": 9.388329426180777e-06,
      "loss": 0.8036,
      "step": 711330
    },
    {
      "epoch": 2.4930693555113956,
      "grad_norm": 2.71875,
      "learning_rate": 9.387680397517077e-06,
      "loss": 0.8008,
      "step": 711340
    },
    {
      "epoch": 2.4931044030182914,
      "grad_norm": 3.0,
      "learning_rate": 9.387031368853375e-06,
      "loss": 0.8396,
      "step": 711350
    },
    {
      "epoch": 2.4931394505251867,
      "grad_norm": 3.328125,
      "learning_rate": 9.386382340189673e-06,
      "loss": 0.7704,
      "step": 711360
    },
    {
      "epoch": 2.4931744980320825,
      "grad_norm": 3.515625,
      "learning_rate": 9.385733311525971e-06,
      "loss": 0.7707,
      "step": 711370
    },
    {
      "epoch": 2.4932095455389782,
      "grad_norm": 2.8125,
      "learning_rate": 9.385084282862269e-06,
      "loss": 0.8294,
      "step": 711380
    },
    {
      "epoch": 2.4932445930458735,
      "grad_norm": 3.03125,
      "learning_rate": 9.384435254198567e-06,
      "loss": 0.8851,
      "step": 711390
    },
    {
      "epoch": 2.4932796405527693,
      "grad_norm": 2.984375,
      "learning_rate": 9.383786225534865e-06,
      "loss": 0.8455,
      "step": 711400
    },
    {
      "epoch": 2.493314688059665,
      "grad_norm": 2.703125,
      "learning_rate": 9.383137196871163e-06,
      "loss": 0.6838,
      "step": 711410
    },
    {
      "epoch": 2.4933497355665604,
      "grad_norm": 3.046875,
      "learning_rate": 9.382488168207461e-06,
      "loss": 0.7572,
      "step": 711420
    },
    {
      "epoch": 2.493384783073456,
      "grad_norm": 3.3125,
      "learning_rate": 9.381839139543759e-06,
      "loss": 0.7791,
      "step": 711430
    },
    {
      "epoch": 2.4934198305803514,
      "grad_norm": 3.140625,
      "learning_rate": 9.381190110880057e-06,
      "loss": 0.7402,
      "step": 711440
    },
    {
      "epoch": 2.493454878087247,
      "grad_norm": 2.484375,
      "learning_rate": 9.380541082216355e-06,
      "loss": 0.7642,
      "step": 711450
    },
    {
      "epoch": 2.493489925594143,
      "grad_norm": 2.640625,
      "learning_rate": 9.379892053552653e-06,
      "loss": 0.7435,
      "step": 711460
    },
    {
      "epoch": 2.4935249731010383,
      "grad_norm": 3.484375,
      "learning_rate": 9.379243024888953e-06,
      "loss": 0.8741,
      "step": 711470
    },
    {
      "epoch": 2.493560020607934,
      "grad_norm": 2.984375,
      "learning_rate": 9.37859399622525e-06,
      "loss": 0.8428,
      "step": 711480
    },
    {
      "epoch": 2.49359506811483,
      "grad_norm": 3.109375,
      "learning_rate": 9.377944967561547e-06,
      "loss": 0.8442,
      "step": 711490
    },
    {
      "epoch": 2.493630115621725,
      "grad_norm": 2.953125,
      "learning_rate": 9.377295938897845e-06,
      "loss": 0.9077,
      "step": 711500
    },
    {
      "epoch": 2.493665163128621,
      "grad_norm": 2.40625,
      "learning_rate": 9.376646910234143e-06,
      "loss": 0.8759,
      "step": 711510
    },
    {
      "epoch": 2.4937002106355166,
      "grad_norm": 2.765625,
      "learning_rate": 9.375997881570443e-06,
      "loss": 0.8637,
      "step": 711520
    },
    {
      "epoch": 2.493735258142412,
      "grad_norm": 3.015625,
      "learning_rate": 9.37534885290674e-06,
      "loss": 0.7765,
      "step": 711530
    },
    {
      "epoch": 2.4937703056493077,
      "grad_norm": 3.3125,
      "learning_rate": 9.374699824243039e-06,
      "loss": 0.861,
      "step": 711540
    },
    {
      "epoch": 2.493805353156203,
      "grad_norm": 3.15625,
      "learning_rate": 9.374050795579337e-06,
      "loss": 0.7929,
      "step": 711550
    },
    {
      "epoch": 2.4938404006630988,
      "grad_norm": 2.796875,
      "learning_rate": 9.373401766915635e-06,
      "loss": 0.7662,
      "step": 711560
    },
    {
      "epoch": 2.4938754481699945,
      "grad_norm": 2.90625,
      "learning_rate": 9.372752738251933e-06,
      "loss": 0.8156,
      "step": 711570
    },
    {
      "epoch": 2.4939104956768903,
      "grad_norm": 3.015625,
      "learning_rate": 9.37210370958823e-06,
      "loss": 0.7526,
      "step": 711580
    },
    {
      "epoch": 2.4939455431837856,
      "grad_norm": 2.65625,
      "learning_rate": 9.371454680924529e-06,
      "loss": 0.8172,
      "step": 711590
    },
    {
      "epoch": 2.4939805906906813,
      "grad_norm": 2.640625,
      "learning_rate": 9.370805652260827e-06,
      "loss": 0.8068,
      "step": 711600
    },
    {
      "epoch": 2.4940156381975767,
      "grad_norm": 3.03125,
      "learning_rate": 9.370156623597125e-06,
      "loss": 0.8429,
      "step": 711610
    },
    {
      "epoch": 2.4940506857044724,
      "grad_norm": 2.609375,
      "learning_rate": 9.369507594933423e-06,
      "loss": 0.7602,
      "step": 711620
    },
    {
      "epoch": 2.494085733211368,
      "grad_norm": 3.0,
      "learning_rate": 9.36885856626972e-06,
      "loss": 0.7897,
      "step": 711630
    },
    {
      "epoch": 2.4941207807182635,
      "grad_norm": 3.0625,
      "learning_rate": 9.368209537606019e-06,
      "loss": 0.827,
      "step": 711640
    },
    {
      "epoch": 2.4941558282251592,
      "grad_norm": 3.0,
      "learning_rate": 9.367560508942318e-06,
      "loss": 0.8235,
      "step": 711650
    },
    {
      "epoch": 2.4941908757320546,
      "grad_norm": 2.984375,
      "learning_rate": 9.366911480278616e-06,
      "loss": 0.7171,
      "step": 711660
    },
    {
      "epoch": 2.4942259232389503,
      "grad_norm": 2.859375,
      "learning_rate": 9.366262451614914e-06,
      "loss": 0.853,
      "step": 711670
    },
    {
      "epoch": 2.494260970745846,
      "grad_norm": 2.734375,
      "learning_rate": 9.36561342295121e-06,
      "loss": 0.7842,
      "step": 711680
    },
    {
      "epoch": 2.494296018252742,
      "grad_norm": 2.625,
      "learning_rate": 9.364964394287509e-06,
      "loss": 0.7688,
      "step": 711690
    },
    {
      "epoch": 2.494331065759637,
      "grad_norm": 2.484375,
      "learning_rate": 9.364315365623807e-06,
      "loss": 0.7298,
      "step": 711700
    },
    {
      "epoch": 2.494366113266533,
      "grad_norm": 2.703125,
      "learning_rate": 9.363666336960106e-06,
      "loss": 0.7901,
      "step": 711710
    },
    {
      "epoch": 2.494401160773428,
      "grad_norm": 2.9375,
      "learning_rate": 9.363017308296404e-06,
      "loss": 0.8629,
      "step": 711720
    },
    {
      "epoch": 2.494436208280324,
      "grad_norm": 2.59375,
      "learning_rate": 9.362368279632702e-06,
      "loss": 0.7411,
      "step": 711730
    },
    {
      "epoch": 2.4944712557872197,
      "grad_norm": 3.015625,
      "learning_rate": 9.361719250969e-06,
      "loss": 0.8356,
      "step": 711740
    },
    {
      "epoch": 2.494506303294115,
      "grad_norm": 3.359375,
      "learning_rate": 9.361070222305298e-06,
      "loss": 0.7672,
      "step": 711750
    },
    {
      "epoch": 2.494541350801011,
      "grad_norm": 2.84375,
      "learning_rate": 9.360421193641596e-06,
      "loss": 0.8037,
      "step": 711760
    },
    {
      "epoch": 2.494576398307906,
      "grad_norm": 3.0,
      "learning_rate": 9.359772164977894e-06,
      "loss": 0.7895,
      "step": 711770
    },
    {
      "epoch": 2.494611445814802,
      "grad_norm": 2.375,
      "learning_rate": 9.359123136314192e-06,
      "loss": 0.7511,
      "step": 711780
    },
    {
      "epoch": 2.4946464933216976,
      "grad_norm": 3.15625,
      "learning_rate": 9.35847410765049e-06,
      "loss": 0.7513,
      "step": 711790
    },
    {
      "epoch": 2.4946815408285934,
      "grad_norm": 3.4375,
      "learning_rate": 9.357825078986788e-06,
      "loss": 0.875,
      "step": 711800
    },
    {
      "epoch": 2.4947165883354887,
      "grad_norm": 2.6875,
      "learning_rate": 9.357176050323086e-06,
      "loss": 0.8559,
      "step": 711810
    },
    {
      "epoch": 2.4947516358423845,
      "grad_norm": 3.296875,
      "learning_rate": 9.356527021659384e-06,
      "loss": 0.8755,
      "step": 711820
    },
    {
      "epoch": 2.4947866833492798,
      "grad_norm": 2.546875,
      "learning_rate": 9.355877992995684e-06,
      "loss": 0.795,
      "step": 711830
    },
    {
      "epoch": 2.4948217308561755,
      "grad_norm": 2.921875,
      "learning_rate": 9.355228964331982e-06,
      "loss": 0.8027,
      "step": 711840
    },
    {
      "epoch": 2.4948567783630713,
      "grad_norm": 2.859375,
      "learning_rate": 9.35457993566828e-06,
      "loss": 0.7767,
      "step": 711850
    },
    {
      "epoch": 2.4948918258699666,
      "grad_norm": 3.234375,
      "learning_rate": 9.353930907004578e-06,
      "loss": 0.8366,
      "step": 711860
    },
    {
      "epoch": 2.4949268733768624,
      "grad_norm": 4.09375,
      "learning_rate": 9.353281878340874e-06,
      "loss": 0.8083,
      "step": 711870
    },
    {
      "epoch": 2.494961920883758,
      "grad_norm": 2.78125,
      "learning_rate": 9.352632849677172e-06,
      "loss": 0.759,
      "step": 711880
    },
    {
      "epoch": 2.4949969683906534,
      "grad_norm": 2.734375,
      "learning_rate": 9.351983821013472e-06,
      "loss": 0.768,
      "step": 711890
    },
    {
      "epoch": 2.495032015897549,
      "grad_norm": 2.59375,
      "learning_rate": 9.35133479234977e-06,
      "loss": 0.8195,
      "step": 711900
    },
    {
      "epoch": 2.495067063404445,
      "grad_norm": 2.984375,
      "learning_rate": 9.350685763686068e-06,
      "loss": 0.7589,
      "step": 711910
    },
    {
      "epoch": 2.4951021109113403,
      "grad_norm": 3.46875,
      "learning_rate": 9.350036735022366e-06,
      "loss": 0.8403,
      "step": 711920
    },
    {
      "epoch": 2.495137158418236,
      "grad_norm": 2.515625,
      "learning_rate": 9.349387706358664e-06,
      "loss": 0.7847,
      "step": 711930
    },
    {
      "epoch": 2.4951722059251313,
      "grad_norm": 3.0625,
      "learning_rate": 9.348738677694962e-06,
      "loss": 0.7117,
      "step": 711940
    },
    {
      "epoch": 2.495207253432027,
      "grad_norm": 3.375,
      "learning_rate": 9.34808964903126e-06,
      "loss": 0.8449,
      "step": 711950
    },
    {
      "epoch": 2.495242300938923,
      "grad_norm": 3.03125,
      "learning_rate": 9.347440620367558e-06,
      "loss": 0.7826,
      "step": 711960
    },
    {
      "epoch": 2.495277348445818,
      "grad_norm": 3.4375,
      "learning_rate": 9.346791591703856e-06,
      "loss": 0.7412,
      "step": 711970
    },
    {
      "epoch": 2.495312395952714,
      "grad_norm": 2.953125,
      "learning_rate": 9.346142563040154e-06,
      "loss": 0.7953,
      "step": 711980
    },
    {
      "epoch": 2.4953474434596097,
      "grad_norm": 3.03125,
      "learning_rate": 9.345493534376452e-06,
      "loss": 0.8059,
      "step": 711990
    },
    {
      "epoch": 2.495382490966505,
      "grad_norm": 2.859375,
      "learning_rate": 9.34484450571275e-06,
      "loss": 0.8605,
      "step": 712000
    },
    {
      "epoch": 2.4954175384734008,
      "grad_norm": 2.984375,
      "learning_rate": 9.344195477049048e-06,
      "loss": 0.785,
      "step": 712010
    },
    {
      "epoch": 2.4954525859802965,
      "grad_norm": 2.640625,
      "learning_rate": 9.343546448385348e-06,
      "loss": 0.7869,
      "step": 712020
    },
    {
      "epoch": 2.495487633487192,
      "grad_norm": 2.546875,
      "learning_rate": 9.342897419721646e-06,
      "loss": 0.8187,
      "step": 712030
    },
    {
      "epoch": 2.4955226809940876,
      "grad_norm": 2.703125,
      "learning_rate": 9.342248391057944e-06,
      "loss": 0.8518,
      "step": 712040
    },
    {
      "epoch": 2.495557728500983,
      "grad_norm": 2.859375,
      "learning_rate": 9.341599362394242e-06,
      "loss": 0.8149,
      "step": 712050
    },
    {
      "epoch": 2.4955927760078787,
      "grad_norm": 2.34375,
      "learning_rate": 9.340950333730538e-06,
      "loss": 0.8178,
      "step": 712060
    },
    {
      "epoch": 2.4956278235147744,
      "grad_norm": 3.15625,
      "learning_rate": 9.340301305066838e-06,
      "loss": 0.784,
      "step": 712070
    },
    {
      "epoch": 2.4956628710216697,
      "grad_norm": 2.84375,
      "learning_rate": 9.339652276403136e-06,
      "loss": 0.8129,
      "step": 712080
    },
    {
      "epoch": 2.4956979185285655,
      "grad_norm": 3.265625,
      "learning_rate": 9.339003247739434e-06,
      "loss": 0.7999,
      "step": 712090
    },
    {
      "epoch": 2.4957329660354612,
      "grad_norm": 2.671875,
      "learning_rate": 9.338354219075732e-06,
      "loss": 0.7604,
      "step": 712100
    },
    {
      "epoch": 2.4957680135423566,
      "grad_norm": 2.9375,
      "learning_rate": 9.33770519041203e-06,
      "loss": 0.8018,
      "step": 712110
    },
    {
      "epoch": 2.4958030610492523,
      "grad_norm": 2.609375,
      "learning_rate": 9.337056161748328e-06,
      "loss": 0.7424,
      "step": 712120
    },
    {
      "epoch": 2.495838108556148,
      "grad_norm": 3.234375,
      "learning_rate": 9.336407133084626e-06,
      "loss": 0.9159,
      "step": 712130
    },
    {
      "epoch": 2.4958731560630434,
      "grad_norm": 2.796875,
      "learning_rate": 9.335758104420926e-06,
      "loss": 0.7749,
      "step": 712140
    },
    {
      "epoch": 2.495908203569939,
      "grad_norm": 2.796875,
      "learning_rate": 9.335109075757222e-06,
      "loss": 0.8072,
      "step": 712150
    },
    {
      "epoch": 2.4959432510768345,
      "grad_norm": 2.96875,
      "learning_rate": 9.33446004709352e-06,
      "loss": 0.7225,
      "step": 712160
    },
    {
      "epoch": 2.49597829858373,
      "grad_norm": 2.71875,
      "learning_rate": 9.333811018429818e-06,
      "loss": 0.8096,
      "step": 712170
    },
    {
      "epoch": 2.496013346090626,
      "grad_norm": 3.265625,
      "learning_rate": 9.333161989766116e-06,
      "loss": 0.7809,
      "step": 712180
    },
    {
      "epoch": 2.4960483935975213,
      "grad_norm": 3.5625,
      "learning_rate": 9.332512961102414e-06,
      "loss": 0.8199,
      "step": 712190
    },
    {
      "epoch": 2.496083441104417,
      "grad_norm": 3.71875,
      "learning_rate": 9.331863932438714e-06,
      "loss": 0.8531,
      "step": 712200
    },
    {
      "epoch": 2.496118488611313,
      "grad_norm": 2.65625,
      "learning_rate": 9.331214903775012e-06,
      "loss": 0.7641,
      "step": 712210
    },
    {
      "epoch": 2.496153536118208,
      "grad_norm": 2.921875,
      "learning_rate": 9.33056587511131e-06,
      "loss": 0.8467,
      "step": 712220
    },
    {
      "epoch": 2.496188583625104,
      "grad_norm": 2.8125,
      "learning_rate": 9.329916846447608e-06,
      "loss": 0.7979,
      "step": 712230
    },
    {
      "epoch": 2.4962236311319996,
      "grad_norm": 2.5625,
      "learning_rate": 9.329267817783906e-06,
      "loss": 0.8066,
      "step": 712240
    },
    {
      "epoch": 2.496258678638895,
      "grad_norm": 2.6875,
      "learning_rate": 9.328618789120202e-06,
      "loss": 0.8076,
      "step": 712250
    },
    {
      "epoch": 2.4962937261457907,
      "grad_norm": 2.71875,
      "learning_rate": 9.327969760456502e-06,
      "loss": 0.7576,
      "step": 712260
    },
    {
      "epoch": 2.496328773652686,
      "grad_norm": 3.0,
      "learning_rate": 9.3273207317928e-06,
      "loss": 0.8511,
      "step": 712270
    },
    {
      "epoch": 2.4963638211595818,
      "grad_norm": 2.625,
      "learning_rate": 9.326671703129098e-06,
      "loss": 0.7821,
      "step": 712280
    },
    {
      "epoch": 2.4963988686664775,
      "grad_norm": 3.171875,
      "learning_rate": 9.326022674465396e-06,
      "loss": 0.8683,
      "step": 712290
    },
    {
      "epoch": 2.496433916173373,
      "grad_norm": 3.03125,
      "learning_rate": 9.325373645801694e-06,
      "loss": 0.7998,
      "step": 712300
    },
    {
      "epoch": 2.4964689636802686,
      "grad_norm": 2.640625,
      "learning_rate": 9.324724617137992e-06,
      "loss": 0.7694,
      "step": 712310
    },
    {
      "epoch": 2.4965040111871644,
      "grad_norm": 2.921875,
      "learning_rate": 9.32407558847429e-06,
      "loss": 0.8308,
      "step": 712320
    },
    {
      "epoch": 2.4965390586940597,
      "grad_norm": 3.140625,
      "learning_rate": 9.32342655981059e-06,
      "loss": 0.8324,
      "step": 712330
    },
    {
      "epoch": 2.4965741062009554,
      "grad_norm": 3.015625,
      "learning_rate": 9.322777531146886e-06,
      "loss": 0.8309,
      "step": 712340
    },
    {
      "epoch": 2.496609153707851,
      "grad_norm": 3.078125,
      "learning_rate": 9.322128502483184e-06,
      "loss": 0.79,
      "step": 712350
    },
    {
      "epoch": 2.4966442012147465,
      "grad_norm": 3.03125,
      "learning_rate": 9.321479473819482e-06,
      "loss": 0.9134,
      "step": 712360
    },
    {
      "epoch": 2.4966792487216423,
      "grad_norm": 2.875,
      "learning_rate": 9.32083044515578e-06,
      "loss": 0.7283,
      "step": 712370
    },
    {
      "epoch": 2.4967142962285376,
      "grad_norm": 2.5625,
      "learning_rate": 9.32018141649208e-06,
      "loss": 0.8241,
      "step": 712380
    },
    {
      "epoch": 2.4967493437354333,
      "grad_norm": 2.609375,
      "learning_rate": 9.319532387828377e-06,
      "loss": 0.7442,
      "step": 712390
    },
    {
      "epoch": 2.496784391242329,
      "grad_norm": 3.265625,
      "learning_rate": 9.318883359164675e-06,
      "loss": 0.7864,
      "step": 712400
    },
    {
      "epoch": 2.4968194387492244,
      "grad_norm": 3.015625,
      "learning_rate": 9.318234330500973e-06,
      "loss": 0.8383,
      "step": 712410
    },
    {
      "epoch": 2.49685448625612,
      "grad_norm": 2.546875,
      "learning_rate": 9.317585301837271e-06,
      "loss": 0.7271,
      "step": 712420
    },
    {
      "epoch": 2.496889533763016,
      "grad_norm": 2.96875,
      "learning_rate": 9.316936273173568e-06,
      "loss": 0.838,
      "step": 712430
    },
    {
      "epoch": 2.4969245812699112,
      "grad_norm": 2.75,
      "learning_rate": 9.316287244509867e-06,
      "loss": 0.8123,
      "step": 712440
    },
    {
      "epoch": 2.496959628776807,
      "grad_norm": 3.0625,
      "learning_rate": 9.315638215846165e-06,
      "loss": 0.8098,
      "step": 712450
    },
    {
      "epoch": 2.4969946762837028,
      "grad_norm": 2.796875,
      "learning_rate": 9.314989187182463e-06,
      "loss": 0.7828,
      "step": 712460
    },
    {
      "epoch": 2.497029723790598,
      "grad_norm": 2.9375,
      "learning_rate": 9.314340158518761e-06,
      "loss": 0.7938,
      "step": 712470
    },
    {
      "epoch": 2.497064771297494,
      "grad_norm": 2.703125,
      "learning_rate": 9.31369112985506e-06,
      "loss": 0.7598,
      "step": 712480
    },
    {
      "epoch": 2.497099818804389,
      "grad_norm": 3.015625,
      "learning_rate": 9.313042101191357e-06,
      "loss": 0.7622,
      "step": 712490
    },
    {
      "epoch": 2.497134866311285,
      "grad_norm": 2.90625,
      "learning_rate": 9.312393072527655e-06,
      "loss": 0.6941,
      "step": 712500
    },
    {
      "epoch": 2.4971699138181807,
      "grad_norm": 3.515625,
      "learning_rate": 9.311744043863955e-06,
      "loss": 0.8522,
      "step": 712510
    },
    {
      "epoch": 2.497204961325076,
      "grad_norm": 2.671875,
      "learning_rate": 9.311095015200253e-06,
      "loss": 0.7518,
      "step": 712520
    },
    {
      "epoch": 2.4972400088319717,
      "grad_norm": 2.84375,
      "learning_rate": 9.31044598653655e-06,
      "loss": 0.8175,
      "step": 712530
    },
    {
      "epoch": 2.4972750563388675,
      "grad_norm": 2.78125,
      "learning_rate": 9.309796957872847e-06,
      "loss": 0.8093,
      "step": 712540
    },
    {
      "epoch": 2.497310103845763,
      "grad_norm": 2.859375,
      "learning_rate": 9.309147929209145e-06,
      "loss": 0.8398,
      "step": 712550
    },
    {
      "epoch": 2.4973451513526586,
      "grad_norm": 2.828125,
      "learning_rate": 9.308498900545443e-06,
      "loss": 0.7764,
      "step": 712560
    },
    {
      "epoch": 2.4973801988595543,
      "grad_norm": 3.34375,
      "learning_rate": 9.307849871881743e-06,
      "loss": 0.8452,
      "step": 712570
    },
    {
      "epoch": 2.4974152463664496,
      "grad_norm": 3.15625,
      "learning_rate": 9.307200843218041e-06,
      "loss": 0.7572,
      "step": 712580
    },
    {
      "epoch": 2.4974502938733454,
      "grad_norm": 3.0625,
      "learning_rate": 9.306551814554339e-06,
      "loss": 0.7861,
      "step": 712590
    },
    {
      "epoch": 2.4974853413802407,
      "grad_norm": 2.765625,
      "learning_rate": 9.305902785890637e-06,
      "loss": 0.796,
      "step": 712600
    },
    {
      "epoch": 2.4975203888871365,
      "grad_norm": 2.921875,
      "learning_rate": 9.305253757226935e-06,
      "loss": 0.7758,
      "step": 712610
    },
    {
      "epoch": 2.497555436394032,
      "grad_norm": 3.125,
      "learning_rate": 9.304604728563233e-06,
      "loss": 0.879,
      "step": 712620
    },
    {
      "epoch": 2.4975904839009275,
      "grad_norm": 3.40625,
      "learning_rate": 9.303955699899531e-06,
      "loss": 0.8181,
      "step": 712630
    },
    {
      "epoch": 2.4976255314078233,
      "grad_norm": 3.125,
      "learning_rate": 9.303306671235829e-06,
      "loss": 0.814,
      "step": 712640
    },
    {
      "epoch": 2.497660578914719,
      "grad_norm": 3.203125,
      "learning_rate": 9.302657642572127e-06,
      "loss": 0.823,
      "step": 712650
    },
    {
      "epoch": 2.4976956264216144,
      "grad_norm": 3.140625,
      "learning_rate": 9.302008613908425e-06,
      "loss": 0.8162,
      "step": 712660
    },
    {
      "epoch": 2.49773067392851,
      "grad_norm": 2.6875,
      "learning_rate": 9.301359585244723e-06,
      "loss": 0.8507,
      "step": 712670
    },
    {
      "epoch": 2.497765721435406,
      "grad_norm": 2.890625,
      "learning_rate": 9.300710556581021e-06,
      "loss": 0.8355,
      "step": 712680
    },
    {
      "epoch": 2.497800768942301,
      "grad_norm": 2.9375,
      "learning_rate": 9.30006152791732e-06,
      "loss": 0.8291,
      "step": 712690
    },
    {
      "epoch": 2.497835816449197,
      "grad_norm": 2.890625,
      "learning_rate": 9.299412499253619e-06,
      "loss": 0.8398,
      "step": 712700
    },
    {
      "epoch": 2.4978708639560923,
      "grad_norm": 2.875,
      "learning_rate": 9.298763470589917e-06,
      "loss": 0.8322,
      "step": 712710
    },
    {
      "epoch": 2.497905911462988,
      "grad_norm": 3.296875,
      "learning_rate": 9.298114441926213e-06,
      "loss": 0.8102,
      "step": 712720
    },
    {
      "epoch": 2.4979409589698838,
      "grad_norm": 3.1875,
      "learning_rate": 9.297465413262511e-06,
      "loss": 0.8438,
      "step": 712730
    },
    {
      "epoch": 2.497976006476779,
      "grad_norm": 2.984375,
      "learning_rate": 9.296816384598809e-06,
      "loss": 0.8115,
      "step": 712740
    },
    {
      "epoch": 2.498011053983675,
      "grad_norm": 2.765625,
      "learning_rate": 9.296167355935109e-06,
      "loss": 0.8124,
      "step": 712750
    },
    {
      "epoch": 2.4980461014905706,
      "grad_norm": 2.53125,
      "learning_rate": 9.295518327271407e-06,
      "loss": 0.7604,
      "step": 712760
    },
    {
      "epoch": 2.498081148997466,
      "grad_norm": 2.859375,
      "learning_rate": 9.294869298607705e-06,
      "loss": 0.8326,
      "step": 712770
    },
    {
      "epoch": 2.4981161965043617,
      "grad_norm": 2.890625,
      "learning_rate": 9.294220269944003e-06,
      "loss": 0.8718,
      "step": 712780
    },
    {
      "epoch": 2.4981512440112574,
      "grad_norm": 3.3125,
      "learning_rate": 9.2935712412803e-06,
      "loss": 0.7973,
      "step": 712790
    },
    {
      "epoch": 2.4981862915181527,
      "grad_norm": 3.0625,
      "learning_rate": 9.292922212616599e-06,
      "loss": 0.781,
      "step": 712800
    },
    {
      "epoch": 2.4982213390250485,
      "grad_norm": 3.0625,
      "learning_rate": 9.292273183952897e-06,
      "loss": 0.8774,
      "step": 712810
    },
    {
      "epoch": 2.498256386531944,
      "grad_norm": 2.75,
      "learning_rate": 9.291624155289195e-06,
      "loss": 0.8012,
      "step": 712820
    },
    {
      "epoch": 2.4982914340388396,
      "grad_norm": 2.40625,
      "learning_rate": 9.290975126625493e-06,
      "loss": 0.8352,
      "step": 712830
    },
    {
      "epoch": 2.4983264815457353,
      "grad_norm": 2.953125,
      "learning_rate": 9.29032609796179e-06,
      "loss": 0.842,
      "step": 712840
    },
    {
      "epoch": 2.4983615290526306,
      "grad_norm": 2.640625,
      "learning_rate": 9.289677069298089e-06,
      "loss": 0.7488,
      "step": 712850
    },
    {
      "epoch": 2.4983965765595264,
      "grad_norm": 2.984375,
      "learning_rate": 9.289028040634387e-06,
      "loss": 0.762,
      "step": 712860
    },
    {
      "epoch": 2.498431624066422,
      "grad_norm": 2.953125,
      "learning_rate": 9.288379011970685e-06,
      "loss": 0.8146,
      "step": 712870
    },
    {
      "epoch": 2.4984666715733175,
      "grad_norm": 2.5,
      "learning_rate": 9.287729983306984e-06,
      "loss": 0.8565,
      "step": 712880
    },
    {
      "epoch": 2.4985017190802132,
      "grad_norm": 2.59375,
      "learning_rate": 9.287080954643282e-06,
      "loss": 0.875,
      "step": 712890
    },
    {
      "epoch": 2.498536766587109,
      "grad_norm": 2.84375,
      "learning_rate": 9.286431925979579e-06,
      "loss": 0.8143,
      "step": 712900
    },
    {
      "epoch": 2.4985718140940043,
      "grad_norm": 2.921875,
      "learning_rate": 9.285782897315877e-06,
      "loss": 0.8148,
      "step": 712910
    },
    {
      "epoch": 2.4986068616009,
      "grad_norm": 3.0,
      "learning_rate": 9.285133868652175e-06,
      "loss": 0.8577,
      "step": 712920
    },
    {
      "epoch": 2.4986419091077954,
      "grad_norm": 2.46875,
      "learning_rate": 9.284484839988474e-06,
      "loss": 0.7773,
      "step": 712930
    },
    {
      "epoch": 2.498676956614691,
      "grad_norm": 3.015625,
      "learning_rate": 9.283835811324772e-06,
      "loss": 0.841,
      "step": 712940
    },
    {
      "epoch": 2.498712004121587,
      "grad_norm": 4.125,
      "learning_rate": 9.28318678266107e-06,
      "loss": 0.779,
      "step": 712950
    },
    {
      "epoch": 2.4987470516284827,
      "grad_norm": 3.375,
      "learning_rate": 9.282537753997368e-06,
      "loss": 0.798,
      "step": 712960
    },
    {
      "epoch": 2.498782099135378,
      "grad_norm": 2.640625,
      "learning_rate": 9.281888725333666e-06,
      "loss": 0.8493,
      "step": 712970
    },
    {
      "epoch": 2.4988171466422737,
      "grad_norm": 2.6875,
      "learning_rate": 9.281239696669964e-06,
      "loss": 0.7877,
      "step": 712980
    },
    {
      "epoch": 2.498852194149169,
      "grad_norm": 2.78125,
      "learning_rate": 9.280590668006262e-06,
      "loss": 0.8287,
      "step": 712990
    },
    {
      "epoch": 2.498887241656065,
      "grad_norm": 2.796875,
      "learning_rate": 9.27994163934256e-06,
      "loss": 0.7714,
      "step": 713000
    },
    {
      "epoch": 2.4989222891629606,
      "grad_norm": 2.5,
      "learning_rate": 9.279292610678858e-06,
      "loss": 0.7501,
      "step": 713010
    },
    {
      "epoch": 2.498957336669856,
      "grad_norm": 3.421875,
      "learning_rate": 9.278643582015156e-06,
      "loss": 0.7992,
      "step": 713020
    },
    {
      "epoch": 2.4989923841767516,
      "grad_norm": 2.921875,
      "learning_rate": 9.277994553351454e-06,
      "loss": 0.8044,
      "step": 713030
    },
    {
      "epoch": 2.499027431683647,
      "grad_norm": 3.203125,
      "learning_rate": 9.277345524687752e-06,
      "loss": 0.8134,
      "step": 713040
    },
    {
      "epoch": 2.4990624791905427,
      "grad_norm": 2.796875,
      "learning_rate": 9.27669649602405e-06,
      "loss": 0.7754,
      "step": 713050
    },
    {
      "epoch": 2.4990975266974385,
      "grad_norm": 2.984375,
      "learning_rate": 9.27604746736035e-06,
      "loss": 0.7674,
      "step": 713060
    },
    {
      "epoch": 2.499132574204334,
      "grad_norm": 3.09375,
      "learning_rate": 9.275398438696648e-06,
      "loss": 0.8303,
      "step": 713070
    },
    {
      "epoch": 2.4991676217112295,
      "grad_norm": 3.328125,
      "learning_rate": 9.274749410032946e-06,
      "loss": 0.8622,
      "step": 713080
    },
    {
      "epoch": 2.4992026692181253,
      "grad_norm": 3.21875,
      "learning_rate": 9.274100381369242e-06,
      "loss": 0.8495,
      "step": 713090
    },
    {
      "epoch": 2.4992377167250206,
      "grad_norm": 3.328125,
      "learning_rate": 9.27345135270554e-06,
      "loss": 0.8089,
      "step": 713100
    },
    {
      "epoch": 2.4992727642319164,
      "grad_norm": 2.625,
      "learning_rate": 9.272802324041838e-06,
      "loss": 0.7531,
      "step": 713110
    },
    {
      "epoch": 2.499307811738812,
      "grad_norm": 2.765625,
      "learning_rate": 9.272153295378138e-06,
      "loss": 0.8012,
      "step": 713120
    },
    {
      "epoch": 2.4993428592457074,
      "grad_norm": 3.125,
      "learning_rate": 9.271504266714436e-06,
      "loss": 0.7973,
      "step": 713130
    },
    {
      "epoch": 2.499377906752603,
      "grad_norm": 3.09375,
      "learning_rate": 9.270855238050734e-06,
      "loss": 0.8336,
      "step": 713140
    },
    {
      "epoch": 2.499412954259499,
      "grad_norm": 2.84375,
      "learning_rate": 9.270206209387032e-06,
      "loss": 0.8487,
      "step": 713150
    },
    {
      "epoch": 2.4994480017663943,
      "grad_norm": 2.84375,
      "learning_rate": 9.26955718072333e-06,
      "loss": 0.8025,
      "step": 713160
    },
    {
      "epoch": 2.49948304927329,
      "grad_norm": 2.5,
      "learning_rate": 9.268908152059628e-06,
      "loss": 0.7643,
      "step": 713170
    },
    {
      "epoch": 2.4995180967801858,
      "grad_norm": 3.015625,
      "learning_rate": 9.268259123395926e-06,
      "loss": 0.7872,
      "step": 713180
    },
    {
      "epoch": 2.499553144287081,
      "grad_norm": 2.8125,
      "learning_rate": 9.267610094732224e-06,
      "loss": 0.8142,
      "step": 713190
    },
    {
      "epoch": 2.499588191793977,
      "grad_norm": 3.34375,
      "learning_rate": 9.266961066068522e-06,
      "loss": 0.9223,
      "step": 713200
    },
    {
      "epoch": 2.499623239300872,
      "grad_norm": 2.640625,
      "learning_rate": 9.26631203740482e-06,
      "loss": 0.7866,
      "step": 713210
    },
    {
      "epoch": 2.499658286807768,
      "grad_norm": 2.9375,
      "learning_rate": 9.265663008741118e-06,
      "loss": 0.7931,
      "step": 713220
    },
    {
      "epoch": 2.4996933343146637,
      "grad_norm": 2.6875,
      "learning_rate": 9.265013980077416e-06,
      "loss": 0.7819,
      "step": 713230
    },
    {
      "epoch": 2.499728381821559,
      "grad_norm": 2.671875,
      "learning_rate": 9.264364951413716e-06,
      "loss": 0.817,
      "step": 713240
    },
    {
      "epoch": 2.4997634293284547,
      "grad_norm": 2.90625,
      "learning_rate": 9.263715922750014e-06,
      "loss": 0.8039,
      "step": 713250
    },
    {
      "epoch": 2.4997984768353505,
      "grad_norm": 2.609375,
      "learning_rate": 9.263066894086312e-06,
      "loss": 0.7491,
      "step": 713260
    },
    {
      "epoch": 2.499833524342246,
      "grad_norm": 2.890625,
      "learning_rate": 9.26241786542261e-06,
      "loss": 0.7556,
      "step": 713270
    },
    {
      "epoch": 2.4998685718491416,
      "grad_norm": 2.984375,
      "learning_rate": 9.261768836758906e-06,
      "loss": 0.8325,
      "step": 713280
    },
    {
      "epoch": 2.4999036193560373,
      "grad_norm": 3.046875,
      "learning_rate": 9.261119808095204e-06,
      "loss": 0.8439,
      "step": 713290
    },
    {
      "epoch": 2.4999386668629326,
      "grad_norm": 2.640625,
      "learning_rate": 9.260470779431504e-06,
      "loss": 0.861,
      "step": 713300
    },
    {
      "epoch": 2.4999737143698284,
      "grad_norm": 3.65625,
      "learning_rate": 9.259821750767802e-06,
      "loss": 0.7487,
      "step": 713310
    },
    {
      "epoch": 2.5000087618767237,
      "grad_norm": 2.671875,
      "learning_rate": 9.2591727221041e-06,
      "loss": 0.7726,
      "step": 713320
    },
    {
      "epoch": 2.5000438093836195,
      "grad_norm": 2.671875,
      "learning_rate": 9.258523693440398e-06,
      "loss": 0.7416,
      "step": 713330
    },
    {
      "epoch": 2.5000788568905152,
      "grad_norm": 3.171875,
      "learning_rate": 9.257874664776696e-06,
      "loss": 0.7836,
      "step": 713340
    },
    {
      "epoch": 2.5001139043974105,
      "grad_norm": 3.140625,
      "learning_rate": 9.257225636112994e-06,
      "loss": 0.8589,
      "step": 713350
    },
    {
      "epoch": 2.5001489519043063,
      "grad_norm": 3.015625,
      "learning_rate": 9.256576607449292e-06,
      "loss": 0.7498,
      "step": 713360
    },
    {
      "epoch": 2.5001839994112016,
      "grad_norm": 3.34375,
      "learning_rate": 9.25592757878559e-06,
      "loss": 0.8535,
      "step": 713370
    },
    {
      "epoch": 2.5002190469180974,
      "grad_norm": 3.09375,
      "learning_rate": 9.255278550121888e-06,
      "loss": 0.9315,
      "step": 713380
    },
    {
      "epoch": 2.500254094424993,
      "grad_norm": 2.34375,
      "learning_rate": 9.254629521458186e-06,
      "loss": 0.7874,
      "step": 713390
    },
    {
      "epoch": 2.500289141931889,
      "grad_norm": 2.96875,
      "learning_rate": 9.253980492794484e-06,
      "loss": 0.8106,
      "step": 713400
    },
    {
      "epoch": 2.500324189438784,
      "grad_norm": 2.578125,
      "learning_rate": 9.253331464130782e-06,
      "loss": 0.8107,
      "step": 713410
    },
    {
      "epoch": 2.50035923694568,
      "grad_norm": 2.484375,
      "learning_rate": 9.25268243546708e-06,
      "loss": 0.7885,
      "step": 713420
    },
    {
      "epoch": 2.5003942844525753,
      "grad_norm": 2.921875,
      "learning_rate": 9.25203340680338e-06,
      "loss": 0.7898,
      "step": 713430
    },
    {
      "epoch": 2.500429331959471,
      "grad_norm": 2.75,
      "learning_rate": 9.251384378139677e-06,
      "loss": 0.785,
      "step": 713440
    },
    {
      "epoch": 2.500464379466367,
      "grad_norm": 2.671875,
      "learning_rate": 9.250735349475975e-06,
      "loss": 0.7636,
      "step": 713450
    },
    {
      "epoch": 2.500499426973262,
      "grad_norm": 2.4375,
      "learning_rate": 9.250086320812273e-06,
      "loss": 0.7403,
      "step": 713460
    },
    {
      "epoch": 2.500534474480158,
      "grad_norm": 2.921875,
      "learning_rate": 9.24943729214857e-06,
      "loss": 0.7653,
      "step": 713470
    },
    {
      "epoch": 2.500569521987053,
      "grad_norm": 2.65625,
      "learning_rate": 9.24878826348487e-06,
      "loss": 0.7928,
      "step": 713480
    },
    {
      "epoch": 2.500604569493949,
      "grad_norm": 2.390625,
      "learning_rate": 9.248139234821167e-06,
      "loss": 0.7929,
      "step": 713490
    },
    {
      "epoch": 2.5006396170008447,
      "grad_norm": 2.703125,
      "learning_rate": 9.247490206157465e-06,
      "loss": 0.7215,
      "step": 713500
    },
    {
      "epoch": 2.5006746645077405,
      "grad_norm": 2.640625,
      "learning_rate": 9.246841177493763e-06,
      "loss": 0.8856,
      "step": 713510
    },
    {
      "epoch": 2.5007097120146358,
      "grad_norm": 2.671875,
      "learning_rate": 9.246192148830061e-06,
      "loss": 0.734,
      "step": 713520
    },
    {
      "epoch": 2.5007447595215315,
      "grad_norm": 3.609375,
      "learning_rate": 9.24554312016636e-06,
      "loss": 0.8809,
      "step": 713530
    },
    {
      "epoch": 2.500779807028427,
      "grad_norm": 3.25,
      "learning_rate": 9.244894091502657e-06,
      "loss": 0.7176,
      "step": 713540
    },
    {
      "epoch": 2.5008148545353226,
      "grad_norm": 2.984375,
      "learning_rate": 9.244245062838957e-06,
      "loss": 0.8466,
      "step": 713550
    },
    {
      "epoch": 2.5008499020422184,
      "grad_norm": 2.625,
      "learning_rate": 9.243596034175253e-06,
      "loss": 0.8831,
      "step": 713560
    },
    {
      "epoch": 2.5008849495491137,
      "grad_norm": 3.09375,
      "learning_rate": 9.242947005511551e-06,
      "loss": 0.8276,
      "step": 713570
    },
    {
      "epoch": 2.5009199970560094,
      "grad_norm": 3.0625,
      "learning_rate": 9.24229797684785e-06,
      "loss": 0.7914,
      "step": 713580
    },
    {
      "epoch": 2.500955044562905,
      "grad_norm": 2.59375,
      "learning_rate": 9.241648948184147e-06,
      "loss": 0.7923,
      "step": 713590
    },
    {
      "epoch": 2.5009900920698005,
      "grad_norm": 2.921875,
      "learning_rate": 9.240999919520445e-06,
      "loss": 0.8261,
      "step": 713600
    },
    {
      "epoch": 2.5010251395766963,
      "grad_norm": 3.109375,
      "learning_rate": 9.240350890856745e-06,
      "loss": 0.7055,
      "step": 713610
    },
    {
      "epoch": 2.501060187083592,
      "grad_norm": 2.9375,
      "learning_rate": 9.239701862193043e-06,
      "loss": 0.726,
      "step": 713620
    },
    {
      "epoch": 2.5010952345904873,
      "grad_norm": 3.0,
      "learning_rate": 9.239052833529341e-06,
      "loss": 0.775,
      "step": 713630
    },
    {
      "epoch": 2.501130282097383,
      "grad_norm": 3.0625,
      "learning_rate": 9.238403804865639e-06,
      "loss": 0.9004,
      "step": 713640
    },
    {
      "epoch": 2.5011653296042784,
      "grad_norm": 2.671875,
      "learning_rate": 9.237754776201937e-06,
      "loss": 0.7761,
      "step": 713650
    },
    {
      "epoch": 2.501200377111174,
      "grad_norm": 2.546875,
      "learning_rate": 9.237105747538233e-06,
      "loss": 0.7883,
      "step": 713660
    },
    {
      "epoch": 2.50123542461807,
      "grad_norm": 2.71875,
      "learning_rate": 9.236456718874533e-06,
      "loss": 0.8289,
      "step": 713670
    },
    {
      "epoch": 2.5012704721249652,
      "grad_norm": 2.90625,
      "learning_rate": 9.235807690210831e-06,
      "loss": 0.777,
      "step": 713680
    },
    {
      "epoch": 2.501305519631861,
      "grad_norm": 2.71875,
      "learning_rate": 9.235158661547129e-06,
      "loss": 0.7604,
      "step": 713690
    },
    {
      "epoch": 2.5013405671387567,
      "grad_norm": 2.828125,
      "learning_rate": 9.234509632883427e-06,
      "loss": 0.8083,
      "step": 713700
    },
    {
      "epoch": 2.501375614645652,
      "grad_norm": 2.96875,
      "learning_rate": 9.233860604219725e-06,
      "loss": 0.8377,
      "step": 713710
    },
    {
      "epoch": 2.501410662152548,
      "grad_norm": 3.234375,
      "learning_rate": 9.233211575556023e-06,
      "loss": 0.8609,
      "step": 713720
    },
    {
      "epoch": 2.5014457096594436,
      "grad_norm": 2.515625,
      "learning_rate": 9.232562546892321e-06,
      "loss": 0.8523,
      "step": 713730
    },
    {
      "epoch": 2.501480757166339,
      "grad_norm": 2.875,
      "learning_rate": 9.23191351822862e-06,
      "loss": 0.7992,
      "step": 713740
    },
    {
      "epoch": 2.5015158046732346,
      "grad_norm": 2.75,
      "learning_rate": 9.231264489564917e-06,
      "loss": 0.8203,
      "step": 713750
    },
    {
      "epoch": 2.50155085218013,
      "grad_norm": 3.0625,
      "learning_rate": 9.230615460901215e-06,
      "loss": 0.8829,
      "step": 713760
    },
    {
      "epoch": 2.5015858996870257,
      "grad_norm": 2.953125,
      "learning_rate": 9.229966432237513e-06,
      "loss": 0.8185,
      "step": 713770
    },
    {
      "epoch": 2.5016209471939215,
      "grad_norm": 2.90625,
      "learning_rate": 9.229317403573811e-06,
      "loss": 0.8068,
      "step": 713780
    },
    {
      "epoch": 2.5016559947008172,
      "grad_norm": 2.90625,
      "learning_rate": 9.22866837491011e-06,
      "loss": 0.763,
      "step": 713790
    },
    {
      "epoch": 2.5016910422077125,
      "grad_norm": 3.203125,
      "learning_rate": 9.228019346246409e-06,
      "loss": 0.8092,
      "step": 713800
    },
    {
      "epoch": 2.5017260897146083,
      "grad_norm": 3.046875,
      "learning_rate": 9.227370317582707e-06,
      "loss": 0.7626,
      "step": 713810
    },
    {
      "epoch": 2.5017611372215036,
      "grad_norm": 3.078125,
      "learning_rate": 9.226721288919005e-06,
      "loss": 0.7558,
      "step": 713820
    },
    {
      "epoch": 2.5017961847283994,
      "grad_norm": 2.96875,
      "learning_rate": 9.226072260255303e-06,
      "loss": 0.8677,
      "step": 713830
    },
    {
      "epoch": 2.501831232235295,
      "grad_norm": 2.890625,
      "learning_rate": 9.225423231591599e-06,
      "loss": 0.7877,
      "step": 713840
    },
    {
      "epoch": 2.5018662797421904,
      "grad_norm": 3.453125,
      "learning_rate": 9.224774202927899e-06,
      "loss": 0.909,
      "step": 713850
    },
    {
      "epoch": 2.501901327249086,
      "grad_norm": 3.1875,
      "learning_rate": 9.224125174264197e-06,
      "loss": 0.8312,
      "step": 713860
    },
    {
      "epoch": 2.5019363747559815,
      "grad_norm": 2.890625,
      "learning_rate": 9.223476145600495e-06,
      "loss": 0.8115,
      "step": 713870
    },
    {
      "epoch": 2.5019714222628773,
      "grad_norm": 2.59375,
      "learning_rate": 9.222827116936793e-06,
      "loss": 0.7834,
      "step": 713880
    },
    {
      "epoch": 2.502006469769773,
      "grad_norm": 2.796875,
      "learning_rate": 9.22217808827309e-06,
      "loss": 0.8363,
      "step": 713890
    },
    {
      "epoch": 2.502041517276669,
      "grad_norm": 3.484375,
      "learning_rate": 9.221529059609389e-06,
      "loss": 0.8336,
      "step": 713900
    },
    {
      "epoch": 2.502076564783564,
      "grad_norm": 3.0625,
      "learning_rate": 9.220880030945687e-06,
      "loss": 0.8551,
      "step": 713910
    },
    {
      "epoch": 2.50211161229046,
      "grad_norm": 2.71875,
      "learning_rate": 9.220231002281986e-06,
      "loss": 0.8572,
      "step": 713920
    },
    {
      "epoch": 2.502146659797355,
      "grad_norm": 3.0,
      "learning_rate": 9.219581973618284e-06,
      "loss": 0.8223,
      "step": 713930
    },
    {
      "epoch": 2.502181707304251,
      "grad_norm": 2.875,
      "learning_rate": 9.21893294495458e-06,
      "loss": 0.8633,
      "step": 713940
    },
    {
      "epoch": 2.5022167548111467,
      "grad_norm": 3.125,
      "learning_rate": 9.218283916290879e-06,
      "loss": 0.7784,
      "step": 713950
    },
    {
      "epoch": 2.502251802318042,
      "grad_norm": 2.703125,
      "learning_rate": 9.217634887627177e-06,
      "loss": 0.812,
      "step": 713960
    },
    {
      "epoch": 2.5022868498249378,
      "grad_norm": 2.671875,
      "learning_rate": 9.216985858963475e-06,
      "loss": 0.8074,
      "step": 713970
    },
    {
      "epoch": 2.502321897331833,
      "grad_norm": 3.140625,
      "learning_rate": 9.216336830299774e-06,
      "loss": 0.8255,
      "step": 713980
    },
    {
      "epoch": 2.502356944838729,
      "grad_norm": 2.625,
      "learning_rate": 9.215687801636072e-06,
      "loss": 0.8012,
      "step": 713990
    },
    {
      "epoch": 2.5023919923456246,
      "grad_norm": 3.171875,
      "learning_rate": 9.21503877297237e-06,
      "loss": 0.774,
      "step": 714000
    },
    {
      "epoch": 2.5024270398525204,
      "grad_norm": 2.703125,
      "learning_rate": 9.214389744308668e-06,
      "loss": 0.8238,
      "step": 714010
    },
    {
      "epoch": 2.5024620873594157,
      "grad_norm": 3.0625,
      "learning_rate": 9.213740715644966e-06,
      "loss": 0.8694,
      "step": 714020
    },
    {
      "epoch": 2.5024971348663114,
      "grad_norm": 2.703125,
      "learning_rate": 9.213091686981264e-06,
      "loss": 0.7557,
      "step": 714030
    },
    {
      "epoch": 2.5025321823732067,
      "grad_norm": 2.796875,
      "learning_rate": 9.212442658317562e-06,
      "loss": 0.8283,
      "step": 714040
    },
    {
      "epoch": 2.5025672298801025,
      "grad_norm": 2.984375,
      "learning_rate": 9.21179362965386e-06,
      "loss": 0.7183,
      "step": 714050
    },
    {
      "epoch": 2.5026022773869983,
      "grad_norm": 3.78125,
      "learning_rate": 9.211144600990158e-06,
      "loss": 0.8772,
      "step": 714060
    },
    {
      "epoch": 2.5026373248938936,
      "grad_norm": 2.75,
      "learning_rate": 9.210495572326456e-06,
      "loss": 0.836,
      "step": 714070
    },
    {
      "epoch": 2.5026723724007893,
      "grad_norm": 3.3125,
      "learning_rate": 9.209846543662754e-06,
      "loss": 0.831,
      "step": 714080
    },
    {
      "epoch": 2.5027074199076846,
      "grad_norm": 2.734375,
      "learning_rate": 9.209197514999052e-06,
      "loss": 0.7203,
      "step": 714090
    },
    {
      "epoch": 2.5027424674145804,
      "grad_norm": 2.984375,
      "learning_rate": 9.208548486335352e-06,
      "loss": 0.8093,
      "step": 714100
    },
    {
      "epoch": 2.502777514921476,
      "grad_norm": 3.25,
      "learning_rate": 9.20789945767165e-06,
      "loss": 0.765,
      "step": 714110
    },
    {
      "epoch": 2.502812562428372,
      "grad_norm": 2.6875,
      "learning_rate": 9.207250429007948e-06,
      "loss": 0.7588,
      "step": 714120
    },
    {
      "epoch": 2.5028476099352672,
      "grad_norm": 2.96875,
      "learning_rate": 9.206601400344244e-06,
      "loss": 0.8825,
      "step": 714130
    },
    {
      "epoch": 2.502882657442163,
      "grad_norm": 2.703125,
      "learning_rate": 9.205952371680542e-06,
      "loss": 0.8123,
      "step": 714140
    },
    {
      "epoch": 2.5029177049490583,
      "grad_norm": 2.78125,
      "learning_rate": 9.20530334301684e-06,
      "loss": 0.7957,
      "step": 714150
    },
    {
      "epoch": 2.502952752455954,
      "grad_norm": 2.953125,
      "learning_rate": 9.20465431435314e-06,
      "loss": 0.8167,
      "step": 714160
    },
    {
      "epoch": 2.50298779996285,
      "grad_norm": 3.5,
      "learning_rate": 9.204005285689438e-06,
      "loss": 0.8622,
      "step": 714170
    },
    {
      "epoch": 2.503022847469745,
      "grad_norm": 3.109375,
      "learning_rate": 9.203356257025736e-06,
      "loss": 0.7557,
      "step": 714180
    },
    {
      "epoch": 2.503057894976641,
      "grad_norm": 3.171875,
      "learning_rate": 9.202707228362034e-06,
      "loss": 0.7332,
      "step": 714190
    },
    {
      "epoch": 2.503092942483536,
      "grad_norm": 3.046875,
      "learning_rate": 9.202058199698332e-06,
      "loss": 0.8569,
      "step": 714200
    },
    {
      "epoch": 2.503127989990432,
      "grad_norm": 3.046875,
      "learning_rate": 9.20140917103463e-06,
      "loss": 0.8038,
      "step": 714210
    },
    {
      "epoch": 2.5031630374973277,
      "grad_norm": 2.953125,
      "learning_rate": 9.200760142370928e-06,
      "loss": 0.7731,
      "step": 714220
    },
    {
      "epoch": 2.5031980850042235,
      "grad_norm": 2.625,
      "learning_rate": 9.200111113707226e-06,
      "loss": 0.7494,
      "step": 714230
    },
    {
      "epoch": 2.503233132511119,
      "grad_norm": 2.75,
      "learning_rate": 9.199462085043524e-06,
      "loss": 0.7539,
      "step": 714240
    },
    {
      "epoch": 2.5032681800180145,
      "grad_norm": 2.25,
      "learning_rate": 9.198813056379822e-06,
      "loss": 0.838,
      "step": 714250
    },
    {
      "epoch": 2.50330322752491,
      "grad_norm": 3.15625,
      "learning_rate": 9.19816402771612e-06,
      "loss": 0.8136,
      "step": 714260
    },
    {
      "epoch": 2.5033382750318056,
      "grad_norm": 2.78125,
      "learning_rate": 9.197514999052418e-06,
      "loss": 0.7541,
      "step": 714270
    },
    {
      "epoch": 2.5033733225387014,
      "grad_norm": 2.9375,
      "learning_rate": 9.196865970388716e-06,
      "loss": 0.8474,
      "step": 714280
    },
    {
      "epoch": 2.5034083700455967,
      "grad_norm": 3.046875,
      "learning_rate": 9.196216941725016e-06,
      "loss": 0.8577,
      "step": 714290
    },
    {
      "epoch": 2.5034434175524924,
      "grad_norm": 3.234375,
      "learning_rate": 9.195567913061314e-06,
      "loss": 0.7657,
      "step": 714300
    },
    {
      "epoch": 2.5034784650593878,
      "grad_norm": 2.796875,
      "learning_rate": 9.19491888439761e-06,
      "loss": 0.7425,
      "step": 714310
    },
    {
      "epoch": 2.5035135125662835,
      "grad_norm": 3.28125,
      "learning_rate": 9.194269855733908e-06,
      "loss": 0.8659,
      "step": 714320
    },
    {
      "epoch": 2.5035485600731793,
      "grad_norm": 2.65625,
      "learning_rate": 9.193620827070206e-06,
      "loss": 0.8135,
      "step": 714330
    },
    {
      "epoch": 2.503583607580075,
      "grad_norm": 3.46875,
      "learning_rate": 9.192971798406506e-06,
      "loss": 0.7961,
      "step": 714340
    },
    {
      "epoch": 2.5036186550869703,
      "grad_norm": 3.234375,
      "learning_rate": 9.192322769742804e-06,
      "loss": 0.8094,
      "step": 714350
    },
    {
      "epoch": 2.503653702593866,
      "grad_norm": 2.671875,
      "learning_rate": 9.191673741079102e-06,
      "loss": 0.7973,
      "step": 714360
    },
    {
      "epoch": 2.5036887501007614,
      "grad_norm": 2.8125,
      "learning_rate": 9.1910247124154e-06,
      "loss": 0.7935,
      "step": 714370
    },
    {
      "epoch": 2.503723797607657,
      "grad_norm": 2.953125,
      "learning_rate": 9.190375683751698e-06,
      "loss": 0.8269,
      "step": 714380
    },
    {
      "epoch": 2.503758845114553,
      "grad_norm": 3.40625,
      "learning_rate": 9.189726655087996e-06,
      "loss": 0.8731,
      "step": 714390
    },
    {
      "epoch": 2.5037938926214482,
      "grad_norm": 3.328125,
      "learning_rate": 9.189077626424294e-06,
      "loss": 0.7987,
      "step": 714400
    },
    {
      "epoch": 2.503828940128344,
      "grad_norm": 3.0,
      "learning_rate": 9.188428597760592e-06,
      "loss": 0.677,
      "step": 714410
    },
    {
      "epoch": 2.5038639876352393,
      "grad_norm": 3.0625,
      "learning_rate": 9.18777956909689e-06,
      "loss": 0.7783,
      "step": 714420
    },
    {
      "epoch": 2.503899035142135,
      "grad_norm": 2.421875,
      "learning_rate": 9.187130540433188e-06,
      "loss": 0.718,
      "step": 714430
    },
    {
      "epoch": 2.503934082649031,
      "grad_norm": 3.078125,
      "learning_rate": 9.186481511769486e-06,
      "loss": 0.8343,
      "step": 714440
    },
    {
      "epoch": 2.5039691301559266,
      "grad_norm": 2.703125,
      "learning_rate": 9.185832483105784e-06,
      "loss": 0.8392,
      "step": 714450
    },
    {
      "epoch": 2.504004177662822,
      "grad_norm": 2.671875,
      "learning_rate": 9.185183454442082e-06,
      "loss": 0.7433,
      "step": 714460
    },
    {
      "epoch": 2.5040392251697177,
      "grad_norm": 2.515625,
      "learning_rate": 9.184534425778382e-06,
      "loss": 0.7992,
      "step": 714470
    },
    {
      "epoch": 2.504074272676613,
      "grad_norm": 2.921875,
      "learning_rate": 9.18388539711468e-06,
      "loss": 0.7711,
      "step": 714480
    },
    {
      "epoch": 2.5041093201835087,
      "grad_norm": 2.53125,
      "learning_rate": 9.183236368450978e-06,
      "loss": 0.7264,
      "step": 714490
    },
    {
      "epoch": 2.5041443676904045,
      "grad_norm": 3.296875,
      "learning_rate": 9.182587339787274e-06,
      "loss": 0.8803,
      "step": 714500
    },
    {
      "epoch": 2.5041794151973,
      "grad_norm": 2.421875,
      "learning_rate": 9.181938311123572e-06,
      "loss": 0.7899,
      "step": 714510
    },
    {
      "epoch": 2.5042144627041956,
      "grad_norm": 2.953125,
      "learning_rate": 9.18128928245987e-06,
      "loss": 0.7909,
      "step": 714520
    },
    {
      "epoch": 2.504249510211091,
      "grad_norm": 3.1875,
      "learning_rate": 9.18064025379617e-06,
      "loss": 0.8354,
      "step": 714530
    },
    {
      "epoch": 2.5042845577179866,
      "grad_norm": 3.265625,
      "learning_rate": 9.179991225132468e-06,
      "loss": 0.8357,
      "step": 714540
    },
    {
      "epoch": 2.5043196052248824,
      "grad_norm": 2.875,
      "learning_rate": 9.179342196468766e-06,
      "loss": 0.8094,
      "step": 714550
    },
    {
      "epoch": 2.504354652731778,
      "grad_norm": 2.8125,
      "learning_rate": 9.178693167805064e-06,
      "loss": 0.8018,
      "step": 714560
    },
    {
      "epoch": 2.5043897002386735,
      "grad_norm": 2.390625,
      "learning_rate": 9.178044139141362e-06,
      "loss": 0.7906,
      "step": 714570
    },
    {
      "epoch": 2.5044247477455692,
      "grad_norm": 3.0,
      "learning_rate": 9.17739511047766e-06,
      "loss": 0.7935,
      "step": 714580
    },
    {
      "epoch": 2.5044597952524645,
      "grad_norm": 2.828125,
      "learning_rate": 9.176746081813958e-06,
      "loss": 0.793,
      "step": 714590
    },
    {
      "epoch": 2.5044948427593603,
      "grad_norm": 2.78125,
      "learning_rate": 9.176097053150256e-06,
      "loss": 0.799,
      "step": 714600
    },
    {
      "epoch": 2.504529890266256,
      "grad_norm": 2.921875,
      "learning_rate": 9.175448024486554e-06,
      "loss": 0.8279,
      "step": 714610
    },
    {
      "epoch": 2.5045649377731514,
      "grad_norm": 2.859375,
      "learning_rate": 9.174798995822852e-06,
      "loss": 0.8036,
      "step": 714620
    },
    {
      "epoch": 2.504599985280047,
      "grad_norm": 2.96875,
      "learning_rate": 9.17414996715915e-06,
      "loss": 0.7584,
      "step": 714630
    },
    {
      "epoch": 2.5046350327869424,
      "grad_norm": 2.640625,
      "learning_rate": 9.173500938495448e-06,
      "loss": 0.7651,
      "step": 714640
    },
    {
      "epoch": 2.504670080293838,
      "grad_norm": 2.515625,
      "learning_rate": 9.172851909831747e-06,
      "loss": 0.741,
      "step": 714650
    },
    {
      "epoch": 2.504705127800734,
      "grad_norm": 2.53125,
      "learning_rate": 9.172202881168045e-06,
      "loss": 0.75,
      "step": 714660
    },
    {
      "epoch": 2.5047401753076297,
      "grad_norm": 3.34375,
      "learning_rate": 9.171553852504343e-06,
      "loss": 0.8051,
      "step": 714670
    },
    {
      "epoch": 2.504775222814525,
      "grad_norm": 2.8125,
      "learning_rate": 9.170904823840641e-06,
      "loss": 0.7928,
      "step": 714680
    },
    {
      "epoch": 2.504810270321421,
      "grad_norm": 2.921875,
      "learning_rate": 9.170255795176938e-06,
      "loss": 0.7686,
      "step": 714690
    },
    {
      "epoch": 2.504845317828316,
      "grad_norm": 2.71875,
      "learning_rate": 9.169606766513236e-06,
      "loss": 0.8195,
      "step": 714700
    },
    {
      "epoch": 2.504880365335212,
      "grad_norm": 2.984375,
      "learning_rate": 9.168957737849535e-06,
      "loss": 0.8343,
      "step": 714710
    },
    {
      "epoch": 2.5049154128421076,
      "grad_norm": 3.296875,
      "learning_rate": 9.168308709185833e-06,
      "loss": 0.8318,
      "step": 714720
    },
    {
      "epoch": 2.504950460349003,
      "grad_norm": 2.640625,
      "learning_rate": 9.167659680522131e-06,
      "loss": 0.7874,
      "step": 714730
    },
    {
      "epoch": 2.5049855078558987,
      "grad_norm": 3.328125,
      "learning_rate": 9.16701065185843e-06,
      "loss": 0.8917,
      "step": 714740
    },
    {
      "epoch": 2.505020555362794,
      "grad_norm": 3.140625,
      "learning_rate": 9.166361623194727e-06,
      "loss": 0.805,
      "step": 714750
    },
    {
      "epoch": 2.5050556028696898,
      "grad_norm": 2.296875,
      "learning_rate": 9.165712594531025e-06,
      "loss": 0.8206,
      "step": 714760
    },
    {
      "epoch": 2.5050906503765855,
      "grad_norm": 2.9375,
      "learning_rate": 9.165063565867323e-06,
      "loss": 0.8033,
      "step": 714770
    },
    {
      "epoch": 2.5051256978834813,
      "grad_norm": 3.25,
      "learning_rate": 9.164414537203621e-06,
      "loss": 0.7365,
      "step": 714780
    },
    {
      "epoch": 2.5051607453903766,
      "grad_norm": 2.96875,
      "learning_rate": 9.16376550853992e-06,
      "loss": 0.818,
      "step": 714790
    },
    {
      "epoch": 2.5051957928972723,
      "grad_norm": 2.84375,
      "learning_rate": 9.163116479876217e-06,
      "loss": 0.7754,
      "step": 714800
    },
    {
      "epoch": 2.5052308404041677,
      "grad_norm": 2.765625,
      "learning_rate": 9.162467451212515e-06,
      "loss": 0.7419,
      "step": 714810
    },
    {
      "epoch": 2.5052658879110634,
      "grad_norm": 3.015625,
      "learning_rate": 9.161818422548813e-06,
      "loss": 0.7863,
      "step": 714820
    },
    {
      "epoch": 2.505300935417959,
      "grad_norm": 2.703125,
      "learning_rate": 9.161169393885111e-06,
      "loss": 0.7941,
      "step": 714830
    },
    {
      "epoch": 2.5053359829248545,
      "grad_norm": 2.75,
      "learning_rate": 9.160520365221411e-06,
      "loss": 0.8403,
      "step": 714840
    },
    {
      "epoch": 2.5053710304317502,
      "grad_norm": 2.953125,
      "learning_rate": 9.159871336557709e-06,
      "loss": 0.8435,
      "step": 714850
    },
    {
      "epoch": 2.5054060779386456,
      "grad_norm": 2.78125,
      "learning_rate": 9.159222307894007e-06,
      "loss": 0.7661,
      "step": 714860
    },
    {
      "epoch": 2.5054411254455413,
      "grad_norm": 2.984375,
      "learning_rate": 9.158573279230305e-06,
      "loss": 0.7959,
      "step": 714870
    },
    {
      "epoch": 2.505476172952437,
      "grad_norm": 2.734375,
      "learning_rate": 9.157924250566601e-06,
      "loss": 0.741,
      "step": 714880
    },
    {
      "epoch": 2.505511220459333,
      "grad_norm": 2.984375,
      "learning_rate": 9.157275221902901e-06,
      "loss": 0.8003,
      "step": 714890
    },
    {
      "epoch": 2.505546267966228,
      "grad_norm": 2.859375,
      "learning_rate": 9.156626193239199e-06,
      "loss": 0.8192,
      "step": 714900
    },
    {
      "epoch": 2.505581315473124,
      "grad_norm": 2.296875,
      "learning_rate": 9.155977164575497e-06,
      "loss": 0.7514,
      "step": 714910
    },
    {
      "epoch": 2.505616362980019,
      "grad_norm": 2.828125,
      "learning_rate": 9.155328135911795e-06,
      "loss": 0.8456,
      "step": 714920
    },
    {
      "epoch": 2.505651410486915,
      "grad_norm": 3.0625,
      "learning_rate": 9.154679107248093e-06,
      "loss": 0.7503,
      "step": 714930
    },
    {
      "epoch": 2.5056864579938107,
      "grad_norm": 3.125,
      "learning_rate": 9.154030078584391e-06,
      "loss": 0.805,
      "step": 714940
    },
    {
      "epoch": 2.505721505500706,
      "grad_norm": 3.140625,
      "learning_rate": 9.153381049920689e-06,
      "loss": 0.7736,
      "step": 714950
    },
    {
      "epoch": 2.505756553007602,
      "grad_norm": 2.828125,
      "learning_rate": 9.152732021256989e-06,
      "loss": 0.8276,
      "step": 714960
    },
    {
      "epoch": 2.5057916005144976,
      "grad_norm": 2.859375,
      "learning_rate": 9.152082992593285e-06,
      "loss": 0.7924,
      "step": 714970
    },
    {
      "epoch": 2.505826648021393,
      "grad_norm": 3.1875,
      "learning_rate": 9.151433963929583e-06,
      "loss": 0.854,
      "step": 714980
    },
    {
      "epoch": 2.5058616955282886,
      "grad_norm": 2.640625,
      "learning_rate": 9.150784935265881e-06,
      "loss": 0.7652,
      "step": 714990
    },
    {
      "epoch": 2.5058967430351844,
      "grad_norm": 2.859375,
      "learning_rate": 9.150135906602179e-06,
      "loss": 0.8104,
      "step": 715000
    },
    {
      "epoch": 2.5058967430351844,
      "eval_loss": 0.7565529346466064,
      "eval_runtime": 557.9201,
      "eval_samples_per_second": 681.883,
      "eval_steps_per_second": 56.824,
      "step": 715000
    },
    {
      "epoch": 2.5059317905420797,
      "grad_norm": 2.984375,
      "learning_rate": 9.149486877938477e-06,
      "loss": 0.7792,
      "step": 715010
    },
    {
      "epoch": 2.5059668380489755,
      "grad_norm": 2.71875,
      "learning_rate": 9.148837849274777e-06,
      "loss": 0.8228,
      "step": 715020
    },
    {
      "epoch": 2.506001885555871,
      "grad_norm": 3.28125,
      "learning_rate": 9.148188820611075e-06,
      "loss": 0.8215,
      "step": 715030
    },
    {
      "epoch": 2.5060369330627665,
      "grad_norm": 3.1875,
      "learning_rate": 9.147539791947373e-06,
      "loss": 0.843,
      "step": 715040
    },
    {
      "epoch": 2.5060719805696623,
      "grad_norm": 2.9375,
      "learning_rate": 9.14689076328367e-06,
      "loss": 0.7983,
      "step": 715050
    },
    {
      "epoch": 2.5061070280765576,
      "grad_norm": 3.0625,
      "learning_rate": 9.146241734619969e-06,
      "loss": 0.7936,
      "step": 715060
    },
    {
      "epoch": 2.5061420755834534,
      "grad_norm": 2.828125,
      "learning_rate": 9.145592705956265e-06,
      "loss": 0.7901,
      "step": 715070
    },
    {
      "epoch": 2.506177123090349,
      "grad_norm": 2.703125,
      "learning_rate": 9.144943677292565e-06,
      "loss": 0.8261,
      "step": 715080
    },
    {
      "epoch": 2.5062121705972444,
      "grad_norm": 3.140625,
      "learning_rate": 9.144294648628863e-06,
      "loss": 0.7729,
      "step": 715090
    },
    {
      "epoch": 2.50624721810414,
      "grad_norm": 2.640625,
      "learning_rate": 9.14364561996516e-06,
      "loss": 0.7859,
      "step": 715100
    },
    {
      "epoch": 2.506282265611036,
      "grad_norm": 2.9375,
      "learning_rate": 9.142996591301459e-06,
      "loss": 0.8036,
      "step": 715110
    },
    {
      "epoch": 2.5063173131179313,
      "grad_norm": 3.015625,
      "learning_rate": 9.142347562637757e-06,
      "loss": 0.8082,
      "step": 715120
    },
    {
      "epoch": 2.506352360624827,
      "grad_norm": 2.703125,
      "learning_rate": 9.141698533974055e-06,
      "loss": 0.7915,
      "step": 715130
    },
    {
      "epoch": 2.5063874081317223,
      "grad_norm": 2.921875,
      "learning_rate": 9.141049505310353e-06,
      "loss": 0.7415,
      "step": 715140
    },
    {
      "epoch": 2.506422455638618,
      "grad_norm": 3.125,
      "learning_rate": 9.140400476646652e-06,
      "loss": 0.6931,
      "step": 715150
    },
    {
      "epoch": 2.506457503145514,
      "grad_norm": 2.71875,
      "learning_rate": 9.139751447982949e-06,
      "loss": 0.8615,
      "step": 715160
    },
    {
      "epoch": 2.5064925506524096,
      "grad_norm": 2.625,
      "learning_rate": 9.139102419319247e-06,
      "loss": 0.7888,
      "step": 715170
    },
    {
      "epoch": 2.506527598159305,
      "grad_norm": 3.09375,
      "learning_rate": 9.138453390655545e-06,
      "loss": 0.8088,
      "step": 715180
    },
    {
      "epoch": 2.5065626456662007,
      "grad_norm": 2.78125,
      "learning_rate": 9.137804361991843e-06,
      "loss": 0.8208,
      "step": 715190
    },
    {
      "epoch": 2.506597693173096,
      "grad_norm": 3.3125,
      "learning_rate": 9.137155333328142e-06,
      "loss": 0.8056,
      "step": 715200
    },
    {
      "epoch": 2.5066327406799918,
      "grad_norm": 2.890625,
      "learning_rate": 9.13650630466444e-06,
      "loss": 0.8937,
      "step": 715210
    },
    {
      "epoch": 2.5066677881868875,
      "grad_norm": 3.3125,
      "learning_rate": 9.135857276000738e-06,
      "loss": 0.8022,
      "step": 715220
    },
    {
      "epoch": 2.506702835693783,
      "grad_norm": 2.765625,
      "learning_rate": 9.135208247337036e-06,
      "loss": 0.7482,
      "step": 715230
    },
    {
      "epoch": 2.5067378832006786,
      "grad_norm": 2.984375,
      "learning_rate": 9.134559218673334e-06,
      "loss": 0.7495,
      "step": 715240
    },
    {
      "epoch": 2.506772930707574,
      "grad_norm": 2.671875,
      "learning_rate": 9.13391019000963e-06,
      "loss": 0.8336,
      "step": 715250
    },
    {
      "epoch": 2.5068079782144697,
      "grad_norm": 2.640625,
      "learning_rate": 9.13326116134593e-06,
      "loss": 0.7157,
      "step": 715260
    },
    {
      "epoch": 2.5068430257213654,
      "grad_norm": 2.90625,
      "learning_rate": 9.132612132682228e-06,
      "loss": 0.8363,
      "step": 715270
    },
    {
      "epoch": 2.506878073228261,
      "grad_norm": 3.46875,
      "learning_rate": 9.131963104018526e-06,
      "loss": 0.7838,
      "step": 715280
    },
    {
      "epoch": 2.5069131207351565,
      "grad_norm": 2.734375,
      "learning_rate": 9.131314075354824e-06,
      "loss": 0.7422,
      "step": 715290
    },
    {
      "epoch": 2.5069481682420522,
      "grad_norm": 2.53125,
      "learning_rate": 9.130665046691122e-06,
      "loss": 0.7642,
      "step": 715300
    },
    {
      "epoch": 2.5069832157489476,
      "grad_norm": 2.9375,
      "learning_rate": 9.13001601802742e-06,
      "loss": 0.7941,
      "step": 715310
    },
    {
      "epoch": 2.5070182632558433,
      "grad_norm": 3.171875,
      "learning_rate": 9.129366989363718e-06,
      "loss": 0.8041,
      "step": 715320
    },
    {
      "epoch": 2.507053310762739,
      "grad_norm": 3.015625,
      "learning_rate": 9.128717960700018e-06,
      "loss": 0.779,
      "step": 715330
    },
    {
      "epoch": 2.5070883582696344,
      "grad_norm": 2.5625,
      "learning_rate": 9.128068932036316e-06,
      "loss": 0.7822,
      "step": 715340
    },
    {
      "epoch": 2.50712340577653,
      "grad_norm": 2.21875,
      "learning_rate": 9.127419903372612e-06,
      "loss": 0.7655,
      "step": 715350
    },
    {
      "epoch": 2.5071584532834255,
      "grad_norm": 2.484375,
      "learning_rate": 9.12677087470891e-06,
      "loss": 0.8173,
      "step": 715360
    },
    {
      "epoch": 2.507193500790321,
      "grad_norm": 2.640625,
      "learning_rate": 9.126121846045208e-06,
      "loss": 0.8076,
      "step": 715370
    },
    {
      "epoch": 2.507228548297217,
      "grad_norm": 2.828125,
      "learning_rate": 9.125472817381506e-06,
      "loss": 0.8149,
      "step": 715380
    },
    {
      "epoch": 2.5072635958041127,
      "grad_norm": 3.078125,
      "learning_rate": 9.124823788717806e-06,
      "loss": 0.9089,
      "step": 715390
    },
    {
      "epoch": 2.507298643311008,
      "grad_norm": 3.390625,
      "learning_rate": 9.124174760054104e-06,
      "loss": 0.8198,
      "step": 715400
    },
    {
      "epoch": 2.507333690817904,
      "grad_norm": 3.125,
      "learning_rate": 9.123525731390402e-06,
      "loss": 0.8594,
      "step": 715410
    },
    {
      "epoch": 2.507368738324799,
      "grad_norm": 2.96875,
      "learning_rate": 9.1228767027267e-06,
      "loss": 0.8365,
      "step": 715420
    },
    {
      "epoch": 2.507403785831695,
      "grad_norm": 3.1875,
      "learning_rate": 9.122227674062998e-06,
      "loss": 0.8893,
      "step": 715430
    },
    {
      "epoch": 2.5074388333385906,
      "grad_norm": 2.84375,
      "learning_rate": 9.121578645399296e-06,
      "loss": 0.8415,
      "step": 715440
    },
    {
      "epoch": 2.507473880845486,
      "grad_norm": 2.78125,
      "learning_rate": 9.120929616735594e-06,
      "loss": 0.7606,
      "step": 715450
    },
    {
      "epoch": 2.5075089283523817,
      "grad_norm": 3.046875,
      "learning_rate": 9.120280588071892e-06,
      "loss": 0.8378,
      "step": 715460
    },
    {
      "epoch": 2.507543975859277,
      "grad_norm": 3.109375,
      "learning_rate": 9.11963155940819e-06,
      "loss": 0.7978,
      "step": 715470
    },
    {
      "epoch": 2.5075790233661728,
      "grad_norm": 3.03125,
      "learning_rate": 9.118982530744488e-06,
      "loss": 0.7913,
      "step": 715480
    },
    {
      "epoch": 2.5076140708730685,
      "grad_norm": 2.859375,
      "learning_rate": 9.118333502080786e-06,
      "loss": 0.8383,
      "step": 715490
    },
    {
      "epoch": 2.5076491183799643,
      "grad_norm": 2.65625,
      "learning_rate": 9.117684473417084e-06,
      "loss": 0.7729,
      "step": 715500
    },
    {
      "epoch": 2.5076841658868596,
      "grad_norm": 2.65625,
      "learning_rate": 9.117035444753384e-06,
      "loss": 0.7645,
      "step": 715510
    },
    {
      "epoch": 2.5077192133937554,
      "grad_norm": 2.609375,
      "learning_rate": 9.116386416089682e-06,
      "loss": 0.8199,
      "step": 715520
    },
    {
      "epoch": 2.5077542609006507,
      "grad_norm": 3.15625,
      "learning_rate": 9.11573738742598e-06,
      "loss": 0.8992,
      "step": 715530
    },
    {
      "epoch": 2.5077893084075464,
      "grad_norm": 2.8125,
      "learning_rate": 9.115088358762276e-06,
      "loss": 0.8517,
      "step": 715540
    },
    {
      "epoch": 2.507824355914442,
      "grad_norm": 2.765625,
      "learning_rate": 9.114439330098574e-06,
      "loss": 0.7185,
      "step": 715550
    },
    {
      "epoch": 2.5078594034213375,
      "grad_norm": 2.828125,
      "learning_rate": 9.113790301434872e-06,
      "loss": 0.7949,
      "step": 715560
    },
    {
      "epoch": 2.5078944509282333,
      "grad_norm": 3.234375,
      "learning_rate": 9.113141272771172e-06,
      "loss": 0.8619,
      "step": 715570
    },
    {
      "epoch": 2.5079294984351286,
      "grad_norm": 2.84375,
      "learning_rate": 9.11249224410747e-06,
      "loss": 0.7707,
      "step": 715580
    },
    {
      "epoch": 2.5079645459420243,
      "grad_norm": 3.125,
      "learning_rate": 9.111843215443768e-06,
      "loss": 0.8375,
      "step": 715590
    },
    {
      "epoch": 2.50799959344892,
      "grad_norm": 3.140625,
      "learning_rate": 9.111194186780066e-06,
      "loss": 0.8366,
      "step": 715600
    },
    {
      "epoch": 2.508034640955816,
      "grad_norm": 3.28125,
      "learning_rate": 9.110545158116364e-06,
      "loss": 0.7667,
      "step": 715610
    },
    {
      "epoch": 2.508069688462711,
      "grad_norm": 2.828125,
      "learning_rate": 9.109896129452662e-06,
      "loss": 0.8221,
      "step": 715620
    },
    {
      "epoch": 2.508104735969607,
      "grad_norm": 2.59375,
      "learning_rate": 9.10924710078896e-06,
      "loss": 0.8295,
      "step": 715630
    },
    {
      "epoch": 2.5081397834765022,
      "grad_norm": 2.765625,
      "learning_rate": 9.108598072125258e-06,
      "loss": 0.7504,
      "step": 715640
    },
    {
      "epoch": 2.508174830983398,
      "grad_norm": 3.0625,
      "learning_rate": 9.107949043461556e-06,
      "loss": 0.872,
      "step": 715650
    },
    {
      "epoch": 2.5082098784902938,
      "grad_norm": 3.125,
      "learning_rate": 9.107300014797854e-06,
      "loss": 0.8396,
      "step": 715660
    },
    {
      "epoch": 2.508244925997189,
      "grad_norm": 2.953125,
      "learning_rate": 9.106650986134152e-06,
      "loss": 0.8175,
      "step": 715670
    },
    {
      "epoch": 2.508279973504085,
      "grad_norm": 2.671875,
      "learning_rate": 9.10600195747045e-06,
      "loss": 0.7492,
      "step": 715680
    },
    {
      "epoch": 2.50831502101098,
      "grad_norm": 2.90625,
      "learning_rate": 9.105352928806748e-06,
      "loss": 0.8012,
      "step": 715690
    },
    {
      "epoch": 2.508350068517876,
      "grad_norm": 2.96875,
      "learning_rate": 9.104703900143047e-06,
      "loss": 0.7906,
      "step": 715700
    },
    {
      "epoch": 2.5083851160247717,
      "grad_norm": 2.953125,
      "learning_rate": 9.104054871479345e-06,
      "loss": 0.787,
      "step": 715710
    },
    {
      "epoch": 2.5084201635316674,
      "grad_norm": 3.015625,
      "learning_rate": 9.103405842815643e-06,
      "loss": 0.7986,
      "step": 715720
    },
    {
      "epoch": 2.5084552110385627,
      "grad_norm": 2.734375,
      "learning_rate": 9.10275681415194e-06,
      "loss": 0.8032,
      "step": 715730
    },
    {
      "epoch": 2.5084902585454585,
      "grad_norm": 2.796875,
      "learning_rate": 9.102107785488238e-06,
      "loss": 0.713,
      "step": 715740
    },
    {
      "epoch": 2.508525306052354,
      "grad_norm": 2.671875,
      "learning_rate": 9.101458756824537e-06,
      "loss": 0.8255,
      "step": 715750
    },
    {
      "epoch": 2.5085603535592496,
      "grad_norm": 3.25,
      "learning_rate": 9.100809728160835e-06,
      "loss": 0.778,
      "step": 715760
    },
    {
      "epoch": 2.5085954010661453,
      "grad_norm": 2.75,
      "learning_rate": 9.100160699497133e-06,
      "loss": 0.7545,
      "step": 715770
    },
    {
      "epoch": 2.5086304485730406,
      "grad_norm": 3.0,
      "learning_rate": 9.099511670833431e-06,
      "loss": 0.8205,
      "step": 715780
    },
    {
      "epoch": 2.5086654960799364,
      "grad_norm": 2.796875,
      "learning_rate": 9.09886264216973e-06,
      "loss": 0.7421,
      "step": 715790
    },
    {
      "epoch": 2.5087005435868317,
      "grad_norm": 3.09375,
      "learning_rate": 9.098213613506027e-06,
      "loss": 0.7912,
      "step": 715800
    },
    {
      "epoch": 2.5087355910937275,
      "grad_norm": 3.03125,
      "learning_rate": 9.097564584842325e-06,
      "loss": 0.7234,
      "step": 715810
    },
    {
      "epoch": 2.508770638600623,
      "grad_norm": 2.578125,
      "learning_rate": 9.096915556178623e-06,
      "loss": 0.7809,
      "step": 715820
    },
    {
      "epoch": 2.508805686107519,
      "grad_norm": 3.328125,
      "learning_rate": 9.096266527514921e-06,
      "loss": 0.8404,
      "step": 715830
    },
    {
      "epoch": 2.5088407336144143,
      "grad_norm": 2.90625,
      "learning_rate": 9.09561749885122e-06,
      "loss": 0.8409,
      "step": 715840
    },
    {
      "epoch": 2.50887578112131,
      "grad_norm": 2.859375,
      "learning_rate": 9.094968470187517e-06,
      "loss": 0.8275,
      "step": 715850
    },
    {
      "epoch": 2.5089108286282054,
      "grad_norm": 2.734375,
      "learning_rate": 9.094319441523815e-06,
      "loss": 0.85,
      "step": 715860
    },
    {
      "epoch": 2.508945876135101,
      "grad_norm": 3.046875,
      "learning_rate": 9.093670412860113e-06,
      "loss": 0.7932,
      "step": 715870
    },
    {
      "epoch": 2.508980923641997,
      "grad_norm": 3.140625,
      "learning_rate": 9.093021384196413e-06,
      "loss": 0.7674,
      "step": 715880
    },
    {
      "epoch": 2.509015971148892,
      "grad_norm": 3.421875,
      "learning_rate": 9.092372355532711e-06,
      "loss": 0.773,
      "step": 715890
    },
    {
      "epoch": 2.509051018655788,
      "grad_norm": 3.390625,
      "learning_rate": 9.091723326869009e-06,
      "loss": 0.8595,
      "step": 715900
    },
    {
      "epoch": 2.5090860661626833,
      "grad_norm": 2.9375,
      "learning_rate": 9.091074298205305e-06,
      "loss": 0.8199,
      "step": 715910
    },
    {
      "epoch": 2.509121113669579,
      "grad_norm": 3.265625,
      "learning_rate": 9.090425269541603e-06,
      "loss": 0.7787,
      "step": 715920
    },
    {
      "epoch": 2.5091561611764748,
      "grad_norm": 3.015625,
      "learning_rate": 9.089776240877901e-06,
      "loss": 0.7975,
      "step": 715930
    },
    {
      "epoch": 2.5091912086833705,
      "grad_norm": 2.90625,
      "learning_rate": 9.089127212214201e-06,
      "loss": 0.817,
      "step": 715940
    },
    {
      "epoch": 2.509226256190266,
      "grad_norm": 2.6875,
      "learning_rate": 9.088478183550499e-06,
      "loss": 0.6907,
      "step": 715950
    },
    {
      "epoch": 2.5092613036971616,
      "grad_norm": 2.5,
      "learning_rate": 9.087829154886797e-06,
      "loss": 0.7723,
      "step": 715960
    },
    {
      "epoch": 2.509296351204057,
      "grad_norm": 3.296875,
      "learning_rate": 9.087180126223095e-06,
      "loss": 0.833,
      "step": 715970
    },
    {
      "epoch": 2.5093313987109527,
      "grad_norm": 2.84375,
      "learning_rate": 9.086531097559393e-06,
      "loss": 0.7634,
      "step": 715980
    },
    {
      "epoch": 2.5093664462178484,
      "grad_norm": 3.390625,
      "learning_rate": 9.085882068895691e-06,
      "loss": 0.7795,
      "step": 715990
    },
    {
      "epoch": 2.5094014937247437,
      "grad_norm": 2.65625,
      "learning_rate": 9.08523304023199e-06,
      "loss": 0.6917,
      "step": 716000
    },
    {
      "epoch": 2.5094365412316395,
      "grad_norm": 3.0,
      "learning_rate": 9.084584011568287e-06,
      "loss": 0.8359,
      "step": 716010
    },
    {
      "epoch": 2.509471588738535,
      "grad_norm": 3.125,
      "learning_rate": 9.083934982904585e-06,
      "loss": 0.7569,
      "step": 716020
    },
    {
      "epoch": 2.5095066362454306,
      "grad_norm": 3.28125,
      "learning_rate": 9.083285954240883e-06,
      "loss": 0.8135,
      "step": 716030
    },
    {
      "epoch": 2.5095416837523263,
      "grad_norm": 2.765625,
      "learning_rate": 9.082636925577181e-06,
      "loss": 0.6612,
      "step": 716040
    },
    {
      "epoch": 2.509576731259222,
      "grad_norm": 2.71875,
      "learning_rate": 9.081987896913479e-06,
      "loss": 0.7571,
      "step": 716050
    },
    {
      "epoch": 2.5096117787661174,
      "grad_norm": 3.0,
      "learning_rate": 9.081338868249779e-06,
      "loss": 0.8878,
      "step": 716060
    },
    {
      "epoch": 2.509646826273013,
      "grad_norm": 2.734375,
      "learning_rate": 9.080689839586077e-06,
      "loss": 0.8003,
      "step": 716070
    },
    {
      "epoch": 2.5096818737799085,
      "grad_norm": 3.3125,
      "learning_rate": 9.080040810922375e-06,
      "loss": 0.8086,
      "step": 716080
    },
    {
      "epoch": 2.5097169212868042,
      "grad_norm": 3.171875,
      "learning_rate": 9.079391782258673e-06,
      "loss": 0.8233,
      "step": 716090
    },
    {
      "epoch": 2.5097519687937,
      "grad_norm": 2.546875,
      "learning_rate": 9.078742753594969e-06,
      "loss": 0.7471,
      "step": 716100
    },
    {
      "epoch": 2.5097870163005953,
      "grad_norm": 2.765625,
      "learning_rate": 9.078093724931267e-06,
      "loss": 0.7085,
      "step": 716110
    },
    {
      "epoch": 2.509822063807491,
      "grad_norm": 2.8125,
      "learning_rate": 9.077444696267567e-06,
      "loss": 0.7673,
      "step": 716120
    },
    {
      "epoch": 2.5098571113143864,
      "grad_norm": 2.96875,
      "learning_rate": 9.076795667603865e-06,
      "loss": 0.7445,
      "step": 716130
    },
    {
      "epoch": 2.509892158821282,
      "grad_norm": 3.71875,
      "learning_rate": 9.076146638940163e-06,
      "loss": 0.8437,
      "step": 716140
    },
    {
      "epoch": 2.509927206328178,
      "grad_norm": 2.703125,
      "learning_rate": 9.07549761027646e-06,
      "loss": 0.733,
      "step": 716150
    },
    {
      "epoch": 2.5099622538350737,
      "grad_norm": 2.890625,
      "learning_rate": 9.074848581612759e-06,
      "loss": 0.804,
      "step": 716160
    },
    {
      "epoch": 2.509997301341969,
      "grad_norm": 3.171875,
      "learning_rate": 9.074199552949057e-06,
      "loss": 0.862,
      "step": 716170
    },
    {
      "epoch": 2.5100323488488647,
      "grad_norm": 2.78125,
      "learning_rate": 9.073550524285355e-06,
      "loss": 0.7628,
      "step": 716180
    },
    {
      "epoch": 2.51006739635576,
      "grad_norm": 2.796875,
      "learning_rate": 9.072901495621654e-06,
      "loss": 0.7752,
      "step": 716190
    },
    {
      "epoch": 2.510102443862656,
      "grad_norm": 3.25,
      "learning_rate": 9.07225246695795e-06,
      "loss": 0.8406,
      "step": 716200
    },
    {
      "epoch": 2.5101374913695516,
      "grad_norm": 2.6875,
      "learning_rate": 9.071603438294249e-06,
      "loss": 0.8015,
      "step": 716210
    },
    {
      "epoch": 2.510172538876447,
      "grad_norm": 2.90625,
      "learning_rate": 9.070954409630547e-06,
      "loss": 0.9113,
      "step": 716220
    },
    {
      "epoch": 2.5102075863833426,
      "grad_norm": 2.96875,
      "learning_rate": 9.070305380966845e-06,
      "loss": 0.7945,
      "step": 716230
    },
    {
      "epoch": 2.5102426338902384,
      "grad_norm": 2.875,
      "learning_rate": 9.069656352303144e-06,
      "loss": 0.8333,
      "step": 716240
    },
    {
      "epoch": 2.5102776813971337,
      "grad_norm": 3.15625,
      "learning_rate": 9.069007323639442e-06,
      "loss": 0.7566,
      "step": 716250
    },
    {
      "epoch": 2.5103127289040295,
      "grad_norm": 3.140625,
      "learning_rate": 9.06835829497574e-06,
      "loss": 0.8384,
      "step": 716260
    },
    {
      "epoch": 2.510347776410925,
      "grad_norm": 2.484375,
      "learning_rate": 9.067709266312038e-06,
      "loss": 0.804,
      "step": 716270
    },
    {
      "epoch": 2.5103828239178205,
      "grad_norm": 3.03125,
      "learning_rate": 9.067060237648336e-06,
      "loss": 0.8598,
      "step": 716280
    },
    {
      "epoch": 2.5104178714247163,
      "grad_norm": 2.921875,
      "learning_rate": 9.066411208984633e-06,
      "loss": 0.8096,
      "step": 716290
    },
    {
      "epoch": 2.5104529189316116,
      "grad_norm": 3.015625,
      "learning_rate": 9.065762180320932e-06,
      "loss": 0.8348,
      "step": 716300
    },
    {
      "epoch": 2.5104879664385074,
      "grad_norm": 2.8125,
      "learning_rate": 9.06511315165723e-06,
      "loss": 0.8479,
      "step": 716310
    },
    {
      "epoch": 2.510523013945403,
      "grad_norm": 3.03125,
      "learning_rate": 9.064464122993528e-06,
      "loss": 0.8049,
      "step": 716320
    },
    {
      "epoch": 2.5105580614522984,
      "grad_norm": 2.8125,
      "learning_rate": 9.063815094329826e-06,
      "loss": 0.8187,
      "step": 716330
    },
    {
      "epoch": 2.510593108959194,
      "grad_norm": 2.96875,
      "learning_rate": 9.063166065666124e-06,
      "loss": 0.8953,
      "step": 716340
    },
    {
      "epoch": 2.51062815646609,
      "grad_norm": 2.75,
      "learning_rate": 9.062517037002422e-06,
      "loss": 0.8291,
      "step": 716350
    },
    {
      "epoch": 2.5106632039729853,
      "grad_norm": 2.9375,
      "learning_rate": 9.06186800833872e-06,
      "loss": 0.801,
      "step": 716360
    },
    {
      "epoch": 2.510698251479881,
      "grad_norm": 2.71875,
      "learning_rate": 9.06121897967502e-06,
      "loss": 0.812,
      "step": 716370
    },
    {
      "epoch": 2.5107332989867768,
      "grad_norm": 2.828125,
      "learning_rate": 9.060569951011316e-06,
      "loss": 0.7939,
      "step": 716380
    },
    {
      "epoch": 2.510768346493672,
      "grad_norm": 3.078125,
      "learning_rate": 9.059920922347614e-06,
      "loss": 0.8369,
      "step": 716390
    },
    {
      "epoch": 2.510803394000568,
      "grad_norm": 3.0625,
      "learning_rate": 9.059271893683912e-06,
      "loss": 0.8423,
      "step": 716400
    },
    {
      "epoch": 2.510838441507463,
      "grad_norm": 3.03125,
      "learning_rate": 9.05862286502021e-06,
      "loss": 0.8318,
      "step": 716410
    },
    {
      "epoch": 2.510873489014359,
      "grad_norm": 2.546875,
      "learning_rate": 9.057973836356508e-06,
      "loss": 0.7433,
      "step": 716420
    },
    {
      "epoch": 2.5109085365212547,
      "grad_norm": 2.609375,
      "learning_rate": 9.057324807692808e-06,
      "loss": 0.7089,
      "step": 716430
    },
    {
      "epoch": 2.5109435840281504,
      "grad_norm": 2.796875,
      "learning_rate": 9.056675779029106e-06,
      "loss": 0.8572,
      "step": 716440
    },
    {
      "epoch": 2.5109786315350457,
      "grad_norm": 3.078125,
      "learning_rate": 9.056026750365404e-06,
      "loss": 0.8292,
      "step": 716450
    },
    {
      "epoch": 2.5110136790419415,
      "grad_norm": 3.0625,
      "learning_rate": 9.055377721701702e-06,
      "loss": 0.7725,
      "step": 716460
    },
    {
      "epoch": 2.511048726548837,
      "grad_norm": 3.15625,
      "learning_rate": 9.054728693038e-06,
      "loss": 0.7829,
      "step": 716470
    },
    {
      "epoch": 2.5110837740557326,
      "grad_norm": 3.578125,
      "learning_rate": 9.054079664374298e-06,
      "loss": 0.788,
      "step": 716480
    },
    {
      "epoch": 2.5111188215626283,
      "grad_norm": 3.0,
      "learning_rate": 9.053430635710596e-06,
      "loss": 0.8289,
      "step": 716490
    },
    {
      "epoch": 2.5111538690695236,
      "grad_norm": 3.0625,
      "learning_rate": 9.052781607046894e-06,
      "loss": 0.7828,
      "step": 716500
    },
    {
      "epoch": 2.5111889165764194,
      "grad_norm": 3.234375,
      "learning_rate": 9.052132578383192e-06,
      "loss": 0.8619,
      "step": 716510
    },
    {
      "epoch": 2.5112239640833147,
      "grad_norm": 2.78125,
      "learning_rate": 9.05148354971949e-06,
      "loss": 0.8872,
      "step": 716520
    },
    {
      "epoch": 2.5112590115902105,
      "grad_norm": 2.734375,
      "learning_rate": 9.050834521055788e-06,
      "loss": 0.8404,
      "step": 716530
    },
    {
      "epoch": 2.5112940590971062,
      "grad_norm": 2.765625,
      "learning_rate": 9.050185492392086e-06,
      "loss": 0.8229,
      "step": 716540
    },
    {
      "epoch": 2.511329106604002,
      "grad_norm": 2.890625,
      "learning_rate": 9.049536463728386e-06,
      "loss": 0.6667,
      "step": 716550
    },
    {
      "epoch": 2.5113641541108973,
      "grad_norm": 2.953125,
      "learning_rate": 9.048887435064684e-06,
      "loss": 0.7881,
      "step": 716560
    },
    {
      "epoch": 2.511399201617793,
      "grad_norm": 3.0625,
      "learning_rate": 9.04823840640098e-06,
      "loss": 0.8554,
      "step": 716570
    },
    {
      "epoch": 2.5114342491246884,
      "grad_norm": 3.0625,
      "learning_rate": 9.047589377737278e-06,
      "loss": 0.775,
      "step": 716580
    },
    {
      "epoch": 2.511469296631584,
      "grad_norm": 2.890625,
      "learning_rate": 9.046940349073576e-06,
      "loss": 0.7266,
      "step": 716590
    },
    {
      "epoch": 2.51150434413848,
      "grad_norm": 3.078125,
      "learning_rate": 9.046291320409874e-06,
      "loss": 0.8122,
      "step": 716600
    },
    {
      "epoch": 2.511539391645375,
      "grad_norm": 3.015625,
      "learning_rate": 9.045642291746174e-06,
      "loss": 0.7557,
      "step": 716610
    },
    {
      "epoch": 2.511574439152271,
      "grad_norm": 2.796875,
      "learning_rate": 9.044993263082472e-06,
      "loss": 0.8404,
      "step": 716620
    },
    {
      "epoch": 2.5116094866591663,
      "grad_norm": 2.71875,
      "learning_rate": 9.04434423441877e-06,
      "loss": 0.8157,
      "step": 716630
    },
    {
      "epoch": 2.511644534166062,
      "grad_norm": 2.703125,
      "learning_rate": 9.043695205755068e-06,
      "loss": 0.8348,
      "step": 716640
    },
    {
      "epoch": 2.511679581672958,
      "grad_norm": 2.9375,
      "learning_rate": 9.043046177091366e-06,
      "loss": 0.8289,
      "step": 716650
    },
    {
      "epoch": 2.5117146291798536,
      "grad_norm": 2.984375,
      "learning_rate": 9.042397148427664e-06,
      "loss": 0.8279,
      "step": 716660
    },
    {
      "epoch": 2.511749676686749,
      "grad_norm": 3.0625,
      "learning_rate": 9.041748119763962e-06,
      "loss": 0.7767,
      "step": 716670
    },
    {
      "epoch": 2.5117847241936446,
      "grad_norm": 2.546875,
      "learning_rate": 9.04109909110026e-06,
      "loss": 0.8221,
      "step": 716680
    },
    {
      "epoch": 2.51181977170054,
      "grad_norm": 3.15625,
      "learning_rate": 9.040450062436558e-06,
      "loss": 0.8607,
      "step": 716690
    },
    {
      "epoch": 2.5118548192074357,
      "grad_norm": 3.015625,
      "learning_rate": 9.039801033772856e-06,
      "loss": 0.7037,
      "step": 716700
    },
    {
      "epoch": 2.5118898667143315,
      "grad_norm": 3.125,
      "learning_rate": 9.039152005109154e-06,
      "loss": 0.848,
      "step": 716710
    },
    {
      "epoch": 2.5119249142212268,
      "grad_norm": 3.546875,
      "learning_rate": 9.038502976445452e-06,
      "loss": 0.7764,
      "step": 716720
    },
    {
      "epoch": 2.5119599617281225,
      "grad_norm": 2.734375,
      "learning_rate": 9.03785394778175e-06,
      "loss": 0.7184,
      "step": 716730
    },
    {
      "epoch": 2.511995009235018,
      "grad_norm": 2.71875,
      "learning_rate": 9.03720491911805e-06,
      "loss": 0.6921,
      "step": 716740
    },
    {
      "epoch": 2.5120300567419136,
      "grad_norm": 2.796875,
      "learning_rate": 9.036555890454348e-06,
      "loss": 0.8138,
      "step": 716750
    },
    {
      "epoch": 2.5120651042488094,
      "grad_norm": 2.359375,
      "learning_rate": 9.035906861790644e-06,
      "loss": 0.7393,
      "step": 716760
    },
    {
      "epoch": 2.512100151755705,
      "grad_norm": 3.109375,
      "learning_rate": 9.035257833126942e-06,
      "loss": 0.8396,
      "step": 716770
    },
    {
      "epoch": 2.5121351992626004,
      "grad_norm": 2.875,
      "learning_rate": 9.03460880446324e-06,
      "loss": 0.8035,
      "step": 716780
    },
    {
      "epoch": 2.512170246769496,
      "grad_norm": 2.71875,
      "learning_rate": 9.03395977579954e-06,
      "loss": 0.7312,
      "step": 716790
    },
    {
      "epoch": 2.5122052942763915,
      "grad_norm": 2.84375,
      "learning_rate": 9.033310747135838e-06,
      "loss": 0.8227,
      "step": 716800
    },
    {
      "epoch": 2.5122403417832873,
      "grad_norm": 3.03125,
      "learning_rate": 9.032661718472136e-06,
      "loss": 0.7108,
      "step": 716810
    },
    {
      "epoch": 2.512275389290183,
      "grad_norm": 3.390625,
      "learning_rate": 9.032012689808434e-06,
      "loss": 0.8023,
      "step": 716820
    },
    {
      "epoch": 2.5123104367970783,
      "grad_norm": 3.0,
      "learning_rate": 9.031363661144732e-06,
      "loss": 0.8072,
      "step": 716830
    },
    {
      "epoch": 2.512345484303974,
      "grad_norm": 2.90625,
      "learning_rate": 9.03071463248103e-06,
      "loss": 0.7087,
      "step": 716840
    },
    {
      "epoch": 2.5123805318108694,
      "grad_norm": 3.515625,
      "learning_rate": 9.030065603817328e-06,
      "loss": 0.807,
      "step": 716850
    },
    {
      "epoch": 2.512415579317765,
      "grad_norm": 3.015625,
      "learning_rate": 9.029416575153626e-06,
      "loss": 0.8013,
      "step": 716860
    },
    {
      "epoch": 2.512450626824661,
      "grad_norm": 2.46875,
      "learning_rate": 9.028767546489924e-06,
      "loss": 0.8125,
      "step": 716870
    },
    {
      "epoch": 2.5124856743315567,
      "grad_norm": 2.984375,
      "learning_rate": 9.028118517826222e-06,
      "loss": 0.894,
      "step": 716880
    },
    {
      "epoch": 2.512520721838452,
      "grad_norm": 3.328125,
      "learning_rate": 9.02746948916252e-06,
      "loss": 0.7996,
      "step": 716890
    },
    {
      "epoch": 2.5125557693453477,
      "grad_norm": 2.84375,
      "learning_rate": 9.026820460498818e-06,
      "loss": 0.7407,
      "step": 716900
    },
    {
      "epoch": 2.512590816852243,
      "grad_norm": 3.1875,
      "learning_rate": 9.026171431835116e-06,
      "loss": 0.7634,
      "step": 716910
    },
    {
      "epoch": 2.512625864359139,
      "grad_norm": 3.109375,
      "learning_rate": 9.025522403171415e-06,
      "loss": 0.8568,
      "step": 716920
    },
    {
      "epoch": 2.5126609118660346,
      "grad_norm": 3.125,
      "learning_rate": 9.024873374507713e-06,
      "loss": 0.8724,
      "step": 716930
    },
    {
      "epoch": 2.51269595937293,
      "grad_norm": 3.265625,
      "learning_rate": 9.024224345844011e-06,
      "loss": 0.8292,
      "step": 716940
    },
    {
      "epoch": 2.5127310068798256,
      "grad_norm": 3.34375,
      "learning_rate": 9.023575317180308e-06,
      "loss": 0.8718,
      "step": 716950
    },
    {
      "epoch": 2.512766054386721,
      "grad_norm": 2.859375,
      "learning_rate": 9.022926288516606e-06,
      "loss": 0.7544,
      "step": 716960
    },
    {
      "epoch": 2.5128011018936167,
      "grad_norm": 2.40625,
      "learning_rate": 9.022277259852904e-06,
      "loss": 0.8065,
      "step": 716970
    },
    {
      "epoch": 2.5128361494005125,
      "grad_norm": 2.921875,
      "learning_rate": 9.021628231189203e-06,
      "loss": 0.8616,
      "step": 716980
    },
    {
      "epoch": 2.5128711969074082,
      "grad_norm": 3.1875,
      "learning_rate": 9.020979202525501e-06,
      "loss": 0.7696,
      "step": 716990
    },
    {
      "epoch": 2.5129062444143035,
      "grad_norm": 2.921875,
      "learning_rate": 9.0203301738618e-06,
      "loss": 0.8208,
      "step": 717000
    },
    {
      "epoch": 2.5129412919211993,
      "grad_norm": 2.484375,
      "learning_rate": 9.019681145198097e-06,
      "loss": 0.7117,
      "step": 717010
    },
    {
      "epoch": 2.5129763394280946,
      "grad_norm": 2.5625,
      "learning_rate": 9.019032116534395e-06,
      "loss": 0.8117,
      "step": 717020
    },
    {
      "epoch": 2.5130113869349904,
      "grad_norm": 3.265625,
      "learning_rate": 9.018383087870693e-06,
      "loss": 0.837,
      "step": 717030
    },
    {
      "epoch": 2.513046434441886,
      "grad_norm": 2.625,
      "learning_rate": 9.017734059206991e-06,
      "loss": 0.8321,
      "step": 717040
    },
    {
      "epoch": 2.5130814819487814,
      "grad_norm": 2.78125,
      "learning_rate": 9.01708503054329e-06,
      "loss": 0.8619,
      "step": 717050
    },
    {
      "epoch": 2.513116529455677,
      "grad_norm": 2.921875,
      "learning_rate": 9.016436001879587e-06,
      "loss": 0.7764,
      "step": 717060
    },
    {
      "epoch": 2.5131515769625725,
      "grad_norm": 3.140625,
      "learning_rate": 9.015786973215885e-06,
      "loss": 0.7988,
      "step": 717070
    },
    {
      "epoch": 2.5131866244694683,
      "grad_norm": 3.109375,
      "learning_rate": 9.015137944552183e-06,
      "loss": 0.8531,
      "step": 717080
    },
    {
      "epoch": 2.513221671976364,
      "grad_norm": 2.953125,
      "learning_rate": 9.014488915888481e-06,
      "loss": 0.7633,
      "step": 717090
    },
    {
      "epoch": 2.51325671948326,
      "grad_norm": 3.21875,
      "learning_rate": 9.013839887224781e-06,
      "loss": 0.8137,
      "step": 717100
    },
    {
      "epoch": 2.513291766990155,
      "grad_norm": 2.859375,
      "learning_rate": 9.013190858561079e-06,
      "loss": 0.7762,
      "step": 717110
    },
    {
      "epoch": 2.513326814497051,
      "grad_norm": 2.65625,
      "learning_rate": 9.012541829897377e-06,
      "loss": 0.837,
      "step": 717120
    },
    {
      "epoch": 2.513361862003946,
      "grad_norm": 2.765625,
      "learning_rate": 9.011892801233675e-06,
      "loss": 0.7566,
      "step": 717130
    },
    {
      "epoch": 2.513396909510842,
      "grad_norm": 2.875,
      "learning_rate": 9.011243772569971e-06,
      "loss": 0.7763,
      "step": 717140
    },
    {
      "epoch": 2.5134319570177377,
      "grad_norm": 2.703125,
      "learning_rate": 9.01059474390627e-06,
      "loss": 0.8118,
      "step": 717150
    },
    {
      "epoch": 2.513467004524633,
      "grad_norm": 3.109375,
      "learning_rate": 9.009945715242569e-06,
      "loss": 0.8602,
      "step": 717160
    },
    {
      "epoch": 2.5135020520315288,
      "grad_norm": 3.25,
      "learning_rate": 9.009296686578867e-06,
      "loss": 0.7376,
      "step": 717170
    },
    {
      "epoch": 2.513537099538424,
      "grad_norm": 2.6875,
      "learning_rate": 9.008647657915165e-06,
      "loss": 0.8482,
      "step": 717180
    },
    {
      "epoch": 2.51357214704532,
      "grad_norm": 2.828125,
      "learning_rate": 9.007998629251463e-06,
      "loss": 0.8286,
      "step": 717190
    },
    {
      "epoch": 2.5136071945522156,
      "grad_norm": 2.984375,
      "learning_rate": 9.007349600587761e-06,
      "loss": 0.8472,
      "step": 717200
    },
    {
      "epoch": 2.5136422420591114,
      "grad_norm": 2.90625,
      "learning_rate": 9.006700571924059e-06,
      "loss": 0.8354,
      "step": 717210
    },
    {
      "epoch": 2.5136772895660067,
      "grad_norm": 3.234375,
      "learning_rate": 9.006051543260357e-06,
      "loss": 0.7695,
      "step": 717220
    },
    {
      "epoch": 2.5137123370729024,
      "grad_norm": 3.078125,
      "learning_rate": 9.005402514596655e-06,
      "loss": 0.771,
      "step": 717230
    },
    {
      "epoch": 2.5137473845797977,
      "grad_norm": 3.40625,
      "learning_rate": 9.004753485932953e-06,
      "loss": 0.8397,
      "step": 717240
    },
    {
      "epoch": 2.5137824320866935,
      "grad_norm": 3.296875,
      "learning_rate": 9.004104457269251e-06,
      "loss": 0.8407,
      "step": 717250
    },
    {
      "epoch": 2.5138174795935893,
      "grad_norm": 2.96875,
      "learning_rate": 9.003455428605549e-06,
      "loss": 0.8559,
      "step": 717260
    },
    {
      "epoch": 2.5138525271004846,
      "grad_norm": 3.09375,
      "learning_rate": 9.002806399941847e-06,
      "loss": 0.7747,
      "step": 717270
    },
    {
      "epoch": 2.5138875746073803,
      "grad_norm": 3.671875,
      "learning_rate": 9.002157371278145e-06,
      "loss": 0.8475,
      "step": 717280
    },
    {
      "epoch": 2.5139226221142756,
      "grad_norm": 3.25,
      "learning_rate": 9.001508342614445e-06,
      "loss": 0.8021,
      "step": 717290
    },
    {
      "epoch": 2.5139576696211714,
      "grad_norm": 3.171875,
      "learning_rate": 9.000859313950743e-06,
      "loss": 0.9015,
      "step": 717300
    },
    {
      "epoch": 2.513992717128067,
      "grad_norm": 2.90625,
      "learning_rate": 9.00021028528704e-06,
      "loss": 0.7696,
      "step": 717310
    },
    {
      "epoch": 2.514027764634963,
      "grad_norm": 2.828125,
      "learning_rate": 8.999561256623337e-06,
      "loss": 0.7809,
      "step": 717320
    },
    {
      "epoch": 2.5140628121418582,
      "grad_norm": 2.59375,
      "learning_rate": 8.998912227959635e-06,
      "loss": 0.7401,
      "step": 717330
    },
    {
      "epoch": 2.514097859648754,
      "grad_norm": 3.09375,
      "learning_rate": 8.998263199295935e-06,
      "loss": 0.8731,
      "step": 717340
    },
    {
      "epoch": 2.5141329071556493,
      "grad_norm": 3.015625,
      "learning_rate": 8.997614170632233e-06,
      "loss": 0.8054,
      "step": 717350
    },
    {
      "epoch": 2.514167954662545,
      "grad_norm": 2.6875,
      "learning_rate": 8.99696514196853e-06,
      "loss": 0.8805,
      "step": 717360
    },
    {
      "epoch": 2.514203002169441,
      "grad_norm": 3.09375,
      "learning_rate": 8.996316113304829e-06,
      "loss": 0.7842,
      "step": 717370
    },
    {
      "epoch": 2.514238049676336,
      "grad_norm": 2.46875,
      "learning_rate": 8.995667084641127e-06,
      "loss": 0.837,
      "step": 717380
    },
    {
      "epoch": 2.514273097183232,
      "grad_norm": 3.1875,
      "learning_rate": 8.995018055977425e-06,
      "loss": 0.7766,
      "step": 717390
    },
    {
      "epoch": 2.514308144690127,
      "grad_norm": 3.328125,
      "learning_rate": 8.994369027313723e-06,
      "loss": 0.9411,
      "step": 717400
    },
    {
      "epoch": 2.514343192197023,
      "grad_norm": 2.828125,
      "learning_rate": 8.993719998650022e-06,
      "loss": 0.9062,
      "step": 717410
    },
    {
      "epoch": 2.5143782397039187,
      "grad_norm": 2.8125,
      "learning_rate": 8.993070969986319e-06,
      "loss": 0.7692,
      "step": 717420
    },
    {
      "epoch": 2.5144132872108145,
      "grad_norm": 3.046875,
      "learning_rate": 8.992421941322617e-06,
      "loss": 0.8496,
      "step": 717430
    },
    {
      "epoch": 2.51444833471771,
      "grad_norm": 2.765625,
      "learning_rate": 8.991772912658915e-06,
      "loss": 0.7878,
      "step": 717440
    },
    {
      "epoch": 2.5144833822246055,
      "grad_norm": 2.484375,
      "learning_rate": 8.991123883995213e-06,
      "loss": 0.7854,
      "step": 717450
    },
    {
      "epoch": 2.514518429731501,
      "grad_norm": 2.765625,
      "learning_rate": 8.99047485533151e-06,
      "loss": 0.9013,
      "step": 717460
    },
    {
      "epoch": 2.5145534772383966,
      "grad_norm": 2.90625,
      "learning_rate": 8.98982582666781e-06,
      "loss": 0.8088,
      "step": 717470
    },
    {
      "epoch": 2.5145885247452924,
      "grad_norm": 3.46875,
      "learning_rate": 8.989176798004108e-06,
      "loss": 0.8524,
      "step": 717480
    },
    {
      "epoch": 2.5146235722521877,
      "grad_norm": 2.96875,
      "learning_rate": 8.988527769340406e-06,
      "loss": 0.7907,
      "step": 717490
    },
    {
      "epoch": 2.5146586197590834,
      "grad_norm": 3.109375,
      "learning_rate": 8.987878740676704e-06,
      "loss": 0.8725,
      "step": 717500
    },
    {
      "epoch": 2.5146936672659788,
      "grad_norm": 2.6875,
      "learning_rate": 8.987229712013e-06,
      "loss": 0.8108,
      "step": 717510
    },
    {
      "epoch": 2.5147287147728745,
      "grad_norm": 2.515625,
      "learning_rate": 8.986580683349299e-06,
      "loss": 0.7207,
      "step": 717520
    },
    {
      "epoch": 2.5147637622797703,
      "grad_norm": 2.84375,
      "learning_rate": 8.985931654685598e-06,
      "loss": 0.7716,
      "step": 717530
    },
    {
      "epoch": 2.514798809786666,
      "grad_norm": 2.921875,
      "learning_rate": 8.985282626021896e-06,
      "loss": 0.859,
      "step": 717540
    },
    {
      "epoch": 2.5148338572935613,
      "grad_norm": 2.734375,
      "learning_rate": 8.984633597358194e-06,
      "loss": 0.7032,
      "step": 717550
    },
    {
      "epoch": 2.514868904800457,
      "grad_norm": 2.75,
      "learning_rate": 8.983984568694492e-06,
      "loss": 0.7972,
      "step": 717560
    },
    {
      "epoch": 2.5149039523073524,
      "grad_norm": 2.953125,
      "learning_rate": 8.98333554003079e-06,
      "loss": 0.9226,
      "step": 717570
    },
    {
      "epoch": 2.514938999814248,
      "grad_norm": 2.84375,
      "learning_rate": 8.982686511367088e-06,
      "loss": 0.7982,
      "step": 717580
    },
    {
      "epoch": 2.514974047321144,
      "grad_norm": 2.65625,
      "learning_rate": 8.982037482703386e-06,
      "loss": 0.8097,
      "step": 717590
    },
    {
      "epoch": 2.5150090948280392,
      "grad_norm": 3.109375,
      "learning_rate": 8.981388454039686e-06,
      "loss": 0.8083,
      "step": 717600
    },
    {
      "epoch": 2.515044142334935,
      "grad_norm": 3.265625,
      "learning_rate": 8.980739425375982e-06,
      "loss": 0.8617,
      "step": 717610
    },
    {
      "epoch": 2.5150791898418308,
      "grad_norm": 2.765625,
      "learning_rate": 8.98009039671228e-06,
      "loss": 0.7472,
      "step": 717620
    },
    {
      "epoch": 2.515114237348726,
      "grad_norm": 2.90625,
      "learning_rate": 8.979441368048578e-06,
      "loss": 0.8253,
      "step": 717630
    },
    {
      "epoch": 2.515149284855622,
      "grad_norm": 3.421875,
      "learning_rate": 8.978792339384876e-06,
      "loss": 0.7436,
      "step": 717640
    },
    {
      "epoch": 2.5151843323625176,
      "grad_norm": 2.859375,
      "learning_rate": 8.978143310721176e-06,
      "loss": 0.7816,
      "step": 717650
    },
    {
      "epoch": 2.515219379869413,
      "grad_norm": 2.765625,
      "learning_rate": 8.977494282057474e-06,
      "loss": 0.7883,
      "step": 717660
    },
    {
      "epoch": 2.5152544273763087,
      "grad_norm": 2.921875,
      "learning_rate": 8.976845253393772e-06,
      "loss": 0.7588,
      "step": 717670
    },
    {
      "epoch": 2.515289474883204,
      "grad_norm": 2.828125,
      "learning_rate": 8.97619622473007e-06,
      "loss": 0.7791,
      "step": 717680
    },
    {
      "epoch": 2.5153245223900997,
      "grad_norm": 2.3125,
      "learning_rate": 8.975547196066368e-06,
      "loss": 0.8093,
      "step": 717690
    },
    {
      "epoch": 2.5153595698969955,
      "grad_norm": 3.0625,
      "learning_rate": 8.974898167402664e-06,
      "loss": 0.7752,
      "step": 717700
    },
    {
      "epoch": 2.515394617403891,
      "grad_norm": 3.203125,
      "learning_rate": 8.974249138738964e-06,
      "loss": 0.7739,
      "step": 717710
    },
    {
      "epoch": 2.5154296649107866,
      "grad_norm": 3.25,
      "learning_rate": 8.973600110075262e-06,
      "loss": 0.7655,
      "step": 717720
    },
    {
      "epoch": 2.5154647124176823,
      "grad_norm": 2.859375,
      "learning_rate": 8.97295108141156e-06,
      "loss": 0.8425,
      "step": 717730
    },
    {
      "epoch": 2.5154997599245776,
      "grad_norm": 2.984375,
      "learning_rate": 8.972302052747858e-06,
      "loss": 0.7427,
      "step": 717740
    },
    {
      "epoch": 2.5155348074314734,
      "grad_norm": 2.796875,
      "learning_rate": 8.971653024084156e-06,
      "loss": 0.7406,
      "step": 717750
    },
    {
      "epoch": 2.515569854938369,
      "grad_norm": 3.171875,
      "learning_rate": 8.971003995420454e-06,
      "loss": 0.8884,
      "step": 717760
    },
    {
      "epoch": 2.5156049024452645,
      "grad_norm": 3.0625,
      "learning_rate": 8.970354966756752e-06,
      "loss": 0.8337,
      "step": 717770
    },
    {
      "epoch": 2.5156399499521602,
      "grad_norm": 2.484375,
      "learning_rate": 8.969705938093052e-06,
      "loss": 0.7663,
      "step": 717780
    },
    {
      "epoch": 2.5156749974590555,
      "grad_norm": 3.390625,
      "learning_rate": 8.969056909429348e-06,
      "loss": 0.7428,
      "step": 717790
    },
    {
      "epoch": 2.5157100449659513,
      "grad_norm": 3.28125,
      "learning_rate": 8.968407880765646e-06,
      "loss": 0.8465,
      "step": 717800
    },
    {
      "epoch": 2.515745092472847,
      "grad_norm": 2.609375,
      "learning_rate": 8.967758852101944e-06,
      "loss": 0.8559,
      "step": 717810
    },
    {
      "epoch": 2.515780139979743,
      "grad_norm": 2.953125,
      "learning_rate": 8.967109823438242e-06,
      "loss": 0.7694,
      "step": 717820
    },
    {
      "epoch": 2.515815187486638,
      "grad_norm": 2.65625,
      "learning_rate": 8.96646079477454e-06,
      "loss": 0.8329,
      "step": 717830
    },
    {
      "epoch": 2.515850234993534,
      "grad_norm": 3.296875,
      "learning_rate": 8.96581176611084e-06,
      "loss": 0.8403,
      "step": 717840
    },
    {
      "epoch": 2.515885282500429,
      "grad_norm": 3.140625,
      "learning_rate": 8.965162737447138e-06,
      "loss": 0.7305,
      "step": 717850
    },
    {
      "epoch": 2.515920330007325,
      "grad_norm": 3.03125,
      "learning_rate": 8.964513708783436e-06,
      "loss": 0.7978,
      "step": 717860
    },
    {
      "epoch": 2.5159553775142207,
      "grad_norm": 2.765625,
      "learning_rate": 8.963864680119734e-06,
      "loss": 0.8401,
      "step": 717870
    },
    {
      "epoch": 2.515990425021116,
      "grad_norm": 2.921875,
      "learning_rate": 8.963215651456032e-06,
      "loss": 0.7112,
      "step": 717880
    },
    {
      "epoch": 2.516025472528012,
      "grad_norm": 2.578125,
      "learning_rate": 8.96256662279233e-06,
      "loss": 0.7127,
      "step": 717890
    },
    {
      "epoch": 2.516060520034907,
      "grad_norm": 2.8125,
      "learning_rate": 8.961917594128628e-06,
      "loss": 0.8363,
      "step": 717900
    },
    {
      "epoch": 2.516095567541803,
      "grad_norm": 3.09375,
      "learning_rate": 8.961268565464926e-06,
      "loss": 0.815,
      "step": 717910
    },
    {
      "epoch": 2.5161306150486986,
      "grad_norm": 3.234375,
      "learning_rate": 8.960619536801224e-06,
      "loss": 0.8571,
      "step": 717920
    },
    {
      "epoch": 2.5161656625555944,
      "grad_norm": 3.09375,
      "learning_rate": 8.959970508137522e-06,
      "loss": 0.758,
      "step": 717930
    },
    {
      "epoch": 2.5162007100624897,
      "grad_norm": 3.015625,
      "learning_rate": 8.95932147947382e-06,
      "loss": 0.7906,
      "step": 717940
    },
    {
      "epoch": 2.5162357575693854,
      "grad_norm": 2.609375,
      "learning_rate": 8.958672450810118e-06,
      "loss": 0.768,
      "step": 717950
    },
    {
      "epoch": 2.5162708050762808,
      "grad_norm": 2.90625,
      "learning_rate": 8.958023422146417e-06,
      "loss": 0.8532,
      "step": 717960
    },
    {
      "epoch": 2.5163058525831765,
      "grad_norm": 2.65625,
      "learning_rate": 8.957374393482715e-06,
      "loss": 0.8615,
      "step": 717970
    },
    {
      "epoch": 2.5163409000900723,
      "grad_norm": 3.15625,
      "learning_rate": 8.956725364819012e-06,
      "loss": 0.8394,
      "step": 717980
    },
    {
      "epoch": 2.5163759475969676,
      "grad_norm": 3.0,
      "learning_rate": 8.95607633615531e-06,
      "loss": 0.8368,
      "step": 717990
    },
    {
      "epoch": 2.5164109951038633,
      "grad_norm": 3.28125,
      "learning_rate": 8.955427307491608e-06,
      "loss": 0.7903,
      "step": 718000
    },
    {
      "epoch": 2.5164460426107587,
      "grad_norm": 3.125,
      "learning_rate": 8.954778278827906e-06,
      "loss": 0.8281,
      "step": 718010
    },
    {
      "epoch": 2.5164810901176544,
      "grad_norm": 2.953125,
      "learning_rate": 8.954129250164205e-06,
      "loss": 0.8019,
      "step": 718020
    },
    {
      "epoch": 2.51651613762455,
      "grad_norm": 3.046875,
      "learning_rate": 8.953480221500503e-06,
      "loss": 0.9028,
      "step": 718030
    },
    {
      "epoch": 2.516551185131446,
      "grad_norm": 2.65625,
      "learning_rate": 8.952831192836801e-06,
      "loss": 0.7286,
      "step": 718040
    },
    {
      "epoch": 2.5165862326383412,
      "grad_norm": 3.171875,
      "learning_rate": 8.9521821641731e-06,
      "loss": 0.8541,
      "step": 718050
    },
    {
      "epoch": 2.516621280145237,
      "grad_norm": 2.921875,
      "learning_rate": 8.951533135509397e-06,
      "loss": 0.6995,
      "step": 718060
    },
    {
      "epoch": 2.5166563276521323,
      "grad_norm": 3.53125,
      "learning_rate": 8.950884106845695e-06,
      "loss": 0.8094,
      "step": 718070
    },
    {
      "epoch": 2.516691375159028,
      "grad_norm": 3.015625,
      "learning_rate": 8.950235078181993e-06,
      "loss": 0.7911,
      "step": 718080
    },
    {
      "epoch": 2.516726422665924,
      "grad_norm": 2.875,
      "learning_rate": 8.949586049518291e-06,
      "loss": 0.8952,
      "step": 718090
    },
    {
      "epoch": 2.516761470172819,
      "grad_norm": 3.265625,
      "learning_rate": 8.94893702085459e-06,
      "loss": 0.7854,
      "step": 718100
    },
    {
      "epoch": 2.516796517679715,
      "grad_norm": 2.765625,
      "learning_rate": 8.948287992190887e-06,
      "loss": 0.7696,
      "step": 718110
    },
    {
      "epoch": 2.51683156518661,
      "grad_norm": 2.921875,
      "learning_rate": 8.947638963527185e-06,
      "loss": 0.7736,
      "step": 718120
    },
    {
      "epoch": 2.516866612693506,
      "grad_norm": 3.015625,
      "learning_rate": 8.946989934863483e-06,
      "loss": 0.9066,
      "step": 718130
    },
    {
      "epoch": 2.5169016602004017,
      "grad_norm": 2.71875,
      "learning_rate": 8.946340906199781e-06,
      "loss": 0.8257,
      "step": 718140
    },
    {
      "epoch": 2.5169367077072975,
      "grad_norm": 3.140625,
      "learning_rate": 8.945691877536081e-06,
      "loss": 0.7748,
      "step": 718150
    },
    {
      "epoch": 2.516971755214193,
      "grad_norm": 2.8125,
      "learning_rate": 8.945042848872379e-06,
      "loss": 0.8355,
      "step": 718160
    },
    {
      "epoch": 2.5170068027210886,
      "grad_norm": 2.8125,
      "learning_rate": 8.944393820208675e-06,
      "loss": 0.9513,
      "step": 718170
    },
    {
      "epoch": 2.517041850227984,
      "grad_norm": 2.375,
      "learning_rate": 8.943744791544973e-06,
      "loss": 0.78,
      "step": 718180
    },
    {
      "epoch": 2.5170768977348796,
      "grad_norm": 2.53125,
      "learning_rate": 8.943095762881271e-06,
      "loss": 0.7755,
      "step": 718190
    },
    {
      "epoch": 2.5171119452417754,
      "grad_norm": 3.015625,
      "learning_rate": 8.942446734217571e-06,
      "loss": 0.7839,
      "step": 718200
    },
    {
      "epoch": 2.5171469927486707,
      "grad_norm": 2.515625,
      "learning_rate": 8.941797705553869e-06,
      "loss": 0.8538,
      "step": 718210
    },
    {
      "epoch": 2.5171820402555665,
      "grad_norm": 3.15625,
      "learning_rate": 8.941148676890167e-06,
      "loss": 0.8619,
      "step": 718220
    },
    {
      "epoch": 2.517217087762462,
      "grad_norm": 2.953125,
      "learning_rate": 8.940499648226465e-06,
      "loss": 0.8407,
      "step": 718230
    },
    {
      "epoch": 2.5172521352693575,
      "grad_norm": 2.9375,
      "learning_rate": 8.939850619562763e-06,
      "loss": 0.8451,
      "step": 718240
    },
    {
      "epoch": 2.5172871827762533,
      "grad_norm": 2.625,
      "learning_rate": 8.939201590899061e-06,
      "loss": 0.872,
      "step": 718250
    },
    {
      "epoch": 2.517322230283149,
      "grad_norm": 2.984375,
      "learning_rate": 8.938552562235359e-06,
      "loss": 0.8607,
      "step": 718260
    },
    {
      "epoch": 2.5173572777900444,
      "grad_norm": 3.0625,
      "learning_rate": 8.937903533571657e-06,
      "loss": 0.7945,
      "step": 718270
    },
    {
      "epoch": 2.51739232529694,
      "grad_norm": 3.25,
      "learning_rate": 8.937254504907955e-06,
      "loss": 0.7718,
      "step": 718280
    },
    {
      "epoch": 2.5174273728038354,
      "grad_norm": 2.75,
      "learning_rate": 8.936605476244253e-06,
      "loss": 0.7602,
      "step": 718290
    },
    {
      "epoch": 2.517462420310731,
      "grad_norm": 3.03125,
      "learning_rate": 8.935956447580551e-06,
      "loss": 0.7824,
      "step": 718300
    },
    {
      "epoch": 2.517497467817627,
      "grad_norm": 3.109375,
      "learning_rate": 8.935307418916849e-06,
      "loss": 0.8277,
      "step": 718310
    },
    {
      "epoch": 2.5175325153245223,
      "grad_norm": 3.3125,
      "learning_rate": 8.934658390253147e-06,
      "loss": 0.7875,
      "step": 718320
    },
    {
      "epoch": 2.517567562831418,
      "grad_norm": 3.09375,
      "learning_rate": 8.934009361589447e-06,
      "loss": 0.7256,
      "step": 718330
    },
    {
      "epoch": 2.5176026103383133,
      "grad_norm": 3.28125,
      "learning_rate": 8.933360332925745e-06,
      "loss": 0.8312,
      "step": 718340
    },
    {
      "epoch": 2.517637657845209,
      "grad_norm": 2.90625,
      "learning_rate": 8.932711304262043e-06,
      "loss": 0.7548,
      "step": 718350
    },
    {
      "epoch": 2.517672705352105,
      "grad_norm": 3.1875,
      "learning_rate": 8.932062275598339e-06,
      "loss": 0.8445,
      "step": 718360
    },
    {
      "epoch": 2.5177077528590006,
      "grad_norm": 3.125,
      "learning_rate": 8.931413246934637e-06,
      "loss": 0.7767,
      "step": 718370
    },
    {
      "epoch": 2.517742800365896,
      "grad_norm": 2.96875,
      "learning_rate": 8.930764218270935e-06,
      "loss": 0.8357,
      "step": 718380
    },
    {
      "epoch": 2.5177778478727917,
      "grad_norm": 3.125,
      "learning_rate": 8.930115189607235e-06,
      "loss": 0.8101,
      "step": 718390
    },
    {
      "epoch": 2.517812895379687,
      "grad_norm": 2.84375,
      "learning_rate": 8.929466160943533e-06,
      "loss": 0.8211,
      "step": 718400
    },
    {
      "epoch": 2.5178479428865828,
      "grad_norm": 3.203125,
      "learning_rate": 8.92881713227983e-06,
      "loss": 0.7145,
      "step": 718410
    },
    {
      "epoch": 2.5178829903934785,
      "grad_norm": 2.546875,
      "learning_rate": 8.928168103616129e-06,
      "loss": 0.8339,
      "step": 718420
    },
    {
      "epoch": 2.517918037900374,
      "grad_norm": 2.875,
      "learning_rate": 8.927519074952427e-06,
      "loss": 0.8225,
      "step": 718430
    },
    {
      "epoch": 2.5179530854072696,
      "grad_norm": 3.921875,
      "learning_rate": 8.926870046288725e-06,
      "loss": 0.7867,
      "step": 718440
    },
    {
      "epoch": 2.517988132914165,
      "grad_norm": 3.140625,
      "learning_rate": 8.926221017625023e-06,
      "loss": 0.7912,
      "step": 718450
    },
    {
      "epoch": 2.5180231804210607,
      "grad_norm": 2.578125,
      "learning_rate": 8.92557198896132e-06,
      "loss": 0.8458,
      "step": 718460
    },
    {
      "epoch": 2.5180582279279564,
      "grad_norm": 2.65625,
      "learning_rate": 8.924922960297619e-06,
      "loss": 0.7384,
      "step": 718470
    },
    {
      "epoch": 2.518093275434852,
      "grad_norm": 2.75,
      "learning_rate": 8.924273931633917e-06,
      "loss": 0.7176,
      "step": 718480
    },
    {
      "epoch": 2.5181283229417475,
      "grad_norm": 2.890625,
      "learning_rate": 8.923624902970215e-06,
      "loss": 0.868,
      "step": 718490
    },
    {
      "epoch": 2.5181633704486432,
      "grad_norm": 2.265625,
      "learning_rate": 8.922975874306513e-06,
      "loss": 0.7515,
      "step": 718500
    },
    {
      "epoch": 2.5181984179555386,
      "grad_norm": 2.875,
      "learning_rate": 8.922326845642812e-06,
      "loss": 0.7695,
      "step": 718510
    },
    {
      "epoch": 2.5182334654624343,
      "grad_norm": 3.21875,
      "learning_rate": 8.92167781697911e-06,
      "loss": 0.8071,
      "step": 718520
    },
    {
      "epoch": 2.51826851296933,
      "grad_norm": 2.5,
      "learning_rate": 8.921028788315408e-06,
      "loss": 0.8081,
      "step": 718530
    },
    {
      "epoch": 2.5183035604762254,
      "grad_norm": 2.765625,
      "learning_rate": 8.920379759651706e-06,
      "loss": 0.8227,
      "step": 718540
    },
    {
      "epoch": 2.518338607983121,
      "grad_norm": 2.859375,
      "learning_rate": 8.919730730988003e-06,
      "loss": 0.7855,
      "step": 718550
    },
    {
      "epoch": 2.5183736554900165,
      "grad_norm": 3.234375,
      "learning_rate": 8.9190817023243e-06,
      "loss": 0.8379,
      "step": 718560
    },
    {
      "epoch": 2.518408702996912,
      "grad_norm": 3.046875,
      "learning_rate": 8.9184326736606e-06,
      "loss": 0.8106,
      "step": 718570
    },
    {
      "epoch": 2.518443750503808,
      "grad_norm": 2.46875,
      "learning_rate": 8.917783644996898e-06,
      "loss": 0.7927,
      "step": 718580
    },
    {
      "epoch": 2.5184787980107037,
      "grad_norm": 2.984375,
      "learning_rate": 8.917134616333196e-06,
      "loss": 0.8728,
      "step": 718590
    },
    {
      "epoch": 2.518513845517599,
      "grad_norm": 3.109375,
      "learning_rate": 8.916485587669494e-06,
      "loss": 0.8938,
      "step": 718600
    },
    {
      "epoch": 2.518548893024495,
      "grad_norm": 3.125,
      "learning_rate": 8.915836559005792e-06,
      "loss": 0.8271,
      "step": 718610
    },
    {
      "epoch": 2.51858394053139,
      "grad_norm": 2.84375,
      "learning_rate": 8.91518753034209e-06,
      "loss": 0.7854,
      "step": 718620
    },
    {
      "epoch": 2.518618988038286,
      "grad_norm": 2.578125,
      "learning_rate": 8.914538501678388e-06,
      "loss": 0.7116,
      "step": 718630
    },
    {
      "epoch": 2.5186540355451816,
      "grad_norm": 3.015625,
      "learning_rate": 8.913889473014686e-06,
      "loss": 0.7882,
      "step": 718640
    },
    {
      "epoch": 2.518689083052077,
      "grad_norm": 2.921875,
      "learning_rate": 8.913240444350984e-06,
      "loss": 0.8122,
      "step": 718650
    },
    {
      "epoch": 2.5187241305589727,
      "grad_norm": 3.1875,
      "learning_rate": 8.912591415687282e-06,
      "loss": 0.8189,
      "step": 718660
    },
    {
      "epoch": 2.518759178065868,
      "grad_norm": 3.0,
      "learning_rate": 8.91194238702358e-06,
      "loss": 0.8299,
      "step": 718670
    },
    {
      "epoch": 2.518794225572764,
      "grad_norm": 3.234375,
      "learning_rate": 8.911293358359878e-06,
      "loss": 0.7857,
      "step": 718680
    },
    {
      "epoch": 2.5188292730796595,
      "grad_norm": 3.328125,
      "learning_rate": 8.910644329696176e-06,
      "loss": 0.8585,
      "step": 718690
    },
    {
      "epoch": 2.5188643205865553,
      "grad_norm": 2.78125,
      "learning_rate": 8.909995301032476e-06,
      "loss": 0.7357,
      "step": 718700
    },
    {
      "epoch": 2.5188993680934506,
      "grad_norm": 2.734375,
      "learning_rate": 8.909346272368774e-06,
      "loss": 0.8537,
      "step": 718710
    },
    {
      "epoch": 2.5189344156003464,
      "grad_norm": 2.84375,
      "learning_rate": 8.908697243705072e-06,
      "loss": 0.8124,
      "step": 718720
    },
    {
      "epoch": 2.5189694631072417,
      "grad_norm": 3.109375,
      "learning_rate": 8.908048215041368e-06,
      "loss": 0.8357,
      "step": 718730
    },
    {
      "epoch": 2.5190045106141374,
      "grad_norm": 2.890625,
      "learning_rate": 8.907399186377666e-06,
      "loss": 0.8316,
      "step": 718740
    },
    {
      "epoch": 2.519039558121033,
      "grad_norm": 2.625,
      "learning_rate": 8.906750157713966e-06,
      "loss": 0.8175,
      "step": 718750
    },
    {
      "epoch": 2.5190746056279285,
      "grad_norm": 3.296875,
      "learning_rate": 8.906101129050264e-06,
      "loss": 0.7735,
      "step": 718760
    },
    {
      "epoch": 2.5191096531348243,
      "grad_norm": 3.078125,
      "learning_rate": 8.905452100386562e-06,
      "loss": 0.8416,
      "step": 718770
    },
    {
      "epoch": 2.5191447006417196,
      "grad_norm": 3.15625,
      "learning_rate": 8.90480307172286e-06,
      "loss": 0.8224,
      "step": 718780
    },
    {
      "epoch": 2.5191797481486153,
      "grad_norm": 3.046875,
      "learning_rate": 8.904154043059158e-06,
      "loss": 0.7904,
      "step": 718790
    },
    {
      "epoch": 2.519214795655511,
      "grad_norm": 2.640625,
      "learning_rate": 8.903505014395456e-06,
      "loss": 0.7636,
      "step": 718800
    },
    {
      "epoch": 2.519249843162407,
      "grad_norm": 2.828125,
      "learning_rate": 8.902855985731754e-06,
      "loss": 0.8211,
      "step": 718810
    },
    {
      "epoch": 2.519284890669302,
      "grad_norm": 3.15625,
      "learning_rate": 8.902206957068054e-06,
      "loss": 0.829,
      "step": 718820
    },
    {
      "epoch": 2.519319938176198,
      "grad_norm": 2.90625,
      "learning_rate": 8.90155792840435e-06,
      "loss": 0.7719,
      "step": 718830
    },
    {
      "epoch": 2.5193549856830932,
      "grad_norm": 3.203125,
      "learning_rate": 8.900908899740648e-06,
      "loss": 0.8457,
      "step": 718840
    },
    {
      "epoch": 2.519390033189989,
      "grad_norm": 2.90625,
      "learning_rate": 8.900259871076946e-06,
      "loss": 0.8105,
      "step": 718850
    },
    {
      "epoch": 2.5194250806968848,
      "grad_norm": 3.015625,
      "learning_rate": 8.899610842413244e-06,
      "loss": 0.8128,
      "step": 718860
    },
    {
      "epoch": 2.51946012820378,
      "grad_norm": 3.40625,
      "learning_rate": 8.898961813749542e-06,
      "loss": 0.75,
      "step": 718870
    },
    {
      "epoch": 2.519495175710676,
      "grad_norm": 3.109375,
      "learning_rate": 8.898312785085842e-06,
      "loss": 0.9138,
      "step": 718880
    },
    {
      "epoch": 2.519530223217571,
      "grad_norm": 2.921875,
      "learning_rate": 8.89766375642214e-06,
      "loss": 0.767,
      "step": 718890
    },
    {
      "epoch": 2.519565270724467,
      "grad_norm": 2.984375,
      "learning_rate": 8.897014727758438e-06,
      "loss": 0.8308,
      "step": 718900
    },
    {
      "epoch": 2.5196003182313627,
      "grad_norm": 2.90625,
      "learning_rate": 8.896365699094736e-06,
      "loss": 0.8294,
      "step": 718910
    },
    {
      "epoch": 2.5196353657382584,
      "grad_norm": 2.828125,
      "learning_rate": 8.895716670431032e-06,
      "loss": 0.7748,
      "step": 718920
    },
    {
      "epoch": 2.5196704132451537,
      "grad_norm": 2.859375,
      "learning_rate": 8.89506764176733e-06,
      "loss": 0.7856,
      "step": 718930
    },
    {
      "epoch": 2.5197054607520495,
      "grad_norm": 2.59375,
      "learning_rate": 8.89441861310363e-06,
      "loss": 0.7666,
      "step": 718940
    },
    {
      "epoch": 2.519740508258945,
      "grad_norm": 3.109375,
      "learning_rate": 8.893769584439928e-06,
      "loss": 0.8001,
      "step": 718950
    },
    {
      "epoch": 2.5197755557658406,
      "grad_norm": 3.0,
      "learning_rate": 8.893120555776226e-06,
      "loss": 0.7759,
      "step": 718960
    },
    {
      "epoch": 2.5198106032727363,
      "grad_norm": 2.734375,
      "learning_rate": 8.892471527112524e-06,
      "loss": 0.8387,
      "step": 718970
    },
    {
      "epoch": 2.5198456507796316,
      "grad_norm": 2.5625,
      "learning_rate": 8.891822498448822e-06,
      "loss": 0.7975,
      "step": 718980
    },
    {
      "epoch": 2.5198806982865274,
      "grad_norm": 3.046875,
      "learning_rate": 8.89117346978512e-06,
      "loss": 0.7652,
      "step": 718990
    },
    {
      "epoch": 2.519915745793423,
      "grad_norm": 2.84375,
      "learning_rate": 8.890524441121418e-06,
      "loss": 0.8548,
      "step": 719000
    },
    {
      "epoch": 2.5199507933003185,
      "grad_norm": 2.9375,
      "learning_rate": 8.889875412457718e-06,
      "loss": 0.866,
      "step": 719010
    },
    {
      "epoch": 2.519985840807214,
      "grad_norm": 3.40625,
      "learning_rate": 8.889226383794014e-06,
      "loss": 0.7869,
      "step": 719020
    },
    {
      "epoch": 2.52002088831411,
      "grad_norm": 3.15625,
      "learning_rate": 8.888577355130312e-06,
      "loss": 0.82,
      "step": 719030
    },
    {
      "epoch": 2.5200559358210053,
      "grad_norm": 3.375,
      "learning_rate": 8.88792832646661e-06,
      "loss": 0.8394,
      "step": 719040
    },
    {
      "epoch": 2.520090983327901,
      "grad_norm": 3.28125,
      "learning_rate": 8.887279297802908e-06,
      "loss": 0.8182,
      "step": 719050
    },
    {
      "epoch": 2.5201260308347964,
      "grad_norm": 2.71875,
      "learning_rate": 8.886630269139208e-06,
      "loss": 0.769,
      "step": 719060
    },
    {
      "epoch": 2.520161078341692,
      "grad_norm": 2.5625,
      "learning_rate": 8.885981240475506e-06,
      "loss": 0.8024,
      "step": 719070
    },
    {
      "epoch": 2.520196125848588,
      "grad_norm": 3.0625,
      "learning_rate": 8.885332211811804e-06,
      "loss": 0.7591,
      "step": 719080
    },
    {
      "epoch": 2.5202311733554836,
      "grad_norm": 3.0,
      "learning_rate": 8.884683183148102e-06,
      "loss": 0.8345,
      "step": 719090
    },
    {
      "epoch": 2.520266220862379,
      "grad_norm": 2.96875,
      "learning_rate": 8.8840341544844e-06,
      "loss": 0.8141,
      "step": 719100
    },
    {
      "epoch": 2.5203012683692747,
      "grad_norm": 2.9375,
      "learning_rate": 8.883385125820696e-06,
      "loss": 0.888,
      "step": 719110
    },
    {
      "epoch": 2.52033631587617,
      "grad_norm": 2.9375,
      "learning_rate": 8.882736097156996e-06,
      "loss": 0.7349,
      "step": 719120
    },
    {
      "epoch": 2.5203713633830658,
      "grad_norm": 3.0,
      "learning_rate": 8.882087068493294e-06,
      "loss": 0.8294,
      "step": 719130
    },
    {
      "epoch": 2.5204064108899615,
      "grad_norm": 2.75,
      "learning_rate": 8.881438039829592e-06,
      "loss": 0.7052,
      "step": 719140
    },
    {
      "epoch": 2.520441458396857,
      "grad_norm": 3.0625,
      "learning_rate": 8.88078901116589e-06,
      "loss": 0.8423,
      "step": 719150
    },
    {
      "epoch": 2.5204765059037526,
      "grad_norm": 3.328125,
      "learning_rate": 8.880139982502188e-06,
      "loss": 0.853,
      "step": 719160
    },
    {
      "epoch": 2.520511553410648,
      "grad_norm": 2.859375,
      "learning_rate": 8.879490953838486e-06,
      "loss": 0.8217,
      "step": 719170
    },
    {
      "epoch": 2.5205466009175437,
      "grad_norm": 2.3125,
      "learning_rate": 8.878841925174784e-06,
      "loss": 0.7949,
      "step": 719180
    },
    {
      "epoch": 2.5205816484244394,
      "grad_norm": 3.09375,
      "learning_rate": 8.878192896511083e-06,
      "loss": 0.7977,
      "step": 719190
    },
    {
      "epoch": 2.520616695931335,
      "grad_norm": 3.125,
      "learning_rate": 8.87754386784738e-06,
      "loss": 0.8208,
      "step": 719200
    },
    {
      "epoch": 2.5206517434382305,
      "grad_norm": 2.96875,
      "learning_rate": 8.876894839183678e-06,
      "loss": 0.7883,
      "step": 719210
    },
    {
      "epoch": 2.5206867909451263,
      "grad_norm": 2.703125,
      "learning_rate": 8.876245810519976e-06,
      "loss": 0.8252,
      "step": 719220
    },
    {
      "epoch": 2.5207218384520216,
      "grad_norm": 2.671875,
      "learning_rate": 8.875596781856274e-06,
      "loss": 0.8603,
      "step": 719230
    },
    {
      "epoch": 2.5207568859589173,
      "grad_norm": 3.046875,
      "learning_rate": 8.874947753192572e-06,
      "loss": 0.7985,
      "step": 719240
    },
    {
      "epoch": 2.520791933465813,
      "grad_norm": 3.328125,
      "learning_rate": 8.874298724528871e-06,
      "loss": 0.8467,
      "step": 719250
    },
    {
      "epoch": 2.5208269809727084,
      "grad_norm": 3.0,
      "learning_rate": 8.87364969586517e-06,
      "loss": 0.9048,
      "step": 719260
    },
    {
      "epoch": 2.520862028479604,
      "grad_norm": 3.03125,
      "learning_rate": 8.873000667201467e-06,
      "loss": 0.7342,
      "step": 719270
    },
    {
      "epoch": 2.5208970759864995,
      "grad_norm": 3.484375,
      "learning_rate": 8.872351638537765e-06,
      "loss": 0.7847,
      "step": 719280
    },
    {
      "epoch": 2.5209321234933952,
      "grad_norm": 3.0,
      "learning_rate": 8.871702609874063e-06,
      "loss": 0.7834,
      "step": 719290
    },
    {
      "epoch": 2.520967171000291,
      "grad_norm": 3.078125,
      "learning_rate": 8.871053581210361e-06,
      "loss": 0.8469,
      "step": 719300
    },
    {
      "epoch": 2.5210022185071868,
      "grad_norm": 2.75,
      "learning_rate": 8.87040455254666e-06,
      "loss": 0.8066,
      "step": 719310
    },
    {
      "epoch": 2.521037266014082,
      "grad_norm": 3.1875,
      "learning_rate": 8.869755523882957e-06,
      "loss": 0.7544,
      "step": 719320
    },
    {
      "epoch": 2.521072313520978,
      "grad_norm": 3.1875,
      "learning_rate": 8.869106495219255e-06,
      "loss": 0.7914,
      "step": 719330
    },
    {
      "epoch": 2.521107361027873,
      "grad_norm": 3.03125,
      "learning_rate": 8.868457466555553e-06,
      "loss": 0.7862,
      "step": 719340
    },
    {
      "epoch": 2.521142408534769,
      "grad_norm": 3.234375,
      "learning_rate": 8.867808437891851e-06,
      "loss": 0.7389,
      "step": 719350
    },
    {
      "epoch": 2.5211774560416647,
      "grad_norm": 2.734375,
      "learning_rate": 8.86715940922815e-06,
      "loss": 0.8258,
      "step": 719360
    },
    {
      "epoch": 2.52121250354856,
      "grad_norm": 2.609375,
      "learning_rate": 8.866510380564449e-06,
      "loss": 0.8419,
      "step": 719370
    },
    {
      "epoch": 2.5212475510554557,
      "grad_norm": 2.40625,
      "learning_rate": 8.865861351900747e-06,
      "loss": 0.7692,
      "step": 719380
    },
    {
      "epoch": 2.521282598562351,
      "grad_norm": 2.859375,
      "learning_rate": 8.865212323237043e-06,
      "loss": 0.876,
      "step": 719390
    },
    {
      "epoch": 2.521317646069247,
      "grad_norm": 2.6875,
      "learning_rate": 8.864563294573341e-06,
      "loss": 0.8179,
      "step": 719400
    },
    {
      "epoch": 2.5213526935761426,
      "grad_norm": 3.5625,
      "learning_rate": 8.86391426590964e-06,
      "loss": 0.8276,
      "step": 719410
    },
    {
      "epoch": 2.5213877410830383,
      "grad_norm": 2.96875,
      "learning_rate": 8.863265237245937e-06,
      "loss": 0.8081,
      "step": 719420
    },
    {
      "epoch": 2.5214227885899336,
      "grad_norm": 2.828125,
      "learning_rate": 8.862616208582237e-06,
      "loss": 0.7908,
      "step": 719430
    },
    {
      "epoch": 2.5214578360968294,
      "grad_norm": 2.921875,
      "learning_rate": 8.861967179918535e-06,
      "loss": 0.7632,
      "step": 719440
    },
    {
      "epoch": 2.5214928836037247,
      "grad_norm": 2.5,
      "learning_rate": 8.861318151254833e-06,
      "loss": 0.7852,
      "step": 719450
    },
    {
      "epoch": 2.5215279311106205,
      "grad_norm": 2.921875,
      "learning_rate": 8.860669122591131e-06,
      "loss": 0.7275,
      "step": 719460
    },
    {
      "epoch": 2.521562978617516,
      "grad_norm": 2.859375,
      "learning_rate": 8.860020093927429e-06,
      "loss": 0.8094,
      "step": 719470
    },
    {
      "epoch": 2.5215980261244115,
      "grad_norm": 2.796875,
      "learning_rate": 8.859371065263727e-06,
      "loss": 0.7732,
      "step": 719480
    },
    {
      "epoch": 2.5216330736313073,
      "grad_norm": 2.703125,
      "learning_rate": 8.858722036600025e-06,
      "loss": 0.8046,
      "step": 719490
    },
    {
      "epoch": 2.5216681211382026,
      "grad_norm": 2.71875,
      "learning_rate": 8.858073007936323e-06,
      "loss": 0.8573,
      "step": 719500
    },
    {
      "epoch": 2.5217031686450984,
      "grad_norm": 2.703125,
      "learning_rate": 8.857423979272621e-06,
      "loss": 0.8808,
      "step": 719510
    },
    {
      "epoch": 2.521738216151994,
      "grad_norm": 2.5625,
      "learning_rate": 8.856774950608919e-06,
      "loss": 0.8007,
      "step": 719520
    },
    {
      "epoch": 2.52177326365889,
      "grad_norm": 2.96875,
      "learning_rate": 8.856125921945217e-06,
      "loss": 0.949,
      "step": 719530
    },
    {
      "epoch": 2.521808311165785,
      "grad_norm": 3.234375,
      "learning_rate": 8.855476893281515e-06,
      "loss": 0.8082,
      "step": 719540
    },
    {
      "epoch": 2.521843358672681,
      "grad_norm": 2.578125,
      "learning_rate": 8.854827864617813e-06,
      "loss": 0.7892,
      "step": 719550
    },
    {
      "epoch": 2.5218784061795763,
      "grad_norm": 2.859375,
      "learning_rate": 8.854178835954113e-06,
      "loss": 0.7438,
      "step": 719560
    },
    {
      "epoch": 2.521913453686472,
      "grad_norm": 2.78125,
      "learning_rate": 8.85352980729041e-06,
      "loss": 0.8144,
      "step": 719570
    },
    {
      "epoch": 2.5219485011933678,
      "grad_norm": 2.421875,
      "learning_rate": 8.852880778626707e-06,
      "loss": 0.8547,
      "step": 719580
    },
    {
      "epoch": 2.521983548700263,
      "grad_norm": 3.53125,
      "learning_rate": 8.852231749963005e-06,
      "loss": 0.8013,
      "step": 719590
    },
    {
      "epoch": 2.522018596207159,
      "grad_norm": 3.53125,
      "learning_rate": 8.851582721299303e-06,
      "loss": 0.8384,
      "step": 719600
    },
    {
      "epoch": 2.522053643714054,
      "grad_norm": 2.796875,
      "learning_rate": 8.850933692635603e-06,
      "loss": 0.7999,
      "step": 719610
    },
    {
      "epoch": 2.52208869122095,
      "grad_norm": 3.0,
      "learning_rate": 8.8502846639719e-06,
      "loss": 0.7691,
      "step": 719620
    },
    {
      "epoch": 2.5221237387278457,
      "grad_norm": 2.984375,
      "learning_rate": 8.849635635308199e-06,
      "loss": 0.7176,
      "step": 719630
    },
    {
      "epoch": 2.5221587862347414,
      "grad_norm": 3.359375,
      "learning_rate": 8.848986606644497e-06,
      "loss": 0.7131,
      "step": 719640
    },
    {
      "epoch": 2.5221938337416367,
      "grad_norm": 2.953125,
      "learning_rate": 8.848337577980795e-06,
      "loss": 0.7652,
      "step": 719650
    },
    {
      "epoch": 2.5222288812485325,
      "grad_norm": 2.71875,
      "learning_rate": 8.847688549317093e-06,
      "loss": 0.7584,
      "step": 719660
    },
    {
      "epoch": 2.522263928755428,
      "grad_norm": 3.140625,
      "learning_rate": 8.84703952065339e-06,
      "loss": 0.8304,
      "step": 719670
    },
    {
      "epoch": 2.5222989762623236,
      "grad_norm": 2.875,
      "learning_rate": 8.846390491989689e-06,
      "loss": 0.8243,
      "step": 719680
    },
    {
      "epoch": 2.5223340237692193,
      "grad_norm": 3.03125,
      "learning_rate": 8.845741463325987e-06,
      "loss": 0.765,
      "step": 719690
    },
    {
      "epoch": 2.5223690712761146,
      "grad_norm": 2.96875,
      "learning_rate": 8.845092434662285e-06,
      "loss": 0.7936,
      "step": 719700
    },
    {
      "epoch": 2.5224041187830104,
      "grad_norm": 3.21875,
      "learning_rate": 8.844443405998583e-06,
      "loss": 0.7528,
      "step": 719710
    },
    {
      "epoch": 2.5224391662899057,
      "grad_norm": 3.046875,
      "learning_rate": 8.84379437733488e-06,
      "loss": 0.8329,
      "step": 719720
    },
    {
      "epoch": 2.5224742137968015,
      "grad_norm": 2.8125,
      "learning_rate": 8.843145348671179e-06,
      "loss": 0.8218,
      "step": 719730
    },
    {
      "epoch": 2.5225092613036972,
      "grad_norm": 2.765625,
      "learning_rate": 8.842496320007478e-06,
      "loss": 0.7633,
      "step": 719740
    },
    {
      "epoch": 2.522544308810593,
      "grad_norm": 3.125,
      "learning_rate": 8.841847291343776e-06,
      "loss": 0.8018,
      "step": 719750
    },
    {
      "epoch": 2.5225793563174883,
      "grad_norm": 3.09375,
      "learning_rate": 8.841198262680074e-06,
      "loss": 0.8197,
      "step": 719760
    },
    {
      "epoch": 2.522614403824384,
      "grad_norm": 2.921875,
      "learning_rate": 8.84054923401637e-06,
      "loss": 0.775,
      "step": 719770
    },
    {
      "epoch": 2.5226494513312794,
      "grad_norm": 2.8125,
      "learning_rate": 8.839900205352669e-06,
      "loss": 0.8221,
      "step": 719780
    },
    {
      "epoch": 2.522684498838175,
      "grad_norm": 3.0,
      "learning_rate": 8.839251176688967e-06,
      "loss": 0.8343,
      "step": 719790
    },
    {
      "epoch": 2.522719546345071,
      "grad_norm": 2.765625,
      "learning_rate": 8.838602148025266e-06,
      "loss": 0.783,
      "step": 719800
    },
    {
      "epoch": 2.522754593851966,
      "grad_norm": 2.984375,
      "learning_rate": 8.837953119361564e-06,
      "loss": 0.7762,
      "step": 719810
    },
    {
      "epoch": 2.522789641358862,
      "grad_norm": 3.265625,
      "learning_rate": 8.837304090697862e-06,
      "loss": 0.7018,
      "step": 719820
    },
    {
      "epoch": 2.5228246888657573,
      "grad_norm": 2.671875,
      "learning_rate": 8.83665506203416e-06,
      "loss": 0.7341,
      "step": 719830
    },
    {
      "epoch": 2.522859736372653,
      "grad_norm": 2.671875,
      "learning_rate": 8.836006033370458e-06,
      "loss": 0.7831,
      "step": 719840
    },
    {
      "epoch": 2.522894783879549,
      "grad_norm": 3.09375,
      "learning_rate": 8.835357004706756e-06,
      "loss": 0.7537,
      "step": 719850
    },
    {
      "epoch": 2.5229298313864446,
      "grad_norm": 2.8125,
      "learning_rate": 8.834707976043054e-06,
      "loss": 0.6943,
      "step": 719860
    },
    {
      "epoch": 2.52296487889334,
      "grad_norm": 2.625,
      "learning_rate": 8.834058947379352e-06,
      "loss": 0.787,
      "step": 719870
    },
    {
      "epoch": 2.5229999264002356,
      "grad_norm": 2.78125,
      "learning_rate": 8.83340991871565e-06,
      "loss": 0.7932,
      "step": 719880
    },
    {
      "epoch": 2.523034973907131,
      "grad_norm": 3.140625,
      "learning_rate": 8.832760890051948e-06,
      "loss": 0.8267,
      "step": 719890
    },
    {
      "epoch": 2.5230700214140267,
      "grad_norm": 2.734375,
      "learning_rate": 8.832111861388246e-06,
      "loss": 0.732,
      "step": 719900
    },
    {
      "epoch": 2.5231050689209225,
      "grad_norm": 2.90625,
      "learning_rate": 8.831462832724544e-06,
      "loss": 0.8316,
      "step": 719910
    },
    {
      "epoch": 2.5231401164278178,
      "grad_norm": 2.984375,
      "learning_rate": 8.830813804060844e-06,
      "loss": 0.7712,
      "step": 719920
    },
    {
      "epoch": 2.5231751639347135,
      "grad_norm": 2.875,
      "learning_rate": 8.830164775397142e-06,
      "loss": 0.7612,
      "step": 719930
    },
    {
      "epoch": 2.523210211441609,
      "grad_norm": 2.890625,
      "learning_rate": 8.82951574673344e-06,
      "loss": 0.7762,
      "step": 719940
    },
    {
      "epoch": 2.5232452589485046,
      "grad_norm": 2.8125,
      "learning_rate": 8.828866718069738e-06,
      "loss": 0.8628,
      "step": 719950
    },
    {
      "epoch": 2.5232803064554004,
      "grad_norm": 3.1875,
      "learning_rate": 8.828217689406034e-06,
      "loss": 0.7946,
      "step": 719960
    },
    {
      "epoch": 2.523315353962296,
      "grad_norm": 3.453125,
      "learning_rate": 8.827568660742332e-06,
      "loss": 0.8373,
      "step": 719970
    },
    {
      "epoch": 2.5233504014691914,
      "grad_norm": 2.78125,
      "learning_rate": 8.826919632078632e-06,
      "loss": 0.7962,
      "step": 719980
    },
    {
      "epoch": 2.523385448976087,
      "grad_norm": 2.5625,
      "learning_rate": 8.82627060341493e-06,
      "loss": 0.7969,
      "step": 719990
    },
    {
      "epoch": 2.5234204964829825,
      "grad_norm": 3.0,
      "learning_rate": 8.825621574751228e-06,
      "loss": 0.8072,
      "step": 720000
    },
    {
      "epoch": 2.5234204964829825,
      "eval_loss": 0.7563644051551819,
      "eval_runtime": 560.9953,
      "eval_samples_per_second": 678.145,
      "eval_steps_per_second": 56.512,
      "step": 720000
    },
    {
      "epoch": 2.5234555439898783,
      "grad_norm": 2.6875,
      "learning_rate": 8.824972546087526e-06,
      "loss": 0.8315,
      "step": 720010
    },
    {
      "epoch": 2.523490591496774,
      "grad_norm": 2.5,
      "learning_rate": 8.824323517423824e-06,
      "loss": 0.865,
      "step": 720020
    },
    {
      "epoch": 2.5235256390036693,
      "grad_norm": 2.796875,
      "learning_rate": 8.823674488760122e-06,
      "loss": 0.8142,
      "step": 720030
    },
    {
      "epoch": 2.523560686510565,
      "grad_norm": 3.34375,
      "learning_rate": 8.82302546009642e-06,
      "loss": 0.8236,
      "step": 720040
    },
    {
      "epoch": 2.5235957340174604,
      "grad_norm": 2.765625,
      "learning_rate": 8.822376431432718e-06,
      "loss": 0.9074,
      "step": 720050
    },
    {
      "epoch": 2.523630781524356,
      "grad_norm": 3.265625,
      "learning_rate": 8.821727402769016e-06,
      "loss": 0.8166,
      "step": 720060
    },
    {
      "epoch": 2.523665829031252,
      "grad_norm": 3.234375,
      "learning_rate": 8.821078374105314e-06,
      "loss": 0.8993,
      "step": 720070
    },
    {
      "epoch": 2.5237008765381477,
      "grad_norm": 2.96875,
      "learning_rate": 8.820429345441612e-06,
      "loss": 0.7783,
      "step": 720080
    },
    {
      "epoch": 2.523735924045043,
      "grad_norm": 3.359375,
      "learning_rate": 8.81978031677791e-06,
      "loss": 0.7416,
      "step": 720090
    },
    {
      "epoch": 2.5237709715519387,
      "grad_norm": 2.921875,
      "learning_rate": 8.819131288114208e-06,
      "loss": 0.8247,
      "step": 720100
    },
    {
      "epoch": 2.523806019058834,
      "grad_norm": 3.09375,
      "learning_rate": 8.818482259450508e-06,
      "loss": 0.7721,
      "step": 720110
    },
    {
      "epoch": 2.52384106656573,
      "grad_norm": 3.09375,
      "learning_rate": 8.817833230786806e-06,
      "loss": 0.8103,
      "step": 720120
    },
    {
      "epoch": 2.5238761140726256,
      "grad_norm": 2.875,
      "learning_rate": 8.817184202123104e-06,
      "loss": 0.7967,
      "step": 720130
    },
    {
      "epoch": 2.523911161579521,
      "grad_norm": 3.109375,
      "learning_rate": 8.816535173459402e-06,
      "loss": 0.8571,
      "step": 720140
    },
    {
      "epoch": 2.5239462090864166,
      "grad_norm": 2.625,
      "learning_rate": 8.815886144795698e-06,
      "loss": 0.8369,
      "step": 720150
    },
    {
      "epoch": 2.523981256593312,
      "grad_norm": 3.203125,
      "learning_rate": 8.815237116131998e-06,
      "loss": 0.8088,
      "step": 720160
    },
    {
      "epoch": 2.5240163041002077,
      "grad_norm": 2.84375,
      "learning_rate": 8.814588087468296e-06,
      "loss": 0.8367,
      "step": 720170
    },
    {
      "epoch": 2.5240513516071035,
      "grad_norm": 2.734375,
      "learning_rate": 8.813939058804594e-06,
      "loss": 0.8743,
      "step": 720180
    },
    {
      "epoch": 2.5240863991139992,
      "grad_norm": 2.8125,
      "learning_rate": 8.813290030140892e-06,
      "loss": 0.798,
      "step": 720190
    },
    {
      "epoch": 2.5241214466208945,
      "grad_norm": 3.0,
      "learning_rate": 8.81264100147719e-06,
      "loss": 0.8821,
      "step": 720200
    },
    {
      "epoch": 2.5241564941277903,
      "grad_norm": 3.078125,
      "learning_rate": 8.811991972813488e-06,
      "loss": 0.7505,
      "step": 720210
    },
    {
      "epoch": 2.5241915416346856,
      "grad_norm": 3.265625,
      "learning_rate": 8.811342944149786e-06,
      "loss": 0.8337,
      "step": 720220
    },
    {
      "epoch": 2.5242265891415814,
      "grad_norm": 2.421875,
      "learning_rate": 8.810693915486085e-06,
      "loss": 0.7903,
      "step": 720230
    },
    {
      "epoch": 2.524261636648477,
      "grad_norm": 2.75,
      "learning_rate": 8.810044886822382e-06,
      "loss": 0.8002,
      "step": 720240
    },
    {
      "epoch": 2.5242966841553724,
      "grad_norm": 2.765625,
      "learning_rate": 8.80939585815868e-06,
      "loss": 0.7428,
      "step": 720250
    },
    {
      "epoch": 2.524331731662268,
      "grad_norm": 2.578125,
      "learning_rate": 8.808746829494978e-06,
      "loss": 0.7724,
      "step": 720260
    },
    {
      "epoch": 2.524366779169164,
      "grad_norm": 2.484375,
      "learning_rate": 8.808097800831276e-06,
      "loss": 0.8012,
      "step": 720270
    },
    {
      "epoch": 2.5244018266760593,
      "grad_norm": 2.65625,
      "learning_rate": 8.807448772167574e-06,
      "loss": 0.8277,
      "step": 720280
    },
    {
      "epoch": 2.524436874182955,
      "grad_norm": 3.296875,
      "learning_rate": 8.806799743503873e-06,
      "loss": 0.7649,
      "step": 720290
    },
    {
      "epoch": 2.524471921689851,
      "grad_norm": 3.15625,
      "learning_rate": 8.806150714840171e-06,
      "loss": 0.8114,
      "step": 720300
    },
    {
      "epoch": 2.524506969196746,
      "grad_norm": 2.9375,
      "learning_rate": 8.80550168617647e-06,
      "loss": 0.8379,
      "step": 720310
    },
    {
      "epoch": 2.524542016703642,
      "grad_norm": 2.78125,
      "learning_rate": 8.804852657512767e-06,
      "loss": 0.7913,
      "step": 720320
    },
    {
      "epoch": 2.524577064210537,
      "grad_norm": 2.5625,
      "learning_rate": 8.804203628849064e-06,
      "loss": 0.7741,
      "step": 720330
    },
    {
      "epoch": 2.524612111717433,
      "grad_norm": 3.046875,
      "learning_rate": 8.803554600185362e-06,
      "loss": 0.7153,
      "step": 720340
    },
    {
      "epoch": 2.5246471592243287,
      "grad_norm": 3.09375,
      "learning_rate": 8.802905571521661e-06,
      "loss": 0.7709,
      "step": 720350
    },
    {
      "epoch": 2.524682206731224,
      "grad_norm": 2.734375,
      "learning_rate": 8.80225654285796e-06,
      "loss": 0.7738,
      "step": 720360
    },
    {
      "epoch": 2.5247172542381198,
      "grad_norm": 2.734375,
      "learning_rate": 8.801607514194257e-06,
      "loss": 0.8474,
      "step": 720370
    },
    {
      "epoch": 2.5247523017450155,
      "grad_norm": 2.515625,
      "learning_rate": 8.800958485530555e-06,
      "loss": 0.8575,
      "step": 720380
    },
    {
      "epoch": 2.524787349251911,
      "grad_norm": 3.015625,
      "learning_rate": 8.800309456866853e-06,
      "loss": 0.823,
      "step": 720390
    },
    {
      "epoch": 2.5248223967588066,
      "grad_norm": 3.21875,
      "learning_rate": 8.799660428203151e-06,
      "loss": 0.8852,
      "step": 720400
    },
    {
      "epoch": 2.5248574442657024,
      "grad_norm": 3.1875,
      "learning_rate": 8.79901139953945e-06,
      "loss": 0.8709,
      "step": 720410
    },
    {
      "epoch": 2.5248924917725977,
      "grad_norm": 2.96875,
      "learning_rate": 8.798362370875749e-06,
      "loss": 0.7427,
      "step": 720420
    },
    {
      "epoch": 2.5249275392794934,
      "grad_norm": 3.09375,
      "learning_rate": 8.797713342212045e-06,
      "loss": 0.7441,
      "step": 720430
    },
    {
      "epoch": 2.5249625867863887,
      "grad_norm": 2.78125,
      "learning_rate": 8.797064313548343e-06,
      "loss": 0.8459,
      "step": 720440
    },
    {
      "epoch": 2.5249976342932845,
      "grad_norm": 3.109375,
      "learning_rate": 8.796415284884641e-06,
      "loss": 0.9127,
      "step": 720450
    },
    {
      "epoch": 2.5250326818001803,
      "grad_norm": 2.953125,
      "learning_rate": 8.79576625622094e-06,
      "loss": 0.8282,
      "step": 720460
    },
    {
      "epoch": 2.525067729307076,
      "grad_norm": 2.671875,
      "learning_rate": 8.795117227557239e-06,
      "loss": 0.7679,
      "step": 720470
    },
    {
      "epoch": 2.5251027768139713,
      "grad_norm": 2.328125,
      "learning_rate": 8.794468198893537e-06,
      "loss": 0.8127,
      "step": 720480
    },
    {
      "epoch": 2.525137824320867,
      "grad_norm": 3.015625,
      "learning_rate": 8.793819170229835e-06,
      "loss": 0.8927,
      "step": 720490
    },
    {
      "epoch": 2.5251728718277624,
      "grad_norm": 3.09375,
      "learning_rate": 8.793170141566133e-06,
      "loss": 0.8303,
      "step": 720500
    },
    {
      "epoch": 2.525207919334658,
      "grad_norm": 2.546875,
      "learning_rate": 8.792521112902431e-06,
      "loss": 0.7816,
      "step": 720510
    },
    {
      "epoch": 2.525242966841554,
      "grad_norm": 2.84375,
      "learning_rate": 8.791872084238727e-06,
      "loss": 0.796,
      "step": 720520
    },
    {
      "epoch": 2.5252780143484492,
      "grad_norm": 2.921875,
      "learning_rate": 8.791223055575027e-06,
      "loss": 0.782,
      "step": 720530
    },
    {
      "epoch": 2.525313061855345,
      "grad_norm": 2.546875,
      "learning_rate": 8.790574026911325e-06,
      "loss": 0.8487,
      "step": 720540
    },
    {
      "epoch": 2.5253481093622403,
      "grad_norm": 2.625,
      "learning_rate": 8.789924998247623e-06,
      "loss": 0.7798,
      "step": 720550
    },
    {
      "epoch": 2.525383156869136,
      "grad_norm": 3.0625,
      "learning_rate": 8.789275969583921e-06,
      "loss": 0.8435,
      "step": 720560
    },
    {
      "epoch": 2.525418204376032,
      "grad_norm": 2.890625,
      "learning_rate": 8.788626940920219e-06,
      "loss": 0.8393,
      "step": 720570
    },
    {
      "epoch": 2.5254532518829276,
      "grad_norm": 2.953125,
      "learning_rate": 8.787977912256517e-06,
      "loss": 0.8717,
      "step": 720580
    },
    {
      "epoch": 2.525488299389823,
      "grad_norm": 3.203125,
      "learning_rate": 8.787328883592815e-06,
      "loss": 0.7375,
      "step": 720590
    },
    {
      "epoch": 2.5255233468967186,
      "grad_norm": 2.625,
      "learning_rate": 8.786679854929115e-06,
      "loss": 0.6805,
      "step": 720600
    },
    {
      "epoch": 2.525558394403614,
      "grad_norm": 2.6875,
      "learning_rate": 8.786030826265413e-06,
      "loss": 0.8143,
      "step": 720610
    },
    {
      "epoch": 2.5255934419105097,
      "grad_norm": 2.84375,
      "learning_rate": 8.785381797601709e-06,
      "loss": 0.7884,
      "step": 720620
    },
    {
      "epoch": 2.5256284894174055,
      "grad_norm": 2.796875,
      "learning_rate": 8.784732768938007e-06,
      "loss": 0.7642,
      "step": 720630
    },
    {
      "epoch": 2.525663536924301,
      "grad_norm": 2.75,
      "learning_rate": 8.784083740274305e-06,
      "loss": 0.9667,
      "step": 720640
    },
    {
      "epoch": 2.5256985844311965,
      "grad_norm": 2.671875,
      "learning_rate": 8.783434711610603e-06,
      "loss": 0.791,
      "step": 720650
    },
    {
      "epoch": 2.525733631938092,
      "grad_norm": 2.8125,
      "learning_rate": 8.782785682946903e-06,
      "loss": 0.7838,
      "step": 720660
    },
    {
      "epoch": 2.5257686794449876,
      "grad_norm": 3.046875,
      "learning_rate": 8.7821366542832e-06,
      "loss": 0.8336,
      "step": 720670
    },
    {
      "epoch": 2.5258037269518834,
      "grad_norm": 2.765625,
      "learning_rate": 8.781487625619499e-06,
      "loss": 0.8261,
      "step": 720680
    },
    {
      "epoch": 2.525838774458779,
      "grad_norm": 2.515625,
      "learning_rate": 8.780838596955797e-06,
      "loss": 0.7604,
      "step": 720690
    },
    {
      "epoch": 2.5258738219656744,
      "grad_norm": 2.703125,
      "learning_rate": 8.780189568292095e-06,
      "loss": 0.7976,
      "step": 720700
    },
    {
      "epoch": 2.52590886947257,
      "grad_norm": 2.9375,
      "learning_rate": 8.779540539628393e-06,
      "loss": 0.8693,
      "step": 720710
    },
    {
      "epoch": 2.5259439169794655,
      "grad_norm": 3.1875,
      "learning_rate": 8.77889151096469e-06,
      "loss": 0.8016,
      "step": 720720
    },
    {
      "epoch": 2.5259789644863613,
      "grad_norm": 2.703125,
      "learning_rate": 8.778242482300989e-06,
      "loss": 0.8183,
      "step": 720730
    },
    {
      "epoch": 2.526014011993257,
      "grad_norm": 3.265625,
      "learning_rate": 8.777593453637287e-06,
      "loss": 0.8775,
      "step": 720740
    },
    {
      "epoch": 2.5260490595001523,
      "grad_norm": 2.859375,
      "learning_rate": 8.776944424973585e-06,
      "loss": 0.8313,
      "step": 720750
    },
    {
      "epoch": 2.526084107007048,
      "grad_norm": 3.203125,
      "learning_rate": 8.776295396309883e-06,
      "loss": 0.8143,
      "step": 720760
    },
    {
      "epoch": 2.5261191545139434,
      "grad_norm": 2.75,
      "learning_rate": 8.77564636764618e-06,
      "loss": 0.7995,
      "step": 720770
    },
    {
      "epoch": 2.526154202020839,
      "grad_norm": 2.6875,
      "learning_rate": 8.77499733898248e-06,
      "loss": 0.7743,
      "step": 720780
    },
    {
      "epoch": 2.526189249527735,
      "grad_norm": 2.921875,
      "learning_rate": 8.774348310318778e-06,
      "loss": 0.8303,
      "step": 720790
    },
    {
      "epoch": 2.5262242970346307,
      "grad_norm": 2.765625,
      "learning_rate": 8.773699281655075e-06,
      "loss": 0.8545,
      "step": 720800
    },
    {
      "epoch": 2.526259344541526,
      "grad_norm": 3.09375,
      "learning_rate": 8.773050252991373e-06,
      "loss": 0.8397,
      "step": 720810
    },
    {
      "epoch": 2.5262943920484218,
      "grad_norm": 3.203125,
      "learning_rate": 8.77240122432767e-06,
      "loss": 0.7664,
      "step": 720820
    },
    {
      "epoch": 2.526329439555317,
      "grad_norm": 2.75,
      "learning_rate": 8.771752195663969e-06,
      "loss": 0.7814,
      "step": 720830
    },
    {
      "epoch": 2.526364487062213,
      "grad_norm": 3.296875,
      "learning_rate": 8.771103167000268e-06,
      "loss": 0.7707,
      "step": 720840
    },
    {
      "epoch": 2.5263995345691086,
      "grad_norm": 3.140625,
      "learning_rate": 8.770454138336566e-06,
      "loss": 0.8611,
      "step": 720850
    },
    {
      "epoch": 2.526434582076004,
      "grad_norm": 2.796875,
      "learning_rate": 8.769805109672864e-06,
      "loss": 0.8025,
      "step": 720860
    },
    {
      "epoch": 2.5264696295828997,
      "grad_norm": 4.03125,
      "learning_rate": 8.769156081009162e-06,
      "loss": 0.812,
      "step": 720870
    },
    {
      "epoch": 2.526504677089795,
      "grad_norm": 3.21875,
      "learning_rate": 8.76850705234546e-06,
      "loss": 0.7997,
      "step": 720880
    },
    {
      "epoch": 2.5265397245966907,
      "grad_norm": 3.09375,
      "learning_rate": 8.767858023681758e-06,
      "loss": 0.7702,
      "step": 720890
    },
    {
      "epoch": 2.5265747721035865,
      "grad_norm": 2.703125,
      "learning_rate": 8.767208995018056e-06,
      "loss": 0.7587,
      "step": 720900
    },
    {
      "epoch": 2.5266098196104823,
      "grad_norm": 2.828125,
      "learning_rate": 8.766559966354354e-06,
      "loss": 0.7425,
      "step": 720910
    },
    {
      "epoch": 2.5266448671173776,
      "grad_norm": 2.875,
      "learning_rate": 8.765910937690652e-06,
      "loss": 0.8589,
      "step": 720920
    },
    {
      "epoch": 2.5266799146242733,
      "grad_norm": 2.578125,
      "learning_rate": 8.76526190902695e-06,
      "loss": 0.863,
      "step": 720930
    },
    {
      "epoch": 2.5267149621311686,
      "grad_norm": 3.34375,
      "learning_rate": 8.764612880363248e-06,
      "loss": 0.8388,
      "step": 720940
    },
    {
      "epoch": 2.5267500096380644,
      "grad_norm": 2.875,
      "learning_rate": 8.763963851699546e-06,
      "loss": 0.7482,
      "step": 720950
    },
    {
      "epoch": 2.52678505714496,
      "grad_norm": 3.453125,
      "learning_rate": 8.763314823035846e-06,
      "loss": 0.8556,
      "step": 720960
    },
    {
      "epoch": 2.5268201046518555,
      "grad_norm": 3.390625,
      "learning_rate": 8.762665794372144e-06,
      "loss": 0.8403,
      "step": 720970
    },
    {
      "epoch": 2.5268551521587512,
      "grad_norm": 3.3125,
      "learning_rate": 8.762016765708442e-06,
      "loss": 0.8001,
      "step": 720980
    },
    {
      "epoch": 2.5268901996656465,
      "grad_norm": 2.515625,
      "learning_rate": 8.761367737044738e-06,
      "loss": 0.7473,
      "step": 720990
    },
    {
      "epoch": 2.5269252471725423,
      "grad_norm": 2.40625,
      "learning_rate": 8.760718708381036e-06,
      "loss": 0.7458,
      "step": 721000
    },
    {
      "epoch": 2.526960294679438,
      "grad_norm": 2.953125,
      "learning_rate": 8.760069679717334e-06,
      "loss": 0.7646,
      "step": 721010
    },
    {
      "epoch": 2.526995342186334,
      "grad_norm": 2.78125,
      "learning_rate": 8.759420651053634e-06,
      "loss": 0.784,
      "step": 721020
    },
    {
      "epoch": 2.527030389693229,
      "grad_norm": 3.078125,
      "learning_rate": 8.758771622389932e-06,
      "loss": 0.8596,
      "step": 721030
    },
    {
      "epoch": 2.527065437200125,
      "grad_norm": 3.0625,
      "learning_rate": 8.75812259372623e-06,
      "loss": 0.7888,
      "step": 721040
    },
    {
      "epoch": 2.52710048470702,
      "grad_norm": 3.328125,
      "learning_rate": 8.757473565062528e-06,
      "loss": 0.8365,
      "step": 721050
    },
    {
      "epoch": 2.527135532213916,
      "grad_norm": 2.375,
      "learning_rate": 8.756824536398826e-06,
      "loss": 0.7937,
      "step": 721060
    },
    {
      "epoch": 2.5271705797208117,
      "grad_norm": 2.875,
      "learning_rate": 8.756175507735124e-06,
      "loss": 0.8595,
      "step": 721070
    },
    {
      "epoch": 2.527205627227707,
      "grad_norm": 2.3125,
      "learning_rate": 8.755526479071422e-06,
      "loss": 0.7932,
      "step": 721080
    },
    {
      "epoch": 2.527240674734603,
      "grad_norm": 3.28125,
      "learning_rate": 8.75487745040772e-06,
      "loss": 0.8694,
      "step": 721090
    },
    {
      "epoch": 2.527275722241498,
      "grad_norm": 2.9375,
      "learning_rate": 8.754228421744018e-06,
      "loss": 0.7821,
      "step": 721100
    },
    {
      "epoch": 2.527310769748394,
      "grad_norm": 2.75,
      "learning_rate": 8.753579393080316e-06,
      "loss": 0.776,
      "step": 721110
    },
    {
      "epoch": 2.5273458172552896,
      "grad_norm": 2.921875,
      "learning_rate": 8.752930364416614e-06,
      "loss": 0.8421,
      "step": 721120
    },
    {
      "epoch": 2.5273808647621854,
      "grad_norm": 2.828125,
      "learning_rate": 8.752281335752912e-06,
      "loss": 0.8164,
      "step": 721130
    },
    {
      "epoch": 2.5274159122690807,
      "grad_norm": 2.875,
      "learning_rate": 8.75163230708921e-06,
      "loss": 0.7353,
      "step": 721140
    },
    {
      "epoch": 2.5274509597759764,
      "grad_norm": 3.265625,
      "learning_rate": 8.75098327842551e-06,
      "loss": 0.8409,
      "step": 721150
    },
    {
      "epoch": 2.5274860072828718,
      "grad_norm": 2.9375,
      "learning_rate": 8.750334249761808e-06,
      "loss": 0.833,
      "step": 721160
    },
    {
      "epoch": 2.5275210547897675,
      "grad_norm": 2.453125,
      "learning_rate": 8.749685221098106e-06,
      "loss": 0.7467,
      "step": 721170
    },
    {
      "epoch": 2.5275561022966633,
      "grad_norm": 2.859375,
      "learning_rate": 8.749036192434402e-06,
      "loss": 0.7655,
      "step": 721180
    },
    {
      "epoch": 2.5275911498035586,
      "grad_norm": 2.578125,
      "learning_rate": 8.7483871637707e-06,
      "loss": 0.843,
      "step": 721190
    },
    {
      "epoch": 2.5276261973104543,
      "grad_norm": 2.734375,
      "learning_rate": 8.747738135107e-06,
      "loss": 0.7696,
      "step": 721200
    },
    {
      "epoch": 2.5276612448173497,
      "grad_norm": 3.28125,
      "learning_rate": 8.747089106443298e-06,
      "loss": 0.8075,
      "step": 721210
    },
    {
      "epoch": 2.5276962923242454,
      "grad_norm": 3.265625,
      "learning_rate": 8.746440077779596e-06,
      "loss": 0.8584,
      "step": 721220
    },
    {
      "epoch": 2.527731339831141,
      "grad_norm": 2.828125,
      "learning_rate": 8.745791049115894e-06,
      "loss": 0.8514,
      "step": 721230
    },
    {
      "epoch": 2.527766387338037,
      "grad_norm": 3.1875,
      "learning_rate": 8.745142020452192e-06,
      "loss": 0.7887,
      "step": 721240
    },
    {
      "epoch": 2.5278014348449322,
      "grad_norm": 2.96875,
      "learning_rate": 8.74449299178849e-06,
      "loss": 0.7321,
      "step": 721250
    },
    {
      "epoch": 2.527836482351828,
      "grad_norm": 2.984375,
      "learning_rate": 8.743843963124788e-06,
      "loss": 0.8348,
      "step": 721260
    },
    {
      "epoch": 2.5278715298587233,
      "grad_norm": 2.90625,
      "learning_rate": 8.743194934461086e-06,
      "loss": 0.7759,
      "step": 721270
    },
    {
      "epoch": 2.527906577365619,
      "grad_norm": 2.8125,
      "learning_rate": 8.742545905797384e-06,
      "loss": 0.788,
      "step": 721280
    },
    {
      "epoch": 2.527941624872515,
      "grad_norm": 2.90625,
      "learning_rate": 8.741896877133682e-06,
      "loss": 0.802,
      "step": 721290
    },
    {
      "epoch": 2.52797667237941,
      "grad_norm": 3.015625,
      "learning_rate": 8.74124784846998e-06,
      "loss": 0.7901,
      "step": 721300
    },
    {
      "epoch": 2.528011719886306,
      "grad_norm": 3.25,
      "learning_rate": 8.740598819806278e-06,
      "loss": 0.8339,
      "step": 721310
    },
    {
      "epoch": 2.528046767393201,
      "grad_norm": 2.875,
      "learning_rate": 8.739949791142576e-06,
      "loss": 0.8111,
      "step": 721320
    },
    {
      "epoch": 2.528081814900097,
      "grad_norm": 2.84375,
      "learning_rate": 8.739300762478876e-06,
      "loss": 0.8024,
      "step": 721330
    },
    {
      "epoch": 2.5281168624069927,
      "grad_norm": 2.921875,
      "learning_rate": 8.738651733815174e-06,
      "loss": 0.852,
      "step": 721340
    },
    {
      "epoch": 2.5281519099138885,
      "grad_norm": 2.609375,
      "learning_rate": 8.738002705151472e-06,
      "loss": 0.7513,
      "step": 721350
    },
    {
      "epoch": 2.528186957420784,
      "grad_norm": 2.90625,
      "learning_rate": 8.73735367648777e-06,
      "loss": 0.8248,
      "step": 721360
    },
    {
      "epoch": 2.5282220049276796,
      "grad_norm": 2.71875,
      "learning_rate": 8.736704647824066e-06,
      "loss": 0.7375,
      "step": 721370
    },
    {
      "epoch": 2.528257052434575,
      "grad_norm": 2.53125,
      "learning_rate": 8.736055619160364e-06,
      "loss": 0.7752,
      "step": 721380
    },
    {
      "epoch": 2.5282920999414706,
      "grad_norm": 2.609375,
      "learning_rate": 8.735406590496664e-06,
      "loss": 0.7365,
      "step": 721390
    },
    {
      "epoch": 2.5283271474483664,
      "grad_norm": 3.34375,
      "learning_rate": 8.734757561832962e-06,
      "loss": 0.7904,
      "step": 721400
    },
    {
      "epoch": 2.5283621949552617,
      "grad_norm": 3.515625,
      "learning_rate": 8.73410853316926e-06,
      "loss": 0.821,
      "step": 721410
    },
    {
      "epoch": 2.5283972424621575,
      "grad_norm": 3.015625,
      "learning_rate": 8.733459504505558e-06,
      "loss": 0.7912,
      "step": 721420
    },
    {
      "epoch": 2.528432289969053,
      "grad_norm": 3.015625,
      "learning_rate": 8.732810475841856e-06,
      "loss": 0.8523,
      "step": 721430
    },
    {
      "epoch": 2.5284673374759485,
      "grad_norm": 3.171875,
      "learning_rate": 8.732161447178154e-06,
      "loss": 0.8403,
      "step": 721440
    },
    {
      "epoch": 2.5285023849828443,
      "grad_norm": 2.9375,
      "learning_rate": 8.731512418514452e-06,
      "loss": 0.7876,
      "step": 721450
    },
    {
      "epoch": 2.52853743248974,
      "grad_norm": 2.71875,
      "learning_rate": 8.73086338985075e-06,
      "loss": 0.7984,
      "step": 721460
    },
    {
      "epoch": 2.5285724799966354,
      "grad_norm": 2.765625,
      "learning_rate": 8.730214361187048e-06,
      "loss": 0.8286,
      "step": 721470
    },
    {
      "epoch": 2.528607527503531,
      "grad_norm": 2.28125,
      "learning_rate": 8.729565332523346e-06,
      "loss": 0.7829,
      "step": 721480
    },
    {
      "epoch": 2.5286425750104264,
      "grad_norm": 3.09375,
      "learning_rate": 8.728916303859644e-06,
      "loss": 0.836,
      "step": 721490
    },
    {
      "epoch": 2.528677622517322,
      "grad_norm": 3.28125,
      "learning_rate": 8.728267275195942e-06,
      "loss": 0.8371,
      "step": 721500
    },
    {
      "epoch": 2.528712670024218,
      "grad_norm": 2.9375,
      "learning_rate": 8.727618246532241e-06,
      "loss": 0.884,
      "step": 721510
    },
    {
      "epoch": 2.5287477175311133,
      "grad_norm": 2.53125,
      "learning_rate": 8.72696921786854e-06,
      "loss": 0.8765,
      "step": 721520
    },
    {
      "epoch": 2.528782765038009,
      "grad_norm": 3.28125,
      "learning_rate": 8.726320189204837e-06,
      "loss": 0.7324,
      "step": 721530
    },
    {
      "epoch": 2.5288178125449043,
      "grad_norm": 2.921875,
      "learning_rate": 8.725671160541135e-06,
      "loss": 0.8474,
      "step": 721540
    },
    {
      "epoch": 2.5288528600518,
      "grad_norm": 2.671875,
      "learning_rate": 8.725022131877433e-06,
      "loss": 0.8154,
      "step": 721550
    },
    {
      "epoch": 2.528887907558696,
      "grad_norm": 3.015625,
      "learning_rate": 8.72437310321373e-06,
      "loss": 0.8446,
      "step": 721560
    },
    {
      "epoch": 2.5289229550655916,
      "grad_norm": 2.875,
      "learning_rate": 8.72372407455003e-06,
      "loss": 0.8461,
      "step": 721570
    },
    {
      "epoch": 2.528958002572487,
      "grad_norm": 2.65625,
      "learning_rate": 8.723075045886327e-06,
      "loss": 0.7793,
      "step": 721580
    },
    {
      "epoch": 2.5289930500793827,
      "grad_norm": 2.53125,
      "learning_rate": 8.722426017222625e-06,
      "loss": 0.7357,
      "step": 721590
    },
    {
      "epoch": 2.529028097586278,
      "grad_norm": 3.109375,
      "learning_rate": 8.721776988558923e-06,
      "loss": 0.787,
      "step": 721600
    },
    {
      "epoch": 2.5290631450931738,
      "grad_norm": 3.140625,
      "learning_rate": 8.721127959895221e-06,
      "loss": 0.8146,
      "step": 721610
    },
    {
      "epoch": 2.5290981926000695,
      "grad_norm": 2.96875,
      "learning_rate": 8.72047893123152e-06,
      "loss": 0.9342,
      "step": 721620
    },
    {
      "epoch": 2.529133240106965,
      "grad_norm": 2.78125,
      "learning_rate": 8.719829902567817e-06,
      "loss": 0.8398,
      "step": 721630
    },
    {
      "epoch": 2.5291682876138606,
      "grad_norm": 2.640625,
      "learning_rate": 8.719180873904117e-06,
      "loss": 0.7777,
      "step": 721640
    },
    {
      "epoch": 2.5292033351207563,
      "grad_norm": 2.953125,
      "learning_rate": 8.718531845240413e-06,
      "loss": 0.8386,
      "step": 721650
    },
    {
      "epoch": 2.5292383826276517,
      "grad_norm": 2.546875,
      "learning_rate": 8.717882816576711e-06,
      "loss": 0.8194,
      "step": 721660
    },
    {
      "epoch": 2.5292734301345474,
      "grad_norm": 2.703125,
      "learning_rate": 8.71723378791301e-06,
      "loss": 0.7956,
      "step": 721670
    },
    {
      "epoch": 2.529308477641443,
      "grad_norm": 2.828125,
      "learning_rate": 8.716584759249307e-06,
      "loss": 0.7683,
      "step": 721680
    },
    {
      "epoch": 2.5293435251483385,
      "grad_norm": 3.09375,
      "learning_rate": 8.715935730585605e-06,
      "loss": 0.9104,
      "step": 721690
    },
    {
      "epoch": 2.5293785726552342,
      "grad_norm": 3.328125,
      "learning_rate": 8.715286701921905e-06,
      "loss": 0.8176,
      "step": 721700
    },
    {
      "epoch": 2.5294136201621296,
      "grad_norm": 2.953125,
      "learning_rate": 8.714637673258203e-06,
      "loss": 0.7899,
      "step": 721710
    },
    {
      "epoch": 2.5294486676690253,
      "grad_norm": 3.203125,
      "learning_rate": 8.713988644594501e-06,
      "loss": 0.8221,
      "step": 721720
    },
    {
      "epoch": 2.529483715175921,
      "grad_norm": 3.03125,
      "learning_rate": 8.713339615930799e-06,
      "loss": 0.8289,
      "step": 721730
    },
    {
      "epoch": 2.529518762682817,
      "grad_norm": 3.078125,
      "learning_rate": 8.712690587267095e-06,
      "loss": 0.77,
      "step": 721740
    },
    {
      "epoch": 2.529553810189712,
      "grad_norm": 2.703125,
      "learning_rate": 8.712041558603395e-06,
      "loss": 0.7278,
      "step": 721750
    },
    {
      "epoch": 2.529588857696608,
      "grad_norm": 2.96875,
      "learning_rate": 8.711392529939693e-06,
      "loss": 0.8522,
      "step": 721760
    },
    {
      "epoch": 2.529623905203503,
      "grad_norm": 2.84375,
      "learning_rate": 8.710743501275991e-06,
      "loss": 0.7708,
      "step": 721770
    },
    {
      "epoch": 2.529658952710399,
      "grad_norm": 2.875,
      "learning_rate": 8.710094472612289e-06,
      "loss": 0.8031,
      "step": 721780
    },
    {
      "epoch": 2.5296940002172947,
      "grad_norm": 3.15625,
      "learning_rate": 8.709445443948587e-06,
      "loss": 0.8012,
      "step": 721790
    },
    {
      "epoch": 2.52972904772419,
      "grad_norm": 2.953125,
      "learning_rate": 8.708796415284885e-06,
      "loss": 0.8475,
      "step": 721800
    },
    {
      "epoch": 2.529764095231086,
      "grad_norm": 3.109375,
      "learning_rate": 8.708147386621183e-06,
      "loss": 0.8754,
      "step": 721810
    },
    {
      "epoch": 2.529799142737981,
      "grad_norm": 2.71875,
      "learning_rate": 8.707498357957483e-06,
      "loss": 0.8086,
      "step": 721820
    },
    {
      "epoch": 2.529834190244877,
      "grad_norm": 2.640625,
      "learning_rate": 8.70684932929378e-06,
      "loss": 0.7685,
      "step": 721830
    },
    {
      "epoch": 2.5298692377517726,
      "grad_norm": 2.984375,
      "learning_rate": 8.706200300630077e-06,
      "loss": 0.7819,
      "step": 721840
    },
    {
      "epoch": 2.5299042852586684,
      "grad_norm": 2.5625,
      "learning_rate": 8.705551271966375e-06,
      "loss": 0.7911,
      "step": 721850
    },
    {
      "epoch": 2.5299393327655637,
      "grad_norm": 3.046875,
      "learning_rate": 8.704902243302673e-06,
      "loss": 0.8355,
      "step": 721860
    },
    {
      "epoch": 2.5299743802724595,
      "grad_norm": 2.328125,
      "learning_rate": 8.704253214638971e-06,
      "loss": 0.7834,
      "step": 721870
    },
    {
      "epoch": 2.530009427779355,
      "grad_norm": 3.109375,
      "learning_rate": 8.70360418597527e-06,
      "loss": 0.8183,
      "step": 721880
    },
    {
      "epoch": 2.5300444752862505,
      "grad_norm": 2.9375,
      "learning_rate": 8.702955157311569e-06,
      "loss": 0.8206,
      "step": 721890
    },
    {
      "epoch": 2.5300795227931463,
      "grad_norm": 3.265625,
      "learning_rate": 8.702306128647867e-06,
      "loss": 0.8698,
      "step": 721900
    },
    {
      "epoch": 2.5301145703000416,
      "grad_norm": 2.921875,
      "learning_rate": 8.701657099984165e-06,
      "loss": 0.8062,
      "step": 721910
    },
    {
      "epoch": 2.5301496178069374,
      "grad_norm": 2.90625,
      "learning_rate": 8.701008071320463e-06,
      "loss": 0.8013,
      "step": 721920
    },
    {
      "epoch": 2.5301846653138327,
      "grad_norm": 3.140625,
      "learning_rate": 8.700359042656759e-06,
      "loss": 0.8757,
      "step": 721930
    },
    {
      "epoch": 2.5302197128207284,
      "grad_norm": 3.09375,
      "learning_rate": 8.699710013993059e-06,
      "loss": 0.8777,
      "step": 721940
    },
    {
      "epoch": 2.530254760327624,
      "grad_norm": 2.6875,
      "learning_rate": 8.699060985329357e-06,
      "loss": 0.8451,
      "step": 721950
    },
    {
      "epoch": 2.53028980783452,
      "grad_norm": 3.28125,
      "learning_rate": 8.698411956665655e-06,
      "loss": 0.8729,
      "step": 721960
    },
    {
      "epoch": 2.5303248553414153,
      "grad_norm": 3.515625,
      "learning_rate": 8.697762928001953e-06,
      "loss": 0.8137,
      "step": 721970
    },
    {
      "epoch": 2.530359902848311,
      "grad_norm": 3.171875,
      "learning_rate": 8.69711389933825e-06,
      "loss": 0.797,
      "step": 721980
    },
    {
      "epoch": 2.5303949503552063,
      "grad_norm": 2.828125,
      "learning_rate": 8.696464870674549e-06,
      "loss": 0.7854,
      "step": 721990
    },
    {
      "epoch": 2.530429997862102,
      "grad_norm": 2.921875,
      "learning_rate": 8.695815842010847e-06,
      "loss": 0.7347,
      "step": 722000
    },
    {
      "epoch": 2.530465045368998,
      "grad_norm": 3.09375,
      "learning_rate": 8.695166813347146e-06,
      "loss": 0.7853,
      "step": 722010
    },
    {
      "epoch": 2.530500092875893,
      "grad_norm": 2.59375,
      "learning_rate": 8.694517784683444e-06,
      "loss": 0.7425,
      "step": 722020
    },
    {
      "epoch": 2.530535140382789,
      "grad_norm": 2.703125,
      "learning_rate": 8.69386875601974e-06,
      "loss": 0.7139,
      "step": 722030
    },
    {
      "epoch": 2.5305701878896842,
      "grad_norm": 2.640625,
      "learning_rate": 8.693219727356039e-06,
      "loss": 0.8031,
      "step": 722040
    },
    {
      "epoch": 2.53060523539658,
      "grad_norm": 3.09375,
      "learning_rate": 8.692570698692337e-06,
      "loss": 0.7939,
      "step": 722050
    },
    {
      "epoch": 2.5306402829034758,
      "grad_norm": 2.8125,
      "learning_rate": 8.691921670028636e-06,
      "loss": 0.8182,
      "step": 722060
    },
    {
      "epoch": 2.5306753304103715,
      "grad_norm": 2.71875,
      "learning_rate": 8.691272641364934e-06,
      "loss": 0.8284,
      "step": 722070
    },
    {
      "epoch": 2.530710377917267,
      "grad_norm": 2.765625,
      "learning_rate": 8.690623612701232e-06,
      "loss": 0.8441,
      "step": 722080
    },
    {
      "epoch": 2.5307454254241626,
      "grad_norm": 2.90625,
      "learning_rate": 8.68997458403753e-06,
      "loss": 0.7707,
      "step": 722090
    },
    {
      "epoch": 2.530780472931058,
      "grad_norm": 2.515625,
      "learning_rate": 8.689325555373828e-06,
      "loss": 0.8337,
      "step": 722100
    },
    {
      "epoch": 2.5308155204379537,
      "grad_norm": 3.03125,
      "learning_rate": 8.688676526710126e-06,
      "loss": 0.8288,
      "step": 722110
    },
    {
      "epoch": 2.5308505679448494,
      "grad_norm": 2.8125,
      "learning_rate": 8.688027498046424e-06,
      "loss": 0.7374,
      "step": 722120
    },
    {
      "epoch": 2.5308856154517447,
      "grad_norm": 3.328125,
      "learning_rate": 8.687378469382722e-06,
      "loss": 0.8794,
      "step": 722130
    },
    {
      "epoch": 2.5309206629586405,
      "grad_norm": 2.953125,
      "learning_rate": 8.68672944071902e-06,
      "loss": 0.7916,
      "step": 722140
    },
    {
      "epoch": 2.530955710465536,
      "grad_norm": 2.859375,
      "learning_rate": 8.686080412055318e-06,
      "loss": 0.8,
      "step": 722150
    },
    {
      "epoch": 2.5309907579724316,
      "grad_norm": 2.453125,
      "learning_rate": 8.685431383391616e-06,
      "loss": 0.7918,
      "step": 722160
    },
    {
      "epoch": 2.5310258054793273,
      "grad_norm": 2.734375,
      "learning_rate": 8.684782354727914e-06,
      "loss": 0.7416,
      "step": 722170
    },
    {
      "epoch": 2.531060852986223,
      "grad_norm": 2.984375,
      "learning_rate": 8.684133326064212e-06,
      "loss": 0.8273,
      "step": 722180
    },
    {
      "epoch": 2.5310959004931184,
      "grad_norm": 2.859375,
      "learning_rate": 8.683484297400512e-06,
      "loss": 0.8497,
      "step": 722190
    },
    {
      "epoch": 2.531130948000014,
      "grad_norm": 2.484375,
      "learning_rate": 8.68283526873681e-06,
      "loss": 0.7248,
      "step": 722200
    },
    {
      "epoch": 2.5311659955069095,
      "grad_norm": 3.046875,
      "learning_rate": 8.682186240073106e-06,
      "loss": 0.7622,
      "step": 722210
    },
    {
      "epoch": 2.531201043013805,
      "grad_norm": 2.90625,
      "learning_rate": 8.681537211409404e-06,
      "loss": 0.776,
      "step": 722220
    },
    {
      "epoch": 2.531236090520701,
      "grad_norm": 2.96875,
      "learning_rate": 8.680888182745702e-06,
      "loss": 0.8326,
      "step": 722230
    },
    {
      "epoch": 2.5312711380275963,
      "grad_norm": 3.109375,
      "learning_rate": 8.680239154082e-06,
      "loss": 0.7769,
      "step": 722240
    },
    {
      "epoch": 2.531306185534492,
      "grad_norm": 3.015625,
      "learning_rate": 8.6795901254183e-06,
      "loss": 0.7749,
      "step": 722250
    },
    {
      "epoch": 2.5313412330413874,
      "grad_norm": 2.8125,
      "learning_rate": 8.678941096754598e-06,
      "loss": 0.8161,
      "step": 722260
    },
    {
      "epoch": 2.531376280548283,
      "grad_norm": 2.890625,
      "learning_rate": 8.678292068090896e-06,
      "loss": 0.8079,
      "step": 722270
    },
    {
      "epoch": 2.531411328055179,
      "grad_norm": 2.796875,
      "learning_rate": 8.677643039427194e-06,
      "loss": 0.7848,
      "step": 722280
    },
    {
      "epoch": 2.5314463755620746,
      "grad_norm": 3.171875,
      "learning_rate": 8.676994010763492e-06,
      "loss": 0.8284,
      "step": 722290
    },
    {
      "epoch": 2.53148142306897,
      "grad_norm": 3.1875,
      "learning_rate": 8.67634498209979e-06,
      "loss": 0.7389,
      "step": 722300
    },
    {
      "epoch": 2.5315164705758657,
      "grad_norm": 3.265625,
      "learning_rate": 8.675695953436088e-06,
      "loss": 0.8164,
      "step": 722310
    },
    {
      "epoch": 2.531551518082761,
      "grad_norm": 2.859375,
      "learning_rate": 8.675046924772386e-06,
      "loss": 0.8404,
      "step": 722320
    },
    {
      "epoch": 2.531586565589657,
      "grad_norm": 2.4375,
      "learning_rate": 8.674397896108684e-06,
      "loss": 0.7495,
      "step": 722330
    },
    {
      "epoch": 2.5316216130965525,
      "grad_norm": 3.0,
      "learning_rate": 8.673748867444982e-06,
      "loss": 0.7837,
      "step": 722340
    },
    {
      "epoch": 2.531656660603448,
      "grad_norm": 2.859375,
      "learning_rate": 8.67309983878128e-06,
      "loss": 0.7629,
      "step": 722350
    },
    {
      "epoch": 2.5316917081103436,
      "grad_norm": 2.84375,
      "learning_rate": 8.672450810117578e-06,
      "loss": 0.8276,
      "step": 722360
    },
    {
      "epoch": 2.531726755617239,
      "grad_norm": 2.984375,
      "learning_rate": 8.671801781453878e-06,
      "loss": 0.8401,
      "step": 722370
    },
    {
      "epoch": 2.5317618031241347,
      "grad_norm": 3.25,
      "learning_rate": 8.671152752790176e-06,
      "loss": 0.7854,
      "step": 722380
    },
    {
      "epoch": 2.5317968506310304,
      "grad_norm": 4.375,
      "learning_rate": 8.670503724126474e-06,
      "loss": 0.7682,
      "step": 722390
    },
    {
      "epoch": 2.531831898137926,
      "grad_norm": 2.96875,
      "learning_rate": 8.66985469546277e-06,
      "loss": 0.8536,
      "step": 722400
    },
    {
      "epoch": 2.5318669456448215,
      "grad_norm": 2.8125,
      "learning_rate": 8.669205666799068e-06,
      "loss": 0.8543,
      "step": 722410
    },
    {
      "epoch": 2.5319019931517173,
      "grad_norm": 3.1875,
      "learning_rate": 8.668556638135366e-06,
      "loss": 0.797,
      "step": 722420
    },
    {
      "epoch": 2.5319370406586126,
      "grad_norm": 2.8125,
      "learning_rate": 8.667907609471666e-06,
      "loss": 0.7578,
      "step": 722430
    },
    {
      "epoch": 2.5319720881655083,
      "grad_norm": 2.90625,
      "learning_rate": 8.667258580807964e-06,
      "loss": 0.8178,
      "step": 722440
    },
    {
      "epoch": 2.532007135672404,
      "grad_norm": 3.3125,
      "learning_rate": 8.666609552144262e-06,
      "loss": 0.8772,
      "step": 722450
    },
    {
      "epoch": 2.5320421831792994,
      "grad_norm": 2.734375,
      "learning_rate": 8.66596052348056e-06,
      "loss": 0.8673,
      "step": 722460
    },
    {
      "epoch": 2.532077230686195,
      "grad_norm": 3.296875,
      "learning_rate": 8.665311494816858e-06,
      "loss": 0.8172,
      "step": 722470
    },
    {
      "epoch": 2.5321122781930905,
      "grad_norm": 2.71875,
      "learning_rate": 8.664662466153156e-06,
      "loss": 0.8049,
      "step": 722480
    },
    {
      "epoch": 2.5321473256999862,
      "grad_norm": 2.71875,
      "learning_rate": 8.664013437489454e-06,
      "loss": 0.8398,
      "step": 722490
    },
    {
      "epoch": 2.532182373206882,
      "grad_norm": 3.125,
      "learning_rate": 8.663364408825752e-06,
      "loss": 0.7663,
      "step": 722500
    },
    {
      "epoch": 2.5322174207137778,
      "grad_norm": 2.921875,
      "learning_rate": 8.66271538016205e-06,
      "loss": 0.7313,
      "step": 722510
    },
    {
      "epoch": 2.532252468220673,
      "grad_norm": 2.796875,
      "learning_rate": 8.662066351498348e-06,
      "loss": 0.7746,
      "step": 722520
    },
    {
      "epoch": 2.532287515727569,
      "grad_norm": 3.734375,
      "learning_rate": 8.661417322834646e-06,
      "loss": 0.8768,
      "step": 722530
    },
    {
      "epoch": 2.532322563234464,
      "grad_norm": 2.546875,
      "learning_rate": 8.660768294170944e-06,
      "loss": 0.8069,
      "step": 722540
    },
    {
      "epoch": 2.53235761074136,
      "grad_norm": 2.84375,
      "learning_rate": 8.660119265507242e-06,
      "loss": 0.7883,
      "step": 722550
    },
    {
      "epoch": 2.5323926582482557,
      "grad_norm": 3.046875,
      "learning_rate": 8.659470236843541e-06,
      "loss": 0.814,
      "step": 722560
    },
    {
      "epoch": 2.532427705755151,
      "grad_norm": 2.578125,
      "learning_rate": 8.65882120817984e-06,
      "loss": 0.8257,
      "step": 722570
    },
    {
      "epoch": 2.5324627532620467,
      "grad_norm": 3.125,
      "learning_rate": 8.658172179516137e-06,
      "loss": 0.7864,
      "step": 722580
    },
    {
      "epoch": 2.532497800768942,
      "grad_norm": 2.625,
      "learning_rate": 8.657523150852434e-06,
      "loss": 0.8329,
      "step": 722590
    },
    {
      "epoch": 2.532532848275838,
      "grad_norm": 3.140625,
      "learning_rate": 8.656874122188732e-06,
      "loss": 0.8069,
      "step": 722600
    },
    {
      "epoch": 2.5325678957827336,
      "grad_norm": 2.8125,
      "learning_rate": 8.656225093525031e-06,
      "loss": 0.9224,
      "step": 722610
    },
    {
      "epoch": 2.5326029432896293,
      "grad_norm": 2.75,
      "learning_rate": 8.65557606486133e-06,
      "loss": 0.8106,
      "step": 722620
    },
    {
      "epoch": 2.5326379907965246,
      "grad_norm": 3.53125,
      "learning_rate": 8.654927036197627e-06,
      "loss": 0.7835,
      "step": 722630
    },
    {
      "epoch": 2.5326730383034204,
      "grad_norm": 2.515625,
      "learning_rate": 8.654278007533925e-06,
      "loss": 0.7249,
      "step": 722640
    },
    {
      "epoch": 2.5327080858103157,
      "grad_norm": 2.953125,
      "learning_rate": 8.653628978870223e-06,
      "loss": 0.8447,
      "step": 722650
    },
    {
      "epoch": 2.5327431333172115,
      "grad_norm": 3.234375,
      "learning_rate": 8.652979950206521e-06,
      "loss": 0.7874,
      "step": 722660
    },
    {
      "epoch": 2.532778180824107,
      "grad_norm": 2.4375,
      "learning_rate": 8.65233092154282e-06,
      "loss": 0.8542,
      "step": 722670
    },
    {
      "epoch": 2.5328132283310025,
      "grad_norm": 2.5625,
      "learning_rate": 8.651681892879117e-06,
      "loss": 0.7945,
      "step": 722680
    },
    {
      "epoch": 2.5328482758378983,
      "grad_norm": 3.390625,
      "learning_rate": 8.651032864215415e-06,
      "loss": 0.8161,
      "step": 722690
    },
    {
      "epoch": 2.5328833233447936,
      "grad_norm": 3.3125,
      "learning_rate": 8.650383835551713e-06,
      "loss": 0.7916,
      "step": 722700
    },
    {
      "epoch": 2.5329183708516894,
      "grad_norm": 2.671875,
      "learning_rate": 8.649734806888011e-06,
      "loss": 0.7321,
      "step": 722710
    },
    {
      "epoch": 2.532953418358585,
      "grad_norm": 3.09375,
      "learning_rate": 8.64908577822431e-06,
      "loss": 0.8017,
      "step": 722720
    },
    {
      "epoch": 2.532988465865481,
      "grad_norm": 2.734375,
      "learning_rate": 8.648436749560607e-06,
      "loss": 0.7961,
      "step": 722730
    },
    {
      "epoch": 2.533023513372376,
      "grad_norm": 2.890625,
      "learning_rate": 8.647787720896907e-06,
      "loss": 0.7141,
      "step": 722740
    },
    {
      "epoch": 2.533058560879272,
      "grad_norm": 2.765625,
      "learning_rate": 8.647138692233205e-06,
      "loss": 0.794,
      "step": 722750
    },
    {
      "epoch": 2.5330936083861673,
      "grad_norm": 3.640625,
      "learning_rate": 8.646489663569503e-06,
      "loss": 0.8299,
      "step": 722760
    },
    {
      "epoch": 2.533128655893063,
      "grad_norm": 2.953125,
      "learning_rate": 8.645840634905801e-06,
      "loss": 0.8086,
      "step": 722770
    },
    {
      "epoch": 2.5331637033999588,
      "grad_norm": 3.359375,
      "learning_rate": 8.645191606242097e-06,
      "loss": 0.8696,
      "step": 722780
    },
    {
      "epoch": 2.533198750906854,
      "grad_norm": 3.25,
      "learning_rate": 8.644542577578395e-06,
      "loss": 0.8188,
      "step": 722790
    },
    {
      "epoch": 2.53323379841375,
      "grad_norm": 3.421875,
      "learning_rate": 8.643893548914695e-06,
      "loss": 0.7877,
      "step": 722800
    },
    {
      "epoch": 2.533268845920645,
      "grad_norm": 2.421875,
      "learning_rate": 8.643244520250993e-06,
      "loss": 0.8533,
      "step": 722810
    },
    {
      "epoch": 2.533303893427541,
      "grad_norm": 2.828125,
      "learning_rate": 8.642595491587291e-06,
      "loss": 0.7604,
      "step": 722820
    },
    {
      "epoch": 2.5333389409344367,
      "grad_norm": 3.3125,
      "learning_rate": 8.641946462923589e-06,
      "loss": 0.8188,
      "step": 722830
    },
    {
      "epoch": 2.5333739884413324,
      "grad_norm": 3.625,
      "learning_rate": 8.641297434259887e-06,
      "loss": 0.8041,
      "step": 722840
    },
    {
      "epoch": 2.5334090359482277,
      "grad_norm": 2.75,
      "learning_rate": 8.640648405596185e-06,
      "loss": 0.7634,
      "step": 722850
    },
    {
      "epoch": 2.5334440834551235,
      "grad_norm": 2.984375,
      "learning_rate": 8.639999376932483e-06,
      "loss": 0.8456,
      "step": 722860
    },
    {
      "epoch": 2.533479130962019,
      "grad_norm": 2.875,
      "learning_rate": 8.639350348268781e-06,
      "loss": 0.7636,
      "step": 722870
    },
    {
      "epoch": 2.5335141784689146,
      "grad_norm": 2.90625,
      "learning_rate": 8.638701319605079e-06,
      "loss": 0.7848,
      "step": 722880
    },
    {
      "epoch": 2.5335492259758103,
      "grad_norm": 2.953125,
      "learning_rate": 8.638052290941377e-06,
      "loss": 0.8227,
      "step": 722890
    },
    {
      "epoch": 2.5335842734827057,
      "grad_norm": 2.625,
      "learning_rate": 8.637403262277675e-06,
      "loss": 0.6948,
      "step": 722900
    },
    {
      "epoch": 2.5336193209896014,
      "grad_norm": 2.734375,
      "learning_rate": 8.636754233613973e-06,
      "loss": 0.8944,
      "step": 722910
    },
    {
      "epoch": 2.533654368496497,
      "grad_norm": 2.734375,
      "learning_rate": 8.636105204950273e-06,
      "loss": 0.8211,
      "step": 722920
    },
    {
      "epoch": 2.5336894160033925,
      "grad_norm": 2.5,
      "learning_rate": 8.63545617628657e-06,
      "loss": 0.7784,
      "step": 722930
    },
    {
      "epoch": 2.5337244635102882,
      "grad_norm": 2.5,
      "learning_rate": 8.634807147622869e-06,
      "loss": 0.751,
      "step": 722940
    },
    {
      "epoch": 2.533759511017184,
      "grad_norm": 2.921875,
      "learning_rate": 8.634158118959167e-06,
      "loss": 0.8048,
      "step": 722950
    },
    {
      "epoch": 2.5337945585240793,
      "grad_norm": 2.359375,
      "learning_rate": 8.633509090295465e-06,
      "loss": 0.8203,
      "step": 722960
    },
    {
      "epoch": 2.533829606030975,
      "grad_norm": 2.859375,
      "learning_rate": 8.632860061631761e-06,
      "loss": 0.8433,
      "step": 722970
    },
    {
      "epoch": 2.5338646535378704,
      "grad_norm": 2.78125,
      "learning_rate": 8.63221103296806e-06,
      "loss": 0.8044,
      "step": 722980
    },
    {
      "epoch": 2.533899701044766,
      "grad_norm": 2.75,
      "learning_rate": 8.631562004304359e-06,
      "loss": 0.8526,
      "step": 722990
    },
    {
      "epoch": 2.533934748551662,
      "grad_norm": 3.09375,
      "learning_rate": 8.630912975640657e-06,
      "loss": 0.7835,
      "step": 723000
    },
    {
      "epoch": 2.533969796058557,
      "grad_norm": 3.234375,
      "learning_rate": 8.630263946976955e-06,
      "loss": 0.8401,
      "step": 723010
    },
    {
      "epoch": 2.534004843565453,
      "grad_norm": 3.515625,
      "learning_rate": 8.629614918313253e-06,
      "loss": 0.8625,
      "step": 723020
    },
    {
      "epoch": 2.5340398910723487,
      "grad_norm": 2.625,
      "learning_rate": 8.62896588964955e-06,
      "loss": 0.7401,
      "step": 723030
    },
    {
      "epoch": 2.534074938579244,
      "grad_norm": 3.28125,
      "learning_rate": 8.628316860985849e-06,
      "loss": 0.8054,
      "step": 723040
    },
    {
      "epoch": 2.53410998608614,
      "grad_norm": 2.421875,
      "learning_rate": 8.627667832322148e-06,
      "loss": 0.781,
      "step": 723050
    },
    {
      "epoch": 2.5341450335930356,
      "grad_norm": 2.703125,
      "learning_rate": 8.627018803658445e-06,
      "loss": 0.8022,
      "step": 723060
    },
    {
      "epoch": 2.534180081099931,
      "grad_norm": 3.171875,
      "learning_rate": 8.626369774994743e-06,
      "loss": 0.7999,
      "step": 723070
    },
    {
      "epoch": 2.5342151286068266,
      "grad_norm": 2.984375,
      "learning_rate": 8.62572074633104e-06,
      "loss": 0.7808,
      "step": 723080
    },
    {
      "epoch": 2.534250176113722,
      "grad_norm": 3.0625,
      "learning_rate": 8.625071717667339e-06,
      "loss": 0.7798,
      "step": 723090
    },
    {
      "epoch": 2.5342852236206177,
      "grad_norm": 3.046875,
      "learning_rate": 8.624422689003637e-06,
      "loss": 0.8324,
      "step": 723100
    },
    {
      "epoch": 2.5343202711275135,
      "grad_norm": 3.4375,
      "learning_rate": 8.623773660339936e-06,
      "loss": 0.8539,
      "step": 723110
    },
    {
      "epoch": 2.534355318634409,
      "grad_norm": 2.9375,
      "learning_rate": 8.623124631676234e-06,
      "loss": 0.7538,
      "step": 723120
    },
    {
      "epoch": 2.5343903661413045,
      "grad_norm": 2.6875,
      "learning_rate": 8.622475603012532e-06,
      "loss": 0.7929,
      "step": 723130
    },
    {
      "epoch": 2.5344254136482003,
      "grad_norm": 2.8125,
      "learning_rate": 8.62182657434883e-06,
      "loss": 0.7481,
      "step": 723140
    },
    {
      "epoch": 2.5344604611550956,
      "grad_norm": 3.484375,
      "learning_rate": 8.621177545685127e-06,
      "loss": 0.7516,
      "step": 723150
    },
    {
      "epoch": 2.5344955086619914,
      "grad_norm": 3.140625,
      "learning_rate": 8.620528517021426e-06,
      "loss": 0.8847,
      "step": 723160
    },
    {
      "epoch": 2.534530556168887,
      "grad_norm": 2.53125,
      "learning_rate": 8.619879488357724e-06,
      "loss": 0.8483,
      "step": 723170
    },
    {
      "epoch": 2.5345656036757824,
      "grad_norm": 2.8125,
      "learning_rate": 8.619230459694022e-06,
      "loss": 0.8258,
      "step": 723180
    },
    {
      "epoch": 2.534600651182678,
      "grad_norm": 3.015625,
      "learning_rate": 8.61858143103032e-06,
      "loss": 0.8626,
      "step": 723190
    },
    {
      "epoch": 2.5346356986895735,
      "grad_norm": 2.65625,
      "learning_rate": 8.617932402366618e-06,
      "loss": 0.7852,
      "step": 723200
    },
    {
      "epoch": 2.5346707461964693,
      "grad_norm": 3.140625,
      "learning_rate": 8.617283373702916e-06,
      "loss": 0.8218,
      "step": 723210
    },
    {
      "epoch": 2.534705793703365,
      "grad_norm": 2.65625,
      "learning_rate": 8.616634345039214e-06,
      "loss": 0.8722,
      "step": 723220
    },
    {
      "epoch": 2.5347408412102608,
      "grad_norm": 3.328125,
      "learning_rate": 8.615985316375514e-06,
      "loss": 0.756,
      "step": 723230
    },
    {
      "epoch": 2.534775888717156,
      "grad_norm": 3.03125,
      "learning_rate": 8.615336287711812e-06,
      "loss": 0.8272,
      "step": 723240
    },
    {
      "epoch": 2.534810936224052,
      "grad_norm": 2.859375,
      "learning_rate": 8.614687259048108e-06,
      "loss": 0.8796,
      "step": 723250
    },
    {
      "epoch": 2.534845983730947,
      "grad_norm": 2.9375,
      "learning_rate": 8.614038230384406e-06,
      "loss": 0.8315,
      "step": 723260
    },
    {
      "epoch": 2.534881031237843,
      "grad_norm": 2.578125,
      "learning_rate": 8.613389201720704e-06,
      "loss": 0.7751,
      "step": 723270
    },
    {
      "epoch": 2.5349160787447387,
      "grad_norm": 3.09375,
      "learning_rate": 8.612740173057002e-06,
      "loss": 0.8178,
      "step": 723280
    },
    {
      "epoch": 2.534951126251634,
      "grad_norm": 2.796875,
      "learning_rate": 8.612091144393302e-06,
      "loss": 0.7799,
      "step": 723290
    },
    {
      "epoch": 2.5349861737585297,
      "grad_norm": 3.015625,
      "learning_rate": 8.6114421157296e-06,
      "loss": 0.8792,
      "step": 723300
    },
    {
      "epoch": 2.535021221265425,
      "grad_norm": 2.578125,
      "learning_rate": 8.610793087065898e-06,
      "loss": 0.7852,
      "step": 723310
    },
    {
      "epoch": 2.535056268772321,
      "grad_norm": 2.578125,
      "learning_rate": 8.610144058402196e-06,
      "loss": 0.7646,
      "step": 723320
    },
    {
      "epoch": 2.5350913162792166,
      "grad_norm": 3.390625,
      "learning_rate": 8.609495029738494e-06,
      "loss": 0.8651,
      "step": 723330
    },
    {
      "epoch": 2.5351263637861123,
      "grad_norm": 2.953125,
      "learning_rate": 8.60884600107479e-06,
      "loss": 0.7324,
      "step": 723340
    },
    {
      "epoch": 2.5351614112930076,
      "grad_norm": 3.046875,
      "learning_rate": 8.60819697241109e-06,
      "loss": 0.7705,
      "step": 723350
    },
    {
      "epoch": 2.5351964587999034,
      "grad_norm": 2.890625,
      "learning_rate": 8.607547943747388e-06,
      "loss": 0.7969,
      "step": 723360
    },
    {
      "epoch": 2.5352315063067987,
      "grad_norm": 2.8125,
      "learning_rate": 8.606898915083686e-06,
      "loss": 0.8352,
      "step": 723370
    },
    {
      "epoch": 2.5352665538136945,
      "grad_norm": 3.171875,
      "learning_rate": 8.606249886419984e-06,
      "loss": 0.8423,
      "step": 723380
    },
    {
      "epoch": 2.5353016013205902,
      "grad_norm": 3.15625,
      "learning_rate": 8.605600857756282e-06,
      "loss": 0.8229,
      "step": 723390
    },
    {
      "epoch": 2.5353366488274856,
      "grad_norm": 2.453125,
      "learning_rate": 8.60495182909258e-06,
      "loss": 0.8786,
      "step": 723400
    },
    {
      "epoch": 2.5353716963343813,
      "grad_norm": 3.265625,
      "learning_rate": 8.604302800428878e-06,
      "loss": 0.8155,
      "step": 723410
    },
    {
      "epoch": 2.5354067438412766,
      "grad_norm": 2.671875,
      "learning_rate": 8.603653771765178e-06,
      "loss": 0.8578,
      "step": 723420
    },
    {
      "epoch": 2.5354417913481724,
      "grad_norm": 3.15625,
      "learning_rate": 8.603004743101476e-06,
      "loss": 0.8628,
      "step": 723430
    },
    {
      "epoch": 2.535476838855068,
      "grad_norm": 2.859375,
      "learning_rate": 8.602355714437772e-06,
      "loss": 0.7938,
      "step": 723440
    },
    {
      "epoch": 2.535511886361964,
      "grad_norm": 2.734375,
      "learning_rate": 8.60170668577407e-06,
      "loss": 0.7943,
      "step": 723450
    },
    {
      "epoch": 2.535546933868859,
      "grad_norm": 2.53125,
      "learning_rate": 8.601057657110368e-06,
      "loss": 0.8507,
      "step": 723460
    },
    {
      "epoch": 2.535581981375755,
      "grad_norm": 3.265625,
      "learning_rate": 8.600408628446668e-06,
      "loss": 0.8316,
      "step": 723470
    },
    {
      "epoch": 2.5356170288826503,
      "grad_norm": 2.875,
      "learning_rate": 8.599759599782966e-06,
      "loss": 0.8446,
      "step": 723480
    },
    {
      "epoch": 2.535652076389546,
      "grad_norm": 2.53125,
      "learning_rate": 8.599110571119264e-06,
      "loss": 0.788,
      "step": 723490
    },
    {
      "epoch": 2.535687123896442,
      "grad_norm": 3.078125,
      "learning_rate": 8.598461542455562e-06,
      "loss": 0.8606,
      "step": 723500
    },
    {
      "epoch": 2.535722171403337,
      "grad_norm": 2.96875,
      "learning_rate": 8.59781251379186e-06,
      "loss": 0.8203,
      "step": 723510
    },
    {
      "epoch": 2.535757218910233,
      "grad_norm": 2.671875,
      "learning_rate": 8.597163485128158e-06,
      "loss": 0.8151,
      "step": 723520
    },
    {
      "epoch": 2.535792266417128,
      "grad_norm": 2.53125,
      "learning_rate": 8.596514456464456e-06,
      "loss": 0.7864,
      "step": 723530
    },
    {
      "epoch": 2.535827313924024,
      "grad_norm": 3.078125,
      "learning_rate": 8.595865427800754e-06,
      "loss": 0.8002,
      "step": 723540
    },
    {
      "epoch": 2.5358623614309197,
      "grad_norm": 2.828125,
      "learning_rate": 8.595216399137052e-06,
      "loss": 0.8281,
      "step": 723550
    },
    {
      "epoch": 2.5358974089378155,
      "grad_norm": 3.375,
      "learning_rate": 8.59456737047335e-06,
      "loss": 0.7565,
      "step": 723560
    },
    {
      "epoch": 2.5359324564447108,
      "grad_norm": 2.703125,
      "learning_rate": 8.593918341809648e-06,
      "loss": 0.7203,
      "step": 723570
    },
    {
      "epoch": 2.5359675039516065,
      "grad_norm": 2.6875,
      "learning_rate": 8.593269313145946e-06,
      "loss": 0.7828,
      "step": 723580
    },
    {
      "epoch": 2.536002551458502,
      "grad_norm": 2.953125,
      "learning_rate": 8.592620284482244e-06,
      "loss": 0.787,
      "step": 723590
    },
    {
      "epoch": 2.5360375989653976,
      "grad_norm": 3.03125,
      "learning_rate": 8.591971255818544e-06,
      "loss": 0.7544,
      "step": 723600
    },
    {
      "epoch": 2.5360726464722934,
      "grad_norm": 2.78125,
      "learning_rate": 8.591322227154842e-06,
      "loss": 0.8323,
      "step": 723610
    },
    {
      "epoch": 2.5361076939791887,
      "grad_norm": 2.921875,
      "learning_rate": 8.590673198491138e-06,
      "loss": 0.766,
      "step": 723620
    },
    {
      "epoch": 2.5361427414860844,
      "grad_norm": 3.234375,
      "learning_rate": 8.590024169827436e-06,
      "loss": 0.8462,
      "step": 723630
    },
    {
      "epoch": 2.5361777889929797,
      "grad_norm": 2.609375,
      "learning_rate": 8.589375141163734e-06,
      "loss": 0.821,
      "step": 723640
    },
    {
      "epoch": 2.5362128364998755,
      "grad_norm": 3.03125,
      "learning_rate": 8.588726112500032e-06,
      "loss": 0.8262,
      "step": 723650
    },
    {
      "epoch": 2.5362478840067713,
      "grad_norm": 2.953125,
      "learning_rate": 8.588077083836332e-06,
      "loss": 0.7147,
      "step": 723660
    },
    {
      "epoch": 2.536282931513667,
      "grad_norm": 3.03125,
      "learning_rate": 8.58742805517263e-06,
      "loss": 0.8391,
      "step": 723670
    },
    {
      "epoch": 2.5363179790205623,
      "grad_norm": 3.140625,
      "learning_rate": 8.586779026508928e-06,
      "loss": 0.8928,
      "step": 723680
    },
    {
      "epoch": 2.536353026527458,
      "grad_norm": 2.796875,
      "learning_rate": 8.586129997845226e-06,
      "loss": 0.7562,
      "step": 723690
    },
    {
      "epoch": 2.5363880740343534,
      "grad_norm": 2.515625,
      "learning_rate": 8.585480969181524e-06,
      "loss": 0.7858,
      "step": 723700
    },
    {
      "epoch": 2.536423121541249,
      "grad_norm": 2.828125,
      "learning_rate": 8.584831940517822e-06,
      "loss": 0.728,
      "step": 723710
    },
    {
      "epoch": 2.536458169048145,
      "grad_norm": 3.125,
      "learning_rate": 8.58418291185412e-06,
      "loss": 0.8563,
      "step": 723720
    },
    {
      "epoch": 2.5364932165550402,
      "grad_norm": 2.890625,
      "learning_rate": 8.583533883190418e-06,
      "loss": 0.7551,
      "step": 723730
    },
    {
      "epoch": 2.536528264061936,
      "grad_norm": 2.8125,
      "learning_rate": 8.582884854526716e-06,
      "loss": 0.8089,
      "step": 723740
    },
    {
      "epoch": 2.5365633115688313,
      "grad_norm": 3.078125,
      "learning_rate": 8.582235825863014e-06,
      "loss": 0.8189,
      "step": 723750
    },
    {
      "epoch": 2.536598359075727,
      "grad_norm": 2.65625,
      "learning_rate": 8.581586797199312e-06,
      "loss": 0.7922,
      "step": 723760
    },
    {
      "epoch": 2.536633406582623,
      "grad_norm": 2.34375,
      "learning_rate": 8.58093776853561e-06,
      "loss": 0.7965,
      "step": 723770
    },
    {
      "epoch": 2.5366684540895186,
      "grad_norm": 2.703125,
      "learning_rate": 8.58028873987191e-06,
      "loss": 0.8488,
      "step": 723780
    },
    {
      "epoch": 2.536703501596414,
      "grad_norm": 3.328125,
      "learning_rate": 8.579639711208207e-06,
      "loss": 0.8006,
      "step": 723790
    },
    {
      "epoch": 2.5367385491033096,
      "grad_norm": 2.96875,
      "learning_rate": 8.578990682544505e-06,
      "loss": 0.8755,
      "step": 723800
    },
    {
      "epoch": 2.536773596610205,
      "grad_norm": 2.9375,
      "learning_rate": 8.578341653880802e-06,
      "loss": 0.8608,
      "step": 723810
    },
    {
      "epoch": 2.5368086441171007,
      "grad_norm": 3.109375,
      "learning_rate": 8.5776926252171e-06,
      "loss": 0.8162,
      "step": 723820
    },
    {
      "epoch": 2.5368436916239965,
      "grad_norm": 2.4375,
      "learning_rate": 8.577043596553398e-06,
      "loss": 0.8265,
      "step": 723830
    },
    {
      "epoch": 2.536878739130892,
      "grad_norm": 2.953125,
      "learning_rate": 8.576394567889697e-06,
      "loss": 0.8513,
      "step": 723840
    },
    {
      "epoch": 2.5369137866377875,
      "grad_norm": 2.96875,
      "learning_rate": 8.575745539225995e-06,
      "loss": 0.8236,
      "step": 723850
    },
    {
      "epoch": 2.536948834144683,
      "grad_norm": 2.765625,
      "learning_rate": 8.575096510562293e-06,
      "loss": 0.7957,
      "step": 723860
    },
    {
      "epoch": 2.5369838816515786,
      "grad_norm": 2.828125,
      "learning_rate": 8.574447481898591e-06,
      "loss": 0.8709,
      "step": 723870
    },
    {
      "epoch": 2.5370189291584744,
      "grad_norm": 3.109375,
      "learning_rate": 8.57379845323489e-06,
      "loss": 0.8395,
      "step": 723880
    },
    {
      "epoch": 2.53705397666537,
      "grad_norm": 2.53125,
      "learning_rate": 8.573149424571187e-06,
      "loss": 0.7746,
      "step": 723890
    },
    {
      "epoch": 2.5370890241722655,
      "grad_norm": 2.5625,
      "learning_rate": 8.572500395907485e-06,
      "loss": 0.7784,
      "step": 723900
    },
    {
      "epoch": 2.537124071679161,
      "grad_norm": 2.703125,
      "learning_rate": 8.571851367243783e-06,
      "loss": 0.7994,
      "step": 723910
    },
    {
      "epoch": 2.5371591191860565,
      "grad_norm": 3.015625,
      "learning_rate": 8.571202338580081e-06,
      "loss": 0.8356,
      "step": 723920
    },
    {
      "epoch": 2.5371941666929523,
      "grad_norm": 3.015625,
      "learning_rate": 8.57055330991638e-06,
      "loss": 0.8244,
      "step": 723930
    },
    {
      "epoch": 2.537229214199848,
      "grad_norm": 2.875,
      "learning_rate": 8.569904281252677e-06,
      "loss": 0.8045,
      "step": 723940
    },
    {
      "epoch": 2.5372642617067434,
      "grad_norm": 2.453125,
      "learning_rate": 8.569255252588975e-06,
      "loss": 0.8341,
      "step": 723950
    },
    {
      "epoch": 2.537299309213639,
      "grad_norm": 2.65625,
      "learning_rate": 8.568606223925273e-06,
      "loss": 0.84,
      "step": 723960
    },
    {
      "epoch": 2.5373343567205344,
      "grad_norm": 2.671875,
      "learning_rate": 8.567957195261573e-06,
      "loss": 0.7673,
      "step": 723970
    },
    {
      "epoch": 2.53736940422743,
      "grad_norm": 2.59375,
      "learning_rate": 8.567308166597871e-06,
      "loss": 0.8356,
      "step": 723980
    },
    {
      "epoch": 2.537404451734326,
      "grad_norm": 3.109375,
      "learning_rate": 8.566659137934169e-06,
      "loss": 0.8852,
      "step": 723990
    },
    {
      "epoch": 2.5374394992412217,
      "grad_norm": 3.0625,
      "learning_rate": 8.566010109270465e-06,
      "loss": 0.8283,
      "step": 724000
    },
    {
      "epoch": 2.537474546748117,
      "grad_norm": 2.65625,
      "learning_rate": 8.565361080606763e-06,
      "loss": 0.8473,
      "step": 724010
    },
    {
      "epoch": 2.5375095942550128,
      "grad_norm": 3.0625,
      "learning_rate": 8.564712051943063e-06,
      "loss": 0.9421,
      "step": 724020
    },
    {
      "epoch": 2.537544641761908,
      "grad_norm": 2.953125,
      "learning_rate": 8.564063023279361e-06,
      "loss": 0.7319,
      "step": 724030
    },
    {
      "epoch": 2.537579689268804,
      "grad_norm": 2.90625,
      "learning_rate": 8.563413994615659e-06,
      "loss": 0.7437,
      "step": 724040
    },
    {
      "epoch": 2.5376147367756996,
      "grad_norm": 2.90625,
      "learning_rate": 8.562764965951957e-06,
      "loss": 0.8359,
      "step": 724050
    },
    {
      "epoch": 2.537649784282595,
      "grad_norm": 2.921875,
      "learning_rate": 8.562115937288255e-06,
      "loss": 0.8347,
      "step": 724060
    },
    {
      "epoch": 2.5376848317894907,
      "grad_norm": 3.09375,
      "learning_rate": 8.561466908624553e-06,
      "loss": 0.767,
      "step": 724070
    },
    {
      "epoch": 2.537719879296386,
      "grad_norm": 2.90625,
      "learning_rate": 8.560817879960851e-06,
      "loss": 0.8086,
      "step": 724080
    },
    {
      "epoch": 2.5377549268032817,
      "grad_norm": 3.1875,
      "learning_rate": 8.560168851297149e-06,
      "loss": 0.764,
      "step": 724090
    },
    {
      "epoch": 2.5377899743101775,
      "grad_norm": 2.75,
      "learning_rate": 8.559519822633447e-06,
      "loss": 0.7119,
      "step": 724100
    },
    {
      "epoch": 2.5378250218170733,
      "grad_norm": 2.828125,
      "learning_rate": 8.558870793969745e-06,
      "loss": 0.7927,
      "step": 724110
    },
    {
      "epoch": 2.5378600693239686,
      "grad_norm": 3.15625,
      "learning_rate": 8.558221765306043e-06,
      "loss": 0.8633,
      "step": 724120
    },
    {
      "epoch": 2.5378951168308643,
      "grad_norm": 3.0625,
      "learning_rate": 8.557572736642341e-06,
      "loss": 0.8271,
      "step": 724130
    },
    {
      "epoch": 2.5379301643377596,
      "grad_norm": 2.640625,
      "learning_rate": 8.556923707978639e-06,
      "loss": 0.7847,
      "step": 724140
    },
    {
      "epoch": 2.5379652118446554,
      "grad_norm": 3.15625,
      "learning_rate": 8.556274679314939e-06,
      "loss": 0.8444,
      "step": 724150
    },
    {
      "epoch": 2.538000259351551,
      "grad_norm": 3.03125,
      "learning_rate": 8.555625650651237e-06,
      "loss": 0.8622,
      "step": 724160
    },
    {
      "epoch": 2.5380353068584465,
      "grad_norm": 3.078125,
      "learning_rate": 8.554976621987535e-06,
      "loss": 0.9264,
      "step": 724170
    },
    {
      "epoch": 2.5380703543653422,
      "grad_norm": 2.453125,
      "learning_rate": 8.554327593323833e-06,
      "loss": 0.7811,
      "step": 724180
    },
    {
      "epoch": 2.5381054018722375,
      "grad_norm": 3.109375,
      "learning_rate": 8.553678564660129e-06,
      "loss": 0.8153,
      "step": 724190
    },
    {
      "epoch": 2.5381404493791333,
      "grad_norm": 2.921875,
      "learning_rate": 8.553029535996427e-06,
      "loss": 0.89,
      "step": 724200
    },
    {
      "epoch": 2.538175496886029,
      "grad_norm": 2.921875,
      "learning_rate": 8.552380507332727e-06,
      "loss": 0.7938,
      "step": 724210
    },
    {
      "epoch": 2.538210544392925,
      "grad_norm": 2.703125,
      "learning_rate": 8.551731478669025e-06,
      "loss": 0.8672,
      "step": 724220
    },
    {
      "epoch": 2.53824559189982,
      "grad_norm": 3.296875,
      "learning_rate": 8.551082450005323e-06,
      "loss": 0.7276,
      "step": 724230
    },
    {
      "epoch": 2.538280639406716,
      "grad_norm": 2.9375,
      "learning_rate": 8.55043342134162e-06,
      "loss": 0.7553,
      "step": 724240
    },
    {
      "epoch": 2.538315686913611,
      "grad_norm": 2.9375,
      "learning_rate": 8.549784392677919e-06,
      "loss": 0.7595,
      "step": 724250
    },
    {
      "epoch": 2.538350734420507,
      "grad_norm": 2.78125,
      "learning_rate": 8.549135364014217e-06,
      "loss": 0.8221,
      "step": 724260
    },
    {
      "epoch": 2.5383857819274027,
      "grad_norm": 2.984375,
      "learning_rate": 8.548486335350515e-06,
      "loss": 0.7376,
      "step": 724270
    },
    {
      "epoch": 2.538420829434298,
      "grad_norm": 2.78125,
      "learning_rate": 8.547837306686813e-06,
      "loss": 0.7863,
      "step": 724280
    },
    {
      "epoch": 2.538455876941194,
      "grad_norm": 2.6875,
      "learning_rate": 8.54718827802311e-06,
      "loss": 0.8141,
      "step": 724290
    },
    {
      "epoch": 2.5384909244480895,
      "grad_norm": 3.078125,
      "learning_rate": 8.546539249359409e-06,
      "loss": 0.8173,
      "step": 724300
    },
    {
      "epoch": 2.538525971954985,
      "grad_norm": 2.625,
      "learning_rate": 8.545890220695707e-06,
      "loss": 0.7059,
      "step": 724310
    },
    {
      "epoch": 2.5385610194618806,
      "grad_norm": 3.171875,
      "learning_rate": 8.545241192032005e-06,
      "loss": 0.8264,
      "step": 724320
    },
    {
      "epoch": 2.5385960669687764,
      "grad_norm": 3.0,
      "learning_rate": 8.544592163368304e-06,
      "loss": 0.8177,
      "step": 724330
    },
    {
      "epoch": 2.5386311144756717,
      "grad_norm": 2.921875,
      "learning_rate": 8.543943134704602e-06,
      "loss": 0.8489,
      "step": 724340
    },
    {
      "epoch": 2.5386661619825674,
      "grad_norm": 3.21875,
      "learning_rate": 8.5432941060409e-06,
      "loss": 0.8668,
      "step": 724350
    },
    {
      "epoch": 2.5387012094894628,
      "grad_norm": 2.8125,
      "learning_rate": 8.542645077377198e-06,
      "loss": 0.7827,
      "step": 724360
    },
    {
      "epoch": 2.5387362569963585,
      "grad_norm": 2.9375,
      "learning_rate": 8.541996048713496e-06,
      "loss": 0.8692,
      "step": 724370
    },
    {
      "epoch": 2.5387713045032543,
      "grad_norm": 2.671875,
      "learning_rate": 8.541347020049793e-06,
      "loss": 0.7256,
      "step": 724380
    },
    {
      "epoch": 2.5388063520101496,
      "grad_norm": 3.03125,
      "learning_rate": 8.540697991386092e-06,
      "loss": 0.8195,
      "step": 724390
    },
    {
      "epoch": 2.5388413995170453,
      "grad_norm": 3.4375,
      "learning_rate": 8.54004896272239e-06,
      "loss": 0.9134,
      "step": 724400
    },
    {
      "epoch": 2.538876447023941,
      "grad_norm": 3.578125,
      "learning_rate": 8.539399934058688e-06,
      "loss": 0.838,
      "step": 724410
    },
    {
      "epoch": 2.5389114945308364,
      "grad_norm": 2.59375,
      "learning_rate": 8.538750905394986e-06,
      "loss": 0.761,
      "step": 724420
    },
    {
      "epoch": 2.538946542037732,
      "grad_norm": 2.890625,
      "learning_rate": 8.538101876731284e-06,
      "loss": 0.7805,
      "step": 724430
    },
    {
      "epoch": 2.538981589544628,
      "grad_norm": 2.953125,
      "learning_rate": 8.537452848067582e-06,
      "loss": 0.8294,
      "step": 724440
    },
    {
      "epoch": 2.5390166370515233,
      "grad_norm": 3.171875,
      "learning_rate": 8.53680381940388e-06,
      "loss": 0.8849,
      "step": 724450
    },
    {
      "epoch": 2.539051684558419,
      "grad_norm": 3.125,
      "learning_rate": 8.53615479074018e-06,
      "loss": 0.8841,
      "step": 724460
    },
    {
      "epoch": 2.5390867320653143,
      "grad_norm": 3.0,
      "learning_rate": 8.535505762076476e-06,
      "loss": 0.8735,
      "step": 724470
    },
    {
      "epoch": 2.53912177957221,
      "grad_norm": 2.65625,
      "learning_rate": 8.534856733412774e-06,
      "loss": 0.8,
      "step": 724480
    },
    {
      "epoch": 2.539156827079106,
      "grad_norm": 2.65625,
      "learning_rate": 8.534207704749072e-06,
      "loss": 0.7718,
      "step": 724490
    },
    {
      "epoch": 2.5391918745860016,
      "grad_norm": 2.953125,
      "learning_rate": 8.53355867608537e-06,
      "loss": 0.8367,
      "step": 724500
    },
    {
      "epoch": 2.539226922092897,
      "grad_norm": 3.359375,
      "learning_rate": 8.532909647421668e-06,
      "loss": 0.8058,
      "step": 724510
    },
    {
      "epoch": 2.5392619695997927,
      "grad_norm": 3.078125,
      "learning_rate": 8.532260618757968e-06,
      "loss": 0.7967,
      "step": 724520
    },
    {
      "epoch": 2.539297017106688,
      "grad_norm": 2.984375,
      "learning_rate": 8.531611590094266e-06,
      "loss": 0.8289,
      "step": 724530
    },
    {
      "epoch": 2.5393320646135837,
      "grad_norm": 3.53125,
      "learning_rate": 8.530962561430564e-06,
      "loss": 0.7749,
      "step": 724540
    },
    {
      "epoch": 2.5393671121204795,
      "grad_norm": 3.265625,
      "learning_rate": 8.530313532766862e-06,
      "loss": 0.7607,
      "step": 724550
    },
    {
      "epoch": 2.539402159627375,
      "grad_norm": 2.90625,
      "learning_rate": 8.52966450410316e-06,
      "loss": 0.7306,
      "step": 724560
    },
    {
      "epoch": 2.5394372071342706,
      "grad_norm": 2.59375,
      "learning_rate": 8.529015475439458e-06,
      "loss": 0.7206,
      "step": 724570
    },
    {
      "epoch": 2.539472254641166,
      "grad_norm": 3.25,
      "learning_rate": 8.528366446775756e-06,
      "loss": 0.8318,
      "step": 724580
    },
    {
      "epoch": 2.5395073021480616,
      "grad_norm": 3.109375,
      "learning_rate": 8.527717418112054e-06,
      "loss": 0.7837,
      "step": 724590
    },
    {
      "epoch": 2.5395423496549574,
      "grad_norm": 3.09375,
      "learning_rate": 8.527068389448352e-06,
      "loss": 0.8401,
      "step": 724600
    },
    {
      "epoch": 2.539577397161853,
      "grad_norm": 3.078125,
      "learning_rate": 8.52641936078465e-06,
      "loss": 0.8913,
      "step": 724610
    },
    {
      "epoch": 2.5396124446687485,
      "grad_norm": 2.71875,
      "learning_rate": 8.525770332120948e-06,
      "loss": 0.8338,
      "step": 724620
    },
    {
      "epoch": 2.5396474921756442,
      "grad_norm": 2.859375,
      "learning_rate": 8.525121303457246e-06,
      "loss": 0.782,
      "step": 724630
    },
    {
      "epoch": 2.5396825396825395,
      "grad_norm": 3.15625,
      "learning_rate": 8.524472274793546e-06,
      "loss": 0.8005,
      "step": 724640
    },
    {
      "epoch": 2.5397175871894353,
      "grad_norm": 2.8125,
      "learning_rate": 8.523823246129844e-06,
      "loss": 0.7978,
      "step": 724650
    },
    {
      "epoch": 2.539752634696331,
      "grad_norm": 2.546875,
      "learning_rate": 8.52317421746614e-06,
      "loss": 0.8163,
      "step": 724660
    },
    {
      "epoch": 2.5397876822032264,
      "grad_norm": 2.9375,
      "learning_rate": 8.522525188802438e-06,
      "loss": 0.8524,
      "step": 724670
    },
    {
      "epoch": 2.539822729710122,
      "grad_norm": 2.59375,
      "learning_rate": 8.521876160138736e-06,
      "loss": 0.7468,
      "step": 724680
    },
    {
      "epoch": 2.5398577772170174,
      "grad_norm": 4.0,
      "learning_rate": 8.521227131475034e-06,
      "loss": 0.8204,
      "step": 724690
    },
    {
      "epoch": 2.539892824723913,
      "grad_norm": 2.828125,
      "learning_rate": 8.520578102811334e-06,
      "loss": 0.7335,
      "step": 724700
    },
    {
      "epoch": 2.539927872230809,
      "grad_norm": 2.765625,
      "learning_rate": 8.519929074147632e-06,
      "loss": 0.9001,
      "step": 724710
    },
    {
      "epoch": 2.5399629197377047,
      "grad_norm": 3.421875,
      "learning_rate": 8.51928004548393e-06,
      "loss": 0.7424,
      "step": 724720
    },
    {
      "epoch": 2.5399979672446,
      "grad_norm": 2.515625,
      "learning_rate": 8.518631016820228e-06,
      "loss": 0.7394,
      "step": 724730
    },
    {
      "epoch": 2.540033014751496,
      "grad_norm": 3.03125,
      "learning_rate": 8.517981988156526e-06,
      "loss": 0.8799,
      "step": 724740
    },
    {
      "epoch": 2.540068062258391,
      "grad_norm": 2.609375,
      "learning_rate": 8.517332959492822e-06,
      "loss": 0.8102,
      "step": 724750
    },
    {
      "epoch": 2.540103109765287,
      "grad_norm": 2.859375,
      "learning_rate": 8.516683930829122e-06,
      "loss": 0.8441,
      "step": 724760
    },
    {
      "epoch": 2.5401381572721826,
      "grad_norm": 3.0,
      "learning_rate": 8.51603490216542e-06,
      "loss": 0.7849,
      "step": 724770
    },
    {
      "epoch": 2.540173204779078,
      "grad_norm": 2.8125,
      "learning_rate": 8.515385873501718e-06,
      "loss": 0.7077,
      "step": 724780
    },
    {
      "epoch": 2.5402082522859737,
      "grad_norm": 2.78125,
      "learning_rate": 8.514736844838016e-06,
      "loss": 0.8102,
      "step": 724790
    },
    {
      "epoch": 2.540243299792869,
      "grad_norm": 3.125,
      "learning_rate": 8.514087816174314e-06,
      "loss": 0.7739,
      "step": 724800
    },
    {
      "epoch": 2.5402783472997648,
      "grad_norm": 3.015625,
      "learning_rate": 8.513438787510612e-06,
      "loss": 0.7956,
      "step": 724810
    },
    {
      "epoch": 2.5403133948066605,
      "grad_norm": 3.34375,
      "learning_rate": 8.51278975884691e-06,
      "loss": 0.8489,
      "step": 724820
    },
    {
      "epoch": 2.5403484423135563,
      "grad_norm": 2.796875,
      "learning_rate": 8.51214073018321e-06,
      "loss": 0.8469,
      "step": 724830
    },
    {
      "epoch": 2.5403834898204516,
      "grad_norm": 2.8125,
      "learning_rate": 8.511491701519507e-06,
      "loss": 0.7212,
      "step": 724840
    },
    {
      "epoch": 2.5404185373273473,
      "grad_norm": 2.890625,
      "learning_rate": 8.510842672855804e-06,
      "loss": 0.7925,
      "step": 724850
    },
    {
      "epoch": 2.5404535848342427,
      "grad_norm": 2.828125,
      "learning_rate": 8.510193644192102e-06,
      "loss": 0.7486,
      "step": 724860
    },
    {
      "epoch": 2.5404886323411384,
      "grad_norm": 2.96875,
      "learning_rate": 8.5095446155284e-06,
      "loss": 0.8447,
      "step": 724870
    },
    {
      "epoch": 2.540523679848034,
      "grad_norm": 3.015625,
      "learning_rate": 8.5088955868647e-06,
      "loss": 0.7812,
      "step": 724880
    },
    {
      "epoch": 2.5405587273549295,
      "grad_norm": 2.953125,
      "learning_rate": 8.508246558200997e-06,
      "loss": 0.7685,
      "step": 724890
    },
    {
      "epoch": 2.5405937748618252,
      "grad_norm": 2.734375,
      "learning_rate": 8.507597529537295e-06,
      "loss": 0.8443,
      "step": 724900
    },
    {
      "epoch": 2.5406288223687206,
      "grad_norm": 3.234375,
      "learning_rate": 8.506948500873593e-06,
      "loss": 0.8449,
      "step": 724910
    },
    {
      "epoch": 2.5406638698756163,
      "grad_norm": 2.8125,
      "learning_rate": 8.506299472209891e-06,
      "loss": 0.8612,
      "step": 724920
    },
    {
      "epoch": 2.540698917382512,
      "grad_norm": 2.671875,
      "learning_rate": 8.50565044354619e-06,
      "loss": 0.7459,
      "step": 724930
    },
    {
      "epoch": 2.540733964889408,
      "grad_norm": 2.53125,
      "learning_rate": 8.505001414882487e-06,
      "loss": 0.8144,
      "step": 724940
    },
    {
      "epoch": 2.540769012396303,
      "grad_norm": 3.03125,
      "learning_rate": 8.504352386218785e-06,
      "loss": 0.8036,
      "step": 724950
    },
    {
      "epoch": 2.540804059903199,
      "grad_norm": 3.3125,
      "learning_rate": 8.503703357555083e-06,
      "loss": 0.7869,
      "step": 724960
    },
    {
      "epoch": 2.540839107410094,
      "grad_norm": 2.78125,
      "learning_rate": 8.503054328891381e-06,
      "loss": 0.7563,
      "step": 724970
    },
    {
      "epoch": 2.54087415491699,
      "grad_norm": 2.90625,
      "learning_rate": 8.50240530022768e-06,
      "loss": 0.8624,
      "step": 724980
    },
    {
      "epoch": 2.5409092024238857,
      "grad_norm": 3.03125,
      "learning_rate": 8.501756271563977e-06,
      "loss": 0.8103,
      "step": 724990
    },
    {
      "epoch": 2.540944249930781,
      "grad_norm": 2.515625,
      "learning_rate": 8.501107242900275e-06,
      "loss": 0.7461,
      "step": 725000
    },
    {
      "epoch": 2.540944249930781,
      "eval_loss": 0.7553853392601013,
      "eval_runtime": 554.9979,
      "eval_samples_per_second": 685.473,
      "eval_steps_per_second": 57.123,
      "step": 725000
    },
    {
      "epoch": 2.540979297437677,
      "grad_norm": 3.21875,
      "learning_rate": 8.500458214236575e-06,
      "loss": 0.8478,
      "step": 725010
    },
    {
      "epoch": 2.541014344944572,
      "grad_norm": 2.859375,
      "learning_rate": 8.499809185572873e-06,
      "loss": 0.7859,
      "step": 725020
    },
    {
      "epoch": 2.541049392451468,
      "grad_norm": 2.71875,
      "learning_rate": 8.499160156909171e-06,
      "loss": 0.823,
      "step": 725030
    },
    {
      "epoch": 2.5410844399583636,
      "grad_norm": 3.125,
      "learning_rate": 8.498511128245467e-06,
      "loss": 0.7967,
      "step": 725040
    },
    {
      "epoch": 2.5411194874652594,
      "grad_norm": 2.9375,
      "learning_rate": 8.497862099581765e-06,
      "loss": 0.8206,
      "step": 725050
    },
    {
      "epoch": 2.5411545349721547,
      "grad_norm": 2.578125,
      "learning_rate": 8.497213070918063e-06,
      "loss": 0.7566,
      "step": 725060
    },
    {
      "epoch": 2.5411895824790505,
      "grad_norm": 2.59375,
      "learning_rate": 8.496564042254363e-06,
      "loss": 0.8285,
      "step": 725070
    },
    {
      "epoch": 2.541224629985946,
      "grad_norm": 2.890625,
      "learning_rate": 8.495915013590661e-06,
      "loss": 0.8168,
      "step": 725080
    },
    {
      "epoch": 2.5412596774928415,
      "grad_norm": 3.09375,
      "learning_rate": 8.495265984926959e-06,
      "loss": 0.8425,
      "step": 725090
    },
    {
      "epoch": 2.5412947249997373,
      "grad_norm": 2.75,
      "learning_rate": 8.494616956263257e-06,
      "loss": 0.7846,
      "step": 725100
    },
    {
      "epoch": 2.5413297725066326,
      "grad_norm": 2.53125,
      "learning_rate": 8.493967927599555e-06,
      "loss": 0.7987,
      "step": 725110
    },
    {
      "epoch": 2.5413648200135284,
      "grad_norm": 2.65625,
      "learning_rate": 8.493318898935853e-06,
      "loss": 0.7923,
      "step": 725120
    },
    {
      "epoch": 2.5413998675204237,
      "grad_norm": 3.15625,
      "learning_rate": 8.492669870272151e-06,
      "loss": 0.846,
      "step": 725130
    },
    {
      "epoch": 2.5414349150273194,
      "grad_norm": 2.71875,
      "learning_rate": 8.492020841608449e-06,
      "loss": 0.7535,
      "step": 725140
    },
    {
      "epoch": 2.541469962534215,
      "grad_norm": 3.09375,
      "learning_rate": 8.491371812944747e-06,
      "loss": 0.8231,
      "step": 725150
    },
    {
      "epoch": 2.541505010041111,
      "grad_norm": 2.96875,
      "learning_rate": 8.490722784281045e-06,
      "loss": 0.7528,
      "step": 725160
    },
    {
      "epoch": 2.5415400575480063,
      "grad_norm": 2.890625,
      "learning_rate": 8.490073755617343e-06,
      "loss": 0.7444,
      "step": 725170
    },
    {
      "epoch": 2.541575105054902,
      "grad_norm": 2.78125,
      "learning_rate": 8.489424726953641e-06,
      "loss": 0.79,
      "step": 725180
    },
    {
      "epoch": 2.5416101525617973,
      "grad_norm": 2.90625,
      "learning_rate": 8.48877569828994e-06,
      "loss": 0.7855,
      "step": 725190
    },
    {
      "epoch": 2.541645200068693,
      "grad_norm": 2.875,
      "learning_rate": 8.488126669626239e-06,
      "loss": 0.7813,
      "step": 725200
    },
    {
      "epoch": 2.541680247575589,
      "grad_norm": 3.28125,
      "learning_rate": 8.487477640962537e-06,
      "loss": 0.7467,
      "step": 725210
    },
    {
      "epoch": 2.541715295082484,
      "grad_norm": 3.421875,
      "learning_rate": 8.486828612298833e-06,
      "loss": 0.6924,
      "step": 725220
    },
    {
      "epoch": 2.54175034258938,
      "grad_norm": 2.203125,
      "learning_rate": 8.486179583635131e-06,
      "loss": 0.734,
      "step": 725230
    },
    {
      "epoch": 2.5417853900962752,
      "grad_norm": 2.8125,
      "learning_rate": 8.485530554971429e-06,
      "loss": 0.7984,
      "step": 725240
    },
    {
      "epoch": 2.541820437603171,
      "grad_norm": 3.1875,
      "learning_rate": 8.484881526307729e-06,
      "loss": 0.8219,
      "step": 725250
    },
    {
      "epoch": 2.5418554851100668,
      "grad_norm": 3.0625,
      "learning_rate": 8.484232497644027e-06,
      "loss": 0.7543,
      "step": 725260
    },
    {
      "epoch": 2.5418905326169625,
      "grad_norm": 2.984375,
      "learning_rate": 8.483583468980325e-06,
      "loss": 0.871,
      "step": 725270
    },
    {
      "epoch": 2.541925580123858,
      "grad_norm": 2.6875,
      "learning_rate": 8.482934440316623e-06,
      "loss": 0.7442,
      "step": 725280
    },
    {
      "epoch": 2.5419606276307536,
      "grad_norm": 3.015625,
      "learning_rate": 8.48228541165292e-06,
      "loss": 0.8193,
      "step": 725290
    },
    {
      "epoch": 2.541995675137649,
      "grad_norm": 2.90625,
      "learning_rate": 8.481636382989219e-06,
      "loss": 0.7778,
      "step": 725300
    },
    {
      "epoch": 2.5420307226445447,
      "grad_norm": 3.1875,
      "learning_rate": 8.480987354325517e-06,
      "loss": 0.7844,
      "step": 725310
    },
    {
      "epoch": 2.5420657701514404,
      "grad_norm": 2.984375,
      "learning_rate": 8.480338325661815e-06,
      "loss": 0.7762,
      "step": 725320
    },
    {
      "epoch": 2.5421008176583357,
      "grad_norm": 3.234375,
      "learning_rate": 8.479689296998113e-06,
      "loss": 0.8183,
      "step": 725330
    },
    {
      "epoch": 2.5421358651652315,
      "grad_norm": 2.90625,
      "learning_rate": 8.47904026833441e-06,
      "loss": 0.7524,
      "step": 725340
    },
    {
      "epoch": 2.542170912672127,
      "grad_norm": 2.765625,
      "learning_rate": 8.478391239670709e-06,
      "loss": 0.8225,
      "step": 725350
    },
    {
      "epoch": 2.5422059601790226,
      "grad_norm": 3.15625,
      "learning_rate": 8.477742211007007e-06,
      "loss": 0.7727,
      "step": 725360
    },
    {
      "epoch": 2.5422410076859183,
      "grad_norm": 3.09375,
      "learning_rate": 8.477093182343305e-06,
      "loss": 0.7946,
      "step": 725370
    },
    {
      "epoch": 2.542276055192814,
      "grad_norm": 2.96875,
      "learning_rate": 8.476444153679604e-06,
      "loss": 0.8078,
      "step": 725380
    },
    {
      "epoch": 2.5423111026997094,
      "grad_norm": 3.34375,
      "learning_rate": 8.475795125015902e-06,
      "loss": 0.8539,
      "step": 725390
    },
    {
      "epoch": 2.542346150206605,
      "grad_norm": 3.390625,
      "learning_rate": 8.4751460963522e-06,
      "loss": 0.7892,
      "step": 725400
    },
    {
      "epoch": 2.5423811977135005,
      "grad_norm": 3.078125,
      "learning_rate": 8.474497067688497e-06,
      "loss": 0.7481,
      "step": 725410
    },
    {
      "epoch": 2.542416245220396,
      "grad_norm": 3.21875,
      "learning_rate": 8.473848039024795e-06,
      "loss": 0.7989,
      "step": 725420
    },
    {
      "epoch": 2.542451292727292,
      "grad_norm": 2.890625,
      "learning_rate": 8.473199010361094e-06,
      "loss": 0.7739,
      "step": 725430
    },
    {
      "epoch": 2.5424863402341873,
      "grad_norm": 3.1875,
      "learning_rate": 8.472549981697392e-06,
      "loss": 0.8594,
      "step": 725440
    },
    {
      "epoch": 2.542521387741083,
      "grad_norm": 3.015625,
      "learning_rate": 8.47190095303369e-06,
      "loss": 0.8097,
      "step": 725450
    },
    {
      "epoch": 2.5425564352479784,
      "grad_norm": 3.078125,
      "learning_rate": 8.471251924369988e-06,
      "loss": 0.7852,
      "step": 725460
    },
    {
      "epoch": 2.542591482754874,
      "grad_norm": 2.671875,
      "learning_rate": 8.470602895706286e-06,
      "loss": 0.8005,
      "step": 725470
    },
    {
      "epoch": 2.54262653026177,
      "grad_norm": 3.046875,
      "learning_rate": 8.469953867042584e-06,
      "loss": 0.784,
      "step": 725480
    },
    {
      "epoch": 2.5426615777686656,
      "grad_norm": 2.734375,
      "learning_rate": 8.469304838378882e-06,
      "loss": 0.7626,
      "step": 725490
    },
    {
      "epoch": 2.542696625275561,
      "grad_norm": 2.890625,
      "learning_rate": 8.468655809715182e-06,
      "loss": 0.8206,
      "step": 725500
    },
    {
      "epoch": 2.5427316727824567,
      "grad_norm": 2.671875,
      "learning_rate": 8.468006781051478e-06,
      "loss": 0.8384,
      "step": 725510
    },
    {
      "epoch": 2.542766720289352,
      "grad_norm": 3.046875,
      "learning_rate": 8.467357752387776e-06,
      "loss": 0.833,
      "step": 725520
    },
    {
      "epoch": 2.542801767796248,
      "grad_norm": 2.578125,
      "learning_rate": 8.466708723724074e-06,
      "loss": 0.7284,
      "step": 725530
    },
    {
      "epoch": 2.5428368153031435,
      "grad_norm": 2.84375,
      "learning_rate": 8.466059695060372e-06,
      "loss": 0.7966,
      "step": 725540
    },
    {
      "epoch": 2.542871862810039,
      "grad_norm": 3.21875,
      "learning_rate": 8.46541066639667e-06,
      "loss": 0.7667,
      "step": 725550
    },
    {
      "epoch": 2.5429069103169346,
      "grad_norm": 3.046875,
      "learning_rate": 8.46476163773297e-06,
      "loss": 0.7205,
      "step": 725560
    },
    {
      "epoch": 2.5429419578238304,
      "grad_norm": 3.109375,
      "learning_rate": 8.464112609069268e-06,
      "loss": 0.8349,
      "step": 725570
    },
    {
      "epoch": 2.5429770053307257,
      "grad_norm": 2.90625,
      "learning_rate": 8.463463580405566e-06,
      "loss": 0.838,
      "step": 725580
    },
    {
      "epoch": 2.5430120528376214,
      "grad_norm": 2.6875,
      "learning_rate": 8.462814551741864e-06,
      "loss": 0.7362,
      "step": 725590
    },
    {
      "epoch": 2.543047100344517,
      "grad_norm": 2.6875,
      "learning_rate": 8.46216552307816e-06,
      "loss": 0.8751,
      "step": 725600
    },
    {
      "epoch": 2.5430821478514125,
      "grad_norm": 2.53125,
      "learning_rate": 8.461516494414458e-06,
      "loss": 0.7975,
      "step": 725610
    },
    {
      "epoch": 2.5431171953583083,
      "grad_norm": 2.671875,
      "learning_rate": 8.460867465750758e-06,
      "loss": 0.8093,
      "step": 725620
    },
    {
      "epoch": 2.5431522428652036,
      "grad_norm": 2.96875,
      "learning_rate": 8.460218437087056e-06,
      "loss": 0.8754,
      "step": 725630
    },
    {
      "epoch": 2.5431872903720993,
      "grad_norm": 2.921875,
      "learning_rate": 8.459569408423354e-06,
      "loss": 0.7727,
      "step": 725640
    },
    {
      "epoch": 2.543222337878995,
      "grad_norm": 2.671875,
      "learning_rate": 8.458920379759652e-06,
      "loss": 0.8441,
      "step": 725650
    },
    {
      "epoch": 2.5432573853858904,
      "grad_norm": 3.015625,
      "learning_rate": 8.45827135109595e-06,
      "loss": 0.7989,
      "step": 725660
    },
    {
      "epoch": 2.543292432892786,
      "grad_norm": 2.578125,
      "learning_rate": 8.457622322432248e-06,
      "loss": 0.7821,
      "step": 725670
    },
    {
      "epoch": 2.543327480399682,
      "grad_norm": 3.0,
      "learning_rate": 8.456973293768548e-06,
      "loss": 0.7704,
      "step": 725680
    },
    {
      "epoch": 2.5433625279065772,
      "grad_norm": 3.140625,
      "learning_rate": 8.456324265104844e-06,
      "loss": 0.7217,
      "step": 725690
    },
    {
      "epoch": 2.543397575413473,
      "grad_norm": 3.09375,
      "learning_rate": 8.455675236441142e-06,
      "loss": 0.77,
      "step": 725700
    },
    {
      "epoch": 2.5434326229203688,
      "grad_norm": 2.84375,
      "learning_rate": 8.45502620777744e-06,
      "loss": 0.7676,
      "step": 725710
    },
    {
      "epoch": 2.543467670427264,
      "grad_norm": 2.890625,
      "learning_rate": 8.454377179113738e-06,
      "loss": 0.7889,
      "step": 725720
    },
    {
      "epoch": 2.54350271793416,
      "grad_norm": 2.984375,
      "learning_rate": 8.453728150450036e-06,
      "loss": 0.8511,
      "step": 725730
    },
    {
      "epoch": 2.543537765441055,
      "grad_norm": 3.90625,
      "learning_rate": 8.453079121786336e-06,
      "loss": 0.8006,
      "step": 725740
    },
    {
      "epoch": 2.543572812947951,
      "grad_norm": 2.78125,
      "learning_rate": 8.452430093122634e-06,
      "loss": 0.8002,
      "step": 725750
    },
    {
      "epoch": 2.5436078604548467,
      "grad_norm": 3.140625,
      "learning_rate": 8.451781064458932e-06,
      "loss": 0.8444,
      "step": 725760
    },
    {
      "epoch": 2.5436429079617424,
      "grad_norm": 3.5625,
      "learning_rate": 8.45113203579523e-06,
      "loss": 0.881,
      "step": 725770
    },
    {
      "epoch": 2.5436779554686377,
      "grad_norm": 3.03125,
      "learning_rate": 8.450483007131528e-06,
      "loss": 0.7758,
      "step": 725780
    },
    {
      "epoch": 2.5437130029755335,
      "grad_norm": 2.953125,
      "learning_rate": 8.449833978467824e-06,
      "loss": 0.8292,
      "step": 725790
    },
    {
      "epoch": 2.543748050482429,
      "grad_norm": 3.140625,
      "learning_rate": 8.449184949804124e-06,
      "loss": 0.7895,
      "step": 725800
    },
    {
      "epoch": 2.5437830979893246,
      "grad_norm": 3.109375,
      "learning_rate": 8.448535921140422e-06,
      "loss": 0.7324,
      "step": 725810
    },
    {
      "epoch": 2.5438181454962203,
      "grad_norm": 2.828125,
      "learning_rate": 8.44788689247672e-06,
      "loss": 0.8088,
      "step": 725820
    },
    {
      "epoch": 2.5438531930031156,
      "grad_norm": 2.453125,
      "learning_rate": 8.447237863813018e-06,
      "loss": 0.744,
      "step": 725830
    },
    {
      "epoch": 2.5438882405100114,
      "grad_norm": 3.109375,
      "learning_rate": 8.446588835149316e-06,
      "loss": 0.7975,
      "step": 725840
    },
    {
      "epoch": 2.5439232880169067,
      "grad_norm": 2.796875,
      "learning_rate": 8.445939806485614e-06,
      "loss": 0.7936,
      "step": 725850
    },
    {
      "epoch": 2.5439583355238025,
      "grad_norm": 3.03125,
      "learning_rate": 8.445290777821912e-06,
      "loss": 0.8357,
      "step": 725860
    },
    {
      "epoch": 2.543993383030698,
      "grad_norm": 3.21875,
      "learning_rate": 8.444641749158212e-06,
      "loss": 0.8309,
      "step": 725870
    },
    {
      "epoch": 2.544028430537594,
      "grad_norm": 2.890625,
      "learning_rate": 8.443992720494508e-06,
      "loss": 0.729,
      "step": 725880
    },
    {
      "epoch": 2.5440634780444893,
      "grad_norm": 2.953125,
      "learning_rate": 8.443343691830806e-06,
      "loss": 0.7768,
      "step": 725890
    },
    {
      "epoch": 2.544098525551385,
      "grad_norm": 3.0,
      "learning_rate": 8.442694663167104e-06,
      "loss": 0.8779,
      "step": 725900
    },
    {
      "epoch": 2.5441335730582804,
      "grad_norm": 2.96875,
      "learning_rate": 8.442045634503402e-06,
      "loss": 0.8094,
      "step": 725910
    },
    {
      "epoch": 2.544168620565176,
      "grad_norm": 2.96875,
      "learning_rate": 8.4413966058397e-06,
      "loss": 0.9836,
      "step": 725920
    },
    {
      "epoch": 2.544203668072072,
      "grad_norm": 3.828125,
      "learning_rate": 8.440747577176e-06,
      "loss": 0.837,
      "step": 725930
    },
    {
      "epoch": 2.544238715578967,
      "grad_norm": 2.578125,
      "learning_rate": 8.440098548512298e-06,
      "loss": 0.8526,
      "step": 725940
    },
    {
      "epoch": 2.544273763085863,
      "grad_norm": 2.703125,
      "learning_rate": 8.439449519848596e-06,
      "loss": 0.8781,
      "step": 725950
    },
    {
      "epoch": 2.5443088105927583,
      "grad_norm": 2.5625,
      "learning_rate": 8.438800491184894e-06,
      "loss": 0.7705,
      "step": 725960
    },
    {
      "epoch": 2.544343858099654,
      "grad_norm": 3.328125,
      "learning_rate": 8.438151462521192e-06,
      "loss": 0.7896,
      "step": 725970
    },
    {
      "epoch": 2.54437890560655,
      "grad_norm": 3.015625,
      "learning_rate": 8.43750243385749e-06,
      "loss": 0.7516,
      "step": 725980
    },
    {
      "epoch": 2.5444139531134455,
      "grad_norm": 3.46875,
      "learning_rate": 8.436853405193788e-06,
      "loss": 0.8298,
      "step": 725990
    },
    {
      "epoch": 2.544449000620341,
      "grad_norm": 2.6875,
      "learning_rate": 8.436204376530086e-06,
      "loss": 0.8286,
      "step": 726000
    },
    {
      "epoch": 2.5444840481272366,
      "grad_norm": 2.609375,
      "learning_rate": 8.435555347866384e-06,
      "loss": 0.8736,
      "step": 726010
    },
    {
      "epoch": 2.544519095634132,
      "grad_norm": 2.90625,
      "learning_rate": 8.434906319202682e-06,
      "loss": 0.7347,
      "step": 726020
    },
    {
      "epoch": 2.5445541431410277,
      "grad_norm": 2.96875,
      "learning_rate": 8.43425729053898e-06,
      "loss": 0.7832,
      "step": 726030
    },
    {
      "epoch": 2.5445891906479234,
      "grad_norm": 2.53125,
      "learning_rate": 8.433608261875278e-06,
      "loss": 0.7569,
      "step": 726040
    },
    {
      "epoch": 2.5446242381548188,
      "grad_norm": 2.59375,
      "learning_rate": 8.432959233211577e-06,
      "loss": 0.7562,
      "step": 726050
    },
    {
      "epoch": 2.5446592856617145,
      "grad_norm": 2.6875,
      "learning_rate": 8.432310204547875e-06,
      "loss": 0.7691,
      "step": 726060
    },
    {
      "epoch": 2.54469433316861,
      "grad_norm": 2.796875,
      "learning_rate": 8.431661175884172e-06,
      "loss": 0.7297,
      "step": 726070
    },
    {
      "epoch": 2.5447293806755056,
      "grad_norm": 2.984375,
      "learning_rate": 8.43101214722047e-06,
      "loss": 0.785,
      "step": 726080
    },
    {
      "epoch": 2.5447644281824013,
      "grad_norm": 3.5625,
      "learning_rate": 8.430363118556768e-06,
      "loss": 0.809,
      "step": 726090
    },
    {
      "epoch": 2.544799475689297,
      "grad_norm": 2.875,
      "learning_rate": 8.429714089893066e-06,
      "loss": 0.8394,
      "step": 726100
    },
    {
      "epoch": 2.5448345231961924,
      "grad_norm": 2.609375,
      "learning_rate": 8.429065061229365e-06,
      "loss": 0.7161,
      "step": 726110
    },
    {
      "epoch": 2.544869570703088,
      "grad_norm": 3.0,
      "learning_rate": 8.428416032565663e-06,
      "loss": 0.8715,
      "step": 726120
    },
    {
      "epoch": 2.5449046182099835,
      "grad_norm": 3.1875,
      "learning_rate": 8.427767003901961e-06,
      "loss": 0.731,
      "step": 726130
    },
    {
      "epoch": 2.5449396657168792,
      "grad_norm": 2.625,
      "learning_rate": 8.42711797523826e-06,
      "loss": 0.7564,
      "step": 726140
    },
    {
      "epoch": 2.544974713223775,
      "grad_norm": 2.875,
      "learning_rate": 8.426468946574557e-06,
      "loss": 0.8829,
      "step": 726150
    },
    {
      "epoch": 2.5450097607306703,
      "grad_norm": 2.421875,
      "learning_rate": 8.425819917910854e-06,
      "loss": 0.7836,
      "step": 726160
    },
    {
      "epoch": 2.545044808237566,
      "grad_norm": 2.890625,
      "learning_rate": 8.425170889247153e-06,
      "loss": 0.795,
      "step": 726170
    },
    {
      "epoch": 2.5450798557444614,
      "grad_norm": 2.984375,
      "learning_rate": 8.424521860583451e-06,
      "loss": 0.95,
      "step": 726180
    },
    {
      "epoch": 2.545114903251357,
      "grad_norm": 3.0625,
      "learning_rate": 8.42387283191975e-06,
      "loss": 0.841,
      "step": 726190
    },
    {
      "epoch": 2.545149950758253,
      "grad_norm": 3.0625,
      "learning_rate": 8.423223803256047e-06,
      "loss": 0.8665,
      "step": 726200
    },
    {
      "epoch": 2.5451849982651487,
      "grad_norm": 2.96875,
      "learning_rate": 8.422574774592345e-06,
      "loss": 0.8493,
      "step": 726210
    },
    {
      "epoch": 2.545220045772044,
      "grad_norm": 3.0625,
      "learning_rate": 8.421925745928643e-06,
      "loss": 0.877,
      "step": 726220
    },
    {
      "epoch": 2.5452550932789397,
      "grad_norm": 2.609375,
      "learning_rate": 8.421276717264943e-06,
      "loss": 0.8193,
      "step": 726230
    },
    {
      "epoch": 2.545290140785835,
      "grad_norm": 3.0625,
      "learning_rate": 8.420627688601241e-06,
      "loss": 0.8036,
      "step": 726240
    },
    {
      "epoch": 2.545325188292731,
      "grad_norm": 2.9375,
      "learning_rate": 8.419978659937539e-06,
      "loss": 0.765,
      "step": 726250
    },
    {
      "epoch": 2.5453602357996266,
      "grad_norm": 2.828125,
      "learning_rate": 8.419329631273835e-06,
      "loss": 0.8137,
      "step": 726260
    },
    {
      "epoch": 2.545395283306522,
      "grad_norm": 2.84375,
      "learning_rate": 8.418680602610133e-06,
      "loss": 0.7655,
      "step": 726270
    },
    {
      "epoch": 2.5454303308134176,
      "grad_norm": 3.171875,
      "learning_rate": 8.418031573946431e-06,
      "loss": 0.789,
      "step": 726280
    },
    {
      "epoch": 2.545465378320313,
      "grad_norm": 2.765625,
      "learning_rate": 8.417382545282731e-06,
      "loss": 0.8059,
      "step": 726290
    },
    {
      "epoch": 2.5455004258272087,
      "grad_norm": 2.984375,
      "learning_rate": 8.416733516619029e-06,
      "loss": 0.8152,
      "step": 726300
    },
    {
      "epoch": 2.5455354733341045,
      "grad_norm": 2.609375,
      "learning_rate": 8.416084487955327e-06,
      "loss": 0.8385,
      "step": 726310
    },
    {
      "epoch": 2.545570520841,
      "grad_norm": 3.015625,
      "learning_rate": 8.415435459291625e-06,
      "loss": 0.8979,
      "step": 726320
    },
    {
      "epoch": 2.5456055683478955,
      "grad_norm": 3.78125,
      "learning_rate": 8.414786430627923e-06,
      "loss": 0.8579,
      "step": 726330
    },
    {
      "epoch": 2.5456406158547913,
      "grad_norm": 2.78125,
      "learning_rate": 8.414137401964221e-06,
      "loss": 0.7902,
      "step": 726340
    },
    {
      "epoch": 2.5456756633616866,
      "grad_norm": 2.59375,
      "learning_rate": 8.413488373300519e-06,
      "loss": 0.7666,
      "step": 726350
    },
    {
      "epoch": 2.5457107108685824,
      "grad_norm": 2.59375,
      "learning_rate": 8.412839344636817e-06,
      "loss": 0.7859,
      "step": 726360
    },
    {
      "epoch": 2.545745758375478,
      "grad_norm": 2.53125,
      "learning_rate": 8.412190315973115e-06,
      "loss": 0.7947,
      "step": 726370
    },
    {
      "epoch": 2.5457808058823734,
      "grad_norm": 2.890625,
      "learning_rate": 8.411541287309413e-06,
      "loss": 0.8412,
      "step": 726380
    },
    {
      "epoch": 2.545815853389269,
      "grad_norm": 2.75,
      "learning_rate": 8.410892258645711e-06,
      "loss": 0.7601,
      "step": 726390
    },
    {
      "epoch": 2.5458509008961645,
      "grad_norm": 2.71875,
      "learning_rate": 8.410243229982009e-06,
      "loss": 0.7919,
      "step": 726400
    },
    {
      "epoch": 2.5458859484030603,
      "grad_norm": 2.796875,
      "learning_rate": 8.409594201318307e-06,
      "loss": 0.8592,
      "step": 726410
    },
    {
      "epoch": 2.545920995909956,
      "grad_norm": 2.8125,
      "learning_rate": 8.408945172654607e-06,
      "loss": 0.7404,
      "step": 726420
    },
    {
      "epoch": 2.5459560434168518,
      "grad_norm": 3.0,
      "learning_rate": 8.408296143990905e-06,
      "loss": 0.8512,
      "step": 726430
    },
    {
      "epoch": 2.545991090923747,
      "grad_norm": 3.296875,
      "learning_rate": 8.407647115327203e-06,
      "loss": 0.8734,
      "step": 726440
    },
    {
      "epoch": 2.546026138430643,
      "grad_norm": 3.171875,
      "learning_rate": 8.406998086663499e-06,
      "loss": 0.8016,
      "step": 726450
    },
    {
      "epoch": 2.546061185937538,
      "grad_norm": 3.0,
      "learning_rate": 8.406349057999797e-06,
      "loss": 0.742,
      "step": 726460
    },
    {
      "epoch": 2.546096233444434,
      "grad_norm": 3.0625,
      "learning_rate": 8.405700029336097e-06,
      "loss": 0.853,
      "step": 726470
    },
    {
      "epoch": 2.5461312809513297,
      "grad_norm": 3.078125,
      "learning_rate": 8.405051000672395e-06,
      "loss": 0.7713,
      "step": 726480
    },
    {
      "epoch": 2.546166328458225,
      "grad_norm": 2.75,
      "learning_rate": 8.404401972008693e-06,
      "loss": 0.8481,
      "step": 726490
    },
    {
      "epoch": 2.5462013759651208,
      "grad_norm": 2.578125,
      "learning_rate": 8.40375294334499e-06,
      "loss": 0.7496,
      "step": 726500
    },
    {
      "epoch": 2.546236423472016,
      "grad_norm": 3.375,
      "learning_rate": 8.403103914681289e-06,
      "loss": 0.8151,
      "step": 726510
    },
    {
      "epoch": 2.546271470978912,
      "grad_norm": 3.03125,
      "learning_rate": 8.402454886017587e-06,
      "loss": 0.8588,
      "step": 726520
    },
    {
      "epoch": 2.5463065184858076,
      "grad_norm": 2.796875,
      "learning_rate": 8.401805857353885e-06,
      "loss": 0.8484,
      "step": 726530
    },
    {
      "epoch": 2.5463415659927033,
      "grad_norm": 3.328125,
      "learning_rate": 8.401156828690183e-06,
      "loss": 0.783,
      "step": 726540
    },
    {
      "epoch": 2.5463766134995987,
      "grad_norm": 2.953125,
      "learning_rate": 8.40050780002648e-06,
      "loss": 0.7748,
      "step": 726550
    },
    {
      "epoch": 2.5464116610064944,
      "grad_norm": 3.078125,
      "learning_rate": 8.399858771362779e-06,
      "loss": 0.7896,
      "step": 726560
    },
    {
      "epoch": 2.5464467085133897,
      "grad_norm": 3.078125,
      "learning_rate": 8.399209742699077e-06,
      "loss": 0.7658,
      "step": 726570
    },
    {
      "epoch": 2.5464817560202855,
      "grad_norm": 2.6875,
      "learning_rate": 8.398560714035375e-06,
      "loss": 0.8145,
      "step": 726580
    },
    {
      "epoch": 2.5465168035271812,
      "grad_norm": 3.578125,
      "learning_rate": 8.397911685371673e-06,
      "loss": 0.8419,
      "step": 726590
    },
    {
      "epoch": 2.5465518510340766,
      "grad_norm": 3.015625,
      "learning_rate": 8.397262656707972e-06,
      "loss": 0.6647,
      "step": 726600
    },
    {
      "epoch": 2.5465868985409723,
      "grad_norm": 3.265625,
      "learning_rate": 8.39661362804427e-06,
      "loss": 0.8068,
      "step": 726610
    },
    {
      "epoch": 2.5466219460478676,
      "grad_norm": 3.171875,
      "learning_rate": 8.395964599380568e-06,
      "loss": 0.8572,
      "step": 726620
    },
    {
      "epoch": 2.5466569935547634,
      "grad_norm": 3.03125,
      "learning_rate": 8.395315570716865e-06,
      "loss": 0.8497,
      "step": 726630
    },
    {
      "epoch": 2.546692041061659,
      "grad_norm": 2.828125,
      "learning_rate": 8.394666542053163e-06,
      "loss": 0.7726,
      "step": 726640
    },
    {
      "epoch": 2.546727088568555,
      "grad_norm": 3.015625,
      "learning_rate": 8.39401751338946e-06,
      "loss": 0.7696,
      "step": 726650
    },
    {
      "epoch": 2.54676213607545,
      "grad_norm": 2.734375,
      "learning_rate": 8.39336848472576e-06,
      "loss": 0.7764,
      "step": 726660
    },
    {
      "epoch": 2.546797183582346,
      "grad_norm": 2.78125,
      "learning_rate": 8.392719456062058e-06,
      "loss": 0.8126,
      "step": 726670
    },
    {
      "epoch": 2.5468322310892413,
      "grad_norm": 2.65625,
      "learning_rate": 8.392070427398356e-06,
      "loss": 0.7963,
      "step": 726680
    },
    {
      "epoch": 2.546867278596137,
      "grad_norm": 2.421875,
      "learning_rate": 8.391421398734654e-06,
      "loss": 0.7779,
      "step": 726690
    },
    {
      "epoch": 2.546902326103033,
      "grad_norm": 2.9375,
      "learning_rate": 8.390772370070952e-06,
      "loss": 0.7461,
      "step": 726700
    },
    {
      "epoch": 2.546937373609928,
      "grad_norm": 2.921875,
      "learning_rate": 8.39012334140725e-06,
      "loss": 0.8433,
      "step": 726710
    },
    {
      "epoch": 2.546972421116824,
      "grad_norm": 2.90625,
      "learning_rate": 8.389474312743548e-06,
      "loss": 0.7761,
      "step": 726720
    },
    {
      "epoch": 2.547007468623719,
      "grad_norm": 2.921875,
      "learning_rate": 8.388825284079846e-06,
      "loss": 0.7587,
      "step": 726730
    },
    {
      "epoch": 2.547042516130615,
      "grad_norm": 2.65625,
      "learning_rate": 8.388176255416144e-06,
      "loss": 0.8083,
      "step": 726740
    },
    {
      "epoch": 2.5470775636375107,
      "grad_norm": 2.828125,
      "learning_rate": 8.387527226752442e-06,
      "loss": 0.7992,
      "step": 726750
    },
    {
      "epoch": 2.5471126111444065,
      "grad_norm": 3.125,
      "learning_rate": 8.38687819808874e-06,
      "loss": 0.7731,
      "step": 726760
    },
    {
      "epoch": 2.5471476586513018,
      "grad_norm": 2.53125,
      "learning_rate": 8.386229169425038e-06,
      "loss": 0.7763,
      "step": 726770
    },
    {
      "epoch": 2.5471827061581975,
      "grad_norm": 3.234375,
      "learning_rate": 8.385580140761338e-06,
      "loss": 0.8094,
      "step": 726780
    },
    {
      "epoch": 2.547217753665093,
      "grad_norm": 2.78125,
      "learning_rate": 8.384931112097636e-06,
      "loss": 0.7461,
      "step": 726790
    },
    {
      "epoch": 2.5472528011719886,
      "grad_norm": 3.0625,
      "learning_rate": 8.384282083433934e-06,
      "loss": 0.7781,
      "step": 726800
    },
    {
      "epoch": 2.5472878486788844,
      "grad_norm": 3.15625,
      "learning_rate": 8.383633054770232e-06,
      "loss": 0.7642,
      "step": 726810
    },
    {
      "epoch": 2.5473228961857797,
      "grad_norm": 2.84375,
      "learning_rate": 8.382984026106528e-06,
      "loss": 0.7765,
      "step": 726820
    },
    {
      "epoch": 2.5473579436926754,
      "grad_norm": 3.09375,
      "learning_rate": 8.382334997442826e-06,
      "loss": 0.7694,
      "step": 726830
    },
    {
      "epoch": 2.5473929911995707,
      "grad_norm": 3.4375,
      "learning_rate": 8.381685968779126e-06,
      "loss": 0.8044,
      "step": 726840
    },
    {
      "epoch": 2.5474280387064665,
      "grad_norm": 2.9375,
      "learning_rate": 8.381036940115424e-06,
      "loss": 0.8318,
      "step": 726850
    },
    {
      "epoch": 2.5474630862133623,
      "grad_norm": 2.984375,
      "learning_rate": 8.380387911451722e-06,
      "loss": 0.8269,
      "step": 726860
    },
    {
      "epoch": 2.547498133720258,
      "grad_norm": 2.328125,
      "learning_rate": 8.37973888278802e-06,
      "loss": 0.7879,
      "step": 726870
    },
    {
      "epoch": 2.5475331812271533,
      "grad_norm": 2.671875,
      "learning_rate": 8.379089854124318e-06,
      "loss": 0.8919,
      "step": 726880
    },
    {
      "epoch": 2.547568228734049,
      "grad_norm": 2.921875,
      "learning_rate": 8.378440825460616e-06,
      "loss": 0.8438,
      "step": 726890
    },
    {
      "epoch": 2.5476032762409444,
      "grad_norm": 2.78125,
      "learning_rate": 8.377791796796914e-06,
      "loss": 0.7616,
      "step": 726900
    },
    {
      "epoch": 2.54763832374784,
      "grad_norm": 2.65625,
      "learning_rate": 8.377142768133214e-06,
      "loss": 0.8466,
      "step": 726910
    },
    {
      "epoch": 2.547673371254736,
      "grad_norm": 2.8125,
      "learning_rate": 8.37649373946951e-06,
      "loss": 0.7958,
      "step": 726920
    },
    {
      "epoch": 2.5477084187616312,
      "grad_norm": 2.9375,
      "learning_rate": 8.375844710805808e-06,
      "loss": 0.8939,
      "step": 726930
    },
    {
      "epoch": 2.547743466268527,
      "grad_norm": 2.8125,
      "learning_rate": 8.375195682142106e-06,
      "loss": 0.7714,
      "step": 726940
    },
    {
      "epoch": 2.5477785137754227,
      "grad_norm": 3.390625,
      "learning_rate": 8.374546653478404e-06,
      "loss": 0.8796,
      "step": 726950
    },
    {
      "epoch": 2.547813561282318,
      "grad_norm": 3.28125,
      "learning_rate": 8.373897624814702e-06,
      "loss": 0.7914,
      "step": 726960
    },
    {
      "epoch": 2.547848608789214,
      "grad_norm": 3.171875,
      "learning_rate": 8.373248596151002e-06,
      "loss": 0.8799,
      "step": 726970
    },
    {
      "epoch": 2.5478836562961096,
      "grad_norm": 2.625,
      "learning_rate": 8.3725995674873e-06,
      "loss": 0.8065,
      "step": 726980
    },
    {
      "epoch": 2.547918703803005,
      "grad_norm": 2.65625,
      "learning_rate": 8.371950538823598e-06,
      "loss": 0.8387,
      "step": 726990
    },
    {
      "epoch": 2.5479537513099006,
      "grad_norm": 3.1875,
      "learning_rate": 8.371301510159896e-06,
      "loss": 0.8416,
      "step": 727000
    },
    {
      "epoch": 2.547988798816796,
      "grad_norm": 3.234375,
      "learning_rate": 8.370652481496192e-06,
      "loss": 0.7843,
      "step": 727010
    },
    {
      "epoch": 2.5480238463236917,
      "grad_norm": 2.953125,
      "learning_rate": 8.370003452832492e-06,
      "loss": 0.7683,
      "step": 727020
    },
    {
      "epoch": 2.5480588938305875,
      "grad_norm": 3.0,
      "learning_rate": 8.36935442416879e-06,
      "loss": 0.8254,
      "step": 727030
    },
    {
      "epoch": 2.548093941337483,
      "grad_norm": 2.625,
      "learning_rate": 8.368705395505088e-06,
      "loss": 0.7997,
      "step": 727040
    },
    {
      "epoch": 2.5481289888443786,
      "grad_norm": 2.75,
      "learning_rate": 8.368056366841386e-06,
      "loss": 0.7523,
      "step": 727050
    },
    {
      "epoch": 2.5481640363512743,
      "grad_norm": 3.078125,
      "learning_rate": 8.367407338177684e-06,
      "loss": 0.9085,
      "step": 727060
    },
    {
      "epoch": 2.5481990838581696,
      "grad_norm": 2.796875,
      "learning_rate": 8.366758309513982e-06,
      "loss": 0.7939,
      "step": 727070
    },
    {
      "epoch": 2.5482341313650654,
      "grad_norm": 2.703125,
      "learning_rate": 8.36610928085028e-06,
      "loss": 0.8625,
      "step": 727080
    },
    {
      "epoch": 2.548269178871961,
      "grad_norm": 3.203125,
      "learning_rate": 8.36546025218658e-06,
      "loss": 0.7791,
      "step": 727090
    },
    {
      "epoch": 2.5483042263788565,
      "grad_norm": 2.9375,
      "learning_rate": 8.364811223522876e-06,
      "loss": 0.7662,
      "step": 727100
    },
    {
      "epoch": 2.548339273885752,
      "grad_norm": 2.890625,
      "learning_rate": 8.364162194859174e-06,
      "loss": 0.8631,
      "step": 727110
    },
    {
      "epoch": 2.5483743213926475,
      "grad_norm": 3.203125,
      "learning_rate": 8.363513166195472e-06,
      "loss": 0.9291,
      "step": 727120
    },
    {
      "epoch": 2.5484093688995433,
      "grad_norm": 2.9375,
      "learning_rate": 8.36286413753177e-06,
      "loss": 0.8647,
      "step": 727130
    },
    {
      "epoch": 2.548444416406439,
      "grad_norm": 2.78125,
      "learning_rate": 8.362215108868068e-06,
      "loss": 0.7858,
      "step": 727140
    },
    {
      "epoch": 2.548479463913335,
      "grad_norm": 2.796875,
      "learning_rate": 8.361566080204367e-06,
      "loss": 0.8564,
      "step": 727150
    },
    {
      "epoch": 2.54851451142023,
      "grad_norm": 3.28125,
      "learning_rate": 8.360917051540665e-06,
      "loss": 0.808,
      "step": 727160
    },
    {
      "epoch": 2.548549558927126,
      "grad_norm": 2.859375,
      "learning_rate": 8.360268022876963e-06,
      "loss": 0.8303,
      "step": 727170
    },
    {
      "epoch": 2.548584606434021,
      "grad_norm": 3.0625,
      "learning_rate": 8.359618994213261e-06,
      "loss": 0.8535,
      "step": 727180
    },
    {
      "epoch": 2.548619653940917,
      "grad_norm": 2.59375,
      "learning_rate": 8.35896996554956e-06,
      "loss": 0.8234,
      "step": 727190
    },
    {
      "epoch": 2.5486547014478127,
      "grad_norm": 3.125,
      "learning_rate": 8.358320936885856e-06,
      "loss": 0.7566,
      "step": 727200
    },
    {
      "epoch": 2.548689748954708,
      "grad_norm": 2.953125,
      "learning_rate": 8.357671908222155e-06,
      "loss": 0.8542,
      "step": 727210
    },
    {
      "epoch": 2.5487247964616038,
      "grad_norm": 2.90625,
      "learning_rate": 8.357022879558453e-06,
      "loss": 0.7742,
      "step": 727220
    },
    {
      "epoch": 2.548759843968499,
      "grad_norm": 3.4375,
      "learning_rate": 8.356373850894751e-06,
      "loss": 0.8193,
      "step": 727230
    },
    {
      "epoch": 2.548794891475395,
      "grad_norm": 3.09375,
      "learning_rate": 8.35572482223105e-06,
      "loss": 0.8233,
      "step": 727240
    },
    {
      "epoch": 2.5488299389822906,
      "grad_norm": 2.703125,
      "learning_rate": 8.355075793567347e-06,
      "loss": 0.8291,
      "step": 727250
    },
    {
      "epoch": 2.5488649864891864,
      "grad_norm": 3.125,
      "learning_rate": 8.354426764903645e-06,
      "loss": 0.8227,
      "step": 727260
    },
    {
      "epoch": 2.5489000339960817,
      "grad_norm": 2.921875,
      "learning_rate": 8.353777736239943e-06,
      "loss": 0.6933,
      "step": 727270
    },
    {
      "epoch": 2.5489350815029774,
      "grad_norm": 2.75,
      "learning_rate": 8.353128707576243e-06,
      "loss": 0.8717,
      "step": 727280
    },
    {
      "epoch": 2.5489701290098727,
      "grad_norm": 2.84375,
      "learning_rate": 8.35247967891254e-06,
      "loss": 0.7185,
      "step": 727290
    },
    {
      "epoch": 2.5490051765167685,
      "grad_norm": 2.65625,
      "learning_rate": 8.351830650248837e-06,
      "loss": 0.8034,
      "step": 727300
    },
    {
      "epoch": 2.5490402240236643,
      "grad_norm": 2.640625,
      "learning_rate": 8.351181621585135e-06,
      "loss": 0.8507,
      "step": 727310
    },
    {
      "epoch": 2.5490752715305596,
      "grad_norm": 2.828125,
      "learning_rate": 8.350532592921433e-06,
      "loss": 0.8064,
      "step": 727320
    },
    {
      "epoch": 2.5491103190374553,
      "grad_norm": 2.8125,
      "learning_rate": 8.349883564257733e-06,
      "loss": 0.8851,
      "step": 727330
    },
    {
      "epoch": 2.5491453665443506,
      "grad_norm": 2.71875,
      "learning_rate": 8.349234535594031e-06,
      "loss": 0.8008,
      "step": 727340
    },
    {
      "epoch": 2.5491804140512464,
      "grad_norm": 3.03125,
      "learning_rate": 8.348585506930329e-06,
      "loss": 0.7653,
      "step": 727350
    },
    {
      "epoch": 2.549215461558142,
      "grad_norm": 2.578125,
      "learning_rate": 8.347936478266627e-06,
      "loss": 0.7912,
      "step": 727360
    },
    {
      "epoch": 2.549250509065038,
      "grad_norm": 2.859375,
      "learning_rate": 8.347287449602925e-06,
      "loss": 0.7008,
      "step": 727370
    },
    {
      "epoch": 2.5492855565719332,
      "grad_norm": 2.953125,
      "learning_rate": 8.346638420939223e-06,
      "loss": 0.8693,
      "step": 727380
    },
    {
      "epoch": 2.549320604078829,
      "grad_norm": 2.9375,
      "learning_rate": 8.345989392275521e-06,
      "loss": 0.7842,
      "step": 727390
    },
    {
      "epoch": 2.5493556515857243,
      "grad_norm": 2.8125,
      "learning_rate": 8.345340363611819e-06,
      "loss": 0.8437,
      "step": 727400
    },
    {
      "epoch": 2.54939069909262,
      "grad_norm": 2.734375,
      "learning_rate": 8.344691334948117e-06,
      "loss": 0.8554,
      "step": 727410
    },
    {
      "epoch": 2.549425746599516,
      "grad_norm": 2.90625,
      "learning_rate": 8.344042306284415e-06,
      "loss": 0.7557,
      "step": 727420
    },
    {
      "epoch": 2.549460794106411,
      "grad_norm": 3.140625,
      "learning_rate": 8.343393277620713e-06,
      "loss": 0.8334,
      "step": 727430
    },
    {
      "epoch": 2.549495841613307,
      "grad_norm": 3.515625,
      "learning_rate": 8.342744248957011e-06,
      "loss": 0.803,
      "step": 727440
    },
    {
      "epoch": 2.549530889120202,
      "grad_norm": 2.71875,
      "learning_rate": 8.342095220293309e-06,
      "loss": 0.7284,
      "step": 727450
    },
    {
      "epoch": 2.549565936627098,
      "grad_norm": 3.078125,
      "learning_rate": 8.341446191629609e-06,
      "loss": 0.7659,
      "step": 727460
    },
    {
      "epoch": 2.5496009841339937,
      "grad_norm": 2.921875,
      "learning_rate": 8.340797162965907e-06,
      "loss": 0.8074,
      "step": 727470
    },
    {
      "epoch": 2.5496360316408895,
      "grad_norm": 2.75,
      "learning_rate": 8.340148134302203e-06,
      "loss": 0.7945,
      "step": 727480
    },
    {
      "epoch": 2.549671079147785,
      "grad_norm": 2.984375,
      "learning_rate": 8.339499105638501e-06,
      "loss": 0.7581,
      "step": 727490
    },
    {
      "epoch": 2.5497061266546805,
      "grad_norm": 2.71875,
      "learning_rate": 8.338850076974799e-06,
      "loss": 0.7487,
      "step": 727500
    },
    {
      "epoch": 2.549741174161576,
      "grad_norm": 3.421875,
      "learning_rate": 8.338201048311097e-06,
      "loss": 0.8445,
      "step": 727510
    },
    {
      "epoch": 2.5497762216684716,
      "grad_norm": 2.59375,
      "learning_rate": 8.337552019647397e-06,
      "loss": 0.8535,
      "step": 727520
    },
    {
      "epoch": 2.5498112691753674,
      "grad_norm": 2.8125,
      "learning_rate": 8.336902990983695e-06,
      "loss": 0.7798,
      "step": 727530
    },
    {
      "epoch": 2.5498463166822627,
      "grad_norm": 2.640625,
      "learning_rate": 8.336253962319993e-06,
      "loss": 0.8224,
      "step": 727540
    },
    {
      "epoch": 2.5498813641891585,
      "grad_norm": 3.25,
      "learning_rate": 8.33560493365629e-06,
      "loss": 0.7297,
      "step": 727550
    },
    {
      "epoch": 2.5499164116960538,
      "grad_norm": 2.859375,
      "learning_rate": 8.334955904992589e-06,
      "loss": 0.8084,
      "step": 727560
    },
    {
      "epoch": 2.5499514592029495,
      "grad_norm": 2.96875,
      "learning_rate": 8.334306876328887e-06,
      "loss": 0.7642,
      "step": 727570
    },
    {
      "epoch": 2.5499865067098453,
      "grad_norm": 3.0625,
      "learning_rate": 8.333657847665185e-06,
      "loss": 0.8118,
      "step": 727580
    },
    {
      "epoch": 2.550021554216741,
      "grad_norm": 3.296875,
      "learning_rate": 8.333008819001483e-06,
      "loss": 0.7664,
      "step": 727590
    },
    {
      "epoch": 2.5500566017236364,
      "grad_norm": 3.0625,
      "learning_rate": 8.33235979033778e-06,
      "loss": 0.8013,
      "step": 727600
    },
    {
      "epoch": 2.550091649230532,
      "grad_norm": 2.890625,
      "learning_rate": 8.331710761674079e-06,
      "loss": 0.8381,
      "step": 727610
    },
    {
      "epoch": 2.5501266967374274,
      "grad_norm": 2.984375,
      "learning_rate": 8.331061733010377e-06,
      "loss": 0.7222,
      "step": 727620
    },
    {
      "epoch": 2.550161744244323,
      "grad_norm": 2.71875,
      "learning_rate": 8.330412704346675e-06,
      "loss": 0.7494,
      "step": 727630
    },
    {
      "epoch": 2.550196791751219,
      "grad_norm": 3.046875,
      "learning_rate": 8.329763675682974e-06,
      "loss": 0.7625,
      "step": 727640
    },
    {
      "epoch": 2.5502318392581143,
      "grad_norm": 2.6875,
      "learning_rate": 8.329114647019272e-06,
      "loss": 0.7801,
      "step": 727650
    },
    {
      "epoch": 2.55026688676501,
      "grad_norm": 2.84375,
      "learning_rate": 8.32846561835557e-06,
      "loss": 0.8714,
      "step": 727660
    },
    {
      "epoch": 2.5503019342719053,
      "grad_norm": 3.015625,
      "learning_rate": 8.327816589691867e-06,
      "loss": 0.8014,
      "step": 727670
    },
    {
      "epoch": 2.550336981778801,
      "grad_norm": 2.984375,
      "learning_rate": 8.327167561028165e-06,
      "loss": 0.8495,
      "step": 727680
    },
    {
      "epoch": 2.550372029285697,
      "grad_norm": 3.1875,
      "learning_rate": 8.326518532364463e-06,
      "loss": 0.7572,
      "step": 727690
    },
    {
      "epoch": 2.5504070767925926,
      "grad_norm": 2.984375,
      "learning_rate": 8.325869503700762e-06,
      "loss": 0.8024,
      "step": 727700
    },
    {
      "epoch": 2.550442124299488,
      "grad_norm": 2.78125,
      "learning_rate": 8.32522047503706e-06,
      "loss": 0.8036,
      "step": 727710
    },
    {
      "epoch": 2.5504771718063837,
      "grad_norm": 2.8125,
      "learning_rate": 8.324571446373358e-06,
      "loss": 0.8267,
      "step": 727720
    },
    {
      "epoch": 2.550512219313279,
      "grad_norm": 3.03125,
      "learning_rate": 8.323922417709656e-06,
      "loss": 0.856,
      "step": 727730
    },
    {
      "epoch": 2.5505472668201747,
      "grad_norm": 3.53125,
      "learning_rate": 8.323273389045954e-06,
      "loss": 0.8464,
      "step": 727740
    },
    {
      "epoch": 2.5505823143270705,
      "grad_norm": 2.921875,
      "learning_rate": 8.322624360382252e-06,
      "loss": 0.8757,
      "step": 727750
    },
    {
      "epoch": 2.550617361833966,
      "grad_norm": 2.875,
      "learning_rate": 8.32197533171855e-06,
      "loss": 0.8279,
      "step": 727760
    },
    {
      "epoch": 2.5506524093408616,
      "grad_norm": 2.765625,
      "learning_rate": 8.321326303054848e-06,
      "loss": 0.7884,
      "step": 727770
    },
    {
      "epoch": 2.550687456847757,
      "grad_norm": 3.1875,
      "learning_rate": 8.320677274391146e-06,
      "loss": 0.8181,
      "step": 727780
    },
    {
      "epoch": 2.5507225043546526,
      "grad_norm": 2.625,
      "learning_rate": 8.320028245727444e-06,
      "loss": 0.8411,
      "step": 727790
    },
    {
      "epoch": 2.5507575518615484,
      "grad_norm": 2.890625,
      "learning_rate": 8.319379217063742e-06,
      "loss": 0.7594,
      "step": 727800
    },
    {
      "epoch": 2.550792599368444,
      "grad_norm": 2.796875,
      "learning_rate": 8.31873018840004e-06,
      "loss": 0.795,
      "step": 727810
    },
    {
      "epoch": 2.5508276468753395,
      "grad_norm": 3.171875,
      "learning_rate": 8.318081159736338e-06,
      "loss": 0.7913,
      "step": 727820
    },
    {
      "epoch": 2.5508626943822352,
      "grad_norm": 2.734375,
      "learning_rate": 8.317432131072638e-06,
      "loss": 0.8376,
      "step": 727830
    },
    {
      "epoch": 2.5508977418891305,
      "grad_norm": 2.875,
      "learning_rate": 8.316783102408936e-06,
      "loss": 0.7881,
      "step": 727840
    },
    {
      "epoch": 2.5509327893960263,
      "grad_norm": 3.265625,
      "learning_rate": 8.316134073745234e-06,
      "loss": 0.8106,
      "step": 727850
    },
    {
      "epoch": 2.550967836902922,
      "grad_norm": 2.78125,
      "learning_rate": 8.31548504508153e-06,
      "loss": 0.8573,
      "step": 727860
    },
    {
      "epoch": 2.5510028844098174,
      "grad_norm": 3.0625,
      "learning_rate": 8.314836016417828e-06,
      "loss": 0.8231,
      "step": 727870
    },
    {
      "epoch": 2.551037931916713,
      "grad_norm": 2.671875,
      "learning_rate": 8.314186987754128e-06,
      "loss": 0.8513,
      "step": 727880
    },
    {
      "epoch": 2.5510729794236084,
      "grad_norm": 2.78125,
      "learning_rate": 8.313537959090426e-06,
      "loss": 0.8721,
      "step": 727890
    },
    {
      "epoch": 2.551108026930504,
      "grad_norm": 2.875,
      "learning_rate": 8.312888930426724e-06,
      "loss": 0.8266,
      "step": 727900
    },
    {
      "epoch": 2.5511430744374,
      "grad_norm": 2.90625,
      "learning_rate": 8.312239901763022e-06,
      "loss": 0.8697,
      "step": 727910
    },
    {
      "epoch": 2.5511781219442957,
      "grad_norm": 3.203125,
      "learning_rate": 8.31159087309932e-06,
      "loss": 0.8043,
      "step": 727920
    },
    {
      "epoch": 2.551213169451191,
      "grad_norm": 3.21875,
      "learning_rate": 8.310941844435618e-06,
      "loss": 0.7721,
      "step": 727930
    },
    {
      "epoch": 2.551248216958087,
      "grad_norm": 2.9375,
      "learning_rate": 8.310292815771916e-06,
      "loss": 0.7561,
      "step": 727940
    },
    {
      "epoch": 2.551283264464982,
      "grad_norm": 2.109375,
      "learning_rate": 8.309643787108214e-06,
      "loss": 0.8374,
      "step": 727950
    },
    {
      "epoch": 2.551318311971878,
      "grad_norm": 2.921875,
      "learning_rate": 8.308994758444512e-06,
      "loss": 0.7923,
      "step": 727960
    },
    {
      "epoch": 2.5513533594787736,
      "grad_norm": 2.796875,
      "learning_rate": 8.30834572978081e-06,
      "loss": 0.8146,
      "step": 727970
    },
    {
      "epoch": 2.551388406985669,
      "grad_norm": 3.171875,
      "learning_rate": 8.307696701117108e-06,
      "loss": 0.7968,
      "step": 727980
    },
    {
      "epoch": 2.5514234544925647,
      "grad_norm": 2.40625,
      "learning_rate": 8.307047672453406e-06,
      "loss": 0.8253,
      "step": 727990
    },
    {
      "epoch": 2.55145850199946,
      "grad_norm": 2.59375,
      "learning_rate": 8.306398643789704e-06,
      "loss": 0.7922,
      "step": 728000
    },
    {
      "epoch": 2.5514935495063558,
      "grad_norm": 3.140625,
      "learning_rate": 8.305749615126004e-06,
      "loss": 0.7994,
      "step": 728010
    },
    {
      "epoch": 2.5515285970132515,
      "grad_norm": 2.8125,
      "learning_rate": 8.305100586462302e-06,
      "loss": 0.746,
      "step": 728020
    },
    {
      "epoch": 2.5515636445201473,
      "grad_norm": 2.859375,
      "learning_rate": 8.3044515577986e-06,
      "loss": 0.8312,
      "step": 728030
    },
    {
      "epoch": 2.5515986920270426,
      "grad_norm": 3.046875,
      "learning_rate": 8.303802529134896e-06,
      "loss": 0.8145,
      "step": 728040
    },
    {
      "epoch": 2.5516337395339383,
      "grad_norm": 3.0625,
      "learning_rate": 8.303153500471194e-06,
      "loss": 0.7754,
      "step": 728050
    },
    {
      "epoch": 2.5516687870408337,
      "grad_norm": 2.859375,
      "learning_rate": 8.302504471807492e-06,
      "loss": 0.7978,
      "step": 728060
    },
    {
      "epoch": 2.5517038345477294,
      "grad_norm": 2.640625,
      "learning_rate": 8.301855443143792e-06,
      "loss": 0.7984,
      "step": 728070
    },
    {
      "epoch": 2.551738882054625,
      "grad_norm": 2.8125,
      "learning_rate": 8.30120641448009e-06,
      "loss": 0.7627,
      "step": 728080
    },
    {
      "epoch": 2.5517739295615205,
      "grad_norm": 2.765625,
      "learning_rate": 8.300557385816388e-06,
      "loss": 0.8569,
      "step": 728090
    },
    {
      "epoch": 2.5518089770684163,
      "grad_norm": 2.625,
      "learning_rate": 8.299908357152686e-06,
      "loss": 0.8332,
      "step": 728100
    },
    {
      "epoch": 2.5518440245753116,
      "grad_norm": 2.71875,
      "learning_rate": 8.299259328488984e-06,
      "loss": 0.801,
      "step": 728110
    },
    {
      "epoch": 2.5518790720822073,
      "grad_norm": 2.421875,
      "learning_rate": 8.298610299825282e-06,
      "loss": 0.8103,
      "step": 728120
    },
    {
      "epoch": 2.551914119589103,
      "grad_norm": 2.8125,
      "learning_rate": 8.29796127116158e-06,
      "loss": 0.8354,
      "step": 728130
    },
    {
      "epoch": 2.551949167095999,
      "grad_norm": 2.71875,
      "learning_rate": 8.297312242497878e-06,
      "loss": 0.7296,
      "step": 728140
    },
    {
      "epoch": 2.551984214602894,
      "grad_norm": 2.921875,
      "learning_rate": 8.296663213834176e-06,
      "loss": 0.8152,
      "step": 728150
    },
    {
      "epoch": 2.55201926210979,
      "grad_norm": 2.5,
      "learning_rate": 8.296014185170474e-06,
      "loss": 0.7889,
      "step": 728160
    },
    {
      "epoch": 2.5520543096166852,
      "grad_norm": 2.8125,
      "learning_rate": 8.295365156506772e-06,
      "loss": 0.7824,
      "step": 728170
    },
    {
      "epoch": 2.552089357123581,
      "grad_norm": 2.984375,
      "learning_rate": 8.29471612784307e-06,
      "loss": 0.8408,
      "step": 728180
    },
    {
      "epoch": 2.5521244046304767,
      "grad_norm": 2.546875,
      "learning_rate": 8.29406709917937e-06,
      "loss": 0.7978,
      "step": 728190
    },
    {
      "epoch": 2.552159452137372,
      "grad_norm": 2.71875,
      "learning_rate": 8.293418070515668e-06,
      "loss": 0.8152,
      "step": 728200
    },
    {
      "epoch": 2.552194499644268,
      "grad_norm": 3.015625,
      "learning_rate": 8.292769041851966e-06,
      "loss": 0.7702,
      "step": 728210
    },
    {
      "epoch": 2.552229547151163,
      "grad_norm": 3.5,
      "learning_rate": 8.292120013188264e-06,
      "loss": 0.8269,
      "step": 728220
    },
    {
      "epoch": 2.552264594658059,
      "grad_norm": 3.125,
      "learning_rate": 8.29147098452456e-06,
      "loss": 0.8694,
      "step": 728230
    },
    {
      "epoch": 2.5522996421649546,
      "grad_norm": 2.65625,
      "learning_rate": 8.290821955860858e-06,
      "loss": 0.7951,
      "step": 728240
    },
    {
      "epoch": 2.5523346896718504,
      "grad_norm": 2.875,
      "learning_rate": 8.290172927197158e-06,
      "loss": 0.7812,
      "step": 728250
    },
    {
      "epoch": 2.5523697371787457,
      "grad_norm": 2.65625,
      "learning_rate": 8.289523898533456e-06,
      "loss": 0.7876,
      "step": 728260
    },
    {
      "epoch": 2.5524047846856415,
      "grad_norm": 2.484375,
      "learning_rate": 8.288874869869754e-06,
      "loss": 0.8102,
      "step": 728270
    },
    {
      "epoch": 2.552439832192537,
      "grad_norm": 2.40625,
      "learning_rate": 8.288225841206052e-06,
      "loss": 0.7733,
      "step": 728280
    },
    {
      "epoch": 2.5524748796994325,
      "grad_norm": 2.734375,
      "learning_rate": 8.28757681254235e-06,
      "loss": 0.7095,
      "step": 728290
    },
    {
      "epoch": 2.5525099272063283,
      "grad_norm": 2.890625,
      "learning_rate": 8.286927783878648e-06,
      "loss": 0.8005,
      "step": 728300
    },
    {
      "epoch": 2.5525449747132236,
      "grad_norm": 3.15625,
      "learning_rate": 8.286278755214946e-06,
      "loss": 0.8961,
      "step": 728310
    },
    {
      "epoch": 2.5525800222201194,
      "grad_norm": 2.65625,
      "learning_rate": 8.285629726551245e-06,
      "loss": 0.7893,
      "step": 728320
    },
    {
      "epoch": 2.552615069727015,
      "grad_norm": 3.046875,
      "learning_rate": 8.284980697887542e-06,
      "loss": 0.8334,
      "step": 728330
    },
    {
      "epoch": 2.5526501172339104,
      "grad_norm": 3.0625,
      "learning_rate": 8.28433166922384e-06,
      "loss": 0.8359,
      "step": 728340
    },
    {
      "epoch": 2.552685164740806,
      "grad_norm": 3.125,
      "learning_rate": 8.283682640560138e-06,
      "loss": 0.8468,
      "step": 728350
    },
    {
      "epoch": 2.552720212247702,
      "grad_norm": 3.375,
      "learning_rate": 8.283033611896436e-06,
      "loss": 0.7506,
      "step": 728360
    },
    {
      "epoch": 2.5527552597545973,
      "grad_norm": 3.296875,
      "learning_rate": 8.282384583232734e-06,
      "loss": 0.7789,
      "step": 728370
    },
    {
      "epoch": 2.552790307261493,
      "grad_norm": 3.09375,
      "learning_rate": 8.281735554569033e-06,
      "loss": 0.7116,
      "step": 728380
    },
    {
      "epoch": 2.5528253547683883,
      "grad_norm": 2.953125,
      "learning_rate": 8.281086525905331e-06,
      "loss": 0.8023,
      "step": 728390
    },
    {
      "epoch": 2.552860402275284,
      "grad_norm": 3.15625,
      "learning_rate": 8.28043749724163e-06,
      "loss": 0.8863,
      "step": 728400
    },
    {
      "epoch": 2.55289544978218,
      "grad_norm": 2.9375,
      "learning_rate": 8.279788468577927e-06,
      "loss": 0.8348,
      "step": 728410
    },
    {
      "epoch": 2.5529304972890756,
      "grad_norm": 3.140625,
      "learning_rate": 8.279139439914224e-06,
      "loss": 0.8527,
      "step": 728420
    },
    {
      "epoch": 2.552965544795971,
      "grad_norm": 3.171875,
      "learning_rate": 8.278490411250523e-06,
      "loss": 0.8892,
      "step": 728430
    },
    {
      "epoch": 2.5530005923028667,
      "grad_norm": 2.828125,
      "learning_rate": 8.277841382586821e-06,
      "loss": 0.7517,
      "step": 728440
    },
    {
      "epoch": 2.553035639809762,
      "grad_norm": 2.78125,
      "learning_rate": 8.27719235392312e-06,
      "loss": 0.7225,
      "step": 728450
    },
    {
      "epoch": 2.5530706873166578,
      "grad_norm": 3.203125,
      "learning_rate": 8.276543325259417e-06,
      "loss": 0.8749,
      "step": 728460
    },
    {
      "epoch": 2.5531057348235535,
      "grad_norm": 2.734375,
      "learning_rate": 8.275894296595715e-06,
      "loss": 0.785,
      "step": 728470
    },
    {
      "epoch": 2.553140782330449,
      "grad_norm": 2.625,
      "learning_rate": 8.275245267932013e-06,
      "loss": 0.7373,
      "step": 728480
    },
    {
      "epoch": 2.5531758298373446,
      "grad_norm": 2.75,
      "learning_rate": 8.274596239268311e-06,
      "loss": 0.8307,
      "step": 728490
    },
    {
      "epoch": 2.55321087734424,
      "grad_norm": 2.453125,
      "learning_rate": 8.273947210604611e-06,
      "loss": 0.8258,
      "step": 728500
    },
    {
      "epoch": 2.5532459248511357,
      "grad_norm": 2.84375,
      "learning_rate": 8.273298181940907e-06,
      "loss": 0.8098,
      "step": 728510
    },
    {
      "epoch": 2.5532809723580314,
      "grad_norm": 3.140625,
      "learning_rate": 8.272649153277205e-06,
      "loss": 0.8391,
      "step": 728520
    },
    {
      "epoch": 2.553316019864927,
      "grad_norm": 3.015625,
      "learning_rate": 8.272000124613503e-06,
      "loss": 0.7936,
      "step": 728530
    },
    {
      "epoch": 2.5533510673718225,
      "grad_norm": 2.375,
      "learning_rate": 8.271351095949801e-06,
      "loss": 0.7715,
      "step": 728540
    },
    {
      "epoch": 2.5533861148787182,
      "grad_norm": 2.625,
      "learning_rate": 8.2707020672861e-06,
      "loss": 0.7672,
      "step": 728550
    },
    {
      "epoch": 2.5534211623856136,
      "grad_norm": 2.8125,
      "learning_rate": 8.270053038622399e-06,
      "loss": 0.7636,
      "step": 728560
    },
    {
      "epoch": 2.5534562098925093,
      "grad_norm": 2.90625,
      "learning_rate": 8.269404009958697e-06,
      "loss": 0.7302,
      "step": 728570
    },
    {
      "epoch": 2.553491257399405,
      "grad_norm": 3.046875,
      "learning_rate": 8.268754981294995e-06,
      "loss": 0.7973,
      "step": 728580
    },
    {
      "epoch": 2.5535263049063004,
      "grad_norm": 2.671875,
      "learning_rate": 8.268105952631293e-06,
      "loss": 0.7478,
      "step": 728590
    },
    {
      "epoch": 2.553561352413196,
      "grad_norm": 2.96875,
      "learning_rate": 8.267456923967591e-06,
      "loss": 0.8557,
      "step": 728600
    },
    {
      "epoch": 2.5535963999200915,
      "grad_norm": 2.90625,
      "learning_rate": 8.266807895303887e-06,
      "loss": 0.8544,
      "step": 728610
    },
    {
      "epoch": 2.5536314474269872,
      "grad_norm": 3.125,
      "learning_rate": 8.266158866640187e-06,
      "loss": 0.8882,
      "step": 728620
    },
    {
      "epoch": 2.553666494933883,
      "grad_norm": 2.875,
      "learning_rate": 8.265509837976485e-06,
      "loss": 0.756,
      "step": 728630
    },
    {
      "epoch": 2.5537015424407787,
      "grad_norm": 2.796875,
      "learning_rate": 8.264860809312783e-06,
      "loss": 0.8061,
      "step": 728640
    },
    {
      "epoch": 2.553736589947674,
      "grad_norm": 2.734375,
      "learning_rate": 8.264211780649081e-06,
      "loss": 0.845,
      "step": 728650
    },
    {
      "epoch": 2.55377163745457,
      "grad_norm": 3.90625,
      "learning_rate": 8.263562751985379e-06,
      "loss": 0.8484,
      "step": 728660
    },
    {
      "epoch": 2.553806684961465,
      "grad_norm": 2.796875,
      "learning_rate": 8.262913723321677e-06,
      "loss": 0.8175,
      "step": 728670
    },
    {
      "epoch": 2.553841732468361,
      "grad_norm": 2.71875,
      "learning_rate": 8.262264694657975e-06,
      "loss": 0.8617,
      "step": 728680
    },
    {
      "epoch": 2.5538767799752566,
      "grad_norm": 3.046875,
      "learning_rate": 8.261615665994275e-06,
      "loss": 0.8084,
      "step": 728690
    },
    {
      "epoch": 2.553911827482152,
      "grad_norm": 3.21875,
      "learning_rate": 8.260966637330571e-06,
      "loss": 0.8328,
      "step": 728700
    },
    {
      "epoch": 2.5539468749890477,
      "grad_norm": 2.59375,
      "learning_rate": 8.260317608666869e-06,
      "loss": 0.8351,
      "step": 728710
    },
    {
      "epoch": 2.553981922495943,
      "grad_norm": 2.765625,
      "learning_rate": 8.259668580003167e-06,
      "loss": 0.8951,
      "step": 728720
    },
    {
      "epoch": 2.554016970002839,
      "grad_norm": 3.140625,
      "learning_rate": 8.259019551339465e-06,
      "loss": 0.7903,
      "step": 728730
    },
    {
      "epoch": 2.5540520175097345,
      "grad_norm": 3.0,
      "learning_rate": 8.258370522675765e-06,
      "loss": 0.8852,
      "step": 728740
    },
    {
      "epoch": 2.5540870650166303,
      "grad_norm": 2.6875,
      "learning_rate": 8.257721494012063e-06,
      "loss": 0.9,
      "step": 728750
    },
    {
      "epoch": 2.5541221125235256,
      "grad_norm": 3.09375,
      "learning_rate": 8.25707246534836e-06,
      "loss": 0.7723,
      "step": 728760
    },
    {
      "epoch": 2.5541571600304214,
      "grad_norm": 2.984375,
      "learning_rate": 8.256423436684659e-06,
      "loss": 0.7679,
      "step": 728770
    },
    {
      "epoch": 2.5541922075373167,
      "grad_norm": 2.96875,
      "learning_rate": 8.255774408020957e-06,
      "loss": 0.7821,
      "step": 728780
    },
    {
      "epoch": 2.5542272550442124,
      "grad_norm": 2.9375,
      "learning_rate": 8.255125379357255e-06,
      "loss": 0.8691,
      "step": 728790
    },
    {
      "epoch": 2.554262302551108,
      "grad_norm": 2.859375,
      "learning_rate": 8.254476350693553e-06,
      "loss": 0.7741,
      "step": 728800
    },
    {
      "epoch": 2.5542973500580035,
      "grad_norm": 2.96875,
      "learning_rate": 8.25382732202985e-06,
      "loss": 0.8751,
      "step": 728810
    },
    {
      "epoch": 2.5543323975648993,
      "grad_norm": 2.953125,
      "learning_rate": 8.253178293366149e-06,
      "loss": 0.8612,
      "step": 728820
    },
    {
      "epoch": 2.5543674450717946,
      "grad_norm": 3.5,
      "learning_rate": 8.252529264702447e-06,
      "loss": 0.7726,
      "step": 728830
    },
    {
      "epoch": 2.5544024925786903,
      "grad_norm": 2.6875,
      "learning_rate": 8.251880236038745e-06,
      "loss": 0.826,
      "step": 728840
    },
    {
      "epoch": 2.554437540085586,
      "grad_norm": 3.0,
      "learning_rate": 8.251231207375043e-06,
      "loss": 0.7892,
      "step": 728850
    },
    {
      "epoch": 2.554472587592482,
      "grad_norm": 3.375,
      "learning_rate": 8.25058217871134e-06,
      "loss": 0.8155,
      "step": 728860
    },
    {
      "epoch": 2.554507635099377,
      "grad_norm": 2.90625,
      "learning_rate": 8.24993315004764e-06,
      "loss": 0.7613,
      "step": 728870
    },
    {
      "epoch": 2.554542682606273,
      "grad_norm": 3.015625,
      "learning_rate": 8.249284121383938e-06,
      "loss": 0.7603,
      "step": 728880
    },
    {
      "epoch": 2.5545777301131682,
      "grad_norm": 2.875,
      "learning_rate": 8.248635092720235e-06,
      "loss": 0.7666,
      "step": 728890
    },
    {
      "epoch": 2.554612777620064,
      "grad_norm": 2.953125,
      "learning_rate": 8.247986064056533e-06,
      "loss": 0.8747,
      "step": 728900
    },
    {
      "epoch": 2.5546478251269598,
      "grad_norm": 3.09375,
      "learning_rate": 8.24733703539283e-06,
      "loss": 0.7159,
      "step": 728910
    },
    {
      "epoch": 2.554682872633855,
      "grad_norm": 2.515625,
      "learning_rate": 8.246688006729129e-06,
      "loss": 0.8066,
      "step": 728920
    },
    {
      "epoch": 2.554717920140751,
      "grad_norm": 3.109375,
      "learning_rate": 8.246038978065428e-06,
      "loss": 0.8603,
      "step": 728930
    },
    {
      "epoch": 2.554752967647646,
      "grad_norm": 2.796875,
      "learning_rate": 8.245389949401726e-06,
      "loss": 0.8351,
      "step": 728940
    },
    {
      "epoch": 2.554788015154542,
      "grad_norm": 3.03125,
      "learning_rate": 8.244740920738024e-06,
      "loss": 0.7797,
      "step": 728950
    },
    {
      "epoch": 2.5548230626614377,
      "grad_norm": 3.125,
      "learning_rate": 8.244091892074322e-06,
      "loss": 0.9366,
      "step": 728960
    },
    {
      "epoch": 2.5548581101683334,
      "grad_norm": 3.1875,
      "learning_rate": 8.24344286341062e-06,
      "loss": 0.7525,
      "step": 728970
    },
    {
      "epoch": 2.5548931576752287,
      "grad_norm": 2.703125,
      "learning_rate": 8.242793834746918e-06,
      "loss": 0.8702,
      "step": 728980
    },
    {
      "epoch": 2.5549282051821245,
      "grad_norm": 3.484375,
      "learning_rate": 8.242144806083216e-06,
      "loss": 0.8195,
      "step": 728990
    },
    {
      "epoch": 2.55496325268902,
      "grad_norm": 2.90625,
      "learning_rate": 8.241495777419514e-06,
      "loss": 0.7784,
      "step": 729000
    },
    {
      "epoch": 2.5549983001959156,
      "grad_norm": 3.15625,
      "learning_rate": 8.240846748755812e-06,
      "loss": 0.7949,
      "step": 729010
    },
    {
      "epoch": 2.5550333477028113,
      "grad_norm": 3.234375,
      "learning_rate": 8.24019772009211e-06,
      "loss": 0.8586,
      "step": 729020
    },
    {
      "epoch": 2.5550683952097066,
      "grad_norm": 2.921875,
      "learning_rate": 8.239548691428408e-06,
      "loss": 0.7797,
      "step": 729030
    },
    {
      "epoch": 2.5551034427166024,
      "grad_norm": 3.140625,
      "learning_rate": 8.238899662764706e-06,
      "loss": 0.7897,
      "step": 729040
    },
    {
      "epoch": 2.5551384902234977,
      "grad_norm": 2.96875,
      "learning_rate": 8.238250634101006e-06,
      "loss": 0.8209,
      "step": 729050
    },
    {
      "epoch": 2.5551735377303935,
      "grad_norm": 2.796875,
      "learning_rate": 8.237601605437304e-06,
      "loss": 0.836,
      "step": 729060
    },
    {
      "epoch": 2.555208585237289,
      "grad_norm": 3.34375,
      "learning_rate": 8.236952576773602e-06,
      "loss": 0.8708,
      "step": 729070
    },
    {
      "epoch": 2.555243632744185,
      "grad_norm": 2.859375,
      "learning_rate": 8.236303548109898e-06,
      "loss": 0.8887,
      "step": 729080
    },
    {
      "epoch": 2.5552786802510803,
      "grad_norm": 2.796875,
      "learning_rate": 8.235654519446196e-06,
      "loss": 0.8196,
      "step": 729090
    },
    {
      "epoch": 2.555313727757976,
      "grad_norm": 3.09375,
      "learning_rate": 8.235005490782494e-06,
      "loss": 0.8498,
      "step": 729100
    },
    {
      "epoch": 2.5553487752648714,
      "grad_norm": 2.546875,
      "learning_rate": 8.234356462118794e-06,
      "loss": 0.7537,
      "step": 729110
    },
    {
      "epoch": 2.555383822771767,
      "grad_norm": 2.765625,
      "learning_rate": 8.233707433455092e-06,
      "loss": 0.8325,
      "step": 729120
    },
    {
      "epoch": 2.555418870278663,
      "grad_norm": 2.859375,
      "learning_rate": 8.23305840479139e-06,
      "loss": 0.8263,
      "step": 729130
    },
    {
      "epoch": 2.555453917785558,
      "grad_norm": 2.71875,
      "learning_rate": 8.232409376127688e-06,
      "loss": 0.8206,
      "step": 729140
    },
    {
      "epoch": 2.555488965292454,
      "grad_norm": 2.421875,
      "learning_rate": 8.231760347463986e-06,
      "loss": 0.8095,
      "step": 729150
    },
    {
      "epoch": 2.5555240127993493,
      "grad_norm": 2.84375,
      "learning_rate": 8.231111318800284e-06,
      "loss": 0.7929,
      "step": 729160
    },
    {
      "epoch": 2.555559060306245,
      "grad_norm": 2.890625,
      "learning_rate": 8.230462290136582e-06,
      "loss": 0.8157,
      "step": 729170
    },
    {
      "epoch": 2.555594107813141,
      "grad_norm": 3.171875,
      "learning_rate": 8.22981326147288e-06,
      "loss": 0.8216,
      "step": 729180
    },
    {
      "epoch": 2.5556291553200365,
      "grad_norm": 2.875,
      "learning_rate": 8.229164232809178e-06,
      "loss": 0.8038,
      "step": 729190
    },
    {
      "epoch": 2.555664202826932,
      "grad_norm": 3.203125,
      "learning_rate": 8.228515204145476e-06,
      "loss": 0.7668,
      "step": 729200
    },
    {
      "epoch": 2.5556992503338276,
      "grad_norm": 3.1875,
      "learning_rate": 8.227866175481774e-06,
      "loss": 0.8074,
      "step": 729210
    },
    {
      "epoch": 2.555734297840723,
      "grad_norm": 3.109375,
      "learning_rate": 8.227217146818072e-06,
      "loss": 0.7759,
      "step": 729220
    },
    {
      "epoch": 2.5557693453476187,
      "grad_norm": 2.53125,
      "learning_rate": 8.22656811815437e-06,
      "loss": 0.8035,
      "step": 729230
    },
    {
      "epoch": 2.5558043928545144,
      "grad_norm": 2.671875,
      "learning_rate": 8.22591908949067e-06,
      "loss": 0.8365,
      "step": 729240
    },
    {
      "epoch": 2.5558394403614098,
      "grad_norm": 3.28125,
      "learning_rate": 8.225270060826968e-06,
      "loss": 0.7048,
      "step": 729250
    },
    {
      "epoch": 2.5558744878683055,
      "grad_norm": 2.734375,
      "learning_rate": 8.224621032163266e-06,
      "loss": 0.8464,
      "step": 729260
    },
    {
      "epoch": 2.555909535375201,
      "grad_norm": 2.78125,
      "learning_rate": 8.223972003499562e-06,
      "loss": 0.8295,
      "step": 729270
    },
    {
      "epoch": 2.5559445828820966,
      "grad_norm": 2.765625,
      "learning_rate": 8.22332297483586e-06,
      "loss": 0.8191,
      "step": 729280
    },
    {
      "epoch": 2.5559796303889923,
      "grad_norm": 3.125,
      "learning_rate": 8.22267394617216e-06,
      "loss": 0.8247,
      "step": 729290
    },
    {
      "epoch": 2.556014677895888,
      "grad_norm": 2.84375,
      "learning_rate": 8.222024917508458e-06,
      "loss": 0.8322,
      "step": 729300
    },
    {
      "epoch": 2.5560497254027834,
      "grad_norm": 2.984375,
      "learning_rate": 8.221375888844756e-06,
      "loss": 0.866,
      "step": 729310
    },
    {
      "epoch": 2.556084772909679,
      "grad_norm": 2.875,
      "learning_rate": 8.220726860181054e-06,
      "loss": 0.7959,
      "step": 729320
    },
    {
      "epoch": 2.5561198204165745,
      "grad_norm": 3.15625,
      "learning_rate": 8.220077831517352e-06,
      "loss": 0.7821,
      "step": 729330
    },
    {
      "epoch": 2.5561548679234702,
      "grad_norm": 2.984375,
      "learning_rate": 8.21942880285365e-06,
      "loss": 0.7653,
      "step": 729340
    },
    {
      "epoch": 2.556189915430366,
      "grad_norm": 3.0625,
      "learning_rate": 8.218779774189948e-06,
      "loss": 0.7753,
      "step": 729350
    },
    {
      "epoch": 2.5562249629372613,
      "grad_norm": 2.96875,
      "learning_rate": 8.218130745526246e-06,
      "loss": 0.8051,
      "step": 729360
    },
    {
      "epoch": 2.556260010444157,
      "grad_norm": 2.53125,
      "learning_rate": 8.217481716862544e-06,
      "loss": 0.7946,
      "step": 729370
    },
    {
      "epoch": 2.5562950579510524,
      "grad_norm": 3.015625,
      "learning_rate": 8.216832688198842e-06,
      "loss": 0.8341,
      "step": 729380
    },
    {
      "epoch": 2.556330105457948,
      "grad_norm": 2.921875,
      "learning_rate": 8.21618365953514e-06,
      "loss": 0.8274,
      "step": 729390
    },
    {
      "epoch": 2.556365152964844,
      "grad_norm": 2.875,
      "learning_rate": 8.215534630871438e-06,
      "loss": 0.8431,
      "step": 729400
    },
    {
      "epoch": 2.5564002004717397,
      "grad_norm": 3.390625,
      "learning_rate": 8.214885602207736e-06,
      "loss": 0.8182,
      "step": 729410
    },
    {
      "epoch": 2.556435247978635,
      "grad_norm": 3.375,
      "learning_rate": 8.214236573544035e-06,
      "loss": 0.7847,
      "step": 729420
    },
    {
      "epoch": 2.5564702954855307,
      "grad_norm": 2.203125,
      "learning_rate": 8.213587544880333e-06,
      "loss": 0.7967,
      "step": 729430
    },
    {
      "epoch": 2.556505342992426,
      "grad_norm": 2.671875,
      "learning_rate": 8.212938516216631e-06,
      "loss": 0.7623,
      "step": 729440
    },
    {
      "epoch": 2.556540390499322,
      "grad_norm": 3.109375,
      "learning_rate": 8.21228948755293e-06,
      "loss": 0.8132,
      "step": 729450
    },
    {
      "epoch": 2.5565754380062176,
      "grad_norm": 2.875,
      "learning_rate": 8.211640458889226e-06,
      "loss": 0.8448,
      "step": 729460
    },
    {
      "epoch": 2.556610485513113,
      "grad_norm": 3.15625,
      "learning_rate": 8.210991430225524e-06,
      "loss": 0.8396,
      "step": 729470
    },
    {
      "epoch": 2.5566455330200086,
      "grad_norm": 3.046875,
      "learning_rate": 8.210342401561823e-06,
      "loss": 0.8075,
      "step": 729480
    },
    {
      "epoch": 2.556680580526904,
      "grad_norm": 2.53125,
      "learning_rate": 8.209693372898121e-06,
      "loss": 0.8781,
      "step": 729490
    },
    {
      "epoch": 2.5567156280337997,
      "grad_norm": 3.109375,
      "learning_rate": 8.20904434423442e-06,
      "loss": 0.8427,
      "step": 729500
    },
    {
      "epoch": 2.5567506755406955,
      "grad_norm": 2.9375,
      "learning_rate": 8.208395315570717e-06,
      "loss": 0.8139,
      "step": 729510
    },
    {
      "epoch": 2.556785723047591,
      "grad_norm": 3.0625,
      "learning_rate": 8.207746286907015e-06,
      "loss": 0.826,
      "step": 729520
    },
    {
      "epoch": 2.5568207705544865,
      "grad_norm": 2.828125,
      "learning_rate": 8.207097258243313e-06,
      "loss": 0.7101,
      "step": 729530
    },
    {
      "epoch": 2.5568558180613823,
      "grad_norm": 2.4375,
      "learning_rate": 8.206448229579611e-06,
      "loss": 0.787,
      "step": 729540
    },
    {
      "epoch": 2.5568908655682776,
      "grad_norm": 2.84375,
      "learning_rate": 8.20579920091591e-06,
      "loss": 0.7676,
      "step": 729550
    },
    {
      "epoch": 2.5569259130751734,
      "grad_norm": 2.734375,
      "learning_rate": 8.205150172252207e-06,
      "loss": 0.8411,
      "step": 729560
    },
    {
      "epoch": 2.556960960582069,
      "grad_norm": 2.953125,
      "learning_rate": 8.204501143588505e-06,
      "loss": 0.856,
      "step": 729570
    },
    {
      "epoch": 2.5569960080889644,
      "grad_norm": 3.0,
      "learning_rate": 8.203852114924803e-06,
      "loss": 0.8792,
      "step": 729580
    },
    {
      "epoch": 2.55703105559586,
      "grad_norm": 2.546875,
      "learning_rate": 8.203203086261101e-06,
      "loss": 0.7758,
      "step": 729590
    },
    {
      "epoch": 2.557066103102756,
      "grad_norm": 2.796875,
      "learning_rate": 8.202554057597401e-06,
      "loss": 0.8218,
      "step": 729600
    },
    {
      "epoch": 2.5571011506096513,
      "grad_norm": 3.21875,
      "learning_rate": 8.201905028933699e-06,
      "loss": 0.8236,
      "step": 729610
    },
    {
      "epoch": 2.557136198116547,
      "grad_norm": 3.5,
      "learning_rate": 8.201256000269997e-06,
      "loss": 0.81,
      "step": 729620
    },
    {
      "epoch": 2.557171245623443,
      "grad_norm": 3.21875,
      "learning_rate": 8.200606971606295e-06,
      "loss": 0.854,
      "step": 729630
    },
    {
      "epoch": 2.557206293130338,
      "grad_norm": 2.890625,
      "learning_rate": 8.199957942942591e-06,
      "loss": 0.7953,
      "step": 729640
    },
    {
      "epoch": 2.557241340637234,
      "grad_norm": 2.8125,
      "learning_rate": 8.19930891427889e-06,
      "loss": 0.8395,
      "step": 729650
    },
    {
      "epoch": 2.557276388144129,
      "grad_norm": 2.671875,
      "learning_rate": 8.198659885615189e-06,
      "loss": 0.8017,
      "step": 729660
    },
    {
      "epoch": 2.557311435651025,
      "grad_norm": 2.78125,
      "learning_rate": 8.198010856951487e-06,
      "loss": 0.7857,
      "step": 729670
    },
    {
      "epoch": 2.5573464831579207,
      "grad_norm": 3.25,
      "learning_rate": 8.197361828287785e-06,
      "loss": 0.7568,
      "step": 729680
    },
    {
      "epoch": 2.557381530664816,
      "grad_norm": 3.1875,
      "learning_rate": 8.196712799624083e-06,
      "loss": 0.7209,
      "step": 729690
    },
    {
      "epoch": 2.5574165781717118,
      "grad_norm": 3.0625,
      "learning_rate": 8.196063770960381e-06,
      "loss": 0.7839,
      "step": 729700
    },
    {
      "epoch": 2.5574516256786075,
      "grad_norm": 3.0,
      "learning_rate": 8.195414742296679e-06,
      "loss": 0.835,
      "step": 729710
    },
    {
      "epoch": 2.557486673185503,
      "grad_norm": 2.9375,
      "learning_rate": 8.194765713632977e-06,
      "loss": 0.8253,
      "step": 729720
    },
    {
      "epoch": 2.5575217206923986,
      "grad_norm": 2.796875,
      "learning_rate": 8.194116684969277e-06,
      "loss": 0.7919,
      "step": 729730
    },
    {
      "epoch": 2.5575567681992943,
      "grad_norm": 2.921875,
      "learning_rate": 8.193467656305573e-06,
      "loss": 0.8381,
      "step": 729740
    },
    {
      "epoch": 2.5575918157061897,
      "grad_norm": 3.46875,
      "learning_rate": 8.192818627641871e-06,
      "loss": 0.8092,
      "step": 729750
    },
    {
      "epoch": 2.5576268632130854,
      "grad_norm": 3.125,
      "learning_rate": 8.192169598978169e-06,
      "loss": 0.804,
      "step": 729760
    },
    {
      "epoch": 2.5576619107199807,
      "grad_norm": 3.78125,
      "learning_rate": 8.191520570314467e-06,
      "loss": 0.8339,
      "step": 729770
    },
    {
      "epoch": 2.5576969582268765,
      "grad_norm": 3.0625,
      "learning_rate": 8.190871541650765e-06,
      "loss": 0.7999,
      "step": 729780
    },
    {
      "epoch": 2.5577320057337722,
      "grad_norm": 2.921875,
      "learning_rate": 8.190222512987065e-06,
      "loss": 0.7844,
      "step": 729790
    },
    {
      "epoch": 2.557767053240668,
      "grad_norm": 3.3125,
      "learning_rate": 8.189573484323363e-06,
      "loss": 0.8451,
      "step": 729800
    },
    {
      "epoch": 2.5578021007475633,
      "grad_norm": 3.359375,
      "learning_rate": 8.18892445565966e-06,
      "loss": 0.8211,
      "step": 729810
    },
    {
      "epoch": 2.557837148254459,
      "grad_norm": 2.703125,
      "learning_rate": 8.188275426995959e-06,
      "loss": 0.8236,
      "step": 729820
    },
    {
      "epoch": 2.5578721957613544,
      "grad_norm": 3.15625,
      "learning_rate": 8.187626398332255e-06,
      "loss": 0.8138,
      "step": 729830
    },
    {
      "epoch": 2.55790724326825,
      "grad_norm": 2.53125,
      "learning_rate": 8.186977369668555e-06,
      "loss": 0.8408,
      "step": 729840
    },
    {
      "epoch": 2.557942290775146,
      "grad_norm": 3.078125,
      "learning_rate": 8.186328341004853e-06,
      "loss": 0.8304,
      "step": 729850
    },
    {
      "epoch": 2.557977338282041,
      "grad_norm": 2.859375,
      "learning_rate": 8.18567931234115e-06,
      "loss": 0.8238,
      "step": 729860
    },
    {
      "epoch": 2.558012385788937,
      "grad_norm": 3.140625,
      "learning_rate": 8.185030283677449e-06,
      "loss": 0.8368,
      "step": 729870
    },
    {
      "epoch": 2.5580474332958323,
      "grad_norm": 2.84375,
      "learning_rate": 8.184381255013747e-06,
      "loss": 0.8069,
      "step": 729880
    },
    {
      "epoch": 2.558082480802728,
      "grad_norm": 3.0625,
      "learning_rate": 8.183732226350045e-06,
      "loss": 0.7153,
      "step": 729890
    },
    {
      "epoch": 2.558117528309624,
      "grad_norm": 2.75,
      "learning_rate": 8.183083197686343e-06,
      "loss": 0.8271,
      "step": 729900
    },
    {
      "epoch": 2.5581525758165196,
      "grad_norm": 2.359375,
      "learning_rate": 8.182434169022642e-06,
      "loss": 0.7502,
      "step": 729910
    },
    {
      "epoch": 2.558187623323415,
      "grad_norm": 2.5625,
      "learning_rate": 8.18178514035894e-06,
      "loss": 0.7517,
      "step": 729920
    },
    {
      "epoch": 2.5582226708303106,
      "grad_norm": 3.34375,
      "learning_rate": 8.181136111695237e-06,
      "loss": 0.787,
      "step": 729930
    },
    {
      "epoch": 2.558257718337206,
      "grad_norm": 3.234375,
      "learning_rate": 8.180487083031535e-06,
      "loss": 0.8234,
      "step": 729940
    },
    {
      "epoch": 2.5582927658441017,
      "grad_norm": 2.453125,
      "learning_rate": 8.179838054367833e-06,
      "loss": 0.7617,
      "step": 729950
    },
    {
      "epoch": 2.5583278133509975,
      "grad_norm": 3.203125,
      "learning_rate": 8.17918902570413e-06,
      "loss": 0.751,
      "step": 729960
    },
    {
      "epoch": 2.5583628608578928,
      "grad_norm": 2.9375,
      "learning_rate": 8.17853999704043e-06,
      "loss": 0.8044,
      "step": 729970
    },
    {
      "epoch": 2.5583979083647885,
      "grad_norm": 3.078125,
      "learning_rate": 8.177890968376728e-06,
      "loss": 0.8702,
      "step": 729980
    },
    {
      "epoch": 2.558432955871684,
      "grad_norm": 2.609375,
      "learning_rate": 8.177241939713026e-06,
      "loss": 0.7444,
      "step": 729990
    },
    {
      "epoch": 2.5584680033785796,
      "grad_norm": 3.0,
      "learning_rate": 8.176592911049324e-06,
      "loss": 0.7903,
      "step": 730000
    },
    {
      "epoch": 2.5584680033785796,
      "eval_loss": 0.7549558281898499,
      "eval_runtime": 551.9523,
      "eval_samples_per_second": 689.255,
      "eval_steps_per_second": 57.438,
      "step": 730000
    },
    {
      "epoch": 2.5585030508854754,
      "grad_norm": 3.234375,
      "learning_rate": 8.175943882385622e-06,
      "loss": 0.8913,
      "step": 730010
    },
    {
      "epoch": 2.558538098392371,
      "grad_norm": 2.65625,
      "learning_rate": 8.175294853721919e-06,
      "loss": 0.8017,
      "step": 730020
    },
    {
      "epoch": 2.5585731458992664,
      "grad_norm": 3.046875,
      "learning_rate": 8.174645825058218e-06,
      "loss": 0.8749,
      "step": 730030
    },
    {
      "epoch": 2.558608193406162,
      "grad_norm": 2.640625,
      "learning_rate": 8.173996796394516e-06,
      "loss": 0.7447,
      "step": 730040
    },
    {
      "epoch": 2.5586432409130575,
      "grad_norm": 2.8125,
      "learning_rate": 8.173347767730814e-06,
      "loss": 0.8208,
      "step": 730050
    },
    {
      "epoch": 2.5586782884199533,
      "grad_norm": 2.703125,
      "learning_rate": 8.172698739067112e-06,
      "loss": 0.8216,
      "step": 730060
    },
    {
      "epoch": 2.558713335926849,
      "grad_norm": 2.875,
      "learning_rate": 8.17204971040341e-06,
      "loss": 0.8427,
      "step": 730070
    },
    {
      "epoch": 2.5587483834337443,
      "grad_norm": 2.65625,
      "learning_rate": 8.171400681739708e-06,
      "loss": 0.707,
      "step": 730080
    },
    {
      "epoch": 2.55878343094064,
      "grad_norm": 2.953125,
      "learning_rate": 8.170751653076006e-06,
      "loss": 0.7755,
      "step": 730090
    },
    {
      "epoch": 2.5588184784475354,
      "grad_norm": 2.78125,
      "learning_rate": 8.170102624412306e-06,
      "loss": 0.8226,
      "step": 730100
    },
    {
      "epoch": 2.558853525954431,
      "grad_norm": 2.78125,
      "learning_rate": 8.169453595748602e-06,
      "loss": 0.7853,
      "step": 730110
    },
    {
      "epoch": 2.558888573461327,
      "grad_norm": 2.359375,
      "learning_rate": 8.1688045670849e-06,
      "loss": 0.8551,
      "step": 730120
    },
    {
      "epoch": 2.5589236209682227,
      "grad_norm": 3.296875,
      "learning_rate": 8.168155538421198e-06,
      "loss": 0.8048,
      "step": 730130
    },
    {
      "epoch": 2.558958668475118,
      "grad_norm": 2.859375,
      "learning_rate": 8.167506509757496e-06,
      "loss": 0.8355,
      "step": 730140
    },
    {
      "epoch": 2.5589937159820138,
      "grad_norm": 2.953125,
      "learning_rate": 8.166857481093796e-06,
      "loss": 0.8586,
      "step": 730150
    },
    {
      "epoch": 2.559028763488909,
      "grad_norm": 2.796875,
      "learning_rate": 8.166208452430094e-06,
      "loss": 0.8136,
      "step": 730160
    },
    {
      "epoch": 2.559063810995805,
      "grad_norm": 2.5625,
      "learning_rate": 8.165559423766392e-06,
      "loss": 0.8126,
      "step": 730170
    },
    {
      "epoch": 2.5590988585027006,
      "grad_norm": 3.078125,
      "learning_rate": 8.16491039510269e-06,
      "loss": 0.7264,
      "step": 730180
    },
    {
      "epoch": 2.559133906009596,
      "grad_norm": 3.34375,
      "learning_rate": 8.164261366438988e-06,
      "loss": 0.946,
      "step": 730190
    },
    {
      "epoch": 2.5591689535164917,
      "grad_norm": 3.0625,
      "learning_rate": 8.163612337775286e-06,
      "loss": 0.831,
      "step": 730200
    },
    {
      "epoch": 2.559204001023387,
      "grad_norm": 2.703125,
      "learning_rate": 8.162963309111584e-06,
      "loss": 0.7396,
      "step": 730210
    },
    {
      "epoch": 2.5592390485302827,
      "grad_norm": 3.0625,
      "learning_rate": 8.162314280447882e-06,
      "loss": 0.7539,
      "step": 730220
    },
    {
      "epoch": 2.5592740960371785,
      "grad_norm": 3.296875,
      "learning_rate": 8.16166525178418e-06,
      "loss": 0.7763,
      "step": 730230
    },
    {
      "epoch": 2.5593091435440742,
      "grad_norm": 2.890625,
      "learning_rate": 8.161016223120478e-06,
      "loss": 0.7821,
      "step": 730240
    },
    {
      "epoch": 2.5593441910509696,
      "grad_norm": 3.125,
      "learning_rate": 8.160367194456776e-06,
      "loss": 0.8102,
      "step": 730250
    },
    {
      "epoch": 2.5593792385578653,
      "grad_norm": 3.46875,
      "learning_rate": 8.159718165793074e-06,
      "loss": 0.825,
      "step": 730260
    },
    {
      "epoch": 2.5594142860647606,
      "grad_norm": 3.375,
      "learning_rate": 8.159069137129372e-06,
      "loss": 0.8148,
      "step": 730270
    },
    {
      "epoch": 2.5594493335716564,
      "grad_norm": 2.390625,
      "learning_rate": 8.158420108465672e-06,
      "loss": 0.7925,
      "step": 730280
    },
    {
      "epoch": 2.559484381078552,
      "grad_norm": 2.90625,
      "learning_rate": 8.15777107980197e-06,
      "loss": 0.8298,
      "step": 730290
    },
    {
      "epoch": 2.5595194285854475,
      "grad_norm": 2.703125,
      "learning_rate": 8.157122051138266e-06,
      "loss": 0.7775,
      "step": 730300
    },
    {
      "epoch": 2.559554476092343,
      "grad_norm": 2.71875,
      "learning_rate": 8.156473022474564e-06,
      "loss": 0.7522,
      "step": 730310
    },
    {
      "epoch": 2.5595895235992385,
      "grad_norm": 2.875,
      "learning_rate": 8.155823993810862e-06,
      "loss": 0.7765,
      "step": 730320
    },
    {
      "epoch": 2.5596245711061343,
      "grad_norm": 3.0625,
      "learning_rate": 8.15517496514716e-06,
      "loss": 0.8287,
      "step": 730330
    },
    {
      "epoch": 2.55965961861303,
      "grad_norm": 2.8125,
      "learning_rate": 8.15452593648346e-06,
      "loss": 0.7627,
      "step": 730340
    },
    {
      "epoch": 2.559694666119926,
      "grad_norm": 2.9375,
      "learning_rate": 8.153876907819758e-06,
      "loss": 0.7089,
      "step": 730350
    },
    {
      "epoch": 2.559729713626821,
      "grad_norm": 3.203125,
      "learning_rate": 8.153227879156056e-06,
      "loss": 0.8761,
      "step": 730360
    },
    {
      "epoch": 2.559764761133717,
      "grad_norm": 2.65625,
      "learning_rate": 8.152578850492354e-06,
      "loss": 0.8191,
      "step": 730370
    },
    {
      "epoch": 2.559799808640612,
      "grad_norm": 3.140625,
      "learning_rate": 8.151929821828652e-06,
      "loss": 0.7828,
      "step": 730380
    },
    {
      "epoch": 2.559834856147508,
      "grad_norm": 2.671875,
      "learning_rate": 8.15128079316495e-06,
      "loss": 0.7589,
      "step": 730390
    },
    {
      "epoch": 2.5598699036544037,
      "grad_norm": 3.203125,
      "learning_rate": 8.150631764501248e-06,
      "loss": 0.7533,
      "step": 730400
    },
    {
      "epoch": 2.559904951161299,
      "grad_norm": 2.78125,
      "learning_rate": 8.149982735837546e-06,
      "loss": 0.7957,
      "step": 730410
    },
    {
      "epoch": 2.5599399986681948,
      "grad_norm": 2.75,
      "learning_rate": 8.149333707173844e-06,
      "loss": 0.8468,
      "step": 730420
    },
    {
      "epoch": 2.55997504617509,
      "grad_norm": 3.203125,
      "learning_rate": 8.148684678510142e-06,
      "loss": 0.7494,
      "step": 730430
    },
    {
      "epoch": 2.560010093681986,
      "grad_norm": 2.8125,
      "learning_rate": 8.14803564984644e-06,
      "loss": 0.7943,
      "step": 730440
    },
    {
      "epoch": 2.5600451411888816,
      "grad_norm": 2.78125,
      "learning_rate": 8.147386621182738e-06,
      "loss": 0.7582,
      "step": 730450
    },
    {
      "epoch": 2.5600801886957774,
      "grad_norm": 3.75,
      "learning_rate": 8.146737592519038e-06,
      "loss": 0.8103,
      "step": 730460
    },
    {
      "epoch": 2.5601152362026727,
      "grad_norm": 3.15625,
      "learning_rate": 8.146088563855336e-06,
      "loss": 0.8361,
      "step": 730470
    },
    {
      "epoch": 2.5601502837095684,
      "grad_norm": 2.671875,
      "learning_rate": 8.145439535191634e-06,
      "loss": 0.8023,
      "step": 730480
    },
    {
      "epoch": 2.5601853312164637,
      "grad_norm": 3.25,
      "learning_rate": 8.14479050652793e-06,
      "loss": 0.7778,
      "step": 730490
    },
    {
      "epoch": 2.5602203787233595,
      "grad_norm": 2.25,
      "learning_rate": 8.144141477864228e-06,
      "loss": 0.7412,
      "step": 730500
    },
    {
      "epoch": 2.5602554262302553,
      "grad_norm": 2.875,
      "learning_rate": 8.143492449200526e-06,
      "loss": 0.7388,
      "step": 730510
    },
    {
      "epoch": 2.5602904737371506,
      "grad_norm": 2.765625,
      "learning_rate": 8.142843420536826e-06,
      "loss": 0.7871,
      "step": 730520
    },
    {
      "epoch": 2.5603255212440463,
      "grad_norm": 2.625,
      "learning_rate": 8.142194391873124e-06,
      "loss": 0.77,
      "step": 730530
    },
    {
      "epoch": 2.5603605687509416,
      "grad_norm": 2.515625,
      "learning_rate": 8.141545363209422e-06,
      "loss": 0.7672,
      "step": 730540
    },
    {
      "epoch": 2.5603956162578374,
      "grad_norm": 2.5625,
      "learning_rate": 8.14089633454572e-06,
      "loss": 0.6889,
      "step": 730550
    },
    {
      "epoch": 2.560430663764733,
      "grad_norm": 2.703125,
      "learning_rate": 8.140247305882018e-06,
      "loss": 0.9268,
      "step": 730560
    },
    {
      "epoch": 2.560465711271629,
      "grad_norm": 3.3125,
      "learning_rate": 8.139598277218316e-06,
      "loss": 0.81,
      "step": 730570
    },
    {
      "epoch": 2.5605007587785242,
      "grad_norm": 4.15625,
      "learning_rate": 8.138949248554614e-06,
      "loss": 0.7692,
      "step": 730580
    },
    {
      "epoch": 2.56053580628542,
      "grad_norm": 2.9375,
      "learning_rate": 8.138300219890912e-06,
      "loss": 0.8593,
      "step": 730590
    },
    {
      "epoch": 2.5605708537923153,
      "grad_norm": 3.09375,
      "learning_rate": 8.13765119122721e-06,
      "loss": 0.8145,
      "step": 730600
    },
    {
      "epoch": 2.560605901299211,
      "grad_norm": 2.671875,
      "learning_rate": 8.137002162563508e-06,
      "loss": 0.894,
      "step": 730610
    },
    {
      "epoch": 2.560640948806107,
      "grad_norm": 2.8125,
      "learning_rate": 8.136353133899806e-06,
      "loss": 0.8513,
      "step": 730620
    },
    {
      "epoch": 2.560675996313002,
      "grad_norm": 2.859375,
      "learning_rate": 8.135704105236104e-06,
      "loss": 0.7589,
      "step": 730630
    },
    {
      "epoch": 2.560711043819898,
      "grad_norm": 3.265625,
      "learning_rate": 8.135055076572401e-06,
      "loss": 0.8625,
      "step": 730640
    },
    {
      "epoch": 2.560746091326793,
      "grad_norm": 2.921875,
      "learning_rate": 8.134406047908701e-06,
      "loss": 0.8462,
      "step": 730650
    },
    {
      "epoch": 2.560781138833689,
      "grad_norm": 3.25,
      "learning_rate": 8.133757019245e-06,
      "loss": 0.8163,
      "step": 730660
    },
    {
      "epoch": 2.5608161863405847,
      "grad_norm": 2.515625,
      "learning_rate": 8.133107990581297e-06,
      "loss": 0.8409,
      "step": 730670
    },
    {
      "epoch": 2.5608512338474805,
      "grad_norm": 2.953125,
      "learning_rate": 8.132458961917593e-06,
      "loss": 0.8393,
      "step": 730680
    },
    {
      "epoch": 2.560886281354376,
      "grad_norm": 3.03125,
      "learning_rate": 8.131809933253891e-06,
      "loss": 0.8298,
      "step": 730690
    },
    {
      "epoch": 2.5609213288612716,
      "grad_norm": 3.109375,
      "learning_rate": 8.131160904590191e-06,
      "loss": 0.8563,
      "step": 730700
    },
    {
      "epoch": 2.560956376368167,
      "grad_norm": 3.3125,
      "learning_rate": 8.13051187592649e-06,
      "loss": 0.8139,
      "step": 730710
    },
    {
      "epoch": 2.5609914238750626,
      "grad_norm": 2.40625,
      "learning_rate": 8.129862847262787e-06,
      "loss": 0.8409,
      "step": 730720
    },
    {
      "epoch": 2.5610264713819584,
      "grad_norm": 3.046875,
      "learning_rate": 8.129213818599085e-06,
      "loss": 0.7491,
      "step": 730730
    },
    {
      "epoch": 2.5610615188888537,
      "grad_norm": 2.75,
      "learning_rate": 8.128564789935383e-06,
      "loss": 0.796,
      "step": 730740
    },
    {
      "epoch": 2.5610965663957495,
      "grad_norm": 2.953125,
      "learning_rate": 8.127915761271681e-06,
      "loss": 0.7993,
      "step": 730750
    },
    {
      "epoch": 2.5611316139026448,
      "grad_norm": 2.984375,
      "learning_rate": 8.12726673260798e-06,
      "loss": 0.8333,
      "step": 730760
    },
    {
      "epoch": 2.5611666614095405,
      "grad_norm": 2.9375,
      "learning_rate": 8.126617703944277e-06,
      "loss": 0.8423,
      "step": 730770
    },
    {
      "epoch": 2.5612017089164363,
      "grad_norm": 2.9375,
      "learning_rate": 8.125968675280575e-06,
      "loss": 0.8031,
      "step": 730780
    },
    {
      "epoch": 2.561236756423332,
      "grad_norm": 3.46875,
      "learning_rate": 8.125319646616873e-06,
      "loss": 0.7675,
      "step": 730790
    },
    {
      "epoch": 2.5612718039302274,
      "grad_norm": 2.96875,
      "learning_rate": 8.124670617953171e-06,
      "loss": 0.7665,
      "step": 730800
    },
    {
      "epoch": 2.561306851437123,
      "grad_norm": 3.109375,
      "learning_rate": 8.12402158928947e-06,
      "loss": 0.8355,
      "step": 730810
    },
    {
      "epoch": 2.5613418989440184,
      "grad_norm": 2.59375,
      "learning_rate": 8.123372560625767e-06,
      "loss": 0.7632,
      "step": 730820
    },
    {
      "epoch": 2.561376946450914,
      "grad_norm": 3.046875,
      "learning_rate": 8.122723531962067e-06,
      "loss": 0.8653,
      "step": 730830
    },
    {
      "epoch": 2.56141199395781,
      "grad_norm": 2.90625,
      "learning_rate": 8.122074503298365e-06,
      "loss": 0.8507,
      "step": 730840
    },
    {
      "epoch": 2.5614470414647053,
      "grad_norm": 3.4375,
      "learning_rate": 8.121425474634663e-06,
      "loss": 0.8261,
      "step": 730850
    },
    {
      "epoch": 2.561482088971601,
      "grad_norm": 2.921875,
      "learning_rate": 8.120776445970961e-06,
      "loss": 0.7614,
      "step": 730860
    },
    {
      "epoch": 2.5615171364784963,
      "grad_norm": 3.484375,
      "learning_rate": 8.120127417307257e-06,
      "loss": 0.7682,
      "step": 730870
    },
    {
      "epoch": 2.561552183985392,
      "grad_norm": 3.234375,
      "learning_rate": 8.119478388643555e-06,
      "loss": 0.9306,
      "step": 730880
    },
    {
      "epoch": 2.561587231492288,
      "grad_norm": 3.125,
      "learning_rate": 8.118829359979855e-06,
      "loss": 0.7507,
      "step": 730890
    },
    {
      "epoch": 2.5616222789991836,
      "grad_norm": 2.4375,
      "learning_rate": 8.118180331316153e-06,
      "loss": 0.7378,
      "step": 730900
    },
    {
      "epoch": 2.561657326506079,
      "grad_norm": 2.421875,
      "learning_rate": 8.117531302652451e-06,
      "loss": 0.7974,
      "step": 730910
    },
    {
      "epoch": 2.5616923740129747,
      "grad_norm": 3.0625,
      "learning_rate": 8.116882273988749e-06,
      "loss": 0.7834,
      "step": 730920
    },
    {
      "epoch": 2.56172742151987,
      "grad_norm": 3.4375,
      "learning_rate": 8.116233245325047e-06,
      "loss": 0.8724,
      "step": 730930
    },
    {
      "epoch": 2.5617624690267657,
      "grad_norm": 2.5,
      "learning_rate": 8.115584216661345e-06,
      "loss": 0.7706,
      "step": 730940
    },
    {
      "epoch": 2.5617975165336615,
      "grad_norm": 3.015625,
      "learning_rate": 8.114935187997645e-06,
      "loss": 0.8715,
      "step": 730950
    },
    {
      "epoch": 2.561832564040557,
      "grad_norm": 2.71875,
      "learning_rate": 8.114286159333941e-06,
      "loss": 0.7413,
      "step": 730960
    },
    {
      "epoch": 2.5618676115474526,
      "grad_norm": 3.046875,
      "learning_rate": 8.113637130670239e-06,
      "loss": 0.8377,
      "step": 730970
    },
    {
      "epoch": 2.5619026590543483,
      "grad_norm": 3.28125,
      "learning_rate": 8.112988102006537e-06,
      "loss": 0.8348,
      "step": 730980
    },
    {
      "epoch": 2.5619377065612436,
      "grad_norm": 2.46875,
      "learning_rate": 8.112339073342835e-06,
      "loss": 0.7917,
      "step": 730990
    },
    {
      "epoch": 2.5619727540681394,
      "grad_norm": 2.734375,
      "learning_rate": 8.111690044679133e-06,
      "loss": 0.7253,
      "step": 731000
    },
    {
      "epoch": 2.562007801575035,
      "grad_norm": 3.703125,
      "learning_rate": 8.111041016015433e-06,
      "loss": 0.8302,
      "step": 731010
    },
    {
      "epoch": 2.5620428490819305,
      "grad_norm": 3.390625,
      "learning_rate": 8.11039198735173e-06,
      "loss": 0.8168,
      "step": 731020
    },
    {
      "epoch": 2.5620778965888262,
      "grad_norm": 2.890625,
      "learning_rate": 8.109742958688029e-06,
      "loss": 0.7217,
      "step": 731030
    },
    {
      "epoch": 2.5621129440957215,
      "grad_norm": 2.984375,
      "learning_rate": 8.109093930024327e-06,
      "loss": 0.7581,
      "step": 731040
    },
    {
      "epoch": 2.5621479916026173,
      "grad_norm": 3.046875,
      "learning_rate": 8.108444901360623e-06,
      "loss": 0.7848,
      "step": 731050
    },
    {
      "epoch": 2.562183039109513,
      "grad_norm": 3.203125,
      "learning_rate": 8.107795872696921e-06,
      "loss": 0.8366,
      "step": 731060
    },
    {
      "epoch": 2.562218086616409,
      "grad_norm": 2.84375,
      "learning_rate": 8.10714684403322e-06,
      "loss": 0.7901,
      "step": 731070
    },
    {
      "epoch": 2.562253134123304,
      "grad_norm": 3.4375,
      "learning_rate": 8.106497815369519e-06,
      "loss": 0.8223,
      "step": 731080
    },
    {
      "epoch": 2.5622881816302,
      "grad_norm": 3.21875,
      "learning_rate": 8.105848786705817e-06,
      "loss": 0.7841,
      "step": 731090
    },
    {
      "epoch": 2.562323229137095,
      "grad_norm": 3.109375,
      "learning_rate": 8.105199758042115e-06,
      "loss": 0.9387,
      "step": 731100
    },
    {
      "epoch": 2.562358276643991,
      "grad_norm": 2.921875,
      "learning_rate": 8.104550729378413e-06,
      "loss": 0.7864,
      "step": 731110
    },
    {
      "epoch": 2.5623933241508867,
      "grad_norm": 3.078125,
      "learning_rate": 8.10390170071471e-06,
      "loss": 0.8247,
      "step": 731120
    },
    {
      "epoch": 2.562428371657782,
      "grad_norm": 3.03125,
      "learning_rate": 8.103252672051009e-06,
      "loss": 0.7371,
      "step": 731130
    },
    {
      "epoch": 2.562463419164678,
      "grad_norm": 2.65625,
      "learning_rate": 8.102603643387308e-06,
      "loss": 0.8154,
      "step": 731140
    },
    {
      "epoch": 2.562498466671573,
      "grad_norm": 2.203125,
      "learning_rate": 8.101954614723605e-06,
      "loss": 0.8163,
      "step": 731150
    },
    {
      "epoch": 2.562533514178469,
      "grad_norm": 3.09375,
      "learning_rate": 8.101305586059903e-06,
      "loss": 0.781,
      "step": 731160
    },
    {
      "epoch": 2.5625685616853646,
      "grad_norm": 2.890625,
      "learning_rate": 8.1006565573962e-06,
      "loss": 0.7663,
      "step": 731170
    },
    {
      "epoch": 2.5626036091922604,
      "grad_norm": 2.796875,
      "learning_rate": 8.100007528732499e-06,
      "loss": 0.7557,
      "step": 731180
    },
    {
      "epoch": 2.5626386566991557,
      "grad_norm": 3.40625,
      "learning_rate": 8.099358500068798e-06,
      "loss": 0.8019,
      "step": 731190
    },
    {
      "epoch": 2.5626737042060515,
      "grad_norm": 3.03125,
      "learning_rate": 8.098709471405096e-06,
      "loss": 0.8007,
      "step": 731200
    },
    {
      "epoch": 2.5627087517129468,
      "grad_norm": 2.84375,
      "learning_rate": 8.098060442741394e-06,
      "loss": 0.883,
      "step": 731210
    },
    {
      "epoch": 2.5627437992198425,
      "grad_norm": 2.734375,
      "learning_rate": 8.097411414077692e-06,
      "loss": 0.8333,
      "step": 731220
    },
    {
      "epoch": 2.5627788467267383,
      "grad_norm": 2.8125,
      "learning_rate": 8.09676238541399e-06,
      "loss": 0.7586,
      "step": 731230
    },
    {
      "epoch": 2.5628138942336336,
      "grad_norm": 2.703125,
      "learning_rate": 8.096113356750287e-06,
      "loss": 0.789,
      "step": 731240
    },
    {
      "epoch": 2.5628489417405294,
      "grad_norm": 3.0625,
      "learning_rate": 8.095464328086586e-06,
      "loss": 0.8289,
      "step": 731250
    },
    {
      "epoch": 2.5628839892474247,
      "grad_norm": 2.6875,
      "learning_rate": 8.094815299422884e-06,
      "loss": 0.7335,
      "step": 731260
    },
    {
      "epoch": 2.5629190367543204,
      "grad_norm": 2.546875,
      "learning_rate": 8.094166270759182e-06,
      "loss": 0.7385,
      "step": 731270
    },
    {
      "epoch": 2.562954084261216,
      "grad_norm": 2.734375,
      "learning_rate": 8.09351724209548e-06,
      "loss": 0.8089,
      "step": 731280
    },
    {
      "epoch": 2.562989131768112,
      "grad_norm": 3.015625,
      "learning_rate": 8.092868213431778e-06,
      "loss": 0.8199,
      "step": 731290
    },
    {
      "epoch": 2.5630241792750073,
      "grad_norm": 3.0,
      "learning_rate": 8.092219184768076e-06,
      "loss": 0.8054,
      "step": 731300
    },
    {
      "epoch": 2.563059226781903,
      "grad_norm": 2.640625,
      "learning_rate": 8.091570156104374e-06,
      "loss": 0.7887,
      "step": 731310
    },
    {
      "epoch": 2.5630942742887983,
      "grad_norm": 2.734375,
      "learning_rate": 8.090921127440674e-06,
      "loss": 0.8114,
      "step": 731320
    },
    {
      "epoch": 2.563129321795694,
      "grad_norm": 3.4375,
      "learning_rate": 8.090272098776972e-06,
      "loss": 0.7711,
      "step": 731330
    },
    {
      "epoch": 2.56316436930259,
      "grad_norm": 2.890625,
      "learning_rate": 8.089623070113268e-06,
      "loss": 0.8072,
      "step": 731340
    },
    {
      "epoch": 2.563199416809485,
      "grad_norm": 2.890625,
      "learning_rate": 8.088974041449566e-06,
      "loss": 0.7944,
      "step": 731350
    },
    {
      "epoch": 2.563234464316381,
      "grad_norm": 2.84375,
      "learning_rate": 8.088325012785864e-06,
      "loss": 0.7796,
      "step": 731360
    },
    {
      "epoch": 2.5632695118232762,
      "grad_norm": 2.765625,
      "learning_rate": 8.087675984122162e-06,
      "loss": 0.8502,
      "step": 731370
    },
    {
      "epoch": 2.563304559330172,
      "grad_norm": 2.796875,
      "learning_rate": 8.087026955458462e-06,
      "loss": 0.749,
      "step": 731380
    },
    {
      "epoch": 2.5633396068370677,
      "grad_norm": 3.0,
      "learning_rate": 8.08637792679476e-06,
      "loss": 0.863,
      "step": 731390
    },
    {
      "epoch": 2.5633746543439635,
      "grad_norm": 3.296875,
      "learning_rate": 8.085728898131058e-06,
      "loss": 0.824,
      "step": 731400
    },
    {
      "epoch": 2.563409701850859,
      "grad_norm": 3.1875,
      "learning_rate": 8.085079869467356e-06,
      "loss": 0.8407,
      "step": 731410
    },
    {
      "epoch": 2.5634447493577546,
      "grad_norm": 3.109375,
      "learning_rate": 8.084430840803654e-06,
      "loss": 0.8427,
      "step": 731420
    },
    {
      "epoch": 2.56347979686465,
      "grad_norm": 3.296875,
      "learning_rate": 8.083781812139952e-06,
      "loss": 0.83,
      "step": 731430
    },
    {
      "epoch": 2.5635148443715456,
      "grad_norm": 3.5,
      "learning_rate": 8.08313278347625e-06,
      "loss": 0.8721,
      "step": 731440
    },
    {
      "epoch": 2.5635498918784414,
      "grad_norm": 3.625,
      "learning_rate": 8.082483754812548e-06,
      "loss": 0.8283,
      "step": 731450
    },
    {
      "epoch": 2.5635849393853367,
      "grad_norm": 2.75,
      "learning_rate": 8.081834726148846e-06,
      "loss": 0.7792,
      "step": 731460
    },
    {
      "epoch": 2.5636199868922325,
      "grad_norm": 2.421875,
      "learning_rate": 8.081185697485144e-06,
      "loss": 0.7413,
      "step": 731470
    },
    {
      "epoch": 2.563655034399128,
      "grad_norm": 2.5625,
      "learning_rate": 8.080536668821442e-06,
      "loss": 0.7875,
      "step": 731480
    },
    {
      "epoch": 2.5636900819060235,
      "grad_norm": 2.703125,
      "learning_rate": 8.07988764015774e-06,
      "loss": 0.7412,
      "step": 731490
    },
    {
      "epoch": 2.5637251294129193,
      "grad_norm": 3.0625,
      "learning_rate": 8.07923861149404e-06,
      "loss": 0.8672,
      "step": 731500
    },
    {
      "epoch": 2.563760176919815,
      "grad_norm": 3.03125,
      "learning_rate": 8.078589582830338e-06,
      "loss": 0.7466,
      "step": 731510
    },
    {
      "epoch": 2.5637952244267104,
      "grad_norm": 3.15625,
      "learning_rate": 8.077940554166634e-06,
      "loss": 0.832,
      "step": 731520
    },
    {
      "epoch": 2.563830271933606,
      "grad_norm": 3.21875,
      "learning_rate": 8.077291525502932e-06,
      "loss": 0.7812,
      "step": 731530
    },
    {
      "epoch": 2.5638653194405014,
      "grad_norm": 2.84375,
      "learning_rate": 8.07664249683923e-06,
      "loss": 0.8108,
      "step": 731540
    },
    {
      "epoch": 2.563900366947397,
      "grad_norm": 2.984375,
      "learning_rate": 8.075993468175528e-06,
      "loss": 0.9042,
      "step": 731550
    },
    {
      "epoch": 2.563935414454293,
      "grad_norm": 3.09375,
      "learning_rate": 8.075344439511828e-06,
      "loss": 0.8628,
      "step": 731560
    },
    {
      "epoch": 2.5639704619611883,
      "grad_norm": 2.6875,
      "learning_rate": 8.074695410848126e-06,
      "loss": 0.8117,
      "step": 731570
    },
    {
      "epoch": 2.564005509468084,
      "grad_norm": 3.046875,
      "learning_rate": 8.074046382184424e-06,
      "loss": 0.821,
      "step": 731580
    },
    {
      "epoch": 2.5640405569749793,
      "grad_norm": 2.546875,
      "learning_rate": 8.073397353520722e-06,
      "loss": 0.8194,
      "step": 731590
    },
    {
      "epoch": 2.564075604481875,
      "grad_norm": 3.21875,
      "learning_rate": 8.07274832485702e-06,
      "loss": 0.8091,
      "step": 731600
    },
    {
      "epoch": 2.564110651988771,
      "grad_norm": 2.9375,
      "learning_rate": 8.072099296193318e-06,
      "loss": 0.8492,
      "step": 731610
    },
    {
      "epoch": 2.5641456994956666,
      "grad_norm": 2.53125,
      "learning_rate": 8.071450267529616e-06,
      "loss": 0.7462,
      "step": 731620
    },
    {
      "epoch": 2.564180747002562,
      "grad_norm": 3.015625,
      "learning_rate": 8.070801238865914e-06,
      "loss": 0.7882,
      "step": 731630
    },
    {
      "epoch": 2.5642157945094577,
      "grad_norm": 2.546875,
      "learning_rate": 8.070152210202212e-06,
      "loss": 0.7843,
      "step": 731640
    },
    {
      "epoch": 2.564250842016353,
      "grad_norm": 2.609375,
      "learning_rate": 8.06950318153851e-06,
      "loss": 0.8116,
      "step": 731650
    },
    {
      "epoch": 2.5642858895232488,
      "grad_norm": 3.171875,
      "learning_rate": 8.068854152874808e-06,
      "loss": 0.7716,
      "step": 731660
    },
    {
      "epoch": 2.5643209370301445,
      "grad_norm": 2.296875,
      "learning_rate": 8.068205124211106e-06,
      "loss": 0.7596,
      "step": 731670
    },
    {
      "epoch": 2.56435598453704,
      "grad_norm": 3.078125,
      "learning_rate": 8.067556095547404e-06,
      "loss": 0.7804,
      "step": 731680
    },
    {
      "epoch": 2.5643910320439356,
      "grad_norm": 2.921875,
      "learning_rate": 8.066907066883703e-06,
      "loss": 0.847,
      "step": 731690
    },
    {
      "epoch": 2.564426079550831,
      "grad_norm": 2.46875,
      "learning_rate": 8.066258038220001e-06,
      "loss": 0.7772,
      "step": 731700
    },
    {
      "epoch": 2.5644611270577267,
      "grad_norm": 3.109375,
      "learning_rate": 8.065609009556298e-06,
      "loss": 0.7555,
      "step": 731710
    },
    {
      "epoch": 2.5644961745646224,
      "grad_norm": 2.828125,
      "learning_rate": 8.064959980892596e-06,
      "loss": 0.8234,
      "step": 731720
    },
    {
      "epoch": 2.564531222071518,
      "grad_norm": 3.1875,
      "learning_rate": 8.064310952228894e-06,
      "loss": 0.8868,
      "step": 731730
    },
    {
      "epoch": 2.5645662695784135,
      "grad_norm": 2.625,
      "learning_rate": 8.063661923565193e-06,
      "loss": 0.7697,
      "step": 731740
    },
    {
      "epoch": 2.5646013170853093,
      "grad_norm": 2.6875,
      "learning_rate": 8.063012894901491e-06,
      "loss": 0.7961,
      "step": 731750
    },
    {
      "epoch": 2.5646363645922046,
      "grad_norm": 2.78125,
      "learning_rate": 8.06236386623779e-06,
      "loss": 0.8211,
      "step": 731760
    },
    {
      "epoch": 2.5646714120991003,
      "grad_norm": 2.921875,
      "learning_rate": 8.061714837574087e-06,
      "loss": 0.7761,
      "step": 731770
    },
    {
      "epoch": 2.564706459605996,
      "grad_norm": 2.84375,
      "learning_rate": 8.061065808910385e-06,
      "loss": 0.84,
      "step": 731780
    },
    {
      "epoch": 2.5647415071128914,
      "grad_norm": 2.875,
      "learning_rate": 8.060416780246683e-06,
      "loss": 0.8317,
      "step": 731790
    },
    {
      "epoch": 2.564776554619787,
      "grad_norm": 2.65625,
      "learning_rate": 8.059767751582981e-06,
      "loss": 0.8332,
      "step": 731800
    },
    {
      "epoch": 2.5648116021266825,
      "grad_norm": 2.28125,
      "learning_rate": 8.05911872291928e-06,
      "loss": 0.7635,
      "step": 731810
    },
    {
      "epoch": 2.5648466496335782,
      "grad_norm": 2.828125,
      "learning_rate": 8.058469694255577e-06,
      "loss": 0.7321,
      "step": 731820
    },
    {
      "epoch": 2.564881697140474,
      "grad_norm": 2.875,
      "learning_rate": 8.057820665591875e-06,
      "loss": 0.8494,
      "step": 731830
    },
    {
      "epoch": 2.5649167446473697,
      "grad_norm": 2.578125,
      "learning_rate": 8.057171636928173e-06,
      "loss": 0.828,
      "step": 731840
    },
    {
      "epoch": 2.564951792154265,
      "grad_norm": 3.078125,
      "learning_rate": 8.056522608264471e-06,
      "loss": 0.7861,
      "step": 731850
    },
    {
      "epoch": 2.564986839661161,
      "grad_norm": 2.96875,
      "learning_rate": 8.05587357960077e-06,
      "loss": 0.8427,
      "step": 731860
    },
    {
      "epoch": 2.565021887168056,
      "grad_norm": 2.828125,
      "learning_rate": 8.055224550937069e-06,
      "loss": 0.7403,
      "step": 731870
    },
    {
      "epoch": 2.565056934674952,
      "grad_norm": 3.40625,
      "learning_rate": 8.054575522273367e-06,
      "loss": 0.8136,
      "step": 731880
    },
    {
      "epoch": 2.5650919821818476,
      "grad_norm": 3.25,
      "learning_rate": 8.053926493609665e-06,
      "loss": 0.8645,
      "step": 731890
    },
    {
      "epoch": 2.565127029688743,
      "grad_norm": 2.65625,
      "learning_rate": 8.053277464945961e-06,
      "loss": 0.7751,
      "step": 731900
    },
    {
      "epoch": 2.5651620771956387,
      "grad_norm": 2.875,
      "learning_rate": 8.05262843628226e-06,
      "loss": 0.7704,
      "step": 731910
    },
    {
      "epoch": 2.565197124702534,
      "grad_norm": 3.046875,
      "learning_rate": 8.051979407618557e-06,
      "loss": 0.7756,
      "step": 731920
    },
    {
      "epoch": 2.56523217220943,
      "grad_norm": 2.6875,
      "learning_rate": 8.051330378954857e-06,
      "loss": 0.845,
      "step": 731930
    },
    {
      "epoch": 2.5652672197163255,
      "grad_norm": 3.046875,
      "learning_rate": 8.050681350291155e-06,
      "loss": 0.8946,
      "step": 731940
    },
    {
      "epoch": 2.5653022672232213,
      "grad_norm": 2.953125,
      "learning_rate": 8.050032321627453e-06,
      "loss": 0.7657,
      "step": 731950
    },
    {
      "epoch": 2.5653373147301166,
      "grad_norm": 3.265625,
      "learning_rate": 8.049383292963751e-06,
      "loss": 0.7902,
      "step": 731960
    },
    {
      "epoch": 2.5653723622370124,
      "grad_norm": 3.890625,
      "learning_rate": 8.048734264300049e-06,
      "loss": 0.8292,
      "step": 731970
    },
    {
      "epoch": 2.5654074097439077,
      "grad_norm": 3.078125,
      "learning_rate": 8.048085235636347e-06,
      "loss": 0.7768,
      "step": 731980
    },
    {
      "epoch": 2.5654424572508034,
      "grad_norm": 2.90625,
      "learning_rate": 8.047436206972645e-06,
      "loss": 0.7899,
      "step": 731990
    },
    {
      "epoch": 2.565477504757699,
      "grad_norm": 2.8125,
      "learning_rate": 8.046787178308943e-06,
      "loss": 0.824,
      "step": 732000
    },
    {
      "epoch": 2.5655125522645945,
      "grad_norm": 3.046875,
      "learning_rate": 8.046138149645241e-06,
      "loss": 0.7556,
      "step": 732010
    },
    {
      "epoch": 2.5655475997714903,
      "grad_norm": 2.71875,
      "learning_rate": 8.045489120981539e-06,
      "loss": 0.8975,
      "step": 732020
    },
    {
      "epoch": 2.5655826472783856,
      "grad_norm": 3.34375,
      "learning_rate": 8.044840092317837e-06,
      "loss": 0.8624,
      "step": 732030
    },
    {
      "epoch": 2.5656176947852813,
      "grad_norm": 3.21875,
      "learning_rate": 8.044191063654135e-06,
      "loss": 0.8958,
      "step": 732040
    },
    {
      "epoch": 2.565652742292177,
      "grad_norm": 3.078125,
      "learning_rate": 8.043542034990435e-06,
      "loss": 0.8051,
      "step": 732050
    },
    {
      "epoch": 2.565687789799073,
      "grad_norm": 2.875,
      "learning_rate": 8.042893006326733e-06,
      "loss": 0.8714,
      "step": 732060
    },
    {
      "epoch": 2.565722837305968,
      "grad_norm": 2.71875,
      "learning_rate": 8.04224397766303e-06,
      "loss": 0.7752,
      "step": 732070
    },
    {
      "epoch": 2.565757884812864,
      "grad_norm": 2.546875,
      "learning_rate": 8.041594948999329e-06,
      "loss": 0.7972,
      "step": 732080
    },
    {
      "epoch": 2.5657929323197592,
      "grad_norm": 3.265625,
      "learning_rate": 8.040945920335625e-06,
      "loss": 0.7871,
      "step": 732090
    },
    {
      "epoch": 2.565827979826655,
      "grad_norm": 2.8125,
      "learning_rate": 8.040296891671923e-06,
      "loss": 0.7667,
      "step": 732100
    },
    {
      "epoch": 2.5658630273335508,
      "grad_norm": 2.828125,
      "learning_rate": 8.039647863008223e-06,
      "loss": 0.791,
      "step": 732110
    },
    {
      "epoch": 2.565898074840446,
      "grad_norm": 2.890625,
      "learning_rate": 8.03899883434452e-06,
      "loss": 0.7982,
      "step": 732120
    },
    {
      "epoch": 2.565933122347342,
      "grad_norm": 3.140625,
      "learning_rate": 8.038349805680819e-06,
      "loss": 0.7534,
      "step": 732130
    },
    {
      "epoch": 2.565968169854237,
      "grad_norm": 2.734375,
      "learning_rate": 8.037700777017117e-06,
      "loss": 0.7807,
      "step": 732140
    },
    {
      "epoch": 2.566003217361133,
      "grad_norm": 2.859375,
      "learning_rate": 8.037051748353415e-06,
      "loss": 0.8523,
      "step": 732150
    },
    {
      "epoch": 2.5660382648680287,
      "grad_norm": 2.734375,
      "learning_rate": 8.036402719689713e-06,
      "loss": 0.8584,
      "step": 732160
    },
    {
      "epoch": 2.5660733123749244,
      "grad_norm": 3.09375,
      "learning_rate": 8.03575369102601e-06,
      "loss": 0.7814,
      "step": 732170
    },
    {
      "epoch": 2.5661083598818197,
      "grad_norm": 2.96875,
      "learning_rate": 8.035104662362309e-06,
      "loss": 0.7582,
      "step": 732180
    },
    {
      "epoch": 2.5661434073887155,
      "grad_norm": 2.640625,
      "learning_rate": 8.034455633698607e-06,
      "loss": 0.8067,
      "step": 732190
    },
    {
      "epoch": 2.566178454895611,
      "grad_norm": 3.109375,
      "learning_rate": 8.033806605034905e-06,
      "loss": 0.9342,
      "step": 732200
    },
    {
      "epoch": 2.5662135024025066,
      "grad_norm": 2.625,
      "learning_rate": 8.033157576371203e-06,
      "loss": 0.8224,
      "step": 732210
    },
    {
      "epoch": 2.5662485499094023,
      "grad_norm": 3.375,
      "learning_rate": 8.0325085477075e-06,
      "loss": 0.8398,
      "step": 732220
    },
    {
      "epoch": 2.5662835974162976,
      "grad_norm": 3.015625,
      "learning_rate": 8.031859519043799e-06,
      "loss": 0.7992,
      "step": 732230
    },
    {
      "epoch": 2.5663186449231934,
      "grad_norm": 2.5625,
      "learning_rate": 8.031210490380098e-06,
      "loss": 0.8312,
      "step": 732240
    },
    {
      "epoch": 2.566353692430089,
      "grad_norm": 3.203125,
      "learning_rate": 8.030561461716396e-06,
      "loss": 0.7448,
      "step": 732250
    },
    {
      "epoch": 2.5663887399369845,
      "grad_norm": 3.15625,
      "learning_rate": 8.029912433052694e-06,
      "loss": 0.8049,
      "step": 732260
    },
    {
      "epoch": 2.5664237874438802,
      "grad_norm": 3.0,
      "learning_rate": 8.029263404388992e-06,
      "loss": 0.7783,
      "step": 732270
    },
    {
      "epoch": 2.566458834950776,
      "grad_norm": 2.6875,
      "learning_rate": 8.028614375725289e-06,
      "loss": 0.7647,
      "step": 732280
    },
    {
      "epoch": 2.5664938824576713,
      "grad_norm": 2.84375,
      "learning_rate": 8.027965347061588e-06,
      "loss": 0.7731,
      "step": 732290
    },
    {
      "epoch": 2.566528929964567,
      "grad_norm": 2.921875,
      "learning_rate": 8.027316318397886e-06,
      "loss": 0.7535,
      "step": 732300
    },
    {
      "epoch": 2.5665639774714624,
      "grad_norm": 2.90625,
      "learning_rate": 8.026667289734184e-06,
      "loss": 0.8213,
      "step": 732310
    },
    {
      "epoch": 2.566599024978358,
      "grad_norm": 2.6875,
      "learning_rate": 8.026018261070482e-06,
      "loss": 0.7573,
      "step": 732320
    },
    {
      "epoch": 2.566634072485254,
      "grad_norm": 3.296875,
      "learning_rate": 8.02536923240678e-06,
      "loss": 0.796,
      "step": 732330
    },
    {
      "epoch": 2.566669119992149,
      "grad_norm": 2.78125,
      "learning_rate": 8.024720203743078e-06,
      "loss": 0.7836,
      "step": 732340
    },
    {
      "epoch": 2.566704167499045,
      "grad_norm": 3.125,
      "learning_rate": 8.024071175079376e-06,
      "loss": 0.7985,
      "step": 732350
    },
    {
      "epoch": 2.5667392150059407,
      "grad_norm": 2.921875,
      "learning_rate": 8.023422146415676e-06,
      "loss": 0.7968,
      "step": 732360
    },
    {
      "epoch": 2.566774262512836,
      "grad_norm": 3.015625,
      "learning_rate": 8.022773117751972e-06,
      "loss": 0.7508,
      "step": 732370
    },
    {
      "epoch": 2.566809310019732,
      "grad_norm": 2.90625,
      "learning_rate": 8.02212408908827e-06,
      "loss": 0.856,
      "step": 732380
    },
    {
      "epoch": 2.5668443575266275,
      "grad_norm": 2.78125,
      "learning_rate": 8.021475060424568e-06,
      "loss": 0.8067,
      "step": 732390
    },
    {
      "epoch": 2.566879405033523,
      "grad_norm": 2.90625,
      "learning_rate": 8.020826031760866e-06,
      "loss": 0.7477,
      "step": 732400
    },
    {
      "epoch": 2.5669144525404186,
      "grad_norm": 3.140625,
      "learning_rate": 8.020177003097164e-06,
      "loss": 0.8412,
      "step": 732410
    },
    {
      "epoch": 2.566949500047314,
      "grad_norm": 3.0,
      "learning_rate": 8.019527974433464e-06,
      "loss": 0.7902,
      "step": 732420
    },
    {
      "epoch": 2.5669845475542097,
      "grad_norm": 3.09375,
      "learning_rate": 8.018878945769762e-06,
      "loss": 0.79,
      "step": 732430
    },
    {
      "epoch": 2.5670195950611054,
      "grad_norm": 2.71875,
      "learning_rate": 8.01822991710606e-06,
      "loss": 0.7558,
      "step": 732440
    },
    {
      "epoch": 2.567054642568001,
      "grad_norm": 3.03125,
      "learning_rate": 8.017580888442358e-06,
      "loss": 0.8289,
      "step": 732450
    },
    {
      "epoch": 2.5670896900748965,
      "grad_norm": 3.015625,
      "learning_rate": 8.016931859778654e-06,
      "loss": 0.7835,
      "step": 732460
    },
    {
      "epoch": 2.5671247375817923,
      "grad_norm": 3.203125,
      "learning_rate": 8.016282831114952e-06,
      "loss": 0.8143,
      "step": 732470
    },
    {
      "epoch": 2.5671597850886876,
      "grad_norm": 3.03125,
      "learning_rate": 8.015633802451252e-06,
      "loss": 0.8162,
      "step": 732480
    },
    {
      "epoch": 2.5671948325955833,
      "grad_norm": 2.640625,
      "learning_rate": 8.01498477378755e-06,
      "loss": 0.7461,
      "step": 732490
    },
    {
      "epoch": 2.567229880102479,
      "grad_norm": 3.359375,
      "learning_rate": 8.014335745123848e-06,
      "loss": 0.9027,
      "step": 732500
    },
    {
      "epoch": 2.5672649276093744,
      "grad_norm": 2.59375,
      "learning_rate": 8.013686716460146e-06,
      "loss": 0.7156,
      "step": 732510
    },
    {
      "epoch": 2.56729997511627,
      "grad_norm": 2.765625,
      "learning_rate": 8.013037687796444e-06,
      "loss": 0.8649,
      "step": 732520
    },
    {
      "epoch": 2.5673350226231655,
      "grad_norm": 3.09375,
      "learning_rate": 8.012388659132742e-06,
      "loss": 0.7955,
      "step": 732530
    },
    {
      "epoch": 2.5673700701300612,
      "grad_norm": 2.984375,
      "learning_rate": 8.01173963046904e-06,
      "loss": 0.8167,
      "step": 732540
    },
    {
      "epoch": 2.567405117636957,
      "grad_norm": 3.0,
      "learning_rate": 8.01109060180534e-06,
      "loss": 0.7877,
      "step": 732550
    },
    {
      "epoch": 2.5674401651438528,
      "grad_norm": 2.796875,
      "learning_rate": 8.010441573141636e-06,
      "loss": 0.75,
      "step": 732560
    },
    {
      "epoch": 2.567475212650748,
      "grad_norm": 3.21875,
      "learning_rate": 8.009792544477934e-06,
      "loss": 0.904,
      "step": 732570
    },
    {
      "epoch": 2.567510260157644,
      "grad_norm": 2.5625,
      "learning_rate": 8.009143515814232e-06,
      "loss": 0.8182,
      "step": 732580
    },
    {
      "epoch": 2.567545307664539,
      "grad_norm": 2.78125,
      "learning_rate": 8.00849448715053e-06,
      "loss": 0.8517,
      "step": 732590
    },
    {
      "epoch": 2.567580355171435,
      "grad_norm": 2.96875,
      "learning_rate": 8.00784545848683e-06,
      "loss": 0.7823,
      "step": 732600
    },
    {
      "epoch": 2.5676154026783307,
      "grad_norm": 3.40625,
      "learning_rate": 8.007196429823128e-06,
      "loss": 0.8097,
      "step": 732610
    },
    {
      "epoch": 2.567650450185226,
      "grad_norm": 2.9375,
      "learning_rate": 8.006547401159426e-06,
      "loss": 0.7304,
      "step": 732620
    },
    {
      "epoch": 2.5676854976921217,
      "grad_norm": 3.078125,
      "learning_rate": 8.005898372495724e-06,
      "loss": 0.8374,
      "step": 732630
    },
    {
      "epoch": 2.567720545199017,
      "grad_norm": 3.125,
      "learning_rate": 8.005249343832022e-06,
      "loss": 0.7762,
      "step": 732640
    },
    {
      "epoch": 2.567755592705913,
      "grad_norm": 2.875,
      "learning_rate": 8.004600315168318e-06,
      "loss": 0.8264,
      "step": 732650
    },
    {
      "epoch": 2.5677906402128086,
      "grad_norm": 3.265625,
      "learning_rate": 8.003951286504618e-06,
      "loss": 0.7116,
      "step": 732660
    },
    {
      "epoch": 2.5678256877197043,
      "grad_norm": 3.578125,
      "learning_rate": 8.003302257840916e-06,
      "loss": 0.8137,
      "step": 732670
    },
    {
      "epoch": 2.5678607352265996,
      "grad_norm": 2.90625,
      "learning_rate": 8.002653229177214e-06,
      "loss": 0.8474,
      "step": 732680
    },
    {
      "epoch": 2.5678957827334954,
      "grad_norm": 2.75,
      "learning_rate": 8.002004200513512e-06,
      "loss": 0.766,
      "step": 732690
    },
    {
      "epoch": 2.5679308302403907,
      "grad_norm": 2.734375,
      "learning_rate": 8.00135517184981e-06,
      "loss": 0.7706,
      "step": 732700
    },
    {
      "epoch": 2.5679658777472865,
      "grad_norm": 2.46875,
      "learning_rate": 8.000706143186108e-06,
      "loss": 0.8307,
      "step": 732710
    },
    {
      "epoch": 2.568000925254182,
      "grad_norm": 2.65625,
      "learning_rate": 8.000057114522406e-06,
      "loss": 0.7934,
      "step": 732720
    },
    {
      "epoch": 2.5680359727610775,
      "grad_norm": 3.359375,
      "learning_rate": 7.999408085858706e-06,
      "loss": 0.8296,
      "step": 732730
    },
    {
      "epoch": 2.5680710202679733,
      "grad_norm": 2.9375,
      "learning_rate": 7.998759057195004e-06,
      "loss": 0.7981,
      "step": 732740
    },
    {
      "epoch": 2.5681060677748686,
      "grad_norm": 2.96875,
      "learning_rate": 7.9981100285313e-06,
      "loss": 0.7875,
      "step": 732750
    },
    {
      "epoch": 2.5681411152817644,
      "grad_norm": 2.84375,
      "learning_rate": 7.997460999867598e-06,
      "loss": 0.8038,
      "step": 732760
    },
    {
      "epoch": 2.56817616278866,
      "grad_norm": 3.09375,
      "learning_rate": 7.996811971203896e-06,
      "loss": 0.8881,
      "step": 732770
    },
    {
      "epoch": 2.568211210295556,
      "grad_norm": 3.171875,
      "learning_rate": 7.996162942540194e-06,
      "loss": 0.8425,
      "step": 732780
    },
    {
      "epoch": 2.568246257802451,
      "grad_norm": 3.546875,
      "learning_rate": 7.995513913876494e-06,
      "loss": 0.8438,
      "step": 732790
    },
    {
      "epoch": 2.568281305309347,
      "grad_norm": 2.8125,
      "learning_rate": 7.994864885212792e-06,
      "loss": 0.7747,
      "step": 732800
    },
    {
      "epoch": 2.5683163528162423,
      "grad_norm": 3.03125,
      "learning_rate": 7.99421585654909e-06,
      "loss": 0.8027,
      "step": 732810
    },
    {
      "epoch": 2.568351400323138,
      "grad_norm": 2.984375,
      "learning_rate": 7.993566827885388e-06,
      "loss": 0.7542,
      "step": 732820
    },
    {
      "epoch": 2.568386447830034,
      "grad_norm": 3.125,
      "learning_rate": 7.992917799221685e-06,
      "loss": 0.7402,
      "step": 732830
    },
    {
      "epoch": 2.568421495336929,
      "grad_norm": 2.625,
      "learning_rate": 7.992268770557983e-06,
      "loss": 0.788,
      "step": 732840
    },
    {
      "epoch": 2.568456542843825,
      "grad_norm": 3.0,
      "learning_rate": 7.991619741894281e-06,
      "loss": 0.7948,
      "step": 732850
    },
    {
      "epoch": 2.56849159035072,
      "grad_norm": 2.640625,
      "learning_rate": 7.99097071323058e-06,
      "loss": 0.7662,
      "step": 732860
    },
    {
      "epoch": 2.568526637857616,
      "grad_norm": 3.015625,
      "learning_rate": 7.990321684566877e-06,
      "loss": 0.8399,
      "step": 732870
    },
    {
      "epoch": 2.5685616853645117,
      "grad_norm": 3.0625,
      "learning_rate": 7.989672655903175e-06,
      "loss": 0.7791,
      "step": 732880
    },
    {
      "epoch": 2.5685967328714074,
      "grad_norm": 2.9375,
      "learning_rate": 7.989023627239473e-06,
      "loss": 0.7813,
      "step": 732890
    },
    {
      "epoch": 2.5686317803783028,
      "grad_norm": 3.125,
      "learning_rate": 7.988374598575771e-06,
      "loss": 0.7851,
      "step": 732900
    },
    {
      "epoch": 2.5686668278851985,
      "grad_norm": 2.875,
      "learning_rate": 7.987725569912071e-06,
      "loss": 0.7579,
      "step": 732910
    },
    {
      "epoch": 2.568701875392094,
      "grad_norm": 2.796875,
      "learning_rate": 7.98707654124837e-06,
      "loss": 0.7082,
      "step": 732920
    },
    {
      "epoch": 2.5687369228989896,
      "grad_norm": 3.25,
      "learning_rate": 7.986427512584665e-06,
      "loss": 0.8228,
      "step": 732930
    },
    {
      "epoch": 2.5687719704058853,
      "grad_norm": 3.703125,
      "learning_rate": 7.985778483920963e-06,
      "loss": 0.8308,
      "step": 732940
    },
    {
      "epoch": 2.5688070179127807,
      "grad_norm": 3.515625,
      "learning_rate": 7.985129455257261e-06,
      "loss": 0.8453,
      "step": 732950
    },
    {
      "epoch": 2.5688420654196764,
      "grad_norm": 3.109375,
      "learning_rate": 7.98448042659356e-06,
      "loss": 0.862,
      "step": 732960
    },
    {
      "epoch": 2.5688771129265717,
      "grad_norm": 3.28125,
      "learning_rate": 7.98383139792986e-06,
      "loss": 0.8314,
      "step": 732970
    },
    {
      "epoch": 2.5689121604334675,
      "grad_norm": 2.90625,
      "learning_rate": 7.983182369266157e-06,
      "loss": 0.8205,
      "step": 732980
    },
    {
      "epoch": 2.5689472079403632,
      "grad_norm": 3.953125,
      "learning_rate": 7.982533340602455e-06,
      "loss": 0.7726,
      "step": 732990
    },
    {
      "epoch": 2.568982255447259,
      "grad_norm": 2.796875,
      "learning_rate": 7.981884311938753e-06,
      "loss": 0.7459,
      "step": 733000
    },
    {
      "epoch": 2.5690173029541543,
      "grad_norm": 3.046875,
      "learning_rate": 7.981235283275051e-06,
      "loss": 0.753,
      "step": 733010
    },
    {
      "epoch": 2.56905235046105,
      "grad_norm": 3.109375,
      "learning_rate": 7.98058625461135e-06,
      "loss": 0.8138,
      "step": 733020
    },
    {
      "epoch": 2.5690873979679454,
      "grad_norm": 2.8125,
      "learning_rate": 7.979937225947647e-06,
      "loss": 0.7323,
      "step": 733030
    },
    {
      "epoch": 2.569122445474841,
      "grad_norm": 2.765625,
      "learning_rate": 7.979288197283945e-06,
      "loss": 0.8038,
      "step": 733040
    },
    {
      "epoch": 2.569157492981737,
      "grad_norm": 2.546875,
      "learning_rate": 7.978639168620243e-06,
      "loss": 0.7641,
      "step": 733050
    },
    {
      "epoch": 2.569192540488632,
      "grad_norm": 2.765625,
      "learning_rate": 7.977990139956541e-06,
      "loss": 0.8313,
      "step": 733060
    },
    {
      "epoch": 2.569227587995528,
      "grad_norm": 2.75,
      "learning_rate": 7.97734111129284e-06,
      "loss": 0.7718,
      "step": 733070
    },
    {
      "epoch": 2.5692626355024233,
      "grad_norm": 2.75,
      "learning_rate": 7.976692082629137e-06,
      "loss": 0.7639,
      "step": 733080
    },
    {
      "epoch": 2.569297683009319,
      "grad_norm": 3.140625,
      "learning_rate": 7.976043053965435e-06,
      "loss": 0.8113,
      "step": 733090
    },
    {
      "epoch": 2.569332730516215,
      "grad_norm": 3.140625,
      "learning_rate": 7.975394025301735e-06,
      "loss": 0.7742,
      "step": 733100
    },
    {
      "epoch": 2.5693677780231106,
      "grad_norm": 2.671875,
      "learning_rate": 7.974744996638033e-06,
      "loss": 0.7809,
      "step": 733110
    },
    {
      "epoch": 2.569402825530006,
      "grad_norm": 3.046875,
      "learning_rate": 7.97409596797433e-06,
      "loss": 0.8291,
      "step": 733120
    },
    {
      "epoch": 2.5694378730369016,
      "grad_norm": 3.546875,
      "learning_rate": 7.973446939310627e-06,
      "loss": 0.8256,
      "step": 733130
    },
    {
      "epoch": 2.569472920543797,
      "grad_norm": 2.78125,
      "learning_rate": 7.972797910646925e-06,
      "loss": 0.8094,
      "step": 733140
    },
    {
      "epoch": 2.5695079680506927,
      "grad_norm": 2.984375,
      "learning_rate": 7.972148881983225e-06,
      "loss": 0.7194,
      "step": 733150
    },
    {
      "epoch": 2.5695430155575885,
      "grad_norm": 3.4375,
      "learning_rate": 7.971499853319523e-06,
      "loss": 0.8325,
      "step": 733160
    },
    {
      "epoch": 2.5695780630644838,
      "grad_norm": 2.96875,
      "learning_rate": 7.970850824655821e-06,
      "loss": 0.7609,
      "step": 733170
    },
    {
      "epoch": 2.5696131105713795,
      "grad_norm": 2.8125,
      "learning_rate": 7.970201795992119e-06,
      "loss": 0.8038,
      "step": 733180
    },
    {
      "epoch": 2.569648158078275,
      "grad_norm": 3.015625,
      "learning_rate": 7.969552767328417e-06,
      "loss": 0.8283,
      "step": 733190
    },
    {
      "epoch": 2.5696832055851706,
      "grad_norm": 3.21875,
      "learning_rate": 7.968903738664715e-06,
      "loss": 0.7654,
      "step": 733200
    },
    {
      "epoch": 2.5697182530920664,
      "grad_norm": 2.90625,
      "learning_rate": 7.968254710001013e-06,
      "loss": 0.8329,
      "step": 733210
    },
    {
      "epoch": 2.569753300598962,
      "grad_norm": 2.78125,
      "learning_rate": 7.967605681337311e-06,
      "loss": 0.8242,
      "step": 733220
    },
    {
      "epoch": 2.5697883481058574,
      "grad_norm": 2.796875,
      "learning_rate": 7.966956652673609e-06,
      "loss": 0.8501,
      "step": 733230
    },
    {
      "epoch": 2.569823395612753,
      "grad_norm": 2.6875,
      "learning_rate": 7.966307624009907e-06,
      "loss": 0.8034,
      "step": 733240
    },
    {
      "epoch": 2.5698584431196485,
      "grad_norm": 2.9375,
      "learning_rate": 7.965658595346205e-06,
      "loss": 0.8369,
      "step": 733250
    },
    {
      "epoch": 2.5698934906265443,
      "grad_norm": 2.6875,
      "learning_rate": 7.965009566682503e-06,
      "loss": 0.7903,
      "step": 733260
    },
    {
      "epoch": 2.56992853813344,
      "grad_norm": 2.671875,
      "learning_rate": 7.964360538018801e-06,
      "loss": 0.8652,
      "step": 733270
    },
    {
      "epoch": 2.5699635856403353,
      "grad_norm": 2.890625,
      "learning_rate": 7.9637115093551e-06,
      "loss": 0.8347,
      "step": 733280
    },
    {
      "epoch": 2.569998633147231,
      "grad_norm": 2.90625,
      "learning_rate": 7.963062480691399e-06,
      "loss": 0.7675,
      "step": 733290
    },
    {
      "epoch": 2.5700336806541264,
      "grad_norm": 2.6875,
      "learning_rate": 7.962413452027697e-06,
      "loss": 0.8635,
      "step": 733300
    },
    {
      "epoch": 2.570068728161022,
      "grad_norm": 3.15625,
      "learning_rate": 7.961764423363993e-06,
      "loss": 0.8455,
      "step": 733310
    },
    {
      "epoch": 2.570103775667918,
      "grad_norm": 2.8125,
      "learning_rate": 7.961115394700291e-06,
      "loss": 0.7383,
      "step": 733320
    },
    {
      "epoch": 2.5701388231748137,
      "grad_norm": 3.265625,
      "learning_rate": 7.960466366036589e-06,
      "loss": 0.8211,
      "step": 733330
    },
    {
      "epoch": 2.570173870681709,
      "grad_norm": 3.125,
      "learning_rate": 7.959817337372889e-06,
      "loss": 0.788,
      "step": 733340
    },
    {
      "epoch": 2.5702089181886048,
      "grad_norm": 2.8125,
      "learning_rate": 7.959168308709187e-06,
      "loss": 0.8331,
      "step": 733350
    },
    {
      "epoch": 2.5702439656955,
      "grad_norm": 3.078125,
      "learning_rate": 7.958519280045485e-06,
      "loss": 0.8273,
      "step": 733360
    },
    {
      "epoch": 2.570279013202396,
      "grad_norm": 3.234375,
      "learning_rate": 7.957870251381783e-06,
      "loss": 0.8375,
      "step": 733370
    },
    {
      "epoch": 2.5703140607092916,
      "grad_norm": 3.15625,
      "learning_rate": 7.95722122271808e-06,
      "loss": 0.8337,
      "step": 733380
    },
    {
      "epoch": 2.570349108216187,
      "grad_norm": 3.390625,
      "learning_rate": 7.956572194054379e-06,
      "loss": 0.8954,
      "step": 733390
    },
    {
      "epoch": 2.5703841557230827,
      "grad_norm": 2.796875,
      "learning_rate": 7.955923165390677e-06,
      "loss": 0.8304,
      "step": 733400
    },
    {
      "epoch": 2.570419203229978,
      "grad_norm": 3.0625,
      "learning_rate": 7.955274136726975e-06,
      "loss": 0.8623,
      "step": 733410
    },
    {
      "epoch": 2.5704542507368737,
      "grad_norm": 2.328125,
      "learning_rate": 7.954625108063273e-06,
      "loss": 0.7817,
      "step": 733420
    },
    {
      "epoch": 2.5704892982437695,
      "grad_norm": 3.015625,
      "learning_rate": 7.95397607939957e-06,
      "loss": 0.8023,
      "step": 733430
    },
    {
      "epoch": 2.5705243457506652,
      "grad_norm": 3.0625,
      "learning_rate": 7.953327050735869e-06,
      "loss": 0.8072,
      "step": 733440
    },
    {
      "epoch": 2.5705593932575606,
      "grad_norm": 3.0625,
      "learning_rate": 7.952678022072167e-06,
      "loss": 0.8306,
      "step": 733450
    },
    {
      "epoch": 2.5705944407644563,
      "grad_norm": 2.921875,
      "learning_rate": 7.952028993408466e-06,
      "loss": 0.787,
      "step": 733460
    },
    {
      "epoch": 2.5706294882713516,
      "grad_norm": 2.59375,
      "learning_rate": 7.951379964744764e-06,
      "loss": 0.8086,
      "step": 733470
    },
    {
      "epoch": 2.5706645357782474,
      "grad_norm": 2.796875,
      "learning_rate": 7.950730936081062e-06,
      "loss": 0.8096,
      "step": 733480
    },
    {
      "epoch": 2.570699583285143,
      "grad_norm": 2.875,
      "learning_rate": 7.95008190741736e-06,
      "loss": 0.7572,
      "step": 733490
    },
    {
      "epoch": 2.5707346307920385,
      "grad_norm": 3.15625,
      "learning_rate": 7.949432878753657e-06,
      "loss": 0.7358,
      "step": 733500
    },
    {
      "epoch": 2.570769678298934,
      "grad_norm": 2.875,
      "learning_rate": 7.948783850089955e-06,
      "loss": 0.8191,
      "step": 733510
    },
    {
      "epoch": 2.5708047258058295,
      "grad_norm": 2.921875,
      "learning_rate": 7.948134821426254e-06,
      "loss": 0.8717,
      "step": 733520
    },
    {
      "epoch": 2.5708397733127253,
      "grad_norm": 2.84375,
      "learning_rate": 7.947485792762552e-06,
      "loss": 0.7948,
      "step": 733530
    },
    {
      "epoch": 2.570874820819621,
      "grad_norm": 2.453125,
      "learning_rate": 7.94683676409885e-06,
      "loss": 0.7827,
      "step": 733540
    },
    {
      "epoch": 2.570909868326517,
      "grad_norm": 3.359375,
      "learning_rate": 7.946187735435148e-06,
      "loss": 0.8661,
      "step": 733550
    },
    {
      "epoch": 2.570944915833412,
      "grad_norm": 3.125,
      "learning_rate": 7.945538706771446e-06,
      "loss": 0.8014,
      "step": 733560
    },
    {
      "epoch": 2.570979963340308,
      "grad_norm": 2.890625,
      "learning_rate": 7.944889678107744e-06,
      "loss": 0.849,
      "step": 733570
    },
    {
      "epoch": 2.571015010847203,
      "grad_norm": 2.75,
      "learning_rate": 7.944240649444042e-06,
      "loss": 0.827,
      "step": 733580
    },
    {
      "epoch": 2.571050058354099,
      "grad_norm": 2.984375,
      "learning_rate": 7.94359162078034e-06,
      "loss": 0.8514,
      "step": 733590
    },
    {
      "epoch": 2.5710851058609947,
      "grad_norm": 3.15625,
      "learning_rate": 7.942942592116638e-06,
      "loss": 0.7793,
      "step": 733600
    },
    {
      "epoch": 2.57112015336789,
      "grad_norm": 2.90625,
      "learning_rate": 7.942293563452936e-06,
      "loss": 0.7961,
      "step": 733610
    },
    {
      "epoch": 2.5711552008747858,
      "grad_norm": 3.15625,
      "learning_rate": 7.941644534789234e-06,
      "loss": 0.7966,
      "step": 733620
    },
    {
      "epoch": 2.5711902483816815,
      "grad_norm": 2.625,
      "learning_rate": 7.940995506125532e-06,
      "loss": 0.8219,
      "step": 733630
    },
    {
      "epoch": 2.571225295888577,
      "grad_norm": 2.734375,
      "learning_rate": 7.94034647746183e-06,
      "loss": 0.8559,
      "step": 733640
    },
    {
      "epoch": 2.5712603433954726,
      "grad_norm": 2.53125,
      "learning_rate": 7.93969744879813e-06,
      "loss": 0.8032,
      "step": 733650
    },
    {
      "epoch": 2.5712953909023684,
      "grad_norm": 2.9375,
      "learning_rate": 7.939048420134428e-06,
      "loss": 0.7253,
      "step": 733660
    },
    {
      "epoch": 2.5713304384092637,
      "grad_norm": 2.859375,
      "learning_rate": 7.938399391470726e-06,
      "loss": 0.7897,
      "step": 733670
    },
    {
      "epoch": 2.5713654859161594,
      "grad_norm": 3.0625,
      "learning_rate": 7.937750362807024e-06,
      "loss": 0.8645,
      "step": 733680
    },
    {
      "epoch": 2.5714005334230547,
      "grad_norm": 2.953125,
      "learning_rate": 7.93710133414332e-06,
      "loss": 0.7789,
      "step": 733690
    },
    {
      "epoch": 2.5714355809299505,
      "grad_norm": 3.015625,
      "learning_rate": 7.93645230547962e-06,
      "loss": 0.8441,
      "step": 733700
    },
    {
      "epoch": 2.5714706284368463,
      "grad_norm": 2.875,
      "learning_rate": 7.935803276815918e-06,
      "loss": 0.8831,
      "step": 733710
    },
    {
      "epoch": 2.5715056759437416,
      "grad_norm": 2.765625,
      "learning_rate": 7.935154248152216e-06,
      "loss": 0.7849,
      "step": 733720
    },
    {
      "epoch": 2.5715407234506373,
      "grad_norm": 2.46875,
      "learning_rate": 7.934505219488514e-06,
      "loss": 0.8174,
      "step": 733730
    },
    {
      "epoch": 2.571575770957533,
      "grad_norm": 2.90625,
      "learning_rate": 7.933856190824812e-06,
      "loss": 0.7648,
      "step": 733740
    },
    {
      "epoch": 2.5716108184644284,
      "grad_norm": 2.890625,
      "learning_rate": 7.93320716216111e-06,
      "loss": 0.7925,
      "step": 733750
    },
    {
      "epoch": 2.571645865971324,
      "grad_norm": 3.15625,
      "learning_rate": 7.932558133497408e-06,
      "loss": 0.9212,
      "step": 733760
    },
    {
      "epoch": 2.57168091347822,
      "grad_norm": 2.40625,
      "learning_rate": 7.931909104833708e-06,
      "loss": 0.766,
      "step": 733770
    },
    {
      "epoch": 2.5717159609851152,
      "grad_norm": 3.390625,
      "learning_rate": 7.931260076170004e-06,
      "loss": 0.8457,
      "step": 733780
    },
    {
      "epoch": 2.571751008492011,
      "grad_norm": 3.390625,
      "learning_rate": 7.930611047506302e-06,
      "loss": 0.8174,
      "step": 733790
    },
    {
      "epoch": 2.5717860559989063,
      "grad_norm": 3.171875,
      "learning_rate": 7.9299620188426e-06,
      "loss": 0.8944,
      "step": 733800
    },
    {
      "epoch": 2.571821103505802,
      "grad_norm": 3.265625,
      "learning_rate": 7.929312990178898e-06,
      "loss": 0.8734,
      "step": 733810
    },
    {
      "epoch": 2.571856151012698,
      "grad_norm": 2.765625,
      "learning_rate": 7.928663961515196e-06,
      "loss": 0.799,
      "step": 733820
    },
    {
      "epoch": 2.5718911985195936,
      "grad_norm": 3.140625,
      "learning_rate": 7.928014932851496e-06,
      "loss": 0.7729,
      "step": 733830
    },
    {
      "epoch": 2.571926246026489,
      "grad_norm": 3.546875,
      "learning_rate": 7.927365904187794e-06,
      "loss": 0.8258,
      "step": 733840
    },
    {
      "epoch": 2.5719612935333847,
      "grad_norm": 3.25,
      "learning_rate": 7.926716875524092e-06,
      "loss": 0.8085,
      "step": 733850
    },
    {
      "epoch": 2.57199634104028,
      "grad_norm": 2.6875,
      "learning_rate": 7.92606784686039e-06,
      "loss": 0.7969,
      "step": 733860
    },
    {
      "epoch": 2.5720313885471757,
      "grad_norm": 3.1875,
      "learning_rate": 7.925418818196688e-06,
      "loss": 0.8331,
      "step": 733870
    },
    {
      "epoch": 2.5720664360540715,
      "grad_norm": 3.078125,
      "learning_rate": 7.924769789532984e-06,
      "loss": 0.7333,
      "step": 733880
    },
    {
      "epoch": 2.572101483560967,
      "grad_norm": 2.40625,
      "learning_rate": 7.924120760869284e-06,
      "loss": 0.7433,
      "step": 733890
    },
    {
      "epoch": 2.5721365310678626,
      "grad_norm": 3.078125,
      "learning_rate": 7.923471732205582e-06,
      "loss": 0.836,
      "step": 733900
    },
    {
      "epoch": 2.572171578574758,
      "grad_norm": 3.296875,
      "learning_rate": 7.92282270354188e-06,
      "loss": 0.8023,
      "step": 733910
    },
    {
      "epoch": 2.5722066260816536,
      "grad_norm": 3.40625,
      "learning_rate": 7.922173674878178e-06,
      "loss": 0.7661,
      "step": 733920
    },
    {
      "epoch": 2.5722416735885494,
      "grad_norm": 2.796875,
      "learning_rate": 7.921524646214476e-06,
      "loss": 0.8605,
      "step": 733930
    },
    {
      "epoch": 2.572276721095445,
      "grad_norm": 2.515625,
      "learning_rate": 7.920875617550774e-06,
      "loss": 0.7899,
      "step": 733940
    },
    {
      "epoch": 2.5723117686023405,
      "grad_norm": 2.90625,
      "learning_rate": 7.920226588887072e-06,
      "loss": 0.743,
      "step": 733950
    },
    {
      "epoch": 2.572346816109236,
      "grad_norm": 3.0625,
      "learning_rate": 7.919577560223371e-06,
      "loss": 0.8533,
      "step": 733960
    },
    {
      "epoch": 2.5723818636161315,
      "grad_norm": 2.265625,
      "learning_rate": 7.918928531559668e-06,
      "loss": 0.8273,
      "step": 733970
    },
    {
      "epoch": 2.5724169111230273,
      "grad_norm": 3.03125,
      "learning_rate": 7.918279502895966e-06,
      "loss": 0.8076,
      "step": 733980
    },
    {
      "epoch": 2.572451958629923,
      "grad_norm": 2.53125,
      "learning_rate": 7.917630474232264e-06,
      "loss": 0.7425,
      "step": 733990
    },
    {
      "epoch": 2.5724870061368184,
      "grad_norm": 3.296875,
      "learning_rate": 7.916981445568562e-06,
      "loss": 0.8158,
      "step": 734000
    },
    {
      "epoch": 2.572522053643714,
      "grad_norm": 3.0,
      "learning_rate": 7.916332416904861e-06,
      "loss": 0.7917,
      "step": 734010
    },
    {
      "epoch": 2.5725571011506094,
      "grad_norm": 2.953125,
      "learning_rate": 7.91568338824116e-06,
      "loss": 0.7369,
      "step": 734020
    },
    {
      "epoch": 2.572592148657505,
      "grad_norm": 2.828125,
      "learning_rate": 7.915034359577457e-06,
      "loss": 0.8439,
      "step": 734030
    },
    {
      "epoch": 2.572627196164401,
      "grad_norm": 2.65625,
      "learning_rate": 7.914385330913755e-06,
      "loss": 0.7701,
      "step": 734040
    },
    {
      "epoch": 2.5726622436712967,
      "grad_norm": 2.96875,
      "learning_rate": 7.913736302250053e-06,
      "loss": 0.7633,
      "step": 734050
    },
    {
      "epoch": 2.572697291178192,
      "grad_norm": 2.828125,
      "learning_rate": 7.91308727358635e-06,
      "loss": 0.7215,
      "step": 734060
    },
    {
      "epoch": 2.5727323386850878,
      "grad_norm": 2.953125,
      "learning_rate": 7.91243824492265e-06,
      "loss": 0.8363,
      "step": 734070
    },
    {
      "epoch": 2.572767386191983,
      "grad_norm": 3.265625,
      "learning_rate": 7.911789216258947e-06,
      "loss": 0.8183,
      "step": 734080
    },
    {
      "epoch": 2.572802433698879,
      "grad_norm": 3.234375,
      "learning_rate": 7.911140187595245e-06,
      "loss": 0.8516,
      "step": 734090
    },
    {
      "epoch": 2.5728374812057746,
      "grad_norm": 2.609375,
      "learning_rate": 7.910491158931543e-06,
      "loss": 0.8046,
      "step": 734100
    },
    {
      "epoch": 2.57287252871267,
      "grad_norm": 2.765625,
      "learning_rate": 7.909842130267841e-06,
      "loss": 0.8259,
      "step": 734110
    },
    {
      "epoch": 2.5729075762195657,
      "grad_norm": 2.765625,
      "learning_rate": 7.90919310160414e-06,
      "loss": 0.7379,
      "step": 734120
    },
    {
      "epoch": 2.572942623726461,
      "grad_norm": 2.8125,
      "learning_rate": 7.908544072940437e-06,
      "loss": 0.8835,
      "step": 734130
    },
    {
      "epoch": 2.5729776712333567,
      "grad_norm": 2.828125,
      "learning_rate": 7.907895044276737e-06,
      "loss": 0.6821,
      "step": 734140
    },
    {
      "epoch": 2.5730127187402525,
      "grad_norm": 3.0,
      "learning_rate": 7.907246015613035e-06,
      "loss": 0.8572,
      "step": 734150
    },
    {
      "epoch": 2.5730477662471483,
      "grad_norm": 2.53125,
      "learning_rate": 7.906596986949331e-06,
      "loss": 0.7454,
      "step": 734160
    },
    {
      "epoch": 2.5730828137540436,
      "grad_norm": 3.078125,
      "learning_rate": 7.90594795828563e-06,
      "loss": 0.7845,
      "step": 734170
    },
    {
      "epoch": 2.5731178612609393,
      "grad_norm": 3.015625,
      "learning_rate": 7.905298929621927e-06,
      "loss": 0.7871,
      "step": 734180
    },
    {
      "epoch": 2.5731529087678346,
      "grad_norm": 2.625,
      "learning_rate": 7.904649900958225e-06,
      "loss": 0.7913,
      "step": 734190
    },
    {
      "epoch": 2.5731879562747304,
      "grad_norm": 2.640625,
      "learning_rate": 7.904000872294525e-06,
      "loss": 0.7975,
      "step": 734200
    },
    {
      "epoch": 2.573223003781626,
      "grad_norm": 3.390625,
      "learning_rate": 7.903351843630823e-06,
      "loss": 0.8995,
      "step": 734210
    },
    {
      "epoch": 2.5732580512885215,
      "grad_norm": 2.5,
      "learning_rate": 7.902702814967121e-06,
      "loss": 0.7998,
      "step": 734220
    },
    {
      "epoch": 2.5732930987954172,
      "grad_norm": 2.8125,
      "learning_rate": 7.902053786303419e-06,
      "loss": 0.7934,
      "step": 734230
    },
    {
      "epoch": 2.5733281463023125,
      "grad_norm": 2.828125,
      "learning_rate": 7.901404757639717e-06,
      "loss": 0.8416,
      "step": 734240
    },
    {
      "epoch": 2.5733631938092083,
      "grad_norm": 3.078125,
      "learning_rate": 7.900755728976015e-06,
      "loss": 0.8695,
      "step": 734250
    },
    {
      "epoch": 2.573398241316104,
      "grad_norm": 2.828125,
      "learning_rate": 7.900106700312313e-06,
      "loss": 0.7885,
      "step": 734260
    },
    {
      "epoch": 2.573433288823,
      "grad_norm": 3.03125,
      "learning_rate": 7.899457671648611e-06,
      "loss": 0.803,
      "step": 734270
    },
    {
      "epoch": 2.573468336329895,
      "grad_norm": 2.84375,
      "learning_rate": 7.898808642984909e-06,
      "loss": 0.7618,
      "step": 734280
    },
    {
      "epoch": 2.573503383836791,
      "grad_norm": 2.640625,
      "learning_rate": 7.898159614321207e-06,
      "loss": 0.7681,
      "step": 734290
    },
    {
      "epoch": 2.573538431343686,
      "grad_norm": 2.90625,
      "learning_rate": 7.897510585657505e-06,
      "loss": 0.7803,
      "step": 734300
    },
    {
      "epoch": 2.573573478850582,
      "grad_norm": 2.671875,
      "learning_rate": 7.896861556993803e-06,
      "loss": 0.8097,
      "step": 734310
    },
    {
      "epoch": 2.5736085263574777,
      "grad_norm": 2.65625,
      "learning_rate": 7.896212528330103e-06,
      "loss": 0.7791,
      "step": 734320
    },
    {
      "epoch": 2.573643573864373,
      "grad_norm": 2.8125,
      "learning_rate": 7.8955634996664e-06,
      "loss": 0.7683,
      "step": 734330
    },
    {
      "epoch": 2.573678621371269,
      "grad_norm": 3.390625,
      "learning_rate": 7.894914471002699e-06,
      "loss": 0.9272,
      "step": 734340
    },
    {
      "epoch": 2.573713668878164,
      "grad_norm": 2.9375,
      "learning_rate": 7.894265442338995e-06,
      "loss": 0.7097,
      "step": 734350
    },
    {
      "epoch": 2.57374871638506,
      "grad_norm": 2.84375,
      "learning_rate": 7.893616413675293e-06,
      "loss": 0.7738,
      "step": 734360
    },
    {
      "epoch": 2.5737837638919556,
      "grad_norm": 2.859375,
      "learning_rate": 7.892967385011591e-06,
      "loss": 0.8822,
      "step": 734370
    },
    {
      "epoch": 2.5738188113988514,
      "grad_norm": 2.828125,
      "learning_rate": 7.89231835634789e-06,
      "loss": 0.8402,
      "step": 734380
    },
    {
      "epoch": 2.5738538589057467,
      "grad_norm": 3.0,
      "learning_rate": 7.891669327684189e-06,
      "loss": 0.7247,
      "step": 734390
    },
    {
      "epoch": 2.5738889064126425,
      "grad_norm": 3.078125,
      "learning_rate": 7.891020299020487e-06,
      "loss": 0.835,
      "step": 734400
    },
    {
      "epoch": 2.5739239539195378,
      "grad_norm": 2.921875,
      "learning_rate": 7.890371270356785e-06,
      "loss": 0.8702,
      "step": 734410
    },
    {
      "epoch": 2.5739590014264335,
      "grad_norm": 3.078125,
      "learning_rate": 7.889722241693083e-06,
      "loss": 0.7658,
      "step": 734420
    },
    {
      "epoch": 2.5739940489333293,
      "grad_norm": 3.046875,
      "learning_rate": 7.88907321302938e-06,
      "loss": 0.8345,
      "step": 734430
    },
    {
      "epoch": 2.5740290964402246,
      "grad_norm": 2.84375,
      "learning_rate": 7.888424184365679e-06,
      "loss": 0.7379,
      "step": 734440
    },
    {
      "epoch": 2.5740641439471204,
      "grad_norm": 3.578125,
      "learning_rate": 7.887775155701977e-06,
      "loss": 0.8551,
      "step": 734450
    },
    {
      "epoch": 2.5740991914540157,
      "grad_norm": 2.484375,
      "learning_rate": 7.887126127038275e-06,
      "loss": 0.7184,
      "step": 734460
    },
    {
      "epoch": 2.5741342389609114,
      "grad_norm": 2.90625,
      "learning_rate": 7.886477098374573e-06,
      "loss": 0.772,
      "step": 734470
    },
    {
      "epoch": 2.574169286467807,
      "grad_norm": 2.890625,
      "learning_rate": 7.88582806971087e-06,
      "loss": 0.7646,
      "step": 734480
    },
    {
      "epoch": 2.574204333974703,
      "grad_norm": 2.515625,
      "learning_rate": 7.885179041047169e-06,
      "loss": 0.7734,
      "step": 734490
    },
    {
      "epoch": 2.5742393814815983,
      "grad_norm": 2.65625,
      "learning_rate": 7.884530012383467e-06,
      "loss": 0.8101,
      "step": 734500
    },
    {
      "epoch": 2.574274428988494,
      "grad_norm": 2.5,
      "learning_rate": 7.883880983719766e-06,
      "loss": 0.7871,
      "step": 734510
    },
    {
      "epoch": 2.5743094764953893,
      "grad_norm": 2.640625,
      "learning_rate": 7.883231955056064e-06,
      "loss": 0.7769,
      "step": 734520
    },
    {
      "epoch": 2.574344524002285,
      "grad_norm": 2.984375,
      "learning_rate": 7.88258292639236e-06,
      "loss": 0.7534,
      "step": 734530
    },
    {
      "epoch": 2.574379571509181,
      "grad_norm": 2.96875,
      "learning_rate": 7.881933897728659e-06,
      "loss": 0.791,
      "step": 734540
    },
    {
      "epoch": 2.574414619016076,
      "grad_norm": 2.796875,
      "learning_rate": 7.881284869064957e-06,
      "loss": 0.7773,
      "step": 734550
    },
    {
      "epoch": 2.574449666522972,
      "grad_norm": 2.9375,
      "learning_rate": 7.880635840401256e-06,
      "loss": 0.7572,
      "step": 734560
    },
    {
      "epoch": 2.5744847140298672,
      "grad_norm": 3.0625,
      "learning_rate": 7.879986811737554e-06,
      "loss": 0.8439,
      "step": 734570
    },
    {
      "epoch": 2.574519761536763,
      "grad_norm": 3.09375,
      "learning_rate": 7.879337783073852e-06,
      "loss": 0.7523,
      "step": 734580
    },
    {
      "epoch": 2.5745548090436587,
      "grad_norm": 3.171875,
      "learning_rate": 7.87868875441015e-06,
      "loss": 0.8596,
      "step": 734590
    },
    {
      "epoch": 2.5745898565505545,
      "grad_norm": 2.875,
      "learning_rate": 7.878039725746448e-06,
      "loss": 0.7633,
      "step": 734600
    },
    {
      "epoch": 2.57462490405745,
      "grad_norm": 2.765625,
      "learning_rate": 7.877390697082746e-06,
      "loss": 0.7793,
      "step": 734610
    },
    {
      "epoch": 2.5746599515643456,
      "grad_norm": 2.40625,
      "learning_rate": 7.876741668419044e-06,
      "loss": 0.8211,
      "step": 734620
    },
    {
      "epoch": 2.574694999071241,
      "grad_norm": 2.5,
      "learning_rate": 7.876092639755342e-06,
      "loss": 0.8439,
      "step": 734630
    },
    {
      "epoch": 2.5747300465781366,
      "grad_norm": 2.6875,
      "learning_rate": 7.87544361109164e-06,
      "loss": 0.8559,
      "step": 734640
    },
    {
      "epoch": 2.5747650940850324,
      "grad_norm": 3.3125,
      "learning_rate": 7.874794582427938e-06,
      "loss": 0.9167,
      "step": 734650
    },
    {
      "epoch": 2.5748001415919277,
      "grad_norm": 3.34375,
      "learning_rate": 7.874145553764236e-06,
      "loss": 0.769,
      "step": 734660
    },
    {
      "epoch": 2.5748351890988235,
      "grad_norm": 2.84375,
      "learning_rate": 7.873496525100534e-06,
      "loss": 0.8111,
      "step": 734670
    },
    {
      "epoch": 2.574870236605719,
      "grad_norm": 3.171875,
      "learning_rate": 7.872847496436832e-06,
      "loss": 0.7587,
      "step": 734680
    },
    {
      "epoch": 2.5749052841126145,
      "grad_norm": 2.625,
      "learning_rate": 7.872198467773132e-06,
      "loss": 0.8035,
      "step": 734690
    },
    {
      "epoch": 2.5749403316195103,
      "grad_norm": 2.59375,
      "learning_rate": 7.87154943910943e-06,
      "loss": 0.7629,
      "step": 734700
    },
    {
      "epoch": 2.574975379126406,
      "grad_norm": 3.109375,
      "learning_rate": 7.870900410445728e-06,
      "loss": 0.803,
      "step": 734710
    },
    {
      "epoch": 2.5750104266333014,
      "grad_norm": 2.78125,
      "learning_rate": 7.870251381782024e-06,
      "loss": 0.7983,
      "step": 734720
    },
    {
      "epoch": 2.575045474140197,
      "grad_norm": 3.03125,
      "learning_rate": 7.869602353118322e-06,
      "loss": 0.8731,
      "step": 734730
    },
    {
      "epoch": 2.5750805216470924,
      "grad_norm": 2.984375,
      "learning_rate": 7.86895332445462e-06,
      "loss": 0.7899,
      "step": 734740
    },
    {
      "epoch": 2.575115569153988,
      "grad_norm": 3.015625,
      "learning_rate": 7.86830429579092e-06,
      "loss": 0.7636,
      "step": 734750
    },
    {
      "epoch": 2.575150616660884,
      "grad_norm": 3.171875,
      "learning_rate": 7.867655267127218e-06,
      "loss": 0.9319,
      "step": 734760
    },
    {
      "epoch": 2.5751856641677793,
      "grad_norm": 2.546875,
      "learning_rate": 7.867006238463516e-06,
      "loss": 0.8012,
      "step": 734770
    },
    {
      "epoch": 2.575220711674675,
      "grad_norm": 2.484375,
      "learning_rate": 7.866357209799814e-06,
      "loss": 0.7884,
      "step": 734780
    },
    {
      "epoch": 2.5752557591815703,
      "grad_norm": 3.296875,
      "learning_rate": 7.865708181136112e-06,
      "loss": 0.9185,
      "step": 734790
    },
    {
      "epoch": 2.575290806688466,
      "grad_norm": 2.84375,
      "learning_rate": 7.86505915247241e-06,
      "loss": 0.7916,
      "step": 734800
    },
    {
      "epoch": 2.575325854195362,
      "grad_norm": 2.671875,
      "learning_rate": 7.864410123808708e-06,
      "loss": 0.8439,
      "step": 734810
    },
    {
      "epoch": 2.5753609017022576,
      "grad_norm": 3.3125,
      "learning_rate": 7.863761095145006e-06,
      "loss": 0.8007,
      "step": 734820
    },
    {
      "epoch": 2.575395949209153,
      "grad_norm": 2.734375,
      "learning_rate": 7.863112066481304e-06,
      "loss": 0.8035,
      "step": 734830
    },
    {
      "epoch": 2.5754309967160487,
      "grad_norm": 3.078125,
      "learning_rate": 7.862463037817602e-06,
      "loss": 0.8599,
      "step": 734840
    },
    {
      "epoch": 2.575466044222944,
      "grad_norm": 3.03125,
      "learning_rate": 7.8618140091539e-06,
      "loss": 0.7293,
      "step": 734850
    },
    {
      "epoch": 2.5755010917298398,
      "grad_norm": 3.609375,
      "learning_rate": 7.861164980490198e-06,
      "loss": 0.8474,
      "step": 734860
    },
    {
      "epoch": 2.5755361392367355,
      "grad_norm": 2.90625,
      "learning_rate": 7.860515951826498e-06,
      "loss": 0.8552,
      "step": 734870
    },
    {
      "epoch": 2.575571186743631,
      "grad_norm": 2.78125,
      "learning_rate": 7.859866923162796e-06,
      "loss": 0.8562,
      "step": 734880
    },
    {
      "epoch": 2.5756062342505266,
      "grad_norm": 2.609375,
      "learning_rate": 7.859217894499094e-06,
      "loss": 0.774,
      "step": 734890
    },
    {
      "epoch": 2.5756412817574224,
      "grad_norm": 3.390625,
      "learning_rate": 7.858568865835392e-06,
      "loss": 0.9593,
      "step": 734900
    },
    {
      "epoch": 2.5756763292643177,
      "grad_norm": 3.046875,
      "learning_rate": 7.857919837171688e-06,
      "loss": 0.8333,
      "step": 734910
    },
    {
      "epoch": 2.5757113767712134,
      "grad_norm": 2.78125,
      "learning_rate": 7.857270808507986e-06,
      "loss": 0.8129,
      "step": 734920
    },
    {
      "epoch": 2.575746424278109,
      "grad_norm": 3.21875,
      "learning_rate": 7.856621779844286e-06,
      "loss": 0.7719,
      "step": 734930
    },
    {
      "epoch": 2.5757814717850045,
      "grad_norm": 2.953125,
      "learning_rate": 7.855972751180584e-06,
      "loss": 0.7865,
      "step": 734940
    },
    {
      "epoch": 2.5758165192919003,
      "grad_norm": 2.953125,
      "learning_rate": 7.855323722516882e-06,
      "loss": 0.798,
      "step": 734950
    },
    {
      "epoch": 2.5758515667987956,
      "grad_norm": 3.296875,
      "learning_rate": 7.85467469385318e-06,
      "loss": 0.7751,
      "step": 734960
    },
    {
      "epoch": 2.5758866143056913,
      "grad_norm": 3.28125,
      "learning_rate": 7.854025665189478e-06,
      "loss": 0.7936,
      "step": 734970
    },
    {
      "epoch": 2.575921661812587,
      "grad_norm": 2.65625,
      "learning_rate": 7.853376636525776e-06,
      "loss": 0.7494,
      "step": 734980
    },
    {
      "epoch": 2.5759567093194824,
      "grad_norm": 2.90625,
      "learning_rate": 7.852727607862074e-06,
      "loss": 0.7883,
      "step": 734990
    },
    {
      "epoch": 2.575991756826378,
      "grad_norm": 3.15625,
      "learning_rate": 7.852078579198372e-06,
      "loss": 0.8922,
      "step": 735000
    },
    {
      "epoch": 2.575991756826378,
      "eval_loss": 0.7550594210624695,
      "eval_runtime": 563.2291,
      "eval_samples_per_second": 675.455,
      "eval_steps_per_second": 56.288,
      "step": 735000
    },
    {
      "epoch": 2.576026804333274,
      "grad_norm": 3.140625,
      "learning_rate": 7.85142955053467e-06,
      "loss": 0.7871,
      "step": 735010
    },
    {
      "epoch": 2.5760618518401692,
      "grad_norm": 2.4375,
      "learning_rate": 7.850780521870968e-06,
      "loss": 0.8,
      "step": 735020
    },
    {
      "epoch": 2.576096899347065,
      "grad_norm": 2.703125,
      "learning_rate": 7.850131493207266e-06,
      "loss": 0.7627,
      "step": 735030
    },
    {
      "epoch": 2.5761319468539607,
      "grad_norm": 3.03125,
      "learning_rate": 7.849482464543564e-06,
      "loss": 0.8013,
      "step": 735040
    },
    {
      "epoch": 2.576166994360856,
      "grad_norm": 2.90625,
      "learning_rate": 7.848833435879862e-06,
      "loss": 0.8472,
      "step": 735050
    },
    {
      "epoch": 2.576202041867752,
      "grad_norm": 2.59375,
      "learning_rate": 7.848184407216161e-06,
      "loss": 0.7037,
      "step": 735060
    },
    {
      "epoch": 2.576237089374647,
      "grad_norm": 3.125,
      "learning_rate": 7.84753537855246e-06,
      "loss": 0.7977,
      "step": 735070
    },
    {
      "epoch": 2.576272136881543,
      "grad_norm": 3.40625,
      "learning_rate": 7.846886349888757e-06,
      "loss": 0.7152,
      "step": 735080
    },
    {
      "epoch": 2.5763071843884386,
      "grad_norm": 2.703125,
      "learning_rate": 7.846237321225055e-06,
      "loss": 0.797,
      "step": 735090
    },
    {
      "epoch": 2.5763422318953344,
      "grad_norm": 2.6875,
      "learning_rate": 7.845588292561352e-06,
      "loss": 0.7115,
      "step": 735100
    },
    {
      "epoch": 2.5763772794022297,
      "grad_norm": 3.140625,
      "learning_rate": 7.844939263897651e-06,
      "loss": 0.7829,
      "step": 735110
    },
    {
      "epoch": 2.5764123269091255,
      "grad_norm": 2.46875,
      "learning_rate": 7.84429023523395e-06,
      "loss": 0.7904,
      "step": 735120
    },
    {
      "epoch": 2.576447374416021,
      "grad_norm": 2.46875,
      "learning_rate": 7.843641206570247e-06,
      "loss": 0.6969,
      "step": 735130
    },
    {
      "epoch": 2.5764824219229165,
      "grad_norm": 3.15625,
      "learning_rate": 7.842992177906545e-06,
      "loss": 0.8119,
      "step": 735140
    },
    {
      "epoch": 2.5765174694298123,
      "grad_norm": 2.703125,
      "learning_rate": 7.842343149242843e-06,
      "loss": 0.7778,
      "step": 735150
    },
    {
      "epoch": 2.5765525169367076,
      "grad_norm": 2.6875,
      "learning_rate": 7.841694120579141e-06,
      "loss": 0.727,
      "step": 735160
    },
    {
      "epoch": 2.5765875644436034,
      "grad_norm": 3.34375,
      "learning_rate": 7.84104509191544e-06,
      "loss": 0.8312,
      "step": 735170
    },
    {
      "epoch": 2.5766226119504987,
      "grad_norm": 2.75,
      "learning_rate": 7.84039606325174e-06,
      "loss": 0.6981,
      "step": 735180
    },
    {
      "epoch": 2.5766576594573944,
      "grad_norm": 2.625,
      "learning_rate": 7.839747034588035e-06,
      "loss": 0.8214,
      "step": 735190
    },
    {
      "epoch": 2.57669270696429,
      "grad_norm": 3.0,
      "learning_rate": 7.839098005924333e-06,
      "loss": 0.8149,
      "step": 735200
    },
    {
      "epoch": 2.576727754471186,
      "grad_norm": 2.78125,
      "learning_rate": 7.838448977260631e-06,
      "loss": 0.8087,
      "step": 735210
    },
    {
      "epoch": 2.5767628019780813,
      "grad_norm": 2.65625,
      "learning_rate": 7.83779994859693e-06,
      "loss": 0.7611,
      "step": 735220
    },
    {
      "epoch": 2.576797849484977,
      "grad_norm": 2.484375,
      "learning_rate": 7.837150919933227e-06,
      "loss": 0.7627,
      "step": 735230
    },
    {
      "epoch": 2.5768328969918723,
      "grad_norm": 2.859375,
      "learning_rate": 7.836501891269527e-06,
      "loss": 0.805,
      "step": 735240
    },
    {
      "epoch": 2.576867944498768,
      "grad_norm": 3.0,
      "learning_rate": 7.835852862605825e-06,
      "loss": 0.8037,
      "step": 735250
    },
    {
      "epoch": 2.576902992005664,
      "grad_norm": 3.375,
      "learning_rate": 7.835203833942123e-06,
      "loss": 0.8134,
      "step": 735260
    },
    {
      "epoch": 2.576938039512559,
      "grad_norm": 3.09375,
      "learning_rate": 7.834554805278421e-06,
      "loss": 0.8122,
      "step": 735270
    },
    {
      "epoch": 2.576973087019455,
      "grad_norm": 2.84375,
      "learning_rate": 7.83390577661472e-06,
      "loss": 0.7935,
      "step": 735280
    },
    {
      "epoch": 2.5770081345263502,
      "grad_norm": 3.140625,
      "learning_rate": 7.833256747951015e-06,
      "loss": 0.8046,
      "step": 735290
    },
    {
      "epoch": 2.577043182033246,
      "grad_norm": 2.96875,
      "learning_rate": 7.832607719287315e-06,
      "loss": 0.748,
      "step": 735300
    },
    {
      "epoch": 2.5770782295401418,
      "grad_norm": 2.71875,
      "learning_rate": 7.831958690623613e-06,
      "loss": 0.7622,
      "step": 735310
    },
    {
      "epoch": 2.5771132770470375,
      "grad_norm": 2.859375,
      "learning_rate": 7.831309661959911e-06,
      "loss": 0.8514,
      "step": 735320
    },
    {
      "epoch": 2.577148324553933,
      "grad_norm": 2.75,
      "learning_rate": 7.83066063329621e-06,
      "loss": 0.7346,
      "step": 735330
    },
    {
      "epoch": 2.5771833720608286,
      "grad_norm": 3.328125,
      "learning_rate": 7.830011604632507e-06,
      "loss": 0.8422,
      "step": 735340
    },
    {
      "epoch": 2.577218419567724,
      "grad_norm": 2.671875,
      "learning_rate": 7.829362575968805e-06,
      "loss": 0.8814,
      "step": 735350
    },
    {
      "epoch": 2.5772534670746197,
      "grad_norm": 2.640625,
      "learning_rate": 7.828713547305103e-06,
      "loss": 0.7452,
      "step": 735360
    },
    {
      "epoch": 2.5772885145815154,
      "grad_norm": 2.6875,
      "learning_rate": 7.828064518641403e-06,
      "loss": 0.795,
      "step": 735370
    },
    {
      "epoch": 2.5773235620884107,
      "grad_norm": 2.5625,
      "learning_rate": 7.8274154899777e-06,
      "loss": 0.7486,
      "step": 735380
    },
    {
      "epoch": 2.5773586095953065,
      "grad_norm": 2.53125,
      "learning_rate": 7.826766461313997e-06,
      "loss": 0.7831,
      "step": 735390
    },
    {
      "epoch": 2.577393657102202,
      "grad_norm": 2.8125,
      "learning_rate": 7.826117432650295e-06,
      "loss": 0.8035,
      "step": 735400
    },
    {
      "epoch": 2.5774287046090976,
      "grad_norm": 2.75,
      "learning_rate": 7.825468403986593e-06,
      "loss": 0.8997,
      "step": 735410
    },
    {
      "epoch": 2.5774637521159933,
      "grad_norm": 3.140625,
      "learning_rate": 7.824819375322893e-06,
      "loss": 0.7318,
      "step": 735420
    },
    {
      "epoch": 2.577498799622889,
      "grad_norm": 2.65625,
      "learning_rate": 7.824170346659191e-06,
      "loss": 0.8208,
      "step": 735430
    },
    {
      "epoch": 2.5775338471297844,
      "grad_norm": 2.953125,
      "learning_rate": 7.823521317995489e-06,
      "loss": 0.778,
      "step": 735440
    },
    {
      "epoch": 2.57756889463668,
      "grad_norm": 3.046875,
      "learning_rate": 7.822872289331787e-06,
      "loss": 0.8473,
      "step": 735450
    },
    {
      "epoch": 2.5776039421435755,
      "grad_norm": 3.25,
      "learning_rate": 7.822223260668085e-06,
      "loss": 0.8395,
      "step": 735460
    },
    {
      "epoch": 2.5776389896504712,
      "grad_norm": 2.234375,
      "learning_rate": 7.821574232004381e-06,
      "loss": 0.7808,
      "step": 735470
    },
    {
      "epoch": 2.577674037157367,
      "grad_norm": 3.015625,
      "learning_rate": 7.820925203340681e-06,
      "loss": 0.8074,
      "step": 735480
    },
    {
      "epoch": 2.5777090846642623,
      "grad_norm": 3.0,
      "learning_rate": 7.820276174676979e-06,
      "loss": 0.7618,
      "step": 735490
    },
    {
      "epoch": 2.577744132171158,
      "grad_norm": 2.78125,
      "learning_rate": 7.819627146013277e-06,
      "loss": 0.8293,
      "step": 735500
    },
    {
      "epoch": 2.5777791796780534,
      "grad_norm": 2.734375,
      "learning_rate": 7.818978117349575e-06,
      "loss": 0.7461,
      "step": 735510
    },
    {
      "epoch": 2.577814227184949,
      "grad_norm": 3.140625,
      "learning_rate": 7.818329088685873e-06,
      "loss": 0.7478,
      "step": 735520
    },
    {
      "epoch": 2.577849274691845,
      "grad_norm": 3.015625,
      "learning_rate": 7.817680060022171e-06,
      "loss": 0.8072,
      "step": 735530
    },
    {
      "epoch": 2.5778843221987406,
      "grad_norm": 2.875,
      "learning_rate": 7.817031031358469e-06,
      "loss": 0.7723,
      "step": 735540
    },
    {
      "epoch": 2.577919369705636,
      "grad_norm": 2.59375,
      "learning_rate": 7.816382002694769e-06,
      "loss": 0.8211,
      "step": 735550
    },
    {
      "epoch": 2.5779544172125317,
      "grad_norm": 3.078125,
      "learning_rate": 7.815732974031067e-06,
      "loss": 0.8079,
      "step": 735560
    },
    {
      "epoch": 2.577989464719427,
      "grad_norm": 2.890625,
      "learning_rate": 7.815083945367363e-06,
      "loss": 0.8221,
      "step": 735570
    },
    {
      "epoch": 2.578024512226323,
      "grad_norm": 2.84375,
      "learning_rate": 7.814434916703661e-06,
      "loss": 0.7712,
      "step": 735580
    },
    {
      "epoch": 2.5780595597332185,
      "grad_norm": 2.703125,
      "learning_rate": 7.813785888039959e-06,
      "loss": 0.7622,
      "step": 735590
    },
    {
      "epoch": 2.578094607240114,
      "grad_norm": 2.5625,
      "learning_rate": 7.813136859376257e-06,
      "loss": 0.8749,
      "step": 735600
    },
    {
      "epoch": 2.5781296547470096,
      "grad_norm": 3.0625,
      "learning_rate": 7.812487830712557e-06,
      "loss": 0.8465,
      "step": 735610
    },
    {
      "epoch": 2.578164702253905,
      "grad_norm": 3.203125,
      "learning_rate": 7.811838802048855e-06,
      "loss": 0.7996,
      "step": 735620
    },
    {
      "epoch": 2.5781997497608007,
      "grad_norm": 3.21875,
      "learning_rate": 7.811189773385153e-06,
      "loss": 0.7798,
      "step": 735630
    },
    {
      "epoch": 2.5782347972676964,
      "grad_norm": 2.8125,
      "learning_rate": 7.81054074472145e-06,
      "loss": 0.8314,
      "step": 735640
    },
    {
      "epoch": 2.578269844774592,
      "grad_norm": 3.171875,
      "learning_rate": 7.809891716057749e-06,
      "loss": 0.8348,
      "step": 735650
    },
    {
      "epoch": 2.5783048922814875,
      "grad_norm": 2.75,
      "learning_rate": 7.809242687394047e-06,
      "loss": 0.7701,
      "step": 735660
    },
    {
      "epoch": 2.5783399397883833,
      "grad_norm": 2.875,
      "learning_rate": 7.808593658730345e-06,
      "loss": 0.8953,
      "step": 735670
    },
    {
      "epoch": 2.5783749872952786,
      "grad_norm": 3.1875,
      "learning_rate": 7.807944630066643e-06,
      "loss": 0.8173,
      "step": 735680
    },
    {
      "epoch": 2.5784100348021743,
      "grad_norm": 3.5,
      "learning_rate": 7.80729560140294e-06,
      "loss": 0.7706,
      "step": 735690
    },
    {
      "epoch": 2.57844508230907,
      "grad_norm": 3.296875,
      "learning_rate": 7.806646572739239e-06,
      "loss": 0.9,
      "step": 735700
    },
    {
      "epoch": 2.5784801298159654,
      "grad_norm": 2.75,
      "learning_rate": 7.805997544075537e-06,
      "loss": 0.8361,
      "step": 735710
    },
    {
      "epoch": 2.578515177322861,
      "grad_norm": 2.765625,
      "learning_rate": 7.805348515411835e-06,
      "loss": 0.755,
      "step": 735720
    },
    {
      "epoch": 2.5785502248297565,
      "grad_norm": 2.6875,
      "learning_rate": 7.804699486748134e-06,
      "loss": 0.8488,
      "step": 735730
    },
    {
      "epoch": 2.5785852723366522,
      "grad_norm": 2.828125,
      "learning_rate": 7.804050458084432e-06,
      "loss": 0.8117,
      "step": 735740
    },
    {
      "epoch": 2.578620319843548,
      "grad_norm": 3.28125,
      "learning_rate": 7.80340142942073e-06,
      "loss": 0.8319,
      "step": 735750
    },
    {
      "epoch": 2.5786553673504438,
      "grad_norm": 3.125,
      "learning_rate": 7.802752400757027e-06,
      "loss": 0.8152,
      "step": 735760
    },
    {
      "epoch": 2.578690414857339,
      "grad_norm": 2.5,
      "learning_rate": 7.802103372093325e-06,
      "loss": 0.682,
      "step": 735770
    },
    {
      "epoch": 2.578725462364235,
      "grad_norm": 3.1875,
      "learning_rate": 7.801454343429623e-06,
      "loss": 0.8364,
      "step": 735780
    },
    {
      "epoch": 2.57876050987113,
      "grad_norm": 3.125,
      "learning_rate": 7.800805314765922e-06,
      "loss": 0.783,
      "step": 735790
    },
    {
      "epoch": 2.578795557378026,
      "grad_norm": 3.28125,
      "learning_rate": 7.80015628610222e-06,
      "loss": 0.8702,
      "step": 735800
    },
    {
      "epoch": 2.5788306048849217,
      "grad_norm": 3.015625,
      "learning_rate": 7.799507257438518e-06,
      "loss": 0.8046,
      "step": 735810
    },
    {
      "epoch": 2.578865652391817,
      "grad_norm": 2.75,
      "learning_rate": 7.798858228774816e-06,
      "loss": 0.7935,
      "step": 735820
    },
    {
      "epoch": 2.5789006998987127,
      "grad_norm": 2.390625,
      "learning_rate": 7.798209200111114e-06,
      "loss": 0.8032,
      "step": 735830
    },
    {
      "epoch": 2.578935747405608,
      "grad_norm": 3.234375,
      "learning_rate": 7.797560171447412e-06,
      "loss": 0.743,
      "step": 735840
    },
    {
      "epoch": 2.578970794912504,
      "grad_norm": 2.734375,
      "learning_rate": 7.79691114278371e-06,
      "loss": 0.7877,
      "step": 735850
    },
    {
      "epoch": 2.5790058424193996,
      "grad_norm": 3.328125,
      "learning_rate": 7.796262114120008e-06,
      "loss": 0.7642,
      "step": 735860
    },
    {
      "epoch": 2.5790408899262953,
      "grad_norm": 2.875,
      "learning_rate": 7.795613085456306e-06,
      "loss": 0.7875,
      "step": 735870
    },
    {
      "epoch": 2.5790759374331906,
      "grad_norm": 2.765625,
      "learning_rate": 7.794964056792604e-06,
      "loss": 0.8925,
      "step": 735880
    },
    {
      "epoch": 2.5791109849400864,
      "grad_norm": 3.015625,
      "learning_rate": 7.794315028128902e-06,
      "loss": 0.8369,
      "step": 735890
    },
    {
      "epoch": 2.5791460324469817,
      "grad_norm": 2.828125,
      "learning_rate": 7.7936659994652e-06,
      "loss": 0.7293,
      "step": 735900
    },
    {
      "epoch": 2.5791810799538775,
      "grad_norm": 3.21875,
      "learning_rate": 7.7930169708015e-06,
      "loss": 0.7966,
      "step": 735910
    },
    {
      "epoch": 2.5792161274607732,
      "grad_norm": 2.734375,
      "learning_rate": 7.792367942137798e-06,
      "loss": 0.8233,
      "step": 735920
    },
    {
      "epoch": 2.5792511749676685,
      "grad_norm": 3.03125,
      "learning_rate": 7.791718913474096e-06,
      "loss": 0.7686,
      "step": 735930
    },
    {
      "epoch": 2.5792862224745643,
      "grad_norm": 2.84375,
      "learning_rate": 7.791069884810392e-06,
      "loss": 0.8363,
      "step": 735940
    },
    {
      "epoch": 2.5793212699814596,
      "grad_norm": 3.6875,
      "learning_rate": 7.79042085614669e-06,
      "loss": 0.8459,
      "step": 735950
    },
    {
      "epoch": 2.5793563174883554,
      "grad_norm": 3.015625,
      "learning_rate": 7.789771827482988e-06,
      "loss": 0.8657,
      "step": 735960
    },
    {
      "epoch": 2.579391364995251,
      "grad_norm": 2.578125,
      "learning_rate": 7.789122798819288e-06,
      "loss": 0.763,
      "step": 735970
    },
    {
      "epoch": 2.579426412502147,
      "grad_norm": 3.125,
      "learning_rate": 7.788473770155586e-06,
      "loss": 0.7116,
      "step": 735980
    },
    {
      "epoch": 2.579461460009042,
      "grad_norm": 2.671875,
      "learning_rate": 7.787824741491884e-06,
      "loss": 0.7091,
      "step": 735990
    },
    {
      "epoch": 2.579496507515938,
      "grad_norm": 2.5625,
      "learning_rate": 7.787175712828182e-06,
      "loss": 0.8032,
      "step": 736000
    },
    {
      "epoch": 2.5795315550228333,
      "grad_norm": 3.125,
      "learning_rate": 7.78652668416448e-06,
      "loss": 0.7765,
      "step": 736010
    },
    {
      "epoch": 2.579566602529729,
      "grad_norm": 2.796875,
      "learning_rate": 7.785877655500778e-06,
      "loss": 0.8103,
      "step": 736020
    },
    {
      "epoch": 2.579601650036625,
      "grad_norm": 2.5625,
      "learning_rate": 7.785228626837076e-06,
      "loss": 0.7561,
      "step": 736030
    },
    {
      "epoch": 2.57963669754352,
      "grad_norm": 2.9375,
      "learning_rate": 7.784579598173374e-06,
      "loss": 0.7797,
      "step": 736040
    },
    {
      "epoch": 2.579671745050416,
      "grad_norm": 2.828125,
      "learning_rate": 7.783930569509672e-06,
      "loss": 0.7923,
      "step": 736050
    },
    {
      "epoch": 2.579706792557311,
      "grad_norm": 3.234375,
      "learning_rate": 7.78328154084597e-06,
      "loss": 0.8539,
      "step": 736060
    },
    {
      "epoch": 2.579741840064207,
      "grad_norm": 2.796875,
      "learning_rate": 7.782632512182268e-06,
      "loss": 0.7698,
      "step": 736070
    },
    {
      "epoch": 2.5797768875711027,
      "grad_norm": 3.1875,
      "learning_rate": 7.781983483518566e-06,
      "loss": 0.9069,
      "step": 736080
    },
    {
      "epoch": 2.5798119350779984,
      "grad_norm": 3.125,
      "learning_rate": 7.781334454854864e-06,
      "loss": 0.765,
      "step": 736090
    },
    {
      "epoch": 2.5798469825848938,
      "grad_norm": 2.8125,
      "learning_rate": 7.780685426191164e-06,
      "loss": 0.7907,
      "step": 736100
    },
    {
      "epoch": 2.5798820300917895,
      "grad_norm": 3.03125,
      "learning_rate": 7.780036397527462e-06,
      "loss": 0.7888,
      "step": 736110
    },
    {
      "epoch": 2.579917077598685,
      "grad_norm": 3.265625,
      "learning_rate": 7.77938736886376e-06,
      "loss": 0.7761,
      "step": 736120
    },
    {
      "epoch": 2.5799521251055806,
      "grad_norm": 3.109375,
      "learning_rate": 7.778738340200056e-06,
      "loss": 0.824,
      "step": 736130
    },
    {
      "epoch": 2.5799871726124763,
      "grad_norm": 2.96875,
      "learning_rate": 7.778089311536354e-06,
      "loss": 0.807,
      "step": 736140
    },
    {
      "epoch": 2.5800222201193717,
      "grad_norm": 2.96875,
      "learning_rate": 7.777440282872654e-06,
      "loss": 0.805,
      "step": 736150
    },
    {
      "epoch": 2.5800572676262674,
      "grad_norm": 3.25,
      "learning_rate": 7.776791254208952e-06,
      "loss": 0.7262,
      "step": 736160
    },
    {
      "epoch": 2.5800923151331627,
      "grad_norm": 3.078125,
      "learning_rate": 7.77614222554525e-06,
      "loss": 0.8379,
      "step": 736170
    },
    {
      "epoch": 2.5801273626400585,
      "grad_norm": 2.859375,
      "learning_rate": 7.775493196881548e-06,
      "loss": 0.8173,
      "step": 736180
    },
    {
      "epoch": 2.5801624101469542,
      "grad_norm": 3.234375,
      "learning_rate": 7.774844168217846e-06,
      "loss": 0.7778,
      "step": 736190
    },
    {
      "epoch": 2.58019745765385,
      "grad_norm": 2.90625,
      "learning_rate": 7.774195139554144e-06,
      "loss": 0.8639,
      "step": 736200
    },
    {
      "epoch": 2.5802325051607453,
      "grad_norm": 2.515625,
      "learning_rate": 7.773546110890442e-06,
      "loss": 0.7941,
      "step": 736210
    },
    {
      "epoch": 2.580267552667641,
      "grad_norm": 3.1875,
      "learning_rate": 7.772897082226741e-06,
      "loss": 0.9084,
      "step": 736220
    },
    {
      "epoch": 2.5803026001745364,
      "grad_norm": 2.90625,
      "learning_rate": 7.772248053563038e-06,
      "loss": 0.8245,
      "step": 736230
    },
    {
      "epoch": 2.580337647681432,
      "grad_norm": 3.03125,
      "learning_rate": 7.771599024899336e-06,
      "loss": 0.8102,
      "step": 736240
    },
    {
      "epoch": 2.580372695188328,
      "grad_norm": 3.390625,
      "learning_rate": 7.770949996235634e-06,
      "loss": 0.8293,
      "step": 736250
    },
    {
      "epoch": 2.580407742695223,
      "grad_norm": 2.8125,
      "learning_rate": 7.770300967571932e-06,
      "loss": 0.8118,
      "step": 736260
    },
    {
      "epoch": 2.580442790202119,
      "grad_norm": 3.25,
      "learning_rate": 7.76965193890823e-06,
      "loss": 0.8542,
      "step": 736270
    },
    {
      "epoch": 2.5804778377090147,
      "grad_norm": 3.125,
      "learning_rate": 7.76900291024453e-06,
      "loss": 0.7816,
      "step": 736280
    },
    {
      "epoch": 2.58051288521591,
      "grad_norm": 2.875,
      "learning_rate": 7.768353881580827e-06,
      "loss": 0.7338,
      "step": 736290
    },
    {
      "epoch": 2.580547932722806,
      "grad_norm": 3.09375,
      "learning_rate": 7.767704852917125e-06,
      "loss": 0.8864,
      "step": 736300
    },
    {
      "epoch": 2.5805829802297016,
      "grad_norm": 3.0625,
      "learning_rate": 7.767055824253423e-06,
      "loss": 0.8204,
      "step": 736310
    },
    {
      "epoch": 2.580618027736597,
      "grad_norm": 2.921875,
      "learning_rate": 7.76640679558972e-06,
      "loss": 0.8509,
      "step": 736320
    },
    {
      "epoch": 2.5806530752434926,
      "grad_norm": 3.03125,
      "learning_rate": 7.765757766926018e-06,
      "loss": 0.7337,
      "step": 736330
    },
    {
      "epoch": 2.580688122750388,
      "grad_norm": 3.234375,
      "learning_rate": 7.765108738262317e-06,
      "loss": 0.9134,
      "step": 736340
    },
    {
      "epoch": 2.5807231702572837,
      "grad_norm": 3.015625,
      "learning_rate": 7.764459709598615e-06,
      "loss": 0.8129,
      "step": 736350
    },
    {
      "epoch": 2.5807582177641795,
      "grad_norm": 2.625,
      "learning_rate": 7.763810680934913e-06,
      "loss": 0.8257,
      "step": 736360
    },
    {
      "epoch": 2.5807932652710748,
      "grad_norm": 2.890625,
      "learning_rate": 7.763161652271211e-06,
      "loss": 0.8427,
      "step": 736370
    },
    {
      "epoch": 2.5808283127779705,
      "grad_norm": 3.5,
      "learning_rate": 7.76251262360751e-06,
      "loss": 0.8009,
      "step": 736380
    },
    {
      "epoch": 2.5808633602848663,
      "grad_norm": 3.046875,
      "learning_rate": 7.761863594943807e-06,
      "loss": 0.765,
      "step": 736390
    },
    {
      "epoch": 2.5808984077917616,
      "grad_norm": 2.984375,
      "learning_rate": 7.761214566280105e-06,
      "loss": 0.8172,
      "step": 736400
    },
    {
      "epoch": 2.5809334552986574,
      "grad_norm": 2.953125,
      "learning_rate": 7.760565537616403e-06,
      "loss": 0.7635,
      "step": 736410
    },
    {
      "epoch": 2.580968502805553,
      "grad_norm": 3.0,
      "learning_rate": 7.759916508952701e-06,
      "loss": 0.7287,
      "step": 736420
    },
    {
      "epoch": 2.5810035503124484,
      "grad_norm": 2.96875,
      "learning_rate": 7.759267480289e-06,
      "loss": 0.8176,
      "step": 736430
    },
    {
      "epoch": 2.581038597819344,
      "grad_norm": 3.203125,
      "learning_rate": 7.758618451625297e-06,
      "loss": 0.787,
      "step": 736440
    },
    {
      "epoch": 2.5810736453262395,
      "grad_norm": 2.59375,
      "learning_rate": 7.757969422961595e-06,
      "loss": 0.8602,
      "step": 736450
    },
    {
      "epoch": 2.5811086928331353,
      "grad_norm": 2.703125,
      "learning_rate": 7.757320394297895e-06,
      "loss": 0.7359,
      "step": 736460
    },
    {
      "epoch": 2.581143740340031,
      "grad_norm": 2.46875,
      "learning_rate": 7.756671365634193e-06,
      "loss": 0.8155,
      "step": 736470
    },
    {
      "epoch": 2.581178787846927,
      "grad_norm": 2.703125,
      "learning_rate": 7.756022336970491e-06,
      "loss": 0.786,
      "step": 736480
    },
    {
      "epoch": 2.581213835353822,
      "grad_norm": 3.578125,
      "learning_rate": 7.755373308306789e-06,
      "loss": 0.9018,
      "step": 736490
    },
    {
      "epoch": 2.581248882860718,
      "grad_norm": 3.078125,
      "learning_rate": 7.754724279643087e-06,
      "loss": 0.8226,
      "step": 736500
    },
    {
      "epoch": 2.581283930367613,
      "grad_norm": 3.03125,
      "learning_rate": 7.754075250979383e-06,
      "loss": 0.8642,
      "step": 736510
    },
    {
      "epoch": 2.581318977874509,
      "grad_norm": 2.890625,
      "learning_rate": 7.753426222315683e-06,
      "loss": 0.8108,
      "step": 736520
    },
    {
      "epoch": 2.5813540253814047,
      "grad_norm": 3.484375,
      "learning_rate": 7.752777193651981e-06,
      "loss": 0.8331,
      "step": 736530
    },
    {
      "epoch": 2.5813890728883,
      "grad_norm": 3.046875,
      "learning_rate": 7.752128164988279e-06,
      "loss": 0.7715,
      "step": 736540
    },
    {
      "epoch": 2.5814241203951958,
      "grad_norm": 3.015625,
      "learning_rate": 7.751479136324577e-06,
      "loss": 0.7943,
      "step": 736550
    },
    {
      "epoch": 2.581459167902091,
      "grad_norm": 2.84375,
      "learning_rate": 7.750830107660875e-06,
      "loss": 0.7996,
      "step": 736560
    },
    {
      "epoch": 2.581494215408987,
      "grad_norm": 3.15625,
      "learning_rate": 7.750181078997173e-06,
      "loss": 0.8003,
      "step": 736570
    },
    {
      "epoch": 2.5815292629158826,
      "grad_norm": 3.015625,
      "learning_rate": 7.749532050333471e-06,
      "loss": 0.7926,
      "step": 736580
    },
    {
      "epoch": 2.5815643104227783,
      "grad_norm": 2.953125,
      "learning_rate": 7.74888302166977e-06,
      "loss": 0.8611,
      "step": 736590
    },
    {
      "epoch": 2.5815993579296737,
      "grad_norm": 2.78125,
      "learning_rate": 7.748233993006067e-06,
      "loss": 0.7747,
      "step": 736600
    },
    {
      "epoch": 2.5816344054365694,
      "grad_norm": 2.703125,
      "learning_rate": 7.747584964342365e-06,
      "loss": 0.8329,
      "step": 736610
    },
    {
      "epoch": 2.5816694529434647,
      "grad_norm": 3.046875,
      "learning_rate": 7.746935935678663e-06,
      "loss": 0.7986,
      "step": 736620
    },
    {
      "epoch": 2.5817045004503605,
      "grad_norm": 3.375,
      "learning_rate": 7.746286907014961e-06,
      "loss": 0.8426,
      "step": 736630
    },
    {
      "epoch": 2.5817395479572562,
      "grad_norm": 2.921875,
      "learning_rate": 7.745637878351259e-06,
      "loss": 0.7421,
      "step": 736640
    },
    {
      "epoch": 2.5817745954641516,
      "grad_norm": 3.109375,
      "learning_rate": 7.744988849687559e-06,
      "loss": 0.7792,
      "step": 736650
    },
    {
      "epoch": 2.5818096429710473,
      "grad_norm": 2.46875,
      "learning_rate": 7.744339821023857e-06,
      "loss": 0.7203,
      "step": 736660
    },
    {
      "epoch": 2.5818446904779426,
      "grad_norm": 2.9375,
      "learning_rate": 7.743690792360155e-06,
      "loss": 0.8191,
      "step": 736670
    },
    {
      "epoch": 2.5818797379848384,
      "grad_norm": 2.65625,
      "learning_rate": 7.743041763696453e-06,
      "loss": 0.8084,
      "step": 736680
    },
    {
      "epoch": 2.581914785491734,
      "grad_norm": 3.5,
      "learning_rate": 7.74239273503275e-06,
      "loss": 0.761,
      "step": 736690
    },
    {
      "epoch": 2.58194983299863,
      "grad_norm": 3.0625,
      "learning_rate": 7.741743706369049e-06,
      "loss": 0.8992,
      "step": 736700
    },
    {
      "epoch": 2.581984880505525,
      "grad_norm": 2.53125,
      "learning_rate": 7.741094677705347e-06,
      "loss": 0.8493,
      "step": 736710
    },
    {
      "epoch": 2.582019928012421,
      "grad_norm": 3.078125,
      "learning_rate": 7.740445649041645e-06,
      "loss": 0.8219,
      "step": 736720
    },
    {
      "epoch": 2.5820549755193163,
      "grad_norm": 2.953125,
      "learning_rate": 7.739796620377943e-06,
      "loss": 0.8906,
      "step": 736730
    },
    {
      "epoch": 2.582090023026212,
      "grad_norm": 2.515625,
      "learning_rate": 7.73914759171424e-06,
      "loss": 0.8474,
      "step": 736740
    },
    {
      "epoch": 2.582125070533108,
      "grad_norm": 2.859375,
      "learning_rate": 7.738498563050539e-06,
      "loss": 0.8512,
      "step": 736750
    },
    {
      "epoch": 2.582160118040003,
      "grad_norm": 3.015625,
      "learning_rate": 7.737849534386837e-06,
      "loss": 0.7366,
      "step": 736760
    },
    {
      "epoch": 2.582195165546899,
      "grad_norm": 3.03125,
      "learning_rate": 7.737200505723136e-06,
      "loss": 0.7874,
      "step": 736770
    },
    {
      "epoch": 2.582230213053794,
      "grad_norm": 2.421875,
      "learning_rate": 7.736551477059434e-06,
      "loss": 0.7777,
      "step": 736780
    },
    {
      "epoch": 2.58226526056069,
      "grad_norm": 3.234375,
      "learning_rate": 7.73590244839573e-06,
      "loss": 0.8392,
      "step": 736790
    },
    {
      "epoch": 2.5823003080675857,
      "grad_norm": 2.703125,
      "learning_rate": 7.735253419732029e-06,
      "loss": 0.7817,
      "step": 736800
    },
    {
      "epoch": 2.5823353555744815,
      "grad_norm": 2.734375,
      "learning_rate": 7.734604391068327e-06,
      "loss": 0.8303,
      "step": 736810
    },
    {
      "epoch": 2.5823704030813768,
      "grad_norm": 2.640625,
      "learning_rate": 7.733955362404625e-06,
      "loss": 0.8538,
      "step": 736820
    },
    {
      "epoch": 2.5824054505882725,
      "grad_norm": 2.78125,
      "learning_rate": 7.733306333740924e-06,
      "loss": 0.6928,
      "step": 736830
    },
    {
      "epoch": 2.582440498095168,
      "grad_norm": 2.953125,
      "learning_rate": 7.732657305077222e-06,
      "loss": 0.8558,
      "step": 736840
    },
    {
      "epoch": 2.5824755456020636,
      "grad_norm": 3.1875,
      "learning_rate": 7.73200827641352e-06,
      "loss": 0.93,
      "step": 736850
    },
    {
      "epoch": 2.5825105931089594,
      "grad_norm": 3.34375,
      "learning_rate": 7.731359247749818e-06,
      "loss": 0.8445,
      "step": 736860
    },
    {
      "epoch": 2.5825456406158547,
      "grad_norm": 2.78125,
      "learning_rate": 7.730710219086116e-06,
      "loss": 0.8464,
      "step": 736870
    },
    {
      "epoch": 2.5825806881227504,
      "grad_norm": 2.84375,
      "learning_rate": 7.730061190422413e-06,
      "loss": 0.8217,
      "step": 736880
    },
    {
      "epoch": 2.5826157356296457,
      "grad_norm": 2.6875,
      "learning_rate": 7.729412161758712e-06,
      "loss": 0.8525,
      "step": 736890
    },
    {
      "epoch": 2.5826507831365415,
      "grad_norm": 2.890625,
      "learning_rate": 7.72876313309501e-06,
      "loss": 0.8088,
      "step": 736900
    },
    {
      "epoch": 2.5826858306434373,
      "grad_norm": 2.703125,
      "learning_rate": 7.728114104431308e-06,
      "loss": 0.7581,
      "step": 736910
    },
    {
      "epoch": 2.582720878150333,
      "grad_norm": 2.875,
      "learning_rate": 7.727465075767606e-06,
      "loss": 0.8482,
      "step": 736920
    },
    {
      "epoch": 2.5827559256572283,
      "grad_norm": 3.078125,
      "learning_rate": 7.726816047103904e-06,
      "loss": 0.8085,
      "step": 736930
    },
    {
      "epoch": 2.582790973164124,
      "grad_norm": 2.984375,
      "learning_rate": 7.726167018440202e-06,
      "loss": 0.7879,
      "step": 736940
    },
    {
      "epoch": 2.5828260206710194,
      "grad_norm": 3.0,
      "learning_rate": 7.7255179897765e-06,
      "loss": 0.8092,
      "step": 736950
    },
    {
      "epoch": 2.582861068177915,
      "grad_norm": 3.25,
      "learning_rate": 7.7248689611128e-06,
      "loss": 0.8175,
      "step": 736960
    },
    {
      "epoch": 2.582896115684811,
      "grad_norm": 3.265625,
      "learning_rate": 7.724219932449098e-06,
      "loss": 0.8935,
      "step": 736970
    },
    {
      "epoch": 2.5829311631917062,
      "grad_norm": 3.28125,
      "learning_rate": 7.723570903785394e-06,
      "loss": 0.8353,
      "step": 736980
    },
    {
      "epoch": 2.582966210698602,
      "grad_norm": 2.640625,
      "learning_rate": 7.722921875121692e-06,
      "loss": 0.8404,
      "step": 736990
    },
    {
      "epoch": 2.5830012582054973,
      "grad_norm": 2.90625,
      "learning_rate": 7.72227284645799e-06,
      "loss": 0.7682,
      "step": 737000
    },
    {
      "epoch": 2.583036305712393,
      "grad_norm": 2.71875,
      "learning_rate": 7.72162381779429e-06,
      "loss": 0.7255,
      "step": 737010
    },
    {
      "epoch": 2.583071353219289,
      "grad_norm": 2.875,
      "learning_rate": 7.720974789130588e-06,
      "loss": 0.7656,
      "step": 737020
    },
    {
      "epoch": 2.5831064007261846,
      "grad_norm": 2.53125,
      "learning_rate": 7.720325760466886e-06,
      "loss": 0.6359,
      "step": 737030
    },
    {
      "epoch": 2.58314144823308,
      "grad_norm": 3.234375,
      "learning_rate": 7.719676731803184e-06,
      "loss": 0.8795,
      "step": 737040
    },
    {
      "epoch": 2.5831764957399757,
      "grad_norm": 2.796875,
      "learning_rate": 7.719027703139482e-06,
      "loss": 0.8191,
      "step": 737050
    },
    {
      "epoch": 2.583211543246871,
      "grad_norm": 2.96875,
      "learning_rate": 7.71837867447578e-06,
      "loss": 0.7978,
      "step": 737060
    },
    {
      "epoch": 2.5832465907537667,
      "grad_norm": 3.15625,
      "learning_rate": 7.717729645812078e-06,
      "loss": 0.7426,
      "step": 737070
    },
    {
      "epoch": 2.5832816382606625,
      "grad_norm": 3.0625,
      "learning_rate": 7.717080617148376e-06,
      "loss": 0.7696,
      "step": 737080
    },
    {
      "epoch": 2.583316685767558,
      "grad_norm": 3.171875,
      "learning_rate": 7.716431588484674e-06,
      "loss": 0.8327,
      "step": 737090
    },
    {
      "epoch": 2.5833517332744536,
      "grad_norm": 2.703125,
      "learning_rate": 7.715782559820972e-06,
      "loss": 0.8523,
      "step": 737100
    },
    {
      "epoch": 2.583386780781349,
      "grad_norm": 2.6875,
      "learning_rate": 7.71513353115727e-06,
      "loss": 0.8114,
      "step": 737110
    },
    {
      "epoch": 2.5834218282882446,
      "grad_norm": 2.84375,
      "learning_rate": 7.714484502493568e-06,
      "loss": 0.7478,
      "step": 737120
    },
    {
      "epoch": 2.5834568757951404,
      "grad_norm": 2.796875,
      "learning_rate": 7.713835473829866e-06,
      "loss": 0.782,
      "step": 737130
    },
    {
      "epoch": 2.583491923302036,
      "grad_norm": 3.078125,
      "learning_rate": 7.713186445166166e-06,
      "loss": 0.8111,
      "step": 737140
    },
    {
      "epoch": 2.5835269708089315,
      "grad_norm": 2.796875,
      "learning_rate": 7.712537416502464e-06,
      "loss": 0.8442,
      "step": 737150
    },
    {
      "epoch": 2.583562018315827,
      "grad_norm": 2.90625,
      "learning_rate": 7.711888387838762e-06,
      "loss": 0.7924,
      "step": 737160
    },
    {
      "epoch": 2.5835970658227225,
      "grad_norm": 2.8125,
      "learning_rate": 7.711239359175058e-06,
      "loss": 0.9126,
      "step": 737170
    },
    {
      "epoch": 2.5836321133296183,
      "grad_norm": 2.890625,
      "learning_rate": 7.710590330511356e-06,
      "loss": 0.839,
      "step": 737180
    },
    {
      "epoch": 2.583667160836514,
      "grad_norm": 3.328125,
      "learning_rate": 7.709941301847654e-06,
      "loss": 0.8692,
      "step": 737190
    },
    {
      "epoch": 2.5837022083434094,
      "grad_norm": 2.625,
      "learning_rate": 7.709292273183954e-06,
      "loss": 0.813,
      "step": 737200
    },
    {
      "epoch": 2.583737255850305,
      "grad_norm": 3.171875,
      "learning_rate": 7.708643244520252e-06,
      "loss": 0.8502,
      "step": 737210
    },
    {
      "epoch": 2.5837723033572004,
      "grad_norm": 2.78125,
      "learning_rate": 7.70799421585655e-06,
      "loss": 0.7661,
      "step": 737220
    },
    {
      "epoch": 2.583807350864096,
      "grad_norm": 2.578125,
      "learning_rate": 7.707345187192848e-06,
      "loss": 0.7872,
      "step": 737230
    },
    {
      "epoch": 2.583842398370992,
      "grad_norm": 2.71875,
      "learning_rate": 7.706696158529146e-06,
      "loss": 0.6924,
      "step": 737240
    },
    {
      "epoch": 2.5838774458778877,
      "grad_norm": 2.5625,
      "learning_rate": 7.706047129865444e-06,
      "loss": 0.7769,
      "step": 737250
    },
    {
      "epoch": 2.583912493384783,
      "grad_norm": 3.0625,
      "learning_rate": 7.705398101201742e-06,
      "loss": 0.8202,
      "step": 737260
    },
    {
      "epoch": 2.5839475408916788,
      "grad_norm": 3.703125,
      "learning_rate": 7.70474907253804e-06,
      "loss": 0.8485,
      "step": 737270
    },
    {
      "epoch": 2.583982588398574,
      "grad_norm": 2.9375,
      "learning_rate": 7.704100043874338e-06,
      "loss": 0.8456,
      "step": 737280
    },
    {
      "epoch": 2.58401763590547,
      "grad_norm": 2.90625,
      "learning_rate": 7.703451015210636e-06,
      "loss": 0.8153,
      "step": 737290
    },
    {
      "epoch": 2.5840526834123656,
      "grad_norm": 3.0,
      "learning_rate": 7.702801986546934e-06,
      "loss": 0.7813,
      "step": 737300
    },
    {
      "epoch": 2.584087730919261,
      "grad_norm": 2.921875,
      "learning_rate": 7.702152957883232e-06,
      "loss": 0.7565,
      "step": 737310
    },
    {
      "epoch": 2.5841227784261567,
      "grad_norm": 3.1875,
      "learning_rate": 7.701503929219531e-06,
      "loss": 0.7743,
      "step": 737320
    },
    {
      "epoch": 2.584157825933052,
      "grad_norm": 3.078125,
      "learning_rate": 7.70085490055583e-06,
      "loss": 0.7343,
      "step": 737330
    },
    {
      "epoch": 2.5841928734399477,
      "grad_norm": 2.140625,
      "learning_rate": 7.700205871892127e-06,
      "loss": 0.7448,
      "step": 737340
    },
    {
      "epoch": 2.5842279209468435,
      "grad_norm": 2.875,
      "learning_rate": 7.699556843228424e-06,
      "loss": 0.7604,
      "step": 737350
    },
    {
      "epoch": 2.5842629684537393,
      "grad_norm": 3.125,
      "learning_rate": 7.698907814564722e-06,
      "loss": 0.7909,
      "step": 737360
    },
    {
      "epoch": 2.5842980159606346,
      "grad_norm": 2.625,
      "learning_rate": 7.69825878590102e-06,
      "loss": 0.8165,
      "step": 737370
    },
    {
      "epoch": 2.5843330634675303,
      "grad_norm": 2.75,
      "learning_rate": 7.69760975723732e-06,
      "loss": 0.6888,
      "step": 737380
    },
    {
      "epoch": 2.5843681109744256,
      "grad_norm": 2.46875,
      "learning_rate": 7.696960728573617e-06,
      "loss": 0.8257,
      "step": 737390
    },
    {
      "epoch": 2.5844031584813214,
      "grad_norm": 3.390625,
      "learning_rate": 7.696311699909915e-06,
      "loss": 0.7045,
      "step": 737400
    },
    {
      "epoch": 2.584438205988217,
      "grad_norm": 2.96875,
      "learning_rate": 7.695662671246213e-06,
      "loss": 0.7973,
      "step": 737410
    },
    {
      "epoch": 2.5844732534951125,
      "grad_norm": 2.671875,
      "learning_rate": 7.695013642582511e-06,
      "loss": 0.7825,
      "step": 737420
    },
    {
      "epoch": 2.5845083010020082,
      "grad_norm": 2.8125,
      "learning_rate": 7.69436461391881e-06,
      "loss": 0.8713,
      "step": 737430
    },
    {
      "epoch": 2.5845433485089035,
      "grad_norm": 2.96875,
      "learning_rate": 7.693715585255107e-06,
      "loss": 0.8177,
      "step": 737440
    },
    {
      "epoch": 2.5845783960157993,
      "grad_norm": 2.671875,
      "learning_rate": 7.693066556591405e-06,
      "loss": 0.8038,
      "step": 737450
    },
    {
      "epoch": 2.584613443522695,
      "grad_norm": 3.203125,
      "learning_rate": 7.692417527927703e-06,
      "loss": 0.8147,
      "step": 737460
    },
    {
      "epoch": 2.584648491029591,
      "grad_norm": 3.125,
      "learning_rate": 7.691768499264001e-06,
      "loss": 0.7713,
      "step": 737470
    },
    {
      "epoch": 2.584683538536486,
      "grad_norm": 2.578125,
      "learning_rate": 7.6911194706003e-06,
      "loss": 0.8644,
      "step": 737480
    },
    {
      "epoch": 2.584718586043382,
      "grad_norm": 2.828125,
      "learning_rate": 7.690470441936597e-06,
      "loss": 0.7867,
      "step": 737490
    },
    {
      "epoch": 2.584753633550277,
      "grad_norm": 2.703125,
      "learning_rate": 7.689821413272895e-06,
      "loss": 0.7537,
      "step": 737500
    },
    {
      "epoch": 2.584788681057173,
      "grad_norm": 2.890625,
      "learning_rate": 7.689172384609195e-06,
      "loss": 0.8363,
      "step": 737510
    },
    {
      "epoch": 2.5848237285640687,
      "grad_norm": 3.03125,
      "learning_rate": 7.688523355945493e-06,
      "loss": 0.8368,
      "step": 737520
    },
    {
      "epoch": 2.584858776070964,
      "grad_norm": 2.921875,
      "learning_rate": 7.687874327281791e-06,
      "loss": 0.8511,
      "step": 737530
    },
    {
      "epoch": 2.58489382357786,
      "grad_norm": 2.875,
      "learning_rate": 7.687225298618087e-06,
      "loss": 0.9015,
      "step": 737540
    },
    {
      "epoch": 2.584928871084755,
      "grad_norm": 2.515625,
      "learning_rate": 7.686576269954385e-06,
      "loss": 0.7841,
      "step": 737550
    },
    {
      "epoch": 2.584963918591651,
      "grad_norm": 2.78125,
      "learning_rate": 7.685927241290685e-06,
      "loss": 0.8254,
      "step": 737560
    },
    {
      "epoch": 2.5849989660985466,
      "grad_norm": 2.484375,
      "learning_rate": 7.685278212626983e-06,
      "loss": 0.816,
      "step": 737570
    },
    {
      "epoch": 2.5850340136054424,
      "grad_norm": 2.671875,
      "learning_rate": 7.684629183963281e-06,
      "loss": 0.7756,
      "step": 737580
    },
    {
      "epoch": 2.5850690611123377,
      "grad_norm": 3.03125,
      "learning_rate": 7.68398015529958e-06,
      "loss": 0.8065,
      "step": 737590
    },
    {
      "epoch": 2.5851041086192335,
      "grad_norm": 3.046875,
      "learning_rate": 7.683331126635877e-06,
      "loss": 0.8603,
      "step": 737600
    },
    {
      "epoch": 2.5851391561261288,
      "grad_norm": 2.90625,
      "learning_rate": 7.682682097972175e-06,
      "loss": 0.8089,
      "step": 737610
    },
    {
      "epoch": 2.5851742036330245,
      "grad_norm": 2.453125,
      "learning_rate": 7.682033069308473e-06,
      "loss": 0.7767,
      "step": 737620
    },
    {
      "epoch": 2.5852092511399203,
      "grad_norm": 3.34375,
      "learning_rate": 7.681384040644773e-06,
      "loss": 0.8015,
      "step": 737630
    },
    {
      "epoch": 2.5852442986468156,
      "grad_norm": 3.140625,
      "learning_rate": 7.68073501198107e-06,
      "loss": 0.8423,
      "step": 737640
    },
    {
      "epoch": 2.5852793461537114,
      "grad_norm": 3.421875,
      "learning_rate": 7.680085983317367e-06,
      "loss": 0.7851,
      "step": 737650
    },
    {
      "epoch": 2.585314393660607,
      "grad_norm": 2.5625,
      "learning_rate": 7.679436954653665e-06,
      "loss": 0.8682,
      "step": 737660
    },
    {
      "epoch": 2.5853494411675024,
      "grad_norm": 3.359375,
      "learning_rate": 7.678787925989963e-06,
      "loss": 0.8099,
      "step": 737670
    },
    {
      "epoch": 2.585384488674398,
      "grad_norm": 3.09375,
      "learning_rate": 7.678138897326261e-06,
      "loss": 0.7557,
      "step": 737680
    },
    {
      "epoch": 2.585419536181294,
      "grad_norm": 3.015625,
      "learning_rate": 7.677489868662561e-06,
      "loss": 0.8087,
      "step": 737690
    },
    {
      "epoch": 2.5854545836881893,
      "grad_norm": 3.4375,
      "learning_rate": 7.676840839998859e-06,
      "loss": 0.8841,
      "step": 737700
    },
    {
      "epoch": 2.585489631195085,
      "grad_norm": 2.5625,
      "learning_rate": 7.676191811335157e-06,
      "loss": 0.8031,
      "step": 737710
    },
    {
      "epoch": 2.5855246787019803,
      "grad_norm": 2.796875,
      "learning_rate": 7.675542782671455e-06,
      "loss": 0.7726,
      "step": 737720
    },
    {
      "epoch": 2.585559726208876,
      "grad_norm": 2.8125,
      "learning_rate": 7.674893754007751e-06,
      "loss": 0.7889,
      "step": 737730
    },
    {
      "epoch": 2.585594773715772,
      "grad_norm": 3.234375,
      "learning_rate": 7.67424472534405e-06,
      "loss": 0.8507,
      "step": 737740
    },
    {
      "epoch": 2.5856298212226676,
      "grad_norm": 3.15625,
      "learning_rate": 7.673595696680349e-06,
      "loss": 0.833,
      "step": 737750
    },
    {
      "epoch": 2.585664868729563,
      "grad_norm": 3.078125,
      "learning_rate": 7.672946668016647e-06,
      "loss": 0.8014,
      "step": 737760
    },
    {
      "epoch": 2.5856999162364587,
      "grad_norm": 2.578125,
      "learning_rate": 7.672297639352945e-06,
      "loss": 0.7413,
      "step": 737770
    },
    {
      "epoch": 2.585734963743354,
      "grad_norm": 2.875,
      "learning_rate": 7.671648610689243e-06,
      "loss": 0.8245,
      "step": 737780
    },
    {
      "epoch": 2.5857700112502497,
      "grad_norm": 3.015625,
      "learning_rate": 7.670999582025541e-06,
      "loss": 0.8056,
      "step": 737790
    },
    {
      "epoch": 2.5858050587571455,
      "grad_norm": 2.734375,
      "learning_rate": 7.670350553361839e-06,
      "loss": 0.8066,
      "step": 737800
    },
    {
      "epoch": 2.585840106264041,
      "grad_norm": 2.625,
      "learning_rate": 7.669701524698137e-06,
      "loss": 0.7557,
      "step": 737810
    },
    {
      "epoch": 2.5858751537709366,
      "grad_norm": 3.25,
      "learning_rate": 7.669052496034435e-06,
      "loss": 0.8191,
      "step": 737820
    },
    {
      "epoch": 2.585910201277832,
      "grad_norm": 3.03125,
      "learning_rate": 7.668403467370733e-06,
      "loss": 0.8478,
      "step": 737830
    },
    {
      "epoch": 2.5859452487847276,
      "grad_norm": 3.125,
      "learning_rate": 7.667754438707031e-06,
      "loss": 0.8555,
      "step": 737840
    },
    {
      "epoch": 2.5859802962916234,
      "grad_norm": 3.609375,
      "learning_rate": 7.667105410043329e-06,
      "loss": 0.8361,
      "step": 737850
    },
    {
      "epoch": 2.586015343798519,
      "grad_norm": 2.96875,
      "learning_rate": 7.666456381379627e-06,
      "loss": 0.7238,
      "step": 737860
    },
    {
      "epoch": 2.5860503913054145,
      "grad_norm": 3.09375,
      "learning_rate": 7.665807352715927e-06,
      "loss": 0.9324,
      "step": 737870
    },
    {
      "epoch": 2.5860854388123102,
      "grad_norm": 3.3125,
      "learning_rate": 7.665158324052225e-06,
      "loss": 0.7288,
      "step": 737880
    },
    {
      "epoch": 2.5861204863192055,
      "grad_norm": 3.25,
      "learning_rate": 7.664509295388523e-06,
      "loss": 0.855,
      "step": 737890
    },
    {
      "epoch": 2.5861555338261013,
      "grad_norm": 3.171875,
      "learning_rate": 7.66386026672482e-06,
      "loss": 0.8664,
      "step": 737900
    },
    {
      "epoch": 2.586190581332997,
      "grad_norm": 2.6875,
      "learning_rate": 7.663211238061119e-06,
      "loss": 0.7684,
      "step": 737910
    },
    {
      "epoch": 2.5862256288398924,
      "grad_norm": 3.25,
      "learning_rate": 7.662562209397415e-06,
      "loss": 0.7877,
      "step": 737920
    },
    {
      "epoch": 2.586260676346788,
      "grad_norm": 2.78125,
      "learning_rate": 7.661913180733715e-06,
      "loss": 0.9121,
      "step": 737930
    },
    {
      "epoch": 2.5862957238536834,
      "grad_norm": 2.671875,
      "learning_rate": 7.661264152070013e-06,
      "loss": 0.7526,
      "step": 737940
    },
    {
      "epoch": 2.586330771360579,
      "grad_norm": 3.125,
      "learning_rate": 7.66061512340631e-06,
      "loss": 0.7588,
      "step": 737950
    },
    {
      "epoch": 2.586365818867475,
      "grad_norm": 2.859375,
      "learning_rate": 7.659966094742609e-06,
      "loss": 0.7689,
      "step": 737960
    },
    {
      "epoch": 2.5864008663743707,
      "grad_norm": 2.65625,
      "learning_rate": 7.659317066078907e-06,
      "loss": 0.8657,
      "step": 737970
    },
    {
      "epoch": 2.586435913881266,
      "grad_norm": 2.890625,
      "learning_rate": 7.658668037415205e-06,
      "loss": 0.7737,
      "step": 737980
    },
    {
      "epoch": 2.586470961388162,
      "grad_norm": 3.015625,
      "learning_rate": 7.658019008751503e-06,
      "loss": 0.8457,
      "step": 737990
    },
    {
      "epoch": 2.586506008895057,
      "grad_norm": 3.03125,
      "learning_rate": 7.657369980087802e-06,
      "loss": 0.7672,
      "step": 738000
    },
    {
      "epoch": 2.586541056401953,
      "grad_norm": 2.84375,
      "learning_rate": 7.656720951424099e-06,
      "loss": 0.8174,
      "step": 738010
    },
    {
      "epoch": 2.5865761039088486,
      "grad_norm": 3.015625,
      "learning_rate": 7.656071922760397e-06,
      "loss": 0.776,
      "step": 738020
    },
    {
      "epoch": 2.586611151415744,
      "grad_norm": 2.8125,
      "learning_rate": 7.655422894096695e-06,
      "loss": 0.7329,
      "step": 738030
    },
    {
      "epoch": 2.5866461989226397,
      "grad_norm": 2.875,
      "learning_rate": 7.654773865432993e-06,
      "loss": 0.8192,
      "step": 738040
    },
    {
      "epoch": 2.586681246429535,
      "grad_norm": 3.25,
      "learning_rate": 7.65412483676929e-06,
      "loss": 0.8483,
      "step": 738050
    },
    {
      "epoch": 2.5867162939364308,
      "grad_norm": 2.796875,
      "learning_rate": 7.65347580810559e-06,
      "loss": 0.9028,
      "step": 738060
    },
    {
      "epoch": 2.5867513414433265,
      "grad_norm": 3.1875,
      "learning_rate": 7.652826779441888e-06,
      "loss": 0.8349,
      "step": 738070
    },
    {
      "epoch": 2.5867863889502223,
      "grad_norm": 2.6875,
      "learning_rate": 7.652177750778186e-06,
      "loss": 0.8328,
      "step": 738080
    },
    {
      "epoch": 2.5868214364571176,
      "grad_norm": 3.3125,
      "learning_rate": 7.651528722114484e-06,
      "loss": 0.8283,
      "step": 738090
    },
    {
      "epoch": 2.5868564839640134,
      "grad_norm": 2.875,
      "learning_rate": 7.650879693450782e-06,
      "loss": 0.7273,
      "step": 738100
    },
    {
      "epoch": 2.5868915314709087,
      "grad_norm": 3.0625,
      "learning_rate": 7.65023066478708e-06,
      "loss": 0.8993,
      "step": 738110
    },
    {
      "epoch": 2.5869265789778044,
      "grad_norm": 3.125,
      "learning_rate": 7.649581636123378e-06,
      "loss": 0.8667,
      "step": 738120
    },
    {
      "epoch": 2.5869616264847,
      "grad_norm": 2.609375,
      "learning_rate": 7.648932607459676e-06,
      "loss": 0.7962,
      "step": 738130
    },
    {
      "epoch": 2.5869966739915955,
      "grad_norm": 2.53125,
      "learning_rate": 7.648283578795974e-06,
      "loss": 0.8338,
      "step": 738140
    },
    {
      "epoch": 2.5870317214984913,
      "grad_norm": 3.453125,
      "learning_rate": 7.647634550132272e-06,
      "loss": 0.8072,
      "step": 738150
    },
    {
      "epoch": 2.5870667690053866,
      "grad_norm": 2.921875,
      "learning_rate": 7.64698552146857e-06,
      "loss": 0.8287,
      "step": 738160
    },
    {
      "epoch": 2.5871018165122823,
      "grad_norm": 3.109375,
      "learning_rate": 7.646336492804868e-06,
      "loss": 0.8164,
      "step": 738170
    },
    {
      "epoch": 2.587136864019178,
      "grad_norm": 2.890625,
      "learning_rate": 7.645687464141168e-06,
      "loss": 0.9166,
      "step": 738180
    },
    {
      "epoch": 2.587171911526074,
      "grad_norm": 2.78125,
      "learning_rate": 7.645038435477466e-06,
      "loss": 0.7714,
      "step": 738190
    },
    {
      "epoch": 2.587206959032969,
      "grad_norm": 3.09375,
      "learning_rate": 7.644389406813762e-06,
      "loss": 0.8002,
      "step": 738200
    },
    {
      "epoch": 2.587242006539865,
      "grad_norm": 2.984375,
      "learning_rate": 7.64374037815006e-06,
      "loss": 0.8254,
      "step": 738210
    },
    {
      "epoch": 2.5872770540467602,
      "grad_norm": 2.640625,
      "learning_rate": 7.643091349486358e-06,
      "loss": 0.8887,
      "step": 738220
    },
    {
      "epoch": 2.587312101553656,
      "grad_norm": 3.125,
      "learning_rate": 7.642442320822656e-06,
      "loss": 0.8437,
      "step": 738230
    },
    {
      "epoch": 2.5873471490605517,
      "grad_norm": 2.65625,
      "learning_rate": 7.641793292158956e-06,
      "loss": 0.7522,
      "step": 738240
    },
    {
      "epoch": 2.587382196567447,
      "grad_norm": 2.75,
      "learning_rate": 7.641144263495254e-06,
      "loss": 0.7828,
      "step": 738250
    },
    {
      "epoch": 2.587417244074343,
      "grad_norm": 3.015625,
      "learning_rate": 7.640495234831552e-06,
      "loss": 0.8212,
      "step": 738260
    },
    {
      "epoch": 2.587452291581238,
      "grad_norm": 3.03125,
      "learning_rate": 7.63984620616785e-06,
      "loss": 0.88,
      "step": 738270
    },
    {
      "epoch": 2.587487339088134,
      "grad_norm": 3.046875,
      "learning_rate": 7.639197177504148e-06,
      "loss": 0.7622,
      "step": 738280
    },
    {
      "epoch": 2.5875223865950296,
      "grad_norm": 3.390625,
      "learning_rate": 7.638548148840444e-06,
      "loss": 0.8267,
      "step": 738290
    },
    {
      "epoch": 2.5875574341019254,
      "grad_norm": 2.90625,
      "learning_rate": 7.637899120176744e-06,
      "loss": 0.7989,
      "step": 738300
    },
    {
      "epoch": 2.5875924816088207,
      "grad_norm": 2.921875,
      "learning_rate": 7.637250091513042e-06,
      "loss": 0.8139,
      "step": 738310
    },
    {
      "epoch": 2.5876275291157165,
      "grad_norm": 2.671875,
      "learning_rate": 7.63660106284934e-06,
      "loss": 0.8153,
      "step": 738320
    },
    {
      "epoch": 2.587662576622612,
      "grad_norm": 2.828125,
      "learning_rate": 7.635952034185638e-06,
      "loss": 0.8138,
      "step": 738330
    },
    {
      "epoch": 2.5876976241295075,
      "grad_norm": 2.6875,
      "learning_rate": 7.635303005521936e-06,
      "loss": 0.7547,
      "step": 738340
    },
    {
      "epoch": 2.5877326716364033,
      "grad_norm": 2.53125,
      "learning_rate": 7.634653976858234e-06,
      "loss": 0.8069,
      "step": 738350
    },
    {
      "epoch": 2.5877677191432986,
      "grad_norm": 3.21875,
      "learning_rate": 7.634004948194532e-06,
      "loss": 0.82,
      "step": 738360
    },
    {
      "epoch": 2.5878027666501944,
      "grad_norm": 2.859375,
      "learning_rate": 7.633355919530832e-06,
      "loss": 0.7827,
      "step": 738370
    },
    {
      "epoch": 2.5878378141570897,
      "grad_norm": 2.96875,
      "learning_rate": 7.63270689086713e-06,
      "loss": 0.7851,
      "step": 738380
    },
    {
      "epoch": 2.5878728616639854,
      "grad_norm": 2.796875,
      "learning_rate": 7.632057862203426e-06,
      "loss": 0.7786,
      "step": 738390
    },
    {
      "epoch": 2.587907909170881,
      "grad_norm": 3.1875,
      "learning_rate": 7.631408833539724e-06,
      "loss": 0.8535,
      "step": 738400
    },
    {
      "epoch": 2.587942956677777,
      "grad_norm": 2.921875,
      "learning_rate": 7.630759804876022e-06,
      "loss": 0.8703,
      "step": 738410
    },
    {
      "epoch": 2.5879780041846723,
      "grad_norm": 3.515625,
      "learning_rate": 7.630110776212322e-06,
      "loss": 0.82,
      "step": 738420
    },
    {
      "epoch": 2.588013051691568,
      "grad_norm": 2.9375,
      "learning_rate": 7.62946174754862e-06,
      "loss": 0.8114,
      "step": 738430
    },
    {
      "epoch": 2.5880480991984633,
      "grad_norm": 2.703125,
      "learning_rate": 7.628812718884918e-06,
      "loss": 0.8521,
      "step": 738440
    },
    {
      "epoch": 2.588083146705359,
      "grad_norm": 2.640625,
      "learning_rate": 7.628163690221216e-06,
      "loss": 0.8342,
      "step": 738450
    },
    {
      "epoch": 2.588118194212255,
      "grad_norm": 2.75,
      "learning_rate": 7.627514661557514e-06,
      "loss": 0.8251,
      "step": 738460
    },
    {
      "epoch": 2.58815324171915,
      "grad_norm": 3.0,
      "learning_rate": 7.626865632893812e-06,
      "loss": 0.8058,
      "step": 738470
    },
    {
      "epoch": 2.588188289226046,
      "grad_norm": 3.25,
      "learning_rate": 7.626216604230109e-06,
      "loss": 0.8084,
      "step": 738480
    },
    {
      "epoch": 2.5882233367329412,
      "grad_norm": 2.671875,
      "learning_rate": 7.625567575566407e-06,
      "loss": 0.7572,
      "step": 738490
    },
    {
      "epoch": 2.588258384239837,
      "grad_norm": 2.609375,
      "learning_rate": 7.624918546902706e-06,
      "loss": 0.7193,
      "step": 738500
    },
    {
      "epoch": 2.5882934317467328,
      "grad_norm": 2.515625,
      "learning_rate": 7.624269518239004e-06,
      "loss": 0.7845,
      "step": 738510
    },
    {
      "epoch": 2.5883284792536285,
      "grad_norm": 3.3125,
      "learning_rate": 7.623620489575302e-06,
      "loss": 0.8671,
      "step": 738520
    },
    {
      "epoch": 2.588363526760524,
      "grad_norm": 3.125,
      "learning_rate": 7.6229714609116005e-06,
      "loss": 0.8289,
      "step": 738530
    },
    {
      "epoch": 2.5883985742674196,
      "grad_norm": 3.28125,
      "learning_rate": 7.6223224322478985e-06,
      "loss": 0.842,
      "step": 738540
    },
    {
      "epoch": 2.588433621774315,
      "grad_norm": 3.140625,
      "learning_rate": 7.6216734035841965e-06,
      "loss": 0.8297,
      "step": 738550
    },
    {
      "epoch": 2.5884686692812107,
      "grad_norm": 3.140625,
      "learning_rate": 7.6210243749204945e-06,
      "loss": 0.7661,
      "step": 738560
    },
    {
      "epoch": 2.5885037167881064,
      "grad_norm": 2.484375,
      "learning_rate": 7.620375346256793e-06,
      "loss": 0.8541,
      "step": 738570
    },
    {
      "epoch": 2.5885387642950017,
      "grad_norm": 2.75,
      "learning_rate": 7.61972631759309e-06,
      "loss": 0.7792,
      "step": 738580
    },
    {
      "epoch": 2.5885738118018975,
      "grad_norm": 3.3125,
      "learning_rate": 7.6190772889293885e-06,
      "loss": 0.7841,
      "step": 738590
    },
    {
      "epoch": 2.588608859308793,
      "grad_norm": 2.65625,
      "learning_rate": 7.6184282602656865e-06,
      "loss": 0.773,
      "step": 738600
    },
    {
      "epoch": 2.5886439068156886,
      "grad_norm": 3.078125,
      "learning_rate": 7.6177792316019845e-06,
      "loss": 0.7113,
      "step": 738610
    },
    {
      "epoch": 2.5886789543225843,
      "grad_norm": 3.625,
      "learning_rate": 7.617130202938283e-06,
      "loss": 0.7525,
      "step": 738620
    },
    {
      "epoch": 2.58871400182948,
      "grad_norm": 2.765625,
      "learning_rate": 7.616481174274581e-06,
      "loss": 0.7879,
      "step": 738630
    },
    {
      "epoch": 2.5887490493363754,
      "grad_norm": 3.3125,
      "learning_rate": 7.615832145610879e-06,
      "loss": 0.8449,
      "step": 738640
    },
    {
      "epoch": 2.588784096843271,
      "grad_norm": 2.890625,
      "learning_rate": 7.615183116947177e-06,
      "loss": 0.7803,
      "step": 738650
    },
    {
      "epoch": 2.5888191443501665,
      "grad_norm": 2.84375,
      "learning_rate": 7.614534088283476e-06,
      "loss": 0.7809,
      "step": 738660
    },
    {
      "epoch": 2.5888541918570622,
      "grad_norm": 2.625,
      "learning_rate": 7.6138850596197725e-06,
      "loss": 0.7831,
      "step": 738670
    },
    {
      "epoch": 2.588889239363958,
      "grad_norm": 2.90625,
      "learning_rate": 7.613236030956071e-06,
      "loss": 0.7662,
      "step": 738680
    },
    {
      "epoch": 2.5889242868708533,
      "grad_norm": 2.671875,
      "learning_rate": 7.612587002292369e-06,
      "loss": 0.7412,
      "step": 738690
    },
    {
      "epoch": 2.588959334377749,
      "grad_norm": 2.421875,
      "learning_rate": 7.611937973628667e-06,
      "loss": 0.8188,
      "step": 738700
    },
    {
      "epoch": 2.5889943818846444,
      "grad_norm": 3.3125,
      "learning_rate": 7.611288944964965e-06,
      "loss": 0.9012,
      "step": 738710
    },
    {
      "epoch": 2.58902942939154,
      "grad_norm": 2.859375,
      "learning_rate": 7.610639916301264e-06,
      "loss": 0.779,
      "step": 738720
    },
    {
      "epoch": 2.589064476898436,
      "grad_norm": 2.765625,
      "learning_rate": 7.609990887637562e-06,
      "loss": 0.839,
      "step": 738730
    },
    {
      "epoch": 2.5890995244053316,
      "grad_norm": 2.703125,
      "learning_rate": 7.60934185897386e-06,
      "loss": 0.765,
      "step": 738740
    },
    {
      "epoch": 2.589134571912227,
      "grad_norm": 2.65625,
      "learning_rate": 7.608692830310159e-06,
      "loss": 0.7801,
      "step": 738750
    },
    {
      "epoch": 2.5891696194191227,
      "grad_norm": 3.796875,
      "learning_rate": 7.608043801646457e-06,
      "loss": 0.8792,
      "step": 738760
    },
    {
      "epoch": 2.589204666926018,
      "grad_norm": 2.828125,
      "learning_rate": 7.607394772982754e-06,
      "loss": 0.7879,
      "step": 738770
    },
    {
      "epoch": 2.589239714432914,
      "grad_norm": 2.6875,
      "learning_rate": 7.606745744319052e-06,
      "loss": 0.8171,
      "step": 738780
    },
    {
      "epoch": 2.5892747619398095,
      "grad_norm": 3.328125,
      "learning_rate": 7.60609671565535e-06,
      "loss": 0.8446,
      "step": 738790
    },
    {
      "epoch": 2.589309809446705,
      "grad_norm": 2.9375,
      "learning_rate": 7.605447686991648e-06,
      "loss": 0.754,
      "step": 738800
    },
    {
      "epoch": 2.5893448569536006,
      "grad_norm": 3.125,
      "learning_rate": 7.604798658327947e-06,
      "loss": 0.7859,
      "step": 738810
    },
    {
      "epoch": 2.589379904460496,
      "grad_norm": 2.734375,
      "learning_rate": 7.604149629664245e-06,
      "loss": 0.7366,
      "step": 738820
    },
    {
      "epoch": 2.5894149519673917,
      "grad_norm": 2.734375,
      "learning_rate": 7.603500601000543e-06,
      "loss": 0.845,
      "step": 738830
    },
    {
      "epoch": 2.5894499994742874,
      "grad_norm": 2.78125,
      "learning_rate": 7.602851572336842e-06,
      "loss": 0.7744,
      "step": 738840
    },
    {
      "epoch": 2.589485046981183,
      "grad_norm": 3.28125,
      "learning_rate": 7.60220254367314e-06,
      "loss": 0.84,
      "step": 738850
    },
    {
      "epoch": 2.5895200944880785,
      "grad_norm": 2.859375,
      "learning_rate": 7.601553515009437e-06,
      "loss": 0.8137,
      "step": 738860
    },
    {
      "epoch": 2.5895551419949743,
      "grad_norm": 3.046875,
      "learning_rate": 7.600904486345735e-06,
      "loss": 0.8096,
      "step": 738870
    },
    {
      "epoch": 2.5895901895018696,
      "grad_norm": 2.609375,
      "learning_rate": 7.600255457682033e-06,
      "loss": 0.7977,
      "step": 738880
    },
    {
      "epoch": 2.5896252370087653,
      "grad_norm": 3.03125,
      "learning_rate": 7.599606429018331e-06,
      "loss": 0.817,
      "step": 738890
    },
    {
      "epoch": 2.589660284515661,
      "grad_norm": 2.765625,
      "learning_rate": 7.59895740035463e-06,
      "loss": 0.7001,
      "step": 738900
    },
    {
      "epoch": 2.5896953320225564,
      "grad_norm": 3.0,
      "learning_rate": 7.598308371690928e-06,
      "loss": 0.8199,
      "step": 738910
    },
    {
      "epoch": 2.589730379529452,
      "grad_norm": 3.25,
      "learning_rate": 7.597659343027226e-06,
      "loss": 0.7585,
      "step": 738920
    },
    {
      "epoch": 2.589765427036348,
      "grad_norm": 3.140625,
      "learning_rate": 7.597010314363525e-06,
      "loss": 0.8729,
      "step": 738930
    },
    {
      "epoch": 2.5898004745432432,
      "grad_norm": 2.625,
      "learning_rate": 7.596361285699823e-06,
      "loss": 0.7595,
      "step": 738940
    },
    {
      "epoch": 2.589835522050139,
      "grad_norm": 2.96875,
      "learning_rate": 7.595712257036119e-06,
      "loss": 0.8007,
      "step": 738950
    },
    {
      "epoch": 2.5898705695570348,
      "grad_norm": 2.75,
      "learning_rate": 7.595063228372418e-06,
      "loss": 0.7715,
      "step": 738960
    },
    {
      "epoch": 2.58990561706393,
      "grad_norm": 3.890625,
      "learning_rate": 7.594414199708716e-06,
      "loss": 0.9161,
      "step": 738970
    },
    {
      "epoch": 2.589940664570826,
      "grad_norm": 2.953125,
      "learning_rate": 7.593765171045014e-06,
      "loss": 0.8003,
      "step": 738980
    },
    {
      "epoch": 2.589975712077721,
      "grad_norm": 2.546875,
      "learning_rate": 7.593116142381313e-06,
      "loss": 0.7559,
      "step": 738990
    },
    {
      "epoch": 2.590010759584617,
      "grad_norm": 2.984375,
      "learning_rate": 7.592467113717611e-06,
      "loss": 0.8045,
      "step": 739000
    },
    {
      "epoch": 2.5900458070915127,
      "grad_norm": 3.34375,
      "learning_rate": 7.591818085053909e-06,
      "loss": 0.7749,
      "step": 739010
    },
    {
      "epoch": 2.590080854598408,
      "grad_norm": 2.765625,
      "learning_rate": 7.591169056390207e-06,
      "loss": 0.8163,
      "step": 739020
    },
    {
      "epoch": 2.5901159021053037,
      "grad_norm": 2.71875,
      "learning_rate": 7.5905200277265056e-06,
      "loss": 0.7714,
      "step": 739030
    },
    {
      "epoch": 2.5901509496121995,
      "grad_norm": 2.890625,
      "learning_rate": 7.5898709990628036e-06,
      "loss": 0.7199,
      "step": 739040
    },
    {
      "epoch": 2.590185997119095,
      "grad_norm": 3.28125,
      "learning_rate": 7.589221970399101e-06,
      "loss": 0.8223,
      "step": 739050
    },
    {
      "epoch": 2.5902210446259906,
      "grad_norm": 2.828125,
      "learning_rate": 7.588572941735399e-06,
      "loss": 0.7773,
      "step": 739060
    },
    {
      "epoch": 2.5902560921328863,
      "grad_norm": 2.859375,
      "learning_rate": 7.587923913071697e-06,
      "loss": 0.7952,
      "step": 739070
    },
    {
      "epoch": 2.5902911396397816,
      "grad_norm": 2.453125,
      "learning_rate": 7.5872748844079956e-06,
      "loss": 0.7694,
      "step": 739080
    },
    {
      "epoch": 2.5903261871466774,
      "grad_norm": 2.84375,
      "learning_rate": 7.5866258557442936e-06,
      "loss": 0.7685,
      "step": 739090
    },
    {
      "epoch": 2.5903612346535727,
      "grad_norm": 2.9375,
      "learning_rate": 7.5859768270805916e-06,
      "loss": 0.8296,
      "step": 739100
    },
    {
      "epoch": 2.5903962821604685,
      "grad_norm": 3.375,
      "learning_rate": 7.5853277984168896e-06,
      "loss": 0.8517,
      "step": 739110
    },
    {
      "epoch": 2.5904313296673642,
      "grad_norm": 2.5,
      "learning_rate": 7.584678769753188e-06,
      "loss": 0.8494,
      "step": 739120
    },
    {
      "epoch": 2.59046637717426,
      "grad_norm": 3.28125,
      "learning_rate": 7.584029741089486e-06,
      "loss": 0.8346,
      "step": 739130
    },
    {
      "epoch": 2.5905014246811553,
      "grad_norm": 3.15625,
      "learning_rate": 7.5833807124257836e-06,
      "loss": 0.8143,
      "step": 739140
    },
    {
      "epoch": 2.590536472188051,
      "grad_norm": 3.125,
      "learning_rate": 7.5827316837620816e-06,
      "loss": 0.7986,
      "step": 739150
    },
    {
      "epoch": 2.5905715196949464,
      "grad_norm": 3.21875,
      "learning_rate": 7.5820826550983796e-06,
      "loss": 0.8026,
      "step": 739160
    },
    {
      "epoch": 2.590606567201842,
      "grad_norm": 3.015625,
      "learning_rate": 7.581433626434678e-06,
      "loss": 0.8183,
      "step": 739170
    },
    {
      "epoch": 2.590641614708738,
      "grad_norm": 2.890625,
      "learning_rate": 7.580784597770976e-06,
      "loss": 0.8571,
      "step": 739180
    },
    {
      "epoch": 2.590676662215633,
      "grad_norm": 3.0625,
      "learning_rate": 7.580135569107274e-06,
      "loss": 0.8243,
      "step": 739190
    },
    {
      "epoch": 2.590711709722529,
      "grad_norm": 2.46875,
      "learning_rate": 7.579486540443572e-06,
      "loss": 0.7353,
      "step": 739200
    },
    {
      "epoch": 2.5907467572294243,
      "grad_norm": 3.375,
      "learning_rate": 7.578837511779871e-06,
      "loss": 0.7671,
      "step": 739210
    },
    {
      "epoch": 2.59078180473632,
      "grad_norm": 3.0,
      "learning_rate": 7.578188483116169e-06,
      "loss": 0.8778,
      "step": 739220
    },
    {
      "epoch": 2.590816852243216,
      "grad_norm": 3.125,
      "learning_rate": 7.577539454452467e-06,
      "loss": 0.893,
      "step": 739230
    },
    {
      "epoch": 2.5908518997501115,
      "grad_norm": 2.828125,
      "learning_rate": 7.576890425788764e-06,
      "loss": 0.7778,
      "step": 739240
    },
    {
      "epoch": 2.590886947257007,
      "grad_norm": 3.125,
      "learning_rate": 7.576241397125062e-06,
      "loss": 0.8281,
      "step": 739250
    },
    {
      "epoch": 2.5909219947639026,
      "grad_norm": 2.765625,
      "learning_rate": 7.57559236846136e-06,
      "loss": 0.8075,
      "step": 739260
    },
    {
      "epoch": 2.590957042270798,
      "grad_norm": 2.96875,
      "learning_rate": 7.574943339797659e-06,
      "loss": 0.7727,
      "step": 739270
    },
    {
      "epoch": 2.5909920897776937,
      "grad_norm": 3.0,
      "learning_rate": 7.574294311133957e-06,
      "loss": 0.8461,
      "step": 739280
    },
    {
      "epoch": 2.5910271372845894,
      "grad_norm": 3.265625,
      "learning_rate": 7.573645282470255e-06,
      "loss": 0.8542,
      "step": 739290
    },
    {
      "epoch": 2.5910621847914848,
      "grad_norm": 3.640625,
      "learning_rate": 7.572996253806554e-06,
      "loss": 0.8523,
      "step": 739300
    },
    {
      "epoch": 2.5910972322983805,
      "grad_norm": 3.359375,
      "learning_rate": 7.572347225142852e-06,
      "loss": 0.8263,
      "step": 739310
    },
    {
      "epoch": 2.591132279805276,
      "grad_norm": 2.96875,
      "learning_rate": 7.57169819647915e-06,
      "loss": 0.8849,
      "step": 739320
    },
    {
      "epoch": 2.5911673273121716,
      "grad_norm": 2.796875,
      "learning_rate": 7.571049167815447e-06,
      "loss": 0.83,
      "step": 739330
    },
    {
      "epoch": 2.5912023748190673,
      "grad_norm": 2.796875,
      "learning_rate": 7.570400139151745e-06,
      "loss": 0.7814,
      "step": 739340
    },
    {
      "epoch": 2.591237422325963,
      "grad_norm": 2.640625,
      "learning_rate": 7.569751110488043e-06,
      "loss": 0.7625,
      "step": 739350
    },
    {
      "epoch": 2.5912724698328584,
      "grad_norm": 2.890625,
      "learning_rate": 7.569102081824342e-06,
      "loss": 0.8121,
      "step": 739360
    },
    {
      "epoch": 2.591307517339754,
      "grad_norm": 3.015625,
      "learning_rate": 7.56845305316064e-06,
      "loss": 0.6763,
      "step": 739370
    },
    {
      "epoch": 2.5913425648466495,
      "grad_norm": 3.09375,
      "learning_rate": 7.567804024496938e-06,
      "loss": 0.8675,
      "step": 739380
    },
    {
      "epoch": 2.5913776123535452,
      "grad_norm": 2.703125,
      "learning_rate": 7.567154995833237e-06,
      "loss": 0.912,
      "step": 739390
    },
    {
      "epoch": 2.591412659860441,
      "grad_norm": 2.921875,
      "learning_rate": 7.566505967169535e-06,
      "loss": 0.8391,
      "step": 739400
    },
    {
      "epoch": 2.5914477073673363,
      "grad_norm": 2.921875,
      "learning_rate": 7.565856938505833e-06,
      "loss": 0.7848,
      "step": 739410
    },
    {
      "epoch": 2.591482754874232,
      "grad_norm": 3.03125,
      "learning_rate": 7.56520790984213e-06,
      "loss": 0.7749,
      "step": 739420
    },
    {
      "epoch": 2.5915178023811274,
      "grad_norm": 3.03125,
      "learning_rate": 7.564558881178428e-06,
      "loss": 0.7835,
      "step": 739430
    },
    {
      "epoch": 2.591552849888023,
      "grad_norm": 3.0625,
      "learning_rate": 7.563909852514726e-06,
      "loss": 0.7739,
      "step": 739440
    },
    {
      "epoch": 2.591587897394919,
      "grad_norm": 2.796875,
      "learning_rate": 7.563260823851025e-06,
      "loss": 0.7609,
      "step": 739450
    },
    {
      "epoch": 2.5916229449018147,
      "grad_norm": 2.578125,
      "learning_rate": 7.562611795187323e-06,
      "loss": 0.7731,
      "step": 739460
    },
    {
      "epoch": 2.59165799240871,
      "grad_norm": 3.359375,
      "learning_rate": 7.561962766523621e-06,
      "loss": 0.7814,
      "step": 739470
    },
    {
      "epoch": 2.5916930399156057,
      "grad_norm": 2.734375,
      "learning_rate": 7.56131373785992e-06,
      "loss": 0.7711,
      "step": 739480
    },
    {
      "epoch": 2.591728087422501,
      "grad_norm": 2.734375,
      "learning_rate": 7.560664709196218e-06,
      "loss": 0.8489,
      "step": 739490
    },
    {
      "epoch": 2.591763134929397,
      "grad_norm": 3.328125,
      "learning_rate": 7.560015680532516e-06,
      "loss": 0.8959,
      "step": 739500
    },
    {
      "epoch": 2.5917981824362926,
      "grad_norm": 2.75,
      "learning_rate": 7.559366651868814e-06,
      "loss": 0.7721,
      "step": 739510
    },
    {
      "epoch": 2.591833229943188,
      "grad_norm": 3.3125,
      "learning_rate": 7.558717623205111e-06,
      "loss": 0.8086,
      "step": 739520
    },
    {
      "epoch": 2.5918682774500836,
      "grad_norm": 3.234375,
      "learning_rate": 7.558068594541409e-06,
      "loss": 0.8991,
      "step": 739530
    },
    {
      "epoch": 2.591903324956979,
      "grad_norm": 3.09375,
      "learning_rate": 7.557419565877708e-06,
      "loss": 0.8301,
      "step": 739540
    },
    {
      "epoch": 2.5919383724638747,
      "grad_norm": 2.90625,
      "learning_rate": 7.556770537214006e-06,
      "loss": 0.8062,
      "step": 739550
    },
    {
      "epoch": 2.5919734199707705,
      "grad_norm": 2.640625,
      "learning_rate": 7.556121508550304e-06,
      "loss": 0.7728,
      "step": 739560
    },
    {
      "epoch": 2.5920084674776662,
      "grad_norm": 2.75,
      "learning_rate": 7.555472479886603e-06,
      "loss": 0.8259,
      "step": 739570
    },
    {
      "epoch": 2.5920435149845615,
      "grad_norm": 3.109375,
      "learning_rate": 7.554823451222901e-06,
      "loss": 0.7849,
      "step": 739580
    },
    {
      "epoch": 2.5920785624914573,
      "grad_norm": 2.53125,
      "learning_rate": 7.554174422559199e-06,
      "loss": 0.8335,
      "step": 739590
    },
    {
      "epoch": 2.5921136099983526,
      "grad_norm": 2.640625,
      "learning_rate": 7.553525393895497e-06,
      "loss": 0.7848,
      "step": 739600
    },
    {
      "epoch": 2.5921486575052484,
      "grad_norm": 2.96875,
      "learning_rate": 7.552876365231794e-06,
      "loss": 0.7497,
      "step": 739610
    },
    {
      "epoch": 2.592183705012144,
      "grad_norm": 3.3125,
      "learning_rate": 7.552227336568092e-06,
      "loss": 0.8883,
      "step": 739620
    },
    {
      "epoch": 2.5922187525190394,
      "grad_norm": 3.046875,
      "learning_rate": 7.551578307904391e-06,
      "loss": 0.7561,
      "step": 739630
    },
    {
      "epoch": 2.592253800025935,
      "grad_norm": 3.03125,
      "learning_rate": 7.550929279240689e-06,
      "loss": 0.7499,
      "step": 739640
    },
    {
      "epoch": 2.5922888475328305,
      "grad_norm": 3.421875,
      "learning_rate": 7.550280250576987e-06,
      "loss": 0.8354,
      "step": 739650
    },
    {
      "epoch": 2.5923238950397263,
      "grad_norm": 2.671875,
      "learning_rate": 7.549631221913285e-06,
      "loss": 0.8442,
      "step": 739660
    },
    {
      "epoch": 2.592358942546622,
      "grad_norm": 2.921875,
      "learning_rate": 7.5489821932495835e-06,
      "loss": 0.8446,
      "step": 739670
    },
    {
      "epoch": 2.592393990053518,
      "grad_norm": 2.859375,
      "learning_rate": 7.5483331645858815e-06,
      "loss": 0.8251,
      "step": 739680
    },
    {
      "epoch": 2.592429037560413,
      "grad_norm": 3.046875,
      "learning_rate": 7.5476841359221795e-06,
      "loss": 0.8411,
      "step": 739690
    },
    {
      "epoch": 2.592464085067309,
      "grad_norm": 2.734375,
      "learning_rate": 7.547035107258478e-06,
      "loss": 0.8269,
      "step": 739700
    },
    {
      "epoch": 2.592499132574204,
      "grad_norm": 3.140625,
      "learning_rate": 7.546386078594775e-06,
      "loss": 0.8177,
      "step": 739710
    },
    {
      "epoch": 2.5925341800811,
      "grad_norm": 2.71875,
      "learning_rate": 7.5457370499310735e-06,
      "loss": 0.8519,
      "step": 739720
    },
    {
      "epoch": 2.5925692275879957,
      "grad_norm": 2.703125,
      "learning_rate": 7.5450880212673715e-06,
      "loss": 0.7475,
      "step": 739730
    },
    {
      "epoch": 2.592604275094891,
      "grad_norm": 2.96875,
      "learning_rate": 7.5444389926036695e-06,
      "loss": 0.7951,
      "step": 739740
    },
    {
      "epoch": 2.5926393226017868,
      "grad_norm": 3.390625,
      "learning_rate": 7.5437899639399675e-06,
      "loss": 0.8808,
      "step": 739750
    },
    {
      "epoch": 2.592674370108682,
      "grad_norm": 3.015625,
      "learning_rate": 7.543140935276266e-06,
      "loss": 0.7946,
      "step": 739760
    },
    {
      "epoch": 2.592709417615578,
      "grad_norm": 3.09375,
      "learning_rate": 7.542491906612564e-06,
      "loss": 0.7259,
      "step": 739770
    },
    {
      "epoch": 2.5927444651224736,
      "grad_norm": 2.765625,
      "learning_rate": 7.541842877948862e-06,
      "loss": 0.8573,
      "step": 739780
    },
    {
      "epoch": 2.5927795126293693,
      "grad_norm": 2.84375,
      "learning_rate": 7.541193849285161e-06,
      "loss": 0.7952,
      "step": 739790
    },
    {
      "epoch": 2.5928145601362647,
      "grad_norm": 2.75,
      "learning_rate": 7.5405448206214575e-06,
      "loss": 0.7907,
      "step": 739800
    },
    {
      "epoch": 2.5928496076431604,
      "grad_norm": 3.109375,
      "learning_rate": 7.5398957919577555e-06,
      "loss": 0.8416,
      "step": 739810
    },
    {
      "epoch": 2.5928846551500557,
      "grad_norm": 2.71875,
      "learning_rate": 7.539246763294054e-06,
      "loss": 0.7865,
      "step": 739820
    },
    {
      "epoch": 2.5929197026569515,
      "grad_norm": 3.6875,
      "learning_rate": 7.538597734630352e-06,
      "loss": 0.8633,
      "step": 739830
    },
    {
      "epoch": 2.5929547501638472,
      "grad_norm": 2.546875,
      "learning_rate": 7.53794870596665e-06,
      "loss": 0.8607,
      "step": 739840
    },
    {
      "epoch": 2.5929897976707426,
      "grad_norm": 3.21875,
      "learning_rate": 7.537299677302949e-06,
      "loss": 0.8313,
      "step": 739850
    },
    {
      "epoch": 2.5930248451776383,
      "grad_norm": 2.953125,
      "learning_rate": 7.536650648639247e-06,
      "loss": 0.8633,
      "step": 739860
    },
    {
      "epoch": 2.5930598926845336,
      "grad_norm": 3.015625,
      "learning_rate": 7.536001619975545e-06,
      "loss": 0.8705,
      "step": 739870
    },
    {
      "epoch": 2.5930949401914294,
      "grad_norm": 3.203125,
      "learning_rate": 7.535352591311844e-06,
      "loss": 0.7836,
      "step": 739880
    },
    {
      "epoch": 2.593129987698325,
      "grad_norm": 2.75,
      "learning_rate": 7.53470356264814e-06,
      "loss": 0.7402,
      "step": 739890
    },
    {
      "epoch": 2.593165035205221,
      "grad_norm": 3.015625,
      "learning_rate": 7.534054533984438e-06,
      "loss": 0.8739,
      "step": 739900
    },
    {
      "epoch": 2.593200082712116,
      "grad_norm": 2.9375,
      "learning_rate": 7.533405505320737e-06,
      "loss": 0.8985,
      "step": 739910
    },
    {
      "epoch": 2.593235130219012,
      "grad_norm": 2.78125,
      "learning_rate": 7.532756476657035e-06,
      "loss": 0.8628,
      "step": 739920
    },
    {
      "epoch": 2.5932701777259073,
      "grad_norm": 3.21875,
      "learning_rate": 7.532107447993333e-06,
      "loss": 0.7731,
      "step": 739930
    },
    {
      "epoch": 2.593305225232803,
      "grad_norm": 2.9375,
      "learning_rate": 7.531458419329632e-06,
      "loss": 0.833,
      "step": 739940
    },
    {
      "epoch": 2.593340272739699,
      "grad_norm": 2.59375,
      "learning_rate": 7.53080939066593e-06,
      "loss": 0.8195,
      "step": 739950
    },
    {
      "epoch": 2.593375320246594,
      "grad_norm": 3.046875,
      "learning_rate": 7.530160362002228e-06,
      "loss": 0.82,
      "step": 739960
    },
    {
      "epoch": 2.59341036775349,
      "grad_norm": 3.078125,
      "learning_rate": 7.529511333338526e-06,
      "loss": 0.8574,
      "step": 739970
    },
    {
      "epoch": 2.593445415260385,
      "grad_norm": 2.734375,
      "learning_rate": 7.528862304674825e-06,
      "loss": 0.7951,
      "step": 739980
    },
    {
      "epoch": 2.593480462767281,
      "grad_norm": 2.96875,
      "learning_rate": 7.528213276011121e-06,
      "loss": 0.8508,
      "step": 739990
    },
    {
      "epoch": 2.5935155102741767,
      "grad_norm": 2.75,
      "learning_rate": 7.52756424734742e-06,
      "loss": 0.7385,
      "step": 740000
    },
    {
      "epoch": 2.5935155102741767,
      "eval_loss": 0.7547098994255066,
      "eval_runtime": 553.2638,
      "eval_samples_per_second": 687.621,
      "eval_steps_per_second": 57.302,
      "step": 740000
    },
    {
      "epoch": 2.5935505577810725,
      "grad_norm": 2.15625,
      "learning_rate": 7.526915218683718e-06,
      "loss": 0.7781,
      "step": 740010
    },
    {
      "epoch": 2.5935856052879678,
      "grad_norm": 2.46875,
      "learning_rate": 7.526266190020016e-06,
      "loss": 0.7686,
      "step": 740020
    },
    {
      "epoch": 2.5936206527948635,
      "grad_norm": 2.78125,
      "learning_rate": 7.525617161356315e-06,
      "loss": 0.7949,
      "step": 740030
    },
    {
      "epoch": 2.593655700301759,
      "grad_norm": 2.71875,
      "learning_rate": 7.524968132692613e-06,
      "loss": 0.7422,
      "step": 740040
    },
    {
      "epoch": 2.5936907478086546,
      "grad_norm": 2.703125,
      "learning_rate": 7.524319104028911e-06,
      "loss": 0.7972,
      "step": 740050
    },
    {
      "epoch": 2.5937257953155504,
      "grad_norm": 3.234375,
      "learning_rate": 7.523670075365209e-06,
      "loss": 0.803,
      "step": 740060
    },
    {
      "epoch": 2.5937608428224457,
      "grad_norm": 2.90625,
      "learning_rate": 7.523021046701508e-06,
      "loss": 0.7468,
      "step": 740070
    },
    {
      "epoch": 2.5937958903293414,
      "grad_norm": 3.0,
      "learning_rate": 7.522372018037804e-06,
      "loss": 0.7432,
      "step": 740080
    },
    {
      "epoch": 2.5938309378362367,
      "grad_norm": 2.71875,
      "learning_rate": 7.521722989374103e-06,
      "loss": 0.8723,
      "step": 740090
    },
    {
      "epoch": 2.5938659853431325,
      "grad_norm": 3.53125,
      "learning_rate": 7.521073960710401e-06,
      "loss": 0.8134,
      "step": 740100
    },
    {
      "epoch": 2.5939010328500283,
      "grad_norm": 3.015625,
      "learning_rate": 7.520424932046699e-06,
      "loss": 0.8944,
      "step": 740110
    },
    {
      "epoch": 2.593936080356924,
      "grad_norm": 2.859375,
      "learning_rate": 7.519775903382998e-06,
      "loss": 0.7763,
      "step": 740120
    },
    {
      "epoch": 2.5939711278638193,
      "grad_norm": 3.21875,
      "learning_rate": 7.519126874719296e-06,
      "loss": 0.7973,
      "step": 740130
    },
    {
      "epoch": 2.594006175370715,
      "grad_norm": 2.875,
      "learning_rate": 7.518477846055594e-06,
      "loss": 0.811,
      "step": 740140
    },
    {
      "epoch": 2.5940412228776104,
      "grad_norm": 3.0,
      "learning_rate": 7.517828817391892e-06,
      "loss": 0.7616,
      "step": 740150
    },
    {
      "epoch": 2.594076270384506,
      "grad_norm": 2.6875,
      "learning_rate": 7.5171797887281906e-06,
      "loss": 0.8583,
      "step": 740160
    },
    {
      "epoch": 2.594111317891402,
      "grad_norm": 3.46875,
      "learning_rate": 7.5165307600644886e-06,
      "loss": 0.7826,
      "step": 740170
    },
    {
      "epoch": 2.5941463653982972,
      "grad_norm": 2.71875,
      "learning_rate": 7.515881731400786e-06,
      "loss": 0.8343,
      "step": 740180
    },
    {
      "epoch": 2.594181412905193,
      "grad_norm": 3.21875,
      "learning_rate": 7.515232702737084e-06,
      "loss": 0.7441,
      "step": 740190
    },
    {
      "epoch": 2.5942164604120883,
      "grad_norm": 2.609375,
      "learning_rate": 7.514583674073382e-06,
      "loss": 0.7305,
      "step": 740200
    },
    {
      "epoch": 2.594251507918984,
      "grad_norm": 2.78125,
      "learning_rate": 7.51393464540968e-06,
      "loss": 0.9103,
      "step": 740210
    },
    {
      "epoch": 2.59428655542588,
      "grad_norm": 2.4375,
      "learning_rate": 7.5132856167459786e-06,
      "loss": 0.7978,
      "step": 740220
    },
    {
      "epoch": 2.5943216029327756,
      "grad_norm": 3.0,
      "learning_rate": 7.5126365880822766e-06,
      "loss": 0.7697,
      "step": 740230
    },
    {
      "epoch": 2.594356650439671,
      "grad_norm": 2.28125,
      "learning_rate": 7.5119875594185746e-06,
      "loss": 0.7109,
      "step": 740240
    },
    {
      "epoch": 2.5943916979465667,
      "grad_norm": 2.484375,
      "learning_rate": 7.511338530754873e-06,
      "loss": 0.8146,
      "step": 740250
    },
    {
      "epoch": 2.594426745453462,
      "grad_norm": 2.875,
      "learning_rate": 7.510689502091171e-06,
      "loss": 0.7609,
      "step": 740260
    },
    {
      "epoch": 2.5944617929603577,
      "grad_norm": 3.40625,
      "learning_rate": 7.5100404734274686e-06,
      "loss": 0.8727,
      "step": 740270
    },
    {
      "epoch": 2.5944968404672535,
      "grad_norm": 2.75,
      "learning_rate": 7.5093914447637666e-06,
      "loss": 0.8113,
      "step": 740280
    },
    {
      "epoch": 2.594531887974149,
      "grad_norm": 3.015625,
      "learning_rate": 7.5087424161000646e-06,
      "loss": 0.9013,
      "step": 740290
    },
    {
      "epoch": 2.5945669354810446,
      "grad_norm": 3.046875,
      "learning_rate": 7.5080933874363626e-06,
      "loss": 0.7656,
      "step": 740300
    },
    {
      "epoch": 2.5946019829879403,
      "grad_norm": 2.65625,
      "learning_rate": 7.507444358772661e-06,
      "loss": 0.8377,
      "step": 740310
    },
    {
      "epoch": 2.5946370304948356,
      "grad_norm": 3.015625,
      "learning_rate": 7.506795330108959e-06,
      "loss": 0.8513,
      "step": 740320
    },
    {
      "epoch": 2.5946720780017314,
      "grad_norm": 3.046875,
      "learning_rate": 7.506146301445257e-06,
      "loss": 0.9077,
      "step": 740330
    },
    {
      "epoch": 2.594707125508627,
      "grad_norm": 3.171875,
      "learning_rate": 7.505497272781556e-06,
      "loss": 0.7873,
      "step": 740340
    },
    {
      "epoch": 2.5947421730155225,
      "grad_norm": 2.875,
      "learning_rate": 7.504848244117854e-06,
      "loss": 0.8217,
      "step": 740350
    },
    {
      "epoch": 2.594777220522418,
      "grad_norm": 2.46875,
      "learning_rate": 7.504199215454151e-06,
      "loss": 0.8087,
      "step": 740360
    },
    {
      "epoch": 2.5948122680293135,
      "grad_norm": 2.671875,
      "learning_rate": 7.503550186790449e-06,
      "loss": 0.7813,
      "step": 740370
    },
    {
      "epoch": 2.5948473155362093,
      "grad_norm": 2.953125,
      "learning_rate": 7.502901158126747e-06,
      "loss": 0.747,
      "step": 740380
    },
    {
      "epoch": 2.594882363043105,
      "grad_norm": 2.859375,
      "learning_rate": 7.502252129463045e-06,
      "loss": 0.8192,
      "step": 740390
    },
    {
      "epoch": 2.594917410550001,
      "grad_norm": 2.875,
      "learning_rate": 7.501603100799344e-06,
      "loss": 0.8491,
      "step": 740400
    },
    {
      "epoch": 2.594952458056896,
      "grad_norm": 2.640625,
      "learning_rate": 7.500954072135642e-06,
      "loss": 0.7108,
      "step": 740410
    },
    {
      "epoch": 2.594987505563792,
      "grad_norm": 2.921875,
      "learning_rate": 7.50030504347194e-06,
      "loss": 0.8227,
      "step": 740420
    },
    {
      "epoch": 2.595022553070687,
      "grad_norm": 2.796875,
      "learning_rate": 7.499656014808239e-06,
      "loss": 0.7561,
      "step": 740430
    },
    {
      "epoch": 2.595057600577583,
      "grad_norm": 2.859375,
      "learning_rate": 7.499006986144537e-06,
      "loss": 0.8494,
      "step": 740440
    },
    {
      "epoch": 2.5950926480844787,
      "grad_norm": 2.921875,
      "learning_rate": 7.498357957480835e-06,
      "loss": 0.8297,
      "step": 740450
    },
    {
      "epoch": 2.595127695591374,
      "grad_norm": 2.734375,
      "learning_rate": 7.497708928817132e-06,
      "loss": 0.8025,
      "step": 740460
    },
    {
      "epoch": 2.5951627430982698,
      "grad_norm": 2.5625,
      "learning_rate": 7.49705990015343e-06,
      "loss": 0.8365,
      "step": 740470
    },
    {
      "epoch": 2.595197790605165,
      "grad_norm": 2.828125,
      "learning_rate": 7.496410871489728e-06,
      "loss": 0.783,
      "step": 740480
    },
    {
      "epoch": 2.595232838112061,
      "grad_norm": 3.140625,
      "learning_rate": 7.495761842826027e-06,
      "loss": 0.8432,
      "step": 740490
    },
    {
      "epoch": 2.5952678856189566,
      "grad_norm": 2.984375,
      "learning_rate": 7.495112814162325e-06,
      "loss": 0.8041,
      "step": 740500
    },
    {
      "epoch": 2.5953029331258524,
      "grad_norm": 2.796875,
      "learning_rate": 7.494463785498623e-06,
      "loss": 0.8187,
      "step": 740510
    },
    {
      "epoch": 2.5953379806327477,
      "grad_norm": 2.6875,
      "learning_rate": 7.493814756834921e-06,
      "loss": 0.8052,
      "step": 740520
    },
    {
      "epoch": 2.5953730281396434,
      "grad_norm": 2.9375,
      "learning_rate": 7.49316572817122e-06,
      "loss": 0.7593,
      "step": 740530
    },
    {
      "epoch": 2.5954080756465387,
      "grad_norm": 2.875,
      "learning_rate": 7.492516699507518e-06,
      "loss": 0.793,
      "step": 740540
    },
    {
      "epoch": 2.5954431231534345,
      "grad_norm": 2.53125,
      "learning_rate": 7.491867670843815e-06,
      "loss": 0.7708,
      "step": 740550
    },
    {
      "epoch": 2.5954781706603303,
      "grad_norm": 3.375,
      "learning_rate": 7.491218642180113e-06,
      "loss": 0.8585,
      "step": 740560
    },
    {
      "epoch": 2.5955132181672256,
      "grad_norm": 2.5,
      "learning_rate": 7.490569613516411e-06,
      "loss": 0.7104,
      "step": 740570
    },
    {
      "epoch": 2.5955482656741213,
      "grad_norm": 2.875,
      "learning_rate": 7.48992058485271e-06,
      "loss": 0.7899,
      "step": 740580
    },
    {
      "epoch": 2.5955833131810166,
      "grad_norm": 3.21875,
      "learning_rate": 7.489271556189008e-06,
      "loss": 0.8772,
      "step": 740590
    },
    {
      "epoch": 2.5956183606879124,
      "grad_norm": 3.015625,
      "learning_rate": 7.488622527525306e-06,
      "loss": 0.7722,
      "step": 740600
    },
    {
      "epoch": 2.595653408194808,
      "grad_norm": 3.140625,
      "learning_rate": 7.487973498861604e-06,
      "loss": 0.8697,
      "step": 740610
    },
    {
      "epoch": 2.595688455701704,
      "grad_norm": 2.78125,
      "learning_rate": 7.487324470197903e-06,
      "loss": 0.8201,
      "step": 740620
    },
    {
      "epoch": 2.5957235032085992,
      "grad_norm": 2.65625,
      "learning_rate": 7.486675441534201e-06,
      "loss": 0.8126,
      "step": 740630
    },
    {
      "epoch": 2.595758550715495,
      "grad_norm": 2.875,
      "learning_rate": 7.486026412870499e-06,
      "loss": 0.8921,
      "step": 740640
    },
    {
      "epoch": 2.5957935982223903,
      "grad_norm": 2.796875,
      "learning_rate": 7.485377384206796e-06,
      "loss": 0.7541,
      "step": 740650
    },
    {
      "epoch": 2.595828645729286,
      "grad_norm": 3.140625,
      "learning_rate": 7.484728355543094e-06,
      "loss": 0.7277,
      "step": 740660
    },
    {
      "epoch": 2.595863693236182,
      "grad_norm": 3.34375,
      "learning_rate": 7.484079326879393e-06,
      "loss": 0.8848,
      "step": 740670
    },
    {
      "epoch": 2.595898740743077,
      "grad_norm": 2.71875,
      "learning_rate": 7.483430298215691e-06,
      "loss": 0.7458,
      "step": 740680
    },
    {
      "epoch": 2.595933788249973,
      "grad_norm": 2.9375,
      "learning_rate": 7.482781269551989e-06,
      "loss": 0.8168,
      "step": 740690
    },
    {
      "epoch": 2.595968835756868,
      "grad_norm": 2.96875,
      "learning_rate": 7.482132240888287e-06,
      "loss": 0.8127,
      "step": 740700
    },
    {
      "epoch": 2.596003883263764,
      "grad_norm": 2.734375,
      "learning_rate": 7.481483212224586e-06,
      "loss": 0.7668,
      "step": 740710
    },
    {
      "epoch": 2.5960389307706597,
      "grad_norm": 3.03125,
      "learning_rate": 7.480834183560884e-06,
      "loss": 0.7987,
      "step": 740720
    },
    {
      "epoch": 2.5960739782775555,
      "grad_norm": 2.75,
      "learning_rate": 7.480185154897182e-06,
      "loss": 0.8362,
      "step": 740730
    },
    {
      "epoch": 2.596109025784451,
      "grad_norm": 2.890625,
      "learning_rate": 7.479536126233479e-06,
      "loss": 0.7248,
      "step": 740740
    },
    {
      "epoch": 2.5961440732913466,
      "grad_norm": 2.5625,
      "learning_rate": 7.478887097569777e-06,
      "loss": 0.8703,
      "step": 740750
    },
    {
      "epoch": 2.596179120798242,
      "grad_norm": 3.328125,
      "learning_rate": 7.478238068906075e-06,
      "loss": 0.8495,
      "step": 740760
    },
    {
      "epoch": 2.5962141683051376,
      "grad_norm": 2.859375,
      "learning_rate": 7.477589040242374e-06,
      "loss": 0.8354,
      "step": 740770
    },
    {
      "epoch": 2.5962492158120334,
      "grad_norm": 3.109375,
      "learning_rate": 7.476940011578672e-06,
      "loss": 0.9248,
      "step": 740780
    },
    {
      "epoch": 2.5962842633189287,
      "grad_norm": 3.03125,
      "learning_rate": 7.47629098291497e-06,
      "loss": 0.8503,
      "step": 740790
    },
    {
      "epoch": 2.5963193108258245,
      "grad_norm": 2.78125,
      "learning_rate": 7.4756419542512685e-06,
      "loss": 0.7866,
      "step": 740800
    },
    {
      "epoch": 2.5963543583327198,
      "grad_norm": 2.96875,
      "learning_rate": 7.4749929255875665e-06,
      "loss": 0.8303,
      "step": 740810
    },
    {
      "epoch": 2.5963894058396155,
      "grad_norm": 3.078125,
      "learning_rate": 7.4743438969238645e-06,
      "loss": 0.7385,
      "step": 740820
    },
    {
      "epoch": 2.5964244533465113,
      "grad_norm": 3.21875,
      "learning_rate": 7.473694868260162e-06,
      "loss": 0.814,
      "step": 740830
    },
    {
      "epoch": 2.596459500853407,
      "grad_norm": 3.109375,
      "learning_rate": 7.47304583959646e-06,
      "loss": 0.837,
      "step": 740840
    },
    {
      "epoch": 2.5964945483603024,
      "grad_norm": 3.28125,
      "learning_rate": 7.472396810932758e-06,
      "loss": 0.7963,
      "step": 740850
    },
    {
      "epoch": 2.596529595867198,
      "grad_norm": 3.71875,
      "learning_rate": 7.4717477822690565e-06,
      "loss": 0.8307,
      "step": 740860
    },
    {
      "epoch": 2.5965646433740934,
      "grad_norm": 3.296875,
      "learning_rate": 7.4710987536053545e-06,
      "loss": 0.784,
      "step": 740870
    },
    {
      "epoch": 2.596599690880989,
      "grad_norm": 3.546875,
      "learning_rate": 7.4704497249416525e-06,
      "loss": 0.8218,
      "step": 740880
    },
    {
      "epoch": 2.596634738387885,
      "grad_norm": 2.90625,
      "learning_rate": 7.469800696277951e-06,
      "loss": 0.8959,
      "step": 740890
    },
    {
      "epoch": 2.5966697858947803,
      "grad_norm": 2.578125,
      "learning_rate": 7.469151667614249e-06,
      "loss": 0.7453,
      "step": 740900
    },
    {
      "epoch": 2.596704833401676,
      "grad_norm": 2.765625,
      "learning_rate": 7.468502638950547e-06,
      "loss": 0.8171,
      "step": 740910
    },
    {
      "epoch": 2.5967398809085713,
      "grad_norm": 2.90625,
      "learning_rate": 7.467853610286845e-06,
      "loss": 0.8481,
      "step": 740920
    },
    {
      "epoch": 2.596774928415467,
      "grad_norm": 3.21875,
      "learning_rate": 7.4672045816231425e-06,
      "loss": 0.8057,
      "step": 740930
    },
    {
      "epoch": 2.596809975922363,
      "grad_norm": 3.15625,
      "learning_rate": 7.4665555529594405e-06,
      "loss": 0.7975,
      "step": 740940
    },
    {
      "epoch": 2.5968450234292586,
      "grad_norm": 3.015625,
      "learning_rate": 7.465906524295739e-06,
      "loss": 0.8186,
      "step": 740950
    },
    {
      "epoch": 2.596880070936154,
      "grad_norm": 3.1875,
      "learning_rate": 7.465257495632037e-06,
      "loss": 0.8847,
      "step": 740960
    },
    {
      "epoch": 2.5969151184430497,
      "grad_norm": 2.671875,
      "learning_rate": 7.464608466968335e-06,
      "loss": 0.8427,
      "step": 740970
    },
    {
      "epoch": 2.596950165949945,
      "grad_norm": 2.734375,
      "learning_rate": 7.463959438304634e-06,
      "loss": 0.7781,
      "step": 740980
    },
    {
      "epoch": 2.5969852134568407,
      "grad_norm": 2.953125,
      "learning_rate": 7.463310409640932e-06,
      "loss": 0.7175,
      "step": 740990
    },
    {
      "epoch": 2.5970202609637365,
      "grad_norm": 3.0625,
      "learning_rate": 7.46266138097723e-06,
      "loss": 0.8804,
      "step": 741000
    },
    {
      "epoch": 2.597055308470632,
      "grad_norm": 3.15625,
      "learning_rate": 7.462012352313528e-06,
      "loss": 0.8041,
      "step": 741010
    },
    {
      "epoch": 2.5970903559775276,
      "grad_norm": 2.765625,
      "learning_rate": 7.461363323649825e-06,
      "loss": 0.7333,
      "step": 741020
    },
    {
      "epoch": 2.597125403484423,
      "grad_norm": 2.828125,
      "learning_rate": 7.460714294986123e-06,
      "loss": 0.7842,
      "step": 741030
    },
    {
      "epoch": 2.5971604509913186,
      "grad_norm": 3.109375,
      "learning_rate": 7.460065266322422e-06,
      "loss": 0.8386,
      "step": 741040
    },
    {
      "epoch": 2.5971954984982144,
      "grad_norm": 3.265625,
      "learning_rate": 7.45941623765872e-06,
      "loss": 0.812,
      "step": 741050
    },
    {
      "epoch": 2.59723054600511,
      "grad_norm": 2.734375,
      "learning_rate": 7.458767208995018e-06,
      "loss": 0.8299,
      "step": 741060
    },
    {
      "epoch": 2.5972655935120055,
      "grad_norm": 2.9375,
      "learning_rate": 7.458118180331316e-06,
      "loss": 0.8247,
      "step": 741070
    },
    {
      "epoch": 2.5973006410189012,
      "grad_norm": 2.53125,
      "learning_rate": 7.457469151667615e-06,
      "loss": 0.8221,
      "step": 741080
    },
    {
      "epoch": 2.5973356885257965,
      "grad_norm": 3.234375,
      "learning_rate": 7.456820123003913e-06,
      "loss": 0.7971,
      "step": 741090
    },
    {
      "epoch": 2.5973707360326923,
      "grad_norm": 2.84375,
      "learning_rate": 7.456171094340211e-06,
      "loss": 0.7811,
      "step": 741100
    },
    {
      "epoch": 2.597405783539588,
      "grad_norm": 3.375,
      "learning_rate": 7.45552206567651e-06,
      "loss": 0.8294,
      "step": 741110
    },
    {
      "epoch": 2.5974408310464834,
      "grad_norm": 3.109375,
      "learning_rate": 7.454873037012806e-06,
      "loss": 0.8488,
      "step": 741120
    },
    {
      "epoch": 2.597475878553379,
      "grad_norm": 3.0625,
      "learning_rate": 7.454224008349105e-06,
      "loss": 0.835,
      "step": 741130
    },
    {
      "epoch": 2.5975109260602744,
      "grad_norm": 3.375,
      "learning_rate": 7.453574979685403e-06,
      "loss": 0.7817,
      "step": 741140
    },
    {
      "epoch": 2.59754597356717,
      "grad_norm": 2.828125,
      "learning_rate": 7.452925951021701e-06,
      "loss": 0.8003,
      "step": 741150
    },
    {
      "epoch": 2.597581021074066,
      "grad_norm": 2.46875,
      "learning_rate": 7.452276922357999e-06,
      "loss": 0.7992,
      "step": 741160
    },
    {
      "epoch": 2.5976160685809617,
      "grad_norm": 2.765625,
      "learning_rate": 7.451627893694298e-06,
      "loss": 0.7984,
      "step": 741170
    },
    {
      "epoch": 2.597651116087857,
      "grad_norm": 2.703125,
      "learning_rate": 7.450978865030596e-06,
      "loss": 0.7676,
      "step": 741180
    },
    {
      "epoch": 2.597686163594753,
      "grad_norm": 3.234375,
      "learning_rate": 7.450329836366894e-06,
      "loss": 0.7863,
      "step": 741190
    },
    {
      "epoch": 2.597721211101648,
      "grad_norm": 3.0625,
      "learning_rate": 7.449680807703193e-06,
      "loss": 0.8345,
      "step": 741200
    },
    {
      "epoch": 2.597756258608544,
      "grad_norm": 3.046875,
      "learning_rate": 7.449031779039489e-06,
      "loss": 0.8253,
      "step": 741210
    },
    {
      "epoch": 2.5977913061154396,
      "grad_norm": 3.265625,
      "learning_rate": 7.448382750375788e-06,
      "loss": 0.8274,
      "step": 741220
    },
    {
      "epoch": 2.597826353622335,
      "grad_norm": 3.09375,
      "learning_rate": 7.447733721712086e-06,
      "loss": 0.7724,
      "step": 741230
    },
    {
      "epoch": 2.5978614011292307,
      "grad_norm": 2.625,
      "learning_rate": 7.447084693048384e-06,
      "loss": 0.8043,
      "step": 741240
    },
    {
      "epoch": 2.597896448636126,
      "grad_norm": 2.40625,
      "learning_rate": 7.446435664384682e-06,
      "loss": 0.7512,
      "step": 741250
    },
    {
      "epoch": 2.5979314961430218,
      "grad_norm": 2.484375,
      "learning_rate": 7.445786635720981e-06,
      "loss": 0.8018,
      "step": 741260
    },
    {
      "epoch": 2.5979665436499175,
      "grad_norm": 2.984375,
      "learning_rate": 7.445137607057279e-06,
      "loss": 0.8201,
      "step": 741270
    },
    {
      "epoch": 2.5980015911568133,
      "grad_norm": 3.3125,
      "learning_rate": 7.444488578393577e-06,
      "loss": 0.7349,
      "step": 741280
    },
    {
      "epoch": 2.5980366386637086,
      "grad_norm": 2.78125,
      "learning_rate": 7.4438395497298756e-06,
      "loss": 0.8525,
      "step": 741290
    },
    {
      "epoch": 2.5980716861706044,
      "grad_norm": 3.3125,
      "learning_rate": 7.443190521066172e-06,
      "loss": 0.8004,
      "step": 741300
    },
    {
      "epoch": 2.5981067336774997,
      "grad_norm": 3.125,
      "learning_rate": 7.44254149240247e-06,
      "loss": 0.8543,
      "step": 741310
    },
    {
      "epoch": 2.5981417811843954,
      "grad_norm": 3.21875,
      "learning_rate": 7.441892463738769e-06,
      "loss": 0.8776,
      "step": 741320
    },
    {
      "epoch": 2.598176828691291,
      "grad_norm": 2.84375,
      "learning_rate": 7.441243435075067e-06,
      "loss": 0.8475,
      "step": 741330
    },
    {
      "epoch": 2.5982118761981865,
      "grad_norm": 2.734375,
      "learning_rate": 7.440594406411365e-06,
      "loss": 0.7804,
      "step": 741340
    },
    {
      "epoch": 2.5982469237050823,
      "grad_norm": 3.03125,
      "learning_rate": 7.4399453777476636e-06,
      "loss": 0.797,
      "step": 741350
    },
    {
      "epoch": 2.5982819712119776,
      "grad_norm": 3.5625,
      "learning_rate": 7.4392963490839616e-06,
      "loss": 0.8555,
      "step": 741360
    },
    {
      "epoch": 2.5983170187188733,
      "grad_norm": 2.78125,
      "learning_rate": 7.4386473204202596e-06,
      "loss": 0.7528,
      "step": 741370
    },
    {
      "epoch": 2.598352066225769,
      "grad_norm": 3.0,
      "learning_rate": 7.4379982917565576e-06,
      "loss": 0.7751,
      "step": 741380
    },
    {
      "epoch": 2.598387113732665,
      "grad_norm": 3.1875,
      "learning_rate": 7.437349263092856e-06,
      "loss": 0.7826,
      "step": 741390
    },
    {
      "epoch": 2.59842216123956,
      "grad_norm": 2.921875,
      "learning_rate": 7.436700234429153e-06,
      "loss": 0.7495,
      "step": 741400
    },
    {
      "epoch": 2.598457208746456,
      "grad_norm": 3.0625,
      "learning_rate": 7.4360512057654516e-06,
      "loss": 0.8162,
      "step": 741410
    },
    {
      "epoch": 2.5984922562533512,
      "grad_norm": 3.34375,
      "learning_rate": 7.4354021771017496e-06,
      "loss": 0.8789,
      "step": 741420
    },
    {
      "epoch": 2.598527303760247,
      "grad_norm": 3.328125,
      "learning_rate": 7.4347531484380476e-06,
      "loss": 0.7782,
      "step": 741430
    },
    {
      "epoch": 2.5985623512671427,
      "grad_norm": 3.0,
      "learning_rate": 7.434104119774346e-06,
      "loss": 0.8012,
      "step": 741440
    },
    {
      "epoch": 2.598597398774038,
      "grad_norm": 2.671875,
      "learning_rate": 7.433455091110644e-06,
      "loss": 0.8811,
      "step": 741450
    },
    {
      "epoch": 2.598632446280934,
      "grad_norm": 2.890625,
      "learning_rate": 7.432806062446942e-06,
      "loss": 0.8057,
      "step": 741460
    },
    {
      "epoch": 2.598667493787829,
      "grad_norm": 2.4375,
      "learning_rate": 7.43215703378324e-06,
      "loss": 0.7196,
      "step": 741470
    },
    {
      "epoch": 2.598702541294725,
      "grad_norm": 2.6875,
      "learning_rate": 7.431508005119539e-06,
      "loss": 0.8479,
      "step": 741480
    },
    {
      "epoch": 2.5987375888016206,
      "grad_norm": 2.890625,
      "learning_rate": 7.4308589764558356e-06,
      "loss": 0.7764,
      "step": 741490
    },
    {
      "epoch": 2.5987726363085164,
      "grad_norm": 3.03125,
      "learning_rate": 7.430209947792134e-06,
      "loss": 0.8255,
      "step": 741500
    },
    {
      "epoch": 2.5988076838154117,
      "grad_norm": 3.46875,
      "learning_rate": 7.429560919128432e-06,
      "loss": 0.8146,
      "step": 741510
    },
    {
      "epoch": 2.5988427313223075,
      "grad_norm": 2.984375,
      "learning_rate": 7.42891189046473e-06,
      "loss": 0.8417,
      "step": 741520
    },
    {
      "epoch": 2.598877778829203,
      "grad_norm": 2.828125,
      "learning_rate": 7.428262861801029e-06,
      "loss": 0.8328,
      "step": 741530
    },
    {
      "epoch": 2.5989128263360985,
      "grad_norm": 2.921875,
      "learning_rate": 7.427613833137327e-06,
      "loss": 0.7757,
      "step": 741540
    },
    {
      "epoch": 2.5989478738429943,
      "grad_norm": 2.953125,
      "learning_rate": 7.426964804473625e-06,
      "loss": 0.9045,
      "step": 741550
    },
    {
      "epoch": 2.5989829213498896,
      "grad_norm": 3.375,
      "learning_rate": 7.426315775809923e-06,
      "loss": 0.8317,
      "step": 741560
    },
    {
      "epoch": 2.5990179688567854,
      "grad_norm": 3.453125,
      "learning_rate": 7.425666747146222e-06,
      "loss": 0.8004,
      "step": 741570
    },
    {
      "epoch": 2.599053016363681,
      "grad_norm": 3.0,
      "learning_rate": 7.42501771848252e-06,
      "loss": 0.7984,
      "step": 741580
    },
    {
      "epoch": 2.5990880638705764,
      "grad_norm": 2.84375,
      "learning_rate": 7.424368689818817e-06,
      "loss": 0.7776,
      "step": 741590
    },
    {
      "epoch": 2.599123111377472,
      "grad_norm": 2.703125,
      "learning_rate": 7.423719661155115e-06,
      "loss": 0.7931,
      "step": 741600
    },
    {
      "epoch": 2.599158158884368,
      "grad_norm": 3.21875,
      "learning_rate": 7.423070632491413e-06,
      "loss": 0.8717,
      "step": 741610
    },
    {
      "epoch": 2.5991932063912633,
      "grad_norm": 3.34375,
      "learning_rate": 7.422421603827711e-06,
      "loss": 0.7123,
      "step": 741620
    },
    {
      "epoch": 2.599228253898159,
      "grad_norm": 2.796875,
      "learning_rate": 7.42177257516401e-06,
      "loss": 0.7335,
      "step": 741630
    },
    {
      "epoch": 2.5992633014050543,
      "grad_norm": 3.265625,
      "learning_rate": 7.421123546500308e-06,
      "loss": 0.8414,
      "step": 741640
    },
    {
      "epoch": 2.59929834891195,
      "grad_norm": 3.203125,
      "learning_rate": 7.420474517836606e-06,
      "loss": 0.888,
      "step": 741650
    },
    {
      "epoch": 2.599333396418846,
      "grad_norm": 2.984375,
      "learning_rate": 7.419825489172905e-06,
      "loss": 0.773,
      "step": 741660
    },
    {
      "epoch": 2.599368443925741,
      "grad_norm": 2.78125,
      "learning_rate": 7.419176460509203e-06,
      "loss": 0.7539,
      "step": 741670
    },
    {
      "epoch": 2.599403491432637,
      "grad_norm": 2.703125,
      "learning_rate": 7.4185274318455e-06,
      "loss": 0.7439,
      "step": 741680
    },
    {
      "epoch": 2.5994385389395327,
      "grad_norm": 3.390625,
      "learning_rate": 7.417878403181798e-06,
      "loss": 0.8766,
      "step": 741690
    },
    {
      "epoch": 2.599473586446428,
      "grad_norm": 3.03125,
      "learning_rate": 7.417229374518096e-06,
      "loss": 0.7886,
      "step": 741700
    },
    {
      "epoch": 2.5995086339533238,
      "grad_norm": 2.71875,
      "learning_rate": 7.416580345854394e-06,
      "loss": 0.7549,
      "step": 741710
    },
    {
      "epoch": 2.5995436814602195,
      "grad_norm": 3.140625,
      "learning_rate": 7.415931317190693e-06,
      "loss": 0.776,
      "step": 741720
    },
    {
      "epoch": 2.599578728967115,
      "grad_norm": 3.265625,
      "learning_rate": 7.415282288526991e-06,
      "loss": 0.8814,
      "step": 741730
    },
    {
      "epoch": 2.5996137764740106,
      "grad_norm": 3.15625,
      "learning_rate": 7.414633259863289e-06,
      "loss": 0.8192,
      "step": 741740
    },
    {
      "epoch": 2.599648823980906,
      "grad_norm": 2.8125,
      "learning_rate": 7.413984231199588e-06,
      "loss": 0.8206,
      "step": 741750
    },
    {
      "epoch": 2.5996838714878017,
      "grad_norm": 2.921875,
      "learning_rate": 7.413335202535886e-06,
      "loss": 0.8417,
      "step": 741760
    },
    {
      "epoch": 2.5997189189946974,
      "grad_norm": 3.265625,
      "learning_rate": 7.412686173872183e-06,
      "loss": 0.8352,
      "step": 741770
    },
    {
      "epoch": 2.599753966501593,
      "grad_norm": 3.046875,
      "learning_rate": 7.412037145208481e-06,
      "loss": 0.7283,
      "step": 741780
    },
    {
      "epoch": 2.5997890140084885,
      "grad_norm": 2.9375,
      "learning_rate": 7.411388116544779e-06,
      "loss": 0.7945,
      "step": 741790
    },
    {
      "epoch": 2.5998240615153843,
      "grad_norm": 3.21875,
      "learning_rate": 7.410739087881077e-06,
      "loss": 0.7755,
      "step": 741800
    },
    {
      "epoch": 2.5998591090222796,
      "grad_norm": 2.703125,
      "learning_rate": 7.410090059217376e-06,
      "loss": 0.7691,
      "step": 741810
    },
    {
      "epoch": 2.5998941565291753,
      "grad_norm": 2.71875,
      "learning_rate": 7.409441030553674e-06,
      "loss": 0.7732,
      "step": 741820
    },
    {
      "epoch": 2.599929204036071,
      "grad_norm": 2.71875,
      "learning_rate": 7.408792001889972e-06,
      "loss": 0.7519,
      "step": 741830
    },
    {
      "epoch": 2.5999642515429664,
      "grad_norm": 2.953125,
      "learning_rate": 7.408142973226271e-06,
      "loss": 0.8575,
      "step": 741840
    },
    {
      "epoch": 2.599999299049862,
      "grad_norm": 2.765625,
      "learning_rate": 7.407493944562569e-06,
      "loss": 0.7918,
      "step": 741850
    },
    {
      "epoch": 2.6000343465567575,
      "grad_norm": 2.875,
      "learning_rate": 7.406844915898867e-06,
      "loss": 0.7721,
      "step": 741860
    },
    {
      "epoch": 2.6000693940636532,
      "grad_norm": 3.109375,
      "learning_rate": 7.406195887235164e-06,
      "loss": 0.8051,
      "step": 741870
    },
    {
      "epoch": 2.600104441570549,
      "grad_norm": 2.890625,
      "learning_rate": 7.405546858571462e-06,
      "loss": 0.7704,
      "step": 741880
    },
    {
      "epoch": 2.6001394890774447,
      "grad_norm": 2.9375,
      "learning_rate": 7.40489782990776e-06,
      "loss": 0.8487,
      "step": 741890
    },
    {
      "epoch": 2.60017453658434,
      "grad_norm": 2.90625,
      "learning_rate": 7.404248801244059e-06,
      "loss": 0.7811,
      "step": 741900
    },
    {
      "epoch": 2.600209584091236,
      "grad_norm": 3.03125,
      "learning_rate": 7.403599772580357e-06,
      "loss": 0.8385,
      "step": 741910
    },
    {
      "epoch": 2.600244631598131,
      "grad_norm": 3.546875,
      "learning_rate": 7.402950743916655e-06,
      "loss": 0.8146,
      "step": 741920
    },
    {
      "epoch": 2.600279679105027,
      "grad_norm": 2.953125,
      "learning_rate": 7.4023017152529535e-06,
      "loss": 0.7859,
      "step": 741930
    },
    {
      "epoch": 2.6003147266119226,
      "grad_norm": 3.0625,
      "learning_rate": 7.4016526865892515e-06,
      "loss": 0.8264,
      "step": 741940
    },
    {
      "epoch": 2.600349774118818,
      "grad_norm": 2.8125,
      "learning_rate": 7.4010036579255495e-06,
      "loss": 0.7625,
      "step": 741950
    },
    {
      "epoch": 2.6003848216257137,
      "grad_norm": 3.1875,
      "learning_rate": 7.400354629261847e-06,
      "loss": 0.7804,
      "step": 741960
    },
    {
      "epoch": 2.600419869132609,
      "grad_norm": 3.046875,
      "learning_rate": 7.399705600598145e-06,
      "loss": 0.7991,
      "step": 741970
    },
    {
      "epoch": 2.600454916639505,
      "grad_norm": 3.171875,
      "learning_rate": 7.399056571934443e-06,
      "loss": 0.8034,
      "step": 741980
    },
    {
      "epoch": 2.6004899641464005,
      "grad_norm": 2.578125,
      "learning_rate": 7.3984075432707415e-06,
      "loss": 0.6874,
      "step": 741990
    },
    {
      "epoch": 2.6005250116532963,
      "grad_norm": 2.734375,
      "learning_rate": 7.3977585146070395e-06,
      "loss": 0.7922,
      "step": 742000
    },
    {
      "epoch": 2.6005600591601916,
      "grad_norm": 2.421875,
      "learning_rate": 7.3971094859433375e-06,
      "loss": 0.8073,
      "step": 742010
    },
    {
      "epoch": 2.6005951066670874,
      "grad_norm": 2.703125,
      "learning_rate": 7.3964604572796355e-06,
      "loss": 0.7813,
      "step": 742020
    },
    {
      "epoch": 2.6006301541739827,
      "grad_norm": 2.671875,
      "learning_rate": 7.395811428615934e-06,
      "loss": 0.8492,
      "step": 742030
    },
    {
      "epoch": 2.6006652016808784,
      "grad_norm": 2.390625,
      "learning_rate": 7.395162399952232e-06,
      "loss": 0.8393,
      "step": 742040
    },
    {
      "epoch": 2.600700249187774,
      "grad_norm": 2.859375,
      "learning_rate": 7.39451337128853e-06,
      "loss": 0.8571,
      "step": 742050
    },
    {
      "epoch": 2.6007352966946695,
      "grad_norm": 2.703125,
      "learning_rate": 7.3938643426248275e-06,
      "loss": 0.7541,
      "step": 742060
    },
    {
      "epoch": 2.6007703442015653,
      "grad_norm": 3.328125,
      "learning_rate": 7.3932153139611255e-06,
      "loss": 0.8045,
      "step": 742070
    },
    {
      "epoch": 2.6008053917084606,
      "grad_norm": 2.828125,
      "learning_rate": 7.392566285297424e-06,
      "loss": 0.8189,
      "step": 742080
    },
    {
      "epoch": 2.6008404392153563,
      "grad_norm": 2.4375,
      "learning_rate": 7.391917256633722e-06,
      "loss": 0.7787,
      "step": 742090
    },
    {
      "epoch": 2.600875486722252,
      "grad_norm": 2.734375,
      "learning_rate": 7.39126822797002e-06,
      "loss": 0.7822,
      "step": 742100
    },
    {
      "epoch": 2.600910534229148,
      "grad_norm": 2.84375,
      "learning_rate": 7.390619199306318e-06,
      "loss": 0.7934,
      "step": 742110
    },
    {
      "epoch": 2.600945581736043,
      "grad_norm": 2.609375,
      "learning_rate": 7.389970170642617e-06,
      "loss": 0.8522,
      "step": 742120
    },
    {
      "epoch": 2.600980629242939,
      "grad_norm": 2.640625,
      "learning_rate": 7.389321141978915e-06,
      "loss": 0.7335,
      "step": 742130
    },
    {
      "epoch": 2.6010156767498342,
      "grad_norm": 2.84375,
      "learning_rate": 7.388672113315213e-06,
      "loss": 0.813,
      "step": 742140
    },
    {
      "epoch": 2.60105072425673,
      "grad_norm": 2.953125,
      "learning_rate": 7.38802308465151e-06,
      "loss": 0.8223,
      "step": 742150
    },
    {
      "epoch": 2.6010857717636258,
      "grad_norm": 3.171875,
      "learning_rate": 7.387374055987808e-06,
      "loss": 0.8246,
      "step": 742160
    },
    {
      "epoch": 2.601120819270521,
      "grad_norm": 3.171875,
      "learning_rate": 7.386725027324106e-06,
      "loss": 0.8254,
      "step": 742170
    },
    {
      "epoch": 2.601155866777417,
      "grad_norm": 3.0625,
      "learning_rate": 7.386075998660405e-06,
      "loss": 0.7497,
      "step": 742180
    },
    {
      "epoch": 2.601190914284312,
      "grad_norm": 2.921875,
      "learning_rate": 7.385426969996703e-06,
      "loss": 0.8039,
      "step": 742190
    },
    {
      "epoch": 2.601225961791208,
      "grad_norm": 2.703125,
      "learning_rate": 7.384777941333001e-06,
      "loss": 0.8659,
      "step": 742200
    },
    {
      "epoch": 2.6012610092981037,
      "grad_norm": 2.796875,
      "learning_rate": 7.3841289126693e-06,
      "loss": 0.819,
      "step": 742210
    },
    {
      "epoch": 2.6012960568049994,
      "grad_norm": 3.078125,
      "learning_rate": 7.383479884005598e-06,
      "loss": 0.7949,
      "step": 742220
    },
    {
      "epoch": 2.6013311043118947,
      "grad_norm": 3.15625,
      "learning_rate": 7.382830855341896e-06,
      "loss": 0.8572,
      "step": 742230
    },
    {
      "epoch": 2.6013661518187905,
      "grad_norm": 2.84375,
      "learning_rate": 7.382181826678193e-06,
      "loss": 0.7879,
      "step": 742240
    },
    {
      "epoch": 2.601401199325686,
      "grad_norm": 3.171875,
      "learning_rate": 7.381532798014491e-06,
      "loss": 0.8385,
      "step": 742250
    },
    {
      "epoch": 2.6014362468325816,
      "grad_norm": 3.0,
      "learning_rate": 7.380883769350789e-06,
      "loss": 0.8506,
      "step": 742260
    },
    {
      "epoch": 2.6014712943394773,
      "grad_norm": 3.15625,
      "learning_rate": 7.380234740687088e-06,
      "loss": 0.8732,
      "step": 742270
    },
    {
      "epoch": 2.6015063418463726,
      "grad_norm": 2.765625,
      "learning_rate": 7.379585712023386e-06,
      "loss": 0.7978,
      "step": 742280
    },
    {
      "epoch": 2.6015413893532684,
      "grad_norm": 3.046875,
      "learning_rate": 7.378936683359684e-06,
      "loss": 0.8154,
      "step": 742290
    },
    {
      "epoch": 2.6015764368601637,
      "grad_norm": 2.71875,
      "learning_rate": 7.378287654695983e-06,
      "loss": 0.7415,
      "step": 742300
    },
    {
      "epoch": 2.6016114843670595,
      "grad_norm": 3.0625,
      "learning_rate": 7.377638626032281e-06,
      "loss": 0.8511,
      "step": 742310
    },
    {
      "epoch": 2.6016465318739552,
      "grad_norm": 3.609375,
      "learning_rate": 7.376989597368579e-06,
      "loss": 0.8039,
      "step": 742320
    },
    {
      "epoch": 2.601681579380851,
      "grad_norm": 2.71875,
      "learning_rate": 7.376340568704877e-06,
      "loss": 0.792,
      "step": 742330
    },
    {
      "epoch": 2.6017166268877463,
      "grad_norm": 3.0625,
      "learning_rate": 7.375691540041174e-06,
      "loss": 0.7791,
      "step": 742340
    },
    {
      "epoch": 2.601751674394642,
      "grad_norm": 3.140625,
      "learning_rate": 7.375042511377472e-06,
      "loss": 0.836,
      "step": 742350
    },
    {
      "epoch": 2.6017867219015374,
      "grad_norm": 3.375,
      "learning_rate": 7.374393482713771e-06,
      "loss": 0.8713,
      "step": 742360
    },
    {
      "epoch": 2.601821769408433,
      "grad_norm": 2.125,
      "learning_rate": 7.373744454050069e-06,
      "loss": 0.747,
      "step": 742370
    },
    {
      "epoch": 2.601856816915329,
      "grad_norm": 2.984375,
      "learning_rate": 7.373095425386367e-06,
      "loss": 0.7827,
      "step": 742380
    },
    {
      "epoch": 2.601891864422224,
      "grad_norm": 3.359375,
      "learning_rate": 7.372446396722666e-06,
      "loss": 0.8154,
      "step": 742390
    },
    {
      "epoch": 2.60192691192912,
      "grad_norm": 3.046875,
      "learning_rate": 7.371797368058964e-06,
      "loss": 0.7573,
      "step": 742400
    },
    {
      "epoch": 2.6019619594360153,
      "grad_norm": 2.859375,
      "learning_rate": 7.371148339395262e-06,
      "loss": 0.7969,
      "step": 742410
    },
    {
      "epoch": 2.601997006942911,
      "grad_norm": 2.875,
      "learning_rate": 7.37049931073156e-06,
      "loss": 0.7428,
      "step": 742420
    },
    {
      "epoch": 2.602032054449807,
      "grad_norm": 2.859375,
      "learning_rate": 7.369850282067857e-06,
      "loss": 0.7653,
      "step": 742430
    },
    {
      "epoch": 2.6020671019567025,
      "grad_norm": 3.0625,
      "learning_rate": 7.369201253404155e-06,
      "loss": 0.8053,
      "step": 742440
    },
    {
      "epoch": 2.602102149463598,
      "grad_norm": 2.859375,
      "learning_rate": 7.368552224740454e-06,
      "loss": 0.7844,
      "step": 742450
    },
    {
      "epoch": 2.6021371969704936,
      "grad_norm": 3.125,
      "learning_rate": 7.367903196076752e-06,
      "loss": 0.788,
      "step": 742460
    },
    {
      "epoch": 2.602172244477389,
      "grad_norm": 2.703125,
      "learning_rate": 7.36725416741305e-06,
      "loss": 0.787,
      "step": 742470
    },
    {
      "epoch": 2.6022072919842847,
      "grad_norm": 2.75,
      "learning_rate": 7.3666051387493486e-06,
      "loss": 0.8652,
      "step": 742480
    },
    {
      "epoch": 2.6022423394911804,
      "grad_norm": 2.6875,
      "learning_rate": 7.3659561100856466e-06,
      "loss": 0.7463,
      "step": 742490
    },
    {
      "epoch": 2.6022773869980758,
      "grad_norm": 2.796875,
      "learning_rate": 7.3653070814219446e-06,
      "loss": 0.7802,
      "step": 742500
    },
    {
      "epoch": 2.6023124345049715,
      "grad_norm": 3.03125,
      "learning_rate": 7.3646580527582426e-06,
      "loss": 0.7524,
      "step": 742510
    },
    {
      "epoch": 2.602347482011867,
      "grad_norm": 3.078125,
      "learning_rate": 7.364009024094541e-06,
      "loss": 0.7346,
      "step": 742520
    },
    {
      "epoch": 2.6023825295187626,
      "grad_norm": 3.21875,
      "learning_rate": 7.363359995430838e-06,
      "loss": 0.7956,
      "step": 742530
    },
    {
      "epoch": 2.6024175770256583,
      "grad_norm": 3.234375,
      "learning_rate": 7.3627109667671366e-06,
      "loss": 0.7814,
      "step": 742540
    },
    {
      "epoch": 2.602452624532554,
      "grad_norm": 2.421875,
      "learning_rate": 7.3620619381034346e-06,
      "loss": 0.8374,
      "step": 742550
    },
    {
      "epoch": 2.6024876720394494,
      "grad_norm": 3.109375,
      "learning_rate": 7.3614129094397326e-06,
      "loss": 0.8181,
      "step": 742560
    },
    {
      "epoch": 2.602522719546345,
      "grad_norm": 2.640625,
      "learning_rate": 7.3607638807760306e-06,
      "loss": 0.7914,
      "step": 742570
    },
    {
      "epoch": 2.6025577670532405,
      "grad_norm": 2.859375,
      "learning_rate": 7.360114852112329e-06,
      "loss": 0.8039,
      "step": 742580
    },
    {
      "epoch": 2.6025928145601362,
      "grad_norm": 3.015625,
      "learning_rate": 7.359465823448627e-06,
      "loss": 0.8098,
      "step": 742590
    },
    {
      "epoch": 2.602627862067032,
      "grad_norm": 2.46875,
      "learning_rate": 7.358816794784925e-06,
      "loss": 0.7689,
      "step": 742600
    },
    {
      "epoch": 2.6026629095739273,
      "grad_norm": 2.796875,
      "learning_rate": 7.358167766121224e-06,
      "loss": 0.7734,
      "step": 742610
    },
    {
      "epoch": 2.602697957080823,
      "grad_norm": 2.453125,
      "learning_rate": 7.3575187374575206e-06,
      "loss": 0.7866,
      "step": 742620
    },
    {
      "epoch": 2.6027330045877184,
      "grad_norm": 2.5625,
      "learning_rate": 7.356869708793819e-06,
      "loss": 0.7365,
      "step": 742630
    },
    {
      "epoch": 2.602768052094614,
      "grad_norm": 2.859375,
      "learning_rate": 7.356220680130117e-06,
      "loss": 0.7507,
      "step": 742640
    },
    {
      "epoch": 2.60280309960151,
      "grad_norm": 2.796875,
      "learning_rate": 7.355571651466415e-06,
      "loss": 0.7986,
      "step": 742650
    },
    {
      "epoch": 2.6028381471084057,
      "grad_norm": 3.28125,
      "learning_rate": 7.354922622802713e-06,
      "loss": 0.793,
      "step": 742660
    },
    {
      "epoch": 2.602873194615301,
      "grad_norm": 2.71875,
      "learning_rate": 7.354273594139012e-06,
      "loss": 0.7972,
      "step": 742670
    },
    {
      "epoch": 2.6029082421221967,
      "grad_norm": 2.71875,
      "learning_rate": 7.35362456547531e-06,
      "loss": 0.8548,
      "step": 742680
    },
    {
      "epoch": 2.602943289629092,
      "grad_norm": 3.3125,
      "learning_rate": 7.352975536811608e-06,
      "loss": 0.7645,
      "step": 742690
    },
    {
      "epoch": 2.602978337135988,
      "grad_norm": 3.0,
      "learning_rate": 7.352326508147907e-06,
      "loss": 0.7938,
      "step": 742700
    },
    {
      "epoch": 2.6030133846428836,
      "grad_norm": 2.90625,
      "learning_rate": 7.351677479484203e-06,
      "loss": 0.8998,
      "step": 742710
    },
    {
      "epoch": 2.603048432149779,
      "grad_norm": 2.53125,
      "learning_rate": 7.351028450820502e-06,
      "loss": 0.8325,
      "step": 742720
    },
    {
      "epoch": 2.6030834796566746,
      "grad_norm": 3.015625,
      "learning_rate": 7.3503794221568e-06,
      "loss": 0.8291,
      "step": 742730
    },
    {
      "epoch": 2.60311852716357,
      "grad_norm": 2.765625,
      "learning_rate": 7.349730393493098e-06,
      "loss": 0.7486,
      "step": 742740
    },
    {
      "epoch": 2.6031535746704657,
      "grad_norm": 3.3125,
      "learning_rate": 7.349081364829396e-06,
      "loss": 0.8524,
      "step": 742750
    },
    {
      "epoch": 2.6031886221773615,
      "grad_norm": 3.09375,
      "learning_rate": 7.348432336165695e-06,
      "loss": 0.8002,
      "step": 742760
    },
    {
      "epoch": 2.6032236696842572,
      "grad_norm": 2.65625,
      "learning_rate": 7.347783307501993e-06,
      "loss": 0.6856,
      "step": 742770
    },
    {
      "epoch": 2.6032587171911525,
      "grad_norm": 3.046875,
      "learning_rate": 7.347134278838291e-06,
      "loss": 0.7434,
      "step": 742780
    },
    {
      "epoch": 2.6032937646980483,
      "grad_norm": 2.625,
      "learning_rate": 7.34648525017459e-06,
      "loss": 0.7747,
      "step": 742790
    },
    {
      "epoch": 2.6033288122049436,
      "grad_norm": 2.921875,
      "learning_rate": 7.345836221510888e-06,
      "loss": 0.7872,
      "step": 742800
    },
    {
      "epoch": 2.6033638597118394,
      "grad_norm": 3.0,
      "learning_rate": 7.345187192847184e-06,
      "loss": 0.7547,
      "step": 742810
    },
    {
      "epoch": 2.603398907218735,
      "grad_norm": 3.03125,
      "learning_rate": 7.344538164183483e-06,
      "loss": 0.8133,
      "step": 742820
    },
    {
      "epoch": 2.6034339547256304,
      "grad_norm": 2.59375,
      "learning_rate": 7.343889135519781e-06,
      "loss": 0.8162,
      "step": 742830
    },
    {
      "epoch": 2.603469002232526,
      "grad_norm": 2.890625,
      "learning_rate": 7.343240106856079e-06,
      "loss": 0.7729,
      "step": 742840
    },
    {
      "epoch": 2.6035040497394215,
      "grad_norm": 3.109375,
      "learning_rate": 7.342591078192378e-06,
      "loss": 0.7344,
      "step": 742850
    },
    {
      "epoch": 2.6035390972463173,
      "grad_norm": 3.09375,
      "learning_rate": 7.341942049528676e-06,
      "loss": 0.8406,
      "step": 742860
    },
    {
      "epoch": 2.603574144753213,
      "grad_norm": 2.9375,
      "learning_rate": 7.341293020864974e-06,
      "loss": 0.8122,
      "step": 742870
    },
    {
      "epoch": 2.603609192260109,
      "grad_norm": 2.734375,
      "learning_rate": 7.340643992201272e-06,
      "loss": 0.7659,
      "step": 742880
    },
    {
      "epoch": 2.603644239767004,
      "grad_norm": 3.0625,
      "learning_rate": 7.339994963537571e-06,
      "loss": 0.7745,
      "step": 742890
    },
    {
      "epoch": 2.6036792872739,
      "grad_norm": 3.1875,
      "learning_rate": 7.339345934873867e-06,
      "loss": 0.7505,
      "step": 742900
    },
    {
      "epoch": 2.603714334780795,
      "grad_norm": 2.703125,
      "learning_rate": 7.338696906210166e-06,
      "loss": 0.6843,
      "step": 742910
    },
    {
      "epoch": 2.603749382287691,
      "grad_norm": 2.75,
      "learning_rate": 7.338047877546464e-06,
      "loss": 0.7254,
      "step": 742920
    },
    {
      "epoch": 2.6037844297945867,
      "grad_norm": 3.0625,
      "learning_rate": 7.337398848882762e-06,
      "loss": 0.8307,
      "step": 742930
    },
    {
      "epoch": 2.603819477301482,
      "grad_norm": 2.90625,
      "learning_rate": 7.336749820219061e-06,
      "loss": 0.781,
      "step": 742940
    },
    {
      "epoch": 2.6038545248083778,
      "grad_norm": 3.140625,
      "learning_rate": 7.336100791555359e-06,
      "loss": 0.7717,
      "step": 742950
    },
    {
      "epoch": 2.6038895723152735,
      "grad_norm": 2.828125,
      "learning_rate": 7.335451762891657e-06,
      "loss": 0.8393,
      "step": 742960
    },
    {
      "epoch": 2.603924619822169,
      "grad_norm": 2.796875,
      "learning_rate": 7.334802734227955e-06,
      "loss": 0.7279,
      "step": 742970
    },
    {
      "epoch": 2.6039596673290646,
      "grad_norm": 2.8125,
      "learning_rate": 7.334153705564254e-06,
      "loss": 0.7962,
      "step": 742980
    },
    {
      "epoch": 2.6039947148359603,
      "grad_norm": 3.140625,
      "learning_rate": 7.333504676900552e-06,
      "loss": 0.8069,
      "step": 742990
    },
    {
      "epoch": 2.6040297623428557,
      "grad_norm": 2.890625,
      "learning_rate": 7.332855648236849e-06,
      "loss": 0.8907,
      "step": 743000
    },
    {
      "epoch": 2.6040648098497514,
      "grad_norm": 2.578125,
      "learning_rate": 7.332206619573147e-06,
      "loss": 0.7397,
      "step": 743010
    },
    {
      "epoch": 2.6040998573566467,
      "grad_norm": 2.84375,
      "learning_rate": 7.331557590909445e-06,
      "loss": 0.7455,
      "step": 743020
    },
    {
      "epoch": 2.6041349048635425,
      "grad_norm": 2.5,
      "learning_rate": 7.330908562245744e-06,
      "loss": 0.7399,
      "step": 743030
    },
    {
      "epoch": 2.6041699523704382,
      "grad_norm": 3.140625,
      "learning_rate": 7.330259533582042e-06,
      "loss": 0.7403,
      "step": 743040
    },
    {
      "epoch": 2.6042049998773336,
      "grad_norm": 3.203125,
      "learning_rate": 7.32961050491834e-06,
      "loss": 0.8353,
      "step": 743050
    },
    {
      "epoch": 2.6042400473842293,
      "grad_norm": 2.609375,
      "learning_rate": 7.328961476254638e-06,
      "loss": 0.793,
      "step": 743060
    },
    {
      "epoch": 2.604275094891125,
      "grad_norm": 2.4375,
      "learning_rate": 7.3283124475909365e-06,
      "loss": 0.8125,
      "step": 743070
    },
    {
      "epoch": 2.6043101423980204,
      "grad_norm": 2.9375,
      "learning_rate": 7.3276634189272345e-06,
      "loss": 0.7844,
      "step": 743080
    },
    {
      "epoch": 2.604345189904916,
      "grad_norm": 2.828125,
      "learning_rate": 7.327014390263532e-06,
      "loss": 0.9551,
      "step": 743090
    },
    {
      "epoch": 2.604380237411812,
      "grad_norm": 3.171875,
      "learning_rate": 7.32636536159983e-06,
      "loss": 0.7981,
      "step": 743100
    },
    {
      "epoch": 2.604415284918707,
      "grad_norm": 2.671875,
      "learning_rate": 7.325716332936128e-06,
      "loss": 0.8654,
      "step": 743110
    },
    {
      "epoch": 2.604450332425603,
      "grad_norm": 3.140625,
      "learning_rate": 7.325067304272426e-06,
      "loss": 0.8054,
      "step": 743120
    },
    {
      "epoch": 2.6044853799324983,
      "grad_norm": 3.125,
      "learning_rate": 7.3244182756087245e-06,
      "loss": 0.8303,
      "step": 743130
    },
    {
      "epoch": 2.604520427439394,
      "grad_norm": 2.515625,
      "learning_rate": 7.3237692469450225e-06,
      "loss": 0.7879,
      "step": 743140
    },
    {
      "epoch": 2.60455547494629,
      "grad_norm": 2.984375,
      "learning_rate": 7.3231202182813205e-06,
      "loss": 0.8761,
      "step": 743150
    },
    {
      "epoch": 2.6045905224531856,
      "grad_norm": 2.890625,
      "learning_rate": 7.322471189617619e-06,
      "loss": 0.7795,
      "step": 743160
    },
    {
      "epoch": 2.604625569960081,
      "grad_norm": 3.1875,
      "learning_rate": 7.321822160953917e-06,
      "loss": 0.7765,
      "step": 743170
    },
    {
      "epoch": 2.6046606174669766,
      "grad_norm": 2.703125,
      "learning_rate": 7.321173132290215e-06,
      "loss": 0.7597,
      "step": 743180
    },
    {
      "epoch": 2.604695664973872,
      "grad_norm": 2.953125,
      "learning_rate": 7.3205241036265125e-06,
      "loss": 0.7818,
      "step": 743190
    },
    {
      "epoch": 2.6047307124807677,
      "grad_norm": 2.859375,
      "learning_rate": 7.3198750749628105e-06,
      "loss": 0.8161,
      "step": 743200
    },
    {
      "epoch": 2.6047657599876635,
      "grad_norm": 3.0,
      "learning_rate": 7.3192260462991085e-06,
      "loss": 0.8413,
      "step": 743210
    },
    {
      "epoch": 2.604800807494559,
      "grad_norm": 2.71875,
      "learning_rate": 7.318577017635407e-06,
      "loss": 0.6907,
      "step": 743220
    },
    {
      "epoch": 2.6048358550014545,
      "grad_norm": 3.4375,
      "learning_rate": 7.317927988971705e-06,
      "loss": 0.7843,
      "step": 743230
    },
    {
      "epoch": 2.60487090250835,
      "grad_norm": 2.609375,
      "learning_rate": 7.317278960308003e-06,
      "loss": 0.7619,
      "step": 743240
    },
    {
      "epoch": 2.6049059500152456,
      "grad_norm": 3.203125,
      "learning_rate": 7.316629931644302e-06,
      "loss": 0.7468,
      "step": 743250
    },
    {
      "epoch": 2.6049409975221414,
      "grad_norm": 3.125,
      "learning_rate": 7.3159809029806e-06,
      "loss": 0.7825,
      "step": 743260
    },
    {
      "epoch": 2.604976045029037,
      "grad_norm": 2.890625,
      "learning_rate": 7.315331874316898e-06,
      "loss": 0.8223,
      "step": 743270
    },
    {
      "epoch": 2.6050110925359324,
      "grad_norm": 3.21875,
      "learning_rate": 7.314682845653195e-06,
      "loss": 0.778,
      "step": 743280
    },
    {
      "epoch": 2.605046140042828,
      "grad_norm": 2.890625,
      "learning_rate": 7.314033816989493e-06,
      "loss": 0.808,
      "step": 743290
    },
    {
      "epoch": 2.6050811875497235,
      "grad_norm": 2.5625,
      "learning_rate": 7.313384788325791e-06,
      "loss": 0.7715,
      "step": 743300
    },
    {
      "epoch": 2.6051162350566193,
      "grad_norm": 2.734375,
      "learning_rate": 7.31273575966209e-06,
      "loss": 0.7629,
      "step": 743310
    },
    {
      "epoch": 2.605151282563515,
      "grad_norm": 3.15625,
      "learning_rate": 7.312086730998388e-06,
      "loss": 0.8449,
      "step": 743320
    },
    {
      "epoch": 2.6051863300704103,
      "grad_norm": 2.71875,
      "learning_rate": 7.311437702334686e-06,
      "loss": 0.8815,
      "step": 743330
    },
    {
      "epoch": 2.605221377577306,
      "grad_norm": 3.15625,
      "learning_rate": 7.310788673670985e-06,
      "loss": 0.8281,
      "step": 743340
    },
    {
      "epoch": 2.6052564250842014,
      "grad_norm": 2.796875,
      "learning_rate": 7.310139645007283e-06,
      "loss": 0.7857,
      "step": 743350
    },
    {
      "epoch": 2.605291472591097,
      "grad_norm": 3.25,
      "learning_rate": 7.309490616343581e-06,
      "loss": 0.7711,
      "step": 743360
    },
    {
      "epoch": 2.605326520097993,
      "grad_norm": 2.625,
      "learning_rate": 7.308841587679878e-06,
      "loss": 0.6951,
      "step": 743370
    },
    {
      "epoch": 2.6053615676048887,
      "grad_norm": 2.90625,
      "learning_rate": 7.308192559016176e-06,
      "loss": 0.7491,
      "step": 743380
    },
    {
      "epoch": 2.605396615111784,
      "grad_norm": 2.859375,
      "learning_rate": 7.307543530352474e-06,
      "loss": 0.8139,
      "step": 743390
    },
    {
      "epoch": 2.6054316626186798,
      "grad_norm": 3.0,
      "learning_rate": 7.306894501688773e-06,
      "loss": 0.841,
      "step": 743400
    },
    {
      "epoch": 2.605466710125575,
      "grad_norm": 3.0,
      "learning_rate": 7.306245473025071e-06,
      "loss": 0.7958,
      "step": 743410
    },
    {
      "epoch": 2.605501757632471,
      "grad_norm": 2.984375,
      "learning_rate": 7.305596444361369e-06,
      "loss": 0.8351,
      "step": 743420
    },
    {
      "epoch": 2.6055368051393666,
      "grad_norm": 2.96875,
      "learning_rate": 7.304947415697667e-06,
      "loss": 0.8422,
      "step": 743430
    },
    {
      "epoch": 2.605571852646262,
      "grad_norm": 3.0,
      "learning_rate": 7.304298387033966e-06,
      "loss": 0.7937,
      "step": 743440
    },
    {
      "epoch": 2.6056069001531577,
      "grad_norm": 3.265625,
      "learning_rate": 7.303649358370264e-06,
      "loss": 0.835,
      "step": 743450
    },
    {
      "epoch": 2.605641947660053,
      "grad_norm": 3.03125,
      "learning_rate": 7.303000329706562e-06,
      "loss": 0.8503,
      "step": 743460
    },
    {
      "epoch": 2.6056769951669487,
      "grad_norm": 2.984375,
      "learning_rate": 7.302351301042859e-06,
      "loss": 0.8108,
      "step": 743470
    },
    {
      "epoch": 2.6057120426738445,
      "grad_norm": 3.625,
      "learning_rate": 7.301702272379157e-06,
      "loss": 0.8925,
      "step": 743480
    },
    {
      "epoch": 2.6057470901807402,
      "grad_norm": 2.828125,
      "learning_rate": 7.301053243715456e-06,
      "loss": 0.7715,
      "step": 743490
    },
    {
      "epoch": 2.6057821376876356,
      "grad_norm": 2.8125,
      "learning_rate": 7.300404215051754e-06,
      "loss": 0.8398,
      "step": 743500
    },
    {
      "epoch": 2.6058171851945313,
      "grad_norm": 2.515625,
      "learning_rate": 7.299755186388052e-06,
      "loss": 0.751,
      "step": 743510
    },
    {
      "epoch": 2.6058522327014266,
      "grad_norm": 2.78125,
      "learning_rate": 7.29910615772435e-06,
      "loss": 0.7939,
      "step": 743520
    },
    {
      "epoch": 2.6058872802083224,
      "grad_norm": 2.96875,
      "learning_rate": 7.298457129060649e-06,
      "loss": 0.8709,
      "step": 743530
    },
    {
      "epoch": 2.605922327715218,
      "grad_norm": 2.859375,
      "learning_rate": 7.297808100396947e-06,
      "loss": 0.7508,
      "step": 743540
    },
    {
      "epoch": 2.6059573752221135,
      "grad_norm": 2.625,
      "learning_rate": 7.297159071733245e-06,
      "loss": 0.8377,
      "step": 743550
    },
    {
      "epoch": 2.605992422729009,
      "grad_norm": 3.1875,
      "learning_rate": 7.296510043069542e-06,
      "loss": 0.8216,
      "step": 743560
    },
    {
      "epoch": 2.6060274702359045,
      "grad_norm": 3.28125,
      "learning_rate": 7.29586101440584e-06,
      "loss": 0.8204,
      "step": 743570
    },
    {
      "epoch": 2.6060625177428003,
      "grad_norm": 3.296875,
      "learning_rate": 7.295211985742139e-06,
      "loss": 0.8797,
      "step": 743580
    },
    {
      "epoch": 2.606097565249696,
      "grad_norm": 3.140625,
      "learning_rate": 7.294562957078437e-06,
      "loss": 0.8571,
      "step": 743590
    },
    {
      "epoch": 2.606132612756592,
      "grad_norm": 2.9375,
      "learning_rate": 7.293913928414735e-06,
      "loss": 0.8497,
      "step": 743600
    },
    {
      "epoch": 2.606167660263487,
      "grad_norm": 2.984375,
      "learning_rate": 7.293264899751033e-06,
      "loss": 0.7277,
      "step": 743610
    },
    {
      "epoch": 2.606202707770383,
      "grad_norm": 3.203125,
      "learning_rate": 7.2926158710873316e-06,
      "loss": 0.8456,
      "step": 743620
    },
    {
      "epoch": 2.606237755277278,
      "grad_norm": 2.25,
      "learning_rate": 7.2919668424236296e-06,
      "loss": 0.6787,
      "step": 743630
    },
    {
      "epoch": 2.606272802784174,
      "grad_norm": 3.109375,
      "learning_rate": 7.2913178137599276e-06,
      "loss": 0.7935,
      "step": 743640
    },
    {
      "epoch": 2.6063078502910697,
      "grad_norm": 2.625,
      "learning_rate": 7.290668785096226e-06,
      "loss": 0.8104,
      "step": 743650
    },
    {
      "epoch": 2.606342897797965,
      "grad_norm": 2.96875,
      "learning_rate": 7.290019756432523e-06,
      "loss": 0.8294,
      "step": 743660
    },
    {
      "epoch": 2.6063779453048608,
      "grad_norm": 2.828125,
      "learning_rate": 7.289370727768821e-06,
      "loss": 0.8283,
      "step": 743670
    },
    {
      "epoch": 2.606412992811756,
      "grad_norm": 2.96875,
      "learning_rate": 7.2887216991051196e-06,
      "loss": 0.79,
      "step": 743680
    },
    {
      "epoch": 2.606448040318652,
      "grad_norm": 2.6875,
      "learning_rate": 7.2880726704414176e-06,
      "loss": 0.7643,
      "step": 743690
    },
    {
      "epoch": 2.6064830878255476,
      "grad_norm": 3.328125,
      "learning_rate": 7.2874236417777156e-06,
      "loss": 0.8461,
      "step": 743700
    },
    {
      "epoch": 2.6065181353324434,
      "grad_norm": 2.796875,
      "learning_rate": 7.286774613114014e-06,
      "loss": 0.7795,
      "step": 743710
    },
    {
      "epoch": 2.6065531828393387,
      "grad_norm": 2.953125,
      "learning_rate": 7.286125584450312e-06,
      "loss": 0.8471,
      "step": 743720
    },
    {
      "epoch": 2.6065882303462344,
      "grad_norm": 3.609375,
      "learning_rate": 7.28547655578661e-06,
      "loss": 0.8688,
      "step": 743730
    },
    {
      "epoch": 2.6066232778531297,
      "grad_norm": 2.734375,
      "learning_rate": 7.284827527122908e-06,
      "loss": 0.7794,
      "step": 743740
    },
    {
      "epoch": 2.6066583253600255,
      "grad_norm": 2.75,
      "learning_rate": 7.2841784984592056e-06,
      "loss": 0.7287,
      "step": 743750
    },
    {
      "epoch": 2.6066933728669213,
      "grad_norm": 3.015625,
      "learning_rate": 7.2835294697955036e-06,
      "loss": 0.7801,
      "step": 743760
    },
    {
      "epoch": 2.6067284203738166,
      "grad_norm": 3.625,
      "learning_rate": 7.282880441131802e-06,
      "loss": 0.8857,
      "step": 743770
    },
    {
      "epoch": 2.6067634678807123,
      "grad_norm": 2.46875,
      "learning_rate": 7.2822314124681e-06,
      "loss": 0.7939,
      "step": 743780
    },
    {
      "epoch": 2.6067985153876077,
      "grad_norm": 3.3125,
      "learning_rate": 7.281582383804398e-06,
      "loss": 0.8019,
      "step": 743790
    },
    {
      "epoch": 2.6068335628945034,
      "grad_norm": 3.15625,
      "learning_rate": 7.280933355140697e-06,
      "loss": 0.8573,
      "step": 743800
    },
    {
      "epoch": 2.606868610401399,
      "grad_norm": 2.609375,
      "learning_rate": 7.280284326476995e-06,
      "loss": 0.7882,
      "step": 743810
    },
    {
      "epoch": 2.606903657908295,
      "grad_norm": 2.59375,
      "learning_rate": 7.279635297813293e-06,
      "loss": 0.7104,
      "step": 743820
    },
    {
      "epoch": 2.6069387054151902,
      "grad_norm": 2.65625,
      "learning_rate": 7.278986269149591e-06,
      "loss": 0.8488,
      "step": 743830
    },
    {
      "epoch": 2.606973752922086,
      "grad_norm": 2.734375,
      "learning_rate": 7.278337240485888e-06,
      "loss": 0.8079,
      "step": 743840
    },
    {
      "epoch": 2.6070088004289813,
      "grad_norm": 2.78125,
      "learning_rate": 7.277688211822186e-06,
      "loss": 0.7697,
      "step": 743850
    },
    {
      "epoch": 2.607043847935877,
      "grad_norm": 2.65625,
      "learning_rate": 7.277039183158485e-06,
      "loss": 0.7865,
      "step": 743860
    },
    {
      "epoch": 2.607078895442773,
      "grad_norm": 3.015625,
      "learning_rate": 7.276390154494783e-06,
      "loss": 0.8509,
      "step": 743870
    },
    {
      "epoch": 2.607113942949668,
      "grad_norm": 2.734375,
      "learning_rate": 7.275741125831081e-06,
      "loss": 0.8239,
      "step": 743880
    },
    {
      "epoch": 2.607148990456564,
      "grad_norm": 2.453125,
      "learning_rate": 7.27509209716738e-06,
      "loss": 0.7888,
      "step": 743890
    },
    {
      "epoch": 2.607184037963459,
      "grad_norm": 2.921875,
      "learning_rate": 7.274443068503678e-06,
      "loss": 0.7818,
      "step": 743900
    },
    {
      "epoch": 2.607219085470355,
      "grad_norm": 2.875,
      "learning_rate": 7.273794039839976e-06,
      "loss": 0.8158,
      "step": 743910
    },
    {
      "epoch": 2.6072541329772507,
      "grad_norm": 2.921875,
      "learning_rate": 7.273145011176274e-06,
      "loss": 0.7918,
      "step": 743920
    },
    {
      "epoch": 2.6072891804841465,
      "grad_norm": 2.84375,
      "learning_rate": 7.272495982512573e-06,
      "loss": 0.8013,
      "step": 743930
    },
    {
      "epoch": 2.607324227991042,
      "grad_norm": 2.90625,
      "learning_rate": 7.271846953848869e-06,
      "loss": 0.823,
      "step": 743940
    },
    {
      "epoch": 2.6073592754979376,
      "grad_norm": 3.390625,
      "learning_rate": 7.271197925185168e-06,
      "loss": 0.873,
      "step": 743950
    },
    {
      "epoch": 2.607394323004833,
      "grad_norm": 2.953125,
      "learning_rate": 7.270548896521466e-06,
      "loss": 0.7833,
      "step": 743960
    },
    {
      "epoch": 2.6074293705117286,
      "grad_norm": 2.84375,
      "learning_rate": 7.269899867857764e-06,
      "loss": 0.7178,
      "step": 743970
    },
    {
      "epoch": 2.6074644180186244,
      "grad_norm": 3.25,
      "learning_rate": 7.269250839194062e-06,
      "loss": 0.7847,
      "step": 743980
    },
    {
      "epoch": 2.6074994655255197,
      "grad_norm": 2.96875,
      "learning_rate": 7.268601810530361e-06,
      "loss": 0.8126,
      "step": 743990
    },
    {
      "epoch": 2.6075345130324155,
      "grad_norm": 3.171875,
      "learning_rate": 7.267952781866659e-06,
      "loss": 0.759,
      "step": 744000
    },
    {
      "epoch": 2.6075695605393108,
      "grad_norm": 2.71875,
      "learning_rate": 7.267303753202957e-06,
      "loss": 0.7968,
      "step": 744010
    },
    {
      "epoch": 2.6076046080462065,
      "grad_norm": 2.9375,
      "learning_rate": 7.266654724539256e-06,
      "loss": 0.7974,
      "step": 744020
    },
    {
      "epoch": 2.6076396555531023,
      "grad_norm": 3.203125,
      "learning_rate": 7.266005695875552e-06,
      "loss": 0.8083,
      "step": 744030
    },
    {
      "epoch": 2.607674703059998,
      "grad_norm": 3.03125,
      "learning_rate": 7.265356667211851e-06,
      "loss": 0.8127,
      "step": 744040
    },
    {
      "epoch": 2.6077097505668934,
      "grad_norm": 2.859375,
      "learning_rate": 7.264707638548149e-06,
      "loss": 0.7797,
      "step": 744050
    },
    {
      "epoch": 2.607744798073789,
      "grad_norm": 2.9375,
      "learning_rate": 7.264058609884447e-06,
      "loss": 0.7987,
      "step": 744060
    },
    {
      "epoch": 2.6077798455806844,
      "grad_norm": 2.921875,
      "learning_rate": 7.263409581220745e-06,
      "loss": 0.8575,
      "step": 744070
    },
    {
      "epoch": 2.60781489308758,
      "grad_norm": 2.328125,
      "learning_rate": 7.262760552557044e-06,
      "loss": 0.7867,
      "step": 744080
    },
    {
      "epoch": 2.607849940594476,
      "grad_norm": 2.890625,
      "learning_rate": 7.262111523893342e-06,
      "loss": 0.7787,
      "step": 744090
    },
    {
      "epoch": 2.6078849881013713,
      "grad_norm": 3.40625,
      "learning_rate": 7.26146249522964e-06,
      "loss": 0.8141,
      "step": 744100
    },
    {
      "epoch": 2.607920035608267,
      "grad_norm": 2.625,
      "learning_rate": 7.260813466565939e-06,
      "loss": 0.7077,
      "step": 744110
    },
    {
      "epoch": 2.6079550831151623,
      "grad_norm": 3.015625,
      "learning_rate": 7.260164437902237e-06,
      "loss": 0.7981,
      "step": 744120
    },
    {
      "epoch": 2.607990130622058,
      "grad_norm": 2.859375,
      "learning_rate": 7.259515409238534e-06,
      "loss": 0.8661,
      "step": 744130
    },
    {
      "epoch": 2.608025178128954,
      "grad_norm": 3.25,
      "learning_rate": 7.258866380574832e-06,
      "loss": 0.7938,
      "step": 744140
    },
    {
      "epoch": 2.6080602256358496,
      "grad_norm": 3.453125,
      "learning_rate": 7.25821735191113e-06,
      "loss": 0.8642,
      "step": 744150
    },
    {
      "epoch": 2.608095273142745,
      "grad_norm": 3.203125,
      "learning_rate": 7.257568323247428e-06,
      "loss": 0.822,
      "step": 744160
    },
    {
      "epoch": 2.6081303206496407,
      "grad_norm": 3.125,
      "learning_rate": 7.256919294583727e-06,
      "loss": 0.8521,
      "step": 744170
    },
    {
      "epoch": 2.608165368156536,
      "grad_norm": 3.0625,
      "learning_rate": 7.256270265920025e-06,
      "loss": 0.8059,
      "step": 744180
    },
    {
      "epoch": 2.6082004156634317,
      "grad_norm": 3.078125,
      "learning_rate": 7.255621237256323e-06,
      "loss": 0.8078,
      "step": 744190
    },
    {
      "epoch": 2.6082354631703275,
      "grad_norm": 2.796875,
      "learning_rate": 7.2549722085926215e-06,
      "loss": 0.8363,
      "step": 744200
    },
    {
      "epoch": 2.608270510677223,
      "grad_norm": 2.8125,
      "learning_rate": 7.2543231799289195e-06,
      "loss": 0.742,
      "step": 744210
    },
    {
      "epoch": 2.6083055581841186,
      "grad_norm": 2.828125,
      "learning_rate": 7.253674151265216e-06,
      "loss": 0.8503,
      "step": 744220
    },
    {
      "epoch": 2.6083406056910143,
      "grad_norm": 2.78125,
      "learning_rate": 7.253025122601515e-06,
      "loss": 0.7623,
      "step": 744230
    },
    {
      "epoch": 2.6083756531979096,
      "grad_norm": 2.75,
      "learning_rate": 7.252376093937813e-06,
      "loss": 0.8156,
      "step": 744240
    },
    {
      "epoch": 2.6084107007048054,
      "grad_norm": 2.921875,
      "learning_rate": 7.251727065274111e-06,
      "loss": 0.7794,
      "step": 744250
    },
    {
      "epoch": 2.608445748211701,
      "grad_norm": 2.859375,
      "learning_rate": 7.2510780366104095e-06,
      "loss": 0.8224,
      "step": 744260
    },
    {
      "epoch": 2.6084807957185965,
      "grad_norm": 2.859375,
      "learning_rate": 7.2504290079467075e-06,
      "loss": 0.8152,
      "step": 744270
    },
    {
      "epoch": 2.6085158432254922,
      "grad_norm": 2.90625,
      "learning_rate": 7.2497799792830055e-06,
      "loss": 0.793,
      "step": 744280
    },
    {
      "epoch": 2.6085508907323876,
      "grad_norm": 3.359375,
      "learning_rate": 7.2491309506193035e-06,
      "loss": 0.7846,
      "step": 744290
    },
    {
      "epoch": 2.6085859382392833,
      "grad_norm": 2.84375,
      "learning_rate": 7.248481921955602e-06,
      "loss": 0.7911,
      "step": 744300
    },
    {
      "epoch": 2.608620985746179,
      "grad_norm": 2.734375,
      "learning_rate": 7.247832893291899e-06,
      "loss": 0.7606,
      "step": 744310
    },
    {
      "epoch": 2.6086560332530744,
      "grad_norm": 2.78125,
      "learning_rate": 7.2471838646281975e-06,
      "loss": 0.7479,
      "step": 744320
    },
    {
      "epoch": 2.60869108075997,
      "grad_norm": 3.375,
      "learning_rate": 7.2465348359644955e-06,
      "loss": 0.837,
      "step": 744330
    },
    {
      "epoch": 2.608726128266866,
      "grad_norm": 2.53125,
      "learning_rate": 7.2458858073007935e-06,
      "loss": 0.7993,
      "step": 744340
    },
    {
      "epoch": 2.608761175773761,
      "grad_norm": 3.4375,
      "learning_rate": 7.245236778637092e-06,
      "loss": 0.8783,
      "step": 744350
    },
    {
      "epoch": 2.608796223280657,
      "grad_norm": 3.078125,
      "learning_rate": 7.24458774997339e-06,
      "loss": 0.7454,
      "step": 744360
    },
    {
      "epoch": 2.6088312707875527,
      "grad_norm": 3.390625,
      "learning_rate": 7.243938721309688e-06,
      "loss": 0.7947,
      "step": 744370
    },
    {
      "epoch": 2.608866318294448,
      "grad_norm": 3.03125,
      "learning_rate": 7.243289692645986e-06,
      "loss": 0.8287,
      "step": 744380
    },
    {
      "epoch": 2.608901365801344,
      "grad_norm": 2.65625,
      "learning_rate": 7.242640663982285e-06,
      "loss": 0.7688,
      "step": 744390
    },
    {
      "epoch": 2.608936413308239,
      "grad_norm": 2.65625,
      "learning_rate": 7.241991635318583e-06,
      "loss": 0.8121,
      "step": 744400
    },
    {
      "epoch": 2.608971460815135,
      "grad_norm": 3.125,
      "learning_rate": 7.24134260665488e-06,
      "loss": 0.7961,
      "step": 744410
    },
    {
      "epoch": 2.6090065083220306,
      "grad_norm": 2.90625,
      "learning_rate": 7.240693577991178e-06,
      "loss": 0.7792,
      "step": 744420
    },
    {
      "epoch": 2.6090415558289264,
      "grad_norm": 3.328125,
      "learning_rate": 7.240044549327476e-06,
      "loss": 0.7236,
      "step": 744430
    },
    {
      "epoch": 2.6090766033358217,
      "grad_norm": 2.953125,
      "learning_rate": 7.239395520663775e-06,
      "loss": 0.8533,
      "step": 744440
    },
    {
      "epoch": 2.6091116508427175,
      "grad_norm": 3.046875,
      "learning_rate": 7.238746492000073e-06,
      "loss": 0.7835,
      "step": 744450
    },
    {
      "epoch": 2.6091466983496128,
      "grad_norm": 3.09375,
      "learning_rate": 7.238097463336371e-06,
      "loss": 0.8286,
      "step": 744460
    },
    {
      "epoch": 2.6091817458565085,
      "grad_norm": 2.765625,
      "learning_rate": 7.237448434672669e-06,
      "loss": 0.8307,
      "step": 744470
    },
    {
      "epoch": 2.6092167933634043,
      "grad_norm": 2.875,
      "learning_rate": 7.236799406008968e-06,
      "loss": 0.8384,
      "step": 744480
    },
    {
      "epoch": 2.6092518408702996,
      "grad_norm": 2.765625,
      "learning_rate": 7.236150377345266e-06,
      "loss": 0.7165,
      "step": 744490
    },
    {
      "epoch": 2.6092868883771954,
      "grad_norm": 2.96875,
      "learning_rate": 7.235501348681563e-06,
      "loss": 0.7624,
      "step": 744500
    },
    {
      "epoch": 2.6093219358840907,
      "grad_norm": 2.5625,
      "learning_rate": 7.234852320017861e-06,
      "loss": 0.8768,
      "step": 744510
    },
    {
      "epoch": 2.6093569833909864,
      "grad_norm": 2.90625,
      "learning_rate": 7.234203291354159e-06,
      "loss": 0.7917,
      "step": 744520
    },
    {
      "epoch": 2.609392030897882,
      "grad_norm": 2.90625,
      "learning_rate": 7.233554262690457e-06,
      "loss": 0.8028,
      "step": 744530
    },
    {
      "epoch": 2.609427078404778,
      "grad_norm": 2.640625,
      "learning_rate": 7.232905234026756e-06,
      "loss": 0.7965,
      "step": 744540
    },
    {
      "epoch": 2.6094621259116733,
      "grad_norm": 3.203125,
      "learning_rate": 7.232256205363054e-06,
      "loss": 0.7876,
      "step": 744550
    },
    {
      "epoch": 2.609497173418569,
      "grad_norm": 3.21875,
      "learning_rate": 7.231607176699352e-06,
      "loss": 0.7945,
      "step": 744560
    },
    {
      "epoch": 2.6095322209254643,
      "grad_norm": 2.828125,
      "learning_rate": 7.230958148035651e-06,
      "loss": 0.8378,
      "step": 744570
    },
    {
      "epoch": 2.60956726843236,
      "grad_norm": 2.828125,
      "learning_rate": 7.230309119371949e-06,
      "loss": 0.7872,
      "step": 744580
    },
    {
      "epoch": 2.609602315939256,
      "grad_norm": 3.125,
      "learning_rate": 7.229660090708247e-06,
      "loss": 0.7553,
      "step": 744590
    },
    {
      "epoch": 2.609637363446151,
      "grad_norm": 3.046875,
      "learning_rate": 7.229011062044544e-06,
      "loss": 0.8388,
      "step": 744600
    },
    {
      "epoch": 2.609672410953047,
      "grad_norm": 3.3125,
      "learning_rate": 7.228362033380842e-06,
      "loss": 0.7995,
      "step": 744610
    },
    {
      "epoch": 2.6097074584599422,
      "grad_norm": 2.96875,
      "learning_rate": 7.22771300471714e-06,
      "loss": 0.7741,
      "step": 744620
    },
    {
      "epoch": 2.609742505966838,
      "grad_norm": 2.1875,
      "learning_rate": 7.227063976053439e-06,
      "loss": 0.8542,
      "step": 744630
    },
    {
      "epoch": 2.6097775534737337,
      "grad_norm": 2.953125,
      "learning_rate": 7.226414947389737e-06,
      "loss": 0.7597,
      "step": 744640
    },
    {
      "epoch": 2.6098126009806295,
      "grad_norm": 2.78125,
      "learning_rate": 7.225765918726035e-06,
      "loss": 0.823,
      "step": 744650
    },
    {
      "epoch": 2.609847648487525,
      "grad_norm": 2.65625,
      "learning_rate": 7.225116890062334e-06,
      "loss": 0.7762,
      "step": 744660
    },
    {
      "epoch": 2.6098826959944206,
      "grad_norm": 2.734375,
      "learning_rate": 7.224467861398632e-06,
      "loss": 0.7965,
      "step": 744670
    },
    {
      "epoch": 2.609917743501316,
      "grad_norm": 3.203125,
      "learning_rate": 7.22381883273493e-06,
      "loss": 0.734,
      "step": 744680
    },
    {
      "epoch": 2.6099527910082116,
      "grad_norm": 3.203125,
      "learning_rate": 7.223169804071227e-06,
      "loss": 0.8926,
      "step": 744690
    },
    {
      "epoch": 2.6099878385151074,
      "grad_norm": 2.6875,
      "learning_rate": 7.222520775407525e-06,
      "loss": 0.7763,
      "step": 744700
    },
    {
      "epoch": 2.6100228860220027,
      "grad_norm": 2.96875,
      "learning_rate": 7.221871746743823e-06,
      "loss": 0.8241,
      "step": 744710
    },
    {
      "epoch": 2.6100579335288985,
      "grad_norm": 2.78125,
      "learning_rate": 7.221222718080122e-06,
      "loss": 0.7607,
      "step": 744720
    },
    {
      "epoch": 2.610092981035794,
      "grad_norm": 2.515625,
      "learning_rate": 7.22057368941642e-06,
      "loss": 0.73,
      "step": 744730
    },
    {
      "epoch": 2.6101280285426895,
      "grad_norm": 3.671875,
      "learning_rate": 7.219924660752718e-06,
      "loss": 0.8357,
      "step": 744740
    },
    {
      "epoch": 2.6101630760495853,
      "grad_norm": 2.609375,
      "learning_rate": 7.2192756320890166e-06,
      "loss": 0.76,
      "step": 744750
    },
    {
      "epoch": 2.610198123556481,
      "grad_norm": 2.984375,
      "learning_rate": 7.2186266034253146e-06,
      "loss": 0.8042,
      "step": 744760
    },
    {
      "epoch": 2.6102331710633764,
      "grad_norm": 3.390625,
      "learning_rate": 7.2179775747616126e-06,
      "loss": 0.8464,
      "step": 744770
    },
    {
      "epoch": 2.610268218570272,
      "grad_norm": 3.515625,
      "learning_rate": 7.21732854609791e-06,
      "loss": 0.7761,
      "step": 744780
    },
    {
      "epoch": 2.6103032660771675,
      "grad_norm": 2.703125,
      "learning_rate": 7.216679517434208e-06,
      "loss": 0.8123,
      "step": 744790
    },
    {
      "epoch": 2.610338313584063,
      "grad_norm": 3.015625,
      "learning_rate": 7.216030488770506e-06,
      "loss": 0.7206,
      "step": 744800
    },
    {
      "epoch": 2.610373361090959,
      "grad_norm": 2.8125,
      "learning_rate": 7.2153814601068046e-06,
      "loss": 0.838,
      "step": 744810
    },
    {
      "epoch": 2.6104084085978543,
      "grad_norm": 2.40625,
      "learning_rate": 7.2147324314431026e-06,
      "loss": 0.7288,
      "step": 744820
    },
    {
      "epoch": 2.61044345610475,
      "grad_norm": 2.375,
      "learning_rate": 7.2140834027794006e-06,
      "loss": 0.743,
      "step": 744830
    },
    {
      "epoch": 2.6104785036116454,
      "grad_norm": 2.921875,
      "learning_rate": 7.213434374115699e-06,
      "loss": 0.7687,
      "step": 744840
    },
    {
      "epoch": 2.610513551118541,
      "grad_norm": 2.578125,
      "learning_rate": 7.212785345451997e-06,
      "loss": 0.8045,
      "step": 744850
    },
    {
      "epoch": 2.610548598625437,
      "grad_norm": 2.8125,
      "learning_rate": 7.212136316788295e-06,
      "loss": 0.7923,
      "step": 744860
    },
    {
      "epoch": 2.6105836461323326,
      "grad_norm": 2.71875,
      "learning_rate": 7.211487288124593e-06,
      "loss": 0.7901,
      "step": 744870
    },
    {
      "epoch": 2.610618693639228,
      "grad_norm": 2.546875,
      "learning_rate": 7.2108382594608906e-06,
      "loss": 0.8291,
      "step": 744880
    },
    {
      "epoch": 2.6106537411461237,
      "grad_norm": 2.859375,
      "learning_rate": 7.2101892307971886e-06,
      "loss": 0.8071,
      "step": 744890
    },
    {
      "epoch": 2.610688788653019,
      "grad_norm": 2.5625,
      "learning_rate": 7.209540202133487e-06,
      "loss": 0.8227,
      "step": 744900
    },
    {
      "epoch": 2.6107238361599148,
      "grad_norm": 3.234375,
      "learning_rate": 7.208891173469785e-06,
      "loss": 0.8058,
      "step": 744910
    },
    {
      "epoch": 2.6107588836668105,
      "grad_norm": 2.859375,
      "learning_rate": 7.208242144806083e-06,
      "loss": 0.8551,
      "step": 744920
    },
    {
      "epoch": 2.610793931173706,
      "grad_norm": 2.84375,
      "learning_rate": 7.207593116142381e-06,
      "loss": 0.8084,
      "step": 744930
    },
    {
      "epoch": 2.6108289786806016,
      "grad_norm": 2.703125,
      "learning_rate": 7.20694408747868e-06,
      "loss": 0.8717,
      "step": 744940
    },
    {
      "epoch": 2.610864026187497,
      "grad_norm": 2.984375,
      "learning_rate": 7.206295058814978e-06,
      "loss": 0.7919,
      "step": 744950
    },
    {
      "epoch": 2.6108990736943927,
      "grad_norm": 2.75,
      "learning_rate": 7.205646030151276e-06,
      "loss": 0.7721,
      "step": 744960
    },
    {
      "epoch": 2.6109341212012884,
      "grad_norm": 3.171875,
      "learning_rate": 7.204997001487573e-06,
      "loss": 0.7412,
      "step": 744970
    },
    {
      "epoch": 2.610969168708184,
      "grad_norm": 3.09375,
      "learning_rate": 7.204347972823871e-06,
      "loss": 0.7876,
      "step": 744980
    },
    {
      "epoch": 2.6110042162150795,
      "grad_norm": 2.484375,
      "learning_rate": 7.20369894416017e-06,
      "loss": 0.7186,
      "step": 744990
    },
    {
      "epoch": 2.6110392637219753,
      "grad_norm": 3.3125,
      "learning_rate": 7.203049915496468e-06,
      "loss": 0.8457,
      "step": 745000
    },
    {
      "epoch": 2.6110392637219753,
      "eval_loss": 0.7550796270370483,
      "eval_runtime": 553.7461,
      "eval_samples_per_second": 687.022,
      "eval_steps_per_second": 57.252,
      "step": 745000
    },
    {
      "epoch": 2.6110743112288706,
      "grad_norm": 3.09375,
      "learning_rate": 7.202400886832766e-06,
      "loss": 0.8106,
      "step": 745010
    },
    {
      "epoch": 2.6111093587357663,
      "grad_norm": 2.890625,
      "learning_rate": 7.201751858169064e-06,
      "loss": 0.7897,
      "step": 745020
    },
    {
      "epoch": 2.611144406242662,
      "grad_norm": 3.03125,
      "learning_rate": 7.201102829505363e-06,
      "loss": 0.84,
      "step": 745030
    },
    {
      "epoch": 2.6111794537495574,
      "grad_norm": 3.078125,
      "learning_rate": 7.200453800841661e-06,
      "loss": 0.7237,
      "step": 745040
    },
    {
      "epoch": 2.611214501256453,
      "grad_norm": 2.453125,
      "learning_rate": 7.199804772177959e-06,
      "loss": 0.814,
      "step": 745050
    },
    {
      "epoch": 2.6112495487633485,
      "grad_norm": 2.921875,
      "learning_rate": 7.199155743514258e-06,
      "loss": 0.8219,
      "step": 745060
    },
    {
      "epoch": 2.6112845962702442,
      "grad_norm": 2.953125,
      "learning_rate": 7.198506714850554e-06,
      "loss": 0.821,
      "step": 745070
    },
    {
      "epoch": 2.61131964377714,
      "grad_norm": 2.96875,
      "learning_rate": 7.197857686186853e-06,
      "loss": 0.8379,
      "step": 745080
    },
    {
      "epoch": 2.6113546912840357,
      "grad_norm": 2.484375,
      "learning_rate": 7.197208657523151e-06,
      "loss": 0.7755,
      "step": 745090
    },
    {
      "epoch": 2.611389738790931,
      "grad_norm": 2.78125,
      "learning_rate": 7.196559628859449e-06,
      "loss": 0.7736,
      "step": 745100
    },
    {
      "epoch": 2.611424786297827,
      "grad_norm": 2.640625,
      "learning_rate": 7.195910600195747e-06,
      "loss": 0.7325,
      "step": 745110
    },
    {
      "epoch": 2.611459833804722,
      "grad_norm": 3.265625,
      "learning_rate": 7.195261571532046e-06,
      "loss": 0.7924,
      "step": 745120
    },
    {
      "epoch": 2.611494881311618,
      "grad_norm": 3.03125,
      "learning_rate": 7.194612542868344e-06,
      "loss": 0.7622,
      "step": 745130
    },
    {
      "epoch": 2.6115299288185136,
      "grad_norm": 2.703125,
      "learning_rate": 7.193963514204642e-06,
      "loss": 0.8208,
      "step": 745140
    },
    {
      "epoch": 2.611564976325409,
      "grad_norm": 2.859375,
      "learning_rate": 7.193314485540941e-06,
      "loss": 0.7079,
      "step": 745150
    },
    {
      "epoch": 2.6116000238323047,
      "grad_norm": 2.59375,
      "learning_rate": 7.192665456877237e-06,
      "loss": 0.7668,
      "step": 745160
    },
    {
      "epoch": 2.6116350713392,
      "grad_norm": 2.59375,
      "learning_rate": 7.192016428213535e-06,
      "loss": 0.8111,
      "step": 745170
    },
    {
      "epoch": 2.611670118846096,
      "grad_norm": 2.484375,
      "learning_rate": 7.191367399549834e-06,
      "loss": 0.8405,
      "step": 745180
    },
    {
      "epoch": 2.6117051663529915,
      "grad_norm": 2.953125,
      "learning_rate": 7.190718370886132e-06,
      "loss": 0.8069,
      "step": 745190
    },
    {
      "epoch": 2.6117402138598873,
      "grad_norm": 2.71875,
      "learning_rate": 7.19006934222243e-06,
      "loss": 0.707,
      "step": 745200
    },
    {
      "epoch": 2.6117752613667826,
      "grad_norm": 2.5,
      "learning_rate": 7.189420313558729e-06,
      "loss": 0.7774,
      "step": 745210
    },
    {
      "epoch": 2.6118103088736784,
      "grad_norm": 3.484375,
      "learning_rate": 7.188771284895027e-06,
      "loss": 0.8411,
      "step": 745220
    },
    {
      "epoch": 2.6118453563805737,
      "grad_norm": 2.84375,
      "learning_rate": 7.188122256231325e-06,
      "loss": 0.7318,
      "step": 745230
    },
    {
      "epoch": 2.6118804038874694,
      "grad_norm": 3.015625,
      "learning_rate": 7.187473227567623e-06,
      "loss": 0.802,
      "step": 745240
    },
    {
      "epoch": 2.611915451394365,
      "grad_norm": 2.671875,
      "learning_rate": 7.18682419890392e-06,
      "loss": 0.8274,
      "step": 745250
    },
    {
      "epoch": 2.6119504989012605,
      "grad_norm": 3.328125,
      "learning_rate": 7.186175170240218e-06,
      "loss": 0.7716,
      "step": 745260
    },
    {
      "epoch": 2.6119855464081563,
      "grad_norm": 3.03125,
      "learning_rate": 7.185526141576517e-06,
      "loss": 0.7813,
      "step": 745270
    },
    {
      "epoch": 2.6120205939150516,
      "grad_norm": 2.921875,
      "learning_rate": 7.184877112912815e-06,
      "loss": 0.8732,
      "step": 745280
    },
    {
      "epoch": 2.6120556414219473,
      "grad_norm": 3.640625,
      "learning_rate": 7.184228084249113e-06,
      "loss": 0.8133,
      "step": 745290
    },
    {
      "epoch": 2.612090688928843,
      "grad_norm": 2.90625,
      "learning_rate": 7.183579055585412e-06,
      "loss": 0.806,
      "step": 745300
    },
    {
      "epoch": 2.612125736435739,
      "grad_norm": 2.765625,
      "learning_rate": 7.18293002692171e-06,
      "loss": 0.7725,
      "step": 745310
    },
    {
      "epoch": 2.612160783942634,
      "grad_norm": 2.640625,
      "learning_rate": 7.182280998258008e-06,
      "loss": 0.6969,
      "step": 745320
    },
    {
      "epoch": 2.61219583144953,
      "grad_norm": 2.953125,
      "learning_rate": 7.181631969594306e-06,
      "loss": 0.7664,
      "step": 745330
    },
    {
      "epoch": 2.6122308789564253,
      "grad_norm": 3.09375,
      "learning_rate": 7.1809829409306045e-06,
      "loss": 0.8039,
      "step": 745340
    },
    {
      "epoch": 2.612265926463321,
      "grad_norm": 2.578125,
      "learning_rate": 7.180333912266901e-06,
      "loss": 0.7188,
      "step": 745350
    },
    {
      "epoch": 2.6123009739702168,
      "grad_norm": 3.0,
      "learning_rate": 7.1796848836032e-06,
      "loss": 0.8456,
      "step": 745360
    },
    {
      "epoch": 2.612336021477112,
      "grad_norm": 2.84375,
      "learning_rate": 7.179035854939498e-06,
      "loss": 0.7554,
      "step": 745370
    },
    {
      "epoch": 2.612371068984008,
      "grad_norm": 3.28125,
      "learning_rate": 7.178386826275796e-06,
      "loss": 0.812,
      "step": 745380
    },
    {
      "epoch": 2.612406116490903,
      "grad_norm": 3.515625,
      "learning_rate": 7.1777377976120945e-06,
      "loss": 0.7691,
      "step": 745390
    },
    {
      "epoch": 2.612441163997799,
      "grad_norm": 2.875,
      "learning_rate": 7.1770887689483925e-06,
      "loss": 0.7329,
      "step": 745400
    },
    {
      "epoch": 2.6124762115046947,
      "grad_norm": 3.34375,
      "learning_rate": 7.1764397402846905e-06,
      "loss": 0.7835,
      "step": 745410
    },
    {
      "epoch": 2.6125112590115904,
      "grad_norm": 3.0,
      "learning_rate": 7.1757907116209885e-06,
      "loss": 0.7798,
      "step": 745420
    },
    {
      "epoch": 2.6125463065184857,
      "grad_norm": 3.9375,
      "learning_rate": 7.175141682957287e-06,
      "loss": 0.76,
      "step": 745430
    },
    {
      "epoch": 2.6125813540253815,
      "grad_norm": 3.140625,
      "learning_rate": 7.174492654293584e-06,
      "loss": 0.7973,
      "step": 745440
    },
    {
      "epoch": 2.612616401532277,
      "grad_norm": 2.84375,
      "learning_rate": 7.1738436256298825e-06,
      "loss": 0.8271,
      "step": 745450
    },
    {
      "epoch": 2.6126514490391726,
      "grad_norm": 3.59375,
      "learning_rate": 7.1731945969661805e-06,
      "loss": 0.8808,
      "step": 745460
    },
    {
      "epoch": 2.6126864965460683,
      "grad_norm": 2.53125,
      "learning_rate": 7.1725455683024785e-06,
      "loss": 0.8359,
      "step": 745470
    },
    {
      "epoch": 2.6127215440529636,
      "grad_norm": 2.65625,
      "learning_rate": 7.1718965396387765e-06,
      "loss": 0.7698,
      "step": 745480
    },
    {
      "epoch": 2.6127565915598594,
      "grad_norm": 2.828125,
      "learning_rate": 7.171247510975075e-06,
      "loss": 0.7391,
      "step": 745490
    },
    {
      "epoch": 2.6127916390667547,
      "grad_norm": 3.03125,
      "learning_rate": 7.170598482311373e-06,
      "loss": 0.7871,
      "step": 745500
    },
    {
      "epoch": 2.6128266865736505,
      "grad_norm": 2.796875,
      "learning_rate": 7.169949453647671e-06,
      "loss": 0.8642,
      "step": 745510
    },
    {
      "epoch": 2.6128617340805462,
      "grad_norm": 3.109375,
      "learning_rate": 7.16930042498397e-06,
      "loss": 0.8187,
      "step": 745520
    },
    {
      "epoch": 2.612896781587442,
      "grad_norm": 3.0625,
      "learning_rate": 7.168651396320268e-06,
      "loss": 0.8341,
      "step": 745530
    },
    {
      "epoch": 2.6129318290943373,
      "grad_norm": 2.9375,
      "learning_rate": 7.168002367656565e-06,
      "loss": 0.7407,
      "step": 745540
    },
    {
      "epoch": 2.612966876601233,
      "grad_norm": 2.8125,
      "learning_rate": 7.167353338992863e-06,
      "loss": 0.8782,
      "step": 745550
    },
    {
      "epoch": 2.6130019241081284,
      "grad_norm": 3.0625,
      "learning_rate": 7.166704310329161e-06,
      "loss": 0.8708,
      "step": 745560
    },
    {
      "epoch": 2.613036971615024,
      "grad_norm": 2.953125,
      "learning_rate": 7.166055281665459e-06,
      "loss": 0.7609,
      "step": 745570
    },
    {
      "epoch": 2.61307201912192,
      "grad_norm": 2.71875,
      "learning_rate": 7.165406253001758e-06,
      "loss": 0.7841,
      "step": 745580
    },
    {
      "epoch": 2.613107066628815,
      "grad_norm": 2.625,
      "learning_rate": 7.164757224338056e-06,
      "loss": 0.7608,
      "step": 745590
    },
    {
      "epoch": 2.613142114135711,
      "grad_norm": 2.890625,
      "learning_rate": 7.164108195674354e-06,
      "loss": 0.7405,
      "step": 745600
    },
    {
      "epoch": 2.6131771616426067,
      "grad_norm": 3.140625,
      "learning_rate": 7.163459167010653e-06,
      "loss": 0.8711,
      "step": 745610
    },
    {
      "epoch": 2.613212209149502,
      "grad_norm": 2.703125,
      "learning_rate": 7.162810138346951e-06,
      "loss": 0.7791,
      "step": 745620
    },
    {
      "epoch": 2.613247256656398,
      "grad_norm": 2.828125,
      "learning_rate": 7.162161109683248e-06,
      "loss": 0.7499,
      "step": 745630
    },
    {
      "epoch": 2.6132823041632935,
      "grad_norm": 2.875,
      "learning_rate": 7.161512081019546e-06,
      "loss": 0.8031,
      "step": 745640
    },
    {
      "epoch": 2.613317351670189,
      "grad_norm": 2.953125,
      "learning_rate": 7.160863052355844e-06,
      "loss": 0.8042,
      "step": 745650
    },
    {
      "epoch": 2.6133523991770846,
      "grad_norm": 2.8125,
      "learning_rate": 7.160214023692142e-06,
      "loss": 0.8063,
      "step": 745660
    },
    {
      "epoch": 2.61338744668398,
      "grad_norm": 3.265625,
      "learning_rate": 7.159564995028441e-06,
      "loss": 0.8555,
      "step": 745670
    },
    {
      "epoch": 2.6134224941908757,
      "grad_norm": 2.515625,
      "learning_rate": 7.158915966364739e-06,
      "loss": 0.7622,
      "step": 745680
    },
    {
      "epoch": 2.6134575416977714,
      "grad_norm": 2.859375,
      "learning_rate": 7.158266937701037e-06,
      "loss": 0.7915,
      "step": 745690
    },
    {
      "epoch": 2.6134925892046668,
      "grad_norm": 3.15625,
      "learning_rate": 7.157617909037336e-06,
      "loss": 0.9047,
      "step": 745700
    },
    {
      "epoch": 2.6135276367115625,
      "grad_norm": 2.875,
      "learning_rate": 7.156968880373634e-06,
      "loss": 0.7498,
      "step": 745710
    },
    {
      "epoch": 2.6135626842184583,
      "grad_norm": 2.65625,
      "learning_rate": 7.15631985170993e-06,
      "loss": 0.7946,
      "step": 745720
    },
    {
      "epoch": 2.6135977317253536,
      "grad_norm": 2.671875,
      "learning_rate": 7.155670823046229e-06,
      "loss": 0.7365,
      "step": 745730
    },
    {
      "epoch": 2.6136327792322493,
      "grad_norm": 2.703125,
      "learning_rate": 7.155021794382527e-06,
      "loss": 0.7898,
      "step": 745740
    },
    {
      "epoch": 2.613667826739145,
      "grad_norm": 3.296875,
      "learning_rate": 7.154372765718825e-06,
      "loss": 0.7533,
      "step": 745750
    },
    {
      "epoch": 2.6137028742460404,
      "grad_norm": 2.6875,
      "learning_rate": 7.153723737055124e-06,
      "loss": 0.7903,
      "step": 745760
    },
    {
      "epoch": 2.613737921752936,
      "grad_norm": 3.03125,
      "learning_rate": 7.153074708391422e-06,
      "loss": 0.8159,
      "step": 745770
    },
    {
      "epoch": 2.6137729692598315,
      "grad_norm": 2.90625,
      "learning_rate": 7.15242567972772e-06,
      "loss": 0.879,
      "step": 745780
    },
    {
      "epoch": 2.6138080167667272,
      "grad_norm": 2.6875,
      "learning_rate": 7.151776651064018e-06,
      "loss": 0.7773,
      "step": 745790
    },
    {
      "epoch": 2.613843064273623,
      "grad_norm": 2.703125,
      "learning_rate": 7.151127622400317e-06,
      "loss": 0.7955,
      "step": 745800
    },
    {
      "epoch": 2.6138781117805188,
      "grad_norm": 3.296875,
      "learning_rate": 7.150478593736615e-06,
      "loss": 0.8307,
      "step": 745810
    },
    {
      "epoch": 2.613913159287414,
      "grad_norm": 3.25,
      "learning_rate": 7.149829565072912e-06,
      "loss": 0.7654,
      "step": 745820
    },
    {
      "epoch": 2.61394820679431,
      "grad_norm": 3.0625,
      "learning_rate": 7.14918053640921e-06,
      "loss": 0.9081,
      "step": 745830
    },
    {
      "epoch": 2.613983254301205,
      "grad_norm": 2.421875,
      "learning_rate": 7.148531507745508e-06,
      "loss": 0.7896,
      "step": 745840
    },
    {
      "epoch": 2.614018301808101,
      "grad_norm": 2.359375,
      "learning_rate": 7.147882479081807e-06,
      "loss": 0.6922,
      "step": 745850
    },
    {
      "epoch": 2.6140533493149967,
      "grad_norm": 3.046875,
      "learning_rate": 7.147233450418105e-06,
      "loss": 0.8171,
      "step": 745860
    },
    {
      "epoch": 2.614088396821892,
      "grad_norm": 3.0,
      "learning_rate": 7.146584421754403e-06,
      "loss": 0.7808,
      "step": 745870
    },
    {
      "epoch": 2.6141234443287877,
      "grad_norm": 2.640625,
      "learning_rate": 7.145935393090701e-06,
      "loss": 0.7759,
      "step": 745880
    },
    {
      "epoch": 2.614158491835683,
      "grad_norm": 3.28125,
      "learning_rate": 7.1452863644269996e-06,
      "loss": 0.7784,
      "step": 745890
    },
    {
      "epoch": 2.614193539342579,
      "grad_norm": 2.9375,
      "learning_rate": 7.1446373357632976e-06,
      "loss": 0.7696,
      "step": 745900
    },
    {
      "epoch": 2.6142285868494746,
      "grad_norm": 3.0,
      "learning_rate": 7.143988307099595e-06,
      "loss": 0.84,
      "step": 745910
    },
    {
      "epoch": 2.6142636343563703,
      "grad_norm": 2.8125,
      "learning_rate": 7.143339278435893e-06,
      "loss": 0.766,
      "step": 745920
    },
    {
      "epoch": 2.6142986818632656,
      "grad_norm": 3.265625,
      "learning_rate": 7.142690249772191e-06,
      "loss": 0.8755,
      "step": 745930
    },
    {
      "epoch": 2.6143337293701614,
      "grad_norm": 2.9375,
      "learning_rate": 7.1420412211084896e-06,
      "loss": 0.7826,
      "step": 745940
    },
    {
      "epoch": 2.6143687768770567,
      "grad_norm": 2.875,
      "learning_rate": 7.1413921924447876e-06,
      "loss": 0.7992,
      "step": 745950
    },
    {
      "epoch": 2.6144038243839525,
      "grad_norm": 2.734375,
      "learning_rate": 7.1407431637810856e-06,
      "loss": 0.749,
      "step": 745960
    },
    {
      "epoch": 2.6144388718908482,
      "grad_norm": 3.3125,
      "learning_rate": 7.1400941351173836e-06,
      "loss": 0.7949,
      "step": 745970
    },
    {
      "epoch": 2.6144739193977435,
      "grad_norm": 3.140625,
      "learning_rate": 7.139445106453682e-06,
      "loss": 0.7787,
      "step": 745980
    },
    {
      "epoch": 2.6145089669046393,
      "grad_norm": 3.4375,
      "learning_rate": 7.13879607778998e-06,
      "loss": 0.8328,
      "step": 745990
    },
    {
      "epoch": 2.6145440144115346,
      "grad_norm": 2.90625,
      "learning_rate": 7.138147049126278e-06,
      "loss": 0.8642,
      "step": 746000
    },
    {
      "epoch": 2.6145790619184304,
      "grad_norm": 3.0,
      "learning_rate": 7.1374980204625756e-06,
      "loss": 0.8262,
      "step": 746010
    },
    {
      "epoch": 2.614614109425326,
      "grad_norm": 3.1875,
      "learning_rate": 7.1368489917988736e-06,
      "loss": 0.9454,
      "step": 746020
    },
    {
      "epoch": 2.614649156932222,
      "grad_norm": 2.546875,
      "learning_rate": 7.1361999631351716e-06,
      "loss": 0.8149,
      "step": 746030
    },
    {
      "epoch": 2.614684204439117,
      "grad_norm": 2.640625,
      "learning_rate": 7.13555093447147e-06,
      "loss": 0.7592,
      "step": 746040
    },
    {
      "epoch": 2.614719251946013,
      "grad_norm": 2.984375,
      "learning_rate": 7.134901905807768e-06,
      "loss": 0.7247,
      "step": 746050
    },
    {
      "epoch": 2.6147542994529083,
      "grad_norm": 3.515625,
      "learning_rate": 7.134252877144066e-06,
      "loss": 0.7808,
      "step": 746060
    },
    {
      "epoch": 2.614789346959804,
      "grad_norm": 3.015625,
      "learning_rate": 7.133603848480365e-06,
      "loss": 0.9172,
      "step": 746070
    },
    {
      "epoch": 2.6148243944667,
      "grad_norm": 2.671875,
      "learning_rate": 7.132954819816663e-06,
      "loss": 0.7635,
      "step": 746080
    },
    {
      "epoch": 2.614859441973595,
      "grad_norm": 2.59375,
      "learning_rate": 7.132305791152961e-06,
      "loss": 0.8001,
      "step": 746090
    },
    {
      "epoch": 2.614894489480491,
      "grad_norm": 2.796875,
      "learning_rate": 7.131656762489258e-06,
      "loss": 0.793,
      "step": 746100
    },
    {
      "epoch": 2.614929536987386,
      "grad_norm": 2.828125,
      "learning_rate": 7.131007733825556e-06,
      "loss": 0.7492,
      "step": 746110
    },
    {
      "epoch": 2.614964584494282,
      "grad_norm": 3.28125,
      "learning_rate": 7.130358705161854e-06,
      "loss": 0.7911,
      "step": 746120
    },
    {
      "epoch": 2.6149996320011777,
      "grad_norm": 2.8125,
      "learning_rate": 7.129709676498153e-06,
      "loss": 0.7489,
      "step": 746130
    },
    {
      "epoch": 2.6150346795080734,
      "grad_norm": 3.296875,
      "learning_rate": 7.129060647834451e-06,
      "loss": 0.8368,
      "step": 746140
    },
    {
      "epoch": 2.6150697270149688,
      "grad_norm": 2.703125,
      "learning_rate": 7.128411619170749e-06,
      "loss": 0.8611,
      "step": 746150
    },
    {
      "epoch": 2.6151047745218645,
      "grad_norm": 2.953125,
      "learning_rate": 7.127762590507048e-06,
      "loss": 0.8015,
      "step": 746160
    },
    {
      "epoch": 2.61513982202876,
      "grad_norm": 3.046875,
      "learning_rate": 7.127113561843346e-06,
      "loss": 0.7804,
      "step": 746170
    },
    {
      "epoch": 2.6151748695356556,
      "grad_norm": 2.546875,
      "learning_rate": 7.126464533179644e-06,
      "loss": 0.821,
      "step": 746180
    },
    {
      "epoch": 2.6152099170425513,
      "grad_norm": 2.96875,
      "learning_rate": 7.125815504515941e-06,
      "loss": 0.6737,
      "step": 746190
    },
    {
      "epoch": 2.6152449645494467,
      "grad_norm": 3.0,
      "learning_rate": 7.125166475852239e-06,
      "loss": 0.7429,
      "step": 746200
    },
    {
      "epoch": 2.6152800120563424,
      "grad_norm": 2.421875,
      "learning_rate": 7.124517447188537e-06,
      "loss": 0.7781,
      "step": 746210
    },
    {
      "epoch": 2.6153150595632377,
      "grad_norm": 2.578125,
      "learning_rate": 7.123868418524836e-06,
      "loss": 0.7677,
      "step": 746220
    },
    {
      "epoch": 2.6153501070701335,
      "grad_norm": 2.859375,
      "learning_rate": 7.123219389861134e-06,
      "loss": 0.7744,
      "step": 746230
    },
    {
      "epoch": 2.6153851545770292,
      "grad_norm": 3.03125,
      "learning_rate": 7.122570361197432e-06,
      "loss": 0.7262,
      "step": 746240
    },
    {
      "epoch": 2.615420202083925,
      "grad_norm": 2.296875,
      "learning_rate": 7.121921332533731e-06,
      "loss": 0.7126,
      "step": 746250
    },
    {
      "epoch": 2.6154552495908203,
      "grad_norm": 2.546875,
      "learning_rate": 7.121272303870029e-06,
      "loss": 0.8112,
      "step": 746260
    },
    {
      "epoch": 2.615490297097716,
      "grad_norm": 3.015625,
      "learning_rate": 7.120623275206327e-06,
      "loss": 0.8382,
      "step": 746270
    },
    {
      "epoch": 2.6155253446046114,
      "grad_norm": 3.203125,
      "learning_rate": 7.119974246542625e-06,
      "loss": 0.7423,
      "step": 746280
    },
    {
      "epoch": 2.615560392111507,
      "grad_norm": 3.15625,
      "learning_rate": 7.119325217878922e-06,
      "loss": 0.7886,
      "step": 746290
    },
    {
      "epoch": 2.615595439618403,
      "grad_norm": 2.640625,
      "learning_rate": 7.11867618921522e-06,
      "loss": 0.8469,
      "step": 746300
    },
    {
      "epoch": 2.615630487125298,
      "grad_norm": 3.140625,
      "learning_rate": 7.118027160551519e-06,
      "loss": 0.7805,
      "step": 746310
    },
    {
      "epoch": 2.615665534632194,
      "grad_norm": 2.671875,
      "learning_rate": 7.117378131887817e-06,
      "loss": 0.7839,
      "step": 746320
    },
    {
      "epoch": 2.6157005821390893,
      "grad_norm": 2.96875,
      "learning_rate": 7.116729103224115e-06,
      "loss": 0.7833,
      "step": 746330
    },
    {
      "epoch": 2.615735629645985,
      "grad_norm": 2.453125,
      "learning_rate": 7.116080074560413e-06,
      "loss": 0.8147,
      "step": 746340
    },
    {
      "epoch": 2.615770677152881,
      "grad_norm": 2.5,
      "learning_rate": 7.115431045896712e-06,
      "loss": 0.8099,
      "step": 746350
    },
    {
      "epoch": 2.6158057246597766,
      "grad_norm": 2.9375,
      "learning_rate": 7.11478201723301e-06,
      "loss": 0.829,
      "step": 746360
    },
    {
      "epoch": 2.615840772166672,
      "grad_norm": 2.625,
      "learning_rate": 7.114132988569308e-06,
      "loss": 0.8052,
      "step": 746370
    },
    {
      "epoch": 2.6158758196735676,
      "grad_norm": 3.109375,
      "learning_rate": 7.113483959905605e-06,
      "loss": 0.7621,
      "step": 746380
    },
    {
      "epoch": 2.615910867180463,
      "grad_norm": 3.09375,
      "learning_rate": 7.112834931241903e-06,
      "loss": 0.8368,
      "step": 746390
    },
    {
      "epoch": 2.6159459146873587,
      "grad_norm": 2.84375,
      "learning_rate": 7.112185902578202e-06,
      "loss": 0.861,
      "step": 746400
    },
    {
      "epoch": 2.6159809621942545,
      "grad_norm": 2.84375,
      "learning_rate": 7.1115368739145e-06,
      "loss": 0.8256,
      "step": 746410
    },
    {
      "epoch": 2.61601600970115,
      "grad_norm": 3.03125,
      "learning_rate": 7.110887845250798e-06,
      "loss": 0.8213,
      "step": 746420
    },
    {
      "epoch": 2.6160510572080455,
      "grad_norm": 2.96875,
      "learning_rate": 7.110238816587096e-06,
      "loss": 0.8563,
      "step": 746430
    },
    {
      "epoch": 2.616086104714941,
      "grad_norm": 2.546875,
      "learning_rate": 7.109589787923395e-06,
      "loss": 0.759,
      "step": 746440
    },
    {
      "epoch": 2.6161211522218366,
      "grad_norm": 2.28125,
      "learning_rate": 7.108940759259693e-06,
      "loss": 0.7633,
      "step": 746450
    },
    {
      "epoch": 2.6161561997287324,
      "grad_norm": 3.515625,
      "learning_rate": 7.108291730595991e-06,
      "loss": 0.7999,
      "step": 746460
    },
    {
      "epoch": 2.616191247235628,
      "grad_norm": 2.625,
      "learning_rate": 7.1076427019322895e-06,
      "loss": 0.751,
      "step": 746470
    },
    {
      "epoch": 2.6162262947425234,
      "grad_norm": 3.28125,
      "learning_rate": 7.106993673268586e-06,
      "loss": 0.8226,
      "step": 746480
    },
    {
      "epoch": 2.616261342249419,
      "grad_norm": 2.4375,
      "learning_rate": 7.106344644604885e-06,
      "loss": 0.7836,
      "step": 746490
    },
    {
      "epoch": 2.6162963897563145,
      "grad_norm": 2.921875,
      "learning_rate": 7.105695615941183e-06,
      "loss": 0.8426,
      "step": 746500
    },
    {
      "epoch": 2.6163314372632103,
      "grad_norm": 3.625,
      "learning_rate": 7.105046587277481e-06,
      "loss": 0.8181,
      "step": 746510
    },
    {
      "epoch": 2.616366484770106,
      "grad_norm": 3.078125,
      "learning_rate": 7.104397558613779e-06,
      "loss": 0.7843,
      "step": 746520
    },
    {
      "epoch": 2.6164015322770013,
      "grad_norm": 2.765625,
      "learning_rate": 7.1037485299500775e-06,
      "loss": 0.8456,
      "step": 746530
    },
    {
      "epoch": 2.616436579783897,
      "grad_norm": 2.515625,
      "learning_rate": 7.1030995012863755e-06,
      "loss": 0.8194,
      "step": 746540
    },
    {
      "epoch": 2.6164716272907924,
      "grad_norm": 3.421875,
      "learning_rate": 7.1024504726226735e-06,
      "loss": 0.8362,
      "step": 746550
    },
    {
      "epoch": 2.616506674797688,
      "grad_norm": 2.875,
      "learning_rate": 7.101801443958972e-06,
      "loss": 0.7925,
      "step": 746560
    },
    {
      "epoch": 2.616541722304584,
      "grad_norm": 3.1875,
      "learning_rate": 7.101152415295269e-06,
      "loss": 0.7835,
      "step": 746570
    },
    {
      "epoch": 2.6165767698114797,
      "grad_norm": 2.953125,
      "learning_rate": 7.100503386631567e-06,
      "loss": 0.7458,
      "step": 746580
    },
    {
      "epoch": 2.616611817318375,
      "grad_norm": 3.078125,
      "learning_rate": 7.0998543579678655e-06,
      "loss": 0.7943,
      "step": 746590
    },
    {
      "epoch": 2.6166468648252708,
      "grad_norm": 3.25,
      "learning_rate": 7.0992053293041635e-06,
      "loss": 0.7481,
      "step": 746600
    },
    {
      "epoch": 2.616681912332166,
      "grad_norm": 3.25,
      "learning_rate": 7.0985563006404615e-06,
      "loss": 0.8251,
      "step": 746610
    },
    {
      "epoch": 2.616716959839062,
      "grad_norm": 3.140625,
      "learning_rate": 7.09790727197676e-06,
      "loss": 0.9047,
      "step": 746620
    },
    {
      "epoch": 2.6167520073459576,
      "grad_norm": 2.953125,
      "learning_rate": 7.097258243313058e-06,
      "loss": 0.7266,
      "step": 746630
    },
    {
      "epoch": 2.616787054852853,
      "grad_norm": 2.96875,
      "learning_rate": 7.096609214649356e-06,
      "loss": 0.8245,
      "step": 746640
    },
    {
      "epoch": 2.6168221023597487,
      "grad_norm": 3.1875,
      "learning_rate": 7.095960185985654e-06,
      "loss": 0.8538,
      "step": 746650
    },
    {
      "epoch": 2.616857149866644,
      "grad_norm": 2.78125,
      "learning_rate": 7.0953111573219515e-06,
      "loss": 0.745,
      "step": 746660
    },
    {
      "epoch": 2.6168921973735397,
      "grad_norm": 2.640625,
      "learning_rate": 7.0946621286582495e-06,
      "loss": 0.7415,
      "step": 746670
    },
    {
      "epoch": 2.6169272448804355,
      "grad_norm": 2.953125,
      "learning_rate": 7.094013099994548e-06,
      "loss": 0.7777,
      "step": 746680
    },
    {
      "epoch": 2.6169622923873312,
      "grad_norm": 2.875,
      "learning_rate": 7.093364071330846e-06,
      "loss": 0.7835,
      "step": 746690
    },
    {
      "epoch": 2.6169973398942266,
      "grad_norm": 3.21875,
      "learning_rate": 7.092715042667144e-06,
      "loss": 0.7866,
      "step": 746700
    },
    {
      "epoch": 2.6170323874011223,
      "grad_norm": 3.15625,
      "learning_rate": 7.092066014003443e-06,
      "loss": 0.7517,
      "step": 746710
    },
    {
      "epoch": 2.6170674349080176,
      "grad_norm": 2.921875,
      "learning_rate": 7.091416985339741e-06,
      "loss": 0.8722,
      "step": 746720
    },
    {
      "epoch": 2.6171024824149134,
      "grad_norm": 3.328125,
      "learning_rate": 7.090767956676039e-06,
      "loss": 0.7778,
      "step": 746730
    },
    {
      "epoch": 2.617137529921809,
      "grad_norm": 2.671875,
      "learning_rate": 7.090118928012337e-06,
      "loss": 0.7943,
      "step": 746740
    },
    {
      "epoch": 2.6171725774287045,
      "grad_norm": 2.90625,
      "learning_rate": 7.089469899348636e-06,
      "loss": 0.8102,
      "step": 746750
    },
    {
      "epoch": 2.6172076249356,
      "grad_norm": 3.171875,
      "learning_rate": 7.088820870684932e-06,
      "loss": 0.8683,
      "step": 746760
    },
    {
      "epoch": 2.6172426724424955,
      "grad_norm": 3.0,
      "learning_rate": 7.088171842021231e-06,
      "loss": 0.8087,
      "step": 746770
    },
    {
      "epoch": 2.6172777199493913,
      "grad_norm": 2.65625,
      "learning_rate": 7.087522813357529e-06,
      "loss": 0.8528,
      "step": 746780
    },
    {
      "epoch": 2.617312767456287,
      "grad_norm": 3.0625,
      "learning_rate": 7.086873784693827e-06,
      "loss": 0.8066,
      "step": 746790
    },
    {
      "epoch": 2.617347814963183,
      "grad_norm": 3.09375,
      "learning_rate": 7.086224756030126e-06,
      "loss": 0.837,
      "step": 746800
    },
    {
      "epoch": 2.617382862470078,
      "grad_norm": 2.8125,
      "learning_rate": 7.085575727366424e-06,
      "loss": 0.8234,
      "step": 746810
    },
    {
      "epoch": 2.617417909976974,
      "grad_norm": 2.8125,
      "learning_rate": 7.084926698702722e-06,
      "loss": 0.758,
      "step": 746820
    },
    {
      "epoch": 2.617452957483869,
      "grad_norm": 2.453125,
      "learning_rate": 7.08427767003902e-06,
      "loss": 0.8115,
      "step": 746830
    },
    {
      "epoch": 2.617488004990765,
      "grad_norm": 3.140625,
      "learning_rate": 7.083628641375319e-06,
      "loss": 0.8256,
      "step": 746840
    },
    {
      "epoch": 2.6175230524976607,
      "grad_norm": 3.09375,
      "learning_rate": 7.082979612711615e-06,
      "loss": 0.7886,
      "step": 746850
    },
    {
      "epoch": 2.617558100004556,
      "grad_norm": 3.09375,
      "learning_rate": 7.082330584047914e-06,
      "loss": 0.8323,
      "step": 746860
    },
    {
      "epoch": 2.617593147511452,
      "grad_norm": 3.28125,
      "learning_rate": 7.081681555384212e-06,
      "loss": 0.8615,
      "step": 746870
    },
    {
      "epoch": 2.617628195018347,
      "grad_norm": 3.265625,
      "learning_rate": 7.08103252672051e-06,
      "loss": 0.8976,
      "step": 746880
    },
    {
      "epoch": 2.617663242525243,
      "grad_norm": 2.6875,
      "learning_rate": 7.080383498056808e-06,
      "loss": 0.7174,
      "step": 746890
    },
    {
      "epoch": 2.6176982900321386,
      "grad_norm": 2.875,
      "learning_rate": 7.079734469393107e-06,
      "loss": 0.8425,
      "step": 746900
    },
    {
      "epoch": 2.6177333375390344,
      "grad_norm": 2.890625,
      "learning_rate": 7.079085440729405e-06,
      "loss": 0.8204,
      "step": 746910
    },
    {
      "epoch": 2.6177683850459297,
      "grad_norm": 3.25,
      "learning_rate": 7.078436412065703e-06,
      "loss": 0.9033,
      "step": 746920
    },
    {
      "epoch": 2.6178034325528254,
      "grad_norm": 2.703125,
      "learning_rate": 7.077787383402002e-06,
      "loss": 0.8545,
      "step": 746930
    },
    {
      "epoch": 2.6178384800597208,
      "grad_norm": 2.78125,
      "learning_rate": 7.0771383547383e-06,
      "loss": 0.7686,
      "step": 746940
    },
    {
      "epoch": 2.6178735275666165,
      "grad_norm": 2.734375,
      "learning_rate": 7.076489326074597e-06,
      "loss": 0.7932,
      "step": 746950
    },
    {
      "epoch": 2.6179085750735123,
      "grad_norm": 3.296875,
      "learning_rate": 7.075840297410895e-06,
      "loss": 0.8449,
      "step": 746960
    },
    {
      "epoch": 2.6179436225804076,
      "grad_norm": 3.0625,
      "learning_rate": 7.075191268747193e-06,
      "loss": 0.7881,
      "step": 746970
    },
    {
      "epoch": 2.6179786700873033,
      "grad_norm": 2.890625,
      "learning_rate": 7.074542240083491e-06,
      "loss": 0.8074,
      "step": 746980
    },
    {
      "epoch": 2.618013717594199,
      "grad_norm": 3.1875,
      "learning_rate": 7.07389321141979e-06,
      "loss": 0.8276,
      "step": 746990
    },
    {
      "epoch": 2.6180487651010944,
      "grad_norm": 3.296875,
      "learning_rate": 7.073244182756088e-06,
      "loss": 1.0185,
      "step": 747000
    },
    {
      "epoch": 2.61808381260799,
      "grad_norm": 2.9375,
      "learning_rate": 7.072595154092386e-06,
      "loss": 0.7483,
      "step": 747010
    },
    {
      "epoch": 2.618118860114886,
      "grad_norm": 2.484375,
      "learning_rate": 7.0719461254286846e-06,
      "loss": 0.8049,
      "step": 747020
    },
    {
      "epoch": 2.6181539076217812,
      "grad_norm": 2.609375,
      "learning_rate": 7.0712970967649826e-06,
      "loss": 0.7285,
      "step": 747030
    },
    {
      "epoch": 2.618188955128677,
      "grad_norm": 2.890625,
      "learning_rate": 7.07064806810128e-06,
      "loss": 0.8125,
      "step": 747040
    },
    {
      "epoch": 2.6182240026355723,
      "grad_norm": 3.078125,
      "learning_rate": 7.069999039437578e-06,
      "loss": 0.828,
      "step": 747050
    },
    {
      "epoch": 2.618259050142468,
      "grad_norm": 3.328125,
      "learning_rate": 7.069350010773876e-06,
      "loss": 0.8094,
      "step": 747060
    },
    {
      "epoch": 2.618294097649364,
      "grad_norm": 3.234375,
      "learning_rate": 7.068700982110174e-06,
      "loss": 0.832,
      "step": 747070
    },
    {
      "epoch": 2.6183291451562596,
      "grad_norm": 2.609375,
      "learning_rate": 7.0680519534464726e-06,
      "loss": 0.7968,
      "step": 747080
    },
    {
      "epoch": 2.618364192663155,
      "grad_norm": 3.3125,
      "learning_rate": 7.0674029247827706e-06,
      "loss": 0.8624,
      "step": 747090
    },
    {
      "epoch": 2.6183992401700507,
      "grad_norm": 3.140625,
      "learning_rate": 7.0667538961190686e-06,
      "loss": 0.7718,
      "step": 747100
    },
    {
      "epoch": 2.618434287676946,
      "grad_norm": 2.90625,
      "learning_rate": 7.066104867455367e-06,
      "loss": 0.7991,
      "step": 747110
    },
    {
      "epoch": 2.6184693351838417,
      "grad_norm": 3.171875,
      "learning_rate": 7.065455838791665e-06,
      "loss": 0.8026,
      "step": 747120
    },
    {
      "epoch": 2.6185043826907375,
      "grad_norm": 2.90625,
      "learning_rate": 7.064806810127962e-06,
      "loss": 0.8199,
      "step": 747130
    },
    {
      "epoch": 2.618539430197633,
      "grad_norm": 2.9375,
      "learning_rate": 7.0641577814642606e-06,
      "loss": 0.8977,
      "step": 747140
    },
    {
      "epoch": 2.6185744777045286,
      "grad_norm": 2.84375,
      "learning_rate": 7.0635087528005586e-06,
      "loss": 0.8316,
      "step": 747150
    },
    {
      "epoch": 2.618609525211424,
      "grad_norm": 3.046875,
      "learning_rate": 7.0628597241368566e-06,
      "loss": 0.7696,
      "step": 747160
    },
    {
      "epoch": 2.6186445727183196,
      "grad_norm": 2.609375,
      "learning_rate": 7.062210695473155e-06,
      "loss": 0.8296,
      "step": 747170
    },
    {
      "epoch": 2.6186796202252154,
      "grad_norm": 2.75,
      "learning_rate": 7.061561666809453e-06,
      "loss": 0.7631,
      "step": 747180
    },
    {
      "epoch": 2.618714667732111,
      "grad_norm": 2.890625,
      "learning_rate": 7.060912638145751e-06,
      "loss": 0.7612,
      "step": 747190
    },
    {
      "epoch": 2.6187497152390065,
      "grad_norm": 2.859375,
      "learning_rate": 7.06026360948205e-06,
      "loss": 0.7813,
      "step": 747200
    },
    {
      "epoch": 2.618784762745902,
      "grad_norm": 2.6875,
      "learning_rate": 7.059614580818348e-06,
      "loss": 0.7655,
      "step": 747210
    },
    {
      "epoch": 2.6188198102527975,
      "grad_norm": 2.890625,
      "learning_rate": 7.058965552154646e-06,
      "loss": 0.8187,
      "step": 747220
    },
    {
      "epoch": 2.6188548577596933,
      "grad_norm": 3.421875,
      "learning_rate": 7.058316523490943e-06,
      "loss": 0.8113,
      "step": 747230
    },
    {
      "epoch": 2.618889905266589,
      "grad_norm": 3.140625,
      "learning_rate": 7.057667494827241e-06,
      "loss": 0.8015,
      "step": 747240
    },
    {
      "epoch": 2.6189249527734844,
      "grad_norm": 2.3125,
      "learning_rate": 7.057018466163539e-06,
      "loss": 0.7523,
      "step": 747250
    },
    {
      "epoch": 2.61896000028038,
      "grad_norm": 2.765625,
      "learning_rate": 7.056369437499838e-06,
      "loss": 0.8433,
      "step": 747260
    },
    {
      "epoch": 2.6189950477872754,
      "grad_norm": 2.90625,
      "learning_rate": 7.055720408836136e-06,
      "loss": 0.7705,
      "step": 747270
    },
    {
      "epoch": 2.619030095294171,
      "grad_norm": 3.34375,
      "learning_rate": 7.055071380172434e-06,
      "loss": 0.7407,
      "step": 747280
    },
    {
      "epoch": 2.619065142801067,
      "grad_norm": 2.65625,
      "learning_rate": 7.054422351508732e-06,
      "loss": 0.8403,
      "step": 747290
    },
    {
      "epoch": 2.6191001903079627,
      "grad_norm": 3.296875,
      "learning_rate": 7.053773322845031e-06,
      "loss": 0.7947,
      "step": 747300
    },
    {
      "epoch": 2.619135237814858,
      "grad_norm": 2.921875,
      "learning_rate": 7.053124294181329e-06,
      "loss": 0.8717,
      "step": 747310
    },
    {
      "epoch": 2.6191702853217538,
      "grad_norm": 2.96875,
      "learning_rate": 7.052475265517626e-06,
      "loss": 0.8728,
      "step": 747320
    },
    {
      "epoch": 2.619205332828649,
      "grad_norm": 2.875,
      "learning_rate": 7.051826236853924e-06,
      "loss": 0.8484,
      "step": 747330
    },
    {
      "epoch": 2.619240380335545,
      "grad_norm": 3.21875,
      "learning_rate": 7.051177208190222e-06,
      "loss": 0.8448,
      "step": 747340
    },
    {
      "epoch": 2.6192754278424406,
      "grad_norm": 3.3125,
      "learning_rate": 7.050528179526521e-06,
      "loss": 0.8596,
      "step": 747350
    },
    {
      "epoch": 2.619310475349336,
      "grad_norm": 3.046875,
      "learning_rate": 7.049879150862819e-06,
      "loss": 0.8137,
      "step": 747360
    },
    {
      "epoch": 2.6193455228562317,
      "grad_norm": 3.25,
      "learning_rate": 7.049230122199117e-06,
      "loss": 0.807,
      "step": 747370
    },
    {
      "epoch": 2.619380570363127,
      "grad_norm": 2.671875,
      "learning_rate": 7.048581093535415e-06,
      "loss": 0.7765,
      "step": 747380
    },
    {
      "epoch": 2.6194156178700227,
      "grad_norm": 2.6875,
      "learning_rate": 7.047932064871714e-06,
      "loss": 0.8785,
      "step": 747390
    },
    {
      "epoch": 2.6194506653769185,
      "grad_norm": 3.03125,
      "learning_rate": 7.047283036208012e-06,
      "loss": 0.8045,
      "step": 747400
    },
    {
      "epoch": 2.6194857128838143,
      "grad_norm": 3.59375,
      "learning_rate": 7.04663400754431e-06,
      "loss": 0.8402,
      "step": 747410
    },
    {
      "epoch": 2.6195207603907096,
      "grad_norm": 2.8125,
      "learning_rate": 7.045984978880607e-06,
      "loss": 0.8226,
      "step": 747420
    },
    {
      "epoch": 2.6195558078976053,
      "grad_norm": 2.578125,
      "learning_rate": 7.045335950216905e-06,
      "loss": 0.7596,
      "step": 747430
    },
    {
      "epoch": 2.6195908554045007,
      "grad_norm": 3.109375,
      "learning_rate": 7.044686921553204e-06,
      "loss": 0.8714,
      "step": 747440
    },
    {
      "epoch": 2.6196259029113964,
      "grad_norm": 2.390625,
      "learning_rate": 7.044037892889502e-06,
      "loss": 0.8607,
      "step": 747450
    },
    {
      "epoch": 2.619660950418292,
      "grad_norm": 2.890625,
      "learning_rate": 7.0433888642258e-06,
      "loss": 0.8045,
      "step": 747460
    },
    {
      "epoch": 2.6196959979251875,
      "grad_norm": 3.015625,
      "learning_rate": 7.042739835562098e-06,
      "loss": 0.8372,
      "step": 747470
    },
    {
      "epoch": 2.6197310454320832,
      "grad_norm": 3.234375,
      "learning_rate": 7.042090806898397e-06,
      "loss": 0.8545,
      "step": 747480
    },
    {
      "epoch": 2.6197660929389786,
      "grad_norm": 2.453125,
      "learning_rate": 7.041441778234695e-06,
      "loss": 0.7445,
      "step": 747490
    },
    {
      "epoch": 2.6198011404458743,
      "grad_norm": 3.078125,
      "learning_rate": 7.040792749570993e-06,
      "loss": 0.7625,
      "step": 747500
    },
    {
      "epoch": 2.61983618795277,
      "grad_norm": 3.15625,
      "learning_rate": 7.04014372090729e-06,
      "loss": 0.7817,
      "step": 747510
    },
    {
      "epoch": 2.619871235459666,
      "grad_norm": 2.578125,
      "learning_rate": 7.039494692243588e-06,
      "loss": 0.7485,
      "step": 747520
    },
    {
      "epoch": 2.619906282966561,
      "grad_norm": 3.03125,
      "learning_rate": 7.038845663579886e-06,
      "loss": 0.7933,
      "step": 747530
    },
    {
      "epoch": 2.619941330473457,
      "grad_norm": 3.046875,
      "learning_rate": 7.038196634916185e-06,
      "loss": 0.8236,
      "step": 747540
    },
    {
      "epoch": 2.619976377980352,
      "grad_norm": 2.5,
      "learning_rate": 7.037547606252483e-06,
      "loss": 0.8131,
      "step": 747550
    },
    {
      "epoch": 2.620011425487248,
      "grad_norm": 2.65625,
      "learning_rate": 7.036898577588781e-06,
      "loss": 0.7441,
      "step": 747560
    },
    {
      "epoch": 2.6200464729941437,
      "grad_norm": 2.734375,
      "learning_rate": 7.03624954892508e-06,
      "loss": 0.7663,
      "step": 747570
    },
    {
      "epoch": 2.620081520501039,
      "grad_norm": 2.90625,
      "learning_rate": 7.035600520261378e-06,
      "loss": 0.8588,
      "step": 747580
    },
    {
      "epoch": 2.620116568007935,
      "grad_norm": 2.984375,
      "learning_rate": 7.034951491597676e-06,
      "loss": 0.8653,
      "step": 747590
    },
    {
      "epoch": 2.62015161551483,
      "grad_norm": 2.875,
      "learning_rate": 7.034302462933974e-06,
      "loss": 0.8254,
      "step": 747600
    },
    {
      "epoch": 2.620186663021726,
      "grad_norm": 3.171875,
      "learning_rate": 7.033653434270271e-06,
      "loss": 0.8302,
      "step": 747610
    },
    {
      "epoch": 2.6202217105286216,
      "grad_norm": 2.84375,
      "learning_rate": 7.033004405606569e-06,
      "loss": 0.8126,
      "step": 747620
    },
    {
      "epoch": 2.6202567580355174,
      "grad_norm": 3.109375,
      "learning_rate": 7.032355376942868e-06,
      "loss": 0.8669,
      "step": 747630
    },
    {
      "epoch": 2.6202918055424127,
      "grad_norm": 3.375,
      "learning_rate": 7.031706348279166e-06,
      "loss": 0.8427,
      "step": 747640
    },
    {
      "epoch": 2.6203268530493085,
      "grad_norm": 2.875,
      "learning_rate": 7.031057319615464e-06,
      "loss": 0.8277,
      "step": 747650
    },
    {
      "epoch": 2.6203619005562038,
      "grad_norm": 2.71875,
      "learning_rate": 7.0304082909517625e-06,
      "loss": 0.756,
      "step": 747660
    },
    {
      "epoch": 2.6203969480630995,
      "grad_norm": 2.859375,
      "learning_rate": 7.0297592622880605e-06,
      "loss": 0.7666,
      "step": 747670
    },
    {
      "epoch": 2.6204319955699953,
      "grad_norm": 2.90625,
      "learning_rate": 7.0291102336243585e-06,
      "loss": 0.8387,
      "step": 747680
    },
    {
      "epoch": 2.6204670430768906,
      "grad_norm": 2.90625,
      "learning_rate": 7.0284612049606565e-06,
      "loss": 0.8032,
      "step": 747690
    },
    {
      "epoch": 2.6205020905837864,
      "grad_norm": 3.234375,
      "learning_rate": 7.027812176296954e-06,
      "loss": 0.8393,
      "step": 747700
    },
    {
      "epoch": 2.6205371380906817,
      "grad_norm": 3.328125,
      "learning_rate": 7.027163147633252e-06,
      "loss": 0.7535,
      "step": 747710
    },
    {
      "epoch": 2.6205721855975774,
      "grad_norm": 3.046875,
      "learning_rate": 7.0265141189695505e-06,
      "loss": 0.7423,
      "step": 747720
    },
    {
      "epoch": 2.620607233104473,
      "grad_norm": 2.921875,
      "learning_rate": 7.0258650903058485e-06,
      "loss": 0.7484,
      "step": 747730
    },
    {
      "epoch": 2.620642280611369,
      "grad_norm": 3.0,
      "learning_rate": 7.0252160616421465e-06,
      "loss": 0.8006,
      "step": 747740
    },
    {
      "epoch": 2.6206773281182643,
      "grad_norm": 3.3125,
      "learning_rate": 7.024567032978445e-06,
      "loss": 0.8313,
      "step": 747750
    },
    {
      "epoch": 2.62071237562516,
      "grad_norm": 2.6875,
      "learning_rate": 7.023918004314743e-06,
      "loss": 0.7621,
      "step": 747760
    },
    {
      "epoch": 2.6207474231320553,
      "grad_norm": 2.859375,
      "learning_rate": 7.023268975651041e-06,
      "loss": 0.7572,
      "step": 747770
    },
    {
      "epoch": 2.620782470638951,
      "grad_norm": 3.296875,
      "learning_rate": 7.022619946987339e-06,
      "loss": 0.8071,
      "step": 747780
    },
    {
      "epoch": 2.620817518145847,
      "grad_norm": 3.125,
      "learning_rate": 7.0219709183236365e-06,
      "loss": 0.8187,
      "step": 747790
    },
    {
      "epoch": 2.620852565652742,
      "grad_norm": 3.0,
      "learning_rate": 7.0213218896599345e-06,
      "loss": 0.7024,
      "step": 747800
    },
    {
      "epoch": 2.620887613159638,
      "grad_norm": 8.875,
      "learning_rate": 7.020672860996233e-06,
      "loss": 0.7816,
      "step": 747810
    },
    {
      "epoch": 2.6209226606665332,
      "grad_norm": 3.265625,
      "learning_rate": 7.020023832332531e-06,
      "loss": 0.7378,
      "step": 747820
    },
    {
      "epoch": 2.620957708173429,
      "grad_norm": 2.6875,
      "learning_rate": 7.019374803668829e-06,
      "loss": 0.7223,
      "step": 747830
    },
    {
      "epoch": 2.6209927556803247,
      "grad_norm": 2.65625,
      "learning_rate": 7.018725775005127e-06,
      "loss": 0.8006,
      "step": 747840
    },
    {
      "epoch": 2.6210278031872205,
      "grad_norm": 2.84375,
      "learning_rate": 7.018076746341426e-06,
      "loss": 0.7632,
      "step": 747850
    },
    {
      "epoch": 2.621062850694116,
      "grad_norm": 2.71875,
      "learning_rate": 7.017427717677724e-06,
      "loss": 0.8652,
      "step": 747860
    },
    {
      "epoch": 2.6210978982010116,
      "grad_norm": 2.71875,
      "learning_rate": 7.016778689014022e-06,
      "loss": 0.8179,
      "step": 747870
    },
    {
      "epoch": 2.621132945707907,
      "grad_norm": 2.9375,
      "learning_rate": 7.016129660350321e-06,
      "loss": 0.7971,
      "step": 747880
    },
    {
      "epoch": 2.6211679932148026,
      "grad_norm": 3.140625,
      "learning_rate": 7.015480631686617e-06,
      "loss": 0.8988,
      "step": 747890
    },
    {
      "epoch": 2.6212030407216984,
      "grad_norm": 2.71875,
      "learning_rate": 7.014831603022916e-06,
      "loss": 0.8794,
      "step": 747900
    },
    {
      "epoch": 2.6212380882285937,
      "grad_norm": 2.78125,
      "learning_rate": 7.014182574359214e-06,
      "loss": 0.7838,
      "step": 747910
    },
    {
      "epoch": 2.6212731357354895,
      "grad_norm": 2.953125,
      "learning_rate": 7.013533545695512e-06,
      "loss": 0.9039,
      "step": 747920
    },
    {
      "epoch": 2.621308183242385,
      "grad_norm": 2.78125,
      "learning_rate": 7.01288451703181e-06,
      "loss": 0.8237,
      "step": 747930
    },
    {
      "epoch": 2.6213432307492806,
      "grad_norm": 2.640625,
      "learning_rate": 7.012235488368109e-06,
      "loss": 0.784,
      "step": 747940
    },
    {
      "epoch": 2.6213782782561763,
      "grad_norm": 2.78125,
      "learning_rate": 7.011586459704407e-06,
      "loss": 0.7494,
      "step": 747950
    },
    {
      "epoch": 2.621413325763072,
      "grad_norm": 3.015625,
      "learning_rate": 7.010937431040705e-06,
      "loss": 0.8424,
      "step": 747960
    },
    {
      "epoch": 2.6214483732699674,
      "grad_norm": 3.15625,
      "learning_rate": 7.010288402377004e-06,
      "loss": 0.8008,
      "step": 747970
    },
    {
      "epoch": 2.621483420776863,
      "grad_norm": 2.90625,
      "learning_rate": 7.0096393737133e-06,
      "loss": 0.8207,
      "step": 747980
    },
    {
      "epoch": 2.6215184682837585,
      "grad_norm": 2.984375,
      "learning_rate": 7.008990345049599e-06,
      "loss": 0.8312,
      "step": 747990
    },
    {
      "epoch": 2.621553515790654,
      "grad_norm": 3.21875,
      "learning_rate": 7.008341316385897e-06,
      "loss": 0.8388,
      "step": 748000
    },
    {
      "epoch": 2.62158856329755,
      "grad_norm": 2.6875,
      "learning_rate": 7.007692287722195e-06,
      "loss": 0.7891,
      "step": 748010
    },
    {
      "epoch": 2.6216236108044453,
      "grad_norm": 2.953125,
      "learning_rate": 7.007043259058493e-06,
      "loss": 0.7637,
      "step": 748020
    },
    {
      "epoch": 2.621658658311341,
      "grad_norm": 3.265625,
      "learning_rate": 7.006394230394792e-06,
      "loss": 0.9351,
      "step": 748030
    },
    {
      "epoch": 2.6216937058182364,
      "grad_norm": 2.65625,
      "learning_rate": 7.00574520173109e-06,
      "loss": 0.8171,
      "step": 748040
    },
    {
      "epoch": 2.621728753325132,
      "grad_norm": 2.921875,
      "learning_rate": 7.005096173067388e-06,
      "loss": 0.8776,
      "step": 748050
    },
    {
      "epoch": 2.621763800832028,
      "grad_norm": 3.0625,
      "learning_rate": 7.004447144403687e-06,
      "loss": 0.8277,
      "step": 748060
    },
    {
      "epoch": 2.6217988483389236,
      "grad_norm": 3.140625,
      "learning_rate": 7.003798115739985e-06,
      "loss": 0.7521,
      "step": 748070
    },
    {
      "epoch": 2.621833895845819,
      "grad_norm": 2.65625,
      "learning_rate": 7.003149087076281e-06,
      "loss": 0.8171,
      "step": 748080
    },
    {
      "epoch": 2.6218689433527147,
      "grad_norm": 3.3125,
      "learning_rate": 7.00250005841258e-06,
      "loss": 0.8388,
      "step": 748090
    },
    {
      "epoch": 2.62190399085961,
      "grad_norm": 2.96875,
      "learning_rate": 7.001851029748878e-06,
      "loss": 0.8799,
      "step": 748100
    },
    {
      "epoch": 2.6219390383665058,
      "grad_norm": 3.375,
      "learning_rate": 7.001202001085176e-06,
      "loss": 0.8221,
      "step": 748110
    },
    {
      "epoch": 2.6219740858734015,
      "grad_norm": 3.125,
      "learning_rate": 7.000552972421475e-06,
      "loss": 0.7909,
      "step": 748120
    },
    {
      "epoch": 2.622009133380297,
      "grad_norm": 2.78125,
      "learning_rate": 6.999903943757773e-06,
      "loss": 0.7626,
      "step": 748130
    },
    {
      "epoch": 2.6220441808871926,
      "grad_norm": 2.671875,
      "learning_rate": 6.999254915094071e-06,
      "loss": 0.7488,
      "step": 748140
    },
    {
      "epoch": 2.622079228394088,
      "grad_norm": 2.59375,
      "learning_rate": 6.998605886430369e-06,
      "loss": 0.8428,
      "step": 748150
    },
    {
      "epoch": 2.6221142759009837,
      "grad_norm": 3.046875,
      "learning_rate": 6.9979568577666676e-06,
      "loss": 0.7373,
      "step": 748160
    },
    {
      "epoch": 2.6221493234078794,
      "grad_norm": 2.6875,
      "learning_rate": 6.997307829102964e-06,
      "loss": 0.7823,
      "step": 748170
    },
    {
      "epoch": 2.622184370914775,
      "grad_norm": 2.5625,
      "learning_rate": 6.996658800439263e-06,
      "loss": 0.8042,
      "step": 748180
    },
    {
      "epoch": 2.6222194184216705,
      "grad_norm": 2.46875,
      "learning_rate": 6.996009771775561e-06,
      "loss": 0.7521,
      "step": 748190
    },
    {
      "epoch": 2.6222544659285663,
      "grad_norm": 3.0625,
      "learning_rate": 6.995360743111859e-06,
      "loss": 0.8179,
      "step": 748200
    },
    {
      "epoch": 2.6222895134354616,
      "grad_norm": 3.046875,
      "learning_rate": 6.9947117144481576e-06,
      "loss": 0.8652,
      "step": 748210
    },
    {
      "epoch": 2.6223245609423573,
      "grad_norm": 3.203125,
      "learning_rate": 6.9940626857844556e-06,
      "loss": 0.779,
      "step": 748220
    },
    {
      "epoch": 2.622359608449253,
      "grad_norm": 2.796875,
      "learning_rate": 6.9934136571207536e-06,
      "loss": 0.9331,
      "step": 748230
    },
    {
      "epoch": 2.6223946559561484,
      "grad_norm": 2.984375,
      "learning_rate": 6.9927646284570516e-06,
      "loss": 0.7649,
      "step": 748240
    },
    {
      "epoch": 2.622429703463044,
      "grad_norm": 2.4375,
      "learning_rate": 6.99211559979335e-06,
      "loss": 0.757,
      "step": 748250
    },
    {
      "epoch": 2.62246475096994,
      "grad_norm": 2.78125,
      "learning_rate": 6.991466571129647e-06,
      "loss": 0.8012,
      "step": 748260
    },
    {
      "epoch": 2.6224997984768352,
      "grad_norm": 2.890625,
      "learning_rate": 6.9908175424659456e-06,
      "loss": 0.73,
      "step": 748270
    },
    {
      "epoch": 2.622534845983731,
      "grad_norm": 2.953125,
      "learning_rate": 6.9901685138022436e-06,
      "loss": 0.7941,
      "step": 748280
    },
    {
      "epoch": 2.6225698934906267,
      "grad_norm": 2.90625,
      "learning_rate": 6.9895194851385416e-06,
      "loss": 0.8357,
      "step": 748290
    },
    {
      "epoch": 2.622604940997522,
      "grad_norm": 3.03125,
      "learning_rate": 6.98887045647484e-06,
      "loss": 0.8421,
      "step": 748300
    },
    {
      "epoch": 2.622639988504418,
      "grad_norm": 2.4375,
      "learning_rate": 6.988221427811138e-06,
      "loss": 0.8654,
      "step": 748310
    },
    {
      "epoch": 2.622675036011313,
      "grad_norm": 2.90625,
      "learning_rate": 6.987572399147436e-06,
      "loss": 0.8227,
      "step": 748320
    },
    {
      "epoch": 2.622710083518209,
      "grad_norm": 2.921875,
      "learning_rate": 6.986923370483734e-06,
      "loss": 0.7974,
      "step": 748330
    },
    {
      "epoch": 2.6227451310251046,
      "grad_norm": 2.671875,
      "learning_rate": 6.986274341820033e-06,
      "loss": 0.8082,
      "step": 748340
    },
    {
      "epoch": 2.622780178532,
      "grad_norm": 2.640625,
      "learning_rate": 6.985625313156331e-06,
      "loss": 0.8148,
      "step": 748350
    },
    {
      "epoch": 2.6228152260388957,
      "grad_norm": 2.796875,
      "learning_rate": 6.984976284492628e-06,
      "loss": 0.8597,
      "step": 748360
    },
    {
      "epoch": 2.6228502735457915,
      "grad_norm": 3.015625,
      "learning_rate": 6.984327255828926e-06,
      "loss": 0.7974,
      "step": 748370
    },
    {
      "epoch": 2.622885321052687,
      "grad_norm": 2.75,
      "learning_rate": 6.983678227165224e-06,
      "loss": 0.8511,
      "step": 748380
    },
    {
      "epoch": 2.6229203685595825,
      "grad_norm": 3.265625,
      "learning_rate": 6.983029198501522e-06,
      "loss": 0.8051,
      "step": 748390
    },
    {
      "epoch": 2.6229554160664783,
      "grad_norm": 3.34375,
      "learning_rate": 6.982380169837821e-06,
      "loss": 0.8469,
      "step": 748400
    },
    {
      "epoch": 2.6229904635733736,
      "grad_norm": 2.90625,
      "learning_rate": 6.981731141174119e-06,
      "loss": 0.7926,
      "step": 748410
    },
    {
      "epoch": 2.6230255110802694,
      "grad_norm": 3.21875,
      "learning_rate": 6.981082112510417e-06,
      "loss": 0.8166,
      "step": 748420
    },
    {
      "epoch": 2.6230605585871647,
      "grad_norm": 2.9375,
      "learning_rate": 6.980433083846716e-06,
      "loss": 0.7997,
      "step": 748430
    },
    {
      "epoch": 2.6230956060940605,
      "grad_norm": 2.765625,
      "learning_rate": 6.979784055183014e-06,
      "loss": 0.7289,
      "step": 748440
    },
    {
      "epoch": 2.623130653600956,
      "grad_norm": 2.921875,
      "learning_rate": 6.979135026519311e-06,
      "loss": 0.806,
      "step": 748450
    },
    {
      "epoch": 2.623165701107852,
      "grad_norm": 2.875,
      "learning_rate": 6.978485997855609e-06,
      "loss": 0.8521,
      "step": 748460
    },
    {
      "epoch": 2.6232007486147473,
      "grad_norm": 3.21875,
      "learning_rate": 6.977836969191907e-06,
      "loss": 0.8499,
      "step": 748470
    },
    {
      "epoch": 2.623235796121643,
      "grad_norm": 2.8125,
      "learning_rate": 6.977187940528205e-06,
      "loss": 0.795,
      "step": 748480
    },
    {
      "epoch": 2.6232708436285384,
      "grad_norm": 2.8125,
      "learning_rate": 6.976538911864504e-06,
      "loss": 0.8294,
      "step": 748490
    },
    {
      "epoch": 2.623305891135434,
      "grad_norm": 2.609375,
      "learning_rate": 6.975889883200802e-06,
      "loss": 0.7777,
      "step": 748500
    },
    {
      "epoch": 2.62334093864233,
      "grad_norm": 2.59375,
      "learning_rate": 6.9752408545371e-06,
      "loss": 0.7713,
      "step": 748510
    },
    {
      "epoch": 2.623375986149225,
      "grad_norm": 3.09375,
      "learning_rate": 6.974591825873399e-06,
      "loss": 0.8098,
      "step": 748520
    },
    {
      "epoch": 2.623411033656121,
      "grad_norm": 2.765625,
      "learning_rate": 6.973942797209697e-06,
      "loss": 0.737,
      "step": 748530
    },
    {
      "epoch": 2.6234460811630163,
      "grad_norm": 2.90625,
      "learning_rate": 6.973293768545995e-06,
      "loss": 0.803,
      "step": 748540
    },
    {
      "epoch": 2.623481128669912,
      "grad_norm": 2.4375,
      "learning_rate": 6.972644739882292e-06,
      "loss": 0.8594,
      "step": 748550
    },
    {
      "epoch": 2.6235161761768078,
      "grad_norm": 3.359375,
      "learning_rate": 6.97199571121859e-06,
      "loss": 0.7936,
      "step": 748560
    },
    {
      "epoch": 2.6235512236837035,
      "grad_norm": 3.34375,
      "learning_rate": 6.971346682554888e-06,
      "loss": 0.7608,
      "step": 748570
    },
    {
      "epoch": 2.623586271190599,
      "grad_norm": 2.78125,
      "learning_rate": 6.970697653891187e-06,
      "loss": 0.8582,
      "step": 748580
    },
    {
      "epoch": 2.6236213186974946,
      "grad_norm": 3.015625,
      "learning_rate": 6.970048625227485e-06,
      "loss": 0.8179,
      "step": 748590
    },
    {
      "epoch": 2.62365636620439,
      "grad_norm": 3.140625,
      "learning_rate": 6.969399596563783e-06,
      "loss": 0.7739,
      "step": 748600
    },
    {
      "epoch": 2.6236914137112857,
      "grad_norm": 3.171875,
      "learning_rate": 6.968750567900082e-06,
      "loss": 0.8236,
      "step": 748610
    },
    {
      "epoch": 2.6237264612181814,
      "grad_norm": 2.609375,
      "learning_rate": 6.96810153923638e-06,
      "loss": 0.7932,
      "step": 748620
    },
    {
      "epoch": 2.6237615087250767,
      "grad_norm": 2.53125,
      "learning_rate": 6.967452510572678e-06,
      "loss": 0.6838,
      "step": 748630
    },
    {
      "epoch": 2.6237965562319725,
      "grad_norm": 3.09375,
      "learning_rate": 6.966803481908975e-06,
      "loss": 0.8055,
      "step": 748640
    },
    {
      "epoch": 2.623831603738868,
      "grad_norm": 2.828125,
      "learning_rate": 6.966154453245273e-06,
      "loss": 0.8348,
      "step": 748650
    },
    {
      "epoch": 2.6238666512457636,
      "grad_norm": 2.84375,
      "learning_rate": 6.965505424581571e-06,
      "loss": 0.7743,
      "step": 748660
    },
    {
      "epoch": 2.6239016987526593,
      "grad_norm": 2.984375,
      "learning_rate": 6.96485639591787e-06,
      "loss": 0.7615,
      "step": 748670
    },
    {
      "epoch": 2.623936746259555,
      "grad_norm": 2.953125,
      "learning_rate": 6.964207367254168e-06,
      "loss": 0.7731,
      "step": 748680
    },
    {
      "epoch": 2.6239717937664504,
      "grad_norm": 2.8125,
      "learning_rate": 6.963558338590466e-06,
      "loss": 0.8304,
      "step": 748690
    },
    {
      "epoch": 2.624006841273346,
      "grad_norm": 2.40625,
      "learning_rate": 6.962909309926764e-06,
      "loss": 0.7544,
      "step": 748700
    },
    {
      "epoch": 2.6240418887802415,
      "grad_norm": 3.828125,
      "learning_rate": 6.962260281263063e-06,
      "loss": 0.7366,
      "step": 748710
    },
    {
      "epoch": 2.6240769362871372,
      "grad_norm": 3.28125,
      "learning_rate": 6.961611252599361e-06,
      "loss": 0.7552,
      "step": 748720
    },
    {
      "epoch": 2.624111983794033,
      "grad_norm": 2.78125,
      "learning_rate": 6.960962223935658e-06,
      "loss": 0.782,
      "step": 748730
    },
    {
      "epoch": 2.6241470313009283,
      "grad_norm": 3.59375,
      "learning_rate": 6.960313195271956e-06,
      "loss": 0.8402,
      "step": 748740
    },
    {
      "epoch": 2.624182078807824,
      "grad_norm": 3.078125,
      "learning_rate": 6.959664166608254e-06,
      "loss": 0.8283,
      "step": 748750
    },
    {
      "epoch": 2.6242171263147194,
      "grad_norm": 2.90625,
      "learning_rate": 6.959015137944553e-06,
      "loss": 0.783,
      "step": 748760
    },
    {
      "epoch": 2.624252173821615,
      "grad_norm": 3.0625,
      "learning_rate": 6.958366109280851e-06,
      "loss": 0.8381,
      "step": 748770
    },
    {
      "epoch": 2.624287221328511,
      "grad_norm": 2.71875,
      "learning_rate": 6.957717080617149e-06,
      "loss": 0.7571,
      "step": 748780
    },
    {
      "epoch": 2.6243222688354066,
      "grad_norm": 2.984375,
      "learning_rate": 6.957068051953447e-06,
      "loss": 0.8101,
      "step": 748790
    },
    {
      "epoch": 2.624357316342302,
      "grad_norm": 2.625,
      "learning_rate": 6.9564190232897455e-06,
      "loss": 0.6835,
      "step": 748800
    },
    {
      "epoch": 2.6243923638491977,
      "grad_norm": 3.078125,
      "learning_rate": 6.9557699946260435e-06,
      "loss": 0.8023,
      "step": 748810
    },
    {
      "epoch": 2.624427411356093,
      "grad_norm": 2.84375,
      "learning_rate": 6.9551209659623415e-06,
      "loss": 0.7998,
      "step": 748820
    },
    {
      "epoch": 2.624462458862989,
      "grad_norm": 2.609375,
      "learning_rate": 6.954471937298639e-06,
      "loss": 0.7692,
      "step": 748830
    },
    {
      "epoch": 2.6244975063698845,
      "grad_norm": 2.921875,
      "learning_rate": 6.953822908634937e-06,
      "loss": 0.8088,
      "step": 748840
    },
    {
      "epoch": 2.62453255387678,
      "grad_norm": 3.046875,
      "learning_rate": 6.9531738799712355e-06,
      "loss": 0.8224,
      "step": 748850
    },
    {
      "epoch": 2.6245676013836756,
      "grad_norm": 3.03125,
      "learning_rate": 6.9525248513075335e-06,
      "loss": 0.8721,
      "step": 748860
    },
    {
      "epoch": 2.624602648890571,
      "grad_norm": 2.671875,
      "learning_rate": 6.9518758226438315e-06,
      "loss": 0.8359,
      "step": 748870
    },
    {
      "epoch": 2.6246376963974667,
      "grad_norm": 3.359375,
      "learning_rate": 6.9512267939801295e-06,
      "loss": 0.7913,
      "step": 748880
    },
    {
      "epoch": 2.6246727439043624,
      "grad_norm": 2.734375,
      "learning_rate": 6.950577765316428e-06,
      "loss": 0.7522,
      "step": 748890
    },
    {
      "epoch": 2.624707791411258,
      "grad_norm": 3.015625,
      "learning_rate": 6.949928736652726e-06,
      "loss": 0.833,
      "step": 748900
    },
    {
      "epoch": 2.6247428389181535,
      "grad_norm": 2.5,
      "learning_rate": 6.949279707989024e-06,
      "loss": 0.8922,
      "step": 748910
    },
    {
      "epoch": 2.6247778864250493,
      "grad_norm": 3.03125,
      "learning_rate": 6.9486306793253215e-06,
      "loss": 0.7724,
      "step": 748920
    },
    {
      "epoch": 2.6248129339319446,
      "grad_norm": 2.9375,
      "learning_rate": 6.9479816506616195e-06,
      "loss": 0.7974,
      "step": 748930
    },
    {
      "epoch": 2.6248479814388403,
      "grad_norm": 2.875,
      "learning_rate": 6.9473326219979175e-06,
      "loss": 0.8145,
      "step": 748940
    },
    {
      "epoch": 2.624883028945736,
      "grad_norm": 2.828125,
      "learning_rate": 6.946683593334216e-06,
      "loss": 0.8183,
      "step": 748950
    },
    {
      "epoch": 2.6249180764526314,
      "grad_norm": 2.8125,
      "learning_rate": 6.946034564670514e-06,
      "loss": 0.7669,
      "step": 748960
    },
    {
      "epoch": 2.624953123959527,
      "grad_norm": 3.265625,
      "learning_rate": 6.945385536006812e-06,
      "loss": 0.8384,
      "step": 748970
    },
    {
      "epoch": 2.6249881714664225,
      "grad_norm": 2.90625,
      "learning_rate": 6.944736507343111e-06,
      "loss": 0.8641,
      "step": 748980
    },
    {
      "epoch": 2.6250232189733183,
      "grad_norm": 3.15625,
      "learning_rate": 6.944087478679409e-06,
      "loss": 0.7465,
      "step": 748990
    },
    {
      "epoch": 2.625058266480214,
      "grad_norm": 2.984375,
      "learning_rate": 6.943438450015707e-06,
      "loss": 0.7971,
      "step": 749000
    },
    {
      "epoch": 2.6250933139871098,
      "grad_norm": 2.640625,
      "learning_rate": 6.942789421352005e-06,
      "loss": 0.8447,
      "step": 749010
    },
    {
      "epoch": 2.625128361494005,
      "grad_norm": 2.609375,
      "learning_rate": 6.942140392688302e-06,
      "loss": 0.7978,
      "step": 749020
    },
    {
      "epoch": 2.625163409000901,
      "grad_norm": 2.625,
      "learning_rate": 6.9414913640246e-06,
      "loss": 0.7327,
      "step": 749030
    },
    {
      "epoch": 2.625198456507796,
      "grad_norm": 3.46875,
      "learning_rate": 6.940842335360899e-06,
      "loss": 0.757,
      "step": 749040
    },
    {
      "epoch": 2.625233504014692,
      "grad_norm": 3.15625,
      "learning_rate": 6.940193306697197e-06,
      "loss": 0.864,
      "step": 749050
    },
    {
      "epoch": 2.6252685515215877,
      "grad_norm": 3.15625,
      "learning_rate": 6.939544278033495e-06,
      "loss": 0.8184,
      "step": 749060
    },
    {
      "epoch": 2.625303599028483,
      "grad_norm": 2.84375,
      "learning_rate": 6.938895249369794e-06,
      "loss": 0.7362,
      "step": 749070
    },
    {
      "epoch": 2.6253386465353787,
      "grad_norm": 3.25,
      "learning_rate": 6.938246220706092e-06,
      "loss": 0.8226,
      "step": 749080
    },
    {
      "epoch": 2.625373694042274,
      "grad_norm": 3.109375,
      "learning_rate": 6.93759719204239e-06,
      "loss": 0.8069,
      "step": 749090
    },
    {
      "epoch": 2.62540874154917,
      "grad_norm": 2.453125,
      "learning_rate": 6.936948163378688e-06,
      "loss": 0.7366,
      "step": 749100
    },
    {
      "epoch": 2.6254437890560656,
      "grad_norm": 3.21875,
      "learning_rate": 6.936299134714985e-06,
      "loss": 0.7798,
      "step": 749110
    },
    {
      "epoch": 2.6254788365629613,
      "grad_norm": 2.6875,
      "learning_rate": 6.935650106051283e-06,
      "loss": 0.8462,
      "step": 749120
    },
    {
      "epoch": 2.6255138840698566,
      "grad_norm": 3.046875,
      "learning_rate": 6.935001077387582e-06,
      "loss": 0.8352,
      "step": 749130
    },
    {
      "epoch": 2.6255489315767524,
      "grad_norm": 2.84375,
      "learning_rate": 6.93435204872388e-06,
      "loss": 0.7816,
      "step": 749140
    },
    {
      "epoch": 2.6255839790836477,
      "grad_norm": 3.125,
      "learning_rate": 6.933703020060178e-06,
      "loss": 0.7611,
      "step": 749150
    },
    {
      "epoch": 2.6256190265905435,
      "grad_norm": 2.9375,
      "learning_rate": 6.933053991396477e-06,
      "loss": 0.8397,
      "step": 749160
    },
    {
      "epoch": 2.6256540740974392,
      "grad_norm": 3.21875,
      "learning_rate": 6.932404962732775e-06,
      "loss": 0.8587,
      "step": 749170
    },
    {
      "epoch": 2.6256891216043345,
      "grad_norm": 3.3125,
      "learning_rate": 6.931755934069073e-06,
      "loss": 0.7416,
      "step": 749180
    },
    {
      "epoch": 2.6257241691112303,
      "grad_norm": 3.15625,
      "learning_rate": 6.931106905405371e-06,
      "loss": 0.7572,
      "step": 749190
    },
    {
      "epoch": 2.6257592166181256,
      "grad_norm": 3.203125,
      "learning_rate": 6.930457876741668e-06,
      "loss": 0.777,
      "step": 749200
    },
    {
      "epoch": 2.6257942641250214,
      "grad_norm": 3.140625,
      "learning_rate": 6.929808848077966e-06,
      "loss": 0.749,
      "step": 749210
    },
    {
      "epoch": 2.625829311631917,
      "grad_norm": 3.4375,
      "learning_rate": 6.929159819414265e-06,
      "loss": 0.8446,
      "step": 749220
    },
    {
      "epoch": 2.625864359138813,
      "grad_norm": 3.15625,
      "learning_rate": 6.928510790750563e-06,
      "loss": 0.7371,
      "step": 749230
    },
    {
      "epoch": 2.625899406645708,
      "grad_norm": 3.015625,
      "learning_rate": 6.927861762086861e-06,
      "loss": 0.8,
      "step": 749240
    },
    {
      "epoch": 2.625934454152604,
      "grad_norm": 3.34375,
      "learning_rate": 6.927212733423159e-06,
      "loss": 0.8063,
      "step": 749250
    },
    {
      "epoch": 2.6259695016594993,
      "grad_norm": 2.8125,
      "learning_rate": 6.926563704759458e-06,
      "loss": 0.7784,
      "step": 749260
    },
    {
      "epoch": 2.626004549166395,
      "grad_norm": 2.6875,
      "learning_rate": 6.925914676095756e-06,
      "loss": 0.7215,
      "step": 749270
    },
    {
      "epoch": 2.626039596673291,
      "grad_norm": 3.3125,
      "learning_rate": 6.925265647432054e-06,
      "loss": 0.7242,
      "step": 749280
    },
    {
      "epoch": 2.626074644180186,
      "grad_norm": 3.03125,
      "learning_rate": 6.9246166187683526e-06,
      "loss": 0.8141,
      "step": 749290
    },
    {
      "epoch": 2.626109691687082,
      "grad_norm": 3.078125,
      "learning_rate": 6.923967590104649e-06,
      "loss": 0.8433,
      "step": 749300
    },
    {
      "epoch": 2.626144739193977,
      "grad_norm": 2.984375,
      "learning_rate": 6.923318561440948e-06,
      "loss": 0.8269,
      "step": 749310
    },
    {
      "epoch": 2.626179786700873,
      "grad_norm": 3.140625,
      "learning_rate": 6.922669532777246e-06,
      "loss": 0.8186,
      "step": 749320
    },
    {
      "epoch": 2.6262148342077687,
      "grad_norm": 2.4375,
      "learning_rate": 6.922020504113544e-06,
      "loss": 0.7819,
      "step": 749330
    },
    {
      "epoch": 2.6262498817146644,
      "grad_norm": 2.6875,
      "learning_rate": 6.921371475449842e-06,
      "loss": 0.7445,
      "step": 749340
    },
    {
      "epoch": 2.6262849292215598,
      "grad_norm": 3.21875,
      "learning_rate": 6.9207224467861406e-06,
      "loss": 0.7244,
      "step": 749350
    },
    {
      "epoch": 2.6263199767284555,
      "grad_norm": 3.140625,
      "learning_rate": 6.9200734181224386e-06,
      "loss": 0.7438,
      "step": 749360
    },
    {
      "epoch": 2.626355024235351,
      "grad_norm": 3.53125,
      "learning_rate": 6.9194243894587366e-06,
      "loss": 0.7718,
      "step": 749370
    },
    {
      "epoch": 2.6263900717422466,
      "grad_norm": 3.40625,
      "learning_rate": 6.918775360795035e-06,
      "loss": 0.8825,
      "step": 749380
    },
    {
      "epoch": 2.6264251192491423,
      "grad_norm": 3.390625,
      "learning_rate": 6.918126332131332e-06,
      "loss": 0.763,
      "step": 749390
    },
    {
      "epoch": 2.6264601667560377,
      "grad_norm": 2.34375,
      "learning_rate": 6.9174773034676306e-06,
      "loss": 0.757,
      "step": 749400
    },
    {
      "epoch": 2.6264952142629334,
      "grad_norm": 2.703125,
      "learning_rate": 6.9168282748039286e-06,
      "loss": 0.7741,
      "step": 749410
    },
    {
      "epoch": 2.6265302617698287,
      "grad_norm": 2.03125,
      "learning_rate": 6.9161792461402266e-06,
      "loss": 0.7163,
      "step": 749420
    },
    {
      "epoch": 2.6265653092767245,
      "grad_norm": 2.59375,
      "learning_rate": 6.9155302174765246e-06,
      "loss": 0.7836,
      "step": 749430
    },
    {
      "epoch": 2.6266003567836202,
      "grad_norm": 2.875,
      "learning_rate": 6.914881188812823e-06,
      "loss": 0.8492,
      "step": 749440
    },
    {
      "epoch": 2.626635404290516,
      "grad_norm": 2.546875,
      "learning_rate": 6.914232160149121e-06,
      "loss": 0.7361,
      "step": 749450
    },
    {
      "epoch": 2.6266704517974113,
      "grad_norm": 2.96875,
      "learning_rate": 6.913583131485419e-06,
      "loss": 0.8936,
      "step": 749460
    },
    {
      "epoch": 2.626705499304307,
      "grad_norm": 2.921875,
      "learning_rate": 6.912934102821718e-06,
      "loss": 0.8071,
      "step": 749470
    },
    {
      "epoch": 2.6267405468112024,
      "grad_norm": 2.625,
      "learning_rate": 6.912285074158016e-06,
      "loss": 0.8387,
      "step": 749480
    },
    {
      "epoch": 2.626775594318098,
      "grad_norm": 3.078125,
      "learning_rate": 6.9116360454943126e-06,
      "loss": 0.8303,
      "step": 749490
    },
    {
      "epoch": 2.626810641824994,
      "grad_norm": 3.1875,
      "learning_rate": 6.910987016830611e-06,
      "loss": 0.7881,
      "step": 749500
    },
    {
      "epoch": 2.626845689331889,
      "grad_norm": 3.421875,
      "learning_rate": 6.910337988166909e-06,
      "loss": 0.8314,
      "step": 749510
    },
    {
      "epoch": 2.626880736838785,
      "grad_norm": 2.765625,
      "learning_rate": 6.909688959503207e-06,
      "loss": 0.7441,
      "step": 749520
    },
    {
      "epoch": 2.6269157843456803,
      "grad_norm": 3.15625,
      "learning_rate": 6.909039930839506e-06,
      "loss": 0.7669,
      "step": 749530
    },
    {
      "epoch": 2.626950831852576,
      "grad_norm": 3.0625,
      "learning_rate": 6.908390902175804e-06,
      "loss": 0.8249,
      "step": 749540
    },
    {
      "epoch": 2.626985879359472,
      "grad_norm": 3.265625,
      "learning_rate": 6.907741873512102e-06,
      "loss": 0.7909,
      "step": 749550
    },
    {
      "epoch": 2.6270209268663676,
      "grad_norm": 2.875,
      "learning_rate": 6.907092844848401e-06,
      "loss": 0.7699,
      "step": 749560
    },
    {
      "epoch": 2.627055974373263,
      "grad_norm": 3.078125,
      "learning_rate": 6.906443816184699e-06,
      "loss": 0.7989,
      "step": 749570
    },
    {
      "epoch": 2.6270910218801586,
      "grad_norm": 3.0625,
      "learning_rate": 6.905794787520995e-06,
      "loss": 0.844,
      "step": 749580
    },
    {
      "epoch": 2.627126069387054,
      "grad_norm": 3.015625,
      "learning_rate": 6.905145758857294e-06,
      "loss": 0.7844,
      "step": 749590
    },
    {
      "epoch": 2.6271611168939497,
      "grad_norm": 3.375,
      "learning_rate": 6.904496730193592e-06,
      "loss": 0.8632,
      "step": 749600
    },
    {
      "epoch": 2.6271961644008455,
      "grad_norm": 3.234375,
      "learning_rate": 6.90384770152989e-06,
      "loss": 0.8464,
      "step": 749610
    },
    {
      "epoch": 2.627231211907741,
      "grad_norm": 2.90625,
      "learning_rate": 6.903198672866189e-06,
      "loss": 0.8077,
      "step": 749620
    },
    {
      "epoch": 2.6272662594146365,
      "grad_norm": 3.109375,
      "learning_rate": 6.902549644202487e-06,
      "loss": 0.8953,
      "step": 749630
    },
    {
      "epoch": 2.6273013069215323,
      "grad_norm": 3.078125,
      "learning_rate": 6.901900615538785e-06,
      "loss": 0.8137,
      "step": 749640
    },
    {
      "epoch": 2.6273363544284276,
      "grad_norm": 2.609375,
      "learning_rate": 6.901251586875083e-06,
      "loss": 0.8448,
      "step": 749650
    },
    {
      "epoch": 2.6273714019353234,
      "grad_norm": 3.046875,
      "learning_rate": 6.900602558211382e-06,
      "loss": 0.7252,
      "step": 749660
    },
    {
      "epoch": 2.627406449442219,
      "grad_norm": 3.0625,
      "learning_rate": 6.899953529547678e-06,
      "loss": 0.8256,
      "step": 749670
    },
    {
      "epoch": 2.6274414969491144,
      "grad_norm": 2.6875,
      "learning_rate": 6.899304500883977e-06,
      "loss": 0.774,
      "step": 749680
    },
    {
      "epoch": 2.62747654445601,
      "grad_norm": 3.453125,
      "learning_rate": 6.898655472220275e-06,
      "loss": 0.8308,
      "step": 749690
    },
    {
      "epoch": 2.6275115919629055,
      "grad_norm": 3.09375,
      "learning_rate": 6.898006443556573e-06,
      "loss": 0.7531,
      "step": 749700
    },
    {
      "epoch": 2.6275466394698013,
      "grad_norm": 2.84375,
      "learning_rate": 6.897357414892872e-06,
      "loss": 0.7823,
      "step": 749710
    },
    {
      "epoch": 2.627581686976697,
      "grad_norm": 2.53125,
      "learning_rate": 6.89670838622917e-06,
      "loss": 0.8295,
      "step": 749720
    },
    {
      "epoch": 2.627616734483593,
      "grad_norm": 2.734375,
      "learning_rate": 6.896059357565468e-06,
      "loss": 0.8113,
      "step": 749730
    },
    {
      "epoch": 2.627651781990488,
      "grad_norm": 2.96875,
      "learning_rate": 6.895410328901766e-06,
      "loss": 0.7903,
      "step": 749740
    },
    {
      "epoch": 2.627686829497384,
      "grad_norm": 3.34375,
      "learning_rate": 6.894761300238065e-06,
      "loss": 0.8517,
      "step": 749750
    },
    {
      "epoch": 2.627721877004279,
      "grad_norm": 3.0625,
      "learning_rate": 6.894112271574363e-06,
      "loss": 0.8309,
      "step": 749760
    },
    {
      "epoch": 2.627756924511175,
      "grad_norm": 3.0,
      "learning_rate": 6.89346324291066e-06,
      "loss": 0.7481,
      "step": 749770
    },
    {
      "epoch": 2.6277919720180707,
      "grad_norm": 2.75,
      "learning_rate": 6.892814214246958e-06,
      "loss": 0.751,
      "step": 749780
    },
    {
      "epoch": 2.627827019524966,
      "grad_norm": 3.0,
      "learning_rate": 6.892165185583256e-06,
      "loss": 0.8631,
      "step": 749790
    },
    {
      "epoch": 2.6278620670318618,
      "grad_norm": 2.859375,
      "learning_rate": 6.891516156919555e-06,
      "loss": 0.8148,
      "step": 749800
    },
    {
      "epoch": 2.627897114538757,
      "grad_norm": 2.8125,
      "learning_rate": 6.890867128255853e-06,
      "loss": 0.7608,
      "step": 749810
    },
    {
      "epoch": 2.627932162045653,
      "grad_norm": 3.0,
      "learning_rate": 6.890218099592151e-06,
      "loss": 0.8457,
      "step": 749820
    },
    {
      "epoch": 2.6279672095525486,
      "grad_norm": 3.25,
      "learning_rate": 6.889569070928449e-06,
      "loss": 0.8154,
      "step": 749830
    },
    {
      "epoch": 2.6280022570594443,
      "grad_norm": 3.359375,
      "learning_rate": 6.888920042264748e-06,
      "loss": 0.8546,
      "step": 749840
    },
    {
      "epoch": 2.6280373045663397,
      "grad_norm": 2.734375,
      "learning_rate": 6.888271013601046e-06,
      "loss": 0.8295,
      "step": 749850
    },
    {
      "epoch": 2.6280723520732354,
      "grad_norm": 3.21875,
      "learning_rate": 6.887621984937343e-06,
      "loss": 0.8357,
      "step": 749860
    },
    {
      "epoch": 2.6281073995801307,
      "grad_norm": 3.25,
      "learning_rate": 6.886972956273641e-06,
      "loss": 0.8002,
      "step": 749870
    },
    {
      "epoch": 2.6281424470870265,
      "grad_norm": 3.03125,
      "learning_rate": 6.886323927609939e-06,
      "loss": 0.7985,
      "step": 749880
    },
    {
      "epoch": 2.6281774945939222,
      "grad_norm": 3.015625,
      "learning_rate": 6.885674898946237e-06,
      "loss": 0.8121,
      "step": 749890
    },
    {
      "epoch": 2.6282125421008176,
      "grad_norm": 3.140625,
      "learning_rate": 6.885025870282536e-06,
      "loss": 0.8759,
      "step": 749900
    },
    {
      "epoch": 2.6282475896077133,
      "grad_norm": 2.515625,
      "learning_rate": 6.884376841618834e-06,
      "loss": 0.7378,
      "step": 749910
    },
    {
      "epoch": 2.6282826371146086,
      "grad_norm": 3.03125,
      "learning_rate": 6.883727812955132e-06,
      "loss": 0.8058,
      "step": 749920
    },
    {
      "epoch": 2.6283176846215044,
      "grad_norm": 2.9375,
      "learning_rate": 6.8830787842914305e-06,
      "loss": 0.7769,
      "step": 749930
    },
    {
      "epoch": 2.6283527321284,
      "grad_norm": 2.453125,
      "learning_rate": 6.8824297556277285e-06,
      "loss": 0.7462,
      "step": 749940
    },
    {
      "epoch": 2.628387779635296,
      "grad_norm": 2.875,
      "learning_rate": 6.8817807269640265e-06,
      "loss": 0.8152,
      "step": 749950
    },
    {
      "epoch": 2.628422827142191,
      "grad_norm": 3.234375,
      "learning_rate": 6.881131698300324e-06,
      "loss": 0.7477,
      "step": 749960
    },
    {
      "epoch": 2.628457874649087,
      "grad_norm": 2.71875,
      "learning_rate": 6.880482669636622e-06,
      "loss": 0.7581,
      "step": 749970
    },
    {
      "epoch": 2.6284929221559823,
      "grad_norm": 3.21875,
      "learning_rate": 6.87983364097292e-06,
      "loss": 0.7944,
      "step": 749980
    },
    {
      "epoch": 2.628527969662878,
      "grad_norm": 3.546875,
      "learning_rate": 6.8791846123092185e-06,
      "loss": 0.7047,
      "step": 749990
    },
    {
      "epoch": 2.628563017169774,
      "grad_norm": 2.90625,
      "learning_rate": 6.8785355836455165e-06,
      "loss": 0.8503,
      "step": 750000
    },
    {
      "epoch": 2.628563017169774,
      "eval_loss": 0.7538542747497559,
      "eval_runtime": 555.5692,
      "eval_samples_per_second": 684.768,
      "eval_steps_per_second": 57.064,
      "step": 750000
    },
    {
      "epoch": 2.628598064676669,
      "grad_norm": 3.453125,
      "learning_rate": 6.8778865549818145e-06,
      "loss": 0.6758,
      "step": 750010
    },
    {
      "epoch": 2.628633112183565,
      "grad_norm": 2.9375,
      "learning_rate": 6.877237526318113e-06,
      "loss": 0.7504,
      "step": 750020
    },
    {
      "epoch": 2.62866815969046,
      "grad_norm": 2.46875,
      "learning_rate": 6.876588497654411e-06,
      "loss": 0.8239,
      "step": 750030
    },
    {
      "epoch": 2.628703207197356,
      "grad_norm": 3.15625,
      "learning_rate": 6.875939468990709e-06,
      "loss": 0.7303,
      "step": 750040
    },
    {
      "epoch": 2.6287382547042517,
      "grad_norm": 3.3125,
      "learning_rate": 6.8752904403270065e-06,
      "loss": 0.8246,
      "step": 750050
    },
    {
      "epoch": 2.6287733022111475,
      "grad_norm": 2.84375,
      "learning_rate": 6.8746414116633045e-06,
      "loss": 0.7822,
      "step": 750060
    },
    {
      "epoch": 2.628808349718043,
      "grad_norm": 2.828125,
      "learning_rate": 6.8739923829996025e-06,
      "loss": 0.8126,
      "step": 750070
    },
    {
      "epoch": 2.6288433972249385,
      "grad_norm": 3.078125,
      "learning_rate": 6.873343354335901e-06,
      "loss": 0.8839,
      "step": 750080
    },
    {
      "epoch": 2.628878444731834,
      "grad_norm": 2.484375,
      "learning_rate": 6.872694325672199e-06,
      "loss": 0.8016,
      "step": 750090
    },
    {
      "epoch": 2.6289134922387296,
      "grad_norm": 2.75,
      "learning_rate": 6.872045297008497e-06,
      "loss": 0.8072,
      "step": 750100
    },
    {
      "epoch": 2.6289485397456254,
      "grad_norm": 3.15625,
      "learning_rate": 6.871396268344796e-06,
      "loss": 0.8355,
      "step": 750110
    },
    {
      "epoch": 2.6289835872525207,
      "grad_norm": 3.109375,
      "learning_rate": 6.870747239681094e-06,
      "loss": 0.8168,
      "step": 750120
    },
    {
      "epoch": 2.6290186347594164,
      "grad_norm": 3.03125,
      "learning_rate": 6.870098211017392e-06,
      "loss": 0.7373,
      "step": 750130
    },
    {
      "epoch": 2.6290536822663118,
      "grad_norm": 2.890625,
      "learning_rate": 6.869449182353689e-06,
      "loss": 0.7375,
      "step": 750140
    },
    {
      "epoch": 2.6290887297732075,
      "grad_norm": 2.4375,
      "learning_rate": 6.868800153689987e-06,
      "loss": 0.7506,
      "step": 750150
    },
    {
      "epoch": 2.6291237772801033,
      "grad_norm": 3.28125,
      "learning_rate": 6.868151125026285e-06,
      "loss": 0.7511,
      "step": 750160
    },
    {
      "epoch": 2.629158824786999,
      "grad_norm": 3.09375,
      "learning_rate": 6.867502096362584e-06,
      "loss": 0.8105,
      "step": 750170
    },
    {
      "epoch": 2.6291938722938943,
      "grad_norm": 3.0625,
      "learning_rate": 6.866853067698882e-06,
      "loss": 0.8085,
      "step": 750180
    },
    {
      "epoch": 2.62922891980079,
      "grad_norm": 3.234375,
      "learning_rate": 6.86620403903518e-06,
      "loss": 0.7863,
      "step": 750190
    },
    {
      "epoch": 2.6292639673076854,
      "grad_norm": 3.140625,
      "learning_rate": 6.865555010371478e-06,
      "loss": 0.8106,
      "step": 750200
    },
    {
      "epoch": 2.629299014814581,
      "grad_norm": 2.65625,
      "learning_rate": 6.864905981707777e-06,
      "loss": 0.8033,
      "step": 750210
    },
    {
      "epoch": 2.629334062321477,
      "grad_norm": 2.625,
      "learning_rate": 6.864256953044075e-06,
      "loss": 0.8381,
      "step": 750220
    },
    {
      "epoch": 2.6293691098283722,
      "grad_norm": 2.734375,
      "learning_rate": 6.863607924380373e-06,
      "loss": 0.7877,
      "step": 750230
    },
    {
      "epoch": 2.629404157335268,
      "grad_norm": 2.71875,
      "learning_rate": 6.86295889571667e-06,
      "loss": 0.8632,
      "step": 750240
    },
    {
      "epoch": 2.6294392048421633,
      "grad_norm": 2.59375,
      "learning_rate": 6.862309867052968e-06,
      "loss": 0.838,
      "step": 750250
    },
    {
      "epoch": 2.629474252349059,
      "grad_norm": 2.9375,
      "learning_rate": 6.861660838389267e-06,
      "loss": 0.7907,
      "step": 750260
    },
    {
      "epoch": 2.629509299855955,
      "grad_norm": 3.046875,
      "learning_rate": 6.861011809725565e-06,
      "loss": 0.858,
      "step": 750270
    },
    {
      "epoch": 2.6295443473628506,
      "grad_norm": 2.859375,
      "learning_rate": 6.860362781061863e-06,
      "loss": 0.83,
      "step": 750280
    },
    {
      "epoch": 2.629579394869746,
      "grad_norm": 3.171875,
      "learning_rate": 6.859713752398161e-06,
      "loss": 0.7552,
      "step": 750290
    },
    {
      "epoch": 2.6296144423766417,
      "grad_norm": 2.75,
      "learning_rate": 6.85906472373446e-06,
      "loss": 0.8167,
      "step": 750300
    },
    {
      "epoch": 2.629649489883537,
      "grad_norm": 3.15625,
      "learning_rate": 6.858415695070758e-06,
      "loss": 0.8149,
      "step": 750310
    },
    {
      "epoch": 2.6296845373904327,
      "grad_norm": 3.0625,
      "learning_rate": 6.857766666407056e-06,
      "loss": 0.7549,
      "step": 750320
    },
    {
      "epoch": 2.6297195848973285,
      "grad_norm": 2.703125,
      "learning_rate": 6.857117637743353e-06,
      "loss": 0.7819,
      "step": 750330
    },
    {
      "epoch": 2.629754632404224,
      "grad_norm": 3.203125,
      "learning_rate": 6.856468609079651e-06,
      "loss": 0.7575,
      "step": 750340
    },
    {
      "epoch": 2.6297896799111196,
      "grad_norm": 2.828125,
      "learning_rate": 6.85581958041595e-06,
      "loss": 0.8126,
      "step": 750350
    },
    {
      "epoch": 2.629824727418015,
      "grad_norm": 2.9375,
      "learning_rate": 6.855170551752248e-06,
      "loss": 0.8221,
      "step": 750360
    },
    {
      "epoch": 2.6298597749249106,
      "grad_norm": 3.109375,
      "learning_rate": 6.854521523088546e-06,
      "loss": 0.8132,
      "step": 750370
    },
    {
      "epoch": 2.6298948224318064,
      "grad_norm": 3.09375,
      "learning_rate": 6.853872494424844e-06,
      "loss": 0.8076,
      "step": 750380
    },
    {
      "epoch": 2.629929869938702,
      "grad_norm": 2.96875,
      "learning_rate": 6.853223465761143e-06,
      "loss": 0.7398,
      "step": 750390
    },
    {
      "epoch": 2.6299649174455975,
      "grad_norm": 3.328125,
      "learning_rate": 6.852574437097441e-06,
      "loss": 0.7984,
      "step": 750400
    },
    {
      "epoch": 2.629999964952493,
      "grad_norm": 3.21875,
      "learning_rate": 6.851925408433739e-06,
      "loss": 0.7972,
      "step": 750410
    },
    {
      "epoch": 2.6300350124593885,
      "grad_norm": 2.859375,
      "learning_rate": 6.8512763797700376e-06,
      "loss": 0.7156,
      "step": 750420
    },
    {
      "epoch": 2.6300700599662843,
      "grad_norm": 2.703125,
      "learning_rate": 6.850627351106334e-06,
      "loss": 0.7517,
      "step": 750430
    },
    {
      "epoch": 2.63010510747318,
      "grad_norm": 2.609375,
      "learning_rate": 6.849978322442632e-06,
      "loss": 0.8565,
      "step": 750440
    },
    {
      "epoch": 2.6301401549800754,
      "grad_norm": 2.515625,
      "learning_rate": 6.849329293778931e-06,
      "loss": 0.8154,
      "step": 750450
    },
    {
      "epoch": 2.630175202486971,
      "grad_norm": 2.78125,
      "learning_rate": 6.848680265115229e-06,
      "loss": 0.8382,
      "step": 750460
    },
    {
      "epoch": 2.6302102499938664,
      "grad_norm": 3.234375,
      "learning_rate": 6.848031236451527e-06,
      "loss": 0.7881,
      "step": 750470
    },
    {
      "epoch": 2.630245297500762,
      "grad_norm": 3.359375,
      "learning_rate": 6.8473822077878256e-06,
      "loss": 0.7947,
      "step": 750480
    },
    {
      "epoch": 2.630280345007658,
      "grad_norm": 3.015625,
      "learning_rate": 6.8467331791241236e-06,
      "loss": 0.7985,
      "step": 750490
    },
    {
      "epoch": 2.6303153925145537,
      "grad_norm": 3.09375,
      "learning_rate": 6.8460841504604216e-06,
      "loss": 0.7989,
      "step": 750500
    },
    {
      "epoch": 2.630350440021449,
      "grad_norm": 2.71875,
      "learning_rate": 6.8454351217967196e-06,
      "loss": 0.809,
      "step": 750510
    },
    {
      "epoch": 2.630385487528345,
      "grad_norm": 2.875,
      "learning_rate": 6.844786093133017e-06,
      "loss": 0.8521,
      "step": 750520
    },
    {
      "epoch": 2.63042053503524,
      "grad_norm": 2.640625,
      "learning_rate": 6.844137064469315e-06,
      "loss": 0.7845,
      "step": 750530
    },
    {
      "epoch": 2.630455582542136,
      "grad_norm": 2.65625,
      "learning_rate": 6.8434880358056136e-06,
      "loss": 0.8309,
      "step": 750540
    },
    {
      "epoch": 2.6304906300490316,
      "grad_norm": 2.109375,
      "learning_rate": 6.8428390071419116e-06,
      "loss": 0.7615,
      "step": 750550
    },
    {
      "epoch": 2.630525677555927,
      "grad_norm": 3.375,
      "learning_rate": 6.8421899784782096e-06,
      "loss": 0.7639,
      "step": 750560
    },
    {
      "epoch": 2.6305607250628227,
      "grad_norm": 2.9375,
      "learning_rate": 6.841540949814508e-06,
      "loss": 0.8856,
      "step": 750570
    },
    {
      "epoch": 2.630595772569718,
      "grad_norm": 2.90625,
      "learning_rate": 6.840891921150806e-06,
      "loss": 0.8021,
      "step": 750580
    },
    {
      "epoch": 2.6306308200766138,
      "grad_norm": 3.15625,
      "learning_rate": 6.840242892487104e-06,
      "loss": 0.6985,
      "step": 750590
    },
    {
      "epoch": 2.6306658675835095,
      "grad_norm": 3.328125,
      "learning_rate": 6.839593863823402e-06,
      "loss": 0.8343,
      "step": 750600
    },
    {
      "epoch": 2.6307009150904053,
      "grad_norm": 2.328125,
      "learning_rate": 6.8389448351596996e-06,
      "loss": 0.7737,
      "step": 750610
    },
    {
      "epoch": 2.6307359625973006,
      "grad_norm": 2.90625,
      "learning_rate": 6.8382958064959976e-06,
      "loss": 0.8864,
      "step": 750620
    },
    {
      "epoch": 2.6307710101041963,
      "grad_norm": 2.40625,
      "learning_rate": 6.837646777832296e-06,
      "loss": 0.7771,
      "step": 750630
    },
    {
      "epoch": 2.6308060576110917,
      "grad_norm": 2.828125,
      "learning_rate": 6.836997749168594e-06,
      "loss": 0.7713,
      "step": 750640
    },
    {
      "epoch": 2.6308411051179874,
      "grad_norm": 2.96875,
      "learning_rate": 6.836348720504892e-06,
      "loss": 0.7847,
      "step": 750650
    },
    {
      "epoch": 2.630876152624883,
      "grad_norm": 2.53125,
      "learning_rate": 6.835699691841191e-06,
      "loss": 0.7866,
      "step": 750660
    },
    {
      "epoch": 2.6309112001317785,
      "grad_norm": 3.359375,
      "learning_rate": 6.835050663177489e-06,
      "loss": 0.8849,
      "step": 750670
    },
    {
      "epoch": 2.6309462476386742,
      "grad_norm": 2.796875,
      "learning_rate": 6.834401634513787e-06,
      "loss": 0.7475,
      "step": 750680
    },
    {
      "epoch": 2.6309812951455696,
      "grad_norm": 2.875,
      "learning_rate": 6.833752605850085e-06,
      "loss": 0.7849,
      "step": 750690
    },
    {
      "epoch": 2.6310163426524653,
      "grad_norm": 2.703125,
      "learning_rate": 6.833103577186384e-06,
      "loss": 0.8009,
      "step": 750700
    },
    {
      "epoch": 2.631051390159361,
      "grad_norm": 3.28125,
      "learning_rate": 6.83245454852268e-06,
      "loss": 0.763,
      "step": 750710
    },
    {
      "epoch": 2.631086437666257,
      "grad_norm": 2.984375,
      "learning_rate": 6.831805519858979e-06,
      "loss": 0.7855,
      "step": 750720
    },
    {
      "epoch": 2.631121485173152,
      "grad_norm": 2.578125,
      "learning_rate": 6.831156491195277e-06,
      "loss": 0.8125,
      "step": 750730
    },
    {
      "epoch": 2.631156532680048,
      "grad_norm": 3.109375,
      "learning_rate": 6.830507462531575e-06,
      "loss": 0.7868,
      "step": 750740
    },
    {
      "epoch": 2.631191580186943,
      "grad_norm": 2.9375,
      "learning_rate": 6.829858433867873e-06,
      "loss": 0.8368,
      "step": 750750
    },
    {
      "epoch": 2.631226627693839,
      "grad_norm": 2.59375,
      "learning_rate": 6.829209405204172e-06,
      "loss": 0.7804,
      "step": 750760
    },
    {
      "epoch": 2.6312616752007347,
      "grad_norm": 3.1875,
      "learning_rate": 6.82856037654047e-06,
      "loss": 0.7849,
      "step": 750770
    },
    {
      "epoch": 2.63129672270763,
      "grad_norm": 2.875,
      "learning_rate": 6.827911347876768e-06,
      "loss": 0.8391,
      "step": 750780
    },
    {
      "epoch": 2.631331770214526,
      "grad_norm": 2.6875,
      "learning_rate": 6.827262319213067e-06,
      "loss": 0.7798,
      "step": 750790
    },
    {
      "epoch": 2.631366817721421,
      "grad_norm": 3.140625,
      "learning_rate": 6.826613290549363e-06,
      "loss": 0.7594,
      "step": 750800
    },
    {
      "epoch": 2.631401865228317,
      "grad_norm": 3.359375,
      "learning_rate": 6.825964261885662e-06,
      "loss": 0.7905,
      "step": 750810
    },
    {
      "epoch": 2.6314369127352126,
      "grad_norm": 2.734375,
      "learning_rate": 6.82531523322196e-06,
      "loss": 0.8365,
      "step": 750820
    },
    {
      "epoch": 2.6314719602421084,
      "grad_norm": 3.265625,
      "learning_rate": 6.824666204558258e-06,
      "loss": 0.9103,
      "step": 750830
    },
    {
      "epoch": 2.6315070077490037,
      "grad_norm": 2.78125,
      "learning_rate": 6.824017175894556e-06,
      "loss": 0.7811,
      "step": 750840
    },
    {
      "epoch": 2.6315420552558995,
      "grad_norm": 2.75,
      "learning_rate": 6.823368147230855e-06,
      "loss": 0.8647,
      "step": 750850
    },
    {
      "epoch": 2.6315771027627948,
      "grad_norm": 3.015625,
      "learning_rate": 6.822719118567153e-06,
      "loss": 0.8098,
      "step": 750860
    },
    {
      "epoch": 2.6316121502696905,
      "grad_norm": 3.234375,
      "learning_rate": 6.822070089903451e-06,
      "loss": 0.7838,
      "step": 750870
    },
    {
      "epoch": 2.6316471977765863,
      "grad_norm": 3.0625,
      "learning_rate": 6.82142106123975e-06,
      "loss": 0.8115,
      "step": 750880
    },
    {
      "epoch": 2.6316822452834816,
      "grad_norm": 2.46875,
      "learning_rate": 6.820772032576048e-06,
      "loss": 0.8188,
      "step": 750890
    },
    {
      "epoch": 2.6317172927903774,
      "grad_norm": 2.59375,
      "learning_rate": 6.820123003912345e-06,
      "loss": 0.7553,
      "step": 750900
    },
    {
      "epoch": 2.631752340297273,
      "grad_norm": 2.828125,
      "learning_rate": 6.819473975248643e-06,
      "loss": 0.8457,
      "step": 750910
    },
    {
      "epoch": 2.6317873878041684,
      "grad_norm": 3.09375,
      "learning_rate": 6.818824946584941e-06,
      "loss": 0.7712,
      "step": 750920
    },
    {
      "epoch": 2.631822435311064,
      "grad_norm": 2.71875,
      "learning_rate": 6.818175917921239e-06,
      "loss": 0.8097,
      "step": 750930
    },
    {
      "epoch": 2.63185748281796,
      "grad_norm": 2.765625,
      "learning_rate": 6.817526889257538e-06,
      "loss": 0.7967,
      "step": 750940
    },
    {
      "epoch": 2.6318925303248553,
      "grad_norm": 3.09375,
      "learning_rate": 6.816877860593836e-06,
      "loss": 0.8084,
      "step": 750950
    },
    {
      "epoch": 2.631927577831751,
      "grad_norm": 2.8125,
      "learning_rate": 6.816228831930134e-06,
      "loss": 0.8542,
      "step": 750960
    },
    {
      "epoch": 2.6319626253386463,
      "grad_norm": 2.734375,
      "learning_rate": 6.815579803266433e-06,
      "loss": 0.7909,
      "step": 750970
    },
    {
      "epoch": 2.631997672845542,
      "grad_norm": 2.890625,
      "learning_rate": 6.814930774602731e-06,
      "loss": 0.7582,
      "step": 750980
    },
    {
      "epoch": 2.632032720352438,
      "grad_norm": 2.875,
      "learning_rate": 6.814281745939027e-06,
      "loss": 0.8036,
      "step": 750990
    },
    {
      "epoch": 2.632067767859333,
      "grad_norm": 2.96875,
      "learning_rate": 6.813632717275326e-06,
      "loss": 0.785,
      "step": 751000
    },
    {
      "epoch": 2.632102815366229,
      "grad_norm": 2.8125,
      "learning_rate": 6.812983688611624e-06,
      "loss": 0.7967,
      "step": 751010
    },
    {
      "epoch": 2.6321378628731247,
      "grad_norm": 2.71875,
      "learning_rate": 6.812334659947922e-06,
      "loss": 0.8229,
      "step": 751020
    },
    {
      "epoch": 2.63217291038002,
      "grad_norm": 2.875,
      "learning_rate": 6.811685631284221e-06,
      "loss": 0.7494,
      "step": 751030
    },
    {
      "epoch": 2.6322079578869158,
      "grad_norm": 3.078125,
      "learning_rate": 6.811036602620519e-06,
      "loss": 0.7836,
      "step": 751040
    },
    {
      "epoch": 2.6322430053938115,
      "grad_norm": 3.0625,
      "learning_rate": 6.810387573956817e-06,
      "loss": 0.753,
      "step": 751050
    },
    {
      "epoch": 2.632278052900707,
      "grad_norm": 2.921875,
      "learning_rate": 6.809738545293115e-06,
      "loss": 0.824,
      "step": 751060
    },
    {
      "epoch": 2.6323131004076026,
      "grad_norm": 2.625,
      "learning_rate": 6.8090895166294135e-06,
      "loss": 0.7285,
      "step": 751070
    },
    {
      "epoch": 2.632348147914498,
      "grad_norm": 4.90625,
      "learning_rate": 6.80844048796571e-06,
      "loss": 0.7893,
      "step": 751080
    },
    {
      "epoch": 2.6323831954213937,
      "grad_norm": 2.96875,
      "learning_rate": 6.807791459302009e-06,
      "loss": 0.7515,
      "step": 751090
    },
    {
      "epoch": 2.6324182429282894,
      "grad_norm": 3.03125,
      "learning_rate": 6.807142430638307e-06,
      "loss": 0.7723,
      "step": 751100
    },
    {
      "epoch": 2.632453290435185,
      "grad_norm": 3.015625,
      "learning_rate": 6.806493401974605e-06,
      "loss": 0.7654,
      "step": 751110
    },
    {
      "epoch": 2.6324883379420805,
      "grad_norm": 2.71875,
      "learning_rate": 6.8058443733109035e-06,
      "loss": 0.8022,
      "step": 751120
    },
    {
      "epoch": 2.6325233854489762,
      "grad_norm": 2.6875,
      "learning_rate": 6.8051953446472015e-06,
      "loss": 0.7861,
      "step": 751130
    },
    {
      "epoch": 2.6325584329558716,
      "grad_norm": 2.71875,
      "learning_rate": 6.8045463159834995e-06,
      "loss": 0.6908,
      "step": 751140
    },
    {
      "epoch": 2.6325934804627673,
      "grad_norm": 2.59375,
      "learning_rate": 6.8038972873197975e-06,
      "loss": 0.8038,
      "step": 751150
    },
    {
      "epoch": 2.632628527969663,
      "grad_norm": 3.0625,
      "learning_rate": 6.803248258656096e-06,
      "loss": 0.8652,
      "step": 751160
    },
    {
      "epoch": 2.6326635754765584,
      "grad_norm": 3.078125,
      "learning_rate": 6.802599229992394e-06,
      "loss": 0.8311,
      "step": 751170
    },
    {
      "epoch": 2.632698622983454,
      "grad_norm": 2.5,
      "learning_rate": 6.8019502013286915e-06,
      "loss": 0.7591,
      "step": 751180
    },
    {
      "epoch": 2.6327336704903495,
      "grad_norm": 2.90625,
      "learning_rate": 6.8013011726649895e-06,
      "loss": 0.7492,
      "step": 751190
    },
    {
      "epoch": 2.632768717997245,
      "grad_norm": 2.890625,
      "learning_rate": 6.8006521440012875e-06,
      "loss": 0.8386,
      "step": 751200
    },
    {
      "epoch": 2.632803765504141,
      "grad_norm": 3.046875,
      "learning_rate": 6.800003115337586e-06,
      "loss": 0.7819,
      "step": 751210
    },
    {
      "epoch": 2.6328388130110367,
      "grad_norm": 2.40625,
      "learning_rate": 6.799354086673884e-06,
      "loss": 0.8482,
      "step": 751220
    },
    {
      "epoch": 2.632873860517932,
      "grad_norm": 3.296875,
      "learning_rate": 6.798705058010182e-06,
      "loss": 0.7684,
      "step": 751230
    },
    {
      "epoch": 2.632908908024828,
      "grad_norm": 3.34375,
      "learning_rate": 6.79805602934648e-06,
      "loss": 0.7605,
      "step": 751240
    },
    {
      "epoch": 2.632943955531723,
      "grad_norm": 2.640625,
      "learning_rate": 6.797407000682779e-06,
      "loss": 0.8787,
      "step": 751250
    },
    {
      "epoch": 2.632979003038619,
      "grad_norm": 2.828125,
      "learning_rate": 6.796757972019077e-06,
      "loss": 0.7673,
      "step": 751260
    },
    {
      "epoch": 2.6330140505455146,
      "grad_norm": 2.90625,
      "learning_rate": 6.796108943355374e-06,
      "loss": 0.7932,
      "step": 751270
    },
    {
      "epoch": 2.63304909805241,
      "grad_norm": 2.90625,
      "learning_rate": 6.795459914691672e-06,
      "loss": 0.7749,
      "step": 751280
    },
    {
      "epoch": 2.6330841455593057,
      "grad_norm": 3.53125,
      "learning_rate": 6.79481088602797e-06,
      "loss": 0.8421,
      "step": 751290
    },
    {
      "epoch": 2.633119193066201,
      "grad_norm": 2.953125,
      "learning_rate": 6.794161857364268e-06,
      "loss": 0.8563,
      "step": 751300
    },
    {
      "epoch": 2.6331542405730968,
      "grad_norm": 2.765625,
      "learning_rate": 6.793512828700567e-06,
      "loss": 0.8093,
      "step": 751310
    },
    {
      "epoch": 2.6331892880799925,
      "grad_norm": 3.234375,
      "learning_rate": 6.792863800036865e-06,
      "loss": 0.8337,
      "step": 751320
    },
    {
      "epoch": 2.6332243355868883,
      "grad_norm": 2.5625,
      "learning_rate": 6.792214771373163e-06,
      "loss": 0.7169,
      "step": 751330
    },
    {
      "epoch": 2.6332593830937836,
      "grad_norm": 2.703125,
      "learning_rate": 6.791565742709462e-06,
      "loss": 0.8145,
      "step": 751340
    },
    {
      "epoch": 2.6332944306006794,
      "grad_norm": 2.890625,
      "learning_rate": 6.79091671404576e-06,
      "loss": 0.8623,
      "step": 751350
    },
    {
      "epoch": 2.6333294781075747,
      "grad_norm": 3.0,
      "learning_rate": 6.790267685382058e-06,
      "loss": 0.6981,
      "step": 751360
    },
    {
      "epoch": 2.6333645256144704,
      "grad_norm": 2.921875,
      "learning_rate": 6.789618656718355e-06,
      "loss": 0.8659,
      "step": 751370
    },
    {
      "epoch": 2.633399573121366,
      "grad_norm": 3.140625,
      "learning_rate": 6.788969628054653e-06,
      "loss": 0.8687,
      "step": 751380
    },
    {
      "epoch": 2.6334346206282615,
      "grad_norm": 2.359375,
      "learning_rate": 6.788320599390951e-06,
      "loss": 0.7098,
      "step": 751390
    },
    {
      "epoch": 2.6334696681351573,
      "grad_norm": 3.125,
      "learning_rate": 6.78767157072725e-06,
      "loss": 0.7634,
      "step": 751400
    },
    {
      "epoch": 2.6335047156420526,
      "grad_norm": 3.046875,
      "learning_rate": 6.787022542063548e-06,
      "loss": 0.8129,
      "step": 751410
    },
    {
      "epoch": 2.6335397631489483,
      "grad_norm": 2.765625,
      "learning_rate": 6.786373513399846e-06,
      "loss": 0.8995,
      "step": 751420
    },
    {
      "epoch": 2.633574810655844,
      "grad_norm": 3.046875,
      "learning_rate": 6.785724484736145e-06,
      "loss": 0.8498,
      "step": 751430
    },
    {
      "epoch": 2.63360985816274,
      "grad_norm": 2.734375,
      "learning_rate": 6.785075456072443e-06,
      "loss": 0.7847,
      "step": 751440
    },
    {
      "epoch": 2.633644905669635,
      "grad_norm": 3.125,
      "learning_rate": 6.784426427408741e-06,
      "loss": 0.8111,
      "step": 751450
    },
    {
      "epoch": 2.633679953176531,
      "grad_norm": 2.78125,
      "learning_rate": 6.783777398745038e-06,
      "loss": 0.799,
      "step": 751460
    },
    {
      "epoch": 2.6337150006834262,
      "grad_norm": 2.640625,
      "learning_rate": 6.783128370081336e-06,
      "loss": 0.7385,
      "step": 751470
    },
    {
      "epoch": 2.633750048190322,
      "grad_norm": 2.53125,
      "learning_rate": 6.782479341417634e-06,
      "loss": 0.7236,
      "step": 751480
    },
    {
      "epoch": 2.6337850956972177,
      "grad_norm": 3.265625,
      "learning_rate": 6.781830312753933e-06,
      "loss": 0.785,
      "step": 751490
    },
    {
      "epoch": 2.633820143204113,
      "grad_norm": 3.15625,
      "learning_rate": 6.781181284090231e-06,
      "loss": 0.8966,
      "step": 751500
    },
    {
      "epoch": 2.633855190711009,
      "grad_norm": 2.640625,
      "learning_rate": 6.780532255426529e-06,
      "loss": 0.7953,
      "step": 751510
    },
    {
      "epoch": 2.633890238217904,
      "grad_norm": 2.921875,
      "learning_rate": 6.779883226762828e-06,
      "loss": 0.796,
      "step": 751520
    },
    {
      "epoch": 2.6339252857248,
      "grad_norm": 3.03125,
      "learning_rate": 6.779234198099126e-06,
      "loss": 0.8325,
      "step": 751530
    },
    {
      "epoch": 2.6339603332316956,
      "grad_norm": 3.625,
      "learning_rate": 6.778585169435424e-06,
      "loss": 0.8365,
      "step": 751540
    },
    {
      "epoch": 2.6339953807385914,
      "grad_norm": 2.984375,
      "learning_rate": 6.777936140771721e-06,
      "loss": 0.8173,
      "step": 751550
    },
    {
      "epoch": 2.6340304282454867,
      "grad_norm": 3.5,
      "learning_rate": 6.777287112108019e-06,
      "loss": 0.8256,
      "step": 751560
    },
    {
      "epoch": 2.6340654757523825,
      "grad_norm": 3.171875,
      "learning_rate": 6.776638083444317e-06,
      "loss": 0.8198,
      "step": 751570
    },
    {
      "epoch": 2.634100523259278,
      "grad_norm": 3.34375,
      "learning_rate": 6.775989054780616e-06,
      "loss": 0.7548,
      "step": 751580
    },
    {
      "epoch": 2.6341355707661736,
      "grad_norm": 3.15625,
      "learning_rate": 6.775340026116914e-06,
      "loss": 0.822,
      "step": 751590
    },
    {
      "epoch": 2.6341706182730693,
      "grad_norm": 3.125,
      "learning_rate": 6.774690997453212e-06,
      "loss": 0.7736,
      "step": 751600
    },
    {
      "epoch": 2.6342056657799646,
      "grad_norm": 2.890625,
      "learning_rate": 6.77404196878951e-06,
      "loss": 0.8085,
      "step": 751610
    },
    {
      "epoch": 2.6342407132868604,
      "grad_norm": 3.0,
      "learning_rate": 6.7733929401258086e-06,
      "loss": 0.7496,
      "step": 751620
    },
    {
      "epoch": 2.6342757607937557,
      "grad_norm": 3.125,
      "learning_rate": 6.7727439114621066e-06,
      "loss": 0.8266,
      "step": 751630
    },
    {
      "epoch": 2.6343108083006515,
      "grad_norm": 2.828125,
      "learning_rate": 6.7720948827984046e-06,
      "loss": 0.8139,
      "step": 751640
    },
    {
      "epoch": 2.634345855807547,
      "grad_norm": 2.890625,
      "learning_rate": 6.771445854134702e-06,
      "loss": 0.8225,
      "step": 751650
    },
    {
      "epoch": 2.634380903314443,
      "grad_norm": 2.984375,
      "learning_rate": 6.770796825471e-06,
      "loss": 0.8078,
      "step": 751660
    },
    {
      "epoch": 2.6344159508213383,
      "grad_norm": 3.0,
      "learning_rate": 6.7701477968072986e-06,
      "loss": 0.7593,
      "step": 751670
    },
    {
      "epoch": 2.634450998328234,
      "grad_norm": 2.828125,
      "learning_rate": 6.7694987681435966e-06,
      "loss": 0.847,
      "step": 751680
    },
    {
      "epoch": 2.6344860458351294,
      "grad_norm": 4.4375,
      "learning_rate": 6.7688497394798946e-06,
      "loss": 0.8665,
      "step": 751690
    },
    {
      "epoch": 2.634521093342025,
      "grad_norm": 2.859375,
      "learning_rate": 6.7682007108161926e-06,
      "loss": 0.8233,
      "step": 751700
    },
    {
      "epoch": 2.634556140848921,
      "grad_norm": 3.046875,
      "learning_rate": 6.767551682152491e-06,
      "loss": 0.8082,
      "step": 751710
    },
    {
      "epoch": 2.634591188355816,
      "grad_norm": 2.53125,
      "learning_rate": 6.766902653488789e-06,
      "loss": 0.7785,
      "step": 751720
    },
    {
      "epoch": 2.634626235862712,
      "grad_norm": 2.8125,
      "learning_rate": 6.766253624825087e-06,
      "loss": 0.8572,
      "step": 751730
    },
    {
      "epoch": 2.6346612833696073,
      "grad_norm": 2.84375,
      "learning_rate": 6.7656045961613846e-06,
      "loss": 0.7558,
      "step": 751740
    },
    {
      "epoch": 2.634696330876503,
      "grad_norm": 3.109375,
      "learning_rate": 6.7649555674976826e-06,
      "loss": 0.8487,
      "step": 751750
    },
    {
      "epoch": 2.6347313783833988,
      "grad_norm": 2.84375,
      "learning_rate": 6.764306538833981e-06,
      "loss": 0.7117,
      "step": 751760
    },
    {
      "epoch": 2.6347664258902945,
      "grad_norm": 2.734375,
      "learning_rate": 6.763657510170279e-06,
      "loss": 0.765,
      "step": 751770
    },
    {
      "epoch": 2.63480147339719,
      "grad_norm": 3.0625,
      "learning_rate": 6.763008481506577e-06,
      "loss": 0.8047,
      "step": 751780
    },
    {
      "epoch": 2.6348365209040856,
      "grad_norm": 2.75,
      "learning_rate": 6.762359452842875e-06,
      "loss": 0.7052,
      "step": 751790
    },
    {
      "epoch": 2.634871568410981,
      "grad_norm": 2.9375,
      "learning_rate": 6.761710424179174e-06,
      "loss": 0.7915,
      "step": 751800
    },
    {
      "epoch": 2.6349066159178767,
      "grad_norm": 3.046875,
      "learning_rate": 6.761061395515472e-06,
      "loss": 0.8655,
      "step": 751810
    },
    {
      "epoch": 2.6349416634247724,
      "grad_norm": 2.953125,
      "learning_rate": 6.76041236685177e-06,
      "loss": 0.864,
      "step": 751820
    },
    {
      "epoch": 2.6349767109316677,
      "grad_norm": 2.8125,
      "learning_rate": 6.759763338188069e-06,
      "loss": 0.7905,
      "step": 751830
    },
    {
      "epoch": 2.6350117584385635,
      "grad_norm": 2.859375,
      "learning_rate": 6.759114309524365e-06,
      "loss": 0.7676,
      "step": 751840
    },
    {
      "epoch": 2.635046805945459,
      "grad_norm": 2.90625,
      "learning_rate": 6.758465280860663e-06,
      "loss": 0.7978,
      "step": 751850
    },
    {
      "epoch": 2.6350818534523546,
      "grad_norm": 3.203125,
      "learning_rate": 6.757816252196962e-06,
      "loss": 0.8836,
      "step": 751860
    },
    {
      "epoch": 2.6351169009592503,
      "grad_norm": 3.40625,
      "learning_rate": 6.75716722353326e-06,
      "loss": 0.8557,
      "step": 751870
    },
    {
      "epoch": 2.635151948466146,
      "grad_norm": 3.109375,
      "learning_rate": 6.756518194869558e-06,
      "loss": 0.7811,
      "step": 751880
    },
    {
      "epoch": 2.6351869959730414,
      "grad_norm": 3.28125,
      "learning_rate": 6.755869166205857e-06,
      "loss": 0.8293,
      "step": 751890
    },
    {
      "epoch": 2.635222043479937,
      "grad_norm": 2.71875,
      "learning_rate": 6.755220137542155e-06,
      "loss": 0.7301,
      "step": 751900
    },
    {
      "epoch": 2.6352570909868325,
      "grad_norm": 2.8125,
      "learning_rate": 6.754571108878453e-06,
      "loss": 0.8579,
      "step": 751910
    },
    {
      "epoch": 2.6352921384937282,
      "grad_norm": 3.0,
      "learning_rate": 6.753922080214752e-06,
      "loss": 0.7879,
      "step": 751920
    },
    {
      "epoch": 2.635327186000624,
      "grad_norm": 2.953125,
      "learning_rate": 6.753273051551048e-06,
      "loss": 0.8071,
      "step": 751930
    },
    {
      "epoch": 2.6353622335075193,
      "grad_norm": 2.6875,
      "learning_rate": 6.752624022887346e-06,
      "loss": 0.7875,
      "step": 751940
    },
    {
      "epoch": 2.635397281014415,
      "grad_norm": 2.796875,
      "learning_rate": 6.751974994223645e-06,
      "loss": 0.7666,
      "step": 751950
    },
    {
      "epoch": 2.6354323285213104,
      "grad_norm": 3.03125,
      "learning_rate": 6.751325965559943e-06,
      "loss": 0.777,
      "step": 751960
    },
    {
      "epoch": 2.635467376028206,
      "grad_norm": 3.046875,
      "learning_rate": 6.750676936896241e-06,
      "loss": 0.8162,
      "step": 751970
    },
    {
      "epoch": 2.635502423535102,
      "grad_norm": 2.5625,
      "learning_rate": 6.75002790823254e-06,
      "loss": 0.7944,
      "step": 751980
    },
    {
      "epoch": 2.6355374710419976,
      "grad_norm": 2.90625,
      "learning_rate": 6.749378879568838e-06,
      "loss": 0.7642,
      "step": 751990
    },
    {
      "epoch": 2.635572518548893,
      "grad_norm": 3.1875,
      "learning_rate": 6.748729850905136e-06,
      "loss": 0.8725,
      "step": 752000
    },
    {
      "epoch": 2.6356075660557887,
      "grad_norm": 3.15625,
      "learning_rate": 6.748080822241434e-06,
      "loss": 0.8803,
      "step": 752010
    },
    {
      "epoch": 2.635642613562684,
      "grad_norm": 3.515625,
      "learning_rate": 6.747431793577731e-06,
      "loss": 0.8257,
      "step": 752020
    },
    {
      "epoch": 2.63567766106958,
      "grad_norm": 2.6875,
      "learning_rate": 6.746782764914029e-06,
      "loss": 0.8632,
      "step": 752030
    },
    {
      "epoch": 2.6357127085764755,
      "grad_norm": 2.90625,
      "learning_rate": 6.746133736250328e-06,
      "loss": 0.8273,
      "step": 752040
    },
    {
      "epoch": 2.635747756083371,
      "grad_norm": 3.015625,
      "learning_rate": 6.745484707586626e-06,
      "loss": 0.8188,
      "step": 752050
    },
    {
      "epoch": 2.6357828035902666,
      "grad_norm": 2.625,
      "learning_rate": 6.744835678922924e-06,
      "loss": 0.714,
      "step": 752060
    },
    {
      "epoch": 2.635817851097162,
      "grad_norm": 2.796875,
      "learning_rate": 6.744186650259223e-06,
      "loss": 0.838,
      "step": 752070
    },
    {
      "epoch": 2.6358528986040577,
      "grad_norm": 2.65625,
      "learning_rate": 6.743537621595521e-06,
      "loss": 0.6987,
      "step": 752080
    },
    {
      "epoch": 2.6358879461109535,
      "grad_norm": 3.28125,
      "learning_rate": 6.742888592931819e-06,
      "loss": 0.8218,
      "step": 752090
    },
    {
      "epoch": 2.635922993617849,
      "grad_norm": 3.046875,
      "learning_rate": 6.742239564268117e-06,
      "loss": 0.807,
      "step": 752100
    },
    {
      "epoch": 2.6359580411247445,
      "grad_norm": 2.90625,
      "learning_rate": 6.741590535604416e-06,
      "loss": 0.7874,
      "step": 752110
    },
    {
      "epoch": 2.6359930886316403,
      "grad_norm": 3.125,
      "learning_rate": 6.740941506940712e-06,
      "loss": 0.8166,
      "step": 752120
    },
    {
      "epoch": 2.6360281361385356,
      "grad_norm": 3.5625,
      "learning_rate": 6.740292478277011e-06,
      "loss": 0.8005,
      "step": 752130
    },
    {
      "epoch": 2.6360631836454314,
      "grad_norm": 2.96875,
      "learning_rate": 6.739643449613309e-06,
      "loss": 0.7741,
      "step": 752140
    },
    {
      "epoch": 2.636098231152327,
      "grad_norm": 2.78125,
      "learning_rate": 6.738994420949607e-06,
      "loss": 0.8512,
      "step": 752150
    },
    {
      "epoch": 2.6361332786592224,
      "grad_norm": 2.90625,
      "learning_rate": 6.738345392285906e-06,
      "loss": 0.7825,
      "step": 752160
    },
    {
      "epoch": 2.636168326166118,
      "grad_norm": 2.96875,
      "learning_rate": 6.737696363622204e-06,
      "loss": 0.776,
      "step": 752170
    },
    {
      "epoch": 2.6362033736730135,
      "grad_norm": 2.953125,
      "learning_rate": 6.737047334958502e-06,
      "loss": 0.7808,
      "step": 752180
    },
    {
      "epoch": 2.6362384211799093,
      "grad_norm": 3.109375,
      "learning_rate": 6.7363983062948e-06,
      "loss": 0.811,
      "step": 752190
    },
    {
      "epoch": 2.636273468686805,
      "grad_norm": 2.953125,
      "learning_rate": 6.7357492776310985e-06,
      "loss": 0.8269,
      "step": 752200
    },
    {
      "epoch": 2.6363085161937008,
      "grad_norm": 3.28125,
      "learning_rate": 6.735100248967395e-06,
      "loss": 0.8315,
      "step": 752210
    },
    {
      "epoch": 2.636343563700596,
      "grad_norm": 3.234375,
      "learning_rate": 6.734451220303694e-06,
      "loss": 0.776,
      "step": 752220
    },
    {
      "epoch": 2.636378611207492,
      "grad_norm": 3.734375,
      "learning_rate": 6.733802191639992e-06,
      "loss": 0.9231,
      "step": 752230
    },
    {
      "epoch": 2.636413658714387,
      "grad_norm": 3.390625,
      "learning_rate": 6.73315316297629e-06,
      "loss": 0.8579,
      "step": 752240
    },
    {
      "epoch": 2.636448706221283,
      "grad_norm": 2.828125,
      "learning_rate": 6.732504134312588e-06,
      "loss": 0.8316,
      "step": 752250
    },
    {
      "epoch": 2.6364837537281787,
      "grad_norm": 3.234375,
      "learning_rate": 6.7318551056488865e-06,
      "loss": 0.7967,
      "step": 752260
    },
    {
      "epoch": 2.636518801235074,
      "grad_norm": 3.078125,
      "learning_rate": 6.7312060769851845e-06,
      "loss": 0.8083,
      "step": 752270
    },
    {
      "epoch": 2.6365538487419697,
      "grad_norm": 2.859375,
      "learning_rate": 6.7305570483214825e-06,
      "loss": 0.7948,
      "step": 752280
    },
    {
      "epoch": 2.6365888962488655,
      "grad_norm": 3.15625,
      "learning_rate": 6.729908019657781e-06,
      "loss": 0.7992,
      "step": 752290
    },
    {
      "epoch": 2.636623943755761,
      "grad_norm": 2.84375,
      "learning_rate": 6.729258990994079e-06,
      "loss": 0.8342,
      "step": 752300
    },
    {
      "epoch": 2.6366589912626566,
      "grad_norm": 2.25,
      "learning_rate": 6.7286099623303765e-06,
      "loss": 0.7495,
      "step": 752310
    },
    {
      "epoch": 2.6366940387695523,
      "grad_norm": 2.75,
      "learning_rate": 6.7279609336666745e-06,
      "loss": 0.802,
      "step": 752320
    },
    {
      "epoch": 2.6367290862764476,
      "grad_norm": 2.8125,
      "learning_rate": 6.7273119050029725e-06,
      "loss": 0.8589,
      "step": 752330
    },
    {
      "epoch": 2.6367641337833434,
      "grad_norm": 2.796875,
      "learning_rate": 6.7266628763392705e-06,
      "loss": 0.7471,
      "step": 752340
    },
    {
      "epoch": 2.6367991812902387,
      "grad_norm": 3.09375,
      "learning_rate": 6.726013847675569e-06,
      "loss": 0.7914,
      "step": 752350
    },
    {
      "epoch": 2.6368342287971345,
      "grad_norm": 2.75,
      "learning_rate": 6.725364819011867e-06,
      "loss": 0.8155,
      "step": 752360
    },
    {
      "epoch": 2.6368692763040302,
      "grad_norm": 3.140625,
      "learning_rate": 6.724715790348165e-06,
      "loss": 0.7839,
      "step": 752370
    },
    {
      "epoch": 2.6369043238109255,
      "grad_norm": 2.640625,
      "learning_rate": 6.724066761684464e-06,
      "loss": 0.6843,
      "step": 752380
    },
    {
      "epoch": 2.6369393713178213,
      "grad_norm": 3.046875,
      "learning_rate": 6.723417733020762e-06,
      "loss": 0.7295,
      "step": 752390
    },
    {
      "epoch": 2.636974418824717,
      "grad_norm": 3.0,
      "learning_rate": 6.7227687043570585e-06,
      "loss": 0.8025,
      "step": 752400
    },
    {
      "epoch": 2.6370094663316124,
      "grad_norm": 3.125,
      "learning_rate": 6.722119675693357e-06,
      "loss": 0.7539,
      "step": 752410
    },
    {
      "epoch": 2.637044513838508,
      "grad_norm": 2.828125,
      "learning_rate": 6.721470647029655e-06,
      "loss": 0.7475,
      "step": 752420
    },
    {
      "epoch": 2.637079561345404,
      "grad_norm": 2.59375,
      "learning_rate": 6.720821618365953e-06,
      "loss": 0.8366,
      "step": 752430
    },
    {
      "epoch": 2.637114608852299,
      "grad_norm": 3.09375,
      "learning_rate": 6.720172589702252e-06,
      "loss": 0.906,
      "step": 752440
    },
    {
      "epoch": 2.637149656359195,
      "grad_norm": 3.03125,
      "learning_rate": 6.71952356103855e-06,
      "loss": 0.8509,
      "step": 752450
    },
    {
      "epoch": 2.6371847038660903,
      "grad_norm": 2.78125,
      "learning_rate": 6.718874532374848e-06,
      "loss": 0.8318,
      "step": 752460
    },
    {
      "epoch": 2.637219751372986,
      "grad_norm": 2.484375,
      "learning_rate": 6.718225503711147e-06,
      "loss": 0.7794,
      "step": 752470
    },
    {
      "epoch": 2.637254798879882,
      "grad_norm": 2.953125,
      "learning_rate": 6.717576475047445e-06,
      "loss": 0.7316,
      "step": 752480
    },
    {
      "epoch": 2.6372898463867775,
      "grad_norm": 3.21875,
      "learning_rate": 6.716927446383743e-06,
      "loss": 0.7126,
      "step": 752490
    },
    {
      "epoch": 2.637324893893673,
      "grad_norm": 2.765625,
      "learning_rate": 6.71627841772004e-06,
      "loss": 0.7409,
      "step": 752500
    },
    {
      "epoch": 2.6373599414005686,
      "grad_norm": 2.828125,
      "learning_rate": 6.715629389056338e-06,
      "loss": 0.7767,
      "step": 752510
    },
    {
      "epoch": 2.637394988907464,
      "grad_norm": 2.5625,
      "learning_rate": 6.714980360392636e-06,
      "loss": 0.8343,
      "step": 752520
    },
    {
      "epoch": 2.6374300364143597,
      "grad_norm": 2.78125,
      "learning_rate": 6.714331331728935e-06,
      "loss": 0.8211,
      "step": 752530
    },
    {
      "epoch": 2.6374650839212554,
      "grad_norm": 3.53125,
      "learning_rate": 6.713682303065233e-06,
      "loss": 0.8063,
      "step": 752540
    },
    {
      "epoch": 2.6375001314281508,
      "grad_norm": 2.890625,
      "learning_rate": 6.713033274401531e-06,
      "loss": 0.7889,
      "step": 752550
    },
    {
      "epoch": 2.6375351789350465,
      "grad_norm": 2.65625,
      "learning_rate": 6.712384245737829e-06,
      "loss": 0.7905,
      "step": 752560
    },
    {
      "epoch": 2.637570226441942,
      "grad_norm": 2.8125,
      "learning_rate": 6.711735217074128e-06,
      "loss": 0.7516,
      "step": 752570
    },
    {
      "epoch": 2.6376052739488376,
      "grad_norm": 2.6875,
      "learning_rate": 6.711086188410426e-06,
      "loss": 0.7688,
      "step": 752580
    },
    {
      "epoch": 2.6376403214557333,
      "grad_norm": 2.796875,
      "learning_rate": 6.710437159746723e-06,
      "loss": 0.836,
      "step": 752590
    },
    {
      "epoch": 2.637675368962629,
      "grad_norm": 3.21875,
      "learning_rate": 6.709788131083021e-06,
      "loss": 0.7607,
      "step": 752600
    },
    {
      "epoch": 2.6377104164695244,
      "grad_norm": 2.609375,
      "learning_rate": 6.709139102419319e-06,
      "loss": 0.8448,
      "step": 752610
    },
    {
      "epoch": 2.63774546397642,
      "grad_norm": 2.859375,
      "learning_rate": 6.708490073755618e-06,
      "loss": 0.7876,
      "step": 752620
    },
    {
      "epoch": 2.6377805114833155,
      "grad_norm": 2.84375,
      "learning_rate": 6.707841045091916e-06,
      "loss": 0.8136,
      "step": 752630
    },
    {
      "epoch": 2.6378155589902113,
      "grad_norm": 2.5625,
      "learning_rate": 6.707192016428214e-06,
      "loss": 0.789,
      "step": 752640
    },
    {
      "epoch": 2.637850606497107,
      "grad_norm": 2.734375,
      "learning_rate": 6.706542987764512e-06,
      "loss": 0.7895,
      "step": 752650
    },
    {
      "epoch": 2.6378856540040023,
      "grad_norm": 3.40625,
      "learning_rate": 6.705893959100811e-06,
      "loss": 0.7943,
      "step": 752660
    },
    {
      "epoch": 2.637920701510898,
      "grad_norm": 2.5625,
      "learning_rate": 6.705244930437109e-06,
      "loss": 0.7781,
      "step": 752670
    },
    {
      "epoch": 2.6379557490177934,
      "grad_norm": 2.65625,
      "learning_rate": 6.704595901773406e-06,
      "loss": 0.8227,
      "step": 752680
    },
    {
      "epoch": 2.637990796524689,
      "grad_norm": 3.15625,
      "learning_rate": 6.703946873109704e-06,
      "loss": 0.8446,
      "step": 752690
    },
    {
      "epoch": 2.638025844031585,
      "grad_norm": 2.53125,
      "learning_rate": 6.703297844446002e-06,
      "loss": 0.8609,
      "step": 752700
    },
    {
      "epoch": 2.6380608915384807,
      "grad_norm": 2.796875,
      "learning_rate": 6.702648815782301e-06,
      "loss": 0.8112,
      "step": 752710
    },
    {
      "epoch": 2.638095939045376,
      "grad_norm": 3.390625,
      "learning_rate": 6.701999787118599e-06,
      "loss": 0.834,
      "step": 752720
    },
    {
      "epoch": 2.6381309865522717,
      "grad_norm": 2.953125,
      "learning_rate": 6.701350758454897e-06,
      "loss": 0.8712,
      "step": 752730
    },
    {
      "epoch": 2.638166034059167,
      "grad_norm": 2.609375,
      "learning_rate": 6.700701729791195e-06,
      "loss": 0.8174,
      "step": 752740
    },
    {
      "epoch": 2.638201081566063,
      "grad_norm": 2.859375,
      "learning_rate": 6.7000527011274936e-06,
      "loss": 0.7598,
      "step": 752750
    },
    {
      "epoch": 2.6382361290729586,
      "grad_norm": 3.34375,
      "learning_rate": 6.6994036724637916e-06,
      "loss": 0.7771,
      "step": 752760
    },
    {
      "epoch": 2.638271176579854,
      "grad_norm": 2.578125,
      "learning_rate": 6.6987546438000896e-06,
      "loss": 0.8085,
      "step": 752770
    },
    {
      "epoch": 2.6383062240867496,
      "grad_norm": 2.609375,
      "learning_rate": 6.698105615136387e-06,
      "loss": 0.7401,
      "step": 752780
    },
    {
      "epoch": 2.638341271593645,
      "grad_norm": 3.171875,
      "learning_rate": 6.697456586472685e-06,
      "loss": 0.8553,
      "step": 752790
    },
    {
      "epoch": 2.6383763191005407,
      "grad_norm": 3.09375,
      "learning_rate": 6.696807557808983e-06,
      "loss": 0.8428,
      "step": 752800
    },
    {
      "epoch": 2.6384113666074365,
      "grad_norm": 3.078125,
      "learning_rate": 6.6961585291452816e-06,
      "loss": 0.8335,
      "step": 752810
    },
    {
      "epoch": 2.6384464141143322,
      "grad_norm": 2.953125,
      "learning_rate": 6.6955095004815796e-06,
      "loss": 0.8061,
      "step": 752820
    },
    {
      "epoch": 2.6384814616212275,
      "grad_norm": 2.765625,
      "learning_rate": 6.6948604718178776e-06,
      "loss": 0.8321,
      "step": 752830
    },
    {
      "epoch": 2.6385165091281233,
      "grad_norm": 2.984375,
      "learning_rate": 6.694211443154176e-06,
      "loss": 0.8149,
      "step": 752840
    },
    {
      "epoch": 2.6385515566350186,
      "grad_norm": 2.5625,
      "learning_rate": 6.693562414490474e-06,
      "loss": 0.7899,
      "step": 752850
    },
    {
      "epoch": 2.6385866041419144,
      "grad_norm": 3.109375,
      "learning_rate": 6.692913385826772e-06,
      "loss": 0.8923,
      "step": 752860
    },
    {
      "epoch": 2.63862165164881,
      "grad_norm": 3.125,
      "learning_rate": 6.6922643571630696e-06,
      "loss": 0.8067,
      "step": 752870
    },
    {
      "epoch": 2.6386566991557054,
      "grad_norm": 3.25,
      "learning_rate": 6.6916153284993676e-06,
      "loss": 0.8164,
      "step": 752880
    },
    {
      "epoch": 2.638691746662601,
      "grad_norm": 2.96875,
      "learning_rate": 6.6909662998356656e-06,
      "loss": 0.7889,
      "step": 752890
    },
    {
      "epoch": 2.6387267941694965,
      "grad_norm": 3.09375,
      "learning_rate": 6.690317271171964e-06,
      "loss": 0.8185,
      "step": 752900
    },
    {
      "epoch": 2.6387618416763923,
      "grad_norm": 2.703125,
      "learning_rate": 6.689668242508262e-06,
      "loss": 0.7729,
      "step": 752910
    },
    {
      "epoch": 2.638796889183288,
      "grad_norm": 3.0,
      "learning_rate": 6.68901921384456e-06,
      "loss": 0.8183,
      "step": 752920
    },
    {
      "epoch": 2.638831936690184,
      "grad_norm": 2.453125,
      "learning_rate": 6.688370185180859e-06,
      "loss": 0.7493,
      "step": 752930
    },
    {
      "epoch": 2.638866984197079,
      "grad_norm": 2.625,
      "learning_rate": 6.687721156517157e-06,
      "loss": 0.7955,
      "step": 752940
    },
    {
      "epoch": 2.638902031703975,
      "grad_norm": 2.6875,
      "learning_rate": 6.687072127853455e-06,
      "loss": 0.8192,
      "step": 752950
    },
    {
      "epoch": 2.63893707921087,
      "grad_norm": 2.78125,
      "learning_rate": 6.686423099189753e-06,
      "loss": 0.7976,
      "step": 752960
    },
    {
      "epoch": 2.638972126717766,
      "grad_norm": 2.78125,
      "learning_rate": 6.68577407052605e-06,
      "loss": 0.7195,
      "step": 752970
    },
    {
      "epoch": 2.6390071742246617,
      "grad_norm": 3.453125,
      "learning_rate": 6.685125041862348e-06,
      "loss": 0.8815,
      "step": 752980
    },
    {
      "epoch": 2.639042221731557,
      "grad_norm": 2.703125,
      "learning_rate": 6.684476013198647e-06,
      "loss": 0.8097,
      "step": 752990
    },
    {
      "epoch": 2.6390772692384528,
      "grad_norm": 2.765625,
      "learning_rate": 6.683826984534945e-06,
      "loss": 0.7915,
      "step": 753000
    },
    {
      "epoch": 2.639112316745348,
      "grad_norm": 2.578125,
      "learning_rate": 6.683177955871243e-06,
      "loss": 0.6918,
      "step": 753010
    },
    {
      "epoch": 2.639147364252244,
      "grad_norm": 2.703125,
      "learning_rate": 6.682528927207542e-06,
      "loss": 0.7599,
      "step": 753020
    },
    {
      "epoch": 2.6391824117591396,
      "grad_norm": 3.171875,
      "learning_rate": 6.68187989854384e-06,
      "loss": 0.8137,
      "step": 753030
    },
    {
      "epoch": 2.6392174592660353,
      "grad_norm": 3.09375,
      "learning_rate": 6.681230869880138e-06,
      "loss": 0.8439,
      "step": 753040
    },
    {
      "epoch": 2.6392525067729307,
      "grad_norm": 2.984375,
      "learning_rate": 6.680581841216436e-06,
      "loss": 0.8213,
      "step": 753050
    },
    {
      "epoch": 2.6392875542798264,
      "grad_norm": 3.0,
      "learning_rate": 6.679932812552733e-06,
      "loss": 0.8415,
      "step": 753060
    },
    {
      "epoch": 2.6393226017867217,
      "grad_norm": 3.015625,
      "learning_rate": 6.679283783889031e-06,
      "loss": 0.7449,
      "step": 753070
    },
    {
      "epoch": 2.6393576492936175,
      "grad_norm": 3.078125,
      "learning_rate": 6.67863475522533e-06,
      "loss": 0.8097,
      "step": 753080
    },
    {
      "epoch": 2.6393926968005132,
      "grad_norm": 3.125,
      "learning_rate": 6.677985726561628e-06,
      "loss": 0.7317,
      "step": 753090
    },
    {
      "epoch": 2.6394277443074086,
      "grad_norm": 2.640625,
      "learning_rate": 6.677336697897926e-06,
      "loss": 0.8542,
      "step": 753100
    },
    {
      "epoch": 2.6394627918143043,
      "grad_norm": 3.046875,
      "learning_rate": 6.676687669234224e-06,
      "loss": 0.7961,
      "step": 753110
    },
    {
      "epoch": 2.6394978393211996,
      "grad_norm": 2.890625,
      "learning_rate": 6.676038640570523e-06,
      "loss": 0.6927,
      "step": 753120
    },
    {
      "epoch": 2.6395328868280954,
      "grad_norm": 3.0,
      "learning_rate": 6.675389611906821e-06,
      "loss": 0.8444,
      "step": 753130
    },
    {
      "epoch": 2.639567934334991,
      "grad_norm": 3.484375,
      "learning_rate": 6.674740583243119e-06,
      "loss": 0.8019,
      "step": 753140
    },
    {
      "epoch": 2.639602981841887,
      "grad_norm": 3.109375,
      "learning_rate": 6.674091554579416e-06,
      "loss": 0.8509,
      "step": 753150
    },
    {
      "epoch": 2.6396380293487822,
      "grad_norm": 2.8125,
      "learning_rate": 6.673442525915714e-06,
      "loss": 0.8659,
      "step": 753160
    },
    {
      "epoch": 2.639673076855678,
      "grad_norm": 3.4375,
      "learning_rate": 6.672793497252013e-06,
      "loss": 0.8304,
      "step": 753170
    },
    {
      "epoch": 2.6397081243625733,
      "grad_norm": 2.796875,
      "learning_rate": 6.672144468588311e-06,
      "loss": 0.757,
      "step": 753180
    },
    {
      "epoch": 2.639743171869469,
      "grad_norm": 2.6875,
      "learning_rate": 6.671495439924609e-06,
      "loss": 0.7784,
      "step": 753190
    },
    {
      "epoch": 2.639778219376365,
      "grad_norm": 2.390625,
      "learning_rate": 6.670846411260907e-06,
      "loss": 0.8467,
      "step": 753200
    },
    {
      "epoch": 2.63981326688326,
      "grad_norm": 3.0625,
      "learning_rate": 6.670197382597206e-06,
      "loss": 0.8167,
      "step": 753210
    },
    {
      "epoch": 2.639848314390156,
      "grad_norm": 2.875,
      "learning_rate": 6.669548353933504e-06,
      "loss": 0.7536,
      "step": 753220
    },
    {
      "epoch": 2.639883361897051,
      "grad_norm": 3.03125,
      "learning_rate": 6.668899325269802e-06,
      "loss": 0.856,
      "step": 753230
    },
    {
      "epoch": 2.639918409403947,
      "grad_norm": 2.78125,
      "learning_rate": 6.668250296606101e-06,
      "loss": 0.7282,
      "step": 753240
    },
    {
      "epoch": 2.6399534569108427,
      "grad_norm": 3.15625,
      "learning_rate": 6.667601267942397e-06,
      "loss": 0.7925,
      "step": 753250
    },
    {
      "epoch": 2.6399885044177385,
      "grad_norm": 3.078125,
      "learning_rate": 6.666952239278696e-06,
      "loss": 0.8031,
      "step": 753260
    },
    {
      "epoch": 2.640023551924634,
      "grad_norm": 3.03125,
      "learning_rate": 6.666303210614994e-06,
      "loss": 0.8085,
      "step": 753270
    },
    {
      "epoch": 2.6400585994315295,
      "grad_norm": 3.046875,
      "learning_rate": 6.665654181951292e-06,
      "loss": 0.8095,
      "step": 753280
    },
    {
      "epoch": 2.640093646938425,
      "grad_norm": 3.171875,
      "learning_rate": 6.66500515328759e-06,
      "loss": 0.8297,
      "step": 753290
    },
    {
      "epoch": 2.6401286944453206,
      "grad_norm": 3.09375,
      "learning_rate": 6.664356124623889e-06,
      "loss": 0.8308,
      "step": 753300
    },
    {
      "epoch": 2.6401637419522164,
      "grad_norm": 3.015625,
      "learning_rate": 6.663707095960187e-06,
      "loss": 0.8069,
      "step": 753310
    },
    {
      "epoch": 2.6401987894591117,
      "grad_norm": 3.25,
      "learning_rate": 6.663058067296485e-06,
      "loss": 0.7897,
      "step": 753320
    },
    {
      "epoch": 2.6402338369660074,
      "grad_norm": 2.984375,
      "learning_rate": 6.6624090386327835e-06,
      "loss": 0.7251,
      "step": 753330
    },
    {
      "epoch": 2.6402688844729028,
      "grad_norm": 2.640625,
      "learning_rate": 6.66176000996908e-06,
      "loss": 0.7098,
      "step": 753340
    },
    {
      "epoch": 2.6403039319797985,
      "grad_norm": 2.4375,
      "learning_rate": 6.661110981305378e-06,
      "loss": 0.7174,
      "step": 753350
    },
    {
      "epoch": 2.6403389794866943,
      "grad_norm": 2.859375,
      "learning_rate": 6.660461952641677e-06,
      "loss": 0.786,
      "step": 753360
    },
    {
      "epoch": 2.64037402699359,
      "grad_norm": 2.5625,
      "learning_rate": 6.659812923977975e-06,
      "loss": 0.707,
      "step": 753370
    },
    {
      "epoch": 2.6404090745004853,
      "grad_norm": 2.828125,
      "learning_rate": 6.659163895314273e-06,
      "loss": 0.8183,
      "step": 753380
    },
    {
      "epoch": 2.640444122007381,
      "grad_norm": 3.015625,
      "learning_rate": 6.6585148666505715e-06,
      "loss": 0.7555,
      "step": 753390
    },
    {
      "epoch": 2.6404791695142764,
      "grad_norm": 3.296875,
      "learning_rate": 6.6578658379868695e-06,
      "loss": 0.7621,
      "step": 753400
    },
    {
      "epoch": 2.640514217021172,
      "grad_norm": 2.828125,
      "learning_rate": 6.6572168093231675e-06,
      "loss": 0.809,
      "step": 753410
    },
    {
      "epoch": 2.640549264528068,
      "grad_norm": 2.921875,
      "learning_rate": 6.6565677806594655e-06,
      "loss": 0.7591,
      "step": 753420
    },
    {
      "epoch": 2.6405843120349632,
      "grad_norm": 2.671875,
      "learning_rate": 6.655918751995764e-06,
      "loss": 0.7879,
      "step": 753430
    },
    {
      "epoch": 2.640619359541859,
      "grad_norm": 2.78125,
      "learning_rate": 6.655269723332061e-06,
      "loss": 0.781,
      "step": 753440
    },
    {
      "epoch": 2.6406544070487543,
      "grad_norm": 2.921875,
      "learning_rate": 6.6546206946683595e-06,
      "loss": 0.8252,
      "step": 753450
    },
    {
      "epoch": 2.64068945455565,
      "grad_norm": 2.890625,
      "learning_rate": 6.6539716660046575e-06,
      "loss": 0.8193,
      "step": 753460
    },
    {
      "epoch": 2.640724502062546,
      "grad_norm": 2.828125,
      "learning_rate": 6.6533226373409555e-06,
      "loss": 0.8241,
      "step": 753470
    },
    {
      "epoch": 2.6407595495694416,
      "grad_norm": 2.421875,
      "learning_rate": 6.652673608677254e-06,
      "loss": 0.7288,
      "step": 753480
    },
    {
      "epoch": 2.640794597076337,
      "grad_norm": 3.09375,
      "learning_rate": 6.652024580013552e-06,
      "loss": 0.7388,
      "step": 753490
    },
    {
      "epoch": 2.6408296445832327,
      "grad_norm": 2.8125,
      "learning_rate": 6.65137555134985e-06,
      "loss": 0.7129,
      "step": 753500
    },
    {
      "epoch": 2.640864692090128,
      "grad_norm": 2.765625,
      "learning_rate": 6.650726522686148e-06,
      "loss": 0.8038,
      "step": 753510
    },
    {
      "epoch": 2.6408997395970237,
      "grad_norm": 3.421875,
      "learning_rate": 6.650077494022447e-06,
      "loss": 0.84,
      "step": 753520
    },
    {
      "epoch": 2.6409347871039195,
      "grad_norm": 2.59375,
      "learning_rate": 6.6494284653587435e-06,
      "loss": 0.7612,
      "step": 753530
    },
    {
      "epoch": 2.640969834610815,
      "grad_norm": 2.859375,
      "learning_rate": 6.648779436695042e-06,
      "loss": 0.812,
      "step": 753540
    },
    {
      "epoch": 2.6410048821177106,
      "grad_norm": 2.71875,
      "learning_rate": 6.64813040803134e-06,
      "loss": 0.8274,
      "step": 753550
    },
    {
      "epoch": 2.6410399296246063,
      "grad_norm": 2.921875,
      "learning_rate": 6.647481379367638e-06,
      "loss": 0.7792,
      "step": 753560
    },
    {
      "epoch": 2.6410749771315016,
      "grad_norm": 2.78125,
      "learning_rate": 6.646832350703937e-06,
      "loss": 0.8165,
      "step": 753570
    },
    {
      "epoch": 2.6411100246383974,
      "grad_norm": 3.09375,
      "learning_rate": 6.646183322040235e-06,
      "loss": 0.8047,
      "step": 753580
    },
    {
      "epoch": 2.641145072145293,
      "grad_norm": 2.453125,
      "learning_rate": 6.645534293376533e-06,
      "loss": 0.7305,
      "step": 753590
    },
    {
      "epoch": 2.6411801196521885,
      "grad_norm": 3.171875,
      "learning_rate": 6.644885264712831e-06,
      "loss": 0.8249,
      "step": 753600
    },
    {
      "epoch": 2.641215167159084,
      "grad_norm": 2.84375,
      "learning_rate": 6.64423623604913e-06,
      "loss": 0.773,
      "step": 753610
    },
    {
      "epoch": 2.6412502146659795,
      "grad_norm": 2.921875,
      "learning_rate": 6.643587207385426e-06,
      "loss": 0.81,
      "step": 753620
    },
    {
      "epoch": 2.6412852621728753,
      "grad_norm": 3.03125,
      "learning_rate": 6.642938178721725e-06,
      "loss": 0.8473,
      "step": 753630
    },
    {
      "epoch": 2.641320309679771,
      "grad_norm": 2.90625,
      "learning_rate": 6.642289150058023e-06,
      "loss": 0.8138,
      "step": 753640
    },
    {
      "epoch": 2.6413553571866664,
      "grad_norm": 2.96875,
      "learning_rate": 6.641640121394321e-06,
      "loss": 0.8695,
      "step": 753650
    },
    {
      "epoch": 2.641390404693562,
      "grad_norm": 2.71875,
      "learning_rate": 6.640991092730619e-06,
      "loss": 0.8416,
      "step": 753660
    },
    {
      "epoch": 2.641425452200458,
      "grad_norm": 3.3125,
      "learning_rate": 6.640342064066918e-06,
      "loss": 0.7684,
      "step": 753670
    },
    {
      "epoch": 2.641460499707353,
      "grad_norm": 2.609375,
      "learning_rate": 6.639693035403216e-06,
      "loss": 0.7862,
      "step": 753680
    },
    {
      "epoch": 2.641495547214249,
      "grad_norm": 2.828125,
      "learning_rate": 6.639044006739514e-06,
      "loss": 0.7535,
      "step": 753690
    },
    {
      "epoch": 2.6415305947211447,
      "grad_norm": 3.0625,
      "learning_rate": 6.638394978075813e-06,
      "loss": 0.79,
      "step": 753700
    },
    {
      "epoch": 2.64156564222804,
      "grad_norm": 3.265625,
      "learning_rate": 6.637745949412111e-06,
      "loss": 0.9029,
      "step": 753710
    },
    {
      "epoch": 2.641600689734936,
      "grad_norm": 2.703125,
      "learning_rate": 6.637096920748408e-06,
      "loss": 0.747,
      "step": 753720
    },
    {
      "epoch": 2.641635737241831,
      "grad_norm": 2.8125,
      "learning_rate": 6.636447892084706e-06,
      "loss": 0.7285,
      "step": 753730
    },
    {
      "epoch": 2.641670784748727,
      "grad_norm": 3.265625,
      "learning_rate": 6.635798863421004e-06,
      "loss": 0.7958,
      "step": 753740
    },
    {
      "epoch": 2.6417058322556226,
      "grad_norm": 3.375,
      "learning_rate": 6.635149834757302e-06,
      "loss": 0.8133,
      "step": 753750
    },
    {
      "epoch": 2.6417408797625184,
      "grad_norm": 2.859375,
      "learning_rate": 6.634500806093601e-06,
      "loss": 0.8114,
      "step": 753760
    },
    {
      "epoch": 2.6417759272694137,
      "grad_norm": 3.375,
      "learning_rate": 6.633851777429899e-06,
      "loss": 0.7684,
      "step": 753770
    },
    {
      "epoch": 2.6418109747763094,
      "grad_norm": 3.859375,
      "learning_rate": 6.633202748766197e-06,
      "loss": 0.7749,
      "step": 753780
    },
    {
      "epoch": 2.6418460222832048,
      "grad_norm": 3.6875,
      "learning_rate": 6.632553720102496e-06,
      "loss": 0.8515,
      "step": 753790
    },
    {
      "epoch": 2.6418810697901005,
      "grad_norm": 3.28125,
      "learning_rate": 6.631904691438794e-06,
      "loss": 0.8067,
      "step": 753800
    },
    {
      "epoch": 2.6419161172969963,
      "grad_norm": 3.09375,
      "learning_rate": 6.631255662775091e-06,
      "loss": 0.7912,
      "step": 753810
    },
    {
      "epoch": 2.6419511648038916,
      "grad_norm": 3.078125,
      "learning_rate": 6.630606634111389e-06,
      "loss": 0.8181,
      "step": 753820
    },
    {
      "epoch": 2.6419862123107873,
      "grad_norm": 3.765625,
      "learning_rate": 6.629957605447687e-06,
      "loss": 0.6938,
      "step": 753830
    },
    {
      "epoch": 2.6420212598176827,
      "grad_norm": 3.078125,
      "learning_rate": 6.629308576783985e-06,
      "loss": 0.9065,
      "step": 753840
    },
    {
      "epoch": 2.6420563073245784,
      "grad_norm": 2.859375,
      "learning_rate": 6.628659548120284e-06,
      "loss": 0.8379,
      "step": 753850
    },
    {
      "epoch": 2.642091354831474,
      "grad_norm": 3.0,
      "learning_rate": 6.628010519456582e-06,
      "loss": 0.7807,
      "step": 753860
    },
    {
      "epoch": 2.64212640233837,
      "grad_norm": 3.375,
      "learning_rate": 6.62736149079288e-06,
      "loss": 0.8432,
      "step": 753870
    },
    {
      "epoch": 2.6421614498452652,
      "grad_norm": 2.8125,
      "learning_rate": 6.6267124621291786e-06,
      "loss": 0.717,
      "step": 753880
    },
    {
      "epoch": 2.642196497352161,
      "grad_norm": 2.96875,
      "learning_rate": 6.6260634334654766e-06,
      "loss": 0.7494,
      "step": 753890
    },
    {
      "epoch": 2.6422315448590563,
      "grad_norm": 2.640625,
      "learning_rate": 6.6254144048017746e-06,
      "loss": 0.7406,
      "step": 753900
    },
    {
      "epoch": 2.642266592365952,
      "grad_norm": 3.03125,
      "learning_rate": 6.624765376138072e-06,
      "loss": 0.7979,
      "step": 753910
    },
    {
      "epoch": 2.642301639872848,
      "grad_norm": 2.953125,
      "learning_rate": 6.62411634747437e-06,
      "loss": 0.8082,
      "step": 753920
    },
    {
      "epoch": 2.642336687379743,
      "grad_norm": 2.921875,
      "learning_rate": 6.623467318810668e-06,
      "loss": 0.7559,
      "step": 753930
    },
    {
      "epoch": 2.642371734886639,
      "grad_norm": 2.921875,
      "learning_rate": 6.6228182901469666e-06,
      "loss": 0.821,
      "step": 753940
    },
    {
      "epoch": 2.642406782393534,
      "grad_norm": 2.9375,
      "learning_rate": 6.6221692614832646e-06,
      "loss": 0.8695,
      "step": 753950
    },
    {
      "epoch": 2.64244182990043,
      "grad_norm": 3.140625,
      "learning_rate": 6.6215202328195626e-06,
      "loss": 0.8357,
      "step": 753960
    },
    {
      "epoch": 2.6424768774073257,
      "grad_norm": 3.078125,
      "learning_rate": 6.6208712041558606e-06,
      "loss": 0.7841,
      "step": 753970
    },
    {
      "epoch": 2.6425119249142215,
      "grad_norm": 3.34375,
      "learning_rate": 6.620222175492159e-06,
      "loss": 0.9282,
      "step": 753980
    },
    {
      "epoch": 2.642546972421117,
      "grad_norm": 3.015625,
      "learning_rate": 6.619573146828457e-06,
      "loss": 0.7716,
      "step": 753990
    },
    {
      "epoch": 2.6425820199280126,
      "grad_norm": 3.15625,
      "learning_rate": 6.6189241181647546e-06,
      "loss": 0.82,
      "step": 754000
    },
    {
      "epoch": 2.642617067434908,
      "grad_norm": 3.328125,
      "learning_rate": 6.6182750895010526e-06,
      "loss": 0.8602,
      "step": 754010
    },
    {
      "epoch": 2.6426521149418036,
      "grad_norm": 2.5625,
      "learning_rate": 6.6176260608373506e-06,
      "loss": 0.8123,
      "step": 754020
    },
    {
      "epoch": 2.6426871624486994,
      "grad_norm": 2.828125,
      "learning_rate": 6.616977032173649e-06,
      "loss": 0.814,
      "step": 754030
    },
    {
      "epoch": 2.6427222099555947,
      "grad_norm": 2.546875,
      "learning_rate": 6.616328003509947e-06,
      "loss": 0.7677,
      "step": 754040
    },
    {
      "epoch": 2.6427572574624905,
      "grad_norm": 3.46875,
      "learning_rate": 6.615678974846245e-06,
      "loss": 0.7852,
      "step": 754050
    },
    {
      "epoch": 2.6427923049693858,
      "grad_norm": 2.984375,
      "learning_rate": 6.615029946182543e-06,
      "loss": 0.8705,
      "step": 754060
    },
    {
      "epoch": 2.6428273524762815,
      "grad_norm": 3.046875,
      "learning_rate": 6.614380917518842e-06,
      "loss": 0.7816,
      "step": 754070
    },
    {
      "epoch": 2.6428623999831773,
      "grad_norm": 3.21875,
      "learning_rate": 6.61373188885514e-06,
      "loss": 0.8223,
      "step": 754080
    },
    {
      "epoch": 2.642897447490073,
      "grad_norm": 3.140625,
      "learning_rate": 6.613082860191437e-06,
      "loss": 0.8082,
      "step": 754090
    },
    {
      "epoch": 2.6429324949969684,
      "grad_norm": 2.265625,
      "learning_rate": 6.612433831527735e-06,
      "loss": 0.7808,
      "step": 754100
    },
    {
      "epoch": 2.642967542503864,
      "grad_norm": 2.65625,
      "learning_rate": 6.611784802864033e-06,
      "loss": 0.799,
      "step": 754110
    },
    {
      "epoch": 2.6430025900107594,
      "grad_norm": 2.90625,
      "learning_rate": 6.611135774200332e-06,
      "loss": 0.8757,
      "step": 754120
    },
    {
      "epoch": 2.643037637517655,
      "grad_norm": 2.453125,
      "learning_rate": 6.61048674553663e-06,
      "loss": 0.7595,
      "step": 754130
    },
    {
      "epoch": 2.643072685024551,
      "grad_norm": 3.03125,
      "learning_rate": 6.609837716872928e-06,
      "loss": 0.7942,
      "step": 754140
    },
    {
      "epoch": 2.6431077325314463,
      "grad_norm": 2.65625,
      "learning_rate": 6.609188688209226e-06,
      "loss": 0.785,
      "step": 754150
    },
    {
      "epoch": 2.643142780038342,
      "grad_norm": 2.75,
      "learning_rate": 6.608539659545525e-06,
      "loss": 0.737,
      "step": 754160
    },
    {
      "epoch": 2.6431778275452373,
      "grad_norm": 2.6875,
      "learning_rate": 6.607890630881823e-06,
      "loss": 0.7365,
      "step": 754170
    },
    {
      "epoch": 2.643212875052133,
      "grad_norm": 3.109375,
      "learning_rate": 6.607241602218121e-06,
      "loss": 0.8104,
      "step": 754180
    },
    {
      "epoch": 2.643247922559029,
      "grad_norm": 3.15625,
      "learning_rate": 6.606592573554418e-06,
      "loss": 0.8769,
      "step": 754190
    },
    {
      "epoch": 2.6432829700659246,
      "grad_norm": 3.359375,
      "learning_rate": 6.605943544890716e-06,
      "loss": 0.8285,
      "step": 754200
    },
    {
      "epoch": 2.64331801757282,
      "grad_norm": 2.984375,
      "learning_rate": 6.605294516227014e-06,
      "loss": 0.9028,
      "step": 754210
    },
    {
      "epoch": 2.6433530650797157,
      "grad_norm": 2.703125,
      "learning_rate": 6.604645487563313e-06,
      "loss": 0.8033,
      "step": 754220
    },
    {
      "epoch": 2.643388112586611,
      "grad_norm": 2.265625,
      "learning_rate": 6.603996458899611e-06,
      "loss": 0.7314,
      "step": 754230
    },
    {
      "epoch": 2.6434231600935068,
      "grad_norm": 3.0,
      "learning_rate": 6.603347430235909e-06,
      "loss": 0.8254,
      "step": 754240
    },
    {
      "epoch": 2.6434582076004025,
      "grad_norm": 2.765625,
      "learning_rate": 6.602698401572208e-06,
      "loss": 0.7611,
      "step": 754250
    },
    {
      "epoch": 2.643493255107298,
      "grad_norm": 2.875,
      "learning_rate": 6.602049372908506e-06,
      "loss": 0.7817,
      "step": 754260
    },
    {
      "epoch": 2.6435283026141936,
      "grad_norm": 2.671875,
      "learning_rate": 6.601400344244804e-06,
      "loss": 0.7742,
      "step": 754270
    },
    {
      "epoch": 2.643563350121089,
      "grad_norm": 3.21875,
      "learning_rate": 6.600751315581101e-06,
      "loss": 0.8258,
      "step": 754280
    },
    {
      "epoch": 2.6435983976279847,
      "grad_norm": 3.265625,
      "learning_rate": 6.600102286917399e-06,
      "loss": 0.861,
      "step": 754290
    },
    {
      "epoch": 2.6436334451348804,
      "grad_norm": 2.65625,
      "learning_rate": 6.599453258253697e-06,
      "loss": 0.7169,
      "step": 754300
    },
    {
      "epoch": 2.643668492641776,
      "grad_norm": 2.984375,
      "learning_rate": 6.598804229589996e-06,
      "loss": 0.7846,
      "step": 754310
    },
    {
      "epoch": 2.6437035401486715,
      "grad_norm": 2.890625,
      "learning_rate": 6.598155200926294e-06,
      "loss": 0.8372,
      "step": 754320
    },
    {
      "epoch": 2.6437385876555672,
      "grad_norm": 2.65625,
      "learning_rate": 6.597506172262592e-06,
      "loss": 0.7785,
      "step": 754330
    },
    {
      "epoch": 2.6437736351624626,
      "grad_norm": 2.671875,
      "learning_rate": 6.596857143598891e-06,
      "loss": 0.7822,
      "step": 754340
    },
    {
      "epoch": 2.6438086826693583,
      "grad_norm": 3.234375,
      "learning_rate": 6.596208114935189e-06,
      "loss": 0.7967,
      "step": 754350
    },
    {
      "epoch": 2.643843730176254,
      "grad_norm": 2.78125,
      "learning_rate": 6.595559086271487e-06,
      "loss": 0.74,
      "step": 754360
    },
    {
      "epoch": 2.6438787776831494,
      "grad_norm": 3.09375,
      "learning_rate": 6.594910057607785e-06,
      "loss": 0.8222,
      "step": 754370
    },
    {
      "epoch": 2.643913825190045,
      "grad_norm": 2.859375,
      "learning_rate": 6.594261028944082e-06,
      "loss": 0.8225,
      "step": 754380
    },
    {
      "epoch": 2.6439488726969405,
      "grad_norm": 3.140625,
      "learning_rate": 6.59361200028038e-06,
      "loss": 0.7711,
      "step": 754390
    },
    {
      "epoch": 2.643983920203836,
      "grad_norm": 2.78125,
      "learning_rate": 6.592962971616679e-06,
      "loss": 0.7771,
      "step": 754400
    },
    {
      "epoch": 2.644018967710732,
      "grad_norm": 2.75,
      "learning_rate": 6.592313942952977e-06,
      "loss": 0.8116,
      "step": 754410
    },
    {
      "epoch": 2.6440540152176277,
      "grad_norm": 2.78125,
      "learning_rate": 6.591664914289275e-06,
      "loss": 0.7576,
      "step": 754420
    },
    {
      "epoch": 2.644089062724523,
      "grad_norm": 3.109375,
      "learning_rate": 6.591015885625574e-06,
      "loss": 0.7516,
      "step": 754430
    },
    {
      "epoch": 2.644124110231419,
      "grad_norm": 2.328125,
      "learning_rate": 6.590366856961872e-06,
      "loss": 0.8804,
      "step": 754440
    },
    {
      "epoch": 2.644159157738314,
      "grad_norm": 2.96875,
      "learning_rate": 6.58971782829817e-06,
      "loss": 0.8083,
      "step": 754450
    },
    {
      "epoch": 2.64419420524521,
      "grad_norm": 3.0625,
      "learning_rate": 6.589068799634468e-06,
      "loss": 0.7902,
      "step": 754460
    },
    {
      "epoch": 2.6442292527521056,
      "grad_norm": 2.953125,
      "learning_rate": 6.588419770970765e-06,
      "loss": 0.7317,
      "step": 754470
    },
    {
      "epoch": 2.644264300259001,
      "grad_norm": 3.578125,
      "learning_rate": 6.587770742307063e-06,
      "loss": 0.8817,
      "step": 754480
    },
    {
      "epoch": 2.6442993477658967,
      "grad_norm": 2.609375,
      "learning_rate": 6.587121713643362e-06,
      "loss": 0.7677,
      "step": 754490
    },
    {
      "epoch": 2.644334395272792,
      "grad_norm": 3.1875,
      "learning_rate": 6.58647268497966e-06,
      "loss": 0.8765,
      "step": 754500
    },
    {
      "epoch": 2.6443694427796878,
      "grad_norm": 3.03125,
      "learning_rate": 6.585823656315958e-06,
      "loss": 0.8555,
      "step": 754510
    },
    {
      "epoch": 2.6444044902865835,
      "grad_norm": 2.765625,
      "learning_rate": 6.5851746276522565e-06,
      "loss": 0.742,
      "step": 754520
    },
    {
      "epoch": 2.6444395377934793,
      "grad_norm": 3.453125,
      "learning_rate": 6.5845255989885545e-06,
      "loss": 0.8077,
      "step": 754530
    },
    {
      "epoch": 2.6444745853003746,
      "grad_norm": 2.671875,
      "learning_rate": 6.5838765703248525e-06,
      "loss": 0.7642,
      "step": 754540
    },
    {
      "epoch": 2.6445096328072704,
      "grad_norm": 3.265625,
      "learning_rate": 6.5832275416611505e-06,
      "loss": 0.8281,
      "step": 754550
    },
    {
      "epoch": 2.6445446803141657,
      "grad_norm": 2.640625,
      "learning_rate": 6.582578512997448e-06,
      "loss": 0.7768,
      "step": 754560
    },
    {
      "epoch": 2.6445797278210614,
      "grad_norm": 3.328125,
      "learning_rate": 6.581929484333746e-06,
      "loss": 0.8609,
      "step": 754570
    },
    {
      "epoch": 2.644614775327957,
      "grad_norm": 2.78125,
      "learning_rate": 6.5812804556700445e-06,
      "loss": 0.8753,
      "step": 754580
    },
    {
      "epoch": 2.6446498228348525,
      "grad_norm": 2.46875,
      "learning_rate": 6.5806314270063425e-06,
      "loss": 0.7925,
      "step": 754590
    },
    {
      "epoch": 2.6446848703417483,
      "grad_norm": 3.171875,
      "learning_rate": 6.5799823983426405e-06,
      "loss": 0.7176,
      "step": 754600
    },
    {
      "epoch": 2.6447199178486436,
      "grad_norm": 2.578125,
      "learning_rate": 6.5793333696789385e-06,
      "loss": 0.8116,
      "step": 754610
    },
    {
      "epoch": 2.6447549653555393,
      "grad_norm": 2.703125,
      "learning_rate": 6.578684341015237e-06,
      "loss": 0.8518,
      "step": 754620
    },
    {
      "epoch": 2.644790012862435,
      "grad_norm": 3.359375,
      "learning_rate": 6.578035312351535e-06,
      "loss": 0.791,
      "step": 754630
    },
    {
      "epoch": 2.644825060369331,
      "grad_norm": 2.578125,
      "learning_rate": 6.577386283687833e-06,
      "loss": 0.7487,
      "step": 754640
    },
    {
      "epoch": 2.644860107876226,
      "grad_norm": 3.109375,
      "learning_rate": 6.576737255024132e-06,
      "loss": 0.8186,
      "step": 754650
    },
    {
      "epoch": 2.644895155383122,
      "grad_norm": 2.609375,
      "learning_rate": 6.5760882263604285e-06,
      "loss": 0.7954,
      "step": 754660
    },
    {
      "epoch": 2.6449302028900172,
      "grad_norm": 2.953125,
      "learning_rate": 6.575439197696727e-06,
      "loss": 0.8052,
      "step": 754670
    },
    {
      "epoch": 2.644965250396913,
      "grad_norm": 3.296875,
      "learning_rate": 6.574790169033025e-06,
      "loss": 0.8256,
      "step": 754680
    },
    {
      "epoch": 2.6450002979038088,
      "grad_norm": 2.875,
      "learning_rate": 6.574141140369323e-06,
      "loss": 0.8326,
      "step": 754690
    },
    {
      "epoch": 2.645035345410704,
      "grad_norm": 3.234375,
      "learning_rate": 6.573492111705621e-06,
      "loss": 0.8481,
      "step": 754700
    },
    {
      "epoch": 2.6450703929176,
      "grad_norm": 2.8125,
      "learning_rate": 6.57284308304192e-06,
      "loss": 0.7858,
      "step": 754710
    },
    {
      "epoch": 2.645105440424495,
      "grad_norm": 3.109375,
      "learning_rate": 6.572194054378218e-06,
      "loss": 0.8937,
      "step": 754720
    },
    {
      "epoch": 2.645140487931391,
      "grad_norm": 2.578125,
      "learning_rate": 6.571545025714516e-06,
      "loss": 0.8323,
      "step": 754730
    },
    {
      "epoch": 2.6451755354382867,
      "grad_norm": 3.09375,
      "learning_rate": 6.570895997050815e-06,
      "loss": 0.828,
      "step": 754740
    },
    {
      "epoch": 2.6452105829451824,
      "grad_norm": 2.890625,
      "learning_rate": 6.570246968387111e-06,
      "loss": 0.8191,
      "step": 754750
    },
    {
      "epoch": 2.6452456304520777,
      "grad_norm": 2.796875,
      "learning_rate": 6.569597939723409e-06,
      "loss": 0.7903,
      "step": 754760
    },
    {
      "epoch": 2.6452806779589735,
      "grad_norm": 2.5,
      "learning_rate": 6.568948911059708e-06,
      "loss": 0.7633,
      "step": 754770
    },
    {
      "epoch": 2.645315725465869,
      "grad_norm": 2.640625,
      "learning_rate": 6.568299882396006e-06,
      "loss": 0.8083,
      "step": 754780
    },
    {
      "epoch": 2.6453507729727646,
      "grad_norm": 2.96875,
      "learning_rate": 6.567650853732304e-06,
      "loss": 0.7125,
      "step": 754790
    },
    {
      "epoch": 2.6453858204796603,
      "grad_norm": 3.046875,
      "learning_rate": 6.567001825068603e-06,
      "loss": 0.9122,
      "step": 754800
    },
    {
      "epoch": 2.6454208679865556,
      "grad_norm": 2.90625,
      "learning_rate": 6.566352796404901e-06,
      "loss": 0.7652,
      "step": 754810
    },
    {
      "epoch": 2.6454559154934514,
      "grad_norm": 3.0625,
      "learning_rate": 6.565703767741199e-06,
      "loss": 0.8689,
      "step": 754820
    },
    {
      "epoch": 2.6454909630003467,
      "grad_norm": 2.71875,
      "learning_rate": 6.565054739077498e-06,
      "loss": 0.8059,
      "step": 754830
    },
    {
      "epoch": 2.6455260105072425,
      "grad_norm": 3.515625,
      "learning_rate": 6.564405710413796e-06,
      "loss": 0.8402,
      "step": 754840
    },
    {
      "epoch": 2.645561058014138,
      "grad_norm": 3.0625,
      "learning_rate": 6.563756681750092e-06,
      "loss": 0.823,
      "step": 754850
    },
    {
      "epoch": 2.645596105521034,
      "grad_norm": 3.546875,
      "learning_rate": 6.563107653086391e-06,
      "loss": 0.7752,
      "step": 754860
    },
    {
      "epoch": 2.6456311530279293,
      "grad_norm": 2.265625,
      "learning_rate": 6.562458624422689e-06,
      "loss": 0.816,
      "step": 754870
    },
    {
      "epoch": 2.645666200534825,
      "grad_norm": 2.734375,
      "learning_rate": 6.561809595758987e-06,
      "loss": 0.7964,
      "step": 754880
    },
    {
      "epoch": 2.6457012480417204,
      "grad_norm": 3.28125,
      "learning_rate": 6.561160567095286e-06,
      "loss": 0.8365,
      "step": 754890
    },
    {
      "epoch": 2.645736295548616,
      "grad_norm": 2.59375,
      "learning_rate": 6.560511538431584e-06,
      "loss": 0.7708,
      "step": 754900
    },
    {
      "epoch": 2.645771343055512,
      "grad_norm": 2.625,
      "learning_rate": 6.559862509767882e-06,
      "loss": 0.8454,
      "step": 754910
    },
    {
      "epoch": 2.645806390562407,
      "grad_norm": 2.765625,
      "learning_rate": 6.55921348110418e-06,
      "loss": 0.791,
      "step": 754920
    },
    {
      "epoch": 2.645841438069303,
      "grad_norm": 3.03125,
      "learning_rate": 6.558564452440479e-06,
      "loss": 0.7304,
      "step": 754930
    },
    {
      "epoch": 2.6458764855761987,
      "grad_norm": 2.671875,
      "learning_rate": 6.557915423776775e-06,
      "loss": 0.8207,
      "step": 754940
    },
    {
      "epoch": 2.645911533083094,
      "grad_norm": 2.75,
      "learning_rate": 6.557266395113074e-06,
      "loss": 0.8725,
      "step": 754950
    },
    {
      "epoch": 2.6459465805899898,
      "grad_norm": 2.625,
      "learning_rate": 6.556617366449372e-06,
      "loss": 0.8062,
      "step": 754960
    },
    {
      "epoch": 2.6459816280968855,
      "grad_norm": 2.453125,
      "learning_rate": 6.55596833778567e-06,
      "loss": 0.8273,
      "step": 754970
    },
    {
      "epoch": 2.646016675603781,
      "grad_norm": 2.78125,
      "learning_rate": 6.555319309121969e-06,
      "loss": 0.8169,
      "step": 754980
    },
    {
      "epoch": 2.6460517231106766,
      "grad_norm": 3.078125,
      "learning_rate": 6.554670280458267e-06,
      "loss": 0.8808,
      "step": 754990
    },
    {
      "epoch": 2.646086770617572,
      "grad_norm": 2.609375,
      "learning_rate": 6.554021251794565e-06,
      "loss": 0.8094,
      "step": 755000
    },
    {
      "epoch": 2.646086770617572,
      "eval_loss": 0.7543975114822388,
      "eval_runtime": 552.0915,
      "eval_samples_per_second": 689.081,
      "eval_steps_per_second": 57.423,
      "step": 755000
    },
    {
      "epoch": 2.6461218181244677,
      "grad_norm": 3.015625,
      "learning_rate": 6.553372223130863e-06,
      "loss": 0.7979,
      "step": 755010
    },
    {
      "epoch": 2.6461568656313634,
      "grad_norm": 2.6875,
      "learning_rate": 6.5527231944671616e-06,
      "loss": 0.7903,
      "step": 755020
    },
    {
      "epoch": 2.6461919131382587,
      "grad_norm": 3.53125,
      "learning_rate": 6.552074165803458e-06,
      "loss": 0.7591,
      "step": 755030
    },
    {
      "epoch": 2.6462269606451545,
      "grad_norm": 3.078125,
      "learning_rate": 6.551425137139757e-06,
      "loss": 0.8086,
      "step": 755040
    },
    {
      "epoch": 2.6462620081520503,
      "grad_norm": 2.875,
      "learning_rate": 6.550776108476055e-06,
      "loss": 0.8456,
      "step": 755050
    },
    {
      "epoch": 2.6462970556589456,
      "grad_norm": 3.078125,
      "learning_rate": 6.550127079812353e-06,
      "loss": 0.8416,
      "step": 755060
    },
    {
      "epoch": 2.6463321031658413,
      "grad_norm": 2.484375,
      "learning_rate": 6.5494780511486516e-06,
      "loss": 0.8037,
      "step": 755070
    },
    {
      "epoch": 2.646367150672737,
      "grad_norm": 3.125,
      "learning_rate": 6.5488290224849496e-06,
      "loss": 0.7718,
      "step": 755080
    },
    {
      "epoch": 2.6464021981796324,
      "grad_norm": 3.109375,
      "learning_rate": 6.5481799938212476e-06,
      "loss": 0.7114,
      "step": 755090
    },
    {
      "epoch": 2.646437245686528,
      "grad_norm": 2.796875,
      "learning_rate": 6.5475309651575456e-06,
      "loss": 0.8216,
      "step": 755100
    },
    {
      "epoch": 2.6464722931934235,
      "grad_norm": 2.71875,
      "learning_rate": 6.546881936493844e-06,
      "loss": 0.8322,
      "step": 755110
    },
    {
      "epoch": 2.6465073407003192,
      "grad_norm": 3.078125,
      "learning_rate": 6.546232907830142e-06,
      "loss": 0.8465,
      "step": 755120
    },
    {
      "epoch": 2.646542388207215,
      "grad_norm": 2.765625,
      "learning_rate": 6.5455838791664396e-06,
      "loss": 0.7241,
      "step": 755130
    },
    {
      "epoch": 2.6465774357141107,
      "grad_norm": 2.75,
      "learning_rate": 6.5449348505027376e-06,
      "loss": 0.8051,
      "step": 755140
    },
    {
      "epoch": 2.646612483221006,
      "grad_norm": 3.03125,
      "learning_rate": 6.5442858218390356e-06,
      "loss": 0.8513,
      "step": 755150
    },
    {
      "epoch": 2.646647530727902,
      "grad_norm": 3.21875,
      "learning_rate": 6.5436367931753336e-06,
      "loss": 0.7994,
      "step": 755160
    },
    {
      "epoch": 2.646682578234797,
      "grad_norm": 3.640625,
      "learning_rate": 6.542987764511632e-06,
      "loss": 0.846,
      "step": 755170
    },
    {
      "epoch": 2.646717625741693,
      "grad_norm": 2.78125,
      "learning_rate": 6.54233873584793e-06,
      "loss": 0.7333,
      "step": 755180
    },
    {
      "epoch": 2.6467526732485886,
      "grad_norm": 2.90625,
      "learning_rate": 6.541689707184228e-06,
      "loss": 0.8051,
      "step": 755190
    },
    {
      "epoch": 2.646787720755484,
      "grad_norm": 2.875,
      "learning_rate": 6.541040678520527e-06,
      "loss": 0.8017,
      "step": 755200
    },
    {
      "epoch": 2.6468227682623797,
      "grad_norm": 2.546875,
      "learning_rate": 6.540391649856825e-06,
      "loss": 0.752,
      "step": 755210
    },
    {
      "epoch": 2.646857815769275,
      "grad_norm": 2.609375,
      "learning_rate": 6.539742621193122e-06,
      "loss": 0.8694,
      "step": 755220
    },
    {
      "epoch": 2.646892863276171,
      "grad_norm": 2.734375,
      "learning_rate": 6.53909359252942e-06,
      "loss": 0.828,
      "step": 755230
    },
    {
      "epoch": 2.6469279107830666,
      "grad_norm": 2.53125,
      "learning_rate": 6.538444563865718e-06,
      "loss": 0.7363,
      "step": 755240
    },
    {
      "epoch": 2.6469629582899623,
      "grad_norm": 2.953125,
      "learning_rate": 6.537795535202016e-06,
      "loss": 0.7843,
      "step": 755250
    },
    {
      "epoch": 2.6469980057968576,
      "grad_norm": 3.0,
      "learning_rate": 6.537146506538315e-06,
      "loss": 0.9003,
      "step": 755260
    },
    {
      "epoch": 2.6470330533037534,
      "grad_norm": 2.921875,
      "learning_rate": 6.536497477874613e-06,
      "loss": 0.7703,
      "step": 755270
    },
    {
      "epoch": 2.6470681008106487,
      "grad_norm": 2.8125,
      "learning_rate": 6.535848449210911e-06,
      "loss": 0.8971,
      "step": 755280
    },
    {
      "epoch": 2.6471031483175445,
      "grad_norm": 3.625,
      "learning_rate": 6.53519942054721e-06,
      "loss": 0.8394,
      "step": 755290
    },
    {
      "epoch": 2.64713819582444,
      "grad_norm": 3.015625,
      "learning_rate": 6.534550391883508e-06,
      "loss": 0.8328,
      "step": 755300
    },
    {
      "epoch": 2.6471732433313355,
      "grad_norm": 2.921875,
      "learning_rate": 6.533901363219806e-06,
      "loss": 0.8363,
      "step": 755310
    },
    {
      "epoch": 2.6472082908382313,
      "grad_norm": 2.9375,
      "learning_rate": 6.533252334556103e-06,
      "loss": 0.8281,
      "step": 755320
    },
    {
      "epoch": 2.6472433383451266,
      "grad_norm": 3.140625,
      "learning_rate": 6.532603305892401e-06,
      "loss": 0.8307,
      "step": 755330
    },
    {
      "epoch": 2.6472783858520224,
      "grad_norm": 3.296875,
      "learning_rate": 6.531954277228699e-06,
      "loss": 0.8115,
      "step": 755340
    },
    {
      "epoch": 2.647313433358918,
      "grad_norm": 2.71875,
      "learning_rate": 6.531305248564998e-06,
      "loss": 0.8067,
      "step": 755350
    },
    {
      "epoch": 2.647348480865814,
      "grad_norm": 3.15625,
      "learning_rate": 6.530656219901296e-06,
      "loss": 0.8102,
      "step": 755360
    },
    {
      "epoch": 2.647383528372709,
      "grad_norm": 2.4375,
      "learning_rate": 6.530007191237594e-06,
      "loss": 0.9269,
      "step": 755370
    },
    {
      "epoch": 2.647418575879605,
      "grad_norm": 3.046875,
      "learning_rate": 6.529358162573893e-06,
      "loss": 0.8419,
      "step": 755380
    },
    {
      "epoch": 2.6474536233865003,
      "grad_norm": 3.03125,
      "learning_rate": 6.528709133910191e-06,
      "loss": 0.7757,
      "step": 755390
    },
    {
      "epoch": 2.647488670893396,
      "grad_norm": 3.40625,
      "learning_rate": 6.528060105246489e-06,
      "loss": 0.7538,
      "step": 755400
    },
    {
      "epoch": 2.6475237184002918,
      "grad_norm": 2.765625,
      "learning_rate": 6.527411076582786e-06,
      "loss": 0.7707,
      "step": 755410
    },
    {
      "epoch": 2.647558765907187,
      "grad_norm": 2.859375,
      "learning_rate": 6.526762047919084e-06,
      "loss": 0.8249,
      "step": 755420
    },
    {
      "epoch": 2.647593813414083,
      "grad_norm": 2.78125,
      "learning_rate": 6.526113019255382e-06,
      "loss": 0.6847,
      "step": 755430
    },
    {
      "epoch": 2.647628860920978,
      "grad_norm": 3.984375,
      "learning_rate": 6.525463990591681e-06,
      "loss": 0.8543,
      "step": 755440
    },
    {
      "epoch": 2.647663908427874,
      "grad_norm": 2.625,
      "learning_rate": 6.524814961927979e-06,
      "loss": 0.8116,
      "step": 755450
    },
    {
      "epoch": 2.6476989559347697,
      "grad_norm": 2.6875,
      "learning_rate": 6.524165933264277e-06,
      "loss": 0.7746,
      "step": 755460
    },
    {
      "epoch": 2.6477340034416654,
      "grad_norm": 2.484375,
      "learning_rate": 6.523516904600575e-06,
      "loss": 0.7967,
      "step": 755470
    },
    {
      "epoch": 2.6477690509485607,
      "grad_norm": 2.828125,
      "learning_rate": 6.522867875936874e-06,
      "loss": 0.8272,
      "step": 755480
    },
    {
      "epoch": 2.6478040984554565,
      "grad_norm": 2.84375,
      "learning_rate": 6.522218847273172e-06,
      "loss": 0.8205,
      "step": 755490
    },
    {
      "epoch": 2.647839145962352,
      "grad_norm": 2.59375,
      "learning_rate": 6.521569818609469e-06,
      "loss": 0.8008,
      "step": 755500
    },
    {
      "epoch": 2.6478741934692476,
      "grad_norm": 2.625,
      "learning_rate": 6.520920789945767e-06,
      "loss": 0.8051,
      "step": 755510
    },
    {
      "epoch": 2.6479092409761433,
      "grad_norm": 2.953125,
      "learning_rate": 6.520271761282065e-06,
      "loss": 0.8234,
      "step": 755520
    },
    {
      "epoch": 2.6479442884830386,
      "grad_norm": 3.109375,
      "learning_rate": 6.519622732618364e-06,
      "loss": 0.8147,
      "step": 755530
    },
    {
      "epoch": 2.6479793359899344,
      "grad_norm": 3.15625,
      "learning_rate": 6.518973703954662e-06,
      "loss": 0.8051,
      "step": 755540
    },
    {
      "epoch": 2.6480143834968297,
      "grad_norm": 3.0625,
      "learning_rate": 6.51832467529096e-06,
      "loss": 0.7668,
      "step": 755550
    },
    {
      "epoch": 2.6480494310037255,
      "grad_norm": 2.546875,
      "learning_rate": 6.517675646627258e-06,
      "loss": 0.8719,
      "step": 755560
    },
    {
      "epoch": 2.6480844785106212,
      "grad_norm": 3.0625,
      "learning_rate": 6.517026617963557e-06,
      "loss": 0.843,
      "step": 755570
    },
    {
      "epoch": 2.648119526017517,
      "grad_norm": 2.90625,
      "learning_rate": 6.516377589299855e-06,
      "loss": 0.8035,
      "step": 755580
    },
    {
      "epoch": 2.6481545735244123,
      "grad_norm": 3.03125,
      "learning_rate": 6.515728560636153e-06,
      "loss": 0.821,
      "step": 755590
    },
    {
      "epoch": 2.648189621031308,
      "grad_norm": 2.9375,
      "learning_rate": 6.51507953197245e-06,
      "loss": 0.8225,
      "step": 755600
    },
    {
      "epoch": 2.6482246685382034,
      "grad_norm": 2.46875,
      "learning_rate": 6.514430503308748e-06,
      "loss": 0.7495,
      "step": 755610
    },
    {
      "epoch": 2.648259716045099,
      "grad_norm": 3.109375,
      "learning_rate": 6.513781474645047e-06,
      "loss": 0.8083,
      "step": 755620
    },
    {
      "epoch": 2.648294763551995,
      "grad_norm": 2.9375,
      "learning_rate": 6.513132445981345e-06,
      "loss": 0.8167,
      "step": 755630
    },
    {
      "epoch": 2.64832981105889,
      "grad_norm": 2.875,
      "learning_rate": 6.512483417317643e-06,
      "loss": 0.7974,
      "step": 755640
    },
    {
      "epoch": 2.648364858565786,
      "grad_norm": 2.765625,
      "learning_rate": 6.511834388653941e-06,
      "loss": 0.7206,
      "step": 755650
    },
    {
      "epoch": 2.6483999060726813,
      "grad_norm": 2.546875,
      "learning_rate": 6.5111853599902395e-06,
      "loss": 0.7889,
      "step": 755660
    },
    {
      "epoch": 2.648434953579577,
      "grad_norm": 2.921875,
      "learning_rate": 6.5105363313265375e-06,
      "loss": 0.7943,
      "step": 755670
    },
    {
      "epoch": 2.648470001086473,
      "grad_norm": 3.09375,
      "learning_rate": 6.5098873026628355e-06,
      "loss": 0.8284,
      "step": 755680
    },
    {
      "epoch": 2.6485050485933685,
      "grad_norm": 2.375,
      "learning_rate": 6.509238273999133e-06,
      "loss": 0.7386,
      "step": 755690
    },
    {
      "epoch": 2.648540096100264,
      "grad_norm": 3.28125,
      "learning_rate": 6.508589245335431e-06,
      "loss": 0.8538,
      "step": 755700
    },
    {
      "epoch": 2.6485751436071596,
      "grad_norm": 3.109375,
      "learning_rate": 6.507940216671729e-06,
      "loss": 0.913,
      "step": 755710
    },
    {
      "epoch": 2.648610191114055,
      "grad_norm": 2.1875,
      "learning_rate": 6.5072911880080275e-06,
      "loss": 0.733,
      "step": 755720
    },
    {
      "epoch": 2.6486452386209507,
      "grad_norm": 3.0625,
      "learning_rate": 6.5066421593443255e-06,
      "loss": 0.8235,
      "step": 755730
    },
    {
      "epoch": 2.6486802861278465,
      "grad_norm": 2.578125,
      "learning_rate": 6.5059931306806235e-06,
      "loss": 0.8105,
      "step": 755740
    },
    {
      "epoch": 2.6487153336347418,
      "grad_norm": 2.96875,
      "learning_rate": 6.505344102016922e-06,
      "loss": 0.804,
      "step": 755750
    },
    {
      "epoch": 2.6487503811416375,
      "grad_norm": 3.15625,
      "learning_rate": 6.50469507335322e-06,
      "loss": 0.8393,
      "step": 755760
    },
    {
      "epoch": 2.648785428648533,
      "grad_norm": 2.984375,
      "learning_rate": 6.504046044689518e-06,
      "loss": 0.8272,
      "step": 755770
    },
    {
      "epoch": 2.6488204761554286,
      "grad_norm": 3.140625,
      "learning_rate": 6.503397016025816e-06,
      "loss": 0.7487,
      "step": 755780
    },
    {
      "epoch": 2.6488555236623244,
      "grad_norm": 2.75,
      "learning_rate": 6.5027479873621135e-06,
      "loss": 0.8527,
      "step": 755790
    },
    {
      "epoch": 2.64889057116922,
      "grad_norm": 3.5,
      "learning_rate": 6.5020989586984115e-06,
      "loss": 0.7889,
      "step": 755800
    },
    {
      "epoch": 2.6489256186761154,
      "grad_norm": 2.703125,
      "learning_rate": 6.50144993003471e-06,
      "loss": 0.7314,
      "step": 755810
    },
    {
      "epoch": 2.648960666183011,
      "grad_norm": 2.546875,
      "learning_rate": 6.500800901371008e-06,
      "loss": 0.7843,
      "step": 755820
    },
    {
      "epoch": 2.6489957136899065,
      "grad_norm": 2.875,
      "learning_rate": 6.500151872707306e-06,
      "loss": 0.7256,
      "step": 755830
    },
    {
      "epoch": 2.6490307611968023,
      "grad_norm": 2.359375,
      "learning_rate": 6.499502844043605e-06,
      "loss": 0.7253,
      "step": 755840
    },
    {
      "epoch": 2.649065808703698,
      "grad_norm": 3.125,
      "learning_rate": 6.498853815379903e-06,
      "loss": 0.7473,
      "step": 755850
    },
    {
      "epoch": 2.6491008562105933,
      "grad_norm": 3.015625,
      "learning_rate": 6.498204786716201e-06,
      "loss": 0.896,
      "step": 755860
    },
    {
      "epoch": 2.649135903717489,
      "grad_norm": 3.265625,
      "learning_rate": 6.497555758052499e-06,
      "loss": 0.8259,
      "step": 755870
    },
    {
      "epoch": 2.6491709512243844,
      "grad_norm": 2.84375,
      "learning_rate": 6.496906729388796e-06,
      "loss": 0.8153,
      "step": 755880
    },
    {
      "epoch": 2.64920599873128,
      "grad_norm": 3.546875,
      "learning_rate": 6.496257700725094e-06,
      "loss": 0.7962,
      "step": 755890
    },
    {
      "epoch": 2.649241046238176,
      "grad_norm": 2.796875,
      "learning_rate": 6.495608672061393e-06,
      "loss": 0.8047,
      "step": 755900
    },
    {
      "epoch": 2.6492760937450717,
      "grad_norm": 2.859375,
      "learning_rate": 6.494959643397691e-06,
      "loss": 0.7345,
      "step": 755910
    },
    {
      "epoch": 2.649311141251967,
      "grad_norm": 2.625,
      "learning_rate": 6.494310614733989e-06,
      "loss": 0.836,
      "step": 755920
    },
    {
      "epoch": 2.6493461887588627,
      "grad_norm": 3.1875,
      "learning_rate": 6.493661586070288e-06,
      "loss": 0.7363,
      "step": 755930
    },
    {
      "epoch": 2.649381236265758,
      "grad_norm": 3.328125,
      "learning_rate": 6.493012557406586e-06,
      "loss": 0.7861,
      "step": 755940
    },
    {
      "epoch": 2.649416283772654,
      "grad_norm": 3.296875,
      "learning_rate": 6.492363528742884e-06,
      "loss": 0.8051,
      "step": 755950
    },
    {
      "epoch": 2.6494513312795496,
      "grad_norm": 2.765625,
      "learning_rate": 6.491714500079182e-06,
      "loss": 0.8017,
      "step": 755960
    },
    {
      "epoch": 2.649486378786445,
      "grad_norm": 2.65625,
      "learning_rate": 6.491065471415479e-06,
      "loss": 0.6851,
      "step": 755970
    },
    {
      "epoch": 2.6495214262933406,
      "grad_norm": 3.203125,
      "learning_rate": 6.490416442751777e-06,
      "loss": 0.784,
      "step": 755980
    },
    {
      "epoch": 2.649556473800236,
      "grad_norm": 2.921875,
      "learning_rate": 6.489767414088076e-06,
      "loss": 0.8271,
      "step": 755990
    },
    {
      "epoch": 2.6495915213071317,
      "grad_norm": 3.5625,
      "learning_rate": 6.489118385424374e-06,
      "loss": 0.7911,
      "step": 756000
    },
    {
      "epoch": 2.6496265688140275,
      "grad_norm": 2.890625,
      "learning_rate": 6.488469356760672e-06,
      "loss": 0.8126,
      "step": 756010
    },
    {
      "epoch": 2.6496616163209232,
      "grad_norm": 2.703125,
      "learning_rate": 6.48782032809697e-06,
      "loss": 0.8122,
      "step": 756020
    },
    {
      "epoch": 2.6496966638278185,
      "grad_norm": 2.796875,
      "learning_rate": 6.487171299433269e-06,
      "loss": 0.8111,
      "step": 756030
    },
    {
      "epoch": 2.6497317113347143,
      "grad_norm": 3.234375,
      "learning_rate": 6.486522270769567e-06,
      "loss": 0.798,
      "step": 756040
    },
    {
      "epoch": 2.6497667588416096,
      "grad_norm": 2.640625,
      "learning_rate": 6.485873242105865e-06,
      "loss": 0.7496,
      "step": 756050
    },
    {
      "epoch": 2.6498018063485054,
      "grad_norm": 3.453125,
      "learning_rate": 6.485224213442164e-06,
      "loss": 0.7644,
      "step": 756060
    },
    {
      "epoch": 2.649836853855401,
      "grad_norm": 2.875,
      "learning_rate": 6.48457518477846e-06,
      "loss": 0.7513,
      "step": 756070
    },
    {
      "epoch": 2.6498719013622964,
      "grad_norm": 2.921875,
      "learning_rate": 6.483926156114759e-06,
      "loss": 0.7266,
      "step": 756080
    },
    {
      "epoch": 2.649906948869192,
      "grad_norm": 2.25,
      "learning_rate": 6.483277127451057e-06,
      "loss": 0.7633,
      "step": 756090
    },
    {
      "epoch": 2.6499419963760875,
      "grad_norm": 2.9375,
      "learning_rate": 6.482628098787355e-06,
      "loss": 0.7779,
      "step": 756100
    },
    {
      "epoch": 2.6499770438829833,
      "grad_norm": 3.390625,
      "learning_rate": 6.481979070123653e-06,
      "loss": 0.8492,
      "step": 756110
    },
    {
      "epoch": 2.650012091389879,
      "grad_norm": 3.234375,
      "learning_rate": 6.481330041459952e-06,
      "loss": 0.7749,
      "step": 756120
    },
    {
      "epoch": 2.650047138896775,
      "grad_norm": 2.71875,
      "learning_rate": 6.48068101279625e-06,
      "loss": 0.8136,
      "step": 756130
    },
    {
      "epoch": 2.65008218640367,
      "grad_norm": 2.875,
      "learning_rate": 6.480031984132548e-06,
      "loss": 0.8389,
      "step": 756140
    },
    {
      "epoch": 2.650117233910566,
      "grad_norm": 2.6875,
      "learning_rate": 6.4793829554688466e-06,
      "loss": 0.818,
      "step": 756150
    },
    {
      "epoch": 2.650152281417461,
      "grad_norm": 3.015625,
      "learning_rate": 6.478733926805143e-06,
      "loss": 0.742,
      "step": 756160
    },
    {
      "epoch": 2.650187328924357,
      "grad_norm": 2.984375,
      "learning_rate": 6.478084898141442e-06,
      "loss": 0.8876,
      "step": 756170
    },
    {
      "epoch": 2.6502223764312527,
      "grad_norm": 3.03125,
      "learning_rate": 6.47743586947774e-06,
      "loss": 0.807,
      "step": 756180
    },
    {
      "epoch": 2.650257423938148,
      "grad_norm": 2.984375,
      "learning_rate": 6.476786840814038e-06,
      "loss": 0.7445,
      "step": 756190
    },
    {
      "epoch": 2.6502924714450438,
      "grad_norm": 3.1875,
      "learning_rate": 6.476137812150336e-06,
      "loss": 0.835,
      "step": 756200
    },
    {
      "epoch": 2.650327518951939,
      "grad_norm": 4.34375,
      "learning_rate": 6.4754887834866346e-06,
      "loss": 0.7321,
      "step": 756210
    },
    {
      "epoch": 2.650362566458835,
      "grad_norm": 3.296875,
      "learning_rate": 6.4748397548229326e-06,
      "loss": 0.8032,
      "step": 756220
    },
    {
      "epoch": 2.6503976139657306,
      "grad_norm": 2.765625,
      "learning_rate": 6.4741907261592306e-06,
      "loss": 0.8037,
      "step": 756230
    },
    {
      "epoch": 2.6504326614726264,
      "grad_norm": 3.046875,
      "learning_rate": 6.473541697495529e-06,
      "loss": 0.9053,
      "step": 756240
    },
    {
      "epoch": 2.6504677089795217,
      "grad_norm": 3.0,
      "learning_rate": 6.472892668831827e-06,
      "loss": 0.8128,
      "step": 756250
    },
    {
      "epoch": 2.6505027564864174,
      "grad_norm": 2.515625,
      "learning_rate": 6.472243640168124e-06,
      "loss": 0.7739,
      "step": 756260
    },
    {
      "epoch": 2.6505378039933127,
      "grad_norm": 2.8125,
      "learning_rate": 6.4715946115044226e-06,
      "loss": 0.7701,
      "step": 756270
    },
    {
      "epoch": 2.6505728515002085,
      "grad_norm": 2.75,
      "learning_rate": 6.4709455828407206e-06,
      "loss": 0.8067,
      "step": 756280
    },
    {
      "epoch": 2.6506078990071043,
      "grad_norm": 3.046875,
      "learning_rate": 6.4702965541770186e-06,
      "loss": 0.8102,
      "step": 756290
    },
    {
      "epoch": 2.6506429465139996,
      "grad_norm": 2.875,
      "learning_rate": 6.469647525513317e-06,
      "loss": 0.8069,
      "step": 756300
    },
    {
      "epoch": 2.6506779940208953,
      "grad_norm": 3.15625,
      "learning_rate": 6.468998496849615e-06,
      "loss": 0.8009,
      "step": 756310
    },
    {
      "epoch": 2.650713041527791,
      "grad_norm": 3.21875,
      "learning_rate": 6.468349468185913e-06,
      "loss": 0.7046,
      "step": 756320
    },
    {
      "epoch": 2.6507480890346864,
      "grad_norm": 2.78125,
      "learning_rate": 6.467700439522211e-06,
      "loss": 0.782,
      "step": 756330
    },
    {
      "epoch": 2.650783136541582,
      "grad_norm": 2.171875,
      "learning_rate": 6.46705141085851e-06,
      "loss": 0.8294,
      "step": 756340
    },
    {
      "epoch": 2.650818184048478,
      "grad_norm": 2.765625,
      "learning_rate": 6.4664023821948066e-06,
      "loss": 0.8731,
      "step": 756350
    },
    {
      "epoch": 2.6508532315553732,
      "grad_norm": 3.21875,
      "learning_rate": 6.465753353531105e-06,
      "loss": 0.7919,
      "step": 756360
    },
    {
      "epoch": 2.650888279062269,
      "grad_norm": 2.65625,
      "learning_rate": 6.465104324867403e-06,
      "loss": 0.7868,
      "step": 756370
    },
    {
      "epoch": 2.6509233265691643,
      "grad_norm": 3.40625,
      "learning_rate": 6.464455296203701e-06,
      "loss": 0.708,
      "step": 756380
    },
    {
      "epoch": 2.65095837407606,
      "grad_norm": 2.90625,
      "learning_rate": 6.46380626754e-06,
      "loss": 0.7892,
      "step": 756390
    },
    {
      "epoch": 2.650993421582956,
      "grad_norm": 2.890625,
      "learning_rate": 6.463157238876298e-06,
      "loss": 0.7663,
      "step": 756400
    },
    {
      "epoch": 2.6510284690898516,
      "grad_norm": 2.96875,
      "learning_rate": 6.462508210212596e-06,
      "loss": 0.8799,
      "step": 756410
    },
    {
      "epoch": 2.651063516596747,
      "grad_norm": 3.09375,
      "learning_rate": 6.461859181548894e-06,
      "loss": 0.7628,
      "step": 756420
    },
    {
      "epoch": 2.6510985641036426,
      "grad_norm": 2.96875,
      "learning_rate": 6.461210152885193e-06,
      "loss": 0.8072,
      "step": 756430
    },
    {
      "epoch": 2.651133611610538,
      "grad_norm": 2.96875,
      "learning_rate": 6.460561124221489e-06,
      "loss": 0.7819,
      "step": 756440
    },
    {
      "epoch": 2.6511686591174337,
      "grad_norm": 3.21875,
      "learning_rate": 6.459912095557788e-06,
      "loss": 0.7794,
      "step": 756450
    },
    {
      "epoch": 2.6512037066243295,
      "grad_norm": 3.0,
      "learning_rate": 6.459263066894086e-06,
      "loss": 0.7825,
      "step": 756460
    },
    {
      "epoch": 2.651238754131225,
      "grad_norm": 3.046875,
      "learning_rate": 6.458614038230384e-06,
      "loss": 0.783,
      "step": 756470
    },
    {
      "epoch": 2.6512738016381205,
      "grad_norm": 3.25,
      "learning_rate": 6.457965009566683e-06,
      "loss": 0.7856,
      "step": 756480
    },
    {
      "epoch": 2.651308849145016,
      "grad_norm": 3.09375,
      "learning_rate": 6.457315980902981e-06,
      "loss": 0.75,
      "step": 756490
    },
    {
      "epoch": 2.6513438966519116,
      "grad_norm": 3.03125,
      "learning_rate": 6.456666952239279e-06,
      "loss": 0.7366,
      "step": 756500
    },
    {
      "epoch": 2.6513789441588074,
      "grad_norm": 3.3125,
      "learning_rate": 6.456017923575577e-06,
      "loss": 0.8243,
      "step": 756510
    },
    {
      "epoch": 2.651413991665703,
      "grad_norm": 3.125,
      "learning_rate": 6.455368894911876e-06,
      "loss": 0.73,
      "step": 756520
    },
    {
      "epoch": 2.6514490391725984,
      "grad_norm": 3.375,
      "learning_rate": 6.454719866248174e-06,
      "loss": 0.7781,
      "step": 756530
    },
    {
      "epoch": 2.651484086679494,
      "grad_norm": 3.015625,
      "learning_rate": 6.454070837584471e-06,
      "loss": 0.7502,
      "step": 756540
    },
    {
      "epoch": 2.6515191341863895,
      "grad_norm": 3.09375,
      "learning_rate": 6.453421808920769e-06,
      "loss": 0.7608,
      "step": 756550
    },
    {
      "epoch": 2.6515541816932853,
      "grad_norm": 2.78125,
      "learning_rate": 6.452772780257067e-06,
      "loss": 0.8078,
      "step": 756560
    },
    {
      "epoch": 2.651589229200181,
      "grad_norm": 3.1875,
      "learning_rate": 6.452123751593365e-06,
      "loss": 0.8327,
      "step": 756570
    },
    {
      "epoch": 2.6516242767070763,
      "grad_norm": 2.96875,
      "learning_rate": 6.451474722929664e-06,
      "loss": 0.8143,
      "step": 756580
    },
    {
      "epoch": 2.651659324213972,
      "grad_norm": 2.734375,
      "learning_rate": 6.450825694265962e-06,
      "loss": 0.853,
      "step": 756590
    },
    {
      "epoch": 2.6516943717208674,
      "grad_norm": 2.71875,
      "learning_rate": 6.45017666560226e-06,
      "loss": 0.8559,
      "step": 756600
    },
    {
      "epoch": 2.651729419227763,
      "grad_norm": 2.953125,
      "learning_rate": 6.449527636938559e-06,
      "loss": 0.7552,
      "step": 756610
    },
    {
      "epoch": 2.651764466734659,
      "grad_norm": 2.96875,
      "learning_rate": 6.448878608274857e-06,
      "loss": 0.8793,
      "step": 756620
    },
    {
      "epoch": 2.6517995142415547,
      "grad_norm": 2.75,
      "learning_rate": 6.448229579611154e-06,
      "loss": 0.8606,
      "step": 756630
    },
    {
      "epoch": 2.65183456174845,
      "grad_norm": 2.984375,
      "learning_rate": 6.447580550947452e-06,
      "loss": 0.8631,
      "step": 756640
    },
    {
      "epoch": 2.6518696092553458,
      "grad_norm": 2.5,
      "learning_rate": 6.44693152228375e-06,
      "loss": 0.7709,
      "step": 756650
    },
    {
      "epoch": 2.651904656762241,
      "grad_norm": 2.703125,
      "learning_rate": 6.446282493620048e-06,
      "loss": 0.8231,
      "step": 756660
    },
    {
      "epoch": 2.651939704269137,
      "grad_norm": 2.828125,
      "learning_rate": 6.445633464956347e-06,
      "loss": 0.8103,
      "step": 756670
    },
    {
      "epoch": 2.6519747517760326,
      "grad_norm": 3.078125,
      "learning_rate": 6.444984436292645e-06,
      "loss": 0.9074,
      "step": 756680
    },
    {
      "epoch": 2.652009799282928,
      "grad_norm": 2.765625,
      "learning_rate": 6.444335407628943e-06,
      "loss": 0.7743,
      "step": 756690
    },
    {
      "epoch": 2.6520448467898237,
      "grad_norm": 2.875,
      "learning_rate": 6.443686378965242e-06,
      "loss": 0.8408,
      "step": 756700
    },
    {
      "epoch": 2.652079894296719,
      "grad_norm": 2.53125,
      "learning_rate": 6.44303735030154e-06,
      "loss": 0.7308,
      "step": 756710
    },
    {
      "epoch": 2.6521149418036147,
      "grad_norm": 2.8125,
      "learning_rate": 6.442388321637838e-06,
      "loss": 0.7133,
      "step": 756720
    },
    {
      "epoch": 2.6521499893105105,
      "grad_norm": 3.140625,
      "learning_rate": 6.441739292974135e-06,
      "loss": 0.7996,
      "step": 756730
    },
    {
      "epoch": 2.6521850368174062,
      "grad_norm": 3.046875,
      "learning_rate": 6.441090264310433e-06,
      "loss": 0.789,
      "step": 756740
    },
    {
      "epoch": 2.6522200843243016,
      "grad_norm": 2.796875,
      "learning_rate": 6.440441235646731e-06,
      "loss": 0.8017,
      "step": 756750
    },
    {
      "epoch": 2.6522551318311973,
      "grad_norm": 2.546875,
      "learning_rate": 6.43979220698303e-06,
      "loss": 0.7899,
      "step": 756760
    },
    {
      "epoch": 2.6522901793380926,
      "grad_norm": 3.078125,
      "learning_rate": 6.439143178319328e-06,
      "loss": 0.8407,
      "step": 756770
    },
    {
      "epoch": 2.6523252268449884,
      "grad_norm": 2.9375,
      "learning_rate": 6.438494149655626e-06,
      "loss": 0.8467,
      "step": 756780
    },
    {
      "epoch": 2.652360274351884,
      "grad_norm": 2.578125,
      "learning_rate": 6.4378451209919245e-06,
      "loss": 0.7289,
      "step": 756790
    },
    {
      "epoch": 2.6523953218587795,
      "grad_norm": 2.59375,
      "learning_rate": 6.4371960923282225e-06,
      "loss": 0.7933,
      "step": 756800
    },
    {
      "epoch": 2.6524303693656752,
      "grad_norm": 2.796875,
      "learning_rate": 6.4365470636645205e-06,
      "loss": 0.829,
      "step": 756810
    },
    {
      "epoch": 2.6524654168725705,
      "grad_norm": 2.75,
      "learning_rate": 6.435898035000818e-06,
      "loss": 0.765,
      "step": 756820
    },
    {
      "epoch": 2.6525004643794663,
      "grad_norm": 2.203125,
      "learning_rate": 6.435249006337116e-06,
      "loss": 0.7044,
      "step": 756830
    },
    {
      "epoch": 2.652535511886362,
      "grad_norm": 2.53125,
      "learning_rate": 6.434599977673414e-06,
      "loss": 0.7895,
      "step": 756840
    },
    {
      "epoch": 2.652570559393258,
      "grad_norm": 3.296875,
      "learning_rate": 6.4339509490097125e-06,
      "loss": 0.7645,
      "step": 756850
    },
    {
      "epoch": 2.652605606900153,
      "grad_norm": 2.96875,
      "learning_rate": 6.4333019203460105e-06,
      "loss": 0.8211,
      "step": 756860
    },
    {
      "epoch": 2.652640654407049,
      "grad_norm": 2.890625,
      "learning_rate": 6.4326528916823085e-06,
      "loss": 0.7859,
      "step": 756870
    },
    {
      "epoch": 2.652675701913944,
      "grad_norm": 2.53125,
      "learning_rate": 6.4320038630186065e-06,
      "loss": 0.7877,
      "step": 756880
    },
    {
      "epoch": 2.65271074942084,
      "grad_norm": 2.703125,
      "learning_rate": 6.431354834354905e-06,
      "loss": 0.879,
      "step": 756890
    },
    {
      "epoch": 2.6527457969277357,
      "grad_norm": 3.125,
      "learning_rate": 6.430705805691203e-06,
      "loss": 0.8456,
      "step": 756900
    },
    {
      "epoch": 2.652780844434631,
      "grad_norm": 3.109375,
      "learning_rate": 6.430056777027501e-06,
      "loss": 0.7491,
      "step": 756910
    },
    {
      "epoch": 2.652815891941527,
      "grad_norm": 3.65625,
      "learning_rate": 6.4294077483637985e-06,
      "loss": 0.7974,
      "step": 756920
    },
    {
      "epoch": 2.652850939448422,
      "grad_norm": 2.890625,
      "learning_rate": 6.4287587197000965e-06,
      "loss": 0.8605,
      "step": 756930
    },
    {
      "epoch": 2.652885986955318,
      "grad_norm": 2.703125,
      "learning_rate": 6.428109691036395e-06,
      "loss": 0.6986,
      "step": 756940
    },
    {
      "epoch": 2.6529210344622136,
      "grad_norm": 3.25,
      "learning_rate": 6.427460662372693e-06,
      "loss": 0.8916,
      "step": 756950
    },
    {
      "epoch": 2.6529560819691094,
      "grad_norm": 3.0625,
      "learning_rate": 6.426811633708991e-06,
      "loss": 0.711,
      "step": 756960
    },
    {
      "epoch": 2.6529911294760047,
      "grad_norm": 2.515625,
      "learning_rate": 6.426162605045289e-06,
      "loss": 0.7569,
      "step": 756970
    },
    {
      "epoch": 2.6530261769829004,
      "grad_norm": 2.625,
      "learning_rate": 6.425513576381588e-06,
      "loss": 0.8703,
      "step": 756980
    },
    {
      "epoch": 2.6530612244897958,
      "grad_norm": 3.078125,
      "learning_rate": 6.424864547717886e-06,
      "loss": 0.8237,
      "step": 756990
    },
    {
      "epoch": 2.6530962719966915,
      "grad_norm": 3.171875,
      "learning_rate": 6.424215519054184e-06,
      "loss": 0.7533,
      "step": 757000
    },
    {
      "epoch": 2.6531313195035873,
      "grad_norm": 3.140625,
      "learning_rate": 6.423566490390481e-06,
      "loss": 0.7304,
      "step": 757010
    },
    {
      "epoch": 2.6531663670104826,
      "grad_norm": 2.609375,
      "learning_rate": 6.422917461726779e-06,
      "loss": 0.8773,
      "step": 757020
    },
    {
      "epoch": 2.6532014145173783,
      "grad_norm": 2.4375,
      "learning_rate": 6.422268433063078e-06,
      "loss": 0.799,
      "step": 757030
    },
    {
      "epoch": 2.6532364620242737,
      "grad_norm": 2.921875,
      "learning_rate": 6.421619404399376e-06,
      "loss": 0.8654,
      "step": 757040
    },
    {
      "epoch": 2.6532715095311694,
      "grad_norm": 3.09375,
      "learning_rate": 6.420970375735674e-06,
      "loss": 0.8327,
      "step": 757050
    },
    {
      "epoch": 2.653306557038065,
      "grad_norm": 2.765625,
      "learning_rate": 6.420321347071972e-06,
      "loss": 0.8082,
      "step": 757060
    },
    {
      "epoch": 2.653341604544961,
      "grad_norm": 3.25,
      "learning_rate": 6.419672318408271e-06,
      "loss": 0.8248,
      "step": 757070
    },
    {
      "epoch": 2.6533766520518562,
      "grad_norm": 2.46875,
      "learning_rate": 6.419023289744569e-06,
      "loss": 0.8023,
      "step": 757080
    },
    {
      "epoch": 2.653411699558752,
      "grad_norm": 2.8125,
      "learning_rate": 6.418374261080867e-06,
      "loss": 0.7853,
      "step": 757090
    },
    {
      "epoch": 2.6534467470656473,
      "grad_norm": 3.09375,
      "learning_rate": 6.417725232417164e-06,
      "loss": 0.887,
      "step": 757100
    },
    {
      "epoch": 2.653481794572543,
      "grad_norm": 3.296875,
      "learning_rate": 6.417076203753462e-06,
      "loss": 0.8443,
      "step": 757110
    },
    {
      "epoch": 2.653516842079439,
      "grad_norm": 2.59375,
      "learning_rate": 6.41642717508976e-06,
      "loss": 0.7557,
      "step": 757120
    },
    {
      "epoch": 2.653551889586334,
      "grad_norm": 2.8125,
      "learning_rate": 6.415778146426059e-06,
      "loss": 0.7898,
      "step": 757130
    },
    {
      "epoch": 2.65358693709323,
      "grad_norm": 2.84375,
      "learning_rate": 6.415129117762357e-06,
      "loss": 0.8147,
      "step": 757140
    },
    {
      "epoch": 2.653621984600125,
      "grad_norm": 3.0,
      "learning_rate": 6.414480089098655e-06,
      "loss": 0.7371,
      "step": 757150
    },
    {
      "epoch": 2.653657032107021,
      "grad_norm": 3.5,
      "learning_rate": 6.413831060434954e-06,
      "loss": 0.8871,
      "step": 757160
    },
    {
      "epoch": 2.6536920796139167,
      "grad_norm": 2.921875,
      "learning_rate": 6.413182031771252e-06,
      "loss": 0.7159,
      "step": 757170
    },
    {
      "epoch": 2.6537271271208125,
      "grad_norm": 2.984375,
      "learning_rate": 6.41253300310755e-06,
      "loss": 0.822,
      "step": 757180
    },
    {
      "epoch": 2.653762174627708,
      "grad_norm": 2.609375,
      "learning_rate": 6.411883974443849e-06,
      "loss": 0.8103,
      "step": 757190
    },
    {
      "epoch": 2.6537972221346036,
      "grad_norm": 2.9375,
      "learning_rate": 6.411234945780145e-06,
      "loss": 0.7397,
      "step": 757200
    },
    {
      "epoch": 2.653832269641499,
      "grad_norm": 2.84375,
      "learning_rate": 6.410585917116443e-06,
      "loss": 0.7515,
      "step": 757210
    },
    {
      "epoch": 2.6538673171483946,
      "grad_norm": 3.140625,
      "learning_rate": 6.409936888452742e-06,
      "loss": 0.8067,
      "step": 757220
    },
    {
      "epoch": 2.6539023646552904,
      "grad_norm": 2.578125,
      "learning_rate": 6.40928785978904e-06,
      "loss": 0.7189,
      "step": 757230
    },
    {
      "epoch": 2.6539374121621857,
      "grad_norm": 2.796875,
      "learning_rate": 6.408638831125338e-06,
      "loss": 0.7446,
      "step": 757240
    },
    {
      "epoch": 2.6539724596690815,
      "grad_norm": 2.921875,
      "learning_rate": 6.407989802461637e-06,
      "loss": 0.7713,
      "step": 757250
    },
    {
      "epoch": 2.6540075071759768,
      "grad_norm": 2.890625,
      "learning_rate": 6.407340773797935e-06,
      "loss": 0.8098,
      "step": 757260
    },
    {
      "epoch": 2.6540425546828725,
      "grad_norm": 3.046875,
      "learning_rate": 6.406691745134233e-06,
      "loss": 0.7754,
      "step": 757270
    },
    {
      "epoch": 2.6540776021897683,
      "grad_norm": 3.546875,
      "learning_rate": 6.406042716470531e-06,
      "loss": 0.8298,
      "step": 757280
    },
    {
      "epoch": 2.654112649696664,
      "grad_norm": 2.765625,
      "learning_rate": 6.405393687806828e-06,
      "loss": 0.8281,
      "step": 757290
    },
    {
      "epoch": 2.6541476972035594,
      "grad_norm": 3.21875,
      "learning_rate": 6.404744659143126e-06,
      "loss": 0.8267,
      "step": 757300
    },
    {
      "epoch": 2.654182744710455,
      "grad_norm": 2.6875,
      "learning_rate": 6.404095630479425e-06,
      "loss": 0.8462,
      "step": 757310
    },
    {
      "epoch": 2.6542177922173504,
      "grad_norm": 3.078125,
      "learning_rate": 6.403446601815723e-06,
      "loss": 0.8743,
      "step": 757320
    },
    {
      "epoch": 2.654252839724246,
      "grad_norm": 2.890625,
      "learning_rate": 6.402797573152021e-06,
      "loss": 0.7736,
      "step": 757330
    },
    {
      "epoch": 2.654287887231142,
      "grad_norm": 2.75,
      "learning_rate": 6.4021485444883196e-06,
      "loss": 0.8103,
      "step": 757340
    },
    {
      "epoch": 2.6543229347380373,
      "grad_norm": 3.171875,
      "learning_rate": 6.4014995158246176e-06,
      "loss": 0.8218,
      "step": 757350
    },
    {
      "epoch": 2.654357982244933,
      "grad_norm": 2.890625,
      "learning_rate": 6.4008504871609156e-06,
      "loss": 0.8009,
      "step": 757360
    },
    {
      "epoch": 2.6543930297518283,
      "grad_norm": 3.0,
      "learning_rate": 6.4002014584972136e-06,
      "loss": 0.7766,
      "step": 757370
    },
    {
      "epoch": 2.654428077258724,
      "grad_norm": 2.984375,
      "learning_rate": 6.399552429833512e-06,
      "loss": 0.7982,
      "step": 757380
    },
    {
      "epoch": 2.65446312476562,
      "grad_norm": 2.78125,
      "learning_rate": 6.398903401169809e-06,
      "loss": 0.7899,
      "step": 757390
    },
    {
      "epoch": 2.6544981722725156,
      "grad_norm": 2.953125,
      "learning_rate": 6.3982543725061076e-06,
      "loss": 0.9962,
      "step": 757400
    },
    {
      "epoch": 2.654533219779411,
      "grad_norm": 2.796875,
      "learning_rate": 6.3976053438424056e-06,
      "loss": 0.6956,
      "step": 757410
    },
    {
      "epoch": 2.6545682672863067,
      "grad_norm": 2.828125,
      "learning_rate": 6.3969563151787036e-06,
      "loss": 0.7602,
      "step": 757420
    },
    {
      "epoch": 2.654603314793202,
      "grad_norm": 3.234375,
      "learning_rate": 6.396307286515002e-06,
      "loss": 0.8761,
      "step": 757430
    },
    {
      "epoch": 2.6546383623000978,
      "grad_norm": 3.0,
      "learning_rate": 6.3956582578513e-06,
      "loss": 0.8077,
      "step": 757440
    },
    {
      "epoch": 2.6546734098069935,
      "grad_norm": 2.828125,
      "learning_rate": 6.395009229187598e-06,
      "loss": 0.7957,
      "step": 757450
    },
    {
      "epoch": 2.654708457313889,
      "grad_norm": 2.265625,
      "learning_rate": 6.394360200523896e-06,
      "loss": 0.8459,
      "step": 757460
    },
    {
      "epoch": 2.6547435048207846,
      "grad_norm": 2.875,
      "learning_rate": 6.393711171860195e-06,
      "loss": 0.7933,
      "step": 757470
    },
    {
      "epoch": 2.65477855232768,
      "grad_norm": 3.21875,
      "learning_rate": 6.3930621431964916e-06,
      "loss": 0.8572,
      "step": 757480
    },
    {
      "epoch": 2.6548135998345757,
      "grad_norm": 3.140625,
      "learning_rate": 6.39241311453279e-06,
      "loss": 0.7609,
      "step": 757490
    },
    {
      "epoch": 2.6548486473414714,
      "grad_norm": 2.75,
      "learning_rate": 6.391764085869088e-06,
      "loss": 0.7822,
      "step": 757500
    },
    {
      "epoch": 2.654883694848367,
      "grad_norm": 2.65625,
      "learning_rate": 6.391115057205386e-06,
      "loss": 0.7932,
      "step": 757510
    },
    {
      "epoch": 2.6549187423552625,
      "grad_norm": 2.84375,
      "learning_rate": 6.390466028541684e-06,
      "loss": 0.8046,
      "step": 757520
    },
    {
      "epoch": 2.6549537898621582,
      "grad_norm": 2.78125,
      "learning_rate": 6.389816999877983e-06,
      "loss": 0.7383,
      "step": 757530
    },
    {
      "epoch": 2.6549888373690536,
      "grad_norm": 2.5625,
      "learning_rate": 6.389167971214281e-06,
      "loss": 0.6915,
      "step": 757540
    },
    {
      "epoch": 2.6550238848759493,
      "grad_norm": 2.96875,
      "learning_rate": 6.388518942550579e-06,
      "loss": 0.7346,
      "step": 757550
    },
    {
      "epoch": 2.655058932382845,
      "grad_norm": 2.765625,
      "learning_rate": 6.387869913886878e-06,
      "loss": 0.8298,
      "step": 757560
    },
    {
      "epoch": 2.6550939798897404,
      "grad_norm": 2.921875,
      "learning_rate": 6.387220885223174e-06,
      "loss": 0.794,
      "step": 757570
    },
    {
      "epoch": 2.655129027396636,
      "grad_norm": 2.984375,
      "learning_rate": 6.386571856559473e-06,
      "loss": 0.7743,
      "step": 757580
    },
    {
      "epoch": 2.655164074903532,
      "grad_norm": 2.890625,
      "learning_rate": 6.385922827895771e-06,
      "loss": 0.7859,
      "step": 757590
    },
    {
      "epoch": 2.655199122410427,
      "grad_norm": 2.734375,
      "learning_rate": 6.385273799232069e-06,
      "loss": 0.785,
      "step": 757600
    },
    {
      "epoch": 2.655234169917323,
      "grad_norm": 2.78125,
      "learning_rate": 6.384624770568367e-06,
      "loss": 0.7264,
      "step": 757610
    },
    {
      "epoch": 2.6552692174242187,
      "grad_norm": 3.296875,
      "learning_rate": 6.383975741904666e-06,
      "loss": 0.8397,
      "step": 757620
    },
    {
      "epoch": 2.655304264931114,
      "grad_norm": 2.640625,
      "learning_rate": 6.383326713240964e-06,
      "loss": 0.7909,
      "step": 757630
    },
    {
      "epoch": 2.65533931243801,
      "grad_norm": 3.21875,
      "learning_rate": 6.382677684577262e-06,
      "loss": 0.8811,
      "step": 757640
    },
    {
      "epoch": 2.655374359944905,
      "grad_norm": 2.453125,
      "learning_rate": 6.382028655913561e-06,
      "loss": 0.7396,
      "step": 757650
    },
    {
      "epoch": 2.655409407451801,
      "grad_norm": 3.03125,
      "learning_rate": 6.381379627249859e-06,
      "loss": 0.7916,
      "step": 757660
    },
    {
      "epoch": 2.6554444549586966,
      "grad_norm": 2.90625,
      "learning_rate": 6.380730598586156e-06,
      "loss": 0.7843,
      "step": 757670
    },
    {
      "epoch": 2.655479502465592,
      "grad_norm": 3.140625,
      "learning_rate": 6.380081569922454e-06,
      "loss": 0.8053,
      "step": 757680
    },
    {
      "epoch": 2.6555145499724877,
      "grad_norm": 2.46875,
      "learning_rate": 6.379432541258752e-06,
      "loss": 0.7558,
      "step": 757690
    },
    {
      "epoch": 2.6555495974793835,
      "grad_norm": 3.015625,
      "learning_rate": 6.37878351259505e-06,
      "loss": 0.8419,
      "step": 757700
    },
    {
      "epoch": 2.6555846449862788,
      "grad_norm": 2.90625,
      "learning_rate": 6.378134483931349e-06,
      "loss": 0.8247,
      "step": 757710
    },
    {
      "epoch": 2.6556196924931745,
      "grad_norm": 3.765625,
      "learning_rate": 6.377485455267647e-06,
      "loss": 0.8189,
      "step": 757720
    },
    {
      "epoch": 2.6556547400000703,
      "grad_norm": 3.390625,
      "learning_rate": 6.376836426603945e-06,
      "loss": 0.807,
      "step": 757730
    },
    {
      "epoch": 2.6556897875069656,
      "grad_norm": 2.71875,
      "learning_rate": 6.376187397940244e-06,
      "loss": 0.8298,
      "step": 757740
    },
    {
      "epoch": 2.6557248350138614,
      "grad_norm": 2.640625,
      "learning_rate": 6.375538369276542e-06,
      "loss": 0.7501,
      "step": 757750
    },
    {
      "epoch": 2.6557598825207567,
      "grad_norm": 3.3125,
      "learning_rate": 6.374889340612838e-06,
      "loss": 0.793,
      "step": 757760
    },
    {
      "epoch": 2.6557949300276524,
      "grad_norm": 2.9375,
      "learning_rate": 6.374240311949137e-06,
      "loss": 0.8634,
      "step": 757770
    },
    {
      "epoch": 2.655829977534548,
      "grad_norm": 2.625,
      "learning_rate": 6.373591283285435e-06,
      "loss": 0.7938,
      "step": 757780
    },
    {
      "epoch": 2.655865025041444,
      "grad_norm": 3.375,
      "learning_rate": 6.372942254621733e-06,
      "loss": 0.8594,
      "step": 757790
    },
    {
      "epoch": 2.6559000725483393,
      "grad_norm": 3.09375,
      "learning_rate": 6.372293225958032e-06,
      "loss": 0.8129,
      "step": 757800
    },
    {
      "epoch": 2.655935120055235,
      "grad_norm": 3.03125,
      "learning_rate": 6.37164419729433e-06,
      "loss": 0.905,
      "step": 757810
    },
    {
      "epoch": 2.6559701675621303,
      "grad_norm": 2.75,
      "learning_rate": 6.370995168630628e-06,
      "loss": 0.759,
      "step": 757820
    },
    {
      "epoch": 2.656005215069026,
      "grad_norm": 3.046875,
      "learning_rate": 6.370346139966926e-06,
      "loss": 0.837,
      "step": 757830
    },
    {
      "epoch": 2.656040262575922,
      "grad_norm": 2.875,
      "learning_rate": 6.369697111303225e-06,
      "loss": 0.8023,
      "step": 757840
    },
    {
      "epoch": 2.656075310082817,
      "grad_norm": 3.125,
      "learning_rate": 6.369048082639523e-06,
      "loss": 0.7456,
      "step": 757850
    },
    {
      "epoch": 2.656110357589713,
      "grad_norm": 2.84375,
      "learning_rate": 6.36839905397582e-06,
      "loss": 0.7865,
      "step": 757860
    },
    {
      "epoch": 2.6561454050966082,
      "grad_norm": 2.84375,
      "learning_rate": 6.367750025312118e-06,
      "loss": 0.7282,
      "step": 757870
    },
    {
      "epoch": 2.656180452603504,
      "grad_norm": 3.046875,
      "learning_rate": 6.367100996648416e-06,
      "loss": 0.775,
      "step": 757880
    },
    {
      "epoch": 2.6562155001103998,
      "grad_norm": 3.25,
      "learning_rate": 6.366451967984715e-06,
      "loss": 0.8471,
      "step": 757890
    },
    {
      "epoch": 2.6562505476172955,
      "grad_norm": 3.140625,
      "learning_rate": 6.365802939321013e-06,
      "loss": 0.8498,
      "step": 757900
    },
    {
      "epoch": 2.656285595124191,
      "grad_norm": 2.5625,
      "learning_rate": 6.365153910657311e-06,
      "loss": 0.7577,
      "step": 757910
    },
    {
      "epoch": 2.6563206426310866,
      "grad_norm": 2.453125,
      "learning_rate": 6.364504881993609e-06,
      "loss": 0.7944,
      "step": 757920
    },
    {
      "epoch": 2.656355690137982,
      "grad_norm": 3.015625,
      "learning_rate": 6.3638558533299075e-06,
      "loss": 0.74,
      "step": 757930
    },
    {
      "epoch": 2.6563907376448777,
      "grad_norm": 2.640625,
      "learning_rate": 6.3632068246662055e-06,
      "loss": 0.8929,
      "step": 757940
    },
    {
      "epoch": 2.6564257851517734,
      "grad_norm": 2.625,
      "learning_rate": 6.362557796002503e-06,
      "loss": 0.8167,
      "step": 757950
    },
    {
      "epoch": 2.6564608326586687,
      "grad_norm": 3.109375,
      "learning_rate": 6.361908767338801e-06,
      "loss": 0.7866,
      "step": 757960
    },
    {
      "epoch": 2.6564958801655645,
      "grad_norm": 3.21875,
      "learning_rate": 6.361259738675099e-06,
      "loss": 0.8528,
      "step": 757970
    },
    {
      "epoch": 2.65653092767246,
      "grad_norm": 3.0625,
      "learning_rate": 6.3606107100113975e-06,
      "loss": 0.7101,
      "step": 757980
    },
    {
      "epoch": 2.6565659751793556,
      "grad_norm": 2.828125,
      "learning_rate": 6.3599616813476955e-06,
      "loss": 0.7819,
      "step": 757990
    },
    {
      "epoch": 2.6566010226862513,
      "grad_norm": 2.890625,
      "learning_rate": 6.3593126526839935e-06,
      "loss": 0.7917,
      "step": 758000
    },
    {
      "epoch": 2.656636070193147,
      "grad_norm": 2.984375,
      "learning_rate": 6.3586636240202915e-06,
      "loss": 0.8391,
      "step": 758010
    },
    {
      "epoch": 2.6566711177000424,
      "grad_norm": 2.734375,
      "learning_rate": 6.35801459535659e-06,
      "loss": 0.8019,
      "step": 758020
    },
    {
      "epoch": 2.656706165206938,
      "grad_norm": 2.984375,
      "learning_rate": 6.357365566692888e-06,
      "loss": 0.7719,
      "step": 758030
    },
    {
      "epoch": 2.6567412127138335,
      "grad_norm": 2.859375,
      "learning_rate": 6.3567165380291855e-06,
      "loss": 0.7617,
      "step": 758040
    },
    {
      "epoch": 2.656776260220729,
      "grad_norm": 2.78125,
      "learning_rate": 6.3560675093654835e-06,
      "loss": 0.7975,
      "step": 758050
    },
    {
      "epoch": 2.656811307727625,
      "grad_norm": 3.28125,
      "learning_rate": 6.3554184807017815e-06,
      "loss": 0.8284,
      "step": 758060
    },
    {
      "epoch": 2.6568463552345203,
      "grad_norm": 3.3125,
      "learning_rate": 6.3547694520380795e-06,
      "loss": 0.8697,
      "step": 758070
    },
    {
      "epoch": 2.656881402741416,
      "grad_norm": 2.609375,
      "learning_rate": 6.354120423374378e-06,
      "loss": 0.7915,
      "step": 758080
    },
    {
      "epoch": 2.6569164502483114,
      "grad_norm": 2.78125,
      "learning_rate": 6.353471394710676e-06,
      "loss": 0.8251,
      "step": 758090
    },
    {
      "epoch": 2.656951497755207,
      "grad_norm": 2.703125,
      "learning_rate": 6.352822366046974e-06,
      "loss": 0.8321,
      "step": 758100
    },
    {
      "epoch": 2.656986545262103,
      "grad_norm": 2.78125,
      "learning_rate": 6.352173337383273e-06,
      "loss": 0.8284,
      "step": 758110
    },
    {
      "epoch": 2.6570215927689986,
      "grad_norm": 2.8125,
      "learning_rate": 6.351524308719571e-06,
      "loss": 0.7163,
      "step": 758120
    },
    {
      "epoch": 2.657056640275894,
      "grad_norm": 3.375,
      "learning_rate": 6.350875280055869e-06,
      "loss": 0.9044,
      "step": 758130
    },
    {
      "epoch": 2.6570916877827897,
      "grad_norm": 3.25,
      "learning_rate": 6.350226251392166e-06,
      "loss": 0.8724,
      "step": 758140
    },
    {
      "epoch": 2.657126735289685,
      "grad_norm": 3.21875,
      "learning_rate": 6.349577222728464e-06,
      "loss": 0.7979,
      "step": 758150
    },
    {
      "epoch": 2.6571617827965808,
      "grad_norm": 2.953125,
      "learning_rate": 6.348928194064762e-06,
      "loss": 0.7378,
      "step": 758160
    },
    {
      "epoch": 2.6571968303034765,
      "grad_norm": 3.171875,
      "learning_rate": 6.348279165401061e-06,
      "loss": 0.8095,
      "step": 758170
    },
    {
      "epoch": 2.657231877810372,
      "grad_norm": 3.03125,
      "learning_rate": 6.347630136737359e-06,
      "loss": 0.7784,
      "step": 758180
    },
    {
      "epoch": 2.6572669253172676,
      "grad_norm": 3.21875,
      "learning_rate": 6.346981108073657e-06,
      "loss": 0.7869,
      "step": 758190
    },
    {
      "epoch": 2.657301972824163,
      "grad_norm": 3.203125,
      "learning_rate": 6.346332079409956e-06,
      "loss": 0.776,
      "step": 758200
    },
    {
      "epoch": 2.6573370203310587,
      "grad_norm": 2.84375,
      "learning_rate": 6.345683050746254e-06,
      "loss": 0.8458,
      "step": 758210
    },
    {
      "epoch": 2.6573720678379544,
      "grad_norm": 3.125,
      "learning_rate": 6.345034022082552e-06,
      "loss": 0.8436,
      "step": 758220
    },
    {
      "epoch": 2.65740711534485,
      "grad_norm": 3.390625,
      "learning_rate": 6.344384993418849e-06,
      "loss": 0.7713,
      "step": 758230
    },
    {
      "epoch": 2.6574421628517455,
      "grad_norm": 2.5625,
      "learning_rate": 6.343735964755147e-06,
      "loss": 0.7839,
      "step": 758240
    },
    {
      "epoch": 2.6574772103586413,
      "grad_norm": 2.640625,
      "learning_rate": 6.343086936091445e-06,
      "loss": 0.7562,
      "step": 758250
    },
    {
      "epoch": 2.6575122578655366,
      "grad_norm": 2.890625,
      "learning_rate": 6.342437907427744e-06,
      "loss": 0.7086,
      "step": 758260
    },
    {
      "epoch": 2.6575473053724323,
      "grad_norm": 3.328125,
      "learning_rate": 6.341788878764042e-06,
      "loss": 0.7888,
      "step": 758270
    },
    {
      "epoch": 2.657582352879328,
      "grad_norm": 3.421875,
      "learning_rate": 6.34113985010034e-06,
      "loss": 0.7909,
      "step": 758280
    },
    {
      "epoch": 2.6576174003862234,
      "grad_norm": 3.03125,
      "learning_rate": 6.340490821436639e-06,
      "loss": 0.8497,
      "step": 758290
    },
    {
      "epoch": 2.657652447893119,
      "grad_norm": 3.109375,
      "learning_rate": 6.339841792772937e-06,
      "loss": 0.8507,
      "step": 758300
    },
    {
      "epoch": 2.6576874954000145,
      "grad_norm": 3.234375,
      "learning_rate": 6.339192764109235e-06,
      "loss": 0.8319,
      "step": 758310
    },
    {
      "epoch": 2.6577225429069102,
      "grad_norm": 3.09375,
      "learning_rate": 6.338543735445533e-06,
      "loss": 0.8099,
      "step": 758320
    },
    {
      "epoch": 2.657757590413806,
      "grad_norm": 3.03125,
      "learning_rate": 6.33789470678183e-06,
      "loss": 0.8648,
      "step": 758330
    },
    {
      "epoch": 2.6577926379207018,
      "grad_norm": 3.453125,
      "learning_rate": 6.337245678118128e-06,
      "loss": 0.7733,
      "step": 758340
    },
    {
      "epoch": 2.657827685427597,
      "grad_norm": 2.75,
      "learning_rate": 6.336596649454427e-06,
      "loss": 0.725,
      "step": 758350
    },
    {
      "epoch": 2.657862732934493,
      "grad_norm": 2.65625,
      "learning_rate": 6.335947620790725e-06,
      "loss": 0.7035,
      "step": 758360
    },
    {
      "epoch": 2.657897780441388,
      "grad_norm": 2.671875,
      "learning_rate": 6.335298592127023e-06,
      "loss": 0.8347,
      "step": 758370
    },
    {
      "epoch": 2.657932827948284,
      "grad_norm": 2.9375,
      "learning_rate": 6.334649563463321e-06,
      "loss": 0.8245,
      "step": 758380
    },
    {
      "epoch": 2.6579678754551797,
      "grad_norm": 2.75,
      "learning_rate": 6.33400053479962e-06,
      "loss": 0.8424,
      "step": 758390
    },
    {
      "epoch": 2.658002922962075,
      "grad_norm": 3.21875,
      "learning_rate": 6.333351506135918e-06,
      "loss": 0.8072,
      "step": 758400
    },
    {
      "epoch": 2.6580379704689707,
      "grad_norm": 2.609375,
      "learning_rate": 6.332702477472216e-06,
      "loss": 0.7259,
      "step": 758410
    },
    {
      "epoch": 2.658073017975866,
      "grad_norm": 2.953125,
      "learning_rate": 6.332053448808513e-06,
      "loss": 0.8317,
      "step": 758420
    },
    {
      "epoch": 2.658108065482762,
      "grad_norm": 2.953125,
      "learning_rate": 6.331404420144811e-06,
      "loss": 0.9531,
      "step": 758430
    },
    {
      "epoch": 2.6581431129896576,
      "grad_norm": 3.140625,
      "learning_rate": 6.33075539148111e-06,
      "loss": 0.7791,
      "step": 758440
    },
    {
      "epoch": 2.6581781604965533,
      "grad_norm": 2.84375,
      "learning_rate": 6.330106362817408e-06,
      "loss": 0.7866,
      "step": 758450
    },
    {
      "epoch": 2.6582132080034486,
      "grad_norm": 3.015625,
      "learning_rate": 6.329457334153706e-06,
      "loss": 0.8275,
      "step": 758460
    },
    {
      "epoch": 2.6582482555103444,
      "grad_norm": 3.15625,
      "learning_rate": 6.328808305490004e-06,
      "loss": 0.7835,
      "step": 758470
    },
    {
      "epoch": 2.6582833030172397,
      "grad_norm": 2.859375,
      "learning_rate": 6.3281592768263026e-06,
      "loss": 0.8038,
      "step": 758480
    },
    {
      "epoch": 2.6583183505241355,
      "grad_norm": 2.625,
      "learning_rate": 6.3275102481626006e-06,
      "loss": 0.8097,
      "step": 758490
    },
    {
      "epoch": 2.658353398031031,
      "grad_norm": 2.671875,
      "learning_rate": 6.3268612194988986e-06,
      "loss": 0.7555,
      "step": 758500
    },
    {
      "epoch": 2.6583884455379265,
      "grad_norm": 3.28125,
      "learning_rate": 6.326212190835196e-06,
      "loss": 0.9355,
      "step": 758510
    },
    {
      "epoch": 2.6584234930448223,
      "grad_norm": 3.15625,
      "learning_rate": 6.325563162171494e-06,
      "loss": 0.8841,
      "step": 758520
    },
    {
      "epoch": 2.6584585405517176,
      "grad_norm": 2.953125,
      "learning_rate": 6.3249141335077926e-06,
      "loss": 0.849,
      "step": 758530
    },
    {
      "epoch": 2.6584935880586134,
      "grad_norm": 2.546875,
      "learning_rate": 6.3242651048440906e-06,
      "loss": 0.7571,
      "step": 758540
    },
    {
      "epoch": 2.658528635565509,
      "grad_norm": 3.0,
      "learning_rate": 6.3236160761803886e-06,
      "loss": 0.849,
      "step": 758550
    },
    {
      "epoch": 2.658563683072405,
      "grad_norm": 3.296875,
      "learning_rate": 6.3229670475166866e-06,
      "loss": 0.7837,
      "step": 758560
    },
    {
      "epoch": 2.6585987305793,
      "grad_norm": 3.015625,
      "learning_rate": 6.322318018852985e-06,
      "loss": 0.805,
      "step": 758570
    },
    {
      "epoch": 2.658633778086196,
      "grad_norm": 3.15625,
      "learning_rate": 6.321668990189283e-06,
      "loss": 0.7473,
      "step": 758580
    },
    {
      "epoch": 2.6586688255930913,
      "grad_norm": 3.40625,
      "learning_rate": 6.321019961525581e-06,
      "loss": 0.8038,
      "step": 758590
    },
    {
      "epoch": 2.658703873099987,
      "grad_norm": 2.90625,
      "learning_rate": 6.32037093286188e-06,
      "loss": 0.8354,
      "step": 758600
    },
    {
      "epoch": 2.6587389206068828,
      "grad_norm": 3.296875,
      "learning_rate": 6.3197219041981766e-06,
      "loss": 0.7499,
      "step": 758610
    },
    {
      "epoch": 2.658773968113778,
      "grad_norm": 3.09375,
      "learning_rate": 6.3190728755344746e-06,
      "loss": 0.8517,
      "step": 758620
    },
    {
      "epoch": 2.658809015620674,
      "grad_norm": 3.09375,
      "learning_rate": 6.318423846870773e-06,
      "loss": 0.7835,
      "step": 758630
    },
    {
      "epoch": 2.658844063127569,
      "grad_norm": 2.6875,
      "learning_rate": 6.317774818207071e-06,
      "loss": 0.7373,
      "step": 758640
    },
    {
      "epoch": 2.658879110634465,
      "grad_norm": 2.328125,
      "learning_rate": 6.317125789543369e-06,
      "loss": 0.7904,
      "step": 758650
    },
    {
      "epoch": 2.6589141581413607,
      "grad_norm": 3.125,
      "learning_rate": 6.316476760879668e-06,
      "loss": 0.8422,
      "step": 758660
    },
    {
      "epoch": 2.6589492056482564,
      "grad_norm": 3.234375,
      "learning_rate": 6.315827732215966e-06,
      "loss": 0.8023,
      "step": 758670
    },
    {
      "epoch": 2.6589842531551517,
      "grad_norm": 2.65625,
      "learning_rate": 6.315178703552264e-06,
      "loss": 0.7754,
      "step": 758680
    },
    {
      "epoch": 2.6590193006620475,
      "grad_norm": 2.703125,
      "learning_rate": 6.314529674888562e-06,
      "loss": 0.8198,
      "step": 758690
    },
    {
      "epoch": 2.659054348168943,
      "grad_norm": 3.015625,
      "learning_rate": 6.313880646224859e-06,
      "loss": 0.8288,
      "step": 758700
    },
    {
      "epoch": 2.6590893956758386,
      "grad_norm": 3.203125,
      "learning_rate": 6.313231617561157e-06,
      "loss": 0.8913,
      "step": 758710
    },
    {
      "epoch": 2.6591244431827343,
      "grad_norm": 2.59375,
      "learning_rate": 6.312582588897456e-06,
      "loss": 0.7466,
      "step": 758720
    },
    {
      "epoch": 2.6591594906896296,
      "grad_norm": 2.53125,
      "learning_rate": 6.311933560233754e-06,
      "loss": 0.7911,
      "step": 758730
    },
    {
      "epoch": 2.6591945381965254,
      "grad_norm": 2.71875,
      "learning_rate": 6.311284531570052e-06,
      "loss": 0.7574,
      "step": 758740
    },
    {
      "epoch": 2.6592295857034207,
      "grad_norm": 2.515625,
      "learning_rate": 6.310635502906351e-06,
      "loss": 0.7214,
      "step": 758750
    },
    {
      "epoch": 2.6592646332103165,
      "grad_norm": 2.453125,
      "learning_rate": 6.309986474242649e-06,
      "loss": 0.7523,
      "step": 758760
    },
    {
      "epoch": 2.6592996807172122,
      "grad_norm": 3.1875,
      "learning_rate": 6.309337445578947e-06,
      "loss": 0.7844,
      "step": 758770
    },
    {
      "epoch": 2.659334728224108,
      "grad_norm": 3.0,
      "learning_rate": 6.308688416915245e-06,
      "loss": 0.8361,
      "step": 758780
    },
    {
      "epoch": 2.6593697757310033,
      "grad_norm": 2.515625,
      "learning_rate": 6.308039388251544e-06,
      "loss": 0.7716,
      "step": 758790
    },
    {
      "epoch": 2.659404823237899,
      "grad_norm": 2.78125,
      "learning_rate": 6.30739035958784e-06,
      "loss": 0.8725,
      "step": 758800
    },
    {
      "epoch": 2.6594398707447944,
      "grad_norm": 2.84375,
      "learning_rate": 6.306741330924139e-06,
      "loss": 0.8156,
      "step": 758810
    },
    {
      "epoch": 2.65947491825169,
      "grad_norm": 2.65625,
      "learning_rate": 6.306092302260437e-06,
      "loss": 0.8219,
      "step": 758820
    },
    {
      "epoch": 2.659509965758586,
      "grad_norm": 2.640625,
      "learning_rate": 6.305443273596735e-06,
      "loss": 0.8037,
      "step": 758830
    },
    {
      "epoch": 2.659545013265481,
      "grad_norm": 2.90625,
      "learning_rate": 6.304794244933034e-06,
      "loss": 0.7536,
      "step": 758840
    },
    {
      "epoch": 2.659580060772377,
      "grad_norm": 2.796875,
      "learning_rate": 6.304145216269332e-06,
      "loss": 0.8031,
      "step": 758850
    },
    {
      "epoch": 2.6596151082792723,
      "grad_norm": 3.109375,
      "learning_rate": 6.30349618760563e-06,
      "loss": 0.7421,
      "step": 758860
    },
    {
      "epoch": 2.659650155786168,
      "grad_norm": 3.203125,
      "learning_rate": 6.302847158941928e-06,
      "loss": 0.7755,
      "step": 758870
    },
    {
      "epoch": 2.659685203293064,
      "grad_norm": 3.015625,
      "learning_rate": 6.302198130278227e-06,
      "loss": 0.8109,
      "step": 758880
    },
    {
      "epoch": 2.6597202507999596,
      "grad_norm": 2.84375,
      "learning_rate": 6.301549101614523e-06,
      "loss": 0.8437,
      "step": 758890
    },
    {
      "epoch": 2.659755298306855,
      "grad_norm": 2.734375,
      "learning_rate": 6.300900072950822e-06,
      "loss": 0.8015,
      "step": 758900
    },
    {
      "epoch": 2.6597903458137506,
      "grad_norm": 3.0,
      "learning_rate": 6.30025104428712e-06,
      "loss": 0.7908,
      "step": 758910
    },
    {
      "epoch": 2.659825393320646,
      "grad_norm": 2.765625,
      "learning_rate": 6.299602015623418e-06,
      "loss": 0.7788,
      "step": 758920
    },
    {
      "epoch": 2.6598604408275417,
      "grad_norm": 2.84375,
      "learning_rate": 6.298952986959716e-06,
      "loss": 0.7989,
      "step": 758930
    },
    {
      "epoch": 2.6598954883344375,
      "grad_norm": 3.078125,
      "learning_rate": 6.298303958296015e-06,
      "loss": 0.8667,
      "step": 758940
    },
    {
      "epoch": 2.6599305358413328,
      "grad_norm": 2.5625,
      "learning_rate": 6.297654929632313e-06,
      "loss": 0.7763,
      "step": 758950
    },
    {
      "epoch": 2.6599655833482285,
      "grad_norm": 2.921875,
      "learning_rate": 6.297005900968611e-06,
      "loss": 0.8079,
      "step": 758960
    },
    {
      "epoch": 2.6600006308551243,
      "grad_norm": 3.046875,
      "learning_rate": 6.29635687230491e-06,
      "loss": 0.7744,
      "step": 758970
    },
    {
      "epoch": 2.6600356783620196,
      "grad_norm": 2.796875,
      "learning_rate": 6.295707843641206e-06,
      "loss": 0.8234,
      "step": 758980
    },
    {
      "epoch": 2.6600707258689154,
      "grad_norm": 2.953125,
      "learning_rate": 6.295058814977505e-06,
      "loss": 0.8126,
      "step": 758990
    },
    {
      "epoch": 2.660105773375811,
      "grad_norm": 2.96875,
      "learning_rate": 6.294409786313803e-06,
      "loss": 0.7873,
      "step": 759000
    },
    {
      "epoch": 2.6601408208827064,
      "grad_norm": 2.625,
      "learning_rate": 6.293760757650101e-06,
      "loss": 0.7889,
      "step": 759010
    },
    {
      "epoch": 2.660175868389602,
      "grad_norm": 2.984375,
      "learning_rate": 6.293111728986399e-06,
      "loss": 0.789,
      "step": 759020
    },
    {
      "epoch": 2.6602109158964975,
      "grad_norm": 2.96875,
      "learning_rate": 6.292462700322698e-06,
      "loss": 0.8927,
      "step": 759030
    },
    {
      "epoch": 2.6602459634033933,
      "grad_norm": 3.0,
      "learning_rate": 6.291813671658996e-06,
      "loss": 0.8899,
      "step": 759040
    },
    {
      "epoch": 2.660281010910289,
      "grad_norm": 2.875,
      "learning_rate": 6.291164642995294e-06,
      "loss": 0.6953,
      "step": 759050
    },
    {
      "epoch": 2.6603160584171848,
      "grad_norm": 3.15625,
      "learning_rate": 6.2905156143315925e-06,
      "loss": 0.8769,
      "step": 759060
    },
    {
      "epoch": 2.66035110592408,
      "grad_norm": 3.140625,
      "learning_rate": 6.2898665856678905e-06,
      "loss": 0.8172,
      "step": 759070
    },
    {
      "epoch": 2.660386153430976,
      "grad_norm": 2.828125,
      "learning_rate": 6.289217557004188e-06,
      "loss": 0.7777,
      "step": 759080
    },
    {
      "epoch": 2.660421200937871,
      "grad_norm": 3.4375,
      "learning_rate": 6.288568528340486e-06,
      "loss": 0.7683,
      "step": 759090
    },
    {
      "epoch": 2.660456248444767,
      "grad_norm": 3.296875,
      "learning_rate": 6.287919499676784e-06,
      "loss": 0.8421,
      "step": 759100
    },
    {
      "epoch": 2.6604912959516627,
      "grad_norm": 6.375,
      "learning_rate": 6.287270471013082e-06,
      "loss": 0.8209,
      "step": 759110
    },
    {
      "epoch": 2.660526343458558,
      "grad_norm": 2.90625,
      "learning_rate": 6.2866214423493805e-06,
      "loss": 0.8427,
      "step": 759120
    },
    {
      "epoch": 2.6605613909654537,
      "grad_norm": 3.0625,
      "learning_rate": 6.2859724136856785e-06,
      "loss": 0.7656,
      "step": 759130
    },
    {
      "epoch": 2.660596438472349,
      "grad_norm": 2.875,
      "learning_rate": 6.2853233850219765e-06,
      "loss": 0.8348,
      "step": 759140
    },
    {
      "epoch": 2.660631485979245,
      "grad_norm": 2.53125,
      "learning_rate": 6.284674356358275e-06,
      "loss": 0.8046,
      "step": 759150
    },
    {
      "epoch": 2.6606665334861406,
      "grad_norm": 3.3125,
      "learning_rate": 6.284025327694573e-06,
      "loss": 0.8508,
      "step": 759160
    },
    {
      "epoch": 2.6607015809930363,
      "grad_norm": 2.703125,
      "learning_rate": 6.28337629903087e-06,
      "loss": 0.8155,
      "step": 759170
    },
    {
      "epoch": 2.6607366284999316,
      "grad_norm": 2.609375,
      "learning_rate": 6.2827272703671685e-06,
      "loss": 0.7364,
      "step": 759180
    },
    {
      "epoch": 2.6607716760068274,
      "grad_norm": 2.984375,
      "learning_rate": 6.2820782417034665e-06,
      "loss": 0.7645,
      "step": 759190
    },
    {
      "epoch": 2.6608067235137227,
      "grad_norm": 2.890625,
      "learning_rate": 6.2814292130397645e-06,
      "loss": 0.7969,
      "step": 759200
    },
    {
      "epoch": 2.6608417710206185,
      "grad_norm": 2.84375,
      "learning_rate": 6.280780184376063e-06,
      "loss": 0.7206,
      "step": 759210
    },
    {
      "epoch": 2.6608768185275142,
      "grad_norm": 3.203125,
      "learning_rate": 6.280131155712361e-06,
      "loss": 0.7806,
      "step": 759220
    },
    {
      "epoch": 2.6609118660344095,
      "grad_norm": 3.0625,
      "learning_rate": 6.279482127048659e-06,
      "loss": 0.8409,
      "step": 759230
    },
    {
      "epoch": 2.6609469135413053,
      "grad_norm": 3.0,
      "learning_rate": 6.278833098384957e-06,
      "loss": 0.8694,
      "step": 759240
    },
    {
      "epoch": 2.6609819610482006,
      "grad_norm": 3.09375,
      "learning_rate": 6.278184069721256e-06,
      "loss": 0.8808,
      "step": 759250
    },
    {
      "epoch": 2.6610170085550964,
      "grad_norm": 2.6875,
      "learning_rate": 6.277535041057554e-06,
      "loss": 0.8401,
      "step": 759260
    },
    {
      "epoch": 2.661052056061992,
      "grad_norm": 3.203125,
      "learning_rate": 6.276886012393851e-06,
      "loss": 0.7501,
      "step": 759270
    },
    {
      "epoch": 2.661087103568888,
      "grad_norm": 3.203125,
      "learning_rate": 6.276236983730149e-06,
      "loss": 0.8248,
      "step": 759280
    },
    {
      "epoch": 2.661122151075783,
      "grad_norm": 2.8125,
      "learning_rate": 6.275587955066447e-06,
      "loss": 0.7979,
      "step": 759290
    },
    {
      "epoch": 2.661157198582679,
      "grad_norm": 2.890625,
      "learning_rate": 6.274938926402746e-06,
      "loss": 0.7679,
      "step": 759300
    },
    {
      "epoch": 2.6611922460895743,
      "grad_norm": 2.734375,
      "learning_rate": 6.274289897739044e-06,
      "loss": 0.7813,
      "step": 759310
    },
    {
      "epoch": 2.66122729359647,
      "grad_norm": 2.796875,
      "learning_rate": 6.273640869075342e-06,
      "loss": 0.7713,
      "step": 759320
    },
    {
      "epoch": 2.661262341103366,
      "grad_norm": 3.21875,
      "learning_rate": 6.27299184041164e-06,
      "loss": 0.8196,
      "step": 759330
    },
    {
      "epoch": 2.661297388610261,
      "grad_norm": 2.40625,
      "learning_rate": 6.272342811747939e-06,
      "loss": 0.8116,
      "step": 759340
    },
    {
      "epoch": 2.661332436117157,
      "grad_norm": 3.09375,
      "learning_rate": 6.271693783084237e-06,
      "loss": 0.7921,
      "step": 759350
    },
    {
      "epoch": 2.661367483624052,
      "grad_norm": 2.609375,
      "learning_rate": 6.271044754420534e-06,
      "loss": 0.8166,
      "step": 759360
    },
    {
      "epoch": 2.661402531130948,
      "grad_norm": 2.96875,
      "learning_rate": 6.270395725756832e-06,
      "loss": 0.8563,
      "step": 759370
    },
    {
      "epoch": 2.6614375786378437,
      "grad_norm": 3.140625,
      "learning_rate": 6.26974669709313e-06,
      "loss": 0.7825,
      "step": 759380
    },
    {
      "epoch": 2.6614726261447395,
      "grad_norm": 3.25,
      "learning_rate": 6.269097668429429e-06,
      "loss": 0.8628,
      "step": 759390
    },
    {
      "epoch": 2.6615076736516348,
      "grad_norm": 3.171875,
      "learning_rate": 6.268448639765727e-06,
      "loss": 0.692,
      "step": 759400
    },
    {
      "epoch": 2.6615427211585305,
      "grad_norm": 3.453125,
      "learning_rate": 6.267799611102025e-06,
      "loss": 0.7726,
      "step": 759410
    },
    {
      "epoch": 2.661577768665426,
      "grad_norm": 3.140625,
      "learning_rate": 6.267150582438323e-06,
      "loss": 0.735,
      "step": 759420
    },
    {
      "epoch": 2.6616128161723216,
      "grad_norm": 3.296875,
      "learning_rate": 6.266501553774622e-06,
      "loss": 0.7586,
      "step": 759430
    },
    {
      "epoch": 2.6616478636792174,
      "grad_norm": 3.1875,
      "learning_rate": 6.26585252511092e-06,
      "loss": 0.7875,
      "step": 759440
    },
    {
      "epoch": 2.6616829111861127,
      "grad_norm": 3.0,
      "learning_rate": 6.265203496447217e-06,
      "loss": 0.7989,
      "step": 759450
    },
    {
      "epoch": 2.6617179586930084,
      "grad_norm": 3.15625,
      "learning_rate": 6.264554467783515e-06,
      "loss": 0.8603,
      "step": 759460
    },
    {
      "epoch": 2.6617530061999037,
      "grad_norm": 3.078125,
      "learning_rate": 6.263905439119813e-06,
      "loss": 0.7907,
      "step": 759470
    },
    {
      "epoch": 2.6617880537067995,
      "grad_norm": 2.984375,
      "learning_rate": 6.263256410456111e-06,
      "loss": 0.8612,
      "step": 759480
    },
    {
      "epoch": 2.6618231012136953,
      "grad_norm": 3.328125,
      "learning_rate": 6.26260738179241e-06,
      "loss": 0.806,
      "step": 759490
    },
    {
      "epoch": 2.661858148720591,
      "grad_norm": 2.9375,
      "learning_rate": 6.261958353128708e-06,
      "loss": 0.8321,
      "step": 759500
    },
    {
      "epoch": 2.6618931962274863,
      "grad_norm": 2.421875,
      "learning_rate": 6.261309324465006e-06,
      "loss": 0.7283,
      "step": 759510
    },
    {
      "epoch": 2.661928243734382,
      "grad_norm": 2.875,
      "learning_rate": 6.260660295801305e-06,
      "loss": 0.7996,
      "step": 759520
    },
    {
      "epoch": 2.6619632912412774,
      "grad_norm": 3.015625,
      "learning_rate": 6.260011267137603e-06,
      "loss": 0.7684,
      "step": 759530
    },
    {
      "epoch": 2.661998338748173,
      "grad_norm": 2.484375,
      "learning_rate": 6.259362238473901e-06,
      "loss": 0.7634,
      "step": 759540
    },
    {
      "epoch": 2.662033386255069,
      "grad_norm": 2.625,
      "learning_rate": 6.258713209810198e-06,
      "loss": 0.798,
      "step": 759550
    },
    {
      "epoch": 2.6620684337619642,
      "grad_norm": 2.640625,
      "learning_rate": 6.258064181146496e-06,
      "loss": 0.804,
      "step": 759560
    },
    {
      "epoch": 2.66210348126886,
      "grad_norm": 2.90625,
      "learning_rate": 6.257415152482794e-06,
      "loss": 0.8328,
      "step": 759570
    },
    {
      "epoch": 2.6621385287757553,
      "grad_norm": 3.234375,
      "learning_rate": 6.256766123819093e-06,
      "loss": 0.8098,
      "step": 759580
    },
    {
      "epoch": 2.662173576282651,
      "grad_norm": 2.703125,
      "learning_rate": 6.256117095155391e-06,
      "loss": 0.8199,
      "step": 759590
    },
    {
      "epoch": 2.662208623789547,
      "grad_norm": 2.640625,
      "learning_rate": 6.255468066491689e-06,
      "loss": 0.8417,
      "step": 759600
    },
    {
      "epoch": 2.6622436712964426,
      "grad_norm": 3.234375,
      "learning_rate": 6.2548190378279876e-06,
      "loss": 0.8537,
      "step": 759610
    },
    {
      "epoch": 2.662278718803338,
      "grad_norm": 2.515625,
      "learning_rate": 6.2541700091642856e-06,
      "loss": 0.7466,
      "step": 759620
    },
    {
      "epoch": 2.6623137663102336,
      "grad_norm": 2.859375,
      "learning_rate": 6.2535209805005836e-06,
      "loss": 0.7802,
      "step": 759630
    },
    {
      "epoch": 2.662348813817129,
      "grad_norm": 2.890625,
      "learning_rate": 6.252871951836881e-06,
      "loss": 0.8609,
      "step": 759640
    },
    {
      "epoch": 2.6623838613240247,
      "grad_norm": 3.015625,
      "learning_rate": 6.252222923173179e-06,
      "loss": 0.8899,
      "step": 759650
    },
    {
      "epoch": 2.6624189088309205,
      "grad_norm": 3.234375,
      "learning_rate": 6.251573894509477e-06,
      "loss": 0.8336,
      "step": 759660
    },
    {
      "epoch": 2.662453956337816,
      "grad_norm": 2.25,
      "learning_rate": 6.2509248658457756e-06,
      "loss": 0.7284,
      "step": 759670
    },
    {
      "epoch": 2.6624890038447115,
      "grad_norm": 2.515625,
      "learning_rate": 6.2502758371820736e-06,
      "loss": 0.679,
      "step": 759680
    },
    {
      "epoch": 2.662524051351607,
      "grad_norm": 2.96875,
      "learning_rate": 6.2496268085183716e-06,
      "loss": 0.8049,
      "step": 759690
    },
    {
      "epoch": 2.6625590988585026,
      "grad_norm": 3.171875,
      "learning_rate": 6.24897777985467e-06,
      "loss": 0.8208,
      "step": 759700
    },
    {
      "epoch": 2.6625941463653984,
      "grad_norm": 2.828125,
      "learning_rate": 6.2483287511909676e-06,
      "loss": 0.8051,
      "step": 759710
    },
    {
      "epoch": 2.662629193872294,
      "grad_norm": 3.09375,
      "learning_rate": 6.2476797225272656e-06,
      "loss": 0.7922,
      "step": 759720
    },
    {
      "epoch": 2.6626642413791894,
      "grad_norm": 2.375,
      "learning_rate": 6.247030693863564e-06,
      "loss": 0.7348,
      "step": 759730
    },
    {
      "epoch": 2.662699288886085,
      "grad_norm": 2.875,
      "learning_rate": 6.246381665199862e-06,
      "loss": 0.8676,
      "step": 759740
    },
    {
      "epoch": 2.6627343363929805,
      "grad_norm": 2.9375,
      "learning_rate": 6.24573263653616e-06,
      "loss": 0.805,
      "step": 759750
    },
    {
      "epoch": 2.6627693838998763,
      "grad_norm": 3.125,
      "learning_rate": 6.245083607872458e-06,
      "loss": 0.8112,
      "step": 759760
    },
    {
      "epoch": 2.662804431406772,
      "grad_norm": 2.921875,
      "learning_rate": 6.244434579208756e-06,
      "loss": 0.8229,
      "step": 759770
    },
    {
      "epoch": 2.6628394789136673,
      "grad_norm": 2.46875,
      "learning_rate": 6.243785550545054e-06,
      "loss": 0.8236,
      "step": 759780
    },
    {
      "epoch": 2.662874526420563,
      "grad_norm": 2.984375,
      "learning_rate": 6.243136521881353e-06,
      "loss": 0.8705,
      "step": 759790
    },
    {
      "epoch": 2.6629095739274584,
      "grad_norm": 2.90625,
      "learning_rate": 6.242487493217651e-06,
      "loss": 0.7641,
      "step": 759800
    },
    {
      "epoch": 2.662944621434354,
      "grad_norm": 2.953125,
      "learning_rate": 6.241838464553948e-06,
      "loss": 0.7092,
      "step": 759810
    },
    {
      "epoch": 2.66297966894125,
      "grad_norm": 2.921875,
      "learning_rate": 6.241189435890247e-06,
      "loss": 0.7913,
      "step": 759820
    },
    {
      "epoch": 2.6630147164481457,
      "grad_norm": 3.25,
      "learning_rate": 6.240540407226545e-06,
      "loss": 0.8623,
      "step": 759830
    },
    {
      "epoch": 2.663049763955041,
      "grad_norm": 3.078125,
      "learning_rate": 6.239891378562843e-06,
      "loss": 0.7594,
      "step": 759840
    },
    {
      "epoch": 2.6630848114619368,
      "grad_norm": 2.828125,
      "learning_rate": 6.239242349899141e-06,
      "loss": 0.7579,
      "step": 759850
    },
    {
      "epoch": 2.663119858968832,
      "grad_norm": 3.0,
      "learning_rate": 6.238593321235439e-06,
      "loss": 0.7643,
      "step": 759860
    },
    {
      "epoch": 2.663154906475728,
      "grad_norm": 2.546875,
      "learning_rate": 6.237944292571737e-06,
      "loss": 0.8402,
      "step": 759870
    },
    {
      "epoch": 2.6631899539826236,
      "grad_norm": 2.875,
      "learning_rate": 6.237295263908035e-06,
      "loss": 0.723,
      "step": 759880
    },
    {
      "epoch": 2.663225001489519,
      "grad_norm": 3.109375,
      "learning_rate": 6.236646235244334e-06,
      "loss": 0.8633,
      "step": 759890
    },
    {
      "epoch": 2.6632600489964147,
      "grad_norm": 3.421875,
      "learning_rate": 6.235997206580631e-06,
      "loss": 0.8128,
      "step": 759900
    },
    {
      "epoch": 2.66329509650331,
      "grad_norm": 2.46875,
      "learning_rate": 6.23534817791693e-06,
      "loss": 0.7063,
      "step": 759910
    },
    {
      "epoch": 2.6633301440102057,
      "grad_norm": 3.078125,
      "learning_rate": 6.234699149253228e-06,
      "loss": 0.7725,
      "step": 759920
    },
    {
      "epoch": 2.6633651915171015,
      "grad_norm": 2.875,
      "learning_rate": 6.234050120589526e-06,
      "loss": 0.8336,
      "step": 759930
    },
    {
      "epoch": 2.6634002390239973,
      "grad_norm": 2.796875,
      "learning_rate": 6.233401091925824e-06,
      "loss": 0.8083,
      "step": 759940
    },
    {
      "epoch": 2.6634352865308926,
      "grad_norm": 2.484375,
      "learning_rate": 6.232752063262122e-06,
      "loss": 0.8357,
      "step": 759950
    },
    {
      "epoch": 2.6634703340377883,
      "grad_norm": 2.828125,
      "learning_rate": 6.23210303459842e-06,
      "loss": 0.819,
      "step": 759960
    },
    {
      "epoch": 2.6635053815446836,
      "grad_norm": 3.46875,
      "learning_rate": 6.231454005934718e-06,
      "loss": 0.8839,
      "step": 759970
    },
    {
      "epoch": 2.6635404290515794,
      "grad_norm": 2.828125,
      "learning_rate": 6.230804977271017e-06,
      "loss": 0.8828,
      "step": 759980
    },
    {
      "epoch": 2.663575476558475,
      "grad_norm": 2.78125,
      "learning_rate": 6.230155948607314e-06,
      "loss": 0.7588,
      "step": 759990
    },
    {
      "epoch": 2.6636105240653705,
      "grad_norm": 3.171875,
      "learning_rate": 6.229506919943612e-06,
      "loss": 0.8476,
      "step": 760000
    },
    {
      "epoch": 2.6636105240653705,
      "eval_loss": 0.7543493509292603,
      "eval_runtime": 554.2205,
      "eval_samples_per_second": 686.434,
      "eval_steps_per_second": 57.203,
      "step": 760000
    },
    {
      "epoch": 2.6636455715722662,
      "grad_norm": 2.75,
      "learning_rate": 6.228857891279911e-06,
      "loss": 0.8244,
      "step": 760010
    },
    {
      "epoch": 2.6636806190791615,
      "grad_norm": 3.109375,
      "learning_rate": 6.228208862616209e-06,
      "loss": 0.8557,
      "step": 760020
    },
    {
      "epoch": 2.6637156665860573,
      "grad_norm": 2.734375,
      "learning_rate": 6.227559833952507e-06,
      "loss": 0.8136,
      "step": 760030
    },
    {
      "epoch": 2.663750714092953,
      "grad_norm": 2.71875,
      "learning_rate": 6.226910805288805e-06,
      "loss": 0.8324,
      "step": 760040
    },
    {
      "epoch": 2.663785761599849,
      "grad_norm": 3.125,
      "learning_rate": 6.226261776625103e-06,
      "loss": 0.8127,
      "step": 760050
    },
    {
      "epoch": 2.663820809106744,
      "grad_norm": 3.203125,
      "learning_rate": 6.225612747961401e-06,
      "loss": 0.8206,
      "step": 760060
    },
    {
      "epoch": 2.66385585661364,
      "grad_norm": 2.890625,
      "learning_rate": 6.2249637192977e-06,
      "loss": 0.7022,
      "step": 760070
    },
    {
      "epoch": 2.663890904120535,
      "grad_norm": 3.21875,
      "learning_rate": 6.224314690633998e-06,
      "loss": 0.8128,
      "step": 760080
    },
    {
      "epoch": 2.663925951627431,
      "grad_norm": 3.0625,
      "learning_rate": 6.223665661970295e-06,
      "loss": 0.8235,
      "step": 760090
    },
    {
      "epoch": 2.6639609991343267,
      "grad_norm": 2.90625,
      "learning_rate": 6.223016633306594e-06,
      "loss": 0.8661,
      "step": 760100
    },
    {
      "epoch": 2.663996046641222,
      "grad_norm": 3.109375,
      "learning_rate": 6.222367604642892e-06,
      "loss": 0.8307,
      "step": 760110
    },
    {
      "epoch": 2.664031094148118,
      "grad_norm": 2.96875,
      "learning_rate": 6.22171857597919e-06,
      "loss": 0.7353,
      "step": 760120
    },
    {
      "epoch": 2.664066141655013,
      "grad_norm": 3.640625,
      "learning_rate": 6.221069547315489e-06,
      "loss": 0.8902,
      "step": 760130
    },
    {
      "epoch": 2.664101189161909,
      "grad_norm": 2.625,
      "learning_rate": 6.220420518651786e-06,
      "loss": 0.7514,
      "step": 760140
    },
    {
      "epoch": 2.6641362366688046,
      "grad_norm": 2.875,
      "learning_rate": 6.219771489988084e-06,
      "loss": 0.8229,
      "step": 760150
    },
    {
      "epoch": 2.6641712841757004,
      "grad_norm": 2.703125,
      "learning_rate": 6.219122461324383e-06,
      "loss": 0.8016,
      "step": 760160
    },
    {
      "epoch": 2.6642063316825957,
      "grad_norm": 2.828125,
      "learning_rate": 6.218473432660681e-06,
      "loss": 0.7712,
      "step": 760170
    },
    {
      "epoch": 2.6642413791894914,
      "grad_norm": 3.0,
      "learning_rate": 6.217824403996978e-06,
      "loss": 0.8175,
      "step": 760180
    },
    {
      "epoch": 2.6642764266963868,
      "grad_norm": 3.03125,
      "learning_rate": 6.217175375333277e-06,
      "loss": 0.7751,
      "step": 760190
    },
    {
      "epoch": 2.6643114742032825,
      "grad_norm": 2.703125,
      "learning_rate": 6.216526346669575e-06,
      "loss": 0.7983,
      "step": 760200
    },
    {
      "epoch": 2.6643465217101783,
      "grad_norm": 3.09375,
      "learning_rate": 6.215877318005873e-06,
      "loss": 0.8381,
      "step": 760210
    },
    {
      "epoch": 2.6643815692170736,
      "grad_norm": 3.015625,
      "learning_rate": 6.2152282893421715e-06,
      "loss": 0.8485,
      "step": 760220
    },
    {
      "epoch": 2.6644166167239693,
      "grad_norm": 2.421875,
      "learning_rate": 6.214579260678469e-06,
      "loss": 0.825,
      "step": 760230
    },
    {
      "epoch": 2.664451664230865,
      "grad_norm": 2.671875,
      "learning_rate": 6.213930232014767e-06,
      "loss": 0.7842,
      "step": 760240
    },
    {
      "epoch": 2.6644867117377604,
      "grad_norm": 2.515625,
      "learning_rate": 6.2132812033510655e-06,
      "loss": 0.7952,
      "step": 760250
    },
    {
      "epoch": 2.664521759244656,
      "grad_norm": 3.28125,
      "learning_rate": 6.2126321746873635e-06,
      "loss": 0.8348,
      "step": 760260
    },
    {
      "epoch": 2.664556806751552,
      "grad_norm": 2.703125,
      "learning_rate": 6.2119831460236615e-06,
      "loss": 0.7641,
      "step": 760270
    },
    {
      "epoch": 2.6645918542584472,
      "grad_norm": 2.609375,
      "learning_rate": 6.2113341173599595e-06,
      "loss": 0.7784,
      "step": 760280
    },
    {
      "epoch": 2.664626901765343,
      "grad_norm": 2.640625,
      "learning_rate": 6.2106850886962575e-06,
      "loss": 0.7432,
      "step": 760290
    },
    {
      "epoch": 2.6646619492722383,
      "grad_norm": 2.859375,
      "learning_rate": 6.2100360600325555e-06,
      "loss": 0.8728,
      "step": 760300
    },
    {
      "epoch": 2.664696996779134,
      "grad_norm": 2.828125,
      "learning_rate": 6.2093870313688535e-06,
      "loss": 0.748,
      "step": 760310
    },
    {
      "epoch": 2.66473204428603,
      "grad_norm": 2.609375,
      "learning_rate": 6.2087380027051515e-06,
      "loss": 0.7978,
      "step": 760320
    },
    {
      "epoch": 2.664767091792925,
      "grad_norm": 2.75,
      "learning_rate": 6.2080889740414495e-06,
      "loss": 0.8007,
      "step": 760330
    },
    {
      "epoch": 2.664802139299821,
      "grad_norm": 3.25,
      "learning_rate": 6.207439945377748e-06,
      "loss": 0.7778,
      "step": 760340
    },
    {
      "epoch": 2.6648371868067167,
      "grad_norm": 2.953125,
      "learning_rate": 6.206790916714046e-06,
      "loss": 0.8284,
      "step": 760350
    },
    {
      "epoch": 2.664872234313612,
      "grad_norm": 2.890625,
      "learning_rate": 6.206141888050344e-06,
      "loss": 0.798,
      "step": 760360
    },
    {
      "epoch": 2.6649072818205077,
      "grad_norm": 2.953125,
      "learning_rate": 6.205492859386642e-06,
      "loss": 0.9097,
      "step": 760370
    },
    {
      "epoch": 2.6649423293274035,
      "grad_norm": 2.765625,
      "learning_rate": 6.20484383072294e-06,
      "loss": 0.8432,
      "step": 760380
    },
    {
      "epoch": 2.664977376834299,
      "grad_norm": 3.1875,
      "learning_rate": 6.204194802059238e-06,
      "loss": 0.8968,
      "step": 760390
    },
    {
      "epoch": 2.6650124243411946,
      "grad_norm": 2.734375,
      "learning_rate": 6.203545773395536e-06,
      "loss": 0.8443,
      "step": 760400
    },
    {
      "epoch": 2.66504747184809,
      "grad_norm": 3.046875,
      "learning_rate": 6.202896744731835e-06,
      "loss": 0.8186,
      "step": 760410
    },
    {
      "epoch": 2.6650825193549856,
      "grad_norm": 2.859375,
      "learning_rate": 6.202247716068132e-06,
      "loss": 0.7956,
      "step": 760420
    },
    {
      "epoch": 2.6651175668618814,
      "grad_norm": 2.890625,
      "learning_rate": 6.20159868740443e-06,
      "loss": 0.8481,
      "step": 760430
    },
    {
      "epoch": 2.665152614368777,
      "grad_norm": 2.609375,
      "learning_rate": 6.200949658740729e-06,
      "loss": 0.6831,
      "step": 760440
    },
    {
      "epoch": 2.6651876618756725,
      "grad_norm": 2.875,
      "learning_rate": 6.200300630077027e-06,
      "loss": 0.8405,
      "step": 760450
    },
    {
      "epoch": 2.6652227093825682,
      "grad_norm": 2.859375,
      "learning_rate": 6.199651601413325e-06,
      "loss": 0.8362,
      "step": 760460
    },
    {
      "epoch": 2.6652577568894635,
      "grad_norm": 3.1875,
      "learning_rate": 6.199002572749623e-06,
      "loss": 0.8053,
      "step": 760470
    },
    {
      "epoch": 2.6652928043963593,
      "grad_norm": 2.71875,
      "learning_rate": 6.198353544085921e-06,
      "loss": 0.8011,
      "step": 760480
    },
    {
      "epoch": 2.665327851903255,
      "grad_norm": 2.9375,
      "learning_rate": 6.197704515422219e-06,
      "loss": 0.7786,
      "step": 760490
    },
    {
      "epoch": 2.6653628994101504,
      "grad_norm": 3.484375,
      "learning_rate": 6.197055486758518e-06,
      "loss": 0.8541,
      "step": 760500
    },
    {
      "epoch": 2.665397946917046,
      "grad_norm": 2.765625,
      "learning_rate": 6.196406458094815e-06,
      "loss": 0.8336,
      "step": 760510
    },
    {
      "epoch": 2.6654329944239414,
      "grad_norm": 2.828125,
      "learning_rate": 6.195757429431113e-06,
      "loss": 0.7541,
      "step": 760520
    },
    {
      "epoch": 2.665468041930837,
      "grad_norm": 3.265625,
      "learning_rate": 6.195108400767412e-06,
      "loss": 0.7582,
      "step": 760530
    },
    {
      "epoch": 2.665503089437733,
      "grad_norm": 2.96875,
      "learning_rate": 6.19445937210371e-06,
      "loss": 0.8079,
      "step": 760540
    },
    {
      "epoch": 2.6655381369446287,
      "grad_norm": 2.96875,
      "learning_rate": 6.193810343440008e-06,
      "loss": 0.8513,
      "step": 760550
    },
    {
      "epoch": 2.665573184451524,
      "grad_norm": 2.796875,
      "learning_rate": 6.193161314776306e-06,
      "loss": 0.7701,
      "step": 760560
    },
    {
      "epoch": 2.66560823195842,
      "grad_norm": 2.8125,
      "learning_rate": 6.192512286112604e-06,
      "loss": 0.7666,
      "step": 760570
    },
    {
      "epoch": 2.665643279465315,
      "grad_norm": 2.859375,
      "learning_rate": 6.191863257448902e-06,
      "loss": 0.708,
      "step": 760580
    },
    {
      "epoch": 2.665678326972211,
      "grad_norm": 2.71875,
      "learning_rate": 6.191214228785201e-06,
      "loss": 0.8721,
      "step": 760590
    },
    {
      "epoch": 2.6657133744791066,
      "grad_norm": 3.453125,
      "learning_rate": 6.190565200121499e-06,
      "loss": 0.7879,
      "step": 760600
    },
    {
      "epoch": 2.665748421986002,
      "grad_norm": 2.546875,
      "learning_rate": 6.189916171457796e-06,
      "loss": 0.7915,
      "step": 760610
    },
    {
      "epoch": 2.6657834694928977,
      "grad_norm": 2.765625,
      "learning_rate": 6.189267142794095e-06,
      "loss": 0.8092,
      "step": 760620
    },
    {
      "epoch": 2.665818516999793,
      "grad_norm": 2.421875,
      "learning_rate": 6.188618114130393e-06,
      "loss": 0.7721,
      "step": 760630
    },
    {
      "epoch": 2.6658535645066888,
      "grad_norm": 2.4375,
      "learning_rate": 6.187969085466691e-06,
      "loss": 0.814,
      "step": 760640
    },
    {
      "epoch": 2.6658886120135845,
      "grad_norm": 3.234375,
      "learning_rate": 6.187320056802989e-06,
      "loss": 0.7812,
      "step": 760650
    },
    {
      "epoch": 2.6659236595204803,
      "grad_norm": 2.546875,
      "learning_rate": 6.186671028139287e-06,
      "loss": 0.753,
      "step": 760660
    },
    {
      "epoch": 2.6659587070273756,
      "grad_norm": 3.1875,
      "learning_rate": 6.186021999475585e-06,
      "loss": 0.7851,
      "step": 760670
    },
    {
      "epoch": 2.6659937545342713,
      "grad_norm": 2.796875,
      "learning_rate": 6.185372970811884e-06,
      "loss": 0.7555,
      "step": 760680
    },
    {
      "epoch": 2.6660288020411667,
      "grad_norm": 3.0625,
      "learning_rate": 6.184723942148182e-06,
      "loss": 0.7611,
      "step": 760690
    },
    {
      "epoch": 2.6660638495480624,
      "grad_norm": 2.859375,
      "learning_rate": 6.184074913484479e-06,
      "loss": 0.7566,
      "step": 760700
    },
    {
      "epoch": 2.666098897054958,
      "grad_norm": 3.265625,
      "learning_rate": 6.183425884820778e-06,
      "loss": 0.8653,
      "step": 760710
    },
    {
      "epoch": 2.6661339445618535,
      "grad_norm": 3.390625,
      "learning_rate": 6.182776856157076e-06,
      "loss": 0.8763,
      "step": 760720
    },
    {
      "epoch": 2.6661689920687492,
      "grad_norm": 2.65625,
      "learning_rate": 6.182127827493374e-06,
      "loss": 0.8081,
      "step": 760730
    },
    {
      "epoch": 2.6662040395756446,
      "grad_norm": 3.25,
      "learning_rate": 6.181478798829672e-06,
      "loss": 0.7715,
      "step": 760740
    },
    {
      "epoch": 2.6662390870825403,
      "grad_norm": 2.828125,
      "learning_rate": 6.18082977016597e-06,
      "loss": 0.77,
      "step": 760750
    },
    {
      "epoch": 2.666274134589436,
      "grad_norm": 2.796875,
      "learning_rate": 6.180180741502268e-06,
      "loss": 0.7757,
      "step": 760760
    },
    {
      "epoch": 2.666309182096332,
      "grad_norm": 2.890625,
      "learning_rate": 6.1795317128385666e-06,
      "loss": 0.7483,
      "step": 760770
    },
    {
      "epoch": 2.666344229603227,
      "grad_norm": 2.6875,
      "learning_rate": 6.1788826841748646e-06,
      "loss": 0.6728,
      "step": 760780
    },
    {
      "epoch": 2.666379277110123,
      "grad_norm": 2.875,
      "learning_rate": 6.178233655511162e-06,
      "loss": 0.7963,
      "step": 760790
    },
    {
      "epoch": 2.666414324617018,
      "grad_norm": 2.46875,
      "learning_rate": 6.1775846268474606e-06,
      "loss": 0.8571,
      "step": 760800
    },
    {
      "epoch": 2.666449372123914,
      "grad_norm": 3.421875,
      "learning_rate": 6.1769355981837586e-06,
      "loss": 0.8665,
      "step": 760810
    },
    {
      "epoch": 2.6664844196308097,
      "grad_norm": 2.796875,
      "learning_rate": 6.1762865695200566e-06,
      "loss": 0.7918,
      "step": 760820
    },
    {
      "epoch": 2.666519467137705,
      "grad_norm": 2.765625,
      "learning_rate": 6.1756375408563546e-06,
      "loss": 0.7959,
      "step": 760830
    },
    {
      "epoch": 2.666554514644601,
      "grad_norm": 2.9375,
      "learning_rate": 6.1749885121926526e-06,
      "loss": 0.7177,
      "step": 760840
    },
    {
      "epoch": 2.666589562151496,
      "grad_norm": 2.640625,
      "learning_rate": 6.1743394835289506e-06,
      "loss": 0.7752,
      "step": 760850
    },
    {
      "epoch": 2.666624609658392,
      "grad_norm": 3.078125,
      "learning_rate": 6.1736904548652486e-06,
      "loss": 0.7866,
      "step": 760860
    },
    {
      "epoch": 2.6666596571652876,
      "grad_norm": 2.875,
      "learning_rate": 6.173041426201547e-06,
      "loss": 0.7569,
      "step": 760870
    },
    {
      "epoch": 2.6666947046721834,
      "grad_norm": 2.96875,
      "learning_rate": 6.172392397537845e-06,
      "loss": 0.8334,
      "step": 760880
    },
    {
      "epoch": 2.6667297521790787,
      "grad_norm": 2.78125,
      "learning_rate": 6.171743368874143e-06,
      "loss": 0.7309,
      "step": 760890
    },
    {
      "epoch": 2.6667647996859745,
      "grad_norm": 3.0,
      "learning_rate": 6.171094340210441e-06,
      "loss": 0.8081,
      "step": 760900
    },
    {
      "epoch": 2.6667998471928698,
      "grad_norm": 2.65625,
      "learning_rate": 6.170445311546739e-06,
      "loss": 0.716,
      "step": 760910
    },
    {
      "epoch": 2.6668348946997655,
      "grad_norm": 2.953125,
      "learning_rate": 6.169796282883037e-06,
      "loss": 0.7897,
      "step": 760920
    },
    {
      "epoch": 2.6668699422066613,
      "grad_norm": 3.375,
      "learning_rate": 6.169147254219336e-06,
      "loss": 0.8486,
      "step": 760930
    },
    {
      "epoch": 2.6669049897135566,
      "grad_norm": 3.28125,
      "learning_rate": 6.168498225555633e-06,
      "loss": 0.8931,
      "step": 760940
    },
    {
      "epoch": 2.6669400372204524,
      "grad_norm": 2.96875,
      "learning_rate": 6.167849196891931e-06,
      "loss": 0.7474,
      "step": 760950
    },
    {
      "epoch": 2.6669750847273477,
      "grad_norm": 2.734375,
      "learning_rate": 6.16720016822823e-06,
      "loss": 0.767,
      "step": 760960
    },
    {
      "epoch": 2.6670101322342434,
      "grad_norm": 2.90625,
      "learning_rate": 6.166551139564528e-06,
      "loss": 0.7136,
      "step": 760970
    },
    {
      "epoch": 2.667045179741139,
      "grad_norm": 2.78125,
      "learning_rate": 6.165902110900825e-06,
      "loss": 0.8428,
      "step": 760980
    },
    {
      "epoch": 2.667080227248035,
      "grad_norm": 2.59375,
      "learning_rate": 6.165253082237124e-06,
      "loss": 0.8978,
      "step": 760990
    },
    {
      "epoch": 2.6671152747549303,
      "grad_norm": 2.625,
      "learning_rate": 6.164604053573422e-06,
      "loss": 0.7137,
      "step": 761000
    },
    {
      "epoch": 2.667150322261826,
      "grad_norm": 2.84375,
      "learning_rate": 6.16395502490972e-06,
      "loss": 0.8438,
      "step": 761010
    },
    {
      "epoch": 2.6671853697687213,
      "grad_norm": 2.96875,
      "learning_rate": 6.163305996246019e-06,
      "loss": 0.7736,
      "step": 761020
    },
    {
      "epoch": 2.667220417275617,
      "grad_norm": 2.734375,
      "learning_rate": 6.162656967582316e-06,
      "loss": 0.8429,
      "step": 761030
    },
    {
      "epoch": 2.667255464782513,
      "grad_norm": 2.796875,
      "learning_rate": 6.162007938918614e-06,
      "loss": 0.744,
      "step": 761040
    },
    {
      "epoch": 2.667290512289408,
      "grad_norm": 2.9375,
      "learning_rate": 6.161358910254913e-06,
      "loss": 0.8197,
      "step": 761050
    },
    {
      "epoch": 2.667325559796304,
      "grad_norm": 3.125,
      "learning_rate": 6.160709881591211e-06,
      "loss": 0.7602,
      "step": 761060
    },
    {
      "epoch": 2.6673606073031992,
      "grad_norm": 3.359375,
      "learning_rate": 6.160060852927509e-06,
      "loss": 0.886,
      "step": 761070
    },
    {
      "epoch": 2.667395654810095,
      "grad_norm": 2.78125,
      "learning_rate": 6.159411824263807e-06,
      "loss": 0.8069,
      "step": 761080
    },
    {
      "epoch": 2.6674307023169908,
      "grad_norm": 2.859375,
      "learning_rate": 6.158762795600105e-06,
      "loss": 0.7861,
      "step": 761090
    },
    {
      "epoch": 2.6674657498238865,
      "grad_norm": 2.953125,
      "learning_rate": 6.158113766936403e-06,
      "loss": 0.8884,
      "step": 761100
    },
    {
      "epoch": 2.667500797330782,
      "grad_norm": 2.96875,
      "learning_rate": 6.157464738272702e-06,
      "loss": 0.7667,
      "step": 761110
    },
    {
      "epoch": 2.6675358448376776,
      "grad_norm": 3.125,
      "learning_rate": 6.156815709608999e-06,
      "loss": 0.8153,
      "step": 761120
    },
    {
      "epoch": 2.667570892344573,
      "grad_norm": 3.25,
      "learning_rate": 6.156166680945297e-06,
      "loss": 0.7376,
      "step": 761130
    },
    {
      "epoch": 2.6676059398514687,
      "grad_norm": 2.96875,
      "learning_rate": 6.155517652281596e-06,
      "loss": 0.7594,
      "step": 761140
    },
    {
      "epoch": 2.6676409873583644,
      "grad_norm": 3.3125,
      "learning_rate": 6.154868623617894e-06,
      "loss": 0.776,
      "step": 761150
    },
    {
      "epoch": 2.6676760348652597,
      "grad_norm": 2.84375,
      "learning_rate": 6.154219594954192e-06,
      "loss": 0.8141,
      "step": 761160
    },
    {
      "epoch": 2.6677110823721555,
      "grad_norm": 2.859375,
      "learning_rate": 6.15357056629049e-06,
      "loss": 0.7907,
      "step": 761170
    },
    {
      "epoch": 2.667746129879051,
      "grad_norm": 2.671875,
      "learning_rate": 6.152921537626788e-06,
      "loss": 0.7786,
      "step": 761180
    },
    {
      "epoch": 2.6677811773859466,
      "grad_norm": 2.65625,
      "learning_rate": 6.152272508963086e-06,
      "loss": 0.8048,
      "step": 761190
    },
    {
      "epoch": 2.6678162248928423,
      "grad_norm": 3.140625,
      "learning_rate": 6.151623480299385e-06,
      "loss": 0.7711,
      "step": 761200
    },
    {
      "epoch": 2.667851272399738,
      "grad_norm": 3.1875,
      "learning_rate": 6.150974451635683e-06,
      "loss": 0.8005,
      "step": 761210
    },
    {
      "epoch": 2.6678863199066334,
      "grad_norm": 3.15625,
      "learning_rate": 6.15032542297198e-06,
      "loss": 0.8091,
      "step": 761220
    },
    {
      "epoch": 2.667921367413529,
      "grad_norm": 2.953125,
      "learning_rate": 6.149676394308279e-06,
      "loss": 0.8452,
      "step": 761230
    },
    {
      "epoch": 2.6679564149204245,
      "grad_norm": 3.203125,
      "learning_rate": 6.149027365644577e-06,
      "loss": 0.827,
      "step": 761240
    },
    {
      "epoch": 2.66799146242732,
      "grad_norm": 2.71875,
      "learning_rate": 6.148378336980875e-06,
      "loss": 0.7954,
      "step": 761250
    },
    {
      "epoch": 2.668026509934216,
      "grad_norm": 3.3125,
      "learning_rate": 6.147729308317173e-06,
      "loss": 0.7808,
      "step": 761260
    },
    {
      "epoch": 2.6680615574411113,
      "grad_norm": 2.921875,
      "learning_rate": 6.147080279653471e-06,
      "loss": 0.8253,
      "step": 761270
    },
    {
      "epoch": 2.668096604948007,
      "grad_norm": 2.84375,
      "learning_rate": 6.146431250989769e-06,
      "loss": 0.8132,
      "step": 761280
    },
    {
      "epoch": 2.6681316524549024,
      "grad_norm": 2.703125,
      "learning_rate": 6.145782222326067e-06,
      "loss": 0.7613,
      "step": 761290
    },
    {
      "epoch": 2.668166699961798,
      "grad_norm": 3.296875,
      "learning_rate": 6.145133193662366e-06,
      "loss": 0.8219,
      "step": 761300
    },
    {
      "epoch": 2.668201747468694,
      "grad_norm": 2.71875,
      "learning_rate": 6.144484164998663e-06,
      "loss": 0.8234,
      "step": 761310
    },
    {
      "epoch": 2.6682367949755896,
      "grad_norm": 3.1875,
      "learning_rate": 6.143835136334962e-06,
      "loss": 0.8474,
      "step": 761320
    },
    {
      "epoch": 2.668271842482485,
      "grad_norm": 3.0625,
      "learning_rate": 6.14318610767126e-06,
      "loss": 0.902,
      "step": 761330
    },
    {
      "epoch": 2.6683068899893807,
      "grad_norm": 2.671875,
      "learning_rate": 6.142537079007558e-06,
      "loss": 0.7345,
      "step": 761340
    },
    {
      "epoch": 2.668341937496276,
      "grad_norm": 2.828125,
      "learning_rate": 6.141888050343856e-06,
      "loss": 0.8002,
      "step": 761350
    },
    {
      "epoch": 2.6683769850031718,
      "grad_norm": 2.53125,
      "learning_rate": 6.141239021680154e-06,
      "loss": 0.7572,
      "step": 761360
    },
    {
      "epoch": 2.6684120325100675,
      "grad_norm": 2.53125,
      "learning_rate": 6.140589993016452e-06,
      "loss": 0.8004,
      "step": 761370
    },
    {
      "epoch": 2.668447080016963,
      "grad_norm": 2.90625,
      "learning_rate": 6.13994096435275e-06,
      "loss": 0.7855,
      "step": 761380
    },
    {
      "epoch": 2.6684821275238586,
      "grad_norm": 2.71875,
      "learning_rate": 6.1392919356890485e-06,
      "loss": 0.7164,
      "step": 761390
    },
    {
      "epoch": 2.668517175030754,
      "grad_norm": 2.71875,
      "learning_rate": 6.1386429070253465e-06,
      "loss": 0.8622,
      "step": 761400
    },
    {
      "epoch": 2.6685522225376497,
      "grad_norm": 2.859375,
      "learning_rate": 6.137993878361644e-06,
      "loss": 0.8494,
      "step": 761410
    },
    {
      "epoch": 2.6685872700445454,
      "grad_norm": 2.65625,
      "learning_rate": 6.1373448496979425e-06,
      "loss": 0.7021,
      "step": 761420
    },
    {
      "epoch": 2.668622317551441,
      "grad_norm": 2.84375,
      "learning_rate": 6.1366958210342405e-06,
      "loss": 0.7237,
      "step": 761430
    },
    {
      "epoch": 2.6686573650583365,
      "grad_norm": 3.0625,
      "learning_rate": 6.1360467923705385e-06,
      "loss": 0.807,
      "step": 761440
    },
    {
      "epoch": 2.6686924125652323,
      "grad_norm": 2.6875,
      "learning_rate": 6.1353977637068365e-06,
      "loss": 0.768,
      "step": 761450
    },
    {
      "epoch": 2.6687274600721276,
      "grad_norm": 3.0625,
      "learning_rate": 6.1347487350431345e-06,
      "loss": 0.8235,
      "step": 761460
    },
    {
      "epoch": 2.6687625075790233,
      "grad_norm": 3.0,
      "learning_rate": 6.1340997063794325e-06,
      "loss": 0.8193,
      "step": 761470
    },
    {
      "epoch": 2.668797555085919,
      "grad_norm": 2.8125,
      "learning_rate": 6.133450677715731e-06,
      "loss": 0.8091,
      "step": 761480
    },
    {
      "epoch": 2.6688326025928144,
      "grad_norm": 2.953125,
      "learning_rate": 6.132801649052029e-06,
      "loss": 0.7633,
      "step": 761490
    },
    {
      "epoch": 2.66886765009971,
      "grad_norm": 2.703125,
      "learning_rate": 6.1321526203883265e-06,
      "loss": 0.8074,
      "step": 761500
    },
    {
      "epoch": 2.6689026976066055,
      "grad_norm": 2.984375,
      "learning_rate": 6.131503591724625e-06,
      "loss": 0.7865,
      "step": 761510
    },
    {
      "epoch": 2.6689377451135012,
      "grad_norm": 2.921875,
      "learning_rate": 6.130854563060923e-06,
      "loss": 0.8727,
      "step": 761520
    },
    {
      "epoch": 2.668972792620397,
      "grad_norm": 2.734375,
      "learning_rate": 6.130205534397221e-06,
      "loss": 0.7857,
      "step": 761530
    },
    {
      "epoch": 2.6690078401272928,
      "grad_norm": 2.78125,
      "learning_rate": 6.12955650573352e-06,
      "loss": 0.73,
      "step": 761540
    },
    {
      "epoch": 2.669042887634188,
      "grad_norm": 2.921875,
      "learning_rate": 6.128907477069817e-06,
      "loss": 0.8036,
      "step": 761550
    },
    {
      "epoch": 2.669077935141084,
      "grad_norm": 2.6875,
      "learning_rate": 6.128258448406115e-06,
      "loss": 0.7657,
      "step": 761560
    },
    {
      "epoch": 2.669112982647979,
      "grad_norm": 2.6875,
      "learning_rate": 6.127609419742414e-06,
      "loss": 0.7658,
      "step": 761570
    },
    {
      "epoch": 2.669148030154875,
      "grad_norm": 2.8125,
      "learning_rate": 6.126960391078712e-06,
      "loss": 0.8578,
      "step": 761580
    },
    {
      "epoch": 2.6691830776617707,
      "grad_norm": 2.765625,
      "learning_rate": 6.126311362415009e-06,
      "loss": 0.8525,
      "step": 761590
    },
    {
      "epoch": 2.669218125168666,
      "grad_norm": 2.703125,
      "learning_rate": 6.125662333751308e-06,
      "loss": 0.8553,
      "step": 761600
    },
    {
      "epoch": 2.6692531726755617,
      "grad_norm": 2.4375,
      "learning_rate": 6.125013305087606e-06,
      "loss": 0.8549,
      "step": 761610
    },
    {
      "epoch": 2.6692882201824575,
      "grad_norm": 2.765625,
      "learning_rate": 6.124364276423904e-06,
      "loss": 0.7629,
      "step": 761620
    },
    {
      "epoch": 2.669323267689353,
      "grad_norm": 2.96875,
      "learning_rate": 6.123715247760203e-06,
      "loss": 0.8239,
      "step": 761630
    },
    {
      "epoch": 2.6693583151962486,
      "grad_norm": 2.625,
      "learning_rate": 6.1230662190965e-06,
      "loss": 0.7808,
      "step": 761640
    },
    {
      "epoch": 2.6693933627031443,
      "grad_norm": 3.078125,
      "learning_rate": 6.122417190432798e-06,
      "loss": 0.8452,
      "step": 761650
    },
    {
      "epoch": 2.6694284102100396,
      "grad_norm": 2.65625,
      "learning_rate": 6.121768161769097e-06,
      "loss": 0.7425,
      "step": 761660
    },
    {
      "epoch": 2.6694634577169354,
      "grad_norm": 2.546875,
      "learning_rate": 6.121119133105395e-06,
      "loss": 0.7679,
      "step": 761670
    },
    {
      "epoch": 2.6694985052238307,
      "grad_norm": 2.71875,
      "learning_rate": 6.120470104441693e-06,
      "loss": 0.7874,
      "step": 761680
    },
    {
      "epoch": 2.6695335527307265,
      "grad_norm": 3.140625,
      "learning_rate": 6.119821075777991e-06,
      "loss": 0.8016,
      "step": 761690
    },
    {
      "epoch": 2.669568600237622,
      "grad_norm": 2.578125,
      "learning_rate": 6.119172047114289e-06,
      "loss": 0.7742,
      "step": 761700
    },
    {
      "epoch": 2.6696036477445175,
      "grad_norm": 3.09375,
      "learning_rate": 6.118523018450587e-06,
      "loss": 0.8251,
      "step": 761710
    },
    {
      "epoch": 2.6696386952514133,
      "grad_norm": 2.90625,
      "learning_rate": 6.117873989786885e-06,
      "loss": 0.8719,
      "step": 761720
    },
    {
      "epoch": 2.669673742758309,
      "grad_norm": 3.015625,
      "learning_rate": 6.117224961123183e-06,
      "loss": 0.9195,
      "step": 761730
    },
    {
      "epoch": 2.6697087902652044,
      "grad_norm": 2.421875,
      "learning_rate": 6.116575932459481e-06,
      "loss": 0.7409,
      "step": 761740
    },
    {
      "epoch": 2.6697438377721,
      "grad_norm": 3.171875,
      "learning_rate": 6.11592690379578e-06,
      "loss": 0.8868,
      "step": 761750
    },
    {
      "epoch": 2.669778885278996,
      "grad_norm": 2.703125,
      "learning_rate": 6.115277875132078e-06,
      "loss": 0.8008,
      "step": 761760
    },
    {
      "epoch": 2.669813932785891,
      "grad_norm": 3.015625,
      "learning_rate": 6.114628846468376e-06,
      "loss": 0.796,
      "step": 761770
    },
    {
      "epoch": 2.669848980292787,
      "grad_norm": 2.765625,
      "learning_rate": 6.113979817804674e-06,
      "loss": 0.8666,
      "step": 761780
    },
    {
      "epoch": 2.6698840277996823,
      "grad_norm": 3.234375,
      "learning_rate": 6.113330789140972e-06,
      "loss": 0.769,
      "step": 761790
    },
    {
      "epoch": 2.669919075306578,
      "grad_norm": 2.84375,
      "learning_rate": 6.11268176047727e-06,
      "loss": 0.7955,
      "step": 761800
    },
    {
      "epoch": 2.6699541228134738,
      "grad_norm": 3.234375,
      "learning_rate": 6.112032731813568e-06,
      "loss": 0.838,
      "step": 761810
    },
    {
      "epoch": 2.6699891703203695,
      "grad_norm": 2.640625,
      "learning_rate": 6.111383703149867e-06,
      "loss": 0.7943,
      "step": 761820
    },
    {
      "epoch": 2.670024217827265,
      "grad_norm": 3.0625,
      "learning_rate": 6.110734674486164e-06,
      "loss": 0.7965,
      "step": 761830
    },
    {
      "epoch": 2.6700592653341606,
      "grad_norm": 3.21875,
      "learning_rate": 6.110085645822462e-06,
      "loss": 0.926,
      "step": 761840
    },
    {
      "epoch": 2.670094312841056,
      "grad_norm": 2.859375,
      "learning_rate": 6.109436617158761e-06,
      "loss": 0.8307,
      "step": 761850
    },
    {
      "epoch": 2.6701293603479517,
      "grad_norm": 2.96875,
      "learning_rate": 6.108787588495059e-06,
      "loss": 0.7972,
      "step": 761860
    },
    {
      "epoch": 2.6701644078548474,
      "grad_norm": 2.40625,
      "learning_rate": 6.108138559831357e-06,
      "loss": 0.8019,
      "step": 761870
    },
    {
      "epoch": 2.6701994553617427,
      "grad_norm": 2.921875,
      "learning_rate": 6.107489531167655e-06,
      "loss": 0.7841,
      "step": 761880
    },
    {
      "epoch": 2.6702345028686385,
      "grad_norm": 2.96875,
      "learning_rate": 6.106840502503953e-06,
      "loss": 0.8618,
      "step": 761890
    },
    {
      "epoch": 2.670269550375534,
      "grad_norm": 3.296875,
      "learning_rate": 6.106191473840251e-06,
      "loss": 0.8065,
      "step": 761900
    },
    {
      "epoch": 2.6703045978824296,
      "grad_norm": 2.78125,
      "learning_rate": 6.1055424451765496e-06,
      "loss": 0.8123,
      "step": 761910
    },
    {
      "epoch": 2.6703396453893253,
      "grad_norm": 3.203125,
      "learning_rate": 6.104893416512847e-06,
      "loss": 0.8045,
      "step": 761920
    },
    {
      "epoch": 2.670374692896221,
      "grad_norm": 2.796875,
      "learning_rate": 6.104244387849145e-06,
      "loss": 0.8328,
      "step": 761930
    },
    {
      "epoch": 2.6704097404031164,
      "grad_norm": 2.609375,
      "learning_rate": 6.1035953591854436e-06,
      "loss": 0.7693,
      "step": 761940
    },
    {
      "epoch": 2.670444787910012,
      "grad_norm": 3.1875,
      "learning_rate": 6.1029463305217416e-06,
      "loss": 0.8027,
      "step": 761950
    },
    {
      "epoch": 2.6704798354169075,
      "grad_norm": 2.546875,
      "learning_rate": 6.1022973018580396e-06,
      "loss": 0.7373,
      "step": 761960
    },
    {
      "epoch": 2.6705148829238032,
      "grad_norm": 3.234375,
      "learning_rate": 6.1016482731943376e-06,
      "loss": 0.7398,
      "step": 761970
    },
    {
      "epoch": 2.670549930430699,
      "grad_norm": 2.625,
      "learning_rate": 6.1009992445306356e-06,
      "loss": 0.8804,
      "step": 761980
    },
    {
      "epoch": 2.6705849779375943,
      "grad_norm": 2.96875,
      "learning_rate": 6.1003502158669336e-06,
      "loss": 0.7959,
      "step": 761990
    },
    {
      "epoch": 2.67062002544449,
      "grad_norm": 2.984375,
      "learning_rate": 6.099701187203232e-06,
      "loss": 0.8273,
      "step": 762000
    },
    {
      "epoch": 2.6706550729513854,
      "grad_norm": 3.140625,
      "learning_rate": 6.09905215853953e-06,
      "loss": 0.7732,
      "step": 762010
    },
    {
      "epoch": 2.670690120458281,
      "grad_norm": 2.75,
      "learning_rate": 6.0984031298758276e-06,
      "loss": 0.8278,
      "step": 762020
    },
    {
      "epoch": 2.670725167965177,
      "grad_norm": 3.203125,
      "learning_rate": 6.097754101212126e-06,
      "loss": 0.841,
      "step": 762030
    },
    {
      "epoch": 2.6707602154720727,
      "grad_norm": 2.90625,
      "learning_rate": 6.097105072548424e-06,
      "loss": 0.7408,
      "step": 762040
    },
    {
      "epoch": 2.670795262978968,
      "grad_norm": 3.109375,
      "learning_rate": 6.096456043884722e-06,
      "loss": 0.7977,
      "step": 762050
    },
    {
      "epoch": 2.6708303104858637,
      "grad_norm": 3.578125,
      "learning_rate": 6.09580701522102e-06,
      "loss": 0.8393,
      "step": 762060
    },
    {
      "epoch": 2.670865357992759,
      "grad_norm": 3.125,
      "learning_rate": 6.095157986557318e-06,
      "loss": 0.8397,
      "step": 762070
    },
    {
      "epoch": 2.670900405499655,
      "grad_norm": 2.828125,
      "learning_rate": 6.094508957893616e-06,
      "loss": 0.8021,
      "step": 762080
    },
    {
      "epoch": 2.6709354530065506,
      "grad_norm": 2.828125,
      "learning_rate": 6.093859929229915e-06,
      "loss": 0.7236,
      "step": 762090
    },
    {
      "epoch": 2.670970500513446,
      "grad_norm": 3.25,
      "learning_rate": 6.093210900566213e-06,
      "loss": 0.8636,
      "step": 762100
    },
    {
      "epoch": 2.6710055480203416,
      "grad_norm": 2.96875,
      "learning_rate": 6.09256187190251e-06,
      "loss": 0.8065,
      "step": 762110
    },
    {
      "epoch": 2.671040595527237,
      "grad_norm": 3.171875,
      "learning_rate": 6.091912843238809e-06,
      "loss": 0.8456,
      "step": 762120
    },
    {
      "epoch": 2.6710756430341327,
      "grad_norm": 2.9375,
      "learning_rate": 6.091263814575107e-06,
      "loss": 0.7781,
      "step": 762130
    },
    {
      "epoch": 2.6711106905410285,
      "grad_norm": 3.1875,
      "learning_rate": 6.090614785911405e-06,
      "loss": 0.8182,
      "step": 762140
    },
    {
      "epoch": 2.671145738047924,
      "grad_norm": 2.8125,
      "learning_rate": 6.089965757247704e-06,
      "loss": 0.7765,
      "step": 762150
    },
    {
      "epoch": 2.6711807855548195,
      "grad_norm": 3.171875,
      "learning_rate": 6.089316728584001e-06,
      "loss": 0.8155,
      "step": 762160
    },
    {
      "epoch": 2.6712158330617153,
      "grad_norm": 2.703125,
      "learning_rate": 6.088667699920299e-06,
      "loss": 0.7661,
      "step": 762170
    },
    {
      "epoch": 2.6712508805686106,
      "grad_norm": 2.609375,
      "learning_rate": 6.088018671256598e-06,
      "loss": 0.7312,
      "step": 762180
    },
    {
      "epoch": 2.6712859280755064,
      "grad_norm": 2.265625,
      "learning_rate": 6.087369642592896e-06,
      "loss": 0.7423,
      "step": 762190
    },
    {
      "epoch": 2.671320975582402,
      "grad_norm": 2.71875,
      "learning_rate": 6.086720613929193e-06,
      "loss": 0.8537,
      "step": 762200
    },
    {
      "epoch": 2.6713560230892974,
      "grad_norm": 2.796875,
      "learning_rate": 6.086071585265492e-06,
      "loss": 0.8145,
      "step": 762210
    },
    {
      "epoch": 2.671391070596193,
      "grad_norm": 2.65625,
      "learning_rate": 6.08542255660179e-06,
      "loss": 0.7678,
      "step": 762220
    },
    {
      "epoch": 2.6714261181030885,
      "grad_norm": 3.203125,
      "learning_rate": 6.084773527938088e-06,
      "loss": 0.8022,
      "step": 762230
    },
    {
      "epoch": 2.6714611656099843,
      "grad_norm": 3.046875,
      "learning_rate": 6.084124499274386e-06,
      "loss": 0.7994,
      "step": 762240
    },
    {
      "epoch": 2.67149621311688,
      "grad_norm": 2.828125,
      "learning_rate": 6.083475470610684e-06,
      "loss": 0.7403,
      "step": 762250
    },
    {
      "epoch": 2.6715312606237758,
      "grad_norm": 3.515625,
      "learning_rate": 6.082826441946982e-06,
      "loss": 0.8243,
      "step": 762260
    },
    {
      "epoch": 2.671566308130671,
      "grad_norm": 3.15625,
      "learning_rate": 6.082177413283281e-06,
      "loss": 0.7881,
      "step": 762270
    },
    {
      "epoch": 2.671601355637567,
      "grad_norm": 2.8125,
      "learning_rate": 6.081528384619579e-06,
      "loss": 0.7189,
      "step": 762280
    },
    {
      "epoch": 2.671636403144462,
      "grad_norm": 3.109375,
      "learning_rate": 6.080879355955877e-06,
      "loss": 0.7984,
      "step": 762290
    },
    {
      "epoch": 2.671671450651358,
      "grad_norm": 3.28125,
      "learning_rate": 6.080230327292175e-06,
      "loss": 0.8003,
      "step": 762300
    },
    {
      "epoch": 2.6717064981582537,
      "grad_norm": 2.8125,
      "learning_rate": 6.079581298628473e-06,
      "loss": 0.7656,
      "step": 762310
    },
    {
      "epoch": 2.671741545665149,
      "grad_norm": 2.53125,
      "learning_rate": 6.078932269964771e-06,
      "loss": 0.8446,
      "step": 762320
    },
    {
      "epoch": 2.6717765931720447,
      "grad_norm": 3.140625,
      "learning_rate": 6.078283241301069e-06,
      "loss": 0.7909,
      "step": 762330
    },
    {
      "epoch": 2.67181164067894,
      "grad_norm": 2.734375,
      "learning_rate": 6.077634212637368e-06,
      "loss": 0.7953,
      "step": 762340
    },
    {
      "epoch": 2.671846688185836,
      "grad_norm": 2.875,
      "learning_rate": 6.076985183973665e-06,
      "loss": 0.8085,
      "step": 762350
    },
    {
      "epoch": 2.6718817356927316,
      "grad_norm": 2.96875,
      "learning_rate": 6.076336155309963e-06,
      "loss": 0.8428,
      "step": 762360
    },
    {
      "epoch": 2.6719167831996273,
      "grad_norm": 2.1875,
      "learning_rate": 6.075687126646262e-06,
      "loss": 0.7676,
      "step": 762370
    },
    {
      "epoch": 2.6719518307065226,
      "grad_norm": 3.140625,
      "learning_rate": 6.07503809798256e-06,
      "loss": 0.7859,
      "step": 762380
    },
    {
      "epoch": 2.6719868782134184,
      "grad_norm": 3.1875,
      "learning_rate": 6.074389069318858e-06,
      "loss": 0.733,
      "step": 762390
    },
    {
      "epoch": 2.6720219257203137,
      "grad_norm": 2.8125,
      "learning_rate": 6.073740040655156e-06,
      "loss": 0.8111,
      "step": 762400
    },
    {
      "epoch": 2.6720569732272095,
      "grad_norm": 2.53125,
      "learning_rate": 6.073091011991454e-06,
      "loss": 0.812,
      "step": 762410
    },
    {
      "epoch": 2.6720920207341052,
      "grad_norm": 3.3125,
      "learning_rate": 6.072441983327752e-06,
      "loss": 0.8201,
      "step": 762420
    },
    {
      "epoch": 2.6721270682410005,
      "grad_norm": 3.328125,
      "learning_rate": 6.071792954664051e-06,
      "loss": 0.7958,
      "step": 762430
    },
    {
      "epoch": 2.6721621157478963,
      "grad_norm": 3.15625,
      "learning_rate": 6.071143926000348e-06,
      "loss": 0.824,
      "step": 762440
    },
    {
      "epoch": 2.6721971632547916,
      "grad_norm": 2.984375,
      "learning_rate": 6.070494897336646e-06,
      "loss": 0.813,
      "step": 762450
    },
    {
      "epoch": 2.6722322107616874,
      "grad_norm": 3.53125,
      "learning_rate": 6.069845868672945e-06,
      "loss": 0.7107,
      "step": 762460
    },
    {
      "epoch": 2.672267258268583,
      "grad_norm": 3.0,
      "learning_rate": 6.069196840009243e-06,
      "loss": 0.8085,
      "step": 762470
    },
    {
      "epoch": 2.672302305775479,
      "grad_norm": 2.71875,
      "learning_rate": 6.068547811345541e-06,
      "loss": 0.8135,
      "step": 762480
    },
    {
      "epoch": 2.672337353282374,
      "grad_norm": 2.625,
      "learning_rate": 6.067898782681839e-06,
      "loss": 0.738,
      "step": 762490
    },
    {
      "epoch": 2.67237240078927,
      "grad_norm": 2.53125,
      "learning_rate": 6.067249754018137e-06,
      "loss": 0.7697,
      "step": 762500
    },
    {
      "epoch": 2.6724074482961653,
      "grad_norm": 3.203125,
      "learning_rate": 6.066600725354435e-06,
      "loss": 0.7862,
      "step": 762510
    },
    {
      "epoch": 2.672442495803061,
      "grad_norm": 2.90625,
      "learning_rate": 6.0659516966907335e-06,
      "loss": 0.7232,
      "step": 762520
    },
    {
      "epoch": 2.672477543309957,
      "grad_norm": 3.09375,
      "learning_rate": 6.065302668027031e-06,
      "loss": 0.8934,
      "step": 762530
    },
    {
      "epoch": 2.672512590816852,
      "grad_norm": 2.828125,
      "learning_rate": 6.064653639363329e-06,
      "loss": 0.7834,
      "step": 762540
    },
    {
      "epoch": 2.672547638323748,
      "grad_norm": 3.15625,
      "learning_rate": 6.0640046106996275e-06,
      "loss": 0.7533,
      "step": 762550
    },
    {
      "epoch": 2.672582685830643,
      "grad_norm": 2.578125,
      "learning_rate": 6.0633555820359255e-06,
      "loss": 0.7776,
      "step": 762560
    },
    {
      "epoch": 2.672617733337539,
      "grad_norm": 3.1875,
      "learning_rate": 6.0627065533722235e-06,
      "loss": 0.8067,
      "step": 762570
    },
    {
      "epoch": 2.6726527808444347,
      "grad_norm": 2.84375,
      "learning_rate": 6.0620575247085215e-06,
      "loss": 0.7607,
      "step": 762580
    },
    {
      "epoch": 2.6726878283513305,
      "grad_norm": 2.859375,
      "learning_rate": 6.0614084960448195e-06,
      "loss": 0.7676,
      "step": 762590
    },
    {
      "epoch": 2.6727228758582258,
      "grad_norm": 2.796875,
      "learning_rate": 6.0607594673811175e-06,
      "loss": 0.8403,
      "step": 762600
    },
    {
      "epoch": 2.6727579233651215,
      "grad_norm": 2.890625,
      "learning_rate": 6.060110438717416e-06,
      "loss": 0.7276,
      "step": 762610
    },
    {
      "epoch": 2.672792970872017,
      "grad_norm": 2.890625,
      "learning_rate": 6.059461410053714e-06,
      "loss": 0.7591,
      "step": 762620
    },
    {
      "epoch": 2.6728280183789126,
      "grad_norm": 3.21875,
      "learning_rate": 6.0588123813900115e-06,
      "loss": 0.8811,
      "step": 762630
    },
    {
      "epoch": 2.6728630658858084,
      "grad_norm": 3.09375,
      "learning_rate": 6.05816335272631e-06,
      "loss": 0.7799,
      "step": 762640
    },
    {
      "epoch": 2.6728981133927037,
      "grad_norm": 2.3125,
      "learning_rate": 6.057514324062608e-06,
      "loss": 0.7782,
      "step": 762650
    },
    {
      "epoch": 2.6729331608995994,
      "grad_norm": 2.5625,
      "learning_rate": 6.056865295398906e-06,
      "loss": 0.8246,
      "step": 762660
    },
    {
      "epoch": 2.6729682084064947,
      "grad_norm": 3.171875,
      "learning_rate": 6.056216266735204e-06,
      "loss": 0.8118,
      "step": 762670
    },
    {
      "epoch": 2.6730032559133905,
      "grad_norm": 2.84375,
      "learning_rate": 6.055567238071502e-06,
      "loss": 0.7434,
      "step": 762680
    },
    {
      "epoch": 2.6730383034202863,
      "grad_norm": 2.75,
      "learning_rate": 6.0549182094078e-06,
      "loss": 0.7935,
      "step": 762690
    },
    {
      "epoch": 2.673073350927182,
      "grad_norm": 3.234375,
      "learning_rate": 6.054269180744099e-06,
      "loss": 0.7785,
      "step": 762700
    },
    {
      "epoch": 2.6731083984340773,
      "grad_norm": 2.75,
      "learning_rate": 6.053620152080397e-06,
      "loss": 0.7906,
      "step": 762710
    },
    {
      "epoch": 2.673143445940973,
      "grad_norm": 3.109375,
      "learning_rate": 6.052971123416694e-06,
      "loss": 0.7658,
      "step": 762720
    },
    {
      "epoch": 2.6731784934478684,
      "grad_norm": 3.15625,
      "learning_rate": 6.052322094752993e-06,
      "loss": 0.8059,
      "step": 762730
    },
    {
      "epoch": 2.673213540954764,
      "grad_norm": 3.015625,
      "learning_rate": 6.051673066089291e-06,
      "loss": 0.7916,
      "step": 762740
    },
    {
      "epoch": 2.67324858846166,
      "grad_norm": 3.34375,
      "learning_rate": 6.051024037425589e-06,
      "loss": 0.9544,
      "step": 762750
    },
    {
      "epoch": 2.6732836359685552,
      "grad_norm": 3.40625,
      "learning_rate": 6.050375008761887e-06,
      "loss": 0.8272,
      "step": 762760
    },
    {
      "epoch": 2.673318683475451,
      "grad_norm": 3.125,
      "learning_rate": 6.049725980098185e-06,
      "loss": 0.7824,
      "step": 762770
    },
    {
      "epoch": 2.6733537309823463,
      "grad_norm": 2.96875,
      "learning_rate": 6.049076951434483e-06,
      "loss": 0.8739,
      "step": 762780
    },
    {
      "epoch": 2.673388778489242,
      "grad_norm": 2.78125,
      "learning_rate": 6.048427922770781e-06,
      "loss": 0.8055,
      "step": 762790
    },
    {
      "epoch": 2.673423825996138,
      "grad_norm": 2.890625,
      "learning_rate": 6.04777889410708e-06,
      "loss": 0.8819,
      "step": 762800
    },
    {
      "epoch": 2.6734588735030336,
      "grad_norm": 3.421875,
      "learning_rate": 6.047129865443378e-06,
      "loss": 0.8695,
      "step": 762810
    },
    {
      "epoch": 2.673493921009929,
      "grad_norm": 2.78125,
      "learning_rate": 6.046480836779676e-06,
      "loss": 0.7369,
      "step": 762820
    },
    {
      "epoch": 2.6735289685168246,
      "grad_norm": 2.828125,
      "learning_rate": 6.045831808115974e-06,
      "loss": 0.7684,
      "step": 762830
    },
    {
      "epoch": 2.67356401602372,
      "grad_norm": 2.90625,
      "learning_rate": 6.045182779452272e-06,
      "loss": 0.7807,
      "step": 762840
    },
    {
      "epoch": 2.6735990635306157,
      "grad_norm": 3.21875,
      "learning_rate": 6.04453375078857e-06,
      "loss": 0.8088,
      "step": 762850
    },
    {
      "epoch": 2.6736341110375115,
      "grad_norm": 2.546875,
      "learning_rate": 6.043884722124868e-06,
      "loss": 0.8764,
      "step": 762860
    },
    {
      "epoch": 2.673669158544407,
      "grad_norm": 2.765625,
      "learning_rate": 6.043235693461166e-06,
      "loss": 0.7089,
      "step": 762870
    },
    {
      "epoch": 2.6737042060513025,
      "grad_norm": 3.21875,
      "learning_rate": 6.042586664797464e-06,
      "loss": 0.7609,
      "step": 762880
    },
    {
      "epoch": 2.6737392535581983,
      "grad_norm": 2.921875,
      "learning_rate": 6.041937636133763e-06,
      "loss": 0.7825,
      "step": 762890
    },
    {
      "epoch": 2.6737743010650936,
      "grad_norm": 2.890625,
      "learning_rate": 6.041288607470061e-06,
      "loss": 0.6758,
      "step": 762900
    },
    {
      "epoch": 2.6738093485719894,
      "grad_norm": 3.109375,
      "learning_rate": 6.040639578806358e-06,
      "loss": 0.8073,
      "step": 762910
    },
    {
      "epoch": 2.673844396078885,
      "grad_norm": 2.5625,
      "learning_rate": 6.039990550142657e-06,
      "loss": 0.7919,
      "step": 762920
    },
    {
      "epoch": 2.6738794435857804,
      "grad_norm": 2.90625,
      "learning_rate": 6.039341521478955e-06,
      "loss": 0.7899,
      "step": 762930
    },
    {
      "epoch": 2.673914491092676,
      "grad_norm": 2.953125,
      "learning_rate": 6.038692492815253e-06,
      "loss": 0.7972,
      "step": 762940
    },
    {
      "epoch": 2.6739495385995715,
      "grad_norm": 2.703125,
      "learning_rate": 6.038043464151552e-06,
      "loss": 0.7363,
      "step": 762950
    },
    {
      "epoch": 2.6739845861064673,
      "grad_norm": 2.296875,
      "learning_rate": 6.037394435487849e-06,
      "loss": 0.7979,
      "step": 762960
    },
    {
      "epoch": 2.674019633613363,
      "grad_norm": 3.171875,
      "learning_rate": 6.036745406824147e-06,
      "loss": 0.8921,
      "step": 762970
    },
    {
      "epoch": 2.6740546811202583,
      "grad_norm": 2.640625,
      "learning_rate": 6.036096378160446e-06,
      "loss": 0.7572,
      "step": 762980
    },
    {
      "epoch": 2.674089728627154,
      "grad_norm": 3.140625,
      "learning_rate": 6.035447349496744e-06,
      "loss": 0.7903,
      "step": 762990
    },
    {
      "epoch": 2.67412477613405,
      "grad_norm": 3.171875,
      "learning_rate": 6.034798320833041e-06,
      "loss": 0.8178,
      "step": 763000
    },
    {
      "epoch": 2.674159823640945,
      "grad_norm": 2.84375,
      "learning_rate": 6.03414929216934e-06,
      "loss": 0.8357,
      "step": 763010
    },
    {
      "epoch": 2.674194871147841,
      "grad_norm": 3.125,
      "learning_rate": 6.033500263505638e-06,
      "loss": 0.8326,
      "step": 763020
    },
    {
      "epoch": 2.6742299186547367,
      "grad_norm": 2.65625,
      "learning_rate": 6.032851234841936e-06,
      "loss": 0.782,
      "step": 763030
    },
    {
      "epoch": 2.674264966161632,
      "grad_norm": 3.5625,
      "learning_rate": 6.0322022061782346e-06,
      "loss": 0.7601,
      "step": 763040
    },
    {
      "epoch": 2.6743000136685278,
      "grad_norm": 2.84375,
      "learning_rate": 6.031553177514532e-06,
      "loss": 0.7258,
      "step": 763050
    },
    {
      "epoch": 2.674335061175423,
      "grad_norm": 3.140625,
      "learning_rate": 6.03090414885083e-06,
      "loss": 0.8371,
      "step": 763060
    },
    {
      "epoch": 2.674370108682319,
      "grad_norm": 2.796875,
      "learning_rate": 6.0302551201871286e-06,
      "loss": 0.7993,
      "step": 763070
    },
    {
      "epoch": 2.6744051561892146,
      "grad_norm": 2.828125,
      "learning_rate": 6.0296060915234266e-06,
      "loss": 0.7748,
      "step": 763080
    },
    {
      "epoch": 2.6744402036961104,
      "grad_norm": 2.734375,
      "learning_rate": 6.0289570628597246e-06,
      "loss": 0.8202,
      "step": 763090
    },
    {
      "epoch": 2.6744752512030057,
      "grad_norm": 3.1875,
      "learning_rate": 6.0283080341960226e-06,
      "loss": 0.8192,
      "step": 763100
    },
    {
      "epoch": 2.6745102987099014,
      "grad_norm": 2.90625,
      "learning_rate": 6.0276590055323206e-06,
      "loss": 0.7441,
      "step": 763110
    },
    {
      "epoch": 2.6745453462167967,
      "grad_norm": 2.84375,
      "learning_rate": 6.0270099768686186e-06,
      "loss": 0.7995,
      "step": 763120
    },
    {
      "epoch": 2.6745803937236925,
      "grad_norm": 3.09375,
      "learning_rate": 6.026360948204917e-06,
      "loss": 0.7958,
      "step": 763130
    },
    {
      "epoch": 2.6746154412305883,
      "grad_norm": 2.6875,
      "learning_rate": 6.025711919541215e-06,
      "loss": 0.8354,
      "step": 763140
    },
    {
      "epoch": 2.6746504887374836,
      "grad_norm": 2.875,
      "learning_rate": 6.0250628908775126e-06,
      "loss": 0.7516,
      "step": 763150
    },
    {
      "epoch": 2.6746855362443793,
      "grad_norm": 3.125,
      "learning_rate": 6.024413862213811e-06,
      "loss": 0.8075,
      "step": 763160
    },
    {
      "epoch": 2.6747205837512746,
      "grad_norm": 2.796875,
      "learning_rate": 6.023764833550109e-06,
      "loss": 0.7855,
      "step": 763170
    },
    {
      "epoch": 2.6747556312581704,
      "grad_norm": 2.921875,
      "learning_rate": 6.023115804886407e-06,
      "loss": 0.7364,
      "step": 763180
    },
    {
      "epoch": 2.674790678765066,
      "grad_norm": 2.5625,
      "learning_rate": 6.022466776222705e-06,
      "loss": 0.8378,
      "step": 763190
    },
    {
      "epoch": 2.674825726271962,
      "grad_norm": 3.015625,
      "learning_rate": 6.021817747559003e-06,
      "loss": 0.7937,
      "step": 763200
    },
    {
      "epoch": 2.6748607737788572,
      "grad_norm": 3.171875,
      "learning_rate": 6.021168718895301e-06,
      "loss": 0.8267,
      "step": 763210
    },
    {
      "epoch": 2.674895821285753,
      "grad_norm": 2.609375,
      "learning_rate": 6.020519690231599e-06,
      "loss": 0.733,
      "step": 763220
    },
    {
      "epoch": 2.6749308687926483,
      "grad_norm": 2.921875,
      "learning_rate": 6.019870661567898e-06,
      "loss": 0.7909,
      "step": 763230
    },
    {
      "epoch": 2.674965916299544,
      "grad_norm": 2.828125,
      "learning_rate": 6.019221632904195e-06,
      "loss": 0.9445,
      "step": 763240
    },
    {
      "epoch": 2.67500096380644,
      "grad_norm": 3.25,
      "learning_rate": 6.018572604240494e-06,
      "loss": 0.7854,
      "step": 763250
    },
    {
      "epoch": 2.675036011313335,
      "grad_norm": 2.921875,
      "learning_rate": 6.017923575576792e-06,
      "loss": 0.8011,
      "step": 763260
    },
    {
      "epoch": 2.675071058820231,
      "grad_norm": 2.625,
      "learning_rate": 6.01727454691309e-06,
      "loss": 0.7898,
      "step": 763270
    },
    {
      "epoch": 2.675106106327126,
      "grad_norm": 3.125,
      "learning_rate": 6.016625518249388e-06,
      "loss": 0.8718,
      "step": 763280
    },
    {
      "epoch": 2.675141153834022,
      "grad_norm": 2.796875,
      "learning_rate": 6.015976489585686e-06,
      "loss": 0.8304,
      "step": 763290
    },
    {
      "epoch": 2.6751762013409177,
      "grad_norm": 2.625,
      "learning_rate": 6.015327460921984e-06,
      "loss": 0.7562,
      "step": 763300
    },
    {
      "epoch": 2.6752112488478135,
      "grad_norm": 2.953125,
      "learning_rate": 6.014678432258282e-06,
      "loss": 0.8525,
      "step": 763310
    },
    {
      "epoch": 2.675246296354709,
      "grad_norm": 2.875,
      "learning_rate": 6.014029403594581e-06,
      "loss": 0.8279,
      "step": 763320
    },
    {
      "epoch": 2.6752813438616045,
      "grad_norm": 3.328125,
      "learning_rate": 6.013380374930878e-06,
      "loss": 0.7896,
      "step": 763330
    },
    {
      "epoch": 2.6753163913685,
      "grad_norm": 3.265625,
      "learning_rate": 6.012731346267176e-06,
      "loss": 0.8276,
      "step": 763340
    },
    {
      "epoch": 2.6753514388753956,
      "grad_norm": 2.84375,
      "learning_rate": 6.012082317603475e-06,
      "loss": 0.7735,
      "step": 763350
    },
    {
      "epoch": 2.6753864863822914,
      "grad_norm": 2.703125,
      "learning_rate": 6.011433288939773e-06,
      "loss": 0.6254,
      "step": 763360
    },
    {
      "epoch": 2.6754215338891867,
      "grad_norm": 2.90625,
      "learning_rate": 6.010784260276071e-06,
      "loss": 0.7561,
      "step": 763370
    },
    {
      "epoch": 2.6754565813960824,
      "grad_norm": 2.609375,
      "learning_rate": 6.010135231612369e-06,
      "loss": 0.7833,
      "step": 763380
    },
    {
      "epoch": 2.6754916289029778,
      "grad_norm": 3.265625,
      "learning_rate": 6.009486202948667e-06,
      "loss": 0.7477,
      "step": 763390
    },
    {
      "epoch": 2.6755266764098735,
      "grad_norm": 2.734375,
      "learning_rate": 6.008837174284965e-06,
      "loss": 0.7431,
      "step": 763400
    },
    {
      "epoch": 2.6755617239167693,
      "grad_norm": 3.46875,
      "learning_rate": 6.008188145621264e-06,
      "loss": 0.7676,
      "step": 763410
    },
    {
      "epoch": 2.675596771423665,
      "grad_norm": 3.859375,
      "learning_rate": 6.007539116957562e-06,
      "loss": 0.8337,
      "step": 763420
    },
    {
      "epoch": 2.6756318189305603,
      "grad_norm": 3.296875,
      "learning_rate": 6.006890088293859e-06,
      "loss": 0.9177,
      "step": 763430
    },
    {
      "epoch": 2.675666866437456,
      "grad_norm": 2.8125,
      "learning_rate": 6.006241059630158e-06,
      "loss": 0.836,
      "step": 763440
    },
    {
      "epoch": 2.6757019139443514,
      "grad_norm": 2.921875,
      "learning_rate": 6.005592030966456e-06,
      "loss": 0.7029,
      "step": 763450
    },
    {
      "epoch": 2.675736961451247,
      "grad_norm": 2.578125,
      "learning_rate": 6.004943002302754e-06,
      "loss": 0.7698,
      "step": 763460
    },
    {
      "epoch": 2.675772008958143,
      "grad_norm": 2.625,
      "learning_rate": 6.004293973639052e-06,
      "loss": 0.756,
      "step": 763470
    },
    {
      "epoch": 2.6758070564650382,
      "grad_norm": 3.46875,
      "learning_rate": 6.00364494497535e-06,
      "loss": 0.853,
      "step": 763480
    },
    {
      "epoch": 2.675842103971934,
      "grad_norm": 4.9375,
      "learning_rate": 6.002995916311648e-06,
      "loss": 0.7097,
      "step": 763490
    },
    {
      "epoch": 2.6758771514788293,
      "grad_norm": 2.828125,
      "learning_rate": 6.002346887647947e-06,
      "loss": 0.7321,
      "step": 763500
    },
    {
      "epoch": 2.675912198985725,
      "grad_norm": 2.84375,
      "learning_rate": 6.001697858984245e-06,
      "loss": 0.6878,
      "step": 763510
    },
    {
      "epoch": 2.675947246492621,
      "grad_norm": 2.84375,
      "learning_rate": 6.001048830320542e-06,
      "loss": 0.7718,
      "step": 763520
    },
    {
      "epoch": 2.6759822939995166,
      "grad_norm": 2.890625,
      "learning_rate": 6.000399801656841e-06,
      "loss": 0.7383,
      "step": 763530
    },
    {
      "epoch": 2.676017341506412,
      "grad_norm": 3.265625,
      "learning_rate": 5.999750772993139e-06,
      "loss": 0.8245,
      "step": 763540
    },
    {
      "epoch": 2.6760523890133077,
      "grad_norm": 2.8125,
      "learning_rate": 5.999101744329437e-06,
      "loss": 0.8279,
      "step": 763550
    },
    {
      "epoch": 2.676087436520203,
      "grad_norm": 2.8125,
      "learning_rate": 5.998452715665736e-06,
      "loss": 0.7991,
      "step": 763560
    },
    {
      "epoch": 2.6761224840270987,
      "grad_norm": 2.9375,
      "learning_rate": 5.997803687002033e-06,
      "loss": 0.8065,
      "step": 763570
    },
    {
      "epoch": 2.6761575315339945,
      "grad_norm": 2.96875,
      "learning_rate": 5.997154658338331e-06,
      "loss": 0.8332,
      "step": 763580
    },
    {
      "epoch": 2.67619257904089,
      "grad_norm": 2.859375,
      "learning_rate": 5.99650562967463e-06,
      "loss": 0.77,
      "step": 763590
    },
    {
      "epoch": 2.6762276265477856,
      "grad_norm": 3.0,
      "learning_rate": 5.995856601010928e-06,
      "loss": 0.7899,
      "step": 763600
    },
    {
      "epoch": 2.676262674054681,
      "grad_norm": 2.671875,
      "learning_rate": 5.995207572347226e-06,
      "loss": 0.8445,
      "step": 763610
    },
    {
      "epoch": 2.6762977215615766,
      "grad_norm": 2.9375,
      "learning_rate": 5.994558543683524e-06,
      "loss": 0.8541,
      "step": 763620
    },
    {
      "epoch": 2.6763327690684724,
      "grad_norm": 2.328125,
      "learning_rate": 5.993909515019822e-06,
      "loss": 0.7535,
      "step": 763630
    },
    {
      "epoch": 2.676367816575368,
      "grad_norm": 2.90625,
      "learning_rate": 5.99326048635612e-06,
      "loss": 0.7688,
      "step": 763640
    },
    {
      "epoch": 2.6764028640822635,
      "grad_norm": 2.296875,
      "learning_rate": 5.992611457692418e-06,
      "loss": 0.7777,
      "step": 763650
    },
    {
      "epoch": 2.6764379115891592,
      "grad_norm": 2.46875,
      "learning_rate": 5.991962429028716e-06,
      "loss": 0.7805,
      "step": 763660
    },
    {
      "epoch": 2.6764729590960545,
      "grad_norm": 3.0625,
      "learning_rate": 5.991313400365014e-06,
      "loss": 0.7908,
      "step": 763670
    },
    {
      "epoch": 2.6765080066029503,
      "grad_norm": 2.75,
      "learning_rate": 5.9906643717013125e-06,
      "loss": 0.8072,
      "step": 763680
    },
    {
      "epoch": 2.676543054109846,
      "grad_norm": 2.59375,
      "learning_rate": 5.9900153430376105e-06,
      "loss": 0.8224,
      "step": 763690
    },
    {
      "epoch": 2.6765781016167414,
      "grad_norm": 2.671875,
      "learning_rate": 5.9893663143739085e-06,
      "loss": 0.8814,
      "step": 763700
    },
    {
      "epoch": 2.676613149123637,
      "grad_norm": 3.4375,
      "learning_rate": 5.9887172857102065e-06,
      "loss": 0.8651,
      "step": 763710
    },
    {
      "epoch": 2.6766481966305324,
      "grad_norm": 3.09375,
      "learning_rate": 5.9880682570465045e-06,
      "loss": 0.8121,
      "step": 763720
    },
    {
      "epoch": 2.676683244137428,
      "grad_norm": 2.984375,
      "learning_rate": 5.9874192283828025e-06,
      "loss": 0.8358,
      "step": 763730
    },
    {
      "epoch": 2.676718291644324,
      "grad_norm": 3.390625,
      "learning_rate": 5.9867701997191005e-06,
      "loss": 0.8981,
      "step": 763740
    },
    {
      "epoch": 2.6767533391512197,
      "grad_norm": 3.890625,
      "learning_rate": 5.986121171055399e-06,
      "loss": 0.9032,
      "step": 763750
    },
    {
      "epoch": 2.676788386658115,
      "grad_norm": 3.078125,
      "learning_rate": 5.9854721423916965e-06,
      "loss": 0.8221,
      "step": 763760
    },
    {
      "epoch": 2.676823434165011,
      "grad_norm": 2.453125,
      "learning_rate": 5.9848231137279945e-06,
      "loss": 0.7793,
      "step": 763770
    },
    {
      "epoch": 2.676858481671906,
      "grad_norm": 2.953125,
      "learning_rate": 5.984174085064293e-06,
      "loss": 0.7788,
      "step": 763780
    },
    {
      "epoch": 2.676893529178802,
      "grad_norm": 3.109375,
      "learning_rate": 5.983525056400591e-06,
      "loss": 0.8244,
      "step": 763790
    },
    {
      "epoch": 2.6769285766856976,
      "grad_norm": 2.90625,
      "learning_rate": 5.982876027736889e-06,
      "loss": 0.8275,
      "step": 763800
    },
    {
      "epoch": 2.676963624192593,
      "grad_norm": 3.40625,
      "learning_rate": 5.982226999073187e-06,
      "loss": 0.7992,
      "step": 763810
    },
    {
      "epoch": 2.6769986716994887,
      "grad_norm": 2.9375,
      "learning_rate": 5.981577970409485e-06,
      "loss": 0.7758,
      "step": 763820
    },
    {
      "epoch": 2.677033719206384,
      "grad_norm": 2.53125,
      "learning_rate": 5.980928941745783e-06,
      "loss": 0.7546,
      "step": 763830
    },
    {
      "epoch": 2.6770687667132798,
      "grad_norm": 3.09375,
      "learning_rate": 5.980279913082082e-06,
      "loss": 0.8429,
      "step": 763840
    },
    {
      "epoch": 2.6771038142201755,
      "grad_norm": 3.03125,
      "learning_rate": 5.979630884418379e-06,
      "loss": 0.7818,
      "step": 763850
    },
    {
      "epoch": 2.6771388617270713,
      "grad_norm": 3.0625,
      "learning_rate": 5.978981855754677e-06,
      "loss": 0.8203,
      "step": 763860
    },
    {
      "epoch": 2.6771739092339666,
      "grad_norm": 2.703125,
      "learning_rate": 5.978332827090976e-06,
      "loss": 0.8401,
      "step": 763870
    },
    {
      "epoch": 2.6772089567408623,
      "grad_norm": 2.59375,
      "learning_rate": 5.977683798427274e-06,
      "loss": 0.7465,
      "step": 763880
    },
    {
      "epoch": 2.6772440042477577,
      "grad_norm": 3.234375,
      "learning_rate": 5.977034769763572e-06,
      "loss": 0.8313,
      "step": 763890
    },
    {
      "epoch": 2.6772790517546534,
      "grad_norm": 2.671875,
      "learning_rate": 5.97638574109987e-06,
      "loss": 0.7842,
      "step": 763900
    },
    {
      "epoch": 2.677314099261549,
      "grad_norm": 2.828125,
      "learning_rate": 5.975736712436168e-06,
      "loss": 0.7922,
      "step": 763910
    },
    {
      "epoch": 2.6773491467684445,
      "grad_norm": 3.578125,
      "learning_rate": 5.975087683772466e-06,
      "loss": 0.7539,
      "step": 763920
    },
    {
      "epoch": 2.6773841942753402,
      "grad_norm": 2.984375,
      "learning_rate": 5.974438655108765e-06,
      "loss": 0.78,
      "step": 763930
    },
    {
      "epoch": 2.6774192417822356,
      "grad_norm": 2.796875,
      "learning_rate": 5.973789626445062e-06,
      "loss": 0.8176,
      "step": 763940
    },
    {
      "epoch": 2.6774542892891313,
      "grad_norm": 2.625,
      "learning_rate": 5.97314059778136e-06,
      "loss": 0.7521,
      "step": 763950
    },
    {
      "epoch": 2.677489336796027,
      "grad_norm": 2.875,
      "learning_rate": 5.972491569117659e-06,
      "loss": 0.7517,
      "step": 763960
    },
    {
      "epoch": 2.677524384302923,
      "grad_norm": 2.40625,
      "learning_rate": 5.971842540453957e-06,
      "loss": 0.7892,
      "step": 763970
    },
    {
      "epoch": 2.677559431809818,
      "grad_norm": 2.875,
      "learning_rate": 5.971193511790255e-06,
      "loss": 0.8053,
      "step": 763980
    },
    {
      "epoch": 2.677594479316714,
      "grad_norm": 2.71875,
      "learning_rate": 5.970544483126553e-06,
      "loss": 0.8036,
      "step": 763990
    },
    {
      "epoch": 2.677629526823609,
      "grad_norm": 2.96875,
      "learning_rate": 5.969895454462851e-06,
      "loss": 0.8028,
      "step": 764000
    },
    {
      "epoch": 2.677664574330505,
      "grad_norm": 3.21875,
      "learning_rate": 5.969246425799149e-06,
      "loss": 0.8028,
      "step": 764010
    },
    {
      "epoch": 2.6776996218374007,
      "grad_norm": 2.75,
      "learning_rate": 5.968597397135448e-06,
      "loss": 0.8885,
      "step": 764020
    },
    {
      "epoch": 2.677734669344296,
      "grad_norm": 3.0625,
      "learning_rate": 5.967948368471746e-06,
      "loss": 0.7642,
      "step": 764030
    },
    {
      "epoch": 2.677769716851192,
      "grad_norm": 3.0625,
      "learning_rate": 5.967299339808043e-06,
      "loss": 0.7535,
      "step": 764040
    },
    {
      "epoch": 2.677804764358087,
      "grad_norm": 2.71875,
      "learning_rate": 5.966650311144342e-06,
      "loss": 0.8214,
      "step": 764050
    },
    {
      "epoch": 2.677839811864983,
      "grad_norm": 3.1875,
      "learning_rate": 5.96600128248064e-06,
      "loss": 0.8677,
      "step": 764060
    },
    {
      "epoch": 2.6778748593718786,
      "grad_norm": 3.203125,
      "learning_rate": 5.965352253816938e-06,
      "loss": 0.8525,
      "step": 764070
    },
    {
      "epoch": 2.6779099068787744,
      "grad_norm": 2.59375,
      "learning_rate": 5.964703225153236e-06,
      "loss": 0.7859,
      "step": 764080
    },
    {
      "epoch": 2.6779449543856697,
      "grad_norm": 3.015625,
      "learning_rate": 5.964054196489534e-06,
      "loss": 0.8175,
      "step": 764090
    },
    {
      "epoch": 2.6779800018925655,
      "grad_norm": 3.109375,
      "learning_rate": 5.963405167825832e-06,
      "loss": 0.8257,
      "step": 764100
    },
    {
      "epoch": 2.678015049399461,
      "grad_norm": 2.6875,
      "learning_rate": 5.962756139162131e-06,
      "loss": 0.7703,
      "step": 764110
    },
    {
      "epoch": 2.6780500969063565,
      "grad_norm": 2.765625,
      "learning_rate": 5.962107110498429e-06,
      "loss": 0.7865,
      "step": 764120
    },
    {
      "epoch": 2.6780851444132523,
      "grad_norm": 3.34375,
      "learning_rate": 5.961458081834726e-06,
      "loss": 0.8682,
      "step": 764130
    },
    {
      "epoch": 2.6781201919201476,
      "grad_norm": 2.8125,
      "learning_rate": 5.960809053171025e-06,
      "loss": 0.7737,
      "step": 764140
    },
    {
      "epoch": 2.6781552394270434,
      "grad_norm": 2.671875,
      "learning_rate": 5.960160024507323e-06,
      "loss": 0.8005,
      "step": 764150
    },
    {
      "epoch": 2.6781902869339387,
      "grad_norm": 3.125,
      "learning_rate": 5.959510995843621e-06,
      "loss": 0.7932,
      "step": 764160
    },
    {
      "epoch": 2.6782253344408344,
      "grad_norm": 2.859375,
      "learning_rate": 5.958861967179919e-06,
      "loss": 0.7585,
      "step": 764170
    },
    {
      "epoch": 2.67826038194773,
      "grad_norm": 2.28125,
      "learning_rate": 5.958212938516217e-06,
      "loss": 0.7761,
      "step": 764180
    },
    {
      "epoch": 2.678295429454626,
      "grad_norm": 2.921875,
      "learning_rate": 5.957563909852515e-06,
      "loss": 0.8143,
      "step": 764190
    },
    {
      "epoch": 2.6783304769615213,
      "grad_norm": 2.75,
      "learning_rate": 5.956914881188813e-06,
      "loss": 0.7183,
      "step": 764200
    },
    {
      "epoch": 2.678365524468417,
      "grad_norm": 3.234375,
      "learning_rate": 5.9562658525251116e-06,
      "loss": 0.8227,
      "step": 764210
    },
    {
      "epoch": 2.6784005719753123,
      "grad_norm": 2.921875,
      "learning_rate": 5.9556168238614096e-06,
      "loss": 0.748,
      "step": 764220
    },
    {
      "epoch": 2.678435619482208,
      "grad_norm": 2.921875,
      "learning_rate": 5.9549677951977076e-06,
      "loss": 0.7983,
      "step": 764230
    },
    {
      "epoch": 2.678470666989104,
      "grad_norm": 2.578125,
      "learning_rate": 5.9543187665340056e-06,
      "loss": 0.7774,
      "step": 764240
    },
    {
      "epoch": 2.678505714495999,
      "grad_norm": 2.609375,
      "learning_rate": 5.9536697378703036e-06,
      "loss": 0.7692,
      "step": 764250
    },
    {
      "epoch": 2.678540762002895,
      "grad_norm": 3.375,
      "learning_rate": 5.9530207092066016e-06,
      "loss": 0.8592,
      "step": 764260
    },
    {
      "epoch": 2.6785758095097907,
      "grad_norm": 2.8125,
      "learning_rate": 5.9523716805428996e-06,
      "loss": 0.8028,
      "step": 764270
    },
    {
      "epoch": 2.678610857016686,
      "grad_norm": 3.078125,
      "learning_rate": 5.9517226518791976e-06,
      "loss": 0.8057,
      "step": 764280
    },
    {
      "epoch": 2.6786459045235818,
      "grad_norm": 3.015625,
      "learning_rate": 5.9510736232154956e-06,
      "loss": 0.8083,
      "step": 764290
    },
    {
      "epoch": 2.6786809520304775,
      "grad_norm": 2.796875,
      "learning_rate": 5.950424594551794e-06,
      "loss": 0.7219,
      "step": 764300
    },
    {
      "epoch": 2.678715999537373,
      "grad_norm": 2.5,
      "learning_rate": 5.949775565888092e-06,
      "loss": 0.7614,
      "step": 764310
    },
    {
      "epoch": 2.6787510470442686,
      "grad_norm": 3.0,
      "learning_rate": 5.9491265372243896e-06,
      "loss": 0.7951,
      "step": 764320
    },
    {
      "epoch": 2.678786094551164,
      "grad_norm": 2.890625,
      "learning_rate": 5.948477508560688e-06,
      "loss": 0.8282,
      "step": 764330
    },
    {
      "epoch": 2.6788211420580597,
      "grad_norm": 2.8125,
      "learning_rate": 5.947828479896986e-06,
      "loss": 0.8347,
      "step": 764340
    },
    {
      "epoch": 2.6788561895649554,
      "grad_norm": 2.9375,
      "learning_rate": 5.947179451233284e-06,
      "loss": 0.8751,
      "step": 764350
    },
    {
      "epoch": 2.6788912370718507,
      "grad_norm": 2.9375,
      "learning_rate": 5.946530422569583e-06,
      "loss": 0.7985,
      "step": 764360
    },
    {
      "epoch": 2.6789262845787465,
      "grad_norm": 3.21875,
      "learning_rate": 5.94588139390588e-06,
      "loss": 0.7051,
      "step": 764370
    },
    {
      "epoch": 2.6789613320856422,
      "grad_norm": 3.28125,
      "learning_rate": 5.945232365242178e-06,
      "loss": 0.8306,
      "step": 764380
    },
    {
      "epoch": 2.6789963795925376,
      "grad_norm": 2.609375,
      "learning_rate": 5.944583336578477e-06,
      "loss": 0.7875,
      "step": 764390
    },
    {
      "epoch": 2.6790314270994333,
      "grad_norm": 2.953125,
      "learning_rate": 5.943934307914775e-06,
      "loss": 0.8268,
      "step": 764400
    },
    {
      "epoch": 2.679066474606329,
      "grad_norm": 3.125,
      "learning_rate": 5.943285279251072e-06,
      "loss": 0.7829,
      "step": 764410
    },
    {
      "epoch": 2.6791015221132244,
      "grad_norm": 2.890625,
      "learning_rate": 5.942636250587371e-06,
      "loss": 0.8458,
      "step": 764420
    },
    {
      "epoch": 2.67913656962012,
      "grad_norm": 2.796875,
      "learning_rate": 5.941987221923669e-06,
      "loss": 0.8215,
      "step": 764430
    },
    {
      "epoch": 2.6791716171270155,
      "grad_norm": 2.59375,
      "learning_rate": 5.941338193259967e-06,
      "loss": 0.7817,
      "step": 764440
    },
    {
      "epoch": 2.679206664633911,
      "grad_norm": 3.203125,
      "learning_rate": 5.940689164596266e-06,
      "loss": 0.8972,
      "step": 764450
    },
    {
      "epoch": 2.679241712140807,
      "grad_norm": 2.625,
      "learning_rate": 5.940040135932563e-06,
      "loss": 0.7576,
      "step": 764460
    },
    {
      "epoch": 2.6792767596477027,
      "grad_norm": 2.609375,
      "learning_rate": 5.939391107268861e-06,
      "loss": 0.7937,
      "step": 764470
    },
    {
      "epoch": 2.679311807154598,
      "grad_norm": 2.859375,
      "learning_rate": 5.93874207860516e-06,
      "loss": 0.8389,
      "step": 764480
    },
    {
      "epoch": 2.679346854661494,
      "grad_norm": 2.890625,
      "learning_rate": 5.938093049941458e-06,
      "loss": 0.9325,
      "step": 764490
    },
    {
      "epoch": 2.679381902168389,
      "grad_norm": 3.484375,
      "learning_rate": 5.937444021277756e-06,
      "loss": 0.7972,
      "step": 764500
    },
    {
      "epoch": 2.679416949675285,
      "grad_norm": 2.578125,
      "learning_rate": 5.936794992614054e-06,
      "loss": 0.7686,
      "step": 764510
    },
    {
      "epoch": 2.6794519971821806,
      "grad_norm": 2.5625,
      "learning_rate": 5.936145963950352e-06,
      "loss": 0.7893,
      "step": 764520
    },
    {
      "epoch": 2.679487044689076,
      "grad_norm": 2.625,
      "learning_rate": 5.93549693528665e-06,
      "loss": 0.772,
      "step": 764530
    },
    {
      "epoch": 2.6795220921959717,
      "grad_norm": 3.109375,
      "learning_rate": 5.934847906622949e-06,
      "loss": 0.8874,
      "step": 764540
    },
    {
      "epoch": 2.679557139702867,
      "grad_norm": 3.171875,
      "learning_rate": 5.934198877959247e-06,
      "loss": 0.8501,
      "step": 764550
    },
    {
      "epoch": 2.6795921872097628,
      "grad_norm": 3.234375,
      "learning_rate": 5.933549849295544e-06,
      "loss": 0.8063,
      "step": 764560
    },
    {
      "epoch": 2.6796272347166585,
      "grad_norm": 2.703125,
      "learning_rate": 5.932900820631843e-06,
      "loss": 0.7896,
      "step": 764570
    },
    {
      "epoch": 2.6796622822235543,
      "grad_norm": 2.8125,
      "learning_rate": 5.932251791968141e-06,
      "loss": 0.8097,
      "step": 764580
    },
    {
      "epoch": 2.6796973297304496,
      "grad_norm": 3.125,
      "learning_rate": 5.931602763304439e-06,
      "loss": 0.818,
      "step": 764590
    },
    {
      "epoch": 2.6797323772373454,
      "grad_norm": 2.953125,
      "learning_rate": 5.930953734640737e-06,
      "loss": 0.7617,
      "step": 764600
    },
    {
      "epoch": 2.6797674247442407,
      "grad_norm": 2.875,
      "learning_rate": 5.930304705977035e-06,
      "loss": 0.8112,
      "step": 764610
    },
    {
      "epoch": 2.6798024722511364,
      "grad_norm": 2.96875,
      "learning_rate": 5.929655677313333e-06,
      "loss": 0.734,
      "step": 764620
    },
    {
      "epoch": 2.679837519758032,
      "grad_norm": 2.6875,
      "learning_rate": 5.929006648649632e-06,
      "loss": 0.7977,
      "step": 764630
    },
    {
      "epoch": 2.6798725672649275,
      "grad_norm": 3.078125,
      "learning_rate": 5.92835761998593e-06,
      "loss": 0.7849,
      "step": 764640
    },
    {
      "epoch": 2.6799076147718233,
      "grad_norm": 3.40625,
      "learning_rate": 5.927708591322227e-06,
      "loss": 0.7627,
      "step": 764650
    },
    {
      "epoch": 2.6799426622787186,
      "grad_norm": 2.625,
      "learning_rate": 5.927059562658526e-06,
      "loss": 0.791,
      "step": 764660
    },
    {
      "epoch": 2.6799777097856143,
      "grad_norm": 3.234375,
      "learning_rate": 5.926410533994824e-06,
      "loss": 0.815,
      "step": 764670
    },
    {
      "epoch": 2.68001275729251,
      "grad_norm": 3.0,
      "learning_rate": 5.925761505331122e-06,
      "loss": 0.8619,
      "step": 764680
    },
    {
      "epoch": 2.680047804799406,
      "grad_norm": 2.90625,
      "learning_rate": 5.92511247666742e-06,
      "loss": 0.8546,
      "step": 764690
    },
    {
      "epoch": 2.680082852306301,
      "grad_norm": 3.421875,
      "learning_rate": 5.924463448003718e-06,
      "loss": 0.8436,
      "step": 764700
    },
    {
      "epoch": 2.680117899813197,
      "grad_norm": 2.75,
      "learning_rate": 5.923814419340016e-06,
      "loss": 0.8131,
      "step": 764710
    },
    {
      "epoch": 2.6801529473200922,
      "grad_norm": 2.84375,
      "learning_rate": 5.923165390676314e-06,
      "loss": 0.7437,
      "step": 764720
    },
    {
      "epoch": 2.680187994826988,
      "grad_norm": 3.484375,
      "learning_rate": 5.922516362012613e-06,
      "loss": 0.8624,
      "step": 764730
    },
    {
      "epoch": 2.6802230423338838,
      "grad_norm": 2.921875,
      "learning_rate": 5.92186733334891e-06,
      "loss": 0.8689,
      "step": 764740
    },
    {
      "epoch": 2.680258089840779,
      "grad_norm": 2.921875,
      "learning_rate": 5.921218304685209e-06,
      "loss": 0.7929,
      "step": 764750
    },
    {
      "epoch": 2.680293137347675,
      "grad_norm": 3.234375,
      "learning_rate": 5.920569276021507e-06,
      "loss": 0.8049,
      "step": 764760
    },
    {
      "epoch": 2.68032818485457,
      "grad_norm": 2.8125,
      "learning_rate": 5.919920247357805e-06,
      "loss": 0.6854,
      "step": 764770
    },
    {
      "epoch": 2.680363232361466,
      "grad_norm": 3.546875,
      "learning_rate": 5.919271218694103e-06,
      "loss": 0.8065,
      "step": 764780
    },
    {
      "epoch": 2.6803982798683617,
      "grad_norm": 2.65625,
      "learning_rate": 5.918622190030401e-06,
      "loss": 0.7657,
      "step": 764790
    },
    {
      "epoch": 2.6804333273752574,
      "grad_norm": 3.15625,
      "learning_rate": 5.917973161366699e-06,
      "loss": 0.8071,
      "step": 764800
    },
    {
      "epoch": 2.6804683748821527,
      "grad_norm": 3.359375,
      "learning_rate": 5.917324132702997e-06,
      "loss": 0.8057,
      "step": 764810
    },
    {
      "epoch": 2.6805034223890485,
      "grad_norm": 2.890625,
      "learning_rate": 5.9166751040392955e-06,
      "loss": 0.7905,
      "step": 764820
    },
    {
      "epoch": 2.680538469895944,
      "grad_norm": 3.75,
      "learning_rate": 5.9160260753755935e-06,
      "loss": 0.7952,
      "step": 764830
    },
    {
      "epoch": 2.6805735174028396,
      "grad_norm": 3.46875,
      "learning_rate": 5.915377046711891e-06,
      "loss": 0.85,
      "step": 764840
    },
    {
      "epoch": 2.6806085649097353,
      "grad_norm": 2.6875,
      "learning_rate": 5.9147280180481895e-06,
      "loss": 0.7567,
      "step": 764850
    },
    {
      "epoch": 2.6806436124166306,
      "grad_norm": 2.875,
      "learning_rate": 5.9140789893844875e-06,
      "loss": 0.8801,
      "step": 764860
    },
    {
      "epoch": 2.6806786599235264,
      "grad_norm": 2.734375,
      "learning_rate": 5.9134299607207855e-06,
      "loss": 0.7052,
      "step": 764870
    },
    {
      "epoch": 2.6807137074304217,
      "grad_norm": 3.328125,
      "learning_rate": 5.912780932057084e-06,
      "loss": 0.8056,
      "step": 764880
    },
    {
      "epoch": 2.6807487549373175,
      "grad_norm": 2.5,
      "learning_rate": 5.9121319033933815e-06,
      "loss": 0.832,
      "step": 764890
    },
    {
      "epoch": 2.680783802444213,
      "grad_norm": 2.6875,
      "learning_rate": 5.9114828747296795e-06,
      "loss": 0.7824,
      "step": 764900
    },
    {
      "epoch": 2.680818849951109,
      "grad_norm": 2.890625,
      "learning_rate": 5.910833846065978e-06,
      "loss": 0.8161,
      "step": 764910
    },
    {
      "epoch": 2.6808538974580043,
      "grad_norm": 2.84375,
      "learning_rate": 5.910184817402276e-06,
      "loss": 0.9288,
      "step": 764920
    },
    {
      "epoch": 2.6808889449649,
      "grad_norm": 3.296875,
      "learning_rate": 5.9095357887385735e-06,
      "loss": 0.8413,
      "step": 764930
    },
    {
      "epoch": 2.6809239924717954,
      "grad_norm": 2.875,
      "learning_rate": 5.908886760074872e-06,
      "loss": 0.788,
      "step": 764940
    },
    {
      "epoch": 2.680959039978691,
      "grad_norm": 2.921875,
      "learning_rate": 5.90823773141117e-06,
      "loss": 0.8524,
      "step": 764950
    },
    {
      "epoch": 2.680994087485587,
      "grad_norm": 2.390625,
      "learning_rate": 5.907588702747468e-06,
      "loss": 0.7866,
      "step": 764960
    },
    {
      "epoch": 2.681029134992482,
      "grad_norm": 2.625,
      "learning_rate": 5.906939674083767e-06,
      "loss": 0.7922,
      "step": 764970
    },
    {
      "epoch": 2.681064182499378,
      "grad_norm": 3.046875,
      "learning_rate": 5.906290645420064e-06,
      "loss": 0.8431,
      "step": 764980
    },
    {
      "epoch": 2.6810992300062733,
      "grad_norm": 3.09375,
      "learning_rate": 5.905641616756362e-06,
      "loss": 0.7898,
      "step": 764990
    },
    {
      "epoch": 2.681134277513169,
      "grad_norm": 2.46875,
      "learning_rate": 5.904992588092661e-06,
      "loss": 0.7369,
      "step": 765000
    },
    {
      "epoch": 2.681134277513169,
      "eval_loss": 0.7537264823913574,
      "eval_runtime": 565.373,
      "eval_samples_per_second": 672.894,
      "eval_steps_per_second": 56.074,
      "step": 765000
    },
    {
      "epoch": 2.6811693250200648,
      "grad_norm": 3.0625,
      "learning_rate": 5.904343559428959e-06,
      "loss": 0.8572,
      "step": 765010
    },
    {
      "epoch": 2.6812043725269605,
      "grad_norm": 3.421875,
      "learning_rate": 5.903694530765257e-06,
      "loss": 0.844,
      "step": 765020
    },
    {
      "epoch": 2.681239420033856,
      "grad_norm": 3.09375,
      "learning_rate": 5.903045502101555e-06,
      "loss": 0.7921,
      "step": 765030
    },
    {
      "epoch": 2.6812744675407516,
      "grad_norm": 2.8125,
      "learning_rate": 5.902396473437853e-06,
      "loss": 0.8417,
      "step": 765040
    },
    {
      "epoch": 2.681309515047647,
      "grad_norm": 2.640625,
      "learning_rate": 5.901747444774151e-06,
      "loss": 0.7367,
      "step": 765050
    },
    {
      "epoch": 2.6813445625545427,
      "grad_norm": 2.9375,
      "learning_rate": 5.90109841611045e-06,
      "loss": 0.8665,
      "step": 765060
    },
    {
      "epoch": 2.6813796100614384,
      "grad_norm": 2.828125,
      "learning_rate": 5.900449387446747e-06,
      "loss": 0.8022,
      "step": 765070
    },
    {
      "epoch": 2.6814146575683337,
      "grad_norm": 2.796875,
      "learning_rate": 5.899800358783045e-06,
      "loss": 0.8373,
      "step": 765080
    },
    {
      "epoch": 2.6814497050752295,
      "grad_norm": 3.34375,
      "learning_rate": 5.899151330119344e-06,
      "loss": 0.7842,
      "step": 765090
    },
    {
      "epoch": 2.681484752582125,
      "grad_norm": 3.0625,
      "learning_rate": 5.898502301455642e-06,
      "loss": 0.7827,
      "step": 765100
    },
    {
      "epoch": 2.6815198000890206,
      "grad_norm": 3.453125,
      "learning_rate": 5.89785327279194e-06,
      "loss": 0.741,
      "step": 765110
    },
    {
      "epoch": 2.6815548475959163,
      "grad_norm": 3.078125,
      "learning_rate": 5.897204244128238e-06,
      "loss": 0.7616,
      "step": 765120
    },
    {
      "epoch": 2.681589895102812,
      "grad_norm": 2.796875,
      "learning_rate": 5.896555215464536e-06,
      "loss": 0.8357,
      "step": 765130
    },
    {
      "epoch": 2.6816249426097074,
      "grad_norm": 2.953125,
      "learning_rate": 5.895906186800834e-06,
      "loss": 0.8627,
      "step": 765140
    },
    {
      "epoch": 2.681659990116603,
      "grad_norm": 3.484375,
      "learning_rate": 5.895257158137132e-06,
      "loss": 0.7937,
      "step": 765150
    },
    {
      "epoch": 2.6816950376234985,
      "grad_norm": 2.9375,
      "learning_rate": 5.894608129473431e-06,
      "loss": 0.8008,
      "step": 765160
    },
    {
      "epoch": 2.6817300851303942,
      "grad_norm": 3.171875,
      "learning_rate": 5.893959100809728e-06,
      "loss": 0.7807,
      "step": 765170
    },
    {
      "epoch": 2.68176513263729,
      "grad_norm": 2.71875,
      "learning_rate": 5.893310072146027e-06,
      "loss": 0.7563,
      "step": 765180
    },
    {
      "epoch": 2.6818001801441853,
      "grad_norm": 3.125,
      "learning_rate": 5.892661043482325e-06,
      "loss": 0.8205,
      "step": 765190
    },
    {
      "epoch": 2.681835227651081,
      "grad_norm": 2.609375,
      "learning_rate": 5.892012014818623e-06,
      "loss": 0.8074,
      "step": 765200
    },
    {
      "epoch": 2.6818702751579764,
      "grad_norm": 3.25,
      "learning_rate": 5.891362986154921e-06,
      "loss": 0.7705,
      "step": 765210
    },
    {
      "epoch": 2.681905322664872,
      "grad_norm": 2.96875,
      "learning_rate": 5.890713957491219e-06,
      "loss": 0.7849,
      "step": 765220
    },
    {
      "epoch": 2.681940370171768,
      "grad_norm": 2.515625,
      "learning_rate": 5.890064928827517e-06,
      "loss": 0.7588,
      "step": 765230
    },
    {
      "epoch": 2.6819754176786637,
      "grad_norm": 2.734375,
      "learning_rate": 5.889415900163815e-06,
      "loss": 0.798,
      "step": 765240
    },
    {
      "epoch": 2.682010465185559,
      "grad_norm": 2.8125,
      "learning_rate": 5.888766871500114e-06,
      "loss": 0.7206,
      "step": 765250
    },
    {
      "epoch": 2.6820455126924547,
      "grad_norm": 3.140625,
      "learning_rate": 5.888117842836411e-06,
      "loss": 0.7153,
      "step": 765260
    },
    {
      "epoch": 2.68208056019935,
      "grad_norm": 2.890625,
      "learning_rate": 5.887468814172709e-06,
      "loss": 0.785,
      "step": 765270
    },
    {
      "epoch": 2.682115607706246,
      "grad_norm": 2.953125,
      "learning_rate": 5.886819785509008e-06,
      "loss": 0.7913,
      "step": 765280
    },
    {
      "epoch": 2.6821506552131416,
      "grad_norm": 3.03125,
      "learning_rate": 5.886170756845306e-06,
      "loss": 0.8602,
      "step": 765290
    },
    {
      "epoch": 2.682185702720037,
      "grad_norm": 2.78125,
      "learning_rate": 5.885521728181604e-06,
      "loss": 0.8179,
      "step": 765300
    },
    {
      "epoch": 2.6822207502269326,
      "grad_norm": 3.0625,
      "learning_rate": 5.884872699517902e-06,
      "loss": 0.8292,
      "step": 765310
    },
    {
      "epoch": 2.682255797733828,
      "grad_norm": 2.65625,
      "learning_rate": 5.8842236708542e-06,
      "loss": 0.8126,
      "step": 765320
    },
    {
      "epoch": 2.6822908452407237,
      "grad_norm": 2.859375,
      "learning_rate": 5.883574642190498e-06,
      "loss": 0.8574,
      "step": 765330
    },
    {
      "epoch": 2.6823258927476195,
      "grad_norm": 2.859375,
      "learning_rate": 5.8829256135267966e-06,
      "loss": 0.7748,
      "step": 765340
    },
    {
      "epoch": 2.682360940254515,
      "grad_norm": 2.75,
      "learning_rate": 5.8822765848630946e-06,
      "loss": 0.7603,
      "step": 765350
    },
    {
      "epoch": 2.6823959877614105,
      "grad_norm": 2.796875,
      "learning_rate": 5.881627556199392e-06,
      "loss": 0.7375,
      "step": 765360
    },
    {
      "epoch": 2.6824310352683063,
      "grad_norm": 3.265625,
      "learning_rate": 5.8809785275356906e-06,
      "loss": 0.8499,
      "step": 765370
    },
    {
      "epoch": 2.6824660827752016,
      "grad_norm": 2.875,
      "learning_rate": 5.8803294988719886e-06,
      "loss": 0.7863,
      "step": 765380
    },
    {
      "epoch": 2.6825011302820974,
      "grad_norm": 3.078125,
      "learning_rate": 5.8796804702082866e-06,
      "loss": 0.8153,
      "step": 765390
    },
    {
      "epoch": 2.682536177788993,
      "grad_norm": 3.0625,
      "learning_rate": 5.8790314415445846e-06,
      "loss": 0.8523,
      "step": 765400
    },
    {
      "epoch": 2.6825712252958884,
      "grad_norm": 3.046875,
      "learning_rate": 5.8783824128808826e-06,
      "loss": 0.7972,
      "step": 765410
    },
    {
      "epoch": 2.682606272802784,
      "grad_norm": 2.84375,
      "learning_rate": 5.8777333842171806e-06,
      "loss": 0.7336,
      "step": 765420
    },
    {
      "epoch": 2.6826413203096795,
      "grad_norm": 2.34375,
      "learning_rate": 5.877084355553479e-06,
      "loss": 0.7579,
      "step": 765430
    },
    {
      "epoch": 2.6826763678165753,
      "grad_norm": 2.953125,
      "learning_rate": 5.876435326889777e-06,
      "loss": 0.7885,
      "step": 765440
    },
    {
      "epoch": 2.682711415323471,
      "grad_norm": 3.125,
      "learning_rate": 5.8757862982260746e-06,
      "loss": 0.7895,
      "step": 765450
    },
    {
      "epoch": 2.6827464628303668,
      "grad_norm": 2.796875,
      "learning_rate": 5.875137269562373e-06,
      "loss": 0.8552,
      "step": 765460
    },
    {
      "epoch": 2.682781510337262,
      "grad_norm": 2.65625,
      "learning_rate": 5.874488240898671e-06,
      "loss": 0.8405,
      "step": 765470
    },
    {
      "epoch": 2.682816557844158,
      "grad_norm": 2.984375,
      "learning_rate": 5.873839212234969e-06,
      "loss": 0.7483,
      "step": 765480
    },
    {
      "epoch": 2.682851605351053,
      "grad_norm": 2.75,
      "learning_rate": 5.873190183571268e-06,
      "loss": 0.7931,
      "step": 765490
    },
    {
      "epoch": 2.682886652857949,
      "grad_norm": 3.15625,
      "learning_rate": 5.872541154907565e-06,
      "loss": 0.8019,
      "step": 765500
    },
    {
      "epoch": 2.6829217003648447,
      "grad_norm": 3.328125,
      "learning_rate": 5.871892126243863e-06,
      "loss": 0.7079,
      "step": 765510
    },
    {
      "epoch": 2.68295674787174,
      "grad_norm": 2.734375,
      "learning_rate": 5.871243097580162e-06,
      "loss": 0.8379,
      "step": 765520
    },
    {
      "epoch": 2.6829917953786357,
      "grad_norm": 2.859375,
      "learning_rate": 5.87059406891646e-06,
      "loss": 0.8066,
      "step": 765530
    },
    {
      "epoch": 2.683026842885531,
      "grad_norm": 2.625,
      "learning_rate": 5.869945040252757e-06,
      "loss": 0.7929,
      "step": 765540
    },
    {
      "epoch": 2.683061890392427,
      "grad_norm": 3.140625,
      "learning_rate": 5.869296011589056e-06,
      "loss": 0.7842,
      "step": 765550
    },
    {
      "epoch": 2.6830969378993226,
      "grad_norm": 2.4375,
      "learning_rate": 5.868646982925354e-06,
      "loss": 0.8867,
      "step": 765560
    },
    {
      "epoch": 2.6831319854062183,
      "grad_norm": 2.453125,
      "learning_rate": 5.867997954261652e-06,
      "loss": 0.7832,
      "step": 765570
    },
    {
      "epoch": 2.6831670329131136,
      "grad_norm": 2.75,
      "learning_rate": 5.86734892559795e-06,
      "loss": 0.8054,
      "step": 765580
    },
    {
      "epoch": 2.6832020804200094,
      "grad_norm": 2.171875,
      "learning_rate": 5.866699896934248e-06,
      "loss": 0.7178,
      "step": 765590
    },
    {
      "epoch": 2.6832371279269047,
      "grad_norm": 2.875,
      "learning_rate": 5.866050868270546e-06,
      "loss": 0.853,
      "step": 765600
    },
    {
      "epoch": 2.6832721754338005,
      "grad_norm": 2.984375,
      "learning_rate": 5.865401839606845e-06,
      "loss": 0.8218,
      "step": 765610
    },
    {
      "epoch": 2.6833072229406962,
      "grad_norm": 3.078125,
      "learning_rate": 5.864752810943143e-06,
      "loss": 0.7913,
      "step": 765620
    },
    {
      "epoch": 2.6833422704475915,
      "grad_norm": 3.140625,
      "learning_rate": 5.864103782279441e-06,
      "loss": 0.9185,
      "step": 765630
    },
    {
      "epoch": 2.6833773179544873,
      "grad_norm": 2.765625,
      "learning_rate": 5.863454753615739e-06,
      "loss": 0.877,
      "step": 765640
    },
    {
      "epoch": 2.683412365461383,
      "grad_norm": 3.140625,
      "learning_rate": 5.862805724952037e-06,
      "loss": 0.7096,
      "step": 765650
    },
    {
      "epoch": 2.6834474129682784,
      "grad_norm": 2.984375,
      "learning_rate": 5.862156696288335e-06,
      "loss": 0.7525,
      "step": 765660
    },
    {
      "epoch": 2.683482460475174,
      "grad_norm": 2.71875,
      "learning_rate": 5.861507667624633e-06,
      "loss": 0.7424,
      "step": 765670
    },
    {
      "epoch": 2.68351750798207,
      "grad_norm": 2.75,
      "learning_rate": 5.860858638960931e-06,
      "loss": 0.7781,
      "step": 765680
    },
    {
      "epoch": 2.683552555488965,
      "grad_norm": 2.609375,
      "learning_rate": 5.860209610297229e-06,
      "loss": 0.7702,
      "step": 765690
    },
    {
      "epoch": 2.683587602995861,
      "grad_norm": 2.84375,
      "learning_rate": 5.859560581633527e-06,
      "loss": 0.7358,
      "step": 765700
    },
    {
      "epoch": 2.6836226505027563,
      "grad_norm": 2.75,
      "learning_rate": 5.858911552969826e-06,
      "loss": 0.7382,
      "step": 765710
    },
    {
      "epoch": 2.683657698009652,
      "grad_norm": 2.890625,
      "learning_rate": 5.858262524306124e-06,
      "loss": 0.8069,
      "step": 765720
    },
    {
      "epoch": 2.683692745516548,
      "grad_norm": 3.203125,
      "learning_rate": 5.857613495642422e-06,
      "loss": 0.814,
      "step": 765730
    },
    {
      "epoch": 2.6837277930234436,
      "grad_norm": 3.03125,
      "learning_rate": 5.85696446697872e-06,
      "loss": 0.7898,
      "step": 765740
    },
    {
      "epoch": 2.683762840530339,
      "grad_norm": 3.1875,
      "learning_rate": 5.856315438315018e-06,
      "loss": 0.8339,
      "step": 765750
    },
    {
      "epoch": 2.6837978880372346,
      "grad_norm": 2.78125,
      "learning_rate": 5.855666409651316e-06,
      "loss": 0.8421,
      "step": 765760
    },
    {
      "epoch": 2.68383293554413,
      "grad_norm": 3.046875,
      "learning_rate": 5.855017380987615e-06,
      "loss": 0.7694,
      "step": 765770
    },
    {
      "epoch": 2.6838679830510257,
      "grad_norm": 2.75,
      "learning_rate": 5.854368352323912e-06,
      "loss": 0.8585,
      "step": 765780
    },
    {
      "epoch": 2.6839030305579215,
      "grad_norm": 2.75,
      "learning_rate": 5.85371932366021e-06,
      "loss": 0.8573,
      "step": 765790
    },
    {
      "epoch": 2.6839380780648168,
      "grad_norm": 2.8125,
      "learning_rate": 5.853070294996509e-06,
      "loss": 0.7815,
      "step": 765800
    },
    {
      "epoch": 2.6839731255717125,
      "grad_norm": 3.46875,
      "learning_rate": 5.852421266332807e-06,
      "loss": 0.836,
      "step": 765810
    },
    {
      "epoch": 2.684008173078608,
      "grad_norm": 2.734375,
      "learning_rate": 5.851772237669105e-06,
      "loss": 0.8898,
      "step": 765820
    },
    {
      "epoch": 2.6840432205855036,
      "grad_norm": 3.265625,
      "learning_rate": 5.851123209005403e-06,
      "loss": 0.774,
      "step": 765830
    },
    {
      "epoch": 2.6840782680923994,
      "grad_norm": 3.078125,
      "learning_rate": 5.850474180341701e-06,
      "loss": 0.8043,
      "step": 765840
    },
    {
      "epoch": 2.684113315599295,
      "grad_norm": 2.890625,
      "learning_rate": 5.849825151677999e-06,
      "loss": 0.7694,
      "step": 765850
    },
    {
      "epoch": 2.6841483631061904,
      "grad_norm": 3.34375,
      "learning_rate": 5.849176123014298e-06,
      "loss": 0.8182,
      "step": 765860
    },
    {
      "epoch": 2.684183410613086,
      "grad_norm": 2.859375,
      "learning_rate": 5.848527094350595e-06,
      "loss": 0.7336,
      "step": 765870
    },
    {
      "epoch": 2.6842184581199815,
      "grad_norm": 3.125,
      "learning_rate": 5.847878065686893e-06,
      "loss": 0.7336,
      "step": 765880
    },
    {
      "epoch": 2.6842535056268773,
      "grad_norm": 2.671875,
      "learning_rate": 5.847229037023192e-06,
      "loss": 0.8226,
      "step": 765890
    },
    {
      "epoch": 2.684288553133773,
      "grad_norm": 3.328125,
      "learning_rate": 5.84658000835949e-06,
      "loss": 0.8867,
      "step": 765900
    },
    {
      "epoch": 2.6843236006406683,
      "grad_norm": 2.78125,
      "learning_rate": 5.845930979695788e-06,
      "loss": 0.8113,
      "step": 765910
    },
    {
      "epoch": 2.684358648147564,
      "grad_norm": 2.859375,
      "learning_rate": 5.845281951032086e-06,
      "loss": 0.7915,
      "step": 765920
    },
    {
      "epoch": 2.6843936956544594,
      "grad_norm": 3.0,
      "learning_rate": 5.844632922368384e-06,
      "loss": 0.7326,
      "step": 765930
    },
    {
      "epoch": 2.684428743161355,
      "grad_norm": 2.9375,
      "learning_rate": 5.843983893704682e-06,
      "loss": 0.8642,
      "step": 765940
    },
    {
      "epoch": 2.684463790668251,
      "grad_norm": 3.1875,
      "learning_rate": 5.8433348650409805e-06,
      "loss": 0.8469,
      "step": 765950
    },
    {
      "epoch": 2.6844988381751467,
      "grad_norm": 2.84375,
      "learning_rate": 5.8426858363772785e-06,
      "loss": 0.7974,
      "step": 765960
    },
    {
      "epoch": 2.684533885682042,
      "grad_norm": 3.03125,
      "learning_rate": 5.842036807713576e-06,
      "loss": 0.815,
      "step": 765970
    },
    {
      "epoch": 2.6845689331889377,
      "grad_norm": 2.703125,
      "learning_rate": 5.8413877790498745e-06,
      "loss": 0.7217,
      "step": 765980
    },
    {
      "epoch": 2.684603980695833,
      "grad_norm": 3.015625,
      "learning_rate": 5.8407387503861725e-06,
      "loss": 0.8398,
      "step": 765990
    },
    {
      "epoch": 2.684639028202729,
      "grad_norm": 3.234375,
      "learning_rate": 5.8400897217224705e-06,
      "loss": 0.8233,
      "step": 766000
    },
    {
      "epoch": 2.6846740757096246,
      "grad_norm": 2.8125,
      "learning_rate": 5.8394406930587685e-06,
      "loss": 0.8246,
      "step": 766010
    },
    {
      "epoch": 2.68470912321652,
      "grad_norm": 3.03125,
      "learning_rate": 5.8387916643950665e-06,
      "loss": 0.8871,
      "step": 766020
    },
    {
      "epoch": 2.6847441707234156,
      "grad_norm": 2.71875,
      "learning_rate": 5.8381426357313645e-06,
      "loss": 0.7773,
      "step": 766030
    },
    {
      "epoch": 2.684779218230311,
      "grad_norm": 3.109375,
      "learning_rate": 5.837493607067663e-06,
      "loss": 0.7852,
      "step": 766040
    },
    {
      "epoch": 2.6848142657372067,
      "grad_norm": 3.78125,
      "learning_rate": 5.836844578403961e-06,
      "loss": 0.8657,
      "step": 766050
    },
    {
      "epoch": 2.6848493132441025,
      "grad_norm": 2.59375,
      "learning_rate": 5.8361955497402585e-06,
      "loss": 0.8145,
      "step": 766060
    },
    {
      "epoch": 2.6848843607509982,
      "grad_norm": 2.875,
      "learning_rate": 5.835546521076557e-06,
      "loss": 0.8716,
      "step": 766070
    },
    {
      "epoch": 2.6849194082578935,
      "grad_norm": 2.53125,
      "learning_rate": 5.834897492412855e-06,
      "loss": 0.787,
      "step": 766080
    },
    {
      "epoch": 2.6849544557647893,
      "grad_norm": 2.828125,
      "learning_rate": 5.834248463749153e-06,
      "loss": 0.8529,
      "step": 766090
    },
    {
      "epoch": 2.6849895032716846,
      "grad_norm": 2.90625,
      "learning_rate": 5.833599435085451e-06,
      "loss": 0.7344,
      "step": 766100
    },
    {
      "epoch": 2.6850245507785804,
      "grad_norm": 3.0,
      "learning_rate": 5.832950406421749e-06,
      "loss": 0.7753,
      "step": 766110
    },
    {
      "epoch": 2.685059598285476,
      "grad_norm": 4.71875,
      "learning_rate": 5.832301377758047e-06,
      "loss": 0.7714,
      "step": 766120
    },
    {
      "epoch": 2.6850946457923714,
      "grad_norm": 2.9375,
      "learning_rate": 5.831652349094345e-06,
      "loss": 0.7986,
      "step": 766130
    },
    {
      "epoch": 2.685129693299267,
      "grad_norm": 3.1875,
      "learning_rate": 5.831003320430644e-06,
      "loss": 0.7449,
      "step": 766140
    },
    {
      "epoch": 2.6851647408061625,
      "grad_norm": 2.953125,
      "learning_rate": 5.830354291766941e-06,
      "loss": 0.7575,
      "step": 766150
    },
    {
      "epoch": 2.6851997883130583,
      "grad_norm": 3.15625,
      "learning_rate": 5.82970526310324e-06,
      "loss": 0.7968,
      "step": 766160
    },
    {
      "epoch": 2.685234835819954,
      "grad_norm": 3.234375,
      "learning_rate": 5.829056234439538e-06,
      "loss": 0.7676,
      "step": 766170
    },
    {
      "epoch": 2.68526988332685,
      "grad_norm": 2.890625,
      "learning_rate": 5.828407205775836e-06,
      "loss": 0.7984,
      "step": 766180
    },
    {
      "epoch": 2.685304930833745,
      "grad_norm": 3.265625,
      "learning_rate": 5.827758177112134e-06,
      "loss": 0.8412,
      "step": 766190
    },
    {
      "epoch": 2.685339978340641,
      "grad_norm": 2.78125,
      "learning_rate": 5.827109148448432e-06,
      "loss": 0.7892,
      "step": 766200
    },
    {
      "epoch": 2.685375025847536,
      "grad_norm": 2.84375,
      "learning_rate": 5.82646011978473e-06,
      "loss": 0.8231,
      "step": 766210
    },
    {
      "epoch": 2.685410073354432,
      "grad_norm": 2.75,
      "learning_rate": 5.825811091121028e-06,
      "loss": 0.764,
      "step": 766220
    },
    {
      "epoch": 2.6854451208613277,
      "grad_norm": 3.171875,
      "learning_rate": 5.825162062457327e-06,
      "loss": 0.778,
      "step": 766230
    },
    {
      "epoch": 2.685480168368223,
      "grad_norm": 3.03125,
      "learning_rate": 5.824513033793625e-06,
      "loss": 0.853,
      "step": 766240
    },
    {
      "epoch": 2.6855152158751188,
      "grad_norm": 2.40625,
      "learning_rate": 5.823864005129922e-06,
      "loss": 0.8274,
      "step": 766250
    },
    {
      "epoch": 2.685550263382014,
      "grad_norm": 3.078125,
      "learning_rate": 5.823214976466221e-06,
      "loss": 0.8308,
      "step": 766260
    },
    {
      "epoch": 2.68558531088891,
      "grad_norm": 3.125,
      "learning_rate": 5.822565947802519e-06,
      "loss": 0.7574,
      "step": 766270
    },
    {
      "epoch": 2.6856203583958056,
      "grad_norm": 3.171875,
      "learning_rate": 5.821916919138817e-06,
      "loss": 0.8981,
      "step": 766280
    },
    {
      "epoch": 2.6856554059027014,
      "grad_norm": 3.15625,
      "learning_rate": 5.821267890475116e-06,
      "loss": 0.7639,
      "step": 766290
    },
    {
      "epoch": 2.6856904534095967,
      "grad_norm": 2.796875,
      "learning_rate": 5.820618861811413e-06,
      "loss": 0.7436,
      "step": 766300
    },
    {
      "epoch": 2.6857255009164924,
      "grad_norm": 2.9375,
      "learning_rate": 5.819969833147711e-06,
      "loss": 0.9105,
      "step": 766310
    },
    {
      "epoch": 2.6857605484233877,
      "grad_norm": 3.125,
      "learning_rate": 5.81932080448401e-06,
      "loss": 0.8433,
      "step": 766320
    },
    {
      "epoch": 2.6857955959302835,
      "grad_norm": 2.890625,
      "learning_rate": 5.818671775820308e-06,
      "loss": 0.8218,
      "step": 766330
    },
    {
      "epoch": 2.6858306434371793,
      "grad_norm": 2.78125,
      "learning_rate": 5.818022747156605e-06,
      "loss": 0.7657,
      "step": 766340
    },
    {
      "epoch": 2.6858656909440746,
      "grad_norm": 3.1875,
      "learning_rate": 5.817373718492904e-06,
      "loss": 0.8122,
      "step": 766350
    },
    {
      "epoch": 2.6859007384509703,
      "grad_norm": 2.609375,
      "learning_rate": 5.816724689829202e-06,
      "loss": 0.7603,
      "step": 766360
    },
    {
      "epoch": 2.6859357859578656,
      "grad_norm": 2.96875,
      "learning_rate": 5.8160756611655e-06,
      "loss": 0.7219,
      "step": 766370
    },
    {
      "epoch": 2.6859708334647614,
      "grad_norm": 2.640625,
      "learning_rate": 5.815426632501799e-06,
      "loss": 0.7821,
      "step": 766380
    },
    {
      "epoch": 2.686005880971657,
      "grad_norm": 3.125,
      "learning_rate": 5.814777603838096e-06,
      "loss": 0.8166,
      "step": 766390
    },
    {
      "epoch": 2.686040928478553,
      "grad_norm": 2.921875,
      "learning_rate": 5.814128575174394e-06,
      "loss": 0.8028,
      "step": 766400
    },
    {
      "epoch": 2.6860759759854482,
      "grad_norm": 3.15625,
      "learning_rate": 5.813479546510693e-06,
      "loss": 0.8228,
      "step": 766410
    },
    {
      "epoch": 2.686111023492344,
      "grad_norm": 3.03125,
      "learning_rate": 5.812830517846991e-06,
      "loss": 0.8257,
      "step": 766420
    },
    {
      "epoch": 2.6861460709992393,
      "grad_norm": 2.921875,
      "learning_rate": 5.812181489183289e-06,
      "loss": 0.8162,
      "step": 766430
    },
    {
      "epoch": 2.686181118506135,
      "grad_norm": 3.09375,
      "learning_rate": 5.811532460519587e-06,
      "loss": 0.7982,
      "step": 766440
    },
    {
      "epoch": 2.686216166013031,
      "grad_norm": 2.546875,
      "learning_rate": 5.810883431855885e-06,
      "loss": 0.7401,
      "step": 766450
    },
    {
      "epoch": 2.686251213519926,
      "grad_norm": 2.609375,
      "learning_rate": 5.810234403192183e-06,
      "loss": 0.8124,
      "step": 766460
    },
    {
      "epoch": 2.686286261026822,
      "grad_norm": 3.03125,
      "learning_rate": 5.8095853745284816e-06,
      "loss": 0.7759,
      "step": 766470
    },
    {
      "epoch": 2.686321308533717,
      "grad_norm": 2.84375,
      "learning_rate": 5.808936345864779e-06,
      "loss": 0.7798,
      "step": 766480
    },
    {
      "epoch": 2.686356356040613,
      "grad_norm": 2.75,
      "learning_rate": 5.808287317201077e-06,
      "loss": 0.7413,
      "step": 766490
    },
    {
      "epoch": 2.6863914035475087,
      "grad_norm": 2.671875,
      "learning_rate": 5.8076382885373756e-06,
      "loss": 0.8663,
      "step": 766500
    },
    {
      "epoch": 2.6864264510544045,
      "grad_norm": 3.203125,
      "learning_rate": 5.8069892598736736e-06,
      "loss": 0.8161,
      "step": 766510
    },
    {
      "epoch": 2.6864614985613,
      "grad_norm": 3.140625,
      "learning_rate": 5.8063402312099716e-06,
      "loss": 0.8174,
      "step": 766520
    },
    {
      "epoch": 2.6864965460681955,
      "grad_norm": 2.875,
      "learning_rate": 5.8056912025462696e-06,
      "loss": 0.77,
      "step": 766530
    },
    {
      "epoch": 2.686531593575091,
      "grad_norm": 2.9375,
      "learning_rate": 5.8050421738825676e-06,
      "loss": 0.8064,
      "step": 766540
    },
    {
      "epoch": 2.6865666410819866,
      "grad_norm": 3.203125,
      "learning_rate": 5.8043931452188656e-06,
      "loss": 0.811,
      "step": 766550
    },
    {
      "epoch": 2.6866016885888824,
      "grad_norm": 3.40625,
      "learning_rate": 5.8037441165551636e-06,
      "loss": 0.8118,
      "step": 766560
    },
    {
      "epoch": 2.6866367360957777,
      "grad_norm": 2.796875,
      "learning_rate": 5.803095087891462e-06,
      "loss": 0.7694,
      "step": 766570
    },
    {
      "epoch": 2.6866717836026734,
      "grad_norm": 2.9375,
      "learning_rate": 5.8024460592277596e-06,
      "loss": 0.8609,
      "step": 766580
    },
    {
      "epoch": 2.6867068311095688,
      "grad_norm": 2.96875,
      "learning_rate": 5.801797030564058e-06,
      "loss": 0.841,
      "step": 766590
    },
    {
      "epoch": 2.6867418786164645,
      "grad_norm": 2.84375,
      "learning_rate": 5.801148001900356e-06,
      "loss": 0.6913,
      "step": 766600
    },
    {
      "epoch": 2.6867769261233603,
      "grad_norm": 2.921875,
      "learning_rate": 5.800498973236654e-06,
      "loss": 0.7878,
      "step": 766610
    },
    {
      "epoch": 2.686811973630256,
      "grad_norm": 2.46875,
      "learning_rate": 5.799849944572952e-06,
      "loss": 0.7963,
      "step": 766620
    },
    {
      "epoch": 2.6868470211371513,
      "grad_norm": 2.875,
      "learning_rate": 5.79920091590925e-06,
      "loss": 0.7631,
      "step": 766630
    },
    {
      "epoch": 2.686882068644047,
      "grad_norm": 3.015625,
      "learning_rate": 5.798551887245548e-06,
      "loss": 0.8951,
      "step": 766640
    },
    {
      "epoch": 2.6869171161509424,
      "grad_norm": 2.9375,
      "learning_rate": 5.797902858581846e-06,
      "loss": 0.8351,
      "step": 766650
    },
    {
      "epoch": 2.686952163657838,
      "grad_norm": 2.75,
      "learning_rate": 5.797253829918145e-06,
      "loss": 0.8059,
      "step": 766660
    },
    {
      "epoch": 2.686987211164734,
      "grad_norm": 3.625,
      "learning_rate": 5.796604801254442e-06,
      "loss": 0.7982,
      "step": 766670
    },
    {
      "epoch": 2.6870222586716292,
      "grad_norm": 2.484375,
      "learning_rate": 5.79595577259074e-06,
      "loss": 0.7892,
      "step": 766680
    },
    {
      "epoch": 2.687057306178525,
      "grad_norm": 2.765625,
      "learning_rate": 5.795306743927039e-06,
      "loss": 0.8265,
      "step": 766690
    },
    {
      "epoch": 2.6870923536854203,
      "grad_norm": 2.90625,
      "learning_rate": 5.794657715263337e-06,
      "loss": 0.7877,
      "step": 766700
    },
    {
      "epoch": 2.687127401192316,
      "grad_norm": 2.359375,
      "learning_rate": 5.794008686599635e-06,
      "loss": 0.8294,
      "step": 766710
    },
    {
      "epoch": 2.687162448699212,
      "grad_norm": 2.46875,
      "learning_rate": 5.793359657935933e-06,
      "loss": 0.7839,
      "step": 766720
    },
    {
      "epoch": 2.6871974962061076,
      "grad_norm": 2.8125,
      "learning_rate": 5.792710629272231e-06,
      "loss": 0.831,
      "step": 766730
    },
    {
      "epoch": 2.687232543713003,
      "grad_norm": 3.578125,
      "learning_rate": 5.792061600608529e-06,
      "loss": 0.6897,
      "step": 766740
    },
    {
      "epoch": 2.6872675912198987,
      "grad_norm": 2.765625,
      "learning_rate": 5.791412571944828e-06,
      "loss": 0.7998,
      "step": 766750
    },
    {
      "epoch": 2.687302638726794,
      "grad_norm": 2.671875,
      "learning_rate": 5.790763543281126e-06,
      "loss": 0.835,
      "step": 766760
    },
    {
      "epoch": 2.6873376862336897,
      "grad_norm": 2.296875,
      "learning_rate": 5.790114514617423e-06,
      "loss": 0.7736,
      "step": 766770
    },
    {
      "epoch": 2.6873727337405855,
      "grad_norm": 2.6875,
      "learning_rate": 5.789465485953722e-06,
      "loss": 0.85,
      "step": 766780
    },
    {
      "epoch": 2.687407781247481,
      "grad_norm": 3.125,
      "learning_rate": 5.78881645729002e-06,
      "loss": 0.8565,
      "step": 766790
    },
    {
      "epoch": 2.6874428287543766,
      "grad_norm": 2.84375,
      "learning_rate": 5.788167428626318e-06,
      "loss": 0.7959,
      "step": 766800
    },
    {
      "epoch": 2.687477876261272,
      "grad_norm": 2.859375,
      "learning_rate": 5.787518399962616e-06,
      "loss": 0.9009,
      "step": 766810
    },
    {
      "epoch": 2.6875129237681676,
      "grad_norm": 3.171875,
      "learning_rate": 5.786869371298914e-06,
      "loss": 0.8183,
      "step": 766820
    },
    {
      "epoch": 2.6875479712750634,
      "grad_norm": 6.875,
      "learning_rate": 5.786220342635212e-06,
      "loss": 0.7959,
      "step": 766830
    },
    {
      "epoch": 2.687583018781959,
      "grad_norm": 2.84375,
      "learning_rate": 5.785571313971511e-06,
      "loss": 0.8105,
      "step": 766840
    },
    {
      "epoch": 2.6876180662888545,
      "grad_norm": 3.125,
      "learning_rate": 5.784922285307809e-06,
      "loss": 0.7886,
      "step": 766850
    },
    {
      "epoch": 2.6876531137957502,
      "grad_norm": 3.140625,
      "learning_rate": 5.784273256644106e-06,
      "loss": 0.724,
      "step": 766860
    },
    {
      "epoch": 2.6876881613026455,
      "grad_norm": 2.78125,
      "learning_rate": 5.783624227980405e-06,
      "loss": 0.8457,
      "step": 766870
    },
    {
      "epoch": 2.6877232088095413,
      "grad_norm": 3.0625,
      "learning_rate": 5.782975199316703e-06,
      "loss": 0.8471,
      "step": 766880
    },
    {
      "epoch": 2.687758256316437,
      "grad_norm": 2.671875,
      "learning_rate": 5.782326170653001e-06,
      "loss": 0.8374,
      "step": 766890
    },
    {
      "epoch": 2.6877933038233324,
      "grad_norm": 2.515625,
      "learning_rate": 5.7816771419893e-06,
      "loss": 0.8244,
      "step": 766900
    },
    {
      "epoch": 2.687828351330228,
      "grad_norm": 3.109375,
      "learning_rate": 5.781028113325597e-06,
      "loss": 0.8304,
      "step": 766910
    },
    {
      "epoch": 2.687863398837124,
      "grad_norm": 3.078125,
      "learning_rate": 5.780379084661895e-06,
      "loss": 0.8001,
      "step": 766920
    },
    {
      "epoch": 2.687898446344019,
      "grad_norm": 2.59375,
      "learning_rate": 5.779730055998194e-06,
      "loss": 0.8216,
      "step": 766930
    },
    {
      "epoch": 2.687933493850915,
      "grad_norm": 3.1875,
      "learning_rate": 5.779081027334492e-06,
      "loss": 0.8121,
      "step": 766940
    },
    {
      "epoch": 2.6879685413578107,
      "grad_norm": 2.796875,
      "learning_rate": 5.778431998670789e-06,
      "loss": 0.7665,
      "step": 766950
    },
    {
      "epoch": 2.688003588864706,
      "grad_norm": 2.828125,
      "learning_rate": 5.777782970007088e-06,
      "loss": 0.8016,
      "step": 766960
    },
    {
      "epoch": 2.688038636371602,
      "grad_norm": 3.296875,
      "learning_rate": 5.777133941343386e-06,
      "loss": 0.8288,
      "step": 766970
    },
    {
      "epoch": 2.688073683878497,
      "grad_norm": 2.9375,
      "learning_rate": 5.776484912679684e-06,
      "loss": 0.7615,
      "step": 766980
    },
    {
      "epoch": 2.688108731385393,
      "grad_norm": 2.75,
      "learning_rate": 5.775835884015983e-06,
      "loss": 0.8336,
      "step": 766990
    },
    {
      "epoch": 2.6881437788922886,
      "grad_norm": 2.875,
      "learning_rate": 5.77518685535228e-06,
      "loss": 0.7144,
      "step": 767000
    },
    {
      "epoch": 2.688178826399184,
      "grad_norm": 3.4375,
      "learning_rate": 5.774537826688578e-06,
      "loss": 0.9122,
      "step": 767010
    },
    {
      "epoch": 2.6882138739060797,
      "grad_norm": 3.5,
      "learning_rate": 5.773888798024877e-06,
      "loss": 0.8124,
      "step": 767020
    },
    {
      "epoch": 2.6882489214129754,
      "grad_norm": 2.609375,
      "learning_rate": 5.773239769361175e-06,
      "loss": 0.6872,
      "step": 767030
    },
    {
      "epoch": 2.6882839689198708,
      "grad_norm": 2.859375,
      "learning_rate": 5.772590740697473e-06,
      "loss": 0.8102,
      "step": 767040
    },
    {
      "epoch": 2.6883190164267665,
      "grad_norm": 2.828125,
      "learning_rate": 5.771941712033771e-06,
      "loss": 0.8213,
      "step": 767050
    },
    {
      "epoch": 2.6883540639336623,
      "grad_norm": 2.71875,
      "learning_rate": 5.771292683370069e-06,
      "loss": 0.7971,
      "step": 767060
    },
    {
      "epoch": 2.6883891114405576,
      "grad_norm": 2.796875,
      "learning_rate": 5.770643654706367e-06,
      "loss": 0.8674,
      "step": 767070
    },
    {
      "epoch": 2.6884241589474533,
      "grad_norm": 2.265625,
      "learning_rate": 5.769994626042665e-06,
      "loss": 0.7738,
      "step": 767080
    },
    {
      "epoch": 2.6884592064543487,
      "grad_norm": 2.625,
      "learning_rate": 5.769345597378963e-06,
      "loss": 0.686,
      "step": 767090
    },
    {
      "epoch": 2.6884942539612444,
      "grad_norm": 2.828125,
      "learning_rate": 5.768696568715261e-06,
      "loss": 0.8744,
      "step": 767100
    },
    {
      "epoch": 2.68852930146814,
      "grad_norm": 2.984375,
      "learning_rate": 5.7680475400515595e-06,
      "loss": 0.7793,
      "step": 767110
    },
    {
      "epoch": 2.688564348975036,
      "grad_norm": 3.390625,
      "learning_rate": 5.7673985113878575e-06,
      "loss": 0.8992,
      "step": 767120
    },
    {
      "epoch": 2.6885993964819312,
      "grad_norm": 3.359375,
      "learning_rate": 5.7667494827241555e-06,
      "loss": 0.8311,
      "step": 767130
    },
    {
      "epoch": 2.688634443988827,
      "grad_norm": 2.84375,
      "learning_rate": 5.7661004540604535e-06,
      "loss": 0.75,
      "step": 767140
    },
    {
      "epoch": 2.6886694914957223,
      "grad_norm": 2.953125,
      "learning_rate": 5.7654514253967515e-06,
      "loss": 0.7362,
      "step": 767150
    },
    {
      "epoch": 2.688704539002618,
      "grad_norm": 2.609375,
      "learning_rate": 5.7648023967330495e-06,
      "loss": 0.8364,
      "step": 767160
    },
    {
      "epoch": 2.688739586509514,
      "grad_norm": 3.015625,
      "learning_rate": 5.7641533680693475e-06,
      "loss": 0.79,
      "step": 767170
    },
    {
      "epoch": 2.688774634016409,
      "grad_norm": 3.046875,
      "learning_rate": 5.763504339405646e-06,
      "loss": 0.7581,
      "step": 767180
    },
    {
      "epoch": 2.688809681523305,
      "grad_norm": 2.703125,
      "learning_rate": 5.7628553107419435e-06,
      "loss": 0.7204,
      "step": 767190
    },
    {
      "epoch": 2.6888447290302,
      "grad_norm": 3.140625,
      "learning_rate": 5.7622062820782415e-06,
      "loss": 0.8443,
      "step": 767200
    },
    {
      "epoch": 2.688879776537096,
      "grad_norm": 3.3125,
      "learning_rate": 5.76155725341454e-06,
      "loss": 0.7139,
      "step": 767210
    },
    {
      "epoch": 2.6889148240439917,
      "grad_norm": 2.640625,
      "learning_rate": 5.760908224750838e-06,
      "loss": 0.7607,
      "step": 767220
    },
    {
      "epoch": 2.6889498715508875,
      "grad_norm": 3.109375,
      "learning_rate": 5.760259196087136e-06,
      "loss": 0.7915,
      "step": 767230
    },
    {
      "epoch": 2.688984919057783,
      "grad_norm": 2.90625,
      "learning_rate": 5.759610167423434e-06,
      "loss": 0.761,
      "step": 767240
    },
    {
      "epoch": 2.6890199665646786,
      "grad_norm": 3.046875,
      "learning_rate": 5.758961138759732e-06,
      "loss": 0.8244,
      "step": 767250
    },
    {
      "epoch": 2.689055014071574,
      "grad_norm": 3.09375,
      "learning_rate": 5.75831211009603e-06,
      "loss": 0.8178,
      "step": 767260
    },
    {
      "epoch": 2.6890900615784696,
      "grad_norm": 2.890625,
      "learning_rate": 5.757663081432329e-06,
      "loss": 0.7418,
      "step": 767270
    },
    {
      "epoch": 2.6891251090853654,
      "grad_norm": 3.078125,
      "learning_rate": 5.757014052768626e-06,
      "loss": 0.8673,
      "step": 767280
    },
    {
      "epoch": 2.6891601565922607,
      "grad_norm": 2.6875,
      "learning_rate": 5.756365024104924e-06,
      "loss": 0.734,
      "step": 767290
    },
    {
      "epoch": 2.6891952040991565,
      "grad_norm": 3.03125,
      "learning_rate": 5.755715995441223e-06,
      "loss": 0.7358,
      "step": 767300
    },
    {
      "epoch": 2.689230251606052,
      "grad_norm": 2.859375,
      "learning_rate": 5.755066966777521e-06,
      "loss": 0.9098,
      "step": 767310
    },
    {
      "epoch": 2.6892652991129475,
      "grad_norm": 3.140625,
      "learning_rate": 5.754417938113819e-06,
      "loss": 0.8597,
      "step": 767320
    },
    {
      "epoch": 2.6893003466198433,
      "grad_norm": 2.671875,
      "learning_rate": 5.753768909450117e-06,
      "loss": 0.7102,
      "step": 767330
    },
    {
      "epoch": 2.689335394126739,
      "grad_norm": 2.890625,
      "learning_rate": 5.753119880786415e-06,
      "loss": 0.833,
      "step": 767340
    },
    {
      "epoch": 2.6893704416336344,
      "grad_norm": 3.5625,
      "learning_rate": 5.752470852122713e-06,
      "loss": 0.7797,
      "step": 767350
    },
    {
      "epoch": 2.68940548914053,
      "grad_norm": 2.953125,
      "learning_rate": 5.751821823459012e-06,
      "loss": 0.7555,
      "step": 767360
    },
    {
      "epoch": 2.6894405366474254,
      "grad_norm": 2.96875,
      "learning_rate": 5.75117279479531e-06,
      "loss": 0.7813,
      "step": 767370
    },
    {
      "epoch": 2.689475584154321,
      "grad_norm": 2.875,
      "learning_rate": 5.750523766131607e-06,
      "loss": 0.7987,
      "step": 767380
    },
    {
      "epoch": 2.689510631661217,
      "grad_norm": 2.765625,
      "learning_rate": 5.749874737467906e-06,
      "loss": 0.7656,
      "step": 767390
    },
    {
      "epoch": 2.6895456791681123,
      "grad_norm": 3.265625,
      "learning_rate": 5.749225708804204e-06,
      "loss": 0.8227,
      "step": 767400
    },
    {
      "epoch": 2.689580726675008,
      "grad_norm": 3.421875,
      "learning_rate": 5.748576680140502e-06,
      "loss": 0.8766,
      "step": 767410
    },
    {
      "epoch": 2.6896157741819033,
      "grad_norm": 3.28125,
      "learning_rate": 5.7479276514768e-06,
      "loss": 0.8081,
      "step": 767420
    },
    {
      "epoch": 2.689650821688799,
      "grad_norm": 2.921875,
      "learning_rate": 5.747278622813098e-06,
      "loss": 0.7708,
      "step": 767430
    },
    {
      "epoch": 2.689685869195695,
      "grad_norm": 3.046875,
      "learning_rate": 5.746629594149396e-06,
      "loss": 0.8424,
      "step": 767440
    },
    {
      "epoch": 2.6897209167025906,
      "grad_norm": 2.890625,
      "learning_rate": 5.745980565485695e-06,
      "loss": 0.7711,
      "step": 767450
    },
    {
      "epoch": 2.689755964209486,
      "grad_norm": 2.953125,
      "learning_rate": 5.745331536821993e-06,
      "loss": 0.7934,
      "step": 767460
    },
    {
      "epoch": 2.6897910117163817,
      "grad_norm": 3.296875,
      "learning_rate": 5.74468250815829e-06,
      "loss": 0.798,
      "step": 767470
    },
    {
      "epoch": 2.689826059223277,
      "grad_norm": 2.78125,
      "learning_rate": 5.744033479494589e-06,
      "loss": 0.7365,
      "step": 767480
    },
    {
      "epoch": 2.6898611067301728,
      "grad_norm": 3.1875,
      "learning_rate": 5.743384450830887e-06,
      "loss": 0.7858,
      "step": 767490
    },
    {
      "epoch": 2.6898961542370685,
      "grad_norm": 2.59375,
      "learning_rate": 5.742735422167185e-06,
      "loss": 0.8405,
      "step": 767500
    },
    {
      "epoch": 2.689931201743964,
      "grad_norm": 2.90625,
      "learning_rate": 5.742086393503483e-06,
      "loss": 0.7284,
      "step": 767510
    },
    {
      "epoch": 2.6899662492508596,
      "grad_norm": 2.828125,
      "learning_rate": 5.741437364839781e-06,
      "loss": 0.7654,
      "step": 767520
    },
    {
      "epoch": 2.690001296757755,
      "grad_norm": 2.546875,
      "learning_rate": 5.740788336176079e-06,
      "loss": 0.7902,
      "step": 767530
    },
    {
      "epoch": 2.6900363442646507,
      "grad_norm": 2.75,
      "learning_rate": 5.740139307512378e-06,
      "loss": 0.7699,
      "step": 767540
    },
    {
      "epoch": 2.6900713917715464,
      "grad_norm": 2.8125,
      "learning_rate": 5.739490278848676e-06,
      "loss": 0.8101,
      "step": 767550
    },
    {
      "epoch": 2.690106439278442,
      "grad_norm": 2.75,
      "learning_rate": 5.738841250184974e-06,
      "loss": 0.7925,
      "step": 767560
    },
    {
      "epoch": 2.6901414867853375,
      "grad_norm": 3.0625,
      "learning_rate": 5.738192221521272e-06,
      "loss": 0.7403,
      "step": 767570
    },
    {
      "epoch": 2.6901765342922332,
      "grad_norm": 2.953125,
      "learning_rate": 5.73754319285757e-06,
      "loss": 0.7794,
      "step": 767580
    },
    {
      "epoch": 2.6902115817991286,
      "grad_norm": 2.984375,
      "learning_rate": 5.736894164193868e-06,
      "loss": 0.8459,
      "step": 767590
    },
    {
      "epoch": 2.6902466293060243,
      "grad_norm": 2.4375,
      "learning_rate": 5.736245135530166e-06,
      "loss": 0.6736,
      "step": 767600
    },
    {
      "epoch": 2.69028167681292,
      "grad_norm": 2.984375,
      "learning_rate": 5.735596106866464e-06,
      "loss": 0.7955,
      "step": 767610
    },
    {
      "epoch": 2.6903167243198154,
      "grad_norm": 2.90625,
      "learning_rate": 5.734947078202762e-06,
      "loss": 0.9001,
      "step": 767620
    },
    {
      "epoch": 2.690351771826711,
      "grad_norm": 3.03125,
      "learning_rate": 5.73429804953906e-06,
      "loss": 0.782,
      "step": 767630
    },
    {
      "epoch": 2.6903868193336065,
      "grad_norm": 2.640625,
      "learning_rate": 5.7336490208753586e-06,
      "loss": 0.7973,
      "step": 767640
    },
    {
      "epoch": 2.690421866840502,
      "grad_norm": 2.78125,
      "learning_rate": 5.7329999922116566e-06,
      "loss": 0.7695,
      "step": 767650
    },
    {
      "epoch": 2.690456914347398,
      "grad_norm": 3.015625,
      "learning_rate": 5.7323509635479546e-06,
      "loss": 0.7642,
      "step": 767660
    },
    {
      "epoch": 2.6904919618542937,
      "grad_norm": 3.125,
      "learning_rate": 5.7317019348842526e-06,
      "loss": 0.7855,
      "step": 767670
    },
    {
      "epoch": 2.690527009361189,
      "grad_norm": 2.921875,
      "learning_rate": 5.7310529062205506e-06,
      "loss": 0.7951,
      "step": 767680
    },
    {
      "epoch": 2.690562056868085,
      "grad_norm": 2.984375,
      "learning_rate": 5.7304038775568486e-06,
      "loss": 0.7666,
      "step": 767690
    },
    {
      "epoch": 2.69059710437498,
      "grad_norm": 2.78125,
      "learning_rate": 5.729754848893147e-06,
      "loss": 0.7571,
      "step": 767700
    },
    {
      "epoch": 2.690632151881876,
      "grad_norm": 2.84375,
      "learning_rate": 5.7291058202294446e-06,
      "loss": 0.883,
      "step": 767710
    },
    {
      "epoch": 2.6906671993887716,
      "grad_norm": 2.234375,
      "learning_rate": 5.7284567915657426e-06,
      "loss": 0.7633,
      "step": 767720
    },
    {
      "epoch": 2.690702246895667,
      "grad_norm": 2.15625,
      "learning_rate": 5.727807762902041e-06,
      "loss": 0.7334,
      "step": 767730
    },
    {
      "epoch": 2.6907372944025627,
      "grad_norm": 2.75,
      "learning_rate": 5.727158734238339e-06,
      "loss": 0.7422,
      "step": 767740
    },
    {
      "epoch": 2.690772341909458,
      "grad_norm": 2.984375,
      "learning_rate": 5.7265097055746366e-06,
      "loss": 0.8018,
      "step": 767750
    },
    {
      "epoch": 2.690807389416354,
      "grad_norm": 3.28125,
      "learning_rate": 5.725860676910935e-06,
      "loss": 0.8428,
      "step": 767760
    },
    {
      "epoch": 2.6908424369232495,
      "grad_norm": 2.46875,
      "learning_rate": 5.725211648247233e-06,
      "loss": 0.7367,
      "step": 767770
    },
    {
      "epoch": 2.6908774844301453,
      "grad_norm": 2.84375,
      "learning_rate": 5.724562619583531e-06,
      "loss": 0.8303,
      "step": 767780
    },
    {
      "epoch": 2.6909125319370406,
      "grad_norm": 2.484375,
      "learning_rate": 5.72391359091983e-06,
      "loss": 0.8088,
      "step": 767790
    },
    {
      "epoch": 2.6909475794439364,
      "grad_norm": 3.15625,
      "learning_rate": 5.723264562256127e-06,
      "loss": 0.7653,
      "step": 767800
    },
    {
      "epoch": 2.6909826269508317,
      "grad_norm": 2.96875,
      "learning_rate": 5.722615533592425e-06,
      "loss": 0.7986,
      "step": 767810
    },
    {
      "epoch": 2.6910176744577274,
      "grad_norm": 2.53125,
      "learning_rate": 5.721966504928724e-06,
      "loss": 0.7719,
      "step": 767820
    },
    {
      "epoch": 2.691052721964623,
      "grad_norm": 2.984375,
      "learning_rate": 5.721317476265022e-06,
      "loss": 0.7333,
      "step": 767830
    },
    {
      "epoch": 2.6910877694715185,
      "grad_norm": 2.484375,
      "learning_rate": 5.72066844760132e-06,
      "loss": 0.7829,
      "step": 767840
    },
    {
      "epoch": 2.6911228169784143,
      "grad_norm": 2.96875,
      "learning_rate": 5.720019418937618e-06,
      "loss": 0.7764,
      "step": 767850
    },
    {
      "epoch": 2.6911578644853096,
      "grad_norm": 2.609375,
      "learning_rate": 5.719370390273916e-06,
      "loss": 0.8692,
      "step": 767860
    },
    {
      "epoch": 2.6911929119922053,
      "grad_norm": 3.125,
      "learning_rate": 5.718721361610214e-06,
      "loss": 0.7911,
      "step": 767870
    },
    {
      "epoch": 2.691227959499101,
      "grad_norm": 3.015625,
      "learning_rate": 5.718072332946513e-06,
      "loss": 0.8376,
      "step": 767880
    },
    {
      "epoch": 2.691263007005997,
      "grad_norm": 2.65625,
      "learning_rate": 5.71742330428281e-06,
      "loss": 0.7717,
      "step": 767890
    },
    {
      "epoch": 2.691298054512892,
      "grad_norm": 2.75,
      "learning_rate": 5.716774275619108e-06,
      "loss": 0.8576,
      "step": 767900
    },
    {
      "epoch": 2.691333102019788,
      "grad_norm": 2.859375,
      "learning_rate": 5.716125246955407e-06,
      "loss": 0.7929,
      "step": 767910
    },
    {
      "epoch": 2.6913681495266832,
      "grad_norm": 3.1875,
      "learning_rate": 5.715476218291705e-06,
      "loss": 0.8072,
      "step": 767920
    },
    {
      "epoch": 2.691403197033579,
      "grad_norm": 2.5,
      "learning_rate": 5.714827189628003e-06,
      "loss": 0.7911,
      "step": 767930
    },
    {
      "epoch": 2.6914382445404748,
      "grad_norm": 2.53125,
      "learning_rate": 5.714178160964301e-06,
      "loss": 0.7662,
      "step": 767940
    },
    {
      "epoch": 2.69147329204737,
      "grad_norm": 3.109375,
      "learning_rate": 5.713529132300599e-06,
      "loss": 0.792,
      "step": 767950
    },
    {
      "epoch": 2.691508339554266,
      "grad_norm": 3.359375,
      "learning_rate": 5.712880103636897e-06,
      "loss": 0.8197,
      "step": 767960
    },
    {
      "epoch": 2.691543387061161,
      "grad_norm": 2.28125,
      "learning_rate": 5.712231074973196e-06,
      "loss": 0.8296,
      "step": 767970
    },
    {
      "epoch": 2.691578434568057,
      "grad_norm": 3.0625,
      "learning_rate": 5.711582046309494e-06,
      "loss": 0.8565,
      "step": 767980
    },
    {
      "epoch": 2.6916134820749527,
      "grad_norm": 2.78125,
      "learning_rate": 5.710933017645791e-06,
      "loss": 0.85,
      "step": 767990
    },
    {
      "epoch": 2.6916485295818484,
      "grad_norm": 3.140625,
      "learning_rate": 5.71028398898209e-06,
      "loss": 0.83,
      "step": 768000
    },
    {
      "epoch": 2.6916835770887437,
      "grad_norm": 2.953125,
      "learning_rate": 5.709634960318388e-06,
      "loss": 0.7591,
      "step": 768010
    },
    {
      "epoch": 2.6917186245956395,
      "grad_norm": 2.796875,
      "learning_rate": 5.708985931654686e-06,
      "loss": 0.834,
      "step": 768020
    },
    {
      "epoch": 2.691753672102535,
      "grad_norm": 2.890625,
      "learning_rate": 5.708336902990984e-06,
      "loss": 0.7553,
      "step": 768030
    },
    {
      "epoch": 2.6917887196094306,
      "grad_norm": 3.09375,
      "learning_rate": 5.707687874327282e-06,
      "loss": 0.8289,
      "step": 768040
    },
    {
      "epoch": 2.6918237671163263,
      "grad_norm": 3.15625,
      "learning_rate": 5.70703884566358e-06,
      "loss": 0.7765,
      "step": 768050
    },
    {
      "epoch": 2.6918588146232216,
      "grad_norm": 3.0625,
      "learning_rate": 5.706389816999878e-06,
      "loss": 0.9049,
      "step": 768060
    },
    {
      "epoch": 2.6918938621301174,
      "grad_norm": 2.765625,
      "learning_rate": 5.705740788336177e-06,
      "loss": 0.8185,
      "step": 768070
    },
    {
      "epoch": 2.6919289096370127,
      "grad_norm": 3.15625,
      "learning_rate": 5.705091759672474e-06,
      "loss": 0.7924,
      "step": 768080
    },
    {
      "epoch": 2.6919639571439085,
      "grad_norm": 3.046875,
      "learning_rate": 5.704442731008773e-06,
      "loss": 0.842,
      "step": 768090
    },
    {
      "epoch": 2.691999004650804,
      "grad_norm": 3.0625,
      "learning_rate": 5.703793702345071e-06,
      "loss": 0.7969,
      "step": 768100
    },
    {
      "epoch": 2.6920340521577,
      "grad_norm": 2.890625,
      "learning_rate": 5.703144673681369e-06,
      "loss": 0.8045,
      "step": 768110
    },
    {
      "epoch": 2.6920690996645953,
      "grad_norm": 2.84375,
      "learning_rate": 5.702495645017667e-06,
      "loss": 0.7946,
      "step": 768120
    },
    {
      "epoch": 2.692104147171491,
      "grad_norm": 2.828125,
      "learning_rate": 5.701846616353965e-06,
      "loss": 0.8335,
      "step": 768130
    },
    {
      "epoch": 2.6921391946783864,
      "grad_norm": 2.765625,
      "learning_rate": 5.701197587690263e-06,
      "loss": 0.758,
      "step": 768140
    },
    {
      "epoch": 2.692174242185282,
      "grad_norm": 2.25,
      "learning_rate": 5.700548559026561e-06,
      "loss": 0.7713,
      "step": 768150
    },
    {
      "epoch": 2.692209289692178,
      "grad_norm": 2.875,
      "learning_rate": 5.69989953036286e-06,
      "loss": 0.726,
      "step": 768160
    },
    {
      "epoch": 2.692244337199073,
      "grad_norm": 2.890625,
      "learning_rate": 5.699250501699158e-06,
      "loss": 0.7853,
      "step": 768170
    },
    {
      "epoch": 2.692279384705969,
      "grad_norm": 2.46875,
      "learning_rate": 5.698601473035455e-06,
      "loss": 0.7913,
      "step": 768180
    },
    {
      "epoch": 2.6923144322128643,
      "grad_norm": 3.1875,
      "learning_rate": 5.697952444371754e-06,
      "loss": 0.8921,
      "step": 768190
    },
    {
      "epoch": 2.69234947971976,
      "grad_norm": 2.59375,
      "learning_rate": 5.697303415708052e-06,
      "loss": 0.8132,
      "step": 768200
    },
    {
      "epoch": 2.6923845272266558,
      "grad_norm": 2.953125,
      "learning_rate": 5.69665438704435e-06,
      "loss": 0.7856,
      "step": 768210
    },
    {
      "epoch": 2.6924195747335515,
      "grad_norm": 2.625,
      "learning_rate": 5.696005358380648e-06,
      "loss": 0.7821,
      "step": 768220
    },
    {
      "epoch": 2.692454622240447,
      "grad_norm": 3.390625,
      "learning_rate": 5.695356329716946e-06,
      "loss": 0.7203,
      "step": 768230
    },
    {
      "epoch": 2.6924896697473426,
      "grad_norm": 2.671875,
      "learning_rate": 5.694707301053244e-06,
      "loss": 0.7171,
      "step": 768240
    },
    {
      "epoch": 2.692524717254238,
      "grad_norm": 2.828125,
      "learning_rate": 5.6940582723895425e-06,
      "loss": 0.7701,
      "step": 768250
    },
    {
      "epoch": 2.6925597647611337,
      "grad_norm": 3.328125,
      "learning_rate": 5.6934092437258405e-06,
      "loss": 0.8062,
      "step": 768260
    },
    {
      "epoch": 2.6925948122680294,
      "grad_norm": 2.828125,
      "learning_rate": 5.692760215062138e-06,
      "loss": 0.8562,
      "step": 768270
    },
    {
      "epoch": 2.6926298597749247,
      "grad_norm": 3.53125,
      "learning_rate": 5.6921111863984365e-06,
      "loss": 0.7555,
      "step": 768280
    },
    {
      "epoch": 2.6926649072818205,
      "grad_norm": 3.0,
      "learning_rate": 5.6914621577347345e-06,
      "loss": 0.7299,
      "step": 768290
    },
    {
      "epoch": 2.6926999547887163,
      "grad_norm": 2.75,
      "learning_rate": 5.6908131290710325e-06,
      "loss": 0.7675,
      "step": 768300
    },
    {
      "epoch": 2.6927350022956116,
      "grad_norm": 3.015625,
      "learning_rate": 5.690164100407331e-06,
      "loss": 0.7938,
      "step": 768310
    },
    {
      "epoch": 2.6927700498025073,
      "grad_norm": 3.359375,
      "learning_rate": 5.6895150717436285e-06,
      "loss": 0.8456,
      "step": 768320
    },
    {
      "epoch": 2.692805097309403,
      "grad_norm": 3.171875,
      "learning_rate": 5.6888660430799265e-06,
      "loss": 0.9039,
      "step": 768330
    },
    {
      "epoch": 2.6928401448162984,
      "grad_norm": 3.0,
      "learning_rate": 5.688217014416225e-06,
      "loss": 0.8311,
      "step": 768340
    },
    {
      "epoch": 2.692875192323194,
      "grad_norm": 3.109375,
      "learning_rate": 5.687567985752523e-06,
      "loss": 0.8529,
      "step": 768350
    },
    {
      "epoch": 2.6929102398300895,
      "grad_norm": 3.109375,
      "learning_rate": 5.6869189570888205e-06,
      "loss": 0.7678,
      "step": 768360
    },
    {
      "epoch": 2.6929452873369852,
      "grad_norm": 3.328125,
      "learning_rate": 5.686269928425119e-06,
      "loss": 0.844,
      "step": 768370
    },
    {
      "epoch": 2.692980334843881,
      "grad_norm": 3.109375,
      "learning_rate": 5.685620899761417e-06,
      "loss": 0.8082,
      "step": 768380
    },
    {
      "epoch": 2.6930153823507768,
      "grad_norm": 2.890625,
      "learning_rate": 5.684971871097715e-06,
      "loss": 0.7858,
      "step": 768390
    },
    {
      "epoch": 2.693050429857672,
      "grad_norm": 2.6875,
      "learning_rate": 5.684322842434014e-06,
      "loss": 0.8025,
      "step": 768400
    },
    {
      "epoch": 2.693085477364568,
      "grad_norm": 2.796875,
      "learning_rate": 5.683673813770311e-06,
      "loss": 0.899,
      "step": 768410
    },
    {
      "epoch": 2.693120524871463,
      "grad_norm": 3.328125,
      "learning_rate": 5.683024785106609e-06,
      "loss": 0.9097,
      "step": 768420
    },
    {
      "epoch": 2.693155572378359,
      "grad_norm": 2.953125,
      "learning_rate": 5.682375756442908e-06,
      "loss": 0.8832,
      "step": 768430
    },
    {
      "epoch": 2.6931906198852547,
      "grad_norm": 2.75,
      "learning_rate": 5.681726727779206e-06,
      "loss": 0.8263,
      "step": 768440
    },
    {
      "epoch": 2.69322566739215,
      "grad_norm": 2.4375,
      "learning_rate": 5.681077699115504e-06,
      "loss": 0.7563,
      "step": 768450
    },
    {
      "epoch": 2.6932607148990457,
      "grad_norm": 2.96875,
      "learning_rate": 5.680428670451802e-06,
      "loss": 0.7709,
      "step": 768460
    },
    {
      "epoch": 2.693295762405941,
      "grad_norm": 2.875,
      "learning_rate": 5.6797796417881e-06,
      "loss": 0.7767,
      "step": 768470
    },
    {
      "epoch": 2.693330809912837,
      "grad_norm": 3.109375,
      "learning_rate": 5.679130613124398e-06,
      "loss": 0.7991,
      "step": 768480
    },
    {
      "epoch": 2.6933658574197326,
      "grad_norm": 2.96875,
      "learning_rate": 5.678481584460696e-06,
      "loss": 0.8832,
      "step": 768490
    },
    {
      "epoch": 2.6934009049266283,
      "grad_norm": 2.796875,
      "learning_rate": 5.677832555796995e-06,
      "loss": 0.74,
      "step": 768500
    },
    {
      "epoch": 2.6934359524335236,
      "grad_norm": 3.296875,
      "learning_rate": 5.677183527133292e-06,
      "loss": 0.8404,
      "step": 768510
    },
    {
      "epoch": 2.6934709999404194,
      "grad_norm": 2.921875,
      "learning_rate": 5.676534498469591e-06,
      "loss": 0.8096,
      "step": 768520
    },
    {
      "epoch": 2.6935060474473147,
      "grad_norm": 2.609375,
      "learning_rate": 5.675885469805889e-06,
      "loss": 0.7789,
      "step": 768530
    },
    {
      "epoch": 2.6935410949542105,
      "grad_norm": 3.0,
      "learning_rate": 5.675236441142187e-06,
      "loss": 0.7758,
      "step": 768540
    },
    {
      "epoch": 2.693576142461106,
      "grad_norm": 2.8125,
      "learning_rate": 5.674587412478485e-06,
      "loss": 0.6989,
      "step": 768550
    },
    {
      "epoch": 2.6936111899680015,
      "grad_norm": 2.75,
      "learning_rate": 5.673938383814783e-06,
      "loss": 0.8144,
      "step": 768560
    },
    {
      "epoch": 2.6936462374748973,
      "grad_norm": 3.140625,
      "learning_rate": 5.673289355151081e-06,
      "loss": 0.8402,
      "step": 768570
    },
    {
      "epoch": 2.6936812849817926,
      "grad_norm": 2.9375,
      "learning_rate": 5.672640326487379e-06,
      "loss": 0.7913,
      "step": 768580
    },
    {
      "epoch": 2.6937163324886884,
      "grad_norm": 2.921875,
      "learning_rate": 5.671991297823678e-06,
      "loss": 0.7662,
      "step": 768590
    },
    {
      "epoch": 2.693751379995584,
      "grad_norm": 2.65625,
      "learning_rate": 5.671342269159975e-06,
      "loss": 0.8244,
      "step": 768600
    },
    {
      "epoch": 2.69378642750248,
      "grad_norm": 2.75,
      "learning_rate": 5.670693240496273e-06,
      "loss": 0.7346,
      "step": 768610
    },
    {
      "epoch": 2.693821475009375,
      "grad_norm": 2.9375,
      "learning_rate": 5.670044211832572e-06,
      "loss": 0.7813,
      "step": 768620
    },
    {
      "epoch": 2.693856522516271,
      "grad_norm": 2.703125,
      "learning_rate": 5.66939518316887e-06,
      "loss": 0.8014,
      "step": 768630
    },
    {
      "epoch": 2.6938915700231663,
      "grad_norm": 2.796875,
      "learning_rate": 5.668746154505168e-06,
      "loss": 0.7821,
      "step": 768640
    },
    {
      "epoch": 2.693926617530062,
      "grad_norm": 2.84375,
      "learning_rate": 5.668097125841466e-06,
      "loss": 0.7704,
      "step": 768650
    },
    {
      "epoch": 2.6939616650369578,
      "grad_norm": 2.796875,
      "learning_rate": 5.667448097177764e-06,
      "loss": 0.7824,
      "step": 768660
    },
    {
      "epoch": 2.693996712543853,
      "grad_norm": 2.875,
      "learning_rate": 5.666799068514062e-06,
      "loss": 0.7925,
      "step": 768670
    },
    {
      "epoch": 2.694031760050749,
      "grad_norm": 3.21875,
      "learning_rate": 5.666150039850361e-06,
      "loss": 0.8495,
      "step": 768680
    },
    {
      "epoch": 2.694066807557644,
      "grad_norm": 2.5625,
      "learning_rate": 5.665501011186658e-06,
      "loss": 0.8608,
      "step": 768690
    },
    {
      "epoch": 2.69410185506454,
      "grad_norm": 2.9375,
      "learning_rate": 5.664851982522956e-06,
      "loss": 0.7804,
      "step": 768700
    },
    {
      "epoch": 2.6941369025714357,
      "grad_norm": 3.1875,
      "learning_rate": 5.664202953859255e-06,
      "loss": 0.8074,
      "step": 768710
    },
    {
      "epoch": 2.6941719500783314,
      "grad_norm": 2.859375,
      "learning_rate": 5.663553925195553e-06,
      "loss": 0.7894,
      "step": 768720
    },
    {
      "epoch": 2.6942069975852267,
      "grad_norm": 3.015625,
      "learning_rate": 5.662904896531851e-06,
      "loss": 0.7733,
      "step": 768730
    },
    {
      "epoch": 2.6942420450921225,
      "grad_norm": 3.71875,
      "learning_rate": 5.662255867868149e-06,
      "loss": 0.82,
      "step": 768740
    },
    {
      "epoch": 2.694277092599018,
      "grad_norm": 2.921875,
      "learning_rate": 5.661606839204447e-06,
      "loss": 0.7769,
      "step": 768750
    },
    {
      "epoch": 2.6943121401059136,
      "grad_norm": 3.0,
      "learning_rate": 5.660957810540745e-06,
      "loss": 0.8006,
      "step": 768760
    },
    {
      "epoch": 2.6943471876128093,
      "grad_norm": 2.828125,
      "learning_rate": 5.6603087818770436e-06,
      "loss": 0.7698,
      "step": 768770
    },
    {
      "epoch": 2.6943822351197046,
      "grad_norm": 2.6875,
      "learning_rate": 5.6596597532133416e-06,
      "loss": 0.7213,
      "step": 768780
    },
    {
      "epoch": 2.6944172826266004,
      "grad_norm": 2.96875,
      "learning_rate": 5.659010724549639e-06,
      "loss": 0.7579,
      "step": 768790
    },
    {
      "epoch": 2.6944523301334957,
      "grad_norm": 3.4375,
      "learning_rate": 5.6583616958859376e-06,
      "loss": 0.9289,
      "step": 768800
    },
    {
      "epoch": 2.6944873776403915,
      "grad_norm": 3.5625,
      "learning_rate": 5.6577126672222356e-06,
      "loss": 0.7944,
      "step": 768810
    },
    {
      "epoch": 2.6945224251472872,
      "grad_norm": 2.8125,
      "learning_rate": 5.6570636385585336e-06,
      "loss": 0.8115,
      "step": 768820
    },
    {
      "epoch": 2.694557472654183,
      "grad_norm": 2.921875,
      "learning_rate": 5.6564146098948316e-06,
      "loss": 0.8582,
      "step": 768830
    },
    {
      "epoch": 2.6945925201610783,
      "grad_norm": 2.59375,
      "learning_rate": 5.6557655812311296e-06,
      "loss": 0.8534,
      "step": 768840
    },
    {
      "epoch": 2.694627567667974,
      "grad_norm": 2.9375,
      "learning_rate": 5.6551165525674276e-06,
      "loss": 0.7814,
      "step": 768850
    },
    {
      "epoch": 2.6946626151748694,
      "grad_norm": 2.890625,
      "learning_rate": 5.654467523903726e-06,
      "loss": 0.7609,
      "step": 768860
    },
    {
      "epoch": 2.694697662681765,
      "grad_norm": 2.75,
      "learning_rate": 5.653818495240024e-06,
      "loss": 0.8151,
      "step": 768870
    },
    {
      "epoch": 2.694732710188661,
      "grad_norm": 2.859375,
      "learning_rate": 5.6531694665763216e-06,
      "loss": 0.9188,
      "step": 768880
    },
    {
      "epoch": 2.694767757695556,
      "grad_norm": 3.46875,
      "learning_rate": 5.65252043791262e-06,
      "loss": 0.8417,
      "step": 768890
    },
    {
      "epoch": 2.694802805202452,
      "grad_norm": 2.921875,
      "learning_rate": 5.651871409248918e-06,
      "loss": 0.7578,
      "step": 768900
    },
    {
      "epoch": 2.6948378527093473,
      "grad_norm": 2.609375,
      "learning_rate": 5.651222380585216e-06,
      "loss": 0.8098,
      "step": 768910
    },
    {
      "epoch": 2.694872900216243,
      "grad_norm": 2.5625,
      "learning_rate": 5.650573351921514e-06,
      "loss": 0.7825,
      "step": 768920
    },
    {
      "epoch": 2.694907947723139,
      "grad_norm": 2.734375,
      "learning_rate": 5.649924323257812e-06,
      "loss": 0.7226,
      "step": 768930
    },
    {
      "epoch": 2.6949429952300346,
      "grad_norm": 3.078125,
      "learning_rate": 5.64927529459411e-06,
      "loss": 0.8327,
      "step": 768940
    },
    {
      "epoch": 2.69497804273693,
      "grad_norm": 2.75,
      "learning_rate": 5.648626265930409e-06,
      "loss": 0.802,
      "step": 768950
    },
    {
      "epoch": 2.6950130902438256,
      "grad_norm": 2.921875,
      "learning_rate": 5.647977237266707e-06,
      "loss": 0.7906,
      "step": 768960
    },
    {
      "epoch": 2.695048137750721,
      "grad_norm": 2.75,
      "learning_rate": 5.647328208603005e-06,
      "loss": 0.7989,
      "step": 768970
    },
    {
      "epoch": 2.6950831852576167,
      "grad_norm": 3.078125,
      "learning_rate": 5.646679179939303e-06,
      "loss": 0.7555,
      "step": 768980
    },
    {
      "epoch": 2.6951182327645125,
      "grad_norm": 3.171875,
      "learning_rate": 5.646030151275601e-06,
      "loss": 0.8655,
      "step": 768990
    },
    {
      "epoch": 2.6951532802714078,
      "grad_norm": 2.734375,
      "learning_rate": 5.645381122611899e-06,
      "loss": 0.8879,
      "step": 769000
    },
    {
      "epoch": 2.6951883277783035,
      "grad_norm": 2.984375,
      "learning_rate": 5.644732093948197e-06,
      "loss": 0.7598,
      "step": 769010
    },
    {
      "epoch": 2.695223375285199,
      "grad_norm": 3.09375,
      "learning_rate": 5.644083065284495e-06,
      "loss": 0.8867,
      "step": 769020
    },
    {
      "epoch": 2.6952584227920946,
      "grad_norm": 3.109375,
      "learning_rate": 5.643434036620793e-06,
      "loss": 0.7861,
      "step": 769030
    },
    {
      "epoch": 2.6952934702989904,
      "grad_norm": 2.53125,
      "learning_rate": 5.642785007957091e-06,
      "loss": 0.6832,
      "step": 769040
    },
    {
      "epoch": 2.695328517805886,
      "grad_norm": 2.9375,
      "learning_rate": 5.64213597929339e-06,
      "loss": 0.8331,
      "step": 769050
    },
    {
      "epoch": 2.6953635653127814,
      "grad_norm": 3.15625,
      "learning_rate": 5.641486950629688e-06,
      "loss": 0.8081,
      "step": 769060
    },
    {
      "epoch": 2.695398612819677,
      "grad_norm": 3.046875,
      "learning_rate": 5.640837921965986e-06,
      "loss": 0.8755,
      "step": 769070
    },
    {
      "epoch": 2.6954336603265725,
      "grad_norm": 3.234375,
      "learning_rate": 5.640188893302284e-06,
      "loss": 0.8639,
      "step": 769080
    },
    {
      "epoch": 2.6954687078334683,
      "grad_norm": 2.828125,
      "learning_rate": 5.639539864638582e-06,
      "loss": 0.794,
      "step": 769090
    },
    {
      "epoch": 2.695503755340364,
      "grad_norm": 2.984375,
      "learning_rate": 5.63889083597488e-06,
      "loss": 0.8806,
      "step": 769100
    },
    {
      "epoch": 2.6955388028472593,
      "grad_norm": 2.84375,
      "learning_rate": 5.638241807311179e-06,
      "loss": 0.7656,
      "step": 769110
    },
    {
      "epoch": 2.695573850354155,
      "grad_norm": 3.0625,
      "learning_rate": 5.637592778647476e-06,
      "loss": 0.8543,
      "step": 769120
    },
    {
      "epoch": 2.6956088978610504,
      "grad_norm": 3.078125,
      "learning_rate": 5.636943749983774e-06,
      "loss": 0.8252,
      "step": 769130
    },
    {
      "epoch": 2.695643945367946,
      "grad_norm": 2.828125,
      "learning_rate": 5.636294721320073e-06,
      "loss": 0.7591,
      "step": 769140
    },
    {
      "epoch": 2.695678992874842,
      "grad_norm": 2.96875,
      "learning_rate": 5.635645692656371e-06,
      "loss": 0.7773,
      "step": 769150
    },
    {
      "epoch": 2.6957140403817377,
      "grad_norm": 2.59375,
      "learning_rate": 5.634996663992668e-06,
      "loss": 0.7554,
      "step": 769160
    },
    {
      "epoch": 2.695749087888633,
      "grad_norm": 2.671875,
      "learning_rate": 5.634347635328967e-06,
      "loss": 0.7873,
      "step": 769170
    },
    {
      "epoch": 2.6957841353955287,
      "grad_norm": 2.609375,
      "learning_rate": 5.633698606665265e-06,
      "loss": 0.7939,
      "step": 769180
    },
    {
      "epoch": 2.695819182902424,
      "grad_norm": 3.203125,
      "learning_rate": 5.633049578001563e-06,
      "loss": 0.8769,
      "step": 769190
    },
    {
      "epoch": 2.69585423040932,
      "grad_norm": 3.140625,
      "learning_rate": 5.632400549337862e-06,
      "loss": 0.8577,
      "step": 769200
    },
    {
      "epoch": 2.6958892779162156,
      "grad_norm": 2.6875,
      "learning_rate": 5.631751520674159e-06,
      "loss": 0.7462,
      "step": 769210
    },
    {
      "epoch": 2.695924325423111,
      "grad_norm": 2.96875,
      "learning_rate": 5.631102492010457e-06,
      "loss": 0.8197,
      "step": 769220
    },
    {
      "epoch": 2.6959593729300066,
      "grad_norm": 2.875,
      "learning_rate": 5.630453463346756e-06,
      "loss": 0.7209,
      "step": 769230
    },
    {
      "epoch": 2.695994420436902,
      "grad_norm": 3.296875,
      "learning_rate": 5.629804434683054e-06,
      "loss": 0.8584,
      "step": 769240
    },
    {
      "epoch": 2.6960294679437977,
      "grad_norm": 3.3125,
      "learning_rate": 5.629155406019352e-06,
      "loss": 0.8208,
      "step": 769250
    },
    {
      "epoch": 2.6960645154506935,
      "grad_norm": 2.890625,
      "learning_rate": 5.62850637735565e-06,
      "loss": 0.8574,
      "step": 769260
    },
    {
      "epoch": 2.6960995629575892,
      "grad_norm": 3.1875,
      "learning_rate": 5.627857348691948e-06,
      "loss": 0.7712,
      "step": 769270
    },
    {
      "epoch": 2.6961346104644845,
      "grad_norm": 3.265625,
      "learning_rate": 5.627208320028246e-06,
      "loss": 0.7666,
      "step": 769280
    },
    {
      "epoch": 2.6961696579713803,
      "grad_norm": 3.109375,
      "learning_rate": 5.626559291364545e-06,
      "loss": 0.7917,
      "step": 769290
    },
    {
      "epoch": 2.6962047054782756,
      "grad_norm": 2.9375,
      "learning_rate": 5.625910262700842e-06,
      "loss": 0.7624,
      "step": 769300
    },
    {
      "epoch": 2.6962397529851714,
      "grad_norm": 2.984375,
      "learning_rate": 5.62526123403714e-06,
      "loss": 0.7597,
      "step": 769310
    },
    {
      "epoch": 2.696274800492067,
      "grad_norm": 2.84375,
      "learning_rate": 5.624612205373439e-06,
      "loss": 0.7848,
      "step": 769320
    },
    {
      "epoch": 2.6963098479989625,
      "grad_norm": 3.015625,
      "learning_rate": 5.623963176709737e-06,
      "loss": 0.75,
      "step": 769330
    },
    {
      "epoch": 2.696344895505858,
      "grad_norm": 2.625,
      "learning_rate": 5.623314148046035e-06,
      "loss": 0.7525,
      "step": 769340
    },
    {
      "epoch": 2.6963799430127535,
      "grad_norm": 2.953125,
      "learning_rate": 5.622665119382333e-06,
      "loss": 0.7822,
      "step": 769350
    },
    {
      "epoch": 2.6964149905196493,
      "grad_norm": 3.109375,
      "learning_rate": 5.622016090718631e-06,
      "loss": 0.7592,
      "step": 769360
    },
    {
      "epoch": 2.696450038026545,
      "grad_norm": 3.15625,
      "learning_rate": 5.621367062054929e-06,
      "loss": 0.8709,
      "step": 769370
    },
    {
      "epoch": 2.696485085533441,
      "grad_norm": 2.96875,
      "learning_rate": 5.6207180333912275e-06,
      "loss": 0.7763,
      "step": 769380
    },
    {
      "epoch": 2.696520133040336,
      "grad_norm": 2.6875,
      "learning_rate": 5.6200690047275255e-06,
      "loss": 0.8033,
      "step": 769390
    },
    {
      "epoch": 2.696555180547232,
      "grad_norm": 2.734375,
      "learning_rate": 5.619419976063823e-06,
      "loss": 0.7691,
      "step": 769400
    },
    {
      "epoch": 2.696590228054127,
      "grad_norm": 3.46875,
      "learning_rate": 5.6187709474001215e-06,
      "loss": 0.7767,
      "step": 769410
    },
    {
      "epoch": 2.696625275561023,
      "grad_norm": 2.609375,
      "learning_rate": 5.6181219187364195e-06,
      "loss": 0.7904,
      "step": 769420
    },
    {
      "epoch": 2.6966603230679187,
      "grad_norm": 2.984375,
      "learning_rate": 5.6174728900727175e-06,
      "loss": 0.7766,
      "step": 769430
    },
    {
      "epoch": 2.696695370574814,
      "grad_norm": 2.765625,
      "learning_rate": 5.6168238614090155e-06,
      "loss": 0.7682,
      "step": 769440
    },
    {
      "epoch": 2.6967304180817098,
      "grad_norm": 2.515625,
      "learning_rate": 5.6161748327453135e-06,
      "loss": 0.7874,
      "step": 769450
    },
    {
      "epoch": 2.696765465588605,
      "grad_norm": 3.65625,
      "learning_rate": 5.6155258040816115e-06,
      "loss": 0.8522,
      "step": 769460
    },
    {
      "epoch": 2.696800513095501,
      "grad_norm": 2.796875,
      "learning_rate": 5.61487677541791e-06,
      "loss": 0.7812,
      "step": 769470
    },
    {
      "epoch": 2.6968355606023966,
      "grad_norm": 2.75,
      "learning_rate": 5.614227746754208e-06,
      "loss": 0.8145,
      "step": 769480
    },
    {
      "epoch": 2.6968706081092924,
      "grad_norm": 2.828125,
      "learning_rate": 5.6135787180905055e-06,
      "loss": 0.8522,
      "step": 769490
    },
    {
      "epoch": 2.6969056556161877,
      "grad_norm": 2.765625,
      "learning_rate": 5.612929689426804e-06,
      "loss": 0.7718,
      "step": 769500
    },
    {
      "epoch": 2.6969407031230834,
      "grad_norm": 2.96875,
      "learning_rate": 5.612280660763102e-06,
      "loss": 0.7639,
      "step": 769510
    },
    {
      "epoch": 2.6969757506299787,
      "grad_norm": 2.765625,
      "learning_rate": 5.6116316320994e-06,
      "loss": 0.7776,
      "step": 769520
    },
    {
      "epoch": 2.6970107981368745,
      "grad_norm": 2.953125,
      "learning_rate": 5.610982603435698e-06,
      "loss": 0.8014,
      "step": 769530
    },
    {
      "epoch": 2.6970458456437703,
      "grad_norm": 2.828125,
      "learning_rate": 5.610333574771996e-06,
      "loss": 0.7748,
      "step": 769540
    },
    {
      "epoch": 2.6970808931506656,
      "grad_norm": 2.640625,
      "learning_rate": 5.609684546108294e-06,
      "loss": 0.8311,
      "step": 769550
    },
    {
      "epoch": 2.6971159406575613,
      "grad_norm": 2.84375,
      "learning_rate": 5.609035517444592e-06,
      "loss": 0.7553,
      "step": 769560
    },
    {
      "epoch": 2.697150988164457,
      "grad_norm": 3.1875,
      "learning_rate": 5.608386488780891e-06,
      "loss": 0.8198,
      "step": 769570
    },
    {
      "epoch": 2.6971860356713524,
      "grad_norm": 2.796875,
      "learning_rate": 5.607737460117189e-06,
      "loss": 0.7752,
      "step": 769580
    },
    {
      "epoch": 2.697221083178248,
      "grad_norm": 2.765625,
      "learning_rate": 5.607088431453486e-06,
      "loss": 0.7497,
      "step": 769590
    },
    {
      "epoch": 2.697256130685144,
      "grad_norm": 3.078125,
      "learning_rate": 5.606439402789785e-06,
      "loss": 0.8232,
      "step": 769600
    },
    {
      "epoch": 2.6972911781920392,
      "grad_norm": 2.875,
      "learning_rate": 5.605790374126083e-06,
      "loss": 0.7458,
      "step": 769610
    },
    {
      "epoch": 2.697326225698935,
      "grad_norm": 2.859375,
      "learning_rate": 5.605141345462381e-06,
      "loss": 0.7349,
      "step": 769620
    },
    {
      "epoch": 2.6973612732058303,
      "grad_norm": 2.984375,
      "learning_rate": 5.604492316798679e-06,
      "loss": 0.8435,
      "step": 769630
    },
    {
      "epoch": 2.697396320712726,
      "grad_norm": 2.8125,
      "learning_rate": 5.603843288134977e-06,
      "loss": 0.8115,
      "step": 769640
    },
    {
      "epoch": 2.697431368219622,
      "grad_norm": 3.109375,
      "learning_rate": 5.603194259471275e-06,
      "loss": 0.8974,
      "step": 769650
    },
    {
      "epoch": 2.697466415726517,
      "grad_norm": 2.640625,
      "learning_rate": 5.602545230807574e-06,
      "loss": 0.8394,
      "step": 769660
    },
    {
      "epoch": 2.697501463233413,
      "grad_norm": 3.265625,
      "learning_rate": 5.601896202143872e-06,
      "loss": 0.7794,
      "step": 769670
    },
    {
      "epoch": 2.6975365107403086,
      "grad_norm": 3.359375,
      "learning_rate": 5.601247173480169e-06,
      "loss": 0.8522,
      "step": 769680
    },
    {
      "epoch": 2.697571558247204,
      "grad_norm": 3.453125,
      "learning_rate": 5.600598144816468e-06,
      "loss": 0.8631,
      "step": 769690
    },
    {
      "epoch": 2.6976066057540997,
      "grad_norm": 2.8125,
      "learning_rate": 5.599949116152766e-06,
      "loss": 0.7739,
      "step": 769700
    },
    {
      "epoch": 2.6976416532609955,
      "grad_norm": 2.8125,
      "learning_rate": 5.599300087489064e-06,
      "loss": 0.8457,
      "step": 769710
    },
    {
      "epoch": 2.697676700767891,
      "grad_norm": 3.078125,
      "learning_rate": 5.598651058825363e-06,
      "loss": 0.8182,
      "step": 769720
    },
    {
      "epoch": 2.6977117482747865,
      "grad_norm": 2.578125,
      "learning_rate": 5.59800203016166e-06,
      "loss": 0.7713,
      "step": 769730
    },
    {
      "epoch": 2.697746795781682,
      "grad_norm": 2.828125,
      "learning_rate": 5.597353001497958e-06,
      "loss": 0.8125,
      "step": 769740
    },
    {
      "epoch": 2.6977818432885776,
      "grad_norm": 2.734375,
      "learning_rate": 5.596703972834257e-06,
      "loss": 0.736,
      "step": 769750
    },
    {
      "epoch": 2.6978168907954734,
      "grad_norm": 2.65625,
      "learning_rate": 5.596054944170555e-06,
      "loss": 0.7959,
      "step": 769760
    },
    {
      "epoch": 2.697851938302369,
      "grad_norm": 2.671875,
      "learning_rate": 5.595405915506853e-06,
      "loss": 0.7928,
      "step": 769770
    },
    {
      "epoch": 2.6978869858092644,
      "grad_norm": 2.546875,
      "learning_rate": 5.594756886843151e-06,
      "loss": 0.8162,
      "step": 769780
    },
    {
      "epoch": 2.69792203331616,
      "grad_norm": 3.0625,
      "learning_rate": 5.594107858179449e-06,
      "loss": 0.8626,
      "step": 769790
    },
    {
      "epoch": 2.6979570808230555,
      "grad_norm": 3.03125,
      "learning_rate": 5.593458829515747e-06,
      "loss": 0.8231,
      "step": 769800
    },
    {
      "epoch": 2.6979921283299513,
      "grad_norm": 3.25,
      "learning_rate": 5.592809800852046e-06,
      "loss": 0.8674,
      "step": 769810
    },
    {
      "epoch": 2.698027175836847,
      "grad_norm": 2.375,
      "learning_rate": 5.592160772188343e-06,
      "loss": 0.8209,
      "step": 769820
    },
    {
      "epoch": 2.6980622233437423,
      "grad_norm": 4.0625,
      "learning_rate": 5.591511743524641e-06,
      "loss": 0.7424,
      "step": 769830
    },
    {
      "epoch": 2.698097270850638,
      "grad_norm": 2.421875,
      "learning_rate": 5.59086271486094e-06,
      "loss": 0.8022,
      "step": 769840
    },
    {
      "epoch": 2.6981323183575334,
      "grad_norm": 2.890625,
      "learning_rate": 5.590213686197238e-06,
      "loss": 0.7856,
      "step": 769850
    },
    {
      "epoch": 2.698167365864429,
      "grad_norm": 2.375,
      "learning_rate": 5.589564657533536e-06,
      "loss": 0.8291,
      "step": 769860
    },
    {
      "epoch": 2.698202413371325,
      "grad_norm": 3.375,
      "learning_rate": 5.588915628869834e-06,
      "loss": 0.7788,
      "step": 769870
    },
    {
      "epoch": 2.6982374608782207,
      "grad_norm": 2.890625,
      "learning_rate": 5.588266600206132e-06,
      "loss": 0.8566,
      "step": 769880
    },
    {
      "epoch": 2.698272508385116,
      "grad_norm": 2.96875,
      "learning_rate": 5.58761757154243e-06,
      "loss": 0.887,
      "step": 769890
    },
    {
      "epoch": 2.6983075558920118,
      "grad_norm": 3.21875,
      "learning_rate": 5.5869685428787286e-06,
      "loss": 0.8408,
      "step": 769900
    },
    {
      "epoch": 2.698342603398907,
      "grad_norm": 3.0,
      "learning_rate": 5.5863195142150266e-06,
      "loss": 0.8811,
      "step": 769910
    },
    {
      "epoch": 2.698377650905803,
      "grad_norm": 3.171875,
      "learning_rate": 5.585670485551324e-06,
      "loss": 0.814,
      "step": 769920
    },
    {
      "epoch": 2.6984126984126986,
      "grad_norm": 3.0,
      "learning_rate": 5.5850214568876226e-06,
      "loss": 0.7543,
      "step": 769930
    },
    {
      "epoch": 2.698447745919594,
      "grad_norm": 3.140625,
      "learning_rate": 5.5843724282239206e-06,
      "loss": 0.7486,
      "step": 769940
    },
    {
      "epoch": 2.6984827934264897,
      "grad_norm": 2.78125,
      "learning_rate": 5.5837233995602186e-06,
      "loss": 0.795,
      "step": 769950
    },
    {
      "epoch": 2.698517840933385,
      "grad_norm": 3.078125,
      "learning_rate": 5.5830743708965166e-06,
      "loss": 0.8264,
      "step": 769960
    },
    {
      "epoch": 2.6985528884402807,
      "grad_norm": 2.734375,
      "learning_rate": 5.5824253422328146e-06,
      "loss": 0.7563,
      "step": 769970
    },
    {
      "epoch": 2.6985879359471765,
      "grad_norm": 3.234375,
      "learning_rate": 5.5817763135691126e-06,
      "loss": 0.8552,
      "step": 769980
    },
    {
      "epoch": 2.6986229834540723,
      "grad_norm": 2.953125,
      "learning_rate": 5.5811272849054106e-06,
      "loss": 0.8279,
      "step": 769990
    },
    {
      "epoch": 2.6986580309609676,
      "grad_norm": 3.203125,
      "learning_rate": 5.580478256241709e-06,
      "loss": 0.8124,
      "step": 770000
    },
    {
      "epoch": 2.6986580309609676,
      "eval_loss": 0.7527007460594177,
      "eval_runtime": 553.7,
      "eval_samples_per_second": 687.08,
      "eval_steps_per_second": 57.257,
      "step": 770000
    },
    {
      "epoch": 2.6986930784678633,
      "grad_norm": 3.109375,
      "learning_rate": 5.5798292275780066e-06,
      "loss": 0.7853,
      "step": 770010
    },
    {
      "epoch": 2.6987281259747586,
      "grad_norm": 3.109375,
      "learning_rate": 5.579180198914305e-06,
      "loss": 0.8716,
      "step": 770020
    },
    {
      "epoch": 2.6987631734816544,
      "grad_norm": 2.96875,
      "learning_rate": 5.578531170250603e-06,
      "loss": 0.8213,
      "step": 770030
    },
    {
      "epoch": 2.69879822098855,
      "grad_norm": 2.71875,
      "learning_rate": 5.577882141586901e-06,
      "loss": 0.7533,
      "step": 770040
    },
    {
      "epoch": 2.6988332684954455,
      "grad_norm": 3.234375,
      "learning_rate": 5.577233112923199e-06,
      "loss": 0.8052,
      "step": 770050
    },
    {
      "epoch": 2.6988683160023412,
      "grad_norm": 2.796875,
      "learning_rate": 5.576584084259497e-06,
      "loss": 0.6969,
      "step": 770060
    },
    {
      "epoch": 2.6989033635092365,
      "grad_norm": 2.84375,
      "learning_rate": 5.575935055595795e-06,
      "loss": 0.7895,
      "step": 770070
    },
    {
      "epoch": 2.6989384110161323,
      "grad_norm": 2.65625,
      "learning_rate": 5.575286026932093e-06,
      "loss": 0.8157,
      "step": 770080
    },
    {
      "epoch": 2.698973458523028,
      "grad_norm": 3.03125,
      "learning_rate": 5.574636998268392e-06,
      "loss": 0.8284,
      "step": 770090
    },
    {
      "epoch": 2.699008506029924,
      "grad_norm": 2.90625,
      "learning_rate": 5.573987969604689e-06,
      "loss": 0.8774,
      "step": 770100
    },
    {
      "epoch": 2.699043553536819,
      "grad_norm": 3.203125,
      "learning_rate": 5.573338940940987e-06,
      "loss": 0.8042,
      "step": 770110
    },
    {
      "epoch": 2.699078601043715,
      "grad_norm": 2.90625,
      "learning_rate": 5.572689912277286e-06,
      "loss": 0.7487,
      "step": 770120
    },
    {
      "epoch": 2.69911364855061,
      "grad_norm": 2.953125,
      "learning_rate": 5.572040883613584e-06,
      "loss": 0.7193,
      "step": 770130
    },
    {
      "epoch": 2.699148696057506,
      "grad_norm": 3.015625,
      "learning_rate": 5.571391854949882e-06,
      "loss": 0.8505,
      "step": 770140
    },
    {
      "epoch": 2.6991837435644017,
      "grad_norm": 2.625,
      "learning_rate": 5.57074282628618e-06,
      "loss": 0.8609,
      "step": 770150
    },
    {
      "epoch": 2.699218791071297,
      "grad_norm": 3.0625,
      "learning_rate": 5.570093797622478e-06,
      "loss": 0.7677,
      "step": 770160
    },
    {
      "epoch": 2.699253838578193,
      "grad_norm": 2.890625,
      "learning_rate": 5.569444768958776e-06,
      "loss": 0.7207,
      "step": 770170
    },
    {
      "epoch": 2.699288886085088,
      "grad_norm": 2.890625,
      "learning_rate": 5.568795740295075e-06,
      "loss": 0.7985,
      "step": 770180
    },
    {
      "epoch": 2.699323933591984,
      "grad_norm": 2.8125,
      "learning_rate": 5.568146711631373e-06,
      "loss": 0.7937,
      "step": 770190
    },
    {
      "epoch": 2.6993589810988796,
      "grad_norm": 2.5,
      "learning_rate": 5.56749768296767e-06,
      "loss": 0.8494,
      "step": 770200
    },
    {
      "epoch": 2.6993940286057754,
      "grad_norm": 3.046875,
      "learning_rate": 5.566848654303969e-06,
      "loss": 0.8251,
      "step": 770210
    },
    {
      "epoch": 2.6994290761126707,
      "grad_norm": 3.0625,
      "learning_rate": 5.566199625640267e-06,
      "loss": 0.7799,
      "step": 770220
    },
    {
      "epoch": 2.6994641236195664,
      "grad_norm": 2.828125,
      "learning_rate": 5.565550596976565e-06,
      "loss": 0.8478,
      "step": 770230
    },
    {
      "epoch": 2.6994991711264618,
      "grad_norm": 2.796875,
      "learning_rate": 5.564901568312864e-06,
      "loss": 0.7691,
      "step": 770240
    },
    {
      "epoch": 2.6995342186333575,
      "grad_norm": 2.96875,
      "learning_rate": 5.564252539649161e-06,
      "loss": 0.7821,
      "step": 770250
    },
    {
      "epoch": 2.6995692661402533,
      "grad_norm": 3.71875,
      "learning_rate": 5.563603510985459e-06,
      "loss": 0.823,
      "step": 770260
    },
    {
      "epoch": 2.6996043136471486,
      "grad_norm": 2.984375,
      "learning_rate": 5.562954482321758e-06,
      "loss": 0.7682,
      "step": 770270
    },
    {
      "epoch": 2.6996393611540443,
      "grad_norm": 3.046875,
      "learning_rate": 5.562305453658056e-06,
      "loss": 0.8524,
      "step": 770280
    },
    {
      "epoch": 2.6996744086609397,
      "grad_norm": 3.1875,
      "learning_rate": 5.561656424994353e-06,
      "loss": 0.7637,
      "step": 770290
    },
    {
      "epoch": 2.6997094561678354,
      "grad_norm": 2.71875,
      "learning_rate": 5.561007396330652e-06,
      "loss": 0.7928,
      "step": 770300
    },
    {
      "epoch": 2.699744503674731,
      "grad_norm": 2.75,
      "learning_rate": 5.56035836766695e-06,
      "loss": 0.7762,
      "step": 770310
    },
    {
      "epoch": 2.699779551181627,
      "grad_norm": 2.953125,
      "learning_rate": 5.559709339003248e-06,
      "loss": 0.783,
      "step": 770320
    },
    {
      "epoch": 2.6998145986885222,
      "grad_norm": 2.78125,
      "learning_rate": 5.559060310339547e-06,
      "loss": 0.7815,
      "step": 770330
    },
    {
      "epoch": 2.699849646195418,
      "grad_norm": 2.8125,
      "learning_rate": 5.558411281675844e-06,
      "loss": 0.8782,
      "step": 770340
    },
    {
      "epoch": 2.6998846937023133,
      "grad_norm": 2.640625,
      "learning_rate": 5.557762253012142e-06,
      "loss": 0.8472,
      "step": 770350
    },
    {
      "epoch": 2.699919741209209,
      "grad_norm": 2.890625,
      "learning_rate": 5.557113224348441e-06,
      "loss": 0.9137,
      "step": 770360
    },
    {
      "epoch": 2.699954788716105,
      "grad_norm": 2.84375,
      "learning_rate": 5.556464195684739e-06,
      "loss": 0.8196,
      "step": 770370
    },
    {
      "epoch": 2.699989836223,
      "grad_norm": 3.046875,
      "learning_rate": 5.555815167021037e-06,
      "loss": 0.76,
      "step": 770380
    },
    {
      "epoch": 2.700024883729896,
      "grad_norm": 2.625,
      "learning_rate": 5.555166138357335e-06,
      "loss": 0.8459,
      "step": 770390
    },
    {
      "epoch": 2.700059931236791,
      "grad_norm": 2.5,
      "learning_rate": 5.554517109693633e-06,
      "loss": 0.7479,
      "step": 770400
    },
    {
      "epoch": 2.700094978743687,
      "grad_norm": 2.921875,
      "learning_rate": 5.553868081029931e-06,
      "loss": 0.7742,
      "step": 770410
    },
    {
      "epoch": 2.7001300262505827,
      "grad_norm": 3.40625,
      "learning_rate": 5.553219052366229e-06,
      "loss": 0.7603,
      "step": 770420
    },
    {
      "epoch": 2.7001650737574785,
      "grad_norm": 3.015625,
      "learning_rate": 5.552570023702527e-06,
      "loss": 0.7996,
      "step": 770430
    },
    {
      "epoch": 2.700200121264374,
      "grad_norm": 2.40625,
      "learning_rate": 5.551920995038825e-06,
      "loss": 0.7529,
      "step": 770440
    },
    {
      "epoch": 2.7002351687712696,
      "grad_norm": 2.96875,
      "learning_rate": 5.551271966375124e-06,
      "loss": 0.7685,
      "step": 770450
    },
    {
      "epoch": 2.700270216278165,
      "grad_norm": 3.078125,
      "learning_rate": 5.550622937711422e-06,
      "loss": 0.6922,
      "step": 770460
    },
    {
      "epoch": 2.7003052637850606,
      "grad_norm": 2.59375,
      "learning_rate": 5.54997390904772e-06,
      "loss": 0.7418,
      "step": 770470
    },
    {
      "epoch": 2.7003403112919564,
      "grad_norm": 2.671875,
      "learning_rate": 5.549324880384018e-06,
      "loss": 0.8365,
      "step": 770480
    },
    {
      "epoch": 2.7003753587988517,
      "grad_norm": 3.21875,
      "learning_rate": 5.548675851720316e-06,
      "loss": 0.7763,
      "step": 770490
    },
    {
      "epoch": 2.7004104063057475,
      "grad_norm": 3.25,
      "learning_rate": 5.548026823056614e-06,
      "loss": 0.8356,
      "step": 770500
    },
    {
      "epoch": 2.700445453812643,
      "grad_norm": 2.796875,
      "learning_rate": 5.547377794392912e-06,
      "loss": 0.8423,
      "step": 770510
    },
    {
      "epoch": 2.7004805013195385,
      "grad_norm": 2.734375,
      "learning_rate": 5.5467287657292105e-06,
      "loss": 0.7144,
      "step": 770520
    },
    {
      "epoch": 2.7005155488264343,
      "grad_norm": 3.03125,
      "learning_rate": 5.546079737065508e-06,
      "loss": 0.832,
      "step": 770530
    },
    {
      "epoch": 2.70055059633333,
      "grad_norm": 3.078125,
      "learning_rate": 5.545430708401806e-06,
      "loss": 0.8635,
      "step": 770540
    },
    {
      "epoch": 2.7005856438402254,
      "grad_norm": 2.546875,
      "learning_rate": 5.5447816797381045e-06,
      "loss": 0.7893,
      "step": 770550
    },
    {
      "epoch": 2.700620691347121,
      "grad_norm": 3.078125,
      "learning_rate": 5.5441326510744025e-06,
      "loss": 0.8408,
      "step": 770560
    },
    {
      "epoch": 2.7006557388540164,
      "grad_norm": 2.703125,
      "learning_rate": 5.5434836224107005e-06,
      "loss": 0.7069,
      "step": 770570
    },
    {
      "epoch": 2.700690786360912,
      "grad_norm": 2.953125,
      "learning_rate": 5.5428345937469985e-06,
      "loss": 0.7251,
      "step": 770580
    },
    {
      "epoch": 2.700725833867808,
      "grad_norm": 3.3125,
      "learning_rate": 5.5421855650832965e-06,
      "loss": 0.8353,
      "step": 770590
    },
    {
      "epoch": 2.7007608813747033,
      "grad_norm": 3.078125,
      "learning_rate": 5.5415365364195945e-06,
      "loss": 0.7998,
      "step": 770600
    },
    {
      "epoch": 2.700795928881599,
      "grad_norm": 2.34375,
      "learning_rate": 5.540887507755893e-06,
      "loss": 0.7184,
      "step": 770610
    },
    {
      "epoch": 2.7008309763884943,
      "grad_norm": 2.640625,
      "learning_rate": 5.5402384790921905e-06,
      "loss": 0.7898,
      "step": 770620
    },
    {
      "epoch": 2.70086602389539,
      "grad_norm": 2.59375,
      "learning_rate": 5.5395894504284885e-06,
      "loss": 0.7729,
      "step": 770630
    },
    {
      "epoch": 2.700901071402286,
      "grad_norm": 2.53125,
      "learning_rate": 5.538940421764787e-06,
      "loss": 0.7691,
      "step": 770640
    },
    {
      "epoch": 2.7009361189091816,
      "grad_norm": 3.015625,
      "learning_rate": 5.538291393101085e-06,
      "loss": 0.8478,
      "step": 770650
    },
    {
      "epoch": 2.700971166416077,
      "grad_norm": 2.890625,
      "learning_rate": 5.537642364437383e-06,
      "loss": 0.8067,
      "step": 770660
    },
    {
      "epoch": 2.7010062139229727,
      "grad_norm": 2.75,
      "learning_rate": 5.536993335773681e-06,
      "loss": 0.7151,
      "step": 770670
    },
    {
      "epoch": 2.701041261429868,
      "grad_norm": 3.3125,
      "learning_rate": 5.536344307109979e-06,
      "loss": 0.8194,
      "step": 770680
    },
    {
      "epoch": 2.7010763089367638,
      "grad_norm": 3.375,
      "learning_rate": 5.535695278446277e-06,
      "loss": 0.7612,
      "step": 770690
    },
    {
      "epoch": 2.7011113564436595,
      "grad_norm": 3.40625,
      "learning_rate": 5.535046249782576e-06,
      "loss": 0.787,
      "step": 770700
    },
    {
      "epoch": 2.701146403950555,
      "grad_norm": 2.53125,
      "learning_rate": 5.534397221118874e-06,
      "loss": 0.7387,
      "step": 770710
    },
    {
      "epoch": 2.7011814514574506,
      "grad_norm": 3.578125,
      "learning_rate": 5.533748192455171e-06,
      "loss": 0.8113,
      "step": 770720
    },
    {
      "epoch": 2.701216498964346,
      "grad_norm": 2.953125,
      "learning_rate": 5.53309916379147e-06,
      "loss": 0.8155,
      "step": 770730
    },
    {
      "epoch": 2.7012515464712417,
      "grad_norm": 2.90625,
      "learning_rate": 5.532450135127768e-06,
      "loss": 0.7308,
      "step": 770740
    },
    {
      "epoch": 2.7012865939781374,
      "grad_norm": 2.90625,
      "learning_rate": 5.531801106464066e-06,
      "loss": 0.7664,
      "step": 770750
    },
    {
      "epoch": 2.701321641485033,
      "grad_norm": 2.671875,
      "learning_rate": 5.531152077800364e-06,
      "loss": 0.7287,
      "step": 770760
    },
    {
      "epoch": 2.7013566889919285,
      "grad_norm": 2.953125,
      "learning_rate": 5.530503049136662e-06,
      "loss": 0.7957,
      "step": 770770
    },
    {
      "epoch": 2.7013917364988242,
      "grad_norm": 2.875,
      "learning_rate": 5.52985402047296e-06,
      "loss": 0.7518,
      "step": 770780
    },
    {
      "epoch": 2.7014267840057196,
      "grad_norm": 2.75,
      "learning_rate": 5.529204991809259e-06,
      "loss": 0.9211,
      "step": 770790
    },
    {
      "epoch": 2.7014618315126153,
      "grad_norm": 3.09375,
      "learning_rate": 5.528555963145557e-06,
      "loss": 0.8705,
      "step": 770800
    },
    {
      "epoch": 2.701496879019511,
      "grad_norm": 3.15625,
      "learning_rate": 5.527906934481854e-06,
      "loss": 0.7801,
      "step": 770810
    },
    {
      "epoch": 2.7015319265264064,
      "grad_norm": 3.0,
      "learning_rate": 5.527257905818153e-06,
      "loss": 0.8255,
      "step": 770820
    },
    {
      "epoch": 2.701566974033302,
      "grad_norm": 3.3125,
      "learning_rate": 5.526608877154451e-06,
      "loss": 0.8809,
      "step": 770830
    },
    {
      "epoch": 2.7016020215401975,
      "grad_norm": 2.8125,
      "learning_rate": 5.525959848490749e-06,
      "loss": 0.7953,
      "step": 770840
    },
    {
      "epoch": 2.701637069047093,
      "grad_norm": 3.0625,
      "learning_rate": 5.525310819827047e-06,
      "loss": 0.8317,
      "step": 770850
    },
    {
      "epoch": 2.701672116553989,
      "grad_norm": 2.921875,
      "learning_rate": 5.524661791163345e-06,
      "loss": 0.7711,
      "step": 770860
    },
    {
      "epoch": 2.7017071640608847,
      "grad_norm": 3.671875,
      "learning_rate": 5.524012762499643e-06,
      "loss": 0.7924,
      "step": 770870
    },
    {
      "epoch": 2.70174221156778,
      "grad_norm": 3.375,
      "learning_rate": 5.523363733835942e-06,
      "loss": 0.89,
      "step": 770880
    },
    {
      "epoch": 2.701777259074676,
      "grad_norm": 2.515625,
      "learning_rate": 5.52271470517224e-06,
      "loss": 0.7399,
      "step": 770890
    },
    {
      "epoch": 2.701812306581571,
      "grad_norm": 2.421875,
      "learning_rate": 5.522065676508537e-06,
      "loss": 0.7419,
      "step": 770900
    },
    {
      "epoch": 2.701847354088467,
      "grad_norm": 2.859375,
      "learning_rate": 5.521416647844836e-06,
      "loss": 0.7918,
      "step": 770910
    },
    {
      "epoch": 2.7018824015953626,
      "grad_norm": 3.328125,
      "learning_rate": 5.520767619181134e-06,
      "loss": 0.843,
      "step": 770920
    },
    {
      "epoch": 2.701917449102258,
      "grad_norm": 3.3125,
      "learning_rate": 5.520118590517432e-06,
      "loss": 0.8254,
      "step": 770930
    },
    {
      "epoch": 2.7019524966091537,
      "grad_norm": 2.796875,
      "learning_rate": 5.51946956185373e-06,
      "loss": 0.7766,
      "step": 770940
    },
    {
      "epoch": 2.7019875441160495,
      "grad_norm": 2.984375,
      "learning_rate": 5.518820533190028e-06,
      "loss": 0.8094,
      "step": 770950
    },
    {
      "epoch": 2.702022591622945,
      "grad_norm": 3.046875,
      "learning_rate": 5.518171504526326e-06,
      "loss": 0.7954,
      "step": 770960
    },
    {
      "epoch": 2.7020576391298405,
      "grad_norm": 3.15625,
      "learning_rate": 5.517522475862624e-06,
      "loss": 0.8087,
      "step": 770970
    },
    {
      "epoch": 2.7020926866367363,
      "grad_norm": 3.109375,
      "learning_rate": 5.516873447198923e-06,
      "loss": 0.78,
      "step": 770980
    },
    {
      "epoch": 2.7021277341436316,
      "grad_norm": 2.59375,
      "learning_rate": 5.516224418535221e-06,
      "loss": 0.8176,
      "step": 770990
    },
    {
      "epoch": 2.7021627816505274,
      "grad_norm": 3.078125,
      "learning_rate": 5.515575389871519e-06,
      "loss": 0.7935,
      "step": 771000
    },
    {
      "epoch": 2.7021978291574227,
      "grad_norm": 2.875,
      "learning_rate": 5.514926361207817e-06,
      "loss": 0.789,
      "step": 771010
    },
    {
      "epoch": 2.7022328766643184,
      "grad_norm": 2.453125,
      "learning_rate": 5.514277332544115e-06,
      "loss": 0.8015,
      "step": 771020
    },
    {
      "epoch": 2.702267924171214,
      "grad_norm": 2.75,
      "learning_rate": 5.513628303880413e-06,
      "loss": 0.7752,
      "step": 771030
    },
    {
      "epoch": 2.70230297167811,
      "grad_norm": 2.96875,
      "learning_rate": 5.512979275216711e-06,
      "loss": 0.8129,
      "step": 771040
    },
    {
      "epoch": 2.7023380191850053,
      "grad_norm": 3.078125,
      "learning_rate": 5.512330246553009e-06,
      "loss": 0.7943,
      "step": 771050
    },
    {
      "epoch": 2.702373066691901,
      "grad_norm": 2.96875,
      "learning_rate": 5.511681217889307e-06,
      "loss": 0.7483,
      "step": 771060
    },
    {
      "epoch": 2.7024081141987963,
      "grad_norm": 2.984375,
      "learning_rate": 5.5110321892256056e-06,
      "loss": 0.8068,
      "step": 771070
    },
    {
      "epoch": 2.702443161705692,
      "grad_norm": 2.484375,
      "learning_rate": 5.5103831605619036e-06,
      "loss": 0.7788,
      "step": 771080
    },
    {
      "epoch": 2.702478209212588,
      "grad_norm": 3.546875,
      "learning_rate": 5.509734131898201e-06,
      "loss": 0.8268,
      "step": 771090
    },
    {
      "epoch": 2.702513256719483,
      "grad_norm": 3.171875,
      "learning_rate": 5.5090851032344996e-06,
      "loss": 0.7965,
      "step": 771100
    },
    {
      "epoch": 2.702548304226379,
      "grad_norm": 2.953125,
      "learning_rate": 5.5084360745707976e-06,
      "loss": 0.8134,
      "step": 771110
    },
    {
      "epoch": 2.7025833517332742,
      "grad_norm": 2.75,
      "learning_rate": 5.5077870459070956e-06,
      "loss": 0.79,
      "step": 771120
    },
    {
      "epoch": 2.70261839924017,
      "grad_norm": 2.765625,
      "learning_rate": 5.507138017243394e-06,
      "loss": 0.77,
      "step": 771130
    },
    {
      "epoch": 2.7026534467470658,
      "grad_norm": 2.796875,
      "learning_rate": 5.5064889885796916e-06,
      "loss": 0.7886,
      "step": 771140
    },
    {
      "epoch": 2.7026884942539615,
      "grad_norm": 2.953125,
      "learning_rate": 5.5058399599159896e-06,
      "loss": 0.7412,
      "step": 771150
    },
    {
      "epoch": 2.702723541760857,
      "grad_norm": 3.15625,
      "learning_rate": 5.505190931252288e-06,
      "loss": 0.8446,
      "step": 771160
    },
    {
      "epoch": 2.7027585892677526,
      "grad_norm": 3.296875,
      "learning_rate": 5.504541902588586e-06,
      "loss": 0.7914,
      "step": 771170
    },
    {
      "epoch": 2.702793636774648,
      "grad_norm": 3.203125,
      "learning_rate": 5.503892873924884e-06,
      "loss": 0.7739,
      "step": 771180
    },
    {
      "epoch": 2.7028286842815437,
      "grad_norm": 2.609375,
      "learning_rate": 5.503243845261182e-06,
      "loss": 0.8213,
      "step": 771190
    },
    {
      "epoch": 2.7028637317884394,
      "grad_norm": 2.5625,
      "learning_rate": 5.50259481659748e-06,
      "loss": 0.7884,
      "step": 771200
    },
    {
      "epoch": 2.7028987792953347,
      "grad_norm": 3.4375,
      "learning_rate": 5.501945787933778e-06,
      "loss": 0.7662,
      "step": 771210
    },
    {
      "epoch": 2.7029338268022305,
      "grad_norm": 2.96875,
      "learning_rate": 5.501296759270077e-06,
      "loss": 0.7548,
      "step": 771220
    },
    {
      "epoch": 2.702968874309126,
      "grad_norm": 3.375,
      "learning_rate": 5.500647730606374e-06,
      "loss": 0.7649,
      "step": 771230
    },
    {
      "epoch": 2.7030039218160216,
      "grad_norm": 3.171875,
      "learning_rate": 5.499998701942672e-06,
      "loss": 0.8059,
      "step": 771240
    },
    {
      "epoch": 2.7030389693229173,
      "grad_norm": 2.75,
      "learning_rate": 5.499349673278971e-06,
      "loss": 0.7844,
      "step": 771250
    },
    {
      "epoch": 2.703074016829813,
      "grad_norm": 3.25,
      "learning_rate": 5.498700644615269e-06,
      "loss": 0.863,
      "step": 771260
    },
    {
      "epoch": 2.7031090643367084,
      "grad_norm": 3.234375,
      "learning_rate": 5.498051615951567e-06,
      "loss": 0.7936,
      "step": 771270
    },
    {
      "epoch": 2.703144111843604,
      "grad_norm": 2.625,
      "learning_rate": 5.497402587287865e-06,
      "loss": 0.7931,
      "step": 771280
    },
    {
      "epoch": 2.7031791593504995,
      "grad_norm": 3.078125,
      "learning_rate": 5.496753558624163e-06,
      "loss": 0.7959,
      "step": 771290
    },
    {
      "epoch": 2.703214206857395,
      "grad_norm": 3.265625,
      "learning_rate": 5.496104529960461e-06,
      "loss": 0.8175,
      "step": 771300
    },
    {
      "epoch": 2.703249254364291,
      "grad_norm": 2.6875,
      "learning_rate": 5.49545550129676e-06,
      "loss": 0.7874,
      "step": 771310
    },
    {
      "epoch": 2.7032843018711863,
      "grad_norm": 2.640625,
      "learning_rate": 5.494806472633058e-06,
      "loss": 0.8735,
      "step": 771320
    },
    {
      "epoch": 2.703319349378082,
      "grad_norm": 2.71875,
      "learning_rate": 5.494157443969355e-06,
      "loss": 0.8267,
      "step": 771330
    },
    {
      "epoch": 2.7033543968849774,
      "grad_norm": 2.84375,
      "learning_rate": 5.493508415305654e-06,
      "loss": 0.7934,
      "step": 771340
    },
    {
      "epoch": 2.703389444391873,
      "grad_norm": 3.046875,
      "learning_rate": 5.492859386641952e-06,
      "loss": 0.8019,
      "step": 771350
    },
    {
      "epoch": 2.703424491898769,
      "grad_norm": 3.078125,
      "learning_rate": 5.49221035797825e-06,
      "loss": 0.8472,
      "step": 771360
    },
    {
      "epoch": 2.7034595394056646,
      "grad_norm": 2.921875,
      "learning_rate": 5.491561329314548e-06,
      "loss": 0.7838,
      "step": 771370
    },
    {
      "epoch": 2.70349458691256,
      "grad_norm": 2.671875,
      "learning_rate": 5.490912300650846e-06,
      "loss": 0.8618,
      "step": 771380
    },
    {
      "epoch": 2.7035296344194557,
      "grad_norm": 3.3125,
      "learning_rate": 5.490263271987144e-06,
      "loss": 0.8227,
      "step": 771390
    },
    {
      "epoch": 2.703564681926351,
      "grad_norm": 2.828125,
      "learning_rate": 5.489614243323442e-06,
      "loss": 0.8159,
      "step": 771400
    },
    {
      "epoch": 2.703599729433247,
      "grad_norm": 2.421875,
      "learning_rate": 5.488965214659741e-06,
      "loss": 0.7544,
      "step": 771410
    },
    {
      "epoch": 2.7036347769401425,
      "grad_norm": 2.640625,
      "learning_rate": 5.488316185996038e-06,
      "loss": 0.7904,
      "step": 771420
    },
    {
      "epoch": 2.703669824447038,
      "grad_norm": 2.546875,
      "learning_rate": 5.487667157332337e-06,
      "loss": 0.6714,
      "step": 771430
    },
    {
      "epoch": 2.7037048719539336,
      "grad_norm": 3.015625,
      "learning_rate": 5.487018128668635e-06,
      "loss": 0.771,
      "step": 771440
    },
    {
      "epoch": 2.703739919460829,
      "grad_norm": 2.953125,
      "learning_rate": 5.486369100004933e-06,
      "loss": 0.8346,
      "step": 771450
    },
    {
      "epoch": 2.7037749669677247,
      "grad_norm": 2.453125,
      "learning_rate": 5.485720071341231e-06,
      "loss": 0.7711,
      "step": 771460
    },
    {
      "epoch": 2.7038100144746204,
      "grad_norm": 2.671875,
      "learning_rate": 5.485071042677529e-06,
      "loss": 0.7137,
      "step": 771470
    },
    {
      "epoch": 2.703845061981516,
      "grad_norm": 2.828125,
      "learning_rate": 5.484422014013827e-06,
      "loss": 0.7032,
      "step": 771480
    },
    {
      "epoch": 2.7038801094884115,
      "grad_norm": 3.3125,
      "learning_rate": 5.483772985350125e-06,
      "loss": 0.8383,
      "step": 771490
    },
    {
      "epoch": 2.7039151569953073,
      "grad_norm": 2.84375,
      "learning_rate": 5.483123956686424e-06,
      "loss": 0.8445,
      "step": 771500
    },
    {
      "epoch": 2.7039502045022026,
      "grad_norm": 3.15625,
      "learning_rate": 5.482474928022721e-06,
      "loss": 0.746,
      "step": 771510
    },
    {
      "epoch": 2.7039852520090983,
      "grad_norm": 3.078125,
      "learning_rate": 5.481825899359019e-06,
      "loss": 0.7591,
      "step": 771520
    },
    {
      "epoch": 2.704020299515994,
      "grad_norm": 2.828125,
      "learning_rate": 5.481176870695318e-06,
      "loss": 0.8736,
      "step": 771530
    },
    {
      "epoch": 2.7040553470228894,
      "grad_norm": 3.125,
      "learning_rate": 5.480527842031616e-06,
      "loss": 0.7286,
      "step": 771540
    },
    {
      "epoch": 2.704090394529785,
      "grad_norm": 3.21875,
      "learning_rate": 5.479878813367914e-06,
      "loss": 0.8885,
      "step": 771550
    },
    {
      "epoch": 2.7041254420366805,
      "grad_norm": 3.515625,
      "learning_rate": 5.479229784704212e-06,
      "loss": 0.9037,
      "step": 771560
    },
    {
      "epoch": 2.7041604895435762,
      "grad_norm": 2.75,
      "learning_rate": 5.47858075604051e-06,
      "loss": 0.7997,
      "step": 771570
    },
    {
      "epoch": 2.704195537050472,
      "grad_norm": 2.9375,
      "learning_rate": 5.477931727376808e-06,
      "loss": 0.8098,
      "step": 771580
    },
    {
      "epoch": 2.7042305845573678,
      "grad_norm": 3.3125,
      "learning_rate": 5.477282698713107e-06,
      "loss": 0.7671,
      "step": 771590
    },
    {
      "epoch": 2.704265632064263,
      "grad_norm": 2.828125,
      "learning_rate": 5.476633670049405e-06,
      "loss": 0.7639,
      "step": 771600
    },
    {
      "epoch": 2.704300679571159,
      "grad_norm": 3.125,
      "learning_rate": 5.475984641385702e-06,
      "loss": 0.8577,
      "step": 771610
    },
    {
      "epoch": 2.704335727078054,
      "grad_norm": 2.9375,
      "learning_rate": 5.475335612722001e-06,
      "loss": 0.8147,
      "step": 771620
    },
    {
      "epoch": 2.70437077458495,
      "grad_norm": 2.71875,
      "learning_rate": 5.474686584058299e-06,
      "loss": 0.8453,
      "step": 771630
    },
    {
      "epoch": 2.7044058220918457,
      "grad_norm": 3.171875,
      "learning_rate": 5.474037555394597e-06,
      "loss": 0.8637,
      "step": 771640
    },
    {
      "epoch": 2.704440869598741,
      "grad_norm": 2.703125,
      "learning_rate": 5.4733885267308955e-06,
      "loss": 0.8447,
      "step": 771650
    },
    {
      "epoch": 2.7044759171056367,
      "grad_norm": 2.734375,
      "learning_rate": 5.472739498067193e-06,
      "loss": 0.8742,
      "step": 771660
    },
    {
      "epoch": 2.704510964612532,
      "grad_norm": 3.28125,
      "learning_rate": 5.472090469403491e-06,
      "loss": 0.8499,
      "step": 771670
    },
    {
      "epoch": 2.704546012119428,
      "grad_norm": 2.859375,
      "learning_rate": 5.4714414407397895e-06,
      "loss": 0.7726,
      "step": 771680
    },
    {
      "epoch": 2.7045810596263236,
      "grad_norm": 2.9375,
      "learning_rate": 5.4707924120760875e-06,
      "loss": 0.7839,
      "step": 771690
    },
    {
      "epoch": 2.7046161071332193,
      "grad_norm": 3.234375,
      "learning_rate": 5.470143383412385e-06,
      "loss": 0.7727,
      "step": 771700
    },
    {
      "epoch": 2.7046511546401146,
      "grad_norm": 3.140625,
      "learning_rate": 5.4694943547486835e-06,
      "loss": 0.7867,
      "step": 771710
    },
    {
      "epoch": 2.7046862021470104,
      "grad_norm": 2.734375,
      "learning_rate": 5.4688453260849815e-06,
      "loss": 0.8568,
      "step": 771720
    },
    {
      "epoch": 2.7047212496539057,
      "grad_norm": 3.328125,
      "learning_rate": 5.4681962974212795e-06,
      "loss": 0.7462,
      "step": 771730
    },
    {
      "epoch": 2.7047562971608015,
      "grad_norm": 2.765625,
      "learning_rate": 5.467547268757578e-06,
      "loss": 0.884,
      "step": 771740
    },
    {
      "epoch": 2.704791344667697,
      "grad_norm": 2.71875,
      "learning_rate": 5.4668982400938755e-06,
      "loss": 0.7988,
      "step": 771750
    },
    {
      "epoch": 2.7048263921745925,
      "grad_norm": 3.265625,
      "learning_rate": 5.4662492114301735e-06,
      "loss": 0.7945,
      "step": 771760
    },
    {
      "epoch": 2.7048614396814883,
      "grad_norm": 2.84375,
      "learning_rate": 5.465600182766472e-06,
      "loss": 0.8035,
      "step": 771770
    },
    {
      "epoch": 2.7048964871883836,
      "grad_norm": 2.34375,
      "learning_rate": 5.46495115410277e-06,
      "loss": 0.7431,
      "step": 771780
    },
    {
      "epoch": 2.7049315346952794,
      "grad_norm": 2.390625,
      "learning_rate": 5.464302125439068e-06,
      "loss": 0.7983,
      "step": 771790
    },
    {
      "epoch": 2.704966582202175,
      "grad_norm": 3.0625,
      "learning_rate": 5.463653096775366e-06,
      "loss": 0.7957,
      "step": 771800
    },
    {
      "epoch": 2.705001629709071,
      "grad_norm": 3.296875,
      "learning_rate": 5.463004068111664e-06,
      "loss": 0.7684,
      "step": 771810
    },
    {
      "epoch": 2.705036677215966,
      "grad_norm": 3.265625,
      "learning_rate": 5.462355039447962e-06,
      "loss": 0.8161,
      "step": 771820
    },
    {
      "epoch": 2.705071724722862,
      "grad_norm": 3.109375,
      "learning_rate": 5.46170601078426e-06,
      "loss": 0.8497,
      "step": 771830
    },
    {
      "epoch": 2.7051067722297573,
      "grad_norm": 3.15625,
      "learning_rate": 5.461056982120558e-06,
      "loss": 0.8182,
      "step": 771840
    },
    {
      "epoch": 2.705141819736653,
      "grad_norm": 2.90625,
      "learning_rate": 5.460407953456856e-06,
      "loss": 0.7594,
      "step": 771850
    },
    {
      "epoch": 2.7051768672435488,
      "grad_norm": 2.5,
      "learning_rate": 5.459758924793155e-06,
      "loss": 0.818,
      "step": 771860
    },
    {
      "epoch": 2.705211914750444,
      "grad_norm": 2.65625,
      "learning_rate": 5.459109896129453e-06,
      "loss": 0.8264,
      "step": 771870
    },
    {
      "epoch": 2.70524696225734,
      "grad_norm": 3.078125,
      "learning_rate": 5.458460867465751e-06,
      "loss": 0.823,
      "step": 771880
    },
    {
      "epoch": 2.705282009764235,
      "grad_norm": 2.859375,
      "learning_rate": 5.457811838802049e-06,
      "loss": 0.7427,
      "step": 771890
    },
    {
      "epoch": 2.705317057271131,
      "grad_norm": 2.984375,
      "learning_rate": 5.457162810138347e-06,
      "loss": 0.7792,
      "step": 771900
    },
    {
      "epoch": 2.7053521047780267,
      "grad_norm": 2.984375,
      "learning_rate": 5.456513781474645e-06,
      "loss": 0.8805,
      "step": 771910
    },
    {
      "epoch": 2.7053871522849224,
      "grad_norm": 2.84375,
      "learning_rate": 5.455864752810943e-06,
      "loss": 0.8221,
      "step": 771920
    },
    {
      "epoch": 2.7054221997918178,
      "grad_norm": 3.671875,
      "learning_rate": 5.455215724147242e-06,
      "loss": 0.8632,
      "step": 771930
    },
    {
      "epoch": 2.7054572472987135,
      "grad_norm": 2.75,
      "learning_rate": 5.454566695483539e-06,
      "loss": 0.753,
      "step": 771940
    },
    {
      "epoch": 2.705492294805609,
      "grad_norm": 3.0,
      "learning_rate": 5.453917666819837e-06,
      "loss": 0.7268,
      "step": 771950
    },
    {
      "epoch": 2.7055273423125046,
      "grad_norm": 2.9375,
      "learning_rate": 5.453268638156136e-06,
      "loss": 0.8502,
      "step": 771960
    },
    {
      "epoch": 2.7055623898194003,
      "grad_norm": 2.921875,
      "learning_rate": 5.452619609492434e-06,
      "loss": 0.7836,
      "step": 771970
    },
    {
      "epoch": 2.7055974373262957,
      "grad_norm": 3.0,
      "learning_rate": 5.451970580828732e-06,
      "loss": 0.798,
      "step": 771980
    },
    {
      "epoch": 2.7056324848331914,
      "grad_norm": 2.890625,
      "learning_rate": 5.45132155216503e-06,
      "loss": 0.8155,
      "step": 771990
    },
    {
      "epoch": 2.7056675323400867,
      "grad_norm": 2.71875,
      "learning_rate": 5.450672523501328e-06,
      "loss": 0.8118,
      "step": 772000
    },
    {
      "epoch": 2.7057025798469825,
      "grad_norm": 2.65625,
      "learning_rate": 5.450023494837626e-06,
      "loss": 0.7948,
      "step": 772010
    },
    {
      "epoch": 2.7057376273538782,
      "grad_norm": 2.84375,
      "learning_rate": 5.449374466173925e-06,
      "loss": 0.7809,
      "step": 772020
    },
    {
      "epoch": 2.705772674860774,
      "grad_norm": 3.171875,
      "learning_rate": 5.448725437510222e-06,
      "loss": 0.7967,
      "step": 772030
    },
    {
      "epoch": 2.7058077223676693,
      "grad_norm": 2.875,
      "learning_rate": 5.44807640884652e-06,
      "loss": 0.8291,
      "step": 772040
    },
    {
      "epoch": 2.705842769874565,
      "grad_norm": 3.03125,
      "learning_rate": 5.447427380182819e-06,
      "loss": 0.8246,
      "step": 772050
    },
    {
      "epoch": 2.7058778173814604,
      "grad_norm": 2.828125,
      "learning_rate": 5.446778351519117e-06,
      "loss": 0.7699,
      "step": 772060
    },
    {
      "epoch": 2.705912864888356,
      "grad_norm": 3.046875,
      "learning_rate": 5.446129322855415e-06,
      "loss": 0.6577,
      "step": 772070
    },
    {
      "epoch": 2.705947912395252,
      "grad_norm": 3.265625,
      "learning_rate": 5.445480294191713e-06,
      "loss": 0.7976,
      "step": 772080
    },
    {
      "epoch": 2.705982959902147,
      "grad_norm": 2.875,
      "learning_rate": 5.444831265528011e-06,
      "loss": 0.8566,
      "step": 772090
    },
    {
      "epoch": 2.706018007409043,
      "grad_norm": 2.578125,
      "learning_rate": 5.444182236864309e-06,
      "loss": 0.7811,
      "step": 772100
    },
    {
      "epoch": 2.7060530549159383,
      "grad_norm": 2.875,
      "learning_rate": 5.443533208200608e-06,
      "loss": 0.892,
      "step": 772110
    },
    {
      "epoch": 2.706088102422834,
      "grad_norm": 3.046875,
      "learning_rate": 5.442884179536906e-06,
      "loss": 0.8166,
      "step": 772120
    },
    {
      "epoch": 2.70612314992973,
      "grad_norm": 2.921875,
      "learning_rate": 5.442235150873203e-06,
      "loss": 0.8305,
      "step": 772130
    },
    {
      "epoch": 2.7061581974366256,
      "grad_norm": 3.453125,
      "learning_rate": 5.441586122209502e-06,
      "loss": 0.7479,
      "step": 772140
    },
    {
      "epoch": 2.706193244943521,
      "grad_norm": 2.734375,
      "learning_rate": 5.4409370935458e-06,
      "loss": 0.8594,
      "step": 772150
    },
    {
      "epoch": 2.7062282924504166,
      "grad_norm": 2.625,
      "learning_rate": 5.440288064882098e-06,
      "loss": 0.7473,
      "step": 772160
    },
    {
      "epoch": 2.706263339957312,
      "grad_norm": 3.890625,
      "learning_rate": 5.439639036218396e-06,
      "loss": 0.8266,
      "step": 772170
    },
    {
      "epoch": 2.7062983874642077,
      "grad_norm": 2.109375,
      "learning_rate": 5.438990007554694e-06,
      "loss": 0.6991,
      "step": 772180
    },
    {
      "epoch": 2.7063334349711035,
      "grad_norm": 2.96875,
      "learning_rate": 5.438340978890992e-06,
      "loss": 0.8622,
      "step": 772190
    },
    {
      "epoch": 2.7063684824779988,
      "grad_norm": 2.71875,
      "learning_rate": 5.4376919502272906e-06,
      "loss": 0.8473,
      "step": 772200
    },
    {
      "epoch": 2.7064035299848945,
      "grad_norm": 2.953125,
      "learning_rate": 5.4370429215635886e-06,
      "loss": 0.7719,
      "step": 772210
    },
    {
      "epoch": 2.7064385774917903,
      "grad_norm": 2.75,
      "learning_rate": 5.436393892899886e-06,
      "loss": 0.7277,
      "step": 772220
    },
    {
      "epoch": 2.7064736249986856,
      "grad_norm": 2.90625,
      "learning_rate": 5.4357448642361846e-06,
      "loss": 0.7897,
      "step": 772230
    },
    {
      "epoch": 2.7065086725055814,
      "grad_norm": 3.03125,
      "learning_rate": 5.4350958355724826e-06,
      "loss": 0.9306,
      "step": 772240
    },
    {
      "epoch": 2.706543720012477,
      "grad_norm": 3.140625,
      "learning_rate": 5.4344468069087806e-06,
      "loss": 0.9068,
      "step": 772250
    },
    {
      "epoch": 2.7065787675193724,
      "grad_norm": 3.34375,
      "learning_rate": 5.433797778245079e-06,
      "loss": 0.9231,
      "step": 772260
    },
    {
      "epoch": 2.706613815026268,
      "grad_norm": 3.0,
      "learning_rate": 5.4331487495813766e-06,
      "loss": 0.8597,
      "step": 772270
    },
    {
      "epoch": 2.7066488625331635,
      "grad_norm": 2.890625,
      "learning_rate": 5.4324997209176746e-06,
      "loss": 0.7463,
      "step": 772280
    },
    {
      "epoch": 2.7066839100400593,
      "grad_norm": 3.125,
      "learning_rate": 5.431850692253973e-06,
      "loss": 0.8342,
      "step": 772290
    },
    {
      "epoch": 2.706718957546955,
      "grad_norm": 3.109375,
      "learning_rate": 5.431201663590271e-06,
      "loss": 0.8159,
      "step": 772300
    },
    {
      "epoch": 2.7067540050538503,
      "grad_norm": 2.9375,
      "learning_rate": 5.4305526349265686e-06,
      "loss": 0.7944,
      "step": 772310
    },
    {
      "epoch": 2.706789052560746,
      "grad_norm": 2.578125,
      "learning_rate": 5.429903606262867e-06,
      "loss": 0.8258,
      "step": 772320
    },
    {
      "epoch": 2.706824100067642,
      "grad_norm": 2.484375,
      "learning_rate": 5.429254577599165e-06,
      "loss": 0.7276,
      "step": 772330
    },
    {
      "epoch": 2.706859147574537,
      "grad_norm": 2.953125,
      "learning_rate": 5.428605548935463e-06,
      "loss": 0.7774,
      "step": 772340
    },
    {
      "epoch": 2.706894195081433,
      "grad_norm": 3.15625,
      "learning_rate": 5.427956520271761e-06,
      "loss": 0.8168,
      "step": 772350
    },
    {
      "epoch": 2.7069292425883287,
      "grad_norm": 2.796875,
      "learning_rate": 5.427307491608059e-06,
      "loss": 0.8184,
      "step": 772360
    },
    {
      "epoch": 2.706964290095224,
      "grad_norm": 3.21875,
      "learning_rate": 5.426658462944357e-06,
      "loss": 0.8428,
      "step": 772370
    },
    {
      "epoch": 2.7069993376021197,
      "grad_norm": 2.765625,
      "learning_rate": 5.426009434280656e-06,
      "loss": 0.7799,
      "step": 772380
    },
    {
      "epoch": 2.707034385109015,
      "grad_norm": 3.28125,
      "learning_rate": 5.425360405616954e-06,
      "loss": 0.7802,
      "step": 772390
    },
    {
      "epoch": 2.707069432615911,
      "grad_norm": 3.046875,
      "learning_rate": 5.424711376953252e-06,
      "loss": 0.8492,
      "step": 772400
    },
    {
      "epoch": 2.7071044801228066,
      "grad_norm": 2.859375,
      "learning_rate": 5.42406234828955e-06,
      "loss": 0.8111,
      "step": 772410
    },
    {
      "epoch": 2.7071395276297023,
      "grad_norm": 2.984375,
      "learning_rate": 5.423413319625848e-06,
      "loss": 0.7667,
      "step": 772420
    },
    {
      "epoch": 2.7071745751365976,
      "grad_norm": 2.640625,
      "learning_rate": 5.422764290962146e-06,
      "loss": 0.8222,
      "step": 772430
    },
    {
      "epoch": 2.7072096226434934,
      "grad_norm": 3.1875,
      "learning_rate": 5.422115262298444e-06,
      "loss": 0.757,
      "step": 772440
    },
    {
      "epoch": 2.7072446701503887,
      "grad_norm": 3.21875,
      "learning_rate": 5.421466233634743e-06,
      "loss": 0.8103,
      "step": 772450
    },
    {
      "epoch": 2.7072797176572845,
      "grad_norm": 2.875,
      "learning_rate": 5.42081720497104e-06,
      "loss": 0.8108,
      "step": 772460
    },
    {
      "epoch": 2.7073147651641802,
      "grad_norm": 2.453125,
      "learning_rate": 5.420168176307338e-06,
      "loss": 0.7871,
      "step": 772470
    },
    {
      "epoch": 2.7073498126710756,
      "grad_norm": 2.8125,
      "learning_rate": 5.419519147643637e-06,
      "loss": 0.8075,
      "step": 772480
    },
    {
      "epoch": 2.7073848601779713,
      "grad_norm": 2.921875,
      "learning_rate": 5.418870118979935e-06,
      "loss": 0.8005,
      "step": 772490
    },
    {
      "epoch": 2.7074199076848666,
      "grad_norm": 2.859375,
      "learning_rate": 5.418221090316233e-06,
      "loss": 0.7707,
      "step": 772500
    },
    {
      "epoch": 2.7074549551917624,
      "grad_norm": 3.046875,
      "learning_rate": 5.417572061652531e-06,
      "loss": 0.8799,
      "step": 772510
    },
    {
      "epoch": 2.707490002698658,
      "grad_norm": 2.890625,
      "learning_rate": 5.416923032988829e-06,
      "loss": 0.82,
      "step": 772520
    },
    {
      "epoch": 2.707525050205554,
      "grad_norm": 3.203125,
      "learning_rate": 5.416274004325127e-06,
      "loss": 0.6978,
      "step": 772530
    },
    {
      "epoch": 2.707560097712449,
      "grad_norm": 3.515625,
      "learning_rate": 5.415624975661426e-06,
      "loss": 0.7469,
      "step": 772540
    },
    {
      "epoch": 2.707595145219345,
      "grad_norm": 3.1875,
      "learning_rate": 5.414975946997723e-06,
      "loss": 0.8599,
      "step": 772550
    },
    {
      "epoch": 2.7076301927262403,
      "grad_norm": 2.859375,
      "learning_rate": 5.414326918334021e-06,
      "loss": 0.7816,
      "step": 772560
    },
    {
      "epoch": 2.707665240233136,
      "grad_norm": 2.609375,
      "learning_rate": 5.41367788967032e-06,
      "loss": 0.985,
      "step": 772570
    },
    {
      "epoch": 2.707700287740032,
      "grad_norm": 2.984375,
      "learning_rate": 5.413028861006618e-06,
      "loss": 0.7896,
      "step": 772580
    },
    {
      "epoch": 2.707735335246927,
      "grad_norm": 2.828125,
      "learning_rate": 5.412379832342916e-06,
      "loss": 0.7796,
      "step": 772590
    },
    {
      "epoch": 2.707770382753823,
      "grad_norm": 2.84375,
      "learning_rate": 5.411730803679214e-06,
      "loss": 0.8198,
      "step": 772600
    },
    {
      "epoch": 2.707805430260718,
      "grad_norm": 3.046875,
      "learning_rate": 5.411081775015512e-06,
      "loss": 0.8666,
      "step": 772610
    },
    {
      "epoch": 2.707840477767614,
      "grad_norm": 2.8125,
      "learning_rate": 5.41043274635181e-06,
      "loss": 0.8289,
      "step": 772620
    },
    {
      "epoch": 2.7078755252745097,
      "grad_norm": 2.703125,
      "learning_rate": 5.409783717688109e-06,
      "loss": 0.7798,
      "step": 772630
    },
    {
      "epoch": 2.7079105727814055,
      "grad_norm": 2.703125,
      "learning_rate": 5.409134689024406e-06,
      "loss": 0.8873,
      "step": 772640
    },
    {
      "epoch": 2.7079456202883008,
      "grad_norm": 2.765625,
      "learning_rate": 5.408485660360704e-06,
      "loss": 0.7569,
      "step": 772650
    },
    {
      "epoch": 2.7079806677951965,
      "grad_norm": 2.90625,
      "learning_rate": 5.407836631697003e-06,
      "loss": 0.8059,
      "step": 772660
    },
    {
      "epoch": 2.708015715302092,
      "grad_norm": 2.796875,
      "learning_rate": 5.407187603033301e-06,
      "loss": 0.828,
      "step": 772670
    },
    {
      "epoch": 2.7080507628089876,
      "grad_norm": 3.125,
      "learning_rate": 5.406538574369599e-06,
      "loss": 0.8258,
      "step": 772680
    },
    {
      "epoch": 2.7080858103158834,
      "grad_norm": 2.8125,
      "learning_rate": 5.405889545705897e-06,
      "loss": 0.7709,
      "step": 772690
    },
    {
      "epoch": 2.7081208578227787,
      "grad_norm": 2.828125,
      "learning_rate": 5.405240517042195e-06,
      "loss": 0.8428,
      "step": 772700
    },
    {
      "epoch": 2.7081559053296744,
      "grad_norm": 2.921875,
      "learning_rate": 5.404591488378493e-06,
      "loss": 0.869,
      "step": 772710
    },
    {
      "epoch": 2.7081909528365697,
      "grad_norm": 3.09375,
      "learning_rate": 5.403942459714792e-06,
      "loss": 0.8117,
      "step": 772720
    },
    {
      "epoch": 2.7082260003434655,
      "grad_norm": 3.125,
      "learning_rate": 5.40329343105109e-06,
      "loss": 0.7314,
      "step": 772730
    },
    {
      "epoch": 2.7082610478503613,
      "grad_norm": 3.234375,
      "learning_rate": 5.402644402387387e-06,
      "loss": 0.8107,
      "step": 772740
    },
    {
      "epoch": 2.708296095357257,
      "grad_norm": 2.90625,
      "learning_rate": 5.401995373723686e-06,
      "loss": 0.8201,
      "step": 772750
    },
    {
      "epoch": 2.7083311428641523,
      "grad_norm": 2.78125,
      "learning_rate": 5.401346345059984e-06,
      "loss": 0.7681,
      "step": 772760
    },
    {
      "epoch": 2.708366190371048,
      "grad_norm": 3.421875,
      "learning_rate": 5.400697316396282e-06,
      "loss": 0.859,
      "step": 772770
    },
    {
      "epoch": 2.7084012378779434,
      "grad_norm": 3.515625,
      "learning_rate": 5.40004828773258e-06,
      "loss": 0.7948,
      "step": 772780
    },
    {
      "epoch": 2.708436285384839,
      "grad_norm": 2.984375,
      "learning_rate": 5.399399259068878e-06,
      "loss": 0.8035,
      "step": 772790
    },
    {
      "epoch": 2.708471332891735,
      "grad_norm": 3.1875,
      "learning_rate": 5.398750230405176e-06,
      "loss": 0.7696,
      "step": 772800
    },
    {
      "epoch": 2.7085063803986302,
      "grad_norm": 3.484375,
      "learning_rate": 5.3981012017414745e-06,
      "loss": 0.897,
      "step": 772810
    },
    {
      "epoch": 2.708541427905526,
      "grad_norm": 2.78125,
      "learning_rate": 5.3974521730777725e-06,
      "loss": 0.7759,
      "step": 772820
    },
    {
      "epoch": 2.7085764754124213,
      "grad_norm": 3.0,
      "learning_rate": 5.39680314441407e-06,
      "loss": 0.7975,
      "step": 772830
    },
    {
      "epoch": 2.708611522919317,
      "grad_norm": 2.90625,
      "learning_rate": 5.3961541157503685e-06,
      "loss": 0.7718,
      "step": 772840
    },
    {
      "epoch": 2.708646570426213,
      "grad_norm": 2.78125,
      "learning_rate": 5.3955050870866665e-06,
      "loss": 0.9229,
      "step": 772850
    },
    {
      "epoch": 2.7086816179331086,
      "grad_norm": 2.8125,
      "learning_rate": 5.3948560584229645e-06,
      "loss": 0.7155,
      "step": 772860
    },
    {
      "epoch": 2.708716665440004,
      "grad_norm": 2.75,
      "learning_rate": 5.3942070297592625e-06,
      "loss": 0.8027,
      "step": 772870
    },
    {
      "epoch": 2.7087517129468996,
      "grad_norm": 3.34375,
      "learning_rate": 5.3935580010955605e-06,
      "loss": 0.833,
      "step": 772880
    },
    {
      "epoch": 2.708786760453795,
      "grad_norm": 2.75,
      "learning_rate": 5.3929089724318585e-06,
      "loss": 0.7534,
      "step": 772890
    },
    {
      "epoch": 2.7088218079606907,
      "grad_norm": 2.65625,
      "learning_rate": 5.3922599437681565e-06,
      "loss": 0.7912,
      "step": 772900
    },
    {
      "epoch": 2.7088568554675865,
      "grad_norm": 2.859375,
      "learning_rate": 5.391610915104455e-06,
      "loss": 0.802,
      "step": 772910
    },
    {
      "epoch": 2.708891902974482,
      "grad_norm": 2.703125,
      "learning_rate": 5.390961886440753e-06,
      "loss": 0.8211,
      "step": 772920
    },
    {
      "epoch": 2.7089269504813775,
      "grad_norm": 2.9375,
      "learning_rate": 5.390312857777051e-06,
      "loss": 0.8783,
      "step": 772930
    },
    {
      "epoch": 2.708961997988273,
      "grad_norm": 2.921875,
      "learning_rate": 5.389663829113349e-06,
      "loss": 0.7813,
      "step": 772940
    },
    {
      "epoch": 2.7089970454951686,
      "grad_norm": 3.15625,
      "learning_rate": 5.389014800449647e-06,
      "loss": 0.8262,
      "step": 772950
    },
    {
      "epoch": 2.7090320930020644,
      "grad_norm": 2.765625,
      "learning_rate": 5.388365771785945e-06,
      "loss": 0.7785,
      "step": 772960
    },
    {
      "epoch": 2.70906714050896,
      "grad_norm": 3.140625,
      "learning_rate": 5.387716743122243e-06,
      "loss": 0.8024,
      "step": 772970
    },
    {
      "epoch": 2.7091021880158555,
      "grad_norm": 2.5625,
      "learning_rate": 5.387067714458541e-06,
      "loss": 0.8064,
      "step": 772980
    },
    {
      "epoch": 2.709137235522751,
      "grad_norm": 3.21875,
      "learning_rate": 5.386418685794839e-06,
      "loss": 0.8047,
      "step": 772990
    },
    {
      "epoch": 2.7091722830296465,
      "grad_norm": 2.875,
      "learning_rate": 5.385769657131138e-06,
      "loss": 0.7921,
      "step": 773000
    },
    {
      "epoch": 2.7092073305365423,
      "grad_norm": 2.8125,
      "learning_rate": 5.385120628467436e-06,
      "loss": 0.7447,
      "step": 773010
    },
    {
      "epoch": 2.709242378043438,
      "grad_norm": 3.28125,
      "learning_rate": 5.384471599803733e-06,
      "loss": 0.8576,
      "step": 773020
    },
    {
      "epoch": 2.7092774255503334,
      "grad_norm": 3.234375,
      "learning_rate": 5.383822571140032e-06,
      "loss": 0.8369,
      "step": 773030
    },
    {
      "epoch": 2.709312473057229,
      "grad_norm": 2.640625,
      "learning_rate": 5.38317354247633e-06,
      "loss": 0.8687,
      "step": 773040
    },
    {
      "epoch": 2.7093475205641244,
      "grad_norm": 2.640625,
      "learning_rate": 5.382524513812628e-06,
      "loss": 0.7614,
      "step": 773050
    },
    {
      "epoch": 2.70938256807102,
      "grad_norm": 2.609375,
      "learning_rate": 5.381875485148927e-06,
      "loss": 0.8199,
      "step": 773060
    },
    {
      "epoch": 2.709417615577916,
      "grad_norm": 3.4375,
      "learning_rate": 5.381226456485224e-06,
      "loss": 0.7645,
      "step": 773070
    },
    {
      "epoch": 2.7094526630848117,
      "grad_norm": 2.921875,
      "learning_rate": 5.380577427821522e-06,
      "loss": 0.8241,
      "step": 773080
    },
    {
      "epoch": 2.709487710591707,
      "grad_norm": 2.59375,
      "learning_rate": 5.379928399157821e-06,
      "loss": 0.7437,
      "step": 773090
    },
    {
      "epoch": 2.7095227580986028,
      "grad_norm": 2.984375,
      "learning_rate": 5.379279370494119e-06,
      "loss": 0.9011,
      "step": 773100
    },
    {
      "epoch": 2.709557805605498,
      "grad_norm": 3.1875,
      "learning_rate": 5.378630341830416e-06,
      "loss": 0.8381,
      "step": 773110
    },
    {
      "epoch": 2.709592853112394,
      "grad_norm": 2.875,
      "learning_rate": 5.377981313166715e-06,
      "loss": 0.783,
      "step": 773120
    },
    {
      "epoch": 2.7096279006192896,
      "grad_norm": 2.71875,
      "learning_rate": 5.377332284503013e-06,
      "loss": 0.7864,
      "step": 773130
    },
    {
      "epoch": 2.709662948126185,
      "grad_norm": 2.859375,
      "learning_rate": 5.376683255839311e-06,
      "loss": 0.7763,
      "step": 773140
    },
    {
      "epoch": 2.7096979956330807,
      "grad_norm": 2.5625,
      "learning_rate": 5.37603422717561e-06,
      "loss": 0.8221,
      "step": 773150
    },
    {
      "epoch": 2.709733043139976,
      "grad_norm": 3.078125,
      "learning_rate": 5.375385198511907e-06,
      "loss": 0.774,
      "step": 773160
    },
    {
      "epoch": 2.7097680906468717,
      "grad_norm": 2.75,
      "learning_rate": 5.374736169848205e-06,
      "loss": 0.8303,
      "step": 773170
    },
    {
      "epoch": 2.7098031381537675,
      "grad_norm": 3.296875,
      "learning_rate": 5.374087141184504e-06,
      "loss": 0.8183,
      "step": 773180
    },
    {
      "epoch": 2.7098381856606633,
      "grad_norm": 3.03125,
      "learning_rate": 5.373438112520802e-06,
      "loss": 0.849,
      "step": 773190
    },
    {
      "epoch": 2.7098732331675586,
      "grad_norm": 3.1875,
      "learning_rate": 5.3727890838571e-06,
      "loss": 0.7645,
      "step": 773200
    },
    {
      "epoch": 2.7099082806744543,
      "grad_norm": 2.796875,
      "learning_rate": 5.372140055193398e-06,
      "loss": 0.8215,
      "step": 773210
    },
    {
      "epoch": 2.7099433281813496,
      "grad_norm": 3.09375,
      "learning_rate": 5.371491026529696e-06,
      "loss": 0.7796,
      "step": 773220
    },
    {
      "epoch": 2.7099783756882454,
      "grad_norm": 2.609375,
      "learning_rate": 5.370841997865994e-06,
      "loss": 0.754,
      "step": 773230
    },
    {
      "epoch": 2.710013423195141,
      "grad_norm": 2.609375,
      "learning_rate": 5.370192969202293e-06,
      "loss": 0.7708,
      "step": 773240
    },
    {
      "epoch": 2.7100484707020365,
      "grad_norm": 2.703125,
      "learning_rate": 5.36954394053859e-06,
      "loss": 0.7628,
      "step": 773250
    },
    {
      "epoch": 2.7100835182089322,
      "grad_norm": 2.96875,
      "learning_rate": 5.368894911874888e-06,
      "loss": 0.7904,
      "step": 773260
    },
    {
      "epoch": 2.7101185657158275,
      "grad_norm": 2.140625,
      "learning_rate": 5.368245883211187e-06,
      "loss": 0.8389,
      "step": 773270
    },
    {
      "epoch": 2.7101536132227233,
      "grad_norm": 3.328125,
      "learning_rate": 5.367596854547485e-06,
      "loss": 0.7786,
      "step": 773280
    },
    {
      "epoch": 2.710188660729619,
      "grad_norm": 2.78125,
      "learning_rate": 5.366947825883783e-06,
      "loss": 0.7891,
      "step": 773290
    },
    {
      "epoch": 2.710223708236515,
      "grad_norm": 2.90625,
      "learning_rate": 5.366298797220081e-06,
      "loss": 0.8794,
      "step": 773300
    },
    {
      "epoch": 2.71025875574341,
      "grad_norm": 2.953125,
      "learning_rate": 5.365649768556379e-06,
      "loss": 0.7969,
      "step": 773310
    },
    {
      "epoch": 2.710293803250306,
      "grad_norm": 3.15625,
      "learning_rate": 5.365000739892677e-06,
      "loss": 0.8333,
      "step": 773320
    },
    {
      "epoch": 2.710328850757201,
      "grad_norm": 3.0,
      "learning_rate": 5.364351711228975e-06,
      "loss": 0.7377,
      "step": 773330
    },
    {
      "epoch": 2.710363898264097,
      "grad_norm": 3.046875,
      "learning_rate": 5.3637026825652736e-06,
      "loss": 0.7836,
      "step": 773340
    },
    {
      "epoch": 2.7103989457709927,
      "grad_norm": 2.765625,
      "learning_rate": 5.363053653901571e-06,
      "loss": 0.8123,
      "step": 773350
    },
    {
      "epoch": 2.710433993277888,
      "grad_norm": 3.25,
      "learning_rate": 5.3624046252378696e-06,
      "loss": 0.8373,
      "step": 773360
    },
    {
      "epoch": 2.710469040784784,
      "grad_norm": 2.953125,
      "learning_rate": 5.3617555965741676e-06,
      "loss": 0.8152,
      "step": 773370
    },
    {
      "epoch": 2.710504088291679,
      "grad_norm": 3.171875,
      "learning_rate": 5.3611065679104656e-06,
      "loss": 0.7412,
      "step": 773380
    },
    {
      "epoch": 2.710539135798575,
      "grad_norm": 3.390625,
      "learning_rate": 5.3604575392467636e-06,
      "loss": 0.7788,
      "step": 773390
    },
    {
      "epoch": 2.7105741833054706,
      "grad_norm": 2.375,
      "learning_rate": 5.3598085105830616e-06,
      "loss": 0.8326,
      "step": 773400
    },
    {
      "epoch": 2.7106092308123664,
      "grad_norm": 3.046875,
      "learning_rate": 5.3591594819193596e-06,
      "loss": 0.7051,
      "step": 773410
    },
    {
      "epoch": 2.7106442783192617,
      "grad_norm": 3.03125,
      "learning_rate": 5.3585104532556576e-06,
      "loss": 0.8444,
      "step": 773420
    },
    {
      "epoch": 2.7106793258261574,
      "grad_norm": 3.0,
      "learning_rate": 5.357861424591956e-06,
      "loss": 0.7977,
      "step": 773430
    },
    {
      "epoch": 2.7107143733330528,
      "grad_norm": 2.890625,
      "learning_rate": 5.3572123959282536e-06,
      "loss": 0.8464,
      "step": 773440
    },
    {
      "epoch": 2.7107494208399485,
      "grad_norm": 2.65625,
      "learning_rate": 5.3565633672645516e-06,
      "loss": 0.8282,
      "step": 773450
    },
    {
      "epoch": 2.7107844683468443,
      "grad_norm": 2.90625,
      "learning_rate": 5.35591433860085e-06,
      "loss": 0.8837,
      "step": 773460
    },
    {
      "epoch": 2.7108195158537396,
      "grad_norm": 2.890625,
      "learning_rate": 5.355265309937148e-06,
      "loss": 0.8041,
      "step": 773470
    },
    {
      "epoch": 2.7108545633606353,
      "grad_norm": 2.890625,
      "learning_rate": 5.354616281273446e-06,
      "loss": 0.7915,
      "step": 773480
    },
    {
      "epoch": 2.7108896108675307,
      "grad_norm": 3.390625,
      "learning_rate": 5.353967252609744e-06,
      "loss": 0.8587,
      "step": 773490
    },
    {
      "epoch": 2.7109246583744264,
      "grad_norm": 3.21875,
      "learning_rate": 5.353318223946042e-06,
      "loss": 0.7847,
      "step": 773500
    },
    {
      "epoch": 2.710959705881322,
      "grad_norm": 2.671875,
      "learning_rate": 5.35266919528234e-06,
      "loss": 0.7936,
      "step": 773510
    },
    {
      "epoch": 2.710994753388218,
      "grad_norm": 2.921875,
      "learning_rate": 5.352020166618639e-06,
      "loss": 0.7821,
      "step": 773520
    },
    {
      "epoch": 2.7110298008951133,
      "grad_norm": 3.046875,
      "learning_rate": 5.351371137954937e-06,
      "loss": 0.8432,
      "step": 773530
    },
    {
      "epoch": 2.711064848402009,
      "grad_norm": 2.796875,
      "learning_rate": 5.350722109291234e-06,
      "loss": 0.7568,
      "step": 773540
    },
    {
      "epoch": 2.7110998959089043,
      "grad_norm": 3.25,
      "learning_rate": 5.350073080627533e-06,
      "loss": 0.7279,
      "step": 773550
    },
    {
      "epoch": 2.7111349434158,
      "grad_norm": 3.25,
      "learning_rate": 5.349424051963831e-06,
      "loss": 0.8724,
      "step": 773560
    },
    {
      "epoch": 2.711169990922696,
      "grad_norm": 2.78125,
      "learning_rate": 5.348775023300129e-06,
      "loss": 0.7925,
      "step": 773570
    },
    {
      "epoch": 2.711205038429591,
      "grad_norm": 2.921875,
      "learning_rate": 5.348125994636427e-06,
      "loss": 0.7878,
      "step": 773580
    },
    {
      "epoch": 2.711240085936487,
      "grad_norm": 3.078125,
      "learning_rate": 5.347476965972725e-06,
      "loss": 0.7589,
      "step": 773590
    },
    {
      "epoch": 2.7112751334433827,
      "grad_norm": 2.984375,
      "learning_rate": 5.346827937309023e-06,
      "loss": 0.7403,
      "step": 773600
    },
    {
      "epoch": 2.711310180950278,
      "grad_norm": 2.703125,
      "learning_rate": 5.346178908645322e-06,
      "loss": 0.7504,
      "step": 773610
    },
    {
      "epoch": 2.7113452284571737,
      "grad_norm": 3.078125,
      "learning_rate": 5.34552987998162e-06,
      "loss": 0.7807,
      "step": 773620
    },
    {
      "epoch": 2.7113802759640695,
      "grad_norm": 2.984375,
      "learning_rate": 5.344880851317917e-06,
      "loss": 0.7605,
      "step": 773630
    },
    {
      "epoch": 2.711415323470965,
      "grad_norm": 2.875,
      "learning_rate": 5.344231822654216e-06,
      "loss": 0.8682,
      "step": 773640
    },
    {
      "epoch": 2.7114503709778606,
      "grad_norm": 2.8125,
      "learning_rate": 5.343582793990514e-06,
      "loss": 0.7453,
      "step": 773650
    },
    {
      "epoch": 2.711485418484756,
      "grad_norm": 2.96875,
      "learning_rate": 5.342933765326812e-06,
      "loss": 0.9029,
      "step": 773660
    },
    {
      "epoch": 2.7115204659916516,
      "grad_norm": 2.875,
      "learning_rate": 5.342284736663111e-06,
      "loss": 0.7873,
      "step": 773670
    },
    {
      "epoch": 2.7115555134985474,
      "grad_norm": 2.46875,
      "learning_rate": 5.341635707999408e-06,
      "loss": 0.7966,
      "step": 773680
    },
    {
      "epoch": 2.7115905610054427,
      "grad_norm": 2.84375,
      "learning_rate": 5.340986679335706e-06,
      "loss": 0.8196,
      "step": 773690
    },
    {
      "epoch": 2.7116256085123385,
      "grad_norm": 2.78125,
      "learning_rate": 5.340337650672005e-06,
      "loss": 0.7551,
      "step": 773700
    },
    {
      "epoch": 2.7116606560192342,
      "grad_norm": 3.09375,
      "learning_rate": 5.339688622008303e-06,
      "loss": 0.8116,
      "step": 773710
    },
    {
      "epoch": 2.7116957035261295,
      "grad_norm": 2.90625,
      "learning_rate": 5.3390395933446e-06,
      "loss": 0.8105,
      "step": 773720
    },
    {
      "epoch": 2.7117307510330253,
      "grad_norm": 3.296875,
      "learning_rate": 5.338390564680899e-06,
      "loss": 0.8084,
      "step": 773730
    },
    {
      "epoch": 2.711765798539921,
      "grad_norm": 2.40625,
      "learning_rate": 5.337741536017197e-06,
      "loss": 0.8066,
      "step": 773740
    },
    {
      "epoch": 2.7118008460468164,
      "grad_norm": 3.28125,
      "learning_rate": 5.337092507353495e-06,
      "loss": 0.7735,
      "step": 773750
    },
    {
      "epoch": 2.711835893553712,
      "grad_norm": 2.859375,
      "learning_rate": 5.336443478689793e-06,
      "loss": 0.8408,
      "step": 773760
    },
    {
      "epoch": 2.7118709410606074,
      "grad_norm": 3.59375,
      "learning_rate": 5.335794450026091e-06,
      "loss": 0.8509,
      "step": 773770
    },
    {
      "epoch": 2.711905988567503,
      "grad_norm": 2.578125,
      "learning_rate": 5.335145421362389e-06,
      "loss": 0.753,
      "step": 773780
    },
    {
      "epoch": 2.711941036074399,
      "grad_norm": 3.125,
      "learning_rate": 5.334496392698688e-06,
      "loss": 0.9145,
      "step": 773790
    },
    {
      "epoch": 2.7119760835812947,
      "grad_norm": 2.859375,
      "learning_rate": 5.333847364034986e-06,
      "loss": 0.8529,
      "step": 773800
    },
    {
      "epoch": 2.71201113108819,
      "grad_norm": 2.96875,
      "learning_rate": 5.333198335371284e-06,
      "loss": 0.8087,
      "step": 773810
    },
    {
      "epoch": 2.712046178595086,
      "grad_norm": 3.234375,
      "learning_rate": 5.332549306707582e-06,
      "loss": 0.7416,
      "step": 773820
    },
    {
      "epoch": 2.712081226101981,
      "grad_norm": 2.609375,
      "learning_rate": 5.33190027804388e-06,
      "loss": 0.7757,
      "step": 773830
    },
    {
      "epoch": 2.712116273608877,
      "grad_norm": 2.109375,
      "learning_rate": 5.331251249380178e-06,
      "loss": 0.758,
      "step": 773840
    },
    {
      "epoch": 2.7121513211157726,
      "grad_norm": 3.015625,
      "learning_rate": 5.330602220716476e-06,
      "loss": 0.7105,
      "step": 773850
    },
    {
      "epoch": 2.712186368622668,
      "grad_norm": 2.453125,
      "learning_rate": 5.329953192052775e-06,
      "loss": 0.7904,
      "step": 773860
    },
    {
      "epoch": 2.7122214161295637,
      "grad_norm": 2.796875,
      "learning_rate": 5.329304163389072e-06,
      "loss": 0.8108,
      "step": 773870
    },
    {
      "epoch": 2.712256463636459,
      "grad_norm": 3.234375,
      "learning_rate": 5.32865513472537e-06,
      "loss": 0.7891,
      "step": 773880
    },
    {
      "epoch": 2.7122915111433548,
      "grad_norm": 2.640625,
      "learning_rate": 5.328006106061669e-06,
      "loss": 0.8226,
      "step": 773890
    },
    {
      "epoch": 2.7123265586502505,
      "grad_norm": 3.25,
      "learning_rate": 5.327357077397967e-06,
      "loss": 0.8198,
      "step": 773900
    },
    {
      "epoch": 2.7123616061571463,
      "grad_norm": 3.3125,
      "learning_rate": 5.326708048734265e-06,
      "loss": 0.947,
      "step": 773910
    },
    {
      "epoch": 2.7123966536640416,
      "grad_norm": 2.640625,
      "learning_rate": 5.326059020070563e-06,
      "loss": 0.8216,
      "step": 773920
    },
    {
      "epoch": 2.7124317011709373,
      "grad_norm": 2.765625,
      "learning_rate": 5.325409991406861e-06,
      "loss": 0.7756,
      "step": 773930
    },
    {
      "epoch": 2.7124667486778327,
      "grad_norm": 2.78125,
      "learning_rate": 5.324760962743159e-06,
      "loss": 0.8065,
      "step": 773940
    },
    {
      "epoch": 2.7125017961847284,
      "grad_norm": 2.765625,
      "learning_rate": 5.3241119340794575e-06,
      "loss": 0.7548,
      "step": 773950
    },
    {
      "epoch": 2.712536843691624,
      "grad_norm": 2.65625,
      "learning_rate": 5.323462905415755e-06,
      "loss": 0.7611,
      "step": 773960
    },
    {
      "epoch": 2.7125718911985195,
      "grad_norm": 3.375,
      "learning_rate": 5.322813876752053e-06,
      "loss": 0.7907,
      "step": 773970
    },
    {
      "epoch": 2.7126069387054152,
      "grad_norm": 2.890625,
      "learning_rate": 5.3221648480883515e-06,
      "loss": 0.8521,
      "step": 773980
    },
    {
      "epoch": 2.7126419862123106,
      "grad_norm": 2.875,
      "learning_rate": 5.3215158194246495e-06,
      "loss": 0.7245,
      "step": 773990
    },
    {
      "epoch": 2.7126770337192063,
      "grad_norm": 2.546875,
      "learning_rate": 5.3208667907609475e-06,
      "loss": 0.771,
      "step": 774000
    },
    {
      "epoch": 2.712712081226102,
      "grad_norm": 2.890625,
      "learning_rate": 5.3202177620972455e-06,
      "loss": 0.7686,
      "step": 774010
    },
    {
      "epoch": 2.712747128732998,
      "grad_norm": 3.34375,
      "learning_rate": 5.3195687334335435e-06,
      "loss": 0.7614,
      "step": 774020
    },
    {
      "epoch": 2.712782176239893,
      "grad_norm": 3.046875,
      "learning_rate": 5.3189197047698415e-06,
      "loss": 0.8245,
      "step": 774030
    },
    {
      "epoch": 2.712817223746789,
      "grad_norm": 2.609375,
      "learning_rate": 5.31827067610614e-06,
      "loss": 0.8399,
      "step": 774040
    },
    {
      "epoch": 2.7128522712536842,
      "grad_norm": 3.0,
      "learning_rate": 5.3176216474424375e-06,
      "loss": 0.8238,
      "step": 774050
    },
    {
      "epoch": 2.71288731876058,
      "grad_norm": 2.515625,
      "learning_rate": 5.3169726187787355e-06,
      "loss": 0.7959,
      "step": 774060
    },
    {
      "epoch": 2.7129223662674757,
      "grad_norm": 2.84375,
      "learning_rate": 5.316323590115034e-06,
      "loss": 0.7817,
      "step": 774070
    },
    {
      "epoch": 2.712957413774371,
      "grad_norm": 3.015625,
      "learning_rate": 5.315674561451332e-06,
      "loss": 0.7648,
      "step": 774080
    },
    {
      "epoch": 2.712992461281267,
      "grad_norm": 2.71875,
      "learning_rate": 5.31502553278763e-06,
      "loss": 0.8075,
      "step": 774090
    },
    {
      "epoch": 2.713027508788162,
      "grad_norm": 2.84375,
      "learning_rate": 5.314376504123928e-06,
      "loss": 0.7925,
      "step": 774100
    },
    {
      "epoch": 2.713062556295058,
      "grad_norm": 3.046875,
      "learning_rate": 5.313727475460226e-06,
      "loss": 0.7699,
      "step": 774110
    },
    {
      "epoch": 2.7130976038019536,
      "grad_norm": 3.515625,
      "learning_rate": 5.313078446796524e-06,
      "loss": 0.769,
      "step": 774120
    },
    {
      "epoch": 2.7131326513088494,
      "grad_norm": 3.109375,
      "learning_rate": 5.312429418132823e-06,
      "loss": 0.8542,
      "step": 774130
    },
    {
      "epoch": 2.7131676988157447,
      "grad_norm": 3.046875,
      "learning_rate": 5.311780389469121e-06,
      "loss": 0.8419,
      "step": 774140
    },
    {
      "epoch": 2.7132027463226405,
      "grad_norm": 2.828125,
      "learning_rate": 5.311131360805418e-06,
      "loss": 0.865,
      "step": 774150
    },
    {
      "epoch": 2.713237793829536,
      "grad_norm": 3.390625,
      "learning_rate": 5.310482332141717e-06,
      "loss": 0.8675,
      "step": 774160
    },
    {
      "epoch": 2.7132728413364315,
      "grad_norm": 2.9375,
      "learning_rate": 5.309833303478015e-06,
      "loss": 0.8461,
      "step": 774170
    },
    {
      "epoch": 2.7133078888433273,
      "grad_norm": 3.171875,
      "learning_rate": 5.309184274814313e-06,
      "loss": 0.7798,
      "step": 774180
    },
    {
      "epoch": 2.7133429363502226,
      "grad_norm": 3.09375,
      "learning_rate": 5.308535246150611e-06,
      "loss": 0.8862,
      "step": 774190
    },
    {
      "epoch": 2.7133779838571184,
      "grad_norm": 3.125,
      "learning_rate": 5.307886217486909e-06,
      "loss": 0.7915,
      "step": 774200
    },
    {
      "epoch": 2.7134130313640137,
      "grad_norm": 2.78125,
      "learning_rate": 5.307237188823207e-06,
      "loss": 0.7628,
      "step": 774210
    },
    {
      "epoch": 2.7134480788709094,
      "grad_norm": 2.65625,
      "learning_rate": 5.306588160159506e-06,
      "loss": 0.7523,
      "step": 774220
    },
    {
      "epoch": 2.713483126377805,
      "grad_norm": 2.8125,
      "learning_rate": 5.305939131495804e-06,
      "loss": 0.8403,
      "step": 774230
    },
    {
      "epoch": 2.713518173884701,
      "grad_norm": 2.75,
      "learning_rate": 5.305290102832101e-06,
      "loss": 0.8285,
      "step": 774240
    },
    {
      "epoch": 2.7135532213915963,
      "grad_norm": 2.796875,
      "learning_rate": 5.3046410741684e-06,
      "loss": 0.8439,
      "step": 774250
    },
    {
      "epoch": 2.713588268898492,
      "grad_norm": 2.78125,
      "learning_rate": 5.303992045504698e-06,
      "loss": 0.7812,
      "step": 774260
    },
    {
      "epoch": 2.7136233164053873,
      "grad_norm": 2.65625,
      "learning_rate": 5.303343016840996e-06,
      "loss": 0.7813,
      "step": 774270
    },
    {
      "epoch": 2.713658363912283,
      "grad_norm": 3.015625,
      "learning_rate": 5.302693988177294e-06,
      "loss": 0.8102,
      "step": 774280
    },
    {
      "epoch": 2.713693411419179,
      "grad_norm": 2.40625,
      "learning_rate": 5.302044959513592e-06,
      "loss": 0.7877,
      "step": 774290
    },
    {
      "epoch": 2.713728458926074,
      "grad_norm": 3.40625,
      "learning_rate": 5.30139593084989e-06,
      "loss": 0.8293,
      "step": 774300
    },
    {
      "epoch": 2.71376350643297,
      "grad_norm": 3.03125,
      "learning_rate": 5.300746902186188e-06,
      "loss": 0.8002,
      "step": 774310
    },
    {
      "epoch": 2.7137985539398652,
      "grad_norm": 2.9375,
      "learning_rate": 5.300097873522487e-06,
      "loss": 0.74,
      "step": 774320
    },
    {
      "epoch": 2.713833601446761,
      "grad_norm": 2.484375,
      "learning_rate": 5.299448844858785e-06,
      "loss": 0.7143,
      "step": 774330
    },
    {
      "epoch": 2.7138686489536568,
      "grad_norm": 3.171875,
      "learning_rate": 5.298799816195083e-06,
      "loss": 0.7999,
      "step": 774340
    },
    {
      "epoch": 2.7139036964605525,
      "grad_norm": 3.15625,
      "learning_rate": 5.298150787531381e-06,
      "loss": 0.815,
      "step": 774350
    },
    {
      "epoch": 2.713938743967448,
      "grad_norm": 2.921875,
      "learning_rate": 5.297501758867679e-06,
      "loss": 0.837,
      "step": 774360
    },
    {
      "epoch": 2.7139737914743436,
      "grad_norm": 2.671875,
      "learning_rate": 5.296852730203977e-06,
      "loss": 0.8379,
      "step": 774370
    },
    {
      "epoch": 2.714008838981239,
      "grad_norm": 2.859375,
      "learning_rate": 5.296203701540275e-06,
      "loss": 0.8109,
      "step": 774380
    },
    {
      "epoch": 2.7140438864881347,
      "grad_norm": 3.140625,
      "learning_rate": 5.295554672876573e-06,
      "loss": 0.8366,
      "step": 774390
    },
    {
      "epoch": 2.7140789339950304,
      "grad_norm": 2.90625,
      "learning_rate": 5.294905644212871e-06,
      "loss": 0.7509,
      "step": 774400
    },
    {
      "epoch": 2.7141139815019257,
      "grad_norm": 3.578125,
      "learning_rate": 5.29425661554917e-06,
      "loss": 0.786,
      "step": 774410
    },
    {
      "epoch": 2.7141490290088215,
      "grad_norm": 3.3125,
      "learning_rate": 5.293607586885468e-06,
      "loss": 0.8156,
      "step": 774420
    },
    {
      "epoch": 2.714184076515717,
      "grad_norm": 3.015625,
      "learning_rate": 5.292958558221765e-06,
      "loss": 0.6874,
      "step": 774430
    },
    {
      "epoch": 2.7142191240226126,
      "grad_norm": 3.03125,
      "learning_rate": 5.292309529558064e-06,
      "loss": 0.8314,
      "step": 774440
    },
    {
      "epoch": 2.7142541715295083,
      "grad_norm": 2.6875,
      "learning_rate": 5.291660500894362e-06,
      "loss": 0.798,
      "step": 774450
    },
    {
      "epoch": 2.714289219036404,
      "grad_norm": 2.796875,
      "learning_rate": 5.29101147223066e-06,
      "loss": 0.8327,
      "step": 774460
    },
    {
      "epoch": 2.7143242665432994,
      "grad_norm": 2.9375,
      "learning_rate": 5.2903624435669586e-06,
      "loss": 0.7722,
      "step": 774470
    },
    {
      "epoch": 2.714359314050195,
      "grad_norm": 2.609375,
      "learning_rate": 5.289713414903256e-06,
      "loss": 0.7714,
      "step": 774480
    },
    {
      "epoch": 2.7143943615570905,
      "grad_norm": 2.96875,
      "learning_rate": 5.289064386239554e-06,
      "loss": 0.8371,
      "step": 774490
    },
    {
      "epoch": 2.714429409063986,
      "grad_norm": 2.9375,
      "learning_rate": 5.2884153575758526e-06,
      "loss": 0.823,
      "step": 774500
    },
    {
      "epoch": 2.714464456570882,
      "grad_norm": 2.75,
      "learning_rate": 5.2877663289121506e-06,
      "loss": 0.7489,
      "step": 774510
    },
    {
      "epoch": 2.7144995040777773,
      "grad_norm": 3.015625,
      "learning_rate": 5.287117300248448e-06,
      "loss": 0.7601,
      "step": 774520
    },
    {
      "epoch": 2.714534551584673,
      "grad_norm": 2.90625,
      "learning_rate": 5.2864682715847466e-06,
      "loss": 0.8043,
      "step": 774530
    },
    {
      "epoch": 2.7145695990915684,
      "grad_norm": 3.15625,
      "learning_rate": 5.2858192429210446e-06,
      "loss": 0.8063,
      "step": 774540
    },
    {
      "epoch": 2.714604646598464,
      "grad_norm": 2.90625,
      "learning_rate": 5.2851702142573426e-06,
      "loss": 0.8615,
      "step": 774550
    },
    {
      "epoch": 2.71463969410536,
      "grad_norm": 3.03125,
      "learning_rate": 5.284521185593641e-06,
      "loss": 0.8229,
      "step": 774560
    },
    {
      "epoch": 2.7146747416122556,
      "grad_norm": 2.53125,
      "learning_rate": 5.2838721569299386e-06,
      "loss": 0.7832,
      "step": 774570
    },
    {
      "epoch": 2.714709789119151,
      "grad_norm": 3.34375,
      "learning_rate": 5.2832231282662366e-06,
      "loss": 0.7741,
      "step": 774580
    },
    {
      "epoch": 2.7147448366260467,
      "grad_norm": 2.78125,
      "learning_rate": 5.282574099602535e-06,
      "loss": 0.8175,
      "step": 774590
    },
    {
      "epoch": 2.714779884132942,
      "grad_norm": 2.703125,
      "learning_rate": 5.281925070938833e-06,
      "loss": 0.8592,
      "step": 774600
    },
    {
      "epoch": 2.714814931639838,
      "grad_norm": 3.140625,
      "learning_rate": 5.281276042275131e-06,
      "loss": 0.8858,
      "step": 774610
    },
    {
      "epoch": 2.7148499791467335,
      "grad_norm": 2.96875,
      "learning_rate": 5.280627013611429e-06,
      "loss": 0.8415,
      "step": 774620
    },
    {
      "epoch": 2.714885026653629,
      "grad_norm": 3.0,
      "learning_rate": 5.279977984947727e-06,
      "loss": 0.7458,
      "step": 774630
    },
    {
      "epoch": 2.7149200741605246,
      "grad_norm": 2.4375,
      "learning_rate": 5.279328956284025e-06,
      "loss": 0.7748,
      "step": 774640
    },
    {
      "epoch": 2.71495512166742,
      "grad_norm": 2.625,
      "learning_rate": 5.278679927620324e-06,
      "loss": 0.8185,
      "step": 774650
    },
    {
      "epoch": 2.7149901691743157,
      "grad_norm": 3.078125,
      "learning_rate": 5.278030898956622e-06,
      "loss": 0.7997,
      "step": 774660
    },
    {
      "epoch": 2.7150252166812114,
      "grad_norm": 2.421875,
      "learning_rate": 5.277381870292919e-06,
      "loss": 0.7788,
      "step": 774670
    },
    {
      "epoch": 2.715060264188107,
      "grad_norm": 2.9375,
      "learning_rate": 5.276732841629218e-06,
      "loss": 0.7904,
      "step": 774680
    },
    {
      "epoch": 2.7150953116950025,
      "grad_norm": 3.171875,
      "learning_rate": 5.276083812965516e-06,
      "loss": 0.8018,
      "step": 774690
    },
    {
      "epoch": 2.7151303592018983,
      "grad_norm": 3.203125,
      "learning_rate": 5.275434784301814e-06,
      "loss": 0.783,
      "step": 774700
    },
    {
      "epoch": 2.7151654067087936,
      "grad_norm": 3.125,
      "learning_rate": 5.274785755638112e-06,
      "loss": 0.7618,
      "step": 774710
    },
    {
      "epoch": 2.7152004542156893,
      "grad_norm": 2.859375,
      "learning_rate": 5.27413672697441e-06,
      "loss": 0.8359,
      "step": 774720
    },
    {
      "epoch": 2.715235501722585,
      "grad_norm": 3.25,
      "learning_rate": 5.273487698310708e-06,
      "loss": 0.7969,
      "step": 774730
    },
    {
      "epoch": 2.7152705492294804,
      "grad_norm": 3.4375,
      "learning_rate": 5.272838669647007e-06,
      "loss": 0.8245,
      "step": 774740
    },
    {
      "epoch": 2.715305596736376,
      "grad_norm": 3.328125,
      "learning_rate": 5.272189640983305e-06,
      "loss": 0.7551,
      "step": 774750
    },
    {
      "epoch": 2.7153406442432715,
      "grad_norm": 3.09375,
      "learning_rate": 5.271540612319602e-06,
      "loss": 0.7566,
      "step": 774760
    },
    {
      "epoch": 2.7153756917501672,
      "grad_norm": 3.265625,
      "learning_rate": 5.270891583655901e-06,
      "loss": 0.8527,
      "step": 774770
    },
    {
      "epoch": 2.715410739257063,
      "grad_norm": 2.8125,
      "learning_rate": 5.270242554992199e-06,
      "loss": 0.8518,
      "step": 774780
    },
    {
      "epoch": 2.7154457867639588,
      "grad_norm": 2.921875,
      "learning_rate": 5.269593526328497e-06,
      "loss": 0.7704,
      "step": 774790
    },
    {
      "epoch": 2.715480834270854,
      "grad_norm": 3.0625,
      "learning_rate": 5.268944497664795e-06,
      "loss": 0.8502,
      "step": 774800
    },
    {
      "epoch": 2.71551588177775,
      "grad_norm": 3.1875,
      "learning_rate": 5.268295469001093e-06,
      "loss": 0.789,
      "step": 774810
    },
    {
      "epoch": 2.715550929284645,
      "grad_norm": 3.3125,
      "learning_rate": 5.267646440337391e-06,
      "loss": 0.7792,
      "step": 774820
    },
    {
      "epoch": 2.715585976791541,
      "grad_norm": 2.75,
      "learning_rate": 5.266997411673689e-06,
      "loss": 0.7524,
      "step": 774830
    },
    {
      "epoch": 2.7156210242984367,
      "grad_norm": 3.25,
      "learning_rate": 5.266348383009988e-06,
      "loss": 0.7649,
      "step": 774840
    },
    {
      "epoch": 2.715656071805332,
      "grad_norm": 3.09375,
      "learning_rate": 5.265699354346285e-06,
      "loss": 0.7946,
      "step": 774850
    },
    {
      "epoch": 2.7156911193122277,
      "grad_norm": 2.984375,
      "learning_rate": 5.265050325682584e-06,
      "loss": 0.799,
      "step": 774860
    },
    {
      "epoch": 2.715726166819123,
      "grad_norm": 3.390625,
      "learning_rate": 5.264401297018882e-06,
      "loss": 0.8354,
      "step": 774870
    },
    {
      "epoch": 2.715761214326019,
      "grad_norm": 3.109375,
      "learning_rate": 5.26375226835518e-06,
      "loss": 0.8589,
      "step": 774880
    },
    {
      "epoch": 2.7157962618329146,
      "grad_norm": 3.515625,
      "learning_rate": 5.263103239691478e-06,
      "loss": 0.8091,
      "step": 774890
    },
    {
      "epoch": 2.7158313093398103,
      "grad_norm": 3.25,
      "learning_rate": 5.262454211027776e-06,
      "loss": 0.7865,
      "step": 774900
    },
    {
      "epoch": 2.7158663568467056,
      "grad_norm": 2.59375,
      "learning_rate": 5.261805182364074e-06,
      "loss": 0.7531,
      "step": 774910
    },
    {
      "epoch": 2.7159014043536014,
      "grad_norm": 2.4375,
      "learning_rate": 5.261156153700372e-06,
      "loss": 0.8357,
      "step": 774920
    },
    {
      "epoch": 2.7159364518604967,
      "grad_norm": 2.90625,
      "learning_rate": 5.260507125036671e-06,
      "loss": 0.7818,
      "step": 774930
    },
    {
      "epoch": 2.7159714993673925,
      "grad_norm": 2.984375,
      "learning_rate": 5.259858096372969e-06,
      "loss": 0.7344,
      "step": 774940
    },
    {
      "epoch": 2.716006546874288,
      "grad_norm": 2.671875,
      "learning_rate": 5.259209067709266e-06,
      "loss": 0.7948,
      "step": 774950
    },
    {
      "epoch": 2.7160415943811835,
      "grad_norm": 2.875,
      "learning_rate": 5.258560039045565e-06,
      "loss": 0.723,
      "step": 774960
    },
    {
      "epoch": 2.7160766418880793,
      "grad_norm": 2.921875,
      "learning_rate": 5.257911010381863e-06,
      "loss": 0.8367,
      "step": 774970
    },
    {
      "epoch": 2.716111689394975,
      "grad_norm": 2.65625,
      "learning_rate": 5.257261981718161e-06,
      "loss": 0.8259,
      "step": 774980
    },
    {
      "epoch": 2.7161467369018704,
      "grad_norm": 2.6875,
      "learning_rate": 5.256612953054459e-06,
      "loss": 0.7899,
      "step": 774990
    },
    {
      "epoch": 2.716181784408766,
      "grad_norm": 3.234375,
      "learning_rate": 5.255963924390757e-06,
      "loss": 0.812,
      "step": 775000
    },
    {
      "epoch": 2.716181784408766,
      "eval_loss": 0.7533217072486877,
      "eval_runtime": 560.8671,
      "eval_samples_per_second": 678.3,
      "eval_steps_per_second": 56.525,
      "step": 775000
    },
    {
      "epoch": 2.716216831915662,
      "grad_norm": 2.859375,
      "learning_rate": 5.255314895727055e-06,
      "loss": 0.7432,
      "step": 775010
    },
    {
      "epoch": 2.716251879422557,
      "grad_norm": 3.0625,
      "learning_rate": 5.254665867063354e-06,
      "loss": 0.7862,
      "step": 775020
    },
    {
      "epoch": 2.716286926929453,
      "grad_norm": 3.125,
      "learning_rate": 5.254016838399652e-06,
      "loss": 0.8624,
      "step": 775030
    },
    {
      "epoch": 2.7163219744363483,
      "grad_norm": 2.640625,
      "learning_rate": 5.253367809735949e-06,
      "loss": 0.8029,
      "step": 775040
    },
    {
      "epoch": 2.716357021943244,
      "grad_norm": 3.09375,
      "learning_rate": 5.252718781072248e-06,
      "loss": 0.796,
      "step": 775050
    },
    {
      "epoch": 2.71639206945014,
      "grad_norm": 2.65625,
      "learning_rate": 5.252069752408546e-06,
      "loss": 0.7837,
      "step": 775060
    },
    {
      "epoch": 2.7164271169570355,
      "grad_norm": 2.515625,
      "learning_rate": 5.251420723744844e-06,
      "loss": 0.8277,
      "step": 775070
    },
    {
      "epoch": 2.716462164463931,
      "grad_norm": 2.921875,
      "learning_rate": 5.2507716950811425e-06,
      "loss": 0.9525,
      "step": 775080
    },
    {
      "epoch": 2.7164972119708266,
      "grad_norm": 2.890625,
      "learning_rate": 5.25012266641744e-06,
      "loss": 0.7786,
      "step": 775090
    },
    {
      "epoch": 2.716532259477722,
      "grad_norm": 2.84375,
      "learning_rate": 5.249473637753738e-06,
      "loss": 0.8443,
      "step": 775100
    },
    {
      "epoch": 2.7165673069846177,
      "grad_norm": 3.328125,
      "learning_rate": 5.2488246090900365e-06,
      "loss": 0.8456,
      "step": 775110
    },
    {
      "epoch": 2.7166023544915134,
      "grad_norm": 2.9375,
      "learning_rate": 5.2481755804263345e-06,
      "loss": 0.7733,
      "step": 775120
    },
    {
      "epoch": 2.7166374019984088,
      "grad_norm": 3.109375,
      "learning_rate": 5.2475265517626325e-06,
      "loss": 0.8826,
      "step": 775130
    },
    {
      "epoch": 2.7166724495053045,
      "grad_norm": 3.03125,
      "learning_rate": 5.2468775230989305e-06,
      "loss": 0.8329,
      "step": 775140
    },
    {
      "epoch": 2.7167074970122,
      "grad_norm": 2.6875,
      "learning_rate": 5.2462284944352285e-06,
      "loss": 0.7436,
      "step": 775150
    },
    {
      "epoch": 2.7167425445190956,
      "grad_norm": 3.171875,
      "learning_rate": 5.2455794657715265e-06,
      "loss": 0.7831,
      "step": 775160
    },
    {
      "epoch": 2.7167775920259913,
      "grad_norm": 2.515625,
      "learning_rate": 5.244930437107825e-06,
      "loss": 0.7398,
      "step": 775170
    },
    {
      "epoch": 2.716812639532887,
      "grad_norm": 3.265625,
      "learning_rate": 5.2442814084441225e-06,
      "loss": 0.7316,
      "step": 775180
    },
    {
      "epoch": 2.7168476870397824,
      "grad_norm": 2.71875,
      "learning_rate": 5.2436323797804205e-06,
      "loss": 0.7656,
      "step": 775190
    },
    {
      "epoch": 2.716882734546678,
      "grad_norm": 2.71875,
      "learning_rate": 5.242983351116719e-06,
      "loss": 0.77,
      "step": 775200
    },
    {
      "epoch": 2.7169177820535735,
      "grad_norm": 2.546875,
      "learning_rate": 5.242334322453017e-06,
      "loss": 0.7802,
      "step": 775210
    },
    {
      "epoch": 2.7169528295604692,
      "grad_norm": 3.03125,
      "learning_rate": 5.241685293789315e-06,
      "loss": 0.8109,
      "step": 775220
    },
    {
      "epoch": 2.716987877067365,
      "grad_norm": 3.3125,
      "learning_rate": 5.241036265125613e-06,
      "loss": 0.744,
      "step": 775230
    },
    {
      "epoch": 2.7170229245742603,
      "grad_norm": 2.375,
      "learning_rate": 5.240387236461911e-06,
      "loss": 0.8449,
      "step": 775240
    },
    {
      "epoch": 2.717057972081156,
      "grad_norm": 2.5625,
      "learning_rate": 5.239738207798209e-06,
      "loss": 0.7375,
      "step": 775250
    },
    {
      "epoch": 2.7170930195880514,
      "grad_norm": 3.0625,
      "learning_rate": 5.239089179134507e-06,
      "loss": 0.8131,
      "step": 775260
    },
    {
      "epoch": 2.717128067094947,
      "grad_norm": 3.015625,
      "learning_rate": 5.238440150470806e-06,
      "loss": 0.805,
      "step": 775270
    },
    {
      "epoch": 2.717163114601843,
      "grad_norm": 2.453125,
      "learning_rate": 5.237791121807103e-06,
      "loss": 0.7292,
      "step": 775280
    },
    {
      "epoch": 2.7171981621087387,
      "grad_norm": 3.25,
      "learning_rate": 5.237142093143402e-06,
      "loss": 0.8789,
      "step": 775290
    },
    {
      "epoch": 2.717233209615634,
      "grad_norm": 3.015625,
      "learning_rate": 5.2364930644797e-06,
      "loss": 0.814,
      "step": 775300
    },
    {
      "epoch": 2.7172682571225297,
      "grad_norm": 2.734375,
      "learning_rate": 5.235844035815998e-06,
      "loss": 0.8176,
      "step": 775310
    },
    {
      "epoch": 2.717303304629425,
      "grad_norm": 3.15625,
      "learning_rate": 5.235195007152296e-06,
      "loss": 0.753,
      "step": 775320
    },
    {
      "epoch": 2.717338352136321,
      "grad_norm": 3.109375,
      "learning_rate": 5.234545978488594e-06,
      "loss": 0.7862,
      "step": 775330
    },
    {
      "epoch": 2.7173733996432166,
      "grad_norm": 2.828125,
      "learning_rate": 5.233896949824892e-06,
      "loss": 0.8121,
      "step": 775340
    },
    {
      "epoch": 2.717408447150112,
      "grad_norm": 3.25,
      "learning_rate": 5.23324792116119e-06,
      "loss": 0.8241,
      "step": 775350
    },
    {
      "epoch": 2.7174434946570076,
      "grad_norm": 2.703125,
      "learning_rate": 5.232598892497489e-06,
      "loss": 0.7371,
      "step": 775360
    },
    {
      "epoch": 2.717478542163903,
      "grad_norm": 2.578125,
      "learning_rate": 5.231949863833786e-06,
      "loss": 0.7217,
      "step": 775370
    },
    {
      "epoch": 2.7175135896707987,
      "grad_norm": 3.140625,
      "learning_rate": 5.231300835170084e-06,
      "loss": 0.8139,
      "step": 775380
    },
    {
      "epoch": 2.7175486371776945,
      "grad_norm": 2.65625,
      "learning_rate": 5.230651806506383e-06,
      "loss": 0.8417,
      "step": 775390
    },
    {
      "epoch": 2.71758368468459,
      "grad_norm": 2.734375,
      "learning_rate": 5.230002777842681e-06,
      "loss": 0.8047,
      "step": 775400
    },
    {
      "epoch": 2.7176187321914855,
      "grad_norm": 2.5625,
      "learning_rate": 5.229353749178979e-06,
      "loss": 0.7942,
      "step": 775410
    },
    {
      "epoch": 2.7176537796983813,
      "grad_norm": 3.109375,
      "learning_rate": 5.228704720515277e-06,
      "loss": 0.8379,
      "step": 775420
    },
    {
      "epoch": 2.7176888272052766,
      "grad_norm": 2.84375,
      "learning_rate": 5.228055691851575e-06,
      "loss": 0.791,
      "step": 775430
    },
    {
      "epoch": 2.7177238747121724,
      "grad_norm": 2.84375,
      "learning_rate": 5.227406663187873e-06,
      "loss": 0.8133,
      "step": 775440
    },
    {
      "epoch": 2.717758922219068,
      "grad_norm": 3.1875,
      "learning_rate": 5.226757634524172e-06,
      "loss": 0.86,
      "step": 775450
    },
    {
      "epoch": 2.7177939697259634,
      "grad_norm": 2.828125,
      "learning_rate": 5.226108605860469e-06,
      "loss": 0.7936,
      "step": 775460
    },
    {
      "epoch": 2.717829017232859,
      "grad_norm": 3.03125,
      "learning_rate": 5.225459577196767e-06,
      "loss": 0.9015,
      "step": 775470
    },
    {
      "epoch": 2.7178640647397545,
      "grad_norm": 2.765625,
      "learning_rate": 5.224810548533066e-06,
      "loss": 0.7987,
      "step": 775480
    },
    {
      "epoch": 2.7178991122466503,
      "grad_norm": 2.734375,
      "learning_rate": 5.224161519869364e-06,
      "loss": 0.8023,
      "step": 775490
    },
    {
      "epoch": 2.717934159753546,
      "grad_norm": 3.03125,
      "learning_rate": 5.223512491205662e-06,
      "loss": 0.807,
      "step": 775500
    },
    {
      "epoch": 2.7179692072604418,
      "grad_norm": 3.0,
      "learning_rate": 5.22286346254196e-06,
      "loss": 0.8792,
      "step": 775510
    },
    {
      "epoch": 2.718004254767337,
      "grad_norm": 3.0625,
      "learning_rate": 5.222214433878258e-06,
      "loss": 0.8524,
      "step": 775520
    },
    {
      "epoch": 2.718039302274233,
      "grad_norm": 3.046875,
      "learning_rate": 5.221565405214556e-06,
      "loss": 0.7688,
      "step": 775530
    },
    {
      "epoch": 2.718074349781128,
      "grad_norm": 2.5625,
      "learning_rate": 5.220916376550855e-06,
      "loss": 0.8168,
      "step": 775540
    },
    {
      "epoch": 2.718109397288024,
      "grad_norm": 2.578125,
      "learning_rate": 5.220267347887153e-06,
      "loss": 0.7721,
      "step": 775550
    },
    {
      "epoch": 2.7181444447949197,
      "grad_norm": 2.390625,
      "learning_rate": 5.21961831922345e-06,
      "loss": 0.8115,
      "step": 775560
    },
    {
      "epoch": 2.718179492301815,
      "grad_norm": 2.859375,
      "learning_rate": 5.218969290559749e-06,
      "loss": 0.7706,
      "step": 775570
    },
    {
      "epoch": 2.7182145398087108,
      "grad_norm": 2.671875,
      "learning_rate": 5.218320261896047e-06,
      "loss": 0.801,
      "step": 775580
    },
    {
      "epoch": 2.718249587315606,
      "grad_norm": 3.3125,
      "learning_rate": 5.217671233232345e-06,
      "loss": 0.8116,
      "step": 775590
    },
    {
      "epoch": 2.718284634822502,
      "grad_norm": 3.03125,
      "learning_rate": 5.2170222045686436e-06,
      "loss": 0.7295,
      "step": 775600
    },
    {
      "epoch": 2.7183196823293976,
      "grad_norm": 2.953125,
      "learning_rate": 5.216373175904941e-06,
      "loss": 0.7979,
      "step": 775610
    },
    {
      "epoch": 2.7183547298362933,
      "grad_norm": 3.09375,
      "learning_rate": 5.215724147241239e-06,
      "loss": 0.8211,
      "step": 775620
    },
    {
      "epoch": 2.7183897773431887,
      "grad_norm": 3.078125,
      "learning_rate": 5.2150751185775376e-06,
      "loss": 0.7697,
      "step": 775630
    },
    {
      "epoch": 2.7184248248500844,
      "grad_norm": 2.890625,
      "learning_rate": 5.2144260899138356e-06,
      "loss": 0.8355,
      "step": 775640
    },
    {
      "epoch": 2.7184598723569797,
      "grad_norm": 2.9375,
      "learning_rate": 5.213777061250133e-06,
      "loss": 0.8813,
      "step": 775650
    },
    {
      "epoch": 2.7184949198638755,
      "grad_norm": 2.8125,
      "learning_rate": 5.2131280325864316e-06,
      "loss": 0.7782,
      "step": 775660
    },
    {
      "epoch": 2.7185299673707712,
      "grad_norm": 3.234375,
      "learning_rate": 5.2124790039227296e-06,
      "loss": 0.8491,
      "step": 775670
    },
    {
      "epoch": 2.7185650148776666,
      "grad_norm": 3.40625,
      "learning_rate": 5.2118299752590276e-06,
      "loss": 0.8476,
      "step": 775680
    },
    {
      "epoch": 2.7186000623845623,
      "grad_norm": 2.484375,
      "learning_rate": 5.2111809465953256e-06,
      "loss": 0.688,
      "step": 775690
    },
    {
      "epoch": 2.7186351098914576,
      "grad_norm": 2.734375,
      "learning_rate": 5.2105319179316236e-06,
      "loss": 0.9548,
      "step": 775700
    },
    {
      "epoch": 2.7186701573983534,
      "grad_norm": 2.71875,
      "learning_rate": 5.2098828892679216e-06,
      "loss": 0.8826,
      "step": 775710
    },
    {
      "epoch": 2.718705204905249,
      "grad_norm": 3.0625,
      "learning_rate": 5.20923386060422e-06,
      "loss": 0.8336,
      "step": 775720
    },
    {
      "epoch": 2.718740252412145,
      "grad_norm": 2.984375,
      "learning_rate": 5.208584831940518e-06,
      "loss": 0.7755,
      "step": 775730
    },
    {
      "epoch": 2.71877529991904,
      "grad_norm": 3.140625,
      "learning_rate": 5.207935803276816e-06,
      "loss": 0.7905,
      "step": 775740
    },
    {
      "epoch": 2.718810347425936,
      "grad_norm": 2.84375,
      "learning_rate": 5.207286774613114e-06,
      "loss": 0.8491,
      "step": 775750
    },
    {
      "epoch": 2.7188453949328313,
      "grad_norm": 3.25,
      "learning_rate": 5.206637745949412e-06,
      "loss": 0.8401,
      "step": 775760
    },
    {
      "epoch": 2.718880442439727,
      "grad_norm": 2.859375,
      "learning_rate": 5.20598871728571e-06,
      "loss": 0.8543,
      "step": 775770
    },
    {
      "epoch": 2.718915489946623,
      "grad_norm": 3.078125,
      "learning_rate": 5.205339688622008e-06,
      "loss": 0.8505,
      "step": 775780
    },
    {
      "epoch": 2.718950537453518,
      "grad_norm": 2.921875,
      "learning_rate": 5.204690659958306e-06,
      "loss": 0.8448,
      "step": 775790
    },
    {
      "epoch": 2.718985584960414,
      "grad_norm": 2.609375,
      "learning_rate": 5.204041631294604e-06,
      "loss": 0.7945,
      "step": 775800
    },
    {
      "epoch": 2.719020632467309,
      "grad_norm": 2.765625,
      "learning_rate": 5.203392602630902e-06,
      "loss": 0.7516,
      "step": 775810
    },
    {
      "epoch": 2.719055679974205,
      "grad_norm": 2.984375,
      "learning_rate": 5.202743573967201e-06,
      "loss": 0.8074,
      "step": 775820
    },
    {
      "epoch": 2.7190907274811007,
      "grad_norm": 3.203125,
      "learning_rate": 5.202094545303499e-06,
      "loss": 0.7929,
      "step": 775830
    },
    {
      "epoch": 2.7191257749879965,
      "grad_norm": 2.625,
      "learning_rate": 5.201445516639797e-06,
      "loss": 0.7924,
      "step": 775840
    },
    {
      "epoch": 2.7191608224948918,
      "grad_norm": 3.078125,
      "learning_rate": 5.200796487976095e-06,
      "loss": 0.7694,
      "step": 775850
    },
    {
      "epoch": 2.7191958700017875,
      "grad_norm": 3.21875,
      "learning_rate": 5.200147459312393e-06,
      "loss": 0.8077,
      "step": 775860
    },
    {
      "epoch": 2.719230917508683,
      "grad_norm": 2.875,
      "learning_rate": 5.199498430648691e-06,
      "loss": 0.787,
      "step": 775870
    },
    {
      "epoch": 2.7192659650155786,
      "grad_norm": 2.921875,
      "learning_rate": 5.19884940198499e-06,
      "loss": 0.7935,
      "step": 775880
    },
    {
      "epoch": 2.7193010125224744,
      "grad_norm": 3.078125,
      "learning_rate": 5.198200373321287e-06,
      "loss": 0.8434,
      "step": 775890
    },
    {
      "epoch": 2.7193360600293697,
      "grad_norm": 2.703125,
      "learning_rate": 5.197551344657585e-06,
      "loss": 0.8172,
      "step": 775900
    },
    {
      "epoch": 2.7193711075362654,
      "grad_norm": 3.015625,
      "learning_rate": 5.196902315993884e-06,
      "loss": 0.7538,
      "step": 775910
    },
    {
      "epoch": 2.7194061550431607,
      "grad_norm": 3.0625,
      "learning_rate": 5.196253287330182e-06,
      "loss": 0.7792,
      "step": 775920
    },
    {
      "epoch": 2.7194412025500565,
      "grad_norm": 3.234375,
      "learning_rate": 5.195604258666479e-06,
      "loss": 0.7418,
      "step": 775930
    },
    {
      "epoch": 2.7194762500569523,
      "grad_norm": 2.921875,
      "learning_rate": 5.194955230002778e-06,
      "loss": 0.8063,
      "step": 775940
    },
    {
      "epoch": 2.719511297563848,
      "grad_norm": 3.046875,
      "learning_rate": 5.194306201339076e-06,
      "loss": 0.7691,
      "step": 775950
    },
    {
      "epoch": 2.7195463450707433,
      "grad_norm": 3.09375,
      "learning_rate": 5.193657172675374e-06,
      "loss": 0.7921,
      "step": 775960
    },
    {
      "epoch": 2.719581392577639,
      "grad_norm": 2.734375,
      "learning_rate": 5.193008144011673e-06,
      "loss": 0.7982,
      "step": 775970
    },
    {
      "epoch": 2.7196164400845344,
      "grad_norm": 3.40625,
      "learning_rate": 5.19235911534797e-06,
      "loss": 0.8942,
      "step": 775980
    },
    {
      "epoch": 2.71965148759143,
      "grad_norm": 3.0,
      "learning_rate": 5.191710086684268e-06,
      "loss": 0.8541,
      "step": 775990
    },
    {
      "epoch": 2.719686535098326,
      "grad_norm": 3.0625,
      "learning_rate": 5.191061058020567e-06,
      "loss": 0.8371,
      "step": 776000
    },
    {
      "epoch": 2.7197215826052212,
      "grad_norm": 2.578125,
      "learning_rate": 5.190412029356865e-06,
      "loss": 0.8767,
      "step": 776010
    },
    {
      "epoch": 2.719756630112117,
      "grad_norm": 2.828125,
      "learning_rate": 5.189763000693163e-06,
      "loss": 0.7821,
      "step": 776020
    },
    {
      "epoch": 2.7197916776190123,
      "grad_norm": 3.171875,
      "learning_rate": 5.189113972029461e-06,
      "loss": 0.8503,
      "step": 776030
    },
    {
      "epoch": 2.719826725125908,
      "grad_norm": 3.296875,
      "learning_rate": 5.188464943365759e-06,
      "loss": 0.8615,
      "step": 776040
    },
    {
      "epoch": 2.719861772632804,
      "grad_norm": 2.46875,
      "learning_rate": 5.187815914702057e-06,
      "loss": 0.8739,
      "step": 776050
    },
    {
      "epoch": 2.7198968201396996,
      "grad_norm": 2.453125,
      "learning_rate": 5.187166886038356e-06,
      "loss": 0.7847,
      "step": 776060
    },
    {
      "epoch": 2.719931867646595,
      "grad_norm": 3.046875,
      "learning_rate": 5.186517857374654e-06,
      "loss": 0.8001,
      "step": 776070
    },
    {
      "epoch": 2.7199669151534906,
      "grad_norm": 3.15625,
      "learning_rate": 5.185868828710951e-06,
      "loss": 0.819,
      "step": 776080
    },
    {
      "epoch": 2.720001962660386,
      "grad_norm": 3.328125,
      "learning_rate": 5.18521980004725e-06,
      "loss": 0.8594,
      "step": 776090
    },
    {
      "epoch": 2.7200370101672817,
      "grad_norm": 2.703125,
      "learning_rate": 5.184570771383548e-06,
      "loss": 0.7688,
      "step": 776100
    },
    {
      "epoch": 2.7200720576741775,
      "grad_norm": 2.65625,
      "learning_rate": 5.183921742719846e-06,
      "loss": 0.8284,
      "step": 776110
    },
    {
      "epoch": 2.720107105181073,
      "grad_norm": 2.578125,
      "learning_rate": 5.183272714056144e-06,
      "loss": 0.8052,
      "step": 776120
    },
    {
      "epoch": 2.7201421526879686,
      "grad_norm": 3.28125,
      "learning_rate": 5.182623685392442e-06,
      "loss": 0.8336,
      "step": 776130
    },
    {
      "epoch": 2.720177200194864,
      "grad_norm": 3.390625,
      "learning_rate": 5.18197465672874e-06,
      "loss": 0.7465,
      "step": 776140
    },
    {
      "epoch": 2.7202122477017596,
      "grad_norm": 3.53125,
      "learning_rate": 5.181325628065039e-06,
      "loss": 0.7295,
      "step": 776150
    },
    {
      "epoch": 2.7202472952086554,
      "grad_norm": 2.78125,
      "learning_rate": 5.180676599401337e-06,
      "loss": 0.7771,
      "step": 776160
    },
    {
      "epoch": 2.720282342715551,
      "grad_norm": 2.90625,
      "learning_rate": 5.180027570737634e-06,
      "loss": 0.8334,
      "step": 776170
    },
    {
      "epoch": 2.7203173902224465,
      "grad_norm": 3.21875,
      "learning_rate": 5.179378542073933e-06,
      "loss": 0.8222,
      "step": 776180
    },
    {
      "epoch": 2.720352437729342,
      "grad_norm": 2.734375,
      "learning_rate": 5.178729513410231e-06,
      "loss": 0.7241,
      "step": 776190
    },
    {
      "epoch": 2.7203874852362375,
      "grad_norm": 3.078125,
      "learning_rate": 5.178080484746529e-06,
      "loss": 0.8348,
      "step": 776200
    },
    {
      "epoch": 2.7204225327431333,
      "grad_norm": 3.3125,
      "learning_rate": 5.177431456082827e-06,
      "loss": 0.7902,
      "step": 776210
    },
    {
      "epoch": 2.720457580250029,
      "grad_norm": 2.53125,
      "learning_rate": 5.176782427419125e-06,
      "loss": 0.8004,
      "step": 776220
    },
    {
      "epoch": 2.7204926277569244,
      "grad_norm": 3.09375,
      "learning_rate": 5.176133398755423e-06,
      "loss": 0.9349,
      "step": 776230
    },
    {
      "epoch": 2.72052767526382,
      "grad_norm": 2.171875,
      "learning_rate": 5.175484370091721e-06,
      "loss": 0.8054,
      "step": 776240
    },
    {
      "epoch": 2.720562722770716,
      "grad_norm": 2.71875,
      "learning_rate": 5.1748353414280195e-06,
      "loss": 0.7059,
      "step": 776250
    },
    {
      "epoch": 2.720597770277611,
      "grad_norm": 3.203125,
      "learning_rate": 5.174186312764317e-06,
      "loss": 0.7217,
      "step": 776260
    },
    {
      "epoch": 2.720632817784507,
      "grad_norm": 2.546875,
      "learning_rate": 5.1735372841006155e-06,
      "loss": 0.7339,
      "step": 776270
    },
    {
      "epoch": 2.7206678652914027,
      "grad_norm": 2.8125,
      "learning_rate": 5.1728882554369135e-06,
      "loss": 0.7721,
      "step": 776280
    },
    {
      "epoch": 2.720702912798298,
      "grad_norm": 2.90625,
      "learning_rate": 5.1722392267732115e-06,
      "loss": 0.8153,
      "step": 776290
    },
    {
      "epoch": 2.7207379603051938,
      "grad_norm": 2.9375,
      "learning_rate": 5.1715901981095095e-06,
      "loss": 0.8181,
      "step": 776300
    },
    {
      "epoch": 2.720773007812089,
      "grad_norm": 3.03125,
      "learning_rate": 5.1709411694458075e-06,
      "loss": 0.8266,
      "step": 776310
    },
    {
      "epoch": 2.720808055318985,
      "grad_norm": 2.515625,
      "learning_rate": 5.1702921407821055e-06,
      "loss": 0.8721,
      "step": 776320
    },
    {
      "epoch": 2.7208431028258806,
      "grad_norm": 2.828125,
      "learning_rate": 5.1696431121184035e-06,
      "loss": 0.7551,
      "step": 776330
    },
    {
      "epoch": 2.720878150332776,
      "grad_norm": 3.078125,
      "learning_rate": 5.168994083454702e-06,
      "loss": 0.758,
      "step": 776340
    },
    {
      "epoch": 2.7209131978396717,
      "grad_norm": 2.46875,
      "learning_rate": 5.168345054791e-06,
      "loss": 0.7925,
      "step": 776350
    },
    {
      "epoch": 2.7209482453465674,
      "grad_norm": 3.203125,
      "learning_rate": 5.1676960261272975e-06,
      "loss": 0.8192,
      "step": 776360
    },
    {
      "epoch": 2.7209832928534627,
      "grad_norm": 2.734375,
      "learning_rate": 5.167046997463596e-06,
      "loss": 0.8362,
      "step": 776370
    },
    {
      "epoch": 2.7210183403603585,
      "grad_norm": 2.765625,
      "learning_rate": 5.166397968799894e-06,
      "loss": 0.8227,
      "step": 776380
    },
    {
      "epoch": 2.7210533878672543,
      "grad_norm": 3.0625,
      "learning_rate": 5.165748940136192e-06,
      "loss": 0.8589,
      "step": 776390
    },
    {
      "epoch": 2.7210884353741496,
      "grad_norm": 2.625,
      "learning_rate": 5.16509991147249e-06,
      "loss": 0.8118,
      "step": 776400
    },
    {
      "epoch": 2.7211234828810453,
      "grad_norm": 3.21875,
      "learning_rate": 5.164450882808788e-06,
      "loss": 0.7901,
      "step": 776410
    },
    {
      "epoch": 2.7211585303879406,
      "grad_norm": 3.03125,
      "learning_rate": 5.163801854145086e-06,
      "loss": 0.8382,
      "step": 776420
    },
    {
      "epoch": 2.7211935778948364,
      "grad_norm": 2.765625,
      "learning_rate": 5.163152825481385e-06,
      "loss": 0.8888,
      "step": 776430
    },
    {
      "epoch": 2.721228625401732,
      "grad_norm": 2.828125,
      "learning_rate": 5.162503796817683e-06,
      "loss": 0.752,
      "step": 776440
    },
    {
      "epoch": 2.721263672908628,
      "grad_norm": 2.78125,
      "learning_rate": 5.16185476815398e-06,
      "loss": 0.8531,
      "step": 776450
    },
    {
      "epoch": 2.7212987204155232,
      "grad_norm": 3.25,
      "learning_rate": 5.161205739490279e-06,
      "loss": 0.7667,
      "step": 776460
    },
    {
      "epoch": 2.721333767922419,
      "grad_norm": 3.0,
      "learning_rate": 5.160556710826577e-06,
      "loss": 0.8032,
      "step": 776470
    },
    {
      "epoch": 2.7213688154293143,
      "grad_norm": 3.078125,
      "learning_rate": 5.159907682162875e-06,
      "loss": 0.6956,
      "step": 776480
    },
    {
      "epoch": 2.72140386293621,
      "grad_norm": 3.40625,
      "learning_rate": 5.159258653499174e-06,
      "loss": 0.7608,
      "step": 776490
    },
    {
      "epoch": 2.721438910443106,
      "grad_norm": 2.921875,
      "learning_rate": 5.158609624835471e-06,
      "loss": 0.7582,
      "step": 776500
    },
    {
      "epoch": 2.721473957950001,
      "grad_norm": 2.875,
      "learning_rate": 5.157960596171769e-06,
      "loss": 0.7967,
      "step": 776510
    },
    {
      "epoch": 2.721509005456897,
      "grad_norm": 3.28125,
      "learning_rate": 5.157311567508068e-06,
      "loss": 0.8367,
      "step": 776520
    },
    {
      "epoch": 2.721544052963792,
      "grad_norm": 3.375,
      "learning_rate": 5.156662538844366e-06,
      "loss": 0.8399,
      "step": 776530
    },
    {
      "epoch": 2.721579100470688,
      "grad_norm": 2.53125,
      "learning_rate": 5.156013510180664e-06,
      "loss": 0.9507,
      "step": 776540
    },
    {
      "epoch": 2.7216141479775837,
      "grad_norm": 2.671875,
      "learning_rate": 5.155364481516962e-06,
      "loss": 0.8851,
      "step": 776550
    },
    {
      "epoch": 2.7216491954844795,
      "grad_norm": 2.890625,
      "learning_rate": 5.15471545285326e-06,
      "loss": 0.8332,
      "step": 776560
    },
    {
      "epoch": 2.721684242991375,
      "grad_norm": 3.375,
      "learning_rate": 5.154066424189558e-06,
      "loss": 0.8201,
      "step": 776570
    },
    {
      "epoch": 2.7217192904982705,
      "grad_norm": 3.0,
      "learning_rate": 5.153417395525857e-06,
      "loss": 0.8438,
      "step": 776580
    },
    {
      "epoch": 2.721754338005166,
      "grad_norm": 2.3125,
      "learning_rate": 5.152768366862154e-06,
      "loss": 0.8309,
      "step": 776590
    },
    {
      "epoch": 2.7217893855120616,
      "grad_norm": 3.5625,
      "learning_rate": 5.152119338198452e-06,
      "loss": 0.894,
      "step": 776600
    },
    {
      "epoch": 2.7218244330189574,
      "grad_norm": 2.8125,
      "learning_rate": 5.151470309534751e-06,
      "loss": 0.721,
      "step": 776610
    },
    {
      "epoch": 2.7218594805258527,
      "grad_norm": 3.1875,
      "learning_rate": 5.150821280871049e-06,
      "loss": 0.753,
      "step": 776620
    },
    {
      "epoch": 2.7218945280327485,
      "grad_norm": 3.25,
      "learning_rate": 5.150172252207347e-06,
      "loss": 0.9035,
      "step": 776630
    },
    {
      "epoch": 2.7219295755396438,
      "grad_norm": 2.4375,
      "learning_rate": 5.149523223543645e-06,
      "loss": 0.7485,
      "step": 776640
    },
    {
      "epoch": 2.7219646230465395,
      "grad_norm": 2.59375,
      "learning_rate": 5.148874194879943e-06,
      "loss": 0.7796,
      "step": 776650
    },
    {
      "epoch": 2.7219996705534353,
      "grad_norm": 3.109375,
      "learning_rate": 5.148225166216241e-06,
      "loss": 0.8211,
      "step": 776660
    },
    {
      "epoch": 2.722034718060331,
      "grad_norm": 3.265625,
      "learning_rate": 5.147576137552539e-06,
      "loss": 0.8253,
      "step": 776670
    },
    {
      "epoch": 2.7220697655672264,
      "grad_norm": 2.875,
      "learning_rate": 5.146927108888838e-06,
      "loss": 0.8314,
      "step": 776680
    },
    {
      "epoch": 2.722104813074122,
      "grad_norm": 3.09375,
      "learning_rate": 5.146278080225135e-06,
      "loss": 0.7725,
      "step": 776690
    },
    {
      "epoch": 2.7221398605810174,
      "grad_norm": 2.984375,
      "learning_rate": 5.145629051561434e-06,
      "loss": 0.7866,
      "step": 776700
    },
    {
      "epoch": 2.722174908087913,
      "grad_norm": 2.625,
      "learning_rate": 5.144980022897732e-06,
      "loss": 0.77,
      "step": 776710
    },
    {
      "epoch": 2.722209955594809,
      "grad_norm": 2.765625,
      "learning_rate": 5.14433099423403e-06,
      "loss": 0.7264,
      "step": 776720
    },
    {
      "epoch": 2.7222450031017043,
      "grad_norm": 2.84375,
      "learning_rate": 5.143681965570328e-06,
      "loss": 0.7275,
      "step": 776730
    },
    {
      "epoch": 2.7222800506086,
      "grad_norm": 2.90625,
      "learning_rate": 5.143032936906626e-06,
      "loss": 0.8134,
      "step": 776740
    },
    {
      "epoch": 2.7223150981154953,
      "grad_norm": 3.0,
      "learning_rate": 5.142383908242924e-06,
      "loss": 0.7876,
      "step": 776750
    },
    {
      "epoch": 2.722350145622391,
      "grad_norm": 3.0625,
      "learning_rate": 5.141734879579222e-06,
      "loss": 0.7893,
      "step": 776760
    },
    {
      "epoch": 2.722385193129287,
      "grad_norm": 2.71875,
      "learning_rate": 5.1410858509155206e-06,
      "loss": 0.8197,
      "step": 776770
    },
    {
      "epoch": 2.7224202406361826,
      "grad_norm": 2.875,
      "learning_rate": 5.140436822251818e-06,
      "loss": 0.8127,
      "step": 776780
    },
    {
      "epoch": 2.722455288143078,
      "grad_norm": 2.5625,
      "learning_rate": 5.139787793588116e-06,
      "loss": 0.7752,
      "step": 776790
    },
    {
      "epoch": 2.7224903356499737,
      "grad_norm": 2.640625,
      "learning_rate": 5.1391387649244146e-06,
      "loss": 0.7334,
      "step": 776800
    },
    {
      "epoch": 2.722525383156869,
      "grad_norm": 2.65625,
      "learning_rate": 5.1384897362607126e-06,
      "loss": 0.7976,
      "step": 776810
    },
    {
      "epoch": 2.7225604306637647,
      "grad_norm": 3.03125,
      "learning_rate": 5.1378407075970106e-06,
      "loss": 0.8217,
      "step": 776820
    },
    {
      "epoch": 2.7225954781706605,
      "grad_norm": 3.078125,
      "learning_rate": 5.1371916789333086e-06,
      "loss": 0.7912,
      "step": 776830
    },
    {
      "epoch": 2.722630525677556,
      "grad_norm": 2.984375,
      "learning_rate": 5.1365426502696066e-06,
      "loss": 0.8433,
      "step": 776840
    },
    {
      "epoch": 2.7226655731844516,
      "grad_norm": 2.515625,
      "learning_rate": 5.1358936216059046e-06,
      "loss": 0.8175,
      "step": 776850
    },
    {
      "epoch": 2.722700620691347,
      "grad_norm": 3.21875,
      "learning_rate": 5.135244592942203e-06,
      "loss": 0.7931,
      "step": 776860
    },
    {
      "epoch": 2.7227356681982426,
      "grad_norm": 2.71875,
      "learning_rate": 5.134595564278501e-06,
      "loss": 0.8141,
      "step": 776870
    },
    {
      "epoch": 2.7227707157051384,
      "grad_norm": 3.0,
      "learning_rate": 5.1339465356147986e-06,
      "loss": 0.8264,
      "step": 776880
    },
    {
      "epoch": 2.722805763212034,
      "grad_norm": 2.890625,
      "learning_rate": 5.133297506951097e-06,
      "loss": 0.7741,
      "step": 776890
    },
    {
      "epoch": 2.7228408107189295,
      "grad_norm": 3.296875,
      "learning_rate": 5.132648478287395e-06,
      "loss": 0.823,
      "step": 776900
    },
    {
      "epoch": 2.7228758582258252,
      "grad_norm": 2.875,
      "learning_rate": 5.131999449623693e-06,
      "loss": 0.7533,
      "step": 776910
    },
    {
      "epoch": 2.7229109057327205,
      "grad_norm": 2.90625,
      "learning_rate": 5.131350420959991e-06,
      "loss": 0.828,
      "step": 776920
    },
    {
      "epoch": 2.7229459532396163,
      "grad_norm": 2.484375,
      "learning_rate": 5.130701392296289e-06,
      "loss": 0.7295,
      "step": 776930
    },
    {
      "epoch": 2.722981000746512,
      "grad_norm": 3.125,
      "learning_rate": 5.130052363632587e-06,
      "loss": 0.7921,
      "step": 776940
    },
    {
      "epoch": 2.7230160482534074,
      "grad_norm": 2.671875,
      "learning_rate": 5.129403334968886e-06,
      "loss": 0.8284,
      "step": 776950
    },
    {
      "epoch": 2.723051095760303,
      "grad_norm": 2.6875,
      "learning_rate": 5.128754306305184e-06,
      "loss": 0.802,
      "step": 776960
    },
    {
      "epoch": 2.7230861432671984,
      "grad_norm": 2.734375,
      "learning_rate": 5.128105277641481e-06,
      "loss": 0.8063,
      "step": 776970
    },
    {
      "epoch": 2.723121190774094,
      "grad_norm": 3.140625,
      "learning_rate": 5.12745624897778e-06,
      "loss": 0.8439,
      "step": 776980
    },
    {
      "epoch": 2.72315623828099,
      "grad_norm": 2.8125,
      "learning_rate": 5.126807220314078e-06,
      "loss": 0.888,
      "step": 776990
    },
    {
      "epoch": 2.7231912857878857,
      "grad_norm": 2.703125,
      "learning_rate": 5.126158191650376e-06,
      "loss": 0.8563,
      "step": 777000
    },
    {
      "epoch": 2.723226333294781,
      "grad_norm": 2.359375,
      "learning_rate": 5.125509162986675e-06,
      "loss": 0.8017,
      "step": 777010
    },
    {
      "epoch": 2.723261380801677,
      "grad_norm": 3.296875,
      "learning_rate": 5.124860134322972e-06,
      "loss": 0.8216,
      "step": 777020
    },
    {
      "epoch": 2.723296428308572,
      "grad_norm": 2.90625,
      "learning_rate": 5.12421110565927e-06,
      "loss": 0.819,
      "step": 777030
    },
    {
      "epoch": 2.723331475815468,
      "grad_norm": 2.71875,
      "learning_rate": 5.123562076995569e-06,
      "loss": 0.7773,
      "step": 777040
    },
    {
      "epoch": 2.7233665233223636,
      "grad_norm": 2.34375,
      "learning_rate": 5.122913048331867e-06,
      "loss": 0.7303,
      "step": 777050
    },
    {
      "epoch": 2.723401570829259,
      "grad_norm": 3.09375,
      "learning_rate": 5.122264019668164e-06,
      "loss": 0.8156,
      "step": 777060
    },
    {
      "epoch": 2.7234366183361547,
      "grad_norm": 2.953125,
      "learning_rate": 5.121614991004463e-06,
      "loss": 0.8561,
      "step": 777070
    },
    {
      "epoch": 2.72347166584305,
      "grad_norm": 2.71875,
      "learning_rate": 5.120965962340761e-06,
      "loss": 0.7435,
      "step": 777080
    },
    {
      "epoch": 2.7235067133499458,
      "grad_norm": 3.171875,
      "learning_rate": 5.120316933677059e-06,
      "loss": 0.8344,
      "step": 777090
    },
    {
      "epoch": 2.7235417608568415,
      "grad_norm": 3.140625,
      "learning_rate": 5.119667905013358e-06,
      "loss": 0.8235,
      "step": 777100
    },
    {
      "epoch": 2.7235768083637373,
      "grad_norm": 3.109375,
      "learning_rate": 5.119018876349655e-06,
      "loss": 0.8069,
      "step": 777110
    },
    {
      "epoch": 2.7236118558706326,
      "grad_norm": 3.09375,
      "learning_rate": 5.118369847685953e-06,
      "loss": 0.8821,
      "step": 777120
    },
    {
      "epoch": 2.7236469033775283,
      "grad_norm": 2.90625,
      "learning_rate": 5.117720819022252e-06,
      "loss": 0.8606,
      "step": 777130
    },
    {
      "epoch": 2.7236819508844237,
      "grad_norm": 2.734375,
      "learning_rate": 5.11707179035855e-06,
      "loss": 0.7814,
      "step": 777140
    },
    {
      "epoch": 2.7237169983913194,
      "grad_norm": 3.171875,
      "learning_rate": 5.116422761694848e-06,
      "loss": 0.8133,
      "step": 777150
    },
    {
      "epoch": 2.723752045898215,
      "grad_norm": 2.765625,
      "learning_rate": 5.115773733031146e-06,
      "loss": 0.8304,
      "step": 777160
    },
    {
      "epoch": 2.7237870934051105,
      "grad_norm": 3.0,
      "learning_rate": 5.115124704367444e-06,
      "loss": 0.8517,
      "step": 777170
    },
    {
      "epoch": 2.7238221409120063,
      "grad_norm": 2.78125,
      "learning_rate": 5.114475675703742e-06,
      "loss": 0.7387,
      "step": 777180
    },
    {
      "epoch": 2.7238571884189016,
      "grad_norm": 2.828125,
      "learning_rate": 5.11382664704004e-06,
      "loss": 0.7872,
      "step": 777190
    },
    {
      "epoch": 2.7238922359257973,
      "grad_norm": 2.953125,
      "learning_rate": 5.113177618376338e-06,
      "loss": 0.803,
      "step": 777200
    },
    {
      "epoch": 2.723927283432693,
      "grad_norm": 3.09375,
      "learning_rate": 5.112528589712636e-06,
      "loss": 0.8387,
      "step": 777210
    },
    {
      "epoch": 2.723962330939589,
      "grad_norm": 3.609375,
      "learning_rate": 5.111879561048935e-06,
      "loss": 0.7691,
      "step": 777220
    },
    {
      "epoch": 2.723997378446484,
      "grad_norm": 3.140625,
      "learning_rate": 5.111230532385233e-06,
      "loss": 0.7592,
      "step": 777230
    },
    {
      "epoch": 2.72403242595338,
      "grad_norm": 3.0625,
      "learning_rate": 5.110581503721531e-06,
      "loss": 0.7593,
      "step": 777240
    },
    {
      "epoch": 2.7240674734602752,
      "grad_norm": 2.5625,
      "learning_rate": 5.109932475057829e-06,
      "loss": 0.7296,
      "step": 777250
    },
    {
      "epoch": 2.724102520967171,
      "grad_norm": 2.953125,
      "learning_rate": 5.109283446394127e-06,
      "loss": 0.8161,
      "step": 777260
    },
    {
      "epoch": 2.7241375684740667,
      "grad_norm": 3.0,
      "learning_rate": 5.108634417730425e-06,
      "loss": 0.7924,
      "step": 777270
    },
    {
      "epoch": 2.724172615980962,
      "grad_norm": 3.140625,
      "learning_rate": 5.107985389066723e-06,
      "loss": 0.7982,
      "step": 777280
    },
    {
      "epoch": 2.724207663487858,
      "grad_norm": 2.78125,
      "learning_rate": 5.107336360403022e-06,
      "loss": 0.7665,
      "step": 777290
    },
    {
      "epoch": 2.724242710994753,
      "grad_norm": 2.515625,
      "learning_rate": 5.106687331739319e-06,
      "loss": 0.7454,
      "step": 777300
    },
    {
      "epoch": 2.724277758501649,
      "grad_norm": 2.875,
      "learning_rate": 5.106038303075617e-06,
      "loss": 0.8455,
      "step": 777310
    },
    {
      "epoch": 2.7243128060085446,
      "grad_norm": 2.953125,
      "learning_rate": 5.105389274411916e-06,
      "loss": 0.738,
      "step": 777320
    },
    {
      "epoch": 2.7243478535154404,
      "grad_norm": 2.703125,
      "learning_rate": 5.104740245748214e-06,
      "loss": 0.742,
      "step": 777330
    },
    {
      "epoch": 2.7243829010223357,
      "grad_norm": 2.703125,
      "learning_rate": 5.104091217084512e-06,
      "loss": 0.684,
      "step": 777340
    },
    {
      "epoch": 2.7244179485292315,
      "grad_norm": 3.140625,
      "learning_rate": 5.10344218842081e-06,
      "loss": 0.8111,
      "step": 777350
    },
    {
      "epoch": 2.724452996036127,
      "grad_norm": 3.453125,
      "learning_rate": 5.102793159757108e-06,
      "loss": 0.7805,
      "step": 777360
    },
    {
      "epoch": 2.7244880435430225,
      "grad_norm": 3.078125,
      "learning_rate": 5.102144131093406e-06,
      "loss": 0.7926,
      "step": 777370
    },
    {
      "epoch": 2.7245230910499183,
      "grad_norm": 3.109375,
      "learning_rate": 5.1014951024297045e-06,
      "loss": 0.8205,
      "step": 777380
    },
    {
      "epoch": 2.7245581385568136,
      "grad_norm": 3.015625,
      "learning_rate": 5.100846073766002e-06,
      "loss": 0.8295,
      "step": 777390
    },
    {
      "epoch": 2.7245931860637094,
      "grad_norm": 3.078125,
      "learning_rate": 5.1001970451023e-06,
      "loss": 0.7971,
      "step": 777400
    },
    {
      "epoch": 2.7246282335706047,
      "grad_norm": 3.1875,
      "learning_rate": 5.0995480164385985e-06,
      "loss": 0.7562,
      "step": 777410
    },
    {
      "epoch": 2.7246632810775004,
      "grad_norm": 3.078125,
      "learning_rate": 5.0988989877748965e-06,
      "loss": 0.7353,
      "step": 777420
    },
    {
      "epoch": 2.724698328584396,
      "grad_norm": 2.90625,
      "learning_rate": 5.0982499591111945e-06,
      "loss": 0.8171,
      "step": 777430
    },
    {
      "epoch": 2.724733376091292,
      "grad_norm": 2.703125,
      "learning_rate": 5.0976009304474925e-06,
      "loss": 0.7829,
      "step": 777440
    },
    {
      "epoch": 2.7247684235981873,
      "grad_norm": 2.765625,
      "learning_rate": 5.0969519017837905e-06,
      "loss": 0.7871,
      "step": 777450
    },
    {
      "epoch": 2.724803471105083,
      "grad_norm": 3.1875,
      "learning_rate": 5.0963028731200885e-06,
      "loss": 0.9236,
      "step": 777460
    },
    {
      "epoch": 2.7248385186119783,
      "grad_norm": 2.578125,
      "learning_rate": 5.095653844456387e-06,
      "loss": 0.7755,
      "step": 777470
    },
    {
      "epoch": 2.724873566118874,
      "grad_norm": 2.984375,
      "learning_rate": 5.095004815792685e-06,
      "loss": 0.7745,
      "step": 777480
    },
    {
      "epoch": 2.72490861362577,
      "grad_norm": 3.171875,
      "learning_rate": 5.0943557871289825e-06,
      "loss": 0.7457,
      "step": 777490
    },
    {
      "epoch": 2.724943661132665,
      "grad_norm": 3.421875,
      "learning_rate": 5.093706758465281e-06,
      "loss": 0.8275,
      "step": 777500
    },
    {
      "epoch": 2.724978708639561,
      "grad_norm": 2.796875,
      "learning_rate": 5.093057729801579e-06,
      "loss": 0.777,
      "step": 777510
    },
    {
      "epoch": 2.7250137561464562,
      "grad_norm": 2.75,
      "learning_rate": 5.092408701137877e-06,
      "loss": 0.7893,
      "step": 777520
    },
    {
      "epoch": 2.725048803653352,
      "grad_norm": 2.546875,
      "learning_rate": 5.091759672474175e-06,
      "loss": 0.7988,
      "step": 777530
    },
    {
      "epoch": 2.7250838511602478,
      "grad_norm": 3.140625,
      "learning_rate": 5.091110643810473e-06,
      "loss": 0.8211,
      "step": 777540
    },
    {
      "epoch": 2.7251188986671435,
      "grad_norm": 2.890625,
      "learning_rate": 5.090461615146771e-06,
      "loss": 0.7939,
      "step": 777550
    },
    {
      "epoch": 2.725153946174039,
      "grad_norm": 3.078125,
      "learning_rate": 5.08981258648307e-06,
      "loss": 0.8295,
      "step": 777560
    },
    {
      "epoch": 2.7251889936809346,
      "grad_norm": 3.390625,
      "learning_rate": 5.089163557819368e-06,
      "loss": 0.8263,
      "step": 777570
    },
    {
      "epoch": 2.72522404118783,
      "grad_norm": 2.875,
      "learning_rate": 5.088514529155665e-06,
      "loss": 0.6936,
      "step": 777580
    },
    {
      "epoch": 2.7252590886947257,
      "grad_norm": 2.796875,
      "learning_rate": 5.087865500491964e-06,
      "loss": 0.806,
      "step": 777590
    },
    {
      "epoch": 2.7252941362016214,
      "grad_norm": 2.453125,
      "learning_rate": 5.087216471828262e-06,
      "loss": 0.848,
      "step": 777600
    },
    {
      "epoch": 2.7253291837085167,
      "grad_norm": 2.0625,
      "learning_rate": 5.08656744316456e-06,
      "loss": 0.8642,
      "step": 777610
    },
    {
      "epoch": 2.7253642312154125,
      "grad_norm": 3.046875,
      "learning_rate": 5.085918414500858e-06,
      "loss": 0.8999,
      "step": 777620
    },
    {
      "epoch": 2.7253992787223082,
      "grad_norm": 3.15625,
      "learning_rate": 5.085269385837156e-06,
      "loss": 0.9108,
      "step": 777630
    },
    {
      "epoch": 2.7254343262292036,
      "grad_norm": 2.640625,
      "learning_rate": 5.084620357173454e-06,
      "loss": 0.7755,
      "step": 777640
    },
    {
      "epoch": 2.7254693737360993,
      "grad_norm": 2.78125,
      "learning_rate": 5.083971328509753e-06,
      "loss": 0.875,
      "step": 777650
    },
    {
      "epoch": 2.725504421242995,
      "grad_norm": 2.46875,
      "learning_rate": 5.083322299846051e-06,
      "loss": 0.8543,
      "step": 777660
    },
    {
      "epoch": 2.7255394687498904,
      "grad_norm": 2.875,
      "learning_rate": 5.082673271182348e-06,
      "loss": 0.7914,
      "step": 777670
    },
    {
      "epoch": 2.725574516256786,
      "grad_norm": 2.5625,
      "learning_rate": 5.082024242518647e-06,
      "loss": 0.7492,
      "step": 777680
    },
    {
      "epoch": 2.7256095637636815,
      "grad_norm": 2.84375,
      "learning_rate": 5.081375213854945e-06,
      "loss": 0.7028,
      "step": 777690
    },
    {
      "epoch": 2.7256446112705772,
      "grad_norm": 2.84375,
      "learning_rate": 5.080726185191243e-06,
      "loss": 0.8516,
      "step": 777700
    },
    {
      "epoch": 2.725679658777473,
      "grad_norm": 2.96875,
      "learning_rate": 5.080077156527541e-06,
      "loss": 0.8565,
      "step": 777710
    },
    {
      "epoch": 2.7257147062843687,
      "grad_norm": 3.40625,
      "learning_rate": 5.079428127863839e-06,
      "loss": 0.8673,
      "step": 777720
    },
    {
      "epoch": 2.725749753791264,
      "grad_norm": 3.140625,
      "learning_rate": 5.078779099200137e-06,
      "loss": 0.7959,
      "step": 777730
    },
    {
      "epoch": 2.72578480129816,
      "grad_norm": 2.75,
      "learning_rate": 5.078130070536435e-06,
      "loss": 0.7858,
      "step": 777740
    },
    {
      "epoch": 2.725819848805055,
      "grad_norm": 3.09375,
      "learning_rate": 5.077481041872734e-06,
      "loss": 0.8916,
      "step": 777750
    },
    {
      "epoch": 2.725854896311951,
      "grad_norm": 2.859375,
      "learning_rate": 5.076832013209032e-06,
      "loss": 0.8248,
      "step": 777760
    },
    {
      "epoch": 2.7258899438188466,
      "grad_norm": 2.984375,
      "learning_rate": 5.07618298454533e-06,
      "loss": 0.7886,
      "step": 777770
    },
    {
      "epoch": 2.725924991325742,
      "grad_norm": 3.25,
      "learning_rate": 5.075533955881628e-06,
      "loss": 0.7422,
      "step": 777780
    },
    {
      "epoch": 2.7259600388326377,
      "grad_norm": 3.421875,
      "learning_rate": 5.074884927217926e-06,
      "loss": 0.7929,
      "step": 777790
    },
    {
      "epoch": 2.725995086339533,
      "grad_norm": 3.0,
      "learning_rate": 5.074235898554224e-06,
      "loss": 0.8076,
      "step": 777800
    },
    {
      "epoch": 2.726030133846429,
      "grad_norm": 2.84375,
      "learning_rate": 5.073586869890523e-06,
      "loss": 0.7267,
      "step": 777810
    },
    {
      "epoch": 2.7260651813533245,
      "grad_norm": 2.515625,
      "learning_rate": 5.07293784122682e-06,
      "loss": 0.8006,
      "step": 777820
    },
    {
      "epoch": 2.7261002288602203,
      "grad_norm": 3.140625,
      "learning_rate": 5.072288812563118e-06,
      "loss": 0.7967,
      "step": 777830
    },
    {
      "epoch": 2.7261352763671156,
      "grad_norm": 3.234375,
      "learning_rate": 5.071639783899417e-06,
      "loss": 0.8576,
      "step": 777840
    },
    {
      "epoch": 2.7261703238740114,
      "grad_norm": 2.859375,
      "learning_rate": 5.070990755235715e-06,
      "loss": 0.7729,
      "step": 777850
    },
    {
      "epoch": 2.7262053713809067,
      "grad_norm": 3.171875,
      "learning_rate": 5.070341726572012e-06,
      "loss": 0.7811,
      "step": 777860
    },
    {
      "epoch": 2.7262404188878024,
      "grad_norm": 2.75,
      "learning_rate": 5.069692697908311e-06,
      "loss": 0.8615,
      "step": 777870
    },
    {
      "epoch": 2.726275466394698,
      "grad_norm": 2.5,
      "learning_rate": 5.069043669244609e-06,
      "loss": 0.7985,
      "step": 777880
    },
    {
      "epoch": 2.7263105139015935,
      "grad_norm": 2.828125,
      "learning_rate": 5.068394640580907e-06,
      "loss": 0.7341,
      "step": 777890
    },
    {
      "epoch": 2.7263455614084893,
      "grad_norm": 3.09375,
      "learning_rate": 5.0677456119172056e-06,
      "loss": 0.8428,
      "step": 777900
    },
    {
      "epoch": 2.7263806089153846,
      "grad_norm": 2.734375,
      "learning_rate": 5.067096583253503e-06,
      "loss": 0.8064,
      "step": 777910
    },
    {
      "epoch": 2.7264156564222803,
      "grad_norm": 3.1875,
      "learning_rate": 5.066447554589801e-06,
      "loss": 0.8255,
      "step": 777920
    },
    {
      "epoch": 2.726450703929176,
      "grad_norm": 2.84375,
      "learning_rate": 5.0657985259260996e-06,
      "loss": 0.7665,
      "step": 777930
    },
    {
      "epoch": 2.726485751436072,
      "grad_norm": 2.921875,
      "learning_rate": 5.0651494972623976e-06,
      "loss": 0.8497,
      "step": 777940
    },
    {
      "epoch": 2.726520798942967,
      "grad_norm": 3.140625,
      "learning_rate": 5.0645004685986956e-06,
      "loss": 0.8576,
      "step": 777950
    },
    {
      "epoch": 2.726555846449863,
      "grad_norm": 2.875,
      "learning_rate": 5.0638514399349936e-06,
      "loss": 0.8464,
      "step": 777960
    },
    {
      "epoch": 2.7265908939567582,
      "grad_norm": 2.9375,
      "learning_rate": 5.0632024112712916e-06,
      "loss": 0.8355,
      "step": 777970
    },
    {
      "epoch": 2.726625941463654,
      "grad_norm": 2.5625,
      "learning_rate": 5.0625533826075896e-06,
      "loss": 0.7846,
      "step": 777980
    },
    {
      "epoch": 2.7266609889705498,
      "grad_norm": 3.234375,
      "learning_rate": 5.061904353943888e-06,
      "loss": 0.7354,
      "step": 777990
    },
    {
      "epoch": 2.726696036477445,
      "grad_norm": 2.796875,
      "learning_rate": 5.0612553252801856e-06,
      "loss": 0.8088,
      "step": 778000
    },
    {
      "epoch": 2.726731083984341,
      "grad_norm": 2.890625,
      "learning_rate": 5.0606062966164836e-06,
      "loss": 0.8121,
      "step": 778010
    },
    {
      "epoch": 2.726766131491236,
      "grad_norm": 2.671875,
      "learning_rate": 5.059957267952782e-06,
      "loss": 0.8955,
      "step": 778020
    },
    {
      "epoch": 2.726801178998132,
      "grad_norm": 2.828125,
      "learning_rate": 5.05930823928908e-06,
      "loss": 0.7989,
      "step": 778030
    },
    {
      "epoch": 2.7268362265050277,
      "grad_norm": 3.015625,
      "learning_rate": 5.058659210625378e-06,
      "loss": 0.8664,
      "step": 778040
    },
    {
      "epoch": 2.7268712740119234,
      "grad_norm": 3.34375,
      "learning_rate": 5.058010181961676e-06,
      "loss": 0.8172,
      "step": 778050
    },
    {
      "epoch": 2.7269063215188187,
      "grad_norm": 3.328125,
      "learning_rate": 5.057361153297974e-06,
      "loss": 0.8899,
      "step": 778060
    },
    {
      "epoch": 2.7269413690257145,
      "grad_norm": 2.703125,
      "learning_rate": 5.056712124634272e-06,
      "loss": 0.8275,
      "step": 778070
    },
    {
      "epoch": 2.72697641653261,
      "grad_norm": 3.4375,
      "learning_rate": 5.056063095970571e-06,
      "loss": 0.8699,
      "step": 778080
    },
    {
      "epoch": 2.7270114640395056,
      "grad_norm": 3.25,
      "learning_rate": 5.055414067306869e-06,
      "loss": 0.7745,
      "step": 778090
    },
    {
      "epoch": 2.7270465115464013,
      "grad_norm": 2.828125,
      "learning_rate": 5.054765038643166e-06,
      "loss": 0.7533,
      "step": 778100
    },
    {
      "epoch": 2.7270815590532966,
      "grad_norm": 3.03125,
      "learning_rate": 5.054116009979465e-06,
      "loss": 0.8412,
      "step": 778110
    },
    {
      "epoch": 2.7271166065601924,
      "grad_norm": 2.640625,
      "learning_rate": 5.053466981315763e-06,
      "loss": 0.8278,
      "step": 778120
    },
    {
      "epoch": 2.7271516540670877,
      "grad_norm": 2.859375,
      "learning_rate": 5.052817952652061e-06,
      "loss": 0.717,
      "step": 778130
    },
    {
      "epoch": 2.7271867015739835,
      "grad_norm": 3.296875,
      "learning_rate": 5.052168923988359e-06,
      "loss": 0.7775,
      "step": 778140
    },
    {
      "epoch": 2.727221749080879,
      "grad_norm": 2.828125,
      "learning_rate": 5.051519895324657e-06,
      "loss": 0.8329,
      "step": 778150
    },
    {
      "epoch": 2.727256796587775,
      "grad_norm": 2.765625,
      "learning_rate": 5.050870866660955e-06,
      "loss": 0.8198,
      "step": 778160
    },
    {
      "epoch": 2.7272918440946703,
      "grad_norm": 2.9375,
      "learning_rate": 5.050221837997253e-06,
      "loss": 0.8233,
      "step": 778170
    },
    {
      "epoch": 2.727326891601566,
      "grad_norm": 2.6875,
      "learning_rate": 5.049572809333552e-06,
      "loss": 0.7631,
      "step": 778180
    },
    {
      "epoch": 2.7273619391084614,
      "grad_norm": 2.6875,
      "learning_rate": 5.048923780669849e-06,
      "loss": 0.7372,
      "step": 778190
    },
    {
      "epoch": 2.727396986615357,
      "grad_norm": 3.171875,
      "learning_rate": 5.048274752006148e-06,
      "loss": 0.7546,
      "step": 778200
    },
    {
      "epoch": 2.727432034122253,
      "grad_norm": 3.234375,
      "learning_rate": 5.047625723342446e-06,
      "loss": 0.8508,
      "step": 778210
    },
    {
      "epoch": 2.727467081629148,
      "grad_norm": 2.96875,
      "learning_rate": 5.046976694678744e-06,
      "loss": 0.8615,
      "step": 778220
    },
    {
      "epoch": 2.727502129136044,
      "grad_norm": 2.90625,
      "learning_rate": 5.046327666015042e-06,
      "loss": 0.7893,
      "step": 778230
    },
    {
      "epoch": 2.7275371766429393,
      "grad_norm": 2.515625,
      "learning_rate": 5.04567863735134e-06,
      "loss": 0.8441,
      "step": 778240
    },
    {
      "epoch": 2.727572224149835,
      "grad_norm": 2.90625,
      "learning_rate": 5.045029608687638e-06,
      "loss": 0.8268,
      "step": 778250
    },
    {
      "epoch": 2.727607271656731,
      "grad_norm": 3.21875,
      "learning_rate": 5.044380580023936e-06,
      "loss": 0.8406,
      "step": 778260
    },
    {
      "epoch": 2.7276423191636265,
      "grad_norm": 2.609375,
      "learning_rate": 5.043731551360235e-06,
      "loss": 0.7542,
      "step": 778270
    },
    {
      "epoch": 2.727677366670522,
      "grad_norm": 3.15625,
      "learning_rate": 5.043082522696533e-06,
      "loss": 0.8248,
      "step": 778280
    },
    {
      "epoch": 2.7277124141774176,
      "grad_norm": 2.921875,
      "learning_rate": 5.04243349403283e-06,
      "loss": 0.8314,
      "step": 778290
    },
    {
      "epoch": 2.727747461684313,
      "grad_norm": 3.078125,
      "learning_rate": 5.041784465369129e-06,
      "loss": 0.761,
      "step": 778300
    },
    {
      "epoch": 2.7277825091912087,
      "grad_norm": 2.65625,
      "learning_rate": 5.041135436705427e-06,
      "loss": 0.7232,
      "step": 778310
    },
    {
      "epoch": 2.7278175566981044,
      "grad_norm": 3.625,
      "learning_rate": 5.040486408041725e-06,
      "loss": 0.7582,
      "step": 778320
    },
    {
      "epoch": 2.7278526042049998,
      "grad_norm": 2.90625,
      "learning_rate": 5.039837379378023e-06,
      "loss": 0.9056,
      "step": 778330
    },
    {
      "epoch": 2.7278876517118955,
      "grad_norm": 2.765625,
      "learning_rate": 5.039188350714321e-06,
      "loss": 0.7151,
      "step": 778340
    },
    {
      "epoch": 2.727922699218791,
      "grad_norm": 2.640625,
      "learning_rate": 5.038539322050619e-06,
      "loss": 0.8202,
      "step": 778350
    },
    {
      "epoch": 2.7279577467256866,
      "grad_norm": 3.171875,
      "learning_rate": 5.037890293386918e-06,
      "loss": 0.7548,
      "step": 778360
    },
    {
      "epoch": 2.7279927942325823,
      "grad_norm": 2.6875,
      "learning_rate": 5.037241264723216e-06,
      "loss": 0.778,
      "step": 778370
    },
    {
      "epoch": 2.728027841739478,
      "grad_norm": 2.546875,
      "learning_rate": 5.036592236059513e-06,
      "loss": 0.7723,
      "step": 778380
    },
    {
      "epoch": 2.7280628892463734,
      "grad_norm": 2.6875,
      "learning_rate": 5.035943207395812e-06,
      "loss": 0.7306,
      "step": 778390
    },
    {
      "epoch": 2.728097936753269,
      "grad_norm": 2.671875,
      "learning_rate": 5.03529417873211e-06,
      "loss": 0.8858,
      "step": 778400
    },
    {
      "epoch": 2.7281329842601645,
      "grad_norm": 3.203125,
      "learning_rate": 5.034645150068408e-06,
      "loss": 0.8185,
      "step": 778410
    },
    {
      "epoch": 2.7281680317670602,
      "grad_norm": 3.0,
      "learning_rate": 5.033996121404707e-06,
      "loss": 0.8041,
      "step": 778420
    },
    {
      "epoch": 2.728203079273956,
      "grad_norm": 2.75,
      "learning_rate": 5.033347092741004e-06,
      "loss": 0.7957,
      "step": 778430
    },
    {
      "epoch": 2.7282381267808513,
      "grad_norm": 2.796875,
      "learning_rate": 5.032698064077302e-06,
      "loss": 0.8564,
      "step": 778440
    },
    {
      "epoch": 2.728273174287747,
      "grad_norm": 3.0625,
      "learning_rate": 5.032049035413601e-06,
      "loss": 0.7804,
      "step": 778450
    },
    {
      "epoch": 2.7283082217946424,
      "grad_norm": 2.875,
      "learning_rate": 5.031400006749899e-06,
      "loss": 0.8467,
      "step": 778460
    },
    {
      "epoch": 2.728343269301538,
      "grad_norm": 3.140625,
      "learning_rate": 5.030750978086196e-06,
      "loss": 0.8327,
      "step": 778470
    },
    {
      "epoch": 2.728378316808434,
      "grad_norm": 2.671875,
      "learning_rate": 5.030101949422495e-06,
      "loss": 0.7893,
      "step": 778480
    },
    {
      "epoch": 2.7284133643153297,
      "grad_norm": 2.828125,
      "learning_rate": 5.029452920758793e-06,
      "loss": 0.802,
      "step": 778490
    },
    {
      "epoch": 2.728448411822225,
      "grad_norm": 2.828125,
      "learning_rate": 5.028803892095091e-06,
      "loss": 0.8198,
      "step": 778500
    },
    {
      "epoch": 2.7284834593291207,
      "grad_norm": 2.8125,
      "learning_rate": 5.0281548634313895e-06,
      "loss": 0.7878,
      "step": 778510
    },
    {
      "epoch": 2.728518506836016,
      "grad_norm": 3.21875,
      "learning_rate": 5.027505834767687e-06,
      "loss": 0.8694,
      "step": 778520
    },
    {
      "epoch": 2.728553554342912,
      "grad_norm": 2.671875,
      "learning_rate": 5.026856806103985e-06,
      "loss": 0.8047,
      "step": 778530
    },
    {
      "epoch": 2.7285886018498076,
      "grad_norm": 3.296875,
      "learning_rate": 5.0262077774402835e-06,
      "loss": 0.8297,
      "step": 778540
    },
    {
      "epoch": 2.728623649356703,
      "grad_norm": 3.046875,
      "learning_rate": 5.0255587487765815e-06,
      "loss": 0.7658,
      "step": 778550
    },
    {
      "epoch": 2.7286586968635986,
      "grad_norm": 3.15625,
      "learning_rate": 5.0249097201128795e-06,
      "loss": 0.8113,
      "step": 778560
    },
    {
      "epoch": 2.728693744370494,
      "grad_norm": 3.078125,
      "learning_rate": 5.0242606914491775e-06,
      "loss": 0.8172,
      "step": 778570
    },
    {
      "epoch": 2.7287287918773897,
      "grad_norm": 2.875,
      "learning_rate": 5.0236116627854755e-06,
      "loss": 0.735,
      "step": 778580
    },
    {
      "epoch": 2.7287638393842855,
      "grad_norm": 2.984375,
      "learning_rate": 5.0229626341217735e-06,
      "loss": 0.7521,
      "step": 778590
    },
    {
      "epoch": 2.728798886891181,
      "grad_norm": 2.75,
      "learning_rate": 5.0223136054580715e-06,
      "loss": 0.884,
      "step": 778600
    },
    {
      "epoch": 2.7288339343980765,
      "grad_norm": 2.40625,
      "learning_rate": 5.0216645767943695e-06,
      "loss": 0.8076,
      "step": 778610
    },
    {
      "epoch": 2.7288689819049723,
      "grad_norm": 2.71875,
      "learning_rate": 5.0210155481306675e-06,
      "loss": 0.7472,
      "step": 778620
    },
    {
      "epoch": 2.7289040294118676,
      "grad_norm": 2.828125,
      "learning_rate": 5.020366519466966e-06,
      "loss": 0.7407,
      "step": 778630
    },
    {
      "epoch": 2.7289390769187634,
      "grad_norm": 2.859375,
      "learning_rate": 5.019717490803264e-06,
      "loss": 0.8212,
      "step": 778640
    },
    {
      "epoch": 2.728974124425659,
      "grad_norm": 3.203125,
      "learning_rate": 5.019068462139562e-06,
      "loss": 0.8078,
      "step": 778650
    },
    {
      "epoch": 2.7290091719325544,
      "grad_norm": 3.171875,
      "learning_rate": 5.01841943347586e-06,
      "loss": 0.7863,
      "step": 778660
    },
    {
      "epoch": 2.72904421943945,
      "grad_norm": 2.703125,
      "learning_rate": 5.017770404812158e-06,
      "loss": 0.798,
      "step": 778670
    },
    {
      "epoch": 2.7290792669463455,
      "grad_norm": 2.953125,
      "learning_rate": 5.017121376148456e-06,
      "loss": 0.8051,
      "step": 778680
    },
    {
      "epoch": 2.7291143144532413,
      "grad_norm": 3.296875,
      "learning_rate": 5.016472347484754e-06,
      "loss": 0.8811,
      "step": 778690
    },
    {
      "epoch": 2.729149361960137,
      "grad_norm": 2.9375,
      "learning_rate": 5.015823318821053e-06,
      "loss": 0.7259,
      "step": 778700
    },
    {
      "epoch": 2.729184409467033,
      "grad_norm": 3.0,
      "learning_rate": 5.01517429015735e-06,
      "loss": 0.8393,
      "step": 778710
    },
    {
      "epoch": 2.729219456973928,
      "grad_norm": 2.65625,
      "learning_rate": 5.014525261493648e-06,
      "loss": 0.8725,
      "step": 778720
    },
    {
      "epoch": 2.729254504480824,
      "grad_norm": 2.5625,
      "learning_rate": 5.013876232829947e-06,
      "loss": 0.8188,
      "step": 778730
    },
    {
      "epoch": 2.729289551987719,
      "grad_norm": 3.421875,
      "learning_rate": 5.013227204166245e-06,
      "loss": 0.8313,
      "step": 778740
    },
    {
      "epoch": 2.729324599494615,
      "grad_norm": 3.3125,
      "learning_rate": 5.012578175502543e-06,
      "loss": 0.8312,
      "step": 778750
    },
    {
      "epoch": 2.7293596470015107,
      "grad_norm": 2.953125,
      "learning_rate": 5.011929146838841e-06,
      "loss": 0.8209,
      "step": 778760
    },
    {
      "epoch": 2.729394694508406,
      "grad_norm": 3.578125,
      "learning_rate": 5.011280118175139e-06,
      "loss": 0.8386,
      "step": 778770
    },
    {
      "epoch": 2.7294297420153018,
      "grad_norm": 2.90625,
      "learning_rate": 5.010631089511437e-06,
      "loss": 0.751,
      "step": 778780
    },
    {
      "epoch": 2.729464789522197,
      "grad_norm": 3.0625,
      "learning_rate": 5.009982060847736e-06,
      "loss": 0.8005,
      "step": 778790
    },
    {
      "epoch": 2.729499837029093,
      "grad_norm": 3.203125,
      "learning_rate": 5.009333032184033e-06,
      "loss": 0.8085,
      "step": 778800
    },
    {
      "epoch": 2.7295348845359886,
      "grad_norm": 3.265625,
      "learning_rate": 5.008684003520331e-06,
      "loss": 0.7956,
      "step": 778810
    },
    {
      "epoch": 2.7295699320428843,
      "grad_norm": 2.953125,
      "learning_rate": 5.00803497485663e-06,
      "loss": 0.7684,
      "step": 778820
    },
    {
      "epoch": 2.7296049795497797,
      "grad_norm": 3.09375,
      "learning_rate": 5.007385946192928e-06,
      "loss": 0.7815,
      "step": 778830
    },
    {
      "epoch": 2.7296400270566754,
      "grad_norm": 3.296875,
      "learning_rate": 5.006736917529226e-06,
      "loss": 0.878,
      "step": 778840
    },
    {
      "epoch": 2.7296750745635707,
      "grad_norm": 2.78125,
      "learning_rate": 5.006087888865524e-06,
      "loss": 0.7497,
      "step": 778850
    },
    {
      "epoch": 2.7297101220704665,
      "grad_norm": 3.203125,
      "learning_rate": 5.005438860201822e-06,
      "loss": 0.8019,
      "step": 778860
    },
    {
      "epoch": 2.7297451695773622,
      "grad_norm": 2.921875,
      "learning_rate": 5.00478983153812e-06,
      "loss": 0.7574,
      "step": 778870
    },
    {
      "epoch": 2.7297802170842576,
      "grad_norm": 3.171875,
      "learning_rate": 5.004140802874419e-06,
      "loss": 0.7817,
      "step": 778880
    },
    {
      "epoch": 2.7298152645911533,
      "grad_norm": 2.65625,
      "learning_rate": 5.003491774210717e-06,
      "loss": 0.7731,
      "step": 778890
    },
    {
      "epoch": 2.729850312098049,
      "grad_norm": 3.0625,
      "learning_rate": 5.002842745547014e-06,
      "loss": 0.8251,
      "step": 778900
    },
    {
      "epoch": 2.7298853596049444,
      "grad_norm": 2.90625,
      "learning_rate": 5.002193716883313e-06,
      "loss": 0.773,
      "step": 778910
    },
    {
      "epoch": 2.72992040711184,
      "grad_norm": 2.921875,
      "learning_rate": 5.001544688219611e-06,
      "loss": 0.785,
      "step": 778920
    },
    {
      "epoch": 2.729955454618736,
      "grad_norm": 2.796875,
      "learning_rate": 5.000895659555909e-06,
      "loss": 0.802,
      "step": 778930
    },
    {
      "epoch": 2.729990502125631,
      "grad_norm": 2.78125,
      "learning_rate": 5.000246630892207e-06,
      "loss": 0.8567,
      "step": 778940
    },
    {
      "epoch": 2.730025549632527,
      "grad_norm": 2.34375,
      "learning_rate": 4.999597602228505e-06,
      "loss": 0.7966,
      "step": 778950
    },
    {
      "epoch": 2.7300605971394223,
      "grad_norm": 2.8125,
      "learning_rate": 4.998948573564803e-06,
      "loss": 0.8129,
      "step": 778960
    },
    {
      "epoch": 2.730095644646318,
      "grad_norm": 3.015625,
      "learning_rate": 4.998299544901102e-06,
      "loss": 0.7885,
      "step": 778970
    },
    {
      "epoch": 2.730130692153214,
      "grad_norm": 2.953125,
      "learning_rate": 4.9976505162374e-06,
      "loss": 0.9062,
      "step": 778980
    },
    {
      "epoch": 2.730165739660109,
      "grad_norm": 2.703125,
      "learning_rate": 4.997001487573697e-06,
      "loss": 0.8661,
      "step": 778990
    },
    {
      "epoch": 2.730200787167005,
      "grad_norm": 3.359375,
      "learning_rate": 4.996352458909996e-06,
      "loss": 0.8011,
      "step": 779000
    },
    {
      "epoch": 2.7302358346739006,
      "grad_norm": 2.875,
      "learning_rate": 4.995703430246294e-06,
      "loss": 0.8055,
      "step": 779010
    },
    {
      "epoch": 2.730270882180796,
      "grad_norm": 3.3125,
      "learning_rate": 4.995054401582592e-06,
      "loss": 0.876,
      "step": 779020
    },
    {
      "epoch": 2.7303059296876917,
      "grad_norm": 2.765625,
      "learning_rate": 4.99440537291889e-06,
      "loss": 0.8123,
      "step": 779030
    },
    {
      "epoch": 2.7303409771945875,
      "grad_norm": 3.453125,
      "learning_rate": 4.993756344255188e-06,
      "loss": 0.8114,
      "step": 779040
    },
    {
      "epoch": 2.7303760247014828,
      "grad_norm": 2.6875,
      "learning_rate": 4.993107315591486e-06,
      "loss": 0.7489,
      "step": 779050
    },
    {
      "epoch": 2.7304110722083785,
      "grad_norm": 3.40625,
      "learning_rate": 4.9924582869277846e-06,
      "loss": 0.906,
      "step": 779060
    },
    {
      "epoch": 2.730446119715274,
      "grad_norm": 3.21875,
      "learning_rate": 4.9918092582640826e-06,
      "loss": 0.7968,
      "step": 779070
    },
    {
      "epoch": 2.7304811672221696,
      "grad_norm": 2.4375,
      "learning_rate": 4.9911602296003806e-06,
      "loss": 0.8333,
      "step": 779080
    },
    {
      "epoch": 2.7305162147290654,
      "grad_norm": 3.328125,
      "learning_rate": 4.9905112009366786e-06,
      "loss": 0.7665,
      "step": 779090
    },
    {
      "epoch": 2.730551262235961,
      "grad_norm": 2.640625,
      "learning_rate": 4.9898621722729766e-06,
      "loss": 0.7319,
      "step": 779100
    },
    {
      "epoch": 2.7305863097428564,
      "grad_norm": 3.21875,
      "learning_rate": 4.9892131436092746e-06,
      "loss": 0.8211,
      "step": 779110
    },
    {
      "epoch": 2.730621357249752,
      "grad_norm": 2.53125,
      "learning_rate": 4.9885641149455726e-06,
      "loss": 0.7823,
      "step": 779120
    },
    {
      "epoch": 2.7306564047566475,
      "grad_norm": 2.6875,
      "learning_rate": 4.9879150862818706e-06,
      "loss": 0.7633,
      "step": 779130
    },
    {
      "epoch": 2.7306914522635433,
      "grad_norm": 4.0,
      "learning_rate": 4.9872660576181686e-06,
      "loss": 0.8551,
      "step": 779140
    },
    {
      "epoch": 2.730726499770439,
      "grad_norm": 2.4375,
      "learning_rate": 4.9866170289544666e-06,
      "loss": 0.7875,
      "step": 779150
    },
    {
      "epoch": 2.7307615472773343,
      "grad_norm": 2.890625,
      "learning_rate": 4.985968000290765e-06,
      "loss": 0.8545,
      "step": 779160
    },
    {
      "epoch": 2.73079659478423,
      "grad_norm": 2.90625,
      "learning_rate": 4.985318971627063e-06,
      "loss": 0.7861,
      "step": 779170
    },
    {
      "epoch": 2.7308316422911254,
      "grad_norm": 2.84375,
      "learning_rate": 4.984669942963361e-06,
      "loss": 0.7376,
      "step": 779180
    },
    {
      "epoch": 2.730866689798021,
      "grad_norm": 3.484375,
      "learning_rate": 4.984020914299659e-06,
      "loss": 0.8926,
      "step": 779190
    },
    {
      "epoch": 2.730901737304917,
      "grad_norm": 2.671875,
      "learning_rate": 4.983371885635957e-06,
      "loss": 0.7512,
      "step": 779200
    },
    {
      "epoch": 2.7309367848118127,
      "grad_norm": 2.828125,
      "learning_rate": 4.982722856972255e-06,
      "loss": 0.7999,
      "step": 779210
    },
    {
      "epoch": 2.730971832318708,
      "grad_norm": 3.0,
      "learning_rate": 4.982073828308554e-06,
      "loss": 0.8418,
      "step": 779220
    },
    {
      "epoch": 2.7310068798256038,
      "grad_norm": 3.09375,
      "learning_rate": 4.981424799644851e-06,
      "loss": 0.777,
      "step": 779230
    },
    {
      "epoch": 2.731041927332499,
      "grad_norm": 2.734375,
      "learning_rate": 4.980775770981149e-06,
      "loss": 0.8511,
      "step": 779240
    },
    {
      "epoch": 2.731076974839395,
      "grad_norm": 3.15625,
      "learning_rate": 4.980126742317448e-06,
      "loss": 0.7853,
      "step": 779250
    },
    {
      "epoch": 2.7311120223462906,
      "grad_norm": 3.25,
      "learning_rate": 4.979477713653746e-06,
      "loss": 0.7926,
      "step": 779260
    },
    {
      "epoch": 2.731147069853186,
      "grad_norm": 3.328125,
      "learning_rate": 4.978828684990043e-06,
      "loss": 0.7965,
      "step": 779270
    },
    {
      "epoch": 2.7311821173600817,
      "grad_norm": 3.734375,
      "learning_rate": 4.978179656326342e-06,
      "loss": 0.8537,
      "step": 779280
    },
    {
      "epoch": 2.731217164866977,
      "grad_norm": 3.15625,
      "learning_rate": 4.97753062766264e-06,
      "loss": 0.793,
      "step": 779290
    },
    {
      "epoch": 2.7312522123738727,
      "grad_norm": 2.484375,
      "learning_rate": 4.976881598998938e-06,
      "loss": 0.7355,
      "step": 779300
    },
    {
      "epoch": 2.7312872598807685,
      "grad_norm": 3.140625,
      "learning_rate": 4.976232570335237e-06,
      "loss": 0.8677,
      "step": 779310
    },
    {
      "epoch": 2.7313223073876642,
      "grad_norm": 2.4375,
      "learning_rate": 4.975583541671534e-06,
      "loss": 0.7551,
      "step": 779320
    },
    {
      "epoch": 2.7313573548945596,
      "grad_norm": 2.171875,
      "learning_rate": 4.974934513007832e-06,
      "loss": 0.7264,
      "step": 779330
    },
    {
      "epoch": 2.7313924024014553,
      "grad_norm": 2.6875,
      "learning_rate": 4.974285484344131e-06,
      "loss": 0.7899,
      "step": 779340
    },
    {
      "epoch": 2.7314274499083506,
      "grad_norm": 2.953125,
      "learning_rate": 4.973636455680429e-06,
      "loss": 0.7984,
      "step": 779350
    },
    {
      "epoch": 2.7314624974152464,
      "grad_norm": 3.140625,
      "learning_rate": 4.972987427016727e-06,
      "loss": 0.8581,
      "step": 779360
    },
    {
      "epoch": 2.731497544922142,
      "grad_norm": 3.296875,
      "learning_rate": 4.972338398353025e-06,
      "loss": 0.8035,
      "step": 779370
    },
    {
      "epoch": 2.7315325924290375,
      "grad_norm": 3.140625,
      "learning_rate": 4.971689369689323e-06,
      "loss": 0.8886,
      "step": 779380
    },
    {
      "epoch": 2.731567639935933,
      "grad_norm": 2.734375,
      "learning_rate": 4.971040341025621e-06,
      "loss": 0.8508,
      "step": 779390
    },
    {
      "epoch": 2.7316026874428285,
      "grad_norm": 2.890625,
      "learning_rate": 4.97039131236192e-06,
      "loss": 0.7926,
      "step": 779400
    },
    {
      "epoch": 2.7316377349497243,
      "grad_norm": 2.84375,
      "learning_rate": 4.969742283698217e-06,
      "loss": 0.7591,
      "step": 779410
    },
    {
      "epoch": 2.73167278245662,
      "grad_norm": 2.75,
      "learning_rate": 4.969093255034515e-06,
      "loss": 0.8239,
      "step": 779420
    },
    {
      "epoch": 2.731707829963516,
      "grad_norm": 2.921875,
      "learning_rate": 4.968444226370814e-06,
      "loss": 0.8747,
      "step": 779430
    },
    {
      "epoch": 2.731742877470411,
      "grad_norm": 2.578125,
      "learning_rate": 4.967795197707112e-06,
      "loss": 0.7955,
      "step": 779440
    },
    {
      "epoch": 2.731777924977307,
      "grad_norm": 2.71875,
      "learning_rate": 4.96714616904341e-06,
      "loss": 0.7369,
      "step": 779450
    },
    {
      "epoch": 2.731812972484202,
      "grad_norm": 2.5625,
      "learning_rate": 4.966497140379708e-06,
      "loss": 0.7265,
      "step": 779460
    },
    {
      "epoch": 2.731848019991098,
      "grad_norm": 2.625,
      "learning_rate": 4.965848111716006e-06,
      "loss": 0.7933,
      "step": 779470
    },
    {
      "epoch": 2.7318830674979937,
      "grad_norm": 2.75,
      "learning_rate": 4.965199083052304e-06,
      "loss": 0.783,
      "step": 779480
    },
    {
      "epoch": 2.731918115004889,
      "grad_norm": 3.109375,
      "learning_rate": 4.964550054388603e-06,
      "loss": 0.8462,
      "step": 779490
    },
    {
      "epoch": 2.7319531625117848,
      "grad_norm": 2.5,
      "learning_rate": 4.963901025724901e-06,
      "loss": 0.7948,
      "step": 779500
    },
    {
      "epoch": 2.73198821001868,
      "grad_norm": 3.125,
      "learning_rate": 4.963251997061198e-06,
      "loss": 0.8683,
      "step": 779510
    },
    {
      "epoch": 2.732023257525576,
      "grad_norm": 3.0625,
      "learning_rate": 4.962602968397497e-06,
      "loss": 0.7966,
      "step": 779520
    },
    {
      "epoch": 2.7320583050324716,
      "grad_norm": 3.484375,
      "learning_rate": 4.961953939733795e-06,
      "loss": 0.7515,
      "step": 779530
    },
    {
      "epoch": 2.7320933525393674,
      "grad_norm": 2.96875,
      "learning_rate": 4.961304911070093e-06,
      "loss": 0.7501,
      "step": 779540
    },
    {
      "epoch": 2.7321284000462627,
      "grad_norm": 3.03125,
      "learning_rate": 4.960655882406391e-06,
      "loss": 0.8414,
      "step": 779550
    },
    {
      "epoch": 2.7321634475531584,
      "grad_norm": 2.984375,
      "learning_rate": 4.960006853742689e-06,
      "loss": 0.8468,
      "step": 779560
    },
    {
      "epoch": 2.7321984950600537,
      "grad_norm": 2.8125,
      "learning_rate": 4.959357825078987e-06,
      "loss": 0.9232,
      "step": 779570
    },
    {
      "epoch": 2.7322335425669495,
      "grad_norm": 2.84375,
      "learning_rate": 4.958708796415286e-06,
      "loss": 0.7899,
      "step": 779580
    },
    {
      "epoch": 2.7322685900738453,
      "grad_norm": 2.203125,
      "learning_rate": 4.958059767751584e-06,
      "loss": 0.716,
      "step": 779590
    },
    {
      "epoch": 2.7323036375807406,
      "grad_norm": 3.09375,
      "learning_rate": 4.957410739087881e-06,
      "loss": 0.8991,
      "step": 779600
    },
    {
      "epoch": 2.7323386850876363,
      "grad_norm": 3.15625,
      "learning_rate": 4.95676171042418e-06,
      "loss": 0.8038,
      "step": 779610
    },
    {
      "epoch": 2.7323737325945316,
      "grad_norm": 3.03125,
      "learning_rate": 4.956112681760478e-06,
      "loss": 0.7865,
      "step": 779620
    },
    {
      "epoch": 2.7324087801014274,
      "grad_norm": 3.078125,
      "learning_rate": 4.955463653096776e-06,
      "loss": 0.7711,
      "step": 779630
    },
    {
      "epoch": 2.732443827608323,
      "grad_norm": 2.484375,
      "learning_rate": 4.954814624433074e-06,
      "loss": 0.7201,
      "step": 779640
    },
    {
      "epoch": 2.732478875115219,
      "grad_norm": 3.0,
      "learning_rate": 4.954165595769372e-06,
      "loss": 0.7896,
      "step": 779650
    },
    {
      "epoch": 2.7325139226221142,
      "grad_norm": 2.578125,
      "learning_rate": 4.95351656710567e-06,
      "loss": 0.8272,
      "step": 779660
    },
    {
      "epoch": 2.73254897012901,
      "grad_norm": 3.078125,
      "learning_rate": 4.952867538441968e-06,
      "loss": 0.7858,
      "step": 779670
    },
    {
      "epoch": 2.7325840176359053,
      "grad_norm": 4.0625,
      "learning_rate": 4.9522185097782665e-06,
      "loss": 0.7564,
      "step": 779680
    },
    {
      "epoch": 2.732619065142801,
      "grad_norm": 2.421875,
      "learning_rate": 4.9515694811145645e-06,
      "loss": 0.7215,
      "step": 779690
    },
    {
      "epoch": 2.732654112649697,
      "grad_norm": 2.890625,
      "learning_rate": 4.9509204524508625e-06,
      "loss": 0.7753,
      "step": 779700
    },
    {
      "epoch": 2.732689160156592,
      "grad_norm": 3.125,
      "learning_rate": 4.9502714237871605e-06,
      "loss": 0.8951,
      "step": 779710
    },
    {
      "epoch": 2.732724207663488,
      "grad_norm": 3.1875,
      "learning_rate": 4.9496223951234585e-06,
      "loss": 0.7831,
      "step": 779720
    },
    {
      "epoch": 2.732759255170383,
      "grad_norm": 3.03125,
      "learning_rate": 4.9489733664597565e-06,
      "loss": 0.8087,
      "step": 779730
    },
    {
      "epoch": 2.732794302677279,
      "grad_norm": 3.0,
      "learning_rate": 4.9483243377960545e-06,
      "loss": 0.7715,
      "step": 779740
    },
    {
      "epoch": 2.7328293501841747,
      "grad_norm": 2.796875,
      "learning_rate": 4.9476753091323525e-06,
      "loss": 0.7232,
      "step": 779750
    },
    {
      "epoch": 2.7328643976910705,
      "grad_norm": 2.984375,
      "learning_rate": 4.9470262804686505e-06,
      "loss": 0.8425,
      "step": 779760
    },
    {
      "epoch": 2.732899445197966,
      "grad_norm": 2.8125,
      "learning_rate": 4.946377251804949e-06,
      "loss": 0.8223,
      "step": 779770
    },
    {
      "epoch": 2.7329344927048616,
      "grad_norm": 3.171875,
      "learning_rate": 4.945728223141247e-06,
      "loss": 0.8761,
      "step": 779780
    },
    {
      "epoch": 2.732969540211757,
      "grad_norm": 3.0625,
      "learning_rate": 4.9450791944775445e-06,
      "loss": 0.8605,
      "step": 779790
    },
    {
      "epoch": 2.7330045877186526,
      "grad_norm": 2.828125,
      "learning_rate": 4.944430165813843e-06,
      "loss": 0.7617,
      "step": 779800
    },
    {
      "epoch": 2.7330396352255484,
      "grad_norm": 3.140625,
      "learning_rate": 4.943781137150141e-06,
      "loss": 0.8098,
      "step": 779810
    },
    {
      "epoch": 2.7330746827324437,
      "grad_norm": 2.984375,
      "learning_rate": 4.943132108486439e-06,
      "loss": 0.8041,
      "step": 779820
    },
    {
      "epoch": 2.7331097302393395,
      "grad_norm": 3.296875,
      "learning_rate": 4.942483079822738e-06,
      "loss": 0.9172,
      "step": 779830
    },
    {
      "epoch": 2.7331447777462348,
      "grad_norm": 3.21875,
      "learning_rate": 4.941834051159035e-06,
      "loss": 0.8334,
      "step": 779840
    },
    {
      "epoch": 2.7331798252531305,
      "grad_norm": 2.625,
      "learning_rate": 4.941185022495333e-06,
      "loss": 0.7356,
      "step": 779850
    },
    {
      "epoch": 2.7332148727600263,
      "grad_norm": 3.15625,
      "learning_rate": 4.940535993831632e-06,
      "loss": 0.8487,
      "step": 779860
    },
    {
      "epoch": 2.733249920266922,
      "grad_norm": 2.5,
      "learning_rate": 4.93988696516793e-06,
      "loss": 0.7615,
      "step": 779870
    },
    {
      "epoch": 2.7332849677738174,
      "grad_norm": 3.34375,
      "learning_rate": 4.939237936504227e-06,
      "loss": 0.7907,
      "step": 779880
    },
    {
      "epoch": 2.733320015280713,
      "grad_norm": 2.96875,
      "learning_rate": 4.938588907840526e-06,
      "loss": 0.8019,
      "step": 779890
    },
    {
      "epoch": 2.7333550627876084,
      "grad_norm": 2.5,
      "learning_rate": 4.937939879176824e-06,
      "loss": 0.8029,
      "step": 779900
    },
    {
      "epoch": 2.733390110294504,
      "grad_norm": 3.0625,
      "learning_rate": 4.937290850513122e-06,
      "loss": 0.7946,
      "step": 779910
    },
    {
      "epoch": 2.7334251578014,
      "grad_norm": 2.765625,
      "learning_rate": 4.936641821849421e-06,
      "loss": 0.8714,
      "step": 779920
    },
    {
      "epoch": 2.7334602053082953,
      "grad_norm": 2.71875,
      "learning_rate": 4.935992793185718e-06,
      "loss": 0.7347,
      "step": 779930
    },
    {
      "epoch": 2.733495252815191,
      "grad_norm": 2.953125,
      "learning_rate": 4.935343764522016e-06,
      "loss": 0.8452,
      "step": 779940
    },
    {
      "epoch": 2.7335303003220863,
      "grad_norm": 3.078125,
      "learning_rate": 4.934694735858315e-06,
      "loss": 0.7767,
      "step": 779950
    },
    {
      "epoch": 2.733565347828982,
      "grad_norm": 3.28125,
      "learning_rate": 4.934045707194613e-06,
      "loss": 0.7984,
      "step": 779960
    },
    {
      "epoch": 2.733600395335878,
      "grad_norm": 2.890625,
      "learning_rate": 4.933396678530911e-06,
      "loss": 0.7508,
      "step": 779970
    },
    {
      "epoch": 2.7336354428427736,
      "grad_norm": 2.78125,
      "learning_rate": 4.932747649867209e-06,
      "loss": 0.8153,
      "step": 779980
    },
    {
      "epoch": 2.733670490349669,
      "grad_norm": 3.234375,
      "learning_rate": 4.932098621203507e-06,
      "loss": 0.9499,
      "step": 779990
    },
    {
      "epoch": 2.7337055378565647,
      "grad_norm": 2.796875,
      "learning_rate": 4.931449592539805e-06,
      "loss": 0.7665,
      "step": 780000
    },
    {
      "epoch": 2.7337055378565647,
      "eval_loss": 0.7525817155838013,
      "eval_runtime": 556.9458,
      "eval_samples_per_second": 683.076,
      "eval_steps_per_second": 56.923,
      "step": 780000
    },
    {
      "epoch": 2.73374058536346,
      "grad_norm": 2.5,
      "learning_rate": 4.930800563876104e-06,
      "loss": 0.8115,
      "step": 780010
    },
    {
      "epoch": 2.7337756328703557,
      "grad_norm": 3.0625,
      "learning_rate": 4.930151535212402e-06,
      "loss": 0.8331,
      "step": 780020
    },
    {
      "epoch": 2.7338106803772515,
      "grad_norm": 2.984375,
      "learning_rate": 4.929502506548699e-06,
      "loss": 0.7517,
      "step": 780030
    },
    {
      "epoch": 2.733845727884147,
      "grad_norm": 2.96875,
      "learning_rate": 4.928853477884998e-06,
      "loss": 0.82,
      "step": 780040
    },
    {
      "epoch": 2.7338807753910426,
      "grad_norm": 3.0,
      "learning_rate": 4.928204449221296e-06,
      "loss": 0.7409,
      "step": 780050
    },
    {
      "epoch": 2.733915822897938,
      "grad_norm": 3.078125,
      "learning_rate": 4.927555420557594e-06,
      "loss": 0.7297,
      "step": 780060
    },
    {
      "epoch": 2.7339508704048336,
      "grad_norm": 2.703125,
      "learning_rate": 4.926906391893892e-06,
      "loss": 0.8385,
      "step": 780070
    },
    {
      "epoch": 2.7339859179117294,
      "grad_norm": 2.796875,
      "learning_rate": 4.92625736323019e-06,
      "loss": 0.7449,
      "step": 780080
    },
    {
      "epoch": 2.734020965418625,
      "grad_norm": 2.625,
      "learning_rate": 4.925608334566488e-06,
      "loss": 0.7977,
      "step": 780090
    },
    {
      "epoch": 2.7340560129255205,
      "grad_norm": 3.046875,
      "learning_rate": 4.924959305902786e-06,
      "loss": 0.8326,
      "step": 780100
    },
    {
      "epoch": 2.7340910604324162,
      "grad_norm": 3.1875,
      "learning_rate": 4.924310277239085e-06,
      "loss": 0.941,
      "step": 780110
    },
    {
      "epoch": 2.7341261079393115,
      "grad_norm": 3.34375,
      "learning_rate": 4.923661248575382e-06,
      "loss": 0.8708,
      "step": 780120
    },
    {
      "epoch": 2.7341611554462073,
      "grad_norm": 3.046875,
      "learning_rate": 4.923012219911681e-06,
      "loss": 0.76,
      "step": 780130
    },
    {
      "epoch": 2.734196202953103,
      "grad_norm": 2.90625,
      "learning_rate": 4.922363191247979e-06,
      "loss": 0.846,
      "step": 780140
    },
    {
      "epoch": 2.7342312504599984,
      "grad_norm": 2.6875,
      "learning_rate": 4.921714162584277e-06,
      "loss": 0.839,
      "step": 780150
    },
    {
      "epoch": 2.734266297966894,
      "grad_norm": 2.984375,
      "learning_rate": 4.921065133920575e-06,
      "loss": 0.8326,
      "step": 780160
    },
    {
      "epoch": 2.7343013454737894,
      "grad_norm": 2.875,
      "learning_rate": 4.920416105256873e-06,
      "loss": 0.7527,
      "step": 780170
    },
    {
      "epoch": 2.734336392980685,
      "grad_norm": 2.5625,
      "learning_rate": 4.919767076593171e-06,
      "loss": 0.7773,
      "step": 780180
    },
    {
      "epoch": 2.734371440487581,
      "grad_norm": 2.359375,
      "learning_rate": 4.919118047929469e-06,
      "loss": 0.736,
      "step": 780190
    },
    {
      "epoch": 2.7344064879944767,
      "grad_norm": 2.734375,
      "learning_rate": 4.9184690192657676e-06,
      "loss": 0.8121,
      "step": 780200
    },
    {
      "epoch": 2.734441535501372,
      "grad_norm": 3.375,
      "learning_rate": 4.917819990602065e-06,
      "loss": 0.813,
      "step": 780210
    },
    {
      "epoch": 2.734476583008268,
      "grad_norm": 2.59375,
      "learning_rate": 4.917170961938363e-06,
      "loss": 0.7842,
      "step": 780220
    },
    {
      "epoch": 2.734511630515163,
      "grad_norm": 2.875,
      "learning_rate": 4.9165219332746616e-06,
      "loss": 0.7992,
      "step": 780230
    },
    {
      "epoch": 2.734546678022059,
      "grad_norm": 2.90625,
      "learning_rate": 4.9158729046109596e-06,
      "loss": 0.8253,
      "step": 780240
    },
    {
      "epoch": 2.7345817255289546,
      "grad_norm": 3.0625,
      "learning_rate": 4.9152238759472576e-06,
      "loss": 0.7518,
      "step": 780250
    },
    {
      "epoch": 2.73461677303585,
      "grad_norm": 3.09375,
      "learning_rate": 4.9145748472835556e-06,
      "loss": 0.7553,
      "step": 780260
    },
    {
      "epoch": 2.7346518205427457,
      "grad_norm": 2.640625,
      "learning_rate": 4.9139258186198536e-06,
      "loss": 0.7942,
      "step": 780270
    },
    {
      "epoch": 2.7346868680496415,
      "grad_norm": 2.6875,
      "learning_rate": 4.9132767899561516e-06,
      "loss": 0.7624,
      "step": 780280
    },
    {
      "epoch": 2.7347219155565368,
      "grad_norm": 3.0625,
      "learning_rate": 4.91262776129245e-06,
      "loss": 0.8275,
      "step": 780290
    },
    {
      "epoch": 2.7347569630634325,
      "grad_norm": 2.8125,
      "learning_rate": 4.911978732628748e-06,
      "loss": 0.8671,
      "step": 780300
    },
    {
      "epoch": 2.7347920105703283,
      "grad_norm": 3.234375,
      "learning_rate": 4.9113297039650456e-06,
      "loss": 0.7253,
      "step": 780310
    },
    {
      "epoch": 2.7348270580772236,
      "grad_norm": 2.640625,
      "learning_rate": 4.910680675301344e-06,
      "loss": 0.7663,
      "step": 780320
    },
    {
      "epoch": 2.7348621055841194,
      "grad_norm": 3.203125,
      "learning_rate": 4.910031646637642e-06,
      "loss": 0.7577,
      "step": 780330
    },
    {
      "epoch": 2.7348971530910147,
      "grad_norm": 2.09375,
      "learning_rate": 4.90938261797394e-06,
      "loss": 0.7652,
      "step": 780340
    },
    {
      "epoch": 2.7349322005979104,
      "grad_norm": 2.234375,
      "learning_rate": 4.908733589310238e-06,
      "loss": 0.7526,
      "step": 780350
    },
    {
      "epoch": 2.734967248104806,
      "grad_norm": 2.828125,
      "learning_rate": 4.908084560646536e-06,
      "loss": 0.8128,
      "step": 780360
    },
    {
      "epoch": 2.735002295611702,
      "grad_norm": 3.453125,
      "learning_rate": 4.907435531982834e-06,
      "loss": 0.9093,
      "step": 780370
    },
    {
      "epoch": 2.7350373431185973,
      "grad_norm": 2.40625,
      "learning_rate": 4.906786503319133e-06,
      "loss": 0.7682,
      "step": 780380
    },
    {
      "epoch": 2.735072390625493,
      "grad_norm": 3.140625,
      "learning_rate": 4.906137474655431e-06,
      "loss": 0.8092,
      "step": 780390
    },
    {
      "epoch": 2.7351074381323883,
      "grad_norm": 2.90625,
      "learning_rate": 4.905488445991728e-06,
      "loss": 0.7903,
      "step": 780400
    },
    {
      "epoch": 2.735142485639284,
      "grad_norm": 3.578125,
      "learning_rate": 4.904839417328027e-06,
      "loss": 0.8219,
      "step": 780410
    },
    {
      "epoch": 2.73517753314618,
      "grad_norm": 2.8125,
      "learning_rate": 4.904190388664325e-06,
      "loss": 0.8206,
      "step": 780420
    },
    {
      "epoch": 2.735212580653075,
      "grad_norm": 2.65625,
      "learning_rate": 4.903541360000623e-06,
      "loss": 0.6983,
      "step": 780430
    },
    {
      "epoch": 2.735247628159971,
      "grad_norm": 3.28125,
      "learning_rate": 4.902892331336922e-06,
      "loss": 0.7699,
      "step": 780440
    },
    {
      "epoch": 2.7352826756668662,
      "grad_norm": 2.640625,
      "learning_rate": 4.902243302673219e-06,
      "loss": 0.7348,
      "step": 780450
    },
    {
      "epoch": 2.735317723173762,
      "grad_norm": 3.140625,
      "learning_rate": 4.901594274009517e-06,
      "loss": 0.763,
      "step": 780460
    },
    {
      "epoch": 2.7353527706806577,
      "grad_norm": 3.15625,
      "learning_rate": 4.900945245345816e-06,
      "loss": 0.8566,
      "step": 780470
    },
    {
      "epoch": 2.7353878181875535,
      "grad_norm": 2.703125,
      "learning_rate": 4.900296216682114e-06,
      "loss": 0.8061,
      "step": 780480
    },
    {
      "epoch": 2.735422865694449,
      "grad_norm": 2.703125,
      "learning_rate": 4.899647188018412e-06,
      "loss": 0.7502,
      "step": 780490
    },
    {
      "epoch": 2.7354579132013446,
      "grad_norm": 3.234375,
      "learning_rate": 4.89899815935471e-06,
      "loss": 0.7603,
      "step": 780500
    },
    {
      "epoch": 2.73549296070824,
      "grad_norm": 2.859375,
      "learning_rate": 4.898349130691008e-06,
      "loss": 0.813,
      "step": 780510
    },
    {
      "epoch": 2.7355280082151356,
      "grad_norm": 3.046875,
      "learning_rate": 4.897700102027306e-06,
      "loss": 0.8582,
      "step": 780520
    },
    {
      "epoch": 2.7355630557220314,
      "grad_norm": 2.71875,
      "learning_rate": 4.897051073363604e-06,
      "loss": 0.7606,
      "step": 780530
    },
    {
      "epoch": 2.7355981032289267,
      "grad_norm": 2.953125,
      "learning_rate": 4.896402044699902e-06,
      "loss": 0.8301,
      "step": 780540
    },
    {
      "epoch": 2.7356331507358225,
      "grad_norm": 2.859375,
      "learning_rate": 4.8957530160362e-06,
      "loss": 0.7391,
      "step": 780550
    },
    {
      "epoch": 2.735668198242718,
      "grad_norm": 2.859375,
      "learning_rate": 4.895103987372499e-06,
      "loss": 0.8745,
      "step": 780560
    },
    {
      "epoch": 2.7357032457496135,
      "grad_norm": 2.453125,
      "learning_rate": 4.894454958708797e-06,
      "loss": 0.7495,
      "step": 780570
    },
    {
      "epoch": 2.7357382932565093,
      "grad_norm": 2.75,
      "learning_rate": 4.893805930045095e-06,
      "loss": 0.8129,
      "step": 780580
    },
    {
      "epoch": 2.735773340763405,
      "grad_norm": 2.671875,
      "learning_rate": 4.893156901381393e-06,
      "loss": 0.8086,
      "step": 780590
    },
    {
      "epoch": 2.7358083882703004,
      "grad_norm": 2.890625,
      "learning_rate": 4.892507872717691e-06,
      "loss": 0.8111,
      "step": 780600
    },
    {
      "epoch": 2.735843435777196,
      "grad_norm": 2.84375,
      "learning_rate": 4.891858844053989e-06,
      "loss": 0.8205,
      "step": 780610
    },
    {
      "epoch": 2.7358784832840914,
      "grad_norm": 2.828125,
      "learning_rate": 4.891209815390287e-06,
      "loss": 0.8247,
      "step": 780620
    },
    {
      "epoch": 2.735913530790987,
      "grad_norm": 3.59375,
      "learning_rate": 4.890560786726586e-06,
      "loss": 0.823,
      "step": 780630
    },
    {
      "epoch": 2.735948578297883,
      "grad_norm": 2.6875,
      "learning_rate": 4.889911758062883e-06,
      "loss": 0.8101,
      "step": 780640
    },
    {
      "epoch": 2.7359836258047783,
      "grad_norm": 2.765625,
      "learning_rate": 4.889262729399181e-06,
      "loss": 0.7885,
      "step": 780650
    },
    {
      "epoch": 2.736018673311674,
      "grad_norm": 2.765625,
      "learning_rate": 4.88861370073548e-06,
      "loss": 0.7862,
      "step": 780660
    },
    {
      "epoch": 2.7360537208185693,
      "grad_norm": 2.96875,
      "learning_rate": 4.887964672071778e-06,
      "loss": 0.8182,
      "step": 780670
    },
    {
      "epoch": 2.736088768325465,
      "grad_norm": 2.71875,
      "learning_rate": 4.887315643408076e-06,
      "loss": 0.8188,
      "step": 780680
    },
    {
      "epoch": 2.736123815832361,
      "grad_norm": 2.5625,
      "learning_rate": 4.886666614744374e-06,
      "loss": 0.7577,
      "step": 780690
    },
    {
      "epoch": 2.7361588633392566,
      "grad_norm": 2.546875,
      "learning_rate": 4.886017586080672e-06,
      "loss": 0.7689,
      "step": 780700
    },
    {
      "epoch": 2.736193910846152,
      "grad_norm": 3.03125,
      "learning_rate": 4.88536855741697e-06,
      "loss": 0.8081,
      "step": 780710
    },
    {
      "epoch": 2.7362289583530477,
      "grad_norm": 3.484375,
      "learning_rate": 4.884719528753269e-06,
      "loss": 0.8372,
      "step": 780720
    },
    {
      "epoch": 2.736264005859943,
      "grad_norm": 2.828125,
      "learning_rate": 4.884070500089566e-06,
      "loss": 0.7661,
      "step": 780730
    },
    {
      "epoch": 2.7362990533668388,
      "grad_norm": 2.8125,
      "learning_rate": 4.883421471425864e-06,
      "loss": 0.8178,
      "step": 780740
    },
    {
      "epoch": 2.7363341008737345,
      "grad_norm": 2.921875,
      "learning_rate": 4.882772442762163e-06,
      "loss": 0.8702,
      "step": 780750
    },
    {
      "epoch": 2.73636914838063,
      "grad_norm": 3.125,
      "learning_rate": 4.882123414098461e-06,
      "loss": 0.8026,
      "step": 780760
    },
    {
      "epoch": 2.7364041958875256,
      "grad_norm": 2.953125,
      "learning_rate": 4.881474385434759e-06,
      "loss": 0.7544,
      "step": 780770
    },
    {
      "epoch": 2.736439243394421,
      "grad_norm": 2.90625,
      "learning_rate": 4.880825356771057e-06,
      "loss": 0.8377,
      "step": 780780
    },
    {
      "epoch": 2.7364742909013167,
      "grad_norm": 3.0625,
      "learning_rate": 4.880176328107355e-06,
      "loss": 0.8568,
      "step": 780790
    },
    {
      "epoch": 2.7365093384082124,
      "grad_norm": 3.125,
      "learning_rate": 4.879527299443653e-06,
      "loss": 0.7953,
      "step": 780800
    },
    {
      "epoch": 2.736544385915108,
      "grad_norm": 2.90625,
      "learning_rate": 4.8788782707799515e-06,
      "loss": 0.7875,
      "step": 780810
    },
    {
      "epoch": 2.7365794334220035,
      "grad_norm": 3.078125,
      "learning_rate": 4.878229242116249e-06,
      "loss": 0.7062,
      "step": 780820
    },
    {
      "epoch": 2.7366144809288993,
      "grad_norm": 3.125,
      "learning_rate": 4.877580213452547e-06,
      "loss": 0.8128,
      "step": 780830
    },
    {
      "epoch": 2.7366495284357946,
      "grad_norm": 2.875,
      "learning_rate": 4.8769311847888455e-06,
      "loss": 0.8207,
      "step": 780840
    },
    {
      "epoch": 2.7366845759426903,
      "grad_norm": 2.640625,
      "learning_rate": 4.8762821561251435e-06,
      "loss": 0.7562,
      "step": 780850
    },
    {
      "epoch": 2.736719623449586,
      "grad_norm": 2.90625,
      "learning_rate": 4.8756331274614415e-06,
      "loss": 0.8901,
      "step": 780860
    },
    {
      "epoch": 2.7367546709564814,
      "grad_norm": 2.640625,
      "learning_rate": 4.8749840987977395e-06,
      "loss": 0.7621,
      "step": 780870
    },
    {
      "epoch": 2.736789718463377,
      "grad_norm": 3.15625,
      "learning_rate": 4.8743350701340375e-06,
      "loss": 0.8162,
      "step": 780880
    },
    {
      "epoch": 2.7368247659702725,
      "grad_norm": 3.03125,
      "learning_rate": 4.8736860414703355e-06,
      "loss": 0.8773,
      "step": 780890
    },
    {
      "epoch": 2.7368598134771682,
      "grad_norm": 2.859375,
      "learning_rate": 4.873037012806634e-06,
      "loss": 0.7918,
      "step": 780900
    },
    {
      "epoch": 2.736894860984064,
      "grad_norm": 2.546875,
      "learning_rate": 4.872387984142932e-06,
      "loss": 0.8145,
      "step": 780910
    },
    {
      "epoch": 2.7369299084909597,
      "grad_norm": 2.71875,
      "learning_rate": 4.8717389554792295e-06,
      "loss": 0.786,
      "step": 780920
    },
    {
      "epoch": 2.736964955997855,
      "grad_norm": 3.046875,
      "learning_rate": 4.871089926815528e-06,
      "loss": 0.7508,
      "step": 780930
    },
    {
      "epoch": 2.737000003504751,
      "grad_norm": 3.078125,
      "learning_rate": 4.870440898151826e-06,
      "loss": 0.8168,
      "step": 780940
    },
    {
      "epoch": 2.737035051011646,
      "grad_norm": 2.8125,
      "learning_rate": 4.869791869488124e-06,
      "loss": 0.7798,
      "step": 780950
    },
    {
      "epoch": 2.737070098518542,
      "grad_norm": 2.65625,
      "learning_rate": 4.869142840824422e-06,
      "loss": 0.7713,
      "step": 780960
    },
    {
      "epoch": 2.7371051460254376,
      "grad_norm": 2.921875,
      "learning_rate": 4.86849381216072e-06,
      "loss": 0.8345,
      "step": 780970
    },
    {
      "epoch": 2.737140193532333,
      "grad_norm": 2.65625,
      "learning_rate": 4.867844783497018e-06,
      "loss": 0.7913,
      "step": 780980
    },
    {
      "epoch": 2.7371752410392287,
      "grad_norm": 3.0625,
      "learning_rate": 4.867195754833317e-06,
      "loss": 0.824,
      "step": 780990
    },
    {
      "epoch": 2.737210288546124,
      "grad_norm": 3.09375,
      "learning_rate": 4.866546726169615e-06,
      "loss": 0.7449,
      "step": 781000
    },
    {
      "epoch": 2.73724533605302,
      "grad_norm": 2.484375,
      "learning_rate": 4.865897697505912e-06,
      "loss": 0.8683,
      "step": 781010
    },
    {
      "epoch": 2.7372803835599155,
      "grad_norm": 2.484375,
      "learning_rate": 4.865248668842211e-06,
      "loss": 0.828,
      "step": 781020
    },
    {
      "epoch": 2.7373154310668113,
      "grad_norm": 2.9375,
      "learning_rate": 4.864599640178509e-06,
      "loss": 0.8031,
      "step": 781030
    },
    {
      "epoch": 2.7373504785737066,
      "grad_norm": 2.984375,
      "learning_rate": 4.863950611514807e-06,
      "loss": 0.6893,
      "step": 781040
    },
    {
      "epoch": 2.7373855260806024,
      "grad_norm": 3.15625,
      "learning_rate": 4.863301582851105e-06,
      "loss": 0.832,
      "step": 781050
    },
    {
      "epoch": 2.7374205735874977,
      "grad_norm": 2.21875,
      "learning_rate": 4.862652554187403e-06,
      "loss": 0.7867,
      "step": 781060
    },
    {
      "epoch": 2.7374556210943934,
      "grad_norm": 2.890625,
      "learning_rate": 4.862003525523701e-06,
      "loss": 0.8181,
      "step": 781070
    },
    {
      "epoch": 2.737490668601289,
      "grad_norm": 2.984375,
      "learning_rate": 4.861354496859999e-06,
      "loss": 0.781,
      "step": 781080
    },
    {
      "epoch": 2.7375257161081845,
      "grad_norm": 3.359375,
      "learning_rate": 4.860705468196298e-06,
      "loss": 0.8627,
      "step": 781090
    },
    {
      "epoch": 2.7375607636150803,
      "grad_norm": 2.96875,
      "learning_rate": 4.860056439532596e-06,
      "loss": 0.8127,
      "step": 781100
    },
    {
      "epoch": 2.7375958111219756,
      "grad_norm": 3.40625,
      "learning_rate": 4.859407410868894e-06,
      "loss": 0.8746,
      "step": 781110
    },
    {
      "epoch": 2.7376308586288713,
      "grad_norm": 3.109375,
      "learning_rate": 4.858758382205192e-06,
      "loss": 0.838,
      "step": 781120
    },
    {
      "epoch": 2.737665906135767,
      "grad_norm": 2.8125,
      "learning_rate": 4.85810935354149e-06,
      "loss": 0.8323,
      "step": 781130
    },
    {
      "epoch": 2.737700953642663,
      "grad_norm": 2.78125,
      "learning_rate": 4.857460324877788e-06,
      "loss": 0.823,
      "step": 781140
    },
    {
      "epoch": 2.737736001149558,
      "grad_norm": 2.953125,
      "learning_rate": 4.856811296214086e-06,
      "loss": 0.8308,
      "step": 781150
    },
    {
      "epoch": 2.737771048656454,
      "grad_norm": 3.125,
      "learning_rate": 4.856162267550384e-06,
      "loss": 0.8365,
      "step": 781160
    },
    {
      "epoch": 2.7378060961633492,
      "grad_norm": 2.84375,
      "learning_rate": 4.855513238886682e-06,
      "loss": 0.8284,
      "step": 781170
    },
    {
      "epoch": 2.737841143670245,
      "grad_norm": 2.484375,
      "learning_rate": 4.854864210222981e-06,
      "loss": 0.8013,
      "step": 781180
    },
    {
      "epoch": 2.7378761911771408,
      "grad_norm": 2.96875,
      "learning_rate": 4.854215181559279e-06,
      "loss": 0.9266,
      "step": 781190
    },
    {
      "epoch": 2.737911238684036,
      "grad_norm": 2.8125,
      "learning_rate": 4.853566152895576e-06,
      "loss": 0.851,
      "step": 781200
    },
    {
      "epoch": 2.737946286190932,
      "grad_norm": 2.96875,
      "learning_rate": 4.852917124231875e-06,
      "loss": 0.8102,
      "step": 781210
    },
    {
      "epoch": 2.737981333697827,
      "grad_norm": 2.765625,
      "learning_rate": 4.852268095568173e-06,
      "loss": 0.7443,
      "step": 781220
    },
    {
      "epoch": 2.738016381204723,
      "grad_norm": 2.46875,
      "learning_rate": 4.851619066904471e-06,
      "loss": 0.7846,
      "step": 781230
    },
    {
      "epoch": 2.7380514287116187,
      "grad_norm": 2.953125,
      "learning_rate": 4.85097003824077e-06,
      "loss": 0.8357,
      "step": 781240
    },
    {
      "epoch": 2.7380864762185144,
      "grad_norm": 2.25,
      "learning_rate": 4.850321009577067e-06,
      "loss": 0.7655,
      "step": 781250
    },
    {
      "epoch": 2.7381215237254097,
      "grad_norm": 3.078125,
      "learning_rate": 4.849671980913365e-06,
      "loss": 0.7836,
      "step": 781260
    },
    {
      "epoch": 2.7381565712323055,
      "grad_norm": 2.921875,
      "learning_rate": 4.849022952249664e-06,
      "loss": 0.8485,
      "step": 781270
    },
    {
      "epoch": 2.738191618739201,
      "grad_norm": 2.71875,
      "learning_rate": 4.848373923585962e-06,
      "loss": 0.736,
      "step": 781280
    },
    {
      "epoch": 2.7382266662460966,
      "grad_norm": 2.9375,
      "learning_rate": 4.84772489492226e-06,
      "loss": 0.8293,
      "step": 781290
    },
    {
      "epoch": 2.7382617137529923,
      "grad_norm": 2.609375,
      "learning_rate": 4.847075866258558e-06,
      "loss": 0.8231,
      "step": 781300
    },
    {
      "epoch": 2.7382967612598876,
      "grad_norm": 2.96875,
      "learning_rate": 4.846426837594856e-06,
      "loss": 0.8012,
      "step": 781310
    },
    {
      "epoch": 2.7383318087667834,
      "grad_norm": 2.9375,
      "learning_rate": 4.845777808931154e-06,
      "loss": 0.8107,
      "step": 781320
    },
    {
      "epoch": 2.7383668562736787,
      "grad_norm": 3.3125,
      "learning_rate": 4.8451287802674526e-06,
      "loss": 0.7764,
      "step": 781330
    },
    {
      "epoch": 2.7384019037805745,
      "grad_norm": 2.671875,
      "learning_rate": 4.84447975160375e-06,
      "loss": 0.7401,
      "step": 781340
    },
    {
      "epoch": 2.7384369512874702,
      "grad_norm": 3.125,
      "learning_rate": 4.843830722940048e-06,
      "loss": 0.8488,
      "step": 781350
    },
    {
      "epoch": 2.738471998794366,
      "grad_norm": 3.421875,
      "learning_rate": 4.8431816942763466e-06,
      "loss": 0.784,
      "step": 781360
    },
    {
      "epoch": 2.7385070463012613,
      "grad_norm": 3.25,
      "learning_rate": 4.8425326656126446e-06,
      "loss": 0.835,
      "step": 781370
    },
    {
      "epoch": 2.738542093808157,
      "grad_norm": 2.765625,
      "learning_rate": 4.8418836369489426e-06,
      "loss": 0.8519,
      "step": 781380
    },
    {
      "epoch": 2.7385771413150524,
      "grad_norm": 2.8125,
      "learning_rate": 4.8412346082852406e-06,
      "loss": 0.7799,
      "step": 781390
    },
    {
      "epoch": 2.738612188821948,
      "grad_norm": 2.9375,
      "learning_rate": 4.8405855796215386e-06,
      "loss": 0.7738,
      "step": 781400
    },
    {
      "epoch": 2.738647236328844,
      "grad_norm": 2.984375,
      "learning_rate": 4.8399365509578366e-06,
      "loss": 0.7841,
      "step": 781410
    },
    {
      "epoch": 2.738682283835739,
      "grad_norm": 3.15625,
      "learning_rate": 4.839287522294135e-06,
      "loss": 0.9116,
      "step": 781420
    },
    {
      "epoch": 2.738717331342635,
      "grad_norm": 3.03125,
      "learning_rate": 4.838638493630433e-06,
      "loss": 0.7905,
      "step": 781430
    },
    {
      "epoch": 2.7387523788495303,
      "grad_norm": 3.265625,
      "learning_rate": 4.8379894649667306e-06,
      "loss": 0.8053,
      "step": 781440
    },
    {
      "epoch": 2.738787426356426,
      "grad_norm": 2.921875,
      "learning_rate": 4.837340436303029e-06,
      "loss": 0.756,
      "step": 781450
    },
    {
      "epoch": 2.738822473863322,
      "grad_norm": 2.703125,
      "learning_rate": 4.836691407639327e-06,
      "loss": 0.83,
      "step": 781460
    },
    {
      "epoch": 2.7388575213702175,
      "grad_norm": 2.625,
      "learning_rate": 4.836042378975625e-06,
      "loss": 0.768,
      "step": 781470
    },
    {
      "epoch": 2.738892568877113,
      "grad_norm": 2.765625,
      "learning_rate": 4.835393350311923e-06,
      "loss": 0.8192,
      "step": 781480
    },
    {
      "epoch": 2.7389276163840086,
      "grad_norm": 2.953125,
      "learning_rate": 4.834744321648221e-06,
      "loss": 0.8591,
      "step": 781490
    },
    {
      "epoch": 2.738962663890904,
      "grad_norm": 2.875,
      "learning_rate": 4.834095292984519e-06,
      "loss": 0.8655,
      "step": 781500
    },
    {
      "epoch": 2.7389977113977997,
      "grad_norm": 2.796875,
      "learning_rate": 4.833446264320817e-06,
      "loss": 0.7787,
      "step": 781510
    },
    {
      "epoch": 2.7390327589046954,
      "grad_norm": 2.765625,
      "learning_rate": 4.832797235657116e-06,
      "loss": 0.744,
      "step": 781520
    },
    {
      "epoch": 2.7390678064115908,
      "grad_norm": 2.9375,
      "learning_rate": 4.832148206993413e-06,
      "loss": 0.8175,
      "step": 781530
    },
    {
      "epoch": 2.7391028539184865,
      "grad_norm": 2.25,
      "learning_rate": 4.831499178329712e-06,
      "loss": 0.7473,
      "step": 781540
    },
    {
      "epoch": 2.7391379014253823,
      "grad_norm": 3.125,
      "learning_rate": 4.83085014966601e-06,
      "loss": 0.7975,
      "step": 781550
    },
    {
      "epoch": 2.7391729489322776,
      "grad_norm": 3.203125,
      "learning_rate": 4.830201121002308e-06,
      "loss": 0.8392,
      "step": 781560
    },
    {
      "epoch": 2.7392079964391733,
      "grad_norm": 2.71875,
      "learning_rate": 4.829552092338606e-06,
      "loss": 0.8047,
      "step": 781570
    },
    {
      "epoch": 2.739243043946069,
      "grad_norm": 4.03125,
      "learning_rate": 4.828903063674904e-06,
      "loss": 0.8519,
      "step": 781580
    },
    {
      "epoch": 2.7392780914529644,
      "grad_norm": 2.703125,
      "learning_rate": 4.828254035011202e-06,
      "loss": 0.726,
      "step": 781590
    },
    {
      "epoch": 2.73931313895986,
      "grad_norm": 2.8125,
      "learning_rate": 4.8276050063475e-06,
      "loss": 0.774,
      "step": 781600
    },
    {
      "epoch": 2.7393481864667555,
      "grad_norm": 2.828125,
      "learning_rate": 4.826955977683799e-06,
      "loss": 0.8228,
      "step": 781610
    },
    {
      "epoch": 2.7393832339736512,
      "grad_norm": 2.71875,
      "learning_rate": 4.826306949020096e-06,
      "loss": 0.808,
      "step": 781620
    },
    {
      "epoch": 2.739418281480547,
      "grad_norm": 3.03125,
      "learning_rate": 4.825657920356394e-06,
      "loss": 0.885,
      "step": 781630
    },
    {
      "epoch": 2.7394533289874423,
      "grad_norm": 3.1875,
      "learning_rate": 4.825008891692693e-06,
      "loss": 0.8042,
      "step": 781640
    },
    {
      "epoch": 2.739488376494338,
      "grad_norm": 3.15625,
      "learning_rate": 4.824359863028991e-06,
      "loss": 0.815,
      "step": 781650
    },
    {
      "epoch": 2.739523424001234,
      "grad_norm": 3.359375,
      "learning_rate": 4.823710834365289e-06,
      "loss": 0.7691,
      "step": 781660
    },
    {
      "epoch": 2.739558471508129,
      "grad_norm": 2.5,
      "learning_rate": 4.823061805701587e-06,
      "loss": 0.7041,
      "step": 781670
    },
    {
      "epoch": 2.739593519015025,
      "grad_norm": 3.046875,
      "learning_rate": 4.822412777037885e-06,
      "loss": 0.7305,
      "step": 781680
    },
    {
      "epoch": 2.7396285665219207,
      "grad_norm": 2.953125,
      "learning_rate": 4.821763748374183e-06,
      "loss": 0.786,
      "step": 781690
    },
    {
      "epoch": 2.739663614028816,
      "grad_norm": 3.015625,
      "learning_rate": 4.821114719710482e-06,
      "loss": 0.7478,
      "step": 781700
    },
    {
      "epoch": 2.7396986615357117,
      "grad_norm": 2.65625,
      "learning_rate": 4.82046569104678e-06,
      "loss": 0.817,
      "step": 781710
    },
    {
      "epoch": 2.739733709042607,
      "grad_norm": 3.125,
      "learning_rate": 4.819816662383077e-06,
      "loss": 0.8153,
      "step": 781720
    },
    {
      "epoch": 2.739768756549503,
      "grad_norm": 3.21875,
      "learning_rate": 4.819167633719376e-06,
      "loss": 0.8273,
      "step": 781730
    },
    {
      "epoch": 2.7398038040563986,
      "grad_norm": 2.640625,
      "learning_rate": 4.818518605055674e-06,
      "loss": 0.8169,
      "step": 781740
    },
    {
      "epoch": 2.7398388515632943,
      "grad_norm": 2.65625,
      "learning_rate": 4.817869576391972e-06,
      "loss": 0.7958,
      "step": 781750
    },
    {
      "epoch": 2.7398738990701896,
      "grad_norm": 2.9375,
      "learning_rate": 4.817220547728271e-06,
      "loss": 0.6917,
      "step": 781760
    },
    {
      "epoch": 2.7399089465770854,
      "grad_norm": 3.1875,
      "learning_rate": 4.816571519064568e-06,
      "loss": 0.8963,
      "step": 781770
    },
    {
      "epoch": 2.7399439940839807,
      "grad_norm": 3.046875,
      "learning_rate": 4.815922490400866e-06,
      "loss": 0.733,
      "step": 781780
    },
    {
      "epoch": 2.7399790415908765,
      "grad_norm": 2.640625,
      "learning_rate": 4.815273461737165e-06,
      "loss": 0.7784,
      "step": 781790
    },
    {
      "epoch": 2.740014089097772,
      "grad_norm": 3.515625,
      "learning_rate": 4.814624433073463e-06,
      "loss": 0.7688,
      "step": 781800
    },
    {
      "epoch": 2.7400491366046675,
      "grad_norm": 2.125,
      "learning_rate": 4.81397540440976e-06,
      "loss": 0.7524,
      "step": 781810
    },
    {
      "epoch": 2.7400841841115633,
      "grad_norm": 2.625,
      "learning_rate": 4.813326375746059e-06,
      "loss": 0.8499,
      "step": 781820
    },
    {
      "epoch": 2.7401192316184586,
      "grad_norm": 2.84375,
      "learning_rate": 4.812677347082357e-06,
      "loss": 0.7513,
      "step": 781830
    },
    {
      "epoch": 2.7401542791253544,
      "grad_norm": 2.703125,
      "learning_rate": 4.812028318418655e-06,
      "loss": 0.7322,
      "step": 781840
    },
    {
      "epoch": 2.74018932663225,
      "grad_norm": 3.3125,
      "learning_rate": 4.811379289754954e-06,
      "loss": 0.8595,
      "step": 781850
    },
    {
      "epoch": 2.740224374139146,
      "grad_norm": 2.625,
      "learning_rate": 4.810730261091251e-06,
      "loss": 0.8464,
      "step": 781860
    },
    {
      "epoch": 2.740259421646041,
      "grad_norm": 2.96875,
      "learning_rate": 4.810081232427549e-06,
      "loss": 0.8325,
      "step": 781870
    },
    {
      "epoch": 2.740294469152937,
      "grad_norm": 2.609375,
      "learning_rate": 4.809432203763848e-06,
      "loss": 0.7818,
      "step": 781880
    },
    {
      "epoch": 2.7403295166598323,
      "grad_norm": 2.890625,
      "learning_rate": 4.808783175100146e-06,
      "loss": 0.8097,
      "step": 781890
    },
    {
      "epoch": 2.740364564166728,
      "grad_norm": 2.75,
      "learning_rate": 4.808134146436444e-06,
      "loss": 0.7627,
      "step": 781900
    },
    {
      "epoch": 2.740399611673624,
      "grad_norm": 3.09375,
      "learning_rate": 4.807485117772742e-06,
      "loss": 0.7763,
      "step": 781910
    },
    {
      "epoch": 2.740434659180519,
      "grad_norm": 2.96875,
      "learning_rate": 4.80683608910904e-06,
      "loss": 0.8028,
      "step": 781920
    },
    {
      "epoch": 2.740469706687415,
      "grad_norm": 2.90625,
      "learning_rate": 4.806187060445338e-06,
      "loss": 0.8128,
      "step": 781930
    },
    {
      "epoch": 2.74050475419431,
      "grad_norm": 2.890625,
      "learning_rate": 4.8055380317816365e-06,
      "loss": 0.7779,
      "step": 781940
    },
    {
      "epoch": 2.740539801701206,
      "grad_norm": 2.609375,
      "learning_rate": 4.804889003117934e-06,
      "loss": 0.8351,
      "step": 781950
    },
    {
      "epoch": 2.7405748492081017,
      "grad_norm": 3.09375,
      "learning_rate": 4.804239974454232e-06,
      "loss": 0.8077,
      "step": 781960
    },
    {
      "epoch": 2.7406098967149974,
      "grad_norm": 2.875,
      "learning_rate": 4.8035909457905305e-06,
      "loss": 0.8529,
      "step": 781970
    },
    {
      "epoch": 2.7406449442218928,
      "grad_norm": 3.015625,
      "learning_rate": 4.8029419171268285e-06,
      "loss": 0.8057,
      "step": 781980
    },
    {
      "epoch": 2.7406799917287885,
      "grad_norm": 3.125,
      "learning_rate": 4.8022928884631265e-06,
      "loss": 0.7641,
      "step": 781990
    },
    {
      "epoch": 2.740715039235684,
      "grad_norm": 2.875,
      "learning_rate": 4.8016438597994245e-06,
      "loss": 0.7762,
      "step": 782000
    },
    {
      "epoch": 2.7407500867425796,
      "grad_norm": 2.6875,
      "learning_rate": 4.8009948311357225e-06,
      "loss": 0.7861,
      "step": 782010
    },
    {
      "epoch": 2.7407851342494753,
      "grad_norm": 2.90625,
      "learning_rate": 4.8003458024720205e-06,
      "loss": 0.8386,
      "step": 782020
    },
    {
      "epoch": 2.7408201817563707,
      "grad_norm": 3.265625,
      "learning_rate": 4.7996967738083185e-06,
      "loss": 0.7839,
      "step": 782030
    },
    {
      "epoch": 2.7408552292632664,
      "grad_norm": 2.8125,
      "learning_rate": 4.799047745144617e-06,
      "loss": 0.8053,
      "step": 782040
    },
    {
      "epoch": 2.7408902767701617,
      "grad_norm": 2.734375,
      "learning_rate": 4.7983987164809145e-06,
      "loss": 0.6726,
      "step": 782050
    },
    {
      "epoch": 2.7409253242770575,
      "grad_norm": 2.640625,
      "learning_rate": 4.797749687817213e-06,
      "loss": 0.7902,
      "step": 782060
    },
    {
      "epoch": 2.7409603717839532,
      "grad_norm": 3.125,
      "learning_rate": 4.797100659153511e-06,
      "loss": 0.7707,
      "step": 782070
    },
    {
      "epoch": 2.740995419290849,
      "grad_norm": 2.625,
      "learning_rate": 4.796451630489809e-06,
      "loss": 0.8209,
      "step": 782080
    },
    {
      "epoch": 2.7410304667977443,
      "grad_norm": 3.046875,
      "learning_rate": 4.795802601826107e-06,
      "loss": 0.7997,
      "step": 782090
    },
    {
      "epoch": 2.74106551430464,
      "grad_norm": 2.921875,
      "learning_rate": 4.795153573162405e-06,
      "loss": 0.8295,
      "step": 782100
    },
    {
      "epoch": 2.7411005618115354,
      "grad_norm": 3.09375,
      "learning_rate": 4.794504544498703e-06,
      "loss": 0.8572,
      "step": 782110
    },
    {
      "epoch": 2.741135609318431,
      "grad_norm": 2.921875,
      "learning_rate": 4.793855515835001e-06,
      "loss": 0.7966,
      "step": 782120
    },
    {
      "epoch": 2.741170656825327,
      "grad_norm": 2.859375,
      "learning_rate": 4.7932064871713e-06,
      "loss": 0.8048,
      "step": 782130
    },
    {
      "epoch": 2.741205704332222,
      "grad_norm": 3.0625,
      "learning_rate": 4.792557458507597e-06,
      "loss": 0.87,
      "step": 782140
    },
    {
      "epoch": 2.741240751839118,
      "grad_norm": 2.9375,
      "learning_rate": 4.791908429843895e-06,
      "loss": 0.84,
      "step": 782150
    },
    {
      "epoch": 2.7412757993460133,
      "grad_norm": 2.578125,
      "learning_rate": 4.791259401180194e-06,
      "loss": 0.7286,
      "step": 782160
    },
    {
      "epoch": 2.741310846852909,
      "grad_norm": 2.9375,
      "learning_rate": 4.790610372516492e-06,
      "loss": 0.8181,
      "step": 782170
    },
    {
      "epoch": 2.741345894359805,
      "grad_norm": 3.09375,
      "learning_rate": 4.78996134385279e-06,
      "loss": 0.7983,
      "step": 782180
    },
    {
      "epoch": 2.7413809418667006,
      "grad_norm": 3.359375,
      "learning_rate": 4.789312315189088e-06,
      "loss": 0.836,
      "step": 782190
    },
    {
      "epoch": 2.741415989373596,
      "grad_norm": 2.734375,
      "learning_rate": 4.788663286525386e-06,
      "loss": 0.7643,
      "step": 782200
    },
    {
      "epoch": 2.7414510368804916,
      "grad_norm": 2.890625,
      "learning_rate": 4.788014257861684e-06,
      "loss": 0.83,
      "step": 782210
    },
    {
      "epoch": 2.741486084387387,
      "grad_norm": 3.234375,
      "learning_rate": 4.787365229197983e-06,
      "loss": 0.8665,
      "step": 782220
    },
    {
      "epoch": 2.7415211318942827,
      "grad_norm": 2.96875,
      "learning_rate": 4.786716200534281e-06,
      "loss": 0.8234,
      "step": 782230
    },
    {
      "epoch": 2.7415561794011785,
      "grad_norm": 2.765625,
      "learning_rate": 4.786067171870578e-06,
      "loss": 0.8113,
      "step": 782240
    },
    {
      "epoch": 2.7415912269080738,
      "grad_norm": 2.515625,
      "learning_rate": 4.785418143206877e-06,
      "loss": 0.8083,
      "step": 782250
    },
    {
      "epoch": 2.7416262744149695,
      "grad_norm": 2.71875,
      "learning_rate": 4.784769114543175e-06,
      "loss": 0.8205,
      "step": 782260
    },
    {
      "epoch": 2.741661321921865,
      "grad_norm": 2.84375,
      "learning_rate": 4.784120085879473e-06,
      "loss": 0.8286,
      "step": 782270
    },
    {
      "epoch": 2.7416963694287606,
      "grad_norm": 2.625,
      "learning_rate": 4.783471057215771e-06,
      "loss": 0.8552,
      "step": 782280
    },
    {
      "epoch": 2.7417314169356564,
      "grad_norm": 2.609375,
      "learning_rate": 4.782822028552069e-06,
      "loss": 0.6953,
      "step": 782290
    },
    {
      "epoch": 2.741766464442552,
      "grad_norm": 3.203125,
      "learning_rate": 4.782172999888367e-06,
      "loss": 0.8227,
      "step": 782300
    },
    {
      "epoch": 2.7418015119494474,
      "grad_norm": 3.328125,
      "learning_rate": 4.781523971224666e-06,
      "loss": 0.8915,
      "step": 782310
    },
    {
      "epoch": 2.741836559456343,
      "grad_norm": 3.1875,
      "learning_rate": 4.780874942560964e-06,
      "loss": 0.7991,
      "step": 782320
    },
    {
      "epoch": 2.7418716069632385,
      "grad_norm": 2.75,
      "learning_rate": 4.780225913897261e-06,
      "loss": 0.8173,
      "step": 782330
    },
    {
      "epoch": 2.7419066544701343,
      "grad_norm": 2.671875,
      "learning_rate": 4.77957688523356e-06,
      "loss": 0.8633,
      "step": 782340
    },
    {
      "epoch": 2.74194170197703,
      "grad_norm": 2.75,
      "learning_rate": 4.778927856569858e-06,
      "loss": 0.8387,
      "step": 782350
    },
    {
      "epoch": 2.7419767494839253,
      "grad_norm": 3.203125,
      "learning_rate": 4.778278827906156e-06,
      "loss": 0.8366,
      "step": 782360
    },
    {
      "epoch": 2.742011796990821,
      "grad_norm": 2.9375,
      "learning_rate": 4.777629799242455e-06,
      "loss": 0.7236,
      "step": 782370
    },
    {
      "epoch": 2.7420468444977164,
      "grad_norm": 3.015625,
      "learning_rate": 4.776980770578752e-06,
      "loss": 0.7766,
      "step": 782380
    },
    {
      "epoch": 2.742081892004612,
      "grad_norm": 2.546875,
      "learning_rate": 4.77633174191505e-06,
      "loss": 0.7928,
      "step": 782390
    },
    {
      "epoch": 2.742116939511508,
      "grad_norm": 2.828125,
      "learning_rate": 4.775682713251349e-06,
      "loss": 0.8197,
      "step": 782400
    },
    {
      "epoch": 2.7421519870184037,
      "grad_norm": 3.03125,
      "learning_rate": 4.775033684587647e-06,
      "loss": 0.7601,
      "step": 782410
    },
    {
      "epoch": 2.742187034525299,
      "grad_norm": 3.5625,
      "learning_rate": 4.774384655923944e-06,
      "loss": 0.9107,
      "step": 782420
    },
    {
      "epoch": 2.7422220820321948,
      "grad_norm": 2.921875,
      "learning_rate": 4.773735627260243e-06,
      "loss": 0.8009,
      "step": 782430
    },
    {
      "epoch": 2.74225712953909,
      "grad_norm": 2.828125,
      "learning_rate": 4.773086598596541e-06,
      "loss": 0.7362,
      "step": 782440
    },
    {
      "epoch": 2.742292177045986,
      "grad_norm": 2.828125,
      "learning_rate": 4.772437569932839e-06,
      "loss": 0.7631,
      "step": 782450
    },
    {
      "epoch": 2.7423272245528816,
      "grad_norm": 2.734375,
      "learning_rate": 4.771788541269137e-06,
      "loss": 0.846,
      "step": 782460
    },
    {
      "epoch": 2.742362272059777,
      "grad_norm": 2.859375,
      "learning_rate": 4.771139512605435e-06,
      "loss": 0.7845,
      "step": 782470
    },
    {
      "epoch": 2.7423973195666727,
      "grad_norm": 2.796875,
      "learning_rate": 4.770490483941733e-06,
      "loss": 0.7657,
      "step": 782480
    },
    {
      "epoch": 2.742432367073568,
      "grad_norm": 2.625,
      "learning_rate": 4.7698414552780316e-06,
      "loss": 0.7493,
      "step": 782490
    },
    {
      "epoch": 2.7424674145804637,
      "grad_norm": 2.734375,
      "learning_rate": 4.7691924266143296e-06,
      "loss": 0.8003,
      "step": 782500
    },
    {
      "epoch": 2.7425024620873595,
      "grad_norm": 2.828125,
      "learning_rate": 4.7685433979506276e-06,
      "loss": 0.7488,
      "step": 782510
    },
    {
      "epoch": 2.7425375095942552,
      "grad_norm": 3.1875,
      "learning_rate": 4.7678943692869256e-06,
      "loss": 0.9026,
      "step": 782520
    },
    {
      "epoch": 2.7425725571011506,
      "grad_norm": 3.125,
      "learning_rate": 4.7672453406232236e-06,
      "loss": 0.8381,
      "step": 782530
    },
    {
      "epoch": 2.7426076046080463,
      "grad_norm": 3.078125,
      "learning_rate": 4.7665963119595216e-06,
      "loss": 0.8789,
      "step": 782540
    },
    {
      "epoch": 2.7426426521149416,
      "grad_norm": 3.15625,
      "learning_rate": 4.7659472832958196e-06,
      "loss": 0.7426,
      "step": 782550
    },
    {
      "epoch": 2.7426776996218374,
      "grad_norm": 2.859375,
      "learning_rate": 4.7652982546321176e-06,
      "loss": 0.7634,
      "step": 782560
    },
    {
      "epoch": 2.742712747128733,
      "grad_norm": 3.171875,
      "learning_rate": 4.7646492259684156e-06,
      "loss": 0.7816,
      "step": 782570
    },
    {
      "epoch": 2.7427477946356285,
      "grad_norm": 2.8125,
      "learning_rate": 4.7640001973047136e-06,
      "loss": 0.802,
      "step": 782580
    },
    {
      "epoch": 2.742782842142524,
      "grad_norm": 3.25,
      "learning_rate": 4.763351168641012e-06,
      "loss": 0.8292,
      "step": 782590
    },
    {
      "epoch": 2.7428178896494195,
      "grad_norm": 2.546875,
      "learning_rate": 4.76270213997731e-06,
      "loss": 0.7897,
      "step": 782600
    },
    {
      "epoch": 2.7428529371563153,
      "grad_norm": 2.953125,
      "learning_rate": 4.762053111313608e-06,
      "loss": 0.8787,
      "step": 782610
    },
    {
      "epoch": 2.742887984663211,
      "grad_norm": 2.78125,
      "learning_rate": 4.761404082649906e-06,
      "loss": 0.7583,
      "step": 782620
    },
    {
      "epoch": 2.742923032170107,
      "grad_norm": 3.328125,
      "learning_rate": 4.760755053986204e-06,
      "loss": 0.7788,
      "step": 782630
    },
    {
      "epoch": 2.742958079677002,
      "grad_norm": 3.28125,
      "learning_rate": 4.760106025322502e-06,
      "loss": 0.8345,
      "step": 782640
    },
    {
      "epoch": 2.742993127183898,
      "grad_norm": 3.234375,
      "learning_rate": 4.759456996658801e-06,
      "loss": 0.8541,
      "step": 782650
    },
    {
      "epoch": 2.743028174690793,
      "grad_norm": 2.59375,
      "learning_rate": 4.758807967995098e-06,
      "loss": 0.7832,
      "step": 782660
    },
    {
      "epoch": 2.743063222197689,
      "grad_norm": 2.796875,
      "learning_rate": 4.758158939331396e-06,
      "loss": 0.7931,
      "step": 782670
    },
    {
      "epoch": 2.7430982697045847,
      "grad_norm": 2.765625,
      "learning_rate": 4.757509910667695e-06,
      "loss": 0.779,
      "step": 782680
    },
    {
      "epoch": 2.74313331721148,
      "grad_norm": 3.0625,
      "learning_rate": 4.756860882003993e-06,
      "loss": 0.795,
      "step": 782690
    },
    {
      "epoch": 2.7431683647183758,
      "grad_norm": 2.78125,
      "learning_rate": 4.756211853340291e-06,
      "loss": 0.7563,
      "step": 782700
    },
    {
      "epoch": 2.743203412225271,
      "grad_norm": 2.71875,
      "learning_rate": 4.755562824676589e-06,
      "loss": 0.817,
      "step": 782710
    },
    {
      "epoch": 2.743238459732167,
      "grad_norm": 2.859375,
      "learning_rate": 4.754913796012887e-06,
      "loss": 0.8111,
      "step": 782720
    },
    {
      "epoch": 2.7432735072390626,
      "grad_norm": 2.203125,
      "learning_rate": 4.754264767349185e-06,
      "loss": 0.8134,
      "step": 782730
    },
    {
      "epoch": 2.7433085547459584,
      "grad_norm": 3.40625,
      "learning_rate": 4.753615738685484e-06,
      "loss": 0.823,
      "step": 782740
    },
    {
      "epoch": 2.7433436022528537,
      "grad_norm": 3.046875,
      "learning_rate": 4.752966710021781e-06,
      "loss": 0.7461,
      "step": 782750
    },
    {
      "epoch": 2.7433786497597494,
      "grad_norm": 2.8125,
      "learning_rate": 4.752317681358079e-06,
      "loss": 0.791,
      "step": 782760
    },
    {
      "epoch": 2.7434136972666447,
      "grad_norm": 2.96875,
      "learning_rate": 4.751668652694378e-06,
      "loss": 0.793,
      "step": 782770
    },
    {
      "epoch": 2.7434487447735405,
      "grad_norm": 2.515625,
      "learning_rate": 4.751019624030676e-06,
      "loss": 0.7834,
      "step": 782780
    },
    {
      "epoch": 2.7434837922804363,
      "grad_norm": 2.59375,
      "learning_rate": 4.750370595366974e-06,
      "loss": 0.7556,
      "step": 782790
    },
    {
      "epoch": 2.7435188397873316,
      "grad_norm": 2.984375,
      "learning_rate": 4.749721566703272e-06,
      "loss": 0.7792,
      "step": 782800
    },
    {
      "epoch": 2.7435538872942273,
      "grad_norm": 2.890625,
      "learning_rate": 4.74907253803957e-06,
      "loss": 0.747,
      "step": 782810
    },
    {
      "epoch": 2.7435889348011226,
      "grad_norm": 3.296875,
      "learning_rate": 4.748423509375868e-06,
      "loss": 0.8261,
      "step": 782820
    },
    {
      "epoch": 2.7436239823080184,
      "grad_norm": 3.078125,
      "learning_rate": 4.747774480712167e-06,
      "loss": 0.764,
      "step": 782830
    },
    {
      "epoch": 2.743659029814914,
      "grad_norm": 2.84375,
      "learning_rate": 4.747125452048465e-06,
      "loss": 0.7333,
      "step": 782840
    },
    {
      "epoch": 2.74369407732181,
      "grad_norm": 3.09375,
      "learning_rate": 4.746476423384762e-06,
      "loss": 0.8311,
      "step": 782850
    },
    {
      "epoch": 2.7437291248287052,
      "grad_norm": 2.859375,
      "learning_rate": 4.745827394721061e-06,
      "loss": 0.8304,
      "step": 782860
    },
    {
      "epoch": 2.743764172335601,
      "grad_norm": 2.984375,
      "learning_rate": 4.745178366057359e-06,
      "loss": 0.837,
      "step": 782870
    },
    {
      "epoch": 2.7437992198424963,
      "grad_norm": 2.96875,
      "learning_rate": 4.744529337393657e-06,
      "loss": 0.8578,
      "step": 782880
    },
    {
      "epoch": 2.743834267349392,
      "grad_norm": 2.890625,
      "learning_rate": 4.743880308729955e-06,
      "loss": 0.8294,
      "step": 782890
    },
    {
      "epoch": 2.743869314856288,
      "grad_norm": 2.96875,
      "learning_rate": 4.743231280066253e-06,
      "loss": 0.6948,
      "step": 782900
    },
    {
      "epoch": 2.743904362363183,
      "grad_norm": 3.015625,
      "learning_rate": 4.742582251402551e-06,
      "loss": 0.8641,
      "step": 782910
    },
    {
      "epoch": 2.743939409870079,
      "grad_norm": 3.0,
      "learning_rate": 4.74193322273885e-06,
      "loss": 0.7192,
      "step": 782920
    },
    {
      "epoch": 2.7439744573769747,
      "grad_norm": 2.640625,
      "learning_rate": 4.741284194075148e-06,
      "loss": 0.8125,
      "step": 782930
    },
    {
      "epoch": 2.74400950488387,
      "grad_norm": 2.765625,
      "learning_rate": 4.740635165411445e-06,
      "loss": 0.7236,
      "step": 782940
    },
    {
      "epoch": 2.7440445523907657,
      "grad_norm": 3.4375,
      "learning_rate": 4.739986136747744e-06,
      "loss": 0.8158,
      "step": 782950
    },
    {
      "epoch": 2.7440795998976615,
      "grad_norm": 3.4375,
      "learning_rate": 4.739337108084042e-06,
      "loss": 0.8861,
      "step": 782960
    },
    {
      "epoch": 2.744114647404557,
      "grad_norm": 2.78125,
      "learning_rate": 4.73868807942034e-06,
      "loss": 0.8162,
      "step": 782970
    },
    {
      "epoch": 2.7441496949114526,
      "grad_norm": 2.609375,
      "learning_rate": 4.738039050756638e-06,
      "loss": 0.7578,
      "step": 782980
    },
    {
      "epoch": 2.744184742418348,
      "grad_norm": 3.5,
      "learning_rate": 4.737390022092936e-06,
      "loss": 0.8155,
      "step": 782990
    },
    {
      "epoch": 2.7442197899252436,
      "grad_norm": 3.3125,
      "learning_rate": 4.736740993429234e-06,
      "loss": 0.7934,
      "step": 783000
    },
    {
      "epoch": 2.7442548374321394,
      "grad_norm": 2.84375,
      "learning_rate": 4.736091964765532e-06,
      "loss": 0.8237,
      "step": 783010
    },
    {
      "epoch": 2.7442898849390347,
      "grad_norm": 3.046875,
      "learning_rate": 4.735442936101831e-06,
      "loss": 0.8121,
      "step": 783020
    },
    {
      "epoch": 2.7443249324459305,
      "grad_norm": 2.96875,
      "learning_rate": 4.734793907438128e-06,
      "loss": 0.8657,
      "step": 783030
    },
    {
      "epoch": 2.744359979952826,
      "grad_norm": 3.15625,
      "learning_rate": 4.734144878774427e-06,
      "loss": 0.7287,
      "step": 783040
    },
    {
      "epoch": 2.7443950274597215,
      "grad_norm": 3.046875,
      "learning_rate": 4.733495850110725e-06,
      "loss": 0.8092,
      "step": 783050
    },
    {
      "epoch": 2.7444300749666173,
      "grad_norm": 2.890625,
      "learning_rate": 4.732846821447023e-06,
      "loss": 0.8131,
      "step": 783060
    },
    {
      "epoch": 2.744465122473513,
      "grad_norm": 2.484375,
      "learning_rate": 4.732197792783321e-06,
      "loss": 0.8587,
      "step": 783070
    },
    {
      "epoch": 2.7445001699804084,
      "grad_norm": 2.515625,
      "learning_rate": 4.731548764119619e-06,
      "loss": 0.8006,
      "step": 783080
    },
    {
      "epoch": 2.744535217487304,
      "grad_norm": 3.34375,
      "learning_rate": 4.730899735455917e-06,
      "loss": 0.7741,
      "step": 783090
    },
    {
      "epoch": 2.7445702649941994,
      "grad_norm": 3.203125,
      "learning_rate": 4.730250706792215e-06,
      "loss": 0.8578,
      "step": 783100
    },
    {
      "epoch": 2.744605312501095,
      "grad_norm": 2.515625,
      "learning_rate": 4.7296016781285135e-06,
      "loss": 0.7415,
      "step": 783110
    },
    {
      "epoch": 2.744640360007991,
      "grad_norm": 3.171875,
      "learning_rate": 4.7289526494648115e-06,
      "loss": 0.8245,
      "step": 783120
    },
    {
      "epoch": 2.7446754075148867,
      "grad_norm": 2.703125,
      "learning_rate": 4.728303620801109e-06,
      "loss": 0.797,
      "step": 783130
    },
    {
      "epoch": 2.744710455021782,
      "grad_norm": 2.734375,
      "learning_rate": 4.7276545921374075e-06,
      "loss": 0.781,
      "step": 783140
    },
    {
      "epoch": 2.7447455025286778,
      "grad_norm": 2.796875,
      "learning_rate": 4.7270055634737055e-06,
      "loss": 0.6901,
      "step": 783150
    },
    {
      "epoch": 2.744780550035573,
      "grad_norm": 3.625,
      "learning_rate": 4.7263565348100035e-06,
      "loss": 0.826,
      "step": 783160
    },
    {
      "epoch": 2.744815597542469,
      "grad_norm": 2.75,
      "learning_rate": 4.725707506146302e-06,
      "loss": 0.7947,
      "step": 783170
    },
    {
      "epoch": 2.7448506450493646,
      "grad_norm": 2.765625,
      "learning_rate": 4.7250584774825995e-06,
      "loss": 0.8133,
      "step": 783180
    },
    {
      "epoch": 2.74488569255626,
      "grad_norm": 2.796875,
      "learning_rate": 4.7244094488188975e-06,
      "loss": 0.752,
      "step": 783190
    },
    {
      "epoch": 2.7449207400631557,
      "grad_norm": 3.171875,
      "learning_rate": 4.723760420155196e-06,
      "loss": 0.817,
      "step": 783200
    },
    {
      "epoch": 2.744955787570051,
      "grad_norm": 3.046875,
      "learning_rate": 4.723111391491494e-06,
      "loss": 0.8278,
      "step": 783210
    },
    {
      "epoch": 2.7449908350769467,
      "grad_norm": 2.8125,
      "learning_rate": 4.7224623628277915e-06,
      "loss": 0.7834,
      "step": 783220
    },
    {
      "epoch": 2.7450258825838425,
      "grad_norm": 3.65625,
      "learning_rate": 4.72181333416409e-06,
      "loss": 0.7643,
      "step": 783230
    },
    {
      "epoch": 2.7450609300907383,
      "grad_norm": 2.8125,
      "learning_rate": 4.721164305500388e-06,
      "loss": 0.7369,
      "step": 783240
    },
    {
      "epoch": 2.7450959775976336,
      "grad_norm": 3.078125,
      "learning_rate": 4.720515276836686e-06,
      "loss": 0.7706,
      "step": 783250
    },
    {
      "epoch": 2.7451310251045293,
      "grad_norm": 3.109375,
      "learning_rate": 4.719866248172985e-06,
      "loss": 0.8376,
      "step": 783260
    },
    {
      "epoch": 2.7451660726114246,
      "grad_norm": 3.140625,
      "learning_rate": 4.719217219509282e-06,
      "loss": 0.792,
      "step": 783270
    },
    {
      "epoch": 2.7452011201183204,
      "grad_norm": 2.71875,
      "learning_rate": 4.71856819084558e-06,
      "loss": 0.8713,
      "step": 783280
    },
    {
      "epoch": 2.745236167625216,
      "grad_norm": 2.234375,
      "learning_rate": 4.717919162181879e-06,
      "loss": 0.7925,
      "step": 783290
    },
    {
      "epoch": 2.7452712151321115,
      "grad_norm": 2.703125,
      "learning_rate": 4.717270133518177e-06,
      "loss": 0.8048,
      "step": 783300
    },
    {
      "epoch": 2.7453062626390072,
      "grad_norm": 2.671875,
      "learning_rate": 4.716621104854475e-06,
      "loss": 0.817,
      "step": 783310
    },
    {
      "epoch": 2.7453413101459025,
      "grad_norm": 3.078125,
      "learning_rate": 4.715972076190773e-06,
      "loss": 0.7748,
      "step": 783320
    },
    {
      "epoch": 2.7453763576527983,
      "grad_norm": 2.78125,
      "learning_rate": 4.715323047527071e-06,
      "loss": 0.7705,
      "step": 783330
    },
    {
      "epoch": 2.745411405159694,
      "grad_norm": 2.34375,
      "learning_rate": 4.714674018863369e-06,
      "loss": 0.7135,
      "step": 783340
    },
    {
      "epoch": 2.74544645266659,
      "grad_norm": 3.03125,
      "learning_rate": 4.714024990199668e-06,
      "loss": 0.9148,
      "step": 783350
    },
    {
      "epoch": 2.745481500173485,
      "grad_norm": 3.1875,
      "learning_rate": 4.713375961535965e-06,
      "loss": 0.8054,
      "step": 783360
    },
    {
      "epoch": 2.745516547680381,
      "grad_norm": 3.109375,
      "learning_rate": 4.712726932872263e-06,
      "loss": 0.7746,
      "step": 783370
    },
    {
      "epoch": 2.745551595187276,
      "grad_norm": 3.25,
      "learning_rate": 4.712077904208562e-06,
      "loss": 0.775,
      "step": 783380
    },
    {
      "epoch": 2.745586642694172,
      "grad_norm": 2.875,
      "learning_rate": 4.71142887554486e-06,
      "loss": 0.8108,
      "step": 783390
    },
    {
      "epoch": 2.7456216902010677,
      "grad_norm": 3.109375,
      "learning_rate": 4.710779846881158e-06,
      "loss": 0.8778,
      "step": 783400
    },
    {
      "epoch": 2.745656737707963,
      "grad_norm": 2.6875,
      "learning_rate": 4.710130818217456e-06,
      "loss": 0.8136,
      "step": 783410
    },
    {
      "epoch": 2.745691785214859,
      "grad_norm": 2.90625,
      "learning_rate": 4.709481789553754e-06,
      "loss": 0.7779,
      "step": 783420
    },
    {
      "epoch": 2.745726832721754,
      "grad_norm": 3.03125,
      "learning_rate": 4.708832760890052e-06,
      "loss": 0.8597,
      "step": 783430
    },
    {
      "epoch": 2.74576188022865,
      "grad_norm": 2.765625,
      "learning_rate": 4.70818373222635e-06,
      "loss": 0.7955,
      "step": 783440
    },
    {
      "epoch": 2.7457969277355456,
      "grad_norm": 3.234375,
      "learning_rate": 4.707534703562649e-06,
      "loss": 0.8125,
      "step": 783450
    },
    {
      "epoch": 2.7458319752424414,
      "grad_norm": 3.0625,
      "learning_rate": 4.706885674898946e-06,
      "loss": 0.7856,
      "step": 783460
    },
    {
      "epoch": 2.7458670227493367,
      "grad_norm": 3.0,
      "learning_rate": 4.706236646235245e-06,
      "loss": 0.6962,
      "step": 783470
    },
    {
      "epoch": 2.7459020702562325,
      "grad_norm": 2.984375,
      "learning_rate": 4.705587617571543e-06,
      "loss": 0.8364,
      "step": 783480
    },
    {
      "epoch": 2.7459371177631278,
      "grad_norm": 2.671875,
      "learning_rate": 4.704938588907841e-06,
      "loss": 0.7222,
      "step": 783490
    },
    {
      "epoch": 2.7459721652700235,
      "grad_norm": 3.40625,
      "learning_rate": 4.704289560244139e-06,
      "loss": 0.8935,
      "step": 783500
    },
    {
      "epoch": 2.7460072127769193,
      "grad_norm": 3.34375,
      "learning_rate": 4.703640531580437e-06,
      "loss": 0.8223,
      "step": 783510
    },
    {
      "epoch": 2.7460422602838146,
      "grad_norm": 3.359375,
      "learning_rate": 4.702991502916735e-06,
      "loss": 0.9241,
      "step": 783520
    },
    {
      "epoch": 2.7460773077907104,
      "grad_norm": 2.84375,
      "learning_rate": 4.702342474253033e-06,
      "loss": 0.7132,
      "step": 783530
    },
    {
      "epoch": 2.7461123552976057,
      "grad_norm": 2.984375,
      "learning_rate": 4.701693445589332e-06,
      "loss": 0.8081,
      "step": 783540
    },
    {
      "epoch": 2.7461474028045014,
      "grad_norm": 2.96875,
      "learning_rate": 4.701044416925629e-06,
      "loss": 0.867,
      "step": 783550
    },
    {
      "epoch": 2.746182450311397,
      "grad_norm": 2.546875,
      "learning_rate": 4.700395388261927e-06,
      "loss": 0.761,
      "step": 783560
    },
    {
      "epoch": 2.746217497818293,
      "grad_norm": 2.5,
      "learning_rate": 4.699746359598226e-06,
      "loss": 0.7814,
      "step": 783570
    },
    {
      "epoch": 2.7462525453251883,
      "grad_norm": 3.03125,
      "learning_rate": 4.699097330934524e-06,
      "loss": 0.7764,
      "step": 783580
    },
    {
      "epoch": 2.746287592832084,
      "grad_norm": 2.6875,
      "learning_rate": 4.698448302270822e-06,
      "loss": 0.7604,
      "step": 783590
    },
    {
      "epoch": 2.7463226403389793,
      "grad_norm": 2.921875,
      "learning_rate": 4.69779927360712e-06,
      "loss": 0.7813,
      "step": 783600
    },
    {
      "epoch": 2.746357687845875,
      "grad_norm": 3.28125,
      "learning_rate": 4.697150244943418e-06,
      "loss": 0.813,
      "step": 783610
    },
    {
      "epoch": 2.746392735352771,
      "grad_norm": 2.546875,
      "learning_rate": 4.696501216279716e-06,
      "loss": 0.7845,
      "step": 783620
    },
    {
      "epoch": 2.746427782859666,
      "grad_norm": 2.84375,
      "learning_rate": 4.6958521876160146e-06,
      "loss": 0.8199,
      "step": 783630
    },
    {
      "epoch": 2.746462830366562,
      "grad_norm": 2.546875,
      "learning_rate": 4.6952031589523126e-06,
      "loss": 0.8388,
      "step": 783640
    },
    {
      "epoch": 2.7464978778734572,
      "grad_norm": 3.28125,
      "learning_rate": 4.69455413028861e-06,
      "loss": 0.8207,
      "step": 783650
    },
    {
      "epoch": 2.746532925380353,
      "grad_norm": 2.71875,
      "learning_rate": 4.6939051016249086e-06,
      "loss": 0.7893,
      "step": 783660
    },
    {
      "epoch": 2.7465679728872487,
      "grad_norm": 3.0,
      "learning_rate": 4.6932560729612066e-06,
      "loss": 0.8936,
      "step": 783670
    },
    {
      "epoch": 2.7466030203941445,
      "grad_norm": 3.15625,
      "learning_rate": 4.6926070442975046e-06,
      "loss": 0.757,
      "step": 783680
    },
    {
      "epoch": 2.74663806790104,
      "grad_norm": 2.921875,
      "learning_rate": 4.6919580156338026e-06,
      "loss": 0.8378,
      "step": 783690
    },
    {
      "epoch": 2.7466731154079356,
      "grad_norm": 3.296875,
      "learning_rate": 4.6913089869701006e-06,
      "loss": 0.7377,
      "step": 783700
    },
    {
      "epoch": 2.746708162914831,
      "grad_norm": 2.53125,
      "learning_rate": 4.6906599583063986e-06,
      "loss": 0.753,
      "step": 783710
    },
    {
      "epoch": 2.7467432104217266,
      "grad_norm": 2.84375,
      "learning_rate": 4.690010929642697e-06,
      "loss": 0.763,
      "step": 783720
    },
    {
      "epoch": 2.7467782579286224,
      "grad_norm": 3.015625,
      "learning_rate": 4.689361900978995e-06,
      "loss": 0.8912,
      "step": 783730
    },
    {
      "epoch": 2.7468133054355177,
      "grad_norm": 2.984375,
      "learning_rate": 4.6887128723152926e-06,
      "loss": 0.8709,
      "step": 783740
    },
    {
      "epoch": 2.7468483529424135,
      "grad_norm": 3.078125,
      "learning_rate": 4.688063843651591e-06,
      "loss": 0.8216,
      "step": 783750
    },
    {
      "epoch": 2.746883400449309,
      "grad_norm": 3.0625,
      "learning_rate": 4.687414814987889e-06,
      "loss": 0.805,
      "step": 783760
    },
    {
      "epoch": 2.7469184479562045,
      "grad_norm": 3.28125,
      "learning_rate": 4.686765786324187e-06,
      "loss": 0.8119,
      "step": 783770
    },
    {
      "epoch": 2.7469534954631003,
      "grad_norm": 3.40625,
      "learning_rate": 4.686116757660486e-06,
      "loss": 0.8514,
      "step": 783780
    },
    {
      "epoch": 2.746988542969996,
      "grad_norm": 2.875,
      "learning_rate": 4.685467728996783e-06,
      "loss": 0.9298,
      "step": 783790
    },
    {
      "epoch": 2.7470235904768914,
      "grad_norm": 3.0,
      "learning_rate": 4.684818700333081e-06,
      "loss": 0.8381,
      "step": 783800
    },
    {
      "epoch": 2.747058637983787,
      "grad_norm": 3.3125,
      "learning_rate": 4.68416967166938e-06,
      "loss": 0.7344,
      "step": 783810
    },
    {
      "epoch": 2.7470936854906824,
      "grad_norm": 3.6875,
      "learning_rate": 4.683520643005678e-06,
      "loss": 0.748,
      "step": 783820
    },
    {
      "epoch": 2.747128732997578,
      "grad_norm": 3.109375,
      "learning_rate": 4.682871614341975e-06,
      "loss": 0.851,
      "step": 783830
    },
    {
      "epoch": 2.747163780504474,
      "grad_norm": 2.96875,
      "learning_rate": 4.682222585678274e-06,
      "loss": 0.7711,
      "step": 783840
    },
    {
      "epoch": 2.7471988280113693,
      "grad_norm": 2.984375,
      "learning_rate": 4.681573557014572e-06,
      "loss": 0.8325,
      "step": 783850
    },
    {
      "epoch": 2.747233875518265,
      "grad_norm": 2.84375,
      "learning_rate": 4.68092452835087e-06,
      "loss": 0.8199,
      "step": 783860
    },
    {
      "epoch": 2.7472689230251603,
      "grad_norm": 2.859375,
      "learning_rate": 4.680275499687168e-06,
      "loss": 0.7651,
      "step": 783870
    },
    {
      "epoch": 2.747303970532056,
      "grad_norm": 2.921875,
      "learning_rate": 4.679626471023466e-06,
      "loss": 0.7663,
      "step": 783880
    },
    {
      "epoch": 2.747339018038952,
      "grad_norm": 3.09375,
      "learning_rate": 4.678977442359764e-06,
      "loss": 0.7865,
      "step": 783890
    },
    {
      "epoch": 2.7473740655458476,
      "grad_norm": 3.046875,
      "learning_rate": 4.678328413696063e-06,
      "loss": 0.7858,
      "step": 783900
    },
    {
      "epoch": 2.747409113052743,
      "grad_norm": 3.125,
      "learning_rate": 4.677679385032361e-06,
      "loss": 0.8109,
      "step": 783910
    },
    {
      "epoch": 2.7474441605596387,
      "grad_norm": 2.921875,
      "learning_rate": 4.677030356368659e-06,
      "loss": 0.8432,
      "step": 783920
    },
    {
      "epoch": 2.747479208066534,
      "grad_norm": 2.890625,
      "learning_rate": 4.676381327704957e-06,
      "loss": 0.7975,
      "step": 783930
    },
    {
      "epoch": 2.7475142555734298,
      "grad_norm": 2.8125,
      "learning_rate": 4.675732299041255e-06,
      "loss": 0.8596,
      "step": 783940
    },
    {
      "epoch": 2.7475493030803255,
      "grad_norm": 3.1875,
      "learning_rate": 4.675083270377553e-06,
      "loss": 0.7992,
      "step": 783950
    },
    {
      "epoch": 2.747584350587221,
      "grad_norm": 3.5625,
      "learning_rate": 4.674434241713851e-06,
      "loss": 0.8178,
      "step": 783960
    },
    {
      "epoch": 2.7476193980941166,
      "grad_norm": 3.421875,
      "learning_rate": 4.67378521305015e-06,
      "loss": 0.8559,
      "step": 783970
    },
    {
      "epoch": 2.747654445601012,
      "grad_norm": 3.34375,
      "learning_rate": 4.673136184386447e-06,
      "loss": 0.7865,
      "step": 783980
    },
    {
      "epoch": 2.7476894931079077,
      "grad_norm": 3.3125,
      "learning_rate": 4.672487155722745e-06,
      "loss": 0.8016,
      "step": 783990
    },
    {
      "epoch": 2.7477245406148034,
      "grad_norm": 2.796875,
      "learning_rate": 4.671838127059044e-06,
      "loss": 0.8286,
      "step": 784000
    },
    {
      "epoch": 2.747759588121699,
      "grad_norm": 2.703125,
      "learning_rate": 4.671189098395342e-06,
      "loss": 0.7803,
      "step": 784010
    },
    {
      "epoch": 2.7477946356285945,
      "grad_norm": 3.21875,
      "learning_rate": 4.67054006973164e-06,
      "loss": 0.8228,
      "step": 784020
    },
    {
      "epoch": 2.7478296831354903,
      "grad_norm": 3.1875,
      "learning_rate": 4.669891041067938e-06,
      "loss": 0.776,
      "step": 784030
    },
    {
      "epoch": 2.7478647306423856,
      "grad_norm": 2.921875,
      "learning_rate": 4.669242012404236e-06,
      "loss": 0.9053,
      "step": 784040
    },
    {
      "epoch": 2.7478997781492813,
      "grad_norm": 4.1875,
      "learning_rate": 4.668592983740534e-06,
      "loss": 0.7148,
      "step": 784050
    },
    {
      "epoch": 2.747934825656177,
      "grad_norm": 2.75,
      "learning_rate": 4.667943955076833e-06,
      "loss": 0.7905,
      "step": 784060
    },
    {
      "epoch": 2.7479698731630724,
      "grad_norm": 3.171875,
      "learning_rate": 4.66729492641313e-06,
      "loss": 0.8985,
      "step": 784070
    },
    {
      "epoch": 2.748004920669968,
      "grad_norm": 2.828125,
      "learning_rate": 4.666645897749428e-06,
      "loss": 0.6818,
      "step": 784080
    },
    {
      "epoch": 2.7480399681768635,
      "grad_norm": 2.8125,
      "learning_rate": 4.665996869085727e-06,
      "loss": 0.8027,
      "step": 784090
    },
    {
      "epoch": 2.7480750156837592,
      "grad_norm": 2.6875,
      "learning_rate": 4.665347840422025e-06,
      "loss": 0.8055,
      "step": 784100
    },
    {
      "epoch": 2.748110063190655,
      "grad_norm": 2.609375,
      "learning_rate": 4.664698811758323e-06,
      "loss": 0.7547,
      "step": 784110
    },
    {
      "epoch": 2.7481451106975507,
      "grad_norm": 3.015625,
      "learning_rate": 4.664049783094621e-06,
      "loss": 0.8673,
      "step": 784120
    },
    {
      "epoch": 2.748180158204446,
      "grad_norm": 3.25,
      "learning_rate": 4.663400754430919e-06,
      "loss": 0.9254,
      "step": 784130
    },
    {
      "epoch": 2.748215205711342,
      "grad_norm": 3.015625,
      "learning_rate": 4.662751725767217e-06,
      "loss": 0.8413,
      "step": 784140
    },
    {
      "epoch": 2.748250253218237,
      "grad_norm": 3.0,
      "learning_rate": 4.662102697103516e-06,
      "loss": 0.7615,
      "step": 784150
    },
    {
      "epoch": 2.748285300725133,
      "grad_norm": 2.96875,
      "learning_rate": 4.661453668439813e-06,
      "loss": 0.8386,
      "step": 784160
    },
    {
      "epoch": 2.7483203482320286,
      "grad_norm": 2.984375,
      "learning_rate": 4.660804639776111e-06,
      "loss": 0.8528,
      "step": 784170
    },
    {
      "epoch": 2.748355395738924,
      "grad_norm": 2.578125,
      "learning_rate": 4.66015561111241e-06,
      "loss": 0.7821,
      "step": 784180
    },
    {
      "epoch": 2.7483904432458197,
      "grad_norm": 2.984375,
      "learning_rate": 4.659506582448708e-06,
      "loss": 0.7423,
      "step": 784190
    },
    {
      "epoch": 2.7484254907527155,
      "grad_norm": 2.8125,
      "learning_rate": 4.658857553785006e-06,
      "loss": 0.7418,
      "step": 784200
    },
    {
      "epoch": 2.748460538259611,
      "grad_norm": 3.0,
      "learning_rate": 4.658208525121304e-06,
      "loss": 0.8585,
      "step": 784210
    },
    {
      "epoch": 2.7484955857665065,
      "grad_norm": 3.09375,
      "learning_rate": 4.657559496457602e-06,
      "loss": 0.8478,
      "step": 784220
    },
    {
      "epoch": 2.7485306332734023,
      "grad_norm": 2.96875,
      "learning_rate": 4.6569104677939e-06,
      "loss": 0.8573,
      "step": 784230
    },
    {
      "epoch": 2.7485656807802976,
      "grad_norm": 3.125,
      "learning_rate": 4.6562614391301985e-06,
      "loss": 0.8375,
      "step": 784240
    },
    {
      "epoch": 2.7486007282871934,
      "grad_norm": 2.796875,
      "learning_rate": 4.6556124104664965e-06,
      "loss": 0.787,
      "step": 784250
    },
    {
      "epoch": 2.7486357757940887,
      "grad_norm": 2.609375,
      "learning_rate": 4.654963381802794e-06,
      "loss": 0.7417,
      "step": 784260
    },
    {
      "epoch": 2.7486708233009844,
      "grad_norm": 2.796875,
      "learning_rate": 4.6543143531390925e-06,
      "loss": 0.7119,
      "step": 784270
    },
    {
      "epoch": 2.74870587080788,
      "grad_norm": 2.75,
      "learning_rate": 4.6536653244753905e-06,
      "loss": 0.7728,
      "step": 784280
    },
    {
      "epoch": 2.7487409183147755,
      "grad_norm": 3.203125,
      "learning_rate": 4.6530162958116885e-06,
      "loss": 0.8025,
      "step": 784290
    },
    {
      "epoch": 2.7487759658216713,
      "grad_norm": 2.859375,
      "learning_rate": 4.6523672671479865e-06,
      "loss": 0.7946,
      "step": 784300
    },
    {
      "epoch": 2.748811013328567,
      "grad_norm": 2.5625,
      "learning_rate": 4.6517182384842845e-06,
      "loss": 0.7887,
      "step": 784310
    },
    {
      "epoch": 2.7488460608354623,
      "grad_norm": 3.5625,
      "learning_rate": 4.6510692098205825e-06,
      "loss": 0.8842,
      "step": 784320
    },
    {
      "epoch": 2.748881108342358,
      "grad_norm": 3.0,
      "learning_rate": 4.650420181156881e-06,
      "loss": 0.8021,
      "step": 784330
    },
    {
      "epoch": 2.748916155849254,
      "grad_norm": 3.09375,
      "learning_rate": 4.649771152493179e-06,
      "loss": 0.7685,
      "step": 784340
    },
    {
      "epoch": 2.748951203356149,
      "grad_norm": 2.71875,
      "learning_rate": 4.6491221238294765e-06,
      "loss": 0.8173,
      "step": 784350
    },
    {
      "epoch": 2.748986250863045,
      "grad_norm": 3.140625,
      "learning_rate": 4.648473095165775e-06,
      "loss": 0.8511,
      "step": 784360
    },
    {
      "epoch": 2.7490212983699402,
      "grad_norm": 2.671875,
      "learning_rate": 4.647824066502073e-06,
      "loss": 0.8018,
      "step": 784370
    },
    {
      "epoch": 2.749056345876836,
      "grad_norm": 2.9375,
      "learning_rate": 4.647175037838371e-06,
      "loss": 0.8066,
      "step": 784380
    },
    {
      "epoch": 2.7490913933837318,
      "grad_norm": 2.6875,
      "learning_rate": 4.646526009174669e-06,
      "loss": 0.7604,
      "step": 784390
    },
    {
      "epoch": 2.7491264408906275,
      "grad_norm": 2.765625,
      "learning_rate": 4.645876980510967e-06,
      "loss": 0.7862,
      "step": 784400
    },
    {
      "epoch": 2.749161488397523,
      "grad_norm": 2.578125,
      "learning_rate": 4.645227951847265e-06,
      "loss": 0.8367,
      "step": 784410
    },
    {
      "epoch": 2.7491965359044186,
      "grad_norm": 3.1875,
      "learning_rate": 4.644578923183563e-06,
      "loss": 0.8369,
      "step": 784420
    },
    {
      "epoch": 2.749231583411314,
      "grad_norm": 2.96875,
      "learning_rate": 4.643929894519862e-06,
      "loss": 0.8736,
      "step": 784430
    },
    {
      "epoch": 2.7492666309182097,
      "grad_norm": 3.3125,
      "learning_rate": 4.64328086585616e-06,
      "loss": 0.8228,
      "step": 784440
    },
    {
      "epoch": 2.7493016784251054,
      "grad_norm": 3.046875,
      "learning_rate": 4.642631837192458e-06,
      "loss": 0.8556,
      "step": 784450
    },
    {
      "epoch": 2.7493367259320007,
      "grad_norm": 3.21875,
      "learning_rate": 4.641982808528756e-06,
      "loss": 0.824,
      "step": 784460
    },
    {
      "epoch": 2.7493717734388965,
      "grad_norm": 2.9375,
      "learning_rate": 4.641333779865054e-06,
      "loss": 0.8259,
      "step": 784470
    },
    {
      "epoch": 2.749406820945792,
      "grad_norm": 2.453125,
      "learning_rate": 4.640684751201352e-06,
      "loss": 0.778,
      "step": 784480
    },
    {
      "epoch": 2.7494418684526876,
      "grad_norm": 2.875,
      "learning_rate": 4.64003572253765e-06,
      "loss": 0.8054,
      "step": 784490
    },
    {
      "epoch": 2.7494769159595833,
      "grad_norm": 3.25,
      "learning_rate": 4.639386693873948e-06,
      "loss": 0.755,
      "step": 784500
    },
    {
      "epoch": 2.749511963466479,
      "grad_norm": 2.890625,
      "learning_rate": 4.638737665210246e-06,
      "loss": 0.743,
      "step": 784510
    },
    {
      "epoch": 2.7495470109733744,
      "grad_norm": 3.296875,
      "learning_rate": 4.638088636546545e-06,
      "loss": 0.7843,
      "step": 784520
    },
    {
      "epoch": 2.74958205848027,
      "grad_norm": 2.90625,
      "learning_rate": 4.637439607882843e-06,
      "loss": 0.6974,
      "step": 784530
    },
    {
      "epoch": 2.7496171059871655,
      "grad_norm": 2.8125,
      "learning_rate": 4.63679057921914e-06,
      "loss": 0.7318,
      "step": 784540
    },
    {
      "epoch": 2.7496521534940612,
      "grad_norm": 3.0625,
      "learning_rate": 4.636141550555439e-06,
      "loss": 0.8175,
      "step": 784550
    },
    {
      "epoch": 2.749687201000957,
      "grad_norm": 2.84375,
      "learning_rate": 4.635492521891737e-06,
      "loss": 0.7618,
      "step": 784560
    },
    {
      "epoch": 2.7497222485078523,
      "grad_norm": 2.59375,
      "learning_rate": 4.634843493228035e-06,
      "loss": 0.7481,
      "step": 784570
    },
    {
      "epoch": 2.749757296014748,
      "grad_norm": 3.25,
      "learning_rate": 4.634194464564334e-06,
      "loss": 0.9154,
      "step": 784580
    },
    {
      "epoch": 2.7497923435216434,
      "grad_norm": 3.046875,
      "learning_rate": 4.633545435900631e-06,
      "loss": 0.6723,
      "step": 784590
    },
    {
      "epoch": 2.749827391028539,
      "grad_norm": 2.8125,
      "learning_rate": 4.632896407236929e-06,
      "loss": 0.8125,
      "step": 784600
    },
    {
      "epoch": 2.749862438535435,
      "grad_norm": 2.5,
      "learning_rate": 4.632247378573228e-06,
      "loss": 0.6991,
      "step": 784610
    },
    {
      "epoch": 2.7498974860423306,
      "grad_norm": 2.828125,
      "learning_rate": 4.631598349909526e-06,
      "loss": 0.7503,
      "step": 784620
    },
    {
      "epoch": 2.749932533549226,
      "grad_norm": 3.3125,
      "learning_rate": 4.630949321245823e-06,
      "loss": 0.8606,
      "step": 784630
    },
    {
      "epoch": 2.7499675810561217,
      "grad_norm": 3.078125,
      "learning_rate": 4.630300292582122e-06,
      "loss": 0.768,
      "step": 784640
    },
    {
      "epoch": 2.750002628563017,
      "grad_norm": 2.9375,
      "learning_rate": 4.62965126391842e-06,
      "loss": 0.8025,
      "step": 784650
    },
    {
      "epoch": 2.750037676069913,
      "grad_norm": 3.375,
      "learning_rate": 4.629002235254718e-06,
      "loss": 0.8216,
      "step": 784660
    },
    {
      "epoch": 2.7500727235768085,
      "grad_norm": 3.296875,
      "learning_rate": 4.628353206591017e-06,
      "loss": 0.8716,
      "step": 784670
    },
    {
      "epoch": 2.750107771083704,
      "grad_norm": 3.046875,
      "learning_rate": 4.627704177927314e-06,
      "loss": 0.7405,
      "step": 784680
    },
    {
      "epoch": 2.7501428185905996,
      "grad_norm": 3.34375,
      "learning_rate": 4.627055149263612e-06,
      "loss": 0.7564,
      "step": 784690
    },
    {
      "epoch": 2.750177866097495,
      "grad_norm": 2.921875,
      "learning_rate": 4.626406120599911e-06,
      "loss": 0.7036,
      "step": 784700
    },
    {
      "epoch": 2.7502129136043907,
      "grad_norm": 3.34375,
      "learning_rate": 4.625757091936209e-06,
      "loss": 0.8664,
      "step": 784710
    },
    {
      "epoch": 2.7502479611112864,
      "grad_norm": 2.9375,
      "learning_rate": 4.625108063272507e-06,
      "loss": 0.7893,
      "step": 784720
    },
    {
      "epoch": 2.750283008618182,
      "grad_norm": 2.5625,
      "learning_rate": 4.624459034608805e-06,
      "loss": 0.7707,
      "step": 784730
    },
    {
      "epoch": 2.7503180561250775,
      "grad_norm": 2.765625,
      "learning_rate": 4.623810005945103e-06,
      "loss": 0.7829,
      "step": 784740
    },
    {
      "epoch": 2.7503531036319733,
      "grad_norm": 2.78125,
      "learning_rate": 4.623160977281401e-06,
      "loss": 0.7508,
      "step": 784750
    },
    {
      "epoch": 2.7503881511388686,
      "grad_norm": 3.515625,
      "learning_rate": 4.6225119486176996e-06,
      "loss": 0.7909,
      "step": 784760
    },
    {
      "epoch": 2.7504231986457643,
      "grad_norm": 3.203125,
      "learning_rate": 4.621862919953997e-06,
      "loss": 0.8452,
      "step": 784770
    },
    {
      "epoch": 2.75045824615266,
      "grad_norm": 3.03125,
      "learning_rate": 4.621213891290295e-06,
      "loss": 0.8872,
      "step": 784780
    },
    {
      "epoch": 2.7504932936595554,
      "grad_norm": 2.984375,
      "learning_rate": 4.6205648626265936e-06,
      "loss": 0.8136,
      "step": 784790
    },
    {
      "epoch": 2.750528341166451,
      "grad_norm": 2.765625,
      "learning_rate": 4.6199158339628916e-06,
      "loss": 0.7953,
      "step": 784800
    },
    {
      "epoch": 2.7505633886733465,
      "grad_norm": 3.0,
      "learning_rate": 4.6192668052991896e-06,
      "loss": 0.881,
      "step": 784810
    },
    {
      "epoch": 2.7505984361802422,
      "grad_norm": 3.15625,
      "learning_rate": 4.6186177766354876e-06,
      "loss": 0.8296,
      "step": 784820
    },
    {
      "epoch": 2.750633483687138,
      "grad_norm": 2.90625,
      "learning_rate": 4.6179687479717856e-06,
      "loss": 0.7445,
      "step": 784830
    },
    {
      "epoch": 2.7506685311940338,
      "grad_norm": 3.125,
      "learning_rate": 4.6173197193080836e-06,
      "loss": 0.8794,
      "step": 784840
    },
    {
      "epoch": 2.750703578700929,
      "grad_norm": 2.96875,
      "learning_rate": 4.616670690644382e-06,
      "loss": 0.762,
      "step": 784850
    },
    {
      "epoch": 2.750738626207825,
      "grad_norm": 2.75,
      "learning_rate": 4.61602166198068e-06,
      "loss": 0.7994,
      "step": 784860
    },
    {
      "epoch": 2.75077367371472,
      "grad_norm": 3.015625,
      "learning_rate": 4.6153726333169776e-06,
      "loss": 0.753,
      "step": 784870
    },
    {
      "epoch": 2.750808721221616,
      "grad_norm": 2.765625,
      "learning_rate": 4.614723604653276e-06,
      "loss": 0.7949,
      "step": 784880
    },
    {
      "epoch": 2.7508437687285117,
      "grad_norm": 2.65625,
      "learning_rate": 4.614074575989574e-06,
      "loss": 0.7393,
      "step": 784890
    },
    {
      "epoch": 2.750878816235407,
      "grad_norm": 2.875,
      "learning_rate": 4.613425547325872e-06,
      "loss": 0.7798,
      "step": 784900
    },
    {
      "epoch": 2.7509138637423027,
      "grad_norm": 3.0,
      "learning_rate": 4.61277651866217e-06,
      "loss": 0.7155,
      "step": 784910
    },
    {
      "epoch": 2.750948911249198,
      "grad_norm": 2.6875,
      "learning_rate": 4.612127489998468e-06,
      "loss": 0.7426,
      "step": 784920
    },
    {
      "epoch": 2.750983958756094,
      "grad_norm": 3.0,
      "learning_rate": 4.611478461334766e-06,
      "loss": 0.7608,
      "step": 784930
    },
    {
      "epoch": 2.7510190062629896,
      "grad_norm": 2.734375,
      "learning_rate": 4.610829432671064e-06,
      "loss": 0.7137,
      "step": 784940
    },
    {
      "epoch": 2.7510540537698853,
      "grad_norm": 3.09375,
      "learning_rate": 4.610180404007363e-06,
      "loss": 0.8793,
      "step": 784950
    },
    {
      "epoch": 2.7510891012767806,
      "grad_norm": 2.6875,
      "learning_rate": 4.60953137534366e-06,
      "loss": 0.7652,
      "step": 784960
    },
    {
      "epoch": 2.7511241487836764,
      "grad_norm": 3.046875,
      "learning_rate": 4.608882346679959e-06,
      "loss": 0.7837,
      "step": 784970
    },
    {
      "epoch": 2.7511591962905717,
      "grad_norm": 2.796875,
      "learning_rate": 4.608233318016257e-06,
      "loss": 0.7938,
      "step": 784980
    },
    {
      "epoch": 2.7511942437974675,
      "grad_norm": 2.828125,
      "learning_rate": 4.607584289352555e-06,
      "loss": 0.8384,
      "step": 784990
    },
    {
      "epoch": 2.7512292913043632,
      "grad_norm": 2.890625,
      "learning_rate": 4.606935260688853e-06,
      "loss": 0.8348,
      "step": 785000
    },
    {
      "epoch": 2.7512292913043632,
      "eval_loss": 0.7522585988044739,
      "eval_runtime": 553.2272,
      "eval_samples_per_second": 687.667,
      "eval_steps_per_second": 57.306,
      "step": 785000
    },
    {
      "epoch": 2.7512643388112585,
      "grad_norm": 3.359375,
      "learning_rate": 4.606286232025151e-06,
      "loss": 0.8218,
      "step": 785010
    },
    {
      "epoch": 2.7512993863181543,
      "grad_norm": 2.796875,
      "learning_rate": 4.605637203361449e-06,
      "loss": 0.7917,
      "step": 785020
    },
    {
      "epoch": 2.7513344338250496,
      "grad_norm": 2.96875,
      "learning_rate": 4.604988174697747e-06,
      "loss": 0.7461,
      "step": 785030
    },
    {
      "epoch": 2.7513694813319454,
      "grad_norm": 2.546875,
      "learning_rate": 4.604339146034046e-06,
      "loss": 0.7254,
      "step": 785040
    },
    {
      "epoch": 2.751404528838841,
      "grad_norm": 2.53125,
      "learning_rate": 4.603690117370344e-06,
      "loss": 0.8043,
      "step": 785050
    },
    {
      "epoch": 2.751439576345737,
      "grad_norm": 2.796875,
      "learning_rate": 4.603041088706641e-06,
      "loss": 0.7525,
      "step": 785060
    },
    {
      "epoch": 2.751474623852632,
      "grad_norm": 2.875,
      "learning_rate": 4.60239206004294e-06,
      "loss": 0.7869,
      "step": 785070
    },
    {
      "epoch": 2.751509671359528,
      "grad_norm": 3.40625,
      "learning_rate": 4.601743031379238e-06,
      "loss": 0.8504,
      "step": 785080
    },
    {
      "epoch": 2.7515447188664233,
      "grad_norm": 2.671875,
      "learning_rate": 4.601094002715536e-06,
      "loss": 0.789,
      "step": 785090
    },
    {
      "epoch": 2.751579766373319,
      "grad_norm": 2.78125,
      "learning_rate": 4.600444974051834e-06,
      "loss": 0.8743,
      "step": 785100
    },
    {
      "epoch": 2.751614813880215,
      "grad_norm": 3.109375,
      "learning_rate": 4.599795945388132e-06,
      "loss": 0.8653,
      "step": 785110
    },
    {
      "epoch": 2.75164986138711,
      "grad_norm": 3.03125,
      "learning_rate": 4.59914691672443e-06,
      "loss": 0.8002,
      "step": 785120
    },
    {
      "epoch": 2.751684908894006,
      "grad_norm": 3.140625,
      "learning_rate": 4.598497888060729e-06,
      "loss": 0.8567,
      "step": 785130
    },
    {
      "epoch": 2.751719956400901,
      "grad_norm": 3.046875,
      "learning_rate": 4.597848859397027e-06,
      "loss": 0.8857,
      "step": 785140
    },
    {
      "epoch": 2.751755003907797,
      "grad_norm": 2.828125,
      "learning_rate": 4.597199830733324e-06,
      "loss": 0.7736,
      "step": 785150
    },
    {
      "epoch": 2.7517900514146927,
      "grad_norm": 3.015625,
      "learning_rate": 4.596550802069623e-06,
      "loss": 0.6891,
      "step": 785160
    },
    {
      "epoch": 2.7518250989215884,
      "grad_norm": 3.40625,
      "learning_rate": 4.595901773405921e-06,
      "loss": 0.7607,
      "step": 785170
    },
    {
      "epoch": 2.7518601464284838,
      "grad_norm": 2.765625,
      "learning_rate": 4.595252744742219e-06,
      "loss": 0.7584,
      "step": 785180
    },
    {
      "epoch": 2.7518951939353795,
      "grad_norm": 2.265625,
      "learning_rate": 4.594603716078518e-06,
      "loss": 0.7688,
      "step": 785190
    },
    {
      "epoch": 2.751930241442275,
      "grad_norm": 3.3125,
      "learning_rate": 4.593954687414815e-06,
      "loss": 0.7755,
      "step": 785200
    },
    {
      "epoch": 2.7519652889491706,
      "grad_norm": 2.609375,
      "learning_rate": 4.593305658751113e-06,
      "loss": 0.7479,
      "step": 785210
    },
    {
      "epoch": 2.7520003364560663,
      "grad_norm": 2.71875,
      "learning_rate": 4.592656630087412e-06,
      "loss": 0.7394,
      "step": 785220
    },
    {
      "epoch": 2.7520353839629617,
      "grad_norm": 3.546875,
      "learning_rate": 4.59200760142371e-06,
      "loss": 0.8959,
      "step": 785230
    },
    {
      "epoch": 2.7520704314698574,
      "grad_norm": 2.890625,
      "learning_rate": 4.591358572760007e-06,
      "loss": 0.8586,
      "step": 785240
    },
    {
      "epoch": 2.7521054789767527,
      "grad_norm": 2.609375,
      "learning_rate": 4.590709544096306e-06,
      "loss": 0.6826,
      "step": 785250
    },
    {
      "epoch": 2.7521405264836485,
      "grad_norm": 2.78125,
      "learning_rate": 4.590060515432604e-06,
      "loss": 0.8069,
      "step": 785260
    },
    {
      "epoch": 2.7521755739905442,
      "grad_norm": 3.21875,
      "learning_rate": 4.589411486768902e-06,
      "loss": 0.7733,
      "step": 785270
    },
    {
      "epoch": 2.75221062149744,
      "grad_norm": 2.71875,
      "learning_rate": 4.588762458105201e-06,
      "loss": 0.7593,
      "step": 785280
    },
    {
      "epoch": 2.7522456690043353,
      "grad_norm": 2.796875,
      "learning_rate": 4.588113429441498e-06,
      "loss": 0.8467,
      "step": 785290
    },
    {
      "epoch": 2.752280716511231,
      "grad_norm": 2.859375,
      "learning_rate": 4.587464400777796e-06,
      "loss": 0.7853,
      "step": 785300
    },
    {
      "epoch": 2.7523157640181264,
      "grad_norm": 2.984375,
      "learning_rate": 4.586815372114095e-06,
      "loss": 0.8037,
      "step": 785310
    },
    {
      "epoch": 2.752350811525022,
      "grad_norm": 2.875,
      "learning_rate": 4.586166343450393e-06,
      "loss": 0.8557,
      "step": 785320
    },
    {
      "epoch": 2.752385859031918,
      "grad_norm": 3.1875,
      "learning_rate": 4.585517314786691e-06,
      "loss": 0.8022,
      "step": 785330
    },
    {
      "epoch": 2.752420906538813,
      "grad_norm": 2.953125,
      "learning_rate": 4.584868286122989e-06,
      "loss": 0.7931,
      "step": 785340
    },
    {
      "epoch": 2.752455954045709,
      "grad_norm": 3.109375,
      "learning_rate": 4.584219257459287e-06,
      "loss": 0.7386,
      "step": 785350
    },
    {
      "epoch": 2.7524910015526043,
      "grad_norm": 3.296875,
      "learning_rate": 4.583570228795585e-06,
      "loss": 0.8128,
      "step": 785360
    },
    {
      "epoch": 2.7525260490595,
      "grad_norm": 4.0,
      "learning_rate": 4.582921200131883e-06,
      "loss": 0.8078,
      "step": 785370
    },
    {
      "epoch": 2.752561096566396,
      "grad_norm": 2.953125,
      "learning_rate": 4.5822721714681815e-06,
      "loss": 0.774,
      "step": 785380
    },
    {
      "epoch": 2.7525961440732916,
      "grad_norm": 2.921875,
      "learning_rate": 4.581623142804479e-06,
      "loss": 0.8187,
      "step": 785390
    },
    {
      "epoch": 2.752631191580187,
      "grad_norm": 3.0625,
      "learning_rate": 4.5809741141407775e-06,
      "loss": 0.7428,
      "step": 785400
    },
    {
      "epoch": 2.7526662390870826,
      "grad_norm": 3.1875,
      "learning_rate": 4.5803250854770755e-06,
      "loss": 0.8463,
      "step": 785410
    },
    {
      "epoch": 2.752701286593978,
      "grad_norm": 3.1875,
      "learning_rate": 4.5796760568133735e-06,
      "loss": 0.7813,
      "step": 785420
    },
    {
      "epoch": 2.7527363341008737,
      "grad_norm": 3.125,
      "learning_rate": 4.5790270281496715e-06,
      "loss": 0.8087,
      "step": 785430
    },
    {
      "epoch": 2.7527713816077695,
      "grad_norm": 3.3125,
      "learning_rate": 4.5783779994859695e-06,
      "loss": 0.8348,
      "step": 785440
    },
    {
      "epoch": 2.7528064291146648,
      "grad_norm": 3.046875,
      "learning_rate": 4.5777289708222675e-06,
      "loss": 0.7962,
      "step": 785450
    },
    {
      "epoch": 2.7528414766215605,
      "grad_norm": 2.671875,
      "learning_rate": 4.5770799421585655e-06,
      "loss": 0.8009,
      "step": 785460
    },
    {
      "epoch": 2.752876524128456,
      "grad_norm": 2.734375,
      "learning_rate": 4.576430913494864e-06,
      "loss": 0.8262,
      "step": 785470
    },
    {
      "epoch": 2.7529115716353516,
      "grad_norm": 2.796875,
      "learning_rate": 4.5757818848311615e-06,
      "loss": 0.8086,
      "step": 785480
    },
    {
      "epoch": 2.7529466191422474,
      "grad_norm": 2.84375,
      "learning_rate": 4.5751328561674595e-06,
      "loss": 0.7853,
      "step": 785490
    },
    {
      "epoch": 2.752981666649143,
      "grad_norm": 3.609375,
      "learning_rate": 4.574483827503758e-06,
      "loss": 0.753,
      "step": 785500
    },
    {
      "epoch": 2.7530167141560384,
      "grad_norm": 2.578125,
      "learning_rate": 4.573834798840056e-06,
      "loss": 0.7307,
      "step": 785510
    },
    {
      "epoch": 2.753051761662934,
      "grad_norm": 2.90625,
      "learning_rate": 4.573185770176354e-06,
      "loss": 0.8177,
      "step": 785520
    },
    {
      "epoch": 2.7530868091698295,
      "grad_norm": 2.828125,
      "learning_rate": 4.572536741512652e-06,
      "loss": 0.819,
      "step": 785530
    },
    {
      "epoch": 2.7531218566767253,
      "grad_norm": 2.96875,
      "learning_rate": 4.57188771284895e-06,
      "loss": 0.7979,
      "step": 785540
    },
    {
      "epoch": 2.753156904183621,
      "grad_norm": 3.15625,
      "learning_rate": 4.571238684185248e-06,
      "loss": 0.8302,
      "step": 785550
    },
    {
      "epoch": 2.7531919516905163,
      "grad_norm": 3.25,
      "learning_rate": 4.570589655521547e-06,
      "loss": 0.7729,
      "step": 785560
    },
    {
      "epoch": 2.753226999197412,
      "grad_norm": 3.03125,
      "learning_rate": 4.569940626857844e-06,
      "loss": 0.9029,
      "step": 785570
    },
    {
      "epoch": 2.753262046704308,
      "grad_norm": 2.53125,
      "learning_rate": 4.569291598194142e-06,
      "loss": 0.7412,
      "step": 785580
    },
    {
      "epoch": 2.753297094211203,
      "grad_norm": 2.890625,
      "learning_rate": 4.568642569530441e-06,
      "loss": 0.8302,
      "step": 785590
    },
    {
      "epoch": 2.753332141718099,
      "grad_norm": 2.921875,
      "learning_rate": 4.567993540866739e-06,
      "loss": 0.8021,
      "step": 785600
    },
    {
      "epoch": 2.7533671892249947,
      "grad_norm": 2.734375,
      "learning_rate": 4.567344512203037e-06,
      "loss": 0.8227,
      "step": 785610
    },
    {
      "epoch": 2.75340223673189,
      "grad_norm": 3.203125,
      "learning_rate": 4.566695483539335e-06,
      "loss": 0.7611,
      "step": 785620
    },
    {
      "epoch": 2.7534372842387858,
      "grad_norm": 3.234375,
      "learning_rate": 4.566046454875633e-06,
      "loss": 0.8241,
      "step": 785630
    },
    {
      "epoch": 2.753472331745681,
      "grad_norm": 3.09375,
      "learning_rate": 4.565397426211931e-06,
      "loss": 0.7788,
      "step": 785640
    },
    {
      "epoch": 2.753507379252577,
      "grad_norm": 2.84375,
      "learning_rate": 4.56474839754823e-06,
      "loss": 0.7475,
      "step": 785650
    },
    {
      "epoch": 2.7535424267594726,
      "grad_norm": 3.359375,
      "learning_rate": 4.564099368884528e-06,
      "loss": 0.8271,
      "step": 785660
    },
    {
      "epoch": 2.753577474266368,
      "grad_norm": 2.90625,
      "learning_rate": 4.563450340220825e-06,
      "loss": 0.8462,
      "step": 785670
    },
    {
      "epoch": 2.7536125217732637,
      "grad_norm": 2.453125,
      "learning_rate": 4.562801311557124e-06,
      "loss": 0.8249,
      "step": 785680
    },
    {
      "epoch": 2.7536475692801594,
      "grad_norm": 2.734375,
      "learning_rate": 4.562152282893422e-06,
      "loss": 0.7409,
      "step": 785690
    },
    {
      "epoch": 2.7536826167870547,
      "grad_norm": 3.078125,
      "learning_rate": 4.56150325422972e-06,
      "loss": 0.8351,
      "step": 785700
    },
    {
      "epoch": 2.7537176642939505,
      "grad_norm": 2.78125,
      "learning_rate": 4.560854225566019e-06,
      "loss": 0.7414,
      "step": 785710
    },
    {
      "epoch": 2.7537527118008462,
      "grad_norm": 2.5625,
      "learning_rate": 4.560205196902316e-06,
      "loss": 0.8329,
      "step": 785720
    },
    {
      "epoch": 2.7537877593077416,
      "grad_norm": 2.53125,
      "learning_rate": 4.559556168238614e-06,
      "loss": 0.7598,
      "step": 785730
    },
    {
      "epoch": 2.7538228068146373,
      "grad_norm": 2.984375,
      "learning_rate": 4.558907139574913e-06,
      "loss": 0.7726,
      "step": 785740
    },
    {
      "epoch": 2.7538578543215326,
      "grad_norm": 3.03125,
      "learning_rate": 4.558258110911211e-06,
      "loss": 0.7529,
      "step": 785750
    },
    {
      "epoch": 2.7538929018284284,
      "grad_norm": 2.84375,
      "learning_rate": 4.557609082247508e-06,
      "loss": 0.7986,
      "step": 785760
    },
    {
      "epoch": 2.753927949335324,
      "grad_norm": 2.859375,
      "learning_rate": 4.556960053583807e-06,
      "loss": 0.7676,
      "step": 785770
    },
    {
      "epoch": 2.75396299684222,
      "grad_norm": 2.546875,
      "learning_rate": 4.556311024920105e-06,
      "loss": 0.8084,
      "step": 785780
    },
    {
      "epoch": 2.753998044349115,
      "grad_norm": 2.71875,
      "learning_rate": 4.555661996256403e-06,
      "loss": 0.8314,
      "step": 785790
    },
    {
      "epoch": 2.754033091856011,
      "grad_norm": 2.359375,
      "learning_rate": 4.555012967592701e-06,
      "loss": 0.7678,
      "step": 785800
    },
    {
      "epoch": 2.7540681393629063,
      "grad_norm": 2.828125,
      "learning_rate": 4.554363938928999e-06,
      "loss": 0.7738,
      "step": 785810
    },
    {
      "epoch": 2.754103186869802,
      "grad_norm": 2.84375,
      "learning_rate": 4.553714910265297e-06,
      "loss": 0.8268,
      "step": 785820
    },
    {
      "epoch": 2.754138234376698,
      "grad_norm": 2.671875,
      "learning_rate": 4.553065881601596e-06,
      "loss": 0.7439,
      "step": 785830
    },
    {
      "epoch": 2.754173281883593,
      "grad_norm": 2.953125,
      "learning_rate": 4.552416852937894e-06,
      "loss": 0.8109,
      "step": 785840
    },
    {
      "epoch": 2.754208329390489,
      "grad_norm": 3.390625,
      "learning_rate": 4.551767824274192e-06,
      "loss": 0.7847,
      "step": 785850
    },
    {
      "epoch": 2.754243376897384,
      "grad_norm": 3.171875,
      "learning_rate": 4.55111879561049e-06,
      "loss": 0.8402,
      "step": 785860
    },
    {
      "epoch": 2.75427842440428,
      "grad_norm": 3.3125,
      "learning_rate": 4.550469766946788e-06,
      "loss": 0.8967,
      "step": 785870
    },
    {
      "epoch": 2.7543134719111757,
      "grad_norm": 3.4375,
      "learning_rate": 4.549820738283086e-06,
      "loss": 0.9301,
      "step": 785880
    },
    {
      "epoch": 2.7543485194180715,
      "grad_norm": 3.109375,
      "learning_rate": 4.549171709619384e-06,
      "loss": 0.8075,
      "step": 785890
    },
    {
      "epoch": 2.7543835669249668,
      "grad_norm": 3.34375,
      "learning_rate": 4.548522680955682e-06,
      "loss": 0.7897,
      "step": 785900
    },
    {
      "epoch": 2.7544186144318625,
      "grad_norm": 2.84375,
      "learning_rate": 4.54787365229198e-06,
      "loss": 0.744,
      "step": 785910
    },
    {
      "epoch": 2.754453661938758,
      "grad_norm": 2.734375,
      "learning_rate": 4.547224623628278e-06,
      "loss": 0.8982,
      "step": 785920
    },
    {
      "epoch": 2.7544887094456536,
      "grad_norm": 3.296875,
      "learning_rate": 4.5465755949645766e-06,
      "loss": 0.8295,
      "step": 785930
    },
    {
      "epoch": 2.7545237569525494,
      "grad_norm": 2.703125,
      "learning_rate": 4.5459265663008746e-06,
      "loss": 0.8169,
      "step": 785940
    },
    {
      "epoch": 2.7545588044594447,
      "grad_norm": 2.921875,
      "learning_rate": 4.5452775376371726e-06,
      "loss": 0.7832,
      "step": 785950
    },
    {
      "epoch": 2.7545938519663404,
      "grad_norm": 2.625,
      "learning_rate": 4.5446285089734706e-06,
      "loss": 0.8392,
      "step": 785960
    },
    {
      "epoch": 2.7546288994732357,
      "grad_norm": 2.8125,
      "learning_rate": 4.5439794803097686e-06,
      "loss": 0.6997,
      "step": 785970
    },
    {
      "epoch": 2.7546639469801315,
      "grad_norm": 2.640625,
      "learning_rate": 4.5433304516460666e-06,
      "loss": 0.8536,
      "step": 785980
    },
    {
      "epoch": 2.7546989944870273,
      "grad_norm": 3.0625,
      "learning_rate": 4.542681422982365e-06,
      "loss": 0.8108,
      "step": 785990
    },
    {
      "epoch": 2.754734041993923,
      "grad_norm": 3.34375,
      "learning_rate": 4.5420323943186626e-06,
      "loss": 0.8485,
      "step": 786000
    },
    {
      "epoch": 2.7547690895008183,
      "grad_norm": 2.8125,
      "learning_rate": 4.5413833656549606e-06,
      "loss": 0.8358,
      "step": 786010
    },
    {
      "epoch": 2.754804137007714,
      "grad_norm": 2.640625,
      "learning_rate": 4.540734336991259e-06,
      "loss": 0.8888,
      "step": 786020
    },
    {
      "epoch": 2.7548391845146094,
      "grad_norm": 3.421875,
      "learning_rate": 4.540085308327557e-06,
      "loss": 0.8162,
      "step": 786030
    },
    {
      "epoch": 2.754874232021505,
      "grad_norm": 2.984375,
      "learning_rate": 4.5394362796638546e-06,
      "loss": 0.7665,
      "step": 786040
    },
    {
      "epoch": 2.754909279528401,
      "grad_norm": 3.0,
      "learning_rate": 4.538787251000153e-06,
      "loss": 0.7825,
      "step": 786050
    },
    {
      "epoch": 2.7549443270352962,
      "grad_norm": 3.109375,
      "learning_rate": 4.538138222336451e-06,
      "loss": 0.8248,
      "step": 786060
    },
    {
      "epoch": 2.754979374542192,
      "grad_norm": 3.203125,
      "learning_rate": 4.537489193672749e-06,
      "loss": 0.886,
      "step": 786070
    },
    {
      "epoch": 2.7550144220490873,
      "grad_norm": 3.234375,
      "learning_rate": 4.536840165009048e-06,
      "loss": 0.8982,
      "step": 786080
    },
    {
      "epoch": 2.755049469555983,
      "grad_norm": 2.59375,
      "learning_rate": 4.536191136345345e-06,
      "loss": 0.7565,
      "step": 786090
    },
    {
      "epoch": 2.755084517062879,
      "grad_norm": 2.71875,
      "learning_rate": 4.535542107681643e-06,
      "loss": 0.7574,
      "step": 786100
    },
    {
      "epoch": 2.7551195645697746,
      "grad_norm": 2.953125,
      "learning_rate": 4.534893079017942e-06,
      "loss": 0.8216,
      "step": 786110
    },
    {
      "epoch": 2.75515461207667,
      "grad_norm": 2.90625,
      "learning_rate": 4.53424405035424e-06,
      "loss": 0.7825,
      "step": 786120
    },
    {
      "epoch": 2.7551896595835657,
      "grad_norm": 3.171875,
      "learning_rate": 4.533595021690538e-06,
      "loss": 0.8278,
      "step": 786130
    },
    {
      "epoch": 2.755224707090461,
      "grad_norm": 3.15625,
      "learning_rate": 4.532945993026836e-06,
      "loss": 0.8162,
      "step": 786140
    },
    {
      "epoch": 2.7552597545973567,
      "grad_norm": 2.71875,
      "learning_rate": 4.532296964363134e-06,
      "loss": 0.7664,
      "step": 786150
    },
    {
      "epoch": 2.7552948021042525,
      "grad_norm": 3.4375,
      "learning_rate": 4.531647935699432e-06,
      "loss": 0.7351,
      "step": 786160
    },
    {
      "epoch": 2.755329849611148,
      "grad_norm": 2.59375,
      "learning_rate": 4.530998907035731e-06,
      "loss": 0.7425,
      "step": 786170
    },
    {
      "epoch": 2.7553648971180436,
      "grad_norm": 2.84375,
      "learning_rate": 4.530349878372029e-06,
      "loss": 0.7321,
      "step": 786180
    },
    {
      "epoch": 2.755399944624939,
      "grad_norm": 3.203125,
      "learning_rate": 4.529700849708326e-06,
      "loss": 0.8563,
      "step": 786190
    },
    {
      "epoch": 2.7554349921318346,
      "grad_norm": 2.96875,
      "learning_rate": 4.529051821044625e-06,
      "loss": 0.7634,
      "step": 786200
    },
    {
      "epoch": 2.7554700396387304,
      "grad_norm": 2.578125,
      "learning_rate": 4.528402792380923e-06,
      "loss": 0.7236,
      "step": 786210
    },
    {
      "epoch": 2.755505087145626,
      "grad_norm": 3.46875,
      "learning_rate": 4.527753763717221e-06,
      "loss": 0.8639,
      "step": 786220
    },
    {
      "epoch": 2.7555401346525215,
      "grad_norm": 2.8125,
      "learning_rate": 4.527104735053519e-06,
      "loss": 0.8761,
      "step": 786230
    },
    {
      "epoch": 2.755575182159417,
      "grad_norm": 3.359375,
      "learning_rate": 4.526455706389817e-06,
      "loss": 0.7713,
      "step": 786240
    },
    {
      "epoch": 2.7556102296663125,
      "grad_norm": 2.828125,
      "learning_rate": 4.525806677726115e-06,
      "loss": 0.8025,
      "step": 786250
    },
    {
      "epoch": 2.7556452771732083,
      "grad_norm": 2.953125,
      "learning_rate": 4.525157649062414e-06,
      "loss": 0.7488,
      "step": 786260
    },
    {
      "epoch": 2.755680324680104,
      "grad_norm": 2.890625,
      "learning_rate": 4.524508620398712e-06,
      "loss": 0.8179,
      "step": 786270
    },
    {
      "epoch": 2.7557153721869994,
      "grad_norm": 2.34375,
      "learning_rate": 4.523859591735009e-06,
      "loss": 0.7992,
      "step": 786280
    },
    {
      "epoch": 2.755750419693895,
      "grad_norm": 2.703125,
      "learning_rate": 4.523210563071308e-06,
      "loss": 0.789,
      "step": 786290
    },
    {
      "epoch": 2.7557854672007904,
      "grad_norm": 3.390625,
      "learning_rate": 4.522561534407606e-06,
      "loss": 0.8724,
      "step": 786300
    },
    {
      "epoch": 2.755820514707686,
      "grad_norm": 2.953125,
      "learning_rate": 4.521912505743904e-06,
      "loss": 0.8504,
      "step": 786310
    },
    {
      "epoch": 2.755855562214582,
      "grad_norm": 2.96875,
      "learning_rate": 4.521263477080202e-06,
      "loss": 0.8021,
      "step": 786320
    },
    {
      "epoch": 2.7558906097214777,
      "grad_norm": 3.1875,
      "learning_rate": 4.5206144484165e-06,
      "loss": 0.7745,
      "step": 786330
    },
    {
      "epoch": 2.755925657228373,
      "grad_norm": 2.828125,
      "learning_rate": 4.519965419752798e-06,
      "loss": 0.8147,
      "step": 786340
    },
    {
      "epoch": 2.7559607047352688,
      "grad_norm": 3.125,
      "learning_rate": 4.519316391089096e-06,
      "loss": 0.7809,
      "step": 786350
    },
    {
      "epoch": 2.755995752242164,
      "grad_norm": 2.765625,
      "learning_rate": 4.518667362425395e-06,
      "loss": 0.7912,
      "step": 786360
    },
    {
      "epoch": 2.75603079974906,
      "grad_norm": 3.03125,
      "learning_rate": 4.518018333761692e-06,
      "loss": 0.7469,
      "step": 786370
    },
    {
      "epoch": 2.7560658472559556,
      "grad_norm": 2.984375,
      "learning_rate": 4.517369305097991e-06,
      "loss": 0.7927,
      "step": 786380
    },
    {
      "epoch": 2.756100894762851,
      "grad_norm": 3.390625,
      "learning_rate": 4.516720276434289e-06,
      "loss": 0.8037,
      "step": 786390
    },
    {
      "epoch": 2.7561359422697467,
      "grad_norm": 2.6875,
      "learning_rate": 4.516071247770587e-06,
      "loss": 0.7614,
      "step": 786400
    },
    {
      "epoch": 2.756170989776642,
      "grad_norm": 2.9375,
      "learning_rate": 4.515422219106885e-06,
      "loss": 0.8514,
      "step": 786410
    },
    {
      "epoch": 2.7562060372835377,
      "grad_norm": 2.703125,
      "learning_rate": 4.514773190443183e-06,
      "loss": 0.8263,
      "step": 786420
    },
    {
      "epoch": 2.7562410847904335,
      "grad_norm": 2.953125,
      "learning_rate": 4.514124161779481e-06,
      "loss": 0.7111,
      "step": 786430
    },
    {
      "epoch": 2.7562761322973293,
      "grad_norm": 2.859375,
      "learning_rate": 4.513475133115779e-06,
      "loss": 0.7733,
      "step": 786440
    },
    {
      "epoch": 2.7563111798042246,
      "grad_norm": 3.25,
      "learning_rate": 4.512826104452078e-06,
      "loss": 0.8196,
      "step": 786450
    },
    {
      "epoch": 2.7563462273111203,
      "grad_norm": 3.109375,
      "learning_rate": 4.512177075788376e-06,
      "loss": 0.8275,
      "step": 786460
    },
    {
      "epoch": 2.7563812748180156,
      "grad_norm": 2.65625,
      "learning_rate": 4.511528047124673e-06,
      "loss": 0.7948,
      "step": 786470
    },
    {
      "epoch": 2.7564163223249114,
      "grad_norm": 2.546875,
      "learning_rate": 4.510879018460972e-06,
      "loss": 0.7517,
      "step": 786480
    },
    {
      "epoch": 2.756451369831807,
      "grad_norm": 3.234375,
      "learning_rate": 4.51022998979727e-06,
      "loss": 0.7403,
      "step": 786490
    },
    {
      "epoch": 2.7564864173387025,
      "grad_norm": 2.640625,
      "learning_rate": 4.509580961133568e-06,
      "loss": 0.7449,
      "step": 786500
    },
    {
      "epoch": 2.7565214648455982,
      "grad_norm": 3.171875,
      "learning_rate": 4.508931932469866e-06,
      "loss": 0.8017,
      "step": 786510
    },
    {
      "epoch": 2.7565565123524935,
      "grad_norm": 2.609375,
      "learning_rate": 4.508282903806164e-06,
      "loss": 0.7214,
      "step": 786520
    },
    {
      "epoch": 2.7565915598593893,
      "grad_norm": 2.546875,
      "learning_rate": 4.507633875142462e-06,
      "loss": 0.7803,
      "step": 786530
    },
    {
      "epoch": 2.756626607366285,
      "grad_norm": 4.40625,
      "learning_rate": 4.5069848464787605e-06,
      "loss": 0.7183,
      "step": 786540
    },
    {
      "epoch": 2.756661654873181,
      "grad_norm": 2.703125,
      "learning_rate": 4.5063358178150585e-06,
      "loss": 0.7525,
      "step": 786550
    },
    {
      "epoch": 2.756696702380076,
      "grad_norm": 2.53125,
      "learning_rate": 4.505686789151356e-06,
      "loss": 0.772,
      "step": 786560
    },
    {
      "epoch": 2.756731749886972,
      "grad_norm": 3.203125,
      "learning_rate": 4.5050377604876545e-06,
      "loss": 0.8345,
      "step": 786570
    },
    {
      "epoch": 2.756766797393867,
      "grad_norm": 3.078125,
      "learning_rate": 4.5043887318239525e-06,
      "loss": 0.8187,
      "step": 786580
    },
    {
      "epoch": 2.756801844900763,
      "grad_norm": 3.609375,
      "learning_rate": 4.5037397031602505e-06,
      "loss": 0.7447,
      "step": 786590
    },
    {
      "epoch": 2.7568368924076587,
      "grad_norm": 3.0,
      "learning_rate": 4.503090674496549e-06,
      "loss": 0.8852,
      "step": 786600
    },
    {
      "epoch": 2.756871939914554,
      "grad_norm": 3.1875,
      "learning_rate": 4.5024416458328465e-06,
      "loss": 0.9367,
      "step": 786610
    },
    {
      "epoch": 2.75690698742145,
      "grad_norm": 2.984375,
      "learning_rate": 4.5017926171691445e-06,
      "loss": 0.9012,
      "step": 786620
    },
    {
      "epoch": 2.756942034928345,
      "grad_norm": 2.9375,
      "learning_rate": 4.501143588505443e-06,
      "loss": 0.7913,
      "step": 786630
    },
    {
      "epoch": 2.756977082435241,
      "grad_norm": 2.234375,
      "learning_rate": 4.500494559841741e-06,
      "loss": 0.7559,
      "step": 786640
    },
    {
      "epoch": 2.7570121299421366,
      "grad_norm": 2.90625,
      "learning_rate": 4.499845531178039e-06,
      "loss": 0.8338,
      "step": 786650
    },
    {
      "epoch": 2.7570471774490324,
      "grad_norm": 2.625,
      "learning_rate": 4.499196502514337e-06,
      "loss": 0.7938,
      "step": 786660
    },
    {
      "epoch": 2.7570822249559277,
      "grad_norm": 2.40625,
      "learning_rate": 4.498547473850635e-06,
      "loss": 0.8234,
      "step": 786670
    },
    {
      "epoch": 2.7571172724628235,
      "grad_norm": 3.15625,
      "learning_rate": 4.497898445186933e-06,
      "loss": 0.8522,
      "step": 786680
    },
    {
      "epoch": 2.7571523199697188,
      "grad_norm": 2.953125,
      "learning_rate": 4.497249416523232e-06,
      "loss": 0.9172,
      "step": 786690
    },
    {
      "epoch": 2.7571873674766145,
      "grad_norm": 3.09375,
      "learning_rate": 4.496600387859529e-06,
      "loss": 0.7703,
      "step": 786700
    },
    {
      "epoch": 2.7572224149835103,
      "grad_norm": 2.484375,
      "learning_rate": 4.495951359195827e-06,
      "loss": 0.7551,
      "step": 786710
    },
    {
      "epoch": 2.7572574624904056,
      "grad_norm": 2.8125,
      "learning_rate": 4.495302330532126e-06,
      "loss": 0.7459,
      "step": 786720
    },
    {
      "epoch": 2.7572925099973014,
      "grad_norm": 3.03125,
      "learning_rate": 4.494653301868424e-06,
      "loss": 0.8249,
      "step": 786730
    },
    {
      "epoch": 2.7573275575041967,
      "grad_norm": 3.609375,
      "learning_rate": 4.494004273204722e-06,
      "loss": 0.8298,
      "step": 786740
    },
    {
      "epoch": 2.7573626050110924,
      "grad_norm": 2.765625,
      "learning_rate": 4.49335524454102e-06,
      "loss": 0.8716,
      "step": 786750
    },
    {
      "epoch": 2.757397652517988,
      "grad_norm": 3.0625,
      "learning_rate": 4.492706215877318e-06,
      "loss": 0.8371,
      "step": 786760
    },
    {
      "epoch": 2.757432700024884,
      "grad_norm": 3.0,
      "learning_rate": 4.492057187213616e-06,
      "loss": 0.8726,
      "step": 786770
    },
    {
      "epoch": 2.7574677475317793,
      "grad_norm": 3.1875,
      "learning_rate": 4.491408158549914e-06,
      "loss": 0.8307,
      "step": 786780
    },
    {
      "epoch": 2.757502795038675,
      "grad_norm": 2.84375,
      "learning_rate": 4.490759129886213e-06,
      "loss": 0.7675,
      "step": 786790
    },
    {
      "epoch": 2.7575378425455703,
      "grad_norm": 3.28125,
      "learning_rate": 4.49011010122251e-06,
      "loss": 0.797,
      "step": 786800
    },
    {
      "epoch": 2.757572890052466,
      "grad_norm": 3.265625,
      "learning_rate": 4.489461072558809e-06,
      "loss": 0.7954,
      "step": 786810
    },
    {
      "epoch": 2.757607937559362,
      "grad_norm": 3.46875,
      "learning_rate": 4.488812043895107e-06,
      "loss": 0.8071,
      "step": 786820
    },
    {
      "epoch": 2.757642985066257,
      "grad_norm": 2.96875,
      "learning_rate": 4.488163015231405e-06,
      "loss": 0.7949,
      "step": 786830
    },
    {
      "epoch": 2.757678032573153,
      "grad_norm": 3.078125,
      "learning_rate": 4.487513986567703e-06,
      "loss": 0.7733,
      "step": 786840
    },
    {
      "epoch": 2.7577130800800482,
      "grad_norm": 2.828125,
      "learning_rate": 4.486864957904001e-06,
      "loss": 0.7311,
      "step": 786850
    },
    {
      "epoch": 2.757748127586944,
      "grad_norm": 3.21875,
      "learning_rate": 4.486215929240299e-06,
      "loss": 0.7799,
      "step": 786860
    },
    {
      "epoch": 2.7577831750938397,
      "grad_norm": 2.90625,
      "learning_rate": 4.485566900576597e-06,
      "loss": 0.7874,
      "step": 786870
    },
    {
      "epoch": 2.7578182226007355,
      "grad_norm": 2.96875,
      "learning_rate": 4.484917871912896e-06,
      "loss": 0.7541,
      "step": 786880
    },
    {
      "epoch": 2.757853270107631,
      "grad_norm": 2.640625,
      "learning_rate": 4.484268843249193e-06,
      "loss": 0.7489,
      "step": 786890
    },
    {
      "epoch": 2.7578883176145266,
      "grad_norm": 2.90625,
      "learning_rate": 4.483619814585491e-06,
      "loss": 0.7467,
      "step": 786900
    },
    {
      "epoch": 2.757923365121422,
      "grad_norm": 2.984375,
      "learning_rate": 4.48297078592179e-06,
      "loss": 0.7401,
      "step": 786910
    },
    {
      "epoch": 2.7579584126283176,
      "grad_norm": 2.6875,
      "learning_rate": 4.482321757258088e-06,
      "loss": 0.7431,
      "step": 786920
    },
    {
      "epoch": 2.7579934601352134,
      "grad_norm": 2.96875,
      "learning_rate": 4.481672728594386e-06,
      "loss": 0.8425,
      "step": 786930
    },
    {
      "epoch": 2.7580285076421087,
      "grad_norm": 2.765625,
      "learning_rate": 4.481023699930684e-06,
      "loss": 0.6963,
      "step": 786940
    },
    {
      "epoch": 2.7580635551490045,
      "grad_norm": 3.0625,
      "learning_rate": 4.480374671266982e-06,
      "loss": 0.8913,
      "step": 786950
    },
    {
      "epoch": 2.7580986026559002,
      "grad_norm": 3.28125,
      "learning_rate": 4.47972564260328e-06,
      "loss": 0.7755,
      "step": 786960
    },
    {
      "epoch": 2.7581336501627955,
      "grad_norm": 3.328125,
      "learning_rate": 4.479076613939579e-06,
      "loss": 0.8046,
      "step": 786970
    },
    {
      "epoch": 2.7581686976696913,
      "grad_norm": 2.734375,
      "learning_rate": 4.478427585275876e-06,
      "loss": 0.7618,
      "step": 786980
    },
    {
      "epoch": 2.758203745176587,
      "grad_norm": 3.09375,
      "learning_rate": 4.477778556612174e-06,
      "loss": 0.7787,
      "step": 786990
    },
    {
      "epoch": 2.7582387926834824,
      "grad_norm": 2.828125,
      "learning_rate": 4.477129527948473e-06,
      "loss": 0.8647,
      "step": 787000
    },
    {
      "epoch": 2.758273840190378,
      "grad_norm": 3.296875,
      "learning_rate": 4.476480499284771e-06,
      "loss": 0.7801,
      "step": 787010
    },
    {
      "epoch": 2.7583088876972734,
      "grad_norm": 3.03125,
      "learning_rate": 4.475831470621069e-06,
      "loss": 0.7385,
      "step": 787020
    },
    {
      "epoch": 2.758343935204169,
      "grad_norm": 2.34375,
      "learning_rate": 4.475182441957367e-06,
      "loss": 0.7904,
      "step": 787030
    },
    {
      "epoch": 2.758378982711065,
      "grad_norm": 2.84375,
      "learning_rate": 4.474533413293665e-06,
      "loss": 0.8336,
      "step": 787040
    },
    {
      "epoch": 2.7584140302179607,
      "grad_norm": 3.109375,
      "learning_rate": 4.473884384629963e-06,
      "loss": 0.8527,
      "step": 787050
    },
    {
      "epoch": 2.758449077724856,
      "grad_norm": 3.15625,
      "learning_rate": 4.4732353559662616e-06,
      "loss": 0.7478,
      "step": 787060
    },
    {
      "epoch": 2.758484125231752,
      "grad_norm": 3.4375,
      "learning_rate": 4.4725863273025596e-06,
      "loss": 0.9198,
      "step": 787070
    },
    {
      "epoch": 2.758519172738647,
      "grad_norm": 3.140625,
      "learning_rate": 4.471937298638857e-06,
      "loss": 0.8319,
      "step": 787080
    },
    {
      "epoch": 2.758554220245543,
      "grad_norm": 2.71875,
      "learning_rate": 4.4712882699751556e-06,
      "loss": 0.8974,
      "step": 787090
    },
    {
      "epoch": 2.7585892677524386,
      "grad_norm": 3.125,
      "learning_rate": 4.4706392413114536e-06,
      "loss": 0.7481,
      "step": 787100
    },
    {
      "epoch": 2.758624315259334,
      "grad_norm": 2.828125,
      "learning_rate": 4.4699902126477516e-06,
      "loss": 0.7716,
      "step": 787110
    },
    {
      "epoch": 2.7586593627662297,
      "grad_norm": 3.03125,
      "learning_rate": 4.46934118398405e-06,
      "loss": 0.8322,
      "step": 787120
    },
    {
      "epoch": 2.758694410273125,
      "grad_norm": 3.5625,
      "learning_rate": 4.4686921553203476e-06,
      "loss": 0.8224,
      "step": 787130
    },
    {
      "epoch": 2.7587294577800208,
      "grad_norm": 2.765625,
      "learning_rate": 4.4680431266566456e-06,
      "loss": 0.82,
      "step": 787140
    },
    {
      "epoch": 2.7587645052869165,
      "grad_norm": 3.21875,
      "learning_rate": 4.467394097992944e-06,
      "loss": 0.834,
      "step": 787150
    },
    {
      "epoch": 2.7587995527938123,
      "grad_norm": 2.640625,
      "learning_rate": 4.466745069329242e-06,
      "loss": 0.7933,
      "step": 787160
    },
    {
      "epoch": 2.7588346003007076,
      "grad_norm": 3.0,
      "learning_rate": 4.4660960406655396e-06,
      "loss": 0.7936,
      "step": 787170
    },
    {
      "epoch": 2.7588696478076034,
      "grad_norm": 3.265625,
      "learning_rate": 4.465447012001838e-06,
      "loss": 0.8154,
      "step": 787180
    },
    {
      "epoch": 2.7589046953144987,
      "grad_norm": 3.328125,
      "learning_rate": 4.464797983338136e-06,
      "loss": 0.7215,
      "step": 787190
    },
    {
      "epoch": 2.7589397428213944,
      "grad_norm": 3.125,
      "learning_rate": 4.464148954674434e-06,
      "loss": 0.8399,
      "step": 787200
    },
    {
      "epoch": 2.75897479032829,
      "grad_norm": 2.6875,
      "learning_rate": 4.463499926010733e-06,
      "loss": 0.8155,
      "step": 787210
    },
    {
      "epoch": 2.7590098378351855,
      "grad_norm": 2.96875,
      "learning_rate": 4.46285089734703e-06,
      "loss": 0.8567,
      "step": 787220
    },
    {
      "epoch": 2.7590448853420813,
      "grad_norm": 2.71875,
      "learning_rate": 4.462201868683328e-06,
      "loss": 0.7883,
      "step": 787230
    },
    {
      "epoch": 2.7590799328489766,
      "grad_norm": 3.140625,
      "learning_rate": 4.461552840019627e-06,
      "loss": 0.8437,
      "step": 787240
    },
    {
      "epoch": 2.7591149803558723,
      "grad_norm": 3.078125,
      "learning_rate": 4.460903811355925e-06,
      "loss": 0.8542,
      "step": 787250
    },
    {
      "epoch": 2.759150027862768,
      "grad_norm": 2.96875,
      "learning_rate": 4.460254782692223e-06,
      "loss": 0.8375,
      "step": 787260
    },
    {
      "epoch": 2.759185075369664,
      "grad_norm": 3.125,
      "learning_rate": 4.459605754028521e-06,
      "loss": 0.8305,
      "step": 787270
    },
    {
      "epoch": 2.759220122876559,
      "grad_norm": 2.859375,
      "learning_rate": 4.458956725364819e-06,
      "loss": 0.8352,
      "step": 787280
    },
    {
      "epoch": 2.759255170383455,
      "grad_norm": 2.8125,
      "learning_rate": 4.458307696701117e-06,
      "loss": 0.8127,
      "step": 787290
    },
    {
      "epoch": 2.7592902178903502,
      "grad_norm": 3.0625,
      "learning_rate": 4.457658668037415e-06,
      "loss": 0.7839,
      "step": 787300
    },
    {
      "epoch": 2.759325265397246,
      "grad_norm": 3.046875,
      "learning_rate": 4.457009639373713e-06,
      "loss": 0.8028,
      "step": 787310
    },
    {
      "epoch": 2.7593603129041417,
      "grad_norm": 2.90625,
      "learning_rate": 4.456360610710011e-06,
      "loss": 0.775,
      "step": 787320
    },
    {
      "epoch": 2.759395360411037,
      "grad_norm": 3.03125,
      "learning_rate": 4.45571158204631e-06,
      "loss": 0.7866,
      "step": 787330
    },
    {
      "epoch": 2.759430407917933,
      "grad_norm": 2.90625,
      "learning_rate": 4.455062553382608e-06,
      "loss": 0.7778,
      "step": 787340
    },
    {
      "epoch": 2.759465455424828,
      "grad_norm": 2.6875,
      "learning_rate": 4.454413524718906e-06,
      "loss": 0.7658,
      "step": 787350
    },
    {
      "epoch": 2.759500502931724,
      "grad_norm": 2.9375,
      "learning_rate": 4.453764496055204e-06,
      "loss": 0.8039,
      "step": 787360
    },
    {
      "epoch": 2.7595355504386196,
      "grad_norm": 2.53125,
      "learning_rate": 4.453115467391502e-06,
      "loss": 0.681,
      "step": 787370
    },
    {
      "epoch": 2.7595705979455154,
      "grad_norm": 3.09375,
      "learning_rate": 4.4524664387278e-06,
      "loss": 0.7779,
      "step": 787380
    },
    {
      "epoch": 2.7596056454524107,
      "grad_norm": 2.671875,
      "learning_rate": 4.451817410064098e-06,
      "loss": 0.755,
      "step": 787390
    },
    {
      "epoch": 2.7596406929593065,
      "grad_norm": 2.84375,
      "learning_rate": 4.451168381400397e-06,
      "loss": 0.7863,
      "step": 787400
    },
    {
      "epoch": 2.759675740466202,
      "grad_norm": 3.046875,
      "learning_rate": 4.450519352736694e-06,
      "loss": 0.8057,
      "step": 787410
    },
    {
      "epoch": 2.7597107879730975,
      "grad_norm": 3.171875,
      "learning_rate": 4.449870324072992e-06,
      "loss": 0.7329,
      "step": 787420
    },
    {
      "epoch": 2.7597458354799933,
      "grad_norm": 2.78125,
      "learning_rate": 4.449221295409291e-06,
      "loss": 0.8162,
      "step": 787430
    },
    {
      "epoch": 2.7597808829868886,
      "grad_norm": 2.765625,
      "learning_rate": 4.448572266745589e-06,
      "loss": 0.8474,
      "step": 787440
    },
    {
      "epoch": 2.7598159304937844,
      "grad_norm": 2.75,
      "learning_rate": 4.447923238081887e-06,
      "loss": 0.8669,
      "step": 787450
    },
    {
      "epoch": 2.7598509780006797,
      "grad_norm": 2.390625,
      "learning_rate": 4.447274209418185e-06,
      "loss": 0.7585,
      "step": 787460
    },
    {
      "epoch": 2.7598860255075754,
      "grad_norm": 2.796875,
      "learning_rate": 4.446625180754483e-06,
      "loss": 0.7748,
      "step": 787470
    },
    {
      "epoch": 2.759921073014471,
      "grad_norm": 2.9375,
      "learning_rate": 4.445976152090781e-06,
      "loss": 0.7438,
      "step": 787480
    },
    {
      "epoch": 2.759956120521367,
      "grad_norm": 3.0625,
      "learning_rate": 4.44532712342708e-06,
      "loss": 0.7203,
      "step": 787490
    },
    {
      "epoch": 2.7599911680282623,
      "grad_norm": 2.78125,
      "learning_rate": 4.444678094763377e-06,
      "loss": 0.881,
      "step": 787500
    },
    {
      "epoch": 2.760026215535158,
      "grad_norm": 2.71875,
      "learning_rate": 4.444029066099675e-06,
      "loss": 0.7519,
      "step": 787510
    },
    {
      "epoch": 2.7600612630420533,
      "grad_norm": 2.8125,
      "learning_rate": 4.443380037435974e-06,
      "loss": 0.7646,
      "step": 787520
    },
    {
      "epoch": 2.760096310548949,
      "grad_norm": 3.03125,
      "learning_rate": 4.442731008772272e-06,
      "loss": 0.8045,
      "step": 787530
    },
    {
      "epoch": 2.760131358055845,
      "grad_norm": 2.953125,
      "learning_rate": 4.44208198010857e-06,
      "loss": 0.818,
      "step": 787540
    },
    {
      "epoch": 2.76016640556274,
      "grad_norm": 3.0625,
      "learning_rate": 4.441432951444868e-06,
      "loss": 0.8503,
      "step": 787550
    },
    {
      "epoch": 2.760201453069636,
      "grad_norm": 3.078125,
      "learning_rate": 4.440783922781166e-06,
      "loss": 0.8005,
      "step": 787560
    },
    {
      "epoch": 2.7602365005765312,
      "grad_norm": 3.25,
      "learning_rate": 4.440134894117464e-06,
      "loss": 0.7736,
      "step": 787570
    },
    {
      "epoch": 2.760271548083427,
      "grad_norm": 2.9375,
      "learning_rate": 4.439485865453763e-06,
      "loss": 0.7854,
      "step": 787580
    },
    {
      "epoch": 2.7603065955903228,
      "grad_norm": 3.453125,
      "learning_rate": 4.438836836790061e-06,
      "loss": 0.7842,
      "step": 787590
    },
    {
      "epoch": 2.7603416430972185,
      "grad_norm": 3.125,
      "learning_rate": 4.438187808126358e-06,
      "loss": 0.8187,
      "step": 787600
    },
    {
      "epoch": 2.760376690604114,
      "grad_norm": 3.390625,
      "learning_rate": 4.437538779462657e-06,
      "loss": 0.8586,
      "step": 787610
    },
    {
      "epoch": 2.7604117381110096,
      "grad_norm": 2.828125,
      "learning_rate": 4.436889750798955e-06,
      "loss": 0.7814,
      "step": 787620
    },
    {
      "epoch": 2.760446785617905,
      "grad_norm": 2.640625,
      "learning_rate": 4.436240722135253e-06,
      "loss": 0.7894,
      "step": 787630
    },
    {
      "epoch": 2.7604818331248007,
      "grad_norm": 3.0625,
      "learning_rate": 4.435591693471551e-06,
      "loss": 0.8182,
      "step": 787640
    },
    {
      "epoch": 2.7605168806316964,
      "grad_norm": 2.5625,
      "learning_rate": 4.434942664807849e-06,
      "loss": 0.8302,
      "step": 787650
    },
    {
      "epoch": 2.7605519281385917,
      "grad_norm": 2.8125,
      "learning_rate": 4.434293636144147e-06,
      "loss": 0.8056,
      "step": 787660
    },
    {
      "epoch": 2.7605869756454875,
      "grad_norm": 2.578125,
      "learning_rate": 4.4336446074804455e-06,
      "loss": 0.8583,
      "step": 787670
    },
    {
      "epoch": 2.760622023152383,
      "grad_norm": 3.0625,
      "learning_rate": 4.4329955788167435e-06,
      "loss": 0.8429,
      "step": 787680
    },
    {
      "epoch": 2.7606570706592786,
      "grad_norm": 2.890625,
      "learning_rate": 4.432346550153041e-06,
      "loss": 0.7325,
      "step": 787690
    },
    {
      "epoch": 2.7606921181661743,
      "grad_norm": 2.96875,
      "learning_rate": 4.4316975214893395e-06,
      "loss": 0.8712,
      "step": 787700
    },
    {
      "epoch": 2.76072716567307,
      "grad_norm": 2.984375,
      "learning_rate": 4.4310484928256375e-06,
      "loss": 0.8278,
      "step": 787710
    },
    {
      "epoch": 2.7607622131799654,
      "grad_norm": 3.28125,
      "learning_rate": 4.4303994641619355e-06,
      "loss": 0.8177,
      "step": 787720
    },
    {
      "epoch": 2.760797260686861,
      "grad_norm": 2.953125,
      "learning_rate": 4.4297504354982335e-06,
      "loss": 0.8273,
      "step": 787730
    },
    {
      "epoch": 2.7608323081937565,
      "grad_norm": 2.859375,
      "learning_rate": 4.4291014068345315e-06,
      "loss": 0.705,
      "step": 787740
    },
    {
      "epoch": 2.7608673557006522,
      "grad_norm": 3.15625,
      "learning_rate": 4.4284523781708295e-06,
      "loss": 0.7847,
      "step": 787750
    },
    {
      "epoch": 2.760902403207548,
      "grad_norm": 2.765625,
      "learning_rate": 4.427803349507128e-06,
      "loss": 0.8123,
      "step": 787760
    },
    {
      "epoch": 2.7609374507144433,
      "grad_norm": 2.796875,
      "learning_rate": 4.427154320843426e-06,
      "loss": 0.83,
      "step": 787770
    },
    {
      "epoch": 2.760972498221339,
      "grad_norm": 2.890625,
      "learning_rate": 4.4265052921797235e-06,
      "loss": 0.8026,
      "step": 787780
    },
    {
      "epoch": 2.7610075457282344,
      "grad_norm": 2.953125,
      "learning_rate": 4.425856263516022e-06,
      "loss": 0.812,
      "step": 787790
    },
    {
      "epoch": 2.76104259323513,
      "grad_norm": 3.09375,
      "learning_rate": 4.42520723485232e-06,
      "loss": 0.814,
      "step": 787800
    },
    {
      "epoch": 2.761077640742026,
      "grad_norm": 2.765625,
      "learning_rate": 4.424558206188618e-06,
      "loss": 0.8749,
      "step": 787810
    },
    {
      "epoch": 2.7611126882489216,
      "grad_norm": 2.875,
      "learning_rate": 4.423909177524916e-06,
      "loss": 0.8235,
      "step": 787820
    },
    {
      "epoch": 2.761147735755817,
      "grad_norm": 3.046875,
      "learning_rate": 4.423260148861214e-06,
      "loss": 0.8542,
      "step": 787830
    },
    {
      "epoch": 2.7611827832627127,
      "grad_norm": 3.125,
      "learning_rate": 4.422611120197512e-06,
      "loss": 0.7712,
      "step": 787840
    },
    {
      "epoch": 2.761217830769608,
      "grad_norm": 2.59375,
      "learning_rate": 4.42196209153381e-06,
      "loss": 0.7845,
      "step": 787850
    },
    {
      "epoch": 2.761252878276504,
      "grad_norm": 2.796875,
      "learning_rate": 4.421313062870109e-06,
      "loss": 0.7295,
      "step": 787860
    },
    {
      "epoch": 2.7612879257833995,
      "grad_norm": 2.703125,
      "learning_rate": 4.420664034206407e-06,
      "loss": 0.747,
      "step": 787870
    },
    {
      "epoch": 2.761322973290295,
      "grad_norm": 2.828125,
      "learning_rate": 4.420015005542705e-06,
      "loss": 0.7777,
      "step": 787880
    },
    {
      "epoch": 2.7613580207971906,
      "grad_norm": 2.609375,
      "learning_rate": 4.419365976879003e-06,
      "loss": 0.7819,
      "step": 787890
    },
    {
      "epoch": 2.761393068304086,
      "grad_norm": 3.109375,
      "learning_rate": 4.418716948215301e-06,
      "loss": 0.7749,
      "step": 787900
    },
    {
      "epoch": 2.7614281158109817,
      "grad_norm": 2.765625,
      "learning_rate": 4.418067919551599e-06,
      "loss": 0.8589,
      "step": 787910
    },
    {
      "epoch": 2.7614631633178774,
      "grad_norm": 2.65625,
      "learning_rate": 4.417418890887897e-06,
      "loss": 0.8671,
      "step": 787920
    },
    {
      "epoch": 2.761498210824773,
      "grad_norm": 2.890625,
      "learning_rate": 4.416769862224195e-06,
      "loss": 0.832,
      "step": 787930
    },
    {
      "epoch": 2.7615332583316685,
      "grad_norm": 2.265625,
      "learning_rate": 4.416120833560493e-06,
      "loss": 0.7905,
      "step": 787940
    },
    {
      "epoch": 2.7615683058385643,
      "grad_norm": 3.203125,
      "learning_rate": 4.415471804896792e-06,
      "loss": 0.8048,
      "step": 787950
    },
    {
      "epoch": 2.7616033533454596,
      "grad_norm": 2.453125,
      "learning_rate": 4.41482277623309e-06,
      "loss": 0.7525,
      "step": 787960
    },
    {
      "epoch": 2.7616384008523553,
      "grad_norm": 3.03125,
      "learning_rate": 4.414173747569387e-06,
      "loss": 0.7752,
      "step": 787970
    },
    {
      "epoch": 2.761673448359251,
      "grad_norm": 2.03125,
      "learning_rate": 4.413524718905686e-06,
      "loss": 0.7729,
      "step": 787980
    },
    {
      "epoch": 2.7617084958661464,
      "grad_norm": 3.171875,
      "learning_rate": 4.412875690241984e-06,
      "loss": 0.7449,
      "step": 787990
    },
    {
      "epoch": 2.761743543373042,
      "grad_norm": 3.109375,
      "learning_rate": 4.412226661578282e-06,
      "loss": 0.83,
      "step": 788000
    },
    {
      "epoch": 2.7617785908799375,
      "grad_norm": 3.328125,
      "learning_rate": 4.411577632914581e-06,
      "loss": 0.8368,
      "step": 788010
    },
    {
      "epoch": 2.7618136383868332,
      "grad_norm": 3.140625,
      "learning_rate": 4.410928604250878e-06,
      "loss": 0.9171,
      "step": 788020
    },
    {
      "epoch": 2.761848685893729,
      "grad_norm": 3.421875,
      "learning_rate": 4.410279575587176e-06,
      "loss": 0.8651,
      "step": 788030
    },
    {
      "epoch": 2.7618837334006248,
      "grad_norm": 2.875,
      "learning_rate": 4.409630546923475e-06,
      "loss": 0.8402,
      "step": 788040
    },
    {
      "epoch": 2.76191878090752,
      "grad_norm": 3.078125,
      "learning_rate": 4.408981518259773e-06,
      "loss": 0.765,
      "step": 788050
    },
    {
      "epoch": 2.761953828414416,
      "grad_norm": 3.09375,
      "learning_rate": 4.408332489596071e-06,
      "loss": 0.8725,
      "step": 788060
    },
    {
      "epoch": 2.761988875921311,
      "grad_norm": 2.8125,
      "learning_rate": 4.407683460932369e-06,
      "loss": 0.8132,
      "step": 788070
    },
    {
      "epoch": 2.762023923428207,
      "grad_norm": 2.765625,
      "learning_rate": 4.407034432268667e-06,
      "loss": 0.79,
      "step": 788080
    },
    {
      "epoch": 2.7620589709351027,
      "grad_norm": 3.171875,
      "learning_rate": 4.406385403604965e-06,
      "loss": 0.8905,
      "step": 788090
    },
    {
      "epoch": 2.762094018441998,
      "grad_norm": 2.515625,
      "learning_rate": 4.405736374941264e-06,
      "loss": 0.7917,
      "step": 788100
    },
    {
      "epoch": 2.7621290659488937,
      "grad_norm": 2.828125,
      "learning_rate": 4.405087346277561e-06,
      "loss": 0.7915,
      "step": 788110
    },
    {
      "epoch": 2.762164113455789,
      "grad_norm": 3.328125,
      "learning_rate": 4.404438317613859e-06,
      "loss": 0.7849,
      "step": 788120
    },
    {
      "epoch": 2.762199160962685,
      "grad_norm": 2.796875,
      "learning_rate": 4.403789288950158e-06,
      "loss": 0.8287,
      "step": 788130
    },
    {
      "epoch": 2.7622342084695806,
      "grad_norm": 3.078125,
      "learning_rate": 4.403140260286456e-06,
      "loss": 0.8406,
      "step": 788140
    },
    {
      "epoch": 2.7622692559764763,
      "grad_norm": 2.90625,
      "learning_rate": 4.402491231622754e-06,
      "loss": 0.8124,
      "step": 788150
    },
    {
      "epoch": 2.7623043034833716,
      "grad_norm": 2.953125,
      "learning_rate": 4.401842202959052e-06,
      "loss": 0.7945,
      "step": 788160
    },
    {
      "epoch": 2.7623393509902674,
      "grad_norm": 3.046875,
      "learning_rate": 4.40119317429535e-06,
      "loss": 0.8369,
      "step": 788170
    },
    {
      "epoch": 2.7623743984971627,
      "grad_norm": 3.375,
      "learning_rate": 4.400544145631648e-06,
      "loss": 0.8012,
      "step": 788180
    },
    {
      "epoch": 2.7624094460040585,
      "grad_norm": 3.15625,
      "learning_rate": 4.3998951169679466e-06,
      "loss": 0.8507,
      "step": 788190
    },
    {
      "epoch": 2.7624444935109542,
      "grad_norm": 3.078125,
      "learning_rate": 4.3992460883042446e-06,
      "loss": 0.8193,
      "step": 788200
    },
    {
      "epoch": 2.7624795410178495,
      "grad_norm": 3.28125,
      "learning_rate": 4.398597059640542e-06,
      "loss": 0.7703,
      "step": 788210
    },
    {
      "epoch": 2.7625145885247453,
      "grad_norm": 3.1875,
      "learning_rate": 4.3979480309768406e-06,
      "loss": 0.8,
      "step": 788220
    },
    {
      "epoch": 2.762549636031641,
      "grad_norm": 2.796875,
      "learning_rate": 4.3972990023131386e-06,
      "loss": 0.818,
      "step": 788230
    },
    {
      "epoch": 2.7625846835385364,
      "grad_norm": 3.375,
      "learning_rate": 4.3966499736494366e-06,
      "loss": 0.7435,
      "step": 788240
    },
    {
      "epoch": 2.762619731045432,
      "grad_norm": 3.234375,
      "learning_rate": 4.3960009449857346e-06,
      "loss": 0.7731,
      "step": 788250
    },
    {
      "epoch": 2.762654778552328,
      "grad_norm": 3.171875,
      "learning_rate": 4.3953519163220326e-06,
      "loss": 0.7419,
      "step": 788260
    },
    {
      "epoch": 2.762689826059223,
      "grad_norm": 2.84375,
      "learning_rate": 4.3947028876583306e-06,
      "loss": 0.8244,
      "step": 788270
    },
    {
      "epoch": 2.762724873566119,
      "grad_norm": 2.578125,
      "learning_rate": 4.3940538589946286e-06,
      "loss": 0.777,
      "step": 788280
    },
    {
      "epoch": 2.7627599210730143,
      "grad_norm": 3.0,
      "learning_rate": 4.393404830330927e-06,
      "loss": 0.7722,
      "step": 788290
    },
    {
      "epoch": 2.76279496857991,
      "grad_norm": 3.078125,
      "learning_rate": 4.3927558016672246e-06,
      "loss": 0.7758,
      "step": 788300
    },
    {
      "epoch": 2.762830016086806,
      "grad_norm": 3.03125,
      "learning_rate": 4.392106773003523e-06,
      "loss": 0.78,
      "step": 788310
    },
    {
      "epoch": 2.762865063593701,
      "grad_norm": 3.15625,
      "learning_rate": 4.391457744339821e-06,
      "loss": 0.74,
      "step": 788320
    },
    {
      "epoch": 2.762900111100597,
      "grad_norm": 2.40625,
      "learning_rate": 4.390808715676119e-06,
      "loss": 0.7356,
      "step": 788330
    },
    {
      "epoch": 2.7629351586074926,
      "grad_norm": 2.71875,
      "learning_rate": 4.390159687012417e-06,
      "loss": 0.7908,
      "step": 788340
    },
    {
      "epoch": 2.762970206114388,
      "grad_norm": 2.609375,
      "learning_rate": 4.389510658348715e-06,
      "loss": 0.6912,
      "step": 788350
    },
    {
      "epoch": 2.7630052536212837,
      "grad_norm": 2.90625,
      "learning_rate": 4.388861629685013e-06,
      "loss": 0.7546,
      "step": 788360
    },
    {
      "epoch": 2.7630403011281794,
      "grad_norm": 2.78125,
      "learning_rate": 4.388212601021311e-06,
      "loss": 0.8203,
      "step": 788370
    },
    {
      "epoch": 2.7630753486350748,
      "grad_norm": 2.96875,
      "learning_rate": 4.38756357235761e-06,
      "loss": 0.7524,
      "step": 788380
    },
    {
      "epoch": 2.7631103961419705,
      "grad_norm": 3.0625,
      "learning_rate": 4.386914543693908e-06,
      "loss": 0.8053,
      "step": 788390
    },
    {
      "epoch": 2.763145443648866,
      "grad_norm": 3.328125,
      "learning_rate": 4.386265515030205e-06,
      "loss": 0.7931,
      "step": 788400
    },
    {
      "epoch": 2.7631804911557616,
      "grad_norm": 2.921875,
      "learning_rate": 4.385616486366504e-06,
      "loss": 0.8073,
      "step": 788410
    },
    {
      "epoch": 2.7632155386626573,
      "grad_norm": 2.734375,
      "learning_rate": 4.384967457702802e-06,
      "loss": 0.8026,
      "step": 788420
    },
    {
      "epoch": 2.763250586169553,
      "grad_norm": 3.15625,
      "learning_rate": 4.3843184290391e-06,
      "loss": 0.9,
      "step": 788430
    },
    {
      "epoch": 2.7632856336764484,
      "grad_norm": 3.15625,
      "learning_rate": 4.383669400375398e-06,
      "loss": 0.8503,
      "step": 788440
    },
    {
      "epoch": 2.763320681183344,
      "grad_norm": 3.03125,
      "learning_rate": 4.383020371711696e-06,
      "loss": 0.7795,
      "step": 788450
    },
    {
      "epoch": 2.7633557286902395,
      "grad_norm": 3.078125,
      "learning_rate": 4.382371343047994e-06,
      "loss": 0.8614,
      "step": 788460
    },
    {
      "epoch": 2.7633907761971352,
      "grad_norm": 3.296875,
      "learning_rate": 4.381722314384293e-06,
      "loss": 0.8108,
      "step": 788470
    },
    {
      "epoch": 2.763425823704031,
      "grad_norm": 2.5,
      "learning_rate": 4.381073285720591e-06,
      "loss": 0.8322,
      "step": 788480
    },
    {
      "epoch": 2.7634608712109263,
      "grad_norm": 3.609375,
      "learning_rate": 4.380424257056888e-06,
      "loss": 0.7689,
      "step": 788490
    },
    {
      "epoch": 2.763495918717822,
      "grad_norm": 3.25,
      "learning_rate": 4.379775228393187e-06,
      "loss": 0.7503,
      "step": 788500
    },
    {
      "epoch": 2.7635309662247174,
      "grad_norm": 2.859375,
      "learning_rate": 4.379126199729485e-06,
      "loss": 0.7339,
      "step": 788510
    },
    {
      "epoch": 2.763566013731613,
      "grad_norm": 3.0625,
      "learning_rate": 4.378477171065783e-06,
      "loss": 0.7249,
      "step": 788520
    },
    {
      "epoch": 2.763601061238509,
      "grad_norm": 2.71875,
      "learning_rate": 4.377828142402082e-06,
      "loss": 0.8322,
      "step": 788530
    },
    {
      "epoch": 2.7636361087454047,
      "grad_norm": 2.609375,
      "learning_rate": 4.377179113738379e-06,
      "loss": 0.7883,
      "step": 788540
    },
    {
      "epoch": 2.7636711562523,
      "grad_norm": 3.078125,
      "learning_rate": 4.376530085074677e-06,
      "loss": 0.8232,
      "step": 788550
    },
    {
      "epoch": 2.7637062037591957,
      "grad_norm": 2.921875,
      "learning_rate": 4.375881056410976e-06,
      "loss": 0.7612,
      "step": 788560
    },
    {
      "epoch": 2.763741251266091,
      "grad_norm": 3.3125,
      "learning_rate": 4.375232027747274e-06,
      "loss": 0.7726,
      "step": 788570
    },
    {
      "epoch": 2.763776298772987,
      "grad_norm": 3.0625,
      "learning_rate": 4.374582999083571e-06,
      "loss": 0.7529,
      "step": 788580
    },
    {
      "epoch": 2.7638113462798826,
      "grad_norm": 2.78125,
      "learning_rate": 4.37393397041987e-06,
      "loss": 0.8799,
      "step": 788590
    },
    {
      "epoch": 2.763846393786778,
      "grad_norm": 3.03125,
      "learning_rate": 4.373284941756168e-06,
      "loss": 0.8437,
      "step": 788600
    },
    {
      "epoch": 2.7638814412936736,
      "grad_norm": 3.0625,
      "learning_rate": 4.372635913092466e-06,
      "loss": 0.7628,
      "step": 788610
    },
    {
      "epoch": 2.763916488800569,
      "grad_norm": 2.765625,
      "learning_rate": 4.371986884428765e-06,
      "loss": 0.8522,
      "step": 788620
    },
    {
      "epoch": 2.7639515363074647,
      "grad_norm": 3.375,
      "learning_rate": 4.371337855765062e-06,
      "loss": 0.7918,
      "step": 788630
    },
    {
      "epoch": 2.7639865838143605,
      "grad_norm": 3.578125,
      "learning_rate": 4.37068882710136e-06,
      "loss": 0.8465,
      "step": 788640
    },
    {
      "epoch": 2.7640216313212562,
      "grad_norm": 3.234375,
      "learning_rate": 4.370039798437659e-06,
      "loss": 0.7781,
      "step": 788650
    },
    {
      "epoch": 2.7640566788281515,
      "grad_norm": 2.953125,
      "learning_rate": 4.369390769773957e-06,
      "loss": 0.8305,
      "step": 788660
    },
    {
      "epoch": 2.7640917263350473,
      "grad_norm": 2.765625,
      "learning_rate": 4.368741741110255e-06,
      "loss": 0.8219,
      "step": 788670
    },
    {
      "epoch": 2.7641267738419426,
      "grad_norm": 2.65625,
      "learning_rate": 4.368092712446553e-06,
      "loss": 0.7059,
      "step": 788680
    },
    {
      "epoch": 2.7641618213488384,
      "grad_norm": 2.671875,
      "learning_rate": 4.367443683782851e-06,
      "loss": 0.8317,
      "step": 788690
    },
    {
      "epoch": 2.764196868855734,
      "grad_norm": 2.703125,
      "learning_rate": 4.366794655119149e-06,
      "loss": 0.7734,
      "step": 788700
    },
    {
      "epoch": 2.7642319163626294,
      "grad_norm": 2.703125,
      "learning_rate": 4.366145626455447e-06,
      "loss": 0.8473,
      "step": 788710
    },
    {
      "epoch": 2.764266963869525,
      "grad_norm": 3.296875,
      "learning_rate": 4.365496597791745e-06,
      "loss": 0.8945,
      "step": 788720
    },
    {
      "epoch": 2.7643020113764205,
      "grad_norm": 2.6875,
      "learning_rate": 4.364847569128043e-06,
      "loss": 0.7605,
      "step": 788730
    },
    {
      "epoch": 2.7643370588833163,
      "grad_norm": 3.265625,
      "learning_rate": 4.364198540464342e-06,
      "loss": 0.8258,
      "step": 788740
    },
    {
      "epoch": 2.764372106390212,
      "grad_norm": 2.640625,
      "learning_rate": 4.36354951180064e-06,
      "loss": 0.7851,
      "step": 788750
    },
    {
      "epoch": 2.764407153897108,
      "grad_norm": 2.921875,
      "learning_rate": 4.362900483136938e-06,
      "loss": 0.8441,
      "step": 788760
    },
    {
      "epoch": 2.764442201404003,
      "grad_norm": 2.84375,
      "learning_rate": 4.362251454473236e-06,
      "loss": 0.834,
      "step": 788770
    },
    {
      "epoch": 2.764477248910899,
      "grad_norm": 2.609375,
      "learning_rate": 4.361602425809534e-06,
      "loss": 0.7441,
      "step": 788780
    },
    {
      "epoch": 2.764512296417794,
      "grad_norm": 2.890625,
      "learning_rate": 4.360953397145832e-06,
      "loss": 0.7548,
      "step": 788790
    },
    {
      "epoch": 2.76454734392469,
      "grad_norm": 3.125,
      "learning_rate": 4.36030436848213e-06,
      "loss": 0.7846,
      "step": 788800
    },
    {
      "epoch": 2.7645823914315857,
      "grad_norm": 3.21875,
      "learning_rate": 4.3596553398184285e-06,
      "loss": 0.9058,
      "step": 788810
    },
    {
      "epoch": 2.764617438938481,
      "grad_norm": 2.78125,
      "learning_rate": 4.359006311154726e-06,
      "loss": 0.7726,
      "step": 788820
    },
    {
      "epoch": 2.7646524864453768,
      "grad_norm": 2.796875,
      "learning_rate": 4.358357282491024e-06,
      "loss": 0.7544,
      "step": 788830
    },
    {
      "epoch": 2.764687533952272,
      "grad_norm": 2.765625,
      "learning_rate": 4.3577082538273225e-06,
      "loss": 0.761,
      "step": 788840
    },
    {
      "epoch": 2.764722581459168,
      "grad_norm": 3.0,
      "learning_rate": 4.3570592251636205e-06,
      "loss": 0.7926,
      "step": 788850
    },
    {
      "epoch": 2.7647576289660636,
      "grad_norm": 2.78125,
      "learning_rate": 4.3564101964999185e-06,
      "loss": 0.8201,
      "step": 788860
    },
    {
      "epoch": 2.7647926764729593,
      "grad_norm": 2.765625,
      "learning_rate": 4.3557611678362165e-06,
      "loss": 0.778,
      "step": 788870
    },
    {
      "epoch": 2.7648277239798547,
      "grad_norm": 2.90625,
      "learning_rate": 4.3551121391725145e-06,
      "loss": 0.7584,
      "step": 788880
    },
    {
      "epoch": 2.7648627714867504,
      "grad_norm": 3.34375,
      "learning_rate": 4.3544631105088125e-06,
      "loss": 0.7843,
      "step": 788890
    },
    {
      "epoch": 2.7648978189936457,
      "grad_norm": 3.0,
      "learning_rate": 4.353814081845111e-06,
      "loss": 0.7807,
      "step": 788900
    },
    {
      "epoch": 2.7649328665005415,
      "grad_norm": 3.09375,
      "learning_rate": 4.3531650531814085e-06,
      "loss": 0.8647,
      "step": 788910
    },
    {
      "epoch": 2.7649679140074372,
      "grad_norm": 2.578125,
      "learning_rate": 4.3525160245177065e-06,
      "loss": 0.7457,
      "step": 788920
    },
    {
      "epoch": 2.7650029615143326,
      "grad_norm": 2.734375,
      "learning_rate": 4.351866995854005e-06,
      "loss": 0.8341,
      "step": 788930
    },
    {
      "epoch": 2.7650380090212283,
      "grad_norm": 2.6875,
      "learning_rate": 4.351217967190303e-06,
      "loss": 0.7766,
      "step": 788940
    },
    {
      "epoch": 2.7650730565281236,
      "grad_norm": 2.984375,
      "learning_rate": 4.350568938526601e-06,
      "loss": 0.7832,
      "step": 788950
    },
    {
      "epoch": 2.7651081040350194,
      "grad_norm": 2.828125,
      "learning_rate": 4.349919909862899e-06,
      "loss": 0.7804,
      "step": 788960
    },
    {
      "epoch": 2.765143151541915,
      "grad_norm": 3.1875,
      "learning_rate": 4.349270881199197e-06,
      "loss": 0.7988,
      "step": 788970
    },
    {
      "epoch": 2.765178199048811,
      "grad_norm": 3.03125,
      "learning_rate": 4.348621852535495e-06,
      "loss": 0.8299,
      "step": 788980
    },
    {
      "epoch": 2.765213246555706,
      "grad_norm": 3.15625,
      "learning_rate": 4.347972823871794e-06,
      "loss": 0.8538,
      "step": 788990
    },
    {
      "epoch": 2.765248294062602,
      "grad_norm": 2.640625,
      "learning_rate": 4.347323795208092e-06,
      "loss": 0.8743,
      "step": 789000
    },
    {
      "epoch": 2.7652833415694973,
      "grad_norm": 3.125,
      "learning_rate": 4.346674766544389e-06,
      "loss": 0.7978,
      "step": 789010
    },
    {
      "epoch": 2.765318389076393,
      "grad_norm": 2.71875,
      "learning_rate": 4.346025737880688e-06,
      "loss": 0.8339,
      "step": 789020
    },
    {
      "epoch": 2.765353436583289,
      "grad_norm": 2.625,
      "learning_rate": 4.345376709216986e-06,
      "loss": 0.7664,
      "step": 789030
    },
    {
      "epoch": 2.765388484090184,
      "grad_norm": 2.84375,
      "learning_rate": 4.344727680553284e-06,
      "loss": 0.8059,
      "step": 789040
    },
    {
      "epoch": 2.76542353159708,
      "grad_norm": 2.640625,
      "learning_rate": 4.344078651889582e-06,
      "loss": 0.8175,
      "step": 789050
    },
    {
      "epoch": 2.765458579103975,
      "grad_norm": 3.265625,
      "learning_rate": 4.34342962322588e-06,
      "loss": 0.8101,
      "step": 789060
    },
    {
      "epoch": 2.765493626610871,
      "grad_norm": 3.125,
      "learning_rate": 4.342780594562178e-06,
      "loss": 0.7989,
      "step": 789070
    },
    {
      "epoch": 2.7655286741177667,
      "grad_norm": 2.890625,
      "learning_rate": 4.342131565898477e-06,
      "loss": 0.7355,
      "step": 789080
    },
    {
      "epoch": 2.7655637216246625,
      "grad_norm": 3.015625,
      "learning_rate": 4.341482537234775e-06,
      "loss": 0.8465,
      "step": 789090
    },
    {
      "epoch": 2.7655987691315578,
      "grad_norm": 3.125,
      "learning_rate": 4.340833508571072e-06,
      "loss": 0.7813,
      "step": 789100
    },
    {
      "epoch": 2.7656338166384535,
      "grad_norm": 2.75,
      "learning_rate": 4.340184479907371e-06,
      "loss": 0.8493,
      "step": 789110
    },
    {
      "epoch": 2.765668864145349,
      "grad_norm": 2.828125,
      "learning_rate": 4.339535451243669e-06,
      "loss": 0.8052,
      "step": 789120
    },
    {
      "epoch": 2.7657039116522446,
      "grad_norm": 2.75,
      "learning_rate": 4.338886422579967e-06,
      "loss": 0.7887,
      "step": 789130
    },
    {
      "epoch": 2.7657389591591404,
      "grad_norm": 2.734375,
      "learning_rate": 4.338237393916265e-06,
      "loss": 0.78,
      "step": 789140
    },
    {
      "epoch": 2.7657740066660357,
      "grad_norm": 2.640625,
      "learning_rate": 4.337588365252563e-06,
      "loss": 0.7539,
      "step": 789150
    },
    {
      "epoch": 2.7658090541729314,
      "grad_norm": 2.65625,
      "learning_rate": 4.336939336588861e-06,
      "loss": 0.7979,
      "step": 789160
    },
    {
      "epoch": 2.7658441016798267,
      "grad_norm": 2.5625,
      "learning_rate": 4.33629030792516e-06,
      "loss": 0.7737,
      "step": 789170
    },
    {
      "epoch": 2.7658791491867225,
      "grad_norm": 3.078125,
      "learning_rate": 4.335641279261458e-06,
      "loss": 0.7854,
      "step": 789180
    },
    {
      "epoch": 2.7659141966936183,
      "grad_norm": 3.265625,
      "learning_rate": 4.334992250597755e-06,
      "loss": 0.8174,
      "step": 789190
    },
    {
      "epoch": 2.765949244200514,
      "grad_norm": 3.25,
      "learning_rate": 4.334343221934054e-06,
      "loss": 0.7992,
      "step": 789200
    },
    {
      "epoch": 2.7659842917074093,
      "grad_norm": 2.515625,
      "learning_rate": 4.333694193270352e-06,
      "loss": 0.7428,
      "step": 789210
    },
    {
      "epoch": 2.766019339214305,
      "grad_norm": 2.890625,
      "learning_rate": 4.33304516460665e-06,
      "loss": 0.824,
      "step": 789220
    },
    {
      "epoch": 2.7660543867212004,
      "grad_norm": 2.875,
      "learning_rate": 4.332396135942948e-06,
      "loss": 0.7222,
      "step": 789230
    },
    {
      "epoch": 2.766089434228096,
      "grad_norm": 2.515625,
      "learning_rate": 4.331747107279246e-06,
      "loss": 0.8271,
      "step": 789240
    },
    {
      "epoch": 2.766124481734992,
      "grad_norm": 2.921875,
      "learning_rate": 4.331098078615544e-06,
      "loss": 0.7791,
      "step": 789250
    },
    {
      "epoch": 2.7661595292418872,
      "grad_norm": 2.953125,
      "learning_rate": 4.330449049951842e-06,
      "loss": 0.7474,
      "step": 789260
    },
    {
      "epoch": 2.766194576748783,
      "grad_norm": 2.84375,
      "learning_rate": 4.329800021288141e-06,
      "loss": 0.7741,
      "step": 789270
    },
    {
      "epoch": 2.7662296242556783,
      "grad_norm": 2.84375,
      "learning_rate": 4.329150992624439e-06,
      "loss": 0.8898,
      "step": 789280
    },
    {
      "epoch": 2.766264671762574,
      "grad_norm": 3.0,
      "learning_rate": 4.328501963960737e-06,
      "loss": 0.7886,
      "step": 789290
    },
    {
      "epoch": 2.76629971926947,
      "grad_norm": 3.453125,
      "learning_rate": 4.327852935297035e-06,
      "loss": 0.7952,
      "step": 789300
    },
    {
      "epoch": 2.7663347667763656,
      "grad_norm": 3.359375,
      "learning_rate": 4.327203906633333e-06,
      "loss": 0.8235,
      "step": 789310
    },
    {
      "epoch": 2.766369814283261,
      "grad_norm": 2.734375,
      "learning_rate": 4.326554877969631e-06,
      "loss": 0.8565,
      "step": 789320
    },
    {
      "epoch": 2.7664048617901567,
      "grad_norm": 3.109375,
      "learning_rate": 4.3259058493059296e-06,
      "loss": 0.8225,
      "step": 789330
    },
    {
      "epoch": 2.766439909297052,
      "grad_norm": 3.046875,
      "learning_rate": 4.325256820642227e-06,
      "loss": 0.8646,
      "step": 789340
    },
    {
      "epoch": 2.7664749568039477,
      "grad_norm": 2.765625,
      "learning_rate": 4.324607791978525e-06,
      "loss": 0.892,
      "step": 789350
    },
    {
      "epoch": 2.7665100043108435,
      "grad_norm": 2.984375,
      "learning_rate": 4.3239587633148236e-06,
      "loss": 0.8026,
      "step": 789360
    },
    {
      "epoch": 2.766545051817739,
      "grad_norm": 2.9375,
      "learning_rate": 4.3233097346511216e-06,
      "loss": 0.8016,
      "step": 789370
    },
    {
      "epoch": 2.7665800993246346,
      "grad_norm": 2.34375,
      "learning_rate": 4.322660705987419e-06,
      "loss": 0.8199,
      "step": 789380
    },
    {
      "epoch": 2.76661514683153,
      "grad_norm": 2.828125,
      "learning_rate": 4.3220116773237176e-06,
      "loss": 0.7982,
      "step": 789390
    },
    {
      "epoch": 2.7666501943384256,
      "grad_norm": 2.859375,
      "learning_rate": 4.3213626486600156e-06,
      "loss": 0.8009,
      "step": 789400
    },
    {
      "epoch": 2.7666852418453214,
      "grad_norm": 2.78125,
      "learning_rate": 4.3207136199963136e-06,
      "loss": 0.818,
      "step": 789410
    },
    {
      "epoch": 2.766720289352217,
      "grad_norm": 2.78125,
      "learning_rate": 4.320064591332612e-06,
      "loss": 0.7334,
      "step": 789420
    },
    {
      "epoch": 2.7667553368591125,
      "grad_norm": 2.9375,
      "learning_rate": 4.3194155626689096e-06,
      "loss": 0.8008,
      "step": 789430
    },
    {
      "epoch": 2.766790384366008,
      "grad_norm": 3.171875,
      "learning_rate": 4.3187665340052076e-06,
      "loss": 0.7472,
      "step": 789440
    },
    {
      "epoch": 2.7668254318729035,
      "grad_norm": 2.5,
      "learning_rate": 4.318117505341506e-06,
      "loss": 0.8072,
      "step": 789450
    },
    {
      "epoch": 2.7668604793797993,
      "grad_norm": 3.265625,
      "learning_rate": 4.317468476677804e-06,
      "loss": 0.8947,
      "step": 789460
    },
    {
      "epoch": 2.766895526886695,
      "grad_norm": 3.109375,
      "learning_rate": 4.316819448014102e-06,
      "loss": 0.7917,
      "step": 789470
    },
    {
      "epoch": 2.7669305743935904,
      "grad_norm": 2.828125,
      "learning_rate": 4.3161704193504e-06,
      "loss": 0.7827,
      "step": 789480
    },
    {
      "epoch": 2.766965621900486,
      "grad_norm": 3.171875,
      "learning_rate": 4.315521390686698e-06,
      "loss": 0.8542,
      "step": 789490
    },
    {
      "epoch": 2.7670006694073814,
      "grad_norm": 2.75,
      "learning_rate": 4.314872362022996e-06,
      "loss": 0.8836,
      "step": 789500
    },
    {
      "epoch": 2.767035716914277,
      "grad_norm": 2.59375,
      "learning_rate": 4.314223333359295e-06,
      "loss": 0.8404,
      "step": 789510
    },
    {
      "epoch": 2.767070764421173,
      "grad_norm": 3.078125,
      "learning_rate": 4.313574304695592e-06,
      "loss": 0.7877,
      "step": 789520
    },
    {
      "epoch": 2.7671058119280687,
      "grad_norm": 2.8125,
      "learning_rate": 4.31292527603189e-06,
      "loss": 0.7722,
      "step": 789530
    },
    {
      "epoch": 2.767140859434964,
      "grad_norm": 3.234375,
      "learning_rate": 4.312276247368189e-06,
      "loss": 0.7664,
      "step": 789540
    },
    {
      "epoch": 2.7671759069418598,
      "grad_norm": 2.515625,
      "learning_rate": 4.311627218704487e-06,
      "loss": 0.7263,
      "step": 789550
    },
    {
      "epoch": 2.767210954448755,
      "grad_norm": 2.6875,
      "learning_rate": 4.310978190040785e-06,
      "loss": 0.7908,
      "step": 789560
    },
    {
      "epoch": 2.767246001955651,
      "grad_norm": 2.75,
      "learning_rate": 4.310329161377083e-06,
      "loss": 0.8001,
      "step": 789570
    },
    {
      "epoch": 2.7672810494625466,
      "grad_norm": 2.46875,
      "learning_rate": 4.309680132713381e-06,
      "loss": 0.8419,
      "step": 789580
    },
    {
      "epoch": 2.767316096969442,
      "grad_norm": 3.140625,
      "learning_rate": 4.309031104049679e-06,
      "loss": 0.7566,
      "step": 789590
    },
    {
      "epoch": 2.7673511444763377,
      "grad_norm": 2.421875,
      "learning_rate": 4.308382075385978e-06,
      "loss": 0.6897,
      "step": 789600
    },
    {
      "epoch": 2.7673861919832334,
      "grad_norm": 2.890625,
      "learning_rate": 4.307733046722276e-06,
      "loss": 0.7952,
      "step": 789610
    },
    {
      "epoch": 2.7674212394901287,
      "grad_norm": 3.171875,
      "learning_rate": 4.307084018058573e-06,
      "loss": 0.7586,
      "step": 789620
    },
    {
      "epoch": 2.7674562869970245,
      "grad_norm": 3.03125,
      "learning_rate": 4.306434989394872e-06,
      "loss": 0.825,
      "step": 789630
    },
    {
      "epoch": 2.7674913345039203,
      "grad_norm": 2.6875,
      "learning_rate": 4.30578596073117e-06,
      "loss": 0.8452,
      "step": 789640
    },
    {
      "epoch": 2.7675263820108156,
      "grad_norm": 2.875,
      "learning_rate": 4.305136932067468e-06,
      "loss": 0.8475,
      "step": 789650
    },
    {
      "epoch": 2.7675614295177113,
      "grad_norm": 2.859375,
      "learning_rate": 4.304487903403766e-06,
      "loss": 0.8105,
      "step": 789660
    },
    {
      "epoch": 2.7675964770246066,
      "grad_norm": 3.234375,
      "learning_rate": 4.303838874740064e-06,
      "loss": 0.855,
      "step": 789670
    },
    {
      "epoch": 2.7676315245315024,
      "grad_norm": 2.78125,
      "learning_rate": 4.303189846076362e-06,
      "loss": 0.7571,
      "step": 789680
    },
    {
      "epoch": 2.767666572038398,
      "grad_norm": 2.578125,
      "learning_rate": 4.302540817412661e-06,
      "loss": 0.8074,
      "step": 789690
    },
    {
      "epoch": 2.767701619545294,
      "grad_norm": 2.65625,
      "learning_rate": 4.301891788748959e-06,
      "loss": 0.891,
      "step": 789700
    },
    {
      "epoch": 2.7677366670521892,
      "grad_norm": 3.359375,
      "learning_rate": 4.301242760085256e-06,
      "loss": 0.8349,
      "step": 789710
    },
    {
      "epoch": 2.767771714559085,
      "grad_norm": 2.703125,
      "learning_rate": 4.300593731421555e-06,
      "loss": 0.8662,
      "step": 789720
    },
    {
      "epoch": 2.7678067620659803,
      "grad_norm": 3.1875,
      "learning_rate": 4.299944702757853e-06,
      "loss": 0.8774,
      "step": 789730
    },
    {
      "epoch": 2.767841809572876,
      "grad_norm": 2.421875,
      "learning_rate": 4.299295674094151e-06,
      "loss": 0.7353,
      "step": 789740
    },
    {
      "epoch": 2.767876857079772,
      "grad_norm": 3.125,
      "learning_rate": 4.298646645430449e-06,
      "loss": 0.7952,
      "step": 789750
    },
    {
      "epoch": 2.767911904586667,
      "grad_norm": 3.109375,
      "learning_rate": 4.297997616766747e-06,
      "loss": 0.795,
      "step": 789760
    },
    {
      "epoch": 2.767946952093563,
      "grad_norm": 4.03125,
      "learning_rate": 4.297348588103045e-06,
      "loss": 0.8331,
      "step": 789770
    },
    {
      "epoch": 2.767981999600458,
      "grad_norm": 2.453125,
      "learning_rate": 4.296699559439343e-06,
      "loss": 0.8242,
      "step": 789780
    },
    {
      "epoch": 2.768017047107354,
      "grad_norm": 2.890625,
      "learning_rate": 4.296050530775642e-06,
      "loss": 0.7808,
      "step": 789790
    },
    {
      "epoch": 2.7680520946142497,
      "grad_norm": 3.15625,
      "learning_rate": 4.29540150211194e-06,
      "loss": 0.8293,
      "step": 789800
    },
    {
      "epoch": 2.7680871421211455,
      "grad_norm": 2.890625,
      "learning_rate": 4.294752473448238e-06,
      "loss": 0.7412,
      "step": 789810
    },
    {
      "epoch": 2.768122189628041,
      "grad_norm": 2.84375,
      "learning_rate": 4.294103444784536e-06,
      "loss": 0.783,
      "step": 789820
    },
    {
      "epoch": 2.7681572371349366,
      "grad_norm": 3.0,
      "learning_rate": 4.293454416120834e-06,
      "loss": 0.8114,
      "step": 789830
    },
    {
      "epoch": 2.768192284641832,
      "grad_norm": 3.046875,
      "learning_rate": 4.292805387457132e-06,
      "loss": 0.872,
      "step": 789840
    },
    {
      "epoch": 2.7682273321487276,
      "grad_norm": 2.703125,
      "learning_rate": 4.29215635879343e-06,
      "loss": 0.7613,
      "step": 789850
    },
    {
      "epoch": 2.7682623796556234,
      "grad_norm": 3.21875,
      "learning_rate": 4.291507330129728e-06,
      "loss": 0.8135,
      "step": 789860
    },
    {
      "epoch": 2.7682974271625187,
      "grad_norm": 2.890625,
      "learning_rate": 4.290858301466026e-06,
      "loss": 0.8397,
      "step": 789870
    },
    {
      "epoch": 2.7683324746694145,
      "grad_norm": 2.5625,
      "learning_rate": 4.290209272802325e-06,
      "loss": 0.7613,
      "step": 789880
    },
    {
      "epoch": 2.7683675221763098,
      "grad_norm": 2.84375,
      "learning_rate": 4.289560244138623e-06,
      "loss": 0.7964,
      "step": 789890
    },
    {
      "epoch": 2.7684025696832055,
      "grad_norm": 3.046875,
      "learning_rate": 4.28891121547492e-06,
      "loss": 0.7732,
      "step": 789900
    },
    {
      "epoch": 2.7684376171901013,
      "grad_norm": 3.140625,
      "learning_rate": 4.288262186811219e-06,
      "loss": 0.8513,
      "step": 789910
    },
    {
      "epoch": 2.768472664696997,
      "grad_norm": 3.140625,
      "learning_rate": 4.287613158147517e-06,
      "loss": 0.8857,
      "step": 789920
    },
    {
      "epoch": 2.7685077122038924,
      "grad_norm": 3.15625,
      "learning_rate": 4.286964129483815e-06,
      "loss": 0.8127,
      "step": 789930
    },
    {
      "epoch": 2.768542759710788,
      "grad_norm": 3.125,
      "learning_rate": 4.2863151008201135e-06,
      "loss": 0.7937,
      "step": 789940
    },
    {
      "epoch": 2.7685778072176834,
      "grad_norm": 2.625,
      "learning_rate": 4.285666072156411e-06,
      "loss": 0.7442,
      "step": 789950
    },
    {
      "epoch": 2.768612854724579,
      "grad_norm": 3.125,
      "learning_rate": 4.285017043492709e-06,
      "loss": 0.7974,
      "step": 789960
    },
    {
      "epoch": 2.768647902231475,
      "grad_norm": 3.09375,
      "learning_rate": 4.2843680148290075e-06,
      "loss": 0.8259,
      "step": 789970
    },
    {
      "epoch": 2.7686829497383703,
      "grad_norm": 3.046875,
      "learning_rate": 4.2837189861653055e-06,
      "loss": 0.8367,
      "step": 789980
    },
    {
      "epoch": 2.768717997245266,
      "grad_norm": 2.765625,
      "learning_rate": 4.283069957501603e-06,
      "loss": 0.8277,
      "step": 789990
    },
    {
      "epoch": 2.7687530447521613,
      "grad_norm": 3.609375,
      "learning_rate": 4.2824209288379015e-06,
      "loss": 0.7694,
      "step": 790000
    },
    {
      "epoch": 2.7687530447521613,
      "eval_loss": 0.7520397901535034,
      "eval_runtime": 550.547,
      "eval_samples_per_second": 691.015,
      "eval_steps_per_second": 57.585,
      "step": 790000
    },
    {
      "epoch": 2.768788092259057,
      "grad_norm": 3.046875,
      "learning_rate": 4.2817719001741995e-06,
      "loss": 0.8101,
      "step": 790010
    },
    {
      "epoch": 2.768823139765953,
      "grad_norm": 2.96875,
      "learning_rate": 4.2811228715104975e-06,
      "loss": 0.7891,
      "step": 790020
    },
    {
      "epoch": 2.7688581872728486,
      "grad_norm": 3.15625,
      "learning_rate": 4.280473842846796e-06,
      "loss": 0.8672,
      "step": 790030
    },
    {
      "epoch": 2.768893234779744,
      "grad_norm": 2.421875,
      "learning_rate": 4.2798248141830935e-06,
      "loss": 0.8435,
      "step": 790040
    },
    {
      "epoch": 2.7689282822866397,
      "grad_norm": 2.625,
      "learning_rate": 4.2791757855193915e-06,
      "loss": 0.7386,
      "step": 790050
    },
    {
      "epoch": 2.768963329793535,
      "grad_norm": 2.953125,
      "learning_rate": 4.27852675685569e-06,
      "loss": 0.8062,
      "step": 790060
    },
    {
      "epoch": 2.7689983773004307,
      "grad_norm": 2.71875,
      "learning_rate": 4.277877728191988e-06,
      "loss": 0.842,
      "step": 790070
    },
    {
      "epoch": 2.7690334248073265,
      "grad_norm": 3.265625,
      "learning_rate": 4.277228699528286e-06,
      "loss": 0.7672,
      "step": 790080
    },
    {
      "epoch": 2.769068472314222,
      "grad_norm": 2.875,
      "learning_rate": 4.276579670864584e-06,
      "loss": 0.8175,
      "step": 790090
    },
    {
      "epoch": 2.7691035198211176,
      "grad_norm": 2.734375,
      "learning_rate": 4.275930642200882e-06,
      "loss": 0.7972,
      "step": 790100
    },
    {
      "epoch": 2.769138567328013,
      "grad_norm": 3.03125,
      "learning_rate": 4.27528161353718e-06,
      "loss": 0.8128,
      "step": 790110
    },
    {
      "epoch": 2.7691736148349086,
      "grad_norm": 2.609375,
      "learning_rate": 4.274632584873479e-06,
      "loss": 0.7887,
      "step": 790120
    },
    {
      "epoch": 2.7692086623418044,
      "grad_norm": 2.9375,
      "learning_rate": 4.273983556209776e-06,
      "loss": 0.765,
      "step": 790130
    },
    {
      "epoch": 2.7692437098487,
      "grad_norm": 2.515625,
      "learning_rate": 4.273334527546074e-06,
      "loss": 0.8421,
      "step": 790140
    },
    {
      "epoch": 2.7692787573555955,
      "grad_norm": 3.34375,
      "learning_rate": 4.272685498882373e-06,
      "loss": 0.8213,
      "step": 790150
    },
    {
      "epoch": 2.7693138048624912,
      "grad_norm": 2.71875,
      "learning_rate": 4.272036470218671e-06,
      "loss": 0.8114,
      "step": 790160
    },
    {
      "epoch": 2.7693488523693865,
      "grad_norm": 2.734375,
      "learning_rate": 4.271387441554969e-06,
      "loss": 0.7435,
      "step": 790170
    },
    {
      "epoch": 2.7693838998762823,
      "grad_norm": 3.0,
      "learning_rate": 4.270738412891267e-06,
      "loss": 0.7797,
      "step": 790180
    },
    {
      "epoch": 2.769418947383178,
      "grad_norm": 3.375,
      "learning_rate": 4.270089384227565e-06,
      "loss": 0.7225,
      "step": 790190
    },
    {
      "epoch": 2.7694539948900734,
      "grad_norm": 3.015625,
      "learning_rate": 4.269440355563863e-06,
      "loss": 0.8796,
      "step": 790200
    },
    {
      "epoch": 2.769489042396969,
      "grad_norm": 2.78125,
      "learning_rate": 4.268791326900161e-06,
      "loss": 0.8401,
      "step": 790210
    },
    {
      "epoch": 2.7695240899038644,
      "grad_norm": 3.265625,
      "learning_rate": 4.26814229823646e-06,
      "loss": 0.8049,
      "step": 790220
    },
    {
      "epoch": 2.76955913741076,
      "grad_norm": 3.15625,
      "learning_rate": 4.267493269572757e-06,
      "loss": 0.768,
      "step": 790230
    },
    {
      "epoch": 2.769594184917656,
      "grad_norm": 3.03125,
      "learning_rate": 4.266844240909056e-06,
      "loss": 0.846,
      "step": 790240
    },
    {
      "epoch": 2.7696292324245517,
      "grad_norm": 3.109375,
      "learning_rate": 4.266195212245354e-06,
      "loss": 0.8231,
      "step": 790250
    },
    {
      "epoch": 2.769664279931447,
      "grad_norm": 3.140625,
      "learning_rate": 4.265546183581652e-06,
      "loss": 0.824,
      "step": 790260
    },
    {
      "epoch": 2.769699327438343,
      "grad_norm": 3.390625,
      "learning_rate": 4.26489715491795e-06,
      "loss": 0.7842,
      "step": 790270
    },
    {
      "epoch": 2.769734374945238,
      "grad_norm": 2.25,
      "learning_rate": 4.264248126254248e-06,
      "loss": 0.8126,
      "step": 790280
    },
    {
      "epoch": 2.769769422452134,
      "grad_norm": 2.578125,
      "learning_rate": 4.263599097590546e-06,
      "loss": 0.7931,
      "step": 790290
    },
    {
      "epoch": 2.7698044699590296,
      "grad_norm": 3.046875,
      "learning_rate": 4.262950068926844e-06,
      "loss": 0.8347,
      "step": 790300
    },
    {
      "epoch": 2.769839517465925,
      "grad_norm": 2.46875,
      "learning_rate": 4.262301040263143e-06,
      "loss": 0.8397,
      "step": 790310
    },
    {
      "epoch": 2.7698745649728207,
      "grad_norm": 2.515625,
      "learning_rate": 4.26165201159944e-06,
      "loss": 0.8349,
      "step": 790320
    },
    {
      "epoch": 2.769909612479716,
      "grad_norm": 3.03125,
      "learning_rate": 4.261002982935738e-06,
      "loss": 0.7254,
      "step": 790330
    },
    {
      "epoch": 2.7699446599866118,
      "grad_norm": 3.1875,
      "learning_rate": 4.260353954272037e-06,
      "loss": 0.7518,
      "step": 790340
    },
    {
      "epoch": 2.7699797074935075,
      "grad_norm": 2.65625,
      "learning_rate": 4.259704925608335e-06,
      "loss": 0.749,
      "step": 790350
    },
    {
      "epoch": 2.7700147550004033,
      "grad_norm": 2.96875,
      "learning_rate": 4.259055896944633e-06,
      "loss": 0.7923,
      "step": 790360
    },
    {
      "epoch": 2.7700498025072986,
      "grad_norm": 2.921875,
      "learning_rate": 4.258406868280931e-06,
      "loss": 0.8213,
      "step": 790370
    },
    {
      "epoch": 2.7700848500141944,
      "grad_norm": 2.75,
      "learning_rate": 4.257757839617229e-06,
      "loss": 0.7852,
      "step": 790380
    },
    {
      "epoch": 2.7701198975210897,
      "grad_norm": 2.703125,
      "learning_rate": 4.257108810953527e-06,
      "loss": 0.8002,
      "step": 790390
    },
    {
      "epoch": 2.7701549450279854,
      "grad_norm": 3.3125,
      "learning_rate": 4.256459782289826e-06,
      "loss": 0.8113,
      "step": 790400
    },
    {
      "epoch": 2.770189992534881,
      "grad_norm": 2.890625,
      "learning_rate": 4.255810753626124e-06,
      "loss": 0.8142,
      "step": 790410
    },
    {
      "epoch": 2.7702250400417765,
      "grad_norm": 2.984375,
      "learning_rate": 4.255161724962421e-06,
      "loss": 0.8667,
      "step": 790420
    },
    {
      "epoch": 2.7702600875486723,
      "grad_norm": 3.125,
      "learning_rate": 4.25451269629872e-06,
      "loss": 0.7501,
      "step": 790430
    },
    {
      "epoch": 2.7702951350555676,
      "grad_norm": 2.484375,
      "learning_rate": 4.253863667635018e-06,
      "loss": 0.7483,
      "step": 790440
    },
    {
      "epoch": 2.7703301825624633,
      "grad_norm": 3.03125,
      "learning_rate": 4.253214638971316e-06,
      "loss": 0.784,
      "step": 790450
    },
    {
      "epoch": 2.770365230069359,
      "grad_norm": 2.859375,
      "learning_rate": 4.252565610307614e-06,
      "loss": 0.8096,
      "step": 790460
    },
    {
      "epoch": 2.770400277576255,
      "grad_norm": 2.859375,
      "learning_rate": 4.251916581643912e-06,
      "loss": 0.7766,
      "step": 790470
    },
    {
      "epoch": 2.77043532508315,
      "grad_norm": 2.671875,
      "learning_rate": 4.25126755298021e-06,
      "loss": 0.8094,
      "step": 790480
    },
    {
      "epoch": 2.770470372590046,
      "grad_norm": 2.9375,
      "learning_rate": 4.2506185243165086e-06,
      "loss": 0.7946,
      "step": 790490
    },
    {
      "epoch": 2.7705054200969412,
      "grad_norm": 3.078125,
      "learning_rate": 4.2499694956528066e-06,
      "loss": 0.8043,
      "step": 790500
    },
    {
      "epoch": 2.770540467603837,
      "grad_norm": 3.25,
      "learning_rate": 4.249320466989104e-06,
      "loss": 0.8333,
      "step": 790510
    },
    {
      "epoch": 2.7705755151107327,
      "grad_norm": 2.796875,
      "learning_rate": 4.2486714383254026e-06,
      "loss": 0.7539,
      "step": 790520
    },
    {
      "epoch": 2.770610562617628,
      "grad_norm": 2.734375,
      "learning_rate": 4.2480224096617006e-06,
      "loss": 0.8858,
      "step": 790530
    },
    {
      "epoch": 2.770645610124524,
      "grad_norm": 2.65625,
      "learning_rate": 4.2473733809979986e-06,
      "loss": 0.7811,
      "step": 790540
    },
    {
      "epoch": 2.770680657631419,
      "grad_norm": 2.9375,
      "learning_rate": 4.246724352334297e-06,
      "loss": 0.7195,
      "step": 790550
    },
    {
      "epoch": 2.770715705138315,
      "grad_norm": 3.234375,
      "learning_rate": 4.2460753236705946e-06,
      "loss": 0.7979,
      "step": 790560
    },
    {
      "epoch": 2.7707507526452106,
      "grad_norm": 2.796875,
      "learning_rate": 4.2454262950068926e-06,
      "loss": 0.8515,
      "step": 790570
    },
    {
      "epoch": 2.7707858001521064,
      "grad_norm": 2.5625,
      "learning_rate": 4.244777266343191e-06,
      "loss": 0.8235,
      "step": 790580
    },
    {
      "epoch": 2.7708208476590017,
      "grad_norm": 3.03125,
      "learning_rate": 4.244128237679489e-06,
      "loss": 0.817,
      "step": 790590
    },
    {
      "epoch": 2.7708558951658975,
      "grad_norm": 2.71875,
      "learning_rate": 4.243479209015787e-06,
      "loss": 0.7306,
      "step": 790600
    },
    {
      "epoch": 2.770890942672793,
      "grad_norm": 3.0,
      "learning_rate": 4.242830180352085e-06,
      "loss": 0.781,
      "step": 790610
    },
    {
      "epoch": 2.7709259901796885,
      "grad_norm": 2.609375,
      "learning_rate": 4.242181151688383e-06,
      "loss": 0.8103,
      "step": 790620
    },
    {
      "epoch": 2.7709610376865843,
      "grad_norm": 3.03125,
      "learning_rate": 4.241532123024681e-06,
      "loss": 0.7815,
      "step": 790630
    },
    {
      "epoch": 2.7709960851934796,
      "grad_norm": 2.890625,
      "learning_rate": 4.240883094360979e-06,
      "loss": 0.8004,
      "step": 790640
    },
    {
      "epoch": 2.7710311327003754,
      "grad_norm": 2.734375,
      "learning_rate": 4.240234065697277e-06,
      "loss": 0.8725,
      "step": 790650
    },
    {
      "epoch": 2.7710661802072707,
      "grad_norm": 2.9375,
      "learning_rate": 4.239585037033575e-06,
      "loss": 0.7796,
      "step": 790660
    },
    {
      "epoch": 2.7711012277141664,
      "grad_norm": 2.40625,
      "learning_rate": 4.238936008369874e-06,
      "loss": 0.8009,
      "step": 790670
    },
    {
      "epoch": 2.771136275221062,
      "grad_norm": 2.671875,
      "learning_rate": 4.238286979706172e-06,
      "loss": 0.8484,
      "step": 790680
    },
    {
      "epoch": 2.771171322727958,
      "grad_norm": 2.953125,
      "learning_rate": 4.23763795104247e-06,
      "loss": 0.7192,
      "step": 790690
    },
    {
      "epoch": 2.7712063702348533,
      "grad_norm": 2.71875,
      "learning_rate": 4.236988922378768e-06,
      "loss": 0.7104,
      "step": 790700
    },
    {
      "epoch": 2.771241417741749,
      "grad_norm": 2.734375,
      "learning_rate": 4.236339893715066e-06,
      "loss": 0.8315,
      "step": 790710
    },
    {
      "epoch": 2.7712764652486443,
      "grad_norm": 3.078125,
      "learning_rate": 4.235690865051364e-06,
      "loss": 0.9072,
      "step": 790720
    },
    {
      "epoch": 2.77131151275554,
      "grad_norm": 2.859375,
      "learning_rate": 4.235041836387662e-06,
      "loss": 0.7994,
      "step": 790730
    },
    {
      "epoch": 2.771346560262436,
      "grad_norm": 3.125,
      "learning_rate": 4.234392807723961e-06,
      "loss": 0.7871,
      "step": 790740
    },
    {
      "epoch": 2.771381607769331,
      "grad_norm": 2.875,
      "learning_rate": 4.233743779060258e-06,
      "loss": 0.6833,
      "step": 790750
    },
    {
      "epoch": 2.771416655276227,
      "grad_norm": 2.84375,
      "learning_rate": 4.233094750396556e-06,
      "loss": 0.8127,
      "step": 790760
    },
    {
      "epoch": 2.7714517027831223,
      "grad_norm": 3.328125,
      "learning_rate": 4.232445721732855e-06,
      "loss": 0.9203,
      "step": 790770
    },
    {
      "epoch": 2.771486750290018,
      "grad_norm": 2.53125,
      "learning_rate": 4.231796693069153e-06,
      "loss": 0.8585,
      "step": 790780
    },
    {
      "epoch": 2.7715217977969138,
      "grad_norm": 2.921875,
      "learning_rate": 4.231147664405451e-06,
      "loss": 0.8132,
      "step": 790790
    },
    {
      "epoch": 2.7715568453038095,
      "grad_norm": 3.390625,
      "learning_rate": 4.230498635741749e-06,
      "loss": 0.7971,
      "step": 790800
    },
    {
      "epoch": 2.771591892810705,
      "grad_norm": 2.96875,
      "learning_rate": 4.229849607078047e-06,
      "loss": 0.7974,
      "step": 790810
    },
    {
      "epoch": 2.7716269403176006,
      "grad_norm": 2.5,
      "learning_rate": 4.229200578414345e-06,
      "loss": 0.6887,
      "step": 790820
    },
    {
      "epoch": 2.771661987824496,
      "grad_norm": 2.609375,
      "learning_rate": 4.228551549750644e-06,
      "loss": 0.7742,
      "step": 790830
    },
    {
      "epoch": 2.7716970353313917,
      "grad_norm": 2.59375,
      "learning_rate": 4.227902521086941e-06,
      "loss": 0.8169,
      "step": 790840
    },
    {
      "epoch": 2.7717320828382874,
      "grad_norm": 2.609375,
      "learning_rate": 4.227253492423239e-06,
      "loss": 0.8362,
      "step": 790850
    },
    {
      "epoch": 2.7717671303451827,
      "grad_norm": 2.46875,
      "learning_rate": 4.226604463759538e-06,
      "loss": 0.8015,
      "step": 790860
    },
    {
      "epoch": 2.7718021778520785,
      "grad_norm": 3.046875,
      "learning_rate": 4.225955435095836e-06,
      "loss": 0.7735,
      "step": 790870
    },
    {
      "epoch": 2.7718372253589743,
      "grad_norm": 2.5,
      "learning_rate": 4.225306406432134e-06,
      "loss": 0.8546,
      "step": 790880
    },
    {
      "epoch": 2.7718722728658696,
      "grad_norm": 2.6875,
      "learning_rate": 4.224657377768432e-06,
      "loss": 0.7842,
      "step": 790890
    },
    {
      "epoch": 2.7719073203727653,
      "grad_norm": 2.9375,
      "learning_rate": 4.22400834910473e-06,
      "loss": 0.7974,
      "step": 790900
    },
    {
      "epoch": 2.771942367879661,
      "grad_norm": 2.625,
      "learning_rate": 4.223359320441028e-06,
      "loss": 0.7542,
      "step": 790910
    },
    {
      "epoch": 2.7719774153865564,
      "grad_norm": 2.875,
      "learning_rate": 4.222710291777327e-06,
      "loss": 0.7727,
      "step": 790920
    },
    {
      "epoch": 2.772012462893452,
      "grad_norm": 2.609375,
      "learning_rate": 4.222061263113624e-06,
      "loss": 0.7924,
      "step": 790930
    },
    {
      "epoch": 2.7720475104003475,
      "grad_norm": 2.296875,
      "learning_rate": 4.221412234449922e-06,
      "loss": 0.7237,
      "step": 790940
    },
    {
      "epoch": 2.7720825579072432,
      "grad_norm": 3.75,
      "learning_rate": 4.220763205786221e-06,
      "loss": 0.8069,
      "step": 790950
    },
    {
      "epoch": 2.772117605414139,
      "grad_norm": 2.6875,
      "learning_rate": 4.220114177122519e-06,
      "loss": 0.8014,
      "step": 790960
    },
    {
      "epoch": 2.7721526529210343,
      "grad_norm": 2.9375,
      "learning_rate": 4.219465148458817e-06,
      "loss": 0.8687,
      "step": 790970
    },
    {
      "epoch": 2.77218770042793,
      "grad_norm": 3.21875,
      "learning_rate": 4.218816119795115e-06,
      "loss": 0.8037,
      "step": 790980
    },
    {
      "epoch": 2.772222747934826,
      "grad_norm": 2.703125,
      "learning_rate": 4.218167091131413e-06,
      "loss": 0.754,
      "step": 790990
    },
    {
      "epoch": 2.772257795441721,
      "grad_norm": 3.140625,
      "learning_rate": 4.217518062467711e-06,
      "loss": 0.8915,
      "step": 791000
    },
    {
      "epoch": 2.772292842948617,
      "grad_norm": 3.171875,
      "learning_rate": 4.21686903380401e-06,
      "loss": 0.7541,
      "step": 791010
    },
    {
      "epoch": 2.7723278904555126,
      "grad_norm": 3.640625,
      "learning_rate": 4.216220005140308e-06,
      "loss": 0.8325,
      "step": 791020
    },
    {
      "epoch": 2.772362937962408,
      "grad_norm": 2.8125,
      "learning_rate": 4.215570976476605e-06,
      "loss": 0.7882,
      "step": 791030
    },
    {
      "epoch": 2.7723979854693037,
      "grad_norm": 2.84375,
      "learning_rate": 4.214921947812904e-06,
      "loss": 0.7611,
      "step": 791040
    },
    {
      "epoch": 2.772433032976199,
      "grad_norm": 2.59375,
      "learning_rate": 4.214272919149202e-06,
      "loss": 0.7211,
      "step": 791050
    },
    {
      "epoch": 2.772468080483095,
      "grad_norm": 3.1875,
      "learning_rate": 4.2136238904855e-06,
      "loss": 0.7737,
      "step": 791060
    },
    {
      "epoch": 2.7725031279899905,
      "grad_norm": 2.609375,
      "learning_rate": 4.212974861821798e-06,
      "loss": 0.7685,
      "step": 791070
    },
    {
      "epoch": 2.7725381754968863,
      "grad_norm": 2.921875,
      "learning_rate": 4.212325833158096e-06,
      "loss": 0.7477,
      "step": 791080
    },
    {
      "epoch": 2.7725732230037816,
      "grad_norm": 2.796875,
      "learning_rate": 4.211676804494394e-06,
      "loss": 0.7745,
      "step": 791090
    },
    {
      "epoch": 2.7726082705106774,
      "grad_norm": 3.375,
      "learning_rate": 4.2110277758306925e-06,
      "loss": 0.8329,
      "step": 791100
    },
    {
      "epoch": 2.7726433180175727,
      "grad_norm": 2.578125,
      "learning_rate": 4.2103787471669905e-06,
      "loss": 0.8085,
      "step": 791110
    },
    {
      "epoch": 2.7726783655244684,
      "grad_norm": 2.390625,
      "learning_rate": 4.209729718503288e-06,
      "loss": 0.7679,
      "step": 791120
    },
    {
      "epoch": 2.772713413031364,
      "grad_norm": 3.296875,
      "learning_rate": 4.2090806898395865e-06,
      "loss": 0.8978,
      "step": 791130
    },
    {
      "epoch": 2.7727484605382595,
      "grad_norm": 2.5,
      "learning_rate": 4.2084316611758845e-06,
      "loss": 0.7694,
      "step": 791140
    },
    {
      "epoch": 2.7727835080451553,
      "grad_norm": 3.15625,
      "learning_rate": 4.2077826325121825e-06,
      "loss": 0.8722,
      "step": 791150
    },
    {
      "epoch": 2.7728185555520506,
      "grad_norm": 3.140625,
      "learning_rate": 4.2071336038484805e-06,
      "loss": 0.7813,
      "step": 791160
    },
    {
      "epoch": 2.7728536030589463,
      "grad_norm": 3.046875,
      "learning_rate": 4.2064845751847785e-06,
      "loss": 0.8702,
      "step": 791170
    },
    {
      "epoch": 2.772888650565842,
      "grad_norm": 2.59375,
      "learning_rate": 4.2058355465210765e-06,
      "loss": 0.8496,
      "step": 791180
    },
    {
      "epoch": 2.772923698072738,
      "grad_norm": 2.390625,
      "learning_rate": 4.2051865178573745e-06,
      "loss": 0.8443,
      "step": 791190
    },
    {
      "epoch": 2.772958745579633,
      "grad_norm": 2.953125,
      "learning_rate": 4.204537489193673e-06,
      "loss": 0.7859,
      "step": 791200
    },
    {
      "epoch": 2.772993793086529,
      "grad_norm": 2.703125,
      "learning_rate": 4.203888460529971e-06,
      "loss": 0.7481,
      "step": 791210
    },
    {
      "epoch": 2.7730288405934242,
      "grad_norm": 2.75,
      "learning_rate": 4.203239431866269e-06,
      "loss": 0.8024,
      "step": 791220
    },
    {
      "epoch": 2.77306388810032,
      "grad_norm": 2.65625,
      "learning_rate": 4.202590403202567e-06,
      "loss": 0.8815,
      "step": 791230
    },
    {
      "epoch": 2.7730989356072158,
      "grad_norm": 2.765625,
      "learning_rate": 4.201941374538865e-06,
      "loss": 0.8237,
      "step": 791240
    },
    {
      "epoch": 2.773133983114111,
      "grad_norm": 2.71875,
      "learning_rate": 4.201292345875163e-06,
      "loss": 0.8841,
      "step": 791250
    },
    {
      "epoch": 2.773169030621007,
      "grad_norm": 3.015625,
      "learning_rate": 4.200643317211461e-06,
      "loss": 0.8653,
      "step": 791260
    },
    {
      "epoch": 2.773204078127902,
      "grad_norm": 2.71875,
      "learning_rate": 4.199994288547759e-06,
      "loss": 0.7813,
      "step": 791270
    },
    {
      "epoch": 2.773239125634798,
      "grad_norm": 3.1875,
      "learning_rate": 4.199345259884057e-06,
      "loss": 0.8514,
      "step": 791280
    },
    {
      "epoch": 2.7732741731416937,
      "grad_norm": 2.859375,
      "learning_rate": 4.198696231220356e-06,
      "loss": 0.8144,
      "step": 791290
    },
    {
      "epoch": 2.7733092206485894,
      "grad_norm": 3.0,
      "learning_rate": 4.198047202556654e-06,
      "loss": 0.7397,
      "step": 791300
    },
    {
      "epoch": 2.7733442681554847,
      "grad_norm": 3.234375,
      "learning_rate": 4.197398173892951e-06,
      "loss": 0.8422,
      "step": 791310
    },
    {
      "epoch": 2.7733793156623805,
      "grad_norm": 3.171875,
      "learning_rate": 4.19674914522925e-06,
      "loss": 0.8447,
      "step": 791320
    },
    {
      "epoch": 2.773414363169276,
      "grad_norm": 2.875,
      "learning_rate": 4.196100116565548e-06,
      "loss": 0.7989,
      "step": 791330
    },
    {
      "epoch": 2.7734494106761716,
      "grad_norm": 3.015625,
      "learning_rate": 4.195451087901846e-06,
      "loss": 0.8254,
      "step": 791340
    },
    {
      "epoch": 2.7734844581830673,
      "grad_norm": 3.4375,
      "learning_rate": 4.194802059238145e-06,
      "loss": 0.8245,
      "step": 791350
    },
    {
      "epoch": 2.7735195056899626,
      "grad_norm": 2.90625,
      "learning_rate": 4.194153030574442e-06,
      "loss": 0.7157,
      "step": 791360
    },
    {
      "epoch": 2.7735545531968584,
      "grad_norm": 3.203125,
      "learning_rate": 4.19350400191074e-06,
      "loss": 0.8331,
      "step": 791370
    },
    {
      "epoch": 2.7735896007037537,
      "grad_norm": 2.84375,
      "learning_rate": 4.192854973247039e-06,
      "loss": 0.7443,
      "step": 791380
    },
    {
      "epoch": 2.7736246482106495,
      "grad_norm": 2.984375,
      "learning_rate": 4.192205944583337e-06,
      "loss": 0.7897,
      "step": 791390
    },
    {
      "epoch": 2.7736596957175452,
      "grad_norm": 3.09375,
      "learning_rate": 4.191556915919634e-06,
      "loss": 0.7871,
      "step": 791400
    },
    {
      "epoch": 2.773694743224441,
      "grad_norm": 2.8125,
      "learning_rate": 4.190907887255933e-06,
      "loss": 0.7894,
      "step": 791410
    },
    {
      "epoch": 2.7737297907313363,
      "grad_norm": 2.90625,
      "learning_rate": 4.190258858592231e-06,
      "loss": 0.7687,
      "step": 791420
    },
    {
      "epoch": 2.773764838238232,
      "grad_norm": 3.109375,
      "learning_rate": 4.189609829928529e-06,
      "loss": 0.8569,
      "step": 791430
    },
    {
      "epoch": 2.7737998857451274,
      "grad_norm": 2.78125,
      "learning_rate": 4.188960801264828e-06,
      "loss": 0.7831,
      "step": 791440
    },
    {
      "epoch": 2.773834933252023,
      "grad_norm": 2.5,
      "learning_rate": 4.188311772601125e-06,
      "loss": 0.8194,
      "step": 791450
    },
    {
      "epoch": 2.773869980758919,
      "grad_norm": 3.0,
      "learning_rate": 4.187662743937423e-06,
      "loss": 0.7119,
      "step": 791460
    },
    {
      "epoch": 2.773905028265814,
      "grad_norm": 3.6875,
      "learning_rate": 4.187013715273722e-06,
      "loss": 0.8829,
      "step": 791470
    },
    {
      "epoch": 2.77394007577271,
      "grad_norm": 2.921875,
      "learning_rate": 4.18636468661002e-06,
      "loss": 0.8009,
      "step": 791480
    },
    {
      "epoch": 2.7739751232796053,
      "grad_norm": 2.6875,
      "learning_rate": 4.185715657946318e-06,
      "loss": 0.7523,
      "step": 791490
    },
    {
      "epoch": 2.774010170786501,
      "grad_norm": 2.8125,
      "learning_rate": 4.185066629282616e-06,
      "loss": 0.7528,
      "step": 791500
    },
    {
      "epoch": 2.774045218293397,
      "grad_norm": 3.40625,
      "learning_rate": 4.184417600618914e-06,
      "loss": 0.7997,
      "step": 791510
    },
    {
      "epoch": 2.7740802658002925,
      "grad_norm": 2.625,
      "learning_rate": 4.183768571955212e-06,
      "loss": 0.8528,
      "step": 791520
    },
    {
      "epoch": 2.774115313307188,
      "grad_norm": 2.765625,
      "learning_rate": 4.183119543291511e-06,
      "loss": 0.7628,
      "step": 791530
    },
    {
      "epoch": 2.7741503608140836,
      "grad_norm": 2.90625,
      "learning_rate": 4.182470514627809e-06,
      "loss": 0.8061,
      "step": 791540
    },
    {
      "epoch": 2.774185408320979,
      "grad_norm": 2.640625,
      "learning_rate": 4.181821485964106e-06,
      "loss": 0.8476,
      "step": 791550
    },
    {
      "epoch": 2.7742204558278747,
      "grad_norm": 2.765625,
      "learning_rate": 4.181172457300405e-06,
      "loss": 0.758,
      "step": 791560
    },
    {
      "epoch": 2.7742555033347704,
      "grad_norm": 2.75,
      "learning_rate": 4.180523428636703e-06,
      "loss": 0.7969,
      "step": 791570
    },
    {
      "epoch": 2.7742905508416658,
      "grad_norm": 3.046875,
      "learning_rate": 4.179874399973001e-06,
      "loss": 0.8482,
      "step": 791580
    },
    {
      "epoch": 2.7743255983485615,
      "grad_norm": 2.8125,
      "learning_rate": 4.179225371309299e-06,
      "loss": 0.7616,
      "step": 791590
    },
    {
      "epoch": 2.774360645855457,
      "grad_norm": 2.796875,
      "learning_rate": 4.178576342645597e-06,
      "loss": 0.7841,
      "step": 791600
    },
    {
      "epoch": 2.7743956933623526,
      "grad_norm": 2.6875,
      "learning_rate": 4.177927313981895e-06,
      "loss": 0.8536,
      "step": 791610
    },
    {
      "epoch": 2.7744307408692483,
      "grad_norm": 2.984375,
      "learning_rate": 4.177278285318193e-06,
      "loss": 0.7334,
      "step": 791620
    },
    {
      "epoch": 2.774465788376144,
      "grad_norm": 2.84375,
      "learning_rate": 4.1766292566544916e-06,
      "loss": 0.7658,
      "step": 791630
    },
    {
      "epoch": 2.7745008358830394,
      "grad_norm": 2.859375,
      "learning_rate": 4.175980227990789e-06,
      "loss": 0.8046,
      "step": 791640
    },
    {
      "epoch": 2.774535883389935,
      "grad_norm": 2.765625,
      "learning_rate": 4.1753311993270876e-06,
      "loss": 0.72,
      "step": 791650
    },
    {
      "epoch": 2.7745709308968305,
      "grad_norm": 3.21875,
      "learning_rate": 4.1746821706633856e-06,
      "loss": 0.7991,
      "step": 791660
    },
    {
      "epoch": 2.7746059784037262,
      "grad_norm": 2.890625,
      "learning_rate": 4.1740331419996836e-06,
      "loss": 0.8294,
      "step": 791670
    },
    {
      "epoch": 2.774641025910622,
      "grad_norm": 2.4375,
      "learning_rate": 4.1733841133359816e-06,
      "loss": 0.7701,
      "step": 791680
    },
    {
      "epoch": 2.7746760734175173,
      "grad_norm": 2.890625,
      "learning_rate": 4.1727350846722796e-06,
      "loss": 0.8257,
      "step": 791690
    },
    {
      "epoch": 2.774711120924413,
      "grad_norm": 2.625,
      "learning_rate": 4.1720860560085776e-06,
      "loss": 0.768,
      "step": 791700
    },
    {
      "epoch": 2.7747461684313084,
      "grad_norm": 2.75,
      "learning_rate": 4.1714370273448756e-06,
      "loss": 0.7339,
      "step": 791710
    },
    {
      "epoch": 2.774781215938204,
      "grad_norm": 3.234375,
      "learning_rate": 4.170787998681174e-06,
      "loss": 0.7784,
      "step": 791720
    },
    {
      "epoch": 2.7748162634451,
      "grad_norm": 3.0,
      "learning_rate": 4.1701389700174716e-06,
      "loss": 0.8231,
      "step": 791730
    },
    {
      "epoch": 2.7748513109519957,
      "grad_norm": 2.71875,
      "learning_rate": 4.1694899413537696e-06,
      "loss": 0.6948,
      "step": 791740
    },
    {
      "epoch": 2.774886358458891,
      "grad_norm": 3.40625,
      "learning_rate": 4.168840912690068e-06,
      "loss": 0.8317,
      "step": 791750
    },
    {
      "epoch": 2.7749214059657867,
      "grad_norm": 2.890625,
      "learning_rate": 4.168191884026366e-06,
      "loss": 0.7671,
      "step": 791760
    },
    {
      "epoch": 2.774956453472682,
      "grad_norm": 2.78125,
      "learning_rate": 4.167542855362664e-06,
      "loss": 0.8034,
      "step": 791770
    },
    {
      "epoch": 2.774991500979578,
      "grad_norm": 2.78125,
      "learning_rate": 4.166893826698962e-06,
      "loss": 0.8848,
      "step": 791780
    },
    {
      "epoch": 2.7750265484864736,
      "grad_norm": 3.125,
      "learning_rate": 4.16624479803526e-06,
      "loss": 0.8308,
      "step": 791790
    },
    {
      "epoch": 2.775061595993369,
      "grad_norm": 3.046875,
      "learning_rate": 4.165595769371558e-06,
      "loss": 0.8163,
      "step": 791800
    },
    {
      "epoch": 2.7750966435002646,
      "grad_norm": 2.9375,
      "learning_rate": 4.164946740707857e-06,
      "loss": 0.7726,
      "step": 791810
    },
    {
      "epoch": 2.77513169100716,
      "grad_norm": 2.875,
      "learning_rate": 4.164297712044155e-06,
      "loss": 0.7929,
      "step": 791820
    },
    {
      "epoch": 2.7751667385140557,
      "grad_norm": 3.125,
      "learning_rate": 4.163648683380452e-06,
      "loss": 0.7787,
      "step": 791830
    },
    {
      "epoch": 2.7752017860209515,
      "grad_norm": 2.9375,
      "learning_rate": 4.162999654716751e-06,
      "loss": 0.7743,
      "step": 791840
    },
    {
      "epoch": 2.7752368335278472,
      "grad_norm": 3.0625,
      "learning_rate": 4.162350626053049e-06,
      "loss": 0.7883,
      "step": 791850
    },
    {
      "epoch": 2.7752718810347425,
      "grad_norm": 2.75,
      "learning_rate": 4.161701597389347e-06,
      "loss": 0.8142,
      "step": 791860
    },
    {
      "epoch": 2.7753069285416383,
      "grad_norm": 2.5625,
      "learning_rate": 4.161052568725645e-06,
      "loss": 0.7839,
      "step": 791870
    },
    {
      "epoch": 2.7753419760485336,
      "grad_norm": 2.859375,
      "learning_rate": 4.160403540061943e-06,
      "loss": 0.8398,
      "step": 791880
    },
    {
      "epoch": 2.7753770235554294,
      "grad_norm": 2.96875,
      "learning_rate": 4.159754511398241e-06,
      "loss": 0.8323,
      "step": 791890
    },
    {
      "epoch": 2.775412071062325,
      "grad_norm": 3.25,
      "learning_rate": 4.15910548273454e-06,
      "loss": 0.9034,
      "step": 791900
    },
    {
      "epoch": 2.7754471185692204,
      "grad_norm": 3.171875,
      "learning_rate": 4.158456454070838e-06,
      "loss": 0.8839,
      "step": 791910
    },
    {
      "epoch": 2.775482166076116,
      "grad_norm": 3.109375,
      "learning_rate": 4.157807425407135e-06,
      "loss": 0.8431,
      "step": 791920
    },
    {
      "epoch": 2.7755172135830115,
      "grad_norm": 2.8125,
      "learning_rate": 4.157158396743434e-06,
      "loss": 0.8764,
      "step": 791930
    },
    {
      "epoch": 2.7755522610899073,
      "grad_norm": 2.40625,
      "learning_rate": 4.156509368079732e-06,
      "loss": 0.8599,
      "step": 791940
    },
    {
      "epoch": 2.775587308596803,
      "grad_norm": 2.46875,
      "learning_rate": 4.15586033941603e-06,
      "loss": 0.7623,
      "step": 791950
    },
    {
      "epoch": 2.775622356103699,
      "grad_norm": 2.65625,
      "learning_rate": 4.155211310752329e-06,
      "loss": 0.7423,
      "step": 791960
    },
    {
      "epoch": 2.775657403610594,
      "grad_norm": 2.75,
      "learning_rate": 4.154562282088626e-06,
      "loss": 0.8354,
      "step": 791970
    },
    {
      "epoch": 2.77569245111749,
      "grad_norm": 2.734375,
      "learning_rate": 4.153913253424924e-06,
      "loss": 0.7991,
      "step": 791980
    },
    {
      "epoch": 2.775727498624385,
      "grad_norm": 2.640625,
      "learning_rate": 4.153264224761223e-06,
      "loss": 0.767,
      "step": 791990
    },
    {
      "epoch": 2.775762546131281,
      "grad_norm": 3.046875,
      "learning_rate": 4.152615196097521e-06,
      "loss": 0.8678,
      "step": 792000
    },
    {
      "epoch": 2.7757975936381767,
      "grad_norm": 3.109375,
      "learning_rate": 4.151966167433819e-06,
      "loss": 0.7434,
      "step": 792010
    },
    {
      "epoch": 2.775832641145072,
      "grad_norm": 3.21875,
      "learning_rate": 4.151317138770117e-06,
      "loss": 0.7709,
      "step": 792020
    },
    {
      "epoch": 2.7758676886519678,
      "grad_norm": 2.859375,
      "learning_rate": 4.150668110106415e-06,
      "loss": 0.7572,
      "step": 792030
    },
    {
      "epoch": 2.775902736158863,
      "grad_norm": 2.96875,
      "learning_rate": 4.150019081442713e-06,
      "loss": 0.8183,
      "step": 792040
    },
    {
      "epoch": 2.775937783665759,
      "grad_norm": 3.078125,
      "learning_rate": 4.149370052779012e-06,
      "loss": 0.7376,
      "step": 792050
    },
    {
      "epoch": 2.7759728311726546,
      "grad_norm": 2.921875,
      "learning_rate": 4.148721024115309e-06,
      "loss": 0.8676,
      "step": 792060
    },
    {
      "epoch": 2.7760078786795503,
      "grad_norm": 3.171875,
      "learning_rate": 4.148071995451607e-06,
      "loss": 0.825,
      "step": 792070
    },
    {
      "epoch": 2.7760429261864457,
      "grad_norm": 3.15625,
      "learning_rate": 4.147422966787906e-06,
      "loss": 0.776,
      "step": 792080
    },
    {
      "epoch": 2.7760779736933414,
      "grad_norm": 2.671875,
      "learning_rate": 4.146773938124204e-06,
      "loss": 0.7719,
      "step": 792090
    },
    {
      "epoch": 2.7761130212002367,
      "grad_norm": 2.828125,
      "learning_rate": 4.146124909460502e-06,
      "loss": 0.7302,
      "step": 792100
    },
    {
      "epoch": 2.7761480687071325,
      "grad_norm": 2.703125,
      "learning_rate": 4.1454758807968e-06,
      "loss": 0.8446,
      "step": 792110
    },
    {
      "epoch": 2.7761831162140282,
      "grad_norm": 2.6875,
      "learning_rate": 4.144826852133098e-06,
      "loss": 0.8353,
      "step": 792120
    },
    {
      "epoch": 2.7762181637209236,
      "grad_norm": 2.9375,
      "learning_rate": 4.144177823469396e-06,
      "loss": 0.7929,
      "step": 792130
    },
    {
      "epoch": 2.7762532112278193,
      "grad_norm": 2.671875,
      "learning_rate": 4.143528794805694e-06,
      "loss": 0.767,
      "step": 792140
    },
    {
      "epoch": 2.7762882587347146,
      "grad_norm": 3.15625,
      "learning_rate": 4.142879766141993e-06,
      "loss": 0.8599,
      "step": 792150
    },
    {
      "epoch": 2.7763233062416104,
      "grad_norm": 2.515625,
      "learning_rate": 4.14223073747829e-06,
      "loss": 0.765,
      "step": 792160
    },
    {
      "epoch": 2.776358353748506,
      "grad_norm": 2.921875,
      "learning_rate": 4.141581708814589e-06,
      "loss": 0.8173,
      "step": 792170
    },
    {
      "epoch": 2.776393401255402,
      "grad_norm": 3.203125,
      "learning_rate": 4.140932680150887e-06,
      "loss": 0.8381,
      "step": 792180
    },
    {
      "epoch": 2.776428448762297,
      "grad_norm": 2.78125,
      "learning_rate": 4.140283651487185e-06,
      "loss": 0.7988,
      "step": 792190
    },
    {
      "epoch": 2.776463496269193,
      "grad_norm": 3.25,
      "learning_rate": 4.139634622823483e-06,
      "loss": 0.833,
      "step": 792200
    },
    {
      "epoch": 2.7764985437760883,
      "grad_norm": 2.921875,
      "learning_rate": 4.138985594159781e-06,
      "loss": 0.8735,
      "step": 792210
    },
    {
      "epoch": 2.776533591282984,
      "grad_norm": 2.46875,
      "learning_rate": 4.138336565496079e-06,
      "loss": 0.7661,
      "step": 792220
    },
    {
      "epoch": 2.77656863878988,
      "grad_norm": 3.40625,
      "learning_rate": 4.137687536832377e-06,
      "loss": 0.802,
      "step": 792230
    },
    {
      "epoch": 2.776603686296775,
      "grad_norm": 3.34375,
      "learning_rate": 4.1370385081686755e-06,
      "loss": 0.8081,
      "step": 792240
    },
    {
      "epoch": 2.776638733803671,
      "grad_norm": 3.1875,
      "learning_rate": 4.136389479504973e-06,
      "loss": 0.8647,
      "step": 792250
    },
    {
      "epoch": 2.7766737813105666,
      "grad_norm": 3.3125,
      "learning_rate": 4.135740450841271e-06,
      "loss": 0.81,
      "step": 792260
    },
    {
      "epoch": 2.776708828817462,
      "grad_norm": 3.234375,
      "learning_rate": 4.1350914221775695e-06,
      "loss": 0.8783,
      "step": 792270
    },
    {
      "epoch": 2.7767438763243577,
      "grad_norm": 2.953125,
      "learning_rate": 4.1344423935138675e-06,
      "loss": 0.8225,
      "step": 792280
    },
    {
      "epoch": 2.7767789238312535,
      "grad_norm": 2.90625,
      "learning_rate": 4.1337933648501655e-06,
      "loss": 0.7465,
      "step": 792290
    },
    {
      "epoch": 2.776813971338149,
      "grad_norm": 3.28125,
      "learning_rate": 4.1331443361864635e-06,
      "loss": 0.8701,
      "step": 792300
    },
    {
      "epoch": 2.7768490188450445,
      "grad_norm": 2.859375,
      "learning_rate": 4.1324953075227615e-06,
      "loss": 0.787,
      "step": 792310
    },
    {
      "epoch": 2.77688406635194,
      "grad_norm": 2.4375,
      "learning_rate": 4.1318462788590595e-06,
      "loss": 0.741,
      "step": 792320
    },
    {
      "epoch": 2.7769191138588356,
      "grad_norm": 3.546875,
      "learning_rate": 4.131197250195358e-06,
      "loss": 0.8205,
      "step": 792330
    },
    {
      "epoch": 2.7769541613657314,
      "grad_norm": 2.6875,
      "learning_rate": 4.1305482215316555e-06,
      "loss": 0.8312,
      "step": 792340
    },
    {
      "epoch": 2.7769892088726267,
      "grad_norm": 2.4375,
      "learning_rate": 4.1298991928679535e-06,
      "loss": 0.7027,
      "step": 792350
    },
    {
      "epoch": 2.7770242563795224,
      "grad_norm": 3.03125,
      "learning_rate": 4.129250164204252e-06,
      "loss": 0.8423,
      "step": 792360
    },
    {
      "epoch": 2.777059303886418,
      "grad_norm": 2.578125,
      "learning_rate": 4.12860113554055e-06,
      "loss": 0.8006,
      "step": 792370
    },
    {
      "epoch": 2.7770943513933135,
      "grad_norm": 3.296875,
      "learning_rate": 4.127952106876848e-06,
      "loss": 0.8049,
      "step": 792380
    },
    {
      "epoch": 2.7771293989002093,
      "grad_norm": 3.3125,
      "learning_rate": 4.127303078213146e-06,
      "loss": 0.7794,
      "step": 792390
    },
    {
      "epoch": 2.777164446407105,
      "grad_norm": 3.515625,
      "learning_rate": 4.126654049549444e-06,
      "loss": 0.802,
      "step": 792400
    },
    {
      "epoch": 2.7771994939140003,
      "grad_norm": 2.84375,
      "learning_rate": 4.126005020885742e-06,
      "loss": 0.8602,
      "step": 792410
    },
    {
      "epoch": 2.777234541420896,
      "grad_norm": 3.203125,
      "learning_rate": 4.125355992222041e-06,
      "loss": 0.8022,
      "step": 792420
    },
    {
      "epoch": 2.7772695889277914,
      "grad_norm": 3.3125,
      "learning_rate": 4.124706963558339e-06,
      "loss": 0.7479,
      "step": 792430
    },
    {
      "epoch": 2.777304636434687,
      "grad_norm": 3.390625,
      "learning_rate": 4.124057934894636e-06,
      "loss": 0.8337,
      "step": 792440
    },
    {
      "epoch": 2.777339683941583,
      "grad_norm": 3.09375,
      "learning_rate": 4.123408906230935e-06,
      "loss": 0.7278,
      "step": 792450
    },
    {
      "epoch": 2.7773747314484787,
      "grad_norm": 3.0,
      "learning_rate": 4.122759877567233e-06,
      "loss": 0.7259,
      "step": 792460
    },
    {
      "epoch": 2.777409778955374,
      "grad_norm": 2.46875,
      "learning_rate": 4.122110848903531e-06,
      "loss": 0.7578,
      "step": 792470
    },
    {
      "epoch": 2.7774448264622698,
      "grad_norm": 3.0625,
      "learning_rate": 4.12146182023983e-06,
      "loss": 0.7916,
      "step": 792480
    },
    {
      "epoch": 2.777479873969165,
      "grad_norm": 2.609375,
      "learning_rate": 4.120812791576127e-06,
      "loss": 0.8572,
      "step": 792490
    },
    {
      "epoch": 2.777514921476061,
      "grad_norm": 2.90625,
      "learning_rate": 4.120163762912425e-06,
      "loss": 0.7691,
      "step": 792500
    },
    {
      "epoch": 2.7775499689829566,
      "grad_norm": 2.984375,
      "learning_rate": 4.119514734248724e-06,
      "loss": 0.8488,
      "step": 792510
    },
    {
      "epoch": 2.777585016489852,
      "grad_norm": 3.078125,
      "learning_rate": 4.118865705585022e-06,
      "loss": 0.7754,
      "step": 792520
    },
    {
      "epoch": 2.7776200639967477,
      "grad_norm": 2.6875,
      "learning_rate": 4.118216676921319e-06,
      "loss": 0.8001,
      "step": 792530
    },
    {
      "epoch": 2.777655111503643,
      "grad_norm": 2.75,
      "learning_rate": 4.117567648257618e-06,
      "loss": 0.8149,
      "step": 792540
    },
    {
      "epoch": 2.7776901590105387,
      "grad_norm": 3.046875,
      "learning_rate": 4.116918619593916e-06,
      "loss": 0.8184,
      "step": 792550
    },
    {
      "epoch": 2.7777252065174345,
      "grad_norm": 3.25,
      "learning_rate": 4.116269590930214e-06,
      "loss": 0.7979,
      "step": 792560
    },
    {
      "epoch": 2.7777602540243302,
      "grad_norm": 2.578125,
      "learning_rate": 4.115620562266512e-06,
      "loss": 0.7616,
      "step": 792570
    },
    {
      "epoch": 2.7777953015312256,
      "grad_norm": 2.875,
      "learning_rate": 4.11497153360281e-06,
      "loss": 0.8382,
      "step": 792580
    },
    {
      "epoch": 2.7778303490381213,
      "grad_norm": 2.671875,
      "learning_rate": 4.114322504939108e-06,
      "loss": 0.7849,
      "step": 792590
    },
    {
      "epoch": 2.7778653965450166,
      "grad_norm": 2.921875,
      "learning_rate": 4.113673476275407e-06,
      "loss": 0.8333,
      "step": 792600
    },
    {
      "epoch": 2.7779004440519124,
      "grad_norm": 3.96875,
      "learning_rate": 4.113024447611705e-06,
      "loss": 0.7913,
      "step": 792610
    },
    {
      "epoch": 2.777935491558808,
      "grad_norm": 3.09375,
      "learning_rate": 4.112375418948003e-06,
      "loss": 0.7593,
      "step": 792620
    },
    {
      "epoch": 2.7779705390657035,
      "grad_norm": 3.0,
      "learning_rate": 4.111726390284301e-06,
      "loss": 0.9093,
      "step": 792630
    },
    {
      "epoch": 2.778005586572599,
      "grad_norm": 2.8125,
      "learning_rate": 4.111077361620599e-06,
      "loss": 0.8369,
      "step": 792640
    },
    {
      "epoch": 2.7780406340794945,
      "grad_norm": 2.5625,
      "learning_rate": 4.110428332956897e-06,
      "loss": 0.7762,
      "step": 792650
    },
    {
      "epoch": 2.7780756815863903,
      "grad_norm": 3.140625,
      "learning_rate": 4.109779304293195e-06,
      "loss": 0.8059,
      "step": 792660
    },
    {
      "epoch": 2.778110729093286,
      "grad_norm": 2.625,
      "learning_rate": 4.109130275629493e-06,
      "loss": 0.7886,
      "step": 792670
    },
    {
      "epoch": 2.778145776600182,
      "grad_norm": 3.125,
      "learning_rate": 4.108481246965791e-06,
      "loss": 0.767,
      "step": 792680
    },
    {
      "epoch": 2.778180824107077,
      "grad_norm": 2.765625,
      "learning_rate": 4.107832218302089e-06,
      "loss": 0.7486,
      "step": 792690
    },
    {
      "epoch": 2.778215871613973,
      "grad_norm": 2.890625,
      "learning_rate": 4.107183189638388e-06,
      "loss": 0.7765,
      "step": 792700
    },
    {
      "epoch": 2.778250919120868,
      "grad_norm": 3.0625,
      "learning_rate": 4.106534160974686e-06,
      "loss": 0.7832,
      "step": 792710
    },
    {
      "epoch": 2.778285966627764,
      "grad_norm": 3.015625,
      "learning_rate": 4.105885132310984e-06,
      "loss": 0.7829,
      "step": 792720
    },
    {
      "epoch": 2.7783210141346597,
      "grad_norm": 2.984375,
      "learning_rate": 4.105236103647282e-06,
      "loss": 0.7707,
      "step": 792730
    },
    {
      "epoch": 2.778356061641555,
      "grad_norm": 2.375,
      "learning_rate": 4.10458707498358e-06,
      "loss": 0.8191,
      "step": 792740
    },
    {
      "epoch": 2.7783911091484508,
      "grad_norm": 2.75,
      "learning_rate": 4.103938046319878e-06,
      "loss": 0.7695,
      "step": 792750
    },
    {
      "epoch": 2.778426156655346,
      "grad_norm": 3.40625,
      "learning_rate": 4.1032890176561766e-06,
      "loss": 0.8587,
      "step": 792760
    },
    {
      "epoch": 2.778461204162242,
      "grad_norm": 2.5625,
      "learning_rate": 4.102639988992474e-06,
      "loss": 0.8229,
      "step": 792770
    },
    {
      "epoch": 2.7784962516691376,
      "grad_norm": 2.828125,
      "learning_rate": 4.101990960328772e-06,
      "loss": 0.7244,
      "step": 792780
    },
    {
      "epoch": 2.7785312991760334,
      "grad_norm": 2.515625,
      "learning_rate": 4.1013419316650706e-06,
      "loss": 0.8177,
      "step": 792790
    },
    {
      "epoch": 2.7785663466829287,
      "grad_norm": 3.046875,
      "learning_rate": 4.1006929030013686e-06,
      "loss": 0.7852,
      "step": 792800
    },
    {
      "epoch": 2.7786013941898244,
      "grad_norm": 3.03125,
      "learning_rate": 4.1000438743376666e-06,
      "loss": 0.8723,
      "step": 792810
    },
    {
      "epoch": 2.7786364416967197,
      "grad_norm": 3.046875,
      "learning_rate": 4.0993948456739646e-06,
      "loss": 0.7828,
      "step": 792820
    },
    {
      "epoch": 2.7786714892036155,
      "grad_norm": 3.03125,
      "learning_rate": 4.0987458170102626e-06,
      "loss": 0.7838,
      "step": 792830
    },
    {
      "epoch": 2.7787065367105113,
      "grad_norm": 3.375,
      "learning_rate": 4.0980967883465606e-06,
      "loss": 0.8001,
      "step": 792840
    },
    {
      "epoch": 2.7787415842174066,
      "grad_norm": 2.65625,
      "learning_rate": 4.097447759682859e-06,
      "loss": 0.7855,
      "step": 792850
    },
    {
      "epoch": 2.7787766317243023,
      "grad_norm": 2.953125,
      "learning_rate": 4.0967987310191566e-06,
      "loss": 0.733,
      "step": 792860
    },
    {
      "epoch": 2.7788116792311977,
      "grad_norm": 2.9375,
      "learning_rate": 4.0961497023554545e-06,
      "loss": 0.7778,
      "step": 792870
    },
    {
      "epoch": 2.7788467267380934,
      "grad_norm": 3.40625,
      "learning_rate": 4.095500673691753e-06,
      "loss": 0.7324,
      "step": 792880
    },
    {
      "epoch": 2.778881774244989,
      "grad_norm": 2.671875,
      "learning_rate": 4.094851645028051e-06,
      "loss": 0.901,
      "step": 792890
    },
    {
      "epoch": 2.778916821751885,
      "grad_norm": 2.65625,
      "learning_rate": 4.094202616364349e-06,
      "loss": 0.7471,
      "step": 792900
    },
    {
      "epoch": 2.7789518692587802,
      "grad_norm": 2.703125,
      "learning_rate": 4.093553587700647e-06,
      "loss": 0.7633,
      "step": 792910
    },
    {
      "epoch": 2.778986916765676,
      "grad_norm": 3.09375,
      "learning_rate": 4.092904559036945e-06,
      "loss": 0.907,
      "step": 792920
    },
    {
      "epoch": 2.7790219642725713,
      "grad_norm": 3.109375,
      "learning_rate": 4.092255530373243e-06,
      "loss": 0.8491,
      "step": 792930
    },
    {
      "epoch": 2.779057011779467,
      "grad_norm": 2.59375,
      "learning_rate": 4.091606501709542e-06,
      "loss": 0.8128,
      "step": 792940
    },
    {
      "epoch": 2.779092059286363,
      "grad_norm": 2.8125,
      "learning_rate": 4.09095747304584e-06,
      "loss": 0.782,
      "step": 792950
    },
    {
      "epoch": 2.779127106793258,
      "grad_norm": 2.625,
      "learning_rate": 4.090308444382137e-06,
      "loss": 0.7806,
      "step": 792960
    },
    {
      "epoch": 2.779162154300154,
      "grad_norm": 2.90625,
      "learning_rate": 4.089659415718436e-06,
      "loss": 0.75,
      "step": 792970
    },
    {
      "epoch": 2.779197201807049,
      "grad_norm": 2.75,
      "learning_rate": 4.089010387054734e-06,
      "loss": 0.8004,
      "step": 792980
    },
    {
      "epoch": 2.779232249313945,
      "grad_norm": 2.765625,
      "learning_rate": 4.088361358391032e-06,
      "loss": 0.7979,
      "step": 792990
    },
    {
      "epoch": 2.7792672968208407,
      "grad_norm": 2.921875,
      "learning_rate": 4.08771232972733e-06,
      "loss": 0.8281,
      "step": 793000
    },
    {
      "epoch": 2.7793023443277365,
      "grad_norm": 2.703125,
      "learning_rate": 4.087063301063628e-06,
      "loss": 0.7761,
      "step": 793010
    },
    {
      "epoch": 2.779337391834632,
      "grad_norm": 2.765625,
      "learning_rate": 4.086414272399926e-06,
      "loss": 0.776,
      "step": 793020
    },
    {
      "epoch": 2.7793724393415276,
      "grad_norm": 2.84375,
      "learning_rate": 4.085765243736225e-06,
      "loss": 0.8341,
      "step": 793030
    },
    {
      "epoch": 2.779407486848423,
      "grad_norm": 2.796875,
      "learning_rate": 4.085116215072523e-06,
      "loss": 0.7525,
      "step": 793040
    },
    {
      "epoch": 2.7794425343553186,
      "grad_norm": 2.828125,
      "learning_rate": 4.08446718640882e-06,
      "loss": 0.7242,
      "step": 793050
    },
    {
      "epoch": 2.7794775818622144,
      "grad_norm": 3.21875,
      "learning_rate": 4.083818157745119e-06,
      "loss": 0.8309,
      "step": 793060
    },
    {
      "epoch": 2.7795126293691097,
      "grad_norm": 2.734375,
      "learning_rate": 4.083169129081417e-06,
      "loss": 0.7313,
      "step": 793070
    },
    {
      "epoch": 2.7795476768760055,
      "grad_norm": 3.15625,
      "learning_rate": 4.082520100417715e-06,
      "loss": 0.7319,
      "step": 793080
    },
    {
      "epoch": 2.7795827243829008,
      "grad_norm": 3.1875,
      "learning_rate": 4.081871071754013e-06,
      "loss": 0.9095,
      "step": 793090
    },
    {
      "epoch": 2.7796177718897965,
      "grad_norm": 3.109375,
      "learning_rate": 4.081222043090311e-06,
      "loss": 0.7594,
      "step": 793100
    },
    {
      "epoch": 2.7796528193966923,
      "grad_norm": 3.015625,
      "learning_rate": 4.080573014426609e-06,
      "loss": 0.7965,
      "step": 793110
    },
    {
      "epoch": 2.779687866903588,
      "grad_norm": 2.84375,
      "learning_rate": 4.079923985762907e-06,
      "loss": 0.811,
      "step": 793120
    },
    {
      "epoch": 2.7797229144104834,
      "grad_norm": 2.96875,
      "learning_rate": 4.079274957099206e-06,
      "loss": 0.8697,
      "step": 793130
    },
    {
      "epoch": 2.779757961917379,
      "grad_norm": 2.515625,
      "learning_rate": 4.078625928435503e-06,
      "loss": 0.7566,
      "step": 793140
    },
    {
      "epoch": 2.7797930094242744,
      "grad_norm": 2.75,
      "learning_rate": 4.077976899771802e-06,
      "loss": 0.7249,
      "step": 793150
    },
    {
      "epoch": 2.77982805693117,
      "grad_norm": 3.015625,
      "learning_rate": 4.0773278711081e-06,
      "loss": 0.8014,
      "step": 793160
    },
    {
      "epoch": 2.779863104438066,
      "grad_norm": 3.234375,
      "learning_rate": 4.076678842444398e-06,
      "loss": 0.7903,
      "step": 793170
    },
    {
      "epoch": 2.7798981519449613,
      "grad_norm": 3.1875,
      "learning_rate": 4.076029813780696e-06,
      "loss": 0.7886,
      "step": 793180
    },
    {
      "epoch": 2.779933199451857,
      "grad_norm": 2.84375,
      "learning_rate": 4.075380785116994e-06,
      "loss": 0.7851,
      "step": 793190
    },
    {
      "epoch": 2.7799682469587523,
      "grad_norm": 2.75,
      "learning_rate": 4.074731756453292e-06,
      "loss": 0.8019,
      "step": 793200
    },
    {
      "epoch": 2.780003294465648,
      "grad_norm": 3.109375,
      "learning_rate": 4.07408272778959e-06,
      "loss": 0.782,
      "step": 793210
    },
    {
      "epoch": 2.780038341972544,
      "grad_norm": 2.890625,
      "learning_rate": 4.073433699125889e-06,
      "loss": 0.8328,
      "step": 793220
    },
    {
      "epoch": 2.7800733894794396,
      "grad_norm": 2.75,
      "learning_rate": 4.072784670462187e-06,
      "loss": 0.8728,
      "step": 793230
    },
    {
      "epoch": 2.780108436986335,
      "grad_norm": 3.390625,
      "learning_rate": 4.072135641798484e-06,
      "loss": 0.8369,
      "step": 793240
    },
    {
      "epoch": 2.7801434844932307,
      "grad_norm": 2.8125,
      "learning_rate": 4.071486613134783e-06,
      "loss": 0.7749,
      "step": 793250
    },
    {
      "epoch": 2.780178532000126,
      "grad_norm": 2.65625,
      "learning_rate": 4.070837584471081e-06,
      "loss": 0.7601,
      "step": 793260
    },
    {
      "epoch": 2.7802135795070217,
      "grad_norm": 2.90625,
      "learning_rate": 4.070188555807379e-06,
      "loss": 0.7779,
      "step": 793270
    },
    {
      "epoch": 2.7802486270139175,
      "grad_norm": 3.015625,
      "learning_rate": 4.069539527143678e-06,
      "loss": 0.8242,
      "step": 793280
    },
    {
      "epoch": 2.780283674520813,
      "grad_norm": 3.421875,
      "learning_rate": 4.068890498479975e-06,
      "loss": 0.7659,
      "step": 793290
    },
    {
      "epoch": 2.7803187220277086,
      "grad_norm": 2.78125,
      "learning_rate": 4.068241469816273e-06,
      "loss": 0.8042,
      "step": 793300
    },
    {
      "epoch": 2.780353769534604,
      "grad_norm": 2.890625,
      "learning_rate": 4.067592441152572e-06,
      "loss": 0.7819,
      "step": 793310
    },
    {
      "epoch": 2.7803888170414996,
      "grad_norm": 3.3125,
      "learning_rate": 4.06694341248887e-06,
      "loss": 0.7967,
      "step": 793320
    },
    {
      "epoch": 2.7804238645483954,
      "grad_norm": 2.6875,
      "learning_rate": 4.066294383825167e-06,
      "loss": 0.7801,
      "step": 793330
    },
    {
      "epoch": 2.780458912055291,
      "grad_norm": 2.5625,
      "learning_rate": 4.065645355161466e-06,
      "loss": 0.7057,
      "step": 793340
    },
    {
      "epoch": 2.7804939595621865,
      "grad_norm": 3.234375,
      "learning_rate": 4.064996326497764e-06,
      "loss": 0.786,
      "step": 793350
    },
    {
      "epoch": 2.7805290070690822,
      "grad_norm": 3.078125,
      "learning_rate": 4.064347297834062e-06,
      "loss": 0.8212,
      "step": 793360
    },
    {
      "epoch": 2.7805640545759776,
      "grad_norm": 2.875,
      "learning_rate": 4.0636982691703605e-06,
      "loss": 0.8173,
      "step": 793370
    },
    {
      "epoch": 2.7805991020828733,
      "grad_norm": 2.625,
      "learning_rate": 4.063049240506658e-06,
      "loss": 0.8405,
      "step": 793380
    },
    {
      "epoch": 2.780634149589769,
      "grad_norm": 3.0,
      "learning_rate": 4.062400211842956e-06,
      "loss": 0.8175,
      "step": 793390
    },
    {
      "epoch": 2.7806691970966644,
      "grad_norm": 3.390625,
      "learning_rate": 4.0617511831792545e-06,
      "loss": 0.7432,
      "step": 793400
    },
    {
      "epoch": 2.78070424460356,
      "grad_norm": 2.71875,
      "learning_rate": 4.0611021545155525e-06,
      "loss": 0.7727,
      "step": 793410
    },
    {
      "epoch": 2.7807392921104555,
      "grad_norm": 2.984375,
      "learning_rate": 4.0604531258518505e-06,
      "loss": 0.8059,
      "step": 793420
    },
    {
      "epoch": 2.780774339617351,
      "grad_norm": 2.875,
      "learning_rate": 4.0598040971881485e-06,
      "loss": 0.7831,
      "step": 793430
    },
    {
      "epoch": 2.780809387124247,
      "grad_norm": 3.078125,
      "learning_rate": 4.0591550685244465e-06,
      "loss": 0.885,
      "step": 793440
    },
    {
      "epoch": 2.7808444346311427,
      "grad_norm": 2.78125,
      "learning_rate": 4.0585060398607445e-06,
      "loss": 0.7551,
      "step": 793450
    },
    {
      "epoch": 2.780879482138038,
      "grad_norm": 2.84375,
      "learning_rate": 4.057857011197043e-06,
      "loss": 0.8227,
      "step": 793460
    },
    {
      "epoch": 2.780914529644934,
      "grad_norm": 2.859375,
      "learning_rate": 4.0572079825333405e-06,
      "loss": 0.859,
      "step": 793470
    },
    {
      "epoch": 2.780949577151829,
      "grad_norm": 2.890625,
      "learning_rate": 4.0565589538696385e-06,
      "loss": 0.8307,
      "step": 793480
    },
    {
      "epoch": 2.780984624658725,
      "grad_norm": 3.078125,
      "learning_rate": 4.055909925205937e-06,
      "loss": 0.8319,
      "step": 793490
    },
    {
      "epoch": 2.7810196721656206,
      "grad_norm": 3.5,
      "learning_rate": 4.055260896542235e-06,
      "loss": 0.8662,
      "step": 793500
    },
    {
      "epoch": 2.781054719672516,
      "grad_norm": 3.25,
      "learning_rate": 4.054611867878533e-06,
      "loss": 0.7516,
      "step": 793510
    },
    {
      "epoch": 2.7810897671794117,
      "grad_norm": 2.78125,
      "learning_rate": 4.053962839214831e-06,
      "loss": 0.7409,
      "step": 793520
    },
    {
      "epoch": 2.7811248146863075,
      "grad_norm": 2.96875,
      "learning_rate": 4.053313810551129e-06,
      "loss": 0.888,
      "step": 793530
    },
    {
      "epoch": 2.7811598621932028,
      "grad_norm": 3.21875,
      "learning_rate": 4.052664781887427e-06,
      "loss": 0.8134,
      "step": 793540
    },
    {
      "epoch": 2.7811949097000985,
      "grad_norm": 2.734375,
      "learning_rate": 4.052015753223725e-06,
      "loss": 0.8548,
      "step": 793550
    },
    {
      "epoch": 2.7812299572069943,
      "grad_norm": 2.6875,
      "learning_rate": 4.051366724560024e-06,
      "loss": 0.7891,
      "step": 793560
    },
    {
      "epoch": 2.7812650047138896,
      "grad_norm": 3.703125,
      "learning_rate": 4.050717695896321e-06,
      "loss": 0.8018,
      "step": 793570
    },
    {
      "epoch": 2.7813000522207854,
      "grad_norm": 2.828125,
      "learning_rate": 4.05006866723262e-06,
      "loss": 0.8033,
      "step": 793580
    },
    {
      "epoch": 2.7813350997276807,
      "grad_norm": 2.359375,
      "learning_rate": 4.049419638568918e-06,
      "loss": 0.7812,
      "step": 793590
    },
    {
      "epoch": 2.7813701472345764,
      "grad_norm": 3.0625,
      "learning_rate": 4.048770609905216e-06,
      "loss": 0.8806,
      "step": 793600
    },
    {
      "epoch": 2.781405194741472,
      "grad_norm": 2.6875,
      "learning_rate": 4.048121581241514e-06,
      "loss": 0.8615,
      "step": 793610
    },
    {
      "epoch": 2.7814402422483675,
      "grad_norm": 2.90625,
      "learning_rate": 4.047472552577812e-06,
      "loss": 0.7679,
      "step": 793620
    },
    {
      "epoch": 2.7814752897552633,
      "grad_norm": 2.765625,
      "learning_rate": 4.04682352391411e-06,
      "loss": 0.8231,
      "step": 793630
    },
    {
      "epoch": 2.781510337262159,
      "grad_norm": 2.953125,
      "learning_rate": 4.046174495250408e-06,
      "loss": 0.8243,
      "step": 793640
    },
    {
      "epoch": 2.7815453847690543,
      "grad_norm": 2.859375,
      "learning_rate": 4.045525466586707e-06,
      "loss": 0.8099,
      "step": 793650
    },
    {
      "epoch": 2.78158043227595,
      "grad_norm": 3.140625,
      "learning_rate": 4.044876437923004e-06,
      "loss": 0.8189,
      "step": 793660
    },
    {
      "epoch": 2.781615479782846,
      "grad_norm": 2.65625,
      "learning_rate": 4.044227409259302e-06,
      "loss": 0.7934,
      "step": 793670
    },
    {
      "epoch": 2.781650527289741,
      "grad_norm": 2.90625,
      "learning_rate": 4.043578380595601e-06,
      "loss": 0.8544,
      "step": 793680
    },
    {
      "epoch": 2.781685574796637,
      "grad_norm": 3.40625,
      "learning_rate": 4.042929351931899e-06,
      "loss": 0.8715,
      "step": 793690
    },
    {
      "epoch": 2.7817206223035322,
      "grad_norm": 2.9375,
      "learning_rate": 4.042280323268197e-06,
      "loss": 0.746,
      "step": 793700
    },
    {
      "epoch": 2.781755669810428,
      "grad_norm": 2.609375,
      "learning_rate": 4.041631294604495e-06,
      "loss": 0.8482,
      "step": 793710
    },
    {
      "epoch": 2.7817907173173237,
      "grad_norm": 3.203125,
      "learning_rate": 4.040982265940793e-06,
      "loss": 0.7381,
      "step": 793720
    },
    {
      "epoch": 2.7818257648242195,
      "grad_norm": 3.484375,
      "learning_rate": 4.040333237277091e-06,
      "loss": 0.8158,
      "step": 793730
    },
    {
      "epoch": 2.781860812331115,
      "grad_norm": 3.078125,
      "learning_rate": 4.03968420861339e-06,
      "loss": 0.7102,
      "step": 793740
    },
    {
      "epoch": 2.7818958598380106,
      "grad_norm": 2.5625,
      "learning_rate": 4.039035179949688e-06,
      "loss": 0.7836,
      "step": 793750
    },
    {
      "epoch": 2.781930907344906,
      "grad_norm": 2.8125,
      "learning_rate": 4.038386151285985e-06,
      "loss": 0.7444,
      "step": 793760
    },
    {
      "epoch": 2.7819659548518016,
      "grad_norm": 2.65625,
      "learning_rate": 4.037737122622284e-06,
      "loss": 0.7881,
      "step": 793770
    },
    {
      "epoch": 2.7820010023586974,
      "grad_norm": 3.046875,
      "learning_rate": 4.037088093958582e-06,
      "loss": 0.8159,
      "step": 793780
    },
    {
      "epoch": 2.7820360498655927,
      "grad_norm": 2.875,
      "learning_rate": 4.03643906529488e-06,
      "loss": 0.8881,
      "step": 793790
    },
    {
      "epoch": 2.7820710973724885,
      "grad_norm": 2.96875,
      "learning_rate": 4.035790036631178e-06,
      "loss": 0.8661,
      "step": 793800
    },
    {
      "epoch": 2.782106144879384,
      "grad_norm": 3.265625,
      "learning_rate": 4.035141007967476e-06,
      "loss": 0.8348,
      "step": 793810
    },
    {
      "epoch": 2.7821411923862795,
      "grad_norm": 3.375,
      "learning_rate": 4.034491979303774e-06,
      "loss": 0.848,
      "step": 793820
    },
    {
      "epoch": 2.7821762398931753,
      "grad_norm": 2.421875,
      "learning_rate": 4.033842950640073e-06,
      "loss": 0.8362,
      "step": 793830
    },
    {
      "epoch": 2.782211287400071,
      "grad_norm": 2.59375,
      "learning_rate": 4.033193921976371e-06,
      "loss": 0.7856,
      "step": 793840
    },
    {
      "epoch": 2.7822463349069664,
      "grad_norm": 3.203125,
      "learning_rate": 4.032544893312668e-06,
      "loss": 0.8293,
      "step": 793850
    },
    {
      "epoch": 2.782281382413862,
      "grad_norm": 2.625,
      "learning_rate": 4.031895864648967e-06,
      "loss": 0.7977,
      "step": 793860
    },
    {
      "epoch": 2.7823164299207575,
      "grad_norm": 2.765625,
      "learning_rate": 4.031246835985265e-06,
      "loss": 0.7669,
      "step": 793870
    },
    {
      "epoch": 2.782351477427653,
      "grad_norm": 3.171875,
      "learning_rate": 4.030597807321563e-06,
      "loss": 0.9034,
      "step": 793880
    },
    {
      "epoch": 2.782386524934549,
      "grad_norm": 2.828125,
      "learning_rate": 4.0299487786578616e-06,
      "loss": 0.7877,
      "step": 793890
    },
    {
      "epoch": 2.7824215724414443,
      "grad_norm": 2.609375,
      "learning_rate": 4.029299749994159e-06,
      "loss": 0.8061,
      "step": 793900
    },
    {
      "epoch": 2.78245661994834,
      "grad_norm": 3.15625,
      "learning_rate": 4.028650721330457e-06,
      "loss": 0.771,
      "step": 793910
    },
    {
      "epoch": 2.7824916674552354,
      "grad_norm": 2.96875,
      "learning_rate": 4.0280016926667556e-06,
      "loss": 0.8138,
      "step": 793920
    },
    {
      "epoch": 2.782526714962131,
      "grad_norm": 3.125,
      "learning_rate": 4.0273526640030536e-06,
      "loss": 0.8353,
      "step": 793930
    },
    {
      "epoch": 2.782561762469027,
      "grad_norm": 3.15625,
      "learning_rate": 4.026703635339351e-06,
      "loss": 0.7843,
      "step": 793940
    },
    {
      "epoch": 2.7825968099759226,
      "grad_norm": 3.125,
      "learning_rate": 4.0260546066756496e-06,
      "loss": 0.7655,
      "step": 793950
    },
    {
      "epoch": 2.782631857482818,
      "grad_norm": 2.953125,
      "learning_rate": 4.0254055780119476e-06,
      "loss": 0.793,
      "step": 793960
    },
    {
      "epoch": 2.7826669049897137,
      "grad_norm": 3.0625,
      "learning_rate": 4.0247565493482455e-06,
      "loss": 0.8273,
      "step": 793970
    },
    {
      "epoch": 2.782701952496609,
      "grad_norm": 3.125,
      "learning_rate": 4.0241075206845435e-06,
      "loss": 0.8444,
      "step": 793980
    },
    {
      "epoch": 2.7827370000035048,
      "grad_norm": 2.796875,
      "learning_rate": 4.0234584920208415e-06,
      "loss": 0.7489,
      "step": 793990
    },
    {
      "epoch": 2.7827720475104005,
      "grad_norm": 2.296875,
      "learning_rate": 4.0228094633571395e-06,
      "loss": 0.8363,
      "step": 794000
    },
    {
      "epoch": 2.782807095017296,
      "grad_norm": 2.984375,
      "learning_rate": 4.022160434693438e-06,
      "loss": 0.8172,
      "step": 794010
    },
    {
      "epoch": 2.7828421425241916,
      "grad_norm": 2.640625,
      "learning_rate": 4.021511406029736e-06,
      "loss": 0.7749,
      "step": 794020
    },
    {
      "epoch": 2.782877190031087,
      "grad_norm": 2.796875,
      "learning_rate": 4.020862377366034e-06,
      "loss": 0.8573,
      "step": 794030
    },
    {
      "epoch": 2.7829122375379827,
      "grad_norm": 3.109375,
      "learning_rate": 4.020213348702332e-06,
      "loss": 0.799,
      "step": 794040
    },
    {
      "epoch": 2.7829472850448784,
      "grad_norm": 3.265625,
      "learning_rate": 4.01956432003863e-06,
      "loss": 0.7997,
      "step": 794050
    },
    {
      "epoch": 2.782982332551774,
      "grad_norm": 2.453125,
      "learning_rate": 4.018915291374928e-06,
      "loss": 0.8022,
      "step": 794060
    },
    {
      "epoch": 2.7830173800586695,
      "grad_norm": 2.5,
      "learning_rate": 4.018266262711226e-06,
      "loss": 0.7407,
      "step": 794070
    },
    {
      "epoch": 2.7830524275655653,
      "grad_norm": 2.46875,
      "learning_rate": 4.017617234047524e-06,
      "loss": 0.8226,
      "step": 794080
    },
    {
      "epoch": 2.7830874750724606,
      "grad_norm": 2.765625,
      "learning_rate": 4.016968205383822e-06,
      "loss": 0.8142,
      "step": 794090
    },
    {
      "epoch": 2.7831225225793563,
      "grad_norm": 3.109375,
      "learning_rate": 4.01631917672012e-06,
      "loss": 0.8057,
      "step": 794100
    },
    {
      "epoch": 2.783157570086252,
      "grad_norm": 2.890625,
      "learning_rate": 4.015670148056419e-06,
      "loss": 0.7741,
      "step": 794110
    },
    {
      "epoch": 2.7831926175931474,
      "grad_norm": 3.28125,
      "learning_rate": 4.015021119392717e-06,
      "loss": 0.7989,
      "step": 794120
    },
    {
      "epoch": 2.783227665100043,
      "grad_norm": 3.265625,
      "learning_rate": 4.014372090729015e-06,
      "loss": 0.7738,
      "step": 794130
    },
    {
      "epoch": 2.7832627126069385,
      "grad_norm": 3.28125,
      "learning_rate": 4.013723062065313e-06,
      "loss": 0.7867,
      "step": 794140
    },
    {
      "epoch": 2.7832977601138342,
      "grad_norm": 2.90625,
      "learning_rate": 4.013074033401611e-06,
      "loss": 0.7737,
      "step": 794150
    },
    {
      "epoch": 2.78333280762073,
      "grad_norm": 2.59375,
      "learning_rate": 4.012425004737909e-06,
      "loss": 0.7802,
      "step": 794160
    },
    {
      "epoch": 2.7833678551276257,
      "grad_norm": 2.875,
      "learning_rate": 4.011775976074208e-06,
      "loss": 0.8023,
      "step": 794170
    },
    {
      "epoch": 2.783402902634521,
      "grad_norm": 2.59375,
      "learning_rate": 4.011126947410505e-06,
      "loss": 0.8203,
      "step": 794180
    },
    {
      "epoch": 2.783437950141417,
      "grad_norm": 2.609375,
      "learning_rate": 4.010477918746803e-06,
      "loss": 0.8402,
      "step": 794190
    },
    {
      "epoch": 2.783472997648312,
      "grad_norm": 2.375,
      "learning_rate": 4.009828890083102e-06,
      "loss": 0.8446,
      "step": 794200
    },
    {
      "epoch": 2.783508045155208,
      "grad_norm": 3.0,
      "learning_rate": 4.0091798614194e-06,
      "loss": 0.789,
      "step": 794210
    },
    {
      "epoch": 2.7835430926621036,
      "grad_norm": 2.578125,
      "learning_rate": 4.008530832755698e-06,
      "loss": 0.7506,
      "step": 794220
    },
    {
      "epoch": 2.783578140168999,
      "grad_norm": 2.953125,
      "learning_rate": 4.007881804091996e-06,
      "loss": 0.8045,
      "step": 794230
    },
    {
      "epoch": 2.7836131876758947,
      "grad_norm": 2.90625,
      "learning_rate": 4.007232775428294e-06,
      "loss": 0.7836,
      "step": 794240
    },
    {
      "epoch": 2.78364823518279,
      "grad_norm": 2.8125,
      "learning_rate": 4.006583746764592e-06,
      "loss": 0.7834,
      "step": 794250
    },
    {
      "epoch": 2.783683282689686,
      "grad_norm": 3.15625,
      "learning_rate": 4.005934718100891e-06,
      "loss": 0.8587,
      "step": 794260
    },
    {
      "epoch": 2.7837183301965815,
      "grad_norm": 3.296875,
      "learning_rate": 4.005285689437188e-06,
      "loss": 0.8801,
      "step": 794270
    },
    {
      "epoch": 2.7837533777034773,
      "grad_norm": 2.796875,
      "learning_rate": 4.004636660773486e-06,
      "loss": 0.7651,
      "step": 794280
    },
    {
      "epoch": 2.7837884252103726,
      "grad_norm": 2.5,
      "learning_rate": 4.003987632109785e-06,
      "loss": 0.7931,
      "step": 794290
    },
    {
      "epoch": 2.7838234727172684,
      "grad_norm": 2.859375,
      "learning_rate": 4.003338603446083e-06,
      "loss": 0.8065,
      "step": 794300
    },
    {
      "epoch": 2.7838585202241637,
      "grad_norm": 3.0,
      "learning_rate": 4.002689574782381e-06,
      "loss": 0.7325,
      "step": 794310
    },
    {
      "epoch": 2.7838935677310594,
      "grad_norm": 3.0,
      "learning_rate": 4.002040546118679e-06,
      "loss": 0.8661,
      "step": 794320
    },
    {
      "epoch": 2.783928615237955,
      "grad_norm": 2.890625,
      "learning_rate": 4.001391517454977e-06,
      "loss": 0.7903,
      "step": 794330
    },
    {
      "epoch": 2.7839636627448505,
      "grad_norm": 2.984375,
      "learning_rate": 4.000742488791275e-06,
      "loss": 0.8256,
      "step": 794340
    },
    {
      "epoch": 2.7839987102517463,
      "grad_norm": 3.15625,
      "learning_rate": 4.000093460127574e-06,
      "loss": 0.8073,
      "step": 794350
    },
    {
      "epoch": 2.7840337577586416,
      "grad_norm": 2.921875,
      "learning_rate": 3.999444431463872e-06,
      "loss": 0.7614,
      "step": 794360
    },
    {
      "epoch": 2.7840688052655373,
      "grad_norm": 2.671875,
      "learning_rate": 3.998795402800169e-06,
      "loss": 0.7819,
      "step": 794370
    },
    {
      "epoch": 2.784103852772433,
      "grad_norm": 3.140625,
      "learning_rate": 3.998146374136468e-06,
      "loss": 0.7533,
      "step": 794380
    },
    {
      "epoch": 2.784138900279329,
      "grad_norm": 2.921875,
      "learning_rate": 3.997497345472766e-06,
      "loss": 0.8819,
      "step": 794390
    },
    {
      "epoch": 2.784173947786224,
      "grad_norm": 3.0,
      "learning_rate": 3.996848316809064e-06,
      "loss": 0.7899,
      "step": 794400
    },
    {
      "epoch": 2.78420899529312,
      "grad_norm": 2.453125,
      "learning_rate": 3.996199288145362e-06,
      "loss": 0.8401,
      "step": 794410
    },
    {
      "epoch": 2.7842440428000153,
      "grad_norm": 3.203125,
      "learning_rate": 3.99555025948166e-06,
      "loss": 0.7871,
      "step": 794420
    },
    {
      "epoch": 2.784279090306911,
      "grad_norm": 3.359375,
      "learning_rate": 3.994901230817958e-06,
      "loss": 0.8035,
      "step": 794430
    },
    {
      "epoch": 2.7843141378138068,
      "grad_norm": 3.21875,
      "learning_rate": 3.994252202154257e-06,
      "loss": 0.8226,
      "step": 794440
    },
    {
      "epoch": 2.784349185320702,
      "grad_norm": 3.09375,
      "learning_rate": 3.993603173490555e-06,
      "loss": 0.842,
      "step": 794450
    },
    {
      "epoch": 2.784384232827598,
      "grad_norm": 2.671875,
      "learning_rate": 3.992954144826852e-06,
      "loss": 0.7733,
      "step": 794460
    },
    {
      "epoch": 2.784419280334493,
      "grad_norm": 2.796875,
      "learning_rate": 3.992305116163151e-06,
      "loss": 0.7783,
      "step": 794470
    },
    {
      "epoch": 2.784454327841389,
      "grad_norm": 3.5,
      "learning_rate": 3.991656087499449e-06,
      "loss": 0.8262,
      "step": 794480
    },
    {
      "epoch": 2.7844893753482847,
      "grad_norm": 2.890625,
      "learning_rate": 3.991007058835747e-06,
      "loss": 0.8534,
      "step": 794490
    },
    {
      "epoch": 2.7845244228551804,
      "grad_norm": 2.9375,
      "learning_rate": 3.990358030172045e-06,
      "loss": 0.9161,
      "step": 794500
    },
    {
      "epoch": 2.7845594703620757,
      "grad_norm": 2.796875,
      "learning_rate": 3.989709001508343e-06,
      "loss": 0.7936,
      "step": 794510
    },
    {
      "epoch": 2.7845945178689715,
      "grad_norm": 3.25,
      "learning_rate": 3.989059972844641e-06,
      "loss": 0.7452,
      "step": 794520
    },
    {
      "epoch": 2.784629565375867,
      "grad_norm": 2.84375,
      "learning_rate": 3.9884109441809395e-06,
      "loss": 0.7965,
      "step": 794530
    },
    {
      "epoch": 2.7846646128827626,
      "grad_norm": 2.828125,
      "learning_rate": 3.9877619155172375e-06,
      "loss": 0.7673,
      "step": 794540
    },
    {
      "epoch": 2.7846996603896583,
      "grad_norm": 2.515625,
      "learning_rate": 3.987112886853535e-06,
      "loss": 0.7809,
      "step": 794550
    },
    {
      "epoch": 2.7847347078965536,
      "grad_norm": 3.015625,
      "learning_rate": 3.9864638581898335e-06,
      "loss": 0.8169,
      "step": 794560
    },
    {
      "epoch": 2.7847697554034494,
      "grad_norm": 3.15625,
      "learning_rate": 3.9858148295261315e-06,
      "loss": 0.8264,
      "step": 794570
    },
    {
      "epoch": 2.7848048029103447,
      "grad_norm": 3.53125,
      "learning_rate": 3.9851658008624295e-06,
      "loss": 0.8537,
      "step": 794580
    },
    {
      "epoch": 2.7848398504172405,
      "grad_norm": 2.84375,
      "learning_rate": 3.9845167721987275e-06,
      "loss": 0.7747,
      "step": 794590
    },
    {
      "epoch": 2.7848748979241362,
      "grad_norm": 2.859375,
      "learning_rate": 3.9838677435350255e-06,
      "loss": 0.8091,
      "step": 794600
    },
    {
      "epoch": 2.784909945431032,
      "grad_norm": 3.078125,
      "learning_rate": 3.9832187148713235e-06,
      "loss": 0.791,
      "step": 794610
    },
    {
      "epoch": 2.7849449929379273,
      "grad_norm": 3.21875,
      "learning_rate": 3.9825696862076215e-06,
      "loss": 0.803,
      "step": 794620
    },
    {
      "epoch": 2.784980040444823,
      "grad_norm": 3.0,
      "learning_rate": 3.98192065754392e-06,
      "loss": 0.7966,
      "step": 794630
    },
    {
      "epoch": 2.7850150879517184,
      "grad_norm": 3.046875,
      "learning_rate": 3.981271628880218e-06,
      "loss": 0.7921,
      "step": 794640
    },
    {
      "epoch": 2.785050135458614,
      "grad_norm": 3.03125,
      "learning_rate": 3.980622600216516e-06,
      "loss": 0.8719,
      "step": 794650
    },
    {
      "epoch": 2.78508518296551,
      "grad_norm": 2.890625,
      "learning_rate": 3.979973571552814e-06,
      "loss": 0.7317,
      "step": 794660
    },
    {
      "epoch": 2.785120230472405,
      "grad_norm": 2.9375,
      "learning_rate": 3.979324542889112e-06,
      "loss": 0.7857,
      "step": 794670
    },
    {
      "epoch": 2.785155277979301,
      "grad_norm": 2.96875,
      "learning_rate": 3.97867551422541e-06,
      "loss": 0.765,
      "step": 794680
    },
    {
      "epoch": 2.7851903254861963,
      "grad_norm": 3.203125,
      "learning_rate": 3.978026485561709e-06,
      "loss": 0.8223,
      "step": 794690
    },
    {
      "epoch": 2.785225372993092,
      "grad_norm": 2.859375,
      "learning_rate": 3.977377456898006e-06,
      "loss": 0.7988,
      "step": 794700
    },
    {
      "epoch": 2.785260420499988,
      "grad_norm": 2.640625,
      "learning_rate": 3.976728428234304e-06,
      "loss": 0.8577,
      "step": 794710
    },
    {
      "epoch": 2.7852954680068835,
      "grad_norm": 2.703125,
      "learning_rate": 3.976079399570603e-06,
      "loss": 0.7791,
      "step": 794720
    },
    {
      "epoch": 2.785330515513779,
      "grad_norm": 3.171875,
      "learning_rate": 3.975430370906901e-06,
      "loss": 0.8584,
      "step": 794730
    },
    {
      "epoch": 2.7853655630206746,
      "grad_norm": 2.96875,
      "learning_rate": 3.974781342243198e-06,
      "loss": 0.7679,
      "step": 794740
    },
    {
      "epoch": 2.78540061052757,
      "grad_norm": 2.671875,
      "learning_rate": 3.974132313579497e-06,
      "loss": 0.7569,
      "step": 794750
    },
    {
      "epoch": 2.7854356580344657,
      "grad_norm": 2.71875,
      "learning_rate": 3.973483284915795e-06,
      "loss": 0.8468,
      "step": 794760
    },
    {
      "epoch": 2.7854707055413614,
      "grad_norm": 2.953125,
      "learning_rate": 3.972834256252093e-06,
      "loss": 0.8559,
      "step": 794770
    },
    {
      "epoch": 2.7855057530482568,
      "grad_norm": 2.875,
      "learning_rate": 3.972185227588392e-06,
      "loss": 0.7841,
      "step": 794780
    },
    {
      "epoch": 2.7855408005551525,
      "grad_norm": 2.859375,
      "learning_rate": 3.971536198924689e-06,
      "loss": 0.7685,
      "step": 794790
    },
    {
      "epoch": 2.785575848062048,
      "grad_norm": 3.0,
      "learning_rate": 3.970887170260987e-06,
      "loss": 0.794,
      "step": 794800
    },
    {
      "epoch": 2.7856108955689436,
      "grad_norm": 3.078125,
      "learning_rate": 3.970238141597286e-06,
      "loss": 0.7639,
      "step": 794810
    },
    {
      "epoch": 2.7856459430758393,
      "grad_norm": 2.734375,
      "learning_rate": 3.969589112933584e-06,
      "loss": 0.7762,
      "step": 794820
    },
    {
      "epoch": 2.785680990582735,
      "grad_norm": 2.78125,
      "learning_rate": 3.968940084269882e-06,
      "loss": 0.7145,
      "step": 794830
    },
    {
      "epoch": 2.7857160380896304,
      "grad_norm": 2.765625,
      "learning_rate": 3.96829105560618e-06,
      "loss": 0.8042,
      "step": 794840
    },
    {
      "epoch": 2.785751085596526,
      "grad_norm": 2.4375,
      "learning_rate": 3.967642026942478e-06,
      "loss": 0.734,
      "step": 794850
    },
    {
      "epoch": 2.7857861331034215,
      "grad_norm": 2.953125,
      "learning_rate": 3.966992998278776e-06,
      "loss": 0.8695,
      "step": 794860
    },
    {
      "epoch": 2.7858211806103172,
      "grad_norm": 2.6875,
      "learning_rate": 3.966343969615075e-06,
      "loss": 0.776,
      "step": 794870
    },
    {
      "epoch": 2.785856228117213,
      "grad_norm": 2.828125,
      "learning_rate": 3.965694940951372e-06,
      "loss": 0.778,
      "step": 794880
    },
    {
      "epoch": 2.7858912756241083,
      "grad_norm": 2.6875,
      "learning_rate": 3.96504591228767e-06,
      "loss": 0.8337,
      "step": 794890
    },
    {
      "epoch": 2.785926323131004,
      "grad_norm": 2.703125,
      "learning_rate": 3.964396883623969e-06,
      "loss": 0.8252,
      "step": 794900
    },
    {
      "epoch": 2.7859613706379,
      "grad_norm": 2.9375,
      "learning_rate": 3.963747854960267e-06,
      "loss": 0.8291,
      "step": 794910
    },
    {
      "epoch": 2.785996418144795,
      "grad_norm": 3.6875,
      "learning_rate": 3.963098826296565e-06,
      "loss": 0.8533,
      "step": 794920
    },
    {
      "epoch": 2.786031465651691,
      "grad_norm": 3.0625,
      "learning_rate": 3.962449797632863e-06,
      "loss": 0.8549,
      "step": 794930
    },
    {
      "epoch": 2.7860665131585867,
      "grad_norm": 3.0,
      "learning_rate": 3.961800768969161e-06,
      "loss": 0.7774,
      "step": 794940
    },
    {
      "epoch": 2.786101560665482,
      "grad_norm": 2.703125,
      "learning_rate": 3.961151740305459e-06,
      "loss": 0.8087,
      "step": 794950
    },
    {
      "epoch": 2.7861366081723777,
      "grad_norm": 2.734375,
      "learning_rate": 3.960502711641758e-06,
      "loss": 0.7569,
      "step": 794960
    },
    {
      "epoch": 2.786171655679273,
      "grad_norm": 3.21875,
      "learning_rate": 3.959853682978056e-06,
      "loss": 0.8928,
      "step": 794970
    },
    {
      "epoch": 2.786206703186169,
      "grad_norm": 2.96875,
      "learning_rate": 3.959204654314353e-06,
      "loss": 0.7825,
      "step": 794980
    },
    {
      "epoch": 2.7862417506930646,
      "grad_norm": 3.265625,
      "learning_rate": 3.958555625650652e-06,
      "loss": 0.8188,
      "step": 794990
    },
    {
      "epoch": 2.78627679819996,
      "grad_norm": 2.875,
      "learning_rate": 3.95790659698695e-06,
      "loss": 0.7464,
      "step": 795000
    },
    {
      "epoch": 2.78627679819996,
      "eval_loss": 0.7519378066062927,
      "eval_runtime": 551.6313,
      "eval_samples_per_second": 689.656,
      "eval_steps_per_second": 57.471,
      "step": 795000
    },
    {
      "epoch": 2.7863118457068556,
      "grad_norm": 2.53125,
      "learning_rate": 3.957257568323248e-06,
      "loss": 0.8214,
      "step": 795010
    },
    {
      "epoch": 2.7863468932137514,
      "grad_norm": 2.875,
      "learning_rate": 3.956608539659546e-06,
      "loss": 0.7736,
      "step": 795020
    },
    {
      "epoch": 2.7863819407206467,
      "grad_norm": 3.21875,
      "learning_rate": 3.955959510995844e-06,
      "loss": 0.7581,
      "step": 795030
    },
    {
      "epoch": 2.7864169882275425,
      "grad_norm": 3.09375,
      "learning_rate": 3.955310482332142e-06,
      "loss": 0.7514,
      "step": 795040
    },
    {
      "epoch": 2.7864520357344382,
      "grad_norm": 2.859375,
      "learning_rate": 3.95466145366844e-06,
      "loss": 0.8027,
      "step": 795050
    },
    {
      "epoch": 2.7864870832413335,
      "grad_norm": 3.171875,
      "learning_rate": 3.9540124250047386e-06,
      "loss": 0.7382,
      "step": 795060
    },
    {
      "epoch": 2.7865221307482293,
      "grad_norm": 3.515625,
      "learning_rate": 3.953363396341036e-06,
      "loss": 0.8276,
      "step": 795070
    },
    {
      "epoch": 2.7865571782551246,
      "grad_norm": 3.234375,
      "learning_rate": 3.9527143676773345e-06,
      "loss": 0.783,
      "step": 795080
    },
    {
      "epoch": 2.7865922257620204,
      "grad_norm": 2.296875,
      "learning_rate": 3.9520653390136325e-06,
      "loss": 0.798,
      "step": 795090
    },
    {
      "epoch": 2.786627273268916,
      "grad_norm": 3.421875,
      "learning_rate": 3.9514163103499305e-06,
      "loss": 0.8025,
      "step": 795100
    },
    {
      "epoch": 2.786662320775812,
      "grad_norm": 2.8125,
      "learning_rate": 3.9507672816862285e-06,
      "loss": 0.7336,
      "step": 795110
    },
    {
      "epoch": 2.786697368282707,
      "grad_norm": 2.609375,
      "learning_rate": 3.9501182530225265e-06,
      "loss": 0.7871,
      "step": 795120
    },
    {
      "epoch": 2.786732415789603,
      "grad_norm": 2.90625,
      "learning_rate": 3.9494692243588245e-06,
      "loss": 0.8471,
      "step": 795130
    },
    {
      "epoch": 2.7867674632964983,
      "grad_norm": 2.875,
      "learning_rate": 3.9488201956951225e-06,
      "loss": 0.7881,
      "step": 795140
    },
    {
      "epoch": 2.786802510803394,
      "grad_norm": 3.125,
      "learning_rate": 3.948171167031421e-06,
      "loss": 0.85,
      "step": 795150
    },
    {
      "epoch": 2.78683755831029,
      "grad_norm": 3.09375,
      "learning_rate": 3.947522138367719e-06,
      "loss": 0.8005,
      "step": 795160
    },
    {
      "epoch": 2.786872605817185,
      "grad_norm": 2.84375,
      "learning_rate": 3.9468731097040165e-06,
      "loss": 0.8015,
      "step": 795170
    },
    {
      "epoch": 2.786907653324081,
      "grad_norm": 2.828125,
      "learning_rate": 3.946224081040315e-06,
      "loss": 0.8431,
      "step": 795180
    },
    {
      "epoch": 2.786942700830976,
      "grad_norm": 2.53125,
      "learning_rate": 3.945575052376613e-06,
      "loss": 0.7402,
      "step": 795190
    },
    {
      "epoch": 2.786977748337872,
      "grad_norm": 2.828125,
      "learning_rate": 3.944926023712911e-06,
      "loss": 0.7757,
      "step": 795200
    },
    {
      "epoch": 2.7870127958447677,
      "grad_norm": 2.640625,
      "learning_rate": 3.944276995049209e-06,
      "loss": 0.7202,
      "step": 795210
    },
    {
      "epoch": 2.7870478433516634,
      "grad_norm": 2.4375,
      "learning_rate": 3.943627966385507e-06,
      "loss": 0.7725,
      "step": 795220
    },
    {
      "epoch": 2.7870828908585588,
      "grad_norm": 3.0625,
      "learning_rate": 3.942978937721805e-06,
      "loss": 0.7492,
      "step": 795230
    },
    {
      "epoch": 2.7871179383654545,
      "grad_norm": 3.109375,
      "learning_rate": 3.942329909058104e-06,
      "loss": 0.8801,
      "step": 795240
    },
    {
      "epoch": 2.78715298587235,
      "grad_norm": 3.015625,
      "learning_rate": 3.941680880394402e-06,
      "loss": 0.7678,
      "step": 795250
    },
    {
      "epoch": 2.7871880333792456,
      "grad_norm": 2.921875,
      "learning_rate": 3.941031851730699e-06,
      "loss": 0.854,
      "step": 795260
    },
    {
      "epoch": 2.7872230808861413,
      "grad_norm": 2.8125,
      "learning_rate": 3.940382823066998e-06,
      "loss": 0.7877,
      "step": 795270
    },
    {
      "epoch": 2.7872581283930367,
      "grad_norm": 3.25,
      "learning_rate": 3.939733794403296e-06,
      "loss": 0.9082,
      "step": 795280
    },
    {
      "epoch": 2.7872931758999324,
      "grad_norm": 2.84375,
      "learning_rate": 3.939084765739594e-06,
      "loss": 0.7753,
      "step": 795290
    },
    {
      "epoch": 2.7873282234068277,
      "grad_norm": 3.09375,
      "learning_rate": 3.938435737075893e-06,
      "loss": 0.7783,
      "step": 795300
    },
    {
      "epoch": 2.7873632709137235,
      "grad_norm": 3.15625,
      "learning_rate": 3.93778670841219e-06,
      "loss": 0.82,
      "step": 795310
    },
    {
      "epoch": 2.7873983184206192,
      "grad_norm": 2.59375,
      "learning_rate": 3.937137679748488e-06,
      "loss": 0.8406,
      "step": 795320
    },
    {
      "epoch": 2.787433365927515,
      "grad_norm": 2.546875,
      "learning_rate": 3.936488651084787e-06,
      "loss": 0.7919,
      "step": 795330
    },
    {
      "epoch": 2.7874684134344103,
      "grad_norm": 2.90625,
      "learning_rate": 3.935839622421085e-06,
      "loss": 0.7704,
      "step": 795340
    },
    {
      "epoch": 2.787503460941306,
      "grad_norm": 2.5625,
      "learning_rate": 3.935190593757382e-06,
      "loss": 0.822,
      "step": 795350
    },
    {
      "epoch": 2.7875385084482014,
      "grad_norm": 2.859375,
      "learning_rate": 3.934541565093681e-06,
      "loss": 0.7534,
      "step": 795360
    },
    {
      "epoch": 2.787573555955097,
      "grad_norm": 2.484375,
      "learning_rate": 3.933892536429979e-06,
      "loss": 0.7601,
      "step": 795370
    },
    {
      "epoch": 2.787608603461993,
      "grad_norm": 2.296875,
      "learning_rate": 3.933243507766277e-06,
      "loss": 0.7691,
      "step": 795380
    },
    {
      "epoch": 2.787643650968888,
      "grad_norm": 3.25,
      "learning_rate": 3.932594479102576e-06,
      "loss": 0.8038,
      "step": 795390
    },
    {
      "epoch": 2.787678698475784,
      "grad_norm": 3.078125,
      "learning_rate": 3.931945450438873e-06,
      "loss": 0.8043,
      "step": 795400
    },
    {
      "epoch": 2.7877137459826793,
      "grad_norm": 2.65625,
      "learning_rate": 3.931296421775171e-06,
      "loss": 0.8312,
      "step": 795410
    },
    {
      "epoch": 2.787748793489575,
      "grad_norm": 2.78125,
      "learning_rate": 3.93064739311147e-06,
      "loss": 0.7564,
      "step": 795420
    },
    {
      "epoch": 2.787783840996471,
      "grad_norm": 2.84375,
      "learning_rate": 3.929998364447768e-06,
      "loss": 0.7374,
      "step": 795430
    },
    {
      "epoch": 2.7878188885033666,
      "grad_norm": 2.71875,
      "learning_rate": 3.929349335784066e-06,
      "loss": 0.782,
      "step": 795440
    },
    {
      "epoch": 2.787853936010262,
      "grad_norm": 2.921875,
      "learning_rate": 3.928700307120364e-06,
      "loss": 0.7802,
      "step": 795450
    },
    {
      "epoch": 2.7878889835171576,
      "grad_norm": 2.859375,
      "learning_rate": 3.928051278456662e-06,
      "loss": 0.7488,
      "step": 795460
    },
    {
      "epoch": 2.787924031024053,
      "grad_norm": 3.078125,
      "learning_rate": 3.92740224979296e-06,
      "loss": 0.7454,
      "step": 795470
    },
    {
      "epoch": 2.7879590785309487,
      "grad_norm": 2.75,
      "learning_rate": 3.926753221129258e-06,
      "loss": 0.7487,
      "step": 795480
    },
    {
      "epoch": 2.7879941260378445,
      "grad_norm": 2.875,
      "learning_rate": 3.926104192465557e-06,
      "loss": 0.758,
      "step": 795490
    },
    {
      "epoch": 2.78802917354474,
      "grad_norm": 2.765625,
      "learning_rate": 3.925455163801854e-06,
      "loss": 0.7668,
      "step": 795500
    },
    {
      "epoch": 2.7880642210516355,
      "grad_norm": 2.953125,
      "learning_rate": 3.924806135138153e-06,
      "loss": 0.7717,
      "step": 795510
    },
    {
      "epoch": 2.788099268558531,
      "grad_norm": 2.84375,
      "learning_rate": 3.924157106474451e-06,
      "loss": 0.7582,
      "step": 795520
    },
    {
      "epoch": 2.7881343160654266,
      "grad_norm": 3.421875,
      "learning_rate": 3.923508077810749e-06,
      "loss": 0.8427,
      "step": 795530
    },
    {
      "epoch": 2.7881693635723224,
      "grad_norm": 2.828125,
      "learning_rate": 3.922859049147047e-06,
      "loss": 0.8609,
      "step": 795540
    },
    {
      "epoch": 2.788204411079218,
      "grad_norm": 2.375,
      "learning_rate": 3.922210020483345e-06,
      "loss": 0.7543,
      "step": 795550
    },
    {
      "epoch": 2.7882394585861134,
      "grad_norm": 2.703125,
      "learning_rate": 3.921560991819643e-06,
      "loss": 0.8207,
      "step": 795560
    },
    {
      "epoch": 2.788274506093009,
      "grad_norm": 2.890625,
      "learning_rate": 3.920911963155941e-06,
      "loss": 0.8559,
      "step": 795570
    },
    {
      "epoch": 2.7883095535999045,
      "grad_norm": 2.9375,
      "learning_rate": 3.92026293449224e-06,
      "loss": 0.7736,
      "step": 795580
    },
    {
      "epoch": 2.7883446011068003,
      "grad_norm": 2.953125,
      "learning_rate": 3.919613905828537e-06,
      "loss": 0.7913,
      "step": 795590
    },
    {
      "epoch": 2.788379648613696,
      "grad_norm": 3.109375,
      "learning_rate": 3.918964877164835e-06,
      "loss": 0.862,
      "step": 795600
    },
    {
      "epoch": 2.7884146961205913,
      "grad_norm": 2.953125,
      "learning_rate": 3.918315848501134e-06,
      "loss": 0.8417,
      "step": 795610
    },
    {
      "epoch": 2.788449743627487,
      "grad_norm": 3.078125,
      "learning_rate": 3.917666819837432e-06,
      "loss": 0.7198,
      "step": 795620
    },
    {
      "epoch": 2.7884847911343824,
      "grad_norm": 2.90625,
      "learning_rate": 3.91701779117373e-06,
      "loss": 0.8174,
      "step": 795630
    },
    {
      "epoch": 2.788519838641278,
      "grad_norm": 2.125,
      "learning_rate": 3.916368762510028e-06,
      "loss": 0.8323,
      "step": 795640
    },
    {
      "epoch": 2.788554886148174,
      "grad_norm": 3.15625,
      "learning_rate": 3.915719733846326e-06,
      "loss": 0.7897,
      "step": 795650
    },
    {
      "epoch": 2.7885899336550697,
      "grad_norm": 2.875,
      "learning_rate": 3.915070705182624e-06,
      "loss": 0.8545,
      "step": 795660
    },
    {
      "epoch": 2.788624981161965,
      "grad_norm": 3.390625,
      "learning_rate": 3.9144216765189225e-06,
      "loss": 0.8206,
      "step": 795670
    },
    {
      "epoch": 2.7886600286688608,
      "grad_norm": 3.0625,
      "learning_rate": 3.91377264785522e-06,
      "loss": 0.7967,
      "step": 795680
    },
    {
      "epoch": 2.788695076175756,
      "grad_norm": 3.359375,
      "learning_rate": 3.913123619191518e-06,
      "loss": 0.7995,
      "step": 795690
    },
    {
      "epoch": 2.788730123682652,
      "grad_norm": 2.625,
      "learning_rate": 3.9124745905278165e-06,
      "loss": 0.8002,
      "step": 795700
    },
    {
      "epoch": 2.7887651711895476,
      "grad_norm": 2.96875,
      "learning_rate": 3.9118255618641145e-06,
      "loss": 0.8018,
      "step": 795710
    },
    {
      "epoch": 2.788800218696443,
      "grad_norm": 2.296875,
      "learning_rate": 3.9111765332004125e-06,
      "loss": 0.7573,
      "step": 795720
    },
    {
      "epoch": 2.7888352662033387,
      "grad_norm": 3.046875,
      "learning_rate": 3.9105275045367105e-06,
      "loss": 0.7512,
      "step": 795730
    },
    {
      "epoch": 2.788870313710234,
      "grad_norm": 2.78125,
      "learning_rate": 3.9098784758730085e-06,
      "loss": 0.8221,
      "step": 795740
    },
    {
      "epoch": 2.7889053612171297,
      "grad_norm": 2.796875,
      "learning_rate": 3.9092294472093065e-06,
      "loss": 0.7744,
      "step": 795750
    },
    {
      "epoch": 2.7889404087240255,
      "grad_norm": 2.8125,
      "learning_rate": 3.908580418545605e-06,
      "loss": 0.7554,
      "step": 795760
    },
    {
      "epoch": 2.7889754562309212,
      "grad_norm": 2.703125,
      "learning_rate": 3.907931389881903e-06,
      "loss": 0.7751,
      "step": 795770
    },
    {
      "epoch": 2.7890105037378166,
      "grad_norm": 2.4375,
      "learning_rate": 3.9072823612182005e-06,
      "loss": 0.7781,
      "step": 795780
    },
    {
      "epoch": 2.7890455512447123,
      "grad_norm": 2.6875,
      "learning_rate": 3.906633332554499e-06,
      "loss": 0.7765,
      "step": 795790
    },
    {
      "epoch": 2.7890805987516076,
      "grad_norm": 3.359375,
      "learning_rate": 3.905984303890797e-06,
      "loss": 0.7626,
      "step": 795800
    },
    {
      "epoch": 2.7891156462585034,
      "grad_norm": 2.796875,
      "learning_rate": 3.905335275227095e-06,
      "loss": 0.802,
      "step": 795810
    },
    {
      "epoch": 2.789150693765399,
      "grad_norm": 3.0,
      "learning_rate": 3.904686246563393e-06,
      "loss": 0.7938,
      "step": 795820
    },
    {
      "epoch": 2.7891857412722945,
      "grad_norm": 3.21875,
      "learning_rate": 3.904037217899691e-06,
      "loss": 0.7599,
      "step": 795830
    },
    {
      "epoch": 2.78922078877919,
      "grad_norm": 3.09375,
      "learning_rate": 3.903388189235989e-06,
      "loss": 0.7705,
      "step": 795840
    },
    {
      "epoch": 2.7892558362860855,
      "grad_norm": 3.453125,
      "learning_rate": 3.902739160572288e-06,
      "loss": 0.8228,
      "step": 795850
    },
    {
      "epoch": 2.7892908837929813,
      "grad_norm": 3.171875,
      "learning_rate": 3.902090131908586e-06,
      "loss": 0.8236,
      "step": 795860
    },
    {
      "epoch": 2.789325931299877,
      "grad_norm": 3.546875,
      "learning_rate": 3.901441103244883e-06,
      "loss": 0.7714,
      "step": 795870
    },
    {
      "epoch": 2.789360978806773,
      "grad_norm": 2.96875,
      "learning_rate": 3.900792074581182e-06,
      "loss": 0.73,
      "step": 795880
    },
    {
      "epoch": 2.789396026313668,
      "grad_norm": 2.875,
      "learning_rate": 3.90014304591748e-06,
      "loss": 0.8375,
      "step": 795890
    },
    {
      "epoch": 2.789431073820564,
      "grad_norm": 2.65625,
      "learning_rate": 3.899494017253778e-06,
      "loss": 0.771,
      "step": 795900
    },
    {
      "epoch": 2.789466121327459,
      "grad_norm": 2.578125,
      "learning_rate": 3.898844988590076e-06,
      "loss": 0.8472,
      "step": 795910
    },
    {
      "epoch": 2.789501168834355,
      "grad_norm": 2.9375,
      "learning_rate": 3.898195959926374e-06,
      "loss": 0.7912,
      "step": 795920
    },
    {
      "epoch": 2.7895362163412507,
      "grad_norm": 3.0625,
      "learning_rate": 3.897546931262672e-06,
      "loss": 0.7927,
      "step": 795930
    },
    {
      "epoch": 2.789571263848146,
      "grad_norm": 3.515625,
      "learning_rate": 3.896897902598971e-06,
      "loss": 0.8052,
      "step": 795940
    },
    {
      "epoch": 2.789606311355042,
      "grad_norm": 2.78125,
      "learning_rate": 3.896248873935269e-06,
      "loss": 0.8196,
      "step": 795950
    },
    {
      "epoch": 2.789641358861937,
      "grad_norm": 2.609375,
      "learning_rate": 3.895599845271567e-06,
      "loss": 0.766,
      "step": 795960
    },
    {
      "epoch": 2.789676406368833,
      "grad_norm": 3.34375,
      "learning_rate": 3.894950816607865e-06,
      "loss": 0.7826,
      "step": 795970
    },
    {
      "epoch": 2.7897114538757286,
      "grad_norm": 3.0,
      "learning_rate": 3.894301787944163e-06,
      "loss": 0.8282,
      "step": 795980
    },
    {
      "epoch": 2.7897465013826244,
      "grad_norm": 3.171875,
      "learning_rate": 3.893652759280461e-06,
      "loss": 0.7633,
      "step": 795990
    },
    {
      "epoch": 2.7897815488895197,
      "grad_norm": 3.09375,
      "learning_rate": 3.893003730616759e-06,
      "loss": 0.761,
      "step": 796000
    },
    {
      "epoch": 2.7898165963964154,
      "grad_norm": 2.703125,
      "learning_rate": 3.892354701953057e-06,
      "loss": 0.7092,
      "step": 796010
    },
    {
      "epoch": 2.7898516439033108,
      "grad_norm": 2.765625,
      "learning_rate": 3.891705673289355e-06,
      "loss": 0.7628,
      "step": 796020
    },
    {
      "epoch": 2.7898866914102065,
      "grad_norm": 2.765625,
      "learning_rate": 3.891056644625653e-06,
      "loss": 0.8105,
      "step": 796030
    },
    {
      "epoch": 2.7899217389171023,
      "grad_norm": 2.90625,
      "learning_rate": 3.890407615961952e-06,
      "loss": 0.8768,
      "step": 796040
    },
    {
      "epoch": 2.7899567864239976,
      "grad_norm": 3.015625,
      "learning_rate": 3.88975858729825e-06,
      "loss": 0.7555,
      "step": 796050
    },
    {
      "epoch": 2.7899918339308933,
      "grad_norm": 3.28125,
      "learning_rate": 3.889109558634548e-06,
      "loss": 0.7393,
      "step": 796060
    },
    {
      "epoch": 2.7900268814377887,
      "grad_norm": 3.125,
      "learning_rate": 3.888460529970846e-06,
      "loss": 0.7765,
      "step": 796070
    },
    {
      "epoch": 2.7900619289446844,
      "grad_norm": 2.796875,
      "learning_rate": 3.887811501307144e-06,
      "loss": 0.7535,
      "step": 796080
    },
    {
      "epoch": 2.79009697645158,
      "grad_norm": 2.9375,
      "learning_rate": 3.887162472643442e-06,
      "loss": 0.8045,
      "step": 796090
    },
    {
      "epoch": 2.790132023958476,
      "grad_norm": 3.328125,
      "learning_rate": 3.886513443979741e-06,
      "loss": 0.7655,
      "step": 796100
    },
    {
      "epoch": 2.7901670714653712,
      "grad_norm": 3.3125,
      "learning_rate": 3.885864415316038e-06,
      "loss": 0.8603,
      "step": 796110
    },
    {
      "epoch": 2.790202118972267,
      "grad_norm": 3.328125,
      "learning_rate": 3.885215386652336e-06,
      "loss": 0.8156,
      "step": 796120
    },
    {
      "epoch": 2.7902371664791623,
      "grad_norm": 2.453125,
      "learning_rate": 3.884566357988635e-06,
      "loss": 0.8141,
      "step": 796130
    },
    {
      "epoch": 2.790272213986058,
      "grad_norm": 2.203125,
      "learning_rate": 3.883917329324933e-06,
      "loss": 0.7875,
      "step": 796140
    },
    {
      "epoch": 2.790307261492954,
      "grad_norm": 2.46875,
      "learning_rate": 3.88326830066123e-06,
      "loss": 0.7526,
      "step": 796150
    },
    {
      "epoch": 2.790342308999849,
      "grad_norm": 2.984375,
      "learning_rate": 3.882619271997529e-06,
      "loss": 0.7932,
      "step": 796160
    },
    {
      "epoch": 2.790377356506745,
      "grad_norm": 3.015625,
      "learning_rate": 3.881970243333827e-06,
      "loss": 0.7481,
      "step": 796170
    },
    {
      "epoch": 2.79041240401364,
      "grad_norm": 2.859375,
      "learning_rate": 3.881321214670125e-06,
      "loss": 0.81,
      "step": 796180
    },
    {
      "epoch": 2.790447451520536,
      "grad_norm": 2.8125,
      "learning_rate": 3.8806721860064235e-06,
      "loss": 0.8103,
      "step": 796190
    },
    {
      "epoch": 2.7904824990274317,
      "grad_norm": 2.75,
      "learning_rate": 3.880023157342721e-06,
      "loss": 0.7972,
      "step": 796200
    },
    {
      "epoch": 2.7905175465343275,
      "grad_norm": 2.828125,
      "learning_rate": 3.879374128679019e-06,
      "loss": 0.8011,
      "step": 796210
    },
    {
      "epoch": 2.790552594041223,
      "grad_norm": 3.015625,
      "learning_rate": 3.8787251000153175e-06,
      "loss": 0.783,
      "step": 796220
    },
    {
      "epoch": 2.7905876415481186,
      "grad_norm": 2.96875,
      "learning_rate": 3.8780760713516155e-06,
      "loss": 0.753,
      "step": 796230
    },
    {
      "epoch": 2.790622689055014,
      "grad_norm": 2.984375,
      "learning_rate": 3.8774270426879135e-06,
      "loss": 0.7348,
      "step": 796240
    },
    {
      "epoch": 2.7906577365619096,
      "grad_norm": 3.046875,
      "learning_rate": 3.8767780140242115e-06,
      "loss": 0.7365,
      "step": 796250
    },
    {
      "epoch": 2.7906927840688054,
      "grad_norm": 2.984375,
      "learning_rate": 3.8761289853605095e-06,
      "loss": 0.7478,
      "step": 796260
    },
    {
      "epoch": 2.7907278315757007,
      "grad_norm": 2.78125,
      "learning_rate": 3.8754799566968075e-06,
      "loss": 0.7036,
      "step": 796270
    },
    {
      "epoch": 2.7907628790825965,
      "grad_norm": 2.53125,
      "learning_rate": 3.874830928033106e-06,
      "loss": 0.7904,
      "step": 796280
    },
    {
      "epoch": 2.790797926589492,
      "grad_norm": 3.203125,
      "learning_rate": 3.8741818993694035e-06,
      "loss": 0.8771,
      "step": 796290
    },
    {
      "epoch": 2.7908329740963875,
      "grad_norm": 3.015625,
      "learning_rate": 3.8735328707057015e-06,
      "loss": 0.867,
      "step": 796300
    },
    {
      "epoch": 2.7908680216032833,
      "grad_norm": 3.015625,
      "learning_rate": 3.872883842042e-06,
      "loss": 0.7699,
      "step": 796310
    },
    {
      "epoch": 2.790903069110179,
      "grad_norm": 2.8125,
      "learning_rate": 3.872234813378298e-06,
      "loss": 0.8015,
      "step": 796320
    },
    {
      "epoch": 2.7909381166170744,
      "grad_norm": 2.46875,
      "learning_rate": 3.871585784714596e-06,
      "loss": 0.7267,
      "step": 796330
    },
    {
      "epoch": 2.79097316412397,
      "grad_norm": 3.171875,
      "learning_rate": 3.870936756050894e-06,
      "loss": 0.8209,
      "step": 796340
    },
    {
      "epoch": 2.7910082116308654,
      "grad_norm": 3.015625,
      "learning_rate": 3.870287727387192e-06,
      "loss": 0.768,
      "step": 796350
    },
    {
      "epoch": 2.791043259137761,
      "grad_norm": 2.828125,
      "learning_rate": 3.86963869872349e-06,
      "loss": 0.7962,
      "step": 796360
    },
    {
      "epoch": 2.791078306644657,
      "grad_norm": 3.078125,
      "learning_rate": 3.868989670059789e-06,
      "loss": 0.8146,
      "step": 796370
    },
    {
      "epoch": 2.7911133541515527,
      "grad_norm": 2.640625,
      "learning_rate": 3.868340641396087e-06,
      "loss": 0.8405,
      "step": 796380
    },
    {
      "epoch": 2.791148401658448,
      "grad_norm": 2.953125,
      "learning_rate": 3.867691612732384e-06,
      "loss": 0.7683,
      "step": 796390
    },
    {
      "epoch": 2.7911834491653438,
      "grad_norm": 2.890625,
      "learning_rate": 3.867042584068683e-06,
      "loss": 0.7922,
      "step": 796400
    },
    {
      "epoch": 2.791218496672239,
      "grad_norm": 3.0,
      "learning_rate": 3.866393555404981e-06,
      "loss": 0.7418,
      "step": 796410
    },
    {
      "epoch": 2.791253544179135,
      "grad_norm": 2.625,
      "learning_rate": 3.865744526741279e-06,
      "loss": 0.7648,
      "step": 796420
    },
    {
      "epoch": 2.7912885916860306,
      "grad_norm": 2.921875,
      "learning_rate": 3.865095498077577e-06,
      "loss": 0.7598,
      "step": 796430
    },
    {
      "epoch": 2.791323639192926,
      "grad_norm": 2.734375,
      "learning_rate": 3.864446469413875e-06,
      "loss": 0.851,
      "step": 796440
    },
    {
      "epoch": 2.7913586866998217,
      "grad_norm": 3.15625,
      "learning_rate": 3.863797440750173e-06,
      "loss": 0.7568,
      "step": 796450
    },
    {
      "epoch": 2.791393734206717,
      "grad_norm": 3.4375,
      "learning_rate": 3.863148412086471e-06,
      "loss": 0.8276,
      "step": 796460
    },
    {
      "epoch": 2.7914287817136128,
      "grad_norm": 2.359375,
      "learning_rate": 3.86249938342277e-06,
      "loss": 0.7654,
      "step": 796470
    },
    {
      "epoch": 2.7914638292205085,
      "grad_norm": 2.59375,
      "learning_rate": 3.861850354759067e-06,
      "loss": 0.7492,
      "step": 796480
    },
    {
      "epoch": 2.7914988767274043,
      "grad_norm": 2.765625,
      "learning_rate": 3.861201326095366e-06,
      "loss": 0.7954,
      "step": 796490
    },
    {
      "epoch": 2.7915339242342996,
      "grad_norm": 3.390625,
      "learning_rate": 3.860552297431664e-06,
      "loss": 0.789,
      "step": 796500
    },
    {
      "epoch": 2.7915689717411953,
      "grad_norm": 2.984375,
      "learning_rate": 3.859903268767962e-06,
      "loss": 0.8189,
      "step": 796510
    },
    {
      "epoch": 2.7916040192480907,
      "grad_norm": 2.421875,
      "learning_rate": 3.85925424010426e-06,
      "loss": 0.7409,
      "step": 796520
    },
    {
      "epoch": 2.7916390667549864,
      "grad_norm": 3.0,
      "learning_rate": 3.858605211440558e-06,
      "loss": 0.8724,
      "step": 796530
    },
    {
      "epoch": 2.791674114261882,
      "grad_norm": 2.765625,
      "learning_rate": 3.857956182776856e-06,
      "loss": 0.8309,
      "step": 796540
    },
    {
      "epoch": 2.7917091617687775,
      "grad_norm": 2.8125,
      "learning_rate": 3.857307154113154e-06,
      "loss": 0.7733,
      "step": 796550
    },
    {
      "epoch": 2.7917442092756732,
      "grad_norm": 3.234375,
      "learning_rate": 3.856658125449453e-06,
      "loss": 0.8962,
      "step": 796560
    },
    {
      "epoch": 2.7917792567825686,
      "grad_norm": 2.9375,
      "learning_rate": 3.856009096785751e-06,
      "loss": 0.8211,
      "step": 796570
    },
    {
      "epoch": 2.7918143042894643,
      "grad_norm": 3.234375,
      "learning_rate": 3.855360068122048e-06,
      "loss": 0.8237,
      "step": 796580
    },
    {
      "epoch": 2.79184935179636,
      "grad_norm": 2.65625,
      "learning_rate": 3.854711039458347e-06,
      "loss": 0.8248,
      "step": 796590
    },
    {
      "epoch": 2.791884399303256,
      "grad_norm": 2.859375,
      "learning_rate": 3.854062010794645e-06,
      "loss": 0.7708,
      "step": 796600
    },
    {
      "epoch": 2.791919446810151,
      "grad_norm": 2.890625,
      "learning_rate": 3.853412982130943e-06,
      "loss": 0.8121,
      "step": 796610
    },
    {
      "epoch": 2.791954494317047,
      "grad_norm": 2.609375,
      "learning_rate": 3.852763953467241e-06,
      "loss": 0.7446,
      "step": 796620
    },
    {
      "epoch": 2.791989541823942,
      "grad_norm": 2.6875,
      "learning_rate": 3.852114924803539e-06,
      "loss": 0.7594,
      "step": 796630
    },
    {
      "epoch": 2.792024589330838,
      "grad_norm": 2.609375,
      "learning_rate": 3.851465896139837e-06,
      "loss": 0.7211,
      "step": 796640
    },
    {
      "epoch": 2.7920596368377337,
      "grad_norm": 2.578125,
      "learning_rate": 3.850816867476136e-06,
      "loss": 0.8491,
      "step": 796650
    },
    {
      "epoch": 2.792094684344629,
      "grad_norm": 2.9375,
      "learning_rate": 3.850167838812434e-06,
      "loss": 0.8034,
      "step": 796660
    },
    {
      "epoch": 2.792129731851525,
      "grad_norm": 3.109375,
      "learning_rate": 3.849518810148731e-06,
      "loss": 0.7758,
      "step": 796670
    },
    {
      "epoch": 2.79216477935842,
      "grad_norm": 3.640625,
      "learning_rate": 3.84886978148503e-06,
      "loss": 0.8266,
      "step": 796680
    },
    {
      "epoch": 2.792199826865316,
      "grad_norm": 2.84375,
      "learning_rate": 3.848220752821328e-06,
      "loss": 0.8355,
      "step": 796690
    },
    {
      "epoch": 2.7922348743722116,
      "grad_norm": 3.0,
      "learning_rate": 3.847571724157626e-06,
      "loss": 0.7166,
      "step": 796700
    },
    {
      "epoch": 2.7922699218791074,
      "grad_norm": 2.90625,
      "learning_rate": 3.846922695493925e-06,
      "loss": 0.9028,
      "step": 796710
    },
    {
      "epoch": 2.7923049693860027,
      "grad_norm": 3.203125,
      "learning_rate": 3.846273666830222e-06,
      "loss": 0.7562,
      "step": 796720
    },
    {
      "epoch": 2.7923400168928985,
      "grad_norm": 2.515625,
      "learning_rate": 3.84562463816652e-06,
      "loss": 0.7162,
      "step": 796730
    },
    {
      "epoch": 2.7923750643997938,
      "grad_norm": 2.578125,
      "learning_rate": 3.844975609502819e-06,
      "loss": 0.8017,
      "step": 796740
    },
    {
      "epoch": 2.7924101119066895,
      "grad_norm": 2.453125,
      "learning_rate": 3.844326580839117e-06,
      "loss": 0.7611,
      "step": 796750
    },
    {
      "epoch": 2.7924451594135853,
      "grad_norm": 2.578125,
      "learning_rate": 3.843677552175414e-06,
      "loss": 0.8036,
      "step": 796760
    },
    {
      "epoch": 2.7924802069204806,
      "grad_norm": 2.671875,
      "learning_rate": 3.843028523511713e-06,
      "loss": 0.84,
      "step": 796770
    },
    {
      "epoch": 2.7925152544273764,
      "grad_norm": 2.875,
      "learning_rate": 3.842379494848011e-06,
      "loss": 0.8125,
      "step": 796780
    },
    {
      "epoch": 2.7925503019342717,
      "grad_norm": 3.34375,
      "learning_rate": 3.841730466184309e-06,
      "loss": 0.8135,
      "step": 796790
    },
    {
      "epoch": 2.7925853494411674,
      "grad_norm": 3.125,
      "learning_rate": 3.8410814375206075e-06,
      "loss": 0.7685,
      "step": 796800
    },
    {
      "epoch": 2.792620396948063,
      "grad_norm": 2.703125,
      "learning_rate": 3.840432408856905e-06,
      "loss": 0.7892,
      "step": 796810
    },
    {
      "epoch": 2.792655444454959,
      "grad_norm": 2.890625,
      "learning_rate": 3.839783380193203e-06,
      "loss": 0.8495,
      "step": 796820
    },
    {
      "epoch": 2.7926904919618543,
      "grad_norm": 2.71875,
      "learning_rate": 3.8391343515295015e-06,
      "loss": 0.8727,
      "step": 796830
    },
    {
      "epoch": 2.79272553946875,
      "grad_norm": 2.625,
      "learning_rate": 3.8384853228657995e-06,
      "loss": 0.8303,
      "step": 796840
    },
    {
      "epoch": 2.7927605869756453,
      "grad_norm": 2.71875,
      "learning_rate": 3.8378362942020975e-06,
      "loss": 0.8076,
      "step": 796850
    },
    {
      "epoch": 2.792795634482541,
      "grad_norm": 3.125,
      "learning_rate": 3.8371872655383955e-06,
      "loss": 0.753,
      "step": 796860
    },
    {
      "epoch": 2.792830681989437,
      "grad_norm": 2.65625,
      "learning_rate": 3.8365382368746935e-06,
      "loss": 0.85,
      "step": 796870
    },
    {
      "epoch": 2.792865729496332,
      "grad_norm": 2.6875,
      "learning_rate": 3.8358892082109915e-06,
      "loss": 0.7654,
      "step": 796880
    },
    {
      "epoch": 2.792900777003228,
      "grad_norm": 2.65625,
      "learning_rate": 3.83524017954729e-06,
      "loss": 0.7678,
      "step": 796890
    },
    {
      "epoch": 2.7929358245101232,
      "grad_norm": 2.515625,
      "learning_rate": 3.834591150883588e-06,
      "loss": 0.8102,
      "step": 796900
    },
    {
      "epoch": 2.792970872017019,
      "grad_norm": 2.765625,
      "learning_rate": 3.8339421222198855e-06,
      "loss": 0.773,
      "step": 796910
    },
    {
      "epoch": 2.7930059195239147,
      "grad_norm": 3.421875,
      "learning_rate": 3.833293093556184e-06,
      "loss": 0.8032,
      "step": 796920
    },
    {
      "epoch": 2.7930409670308105,
      "grad_norm": 2.90625,
      "learning_rate": 3.832644064892482e-06,
      "loss": 0.8609,
      "step": 796930
    },
    {
      "epoch": 2.793076014537706,
      "grad_norm": 2.828125,
      "learning_rate": 3.83199503622878e-06,
      "loss": 0.86,
      "step": 796940
    },
    {
      "epoch": 2.7931110620446016,
      "grad_norm": 3.3125,
      "learning_rate": 3.831346007565078e-06,
      "loss": 0.8276,
      "step": 796950
    },
    {
      "epoch": 2.793146109551497,
      "grad_norm": 2.8125,
      "learning_rate": 3.830696978901376e-06,
      "loss": 0.8557,
      "step": 796960
    },
    {
      "epoch": 2.7931811570583926,
      "grad_norm": 2.921875,
      "learning_rate": 3.830047950237674e-06,
      "loss": 0.7904,
      "step": 796970
    },
    {
      "epoch": 2.7932162045652884,
      "grad_norm": 2.6875,
      "learning_rate": 3.829398921573972e-06,
      "loss": 0.7475,
      "step": 796980
    },
    {
      "epoch": 2.7932512520721837,
      "grad_norm": 3.015625,
      "learning_rate": 3.828749892910271e-06,
      "loss": 0.8304,
      "step": 796990
    },
    {
      "epoch": 2.7932862995790795,
      "grad_norm": 3.15625,
      "learning_rate": 3.828100864246568e-06,
      "loss": 0.7775,
      "step": 797000
    },
    {
      "epoch": 2.793321347085975,
      "grad_norm": 2.828125,
      "learning_rate": 3.827451835582866e-06,
      "loss": 0.8471,
      "step": 797010
    },
    {
      "epoch": 2.7933563945928706,
      "grad_norm": 3.046875,
      "learning_rate": 3.826802806919165e-06,
      "loss": 0.8536,
      "step": 797020
    },
    {
      "epoch": 2.7933914420997663,
      "grad_norm": 3.0,
      "learning_rate": 3.826153778255463e-06,
      "loss": 0.7942,
      "step": 797030
    },
    {
      "epoch": 2.793426489606662,
      "grad_norm": 2.890625,
      "learning_rate": 3.825504749591761e-06,
      "loss": 0.9449,
      "step": 797040
    },
    {
      "epoch": 2.7934615371135574,
      "grad_norm": 2.75,
      "learning_rate": 3.824855720928059e-06,
      "loss": 0.7004,
      "step": 797050
    },
    {
      "epoch": 2.793496584620453,
      "grad_norm": 2.875,
      "learning_rate": 3.824206692264357e-06,
      "loss": 0.7763,
      "step": 797060
    },
    {
      "epoch": 2.7935316321273485,
      "grad_norm": 3.078125,
      "learning_rate": 3.823557663600655e-06,
      "loss": 0.7884,
      "step": 797070
    },
    {
      "epoch": 2.793566679634244,
      "grad_norm": 3.21875,
      "learning_rate": 3.822908634936954e-06,
      "loss": 0.8454,
      "step": 797080
    },
    {
      "epoch": 2.79360172714114,
      "grad_norm": 2.34375,
      "learning_rate": 3.822259606273251e-06,
      "loss": 0.7959,
      "step": 797090
    },
    {
      "epoch": 2.7936367746480353,
      "grad_norm": 2.5,
      "learning_rate": 3.821610577609549e-06,
      "loss": 0.7499,
      "step": 797100
    },
    {
      "epoch": 2.793671822154931,
      "grad_norm": 2.78125,
      "learning_rate": 3.820961548945848e-06,
      "loss": 0.7978,
      "step": 797110
    },
    {
      "epoch": 2.7937068696618264,
      "grad_norm": 3.828125,
      "learning_rate": 3.820312520282146e-06,
      "loss": 0.8674,
      "step": 797120
    },
    {
      "epoch": 2.793741917168722,
      "grad_norm": 2.828125,
      "learning_rate": 3.819663491618444e-06,
      "loss": 0.8354,
      "step": 797130
    },
    {
      "epoch": 2.793776964675618,
      "grad_norm": 2.984375,
      "learning_rate": 3.819014462954742e-06,
      "loss": 0.7744,
      "step": 797140
    },
    {
      "epoch": 2.7938120121825136,
      "grad_norm": 2.71875,
      "learning_rate": 3.81836543429104e-06,
      "loss": 0.7157,
      "step": 797150
    },
    {
      "epoch": 2.793847059689409,
      "grad_norm": 2.8125,
      "learning_rate": 3.817716405627338e-06,
      "loss": 0.7491,
      "step": 797160
    },
    {
      "epoch": 2.7938821071963047,
      "grad_norm": 3.046875,
      "learning_rate": 3.817067376963637e-06,
      "loss": 0.8118,
      "step": 797170
    },
    {
      "epoch": 2.7939171547032,
      "grad_norm": 3.125,
      "learning_rate": 3.816418348299935e-06,
      "loss": 0.8191,
      "step": 797180
    },
    {
      "epoch": 2.7939522022100958,
      "grad_norm": 2.984375,
      "learning_rate": 3.815769319636232e-06,
      "loss": 0.7131,
      "step": 797190
    },
    {
      "epoch": 2.7939872497169915,
      "grad_norm": 3.0,
      "learning_rate": 3.815120290972531e-06,
      "loss": 0.8038,
      "step": 797200
    },
    {
      "epoch": 2.794022297223887,
      "grad_norm": 2.9375,
      "learning_rate": 3.814471262308829e-06,
      "loss": 0.858,
      "step": 797210
    },
    {
      "epoch": 2.7940573447307826,
      "grad_norm": 3.25,
      "learning_rate": 3.8138222336451273e-06,
      "loss": 0.8448,
      "step": 797220
    },
    {
      "epoch": 2.794092392237678,
      "grad_norm": 2.828125,
      "learning_rate": 3.8131732049814253e-06,
      "loss": 0.754,
      "step": 797230
    },
    {
      "epoch": 2.7941274397445737,
      "grad_norm": 3.046875,
      "learning_rate": 3.812524176317723e-06,
      "loss": 0.8063,
      "step": 797240
    },
    {
      "epoch": 2.7941624872514694,
      "grad_norm": 2.953125,
      "learning_rate": 3.8118751476540213e-06,
      "loss": 0.7455,
      "step": 797250
    },
    {
      "epoch": 2.794197534758365,
      "grad_norm": 2.765625,
      "learning_rate": 3.8112261189903193e-06,
      "loss": 0.6999,
      "step": 797260
    },
    {
      "epoch": 2.7942325822652605,
      "grad_norm": 2.953125,
      "learning_rate": 3.8105770903266177e-06,
      "loss": 0.7673,
      "step": 797270
    },
    {
      "epoch": 2.7942676297721563,
      "grad_norm": 3.0625,
      "learning_rate": 3.8099280616629153e-06,
      "loss": 0.7734,
      "step": 797280
    },
    {
      "epoch": 2.7943026772790516,
      "grad_norm": 2.9375,
      "learning_rate": 3.8092790329992133e-06,
      "loss": 0.7347,
      "step": 797290
    },
    {
      "epoch": 2.7943377247859473,
      "grad_norm": 2.96875,
      "learning_rate": 3.8086300043355117e-06,
      "loss": 0.8206,
      "step": 797300
    },
    {
      "epoch": 2.794372772292843,
      "grad_norm": 2.8125,
      "learning_rate": 3.8079809756718097e-06,
      "loss": 0.7244,
      "step": 797310
    },
    {
      "epoch": 2.7944078197997384,
      "grad_norm": 3.046875,
      "learning_rate": 3.807331947008108e-06,
      "loss": 0.8395,
      "step": 797320
    },
    {
      "epoch": 2.794442867306634,
      "grad_norm": 3.46875,
      "learning_rate": 3.8066829183444057e-06,
      "loss": 0.7584,
      "step": 797330
    },
    {
      "epoch": 2.7944779148135295,
      "grad_norm": 2.84375,
      "learning_rate": 3.806033889680704e-06,
      "loss": 0.7429,
      "step": 797340
    },
    {
      "epoch": 2.7945129623204252,
      "grad_norm": 3.265625,
      "learning_rate": 3.805384861017002e-06,
      "loss": 0.8094,
      "step": 797350
    },
    {
      "epoch": 2.794548009827321,
      "grad_norm": 2.71875,
      "learning_rate": 3.8047358323533005e-06,
      "loss": 0.8498,
      "step": 797360
    },
    {
      "epoch": 2.7945830573342167,
      "grad_norm": 2.953125,
      "learning_rate": 3.8040868036895985e-06,
      "loss": 0.796,
      "step": 797370
    },
    {
      "epoch": 2.794618104841112,
      "grad_norm": 3.078125,
      "learning_rate": 3.803437775025896e-06,
      "loss": 0.7857,
      "step": 797380
    },
    {
      "epoch": 2.794653152348008,
      "grad_norm": 3.046875,
      "learning_rate": 3.8027887463621945e-06,
      "loss": 0.9319,
      "step": 797390
    },
    {
      "epoch": 2.794688199854903,
      "grad_norm": 3.34375,
      "learning_rate": 3.8021397176984925e-06,
      "loss": 0.7854,
      "step": 797400
    },
    {
      "epoch": 2.794723247361799,
      "grad_norm": 3.015625,
      "learning_rate": 3.801490689034791e-06,
      "loss": 0.7794,
      "step": 797410
    },
    {
      "epoch": 2.7947582948686946,
      "grad_norm": 3.078125,
      "learning_rate": 3.8008416603710885e-06,
      "loss": 0.7863,
      "step": 797420
    },
    {
      "epoch": 2.79479334237559,
      "grad_norm": 3.265625,
      "learning_rate": 3.8001926317073865e-06,
      "loss": 0.7915,
      "step": 797430
    },
    {
      "epoch": 2.7948283898824857,
      "grad_norm": 3.03125,
      "learning_rate": 3.799543603043685e-06,
      "loss": 0.7709,
      "step": 797440
    },
    {
      "epoch": 2.794863437389381,
      "grad_norm": 3.125,
      "learning_rate": 3.7988945743799834e-06,
      "loss": 0.7872,
      "step": 797450
    },
    {
      "epoch": 2.794898484896277,
      "grad_norm": 3.34375,
      "learning_rate": 3.7982455457162814e-06,
      "loss": 0.8658,
      "step": 797460
    },
    {
      "epoch": 2.7949335324031725,
      "grad_norm": 3.203125,
      "learning_rate": 3.797596517052579e-06,
      "loss": 0.8956,
      "step": 797470
    },
    {
      "epoch": 2.7949685799100683,
      "grad_norm": 2.75,
      "learning_rate": 3.7969474883888774e-06,
      "loss": 0.7953,
      "step": 797480
    },
    {
      "epoch": 2.7950036274169636,
      "grad_norm": 2.578125,
      "learning_rate": 3.7962984597251754e-06,
      "loss": 0.7655,
      "step": 797490
    },
    {
      "epoch": 2.7950386749238594,
      "grad_norm": 2.546875,
      "learning_rate": 3.795649431061474e-06,
      "loss": 0.8797,
      "step": 797500
    },
    {
      "epoch": 2.7950737224307547,
      "grad_norm": 2.90625,
      "learning_rate": 3.795000402397772e-06,
      "loss": 0.7668,
      "step": 797510
    },
    {
      "epoch": 2.7951087699376505,
      "grad_norm": 3.109375,
      "learning_rate": 3.7943513737340694e-06,
      "loss": 0.7414,
      "step": 797520
    },
    {
      "epoch": 2.795143817444546,
      "grad_norm": 3.328125,
      "learning_rate": 3.793702345070368e-06,
      "loss": 0.8434,
      "step": 797530
    },
    {
      "epoch": 2.7951788649514415,
      "grad_norm": 2.6875,
      "learning_rate": 3.7930533164066662e-06,
      "loss": 0.7082,
      "step": 797540
    },
    {
      "epoch": 2.7952139124583373,
      "grad_norm": 2.921875,
      "learning_rate": 3.7924042877429642e-06,
      "loss": 0.7957,
      "step": 797550
    },
    {
      "epoch": 2.795248959965233,
      "grad_norm": 2.484375,
      "learning_rate": 3.791755259079262e-06,
      "loss": 0.8178,
      "step": 797560
    },
    {
      "epoch": 2.7952840074721284,
      "grad_norm": 2.65625,
      "learning_rate": 3.7911062304155602e-06,
      "loss": 0.8037,
      "step": 797570
    },
    {
      "epoch": 2.795319054979024,
      "grad_norm": 2.703125,
      "learning_rate": 3.7904572017518582e-06,
      "loss": 0.7636,
      "step": 797580
    },
    {
      "epoch": 2.79535410248592,
      "grad_norm": 2.859375,
      "learning_rate": 3.7898081730881567e-06,
      "loss": 0.8118,
      "step": 797590
    },
    {
      "epoch": 2.795389149992815,
      "grad_norm": 2.6875,
      "learning_rate": 3.7891591444244547e-06,
      "loss": 0.8097,
      "step": 797600
    },
    {
      "epoch": 2.795424197499711,
      "grad_norm": 2.9375,
      "learning_rate": 3.7885101157607522e-06,
      "loss": 0.7306,
      "step": 797610
    },
    {
      "epoch": 2.7954592450066063,
      "grad_norm": 2.71875,
      "learning_rate": 3.7878610870970507e-06,
      "loss": 0.8446,
      "step": 797620
    },
    {
      "epoch": 2.795494292513502,
      "grad_norm": 2.921875,
      "learning_rate": 3.7872120584333487e-06,
      "loss": 0.7285,
      "step": 797630
    },
    {
      "epoch": 2.7955293400203978,
      "grad_norm": 2.671875,
      "learning_rate": 3.786563029769647e-06,
      "loss": 0.7807,
      "step": 797640
    },
    {
      "epoch": 2.795564387527293,
      "grad_norm": 3.09375,
      "learning_rate": 3.7859140011059455e-06,
      "loss": 0.8452,
      "step": 797650
    },
    {
      "epoch": 2.795599435034189,
      "grad_norm": 2.796875,
      "learning_rate": 3.7852649724422427e-06,
      "loss": 0.7921,
      "step": 797660
    },
    {
      "epoch": 2.7956344825410846,
      "grad_norm": 3.140625,
      "learning_rate": 3.784615943778541e-06,
      "loss": 0.7847,
      "step": 797670
    },
    {
      "epoch": 2.79566953004798,
      "grad_norm": 2.859375,
      "learning_rate": 3.7839669151148395e-06,
      "loss": 0.7912,
      "step": 797680
    },
    {
      "epoch": 2.7957045775548757,
      "grad_norm": 2.515625,
      "learning_rate": 3.7833178864511375e-06,
      "loss": 0.7781,
      "step": 797690
    },
    {
      "epoch": 2.7957396250617714,
      "grad_norm": 2.8125,
      "learning_rate": 3.782668857787436e-06,
      "loss": 0.8042,
      "step": 797700
    },
    {
      "epoch": 2.7957746725686667,
      "grad_norm": 2.8125,
      "learning_rate": 3.7820198291237335e-06,
      "loss": 0.8374,
      "step": 797710
    },
    {
      "epoch": 2.7958097200755625,
      "grad_norm": 3.015625,
      "learning_rate": 3.7813708004600315e-06,
      "loss": 0.7745,
      "step": 797720
    },
    {
      "epoch": 2.795844767582458,
      "grad_norm": 2.546875,
      "learning_rate": 3.78072177179633e-06,
      "loss": 0.8577,
      "step": 797730
    },
    {
      "epoch": 2.7958798150893536,
      "grad_norm": 2.953125,
      "learning_rate": 3.780072743132628e-06,
      "loss": 0.7896,
      "step": 797740
    },
    {
      "epoch": 2.7959148625962493,
      "grad_norm": 2.921875,
      "learning_rate": 3.7794237144689255e-06,
      "loss": 0.7509,
      "step": 797750
    },
    {
      "epoch": 2.795949910103145,
      "grad_norm": 3.09375,
      "learning_rate": 3.778774685805224e-06,
      "loss": 0.8172,
      "step": 797760
    },
    {
      "epoch": 2.7959849576100404,
      "grad_norm": 3.421875,
      "learning_rate": 3.7781256571415224e-06,
      "loss": 0.8423,
      "step": 797770
    },
    {
      "epoch": 2.796020005116936,
      "grad_norm": 2.65625,
      "learning_rate": 3.7774766284778204e-06,
      "loss": 0.8475,
      "step": 797780
    },
    {
      "epoch": 2.7960550526238315,
      "grad_norm": 2.90625,
      "learning_rate": 3.7768275998141188e-06,
      "loss": 0.7079,
      "step": 797790
    },
    {
      "epoch": 2.7960901001307272,
      "grad_norm": 2.78125,
      "learning_rate": 3.7761785711504164e-06,
      "loss": 0.7931,
      "step": 797800
    },
    {
      "epoch": 2.796125147637623,
      "grad_norm": 2.515625,
      "learning_rate": 3.7755295424867144e-06,
      "loss": 0.7844,
      "step": 797810
    },
    {
      "epoch": 2.7961601951445183,
      "grad_norm": 2.84375,
      "learning_rate": 3.7748805138230128e-06,
      "loss": 0.7473,
      "step": 797820
    },
    {
      "epoch": 2.796195242651414,
      "grad_norm": 3.140625,
      "learning_rate": 3.7742314851593108e-06,
      "loss": 0.8551,
      "step": 797830
    },
    {
      "epoch": 2.7962302901583094,
      "grad_norm": 2.953125,
      "learning_rate": 3.773582456495609e-06,
      "loss": 0.7732,
      "step": 797840
    },
    {
      "epoch": 2.796265337665205,
      "grad_norm": 3.125,
      "learning_rate": 3.7729334278319068e-06,
      "loss": 0.8502,
      "step": 797850
    },
    {
      "epoch": 2.796300385172101,
      "grad_norm": 2.703125,
      "learning_rate": 3.7722843991682048e-06,
      "loss": 0.7831,
      "step": 797860
    },
    {
      "epoch": 2.7963354326789966,
      "grad_norm": 2.84375,
      "learning_rate": 3.771635370504503e-06,
      "loss": 0.7987,
      "step": 797870
    },
    {
      "epoch": 2.796370480185892,
      "grad_norm": 3.046875,
      "learning_rate": 3.7709863418408016e-06,
      "loss": 0.8453,
      "step": 797880
    },
    {
      "epoch": 2.7964055276927877,
      "grad_norm": 2.46875,
      "learning_rate": 3.770337313177099e-06,
      "loss": 0.8159,
      "step": 797890
    },
    {
      "epoch": 2.796440575199683,
      "grad_norm": 3.140625,
      "learning_rate": 3.769688284513397e-06,
      "loss": 0.741,
      "step": 797900
    },
    {
      "epoch": 2.796475622706579,
      "grad_norm": 2.890625,
      "learning_rate": 3.7690392558496956e-06,
      "loss": 0.852,
      "step": 797910
    },
    {
      "epoch": 2.7965106702134745,
      "grad_norm": 2.859375,
      "learning_rate": 3.7683902271859936e-06,
      "loss": 0.8174,
      "step": 797920
    },
    {
      "epoch": 2.79654571772037,
      "grad_norm": 3.046875,
      "learning_rate": 3.767741198522292e-06,
      "loss": 0.7222,
      "step": 797930
    },
    {
      "epoch": 2.7965807652272656,
      "grad_norm": 2.578125,
      "learning_rate": 3.7670921698585896e-06,
      "loss": 0.8395,
      "step": 797940
    },
    {
      "epoch": 2.796615812734161,
      "grad_norm": 2.875,
      "learning_rate": 3.7664431411948876e-06,
      "loss": 0.7757,
      "step": 797950
    },
    {
      "epoch": 2.7966508602410567,
      "grad_norm": 2.6875,
      "learning_rate": 3.765794112531186e-06,
      "loss": 0.8132,
      "step": 797960
    },
    {
      "epoch": 2.7966859077479524,
      "grad_norm": 2.6875,
      "learning_rate": 3.7651450838674845e-06,
      "loss": 0.8262,
      "step": 797970
    },
    {
      "epoch": 2.796720955254848,
      "grad_norm": 2.484375,
      "learning_rate": 3.7644960552037825e-06,
      "loss": 0.8176,
      "step": 797980
    },
    {
      "epoch": 2.7967560027617435,
      "grad_norm": 3.078125,
      "learning_rate": 3.76384702654008e-06,
      "loss": 0.8413,
      "step": 797990
    },
    {
      "epoch": 2.7967910502686393,
      "grad_norm": 3.03125,
      "learning_rate": 3.7631979978763785e-06,
      "loss": 0.7927,
      "step": 798000
    },
    {
      "epoch": 2.7968260977755346,
      "grad_norm": 3.21875,
      "learning_rate": 3.7625489692126765e-06,
      "loss": 0.77,
      "step": 798010
    },
    {
      "epoch": 2.7968611452824303,
      "grad_norm": 2.96875,
      "learning_rate": 3.761899940548975e-06,
      "loss": 0.8169,
      "step": 798020
    },
    {
      "epoch": 2.796896192789326,
      "grad_norm": 3.0625,
      "learning_rate": 3.7612509118852725e-06,
      "loss": 0.8776,
      "step": 798030
    },
    {
      "epoch": 2.7969312402962214,
      "grad_norm": 2.96875,
      "learning_rate": 3.7606018832215705e-06,
      "loss": 0.8009,
      "step": 798040
    },
    {
      "epoch": 2.796966287803117,
      "grad_norm": 3.015625,
      "learning_rate": 3.759952854557869e-06,
      "loss": 0.7234,
      "step": 798050
    },
    {
      "epoch": 2.7970013353100125,
      "grad_norm": 2.765625,
      "learning_rate": 3.759303825894167e-06,
      "loss": 0.8324,
      "step": 798060
    },
    {
      "epoch": 2.7970363828169083,
      "grad_norm": 3.125,
      "learning_rate": 3.7586547972304653e-06,
      "loss": 0.7909,
      "step": 798070
    },
    {
      "epoch": 2.797071430323804,
      "grad_norm": 3.203125,
      "learning_rate": 3.758005768566763e-06,
      "loss": 0.8135,
      "step": 798080
    },
    {
      "epoch": 2.7971064778306998,
      "grad_norm": 2.875,
      "learning_rate": 3.7573567399030613e-06,
      "loss": 0.8117,
      "step": 798090
    },
    {
      "epoch": 2.797141525337595,
      "grad_norm": 3.0,
      "learning_rate": 3.7567077112393593e-06,
      "loss": 0.8639,
      "step": 798100
    },
    {
      "epoch": 2.797176572844491,
      "grad_norm": 2.859375,
      "learning_rate": 3.7560586825756577e-06,
      "loss": 0.7877,
      "step": 798110
    },
    {
      "epoch": 2.797211620351386,
      "grad_norm": 3.1875,
      "learning_rate": 3.7554096539119557e-06,
      "loss": 0.7438,
      "step": 798120
    },
    {
      "epoch": 2.797246667858282,
      "grad_norm": 3.421875,
      "learning_rate": 3.7547606252482533e-06,
      "loss": 0.7676,
      "step": 798130
    },
    {
      "epoch": 2.7972817153651777,
      "grad_norm": 2.859375,
      "learning_rate": 3.7541115965845517e-06,
      "loss": 0.7565,
      "step": 798140
    },
    {
      "epoch": 2.797316762872073,
      "grad_norm": 3.078125,
      "learning_rate": 3.7534625679208497e-06,
      "loss": 0.8471,
      "step": 798150
    },
    {
      "epoch": 2.7973518103789687,
      "grad_norm": 3.453125,
      "learning_rate": 3.752813539257148e-06,
      "loss": 0.7269,
      "step": 798160
    },
    {
      "epoch": 2.797386857885864,
      "grad_norm": 2.703125,
      "learning_rate": 3.752164510593446e-06,
      "loss": 0.7598,
      "step": 798170
    },
    {
      "epoch": 2.79742190539276,
      "grad_norm": 3.96875,
      "learning_rate": 3.7515154819297437e-06,
      "loss": 0.6903,
      "step": 798180
    },
    {
      "epoch": 2.7974569528996556,
      "grad_norm": 3.25,
      "learning_rate": 3.750866453266042e-06,
      "loss": 0.7585,
      "step": 798190
    },
    {
      "epoch": 2.7974920004065513,
      "grad_norm": 3.171875,
      "learning_rate": 3.7502174246023406e-06,
      "loss": 0.7529,
      "step": 798200
    },
    {
      "epoch": 2.7975270479134466,
      "grad_norm": 2.984375,
      "learning_rate": 3.7495683959386386e-06,
      "loss": 0.7466,
      "step": 798210
    },
    {
      "epoch": 2.7975620954203424,
      "grad_norm": 3.203125,
      "learning_rate": 3.748919367274936e-06,
      "loss": 0.8168,
      "step": 798220
    },
    {
      "epoch": 2.7975971429272377,
      "grad_norm": 2.640625,
      "learning_rate": 3.7482703386112346e-06,
      "loss": 0.7752,
      "step": 798230
    },
    {
      "epoch": 2.7976321904341335,
      "grad_norm": 3.09375,
      "learning_rate": 3.7476213099475326e-06,
      "loss": 0.8689,
      "step": 798240
    },
    {
      "epoch": 2.7976672379410292,
      "grad_norm": 2.734375,
      "learning_rate": 3.746972281283831e-06,
      "loss": 0.7599,
      "step": 798250
    },
    {
      "epoch": 2.7977022854479245,
      "grad_norm": 2.84375,
      "learning_rate": 3.746323252620129e-06,
      "loss": 0.7997,
      "step": 798260
    },
    {
      "epoch": 2.7977373329548203,
      "grad_norm": 2.453125,
      "learning_rate": 3.7456742239564266e-06,
      "loss": 0.788,
      "step": 798270
    },
    {
      "epoch": 2.7977723804617156,
      "grad_norm": 2.875,
      "learning_rate": 3.745025195292725e-06,
      "loss": 0.7885,
      "step": 798280
    },
    {
      "epoch": 2.7978074279686114,
      "grad_norm": 3.171875,
      "learning_rate": 3.744376166629023e-06,
      "loss": 0.7645,
      "step": 798290
    },
    {
      "epoch": 2.797842475475507,
      "grad_norm": 2.453125,
      "learning_rate": 3.7437271379653214e-06,
      "loss": 0.7188,
      "step": 798300
    },
    {
      "epoch": 2.797877522982403,
      "grad_norm": 2.703125,
      "learning_rate": 3.74307810930162e-06,
      "loss": 0.7998,
      "step": 798310
    },
    {
      "epoch": 2.797912570489298,
      "grad_norm": 2.890625,
      "learning_rate": 3.7424290806379174e-06,
      "loss": 0.7695,
      "step": 798320
    },
    {
      "epoch": 2.797947617996194,
      "grad_norm": 2.515625,
      "learning_rate": 3.7417800519742154e-06,
      "loss": 0.7365,
      "step": 798330
    },
    {
      "epoch": 2.7979826655030893,
      "grad_norm": 2.84375,
      "learning_rate": 3.741131023310514e-06,
      "loss": 0.8172,
      "step": 798340
    },
    {
      "epoch": 2.798017713009985,
      "grad_norm": 2.78125,
      "learning_rate": 3.740481994646812e-06,
      "loss": 0.7875,
      "step": 798350
    },
    {
      "epoch": 2.798052760516881,
      "grad_norm": 3.046875,
      "learning_rate": 3.7398329659831094e-06,
      "loss": 0.7833,
      "step": 798360
    },
    {
      "epoch": 2.798087808023776,
      "grad_norm": 3.0625,
      "learning_rate": 3.739183937319408e-06,
      "loss": 0.8031,
      "step": 798370
    },
    {
      "epoch": 2.798122855530672,
      "grad_norm": 2.53125,
      "learning_rate": 3.738534908655706e-06,
      "loss": 0.7082,
      "step": 798380
    },
    {
      "epoch": 2.798157903037567,
      "grad_norm": 2.921875,
      "learning_rate": 3.7378858799920043e-06,
      "loss": 0.7634,
      "step": 798390
    },
    {
      "epoch": 2.798192950544463,
      "grad_norm": 3.078125,
      "learning_rate": 3.7372368513283027e-06,
      "loss": 0.8629,
      "step": 798400
    },
    {
      "epoch": 2.7982279980513587,
      "grad_norm": 2.890625,
      "learning_rate": 3.7365878226646e-06,
      "loss": 0.7821,
      "step": 798410
    },
    {
      "epoch": 2.7982630455582544,
      "grad_norm": 2.4375,
      "learning_rate": 3.7359387940008983e-06,
      "loss": 0.7487,
      "step": 798420
    },
    {
      "epoch": 2.7982980930651498,
      "grad_norm": 3.171875,
      "learning_rate": 3.7352897653371967e-06,
      "loss": 0.8428,
      "step": 798430
    },
    {
      "epoch": 2.7983331405720455,
      "grad_norm": 2.921875,
      "learning_rate": 3.7346407366734947e-06,
      "loss": 0.7591,
      "step": 798440
    },
    {
      "epoch": 2.798368188078941,
      "grad_norm": 2.953125,
      "learning_rate": 3.733991708009793e-06,
      "loss": 0.8317,
      "step": 798450
    },
    {
      "epoch": 2.7984032355858366,
      "grad_norm": 2.890625,
      "learning_rate": 3.7333426793460907e-06,
      "loss": 0.8115,
      "step": 798460
    },
    {
      "epoch": 2.7984382830927323,
      "grad_norm": 2.890625,
      "learning_rate": 3.7326936506823887e-06,
      "loss": 0.7851,
      "step": 798470
    },
    {
      "epoch": 2.7984733305996277,
      "grad_norm": 3.25,
      "learning_rate": 3.732044622018687e-06,
      "loss": 0.793,
      "step": 798480
    },
    {
      "epoch": 2.7985083781065234,
      "grad_norm": 2.953125,
      "learning_rate": 3.731395593354985e-06,
      "loss": 0.8489,
      "step": 798490
    },
    {
      "epoch": 2.7985434256134187,
      "grad_norm": 3.078125,
      "learning_rate": 3.7307465646912827e-06,
      "loss": 0.8643,
      "step": 798500
    },
    {
      "epoch": 2.7985784731203145,
      "grad_norm": 3.1875,
      "learning_rate": 3.730097536027581e-06,
      "loss": 0.7514,
      "step": 798510
    },
    {
      "epoch": 2.7986135206272102,
      "grad_norm": 2.953125,
      "learning_rate": 3.7294485073638795e-06,
      "loss": 0.7889,
      "step": 798520
    },
    {
      "epoch": 2.798648568134106,
      "grad_norm": 2.671875,
      "learning_rate": 3.7287994787001775e-06,
      "loss": 0.7239,
      "step": 798530
    },
    {
      "epoch": 2.7986836156410013,
      "grad_norm": 2.90625,
      "learning_rate": 3.728150450036476e-06,
      "loss": 0.8271,
      "step": 798540
    },
    {
      "epoch": 2.798718663147897,
      "grad_norm": 2.8125,
      "learning_rate": 3.7275014213727735e-06,
      "loss": 0.8186,
      "step": 798550
    },
    {
      "epoch": 2.7987537106547924,
      "grad_norm": 3.515625,
      "learning_rate": 3.7268523927090715e-06,
      "loss": 0.815,
      "step": 798560
    },
    {
      "epoch": 2.798788758161688,
      "grad_norm": 2.328125,
      "learning_rate": 3.72620336404537e-06,
      "loss": 0.8057,
      "step": 798570
    },
    {
      "epoch": 2.798823805668584,
      "grad_norm": 3.109375,
      "learning_rate": 3.725554335381668e-06,
      "loss": 0.8318,
      "step": 798580
    },
    {
      "epoch": 2.7988588531754792,
      "grad_norm": 2.953125,
      "learning_rate": 3.7249053067179664e-06,
      "loss": 0.8278,
      "step": 798590
    },
    {
      "epoch": 2.798893900682375,
      "grad_norm": 3.03125,
      "learning_rate": 3.724256278054264e-06,
      "loss": 0.8408,
      "step": 798600
    },
    {
      "epoch": 2.7989289481892703,
      "grad_norm": 3.53125,
      "learning_rate": 3.723607249390562e-06,
      "loss": 0.8431,
      "step": 798610
    },
    {
      "epoch": 2.798963995696166,
      "grad_norm": 2.671875,
      "learning_rate": 3.7229582207268604e-06,
      "loss": 0.7264,
      "step": 798620
    },
    {
      "epoch": 2.798999043203062,
      "grad_norm": 2.703125,
      "learning_rate": 3.722309192063159e-06,
      "loss": 0.7348,
      "step": 798630
    },
    {
      "epoch": 2.7990340907099576,
      "grad_norm": 3.15625,
      "learning_rate": 3.721660163399457e-06,
      "loss": 0.7829,
      "step": 798640
    },
    {
      "epoch": 2.799069138216853,
      "grad_norm": 3.625,
      "learning_rate": 3.7210111347357544e-06,
      "loss": 0.8025,
      "step": 798650
    },
    {
      "epoch": 2.7991041857237486,
      "grad_norm": 3.171875,
      "learning_rate": 3.720362106072053e-06,
      "loss": 0.8973,
      "step": 798660
    },
    {
      "epoch": 2.799139233230644,
      "grad_norm": 3.28125,
      "learning_rate": 3.719713077408351e-06,
      "loss": 0.7429,
      "step": 798670
    },
    {
      "epoch": 2.7991742807375397,
      "grad_norm": 2.96875,
      "learning_rate": 3.7190640487446492e-06,
      "loss": 0.8468,
      "step": 798680
    },
    {
      "epoch": 2.7992093282444355,
      "grad_norm": 3.109375,
      "learning_rate": 3.718415020080947e-06,
      "loss": 0.7604,
      "step": 798690
    },
    {
      "epoch": 2.799244375751331,
      "grad_norm": 3.359375,
      "learning_rate": 3.717765991417245e-06,
      "loss": 0.8754,
      "step": 798700
    },
    {
      "epoch": 2.7992794232582265,
      "grad_norm": 2.53125,
      "learning_rate": 3.7171169627535432e-06,
      "loss": 0.8414,
      "step": 798710
    },
    {
      "epoch": 2.799314470765122,
      "grad_norm": 2.859375,
      "learning_rate": 3.7164679340898417e-06,
      "loss": 0.8233,
      "step": 798720
    },
    {
      "epoch": 2.7993495182720176,
      "grad_norm": 3.140625,
      "learning_rate": 3.7158189054261397e-06,
      "loss": 0.8112,
      "step": 798730
    },
    {
      "epoch": 2.7993845657789134,
      "grad_norm": 2.84375,
      "learning_rate": 3.7151698767624372e-06,
      "loss": 0.7685,
      "step": 798740
    },
    {
      "epoch": 2.799419613285809,
      "grad_norm": 2.90625,
      "learning_rate": 3.7145208480987357e-06,
      "loss": 0.7623,
      "step": 798750
    },
    {
      "epoch": 2.7994546607927044,
      "grad_norm": 3.953125,
      "learning_rate": 3.7138718194350337e-06,
      "loss": 0.9841,
      "step": 798760
    },
    {
      "epoch": 2.7994897082996,
      "grad_norm": 3.390625,
      "learning_rate": 3.713222790771332e-06,
      "loss": 0.7641,
      "step": 798770
    },
    {
      "epoch": 2.7995247558064955,
      "grad_norm": 2.671875,
      "learning_rate": 3.71257376210763e-06,
      "loss": 0.7885,
      "step": 798780
    },
    {
      "epoch": 2.7995598033133913,
      "grad_norm": 3.171875,
      "learning_rate": 3.7119247334439277e-06,
      "loss": 0.7879,
      "step": 798790
    },
    {
      "epoch": 2.799594850820287,
      "grad_norm": 3.140625,
      "learning_rate": 3.711275704780226e-06,
      "loss": 0.8373,
      "step": 798800
    },
    {
      "epoch": 2.7996298983271823,
      "grad_norm": 2.828125,
      "learning_rate": 3.710626676116524e-06,
      "loss": 0.783,
      "step": 798810
    },
    {
      "epoch": 2.799664945834078,
      "grad_norm": 3.25,
      "learning_rate": 3.7099776474528225e-06,
      "loss": 0.8209,
      "step": 798820
    },
    {
      "epoch": 2.7996999933409734,
      "grad_norm": 2.5625,
      "learning_rate": 3.70932861878912e-06,
      "loss": 0.7978,
      "step": 798830
    },
    {
      "epoch": 2.799735040847869,
      "grad_norm": 3.140625,
      "learning_rate": 3.708679590125418e-06,
      "loss": 0.7213,
      "step": 798840
    },
    {
      "epoch": 2.799770088354765,
      "grad_norm": 3.140625,
      "learning_rate": 3.7080305614617165e-06,
      "loss": 0.7769,
      "step": 798850
    },
    {
      "epoch": 2.7998051358616607,
      "grad_norm": 3.015625,
      "learning_rate": 3.707381532798015e-06,
      "loss": 0.8408,
      "step": 798860
    },
    {
      "epoch": 2.799840183368556,
      "grad_norm": 3.28125,
      "learning_rate": 3.706732504134313e-06,
      "loss": 0.7712,
      "step": 798870
    },
    {
      "epoch": 2.7998752308754518,
      "grad_norm": 2.984375,
      "learning_rate": 3.7060834754706105e-06,
      "loss": 0.8397,
      "step": 798880
    },
    {
      "epoch": 2.799910278382347,
      "grad_norm": 3.109375,
      "learning_rate": 3.705434446806909e-06,
      "loss": 0.8365,
      "step": 798890
    },
    {
      "epoch": 2.799945325889243,
      "grad_norm": 2.6875,
      "learning_rate": 3.704785418143207e-06,
      "loss": 0.7689,
      "step": 798900
    },
    {
      "epoch": 2.7999803733961386,
      "grad_norm": 2.65625,
      "learning_rate": 3.7041363894795054e-06,
      "loss": 0.7937,
      "step": 798910
    },
    {
      "epoch": 2.800015420903034,
      "grad_norm": 2.671875,
      "learning_rate": 3.7034873608158034e-06,
      "loss": 0.7807,
      "step": 798920
    },
    {
      "epoch": 2.8000504684099297,
      "grad_norm": 3.015625,
      "learning_rate": 3.702838332152101e-06,
      "loss": 0.8448,
      "step": 798930
    },
    {
      "epoch": 2.8000855159168254,
      "grad_norm": 2.875,
      "learning_rate": 3.7021893034883994e-06,
      "loss": 0.7708,
      "step": 798940
    },
    {
      "epoch": 2.8001205634237207,
      "grad_norm": 2.796875,
      "learning_rate": 3.7015402748246978e-06,
      "loss": 0.8429,
      "step": 798950
    },
    {
      "epoch": 2.8001556109306165,
      "grad_norm": 3.203125,
      "learning_rate": 3.7008912461609958e-06,
      "loss": 0.7831,
      "step": 798960
    },
    {
      "epoch": 2.8001906584375122,
      "grad_norm": 2.890625,
      "learning_rate": 3.7002422174972934e-06,
      "loss": 0.8363,
      "step": 798970
    },
    {
      "epoch": 2.8002257059444076,
      "grad_norm": 3.25,
      "learning_rate": 3.6995931888335918e-06,
      "loss": 0.7699,
      "step": 798980
    },
    {
      "epoch": 2.8002607534513033,
      "grad_norm": 3.1875,
      "learning_rate": 3.6989441601698898e-06,
      "loss": 0.8218,
      "step": 798990
    },
    {
      "epoch": 2.8002958009581986,
      "grad_norm": 3.109375,
      "learning_rate": 3.698295131506188e-06,
      "loss": 0.7365,
      "step": 799000
    },
    {
      "epoch": 2.8003308484650944,
      "grad_norm": 3.140625,
      "learning_rate": 3.697646102842486e-06,
      "loss": 0.8383,
      "step": 799010
    },
    {
      "epoch": 2.80036589597199,
      "grad_norm": 3.03125,
      "learning_rate": 3.6969970741787838e-06,
      "loss": 0.7843,
      "step": 799020
    },
    {
      "epoch": 2.800400943478886,
      "grad_norm": 3.15625,
      "learning_rate": 3.696348045515082e-06,
      "loss": 0.782,
      "step": 799030
    },
    {
      "epoch": 2.800435990985781,
      "grad_norm": 2.890625,
      "learning_rate": 3.69569901685138e-06,
      "loss": 0.89,
      "step": 799040
    },
    {
      "epoch": 2.800471038492677,
      "grad_norm": 2.859375,
      "learning_rate": 3.6950499881876786e-06,
      "loss": 0.865,
      "step": 799050
    },
    {
      "epoch": 2.8005060859995723,
      "grad_norm": 2.734375,
      "learning_rate": 3.694400959523977e-06,
      "loss": 0.7972,
      "step": 799060
    },
    {
      "epoch": 2.800541133506468,
      "grad_norm": 2.53125,
      "learning_rate": 3.6937519308602746e-06,
      "loss": 0.7429,
      "step": 799070
    },
    {
      "epoch": 2.800576181013364,
      "grad_norm": 2.8125,
      "learning_rate": 3.6931029021965726e-06,
      "loss": 0.7713,
      "step": 799080
    },
    {
      "epoch": 2.800611228520259,
      "grad_norm": 3.28125,
      "learning_rate": 3.692453873532871e-06,
      "loss": 0.7901,
      "step": 799090
    },
    {
      "epoch": 2.800646276027155,
      "grad_norm": 2.953125,
      "learning_rate": 3.691804844869169e-06,
      "loss": 0.7389,
      "step": 799100
    },
    {
      "epoch": 2.80068132353405,
      "grad_norm": 2.796875,
      "learning_rate": 3.6911558162054675e-06,
      "loss": 0.7592,
      "step": 799110
    },
    {
      "epoch": 2.800716371040946,
      "grad_norm": 2.875,
      "learning_rate": 3.690506787541765e-06,
      "loss": 0.7612,
      "step": 799120
    },
    {
      "epoch": 2.8007514185478417,
      "grad_norm": 2.65625,
      "learning_rate": 3.689857758878063e-06,
      "loss": 0.8148,
      "step": 799130
    },
    {
      "epoch": 2.8007864660547375,
      "grad_norm": 2.53125,
      "learning_rate": 3.6892087302143615e-06,
      "loss": 0.7899,
      "step": 799140
    },
    {
      "epoch": 2.800821513561633,
      "grad_norm": 2.90625,
      "learning_rate": 3.68855970155066e-06,
      "loss": 0.7788,
      "step": 799150
    },
    {
      "epoch": 2.8008565610685285,
      "grad_norm": 2.6875,
      "learning_rate": 3.687910672886957e-06,
      "loss": 0.8316,
      "step": 799160
    },
    {
      "epoch": 2.800891608575424,
      "grad_norm": 2.71875,
      "learning_rate": 3.6872616442232555e-06,
      "loss": 0.8274,
      "step": 799170
    },
    {
      "epoch": 2.8009266560823196,
      "grad_norm": 3.203125,
      "learning_rate": 3.686612615559554e-06,
      "loss": 0.8195,
      "step": 799180
    },
    {
      "epoch": 2.8009617035892154,
      "grad_norm": 3.53125,
      "learning_rate": 3.685963586895852e-06,
      "loss": 0.8215,
      "step": 799190
    },
    {
      "epoch": 2.8009967510961107,
      "grad_norm": 2.734375,
      "learning_rate": 3.6853145582321503e-06,
      "loss": 0.8045,
      "step": 799200
    },
    {
      "epoch": 2.8010317986030064,
      "grad_norm": 3.0,
      "learning_rate": 3.684665529568448e-06,
      "loss": 0.7341,
      "step": 799210
    },
    {
      "epoch": 2.8010668461099018,
      "grad_norm": 3.375,
      "learning_rate": 3.684016500904746e-06,
      "loss": 0.8989,
      "step": 799220
    },
    {
      "epoch": 2.8011018936167975,
      "grad_norm": 3.140625,
      "learning_rate": 3.6833674722410443e-06,
      "loss": 0.8251,
      "step": 799230
    },
    {
      "epoch": 2.8011369411236933,
      "grad_norm": 2.921875,
      "learning_rate": 3.6827184435773423e-06,
      "loss": 0.8493,
      "step": 799240
    },
    {
      "epoch": 2.801171988630589,
      "grad_norm": 2.859375,
      "learning_rate": 3.6820694149136407e-06,
      "loss": 0.8141,
      "step": 799250
    },
    {
      "epoch": 2.8012070361374843,
      "grad_norm": 2.484375,
      "learning_rate": 3.6814203862499383e-06,
      "loss": 0.757,
      "step": 799260
    },
    {
      "epoch": 2.80124208364438,
      "grad_norm": 3.03125,
      "learning_rate": 3.6807713575862367e-06,
      "loss": 0.8446,
      "step": 799270
    },
    {
      "epoch": 2.8012771311512754,
      "grad_norm": 2.546875,
      "learning_rate": 3.6801223289225347e-06,
      "loss": 0.7987,
      "step": 799280
    },
    {
      "epoch": 2.801312178658171,
      "grad_norm": 2.859375,
      "learning_rate": 3.679473300258833e-06,
      "loss": 0.7171,
      "step": 799290
    },
    {
      "epoch": 2.801347226165067,
      "grad_norm": 2.6875,
      "learning_rate": 3.6788242715951307e-06,
      "loss": 0.8824,
      "step": 799300
    },
    {
      "epoch": 2.8013822736719622,
      "grad_norm": 3.03125,
      "learning_rate": 3.6781752429314287e-06,
      "loss": 0.7894,
      "step": 799310
    },
    {
      "epoch": 2.801417321178858,
      "grad_norm": 2.78125,
      "learning_rate": 3.677526214267727e-06,
      "loss": 0.8352,
      "step": 799320
    },
    {
      "epoch": 2.8014523686857533,
      "grad_norm": 3.140625,
      "learning_rate": 3.676877185604025e-06,
      "loss": 0.7788,
      "step": 799330
    },
    {
      "epoch": 2.801487416192649,
      "grad_norm": 2.8125,
      "learning_rate": 3.6762281569403236e-06,
      "loss": 0.8085,
      "step": 799340
    },
    {
      "epoch": 2.801522463699545,
      "grad_norm": 2.453125,
      "learning_rate": 3.675579128276621e-06,
      "loss": 0.7699,
      "step": 799350
    },
    {
      "epoch": 2.8015575112064406,
      "grad_norm": 3.109375,
      "learning_rate": 3.674930099612919e-06,
      "loss": 0.7919,
      "step": 799360
    },
    {
      "epoch": 2.801592558713336,
      "grad_norm": 2.90625,
      "learning_rate": 3.6742810709492176e-06,
      "loss": 0.7812,
      "step": 799370
    },
    {
      "epoch": 2.8016276062202317,
      "grad_norm": 2.90625,
      "learning_rate": 3.673632042285516e-06,
      "loss": 0.756,
      "step": 799380
    },
    {
      "epoch": 2.801662653727127,
      "grad_norm": 3.109375,
      "learning_rate": 3.672983013621814e-06,
      "loss": 0.8007,
      "step": 799390
    },
    {
      "epoch": 2.8016977012340227,
      "grad_norm": 3.46875,
      "learning_rate": 3.6723339849581116e-06,
      "loss": 0.7763,
      "step": 799400
    },
    {
      "epoch": 2.8017327487409185,
      "grad_norm": 3.71875,
      "learning_rate": 3.67168495629441e-06,
      "loss": 0.8092,
      "step": 799410
    },
    {
      "epoch": 2.801767796247814,
      "grad_norm": 3.171875,
      "learning_rate": 3.671035927630708e-06,
      "loss": 0.7972,
      "step": 799420
    },
    {
      "epoch": 2.8018028437547096,
      "grad_norm": 2.78125,
      "learning_rate": 3.6703868989670064e-06,
      "loss": 0.827,
      "step": 799430
    },
    {
      "epoch": 2.801837891261605,
      "grad_norm": 3.09375,
      "learning_rate": 3.669737870303304e-06,
      "loss": 0.8541,
      "step": 799440
    },
    {
      "epoch": 2.8018729387685006,
      "grad_norm": 3.09375,
      "learning_rate": 3.669088841639602e-06,
      "loss": 0.8216,
      "step": 799450
    },
    {
      "epoch": 2.8019079862753964,
      "grad_norm": 3.171875,
      "learning_rate": 3.6684398129759004e-06,
      "loss": 0.8175,
      "step": 799460
    },
    {
      "epoch": 2.801943033782292,
      "grad_norm": 3.34375,
      "learning_rate": 3.6677907843121984e-06,
      "loss": 0.7504,
      "step": 799470
    },
    {
      "epoch": 2.8019780812891875,
      "grad_norm": 2.859375,
      "learning_rate": 3.667141755648497e-06,
      "loss": 0.8438,
      "step": 799480
    },
    {
      "epoch": 2.802013128796083,
      "grad_norm": 3.21875,
      "learning_rate": 3.6664927269847944e-06,
      "loss": 0.7738,
      "step": 799490
    },
    {
      "epoch": 2.8020481763029785,
      "grad_norm": 2.78125,
      "learning_rate": 3.665843698321093e-06,
      "loss": 0.7928,
      "step": 799500
    },
    {
      "epoch": 2.8020832238098743,
      "grad_norm": 3.046875,
      "learning_rate": 3.665194669657391e-06,
      "loss": 0.7932,
      "step": 799510
    },
    {
      "epoch": 2.80211827131677,
      "grad_norm": 3.28125,
      "learning_rate": 3.6645456409936893e-06,
      "loss": 0.7354,
      "step": 799520
    },
    {
      "epoch": 2.8021533188236654,
      "grad_norm": 3.15625,
      "learning_rate": 3.6638966123299873e-06,
      "loss": 0.7908,
      "step": 799530
    },
    {
      "epoch": 2.802188366330561,
      "grad_norm": 2.9375,
      "learning_rate": 3.663247583666285e-06,
      "loss": 0.8142,
      "step": 799540
    },
    {
      "epoch": 2.8022234138374564,
      "grad_norm": 3.390625,
      "learning_rate": 3.6625985550025833e-06,
      "loss": 0.8163,
      "step": 799550
    },
    {
      "epoch": 2.802258461344352,
      "grad_norm": 3.203125,
      "learning_rate": 3.6619495263388813e-06,
      "loss": 0.7965,
      "step": 799560
    },
    {
      "epoch": 2.802293508851248,
      "grad_norm": 3.0,
      "learning_rate": 3.6613004976751797e-06,
      "loss": 0.7622,
      "step": 799570
    },
    {
      "epoch": 2.8023285563581437,
      "grad_norm": 2.890625,
      "learning_rate": 3.660651469011478e-06,
      "loss": 0.7813,
      "step": 799580
    },
    {
      "epoch": 2.802363603865039,
      "grad_norm": 3.0625,
      "learning_rate": 3.6600024403477753e-06,
      "loss": 0.7372,
      "step": 799590
    },
    {
      "epoch": 2.802398651371935,
      "grad_norm": 2.71875,
      "learning_rate": 3.6593534116840737e-06,
      "loss": 0.8068,
      "step": 799600
    },
    {
      "epoch": 2.80243369887883,
      "grad_norm": 2.734375,
      "learning_rate": 3.658704383020372e-06,
      "loss": 0.7635,
      "step": 799610
    },
    {
      "epoch": 2.802468746385726,
      "grad_norm": 2.796875,
      "learning_rate": 3.65805535435667e-06,
      "loss": 0.7638,
      "step": 799620
    },
    {
      "epoch": 2.8025037938926216,
      "grad_norm": 2.78125,
      "learning_rate": 3.6574063256929677e-06,
      "loss": 0.8004,
      "step": 799630
    },
    {
      "epoch": 2.802538841399517,
      "grad_norm": 2.78125,
      "learning_rate": 3.656757297029266e-06,
      "loss": 0.767,
      "step": 799640
    },
    {
      "epoch": 2.8025738889064127,
      "grad_norm": 2.546875,
      "learning_rate": 3.656108268365564e-06,
      "loss": 0.8144,
      "step": 799650
    },
    {
      "epoch": 2.802608936413308,
      "grad_norm": 3.0,
      "learning_rate": 3.6554592397018625e-06,
      "loss": 0.7353,
      "step": 799660
    },
    {
      "epoch": 2.8026439839202038,
      "grad_norm": 3.5625,
      "learning_rate": 3.6548102110381605e-06,
      "loss": 0.7937,
      "step": 799670
    },
    {
      "epoch": 2.8026790314270995,
      "grad_norm": 2.59375,
      "learning_rate": 3.654161182374458e-06,
      "loss": 0.7951,
      "step": 799680
    },
    {
      "epoch": 2.8027140789339953,
      "grad_norm": 2.734375,
      "learning_rate": 3.6535121537107565e-06,
      "loss": 0.7319,
      "step": 799690
    },
    {
      "epoch": 2.8027491264408906,
      "grad_norm": 2.921875,
      "learning_rate": 3.652863125047055e-06,
      "loss": 0.7297,
      "step": 799700
    },
    {
      "epoch": 2.8027841739477863,
      "grad_norm": 3.046875,
      "learning_rate": 3.652214096383353e-06,
      "loss": 0.8302,
      "step": 799710
    },
    {
      "epoch": 2.8028192214546817,
      "grad_norm": 2.75,
      "learning_rate": 3.6515650677196514e-06,
      "loss": 0.8002,
      "step": 799720
    },
    {
      "epoch": 2.8028542689615774,
      "grad_norm": 2.546875,
      "learning_rate": 3.650916039055949e-06,
      "loss": 0.8279,
      "step": 799730
    },
    {
      "epoch": 2.802889316468473,
      "grad_norm": 2.625,
      "learning_rate": 3.650267010392247e-06,
      "loss": 0.7639,
      "step": 799740
    },
    {
      "epoch": 2.8029243639753685,
      "grad_norm": 2.796875,
      "learning_rate": 3.6496179817285454e-06,
      "loss": 0.821,
      "step": 799750
    },
    {
      "epoch": 2.8029594114822642,
      "grad_norm": 3.0625,
      "learning_rate": 3.6489689530648434e-06,
      "loss": 0.8111,
      "step": 799760
    },
    {
      "epoch": 2.8029944589891596,
      "grad_norm": 3.28125,
      "learning_rate": 3.648319924401141e-06,
      "loss": 0.7802,
      "step": 799770
    },
    {
      "epoch": 2.8030295064960553,
      "grad_norm": 2.890625,
      "learning_rate": 3.6476708957374394e-06,
      "loss": 0.7575,
      "step": 799780
    },
    {
      "epoch": 2.803064554002951,
      "grad_norm": 2.78125,
      "learning_rate": 3.6470218670737374e-06,
      "loss": 0.7759,
      "step": 799790
    },
    {
      "epoch": 2.803099601509847,
      "grad_norm": 2.890625,
      "learning_rate": 3.646372838410036e-06,
      "loss": 0.7824,
      "step": 799800
    },
    {
      "epoch": 2.803134649016742,
      "grad_norm": 2.8125,
      "learning_rate": 3.6457238097463342e-06,
      "loss": 0.7373,
      "step": 799810
    },
    {
      "epoch": 2.803169696523638,
      "grad_norm": 2.4375,
      "learning_rate": 3.645074781082632e-06,
      "loss": 0.6725,
      "step": 799820
    },
    {
      "epoch": 2.803204744030533,
      "grad_norm": 3.4375,
      "learning_rate": 3.64442575241893e-06,
      "loss": 0.8209,
      "step": 799830
    },
    {
      "epoch": 2.803239791537429,
      "grad_norm": 3.046875,
      "learning_rate": 3.6437767237552282e-06,
      "loss": 0.8046,
      "step": 799840
    },
    {
      "epoch": 2.8032748390443247,
      "grad_norm": 2.609375,
      "learning_rate": 3.6431276950915262e-06,
      "loss": 0.8195,
      "step": 799850
    },
    {
      "epoch": 2.80330988655122,
      "grad_norm": 3.390625,
      "learning_rate": 3.6424786664278247e-06,
      "loss": 0.7947,
      "step": 799860
    },
    {
      "epoch": 2.803344934058116,
      "grad_norm": 3.46875,
      "learning_rate": 3.6418296377641222e-06,
      "loss": 0.7621,
      "step": 799870
    },
    {
      "epoch": 2.803379981565011,
      "grad_norm": 2.875,
      "learning_rate": 3.6411806091004202e-06,
      "loss": 0.7349,
      "step": 799880
    },
    {
      "epoch": 2.803415029071907,
      "grad_norm": 2.671875,
      "learning_rate": 3.6405315804367187e-06,
      "loss": 0.7611,
      "step": 799890
    },
    {
      "epoch": 2.8034500765788026,
      "grad_norm": 3.125,
      "learning_rate": 3.6398825517730167e-06,
      "loss": 0.7539,
      "step": 799900
    },
    {
      "epoch": 2.8034851240856984,
      "grad_norm": 2.859375,
      "learning_rate": 3.639233523109315e-06,
      "loss": 0.752,
      "step": 799910
    },
    {
      "epoch": 2.8035201715925937,
      "grad_norm": 2.78125,
      "learning_rate": 3.6385844944456127e-06,
      "loss": 0.8394,
      "step": 799920
    },
    {
      "epoch": 2.8035552190994895,
      "grad_norm": 3.109375,
      "learning_rate": 3.637935465781911e-06,
      "loss": 0.896,
      "step": 799930
    },
    {
      "epoch": 2.8035902666063848,
      "grad_norm": 2.890625,
      "learning_rate": 3.637286437118209e-06,
      "loss": 0.723,
      "step": 799940
    },
    {
      "epoch": 2.8036253141132805,
      "grad_norm": 3.171875,
      "learning_rate": 3.6366374084545075e-06,
      "loss": 0.802,
      "step": 799950
    },
    {
      "epoch": 2.8036603616201763,
      "grad_norm": 2.8125,
      "learning_rate": 3.635988379790805e-06,
      "loss": 0.766,
      "step": 799960
    },
    {
      "epoch": 2.8036954091270716,
      "grad_norm": 2.84375,
      "learning_rate": 3.635339351127103e-06,
      "loss": 0.8083,
      "step": 799970
    },
    {
      "epoch": 2.8037304566339674,
      "grad_norm": 2.890625,
      "learning_rate": 3.6346903224634015e-06,
      "loss": 0.7866,
      "step": 799980
    },
    {
      "epoch": 2.8037655041408627,
      "grad_norm": 3.109375,
      "learning_rate": 3.6340412937996995e-06,
      "loss": 0.9152,
      "step": 799990
    },
    {
      "epoch": 2.8038005516477584,
      "grad_norm": 2.65625,
      "learning_rate": 3.633392265135998e-06,
      "loss": 0.7543,
      "step": 800000
    },
    {
      "epoch": 2.8038005516477584,
      "eval_loss": 0.750343918800354,
      "eval_runtime": 554.1374,
      "eval_samples_per_second": 686.537,
      "eval_steps_per_second": 57.211,
      "step": 800000
    },
    {
      "epoch": 2.803835599154654,
      "grad_norm": 2.890625,
      "learning_rate": 3.6327432364722955e-06,
      "loss": 0.7675,
      "step": 800010
    },
    {
      "epoch": 2.80387064666155,
      "grad_norm": 3.015625,
      "learning_rate": 3.6320942078085935e-06,
      "loss": 0.779,
      "step": 800020
    },
    {
      "epoch": 2.8039056941684453,
      "grad_norm": 2.25,
      "learning_rate": 3.631445179144892e-06,
      "loss": 0.7254,
      "step": 800030
    },
    {
      "epoch": 2.803940741675341,
      "grad_norm": 2.625,
      "learning_rate": 3.6307961504811904e-06,
      "loss": 0.7447,
      "step": 800040
    },
    {
      "epoch": 2.8039757891822363,
      "grad_norm": 2.6875,
      "learning_rate": 3.6301471218174884e-06,
      "loss": 0.8484,
      "step": 800050
    },
    {
      "epoch": 2.804010836689132,
      "grad_norm": 2.84375,
      "learning_rate": 3.629498093153786e-06,
      "loss": 0.8194,
      "step": 800060
    },
    {
      "epoch": 2.804045884196028,
      "grad_norm": 2.953125,
      "learning_rate": 3.6288490644900844e-06,
      "loss": 0.8527,
      "step": 800070
    },
    {
      "epoch": 2.804080931702923,
      "grad_norm": 2.84375,
      "learning_rate": 3.6282000358263824e-06,
      "loss": 0.7849,
      "step": 800080
    },
    {
      "epoch": 2.804115979209819,
      "grad_norm": 2.75,
      "learning_rate": 3.6275510071626808e-06,
      "loss": 0.7918,
      "step": 800090
    },
    {
      "epoch": 2.8041510267167142,
      "grad_norm": 2.703125,
      "learning_rate": 3.6269019784989784e-06,
      "loss": 0.8349,
      "step": 800100
    },
    {
      "epoch": 2.80418607422361,
      "grad_norm": 2.953125,
      "learning_rate": 3.6262529498352764e-06,
      "loss": 0.7874,
      "step": 800110
    },
    {
      "epoch": 2.8042211217305058,
      "grad_norm": 2.671875,
      "learning_rate": 3.6256039211715748e-06,
      "loss": 0.7731,
      "step": 800120
    },
    {
      "epoch": 2.8042561692374015,
      "grad_norm": 3.140625,
      "learning_rate": 3.624954892507873e-06,
      "loss": 0.7826,
      "step": 800130
    },
    {
      "epoch": 2.804291216744297,
      "grad_norm": 3.21875,
      "learning_rate": 3.624305863844171e-06,
      "loss": 0.8851,
      "step": 800140
    },
    {
      "epoch": 2.8043262642511926,
      "grad_norm": 3.03125,
      "learning_rate": 3.6236568351804688e-06,
      "loss": 0.9044,
      "step": 800150
    },
    {
      "epoch": 2.804361311758088,
      "grad_norm": 2.890625,
      "learning_rate": 3.623007806516767e-06,
      "loss": 0.7553,
      "step": 800160
    },
    {
      "epoch": 2.8043963592649837,
      "grad_norm": 3.1875,
      "learning_rate": 3.622358777853065e-06,
      "loss": 0.821,
      "step": 800170
    },
    {
      "epoch": 2.8044314067718794,
      "grad_norm": 2.703125,
      "learning_rate": 3.6217097491893636e-06,
      "loss": 0.7365,
      "step": 800180
    },
    {
      "epoch": 2.8044664542787747,
      "grad_norm": 3.171875,
      "learning_rate": 3.6210607205256616e-06,
      "loss": 0.8211,
      "step": 800190
    },
    {
      "epoch": 2.8045015017856705,
      "grad_norm": 2.90625,
      "learning_rate": 3.620411691861959e-06,
      "loss": 0.7844,
      "step": 800200
    },
    {
      "epoch": 2.8045365492925662,
      "grad_norm": 2.625,
      "learning_rate": 3.6197626631982576e-06,
      "loss": 0.7922,
      "step": 800210
    },
    {
      "epoch": 2.8045715967994616,
      "grad_norm": 3.53125,
      "learning_rate": 3.6191136345345556e-06,
      "loss": 0.814,
      "step": 800220
    },
    {
      "epoch": 2.8046066443063573,
      "grad_norm": 3.3125,
      "learning_rate": 3.618464605870854e-06,
      "loss": 0.8284,
      "step": 800230
    },
    {
      "epoch": 2.804641691813253,
      "grad_norm": 2.75,
      "learning_rate": 3.6178155772071516e-06,
      "loss": 0.8224,
      "step": 800240
    },
    {
      "epoch": 2.8046767393201484,
      "grad_norm": 3.265625,
      "learning_rate": 3.61716654854345e-06,
      "loss": 0.8078,
      "step": 800250
    },
    {
      "epoch": 2.804711786827044,
      "grad_norm": 3.140625,
      "learning_rate": 3.616517519879748e-06,
      "loss": 0.7515,
      "step": 800260
    },
    {
      "epoch": 2.8047468343339395,
      "grad_norm": 2.890625,
      "learning_rate": 3.6158684912160465e-06,
      "loss": 0.8057,
      "step": 800270
    },
    {
      "epoch": 2.804781881840835,
      "grad_norm": 3.609375,
      "learning_rate": 3.6152194625523445e-06,
      "loss": 0.7839,
      "step": 800280
    },
    {
      "epoch": 2.804816929347731,
      "grad_norm": 2.984375,
      "learning_rate": 3.614570433888642e-06,
      "loss": 0.7304,
      "step": 800290
    },
    {
      "epoch": 2.8048519768546263,
      "grad_norm": 3.0625,
      "learning_rate": 3.6139214052249405e-06,
      "loss": 0.8555,
      "step": 800300
    },
    {
      "epoch": 2.804887024361522,
      "grad_norm": 3.265625,
      "learning_rate": 3.6132723765612385e-06,
      "loss": 0.8249,
      "step": 800310
    },
    {
      "epoch": 2.804922071868418,
      "grad_norm": 2.84375,
      "learning_rate": 3.612623347897537e-06,
      "loss": 0.7507,
      "step": 800320
    },
    {
      "epoch": 2.804957119375313,
      "grad_norm": 2.78125,
      "learning_rate": 3.6119743192338353e-06,
      "loss": 0.774,
      "step": 800330
    },
    {
      "epoch": 2.804992166882209,
      "grad_norm": 3.171875,
      "learning_rate": 3.6113252905701325e-06,
      "loss": 0.7874,
      "step": 800340
    },
    {
      "epoch": 2.8050272143891046,
      "grad_norm": 3.0625,
      "learning_rate": 3.610676261906431e-06,
      "loss": 0.8233,
      "step": 800350
    },
    {
      "epoch": 2.805062261896,
      "grad_norm": 3.125,
      "learning_rate": 3.6100272332427293e-06,
      "loss": 0.7297,
      "step": 800360
    },
    {
      "epoch": 2.8050973094028957,
      "grad_norm": 2.84375,
      "learning_rate": 3.6093782045790273e-06,
      "loss": 0.7815,
      "step": 800370
    },
    {
      "epoch": 2.805132356909791,
      "grad_norm": 3.453125,
      "learning_rate": 3.6087291759153257e-06,
      "loss": 0.8024,
      "step": 800380
    },
    {
      "epoch": 2.8051674044166868,
      "grad_norm": 2.640625,
      "learning_rate": 3.6080801472516233e-06,
      "loss": 0.8871,
      "step": 800390
    },
    {
      "epoch": 2.8052024519235825,
      "grad_norm": 2.984375,
      "learning_rate": 3.6074311185879213e-06,
      "loss": 0.81,
      "step": 800400
    },
    {
      "epoch": 2.8052374994304783,
      "grad_norm": 2.390625,
      "learning_rate": 3.6067820899242197e-06,
      "loss": 0.755,
      "step": 800410
    },
    {
      "epoch": 2.8052725469373736,
      "grad_norm": 3.109375,
      "learning_rate": 3.6061330612605177e-06,
      "loss": 0.8206,
      "step": 800420
    },
    {
      "epoch": 2.8053075944442694,
      "grad_norm": 2.390625,
      "learning_rate": 3.6054840325968153e-06,
      "loss": 0.7695,
      "step": 800430
    },
    {
      "epoch": 2.8053426419511647,
      "grad_norm": 2.953125,
      "learning_rate": 3.6048350039331137e-06,
      "loss": 0.8281,
      "step": 800440
    },
    {
      "epoch": 2.8053776894580604,
      "grad_norm": 2.984375,
      "learning_rate": 3.604185975269412e-06,
      "loss": 0.8172,
      "step": 800450
    },
    {
      "epoch": 2.805412736964956,
      "grad_norm": 2.625,
      "learning_rate": 3.60353694660571e-06,
      "loss": 0.836,
      "step": 800460
    },
    {
      "epoch": 2.8054477844718515,
      "grad_norm": 2.828125,
      "learning_rate": 3.6028879179420086e-06,
      "loss": 0.7905,
      "step": 800470
    },
    {
      "epoch": 2.8054828319787473,
      "grad_norm": 2.640625,
      "learning_rate": 3.602238889278306e-06,
      "loss": 0.8824,
      "step": 800480
    },
    {
      "epoch": 2.8055178794856426,
      "grad_norm": 2.390625,
      "learning_rate": 3.601589860614604e-06,
      "loss": 0.8672,
      "step": 800490
    },
    {
      "epoch": 2.8055529269925383,
      "grad_norm": 3.15625,
      "learning_rate": 3.6009408319509026e-06,
      "loss": 0.7814,
      "step": 800500
    },
    {
      "epoch": 2.805587974499434,
      "grad_norm": 2.625,
      "learning_rate": 3.6002918032872006e-06,
      "loss": 0.8032,
      "step": 800510
    },
    {
      "epoch": 2.80562302200633,
      "grad_norm": 3.25,
      "learning_rate": 3.599642774623499e-06,
      "loss": 0.8023,
      "step": 800520
    },
    {
      "epoch": 2.805658069513225,
      "grad_norm": 2.453125,
      "learning_rate": 3.5989937459597966e-06,
      "loss": 0.8178,
      "step": 800530
    },
    {
      "epoch": 2.805693117020121,
      "grad_norm": 2.921875,
      "learning_rate": 3.5983447172960946e-06,
      "loss": 0.7695,
      "step": 800540
    },
    {
      "epoch": 2.8057281645270162,
      "grad_norm": 2.78125,
      "learning_rate": 3.597695688632393e-06,
      "loss": 0.7352,
      "step": 800550
    },
    {
      "epoch": 2.805763212033912,
      "grad_norm": 3.09375,
      "learning_rate": 3.5970466599686914e-06,
      "loss": 0.8318,
      "step": 800560
    },
    {
      "epoch": 2.8057982595408077,
      "grad_norm": 2.984375,
      "learning_rate": 3.596397631304989e-06,
      "loss": 0.8544,
      "step": 800570
    },
    {
      "epoch": 2.805833307047703,
      "grad_norm": 2.953125,
      "learning_rate": 3.595748602641287e-06,
      "loss": 0.8032,
      "step": 800580
    },
    {
      "epoch": 2.805868354554599,
      "grad_norm": 2.71875,
      "learning_rate": 3.5950995739775854e-06,
      "loss": 0.707,
      "step": 800590
    },
    {
      "epoch": 2.805903402061494,
      "grad_norm": 3.046875,
      "learning_rate": 3.5944505453138834e-06,
      "loss": 0.7506,
      "step": 800600
    },
    {
      "epoch": 2.80593844956839,
      "grad_norm": 2.6875,
      "learning_rate": 3.593801516650182e-06,
      "loss": 0.7136,
      "step": 800610
    },
    {
      "epoch": 2.8059734970752856,
      "grad_norm": 3.1875,
      "learning_rate": 3.5931524879864794e-06,
      "loss": 0.7918,
      "step": 800620
    },
    {
      "epoch": 2.8060085445821814,
      "grad_norm": 3.34375,
      "learning_rate": 3.5925034593227774e-06,
      "loss": 0.7884,
      "step": 800630
    },
    {
      "epoch": 2.8060435920890767,
      "grad_norm": 2.890625,
      "learning_rate": 3.591854430659076e-06,
      "loss": 0.8113,
      "step": 800640
    },
    {
      "epoch": 2.8060786395959725,
      "grad_norm": 2.671875,
      "learning_rate": 3.591205401995374e-06,
      "loss": 0.8892,
      "step": 800650
    },
    {
      "epoch": 2.806113687102868,
      "grad_norm": 2.640625,
      "learning_rate": 3.5905563733316723e-06,
      "loss": 0.7403,
      "step": 800660
    },
    {
      "epoch": 2.8061487346097636,
      "grad_norm": 3.203125,
      "learning_rate": 3.58990734466797e-06,
      "loss": 0.8853,
      "step": 800670
    },
    {
      "epoch": 2.8061837821166593,
      "grad_norm": 3.265625,
      "learning_rate": 3.5892583160042683e-06,
      "loss": 0.8113,
      "step": 800680
    },
    {
      "epoch": 2.8062188296235546,
      "grad_norm": 3.25,
      "learning_rate": 3.5886092873405663e-06,
      "loss": 0.8257,
      "step": 800690
    },
    {
      "epoch": 2.8062538771304504,
      "grad_norm": 2.421875,
      "learning_rate": 3.5879602586768647e-06,
      "loss": 0.813,
      "step": 800700
    },
    {
      "epoch": 2.8062889246373457,
      "grad_norm": 3.359375,
      "learning_rate": 3.5873112300131623e-06,
      "loss": 0.8408,
      "step": 800710
    },
    {
      "epoch": 2.8063239721442415,
      "grad_norm": 2.65625,
      "learning_rate": 3.5866622013494603e-06,
      "loss": 0.7974,
      "step": 800720
    },
    {
      "epoch": 2.806359019651137,
      "grad_norm": 2.78125,
      "learning_rate": 3.5860131726857587e-06,
      "loss": 0.7883,
      "step": 800730
    },
    {
      "epoch": 2.806394067158033,
      "grad_norm": 2.96875,
      "learning_rate": 3.5853641440220567e-06,
      "loss": 0.7771,
      "step": 800740
    },
    {
      "epoch": 2.8064291146649283,
      "grad_norm": 2.921875,
      "learning_rate": 3.584715115358355e-06,
      "loss": 0.8879,
      "step": 800750
    },
    {
      "epoch": 2.806464162171824,
      "grad_norm": 3.109375,
      "learning_rate": 3.5840660866946527e-06,
      "loss": 0.8024,
      "step": 800760
    },
    {
      "epoch": 2.8064992096787194,
      "grad_norm": 3.109375,
      "learning_rate": 3.5834170580309507e-06,
      "loss": 0.7638,
      "step": 800770
    },
    {
      "epoch": 2.806534257185615,
      "grad_norm": 2.8125,
      "learning_rate": 3.582768029367249e-06,
      "loss": 0.7696,
      "step": 800780
    },
    {
      "epoch": 2.806569304692511,
      "grad_norm": 3.15625,
      "learning_rate": 3.5821190007035475e-06,
      "loss": 0.7729,
      "step": 800790
    },
    {
      "epoch": 2.806604352199406,
      "grad_norm": 2.5625,
      "learning_rate": 3.5814699720398455e-06,
      "loss": 0.7136,
      "step": 800800
    },
    {
      "epoch": 2.806639399706302,
      "grad_norm": 3.40625,
      "learning_rate": 3.580820943376143e-06,
      "loss": 0.834,
      "step": 800810
    },
    {
      "epoch": 2.8066744472131973,
      "grad_norm": 3.109375,
      "learning_rate": 3.5801719147124415e-06,
      "loss": 0.8179,
      "step": 800820
    },
    {
      "epoch": 2.806709494720093,
      "grad_norm": 2.90625,
      "learning_rate": 3.5795228860487395e-06,
      "loss": 0.8481,
      "step": 800830
    },
    {
      "epoch": 2.8067445422269888,
      "grad_norm": 2.8125,
      "learning_rate": 3.578873857385038e-06,
      "loss": 0.8096,
      "step": 800840
    },
    {
      "epoch": 2.8067795897338845,
      "grad_norm": 2.8125,
      "learning_rate": 3.578224828721336e-06,
      "loss": 0.724,
      "step": 800850
    },
    {
      "epoch": 2.80681463724078,
      "grad_norm": 3.28125,
      "learning_rate": 3.5775758000576335e-06,
      "loss": 0.7992,
      "step": 800860
    },
    {
      "epoch": 2.8068496847476756,
      "grad_norm": 2.90625,
      "learning_rate": 3.576926771393932e-06,
      "loss": 0.8464,
      "step": 800870
    },
    {
      "epoch": 2.806884732254571,
      "grad_norm": 2.828125,
      "learning_rate": 3.5762777427302304e-06,
      "loss": 0.7287,
      "step": 800880
    },
    {
      "epoch": 2.8069197797614667,
      "grad_norm": 3.4375,
      "learning_rate": 3.5756287140665284e-06,
      "loss": 0.8118,
      "step": 800890
    },
    {
      "epoch": 2.8069548272683624,
      "grad_norm": 2.8125,
      "learning_rate": 3.574979685402826e-06,
      "loss": 0.815,
      "step": 800900
    },
    {
      "epoch": 2.8069898747752577,
      "grad_norm": 2.90625,
      "learning_rate": 3.5743306567391244e-06,
      "loss": 0.817,
      "step": 800910
    },
    {
      "epoch": 2.8070249222821535,
      "grad_norm": 2.90625,
      "learning_rate": 3.5736816280754224e-06,
      "loss": 0.8567,
      "step": 800920
    },
    {
      "epoch": 2.807059969789049,
      "grad_norm": 2.546875,
      "learning_rate": 3.573032599411721e-06,
      "loss": 0.769,
      "step": 800930
    },
    {
      "epoch": 2.8070950172959446,
      "grad_norm": 2.734375,
      "learning_rate": 3.572383570748019e-06,
      "loss": 0.8387,
      "step": 800940
    },
    {
      "epoch": 2.8071300648028403,
      "grad_norm": 2.640625,
      "learning_rate": 3.5717345420843164e-06,
      "loss": 0.8146,
      "step": 800950
    },
    {
      "epoch": 2.807165112309736,
      "grad_norm": 2.609375,
      "learning_rate": 3.571085513420615e-06,
      "loss": 0.7279,
      "step": 800960
    },
    {
      "epoch": 2.8072001598166314,
      "grad_norm": 2.734375,
      "learning_rate": 3.570436484756913e-06,
      "loss": 0.7841,
      "step": 800970
    },
    {
      "epoch": 2.807235207323527,
      "grad_norm": 3.015625,
      "learning_rate": 3.5697874560932112e-06,
      "loss": 0.7979,
      "step": 800980
    },
    {
      "epoch": 2.8072702548304225,
      "grad_norm": 3.203125,
      "learning_rate": 3.5691384274295097e-06,
      "loss": 0.8157,
      "step": 800990
    },
    {
      "epoch": 2.8073053023373182,
      "grad_norm": 3.8125,
      "learning_rate": 3.5684893987658072e-06,
      "loss": 0.8724,
      "step": 801000
    },
    {
      "epoch": 2.807340349844214,
      "grad_norm": 3.046875,
      "learning_rate": 3.5678403701021052e-06,
      "loss": 0.8371,
      "step": 801010
    },
    {
      "epoch": 2.8073753973511093,
      "grad_norm": 2.953125,
      "learning_rate": 3.5671913414384037e-06,
      "loss": 0.8034,
      "step": 801020
    },
    {
      "epoch": 2.807410444858005,
      "grad_norm": 2.703125,
      "learning_rate": 3.5665423127747017e-06,
      "loss": 0.7848,
      "step": 801030
    },
    {
      "epoch": 2.8074454923649004,
      "grad_norm": 2.65625,
      "learning_rate": 3.5658932841109992e-06,
      "loss": 0.7054,
      "step": 801040
    },
    {
      "epoch": 2.807480539871796,
      "grad_norm": 3.0625,
      "learning_rate": 3.5652442554472977e-06,
      "loss": 0.7952,
      "step": 801050
    },
    {
      "epoch": 2.807515587378692,
      "grad_norm": 2.515625,
      "learning_rate": 3.5645952267835957e-06,
      "loss": 0.7458,
      "step": 801060
    },
    {
      "epoch": 2.8075506348855876,
      "grad_norm": 3.28125,
      "learning_rate": 3.563946198119894e-06,
      "loss": 0.907,
      "step": 801070
    },
    {
      "epoch": 2.807585682392483,
      "grad_norm": 2.984375,
      "learning_rate": 3.563297169456192e-06,
      "loss": 0.7948,
      "step": 801080
    },
    {
      "epoch": 2.8076207298993787,
      "grad_norm": 2.53125,
      "learning_rate": 3.5626481407924897e-06,
      "loss": 0.8509,
      "step": 801090
    },
    {
      "epoch": 2.807655777406274,
      "grad_norm": 3.140625,
      "learning_rate": 3.561999112128788e-06,
      "loss": 0.7879,
      "step": 801100
    },
    {
      "epoch": 2.80769082491317,
      "grad_norm": 3.421875,
      "learning_rate": 3.5613500834650865e-06,
      "loss": 0.8209,
      "step": 801110
    },
    {
      "epoch": 2.8077258724200655,
      "grad_norm": 2.875,
      "learning_rate": 3.5607010548013845e-06,
      "loss": 0.73,
      "step": 801120
    },
    {
      "epoch": 2.807760919926961,
      "grad_norm": 2.890625,
      "learning_rate": 3.560052026137683e-06,
      "loss": 0.7731,
      "step": 801130
    },
    {
      "epoch": 2.8077959674338566,
      "grad_norm": 3.078125,
      "learning_rate": 3.5594029974739805e-06,
      "loss": 0.8123,
      "step": 801140
    },
    {
      "epoch": 2.807831014940752,
      "grad_norm": 3.0,
      "learning_rate": 3.5587539688102785e-06,
      "loss": 0.7619,
      "step": 801150
    },
    {
      "epoch": 2.8078660624476477,
      "grad_norm": 2.75,
      "learning_rate": 3.558104940146577e-06,
      "loss": 0.7784,
      "step": 801160
    },
    {
      "epoch": 2.8079011099545435,
      "grad_norm": 2.75,
      "learning_rate": 3.557455911482875e-06,
      "loss": 0.722,
      "step": 801170
    },
    {
      "epoch": 2.807936157461439,
      "grad_norm": 2.734375,
      "learning_rate": 3.5568068828191725e-06,
      "loss": 0.8347,
      "step": 801180
    },
    {
      "epoch": 2.8079712049683345,
      "grad_norm": 2.78125,
      "learning_rate": 3.556157854155471e-06,
      "loss": 0.7357,
      "step": 801190
    },
    {
      "epoch": 2.8080062524752303,
      "grad_norm": 3.0625,
      "learning_rate": 3.555508825491769e-06,
      "loss": 0.8089,
      "step": 801200
    },
    {
      "epoch": 2.8080412999821256,
      "grad_norm": 2.625,
      "learning_rate": 3.5548597968280674e-06,
      "loss": 0.7737,
      "step": 801210
    },
    {
      "epoch": 2.8080763474890214,
      "grad_norm": 3.15625,
      "learning_rate": 3.5542107681643658e-06,
      "loss": 0.7329,
      "step": 801220
    },
    {
      "epoch": 2.808111394995917,
      "grad_norm": 2.640625,
      "learning_rate": 3.5535617395006634e-06,
      "loss": 0.7909,
      "step": 801230
    },
    {
      "epoch": 2.8081464425028124,
      "grad_norm": 3.234375,
      "learning_rate": 3.5529127108369614e-06,
      "loss": 0.8106,
      "step": 801240
    },
    {
      "epoch": 2.808181490009708,
      "grad_norm": 2.59375,
      "learning_rate": 3.5522636821732598e-06,
      "loss": 0.8136,
      "step": 801250
    },
    {
      "epoch": 2.8082165375166035,
      "grad_norm": 2.828125,
      "learning_rate": 3.5516146535095578e-06,
      "loss": 0.713,
      "step": 801260
    },
    {
      "epoch": 2.8082515850234993,
      "grad_norm": 3.1875,
      "learning_rate": 3.550965624845856e-06,
      "loss": 0.8063,
      "step": 801270
    },
    {
      "epoch": 2.808286632530395,
      "grad_norm": 2.96875,
      "learning_rate": 3.5503165961821538e-06,
      "loss": 0.8067,
      "step": 801280
    },
    {
      "epoch": 2.8083216800372908,
      "grad_norm": 2.734375,
      "learning_rate": 3.5496675675184518e-06,
      "loss": 0.8091,
      "step": 801290
    },
    {
      "epoch": 2.808356727544186,
      "grad_norm": 3.109375,
      "learning_rate": 3.54901853885475e-06,
      "loss": 0.8132,
      "step": 801300
    },
    {
      "epoch": 2.808391775051082,
      "grad_norm": 2.65625,
      "learning_rate": 3.5483695101910486e-06,
      "loss": 0.7859,
      "step": 801310
    },
    {
      "epoch": 2.808426822557977,
      "grad_norm": 3.0,
      "learning_rate": 3.5477204815273466e-06,
      "loss": 0.8229,
      "step": 801320
    },
    {
      "epoch": 2.808461870064873,
      "grad_norm": 2.84375,
      "learning_rate": 3.547071452863644e-06,
      "loss": 0.7846,
      "step": 801330
    },
    {
      "epoch": 2.8084969175717687,
      "grad_norm": 2.890625,
      "learning_rate": 3.5464224241999426e-06,
      "loss": 0.7883,
      "step": 801340
    },
    {
      "epoch": 2.808531965078664,
      "grad_norm": 2.53125,
      "learning_rate": 3.5457733955362406e-06,
      "loss": 0.76,
      "step": 801350
    },
    {
      "epoch": 2.8085670125855597,
      "grad_norm": 2.78125,
      "learning_rate": 3.545124366872539e-06,
      "loss": 0.7638,
      "step": 801360
    },
    {
      "epoch": 2.808602060092455,
      "grad_norm": 2.78125,
      "learning_rate": 3.5444753382088366e-06,
      "loss": 0.7549,
      "step": 801370
    },
    {
      "epoch": 2.808637107599351,
      "grad_norm": 2.640625,
      "learning_rate": 3.5438263095451346e-06,
      "loss": 0.8025,
      "step": 801380
    },
    {
      "epoch": 2.8086721551062466,
      "grad_norm": 2.96875,
      "learning_rate": 3.543177280881433e-06,
      "loss": 0.796,
      "step": 801390
    },
    {
      "epoch": 2.8087072026131423,
      "grad_norm": 2.78125,
      "learning_rate": 3.542528252217731e-06,
      "loss": 0.8662,
      "step": 801400
    },
    {
      "epoch": 2.8087422501200376,
      "grad_norm": 2.984375,
      "learning_rate": 3.5418792235540295e-06,
      "loss": 0.8307,
      "step": 801410
    },
    {
      "epoch": 2.8087772976269334,
      "grad_norm": 2.953125,
      "learning_rate": 3.541230194890327e-06,
      "loss": 0.7894,
      "step": 801420
    },
    {
      "epoch": 2.8088123451338287,
      "grad_norm": 2.59375,
      "learning_rate": 3.5405811662266255e-06,
      "loss": 0.8626,
      "step": 801430
    },
    {
      "epoch": 2.8088473926407245,
      "grad_norm": 2.671875,
      "learning_rate": 3.5399321375629235e-06,
      "loss": 0.78,
      "step": 801440
    },
    {
      "epoch": 2.8088824401476202,
      "grad_norm": 2.984375,
      "learning_rate": 3.539283108899222e-06,
      "loss": 0.8471,
      "step": 801450
    },
    {
      "epoch": 2.8089174876545155,
      "grad_norm": 2.9375,
      "learning_rate": 3.53863408023552e-06,
      "loss": 0.8525,
      "step": 801460
    },
    {
      "epoch": 2.8089525351614113,
      "grad_norm": 2.84375,
      "learning_rate": 3.5379850515718175e-06,
      "loss": 0.6619,
      "step": 801470
    },
    {
      "epoch": 2.8089875826683066,
      "grad_norm": 3.28125,
      "learning_rate": 3.537336022908116e-06,
      "loss": 0.8573,
      "step": 801480
    },
    {
      "epoch": 2.8090226301752024,
      "grad_norm": 2.875,
      "learning_rate": 3.536686994244414e-06,
      "loss": 0.7834,
      "step": 801490
    },
    {
      "epoch": 2.809057677682098,
      "grad_norm": 3.140625,
      "learning_rate": 3.5360379655807123e-06,
      "loss": 0.8152,
      "step": 801500
    },
    {
      "epoch": 2.809092725188994,
      "grad_norm": 3.015625,
      "learning_rate": 3.53538893691701e-06,
      "loss": 0.7785,
      "step": 801510
    },
    {
      "epoch": 2.809127772695889,
      "grad_norm": 3.03125,
      "learning_rate": 3.534739908253308e-06,
      "loss": 0.8885,
      "step": 801520
    },
    {
      "epoch": 2.809162820202785,
      "grad_norm": 2.796875,
      "learning_rate": 3.5340908795896063e-06,
      "loss": 0.7319,
      "step": 801530
    },
    {
      "epoch": 2.8091978677096803,
      "grad_norm": 3.296875,
      "learning_rate": 3.5334418509259047e-06,
      "loss": 0.8801,
      "step": 801540
    },
    {
      "epoch": 2.809232915216576,
      "grad_norm": 3.03125,
      "learning_rate": 3.5327928222622027e-06,
      "loss": 0.7905,
      "step": 801550
    },
    {
      "epoch": 2.809267962723472,
      "grad_norm": 3.609375,
      "learning_rate": 3.5321437935985003e-06,
      "loss": 0.8757,
      "step": 801560
    },
    {
      "epoch": 2.809303010230367,
      "grad_norm": 2.765625,
      "learning_rate": 3.5314947649347987e-06,
      "loss": 0.7232,
      "step": 801570
    },
    {
      "epoch": 2.809338057737263,
      "grad_norm": 2.75,
      "learning_rate": 3.5308457362710967e-06,
      "loss": 0.729,
      "step": 801580
    },
    {
      "epoch": 2.8093731052441586,
      "grad_norm": 3.390625,
      "learning_rate": 3.530196707607395e-06,
      "loss": 0.7827,
      "step": 801590
    },
    {
      "epoch": 2.809408152751054,
      "grad_norm": 3.0,
      "learning_rate": 3.529547678943693e-06,
      "loss": 0.7922,
      "step": 801600
    },
    {
      "epoch": 2.8094432002579497,
      "grad_norm": 3.03125,
      "learning_rate": 3.5288986502799907e-06,
      "loss": 0.7201,
      "step": 801610
    },
    {
      "epoch": 2.8094782477648454,
      "grad_norm": 3.0,
      "learning_rate": 3.528249621616289e-06,
      "loss": 0.7921,
      "step": 801620
    },
    {
      "epoch": 2.8095132952717408,
      "grad_norm": 2.765625,
      "learning_rate": 3.5276005929525876e-06,
      "loss": 0.8012,
      "step": 801630
    },
    {
      "epoch": 2.8095483427786365,
      "grad_norm": 3.171875,
      "learning_rate": 3.5269515642888856e-06,
      "loss": 0.7864,
      "step": 801640
    },
    {
      "epoch": 2.809583390285532,
      "grad_norm": 2.765625,
      "learning_rate": 3.526302535625183e-06,
      "loss": 0.8945,
      "step": 801650
    },
    {
      "epoch": 2.8096184377924276,
      "grad_norm": 2.859375,
      "learning_rate": 3.5256535069614816e-06,
      "loss": 0.7809,
      "step": 801660
    },
    {
      "epoch": 2.8096534852993233,
      "grad_norm": 3.03125,
      "learning_rate": 3.5250044782977796e-06,
      "loss": 0.8322,
      "step": 801670
    },
    {
      "epoch": 2.8096885328062187,
      "grad_norm": 2.9375,
      "learning_rate": 3.524355449634078e-06,
      "loss": 0.8323,
      "step": 801680
    },
    {
      "epoch": 2.8097235803131144,
      "grad_norm": 2.84375,
      "learning_rate": 3.523706420970376e-06,
      "loss": 0.7642,
      "step": 801690
    },
    {
      "epoch": 2.80975862782001,
      "grad_norm": 2.765625,
      "learning_rate": 3.5230573923066736e-06,
      "loss": 0.7533,
      "step": 801700
    },
    {
      "epoch": 2.8097936753269055,
      "grad_norm": 2.9375,
      "learning_rate": 3.522408363642972e-06,
      "loss": 0.7493,
      "step": 801710
    },
    {
      "epoch": 2.8098287228338013,
      "grad_norm": 2.953125,
      "learning_rate": 3.52175933497927e-06,
      "loss": 0.748,
      "step": 801720
    },
    {
      "epoch": 2.809863770340697,
      "grad_norm": 3.109375,
      "learning_rate": 3.5211103063155684e-06,
      "loss": 0.8236,
      "step": 801730
    },
    {
      "epoch": 2.8098988178475923,
      "grad_norm": 2.890625,
      "learning_rate": 3.520461277651867e-06,
      "loss": 0.7631,
      "step": 801740
    },
    {
      "epoch": 2.809933865354488,
      "grad_norm": 2.96875,
      "learning_rate": 3.5198122489881644e-06,
      "loss": 0.7759,
      "step": 801750
    },
    {
      "epoch": 2.8099689128613834,
      "grad_norm": 3.1875,
      "learning_rate": 3.5191632203244624e-06,
      "loss": 0.7801,
      "step": 801760
    },
    {
      "epoch": 2.810003960368279,
      "grad_norm": 3.3125,
      "learning_rate": 3.518514191660761e-06,
      "loss": 0.7941,
      "step": 801770
    },
    {
      "epoch": 2.810039007875175,
      "grad_norm": 2.953125,
      "learning_rate": 3.517865162997059e-06,
      "loss": 0.8743,
      "step": 801780
    },
    {
      "epoch": 2.8100740553820707,
      "grad_norm": 2.84375,
      "learning_rate": 3.5172161343333573e-06,
      "loss": 0.783,
      "step": 801790
    },
    {
      "epoch": 2.810109102888966,
      "grad_norm": 3.484375,
      "learning_rate": 3.516567105669655e-06,
      "loss": 0.7647,
      "step": 801800
    },
    {
      "epoch": 2.8101441503958617,
      "grad_norm": 2.65625,
      "learning_rate": 3.515918077005953e-06,
      "loss": 0.7759,
      "step": 801810
    },
    {
      "epoch": 2.810179197902757,
      "grad_norm": 3.015625,
      "learning_rate": 3.5152690483422513e-06,
      "loss": 0.7914,
      "step": 801820
    },
    {
      "epoch": 2.810214245409653,
      "grad_norm": 2.640625,
      "learning_rate": 3.5146200196785493e-06,
      "loss": 0.7381,
      "step": 801830
    },
    {
      "epoch": 2.8102492929165486,
      "grad_norm": 3.375,
      "learning_rate": 3.513970991014847e-06,
      "loss": 0.7403,
      "step": 801840
    },
    {
      "epoch": 2.810284340423444,
      "grad_norm": 2.75,
      "learning_rate": 3.5133219623511453e-06,
      "loss": 0.7849,
      "step": 801850
    },
    {
      "epoch": 2.8103193879303396,
      "grad_norm": 2.890625,
      "learning_rate": 3.5126729336874437e-06,
      "loss": 0.7244,
      "step": 801860
    },
    {
      "epoch": 2.810354435437235,
      "grad_norm": 2.8125,
      "learning_rate": 3.5120239050237417e-06,
      "loss": 0.791,
      "step": 801870
    },
    {
      "epoch": 2.8103894829441307,
      "grad_norm": 2.78125,
      "learning_rate": 3.51137487636004e-06,
      "loss": 0.7812,
      "step": 801880
    },
    {
      "epoch": 2.8104245304510265,
      "grad_norm": 3.1875,
      "learning_rate": 3.5107258476963377e-06,
      "loss": 0.7945,
      "step": 801890
    },
    {
      "epoch": 2.8104595779579222,
      "grad_norm": 3.453125,
      "learning_rate": 3.5100768190326357e-06,
      "loss": 0.8703,
      "step": 801900
    },
    {
      "epoch": 2.8104946254648175,
      "grad_norm": 2.984375,
      "learning_rate": 3.509427790368934e-06,
      "loss": 0.7958,
      "step": 801910
    },
    {
      "epoch": 2.8105296729717133,
      "grad_norm": 2.546875,
      "learning_rate": 3.508778761705232e-06,
      "loss": 0.8438,
      "step": 801920
    },
    {
      "epoch": 2.8105647204786086,
      "grad_norm": 2.4375,
      "learning_rate": 3.5081297330415305e-06,
      "loss": 0.739,
      "step": 801930
    },
    {
      "epoch": 2.8105997679855044,
      "grad_norm": 2.96875,
      "learning_rate": 3.507480704377828e-06,
      "loss": 0.8126,
      "step": 801940
    },
    {
      "epoch": 2.8106348154924,
      "grad_norm": 2.953125,
      "learning_rate": 3.506831675714126e-06,
      "loss": 0.7743,
      "step": 801950
    },
    {
      "epoch": 2.8106698629992954,
      "grad_norm": 2.65625,
      "learning_rate": 3.5061826470504245e-06,
      "loss": 0.7611,
      "step": 801960
    },
    {
      "epoch": 2.810704910506191,
      "grad_norm": 2.84375,
      "learning_rate": 3.505533618386723e-06,
      "loss": 0.8064,
      "step": 801970
    },
    {
      "epoch": 2.8107399580130865,
      "grad_norm": 2.734375,
      "learning_rate": 3.5048845897230205e-06,
      "loss": 0.6959,
      "step": 801980
    },
    {
      "epoch": 2.8107750055199823,
      "grad_norm": 3.421875,
      "learning_rate": 3.5042355610593185e-06,
      "loss": 0.8305,
      "step": 801990
    },
    {
      "epoch": 2.810810053026878,
      "grad_norm": 2.75,
      "learning_rate": 3.503586532395617e-06,
      "loss": 0.7927,
      "step": 802000
    },
    {
      "epoch": 2.810845100533774,
      "grad_norm": 3.609375,
      "learning_rate": 3.502937503731915e-06,
      "loss": 0.8117,
      "step": 802010
    },
    {
      "epoch": 2.810880148040669,
      "grad_norm": 2.859375,
      "learning_rate": 3.5022884750682134e-06,
      "loss": 0.8238,
      "step": 802020
    },
    {
      "epoch": 2.810915195547565,
      "grad_norm": 2.953125,
      "learning_rate": 3.501639446404511e-06,
      "loss": 0.8658,
      "step": 802030
    },
    {
      "epoch": 2.81095024305446,
      "grad_norm": 2.890625,
      "learning_rate": 3.500990417740809e-06,
      "loss": 0.8385,
      "step": 802040
    },
    {
      "epoch": 2.810985290561356,
      "grad_norm": 2.71875,
      "learning_rate": 3.5003413890771074e-06,
      "loss": 0.8349,
      "step": 802050
    },
    {
      "epoch": 2.8110203380682517,
      "grad_norm": 3.0625,
      "learning_rate": 3.499692360413406e-06,
      "loss": 0.8341,
      "step": 802060
    },
    {
      "epoch": 2.811055385575147,
      "grad_norm": 2.46875,
      "learning_rate": 3.499043331749704e-06,
      "loss": 0.7761,
      "step": 802070
    },
    {
      "epoch": 2.8110904330820428,
      "grad_norm": 2.96875,
      "learning_rate": 3.4983943030860014e-06,
      "loss": 0.7883,
      "step": 802080
    },
    {
      "epoch": 2.811125480588938,
      "grad_norm": 3.015625,
      "learning_rate": 3.4977452744223e-06,
      "loss": 0.8739,
      "step": 802090
    },
    {
      "epoch": 2.811160528095834,
      "grad_norm": 3.078125,
      "learning_rate": 3.497096245758598e-06,
      "loss": 0.8165,
      "step": 802100
    },
    {
      "epoch": 2.8111955756027296,
      "grad_norm": 3.25,
      "learning_rate": 3.4964472170948962e-06,
      "loss": 0.8398,
      "step": 802110
    },
    {
      "epoch": 2.8112306231096253,
      "grad_norm": 3.25,
      "learning_rate": 3.4957981884311942e-06,
      "loss": 0.8994,
      "step": 802120
    },
    {
      "epoch": 2.8112656706165207,
      "grad_norm": 2.78125,
      "learning_rate": 3.495149159767492e-06,
      "loss": 0.8267,
      "step": 802130
    },
    {
      "epoch": 2.8113007181234164,
      "grad_norm": 3.359375,
      "learning_rate": 3.4945001311037902e-06,
      "loss": 0.8129,
      "step": 802140
    },
    {
      "epoch": 2.8113357656303117,
      "grad_norm": 2.734375,
      "learning_rate": 3.4938511024400882e-06,
      "loss": 0.8077,
      "step": 802150
    },
    {
      "epoch": 2.8113708131372075,
      "grad_norm": 3.375,
      "learning_rate": 3.4932020737763867e-06,
      "loss": 0.9325,
      "step": 802160
    },
    {
      "epoch": 2.8114058606441032,
      "grad_norm": 3.359375,
      "learning_rate": 3.4925530451126842e-06,
      "loss": 0.8,
      "step": 802170
    },
    {
      "epoch": 2.8114409081509986,
      "grad_norm": 2.890625,
      "learning_rate": 3.4919040164489827e-06,
      "loss": 0.8142,
      "step": 802180
    },
    {
      "epoch": 2.8114759556578943,
      "grad_norm": 3.21875,
      "learning_rate": 3.4912549877852807e-06,
      "loss": 0.8467,
      "step": 802190
    },
    {
      "epoch": 2.8115110031647896,
      "grad_norm": 3.046875,
      "learning_rate": 3.490605959121579e-06,
      "loss": 0.8067,
      "step": 802200
    },
    {
      "epoch": 2.8115460506716854,
      "grad_norm": 2.859375,
      "learning_rate": 3.489956930457877e-06,
      "loss": 0.7765,
      "step": 802210
    },
    {
      "epoch": 2.811581098178581,
      "grad_norm": 3.421875,
      "learning_rate": 3.4893079017941747e-06,
      "loss": 0.8247,
      "step": 802220
    },
    {
      "epoch": 2.811616145685477,
      "grad_norm": 3.0625,
      "learning_rate": 3.488658873130473e-06,
      "loss": 0.7694,
      "step": 802230
    },
    {
      "epoch": 2.8116511931923722,
      "grad_norm": 2.65625,
      "learning_rate": 3.488009844466771e-06,
      "loss": 0.8304,
      "step": 802240
    },
    {
      "epoch": 2.811686240699268,
      "grad_norm": 2.921875,
      "learning_rate": 3.4873608158030695e-06,
      "loss": 0.8092,
      "step": 802250
    },
    {
      "epoch": 2.8117212882061633,
      "grad_norm": 3.296875,
      "learning_rate": 3.4867117871393675e-06,
      "loss": 0.8003,
      "step": 802260
    },
    {
      "epoch": 2.811756335713059,
      "grad_norm": 3.34375,
      "learning_rate": 3.486062758475665e-06,
      "loss": 0.7807,
      "step": 802270
    },
    {
      "epoch": 2.811791383219955,
      "grad_norm": 3.03125,
      "learning_rate": 3.4854137298119635e-06,
      "loss": 0.702,
      "step": 802280
    },
    {
      "epoch": 2.81182643072685,
      "grad_norm": 3.09375,
      "learning_rate": 3.484764701148262e-06,
      "loss": 0.7817,
      "step": 802290
    },
    {
      "epoch": 2.811861478233746,
      "grad_norm": 3.140625,
      "learning_rate": 3.48411567248456e-06,
      "loss": 0.8416,
      "step": 802300
    },
    {
      "epoch": 2.811896525740641,
      "grad_norm": 3.28125,
      "learning_rate": 3.4834666438208575e-06,
      "loss": 0.8383,
      "step": 802310
    },
    {
      "epoch": 2.811931573247537,
      "grad_norm": 2.828125,
      "learning_rate": 3.482817615157156e-06,
      "loss": 0.867,
      "step": 802320
    },
    {
      "epoch": 2.8119666207544327,
      "grad_norm": 3.1875,
      "learning_rate": 3.482168586493454e-06,
      "loss": 0.7985,
      "step": 802330
    },
    {
      "epoch": 2.8120016682613285,
      "grad_norm": 2.84375,
      "learning_rate": 3.4815195578297524e-06,
      "loss": 0.8325,
      "step": 802340
    },
    {
      "epoch": 2.812036715768224,
      "grad_norm": 2.90625,
      "learning_rate": 3.4808705291660504e-06,
      "loss": 0.7732,
      "step": 802350
    },
    {
      "epoch": 2.8120717632751195,
      "grad_norm": 3.453125,
      "learning_rate": 3.480221500502348e-06,
      "loss": 0.9193,
      "step": 802360
    },
    {
      "epoch": 2.812106810782015,
      "grad_norm": 2.671875,
      "learning_rate": 3.4795724718386464e-06,
      "loss": 0.7901,
      "step": 802370
    },
    {
      "epoch": 2.8121418582889106,
      "grad_norm": 2.625,
      "learning_rate": 3.4789234431749444e-06,
      "loss": 0.7022,
      "step": 802380
    },
    {
      "epoch": 2.8121769057958064,
      "grad_norm": 3.046875,
      "learning_rate": 3.4782744145112428e-06,
      "loss": 0.8264,
      "step": 802390
    },
    {
      "epoch": 2.8122119533027017,
      "grad_norm": 3.28125,
      "learning_rate": 3.477625385847541e-06,
      "loss": 0.8057,
      "step": 802400
    },
    {
      "epoch": 2.8122470008095974,
      "grad_norm": 2.875,
      "learning_rate": 3.4769763571838388e-06,
      "loss": 0.7504,
      "step": 802410
    },
    {
      "epoch": 2.8122820483164928,
      "grad_norm": 2.890625,
      "learning_rate": 3.4763273285201368e-06,
      "loss": 0.715,
      "step": 802420
    },
    {
      "epoch": 2.8123170958233885,
      "grad_norm": 2.78125,
      "learning_rate": 3.475678299856435e-06,
      "loss": 0.8081,
      "step": 802430
    },
    {
      "epoch": 2.8123521433302843,
      "grad_norm": 3.046875,
      "learning_rate": 3.475029271192733e-06,
      "loss": 0.8329,
      "step": 802440
    },
    {
      "epoch": 2.81238719083718,
      "grad_norm": 2.953125,
      "learning_rate": 3.4743802425290308e-06,
      "loss": 0.8129,
      "step": 802450
    },
    {
      "epoch": 2.8124222383440753,
      "grad_norm": 3.265625,
      "learning_rate": 3.473731213865329e-06,
      "loss": 0.9048,
      "step": 802460
    },
    {
      "epoch": 2.812457285850971,
      "grad_norm": 3.53125,
      "learning_rate": 3.473082185201627e-06,
      "loss": 0.829,
      "step": 802470
    },
    {
      "epoch": 2.8124923333578664,
      "grad_norm": 2.890625,
      "learning_rate": 3.4724331565379256e-06,
      "loss": 0.7832,
      "step": 802480
    },
    {
      "epoch": 2.812527380864762,
      "grad_norm": 2.5625,
      "learning_rate": 3.471784127874224e-06,
      "loss": 0.7678,
      "step": 802490
    },
    {
      "epoch": 2.812562428371658,
      "grad_norm": 3.28125,
      "learning_rate": 3.471135099210521e-06,
      "loss": 0.8223,
      "step": 802500
    },
    {
      "epoch": 2.8125974758785532,
      "grad_norm": 2.96875,
      "learning_rate": 3.4704860705468196e-06,
      "loss": 0.7675,
      "step": 802510
    },
    {
      "epoch": 2.812632523385449,
      "grad_norm": 3.28125,
      "learning_rate": 3.469837041883118e-06,
      "loss": 0.8317,
      "step": 802520
    },
    {
      "epoch": 2.8126675708923443,
      "grad_norm": 2.484375,
      "learning_rate": 3.469188013219416e-06,
      "loss": 0.7364,
      "step": 802530
    },
    {
      "epoch": 2.81270261839924,
      "grad_norm": 3.125,
      "learning_rate": 3.4685389845557145e-06,
      "loss": 0.7653,
      "step": 802540
    },
    {
      "epoch": 2.812737665906136,
      "grad_norm": 2.609375,
      "learning_rate": 3.467889955892012e-06,
      "loss": 0.727,
      "step": 802550
    },
    {
      "epoch": 2.8127727134130316,
      "grad_norm": 2.59375,
      "learning_rate": 3.46724092722831e-06,
      "loss": 0.8029,
      "step": 802560
    },
    {
      "epoch": 2.812807760919927,
      "grad_norm": 3.109375,
      "learning_rate": 3.4665918985646085e-06,
      "loss": 0.8236,
      "step": 802570
    },
    {
      "epoch": 2.8128428084268227,
      "grad_norm": 3.203125,
      "learning_rate": 3.4659428699009065e-06,
      "loss": 0.8465,
      "step": 802580
    },
    {
      "epoch": 2.812877855933718,
      "grad_norm": 2.921875,
      "learning_rate": 3.465293841237205e-06,
      "loss": 0.8289,
      "step": 802590
    },
    {
      "epoch": 2.8129129034406137,
      "grad_norm": 3.015625,
      "learning_rate": 3.4646448125735025e-06,
      "loss": 0.7638,
      "step": 802600
    },
    {
      "epoch": 2.8129479509475095,
      "grad_norm": 2.578125,
      "learning_rate": 3.463995783909801e-06,
      "loss": 0.8298,
      "step": 802610
    },
    {
      "epoch": 2.812982998454405,
      "grad_norm": 3.171875,
      "learning_rate": 3.463346755246099e-06,
      "loss": 0.7725,
      "step": 802620
    },
    {
      "epoch": 2.8130180459613006,
      "grad_norm": 2.90625,
      "learning_rate": 3.4626977265823973e-06,
      "loss": 0.7325,
      "step": 802630
    },
    {
      "epoch": 2.813053093468196,
      "grad_norm": 2.453125,
      "learning_rate": 3.462048697918695e-06,
      "loss": 0.7512,
      "step": 802640
    },
    {
      "epoch": 2.8130881409750916,
      "grad_norm": 2.9375,
      "learning_rate": 3.461399669254993e-06,
      "loss": 0.7437,
      "step": 802650
    },
    {
      "epoch": 2.8131231884819874,
      "grad_norm": 3.34375,
      "learning_rate": 3.4607506405912913e-06,
      "loss": 0.7952,
      "step": 802660
    },
    {
      "epoch": 2.813158235988883,
      "grad_norm": 3.328125,
      "learning_rate": 3.4601016119275893e-06,
      "loss": 0.8645,
      "step": 802670
    },
    {
      "epoch": 2.8131932834957785,
      "grad_norm": 2.6875,
      "learning_rate": 3.4594525832638877e-06,
      "loss": 0.8287,
      "step": 802680
    },
    {
      "epoch": 2.813228331002674,
      "grad_norm": 3.09375,
      "learning_rate": 3.4588035546001853e-06,
      "loss": 0.8364,
      "step": 802690
    },
    {
      "epoch": 2.8132633785095695,
      "grad_norm": 3.25,
      "learning_rate": 3.4581545259364833e-06,
      "loss": 0.8008,
      "step": 802700
    },
    {
      "epoch": 2.8132984260164653,
      "grad_norm": 2.890625,
      "learning_rate": 3.4575054972727817e-06,
      "loss": 0.7121,
      "step": 802710
    },
    {
      "epoch": 2.813333473523361,
      "grad_norm": 3.375,
      "learning_rate": 3.45685646860908e-06,
      "loss": 0.8037,
      "step": 802720
    },
    {
      "epoch": 2.8133685210302564,
      "grad_norm": 2.984375,
      "learning_rate": 3.456207439945378e-06,
      "loss": 0.8072,
      "step": 802730
    },
    {
      "epoch": 2.813403568537152,
      "grad_norm": 3.21875,
      "learning_rate": 3.4555584112816757e-06,
      "loss": 0.7876,
      "step": 802740
    },
    {
      "epoch": 2.8134386160440474,
      "grad_norm": 3.265625,
      "learning_rate": 3.454909382617974e-06,
      "loss": 0.7531,
      "step": 802750
    },
    {
      "epoch": 2.813473663550943,
      "grad_norm": 3.3125,
      "learning_rate": 3.454260353954272e-06,
      "loss": 0.8346,
      "step": 802760
    },
    {
      "epoch": 2.813508711057839,
      "grad_norm": 3.21875,
      "learning_rate": 3.4536113252905706e-06,
      "loss": 0.8466,
      "step": 802770
    },
    {
      "epoch": 2.8135437585647347,
      "grad_norm": 2.78125,
      "learning_rate": 3.452962296626868e-06,
      "loss": 0.8667,
      "step": 802780
    },
    {
      "epoch": 2.81357880607163,
      "grad_norm": 2.640625,
      "learning_rate": 3.452313267963166e-06,
      "loss": 0.7567,
      "step": 802790
    },
    {
      "epoch": 2.813613853578526,
      "grad_norm": 3.15625,
      "learning_rate": 3.4516642392994646e-06,
      "loss": 0.8518,
      "step": 802800
    },
    {
      "epoch": 2.813648901085421,
      "grad_norm": 3.265625,
      "learning_rate": 3.451015210635763e-06,
      "loss": 0.9215,
      "step": 802810
    },
    {
      "epoch": 2.813683948592317,
      "grad_norm": 3.03125,
      "learning_rate": 3.450366181972061e-06,
      "loss": 0.7366,
      "step": 802820
    },
    {
      "epoch": 2.8137189960992126,
      "grad_norm": 3.21875,
      "learning_rate": 3.4497171533083586e-06,
      "loss": 0.7688,
      "step": 802830
    },
    {
      "epoch": 2.813754043606108,
      "grad_norm": 3.15625,
      "learning_rate": 3.449068124644657e-06,
      "loss": 0.9021,
      "step": 802840
    },
    {
      "epoch": 2.8137890911130037,
      "grad_norm": 3.203125,
      "learning_rate": 3.448419095980955e-06,
      "loss": 0.7947,
      "step": 802850
    },
    {
      "epoch": 2.8138241386198994,
      "grad_norm": 3.125,
      "learning_rate": 3.4477700673172534e-06,
      "loss": 0.8019,
      "step": 802860
    },
    {
      "epoch": 2.8138591861267948,
      "grad_norm": 3.109375,
      "learning_rate": 3.4471210386535514e-06,
      "loss": 0.7798,
      "step": 802870
    },
    {
      "epoch": 2.8138942336336905,
      "grad_norm": 2.96875,
      "learning_rate": 3.446472009989849e-06,
      "loss": 0.7508,
      "step": 802880
    },
    {
      "epoch": 2.8139292811405863,
      "grad_norm": 3.3125,
      "learning_rate": 3.4458229813261474e-06,
      "loss": 0.7523,
      "step": 802890
    },
    {
      "epoch": 2.8139643286474816,
      "grad_norm": 2.953125,
      "learning_rate": 3.4451739526624454e-06,
      "loss": 0.8629,
      "step": 802900
    },
    {
      "epoch": 2.8139993761543773,
      "grad_norm": 2.75,
      "learning_rate": 3.444524923998744e-06,
      "loss": 0.8061,
      "step": 802910
    },
    {
      "epoch": 2.8140344236612727,
      "grad_norm": 2.9375,
      "learning_rate": 3.4438758953350414e-06,
      "loss": 0.7627,
      "step": 802920
    },
    {
      "epoch": 2.8140694711681684,
      "grad_norm": 2.734375,
      "learning_rate": 3.44322686667134e-06,
      "loss": 0.8346,
      "step": 802930
    },
    {
      "epoch": 2.814104518675064,
      "grad_norm": 3.0,
      "learning_rate": 3.442577838007638e-06,
      "loss": 0.7456,
      "step": 802940
    },
    {
      "epoch": 2.8141395661819595,
      "grad_norm": 2.0625,
      "learning_rate": 3.4419288093439363e-06,
      "loss": 0.8355,
      "step": 802950
    },
    {
      "epoch": 2.8141746136888552,
      "grad_norm": 3.125,
      "learning_rate": 3.4412797806802343e-06,
      "loss": 0.9062,
      "step": 802960
    },
    {
      "epoch": 2.814209661195751,
      "grad_norm": 2.953125,
      "learning_rate": 3.440630752016532e-06,
      "loss": 0.855,
      "step": 802970
    },
    {
      "epoch": 2.8142447087026463,
      "grad_norm": 3.265625,
      "learning_rate": 3.4399817233528303e-06,
      "loss": 0.7775,
      "step": 802980
    },
    {
      "epoch": 2.814279756209542,
      "grad_norm": 2.6875,
      "learning_rate": 3.4393326946891283e-06,
      "loss": 0.7612,
      "step": 802990
    },
    {
      "epoch": 2.814314803716438,
      "grad_norm": 3.0625,
      "learning_rate": 3.4386836660254267e-06,
      "loss": 0.8005,
      "step": 803000
    },
    {
      "epoch": 2.814349851223333,
      "grad_norm": 3.0,
      "learning_rate": 3.4380346373617247e-06,
      "loss": 0.8288,
      "step": 803010
    },
    {
      "epoch": 2.814384898730229,
      "grad_norm": 2.859375,
      "learning_rate": 3.4373856086980223e-06,
      "loss": 0.7656,
      "step": 803020
    },
    {
      "epoch": 2.814419946237124,
      "grad_norm": 3.046875,
      "learning_rate": 3.4367365800343207e-06,
      "loss": 0.8474,
      "step": 803030
    },
    {
      "epoch": 2.81445499374402,
      "grad_norm": 3.140625,
      "learning_rate": 3.436087551370619e-06,
      "loss": 0.804,
      "step": 803040
    },
    {
      "epoch": 2.8144900412509157,
      "grad_norm": 2.6875,
      "learning_rate": 3.435438522706917e-06,
      "loss": 0.8054,
      "step": 803050
    },
    {
      "epoch": 2.8145250887578115,
      "grad_norm": 2.953125,
      "learning_rate": 3.4347894940432155e-06,
      "loss": 0.8222,
      "step": 803060
    },
    {
      "epoch": 2.814560136264707,
      "grad_norm": 2.921875,
      "learning_rate": 3.434140465379513e-06,
      "loss": 0.8891,
      "step": 803070
    },
    {
      "epoch": 2.8145951837716026,
      "grad_norm": 2.84375,
      "learning_rate": 3.433491436715811e-06,
      "loss": 0.7696,
      "step": 803080
    },
    {
      "epoch": 2.814630231278498,
      "grad_norm": 2.578125,
      "learning_rate": 3.4328424080521095e-06,
      "loss": 0.8054,
      "step": 803090
    },
    {
      "epoch": 2.8146652787853936,
      "grad_norm": 2.96875,
      "learning_rate": 3.4321933793884075e-06,
      "loss": 0.8307,
      "step": 803100
    },
    {
      "epoch": 2.8147003262922894,
      "grad_norm": 2.84375,
      "learning_rate": 3.431544350724705e-06,
      "loss": 0.8326,
      "step": 803110
    },
    {
      "epoch": 2.8147353737991847,
      "grad_norm": 3.515625,
      "learning_rate": 3.4308953220610035e-06,
      "loss": 0.8445,
      "step": 803120
    },
    {
      "epoch": 2.8147704213060805,
      "grad_norm": 2.890625,
      "learning_rate": 3.4302462933973015e-06,
      "loss": 0.8399,
      "step": 803130
    },
    {
      "epoch": 2.8148054688129758,
      "grad_norm": 2.828125,
      "learning_rate": 3.4295972647336e-06,
      "loss": 0.711,
      "step": 803140
    },
    {
      "epoch": 2.8148405163198715,
      "grad_norm": 2.8125,
      "learning_rate": 3.4289482360698984e-06,
      "loss": 0.8305,
      "step": 803150
    },
    {
      "epoch": 2.8148755638267673,
      "grad_norm": 2.984375,
      "learning_rate": 3.428299207406196e-06,
      "loss": 0.7415,
      "step": 803160
    },
    {
      "epoch": 2.814910611333663,
      "grad_norm": 3.046875,
      "learning_rate": 3.427650178742494e-06,
      "loss": 0.8627,
      "step": 803170
    },
    {
      "epoch": 2.8149456588405584,
      "grad_norm": 2.5625,
      "learning_rate": 3.4270011500787924e-06,
      "loss": 0.732,
      "step": 803180
    },
    {
      "epoch": 2.814980706347454,
      "grad_norm": 2.328125,
      "learning_rate": 3.4263521214150904e-06,
      "loss": 0.678,
      "step": 803190
    },
    {
      "epoch": 2.8150157538543494,
      "grad_norm": 2.8125,
      "learning_rate": 3.425703092751389e-06,
      "loss": 0.7713,
      "step": 803200
    },
    {
      "epoch": 2.815050801361245,
      "grad_norm": 3.265625,
      "learning_rate": 3.4250540640876864e-06,
      "loss": 0.8646,
      "step": 803210
    },
    {
      "epoch": 2.815085848868141,
      "grad_norm": 2.6875,
      "learning_rate": 3.4244050354239844e-06,
      "loss": 0.7107,
      "step": 803220
    },
    {
      "epoch": 2.8151208963750363,
      "grad_norm": 2.96875,
      "learning_rate": 3.423756006760283e-06,
      "loss": 0.8106,
      "step": 803230
    },
    {
      "epoch": 2.815155943881932,
      "grad_norm": 2.796875,
      "learning_rate": 3.4231069780965812e-06,
      "loss": 0.7858,
      "step": 803240
    },
    {
      "epoch": 2.8151909913888273,
      "grad_norm": 2.859375,
      "learning_rate": 3.4224579494328784e-06,
      "loss": 0.8336,
      "step": 803250
    },
    {
      "epoch": 2.815226038895723,
      "grad_norm": 2.921875,
      "learning_rate": 3.421808920769177e-06,
      "loss": 0.8289,
      "step": 803260
    },
    {
      "epoch": 2.815261086402619,
      "grad_norm": 2.953125,
      "learning_rate": 3.4211598921054752e-06,
      "loss": 0.8211,
      "step": 803270
    },
    {
      "epoch": 2.8152961339095146,
      "grad_norm": 2.921875,
      "learning_rate": 3.4205108634417732e-06,
      "loss": 0.8263,
      "step": 803280
    },
    {
      "epoch": 2.81533118141641,
      "grad_norm": 2.890625,
      "learning_rate": 3.4198618347780717e-06,
      "loss": 0.7858,
      "step": 803290
    },
    {
      "epoch": 2.8153662289233057,
      "grad_norm": 3.625,
      "learning_rate": 3.4192128061143692e-06,
      "loss": 0.8439,
      "step": 803300
    },
    {
      "epoch": 2.815401276430201,
      "grad_norm": 3.328125,
      "learning_rate": 3.4185637774506672e-06,
      "loss": 0.8515,
      "step": 803310
    },
    {
      "epoch": 2.8154363239370968,
      "grad_norm": 2.3125,
      "learning_rate": 3.4179147487869657e-06,
      "loss": 0.7848,
      "step": 803320
    },
    {
      "epoch": 2.8154713714439925,
      "grad_norm": 2.96875,
      "learning_rate": 3.4172657201232637e-06,
      "loss": 0.7292,
      "step": 803330
    },
    {
      "epoch": 2.815506418950888,
      "grad_norm": 2.671875,
      "learning_rate": 3.416616691459562e-06,
      "loss": 0.7885,
      "step": 803340
    },
    {
      "epoch": 2.8155414664577836,
      "grad_norm": 2.90625,
      "learning_rate": 3.4159676627958597e-06,
      "loss": 0.7809,
      "step": 803350
    },
    {
      "epoch": 2.815576513964679,
      "grad_norm": 3.203125,
      "learning_rate": 3.415318634132158e-06,
      "loss": 0.7943,
      "step": 803360
    },
    {
      "epoch": 2.8156115614715747,
      "grad_norm": 2.65625,
      "learning_rate": 3.414669605468456e-06,
      "loss": 0.7918,
      "step": 803370
    },
    {
      "epoch": 2.8156466089784704,
      "grad_norm": 2.40625,
      "learning_rate": 3.4140205768047545e-06,
      "loss": 0.7916,
      "step": 803380
    },
    {
      "epoch": 2.815681656485366,
      "grad_norm": 2.953125,
      "learning_rate": 3.413371548141052e-06,
      "loss": 0.8246,
      "step": 803390
    },
    {
      "epoch": 2.8157167039922615,
      "grad_norm": 2.65625,
      "learning_rate": 3.41272251947735e-06,
      "loss": 0.7526,
      "step": 803400
    },
    {
      "epoch": 2.8157517514991572,
      "grad_norm": 3.171875,
      "learning_rate": 3.4120734908136485e-06,
      "loss": 0.8075,
      "step": 803410
    },
    {
      "epoch": 2.8157867990060526,
      "grad_norm": 3.0,
      "learning_rate": 3.4114244621499465e-06,
      "loss": 0.7484,
      "step": 803420
    },
    {
      "epoch": 2.8158218465129483,
      "grad_norm": 2.859375,
      "learning_rate": 3.410775433486245e-06,
      "loss": 0.8366,
      "step": 803430
    },
    {
      "epoch": 2.815856894019844,
      "grad_norm": 2.84375,
      "learning_rate": 3.4101264048225425e-06,
      "loss": 0.7749,
      "step": 803440
    },
    {
      "epoch": 2.8158919415267394,
      "grad_norm": 2.703125,
      "learning_rate": 3.4094773761588405e-06,
      "loss": 0.7931,
      "step": 803450
    },
    {
      "epoch": 2.815926989033635,
      "grad_norm": 2.8125,
      "learning_rate": 3.408828347495139e-06,
      "loss": 0.8306,
      "step": 803460
    },
    {
      "epoch": 2.8159620365405305,
      "grad_norm": 2.84375,
      "learning_rate": 3.4081793188314374e-06,
      "loss": 0.7593,
      "step": 803470
    },
    {
      "epoch": 2.815997084047426,
      "grad_norm": 2.859375,
      "learning_rate": 3.4075302901677354e-06,
      "loss": 0.8151,
      "step": 803480
    },
    {
      "epoch": 2.816032131554322,
      "grad_norm": 3.109375,
      "learning_rate": 3.406881261504033e-06,
      "loss": 0.8147,
      "step": 803490
    },
    {
      "epoch": 2.8160671790612177,
      "grad_norm": 2.546875,
      "learning_rate": 3.4062322328403314e-06,
      "loss": 0.7294,
      "step": 803500
    },
    {
      "epoch": 2.816102226568113,
      "grad_norm": 2.671875,
      "learning_rate": 3.4055832041766294e-06,
      "loss": 0.7563,
      "step": 803510
    },
    {
      "epoch": 2.816137274075009,
      "grad_norm": 3.4375,
      "learning_rate": 3.4049341755129278e-06,
      "loss": 0.7613,
      "step": 803520
    },
    {
      "epoch": 2.816172321581904,
      "grad_norm": 2.6875,
      "learning_rate": 3.4042851468492258e-06,
      "loss": 0.8359,
      "step": 803530
    },
    {
      "epoch": 2.8162073690888,
      "grad_norm": 3.078125,
      "learning_rate": 3.4036361181855234e-06,
      "loss": 0.8051,
      "step": 803540
    },
    {
      "epoch": 2.8162424165956956,
      "grad_norm": 3.171875,
      "learning_rate": 3.4029870895218218e-06,
      "loss": 0.8178,
      "step": 803550
    },
    {
      "epoch": 2.816277464102591,
      "grad_norm": 2.84375,
      "learning_rate": 3.4023380608581198e-06,
      "loss": 0.7418,
      "step": 803560
    },
    {
      "epoch": 2.8163125116094867,
      "grad_norm": 2.796875,
      "learning_rate": 3.401689032194418e-06,
      "loss": 0.8455,
      "step": 803570
    },
    {
      "epoch": 2.816347559116382,
      "grad_norm": 2.875,
      "learning_rate": 3.4010400035307158e-06,
      "loss": 0.8346,
      "step": 803580
    },
    {
      "epoch": 2.8163826066232778,
      "grad_norm": 3.34375,
      "learning_rate": 3.400390974867014e-06,
      "loss": 0.8843,
      "step": 803590
    },
    {
      "epoch": 2.8164176541301735,
      "grad_norm": 2.765625,
      "learning_rate": 3.399741946203312e-06,
      "loss": 0.7036,
      "step": 803600
    },
    {
      "epoch": 2.8164527016370693,
      "grad_norm": 3.265625,
      "learning_rate": 3.3990929175396106e-06,
      "loss": 0.7352,
      "step": 803610
    },
    {
      "epoch": 2.8164877491439646,
      "grad_norm": 3.125,
      "learning_rate": 3.3984438888759086e-06,
      "loss": 0.746,
      "step": 803620
    },
    {
      "epoch": 2.8165227966508604,
      "grad_norm": 3.1875,
      "learning_rate": 3.397794860212206e-06,
      "loss": 0.8578,
      "step": 803630
    },
    {
      "epoch": 2.8165578441577557,
      "grad_norm": 2.53125,
      "learning_rate": 3.3971458315485046e-06,
      "loss": 0.8111,
      "step": 803640
    },
    {
      "epoch": 2.8165928916646514,
      "grad_norm": 2.5625,
      "learning_rate": 3.3964968028848026e-06,
      "loss": 0.8066,
      "step": 803650
    },
    {
      "epoch": 2.816627939171547,
      "grad_norm": 2.671875,
      "learning_rate": 3.395847774221101e-06,
      "loss": 0.802,
      "step": 803660
    },
    {
      "epoch": 2.8166629866784425,
      "grad_norm": 2.578125,
      "learning_rate": 3.3951987455573995e-06,
      "loss": 0.7128,
      "step": 803670
    },
    {
      "epoch": 2.8166980341853383,
      "grad_norm": 2.640625,
      "learning_rate": 3.3945497168936966e-06,
      "loss": 0.7816,
      "step": 803680
    },
    {
      "epoch": 2.8167330816922336,
      "grad_norm": 2.9375,
      "learning_rate": 3.393900688229995e-06,
      "loss": 0.8643,
      "step": 803690
    },
    {
      "epoch": 2.8167681291991293,
      "grad_norm": 2.9375,
      "learning_rate": 3.3932516595662935e-06,
      "loss": 0.7652,
      "step": 803700
    },
    {
      "epoch": 2.816803176706025,
      "grad_norm": 3.09375,
      "learning_rate": 3.3926026309025915e-06,
      "loss": 0.8523,
      "step": 803710
    },
    {
      "epoch": 2.816838224212921,
      "grad_norm": 2.65625,
      "learning_rate": 3.391953602238889e-06,
      "loss": 0.8509,
      "step": 803720
    },
    {
      "epoch": 2.816873271719816,
      "grad_norm": 3.046875,
      "learning_rate": 3.3913045735751875e-06,
      "loss": 0.8188,
      "step": 803730
    },
    {
      "epoch": 2.816908319226712,
      "grad_norm": 2.9375,
      "learning_rate": 3.3906555449114855e-06,
      "loss": 0.8162,
      "step": 803740
    },
    {
      "epoch": 2.8169433667336072,
      "grad_norm": 2.71875,
      "learning_rate": 3.390006516247784e-06,
      "loss": 0.7804,
      "step": 803750
    },
    {
      "epoch": 2.816978414240503,
      "grad_norm": 3.171875,
      "learning_rate": 3.389357487584082e-06,
      "loss": 0.7444,
      "step": 803760
    },
    {
      "epoch": 2.8170134617473988,
      "grad_norm": 3.078125,
      "learning_rate": 3.3887084589203795e-06,
      "loss": 0.8105,
      "step": 803770
    },
    {
      "epoch": 2.817048509254294,
      "grad_norm": 2.65625,
      "learning_rate": 3.388059430256678e-06,
      "loss": 0.9156,
      "step": 803780
    },
    {
      "epoch": 2.81708355676119,
      "grad_norm": 3.359375,
      "learning_rate": 3.3874104015929763e-06,
      "loss": 0.8287,
      "step": 803790
    },
    {
      "epoch": 2.817118604268085,
      "grad_norm": 2.78125,
      "learning_rate": 3.3867613729292743e-06,
      "loss": 0.8119,
      "step": 803800
    },
    {
      "epoch": 2.817153651774981,
      "grad_norm": 2.765625,
      "learning_rate": 3.3861123442655727e-06,
      "loss": 0.7738,
      "step": 803810
    },
    {
      "epoch": 2.8171886992818767,
      "grad_norm": 2.8125,
      "learning_rate": 3.3854633156018703e-06,
      "loss": 0.7209,
      "step": 803820
    },
    {
      "epoch": 2.8172237467887724,
      "grad_norm": 2.796875,
      "learning_rate": 3.3848142869381683e-06,
      "loss": 0.7733,
      "step": 803830
    },
    {
      "epoch": 2.8172587942956677,
      "grad_norm": 2.796875,
      "learning_rate": 3.3841652582744667e-06,
      "loss": 0.7511,
      "step": 803840
    },
    {
      "epoch": 2.8172938418025635,
      "grad_norm": 2.8125,
      "learning_rate": 3.3835162296107647e-06,
      "loss": 0.7636,
      "step": 803850
    },
    {
      "epoch": 2.817328889309459,
      "grad_norm": 2.90625,
      "learning_rate": 3.3828672009470623e-06,
      "loss": 0.793,
      "step": 803860
    },
    {
      "epoch": 2.8173639368163546,
      "grad_norm": 2.453125,
      "learning_rate": 3.3822181722833607e-06,
      "loss": 0.7821,
      "step": 803870
    },
    {
      "epoch": 2.8173989843232503,
      "grad_norm": 2.984375,
      "learning_rate": 3.3815691436196587e-06,
      "loss": 0.7276,
      "step": 803880
    },
    {
      "epoch": 2.8174340318301456,
      "grad_norm": 2.546875,
      "learning_rate": 3.380920114955957e-06,
      "loss": 0.7375,
      "step": 803890
    },
    {
      "epoch": 2.8174690793370414,
      "grad_norm": 2.40625,
      "learning_rate": 3.3802710862922556e-06,
      "loss": 0.7788,
      "step": 803900
    },
    {
      "epoch": 2.8175041268439367,
      "grad_norm": 2.609375,
      "learning_rate": 3.379622057628553e-06,
      "loss": 0.8458,
      "step": 803910
    },
    {
      "epoch": 2.8175391743508325,
      "grad_norm": 2.859375,
      "learning_rate": 3.378973028964851e-06,
      "loss": 0.7991,
      "step": 803920
    },
    {
      "epoch": 2.817574221857728,
      "grad_norm": 2.6875,
      "learning_rate": 3.3783240003011496e-06,
      "loss": 0.7974,
      "step": 803930
    },
    {
      "epoch": 2.817609269364624,
      "grad_norm": 2.78125,
      "learning_rate": 3.3776749716374476e-06,
      "loss": 0.819,
      "step": 803940
    },
    {
      "epoch": 2.8176443168715193,
      "grad_norm": 3.0,
      "learning_rate": 3.377025942973746e-06,
      "loss": 0.803,
      "step": 803950
    },
    {
      "epoch": 2.817679364378415,
      "grad_norm": 2.859375,
      "learning_rate": 3.3763769143100436e-06,
      "loss": 0.8496,
      "step": 803960
    },
    {
      "epoch": 2.8177144118853104,
      "grad_norm": 2.921875,
      "learning_rate": 3.3757278856463416e-06,
      "loss": 0.774,
      "step": 803970
    },
    {
      "epoch": 2.817749459392206,
      "grad_norm": 2.78125,
      "learning_rate": 3.37507885698264e-06,
      "loss": 0.6985,
      "step": 803980
    },
    {
      "epoch": 2.817784506899102,
      "grad_norm": 2.796875,
      "learning_rate": 3.3744298283189384e-06,
      "loss": 0.867,
      "step": 803990
    },
    {
      "epoch": 2.817819554405997,
      "grad_norm": 2.890625,
      "learning_rate": 3.3737807996552364e-06,
      "loss": 0.8383,
      "step": 804000
    },
    {
      "epoch": 2.817854601912893,
      "grad_norm": 2.9375,
      "learning_rate": 3.373131770991534e-06,
      "loss": 0.8227,
      "step": 804010
    },
    {
      "epoch": 2.8178896494197883,
      "grad_norm": 3.171875,
      "learning_rate": 3.3724827423278324e-06,
      "loss": 0.8472,
      "step": 804020
    },
    {
      "epoch": 2.817924696926684,
      "grad_norm": 3.40625,
      "learning_rate": 3.3718337136641304e-06,
      "loss": 0.8253,
      "step": 804030
    },
    {
      "epoch": 2.8179597444335798,
      "grad_norm": 2.515625,
      "learning_rate": 3.371184685000429e-06,
      "loss": 0.7241,
      "step": 804040
    },
    {
      "epoch": 2.8179947919404755,
      "grad_norm": 2.984375,
      "learning_rate": 3.3705356563367264e-06,
      "loss": 0.8109,
      "step": 804050
    },
    {
      "epoch": 2.818029839447371,
      "grad_norm": 2.828125,
      "learning_rate": 3.3698866276730244e-06,
      "loss": 0.814,
      "step": 804060
    },
    {
      "epoch": 2.8180648869542666,
      "grad_norm": 2.15625,
      "learning_rate": 3.369237599009323e-06,
      "loss": 0.8664,
      "step": 804070
    },
    {
      "epoch": 2.818099934461162,
      "grad_norm": 2.703125,
      "learning_rate": 3.368588570345621e-06,
      "loss": 0.7379,
      "step": 804080
    },
    {
      "epoch": 2.8181349819680577,
      "grad_norm": 3.109375,
      "learning_rate": 3.3679395416819193e-06,
      "loss": 0.7381,
      "step": 804090
    },
    {
      "epoch": 2.8181700294749534,
      "grad_norm": 3.125,
      "learning_rate": 3.367290513018217e-06,
      "loss": 0.7443,
      "step": 804100
    },
    {
      "epoch": 2.8182050769818487,
      "grad_norm": 2.8125,
      "learning_rate": 3.3666414843545153e-06,
      "loss": 0.8145,
      "step": 804110
    },
    {
      "epoch": 2.8182401244887445,
      "grad_norm": 2.796875,
      "learning_rate": 3.3659924556908133e-06,
      "loss": 0.8043,
      "step": 804120
    },
    {
      "epoch": 2.81827517199564,
      "grad_norm": 3.15625,
      "learning_rate": 3.3653434270271117e-06,
      "loss": 0.8234,
      "step": 804130
    },
    {
      "epoch": 2.8183102195025356,
      "grad_norm": 2.640625,
      "learning_rate": 3.3646943983634097e-06,
      "loss": 0.8097,
      "step": 804140
    },
    {
      "epoch": 2.8183452670094313,
      "grad_norm": 2.953125,
      "learning_rate": 3.3640453696997073e-06,
      "loss": 0.725,
      "step": 804150
    },
    {
      "epoch": 2.818380314516327,
      "grad_norm": 2.65625,
      "learning_rate": 3.3633963410360057e-06,
      "loss": 0.8376,
      "step": 804160
    },
    {
      "epoch": 2.8184153620232224,
      "grad_norm": 2.828125,
      "learning_rate": 3.3627473123723037e-06,
      "loss": 0.8038,
      "step": 804170
    },
    {
      "epoch": 2.818450409530118,
      "grad_norm": 3.078125,
      "learning_rate": 3.362098283708602e-06,
      "loss": 0.8058,
      "step": 804180
    },
    {
      "epoch": 2.8184854570370135,
      "grad_norm": 2.765625,
      "learning_rate": 3.3614492550448997e-06,
      "loss": 0.754,
      "step": 804190
    },
    {
      "epoch": 2.8185205045439092,
      "grad_norm": 2.9375,
      "learning_rate": 3.3608002263811977e-06,
      "loss": 0.8504,
      "step": 804200
    },
    {
      "epoch": 2.818555552050805,
      "grad_norm": 3.109375,
      "learning_rate": 3.360151197717496e-06,
      "loss": 0.7419,
      "step": 804210
    },
    {
      "epoch": 2.8185905995577003,
      "grad_norm": 2.734375,
      "learning_rate": 3.3595021690537945e-06,
      "loss": 0.8244,
      "step": 804220
    },
    {
      "epoch": 2.818625647064596,
      "grad_norm": 2.953125,
      "learning_rate": 3.3588531403900925e-06,
      "loss": 0.8327,
      "step": 804230
    },
    {
      "epoch": 2.818660694571492,
      "grad_norm": 2.5625,
      "learning_rate": 3.35820411172639e-06,
      "loss": 0.8437,
      "step": 804240
    },
    {
      "epoch": 2.818695742078387,
      "grad_norm": 2.890625,
      "learning_rate": 3.3575550830626885e-06,
      "loss": 0.8565,
      "step": 804250
    },
    {
      "epoch": 2.818730789585283,
      "grad_norm": 3.46875,
      "learning_rate": 3.3569060543989865e-06,
      "loss": 0.7868,
      "step": 804260
    },
    {
      "epoch": 2.8187658370921786,
      "grad_norm": 2.359375,
      "learning_rate": 3.356257025735285e-06,
      "loss": 0.7733,
      "step": 804270
    },
    {
      "epoch": 2.818800884599074,
      "grad_norm": 3.0625,
      "learning_rate": 3.355607997071583e-06,
      "loss": 0.8667,
      "step": 804280
    },
    {
      "epoch": 2.8188359321059697,
      "grad_norm": 2.75,
      "learning_rate": 3.3549589684078805e-06,
      "loss": 0.8853,
      "step": 804290
    },
    {
      "epoch": 2.818870979612865,
      "grad_norm": 2.875,
      "learning_rate": 3.354309939744179e-06,
      "loss": 0.829,
      "step": 804300
    },
    {
      "epoch": 2.818906027119761,
      "grad_norm": 2.96875,
      "learning_rate": 3.353660911080477e-06,
      "loss": 0.7337,
      "step": 804310
    },
    {
      "epoch": 2.8189410746266566,
      "grad_norm": 2.734375,
      "learning_rate": 3.3530118824167754e-06,
      "loss": 0.8145,
      "step": 804320
    },
    {
      "epoch": 2.818976122133552,
      "grad_norm": 2.578125,
      "learning_rate": 3.352362853753074e-06,
      "loss": 0.7451,
      "step": 804330
    },
    {
      "epoch": 2.8190111696404476,
      "grad_norm": 2.921875,
      "learning_rate": 3.3517138250893714e-06,
      "loss": 0.7598,
      "step": 804340
    },
    {
      "epoch": 2.8190462171473434,
      "grad_norm": 3.34375,
      "learning_rate": 3.3510647964256694e-06,
      "loss": 0.8152,
      "step": 804350
    },
    {
      "epoch": 2.8190812646542387,
      "grad_norm": 2.734375,
      "learning_rate": 3.350415767761968e-06,
      "loss": 0.7645,
      "step": 804360
    },
    {
      "epoch": 2.8191163121611345,
      "grad_norm": 2.21875,
      "learning_rate": 3.349766739098266e-06,
      "loss": 0.7373,
      "step": 804370
    },
    {
      "epoch": 2.81915135966803,
      "grad_norm": 2.53125,
      "learning_rate": 3.3491177104345634e-06,
      "loss": 0.7146,
      "step": 804380
    },
    {
      "epoch": 2.8191864071749255,
      "grad_norm": 3.0625,
      "learning_rate": 3.348468681770862e-06,
      "loss": 0.9088,
      "step": 804390
    },
    {
      "epoch": 2.8192214546818213,
      "grad_norm": 2.6875,
      "learning_rate": 3.34781965310716e-06,
      "loss": 0.816,
      "step": 804400
    },
    {
      "epoch": 2.8192565021887166,
      "grad_norm": 2.625,
      "learning_rate": 3.3471706244434582e-06,
      "loss": 0.8246,
      "step": 804410
    },
    {
      "epoch": 2.8192915496956124,
      "grad_norm": 2.953125,
      "learning_rate": 3.3465215957797567e-06,
      "loss": 0.8366,
      "step": 804420
    },
    {
      "epoch": 2.819326597202508,
      "grad_norm": 2.875,
      "learning_rate": 3.345872567116054e-06,
      "loss": 0.7831,
      "step": 804430
    },
    {
      "epoch": 2.819361644709404,
      "grad_norm": 2.75,
      "learning_rate": 3.3452235384523522e-06,
      "loss": 0.7722,
      "step": 804440
    },
    {
      "epoch": 2.819396692216299,
      "grad_norm": 3.015625,
      "learning_rate": 3.3445745097886507e-06,
      "loss": 0.7394,
      "step": 804450
    },
    {
      "epoch": 2.819431739723195,
      "grad_norm": 2.90625,
      "learning_rate": 3.3439254811249487e-06,
      "loss": 0.8527,
      "step": 804460
    },
    {
      "epoch": 2.8194667872300903,
      "grad_norm": 2.90625,
      "learning_rate": 3.343276452461247e-06,
      "loss": 0.7977,
      "step": 804470
    },
    {
      "epoch": 2.819501834736986,
      "grad_norm": 3.0,
      "learning_rate": 3.3426274237975447e-06,
      "loss": 0.7825,
      "step": 804480
    },
    {
      "epoch": 2.8195368822438818,
      "grad_norm": 2.703125,
      "learning_rate": 3.3419783951338427e-06,
      "loss": 0.7562,
      "step": 804490
    },
    {
      "epoch": 2.819571929750777,
      "grad_norm": 3.265625,
      "learning_rate": 3.341329366470141e-06,
      "loss": 0.8619,
      "step": 804500
    },
    {
      "epoch": 2.819606977257673,
      "grad_norm": 3.1875,
      "learning_rate": 3.340680337806439e-06,
      "loss": 0.8068,
      "step": 804510
    },
    {
      "epoch": 2.819642024764568,
      "grad_norm": 3.25,
      "learning_rate": 3.3400313091427367e-06,
      "loss": 0.8575,
      "step": 804520
    },
    {
      "epoch": 2.819677072271464,
      "grad_norm": 2.859375,
      "learning_rate": 3.339382280479035e-06,
      "loss": 0.772,
      "step": 804530
    },
    {
      "epoch": 2.8197121197783597,
      "grad_norm": 2.890625,
      "learning_rate": 3.3387332518153335e-06,
      "loss": 0.9071,
      "step": 804540
    },
    {
      "epoch": 2.8197471672852554,
      "grad_norm": 2.96875,
      "learning_rate": 3.3380842231516315e-06,
      "loss": 0.7712,
      "step": 804550
    },
    {
      "epoch": 2.8197822147921507,
      "grad_norm": 2.546875,
      "learning_rate": 3.33743519448793e-06,
      "loss": 0.8458,
      "step": 804560
    },
    {
      "epoch": 2.8198172622990465,
      "grad_norm": 3.203125,
      "learning_rate": 3.3367861658242275e-06,
      "loss": 0.9024,
      "step": 804570
    },
    {
      "epoch": 2.819852309805942,
      "grad_norm": 3.078125,
      "learning_rate": 3.3361371371605255e-06,
      "loss": 0.7767,
      "step": 804580
    },
    {
      "epoch": 2.8198873573128376,
      "grad_norm": 2.71875,
      "learning_rate": 3.335488108496824e-06,
      "loss": 0.7391,
      "step": 804590
    },
    {
      "epoch": 2.8199224048197333,
      "grad_norm": 3.328125,
      "learning_rate": 3.334839079833122e-06,
      "loss": 0.8708,
      "step": 804600
    },
    {
      "epoch": 2.8199574523266286,
      "grad_norm": 3.09375,
      "learning_rate": 3.3341900511694204e-06,
      "loss": 0.8354,
      "step": 804610
    },
    {
      "epoch": 2.8199924998335244,
      "grad_norm": 3.25,
      "learning_rate": 3.333541022505718e-06,
      "loss": 0.789,
      "step": 804620
    },
    {
      "epoch": 2.8200275473404197,
      "grad_norm": 3.65625,
      "learning_rate": 3.332891993842016e-06,
      "loss": 0.8029,
      "step": 804630
    },
    {
      "epoch": 2.8200625948473155,
      "grad_norm": 2.765625,
      "learning_rate": 3.3322429651783144e-06,
      "loss": 0.8093,
      "step": 804640
    },
    {
      "epoch": 2.8200976423542112,
      "grad_norm": 3.109375,
      "learning_rate": 3.3315939365146128e-06,
      "loss": 0.7953,
      "step": 804650
    },
    {
      "epoch": 2.820132689861107,
      "grad_norm": 3.0,
      "learning_rate": 3.3309449078509104e-06,
      "loss": 0.7699,
      "step": 804660
    },
    {
      "epoch": 2.8201677373680023,
      "grad_norm": 2.59375,
      "learning_rate": 3.3302958791872084e-06,
      "loss": 0.8651,
      "step": 804670
    },
    {
      "epoch": 2.820202784874898,
      "grad_norm": 2.96875,
      "learning_rate": 3.3296468505235068e-06,
      "loss": 0.8136,
      "step": 804680
    },
    {
      "epoch": 2.8202378323817934,
      "grad_norm": 3.25,
      "learning_rate": 3.3289978218598048e-06,
      "loss": 0.8935,
      "step": 804690
    },
    {
      "epoch": 2.820272879888689,
      "grad_norm": 2.703125,
      "learning_rate": 3.328348793196103e-06,
      "loss": 0.8086,
      "step": 804700
    },
    {
      "epoch": 2.820307927395585,
      "grad_norm": 2.953125,
      "learning_rate": 3.3276997645324008e-06,
      "loss": 0.8975,
      "step": 804710
    },
    {
      "epoch": 2.82034297490248,
      "grad_norm": 2.671875,
      "learning_rate": 3.3270507358686988e-06,
      "loss": 0.7471,
      "step": 804720
    },
    {
      "epoch": 2.820378022409376,
      "grad_norm": 2.734375,
      "learning_rate": 3.326401707204997e-06,
      "loss": 0.7845,
      "step": 804730
    },
    {
      "epoch": 2.8204130699162713,
      "grad_norm": 2.765625,
      "learning_rate": 3.325752678541295e-06,
      "loss": 0.7707,
      "step": 804740
    },
    {
      "epoch": 2.820448117423167,
      "grad_norm": 2.78125,
      "learning_rate": 3.3251036498775936e-06,
      "loss": 0.8554,
      "step": 804750
    },
    {
      "epoch": 2.820483164930063,
      "grad_norm": 3.0,
      "learning_rate": 3.324454621213891e-06,
      "loss": 0.7718,
      "step": 804760
    },
    {
      "epoch": 2.8205182124369585,
      "grad_norm": 2.671875,
      "learning_rate": 3.3238055925501896e-06,
      "loss": 0.8446,
      "step": 804770
    },
    {
      "epoch": 2.820553259943854,
      "grad_norm": 3.1875,
      "learning_rate": 3.3231565638864876e-06,
      "loss": 0.7597,
      "step": 804780
    },
    {
      "epoch": 2.8205883074507496,
      "grad_norm": 2.828125,
      "learning_rate": 3.322507535222786e-06,
      "loss": 0.8808,
      "step": 804790
    },
    {
      "epoch": 2.820623354957645,
      "grad_norm": 3.0,
      "learning_rate": 3.321858506559084e-06,
      "loss": 0.8514,
      "step": 804800
    },
    {
      "epoch": 2.8206584024645407,
      "grad_norm": 2.671875,
      "learning_rate": 3.3212094778953816e-06,
      "loss": 0.7805,
      "step": 804810
    },
    {
      "epoch": 2.8206934499714365,
      "grad_norm": 3.03125,
      "learning_rate": 3.32056044923168e-06,
      "loss": 0.86,
      "step": 804820
    },
    {
      "epoch": 2.8207284974783318,
      "grad_norm": 3.296875,
      "learning_rate": 3.319911420567978e-06,
      "loss": 0.8012,
      "step": 804830
    },
    {
      "epoch": 2.8207635449852275,
      "grad_norm": 2.84375,
      "learning_rate": 3.3192623919042765e-06,
      "loss": 0.8404,
      "step": 804840
    },
    {
      "epoch": 2.820798592492123,
      "grad_norm": 2.796875,
      "learning_rate": 3.318613363240574e-06,
      "loss": 0.8199,
      "step": 804850
    },
    {
      "epoch": 2.8208336399990186,
      "grad_norm": 2.84375,
      "learning_rate": 3.317964334576872e-06,
      "loss": 0.7423,
      "step": 804860
    },
    {
      "epoch": 2.8208686875059144,
      "grad_norm": 2.71875,
      "learning_rate": 3.3173153059131705e-06,
      "loss": 0.8324,
      "step": 804870
    },
    {
      "epoch": 2.82090373501281,
      "grad_norm": 2.828125,
      "learning_rate": 3.316666277249469e-06,
      "loss": 0.8006,
      "step": 804880
    },
    {
      "epoch": 2.8209387825197054,
      "grad_norm": 3.203125,
      "learning_rate": 3.316017248585767e-06,
      "loss": 0.7817,
      "step": 804890
    },
    {
      "epoch": 2.820973830026601,
      "grad_norm": 3.234375,
      "learning_rate": 3.3153682199220645e-06,
      "loss": 0.7369,
      "step": 804900
    },
    {
      "epoch": 2.8210088775334965,
      "grad_norm": 3.125,
      "learning_rate": 3.314719191258363e-06,
      "loss": 0.8069,
      "step": 804910
    },
    {
      "epoch": 2.8210439250403923,
      "grad_norm": 2.96875,
      "learning_rate": 3.314070162594661e-06,
      "loss": 0.7906,
      "step": 804920
    },
    {
      "epoch": 2.821078972547288,
      "grad_norm": 2.734375,
      "learning_rate": 3.3134211339309593e-06,
      "loss": 0.759,
      "step": 804930
    },
    {
      "epoch": 2.8211140200541833,
      "grad_norm": 2.6875,
      "learning_rate": 3.3127721052672573e-06,
      "loss": 0.7944,
      "step": 804940
    },
    {
      "epoch": 2.821149067561079,
      "grad_norm": 2.90625,
      "learning_rate": 3.312123076603555e-06,
      "loss": 0.7937,
      "step": 804950
    },
    {
      "epoch": 2.8211841150679744,
      "grad_norm": 3.03125,
      "learning_rate": 3.3114740479398533e-06,
      "loss": 0.8569,
      "step": 804960
    },
    {
      "epoch": 2.82121916257487,
      "grad_norm": 2.65625,
      "learning_rate": 3.3108250192761517e-06,
      "loss": 0.7495,
      "step": 804970
    },
    {
      "epoch": 2.821254210081766,
      "grad_norm": 3.328125,
      "learning_rate": 3.3101759906124497e-06,
      "loss": 0.8068,
      "step": 804980
    },
    {
      "epoch": 2.8212892575886617,
      "grad_norm": 3.34375,
      "learning_rate": 3.3095269619487473e-06,
      "loss": 0.8487,
      "step": 804990
    },
    {
      "epoch": 2.821324305095557,
      "grad_norm": 2.765625,
      "learning_rate": 3.3088779332850457e-06,
      "loss": 0.8257,
      "step": 805000
    },
    {
      "epoch": 2.821324305095557,
      "eval_loss": 0.7513429522514343,
      "eval_runtime": 560.6213,
      "eval_samples_per_second": 678.597,
      "eval_steps_per_second": 56.55,
      "step": 805000
    },
    {
      "epoch": 2.8213593526024527,
      "grad_norm": 3.21875,
      "learning_rate": 3.3082289046213437e-06,
      "loss": 0.7744,
      "step": 805010
    },
    {
      "epoch": 2.821394400109348,
      "grad_norm": 3.046875,
      "learning_rate": 3.307579875957642e-06,
      "loss": 0.8117,
      "step": 805020
    },
    {
      "epoch": 2.821429447616244,
      "grad_norm": 2.96875,
      "learning_rate": 3.30693084729394e-06,
      "loss": 0.7958,
      "step": 805030
    },
    {
      "epoch": 2.8214644951231396,
      "grad_norm": 2.9375,
      "learning_rate": 3.3062818186302377e-06,
      "loss": 0.8294,
      "step": 805040
    },
    {
      "epoch": 2.821499542630035,
      "grad_norm": 3.109375,
      "learning_rate": 3.305632789966536e-06,
      "loss": 0.825,
      "step": 805050
    },
    {
      "epoch": 2.8215345901369306,
      "grad_norm": 3.296875,
      "learning_rate": 3.304983761302834e-06,
      "loss": 0.7587,
      "step": 805060
    },
    {
      "epoch": 2.821569637643826,
      "grad_norm": 2.546875,
      "learning_rate": 3.3043347326391326e-06,
      "loss": 0.8251,
      "step": 805070
    },
    {
      "epoch": 2.8216046851507217,
      "grad_norm": 3.515625,
      "learning_rate": 3.303685703975431e-06,
      "loss": 0.8205,
      "step": 805080
    },
    {
      "epoch": 2.8216397326576175,
      "grad_norm": 3.03125,
      "learning_rate": 3.3030366753117286e-06,
      "loss": 0.7752,
      "step": 805090
    },
    {
      "epoch": 2.8216747801645132,
      "grad_norm": 2.90625,
      "learning_rate": 3.3023876466480266e-06,
      "loss": 0.8654,
      "step": 805100
    },
    {
      "epoch": 2.8217098276714085,
      "grad_norm": 3.15625,
      "learning_rate": 3.301738617984325e-06,
      "loss": 0.7889,
      "step": 805110
    },
    {
      "epoch": 2.8217448751783043,
      "grad_norm": 2.625,
      "learning_rate": 3.301089589320623e-06,
      "loss": 0.8205,
      "step": 805120
    },
    {
      "epoch": 2.8217799226851996,
      "grad_norm": 3.15625,
      "learning_rate": 3.3004405606569206e-06,
      "loss": 0.8081,
      "step": 805130
    },
    {
      "epoch": 2.8218149701920954,
      "grad_norm": 3.09375,
      "learning_rate": 3.299791531993219e-06,
      "loss": 0.7784,
      "step": 805140
    },
    {
      "epoch": 2.821850017698991,
      "grad_norm": 3.0625,
      "learning_rate": 3.299142503329517e-06,
      "loss": 0.7822,
      "step": 805150
    },
    {
      "epoch": 2.8218850652058864,
      "grad_norm": 2.96875,
      "learning_rate": 3.2984934746658154e-06,
      "loss": 0.7813,
      "step": 805160
    },
    {
      "epoch": 2.821920112712782,
      "grad_norm": 2.515625,
      "learning_rate": 3.297844446002114e-06,
      "loss": 0.7771,
      "step": 805170
    },
    {
      "epoch": 2.8219551602196775,
      "grad_norm": 3.109375,
      "learning_rate": 3.297195417338411e-06,
      "loss": 0.791,
      "step": 805180
    },
    {
      "epoch": 2.8219902077265733,
      "grad_norm": 2.96875,
      "learning_rate": 3.2965463886747094e-06,
      "loss": 0.8398,
      "step": 805190
    },
    {
      "epoch": 2.822025255233469,
      "grad_norm": 3.03125,
      "learning_rate": 3.295897360011008e-06,
      "loss": 0.8153,
      "step": 805200
    },
    {
      "epoch": 2.822060302740365,
      "grad_norm": 3.34375,
      "learning_rate": 3.295248331347306e-06,
      "loss": 0.828,
      "step": 805210
    },
    {
      "epoch": 2.82209535024726,
      "grad_norm": 2.859375,
      "learning_rate": 3.2945993026836043e-06,
      "loss": 0.7629,
      "step": 805220
    },
    {
      "epoch": 2.822130397754156,
      "grad_norm": 2.75,
      "learning_rate": 3.293950274019902e-06,
      "loss": 0.871,
      "step": 805230
    },
    {
      "epoch": 2.822165445261051,
      "grad_norm": 3.328125,
      "learning_rate": 3.2933012453562e-06,
      "loss": 0.7821,
      "step": 805240
    },
    {
      "epoch": 2.822200492767947,
      "grad_norm": 2.515625,
      "learning_rate": 3.2926522166924983e-06,
      "loss": 0.7401,
      "step": 805250
    },
    {
      "epoch": 2.8222355402748427,
      "grad_norm": 2.890625,
      "learning_rate": 3.2920031880287963e-06,
      "loss": 0.8531,
      "step": 805260
    },
    {
      "epoch": 2.822270587781738,
      "grad_norm": 3.0625,
      "learning_rate": 3.2913541593650947e-06,
      "loss": 0.8303,
      "step": 805270
    },
    {
      "epoch": 2.8223056352886338,
      "grad_norm": 2.53125,
      "learning_rate": 3.2907051307013923e-06,
      "loss": 0.8335,
      "step": 805280
    },
    {
      "epoch": 2.822340682795529,
      "grad_norm": 3.0625,
      "learning_rate": 3.2900561020376907e-06,
      "loss": 0.8417,
      "step": 805290
    },
    {
      "epoch": 2.822375730302425,
      "grad_norm": 3.421875,
      "learning_rate": 3.2894070733739887e-06,
      "loss": 0.7844,
      "step": 805300
    },
    {
      "epoch": 2.8224107778093206,
      "grad_norm": 3.265625,
      "learning_rate": 3.288758044710287e-06,
      "loss": 0.8125,
      "step": 805310
    },
    {
      "epoch": 2.8224458253162164,
      "grad_norm": 2.859375,
      "learning_rate": 3.2881090160465847e-06,
      "loss": 0.7963,
      "step": 805320
    },
    {
      "epoch": 2.8224808728231117,
      "grad_norm": 2.59375,
      "learning_rate": 3.2874599873828827e-06,
      "loss": 0.7396,
      "step": 805330
    },
    {
      "epoch": 2.8225159203300074,
      "grad_norm": 3.1875,
      "learning_rate": 3.286810958719181e-06,
      "loss": 0.7319,
      "step": 805340
    },
    {
      "epoch": 2.8225509678369027,
      "grad_norm": 2.875,
      "learning_rate": 3.286161930055479e-06,
      "loss": 0.7957,
      "step": 805350
    },
    {
      "epoch": 2.8225860153437985,
      "grad_norm": 2.765625,
      "learning_rate": 3.2855129013917775e-06,
      "loss": 0.8506,
      "step": 805360
    },
    {
      "epoch": 2.8226210628506943,
      "grad_norm": 2.953125,
      "learning_rate": 3.284863872728075e-06,
      "loss": 0.8113,
      "step": 805370
    },
    {
      "epoch": 2.8226561103575896,
      "grad_norm": 2.71875,
      "learning_rate": 3.284214844064373e-06,
      "loss": 0.7531,
      "step": 805380
    },
    {
      "epoch": 2.8226911578644853,
      "grad_norm": 2.765625,
      "learning_rate": 3.2835658154006715e-06,
      "loss": 0.776,
      "step": 805390
    },
    {
      "epoch": 2.8227262053713806,
      "grad_norm": 3.25,
      "learning_rate": 3.28291678673697e-06,
      "loss": 0.7419,
      "step": 805400
    },
    {
      "epoch": 2.8227612528782764,
      "grad_norm": 3.15625,
      "learning_rate": 3.282267758073268e-06,
      "loss": 0.7809,
      "step": 805410
    },
    {
      "epoch": 2.822796300385172,
      "grad_norm": 2.890625,
      "learning_rate": 3.2816187294095655e-06,
      "loss": 0.7539,
      "step": 805420
    },
    {
      "epoch": 2.822831347892068,
      "grad_norm": 2.8125,
      "learning_rate": 3.280969700745864e-06,
      "loss": 0.7601,
      "step": 805430
    },
    {
      "epoch": 2.8228663953989632,
      "grad_norm": 2.140625,
      "learning_rate": 3.280320672082162e-06,
      "loss": 0.6869,
      "step": 805440
    },
    {
      "epoch": 2.822901442905859,
      "grad_norm": 2.90625,
      "learning_rate": 3.2796716434184604e-06,
      "loss": 0.6875,
      "step": 805450
    },
    {
      "epoch": 2.8229364904127543,
      "grad_norm": 3.109375,
      "learning_rate": 3.279022614754758e-06,
      "loss": 0.7763,
      "step": 805460
    },
    {
      "epoch": 2.82297153791965,
      "grad_norm": 3.953125,
      "learning_rate": 3.278373586091056e-06,
      "loss": 0.8384,
      "step": 805470
    },
    {
      "epoch": 2.823006585426546,
      "grad_norm": 2.796875,
      "learning_rate": 3.2777245574273544e-06,
      "loss": 0.7999,
      "step": 805480
    },
    {
      "epoch": 2.823041632933441,
      "grad_norm": 2.8125,
      "learning_rate": 3.2770755287636524e-06,
      "loss": 0.76,
      "step": 805490
    },
    {
      "epoch": 2.823076680440337,
      "grad_norm": 3.171875,
      "learning_rate": 3.276426500099951e-06,
      "loss": 0.8014,
      "step": 805500
    },
    {
      "epoch": 2.823111727947232,
      "grad_norm": 3.046875,
      "learning_rate": 3.2757774714362484e-06,
      "loss": 0.8733,
      "step": 805510
    },
    {
      "epoch": 2.823146775454128,
      "grad_norm": 3.046875,
      "learning_rate": 3.275128442772547e-06,
      "loss": 0.7905,
      "step": 805520
    },
    {
      "epoch": 2.8231818229610237,
      "grad_norm": 3.078125,
      "learning_rate": 3.274479414108845e-06,
      "loss": 0.7358,
      "step": 805530
    },
    {
      "epoch": 2.8232168704679195,
      "grad_norm": 3.734375,
      "learning_rate": 3.2738303854451432e-06,
      "loss": 0.8014,
      "step": 805540
    },
    {
      "epoch": 2.823251917974815,
      "grad_norm": 3.125,
      "learning_rate": 3.2731813567814412e-06,
      "loss": 0.7974,
      "step": 805550
    },
    {
      "epoch": 2.8232869654817105,
      "grad_norm": 2.875,
      "learning_rate": 3.272532328117739e-06,
      "loss": 0.8142,
      "step": 805560
    },
    {
      "epoch": 2.823322012988606,
      "grad_norm": 2.953125,
      "learning_rate": 3.2718832994540372e-06,
      "loss": 0.8292,
      "step": 805570
    },
    {
      "epoch": 2.8233570604955016,
      "grad_norm": 2.984375,
      "learning_rate": 3.2712342707903352e-06,
      "loss": 0.7673,
      "step": 805580
    },
    {
      "epoch": 2.8233921080023974,
      "grad_norm": 2.890625,
      "learning_rate": 3.2705852421266337e-06,
      "loss": 0.688,
      "step": 805590
    },
    {
      "epoch": 2.8234271555092927,
      "grad_norm": 3.5,
      "learning_rate": 3.2699362134629312e-06,
      "loss": 0.8382,
      "step": 805600
    },
    {
      "epoch": 2.8234622030161884,
      "grad_norm": 3.5,
      "learning_rate": 3.2692871847992292e-06,
      "loss": 0.8356,
      "step": 805610
    },
    {
      "epoch": 2.823497250523084,
      "grad_norm": 3.28125,
      "learning_rate": 3.2686381561355277e-06,
      "loss": 0.749,
      "step": 805620
    },
    {
      "epoch": 2.8235322980299795,
      "grad_norm": 3.171875,
      "learning_rate": 3.267989127471826e-06,
      "loss": 0.8347,
      "step": 805630
    },
    {
      "epoch": 2.8235673455368753,
      "grad_norm": 2.90625,
      "learning_rate": 3.267340098808124e-06,
      "loss": 0.7598,
      "step": 805640
    },
    {
      "epoch": 2.823602393043771,
      "grad_norm": 2.671875,
      "learning_rate": 3.2666910701444217e-06,
      "loss": 0.7926,
      "step": 805650
    },
    {
      "epoch": 2.8236374405506663,
      "grad_norm": 2.5625,
      "learning_rate": 3.26604204148072e-06,
      "loss": 0.7632,
      "step": 805660
    },
    {
      "epoch": 2.823672488057562,
      "grad_norm": 2.84375,
      "learning_rate": 3.265393012817018e-06,
      "loss": 0.7831,
      "step": 805670
    },
    {
      "epoch": 2.8237075355644574,
      "grad_norm": 2.859375,
      "learning_rate": 3.2647439841533165e-06,
      "loss": 0.8235,
      "step": 805680
    },
    {
      "epoch": 2.823742583071353,
      "grad_norm": 2.9375,
      "learning_rate": 3.2640949554896145e-06,
      "loss": 0.8558,
      "step": 805690
    },
    {
      "epoch": 2.823777630578249,
      "grad_norm": 2.875,
      "learning_rate": 3.263445926825912e-06,
      "loss": 0.7308,
      "step": 805700
    },
    {
      "epoch": 2.8238126780851447,
      "grad_norm": 2.828125,
      "learning_rate": 3.2627968981622105e-06,
      "loss": 0.7903,
      "step": 805710
    },
    {
      "epoch": 2.82384772559204,
      "grad_norm": 3.03125,
      "learning_rate": 3.262147869498509e-06,
      "loss": 0.8037,
      "step": 805720
    },
    {
      "epoch": 2.8238827730989358,
      "grad_norm": 3.125,
      "learning_rate": 3.261498840834807e-06,
      "loss": 0.8226,
      "step": 805730
    },
    {
      "epoch": 2.823917820605831,
      "grad_norm": 2.890625,
      "learning_rate": 3.2608498121711054e-06,
      "loss": 0.8271,
      "step": 805740
    },
    {
      "epoch": 2.823952868112727,
      "grad_norm": 3.1875,
      "learning_rate": 3.260200783507403e-06,
      "loss": 0.8395,
      "step": 805750
    },
    {
      "epoch": 2.8239879156196226,
      "grad_norm": 3.15625,
      "learning_rate": 3.259551754843701e-06,
      "loss": 0.7854,
      "step": 805760
    },
    {
      "epoch": 2.824022963126518,
      "grad_norm": 3.59375,
      "learning_rate": 3.2589027261799994e-06,
      "loss": 0.7997,
      "step": 805770
    },
    {
      "epoch": 2.8240580106334137,
      "grad_norm": 2.828125,
      "learning_rate": 3.2582536975162974e-06,
      "loss": 0.7378,
      "step": 805780
    },
    {
      "epoch": 2.824093058140309,
      "grad_norm": 2.625,
      "learning_rate": 3.257604668852595e-06,
      "loss": 0.8428,
      "step": 805790
    },
    {
      "epoch": 2.8241281056472047,
      "grad_norm": 2.78125,
      "learning_rate": 3.2569556401888934e-06,
      "loss": 0.8161,
      "step": 805800
    },
    {
      "epoch": 2.8241631531541005,
      "grad_norm": 2.875,
      "learning_rate": 3.2563066115251914e-06,
      "loss": 0.8179,
      "step": 805810
    },
    {
      "epoch": 2.8241982006609962,
      "grad_norm": 3.171875,
      "learning_rate": 3.2556575828614898e-06,
      "loss": 0.8151,
      "step": 805820
    },
    {
      "epoch": 2.8242332481678916,
      "grad_norm": 2.9375,
      "learning_rate": 3.255008554197788e-06,
      "loss": 0.8132,
      "step": 805830
    },
    {
      "epoch": 2.8242682956747873,
      "grad_norm": 2.78125,
      "learning_rate": 3.2543595255340858e-06,
      "loss": 0.7789,
      "step": 805840
    },
    {
      "epoch": 2.8243033431816826,
      "grad_norm": 2.875,
      "learning_rate": 3.2537104968703838e-06,
      "loss": 0.7483,
      "step": 805850
    },
    {
      "epoch": 2.8243383906885784,
      "grad_norm": 3.03125,
      "learning_rate": 3.253061468206682e-06,
      "loss": 0.8617,
      "step": 805860
    },
    {
      "epoch": 2.824373438195474,
      "grad_norm": 2.953125,
      "learning_rate": 3.25241243954298e-06,
      "loss": 0.7974,
      "step": 805870
    },
    {
      "epoch": 2.8244084857023695,
      "grad_norm": 2.671875,
      "learning_rate": 3.2517634108792786e-06,
      "loss": 0.8194,
      "step": 805880
    },
    {
      "epoch": 2.8244435332092652,
      "grad_norm": 2.953125,
      "learning_rate": 3.251114382215576e-06,
      "loss": 0.7432,
      "step": 805890
    },
    {
      "epoch": 2.8244785807161605,
      "grad_norm": 3.046875,
      "learning_rate": 3.250465353551874e-06,
      "loss": 0.7367,
      "step": 805900
    },
    {
      "epoch": 2.8245136282230563,
      "grad_norm": 3.109375,
      "learning_rate": 3.2498163248881726e-06,
      "loss": 0.8171,
      "step": 805910
    },
    {
      "epoch": 2.824548675729952,
      "grad_norm": 2.453125,
      "learning_rate": 3.2491672962244706e-06,
      "loss": 0.7203,
      "step": 805920
    },
    {
      "epoch": 2.824583723236848,
      "grad_norm": 2.84375,
      "learning_rate": 3.248518267560768e-06,
      "loss": 0.7589,
      "step": 805930
    },
    {
      "epoch": 2.824618770743743,
      "grad_norm": 2.71875,
      "learning_rate": 3.2478692388970666e-06,
      "loss": 0.8413,
      "step": 805940
    },
    {
      "epoch": 2.824653818250639,
      "grad_norm": 2.875,
      "learning_rate": 3.247220210233365e-06,
      "loss": 0.8592,
      "step": 805950
    },
    {
      "epoch": 2.824688865757534,
      "grad_norm": 3.09375,
      "learning_rate": 3.246571181569663e-06,
      "loss": 0.8317,
      "step": 805960
    },
    {
      "epoch": 2.82472391326443,
      "grad_norm": 3.203125,
      "learning_rate": 3.2459221529059615e-06,
      "loss": 0.8001,
      "step": 805970
    },
    {
      "epoch": 2.8247589607713257,
      "grad_norm": 3.421875,
      "learning_rate": 3.245273124242259e-06,
      "loss": 0.9053,
      "step": 805980
    },
    {
      "epoch": 2.824794008278221,
      "grad_norm": 2.578125,
      "learning_rate": 3.244624095578557e-06,
      "loss": 0.821,
      "step": 805990
    },
    {
      "epoch": 2.824829055785117,
      "grad_norm": 2.734375,
      "learning_rate": 3.2439750669148555e-06,
      "loss": 0.717,
      "step": 806000
    },
    {
      "epoch": 2.824864103292012,
      "grad_norm": 2.84375,
      "learning_rate": 3.2433260382511535e-06,
      "loss": 0.8618,
      "step": 806010
    },
    {
      "epoch": 2.824899150798908,
      "grad_norm": 2.390625,
      "learning_rate": 3.242677009587452e-06,
      "loss": 0.788,
      "step": 806020
    },
    {
      "epoch": 2.8249341983058036,
      "grad_norm": 2.890625,
      "learning_rate": 3.2420279809237495e-06,
      "loss": 0.7876,
      "step": 806030
    },
    {
      "epoch": 2.8249692458126994,
      "grad_norm": 3.140625,
      "learning_rate": 3.2413789522600475e-06,
      "loss": 0.8106,
      "step": 806040
    },
    {
      "epoch": 2.8250042933195947,
      "grad_norm": 3.109375,
      "learning_rate": 3.240729923596346e-06,
      "loss": 0.9006,
      "step": 806050
    },
    {
      "epoch": 2.8250393408264904,
      "grad_norm": 2.828125,
      "learning_rate": 3.2400808949326443e-06,
      "loss": 0.7962,
      "step": 806060
    },
    {
      "epoch": 2.8250743883333858,
      "grad_norm": 2.90625,
      "learning_rate": 3.239431866268942e-06,
      "loss": 0.773,
      "step": 806070
    },
    {
      "epoch": 2.8251094358402815,
      "grad_norm": 2.578125,
      "learning_rate": 3.23878283760524e-06,
      "loss": 0.7958,
      "step": 806080
    },
    {
      "epoch": 2.8251444833471773,
      "grad_norm": 3.046875,
      "learning_rate": 3.2381338089415383e-06,
      "loss": 0.833,
      "step": 806090
    },
    {
      "epoch": 2.8251795308540726,
      "grad_norm": 3.484375,
      "learning_rate": 3.2374847802778363e-06,
      "loss": 0.7678,
      "step": 806100
    },
    {
      "epoch": 2.8252145783609683,
      "grad_norm": 3.03125,
      "learning_rate": 3.2368357516141347e-06,
      "loss": 0.8381,
      "step": 806110
    },
    {
      "epoch": 2.8252496258678637,
      "grad_norm": 2.796875,
      "learning_rate": 3.2361867229504323e-06,
      "loss": 0.7523,
      "step": 806120
    },
    {
      "epoch": 2.8252846733747594,
      "grad_norm": 2.859375,
      "learning_rate": 3.2355376942867303e-06,
      "loss": 0.83,
      "step": 806130
    },
    {
      "epoch": 2.825319720881655,
      "grad_norm": 2.59375,
      "learning_rate": 3.2348886656230287e-06,
      "loss": 0.7527,
      "step": 806140
    },
    {
      "epoch": 2.825354768388551,
      "grad_norm": 2.8125,
      "learning_rate": 3.234239636959327e-06,
      "loss": 0.7969,
      "step": 806150
    },
    {
      "epoch": 2.8253898158954462,
      "grad_norm": 2.578125,
      "learning_rate": 3.233590608295625e-06,
      "loss": 0.7448,
      "step": 806160
    },
    {
      "epoch": 2.825424863402342,
      "grad_norm": 3.109375,
      "learning_rate": 3.2329415796319227e-06,
      "loss": 0.8044,
      "step": 806170
    },
    {
      "epoch": 2.8254599109092373,
      "grad_norm": 2.5625,
      "learning_rate": 3.232292550968221e-06,
      "loss": 0.7333,
      "step": 806180
    },
    {
      "epoch": 2.825494958416133,
      "grad_norm": 2.796875,
      "learning_rate": 3.231643522304519e-06,
      "loss": 0.754,
      "step": 806190
    },
    {
      "epoch": 2.825530005923029,
      "grad_norm": 2.921875,
      "learning_rate": 3.2309944936408176e-06,
      "loss": 0.7909,
      "step": 806200
    },
    {
      "epoch": 2.825565053429924,
      "grad_norm": 3.328125,
      "learning_rate": 3.2303454649771156e-06,
      "loss": 0.7971,
      "step": 806210
    },
    {
      "epoch": 2.82560010093682,
      "grad_norm": 2.84375,
      "learning_rate": 3.229696436313413e-06,
      "loss": 0.8576,
      "step": 806220
    },
    {
      "epoch": 2.825635148443715,
      "grad_norm": 2.734375,
      "learning_rate": 3.2290474076497116e-06,
      "loss": 0.7621,
      "step": 806230
    },
    {
      "epoch": 2.825670195950611,
      "grad_norm": 3.21875,
      "learning_rate": 3.2283983789860096e-06,
      "loss": 0.8181,
      "step": 806240
    },
    {
      "epoch": 2.8257052434575067,
      "grad_norm": 3.21875,
      "learning_rate": 3.227749350322308e-06,
      "loss": 0.8028,
      "step": 806250
    },
    {
      "epoch": 2.8257402909644025,
      "grad_norm": 3.015625,
      "learning_rate": 3.2271003216586056e-06,
      "loss": 0.7896,
      "step": 806260
    },
    {
      "epoch": 2.825775338471298,
      "grad_norm": 3.03125,
      "learning_rate": 3.226451292994904e-06,
      "loss": 0.7582,
      "step": 806270
    },
    {
      "epoch": 2.8258103859781936,
      "grad_norm": 3.046875,
      "learning_rate": 3.225802264331202e-06,
      "loss": 0.7704,
      "step": 806280
    },
    {
      "epoch": 2.825845433485089,
      "grad_norm": 2.890625,
      "learning_rate": 3.2251532356675004e-06,
      "loss": 0.809,
      "step": 806290
    },
    {
      "epoch": 2.8258804809919846,
      "grad_norm": 3.015625,
      "learning_rate": 3.2245042070037984e-06,
      "loss": 0.7931,
      "step": 806300
    },
    {
      "epoch": 2.8259155284988804,
      "grad_norm": 2.765625,
      "learning_rate": 3.223855178340096e-06,
      "loss": 0.8496,
      "step": 806310
    },
    {
      "epoch": 2.8259505760057757,
      "grad_norm": 3.015625,
      "learning_rate": 3.2232061496763944e-06,
      "loss": 0.755,
      "step": 806320
    },
    {
      "epoch": 2.8259856235126715,
      "grad_norm": 2.890625,
      "learning_rate": 3.2225571210126924e-06,
      "loss": 0.8091,
      "step": 806330
    },
    {
      "epoch": 2.8260206710195668,
      "grad_norm": 3.421875,
      "learning_rate": 3.221908092348991e-06,
      "loss": 0.7812,
      "step": 806340
    },
    {
      "epoch": 2.8260557185264625,
      "grad_norm": 3.046875,
      "learning_rate": 3.2212590636852893e-06,
      "loss": 0.7236,
      "step": 806350
    },
    {
      "epoch": 2.8260907660333583,
      "grad_norm": 3.078125,
      "learning_rate": 3.2206100350215864e-06,
      "loss": 0.7401,
      "step": 806360
    },
    {
      "epoch": 2.826125813540254,
      "grad_norm": 3.203125,
      "learning_rate": 3.219961006357885e-06,
      "loss": 0.8152,
      "step": 806370
    },
    {
      "epoch": 2.8261608610471494,
      "grad_norm": 3.03125,
      "learning_rate": 3.2193119776941833e-06,
      "loss": 0.7926,
      "step": 806380
    },
    {
      "epoch": 2.826195908554045,
      "grad_norm": 2.625,
      "learning_rate": 3.2186629490304813e-06,
      "loss": 0.7707,
      "step": 806390
    },
    {
      "epoch": 2.8262309560609404,
      "grad_norm": 2.921875,
      "learning_rate": 3.218013920366779e-06,
      "loss": 0.8414,
      "step": 806400
    },
    {
      "epoch": 2.826266003567836,
      "grad_norm": 2.484375,
      "learning_rate": 3.2173648917030773e-06,
      "loss": 0.7845,
      "step": 806410
    },
    {
      "epoch": 2.826301051074732,
      "grad_norm": 2.953125,
      "learning_rate": 3.2167158630393753e-06,
      "loss": 0.7864,
      "step": 806420
    },
    {
      "epoch": 2.8263360985816273,
      "grad_norm": 2.53125,
      "learning_rate": 3.2160668343756737e-06,
      "loss": 0.8203,
      "step": 806430
    },
    {
      "epoch": 2.826371146088523,
      "grad_norm": 3.0,
      "learning_rate": 3.2154178057119717e-06,
      "loss": 0.7856,
      "step": 806440
    },
    {
      "epoch": 2.8264061935954183,
      "grad_norm": 2.734375,
      "learning_rate": 3.2147687770482693e-06,
      "loss": 0.8112,
      "step": 806450
    },
    {
      "epoch": 2.826441241102314,
      "grad_norm": 2.4375,
      "learning_rate": 3.2141197483845677e-06,
      "loss": 0.7957,
      "step": 806460
    },
    {
      "epoch": 2.82647628860921,
      "grad_norm": 2.828125,
      "learning_rate": 3.213470719720866e-06,
      "loss": 0.7785,
      "step": 806470
    },
    {
      "epoch": 2.8265113361161056,
      "grad_norm": 2.703125,
      "learning_rate": 3.212821691057164e-06,
      "loss": 0.843,
      "step": 806480
    },
    {
      "epoch": 2.826546383623001,
      "grad_norm": 2.765625,
      "learning_rate": 3.2121726623934625e-06,
      "loss": 0.7561,
      "step": 806490
    },
    {
      "epoch": 2.8265814311298967,
      "grad_norm": 2.734375,
      "learning_rate": 3.21152363372976e-06,
      "loss": 0.8591,
      "step": 806500
    },
    {
      "epoch": 2.826616478636792,
      "grad_norm": 2.953125,
      "learning_rate": 3.210874605066058e-06,
      "loss": 0.8133,
      "step": 806510
    },
    {
      "epoch": 2.8266515261436878,
      "grad_norm": 2.84375,
      "learning_rate": 3.2102255764023565e-06,
      "loss": 0.7849,
      "step": 806520
    },
    {
      "epoch": 2.8266865736505835,
      "grad_norm": 2.609375,
      "learning_rate": 3.2095765477386545e-06,
      "loss": 0.819,
      "step": 806530
    },
    {
      "epoch": 2.826721621157479,
      "grad_norm": 3.21875,
      "learning_rate": 3.208927519074953e-06,
      "loss": 0.8094,
      "step": 806540
    },
    {
      "epoch": 2.8267566686643746,
      "grad_norm": 2.84375,
      "learning_rate": 3.2082784904112505e-06,
      "loss": 0.7672,
      "step": 806550
    },
    {
      "epoch": 2.82679171617127,
      "grad_norm": 3.25,
      "learning_rate": 3.2076294617475485e-06,
      "loss": 0.7757,
      "step": 806560
    },
    {
      "epoch": 2.8268267636781657,
      "grad_norm": 3.328125,
      "learning_rate": 3.206980433083847e-06,
      "loss": 0.8279,
      "step": 806570
    },
    {
      "epoch": 2.8268618111850614,
      "grad_norm": 2.734375,
      "learning_rate": 3.2063314044201454e-06,
      "loss": 0.8048,
      "step": 806580
    },
    {
      "epoch": 2.826896858691957,
      "grad_norm": 2.6875,
      "learning_rate": 3.205682375756443e-06,
      "loss": 0.8303,
      "step": 806590
    },
    {
      "epoch": 2.8269319061988525,
      "grad_norm": 2.859375,
      "learning_rate": 3.205033347092741e-06,
      "loss": 0.8185,
      "step": 806600
    },
    {
      "epoch": 2.8269669537057482,
      "grad_norm": 2.75,
      "learning_rate": 3.2043843184290394e-06,
      "loss": 0.7205,
      "step": 806610
    },
    {
      "epoch": 2.8270020012126436,
      "grad_norm": 3.203125,
      "learning_rate": 3.2037352897653374e-06,
      "loss": 0.8521,
      "step": 806620
    },
    {
      "epoch": 2.8270370487195393,
      "grad_norm": 2.96875,
      "learning_rate": 3.203086261101636e-06,
      "loss": 0.8454,
      "step": 806630
    },
    {
      "epoch": 2.827072096226435,
      "grad_norm": 3.109375,
      "learning_rate": 3.2024372324379334e-06,
      "loss": 0.889,
      "step": 806640
    },
    {
      "epoch": 2.8271071437333304,
      "grad_norm": 2.484375,
      "learning_rate": 3.2017882037742314e-06,
      "loss": 0.7586,
      "step": 806650
    },
    {
      "epoch": 2.827142191240226,
      "grad_norm": 2.515625,
      "learning_rate": 3.20113917511053e-06,
      "loss": 0.8149,
      "step": 806660
    },
    {
      "epoch": 2.8271772387471215,
      "grad_norm": 2.59375,
      "learning_rate": 3.200490146446828e-06,
      "loss": 0.7376,
      "step": 806670
    },
    {
      "epoch": 2.827212286254017,
      "grad_norm": 2.75,
      "learning_rate": 3.1998411177831262e-06,
      "loss": 0.8034,
      "step": 806680
    },
    {
      "epoch": 2.827247333760913,
      "grad_norm": 2.84375,
      "learning_rate": 3.199192089119424e-06,
      "loss": 0.6848,
      "step": 806690
    },
    {
      "epoch": 2.8272823812678087,
      "grad_norm": 2.84375,
      "learning_rate": 3.1985430604557222e-06,
      "loss": 0.719,
      "step": 806700
    },
    {
      "epoch": 2.827317428774704,
      "grad_norm": 2.703125,
      "learning_rate": 3.1978940317920202e-06,
      "loss": 0.812,
      "step": 806710
    },
    {
      "epoch": 2.8273524762816,
      "grad_norm": 3.09375,
      "learning_rate": 3.1972450031283187e-06,
      "loss": 0.8442,
      "step": 806720
    },
    {
      "epoch": 2.827387523788495,
      "grad_norm": 3.140625,
      "learning_rate": 3.1965959744646162e-06,
      "loss": 0.8304,
      "step": 806730
    },
    {
      "epoch": 2.827422571295391,
      "grad_norm": 3.046875,
      "learning_rate": 3.1959469458009142e-06,
      "loss": 0.8387,
      "step": 806740
    },
    {
      "epoch": 2.8274576188022866,
      "grad_norm": 2.9375,
      "learning_rate": 3.1952979171372127e-06,
      "loss": 0.8087,
      "step": 806750
    },
    {
      "epoch": 2.827492666309182,
      "grad_norm": 2.640625,
      "learning_rate": 3.1946488884735107e-06,
      "loss": 0.7142,
      "step": 806760
    },
    {
      "epoch": 2.8275277138160777,
      "grad_norm": 3.09375,
      "learning_rate": 3.193999859809809e-06,
      "loss": 0.7506,
      "step": 806770
    },
    {
      "epoch": 2.827562761322973,
      "grad_norm": 2.796875,
      "learning_rate": 3.1933508311461067e-06,
      "loss": 0.8192,
      "step": 806780
    },
    {
      "epoch": 2.8275978088298688,
      "grad_norm": 2.765625,
      "learning_rate": 3.1927018024824047e-06,
      "loss": 0.7783,
      "step": 806790
    },
    {
      "epoch": 2.8276328563367645,
      "grad_norm": 3.390625,
      "learning_rate": 3.192052773818703e-06,
      "loss": 0.7946,
      "step": 806800
    },
    {
      "epoch": 2.8276679038436603,
      "grad_norm": 2.609375,
      "learning_rate": 3.1914037451550015e-06,
      "loss": 0.8268,
      "step": 806810
    },
    {
      "epoch": 2.8277029513505556,
      "grad_norm": 2.765625,
      "learning_rate": 3.1907547164912995e-06,
      "loss": 0.7265,
      "step": 806820
    },
    {
      "epoch": 2.8277379988574514,
      "grad_norm": 2.734375,
      "learning_rate": 3.190105687827597e-06,
      "loss": 0.7758,
      "step": 806830
    },
    {
      "epoch": 2.8277730463643467,
      "grad_norm": 3.03125,
      "learning_rate": 3.1894566591638955e-06,
      "loss": 0.7939,
      "step": 806840
    },
    {
      "epoch": 2.8278080938712424,
      "grad_norm": 3.03125,
      "learning_rate": 3.1888076305001935e-06,
      "loss": 0.8034,
      "step": 806850
    },
    {
      "epoch": 2.827843141378138,
      "grad_norm": 2.828125,
      "learning_rate": 3.188158601836492e-06,
      "loss": 0.7627,
      "step": 806860
    },
    {
      "epoch": 2.8278781888850335,
      "grad_norm": 2.734375,
      "learning_rate": 3.1875095731727895e-06,
      "loss": 0.7618,
      "step": 806870
    },
    {
      "epoch": 2.8279132363919293,
      "grad_norm": 2.890625,
      "learning_rate": 3.1868605445090875e-06,
      "loss": 0.8022,
      "step": 806880
    },
    {
      "epoch": 2.827948283898825,
      "grad_norm": 2.921875,
      "learning_rate": 3.186211515845386e-06,
      "loss": 0.8068,
      "step": 806890
    },
    {
      "epoch": 2.8279833314057203,
      "grad_norm": 2.609375,
      "learning_rate": 3.1855624871816844e-06,
      "loss": 0.8298,
      "step": 806900
    },
    {
      "epoch": 2.828018378912616,
      "grad_norm": 2.875,
      "learning_rate": 3.1849134585179824e-06,
      "loss": 0.7921,
      "step": 806910
    },
    {
      "epoch": 2.828053426419512,
      "grad_norm": 2.890625,
      "learning_rate": 3.18426442985428e-06,
      "loss": 0.86,
      "step": 806920
    },
    {
      "epoch": 2.828088473926407,
      "grad_norm": 2.6875,
      "learning_rate": 3.1836154011905784e-06,
      "loss": 0.7787,
      "step": 806930
    },
    {
      "epoch": 2.828123521433303,
      "grad_norm": 3.171875,
      "learning_rate": 3.1829663725268764e-06,
      "loss": 0.8308,
      "step": 806940
    },
    {
      "epoch": 2.8281585689401982,
      "grad_norm": 3.03125,
      "learning_rate": 3.1823173438631748e-06,
      "loss": 0.8648,
      "step": 806950
    },
    {
      "epoch": 2.828193616447094,
      "grad_norm": 3.109375,
      "learning_rate": 3.1816683151994728e-06,
      "loss": 0.812,
      "step": 806960
    },
    {
      "epoch": 2.8282286639539898,
      "grad_norm": 2.546875,
      "learning_rate": 3.1810192865357704e-06,
      "loss": 0.8145,
      "step": 806970
    },
    {
      "epoch": 2.828263711460885,
      "grad_norm": 2.65625,
      "learning_rate": 3.1803702578720688e-06,
      "loss": 0.8166,
      "step": 806980
    },
    {
      "epoch": 2.828298758967781,
      "grad_norm": 2.796875,
      "learning_rate": 3.1797212292083668e-06,
      "loss": 0.8064,
      "step": 806990
    },
    {
      "epoch": 2.8283338064746766,
      "grad_norm": 2.796875,
      "learning_rate": 3.179072200544665e-06,
      "loss": 0.7172,
      "step": 807000
    },
    {
      "epoch": 2.828368853981572,
      "grad_norm": 2.765625,
      "learning_rate": 3.1784231718809636e-06,
      "loss": 0.8474,
      "step": 807010
    },
    {
      "epoch": 2.8284039014884677,
      "grad_norm": 2.5625,
      "learning_rate": 3.177774143217261e-06,
      "loss": 0.8401,
      "step": 807020
    },
    {
      "epoch": 2.8284389489953634,
      "grad_norm": 3.484375,
      "learning_rate": 3.177125114553559e-06,
      "loss": 0.8044,
      "step": 807030
    },
    {
      "epoch": 2.8284739965022587,
      "grad_norm": 2.796875,
      "learning_rate": 3.1764760858898576e-06,
      "loss": 0.8359,
      "step": 807040
    },
    {
      "epoch": 2.8285090440091545,
      "grad_norm": 2.78125,
      "learning_rate": 3.1758270572261556e-06,
      "loss": 0.8366,
      "step": 807050
    },
    {
      "epoch": 2.82854409151605,
      "grad_norm": 3.046875,
      "learning_rate": 3.175178028562453e-06,
      "loss": 0.7855,
      "step": 807060
    },
    {
      "epoch": 2.8285791390229456,
      "grad_norm": 2.734375,
      "learning_rate": 3.1745289998987516e-06,
      "loss": 0.8038,
      "step": 807070
    },
    {
      "epoch": 2.8286141865298413,
      "grad_norm": 2.796875,
      "learning_rate": 3.1738799712350496e-06,
      "loss": 0.7715,
      "step": 807080
    },
    {
      "epoch": 2.828649234036737,
      "grad_norm": 2.921875,
      "learning_rate": 3.173230942571348e-06,
      "loss": 0.7491,
      "step": 807090
    },
    {
      "epoch": 2.8286842815436324,
      "grad_norm": 3.0,
      "learning_rate": 3.172581913907646e-06,
      "loss": 0.8886,
      "step": 807100
    },
    {
      "epoch": 2.828719329050528,
      "grad_norm": 2.9375,
      "learning_rate": 3.1719328852439436e-06,
      "loss": 0.8263,
      "step": 807110
    },
    {
      "epoch": 2.8287543765574235,
      "grad_norm": 3.15625,
      "learning_rate": 3.171283856580242e-06,
      "loss": 0.878,
      "step": 807120
    },
    {
      "epoch": 2.828789424064319,
      "grad_norm": 2.8125,
      "learning_rate": 3.1706348279165405e-06,
      "loss": 0.7829,
      "step": 807130
    },
    {
      "epoch": 2.828824471571215,
      "grad_norm": 2.890625,
      "learning_rate": 3.1699857992528385e-06,
      "loss": 0.7948,
      "step": 807140
    },
    {
      "epoch": 2.8288595190781103,
      "grad_norm": 3.5,
      "learning_rate": 3.169336770589137e-06,
      "loss": 0.8737,
      "step": 807150
    },
    {
      "epoch": 2.828894566585006,
      "grad_norm": 2.78125,
      "learning_rate": 3.1686877419254345e-06,
      "loss": 0.7591,
      "step": 807160
    },
    {
      "epoch": 2.8289296140919014,
      "grad_norm": 2.765625,
      "learning_rate": 3.1680387132617325e-06,
      "loss": 0.7587,
      "step": 807170
    },
    {
      "epoch": 2.828964661598797,
      "grad_norm": 3.1875,
      "learning_rate": 3.167389684598031e-06,
      "loss": 0.8096,
      "step": 807180
    },
    {
      "epoch": 2.828999709105693,
      "grad_norm": 2.796875,
      "learning_rate": 3.166740655934329e-06,
      "loss": 0.8033,
      "step": 807190
    },
    {
      "epoch": 2.8290347566125886,
      "grad_norm": 3.03125,
      "learning_rate": 3.1660916272706265e-06,
      "loss": 0.8083,
      "step": 807200
    },
    {
      "epoch": 2.829069804119484,
      "grad_norm": 2.25,
      "learning_rate": 3.165442598606925e-06,
      "loss": 0.802,
      "step": 807210
    },
    {
      "epoch": 2.8291048516263797,
      "grad_norm": 3.21875,
      "learning_rate": 3.164793569943223e-06,
      "loss": 0.7961,
      "step": 807220
    },
    {
      "epoch": 2.829139899133275,
      "grad_norm": 2.921875,
      "learning_rate": 3.1641445412795213e-06,
      "loss": 0.7026,
      "step": 807230
    },
    {
      "epoch": 2.8291749466401708,
      "grad_norm": 3.3125,
      "learning_rate": 3.1634955126158197e-06,
      "loss": 0.893,
      "step": 807240
    },
    {
      "epoch": 2.8292099941470665,
      "grad_norm": 2.75,
      "learning_rate": 3.1628464839521173e-06,
      "loss": 0.8077,
      "step": 807250
    },
    {
      "epoch": 2.829245041653962,
      "grad_norm": 2.515625,
      "learning_rate": 3.1621974552884153e-06,
      "loss": 0.8592,
      "step": 807260
    },
    {
      "epoch": 2.8292800891608576,
      "grad_norm": 2.515625,
      "learning_rate": 3.1615484266247137e-06,
      "loss": 0.8558,
      "step": 807270
    },
    {
      "epoch": 2.829315136667753,
      "grad_norm": 2.90625,
      "learning_rate": 3.1608993979610117e-06,
      "loss": 0.806,
      "step": 807280
    },
    {
      "epoch": 2.8293501841746487,
      "grad_norm": 3.1875,
      "learning_rate": 3.16025036929731e-06,
      "loss": 0.8834,
      "step": 807290
    },
    {
      "epoch": 2.8293852316815444,
      "grad_norm": 2.9375,
      "learning_rate": 3.1596013406336077e-06,
      "loss": 0.7874,
      "step": 807300
    },
    {
      "epoch": 2.82942027918844,
      "grad_norm": 3.453125,
      "learning_rate": 3.1589523119699057e-06,
      "loss": 0.8635,
      "step": 807310
    },
    {
      "epoch": 2.8294553266953355,
      "grad_norm": 2.8125,
      "learning_rate": 3.158303283306204e-06,
      "loss": 0.8827,
      "step": 807320
    },
    {
      "epoch": 2.8294903742022313,
      "grad_norm": 3.1875,
      "learning_rate": 3.1576542546425026e-06,
      "loss": 0.7647,
      "step": 807330
    },
    {
      "epoch": 2.8295254217091266,
      "grad_norm": 2.609375,
      "learning_rate": 3.1570052259787997e-06,
      "loss": 0.7415,
      "step": 807340
    },
    {
      "epoch": 2.8295604692160223,
      "grad_norm": 3.984375,
      "learning_rate": 3.156356197315098e-06,
      "loss": 0.7469,
      "step": 807350
    },
    {
      "epoch": 2.829595516722918,
      "grad_norm": 2.8125,
      "learning_rate": 3.1557071686513966e-06,
      "loss": 0.8124,
      "step": 807360
    },
    {
      "epoch": 2.8296305642298134,
      "grad_norm": 3.375,
      "learning_rate": 3.1550581399876946e-06,
      "loss": 0.798,
      "step": 807370
    },
    {
      "epoch": 2.829665611736709,
      "grad_norm": 4.40625,
      "learning_rate": 3.154409111323993e-06,
      "loss": 0.8026,
      "step": 807380
    },
    {
      "epoch": 2.8297006592436045,
      "grad_norm": 3.109375,
      "learning_rate": 3.1537600826602906e-06,
      "loss": 0.7796,
      "step": 807390
    },
    {
      "epoch": 2.8297357067505002,
      "grad_norm": 2.921875,
      "learning_rate": 3.1531110539965886e-06,
      "loss": 0.7708,
      "step": 807400
    },
    {
      "epoch": 2.829770754257396,
      "grad_norm": 2.890625,
      "learning_rate": 3.152462025332887e-06,
      "loss": 0.7744,
      "step": 807410
    },
    {
      "epoch": 2.8298058017642918,
      "grad_norm": 3.328125,
      "learning_rate": 3.151812996669185e-06,
      "loss": 0.8366,
      "step": 807420
    },
    {
      "epoch": 2.829840849271187,
      "grad_norm": 3.03125,
      "learning_rate": 3.1511639680054834e-06,
      "loss": 0.7461,
      "step": 807430
    },
    {
      "epoch": 2.829875896778083,
      "grad_norm": 2.859375,
      "learning_rate": 3.150514939341781e-06,
      "loss": 0.8078,
      "step": 807440
    },
    {
      "epoch": 2.829910944284978,
      "grad_norm": 2.65625,
      "learning_rate": 3.1498659106780794e-06,
      "loss": 0.8031,
      "step": 807450
    },
    {
      "epoch": 2.829945991791874,
      "grad_norm": 2.78125,
      "learning_rate": 3.1492168820143774e-06,
      "loss": 0.7919,
      "step": 807460
    },
    {
      "epoch": 2.8299810392987697,
      "grad_norm": 2.875,
      "learning_rate": 3.148567853350676e-06,
      "loss": 0.8045,
      "step": 807470
    },
    {
      "epoch": 2.830016086805665,
      "grad_norm": 2.546875,
      "learning_rate": 3.147918824686974e-06,
      "loss": 0.7785,
      "step": 807480
    },
    {
      "epoch": 2.8300511343125607,
      "grad_norm": 3.21875,
      "learning_rate": 3.1472697960232714e-06,
      "loss": 0.792,
      "step": 807490
    },
    {
      "epoch": 2.830086181819456,
      "grad_norm": 2.765625,
      "learning_rate": 3.14662076735957e-06,
      "loss": 0.7769,
      "step": 807500
    },
    {
      "epoch": 2.830121229326352,
      "grad_norm": 2.96875,
      "learning_rate": 3.145971738695868e-06,
      "loss": 0.7846,
      "step": 807510
    },
    {
      "epoch": 2.8301562768332476,
      "grad_norm": 3.203125,
      "learning_rate": 3.1453227100321663e-06,
      "loss": 0.8687,
      "step": 807520
    },
    {
      "epoch": 2.8301913243401433,
      "grad_norm": 3.0,
      "learning_rate": 3.144673681368464e-06,
      "loss": 0.7941,
      "step": 807530
    },
    {
      "epoch": 2.8302263718470386,
      "grad_norm": 3.046875,
      "learning_rate": 3.144024652704762e-06,
      "loss": 0.7932,
      "step": 807540
    },
    {
      "epoch": 2.8302614193539344,
      "grad_norm": 2.6875,
      "learning_rate": 3.1433756240410603e-06,
      "loss": 0.7701,
      "step": 807550
    },
    {
      "epoch": 2.8302964668608297,
      "grad_norm": 2.5,
      "learning_rate": 3.1427265953773587e-06,
      "loss": 0.7584,
      "step": 807560
    },
    {
      "epoch": 2.8303315143677255,
      "grad_norm": 2.8125,
      "learning_rate": 3.1420775667136567e-06,
      "loss": 0.7373,
      "step": 807570
    },
    {
      "epoch": 2.830366561874621,
      "grad_norm": 2.40625,
      "learning_rate": 3.1414285380499543e-06,
      "loss": 0.7479,
      "step": 807580
    },
    {
      "epoch": 2.8304016093815165,
      "grad_norm": 2.71875,
      "learning_rate": 3.1407795093862527e-06,
      "loss": 0.823,
      "step": 807590
    },
    {
      "epoch": 2.8304366568884123,
      "grad_norm": 2.953125,
      "learning_rate": 3.1401304807225507e-06,
      "loss": 0.6888,
      "step": 807600
    },
    {
      "epoch": 2.8304717043953076,
      "grad_norm": 3.234375,
      "learning_rate": 3.139481452058849e-06,
      "loss": 0.8024,
      "step": 807610
    },
    {
      "epoch": 2.8305067519022034,
      "grad_norm": 2.984375,
      "learning_rate": 3.138832423395147e-06,
      "loss": 0.7904,
      "step": 807620
    },
    {
      "epoch": 2.830541799409099,
      "grad_norm": 3.125,
      "learning_rate": 3.1381833947314447e-06,
      "loss": 0.7241,
      "step": 807630
    },
    {
      "epoch": 2.830576846915995,
      "grad_norm": 3.390625,
      "learning_rate": 3.137534366067743e-06,
      "loss": 0.788,
      "step": 807640
    },
    {
      "epoch": 2.83061189442289,
      "grad_norm": 2.984375,
      "learning_rate": 3.1368853374040415e-06,
      "loss": 0.7593,
      "step": 807650
    },
    {
      "epoch": 2.830646941929786,
      "grad_norm": 2.671875,
      "learning_rate": 3.1362363087403395e-06,
      "loss": 0.7804,
      "step": 807660
    },
    {
      "epoch": 2.8306819894366813,
      "grad_norm": 2.984375,
      "learning_rate": 3.135587280076637e-06,
      "loss": 0.757,
      "step": 807670
    },
    {
      "epoch": 2.830717036943577,
      "grad_norm": 3.15625,
      "learning_rate": 3.1349382514129355e-06,
      "loss": 0.8196,
      "step": 807680
    },
    {
      "epoch": 2.8307520844504728,
      "grad_norm": 2.78125,
      "learning_rate": 3.1342892227492335e-06,
      "loss": 0.7195,
      "step": 807690
    },
    {
      "epoch": 2.830787131957368,
      "grad_norm": 2.5,
      "learning_rate": 3.133640194085532e-06,
      "loss": 0.8103,
      "step": 807700
    },
    {
      "epoch": 2.830822179464264,
      "grad_norm": 3.328125,
      "learning_rate": 3.13299116542183e-06,
      "loss": 0.9238,
      "step": 807710
    },
    {
      "epoch": 2.830857226971159,
      "grad_norm": 2.421875,
      "learning_rate": 3.1323421367581275e-06,
      "loss": 0.7833,
      "step": 807720
    },
    {
      "epoch": 2.830892274478055,
      "grad_norm": 2.921875,
      "learning_rate": 3.131693108094426e-06,
      "loss": 0.7622,
      "step": 807730
    },
    {
      "epoch": 2.8309273219849507,
      "grad_norm": 2.921875,
      "learning_rate": 3.131044079430724e-06,
      "loss": 0.7706,
      "step": 807740
    },
    {
      "epoch": 2.8309623694918464,
      "grad_norm": 3.125,
      "learning_rate": 3.1303950507670224e-06,
      "loss": 0.7934,
      "step": 807750
    },
    {
      "epoch": 2.8309974169987417,
      "grad_norm": 2.734375,
      "learning_rate": 3.129746022103321e-06,
      "loss": 0.8076,
      "step": 807760
    },
    {
      "epoch": 2.8310324645056375,
      "grad_norm": 2.671875,
      "learning_rate": 3.1290969934396184e-06,
      "loss": 0.7348,
      "step": 807770
    },
    {
      "epoch": 2.831067512012533,
      "grad_norm": 2.890625,
      "learning_rate": 3.1284479647759164e-06,
      "loss": 0.7989,
      "step": 807780
    },
    {
      "epoch": 2.8311025595194286,
      "grad_norm": 3.234375,
      "learning_rate": 3.127798936112215e-06,
      "loss": 0.8575,
      "step": 807790
    },
    {
      "epoch": 2.8311376070263243,
      "grad_norm": 3.671875,
      "learning_rate": 3.127149907448513e-06,
      "loss": 0.8333,
      "step": 807800
    },
    {
      "epoch": 2.8311726545332196,
      "grad_norm": 2.890625,
      "learning_rate": 3.1265008787848104e-06,
      "loss": 0.7989,
      "step": 807810
    },
    {
      "epoch": 2.8312077020401154,
      "grad_norm": 2.5625,
      "learning_rate": 3.125851850121109e-06,
      "loss": 0.7832,
      "step": 807820
    },
    {
      "epoch": 2.8312427495470107,
      "grad_norm": 3.03125,
      "learning_rate": 3.125202821457407e-06,
      "loss": 0.8723,
      "step": 807830
    },
    {
      "epoch": 2.8312777970539065,
      "grad_norm": 2.625,
      "learning_rate": 3.1245537927937052e-06,
      "loss": 0.7163,
      "step": 807840
    },
    {
      "epoch": 2.8313128445608022,
      "grad_norm": 2.609375,
      "learning_rate": 3.1239047641300032e-06,
      "loss": 0.8599,
      "step": 807850
    },
    {
      "epoch": 2.831347892067698,
      "grad_norm": 2.828125,
      "learning_rate": 3.1232557354663012e-06,
      "loss": 0.7664,
      "step": 807860
    },
    {
      "epoch": 2.8313829395745933,
      "grad_norm": 3.171875,
      "learning_rate": 3.1226067068025992e-06,
      "loss": 0.7683,
      "step": 807870
    },
    {
      "epoch": 2.831417987081489,
      "grad_norm": 3.109375,
      "learning_rate": 3.1219576781388977e-06,
      "loss": 0.8193,
      "step": 807880
    },
    {
      "epoch": 2.8314530345883844,
      "grad_norm": 3.28125,
      "learning_rate": 3.1213086494751957e-06,
      "loss": 0.775,
      "step": 807890
    },
    {
      "epoch": 2.83148808209528,
      "grad_norm": 3.59375,
      "learning_rate": 3.1206596208114937e-06,
      "loss": 0.8189,
      "step": 807900
    },
    {
      "epoch": 2.831523129602176,
      "grad_norm": 2.671875,
      "learning_rate": 3.1200105921477917e-06,
      "loss": 0.7476,
      "step": 807910
    },
    {
      "epoch": 2.831558177109071,
      "grad_norm": 2.65625,
      "learning_rate": 3.1193615634840897e-06,
      "loss": 0.8147,
      "step": 807920
    },
    {
      "epoch": 2.831593224615967,
      "grad_norm": 3.109375,
      "learning_rate": 3.118712534820388e-06,
      "loss": 0.7726,
      "step": 807930
    },
    {
      "epoch": 2.8316282721228623,
      "grad_norm": 2.875,
      "learning_rate": 3.118063506156686e-06,
      "loss": 0.844,
      "step": 807940
    },
    {
      "epoch": 2.831663319629758,
      "grad_norm": 3.171875,
      "learning_rate": 3.117414477492984e-06,
      "loss": 0.802,
      "step": 807950
    },
    {
      "epoch": 2.831698367136654,
      "grad_norm": 3.0625,
      "learning_rate": 3.1167654488292825e-06,
      "loss": 0.7989,
      "step": 807960
    },
    {
      "epoch": 2.8317334146435496,
      "grad_norm": 2.90625,
      "learning_rate": 3.11611642016558e-06,
      "loss": 0.7819,
      "step": 807970
    },
    {
      "epoch": 2.831768462150445,
      "grad_norm": 3.234375,
      "learning_rate": 3.1154673915018785e-06,
      "loss": 0.8296,
      "step": 807980
    },
    {
      "epoch": 2.8318035096573406,
      "grad_norm": 3.09375,
      "learning_rate": 3.1148183628381765e-06,
      "loss": 0.8034,
      "step": 807990
    },
    {
      "epoch": 2.831838557164236,
      "grad_norm": 3.0,
      "learning_rate": 3.1141693341744745e-06,
      "loss": 0.8175,
      "step": 808000
    },
    {
      "epoch": 2.8318736046711317,
      "grad_norm": 3.0625,
      "learning_rate": 3.1135203055107725e-06,
      "loss": 0.8027,
      "step": 808010
    },
    {
      "epoch": 2.8319086521780275,
      "grad_norm": 3.171875,
      "learning_rate": 3.112871276847071e-06,
      "loss": 0.8581,
      "step": 808020
    },
    {
      "epoch": 2.8319436996849228,
      "grad_norm": 3.296875,
      "learning_rate": 3.112222248183369e-06,
      "loss": 0.766,
      "step": 808030
    },
    {
      "epoch": 2.8319787471918185,
      "grad_norm": 2.890625,
      "learning_rate": 3.111573219519667e-06,
      "loss": 0.7927,
      "step": 808040
    },
    {
      "epoch": 2.832013794698714,
      "grad_norm": 2.96875,
      "learning_rate": 3.1109241908559654e-06,
      "loss": 0.8336,
      "step": 808050
    },
    {
      "epoch": 2.8320488422056096,
      "grad_norm": 2.765625,
      "learning_rate": 3.110275162192263e-06,
      "loss": 0.7585,
      "step": 808060
    },
    {
      "epoch": 2.8320838897125054,
      "grad_norm": 3.03125,
      "learning_rate": 3.1096261335285614e-06,
      "loss": 0.7287,
      "step": 808070
    },
    {
      "epoch": 2.832118937219401,
      "grad_norm": 2.625,
      "learning_rate": 3.1089771048648594e-06,
      "loss": 0.7523,
      "step": 808080
    },
    {
      "epoch": 2.8321539847262964,
      "grad_norm": 3.4375,
      "learning_rate": 3.1083280762011574e-06,
      "loss": 0.8388,
      "step": 808090
    },
    {
      "epoch": 2.832189032233192,
      "grad_norm": 3.171875,
      "learning_rate": 3.1076790475374558e-06,
      "loss": 0.8783,
      "step": 808100
    },
    {
      "epoch": 2.8322240797400875,
      "grad_norm": 2.96875,
      "learning_rate": 3.1070300188737538e-06,
      "loss": 0.8142,
      "step": 808110
    },
    {
      "epoch": 2.8322591272469833,
      "grad_norm": 2.46875,
      "learning_rate": 3.1063809902100518e-06,
      "loss": 0.731,
      "step": 808120
    },
    {
      "epoch": 2.832294174753879,
      "grad_norm": 3.15625,
      "learning_rate": 3.1057319615463498e-06,
      "loss": 0.9072,
      "step": 808130
    },
    {
      "epoch": 2.8323292222607743,
      "grad_norm": 2.765625,
      "learning_rate": 3.105082932882648e-06,
      "loss": 0.8691,
      "step": 808140
    },
    {
      "epoch": 2.83236426976767,
      "grad_norm": 3.03125,
      "learning_rate": 3.104433904218946e-06,
      "loss": 0.7505,
      "step": 808150
    },
    {
      "epoch": 2.8323993172745654,
      "grad_norm": 3.015625,
      "learning_rate": 3.103784875555244e-06,
      "loss": 0.7254,
      "step": 808160
    },
    {
      "epoch": 2.832434364781461,
      "grad_norm": 2.953125,
      "learning_rate": 3.103135846891542e-06,
      "loss": 0.8728,
      "step": 808170
    },
    {
      "epoch": 2.832469412288357,
      "grad_norm": 2.953125,
      "learning_rate": 3.10248681822784e-06,
      "loss": 0.7446,
      "step": 808180
    },
    {
      "epoch": 2.8325044597952527,
      "grad_norm": 2.875,
      "learning_rate": 3.1018377895641386e-06,
      "loss": 0.8307,
      "step": 808190
    },
    {
      "epoch": 2.832539507302148,
      "grad_norm": 3.484375,
      "learning_rate": 3.1011887609004366e-06,
      "loss": 0.8349,
      "step": 808200
    },
    {
      "epoch": 2.8325745548090437,
      "grad_norm": 3.25,
      "learning_rate": 3.1005397322367346e-06,
      "loss": 0.7543,
      "step": 808210
    },
    {
      "epoch": 2.832609602315939,
      "grad_norm": 3.40625,
      "learning_rate": 3.099890703573033e-06,
      "loss": 0.7935,
      "step": 808220
    },
    {
      "epoch": 2.832644649822835,
      "grad_norm": 2.890625,
      "learning_rate": 3.0992416749093306e-06,
      "loss": 0.7701,
      "step": 808230
    },
    {
      "epoch": 2.8326796973297306,
      "grad_norm": 2.9375,
      "learning_rate": 3.098592646245629e-06,
      "loss": 0.8647,
      "step": 808240
    },
    {
      "epoch": 2.832714744836626,
      "grad_norm": 3.015625,
      "learning_rate": 3.097943617581927e-06,
      "loss": 0.8064,
      "step": 808250
    },
    {
      "epoch": 2.8327497923435216,
      "grad_norm": 2.734375,
      "learning_rate": 3.097294588918225e-06,
      "loss": 0.7853,
      "step": 808260
    },
    {
      "epoch": 2.8327848398504174,
      "grad_norm": 3.15625,
      "learning_rate": 3.096645560254523e-06,
      "loss": 0.8314,
      "step": 808270
    },
    {
      "epoch": 2.8328198873573127,
      "grad_norm": 2.71875,
      "learning_rate": 3.0959965315908215e-06,
      "loss": 0.7814,
      "step": 808280
    },
    {
      "epoch": 2.8328549348642085,
      "grad_norm": 2.84375,
      "learning_rate": 3.0953475029271195e-06,
      "loss": 0.7472,
      "step": 808290
    },
    {
      "epoch": 2.8328899823711042,
      "grad_norm": 2.9375,
      "learning_rate": 3.0946984742634175e-06,
      "loss": 0.832,
      "step": 808300
    },
    {
      "epoch": 2.8329250298779995,
      "grad_norm": 3.140625,
      "learning_rate": 3.094049445599716e-06,
      "loss": 0.7865,
      "step": 808310
    },
    {
      "epoch": 2.8329600773848953,
      "grad_norm": 3.34375,
      "learning_rate": 3.0934004169360135e-06,
      "loss": 0.8117,
      "step": 808320
    },
    {
      "epoch": 2.8329951248917906,
      "grad_norm": 2.765625,
      "learning_rate": 3.092751388272312e-06,
      "loss": 0.8337,
      "step": 808330
    },
    {
      "epoch": 2.8330301723986864,
      "grad_norm": 3.28125,
      "learning_rate": 3.09210235960861e-06,
      "loss": 0.8155,
      "step": 808340
    },
    {
      "epoch": 2.833065219905582,
      "grad_norm": 2.609375,
      "learning_rate": 3.091453330944908e-06,
      "loss": 0.7478,
      "step": 808350
    },
    {
      "epoch": 2.833100267412478,
      "grad_norm": 2.703125,
      "learning_rate": 3.0908043022812063e-06,
      "loss": 0.8437,
      "step": 808360
    },
    {
      "epoch": 2.833135314919373,
      "grad_norm": 3.125,
      "learning_rate": 3.0901552736175043e-06,
      "loss": 0.8808,
      "step": 808370
    },
    {
      "epoch": 2.833170362426269,
      "grad_norm": 2.96875,
      "learning_rate": 3.0895062449538023e-06,
      "loss": 0.8275,
      "step": 808380
    },
    {
      "epoch": 2.8332054099331643,
      "grad_norm": 2.8125,
      "learning_rate": 3.0888572162901003e-06,
      "loss": 0.8041,
      "step": 808390
    },
    {
      "epoch": 2.83324045744006,
      "grad_norm": 2.859375,
      "learning_rate": 3.0882081876263983e-06,
      "loss": 0.7399,
      "step": 808400
    },
    {
      "epoch": 2.833275504946956,
      "grad_norm": 3.15625,
      "learning_rate": 3.0875591589626963e-06,
      "loss": 0.7383,
      "step": 808410
    },
    {
      "epoch": 2.833310552453851,
      "grad_norm": 2.65625,
      "learning_rate": 3.0869101302989947e-06,
      "loss": 0.8407,
      "step": 808420
    },
    {
      "epoch": 2.833345599960747,
      "grad_norm": 2.8125,
      "learning_rate": 3.0862611016352927e-06,
      "loss": 0.8648,
      "step": 808430
    },
    {
      "epoch": 2.833380647467642,
      "grad_norm": 3.015625,
      "learning_rate": 3.0856120729715907e-06,
      "loss": 0.7977,
      "step": 808440
    },
    {
      "epoch": 2.833415694974538,
      "grad_norm": 3.359375,
      "learning_rate": 3.084963044307889e-06,
      "loss": 0.805,
      "step": 808450
    },
    {
      "epoch": 2.8334507424814337,
      "grad_norm": 2.46875,
      "learning_rate": 3.0843140156441867e-06,
      "loss": 0.7615,
      "step": 808460
    },
    {
      "epoch": 2.8334857899883295,
      "grad_norm": 2.984375,
      "learning_rate": 3.083664986980485e-06,
      "loss": 0.8191,
      "step": 808470
    },
    {
      "epoch": 2.8335208374952248,
      "grad_norm": 2.921875,
      "learning_rate": 3.083015958316783e-06,
      "loss": 0.7465,
      "step": 808480
    },
    {
      "epoch": 2.8335558850021205,
      "grad_norm": 3.078125,
      "learning_rate": 3.082366929653081e-06,
      "loss": 0.8029,
      "step": 808490
    },
    {
      "epoch": 2.833590932509016,
      "grad_norm": 2.96875,
      "learning_rate": 3.0817179009893796e-06,
      "loss": 0.857,
      "step": 808500
    },
    {
      "epoch": 2.8336259800159116,
      "grad_norm": 3.359375,
      "learning_rate": 3.0810688723256776e-06,
      "loss": 0.7711,
      "step": 808510
    },
    {
      "epoch": 2.8336610275228074,
      "grad_norm": 3.078125,
      "learning_rate": 3.0804198436619756e-06,
      "loss": 0.762,
      "step": 808520
    },
    {
      "epoch": 2.8336960750297027,
      "grad_norm": 2.5625,
      "learning_rate": 3.0797708149982736e-06,
      "loss": 0.7724,
      "step": 808530
    },
    {
      "epoch": 2.8337311225365984,
      "grad_norm": 2.578125,
      "learning_rate": 3.079121786334572e-06,
      "loss": 0.7462,
      "step": 808540
    },
    {
      "epoch": 2.8337661700434937,
      "grad_norm": 3.046875,
      "learning_rate": 3.07847275767087e-06,
      "loss": 0.7967,
      "step": 808550
    },
    {
      "epoch": 2.8338012175503895,
      "grad_norm": 2.59375,
      "learning_rate": 3.077823729007168e-06,
      "loss": 0.8269,
      "step": 808560
    },
    {
      "epoch": 2.8338362650572853,
      "grad_norm": 2.828125,
      "learning_rate": 3.0771747003434664e-06,
      "loss": 0.7563,
      "step": 808570
    },
    {
      "epoch": 2.833871312564181,
      "grad_norm": 2.53125,
      "learning_rate": 3.076525671679764e-06,
      "loss": 0.7758,
      "step": 808580
    },
    {
      "epoch": 2.8339063600710763,
      "grad_norm": 3.375,
      "learning_rate": 3.0758766430160624e-06,
      "loss": 0.8399,
      "step": 808590
    },
    {
      "epoch": 2.833941407577972,
      "grad_norm": 2.96875,
      "learning_rate": 3.0752276143523604e-06,
      "loss": 0.7596,
      "step": 808600
    },
    {
      "epoch": 2.8339764550848674,
      "grad_norm": 2.671875,
      "learning_rate": 3.0745785856886584e-06,
      "loss": 0.7145,
      "step": 808610
    },
    {
      "epoch": 2.834011502591763,
      "grad_norm": 2.875,
      "learning_rate": 3.073929557024957e-06,
      "loss": 0.7994,
      "step": 808620
    },
    {
      "epoch": 2.834046550098659,
      "grad_norm": 3.359375,
      "learning_rate": 3.073280528361255e-06,
      "loss": 0.7784,
      "step": 808630
    },
    {
      "epoch": 2.8340815976055542,
      "grad_norm": 3.078125,
      "learning_rate": 3.072631499697553e-06,
      "loss": 0.8449,
      "step": 808640
    },
    {
      "epoch": 2.83411664511245,
      "grad_norm": 2.859375,
      "learning_rate": 3.071982471033851e-06,
      "loss": 0.8295,
      "step": 808650
    },
    {
      "epoch": 2.8341516926193453,
      "grad_norm": 3.703125,
      "learning_rate": 3.071333442370149e-06,
      "loss": 0.7647,
      "step": 808660
    },
    {
      "epoch": 2.834186740126241,
      "grad_norm": 2.8125,
      "learning_rate": 3.070684413706447e-06,
      "loss": 0.804,
      "step": 808670
    },
    {
      "epoch": 2.834221787633137,
      "grad_norm": 3.140625,
      "learning_rate": 3.0700353850427453e-06,
      "loss": 0.8338,
      "step": 808680
    },
    {
      "epoch": 2.8342568351400326,
      "grad_norm": 2.671875,
      "learning_rate": 3.0693863563790433e-06,
      "loss": 0.7772,
      "step": 808690
    },
    {
      "epoch": 2.834291882646928,
      "grad_norm": 3.15625,
      "learning_rate": 3.0687373277153413e-06,
      "loss": 0.8447,
      "step": 808700
    },
    {
      "epoch": 2.8343269301538236,
      "grad_norm": 2.359375,
      "learning_rate": 3.0680882990516397e-06,
      "loss": 0.7811,
      "step": 808710
    },
    {
      "epoch": 2.834361977660719,
      "grad_norm": 2.390625,
      "learning_rate": 3.0674392703879373e-06,
      "loss": 0.8261,
      "step": 808720
    },
    {
      "epoch": 2.8343970251676147,
      "grad_norm": 2.75,
      "learning_rate": 3.0667902417242357e-06,
      "loss": 0.7562,
      "step": 808730
    },
    {
      "epoch": 2.8344320726745105,
      "grad_norm": 2.546875,
      "learning_rate": 3.0661412130605337e-06,
      "loss": 0.7869,
      "step": 808740
    },
    {
      "epoch": 2.834467120181406,
      "grad_norm": 2.84375,
      "learning_rate": 3.0654921843968317e-06,
      "loss": 0.7487,
      "step": 808750
    },
    {
      "epoch": 2.8345021676883015,
      "grad_norm": 2.640625,
      "learning_rate": 3.06484315573313e-06,
      "loss": 0.8348,
      "step": 808760
    },
    {
      "epoch": 2.834537215195197,
      "grad_norm": 2.9375,
      "learning_rate": 3.064194127069428e-06,
      "loss": 0.8459,
      "step": 808770
    },
    {
      "epoch": 2.8345722627020926,
      "grad_norm": 2.6875,
      "learning_rate": 3.063545098405726e-06,
      "loss": 0.8337,
      "step": 808780
    },
    {
      "epoch": 2.8346073102089884,
      "grad_norm": 2.921875,
      "learning_rate": 3.062896069742024e-06,
      "loss": 0.7494,
      "step": 808790
    },
    {
      "epoch": 2.834642357715884,
      "grad_norm": 3.25,
      "learning_rate": 3.0622470410783225e-06,
      "loss": 0.8377,
      "step": 808800
    },
    {
      "epoch": 2.8346774052227794,
      "grad_norm": 2.765625,
      "learning_rate": 3.06159801241462e-06,
      "loss": 0.8838,
      "step": 808810
    },
    {
      "epoch": 2.834712452729675,
      "grad_norm": 2.921875,
      "learning_rate": 3.0609489837509185e-06,
      "loss": 0.8178,
      "step": 808820
    },
    {
      "epoch": 2.8347475002365705,
      "grad_norm": 2.59375,
      "learning_rate": 3.060299955087217e-06,
      "loss": 0.7568,
      "step": 808830
    },
    {
      "epoch": 2.8347825477434663,
      "grad_norm": 2.8125,
      "learning_rate": 3.0596509264235145e-06,
      "loss": 0.7482,
      "step": 808840
    },
    {
      "epoch": 2.834817595250362,
      "grad_norm": 2.765625,
      "learning_rate": 3.059001897759813e-06,
      "loss": 0.7972,
      "step": 808850
    },
    {
      "epoch": 2.8348526427572573,
      "grad_norm": 3.1875,
      "learning_rate": 3.058352869096111e-06,
      "loss": 0.8249,
      "step": 808860
    },
    {
      "epoch": 2.834887690264153,
      "grad_norm": 3.015625,
      "learning_rate": 3.057703840432409e-06,
      "loss": 0.7996,
      "step": 808870
    },
    {
      "epoch": 2.8349227377710484,
      "grad_norm": 2.8125,
      "learning_rate": 3.057054811768707e-06,
      "loss": 0.7671,
      "step": 808880
    },
    {
      "epoch": 2.834957785277944,
      "grad_norm": 2.8125,
      "learning_rate": 3.0564057831050054e-06,
      "loss": 0.7659,
      "step": 808890
    },
    {
      "epoch": 2.83499283278484,
      "grad_norm": 2.90625,
      "learning_rate": 3.0557567544413034e-06,
      "loss": 0.7062,
      "step": 808900
    },
    {
      "epoch": 2.8350278802917357,
      "grad_norm": 2.875,
      "learning_rate": 3.0551077257776014e-06,
      "loss": 0.741,
      "step": 808910
    },
    {
      "epoch": 2.835062927798631,
      "grad_norm": 2.546875,
      "learning_rate": 3.0544586971138994e-06,
      "loss": 0.7676,
      "step": 808920
    },
    {
      "epoch": 2.8350979753055268,
      "grad_norm": 2.609375,
      "learning_rate": 3.0538096684501974e-06,
      "loss": 0.8061,
      "step": 808930
    },
    {
      "epoch": 2.835133022812422,
      "grad_norm": 3.4375,
      "learning_rate": 3.053160639786496e-06,
      "loss": 0.8333,
      "step": 808940
    },
    {
      "epoch": 2.835168070319318,
      "grad_norm": 2.640625,
      "learning_rate": 3.052511611122794e-06,
      "loss": 0.7767,
      "step": 808950
    },
    {
      "epoch": 2.8352031178262136,
      "grad_norm": 2.75,
      "learning_rate": 3.051862582459092e-06,
      "loss": 0.8633,
      "step": 808960
    },
    {
      "epoch": 2.835238165333109,
      "grad_norm": 2.5,
      "learning_rate": 3.0512135537953902e-06,
      "loss": 0.7487,
      "step": 808970
    },
    {
      "epoch": 2.8352732128400047,
      "grad_norm": 2.53125,
      "learning_rate": 3.050564525131688e-06,
      "loss": 0.8009,
      "step": 808980
    },
    {
      "epoch": 2.8353082603469,
      "grad_norm": 2.65625,
      "learning_rate": 3.0499154964679862e-06,
      "loss": 0.8719,
      "step": 808990
    },
    {
      "epoch": 2.8353433078537957,
      "grad_norm": 2.390625,
      "learning_rate": 3.0492664678042842e-06,
      "loss": 0.7635,
      "step": 809000
    },
    {
      "epoch": 2.8353783553606915,
      "grad_norm": 2.578125,
      "learning_rate": 3.0486174391405822e-06,
      "loss": 0.75,
      "step": 809010
    },
    {
      "epoch": 2.8354134028675873,
      "grad_norm": 2.921875,
      "learning_rate": 3.0479684104768807e-06,
      "loss": 0.8509,
      "step": 809020
    },
    {
      "epoch": 2.8354484503744826,
      "grad_norm": 2.828125,
      "learning_rate": 3.0473193818131787e-06,
      "loss": 0.8026,
      "step": 809030
    },
    {
      "epoch": 2.8354834978813783,
      "grad_norm": 3.015625,
      "learning_rate": 3.0466703531494767e-06,
      "loss": 0.8662,
      "step": 809040
    },
    {
      "epoch": 2.8355185453882736,
      "grad_norm": 3.109375,
      "learning_rate": 3.0460213244857747e-06,
      "loss": 0.801,
      "step": 809050
    },
    {
      "epoch": 2.8355535928951694,
      "grad_norm": 2.46875,
      "learning_rate": 3.045372295822073e-06,
      "loss": 0.8192,
      "step": 809060
    },
    {
      "epoch": 2.835588640402065,
      "grad_norm": 2.796875,
      "learning_rate": 3.0447232671583707e-06,
      "loss": 0.787,
      "step": 809070
    },
    {
      "epoch": 2.8356236879089605,
      "grad_norm": 2.9375,
      "learning_rate": 3.044074238494669e-06,
      "loss": 0.7374,
      "step": 809080
    },
    {
      "epoch": 2.8356587354158562,
      "grad_norm": 3.171875,
      "learning_rate": 3.043425209830967e-06,
      "loss": 0.7909,
      "step": 809090
    },
    {
      "epoch": 2.8356937829227515,
      "grad_norm": 2.953125,
      "learning_rate": 3.042776181167265e-06,
      "loss": 0.838,
      "step": 809100
    },
    {
      "epoch": 2.8357288304296473,
      "grad_norm": 2.75,
      "learning_rate": 3.0421271525035635e-06,
      "loss": 0.755,
      "step": 809110
    },
    {
      "epoch": 2.835763877936543,
      "grad_norm": 3.15625,
      "learning_rate": 3.0414781238398615e-06,
      "loss": 0.8346,
      "step": 809120
    },
    {
      "epoch": 2.835798925443439,
      "grad_norm": 3.359375,
      "learning_rate": 3.0408290951761595e-06,
      "loss": 0.8253,
      "step": 809130
    },
    {
      "epoch": 2.835833972950334,
      "grad_norm": 3.0,
      "learning_rate": 3.0401800665124575e-06,
      "loss": 0.8318,
      "step": 809140
    },
    {
      "epoch": 2.83586902045723,
      "grad_norm": 2.875,
      "learning_rate": 3.0395310378487555e-06,
      "loss": 0.7845,
      "step": 809150
    },
    {
      "epoch": 2.835904067964125,
      "grad_norm": 3.03125,
      "learning_rate": 3.038882009185054e-06,
      "loss": 0.7854,
      "step": 809160
    },
    {
      "epoch": 2.835939115471021,
      "grad_norm": 3.03125,
      "learning_rate": 3.038232980521352e-06,
      "loss": 0.821,
      "step": 809170
    },
    {
      "epoch": 2.8359741629779167,
      "grad_norm": 3.140625,
      "learning_rate": 3.03758395185765e-06,
      "loss": 0.743,
      "step": 809180
    },
    {
      "epoch": 2.836009210484812,
      "grad_norm": 3.046875,
      "learning_rate": 3.036934923193948e-06,
      "loss": 0.8315,
      "step": 809190
    },
    {
      "epoch": 2.836044257991708,
      "grad_norm": 2.671875,
      "learning_rate": 3.0362858945302464e-06,
      "loss": 0.8194,
      "step": 809200
    },
    {
      "epoch": 2.836079305498603,
      "grad_norm": 2.796875,
      "learning_rate": 3.035636865866544e-06,
      "loss": 0.757,
      "step": 809210
    },
    {
      "epoch": 2.836114353005499,
      "grad_norm": 2.625,
      "learning_rate": 3.0349878372028424e-06,
      "loss": 0.7484,
      "step": 809220
    },
    {
      "epoch": 2.8361494005123946,
      "grad_norm": 2.65625,
      "learning_rate": 3.0343388085391408e-06,
      "loss": 0.7592,
      "step": 809230
    },
    {
      "epoch": 2.8361844480192904,
      "grad_norm": 3.109375,
      "learning_rate": 3.0336897798754384e-06,
      "loss": 0.854,
      "step": 809240
    },
    {
      "epoch": 2.8362194955261857,
      "grad_norm": 3.09375,
      "learning_rate": 3.0330407512117368e-06,
      "loss": 0.8433,
      "step": 809250
    },
    {
      "epoch": 2.8362545430330814,
      "grad_norm": 2.90625,
      "learning_rate": 3.0323917225480348e-06,
      "loss": 0.7734,
      "step": 809260
    },
    {
      "epoch": 2.8362895905399768,
      "grad_norm": 3.3125,
      "learning_rate": 3.0317426938843328e-06,
      "loss": 0.8746,
      "step": 809270
    },
    {
      "epoch": 2.8363246380468725,
      "grad_norm": 2.859375,
      "learning_rate": 3.0310936652206308e-06,
      "loss": 0.8218,
      "step": 809280
    },
    {
      "epoch": 2.8363596855537683,
      "grad_norm": 3.21875,
      "learning_rate": 3.030444636556929e-06,
      "loss": 0.811,
      "step": 809290
    },
    {
      "epoch": 2.8363947330606636,
      "grad_norm": 2.90625,
      "learning_rate": 3.029795607893227e-06,
      "loss": 0.8113,
      "step": 809300
    },
    {
      "epoch": 2.8364297805675593,
      "grad_norm": 2.828125,
      "learning_rate": 3.029146579229525e-06,
      "loss": 0.746,
      "step": 809310
    },
    {
      "epoch": 2.8364648280744547,
      "grad_norm": 2.75,
      "learning_rate": 3.0284975505658236e-06,
      "loss": 0.7843,
      "step": 809320
    },
    {
      "epoch": 2.8364998755813504,
      "grad_norm": 2.828125,
      "learning_rate": 3.027848521902121e-06,
      "loss": 0.8158,
      "step": 809330
    },
    {
      "epoch": 2.836534923088246,
      "grad_norm": 3.078125,
      "learning_rate": 3.0271994932384196e-06,
      "loss": 0.8617,
      "step": 809340
    },
    {
      "epoch": 2.836569970595142,
      "grad_norm": 3.03125,
      "learning_rate": 3.0265504645747176e-06,
      "loss": 0.8758,
      "step": 809350
    },
    {
      "epoch": 2.8366050181020372,
      "grad_norm": 2.625,
      "learning_rate": 3.0259014359110156e-06,
      "loss": 0.8498,
      "step": 809360
    },
    {
      "epoch": 2.836640065608933,
      "grad_norm": 2.625,
      "learning_rate": 3.025252407247314e-06,
      "loss": 0.7957,
      "step": 809370
    },
    {
      "epoch": 2.8366751131158283,
      "grad_norm": 3.125,
      "learning_rate": 3.024603378583612e-06,
      "loss": 0.744,
      "step": 809380
    },
    {
      "epoch": 2.836710160622724,
      "grad_norm": 2.96875,
      "learning_rate": 3.02395434991991e-06,
      "loss": 0.7757,
      "step": 809390
    },
    {
      "epoch": 2.83674520812962,
      "grad_norm": 2.90625,
      "learning_rate": 3.023305321256208e-06,
      "loss": 0.8049,
      "step": 809400
    },
    {
      "epoch": 2.836780255636515,
      "grad_norm": 3.125,
      "learning_rate": 3.022656292592506e-06,
      "loss": 0.8484,
      "step": 809410
    },
    {
      "epoch": 2.836815303143411,
      "grad_norm": 3.140625,
      "learning_rate": 3.0220072639288045e-06,
      "loss": 0.7983,
      "step": 809420
    },
    {
      "epoch": 2.836850350650306,
      "grad_norm": 2.8125,
      "learning_rate": 3.0213582352651025e-06,
      "loss": 0.7935,
      "step": 809430
    },
    {
      "epoch": 2.836885398157202,
      "grad_norm": 2.65625,
      "learning_rate": 3.0207092066014005e-06,
      "loss": 0.8113,
      "step": 809440
    },
    {
      "epoch": 2.8369204456640977,
      "grad_norm": 2.8125,
      "learning_rate": 3.0200601779376985e-06,
      "loss": 0.7817,
      "step": 809450
    },
    {
      "epoch": 2.8369554931709935,
      "grad_norm": 3.234375,
      "learning_rate": 3.019411149273997e-06,
      "loss": 0.7803,
      "step": 809460
    },
    {
      "epoch": 2.836990540677889,
      "grad_norm": 3.046875,
      "learning_rate": 3.0187621206102945e-06,
      "loss": 0.8047,
      "step": 809470
    },
    {
      "epoch": 2.8370255881847846,
      "grad_norm": 2.890625,
      "learning_rate": 3.018113091946593e-06,
      "loss": 0.8585,
      "step": 809480
    },
    {
      "epoch": 2.83706063569168,
      "grad_norm": 3.046875,
      "learning_rate": 3.0174640632828913e-06,
      "loss": 0.8541,
      "step": 809490
    },
    {
      "epoch": 2.8370956831985756,
      "grad_norm": 2.953125,
      "learning_rate": 3.016815034619189e-06,
      "loss": 0.7869,
      "step": 809500
    },
    {
      "epoch": 2.8371307307054714,
      "grad_norm": 2.5,
      "learning_rate": 3.0161660059554873e-06,
      "loss": 0.8832,
      "step": 809510
    },
    {
      "epoch": 2.8371657782123667,
      "grad_norm": 2.890625,
      "learning_rate": 3.0155169772917853e-06,
      "loss": 0.8298,
      "step": 809520
    },
    {
      "epoch": 2.8372008257192625,
      "grad_norm": 3.015625,
      "learning_rate": 3.0148679486280833e-06,
      "loss": 0.7723,
      "step": 809530
    },
    {
      "epoch": 2.8372358732261582,
      "grad_norm": 2.9375,
      "learning_rate": 3.0142189199643813e-06,
      "loss": 0.8427,
      "step": 809540
    },
    {
      "epoch": 2.8372709207330535,
      "grad_norm": 3.15625,
      "learning_rate": 3.0135698913006797e-06,
      "loss": 0.8381,
      "step": 809550
    },
    {
      "epoch": 2.8373059682399493,
      "grad_norm": 3.03125,
      "learning_rate": 3.0129208626369777e-06,
      "loss": 0.8389,
      "step": 809560
    },
    {
      "epoch": 2.837341015746845,
      "grad_norm": 2.53125,
      "learning_rate": 3.0122718339732757e-06,
      "loss": 0.7617,
      "step": 809570
    },
    {
      "epoch": 2.8373760632537404,
      "grad_norm": 2.75,
      "learning_rate": 3.0116228053095737e-06,
      "loss": 0.7128,
      "step": 809580
    },
    {
      "epoch": 2.837411110760636,
      "grad_norm": 2.484375,
      "learning_rate": 3.0109737766458717e-06,
      "loss": 0.8341,
      "step": 809590
    },
    {
      "epoch": 2.8374461582675314,
      "grad_norm": 3.34375,
      "learning_rate": 3.01032474798217e-06,
      "loss": 0.8419,
      "step": 809600
    },
    {
      "epoch": 2.837481205774427,
      "grad_norm": 2.859375,
      "learning_rate": 3.009675719318468e-06,
      "loss": 0.8086,
      "step": 809610
    },
    {
      "epoch": 2.837516253281323,
      "grad_norm": 2.984375,
      "learning_rate": 3.009026690654766e-06,
      "loss": 0.8454,
      "step": 809620
    },
    {
      "epoch": 2.8375513007882183,
      "grad_norm": 3.0625,
      "learning_rate": 3.0083776619910646e-06,
      "loss": 0.8161,
      "step": 809630
    },
    {
      "epoch": 2.837586348295114,
      "grad_norm": 2.953125,
      "learning_rate": 3.007728633327362e-06,
      "loss": 0.8329,
      "step": 809640
    },
    {
      "epoch": 2.83762139580201,
      "grad_norm": 3.03125,
      "learning_rate": 3.0070796046636606e-06,
      "loss": 0.8339,
      "step": 809650
    },
    {
      "epoch": 2.837656443308905,
      "grad_norm": 2.828125,
      "learning_rate": 3.0064305759999586e-06,
      "loss": 0.7142,
      "step": 809660
    },
    {
      "epoch": 2.837691490815801,
      "grad_norm": 3.015625,
      "learning_rate": 3.0057815473362566e-06,
      "loss": 0.7583,
      "step": 809670
    },
    {
      "epoch": 2.8377265383226966,
      "grad_norm": 2.375,
      "learning_rate": 3.0051325186725546e-06,
      "loss": 0.7985,
      "step": 809680
    },
    {
      "epoch": 2.837761585829592,
      "grad_norm": 3.265625,
      "learning_rate": 3.004483490008853e-06,
      "loss": 0.7845,
      "step": 809690
    },
    {
      "epoch": 2.8377966333364877,
      "grad_norm": 2.75,
      "learning_rate": 3.003834461345151e-06,
      "loss": 0.7806,
      "step": 809700
    },
    {
      "epoch": 2.837831680843383,
      "grad_norm": 2.6875,
      "learning_rate": 3.003185432681449e-06,
      "loss": 0.8593,
      "step": 809710
    },
    {
      "epoch": 2.8378667283502788,
      "grad_norm": 2.125,
      "learning_rate": 3.0025364040177474e-06,
      "loss": 0.7288,
      "step": 809720
    },
    {
      "epoch": 2.8379017758571745,
      "grad_norm": 2.9375,
      "learning_rate": 3.001887375354045e-06,
      "loss": 0.8493,
      "step": 809730
    },
    {
      "epoch": 2.8379368233640703,
      "grad_norm": 3.109375,
      "learning_rate": 3.0012383466903434e-06,
      "loss": 0.8267,
      "step": 809740
    },
    {
      "epoch": 2.8379718708709656,
      "grad_norm": 2.71875,
      "learning_rate": 3.0005893180266414e-06,
      "loss": 0.7241,
      "step": 809750
    },
    {
      "epoch": 2.8380069183778613,
      "grad_norm": 2.921875,
      "learning_rate": 2.9999402893629394e-06,
      "loss": 0.7907,
      "step": 809760
    },
    {
      "epoch": 2.8380419658847567,
      "grad_norm": 3.296875,
      "learning_rate": 2.999291260699238e-06,
      "loss": 0.8413,
      "step": 809770
    },
    {
      "epoch": 2.8380770133916524,
      "grad_norm": 2.734375,
      "learning_rate": 2.998642232035536e-06,
      "loss": 0.7459,
      "step": 809780
    },
    {
      "epoch": 2.838112060898548,
      "grad_norm": 2.921875,
      "learning_rate": 2.997993203371834e-06,
      "loss": 0.7803,
      "step": 809790
    },
    {
      "epoch": 2.8381471084054435,
      "grad_norm": 2.609375,
      "learning_rate": 2.997344174708132e-06,
      "loss": 0.7581,
      "step": 809800
    },
    {
      "epoch": 2.8381821559123392,
      "grad_norm": 3.03125,
      "learning_rate": 2.9966951460444303e-06,
      "loss": 0.8068,
      "step": 809810
    },
    {
      "epoch": 2.8382172034192346,
      "grad_norm": 2.640625,
      "learning_rate": 2.996046117380728e-06,
      "loss": 0.7719,
      "step": 809820
    },
    {
      "epoch": 2.8382522509261303,
      "grad_norm": 2.671875,
      "learning_rate": 2.9953970887170263e-06,
      "loss": 0.7831,
      "step": 809830
    },
    {
      "epoch": 2.838287298433026,
      "grad_norm": 2.78125,
      "learning_rate": 2.9947480600533243e-06,
      "loss": 0.881,
      "step": 809840
    },
    {
      "epoch": 2.838322345939922,
      "grad_norm": 2.859375,
      "learning_rate": 2.9940990313896223e-06,
      "loss": 0.8504,
      "step": 809850
    },
    {
      "epoch": 2.838357393446817,
      "grad_norm": 2.78125,
      "learning_rate": 2.9934500027259207e-06,
      "loss": 0.757,
      "step": 809860
    },
    {
      "epoch": 2.838392440953713,
      "grad_norm": 3.03125,
      "learning_rate": 2.9928009740622187e-06,
      "loss": 0.8994,
      "step": 809870
    },
    {
      "epoch": 2.838427488460608,
      "grad_norm": 3.78125,
      "learning_rate": 2.9921519453985167e-06,
      "loss": 0.7408,
      "step": 809880
    },
    {
      "epoch": 2.838462535967504,
      "grad_norm": 2.78125,
      "learning_rate": 2.991502916734815e-06,
      "loss": 0.7873,
      "step": 809890
    },
    {
      "epoch": 2.8384975834743997,
      "grad_norm": 3.125,
      "learning_rate": 2.9908538880711127e-06,
      "loss": 0.8557,
      "step": 809900
    },
    {
      "epoch": 2.838532630981295,
      "grad_norm": 2.609375,
      "learning_rate": 2.990204859407411e-06,
      "loss": 0.8088,
      "step": 809910
    },
    {
      "epoch": 2.838567678488191,
      "grad_norm": 2.90625,
      "learning_rate": 2.989555830743709e-06,
      "loss": 0.8224,
      "step": 809920
    },
    {
      "epoch": 2.838602725995086,
      "grad_norm": 2.8125,
      "learning_rate": 2.988906802080007e-06,
      "loss": 0.7847,
      "step": 809930
    },
    {
      "epoch": 2.838637773501982,
      "grad_norm": 2.984375,
      "learning_rate": 2.988257773416305e-06,
      "loss": 0.8356,
      "step": 809940
    },
    {
      "epoch": 2.8386728210088776,
      "grad_norm": 2.890625,
      "learning_rate": 2.9876087447526035e-06,
      "loss": 0.8554,
      "step": 809950
    },
    {
      "epoch": 2.8387078685157734,
      "grad_norm": 3.328125,
      "learning_rate": 2.9869597160889015e-06,
      "loss": 0.7673,
      "step": 809960
    },
    {
      "epoch": 2.8387429160226687,
      "grad_norm": 2.96875,
      "learning_rate": 2.9863106874251995e-06,
      "loss": 0.8186,
      "step": 809970
    },
    {
      "epoch": 2.8387779635295645,
      "grad_norm": 2.640625,
      "learning_rate": 2.985661658761498e-06,
      "loss": 0.8153,
      "step": 809980
    },
    {
      "epoch": 2.8388130110364598,
      "grad_norm": 2.96875,
      "learning_rate": 2.9850126300977955e-06,
      "loss": 0.7558,
      "step": 809990
    },
    {
      "epoch": 2.8388480585433555,
      "grad_norm": 3.375,
      "learning_rate": 2.984363601434094e-06,
      "loss": 0.8924,
      "step": 810000
    },
    {
      "epoch": 2.8388480585433555,
      "eval_loss": 0.7509428262710571,
      "eval_runtime": 554.1643,
      "eval_samples_per_second": 686.504,
      "eval_steps_per_second": 57.209,
      "step": 810000
    },
    {
      "epoch": 2.8388831060502513,
      "grad_norm": 3.078125,
      "learning_rate": 2.983714572770392e-06,
      "loss": 0.8088,
      "step": 810010
    },
    {
      "epoch": 2.8389181535571466,
      "grad_norm": 3.25,
      "learning_rate": 2.98306554410669e-06,
      "loss": 0.8613,
      "step": 810020
    },
    {
      "epoch": 2.8389532010640424,
      "grad_norm": 2.671875,
      "learning_rate": 2.9824165154429884e-06,
      "loss": 0.7761,
      "step": 810030
    },
    {
      "epoch": 2.8389882485709377,
      "grad_norm": 3.328125,
      "learning_rate": 2.9817674867792864e-06,
      "loss": 0.7674,
      "step": 810040
    },
    {
      "epoch": 2.8390232960778334,
      "grad_norm": 2.515625,
      "learning_rate": 2.9811184581155844e-06,
      "loss": 0.8157,
      "step": 810050
    },
    {
      "epoch": 2.839058343584729,
      "grad_norm": 2.84375,
      "learning_rate": 2.9804694294518824e-06,
      "loss": 0.8211,
      "step": 810060
    },
    {
      "epoch": 2.839093391091625,
      "grad_norm": 2.78125,
      "learning_rate": 2.979820400788181e-06,
      "loss": 0.7887,
      "step": 810070
    },
    {
      "epoch": 2.8391284385985203,
      "grad_norm": 2.796875,
      "learning_rate": 2.9791713721244784e-06,
      "loss": 0.7862,
      "step": 810080
    },
    {
      "epoch": 2.839163486105416,
      "grad_norm": 3.140625,
      "learning_rate": 2.978522343460777e-06,
      "loss": 0.8522,
      "step": 810090
    },
    {
      "epoch": 2.8391985336123113,
      "grad_norm": 3.3125,
      "learning_rate": 2.977873314797075e-06,
      "loss": 0.8456,
      "step": 810100
    },
    {
      "epoch": 2.839233581119207,
      "grad_norm": 3.609375,
      "learning_rate": 2.977224286133373e-06,
      "loss": 0.7666,
      "step": 810110
    },
    {
      "epoch": 2.839268628626103,
      "grad_norm": 2.875,
      "learning_rate": 2.9765752574696712e-06,
      "loss": 0.8067,
      "step": 810120
    },
    {
      "epoch": 2.839303676132998,
      "grad_norm": 3.28125,
      "learning_rate": 2.9759262288059692e-06,
      "loss": 0.8481,
      "step": 810130
    },
    {
      "epoch": 2.839338723639894,
      "grad_norm": 2.890625,
      "learning_rate": 2.9752772001422672e-06,
      "loss": 0.7883,
      "step": 810140
    },
    {
      "epoch": 2.8393737711467892,
      "grad_norm": 2.9375,
      "learning_rate": 2.9746281714785652e-06,
      "loss": 0.7248,
      "step": 810150
    },
    {
      "epoch": 2.839408818653685,
      "grad_norm": 2.859375,
      "learning_rate": 2.9739791428148632e-06,
      "loss": 0.8703,
      "step": 810160
    },
    {
      "epoch": 2.8394438661605808,
      "grad_norm": 2.75,
      "learning_rate": 2.9733301141511617e-06,
      "loss": 0.8426,
      "step": 810170
    },
    {
      "epoch": 2.8394789136674765,
      "grad_norm": 2.890625,
      "learning_rate": 2.9726810854874597e-06,
      "loss": 0.8064,
      "step": 810180
    },
    {
      "epoch": 2.839513961174372,
      "grad_norm": 2.9375,
      "learning_rate": 2.9720320568237577e-06,
      "loss": 0.7946,
      "step": 810190
    },
    {
      "epoch": 2.8395490086812676,
      "grad_norm": 2.921875,
      "learning_rate": 2.9713830281600557e-06,
      "loss": 0.8775,
      "step": 810200
    },
    {
      "epoch": 2.839584056188163,
      "grad_norm": 3.09375,
      "learning_rate": 2.970733999496354e-06,
      "loss": 0.8848,
      "step": 810210
    },
    {
      "epoch": 2.8396191036950587,
      "grad_norm": 3.046875,
      "learning_rate": 2.9700849708326517e-06,
      "loss": 0.7763,
      "step": 810220
    },
    {
      "epoch": 2.8396541512019544,
      "grad_norm": 3.09375,
      "learning_rate": 2.96943594216895e-06,
      "loss": 0.8819,
      "step": 810230
    },
    {
      "epoch": 2.8396891987088497,
      "grad_norm": 3.15625,
      "learning_rate": 2.9687869135052485e-06,
      "loss": 0.828,
      "step": 810240
    },
    {
      "epoch": 2.8397242462157455,
      "grad_norm": 2.84375,
      "learning_rate": 2.968137884841546e-06,
      "loss": 0.8048,
      "step": 810250
    },
    {
      "epoch": 2.839759293722641,
      "grad_norm": 2.734375,
      "learning_rate": 2.9674888561778445e-06,
      "loss": 0.7667,
      "step": 810260
    },
    {
      "epoch": 2.8397943412295366,
      "grad_norm": 2.90625,
      "learning_rate": 2.9668398275141425e-06,
      "loss": 0.8596,
      "step": 810270
    },
    {
      "epoch": 2.8398293887364323,
      "grad_norm": 3.296875,
      "learning_rate": 2.9661907988504405e-06,
      "loss": 0.84,
      "step": 810280
    },
    {
      "epoch": 2.839864436243328,
      "grad_norm": 2.625,
      "learning_rate": 2.9655417701867385e-06,
      "loss": 0.7712,
      "step": 810290
    },
    {
      "epoch": 2.8398994837502234,
      "grad_norm": 3.40625,
      "learning_rate": 2.964892741523037e-06,
      "loss": 0.7438,
      "step": 810300
    },
    {
      "epoch": 2.839934531257119,
      "grad_norm": 2.3125,
      "learning_rate": 2.964243712859335e-06,
      "loss": 0.7725,
      "step": 810310
    },
    {
      "epoch": 2.8399695787640145,
      "grad_norm": 2.984375,
      "learning_rate": 2.963594684195633e-06,
      "loss": 0.7495,
      "step": 810320
    },
    {
      "epoch": 2.84000462627091,
      "grad_norm": 2.625,
      "learning_rate": 2.962945655531931e-06,
      "loss": 0.7431,
      "step": 810330
    },
    {
      "epoch": 2.840039673777806,
      "grad_norm": 3.203125,
      "learning_rate": 2.962296626868229e-06,
      "loss": 0.8921,
      "step": 810340
    },
    {
      "epoch": 2.8400747212847013,
      "grad_norm": 2.640625,
      "learning_rate": 2.9616475982045274e-06,
      "loss": 0.7513,
      "step": 810350
    },
    {
      "epoch": 2.840109768791597,
      "grad_norm": 2.703125,
      "learning_rate": 2.9609985695408254e-06,
      "loss": 0.7403,
      "step": 810360
    },
    {
      "epoch": 2.8401448162984924,
      "grad_norm": 3.140625,
      "learning_rate": 2.9603495408771234e-06,
      "loss": 0.812,
      "step": 810370
    },
    {
      "epoch": 2.840179863805388,
      "grad_norm": 3.203125,
      "learning_rate": 2.9597005122134218e-06,
      "loss": 0.7436,
      "step": 810380
    },
    {
      "epoch": 2.840214911312284,
      "grad_norm": 2.84375,
      "learning_rate": 2.9590514835497194e-06,
      "loss": 0.7963,
      "step": 810390
    },
    {
      "epoch": 2.8402499588191796,
      "grad_norm": 2.53125,
      "learning_rate": 2.9584024548860178e-06,
      "loss": 0.8294,
      "step": 810400
    },
    {
      "epoch": 2.840285006326075,
      "grad_norm": 2.78125,
      "learning_rate": 2.9577534262223158e-06,
      "loss": 0.7629,
      "step": 810410
    },
    {
      "epoch": 2.8403200538329707,
      "grad_norm": 2.875,
      "learning_rate": 2.9571043975586138e-06,
      "loss": 0.7373,
      "step": 810420
    },
    {
      "epoch": 2.840355101339866,
      "grad_norm": 3.21875,
      "learning_rate": 2.956455368894912e-06,
      "loss": 0.7837,
      "step": 810430
    },
    {
      "epoch": 2.8403901488467618,
      "grad_norm": 2.6875,
      "learning_rate": 2.95580634023121e-06,
      "loss": 0.8504,
      "step": 810440
    },
    {
      "epoch": 2.8404251963536575,
      "grad_norm": 2.890625,
      "learning_rate": 2.955157311567508e-06,
      "loss": 0.755,
      "step": 810450
    },
    {
      "epoch": 2.840460243860553,
      "grad_norm": 3.0,
      "learning_rate": 2.954508282903806e-06,
      "loss": 0.7123,
      "step": 810460
    },
    {
      "epoch": 2.8404952913674486,
      "grad_norm": 3.03125,
      "learning_rate": 2.9538592542401046e-06,
      "loss": 0.7422,
      "step": 810470
    },
    {
      "epoch": 2.840530338874344,
      "grad_norm": 3.3125,
      "learning_rate": 2.953210225576402e-06,
      "loss": 0.7656,
      "step": 810480
    },
    {
      "epoch": 2.8405653863812397,
      "grad_norm": 2.96875,
      "learning_rate": 2.9525611969127006e-06,
      "loss": 0.8232,
      "step": 810490
    },
    {
      "epoch": 2.8406004338881354,
      "grad_norm": 2.9375,
      "learning_rate": 2.951912168248999e-06,
      "loss": 0.7752,
      "step": 810500
    },
    {
      "epoch": 2.840635481395031,
      "grad_norm": 2.671875,
      "learning_rate": 2.9512631395852966e-06,
      "loss": 0.7433,
      "step": 810510
    },
    {
      "epoch": 2.8406705289019265,
      "grad_norm": 2.859375,
      "learning_rate": 2.950614110921595e-06,
      "loss": 0.794,
      "step": 810520
    },
    {
      "epoch": 2.8407055764088223,
      "grad_norm": 3.015625,
      "learning_rate": 2.949965082257893e-06,
      "loss": 0.7987,
      "step": 810530
    },
    {
      "epoch": 2.8407406239157176,
      "grad_norm": 2.765625,
      "learning_rate": 2.949316053594191e-06,
      "loss": 0.8088,
      "step": 810540
    },
    {
      "epoch": 2.8407756714226133,
      "grad_norm": 2.625,
      "learning_rate": 2.948667024930489e-06,
      "loss": 0.8103,
      "step": 810550
    },
    {
      "epoch": 2.840810718929509,
      "grad_norm": 2.765625,
      "learning_rate": 2.9480179962667875e-06,
      "loss": 0.8694,
      "step": 810560
    },
    {
      "epoch": 2.8408457664364044,
      "grad_norm": 2.984375,
      "learning_rate": 2.9473689676030855e-06,
      "loss": 0.7851,
      "step": 810570
    },
    {
      "epoch": 2.8408808139433,
      "grad_norm": 2.921875,
      "learning_rate": 2.9467199389393835e-06,
      "loss": 0.845,
      "step": 810580
    },
    {
      "epoch": 2.8409158614501955,
      "grad_norm": 2.859375,
      "learning_rate": 2.9460709102756815e-06,
      "loss": 0.7548,
      "step": 810590
    },
    {
      "epoch": 2.8409509089570912,
      "grad_norm": 3.3125,
      "learning_rate": 2.9454218816119795e-06,
      "loss": 0.8232,
      "step": 810600
    },
    {
      "epoch": 2.840985956463987,
      "grad_norm": 2.671875,
      "learning_rate": 2.944772852948278e-06,
      "loss": 0.8572,
      "step": 810610
    },
    {
      "epoch": 2.8410210039708828,
      "grad_norm": 3.515625,
      "learning_rate": 2.944123824284576e-06,
      "loss": 0.7737,
      "step": 810620
    },
    {
      "epoch": 2.841056051477778,
      "grad_norm": 2.765625,
      "learning_rate": 2.943474795620874e-06,
      "loss": 0.7536,
      "step": 810630
    },
    {
      "epoch": 2.841091098984674,
      "grad_norm": 3.265625,
      "learning_rate": 2.9428257669571723e-06,
      "loss": 0.8022,
      "step": 810640
    },
    {
      "epoch": 2.841126146491569,
      "grad_norm": 2.828125,
      "learning_rate": 2.94217673829347e-06,
      "loss": 0.7962,
      "step": 810650
    },
    {
      "epoch": 2.841161193998465,
      "grad_norm": 2.984375,
      "learning_rate": 2.9415277096297683e-06,
      "loss": 0.8395,
      "step": 810660
    },
    {
      "epoch": 2.8411962415053607,
      "grad_norm": 2.34375,
      "learning_rate": 2.9408786809660663e-06,
      "loss": 0.8626,
      "step": 810670
    },
    {
      "epoch": 2.841231289012256,
      "grad_norm": 2.9375,
      "learning_rate": 2.9402296523023643e-06,
      "loss": 0.8379,
      "step": 810680
    },
    {
      "epoch": 2.8412663365191517,
      "grad_norm": 3.421875,
      "learning_rate": 2.9395806236386623e-06,
      "loss": 0.786,
      "step": 810690
    },
    {
      "epoch": 2.841301384026047,
      "grad_norm": 3.375,
      "learning_rate": 2.9389315949749607e-06,
      "loss": 0.814,
      "step": 810700
    },
    {
      "epoch": 2.841336431532943,
      "grad_norm": 3.3125,
      "learning_rate": 2.9382825663112587e-06,
      "loss": 0.842,
      "step": 810710
    },
    {
      "epoch": 2.8413714790398386,
      "grad_norm": 2.9375,
      "learning_rate": 2.9376335376475567e-06,
      "loss": 0.765,
      "step": 810720
    },
    {
      "epoch": 2.8414065265467343,
      "grad_norm": 2.84375,
      "learning_rate": 2.936984508983855e-06,
      "loss": 0.7368,
      "step": 810730
    },
    {
      "epoch": 2.8414415740536296,
      "grad_norm": 3.140625,
      "learning_rate": 2.9363354803201527e-06,
      "loss": 0.7599,
      "step": 810740
    },
    {
      "epoch": 2.8414766215605254,
      "grad_norm": 2.890625,
      "learning_rate": 2.935686451656451e-06,
      "loss": 0.7485,
      "step": 810750
    },
    {
      "epoch": 2.8415116690674207,
      "grad_norm": 2.5625,
      "learning_rate": 2.935037422992749e-06,
      "loss": 0.8688,
      "step": 810760
    },
    {
      "epoch": 2.8415467165743165,
      "grad_norm": 2.875,
      "learning_rate": 2.934388394329047e-06,
      "loss": 0.8486,
      "step": 810770
    },
    {
      "epoch": 2.841581764081212,
      "grad_norm": 2.875,
      "learning_rate": 2.9337393656653456e-06,
      "loss": 0.7739,
      "step": 810780
    },
    {
      "epoch": 2.8416168115881075,
      "grad_norm": 2.828125,
      "learning_rate": 2.9330903370016436e-06,
      "loss": 0.8225,
      "step": 810790
    },
    {
      "epoch": 2.8416518590950033,
      "grad_norm": 2.75,
      "learning_rate": 2.9324413083379416e-06,
      "loss": 0.8797,
      "step": 810800
    },
    {
      "epoch": 2.8416869066018986,
      "grad_norm": 3.21875,
      "learning_rate": 2.9317922796742396e-06,
      "loss": 0.8095,
      "step": 810810
    },
    {
      "epoch": 2.8417219541087944,
      "grad_norm": 2.875,
      "learning_rate": 2.9311432510105376e-06,
      "loss": 0.8061,
      "step": 810820
    },
    {
      "epoch": 2.84175700161569,
      "grad_norm": 2.671875,
      "learning_rate": 2.930494222346836e-06,
      "loss": 0.7989,
      "step": 810830
    },
    {
      "epoch": 2.841792049122586,
      "grad_norm": 2.90625,
      "learning_rate": 2.929845193683134e-06,
      "loss": 0.7991,
      "step": 810840
    },
    {
      "epoch": 2.841827096629481,
      "grad_norm": 3.0,
      "learning_rate": 2.929196165019432e-06,
      "loss": 0.8346,
      "step": 810850
    },
    {
      "epoch": 2.841862144136377,
      "grad_norm": 2.609375,
      "learning_rate": 2.92854713635573e-06,
      "loss": 0.7364,
      "step": 810860
    },
    {
      "epoch": 2.8418971916432723,
      "grad_norm": 3.546875,
      "learning_rate": 2.9278981076920284e-06,
      "loss": 0.8023,
      "step": 810870
    },
    {
      "epoch": 2.841932239150168,
      "grad_norm": 3.375,
      "learning_rate": 2.927249079028326e-06,
      "loss": 0.8395,
      "step": 810880
    },
    {
      "epoch": 2.8419672866570638,
      "grad_norm": 2.921875,
      "learning_rate": 2.9266000503646244e-06,
      "loss": 0.7828,
      "step": 810890
    },
    {
      "epoch": 2.842002334163959,
      "grad_norm": 2.71875,
      "learning_rate": 2.925951021700923e-06,
      "loss": 0.8372,
      "step": 810900
    },
    {
      "epoch": 2.842037381670855,
      "grad_norm": 2.765625,
      "learning_rate": 2.9253019930372204e-06,
      "loss": 0.8134,
      "step": 810910
    },
    {
      "epoch": 2.8420724291777506,
      "grad_norm": 3.265625,
      "learning_rate": 2.924652964373519e-06,
      "loss": 0.8169,
      "step": 810920
    },
    {
      "epoch": 2.842107476684646,
      "grad_norm": 3.0,
      "learning_rate": 2.924003935709817e-06,
      "loss": 0.8368,
      "step": 810930
    },
    {
      "epoch": 2.8421425241915417,
      "grad_norm": 3.0625,
      "learning_rate": 2.923354907046115e-06,
      "loss": 0.8512,
      "step": 810940
    },
    {
      "epoch": 2.8421775716984374,
      "grad_norm": 2.8125,
      "learning_rate": 2.922705878382413e-06,
      "loss": 0.7858,
      "step": 810950
    },
    {
      "epoch": 2.8422126192053327,
      "grad_norm": 3.28125,
      "learning_rate": 2.9220568497187113e-06,
      "loss": 0.7654,
      "step": 810960
    },
    {
      "epoch": 2.8422476667122285,
      "grad_norm": 2.953125,
      "learning_rate": 2.9214078210550093e-06,
      "loss": 0.8836,
      "step": 810970
    },
    {
      "epoch": 2.842282714219124,
      "grad_norm": 2.953125,
      "learning_rate": 2.9207587923913073e-06,
      "loss": 0.7521,
      "step": 810980
    },
    {
      "epoch": 2.8423177617260196,
      "grad_norm": 2.984375,
      "learning_rate": 2.9201097637276057e-06,
      "loss": 0.8441,
      "step": 810990
    },
    {
      "epoch": 2.8423528092329153,
      "grad_norm": 2.890625,
      "learning_rate": 2.9194607350639033e-06,
      "loss": 0.8144,
      "step": 811000
    },
    {
      "epoch": 2.8423878567398106,
      "grad_norm": 2.578125,
      "learning_rate": 2.9188117064002017e-06,
      "loss": 0.7294,
      "step": 811010
    },
    {
      "epoch": 2.8424229042467064,
      "grad_norm": 3.203125,
      "learning_rate": 2.9181626777364997e-06,
      "loss": 0.762,
      "step": 811020
    },
    {
      "epoch": 2.842457951753602,
      "grad_norm": 3.3125,
      "learning_rate": 2.9175136490727977e-06,
      "loss": 0.8038,
      "step": 811030
    },
    {
      "epoch": 2.8424929992604975,
      "grad_norm": 2.875,
      "learning_rate": 2.916864620409096e-06,
      "loss": 0.7925,
      "step": 811040
    },
    {
      "epoch": 2.8425280467673932,
      "grad_norm": 2.625,
      "learning_rate": 2.916215591745394e-06,
      "loss": 0.792,
      "step": 811050
    },
    {
      "epoch": 2.842563094274289,
      "grad_norm": 2.78125,
      "learning_rate": 2.915566563081692e-06,
      "loss": 0.8027,
      "step": 811060
    },
    {
      "epoch": 2.8425981417811843,
      "grad_norm": 2.390625,
      "learning_rate": 2.91491753441799e-06,
      "loss": 0.7699,
      "step": 811070
    },
    {
      "epoch": 2.84263318928808,
      "grad_norm": 3.390625,
      "learning_rate": 2.914268505754288e-06,
      "loss": 0.805,
      "step": 811080
    },
    {
      "epoch": 2.8426682367949754,
      "grad_norm": 2.78125,
      "learning_rate": 2.913619477090586e-06,
      "loss": 0.7641,
      "step": 811090
    },
    {
      "epoch": 2.842703284301871,
      "grad_norm": 3.578125,
      "learning_rate": 2.9129704484268845e-06,
      "loss": 0.8317,
      "step": 811100
    },
    {
      "epoch": 2.842738331808767,
      "grad_norm": 3.390625,
      "learning_rate": 2.9123214197631825e-06,
      "loss": 0.8018,
      "step": 811110
    },
    {
      "epoch": 2.8427733793156627,
      "grad_norm": 2.78125,
      "learning_rate": 2.9116723910994805e-06,
      "loss": 0.8139,
      "step": 811120
    },
    {
      "epoch": 2.842808426822558,
      "grad_norm": 2.84375,
      "learning_rate": 2.911023362435779e-06,
      "loss": 0.7019,
      "step": 811130
    },
    {
      "epoch": 2.8428434743294537,
      "grad_norm": 2.859375,
      "learning_rate": 2.9103743337720765e-06,
      "loss": 0.7185,
      "step": 811140
    },
    {
      "epoch": 2.842878521836349,
      "grad_norm": 2.859375,
      "learning_rate": 2.909725305108375e-06,
      "loss": 0.7844,
      "step": 811150
    },
    {
      "epoch": 2.842913569343245,
      "grad_norm": 2.875,
      "learning_rate": 2.909076276444673e-06,
      "loss": 0.8305,
      "step": 811160
    },
    {
      "epoch": 2.8429486168501406,
      "grad_norm": 3.21875,
      "learning_rate": 2.908427247780971e-06,
      "loss": 0.8284,
      "step": 811170
    },
    {
      "epoch": 2.842983664357036,
      "grad_norm": 2.9375,
      "learning_rate": 2.9077782191172694e-06,
      "loss": 0.8083,
      "step": 811180
    },
    {
      "epoch": 2.8430187118639316,
      "grad_norm": 2.8125,
      "learning_rate": 2.9071291904535674e-06,
      "loss": 0.8015,
      "step": 811190
    },
    {
      "epoch": 2.843053759370827,
      "grad_norm": 2.71875,
      "learning_rate": 2.9064801617898654e-06,
      "loss": 0.7592,
      "step": 811200
    },
    {
      "epoch": 2.8430888068777227,
      "grad_norm": 2.84375,
      "learning_rate": 2.9058311331261634e-06,
      "loss": 0.7047,
      "step": 811210
    },
    {
      "epoch": 2.8431238543846185,
      "grad_norm": 3.15625,
      "learning_rate": 2.905182104462462e-06,
      "loss": 0.8355,
      "step": 811220
    },
    {
      "epoch": 2.843158901891514,
      "grad_norm": 2.421875,
      "learning_rate": 2.90453307579876e-06,
      "loss": 0.8663,
      "step": 811230
    },
    {
      "epoch": 2.8431939493984095,
      "grad_norm": 2.765625,
      "learning_rate": 2.903884047135058e-06,
      "loss": 0.7884,
      "step": 811240
    },
    {
      "epoch": 2.8432289969053053,
      "grad_norm": 3.25,
      "learning_rate": 2.9032350184713562e-06,
      "loss": 0.8565,
      "step": 811250
    },
    {
      "epoch": 2.8432640444122006,
      "grad_norm": 2.6875,
      "learning_rate": 2.902585989807654e-06,
      "loss": 0.8109,
      "step": 811260
    },
    {
      "epoch": 2.8432990919190964,
      "grad_norm": 3.34375,
      "learning_rate": 2.9019369611439522e-06,
      "loss": 0.8481,
      "step": 811270
    },
    {
      "epoch": 2.843334139425992,
      "grad_norm": 2.828125,
      "learning_rate": 2.9012879324802502e-06,
      "loss": 0.7134,
      "step": 811280
    },
    {
      "epoch": 2.8433691869328874,
      "grad_norm": 3.09375,
      "learning_rate": 2.9006389038165482e-06,
      "loss": 0.8621,
      "step": 811290
    },
    {
      "epoch": 2.843404234439783,
      "grad_norm": 2.859375,
      "learning_rate": 2.8999898751528467e-06,
      "loss": 0.802,
      "step": 811300
    },
    {
      "epoch": 2.8434392819466785,
      "grad_norm": 2.90625,
      "learning_rate": 2.8993408464891447e-06,
      "loss": 0.8093,
      "step": 811310
    },
    {
      "epoch": 2.8434743294535743,
      "grad_norm": 2.828125,
      "learning_rate": 2.8986918178254427e-06,
      "loss": 0.8273,
      "step": 811320
    },
    {
      "epoch": 2.84350937696047,
      "grad_norm": 3.046875,
      "learning_rate": 2.8980427891617407e-06,
      "loss": 0.755,
      "step": 811330
    },
    {
      "epoch": 2.8435444244673658,
      "grad_norm": 2.421875,
      "learning_rate": 2.8973937604980387e-06,
      "loss": 0.7753,
      "step": 811340
    },
    {
      "epoch": 2.843579471974261,
      "grad_norm": 2.484375,
      "learning_rate": 2.8967447318343367e-06,
      "loss": 0.7714,
      "step": 811350
    },
    {
      "epoch": 2.843614519481157,
      "grad_norm": 3.140625,
      "learning_rate": 2.896095703170635e-06,
      "loss": 0.7729,
      "step": 811360
    },
    {
      "epoch": 2.843649566988052,
      "grad_norm": 3.515625,
      "learning_rate": 2.895446674506933e-06,
      "loss": 0.8883,
      "step": 811370
    },
    {
      "epoch": 2.843684614494948,
      "grad_norm": 3.796875,
      "learning_rate": 2.894797645843231e-06,
      "loss": 0.8203,
      "step": 811380
    },
    {
      "epoch": 2.8437196620018437,
      "grad_norm": 2.921875,
      "learning_rate": 2.8941486171795295e-06,
      "loss": 0.8827,
      "step": 811390
    },
    {
      "epoch": 2.843754709508739,
      "grad_norm": 3.171875,
      "learning_rate": 2.893499588515827e-06,
      "loss": 0.7922,
      "step": 811400
    },
    {
      "epoch": 2.8437897570156347,
      "grad_norm": 2.484375,
      "learning_rate": 2.8928505598521255e-06,
      "loss": 0.786,
      "step": 811410
    },
    {
      "epoch": 2.84382480452253,
      "grad_norm": 2.4375,
      "learning_rate": 2.8922015311884235e-06,
      "loss": 0.777,
      "step": 811420
    },
    {
      "epoch": 2.843859852029426,
      "grad_norm": 3.078125,
      "learning_rate": 2.8915525025247215e-06,
      "loss": 0.8235,
      "step": 811430
    },
    {
      "epoch": 2.8438948995363216,
      "grad_norm": 2.96875,
      "learning_rate": 2.89090347386102e-06,
      "loss": 0.7827,
      "step": 811440
    },
    {
      "epoch": 2.8439299470432173,
      "grad_norm": 3.1875,
      "learning_rate": 2.890254445197318e-06,
      "loss": 0.8478,
      "step": 811450
    },
    {
      "epoch": 2.8439649945501126,
      "grad_norm": 2.53125,
      "learning_rate": 2.889605416533616e-06,
      "loss": 0.7498,
      "step": 811460
    },
    {
      "epoch": 2.8440000420570084,
      "grad_norm": 3.09375,
      "learning_rate": 2.888956387869914e-06,
      "loss": 0.7915,
      "step": 811470
    },
    {
      "epoch": 2.8440350895639037,
      "grad_norm": 2.703125,
      "learning_rate": 2.8883073592062124e-06,
      "loss": 0.757,
      "step": 811480
    },
    {
      "epoch": 2.8440701370707995,
      "grad_norm": 2.640625,
      "learning_rate": 2.88765833054251e-06,
      "loss": 0.8013,
      "step": 811490
    },
    {
      "epoch": 2.8441051845776952,
      "grad_norm": 3.140625,
      "learning_rate": 2.8870093018788084e-06,
      "loss": 0.8132,
      "step": 811500
    },
    {
      "epoch": 2.8441402320845905,
      "grad_norm": 2.96875,
      "learning_rate": 2.8863602732151064e-06,
      "loss": 0.7662,
      "step": 811510
    },
    {
      "epoch": 2.8441752795914863,
      "grad_norm": 2.921875,
      "learning_rate": 2.8857112445514043e-06,
      "loss": 0.7741,
      "step": 811520
    },
    {
      "epoch": 2.8442103270983816,
      "grad_norm": 2.78125,
      "learning_rate": 2.8850622158877028e-06,
      "loss": 0.7944,
      "step": 811530
    },
    {
      "epoch": 2.8442453746052774,
      "grad_norm": 2.984375,
      "learning_rate": 2.8844131872240008e-06,
      "loss": 0.8331,
      "step": 811540
    },
    {
      "epoch": 2.844280422112173,
      "grad_norm": 2.71875,
      "learning_rate": 2.8837641585602988e-06,
      "loss": 0.8172,
      "step": 811550
    },
    {
      "epoch": 2.844315469619069,
      "grad_norm": 2.640625,
      "learning_rate": 2.8831151298965968e-06,
      "loss": 0.7512,
      "step": 811560
    },
    {
      "epoch": 2.844350517125964,
      "grad_norm": 2.9375,
      "learning_rate": 2.8824661012328948e-06,
      "loss": 0.8228,
      "step": 811570
    },
    {
      "epoch": 2.84438556463286,
      "grad_norm": 2.796875,
      "learning_rate": 2.881817072569193e-06,
      "loss": 0.8384,
      "step": 811580
    },
    {
      "epoch": 2.8444206121397553,
      "grad_norm": 3.140625,
      "learning_rate": 2.881168043905491e-06,
      "loss": 0.7627,
      "step": 811590
    },
    {
      "epoch": 2.844455659646651,
      "grad_norm": 3.078125,
      "learning_rate": 2.880519015241789e-06,
      "loss": 0.7009,
      "step": 811600
    },
    {
      "epoch": 2.844490707153547,
      "grad_norm": 3.09375,
      "learning_rate": 2.879869986578087e-06,
      "loss": 0.7759,
      "step": 811610
    },
    {
      "epoch": 2.844525754660442,
      "grad_norm": 3.171875,
      "learning_rate": 2.8792209579143856e-06,
      "loss": 0.8209,
      "step": 811620
    },
    {
      "epoch": 2.844560802167338,
      "grad_norm": 2.890625,
      "learning_rate": 2.8785719292506836e-06,
      "loss": 0.8701,
      "step": 811630
    },
    {
      "epoch": 2.844595849674233,
      "grad_norm": 2.859375,
      "learning_rate": 2.8779229005869816e-06,
      "loss": 0.8375,
      "step": 811640
    },
    {
      "epoch": 2.844630897181129,
      "grad_norm": 2.796875,
      "learning_rate": 2.87727387192328e-06,
      "loss": 0.7618,
      "step": 811650
    },
    {
      "epoch": 2.8446659446880247,
      "grad_norm": 2.921875,
      "learning_rate": 2.8766248432595776e-06,
      "loss": 0.7927,
      "step": 811660
    },
    {
      "epoch": 2.8447009921949205,
      "grad_norm": 3.046875,
      "learning_rate": 2.875975814595876e-06,
      "loss": 0.8613,
      "step": 811670
    },
    {
      "epoch": 2.8447360397018158,
      "grad_norm": 3.09375,
      "learning_rate": 2.875326785932174e-06,
      "loss": 0.8144,
      "step": 811680
    },
    {
      "epoch": 2.8447710872087115,
      "grad_norm": 3.34375,
      "learning_rate": 2.874677757268472e-06,
      "loss": 0.7241,
      "step": 811690
    },
    {
      "epoch": 2.844806134715607,
      "grad_norm": 2.453125,
      "learning_rate": 2.8740287286047705e-06,
      "loss": 0.7998,
      "step": 811700
    },
    {
      "epoch": 2.8448411822225026,
      "grad_norm": 2.71875,
      "learning_rate": 2.8733796999410685e-06,
      "loss": 0.8286,
      "step": 811710
    },
    {
      "epoch": 2.8448762297293984,
      "grad_norm": 2.59375,
      "learning_rate": 2.8727306712773665e-06,
      "loss": 0.8581,
      "step": 811720
    },
    {
      "epoch": 2.8449112772362937,
      "grad_norm": 2.5,
      "learning_rate": 2.8720816426136645e-06,
      "loss": 0.7404,
      "step": 811730
    },
    {
      "epoch": 2.8449463247431894,
      "grad_norm": 3.046875,
      "learning_rate": 2.871432613949963e-06,
      "loss": 0.7888,
      "step": 811740
    },
    {
      "epoch": 2.8449813722500847,
      "grad_norm": 3.0,
      "learning_rate": 2.8707835852862605e-06,
      "loss": 0.8781,
      "step": 811750
    },
    {
      "epoch": 2.8450164197569805,
      "grad_norm": 3.03125,
      "learning_rate": 2.870134556622559e-06,
      "loss": 0.8491,
      "step": 811760
    },
    {
      "epoch": 2.8450514672638763,
      "grad_norm": 2.875,
      "learning_rate": 2.869485527958857e-06,
      "loss": 0.7444,
      "step": 811770
    },
    {
      "epoch": 2.845086514770772,
      "grad_norm": 2.390625,
      "learning_rate": 2.868836499295155e-06,
      "loss": 0.7623,
      "step": 811780
    },
    {
      "epoch": 2.8451215622776673,
      "grad_norm": 3.015625,
      "learning_rate": 2.8681874706314533e-06,
      "loss": 0.7983,
      "step": 811790
    },
    {
      "epoch": 2.845156609784563,
      "grad_norm": 3.09375,
      "learning_rate": 2.8675384419677513e-06,
      "loss": 0.8466,
      "step": 811800
    },
    {
      "epoch": 2.8451916572914584,
      "grad_norm": 3.0,
      "learning_rate": 2.8668894133040493e-06,
      "loss": 0.8945,
      "step": 811810
    },
    {
      "epoch": 2.845226704798354,
      "grad_norm": 3.4375,
      "learning_rate": 2.8662403846403473e-06,
      "loss": 0.8466,
      "step": 811820
    },
    {
      "epoch": 2.84526175230525,
      "grad_norm": 2.859375,
      "learning_rate": 2.8655913559766453e-06,
      "loss": 0.7794,
      "step": 811830
    },
    {
      "epoch": 2.8452967998121452,
      "grad_norm": 2.953125,
      "learning_rate": 2.8649423273129437e-06,
      "loss": 0.7941,
      "step": 811840
    },
    {
      "epoch": 2.845331847319041,
      "grad_norm": 2.65625,
      "learning_rate": 2.8642932986492417e-06,
      "loss": 0.8618,
      "step": 811850
    },
    {
      "epoch": 2.8453668948259363,
      "grad_norm": 2.515625,
      "learning_rate": 2.8636442699855397e-06,
      "loss": 0.8624,
      "step": 811860
    },
    {
      "epoch": 2.845401942332832,
      "grad_norm": 2.65625,
      "learning_rate": 2.8629952413218377e-06,
      "loss": 0.808,
      "step": 811870
    },
    {
      "epoch": 2.845436989839728,
      "grad_norm": 2.765625,
      "learning_rate": 2.862346212658136e-06,
      "loss": 0.7642,
      "step": 811880
    },
    {
      "epoch": 2.8454720373466236,
      "grad_norm": 2.453125,
      "learning_rate": 2.8616971839944337e-06,
      "loss": 0.734,
      "step": 811890
    },
    {
      "epoch": 2.845507084853519,
      "grad_norm": 3.4375,
      "learning_rate": 2.861048155330732e-06,
      "loss": 0.8186,
      "step": 811900
    },
    {
      "epoch": 2.8455421323604146,
      "grad_norm": 2.953125,
      "learning_rate": 2.8603991266670306e-06,
      "loss": 0.8576,
      "step": 811910
    },
    {
      "epoch": 2.84557717986731,
      "grad_norm": 2.546875,
      "learning_rate": 2.859750098003328e-06,
      "loss": 0.7744,
      "step": 811920
    },
    {
      "epoch": 2.8456122273742057,
      "grad_norm": 2.71875,
      "learning_rate": 2.8591010693396266e-06,
      "loss": 0.8436,
      "step": 811930
    },
    {
      "epoch": 2.8456472748811015,
      "grad_norm": 2.59375,
      "learning_rate": 2.8584520406759246e-06,
      "loss": 0.8053,
      "step": 811940
    },
    {
      "epoch": 2.845682322387997,
      "grad_norm": 3.171875,
      "learning_rate": 2.8578030120122226e-06,
      "loss": 0.7397,
      "step": 811950
    },
    {
      "epoch": 2.8457173698948925,
      "grad_norm": 2.921875,
      "learning_rate": 2.8571539833485206e-06,
      "loss": 0.7309,
      "step": 811960
    },
    {
      "epoch": 2.845752417401788,
      "grad_norm": 2.609375,
      "learning_rate": 2.856504954684819e-06,
      "loss": 0.7673,
      "step": 811970
    },
    {
      "epoch": 2.8457874649086836,
      "grad_norm": 3.109375,
      "learning_rate": 2.855855926021117e-06,
      "loss": 0.7853,
      "step": 811980
    },
    {
      "epoch": 2.8458225124155794,
      "grad_norm": 3.03125,
      "learning_rate": 2.855206897357415e-06,
      "loss": 0.8621,
      "step": 811990
    },
    {
      "epoch": 2.845857559922475,
      "grad_norm": 2.859375,
      "learning_rate": 2.854557868693713e-06,
      "loss": 0.7953,
      "step": 812000
    },
    {
      "epoch": 2.8458926074293704,
      "grad_norm": 2.921875,
      "learning_rate": 2.853908840030011e-06,
      "loss": 0.84,
      "step": 812010
    },
    {
      "epoch": 2.845927654936266,
      "grad_norm": 2.875,
      "learning_rate": 2.8532598113663094e-06,
      "loss": 0.8187,
      "step": 812020
    },
    {
      "epoch": 2.8459627024431615,
      "grad_norm": 2.984375,
      "learning_rate": 2.8526107827026074e-06,
      "loss": 0.8679,
      "step": 812030
    },
    {
      "epoch": 2.8459977499500573,
      "grad_norm": 3.078125,
      "learning_rate": 2.8519617540389054e-06,
      "loss": 0.8092,
      "step": 812040
    },
    {
      "epoch": 2.846032797456953,
      "grad_norm": 3.09375,
      "learning_rate": 2.851312725375204e-06,
      "loss": 0.8644,
      "step": 812050
    },
    {
      "epoch": 2.8460678449638483,
      "grad_norm": 2.9375,
      "learning_rate": 2.8506636967115014e-06,
      "loss": 0.8063,
      "step": 812060
    },
    {
      "epoch": 2.846102892470744,
      "grad_norm": 2.984375,
      "learning_rate": 2.8500146680478e-06,
      "loss": 0.873,
      "step": 812070
    },
    {
      "epoch": 2.8461379399776394,
      "grad_norm": 3.125,
      "learning_rate": 2.849365639384098e-06,
      "loss": 0.7435,
      "step": 812080
    },
    {
      "epoch": 2.846172987484535,
      "grad_norm": 2.578125,
      "learning_rate": 2.848716610720396e-06,
      "loss": 0.7403,
      "step": 812090
    },
    {
      "epoch": 2.846208034991431,
      "grad_norm": 3.78125,
      "learning_rate": 2.8480675820566943e-06,
      "loss": 0.8746,
      "step": 812100
    },
    {
      "epoch": 2.8462430824983267,
      "grad_norm": 3.21875,
      "learning_rate": 2.8474185533929923e-06,
      "loss": 0.7718,
      "step": 812110
    },
    {
      "epoch": 2.846278130005222,
      "grad_norm": 3.1875,
      "learning_rate": 2.8467695247292903e-06,
      "loss": 0.7655,
      "step": 812120
    },
    {
      "epoch": 2.8463131775121178,
      "grad_norm": 2.484375,
      "learning_rate": 2.8461204960655883e-06,
      "loss": 0.7608,
      "step": 812130
    },
    {
      "epoch": 2.846348225019013,
      "grad_norm": 2.75,
      "learning_rate": 2.8454714674018867e-06,
      "loss": 0.8109,
      "step": 812140
    },
    {
      "epoch": 2.846383272525909,
      "grad_norm": 3.125,
      "learning_rate": 2.8448224387381843e-06,
      "loss": 0.8515,
      "step": 812150
    },
    {
      "epoch": 2.8464183200328046,
      "grad_norm": 2.890625,
      "learning_rate": 2.8441734100744827e-06,
      "loss": 0.7443,
      "step": 812160
    },
    {
      "epoch": 2.8464533675397,
      "grad_norm": 3.09375,
      "learning_rate": 2.843524381410781e-06,
      "loss": 0.7933,
      "step": 812170
    },
    {
      "epoch": 2.8464884150465957,
      "grad_norm": 2.96875,
      "learning_rate": 2.8428753527470787e-06,
      "loss": 0.7547,
      "step": 812180
    },
    {
      "epoch": 2.8465234625534914,
      "grad_norm": 2.890625,
      "learning_rate": 2.842226324083377e-06,
      "loss": 0.736,
      "step": 812190
    },
    {
      "epoch": 2.8465585100603867,
      "grad_norm": 2.9375,
      "learning_rate": 2.841577295419675e-06,
      "loss": 0.7332,
      "step": 812200
    },
    {
      "epoch": 2.8465935575672825,
      "grad_norm": 3.046875,
      "learning_rate": 2.840928266755973e-06,
      "loss": 0.8023,
      "step": 812210
    },
    {
      "epoch": 2.8466286050741783,
      "grad_norm": 3.09375,
      "learning_rate": 2.840279238092271e-06,
      "loss": 0.8811,
      "step": 812220
    },
    {
      "epoch": 2.8466636525810736,
      "grad_norm": 2.953125,
      "learning_rate": 2.8396302094285695e-06,
      "loss": 0.8171,
      "step": 812230
    },
    {
      "epoch": 2.8466987000879693,
      "grad_norm": 2.90625,
      "learning_rate": 2.8389811807648675e-06,
      "loss": 0.7413,
      "step": 812240
    },
    {
      "epoch": 2.8467337475948646,
      "grad_norm": 3.09375,
      "learning_rate": 2.8383321521011655e-06,
      "loss": 0.8294,
      "step": 812250
    },
    {
      "epoch": 2.8467687951017604,
      "grad_norm": 3.453125,
      "learning_rate": 2.8376831234374635e-06,
      "loss": 0.7936,
      "step": 812260
    },
    {
      "epoch": 2.846803842608656,
      "grad_norm": 2.671875,
      "learning_rate": 2.8370340947737615e-06,
      "loss": 0.8726,
      "step": 812270
    },
    {
      "epoch": 2.8468388901155515,
      "grad_norm": 2.4375,
      "learning_rate": 2.83638506611006e-06,
      "loss": 0.8081,
      "step": 812280
    },
    {
      "epoch": 2.8468739376224472,
      "grad_norm": 3.078125,
      "learning_rate": 2.835736037446358e-06,
      "loss": 0.8545,
      "step": 812290
    },
    {
      "epoch": 2.846908985129343,
      "grad_norm": 3.265625,
      "learning_rate": 2.835087008782656e-06,
      "loss": 0.7688,
      "step": 812300
    },
    {
      "epoch": 2.8469440326362383,
      "grad_norm": 3.296875,
      "learning_rate": 2.8344379801189544e-06,
      "loss": 0.7651,
      "step": 812310
    },
    {
      "epoch": 2.846979080143134,
      "grad_norm": 2.90625,
      "learning_rate": 2.833788951455252e-06,
      "loss": 0.7575,
      "step": 812320
    },
    {
      "epoch": 2.84701412765003,
      "grad_norm": 2.75,
      "learning_rate": 2.8331399227915504e-06,
      "loss": 0.7454,
      "step": 812330
    },
    {
      "epoch": 2.847049175156925,
      "grad_norm": 2.59375,
      "learning_rate": 2.8324908941278484e-06,
      "loss": 0.7146,
      "step": 812340
    },
    {
      "epoch": 2.847084222663821,
      "grad_norm": 2.71875,
      "learning_rate": 2.8318418654641464e-06,
      "loss": 0.8237,
      "step": 812350
    },
    {
      "epoch": 2.847119270170716,
      "grad_norm": 2.8125,
      "learning_rate": 2.8311928368004444e-06,
      "loss": 0.7483,
      "step": 812360
    },
    {
      "epoch": 2.847154317677612,
      "grad_norm": 2.765625,
      "learning_rate": 2.830543808136743e-06,
      "loss": 0.871,
      "step": 812370
    },
    {
      "epoch": 2.8471893651845077,
      "grad_norm": 2.375,
      "learning_rate": 2.829894779473041e-06,
      "loss": 0.7675,
      "step": 812380
    },
    {
      "epoch": 2.8472244126914035,
      "grad_norm": 2.46875,
      "learning_rate": 2.829245750809339e-06,
      "loss": 0.7752,
      "step": 812390
    },
    {
      "epoch": 2.847259460198299,
      "grad_norm": 2.390625,
      "learning_rate": 2.8285967221456372e-06,
      "loss": 0.718,
      "step": 812400
    },
    {
      "epoch": 2.8472945077051945,
      "grad_norm": 3.0,
      "learning_rate": 2.827947693481935e-06,
      "loss": 0.7772,
      "step": 812410
    },
    {
      "epoch": 2.84732955521209,
      "grad_norm": 2.78125,
      "learning_rate": 2.8272986648182332e-06,
      "loss": 0.7358,
      "step": 812420
    },
    {
      "epoch": 2.8473646027189856,
      "grad_norm": 3.203125,
      "learning_rate": 2.8266496361545312e-06,
      "loss": 0.8095,
      "step": 812430
    },
    {
      "epoch": 2.8473996502258814,
      "grad_norm": 2.78125,
      "learning_rate": 2.8260006074908292e-06,
      "loss": 0.7396,
      "step": 812440
    },
    {
      "epoch": 2.8474346977327767,
      "grad_norm": 2.8125,
      "learning_rate": 2.8253515788271277e-06,
      "loss": 0.7874,
      "step": 812450
    },
    {
      "epoch": 2.8474697452396724,
      "grad_norm": 2.5625,
      "learning_rate": 2.8247025501634257e-06,
      "loss": 0.7368,
      "step": 812460
    },
    {
      "epoch": 2.8475047927465678,
      "grad_norm": 2.4375,
      "learning_rate": 2.8240535214997237e-06,
      "loss": 0.8205,
      "step": 812470
    },
    {
      "epoch": 2.8475398402534635,
      "grad_norm": 2.703125,
      "learning_rate": 2.8234044928360217e-06,
      "loss": 0.7925,
      "step": 812480
    },
    {
      "epoch": 2.8475748877603593,
      "grad_norm": 2.90625,
      "learning_rate": 2.82275546417232e-06,
      "loss": 0.8598,
      "step": 812490
    },
    {
      "epoch": 2.847609935267255,
      "grad_norm": 2.90625,
      "learning_rate": 2.8221064355086177e-06,
      "loss": 0.8368,
      "step": 812500
    },
    {
      "epoch": 2.8476449827741503,
      "grad_norm": 3.328125,
      "learning_rate": 2.821457406844916e-06,
      "loss": 0.8091,
      "step": 812510
    },
    {
      "epoch": 2.847680030281046,
      "grad_norm": 3.0,
      "learning_rate": 2.820808378181214e-06,
      "loss": 0.7773,
      "step": 812520
    },
    {
      "epoch": 2.8477150777879414,
      "grad_norm": 3.125,
      "learning_rate": 2.820159349517512e-06,
      "loss": 0.7757,
      "step": 812530
    },
    {
      "epoch": 2.847750125294837,
      "grad_norm": 2.6875,
      "learning_rate": 2.8195103208538105e-06,
      "loss": 0.7777,
      "step": 812540
    },
    {
      "epoch": 2.847785172801733,
      "grad_norm": 2.75,
      "learning_rate": 2.818861292190108e-06,
      "loss": 0.7563,
      "step": 812550
    },
    {
      "epoch": 2.8478202203086282,
      "grad_norm": 3.0625,
      "learning_rate": 2.8182122635264065e-06,
      "loss": 0.7947,
      "step": 812560
    },
    {
      "epoch": 2.847855267815524,
      "grad_norm": 2.6875,
      "learning_rate": 2.817563234862705e-06,
      "loss": 0.8301,
      "step": 812570
    },
    {
      "epoch": 2.8478903153224193,
      "grad_norm": 2.390625,
      "learning_rate": 2.8169142061990025e-06,
      "loss": 0.8297,
      "step": 812580
    },
    {
      "epoch": 2.847925362829315,
      "grad_norm": 2.78125,
      "learning_rate": 2.816265177535301e-06,
      "loss": 0.8288,
      "step": 812590
    },
    {
      "epoch": 2.847960410336211,
      "grad_norm": 2.875,
      "learning_rate": 2.815616148871599e-06,
      "loss": 0.8576,
      "step": 812600
    },
    {
      "epoch": 2.8479954578431066,
      "grad_norm": 2.640625,
      "learning_rate": 2.814967120207897e-06,
      "loss": 0.7682,
      "step": 812610
    },
    {
      "epoch": 2.848030505350002,
      "grad_norm": 2.9375,
      "learning_rate": 2.814318091544195e-06,
      "loss": 0.7371,
      "step": 812620
    },
    {
      "epoch": 2.8480655528568977,
      "grad_norm": 2.875,
      "learning_rate": 2.8136690628804933e-06,
      "loss": 0.7535,
      "step": 812630
    },
    {
      "epoch": 2.848100600363793,
      "grad_norm": 3.03125,
      "learning_rate": 2.8130200342167913e-06,
      "loss": 0.695,
      "step": 812640
    },
    {
      "epoch": 2.8481356478706887,
      "grad_norm": 2.875,
      "learning_rate": 2.8123710055530893e-06,
      "loss": 0.8575,
      "step": 812650
    },
    {
      "epoch": 2.8481706953775845,
      "grad_norm": 3.140625,
      "learning_rate": 2.8117219768893878e-06,
      "loss": 0.7871,
      "step": 812660
    },
    {
      "epoch": 2.84820574288448,
      "grad_norm": 2.5,
      "learning_rate": 2.8110729482256853e-06,
      "loss": 0.8093,
      "step": 812670
    },
    {
      "epoch": 2.8482407903913756,
      "grad_norm": 2.84375,
      "learning_rate": 2.8104239195619838e-06,
      "loss": 0.8794,
      "step": 812680
    },
    {
      "epoch": 2.848275837898271,
      "grad_norm": 2.953125,
      "learning_rate": 2.8097748908982818e-06,
      "loss": 0.8494,
      "step": 812690
    },
    {
      "epoch": 2.8483108854051666,
      "grad_norm": 3.0,
      "learning_rate": 2.8091258622345798e-06,
      "loss": 0.7585,
      "step": 812700
    },
    {
      "epoch": 2.8483459329120624,
      "grad_norm": 2.765625,
      "learning_rate": 2.808476833570878e-06,
      "loss": 0.7818,
      "step": 812710
    },
    {
      "epoch": 2.848380980418958,
      "grad_norm": 3.46875,
      "learning_rate": 2.807827804907176e-06,
      "loss": 0.8037,
      "step": 812720
    },
    {
      "epoch": 2.8484160279258535,
      "grad_norm": 2.921875,
      "learning_rate": 2.807178776243474e-06,
      "loss": 0.8112,
      "step": 812730
    },
    {
      "epoch": 2.8484510754327492,
      "grad_norm": 2.734375,
      "learning_rate": 2.806529747579772e-06,
      "loss": 0.8717,
      "step": 812740
    },
    {
      "epoch": 2.8484861229396445,
      "grad_norm": 3.140625,
      "learning_rate": 2.80588071891607e-06,
      "loss": 0.8656,
      "step": 812750
    },
    {
      "epoch": 2.8485211704465403,
      "grad_norm": 2.796875,
      "learning_rate": 2.805231690252368e-06,
      "loss": 0.8196,
      "step": 812760
    },
    {
      "epoch": 2.848556217953436,
      "grad_norm": 2.65625,
      "learning_rate": 2.8045826615886666e-06,
      "loss": 0.7559,
      "step": 812770
    },
    {
      "epoch": 2.8485912654603314,
      "grad_norm": 2.984375,
      "learning_rate": 2.8039336329249646e-06,
      "loss": 0.788,
      "step": 812780
    },
    {
      "epoch": 2.848626312967227,
      "grad_norm": 3.078125,
      "learning_rate": 2.8032846042612626e-06,
      "loss": 0.7107,
      "step": 812790
    },
    {
      "epoch": 2.8486613604741224,
      "grad_norm": 2.75,
      "learning_rate": 2.802635575597561e-06,
      "loss": 0.7815,
      "step": 812800
    },
    {
      "epoch": 2.848696407981018,
      "grad_norm": 3.3125,
      "learning_rate": 2.8019865469338586e-06,
      "loss": 0.8554,
      "step": 812810
    },
    {
      "epoch": 2.848731455487914,
      "grad_norm": 2.859375,
      "learning_rate": 2.801337518270157e-06,
      "loss": 0.8167,
      "step": 812820
    },
    {
      "epoch": 2.8487665029948097,
      "grad_norm": 3.1875,
      "learning_rate": 2.800688489606455e-06,
      "loss": 0.8809,
      "step": 812830
    },
    {
      "epoch": 2.848801550501705,
      "grad_norm": 2.921875,
      "learning_rate": 2.800039460942753e-06,
      "loss": 0.8551,
      "step": 812840
    },
    {
      "epoch": 2.848836598008601,
      "grad_norm": 3.046875,
      "learning_rate": 2.7993904322790515e-06,
      "loss": 0.7578,
      "step": 812850
    },
    {
      "epoch": 2.848871645515496,
      "grad_norm": 2.5625,
      "learning_rate": 2.7987414036153495e-06,
      "loss": 0.7894,
      "step": 812860
    },
    {
      "epoch": 2.848906693022392,
      "grad_norm": 3.1875,
      "learning_rate": 2.7980923749516475e-06,
      "loss": 0.8032,
      "step": 812870
    },
    {
      "epoch": 2.8489417405292876,
      "grad_norm": 2.53125,
      "learning_rate": 2.7974433462879455e-06,
      "loss": 0.8436,
      "step": 812880
    },
    {
      "epoch": 2.848976788036183,
      "grad_norm": 2.859375,
      "learning_rate": 2.796794317624244e-06,
      "loss": 0.8537,
      "step": 812890
    },
    {
      "epoch": 2.8490118355430787,
      "grad_norm": 3.671875,
      "learning_rate": 2.7961452889605415e-06,
      "loss": 0.7647,
      "step": 812900
    },
    {
      "epoch": 2.849046883049974,
      "grad_norm": 3.25,
      "learning_rate": 2.79549626029684e-06,
      "loss": 0.81,
      "step": 812910
    },
    {
      "epoch": 2.8490819305568698,
      "grad_norm": 2.96875,
      "learning_rate": 2.7948472316331383e-06,
      "loss": 0.7943,
      "step": 812920
    },
    {
      "epoch": 2.8491169780637655,
      "grad_norm": 3.0625,
      "learning_rate": 2.794198202969436e-06,
      "loss": 0.8038,
      "step": 812930
    },
    {
      "epoch": 2.8491520255706613,
      "grad_norm": 2.578125,
      "learning_rate": 2.7935491743057343e-06,
      "loss": 0.7726,
      "step": 812940
    },
    {
      "epoch": 2.8491870730775566,
      "grad_norm": 2.796875,
      "learning_rate": 2.7929001456420323e-06,
      "loss": 0.775,
      "step": 812950
    },
    {
      "epoch": 2.8492221205844523,
      "grad_norm": 2.953125,
      "learning_rate": 2.7922511169783303e-06,
      "loss": 0.8471,
      "step": 812960
    },
    {
      "epoch": 2.8492571680913477,
      "grad_norm": 2.859375,
      "learning_rate": 2.7916020883146287e-06,
      "loss": 0.885,
      "step": 812970
    },
    {
      "epoch": 2.8492922155982434,
      "grad_norm": 3.234375,
      "learning_rate": 2.7909530596509267e-06,
      "loss": 0.8564,
      "step": 812980
    },
    {
      "epoch": 2.849327263105139,
      "grad_norm": 2.6875,
      "learning_rate": 2.7903040309872247e-06,
      "loss": 0.8506,
      "step": 812990
    },
    {
      "epoch": 2.8493623106120345,
      "grad_norm": 2.828125,
      "learning_rate": 2.7896550023235227e-06,
      "loss": 0.7705,
      "step": 813000
    },
    {
      "epoch": 2.8493973581189302,
      "grad_norm": 2.46875,
      "learning_rate": 2.7890059736598207e-06,
      "loss": 0.8012,
      "step": 813010
    },
    {
      "epoch": 2.8494324056258256,
      "grad_norm": 2.78125,
      "learning_rate": 2.7883569449961187e-06,
      "loss": 0.8308,
      "step": 813020
    },
    {
      "epoch": 2.8494674531327213,
      "grad_norm": 3.453125,
      "learning_rate": 2.787707916332417e-06,
      "loss": 0.8794,
      "step": 813030
    },
    {
      "epoch": 2.849502500639617,
      "grad_norm": 2.796875,
      "learning_rate": 2.787058887668715e-06,
      "loss": 0.805,
      "step": 813040
    },
    {
      "epoch": 2.849537548146513,
      "grad_norm": 2.6875,
      "learning_rate": 2.786409859005013e-06,
      "loss": 0.7575,
      "step": 813050
    },
    {
      "epoch": 2.849572595653408,
      "grad_norm": 2.796875,
      "learning_rate": 2.7857608303413116e-06,
      "loss": 0.7896,
      "step": 813060
    },
    {
      "epoch": 2.849607643160304,
      "grad_norm": 2.71875,
      "learning_rate": 2.785111801677609e-06,
      "loss": 0.7492,
      "step": 813070
    },
    {
      "epoch": 2.849642690667199,
      "grad_norm": 3.03125,
      "learning_rate": 2.7844627730139076e-06,
      "loss": 0.754,
      "step": 813080
    },
    {
      "epoch": 2.849677738174095,
      "grad_norm": 3.15625,
      "learning_rate": 2.7838137443502056e-06,
      "loss": 0.8758,
      "step": 813090
    },
    {
      "epoch": 2.8497127856809907,
      "grad_norm": 3.015625,
      "learning_rate": 2.7831647156865036e-06,
      "loss": 0.7585,
      "step": 813100
    },
    {
      "epoch": 2.849747833187886,
      "grad_norm": 3.203125,
      "learning_rate": 2.782515687022802e-06,
      "loss": 0.758,
      "step": 813110
    },
    {
      "epoch": 2.849782880694782,
      "grad_norm": 3.15625,
      "learning_rate": 2.7818666583591e-06,
      "loss": 0.7948,
      "step": 813120
    },
    {
      "epoch": 2.849817928201677,
      "grad_norm": 3.15625,
      "learning_rate": 2.781217629695398e-06,
      "loss": 0.8254,
      "step": 813130
    },
    {
      "epoch": 2.849852975708573,
      "grad_norm": 3.09375,
      "learning_rate": 2.780568601031696e-06,
      "loss": 0.815,
      "step": 813140
    },
    {
      "epoch": 2.8498880232154686,
      "grad_norm": 3.109375,
      "learning_rate": 2.7799195723679944e-06,
      "loss": 0.8249,
      "step": 813150
    },
    {
      "epoch": 2.8499230707223644,
      "grad_norm": 2.75,
      "learning_rate": 2.779270543704292e-06,
      "loss": 0.8086,
      "step": 813160
    },
    {
      "epoch": 2.8499581182292597,
      "grad_norm": 2.796875,
      "learning_rate": 2.7786215150405904e-06,
      "loss": 0.8401,
      "step": 813170
    },
    {
      "epoch": 2.8499931657361555,
      "grad_norm": 3.59375,
      "learning_rate": 2.7779724863768884e-06,
      "loss": 0.8161,
      "step": 813180
    },
    {
      "epoch": 2.850028213243051,
      "grad_norm": 2.65625,
      "learning_rate": 2.7773234577131864e-06,
      "loss": 0.7678,
      "step": 813190
    },
    {
      "epoch": 2.8500632607499465,
      "grad_norm": 2.765625,
      "learning_rate": 2.776674429049485e-06,
      "loss": 0.7147,
      "step": 813200
    },
    {
      "epoch": 2.8500983082568423,
      "grad_norm": 2.796875,
      "learning_rate": 2.776025400385783e-06,
      "loss": 0.7869,
      "step": 813210
    },
    {
      "epoch": 2.8501333557637376,
      "grad_norm": 2.875,
      "learning_rate": 2.775376371722081e-06,
      "loss": 0.7956,
      "step": 813220
    },
    {
      "epoch": 2.8501684032706334,
      "grad_norm": 2.96875,
      "learning_rate": 2.774727343058379e-06,
      "loss": 0.8276,
      "step": 813230
    },
    {
      "epoch": 2.8502034507775287,
      "grad_norm": 3.125,
      "learning_rate": 2.774078314394677e-06,
      "loss": 0.8162,
      "step": 813240
    },
    {
      "epoch": 2.8502384982844244,
      "grad_norm": 3.015625,
      "learning_rate": 2.7734292857309753e-06,
      "loss": 0.7721,
      "step": 813250
    },
    {
      "epoch": 2.85027354579132,
      "grad_norm": 3.078125,
      "learning_rate": 2.7727802570672733e-06,
      "loss": 0.7508,
      "step": 813260
    },
    {
      "epoch": 2.850308593298216,
      "grad_norm": 2.90625,
      "learning_rate": 2.7721312284035713e-06,
      "loss": 0.7823,
      "step": 813270
    },
    {
      "epoch": 2.8503436408051113,
      "grad_norm": 3.03125,
      "learning_rate": 2.7714821997398693e-06,
      "loss": 0.8873,
      "step": 813280
    },
    {
      "epoch": 2.850378688312007,
      "grad_norm": 3.03125,
      "learning_rate": 2.7708331710761677e-06,
      "loss": 0.7639,
      "step": 813290
    },
    {
      "epoch": 2.8504137358189023,
      "grad_norm": 3.0,
      "learning_rate": 2.7701841424124653e-06,
      "loss": 0.791,
      "step": 813300
    },
    {
      "epoch": 2.850448783325798,
      "grad_norm": 2.765625,
      "learning_rate": 2.7695351137487637e-06,
      "loss": 0.7475,
      "step": 813310
    },
    {
      "epoch": 2.850483830832694,
      "grad_norm": 2.953125,
      "learning_rate": 2.768886085085062e-06,
      "loss": 0.7803,
      "step": 813320
    },
    {
      "epoch": 2.850518878339589,
      "grad_norm": 3.109375,
      "learning_rate": 2.7682370564213597e-06,
      "loss": 0.8389,
      "step": 813330
    },
    {
      "epoch": 2.850553925846485,
      "grad_norm": 2.734375,
      "learning_rate": 2.767588027757658e-06,
      "loss": 0.7586,
      "step": 813340
    },
    {
      "epoch": 2.8505889733533802,
      "grad_norm": 3.359375,
      "learning_rate": 2.766938999093956e-06,
      "loss": 0.83,
      "step": 813350
    },
    {
      "epoch": 2.850624020860276,
      "grad_norm": 3.28125,
      "learning_rate": 2.766289970430254e-06,
      "loss": 0.7619,
      "step": 813360
    },
    {
      "epoch": 2.8506590683671718,
      "grad_norm": 3.03125,
      "learning_rate": 2.765640941766552e-06,
      "loss": 0.8561,
      "step": 813370
    },
    {
      "epoch": 2.8506941158740675,
      "grad_norm": 2.96875,
      "learning_rate": 2.7649919131028505e-06,
      "loss": 0.8628,
      "step": 813380
    },
    {
      "epoch": 2.850729163380963,
      "grad_norm": 2.984375,
      "learning_rate": 2.7643428844391485e-06,
      "loss": 0.7627,
      "step": 813390
    },
    {
      "epoch": 2.8507642108878586,
      "grad_norm": 2.8125,
      "learning_rate": 2.7636938557754465e-06,
      "loss": 0.7177,
      "step": 813400
    },
    {
      "epoch": 2.850799258394754,
      "grad_norm": 3.796875,
      "learning_rate": 2.763044827111745e-06,
      "loss": 0.7496,
      "step": 813410
    },
    {
      "epoch": 2.8508343059016497,
      "grad_norm": 2.890625,
      "learning_rate": 2.7623957984480425e-06,
      "loss": 0.7857,
      "step": 813420
    },
    {
      "epoch": 2.8508693534085454,
      "grad_norm": 2.875,
      "learning_rate": 2.761746769784341e-06,
      "loss": 0.8166,
      "step": 813430
    },
    {
      "epoch": 2.8509044009154407,
      "grad_norm": 2.5,
      "learning_rate": 2.761097741120639e-06,
      "loss": 0.7966,
      "step": 813440
    },
    {
      "epoch": 2.8509394484223365,
      "grad_norm": 3.171875,
      "learning_rate": 2.760448712456937e-06,
      "loss": 0.8117,
      "step": 813450
    },
    {
      "epoch": 2.850974495929232,
      "grad_norm": 2.765625,
      "learning_rate": 2.7597996837932354e-06,
      "loss": 0.7582,
      "step": 813460
    },
    {
      "epoch": 2.8510095434361276,
      "grad_norm": 2.75,
      "learning_rate": 2.7591506551295334e-06,
      "loss": 0.7811,
      "step": 813470
    },
    {
      "epoch": 2.8510445909430233,
      "grad_norm": 3.234375,
      "learning_rate": 2.7585016264658314e-06,
      "loss": 0.7648,
      "step": 813480
    },
    {
      "epoch": 2.851079638449919,
      "grad_norm": 3.203125,
      "learning_rate": 2.7578525978021294e-06,
      "loss": 0.7662,
      "step": 813490
    },
    {
      "epoch": 2.8511146859568144,
      "grad_norm": 2.953125,
      "learning_rate": 2.7572035691384274e-06,
      "loss": 0.7806,
      "step": 813500
    },
    {
      "epoch": 2.85114973346371,
      "grad_norm": 3.109375,
      "learning_rate": 2.756554540474726e-06,
      "loss": 0.8449,
      "step": 813510
    },
    {
      "epoch": 2.8511847809706055,
      "grad_norm": 3.40625,
      "learning_rate": 2.755905511811024e-06,
      "loss": 0.834,
      "step": 813520
    },
    {
      "epoch": 2.851219828477501,
      "grad_norm": 3.046875,
      "learning_rate": 2.755256483147322e-06,
      "loss": 0.7986,
      "step": 813530
    },
    {
      "epoch": 2.851254875984397,
      "grad_norm": 3.234375,
      "learning_rate": 2.75460745448362e-06,
      "loss": 0.9051,
      "step": 813540
    },
    {
      "epoch": 2.8512899234912923,
      "grad_norm": 3.03125,
      "learning_rate": 2.7539584258199182e-06,
      "loss": 0.7806,
      "step": 813550
    },
    {
      "epoch": 2.851324970998188,
      "grad_norm": 2.515625,
      "learning_rate": 2.753309397156216e-06,
      "loss": 0.8619,
      "step": 813560
    },
    {
      "epoch": 2.851360018505084,
      "grad_norm": 3.046875,
      "learning_rate": 2.7526603684925142e-06,
      "loss": 0.8203,
      "step": 813570
    },
    {
      "epoch": 2.851395066011979,
      "grad_norm": 3.0625,
      "learning_rate": 2.7520113398288127e-06,
      "loss": 0.8003,
      "step": 813580
    },
    {
      "epoch": 2.851430113518875,
      "grad_norm": 3.171875,
      "learning_rate": 2.7513623111651102e-06,
      "loss": 0.8507,
      "step": 813590
    },
    {
      "epoch": 2.8514651610257706,
      "grad_norm": 2.6875,
      "learning_rate": 2.7507132825014087e-06,
      "loss": 0.7907,
      "step": 813600
    },
    {
      "epoch": 2.851500208532666,
      "grad_norm": 2.8125,
      "learning_rate": 2.7500642538377067e-06,
      "loss": 0.8156,
      "step": 813610
    },
    {
      "epoch": 2.8515352560395617,
      "grad_norm": 2.515625,
      "learning_rate": 2.7494152251740047e-06,
      "loss": 0.7236,
      "step": 813620
    },
    {
      "epoch": 2.851570303546457,
      "grad_norm": 3.109375,
      "learning_rate": 2.7487661965103027e-06,
      "loss": 0.8296,
      "step": 813630
    },
    {
      "epoch": 2.8516053510533528,
      "grad_norm": 3.015625,
      "learning_rate": 2.748117167846601e-06,
      "loss": 0.8375,
      "step": 813640
    },
    {
      "epoch": 2.8516403985602485,
      "grad_norm": 2.984375,
      "learning_rate": 2.747468139182899e-06,
      "loss": 0.8172,
      "step": 813650
    },
    {
      "epoch": 2.851675446067144,
      "grad_norm": 2.34375,
      "learning_rate": 2.746819110519197e-06,
      "loss": 0.7894,
      "step": 813660
    },
    {
      "epoch": 2.8517104935740396,
      "grad_norm": 3.0,
      "learning_rate": 2.746170081855495e-06,
      "loss": 0.7816,
      "step": 813670
    },
    {
      "epoch": 2.8517455410809354,
      "grad_norm": 2.984375,
      "learning_rate": 2.745521053191793e-06,
      "loss": 0.821,
      "step": 813680
    },
    {
      "epoch": 2.8517805885878307,
      "grad_norm": 5.03125,
      "learning_rate": 2.7448720245280915e-06,
      "loss": 0.8426,
      "step": 813690
    },
    {
      "epoch": 2.8518156360947264,
      "grad_norm": 2.96875,
      "learning_rate": 2.7442229958643895e-06,
      "loss": 0.7925,
      "step": 813700
    },
    {
      "epoch": 2.851850683601622,
      "grad_norm": 3.078125,
      "learning_rate": 2.7435739672006875e-06,
      "loss": 0.7921,
      "step": 813710
    },
    {
      "epoch": 2.8518857311085175,
      "grad_norm": 2.4375,
      "learning_rate": 2.742924938536986e-06,
      "loss": 0.7453,
      "step": 813720
    },
    {
      "epoch": 2.8519207786154133,
      "grad_norm": 3.03125,
      "learning_rate": 2.7422759098732835e-06,
      "loss": 0.8041,
      "step": 813730
    },
    {
      "epoch": 2.8519558261223086,
      "grad_norm": 2.46875,
      "learning_rate": 2.741626881209582e-06,
      "loss": 0.8128,
      "step": 813740
    },
    {
      "epoch": 2.8519908736292043,
      "grad_norm": 3.09375,
      "learning_rate": 2.74097785254588e-06,
      "loss": 0.8151,
      "step": 813750
    },
    {
      "epoch": 2.8520259211361,
      "grad_norm": 2.734375,
      "learning_rate": 2.740328823882178e-06,
      "loss": 0.7445,
      "step": 813760
    },
    {
      "epoch": 2.852060968642996,
      "grad_norm": 2.59375,
      "learning_rate": 2.739679795218476e-06,
      "loss": 0.81,
      "step": 813770
    },
    {
      "epoch": 2.852096016149891,
      "grad_norm": 2.78125,
      "learning_rate": 2.7390307665547743e-06,
      "loss": 0.7956,
      "step": 813780
    },
    {
      "epoch": 2.852131063656787,
      "grad_norm": 3.046875,
      "learning_rate": 2.7383817378910723e-06,
      "loss": 0.8018,
      "step": 813790
    },
    {
      "epoch": 2.8521661111636822,
      "grad_norm": 2.984375,
      "learning_rate": 2.7377327092273703e-06,
      "loss": 0.8149,
      "step": 813800
    },
    {
      "epoch": 2.852201158670578,
      "grad_norm": 2.875,
      "learning_rate": 2.7370836805636688e-06,
      "loss": 0.7367,
      "step": 813810
    },
    {
      "epoch": 2.8522362061774738,
      "grad_norm": 2.953125,
      "learning_rate": 2.7364346518999663e-06,
      "loss": 0.8124,
      "step": 813820
    },
    {
      "epoch": 2.852271253684369,
      "grad_norm": 3.265625,
      "learning_rate": 2.7357856232362648e-06,
      "loss": 0.8543,
      "step": 813830
    },
    {
      "epoch": 2.852306301191265,
      "grad_norm": 3.0625,
      "learning_rate": 2.7351365945725628e-06,
      "loss": 0.8636,
      "step": 813840
    },
    {
      "epoch": 2.85234134869816,
      "grad_norm": 3.34375,
      "learning_rate": 2.7344875659088608e-06,
      "loss": 0.847,
      "step": 813850
    },
    {
      "epoch": 2.852376396205056,
      "grad_norm": 3.015625,
      "learning_rate": 2.733838537245159e-06,
      "loss": 0.7804,
      "step": 813860
    },
    {
      "epoch": 2.8524114437119517,
      "grad_norm": 3.09375,
      "learning_rate": 2.733189508581457e-06,
      "loss": 0.7778,
      "step": 813870
    },
    {
      "epoch": 2.8524464912188474,
      "grad_norm": 2.90625,
      "learning_rate": 2.732540479917755e-06,
      "loss": 0.8019,
      "step": 813880
    },
    {
      "epoch": 2.8524815387257427,
      "grad_norm": 2.8125,
      "learning_rate": 2.731891451254053e-06,
      "loss": 0.7591,
      "step": 813890
    },
    {
      "epoch": 2.8525165862326385,
      "grad_norm": 3.21875,
      "learning_rate": 2.7312424225903516e-06,
      "loss": 0.8014,
      "step": 813900
    },
    {
      "epoch": 2.852551633739534,
      "grad_norm": 2.96875,
      "learning_rate": 2.7305933939266496e-06,
      "loss": 0.7298,
      "step": 813910
    },
    {
      "epoch": 2.8525866812464296,
      "grad_norm": 3.28125,
      "learning_rate": 2.7299443652629476e-06,
      "loss": 0.7706,
      "step": 813920
    },
    {
      "epoch": 2.8526217287533253,
      "grad_norm": 3.046875,
      "learning_rate": 2.7292953365992456e-06,
      "loss": 0.7512,
      "step": 813930
    },
    {
      "epoch": 2.8526567762602206,
      "grad_norm": 3.375,
      "learning_rate": 2.7286463079355436e-06,
      "loss": 0.7742,
      "step": 813940
    },
    {
      "epoch": 2.8526918237671164,
      "grad_norm": 3.265625,
      "learning_rate": 2.727997279271842e-06,
      "loss": 0.7756,
      "step": 813950
    },
    {
      "epoch": 2.8527268712740117,
      "grad_norm": 3.203125,
      "learning_rate": 2.72734825060814e-06,
      "loss": 0.8155,
      "step": 813960
    },
    {
      "epoch": 2.8527619187809075,
      "grad_norm": 2.546875,
      "learning_rate": 2.726699221944438e-06,
      "loss": 0.7423,
      "step": 813970
    },
    {
      "epoch": 2.852796966287803,
      "grad_norm": 2.890625,
      "learning_rate": 2.7260501932807365e-06,
      "loss": 0.7996,
      "step": 813980
    },
    {
      "epoch": 2.852832013794699,
      "grad_norm": 2.75,
      "learning_rate": 2.725401164617034e-06,
      "loss": 0.756,
      "step": 813990
    },
    {
      "epoch": 2.8528670613015943,
      "grad_norm": 2.40625,
      "learning_rate": 2.7247521359533325e-06,
      "loss": 0.7791,
      "step": 814000
    },
    {
      "epoch": 2.85290210880849,
      "grad_norm": 3.09375,
      "learning_rate": 2.7241031072896305e-06,
      "loss": 0.7676,
      "step": 814010
    },
    {
      "epoch": 2.8529371563153854,
      "grad_norm": 3.171875,
      "learning_rate": 2.7234540786259285e-06,
      "loss": 0.7421,
      "step": 814020
    },
    {
      "epoch": 2.852972203822281,
      "grad_norm": 2.546875,
      "learning_rate": 2.7228050499622265e-06,
      "loss": 0.7518,
      "step": 814030
    },
    {
      "epoch": 2.853007251329177,
      "grad_norm": 2.703125,
      "learning_rate": 2.722156021298525e-06,
      "loss": 0.7534,
      "step": 814040
    },
    {
      "epoch": 2.853042298836072,
      "grad_norm": 3.046875,
      "learning_rate": 2.721506992634823e-06,
      "loss": 0.8078,
      "step": 814050
    },
    {
      "epoch": 2.853077346342968,
      "grad_norm": 2.65625,
      "learning_rate": 2.720857963971121e-06,
      "loss": 0.7371,
      "step": 814060
    },
    {
      "epoch": 2.8531123938498633,
      "grad_norm": 2.875,
      "learning_rate": 2.7202089353074193e-06,
      "loss": 0.7848,
      "step": 814070
    },
    {
      "epoch": 2.853147441356759,
      "grad_norm": 3.359375,
      "learning_rate": 2.719559906643717e-06,
      "loss": 0.8333,
      "step": 814080
    },
    {
      "epoch": 2.8531824888636548,
      "grad_norm": 2.296875,
      "learning_rate": 2.7189108779800153e-06,
      "loss": 0.7449,
      "step": 814090
    },
    {
      "epoch": 2.8532175363705505,
      "grad_norm": 2.9375,
      "learning_rate": 2.7182618493163133e-06,
      "loss": 0.819,
      "step": 814100
    },
    {
      "epoch": 2.853252583877446,
      "grad_norm": 2.96875,
      "learning_rate": 2.7176128206526113e-06,
      "loss": 0.7884,
      "step": 814110
    },
    {
      "epoch": 2.8532876313843416,
      "grad_norm": 2.625,
      "learning_rate": 2.7169637919889097e-06,
      "loss": 0.8069,
      "step": 814120
    },
    {
      "epoch": 2.853322678891237,
      "grad_norm": 2.4375,
      "learning_rate": 2.7163147633252077e-06,
      "loss": 0.7694,
      "step": 814130
    },
    {
      "epoch": 2.8533577263981327,
      "grad_norm": 2.921875,
      "learning_rate": 2.7156657346615057e-06,
      "loss": 0.7361,
      "step": 814140
    },
    {
      "epoch": 2.8533927739050284,
      "grad_norm": 3.765625,
      "learning_rate": 2.7150167059978037e-06,
      "loss": 0.8302,
      "step": 814150
    },
    {
      "epoch": 2.8534278214119237,
      "grad_norm": 2.75,
      "learning_rate": 2.714367677334102e-06,
      "loss": 0.772,
      "step": 814160
    },
    {
      "epoch": 2.8534628689188195,
      "grad_norm": 3.6875,
      "learning_rate": 2.7137186486703997e-06,
      "loss": 0.8198,
      "step": 814170
    },
    {
      "epoch": 2.853497916425715,
      "grad_norm": 3.390625,
      "learning_rate": 2.713069620006698e-06,
      "loss": 0.791,
      "step": 814180
    },
    {
      "epoch": 2.8535329639326106,
      "grad_norm": 2.875,
      "learning_rate": 2.712420591342996e-06,
      "loss": 0.8729,
      "step": 814190
    },
    {
      "epoch": 2.8535680114395063,
      "grad_norm": 2.609375,
      "learning_rate": 2.711771562679294e-06,
      "loss": 0.8526,
      "step": 814200
    },
    {
      "epoch": 2.853603058946402,
      "grad_norm": 3.171875,
      "learning_rate": 2.7111225340155926e-06,
      "loss": 0.8594,
      "step": 814210
    },
    {
      "epoch": 2.8536381064532974,
      "grad_norm": 2.890625,
      "learning_rate": 2.7104735053518906e-06,
      "loss": 0.843,
      "step": 814220
    },
    {
      "epoch": 2.853673153960193,
      "grad_norm": 2.796875,
      "learning_rate": 2.7098244766881886e-06,
      "loss": 0.856,
      "step": 814230
    },
    {
      "epoch": 2.8537082014670885,
      "grad_norm": 3.625,
      "learning_rate": 2.7091754480244866e-06,
      "loss": 0.8153,
      "step": 814240
    },
    {
      "epoch": 2.8537432489739842,
      "grad_norm": 2.703125,
      "learning_rate": 2.7085264193607846e-06,
      "loss": 0.7991,
      "step": 814250
    },
    {
      "epoch": 2.85377829648088,
      "grad_norm": 2.734375,
      "learning_rate": 2.707877390697083e-06,
      "loss": 0.7495,
      "step": 814260
    },
    {
      "epoch": 2.8538133439877753,
      "grad_norm": 3.109375,
      "learning_rate": 2.707228362033381e-06,
      "loss": 0.8263,
      "step": 814270
    },
    {
      "epoch": 2.853848391494671,
      "grad_norm": 2.875,
      "learning_rate": 2.706579333369679e-06,
      "loss": 0.8435,
      "step": 814280
    },
    {
      "epoch": 2.8538834390015664,
      "grad_norm": 2.65625,
      "learning_rate": 2.705930304705977e-06,
      "loss": 0.8666,
      "step": 814290
    },
    {
      "epoch": 2.853918486508462,
      "grad_norm": 2.703125,
      "learning_rate": 2.7052812760422754e-06,
      "loss": 0.8483,
      "step": 814300
    },
    {
      "epoch": 2.853953534015358,
      "grad_norm": 3.171875,
      "learning_rate": 2.7046322473785734e-06,
      "loss": 0.7906,
      "step": 814310
    },
    {
      "epoch": 2.8539885815222537,
      "grad_norm": 2.84375,
      "learning_rate": 2.7039832187148714e-06,
      "loss": 0.8225,
      "step": 814320
    },
    {
      "epoch": 2.854023629029149,
      "grad_norm": 2.875,
      "learning_rate": 2.70333419005117e-06,
      "loss": 0.7867,
      "step": 814330
    },
    {
      "epoch": 2.8540586765360447,
      "grad_norm": 3.234375,
      "learning_rate": 2.7026851613874674e-06,
      "loss": 0.7917,
      "step": 814340
    },
    {
      "epoch": 2.85409372404294,
      "grad_norm": 2.953125,
      "learning_rate": 2.702036132723766e-06,
      "loss": 0.7442,
      "step": 814350
    },
    {
      "epoch": 2.854128771549836,
      "grad_norm": 2.375,
      "learning_rate": 2.701387104060064e-06,
      "loss": 0.7231,
      "step": 814360
    },
    {
      "epoch": 2.8541638190567316,
      "grad_norm": 2.90625,
      "learning_rate": 2.700738075396362e-06,
      "loss": 0.7917,
      "step": 814370
    },
    {
      "epoch": 2.854198866563627,
      "grad_norm": 2.703125,
      "learning_rate": 2.7000890467326603e-06,
      "loss": 0.7795,
      "step": 814380
    },
    {
      "epoch": 2.8542339140705226,
      "grad_norm": 3.421875,
      "learning_rate": 2.6994400180689583e-06,
      "loss": 0.8735,
      "step": 814390
    },
    {
      "epoch": 2.854268961577418,
      "grad_norm": 2.6875,
      "learning_rate": 2.6987909894052563e-06,
      "loss": 0.719,
      "step": 814400
    },
    {
      "epoch": 2.8543040090843137,
      "grad_norm": 3.34375,
      "learning_rate": 2.6981419607415543e-06,
      "loss": 0.7406,
      "step": 814410
    },
    {
      "epoch": 2.8543390565912095,
      "grad_norm": 2.96875,
      "learning_rate": 2.6974929320778523e-06,
      "loss": 0.8877,
      "step": 814420
    },
    {
      "epoch": 2.854374104098105,
      "grad_norm": 2.859375,
      "learning_rate": 2.6968439034141503e-06,
      "loss": 0.7962,
      "step": 814430
    },
    {
      "epoch": 2.8544091516050005,
      "grad_norm": 3.359375,
      "learning_rate": 2.6961948747504487e-06,
      "loss": 0.8345,
      "step": 814440
    },
    {
      "epoch": 2.8544441991118963,
      "grad_norm": 2.796875,
      "learning_rate": 2.6955458460867467e-06,
      "loss": 0.7612,
      "step": 814450
    },
    {
      "epoch": 2.8544792466187916,
      "grad_norm": 2.90625,
      "learning_rate": 2.6948968174230447e-06,
      "loss": 0.7823,
      "step": 814460
    },
    {
      "epoch": 2.8545142941256874,
      "grad_norm": 2.8125,
      "learning_rate": 2.694247788759343e-06,
      "loss": 0.821,
      "step": 814470
    },
    {
      "epoch": 2.854549341632583,
      "grad_norm": 2.71875,
      "learning_rate": 2.6935987600956407e-06,
      "loss": 0.7061,
      "step": 814480
    },
    {
      "epoch": 2.8545843891394784,
      "grad_norm": 2.9375,
      "learning_rate": 2.692949731431939e-06,
      "loss": 0.8397,
      "step": 814490
    },
    {
      "epoch": 2.854619436646374,
      "grad_norm": 3.15625,
      "learning_rate": 2.692300702768237e-06,
      "loss": 0.7908,
      "step": 814500
    },
    {
      "epoch": 2.8546544841532695,
      "grad_norm": 2.53125,
      "learning_rate": 2.691651674104535e-06,
      "loss": 0.8027,
      "step": 814510
    },
    {
      "epoch": 2.8546895316601653,
      "grad_norm": 2.984375,
      "learning_rate": 2.6910026454408335e-06,
      "loss": 0.7634,
      "step": 814520
    },
    {
      "epoch": 2.854724579167061,
      "grad_norm": 2.96875,
      "learning_rate": 2.6903536167771315e-06,
      "loss": 0.8218,
      "step": 814530
    },
    {
      "epoch": 2.8547596266739568,
      "grad_norm": 2.921875,
      "learning_rate": 2.6897045881134295e-06,
      "loss": 0.8271,
      "step": 814540
    },
    {
      "epoch": 2.854794674180852,
      "grad_norm": 3.09375,
      "learning_rate": 2.6890555594497275e-06,
      "loss": 0.822,
      "step": 814550
    },
    {
      "epoch": 2.854829721687748,
      "grad_norm": 2.65625,
      "learning_rate": 2.688406530786026e-06,
      "loss": 0.8788,
      "step": 814560
    },
    {
      "epoch": 2.854864769194643,
      "grad_norm": 2.796875,
      "learning_rate": 2.6877575021223235e-06,
      "loss": 0.7816,
      "step": 814570
    },
    {
      "epoch": 2.854899816701539,
      "grad_norm": 2.609375,
      "learning_rate": 2.687108473458622e-06,
      "loss": 0.8381,
      "step": 814580
    },
    {
      "epoch": 2.8549348642084347,
      "grad_norm": 3.234375,
      "learning_rate": 2.6864594447949204e-06,
      "loss": 0.8262,
      "step": 814590
    },
    {
      "epoch": 2.85496991171533,
      "grad_norm": 2.890625,
      "learning_rate": 2.685810416131218e-06,
      "loss": 0.7915,
      "step": 814600
    },
    {
      "epoch": 2.8550049592222257,
      "grad_norm": 2.734375,
      "learning_rate": 2.6851613874675164e-06,
      "loss": 0.7819,
      "step": 814610
    },
    {
      "epoch": 2.855040006729121,
      "grad_norm": 3.15625,
      "learning_rate": 2.6845123588038144e-06,
      "loss": 0.8556,
      "step": 814620
    },
    {
      "epoch": 2.855075054236017,
      "grad_norm": 3.40625,
      "learning_rate": 2.6838633301401124e-06,
      "loss": 0.7981,
      "step": 814630
    },
    {
      "epoch": 2.8551101017429126,
      "grad_norm": 2.546875,
      "learning_rate": 2.6832143014764104e-06,
      "loss": 0.7896,
      "step": 814640
    },
    {
      "epoch": 2.8551451492498083,
      "grad_norm": 2.9375,
      "learning_rate": 2.682565272812709e-06,
      "loss": 0.7852,
      "step": 814650
    },
    {
      "epoch": 2.8551801967567036,
      "grad_norm": 3.28125,
      "learning_rate": 2.681916244149007e-06,
      "loss": 0.8035,
      "step": 814660
    },
    {
      "epoch": 2.8552152442635994,
      "grad_norm": 2.765625,
      "learning_rate": 2.681267215485305e-06,
      "loss": 0.7937,
      "step": 814670
    },
    {
      "epoch": 2.8552502917704947,
      "grad_norm": 2.46875,
      "learning_rate": 2.680618186821603e-06,
      "loss": 0.7995,
      "step": 814680
    },
    {
      "epoch": 2.8552853392773905,
      "grad_norm": 2.859375,
      "learning_rate": 2.679969158157901e-06,
      "loss": 0.7634,
      "step": 814690
    },
    {
      "epoch": 2.8553203867842862,
      "grad_norm": 2.5,
      "learning_rate": 2.6793201294941992e-06,
      "loss": 0.8055,
      "step": 814700
    },
    {
      "epoch": 2.8553554342911815,
      "grad_norm": 3.25,
      "learning_rate": 2.6786711008304972e-06,
      "loss": 0.7537,
      "step": 814710
    },
    {
      "epoch": 2.8553904817980773,
      "grad_norm": 2.9375,
      "learning_rate": 2.6780220721667952e-06,
      "loss": 0.7514,
      "step": 814720
    },
    {
      "epoch": 2.8554255293049726,
      "grad_norm": 3.140625,
      "learning_rate": 2.6773730435030937e-06,
      "loss": 0.776,
      "step": 814730
    },
    {
      "epoch": 2.8554605768118684,
      "grad_norm": 2.75,
      "learning_rate": 2.6767240148393912e-06,
      "loss": 0.8175,
      "step": 814740
    },
    {
      "epoch": 2.855495624318764,
      "grad_norm": 3.375,
      "learning_rate": 2.6760749861756897e-06,
      "loss": 0.8001,
      "step": 814750
    },
    {
      "epoch": 2.85553067182566,
      "grad_norm": 2.703125,
      "learning_rate": 2.6754259575119877e-06,
      "loss": 0.792,
      "step": 814760
    },
    {
      "epoch": 2.855565719332555,
      "grad_norm": 3.0625,
      "learning_rate": 2.6747769288482857e-06,
      "loss": 0.7891,
      "step": 814770
    },
    {
      "epoch": 2.855600766839451,
      "grad_norm": 2.640625,
      "learning_rate": 2.674127900184584e-06,
      "loss": 0.7937,
      "step": 814780
    },
    {
      "epoch": 2.8556358143463463,
      "grad_norm": 3.140625,
      "learning_rate": 2.673478871520882e-06,
      "loss": 0.7609,
      "step": 814790
    },
    {
      "epoch": 2.855670861853242,
      "grad_norm": 2.734375,
      "learning_rate": 2.67282984285718e-06,
      "loss": 0.7741,
      "step": 814800
    },
    {
      "epoch": 2.855705909360138,
      "grad_norm": 2.84375,
      "learning_rate": 2.672180814193478e-06,
      "loss": 0.8148,
      "step": 814810
    },
    {
      "epoch": 2.855740956867033,
      "grad_norm": 2.875,
      "learning_rate": 2.6715317855297765e-06,
      "loss": 0.8112,
      "step": 814820
    },
    {
      "epoch": 2.855776004373929,
      "grad_norm": 3.109375,
      "learning_rate": 2.670882756866074e-06,
      "loss": 0.8359,
      "step": 814830
    },
    {
      "epoch": 2.855811051880824,
      "grad_norm": 2.671875,
      "learning_rate": 2.6702337282023725e-06,
      "loss": 0.7242,
      "step": 814840
    },
    {
      "epoch": 2.85584609938772,
      "grad_norm": 3.234375,
      "learning_rate": 2.6695846995386705e-06,
      "loss": 0.8513,
      "step": 814850
    },
    {
      "epoch": 2.8558811468946157,
      "grad_norm": 3.0625,
      "learning_rate": 2.6689356708749685e-06,
      "loss": 0.7787,
      "step": 814860
    },
    {
      "epoch": 2.8559161944015115,
      "grad_norm": 3.046875,
      "learning_rate": 2.668286642211267e-06,
      "loss": 0.8435,
      "step": 814870
    },
    {
      "epoch": 2.8559512419084068,
      "grad_norm": 2.828125,
      "learning_rate": 2.667637613547565e-06,
      "loss": 0.7775,
      "step": 814880
    },
    {
      "epoch": 2.8559862894153025,
      "grad_norm": 2.828125,
      "learning_rate": 2.666988584883863e-06,
      "loss": 0.8511,
      "step": 814890
    },
    {
      "epoch": 2.856021336922198,
      "grad_norm": 3.734375,
      "learning_rate": 2.666339556220161e-06,
      "loss": 0.8557,
      "step": 814900
    },
    {
      "epoch": 2.8560563844290936,
      "grad_norm": 2.578125,
      "learning_rate": 2.665690527556459e-06,
      "loss": 0.7755,
      "step": 814910
    },
    {
      "epoch": 2.8560914319359894,
      "grad_norm": 2.953125,
      "learning_rate": 2.6650414988927573e-06,
      "loss": 0.7171,
      "step": 814920
    },
    {
      "epoch": 2.8561264794428847,
      "grad_norm": 2.78125,
      "learning_rate": 2.6643924702290553e-06,
      "loss": 0.8103,
      "step": 814930
    },
    {
      "epoch": 2.8561615269497804,
      "grad_norm": 3.71875,
      "learning_rate": 2.6637434415653533e-06,
      "loss": 0.8437,
      "step": 814940
    },
    {
      "epoch": 2.856196574456676,
      "grad_norm": 2.890625,
      "learning_rate": 2.6630944129016513e-06,
      "loss": 0.8154,
      "step": 814950
    },
    {
      "epoch": 2.8562316219635715,
      "grad_norm": 3.125,
      "learning_rate": 2.6624453842379498e-06,
      "loss": 0.8249,
      "step": 814960
    },
    {
      "epoch": 2.8562666694704673,
      "grad_norm": 2.921875,
      "learning_rate": 2.6617963555742473e-06,
      "loss": 0.7936,
      "step": 814970
    },
    {
      "epoch": 2.856301716977363,
      "grad_norm": 2.84375,
      "learning_rate": 2.6611473269105458e-06,
      "loss": 0.7691,
      "step": 814980
    },
    {
      "epoch": 2.8563367644842583,
      "grad_norm": 2.90625,
      "learning_rate": 2.660498298246844e-06,
      "loss": 0.8445,
      "step": 814990
    },
    {
      "epoch": 2.856371811991154,
      "grad_norm": 2.796875,
      "learning_rate": 2.6598492695831418e-06,
      "loss": 0.7571,
      "step": 815000
    },
    {
      "epoch": 2.856371811991154,
      "eval_loss": 0.750056266784668,
      "eval_runtime": 553.1159,
      "eval_samples_per_second": 687.805,
      "eval_steps_per_second": 57.317,
      "step": 815000
    },
    {
      "epoch": 2.8564068594980494,
      "grad_norm": 2.96875,
      "learning_rate": 2.65920024091944e-06,
      "loss": 0.7618,
      "step": 815010
    },
    {
      "epoch": 2.856441907004945,
      "grad_norm": 2.609375,
      "learning_rate": 2.658551212255738e-06,
      "loss": 0.7566,
      "step": 815020
    },
    {
      "epoch": 2.856476954511841,
      "grad_norm": 2.671875,
      "learning_rate": 2.657902183592036e-06,
      "loss": 0.8599,
      "step": 815030
    },
    {
      "epoch": 2.8565120020187367,
      "grad_norm": 3.390625,
      "learning_rate": 2.657253154928334e-06,
      "loss": 0.8056,
      "step": 815040
    },
    {
      "epoch": 2.856547049525632,
      "grad_norm": 2.53125,
      "learning_rate": 2.6566041262646326e-06,
      "loss": 0.8392,
      "step": 815050
    },
    {
      "epoch": 2.8565820970325277,
      "grad_norm": 2.828125,
      "learning_rate": 2.6559550976009306e-06,
      "loss": 0.7803,
      "step": 815060
    },
    {
      "epoch": 2.856617144539423,
      "grad_norm": 2.4375,
      "learning_rate": 2.6553060689372286e-06,
      "loss": 0.734,
      "step": 815070
    },
    {
      "epoch": 2.856652192046319,
      "grad_norm": 2.5,
      "learning_rate": 2.654657040273527e-06,
      "loss": 0.8761,
      "step": 815080
    },
    {
      "epoch": 2.8566872395532146,
      "grad_norm": 2.734375,
      "learning_rate": 2.6540080116098246e-06,
      "loss": 0.7763,
      "step": 815090
    },
    {
      "epoch": 2.85672228706011,
      "grad_norm": 2.25,
      "learning_rate": 2.653358982946123e-06,
      "loss": 0.7349,
      "step": 815100
    },
    {
      "epoch": 2.8567573345670056,
      "grad_norm": 3.0,
      "learning_rate": 2.652709954282421e-06,
      "loss": 0.8347,
      "step": 815110
    },
    {
      "epoch": 2.856792382073901,
      "grad_norm": 3.359375,
      "learning_rate": 2.652060925618719e-06,
      "loss": 0.8095,
      "step": 815120
    },
    {
      "epoch": 2.8568274295807967,
      "grad_norm": 2.984375,
      "learning_rate": 2.6514118969550175e-06,
      "loss": 0.7652,
      "step": 815130
    },
    {
      "epoch": 2.8568624770876925,
      "grad_norm": 3.109375,
      "learning_rate": 2.6507628682913155e-06,
      "loss": 0.778,
      "step": 815140
    },
    {
      "epoch": 2.8568975245945882,
      "grad_norm": 2.578125,
      "learning_rate": 2.6501138396276135e-06,
      "loss": 0.7531,
      "step": 815150
    },
    {
      "epoch": 2.8569325721014835,
      "grad_norm": 2.96875,
      "learning_rate": 2.6494648109639115e-06,
      "loss": 0.7815,
      "step": 815160
    },
    {
      "epoch": 2.8569676196083793,
      "grad_norm": 2.9375,
      "learning_rate": 2.6488157823002095e-06,
      "loss": 0.7627,
      "step": 815170
    },
    {
      "epoch": 2.8570026671152746,
      "grad_norm": 2.578125,
      "learning_rate": 2.648166753636508e-06,
      "loss": 0.7957,
      "step": 815180
    },
    {
      "epoch": 2.8570377146221704,
      "grad_norm": 2.875,
      "learning_rate": 2.647517724972806e-06,
      "loss": 0.8058,
      "step": 815190
    },
    {
      "epoch": 2.857072762129066,
      "grad_norm": 2.703125,
      "learning_rate": 2.646868696309104e-06,
      "loss": 0.7589,
      "step": 815200
    },
    {
      "epoch": 2.8571078096359614,
      "grad_norm": 3.078125,
      "learning_rate": 2.646219667645402e-06,
      "loss": 0.8722,
      "step": 815210
    },
    {
      "epoch": 2.857142857142857,
      "grad_norm": 3.40625,
      "learning_rate": 2.6455706389817003e-06,
      "loss": 0.7943,
      "step": 815220
    },
    {
      "epoch": 2.8571779046497525,
      "grad_norm": 2.71875,
      "learning_rate": 2.644921610317998e-06,
      "loss": 0.7528,
      "step": 815230
    },
    {
      "epoch": 2.8572129521566483,
      "grad_norm": 2.78125,
      "learning_rate": 2.6442725816542963e-06,
      "loss": 0.7864,
      "step": 815240
    },
    {
      "epoch": 2.857247999663544,
      "grad_norm": 2.984375,
      "learning_rate": 2.6436235529905947e-06,
      "loss": 0.7944,
      "step": 815250
    },
    {
      "epoch": 2.85728304717044,
      "grad_norm": 3.28125,
      "learning_rate": 2.6429745243268923e-06,
      "loss": 0.8931,
      "step": 815260
    },
    {
      "epoch": 2.857318094677335,
      "grad_norm": 2.890625,
      "learning_rate": 2.6423254956631907e-06,
      "loss": 0.8308,
      "step": 815270
    },
    {
      "epoch": 2.857353142184231,
      "grad_norm": 2.96875,
      "learning_rate": 2.6416764669994887e-06,
      "loss": 0.8122,
      "step": 815280
    },
    {
      "epoch": 2.857388189691126,
      "grad_norm": 2.953125,
      "learning_rate": 2.6410274383357867e-06,
      "loss": 0.7859,
      "step": 815290
    },
    {
      "epoch": 2.857423237198022,
      "grad_norm": 2.90625,
      "learning_rate": 2.6403784096720847e-06,
      "loss": 0.8122,
      "step": 815300
    },
    {
      "epoch": 2.8574582847049177,
      "grad_norm": 2.984375,
      "learning_rate": 2.639729381008383e-06,
      "loss": 0.7997,
      "step": 815310
    },
    {
      "epoch": 2.857493332211813,
      "grad_norm": 3.234375,
      "learning_rate": 2.639080352344681e-06,
      "loss": 0.7808,
      "step": 815320
    },
    {
      "epoch": 2.8575283797187088,
      "grad_norm": 2.734375,
      "learning_rate": 2.638431323680979e-06,
      "loss": 0.7714,
      "step": 815330
    },
    {
      "epoch": 2.857563427225604,
      "grad_norm": 3.046875,
      "learning_rate": 2.6377822950172776e-06,
      "loss": 0.8722,
      "step": 815340
    },
    {
      "epoch": 2.8575984747325,
      "grad_norm": 2.75,
      "learning_rate": 2.637133266353575e-06,
      "loss": 0.8157,
      "step": 815350
    },
    {
      "epoch": 2.8576335222393956,
      "grad_norm": 3.234375,
      "learning_rate": 2.6364842376898736e-06,
      "loss": 0.7794,
      "step": 815360
    },
    {
      "epoch": 2.8576685697462914,
      "grad_norm": 2.640625,
      "learning_rate": 2.6358352090261716e-06,
      "loss": 0.8099,
      "step": 815370
    },
    {
      "epoch": 2.8577036172531867,
      "grad_norm": 3.0,
      "learning_rate": 2.6351861803624696e-06,
      "loss": 0.8766,
      "step": 815380
    },
    {
      "epoch": 2.8577386647600824,
      "grad_norm": 3.140625,
      "learning_rate": 2.634537151698768e-06,
      "loss": 0.8304,
      "step": 815390
    },
    {
      "epoch": 2.8577737122669777,
      "grad_norm": 2.828125,
      "learning_rate": 2.633888123035066e-06,
      "loss": 0.761,
      "step": 815400
    },
    {
      "epoch": 2.8578087597738735,
      "grad_norm": 2.84375,
      "learning_rate": 2.633239094371364e-06,
      "loss": 0.7924,
      "step": 815410
    },
    {
      "epoch": 2.8578438072807693,
      "grad_norm": 3.03125,
      "learning_rate": 2.632590065707662e-06,
      "loss": 0.8353,
      "step": 815420
    },
    {
      "epoch": 2.8578788547876646,
      "grad_norm": 2.828125,
      "learning_rate": 2.63194103704396e-06,
      "loss": 0.8287,
      "step": 815430
    },
    {
      "epoch": 2.8579139022945603,
      "grad_norm": 3.3125,
      "learning_rate": 2.631292008380258e-06,
      "loss": 0.7458,
      "step": 815440
    },
    {
      "epoch": 2.8579489498014556,
      "grad_norm": 3.078125,
      "learning_rate": 2.6306429797165564e-06,
      "loss": 0.8343,
      "step": 815450
    },
    {
      "epoch": 2.8579839973083514,
      "grad_norm": 3.0625,
      "learning_rate": 2.6299939510528544e-06,
      "loss": 0.8645,
      "step": 815460
    },
    {
      "epoch": 2.858019044815247,
      "grad_norm": 3.4375,
      "learning_rate": 2.6293449223891524e-06,
      "loss": 0.8121,
      "step": 815470
    },
    {
      "epoch": 2.858054092322143,
      "grad_norm": 3.09375,
      "learning_rate": 2.628695893725451e-06,
      "loss": 0.8286,
      "step": 815480
    },
    {
      "epoch": 2.8580891398290382,
      "grad_norm": 2.671875,
      "learning_rate": 2.6280468650617484e-06,
      "loss": 0.8217,
      "step": 815490
    },
    {
      "epoch": 2.858124187335934,
      "grad_norm": 2.8125,
      "learning_rate": 2.627397836398047e-06,
      "loss": 0.7975,
      "step": 815500
    },
    {
      "epoch": 2.8581592348428293,
      "grad_norm": 2.8125,
      "learning_rate": 2.626748807734345e-06,
      "loss": 0.7517,
      "step": 815510
    },
    {
      "epoch": 2.858194282349725,
      "grad_norm": 3.03125,
      "learning_rate": 2.626099779070643e-06,
      "loss": 0.7366,
      "step": 815520
    },
    {
      "epoch": 2.858229329856621,
      "grad_norm": 3.34375,
      "learning_rate": 2.6254507504069413e-06,
      "loss": 0.7796,
      "step": 815530
    },
    {
      "epoch": 2.858264377363516,
      "grad_norm": 3.28125,
      "learning_rate": 2.6248017217432393e-06,
      "loss": 0.8287,
      "step": 815540
    },
    {
      "epoch": 2.858299424870412,
      "grad_norm": 2.59375,
      "learning_rate": 2.6241526930795373e-06,
      "loss": 0.868,
      "step": 815550
    },
    {
      "epoch": 2.858334472377307,
      "grad_norm": 2.234375,
      "learning_rate": 2.6235036644158353e-06,
      "loss": 0.7266,
      "step": 815560
    },
    {
      "epoch": 2.858369519884203,
      "grad_norm": 2.859375,
      "learning_rate": 2.6228546357521337e-06,
      "loss": 0.7812,
      "step": 815570
    },
    {
      "epoch": 2.8584045673910987,
      "grad_norm": 2.890625,
      "learning_rate": 2.6222056070884313e-06,
      "loss": 0.7986,
      "step": 815580
    },
    {
      "epoch": 2.8584396148979945,
      "grad_norm": 2.78125,
      "learning_rate": 2.6215565784247297e-06,
      "loss": 0.8343,
      "step": 815590
    },
    {
      "epoch": 2.85847466240489,
      "grad_norm": 3.15625,
      "learning_rate": 2.6209075497610277e-06,
      "loss": 0.7744,
      "step": 815600
    },
    {
      "epoch": 2.8585097099117855,
      "grad_norm": 2.90625,
      "learning_rate": 2.6202585210973257e-06,
      "loss": 0.7334,
      "step": 815610
    },
    {
      "epoch": 2.858544757418681,
      "grad_norm": 3.0625,
      "learning_rate": 2.619609492433624e-06,
      "loss": 0.7898,
      "step": 815620
    },
    {
      "epoch": 2.8585798049255766,
      "grad_norm": 2.703125,
      "learning_rate": 2.618960463769922e-06,
      "loss": 0.8224,
      "step": 815630
    },
    {
      "epoch": 2.8586148524324724,
      "grad_norm": 2.9375,
      "learning_rate": 2.61831143510622e-06,
      "loss": 0.7804,
      "step": 815640
    },
    {
      "epoch": 2.8586498999393677,
      "grad_norm": 2.796875,
      "learning_rate": 2.6176624064425185e-06,
      "loss": 0.7864,
      "step": 815650
    },
    {
      "epoch": 2.8586849474462634,
      "grad_norm": 2.5,
      "learning_rate": 2.617013377778816e-06,
      "loss": 0.8128,
      "step": 815660
    },
    {
      "epoch": 2.8587199949531588,
      "grad_norm": 3.1875,
      "learning_rate": 2.6163643491151145e-06,
      "loss": 0.7803,
      "step": 815670
    },
    {
      "epoch": 2.8587550424600545,
      "grad_norm": 3.421875,
      "learning_rate": 2.6157153204514125e-06,
      "loss": 0.8124,
      "step": 815680
    },
    {
      "epoch": 2.8587900899669503,
      "grad_norm": 3.125,
      "learning_rate": 2.6150662917877105e-06,
      "loss": 0.8924,
      "step": 815690
    },
    {
      "epoch": 2.858825137473846,
      "grad_norm": 2.5625,
      "learning_rate": 2.6144172631240085e-06,
      "loss": 0.801,
      "step": 815700
    },
    {
      "epoch": 2.8588601849807413,
      "grad_norm": 2.953125,
      "learning_rate": 2.613768234460307e-06,
      "loss": 0.8687,
      "step": 815710
    },
    {
      "epoch": 2.858895232487637,
      "grad_norm": 3.015625,
      "learning_rate": 2.613119205796605e-06,
      "loss": 0.7634,
      "step": 815720
    },
    {
      "epoch": 2.8589302799945324,
      "grad_norm": 2.578125,
      "learning_rate": 2.612470177132903e-06,
      "loss": 0.7437,
      "step": 815730
    },
    {
      "epoch": 2.858965327501428,
      "grad_norm": 2.796875,
      "learning_rate": 2.6118211484692014e-06,
      "loss": 0.7753,
      "step": 815740
    },
    {
      "epoch": 2.859000375008324,
      "grad_norm": 2.8125,
      "learning_rate": 2.611172119805499e-06,
      "loss": 0.7248,
      "step": 815750
    },
    {
      "epoch": 2.8590354225152192,
      "grad_norm": 2.984375,
      "learning_rate": 2.6105230911417974e-06,
      "loss": 0.7715,
      "step": 815760
    },
    {
      "epoch": 2.859070470022115,
      "grad_norm": 2.984375,
      "learning_rate": 2.6098740624780954e-06,
      "loss": 0.8329,
      "step": 815770
    },
    {
      "epoch": 2.8591055175290103,
      "grad_norm": 3.3125,
      "learning_rate": 2.6092250338143934e-06,
      "loss": 0.8348,
      "step": 815780
    },
    {
      "epoch": 2.859140565035906,
      "grad_norm": 3.15625,
      "learning_rate": 2.608576005150692e-06,
      "loss": 0.7635,
      "step": 815790
    },
    {
      "epoch": 2.859175612542802,
      "grad_norm": 3.28125,
      "learning_rate": 2.60792697648699e-06,
      "loss": 0.8735,
      "step": 815800
    },
    {
      "epoch": 2.8592106600496976,
      "grad_norm": 2.796875,
      "learning_rate": 2.607277947823288e-06,
      "loss": 0.827,
      "step": 815810
    },
    {
      "epoch": 2.859245707556593,
      "grad_norm": 2.671875,
      "learning_rate": 2.606628919159586e-06,
      "loss": 0.8584,
      "step": 815820
    },
    {
      "epoch": 2.8592807550634887,
      "grad_norm": 3.8125,
      "learning_rate": 2.6059798904958842e-06,
      "loss": 0.7785,
      "step": 815830
    },
    {
      "epoch": 2.859315802570384,
      "grad_norm": 2.734375,
      "learning_rate": 2.605330861832182e-06,
      "loss": 0.8016,
      "step": 815840
    },
    {
      "epoch": 2.8593508500772797,
      "grad_norm": 3.078125,
      "learning_rate": 2.6046818331684802e-06,
      "loss": 0.873,
      "step": 815850
    },
    {
      "epoch": 2.8593858975841755,
      "grad_norm": 2.8125,
      "learning_rate": 2.6040328045047782e-06,
      "loss": 0.8251,
      "step": 815860
    },
    {
      "epoch": 2.859420945091071,
      "grad_norm": 2.765625,
      "learning_rate": 2.6033837758410762e-06,
      "loss": 0.795,
      "step": 815870
    },
    {
      "epoch": 2.8594559925979666,
      "grad_norm": 2.96875,
      "learning_rate": 2.6027347471773747e-06,
      "loss": 0.8017,
      "step": 815880
    },
    {
      "epoch": 2.859491040104862,
      "grad_norm": 2.828125,
      "learning_rate": 2.6020857185136727e-06,
      "loss": 0.7846,
      "step": 815890
    },
    {
      "epoch": 2.8595260876117576,
      "grad_norm": 3.09375,
      "learning_rate": 2.6014366898499707e-06,
      "loss": 0.7972,
      "step": 815900
    },
    {
      "epoch": 2.8595611351186534,
      "grad_norm": 2.734375,
      "learning_rate": 2.6007876611862687e-06,
      "loss": 0.7899,
      "step": 815910
    },
    {
      "epoch": 2.859596182625549,
      "grad_norm": 3.140625,
      "learning_rate": 2.6001386325225667e-06,
      "loss": 0.7979,
      "step": 815920
    },
    {
      "epoch": 2.8596312301324445,
      "grad_norm": 3.140625,
      "learning_rate": 2.599489603858865e-06,
      "loss": 0.7805,
      "step": 815930
    },
    {
      "epoch": 2.8596662776393402,
      "grad_norm": 3.5,
      "learning_rate": 2.598840575195163e-06,
      "loss": 0.7936,
      "step": 815940
    },
    {
      "epoch": 2.8597013251462355,
      "grad_norm": 3.046875,
      "learning_rate": 2.598191546531461e-06,
      "loss": 0.8464,
      "step": 815950
    },
    {
      "epoch": 2.8597363726531313,
      "grad_norm": 3.03125,
      "learning_rate": 2.597542517867759e-06,
      "loss": 0.8123,
      "step": 815960
    },
    {
      "epoch": 2.859771420160027,
      "grad_norm": 2.984375,
      "learning_rate": 2.5968934892040575e-06,
      "loss": 0.815,
      "step": 815970
    },
    {
      "epoch": 2.8598064676669224,
      "grad_norm": 3.046875,
      "learning_rate": 2.596244460540355e-06,
      "loss": 0.8012,
      "step": 815980
    },
    {
      "epoch": 2.859841515173818,
      "grad_norm": 2.78125,
      "learning_rate": 2.5955954318766535e-06,
      "loss": 0.7368,
      "step": 815990
    },
    {
      "epoch": 2.8598765626807134,
      "grad_norm": 3.109375,
      "learning_rate": 2.594946403212952e-06,
      "loss": 0.758,
      "step": 816000
    },
    {
      "epoch": 2.859911610187609,
      "grad_norm": 3.0,
      "learning_rate": 2.5942973745492495e-06,
      "loss": 0.8287,
      "step": 816010
    },
    {
      "epoch": 2.859946657694505,
      "grad_norm": 3.140625,
      "learning_rate": 2.593648345885548e-06,
      "loss": 0.7791,
      "step": 816020
    },
    {
      "epoch": 2.8599817052014007,
      "grad_norm": 2.640625,
      "learning_rate": 2.592999317221846e-06,
      "loss": 0.8318,
      "step": 816030
    },
    {
      "epoch": 2.860016752708296,
      "grad_norm": 3.171875,
      "learning_rate": 2.592350288558144e-06,
      "loss": 0.8296,
      "step": 816040
    },
    {
      "epoch": 2.860051800215192,
      "grad_norm": 2.921875,
      "learning_rate": 2.591701259894442e-06,
      "loss": 0.8611,
      "step": 816050
    },
    {
      "epoch": 2.860086847722087,
      "grad_norm": 2.828125,
      "learning_rate": 2.5910522312307403e-06,
      "loss": 0.7366,
      "step": 816060
    },
    {
      "epoch": 2.860121895228983,
      "grad_norm": 2.734375,
      "learning_rate": 2.5904032025670383e-06,
      "loss": 0.8931,
      "step": 816070
    },
    {
      "epoch": 2.8601569427358786,
      "grad_norm": 2.890625,
      "learning_rate": 2.5897541739033363e-06,
      "loss": 0.8125,
      "step": 816080
    },
    {
      "epoch": 2.860191990242774,
      "grad_norm": 2.78125,
      "learning_rate": 2.5891051452396343e-06,
      "loss": 0.7958,
      "step": 816090
    },
    {
      "epoch": 2.8602270377496697,
      "grad_norm": 2.90625,
      "learning_rate": 2.5884561165759323e-06,
      "loss": 0.7144,
      "step": 816100
    },
    {
      "epoch": 2.860262085256565,
      "grad_norm": 2.890625,
      "learning_rate": 2.5878070879122308e-06,
      "loss": 0.8054,
      "step": 816110
    },
    {
      "epoch": 2.8602971327634608,
      "grad_norm": 3.0625,
      "learning_rate": 2.5871580592485288e-06,
      "loss": 0.8272,
      "step": 816120
    },
    {
      "epoch": 2.8603321802703565,
      "grad_norm": 3.171875,
      "learning_rate": 2.5865090305848268e-06,
      "loss": 0.8413,
      "step": 816130
    },
    {
      "epoch": 2.8603672277772523,
      "grad_norm": 3.375,
      "learning_rate": 2.585860001921125e-06,
      "loss": 0.7994,
      "step": 816140
    },
    {
      "epoch": 2.8604022752841476,
      "grad_norm": 2.78125,
      "learning_rate": 2.5852109732574228e-06,
      "loss": 0.8171,
      "step": 816150
    },
    {
      "epoch": 2.8604373227910433,
      "grad_norm": 2.75,
      "learning_rate": 2.584561944593721e-06,
      "loss": 0.8675,
      "step": 816160
    },
    {
      "epoch": 2.8604723702979387,
      "grad_norm": 3.171875,
      "learning_rate": 2.583912915930019e-06,
      "loss": 0.7697,
      "step": 816170
    },
    {
      "epoch": 2.8605074178048344,
      "grad_norm": 2.859375,
      "learning_rate": 2.583263887266317e-06,
      "loss": 0.7736,
      "step": 816180
    },
    {
      "epoch": 2.86054246531173,
      "grad_norm": 2.859375,
      "learning_rate": 2.5826148586026156e-06,
      "loss": 0.8877,
      "step": 816190
    },
    {
      "epoch": 2.8605775128186255,
      "grad_norm": 2.796875,
      "learning_rate": 2.5819658299389136e-06,
      "loss": 0.7325,
      "step": 816200
    },
    {
      "epoch": 2.8606125603255212,
      "grad_norm": 3.0,
      "learning_rate": 2.5813168012752116e-06,
      "loss": 0.9111,
      "step": 816210
    },
    {
      "epoch": 2.860647607832417,
      "grad_norm": 2.625,
      "learning_rate": 2.5806677726115096e-06,
      "loss": 0.8157,
      "step": 816220
    },
    {
      "epoch": 2.8606826553393123,
      "grad_norm": 2.921875,
      "learning_rate": 2.580018743947808e-06,
      "loss": 0.8645,
      "step": 816230
    },
    {
      "epoch": 2.860717702846208,
      "grad_norm": 2.671875,
      "learning_rate": 2.5793697152841056e-06,
      "loss": 0.7478,
      "step": 816240
    },
    {
      "epoch": 2.860752750353104,
      "grad_norm": 2.75,
      "learning_rate": 2.578720686620404e-06,
      "loss": 0.7834,
      "step": 816250
    },
    {
      "epoch": 2.860787797859999,
      "grad_norm": 2.921875,
      "learning_rate": 2.5780716579567025e-06,
      "loss": 0.7286,
      "step": 816260
    },
    {
      "epoch": 2.860822845366895,
      "grad_norm": 2.765625,
      "learning_rate": 2.577422629293e-06,
      "loss": 0.8424,
      "step": 816270
    },
    {
      "epoch": 2.86085789287379,
      "grad_norm": 3.40625,
      "learning_rate": 2.5767736006292985e-06,
      "loss": 0.7844,
      "step": 816280
    },
    {
      "epoch": 2.860892940380686,
      "grad_norm": 2.9375,
      "learning_rate": 2.5761245719655965e-06,
      "loss": 0.8161,
      "step": 816290
    },
    {
      "epoch": 2.8609279878875817,
      "grad_norm": 3.265625,
      "learning_rate": 2.5754755433018945e-06,
      "loss": 0.7923,
      "step": 816300
    },
    {
      "epoch": 2.860963035394477,
      "grad_norm": 2.921875,
      "learning_rate": 2.5748265146381925e-06,
      "loss": 0.8102,
      "step": 816310
    },
    {
      "epoch": 2.860998082901373,
      "grad_norm": 2.953125,
      "learning_rate": 2.574177485974491e-06,
      "loss": 0.8569,
      "step": 816320
    },
    {
      "epoch": 2.8610331304082686,
      "grad_norm": 3.0625,
      "learning_rate": 2.573528457310789e-06,
      "loss": 0.8696,
      "step": 816330
    },
    {
      "epoch": 2.861068177915164,
      "grad_norm": 2.953125,
      "learning_rate": 2.572879428647087e-06,
      "loss": 0.88,
      "step": 816340
    },
    {
      "epoch": 2.8611032254220596,
      "grad_norm": 2.984375,
      "learning_rate": 2.572230399983385e-06,
      "loss": 0.7519,
      "step": 816350
    },
    {
      "epoch": 2.8611382729289554,
      "grad_norm": 2.796875,
      "learning_rate": 2.571581371319683e-06,
      "loss": 0.7811,
      "step": 816360
    },
    {
      "epoch": 2.8611733204358507,
      "grad_norm": 2.859375,
      "learning_rate": 2.5709323426559813e-06,
      "loss": 0.7807,
      "step": 816370
    },
    {
      "epoch": 2.8612083679427465,
      "grad_norm": 3.171875,
      "learning_rate": 2.5702833139922793e-06,
      "loss": 0.7908,
      "step": 816380
    },
    {
      "epoch": 2.861243415449642,
      "grad_norm": 3.140625,
      "learning_rate": 2.5696342853285773e-06,
      "loss": 0.7628,
      "step": 816390
    },
    {
      "epoch": 2.8612784629565375,
      "grad_norm": 3.0,
      "learning_rate": 2.5689852566648757e-06,
      "loss": 0.7964,
      "step": 816400
    },
    {
      "epoch": 2.8613135104634333,
      "grad_norm": 3.078125,
      "learning_rate": 2.5683362280011733e-06,
      "loss": 0.8654,
      "step": 816410
    },
    {
      "epoch": 2.861348557970329,
      "grad_norm": 3.140625,
      "learning_rate": 2.5676871993374717e-06,
      "loss": 0.8291,
      "step": 816420
    },
    {
      "epoch": 2.8613836054772244,
      "grad_norm": 2.421875,
      "learning_rate": 2.5670381706737697e-06,
      "loss": 0.7627,
      "step": 816430
    },
    {
      "epoch": 2.86141865298412,
      "grad_norm": 2.59375,
      "learning_rate": 2.5663891420100677e-06,
      "loss": 0.7995,
      "step": 816440
    },
    {
      "epoch": 2.8614537004910154,
      "grad_norm": 2.53125,
      "learning_rate": 2.5657401133463657e-06,
      "loss": 0.7973,
      "step": 816450
    },
    {
      "epoch": 2.861488747997911,
      "grad_norm": 3.28125,
      "learning_rate": 2.565091084682664e-06,
      "loss": 0.8207,
      "step": 816460
    },
    {
      "epoch": 2.861523795504807,
      "grad_norm": 3.328125,
      "learning_rate": 2.564442056018962e-06,
      "loss": 0.772,
      "step": 816470
    },
    {
      "epoch": 2.8615588430117023,
      "grad_norm": 3.09375,
      "learning_rate": 2.56379302735526e-06,
      "loss": 0.8693,
      "step": 816480
    },
    {
      "epoch": 2.861593890518598,
      "grad_norm": 2.640625,
      "learning_rate": 2.5631439986915586e-06,
      "loss": 0.8528,
      "step": 816490
    },
    {
      "epoch": 2.8616289380254933,
      "grad_norm": 2.78125,
      "learning_rate": 2.562494970027856e-06,
      "loss": 0.7358,
      "step": 816500
    },
    {
      "epoch": 2.861663985532389,
      "grad_norm": 3.15625,
      "learning_rate": 2.5618459413641546e-06,
      "loss": 0.7874,
      "step": 816510
    },
    {
      "epoch": 2.861699033039285,
      "grad_norm": 3.109375,
      "learning_rate": 2.561196912700453e-06,
      "loss": 0.7838,
      "step": 816520
    },
    {
      "epoch": 2.8617340805461806,
      "grad_norm": 2.859375,
      "learning_rate": 2.5605478840367506e-06,
      "loss": 0.7708,
      "step": 816530
    },
    {
      "epoch": 2.861769128053076,
      "grad_norm": 2.609375,
      "learning_rate": 2.559898855373049e-06,
      "loss": 0.8149,
      "step": 816540
    },
    {
      "epoch": 2.8618041755599717,
      "grad_norm": 2.84375,
      "learning_rate": 2.559249826709347e-06,
      "loss": 0.8081,
      "step": 816550
    },
    {
      "epoch": 2.861839223066867,
      "grad_norm": 3.375,
      "learning_rate": 2.558600798045645e-06,
      "loss": 0.8149,
      "step": 816560
    },
    {
      "epoch": 2.8618742705737628,
      "grad_norm": 2.71875,
      "learning_rate": 2.557951769381943e-06,
      "loss": 0.8459,
      "step": 816570
    },
    {
      "epoch": 2.8619093180806585,
      "grad_norm": 3.15625,
      "learning_rate": 2.5573027407182414e-06,
      "loss": 0.809,
      "step": 816580
    },
    {
      "epoch": 2.861944365587554,
      "grad_norm": 3.125,
      "learning_rate": 2.5566537120545394e-06,
      "loss": 0.8229,
      "step": 816590
    },
    {
      "epoch": 2.8619794130944496,
      "grad_norm": 3.265625,
      "learning_rate": 2.5560046833908374e-06,
      "loss": 0.7992,
      "step": 816600
    },
    {
      "epoch": 2.862014460601345,
      "grad_norm": 2.8125,
      "learning_rate": 2.5553556547271354e-06,
      "loss": 0.7712,
      "step": 816610
    },
    {
      "epoch": 2.8620495081082407,
      "grad_norm": 2.890625,
      "learning_rate": 2.5547066260634334e-06,
      "loss": 0.8703,
      "step": 816620
    },
    {
      "epoch": 2.8620845556151364,
      "grad_norm": 2.765625,
      "learning_rate": 2.554057597399732e-06,
      "loss": 0.8017,
      "step": 816630
    },
    {
      "epoch": 2.862119603122032,
      "grad_norm": 2.84375,
      "learning_rate": 2.55340856873603e-06,
      "loss": 0.7697,
      "step": 816640
    },
    {
      "epoch": 2.8621546506289275,
      "grad_norm": 2.953125,
      "learning_rate": 2.552759540072328e-06,
      "loss": 0.7941,
      "step": 816650
    },
    {
      "epoch": 2.8621896981358232,
      "grad_norm": 2.8125,
      "learning_rate": 2.5521105114086263e-06,
      "loss": 0.8354,
      "step": 816660
    },
    {
      "epoch": 2.8622247456427186,
      "grad_norm": 3.078125,
      "learning_rate": 2.551461482744924e-06,
      "loss": 0.7896,
      "step": 816670
    },
    {
      "epoch": 2.8622597931496143,
      "grad_norm": 2.71875,
      "learning_rate": 2.5508124540812223e-06,
      "loss": 0.7627,
      "step": 816680
    },
    {
      "epoch": 2.86229484065651,
      "grad_norm": 2.609375,
      "learning_rate": 2.5501634254175203e-06,
      "loss": 0.8493,
      "step": 816690
    },
    {
      "epoch": 2.8623298881634054,
      "grad_norm": 3.625,
      "learning_rate": 2.5495143967538183e-06,
      "loss": 0.7899,
      "step": 816700
    },
    {
      "epoch": 2.862364935670301,
      "grad_norm": 3.015625,
      "learning_rate": 2.5488653680901163e-06,
      "loss": 0.8226,
      "step": 816710
    },
    {
      "epoch": 2.8623999831771965,
      "grad_norm": 2.984375,
      "learning_rate": 2.5482163394264147e-06,
      "loss": 0.7925,
      "step": 816720
    },
    {
      "epoch": 2.862435030684092,
      "grad_norm": 3.125,
      "learning_rate": 2.5475673107627127e-06,
      "loss": 0.8981,
      "step": 816730
    },
    {
      "epoch": 2.862470078190988,
      "grad_norm": 2.515625,
      "learning_rate": 2.5469182820990107e-06,
      "loss": 0.7886,
      "step": 816740
    },
    {
      "epoch": 2.8625051256978837,
      "grad_norm": 3.078125,
      "learning_rate": 2.546269253435309e-06,
      "loss": 0.7434,
      "step": 816750
    },
    {
      "epoch": 2.862540173204779,
      "grad_norm": 2.453125,
      "learning_rate": 2.5456202247716067e-06,
      "loss": 0.7687,
      "step": 816760
    },
    {
      "epoch": 2.862575220711675,
      "grad_norm": 3.296875,
      "learning_rate": 2.544971196107905e-06,
      "loss": 0.808,
      "step": 816770
    },
    {
      "epoch": 2.86261026821857,
      "grad_norm": 3.109375,
      "learning_rate": 2.544322167444203e-06,
      "loss": 0.7478,
      "step": 816780
    },
    {
      "epoch": 2.862645315725466,
      "grad_norm": 3.28125,
      "learning_rate": 2.543673138780501e-06,
      "loss": 0.8402,
      "step": 816790
    },
    {
      "epoch": 2.8626803632323616,
      "grad_norm": 2.5625,
      "learning_rate": 2.5430241101167995e-06,
      "loss": 0.7664,
      "step": 816800
    },
    {
      "epoch": 2.862715410739257,
      "grad_norm": 2.296875,
      "learning_rate": 2.5423750814530975e-06,
      "loss": 0.7538,
      "step": 816810
    },
    {
      "epoch": 2.8627504582461527,
      "grad_norm": 2.75,
      "learning_rate": 2.5417260527893955e-06,
      "loss": 0.8103,
      "step": 816820
    },
    {
      "epoch": 2.862785505753048,
      "grad_norm": 2.078125,
      "learning_rate": 2.5410770241256935e-06,
      "loss": 0.7912,
      "step": 816830
    },
    {
      "epoch": 2.862820553259944,
      "grad_norm": 2.96875,
      "learning_rate": 2.5404279954619915e-06,
      "loss": 0.8351,
      "step": 816840
    },
    {
      "epoch": 2.8628556007668395,
      "grad_norm": 2.484375,
      "learning_rate": 2.5397789667982895e-06,
      "loss": 0.8436,
      "step": 816850
    },
    {
      "epoch": 2.8628906482737353,
      "grad_norm": 2.734375,
      "learning_rate": 2.539129938134588e-06,
      "loss": 0.871,
      "step": 816860
    },
    {
      "epoch": 2.8629256957806306,
      "grad_norm": 3.28125,
      "learning_rate": 2.538480909470886e-06,
      "loss": 0.8481,
      "step": 816870
    },
    {
      "epoch": 2.8629607432875264,
      "grad_norm": 3.046875,
      "learning_rate": 2.537831880807184e-06,
      "loss": 0.7439,
      "step": 816880
    },
    {
      "epoch": 2.8629957907944217,
      "grad_norm": 2.96875,
      "learning_rate": 2.5371828521434824e-06,
      "loss": 0.7267,
      "step": 816890
    },
    {
      "epoch": 2.8630308383013174,
      "grad_norm": 2.609375,
      "learning_rate": 2.53653382347978e-06,
      "loss": 0.7912,
      "step": 816900
    },
    {
      "epoch": 2.863065885808213,
      "grad_norm": 2.828125,
      "learning_rate": 2.5358847948160784e-06,
      "loss": 0.8467,
      "step": 816910
    },
    {
      "epoch": 2.8631009333151085,
      "grad_norm": 3.0625,
      "learning_rate": 2.5352357661523764e-06,
      "loss": 0.862,
      "step": 816920
    },
    {
      "epoch": 2.8631359808220043,
      "grad_norm": 3.015625,
      "learning_rate": 2.5345867374886744e-06,
      "loss": 0.8124,
      "step": 816930
    },
    {
      "epoch": 2.8631710283288996,
      "grad_norm": 2.59375,
      "learning_rate": 2.533937708824973e-06,
      "loss": 0.8444,
      "step": 816940
    },
    {
      "epoch": 2.8632060758357953,
      "grad_norm": 2.421875,
      "learning_rate": 2.533288680161271e-06,
      "loss": 0.8057,
      "step": 816950
    },
    {
      "epoch": 2.863241123342691,
      "grad_norm": 2.625,
      "learning_rate": 2.532639651497569e-06,
      "loss": 0.7804,
      "step": 816960
    },
    {
      "epoch": 2.863276170849587,
      "grad_norm": 3.234375,
      "learning_rate": 2.531990622833867e-06,
      "loss": 0.8433,
      "step": 816970
    },
    {
      "epoch": 2.863311218356482,
      "grad_norm": 3.0,
      "learning_rate": 2.5313415941701652e-06,
      "loss": 0.7585,
      "step": 816980
    },
    {
      "epoch": 2.863346265863378,
      "grad_norm": 2.9375,
      "learning_rate": 2.5306925655064632e-06,
      "loss": 0.766,
      "step": 816990
    },
    {
      "epoch": 2.8633813133702732,
      "grad_norm": 2.859375,
      "learning_rate": 2.5300435368427612e-06,
      "loss": 0.7531,
      "step": 817000
    },
    {
      "epoch": 2.863416360877169,
      "grad_norm": 2.5625,
      "learning_rate": 2.5293945081790597e-06,
      "loss": 0.8273,
      "step": 817010
    },
    {
      "epoch": 2.8634514083840648,
      "grad_norm": 2.859375,
      "learning_rate": 2.5287454795153572e-06,
      "loss": 0.8128,
      "step": 817020
    },
    {
      "epoch": 2.86348645589096,
      "grad_norm": 2.96875,
      "learning_rate": 2.5280964508516557e-06,
      "loss": 0.7417,
      "step": 817030
    },
    {
      "epoch": 2.863521503397856,
      "grad_norm": 2.890625,
      "learning_rate": 2.5274474221879537e-06,
      "loss": 0.7993,
      "step": 817040
    },
    {
      "epoch": 2.863556550904751,
      "grad_norm": 2.640625,
      "learning_rate": 2.5267983935242517e-06,
      "loss": 0.8307,
      "step": 817050
    },
    {
      "epoch": 2.863591598411647,
      "grad_norm": 2.546875,
      "learning_rate": 2.52614936486055e-06,
      "loss": 0.7603,
      "step": 817060
    },
    {
      "epoch": 2.8636266459185427,
      "grad_norm": 2.640625,
      "learning_rate": 2.525500336196848e-06,
      "loss": 0.7847,
      "step": 817070
    },
    {
      "epoch": 2.8636616934254384,
      "grad_norm": 2.921875,
      "learning_rate": 2.524851307533146e-06,
      "loss": 0.7951,
      "step": 817080
    },
    {
      "epoch": 2.8636967409323337,
      "grad_norm": 2.796875,
      "learning_rate": 2.524202278869444e-06,
      "loss": 0.8857,
      "step": 817090
    },
    {
      "epoch": 2.8637317884392295,
      "grad_norm": 3.109375,
      "learning_rate": 2.523553250205742e-06,
      "loss": 0.7803,
      "step": 817100
    },
    {
      "epoch": 2.863766835946125,
      "grad_norm": 2.9375,
      "learning_rate": 2.52290422154204e-06,
      "loss": 0.8119,
      "step": 817110
    },
    {
      "epoch": 2.8638018834530206,
      "grad_norm": 3.046875,
      "learning_rate": 2.5222551928783385e-06,
      "loss": 0.8028,
      "step": 817120
    },
    {
      "epoch": 2.8638369309599163,
      "grad_norm": 2.578125,
      "learning_rate": 2.5216061642146365e-06,
      "loss": 0.7791,
      "step": 817130
    },
    {
      "epoch": 2.8638719784668116,
      "grad_norm": 3.09375,
      "learning_rate": 2.5209571355509345e-06,
      "loss": 0.8623,
      "step": 817140
    },
    {
      "epoch": 2.8639070259737074,
      "grad_norm": 3.515625,
      "learning_rate": 2.520308106887233e-06,
      "loss": 0.7784,
      "step": 817150
    },
    {
      "epoch": 2.8639420734806027,
      "grad_norm": 2.703125,
      "learning_rate": 2.5196590782235305e-06,
      "loss": 0.7678,
      "step": 817160
    },
    {
      "epoch": 2.8639771209874985,
      "grad_norm": 3.046875,
      "learning_rate": 2.519010049559829e-06,
      "loss": 0.8219,
      "step": 817170
    },
    {
      "epoch": 2.864012168494394,
      "grad_norm": 3.171875,
      "learning_rate": 2.518361020896127e-06,
      "loss": 0.8035,
      "step": 817180
    },
    {
      "epoch": 2.86404721600129,
      "grad_norm": 2.921875,
      "learning_rate": 2.517711992232425e-06,
      "loss": 0.7724,
      "step": 817190
    },
    {
      "epoch": 2.8640822635081853,
      "grad_norm": 2.828125,
      "learning_rate": 2.5170629635687233e-06,
      "loss": 0.7961,
      "step": 817200
    },
    {
      "epoch": 2.864117311015081,
      "grad_norm": 2.6875,
      "learning_rate": 2.5164139349050213e-06,
      "loss": 0.8217,
      "step": 817210
    },
    {
      "epoch": 2.8641523585219764,
      "grad_norm": 2.421875,
      "learning_rate": 2.5157649062413193e-06,
      "loss": 0.7773,
      "step": 817220
    },
    {
      "epoch": 2.864187406028872,
      "grad_norm": 2.921875,
      "learning_rate": 2.5151158775776173e-06,
      "loss": 0.8839,
      "step": 817230
    },
    {
      "epoch": 2.864222453535768,
      "grad_norm": 4.125,
      "learning_rate": 2.5144668489139158e-06,
      "loss": 0.7422,
      "step": 817240
    },
    {
      "epoch": 2.864257501042663,
      "grad_norm": 2.9375,
      "learning_rate": 2.5138178202502133e-06,
      "loss": 0.7916,
      "step": 817250
    },
    {
      "epoch": 2.864292548549559,
      "grad_norm": 2.84375,
      "learning_rate": 2.5131687915865118e-06,
      "loss": 0.813,
      "step": 817260
    },
    {
      "epoch": 2.8643275960564543,
      "grad_norm": 2.828125,
      "learning_rate": 2.5125197629228098e-06,
      "loss": 0.8042,
      "step": 817270
    },
    {
      "epoch": 2.86436264356335,
      "grad_norm": 2.6875,
      "learning_rate": 2.5118707342591078e-06,
      "loss": 0.7945,
      "step": 817280
    },
    {
      "epoch": 2.8643976910702458,
      "grad_norm": 2.921875,
      "learning_rate": 2.511221705595406e-06,
      "loss": 0.8038,
      "step": 817290
    },
    {
      "epoch": 2.8644327385771415,
      "grad_norm": 2.859375,
      "learning_rate": 2.510572676931704e-06,
      "loss": 0.7504,
      "step": 817300
    },
    {
      "epoch": 2.864467786084037,
      "grad_norm": 2.90625,
      "learning_rate": 2.509923648268002e-06,
      "loss": 0.7843,
      "step": 817310
    },
    {
      "epoch": 2.8645028335909326,
      "grad_norm": 2.84375,
      "learning_rate": 2.5092746196043e-06,
      "loss": 0.8623,
      "step": 817320
    },
    {
      "epoch": 2.864537881097828,
      "grad_norm": 3.125,
      "learning_rate": 2.508625590940598e-06,
      "loss": 0.7624,
      "step": 817330
    },
    {
      "epoch": 2.8645729286047237,
      "grad_norm": 2.828125,
      "learning_rate": 2.5079765622768966e-06,
      "loss": 0.8661,
      "step": 817340
    },
    {
      "epoch": 2.8646079761116194,
      "grad_norm": 2.578125,
      "learning_rate": 2.5073275336131946e-06,
      "loss": 0.8228,
      "step": 817350
    },
    {
      "epoch": 2.8646430236185147,
      "grad_norm": 3.453125,
      "learning_rate": 2.5066785049494926e-06,
      "loss": 0.8485,
      "step": 817360
    },
    {
      "epoch": 2.8646780711254105,
      "grad_norm": 2.6875,
      "learning_rate": 2.5060294762857906e-06,
      "loss": 0.7512,
      "step": 817370
    },
    {
      "epoch": 2.864713118632306,
      "grad_norm": 3.03125,
      "learning_rate": 2.505380447622089e-06,
      "loss": 0.8784,
      "step": 817380
    },
    {
      "epoch": 2.8647481661392016,
      "grad_norm": 2.859375,
      "learning_rate": 2.504731418958387e-06,
      "loss": 0.7705,
      "step": 817390
    },
    {
      "epoch": 2.8647832136460973,
      "grad_norm": 3.15625,
      "learning_rate": 2.504082390294685e-06,
      "loss": 0.882,
      "step": 817400
    },
    {
      "epoch": 2.864818261152993,
      "grad_norm": 2.828125,
      "learning_rate": 2.5034333616309835e-06,
      "loss": 0.7447,
      "step": 817410
    },
    {
      "epoch": 2.8648533086598884,
      "grad_norm": 2.5625,
      "learning_rate": 2.502784332967281e-06,
      "loss": 0.8997,
      "step": 817420
    },
    {
      "epoch": 2.864888356166784,
      "grad_norm": 2.859375,
      "learning_rate": 2.5021353043035795e-06,
      "loss": 0.8224,
      "step": 817430
    },
    {
      "epoch": 2.8649234036736795,
      "grad_norm": 3.0,
      "learning_rate": 2.5014862756398775e-06,
      "loss": 0.8166,
      "step": 817440
    },
    {
      "epoch": 2.8649584511805752,
      "grad_norm": 3.03125,
      "learning_rate": 2.5008372469761755e-06,
      "loss": 0.8797,
      "step": 817450
    },
    {
      "epoch": 2.864993498687471,
      "grad_norm": 2.703125,
      "learning_rate": 2.500188218312474e-06,
      "loss": 0.7918,
      "step": 817460
    },
    {
      "epoch": 2.8650285461943663,
      "grad_norm": 3.09375,
      "learning_rate": 2.499539189648772e-06,
      "loss": 0.8986,
      "step": 817470
    },
    {
      "epoch": 2.865063593701262,
      "grad_norm": 2.875,
      "learning_rate": 2.49889016098507e-06,
      "loss": 0.8296,
      "step": 817480
    },
    {
      "epoch": 2.8650986412081574,
      "grad_norm": 2.390625,
      "learning_rate": 2.498241132321368e-06,
      "loss": 0.8587,
      "step": 817490
    },
    {
      "epoch": 2.865133688715053,
      "grad_norm": 2.984375,
      "learning_rate": 2.4975921036576663e-06,
      "loss": 0.7774,
      "step": 817500
    },
    {
      "epoch": 2.865168736221949,
      "grad_norm": 2.828125,
      "learning_rate": 2.496943074993964e-06,
      "loss": 0.8599,
      "step": 817510
    },
    {
      "epoch": 2.8652037837288447,
      "grad_norm": 3.125,
      "learning_rate": 2.4962940463302623e-06,
      "loss": 0.8153,
      "step": 817520
    },
    {
      "epoch": 2.86523883123574,
      "grad_norm": 3.0,
      "learning_rate": 2.4956450176665603e-06,
      "loss": 0.8213,
      "step": 817530
    },
    {
      "epoch": 2.8652738787426357,
      "grad_norm": 2.65625,
      "learning_rate": 2.4949959890028583e-06,
      "loss": 0.8456,
      "step": 817540
    },
    {
      "epoch": 2.865308926249531,
      "grad_norm": 3.34375,
      "learning_rate": 2.4943469603391567e-06,
      "loss": 0.8424,
      "step": 817550
    },
    {
      "epoch": 2.865343973756427,
      "grad_norm": 2.953125,
      "learning_rate": 2.4936979316754547e-06,
      "loss": 0.8189,
      "step": 817560
    },
    {
      "epoch": 2.8653790212633226,
      "grad_norm": 2.90625,
      "learning_rate": 2.4930489030117527e-06,
      "loss": 0.7526,
      "step": 817570
    },
    {
      "epoch": 2.865414068770218,
      "grad_norm": 2.8125,
      "learning_rate": 2.4923998743480507e-06,
      "loss": 0.7707,
      "step": 817580
    },
    {
      "epoch": 2.8654491162771136,
      "grad_norm": 2.796875,
      "learning_rate": 2.4917508456843487e-06,
      "loss": 0.7751,
      "step": 817590
    },
    {
      "epoch": 2.8654841637840094,
      "grad_norm": 3.328125,
      "learning_rate": 2.491101817020647e-06,
      "loss": 0.8647,
      "step": 817600
    },
    {
      "epoch": 2.8655192112909047,
      "grad_norm": 2.609375,
      "learning_rate": 2.490452788356945e-06,
      "loss": 0.7332,
      "step": 817610
    },
    {
      "epoch": 2.8655542587978005,
      "grad_norm": 3.109375,
      "learning_rate": 2.489803759693243e-06,
      "loss": 0.8753,
      "step": 817620
    },
    {
      "epoch": 2.865589306304696,
      "grad_norm": 2.75,
      "learning_rate": 2.489154731029541e-06,
      "loss": 0.7544,
      "step": 817630
    },
    {
      "epoch": 2.8656243538115915,
      "grad_norm": 2.984375,
      "learning_rate": 2.4885057023658396e-06,
      "loss": 0.7965,
      "step": 817640
    },
    {
      "epoch": 2.8656594013184873,
      "grad_norm": 2.78125,
      "learning_rate": 2.487856673702137e-06,
      "loss": 0.7116,
      "step": 817650
    },
    {
      "epoch": 2.8656944488253826,
      "grad_norm": 2.5,
      "learning_rate": 2.4872076450384356e-06,
      "loss": 0.8345,
      "step": 817660
    },
    {
      "epoch": 2.8657294963322784,
      "grad_norm": 3.0,
      "learning_rate": 2.486558616374734e-06,
      "loss": 0.8187,
      "step": 817670
    },
    {
      "epoch": 2.865764543839174,
      "grad_norm": 2.9375,
      "learning_rate": 2.4859095877110316e-06,
      "loss": 0.8865,
      "step": 817680
    },
    {
      "epoch": 2.86579959134607,
      "grad_norm": 2.6875,
      "learning_rate": 2.48526055904733e-06,
      "loss": 0.7674,
      "step": 817690
    },
    {
      "epoch": 2.865834638852965,
      "grad_norm": 3.34375,
      "learning_rate": 2.484611530383628e-06,
      "loss": 0.8488,
      "step": 817700
    },
    {
      "epoch": 2.865869686359861,
      "grad_norm": 3.125,
      "learning_rate": 2.483962501719926e-06,
      "loss": 0.8329,
      "step": 817710
    },
    {
      "epoch": 2.8659047338667563,
      "grad_norm": 3.09375,
      "learning_rate": 2.483313473056224e-06,
      "loss": 0.7329,
      "step": 817720
    },
    {
      "epoch": 2.865939781373652,
      "grad_norm": 2.421875,
      "learning_rate": 2.4826644443925224e-06,
      "loss": 0.9188,
      "step": 817730
    },
    {
      "epoch": 2.8659748288805478,
      "grad_norm": 3.34375,
      "learning_rate": 2.4820154157288204e-06,
      "loss": 0.7801,
      "step": 817740
    },
    {
      "epoch": 2.866009876387443,
      "grad_norm": 3.125,
      "learning_rate": 2.4813663870651184e-06,
      "loss": 0.8265,
      "step": 817750
    },
    {
      "epoch": 2.866044923894339,
      "grad_norm": 2.84375,
      "learning_rate": 2.480717358401417e-06,
      "loss": 0.8187,
      "step": 817760
    },
    {
      "epoch": 2.866079971401234,
      "grad_norm": 2.90625,
      "learning_rate": 2.4800683297377144e-06,
      "loss": 0.8094,
      "step": 817770
    },
    {
      "epoch": 2.86611501890813,
      "grad_norm": 2.625,
      "learning_rate": 2.479419301074013e-06,
      "loss": 0.735,
      "step": 817780
    },
    {
      "epoch": 2.8661500664150257,
      "grad_norm": 3.015625,
      "learning_rate": 2.478770272410311e-06,
      "loss": 0.9124,
      "step": 817790
    },
    {
      "epoch": 2.8661851139219214,
      "grad_norm": 3.125,
      "learning_rate": 2.478121243746609e-06,
      "loss": 0.7444,
      "step": 817800
    },
    {
      "epoch": 2.8662201614288167,
      "grad_norm": 2.328125,
      "learning_rate": 2.4774722150829073e-06,
      "loss": 0.7749,
      "step": 817810
    },
    {
      "epoch": 2.8662552089357125,
      "grad_norm": 3.125,
      "learning_rate": 2.4768231864192053e-06,
      "loss": 0.7858,
      "step": 817820
    },
    {
      "epoch": 2.866290256442608,
      "grad_norm": 2.875,
      "learning_rate": 2.4761741577555033e-06,
      "loss": 0.7567,
      "step": 817830
    },
    {
      "epoch": 2.8663253039495036,
      "grad_norm": 2.5,
      "learning_rate": 2.4755251290918013e-06,
      "loss": 0.7332,
      "step": 817840
    },
    {
      "epoch": 2.8663603514563993,
      "grad_norm": 2.828125,
      "learning_rate": 2.4748761004280993e-06,
      "loss": 0.7926,
      "step": 817850
    },
    {
      "epoch": 2.8663953989632946,
      "grad_norm": 2.875,
      "learning_rate": 2.4742270717643977e-06,
      "loss": 0.7674,
      "step": 817860
    },
    {
      "epoch": 2.8664304464701904,
      "grad_norm": 2.859375,
      "learning_rate": 2.4735780431006957e-06,
      "loss": 0.7547,
      "step": 817870
    },
    {
      "epoch": 2.8664654939770857,
      "grad_norm": 2.6875,
      "learning_rate": 2.4729290144369937e-06,
      "loss": 0.8119,
      "step": 817880
    },
    {
      "epoch": 2.8665005414839815,
      "grad_norm": 2.921875,
      "learning_rate": 2.4722799857732917e-06,
      "loss": 0.7756,
      "step": 817890
    },
    {
      "epoch": 2.8665355889908772,
      "grad_norm": 2.9375,
      "learning_rate": 2.47163095710959e-06,
      "loss": 0.8386,
      "step": 817900
    },
    {
      "epoch": 2.866570636497773,
      "grad_norm": 3.078125,
      "learning_rate": 2.4709819284458877e-06,
      "loss": 0.7778,
      "step": 817910
    },
    {
      "epoch": 2.8666056840046683,
      "grad_norm": 2.640625,
      "learning_rate": 2.470332899782186e-06,
      "loss": 0.6999,
      "step": 817920
    },
    {
      "epoch": 2.866640731511564,
      "grad_norm": 2.71875,
      "learning_rate": 2.4696838711184845e-06,
      "loss": 0.7576,
      "step": 817930
    },
    {
      "epoch": 2.8666757790184594,
      "grad_norm": 2.9375,
      "learning_rate": 2.469034842454782e-06,
      "loss": 0.8197,
      "step": 817940
    },
    {
      "epoch": 2.866710826525355,
      "grad_norm": 3.25,
      "learning_rate": 2.4683858137910805e-06,
      "loss": 0.816,
      "step": 817950
    },
    {
      "epoch": 2.866745874032251,
      "grad_norm": 3.375,
      "learning_rate": 2.4677367851273785e-06,
      "loss": 0.8066,
      "step": 817960
    },
    {
      "epoch": 2.866780921539146,
      "grad_norm": 2.953125,
      "learning_rate": 2.4670877564636765e-06,
      "loss": 0.8293,
      "step": 817970
    },
    {
      "epoch": 2.866815969046042,
      "grad_norm": 3.15625,
      "learning_rate": 2.4664387277999745e-06,
      "loss": 0.7397,
      "step": 817980
    },
    {
      "epoch": 2.8668510165529373,
      "grad_norm": 2.9375,
      "learning_rate": 2.465789699136273e-06,
      "loss": 0.7484,
      "step": 817990
    },
    {
      "epoch": 2.866886064059833,
      "grad_norm": 2.8125,
      "learning_rate": 2.465140670472571e-06,
      "loss": 0.8906,
      "step": 818000
    },
    {
      "epoch": 2.866921111566729,
      "grad_norm": 2.984375,
      "learning_rate": 2.464491641808869e-06,
      "loss": 0.8119,
      "step": 818010
    },
    {
      "epoch": 2.8669561590736246,
      "grad_norm": 2.53125,
      "learning_rate": 2.463842613145167e-06,
      "loss": 0.8685,
      "step": 818020
    },
    {
      "epoch": 2.86699120658052,
      "grad_norm": 3.390625,
      "learning_rate": 2.463193584481465e-06,
      "loss": 0.8294,
      "step": 818030
    },
    {
      "epoch": 2.8670262540874156,
      "grad_norm": 2.953125,
      "learning_rate": 2.4625445558177634e-06,
      "loss": 0.8156,
      "step": 818040
    },
    {
      "epoch": 2.867061301594311,
      "grad_norm": 2.671875,
      "learning_rate": 2.4618955271540614e-06,
      "loss": 0.7875,
      "step": 818050
    },
    {
      "epoch": 2.8670963491012067,
      "grad_norm": 3.171875,
      "learning_rate": 2.4612464984903594e-06,
      "loss": 0.877,
      "step": 818060
    },
    {
      "epoch": 2.8671313966081025,
      "grad_norm": 3.203125,
      "learning_rate": 2.460597469826658e-06,
      "loss": 0.8992,
      "step": 818070
    },
    {
      "epoch": 2.8671664441149978,
      "grad_norm": 3.3125,
      "learning_rate": 2.4599484411629554e-06,
      "loss": 0.7928,
      "step": 818080
    },
    {
      "epoch": 2.8672014916218935,
      "grad_norm": 2.78125,
      "learning_rate": 2.459299412499254e-06,
      "loss": 0.8171,
      "step": 818090
    },
    {
      "epoch": 2.867236539128789,
      "grad_norm": 2.515625,
      "learning_rate": 2.458650383835552e-06,
      "loss": 0.7494,
      "step": 818100
    },
    {
      "epoch": 2.8672715866356846,
      "grad_norm": 3.390625,
      "learning_rate": 2.45800135517185e-06,
      "loss": 0.8089,
      "step": 818110
    },
    {
      "epoch": 2.8673066341425804,
      "grad_norm": 2.953125,
      "learning_rate": 2.457352326508148e-06,
      "loss": 0.8075,
      "step": 818120
    },
    {
      "epoch": 2.867341681649476,
      "grad_norm": 2.6875,
      "learning_rate": 2.4567032978444462e-06,
      "loss": 0.7946,
      "step": 818130
    },
    {
      "epoch": 2.8673767291563714,
      "grad_norm": 2.546875,
      "learning_rate": 2.4560542691807442e-06,
      "loss": 0.8246,
      "step": 818140
    },
    {
      "epoch": 2.867411776663267,
      "grad_norm": 2.84375,
      "learning_rate": 2.4554052405170422e-06,
      "loss": 0.7987,
      "step": 818150
    },
    {
      "epoch": 2.8674468241701625,
      "grad_norm": 2.828125,
      "learning_rate": 2.4547562118533407e-06,
      "loss": 0.9319,
      "step": 818160
    },
    {
      "epoch": 2.8674818716770583,
      "grad_norm": 3.078125,
      "learning_rate": 2.4541071831896382e-06,
      "loss": 0.8993,
      "step": 818170
    },
    {
      "epoch": 2.867516919183954,
      "grad_norm": 2.453125,
      "learning_rate": 2.4534581545259367e-06,
      "loss": 0.8327,
      "step": 818180
    },
    {
      "epoch": 2.8675519666908493,
      "grad_norm": 3.171875,
      "learning_rate": 2.4528091258622347e-06,
      "loss": 0.7753,
      "step": 818190
    },
    {
      "epoch": 2.867587014197745,
      "grad_norm": 2.859375,
      "learning_rate": 2.4521600971985327e-06,
      "loss": 0.775,
      "step": 818200
    },
    {
      "epoch": 2.8676220617046404,
      "grad_norm": 2.890625,
      "learning_rate": 2.451511068534831e-06,
      "loss": 0.7951,
      "step": 818210
    },
    {
      "epoch": 2.867657109211536,
      "grad_norm": 2.875,
      "learning_rate": 2.450862039871129e-06,
      "loss": 0.7315,
      "step": 818220
    },
    {
      "epoch": 2.867692156718432,
      "grad_norm": 3.046875,
      "learning_rate": 2.450213011207427e-06,
      "loss": 0.7862,
      "step": 818230
    },
    {
      "epoch": 2.8677272042253277,
      "grad_norm": 3.421875,
      "learning_rate": 2.449563982543725e-06,
      "loss": 0.8466,
      "step": 818240
    },
    {
      "epoch": 2.867762251732223,
      "grad_norm": 3.234375,
      "learning_rate": 2.4489149538800235e-06,
      "loss": 0.783,
      "step": 818250
    },
    {
      "epoch": 2.8677972992391187,
      "grad_norm": 2.84375,
      "learning_rate": 2.448265925216321e-06,
      "loss": 0.8318,
      "step": 818260
    },
    {
      "epoch": 2.867832346746014,
      "grad_norm": 2.859375,
      "learning_rate": 2.4476168965526195e-06,
      "loss": 0.7423,
      "step": 818270
    },
    {
      "epoch": 2.86786739425291,
      "grad_norm": 3.34375,
      "learning_rate": 2.4469678678889175e-06,
      "loss": 0.7128,
      "step": 818280
    },
    {
      "epoch": 2.8679024417598056,
      "grad_norm": 2.4375,
      "learning_rate": 2.4463188392252155e-06,
      "loss": 0.7696,
      "step": 818290
    },
    {
      "epoch": 2.867937489266701,
      "grad_norm": 3.3125,
      "learning_rate": 2.445669810561514e-06,
      "loss": 0.7706,
      "step": 818300
    },
    {
      "epoch": 2.8679725367735966,
      "grad_norm": 3.21875,
      "learning_rate": 2.445020781897812e-06,
      "loss": 0.8946,
      "step": 818310
    },
    {
      "epoch": 2.868007584280492,
      "grad_norm": 3.453125,
      "learning_rate": 2.44437175323411e-06,
      "loss": 0.8636,
      "step": 818320
    },
    {
      "epoch": 2.8680426317873877,
      "grad_norm": 2.859375,
      "learning_rate": 2.4437227245704083e-06,
      "loss": 0.8833,
      "step": 818330
    },
    {
      "epoch": 2.8680776792942835,
      "grad_norm": 2.640625,
      "learning_rate": 2.443073695906706e-06,
      "loss": 0.8355,
      "step": 818340
    },
    {
      "epoch": 2.8681127268011792,
      "grad_norm": 2.671875,
      "learning_rate": 2.4424246672430043e-06,
      "loss": 0.8691,
      "step": 818350
    },
    {
      "epoch": 2.8681477743080745,
      "grad_norm": 2.640625,
      "learning_rate": 2.4417756385793023e-06,
      "loss": 0.8284,
      "step": 818360
    },
    {
      "epoch": 2.8681828218149703,
      "grad_norm": 2.9375,
      "learning_rate": 2.4411266099156003e-06,
      "loss": 0.7814,
      "step": 818370
    },
    {
      "epoch": 2.8682178693218656,
      "grad_norm": 3.328125,
      "learning_rate": 2.4404775812518983e-06,
      "loss": 0.8213,
      "step": 818380
    },
    {
      "epoch": 2.8682529168287614,
      "grad_norm": 2.65625,
      "learning_rate": 2.4398285525881968e-06,
      "loss": 0.8452,
      "step": 818390
    },
    {
      "epoch": 2.868287964335657,
      "grad_norm": 2.625,
      "learning_rate": 2.4391795239244948e-06,
      "loss": 0.8108,
      "step": 818400
    },
    {
      "epoch": 2.8683230118425525,
      "grad_norm": 2.75,
      "learning_rate": 2.4385304952607928e-06,
      "loss": 0.7879,
      "step": 818410
    },
    {
      "epoch": 2.868358059349448,
      "grad_norm": 2.4375,
      "learning_rate": 2.437881466597091e-06,
      "loss": 0.8438,
      "step": 818420
    },
    {
      "epoch": 2.8683931068563435,
      "grad_norm": 3.171875,
      "learning_rate": 2.4372324379333888e-06,
      "loss": 0.7656,
      "step": 818430
    },
    {
      "epoch": 2.8684281543632393,
      "grad_norm": 3.078125,
      "learning_rate": 2.436583409269687e-06,
      "loss": 0.7917,
      "step": 818440
    },
    {
      "epoch": 2.868463201870135,
      "grad_norm": 2.921875,
      "learning_rate": 2.435934380605985e-06,
      "loss": 0.8918,
      "step": 818450
    },
    {
      "epoch": 2.868498249377031,
      "grad_norm": 2.453125,
      "learning_rate": 2.435285351942283e-06,
      "loss": 0.7572,
      "step": 818460
    },
    {
      "epoch": 2.868533296883926,
      "grad_norm": 2.9375,
      "learning_rate": 2.4346363232785816e-06,
      "loss": 0.8063,
      "step": 818470
    },
    {
      "epoch": 2.868568344390822,
      "grad_norm": 3.09375,
      "learning_rate": 2.4339872946148796e-06,
      "loss": 0.8769,
      "step": 818480
    },
    {
      "epoch": 2.868603391897717,
      "grad_norm": 2.875,
      "learning_rate": 2.4333382659511776e-06,
      "loss": 0.8545,
      "step": 818490
    },
    {
      "epoch": 2.868638439404613,
      "grad_norm": 3.140625,
      "learning_rate": 2.4326892372874756e-06,
      "loss": 0.7775,
      "step": 818500
    },
    {
      "epoch": 2.8686734869115087,
      "grad_norm": 3.0625,
      "learning_rate": 2.4320402086237736e-06,
      "loss": 0.7302,
      "step": 818510
    },
    {
      "epoch": 2.868708534418404,
      "grad_norm": 2.546875,
      "learning_rate": 2.4313911799600716e-06,
      "loss": 0.7841,
      "step": 818520
    },
    {
      "epoch": 2.8687435819252998,
      "grad_norm": 2.796875,
      "learning_rate": 2.43074215129637e-06,
      "loss": 0.7896,
      "step": 818530
    },
    {
      "epoch": 2.868778629432195,
      "grad_norm": 3.125,
      "learning_rate": 2.430093122632668e-06,
      "loss": 0.8277,
      "step": 818540
    },
    {
      "epoch": 2.868813676939091,
      "grad_norm": 3.0625,
      "learning_rate": 2.429444093968966e-06,
      "loss": 0.7774,
      "step": 818550
    },
    {
      "epoch": 2.8688487244459866,
      "grad_norm": 2.84375,
      "learning_rate": 2.4287950653052645e-06,
      "loss": 0.7941,
      "step": 818560
    },
    {
      "epoch": 2.8688837719528824,
      "grad_norm": 2.921875,
      "learning_rate": 2.428146036641562e-06,
      "loss": 0.7733,
      "step": 818570
    },
    {
      "epoch": 2.8689188194597777,
      "grad_norm": 3.0,
      "learning_rate": 2.4274970079778605e-06,
      "loss": 0.7993,
      "step": 818580
    },
    {
      "epoch": 2.8689538669666734,
      "grad_norm": 2.734375,
      "learning_rate": 2.4268479793141585e-06,
      "loss": 0.7261,
      "step": 818590
    },
    {
      "epoch": 2.8689889144735687,
      "grad_norm": 3.25,
      "learning_rate": 2.4261989506504565e-06,
      "loss": 0.8525,
      "step": 818600
    },
    {
      "epoch": 2.8690239619804645,
      "grad_norm": 2.734375,
      "learning_rate": 2.425549921986755e-06,
      "loss": 0.6952,
      "step": 818610
    },
    {
      "epoch": 2.8690590094873603,
      "grad_norm": 2.5625,
      "learning_rate": 2.424900893323053e-06,
      "loss": 0.81,
      "step": 818620
    },
    {
      "epoch": 2.8690940569942556,
      "grad_norm": 2.671875,
      "learning_rate": 2.424251864659351e-06,
      "loss": 0.776,
      "step": 818630
    },
    {
      "epoch": 2.8691291045011513,
      "grad_norm": 3.3125,
      "learning_rate": 2.423602835995649e-06,
      "loss": 0.8758,
      "step": 818640
    },
    {
      "epoch": 2.8691641520080466,
      "grad_norm": 3.171875,
      "learning_rate": 2.4229538073319473e-06,
      "loss": 0.8286,
      "step": 818650
    },
    {
      "epoch": 2.8691991995149424,
      "grad_norm": 2.796875,
      "learning_rate": 2.422304778668245e-06,
      "loss": 0.7292,
      "step": 818660
    },
    {
      "epoch": 2.869234247021838,
      "grad_norm": 3.09375,
      "learning_rate": 2.4216557500045433e-06,
      "loss": 0.8006,
      "step": 818670
    },
    {
      "epoch": 2.869269294528734,
      "grad_norm": 2.65625,
      "learning_rate": 2.4210067213408417e-06,
      "loss": 0.7563,
      "step": 818680
    },
    {
      "epoch": 2.8693043420356292,
      "grad_norm": 3.015625,
      "learning_rate": 2.4203576926771393e-06,
      "loss": 0.8065,
      "step": 818690
    },
    {
      "epoch": 2.869339389542525,
      "grad_norm": 2.671875,
      "learning_rate": 2.4197086640134377e-06,
      "loss": 0.8664,
      "step": 818700
    },
    {
      "epoch": 2.8693744370494203,
      "grad_norm": 2.3125,
      "learning_rate": 2.4190596353497357e-06,
      "loss": 0.7895,
      "step": 818710
    },
    {
      "epoch": 2.869409484556316,
      "grad_norm": 2.6875,
      "learning_rate": 2.4184106066860337e-06,
      "loss": 0.6992,
      "step": 818720
    },
    {
      "epoch": 2.869444532063212,
      "grad_norm": 2.8125,
      "learning_rate": 2.417761578022332e-06,
      "loss": 0.7675,
      "step": 818730
    },
    {
      "epoch": 2.869479579570107,
      "grad_norm": 3.5,
      "learning_rate": 2.41711254935863e-06,
      "loss": 0.8969,
      "step": 818740
    },
    {
      "epoch": 2.869514627077003,
      "grad_norm": 3.171875,
      "learning_rate": 2.416463520694928e-06,
      "loss": 0.8868,
      "step": 818750
    },
    {
      "epoch": 2.869549674583898,
      "grad_norm": 3.078125,
      "learning_rate": 2.415814492031226e-06,
      "loss": 0.8289,
      "step": 818760
    },
    {
      "epoch": 2.869584722090794,
      "grad_norm": 2.8125,
      "learning_rate": 2.415165463367524e-06,
      "loss": 0.7695,
      "step": 818770
    },
    {
      "epoch": 2.8696197695976897,
      "grad_norm": 3.0,
      "learning_rate": 2.414516434703822e-06,
      "loss": 0.8068,
      "step": 818780
    },
    {
      "epoch": 2.8696548171045855,
      "grad_norm": 3.03125,
      "learning_rate": 2.4138674060401206e-06,
      "loss": 0.7888,
      "step": 818790
    },
    {
      "epoch": 2.869689864611481,
      "grad_norm": 3.09375,
      "learning_rate": 2.4132183773764186e-06,
      "loss": 0.7793,
      "step": 818800
    },
    {
      "epoch": 2.8697249121183765,
      "grad_norm": 2.671875,
      "learning_rate": 2.4125693487127166e-06,
      "loss": 0.7432,
      "step": 818810
    },
    {
      "epoch": 2.869759959625272,
      "grad_norm": 3.109375,
      "learning_rate": 2.411920320049015e-06,
      "loss": 0.8196,
      "step": 818820
    },
    {
      "epoch": 2.8697950071321676,
      "grad_norm": 3.578125,
      "learning_rate": 2.4112712913853126e-06,
      "loss": 0.8661,
      "step": 818830
    },
    {
      "epoch": 2.8698300546390634,
      "grad_norm": 2.71875,
      "learning_rate": 2.410622262721611e-06,
      "loss": 0.7831,
      "step": 818840
    },
    {
      "epoch": 2.8698651021459587,
      "grad_norm": 3.046875,
      "learning_rate": 2.409973234057909e-06,
      "loss": 0.8015,
      "step": 818850
    },
    {
      "epoch": 2.8699001496528544,
      "grad_norm": 2.6875,
      "learning_rate": 2.409324205394207e-06,
      "loss": 0.8047,
      "step": 818860
    },
    {
      "epoch": 2.86993519715975,
      "grad_norm": 2.875,
      "learning_rate": 2.4086751767305054e-06,
      "loss": 0.805,
      "step": 818870
    },
    {
      "epoch": 2.8699702446666455,
      "grad_norm": 3.140625,
      "learning_rate": 2.4080261480668034e-06,
      "loss": 0.8858,
      "step": 818880
    },
    {
      "epoch": 2.8700052921735413,
      "grad_norm": 2.953125,
      "learning_rate": 2.4073771194031014e-06,
      "loss": 0.7182,
      "step": 818890
    },
    {
      "epoch": 2.870040339680437,
      "grad_norm": 2.625,
      "learning_rate": 2.4067280907393994e-06,
      "loss": 0.8228,
      "step": 818900
    },
    {
      "epoch": 2.8700753871873323,
      "grad_norm": 3.109375,
      "learning_rate": 2.406079062075698e-06,
      "loss": 0.7966,
      "step": 818910
    },
    {
      "epoch": 2.870110434694228,
      "grad_norm": 2.84375,
      "learning_rate": 2.4054300334119954e-06,
      "loss": 0.8085,
      "step": 818920
    },
    {
      "epoch": 2.8701454822011234,
      "grad_norm": 2.734375,
      "learning_rate": 2.404781004748294e-06,
      "loss": 0.7836,
      "step": 818930
    },
    {
      "epoch": 2.870180529708019,
      "grad_norm": 3.375,
      "learning_rate": 2.4041319760845923e-06,
      "loss": 0.7877,
      "step": 818940
    },
    {
      "epoch": 2.870215577214915,
      "grad_norm": 2.84375,
      "learning_rate": 2.40348294742089e-06,
      "loss": 0.8411,
      "step": 818950
    },
    {
      "epoch": 2.8702506247218103,
      "grad_norm": 2.9375,
      "learning_rate": 2.4028339187571883e-06,
      "loss": 0.897,
      "step": 818960
    },
    {
      "epoch": 2.870285672228706,
      "grad_norm": 2.921875,
      "learning_rate": 2.4021848900934863e-06,
      "loss": 0.8371,
      "step": 818970
    },
    {
      "epoch": 2.8703207197356018,
      "grad_norm": 2.625,
      "learning_rate": 2.4015358614297843e-06,
      "loss": 0.8229,
      "step": 818980
    },
    {
      "epoch": 2.870355767242497,
      "grad_norm": 3.140625,
      "learning_rate": 2.4008868327660823e-06,
      "loss": 0.7965,
      "step": 818990
    },
    {
      "epoch": 2.870390814749393,
      "grad_norm": 3.78125,
      "learning_rate": 2.4002378041023807e-06,
      "loss": 0.7969,
      "step": 819000
    },
    {
      "epoch": 2.8704258622562886,
      "grad_norm": 2.921875,
      "learning_rate": 2.3995887754386787e-06,
      "loss": 0.8296,
      "step": 819010
    },
    {
      "epoch": 2.870460909763184,
      "grad_norm": 2.84375,
      "learning_rate": 2.3989397467749767e-06,
      "loss": 0.8445,
      "step": 819020
    },
    {
      "epoch": 2.8704959572700797,
      "grad_norm": 3.203125,
      "learning_rate": 2.3982907181112747e-06,
      "loss": 0.7954,
      "step": 819030
    },
    {
      "epoch": 2.870531004776975,
      "grad_norm": 3.40625,
      "learning_rate": 2.3976416894475727e-06,
      "loss": 0.8231,
      "step": 819040
    },
    {
      "epoch": 2.8705660522838707,
      "grad_norm": 3.109375,
      "learning_rate": 2.396992660783871e-06,
      "loss": 0.7994,
      "step": 819050
    },
    {
      "epoch": 2.8706010997907665,
      "grad_norm": 2.5,
      "learning_rate": 2.396343632120169e-06,
      "loss": 0.6852,
      "step": 819060
    },
    {
      "epoch": 2.8706361472976623,
      "grad_norm": 3.015625,
      "learning_rate": 2.395694603456467e-06,
      "loss": 0.7768,
      "step": 819070
    },
    {
      "epoch": 2.8706711948045576,
      "grad_norm": 3.40625,
      "learning_rate": 2.3950455747927655e-06,
      "loss": 0.8417,
      "step": 819080
    },
    {
      "epoch": 2.8707062423114533,
      "grad_norm": 2.875,
      "learning_rate": 2.394396546129063e-06,
      "loss": 0.8297,
      "step": 819090
    },
    {
      "epoch": 2.8707412898183486,
      "grad_norm": 3.203125,
      "learning_rate": 2.3937475174653615e-06,
      "loss": 0.8394,
      "step": 819100
    },
    {
      "epoch": 2.8707763373252444,
      "grad_norm": 2.828125,
      "learning_rate": 2.3930984888016595e-06,
      "loss": 0.7154,
      "step": 819110
    },
    {
      "epoch": 2.87081138483214,
      "grad_norm": 2.875,
      "learning_rate": 2.3924494601379575e-06,
      "loss": 0.8257,
      "step": 819120
    },
    {
      "epoch": 2.8708464323390355,
      "grad_norm": 3.0625,
      "learning_rate": 2.3918004314742555e-06,
      "loss": 0.8388,
      "step": 819130
    },
    {
      "epoch": 2.8708814798459312,
      "grad_norm": 3.265625,
      "learning_rate": 2.391151402810554e-06,
      "loss": 0.8514,
      "step": 819140
    },
    {
      "epoch": 2.8709165273528265,
      "grad_norm": 2.296875,
      "learning_rate": 2.390502374146852e-06,
      "loss": 0.7552,
      "step": 819150
    },
    {
      "epoch": 2.8709515748597223,
      "grad_norm": 2.921875,
      "learning_rate": 2.38985334548315e-06,
      "loss": 0.7946,
      "step": 819160
    },
    {
      "epoch": 2.870986622366618,
      "grad_norm": 3.125,
      "learning_rate": 2.3892043168194484e-06,
      "loss": 0.8074,
      "step": 819170
    },
    {
      "epoch": 2.871021669873514,
      "grad_norm": 2.640625,
      "learning_rate": 2.388555288155746e-06,
      "loss": 0.7487,
      "step": 819180
    },
    {
      "epoch": 2.871056717380409,
      "grad_norm": 2.65625,
      "learning_rate": 2.3879062594920444e-06,
      "loss": 0.7193,
      "step": 819190
    },
    {
      "epoch": 2.871091764887305,
      "grad_norm": 3.046875,
      "learning_rate": 2.3872572308283424e-06,
      "loss": 0.8264,
      "step": 819200
    },
    {
      "epoch": 2.8711268123942,
      "grad_norm": 2.65625,
      "learning_rate": 2.3866082021646404e-06,
      "loss": 0.7698,
      "step": 819210
    },
    {
      "epoch": 2.871161859901096,
      "grad_norm": 3.140625,
      "learning_rate": 2.385959173500939e-06,
      "loss": 0.7694,
      "step": 819220
    },
    {
      "epoch": 2.8711969074079917,
      "grad_norm": 2.65625,
      "learning_rate": 2.385310144837237e-06,
      "loss": 0.827,
      "step": 819230
    },
    {
      "epoch": 2.871231954914887,
      "grad_norm": 3.0625,
      "learning_rate": 2.384661116173535e-06,
      "loss": 0.7053,
      "step": 819240
    },
    {
      "epoch": 2.871267002421783,
      "grad_norm": 2.890625,
      "learning_rate": 2.384012087509833e-06,
      "loss": 0.776,
      "step": 819250
    },
    {
      "epoch": 2.871302049928678,
      "grad_norm": 3.0,
      "learning_rate": 2.383363058846131e-06,
      "loss": 0.797,
      "step": 819260
    },
    {
      "epoch": 2.871337097435574,
      "grad_norm": 3.109375,
      "learning_rate": 2.3827140301824292e-06,
      "loss": 0.7923,
      "step": 819270
    },
    {
      "epoch": 2.8713721449424696,
      "grad_norm": 3.3125,
      "learning_rate": 2.3820650015187272e-06,
      "loss": 0.7487,
      "step": 819280
    },
    {
      "epoch": 2.8714071924493654,
      "grad_norm": 3.515625,
      "learning_rate": 2.3814159728550252e-06,
      "loss": 0.8617,
      "step": 819290
    },
    {
      "epoch": 2.8714422399562607,
      "grad_norm": 2.96875,
      "learning_rate": 2.3807669441913232e-06,
      "loss": 0.8204,
      "step": 819300
    },
    {
      "epoch": 2.8714772874631564,
      "grad_norm": 3.375,
      "learning_rate": 2.3801179155276217e-06,
      "loss": 0.885,
      "step": 819310
    },
    {
      "epoch": 2.8715123349700518,
      "grad_norm": 2.734375,
      "learning_rate": 2.3794688868639192e-06,
      "loss": 0.7742,
      "step": 819320
    },
    {
      "epoch": 2.8715473824769475,
      "grad_norm": 3.453125,
      "learning_rate": 2.3788198582002177e-06,
      "loss": 0.7946,
      "step": 819330
    },
    {
      "epoch": 2.8715824299838433,
      "grad_norm": 2.75,
      "learning_rate": 2.378170829536516e-06,
      "loss": 0.7073,
      "step": 819340
    },
    {
      "epoch": 2.8716174774907386,
      "grad_norm": 2.703125,
      "learning_rate": 2.3775218008728137e-06,
      "loss": 0.87,
      "step": 819350
    },
    {
      "epoch": 2.8716525249976343,
      "grad_norm": 3.25,
      "learning_rate": 2.376872772209112e-06,
      "loss": 0.8245,
      "step": 819360
    },
    {
      "epoch": 2.8716875725045297,
      "grad_norm": 3.0,
      "learning_rate": 2.37622374354541e-06,
      "loss": 0.731,
      "step": 819370
    },
    {
      "epoch": 2.8717226200114254,
      "grad_norm": 3.0625,
      "learning_rate": 2.375574714881708e-06,
      "loss": 0.8027,
      "step": 819380
    },
    {
      "epoch": 2.871757667518321,
      "grad_norm": 2.671875,
      "learning_rate": 2.374925686218006e-06,
      "loss": 0.8433,
      "step": 819390
    },
    {
      "epoch": 2.871792715025217,
      "grad_norm": 2.890625,
      "learning_rate": 2.3742766575543045e-06,
      "loss": 0.7738,
      "step": 819400
    },
    {
      "epoch": 2.8718277625321122,
      "grad_norm": 2.953125,
      "learning_rate": 2.3736276288906025e-06,
      "loss": 0.8275,
      "step": 819410
    },
    {
      "epoch": 2.871862810039008,
      "grad_norm": 2.734375,
      "learning_rate": 2.3729786002269005e-06,
      "loss": 0.7278,
      "step": 819420
    },
    {
      "epoch": 2.8718978575459033,
      "grad_norm": 2.921875,
      "learning_rate": 2.372329571563199e-06,
      "loss": 0.8659,
      "step": 819430
    },
    {
      "epoch": 2.871932905052799,
      "grad_norm": 2.265625,
      "learning_rate": 2.3716805428994965e-06,
      "loss": 0.7532,
      "step": 819440
    },
    {
      "epoch": 2.871967952559695,
      "grad_norm": 2.609375,
      "learning_rate": 2.371031514235795e-06,
      "loss": 0.8127,
      "step": 819450
    },
    {
      "epoch": 2.87200300006659,
      "grad_norm": 2.640625,
      "learning_rate": 2.370382485572093e-06,
      "loss": 0.7703,
      "step": 819460
    },
    {
      "epoch": 2.872038047573486,
      "grad_norm": 2.484375,
      "learning_rate": 2.369733456908391e-06,
      "loss": 0.7807,
      "step": 819470
    },
    {
      "epoch": 2.872073095080381,
      "grad_norm": 3.078125,
      "learning_rate": 2.3690844282446893e-06,
      "loss": 0.7686,
      "step": 819480
    },
    {
      "epoch": 2.872108142587277,
      "grad_norm": 2.859375,
      "learning_rate": 2.3684353995809873e-06,
      "loss": 0.8255,
      "step": 819490
    },
    {
      "epoch": 2.8721431900941727,
      "grad_norm": 3.078125,
      "learning_rate": 2.3677863709172853e-06,
      "loss": 0.8143,
      "step": 819500
    },
    {
      "epoch": 2.8721782376010685,
      "grad_norm": 3.234375,
      "learning_rate": 2.3671373422535833e-06,
      "loss": 0.8053,
      "step": 819510
    },
    {
      "epoch": 2.872213285107964,
      "grad_norm": 2.859375,
      "learning_rate": 2.3664883135898813e-06,
      "loss": 0.8566,
      "step": 819520
    },
    {
      "epoch": 2.8722483326148596,
      "grad_norm": 3.109375,
      "learning_rate": 2.3658392849261793e-06,
      "loss": 0.8593,
      "step": 819530
    },
    {
      "epoch": 2.872283380121755,
      "grad_norm": 2.859375,
      "learning_rate": 2.3651902562624778e-06,
      "loss": 0.7718,
      "step": 819540
    },
    {
      "epoch": 2.8723184276286506,
      "grad_norm": 2.46875,
      "learning_rate": 2.3645412275987758e-06,
      "loss": 0.7255,
      "step": 819550
    },
    {
      "epoch": 2.8723534751355464,
      "grad_norm": 3.203125,
      "learning_rate": 2.3638921989350738e-06,
      "loss": 0.8489,
      "step": 819560
    },
    {
      "epoch": 2.8723885226424417,
      "grad_norm": 2.9375,
      "learning_rate": 2.363243170271372e-06,
      "loss": 0.8207,
      "step": 819570
    },
    {
      "epoch": 2.8724235701493375,
      "grad_norm": 2.984375,
      "learning_rate": 2.3625941416076698e-06,
      "loss": 0.7173,
      "step": 819580
    },
    {
      "epoch": 2.872458617656233,
      "grad_norm": 2.796875,
      "learning_rate": 2.361945112943968e-06,
      "loss": 0.6982,
      "step": 819590
    },
    {
      "epoch": 2.8724936651631285,
      "grad_norm": 3.265625,
      "learning_rate": 2.361296084280266e-06,
      "loss": 0.8454,
      "step": 819600
    },
    {
      "epoch": 2.8725287126700243,
      "grad_norm": 2.65625,
      "learning_rate": 2.360647055616564e-06,
      "loss": 0.7673,
      "step": 819610
    },
    {
      "epoch": 2.87256376017692,
      "grad_norm": 3.25,
      "learning_rate": 2.3599980269528626e-06,
      "loss": 0.7061,
      "step": 819620
    },
    {
      "epoch": 2.8725988076838154,
      "grad_norm": 2.609375,
      "learning_rate": 2.3593489982891606e-06,
      "loss": 0.7965,
      "step": 819630
    },
    {
      "epoch": 2.872633855190711,
      "grad_norm": 2.703125,
      "learning_rate": 2.3586999696254586e-06,
      "loss": 0.8282,
      "step": 819640
    },
    {
      "epoch": 2.8726689026976064,
      "grad_norm": 2.5625,
      "learning_rate": 2.3580509409617566e-06,
      "loss": 0.8135,
      "step": 819650
    },
    {
      "epoch": 2.872703950204502,
      "grad_norm": 2.46875,
      "learning_rate": 2.357401912298055e-06,
      "loss": 0.7608,
      "step": 819660
    },
    {
      "epoch": 2.872738997711398,
      "grad_norm": 3.140625,
      "learning_rate": 2.356752883634353e-06,
      "loss": 0.8513,
      "step": 819670
    },
    {
      "epoch": 2.8727740452182933,
      "grad_norm": 2.859375,
      "learning_rate": 2.356103854970651e-06,
      "loss": 0.7864,
      "step": 819680
    },
    {
      "epoch": 2.872809092725189,
      "grad_norm": 2.8125,
      "learning_rate": 2.355454826306949e-06,
      "loss": 0.7823,
      "step": 819690
    },
    {
      "epoch": 2.8728441402320843,
      "grad_norm": 3.546875,
      "learning_rate": 2.354805797643247e-06,
      "loss": 0.8018,
      "step": 819700
    },
    {
      "epoch": 2.87287918773898,
      "grad_norm": 2.5625,
      "learning_rate": 2.3541567689795455e-06,
      "loss": 0.8051,
      "step": 819710
    },
    {
      "epoch": 2.872914235245876,
      "grad_norm": 3.0,
      "learning_rate": 2.3535077403158435e-06,
      "loss": 0.7135,
      "step": 819720
    },
    {
      "epoch": 2.8729492827527716,
      "grad_norm": 2.671875,
      "learning_rate": 2.3528587116521415e-06,
      "loss": 0.8117,
      "step": 819730
    },
    {
      "epoch": 2.872984330259667,
      "grad_norm": 2.5,
      "learning_rate": 2.35220968298844e-06,
      "loss": 0.7741,
      "step": 819740
    },
    {
      "epoch": 2.8730193777665627,
      "grad_norm": 2.6875,
      "learning_rate": 2.3515606543247375e-06,
      "loss": 0.7728,
      "step": 819750
    },
    {
      "epoch": 2.873054425273458,
      "grad_norm": 2.78125,
      "learning_rate": 2.350911625661036e-06,
      "loss": 0.8156,
      "step": 819760
    },
    {
      "epoch": 2.8730894727803538,
      "grad_norm": 2.71875,
      "learning_rate": 2.350262596997334e-06,
      "loss": 0.8407,
      "step": 819770
    },
    {
      "epoch": 2.8731245202872495,
      "grad_norm": 2.84375,
      "learning_rate": 2.349613568333632e-06,
      "loss": 0.8132,
      "step": 819780
    },
    {
      "epoch": 2.873159567794145,
      "grad_norm": 3.0,
      "learning_rate": 2.34896453966993e-06,
      "loss": 0.8475,
      "step": 819790
    },
    {
      "epoch": 2.8731946153010406,
      "grad_norm": 3.265625,
      "learning_rate": 2.3483155110062283e-06,
      "loss": 0.8434,
      "step": 819800
    },
    {
      "epoch": 2.873229662807936,
      "grad_norm": 3.078125,
      "learning_rate": 2.3476664823425263e-06,
      "loss": 0.8232,
      "step": 819810
    },
    {
      "epoch": 2.8732647103148317,
      "grad_norm": 3.484375,
      "learning_rate": 2.3470174536788243e-06,
      "loss": 0.8731,
      "step": 819820
    },
    {
      "epoch": 2.8732997578217274,
      "grad_norm": 3.171875,
      "learning_rate": 2.3463684250151227e-06,
      "loss": 0.7918,
      "step": 819830
    },
    {
      "epoch": 2.873334805328623,
      "grad_norm": 2.9375,
      "learning_rate": 2.3457193963514203e-06,
      "loss": 0.8126,
      "step": 819840
    },
    {
      "epoch": 2.8733698528355185,
      "grad_norm": 2.3125,
      "learning_rate": 2.3450703676877187e-06,
      "loss": 0.7649,
      "step": 819850
    },
    {
      "epoch": 2.8734049003424142,
      "grad_norm": 3.046875,
      "learning_rate": 2.3444213390240167e-06,
      "loss": 0.8268,
      "step": 819860
    },
    {
      "epoch": 2.8734399478493096,
      "grad_norm": 3.09375,
      "learning_rate": 2.3437723103603147e-06,
      "loss": 0.8172,
      "step": 819870
    },
    {
      "epoch": 2.8734749953562053,
      "grad_norm": 2.5625,
      "learning_rate": 2.343123281696613e-06,
      "loss": 0.7462,
      "step": 819880
    },
    {
      "epoch": 2.873510042863101,
      "grad_norm": 3.234375,
      "learning_rate": 2.342474253032911e-06,
      "loss": 0.7867,
      "step": 819890
    },
    {
      "epoch": 2.8735450903699964,
      "grad_norm": 2.40625,
      "learning_rate": 2.341825224369209e-06,
      "loss": 0.7038,
      "step": 819900
    },
    {
      "epoch": 2.873580137876892,
      "grad_norm": 3.0,
      "learning_rate": 2.341176195705507e-06,
      "loss": 0.8331,
      "step": 819910
    },
    {
      "epoch": 2.8736151853837875,
      "grad_norm": 2.8125,
      "learning_rate": 2.3405271670418056e-06,
      "loss": 0.7959,
      "step": 819920
    },
    {
      "epoch": 2.873650232890683,
      "grad_norm": 3.109375,
      "learning_rate": 2.339878138378103e-06,
      "loss": 0.8382,
      "step": 819930
    },
    {
      "epoch": 2.873685280397579,
      "grad_norm": 3.15625,
      "learning_rate": 2.3392291097144016e-06,
      "loss": 0.7807,
      "step": 819940
    },
    {
      "epoch": 2.8737203279044747,
      "grad_norm": 2.828125,
      "learning_rate": 2.3385800810506996e-06,
      "loss": 0.8129,
      "step": 819950
    },
    {
      "epoch": 2.87375537541137,
      "grad_norm": 2.984375,
      "learning_rate": 2.3379310523869976e-06,
      "loss": 0.7668,
      "step": 819960
    },
    {
      "epoch": 2.873790422918266,
      "grad_norm": 2.203125,
      "learning_rate": 2.337282023723296e-06,
      "loss": 0.7023,
      "step": 819970
    },
    {
      "epoch": 2.873825470425161,
      "grad_norm": 3.078125,
      "learning_rate": 2.336632995059594e-06,
      "loss": 0.727,
      "step": 819980
    },
    {
      "epoch": 2.873860517932057,
      "grad_norm": 3.109375,
      "learning_rate": 2.335983966395892e-06,
      "loss": 0.8709,
      "step": 819990
    },
    {
      "epoch": 2.8738955654389526,
      "grad_norm": 2.734375,
      "learning_rate": 2.33533493773219e-06,
      "loss": 0.7284,
      "step": 820000
    },
    {
      "epoch": 2.8738955654389526,
      "eval_loss": 0.7499901652336121,
      "eval_runtime": 553.4924,
      "eval_samples_per_second": 687.337,
      "eval_steps_per_second": 57.278,
      "step": 820000
    },
    {
      "epoch": 2.873930612945848,
      "grad_norm": 3.0,
      "learning_rate": 2.334685909068488e-06,
      "loss": 0.8059,
      "step": 820010
    },
    {
      "epoch": 2.8739656604527437,
      "grad_norm": 3.359375,
      "learning_rate": 2.3340368804047864e-06,
      "loss": 0.7851,
      "step": 820020
    },
    {
      "epoch": 2.874000707959639,
      "grad_norm": 3.3125,
      "learning_rate": 2.3333878517410844e-06,
      "loss": 0.8222,
      "step": 820030
    },
    {
      "epoch": 2.874035755466535,
      "grad_norm": 2.921875,
      "learning_rate": 2.3327388230773824e-06,
      "loss": 0.8945,
      "step": 820040
    },
    {
      "epoch": 2.8740708029734305,
      "grad_norm": 3.140625,
      "learning_rate": 2.3320897944136804e-06,
      "loss": 0.8424,
      "step": 820050
    },
    {
      "epoch": 2.8741058504803263,
      "grad_norm": 3.265625,
      "learning_rate": 2.331440765749979e-06,
      "loss": 0.8039,
      "step": 820060
    },
    {
      "epoch": 2.8741408979872216,
      "grad_norm": 3.25,
      "learning_rate": 2.330791737086277e-06,
      "loss": 0.9392,
      "step": 820070
    },
    {
      "epoch": 2.8741759454941174,
      "grad_norm": 2.6875,
      "learning_rate": 2.330142708422575e-06,
      "loss": 0.7689,
      "step": 820080
    },
    {
      "epoch": 2.8742109930010127,
      "grad_norm": 2.734375,
      "learning_rate": 2.3294936797588733e-06,
      "loss": 0.781,
      "step": 820090
    },
    {
      "epoch": 2.8742460405079084,
      "grad_norm": 2.5,
      "learning_rate": 2.328844651095171e-06,
      "loss": 0.79,
      "step": 820100
    },
    {
      "epoch": 2.874281088014804,
      "grad_norm": 2.609375,
      "learning_rate": 2.3281956224314693e-06,
      "loss": 0.8416,
      "step": 820110
    },
    {
      "epoch": 2.8743161355216995,
      "grad_norm": 2.890625,
      "learning_rate": 2.3275465937677673e-06,
      "loss": 0.7839,
      "step": 820120
    },
    {
      "epoch": 2.8743511830285953,
      "grad_norm": 3.21875,
      "learning_rate": 2.3268975651040653e-06,
      "loss": 0.8684,
      "step": 820130
    },
    {
      "epoch": 2.8743862305354906,
      "grad_norm": 2.65625,
      "learning_rate": 2.3262485364403637e-06,
      "loss": 0.7695,
      "step": 820140
    },
    {
      "epoch": 2.8744212780423863,
      "grad_norm": 3.171875,
      "learning_rate": 2.3255995077766617e-06,
      "loss": 0.7915,
      "step": 820150
    },
    {
      "epoch": 2.874456325549282,
      "grad_norm": 3.25,
      "learning_rate": 2.3249504791129597e-06,
      "loss": 0.7841,
      "step": 820160
    },
    {
      "epoch": 2.874491373056178,
      "grad_norm": 2.90625,
      "learning_rate": 2.3243014504492577e-06,
      "loss": 0.8848,
      "step": 820170
    },
    {
      "epoch": 2.874526420563073,
      "grad_norm": 3.171875,
      "learning_rate": 2.323652421785556e-06,
      "loss": 0.8277,
      "step": 820180
    },
    {
      "epoch": 2.874561468069969,
      "grad_norm": 2.515625,
      "learning_rate": 2.3230033931218537e-06,
      "loss": 0.7212,
      "step": 820190
    },
    {
      "epoch": 2.8745965155768642,
      "grad_norm": 3.046875,
      "learning_rate": 2.322354364458152e-06,
      "loss": 0.7302,
      "step": 820200
    },
    {
      "epoch": 2.87463156308376,
      "grad_norm": 2.53125,
      "learning_rate": 2.32170533579445e-06,
      "loss": 0.7119,
      "step": 820210
    },
    {
      "epoch": 2.8746666105906558,
      "grad_norm": 3.265625,
      "learning_rate": 2.321056307130748e-06,
      "loss": 0.7829,
      "step": 820220
    },
    {
      "epoch": 2.874701658097551,
      "grad_norm": 2.921875,
      "learning_rate": 2.3204072784670465e-06,
      "loss": 0.6892,
      "step": 820230
    },
    {
      "epoch": 2.874736705604447,
      "grad_norm": 3.140625,
      "learning_rate": 2.3197582498033445e-06,
      "loss": 0.8036,
      "step": 820240
    },
    {
      "epoch": 2.8747717531113426,
      "grad_norm": 2.609375,
      "learning_rate": 2.3191092211396425e-06,
      "loss": 0.7706,
      "step": 820250
    },
    {
      "epoch": 2.874806800618238,
      "grad_norm": 2.734375,
      "learning_rate": 2.3184601924759405e-06,
      "loss": 0.783,
      "step": 820260
    },
    {
      "epoch": 2.8748418481251337,
      "grad_norm": 2.78125,
      "learning_rate": 2.3178111638122385e-06,
      "loss": 0.9146,
      "step": 820270
    },
    {
      "epoch": 2.8748768956320294,
      "grad_norm": 2.859375,
      "learning_rate": 2.317162135148537e-06,
      "loss": 0.8615,
      "step": 820280
    },
    {
      "epoch": 2.8749119431389247,
      "grad_norm": 2.703125,
      "learning_rate": 2.316513106484835e-06,
      "loss": 0.7663,
      "step": 820290
    },
    {
      "epoch": 2.8749469906458205,
      "grad_norm": 2.78125,
      "learning_rate": 2.315864077821133e-06,
      "loss": 0.8212,
      "step": 820300
    },
    {
      "epoch": 2.874982038152716,
      "grad_norm": 2.671875,
      "learning_rate": 2.315215049157431e-06,
      "loss": 0.7971,
      "step": 820310
    },
    {
      "epoch": 2.8750170856596116,
      "grad_norm": 2.640625,
      "learning_rate": 2.3145660204937294e-06,
      "loss": 0.8056,
      "step": 820320
    },
    {
      "epoch": 2.8750521331665073,
      "grad_norm": 2.78125,
      "learning_rate": 2.313916991830027e-06,
      "loss": 0.8768,
      "step": 820330
    },
    {
      "epoch": 2.8750871806734026,
      "grad_norm": 2.875,
      "learning_rate": 2.3132679631663254e-06,
      "loss": 0.7523,
      "step": 820340
    },
    {
      "epoch": 2.8751222281802984,
      "grad_norm": 3.09375,
      "learning_rate": 2.312618934502624e-06,
      "loss": 0.8137,
      "step": 820350
    },
    {
      "epoch": 2.875157275687194,
      "grad_norm": 2.609375,
      "learning_rate": 2.3119699058389214e-06,
      "loss": 0.8477,
      "step": 820360
    },
    {
      "epoch": 2.8751923231940895,
      "grad_norm": 3.25,
      "learning_rate": 2.31132087717522e-06,
      "loss": 0.8261,
      "step": 820370
    },
    {
      "epoch": 2.875227370700985,
      "grad_norm": 3.625,
      "learning_rate": 2.310671848511518e-06,
      "loss": 0.8359,
      "step": 820380
    },
    {
      "epoch": 2.875262418207881,
      "grad_norm": 2.6875,
      "learning_rate": 2.310022819847816e-06,
      "loss": 0.7887,
      "step": 820390
    },
    {
      "epoch": 2.8752974657147763,
      "grad_norm": 3.28125,
      "learning_rate": 2.309373791184114e-06,
      "loss": 0.7475,
      "step": 820400
    },
    {
      "epoch": 2.875332513221672,
      "grad_norm": 2.953125,
      "learning_rate": 2.3087247625204122e-06,
      "loss": 0.823,
      "step": 820410
    },
    {
      "epoch": 2.8753675607285674,
      "grad_norm": 2.8125,
      "learning_rate": 2.3080757338567102e-06,
      "loss": 0.8366,
      "step": 820420
    },
    {
      "epoch": 2.875402608235463,
      "grad_norm": 3.015625,
      "learning_rate": 2.3074267051930082e-06,
      "loss": 0.7705,
      "step": 820430
    },
    {
      "epoch": 2.875437655742359,
      "grad_norm": 3.03125,
      "learning_rate": 2.3067776765293062e-06,
      "loss": 0.8201,
      "step": 820440
    },
    {
      "epoch": 2.8754727032492546,
      "grad_norm": 2.25,
      "learning_rate": 2.3061286478656042e-06,
      "loss": 0.7032,
      "step": 820450
    },
    {
      "epoch": 2.87550775075615,
      "grad_norm": 3.015625,
      "learning_rate": 2.3054796192019027e-06,
      "loss": 0.8437,
      "step": 820460
    },
    {
      "epoch": 2.8755427982630457,
      "grad_norm": 2.875,
      "learning_rate": 2.3048305905382007e-06,
      "loss": 0.8277,
      "step": 820470
    },
    {
      "epoch": 2.875577845769941,
      "grad_norm": 2.796875,
      "learning_rate": 2.3041815618744987e-06,
      "loss": 0.7907,
      "step": 820480
    },
    {
      "epoch": 2.875612893276837,
      "grad_norm": 2.75,
      "learning_rate": 2.303532533210797e-06,
      "loss": 0.8301,
      "step": 820490
    },
    {
      "epoch": 2.8756479407837325,
      "grad_norm": 2.78125,
      "learning_rate": 2.3028835045470947e-06,
      "loss": 0.8055,
      "step": 820500
    },
    {
      "epoch": 2.875682988290628,
      "grad_norm": 2.765625,
      "learning_rate": 2.302234475883393e-06,
      "loss": 0.7059,
      "step": 820510
    },
    {
      "epoch": 2.8757180357975236,
      "grad_norm": 2.953125,
      "learning_rate": 2.301585447219691e-06,
      "loss": 0.8514,
      "step": 820520
    },
    {
      "epoch": 2.875753083304419,
      "grad_norm": 2.828125,
      "learning_rate": 2.300936418555989e-06,
      "loss": 0.7275,
      "step": 820530
    },
    {
      "epoch": 2.8757881308113147,
      "grad_norm": 2.859375,
      "learning_rate": 2.3002873898922875e-06,
      "loss": 0.7679,
      "step": 820540
    },
    {
      "epoch": 2.8758231783182104,
      "grad_norm": 2.609375,
      "learning_rate": 2.2996383612285855e-06,
      "loss": 0.877,
      "step": 820550
    },
    {
      "epoch": 2.875858225825106,
      "grad_norm": 2.953125,
      "learning_rate": 2.2989893325648835e-06,
      "loss": 0.8232,
      "step": 820560
    },
    {
      "epoch": 2.8758932733320015,
      "grad_norm": 2.65625,
      "learning_rate": 2.2983403039011815e-06,
      "loss": 0.8444,
      "step": 820570
    },
    {
      "epoch": 2.8759283208388973,
      "grad_norm": 2.890625,
      "learning_rate": 2.29769127523748e-06,
      "loss": 0.7735,
      "step": 820580
    },
    {
      "epoch": 2.8759633683457926,
      "grad_norm": 2.640625,
      "learning_rate": 2.2970422465737775e-06,
      "loss": 0.8076,
      "step": 820590
    },
    {
      "epoch": 2.8759984158526883,
      "grad_norm": 3.265625,
      "learning_rate": 2.296393217910076e-06,
      "loss": 0.8369,
      "step": 820600
    },
    {
      "epoch": 2.876033463359584,
      "grad_norm": 3.203125,
      "learning_rate": 2.2957441892463743e-06,
      "loss": 0.8174,
      "step": 820610
    },
    {
      "epoch": 2.8760685108664794,
      "grad_norm": 2.640625,
      "learning_rate": 2.295095160582672e-06,
      "loss": 0.7509,
      "step": 820620
    },
    {
      "epoch": 2.876103558373375,
      "grad_norm": 3.1875,
      "learning_rate": 2.2944461319189703e-06,
      "loss": 0.8247,
      "step": 820630
    },
    {
      "epoch": 2.8761386058802705,
      "grad_norm": 3.453125,
      "learning_rate": 2.2937971032552683e-06,
      "loss": 0.7761,
      "step": 820640
    },
    {
      "epoch": 2.8761736533871662,
      "grad_norm": 2.921875,
      "learning_rate": 2.2931480745915663e-06,
      "loss": 0.7985,
      "step": 820650
    },
    {
      "epoch": 2.876208700894062,
      "grad_norm": 2.65625,
      "learning_rate": 2.2924990459278643e-06,
      "loss": 0.8605,
      "step": 820660
    },
    {
      "epoch": 2.8762437484009578,
      "grad_norm": 2.734375,
      "learning_rate": 2.2918500172641628e-06,
      "loss": 0.7273,
      "step": 820670
    },
    {
      "epoch": 2.876278795907853,
      "grad_norm": 3.296875,
      "learning_rate": 2.2912009886004608e-06,
      "loss": 0.7605,
      "step": 820680
    },
    {
      "epoch": 2.876313843414749,
      "grad_norm": 3.09375,
      "learning_rate": 2.2905519599367588e-06,
      "loss": 0.7771,
      "step": 820690
    },
    {
      "epoch": 2.876348890921644,
      "grad_norm": 3.046875,
      "learning_rate": 2.2899029312730568e-06,
      "loss": 0.7716,
      "step": 820700
    },
    {
      "epoch": 2.87638393842854,
      "grad_norm": 3.046875,
      "learning_rate": 2.2892539026093548e-06,
      "loss": 0.7915,
      "step": 820710
    },
    {
      "epoch": 2.8764189859354357,
      "grad_norm": 2.609375,
      "learning_rate": 2.288604873945653e-06,
      "loss": 0.8383,
      "step": 820720
    },
    {
      "epoch": 2.876454033442331,
      "grad_norm": 2.984375,
      "learning_rate": 2.287955845281951e-06,
      "loss": 0.8043,
      "step": 820730
    },
    {
      "epoch": 2.8764890809492267,
      "grad_norm": 2.6875,
      "learning_rate": 2.287306816618249e-06,
      "loss": 0.7561,
      "step": 820740
    },
    {
      "epoch": 2.876524128456122,
      "grad_norm": 3.0625,
      "learning_rate": 2.2866577879545476e-06,
      "loss": 0.7169,
      "step": 820750
    },
    {
      "epoch": 2.876559175963018,
      "grad_norm": 2.546875,
      "learning_rate": 2.286008759290845e-06,
      "loss": 0.7587,
      "step": 820760
    },
    {
      "epoch": 2.8765942234699136,
      "grad_norm": 3.0,
      "learning_rate": 2.2853597306271436e-06,
      "loss": 0.7575,
      "step": 820770
    },
    {
      "epoch": 2.8766292709768093,
      "grad_norm": 3.125,
      "learning_rate": 2.2847107019634416e-06,
      "loss": 0.8086,
      "step": 820780
    },
    {
      "epoch": 2.8766643184837046,
      "grad_norm": 2.953125,
      "learning_rate": 2.2840616732997396e-06,
      "loss": 0.7655,
      "step": 820790
    },
    {
      "epoch": 2.8766993659906004,
      "grad_norm": 3.109375,
      "learning_rate": 2.2834126446360376e-06,
      "loss": 0.716,
      "step": 820800
    },
    {
      "epoch": 2.8767344134974957,
      "grad_norm": 2.984375,
      "learning_rate": 2.282763615972336e-06,
      "loss": 0.7868,
      "step": 820810
    },
    {
      "epoch": 2.8767694610043915,
      "grad_norm": 2.578125,
      "learning_rate": 2.282114587308634e-06,
      "loss": 0.8131,
      "step": 820820
    },
    {
      "epoch": 2.876804508511287,
      "grad_norm": 2.984375,
      "learning_rate": 2.281465558644932e-06,
      "loss": 0.8131,
      "step": 820830
    },
    {
      "epoch": 2.8768395560181825,
      "grad_norm": 2.703125,
      "learning_rate": 2.2808165299812305e-06,
      "loss": 0.7968,
      "step": 820840
    },
    {
      "epoch": 2.8768746035250783,
      "grad_norm": 2.90625,
      "learning_rate": 2.280167501317528e-06,
      "loss": 0.7754,
      "step": 820850
    },
    {
      "epoch": 2.8769096510319736,
      "grad_norm": 3.203125,
      "learning_rate": 2.2795184726538265e-06,
      "loss": 0.7997,
      "step": 820860
    },
    {
      "epoch": 2.8769446985388694,
      "grad_norm": 2.796875,
      "learning_rate": 2.2788694439901245e-06,
      "loss": 0.8375,
      "step": 820870
    },
    {
      "epoch": 2.876979746045765,
      "grad_norm": 3.171875,
      "learning_rate": 2.2782204153264225e-06,
      "loss": 0.84,
      "step": 820880
    },
    {
      "epoch": 2.877014793552661,
      "grad_norm": 3.078125,
      "learning_rate": 2.277571386662721e-06,
      "loss": 0.9009,
      "step": 820890
    },
    {
      "epoch": 2.877049841059556,
      "grad_norm": 3.0625,
      "learning_rate": 2.276922357999019e-06,
      "loss": 0.74,
      "step": 820900
    },
    {
      "epoch": 2.877084888566452,
      "grad_norm": 2.671875,
      "learning_rate": 2.276273329335317e-06,
      "loss": 0.811,
      "step": 820910
    },
    {
      "epoch": 2.8771199360733473,
      "grad_norm": 2.765625,
      "learning_rate": 2.275624300671615e-06,
      "loss": 0.8372,
      "step": 820920
    },
    {
      "epoch": 2.877154983580243,
      "grad_norm": 3.109375,
      "learning_rate": 2.274975272007913e-06,
      "loss": 0.8285,
      "step": 820930
    },
    {
      "epoch": 2.8771900310871388,
      "grad_norm": 3.34375,
      "learning_rate": 2.2743262433442113e-06,
      "loss": 0.7576,
      "step": 820940
    },
    {
      "epoch": 2.877225078594034,
      "grad_norm": 2.796875,
      "learning_rate": 2.2736772146805093e-06,
      "loss": 0.7886,
      "step": 820950
    },
    {
      "epoch": 2.87726012610093,
      "grad_norm": 3.0625,
      "learning_rate": 2.2730281860168073e-06,
      "loss": 0.7714,
      "step": 820960
    },
    {
      "epoch": 2.877295173607825,
      "grad_norm": 3.140625,
      "learning_rate": 2.2723791573531053e-06,
      "loss": 0.8078,
      "step": 820970
    },
    {
      "epoch": 2.877330221114721,
      "grad_norm": 2.875,
      "learning_rate": 2.2717301286894037e-06,
      "loss": 0.7824,
      "step": 820980
    },
    {
      "epoch": 2.8773652686216167,
      "grad_norm": 2.4375,
      "learning_rate": 2.2710811000257013e-06,
      "loss": 0.7344,
      "step": 820990
    },
    {
      "epoch": 2.8774003161285124,
      "grad_norm": 2.984375,
      "learning_rate": 2.2704320713619997e-06,
      "loss": 0.7588,
      "step": 821000
    },
    {
      "epoch": 2.8774353636354078,
      "grad_norm": 2.828125,
      "learning_rate": 2.269783042698298e-06,
      "loss": 0.7034,
      "step": 821010
    },
    {
      "epoch": 2.8774704111423035,
      "grad_norm": 2.875,
      "learning_rate": 2.2691340140345957e-06,
      "loss": 0.83,
      "step": 821020
    },
    {
      "epoch": 2.877505458649199,
      "grad_norm": 3.1875,
      "learning_rate": 2.268484985370894e-06,
      "loss": 0.7903,
      "step": 821030
    },
    {
      "epoch": 2.8775405061560946,
      "grad_norm": 3.140625,
      "learning_rate": 2.267835956707192e-06,
      "loss": 0.7876,
      "step": 821040
    },
    {
      "epoch": 2.8775755536629903,
      "grad_norm": 3.140625,
      "learning_rate": 2.26718692804349e-06,
      "loss": 0.7607,
      "step": 821050
    },
    {
      "epoch": 2.8776106011698857,
      "grad_norm": 2.90625,
      "learning_rate": 2.266537899379788e-06,
      "loss": 0.8286,
      "step": 821060
    },
    {
      "epoch": 2.8776456486767814,
      "grad_norm": 2.6875,
      "learning_rate": 2.2658888707160866e-06,
      "loss": 0.7512,
      "step": 821070
    },
    {
      "epoch": 2.8776806961836767,
      "grad_norm": 2.8125,
      "learning_rate": 2.2652398420523846e-06,
      "loss": 0.7781,
      "step": 821080
    },
    {
      "epoch": 2.8777157436905725,
      "grad_norm": 2.78125,
      "learning_rate": 2.2645908133886826e-06,
      "loss": 0.8331,
      "step": 821090
    },
    {
      "epoch": 2.8777507911974682,
      "grad_norm": 3.265625,
      "learning_rate": 2.263941784724981e-06,
      "loss": 0.7165,
      "step": 821100
    },
    {
      "epoch": 2.877785838704364,
      "grad_norm": 3.15625,
      "learning_rate": 2.2632927560612786e-06,
      "loss": 0.7708,
      "step": 821110
    },
    {
      "epoch": 2.8778208862112593,
      "grad_norm": 2.703125,
      "learning_rate": 2.262643727397577e-06,
      "loss": 0.8195,
      "step": 821120
    },
    {
      "epoch": 2.877855933718155,
      "grad_norm": 3.421875,
      "learning_rate": 2.261994698733875e-06,
      "loss": 0.858,
      "step": 821130
    },
    {
      "epoch": 2.8778909812250504,
      "grad_norm": 3.109375,
      "learning_rate": 2.261345670070173e-06,
      "loss": 0.8274,
      "step": 821140
    },
    {
      "epoch": 2.877926028731946,
      "grad_norm": 2.65625,
      "learning_rate": 2.2606966414064714e-06,
      "loss": 0.8419,
      "step": 821150
    },
    {
      "epoch": 2.877961076238842,
      "grad_norm": 3.1875,
      "learning_rate": 2.2600476127427694e-06,
      "loss": 0.7955,
      "step": 821160
    },
    {
      "epoch": 2.877996123745737,
      "grad_norm": 2.3125,
      "learning_rate": 2.2593985840790674e-06,
      "loss": 0.7716,
      "step": 821170
    },
    {
      "epoch": 2.878031171252633,
      "grad_norm": 2.921875,
      "learning_rate": 2.2587495554153654e-06,
      "loss": 0.7424,
      "step": 821180
    },
    {
      "epoch": 2.8780662187595283,
      "grad_norm": 2.4375,
      "learning_rate": 2.2581005267516634e-06,
      "loss": 0.6948,
      "step": 821190
    },
    {
      "epoch": 2.878101266266424,
      "grad_norm": 2.546875,
      "learning_rate": 2.2574514980879614e-06,
      "loss": 0.8196,
      "step": 821200
    },
    {
      "epoch": 2.87813631377332,
      "grad_norm": 2.640625,
      "learning_rate": 2.25680246942426e-06,
      "loss": 0.725,
      "step": 821210
    },
    {
      "epoch": 2.8781713612802156,
      "grad_norm": 2.515625,
      "learning_rate": 2.256153440760558e-06,
      "loss": 0.7972,
      "step": 821220
    },
    {
      "epoch": 2.878206408787111,
      "grad_norm": 2.921875,
      "learning_rate": 2.255504412096856e-06,
      "loss": 0.8229,
      "step": 821230
    },
    {
      "epoch": 2.8782414562940066,
      "grad_norm": 2.84375,
      "learning_rate": 2.2548553834331543e-06,
      "loss": 0.8315,
      "step": 821240
    },
    {
      "epoch": 2.878276503800902,
      "grad_norm": 3.046875,
      "learning_rate": 2.254206354769452e-06,
      "loss": 0.7481,
      "step": 821250
    },
    {
      "epoch": 2.8783115513077977,
      "grad_norm": 3.328125,
      "learning_rate": 2.2535573261057503e-06,
      "loss": 0.8789,
      "step": 821260
    },
    {
      "epoch": 2.8783465988146935,
      "grad_norm": 2.5,
      "learning_rate": 2.2529082974420483e-06,
      "loss": 0.7289,
      "step": 821270
    },
    {
      "epoch": 2.8783816463215888,
      "grad_norm": 2.96875,
      "learning_rate": 2.2522592687783463e-06,
      "loss": 0.7565,
      "step": 821280
    },
    {
      "epoch": 2.8784166938284845,
      "grad_norm": 3.03125,
      "learning_rate": 2.2516102401146447e-06,
      "loss": 0.7609,
      "step": 821290
    },
    {
      "epoch": 2.87845174133538,
      "grad_norm": 2.8125,
      "learning_rate": 2.2509612114509427e-06,
      "loss": 0.7913,
      "step": 821300
    },
    {
      "epoch": 2.8784867888422756,
      "grad_norm": 2.453125,
      "learning_rate": 2.2503121827872407e-06,
      "loss": 0.7896,
      "step": 821310
    },
    {
      "epoch": 2.8785218363491714,
      "grad_norm": 3.0,
      "learning_rate": 2.2496631541235387e-06,
      "loss": 0.7649,
      "step": 821320
    },
    {
      "epoch": 2.878556883856067,
      "grad_norm": 3.0,
      "learning_rate": 2.249014125459837e-06,
      "loss": 0.8158,
      "step": 821330
    },
    {
      "epoch": 2.8785919313629624,
      "grad_norm": 3.25,
      "learning_rate": 2.2483650967961347e-06,
      "loss": 0.8824,
      "step": 821340
    },
    {
      "epoch": 2.878626978869858,
      "grad_norm": 3.0625,
      "learning_rate": 2.247716068132433e-06,
      "loss": 0.8087,
      "step": 821350
    },
    {
      "epoch": 2.8786620263767535,
      "grad_norm": 3.140625,
      "learning_rate": 2.2470670394687315e-06,
      "loss": 0.7025,
      "step": 821360
    },
    {
      "epoch": 2.8786970738836493,
      "grad_norm": 3.09375,
      "learning_rate": 2.246418010805029e-06,
      "loss": 0.835,
      "step": 821370
    },
    {
      "epoch": 2.878732121390545,
      "grad_norm": 2.84375,
      "learning_rate": 2.2457689821413275e-06,
      "loss": 0.8205,
      "step": 821380
    },
    {
      "epoch": 2.8787671688974403,
      "grad_norm": 3.171875,
      "learning_rate": 2.2451199534776255e-06,
      "loss": 0.8808,
      "step": 821390
    },
    {
      "epoch": 2.878802216404336,
      "grad_norm": 3.875,
      "learning_rate": 2.2444709248139235e-06,
      "loss": 0.8148,
      "step": 821400
    },
    {
      "epoch": 2.8788372639112314,
      "grad_norm": 2.5,
      "learning_rate": 2.243821896150222e-06,
      "loss": 0.7707,
      "step": 821410
    },
    {
      "epoch": 2.878872311418127,
      "grad_norm": 2.96875,
      "learning_rate": 2.24317286748652e-06,
      "loss": 0.7786,
      "step": 821420
    },
    {
      "epoch": 2.878907358925023,
      "grad_norm": 3.140625,
      "learning_rate": 2.242523838822818e-06,
      "loss": 0.7664,
      "step": 821430
    },
    {
      "epoch": 2.8789424064319187,
      "grad_norm": 3.234375,
      "learning_rate": 2.241874810159116e-06,
      "loss": 0.8418,
      "step": 821440
    },
    {
      "epoch": 2.878977453938814,
      "grad_norm": 2.828125,
      "learning_rate": 2.241225781495414e-06,
      "loss": 0.7936,
      "step": 821450
    },
    {
      "epoch": 2.8790125014457097,
      "grad_norm": 2.875,
      "learning_rate": 2.240576752831712e-06,
      "loss": 0.8769,
      "step": 821460
    },
    {
      "epoch": 2.879047548952605,
      "grad_norm": 2.765625,
      "learning_rate": 2.2399277241680104e-06,
      "loss": 0.7376,
      "step": 821470
    },
    {
      "epoch": 2.879082596459501,
      "grad_norm": 3.15625,
      "learning_rate": 2.2392786955043084e-06,
      "loss": 0.7979,
      "step": 821480
    },
    {
      "epoch": 2.8791176439663966,
      "grad_norm": 3.0,
      "learning_rate": 2.2386296668406064e-06,
      "loss": 0.8784,
      "step": 821490
    },
    {
      "epoch": 2.879152691473292,
      "grad_norm": 2.84375,
      "learning_rate": 2.237980638176905e-06,
      "loss": 0.7978,
      "step": 821500
    },
    {
      "epoch": 2.8791877389801876,
      "grad_norm": 3.0625,
      "learning_rate": 2.2373316095132024e-06,
      "loss": 0.844,
      "step": 821510
    },
    {
      "epoch": 2.8792227864870834,
      "grad_norm": 2.484375,
      "learning_rate": 2.236682580849501e-06,
      "loss": 0.8041,
      "step": 821520
    },
    {
      "epoch": 2.8792578339939787,
      "grad_norm": 2.9375,
      "learning_rate": 2.236033552185799e-06,
      "loss": 0.6951,
      "step": 821530
    },
    {
      "epoch": 2.8792928815008745,
      "grad_norm": 2.578125,
      "learning_rate": 2.235384523522097e-06,
      "loss": 0.7452,
      "step": 821540
    },
    {
      "epoch": 2.8793279290077702,
      "grad_norm": 2.953125,
      "learning_rate": 2.2347354948583952e-06,
      "loss": 0.7752,
      "step": 821550
    },
    {
      "epoch": 2.8793629765146656,
      "grad_norm": 2.953125,
      "learning_rate": 2.2340864661946932e-06,
      "loss": 0.8407,
      "step": 821560
    },
    {
      "epoch": 2.8793980240215613,
      "grad_norm": 3.0,
      "learning_rate": 2.2334374375309912e-06,
      "loss": 0.7916,
      "step": 821570
    },
    {
      "epoch": 2.8794330715284566,
      "grad_norm": 3.234375,
      "learning_rate": 2.2327884088672892e-06,
      "loss": 0.856,
      "step": 821580
    },
    {
      "epoch": 2.8794681190353524,
      "grad_norm": 3.140625,
      "learning_rate": 2.2321393802035877e-06,
      "loss": 0.8492,
      "step": 821590
    },
    {
      "epoch": 2.879503166542248,
      "grad_norm": 2.546875,
      "learning_rate": 2.2314903515398852e-06,
      "loss": 0.808,
      "step": 821600
    },
    {
      "epoch": 2.8795382140491435,
      "grad_norm": 2.765625,
      "learning_rate": 2.2308413228761837e-06,
      "loss": 0.7578,
      "step": 821610
    },
    {
      "epoch": 2.879573261556039,
      "grad_norm": 2.546875,
      "learning_rate": 2.2301922942124817e-06,
      "loss": 0.8008,
      "step": 821620
    },
    {
      "epoch": 2.879608309062935,
      "grad_norm": 2.640625,
      "learning_rate": 2.2295432655487797e-06,
      "loss": 0.8121,
      "step": 821630
    },
    {
      "epoch": 2.8796433565698303,
      "grad_norm": 3.3125,
      "learning_rate": 2.228894236885078e-06,
      "loss": 0.7451,
      "step": 821640
    },
    {
      "epoch": 2.879678404076726,
      "grad_norm": 3.3125,
      "learning_rate": 2.228245208221376e-06,
      "loss": 0.8562,
      "step": 821650
    },
    {
      "epoch": 2.879713451583622,
      "grad_norm": 2.71875,
      "learning_rate": 2.227596179557674e-06,
      "loss": 0.7598,
      "step": 821660
    },
    {
      "epoch": 2.879748499090517,
      "grad_norm": 2.546875,
      "learning_rate": 2.226947150893972e-06,
      "loss": 0.7849,
      "step": 821670
    },
    {
      "epoch": 2.879783546597413,
      "grad_norm": 3.015625,
      "learning_rate": 2.22629812223027e-06,
      "loss": 0.8351,
      "step": 821680
    },
    {
      "epoch": 2.879818594104308,
      "grad_norm": 2.671875,
      "learning_rate": 2.2256490935665685e-06,
      "loss": 0.7849,
      "step": 821690
    },
    {
      "epoch": 2.879853641611204,
      "grad_norm": 2.640625,
      "learning_rate": 2.2250000649028665e-06,
      "loss": 0.8908,
      "step": 821700
    },
    {
      "epoch": 2.8798886891180997,
      "grad_norm": 3.046875,
      "learning_rate": 2.2243510362391645e-06,
      "loss": 0.7545,
      "step": 821710
    },
    {
      "epoch": 2.8799237366249955,
      "grad_norm": 3.09375,
      "learning_rate": 2.2237020075754625e-06,
      "loss": 0.8288,
      "step": 821720
    },
    {
      "epoch": 2.8799587841318908,
      "grad_norm": 2.453125,
      "learning_rate": 2.223052978911761e-06,
      "loss": 0.8314,
      "step": 821730
    },
    {
      "epoch": 2.8799938316387865,
      "grad_norm": 2.890625,
      "learning_rate": 2.2224039502480585e-06,
      "loss": 0.791,
      "step": 821740
    },
    {
      "epoch": 2.880028879145682,
      "grad_norm": 2.96875,
      "learning_rate": 2.221754921584357e-06,
      "loss": 0.8016,
      "step": 821750
    },
    {
      "epoch": 2.8800639266525776,
      "grad_norm": 2.75,
      "learning_rate": 2.2211058929206553e-06,
      "loss": 0.8732,
      "step": 821760
    },
    {
      "epoch": 2.8800989741594734,
      "grad_norm": 2.640625,
      "learning_rate": 2.220456864256953e-06,
      "loss": 0.7834,
      "step": 821770
    },
    {
      "epoch": 2.8801340216663687,
      "grad_norm": 3.09375,
      "learning_rate": 2.2198078355932513e-06,
      "loss": 0.8489,
      "step": 821780
    },
    {
      "epoch": 2.8801690691732644,
      "grad_norm": 2.6875,
      "learning_rate": 2.2191588069295493e-06,
      "loss": 0.859,
      "step": 821790
    },
    {
      "epoch": 2.8802041166801597,
      "grad_norm": 2.921875,
      "learning_rate": 2.2185097782658473e-06,
      "loss": 0.8627,
      "step": 821800
    },
    {
      "epoch": 2.8802391641870555,
      "grad_norm": 2.640625,
      "learning_rate": 2.2178607496021453e-06,
      "loss": 0.8475,
      "step": 821810
    },
    {
      "epoch": 2.8802742116939513,
      "grad_norm": 3.0625,
      "learning_rate": 2.2172117209384438e-06,
      "loss": 0.8358,
      "step": 821820
    },
    {
      "epoch": 2.880309259200847,
      "grad_norm": 3.4375,
      "learning_rate": 2.2165626922747418e-06,
      "loss": 0.7789,
      "step": 821830
    },
    {
      "epoch": 2.8803443067077423,
      "grad_norm": 2.671875,
      "learning_rate": 2.2159136636110398e-06,
      "loss": 0.758,
      "step": 821840
    },
    {
      "epoch": 2.880379354214638,
      "grad_norm": 2.71875,
      "learning_rate": 2.215264634947338e-06,
      "loss": 0.8602,
      "step": 821850
    },
    {
      "epoch": 2.8804144017215334,
      "grad_norm": 2.78125,
      "learning_rate": 2.2146156062836358e-06,
      "loss": 0.7667,
      "step": 821860
    },
    {
      "epoch": 2.880449449228429,
      "grad_norm": 3.203125,
      "learning_rate": 2.213966577619934e-06,
      "loss": 0.8769,
      "step": 821870
    },
    {
      "epoch": 2.880484496735325,
      "grad_norm": 2.984375,
      "learning_rate": 2.213317548956232e-06,
      "loss": 0.7642,
      "step": 821880
    },
    {
      "epoch": 2.8805195442422202,
      "grad_norm": 3.078125,
      "learning_rate": 2.21266852029253e-06,
      "loss": 0.7208,
      "step": 821890
    },
    {
      "epoch": 2.880554591749116,
      "grad_norm": 3.046875,
      "learning_rate": 2.2120194916288286e-06,
      "loss": 0.719,
      "step": 821900
    },
    {
      "epoch": 2.8805896392560113,
      "grad_norm": 3.109375,
      "learning_rate": 2.2113704629651266e-06,
      "loss": 0.8266,
      "step": 821910
    },
    {
      "epoch": 2.880624686762907,
      "grad_norm": 3.015625,
      "learning_rate": 2.2107214343014246e-06,
      "loss": 0.7913,
      "step": 821920
    },
    {
      "epoch": 2.880659734269803,
      "grad_norm": 2.953125,
      "learning_rate": 2.2100724056377226e-06,
      "loss": 0.819,
      "step": 821930
    },
    {
      "epoch": 2.8806947817766986,
      "grad_norm": 3.203125,
      "learning_rate": 2.2094233769740206e-06,
      "loss": 0.7594,
      "step": 821940
    },
    {
      "epoch": 2.880729829283594,
      "grad_norm": 3.203125,
      "learning_rate": 2.208774348310319e-06,
      "loss": 0.7969,
      "step": 821950
    },
    {
      "epoch": 2.8807648767904896,
      "grad_norm": 3.046875,
      "learning_rate": 2.208125319646617e-06,
      "loss": 0.754,
      "step": 821960
    },
    {
      "epoch": 2.880799924297385,
      "grad_norm": 3.0625,
      "learning_rate": 2.207476290982915e-06,
      "loss": 0.7686,
      "step": 821970
    },
    {
      "epoch": 2.8808349718042807,
      "grad_norm": 2.9375,
      "learning_rate": 2.206827262319213e-06,
      "loss": 0.8338,
      "step": 821980
    },
    {
      "epoch": 2.8808700193111765,
      "grad_norm": 3.40625,
      "learning_rate": 2.2061782336555115e-06,
      "loss": 0.7767,
      "step": 821990
    },
    {
      "epoch": 2.880905066818072,
      "grad_norm": 2.953125,
      "learning_rate": 2.205529204991809e-06,
      "loss": 0.7714,
      "step": 822000
    },
    {
      "epoch": 2.8809401143249675,
      "grad_norm": 3.265625,
      "learning_rate": 2.2048801763281075e-06,
      "loss": 0.8616,
      "step": 822010
    },
    {
      "epoch": 2.880975161831863,
      "grad_norm": 2.875,
      "learning_rate": 2.204231147664406e-06,
      "loss": 0.7639,
      "step": 822020
    },
    {
      "epoch": 2.8810102093387586,
      "grad_norm": 3.40625,
      "learning_rate": 2.2035821190007035e-06,
      "loss": 0.7622,
      "step": 822030
    },
    {
      "epoch": 2.8810452568456544,
      "grad_norm": 3.015625,
      "learning_rate": 2.202933090337002e-06,
      "loss": 0.8238,
      "step": 822040
    },
    {
      "epoch": 2.88108030435255,
      "grad_norm": 2.5625,
      "learning_rate": 2.2022840616733e-06,
      "loss": 0.7256,
      "step": 822050
    },
    {
      "epoch": 2.8811153518594455,
      "grad_norm": 2.609375,
      "learning_rate": 2.201635033009598e-06,
      "loss": 0.8064,
      "step": 822060
    },
    {
      "epoch": 2.881150399366341,
      "grad_norm": 2.484375,
      "learning_rate": 2.200986004345896e-06,
      "loss": 0.7352,
      "step": 822070
    },
    {
      "epoch": 2.8811854468732365,
      "grad_norm": 3.171875,
      "learning_rate": 2.2003369756821943e-06,
      "loss": 0.7436,
      "step": 822080
    },
    {
      "epoch": 2.8812204943801323,
      "grad_norm": 3.0,
      "learning_rate": 2.1996879470184923e-06,
      "loss": 0.8331,
      "step": 822090
    },
    {
      "epoch": 2.881255541887028,
      "grad_norm": 2.84375,
      "learning_rate": 2.1990389183547903e-06,
      "loss": 0.7391,
      "step": 822100
    },
    {
      "epoch": 2.8812905893939234,
      "grad_norm": 3.359375,
      "learning_rate": 2.1983898896910883e-06,
      "loss": 0.8253,
      "step": 822110
    },
    {
      "epoch": 2.881325636900819,
      "grad_norm": 2.890625,
      "learning_rate": 2.1977408610273863e-06,
      "loss": 0.8316,
      "step": 822120
    },
    {
      "epoch": 2.8813606844077144,
      "grad_norm": 3.984375,
      "learning_rate": 2.1970918323636847e-06,
      "loss": 0.8302,
      "step": 822130
    },
    {
      "epoch": 2.88139573191461,
      "grad_norm": 2.890625,
      "learning_rate": 2.1964428036999827e-06,
      "loss": 0.8339,
      "step": 822140
    },
    {
      "epoch": 2.881430779421506,
      "grad_norm": 3.125,
      "learning_rate": 2.1957937750362807e-06,
      "loss": 0.8518,
      "step": 822150
    },
    {
      "epoch": 2.8814658269284017,
      "grad_norm": 2.75,
      "learning_rate": 2.195144746372579e-06,
      "loss": 0.8059,
      "step": 822160
    },
    {
      "epoch": 2.881500874435297,
      "grad_norm": 2.59375,
      "learning_rate": 2.1944957177088767e-06,
      "loss": 0.7344,
      "step": 822170
    },
    {
      "epoch": 2.8815359219421928,
      "grad_norm": 2.96875,
      "learning_rate": 2.193846689045175e-06,
      "loss": 0.7196,
      "step": 822180
    },
    {
      "epoch": 2.881570969449088,
      "grad_norm": 3.046875,
      "learning_rate": 2.193197660381473e-06,
      "loss": 0.7525,
      "step": 822190
    },
    {
      "epoch": 2.881606016955984,
      "grad_norm": 3.125,
      "learning_rate": 2.192548631717771e-06,
      "loss": 0.7796,
      "step": 822200
    },
    {
      "epoch": 2.8816410644628796,
      "grad_norm": 2.90625,
      "learning_rate": 2.191899603054069e-06,
      "loss": 0.7621,
      "step": 822210
    },
    {
      "epoch": 2.881676111969775,
      "grad_norm": 3.21875,
      "learning_rate": 2.1912505743903676e-06,
      "loss": 0.8636,
      "step": 822220
    },
    {
      "epoch": 2.8817111594766707,
      "grad_norm": 2.8125,
      "learning_rate": 2.1906015457266656e-06,
      "loss": 0.8141,
      "step": 822230
    },
    {
      "epoch": 2.881746206983566,
      "grad_norm": 2.96875,
      "learning_rate": 2.1899525170629636e-06,
      "loss": 0.7457,
      "step": 822240
    },
    {
      "epoch": 2.8817812544904617,
      "grad_norm": 2.59375,
      "learning_rate": 2.189303488399262e-06,
      "loss": 0.8262,
      "step": 822250
    },
    {
      "epoch": 2.8818163019973575,
      "grad_norm": 2.796875,
      "learning_rate": 2.1886544597355596e-06,
      "loss": 0.7935,
      "step": 822260
    },
    {
      "epoch": 2.8818513495042533,
      "grad_norm": 2.8125,
      "learning_rate": 2.188005431071858e-06,
      "loss": 0.7556,
      "step": 822270
    },
    {
      "epoch": 2.8818863970111486,
      "grad_norm": 2.75,
      "learning_rate": 2.1873564024081564e-06,
      "loss": 0.7962,
      "step": 822280
    },
    {
      "epoch": 2.8819214445180443,
      "grad_norm": 2.578125,
      "learning_rate": 2.186707373744454e-06,
      "loss": 0.744,
      "step": 822290
    },
    {
      "epoch": 2.8819564920249396,
      "grad_norm": 2.9375,
      "learning_rate": 2.1860583450807524e-06,
      "loss": 0.8088,
      "step": 822300
    },
    {
      "epoch": 2.8819915395318354,
      "grad_norm": 2.8125,
      "learning_rate": 2.1854093164170504e-06,
      "loss": 0.8069,
      "step": 822310
    },
    {
      "epoch": 2.882026587038731,
      "grad_norm": 3.234375,
      "learning_rate": 2.1847602877533484e-06,
      "loss": 0.833,
      "step": 822320
    },
    {
      "epoch": 2.8820616345456265,
      "grad_norm": 2.59375,
      "learning_rate": 2.1841112590896464e-06,
      "loss": 0.8395,
      "step": 822330
    },
    {
      "epoch": 2.8820966820525222,
      "grad_norm": 2.828125,
      "learning_rate": 2.183462230425945e-06,
      "loss": 0.7461,
      "step": 822340
    },
    {
      "epoch": 2.8821317295594175,
      "grad_norm": 2.78125,
      "learning_rate": 2.182813201762243e-06,
      "loss": 0.7674,
      "step": 822350
    },
    {
      "epoch": 2.8821667770663133,
      "grad_norm": 2.59375,
      "learning_rate": 2.182164173098541e-06,
      "loss": 0.7386,
      "step": 822360
    },
    {
      "epoch": 2.882201824573209,
      "grad_norm": 3.171875,
      "learning_rate": 2.181515144434839e-06,
      "loss": 0.8235,
      "step": 822370
    },
    {
      "epoch": 2.882236872080105,
      "grad_norm": 2.671875,
      "learning_rate": 2.180866115771137e-06,
      "loss": 0.7207,
      "step": 822380
    },
    {
      "epoch": 2.882271919587,
      "grad_norm": 2.703125,
      "learning_rate": 2.1802170871074353e-06,
      "loss": 0.8084,
      "step": 822390
    },
    {
      "epoch": 2.882306967093896,
      "grad_norm": 3.140625,
      "learning_rate": 2.1795680584437333e-06,
      "loss": 0.8359,
      "step": 822400
    },
    {
      "epoch": 2.882342014600791,
      "grad_norm": 3.25,
      "learning_rate": 2.1789190297800313e-06,
      "loss": 0.8399,
      "step": 822410
    },
    {
      "epoch": 2.882377062107687,
      "grad_norm": 3.25,
      "learning_rate": 2.1782700011163297e-06,
      "loss": 0.796,
      "step": 822420
    },
    {
      "epoch": 2.8824121096145827,
      "grad_norm": 2.921875,
      "learning_rate": 2.1776209724526273e-06,
      "loss": 0.8135,
      "step": 822430
    },
    {
      "epoch": 2.882447157121478,
      "grad_norm": 2.484375,
      "learning_rate": 2.1769719437889257e-06,
      "loss": 0.7541,
      "step": 822440
    },
    {
      "epoch": 2.882482204628374,
      "grad_norm": 3.03125,
      "learning_rate": 2.1763229151252237e-06,
      "loss": 0.7224,
      "step": 822450
    },
    {
      "epoch": 2.882517252135269,
      "grad_norm": 3.0,
      "learning_rate": 2.1756738864615217e-06,
      "loss": 0.8705,
      "step": 822460
    },
    {
      "epoch": 2.882552299642165,
      "grad_norm": 3.3125,
      "learning_rate": 2.1750248577978197e-06,
      "loss": 0.7503,
      "step": 822470
    },
    {
      "epoch": 2.8825873471490606,
      "grad_norm": 3.09375,
      "learning_rate": 2.174375829134118e-06,
      "loss": 0.8364,
      "step": 822480
    },
    {
      "epoch": 2.8826223946559564,
      "grad_norm": 3.421875,
      "learning_rate": 2.173726800470416e-06,
      "loss": 0.7955,
      "step": 822490
    },
    {
      "epoch": 2.8826574421628517,
      "grad_norm": 3.546875,
      "learning_rate": 2.173077771806714e-06,
      "loss": 0.7646,
      "step": 822500
    },
    {
      "epoch": 2.8826924896697474,
      "grad_norm": 2.8125,
      "learning_rate": 2.1724287431430125e-06,
      "loss": 0.8061,
      "step": 822510
    },
    {
      "epoch": 2.8827275371766428,
      "grad_norm": 3.21875,
      "learning_rate": 2.17177971447931e-06,
      "loss": 0.8667,
      "step": 822520
    },
    {
      "epoch": 2.8827625846835385,
      "grad_norm": 3.375,
      "learning_rate": 2.1711306858156085e-06,
      "loss": 0.8008,
      "step": 822530
    },
    {
      "epoch": 2.8827976321904343,
      "grad_norm": 3.21875,
      "learning_rate": 2.1704816571519065e-06,
      "loss": 0.8391,
      "step": 822540
    },
    {
      "epoch": 2.8828326796973296,
      "grad_norm": 2.890625,
      "learning_rate": 2.1698326284882045e-06,
      "loss": 0.7054,
      "step": 822550
    },
    {
      "epoch": 2.8828677272042253,
      "grad_norm": 2.78125,
      "learning_rate": 2.169183599824503e-06,
      "loss": 0.7834,
      "step": 822560
    },
    {
      "epoch": 2.8829027747111207,
      "grad_norm": 3.03125,
      "learning_rate": 2.168534571160801e-06,
      "loss": 0.7666,
      "step": 822570
    },
    {
      "epoch": 2.8829378222180164,
      "grad_norm": 2.296875,
      "learning_rate": 2.167885542497099e-06,
      "loss": 0.7799,
      "step": 822580
    },
    {
      "epoch": 2.882972869724912,
      "grad_norm": 2.75,
      "learning_rate": 2.167236513833397e-06,
      "loss": 0.8485,
      "step": 822590
    },
    {
      "epoch": 2.883007917231808,
      "grad_norm": 2.671875,
      "learning_rate": 2.1665874851696954e-06,
      "loss": 0.7103,
      "step": 822600
    },
    {
      "epoch": 2.8830429647387033,
      "grad_norm": 3.15625,
      "learning_rate": 2.165938456505993e-06,
      "loss": 0.8006,
      "step": 822610
    },
    {
      "epoch": 2.883078012245599,
      "grad_norm": 2.296875,
      "learning_rate": 2.1652894278422914e-06,
      "loss": 0.7589,
      "step": 822620
    },
    {
      "epoch": 2.8831130597524943,
      "grad_norm": 2.859375,
      "learning_rate": 2.1646403991785894e-06,
      "loss": 0.7442,
      "step": 822630
    },
    {
      "epoch": 2.88314810725939,
      "grad_norm": 2.359375,
      "learning_rate": 2.1639913705148874e-06,
      "loss": 0.7861,
      "step": 822640
    },
    {
      "epoch": 2.883183154766286,
      "grad_norm": 3.296875,
      "learning_rate": 2.163342341851186e-06,
      "loss": 0.8046,
      "step": 822650
    },
    {
      "epoch": 2.883218202273181,
      "grad_norm": 2.625,
      "learning_rate": 2.162693313187484e-06,
      "loss": 0.7464,
      "step": 822660
    },
    {
      "epoch": 2.883253249780077,
      "grad_norm": 3.09375,
      "learning_rate": 2.162044284523782e-06,
      "loss": 0.7916,
      "step": 822670
    },
    {
      "epoch": 2.8832882972869722,
      "grad_norm": 3.296875,
      "learning_rate": 2.16139525586008e-06,
      "loss": 0.9396,
      "step": 822680
    },
    {
      "epoch": 2.883323344793868,
      "grad_norm": 2.4375,
      "learning_rate": 2.160746227196378e-06,
      "loss": 0.7251,
      "step": 822690
    },
    {
      "epoch": 2.8833583923007637,
      "grad_norm": 2.921875,
      "learning_rate": 2.1600971985326762e-06,
      "loss": 0.767,
      "step": 822700
    },
    {
      "epoch": 2.8833934398076595,
      "grad_norm": 3.328125,
      "learning_rate": 2.1594481698689742e-06,
      "loss": 0.8107,
      "step": 822710
    },
    {
      "epoch": 2.883428487314555,
      "grad_norm": 3.046875,
      "learning_rate": 2.1587991412052722e-06,
      "loss": 0.7753,
      "step": 822720
    },
    {
      "epoch": 2.8834635348214506,
      "grad_norm": 2.5,
      "learning_rate": 2.1581501125415702e-06,
      "loss": 0.7067,
      "step": 822730
    },
    {
      "epoch": 2.883498582328346,
      "grad_norm": 2.515625,
      "learning_rate": 2.1575010838778687e-06,
      "loss": 0.836,
      "step": 822740
    },
    {
      "epoch": 2.8835336298352416,
      "grad_norm": 3.015625,
      "learning_rate": 2.1568520552141667e-06,
      "loss": 0.7294,
      "step": 822750
    },
    {
      "epoch": 2.8835686773421374,
      "grad_norm": 2.953125,
      "learning_rate": 2.1562030265504647e-06,
      "loss": 0.8528,
      "step": 822760
    },
    {
      "epoch": 2.8836037248490327,
      "grad_norm": 3.0,
      "learning_rate": 2.155553997886763e-06,
      "loss": 0.7287,
      "step": 822770
    },
    {
      "epoch": 2.8836387723559285,
      "grad_norm": 3.28125,
      "learning_rate": 2.1549049692230607e-06,
      "loss": 0.7666,
      "step": 822780
    },
    {
      "epoch": 2.883673819862824,
      "grad_norm": 2.625,
      "learning_rate": 2.154255940559359e-06,
      "loss": 0.7904,
      "step": 822790
    },
    {
      "epoch": 2.8837088673697195,
      "grad_norm": 3.09375,
      "learning_rate": 2.153606911895657e-06,
      "loss": 0.8283,
      "step": 822800
    },
    {
      "epoch": 2.8837439148766153,
      "grad_norm": 2.875,
      "learning_rate": 2.152957883231955e-06,
      "loss": 0.8029,
      "step": 822810
    },
    {
      "epoch": 2.883778962383511,
      "grad_norm": 2.484375,
      "learning_rate": 2.1523088545682535e-06,
      "loss": 0.7776,
      "step": 822820
    },
    {
      "epoch": 2.8838140098904064,
      "grad_norm": 2.984375,
      "learning_rate": 2.1516598259045515e-06,
      "loss": 0.8342,
      "step": 822830
    },
    {
      "epoch": 2.883849057397302,
      "grad_norm": 3.375,
      "learning_rate": 2.1510107972408495e-06,
      "loss": 0.836,
      "step": 822840
    },
    {
      "epoch": 2.8838841049041974,
      "grad_norm": 3.1875,
      "learning_rate": 2.1503617685771475e-06,
      "loss": 0.7715,
      "step": 822850
    },
    {
      "epoch": 2.883919152411093,
      "grad_norm": 2.8125,
      "learning_rate": 2.1497127399134455e-06,
      "loss": 0.855,
      "step": 822860
    },
    {
      "epoch": 2.883954199917989,
      "grad_norm": 3.140625,
      "learning_rate": 2.1490637112497435e-06,
      "loss": 0.7773,
      "step": 822870
    },
    {
      "epoch": 2.8839892474248843,
      "grad_norm": 3.25,
      "learning_rate": 2.148414682586042e-06,
      "loss": 0.7399,
      "step": 822880
    },
    {
      "epoch": 2.88402429493178,
      "grad_norm": 2.828125,
      "learning_rate": 2.14776565392234e-06,
      "loss": 0.7083,
      "step": 822890
    },
    {
      "epoch": 2.884059342438676,
      "grad_norm": 2.953125,
      "learning_rate": 2.147116625258638e-06,
      "loss": 0.8405,
      "step": 822900
    },
    {
      "epoch": 2.884094389945571,
      "grad_norm": 2.390625,
      "learning_rate": 2.1464675965949363e-06,
      "loss": 0.7722,
      "step": 822910
    },
    {
      "epoch": 2.884129437452467,
      "grad_norm": 3.265625,
      "learning_rate": 2.145818567931234e-06,
      "loss": 0.7371,
      "step": 822920
    },
    {
      "epoch": 2.8841644849593626,
      "grad_norm": 2.84375,
      "learning_rate": 2.1451695392675323e-06,
      "loss": 0.8,
      "step": 822930
    },
    {
      "epoch": 2.884199532466258,
      "grad_norm": 3.171875,
      "learning_rate": 2.1445205106038303e-06,
      "loss": 0.8211,
      "step": 822940
    },
    {
      "epoch": 2.8842345799731537,
      "grad_norm": 3.390625,
      "learning_rate": 2.1438714819401283e-06,
      "loss": 0.8409,
      "step": 822950
    },
    {
      "epoch": 2.884269627480049,
      "grad_norm": 2.953125,
      "learning_rate": 2.1432224532764268e-06,
      "loss": 0.8437,
      "step": 822960
    },
    {
      "epoch": 2.8843046749869448,
      "grad_norm": 2.984375,
      "learning_rate": 2.1425734246127248e-06,
      "loss": 0.7941,
      "step": 822970
    },
    {
      "epoch": 2.8843397224938405,
      "grad_norm": 3.109375,
      "learning_rate": 2.1419243959490228e-06,
      "loss": 0.7547,
      "step": 822980
    },
    {
      "epoch": 2.884374770000736,
      "grad_norm": 2.875,
      "learning_rate": 2.1412753672853208e-06,
      "loss": 0.7624,
      "step": 822990
    },
    {
      "epoch": 2.8844098175076316,
      "grad_norm": 3.3125,
      "learning_rate": 2.140626338621619e-06,
      "loss": 0.768,
      "step": 823000
    },
    {
      "epoch": 2.8844448650145273,
      "grad_norm": 2.65625,
      "learning_rate": 2.1399773099579168e-06,
      "loss": 0.8684,
      "step": 823010
    },
    {
      "epoch": 2.8844799125214227,
      "grad_norm": 3.265625,
      "learning_rate": 2.139328281294215e-06,
      "loss": 0.8276,
      "step": 823020
    },
    {
      "epoch": 2.8845149600283184,
      "grad_norm": 2.859375,
      "learning_rate": 2.1386792526305136e-06,
      "loss": 0.8049,
      "step": 823030
    },
    {
      "epoch": 2.884550007535214,
      "grad_norm": 3.140625,
      "learning_rate": 2.138030223966811e-06,
      "loss": 0.8416,
      "step": 823040
    },
    {
      "epoch": 2.8845850550421095,
      "grad_norm": 3.015625,
      "learning_rate": 2.1373811953031096e-06,
      "loss": 0.8081,
      "step": 823050
    },
    {
      "epoch": 2.8846201025490052,
      "grad_norm": 2.890625,
      "learning_rate": 2.1367321666394076e-06,
      "loss": 0.7881,
      "step": 823060
    },
    {
      "epoch": 2.8846551500559006,
      "grad_norm": 2.953125,
      "learning_rate": 2.1360831379757056e-06,
      "loss": 0.8473,
      "step": 823070
    },
    {
      "epoch": 2.8846901975627963,
      "grad_norm": 2.96875,
      "learning_rate": 2.1354341093120036e-06,
      "loss": 0.7958,
      "step": 823080
    },
    {
      "epoch": 2.884725245069692,
      "grad_norm": 3.078125,
      "learning_rate": 2.134785080648302e-06,
      "loss": 0.7812,
      "step": 823090
    },
    {
      "epoch": 2.884760292576588,
      "grad_norm": 2.78125,
      "learning_rate": 2.1341360519846e-06,
      "loss": 0.728,
      "step": 823100
    },
    {
      "epoch": 2.884795340083483,
      "grad_norm": 2.828125,
      "learning_rate": 2.133487023320898e-06,
      "loss": 0.7386,
      "step": 823110
    },
    {
      "epoch": 2.884830387590379,
      "grad_norm": 2.59375,
      "learning_rate": 2.132837994657196e-06,
      "loss": 0.7608,
      "step": 823120
    },
    {
      "epoch": 2.8848654350972742,
      "grad_norm": 2.921875,
      "learning_rate": 2.132188965993494e-06,
      "loss": 0.76,
      "step": 823130
    },
    {
      "epoch": 2.88490048260417,
      "grad_norm": 2.734375,
      "learning_rate": 2.1315399373297925e-06,
      "loss": 0.831,
      "step": 823140
    },
    {
      "epoch": 2.8849355301110657,
      "grad_norm": 3.03125,
      "learning_rate": 2.1308909086660905e-06,
      "loss": 0.7785,
      "step": 823150
    },
    {
      "epoch": 2.884970577617961,
      "grad_norm": 3.0625,
      "learning_rate": 2.1302418800023885e-06,
      "loss": 0.7992,
      "step": 823160
    },
    {
      "epoch": 2.885005625124857,
      "grad_norm": 3.171875,
      "learning_rate": 2.129592851338687e-06,
      "loss": 0.8414,
      "step": 823170
    },
    {
      "epoch": 2.885040672631752,
      "grad_norm": 2.765625,
      "learning_rate": 2.1289438226749845e-06,
      "loss": 0.849,
      "step": 823180
    },
    {
      "epoch": 2.885075720138648,
      "grad_norm": 3.28125,
      "learning_rate": 2.128294794011283e-06,
      "loss": 0.8332,
      "step": 823190
    },
    {
      "epoch": 2.8851107676455436,
      "grad_norm": 3.375,
      "learning_rate": 2.127645765347581e-06,
      "loss": 0.8346,
      "step": 823200
    },
    {
      "epoch": 2.8851458151524394,
      "grad_norm": 3.109375,
      "learning_rate": 2.126996736683879e-06,
      "loss": 0.8079,
      "step": 823210
    },
    {
      "epoch": 2.8851808626593347,
      "grad_norm": 2.65625,
      "learning_rate": 2.1263477080201773e-06,
      "loss": 0.8031,
      "step": 823220
    },
    {
      "epoch": 2.8852159101662305,
      "grad_norm": 2.9375,
      "learning_rate": 2.1256986793564753e-06,
      "loss": 0.8,
      "step": 823230
    },
    {
      "epoch": 2.885250957673126,
      "grad_norm": 3.640625,
      "learning_rate": 2.1250496506927733e-06,
      "loss": 0.7974,
      "step": 823240
    },
    {
      "epoch": 2.8852860051800215,
      "grad_norm": 2.96875,
      "learning_rate": 2.1244006220290713e-06,
      "loss": 0.6992,
      "step": 823250
    },
    {
      "epoch": 2.8853210526869173,
      "grad_norm": 2.546875,
      "learning_rate": 2.1237515933653697e-06,
      "loss": 0.8097,
      "step": 823260
    },
    {
      "epoch": 2.8853561001938126,
      "grad_norm": 3.015625,
      "learning_rate": 2.1231025647016673e-06,
      "loss": 0.7229,
      "step": 823270
    },
    {
      "epoch": 2.8853911477007084,
      "grad_norm": 3.28125,
      "learning_rate": 2.1224535360379657e-06,
      "loss": 0.7878,
      "step": 823280
    },
    {
      "epoch": 2.8854261952076037,
      "grad_norm": 2.546875,
      "learning_rate": 2.1218045073742637e-06,
      "loss": 0.7881,
      "step": 823290
    },
    {
      "epoch": 2.8854612427144994,
      "grad_norm": 3.078125,
      "learning_rate": 2.1211554787105617e-06,
      "loss": 0.7758,
      "step": 823300
    },
    {
      "epoch": 2.885496290221395,
      "grad_norm": 2.6875,
      "learning_rate": 2.12050645004686e-06,
      "loss": 0.6984,
      "step": 823310
    },
    {
      "epoch": 2.885531337728291,
      "grad_norm": 2.796875,
      "learning_rate": 2.119857421383158e-06,
      "loss": 0.7868,
      "step": 823320
    },
    {
      "epoch": 2.8855663852351863,
      "grad_norm": 2.71875,
      "learning_rate": 2.119208392719456e-06,
      "loss": 0.8049,
      "step": 823330
    },
    {
      "epoch": 2.885601432742082,
      "grad_norm": 3.0625,
      "learning_rate": 2.118559364055754e-06,
      "loss": 0.8875,
      "step": 823340
    },
    {
      "epoch": 2.8856364802489773,
      "grad_norm": 3.015625,
      "learning_rate": 2.117910335392052e-06,
      "loss": 0.836,
      "step": 823350
    },
    {
      "epoch": 2.885671527755873,
      "grad_norm": 2.859375,
      "learning_rate": 2.1172613067283506e-06,
      "loss": 0.7695,
      "step": 823360
    },
    {
      "epoch": 2.885706575262769,
      "grad_norm": 2.75,
      "learning_rate": 2.1166122780646486e-06,
      "loss": 0.7211,
      "step": 823370
    },
    {
      "epoch": 2.885741622769664,
      "grad_norm": 3.296875,
      "learning_rate": 2.1159632494009466e-06,
      "loss": 0.8259,
      "step": 823380
    },
    {
      "epoch": 2.88577667027656,
      "grad_norm": 2.875,
      "learning_rate": 2.1153142207372446e-06,
      "loss": 0.7814,
      "step": 823390
    },
    {
      "epoch": 2.8858117177834552,
      "grad_norm": 2.765625,
      "learning_rate": 2.114665192073543e-06,
      "loss": 0.8042,
      "step": 823400
    },
    {
      "epoch": 2.885846765290351,
      "grad_norm": 2.84375,
      "learning_rate": 2.1140161634098406e-06,
      "loss": 0.7911,
      "step": 823410
    },
    {
      "epoch": 2.8858818127972468,
      "grad_norm": 3.1875,
      "learning_rate": 2.113367134746139e-06,
      "loss": 0.7515,
      "step": 823420
    },
    {
      "epoch": 2.8859168603041425,
      "grad_norm": 2.75,
      "learning_rate": 2.1127181060824374e-06,
      "loss": 0.7645,
      "step": 823430
    },
    {
      "epoch": 2.885951907811038,
      "grad_norm": 2.734375,
      "learning_rate": 2.112069077418735e-06,
      "loss": 0.77,
      "step": 823440
    },
    {
      "epoch": 2.8859869553179336,
      "grad_norm": 2.703125,
      "learning_rate": 2.1114200487550334e-06,
      "loss": 0.7506,
      "step": 823450
    },
    {
      "epoch": 2.886022002824829,
      "grad_norm": 2.921875,
      "learning_rate": 2.1107710200913314e-06,
      "loss": 0.8048,
      "step": 823460
    },
    {
      "epoch": 2.8860570503317247,
      "grad_norm": 2.390625,
      "learning_rate": 2.1101219914276294e-06,
      "loss": 0.793,
      "step": 823470
    },
    {
      "epoch": 2.8860920978386204,
      "grad_norm": 2.609375,
      "learning_rate": 2.1094729627639274e-06,
      "loss": 0.811,
      "step": 823480
    },
    {
      "epoch": 2.8861271453455157,
      "grad_norm": 3.09375,
      "learning_rate": 2.108823934100226e-06,
      "loss": 0.7862,
      "step": 823490
    },
    {
      "epoch": 2.8861621928524115,
      "grad_norm": 2.78125,
      "learning_rate": 2.108174905436524e-06,
      "loss": 0.7652,
      "step": 823500
    },
    {
      "epoch": 2.886197240359307,
      "grad_norm": 3.25,
      "learning_rate": 2.107525876772822e-06,
      "loss": 0.7853,
      "step": 823510
    },
    {
      "epoch": 2.8862322878662026,
      "grad_norm": 3.046875,
      "learning_rate": 2.1068768481091203e-06,
      "loss": 0.8022,
      "step": 823520
    },
    {
      "epoch": 2.8862673353730983,
      "grad_norm": 3.296875,
      "learning_rate": 2.106227819445418e-06,
      "loss": 0.7193,
      "step": 823530
    },
    {
      "epoch": 2.886302382879994,
      "grad_norm": 2.6875,
      "learning_rate": 2.1055787907817163e-06,
      "loss": 0.7987,
      "step": 823540
    },
    {
      "epoch": 2.8863374303868894,
      "grad_norm": 3.5625,
      "learning_rate": 2.1049297621180143e-06,
      "loss": 0.7838,
      "step": 823550
    },
    {
      "epoch": 2.886372477893785,
      "grad_norm": 2.75,
      "learning_rate": 2.1042807334543123e-06,
      "loss": 0.7381,
      "step": 823560
    },
    {
      "epoch": 2.8864075254006805,
      "grad_norm": 2.78125,
      "learning_rate": 2.1036317047906107e-06,
      "loss": 0.8974,
      "step": 823570
    },
    {
      "epoch": 2.886442572907576,
      "grad_norm": 2.859375,
      "learning_rate": 2.1029826761269087e-06,
      "loss": 0.7889,
      "step": 823580
    },
    {
      "epoch": 2.886477620414472,
      "grad_norm": 2.859375,
      "learning_rate": 2.1023336474632067e-06,
      "loss": 0.742,
      "step": 823590
    },
    {
      "epoch": 2.8865126679213673,
      "grad_norm": 2.59375,
      "learning_rate": 2.1016846187995047e-06,
      "loss": 0.8246,
      "step": 823600
    },
    {
      "epoch": 2.886547715428263,
      "grad_norm": 2.390625,
      "learning_rate": 2.1010355901358027e-06,
      "loss": 0.7051,
      "step": 823610
    },
    {
      "epoch": 2.8865827629351584,
      "grad_norm": 2.765625,
      "learning_rate": 2.100386561472101e-06,
      "loss": 0.7701,
      "step": 823620
    },
    {
      "epoch": 2.886617810442054,
      "grad_norm": 2.859375,
      "learning_rate": 2.099737532808399e-06,
      "loss": 0.8553,
      "step": 823630
    },
    {
      "epoch": 2.88665285794895,
      "grad_norm": 3.171875,
      "learning_rate": 2.099088504144697e-06,
      "loss": 0.7885,
      "step": 823640
    },
    {
      "epoch": 2.8866879054558456,
      "grad_norm": 2.90625,
      "learning_rate": 2.098439475480995e-06,
      "loss": 0.783,
      "step": 823650
    },
    {
      "epoch": 2.886722952962741,
      "grad_norm": 2.8125,
      "learning_rate": 2.0977904468172935e-06,
      "loss": 0.8047,
      "step": 823660
    },
    {
      "epoch": 2.8867580004696367,
      "grad_norm": 2.8125,
      "learning_rate": 2.097141418153591e-06,
      "loss": 0.8348,
      "step": 823670
    },
    {
      "epoch": 2.886793047976532,
      "grad_norm": 3.03125,
      "learning_rate": 2.0964923894898895e-06,
      "loss": 0.8486,
      "step": 823680
    },
    {
      "epoch": 2.886828095483428,
      "grad_norm": 3.28125,
      "learning_rate": 2.095843360826188e-06,
      "loss": 0.7476,
      "step": 823690
    },
    {
      "epoch": 2.8868631429903235,
      "grad_norm": 2.96875,
      "learning_rate": 2.0951943321624855e-06,
      "loss": 0.7773,
      "step": 823700
    },
    {
      "epoch": 2.886898190497219,
      "grad_norm": 3.140625,
      "learning_rate": 2.094545303498784e-06,
      "loss": 0.7981,
      "step": 823710
    },
    {
      "epoch": 2.8869332380041146,
      "grad_norm": 2.6875,
      "learning_rate": 2.093896274835082e-06,
      "loss": 0.8312,
      "step": 823720
    },
    {
      "epoch": 2.88696828551101,
      "grad_norm": 2.6875,
      "learning_rate": 2.09324724617138e-06,
      "loss": 0.7047,
      "step": 823730
    },
    {
      "epoch": 2.8870033330179057,
      "grad_norm": 3.0,
      "learning_rate": 2.092598217507678e-06,
      "loss": 0.7182,
      "step": 823740
    },
    {
      "epoch": 2.8870383805248014,
      "grad_norm": 3.125,
      "learning_rate": 2.0919491888439764e-06,
      "loss": 0.83,
      "step": 823750
    },
    {
      "epoch": 2.887073428031697,
      "grad_norm": 2.765625,
      "learning_rate": 2.0913001601802744e-06,
      "loss": 0.8105,
      "step": 823760
    },
    {
      "epoch": 2.8871084755385925,
      "grad_norm": 2.53125,
      "learning_rate": 2.0906511315165724e-06,
      "loss": 0.8533,
      "step": 823770
    },
    {
      "epoch": 2.8871435230454883,
      "grad_norm": 3.0,
      "learning_rate": 2.090002102852871e-06,
      "loss": 0.8225,
      "step": 823780
    },
    {
      "epoch": 2.8871785705523836,
      "grad_norm": 2.96875,
      "learning_rate": 2.0893530741891684e-06,
      "loss": 0.8047,
      "step": 823790
    },
    {
      "epoch": 2.8872136180592793,
      "grad_norm": 3.65625,
      "learning_rate": 2.088704045525467e-06,
      "loss": 0.7399,
      "step": 823800
    },
    {
      "epoch": 2.887248665566175,
      "grad_norm": 2.859375,
      "learning_rate": 2.088055016861765e-06,
      "loss": 0.7147,
      "step": 823810
    },
    {
      "epoch": 2.8872837130730704,
      "grad_norm": 2.796875,
      "learning_rate": 2.087405988198063e-06,
      "loss": 0.7891,
      "step": 823820
    },
    {
      "epoch": 2.887318760579966,
      "grad_norm": 3.125,
      "learning_rate": 2.0867569595343612e-06,
      "loss": 0.752,
      "step": 823830
    },
    {
      "epoch": 2.8873538080868615,
      "grad_norm": 2.765625,
      "learning_rate": 2.0861079308706592e-06,
      "loss": 0.8399,
      "step": 823840
    },
    {
      "epoch": 2.8873888555937572,
      "grad_norm": 2.765625,
      "learning_rate": 2.0854589022069572e-06,
      "loss": 0.837,
      "step": 823850
    },
    {
      "epoch": 2.887423903100653,
      "grad_norm": 2.96875,
      "learning_rate": 2.0848098735432552e-06,
      "loss": 0.8449,
      "step": 823860
    },
    {
      "epoch": 2.8874589506075488,
      "grad_norm": 2.671875,
      "learning_rate": 2.0841608448795532e-06,
      "loss": 0.7798,
      "step": 823870
    },
    {
      "epoch": 2.887493998114444,
      "grad_norm": 2.984375,
      "learning_rate": 2.0835118162158512e-06,
      "loss": 0.8131,
      "step": 823880
    },
    {
      "epoch": 2.88752904562134,
      "grad_norm": 2.78125,
      "learning_rate": 2.0828627875521497e-06,
      "loss": 0.7376,
      "step": 823890
    },
    {
      "epoch": 2.887564093128235,
      "grad_norm": 3.109375,
      "learning_rate": 2.0822137588884477e-06,
      "loss": 0.9106,
      "step": 823900
    },
    {
      "epoch": 2.887599140635131,
      "grad_norm": 3.015625,
      "learning_rate": 2.0815647302247457e-06,
      "loss": 0.7781,
      "step": 823910
    },
    {
      "epoch": 2.8876341881420267,
      "grad_norm": 3.484375,
      "learning_rate": 2.080915701561044e-06,
      "loss": 0.8106,
      "step": 823920
    },
    {
      "epoch": 2.887669235648922,
      "grad_norm": 2.828125,
      "learning_rate": 2.0802666728973417e-06,
      "loss": 0.7943,
      "step": 823930
    },
    {
      "epoch": 2.8877042831558177,
      "grad_norm": 2.765625,
      "learning_rate": 2.07961764423364e-06,
      "loss": 0.7756,
      "step": 823940
    },
    {
      "epoch": 2.887739330662713,
      "grad_norm": 3.046875,
      "learning_rate": 2.078968615569938e-06,
      "loss": 0.9018,
      "step": 823950
    },
    {
      "epoch": 2.887774378169609,
      "grad_norm": 2.703125,
      "learning_rate": 2.078319586906236e-06,
      "loss": 0.7847,
      "step": 823960
    },
    {
      "epoch": 2.8878094256765046,
      "grad_norm": 3.375,
      "learning_rate": 2.0776705582425345e-06,
      "loss": 0.7412,
      "step": 823970
    },
    {
      "epoch": 2.8878444731834003,
      "grad_norm": 2.65625,
      "learning_rate": 2.0770215295788325e-06,
      "loss": 0.8022,
      "step": 823980
    },
    {
      "epoch": 2.8878795206902956,
      "grad_norm": 2.71875,
      "learning_rate": 2.0763725009151305e-06,
      "loss": 0.8301,
      "step": 823990
    },
    {
      "epoch": 2.8879145681971914,
      "grad_norm": 2.578125,
      "learning_rate": 2.0757234722514285e-06,
      "loss": 0.748,
      "step": 824000
    },
    {
      "epoch": 2.8879496157040867,
      "grad_norm": 3.640625,
      "learning_rate": 2.075074443587727e-06,
      "loss": 0.8912,
      "step": 824010
    },
    {
      "epoch": 2.8879846632109825,
      "grad_norm": 2.90625,
      "learning_rate": 2.0744254149240245e-06,
      "loss": 0.8037,
      "step": 824020
    },
    {
      "epoch": 2.888019710717878,
      "grad_norm": 2.4375,
      "learning_rate": 2.073776386260323e-06,
      "loss": 0.6619,
      "step": 824030
    },
    {
      "epoch": 2.8880547582247735,
      "grad_norm": 2.875,
      "learning_rate": 2.073127357596621e-06,
      "loss": 0.8692,
      "step": 824040
    },
    {
      "epoch": 2.8880898057316693,
      "grad_norm": 2.671875,
      "learning_rate": 2.072478328932919e-06,
      "loss": 0.7356,
      "step": 824050
    },
    {
      "epoch": 2.8881248532385646,
      "grad_norm": 3.0625,
      "learning_rate": 2.0718293002692173e-06,
      "loss": 0.8062,
      "step": 824060
    },
    {
      "epoch": 2.8881599007454604,
      "grad_norm": 2.859375,
      "learning_rate": 2.0711802716055153e-06,
      "loss": 0.7816,
      "step": 824070
    },
    {
      "epoch": 2.888194948252356,
      "grad_norm": 2.859375,
      "learning_rate": 2.0705312429418133e-06,
      "loss": 0.8043,
      "step": 824080
    },
    {
      "epoch": 2.888229995759252,
      "grad_norm": 3.65625,
      "learning_rate": 2.0698822142781118e-06,
      "loss": 0.857,
      "step": 824090
    },
    {
      "epoch": 2.888265043266147,
      "grad_norm": 3.046875,
      "learning_rate": 2.0692331856144093e-06,
      "loss": 0.7954,
      "step": 824100
    },
    {
      "epoch": 2.888300090773043,
      "grad_norm": 2.984375,
      "learning_rate": 2.0685841569507078e-06,
      "loss": 0.7521,
      "step": 824110
    },
    {
      "epoch": 2.8883351382799383,
      "grad_norm": 2.421875,
      "learning_rate": 2.0679351282870058e-06,
      "loss": 0.7603,
      "step": 824120
    },
    {
      "epoch": 2.888370185786834,
      "grad_norm": 3.390625,
      "learning_rate": 2.0672860996233038e-06,
      "loss": 0.757,
      "step": 824130
    },
    {
      "epoch": 2.88840523329373,
      "grad_norm": 3.0,
      "learning_rate": 2.0666370709596018e-06,
      "loss": 0.7016,
      "step": 824140
    },
    {
      "epoch": 2.888440280800625,
      "grad_norm": 2.984375,
      "learning_rate": 2.0659880422959e-06,
      "loss": 0.8043,
      "step": 824150
    },
    {
      "epoch": 2.888475328307521,
      "grad_norm": 3.34375,
      "learning_rate": 2.065339013632198e-06,
      "loss": 0.7108,
      "step": 824160
    },
    {
      "epoch": 2.888510375814416,
      "grad_norm": 3.0625,
      "learning_rate": 2.064689984968496e-06,
      "loss": 0.7474,
      "step": 824170
    },
    {
      "epoch": 2.888545423321312,
      "grad_norm": 2.703125,
      "learning_rate": 2.0640409563047946e-06,
      "loss": 0.7591,
      "step": 824180
    },
    {
      "epoch": 2.8885804708282077,
      "grad_norm": 3.203125,
      "learning_rate": 2.063391927641092e-06,
      "loss": 0.8393,
      "step": 824190
    },
    {
      "epoch": 2.8886155183351034,
      "grad_norm": 3.390625,
      "learning_rate": 2.0627428989773906e-06,
      "loss": 0.7845,
      "step": 824200
    },
    {
      "epoch": 2.8886505658419988,
      "grad_norm": 2.78125,
      "learning_rate": 2.0620938703136886e-06,
      "loss": 0.8704,
      "step": 824210
    },
    {
      "epoch": 2.8886856133488945,
      "grad_norm": 3.171875,
      "learning_rate": 2.0614448416499866e-06,
      "loss": 0.7786,
      "step": 824220
    },
    {
      "epoch": 2.88872066085579,
      "grad_norm": 2.921875,
      "learning_rate": 2.060795812986285e-06,
      "loss": 0.8216,
      "step": 824230
    },
    {
      "epoch": 2.8887557083626856,
      "grad_norm": 2.828125,
      "learning_rate": 2.060146784322583e-06,
      "loss": 0.748,
      "step": 824240
    },
    {
      "epoch": 2.8887907558695813,
      "grad_norm": 2.796875,
      "learning_rate": 2.059497755658881e-06,
      "loss": 0.7795,
      "step": 824250
    },
    {
      "epoch": 2.8888258033764767,
      "grad_norm": 3.28125,
      "learning_rate": 2.058848726995179e-06,
      "loss": 0.8635,
      "step": 824260
    },
    {
      "epoch": 2.8888608508833724,
      "grad_norm": 2.703125,
      "learning_rate": 2.0581996983314775e-06,
      "loss": 0.761,
      "step": 824270
    },
    {
      "epoch": 2.888895898390268,
      "grad_norm": 3.109375,
      "learning_rate": 2.057550669667775e-06,
      "loss": 0.7379,
      "step": 824280
    },
    {
      "epoch": 2.8889309458971635,
      "grad_norm": 2.75,
      "learning_rate": 2.0569016410040735e-06,
      "loss": 0.7752,
      "step": 824290
    },
    {
      "epoch": 2.8889659934040592,
      "grad_norm": 2.4375,
      "learning_rate": 2.0562526123403715e-06,
      "loss": 0.7283,
      "step": 824300
    },
    {
      "epoch": 2.889001040910955,
      "grad_norm": 2.8125,
      "learning_rate": 2.0556035836766695e-06,
      "loss": 0.8022,
      "step": 824310
    },
    {
      "epoch": 2.8890360884178503,
      "grad_norm": 2.90625,
      "learning_rate": 2.054954555012968e-06,
      "loss": 0.8409,
      "step": 824320
    },
    {
      "epoch": 2.889071135924746,
      "grad_norm": 2.984375,
      "learning_rate": 2.054305526349266e-06,
      "loss": 0.7564,
      "step": 824330
    },
    {
      "epoch": 2.8891061834316414,
      "grad_norm": 3.203125,
      "learning_rate": 2.053656497685564e-06,
      "loss": 0.853,
      "step": 824340
    },
    {
      "epoch": 2.889141230938537,
      "grad_norm": 2.59375,
      "learning_rate": 2.053007469021862e-06,
      "loss": 0.7308,
      "step": 824350
    },
    {
      "epoch": 2.889176278445433,
      "grad_norm": 2.75,
      "learning_rate": 2.05235844035816e-06,
      "loss": 0.7796,
      "step": 824360
    },
    {
      "epoch": 2.8892113259523287,
      "grad_norm": 3.359375,
      "learning_rate": 2.0517094116944583e-06,
      "loss": 0.7994,
      "step": 824370
    },
    {
      "epoch": 2.889246373459224,
      "grad_norm": 2.875,
      "learning_rate": 2.0510603830307563e-06,
      "loss": 0.7826,
      "step": 824380
    },
    {
      "epoch": 2.8892814209661197,
      "grad_norm": 2.265625,
      "learning_rate": 2.0504113543670543e-06,
      "loss": 0.757,
      "step": 824390
    },
    {
      "epoch": 2.889316468473015,
      "grad_norm": 2.84375,
      "learning_rate": 2.0497623257033523e-06,
      "loss": 0.7317,
      "step": 824400
    },
    {
      "epoch": 2.889351515979911,
      "grad_norm": 2.796875,
      "learning_rate": 2.0491132970396507e-06,
      "loss": 0.8058,
      "step": 824410
    },
    {
      "epoch": 2.8893865634868066,
      "grad_norm": 3.21875,
      "learning_rate": 2.0484642683759483e-06,
      "loss": 0.7496,
      "step": 824420
    },
    {
      "epoch": 2.889421610993702,
      "grad_norm": 3.125,
      "learning_rate": 2.0478152397122467e-06,
      "loss": 0.8144,
      "step": 824430
    },
    {
      "epoch": 2.8894566585005976,
      "grad_norm": 2.578125,
      "learning_rate": 2.047166211048545e-06,
      "loss": 0.7491,
      "step": 824440
    },
    {
      "epoch": 2.889491706007493,
      "grad_norm": 2.921875,
      "learning_rate": 2.0465171823848427e-06,
      "loss": 0.7976,
      "step": 824450
    },
    {
      "epoch": 2.8895267535143887,
      "grad_norm": 3.015625,
      "learning_rate": 2.045868153721141e-06,
      "loss": 0.8452,
      "step": 824460
    },
    {
      "epoch": 2.8895618010212845,
      "grad_norm": 3.0,
      "learning_rate": 2.045219125057439e-06,
      "loss": 0.9262,
      "step": 824470
    },
    {
      "epoch": 2.88959684852818,
      "grad_norm": 3.4375,
      "learning_rate": 2.044570096393737e-06,
      "loss": 0.851,
      "step": 824480
    },
    {
      "epoch": 2.8896318960350755,
      "grad_norm": 2.6875,
      "learning_rate": 2.0439210677300356e-06,
      "loss": 0.7293,
      "step": 824490
    },
    {
      "epoch": 2.8896669435419713,
      "grad_norm": 3.140625,
      "learning_rate": 2.0432720390663336e-06,
      "loss": 0.8088,
      "step": 824500
    },
    {
      "epoch": 2.8897019910488666,
      "grad_norm": 3.1875,
      "learning_rate": 2.0426230104026316e-06,
      "loss": 0.7789,
      "step": 824510
    },
    {
      "epoch": 2.8897370385557624,
      "grad_norm": 3.15625,
      "learning_rate": 2.0419739817389296e-06,
      "loss": 0.7954,
      "step": 824520
    },
    {
      "epoch": 2.889772086062658,
      "grad_norm": 3.0,
      "learning_rate": 2.0413249530752276e-06,
      "loss": 0.7626,
      "step": 824530
    },
    {
      "epoch": 2.8898071335695534,
      "grad_norm": 2.6875,
      "learning_rate": 2.0406759244115256e-06,
      "loss": 0.7047,
      "step": 824540
    },
    {
      "epoch": 2.889842181076449,
      "grad_norm": 2.765625,
      "learning_rate": 2.040026895747824e-06,
      "loss": 0.8111,
      "step": 824550
    },
    {
      "epoch": 2.8898772285833445,
      "grad_norm": 3.15625,
      "learning_rate": 2.039377867084122e-06,
      "loss": 0.8372,
      "step": 824560
    },
    {
      "epoch": 2.8899122760902403,
      "grad_norm": 3.0,
      "learning_rate": 2.03872883842042e-06,
      "loss": 0.7976,
      "step": 824570
    },
    {
      "epoch": 2.889947323597136,
      "grad_norm": 3.296875,
      "learning_rate": 2.0380798097567184e-06,
      "loss": 0.8244,
      "step": 824580
    },
    {
      "epoch": 2.8899823711040318,
      "grad_norm": 3.28125,
      "learning_rate": 2.037430781093016e-06,
      "loss": 0.8401,
      "step": 824590
    },
    {
      "epoch": 2.890017418610927,
      "grad_norm": 2.75,
      "learning_rate": 2.0367817524293144e-06,
      "loss": 0.6956,
      "step": 824600
    },
    {
      "epoch": 2.890052466117823,
      "grad_norm": 2.890625,
      "learning_rate": 2.0361327237656124e-06,
      "loss": 0.8666,
      "step": 824610
    },
    {
      "epoch": 2.890087513624718,
      "grad_norm": 2.828125,
      "learning_rate": 2.0354836951019104e-06,
      "loss": 0.7752,
      "step": 824620
    },
    {
      "epoch": 2.890122561131614,
      "grad_norm": 3.03125,
      "learning_rate": 2.034834666438209e-06,
      "loss": 0.8103,
      "step": 824630
    },
    {
      "epoch": 2.8901576086385097,
      "grad_norm": 3.328125,
      "learning_rate": 2.034185637774507e-06,
      "loss": 0.8227,
      "step": 824640
    },
    {
      "epoch": 2.890192656145405,
      "grad_norm": 3.1875,
      "learning_rate": 2.033536609110805e-06,
      "loss": 0.7336,
      "step": 824650
    },
    {
      "epoch": 2.8902277036523008,
      "grad_norm": 3.328125,
      "learning_rate": 2.032887580447103e-06,
      "loss": 0.8086,
      "step": 824660
    },
    {
      "epoch": 2.890262751159196,
      "grad_norm": 3.09375,
      "learning_rate": 2.0322385517834013e-06,
      "loss": 0.8437,
      "step": 824670
    },
    {
      "epoch": 2.890297798666092,
      "grad_norm": 2.5625,
      "learning_rate": 2.031589523119699e-06,
      "loss": 0.753,
      "step": 824680
    },
    {
      "epoch": 2.8903328461729876,
      "grad_norm": 2.71875,
      "learning_rate": 2.0309404944559973e-06,
      "loss": 0.8015,
      "step": 824690
    },
    {
      "epoch": 2.8903678936798833,
      "grad_norm": 3.0,
      "learning_rate": 2.0302914657922957e-06,
      "loss": 0.896,
      "step": 824700
    },
    {
      "epoch": 2.8904029411867787,
      "grad_norm": 2.515625,
      "learning_rate": 2.0296424371285933e-06,
      "loss": 0.7575,
      "step": 824710
    },
    {
      "epoch": 2.8904379886936744,
      "grad_norm": 3.203125,
      "learning_rate": 2.0289934084648917e-06,
      "loss": 0.7714,
      "step": 824720
    },
    {
      "epoch": 2.8904730362005697,
      "grad_norm": 2.34375,
      "learning_rate": 2.0283443798011897e-06,
      "loss": 0.7705,
      "step": 824730
    },
    {
      "epoch": 2.8905080837074655,
      "grad_norm": 2.828125,
      "learning_rate": 2.0276953511374877e-06,
      "loss": 0.8162,
      "step": 824740
    },
    {
      "epoch": 2.8905431312143612,
      "grad_norm": 2.671875,
      "learning_rate": 2.0270463224737857e-06,
      "loss": 0.7871,
      "step": 824750
    },
    {
      "epoch": 2.8905781787212566,
      "grad_norm": 2.84375,
      "learning_rate": 2.026397293810084e-06,
      "loss": 0.7906,
      "step": 824760
    },
    {
      "epoch": 2.8906132262281523,
      "grad_norm": 2.671875,
      "learning_rate": 2.025748265146382e-06,
      "loss": 0.7094,
      "step": 824770
    },
    {
      "epoch": 2.8906482737350476,
      "grad_norm": 2.953125,
      "learning_rate": 2.02509923648268e-06,
      "loss": 0.83,
      "step": 824780
    },
    {
      "epoch": 2.8906833212419434,
      "grad_norm": 3.171875,
      "learning_rate": 2.024450207818978e-06,
      "loss": 0.8446,
      "step": 824790
    },
    {
      "epoch": 2.890718368748839,
      "grad_norm": 2.828125,
      "learning_rate": 2.023801179155276e-06,
      "loss": 0.7955,
      "step": 824800
    },
    {
      "epoch": 2.890753416255735,
      "grad_norm": 2.6875,
      "learning_rate": 2.0231521504915745e-06,
      "loss": 0.7984,
      "step": 824810
    },
    {
      "epoch": 2.89078846376263,
      "grad_norm": 3.21875,
      "learning_rate": 2.0225031218278725e-06,
      "loss": 0.8097,
      "step": 824820
    },
    {
      "epoch": 2.890823511269526,
      "grad_norm": 3.09375,
      "learning_rate": 2.0218540931641705e-06,
      "loss": 0.766,
      "step": 824830
    },
    {
      "epoch": 2.8908585587764213,
      "grad_norm": 2.71875,
      "learning_rate": 2.021205064500469e-06,
      "loss": 0.8137,
      "step": 824840
    },
    {
      "epoch": 2.890893606283317,
      "grad_norm": 2.90625,
      "learning_rate": 2.0205560358367665e-06,
      "loss": 0.8949,
      "step": 824850
    },
    {
      "epoch": 2.890928653790213,
      "grad_norm": 2.359375,
      "learning_rate": 2.019907007173065e-06,
      "loss": 0.7738,
      "step": 824860
    },
    {
      "epoch": 2.890963701297108,
      "grad_norm": 2.84375,
      "learning_rate": 2.019257978509363e-06,
      "loss": 0.8709,
      "step": 824870
    },
    {
      "epoch": 2.890998748804004,
      "grad_norm": 2.5625,
      "learning_rate": 2.018608949845661e-06,
      "loss": 0.7708,
      "step": 824880
    },
    {
      "epoch": 2.891033796310899,
      "grad_norm": 2.96875,
      "learning_rate": 2.017959921181959e-06,
      "loss": 0.797,
      "step": 824890
    },
    {
      "epoch": 2.891068843817795,
      "grad_norm": 2.609375,
      "learning_rate": 2.0173108925182574e-06,
      "loss": 0.8302,
      "step": 824900
    },
    {
      "epoch": 2.8911038913246907,
      "grad_norm": 2.1875,
      "learning_rate": 2.0166618638545554e-06,
      "loss": 0.7734,
      "step": 824910
    },
    {
      "epoch": 2.8911389388315865,
      "grad_norm": 3.046875,
      "learning_rate": 2.0160128351908534e-06,
      "loss": 0.9009,
      "step": 824920
    },
    {
      "epoch": 2.8911739863384818,
      "grad_norm": 2.9375,
      "learning_rate": 2.015363806527152e-06,
      "loss": 0.7784,
      "step": 824930
    },
    {
      "epoch": 2.8912090338453775,
      "grad_norm": 3.09375,
      "learning_rate": 2.0147147778634494e-06,
      "loss": 0.7615,
      "step": 824940
    },
    {
      "epoch": 2.891244081352273,
      "grad_norm": 2.375,
      "learning_rate": 2.014065749199748e-06,
      "loss": 0.7102,
      "step": 824950
    },
    {
      "epoch": 2.8912791288591686,
      "grad_norm": 2.4375,
      "learning_rate": 2.0134167205360462e-06,
      "loss": 0.7379,
      "step": 824960
    },
    {
      "epoch": 2.8913141763660644,
      "grad_norm": 3.015625,
      "learning_rate": 2.012767691872344e-06,
      "loss": 0.8768,
      "step": 824970
    },
    {
      "epoch": 2.8913492238729597,
      "grad_norm": 2.984375,
      "learning_rate": 2.0121186632086422e-06,
      "loss": 0.8132,
      "step": 824980
    },
    {
      "epoch": 2.8913842713798554,
      "grad_norm": 3.0,
      "learning_rate": 2.0114696345449402e-06,
      "loss": 0.7579,
      "step": 824990
    },
    {
      "epoch": 2.8914193188867507,
      "grad_norm": 2.75,
      "learning_rate": 2.0108206058812382e-06,
      "loss": 0.8028,
      "step": 825000
    },
    {
      "epoch": 2.8914193188867507,
      "eval_loss": 0.7499328255653381,
      "eval_runtime": 553.7643,
      "eval_samples_per_second": 687.0,
      "eval_steps_per_second": 57.25,
      "step": 825000
    },
    {
      "epoch": 2.8914543663936465,
      "grad_norm": 2.890625,
      "learning_rate": 2.0101715772175362e-06,
      "loss": 0.7556,
      "step": 825010
    },
    {
      "epoch": 2.8914894139005423,
      "grad_norm": 2.953125,
      "learning_rate": 2.0095225485538347e-06,
      "loss": 0.7668,
      "step": 825020
    },
    {
      "epoch": 2.891524461407438,
      "grad_norm": 3.234375,
      "learning_rate": 2.0088735198901327e-06,
      "loss": 0.8267,
      "step": 825030
    },
    {
      "epoch": 2.8915595089143333,
      "grad_norm": 2.921875,
      "learning_rate": 2.0082244912264307e-06,
      "loss": 0.8104,
      "step": 825040
    },
    {
      "epoch": 2.891594556421229,
      "grad_norm": 2.984375,
      "learning_rate": 2.0075754625627287e-06,
      "loss": 0.7604,
      "step": 825050
    },
    {
      "epoch": 2.8916296039281244,
      "grad_norm": 2.734375,
      "learning_rate": 2.0069264338990267e-06,
      "loss": 0.8611,
      "step": 825060
    },
    {
      "epoch": 2.89166465143502,
      "grad_norm": 3.328125,
      "learning_rate": 2.006277405235325e-06,
      "loss": 0.7346,
      "step": 825070
    },
    {
      "epoch": 2.891699698941916,
      "grad_norm": 3.03125,
      "learning_rate": 2.005628376571623e-06,
      "loss": 0.8718,
      "step": 825080
    },
    {
      "epoch": 2.8917347464488112,
      "grad_norm": 3.125,
      "learning_rate": 2.004979347907921e-06,
      "loss": 0.8615,
      "step": 825090
    },
    {
      "epoch": 2.891769793955707,
      "grad_norm": 2.921875,
      "learning_rate": 2.0043303192442195e-06,
      "loss": 0.796,
      "step": 825100
    },
    {
      "epoch": 2.8918048414626023,
      "grad_norm": 2.96875,
      "learning_rate": 2.003681290580517e-06,
      "loss": 0.734,
      "step": 825110
    },
    {
      "epoch": 2.891839888969498,
      "grad_norm": 2.78125,
      "learning_rate": 2.0030322619168155e-06,
      "loss": 0.7976,
      "step": 825120
    },
    {
      "epoch": 2.891874936476394,
      "grad_norm": 3.25,
      "learning_rate": 2.0023832332531135e-06,
      "loss": 0.8377,
      "step": 825130
    },
    {
      "epoch": 2.8919099839832896,
      "grad_norm": 2.78125,
      "learning_rate": 2.0017342045894115e-06,
      "loss": 0.8732,
      "step": 825140
    },
    {
      "epoch": 2.891945031490185,
      "grad_norm": 2.53125,
      "learning_rate": 2.0010851759257095e-06,
      "loss": 0.7432,
      "step": 825150
    },
    {
      "epoch": 2.8919800789970806,
      "grad_norm": 2.71875,
      "learning_rate": 2.000436147262008e-06,
      "loss": 0.8178,
      "step": 825160
    },
    {
      "epoch": 2.892015126503976,
      "grad_norm": 3.171875,
      "learning_rate": 1.999787118598306e-06,
      "loss": 0.8413,
      "step": 825170
    },
    {
      "epoch": 2.8920501740108717,
      "grad_norm": 3.171875,
      "learning_rate": 1.999138089934604e-06,
      "loss": 0.8742,
      "step": 825180
    },
    {
      "epoch": 2.8920852215177675,
      "grad_norm": 3.203125,
      "learning_rate": 1.9984890612709023e-06,
      "loss": 0.7986,
      "step": 825190
    },
    {
      "epoch": 2.892120269024663,
      "grad_norm": 2.9375,
      "learning_rate": 1.9978400326072e-06,
      "loss": 0.7755,
      "step": 825200
    },
    {
      "epoch": 2.8921553165315586,
      "grad_norm": 3.46875,
      "learning_rate": 1.9971910039434983e-06,
      "loss": 0.8251,
      "step": 825210
    },
    {
      "epoch": 2.892190364038454,
      "grad_norm": 3.015625,
      "learning_rate": 1.9965419752797963e-06,
      "loss": 0.7839,
      "step": 825220
    },
    {
      "epoch": 2.8922254115453496,
      "grad_norm": 3.453125,
      "learning_rate": 1.9958929466160943e-06,
      "loss": 0.8516,
      "step": 825230
    },
    {
      "epoch": 2.8922604590522454,
      "grad_norm": 3.046875,
      "learning_rate": 1.9952439179523928e-06,
      "loss": 0.83,
      "step": 825240
    },
    {
      "epoch": 2.892295506559141,
      "grad_norm": 2.515625,
      "learning_rate": 1.9945948892886908e-06,
      "loss": 0.6994,
      "step": 825250
    },
    {
      "epoch": 2.8923305540660365,
      "grad_norm": 2.890625,
      "learning_rate": 1.9939458606249888e-06,
      "loss": 0.7719,
      "step": 825260
    },
    {
      "epoch": 2.892365601572932,
      "grad_norm": 3.359375,
      "learning_rate": 1.9932968319612868e-06,
      "loss": 0.8202,
      "step": 825270
    },
    {
      "epoch": 2.8924006490798275,
      "grad_norm": 2.65625,
      "learning_rate": 1.9926478032975848e-06,
      "loss": 0.7683,
      "step": 825280
    },
    {
      "epoch": 2.8924356965867233,
      "grad_norm": 2.859375,
      "learning_rate": 1.9919987746338828e-06,
      "loss": 0.8307,
      "step": 825290
    },
    {
      "epoch": 2.892470744093619,
      "grad_norm": 2.640625,
      "learning_rate": 1.991349745970181e-06,
      "loss": 0.8093,
      "step": 825300
    },
    {
      "epoch": 2.8925057916005144,
      "grad_norm": 3.21875,
      "learning_rate": 1.990700717306479e-06,
      "loss": 0.8915,
      "step": 825310
    },
    {
      "epoch": 2.89254083910741,
      "grad_norm": 2.84375,
      "learning_rate": 1.990051688642777e-06,
      "loss": 0.7881,
      "step": 825320
    },
    {
      "epoch": 2.8925758866143054,
      "grad_norm": 3.203125,
      "learning_rate": 1.9894026599790756e-06,
      "loss": 0.7523,
      "step": 825330
    },
    {
      "epoch": 2.892610934121201,
      "grad_norm": 2.984375,
      "learning_rate": 1.988753631315373e-06,
      "loss": 0.8547,
      "step": 825340
    },
    {
      "epoch": 2.892645981628097,
      "grad_norm": 2.578125,
      "learning_rate": 1.9881046026516716e-06,
      "loss": 0.8042,
      "step": 825350
    },
    {
      "epoch": 2.8926810291349927,
      "grad_norm": 2.765625,
      "learning_rate": 1.9874555739879696e-06,
      "loss": 0.8417,
      "step": 825360
    },
    {
      "epoch": 2.892716076641888,
      "grad_norm": 3.140625,
      "learning_rate": 1.9868065453242676e-06,
      "loss": 0.7952,
      "step": 825370
    },
    {
      "epoch": 2.8927511241487838,
      "grad_norm": 2.328125,
      "learning_rate": 1.986157516660566e-06,
      "loss": 0.7864,
      "step": 825380
    },
    {
      "epoch": 2.892786171655679,
      "grad_norm": 2.890625,
      "learning_rate": 1.985508487996864e-06,
      "loss": 0.8337,
      "step": 825390
    },
    {
      "epoch": 2.892821219162575,
      "grad_norm": 2.921875,
      "learning_rate": 1.984859459333162e-06,
      "loss": 0.8468,
      "step": 825400
    },
    {
      "epoch": 2.8928562666694706,
      "grad_norm": 2.796875,
      "learning_rate": 1.98421043066946e-06,
      "loss": 0.7769,
      "step": 825410
    },
    {
      "epoch": 2.892891314176366,
      "grad_norm": 3.046875,
      "learning_rate": 1.9835614020057585e-06,
      "loss": 0.8593,
      "step": 825420
    },
    {
      "epoch": 2.8929263616832617,
      "grad_norm": 3.03125,
      "learning_rate": 1.9829123733420565e-06,
      "loss": 0.8873,
      "step": 825430
    },
    {
      "epoch": 2.892961409190157,
      "grad_norm": 3.203125,
      "learning_rate": 1.9822633446783545e-06,
      "loss": 0.8921,
      "step": 825440
    },
    {
      "epoch": 2.8929964566970527,
      "grad_norm": 3.203125,
      "learning_rate": 1.981614316014653e-06,
      "loss": 0.8459,
      "step": 825450
    },
    {
      "epoch": 2.8930315042039485,
      "grad_norm": 2.796875,
      "learning_rate": 1.9809652873509505e-06,
      "loss": 0.8451,
      "step": 825460
    },
    {
      "epoch": 2.8930665517108443,
      "grad_norm": 3.015625,
      "learning_rate": 1.980316258687249e-06,
      "loss": 0.8244,
      "step": 825470
    },
    {
      "epoch": 2.8931015992177396,
      "grad_norm": 2.96875,
      "learning_rate": 1.979667230023547e-06,
      "loss": 0.8267,
      "step": 825480
    },
    {
      "epoch": 2.8931366467246353,
      "grad_norm": 2.9375,
      "learning_rate": 1.979018201359845e-06,
      "loss": 0.829,
      "step": 825490
    },
    {
      "epoch": 2.8931716942315306,
      "grad_norm": 3.03125,
      "learning_rate": 1.9783691726961433e-06,
      "loss": 0.8235,
      "step": 825500
    },
    {
      "epoch": 2.8932067417384264,
      "grad_norm": 2.859375,
      "learning_rate": 1.9777201440324413e-06,
      "loss": 0.8042,
      "step": 825510
    },
    {
      "epoch": 2.893241789245322,
      "grad_norm": 2.578125,
      "learning_rate": 1.9770711153687393e-06,
      "loss": 0.7838,
      "step": 825520
    },
    {
      "epoch": 2.8932768367522175,
      "grad_norm": 2.828125,
      "learning_rate": 1.9764220867050373e-06,
      "loss": 0.7243,
      "step": 825530
    },
    {
      "epoch": 2.8933118842591132,
      "grad_norm": 3.140625,
      "learning_rate": 1.9757730580413353e-06,
      "loss": 0.7838,
      "step": 825540
    },
    {
      "epoch": 2.893346931766009,
      "grad_norm": 3.328125,
      "learning_rate": 1.9751240293776333e-06,
      "loss": 0.8361,
      "step": 825550
    },
    {
      "epoch": 2.8933819792729043,
      "grad_norm": 2.9375,
      "learning_rate": 1.9744750007139317e-06,
      "loss": 0.8117,
      "step": 825560
    },
    {
      "epoch": 2.8934170267798,
      "grad_norm": 2.6875,
      "learning_rate": 1.9738259720502297e-06,
      "loss": 0.7974,
      "step": 825570
    },
    {
      "epoch": 2.893452074286696,
      "grad_norm": 2.953125,
      "learning_rate": 1.9731769433865277e-06,
      "loss": 0.7854,
      "step": 825580
    },
    {
      "epoch": 2.893487121793591,
      "grad_norm": 3.046875,
      "learning_rate": 1.972527914722826e-06,
      "loss": 0.9033,
      "step": 825590
    },
    {
      "epoch": 2.893522169300487,
      "grad_norm": 3.328125,
      "learning_rate": 1.9718788860591237e-06,
      "loss": 0.8604,
      "step": 825600
    },
    {
      "epoch": 2.893557216807382,
      "grad_norm": 2.765625,
      "learning_rate": 1.971229857395422e-06,
      "loss": 0.8051,
      "step": 825610
    },
    {
      "epoch": 2.893592264314278,
      "grad_norm": 2.890625,
      "learning_rate": 1.97058082873172e-06,
      "loss": 0.7703,
      "step": 825620
    },
    {
      "epoch": 2.8936273118211737,
      "grad_norm": 2.984375,
      "learning_rate": 1.969931800068018e-06,
      "loss": 0.8101,
      "step": 825630
    },
    {
      "epoch": 2.893662359328069,
      "grad_norm": 2.84375,
      "learning_rate": 1.9692827714043166e-06,
      "loss": 0.7782,
      "step": 825640
    },
    {
      "epoch": 2.893697406834965,
      "grad_norm": 3.265625,
      "learning_rate": 1.9686337427406146e-06,
      "loss": 0.8163,
      "step": 825650
    },
    {
      "epoch": 2.8937324543418605,
      "grad_norm": 2.96875,
      "learning_rate": 1.9679847140769126e-06,
      "loss": 0.8381,
      "step": 825660
    },
    {
      "epoch": 2.893767501848756,
      "grad_norm": 2.828125,
      "learning_rate": 1.9673356854132106e-06,
      "loss": 0.7237,
      "step": 825670
    },
    {
      "epoch": 2.8938025493556516,
      "grad_norm": 3.15625,
      "learning_rate": 1.966686656749509e-06,
      "loss": 0.7535,
      "step": 825680
    },
    {
      "epoch": 2.8938375968625474,
      "grad_norm": 2.828125,
      "learning_rate": 1.9660376280858066e-06,
      "loss": 0.8854,
      "step": 825690
    },
    {
      "epoch": 2.8938726443694427,
      "grad_norm": 2.890625,
      "learning_rate": 1.965388599422105e-06,
      "loss": 0.7918,
      "step": 825700
    },
    {
      "epoch": 2.8939076918763385,
      "grad_norm": 2.859375,
      "learning_rate": 1.964739570758403e-06,
      "loss": 0.8413,
      "step": 825710
    },
    {
      "epoch": 2.8939427393832338,
      "grad_norm": 2.5625,
      "learning_rate": 1.964090542094701e-06,
      "loss": 0.7358,
      "step": 825720
    },
    {
      "epoch": 2.8939777868901295,
      "grad_norm": 3.234375,
      "learning_rate": 1.9634415134309994e-06,
      "loss": 0.8407,
      "step": 825730
    },
    {
      "epoch": 2.8940128343970253,
      "grad_norm": 2.9375,
      "learning_rate": 1.9627924847672974e-06,
      "loss": 0.7723,
      "step": 825740
    },
    {
      "epoch": 2.894047881903921,
      "grad_norm": 3.109375,
      "learning_rate": 1.9621434561035954e-06,
      "loss": 0.8452,
      "step": 825750
    },
    {
      "epoch": 2.8940829294108164,
      "grad_norm": 3.140625,
      "learning_rate": 1.9614944274398934e-06,
      "loss": 0.8772,
      "step": 825760
    },
    {
      "epoch": 2.894117976917712,
      "grad_norm": 2.953125,
      "learning_rate": 1.9608453987761914e-06,
      "loss": 0.8025,
      "step": 825770
    },
    {
      "epoch": 2.8941530244246074,
      "grad_norm": 2.71875,
      "learning_rate": 1.96019637011249e-06,
      "loss": 0.6785,
      "step": 825780
    },
    {
      "epoch": 2.894188071931503,
      "grad_norm": 2.953125,
      "learning_rate": 1.959547341448788e-06,
      "loss": 0.7575,
      "step": 825790
    },
    {
      "epoch": 2.894223119438399,
      "grad_norm": 2.6875,
      "learning_rate": 1.958898312785086e-06,
      "loss": 0.8332,
      "step": 825800
    },
    {
      "epoch": 2.8942581669452943,
      "grad_norm": 2.6875,
      "learning_rate": 1.958249284121384e-06,
      "loss": 0.7985,
      "step": 825810
    },
    {
      "epoch": 2.89429321445219,
      "grad_norm": 2.921875,
      "learning_rate": 1.9576002554576823e-06,
      "loss": 0.8221,
      "step": 825820
    },
    {
      "epoch": 2.8943282619590853,
      "grad_norm": 3.484375,
      "learning_rate": 1.9569512267939803e-06,
      "loss": 0.8102,
      "step": 825830
    },
    {
      "epoch": 2.894363309465981,
      "grad_norm": 2.90625,
      "learning_rate": 1.9563021981302783e-06,
      "loss": 0.7842,
      "step": 825840
    },
    {
      "epoch": 2.894398356972877,
      "grad_norm": 3.109375,
      "learning_rate": 1.9556531694665767e-06,
      "loss": 0.8199,
      "step": 825850
    },
    {
      "epoch": 2.8944334044797726,
      "grad_norm": 2.625,
      "learning_rate": 1.9550041408028743e-06,
      "loss": 0.7779,
      "step": 825860
    },
    {
      "epoch": 2.894468451986668,
      "grad_norm": 3.09375,
      "learning_rate": 1.9543551121391727e-06,
      "loss": 0.8109,
      "step": 825870
    },
    {
      "epoch": 2.8945034994935637,
      "grad_norm": 2.84375,
      "learning_rate": 1.9537060834754707e-06,
      "loss": 0.8183,
      "step": 825880
    },
    {
      "epoch": 2.894538547000459,
      "grad_norm": 3.0,
      "learning_rate": 1.9530570548117687e-06,
      "loss": 0.8588,
      "step": 825890
    },
    {
      "epoch": 2.8945735945073547,
      "grad_norm": 2.640625,
      "learning_rate": 1.952408026148067e-06,
      "loss": 0.7791,
      "step": 825900
    },
    {
      "epoch": 2.8946086420142505,
      "grad_norm": 2.9375,
      "learning_rate": 1.951758997484365e-06,
      "loss": 0.8495,
      "step": 825910
    },
    {
      "epoch": 2.894643689521146,
      "grad_norm": 3.796875,
      "learning_rate": 1.951109968820663e-06,
      "loss": 0.7625,
      "step": 825920
    },
    {
      "epoch": 2.8946787370280416,
      "grad_norm": 3.25,
      "learning_rate": 1.950460940156961e-06,
      "loss": 0.7659,
      "step": 825930
    },
    {
      "epoch": 2.894713784534937,
      "grad_norm": 2.65625,
      "learning_rate": 1.9498119114932595e-06,
      "loss": 0.8399,
      "step": 825940
    },
    {
      "epoch": 2.8947488320418326,
      "grad_norm": 2.84375,
      "learning_rate": 1.949162882829557e-06,
      "loss": 0.7827,
      "step": 825950
    },
    {
      "epoch": 2.8947838795487284,
      "grad_norm": 2.734375,
      "learning_rate": 1.9485138541658555e-06,
      "loss": 0.73,
      "step": 825960
    },
    {
      "epoch": 2.894818927055624,
      "grad_norm": 3.15625,
      "learning_rate": 1.9478648255021535e-06,
      "loss": 0.9557,
      "step": 825970
    },
    {
      "epoch": 2.8948539745625195,
      "grad_norm": 3.109375,
      "learning_rate": 1.9472157968384515e-06,
      "loss": 0.8161,
      "step": 825980
    },
    {
      "epoch": 2.8948890220694152,
      "grad_norm": 2.890625,
      "learning_rate": 1.94656676817475e-06,
      "loss": 0.8376,
      "step": 825990
    },
    {
      "epoch": 2.8949240695763105,
      "grad_norm": 2.90625,
      "learning_rate": 1.945917739511048e-06,
      "loss": 0.716,
      "step": 826000
    },
    {
      "epoch": 2.8949591170832063,
      "grad_norm": 2.828125,
      "learning_rate": 1.945268710847346e-06,
      "loss": 0.8063,
      "step": 826010
    },
    {
      "epoch": 2.894994164590102,
      "grad_norm": 3.203125,
      "learning_rate": 1.944619682183644e-06,
      "loss": 0.849,
      "step": 826020
    },
    {
      "epoch": 2.8950292120969974,
      "grad_norm": 2.75,
      "learning_rate": 1.943970653519942e-06,
      "loss": 0.9148,
      "step": 826030
    },
    {
      "epoch": 2.895064259603893,
      "grad_norm": 2.65625,
      "learning_rate": 1.9433216248562404e-06,
      "loss": 0.7973,
      "step": 826040
    },
    {
      "epoch": 2.8950993071107884,
      "grad_norm": 3.0625,
      "learning_rate": 1.9426725961925384e-06,
      "loss": 0.7818,
      "step": 826050
    },
    {
      "epoch": 2.895134354617684,
      "grad_norm": 2.859375,
      "learning_rate": 1.9420235675288364e-06,
      "loss": 0.8206,
      "step": 826060
    },
    {
      "epoch": 2.89516940212458,
      "grad_norm": 2.796875,
      "learning_rate": 1.9413745388651344e-06,
      "loss": 0.7552,
      "step": 826070
    },
    {
      "epoch": 2.8952044496314757,
      "grad_norm": 2.484375,
      "learning_rate": 1.940725510201433e-06,
      "loss": 0.8844,
      "step": 826080
    },
    {
      "epoch": 2.895239497138371,
      "grad_norm": 2.9375,
      "learning_rate": 1.9400764815377304e-06,
      "loss": 0.6827,
      "step": 826090
    },
    {
      "epoch": 2.895274544645267,
      "grad_norm": 2.640625,
      "learning_rate": 1.939427452874029e-06,
      "loss": 0.7872,
      "step": 826100
    },
    {
      "epoch": 2.895309592152162,
      "grad_norm": 3.46875,
      "learning_rate": 1.9387784242103272e-06,
      "loss": 0.8597,
      "step": 826110
    },
    {
      "epoch": 2.895344639659058,
      "grad_norm": 2.375,
      "learning_rate": 1.938129395546625e-06,
      "loss": 0.7235,
      "step": 826120
    },
    {
      "epoch": 2.8953796871659536,
      "grad_norm": 2.3125,
      "learning_rate": 1.9374803668829232e-06,
      "loss": 0.6929,
      "step": 826130
    },
    {
      "epoch": 2.895414734672849,
      "grad_norm": 2.8125,
      "learning_rate": 1.9368313382192212e-06,
      "loss": 0.8201,
      "step": 826140
    },
    {
      "epoch": 2.8954497821797447,
      "grad_norm": 2.515625,
      "learning_rate": 1.9361823095555192e-06,
      "loss": 0.8397,
      "step": 826150
    },
    {
      "epoch": 2.89548482968664,
      "grad_norm": 2.9375,
      "learning_rate": 1.9355332808918172e-06,
      "loss": 0.8037,
      "step": 826160
    },
    {
      "epoch": 2.8955198771935358,
      "grad_norm": 2.46875,
      "learning_rate": 1.9348842522281157e-06,
      "loss": 0.8265,
      "step": 826170
    },
    {
      "epoch": 2.8955549247004315,
      "grad_norm": 2.828125,
      "learning_rate": 1.9342352235644137e-06,
      "loss": 0.7838,
      "step": 826180
    },
    {
      "epoch": 2.8955899722073273,
      "grad_norm": 3.0,
      "learning_rate": 1.9335861949007117e-06,
      "loss": 0.7513,
      "step": 826190
    },
    {
      "epoch": 2.8956250197142226,
      "grad_norm": 2.78125,
      "learning_rate": 1.93293716623701e-06,
      "loss": 0.875,
      "step": 826200
    },
    {
      "epoch": 2.8956600672211184,
      "grad_norm": 3.4375,
      "learning_rate": 1.9322881375733077e-06,
      "loss": 0.7927,
      "step": 826210
    },
    {
      "epoch": 2.8956951147280137,
      "grad_norm": 3.234375,
      "learning_rate": 1.931639108909606e-06,
      "loss": 0.7925,
      "step": 826220
    },
    {
      "epoch": 2.8957301622349094,
      "grad_norm": 3.140625,
      "learning_rate": 1.930990080245904e-06,
      "loss": 0.8364,
      "step": 826230
    },
    {
      "epoch": 2.895765209741805,
      "grad_norm": 3.015625,
      "learning_rate": 1.930341051582202e-06,
      "loss": 0.7688,
      "step": 826240
    },
    {
      "epoch": 2.8958002572487005,
      "grad_norm": 3.203125,
      "learning_rate": 1.9296920229185005e-06,
      "loss": 0.7948,
      "step": 826250
    },
    {
      "epoch": 2.8958353047555963,
      "grad_norm": 2.890625,
      "learning_rate": 1.929042994254798e-06,
      "loss": 0.7801,
      "step": 826260
    },
    {
      "epoch": 2.8958703522624916,
      "grad_norm": 3.015625,
      "learning_rate": 1.9283939655910965e-06,
      "loss": 0.8619,
      "step": 826270
    },
    {
      "epoch": 2.8959053997693873,
      "grad_norm": 2.890625,
      "learning_rate": 1.9277449369273945e-06,
      "loss": 0.724,
      "step": 826280
    },
    {
      "epoch": 2.895940447276283,
      "grad_norm": 3.015625,
      "learning_rate": 1.9270959082636925e-06,
      "loss": 0.8667,
      "step": 826290
    },
    {
      "epoch": 2.895975494783179,
      "grad_norm": 3.3125,
      "learning_rate": 1.926446879599991e-06,
      "loss": 0.7908,
      "step": 826300
    },
    {
      "epoch": 2.896010542290074,
      "grad_norm": 2.984375,
      "learning_rate": 1.925797850936289e-06,
      "loss": 0.7879,
      "step": 826310
    },
    {
      "epoch": 2.89604558979697,
      "grad_norm": 2.6875,
      "learning_rate": 1.925148822272587e-06,
      "loss": 0.8315,
      "step": 826320
    },
    {
      "epoch": 2.8960806373038652,
      "grad_norm": 3.015625,
      "learning_rate": 1.924499793608885e-06,
      "loss": 0.8335,
      "step": 826330
    },
    {
      "epoch": 2.896115684810761,
      "grad_norm": 2.953125,
      "learning_rate": 1.9238507649451833e-06,
      "loss": 0.7834,
      "step": 826340
    },
    {
      "epoch": 2.8961507323176567,
      "grad_norm": 3.109375,
      "learning_rate": 1.923201736281481e-06,
      "loss": 0.8253,
      "step": 826350
    },
    {
      "epoch": 2.896185779824552,
      "grad_norm": 3.28125,
      "learning_rate": 1.9225527076177793e-06,
      "loss": 0.7628,
      "step": 826360
    },
    {
      "epoch": 2.896220827331448,
      "grad_norm": 2.78125,
      "learning_rate": 1.9219036789540778e-06,
      "loss": 0.7898,
      "step": 826370
    },
    {
      "epoch": 2.896255874838343,
      "grad_norm": 2.6875,
      "learning_rate": 1.9212546502903753e-06,
      "loss": 0.8006,
      "step": 826380
    },
    {
      "epoch": 2.896290922345239,
      "grad_norm": 2.984375,
      "learning_rate": 1.9206056216266738e-06,
      "loss": 0.7998,
      "step": 826390
    },
    {
      "epoch": 2.8963259698521346,
      "grad_norm": 3.28125,
      "learning_rate": 1.9199565929629718e-06,
      "loss": 0.8488,
      "step": 826400
    },
    {
      "epoch": 2.8963610173590304,
      "grad_norm": 2.75,
      "learning_rate": 1.9193075642992698e-06,
      "loss": 0.7544,
      "step": 826410
    },
    {
      "epoch": 2.8963960648659257,
      "grad_norm": 3.015625,
      "learning_rate": 1.9186585356355678e-06,
      "loss": 0.8136,
      "step": 826420
    },
    {
      "epoch": 2.8964311123728215,
      "grad_norm": 3.390625,
      "learning_rate": 1.918009506971866e-06,
      "loss": 0.8701,
      "step": 826430
    },
    {
      "epoch": 2.896466159879717,
      "grad_norm": 3.0,
      "learning_rate": 1.917360478308164e-06,
      "loss": 0.8475,
      "step": 826440
    },
    {
      "epoch": 2.8965012073866125,
      "grad_norm": 3.03125,
      "learning_rate": 1.916711449644462e-06,
      "loss": 0.8482,
      "step": 826450
    },
    {
      "epoch": 2.8965362548935083,
      "grad_norm": 3.0625,
      "learning_rate": 1.91606242098076e-06,
      "loss": 0.7463,
      "step": 826460
    },
    {
      "epoch": 2.8965713024004036,
      "grad_norm": 2.609375,
      "learning_rate": 1.915413392317058e-06,
      "loss": 0.7695,
      "step": 826470
    },
    {
      "epoch": 2.8966063499072994,
      "grad_norm": 3.125,
      "learning_rate": 1.9147643636533566e-06,
      "loss": 0.739,
      "step": 826480
    },
    {
      "epoch": 2.8966413974141947,
      "grad_norm": 2.859375,
      "learning_rate": 1.9141153349896546e-06,
      "loss": 0.8339,
      "step": 826490
    },
    {
      "epoch": 2.8966764449210904,
      "grad_norm": 2.8125,
      "learning_rate": 1.9134663063259526e-06,
      "loss": 0.774,
      "step": 826500
    },
    {
      "epoch": 2.896711492427986,
      "grad_norm": 3.203125,
      "learning_rate": 1.912817277662251e-06,
      "loss": 0.8155,
      "step": 826510
    },
    {
      "epoch": 2.896746539934882,
      "grad_norm": 2.796875,
      "learning_rate": 1.9121682489985486e-06,
      "loss": 0.7791,
      "step": 826520
    },
    {
      "epoch": 2.8967815874417773,
      "grad_norm": 3.140625,
      "learning_rate": 1.911519220334847e-06,
      "loss": 0.8314,
      "step": 826530
    },
    {
      "epoch": 2.896816634948673,
      "grad_norm": 3.015625,
      "learning_rate": 1.910870191671145e-06,
      "loss": 0.8308,
      "step": 826540
    },
    {
      "epoch": 2.8968516824555683,
      "grad_norm": 3.046875,
      "learning_rate": 1.910221163007443e-06,
      "loss": 0.8424,
      "step": 826550
    },
    {
      "epoch": 2.896886729962464,
      "grad_norm": 3.28125,
      "learning_rate": 1.909572134343741e-06,
      "loss": 0.8134,
      "step": 826560
    },
    {
      "epoch": 2.89692177746936,
      "grad_norm": 2.8125,
      "learning_rate": 1.9089231056800395e-06,
      "loss": 0.7888,
      "step": 826570
    },
    {
      "epoch": 2.896956824976255,
      "grad_norm": 3.0,
      "learning_rate": 1.9082740770163375e-06,
      "loss": 0.8165,
      "step": 826580
    },
    {
      "epoch": 2.896991872483151,
      "grad_norm": 2.515625,
      "learning_rate": 1.9076250483526355e-06,
      "loss": 0.8323,
      "step": 826590
    },
    {
      "epoch": 2.8970269199900462,
      "grad_norm": 2.78125,
      "learning_rate": 1.9069760196889337e-06,
      "loss": 0.7792,
      "step": 826600
    },
    {
      "epoch": 2.897061967496942,
      "grad_norm": 3.015625,
      "learning_rate": 1.9063269910252317e-06,
      "loss": 0.7413,
      "step": 826610
    },
    {
      "epoch": 2.8970970150038378,
      "grad_norm": 2.625,
      "learning_rate": 1.9056779623615299e-06,
      "loss": 0.7819,
      "step": 826620
    },
    {
      "epoch": 2.8971320625107335,
      "grad_norm": 2.546875,
      "learning_rate": 1.9050289336978277e-06,
      "loss": 0.7846,
      "step": 826630
    },
    {
      "epoch": 2.897167110017629,
      "grad_norm": 2.46875,
      "learning_rate": 1.904379905034126e-06,
      "loss": 0.8088,
      "step": 826640
    },
    {
      "epoch": 2.8972021575245246,
      "grad_norm": 3.296875,
      "learning_rate": 1.9037308763704243e-06,
      "loss": 0.8048,
      "step": 826650
    },
    {
      "epoch": 2.89723720503142,
      "grad_norm": 3.25,
      "learning_rate": 1.903081847706722e-06,
      "loss": 0.8048,
      "step": 826660
    },
    {
      "epoch": 2.8972722525383157,
      "grad_norm": 3.265625,
      "learning_rate": 1.9024328190430203e-06,
      "loss": 0.8809,
      "step": 826670
    },
    {
      "epoch": 2.8973073000452114,
      "grad_norm": 3.140625,
      "learning_rate": 1.9017837903793183e-06,
      "loss": 0.8244,
      "step": 826680
    },
    {
      "epoch": 2.8973423475521067,
      "grad_norm": 2.96875,
      "learning_rate": 1.9011347617156165e-06,
      "loss": 0.8027,
      "step": 826690
    },
    {
      "epoch": 2.8973773950590025,
      "grad_norm": 3.203125,
      "learning_rate": 1.9004857330519147e-06,
      "loss": 0.7545,
      "step": 826700
    },
    {
      "epoch": 2.897412442565898,
      "grad_norm": 2.84375,
      "learning_rate": 1.8998367043882127e-06,
      "loss": 0.7641,
      "step": 826710
    },
    {
      "epoch": 2.8974474900727936,
      "grad_norm": 2.828125,
      "learning_rate": 1.899187675724511e-06,
      "loss": 0.8231,
      "step": 826720
    },
    {
      "epoch": 2.8974825375796893,
      "grad_norm": 2.921875,
      "learning_rate": 1.8985386470608087e-06,
      "loss": 0.8879,
      "step": 826730
    },
    {
      "epoch": 2.897517585086585,
      "grad_norm": 2.96875,
      "learning_rate": 1.8978896183971072e-06,
      "loss": 0.8009,
      "step": 826740
    },
    {
      "epoch": 2.8975526325934804,
      "grad_norm": 3.765625,
      "learning_rate": 1.897240589733405e-06,
      "loss": 0.7348,
      "step": 826750
    },
    {
      "epoch": 2.897587680100376,
      "grad_norm": 3.25,
      "learning_rate": 1.8965915610697032e-06,
      "loss": 0.7851,
      "step": 826760
    },
    {
      "epoch": 2.8976227276072715,
      "grad_norm": 2.734375,
      "learning_rate": 1.8959425324060014e-06,
      "loss": 0.83,
      "step": 826770
    },
    {
      "epoch": 2.8976577751141672,
      "grad_norm": 2.640625,
      "learning_rate": 1.8952935037422994e-06,
      "loss": 0.7952,
      "step": 826780
    },
    {
      "epoch": 2.897692822621063,
      "grad_norm": 3.328125,
      "learning_rate": 1.8946444750785976e-06,
      "loss": 0.8384,
      "step": 826790
    },
    {
      "epoch": 2.8977278701279583,
      "grad_norm": 3.125,
      "learning_rate": 1.8939954464148956e-06,
      "loss": 0.8057,
      "step": 826800
    },
    {
      "epoch": 2.897762917634854,
      "grad_norm": 2.625,
      "learning_rate": 1.8933464177511938e-06,
      "loss": 0.6854,
      "step": 826810
    },
    {
      "epoch": 2.8977979651417494,
      "grad_norm": 2.6875,
      "learning_rate": 1.8926973890874916e-06,
      "loss": 0.7551,
      "step": 826820
    },
    {
      "epoch": 2.897833012648645,
      "grad_norm": 3.546875,
      "learning_rate": 1.8920483604237898e-06,
      "loss": 0.802,
      "step": 826830
    },
    {
      "epoch": 2.897868060155541,
      "grad_norm": 2.640625,
      "learning_rate": 1.891399331760088e-06,
      "loss": 0.8242,
      "step": 826840
    },
    {
      "epoch": 2.8979031076624366,
      "grad_norm": 2.71875,
      "learning_rate": 1.890750303096386e-06,
      "loss": 0.8196,
      "step": 826850
    },
    {
      "epoch": 2.897938155169332,
      "grad_norm": 2.84375,
      "learning_rate": 1.8901012744326842e-06,
      "loss": 0.8293,
      "step": 826860
    },
    {
      "epoch": 2.8979732026762277,
      "grad_norm": 3.25,
      "learning_rate": 1.8894522457689822e-06,
      "loss": 0.8025,
      "step": 826870
    },
    {
      "epoch": 2.898008250183123,
      "grad_norm": 2.703125,
      "learning_rate": 1.8888032171052804e-06,
      "loss": 0.8164,
      "step": 826880
    },
    {
      "epoch": 2.898043297690019,
      "grad_norm": 2.765625,
      "learning_rate": 1.8881541884415782e-06,
      "loss": 0.8475,
      "step": 826890
    },
    {
      "epoch": 2.8980783451969145,
      "grad_norm": 2.921875,
      "learning_rate": 1.8875051597778764e-06,
      "loss": 0.8082,
      "step": 826900
    },
    {
      "epoch": 2.89811339270381,
      "grad_norm": 2.765625,
      "learning_rate": 1.8868561311141748e-06,
      "loss": 0.8073,
      "step": 826910
    },
    {
      "epoch": 2.8981484402107056,
      "grad_norm": 2.15625,
      "learning_rate": 1.8862071024504726e-06,
      "loss": 0.731,
      "step": 826920
    },
    {
      "epoch": 2.8981834877176014,
      "grad_norm": 2.59375,
      "learning_rate": 1.8855580737867708e-06,
      "loss": 0.8355,
      "step": 826930
    },
    {
      "epoch": 2.8982185352244967,
      "grad_norm": 3.078125,
      "learning_rate": 1.8849090451230688e-06,
      "loss": 0.8391,
      "step": 826940
    },
    {
      "epoch": 2.8982535827313924,
      "grad_norm": 3.078125,
      "learning_rate": 1.884260016459367e-06,
      "loss": 0.8479,
      "step": 826950
    },
    {
      "epoch": 2.898288630238288,
      "grad_norm": 2.84375,
      "learning_rate": 1.8836109877956648e-06,
      "loss": 0.7505,
      "step": 826960
    },
    {
      "epoch": 2.8983236777451835,
      "grad_norm": 3.375,
      "learning_rate": 1.8829619591319633e-06,
      "loss": 0.8839,
      "step": 826970
    },
    {
      "epoch": 2.8983587252520793,
      "grad_norm": 2.671875,
      "learning_rate": 1.8823129304682615e-06,
      "loss": 0.7593,
      "step": 826980
    },
    {
      "epoch": 2.8983937727589746,
      "grad_norm": 2.546875,
      "learning_rate": 1.8816639018045593e-06,
      "loss": 0.7874,
      "step": 826990
    },
    {
      "epoch": 2.8984288202658703,
      "grad_norm": 3.0,
      "learning_rate": 1.8810148731408575e-06,
      "loss": 0.7692,
      "step": 827000
    },
    {
      "epoch": 2.898463867772766,
      "grad_norm": 2.953125,
      "learning_rate": 1.8803658444771555e-06,
      "loss": 0.7087,
      "step": 827010
    },
    {
      "epoch": 2.898498915279662,
      "grad_norm": 3.375,
      "learning_rate": 1.8797168158134537e-06,
      "loss": 0.8553,
      "step": 827020
    },
    {
      "epoch": 2.898533962786557,
      "grad_norm": 3.28125,
      "learning_rate": 1.8790677871497517e-06,
      "loss": 0.7968,
      "step": 827030
    },
    {
      "epoch": 2.898569010293453,
      "grad_norm": 2.90625,
      "learning_rate": 1.87841875848605e-06,
      "loss": 0.7592,
      "step": 827040
    },
    {
      "epoch": 2.8986040578003482,
      "grad_norm": 2.875,
      "learning_rate": 1.8777697298223481e-06,
      "loss": 0.8176,
      "step": 827050
    },
    {
      "epoch": 2.898639105307244,
      "grad_norm": 2.828125,
      "learning_rate": 1.877120701158646e-06,
      "loss": 0.7775,
      "step": 827060
    },
    {
      "epoch": 2.8986741528141398,
      "grad_norm": 2.828125,
      "learning_rate": 1.8764716724949443e-06,
      "loss": 0.8197,
      "step": 827070
    },
    {
      "epoch": 2.898709200321035,
      "grad_norm": 2.6875,
      "learning_rate": 1.8758226438312421e-06,
      "loss": 0.8759,
      "step": 827080
    },
    {
      "epoch": 2.898744247827931,
      "grad_norm": 2.921875,
      "learning_rate": 1.8751736151675403e-06,
      "loss": 0.8263,
      "step": 827090
    },
    {
      "epoch": 2.898779295334826,
      "grad_norm": 3.09375,
      "learning_rate": 1.8745245865038383e-06,
      "loss": 0.8193,
      "step": 827100
    },
    {
      "epoch": 2.898814342841722,
      "grad_norm": 2.734375,
      "learning_rate": 1.8738755578401365e-06,
      "loss": 0.7174,
      "step": 827110
    },
    {
      "epoch": 2.8988493903486177,
      "grad_norm": 2.71875,
      "learning_rate": 1.8732265291764347e-06,
      "loss": 0.8092,
      "step": 827120
    },
    {
      "epoch": 2.8988844378555134,
      "grad_norm": 2.71875,
      "learning_rate": 1.8725775005127327e-06,
      "loss": 0.7557,
      "step": 827130
    },
    {
      "epoch": 2.8989194853624087,
      "grad_norm": 2.828125,
      "learning_rate": 1.871928471849031e-06,
      "loss": 0.8087,
      "step": 827140
    },
    {
      "epoch": 2.8989545328693045,
      "grad_norm": 2.8125,
      "learning_rate": 1.8712794431853287e-06,
      "loss": 0.7814,
      "step": 827150
    },
    {
      "epoch": 2.8989895803762,
      "grad_norm": 3.015625,
      "learning_rate": 1.870630414521627e-06,
      "loss": 0.8407,
      "step": 827160
    },
    {
      "epoch": 2.8990246278830956,
      "grad_norm": 2.84375,
      "learning_rate": 1.8699813858579254e-06,
      "loss": 0.7886,
      "step": 827170
    },
    {
      "epoch": 2.8990596753899913,
      "grad_norm": 3.34375,
      "learning_rate": 1.8693323571942232e-06,
      "loss": 0.8259,
      "step": 827180
    },
    {
      "epoch": 2.8990947228968866,
      "grad_norm": 2.796875,
      "learning_rate": 1.8686833285305214e-06,
      "loss": 0.781,
      "step": 827190
    },
    {
      "epoch": 2.8991297704037824,
      "grad_norm": 3.578125,
      "learning_rate": 1.8680342998668194e-06,
      "loss": 0.8148,
      "step": 827200
    },
    {
      "epoch": 2.8991648179106777,
      "grad_norm": 3.25,
      "learning_rate": 1.8673852712031176e-06,
      "loss": 0.8005,
      "step": 827210
    },
    {
      "epoch": 2.8991998654175735,
      "grad_norm": 2.9375,
      "learning_rate": 1.8667362425394154e-06,
      "loss": 0.7761,
      "step": 827220
    },
    {
      "epoch": 2.899234912924469,
      "grad_norm": 3.078125,
      "learning_rate": 1.8660872138757138e-06,
      "loss": 0.8295,
      "step": 827230
    },
    {
      "epoch": 2.899269960431365,
      "grad_norm": 3.078125,
      "learning_rate": 1.865438185212012e-06,
      "loss": 0.8538,
      "step": 827240
    },
    {
      "epoch": 2.8993050079382603,
      "grad_norm": 3.171875,
      "learning_rate": 1.8647891565483098e-06,
      "loss": 0.8987,
      "step": 827250
    },
    {
      "epoch": 2.899340055445156,
      "grad_norm": 2.859375,
      "learning_rate": 1.864140127884608e-06,
      "loss": 0.8777,
      "step": 827260
    },
    {
      "epoch": 2.8993751029520514,
      "grad_norm": 3.109375,
      "learning_rate": 1.863491099220906e-06,
      "loss": 0.7608,
      "step": 827270
    },
    {
      "epoch": 2.899410150458947,
      "grad_norm": 3.1875,
      "learning_rate": 1.8628420705572042e-06,
      "loss": 0.8023,
      "step": 827280
    },
    {
      "epoch": 2.899445197965843,
      "grad_norm": 2.75,
      "learning_rate": 1.8621930418935022e-06,
      "loss": 0.7872,
      "step": 827290
    },
    {
      "epoch": 2.899480245472738,
      "grad_norm": 2.984375,
      "learning_rate": 1.8615440132298004e-06,
      "loss": 0.771,
      "step": 827300
    },
    {
      "epoch": 2.899515292979634,
      "grad_norm": 2.671875,
      "learning_rate": 1.8608949845660987e-06,
      "loss": 0.774,
      "step": 827310
    },
    {
      "epoch": 2.8995503404865293,
      "grad_norm": 2.90625,
      "learning_rate": 1.8602459559023964e-06,
      "loss": 0.8032,
      "step": 827320
    },
    {
      "epoch": 2.899585387993425,
      "grad_norm": 3.484375,
      "learning_rate": 1.8595969272386949e-06,
      "loss": 0.784,
      "step": 827330
    },
    {
      "epoch": 2.899620435500321,
      "grad_norm": 2.875,
      "learning_rate": 1.8589478985749927e-06,
      "loss": 0.7347,
      "step": 827340
    },
    {
      "epoch": 2.8996554830072165,
      "grad_norm": 3.09375,
      "learning_rate": 1.8582988699112909e-06,
      "loss": 0.8592,
      "step": 827350
    },
    {
      "epoch": 2.899690530514112,
      "grad_norm": 2.890625,
      "learning_rate": 1.8576498412475889e-06,
      "loss": 0.762,
      "step": 827360
    },
    {
      "epoch": 2.8997255780210076,
      "grad_norm": 2.96875,
      "learning_rate": 1.857000812583887e-06,
      "loss": 0.778,
      "step": 827370
    },
    {
      "epoch": 2.899760625527903,
      "grad_norm": 2.953125,
      "learning_rate": 1.8563517839201853e-06,
      "loss": 0.8581,
      "step": 827380
    },
    {
      "epoch": 2.8997956730347987,
      "grad_norm": 3.03125,
      "learning_rate": 1.8557027552564833e-06,
      "loss": 0.9102,
      "step": 827390
    },
    {
      "epoch": 2.8998307205416944,
      "grad_norm": 3.390625,
      "learning_rate": 1.8550537265927815e-06,
      "loss": 0.826,
      "step": 827400
    },
    {
      "epoch": 2.8998657680485898,
      "grad_norm": 3.625,
      "learning_rate": 1.8544046979290793e-06,
      "loss": 0.8353,
      "step": 827410
    },
    {
      "epoch": 2.8999008155554855,
      "grad_norm": 2.84375,
      "learning_rate": 1.8537556692653775e-06,
      "loss": 0.7524,
      "step": 827420
    },
    {
      "epoch": 2.899935863062381,
      "grad_norm": 3.171875,
      "learning_rate": 1.8531066406016755e-06,
      "loss": 0.8363,
      "step": 827430
    },
    {
      "epoch": 2.8999709105692766,
      "grad_norm": 3.375,
      "learning_rate": 1.8524576119379737e-06,
      "loss": 0.8689,
      "step": 827440
    },
    {
      "epoch": 2.9000059580761723,
      "grad_norm": 3.125,
      "learning_rate": 1.851808583274272e-06,
      "loss": 0.701,
      "step": 827450
    },
    {
      "epoch": 2.900041005583068,
      "grad_norm": 3.078125,
      "learning_rate": 1.85115955461057e-06,
      "loss": 0.7825,
      "step": 827460
    },
    {
      "epoch": 2.9000760530899634,
      "grad_norm": 3.15625,
      "learning_rate": 1.8505105259468681e-06,
      "loss": 0.8498,
      "step": 827470
    },
    {
      "epoch": 2.900111100596859,
      "grad_norm": 3.1875,
      "learning_rate": 1.849861497283166e-06,
      "loss": 0.8237,
      "step": 827480
    },
    {
      "epoch": 2.9001461481037545,
      "grad_norm": 2.984375,
      "learning_rate": 1.8492124686194641e-06,
      "loss": 0.7354,
      "step": 827490
    },
    {
      "epoch": 2.9001811956106502,
      "grad_norm": 2.609375,
      "learning_rate": 1.8485634399557621e-06,
      "loss": 0.7654,
      "step": 827500
    },
    {
      "epoch": 2.900216243117546,
      "grad_norm": 2.90625,
      "learning_rate": 1.8479144112920603e-06,
      "loss": 0.841,
      "step": 827510
    },
    {
      "epoch": 2.9002512906244413,
      "grad_norm": 3.671875,
      "learning_rate": 1.8472653826283586e-06,
      "loss": 0.7857,
      "step": 827520
    },
    {
      "epoch": 2.900286338131337,
      "grad_norm": 2.890625,
      "learning_rate": 1.8466163539646566e-06,
      "loss": 0.8307,
      "step": 827530
    },
    {
      "epoch": 2.9003213856382324,
      "grad_norm": 2.9375,
      "learning_rate": 1.8459673253009548e-06,
      "loss": 0.7662,
      "step": 827540
    },
    {
      "epoch": 2.900356433145128,
      "grad_norm": 2.859375,
      "learning_rate": 1.8453182966372526e-06,
      "loss": 0.753,
      "step": 827550
    },
    {
      "epoch": 2.900391480652024,
      "grad_norm": 2.984375,
      "learning_rate": 1.844669267973551e-06,
      "loss": 0.8216,
      "step": 827560
    },
    {
      "epoch": 2.9004265281589197,
      "grad_norm": 3.015625,
      "learning_rate": 1.8440202393098488e-06,
      "loss": 0.8097,
      "step": 827570
    },
    {
      "epoch": 2.900461575665815,
      "grad_norm": 2.921875,
      "learning_rate": 1.843371210646147e-06,
      "loss": 0.7519,
      "step": 827580
    },
    {
      "epoch": 2.9004966231727107,
      "grad_norm": 2.875,
      "learning_rate": 1.8427221819824452e-06,
      "loss": 0.7708,
      "step": 827590
    },
    {
      "epoch": 2.900531670679606,
      "grad_norm": 2.609375,
      "learning_rate": 1.8420731533187432e-06,
      "loss": 0.7261,
      "step": 827600
    },
    {
      "epoch": 2.900566718186502,
      "grad_norm": 3.390625,
      "learning_rate": 1.8414241246550414e-06,
      "loss": 0.8429,
      "step": 827610
    },
    {
      "epoch": 2.9006017656933976,
      "grad_norm": 3.203125,
      "learning_rate": 1.8407750959913394e-06,
      "loss": 0.8255,
      "step": 827620
    },
    {
      "epoch": 2.900636813200293,
      "grad_norm": 2.578125,
      "learning_rate": 1.8401260673276376e-06,
      "loss": 0.8182,
      "step": 827630
    },
    {
      "epoch": 2.9006718607071886,
      "grad_norm": 3.0,
      "learning_rate": 1.8394770386639358e-06,
      "loss": 0.8472,
      "step": 827640
    },
    {
      "epoch": 2.900706908214084,
      "grad_norm": 2.640625,
      "learning_rate": 1.8388280100002336e-06,
      "loss": 0.8373,
      "step": 827650
    },
    {
      "epoch": 2.9007419557209797,
      "grad_norm": 3.234375,
      "learning_rate": 1.838178981336532e-06,
      "loss": 0.8221,
      "step": 827660
    },
    {
      "epoch": 2.9007770032278755,
      "grad_norm": 2.921875,
      "learning_rate": 1.8375299526728298e-06,
      "loss": 0.7882,
      "step": 827670
    },
    {
      "epoch": 2.900812050734771,
      "grad_norm": 2.921875,
      "learning_rate": 1.836880924009128e-06,
      "loss": 0.8014,
      "step": 827680
    },
    {
      "epoch": 2.9008470982416665,
      "grad_norm": 2.921875,
      "learning_rate": 1.836231895345426e-06,
      "loss": 0.8099,
      "step": 827690
    },
    {
      "epoch": 2.9008821457485623,
      "grad_norm": 3.0,
      "learning_rate": 1.8355828666817242e-06,
      "loss": 0.7802,
      "step": 827700
    },
    {
      "epoch": 2.9009171932554576,
      "grad_norm": 2.75,
      "learning_rate": 1.8349338380180225e-06,
      "loss": 0.7806,
      "step": 827710
    },
    {
      "epoch": 2.9009522407623534,
      "grad_norm": 2.515625,
      "learning_rate": 1.8342848093543205e-06,
      "loss": 0.7638,
      "step": 827720
    },
    {
      "epoch": 2.900987288269249,
      "grad_norm": 2.9375,
      "learning_rate": 1.8336357806906187e-06,
      "loss": 0.8094,
      "step": 827730
    },
    {
      "epoch": 2.9010223357761444,
      "grad_norm": 3.046875,
      "learning_rate": 1.8329867520269165e-06,
      "loss": 0.8388,
      "step": 827740
    },
    {
      "epoch": 2.90105738328304,
      "grad_norm": 2.625,
      "learning_rate": 1.8323377233632147e-06,
      "loss": 0.8625,
      "step": 827750
    },
    {
      "epoch": 2.9010924307899355,
      "grad_norm": 3.28125,
      "learning_rate": 1.8316886946995127e-06,
      "loss": 0.7634,
      "step": 827760
    },
    {
      "epoch": 2.9011274782968313,
      "grad_norm": 2.859375,
      "learning_rate": 1.8310396660358109e-06,
      "loss": 0.8351,
      "step": 827770
    },
    {
      "epoch": 2.901162525803727,
      "grad_norm": 3.15625,
      "learning_rate": 1.830390637372109e-06,
      "loss": 0.7716,
      "step": 827780
    },
    {
      "epoch": 2.901197573310623,
      "grad_norm": 2.921875,
      "learning_rate": 1.829741608708407e-06,
      "loss": 0.8133,
      "step": 827790
    },
    {
      "epoch": 2.901232620817518,
      "grad_norm": 3.03125,
      "learning_rate": 1.8290925800447053e-06,
      "loss": 0.8109,
      "step": 827800
    },
    {
      "epoch": 2.901267668324414,
      "grad_norm": 2.375,
      "learning_rate": 1.828443551381003e-06,
      "loss": 0.785,
      "step": 827810
    },
    {
      "epoch": 2.901302715831309,
      "grad_norm": 2.921875,
      "learning_rate": 1.8277945227173015e-06,
      "loss": 0.7513,
      "step": 827820
    },
    {
      "epoch": 2.901337763338205,
      "grad_norm": 2.9375,
      "learning_rate": 1.8271454940535993e-06,
      "loss": 0.727,
      "step": 827830
    },
    {
      "epoch": 2.9013728108451007,
      "grad_norm": 3.359375,
      "learning_rate": 1.8264964653898975e-06,
      "loss": 0.7804,
      "step": 827840
    },
    {
      "epoch": 2.901407858351996,
      "grad_norm": 3.0,
      "learning_rate": 1.8258474367261957e-06,
      "loss": 0.7929,
      "step": 827850
    },
    {
      "epoch": 2.9014429058588918,
      "grad_norm": 3.25,
      "learning_rate": 1.8251984080624937e-06,
      "loss": 0.7538,
      "step": 827860
    },
    {
      "epoch": 2.901477953365787,
      "grad_norm": 2.515625,
      "learning_rate": 1.824549379398792e-06,
      "loss": 0.7608,
      "step": 827870
    },
    {
      "epoch": 2.901513000872683,
      "grad_norm": 2.875,
      "learning_rate": 1.82390035073509e-06,
      "loss": 0.8024,
      "step": 827880
    },
    {
      "epoch": 2.9015480483795786,
      "grad_norm": 3.1875,
      "learning_rate": 1.8232513220713882e-06,
      "loss": 0.7642,
      "step": 827890
    },
    {
      "epoch": 2.9015830958864743,
      "grad_norm": 3.25,
      "learning_rate": 1.822602293407686e-06,
      "loss": 0.8533,
      "step": 827900
    },
    {
      "epoch": 2.9016181433933697,
      "grad_norm": 2.46875,
      "learning_rate": 1.8219532647439842e-06,
      "loss": 0.7172,
      "step": 827910
    },
    {
      "epoch": 2.9016531909002654,
      "grad_norm": 3.15625,
      "learning_rate": 1.8213042360802826e-06,
      "loss": 0.8721,
      "step": 827920
    },
    {
      "epoch": 2.9016882384071607,
      "grad_norm": 3.109375,
      "learning_rate": 1.8206552074165804e-06,
      "loss": 0.8471,
      "step": 827930
    },
    {
      "epoch": 2.9017232859140565,
      "grad_norm": 2.65625,
      "learning_rate": 1.8200061787528786e-06,
      "loss": 0.7787,
      "step": 827940
    },
    {
      "epoch": 2.9017583334209522,
      "grad_norm": 2.734375,
      "learning_rate": 1.8193571500891766e-06,
      "loss": 0.82,
      "step": 827950
    },
    {
      "epoch": 2.9017933809278476,
      "grad_norm": 3.171875,
      "learning_rate": 1.8187081214254748e-06,
      "loss": 0.774,
      "step": 827960
    },
    {
      "epoch": 2.9018284284347433,
      "grad_norm": 2.9375,
      "learning_rate": 1.8180590927617726e-06,
      "loss": 0.7165,
      "step": 827970
    },
    {
      "epoch": 2.9018634759416386,
      "grad_norm": 2.6875,
      "learning_rate": 1.817410064098071e-06,
      "loss": 0.791,
      "step": 827980
    },
    {
      "epoch": 2.9018985234485344,
      "grad_norm": 2.78125,
      "learning_rate": 1.8167610354343692e-06,
      "loss": 0.8304,
      "step": 827990
    },
    {
      "epoch": 2.90193357095543,
      "grad_norm": 2.84375,
      "learning_rate": 1.816112006770667e-06,
      "loss": 0.8263,
      "step": 828000
    },
    {
      "epoch": 2.901968618462326,
      "grad_norm": 2.96875,
      "learning_rate": 1.8154629781069652e-06,
      "loss": 0.8776,
      "step": 828010
    },
    {
      "epoch": 2.902003665969221,
      "grad_norm": 2.734375,
      "learning_rate": 1.8148139494432632e-06,
      "loss": 0.7648,
      "step": 828020
    },
    {
      "epoch": 2.902038713476117,
      "grad_norm": 2.578125,
      "learning_rate": 1.8141649207795614e-06,
      "loss": 0.7263,
      "step": 828030
    },
    {
      "epoch": 2.9020737609830123,
      "grad_norm": 3.234375,
      "learning_rate": 1.8135158921158596e-06,
      "loss": 0.8572,
      "step": 828040
    },
    {
      "epoch": 2.902108808489908,
      "grad_norm": 2.828125,
      "learning_rate": 1.8128668634521576e-06,
      "loss": 0.7281,
      "step": 828050
    },
    {
      "epoch": 2.902143855996804,
      "grad_norm": 2.546875,
      "learning_rate": 1.8122178347884558e-06,
      "loss": 0.764,
      "step": 828060
    },
    {
      "epoch": 2.902178903503699,
      "grad_norm": 3.15625,
      "learning_rate": 1.8115688061247536e-06,
      "loss": 0.8133,
      "step": 828070
    },
    {
      "epoch": 2.902213951010595,
      "grad_norm": 2.734375,
      "learning_rate": 1.8109197774610518e-06,
      "loss": 0.8252,
      "step": 828080
    },
    {
      "epoch": 2.90224899851749,
      "grad_norm": 3.140625,
      "learning_rate": 1.8102707487973498e-06,
      "loss": 0.7613,
      "step": 828090
    },
    {
      "epoch": 2.902284046024386,
      "grad_norm": 2.734375,
      "learning_rate": 1.809621720133648e-06,
      "loss": 0.8642,
      "step": 828100
    },
    {
      "epoch": 2.9023190935312817,
      "grad_norm": 3.40625,
      "learning_rate": 1.8089726914699463e-06,
      "loss": 0.7772,
      "step": 828110
    },
    {
      "epoch": 2.9023541410381775,
      "grad_norm": 2.75,
      "learning_rate": 1.8083236628062443e-06,
      "loss": 0.8025,
      "step": 828120
    },
    {
      "epoch": 2.9023891885450728,
      "grad_norm": 2.984375,
      "learning_rate": 1.8076746341425425e-06,
      "loss": 0.7735,
      "step": 828130
    },
    {
      "epoch": 2.9024242360519685,
      "grad_norm": 3.140625,
      "learning_rate": 1.8070256054788403e-06,
      "loss": 0.8966,
      "step": 828140
    },
    {
      "epoch": 2.902459283558864,
      "grad_norm": 3.1875,
      "learning_rate": 1.8063765768151387e-06,
      "loss": 0.789,
      "step": 828150
    },
    {
      "epoch": 2.9024943310657596,
      "grad_norm": 3.09375,
      "learning_rate": 1.8057275481514365e-06,
      "loss": 0.7885,
      "step": 828160
    },
    {
      "epoch": 2.9025293785726554,
      "grad_norm": 2.9375,
      "learning_rate": 1.8050785194877347e-06,
      "loss": 0.805,
      "step": 828170
    },
    {
      "epoch": 2.9025644260795507,
      "grad_norm": 3.234375,
      "learning_rate": 1.804429490824033e-06,
      "loss": 0.8588,
      "step": 828180
    },
    {
      "epoch": 2.9025994735864464,
      "grad_norm": 2.734375,
      "learning_rate": 1.803780462160331e-06,
      "loss": 0.7504,
      "step": 828190
    },
    {
      "epoch": 2.902634521093342,
      "grad_norm": 2.53125,
      "learning_rate": 1.8031314334966291e-06,
      "loss": 0.7413,
      "step": 828200
    },
    {
      "epoch": 2.9026695686002375,
      "grad_norm": 2.84375,
      "learning_rate": 1.8024824048329271e-06,
      "loss": 0.8635,
      "step": 828210
    },
    {
      "epoch": 2.9027046161071333,
      "grad_norm": 2.828125,
      "learning_rate": 1.8018333761692253e-06,
      "loss": 0.7485,
      "step": 828220
    },
    {
      "epoch": 2.902739663614029,
      "grad_norm": 2.6875,
      "learning_rate": 1.8011843475055231e-06,
      "loss": 0.7702,
      "step": 828230
    },
    {
      "epoch": 2.9027747111209243,
      "grad_norm": 2.8125,
      "learning_rate": 1.8005353188418213e-06,
      "loss": 0.8313,
      "step": 828240
    },
    {
      "epoch": 2.90280975862782,
      "grad_norm": 3.28125,
      "learning_rate": 1.7998862901781197e-06,
      "loss": 0.8963,
      "step": 828250
    },
    {
      "epoch": 2.9028448061347154,
      "grad_norm": 2.5625,
      "learning_rate": 1.7992372615144175e-06,
      "loss": 0.7777,
      "step": 828260
    },
    {
      "epoch": 2.902879853641611,
      "grad_norm": 2.796875,
      "learning_rate": 1.7985882328507157e-06,
      "loss": 0.9102,
      "step": 828270
    },
    {
      "epoch": 2.902914901148507,
      "grad_norm": 2.90625,
      "learning_rate": 1.7979392041870137e-06,
      "loss": 0.8021,
      "step": 828280
    },
    {
      "epoch": 2.9029499486554022,
      "grad_norm": 2.90625,
      "learning_rate": 1.797290175523312e-06,
      "loss": 0.8086,
      "step": 828290
    },
    {
      "epoch": 2.902984996162298,
      "grad_norm": 2.859375,
      "learning_rate": 1.7966411468596097e-06,
      "loss": 0.8136,
      "step": 828300
    },
    {
      "epoch": 2.9030200436691938,
      "grad_norm": 2.765625,
      "learning_rate": 1.7959921181959082e-06,
      "loss": 0.8373,
      "step": 828310
    },
    {
      "epoch": 2.903055091176089,
      "grad_norm": 2.453125,
      "learning_rate": 1.7953430895322064e-06,
      "loss": 0.8001,
      "step": 828320
    },
    {
      "epoch": 2.903090138682985,
      "grad_norm": 3.03125,
      "learning_rate": 1.7946940608685042e-06,
      "loss": 0.8046,
      "step": 828330
    },
    {
      "epoch": 2.9031251861898806,
      "grad_norm": 2.640625,
      "learning_rate": 1.7940450322048024e-06,
      "loss": 0.7051,
      "step": 828340
    },
    {
      "epoch": 2.903160233696776,
      "grad_norm": 2.78125,
      "learning_rate": 1.7933960035411004e-06,
      "loss": 0.7865,
      "step": 828350
    },
    {
      "epoch": 2.9031952812036717,
      "grad_norm": 2.859375,
      "learning_rate": 1.7927469748773986e-06,
      "loss": 0.8153,
      "step": 828360
    },
    {
      "epoch": 2.903230328710567,
      "grad_norm": 2.671875,
      "learning_rate": 1.7920979462136966e-06,
      "loss": 0.7983,
      "step": 828370
    },
    {
      "epoch": 2.9032653762174627,
      "grad_norm": 3.140625,
      "learning_rate": 1.7914489175499948e-06,
      "loss": 0.8478,
      "step": 828380
    },
    {
      "epoch": 2.9033004237243585,
      "grad_norm": 2.75,
      "learning_rate": 1.790799888886293e-06,
      "loss": 0.8372,
      "step": 828390
    },
    {
      "epoch": 2.9033354712312542,
      "grad_norm": 2.96875,
      "learning_rate": 1.7901508602225908e-06,
      "loss": 0.7604,
      "step": 828400
    },
    {
      "epoch": 2.9033705187381496,
      "grad_norm": 3.0,
      "learning_rate": 1.7895018315588892e-06,
      "loss": 0.7744,
      "step": 828410
    },
    {
      "epoch": 2.9034055662450453,
      "grad_norm": 3.234375,
      "learning_rate": 1.788852802895187e-06,
      "loss": 0.8417,
      "step": 828420
    },
    {
      "epoch": 2.9034406137519406,
      "grad_norm": 3.09375,
      "learning_rate": 1.7882037742314852e-06,
      "loss": 0.8584,
      "step": 828430
    },
    {
      "epoch": 2.9034756612588364,
      "grad_norm": 2.9375,
      "learning_rate": 1.7875547455677832e-06,
      "loss": 0.7785,
      "step": 828440
    },
    {
      "epoch": 2.903510708765732,
      "grad_norm": 2.6875,
      "learning_rate": 1.7869057169040814e-06,
      "loss": 0.7854,
      "step": 828450
    },
    {
      "epoch": 2.9035457562726275,
      "grad_norm": 3.421875,
      "learning_rate": 1.7862566882403797e-06,
      "loss": 0.7696,
      "step": 828460
    },
    {
      "epoch": 2.903580803779523,
      "grad_norm": 2.65625,
      "learning_rate": 1.7856076595766777e-06,
      "loss": 0.7953,
      "step": 828470
    },
    {
      "epoch": 2.9036158512864185,
      "grad_norm": 2.921875,
      "learning_rate": 1.7849586309129759e-06,
      "loss": 0.7756,
      "step": 828480
    },
    {
      "epoch": 2.9036508987933143,
      "grad_norm": 2.90625,
      "learning_rate": 1.7843096022492737e-06,
      "loss": 0.7942,
      "step": 828490
    },
    {
      "epoch": 2.90368594630021,
      "grad_norm": 2.734375,
      "learning_rate": 1.7836605735855719e-06,
      "loss": 0.8541,
      "step": 828500
    },
    {
      "epoch": 2.903720993807106,
      "grad_norm": 3.125,
      "learning_rate": 1.7830115449218703e-06,
      "loss": 0.7461,
      "step": 828510
    },
    {
      "epoch": 2.903756041314001,
      "grad_norm": 3.25,
      "learning_rate": 1.782362516258168e-06,
      "loss": 0.7925,
      "step": 828520
    },
    {
      "epoch": 2.903791088820897,
      "grad_norm": 2.703125,
      "learning_rate": 1.7817134875944663e-06,
      "loss": 0.7696,
      "step": 828530
    },
    {
      "epoch": 2.903826136327792,
      "grad_norm": 3.140625,
      "learning_rate": 1.7810644589307643e-06,
      "loss": 0.7655,
      "step": 828540
    },
    {
      "epoch": 2.903861183834688,
      "grad_norm": 2.828125,
      "learning_rate": 1.7804154302670625e-06,
      "loss": 0.8466,
      "step": 828550
    },
    {
      "epoch": 2.9038962313415837,
      "grad_norm": 3.046875,
      "learning_rate": 1.7797664016033603e-06,
      "loss": 0.823,
      "step": 828560
    },
    {
      "epoch": 2.903931278848479,
      "grad_norm": 3.03125,
      "learning_rate": 1.7791173729396587e-06,
      "loss": 0.7856,
      "step": 828570
    },
    {
      "epoch": 2.9039663263553748,
      "grad_norm": 2.59375,
      "learning_rate": 1.778468344275957e-06,
      "loss": 0.8125,
      "step": 828580
    },
    {
      "epoch": 2.90400137386227,
      "grad_norm": 2.4375,
      "learning_rate": 1.7778193156122547e-06,
      "loss": 0.6683,
      "step": 828590
    },
    {
      "epoch": 2.904036421369166,
      "grad_norm": 3.03125,
      "learning_rate": 1.777170286948553e-06,
      "loss": 0.8038,
      "step": 828600
    },
    {
      "epoch": 2.9040714688760616,
      "grad_norm": 3.25,
      "learning_rate": 1.776521258284851e-06,
      "loss": 0.8234,
      "step": 828610
    },
    {
      "epoch": 2.9041065163829574,
      "grad_norm": 2.84375,
      "learning_rate": 1.7758722296211491e-06,
      "loss": 0.7734,
      "step": 828620
    },
    {
      "epoch": 2.9041415638898527,
      "grad_norm": 2.6875,
      "learning_rate": 1.7752232009574471e-06,
      "loss": 0.8325,
      "step": 828630
    },
    {
      "epoch": 2.9041766113967484,
      "grad_norm": 3.15625,
      "learning_rate": 1.7745741722937453e-06,
      "loss": 0.7636,
      "step": 828640
    },
    {
      "epoch": 2.9042116589036437,
      "grad_norm": 2.59375,
      "learning_rate": 1.7739251436300436e-06,
      "loss": 0.8706,
      "step": 828650
    },
    {
      "epoch": 2.9042467064105395,
      "grad_norm": 3.171875,
      "learning_rate": 1.7732761149663413e-06,
      "loss": 0.8499,
      "step": 828660
    },
    {
      "epoch": 2.9042817539174353,
      "grad_norm": 3.390625,
      "learning_rate": 1.7726270863026396e-06,
      "loss": 0.8,
      "step": 828670
    },
    {
      "epoch": 2.9043168014243306,
      "grad_norm": 2.59375,
      "learning_rate": 1.7719780576389376e-06,
      "loss": 0.7908,
      "step": 828680
    },
    {
      "epoch": 2.9043518489312263,
      "grad_norm": 3.140625,
      "learning_rate": 1.7713290289752358e-06,
      "loss": 0.8789,
      "step": 828690
    },
    {
      "epoch": 2.9043868964381216,
      "grad_norm": 2.53125,
      "learning_rate": 1.7706800003115338e-06,
      "loss": 0.796,
      "step": 828700
    },
    {
      "epoch": 2.9044219439450174,
      "grad_norm": 2.734375,
      "learning_rate": 1.770030971647832e-06,
      "loss": 0.7504,
      "step": 828710
    },
    {
      "epoch": 2.904456991451913,
      "grad_norm": 2.609375,
      "learning_rate": 1.7693819429841302e-06,
      "loss": 0.7694,
      "step": 828720
    },
    {
      "epoch": 2.904492038958809,
      "grad_norm": 2.921875,
      "learning_rate": 1.768732914320428e-06,
      "loss": 0.7489,
      "step": 828730
    },
    {
      "epoch": 2.9045270864657042,
      "grad_norm": 2.984375,
      "learning_rate": 1.7680838856567264e-06,
      "loss": 0.8268,
      "step": 828740
    },
    {
      "epoch": 2.9045621339726,
      "grad_norm": 2.984375,
      "learning_rate": 1.7674348569930242e-06,
      "loss": 0.775,
      "step": 828750
    },
    {
      "epoch": 2.9045971814794953,
      "grad_norm": 2.375,
      "learning_rate": 1.7667858283293224e-06,
      "loss": 0.7447,
      "step": 828760
    },
    {
      "epoch": 2.904632228986391,
      "grad_norm": 3.0625,
      "learning_rate": 1.7661367996656204e-06,
      "loss": 0.7856,
      "step": 828770
    },
    {
      "epoch": 2.904667276493287,
      "grad_norm": 3.328125,
      "learning_rate": 1.7654877710019186e-06,
      "loss": 0.845,
      "step": 828780
    },
    {
      "epoch": 2.904702324000182,
      "grad_norm": 2.9375,
      "learning_rate": 1.7648387423382168e-06,
      "loss": 0.9085,
      "step": 828790
    },
    {
      "epoch": 2.904737371507078,
      "grad_norm": 2.875,
      "learning_rate": 1.7641897136745148e-06,
      "loss": 0.779,
      "step": 828800
    },
    {
      "epoch": 2.904772419013973,
      "grad_norm": 3.0625,
      "learning_rate": 1.763540685010813e-06,
      "loss": 0.7663,
      "step": 828810
    },
    {
      "epoch": 2.904807466520869,
      "grad_norm": 2.828125,
      "learning_rate": 1.7628916563471108e-06,
      "loss": 0.8431,
      "step": 828820
    },
    {
      "epoch": 2.9048425140277647,
      "grad_norm": 3.3125,
      "learning_rate": 1.762242627683409e-06,
      "loss": 0.8061,
      "step": 828830
    },
    {
      "epoch": 2.9048775615346605,
      "grad_norm": 2.984375,
      "learning_rate": 1.761593599019707e-06,
      "loss": 0.7478,
      "step": 828840
    },
    {
      "epoch": 2.904912609041556,
      "grad_norm": 3.21875,
      "learning_rate": 1.7609445703560052e-06,
      "loss": 0.754,
      "step": 828850
    },
    {
      "epoch": 2.9049476565484516,
      "grad_norm": 2.953125,
      "learning_rate": 1.7602955416923035e-06,
      "loss": 0.8044,
      "step": 828860
    },
    {
      "epoch": 2.904982704055347,
      "grad_norm": 3.203125,
      "learning_rate": 1.7596465130286015e-06,
      "loss": 0.7621,
      "step": 828870
    },
    {
      "epoch": 2.9050177515622426,
      "grad_norm": 3.046875,
      "learning_rate": 1.7589974843648997e-06,
      "loss": 0.874,
      "step": 828880
    },
    {
      "epoch": 2.9050527990691384,
      "grad_norm": 2.3125,
      "learning_rate": 1.7583484557011975e-06,
      "loss": 0.7639,
      "step": 828890
    },
    {
      "epoch": 2.9050878465760337,
      "grad_norm": 2.484375,
      "learning_rate": 1.7576994270374959e-06,
      "loss": 0.7231,
      "step": 828900
    },
    {
      "epoch": 2.9051228940829295,
      "grad_norm": 2.734375,
      "learning_rate": 1.757050398373794e-06,
      "loss": 0.8131,
      "step": 828910
    },
    {
      "epoch": 2.9051579415898248,
      "grad_norm": 3.296875,
      "learning_rate": 1.7564013697100919e-06,
      "loss": 0.8978,
      "step": 828920
    },
    {
      "epoch": 2.9051929890967205,
      "grad_norm": 2.640625,
      "learning_rate": 1.75575234104639e-06,
      "loss": 0.741,
      "step": 828930
    },
    {
      "epoch": 2.9052280366036163,
      "grad_norm": 2.734375,
      "learning_rate": 1.755103312382688e-06,
      "loss": 0.8024,
      "step": 828940
    },
    {
      "epoch": 2.905263084110512,
      "grad_norm": 3.46875,
      "learning_rate": 1.7544542837189863e-06,
      "loss": 0.8316,
      "step": 828950
    },
    {
      "epoch": 2.9052981316174074,
      "grad_norm": 2.921875,
      "learning_rate": 1.7538052550552843e-06,
      "loss": 0.7734,
      "step": 828960
    },
    {
      "epoch": 2.905333179124303,
      "grad_norm": 3.046875,
      "learning_rate": 1.7531562263915825e-06,
      "loss": 0.8367,
      "step": 828970
    },
    {
      "epoch": 2.9053682266311984,
      "grad_norm": 2.875,
      "learning_rate": 1.7525071977278807e-06,
      "loss": 0.8825,
      "step": 828980
    },
    {
      "epoch": 2.905403274138094,
      "grad_norm": 2.8125,
      "learning_rate": 1.7518581690641785e-06,
      "loss": 0.7653,
      "step": 828990
    },
    {
      "epoch": 2.90543832164499,
      "grad_norm": 2.546875,
      "learning_rate": 1.751209140400477e-06,
      "loss": 0.8127,
      "step": 829000
    },
    {
      "epoch": 2.9054733691518853,
      "grad_norm": 2.953125,
      "learning_rate": 1.7505601117367747e-06,
      "loss": 0.7985,
      "step": 829010
    },
    {
      "epoch": 2.905508416658781,
      "grad_norm": 3.3125,
      "learning_rate": 1.749911083073073e-06,
      "loss": 0.8532,
      "step": 829020
    },
    {
      "epoch": 2.9055434641656763,
      "grad_norm": 2.53125,
      "learning_rate": 1.749262054409371e-06,
      "loss": 0.724,
      "step": 829030
    },
    {
      "epoch": 2.905578511672572,
      "grad_norm": 2.546875,
      "learning_rate": 1.7486130257456692e-06,
      "loss": 0.8066,
      "step": 829040
    },
    {
      "epoch": 2.905613559179468,
      "grad_norm": 2.96875,
      "learning_rate": 1.7479639970819674e-06,
      "loss": 0.8275,
      "step": 829050
    },
    {
      "epoch": 2.9056486066863636,
      "grad_norm": 3.21875,
      "learning_rate": 1.7473149684182654e-06,
      "loss": 0.9537,
      "step": 829060
    },
    {
      "epoch": 2.905683654193259,
      "grad_norm": 2.53125,
      "learning_rate": 1.7466659397545636e-06,
      "loss": 0.7137,
      "step": 829070
    },
    {
      "epoch": 2.9057187017001547,
      "grad_norm": 3.640625,
      "learning_rate": 1.7460169110908614e-06,
      "loss": 0.8666,
      "step": 829080
    },
    {
      "epoch": 2.90575374920705,
      "grad_norm": 3.140625,
      "learning_rate": 1.7453678824271596e-06,
      "loss": 0.8088,
      "step": 829090
    },
    {
      "epoch": 2.9057887967139457,
      "grad_norm": 3.125,
      "learning_rate": 1.7447188537634576e-06,
      "loss": 0.8427,
      "step": 829100
    },
    {
      "epoch": 2.9058238442208415,
      "grad_norm": 2.6875,
      "learning_rate": 1.7440698250997558e-06,
      "loss": 0.7179,
      "step": 829110
    },
    {
      "epoch": 2.905858891727737,
      "grad_norm": 2.6875,
      "learning_rate": 1.743420796436054e-06,
      "loss": 0.846,
      "step": 829120
    },
    {
      "epoch": 2.9058939392346326,
      "grad_norm": 3.140625,
      "learning_rate": 1.742771767772352e-06,
      "loss": 0.7919,
      "step": 829130
    },
    {
      "epoch": 2.905928986741528,
      "grad_norm": 2.6875,
      "learning_rate": 1.7421227391086502e-06,
      "loss": 0.7857,
      "step": 829140
    },
    {
      "epoch": 2.9059640342484236,
      "grad_norm": 2.59375,
      "learning_rate": 1.741473710444948e-06,
      "loss": 0.7414,
      "step": 829150
    },
    {
      "epoch": 2.9059990817553194,
      "grad_norm": 3.078125,
      "learning_rate": 1.7408246817812464e-06,
      "loss": 0.7761,
      "step": 829160
    },
    {
      "epoch": 2.906034129262215,
      "grad_norm": 2.5,
      "learning_rate": 1.7401756531175442e-06,
      "loss": 0.7851,
      "step": 829170
    },
    {
      "epoch": 2.9060691767691105,
      "grad_norm": 2.84375,
      "learning_rate": 1.7395266244538424e-06,
      "loss": 0.8388,
      "step": 829180
    },
    {
      "epoch": 2.9061042242760062,
      "grad_norm": 3.09375,
      "learning_rate": 1.7388775957901406e-06,
      "loss": 0.8521,
      "step": 829190
    },
    {
      "epoch": 2.9061392717829015,
      "grad_norm": 2.78125,
      "learning_rate": 1.7382285671264386e-06,
      "loss": 0.7648,
      "step": 829200
    },
    {
      "epoch": 2.9061743192897973,
      "grad_norm": 2.953125,
      "learning_rate": 1.7375795384627368e-06,
      "loss": 0.8794,
      "step": 829210
    },
    {
      "epoch": 2.906209366796693,
      "grad_norm": 3.265625,
      "learning_rate": 1.7369305097990348e-06,
      "loss": 0.7751,
      "step": 829220
    },
    {
      "epoch": 2.9062444143035884,
      "grad_norm": 3.4375,
      "learning_rate": 1.736281481135333e-06,
      "loss": 0.8135,
      "step": 829230
    },
    {
      "epoch": 2.906279461810484,
      "grad_norm": 3.453125,
      "learning_rate": 1.7356324524716308e-06,
      "loss": 0.7956,
      "step": 829240
    },
    {
      "epoch": 2.9063145093173794,
      "grad_norm": 2.875,
      "learning_rate": 1.734983423807929e-06,
      "loss": 0.7874,
      "step": 829250
    },
    {
      "epoch": 2.906349556824275,
      "grad_norm": 3.046875,
      "learning_rate": 1.7343343951442273e-06,
      "loss": 0.7899,
      "step": 829260
    },
    {
      "epoch": 2.906384604331171,
      "grad_norm": 3.15625,
      "learning_rate": 1.7336853664805253e-06,
      "loss": 0.8187,
      "step": 829270
    },
    {
      "epoch": 2.9064196518380667,
      "grad_norm": 2.578125,
      "learning_rate": 1.7330363378168235e-06,
      "loss": 0.8182,
      "step": 829280
    },
    {
      "epoch": 2.906454699344962,
      "grad_norm": 2.765625,
      "learning_rate": 1.7323873091531215e-06,
      "loss": 0.7843,
      "step": 829290
    },
    {
      "epoch": 2.906489746851858,
      "grad_norm": 2.75,
      "learning_rate": 1.7317382804894197e-06,
      "loss": 0.7828,
      "step": 829300
    },
    {
      "epoch": 2.906524794358753,
      "grad_norm": 2.625,
      "learning_rate": 1.7310892518257175e-06,
      "loss": 0.7985,
      "step": 829310
    },
    {
      "epoch": 2.906559841865649,
      "grad_norm": 2.953125,
      "learning_rate": 1.7304402231620157e-06,
      "loss": 0.713,
      "step": 829320
    },
    {
      "epoch": 2.9065948893725446,
      "grad_norm": 3.046875,
      "learning_rate": 1.7297911944983141e-06,
      "loss": 0.8562,
      "step": 829330
    },
    {
      "epoch": 2.90662993687944,
      "grad_norm": 2.875,
      "learning_rate": 1.729142165834612e-06,
      "loss": 0.8349,
      "step": 829340
    },
    {
      "epoch": 2.9066649843863357,
      "grad_norm": 3.25,
      "learning_rate": 1.7284931371709101e-06,
      "loss": 0.8591,
      "step": 829350
    },
    {
      "epoch": 2.906700031893231,
      "grad_norm": 2.8125,
      "learning_rate": 1.7278441085072081e-06,
      "loss": 0.8813,
      "step": 829360
    },
    {
      "epoch": 2.9067350794001268,
      "grad_norm": 3.359375,
      "learning_rate": 1.7271950798435063e-06,
      "loss": 0.9064,
      "step": 829370
    },
    {
      "epoch": 2.9067701269070225,
      "grad_norm": 2.765625,
      "learning_rate": 1.7265460511798045e-06,
      "loss": 0.8077,
      "step": 829380
    },
    {
      "epoch": 2.9068051744139183,
      "grad_norm": 3.09375,
      "learning_rate": 1.7258970225161025e-06,
      "loss": 0.7487,
      "step": 829390
    },
    {
      "epoch": 2.9068402219208136,
      "grad_norm": 2.484375,
      "learning_rate": 1.7252479938524007e-06,
      "loss": 0.8144,
      "step": 829400
    },
    {
      "epoch": 2.9068752694277094,
      "grad_norm": 3.21875,
      "learning_rate": 1.7245989651886985e-06,
      "loss": 0.8706,
      "step": 829410
    },
    {
      "epoch": 2.9069103169346047,
      "grad_norm": 2.75,
      "learning_rate": 1.7239499365249967e-06,
      "loss": 0.843,
      "step": 829420
    },
    {
      "epoch": 2.9069453644415004,
      "grad_norm": 2.53125,
      "learning_rate": 1.7233009078612947e-06,
      "loss": 0.7779,
      "step": 829430
    },
    {
      "epoch": 2.906980411948396,
      "grad_norm": 3.03125,
      "learning_rate": 1.722651879197593e-06,
      "loss": 0.8486,
      "step": 829440
    },
    {
      "epoch": 2.9070154594552915,
      "grad_norm": 2.84375,
      "learning_rate": 1.7220028505338912e-06,
      "loss": 0.8354,
      "step": 829450
    },
    {
      "epoch": 2.9070505069621873,
      "grad_norm": 3.25,
      "learning_rate": 1.7213538218701892e-06,
      "loss": 0.7837,
      "step": 829460
    },
    {
      "epoch": 2.9070855544690826,
      "grad_norm": 2.671875,
      "learning_rate": 1.7207047932064874e-06,
      "loss": 0.7978,
      "step": 829470
    },
    {
      "epoch": 2.9071206019759783,
      "grad_norm": 3.09375,
      "learning_rate": 1.7200557645427852e-06,
      "loss": 0.8061,
      "step": 829480
    },
    {
      "epoch": 2.907155649482874,
      "grad_norm": 3.21875,
      "learning_rate": 1.7194067358790836e-06,
      "loss": 0.8063,
      "step": 829490
    },
    {
      "epoch": 2.90719069698977,
      "grad_norm": 2.921875,
      "learning_rate": 1.7187577072153814e-06,
      "loss": 0.7676,
      "step": 829500
    },
    {
      "epoch": 2.907225744496665,
      "grad_norm": 3.171875,
      "learning_rate": 1.7181086785516796e-06,
      "loss": 0.8707,
      "step": 829510
    },
    {
      "epoch": 2.907260792003561,
      "grad_norm": 2.71875,
      "learning_rate": 1.7174596498879778e-06,
      "loss": 0.7846,
      "step": 829520
    },
    {
      "epoch": 2.9072958395104562,
      "grad_norm": 2.65625,
      "learning_rate": 1.7168106212242758e-06,
      "loss": 0.8082,
      "step": 829530
    },
    {
      "epoch": 2.907330887017352,
      "grad_norm": 2.828125,
      "learning_rate": 1.716161592560574e-06,
      "loss": 0.777,
      "step": 829540
    },
    {
      "epoch": 2.9073659345242477,
      "grad_norm": 3.46875,
      "learning_rate": 1.715512563896872e-06,
      "loss": 0.8642,
      "step": 829550
    },
    {
      "epoch": 2.907400982031143,
      "grad_norm": 2.96875,
      "learning_rate": 1.7148635352331702e-06,
      "loss": 0.7489,
      "step": 829560
    },
    {
      "epoch": 2.907436029538039,
      "grad_norm": 3.15625,
      "learning_rate": 1.714214506569468e-06,
      "loss": 0.829,
      "step": 829570
    },
    {
      "epoch": 2.9074710770449346,
      "grad_norm": 3.03125,
      "learning_rate": 1.7135654779057662e-06,
      "loss": 0.8353,
      "step": 829580
    },
    {
      "epoch": 2.90750612455183,
      "grad_norm": 2.84375,
      "learning_rate": 1.7129164492420647e-06,
      "loss": 0.884,
      "step": 829590
    },
    {
      "epoch": 2.9075411720587256,
      "grad_norm": 3.03125,
      "learning_rate": 1.7122674205783624e-06,
      "loss": 0.7771,
      "step": 829600
    },
    {
      "epoch": 2.9075762195656214,
      "grad_norm": 2.578125,
      "learning_rate": 1.7116183919146606e-06,
      "loss": 0.7442,
      "step": 829610
    },
    {
      "epoch": 2.9076112670725167,
      "grad_norm": 2.96875,
      "learning_rate": 1.7109693632509586e-06,
      "loss": 0.7984,
      "step": 829620
    },
    {
      "epoch": 2.9076463145794125,
      "grad_norm": 2.703125,
      "learning_rate": 1.7103203345872569e-06,
      "loss": 0.7842,
      "step": 829630
    },
    {
      "epoch": 2.907681362086308,
      "grad_norm": 2.90625,
      "learning_rate": 1.7096713059235546e-06,
      "loss": 0.7897,
      "step": 829640
    },
    {
      "epoch": 2.9077164095932035,
      "grad_norm": 2.453125,
      "learning_rate": 1.709022277259853e-06,
      "loss": 0.7767,
      "step": 829650
    },
    {
      "epoch": 2.9077514571000993,
      "grad_norm": 3.328125,
      "learning_rate": 1.7083732485961513e-06,
      "loss": 0.754,
      "step": 829660
    },
    {
      "epoch": 2.9077865046069946,
      "grad_norm": 3.203125,
      "learning_rate": 1.707724219932449e-06,
      "loss": 0.7813,
      "step": 829670
    },
    {
      "epoch": 2.9078215521138904,
      "grad_norm": 3.09375,
      "learning_rate": 1.7070751912687473e-06,
      "loss": 0.7774,
      "step": 829680
    },
    {
      "epoch": 2.907856599620786,
      "grad_norm": 2.796875,
      "learning_rate": 1.7064261626050453e-06,
      "loss": 0.7743,
      "step": 829690
    },
    {
      "epoch": 2.9078916471276814,
      "grad_norm": 2.859375,
      "learning_rate": 1.7057771339413435e-06,
      "loss": 0.768,
      "step": 829700
    },
    {
      "epoch": 2.907926694634577,
      "grad_norm": 3.203125,
      "learning_rate": 1.7051281052776415e-06,
      "loss": 0.8406,
      "step": 829710
    },
    {
      "epoch": 2.907961742141473,
      "grad_norm": 2.796875,
      "learning_rate": 1.7044790766139397e-06,
      "loss": 0.8327,
      "step": 829720
    },
    {
      "epoch": 2.9079967896483683,
      "grad_norm": 3.171875,
      "learning_rate": 1.703830047950238e-06,
      "loss": 0.7718,
      "step": 829730
    },
    {
      "epoch": 2.908031837155264,
      "grad_norm": 3.234375,
      "learning_rate": 1.7031810192865357e-06,
      "loss": 0.8044,
      "step": 829740
    },
    {
      "epoch": 2.9080668846621593,
      "grad_norm": 3.109375,
      "learning_rate": 1.7025319906228341e-06,
      "loss": 0.779,
      "step": 829750
    },
    {
      "epoch": 2.908101932169055,
      "grad_norm": 2.71875,
      "learning_rate": 1.701882961959132e-06,
      "loss": 0.8009,
      "step": 829760
    },
    {
      "epoch": 2.908136979675951,
      "grad_norm": 2.765625,
      "learning_rate": 1.7012339332954301e-06,
      "loss": 0.828,
      "step": 829770
    },
    {
      "epoch": 2.9081720271828466,
      "grad_norm": 2.625,
      "learning_rate": 1.7005849046317281e-06,
      "loss": 0.7925,
      "step": 829780
    },
    {
      "epoch": 2.908207074689742,
      "grad_norm": 3.375,
      "learning_rate": 1.6999358759680263e-06,
      "loss": 0.8498,
      "step": 829790
    },
    {
      "epoch": 2.9082421221966377,
      "grad_norm": 2.84375,
      "learning_rate": 1.6992868473043246e-06,
      "loss": 0.8227,
      "step": 829800
    },
    {
      "epoch": 2.908277169703533,
      "grad_norm": 3.078125,
      "learning_rate": 1.6986378186406226e-06,
      "loss": 0.7934,
      "step": 829810
    },
    {
      "epoch": 2.9083122172104288,
      "grad_norm": 3.265625,
      "learning_rate": 1.6979887899769208e-06,
      "loss": 0.8615,
      "step": 829820
    },
    {
      "epoch": 2.9083472647173245,
      "grad_norm": 2.84375,
      "learning_rate": 1.6973397613132186e-06,
      "loss": 0.7581,
      "step": 829830
    },
    {
      "epoch": 2.90838231222422,
      "grad_norm": 2.96875,
      "learning_rate": 1.6966907326495168e-06,
      "loss": 0.8347,
      "step": 829840
    },
    {
      "epoch": 2.9084173597311156,
      "grad_norm": 2.703125,
      "learning_rate": 1.696041703985815e-06,
      "loss": 0.8266,
      "step": 829850
    },
    {
      "epoch": 2.908452407238011,
      "grad_norm": 3.078125,
      "learning_rate": 1.695392675322113e-06,
      "loss": 0.8292,
      "step": 829860
    },
    {
      "epoch": 2.9084874547449067,
      "grad_norm": 2.671875,
      "learning_rate": 1.6947436466584112e-06,
      "loss": 0.7789,
      "step": 829870
    },
    {
      "epoch": 2.9085225022518024,
      "grad_norm": 3.203125,
      "learning_rate": 1.6940946179947092e-06,
      "loss": 0.7493,
      "step": 829880
    },
    {
      "epoch": 2.908557549758698,
      "grad_norm": 2.875,
      "learning_rate": 1.6934455893310074e-06,
      "loss": 0.795,
      "step": 829890
    },
    {
      "epoch": 2.9085925972655935,
      "grad_norm": 2.921875,
      "learning_rate": 1.6927965606673052e-06,
      "loss": 0.787,
      "step": 829900
    },
    {
      "epoch": 2.9086276447724893,
      "grad_norm": 3.375,
      "learning_rate": 1.6921475320036034e-06,
      "loss": 0.8486,
      "step": 829910
    },
    {
      "epoch": 2.9086626922793846,
      "grad_norm": 3.25,
      "learning_rate": 1.6914985033399018e-06,
      "loss": 0.8925,
      "step": 829920
    },
    {
      "epoch": 2.9086977397862803,
      "grad_norm": 2.78125,
      "learning_rate": 1.6908494746761996e-06,
      "loss": 0.8439,
      "step": 829930
    },
    {
      "epoch": 2.908732787293176,
      "grad_norm": 2.921875,
      "learning_rate": 1.6902004460124978e-06,
      "loss": 0.7827,
      "step": 829940
    },
    {
      "epoch": 2.9087678348000714,
      "grad_norm": 3.5,
      "learning_rate": 1.6895514173487958e-06,
      "loss": 0.8658,
      "step": 829950
    },
    {
      "epoch": 2.908802882306967,
      "grad_norm": 3.03125,
      "learning_rate": 1.688902388685094e-06,
      "loss": 0.8158,
      "step": 829960
    },
    {
      "epoch": 2.9088379298138625,
      "grad_norm": 2.546875,
      "learning_rate": 1.6882533600213918e-06,
      "loss": 0.831,
      "step": 829970
    },
    {
      "epoch": 2.9088729773207582,
      "grad_norm": 3.0625,
      "learning_rate": 1.6876043313576902e-06,
      "loss": 0.7836,
      "step": 829980
    },
    {
      "epoch": 2.908908024827654,
      "grad_norm": 3.09375,
      "learning_rate": 1.6869553026939885e-06,
      "loss": 0.7492,
      "step": 829990
    },
    {
      "epoch": 2.9089430723345497,
      "grad_norm": 2.734375,
      "learning_rate": 1.6863062740302862e-06,
      "loss": 0.7061,
      "step": 830000
    },
    {
      "epoch": 2.9089430723345497,
      "eval_loss": 0.7495379447937012,
      "eval_runtime": 552.8018,
      "eval_samples_per_second": 688.196,
      "eval_steps_per_second": 57.35,
      "step": 830000
    },
    {
      "epoch": 2.908978119841445,
      "grad_norm": 3.078125,
      "learning_rate": 1.6856572453665845e-06,
      "loss": 0.7821,
      "step": 830010
    },
    {
      "epoch": 2.909013167348341,
      "grad_norm": 3.03125,
      "learning_rate": 1.6850082167028825e-06,
      "loss": 0.7974,
      "step": 830020
    },
    {
      "epoch": 2.909048214855236,
      "grad_norm": 2.71875,
      "learning_rate": 1.6843591880391807e-06,
      "loss": 0.8362,
      "step": 830030
    },
    {
      "epoch": 2.909083262362132,
      "grad_norm": 3.0625,
      "learning_rate": 1.6837101593754787e-06,
      "loss": 0.7822,
      "step": 830040
    },
    {
      "epoch": 2.9091183098690276,
      "grad_norm": 2.875,
      "learning_rate": 1.6830611307117769e-06,
      "loss": 0.7386,
      "step": 830050
    },
    {
      "epoch": 2.909153357375923,
      "grad_norm": 2.59375,
      "learning_rate": 1.682412102048075e-06,
      "loss": 0.7435,
      "step": 830060
    },
    {
      "epoch": 2.9091884048828187,
      "grad_norm": 3.234375,
      "learning_rate": 1.6817630733843729e-06,
      "loss": 0.8642,
      "step": 830070
    },
    {
      "epoch": 2.909223452389714,
      "grad_norm": 3.515625,
      "learning_rate": 1.6811140447206713e-06,
      "loss": 0.804,
      "step": 830080
    },
    {
      "epoch": 2.90925849989661,
      "grad_norm": 2.90625,
      "learning_rate": 1.680465016056969e-06,
      "loss": 0.8377,
      "step": 830090
    },
    {
      "epoch": 2.9092935474035055,
      "grad_norm": 3.046875,
      "learning_rate": 1.6798159873932673e-06,
      "loss": 0.7062,
      "step": 830100
    },
    {
      "epoch": 2.9093285949104013,
      "grad_norm": 3.171875,
      "learning_rate": 1.6791669587295653e-06,
      "loss": 0.8287,
      "step": 830110
    },
    {
      "epoch": 2.9093636424172966,
      "grad_norm": 3.140625,
      "learning_rate": 1.6785179300658635e-06,
      "loss": 0.7642,
      "step": 830120
    },
    {
      "epoch": 2.9093986899241924,
      "grad_norm": 2.953125,
      "learning_rate": 1.6778689014021617e-06,
      "loss": 0.7126,
      "step": 830130
    },
    {
      "epoch": 2.9094337374310877,
      "grad_norm": 2.84375,
      "learning_rate": 1.6772198727384597e-06,
      "loss": 0.8097,
      "step": 830140
    },
    {
      "epoch": 2.9094687849379834,
      "grad_norm": 2.859375,
      "learning_rate": 1.676570844074758e-06,
      "loss": 0.8358,
      "step": 830150
    },
    {
      "epoch": 2.909503832444879,
      "grad_norm": 3.171875,
      "learning_rate": 1.6759218154110557e-06,
      "loss": 0.8821,
      "step": 830160
    },
    {
      "epoch": 2.9095388799517745,
      "grad_norm": 3.296875,
      "learning_rate": 1.675272786747354e-06,
      "loss": 0.8758,
      "step": 830170
    },
    {
      "epoch": 2.9095739274586703,
      "grad_norm": 3.140625,
      "learning_rate": 1.674623758083652e-06,
      "loss": 0.7853,
      "step": 830180
    },
    {
      "epoch": 2.9096089749655656,
      "grad_norm": 3.03125,
      "learning_rate": 1.6739747294199501e-06,
      "loss": 0.8341,
      "step": 830190
    },
    {
      "epoch": 2.9096440224724613,
      "grad_norm": 2.6875,
      "learning_rate": 1.6733257007562484e-06,
      "loss": 0.8262,
      "step": 830200
    },
    {
      "epoch": 2.909679069979357,
      "grad_norm": 2.921875,
      "learning_rate": 1.6726766720925464e-06,
      "loss": 0.7714,
      "step": 830210
    },
    {
      "epoch": 2.909714117486253,
      "grad_norm": 2.8125,
      "learning_rate": 1.6720276434288446e-06,
      "loss": 0.8345,
      "step": 830220
    },
    {
      "epoch": 2.909749164993148,
      "grad_norm": 2.921875,
      "learning_rate": 1.6713786147651424e-06,
      "loss": 0.7547,
      "step": 830230
    },
    {
      "epoch": 2.909784212500044,
      "grad_norm": 2.859375,
      "learning_rate": 1.6707295861014408e-06,
      "loss": 0.7909,
      "step": 830240
    },
    {
      "epoch": 2.9098192600069392,
      "grad_norm": 3.234375,
      "learning_rate": 1.670080557437739e-06,
      "loss": 0.9113,
      "step": 830250
    },
    {
      "epoch": 2.909854307513835,
      "grad_norm": 3.078125,
      "learning_rate": 1.6694315287740368e-06,
      "loss": 0.8945,
      "step": 830260
    },
    {
      "epoch": 2.9098893550207308,
      "grad_norm": 2.53125,
      "learning_rate": 1.668782500110335e-06,
      "loss": 0.7733,
      "step": 830270
    },
    {
      "epoch": 2.909924402527626,
      "grad_norm": 2.6875,
      "learning_rate": 1.668133471446633e-06,
      "loss": 0.7437,
      "step": 830280
    },
    {
      "epoch": 2.909959450034522,
      "grad_norm": 2.84375,
      "learning_rate": 1.6674844427829312e-06,
      "loss": 0.8135,
      "step": 830290
    },
    {
      "epoch": 2.909994497541417,
      "grad_norm": 2.78125,
      "learning_rate": 1.6668354141192292e-06,
      "loss": 0.8031,
      "step": 830300
    },
    {
      "epoch": 2.910029545048313,
      "grad_norm": 2.609375,
      "learning_rate": 1.6661863854555274e-06,
      "loss": 0.8012,
      "step": 830310
    },
    {
      "epoch": 2.9100645925552087,
      "grad_norm": 2.5625,
      "learning_rate": 1.6655373567918256e-06,
      "loss": 0.7558,
      "step": 830320
    },
    {
      "epoch": 2.9100996400621044,
      "grad_norm": 2.578125,
      "learning_rate": 1.6648883281281234e-06,
      "loss": 0.8234,
      "step": 830330
    },
    {
      "epoch": 2.9101346875689997,
      "grad_norm": 3.0625,
      "learning_rate": 1.6642392994644218e-06,
      "loss": 0.8206,
      "step": 830340
    },
    {
      "epoch": 2.9101697350758955,
      "grad_norm": 3.3125,
      "learning_rate": 1.6635902708007196e-06,
      "loss": 0.8236,
      "step": 830350
    },
    {
      "epoch": 2.910204782582791,
      "grad_norm": 3.046875,
      "learning_rate": 1.6629412421370178e-06,
      "loss": 0.8058,
      "step": 830360
    },
    {
      "epoch": 2.9102398300896866,
      "grad_norm": 2.9375,
      "learning_rate": 1.6622922134733158e-06,
      "loss": 0.7736,
      "step": 830370
    },
    {
      "epoch": 2.9102748775965823,
      "grad_norm": 2.390625,
      "learning_rate": 1.661643184809614e-06,
      "loss": 0.7803,
      "step": 830380
    },
    {
      "epoch": 2.9103099251034776,
      "grad_norm": 3.109375,
      "learning_rate": 1.6609941561459123e-06,
      "loss": 0.7628,
      "step": 830390
    },
    {
      "epoch": 2.9103449726103734,
      "grad_norm": 3.21875,
      "learning_rate": 1.6603451274822103e-06,
      "loss": 0.811,
      "step": 830400
    },
    {
      "epoch": 2.9103800201172687,
      "grad_norm": 2.953125,
      "learning_rate": 1.6596960988185085e-06,
      "loss": 0.8963,
      "step": 830410
    },
    {
      "epoch": 2.9104150676241645,
      "grad_norm": 3.078125,
      "learning_rate": 1.6590470701548063e-06,
      "loss": 0.8423,
      "step": 830420
    },
    {
      "epoch": 2.9104501151310602,
      "grad_norm": 2.859375,
      "learning_rate": 1.6583980414911045e-06,
      "loss": 0.8152,
      "step": 830430
    },
    {
      "epoch": 2.910485162637956,
      "grad_norm": 2.90625,
      "learning_rate": 1.6577490128274025e-06,
      "loss": 0.7734,
      "step": 830440
    },
    {
      "epoch": 2.9105202101448513,
      "grad_norm": 2.453125,
      "learning_rate": 1.6570999841637007e-06,
      "loss": 0.8184,
      "step": 830450
    },
    {
      "epoch": 2.910555257651747,
      "grad_norm": 2.671875,
      "learning_rate": 1.656450955499999e-06,
      "loss": 0.7104,
      "step": 830460
    },
    {
      "epoch": 2.9105903051586424,
      "grad_norm": 3.3125,
      "learning_rate": 1.655801926836297e-06,
      "loss": 0.8064,
      "step": 830470
    },
    {
      "epoch": 2.910625352665538,
      "grad_norm": 2.96875,
      "learning_rate": 1.6551528981725951e-06,
      "loss": 0.8318,
      "step": 830480
    },
    {
      "epoch": 2.910660400172434,
      "grad_norm": 3.046875,
      "learning_rate": 1.654503869508893e-06,
      "loss": 0.7642,
      "step": 830490
    },
    {
      "epoch": 2.910695447679329,
      "grad_norm": 3.078125,
      "learning_rate": 1.6538548408451911e-06,
      "loss": 0.8001,
      "step": 830500
    },
    {
      "epoch": 2.910730495186225,
      "grad_norm": 2.78125,
      "learning_rate": 1.6532058121814891e-06,
      "loss": 0.8181,
      "step": 830510
    },
    {
      "epoch": 2.9107655426931203,
      "grad_norm": 2.765625,
      "learning_rate": 1.6525567835177873e-06,
      "loss": 0.8108,
      "step": 830520
    },
    {
      "epoch": 2.910800590200016,
      "grad_norm": 2.71875,
      "learning_rate": 1.6519077548540855e-06,
      "loss": 0.7281,
      "step": 830530
    },
    {
      "epoch": 2.910835637706912,
      "grad_norm": 3.203125,
      "learning_rate": 1.6512587261903835e-06,
      "loss": 0.8426,
      "step": 830540
    },
    {
      "epoch": 2.9108706852138075,
      "grad_norm": 2.84375,
      "learning_rate": 1.6506096975266817e-06,
      "loss": 0.8621,
      "step": 830550
    },
    {
      "epoch": 2.910905732720703,
      "grad_norm": 3.03125,
      "learning_rate": 1.6499606688629795e-06,
      "loss": 0.8542,
      "step": 830560
    },
    {
      "epoch": 2.9109407802275986,
      "grad_norm": 2.796875,
      "learning_rate": 1.649311640199278e-06,
      "loss": 0.7768,
      "step": 830570
    },
    {
      "epoch": 2.910975827734494,
      "grad_norm": 2.8125,
      "learning_rate": 1.6486626115355757e-06,
      "loss": 0.7376,
      "step": 830580
    },
    {
      "epoch": 2.9110108752413897,
      "grad_norm": 2.65625,
      "learning_rate": 1.648013582871874e-06,
      "loss": 0.7819,
      "step": 830590
    },
    {
      "epoch": 2.9110459227482854,
      "grad_norm": 2.984375,
      "learning_rate": 1.6473645542081722e-06,
      "loss": 0.782,
      "step": 830600
    },
    {
      "epoch": 2.9110809702551808,
      "grad_norm": 3.5625,
      "learning_rate": 1.6467155255444702e-06,
      "loss": 0.8291,
      "step": 830610
    },
    {
      "epoch": 2.9111160177620765,
      "grad_norm": 2.484375,
      "learning_rate": 1.6460664968807684e-06,
      "loss": 0.8116,
      "step": 830620
    },
    {
      "epoch": 2.911151065268972,
      "grad_norm": 2.78125,
      "learning_rate": 1.6454174682170664e-06,
      "loss": 0.8102,
      "step": 830630
    },
    {
      "epoch": 2.9111861127758676,
      "grad_norm": 2.5625,
      "learning_rate": 1.6447684395533646e-06,
      "loss": 0.7979,
      "step": 830640
    },
    {
      "epoch": 2.9112211602827633,
      "grad_norm": 2.9375,
      "learning_rate": 1.6441194108896624e-06,
      "loss": 0.7887,
      "step": 830650
    },
    {
      "epoch": 2.911256207789659,
      "grad_norm": 3.34375,
      "learning_rate": 1.6434703822259606e-06,
      "loss": 0.8371,
      "step": 830660
    },
    {
      "epoch": 2.9112912552965544,
      "grad_norm": 2.890625,
      "learning_rate": 1.642821353562259e-06,
      "loss": 0.7405,
      "step": 830670
    },
    {
      "epoch": 2.91132630280345,
      "grad_norm": 3.03125,
      "learning_rate": 1.6421723248985568e-06,
      "loss": 0.8402,
      "step": 830680
    },
    {
      "epoch": 2.9113613503103455,
      "grad_norm": 3.328125,
      "learning_rate": 1.641523296234855e-06,
      "loss": 0.8435,
      "step": 830690
    },
    {
      "epoch": 2.9113963978172412,
      "grad_norm": 2.71875,
      "learning_rate": 1.640874267571153e-06,
      "loss": 0.8038,
      "step": 830700
    },
    {
      "epoch": 2.911431445324137,
      "grad_norm": 2.984375,
      "learning_rate": 1.6402252389074512e-06,
      "loss": 0.8348,
      "step": 830710
    },
    {
      "epoch": 2.9114664928310323,
      "grad_norm": 3.125,
      "learning_rate": 1.6395762102437494e-06,
      "loss": 0.7503,
      "step": 830720
    },
    {
      "epoch": 2.911501540337928,
      "grad_norm": 3.4375,
      "learning_rate": 1.6389271815800474e-06,
      "loss": 0.904,
      "step": 830730
    },
    {
      "epoch": 2.9115365878448234,
      "grad_norm": 2.96875,
      "learning_rate": 1.6382781529163456e-06,
      "loss": 0.8398,
      "step": 830740
    },
    {
      "epoch": 2.911571635351719,
      "grad_norm": 2.796875,
      "learning_rate": 1.6376291242526434e-06,
      "loss": 0.807,
      "step": 830750
    },
    {
      "epoch": 2.911606682858615,
      "grad_norm": 2.75,
      "learning_rate": 1.6369800955889416e-06,
      "loss": 0.7499,
      "step": 830760
    },
    {
      "epoch": 2.9116417303655107,
      "grad_norm": 2.9375,
      "learning_rate": 1.6363310669252396e-06,
      "loss": 0.6975,
      "step": 830770
    },
    {
      "epoch": 2.911676777872406,
      "grad_norm": 2.734375,
      "learning_rate": 1.6356820382615379e-06,
      "loss": 0.6856,
      "step": 830780
    },
    {
      "epoch": 2.9117118253793017,
      "grad_norm": 3.0,
      "learning_rate": 1.635033009597836e-06,
      "loss": 0.7796,
      "step": 830790
    },
    {
      "epoch": 2.911746872886197,
      "grad_norm": 2.734375,
      "learning_rate": 1.634383980934134e-06,
      "loss": 0.6453,
      "step": 830800
    },
    {
      "epoch": 2.911781920393093,
      "grad_norm": 2.796875,
      "learning_rate": 1.6337349522704323e-06,
      "loss": 0.74,
      "step": 830810
    },
    {
      "epoch": 2.9118169678999886,
      "grad_norm": 2.765625,
      "learning_rate": 1.63308592360673e-06,
      "loss": 0.7338,
      "step": 830820
    },
    {
      "epoch": 2.911852015406884,
      "grad_norm": 2.921875,
      "learning_rate": 1.6324368949430285e-06,
      "loss": 0.8246,
      "step": 830830
    },
    {
      "epoch": 2.9118870629137796,
      "grad_norm": 2.828125,
      "learning_rate": 1.6317878662793263e-06,
      "loss": 0.8505,
      "step": 830840
    },
    {
      "epoch": 2.9119221104206754,
      "grad_norm": 2.796875,
      "learning_rate": 1.6311388376156245e-06,
      "loss": 0.7696,
      "step": 830850
    },
    {
      "epoch": 2.9119571579275707,
      "grad_norm": 3.359375,
      "learning_rate": 1.6304898089519227e-06,
      "loss": 0.7729,
      "step": 830860
    },
    {
      "epoch": 2.9119922054344665,
      "grad_norm": 2.984375,
      "learning_rate": 1.6298407802882207e-06,
      "loss": 0.7722,
      "step": 830870
    },
    {
      "epoch": 2.912027252941362,
      "grad_norm": 2.8125,
      "learning_rate": 1.629191751624519e-06,
      "loss": 0.8413,
      "step": 830880
    },
    {
      "epoch": 2.9120623004482575,
      "grad_norm": 2.546875,
      "learning_rate": 1.628542722960817e-06,
      "loss": 0.7998,
      "step": 830890
    },
    {
      "epoch": 2.9120973479551533,
      "grad_norm": 2.796875,
      "learning_rate": 1.6278936942971151e-06,
      "loss": 0.8278,
      "step": 830900
    },
    {
      "epoch": 2.9121323954620486,
      "grad_norm": 2.875,
      "learning_rate": 1.627244665633413e-06,
      "loss": 0.851,
      "step": 830910
    },
    {
      "epoch": 2.9121674429689444,
      "grad_norm": 2.796875,
      "learning_rate": 1.6265956369697111e-06,
      "loss": 0.7502,
      "step": 830920
    },
    {
      "epoch": 2.91220249047584,
      "grad_norm": 2.75,
      "learning_rate": 1.6259466083060096e-06,
      "loss": 0.8019,
      "step": 830930
    },
    {
      "epoch": 2.9122375379827354,
      "grad_norm": 2.96875,
      "learning_rate": 1.6252975796423073e-06,
      "loss": 0.8068,
      "step": 830940
    },
    {
      "epoch": 2.912272585489631,
      "grad_norm": 2.703125,
      "learning_rate": 1.6246485509786056e-06,
      "loss": 0.7852,
      "step": 830950
    },
    {
      "epoch": 2.912307632996527,
      "grad_norm": 2.8125,
      "learning_rate": 1.6239995223149036e-06,
      "loss": 0.7708,
      "step": 830960
    },
    {
      "epoch": 2.9123426805034223,
      "grad_norm": 3.03125,
      "learning_rate": 1.6233504936512018e-06,
      "loss": 0.8146,
      "step": 830970
    },
    {
      "epoch": 2.912377728010318,
      "grad_norm": 2.984375,
      "learning_rate": 1.6227014649874996e-06,
      "loss": 0.6733,
      "step": 830980
    },
    {
      "epoch": 2.912412775517214,
      "grad_norm": 2.671875,
      "learning_rate": 1.622052436323798e-06,
      "loss": 0.6931,
      "step": 830990
    },
    {
      "epoch": 2.912447823024109,
      "grad_norm": 2.984375,
      "learning_rate": 1.6214034076600962e-06,
      "loss": 0.8556,
      "step": 831000
    },
    {
      "epoch": 2.912482870531005,
      "grad_norm": 2.828125,
      "learning_rate": 1.620754378996394e-06,
      "loss": 0.7103,
      "step": 831010
    },
    {
      "epoch": 2.9125179180379,
      "grad_norm": 2.890625,
      "learning_rate": 1.6201053503326922e-06,
      "loss": 0.7421,
      "step": 831020
    },
    {
      "epoch": 2.912552965544796,
      "grad_norm": 2.71875,
      "learning_rate": 1.6194563216689902e-06,
      "loss": 0.8649,
      "step": 831030
    },
    {
      "epoch": 2.9125880130516917,
      "grad_norm": 2.546875,
      "learning_rate": 1.6188072930052884e-06,
      "loss": 0.8062,
      "step": 831040
    },
    {
      "epoch": 2.9126230605585874,
      "grad_norm": 2.453125,
      "learning_rate": 1.6181582643415864e-06,
      "loss": 0.7737,
      "step": 831050
    },
    {
      "epoch": 2.9126581080654828,
      "grad_norm": 2.8125,
      "learning_rate": 1.6175092356778846e-06,
      "loss": 0.7009,
      "step": 831060
    },
    {
      "epoch": 2.9126931555723785,
      "grad_norm": 3.046875,
      "learning_rate": 1.6168602070141828e-06,
      "loss": 0.834,
      "step": 831070
    },
    {
      "epoch": 2.912728203079274,
      "grad_norm": 3.234375,
      "learning_rate": 1.6162111783504806e-06,
      "loss": 0.7821,
      "step": 831080
    },
    {
      "epoch": 2.9127632505861696,
      "grad_norm": 3.0625,
      "learning_rate": 1.6155621496867788e-06,
      "loss": 0.8847,
      "step": 831090
    },
    {
      "epoch": 2.9127982980930653,
      "grad_norm": 2.828125,
      "learning_rate": 1.6149131210230768e-06,
      "loss": 0.8371,
      "step": 831100
    },
    {
      "epoch": 2.9128333455999607,
      "grad_norm": 2.546875,
      "learning_rate": 1.614264092359375e-06,
      "loss": 0.7143,
      "step": 831110
    },
    {
      "epoch": 2.9128683931068564,
      "grad_norm": 3.234375,
      "learning_rate": 1.613615063695673e-06,
      "loss": 0.8008,
      "step": 831120
    },
    {
      "epoch": 2.9129034406137517,
      "grad_norm": 3.0,
      "learning_rate": 1.6129660350319712e-06,
      "loss": 0.7926,
      "step": 831130
    },
    {
      "epoch": 2.9129384881206475,
      "grad_norm": 3.125,
      "learning_rate": 1.6123170063682695e-06,
      "loss": 0.8162,
      "step": 831140
    },
    {
      "epoch": 2.9129735356275432,
      "grad_norm": 2.90625,
      "learning_rate": 1.6116679777045672e-06,
      "loss": 0.7399,
      "step": 831150
    },
    {
      "epoch": 2.913008583134439,
      "grad_norm": 2.953125,
      "learning_rate": 1.6110189490408657e-06,
      "loss": 0.7998,
      "step": 831160
    },
    {
      "epoch": 2.9130436306413343,
      "grad_norm": 2.828125,
      "learning_rate": 1.6103699203771635e-06,
      "loss": 0.8271,
      "step": 831170
    },
    {
      "epoch": 2.91307867814823,
      "grad_norm": 3.03125,
      "learning_rate": 1.6097208917134617e-06,
      "loss": 0.7902,
      "step": 831180
    },
    {
      "epoch": 2.9131137256551254,
      "grad_norm": 2.328125,
      "learning_rate": 1.6090718630497599e-06,
      "loss": 0.7117,
      "step": 831190
    },
    {
      "epoch": 2.913148773162021,
      "grad_norm": 2.90625,
      "learning_rate": 1.6084228343860579e-06,
      "loss": 0.7371,
      "step": 831200
    },
    {
      "epoch": 2.913183820668917,
      "grad_norm": 3.09375,
      "learning_rate": 1.607773805722356e-06,
      "loss": 0.7932,
      "step": 831210
    },
    {
      "epoch": 2.913218868175812,
      "grad_norm": 2.75,
      "learning_rate": 1.607124777058654e-06,
      "loss": 0.8365,
      "step": 831220
    },
    {
      "epoch": 2.913253915682708,
      "grad_norm": 2.390625,
      "learning_rate": 1.6064757483949523e-06,
      "loss": 0.8321,
      "step": 831230
    },
    {
      "epoch": 2.9132889631896033,
      "grad_norm": 2.71875,
      "learning_rate": 1.60582671973125e-06,
      "loss": 0.7344,
      "step": 831240
    },
    {
      "epoch": 2.913324010696499,
      "grad_norm": 2.390625,
      "learning_rate": 1.6051776910675483e-06,
      "loss": 0.7736,
      "step": 831250
    },
    {
      "epoch": 2.913359058203395,
      "grad_norm": 2.890625,
      "learning_rate": 1.6045286624038467e-06,
      "loss": 0.795,
      "step": 831260
    },
    {
      "epoch": 2.9133941057102906,
      "grad_norm": 3.171875,
      "learning_rate": 1.6038796337401445e-06,
      "loss": 0.8282,
      "step": 831270
    },
    {
      "epoch": 2.913429153217186,
      "grad_norm": 2.515625,
      "learning_rate": 1.6032306050764427e-06,
      "loss": 0.8277,
      "step": 831280
    },
    {
      "epoch": 2.9134642007240816,
      "grad_norm": 3.328125,
      "learning_rate": 1.6025815764127407e-06,
      "loss": 0.7888,
      "step": 831290
    },
    {
      "epoch": 2.913499248230977,
      "grad_norm": 2.65625,
      "learning_rate": 1.601932547749039e-06,
      "loss": 0.8747,
      "step": 831300
    },
    {
      "epoch": 2.9135342957378727,
      "grad_norm": 3.203125,
      "learning_rate": 1.6012835190853367e-06,
      "loss": 0.9011,
      "step": 831310
    },
    {
      "epoch": 2.9135693432447685,
      "grad_norm": 3.3125,
      "learning_rate": 1.6006344904216351e-06,
      "loss": 0.8613,
      "step": 831320
    },
    {
      "epoch": 2.9136043907516638,
      "grad_norm": 3.609375,
      "learning_rate": 1.5999854617579334e-06,
      "loss": 0.8248,
      "step": 831330
    },
    {
      "epoch": 2.9136394382585595,
      "grad_norm": 3.109375,
      "learning_rate": 1.5993364330942311e-06,
      "loss": 0.8259,
      "step": 831340
    },
    {
      "epoch": 2.913674485765455,
      "grad_norm": 2.890625,
      "learning_rate": 1.5986874044305294e-06,
      "loss": 0.7741,
      "step": 831350
    },
    {
      "epoch": 2.9137095332723506,
      "grad_norm": 3.09375,
      "learning_rate": 1.5980383757668274e-06,
      "loss": 0.8157,
      "step": 831360
    },
    {
      "epoch": 2.9137445807792464,
      "grad_norm": 3.125,
      "learning_rate": 1.5973893471031256e-06,
      "loss": 0.8738,
      "step": 831370
    },
    {
      "epoch": 2.913779628286142,
      "grad_norm": 2.71875,
      "learning_rate": 1.5967403184394236e-06,
      "loss": 0.7856,
      "step": 831380
    },
    {
      "epoch": 2.9138146757930374,
      "grad_norm": 2.96875,
      "learning_rate": 1.5960912897757218e-06,
      "loss": 0.8217,
      "step": 831390
    },
    {
      "epoch": 2.913849723299933,
      "grad_norm": 2.625,
      "learning_rate": 1.59544226111202e-06,
      "loss": 0.8752,
      "step": 831400
    },
    {
      "epoch": 2.9138847708068285,
      "grad_norm": 3.015625,
      "learning_rate": 1.5947932324483178e-06,
      "loss": 0.8119,
      "step": 831410
    },
    {
      "epoch": 2.9139198183137243,
      "grad_norm": 3.078125,
      "learning_rate": 1.5941442037846162e-06,
      "loss": 0.7907,
      "step": 831420
    },
    {
      "epoch": 2.91395486582062,
      "grad_norm": 2.890625,
      "learning_rate": 1.593495175120914e-06,
      "loss": 0.8053,
      "step": 831430
    },
    {
      "epoch": 2.9139899133275153,
      "grad_norm": 3.09375,
      "learning_rate": 1.5928461464572122e-06,
      "loss": 0.8504,
      "step": 831440
    },
    {
      "epoch": 2.914024960834411,
      "grad_norm": 2.828125,
      "learning_rate": 1.5921971177935102e-06,
      "loss": 0.7391,
      "step": 831450
    },
    {
      "epoch": 2.9140600083413064,
      "grad_norm": 3.0,
      "learning_rate": 1.5915480891298084e-06,
      "loss": 0.7469,
      "step": 831460
    },
    {
      "epoch": 2.914095055848202,
      "grad_norm": 3.109375,
      "learning_rate": 1.5908990604661066e-06,
      "loss": 0.82,
      "step": 831470
    },
    {
      "epoch": 2.914130103355098,
      "grad_norm": 2.796875,
      "learning_rate": 1.5902500318024046e-06,
      "loss": 0.7246,
      "step": 831480
    },
    {
      "epoch": 2.9141651508619937,
      "grad_norm": 3.34375,
      "learning_rate": 1.5896010031387028e-06,
      "loss": 0.8624,
      "step": 831490
    },
    {
      "epoch": 2.914200198368889,
      "grad_norm": 2.796875,
      "learning_rate": 1.5889519744750006e-06,
      "loss": 0.8346,
      "step": 831500
    },
    {
      "epoch": 2.9142352458757848,
      "grad_norm": 3.0625,
      "learning_rate": 1.5883029458112988e-06,
      "loss": 0.8843,
      "step": 831510
    },
    {
      "epoch": 2.91427029338268,
      "grad_norm": 2.75,
      "learning_rate": 1.5876539171475968e-06,
      "loss": 0.8628,
      "step": 831520
    },
    {
      "epoch": 2.914305340889576,
      "grad_norm": 3.125,
      "learning_rate": 1.587004888483895e-06,
      "loss": 0.8167,
      "step": 831530
    },
    {
      "epoch": 2.9143403883964716,
      "grad_norm": 2.921875,
      "learning_rate": 1.5863558598201933e-06,
      "loss": 0.7461,
      "step": 831540
    },
    {
      "epoch": 2.914375435903367,
      "grad_norm": 2.46875,
      "learning_rate": 1.5857068311564913e-06,
      "loss": 0.7281,
      "step": 831550
    },
    {
      "epoch": 2.9144104834102627,
      "grad_norm": 2.953125,
      "learning_rate": 1.5850578024927895e-06,
      "loss": 0.7998,
      "step": 831560
    },
    {
      "epoch": 2.914445530917158,
      "grad_norm": 3.15625,
      "learning_rate": 1.5844087738290873e-06,
      "loss": 0.7533,
      "step": 831570
    },
    {
      "epoch": 2.9144805784240537,
      "grad_norm": 3.0,
      "learning_rate": 1.5837597451653857e-06,
      "loss": 0.8363,
      "step": 831580
    },
    {
      "epoch": 2.9145156259309495,
      "grad_norm": 3.078125,
      "learning_rate": 1.583110716501684e-06,
      "loss": 0.7314,
      "step": 831590
    },
    {
      "epoch": 2.9145506734378452,
      "grad_norm": 3.390625,
      "learning_rate": 1.5824616878379817e-06,
      "loss": 0.8368,
      "step": 831600
    },
    {
      "epoch": 2.9145857209447406,
      "grad_norm": 2.578125,
      "learning_rate": 1.58181265917428e-06,
      "loss": 0.8443,
      "step": 831610
    },
    {
      "epoch": 2.9146207684516363,
      "grad_norm": 3.140625,
      "learning_rate": 1.581163630510578e-06,
      "loss": 0.8682,
      "step": 831620
    },
    {
      "epoch": 2.9146558159585316,
      "grad_norm": 3.0,
      "learning_rate": 1.5805146018468761e-06,
      "loss": 0.7278,
      "step": 831630
    },
    {
      "epoch": 2.9146908634654274,
      "grad_norm": 2.578125,
      "learning_rate": 1.5798655731831741e-06,
      "loss": 0.7552,
      "step": 831640
    },
    {
      "epoch": 2.914725910972323,
      "grad_norm": 3.265625,
      "learning_rate": 1.5792165445194723e-06,
      "loss": 0.866,
      "step": 831650
    },
    {
      "epoch": 2.9147609584792185,
      "grad_norm": 2.53125,
      "learning_rate": 1.5785675158557705e-06,
      "loss": 0.8208,
      "step": 831660
    },
    {
      "epoch": 2.914796005986114,
      "grad_norm": 2.875,
      "learning_rate": 1.5779184871920683e-06,
      "loss": 0.8198,
      "step": 831670
    },
    {
      "epoch": 2.9148310534930095,
      "grad_norm": 2.921875,
      "learning_rate": 1.5772694585283665e-06,
      "loss": 0.8136,
      "step": 831680
    },
    {
      "epoch": 2.9148661009999053,
      "grad_norm": 3.0,
      "learning_rate": 1.5766204298646645e-06,
      "loss": 0.8343,
      "step": 831690
    },
    {
      "epoch": 2.914901148506801,
      "grad_norm": 2.890625,
      "learning_rate": 1.5759714012009627e-06,
      "loss": 0.7938,
      "step": 831700
    },
    {
      "epoch": 2.914936196013697,
      "grad_norm": 2.53125,
      "learning_rate": 1.5753223725372607e-06,
      "loss": 0.7132,
      "step": 831710
    },
    {
      "epoch": 2.914971243520592,
      "grad_norm": 2.828125,
      "learning_rate": 1.574673343873559e-06,
      "loss": 0.6937,
      "step": 831720
    },
    {
      "epoch": 2.915006291027488,
      "grad_norm": 2.953125,
      "learning_rate": 1.5740243152098572e-06,
      "loss": 0.8344,
      "step": 831730
    },
    {
      "epoch": 2.915041338534383,
      "grad_norm": 3.25,
      "learning_rate": 1.573375286546155e-06,
      "loss": 0.7505,
      "step": 831740
    },
    {
      "epoch": 2.915076386041279,
      "grad_norm": 3.421875,
      "learning_rate": 1.5727262578824534e-06,
      "loss": 0.8225,
      "step": 831750
    },
    {
      "epoch": 2.9151114335481747,
      "grad_norm": 3.28125,
      "learning_rate": 1.5720772292187512e-06,
      "loss": 0.8386,
      "step": 831760
    },
    {
      "epoch": 2.91514648105507,
      "grad_norm": 3.171875,
      "learning_rate": 1.5714282005550494e-06,
      "loss": 0.7914,
      "step": 831770
    },
    {
      "epoch": 2.9151815285619658,
      "grad_norm": 2.96875,
      "learning_rate": 1.5707791718913474e-06,
      "loss": 0.8361,
      "step": 831780
    },
    {
      "epoch": 2.915216576068861,
      "grad_norm": 2.859375,
      "learning_rate": 1.5701301432276456e-06,
      "loss": 0.8009,
      "step": 831790
    },
    {
      "epoch": 2.915251623575757,
      "grad_norm": 2.890625,
      "learning_rate": 1.5694811145639438e-06,
      "loss": 0.7363,
      "step": 831800
    },
    {
      "epoch": 2.9152866710826526,
      "grad_norm": 2.59375,
      "learning_rate": 1.5688320859002418e-06,
      "loss": 0.8354,
      "step": 831810
    },
    {
      "epoch": 2.9153217185895484,
      "grad_norm": 3.3125,
      "learning_rate": 1.56818305723654e-06,
      "loss": 0.9308,
      "step": 831820
    },
    {
      "epoch": 2.9153567660964437,
      "grad_norm": 3.015625,
      "learning_rate": 1.5675340285728378e-06,
      "loss": 0.7828,
      "step": 831830
    },
    {
      "epoch": 2.9153918136033394,
      "grad_norm": 2.90625,
      "learning_rate": 1.566884999909136e-06,
      "loss": 0.7524,
      "step": 831840
    },
    {
      "epoch": 2.9154268611102347,
      "grad_norm": 3.109375,
      "learning_rate": 1.566235971245434e-06,
      "loss": 0.8631,
      "step": 831850
    },
    {
      "epoch": 2.9154619086171305,
      "grad_norm": 3.0,
      "learning_rate": 1.5655869425817322e-06,
      "loss": 0.7858,
      "step": 831860
    },
    {
      "epoch": 2.9154969561240263,
      "grad_norm": 3.1875,
      "learning_rate": 1.5649379139180304e-06,
      "loss": 0.9149,
      "step": 831870
    },
    {
      "epoch": 2.9155320036309216,
      "grad_norm": 2.75,
      "learning_rate": 1.5642888852543284e-06,
      "loss": 0.7748,
      "step": 831880
    },
    {
      "epoch": 2.9155670511378173,
      "grad_norm": 3.328125,
      "learning_rate": 1.5636398565906266e-06,
      "loss": 0.7182,
      "step": 831890
    },
    {
      "epoch": 2.9156020986447126,
      "grad_norm": 2.734375,
      "learning_rate": 1.5629908279269244e-06,
      "loss": 0.8492,
      "step": 831900
    },
    {
      "epoch": 2.9156371461516084,
      "grad_norm": 3.578125,
      "learning_rate": 1.5623417992632229e-06,
      "loss": 0.8627,
      "step": 831910
    },
    {
      "epoch": 2.915672193658504,
      "grad_norm": 2.796875,
      "learning_rate": 1.5616927705995209e-06,
      "loss": 0.8447,
      "step": 831920
    },
    {
      "epoch": 2.9157072411654,
      "grad_norm": 2.96875,
      "learning_rate": 1.5610437419358189e-06,
      "loss": 0.8402,
      "step": 831930
    },
    {
      "epoch": 2.9157422886722952,
      "grad_norm": 3.265625,
      "learning_rate": 1.560394713272117e-06,
      "loss": 0.7891,
      "step": 831940
    },
    {
      "epoch": 2.915777336179191,
      "grad_norm": 2.453125,
      "learning_rate": 1.559745684608415e-06,
      "loss": 0.7939,
      "step": 831950
    },
    {
      "epoch": 2.9158123836860863,
      "grad_norm": 2.53125,
      "learning_rate": 1.5590966559447133e-06,
      "loss": 0.8034,
      "step": 831960
    },
    {
      "epoch": 2.915847431192982,
      "grad_norm": 3.046875,
      "learning_rate": 1.5584476272810113e-06,
      "loss": 0.7637,
      "step": 831970
    },
    {
      "epoch": 2.915882478699878,
      "grad_norm": 2.796875,
      "learning_rate": 1.5577985986173095e-06,
      "loss": 0.74,
      "step": 831980
    },
    {
      "epoch": 2.915917526206773,
      "grad_norm": 2.78125,
      "learning_rate": 1.5571495699536075e-06,
      "loss": 0.8345,
      "step": 831990
    },
    {
      "epoch": 2.915952573713669,
      "grad_norm": 2.40625,
      "learning_rate": 1.5565005412899055e-06,
      "loss": 0.7473,
      "step": 832000
    },
    {
      "epoch": 2.915987621220564,
      "grad_norm": 2.734375,
      "learning_rate": 1.5558515126262037e-06,
      "loss": 0.7599,
      "step": 832010
    },
    {
      "epoch": 2.91602266872746,
      "grad_norm": 2.9375,
      "learning_rate": 1.5552024839625017e-06,
      "loss": 0.778,
      "step": 832020
    },
    {
      "epoch": 2.9160577162343557,
      "grad_norm": 2.90625,
      "learning_rate": 1.5545534552988e-06,
      "loss": 0.7984,
      "step": 832030
    },
    {
      "epoch": 2.9160927637412515,
      "grad_norm": 3.234375,
      "learning_rate": 1.5539044266350981e-06,
      "loss": 0.7199,
      "step": 832040
    },
    {
      "epoch": 2.916127811248147,
      "grad_norm": 2.578125,
      "learning_rate": 1.5532553979713961e-06,
      "loss": 0.771,
      "step": 832050
    },
    {
      "epoch": 2.9161628587550426,
      "grad_norm": 2.65625,
      "learning_rate": 1.5526063693076941e-06,
      "loss": 0.8123,
      "step": 832060
    },
    {
      "epoch": 2.916197906261938,
      "grad_norm": 2.515625,
      "learning_rate": 1.5519573406439923e-06,
      "loss": 0.8215,
      "step": 832070
    },
    {
      "epoch": 2.9162329537688336,
      "grad_norm": 2.484375,
      "learning_rate": 1.5513083119802903e-06,
      "loss": 0.8056,
      "step": 832080
    },
    {
      "epoch": 2.9162680012757294,
      "grad_norm": 3.03125,
      "learning_rate": 1.5506592833165883e-06,
      "loss": 0.8487,
      "step": 832090
    },
    {
      "epoch": 2.9163030487826247,
      "grad_norm": 2.546875,
      "learning_rate": 1.5500102546528866e-06,
      "loss": 0.7578,
      "step": 832100
    },
    {
      "epoch": 2.9163380962895205,
      "grad_norm": 2.734375,
      "learning_rate": 1.5493612259891848e-06,
      "loss": 0.7621,
      "step": 832110
    },
    {
      "epoch": 2.9163731437964158,
      "grad_norm": 3.25,
      "learning_rate": 1.5487121973254828e-06,
      "loss": 0.8538,
      "step": 832120
    },
    {
      "epoch": 2.9164081913033115,
      "grad_norm": 2.546875,
      "learning_rate": 1.5480631686617808e-06,
      "loss": 0.8728,
      "step": 832130
    },
    {
      "epoch": 2.9164432388102073,
      "grad_norm": 2.6875,
      "learning_rate": 1.547414139998079e-06,
      "loss": 0.8169,
      "step": 832140
    },
    {
      "epoch": 2.916478286317103,
      "grad_norm": 2.65625,
      "learning_rate": 1.546765111334377e-06,
      "loss": 0.8067,
      "step": 832150
    },
    {
      "epoch": 2.9165133338239984,
      "grad_norm": 2.828125,
      "learning_rate": 1.5461160826706752e-06,
      "loss": 0.7766,
      "step": 832160
    },
    {
      "epoch": 2.916548381330894,
      "grad_norm": 2.609375,
      "learning_rate": 1.5454670540069734e-06,
      "loss": 0.7568,
      "step": 832170
    },
    {
      "epoch": 2.9165834288377894,
      "grad_norm": 2.734375,
      "learning_rate": 1.5448180253432714e-06,
      "loss": 0.842,
      "step": 832180
    },
    {
      "epoch": 2.916618476344685,
      "grad_norm": 2.828125,
      "learning_rate": 1.5441689966795694e-06,
      "loss": 0.7124,
      "step": 832190
    },
    {
      "epoch": 2.916653523851581,
      "grad_norm": 3.296875,
      "learning_rate": 1.5435199680158676e-06,
      "loss": 0.8759,
      "step": 832200
    },
    {
      "epoch": 2.9166885713584763,
      "grad_norm": 3.03125,
      "learning_rate": 1.5428709393521656e-06,
      "loss": 0.7325,
      "step": 832210
    },
    {
      "epoch": 2.916723618865372,
      "grad_norm": 3.0625,
      "learning_rate": 1.5422219106884636e-06,
      "loss": 0.8535,
      "step": 832220
    },
    {
      "epoch": 2.9167586663722678,
      "grad_norm": 3.1875,
      "learning_rate": 1.5415728820247618e-06,
      "loss": 0.8776,
      "step": 832230
    },
    {
      "epoch": 2.916793713879163,
      "grad_norm": 2.828125,
      "learning_rate": 1.54092385336106e-06,
      "loss": 0.8763,
      "step": 832240
    },
    {
      "epoch": 2.916828761386059,
      "grad_norm": 3.03125,
      "learning_rate": 1.540274824697358e-06,
      "loss": 0.7723,
      "step": 832250
    },
    {
      "epoch": 2.9168638088929546,
      "grad_norm": 2.828125,
      "learning_rate": 1.539625796033656e-06,
      "loss": 0.7573,
      "step": 832260
    },
    {
      "epoch": 2.91689885639985,
      "grad_norm": 3.078125,
      "learning_rate": 1.5389767673699542e-06,
      "loss": 0.8817,
      "step": 832270
    },
    {
      "epoch": 2.9169339039067457,
      "grad_norm": 3.046875,
      "learning_rate": 1.5383277387062522e-06,
      "loss": 0.7897,
      "step": 832280
    },
    {
      "epoch": 2.916968951413641,
      "grad_norm": 2.40625,
      "learning_rate": 1.5376787100425502e-06,
      "loss": 0.7667,
      "step": 832290
    },
    {
      "epoch": 2.9170039989205367,
      "grad_norm": 3.171875,
      "learning_rate": 1.5370296813788485e-06,
      "loss": 0.7971,
      "step": 832300
    },
    {
      "epoch": 2.9170390464274325,
      "grad_norm": 3.03125,
      "learning_rate": 1.5363806527151467e-06,
      "loss": 0.8512,
      "step": 832310
    },
    {
      "epoch": 2.917074093934328,
      "grad_norm": 2.78125,
      "learning_rate": 1.5357316240514447e-06,
      "loss": 0.7021,
      "step": 832320
    },
    {
      "epoch": 2.9171091414412236,
      "grad_norm": 2.734375,
      "learning_rate": 1.5350825953877427e-06,
      "loss": 0.7792,
      "step": 832330
    },
    {
      "epoch": 2.9171441889481193,
      "grad_norm": 3.0,
      "learning_rate": 1.5344335667240409e-06,
      "loss": 0.8288,
      "step": 832340
    },
    {
      "epoch": 2.9171792364550146,
      "grad_norm": 2.59375,
      "learning_rate": 1.5337845380603389e-06,
      "loss": 0.8317,
      "step": 832350
    },
    {
      "epoch": 2.9172142839619104,
      "grad_norm": 2.890625,
      "learning_rate": 1.533135509396637e-06,
      "loss": 0.797,
      "step": 832360
    },
    {
      "epoch": 2.917249331468806,
      "grad_norm": 2.6875,
      "learning_rate": 1.5324864807329353e-06,
      "loss": 0.7558,
      "step": 832370
    },
    {
      "epoch": 2.9172843789757015,
      "grad_norm": 3.25,
      "learning_rate": 1.5318374520692333e-06,
      "loss": 0.7988,
      "step": 832380
    },
    {
      "epoch": 2.9173194264825972,
      "grad_norm": 2.546875,
      "learning_rate": 1.5311884234055313e-06,
      "loss": 0.7916,
      "step": 832390
    },
    {
      "epoch": 2.9173544739894925,
      "grad_norm": 3.1875,
      "learning_rate": 1.5305393947418295e-06,
      "loss": 0.8483,
      "step": 832400
    },
    {
      "epoch": 2.9173895214963883,
      "grad_norm": 2.921875,
      "learning_rate": 1.5298903660781275e-06,
      "loss": 0.7635,
      "step": 832410
    },
    {
      "epoch": 2.917424569003284,
      "grad_norm": 3.078125,
      "learning_rate": 1.5292413374144255e-06,
      "loss": 0.7867,
      "step": 832420
    },
    {
      "epoch": 2.91745961651018,
      "grad_norm": 2.875,
      "learning_rate": 1.5285923087507237e-06,
      "loss": 0.806,
      "step": 832430
    },
    {
      "epoch": 2.917494664017075,
      "grad_norm": 2.921875,
      "learning_rate": 1.527943280087022e-06,
      "loss": 0.8062,
      "step": 832440
    },
    {
      "epoch": 2.917529711523971,
      "grad_norm": 3.140625,
      "learning_rate": 1.52729425142332e-06,
      "loss": 0.8736,
      "step": 832450
    },
    {
      "epoch": 2.917564759030866,
      "grad_norm": 3.0,
      "learning_rate": 1.526645222759618e-06,
      "loss": 0.8083,
      "step": 832460
    },
    {
      "epoch": 2.917599806537762,
      "grad_norm": 2.578125,
      "learning_rate": 1.5259961940959161e-06,
      "loss": 0.7191,
      "step": 832470
    },
    {
      "epoch": 2.9176348540446577,
      "grad_norm": 2.765625,
      "learning_rate": 1.5253471654322141e-06,
      "loss": 0.7494,
      "step": 832480
    },
    {
      "epoch": 2.917669901551553,
      "grad_norm": 2.890625,
      "learning_rate": 1.5246981367685121e-06,
      "loss": 0.7787,
      "step": 832490
    },
    {
      "epoch": 2.917704949058449,
      "grad_norm": 2.9375,
      "learning_rate": 1.5240491081048106e-06,
      "loss": 0.7859,
      "step": 832500
    },
    {
      "epoch": 2.917739996565344,
      "grad_norm": 2.6875,
      "learning_rate": 1.5234000794411086e-06,
      "loss": 0.7849,
      "step": 832510
    },
    {
      "epoch": 2.91777504407224,
      "grad_norm": 2.71875,
      "learning_rate": 1.5227510507774066e-06,
      "loss": 0.8192,
      "step": 832520
    },
    {
      "epoch": 2.9178100915791356,
      "grad_norm": 3.453125,
      "learning_rate": 1.5221020221137048e-06,
      "loss": 0.8076,
      "step": 832530
    },
    {
      "epoch": 2.9178451390860314,
      "grad_norm": 3.515625,
      "learning_rate": 1.5214529934500028e-06,
      "loss": 0.8169,
      "step": 832540
    },
    {
      "epoch": 2.9178801865929267,
      "grad_norm": 2.875,
      "learning_rate": 1.5208039647863008e-06,
      "loss": 0.8236,
      "step": 832550
    },
    {
      "epoch": 2.9179152340998225,
      "grad_norm": 2.953125,
      "learning_rate": 1.520154936122599e-06,
      "loss": 0.805,
      "step": 832560
    },
    {
      "epoch": 2.9179502816067178,
      "grad_norm": 3.296875,
      "learning_rate": 1.5195059074588972e-06,
      "loss": 0.8355,
      "step": 832570
    },
    {
      "epoch": 2.9179853291136135,
      "grad_norm": 2.46875,
      "learning_rate": 1.5188568787951952e-06,
      "loss": 0.7756,
      "step": 832580
    },
    {
      "epoch": 2.9180203766205093,
      "grad_norm": 2.875,
      "learning_rate": 1.5182078501314932e-06,
      "loss": 0.7936,
      "step": 832590
    },
    {
      "epoch": 2.9180554241274046,
      "grad_norm": 2.6875,
      "learning_rate": 1.5175588214677914e-06,
      "loss": 0.8401,
      "step": 832600
    },
    {
      "epoch": 2.9180904716343004,
      "grad_norm": 3.09375,
      "learning_rate": 1.5169097928040894e-06,
      "loss": 0.7902,
      "step": 832610
    },
    {
      "epoch": 2.9181255191411957,
      "grad_norm": 2.703125,
      "learning_rate": 1.5162607641403874e-06,
      "loss": 0.8114,
      "step": 832620
    },
    {
      "epoch": 2.9181605666480914,
      "grad_norm": 2.515625,
      "learning_rate": 1.5156117354766858e-06,
      "loss": 0.7649,
      "step": 832630
    },
    {
      "epoch": 2.918195614154987,
      "grad_norm": 2.671875,
      "learning_rate": 1.5149627068129838e-06,
      "loss": 0.7999,
      "step": 832640
    },
    {
      "epoch": 2.918230661661883,
      "grad_norm": 3.0,
      "learning_rate": 1.5143136781492818e-06,
      "loss": 0.7519,
      "step": 832650
    },
    {
      "epoch": 2.9182657091687783,
      "grad_norm": 2.609375,
      "learning_rate": 1.51366464948558e-06,
      "loss": 0.7122,
      "step": 832660
    },
    {
      "epoch": 2.918300756675674,
      "grad_norm": 3.109375,
      "learning_rate": 1.513015620821878e-06,
      "loss": 0.7544,
      "step": 832670
    },
    {
      "epoch": 2.9183358041825693,
      "grad_norm": 2.84375,
      "learning_rate": 1.512366592158176e-06,
      "loss": 0.7504,
      "step": 832680
    },
    {
      "epoch": 2.918370851689465,
      "grad_norm": 2.78125,
      "learning_rate": 1.5117175634944743e-06,
      "loss": 0.8105,
      "step": 832690
    },
    {
      "epoch": 2.918405899196361,
      "grad_norm": 2.65625,
      "learning_rate": 1.5110685348307725e-06,
      "loss": 0.7668,
      "step": 832700
    },
    {
      "epoch": 2.918440946703256,
      "grad_norm": 2.796875,
      "learning_rate": 1.5104195061670705e-06,
      "loss": 0.8779,
      "step": 832710
    },
    {
      "epoch": 2.918475994210152,
      "grad_norm": 2.546875,
      "learning_rate": 1.5097704775033685e-06,
      "loss": 0.8099,
      "step": 832720
    },
    {
      "epoch": 2.9185110417170472,
      "grad_norm": 3.09375,
      "learning_rate": 1.5091214488396667e-06,
      "loss": 0.8196,
      "step": 832730
    },
    {
      "epoch": 2.918546089223943,
      "grad_norm": 2.921875,
      "learning_rate": 1.5084724201759647e-06,
      "loss": 0.8242,
      "step": 832740
    },
    {
      "epoch": 2.9185811367308387,
      "grad_norm": 2.78125,
      "learning_rate": 1.5078233915122627e-06,
      "loss": 0.7691,
      "step": 832750
    },
    {
      "epoch": 2.9186161842377345,
      "grad_norm": 3.0625,
      "learning_rate": 1.507174362848561e-06,
      "loss": 0.8164,
      "step": 832760
    },
    {
      "epoch": 2.91865123174463,
      "grad_norm": 3.0,
      "learning_rate": 1.5065253341848591e-06,
      "loss": 0.8042,
      "step": 832770
    },
    {
      "epoch": 2.9186862792515256,
      "grad_norm": 2.875,
      "learning_rate": 1.5058763055211571e-06,
      "loss": 0.7805,
      "step": 832780
    },
    {
      "epoch": 2.918721326758421,
      "grad_norm": 2.59375,
      "learning_rate": 1.5052272768574553e-06,
      "loss": 0.78,
      "step": 832790
    },
    {
      "epoch": 2.9187563742653166,
      "grad_norm": 3.03125,
      "learning_rate": 1.5045782481937533e-06,
      "loss": 0.887,
      "step": 832800
    },
    {
      "epoch": 2.9187914217722124,
      "grad_norm": 3.546875,
      "learning_rate": 1.5039292195300513e-06,
      "loss": 0.784,
      "step": 832810
    },
    {
      "epoch": 2.9188264692791077,
      "grad_norm": 3.125,
      "learning_rate": 1.5032801908663495e-06,
      "loss": 0.8351,
      "step": 832820
    },
    {
      "epoch": 2.9188615167860035,
      "grad_norm": 3.125,
      "learning_rate": 1.5026311622026477e-06,
      "loss": 0.8313,
      "step": 832830
    },
    {
      "epoch": 2.918896564292899,
      "grad_norm": 2.625,
      "learning_rate": 1.5019821335389457e-06,
      "loss": 0.7089,
      "step": 832840
    },
    {
      "epoch": 2.9189316117997945,
      "grad_norm": 2.75,
      "learning_rate": 1.5013331048752437e-06,
      "loss": 0.7629,
      "step": 832850
    },
    {
      "epoch": 2.9189666593066903,
      "grad_norm": 3.109375,
      "learning_rate": 1.500684076211542e-06,
      "loss": 0.8026,
      "step": 832860
    },
    {
      "epoch": 2.919001706813586,
      "grad_norm": 2.828125,
      "learning_rate": 1.50003504754784e-06,
      "loss": 0.8223,
      "step": 832870
    },
    {
      "epoch": 2.9190367543204814,
      "grad_norm": 3.015625,
      "learning_rate": 1.499386018884138e-06,
      "loss": 0.8031,
      "step": 832880
    },
    {
      "epoch": 2.919071801827377,
      "grad_norm": 2.859375,
      "learning_rate": 1.4987369902204362e-06,
      "loss": 0.7895,
      "step": 832890
    },
    {
      "epoch": 2.9191068493342724,
      "grad_norm": 2.890625,
      "learning_rate": 1.4980879615567344e-06,
      "loss": 0.7304,
      "step": 832900
    },
    {
      "epoch": 2.919141896841168,
      "grad_norm": 3.265625,
      "learning_rate": 1.4974389328930324e-06,
      "loss": 0.8208,
      "step": 832910
    },
    {
      "epoch": 2.919176944348064,
      "grad_norm": 2.6875,
      "learning_rate": 1.4967899042293304e-06,
      "loss": 0.725,
      "step": 832920
    },
    {
      "epoch": 2.9192119918549593,
      "grad_norm": 3.171875,
      "learning_rate": 1.4961408755656286e-06,
      "loss": 0.7997,
      "step": 832930
    },
    {
      "epoch": 2.919247039361855,
      "grad_norm": 3.140625,
      "learning_rate": 1.4954918469019266e-06,
      "loss": 0.7806,
      "step": 832940
    },
    {
      "epoch": 2.9192820868687503,
      "grad_norm": 3.140625,
      "learning_rate": 1.4948428182382246e-06,
      "loss": 0.8415,
      "step": 832950
    },
    {
      "epoch": 2.919317134375646,
      "grad_norm": 2.484375,
      "learning_rate": 1.4941937895745228e-06,
      "loss": 0.7592,
      "step": 832960
    },
    {
      "epoch": 2.919352181882542,
      "grad_norm": 2.8125,
      "learning_rate": 1.493544760910821e-06,
      "loss": 0.8521,
      "step": 832970
    },
    {
      "epoch": 2.9193872293894376,
      "grad_norm": 3.09375,
      "learning_rate": 1.492895732247119e-06,
      "loss": 0.7975,
      "step": 832980
    },
    {
      "epoch": 2.919422276896333,
      "grad_norm": 3.21875,
      "learning_rate": 1.4922467035834172e-06,
      "loss": 0.8164,
      "step": 832990
    },
    {
      "epoch": 2.9194573244032287,
      "grad_norm": 2.84375,
      "learning_rate": 1.4915976749197152e-06,
      "loss": 0.8005,
      "step": 833000
    },
    {
      "epoch": 2.919492371910124,
      "grad_norm": 2.9375,
      "learning_rate": 1.4909486462560132e-06,
      "loss": 0.7316,
      "step": 833010
    },
    {
      "epoch": 2.9195274194170198,
      "grad_norm": 2.78125,
      "learning_rate": 1.4902996175923114e-06,
      "loss": 0.7758,
      "step": 833020
    },
    {
      "epoch": 2.9195624669239155,
      "grad_norm": 2.84375,
      "learning_rate": 1.4896505889286096e-06,
      "loss": 0.7559,
      "step": 833030
    },
    {
      "epoch": 2.919597514430811,
      "grad_norm": 3.265625,
      "learning_rate": 1.4890015602649076e-06,
      "loss": 0.8715,
      "step": 833040
    },
    {
      "epoch": 2.9196325619377066,
      "grad_norm": 2.515625,
      "learning_rate": 1.4883525316012056e-06,
      "loss": 0.8077,
      "step": 833050
    },
    {
      "epoch": 2.919667609444602,
      "grad_norm": 3.03125,
      "learning_rate": 1.4877035029375039e-06,
      "loss": 0.7712,
      "step": 833060
    },
    {
      "epoch": 2.9197026569514977,
      "grad_norm": 2.96875,
      "learning_rate": 1.4870544742738019e-06,
      "loss": 0.7751,
      "step": 833070
    },
    {
      "epoch": 2.9197377044583934,
      "grad_norm": 2.40625,
      "learning_rate": 1.4864054456100999e-06,
      "loss": 0.7822,
      "step": 833080
    },
    {
      "epoch": 2.919772751965289,
      "grad_norm": 2.921875,
      "learning_rate": 1.485756416946398e-06,
      "loss": 0.7184,
      "step": 833090
    },
    {
      "epoch": 2.9198077994721845,
      "grad_norm": 2.65625,
      "learning_rate": 1.4851073882826963e-06,
      "loss": 0.8387,
      "step": 833100
    },
    {
      "epoch": 2.9198428469790803,
      "grad_norm": 2.609375,
      "learning_rate": 1.4844583596189943e-06,
      "loss": 0.7528,
      "step": 833110
    },
    {
      "epoch": 2.9198778944859756,
      "grad_norm": 2.59375,
      "learning_rate": 1.4838093309552925e-06,
      "loss": 0.7468,
      "step": 833120
    },
    {
      "epoch": 2.9199129419928713,
      "grad_norm": 3.015625,
      "learning_rate": 1.4831603022915905e-06,
      "loss": 0.7508,
      "step": 833130
    },
    {
      "epoch": 2.919947989499767,
      "grad_norm": 2.765625,
      "learning_rate": 1.4825112736278885e-06,
      "loss": 0.806,
      "step": 833140
    },
    {
      "epoch": 2.9199830370066624,
      "grad_norm": 2.921875,
      "learning_rate": 1.4818622449641867e-06,
      "loss": 0.7824,
      "step": 833150
    },
    {
      "epoch": 2.920018084513558,
      "grad_norm": 3.125,
      "learning_rate": 1.4812132163004847e-06,
      "loss": 0.7524,
      "step": 833160
    },
    {
      "epoch": 2.9200531320204535,
      "grad_norm": 2.84375,
      "learning_rate": 1.480564187636783e-06,
      "loss": 0.7342,
      "step": 833170
    },
    {
      "epoch": 2.9200881795273492,
      "grad_norm": 2.703125,
      "learning_rate": 1.479915158973081e-06,
      "loss": 0.7265,
      "step": 833180
    },
    {
      "epoch": 2.920123227034245,
      "grad_norm": 3.1875,
      "learning_rate": 1.4792661303093791e-06,
      "loss": 0.8473,
      "step": 833190
    },
    {
      "epoch": 2.9201582745411407,
      "grad_norm": 2.96875,
      "learning_rate": 1.4786171016456771e-06,
      "loss": 0.7529,
      "step": 833200
    },
    {
      "epoch": 2.920193322048036,
      "grad_norm": 2.71875,
      "learning_rate": 1.4779680729819751e-06,
      "loss": 0.8139,
      "step": 833210
    },
    {
      "epoch": 2.920228369554932,
      "grad_norm": 2.8125,
      "learning_rate": 1.4773190443182733e-06,
      "loss": 0.7485,
      "step": 833220
    },
    {
      "epoch": 2.920263417061827,
      "grad_norm": 2.90625,
      "learning_rate": 1.4766700156545716e-06,
      "loss": 0.8015,
      "step": 833230
    },
    {
      "epoch": 2.920298464568723,
      "grad_norm": 2.84375,
      "learning_rate": 1.4760209869908696e-06,
      "loss": 0.7654,
      "step": 833240
    },
    {
      "epoch": 2.9203335120756186,
      "grad_norm": 2.78125,
      "learning_rate": 1.4753719583271678e-06,
      "loss": 0.7896,
      "step": 833250
    },
    {
      "epoch": 2.920368559582514,
      "grad_norm": 3.140625,
      "learning_rate": 1.4747229296634658e-06,
      "loss": 0.7694,
      "step": 833260
    },
    {
      "epoch": 2.9204036070894097,
      "grad_norm": 2.890625,
      "learning_rate": 1.4740739009997638e-06,
      "loss": 0.8004,
      "step": 833270
    },
    {
      "epoch": 2.920438654596305,
      "grad_norm": 2.390625,
      "learning_rate": 1.473424872336062e-06,
      "loss": 0.7343,
      "step": 833280
    },
    {
      "epoch": 2.920473702103201,
      "grad_norm": 3.265625,
      "learning_rate": 1.47277584367236e-06,
      "loss": 0.6852,
      "step": 833290
    },
    {
      "epoch": 2.9205087496100965,
      "grad_norm": 2.90625,
      "learning_rate": 1.4721268150086582e-06,
      "loss": 0.6943,
      "step": 833300
    },
    {
      "epoch": 2.9205437971169923,
      "grad_norm": 2.125,
      "learning_rate": 1.4714777863449562e-06,
      "loss": 0.7591,
      "step": 833310
    },
    {
      "epoch": 2.9205788446238876,
      "grad_norm": 2.734375,
      "learning_rate": 1.4708287576812544e-06,
      "loss": 0.7362,
      "step": 833320
    },
    {
      "epoch": 2.9206138921307834,
      "grad_norm": 3.109375,
      "learning_rate": 1.4701797290175524e-06,
      "loss": 0.763,
      "step": 833330
    },
    {
      "epoch": 2.9206489396376787,
      "grad_norm": 2.96875,
      "learning_rate": 1.4695307003538504e-06,
      "loss": 0.7301,
      "step": 833340
    },
    {
      "epoch": 2.9206839871445744,
      "grad_norm": 3.09375,
      "learning_rate": 1.4688816716901486e-06,
      "loss": 0.7886,
      "step": 833350
    },
    {
      "epoch": 2.92071903465147,
      "grad_norm": 2.578125,
      "learning_rate": 1.4682326430264466e-06,
      "loss": 0.7674,
      "step": 833360
    },
    {
      "epoch": 2.9207540821583655,
      "grad_norm": 2.9375,
      "learning_rate": 1.4675836143627448e-06,
      "loss": 0.9095,
      "step": 833370
    },
    {
      "epoch": 2.9207891296652613,
      "grad_norm": 2.9375,
      "learning_rate": 1.466934585699043e-06,
      "loss": 0.8822,
      "step": 833380
    },
    {
      "epoch": 2.9208241771721566,
      "grad_norm": 2.640625,
      "learning_rate": 1.466285557035341e-06,
      "loss": 0.7668,
      "step": 833390
    },
    {
      "epoch": 2.9208592246790523,
      "grad_norm": 2.921875,
      "learning_rate": 1.465636528371639e-06,
      "loss": 0.8157,
      "step": 833400
    },
    {
      "epoch": 2.920894272185948,
      "grad_norm": 2.875,
      "learning_rate": 1.4649874997079372e-06,
      "loss": 0.8181,
      "step": 833410
    },
    {
      "epoch": 2.920929319692844,
      "grad_norm": 2.75,
      "learning_rate": 1.4643384710442352e-06,
      "loss": 0.7549,
      "step": 833420
    },
    {
      "epoch": 2.920964367199739,
      "grad_norm": 3.109375,
      "learning_rate": 1.4636894423805332e-06,
      "loss": 0.8296,
      "step": 833430
    },
    {
      "epoch": 2.920999414706635,
      "grad_norm": 2.578125,
      "learning_rate": 1.4630404137168315e-06,
      "loss": 0.8355,
      "step": 833440
    },
    {
      "epoch": 2.9210344622135302,
      "grad_norm": 3.15625,
      "learning_rate": 1.4623913850531297e-06,
      "loss": 0.8684,
      "step": 833450
    },
    {
      "epoch": 2.921069509720426,
      "grad_norm": 3.375,
      "learning_rate": 1.4617423563894277e-06,
      "loss": 0.7409,
      "step": 833460
    },
    {
      "epoch": 2.9211045572273218,
      "grad_norm": 2.734375,
      "learning_rate": 1.4610933277257257e-06,
      "loss": 0.8139,
      "step": 833470
    },
    {
      "epoch": 2.921139604734217,
      "grad_norm": 2.90625,
      "learning_rate": 1.4604442990620239e-06,
      "loss": 0.834,
      "step": 833480
    },
    {
      "epoch": 2.921174652241113,
      "grad_norm": 2.75,
      "learning_rate": 1.4597952703983219e-06,
      "loss": 0.7018,
      "step": 833490
    },
    {
      "epoch": 2.921209699748008,
      "grad_norm": 2.484375,
      "learning_rate": 1.45914624173462e-06,
      "loss": 0.766,
      "step": 833500
    },
    {
      "epoch": 2.921244747254904,
      "grad_norm": 3.140625,
      "learning_rate": 1.458497213070918e-06,
      "loss": 0.7504,
      "step": 833510
    },
    {
      "epoch": 2.9212797947617997,
      "grad_norm": 3.3125,
      "learning_rate": 1.4578481844072163e-06,
      "loss": 0.8406,
      "step": 833520
    },
    {
      "epoch": 2.9213148422686954,
      "grad_norm": 2.796875,
      "learning_rate": 1.4571991557435143e-06,
      "loss": 0.764,
      "step": 833530
    },
    {
      "epoch": 2.9213498897755907,
      "grad_norm": 3.171875,
      "learning_rate": 1.4565501270798123e-06,
      "loss": 0.7706,
      "step": 833540
    },
    {
      "epoch": 2.9213849372824865,
      "grad_norm": 2.859375,
      "learning_rate": 1.4559010984161105e-06,
      "loss": 0.7045,
      "step": 833550
    },
    {
      "epoch": 2.921419984789382,
      "grad_norm": 2.84375,
      "learning_rate": 1.4552520697524085e-06,
      "loss": 0.813,
      "step": 833560
    },
    {
      "epoch": 2.9214550322962776,
      "grad_norm": 2.71875,
      "learning_rate": 1.4546030410887067e-06,
      "loss": 0.8753,
      "step": 833570
    },
    {
      "epoch": 2.9214900798031733,
      "grad_norm": 2.4375,
      "learning_rate": 1.453954012425005e-06,
      "loss": 0.8155,
      "step": 833580
    },
    {
      "epoch": 2.9215251273100686,
      "grad_norm": 2.75,
      "learning_rate": 1.453304983761303e-06,
      "loss": 0.7882,
      "step": 833590
    },
    {
      "epoch": 2.9215601748169644,
      "grad_norm": 3.21875,
      "learning_rate": 1.452655955097601e-06,
      "loss": 0.8199,
      "step": 833600
    },
    {
      "epoch": 2.92159522232386,
      "grad_norm": 3.09375,
      "learning_rate": 1.4520069264338991e-06,
      "loss": 0.8497,
      "step": 833610
    },
    {
      "epoch": 2.9216302698307555,
      "grad_norm": 3.1875,
      "learning_rate": 1.4513578977701971e-06,
      "loss": 0.819,
      "step": 833620
    },
    {
      "epoch": 2.9216653173376512,
      "grad_norm": 2.359375,
      "learning_rate": 1.4507088691064951e-06,
      "loss": 0.7866,
      "step": 833630
    },
    {
      "epoch": 2.921700364844547,
      "grad_norm": 2.796875,
      "learning_rate": 1.4500598404427934e-06,
      "loss": 0.8349,
      "step": 833640
    },
    {
      "epoch": 2.9217354123514423,
      "grad_norm": 2.828125,
      "learning_rate": 1.4494108117790916e-06,
      "loss": 0.8,
      "step": 833650
    },
    {
      "epoch": 2.921770459858338,
      "grad_norm": 2.828125,
      "learning_rate": 1.4487617831153896e-06,
      "loss": 0.8522,
      "step": 833660
    },
    {
      "epoch": 2.9218055073652334,
      "grad_norm": 2.890625,
      "learning_rate": 1.4481127544516876e-06,
      "loss": 0.7936,
      "step": 833670
    },
    {
      "epoch": 2.921840554872129,
      "grad_norm": 3.015625,
      "learning_rate": 1.4474637257879858e-06,
      "loss": 0.8771,
      "step": 833680
    },
    {
      "epoch": 2.921875602379025,
      "grad_norm": 2.875,
      "learning_rate": 1.4468146971242838e-06,
      "loss": 0.7955,
      "step": 833690
    },
    {
      "epoch": 2.9219106498859206,
      "grad_norm": 2.296875,
      "learning_rate": 1.446165668460582e-06,
      "loss": 0.715,
      "step": 833700
    },
    {
      "epoch": 2.921945697392816,
      "grad_norm": 2.8125,
      "learning_rate": 1.4455166397968802e-06,
      "loss": 0.7877,
      "step": 833710
    },
    {
      "epoch": 2.9219807448997117,
      "grad_norm": 3.109375,
      "learning_rate": 1.4448676111331782e-06,
      "loss": 0.9101,
      "step": 833720
    },
    {
      "epoch": 2.922015792406607,
      "grad_norm": 2.984375,
      "learning_rate": 1.4442185824694762e-06,
      "loss": 0.7407,
      "step": 833730
    },
    {
      "epoch": 2.922050839913503,
      "grad_norm": 3.0625,
      "learning_rate": 1.4435695538057744e-06,
      "loss": 0.8262,
      "step": 833740
    },
    {
      "epoch": 2.9220858874203985,
      "grad_norm": 3.0625,
      "learning_rate": 1.4429205251420724e-06,
      "loss": 0.8531,
      "step": 833750
    },
    {
      "epoch": 2.922120934927294,
      "grad_norm": 3.234375,
      "learning_rate": 1.4422714964783704e-06,
      "loss": 0.847,
      "step": 833760
    },
    {
      "epoch": 2.9221559824341896,
      "grad_norm": 2.8125,
      "learning_rate": 1.4416224678146686e-06,
      "loss": 0.8293,
      "step": 833770
    },
    {
      "epoch": 2.922191029941085,
      "grad_norm": 2.703125,
      "learning_rate": 1.4409734391509668e-06,
      "loss": 0.8125,
      "step": 833780
    },
    {
      "epoch": 2.9222260774479807,
      "grad_norm": 2.28125,
      "learning_rate": 1.4403244104872648e-06,
      "loss": 0.782,
      "step": 833790
    },
    {
      "epoch": 2.9222611249548764,
      "grad_norm": 3.140625,
      "learning_rate": 1.4396753818235628e-06,
      "loss": 0.7561,
      "step": 833800
    },
    {
      "epoch": 2.922296172461772,
      "grad_norm": 3.09375,
      "learning_rate": 1.439026353159861e-06,
      "loss": 0.6801,
      "step": 833810
    },
    {
      "epoch": 2.9223312199686675,
      "grad_norm": 3.0625,
      "learning_rate": 1.438377324496159e-06,
      "loss": 0.8004,
      "step": 833820
    },
    {
      "epoch": 2.9223662674755633,
      "grad_norm": 2.5625,
      "learning_rate": 1.437728295832457e-06,
      "loss": 0.7861,
      "step": 833830
    },
    {
      "epoch": 2.9224013149824586,
      "grad_norm": 2.90625,
      "learning_rate": 1.4370792671687555e-06,
      "loss": 0.8918,
      "step": 833840
    },
    {
      "epoch": 2.9224363624893543,
      "grad_norm": 2.984375,
      "learning_rate": 1.4364302385050535e-06,
      "loss": 0.7513,
      "step": 833850
    },
    {
      "epoch": 2.92247140999625,
      "grad_norm": 2.65625,
      "learning_rate": 1.4357812098413515e-06,
      "loss": 0.7302,
      "step": 833860
    },
    {
      "epoch": 2.9225064575031454,
      "grad_norm": 3.15625,
      "learning_rate": 1.4351321811776497e-06,
      "loss": 0.8043,
      "step": 833870
    },
    {
      "epoch": 2.922541505010041,
      "grad_norm": 2.828125,
      "learning_rate": 1.4344831525139477e-06,
      "loss": 0.8238,
      "step": 833880
    },
    {
      "epoch": 2.9225765525169365,
      "grad_norm": 3.234375,
      "learning_rate": 1.4338341238502457e-06,
      "loss": 0.8264,
      "step": 833890
    },
    {
      "epoch": 2.9226116000238322,
      "grad_norm": 3.015625,
      "learning_rate": 1.433185095186544e-06,
      "loss": 0.7815,
      "step": 833900
    },
    {
      "epoch": 2.922646647530728,
      "grad_norm": 2.671875,
      "learning_rate": 1.4325360665228421e-06,
      "loss": 0.7574,
      "step": 833910
    },
    {
      "epoch": 2.9226816950376238,
      "grad_norm": 2.6875,
      "learning_rate": 1.4318870378591401e-06,
      "loss": 0.7651,
      "step": 833920
    },
    {
      "epoch": 2.922716742544519,
      "grad_norm": 2.875,
      "learning_rate": 1.4312380091954381e-06,
      "loss": 0.7681,
      "step": 833930
    },
    {
      "epoch": 2.922751790051415,
      "grad_norm": 2.921875,
      "learning_rate": 1.4305889805317363e-06,
      "loss": 0.7413,
      "step": 833940
    },
    {
      "epoch": 2.92278683755831,
      "grad_norm": 2.796875,
      "learning_rate": 1.4299399518680343e-06,
      "loss": 0.7558,
      "step": 833950
    },
    {
      "epoch": 2.922821885065206,
      "grad_norm": 2.78125,
      "learning_rate": 1.4292909232043323e-06,
      "loss": 0.8318,
      "step": 833960
    },
    {
      "epoch": 2.9228569325721017,
      "grad_norm": 3.03125,
      "learning_rate": 1.4286418945406307e-06,
      "loss": 0.7484,
      "step": 833970
    },
    {
      "epoch": 2.922891980078997,
      "grad_norm": 3.125,
      "learning_rate": 1.4279928658769287e-06,
      "loss": 0.8098,
      "step": 833980
    },
    {
      "epoch": 2.9229270275858927,
      "grad_norm": 2.8125,
      "learning_rate": 1.4273438372132267e-06,
      "loss": 0.8073,
      "step": 833990
    },
    {
      "epoch": 2.922962075092788,
      "grad_norm": 2.578125,
      "learning_rate": 1.426694808549525e-06,
      "loss": 0.8226,
      "step": 834000
    },
    {
      "epoch": 2.922997122599684,
      "grad_norm": 2.59375,
      "learning_rate": 1.426045779885823e-06,
      "loss": 0.7528,
      "step": 834010
    },
    {
      "epoch": 2.9230321701065796,
      "grad_norm": 3.28125,
      "learning_rate": 1.425396751222121e-06,
      "loss": 0.8543,
      "step": 834020
    },
    {
      "epoch": 2.9230672176134753,
      "grad_norm": 3.265625,
      "learning_rate": 1.424747722558419e-06,
      "loss": 0.7946,
      "step": 834030
    },
    {
      "epoch": 2.9231022651203706,
      "grad_norm": 2.28125,
      "learning_rate": 1.4240986938947174e-06,
      "loss": 0.7562,
      "step": 834040
    },
    {
      "epoch": 2.9231373126272664,
      "grad_norm": 2.71875,
      "learning_rate": 1.4234496652310154e-06,
      "loss": 0.7908,
      "step": 834050
    },
    {
      "epoch": 2.9231723601341617,
      "grad_norm": 3.265625,
      "learning_rate": 1.4228006365673134e-06,
      "loss": 0.8281,
      "step": 834060
    },
    {
      "epoch": 2.9232074076410575,
      "grad_norm": 3.125,
      "learning_rate": 1.4221516079036116e-06,
      "loss": 0.813,
      "step": 834070
    },
    {
      "epoch": 2.9232424551479532,
      "grad_norm": 3.140625,
      "learning_rate": 1.4215025792399096e-06,
      "loss": 0.8168,
      "step": 834080
    },
    {
      "epoch": 2.9232775026548485,
      "grad_norm": 2.46875,
      "learning_rate": 1.4208535505762076e-06,
      "loss": 0.85,
      "step": 834090
    },
    {
      "epoch": 2.9233125501617443,
      "grad_norm": 2.828125,
      "learning_rate": 1.4202045219125058e-06,
      "loss": 0.7475,
      "step": 834100
    },
    {
      "epoch": 2.9233475976686396,
      "grad_norm": 2.859375,
      "learning_rate": 1.419555493248804e-06,
      "loss": 0.8481,
      "step": 834110
    },
    {
      "epoch": 2.9233826451755354,
      "grad_norm": 2.96875,
      "learning_rate": 1.418906464585102e-06,
      "loss": 0.8482,
      "step": 834120
    },
    {
      "epoch": 2.923417692682431,
      "grad_norm": 2.609375,
      "learning_rate": 1.4182574359214e-06,
      "loss": 0.7766,
      "step": 834130
    },
    {
      "epoch": 2.923452740189327,
      "grad_norm": 2.671875,
      "learning_rate": 1.4176084072576982e-06,
      "loss": 0.7803,
      "step": 834140
    },
    {
      "epoch": 2.923487787696222,
      "grad_norm": 2.515625,
      "learning_rate": 1.4169593785939962e-06,
      "loss": 0.7905,
      "step": 834150
    },
    {
      "epoch": 2.923522835203118,
      "grad_norm": 3.140625,
      "learning_rate": 1.4163103499302942e-06,
      "loss": 0.7979,
      "step": 834160
    },
    {
      "epoch": 2.9235578827100133,
      "grad_norm": 2.828125,
      "learning_rate": 1.4156613212665926e-06,
      "loss": 0.7917,
      "step": 834170
    },
    {
      "epoch": 2.923592930216909,
      "grad_norm": 2.875,
      "learning_rate": 1.4150122926028906e-06,
      "loss": 0.7805,
      "step": 834180
    },
    {
      "epoch": 2.923627977723805,
      "grad_norm": 3.28125,
      "learning_rate": 1.4143632639391886e-06,
      "loss": 0.7426,
      "step": 834190
    },
    {
      "epoch": 2.9236630252307,
      "grad_norm": 2.703125,
      "learning_rate": 1.4137142352754869e-06,
      "loss": 0.7612,
      "step": 834200
    },
    {
      "epoch": 2.923698072737596,
      "grad_norm": 3.078125,
      "learning_rate": 1.4130652066117849e-06,
      "loss": 0.8505,
      "step": 834210
    },
    {
      "epoch": 2.923733120244491,
      "grad_norm": 2.734375,
      "learning_rate": 1.4124161779480829e-06,
      "loss": 0.776,
      "step": 834220
    },
    {
      "epoch": 2.923768167751387,
      "grad_norm": 4.15625,
      "learning_rate": 1.411767149284381e-06,
      "loss": 0.7516,
      "step": 834230
    },
    {
      "epoch": 2.9238032152582827,
      "grad_norm": 2.765625,
      "learning_rate": 1.4111181206206793e-06,
      "loss": 0.7815,
      "step": 834240
    },
    {
      "epoch": 2.9238382627651784,
      "grad_norm": 2.390625,
      "learning_rate": 1.4104690919569773e-06,
      "loss": 0.7021,
      "step": 834250
    },
    {
      "epoch": 2.9238733102720738,
      "grad_norm": 2.78125,
      "learning_rate": 1.4098200632932753e-06,
      "loss": 0.8115,
      "step": 834260
    },
    {
      "epoch": 2.9239083577789695,
      "grad_norm": 2.8125,
      "learning_rate": 1.4091710346295735e-06,
      "loss": 0.8358,
      "step": 834270
    },
    {
      "epoch": 2.923943405285865,
      "grad_norm": 3.234375,
      "learning_rate": 1.4085220059658715e-06,
      "loss": 0.8202,
      "step": 834280
    },
    {
      "epoch": 2.9239784527927606,
      "grad_norm": 3.265625,
      "learning_rate": 1.4078729773021695e-06,
      "loss": 0.8139,
      "step": 834290
    },
    {
      "epoch": 2.9240135002996563,
      "grad_norm": 2.25,
      "learning_rate": 1.4072239486384677e-06,
      "loss": 0.7414,
      "step": 834300
    },
    {
      "epoch": 2.9240485478065517,
      "grad_norm": 2.734375,
      "learning_rate": 1.406574919974766e-06,
      "loss": 0.7851,
      "step": 834310
    },
    {
      "epoch": 2.9240835953134474,
      "grad_norm": 2.703125,
      "learning_rate": 1.405925891311064e-06,
      "loss": 0.7635,
      "step": 834320
    },
    {
      "epoch": 2.9241186428203427,
      "grad_norm": 3.421875,
      "learning_rate": 1.4052768626473621e-06,
      "loss": 0.9031,
      "step": 834330
    },
    {
      "epoch": 2.9241536903272385,
      "grad_norm": 3.09375,
      "learning_rate": 1.4046278339836601e-06,
      "loss": 0.7757,
      "step": 834340
    },
    {
      "epoch": 2.9241887378341342,
      "grad_norm": 2.796875,
      "learning_rate": 1.4039788053199581e-06,
      "loss": 0.7437,
      "step": 834350
    },
    {
      "epoch": 2.92422378534103,
      "grad_norm": 2.78125,
      "learning_rate": 1.4033297766562563e-06,
      "loss": 0.7988,
      "step": 834360
    },
    {
      "epoch": 2.9242588328479253,
      "grad_norm": 3.015625,
      "learning_rate": 1.4026807479925546e-06,
      "loss": 0.7571,
      "step": 834370
    },
    {
      "epoch": 2.924293880354821,
      "grad_norm": 2.75,
      "learning_rate": 1.4020317193288526e-06,
      "loss": 0.7935,
      "step": 834380
    },
    {
      "epoch": 2.9243289278617164,
      "grad_norm": 2.984375,
      "learning_rate": 1.4013826906651506e-06,
      "loss": 0.7315,
      "step": 834390
    },
    {
      "epoch": 2.924363975368612,
      "grad_norm": 3.0625,
      "learning_rate": 1.4007336620014488e-06,
      "loss": 0.8469,
      "step": 834400
    },
    {
      "epoch": 2.924399022875508,
      "grad_norm": 2.59375,
      "learning_rate": 1.4000846333377468e-06,
      "loss": 0.7846,
      "step": 834410
    },
    {
      "epoch": 2.924434070382403,
      "grad_norm": 2.765625,
      "learning_rate": 1.3994356046740448e-06,
      "loss": 0.8069,
      "step": 834420
    },
    {
      "epoch": 2.924469117889299,
      "grad_norm": 2.96875,
      "learning_rate": 1.398786576010343e-06,
      "loss": 0.7828,
      "step": 834430
    },
    {
      "epoch": 2.9245041653961943,
      "grad_norm": 2.890625,
      "learning_rate": 1.3981375473466412e-06,
      "loss": 0.8008,
      "step": 834440
    },
    {
      "epoch": 2.92453921290309,
      "grad_norm": 2.953125,
      "learning_rate": 1.3974885186829392e-06,
      "loss": 0.8237,
      "step": 834450
    },
    {
      "epoch": 2.924574260409986,
      "grad_norm": 2.921875,
      "learning_rate": 1.3968394900192374e-06,
      "loss": 0.8328,
      "step": 834460
    },
    {
      "epoch": 2.9246093079168816,
      "grad_norm": 3.046875,
      "learning_rate": 1.3961904613555354e-06,
      "loss": 0.8139,
      "step": 834470
    },
    {
      "epoch": 2.924644355423777,
      "grad_norm": 3.171875,
      "learning_rate": 1.3955414326918334e-06,
      "loss": 0.7949,
      "step": 834480
    },
    {
      "epoch": 2.9246794029306726,
      "grad_norm": 3.265625,
      "learning_rate": 1.3948924040281316e-06,
      "loss": 0.8646,
      "step": 834490
    },
    {
      "epoch": 2.924714450437568,
      "grad_norm": 3.140625,
      "learning_rate": 1.3942433753644296e-06,
      "loss": 0.8371,
      "step": 834500
    },
    {
      "epoch": 2.9247494979444637,
      "grad_norm": 2.640625,
      "learning_rate": 1.3935943467007278e-06,
      "loss": 0.734,
      "step": 834510
    },
    {
      "epoch": 2.9247845454513595,
      "grad_norm": 2.71875,
      "learning_rate": 1.3929453180370258e-06,
      "loss": 0.7813,
      "step": 834520
    },
    {
      "epoch": 2.9248195929582548,
      "grad_norm": 3.078125,
      "learning_rate": 1.392296289373324e-06,
      "loss": 0.7675,
      "step": 834530
    },
    {
      "epoch": 2.9248546404651505,
      "grad_norm": 2.921875,
      "learning_rate": 1.391647260709622e-06,
      "loss": 0.7766,
      "step": 834540
    },
    {
      "epoch": 2.924889687972046,
      "grad_norm": 3.15625,
      "learning_rate": 1.39099823204592e-06,
      "loss": 0.8195,
      "step": 834550
    },
    {
      "epoch": 2.9249247354789416,
      "grad_norm": 2.640625,
      "learning_rate": 1.3903492033822182e-06,
      "loss": 0.7883,
      "step": 834560
    },
    {
      "epoch": 2.9249597829858374,
      "grad_norm": 2.59375,
      "learning_rate": 1.3897001747185165e-06,
      "loss": 0.784,
      "step": 834570
    },
    {
      "epoch": 2.924994830492733,
      "grad_norm": 3.171875,
      "learning_rate": 1.3890511460548145e-06,
      "loss": 0.7904,
      "step": 834580
    },
    {
      "epoch": 2.9250298779996284,
      "grad_norm": 3.046875,
      "learning_rate": 1.3884021173911125e-06,
      "loss": 0.7237,
      "step": 834590
    },
    {
      "epoch": 2.925064925506524,
      "grad_norm": 3.40625,
      "learning_rate": 1.3877530887274107e-06,
      "loss": 0.7895,
      "step": 834600
    },
    {
      "epoch": 2.9250999730134195,
      "grad_norm": 2.640625,
      "learning_rate": 1.3871040600637087e-06,
      "loss": 0.7875,
      "step": 834610
    },
    {
      "epoch": 2.9251350205203153,
      "grad_norm": 2.9375,
      "learning_rate": 1.3864550314000067e-06,
      "loss": 0.8583,
      "step": 834620
    },
    {
      "epoch": 2.925170068027211,
      "grad_norm": 2.78125,
      "learning_rate": 1.3858060027363049e-06,
      "loss": 0.7776,
      "step": 834630
    },
    {
      "epoch": 2.9252051155341063,
      "grad_norm": 3.140625,
      "learning_rate": 1.385156974072603e-06,
      "loss": 0.7541,
      "step": 834640
    },
    {
      "epoch": 2.925240163041002,
      "grad_norm": 2.984375,
      "learning_rate": 1.384507945408901e-06,
      "loss": 0.7808,
      "step": 834650
    },
    {
      "epoch": 2.9252752105478974,
      "grad_norm": 2.703125,
      "learning_rate": 1.3838589167451993e-06,
      "loss": 0.752,
      "step": 834660
    },
    {
      "epoch": 2.925310258054793,
      "grad_norm": 2.9375,
      "learning_rate": 1.3832098880814973e-06,
      "loss": 0.8339,
      "step": 834670
    },
    {
      "epoch": 2.925345305561689,
      "grad_norm": 2.859375,
      "learning_rate": 1.3825608594177953e-06,
      "loss": 0.7626,
      "step": 834680
    },
    {
      "epoch": 2.9253803530685847,
      "grad_norm": 3.046875,
      "learning_rate": 1.3819118307540935e-06,
      "loss": 0.8426,
      "step": 834690
    },
    {
      "epoch": 2.92541540057548,
      "grad_norm": 3.578125,
      "learning_rate": 1.3812628020903915e-06,
      "loss": 0.8061,
      "step": 834700
    },
    {
      "epoch": 2.9254504480823758,
      "grad_norm": 3.296875,
      "learning_rate": 1.3806137734266897e-06,
      "loss": 0.8408,
      "step": 834710
    },
    {
      "epoch": 2.925485495589271,
      "grad_norm": 3.3125,
      "learning_rate": 1.3799647447629877e-06,
      "loss": 0.8719,
      "step": 834720
    },
    {
      "epoch": 2.925520543096167,
      "grad_norm": 3.140625,
      "learning_rate": 1.379315716099286e-06,
      "loss": 0.8027,
      "step": 834730
    },
    {
      "epoch": 2.9255555906030626,
      "grad_norm": 2.765625,
      "learning_rate": 1.378666687435584e-06,
      "loss": 0.7608,
      "step": 834740
    },
    {
      "epoch": 2.925590638109958,
      "grad_norm": 2.75,
      "learning_rate": 1.378017658771882e-06,
      "loss": 0.7613,
      "step": 834750
    },
    {
      "epoch": 2.9256256856168537,
      "grad_norm": 2.609375,
      "learning_rate": 1.3773686301081801e-06,
      "loss": 0.811,
      "step": 834760
    },
    {
      "epoch": 2.925660733123749,
      "grad_norm": 2.90625,
      "learning_rate": 1.3767196014444781e-06,
      "loss": 0.8735,
      "step": 834770
    },
    {
      "epoch": 2.9256957806306447,
      "grad_norm": 2.453125,
      "learning_rate": 1.3760705727807764e-06,
      "loss": 0.7403,
      "step": 834780
    },
    {
      "epoch": 2.9257308281375405,
      "grad_norm": 2.78125,
      "learning_rate": 1.3754215441170746e-06,
      "loss": 0.8157,
      "step": 834790
    },
    {
      "epoch": 2.9257658756444362,
      "grad_norm": 3.046875,
      "learning_rate": 1.3747725154533726e-06,
      "loss": 0.8287,
      "step": 834800
    },
    {
      "epoch": 2.9258009231513316,
      "grad_norm": 3.078125,
      "learning_rate": 1.3741234867896706e-06,
      "loss": 0.7618,
      "step": 834810
    },
    {
      "epoch": 2.9258359706582273,
      "grad_norm": 3.203125,
      "learning_rate": 1.3734744581259688e-06,
      "loss": 0.8161,
      "step": 834820
    },
    {
      "epoch": 2.9258710181651226,
      "grad_norm": 2.796875,
      "learning_rate": 1.3728254294622668e-06,
      "loss": 0.8477,
      "step": 834830
    },
    {
      "epoch": 2.9259060656720184,
      "grad_norm": 2.578125,
      "learning_rate": 1.372176400798565e-06,
      "loss": 0.823,
      "step": 834840
    },
    {
      "epoch": 2.925941113178914,
      "grad_norm": 2.578125,
      "learning_rate": 1.371527372134863e-06,
      "loss": 0.811,
      "step": 834850
    },
    {
      "epoch": 2.9259761606858095,
      "grad_norm": 3.140625,
      "learning_rate": 1.3708783434711612e-06,
      "loss": 0.8224,
      "step": 834860
    },
    {
      "epoch": 2.926011208192705,
      "grad_norm": 2.75,
      "learning_rate": 1.3702293148074592e-06,
      "loss": 0.8088,
      "step": 834870
    },
    {
      "epoch": 2.926046255699601,
      "grad_norm": 3.15625,
      "learning_rate": 1.3695802861437572e-06,
      "loss": 0.8418,
      "step": 834880
    },
    {
      "epoch": 2.9260813032064963,
      "grad_norm": 3.0625,
      "learning_rate": 1.3689312574800554e-06,
      "loss": 0.7165,
      "step": 834890
    },
    {
      "epoch": 2.926116350713392,
      "grad_norm": 2.703125,
      "learning_rate": 1.3682822288163534e-06,
      "loss": 0.7645,
      "step": 834900
    },
    {
      "epoch": 2.926151398220288,
      "grad_norm": 2.578125,
      "learning_rate": 1.3676332001526516e-06,
      "loss": 0.799,
      "step": 834910
    },
    {
      "epoch": 2.926186445727183,
      "grad_norm": 2.609375,
      "learning_rate": 1.3669841714889498e-06,
      "loss": 0.7681,
      "step": 834920
    },
    {
      "epoch": 2.926221493234079,
      "grad_norm": 2.984375,
      "learning_rate": 1.3663351428252478e-06,
      "loss": 0.7849,
      "step": 834930
    },
    {
      "epoch": 2.926256540740974,
      "grad_norm": 3.203125,
      "learning_rate": 1.3656861141615458e-06,
      "loss": 0.8481,
      "step": 834940
    },
    {
      "epoch": 2.92629158824787,
      "grad_norm": 3.0625,
      "learning_rate": 1.365037085497844e-06,
      "loss": 0.7528,
      "step": 834950
    },
    {
      "epoch": 2.9263266357547657,
      "grad_norm": 3.671875,
      "learning_rate": 1.364388056834142e-06,
      "loss": 0.8723,
      "step": 834960
    },
    {
      "epoch": 2.926361683261661,
      "grad_norm": 2.546875,
      "learning_rate": 1.36373902817044e-06,
      "loss": 0.7622,
      "step": 834970
    },
    {
      "epoch": 2.9263967307685568,
      "grad_norm": 3.015625,
      "learning_rate": 1.3630899995067383e-06,
      "loss": 0.7921,
      "step": 834980
    },
    {
      "epoch": 2.9264317782754525,
      "grad_norm": 2.453125,
      "learning_rate": 1.3624409708430365e-06,
      "loss": 0.7627,
      "step": 834990
    },
    {
      "epoch": 2.926466825782348,
      "grad_norm": 2.96875,
      "learning_rate": 1.3617919421793345e-06,
      "loss": 0.7959,
      "step": 835000
    },
    {
      "epoch": 2.926466825782348,
      "eval_loss": 0.7498558163642883,
      "eval_runtime": 554.4073,
      "eval_samples_per_second": 686.203,
      "eval_steps_per_second": 57.184,
      "step": 835000
    },
    {
      "epoch": 2.9265018732892436,
      "grad_norm": 2.875,
      "learning_rate": 1.3611429135156325e-06,
      "loss": 0.8079,
      "step": 835010
    },
    {
      "epoch": 2.9265369207961394,
      "grad_norm": 2.828125,
      "learning_rate": 1.3604938848519307e-06,
      "loss": 0.7837,
      "step": 835020
    },
    {
      "epoch": 2.9265719683030347,
      "grad_norm": 3.21875,
      "learning_rate": 1.3598448561882287e-06,
      "loss": 0.757,
      "step": 835030
    },
    {
      "epoch": 2.9266070158099304,
      "grad_norm": 2.4375,
      "learning_rate": 1.359195827524527e-06,
      "loss": 0.7463,
      "step": 835040
    },
    {
      "epoch": 2.9266420633168257,
      "grad_norm": 2.9375,
      "learning_rate": 1.3585467988608251e-06,
      "loss": 0.8237,
      "step": 835050
    },
    {
      "epoch": 2.9266771108237215,
      "grad_norm": 3.171875,
      "learning_rate": 1.3578977701971231e-06,
      "loss": 0.944,
      "step": 835060
    },
    {
      "epoch": 2.9267121583306173,
      "grad_norm": 2.5,
      "learning_rate": 1.3572487415334211e-06,
      "loss": 0.7885,
      "step": 835070
    },
    {
      "epoch": 2.926747205837513,
      "grad_norm": 2.921875,
      "learning_rate": 1.3565997128697193e-06,
      "loss": 0.8335,
      "step": 835080
    },
    {
      "epoch": 2.9267822533444083,
      "grad_norm": 4.59375,
      "learning_rate": 1.3559506842060173e-06,
      "loss": 0.7996,
      "step": 835090
    },
    {
      "epoch": 2.926817300851304,
      "grad_norm": 2.84375,
      "learning_rate": 1.3553016555423153e-06,
      "loss": 0.8901,
      "step": 835100
    },
    {
      "epoch": 2.9268523483581994,
      "grad_norm": 2.703125,
      "learning_rate": 1.3546526268786135e-06,
      "loss": 0.7612,
      "step": 835110
    },
    {
      "epoch": 2.926887395865095,
      "grad_norm": 2.96875,
      "learning_rate": 1.3540035982149117e-06,
      "loss": 0.8555,
      "step": 835120
    },
    {
      "epoch": 2.926922443371991,
      "grad_norm": 3.28125,
      "learning_rate": 1.3533545695512097e-06,
      "loss": 0.7407,
      "step": 835130
    },
    {
      "epoch": 2.9269574908788862,
      "grad_norm": 2.546875,
      "learning_rate": 1.3527055408875077e-06,
      "loss": 0.8321,
      "step": 835140
    },
    {
      "epoch": 2.926992538385782,
      "grad_norm": 2.78125,
      "learning_rate": 1.352056512223806e-06,
      "loss": 0.8402,
      "step": 835150
    },
    {
      "epoch": 2.9270275858926773,
      "grad_norm": 2.90625,
      "learning_rate": 1.351407483560104e-06,
      "loss": 0.7731,
      "step": 835160
    },
    {
      "epoch": 2.927062633399573,
      "grad_norm": 2.953125,
      "learning_rate": 1.350758454896402e-06,
      "loss": 0.942,
      "step": 835170
    },
    {
      "epoch": 2.927097680906469,
      "grad_norm": 2.84375,
      "learning_rate": 1.3501094262327002e-06,
      "loss": 0.8735,
      "step": 835180
    },
    {
      "epoch": 2.9271327284133646,
      "grad_norm": 3.109375,
      "learning_rate": 1.3494603975689984e-06,
      "loss": 0.851,
      "step": 835190
    },
    {
      "epoch": 2.92716777592026,
      "grad_norm": 3.65625,
      "learning_rate": 1.3488113689052964e-06,
      "loss": 0.8104,
      "step": 835200
    },
    {
      "epoch": 2.9272028234271557,
      "grad_norm": 3.109375,
      "learning_rate": 1.3481623402415944e-06,
      "loss": 0.7675,
      "step": 835210
    },
    {
      "epoch": 2.927237870934051,
      "grad_norm": 2.90625,
      "learning_rate": 1.3475133115778926e-06,
      "loss": 0.8138,
      "step": 835220
    },
    {
      "epoch": 2.9272729184409467,
      "grad_norm": 3.25,
      "learning_rate": 1.3468642829141906e-06,
      "loss": 0.8557,
      "step": 835230
    },
    {
      "epoch": 2.9273079659478425,
      "grad_norm": 3.25,
      "learning_rate": 1.3462152542504888e-06,
      "loss": 0.7609,
      "step": 835240
    },
    {
      "epoch": 2.927343013454738,
      "grad_norm": 2.8125,
      "learning_rate": 1.345566225586787e-06,
      "loss": 0.7788,
      "step": 835250
    },
    {
      "epoch": 2.9273780609616336,
      "grad_norm": 2.703125,
      "learning_rate": 1.344917196923085e-06,
      "loss": 0.7915,
      "step": 835260
    },
    {
      "epoch": 2.927413108468529,
      "grad_norm": 2.9375,
      "learning_rate": 1.344268168259383e-06,
      "loss": 0.7818,
      "step": 835270
    },
    {
      "epoch": 2.9274481559754246,
      "grad_norm": 2.890625,
      "learning_rate": 1.3436191395956812e-06,
      "loss": 0.7922,
      "step": 835280
    },
    {
      "epoch": 2.9274832034823204,
      "grad_norm": 2.6875,
      "learning_rate": 1.3429701109319792e-06,
      "loss": 0.7642,
      "step": 835290
    },
    {
      "epoch": 2.927518250989216,
      "grad_norm": 3.046875,
      "learning_rate": 1.3423210822682772e-06,
      "loss": 0.8213,
      "step": 835300
    },
    {
      "epoch": 2.9275532984961115,
      "grad_norm": 2.625,
      "learning_rate": 1.3416720536045754e-06,
      "loss": 0.7803,
      "step": 835310
    },
    {
      "epoch": 2.927588346003007,
      "grad_norm": 3.125,
      "learning_rate": 1.3410230249408736e-06,
      "loss": 0.8571,
      "step": 835320
    },
    {
      "epoch": 2.9276233935099025,
      "grad_norm": 3.0,
      "learning_rate": 1.3403739962771716e-06,
      "loss": 0.7926,
      "step": 835330
    },
    {
      "epoch": 2.9276584410167983,
      "grad_norm": 2.8125,
      "learning_rate": 1.3397249676134696e-06,
      "loss": 0.6934,
      "step": 835340
    },
    {
      "epoch": 2.927693488523694,
      "grad_norm": 3.3125,
      "learning_rate": 1.3390759389497679e-06,
      "loss": 0.8455,
      "step": 835350
    },
    {
      "epoch": 2.9277285360305894,
      "grad_norm": 3.390625,
      "learning_rate": 1.3384269102860659e-06,
      "loss": 0.8187,
      "step": 835360
    },
    {
      "epoch": 2.927763583537485,
      "grad_norm": 2.859375,
      "learning_rate": 1.3377778816223639e-06,
      "loss": 0.7921,
      "step": 835370
    },
    {
      "epoch": 2.9277986310443804,
      "grad_norm": 3.046875,
      "learning_rate": 1.3371288529586623e-06,
      "loss": 0.792,
      "step": 835380
    },
    {
      "epoch": 2.927833678551276,
      "grad_norm": 2.921875,
      "learning_rate": 1.3364798242949603e-06,
      "loss": 0.8264,
      "step": 835390
    },
    {
      "epoch": 2.927868726058172,
      "grad_norm": 2.96875,
      "learning_rate": 1.3358307956312583e-06,
      "loss": 0.7706,
      "step": 835400
    },
    {
      "epoch": 2.9279037735650677,
      "grad_norm": 3.25,
      "learning_rate": 1.3351817669675565e-06,
      "loss": 0.8097,
      "step": 835410
    },
    {
      "epoch": 2.927938821071963,
      "grad_norm": 2.984375,
      "learning_rate": 1.3345327383038545e-06,
      "loss": 0.8068,
      "step": 835420
    },
    {
      "epoch": 2.9279738685788588,
      "grad_norm": 2.9375,
      "learning_rate": 1.3338837096401525e-06,
      "loss": 0.866,
      "step": 835430
    },
    {
      "epoch": 2.928008916085754,
      "grad_norm": 2.9375,
      "learning_rate": 1.3332346809764507e-06,
      "loss": 0.8803,
      "step": 835440
    },
    {
      "epoch": 2.92804396359265,
      "grad_norm": 3.25,
      "learning_rate": 1.332585652312749e-06,
      "loss": 0.773,
      "step": 835450
    },
    {
      "epoch": 2.9280790110995456,
      "grad_norm": 2.828125,
      "learning_rate": 1.331936623649047e-06,
      "loss": 0.8372,
      "step": 835460
    },
    {
      "epoch": 2.928114058606441,
      "grad_norm": 3.5,
      "learning_rate": 1.331287594985345e-06,
      "loss": 0.7705,
      "step": 835470
    },
    {
      "epoch": 2.9281491061133367,
      "grad_norm": 2.75,
      "learning_rate": 1.3306385663216431e-06,
      "loss": 0.7752,
      "step": 835480
    },
    {
      "epoch": 2.928184153620232,
      "grad_norm": 3.015625,
      "learning_rate": 1.3299895376579411e-06,
      "loss": 0.8305,
      "step": 835490
    },
    {
      "epoch": 2.9282192011271277,
      "grad_norm": 2.640625,
      "learning_rate": 1.3293405089942391e-06,
      "loss": 0.8529,
      "step": 835500
    },
    {
      "epoch": 2.9282542486340235,
      "grad_norm": 2.890625,
      "learning_rate": 1.3286914803305376e-06,
      "loss": 0.74,
      "step": 835510
    },
    {
      "epoch": 2.9282892961409193,
      "grad_norm": 2.796875,
      "learning_rate": 1.3280424516668356e-06,
      "loss": 0.8406,
      "step": 835520
    },
    {
      "epoch": 2.9283243436478146,
      "grad_norm": 3.15625,
      "learning_rate": 1.3273934230031336e-06,
      "loss": 0.8725,
      "step": 835530
    },
    {
      "epoch": 2.9283593911547103,
      "grad_norm": 3.234375,
      "learning_rate": 1.3267443943394318e-06,
      "loss": 0.823,
      "step": 835540
    },
    {
      "epoch": 2.9283944386616056,
      "grad_norm": 3.0625,
      "learning_rate": 1.3260953656757298e-06,
      "loss": 0.7663,
      "step": 835550
    },
    {
      "epoch": 2.9284294861685014,
      "grad_norm": 2.78125,
      "learning_rate": 1.3254463370120278e-06,
      "loss": 0.7805,
      "step": 835560
    },
    {
      "epoch": 2.928464533675397,
      "grad_norm": 2.84375,
      "learning_rate": 1.324797308348326e-06,
      "loss": 0.8538,
      "step": 835570
    },
    {
      "epoch": 2.9284995811822925,
      "grad_norm": 2.78125,
      "learning_rate": 1.3241482796846242e-06,
      "loss": 0.8029,
      "step": 835580
    },
    {
      "epoch": 2.9285346286891882,
      "grad_norm": 3.171875,
      "learning_rate": 1.3234992510209222e-06,
      "loss": 0.7988,
      "step": 835590
    },
    {
      "epoch": 2.9285696761960835,
      "grad_norm": 2.515625,
      "learning_rate": 1.3228502223572202e-06,
      "loss": 0.7798,
      "step": 835600
    },
    {
      "epoch": 2.9286047237029793,
      "grad_norm": 2.34375,
      "learning_rate": 1.3222011936935184e-06,
      "loss": 0.7166,
      "step": 835610
    },
    {
      "epoch": 2.928639771209875,
      "grad_norm": 3.359375,
      "learning_rate": 1.3215521650298164e-06,
      "loss": 0.821,
      "step": 835620
    },
    {
      "epoch": 2.928674818716771,
      "grad_norm": 2.859375,
      "learning_rate": 1.3209031363661144e-06,
      "loss": 0.7986,
      "step": 835630
    },
    {
      "epoch": 2.928709866223666,
      "grad_norm": 2.75,
      "learning_rate": 1.3202541077024126e-06,
      "loss": 0.7152,
      "step": 835640
    },
    {
      "epoch": 2.928744913730562,
      "grad_norm": 3.15625,
      "learning_rate": 1.3196050790387108e-06,
      "loss": 0.8111,
      "step": 835650
    },
    {
      "epoch": 2.928779961237457,
      "grad_norm": 2.6875,
      "learning_rate": 1.3189560503750088e-06,
      "loss": 0.8245,
      "step": 835660
    },
    {
      "epoch": 2.928815008744353,
      "grad_norm": 2.890625,
      "learning_rate": 1.318307021711307e-06,
      "loss": 0.8171,
      "step": 835670
    },
    {
      "epoch": 2.9288500562512487,
      "grad_norm": 2.921875,
      "learning_rate": 1.317657993047605e-06,
      "loss": 0.91,
      "step": 835680
    },
    {
      "epoch": 2.928885103758144,
      "grad_norm": 3.3125,
      "learning_rate": 1.317008964383903e-06,
      "loss": 0.8946,
      "step": 835690
    },
    {
      "epoch": 2.92892015126504,
      "grad_norm": 2.953125,
      "learning_rate": 1.3163599357202012e-06,
      "loss": 0.7276,
      "step": 835700
    },
    {
      "epoch": 2.928955198771935,
      "grad_norm": 3.390625,
      "learning_rate": 1.3157109070564995e-06,
      "loss": 0.8794,
      "step": 835710
    },
    {
      "epoch": 2.928990246278831,
      "grad_norm": 2.984375,
      "learning_rate": 1.3150618783927975e-06,
      "loss": 0.7897,
      "step": 835720
    },
    {
      "epoch": 2.9290252937857266,
      "grad_norm": 3.25,
      "learning_rate": 1.3144128497290955e-06,
      "loss": 0.8422,
      "step": 835730
    },
    {
      "epoch": 2.9290603412926224,
      "grad_norm": 2.828125,
      "learning_rate": 1.3137638210653937e-06,
      "loss": 0.7903,
      "step": 835740
    },
    {
      "epoch": 2.9290953887995177,
      "grad_norm": 3.0625,
      "learning_rate": 1.3131147924016917e-06,
      "loss": 0.8151,
      "step": 835750
    },
    {
      "epoch": 2.9291304363064135,
      "grad_norm": 2.875,
      "learning_rate": 1.3124657637379897e-06,
      "loss": 0.788,
      "step": 835760
    },
    {
      "epoch": 2.9291654838133088,
      "grad_norm": 2.921875,
      "learning_rate": 1.3118167350742879e-06,
      "loss": 0.8914,
      "step": 835770
    },
    {
      "epoch": 2.9292005313202045,
      "grad_norm": 2.578125,
      "learning_rate": 1.311167706410586e-06,
      "loss": 0.7546,
      "step": 835780
    },
    {
      "epoch": 2.9292355788271003,
      "grad_norm": 2.6875,
      "learning_rate": 1.310518677746884e-06,
      "loss": 0.7572,
      "step": 835790
    },
    {
      "epoch": 2.9292706263339956,
      "grad_norm": 3.25,
      "learning_rate": 1.309869649083182e-06,
      "loss": 0.7991,
      "step": 835800
    },
    {
      "epoch": 2.9293056738408914,
      "grad_norm": 2.75,
      "learning_rate": 1.3092206204194803e-06,
      "loss": 0.7677,
      "step": 835810
    },
    {
      "epoch": 2.9293407213477867,
      "grad_norm": 2.890625,
      "learning_rate": 1.3085715917557783e-06,
      "loss": 0.742,
      "step": 835820
    },
    {
      "epoch": 2.9293757688546824,
      "grad_norm": 3.296875,
      "learning_rate": 1.3079225630920763e-06,
      "loss": 0.848,
      "step": 835830
    },
    {
      "epoch": 2.929410816361578,
      "grad_norm": 3.046875,
      "learning_rate": 1.3072735344283745e-06,
      "loss": 0.7826,
      "step": 835840
    },
    {
      "epoch": 2.929445863868474,
      "grad_norm": 3.0,
      "learning_rate": 1.3066245057646727e-06,
      "loss": 0.8743,
      "step": 835850
    },
    {
      "epoch": 2.9294809113753693,
      "grad_norm": 2.765625,
      "learning_rate": 1.3059754771009707e-06,
      "loss": 0.8024,
      "step": 835860
    },
    {
      "epoch": 2.929515958882265,
      "grad_norm": 2.8125,
      "learning_rate": 1.305326448437269e-06,
      "loss": 0.8271,
      "step": 835870
    },
    {
      "epoch": 2.9295510063891603,
      "grad_norm": 2.71875,
      "learning_rate": 1.304677419773567e-06,
      "loss": 0.758,
      "step": 835880
    },
    {
      "epoch": 2.929586053896056,
      "grad_norm": 3.109375,
      "learning_rate": 1.304028391109865e-06,
      "loss": 0.8556,
      "step": 835890
    },
    {
      "epoch": 2.929621101402952,
      "grad_norm": 3.046875,
      "learning_rate": 1.3033793624461631e-06,
      "loss": 0.8279,
      "step": 835900
    },
    {
      "epoch": 2.929656148909847,
      "grad_norm": 2.96875,
      "learning_rate": 1.3027303337824614e-06,
      "loss": 0.8159,
      "step": 835910
    },
    {
      "epoch": 2.929691196416743,
      "grad_norm": 2.765625,
      "learning_rate": 1.3020813051187594e-06,
      "loss": 0.7753,
      "step": 835920
    },
    {
      "epoch": 2.9297262439236382,
      "grad_norm": 2.75,
      "learning_rate": 1.3014322764550574e-06,
      "loss": 0.8677,
      "step": 835930
    },
    {
      "epoch": 2.929761291430534,
      "grad_norm": 2.859375,
      "learning_rate": 1.3007832477913556e-06,
      "loss": 0.7542,
      "step": 835940
    },
    {
      "epoch": 2.9297963389374297,
      "grad_norm": 2.6875,
      "learning_rate": 1.3001342191276536e-06,
      "loss": 0.7643,
      "step": 835950
    },
    {
      "epoch": 2.9298313864443255,
      "grad_norm": 2.859375,
      "learning_rate": 1.2994851904639516e-06,
      "loss": 0.7779,
      "step": 835960
    },
    {
      "epoch": 2.929866433951221,
      "grad_norm": 2.375,
      "learning_rate": 1.2988361618002498e-06,
      "loss": 0.7635,
      "step": 835970
    },
    {
      "epoch": 2.9299014814581166,
      "grad_norm": 2.875,
      "learning_rate": 1.298187133136548e-06,
      "loss": 0.7778,
      "step": 835980
    },
    {
      "epoch": 2.929936528965012,
      "grad_norm": 2.984375,
      "learning_rate": 1.297538104472846e-06,
      "loss": 0.7951,
      "step": 835990
    },
    {
      "epoch": 2.9299715764719076,
      "grad_norm": 2.796875,
      "learning_rate": 1.2968890758091442e-06,
      "loss": 0.7535,
      "step": 836000
    },
    {
      "epoch": 2.9300066239788034,
      "grad_norm": 3.203125,
      "learning_rate": 1.2962400471454422e-06,
      "loss": 0.752,
      "step": 836010
    },
    {
      "epoch": 2.9300416714856987,
      "grad_norm": 3.4375,
      "learning_rate": 1.2955910184817402e-06,
      "loss": 0.748,
      "step": 836020
    },
    {
      "epoch": 2.9300767189925945,
      "grad_norm": 3.359375,
      "learning_rate": 1.2949419898180384e-06,
      "loss": 0.9362,
      "step": 836030
    },
    {
      "epoch": 2.93011176649949,
      "grad_norm": 2.78125,
      "learning_rate": 1.2942929611543364e-06,
      "loss": 0.7906,
      "step": 836040
    },
    {
      "epoch": 2.9301468140063855,
      "grad_norm": 2.859375,
      "learning_rate": 1.2936439324906346e-06,
      "loss": 0.7925,
      "step": 836050
    },
    {
      "epoch": 2.9301818615132813,
      "grad_norm": 2.53125,
      "learning_rate": 1.2929949038269326e-06,
      "loss": 0.7876,
      "step": 836060
    },
    {
      "epoch": 2.930216909020177,
      "grad_norm": 3.359375,
      "learning_rate": 1.2923458751632308e-06,
      "loss": 0.8017,
      "step": 836070
    },
    {
      "epoch": 2.9302519565270724,
      "grad_norm": 2.734375,
      "learning_rate": 1.2916968464995288e-06,
      "loss": 0.7364,
      "step": 836080
    },
    {
      "epoch": 2.930287004033968,
      "grad_norm": 3.09375,
      "learning_rate": 1.2910478178358268e-06,
      "loss": 0.8216,
      "step": 836090
    },
    {
      "epoch": 2.9303220515408634,
      "grad_norm": 2.890625,
      "learning_rate": 1.290398789172125e-06,
      "loss": 0.8092,
      "step": 836100
    },
    {
      "epoch": 2.930357099047759,
      "grad_norm": 2.703125,
      "learning_rate": 1.2897497605084233e-06,
      "loss": 0.8026,
      "step": 836110
    },
    {
      "epoch": 2.930392146554655,
      "grad_norm": 2.90625,
      "learning_rate": 1.2891007318447213e-06,
      "loss": 0.8139,
      "step": 836120
    },
    {
      "epoch": 2.9304271940615503,
      "grad_norm": 2.578125,
      "learning_rate": 1.2884517031810195e-06,
      "loss": 0.7469,
      "step": 836130
    },
    {
      "epoch": 2.930462241568446,
      "grad_norm": 3.0,
      "learning_rate": 1.2878026745173175e-06,
      "loss": 0.7203,
      "step": 836140
    },
    {
      "epoch": 2.9304972890753413,
      "grad_norm": 2.828125,
      "learning_rate": 1.2871536458536155e-06,
      "loss": 0.8106,
      "step": 836150
    },
    {
      "epoch": 2.930532336582237,
      "grad_norm": 2.46875,
      "learning_rate": 1.2865046171899137e-06,
      "loss": 0.7892,
      "step": 836160
    },
    {
      "epoch": 2.930567384089133,
      "grad_norm": 3.09375,
      "learning_rate": 1.2858555885262117e-06,
      "loss": 0.8553,
      "step": 836170
    },
    {
      "epoch": 2.9306024315960286,
      "grad_norm": 2.59375,
      "learning_rate": 1.28520655986251e-06,
      "loss": 0.7704,
      "step": 836180
    },
    {
      "epoch": 2.930637479102924,
      "grad_norm": 3.046875,
      "learning_rate": 1.284557531198808e-06,
      "loss": 0.8078,
      "step": 836190
    },
    {
      "epoch": 2.9306725266098197,
      "grad_norm": 2.796875,
      "learning_rate": 1.2839085025351061e-06,
      "loss": 0.8191,
      "step": 836200
    },
    {
      "epoch": 2.930707574116715,
      "grad_norm": 3.109375,
      "learning_rate": 1.2832594738714041e-06,
      "loss": 0.7935,
      "step": 836210
    },
    {
      "epoch": 2.9307426216236108,
      "grad_norm": 2.8125,
      "learning_rate": 1.2826104452077021e-06,
      "loss": 0.8294,
      "step": 836220
    },
    {
      "epoch": 2.9307776691305065,
      "grad_norm": 2.953125,
      "learning_rate": 1.2819614165440003e-06,
      "loss": 0.7636,
      "step": 836230
    },
    {
      "epoch": 2.930812716637402,
      "grad_norm": 2.828125,
      "learning_rate": 1.2813123878802983e-06,
      "loss": 0.8287,
      "step": 836240
    },
    {
      "epoch": 2.9308477641442976,
      "grad_norm": 2.875,
      "learning_rate": 1.2806633592165965e-06,
      "loss": 0.7892,
      "step": 836250
    },
    {
      "epoch": 2.9308828116511934,
      "grad_norm": 2.890625,
      "learning_rate": 1.2800143305528947e-06,
      "loss": 0.8131,
      "step": 836260
    },
    {
      "epoch": 2.9309178591580887,
      "grad_norm": 3.109375,
      "learning_rate": 1.2793653018891927e-06,
      "loss": 0.7334,
      "step": 836270
    },
    {
      "epoch": 2.9309529066649844,
      "grad_norm": 3.203125,
      "learning_rate": 1.2787162732254907e-06,
      "loss": 0.7947,
      "step": 836280
    },
    {
      "epoch": 2.93098795417188,
      "grad_norm": 3.109375,
      "learning_rate": 1.278067244561789e-06,
      "loss": 0.8185,
      "step": 836290
    },
    {
      "epoch": 2.9310230016787755,
      "grad_norm": 3.1875,
      "learning_rate": 1.277418215898087e-06,
      "loss": 0.8295,
      "step": 836300
    },
    {
      "epoch": 2.9310580491856713,
      "grad_norm": 2.859375,
      "learning_rate": 1.276769187234385e-06,
      "loss": 0.8229,
      "step": 836310
    },
    {
      "epoch": 2.9310930966925666,
      "grad_norm": 3.0,
      "learning_rate": 1.2761201585706832e-06,
      "loss": 0.7917,
      "step": 836320
    },
    {
      "epoch": 2.9311281441994623,
      "grad_norm": 3.125,
      "learning_rate": 1.2754711299069814e-06,
      "loss": 0.7813,
      "step": 836330
    },
    {
      "epoch": 2.931163191706358,
      "grad_norm": 2.625,
      "learning_rate": 1.2748221012432794e-06,
      "loss": 0.7652,
      "step": 836340
    },
    {
      "epoch": 2.931198239213254,
      "grad_norm": 3.046875,
      "learning_rate": 1.2741730725795774e-06,
      "loss": 0.8072,
      "step": 836350
    },
    {
      "epoch": 2.931233286720149,
      "grad_norm": 2.859375,
      "learning_rate": 1.2735240439158756e-06,
      "loss": 0.775,
      "step": 836360
    },
    {
      "epoch": 2.931268334227045,
      "grad_norm": 3.203125,
      "learning_rate": 1.2728750152521736e-06,
      "loss": 0.7887,
      "step": 836370
    },
    {
      "epoch": 2.9313033817339402,
      "grad_norm": 3.140625,
      "learning_rate": 1.2722259865884718e-06,
      "loss": 0.8464,
      "step": 836380
    },
    {
      "epoch": 2.931338429240836,
      "grad_norm": 3.453125,
      "learning_rate": 1.2715769579247698e-06,
      "loss": 0.8625,
      "step": 836390
    },
    {
      "epoch": 2.9313734767477317,
      "grad_norm": 3.171875,
      "learning_rate": 1.270927929261068e-06,
      "loss": 0.7626,
      "step": 836400
    },
    {
      "epoch": 2.931408524254627,
      "grad_norm": 3.015625,
      "learning_rate": 1.270278900597366e-06,
      "loss": 0.7736,
      "step": 836410
    },
    {
      "epoch": 2.931443571761523,
      "grad_norm": 2.984375,
      "learning_rate": 1.269629871933664e-06,
      "loss": 0.8667,
      "step": 836420
    },
    {
      "epoch": 2.931478619268418,
      "grad_norm": 2.859375,
      "learning_rate": 1.2689808432699622e-06,
      "loss": 0.805,
      "step": 836430
    },
    {
      "epoch": 2.931513666775314,
      "grad_norm": 2.921875,
      "learning_rate": 1.2683318146062602e-06,
      "loss": 0.7795,
      "step": 836440
    },
    {
      "epoch": 2.9315487142822096,
      "grad_norm": 3.359375,
      "learning_rate": 1.2676827859425584e-06,
      "loss": 0.7798,
      "step": 836450
    },
    {
      "epoch": 2.9315837617891054,
      "grad_norm": 2.765625,
      "learning_rate": 1.2670337572788566e-06,
      "loss": 0.7488,
      "step": 836460
    },
    {
      "epoch": 2.9316188092960007,
      "grad_norm": 3.03125,
      "learning_rate": 1.2663847286151546e-06,
      "loss": 0.7198,
      "step": 836470
    },
    {
      "epoch": 2.9316538568028965,
      "grad_norm": 2.609375,
      "learning_rate": 1.2657356999514526e-06,
      "loss": 0.7899,
      "step": 836480
    },
    {
      "epoch": 2.931688904309792,
      "grad_norm": 2.5625,
      "learning_rate": 1.2650866712877509e-06,
      "loss": 0.8249,
      "step": 836490
    },
    {
      "epoch": 2.9317239518166875,
      "grad_norm": 2.8125,
      "learning_rate": 1.2644376426240489e-06,
      "loss": 0.765,
      "step": 836500
    },
    {
      "epoch": 2.9317589993235833,
      "grad_norm": 3.15625,
      "learning_rate": 1.2637886139603469e-06,
      "loss": 0.756,
      "step": 836510
    },
    {
      "epoch": 2.9317940468304786,
      "grad_norm": 3.0625,
      "learning_rate": 1.263139585296645e-06,
      "loss": 0.7975,
      "step": 836520
    },
    {
      "epoch": 2.9318290943373744,
      "grad_norm": 3.0625,
      "learning_rate": 1.2624905566329433e-06,
      "loss": 0.7973,
      "step": 836530
    },
    {
      "epoch": 2.9318641418442697,
      "grad_norm": 3.390625,
      "learning_rate": 1.2618415279692413e-06,
      "loss": 0.8263,
      "step": 836540
    },
    {
      "epoch": 2.9318991893511654,
      "grad_norm": 2.75,
      "learning_rate": 1.2611924993055393e-06,
      "loss": 0.833,
      "step": 836550
    },
    {
      "epoch": 2.931934236858061,
      "grad_norm": 3.15625,
      "learning_rate": 1.2605434706418375e-06,
      "loss": 0.7541,
      "step": 836560
    },
    {
      "epoch": 2.931969284364957,
      "grad_norm": 2.5,
      "learning_rate": 1.2598944419781355e-06,
      "loss": 0.742,
      "step": 836570
    },
    {
      "epoch": 2.9320043318718523,
      "grad_norm": 2.8125,
      "learning_rate": 1.2592454133144337e-06,
      "loss": 0.7777,
      "step": 836580
    },
    {
      "epoch": 2.932039379378748,
      "grad_norm": 3.40625,
      "learning_rate": 1.258596384650732e-06,
      "loss": 0.8536,
      "step": 836590
    },
    {
      "epoch": 2.9320744268856433,
      "grad_norm": 2.75,
      "learning_rate": 1.25794735598703e-06,
      "loss": 0.8399,
      "step": 836600
    },
    {
      "epoch": 2.932109474392539,
      "grad_norm": 2.78125,
      "learning_rate": 1.257298327323328e-06,
      "loss": 0.7484,
      "step": 836610
    },
    {
      "epoch": 2.932144521899435,
      "grad_norm": 3.25,
      "learning_rate": 1.2566492986596261e-06,
      "loss": 0.7204,
      "step": 836620
    },
    {
      "epoch": 2.93217956940633,
      "grad_norm": 2.9375,
      "learning_rate": 1.2560002699959241e-06,
      "loss": 0.7707,
      "step": 836630
    },
    {
      "epoch": 2.932214616913226,
      "grad_norm": 3.421875,
      "learning_rate": 1.2553512413322221e-06,
      "loss": 0.6861,
      "step": 836640
    },
    {
      "epoch": 2.9322496644201212,
      "grad_norm": 2.6875,
      "learning_rate": 1.2547022126685203e-06,
      "loss": 0.7823,
      "step": 836650
    },
    {
      "epoch": 2.932284711927017,
      "grad_norm": 3.421875,
      "learning_rate": 1.2540531840048186e-06,
      "loss": 0.7725,
      "step": 836660
    },
    {
      "epoch": 2.9323197594339128,
      "grad_norm": 2.859375,
      "learning_rate": 1.2534041553411166e-06,
      "loss": 0.7799,
      "step": 836670
    },
    {
      "epoch": 2.9323548069408085,
      "grad_norm": 2.296875,
      "learning_rate": 1.2527551266774146e-06,
      "loss": 0.7051,
      "step": 836680
    },
    {
      "epoch": 2.932389854447704,
      "grad_norm": 2.828125,
      "learning_rate": 1.2521060980137128e-06,
      "loss": 0.8016,
      "step": 836690
    },
    {
      "epoch": 2.9324249019545996,
      "grad_norm": 3.03125,
      "learning_rate": 1.2514570693500108e-06,
      "loss": 0.791,
      "step": 836700
    },
    {
      "epoch": 2.932459949461495,
      "grad_norm": 3.421875,
      "learning_rate": 1.2508080406863088e-06,
      "loss": 0.7833,
      "step": 836710
    },
    {
      "epoch": 2.9324949969683907,
      "grad_norm": 2.8125,
      "learning_rate": 1.2501590120226072e-06,
      "loss": 0.7715,
      "step": 836720
    },
    {
      "epoch": 2.9325300444752864,
      "grad_norm": 3.265625,
      "learning_rate": 1.2495099833589052e-06,
      "loss": 0.8322,
      "step": 836730
    },
    {
      "epoch": 2.9325650919821817,
      "grad_norm": 2.875,
      "learning_rate": 1.2488609546952032e-06,
      "loss": 0.7603,
      "step": 836740
    },
    {
      "epoch": 2.9326001394890775,
      "grad_norm": 2.953125,
      "learning_rate": 1.2482119260315014e-06,
      "loss": 0.8633,
      "step": 836750
    },
    {
      "epoch": 2.932635186995973,
      "grad_norm": 2.890625,
      "learning_rate": 1.2475628973677994e-06,
      "loss": 0.827,
      "step": 836760
    },
    {
      "epoch": 2.9326702345028686,
      "grad_norm": 2.9375,
      "learning_rate": 1.2469138687040974e-06,
      "loss": 0.7759,
      "step": 836770
    },
    {
      "epoch": 2.9327052820097643,
      "grad_norm": 2.8125,
      "learning_rate": 1.2462648400403956e-06,
      "loss": 0.8847,
      "step": 836780
    },
    {
      "epoch": 2.93274032951666,
      "grad_norm": 3.234375,
      "learning_rate": 1.2456158113766938e-06,
      "loss": 0.8582,
      "step": 836790
    },
    {
      "epoch": 2.9327753770235554,
      "grad_norm": 2.8125,
      "learning_rate": 1.2449667827129918e-06,
      "loss": 0.8446,
      "step": 836800
    },
    {
      "epoch": 2.932810424530451,
      "grad_norm": 3.25,
      "learning_rate": 1.2443177540492898e-06,
      "loss": 0.8837,
      "step": 836810
    },
    {
      "epoch": 2.9328454720373465,
      "grad_norm": 2.78125,
      "learning_rate": 1.243668725385588e-06,
      "loss": 0.7514,
      "step": 836820
    },
    {
      "epoch": 2.9328805195442422,
      "grad_norm": 3.5,
      "learning_rate": 1.243019696721886e-06,
      "loss": 0.7635,
      "step": 836830
    },
    {
      "epoch": 2.932915567051138,
      "grad_norm": 3.296875,
      "learning_rate": 1.242370668058184e-06,
      "loss": 0.7063,
      "step": 836840
    },
    {
      "epoch": 2.9329506145580333,
      "grad_norm": 2.78125,
      "learning_rate": 1.2417216393944825e-06,
      "loss": 0.7895,
      "step": 836850
    },
    {
      "epoch": 2.932985662064929,
      "grad_norm": 2.828125,
      "learning_rate": 1.2410726107307805e-06,
      "loss": 0.787,
      "step": 836860
    },
    {
      "epoch": 2.9330207095718244,
      "grad_norm": 3.171875,
      "learning_rate": 1.2404235820670785e-06,
      "loss": 0.8177,
      "step": 836870
    },
    {
      "epoch": 2.93305575707872,
      "grad_norm": 3.21875,
      "learning_rate": 1.2397745534033767e-06,
      "loss": 0.8134,
      "step": 836880
    },
    {
      "epoch": 2.933090804585616,
      "grad_norm": 2.921875,
      "learning_rate": 1.2391255247396747e-06,
      "loss": 0.7906,
      "step": 836890
    },
    {
      "epoch": 2.9331258520925116,
      "grad_norm": 2.9375,
      "learning_rate": 1.2384764960759727e-06,
      "loss": 0.7354,
      "step": 836900
    },
    {
      "epoch": 2.933160899599407,
      "grad_norm": 2.8125,
      "learning_rate": 1.2378274674122709e-06,
      "loss": 0.7379,
      "step": 836910
    },
    {
      "epoch": 2.9331959471063027,
      "grad_norm": 2.71875,
      "learning_rate": 1.237178438748569e-06,
      "loss": 0.8306,
      "step": 836920
    },
    {
      "epoch": 2.933230994613198,
      "grad_norm": 2.96875,
      "learning_rate": 1.236529410084867e-06,
      "loss": 0.8017,
      "step": 836930
    },
    {
      "epoch": 2.933266042120094,
      "grad_norm": 2.6875,
      "learning_rate": 1.235880381421165e-06,
      "loss": 0.822,
      "step": 836940
    },
    {
      "epoch": 2.9333010896269895,
      "grad_norm": 2.765625,
      "learning_rate": 1.2352313527574633e-06,
      "loss": 0.766,
      "step": 836950
    },
    {
      "epoch": 2.933336137133885,
      "grad_norm": 2.765625,
      "learning_rate": 1.2345823240937613e-06,
      "loss": 0.8095,
      "step": 836960
    },
    {
      "epoch": 2.9333711846407806,
      "grad_norm": 3.109375,
      "learning_rate": 1.2339332954300593e-06,
      "loss": 0.8376,
      "step": 836970
    },
    {
      "epoch": 2.933406232147676,
      "grad_norm": 2.8125,
      "learning_rate": 1.2332842667663575e-06,
      "loss": 0.7716,
      "step": 836980
    },
    {
      "epoch": 2.9334412796545717,
      "grad_norm": 3.25,
      "learning_rate": 1.2326352381026557e-06,
      "loss": 0.7578,
      "step": 836990
    },
    {
      "epoch": 2.9334763271614674,
      "grad_norm": 2.734375,
      "learning_rate": 1.2319862094389537e-06,
      "loss": 0.8818,
      "step": 837000
    },
    {
      "epoch": 2.933511374668363,
      "grad_norm": 2.515625,
      "learning_rate": 1.2313371807752517e-06,
      "loss": 0.8266,
      "step": 837010
    },
    {
      "epoch": 2.9335464221752585,
      "grad_norm": 3.328125,
      "learning_rate": 1.23068815211155e-06,
      "loss": 0.7306,
      "step": 837020
    },
    {
      "epoch": 2.9335814696821543,
      "grad_norm": 2.984375,
      "learning_rate": 1.230039123447848e-06,
      "loss": 0.7716,
      "step": 837030
    },
    {
      "epoch": 2.9336165171890496,
      "grad_norm": 2.984375,
      "learning_rate": 1.229390094784146e-06,
      "loss": 0.7875,
      "step": 837040
    },
    {
      "epoch": 2.9336515646959453,
      "grad_norm": 2.75,
      "learning_rate": 1.2287410661204444e-06,
      "loss": 0.7599,
      "step": 837050
    },
    {
      "epoch": 2.933686612202841,
      "grad_norm": 3.28125,
      "learning_rate": 1.2280920374567424e-06,
      "loss": 0.779,
      "step": 837060
    },
    {
      "epoch": 2.9337216597097364,
      "grad_norm": 2.4375,
      "learning_rate": 1.2274430087930404e-06,
      "loss": 0.7878,
      "step": 837070
    },
    {
      "epoch": 2.933756707216632,
      "grad_norm": 2.609375,
      "learning_rate": 1.2267939801293386e-06,
      "loss": 0.8202,
      "step": 837080
    },
    {
      "epoch": 2.9337917547235275,
      "grad_norm": 2.9375,
      "learning_rate": 1.2261449514656366e-06,
      "loss": 0.7536,
      "step": 837090
    },
    {
      "epoch": 2.9338268022304232,
      "grad_norm": 3.0,
      "learning_rate": 1.2254959228019346e-06,
      "loss": 0.7473,
      "step": 837100
    },
    {
      "epoch": 2.933861849737319,
      "grad_norm": 3.09375,
      "learning_rate": 1.2248468941382328e-06,
      "loss": 0.8205,
      "step": 837110
    },
    {
      "epoch": 2.9338968972442148,
      "grad_norm": 2.546875,
      "learning_rate": 1.224197865474531e-06,
      "loss": 0.7468,
      "step": 837120
    },
    {
      "epoch": 2.93393194475111,
      "grad_norm": 2.890625,
      "learning_rate": 1.223548836810829e-06,
      "loss": 0.7425,
      "step": 837130
    },
    {
      "epoch": 2.933966992258006,
      "grad_norm": 3.1875,
      "learning_rate": 1.222899808147127e-06,
      "loss": 0.8384,
      "step": 837140
    },
    {
      "epoch": 2.934002039764901,
      "grad_norm": 3.390625,
      "learning_rate": 1.2222507794834252e-06,
      "loss": 0.7521,
      "step": 837150
    },
    {
      "epoch": 2.934037087271797,
      "grad_norm": 2.734375,
      "learning_rate": 1.2216017508197232e-06,
      "loss": 0.8184,
      "step": 837160
    },
    {
      "epoch": 2.9340721347786927,
      "grad_norm": 2.75,
      "learning_rate": 1.2209527221560212e-06,
      "loss": 0.7359,
      "step": 837170
    },
    {
      "epoch": 2.934107182285588,
      "grad_norm": 3.203125,
      "learning_rate": 1.2203036934923194e-06,
      "loss": 0.7884,
      "step": 837180
    },
    {
      "epoch": 2.9341422297924837,
      "grad_norm": 3.078125,
      "learning_rate": 1.2196546648286176e-06,
      "loss": 0.8311,
      "step": 837190
    },
    {
      "epoch": 2.934177277299379,
      "grad_norm": 2.9375,
      "learning_rate": 1.2190056361649156e-06,
      "loss": 0.7573,
      "step": 837200
    },
    {
      "epoch": 2.934212324806275,
      "grad_norm": 2.75,
      "learning_rate": 1.2183566075012138e-06,
      "loss": 0.7597,
      "step": 837210
    },
    {
      "epoch": 2.9342473723131706,
      "grad_norm": 2.515625,
      "learning_rate": 1.2177075788375118e-06,
      "loss": 0.7022,
      "step": 837220
    },
    {
      "epoch": 2.9342824198200663,
      "grad_norm": 3.203125,
      "learning_rate": 1.2170585501738098e-06,
      "loss": 0.7945,
      "step": 837230
    },
    {
      "epoch": 2.9343174673269616,
      "grad_norm": 2.890625,
      "learning_rate": 1.216409521510108e-06,
      "loss": 0.8468,
      "step": 837240
    },
    {
      "epoch": 2.9343525148338574,
      "grad_norm": 2.921875,
      "learning_rate": 1.2157604928464063e-06,
      "loss": 0.8202,
      "step": 837250
    },
    {
      "epoch": 2.9343875623407527,
      "grad_norm": 2.828125,
      "learning_rate": 1.2151114641827043e-06,
      "loss": 0.8577,
      "step": 837260
    },
    {
      "epoch": 2.9344226098476485,
      "grad_norm": 2.75,
      "learning_rate": 1.2144624355190023e-06,
      "loss": 0.8188,
      "step": 837270
    },
    {
      "epoch": 2.9344576573545442,
      "grad_norm": 2.703125,
      "learning_rate": 1.2138134068553005e-06,
      "loss": 0.8314,
      "step": 837280
    },
    {
      "epoch": 2.9344927048614395,
      "grad_norm": 3.0,
      "learning_rate": 1.2131643781915985e-06,
      "loss": 0.8132,
      "step": 837290
    },
    {
      "epoch": 2.9345277523683353,
      "grad_norm": 3.046875,
      "learning_rate": 1.2125153495278965e-06,
      "loss": 0.7859,
      "step": 837300
    },
    {
      "epoch": 2.9345627998752306,
      "grad_norm": 3.21875,
      "learning_rate": 1.2118663208641947e-06,
      "loss": 0.8094,
      "step": 837310
    },
    {
      "epoch": 2.9345978473821264,
      "grad_norm": 3.421875,
      "learning_rate": 1.211217292200493e-06,
      "loss": 0.7887,
      "step": 837320
    },
    {
      "epoch": 2.934632894889022,
      "grad_norm": 2.953125,
      "learning_rate": 1.210568263536791e-06,
      "loss": 0.7963,
      "step": 837330
    },
    {
      "epoch": 2.934667942395918,
      "grad_norm": 3.0625,
      "learning_rate": 1.2099192348730891e-06,
      "loss": 0.8384,
      "step": 837340
    },
    {
      "epoch": 2.934702989902813,
      "grad_norm": 2.8125,
      "learning_rate": 1.2092702062093871e-06,
      "loss": 0.7693,
      "step": 837350
    },
    {
      "epoch": 2.934738037409709,
      "grad_norm": 3.359375,
      "learning_rate": 1.2086211775456851e-06,
      "loss": 0.7856,
      "step": 837360
    },
    {
      "epoch": 2.9347730849166043,
      "grad_norm": 3.15625,
      "learning_rate": 1.2079721488819833e-06,
      "loss": 0.7874,
      "step": 837370
    },
    {
      "epoch": 2.9348081324235,
      "grad_norm": 3.0625,
      "learning_rate": 1.2073231202182813e-06,
      "loss": 0.7838,
      "step": 837380
    },
    {
      "epoch": 2.934843179930396,
      "grad_norm": 3.484375,
      "learning_rate": 1.2066740915545795e-06,
      "loss": 0.8098,
      "step": 837390
    },
    {
      "epoch": 2.934878227437291,
      "grad_norm": 2.6875,
      "learning_rate": 1.2060250628908775e-06,
      "loss": 0.8364,
      "step": 837400
    },
    {
      "epoch": 2.934913274944187,
      "grad_norm": 2.609375,
      "learning_rate": 1.2053760342271757e-06,
      "loss": 0.741,
      "step": 837410
    },
    {
      "epoch": 2.934948322451082,
      "grad_norm": 2.359375,
      "learning_rate": 1.2047270055634737e-06,
      "loss": 0.811,
      "step": 837420
    },
    {
      "epoch": 2.934983369957978,
      "grad_norm": 2.421875,
      "learning_rate": 1.2040779768997717e-06,
      "loss": 0.7676,
      "step": 837430
    },
    {
      "epoch": 2.9350184174648737,
      "grad_norm": 2.84375,
      "learning_rate": 1.20342894823607e-06,
      "loss": 0.7877,
      "step": 837440
    },
    {
      "epoch": 2.9350534649717694,
      "grad_norm": 2.984375,
      "learning_rate": 1.2027799195723682e-06,
      "loss": 0.821,
      "step": 837450
    },
    {
      "epoch": 2.9350885124786648,
      "grad_norm": 3.59375,
      "learning_rate": 1.2021308909086662e-06,
      "loss": 0.8233,
      "step": 837460
    },
    {
      "epoch": 2.9351235599855605,
      "grad_norm": 3.015625,
      "learning_rate": 1.2014818622449644e-06,
      "loss": 0.8319,
      "step": 837470
    },
    {
      "epoch": 2.935158607492456,
      "grad_norm": 3.015625,
      "learning_rate": 1.2008328335812624e-06,
      "loss": 0.7815,
      "step": 837480
    },
    {
      "epoch": 2.9351936549993516,
      "grad_norm": 3.140625,
      "learning_rate": 1.2001838049175604e-06,
      "loss": 0.8579,
      "step": 837490
    },
    {
      "epoch": 2.9352287025062473,
      "grad_norm": 3.328125,
      "learning_rate": 1.1995347762538586e-06,
      "loss": 0.7772,
      "step": 837500
    },
    {
      "epoch": 2.9352637500131427,
      "grad_norm": 2.734375,
      "learning_rate": 1.1988857475901566e-06,
      "loss": 0.8362,
      "step": 837510
    },
    {
      "epoch": 2.9352987975200384,
      "grad_norm": 2.65625,
      "learning_rate": 1.1982367189264548e-06,
      "loss": 0.8684,
      "step": 837520
    },
    {
      "epoch": 2.935333845026934,
      "grad_norm": 2.796875,
      "learning_rate": 1.1975876902627528e-06,
      "loss": 0.8706,
      "step": 837530
    },
    {
      "epoch": 2.9353688925338295,
      "grad_norm": 3.078125,
      "learning_rate": 1.196938661599051e-06,
      "loss": 0.7071,
      "step": 837540
    },
    {
      "epoch": 2.9354039400407252,
      "grad_norm": 2.9375,
      "learning_rate": 1.196289632935349e-06,
      "loss": 0.8029,
      "step": 837550
    },
    {
      "epoch": 2.935438987547621,
      "grad_norm": 2.46875,
      "learning_rate": 1.195640604271647e-06,
      "loss": 0.8558,
      "step": 837560
    },
    {
      "epoch": 2.9354740350545163,
      "grad_norm": 3.015625,
      "learning_rate": 1.1949915756079452e-06,
      "loss": 0.8535,
      "step": 837570
    },
    {
      "epoch": 2.935509082561412,
      "grad_norm": 2.71875,
      "learning_rate": 1.1943425469442432e-06,
      "loss": 0.7717,
      "step": 837580
    },
    {
      "epoch": 2.9355441300683074,
      "grad_norm": 3.25,
      "learning_rate": 1.1936935182805414e-06,
      "loss": 0.7662,
      "step": 837590
    },
    {
      "epoch": 2.935579177575203,
      "grad_norm": 3.171875,
      "learning_rate": 1.1930444896168394e-06,
      "loss": 0.8452,
      "step": 837600
    },
    {
      "epoch": 2.935614225082099,
      "grad_norm": 2.90625,
      "learning_rate": 1.1923954609531376e-06,
      "loss": 0.7788,
      "step": 837610
    },
    {
      "epoch": 2.935649272588994,
      "grad_norm": 3.078125,
      "learning_rate": 1.1917464322894356e-06,
      "loss": 0.8659,
      "step": 837620
    },
    {
      "epoch": 2.93568432009589,
      "grad_norm": 2.59375,
      "learning_rate": 1.1910974036257336e-06,
      "loss": 0.7586,
      "step": 837630
    },
    {
      "epoch": 2.9357193676027857,
      "grad_norm": 3.09375,
      "learning_rate": 1.1904483749620319e-06,
      "loss": 0.819,
      "step": 837640
    },
    {
      "epoch": 2.935754415109681,
      "grad_norm": 3.265625,
      "learning_rate": 1.1897993462983299e-06,
      "loss": 0.7817,
      "step": 837650
    },
    {
      "epoch": 2.935789462616577,
      "grad_norm": 3.359375,
      "learning_rate": 1.189150317634628e-06,
      "loss": 0.8503,
      "step": 837660
    },
    {
      "epoch": 2.9358245101234726,
      "grad_norm": 3.0,
      "learning_rate": 1.1885012889709263e-06,
      "loss": 0.7877,
      "step": 837670
    },
    {
      "epoch": 2.935859557630368,
      "grad_norm": 3.25,
      "learning_rate": 1.1878522603072243e-06,
      "loss": 0.7807,
      "step": 837680
    },
    {
      "epoch": 2.9358946051372636,
      "grad_norm": 3.21875,
      "learning_rate": 1.1872032316435223e-06,
      "loss": 0.8644,
      "step": 837690
    },
    {
      "epoch": 2.935929652644159,
      "grad_norm": 3.359375,
      "learning_rate": 1.1865542029798205e-06,
      "loss": 0.795,
      "step": 837700
    },
    {
      "epoch": 2.9359647001510547,
      "grad_norm": 3.4375,
      "learning_rate": 1.1859051743161185e-06,
      "loss": 0.733,
      "step": 837710
    },
    {
      "epoch": 2.9359997476579505,
      "grad_norm": 3.234375,
      "learning_rate": 1.1852561456524167e-06,
      "loss": 0.8585,
      "step": 837720
    },
    {
      "epoch": 2.9360347951648462,
      "grad_norm": 3.015625,
      "learning_rate": 1.1846071169887147e-06,
      "loss": 0.7895,
      "step": 837730
    },
    {
      "epoch": 2.9360698426717415,
      "grad_norm": 3.09375,
      "learning_rate": 1.183958088325013e-06,
      "loss": 0.759,
      "step": 837740
    },
    {
      "epoch": 2.9361048901786373,
      "grad_norm": 2.9375,
      "learning_rate": 1.183309059661311e-06,
      "loss": 0.7984,
      "step": 837750
    },
    {
      "epoch": 2.9361399376855326,
      "grad_norm": 3.40625,
      "learning_rate": 1.182660030997609e-06,
      "loss": 0.7777,
      "step": 837760
    },
    {
      "epoch": 2.9361749851924284,
      "grad_norm": 2.609375,
      "learning_rate": 1.1820110023339071e-06,
      "loss": 0.8073,
      "step": 837770
    },
    {
      "epoch": 2.936210032699324,
      "grad_norm": 3.296875,
      "learning_rate": 1.1813619736702051e-06,
      "loss": 0.8074,
      "step": 837780
    },
    {
      "epoch": 2.9362450802062194,
      "grad_norm": 2.859375,
      "learning_rate": 1.1807129450065033e-06,
      "loss": 0.8147,
      "step": 837790
    },
    {
      "epoch": 2.936280127713115,
      "grad_norm": 3.234375,
      "learning_rate": 1.1800639163428016e-06,
      "loss": 0.8482,
      "step": 837800
    },
    {
      "epoch": 2.9363151752200105,
      "grad_norm": 2.875,
      "learning_rate": 1.1794148876790996e-06,
      "loss": 0.8375,
      "step": 837810
    },
    {
      "epoch": 2.9363502227269063,
      "grad_norm": 3.046875,
      "learning_rate": 1.1787658590153976e-06,
      "loss": 0.7882,
      "step": 837820
    },
    {
      "epoch": 2.936385270233802,
      "grad_norm": 2.625,
      "learning_rate": 1.1781168303516958e-06,
      "loss": 0.8237,
      "step": 837830
    },
    {
      "epoch": 2.936420317740698,
      "grad_norm": 2.921875,
      "learning_rate": 1.1774678016879938e-06,
      "loss": 0.7688,
      "step": 837840
    },
    {
      "epoch": 2.936455365247593,
      "grad_norm": 3.359375,
      "learning_rate": 1.1768187730242918e-06,
      "loss": 0.8184,
      "step": 837850
    },
    {
      "epoch": 2.936490412754489,
      "grad_norm": 2.78125,
      "learning_rate": 1.17616974436059e-06,
      "loss": 0.7059,
      "step": 837860
    },
    {
      "epoch": 2.936525460261384,
      "grad_norm": 3.078125,
      "learning_rate": 1.1755207156968882e-06,
      "loss": 0.7546,
      "step": 837870
    },
    {
      "epoch": 2.93656050776828,
      "grad_norm": 3.390625,
      "learning_rate": 1.1748716870331862e-06,
      "loss": 0.75,
      "step": 837880
    },
    {
      "epoch": 2.9365955552751757,
      "grad_norm": 3.015625,
      "learning_rate": 1.1742226583694842e-06,
      "loss": 0.8426,
      "step": 837890
    },
    {
      "epoch": 2.936630602782071,
      "grad_norm": 3.1875,
      "learning_rate": 1.1735736297057824e-06,
      "loss": 0.7906,
      "step": 837900
    },
    {
      "epoch": 2.9366656502889668,
      "grad_norm": 3.140625,
      "learning_rate": 1.1729246010420804e-06,
      "loss": 0.8399,
      "step": 837910
    },
    {
      "epoch": 2.936700697795862,
      "grad_norm": 2.4375,
      "learning_rate": 1.1722755723783786e-06,
      "loss": 0.6835,
      "step": 837920
    },
    {
      "epoch": 2.936735745302758,
      "grad_norm": 2.8125,
      "learning_rate": 1.1716265437146768e-06,
      "loss": 0.8426,
      "step": 837930
    },
    {
      "epoch": 2.9367707928096536,
      "grad_norm": 3.25,
      "learning_rate": 1.1709775150509748e-06,
      "loss": 0.779,
      "step": 837940
    },
    {
      "epoch": 2.9368058403165493,
      "grad_norm": 3.375,
      "learning_rate": 1.1703284863872728e-06,
      "loss": 0.8659,
      "step": 837950
    },
    {
      "epoch": 2.9368408878234447,
      "grad_norm": 2.71875,
      "learning_rate": 1.169679457723571e-06,
      "loss": 0.8092,
      "step": 837960
    },
    {
      "epoch": 2.9368759353303404,
      "grad_norm": 2.71875,
      "learning_rate": 1.169030429059869e-06,
      "loss": 0.8466,
      "step": 837970
    },
    {
      "epoch": 2.9369109828372357,
      "grad_norm": 2.328125,
      "learning_rate": 1.168381400396167e-06,
      "loss": 0.7193,
      "step": 837980
    },
    {
      "epoch": 2.9369460303441315,
      "grad_norm": 2.765625,
      "learning_rate": 1.1677323717324652e-06,
      "loss": 0.7609,
      "step": 837990
    },
    {
      "epoch": 2.9369810778510272,
      "grad_norm": 3.234375,
      "learning_rate": 1.1670833430687635e-06,
      "loss": 0.8392,
      "step": 838000
    },
    {
      "epoch": 2.9370161253579226,
      "grad_norm": 2.90625,
      "learning_rate": 1.1664343144050615e-06,
      "loss": 0.7826,
      "step": 838010
    },
    {
      "epoch": 2.9370511728648183,
      "grad_norm": 3.484375,
      "learning_rate": 1.1657852857413595e-06,
      "loss": 0.7718,
      "step": 838020
    },
    {
      "epoch": 2.9370862203717136,
      "grad_norm": 3.25,
      "learning_rate": 1.1651362570776577e-06,
      "loss": 0.7459,
      "step": 838030
    },
    {
      "epoch": 2.9371212678786094,
      "grad_norm": 3.03125,
      "learning_rate": 1.1644872284139557e-06,
      "loss": 0.7802,
      "step": 838040
    },
    {
      "epoch": 2.937156315385505,
      "grad_norm": 2.921875,
      "learning_rate": 1.1638381997502537e-06,
      "loss": 0.7561,
      "step": 838050
    },
    {
      "epoch": 2.937191362892401,
      "grad_norm": 2.765625,
      "learning_rate": 1.163189171086552e-06,
      "loss": 0.748,
      "step": 838060
    },
    {
      "epoch": 2.937226410399296,
      "grad_norm": 2.75,
      "learning_rate": 1.16254014242285e-06,
      "loss": 0.7856,
      "step": 838070
    },
    {
      "epoch": 2.937261457906192,
      "grad_norm": 2.8125,
      "learning_rate": 1.161891113759148e-06,
      "loss": 0.8715,
      "step": 838080
    },
    {
      "epoch": 2.9372965054130873,
      "grad_norm": 3.015625,
      "learning_rate": 1.1612420850954463e-06,
      "loss": 0.827,
      "step": 838090
    },
    {
      "epoch": 2.937331552919983,
      "grad_norm": 2.75,
      "learning_rate": 1.1605930564317443e-06,
      "loss": 0.7993,
      "step": 838100
    },
    {
      "epoch": 2.937366600426879,
      "grad_norm": 2.75,
      "learning_rate": 1.1599440277680423e-06,
      "loss": 0.7239,
      "step": 838110
    },
    {
      "epoch": 2.937401647933774,
      "grad_norm": 3.046875,
      "learning_rate": 1.1592949991043405e-06,
      "loss": 0.8242,
      "step": 838120
    },
    {
      "epoch": 2.93743669544067,
      "grad_norm": 2.90625,
      "learning_rate": 1.1586459704406387e-06,
      "loss": 0.7737,
      "step": 838130
    },
    {
      "epoch": 2.937471742947565,
      "grad_norm": 3.1875,
      "learning_rate": 1.1579969417769367e-06,
      "loss": 0.7766,
      "step": 838140
    },
    {
      "epoch": 2.937506790454461,
      "grad_norm": 2.96875,
      "learning_rate": 1.1573479131132347e-06,
      "loss": 0.7879,
      "step": 838150
    },
    {
      "epoch": 2.9375418379613567,
      "grad_norm": 2.921875,
      "learning_rate": 1.156698884449533e-06,
      "loss": 0.8519,
      "step": 838160
    },
    {
      "epoch": 2.9375768854682525,
      "grad_norm": 2.625,
      "learning_rate": 1.156049855785831e-06,
      "loss": 0.7549,
      "step": 838170
    },
    {
      "epoch": 2.9376119329751478,
      "grad_norm": 3.375,
      "learning_rate": 1.155400827122129e-06,
      "loss": 0.7921,
      "step": 838180
    },
    {
      "epoch": 2.9376469804820435,
      "grad_norm": 2.921875,
      "learning_rate": 1.1547517984584271e-06,
      "loss": 0.807,
      "step": 838190
    },
    {
      "epoch": 2.937682027988939,
      "grad_norm": 2.90625,
      "learning_rate": 1.1541027697947254e-06,
      "loss": 0.7669,
      "step": 838200
    },
    {
      "epoch": 2.9377170754958346,
      "grad_norm": 3.0,
      "learning_rate": 1.1534537411310234e-06,
      "loss": 0.7769,
      "step": 838210
    },
    {
      "epoch": 2.9377521230027304,
      "grad_norm": 2.5625,
      "learning_rate": 1.1528047124673214e-06,
      "loss": 0.824,
      "step": 838220
    },
    {
      "epoch": 2.9377871705096257,
      "grad_norm": 3.640625,
      "learning_rate": 1.1521556838036196e-06,
      "loss": 0.7879,
      "step": 838230
    },
    {
      "epoch": 2.9378222180165214,
      "grad_norm": 2.796875,
      "learning_rate": 1.1515066551399176e-06,
      "loss": 0.7924,
      "step": 838240
    },
    {
      "epoch": 2.9378572655234167,
      "grad_norm": 2.578125,
      "learning_rate": 1.1508576264762156e-06,
      "loss": 0.7568,
      "step": 838250
    },
    {
      "epoch": 2.9378923130303125,
      "grad_norm": 3.109375,
      "learning_rate": 1.150208597812514e-06,
      "loss": 0.9046,
      "step": 838260
    },
    {
      "epoch": 2.9379273605372083,
      "grad_norm": 2.84375,
      "learning_rate": 1.149559569148812e-06,
      "loss": 0.8032,
      "step": 838270
    },
    {
      "epoch": 2.937962408044104,
      "grad_norm": 2.671875,
      "learning_rate": 1.14891054048511e-06,
      "loss": 0.8116,
      "step": 838280
    },
    {
      "epoch": 2.9379974555509993,
      "grad_norm": 2.921875,
      "learning_rate": 1.1482615118214082e-06,
      "loss": 0.7572,
      "step": 838290
    },
    {
      "epoch": 2.938032503057895,
      "grad_norm": 2.390625,
      "learning_rate": 1.1476124831577062e-06,
      "loss": 0.7244,
      "step": 838300
    },
    {
      "epoch": 2.9380675505647904,
      "grad_norm": 3.359375,
      "learning_rate": 1.1469634544940042e-06,
      "loss": 0.8569,
      "step": 838310
    },
    {
      "epoch": 2.938102598071686,
      "grad_norm": 3.125,
      "learning_rate": 1.1463144258303024e-06,
      "loss": 0.7847,
      "step": 838320
    },
    {
      "epoch": 2.938137645578582,
      "grad_norm": 2.546875,
      "learning_rate": 1.1456653971666006e-06,
      "loss": 0.8214,
      "step": 838330
    },
    {
      "epoch": 2.9381726930854772,
      "grad_norm": 2.984375,
      "learning_rate": 1.1450163685028986e-06,
      "loss": 0.7994,
      "step": 838340
    },
    {
      "epoch": 2.938207740592373,
      "grad_norm": 3.171875,
      "learning_rate": 1.1443673398391966e-06,
      "loss": 0.8095,
      "step": 838350
    },
    {
      "epoch": 2.9382427880992683,
      "grad_norm": 3.015625,
      "learning_rate": 1.1437183111754948e-06,
      "loss": 0.8382,
      "step": 838360
    },
    {
      "epoch": 2.938277835606164,
      "grad_norm": 2.546875,
      "learning_rate": 1.1430692825117928e-06,
      "loss": 0.7657,
      "step": 838370
    },
    {
      "epoch": 2.93831288311306,
      "grad_norm": 3.375,
      "learning_rate": 1.1424202538480908e-06,
      "loss": 0.8231,
      "step": 838380
    },
    {
      "epoch": 2.9383479306199556,
      "grad_norm": 3.0,
      "learning_rate": 1.1417712251843893e-06,
      "loss": 0.7833,
      "step": 838390
    },
    {
      "epoch": 2.938382978126851,
      "grad_norm": 3.4375,
      "learning_rate": 1.1411221965206873e-06,
      "loss": 0.8434,
      "step": 838400
    },
    {
      "epoch": 2.9384180256337467,
      "grad_norm": 3.140625,
      "learning_rate": 1.1404731678569853e-06,
      "loss": 0.8221,
      "step": 838410
    },
    {
      "epoch": 2.938453073140642,
      "grad_norm": 3.171875,
      "learning_rate": 1.1398241391932835e-06,
      "loss": 0.794,
      "step": 838420
    },
    {
      "epoch": 2.9384881206475377,
      "grad_norm": 2.890625,
      "learning_rate": 1.1391751105295815e-06,
      "loss": 0.8086,
      "step": 838430
    },
    {
      "epoch": 2.9385231681544335,
      "grad_norm": 3.0,
      "learning_rate": 1.1385260818658795e-06,
      "loss": 0.7443,
      "step": 838440
    },
    {
      "epoch": 2.938558215661329,
      "grad_norm": 3.046875,
      "learning_rate": 1.1378770532021777e-06,
      "loss": 0.8398,
      "step": 838450
    },
    {
      "epoch": 2.9385932631682246,
      "grad_norm": 2.8125,
      "learning_rate": 1.137228024538476e-06,
      "loss": 0.8258,
      "step": 838460
    },
    {
      "epoch": 2.93862831067512,
      "grad_norm": 2.890625,
      "learning_rate": 1.136578995874774e-06,
      "loss": 0.7592,
      "step": 838470
    },
    {
      "epoch": 2.9386633581820156,
      "grad_norm": 3.109375,
      "learning_rate": 1.135929967211072e-06,
      "loss": 0.8031,
      "step": 838480
    },
    {
      "epoch": 2.9386984056889114,
      "grad_norm": 2.921875,
      "learning_rate": 1.1352809385473701e-06,
      "loss": 0.7873,
      "step": 838490
    },
    {
      "epoch": 2.938733453195807,
      "grad_norm": 2.859375,
      "learning_rate": 1.1346319098836681e-06,
      "loss": 0.7598,
      "step": 838500
    },
    {
      "epoch": 2.9387685007027025,
      "grad_norm": 2.671875,
      "learning_rate": 1.1339828812199661e-06,
      "loss": 0.7306,
      "step": 838510
    },
    {
      "epoch": 2.938803548209598,
      "grad_norm": 3.09375,
      "learning_rate": 1.1333338525562643e-06,
      "loss": 0.8072,
      "step": 838520
    },
    {
      "epoch": 2.9388385957164935,
      "grad_norm": 3.453125,
      "learning_rate": 1.1326848238925625e-06,
      "loss": 0.8837,
      "step": 838530
    },
    {
      "epoch": 2.9388736432233893,
      "grad_norm": 3.078125,
      "learning_rate": 1.1320357952288605e-06,
      "loss": 0.8446,
      "step": 838540
    },
    {
      "epoch": 2.938908690730285,
      "grad_norm": 3.109375,
      "learning_rate": 1.1313867665651587e-06,
      "loss": 0.8192,
      "step": 838550
    },
    {
      "epoch": 2.9389437382371804,
      "grad_norm": 2.890625,
      "learning_rate": 1.1307377379014567e-06,
      "loss": 0.8847,
      "step": 838560
    },
    {
      "epoch": 2.938978785744076,
      "grad_norm": 2.734375,
      "learning_rate": 1.1300887092377547e-06,
      "loss": 0.8273,
      "step": 838570
    },
    {
      "epoch": 2.9390138332509714,
      "grad_norm": 3.734375,
      "learning_rate": 1.129439680574053e-06,
      "loss": 0.8276,
      "step": 838580
    },
    {
      "epoch": 2.939048880757867,
      "grad_norm": 2.84375,
      "learning_rate": 1.1287906519103512e-06,
      "loss": 0.7812,
      "step": 838590
    },
    {
      "epoch": 2.939083928264763,
      "grad_norm": 2.59375,
      "learning_rate": 1.1281416232466492e-06,
      "loss": 0.769,
      "step": 838600
    },
    {
      "epoch": 2.9391189757716587,
      "grad_norm": 2.484375,
      "learning_rate": 1.1274925945829472e-06,
      "loss": 0.6862,
      "step": 838610
    },
    {
      "epoch": 2.939154023278554,
      "grad_norm": 2.84375,
      "learning_rate": 1.1268435659192454e-06,
      "loss": 0.8526,
      "step": 838620
    },
    {
      "epoch": 2.9391890707854498,
      "grad_norm": 3.421875,
      "learning_rate": 1.1261945372555434e-06,
      "loss": 0.7882,
      "step": 838630
    },
    {
      "epoch": 2.939224118292345,
      "grad_norm": 3.015625,
      "learning_rate": 1.1255455085918414e-06,
      "loss": 0.8114,
      "step": 838640
    },
    {
      "epoch": 2.939259165799241,
      "grad_norm": 2.625,
      "learning_rate": 1.1248964799281396e-06,
      "loss": 0.8565,
      "step": 838650
    },
    {
      "epoch": 2.9392942133061366,
      "grad_norm": 2.46875,
      "learning_rate": 1.1242474512644378e-06,
      "loss": 0.7296,
      "step": 838660
    },
    {
      "epoch": 2.939329260813032,
      "grad_norm": 3.09375,
      "learning_rate": 1.1235984226007358e-06,
      "loss": 0.8386,
      "step": 838670
    },
    {
      "epoch": 2.9393643083199277,
      "grad_norm": 2.984375,
      "learning_rate": 1.122949393937034e-06,
      "loss": 0.7918,
      "step": 838680
    },
    {
      "epoch": 2.939399355826823,
      "grad_norm": 3.078125,
      "learning_rate": 1.122300365273332e-06,
      "loss": 0.8487,
      "step": 838690
    },
    {
      "epoch": 2.9394344033337187,
      "grad_norm": 3.15625,
      "learning_rate": 1.12165133660963e-06,
      "loss": 0.7699,
      "step": 838700
    },
    {
      "epoch": 2.9394694508406145,
      "grad_norm": 2.640625,
      "learning_rate": 1.1210023079459282e-06,
      "loss": 0.7984,
      "step": 838710
    },
    {
      "epoch": 2.9395044983475103,
      "grad_norm": 3.25,
      "learning_rate": 1.1203532792822262e-06,
      "loss": 0.7629,
      "step": 838720
    },
    {
      "epoch": 2.9395395458544056,
      "grad_norm": 3.140625,
      "learning_rate": 1.1197042506185244e-06,
      "loss": 0.8462,
      "step": 838730
    },
    {
      "epoch": 2.9395745933613013,
      "grad_norm": 3.140625,
      "learning_rate": 1.1190552219548224e-06,
      "loss": 0.8223,
      "step": 838740
    },
    {
      "epoch": 2.9396096408681966,
      "grad_norm": 2.953125,
      "learning_rate": 1.1184061932911206e-06,
      "loss": 0.7838,
      "step": 838750
    },
    {
      "epoch": 2.9396446883750924,
      "grad_norm": 2.171875,
      "learning_rate": 1.1177571646274186e-06,
      "loss": 0.7629,
      "step": 838760
    },
    {
      "epoch": 2.939679735881988,
      "grad_norm": 2.890625,
      "learning_rate": 1.1171081359637166e-06,
      "loss": 0.8832,
      "step": 838770
    },
    {
      "epoch": 2.9397147833888835,
      "grad_norm": 2.78125,
      "learning_rate": 1.1164591073000149e-06,
      "loss": 0.8458,
      "step": 838780
    },
    {
      "epoch": 2.9397498308957792,
      "grad_norm": 2.625,
      "learning_rate": 1.115810078636313e-06,
      "loss": 0.798,
      "step": 838790
    },
    {
      "epoch": 2.9397848784026746,
      "grad_norm": 2.921875,
      "learning_rate": 1.115161049972611e-06,
      "loss": 0.7018,
      "step": 838800
    },
    {
      "epoch": 2.9398199259095703,
      "grad_norm": 2.921875,
      "learning_rate": 1.114512021308909e-06,
      "loss": 0.8453,
      "step": 838810
    },
    {
      "epoch": 2.939854973416466,
      "grad_norm": 3.046875,
      "learning_rate": 1.1138629926452073e-06,
      "loss": 0.8755,
      "step": 838820
    },
    {
      "epoch": 2.939890020923362,
      "grad_norm": 2.71875,
      "learning_rate": 1.1132139639815053e-06,
      "loss": 0.7929,
      "step": 838830
    },
    {
      "epoch": 2.939925068430257,
      "grad_norm": 3.4375,
      "learning_rate": 1.1125649353178033e-06,
      "loss": 0.8301,
      "step": 838840
    },
    {
      "epoch": 2.939960115937153,
      "grad_norm": 2.234375,
      "learning_rate": 1.1119159066541015e-06,
      "loss": 0.7742,
      "step": 838850
    },
    {
      "epoch": 2.939995163444048,
      "grad_norm": 3.140625,
      "learning_rate": 1.1112668779903997e-06,
      "loss": 0.8587,
      "step": 838860
    },
    {
      "epoch": 2.940030210950944,
      "grad_norm": 3.0625,
      "learning_rate": 1.1106178493266977e-06,
      "loss": 0.8701,
      "step": 838870
    },
    {
      "epoch": 2.9400652584578397,
      "grad_norm": 2.703125,
      "learning_rate": 1.109968820662996e-06,
      "loss": 0.6829,
      "step": 838880
    },
    {
      "epoch": 2.940100305964735,
      "grad_norm": 3.0,
      "learning_rate": 1.109319791999294e-06,
      "loss": 0.8628,
      "step": 838890
    },
    {
      "epoch": 2.940135353471631,
      "grad_norm": 2.96875,
      "learning_rate": 1.108670763335592e-06,
      "loss": 0.7565,
      "step": 838900
    },
    {
      "epoch": 2.9401704009785266,
      "grad_norm": 3.625,
      "learning_rate": 1.1080217346718901e-06,
      "loss": 0.785,
      "step": 838910
    },
    {
      "epoch": 2.940205448485422,
      "grad_norm": 2.765625,
      "learning_rate": 1.1073727060081881e-06,
      "loss": 0.7965,
      "step": 838920
    },
    {
      "epoch": 2.9402404959923176,
      "grad_norm": 2.28125,
      "learning_rate": 1.1067236773444863e-06,
      "loss": 0.8719,
      "step": 838930
    },
    {
      "epoch": 2.9402755434992134,
      "grad_norm": 2.828125,
      "learning_rate": 1.1060746486807843e-06,
      "loss": 0.8019,
      "step": 838940
    },
    {
      "epoch": 2.9403105910061087,
      "grad_norm": 2.859375,
      "learning_rate": 1.1054256200170825e-06,
      "loss": 0.7721,
      "step": 838950
    },
    {
      "epoch": 2.9403456385130045,
      "grad_norm": 2.78125,
      "learning_rate": 1.1047765913533805e-06,
      "loss": 0.7954,
      "step": 838960
    },
    {
      "epoch": 2.9403806860198998,
      "grad_norm": 2.71875,
      "learning_rate": 1.1041275626896785e-06,
      "loss": 0.7654,
      "step": 838970
    },
    {
      "epoch": 2.9404157335267955,
      "grad_norm": 2.734375,
      "learning_rate": 1.1034785340259768e-06,
      "loss": 0.8336,
      "step": 838980
    },
    {
      "epoch": 2.9404507810336913,
      "grad_norm": 2.84375,
      "learning_rate": 1.102829505362275e-06,
      "loss": 0.8491,
      "step": 838990
    },
    {
      "epoch": 2.9404858285405866,
      "grad_norm": 2.65625,
      "learning_rate": 1.102180476698573e-06,
      "loss": 0.8271,
      "step": 839000
    },
    {
      "epoch": 2.9405208760474824,
      "grad_norm": 2.796875,
      "learning_rate": 1.1015314480348712e-06,
      "loss": 0.7575,
      "step": 839010
    },
    {
      "epoch": 2.940555923554378,
      "grad_norm": 3.296875,
      "learning_rate": 1.1008824193711692e-06,
      "loss": 0.8434,
      "step": 839020
    },
    {
      "epoch": 2.9405909710612734,
      "grad_norm": 3.34375,
      "learning_rate": 1.1002333907074672e-06,
      "loss": 0.8196,
      "step": 839030
    },
    {
      "epoch": 2.940626018568169,
      "grad_norm": 2.609375,
      "learning_rate": 1.0995843620437654e-06,
      "loss": 0.7854,
      "step": 839040
    },
    {
      "epoch": 2.940661066075065,
      "grad_norm": 3.203125,
      "learning_rate": 1.0989353333800634e-06,
      "loss": 0.7452,
      "step": 839050
    },
    {
      "epoch": 2.9406961135819603,
      "grad_norm": 3.140625,
      "learning_rate": 1.0982863047163616e-06,
      "loss": 0.7902,
      "step": 839060
    },
    {
      "epoch": 2.940731161088856,
      "grad_norm": 2.8125,
      "learning_rate": 1.0976372760526596e-06,
      "loss": 0.8083,
      "step": 839070
    },
    {
      "epoch": 2.9407662085957513,
      "grad_norm": 2.65625,
      "learning_rate": 1.0969882473889578e-06,
      "loss": 0.7663,
      "step": 839080
    },
    {
      "epoch": 2.940801256102647,
      "grad_norm": 3.109375,
      "learning_rate": 1.0963392187252558e-06,
      "loss": 0.8259,
      "step": 839090
    },
    {
      "epoch": 2.940836303609543,
      "grad_norm": 2.578125,
      "learning_rate": 1.0956901900615538e-06,
      "loss": 0.7705,
      "step": 839100
    },
    {
      "epoch": 2.9408713511164386,
      "grad_norm": 2.921875,
      "learning_rate": 1.095041161397852e-06,
      "loss": 0.8247,
      "step": 839110
    },
    {
      "epoch": 2.940906398623334,
      "grad_norm": 2.84375,
      "learning_rate": 1.09439213273415e-06,
      "loss": 0.7186,
      "step": 839120
    },
    {
      "epoch": 2.9409414461302297,
      "grad_norm": 2.9375,
      "learning_rate": 1.0937431040704482e-06,
      "loss": 0.7736,
      "step": 839130
    },
    {
      "epoch": 2.940976493637125,
      "grad_norm": 4.0,
      "learning_rate": 1.0930940754067465e-06,
      "loss": 0.7529,
      "step": 839140
    },
    {
      "epoch": 2.9410115411440207,
      "grad_norm": 3.3125,
      "learning_rate": 1.0924450467430445e-06,
      "loss": 0.8371,
      "step": 839150
    },
    {
      "epoch": 2.9410465886509165,
      "grad_norm": 2.5625,
      "learning_rate": 1.0917960180793425e-06,
      "loss": 0.8843,
      "step": 839160
    },
    {
      "epoch": 2.941081636157812,
      "grad_norm": 2.546875,
      "learning_rate": 1.0911469894156407e-06,
      "loss": 0.7784,
      "step": 839170
    },
    {
      "epoch": 2.9411166836647076,
      "grad_norm": 2.921875,
      "learning_rate": 1.0904979607519387e-06,
      "loss": 0.8518,
      "step": 839180
    },
    {
      "epoch": 2.941151731171603,
      "grad_norm": 2.65625,
      "learning_rate": 1.0898489320882367e-06,
      "loss": 0.6824,
      "step": 839190
    },
    {
      "epoch": 2.9411867786784986,
      "grad_norm": 2.84375,
      "learning_rate": 1.0891999034245349e-06,
      "loss": 0.783,
      "step": 839200
    },
    {
      "epoch": 2.9412218261853944,
      "grad_norm": 2.78125,
      "learning_rate": 1.088550874760833e-06,
      "loss": 0.7777,
      "step": 839210
    },
    {
      "epoch": 2.94125687369229,
      "grad_norm": 3.046875,
      "learning_rate": 1.087901846097131e-06,
      "loss": 0.807,
      "step": 839220
    },
    {
      "epoch": 2.9412919211991855,
      "grad_norm": 2.75,
      "learning_rate": 1.087252817433429e-06,
      "loss": 0.7449,
      "step": 839230
    },
    {
      "epoch": 2.9413269687060812,
      "grad_norm": 2.828125,
      "learning_rate": 1.0866037887697273e-06,
      "loss": 0.8128,
      "step": 839240
    },
    {
      "epoch": 2.9413620162129765,
      "grad_norm": 3.015625,
      "learning_rate": 1.0859547601060253e-06,
      "loss": 0.8022,
      "step": 839250
    },
    {
      "epoch": 2.9413970637198723,
      "grad_norm": 2.890625,
      "learning_rate": 1.0853057314423235e-06,
      "loss": 0.7394,
      "step": 839260
    },
    {
      "epoch": 2.941432111226768,
      "grad_norm": 2.875,
      "learning_rate": 1.0846567027786217e-06,
      "loss": 0.8001,
      "step": 839270
    },
    {
      "epoch": 2.9414671587336634,
      "grad_norm": 2.8125,
      "learning_rate": 1.0840076741149197e-06,
      "loss": 0.7768,
      "step": 839280
    },
    {
      "epoch": 2.941502206240559,
      "grad_norm": 2.890625,
      "learning_rate": 1.0833586454512177e-06,
      "loss": 0.746,
      "step": 839290
    },
    {
      "epoch": 2.9415372537474545,
      "grad_norm": 3.234375,
      "learning_rate": 1.082709616787516e-06,
      "loss": 0.9112,
      "step": 839300
    },
    {
      "epoch": 2.94157230125435,
      "grad_norm": 2.75,
      "learning_rate": 1.082060588123814e-06,
      "loss": 0.8243,
      "step": 839310
    },
    {
      "epoch": 2.941607348761246,
      "grad_norm": 3.125,
      "learning_rate": 1.081411559460112e-06,
      "loss": 0.7578,
      "step": 839320
    },
    {
      "epoch": 2.9416423962681417,
      "grad_norm": 3.03125,
      "learning_rate": 1.0807625307964101e-06,
      "loss": 0.7945,
      "step": 839330
    },
    {
      "epoch": 2.941677443775037,
      "grad_norm": 3.0,
      "learning_rate": 1.0801135021327084e-06,
      "loss": 0.8281,
      "step": 839340
    },
    {
      "epoch": 2.941712491281933,
      "grad_norm": 3.234375,
      "learning_rate": 1.0794644734690064e-06,
      "loss": 0.7866,
      "step": 839350
    },
    {
      "epoch": 2.941747538788828,
      "grad_norm": 2.59375,
      "learning_rate": 1.0788154448053044e-06,
      "loss": 0.7741,
      "step": 839360
    },
    {
      "epoch": 2.941782586295724,
      "grad_norm": 3.03125,
      "learning_rate": 1.0781664161416026e-06,
      "loss": 0.7236,
      "step": 839370
    },
    {
      "epoch": 2.9418176338026196,
      "grad_norm": 3.515625,
      "learning_rate": 1.0775173874779006e-06,
      "loss": 0.8463,
      "step": 839380
    },
    {
      "epoch": 2.941852681309515,
      "grad_norm": 2.875,
      "learning_rate": 1.0768683588141986e-06,
      "loss": 0.8019,
      "step": 839390
    },
    {
      "epoch": 2.9418877288164107,
      "grad_norm": 2.96875,
      "learning_rate": 1.0762193301504968e-06,
      "loss": 0.7418,
      "step": 839400
    },
    {
      "epoch": 2.941922776323306,
      "grad_norm": 2.9375,
      "learning_rate": 1.075570301486795e-06,
      "loss": 0.8885,
      "step": 839410
    },
    {
      "epoch": 2.9419578238302018,
      "grad_norm": 2.296875,
      "learning_rate": 1.074921272823093e-06,
      "loss": 0.7094,
      "step": 839420
    },
    {
      "epoch": 2.9419928713370975,
      "grad_norm": 2.640625,
      "learning_rate": 1.074272244159391e-06,
      "loss": 0.8476,
      "step": 839430
    },
    {
      "epoch": 2.9420279188439933,
      "grad_norm": 2.890625,
      "learning_rate": 1.0736232154956892e-06,
      "loss": 0.7522,
      "step": 839440
    },
    {
      "epoch": 2.9420629663508886,
      "grad_norm": 2.921875,
      "learning_rate": 1.0729741868319872e-06,
      "loss": 0.8021,
      "step": 839450
    },
    {
      "epoch": 2.9420980138577844,
      "grad_norm": 2.90625,
      "learning_rate": 1.0723251581682854e-06,
      "loss": 0.7316,
      "step": 839460
    },
    {
      "epoch": 2.9421330613646797,
      "grad_norm": 3.109375,
      "learning_rate": 1.0716761295045836e-06,
      "loss": 0.8018,
      "step": 839470
    },
    {
      "epoch": 2.9421681088715754,
      "grad_norm": 2.671875,
      "learning_rate": 1.0710271008408816e-06,
      "loss": 0.7916,
      "step": 839480
    },
    {
      "epoch": 2.942203156378471,
      "grad_norm": 2.53125,
      "learning_rate": 1.0703780721771796e-06,
      "loss": 0.9064,
      "step": 839490
    },
    {
      "epoch": 2.9422382038853665,
      "grad_norm": 3.03125,
      "learning_rate": 1.0697290435134778e-06,
      "loss": 0.7751,
      "step": 839500
    },
    {
      "epoch": 2.9422732513922623,
      "grad_norm": 2.875,
      "learning_rate": 1.0690800148497758e-06,
      "loss": 0.7632,
      "step": 839510
    },
    {
      "epoch": 2.9423082988991576,
      "grad_norm": 3.28125,
      "learning_rate": 1.0684309861860738e-06,
      "loss": 0.761,
      "step": 839520
    },
    {
      "epoch": 2.9423433464060533,
      "grad_norm": 2.90625,
      "learning_rate": 1.067781957522372e-06,
      "loss": 0.8052,
      "step": 839530
    },
    {
      "epoch": 2.942378393912949,
      "grad_norm": 2.5,
      "learning_rate": 1.0671329288586703e-06,
      "loss": 0.8155,
      "step": 839540
    },
    {
      "epoch": 2.942413441419845,
      "grad_norm": 2.734375,
      "learning_rate": 1.0664839001949683e-06,
      "loss": 0.8033,
      "step": 839550
    },
    {
      "epoch": 2.94244848892674,
      "grad_norm": 2.984375,
      "learning_rate": 1.0658348715312663e-06,
      "loss": 0.8475,
      "step": 839560
    },
    {
      "epoch": 2.942483536433636,
      "grad_norm": 3.171875,
      "learning_rate": 1.0651858428675645e-06,
      "loss": 0.844,
      "step": 839570
    },
    {
      "epoch": 2.9425185839405312,
      "grad_norm": 2.71875,
      "learning_rate": 1.0645368142038625e-06,
      "loss": 0.8368,
      "step": 839580
    },
    {
      "epoch": 2.942553631447427,
      "grad_norm": 3.09375,
      "learning_rate": 1.0638877855401605e-06,
      "loss": 0.9012,
      "step": 839590
    },
    {
      "epoch": 2.9425886789543227,
      "grad_norm": 2.703125,
      "learning_rate": 1.063238756876459e-06,
      "loss": 0.7778,
      "step": 839600
    },
    {
      "epoch": 2.942623726461218,
      "grad_norm": 2.734375,
      "learning_rate": 1.062589728212757e-06,
      "loss": 0.7669,
      "step": 839610
    },
    {
      "epoch": 2.942658773968114,
      "grad_norm": 2.75,
      "learning_rate": 1.061940699549055e-06,
      "loss": 0.8158,
      "step": 839620
    },
    {
      "epoch": 2.942693821475009,
      "grad_norm": 2.796875,
      "learning_rate": 1.0612916708853531e-06,
      "loss": 0.7918,
      "step": 839630
    },
    {
      "epoch": 2.942728868981905,
      "grad_norm": 2.875,
      "learning_rate": 1.0606426422216511e-06,
      "loss": 0.8011,
      "step": 839640
    },
    {
      "epoch": 2.9427639164888006,
      "grad_norm": 2.9375,
      "learning_rate": 1.0599936135579491e-06,
      "loss": 0.7639,
      "step": 839650
    },
    {
      "epoch": 2.9427989639956964,
      "grad_norm": 2.96875,
      "learning_rate": 1.0593445848942473e-06,
      "loss": 0.826,
      "step": 839660
    },
    {
      "epoch": 2.9428340115025917,
      "grad_norm": 2.953125,
      "learning_rate": 1.0586955562305455e-06,
      "loss": 0.7496,
      "step": 839670
    },
    {
      "epoch": 2.9428690590094875,
      "grad_norm": 2.765625,
      "learning_rate": 1.0580465275668435e-06,
      "loss": 0.8764,
      "step": 839680
    },
    {
      "epoch": 2.942904106516383,
      "grad_norm": 2.40625,
      "learning_rate": 1.0573974989031415e-06,
      "loss": 0.7366,
      "step": 839690
    },
    {
      "epoch": 2.9429391540232785,
      "grad_norm": 2.765625,
      "learning_rate": 1.0567484702394397e-06,
      "loss": 0.823,
      "step": 839700
    },
    {
      "epoch": 2.9429742015301743,
      "grad_norm": 2.921875,
      "learning_rate": 1.0560994415757377e-06,
      "loss": 0.8706,
      "step": 839710
    },
    {
      "epoch": 2.9430092490370696,
      "grad_norm": 3.03125,
      "learning_rate": 1.0554504129120357e-06,
      "loss": 0.8333,
      "step": 839720
    },
    {
      "epoch": 2.9430442965439654,
      "grad_norm": 2.78125,
      "learning_rate": 1.0548013842483342e-06,
      "loss": 0.7482,
      "step": 839730
    },
    {
      "epoch": 2.9430793440508607,
      "grad_norm": 2.6875,
      "learning_rate": 1.0541523555846322e-06,
      "loss": 0.7843,
      "step": 839740
    },
    {
      "epoch": 2.9431143915577564,
      "grad_norm": 2.65625,
      "learning_rate": 1.0535033269209302e-06,
      "loss": 0.7874,
      "step": 839750
    },
    {
      "epoch": 2.943149439064652,
      "grad_norm": 2.609375,
      "learning_rate": 1.0528542982572284e-06,
      "loss": 0.7113,
      "step": 839760
    },
    {
      "epoch": 2.943184486571548,
      "grad_norm": 2.765625,
      "learning_rate": 1.0522052695935264e-06,
      "loss": 0.7231,
      "step": 839770
    },
    {
      "epoch": 2.9432195340784433,
      "grad_norm": 3.1875,
      "learning_rate": 1.0515562409298244e-06,
      "loss": 0.7902,
      "step": 839780
    },
    {
      "epoch": 2.943254581585339,
      "grad_norm": 2.921875,
      "learning_rate": 1.0509072122661226e-06,
      "loss": 0.8173,
      "step": 839790
    },
    {
      "epoch": 2.9432896290922343,
      "grad_norm": 3.234375,
      "learning_rate": 1.0502581836024208e-06,
      "loss": 0.8281,
      "step": 839800
    },
    {
      "epoch": 2.94332467659913,
      "grad_norm": 2.5625,
      "learning_rate": 1.0496091549387188e-06,
      "loss": 0.7893,
      "step": 839810
    },
    {
      "epoch": 2.943359724106026,
      "grad_norm": 3.359375,
      "learning_rate": 1.0489601262750168e-06,
      "loss": 0.9157,
      "step": 839820
    },
    {
      "epoch": 2.943394771612921,
      "grad_norm": 2.5625,
      "learning_rate": 1.048311097611315e-06,
      "loss": 0.789,
      "step": 839830
    },
    {
      "epoch": 2.943429819119817,
      "grad_norm": 3.03125,
      "learning_rate": 1.047662068947613e-06,
      "loss": 0.8303,
      "step": 839840
    },
    {
      "epoch": 2.9434648666267123,
      "grad_norm": 3.65625,
      "learning_rate": 1.047013040283911e-06,
      "loss": 0.8705,
      "step": 839850
    },
    {
      "epoch": 2.943499914133608,
      "grad_norm": 2.578125,
      "learning_rate": 1.0463640116202092e-06,
      "loss": 0.7598,
      "step": 839860
    },
    {
      "epoch": 2.9435349616405038,
      "grad_norm": 3.09375,
      "learning_rate": 1.0457149829565074e-06,
      "loss": 0.8085,
      "step": 839870
    },
    {
      "epoch": 2.9435700091473995,
      "grad_norm": 3.28125,
      "learning_rate": 1.0450659542928054e-06,
      "loss": 0.8718,
      "step": 839880
    },
    {
      "epoch": 2.943605056654295,
      "grad_norm": 2.0625,
      "learning_rate": 1.0444169256291036e-06,
      "loss": 0.8112,
      "step": 839890
    },
    {
      "epoch": 2.9436401041611906,
      "grad_norm": 2.859375,
      "learning_rate": 1.0437678969654016e-06,
      "loss": 0.8295,
      "step": 839900
    },
    {
      "epoch": 2.943675151668086,
      "grad_norm": 2.25,
      "learning_rate": 1.0431188683016996e-06,
      "loss": 0.764,
      "step": 839910
    },
    {
      "epoch": 2.9437101991749817,
      "grad_norm": 3.703125,
      "learning_rate": 1.0424698396379979e-06,
      "loss": 0.8035,
      "step": 839920
    },
    {
      "epoch": 2.9437452466818774,
      "grad_norm": 2.859375,
      "learning_rate": 1.041820810974296e-06,
      "loss": 0.7771,
      "step": 839930
    },
    {
      "epoch": 2.9437802941887727,
      "grad_norm": 3.234375,
      "learning_rate": 1.041171782310594e-06,
      "loss": 0.8573,
      "step": 839940
    },
    {
      "epoch": 2.9438153416956685,
      "grad_norm": 3.28125,
      "learning_rate": 1.040522753646892e-06,
      "loss": 0.8597,
      "step": 839950
    },
    {
      "epoch": 2.943850389202564,
      "grad_norm": 3.078125,
      "learning_rate": 1.0398737249831903e-06,
      "loss": 0.8507,
      "step": 839960
    },
    {
      "epoch": 2.9438854367094596,
      "grad_norm": 3.125,
      "learning_rate": 1.0392246963194883e-06,
      "loss": 0.8321,
      "step": 839970
    },
    {
      "epoch": 2.9439204842163553,
      "grad_norm": 3.296875,
      "learning_rate": 1.0385756676557863e-06,
      "loss": 0.8572,
      "step": 839980
    },
    {
      "epoch": 2.943955531723251,
      "grad_norm": 2.75,
      "learning_rate": 1.0379266389920845e-06,
      "loss": 0.7059,
      "step": 839990
    },
    {
      "epoch": 2.9439905792301464,
      "grad_norm": 3.265625,
      "learning_rate": 1.0372776103283827e-06,
      "loss": 0.8008,
      "step": 840000
    },
    {
      "epoch": 2.9439905792301464,
      "eval_loss": 0.7500327825546265,
      "eval_runtime": 558.6705,
      "eval_samples_per_second": 680.967,
      "eval_steps_per_second": 56.747,
      "step": 840000
    },
    {
      "epoch": 2.944025626737042,
      "grad_norm": 2.59375,
      "learning_rate": 1.0366285816646807e-06,
      "loss": 0.7922,
      "step": 840010
    },
    {
      "epoch": 2.9440606742439375,
      "grad_norm": 3.140625,
      "learning_rate": 1.0359795530009787e-06,
      "loss": 0.8168,
      "step": 840020
    },
    {
      "epoch": 2.9440957217508332,
      "grad_norm": 2.875,
      "learning_rate": 1.035330524337277e-06,
      "loss": 0.7996,
      "step": 840030
    },
    {
      "epoch": 2.944130769257729,
      "grad_norm": 2.984375,
      "learning_rate": 1.034681495673575e-06,
      "loss": 0.8755,
      "step": 840040
    },
    {
      "epoch": 2.9441658167646243,
      "grad_norm": 3.78125,
      "learning_rate": 1.034032467009873e-06,
      "loss": 0.8278,
      "step": 840050
    },
    {
      "epoch": 2.94420086427152,
      "grad_norm": 3.34375,
      "learning_rate": 1.0333834383461711e-06,
      "loss": 0.8321,
      "step": 840060
    },
    {
      "epoch": 2.9442359117784154,
      "grad_norm": 3.53125,
      "learning_rate": 1.0327344096824693e-06,
      "loss": 0.7383,
      "step": 840070
    },
    {
      "epoch": 2.944270959285311,
      "grad_norm": 2.671875,
      "learning_rate": 1.0320853810187673e-06,
      "loss": 0.7283,
      "step": 840080
    },
    {
      "epoch": 2.944306006792207,
      "grad_norm": 2.4375,
      "learning_rate": 1.0314363523550655e-06,
      "loss": 0.8272,
      "step": 840090
    },
    {
      "epoch": 2.9443410542991026,
      "grad_norm": 2.578125,
      "learning_rate": 1.0307873236913635e-06,
      "loss": 0.7995,
      "step": 840100
    },
    {
      "epoch": 2.944376101805998,
      "grad_norm": 2.859375,
      "learning_rate": 1.0301382950276615e-06,
      "loss": 0.9029,
      "step": 840110
    },
    {
      "epoch": 2.9444111493128937,
      "grad_norm": 2.828125,
      "learning_rate": 1.0294892663639598e-06,
      "loss": 0.7731,
      "step": 840120
    },
    {
      "epoch": 2.944446196819789,
      "grad_norm": 2.890625,
      "learning_rate": 1.028840237700258e-06,
      "loss": 0.7599,
      "step": 840130
    },
    {
      "epoch": 2.944481244326685,
      "grad_norm": 3.03125,
      "learning_rate": 1.028191209036556e-06,
      "loss": 0.8379,
      "step": 840140
    },
    {
      "epoch": 2.9445162918335805,
      "grad_norm": 3.234375,
      "learning_rate": 1.027542180372854e-06,
      "loss": 0.7957,
      "step": 840150
    },
    {
      "epoch": 2.944551339340476,
      "grad_norm": 3.140625,
      "learning_rate": 1.0268931517091522e-06,
      "loss": 0.8057,
      "step": 840160
    },
    {
      "epoch": 2.9445863868473716,
      "grad_norm": 2.8125,
      "learning_rate": 1.0262441230454502e-06,
      "loss": 0.7816,
      "step": 840170
    },
    {
      "epoch": 2.9446214343542674,
      "grad_norm": 2.796875,
      "learning_rate": 1.0255950943817482e-06,
      "loss": 0.8274,
      "step": 840180
    },
    {
      "epoch": 2.9446564818611627,
      "grad_norm": 2.6875,
      "learning_rate": 1.0249460657180464e-06,
      "loss": 0.7536,
      "step": 840190
    },
    {
      "epoch": 2.9446915293680584,
      "grad_norm": 3.25,
      "learning_rate": 1.0242970370543446e-06,
      "loss": 0.8251,
      "step": 840200
    },
    {
      "epoch": 2.944726576874954,
      "grad_norm": 2.625,
      "learning_rate": 1.0236480083906426e-06,
      "loss": 0.8166,
      "step": 840210
    },
    {
      "epoch": 2.9447616243818495,
      "grad_norm": 2.859375,
      "learning_rate": 1.0229989797269408e-06,
      "loss": 0.8459,
      "step": 840220
    },
    {
      "epoch": 2.9447966718887453,
      "grad_norm": 2.828125,
      "learning_rate": 1.0223499510632388e-06,
      "loss": 0.7856,
      "step": 840230
    },
    {
      "epoch": 2.9448317193956406,
      "grad_norm": 2.765625,
      "learning_rate": 1.0217009223995368e-06,
      "loss": 0.8721,
      "step": 840240
    },
    {
      "epoch": 2.9448667669025363,
      "grad_norm": 2.765625,
      "learning_rate": 1.021051893735835e-06,
      "loss": 0.764,
      "step": 840250
    },
    {
      "epoch": 2.944901814409432,
      "grad_norm": 3.0,
      "learning_rate": 1.020402865072133e-06,
      "loss": 0.754,
      "step": 840260
    },
    {
      "epoch": 2.9449368619163274,
      "grad_norm": 3.140625,
      "learning_rate": 1.0197538364084312e-06,
      "loss": 0.7987,
      "step": 840270
    },
    {
      "epoch": 2.944971909423223,
      "grad_norm": 3.015625,
      "learning_rate": 1.0191048077447292e-06,
      "loss": 0.8472,
      "step": 840280
    },
    {
      "epoch": 2.945006956930119,
      "grad_norm": 2.765625,
      "learning_rate": 1.0184557790810275e-06,
      "loss": 0.722,
      "step": 840290
    },
    {
      "epoch": 2.9450420044370142,
      "grad_norm": 2.59375,
      "learning_rate": 1.0178067504173255e-06,
      "loss": 0.7644,
      "step": 840300
    },
    {
      "epoch": 2.94507705194391,
      "grad_norm": 3.203125,
      "learning_rate": 1.0171577217536235e-06,
      "loss": 0.8147,
      "step": 840310
    },
    {
      "epoch": 2.9451120994508058,
      "grad_norm": 3.140625,
      "learning_rate": 1.0165086930899217e-06,
      "loss": 0.9163,
      "step": 840320
    },
    {
      "epoch": 2.945147146957701,
      "grad_norm": 2.96875,
      "learning_rate": 1.0158596644262199e-06,
      "loss": 0.8536,
      "step": 840330
    },
    {
      "epoch": 2.945182194464597,
      "grad_norm": 3.21875,
      "learning_rate": 1.0152106357625179e-06,
      "loss": 0.809,
      "step": 840340
    },
    {
      "epoch": 2.945217241971492,
      "grad_norm": 2.875,
      "learning_rate": 1.014561607098816e-06,
      "loss": 0.8072,
      "step": 840350
    },
    {
      "epoch": 2.945252289478388,
      "grad_norm": 3.078125,
      "learning_rate": 1.013912578435114e-06,
      "loss": 0.7966,
      "step": 840360
    },
    {
      "epoch": 2.9452873369852837,
      "grad_norm": 3.03125,
      "learning_rate": 1.013263549771412e-06,
      "loss": 0.7898,
      "step": 840370
    },
    {
      "epoch": 2.9453223844921794,
      "grad_norm": 3.0,
      "learning_rate": 1.0126145211077103e-06,
      "loss": 0.7936,
      "step": 840380
    },
    {
      "epoch": 2.9453574319990747,
      "grad_norm": 2.296875,
      "learning_rate": 1.0119654924440083e-06,
      "loss": 0.7046,
      "step": 840390
    },
    {
      "epoch": 2.9453924795059705,
      "grad_norm": 2.9375,
      "learning_rate": 1.0113164637803065e-06,
      "loss": 0.8311,
      "step": 840400
    },
    {
      "epoch": 2.945427527012866,
      "grad_norm": 3.09375,
      "learning_rate": 1.0106674351166045e-06,
      "loss": 0.7478,
      "step": 840410
    },
    {
      "epoch": 2.9454625745197616,
      "grad_norm": 3.34375,
      "learning_rate": 1.0100184064529027e-06,
      "loss": 0.819,
      "step": 840420
    },
    {
      "epoch": 2.9454976220266573,
      "grad_norm": 3.078125,
      "learning_rate": 1.0093693777892007e-06,
      "loss": 0.8501,
      "step": 840430
    },
    {
      "epoch": 2.9455326695335526,
      "grad_norm": 2.71875,
      "learning_rate": 1.0087203491254987e-06,
      "loss": 0.805,
      "step": 840440
    },
    {
      "epoch": 2.9455677170404484,
      "grad_norm": 2.71875,
      "learning_rate": 1.008071320461797e-06,
      "loss": 0.7592,
      "step": 840450
    },
    {
      "epoch": 2.9456027645473437,
      "grad_norm": 2.5625,
      "learning_rate": 1.007422291798095e-06,
      "loss": 0.7736,
      "step": 840460
    },
    {
      "epoch": 2.9456378120542395,
      "grad_norm": 2.875,
      "learning_rate": 1.0067732631343931e-06,
      "loss": 0.8707,
      "step": 840470
    },
    {
      "epoch": 2.9456728595611352,
      "grad_norm": 2.5,
      "learning_rate": 1.0061242344706914e-06,
      "loss": 0.7891,
      "step": 840480
    },
    {
      "epoch": 2.945707907068031,
      "grad_norm": 3.171875,
      "learning_rate": 1.0054752058069894e-06,
      "loss": 0.7861,
      "step": 840490
    },
    {
      "epoch": 2.9457429545749263,
      "grad_norm": 3.28125,
      "learning_rate": 1.0048261771432874e-06,
      "loss": 0.8347,
      "step": 840500
    },
    {
      "epoch": 2.945778002081822,
      "grad_norm": 2.796875,
      "learning_rate": 1.0041771484795856e-06,
      "loss": 0.7562,
      "step": 840510
    },
    {
      "epoch": 2.9458130495887174,
      "grad_norm": 3.28125,
      "learning_rate": 1.0035281198158836e-06,
      "loss": 0.8631,
      "step": 840520
    },
    {
      "epoch": 2.945848097095613,
      "grad_norm": 3.0,
      "learning_rate": 1.0028790911521816e-06,
      "loss": 0.7345,
      "step": 840530
    },
    {
      "epoch": 2.945883144602509,
      "grad_norm": 3.15625,
      "learning_rate": 1.0022300624884798e-06,
      "loss": 0.8079,
      "step": 840540
    },
    {
      "epoch": 2.945918192109404,
      "grad_norm": 2.71875,
      "learning_rate": 1.001581033824778e-06,
      "loss": 0.8299,
      "step": 840550
    },
    {
      "epoch": 2.9459532396163,
      "grad_norm": 2.8125,
      "learning_rate": 1.000932005161076e-06,
      "loss": 0.7687,
      "step": 840560
    },
    {
      "epoch": 2.9459882871231953,
      "grad_norm": 2.890625,
      "learning_rate": 1.000282976497374e-06,
      "loss": 0.8358,
      "step": 840570
    },
    {
      "epoch": 2.946023334630091,
      "grad_norm": 3.109375,
      "learning_rate": 9.996339478336722e-07,
      "loss": 0.7665,
      "step": 840580
    },
    {
      "epoch": 2.946058382136987,
      "grad_norm": 3.09375,
      "learning_rate": 9.989849191699702e-07,
      "loss": 0.8024,
      "step": 840590
    },
    {
      "epoch": 2.9460934296438825,
      "grad_norm": 3.515625,
      "learning_rate": 9.983358905062684e-07,
      "loss": 0.8158,
      "step": 840600
    },
    {
      "epoch": 2.946128477150778,
      "grad_norm": 3.109375,
      "learning_rate": 9.976868618425664e-07,
      "loss": 0.6835,
      "step": 840610
    },
    {
      "epoch": 2.9461635246576736,
      "grad_norm": 2.703125,
      "learning_rate": 9.970378331788646e-07,
      "loss": 0.8108,
      "step": 840620
    },
    {
      "epoch": 2.946198572164569,
      "grad_norm": 2.9375,
      "learning_rate": 9.963888045151626e-07,
      "loss": 0.8237,
      "step": 840630
    },
    {
      "epoch": 2.9462336196714647,
      "grad_norm": 2.71875,
      "learning_rate": 9.957397758514606e-07,
      "loss": 0.7744,
      "step": 840640
    },
    {
      "epoch": 2.9462686671783604,
      "grad_norm": 3.0625,
      "learning_rate": 9.950907471877588e-07,
      "loss": 0.7003,
      "step": 840650
    },
    {
      "epoch": 2.9463037146852558,
      "grad_norm": 2.609375,
      "learning_rate": 9.944417185240568e-07,
      "loss": 0.7725,
      "step": 840660
    },
    {
      "epoch": 2.9463387621921515,
      "grad_norm": 2.96875,
      "learning_rate": 9.93792689860355e-07,
      "loss": 0.7645,
      "step": 840670
    },
    {
      "epoch": 2.946373809699047,
      "grad_norm": 2.65625,
      "learning_rate": 9.931436611966533e-07,
      "loss": 0.8297,
      "step": 840680
    },
    {
      "epoch": 2.9464088572059426,
      "grad_norm": 3.0625,
      "learning_rate": 9.924946325329513e-07,
      "loss": 0.8802,
      "step": 840690
    },
    {
      "epoch": 2.9464439047128383,
      "grad_norm": 3.53125,
      "learning_rate": 9.918456038692493e-07,
      "loss": 0.8705,
      "step": 840700
    },
    {
      "epoch": 2.946478952219734,
      "grad_norm": 2.65625,
      "learning_rate": 9.911965752055475e-07,
      "loss": 0.802,
      "step": 840710
    },
    {
      "epoch": 2.9465139997266294,
      "grad_norm": 2.90625,
      "learning_rate": 9.905475465418455e-07,
      "loss": 0.8334,
      "step": 840720
    },
    {
      "epoch": 2.946549047233525,
      "grad_norm": 3.109375,
      "learning_rate": 9.898985178781435e-07,
      "loss": 0.766,
      "step": 840730
    },
    {
      "epoch": 2.9465840947404205,
      "grad_norm": 2.703125,
      "learning_rate": 9.892494892144417e-07,
      "loss": 0.767,
      "step": 840740
    },
    {
      "epoch": 2.9466191422473162,
      "grad_norm": 2.734375,
      "learning_rate": 9.8860046055074e-07,
      "loss": 0.8212,
      "step": 840750
    },
    {
      "epoch": 2.946654189754212,
      "grad_norm": 2.8125,
      "learning_rate": 9.87951431887038e-07,
      "loss": 0.8612,
      "step": 840760
    },
    {
      "epoch": 2.9466892372611073,
      "grad_norm": 3.109375,
      "learning_rate": 9.87302403223336e-07,
      "loss": 0.802,
      "step": 840770
    },
    {
      "epoch": 2.946724284768003,
      "grad_norm": 3.015625,
      "learning_rate": 9.866533745596341e-07,
      "loss": 0.7785,
      "step": 840780
    },
    {
      "epoch": 2.9467593322748984,
      "grad_norm": 2.59375,
      "learning_rate": 9.860043458959321e-07,
      "loss": 0.7309,
      "step": 840790
    },
    {
      "epoch": 2.946794379781794,
      "grad_norm": 3.09375,
      "learning_rate": 9.853553172322303e-07,
      "loss": 0.7977,
      "step": 840800
    },
    {
      "epoch": 2.94682942728869,
      "grad_norm": 2.9375,
      "learning_rate": 9.847062885685285e-07,
      "loss": 0.7757,
      "step": 840810
    },
    {
      "epoch": 2.9468644747955857,
      "grad_norm": 3.0625,
      "learning_rate": 9.840572599048265e-07,
      "loss": 0.7333,
      "step": 840820
    },
    {
      "epoch": 2.946899522302481,
      "grad_norm": 2.9375,
      "learning_rate": 9.834082312411245e-07,
      "loss": 0.7913,
      "step": 840830
    },
    {
      "epoch": 2.9469345698093767,
      "grad_norm": 2.78125,
      "learning_rate": 9.827592025774227e-07,
      "loss": 0.8424,
      "step": 840840
    },
    {
      "epoch": 2.946969617316272,
      "grad_norm": 3.171875,
      "learning_rate": 9.821101739137207e-07,
      "loss": 0.8616,
      "step": 840850
    },
    {
      "epoch": 2.947004664823168,
      "grad_norm": 2.859375,
      "learning_rate": 9.814611452500187e-07,
      "loss": 0.8546,
      "step": 840860
    },
    {
      "epoch": 2.9470397123300636,
      "grad_norm": 2.984375,
      "learning_rate": 9.80812116586317e-07,
      "loss": 0.7968,
      "step": 840870
    },
    {
      "epoch": 2.947074759836959,
      "grad_norm": 2.828125,
      "learning_rate": 9.801630879226152e-07,
      "loss": 0.76,
      "step": 840880
    },
    {
      "epoch": 2.9471098073438546,
      "grad_norm": 2.640625,
      "learning_rate": 9.795140592589132e-07,
      "loss": 0.7718,
      "step": 840890
    },
    {
      "epoch": 2.94714485485075,
      "grad_norm": 2.5625,
      "learning_rate": 9.788650305952112e-07,
      "loss": 0.767,
      "step": 840900
    },
    {
      "epoch": 2.9471799023576457,
      "grad_norm": 2.953125,
      "learning_rate": 9.782160019315094e-07,
      "loss": 0.84,
      "step": 840910
    },
    {
      "epoch": 2.9472149498645415,
      "grad_norm": 3.078125,
      "learning_rate": 9.775669732678074e-07,
      "loss": 0.787,
      "step": 840920
    },
    {
      "epoch": 2.9472499973714372,
      "grad_norm": 2.75,
      "learning_rate": 9.769179446041054e-07,
      "loss": 0.7563,
      "step": 840930
    },
    {
      "epoch": 2.9472850448783325,
      "grad_norm": 3.40625,
      "learning_rate": 9.762689159404038e-07,
      "loss": 0.8124,
      "step": 840940
    },
    {
      "epoch": 2.9473200923852283,
      "grad_norm": 2.59375,
      "learning_rate": 9.756198872767018e-07,
      "loss": 0.7597,
      "step": 840950
    },
    {
      "epoch": 2.9473551398921236,
      "grad_norm": 2.90625,
      "learning_rate": 9.749708586129998e-07,
      "loss": 0.8061,
      "step": 840960
    },
    {
      "epoch": 2.9473901873990194,
      "grad_norm": 2.96875,
      "learning_rate": 9.74321829949298e-07,
      "loss": 0.7609,
      "step": 840970
    },
    {
      "epoch": 2.947425234905915,
      "grad_norm": 2.96875,
      "learning_rate": 9.73672801285596e-07,
      "loss": 0.8269,
      "step": 840980
    },
    {
      "epoch": 2.9474602824128104,
      "grad_norm": 3.296875,
      "learning_rate": 9.73023772621894e-07,
      "loss": 0.9103,
      "step": 840990
    },
    {
      "epoch": 2.947495329919706,
      "grad_norm": 2.828125,
      "learning_rate": 9.723747439581922e-07,
      "loss": 0.8054,
      "step": 841000
    },
    {
      "epoch": 2.9475303774266015,
      "grad_norm": 3.125,
      "learning_rate": 9.717257152944904e-07,
      "loss": 0.9131,
      "step": 841010
    },
    {
      "epoch": 2.9475654249334973,
      "grad_norm": 2.765625,
      "learning_rate": 9.710766866307884e-07,
      "loss": 0.7685,
      "step": 841020
    },
    {
      "epoch": 2.947600472440393,
      "grad_norm": 2.828125,
      "learning_rate": 9.704276579670864e-07,
      "loss": 0.8136,
      "step": 841030
    },
    {
      "epoch": 2.947635519947289,
      "grad_norm": 3.0,
      "learning_rate": 9.697786293033846e-07,
      "loss": 0.8251,
      "step": 841040
    },
    {
      "epoch": 2.947670567454184,
      "grad_norm": 3.375,
      "learning_rate": 9.691296006396826e-07,
      "loss": 0.8598,
      "step": 841050
    },
    {
      "epoch": 2.94770561496108,
      "grad_norm": 2.5625,
      "learning_rate": 9.684805719759806e-07,
      "loss": 0.7022,
      "step": 841060
    },
    {
      "epoch": 2.947740662467975,
      "grad_norm": 2.84375,
      "learning_rate": 9.67831543312279e-07,
      "loss": 0.7941,
      "step": 841070
    },
    {
      "epoch": 2.947775709974871,
      "grad_norm": 2.78125,
      "learning_rate": 9.67182514648577e-07,
      "loss": 0.7968,
      "step": 841080
    },
    {
      "epoch": 2.9478107574817667,
      "grad_norm": 2.828125,
      "learning_rate": 9.66533485984875e-07,
      "loss": 0.8123,
      "step": 841090
    },
    {
      "epoch": 2.947845804988662,
      "grad_norm": 2.9375,
      "learning_rate": 9.658844573211733e-07,
      "loss": 0.8104,
      "step": 841100
    },
    {
      "epoch": 2.9478808524955578,
      "grad_norm": 3.046875,
      "learning_rate": 9.652354286574713e-07,
      "loss": 0.8065,
      "step": 841110
    },
    {
      "epoch": 2.947915900002453,
      "grad_norm": 2.484375,
      "learning_rate": 9.645863999937693e-07,
      "loss": 0.794,
      "step": 841120
    },
    {
      "epoch": 2.947950947509349,
      "grad_norm": 3.1875,
      "learning_rate": 9.639373713300675e-07,
      "loss": 0.7735,
      "step": 841130
    },
    {
      "epoch": 2.9479859950162446,
      "grad_norm": 3.140625,
      "learning_rate": 9.632883426663657e-07,
      "loss": 0.8277,
      "step": 841140
    },
    {
      "epoch": 2.9480210425231403,
      "grad_norm": 3.015625,
      "learning_rate": 9.626393140026637e-07,
      "loss": 0.7516,
      "step": 841150
    },
    {
      "epoch": 2.9480560900300357,
      "grad_norm": 2.78125,
      "learning_rate": 9.619902853389617e-07,
      "loss": 0.8624,
      "step": 841160
    },
    {
      "epoch": 2.9480911375369314,
      "grad_norm": 2.828125,
      "learning_rate": 9.6134125667526e-07,
      "loss": 0.7993,
      "step": 841170
    },
    {
      "epoch": 2.9481261850438267,
      "grad_norm": 3.359375,
      "learning_rate": 9.60692228011558e-07,
      "loss": 0.7652,
      "step": 841180
    },
    {
      "epoch": 2.9481612325507225,
      "grad_norm": 3.125,
      "learning_rate": 9.60043199347856e-07,
      "loss": 0.8812,
      "step": 841190
    },
    {
      "epoch": 2.9481962800576182,
      "grad_norm": 2.78125,
      "learning_rate": 9.593941706841541e-07,
      "loss": 0.8162,
      "step": 841200
    },
    {
      "epoch": 2.9482313275645136,
      "grad_norm": 2.921875,
      "learning_rate": 9.587451420204523e-07,
      "loss": 0.7563,
      "step": 841210
    },
    {
      "epoch": 2.9482663750714093,
      "grad_norm": 3.40625,
      "learning_rate": 9.580961133567503e-07,
      "loss": 0.7826,
      "step": 841220
    },
    {
      "epoch": 2.9483014225783046,
      "grad_norm": 2.640625,
      "learning_rate": 9.574470846930483e-07,
      "loss": 0.7161,
      "step": 841230
    },
    {
      "epoch": 2.9483364700852004,
      "grad_norm": 3.0625,
      "learning_rate": 9.567980560293465e-07,
      "loss": 0.7876,
      "step": 841240
    },
    {
      "epoch": 2.948371517592096,
      "grad_norm": 3.046875,
      "learning_rate": 9.561490273656445e-07,
      "loss": 0.7751,
      "step": 841250
    },
    {
      "epoch": 2.948406565098992,
      "grad_norm": 2.6875,
      "learning_rate": 9.554999987019425e-07,
      "loss": 0.7889,
      "step": 841260
    },
    {
      "epoch": 2.948441612605887,
      "grad_norm": 2.578125,
      "learning_rate": 9.54850970038241e-07,
      "loss": 0.8108,
      "step": 841270
    },
    {
      "epoch": 2.948476660112783,
      "grad_norm": 2.828125,
      "learning_rate": 9.54201941374539e-07,
      "loss": 0.7739,
      "step": 841280
    },
    {
      "epoch": 2.9485117076196783,
      "grad_norm": 2.5625,
      "learning_rate": 9.535529127108371e-07,
      "loss": 0.752,
      "step": 841290
    },
    {
      "epoch": 2.948546755126574,
      "grad_norm": 3.015625,
      "learning_rate": 9.529038840471351e-07,
      "loss": 0.8163,
      "step": 841300
    },
    {
      "epoch": 2.94858180263347,
      "grad_norm": 2.984375,
      "learning_rate": 9.522548553834332e-07,
      "loss": 0.8963,
      "step": 841310
    },
    {
      "epoch": 2.948616850140365,
      "grad_norm": 3.125,
      "learning_rate": 9.516058267197313e-07,
      "loss": 0.8652,
      "step": 841320
    },
    {
      "epoch": 2.948651897647261,
      "grad_norm": 3.09375,
      "learning_rate": 9.509567980560293e-07,
      "loss": 0.9255,
      "step": 841330
    },
    {
      "epoch": 2.948686945154156,
      "grad_norm": 2.734375,
      "learning_rate": 9.503077693923276e-07,
      "loss": 0.7845,
      "step": 841340
    },
    {
      "epoch": 2.948721992661052,
      "grad_norm": 2.921875,
      "learning_rate": 9.496587407286256e-07,
      "loss": 0.8924,
      "step": 841350
    },
    {
      "epoch": 2.9487570401679477,
      "grad_norm": 2.84375,
      "learning_rate": 9.490097120649237e-07,
      "loss": 0.8211,
      "step": 841360
    },
    {
      "epoch": 2.9487920876748435,
      "grad_norm": 3.109375,
      "learning_rate": 9.483606834012218e-07,
      "loss": 0.7866,
      "step": 841370
    },
    {
      "epoch": 2.948827135181739,
      "grad_norm": 2.8125,
      "learning_rate": 9.477116547375198e-07,
      "loss": 0.8529,
      "step": 841380
    },
    {
      "epoch": 2.9488621826886345,
      "grad_norm": 2.359375,
      "learning_rate": 9.470626260738179e-07,
      "loss": 0.7004,
      "step": 841390
    },
    {
      "epoch": 2.94889723019553,
      "grad_norm": 3.140625,
      "learning_rate": 9.46413597410116e-07,
      "loss": 0.8119,
      "step": 841400
    },
    {
      "epoch": 2.9489322777024256,
      "grad_norm": 3.0,
      "learning_rate": 9.457645687464142e-07,
      "loss": 0.7583,
      "step": 841410
    },
    {
      "epoch": 2.9489673252093214,
      "grad_norm": 2.515625,
      "learning_rate": 9.451155400827123e-07,
      "loss": 0.7262,
      "step": 841420
    },
    {
      "epoch": 2.9490023727162167,
      "grad_norm": 3.03125,
      "learning_rate": 9.444665114190103e-07,
      "loss": 0.7675,
      "step": 841430
    },
    {
      "epoch": 2.9490374202231124,
      "grad_norm": 2.953125,
      "learning_rate": 9.438174827553085e-07,
      "loss": 0.7783,
      "step": 841440
    },
    {
      "epoch": 2.9490724677300078,
      "grad_norm": 3.046875,
      "learning_rate": 9.431684540916066e-07,
      "loss": 0.7859,
      "step": 841450
    },
    {
      "epoch": 2.9491075152369035,
      "grad_norm": 2.859375,
      "learning_rate": 9.425194254279046e-07,
      "loss": 0.8217,
      "step": 841460
    },
    {
      "epoch": 2.9491425627437993,
      "grad_norm": 3.140625,
      "learning_rate": 9.418703967642028e-07,
      "loss": 0.8031,
      "step": 841470
    },
    {
      "epoch": 2.949177610250695,
      "grad_norm": 3.3125,
      "learning_rate": 9.412213681005009e-07,
      "loss": 0.8044,
      "step": 841480
    },
    {
      "epoch": 2.9492126577575903,
      "grad_norm": 3.296875,
      "learning_rate": 9.40572339436799e-07,
      "loss": 0.9145,
      "step": 841490
    },
    {
      "epoch": 2.949247705264486,
      "grad_norm": 3.140625,
      "learning_rate": 9.39923310773097e-07,
      "loss": 0.7561,
      "step": 841500
    },
    {
      "epoch": 2.9492827527713814,
      "grad_norm": 2.9375,
      "learning_rate": 9.392742821093951e-07,
      "loss": 0.8107,
      "step": 841510
    },
    {
      "epoch": 2.949317800278277,
      "grad_norm": 2.96875,
      "learning_rate": 9.386252534456932e-07,
      "loss": 0.9034,
      "step": 841520
    },
    {
      "epoch": 2.949352847785173,
      "grad_norm": 2.875,
      "learning_rate": 9.379762247819912e-07,
      "loss": 0.7722,
      "step": 841530
    },
    {
      "epoch": 2.9493878952920682,
      "grad_norm": 2.703125,
      "learning_rate": 9.373271961182895e-07,
      "loss": 0.803,
      "step": 841540
    },
    {
      "epoch": 2.949422942798964,
      "grad_norm": 2.96875,
      "learning_rate": 9.366781674545875e-07,
      "loss": 0.7337,
      "step": 841550
    },
    {
      "epoch": 2.9494579903058598,
      "grad_norm": 2.5,
      "learning_rate": 9.360291387908856e-07,
      "loss": 0.7528,
      "step": 841560
    },
    {
      "epoch": 2.949493037812755,
      "grad_norm": 2.609375,
      "learning_rate": 9.353801101271837e-07,
      "loss": 0.8684,
      "step": 841570
    },
    {
      "epoch": 2.949528085319651,
      "grad_norm": 2.5625,
      "learning_rate": 9.347310814634817e-07,
      "loss": 0.7914,
      "step": 841580
    },
    {
      "epoch": 2.9495631328265466,
      "grad_norm": 3.109375,
      "learning_rate": 9.340820527997798e-07,
      "loss": 0.7529,
      "step": 841590
    },
    {
      "epoch": 2.949598180333442,
      "grad_norm": 2.6875,
      "learning_rate": 9.334330241360779e-07,
      "loss": 0.8496,
      "step": 841600
    },
    {
      "epoch": 2.9496332278403377,
      "grad_norm": 3.203125,
      "learning_rate": 9.327839954723761e-07,
      "loss": 0.7803,
      "step": 841610
    },
    {
      "epoch": 2.949668275347233,
      "grad_norm": 2.953125,
      "learning_rate": 9.321349668086743e-07,
      "loss": 0.7854,
      "step": 841620
    },
    {
      "epoch": 2.9497033228541287,
      "grad_norm": 3.234375,
      "learning_rate": 9.314859381449723e-07,
      "loss": 0.8219,
      "step": 841630
    },
    {
      "epoch": 2.9497383703610245,
      "grad_norm": 2.859375,
      "learning_rate": 9.308369094812704e-07,
      "loss": 0.8521,
      "step": 841640
    },
    {
      "epoch": 2.94977341786792,
      "grad_norm": 3.125,
      "learning_rate": 9.301878808175685e-07,
      "loss": 0.7928,
      "step": 841650
    },
    {
      "epoch": 2.9498084653748156,
      "grad_norm": 2.796875,
      "learning_rate": 9.295388521538665e-07,
      "loss": 0.7638,
      "step": 841660
    },
    {
      "epoch": 2.9498435128817113,
      "grad_norm": 2.671875,
      "learning_rate": 9.288898234901648e-07,
      "loss": 0.7917,
      "step": 841670
    },
    {
      "epoch": 2.9498785603886066,
      "grad_norm": 3.03125,
      "learning_rate": 9.282407948264628e-07,
      "loss": 0.7342,
      "step": 841680
    },
    {
      "epoch": 2.9499136078955024,
      "grad_norm": 2.875,
      "learning_rate": 9.275917661627609e-07,
      "loss": 0.7988,
      "step": 841690
    },
    {
      "epoch": 2.949948655402398,
      "grad_norm": 2.640625,
      "learning_rate": 9.26942737499059e-07,
      "loss": 0.7876,
      "step": 841700
    },
    {
      "epoch": 2.9499837029092935,
      "grad_norm": 3.328125,
      "learning_rate": 9.26293708835357e-07,
      "loss": 0.8278,
      "step": 841710
    },
    {
      "epoch": 2.950018750416189,
      "grad_norm": 2.765625,
      "learning_rate": 9.256446801716551e-07,
      "loss": 0.7938,
      "step": 841720
    },
    {
      "epoch": 2.9500537979230845,
      "grad_norm": 2.609375,
      "learning_rate": 9.249956515079532e-07,
      "loss": 0.8013,
      "step": 841730
    },
    {
      "epoch": 2.9500888454299803,
      "grad_norm": 3.15625,
      "learning_rate": 9.243466228442514e-07,
      "loss": 0.8235,
      "step": 841740
    },
    {
      "epoch": 2.950123892936876,
      "grad_norm": 2.703125,
      "learning_rate": 9.236975941805495e-07,
      "loss": 0.7814,
      "step": 841750
    },
    {
      "epoch": 2.950158940443772,
      "grad_norm": 2.515625,
      "learning_rate": 9.230485655168475e-07,
      "loss": 0.8112,
      "step": 841760
    },
    {
      "epoch": 2.950193987950667,
      "grad_norm": 2.828125,
      "learning_rate": 9.223995368531456e-07,
      "loss": 0.8072,
      "step": 841770
    },
    {
      "epoch": 2.950229035457563,
      "grad_norm": 3.4375,
      "learning_rate": 9.217505081894437e-07,
      "loss": 0.8347,
      "step": 841780
    },
    {
      "epoch": 2.950264082964458,
      "grad_norm": 3.109375,
      "learning_rate": 9.211014795257417e-07,
      "loss": 0.8029,
      "step": 841790
    },
    {
      "epoch": 2.950299130471354,
      "grad_norm": 3.125,
      "learning_rate": 9.204524508620398e-07,
      "loss": 0.8265,
      "step": 841800
    },
    {
      "epoch": 2.9503341779782497,
      "grad_norm": 2.96875,
      "learning_rate": 9.19803422198338e-07,
      "loss": 0.807,
      "step": 841810
    },
    {
      "epoch": 2.950369225485145,
      "grad_norm": 2.765625,
      "learning_rate": 9.191543935346362e-07,
      "loss": 0.7973,
      "step": 841820
    },
    {
      "epoch": 2.9504042729920408,
      "grad_norm": 3.0,
      "learning_rate": 9.185053648709343e-07,
      "loss": 0.8079,
      "step": 841830
    },
    {
      "epoch": 2.950439320498936,
      "grad_norm": 2.84375,
      "learning_rate": 9.178563362072323e-07,
      "loss": 0.7618,
      "step": 841840
    },
    {
      "epoch": 2.950474368005832,
      "grad_norm": 2.84375,
      "learning_rate": 9.172073075435304e-07,
      "loss": 0.8848,
      "step": 841850
    },
    {
      "epoch": 2.9505094155127276,
      "grad_norm": 3.171875,
      "learning_rate": 9.165582788798285e-07,
      "loss": 0.7583,
      "step": 841860
    },
    {
      "epoch": 2.9505444630196234,
      "grad_norm": 3.09375,
      "learning_rate": 9.159092502161267e-07,
      "loss": 0.7886,
      "step": 841870
    },
    {
      "epoch": 2.9505795105265187,
      "grad_norm": 2.96875,
      "learning_rate": 9.152602215524248e-07,
      "loss": 0.7669,
      "step": 841880
    },
    {
      "epoch": 2.9506145580334144,
      "grad_norm": 2.859375,
      "learning_rate": 9.146111928887228e-07,
      "loss": 0.7325,
      "step": 841890
    },
    {
      "epoch": 2.9506496055403098,
      "grad_norm": 3.5625,
      "learning_rate": 9.139621642250209e-07,
      "loss": 0.7828,
      "step": 841900
    },
    {
      "epoch": 2.9506846530472055,
      "grad_norm": 3.046875,
      "learning_rate": 9.13313135561319e-07,
      "loss": 0.8349,
      "step": 841910
    },
    {
      "epoch": 2.9507197005541013,
      "grad_norm": 2.828125,
      "learning_rate": 9.12664106897617e-07,
      "loss": 0.7072,
      "step": 841920
    },
    {
      "epoch": 2.9507547480609966,
      "grad_norm": 2.8125,
      "learning_rate": 9.120150782339151e-07,
      "loss": 0.7733,
      "step": 841930
    },
    {
      "epoch": 2.9507897955678923,
      "grad_norm": 2.609375,
      "learning_rate": 9.113660495702133e-07,
      "loss": 0.7844,
      "step": 841940
    },
    {
      "epoch": 2.9508248430747877,
      "grad_norm": 2.609375,
      "learning_rate": 9.107170209065114e-07,
      "loss": 0.7795,
      "step": 841950
    },
    {
      "epoch": 2.9508598905816834,
      "grad_norm": 3.28125,
      "learning_rate": 9.100679922428095e-07,
      "loss": 0.8337,
      "step": 841960
    },
    {
      "epoch": 2.950894938088579,
      "grad_norm": 2.671875,
      "learning_rate": 9.094189635791075e-07,
      "loss": 0.8792,
      "step": 841970
    },
    {
      "epoch": 2.950929985595475,
      "grad_norm": 2.796875,
      "learning_rate": 9.087699349154056e-07,
      "loss": 0.7531,
      "step": 841980
    },
    {
      "epoch": 2.9509650331023702,
      "grad_norm": 2.984375,
      "learning_rate": 9.081209062517037e-07,
      "loss": 0.7626,
      "step": 841990
    },
    {
      "epoch": 2.951000080609266,
      "grad_norm": 2.734375,
      "learning_rate": 9.074718775880017e-07,
      "loss": 0.7633,
      "step": 842000
    },
    {
      "epoch": 2.9510351281161613,
      "grad_norm": 2.453125,
      "learning_rate": 9.068228489243001e-07,
      "loss": 0.8222,
      "step": 842010
    },
    {
      "epoch": 2.951070175623057,
      "grad_norm": 3.0,
      "learning_rate": 9.061738202605981e-07,
      "loss": 0.8333,
      "step": 842020
    },
    {
      "epoch": 2.951105223129953,
      "grad_norm": 3.421875,
      "learning_rate": 9.055247915968962e-07,
      "loss": 0.8101,
      "step": 842030
    },
    {
      "epoch": 2.951140270636848,
      "grad_norm": 3.03125,
      "learning_rate": 9.048757629331943e-07,
      "loss": 0.8512,
      "step": 842040
    },
    {
      "epoch": 2.951175318143744,
      "grad_norm": 1.9453125,
      "learning_rate": 9.042267342694923e-07,
      "loss": 0.7482,
      "step": 842050
    },
    {
      "epoch": 2.951210365650639,
      "grad_norm": 2.578125,
      "learning_rate": 9.035777056057904e-07,
      "loss": 0.8521,
      "step": 842060
    },
    {
      "epoch": 2.951245413157535,
      "grad_norm": 2.828125,
      "learning_rate": 9.029286769420885e-07,
      "loss": 0.8442,
      "step": 842070
    },
    {
      "epoch": 2.9512804606644307,
      "grad_norm": 2.109375,
      "learning_rate": 9.022796482783867e-07,
      "loss": 0.8084,
      "step": 842080
    },
    {
      "epoch": 2.9513155081713265,
      "grad_norm": 2.875,
      "learning_rate": 9.016306196146847e-07,
      "loss": 0.8016,
      "step": 842090
    },
    {
      "epoch": 2.951350555678222,
      "grad_norm": 2.8125,
      "learning_rate": 9.009815909509828e-07,
      "loss": 0.794,
      "step": 842100
    },
    {
      "epoch": 2.9513856031851176,
      "grad_norm": 2.703125,
      "learning_rate": 9.003325622872809e-07,
      "loss": 0.8516,
      "step": 842110
    },
    {
      "epoch": 2.951420650692013,
      "grad_norm": 3.25,
      "learning_rate": 8.996835336235789e-07,
      "loss": 0.8041,
      "step": 842120
    },
    {
      "epoch": 2.9514556981989086,
      "grad_norm": 3.015625,
      "learning_rate": 8.99034504959877e-07,
      "loss": 0.8295,
      "step": 842130
    },
    {
      "epoch": 2.9514907457058044,
      "grad_norm": 3.46875,
      "learning_rate": 8.983854762961752e-07,
      "loss": 0.7544,
      "step": 842140
    },
    {
      "epoch": 2.9515257932126997,
      "grad_norm": 3.296875,
      "learning_rate": 8.977364476324733e-07,
      "loss": 0.797,
      "step": 842150
    },
    {
      "epoch": 2.9515608407195955,
      "grad_norm": 2.734375,
      "learning_rate": 8.970874189687714e-07,
      "loss": 0.7228,
      "step": 842160
    },
    {
      "epoch": 2.9515958882264908,
      "grad_norm": 3.234375,
      "learning_rate": 8.964383903050694e-07,
      "loss": 0.8695,
      "step": 842170
    },
    {
      "epoch": 2.9516309357333865,
      "grad_norm": 2.9375,
      "learning_rate": 8.957893616413675e-07,
      "loss": 0.7837,
      "step": 842180
    },
    {
      "epoch": 2.9516659832402823,
      "grad_norm": 3.125,
      "learning_rate": 8.951403329776656e-07,
      "loss": 0.7027,
      "step": 842190
    },
    {
      "epoch": 2.951701030747178,
      "grad_norm": 3.21875,
      "learning_rate": 8.944913043139636e-07,
      "loss": 0.7684,
      "step": 842200
    },
    {
      "epoch": 2.9517360782540734,
      "grad_norm": 3.046875,
      "learning_rate": 8.93842275650262e-07,
      "loss": 0.8828,
      "step": 842210
    },
    {
      "epoch": 2.951771125760969,
      "grad_norm": 3.1875,
      "learning_rate": 8.9319324698656e-07,
      "loss": 0.7661,
      "step": 842220
    },
    {
      "epoch": 2.9518061732678644,
      "grad_norm": 2.84375,
      "learning_rate": 8.925442183228581e-07,
      "loss": 0.7299,
      "step": 842230
    },
    {
      "epoch": 2.95184122077476,
      "grad_norm": 2.765625,
      "learning_rate": 8.918951896591562e-07,
      "loss": 0.837,
      "step": 842240
    },
    {
      "epoch": 2.951876268281656,
      "grad_norm": 2.890625,
      "learning_rate": 8.912461609954542e-07,
      "loss": 0.8269,
      "step": 842250
    },
    {
      "epoch": 2.9519113157885513,
      "grad_norm": 2.78125,
      "learning_rate": 8.905971323317523e-07,
      "loss": 0.8012,
      "step": 842260
    },
    {
      "epoch": 2.951946363295447,
      "grad_norm": 3.0,
      "learning_rate": 8.899481036680504e-07,
      "loss": 0.8014,
      "step": 842270
    },
    {
      "epoch": 2.9519814108023423,
      "grad_norm": 2.96875,
      "learning_rate": 8.892990750043486e-07,
      "loss": 0.7857,
      "step": 842280
    },
    {
      "epoch": 2.952016458309238,
      "grad_norm": 2.65625,
      "learning_rate": 8.886500463406467e-07,
      "loss": 0.7468,
      "step": 842290
    },
    {
      "epoch": 2.952051505816134,
      "grad_norm": 2.96875,
      "learning_rate": 8.880010176769447e-07,
      "loss": 0.9386,
      "step": 842300
    },
    {
      "epoch": 2.9520865533230296,
      "grad_norm": 3.140625,
      "learning_rate": 8.873519890132428e-07,
      "loss": 0.7388,
      "step": 842310
    },
    {
      "epoch": 2.952121600829925,
      "grad_norm": 2.703125,
      "learning_rate": 8.867029603495409e-07,
      "loss": 0.8591,
      "step": 842320
    },
    {
      "epoch": 2.9521566483368207,
      "grad_norm": 2.703125,
      "learning_rate": 8.860539316858389e-07,
      "loss": 0.9083,
      "step": 842330
    },
    {
      "epoch": 2.952191695843716,
      "grad_norm": 2.71875,
      "learning_rate": 8.854049030221372e-07,
      "loss": 0.718,
      "step": 842340
    },
    {
      "epoch": 2.9522267433506117,
      "grad_norm": 2.796875,
      "learning_rate": 8.847558743584352e-07,
      "loss": 0.789,
      "step": 842350
    },
    {
      "epoch": 2.9522617908575075,
      "grad_norm": 3.046875,
      "learning_rate": 8.841068456947333e-07,
      "loss": 0.8168,
      "step": 842360
    },
    {
      "epoch": 2.952296838364403,
      "grad_norm": 2.6875,
      "learning_rate": 8.834578170310314e-07,
      "loss": 0.7534,
      "step": 842370
    },
    {
      "epoch": 2.9523318858712986,
      "grad_norm": 3.078125,
      "learning_rate": 8.828087883673294e-07,
      "loss": 0.8786,
      "step": 842380
    },
    {
      "epoch": 2.952366933378194,
      "grad_norm": 2.796875,
      "learning_rate": 8.821597597036275e-07,
      "loss": 0.8508,
      "step": 842390
    },
    {
      "epoch": 2.9524019808850896,
      "grad_norm": 2.828125,
      "learning_rate": 8.815107310399257e-07,
      "loss": 0.8258,
      "step": 842400
    },
    {
      "epoch": 2.9524370283919854,
      "grad_norm": 3.28125,
      "learning_rate": 8.808617023762239e-07,
      "loss": 0.8286,
      "step": 842410
    },
    {
      "epoch": 2.952472075898881,
      "grad_norm": 2.734375,
      "learning_rate": 8.80212673712522e-07,
      "loss": 0.774,
      "step": 842420
    },
    {
      "epoch": 2.9525071234057765,
      "grad_norm": 2.75,
      "learning_rate": 8.7956364504882e-07,
      "loss": 0.77,
      "step": 842430
    },
    {
      "epoch": 2.9525421709126722,
      "grad_norm": 3.015625,
      "learning_rate": 8.789146163851181e-07,
      "loss": 0.7837,
      "step": 842440
    },
    {
      "epoch": 2.9525772184195676,
      "grad_norm": 2.625,
      "learning_rate": 8.782655877214162e-07,
      "loss": 0.8155,
      "step": 842450
    },
    {
      "epoch": 2.9526122659264633,
      "grad_norm": 2.640625,
      "learning_rate": 8.776165590577142e-07,
      "loss": 0.8002,
      "step": 842460
    },
    {
      "epoch": 2.952647313433359,
      "grad_norm": 3.0625,
      "learning_rate": 8.769675303940123e-07,
      "loss": 0.7648,
      "step": 842470
    },
    {
      "epoch": 2.9526823609402544,
      "grad_norm": 2.890625,
      "learning_rate": 8.763185017303105e-07,
      "loss": 0.864,
      "step": 842480
    },
    {
      "epoch": 2.95271740844715,
      "grad_norm": 3.34375,
      "learning_rate": 8.756694730666086e-07,
      "loss": 0.7728,
      "step": 842490
    },
    {
      "epoch": 2.9527524559540455,
      "grad_norm": 3.53125,
      "learning_rate": 8.750204444029067e-07,
      "loss": 0.8047,
      "step": 842500
    },
    {
      "epoch": 2.952787503460941,
      "grad_norm": 2.71875,
      "learning_rate": 8.743714157392047e-07,
      "loss": 0.8827,
      "step": 842510
    },
    {
      "epoch": 2.952822550967837,
      "grad_norm": 2.953125,
      "learning_rate": 8.737223870755028e-07,
      "loss": 0.6994,
      "step": 842520
    },
    {
      "epoch": 2.9528575984747327,
      "grad_norm": 2.8125,
      "learning_rate": 8.730733584118009e-07,
      "loss": 0.7211,
      "step": 842530
    },
    {
      "epoch": 2.952892645981628,
      "grad_norm": 3.046875,
      "learning_rate": 8.724243297480991e-07,
      "loss": 0.7421,
      "step": 842540
    },
    {
      "epoch": 2.952927693488524,
      "grad_norm": 3.21875,
      "learning_rate": 8.717753010843972e-07,
      "loss": 0.882,
      "step": 842550
    },
    {
      "epoch": 2.952962740995419,
      "grad_norm": 2.9375,
      "learning_rate": 8.711262724206952e-07,
      "loss": 0.7957,
      "step": 842560
    },
    {
      "epoch": 2.952997788502315,
      "grad_norm": 2.5625,
      "learning_rate": 8.704772437569933e-07,
      "loss": 0.7736,
      "step": 842570
    },
    {
      "epoch": 2.9530328360092106,
      "grad_norm": 2.84375,
      "learning_rate": 8.698282150932915e-07,
      "loss": 0.7293,
      "step": 842580
    },
    {
      "epoch": 2.953067883516106,
      "grad_norm": 3.25,
      "learning_rate": 8.691791864295895e-07,
      "loss": 0.8348,
      "step": 842590
    },
    {
      "epoch": 2.9531029310230017,
      "grad_norm": 3.109375,
      "learning_rate": 8.685301577658876e-07,
      "loss": 0.8486,
      "step": 842600
    },
    {
      "epoch": 2.953137978529897,
      "grad_norm": 3.046875,
      "learning_rate": 8.678811291021858e-07,
      "loss": 0.8534,
      "step": 842610
    },
    {
      "epoch": 2.9531730260367928,
      "grad_norm": 2.796875,
      "learning_rate": 8.672321004384839e-07,
      "loss": 0.7816,
      "step": 842620
    },
    {
      "epoch": 2.9532080735436885,
      "grad_norm": 3.03125,
      "learning_rate": 8.66583071774782e-07,
      "loss": 0.7755,
      "step": 842630
    },
    {
      "epoch": 2.9532431210505843,
      "grad_norm": 2.890625,
      "learning_rate": 8.6593404311108e-07,
      "loss": 0.7939,
      "step": 842640
    },
    {
      "epoch": 2.9532781685574796,
      "grad_norm": 3.59375,
      "learning_rate": 8.652850144473781e-07,
      "loss": 0.8345,
      "step": 842650
    },
    {
      "epoch": 2.9533132160643754,
      "grad_norm": 2.84375,
      "learning_rate": 8.646359857836762e-07,
      "loss": 0.8341,
      "step": 842660
    },
    {
      "epoch": 2.9533482635712707,
      "grad_norm": 3.1875,
      "learning_rate": 8.639869571199742e-07,
      "loss": 0.7776,
      "step": 842670
    },
    {
      "epoch": 2.9533833110781664,
      "grad_norm": 2.90625,
      "learning_rate": 8.633379284562724e-07,
      "loss": 0.916,
      "step": 842680
    },
    {
      "epoch": 2.953418358585062,
      "grad_norm": 3.0,
      "learning_rate": 8.626888997925705e-07,
      "loss": 0.7635,
      "step": 842690
    },
    {
      "epoch": 2.9534534060919575,
      "grad_norm": 2.84375,
      "learning_rate": 8.620398711288686e-07,
      "loss": 0.8077,
      "step": 842700
    },
    {
      "epoch": 2.9534884535988533,
      "grad_norm": 2.953125,
      "learning_rate": 8.613908424651666e-07,
      "loss": 0.8194,
      "step": 842710
    },
    {
      "epoch": 2.9535235011057486,
      "grad_norm": 2.84375,
      "learning_rate": 8.607418138014647e-07,
      "loss": 0.7595,
      "step": 842720
    },
    {
      "epoch": 2.9535585486126443,
      "grad_norm": 2.34375,
      "learning_rate": 8.600927851377628e-07,
      "loss": 0.7406,
      "step": 842730
    },
    {
      "epoch": 2.95359359611954,
      "grad_norm": 2.8125,
      "learning_rate": 8.594437564740608e-07,
      "loss": 0.8187,
      "step": 842740
    },
    {
      "epoch": 2.953628643626436,
      "grad_norm": 3.359375,
      "learning_rate": 8.587947278103591e-07,
      "loss": 0.8156,
      "step": 842750
    },
    {
      "epoch": 2.953663691133331,
      "grad_norm": 2.984375,
      "learning_rate": 8.581456991466571e-07,
      "loss": 0.7337,
      "step": 842760
    },
    {
      "epoch": 2.953698738640227,
      "grad_norm": 2.875,
      "learning_rate": 8.574966704829552e-07,
      "loss": 0.7807,
      "step": 842770
    },
    {
      "epoch": 2.9537337861471222,
      "grad_norm": 2.984375,
      "learning_rate": 8.568476418192534e-07,
      "loss": 0.8236,
      "step": 842780
    },
    {
      "epoch": 2.953768833654018,
      "grad_norm": 2.421875,
      "learning_rate": 8.561986131555514e-07,
      "loss": 0.7832,
      "step": 842790
    },
    {
      "epoch": 2.9538038811609137,
      "grad_norm": 3.0,
      "learning_rate": 8.555495844918495e-07,
      "loss": 0.8624,
      "step": 842800
    },
    {
      "epoch": 2.953838928667809,
      "grad_norm": 2.71875,
      "learning_rate": 8.549005558281477e-07,
      "loss": 0.7428,
      "step": 842810
    },
    {
      "epoch": 2.953873976174705,
      "grad_norm": 3.0,
      "learning_rate": 8.542515271644458e-07,
      "loss": 0.7459,
      "step": 842820
    },
    {
      "epoch": 2.9539090236816,
      "grad_norm": 2.953125,
      "learning_rate": 8.536024985007439e-07,
      "loss": 0.8096,
      "step": 842830
    },
    {
      "epoch": 2.953944071188496,
      "grad_norm": 2.71875,
      "learning_rate": 8.529534698370419e-07,
      "loss": 0.8088,
      "step": 842840
    },
    {
      "epoch": 2.9539791186953916,
      "grad_norm": 3.140625,
      "learning_rate": 8.5230444117334e-07,
      "loss": 0.8109,
      "step": 842850
    },
    {
      "epoch": 2.9540141662022874,
      "grad_norm": 3.140625,
      "learning_rate": 8.516554125096381e-07,
      "loss": 0.7508,
      "step": 842860
    },
    {
      "epoch": 2.9540492137091827,
      "grad_norm": 3.109375,
      "learning_rate": 8.510063838459361e-07,
      "loss": 0.74,
      "step": 842870
    },
    {
      "epoch": 2.9540842612160785,
      "grad_norm": 2.71875,
      "learning_rate": 8.503573551822344e-07,
      "loss": 0.77,
      "step": 842880
    },
    {
      "epoch": 2.954119308722974,
      "grad_norm": 2.9375,
      "learning_rate": 8.497083265185324e-07,
      "loss": 0.8276,
      "step": 842890
    },
    {
      "epoch": 2.9541543562298695,
      "grad_norm": 3.0625,
      "learning_rate": 8.490592978548305e-07,
      "loss": 0.754,
      "step": 842900
    },
    {
      "epoch": 2.9541894037367653,
      "grad_norm": 2.859375,
      "learning_rate": 8.484102691911286e-07,
      "loss": 0.7213,
      "step": 842910
    },
    {
      "epoch": 2.9542244512436606,
      "grad_norm": 2.84375,
      "learning_rate": 8.477612405274266e-07,
      "loss": 0.7817,
      "step": 842920
    },
    {
      "epoch": 2.9542594987505564,
      "grad_norm": 2.796875,
      "learning_rate": 8.471122118637247e-07,
      "loss": 0.8504,
      "step": 842930
    },
    {
      "epoch": 2.954294546257452,
      "grad_norm": 3.0,
      "learning_rate": 8.464631832000228e-07,
      "loss": 0.8006,
      "step": 842940
    },
    {
      "epoch": 2.9543295937643475,
      "grad_norm": 2.90625,
      "learning_rate": 8.45814154536321e-07,
      "loss": 0.7138,
      "step": 842950
    },
    {
      "epoch": 2.954364641271243,
      "grad_norm": 2.984375,
      "learning_rate": 8.451651258726192e-07,
      "loss": 0.8118,
      "step": 842960
    },
    {
      "epoch": 2.954399688778139,
      "grad_norm": 3.265625,
      "learning_rate": 8.445160972089172e-07,
      "loss": 0.7703,
      "step": 842970
    },
    {
      "epoch": 2.9544347362850343,
      "grad_norm": 2.625,
      "learning_rate": 8.438670685452153e-07,
      "loss": 0.7388,
      "step": 842980
    },
    {
      "epoch": 2.95446978379193,
      "grad_norm": 2.6875,
      "learning_rate": 8.432180398815134e-07,
      "loss": 0.8404,
      "step": 842990
    },
    {
      "epoch": 2.9545048312988254,
      "grad_norm": 2.828125,
      "learning_rate": 8.425690112178114e-07,
      "loss": 0.7801,
      "step": 843000
    },
    {
      "epoch": 2.954539878805721,
      "grad_norm": 3.484375,
      "learning_rate": 8.419199825541097e-07,
      "loss": 0.8317,
      "step": 843010
    },
    {
      "epoch": 2.954574926312617,
      "grad_norm": 2.953125,
      "learning_rate": 8.412709538904077e-07,
      "loss": 0.8336,
      "step": 843020
    },
    {
      "epoch": 2.9546099738195126,
      "grad_norm": 2.859375,
      "learning_rate": 8.406219252267058e-07,
      "loss": 0.7794,
      "step": 843030
    },
    {
      "epoch": 2.954645021326408,
      "grad_norm": 3.015625,
      "learning_rate": 8.399728965630039e-07,
      "loss": 0.7817,
      "step": 843040
    },
    {
      "epoch": 2.9546800688333037,
      "grad_norm": 3.140625,
      "learning_rate": 8.393238678993019e-07,
      "loss": 0.7813,
      "step": 843050
    },
    {
      "epoch": 2.954715116340199,
      "grad_norm": 2.796875,
      "learning_rate": 8.386748392356e-07,
      "loss": 0.7706,
      "step": 843060
    },
    {
      "epoch": 2.9547501638470948,
      "grad_norm": 3.46875,
      "learning_rate": 8.380258105718981e-07,
      "loss": 0.8629,
      "step": 843070
    },
    {
      "epoch": 2.9547852113539905,
      "grad_norm": 2.90625,
      "learning_rate": 8.373767819081963e-07,
      "loss": 0.8546,
      "step": 843080
    },
    {
      "epoch": 2.954820258860886,
      "grad_norm": 2.8125,
      "learning_rate": 8.367277532444944e-07,
      "loss": 0.8743,
      "step": 843090
    },
    {
      "epoch": 2.9548553063677816,
      "grad_norm": 2.515625,
      "learning_rate": 8.360787245807924e-07,
      "loss": 0.7719,
      "step": 843100
    },
    {
      "epoch": 2.954890353874677,
      "grad_norm": 3.109375,
      "learning_rate": 8.354296959170905e-07,
      "loss": 0.7449,
      "step": 843110
    },
    {
      "epoch": 2.9549254013815727,
      "grad_norm": 2.921875,
      "learning_rate": 8.347806672533886e-07,
      "loss": 0.8104,
      "step": 843120
    },
    {
      "epoch": 2.9549604488884684,
      "grad_norm": 3.359375,
      "learning_rate": 8.341316385896866e-07,
      "loss": 0.7375,
      "step": 843130
    },
    {
      "epoch": 2.954995496395364,
      "grad_norm": 2.8125,
      "learning_rate": 8.334826099259847e-07,
      "loss": 0.7422,
      "step": 843140
    },
    {
      "epoch": 2.9550305439022595,
      "grad_norm": 2.75,
      "learning_rate": 8.32833581262283e-07,
      "loss": 0.7648,
      "step": 843150
    },
    {
      "epoch": 2.9550655914091553,
      "grad_norm": 3.09375,
      "learning_rate": 8.321845525985811e-07,
      "loss": 0.7859,
      "step": 843160
    },
    {
      "epoch": 2.9551006389160506,
      "grad_norm": 2.84375,
      "learning_rate": 8.315355239348792e-07,
      "loss": 0.7999,
      "step": 843170
    },
    {
      "epoch": 2.9551356864229463,
      "grad_norm": 2.8125,
      "learning_rate": 8.308864952711772e-07,
      "loss": 0.8383,
      "step": 843180
    },
    {
      "epoch": 2.955170733929842,
      "grad_norm": 2.96875,
      "learning_rate": 8.302374666074753e-07,
      "loss": 0.8279,
      "step": 843190
    },
    {
      "epoch": 2.9552057814367374,
      "grad_norm": 2.59375,
      "learning_rate": 8.295884379437734e-07,
      "loss": 0.8187,
      "step": 843200
    },
    {
      "epoch": 2.955240828943633,
      "grad_norm": 3.0625,
      "learning_rate": 8.289394092800716e-07,
      "loss": 0.8436,
      "step": 843210
    },
    {
      "epoch": 2.9552758764505285,
      "grad_norm": 2.578125,
      "learning_rate": 8.282903806163697e-07,
      "loss": 0.8348,
      "step": 843220
    },
    {
      "epoch": 2.9553109239574242,
      "grad_norm": 3.21875,
      "learning_rate": 8.276413519526677e-07,
      "loss": 0.7851,
      "step": 843230
    },
    {
      "epoch": 2.95534597146432,
      "grad_norm": 2.9375,
      "learning_rate": 8.269923232889658e-07,
      "loss": 0.7609,
      "step": 843240
    },
    {
      "epoch": 2.9553810189712157,
      "grad_norm": 2.828125,
      "learning_rate": 8.263432946252639e-07,
      "loss": 0.7991,
      "step": 843250
    },
    {
      "epoch": 2.955416066478111,
      "grad_norm": 2.96875,
      "learning_rate": 8.256942659615619e-07,
      "loss": 0.7992,
      "step": 843260
    },
    {
      "epoch": 2.955451113985007,
      "grad_norm": 3.015625,
      "learning_rate": 8.2504523729786e-07,
      "loss": 0.8469,
      "step": 843270
    },
    {
      "epoch": 2.955486161491902,
      "grad_norm": 3.109375,
      "learning_rate": 8.243962086341582e-07,
      "loss": 0.851,
      "step": 843280
    },
    {
      "epoch": 2.955521208998798,
      "grad_norm": 3.0625,
      "learning_rate": 8.237471799704563e-07,
      "loss": 0.7793,
      "step": 843290
    },
    {
      "epoch": 2.9555562565056936,
      "grad_norm": 3.0625,
      "learning_rate": 8.230981513067543e-07,
      "loss": 0.8304,
      "step": 843300
    },
    {
      "epoch": 2.955591304012589,
      "grad_norm": 3.375,
      "learning_rate": 8.224491226430524e-07,
      "loss": 0.7569,
      "step": 843310
    },
    {
      "epoch": 2.9556263515194847,
      "grad_norm": 2.828125,
      "learning_rate": 8.218000939793505e-07,
      "loss": 0.7878,
      "step": 843320
    },
    {
      "epoch": 2.95566139902638,
      "grad_norm": 3.40625,
      "learning_rate": 8.211510653156485e-07,
      "loss": 0.8663,
      "step": 843330
    },
    {
      "epoch": 2.955696446533276,
      "grad_norm": 3.171875,
      "learning_rate": 8.205020366519466e-07,
      "loss": 0.7675,
      "step": 843340
    },
    {
      "epoch": 2.9557314940401715,
      "grad_norm": 3.171875,
      "learning_rate": 8.198530079882449e-07,
      "loss": 0.9078,
      "step": 843350
    },
    {
      "epoch": 2.9557665415470673,
      "grad_norm": 2.921875,
      "learning_rate": 8.19203979324543e-07,
      "loss": 0.8114,
      "step": 843360
    },
    {
      "epoch": 2.9558015890539626,
      "grad_norm": 3.21875,
      "learning_rate": 8.185549506608411e-07,
      "loss": 0.7918,
      "step": 843370
    },
    {
      "epoch": 2.9558366365608584,
      "grad_norm": 3.421875,
      "learning_rate": 8.179059219971391e-07,
      "loss": 0.7675,
      "step": 843380
    },
    {
      "epoch": 2.9558716840677537,
      "grad_norm": 2.96875,
      "learning_rate": 8.172568933334372e-07,
      "loss": 0.8224,
      "step": 843390
    },
    {
      "epoch": 2.9559067315746494,
      "grad_norm": 2.65625,
      "learning_rate": 8.166078646697353e-07,
      "loss": 0.8987,
      "step": 843400
    },
    {
      "epoch": 2.955941779081545,
      "grad_norm": 3.546875,
      "learning_rate": 8.159588360060333e-07,
      "loss": 0.8964,
      "step": 843410
    },
    {
      "epoch": 2.9559768265884405,
      "grad_norm": 3.15625,
      "learning_rate": 8.153098073423316e-07,
      "loss": 0.822,
      "step": 843420
    },
    {
      "epoch": 2.9560118740953363,
      "grad_norm": 2.875,
      "learning_rate": 8.146607786786296e-07,
      "loss": 0.8726,
      "step": 843430
    },
    {
      "epoch": 2.9560469216022316,
      "grad_norm": 2.984375,
      "learning_rate": 8.140117500149277e-07,
      "loss": 0.8205,
      "step": 843440
    },
    {
      "epoch": 2.9560819691091273,
      "grad_norm": 2.8125,
      "learning_rate": 8.133627213512258e-07,
      "loss": 0.8911,
      "step": 843450
    },
    {
      "epoch": 2.956117016616023,
      "grad_norm": 2.796875,
      "learning_rate": 8.127136926875238e-07,
      "loss": 0.8502,
      "step": 843460
    },
    {
      "epoch": 2.956152064122919,
      "grad_norm": 2.90625,
      "learning_rate": 8.120646640238219e-07,
      "loss": 0.7652,
      "step": 843470
    },
    {
      "epoch": 2.956187111629814,
      "grad_norm": 3.15625,
      "learning_rate": 8.114156353601201e-07,
      "loss": 0.8556,
      "step": 843480
    },
    {
      "epoch": 2.95622215913671,
      "grad_norm": 2.640625,
      "learning_rate": 8.107666066964182e-07,
      "loss": 0.7608,
      "step": 843490
    },
    {
      "epoch": 2.9562572066436053,
      "grad_norm": 3.28125,
      "learning_rate": 8.101175780327163e-07,
      "loss": 0.8043,
      "step": 843500
    },
    {
      "epoch": 2.956292254150501,
      "grad_norm": 3.078125,
      "learning_rate": 8.094685493690143e-07,
      "loss": 0.8217,
      "step": 843510
    },
    {
      "epoch": 2.9563273016573968,
      "grad_norm": 2.90625,
      "learning_rate": 8.088195207053124e-07,
      "loss": 0.8706,
      "step": 843520
    },
    {
      "epoch": 2.956362349164292,
      "grad_norm": 3.046875,
      "learning_rate": 8.081704920416105e-07,
      "loss": 0.7695,
      "step": 843530
    },
    {
      "epoch": 2.956397396671188,
      "grad_norm": 3.375,
      "learning_rate": 8.075214633779085e-07,
      "loss": 0.8,
      "step": 843540
    },
    {
      "epoch": 2.956432444178083,
      "grad_norm": 2.875,
      "learning_rate": 8.068724347142069e-07,
      "loss": 0.9121,
      "step": 843550
    },
    {
      "epoch": 2.956467491684979,
      "grad_norm": 2.84375,
      "learning_rate": 8.062234060505049e-07,
      "loss": 0.8812,
      "step": 843560
    },
    {
      "epoch": 2.9565025391918747,
      "grad_norm": 3.203125,
      "learning_rate": 8.05574377386803e-07,
      "loss": 0.7943,
      "step": 843570
    },
    {
      "epoch": 2.9565375866987704,
      "grad_norm": 3.09375,
      "learning_rate": 8.049253487231011e-07,
      "loss": 0.797,
      "step": 843580
    },
    {
      "epoch": 2.9565726342056657,
      "grad_norm": 2.734375,
      "learning_rate": 8.042763200593991e-07,
      "loss": 0.7982,
      "step": 843590
    },
    {
      "epoch": 2.9566076817125615,
      "grad_norm": 3.0625,
      "learning_rate": 8.036272913956972e-07,
      "loss": 0.8106,
      "step": 843600
    },
    {
      "epoch": 2.956642729219457,
      "grad_norm": 2.5625,
      "learning_rate": 8.029782627319953e-07,
      "loss": 0.7908,
      "step": 843610
    },
    {
      "epoch": 2.9566777767263526,
      "grad_norm": 3.0,
      "learning_rate": 8.023292340682935e-07,
      "loss": 0.7723,
      "step": 843620
    },
    {
      "epoch": 2.9567128242332483,
      "grad_norm": 2.828125,
      "learning_rate": 8.016802054045916e-07,
      "loss": 0.7414,
      "step": 843630
    },
    {
      "epoch": 2.9567478717401436,
      "grad_norm": 2.71875,
      "learning_rate": 8.010311767408896e-07,
      "loss": 0.7521,
      "step": 843640
    },
    {
      "epoch": 2.9567829192470394,
      "grad_norm": 2.65625,
      "learning_rate": 8.003821480771877e-07,
      "loss": 0.7392,
      "step": 843650
    },
    {
      "epoch": 2.9568179667539347,
      "grad_norm": 2.734375,
      "learning_rate": 7.997331194134858e-07,
      "loss": 0.8705,
      "step": 843660
    },
    {
      "epoch": 2.9568530142608305,
      "grad_norm": 2.53125,
      "learning_rate": 7.990840907497838e-07,
      "loss": 0.7674,
      "step": 843670
    },
    {
      "epoch": 2.9568880617677262,
      "grad_norm": 2.796875,
      "learning_rate": 7.984350620860821e-07,
      "loss": 0.7894,
      "step": 843680
    },
    {
      "epoch": 2.956923109274622,
      "grad_norm": 2.90625,
      "learning_rate": 7.977860334223801e-07,
      "loss": 0.7756,
      "step": 843690
    },
    {
      "epoch": 2.9569581567815173,
      "grad_norm": 3.359375,
      "learning_rate": 7.971370047586782e-07,
      "loss": 0.7719,
      "step": 843700
    },
    {
      "epoch": 2.956993204288413,
      "grad_norm": 2.921875,
      "learning_rate": 7.964879760949763e-07,
      "loss": 0.7613,
      "step": 843710
    },
    {
      "epoch": 2.9570282517953084,
      "grad_norm": 3.484375,
      "learning_rate": 7.958389474312743e-07,
      "loss": 0.9115,
      "step": 843720
    },
    {
      "epoch": 2.957063299302204,
      "grad_norm": 2.703125,
      "learning_rate": 7.951899187675725e-07,
      "loss": 0.7997,
      "step": 843730
    },
    {
      "epoch": 2.9570983468091,
      "grad_norm": 3.078125,
      "learning_rate": 7.945408901038706e-07,
      "loss": 0.7582,
      "step": 843740
    },
    {
      "epoch": 2.957133394315995,
      "grad_norm": 3.265625,
      "learning_rate": 7.938918614401688e-07,
      "loss": 0.8515,
      "step": 843750
    },
    {
      "epoch": 2.957168441822891,
      "grad_norm": 3.171875,
      "learning_rate": 7.932428327764669e-07,
      "loss": 0.8571,
      "step": 843760
    },
    {
      "epoch": 2.9572034893297863,
      "grad_norm": 2.90625,
      "learning_rate": 7.925938041127649e-07,
      "loss": 0.7959,
      "step": 843770
    },
    {
      "epoch": 2.957238536836682,
      "grad_norm": 2.671875,
      "learning_rate": 7.91944775449063e-07,
      "loss": 0.7948,
      "step": 843780
    },
    {
      "epoch": 2.957273584343578,
      "grad_norm": 3.109375,
      "learning_rate": 7.912957467853611e-07,
      "loss": 0.8357,
      "step": 843790
    },
    {
      "epoch": 2.9573086318504735,
      "grad_norm": 2.6875,
      "learning_rate": 7.906467181216591e-07,
      "loss": 0.7155,
      "step": 843800
    },
    {
      "epoch": 2.957343679357369,
      "grad_norm": 2.6875,
      "learning_rate": 7.899976894579572e-07,
      "loss": 0.7187,
      "step": 843810
    },
    {
      "epoch": 2.9573787268642646,
      "grad_norm": 2.78125,
      "learning_rate": 7.893486607942554e-07,
      "loss": 0.8625,
      "step": 843820
    },
    {
      "epoch": 2.95741377437116,
      "grad_norm": 2.953125,
      "learning_rate": 7.886996321305535e-07,
      "loss": 0.7408,
      "step": 843830
    },
    {
      "epoch": 2.9574488218780557,
      "grad_norm": 2.890625,
      "learning_rate": 7.880506034668516e-07,
      "loss": 0.8053,
      "step": 843840
    },
    {
      "epoch": 2.9574838693849514,
      "grad_norm": 3.171875,
      "learning_rate": 7.874015748031496e-07,
      "loss": 0.7246,
      "step": 843850
    },
    {
      "epoch": 2.9575189168918468,
      "grad_norm": 3.0625,
      "learning_rate": 7.867525461394477e-07,
      "loss": 0.8238,
      "step": 843860
    },
    {
      "epoch": 2.9575539643987425,
      "grad_norm": 2.921875,
      "learning_rate": 7.861035174757458e-07,
      "loss": 0.7098,
      "step": 843870
    },
    {
      "epoch": 2.957589011905638,
      "grad_norm": 3.46875,
      "learning_rate": 7.85454488812044e-07,
      "loss": 0.7786,
      "step": 843880
    },
    {
      "epoch": 2.9576240594125336,
      "grad_norm": 2.90625,
      "learning_rate": 7.84805460148342e-07,
      "loss": 0.9087,
      "step": 843890
    },
    {
      "epoch": 2.9576591069194293,
      "grad_norm": 2.96875,
      "learning_rate": 7.841564314846401e-07,
      "loss": 0.8236,
      "step": 843900
    },
    {
      "epoch": 2.957694154426325,
      "grad_norm": 2.71875,
      "learning_rate": 7.835074028209382e-07,
      "loss": 0.8608,
      "step": 843910
    },
    {
      "epoch": 2.9577292019332204,
      "grad_norm": 2.8125,
      "learning_rate": 7.828583741572362e-07,
      "loss": 0.7812,
      "step": 843920
    },
    {
      "epoch": 2.957764249440116,
      "grad_norm": 2.484375,
      "learning_rate": 7.822093454935344e-07,
      "loss": 0.7596,
      "step": 843930
    },
    {
      "epoch": 2.9577992969470115,
      "grad_norm": 2.78125,
      "learning_rate": 7.815603168298325e-07,
      "loss": 0.7927,
      "step": 843940
    },
    {
      "epoch": 2.9578343444539072,
      "grad_norm": 3.4375,
      "learning_rate": 7.809112881661306e-07,
      "loss": 0.9036,
      "step": 843950
    },
    {
      "epoch": 2.957869391960803,
      "grad_norm": 3.015625,
      "learning_rate": 7.802622595024287e-07,
      "loss": 0.8097,
      "step": 843960
    },
    {
      "epoch": 2.9579044394676983,
      "grad_norm": 2.75,
      "learning_rate": 7.796132308387268e-07,
      "loss": 0.7557,
      "step": 843970
    },
    {
      "epoch": 2.957939486974594,
      "grad_norm": 3.1875,
      "learning_rate": 7.789642021750249e-07,
      "loss": 0.7796,
      "step": 843980
    },
    {
      "epoch": 2.9579745344814894,
      "grad_norm": 2.78125,
      "learning_rate": 7.78315173511323e-07,
      "loss": 0.8399,
      "step": 843990
    },
    {
      "epoch": 2.958009581988385,
      "grad_norm": 3.046875,
      "learning_rate": 7.776661448476211e-07,
      "loss": 0.8946,
      "step": 844000
    },
    {
      "epoch": 2.958044629495281,
      "grad_norm": 2.828125,
      "learning_rate": 7.770171161839192e-07,
      "loss": 0.7661,
      "step": 844010
    },
    {
      "epoch": 2.9580796770021767,
      "grad_norm": 3.078125,
      "learning_rate": 7.763680875202173e-07,
      "loss": 0.8576,
      "step": 844020
    },
    {
      "epoch": 2.958114724509072,
      "grad_norm": 2.59375,
      "learning_rate": 7.757190588565154e-07,
      "loss": 0.8081,
      "step": 844030
    },
    {
      "epoch": 2.9581497720159677,
      "grad_norm": 2.75,
      "learning_rate": 7.750700301928135e-07,
      "loss": 0.784,
      "step": 844040
    },
    {
      "epoch": 2.958184819522863,
      "grad_norm": 2.65625,
      "learning_rate": 7.744210015291115e-07,
      "loss": 0.7823,
      "step": 844050
    },
    {
      "epoch": 2.958219867029759,
      "grad_norm": 2.90625,
      "learning_rate": 7.737719728654096e-07,
      "loss": 0.7952,
      "step": 844060
    },
    {
      "epoch": 2.9582549145366546,
      "grad_norm": 2.515625,
      "learning_rate": 7.731229442017078e-07,
      "loss": 0.8005,
      "step": 844070
    },
    {
      "epoch": 2.95828996204355,
      "grad_norm": 3.078125,
      "learning_rate": 7.724739155380058e-07,
      "loss": 0.8018,
      "step": 844080
    },
    {
      "epoch": 2.9583250095504456,
      "grad_norm": 2.484375,
      "learning_rate": 7.718248868743039e-07,
      "loss": 0.7716,
      "step": 844090
    },
    {
      "epoch": 2.958360057057341,
      "grad_norm": 3.0625,
      "learning_rate": 7.71175858210602e-07,
      "loss": 0.7461,
      "step": 844100
    },
    {
      "epoch": 2.9583951045642367,
      "grad_norm": 2.703125,
      "learning_rate": 7.705268295469002e-07,
      "loss": 0.8464,
      "step": 844110
    },
    {
      "epoch": 2.9584301520711325,
      "grad_norm": 3.03125,
      "learning_rate": 7.698778008831983e-07,
      "loss": 0.6872,
      "step": 844120
    },
    {
      "epoch": 2.9584651995780282,
      "grad_norm": 2.515625,
      "learning_rate": 7.692287722194963e-07,
      "loss": 0.6815,
      "step": 844130
    },
    {
      "epoch": 2.9585002470849235,
      "grad_norm": 2.9375,
      "learning_rate": 7.685797435557945e-07,
      "loss": 0.7541,
      "step": 844140
    },
    {
      "epoch": 2.9585352945918193,
      "grad_norm": 2.546875,
      "learning_rate": 7.679307148920926e-07,
      "loss": 0.7438,
      "step": 844150
    },
    {
      "epoch": 2.9585703420987146,
      "grad_norm": 3.015625,
      "learning_rate": 7.672816862283906e-07,
      "loss": 0.8346,
      "step": 844160
    },
    {
      "epoch": 2.9586053896056104,
      "grad_norm": 3.046875,
      "learning_rate": 7.666326575646888e-07,
      "loss": 0.8372,
      "step": 844170
    },
    {
      "epoch": 2.958640437112506,
      "grad_norm": 2.90625,
      "learning_rate": 7.659836289009868e-07,
      "loss": 0.8334,
      "step": 844180
    },
    {
      "epoch": 2.9586754846194014,
      "grad_norm": 3.21875,
      "learning_rate": 7.653346002372849e-07,
      "loss": 0.789,
      "step": 844190
    },
    {
      "epoch": 2.958710532126297,
      "grad_norm": 2.375,
      "learning_rate": 7.64685571573583e-07,
      "loss": 0.7138,
      "step": 844200
    },
    {
      "epoch": 2.958745579633193,
      "grad_norm": 2.671875,
      "learning_rate": 7.640365429098811e-07,
      "loss": 0.8282,
      "step": 844210
    },
    {
      "epoch": 2.9587806271400883,
      "grad_norm": 2.75,
      "learning_rate": 7.633875142461792e-07,
      "loss": 0.8684,
      "step": 844220
    },
    {
      "epoch": 2.958815674646984,
      "grad_norm": 3.015625,
      "learning_rate": 7.627384855824772e-07,
      "loss": 0.8521,
      "step": 844230
    },
    {
      "epoch": 2.95885072215388,
      "grad_norm": 2.890625,
      "learning_rate": 7.620894569187754e-07,
      "loss": 0.8151,
      "step": 844240
    },
    {
      "epoch": 2.958885769660775,
      "grad_norm": 2.703125,
      "learning_rate": 7.614404282550735e-07,
      "loss": 0.8147,
      "step": 844250
    },
    {
      "epoch": 2.958920817167671,
      "grad_norm": 2.609375,
      "learning_rate": 7.607913995913715e-07,
      "loss": 0.7948,
      "step": 844260
    },
    {
      "epoch": 2.958955864674566,
      "grad_norm": 3.109375,
      "learning_rate": 7.601423709276697e-07,
      "loss": 0.8272,
      "step": 844270
    },
    {
      "epoch": 2.958990912181462,
      "grad_norm": 2.703125,
      "learning_rate": 7.594933422639677e-07,
      "loss": 0.7482,
      "step": 844280
    },
    {
      "epoch": 2.9590259596883577,
      "grad_norm": 3.3125,
      "learning_rate": 7.588443136002658e-07,
      "loss": 0.8571,
      "step": 844290
    },
    {
      "epoch": 2.959061007195253,
      "grad_norm": 2.828125,
      "learning_rate": 7.581952849365641e-07,
      "loss": 0.7346,
      "step": 844300
    },
    {
      "epoch": 2.9590960547021488,
      "grad_norm": 2.984375,
      "learning_rate": 7.575462562728621e-07,
      "loss": 0.804,
      "step": 844310
    },
    {
      "epoch": 2.9591311022090445,
      "grad_norm": 2.6875,
      "learning_rate": 7.568972276091602e-07,
      "loss": 0.6984,
      "step": 844320
    },
    {
      "epoch": 2.95916614971594,
      "grad_norm": 3.0625,
      "learning_rate": 7.562481989454583e-07,
      "loss": 0.8338,
      "step": 844330
    },
    {
      "epoch": 2.9592011972228356,
      "grad_norm": 3.140625,
      "learning_rate": 7.555991702817564e-07,
      "loss": 0.8753,
      "step": 844340
    },
    {
      "epoch": 2.9592362447297313,
      "grad_norm": 3.0,
      "learning_rate": 7.549501416180545e-07,
      "loss": 0.7946,
      "step": 844350
    },
    {
      "epoch": 2.9592712922366267,
      "grad_norm": 2.96875,
      "learning_rate": 7.543011129543525e-07,
      "loss": 0.8095,
      "step": 844360
    },
    {
      "epoch": 2.9593063397435224,
      "grad_norm": 2.96875,
      "learning_rate": 7.536520842906507e-07,
      "loss": 0.7963,
      "step": 844370
    },
    {
      "epoch": 2.9593413872504177,
      "grad_norm": 3.40625,
      "learning_rate": 7.530030556269488e-07,
      "loss": 0.832,
      "step": 844380
    },
    {
      "epoch": 2.9593764347573135,
      "grad_norm": 3.375,
      "learning_rate": 7.523540269632468e-07,
      "loss": 0.7648,
      "step": 844390
    },
    {
      "epoch": 2.9594114822642092,
      "grad_norm": 2.609375,
      "learning_rate": 7.51704998299545e-07,
      "loss": 0.7694,
      "step": 844400
    },
    {
      "epoch": 2.959446529771105,
      "grad_norm": 3.09375,
      "learning_rate": 7.51055969635843e-07,
      "loss": 0.799,
      "step": 844410
    },
    {
      "epoch": 2.9594815772780003,
      "grad_norm": 2.71875,
      "learning_rate": 7.504069409721411e-07,
      "loss": 0.7827,
      "step": 844420
    },
    {
      "epoch": 2.959516624784896,
      "grad_norm": 2.953125,
      "learning_rate": 7.497579123084392e-07,
      "loss": 0.7979,
      "step": 844430
    },
    {
      "epoch": 2.9595516722917914,
      "grad_norm": 2.90625,
      "learning_rate": 7.491088836447373e-07,
      "loss": 0.829,
      "step": 844440
    },
    {
      "epoch": 2.959586719798687,
      "grad_norm": 3.109375,
      "learning_rate": 7.484598549810354e-07,
      "loss": 0.7537,
      "step": 844450
    },
    {
      "epoch": 2.959621767305583,
      "grad_norm": 2.578125,
      "learning_rate": 7.478108263173335e-07,
      "loss": 0.7276,
      "step": 844460
    },
    {
      "epoch": 2.959656814812478,
      "grad_norm": 3.09375,
      "learning_rate": 7.471617976536316e-07,
      "loss": 0.8077,
      "step": 844470
    },
    {
      "epoch": 2.959691862319374,
      "grad_norm": 2.421875,
      "learning_rate": 7.465127689899297e-07,
      "loss": 0.7269,
      "step": 844480
    },
    {
      "epoch": 2.9597269098262693,
      "grad_norm": 3.171875,
      "learning_rate": 7.458637403262277e-07,
      "loss": 0.7632,
      "step": 844490
    },
    {
      "epoch": 2.959761957333165,
      "grad_norm": 2.578125,
      "learning_rate": 7.45214711662526e-07,
      "loss": 0.7655,
      "step": 844500
    },
    {
      "epoch": 2.959797004840061,
      "grad_norm": 2.90625,
      "learning_rate": 7.44565682998824e-07,
      "loss": 0.8143,
      "step": 844510
    },
    {
      "epoch": 2.9598320523469566,
      "grad_norm": 3.0625,
      "learning_rate": 7.439166543351221e-07,
      "loss": 0.8469,
      "step": 844520
    },
    {
      "epoch": 2.959867099853852,
      "grad_norm": 3.15625,
      "learning_rate": 7.432676256714202e-07,
      "loss": 0.7415,
      "step": 844530
    },
    {
      "epoch": 2.9599021473607476,
      "grad_norm": 3.296875,
      "learning_rate": 7.426185970077183e-07,
      "loss": 0.7572,
      "step": 844540
    },
    {
      "epoch": 2.959937194867643,
      "grad_norm": 3.1875,
      "learning_rate": 7.419695683440164e-07,
      "loss": 0.8937,
      "step": 844550
    },
    {
      "epoch": 2.9599722423745387,
      "grad_norm": 2.890625,
      "learning_rate": 7.413205396803145e-07,
      "loss": 0.749,
      "step": 844560
    },
    {
      "epoch": 2.9600072898814345,
      "grad_norm": 3.21875,
      "learning_rate": 7.406715110166126e-07,
      "loss": 0.7975,
      "step": 844570
    },
    {
      "epoch": 2.96004233738833,
      "grad_norm": 3.0625,
      "learning_rate": 7.400224823529107e-07,
      "loss": 0.8134,
      "step": 844580
    },
    {
      "epoch": 2.9600773848952255,
      "grad_norm": 2.578125,
      "learning_rate": 7.393734536892087e-07,
      "loss": 0.6794,
      "step": 844590
    },
    {
      "epoch": 2.960112432402121,
      "grad_norm": 3.140625,
      "learning_rate": 7.387244250255069e-07,
      "loss": 0.8119,
      "step": 844600
    },
    {
      "epoch": 2.9601474799090166,
      "grad_norm": 2.34375,
      "learning_rate": 7.38075396361805e-07,
      "loss": 0.8153,
      "step": 844610
    },
    {
      "epoch": 2.9601825274159124,
      "grad_norm": 2.5,
      "learning_rate": 7.37426367698103e-07,
      "loss": 0.7694,
      "step": 844620
    },
    {
      "epoch": 2.960217574922808,
      "grad_norm": 2.9375,
      "learning_rate": 7.367773390344011e-07,
      "loss": 0.8077,
      "step": 844630
    },
    {
      "epoch": 2.9602526224297034,
      "grad_norm": 2.859375,
      "learning_rate": 7.361283103706992e-07,
      "loss": 0.809,
      "step": 844640
    },
    {
      "epoch": 2.960287669936599,
      "grad_norm": 2.59375,
      "learning_rate": 7.354792817069973e-07,
      "loss": 0.8246,
      "step": 844650
    },
    {
      "epoch": 2.9603227174434945,
      "grad_norm": 3.125,
      "learning_rate": 7.348302530432954e-07,
      "loss": 0.763,
      "step": 844660
    },
    {
      "epoch": 2.9603577649503903,
      "grad_norm": 3.28125,
      "learning_rate": 7.341812243795935e-07,
      "loss": 0.864,
      "step": 844670
    },
    {
      "epoch": 2.960392812457286,
      "grad_norm": 2.984375,
      "learning_rate": 7.335321957158917e-07,
      "loss": 0.8509,
      "step": 844680
    },
    {
      "epoch": 2.9604278599641813,
      "grad_norm": 3.21875,
      "learning_rate": 7.328831670521898e-07,
      "loss": 0.8044,
      "step": 844690
    },
    {
      "epoch": 2.960462907471077,
      "grad_norm": 2.921875,
      "learning_rate": 7.322341383884879e-07,
      "loss": 0.8457,
      "step": 844700
    },
    {
      "epoch": 2.9604979549779724,
      "grad_norm": 3.3125,
      "learning_rate": 7.31585109724786e-07,
      "loss": 0.7844,
      "step": 844710
    },
    {
      "epoch": 2.960533002484868,
      "grad_norm": 2.828125,
      "learning_rate": 7.30936081061084e-07,
      "loss": 0.8367,
      "step": 844720
    },
    {
      "epoch": 2.960568049991764,
      "grad_norm": 2.9375,
      "learning_rate": 7.302870523973821e-07,
      "loss": 0.7871,
      "step": 844730
    },
    {
      "epoch": 2.9606030974986597,
      "grad_norm": 2.71875,
      "learning_rate": 7.296380237336803e-07,
      "loss": 0.8195,
      "step": 844740
    },
    {
      "epoch": 2.960638145005555,
      "grad_norm": 2.59375,
      "learning_rate": 7.289889950699783e-07,
      "loss": 0.7588,
      "step": 844750
    },
    {
      "epoch": 2.9606731925124508,
      "grad_norm": 2.8125,
      "learning_rate": 7.283399664062764e-07,
      "loss": 0.7378,
      "step": 844760
    },
    {
      "epoch": 2.960708240019346,
      "grad_norm": 2.578125,
      "learning_rate": 7.276909377425745e-07,
      "loss": 0.7753,
      "step": 844770
    },
    {
      "epoch": 2.960743287526242,
      "grad_norm": 3.015625,
      "learning_rate": 7.270419090788726e-07,
      "loss": 0.7934,
      "step": 844780
    },
    {
      "epoch": 2.9607783350331376,
      "grad_norm": 2.859375,
      "learning_rate": 7.263928804151707e-07,
      "loss": 0.7733,
      "step": 844790
    },
    {
      "epoch": 2.960813382540033,
      "grad_norm": 2.921875,
      "learning_rate": 7.257438517514687e-07,
      "loss": 0.9111,
      "step": 844800
    },
    {
      "epoch": 2.9608484300469287,
      "grad_norm": 2.5625,
      "learning_rate": 7.250948230877669e-07,
      "loss": 0.79,
      "step": 844810
    },
    {
      "epoch": 2.960883477553824,
      "grad_norm": 2.40625,
      "learning_rate": 7.244457944240649e-07,
      "loss": 0.77,
      "step": 844820
    },
    {
      "epoch": 2.9609185250607197,
      "grad_norm": 2.8125,
      "learning_rate": 7.23796765760363e-07,
      "loss": 0.7991,
      "step": 844830
    },
    {
      "epoch": 2.9609535725676155,
      "grad_norm": 2.921875,
      "learning_rate": 7.231477370966612e-07,
      "loss": 0.7918,
      "step": 844840
    },
    {
      "epoch": 2.9609886200745112,
      "grad_norm": 2.640625,
      "learning_rate": 7.224987084329592e-07,
      "loss": 0.7995,
      "step": 844850
    },
    {
      "epoch": 2.9610236675814066,
      "grad_norm": 3.3125,
      "learning_rate": 7.218496797692573e-07,
      "loss": 0.7575,
      "step": 844860
    },
    {
      "epoch": 2.9610587150883023,
      "grad_norm": 3.296875,
      "learning_rate": 7.212006511055555e-07,
      "loss": 0.7914,
      "step": 844870
    },
    {
      "epoch": 2.9610937625951976,
      "grad_norm": 2.734375,
      "learning_rate": 7.205516224418536e-07,
      "loss": 0.8069,
      "step": 844880
    },
    {
      "epoch": 2.9611288101020934,
      "grad_norm": 2.734375,
      "learning_rate": 7.199025937781517e-07,
      "loss": 0.7772,
      "step": 844890
    },
    {
      "epoch": 2.961163857608989,
      "grad_norm": 2.65625,
      "learning_rate": 7.192535651144497e-07,
      "loss": 0.7542,
      "step": 844900
    },
    {
      "epoch": 2.9611989051158845,
      "grad_norm": 3.03125,
      "learning_rate": 7.186045364507479e-07,
      "loss": 0.7434,
      "step": 844910
    },
    {
      "epoch": 2.96123395262278,
      "grad_norm": 2.234375,
      "learning_rate": 7.17955507787046e-07,
      "loss": 0.8051,
      "step": 844920
    },
    {
      "epoch": 2.9612690001296755,
      "grad_norm": 2.75,
      "learning_rate": 7.17306479123344e-07,
      "loss": 0.8057,
      "step": 844930
    },
    {
      "epoch": 2.9613040476365713,
      "grad_norm": 2.890625,
      "learning_rate": 7.166574504596422e-07,
      "loss": 0.8554,
      "step": 844940
    },
    {
      "epoch": 2.961339095143467,
      "grad_norm": 2.890625,
      "learning_rate": 7.160084217959402e-07,
      "loss": 0.7102,
      "step": 844950
    },
    {
      "epoch": 2.961374142650363,
      "grad_norm": 2.75,
      "learning_rate": 7.153593931322383e-07,
      "loss": 0.7659,
      "step": 844960
    },
    {
      "epoch": 2.961409190157258,
      "grad_norm": 2.828125,
      "learning_rate": 7.147103644685365e-07,
      "loss": 0.811,
      "step": 844970
    },
    {
      "epoch": 2.961444237664154,
      "grad_norm": 3.1875,
      "learning_rate": 7.140613358048345e-07,
      "loss": 0.7044,
      "step": 844980
    },
    {
      "epoch": 2.961479285171049,
      "grad_norm": 3.125,
      "learning_rate": 7.134123071411326e-07,
      "loss": 0.7957,
      "step": 844990
    },
    {
      "epoch": 2.961514332677945,
      "grad_norm": 2.921875,
      "learning_rate": 7.127632784774307e-07,
      "loss": 0.8299,
      "step": 845000
    },
    {
      "epoch": 2.961514332677945,
      "eval_loss": 0.7498059868812561,
      "eval_runtime": 559.2392,
      "eval_samples_per_second": 680.274,
      "eval_steps_per_second": 56.69,
      "step": 845000
    },
    {
      "epoch": 2.9615493801848407,
      "grad_norm": 2.8125,
      "learning_rate": 7.121142498137288e-07,
      "loss": 0.8005,
      "step": 845010
    },
    {
      "epoch": 2.961584427691736,
      "grad_norm": 2.90625,
      "learning_rate": 7.114652211500269e-07,
      "loss": 0.8409,
      "step": 845020
    },
    {
      "epoch": 2.961619475198632,
      "grad_norm": 2.796875,
      "learning_rate": 7.108161924863249e-07,
      "loss": 0.7891,
      "step": 845030
    },
    {
      "epoch": 2.961654522705527,
      "grad_norm": 3.03125,
      "learning_rate": 7.101671638226231e-07,
      "loss": 0.8124,
      "step": 845040
    },
    {
      "epoch": 2.961689570212423,
      "grad_norm": 2.96875,
      "learning_rate": 7.095181351589212e-07,
      "loss": 0.8289,
      "step": 845050
    },
    {
      "epoch": 2.9617246177193186,
      "grad_norm": 2.421875,
      "learning_rate": 7.088691064952192e-07,
      "loss": 0.7932,
      "step": 845060
    },
    {
      "epoch": 2.9617596652262144,
      "grad_norm": 2.875,
      "learning_rate": 7.082200778315175e-07,
      "loss": 0.806,
      "step": 845070
    },
    {
      "epoch": 2.9617947127331097,
      "grad_norm": 2.875,
      "learning_rate": 7.075710491678155e-07,
      "loss": 0.7558,
      "step": 845080
    },
    {
      "epoch": 2.9618297602400054,
      "grad_norm": 2.96875,
      "learning_rate": 7.069220205041136e-07,
      "loss": 0.8262,
      "step": 845090
    },
    {
      "epoch": 2.9618648077469008,
      "grad_norm": 2.78125,
      "learning_rate": 7.062729918404117e-07,
      "loss": 0.7971,
      "step": 845100
    },
    {
      "epoch": 2.9618998552537965,
      "grad_norm": 2.6875,
      "learning_rate": 7.056239631767098e-07,
      "loss": 0.7413,
      "step": 845110
    },
    {
      "epoch": 2.9619349027606923,
      "grad_norm": 3.0625,
      "learning_rate": 7.049749345130079e-07,
      "loss": 0.7204,
      "step": 845120
    },
    {
      "epoch": 2.9619699502675876,
      "grad_norm": 2.234375,
      "learning_rate": 7.043259058493059e-07,
      "loss": 0.7094,
      "step": 845130
    },
    {
      "epoch": 2.9620049977744833,
      "grad_norm": 2.984375,
      "learning_rate": 7.036768771856041e-07,
      "loss": 0.7784,
      "step": 845140
    },
    {
      "epoch": 2.9620400452813787,
      "grad_norm": 3.0,
      "learning_rate": 7.030278485219022e-07,
      "loss": 0.882,
      "step": 845150
    },
    {
      "epoch": 2.9620750927882744,
      "grad_norm": 3.203125,
      "learning_rate": 7.023788198582002e-07,
      "loss": 0.7812,
      "step": 845160
    },
    {
      "epoch": 2.96211014029517,
      "grad_norm": 3.078125,
      "learning_rate": 7.017297911944984e-07,
      "loss": 0.788,
      "step": 845170
    },
    {
      "epoch": 2.962145187802066,
      "grad_norm": 2.703125,
      "learning_rate": 7.010807625307964e-07,
      "loss": 0.8311,
      "step": 845180
    },
    {
      "epoch": 2.9621802353089612,
      "grad_norm": 2.890625,
      "learning_rate": 7.004317338670945e-07,
      "loss": 0.7997,
      "step": 845190
    },
    {
      "epoch": 2.962215282815857,
      "grad_norm": 2.75,
      "learning_rate": 6.997827052033926e-07,
      "loss": 0.6575,
      "step": 845200
    },
    {
      "epoch": 2.9622503303227523,
      "grad_norm": 2.53125,
      "learning_rate": 6.991336765396907e-07,
      "loss": 0.8568,
      "step": 845210
    },
    {
      "epoch": 2.962285377829648,
      "grad_norm": 2.71875,
      "learning_rate": 6.984846478759888e-07,
      "loss": 0.78,
      "step": 845220
    },
    {
      "epoch": 2.962320425336544,
      "grad_norm": 2.9375,
      "learning_rate": 6.978356192122869e-07,
      "loss": 0.7547,
      "step": 845230
    },
    {
      "epoch": 2.962355472843439,
      "grad_norm": 3.265625,
      "learning_rate": 6.97186590548585e-07,
      "loss": 0.8619,
      "step": 845240
    },
    {
      "epoch": 2.962390520350335,
      "grad_norm": 2.921875,
      "learning_rate": 6.965375618848832e-07,
      "loss": 0.7992,
      "step": 845250
    },
    {
      "epoch": 2.96242556785723,
      "grad_norm": 2.75,
      "learning_rate": 6.958885332211812e-07,
      "loss": 0.7502,
      "step": 845260
    },
    {
      "epoch": 2.962460615364126,
      "grad_norm": 2.84375,
      "learning_rate": 6.952395045574794e-07,
      "loss": 0.8143,
      "step": 845270
    },
    {
      "epoch": 2.9624956628710217,
      "grad_norm": 2.5625,
      "learning_rate": 6.945904758937775e-07,
      "loss": 0.7181,
      "step": 845280
    },
    {
      "epoch": 2.9625307103779175,
      "grad_norm": 3.046875,
      "learning_rate": 6.939414472300755e-07,
      "loss": 0.8225,
      "step": 845290
    },
    {
      "epoch": 2.962565757884813,
      "grad_norm": 2.734375,
      "learning_rate": 6.932924185663736e-07,
      "loss": 0.8109,
      "step": 845300
    },
    {
      "epoch": 2.9626008053917086,
      "grad_norm": 2.390625,
      "learning_rate": 6.926433899026717e-07,
      "loss": 0.7876,
      "step": 845310
    },
    {
      "epoch": 2.962635852898604,
      "grad_norm": 2.890625,
      "learning_rate": 6.919943612389698e-07,
      "loss": 0.7878,
      "step": 845320
    },
    {
      "epoch": 2.9626709004054996,
      "grad_norm": 3.296875,
      "learning_rate": 6.913453325752679e-07,
      "loss": 0.8229,
      "step": 845330
    },
    {
      "epoch": 2.9627059479123954,
      "grad_norm": 3.203125,
      "learning_rate": 6.90696303911566e-07,
      "loss": 0.6967,
      "step": 845340
    },
    {
      "epoch": 2.9627409954192907,
      "grad_norm": 2.703125,
      "learning_rate": 6.900472752478641e-07,
      "loss": 0.7792,
      "step": 845350
    },
    {
      "epoch": 2.9627760429261865,
      "grad_norm": 3.078125,
      "learning_rate": 6.893982465841621e-07,
      "loss": 0.7547,
      "step": 845360
    },
    {
      "epoch": 2.9628110904330818,
      "grad_norm": 2.890625,
      "learning_rate": 6.887492179204603e-07,
      "loss": 0.7883,
      "step": 845370
    },
    {
      "epoch": 2.9628461379399775,
      "grad_norm": 2.875,
      "learning_rate": 6.881001892567584e-07,
      "loss": 0.7806,
      "step": 845380
    },
    {
      "epoch": 2.9628811854468733,
      "grad_norm": 3.109375,
      "learning_rate": 6.874511605930564e-07,
      "loss": 0.725,
      "step": 845390
    },
    {
      "epoch": 2.962916232953769,
      "grad_norm": 2.9375,
      "learning_rate": 6.868021319293545e-07,
      "loss": 0.8,
      "step": 845400
    },
    {
      "epoch": 2.9629512804606644,
      "grad_norm": 2.78125,
      "learning_rate": 6.861531032656526e-07,
      "loss": 0.7546,
      "step": 845410
    },
    {
      "epoch": 2.96298632796756,
      "grad_norm": 3.015625,
      "learning_rate": 6.855040746019507e-07,
      "loss": 0.7289,
      "step": 845420
    },
    {
      "epoch": 2.9630213754744554,
      "grad_norm": 2.546875,
      "learning_rate": 6.848550459382488e-07,
      "loss": 0.7982,
      "step": 845430
    },
    {
      "epoch": 2.963056422981351,
      "grad_norm": 3.234375,
      "learning_rate": 6.84206017274547e-07,
      "loss": 0.7678,
      "step": 845440
    },
    {
      "epoch": 2.963091470488247,
      "grad_norm": 2.703125,
      "learning_rate": 6.835569886108451e-07,
      "loss": 0.7844,
      "step": 845450
    },
    {
      "epoch": 2.9631265179951423,
      "grad_norm": 2.40625,
      "learning_rate": 6.829079599471432e-07,
      "loss": 0.7602,
      "step": 845460
    },
    {
      "epoch": 2.963161565502038,
      "grad_norm": 2.515625,
      "learning_rate": 6.822589312834413e-07,
      "loss": 0.8185,
      "step": 845470
    },
    {
      "epoch": 2.9631966130089333,
      "grad_norm": 2.890625,
      "learning_rate": 6.816099026197394e-07,
      "loss": 0.8026,
      "step": 845480
    },
    {
      "epoch": 2.963231660515829,
      "grad_norm": 2.5,
      "learning_rate": 6.809608739560374e-07,
      "loss": 0.7621,
      "step": 845490
    },
    {
      "epoch": 2.963266708022725,
      "grad_norm": 2.765625,
      "learning_rate": 6.803118452923355e-07,
      "loss": 0.7361,
      "step": 845500
    },
    {
      "epoch": 2.9633017555296206,
      "grad_norm": 3.15625,
      "learning_rate": 6.796628166286337e-07,
      "loss": 0.8221,
      "step": 845510
    },
    {
      "epoch": 2.963336803036516,
      "grad_norm": 2.9375,
      "learning_rate": 6.790137879649317e-07,
      "loss": 0.8395,
      "step": 845520
    },
    {
      "epoch": 2.9633718505434117,
      "grad_norm": 3.125,
      "learning_rate": 6.783647593012298e-07,
      "loss": 0.8155,
      "step": 845530
    },
    {
      "epoch": 2.963406898050307,
      "grad_norm": 2.5625,
      "learning_rate": 6.777157306375279e-07,
      "loss": 0.8348,
      "step": 845540
    },
    {
      "epoch": 2.9634419455572028,
      "grad_norm": 2.4375,
      "learning_rate": 6.77066701973826e-07,
      "loss": 0.7414,
      "step": 845550
    },
    {
      "epoch": 2.9634769930640985,
      "grad_norm": 3.296875,
      "learning_rate": 6.764176733101241e-07,
      "loss": 0.753,
      "step": 845560
    },
    {
      "epoch": 2.963512040570994,
      "grad_norm": 3.21875,
      "learning_rate": 6.757686446464221e-07,
      "loss": 0.9094,
      "step": 845570
    },
    {
      "epoch": 2.9635470880778896,
      "grad_norm": 2.984375,
      "learning_rate": 6.751196159827203e-07,
      "loss": 0.8262,
      "step": 845580
    },
    {
      "epoch": 2.9635821355847853,
      "grad_norm": 2.59375,
      "learning_rate": 6.744705873190184e-07,
      "loss": 0.8111,
      "step": 845590
    },
    {
      "epoch": 2.9636171830916807,
      "grad_norm": 3.21875,
      "learning_rate": 6.738215586553164e-07,
      "loss": 0.8742,
      "step": 845600
    },
    {
      "epoch": 2.9636522305985764,
      "grad_norm": 3.4375,
      "learning_rate": 6.731725299916146e-07,
      "loss": 0.7116,
      "step": 845610
    },
    {
      "epoch": 2.963687278105472,
      "grad_norm": 2.65625,
      "learning_rate": 6.725235013279126e-07,
      "loss": 0.7863,
      "step": 845620
    },
    {
      "epoch": 2.9637223256123675,
      "grad_norm": 2.546875,
      "learning_rate": 6.718744726642107e-07,
      "loss": 0.8032,
      "step": 845630
    },
    {
      "epoch": 2.9637573731192632,
      "grad_norm": 3.140625,
      "learning_rate": 6.712254440005089e-07,
      "loss": 0.7772,
      "step": 845640
    },
    {
      "epoch": 2.9637924206261586,
      "grad_norm": 2.921875,
      "learning_rate": 6.70576415336807e-07,
      "loss": 0.8307,
      "step": 845650
    },
    {
      "epoch": 2.9638274681330543,
      "grad_norm": 2.921875,
      "learning_rate": 6.699273866731051e-07,
      "loss": 0.7472,
      "step": 845660
    },
    {
      "epoch": 2.96386251563995,
      "grad_norm": 3.390625,
      "learning_rate": 6.692783580094031e-07,
      "loss": 0.8027,
      "step": 845670
    },
    {
      "epoch": 2.963897563146846,
      "grad_norm": 3.234375,
      "learning_rate": 6.686293293457013e-07,
      "loss": 0.7955,
      "step": 845680
    },
    {
      "epoch": 2.963932610653741,
      "grad_norm": 2.9375,
      "learning_rate": 6.679803006819994e-07,
      "loss": 0.7798,
      "step": 845690
    },
    {
      "epoch": 2.963967658160637,
      "grad_norm": 2.984375,
      "learning_rate": 6.673312720182974e-07,
      "loss": 0.8402,
      "step": 845700
    },
    {
      "epoch": 2.964002705667532,
      "grad_norm": 3.0625,
      "learning_rate": 6.666822433545956e-07,
      "loss": 0.8373,
      "step": 845710
    },
    {
      "epoch": 2.964037753174428,
      "grad_norm": 3.109375,
      "learning_rate": 6.660332146908936e-07,
      "loss": 0.8383,
      "step": 845720
    },
    {
      "epoch": 2.9640728006813237,
      "grad_norm": 2.96875,
      "learning_rate": 6.653841860271917e-07,
      "loss": 0.8268,
      "step": 845730
    },
    {
      "epoch": 2.964107848188219,
      "grad_norm": 2.53125,
      "learning_rate": 6.647351573634899e-07,
      "loss": 0.7651,
      "step": 845740
    },
    {
      "epoch": 2.964142895695115,
      "grad_norm": 3.125,
      "learning_rate": 6.640861286997879e-07,
      "loss": 0.8076,
      "step": 845750
    },
    {
      "epoch": 2.96417794320201,
      "grad_norm": 2.953125,
      "learning_rate": 6.63437100036086e-07,
      "loss": 0.9371,
      "step": 845760
    },
    {
      "epoch": 2.964212990708906,
      "grad_norm": 2.65625,
      "learning_rate": 6.627880713723841e-07,
      "loss": 0.7655,
      "step": 845770
    },
    {
      "epoch": 2.9642480382158016,
      "grad_norm": 2.65625,
      "learning_rate": 6.621390427086822e-07,
      "loss": 0.8003,
      "step": 845780
    },
    {
      "epoch": 2.9642830857226974,
      "grad_norm": 2.875,
      "learning_rate": 6.614900140449803e-07,
      "loss": 0.7808,
      "step": 845790
    },
    {
      "epoch": 2.9643181332295927,
      "grad_norm": 3.09375,
      "learning_rate": 6.608409853812783e-07,
      "loss": 0.813,
      "step": 845800
    },
    {
      "epoch": 2.9643531807364885,
      "grad_norm": 2.71875,
      "learning_rate": 6.601919567175765e-07,
      "loss": 0.8441,
      "step": 845810
    },
    {
      "epoch": 2.9643882282433838,
      "grad_norm": 2.953125,
      "learning_rate": 6.595429280538747e-07,
      "loss": 0.7072,
      "step": 845820
    },
    {
      "epoch": 2.9644232757502795,
      "grad_norm": 3.109375,
      "learning_rate": 6.588938993901727e-07,
      "loss": 0.8815,
      "step": 845830
    },
    {
      "epoch": 2.9644583232571753,
      "grad_norm": 3.234375,
      "learning_rate": 6.582448707264709e-07,
      "loss": 0.8194,
      "step": 845840
    },
    {
      "epoch": 2.9644933707640706,
      "grad_norm": 3.0,
      "learning_rate": 6.575958420627689e-07,
      "loss": 0.7845,
      "step": 845850
    },
    {
      "epoch": 2.9645284182709664,
      "grad_norm": 2.96875,
      "learning_rate": 6.56946813399067e-07,
      "loss": 0.8682,
      "step": 845860
    },
    {
      "epoch": 2.9645634657778617,
      "grad_norm": 2.75,
      "learning_rate": 6.562977847353651e-07,
      "loss": 0.8866,
      "step": 845870
    },
    {
      "epoch": 2.9645985132847574,
      "grad_norm": 2.9375,
      "learning_rate": 6.556487560716632e-07,
      "loss": 0.8536,
      "step": 845880
    },
    {
      "epoch": 2.964633560791653,
      "grad_norm": 2.765625,
      "learning_rate": 6.549997274079613e-07,
      "loss": 0.7471,
      "step": 845890
    },
    {
      "epoch": 2.964668608298549,
      "grad_norm": 3.296875,
      "learning_rate": 6.543506987442594e-07,
      "loss": 0.8056,
      "step": 845900
    },
    {
      "epoch": 2.9647036558054443,
      "grad_norm": 2.9375,
      "learning_rate": 6.537016700805575e-07,
      "loss": 0.7158,
      "step": 845910
    },
    {
      "epoch": 2.96473870331234,
      "grad_norm": 3.1875,
      "learning_rate": 6.530526414168556e-07,
      "loss": 0.8655,
      "step": 845920
    },
    {
      "epoch": 2.9647737508192353,
      "grad_norm": 2.859375,
      "learning_rate": 6.524036127531536e-07,
      "loss": 0.8316,
      "step": 845930
    },
    {
      "epoch": 2.964808798326131,
      "grad_norm": 3.140625,
      "learning_rate": 6.517545840894518e-07,
      "loss": 0.8194,
      "step": 845940
    },
    {
      "epoch": 2.964843845833027,
      "grad_norm": 3.1875,
      "learning_rate": 6.511055554257498e-07,
      "loss": 0.8043,
      "step": 845950
    },
    {
      "epoch": 2.964878893339922,
      "grad_norm": 3.03125,
      "learning_rate": 6.504565267620479e-07,
      "loss": 0.7519,
      "step": 845960
    },
    {
      "epoch": 2.964913940846818,
      "grad_norm": 2.890625,
      "learning_rate": 6.49807498098346e-07,
      "loss": 0.8549,
      "step": 845970
    },
    {
      "epoch": 2.9649489883537132,
      "grad_norm": 3.078125,
      "learning_rate": 6.491584694346441e-07,
      "loss": 0.7847,
      "step": 845980
    },
    {
      "epoch": 2.964984035860609,
      "grad_norm": 3.21875,
      "learning_rate": 6.485094407709422e-07,
      "loss": 0.7429,
      "step": 845990
    },
    {
      "epoch": 2.9650190833675047,
      "grad_norm": 2.75,
      "learning_rate": 6.478604121072403e-07,
      "loss": 0.8054,
      "step": 846000
    },
    {
      "epoch": 2.9650541308744005,
      "grad_norm": 3.109375,
      "learning_rate": 6.472113834435385e-07,
      "loss": 0.7849,
      "step": 846010
    },
    {
      "epoch": 2.965089178381296,
      "grad_norm": 3.078125,
      "learning_rate": 6.465623547798366e-07,
      "loss": 0.7635,
      "step": 846020
    },
    {
      "epoch": 2.9651242258881916,
      "grad_norm": 3.265625,
      "learning_rate": 6.459133261161346e-07,
      "loss": 0.7122,
      "step": 846030
    },
    {
      "epoch": 2.965159273395087,
      "grad_norm": 2.8125,
      "learning_rate": 6.452642974524328e-07,
      "loss": 0.8282,
      "step": 846040
    },
    {
      "epoch": 2.9651943209019826,
      "grad_norm": 2.609375,
      "learning_rate": 6.446152687887309e-07,
      "loss": 0.7696,
      "step": 846050
    },
    {
      "epoch": 2.9652293684088784,
      "grad_norm": 2.875,
      "learning_rate": 6.439662401250289e-07,
      "loss": 0.7797,
      "step": 846060
    },
    {
      "epoch": 2.9652644159157737,
      "grad_norm": 3.265625,
      "learning_rate": 6.43317211461327e-07,
      "loss": 0.7852,
      "step": 846070
    },
    {
      "epoch": 2.9652994634226695,
      "grad_norm": 3.1875,
      "learning_rate": 6.426681827976251e-07,
      "loss": 0.7899,
      "step": 846080
    },
    {
      "epoch": 2.965334510929565,
      "grad_norm": 2.78125,
      "learning_rate": 6.420191541339232e-07,
      "loss": 0.7692,
      "step": 846090
    },
    {
      "epoch": 2.9653695584364606,
      "grad_norm": 2.921875,
      "learning_rate": 6.413701254702213e-07,
      "loss": 0.8235,
      "step": 846100
    },
    {
      "epoch": 2.9654046059433563,
      "grad_norm": 2.828125,
      "learning_rate": 6.407210968065194e-07,
      "loss": 0.8176,
      "step": 846110
    },
    {
      "epoch": 2.965439653450252,
      "grad_norm": 3.1875,
      "learning_rate": 6.400720681428175e-07,
      "loss": 0.8516,
      "step": 846120
    },
    {
      "epoch": 2.9654747009571474,
      "grad_norm": 3.359375,
      "learning_rate": 6.394230394791156e-07,
      "loss": 0.8444,
      "step": 846130
    },
    {
      "epoch": 2.965509748464043,
      "grad_norm": 2.984375,
      "learning_rate": 6.387740108154137e-07,
      "loss": 0.8244,
      "step": 846140
    },
    {
      "epoch": 2.9655447959709385,
      "grad_norm": 2.96875,
      "learning_rate": 6.381249821517118e-07,
      "loss": 0.8401,
      "step": 846150
    },
    {
      "epoch": 2.965579843477834,
      "grad_norm": 2.5625,
      "learning_rate": 6.374759534880098e-07,
      "loss": 0.8159,
      "step": 846160
    },
    {
      "epoch": 2.96561489098473,
      "grad_norm": 2.859375,
      "learning_rate": 6.368269248243079e-07,
      "loss": 0.8592,
      "step": 846170
    },
    {
      "epoch": 2.9656499384916253,
      "grad_norm": 3.109375,
      "learning_rate": 6.361778961606061e-07,
      "loss": 0.8365,
      "step": 846180
    },
    {
      "epoch": 2.965684985998521,
      "grad_norm": 2.6875,
      "learning_rate": 6.355288674969041e-07,
      "loss": 0.7742,
      "step": 846190
    },
    {
      "epoch": 2.9657200335054164,
      "grad_norm": 3.1875,
      "learning_rate": 6.348798388332022e-07,
      "loss": 0.8408,
      "step": 846200
    },
    {
      "epoch": 2.965755081012312,
      "grad_norm": 2.90625,
      "learning_rate": 6.342308101695004e-07,
      "loss": 0.8435,
      "step": 846210
    },
    {
      "epoch": 2.965790128519208,
      "grad_norm": 2.734375,
      "learning_rate": 6.335817815057985e-07,
      "loss": 0.8071,
      "step": 846220
    },
    {
      "epoch": 2.9658251760261036,
      "grad_norm": 3.4375,
      "learning_rate": 6.329327528420966e-07,
      "loss": 0.7638,
      "step": 846230
    },
    {
      "epoch": 2.965860223532999,
      "grad_norm": 2.796875,
      "learning_rate": 6.322837241783946e-07,
      "loss": 0.7504,
      "step": 846240
    },
    {
      "epoch": 2.9658952710398947,
      "grad_norm": 2.859375,
      "learning_rate": 6.316346955146928e-07,
      "loss": 0.8368,
      "step": 846250
    },
    {
      "epoch": 2.96593031854679,
      "grad_norm": 2.71875,
      "learning_rate": 6.309856668509908e-07,
      "loss": 0.7878,
      "step": 846260
    },
    {
      "epoch": 2.9659653660536858,
      "grad_norm": 2.734375,
      "learning_rate": 6.303366381872889e-07,
      "loss": 0.7102,
      "step": 846270
    },
    {
      "epoch": 2.9660004135605815,
      "grad_norm": 2.5625,
      "learning_rate": 6.296876095235871e-07,
      "loss": 0.7948,
      "step": 846280
    },
    {
      "epoch": 2.966035461067477,
      "grad_norm": 3.046875,
      "learning_rate": 6.290385808598851e-07,
      "loss": 0.7674,
      "step": 846290
    },
    {
      "epoch": 2.9660705085743726,
      "grad_norm": 3.265625,
      "learning_rate": 6.283895521961832e-07,
      "loss": 0.8784,
      "step": 846300
    },
    {
      "epoch": 2.966105556081268,
      "grad_norm": 2.984375,
      "learning_rate": 6.277405235324813e-07,
      "loss": 0.7825,
      "step": 846310
    },
    {
      "epoch": 2.9661406035881637,
      "grad_norm": 2.78125,
      "learning_rate": 6.270914948687794e-07,
      "loss": 0.8329,
      "step": 846320
    },
    {
      "epoch": 2.9661756510950594,
      "grad_norm": 3.078125,
      "learning_rate": 6.264424662050775e-07,
      "loss": 0.7955,
      "step": 846330
    },
    {
      "epoch": 2.966210698601955,
      "grad_norm": 2.515625,
      "learning_rate": 6.257934375413755e-07,
      "loss": 0.8382,
      "step": 846340
    },
    {
      "epoch": 2.9662457461088505,
      "grad_norm": 3.21875,
      "learning_rate": 6.251444088776737e-07,
      "loss": 0.7834,
      "step": 846350
    },
    {
      "epoch": 2.9662807936157463,
      "grad_norm": 3.15625,
      "learning_rate": 6.244953802139718e-07,
      "loss": 0.8611,
      "step": 846360
    },
    {
      "epoch": 2.9663158411226416,
      "grad_norm": 3.015625,
      "learning_rate": 6.238463515502698e-07,
      "loss": 0.8134,
      "step": 846370
    },
    {
      "epoch": 2.9663508886295373,
      "grad_norm": 2.65625,
      "learning_rate": 6.23197322886568e-07,
      "loss": 0.8077,
      "step": 846380
    },
    {
      "epoch": 2.966385936136433,
      "grad_norm": 2.828125,
      "learning_rate": 6.22548294222866e-07,
      "loss": 0.7957,
      "step": 846390
    },
    {
      "epoch": 2.9664209836433284,
      "grad_norm": 2.625,
      "learning_rate": 6.218992655591642e-07,
      "loss": 0.8264,
      "step": 846400
    },
    {
      "epoch": 2.966456031150224,
      "grad_norm": 3.046875,
      "learning_rate": 6.212502368954624e-07,
      "loss": 0.7833,
      "step": 846410
    },
    {
      "epoch": 2.9664910786571195,
      "grad_norm": 2.84375,
      "learning_rate": 6.206012082317604e-07,
      "loss": 0.7853,
      "step": 846420
    },
    {
      "epoch": 2.9665261261640152,
      "grad_norm": 2.71875,
      "learning_rate": 6.199521795680585e-07,
      "loss": 0.8142,
      "step": 846430
    },
    {
      "epoch": 2.966561173670911,
      "grad_norm": 2.984375,
      "learning_rate": 6.193031509043566e-07,
      "loss": 0.76,
      "step": 846440
    },
    {
      "epoch": 2.9665962211778067,
      "grad_norm": 2.921875,
      "learning_rate": 6.186541222406547e-07,
      "loss": 0.8058,
      "step": 846450
    },
    {
      "epoch": 2.966631268684702,
      "grad_norm": 2.78125,
      "learning_rate": 6.180050935769528e-07,
      "loss": 0.7222,
      "step": 846460
    },
    {
      "epoch": 2.966666316191598,
      "grad_norm": 2.703125,
      "learning_rate": 6.173560649132508e-07,
      "loss": 0.8004,
      "step": 846470
    },
    {
      "epoch": 2.966701363698493,
      "grad_norm": 2.6875,
      "learning_rate": 6.16707036249549e-07,
      "loss": 0.8583,
      "step": 846480
    },
    {
      "epoch": 2.966736411205389,
      "grad_norm": 2.53125,
      "learning_rate": 6.160580075858471e-07,
      "loss": 0.7981,
      "step": 846490
    },
    {
      "epoch": 2.9667714587122846,
      "grad_norm": 2.296875,
      "learning_rate": 6.154089789221451e-07,
      "loss": 0.7326,
      "step": 846500
    },
    {
      "epoch": 2.96680650621918,
      "grad_norm": 3.328125,
      "learning_rate": 6.147599502584433e-07,
      "loss": 0.7855,
      "step": 846510
    },
    {
      "epoch": 2.9668415537260757,
      "grad_norm": 2.84375,
      "learning_rate": 6.141109215947413e-07,
      "loss": 0.8425,
      "step": 846520
    },
    {
      "epoch": 2.966876601232971,
      "grad_norm": 3.390625,
      "learning_rate": 6.134618929310394e-07,
      "loss": 0.8908,
      "step": 846530
    },
    {
      "epoch": 2.966911648739867,
      "grad_norm": 2.90625,
      "learning_rate": 6.128128642673375e-07,
      "loss": 0.8038,
      "step": 846540
    },
    {
      "epoch": 2.9669466962467625,
      "grad_norm": 2.75,
      "learning_rate": 6.121638356036356e-07,
      "loss": 0.8517,
      "step": 846550
    },
    {
      "epoch": 2.9669817437536583,
      "grad_norm": 3.296875,
      "learning_rate": 6.115148069399337e-07,
      "loss": 0.8338,
      "step": 846560
    },
    {
      "epoch": 2.9670167912605536,
      "grad_norm": 2.875,
      "learning_rate": 6.108657782762317e-07,
      "loss": 0.7497,
      "step": 846570
    },
    {
      "epoch": 2.9670518387674494,
      "grad_norm": 2.75,
      "learning_rate": 6.1021674961253e-07,
      "loss": 0.7557,
      "step": 846580
    },
    {
      "epoch": 2.9670868862743447,
      "grad_norm": 2.9375,
      "learning_rate": 6.095677209488281e-07,
      "loss": 0.7812,
      "step": 846590
    },
    {
      "epoch": 2.9671219337812405,
      "grad_norm": 2.5625,
      "learning_rate": 6.089186922851261e-07,
      "loss": 0.7797,
      "step": 846600
    },
    {
      "epoch": 2.967156981288136,
      "grad_norm": 3.25,
      "learning_rate": 6.082696636214243e-07,
      "loss": 0.8343,
      "step": 846610
    },
    {
      "epoch": 2.9671920287950315,
      "grad_norm": 2.96875,
      "learning_rate": 6.076206349577223e-07,
      "loss": 0.7841,
      "step": 846620
    },
    {
      "epoch": 2.9672270763019273,
      "grad_norm": 2.84375,
      "learning_rate": 6.069716062940204e-07,
      "loss": 0.7562,
      "step": 846630
    },
    {
      "epoch": 2.9672621238088226,
      "grad_norm": 2.828125,
      "learning_rate": 6.063225776303185e-07,
      "loss": 0.783,
      "step": 846640
    },
    {
      "epoch": 2.9672971713157184,
      "grad_norm": 3.21875,
      "learning_rate": 6.056735489666166e-07,
      "loss": 0.839,
      "step": 846650
    },
    {
      "epoch": 2.967332218822614,
      "grad_norm": 2.546875,
      "learning_rate": 6.050245203029147e-07,
      "loss": 0.8387,
      "step": 846660
    },
    {
      "epoch": 2.96736726632951,
      "grad_norm": 2.90625,
      "learning_rate": 6.043754916392128e-07,
      "loss": 0.8251,
      "step": 846670
    },
    {
      "epoch": 2.967402313836405,
      "grad_norm": 3.328125,
      "learning_rate": 6.037264629755109e-07,
      "loss": 0.8721,
      "step": 846680
    },
    {
      "epoch": 2.967437361343301,
      "grad_norm": 2.78125,
      "learning_rate": 6.03077434311809e-07,
      "loss": 0.7704,
      "step": 846690
    },
    {
      "epoch": 2.9674724088501963,
      "grad_norm": 2.96875,
      "learning_rate": 6.02428405648107e-07,
      "loss": 0.7937,
      "step": 846700
    },
    {
      "epoch": 2.967507456357092,
      "grad_norm": 2.453125,
      "learning_rate": 6.017793769844052e-07,
      "loss": 0.813,
      "step": 846710
    },
    {
      "epoch": 2.9675425038639878,
      "grad_norm": 3.234375,
      "learning_rate": 6.011303483207033e-07,
      "loss": 0.7912,
      "step": 846720
    },
    {
      "epoch": 2.967577551370883,
      "grad_norm": 2.90625,
      "learning_rate": 6.004813196570013e-07,
      "loss": 0.815,
      "step": 846730
    },
    {
      "epoch": 2.967612598877779,
      "grad_norm": 2.859375,
      "learning_rate": 5.998322909932994e-07,
      "loss": 0.8488,
      "step": 846740
    },
    {
      "epoch": 2.967647646384674,
      "grad_norm": 2.8125,
      "learning_rate": 5.991832623295975e-07,
      "loss": 0.7829,
      "step": 846750
    },
    {
      "epoch": 2.96768269389157,
      "grad_norm": 2.40625,
      "learning_rate": 5.985342336658956e-07,
      "loss": 0.7937,
      "step": 846760
    },
    {
      "epoch": 2.9677177413984657,
      "grad_norm": 2.359375,
      "learning_rate": 5.978852050021937e-07,
      "loss": 0.8566,
      "step": 846770
    },
    {
      "epoch": 2.9677527889053614,
      "grad_norm": 3.0,
      "learning_rate": 5.972361763384919e-07,
      "loss": 0.8328,
      "step": 846780
    },
    {
      "epoch": 2.9677878364122567,
      "grad_norm": 3.1875,
      "learning_rate": 5.9658714767479e-07,
      "loss": 0.7167,
      "step": 846790
    },
    {
      "epoch": 2.9678228839191525,
      "grad_norm": 3.1875,
      "learning_rate": 5.959381190110881e-07,
      "loss": 0.8094,
      "step": 846800
    },
    {
      "epoch": 2.967857931426048,
      "grad_norm": 3.15625,
      "learning_rate": 5.952890903473862e-07,
      "loss": 0.8255,
      "step": 846810
    },
    {
      "epoch": 2.9678929789329436,
      "grad_norm": 3.359375,
      "learning_rate": 5.946400616836843e-07,
      "loss": 0.8112,
      "step": 846820
    },
    {
      "epoch": 2.9679280264398393,
      "grad_norm": 3.0625,
      "learning_rate": 5.939910330199823e-07,
      "loss": 0.8117,
      "step": 846830
    },
    {
      "epoch": 2.9679630739467346,
      "grad_norm": 2.9375,
      "learning_rate": 5.933420043562804e-07,
      "loss": 0.7464,
      "step": 846840
    },
    {
      "epoch": 2.9679981214536304,
      "grad_norm": 2.703125,
      "learning_rate": 5.926929756925785e-07,
      "loss": 0.6996,
      "step": 846850
    },
    {
      "epoch": 2.968033168960526,
      "grad_norm": 3.203125,
      "learning_rate": 5.920439470288766e-07,
      "loss": 0.8385,
      "step": 846860
    },
    {
      "epoch": 2.9680682164674215,
      "grad_norm": 3.046875,
      "learning_rate": 5.913949183651747e-07,
      "loss": 0.7955,
      "step": 846870
    },
    {
      "epoch": 2.9681032639743172,
      "grad_norm": 3.1875,
      "learning_rate": 5.907458897014728e-07,
      "loss": 0.7711,
      "step": 846880
    },
    {
      "epoch": 2.968138311481213,
      "grad_norm": 3.484375,
      "learning_rate": 5.900968610377709e-07,
      "loss": 0.8983,
      "step": 846890
    },
    {
      "epoch": 2.9681733589881083,
      "grad_norm": 3.0,
      "learning_rate": 5.89447832374069e-07,
      "loss": 0.7762,
      "step": 846900
    },
    {
      "epoch": 2.968208406495004,
      "grad_norm": 3.109375,
      "learning_rate": 5.887988037103671e-07,
      "loss": 0.7528,
      "step": 846910
    },
    {
      "epoch": 2.9682434540018994,
      "grad_norm": 3.015625,
      "learning_rate": 5.881497750466652e-07,
      "loss": 0.7369,
      "step": 846920
    },
    {
      "epoch": 2.968278501508795,
      "grad_norm": 2.859375,
      "learning_rate": 5.875007463829632e-07,
      "loss": 0.7732,
      "step": 846930
    },
    {
      "epoch": 2.968313549015691,
      "grad_norm": 2.53125,
      "learning_rate": 5.868517177192613e-07,
      "loss": 0.7843,
      "step": 846940
    },
    {
      "epoch": 2.968348596522586,
      "grad_norm": 2.90625,
      "learning_rate": 5.862026890555595e-07,
      "loss": 0.8823,
      "step": 846950
    },
    {
      "epoch": 2.968383644029482,
      "grad_norm": 3.25,
      "learning_rate": 5.855536603918575e-07,
      "loss": 0.8269,
      "step": 846960
    },
    {
      "epoch": 2.9684186915363777,
      "grad_norm": 2.734375,
      "learning_rate": 5.849046317281557e-07,
      "loss": 0.7701,
      "step": 846970
    },
    {
      "epoch": 2.968453739043273,
      "grad_norm": 3.265625,
      "learning_rate": 5.842556030644538e-07,
      "loss": 0.8067,
      "step": 846980
    },
    {
      "epoch": 2.968488786550169,
      "grad_norm": 2.625,
      "learning_rate": 5.836065744007519e-07,
      "loss": 0.7218,
      "step": 846990
    },
    {
      "epoch": 2.9685238340570645,
      "grad_norm": 3.03125,
      "learning_rate": 5.8295754573705e-07,
      "loss": 0.8392,
      "step": 847000
    },
    {
      "epoch": 2.96855888156396,
      "grad_norm": 3.375,
      "learning_rate": 5.82308517073348e-07,
      "loss": 0.7699,
      "step": 847010
    },
    {
      "epoch": 2.9685939290708556,
      "grad_norm": 3.265625,
      "learning_rate": 5.816594884096462e-07,
      "loss": 0.8566,
      "step": 847020
    },
    {
      "epoch": 2.968628976577751,
      "grad_norm": 2.671875,
      "learning_rate": 5.810104597459443e-07,
      "loss": 0.7651,
      "step": 847030
    },
    {
      "epoch": 2.9686640240846467,
      "grad_norm": 3.359375,
      "learning_rate": 5.803614310822423e-07,
      "loss": 0.7838,
      "step": 847040
    },
    {
      "epoch": 2.9686990715915424,
      "grad_norm": 2.9375,
      "learning_rate": 5.797124024185405e-07,
      "loss": 0.8126,
      "step": 847050
    },
    {
      "epoch": 2.968734119098438,
      "grad_norm": 2.5625,
      "learning_rate": 5.790633737548385e-07,
      "loss": 0.7683,
      "step": 847060
    },
    {
      "epoch": 2.9687691666053335,
      "grad_norm": 2.875,
      "learning_rate": 5.784143450911366e-07,
      "loss": 0.7359,
      "step": 847070
    },
    {
      "epoch": 2.9688042141122293,
      "grad_norm": 3.078125,
      "learning_rate": 5.777653164274348e-07,
      "loss": 0.8095,
      "step": 847080
    },
    {
      "epoch": 2.9688392616191246,
      "grad_norm": 2.28125,
      "learning_rate": 5.771162877637328e-07,
      "loss": 0.713,
      "step": 847090
    },
    {
      "epoch": 2.9688743091260203,
      "grad_norm": 3.046875,
      "learning_rate": 5.764672591000309e-07,
      "loss": 0.7573,
      "step": 847100
    },
    {
      "epoch": 2.968909356632916,
      "grad_norm": 3.421875,
      "learning_rate": 5.75818230436329e-07,
      "loss": 0.8307,
      "step": 847110
    },
    {
      "epoch": 2.9689444041398114,
      "grad_norm": 2.765625,
      "learning_rate": 5.751692017726271e-07,
      "loss": 0.8218,
      "step": 847120
    },
    {
      "epoch": 2.968979451646707,
      "grad_norm": 3.203125,
      "learning_rate": 5.745201731089252e-07,
      "loss": 0.8769,
      "step": 847130
    },
    {
      "epoch": 2.9690144991536025,
      "grad_norm": 3.40625,
      "learning_rate": 5.738711444452232e-07,
      "loss": 0.7719,
      "step": 847140
    },
    {
      "epoch": 2.9690495466604983,
      "grad_norm": 3.5,
      "learning_rate": 5.732221157815215e-07,
      "loss": 0.8384,
      "step": 847150
    },
    {
      "epoch": 2.969084594167394,
      "grad_norm": 2.640625,
      "learning_rate": 5.725730871178194e-07,
      "loss": 0.7868,
      "step": 847160
    },
    {
      "epoch": 2.9691196416742898,
      "grad_norm": 2.953125,
      "learning_rate": 5.719240584541176e-07,
      "loss": 0.7473,
      "step": 847170
    },
    {
      "epoch": 2.969154689181185,
      "grad_norm": 3.0,
      "learning_rate": 5.712750297904158e-07,
      "loss": 0.8239,
      "step": 847180
    },
    {
      "epoch": 2.969189736688081,
      "grad_norm": 2.921875,
      "learning_rate": 5.706260011267138e-07,
      "loss": 0.8132,
      "step": 847190
    },
    {
      "epoch": 2.969224784194976,
      "grad_norm": 2.6875,
      "learning_rate": 5.699769724630119e-07,
      "loss": 0.7936,
      "step": 847200
    },
    {
      "epoch": 2.969259831701872,
      "grad_norm": 3.015625,
      "learning_rate": 5.6932794379931e-07,
      "loss": 0.7863,
      "step": 847210
    },
    {
      "epoch": 2.9692948792087677,
      "grad_norm": 3.1875,
      "learning_rate": 5.686789151356081e-07,
      "loss": 0.7306,
      "step": 847220
    },
    {
      "epoch": 2.969329926715663,
      "grad_norm": 3.125,
      "learning_rate": 5.680298864719062e-07,
      "loss": 0.7904,
      "step": 847230
    },
    {
      "epoch": 2.9693649742225587,
      "grad_norm": 2.578125,
      "learning_rate": 5.673808578082042e-07,
      "loss": 0.7914,
      "step": 847240
    },
    {
      "epoch": 2.969400021729454,
      "grad_norm": 3.109375,
      "learning_rate": 5.667318291445024e-07,
      "loss": 0.7936,
      "step": 847250
    },
    {
      "epoch": 2.96943506923635,
      "grad_norm": 3.296875,
      "learning_rate": 5.660828004808005e-07,
      "loss": 0.9145,
      "step": 847260
    },
    {
      "epoch": 2.9694701167432456,
      "grad_norm": 2.734375,
      "learning_rate": 5.654337718170985e-07,
      "loss": 0.7822,
      "step": 847270
    },
    {
      "epoch": 2.9695051642501413,
      "grad_norm": 2.453125,
      "learning_rate": 5.647847431533967e-07,
      "loss": 0.741,
      "step": 847280
    },
    {
      "epoch": 2.9695402117570366,
      "grad_norm": 3.09375,
      "learning_rate": 5.641357144896947e-07,
      "loss": 0.7893,
      "step": 847290
    },
    {
      "epoch": 2.9695752592639324,
      "grad_norm": 2.609375,
      "learning_rate": 5.634866858259928e-07,
      "loss": 0.8204,
      "step": 847300
    },
    {
      "epoch": 2.9696103067708277,
      "grad_norm": 3.09375,
      "learning_rate": 5.628376571622909e-07,
      "loss": 0.7877,
      "step": 847310
    },
    {
      "epoch": 2.9696453542777235,
      "grad_norm": 2.8125,
      "learning_rate": 5.62188628498589e-07,
      "loss": 0.809,
      "step": 847320
    },
    {
      "epoch": 2.9696804017846192,
      "grad_norm": 3.28125,
      "learning_rate": 5.615395998348871e-07,
      "loss": 0.7662,
      "step": 847330
    },
    {
      "epoch": 2.9697154492915145,
      "grad_norm": 3.15625,
      "learning_rate": 5.608905711711852e-07,
      "loss": 0.7774,
      "step": 847340
    },
    {
      "epoch": 2.9697504967984103,
      "grad_norm": 2.609375,
      "learning_rate": 5.602415425074834e-07,
      "loss": 0.7517,
      "step": 847350
    },
    {
      "epoch": 2.9697855443053056,
      "grad_norm": 3.34375,
      "learning_rate": 5.595925138437815e-07,
      "loss": 0.8303,
      "step": 847360
    },
    {
      "epoch": 2.9698205918122014,
      "grad_norm": 2.953125,
      "learning_rate": 5.589434851800795e-07,
      "loss": 0.8346,
      "step": 847370
    },
    {
      "epoch": 2.969855639319097,
      "grad_norm": 2.515625,
      "learning_rate": 5.582944565163777e-07,
      "loss": 0.7213,
      "step": 847380
    },
    {
      "epoch": 2.969890686825993,
      "grad_norm": 3.078125,
      "learning_rate": 5.576454278526758e-07,
      "loss": 0.9,
      "step": 847390
    },
    {
      "epoch": 2.969925734332888,
      "grad_norm": 2.59375,
      "learning_rate": 5.569963991889738e-07,
      "loss": 0.7769,
      "step": 847400
    },
    {
      "epoch": 2.969960781839784,
      "grad_norm": 2.59375,
      "learning_rate": 5.563473705252719e-07,
      "loss": 0.8045,
      "step": 847410
    },
    {
      "epoch": 2.9699958293466793,
      "grad_norm": 2.828125,
      "learning_rate": 5.5569834186157e-07,
      "loss": 0.8211,
      "step": 847420
    },
    {
      "epoch": 2.970030876853575,
      "grad_norm": 2.8125,
      "learning_rate": 5.550493131978681e-07,
      "loss": 0.7677,
      "step": 847430
    },
    {
      "epoch": 2.970065924360471,
      "grad_norm": 2.546875,
      "learning_rate": 5.544002845341662e-07,
      "loss": 0.7418,
      "step": 847440
    },
    {
      "epoch": 2.970100971867366,
      "grad_norm": 2.984375,
      "learning_rate": 5.537512558704643e-07,
      "loss": 0.7761,
      "step": 847450
    },
    {
      "epoch": 2.970136019374262,
      "grad_norm": 2.78125,
      "learning_rate": 5.531022272067624e-07,
      "loss": 0.8302,
      "step": 847460
    },
    {
      "epoch": 2.970171066881157,
      "grad_norm": 2.5625,
      "learning_rate": 5.524531985430604e-07,
      "loss": 0.7823,
      "step": 847470
    },
    {
      "epoch": 2.970206114388053,
      "grad_norm": 2.703125,
      "learning_rate": 5.518041698793586e-07,
      "loss": 0.809,
      "step": 847480
    },
    {
      "epoch": 2.9702411618949487,
      "grad_norm": 2.75,
      "learning_rate": 5.511551412156567e-07,
      "loss": 0.8738,
      "step": 847490
    },
    {
      "epoch": 2.9702762094018444,
      "grad_norm": 3.328125,
      "learning_rate": 5.505061125519547e-07,
      "loss": 0.8932,
      "step": 847500
    },
    {
      "epoch": 2.9703112569087398,
      "grad_norm": 2.671875,
      "learning_rate": 5.498570838882528e-07,
      "loss": 0.8726,
      "step": 847510
    },
    {
      "epoch": 2.9703463044156355,
      "grad_norm": 2.625,
      "learning_rate": 5.492080552245509e-07,
      "loss": 0.8217,
      "step": 847520
    },
    {
      "epoch": 2.970381351922531,
      "grad_norm": 2.953125,
      "learning_rate": 5.48559026560849e-07,
      "loss": 0.8532,
      "step": 847530
    },
    {
      "epoch": 2.9704163994294266,
      "grad_norm": 2.609375,
      "learning_rate": 5.479099978971472e-07,
      "loss": 0.7878,
      "step": 847540
    },
    {
      "epoch": 2.9704514469363223,
      "grad_norm": 3.25,
      "learning_rate": 5.472609692334453e-07,
      "loss": 0.8167,
      "step": 847550
    },
    {
      "epoch": 2.9704864944432177,
      "grad_norm": 2.921875,
      "learning_rate": 5.466119405697434e-07,
      "loss": 0.8695,
      "step": 847560
    },
    {
      "epoch": 2.9705215419501134,
      "grad_norm": 2.84375,
      "learning_rate": 5.459629119060415e-07,
      "loss": 0.8267,
      "step": 847570
    },
    {
      "epoch": 2.9705565894570087,
      "grad_norm": 2.765625,
      "learning_rate": 5.453138832423396e-07,
      "loss": 0.7466,
      "step": 847580
    },
    {
      "epoch": 2.9705916369639045,
      "grad_norm": 3.3125,
      "learning_rate": 5.446648545786377e-07,
      "loss": 0.8144,
      "step": 847590
    },
    {
      "epoch": 2.9706266844708002,
      "grad_norm": 2.921875,
      "learning_rate": 5.440158259149357e-07,
      "loss": 0.7531,
      "step": 847600
    },
    {
      "epoch": 2.970661731977696,
      "grad_norm": 3.1875,
      "learning_rate": 5.433667972512338e-07,
      "loss": 0.7575,
      "step": 847610
    },
    {
      "epoch": 2.9706967794845913,
      "grad_norm": 2.96875,
      "learning_rate": 5.42717768587532e-07,
      "loss": 0.7682,
      "step": 847620
    },
    {
      "epoch": 2.970731826991487,
      "grad_norm": 2.734375,
      "learning_rate": 5.4206873992383e-07,
      "loss": 0.8046,
      "step": 847630
    },
    {
      "epoch": 2.9707668744983824,
      "grad_norm": 2.515625,
      "learning_rate": 5.414197112601281e-07,
      "loss": 0.7733,
      "step": 847640
    },
    {
      "epoch": 2.970801922005278,
      "grad_norm": 3.140625,
      "learning_rate": 5.407706825964262e-07,
      "loss": 0.8407,
      "step": 847650
    },
    {
      "epoch": 2.970836969512174,
      "grad_norm": 3.265625,
      "learning_rate": 5.401216539327243e-07,
      "loss": 0.8392,
      "step": 847660
    },
    {
      "epoch": 2.9708720170190692,
      "grad_norm": 3.046875,
      "learning_rate": 5.394726252690224e-07,
      "loss": 0.7274,
      "step": 847670
    },
    {
      "epoch": 2.970907064525965,
      "grad_norm": 2.96875,
      "learning_rate": 5.388235966053204e-07,
      "loss": 0.8252,
      "step": 847680
    },
    {
      "epoch": 2.9709421120328603,
      "grad_norm": 3.09375,
      "learning_rate": 5.381745679416186e-07,
      "loss": 0.8468,
      "step": 847690
    },
    {
      "epoch": 2.970977159539756,
      "grad_norm": 2.921875,
      "learning_rate": 5.375255392779167e-07,
      "loss": 0.8027,
      "step": 847700
    },
    {
      "epoch": 2.971012207046652,
      "grad_norm": 3.25,
      "learning_rate": 5.368765106142147e-07,
      "loss": 0.7592,
      "step": 847710
    },
    {
      "epoch": 2.9710472545535476,
      "grad_norm": 2.625,
      "learning_rate": 5.36227481950513e-07,
      "loss": 0.7767,
      "step": 847720
    },
    {
      "epoch": 2.971082302060443,
      "grad_norm": 2.625,
      "learning_rate": 5.35578453286811e-07,
      "loss": 0.7231,
      "step": 847730
    },
    {
      "epoch": 2.9711173495673386,
      "grad_norm": 2.953125,
      "learning_rate": 5.349294246231091e-07,
      "loss": 0.8036,
      "step": 847740
    },
    {
      "epoch": 2.971152397074234,
      "grad_norm": 2.609375,
      "learning_rate": 5.342803959594072e-07,
      "loss": 0.8351,
      "step": 847750
    },
    {
      "epoch": 2.9711874445811297,
      "grad_norm": 3.03125,
      "learning_rate": 5.336313672957053e-07,
      "loss": 0.8277,
      "step": 847760
    },
    {
      "epoch": 2.9712224920880255,
      "grad_norm": 2.828125,
      "learning_rate": 5.329823386320034e-07,
      "loss": 0.8369,
      "step": 847770
    },
    {
      "epoch": 2.971257539594921,
      "grad_norm": 2.90625,
      "learning_rate": 5.323333099683014e-07,
      "loss": 0.8297,
      "step": 847780
    },
    {
      "epoch": 2.9712925871018165,
      "grad_norm": 3.53125,
      "learning_rate": 5.316842813045996e-07,
      "loss": 0.7992,
      "step": 847790
    },
    {
      "epoch": 2.971327634608712,
      "grad_norm": 2.953125,
      "learning_rate": 5.310352526408977e-07,
      "loss": 0.7831,
      "step": 847800
    },
    {
      "epoch": 2.9713626821156076,
      "grad_norm": 2.75,
      "learning_rate": 5.303862239771957e-07,
      "loss": 0.7453,
      "step": 847810
    },
    {
      "epoch": 2.9713977296225034,
      "grad_norm": 3.453125,
      "learning_rate": 5.297371953134939e-07,
      "loss": 0.7796,
      "step": 847820
    },
    {
      "epoch": 2.971432777129399,
      "grad_norm": 3.015625,
      "learning_rate": 5.290881666497919e-07,
      "loss": 0.6512,
      "step": 847830
    },
    {
      "epoch": 2.9714678246362944,
      "grad_norm": 2.6875,
      "learning_rate": 5.2843913798609e-07,
      "loss": 0.8737,
      "step": 847840
    },
    {
      "epoch": 2.97150287214319,
      "grad_norm": 3.015625,
      "learning_rate": 5.277901093223882e-07,
      "loss": 0.8512,
      "step": 847850
    },
    {
      "epoch": 2.9715379196500855,
      "grad_norm": 2.921875,
      "learning_rate": 5.271410806586862e-07,
      "loss": 0.7317,
      "step": 847860
    },
    {
      "epoch": 2.9715729671569813,
      "grad_norm": 2.75,
      "learning_rate": 5.264920519949843e-07,
      "loss": 0.8235,
      "step": 847870
    },
    {
      "epoch": 2.971608014663877,
      "grad_norm": 3.078125,
      "learning_rate": 5.258430233312824e-07,
      "loss": 0.7198,
      "step": 847880
    },
    {
      "epoch": 2.9716430621707723,
      "grad_norm": 3.4375,
      "learning_rate": 5.251939946675805e-07,
      "loss": 0.8073,
      "step": 847890
    },
    {
      "epoch": 2.971678109677668,
      "grad_norm": 3.078125,
      "learning_rate": 5.245449660038786e-07,
      "loss": 0.8348,
      "step": 847900
    },
    {
      "epoch": 2.9717131571845634,
      "grad_norm": 3.109375,
      "learning_rate": 5.238959373401766e-07,
      "loss": 0.8513,
      "step": 847910
    },
    {
      "epoch": 2.971748204691459,
      "grad_norm": 2.015625,
      "learning_rate": 5.232469086764749e-07,
      "loss": 0.7499,
      "step": 847920
    },
    {
      "epoch": 2.971783252198355,
      "grad_norm": 2.75,
      "learning_rate": 5.22597880012773e-07,
      "loss": 0.7874,
      "step": 847930
    },
    {
      "epoch": 2.9718182997052507,
      "grad_norm": 3.09375,
      "learning_rate": 5.21948851349071e-07,
      "loss": 0.8638,
      "step": 847940
    },
    {
      "epoch": 2.971853347212146,
      "grad_norm": 3.078125,
      "learning_rate": 5.212998226853692e-07,
      "loss": 0.8268,
      "step": 847950
    },
    {
      "epoch": 2.9718883947190418,
      "grad_norm": 3.140625,
      "learning_rate": 5.206507940216672e-07,
      "loss": 0.818,
      "step": 847960
    },
    {
      "epoch": 2.971923442225937,
      "grad_norm": 2.921875,
      "learning_rate": 5.200017653579653e-07,
      "loss": 0.8259,
      "step": 847970
    },
    {
      "epoch": 2.971958489732833,
      "grad_norm": 3.65625,
      "learning_rate": 5.193527366942634e-07,
      "loss": 0.8188,
      "step": 847980
    },
    {
      "epoch": 2.9719935372397286,
      "grad_norm": 2.859375,
      "learning_rate": 5.187037080305615e-07,
      "loss": 0.7785,
      "step": 847990
    },
    {
      "epoch": 2.972028584746624,
      "grad_norm": 2.953125,
      "learning_rate": 5.180546793668596e-07,
      "loss": 0.7683,
      "step": 848000
    },
    {
      "epoch": 2.9720636322535197,
      "grad_norm": 2.921875,
      "learning_rate": 5.174056507031577e-07,
      "loss": 0.7666,
      "step": 848010
    },
    {
      "epoch": 2.972098679760415,
      "grad_norm": 3.046875,
      "learning_rate": 5.167566220394558e-07,
      "loss": 0.8001,
      "step": 848020
    },
    {
      "epoch": 2.9721337272673107,
      "grad_norm": 2.71875,
      "learning_rate": 5.161075933757539e-07,
      "loss": 0.7636,
      "step": 848030
    },
    {
      "epoch": 2.9721687747742065,
      "grad_norm": 3.0,
      "learning_rate": 5.154585647120519e-07,
      "loss": 0.8654,
      "step": 848040
    },
    {
      "epoch": 2.9722038222811022,
      "grad_norm": 3.09375,
      "learning_rate": 5.148095360483501e-07,
      "loss": 0.8448,
      "step": 848050
    },
    {
      "epoch": 2.9722388697879976,
      "grad_norm": 2.890625,
      "learning_rate": 5.141605073846481e-07,
      "loss": 0.7397,
      "step": 848060
    },
    {
      "epoch": 2.9722739172948933,
      "grad_norm": 3.40625,
      "learning_rate": 5.135114787209462e-07,
      "loss": 0.8498,
      "step": 848070
    },
    {
      "epoch": 2.9723089648017886,
      "grad_norm": 3.125,
      "learning_rate": 5.128624500572443e-07,
      "loss": 0.8426,
      "step": 848080
    },
    {
      "epoch": 2.9723440123086844,
      "grad_norm": 3.0625,
      "learning_rate": 5.122134213935424e-07,
      "loss": 0.7841,
      "step": 848090
    },
    {
      "epoch": 2.97237905981558,
      "grad_norm": 3.0,
      "learning_rate": 5.115643927298405e-07,
      "loss": 0.8006,
      "step": 848100
    },
    {
      "epoch": 2.9724141073224755,
      "grad_norm": 3.140625,
      "learning_rate": 5.109153640661387e-07,
      "loss": 0.8453,
      "step": 848110
    },
    {
      "epoch": 2.972449154829371,
      "grad_norm": 2.953125,
      "learning_rate": 5.102663354024368e-07,
      "loss": 0.7962,
      "step": 848120
    },
    {
      "epoch": 2.9724842023362665,
      "grad_norm": 3.078125,
      "learning_rate": 5.096173067387349e-07,
      "loss": 0.8337,
      "step": 848130
    },
    {
      "epoch": 2.9725192498431623,
      "grad_norm": 2.359375,
      "learning_rate": 5.089682780750329e-07,
      "loss": 0.8121,
      "step": 848140
    },
    {
      "epoch": 2.972554297350058,
      "grad_norm": 2.65625,
      "learning_rate": 5.083192494113311e-07,
      "loss": 0.7056,
      "step": 848150
    },
    {
      "epoch": 2.972589344856954,
      "grad_norm": 2.578125,
      "learning_rate": 5.076702207476292e-07,
      "loss": 0.8301,
      "step": 848160
    },
    {
      "epoch": 2.972624392363849,
      "grad_norm": 3.140625,
      "learning_rate": 5.070211920839272e-07,
      "loss": 0.8159,
      "step": 848170
    },
    {
      "epoch": 2.972659439870745,
      "grad_norm": 2.78125,
      "learning_rate": 5.063721634202253e-07,
      "loss": 0.7706,
      "step": 848180
    },
    {
      "epoch": 2.97269448737764,
      "grad_norm": 3.015625,
      "learning_rate": 5.057231347565234e-07,
      "loss": 0.7483,
      "step": 848190
    },
    {
      "epoch": 2.972729534884536,
      "grad_norm": 2.75,
      "learning_rate": 5.050741060928215e-07,
      "loss": 0.7371,
      "step": 848200
    },
    {
      "epoch": 2.9727645823914317,
      "grad_norm": 3.015625,
      "learning_rate": 5.044250774291196e-07,
      "loss": 0.7557,
      "step": 848210
    },
    {
      "epoch": 2.972799629898327,
      "grad_norm": 2.59375,
      "learning_rate": 5.037760487654177e-07,
      "loss": 0.8431,
      "step": 848220
    },
    {
      "epoch": 2.972834677405223,
      "grad_norm": 2.3125,
      "learning_rate": 5.031270201017158e-07,
      "loss": 0.8166,
      "step": 848230
    },
    {
      "epoch": 2.9728697249121185,
      "grad_norm": 3.28125,
      "learning_rate": 5.024779914380139e-07,
      "loss": 0.7553,
      "step": 848240
    },
    {
      "epoch": 2.972904772419014,
      "grad_norm": 2.75,
      "learning_rate": 5.01828962774312e-07,
      "loss": 0.8225,
      "step": 848250
    },
    {
      "epoch": 2.9729398199259096,
      "grad_norm": 3.09375,
      "learning_rate": 5.011799341106101e-07,
      "loss": 0.8166,
      "step": 848260
    },
    {
      "epoch": 2.9729748674328054,
      "grad_norm": 3.46875,
      "learning_rate": 5.005309054469081e-07,
      "loss": 0.8361,
      "step": 848270
    },
    {
      "epoch": 2.9730099149397007,
      "grad_norm": 3.171875,
      "learning_rate": 4.998818767832062e-07,
      "loss": 0.8527,
      "step": 848280
    },
    {
      "epoch": 2.9730449624465964,
      "grad_norm": 2.8125,
      "learning_rate": 4.992328481195044e-07,
      "loss": 0.8006,
      "step": 848290
    },
    {
      "epoch": 2.9730800099534918,
      "grad_norm": 3.03125,
      "learning_rate": 4.985838194558024e-07,
      "loss": 0.7695,
      "step": 848300
    },
    {
      "epoch": 2.9731150574603875,
      "grad_norm": 2.890625,
      "learning_rate": 4.979347907921006e-07,
      "loss": 0.743,
      "step": 848310
    },
    {
      "epoch": 2.9731501049672833,
      "grad_norm": 2.796875,
      "learning_rate": 4.972857621283987e-07,
      "loss": 0.8358,
      "step": 848320
    },
    {
      "epoch": 2.9731851524741786,
      "grad_norm": 2.890625,
      "learning_rate": 4.966367334646968e-07,
      "loss": 0.8674,
      "step": 848330
    },
    {
      "epoch": 2.9732201999810743,
      "grad_norm": 2.515625,
      "learning_rate": 4.959877048009949e-07,
      "loss": 0.6917,
      "step": 848340
    },
    {
      "epoch": 2.97325524748797,
      "grad_norm": 2.59375,
      "learning_rate": 4.95338676137293e-07,
      "loss": 0.796,
      "step": 848350
    },
    {
      "epoch": 2.9732902949948654,
      "grad_norm": 3.25,
      "learning_rate": 4.946896474735911e-07,
      "loss": 0.8525,
      "step": 848360
    },
    {
      "epoch": 2.973325342501761,
      "grad_norm": 3.203125,
      "learning_rate": 4.940406188098891e-07,
      "loss": 0.835,
      "step": 848370
    },
    {
      "epoch": 2.973360390008657,
      "grad_norm": 2.828125,
      "learning_rate": 4.933915901461872e-07,
      "loss": 0.7706,
      "step": 848380
    },
    {
      "epoch": 2.9733954375155522,
      "grad_norm": 2.578125,
      "learning_rate": 4.927425614824854e-07,
      "loss": 0.7689,
      "step": 848390
    },
    {
      "epoch": 2.973430485022448,
      "grad_norm": 2.921875,
      "learning_rate": 4.920935328187834e-07,
      "loss": 0.7778,
      "step": 848400
    },
    {
      "epoch": 2.9734655325293433,
      "grad_norm": 2.984375,
      "learning_rate": 4.914445041550815e-07,
      "loss": 0.7737,
      "step": 848410
    },
    {
      "epoch": 2.973500580036239,
      "grad_norm": 2.875,
      "learning_rate": 4.907954754913796e-07,
      "loss": 0.8677,
      "step": 848420
    },
    {
      "epoch": 2.973535627543135,
      "grad_norm": 3.28125,
      "learning_rate": 4.901464468276777e-07,
      "loss": 0.8389,
      "step": 848430
    },
    {
      "epoch": 2.9735706750500306,
      "grad_norm": 2.96875,
      "learning_rate": 4.894974181639758e-07,
      "loss": 0.7951,
      "step": 848440
    },
    {
      "epoch": 2.973605722556926,
      "grad_norm": 2.796875,
      "learning_rate": 4.888483895002738e-07,
      "loss": 0.7775,
      "step": 848450
    },
    {
      "epoch": 2.9736407700638217,
      "grad_norm": 2.359375,
      "learning_rate": 4.88199360836572e-07,
      "loss": 0.7933,
      "step": 848460
    },
    {
      "epoch": 2.973675817570717,
      "grad_norm": 3.515625,
      "learning_rate": 4.875503321728701e-07,
      "loss": 0.8111,
      "step": 848470
    },
    {
      "epoch": 2.9737108650776127,
      "grad_norm": 3.21875,
      "learning_rate": 4.869013035091681e-07,
      "loss": 0.7698,
      "step": 848480
    },
    {
      "epoch": 2.9737459125845085,
      "grad_norm": 2.953125,
      "learning_rate": 4.862522748454664e-07,
      "loss": 0.7561,
      "step": 848490
    },
    {
      "epoch": 2.973780960091404,
      "grad_norm": 3.171875,
      "learning_rate": 4.856032461817644e-07,
      "loss": 0.8225,
      "step": 848500
    },
    {
      "epoch": 2.9738160075982996,
      "grad_norm": 2.5625,
      "learning_rate": 4.849542175180625e-07,
      "loss": 0.7417,
      "step": 848510
    },
    {
      "epoch": 2.973851055105195,
      "grad_norm": 2.59375,
      "learning_rate": 4.843051888543607e-07,
      "loss": 0.8203,
      "step": 848520
    },
    {
      "epoch": 2.9738861026120906,
      "grad_norm": 2.578125,
      "learning_rate": 4.836561601906587e-07,
      "loss": 0.7835,
      "step": 848530
    },
    {
      "epoch": 2.9739211501189864,
      "grad_norm": 3.03125,
      "learning_rate": 4.830071315269568e-07,
      "loss": 0.8383,
      "step": 848540
    },
    {
      "epoch": 2.973956197625882,
      "grad_norm": 2.40625,
      "learning_rate": 4.823581028632549e-07,
      "loss": 0.6861,
      "step": 848550
    },
    {
      "epoch": 2.9739912451327775,
      "grad_norm": 2.765625,
      "learning_rate": 4.81709074199553e-07,
      "loss": 0.7877,
      "step": 848560
    },
    {
      "epoch": 2.974026292639673,
      "grad_norm": 2.78125,
      "learning_rate": 4.810600455358511e-07,
      "loss": 0.7882,
      "step": 848570
    },
    {
      "epoch": 2.9740613401465685,
      "grad_norm": 2.828125,
      "learning_rate": 4.804110168721491e-07,
      "loss": 0.7306,
      "step": 848580
    },
    {
      "epoch": 2.9740963876534643,
      "grad_norm": 3.390625,
      "learning_rate": 4.797619882084473e-07,
      "loss": 0.8144,
      "step": 848590
    },
    {
      "epoch": 2.97413143516036,
      "grad_norm": 2.90625,
      "learning_rate": 4.791129595447454e-07,
      "loss": 0.7635,
      "step": 848600
    },
    {
      "epoch": 2.9741664826672554,
      "grad_norm": 3.171875,
      "learning_rate": 4.784639308810434e-07,
      "loss": 0.7635,
      "step": 848610
    },
    {
      "epoch": 2.974201530174151,
      "grad_norm": 2.734375,
      "learning_rate": 4.778149022173416e-07,
      "loss": 0.7605,
      "step": 848620
    },
    {
      "epoch": 2.9742365776810464,
      "grad_norm": 3.390625,
      "learning_rate": 4.771658735536396e-07,
      "loss": 0.7318,
      "step": 848630
    },
    {
      "epoch": 2.974271625187942,
      "grad_norm": 3.046875,
      "learning_rate": 4.7651684488993773e-07,
      "loss": 0.8385,
      "step": 848640
    },
    {
      "epoch": 2.974306672694838,
      "grad_norm": 2.78125,
      "learning_rate": 4.758678162262358e-07,
      "loss": 0.7868,
      "step": 848650
    },
    {
      "epoch": 2.9743417202017337,
      "grad_norm": 3.359375,
      "learning_rate": 4.7521878756253394e-07,
      "loss": 0.824,
      "step": 848660
    },
    {
      "epoch": 2.974376767708629,
      "grad_norm": 2.53125,
      "learning_rate": 4.7456975889883205e-07,
      "loss": 0.8777,
      "step": 848670
    },
    {
      "epoch": 2.974411815215525,
      "grad_norm": 3.046875,
      "learning_rate": 4.739207302351301e-07,
      "loss": 0.7706,
      "step": 848680
    },
    {
      "epoch": 2.97444686272242,
      "grad_norm": 3.1875,
      "learning_rate": 4.7327170157142826e-07,
      "loss": 0.7867,
      "step": 848690
    },
    {
      "epoch": 2.974481910229316,
      "grad_norm": 2.828125,
      "learning_rate": 4.726226729077263e-07,
      "loss": 0.8363,
      "step": 848700
    },
    {
      "epoch": 2.9745169577362116,
      "grad_norm": 2.984375,
      "learning_rate": 4.719736442440244e-07,
      "loss": 0.7637,
      "step": 848710
    },
    {
      "epoch": 2.974552005243107,
      "grad_norm": 2.828125,
      "learning_rate": 4.713246155803226e-07,
      "loss": 0.8381,
      "step": 848720
    },
    {
      "epoch": 2.9745870527500027,
      "grad_norm": 3.046875,
      "learning_rate": 4.7067558691662063e-07,
      "loss": 0.7398,
      "step": 848730
    },
    {
      "epoch": 2.974622100256898,
      "grad_norm": 3.3125,
      "learning_rate": 4.700265582529187e-07,
      "loss": 0.8104,
      "step": 848740
    },
    {
      "epoch": 2.9746571477637938,
      "grad_norm": 2.40625,
      "learning_rate": 4.693775295892168e-07,
      "loss": 0.6958,
      "step": 848750
    },
    {
      "epoch": 2.9746921952706895,
      "grad_norm": 3.0625,
      "learning_rate": 4.6872850092551494e-07,
      "loss": 0.8039,
      "step": 848760
    },
    {
      "epoch": 2.9747272427775853,
      "grad_norm": 3.34375,
      "learning_rate": 4.68079472261813e-07,
      "loss": 0.8315,
      "step": 848770
    },
    {
      "epoch": 2.9747622902844806,
      "grad_norm": 2.5625,
      "learning_rate": 4.6743044359811105e-07,
      "loss": 0.8411,
      "step": 848780
    },
    {
      "epoch": 2.9747973377913763,
      "grad_norm": 2.875,
      "learning_rate": 4.667814149344092e-07,
      "loss": 0.8224,
      "step": 848790
    },
    {
      "epoch": 2.9748323852982717,
      "grad_norm": 2.953125,
      "learning_rate": 4.6613238627070726e-07,
      "loss": 0.8565,
      "step": 848800
    },
    {
      "epoch": 2.9748674328051674,
      "grad_norm": 2.828125,
      "learning_rate": 4.6548335760700537e-07,
      "loss": 0.7595,
      "step": 848810
    },
    {
      "epoch": 2.974902480312063,
      "grad_norm": 3.03125,
      "learning_rate": 4.648343289433035e-07,
      "loss": 0.7872,
      "step": 848820
    },
    {
      "epoch": 2.9749375278189585,
      "grad_norm": 2.390625,
      "learning_rate": 4.641853002796016e-07,
      "loss": 0.8059,
      "step": 848830
    },
    {
      "epoch": 2.9749725753258542,
      "grad_norm": 2.890625,
      "learning_rate": 4.6353627161589963e-07,
      "loss": 0.8271,
      "step": 848840
    },
    {
      "epoch": 2.9750076228327496,
      "grad_norm": 2.859375,
      "learning_rate": 4.6288724295219774e-07,
      "loss": 0.8378,
      "step": 848850
    },
    {
      "epoch": 2.9750426703396453,
      "grad_norm": 2.8125,
      "learning_rate": 4.622382142884959e-07,
      "loss": 0.8088,
      "step": 848860
    },
    {
      "epoch": 2.975077717846541,
      "grad_norm": 2.90625,
      "learning_rate": 4.6158918562479395e-07,
      "loss": 0.8483,
      "step": 848870
    },
    {
      "epoch": 2.975112765353437,
      "grad_norm": 2.484375,
      "learning_rate": 4.60940156961092e-07,
      "loss": 0.8355,
      "step": 848880
    },
    {
      "epoch": 2.975147812860332,
      "grad_norm": 3.015625,
      "learning_rate": 4.6029112829739016e-07,
      "loss": 0.8185,
      "step": 848890
    },
    {
      "epoch": 2.975182860367228,
      "grad_norm": 2.546875,
      "learning_rate": 4.5964209963368827e-07,
      "loss": 0.7308,
      "step": 848900
    },
    {
      "epoch": 2.975217907874123,
      "grad_norm": 2.921875,
      "learning_rate": 4.589930709699863e-07,
      "loss": 0.8306,
      "step": 848910
    },
    {
      "epoch": 2.975252955381019,
      "grad_norm": 3.046875,
      "learning_rate": 4.583440423062845e-07,
      "loss": 0.8394,
      "step": 848920
    },
    {
      "epoch": 2.9752880028879147,
      "grad_norm": 3.0,
      "learning_rate": 4.5769501364258253e-07,
      "loss": 0.7653,
      "step": 848930
    },
    {
      "epoch": 2.97532305039481,
      "grad_norm": 2.734375,
      "learning_rate": 4.5704598497888064e-07,
      "loss": 0.761,
      "step": 848940
    },
    {
      "epoch": 2.975358097901706,
      "grad_norm": 2.8125,
      "learning_rate": 4.563969563151787e-07,
      "loss": 0.8761,
      "step": 848950
    },
    {
      "epoch": 2.975393145408601,
      "grad_norm": 3.0,
      "learning_rate": 4.5574792765147685e-07,
      "loss": 0.7683,
      "step": 848960
    },
    {
      "epoch": 2.975428192915497,
      "grad_norm": 2.953125,
      "learning_rate": 4.550988989877749e-07,
      "loss": 0.8417,
      "step": 848970
    },
    {
      "epoch": 2.9754632404223926,
      "grad_norm": 2.609375,
      "learning_rate": 4.54449870324073e-07,
      "loss": 0.7441,
      "step": 848980
    },
    {
      "epoch": 2.9754982879292884,
      "grad_norm": 2.9375,
      "learning_rate": 4.5380084166037117e-07,
      "loss": 0.8404,
      "step": 848990
    },
    {
      "epoch": 2.9755333354361837,
      "grad_norm": 2.78125,
      "learning_rate": 4.531518129966692e-07,
      "loss": 0.9041,
      "step": 849000
    },
    {
      "epoch": 2.9755683829430795,
      "grad_norm": 2.84375,
      "learning_rate": 4.5250278433296727e-07,
      "loss": 0.8658,
      "step": 849010
    },
    {
      "epoch": 2.9756034304499748,
      "grad_norm": 2.65625,
      "learning_rate": 4.5185375566926543e-07,
      "loss": 0.8139,
      "step": 849020
    },
    {
      "epoch": 2.9756384779568705,
      "grad_norm": 3.328125,
      "learning_rate": 4.5120472700556354e-07,
      "loss": 0.7829,
      "step": 849030
    },
    {
      "epoch": 2.9756735254637663,
      "grad_norm": 3.28125,
      "learning_rate": 4.505556983418616e-07,
      "loss": 0.7778,
      "step": 849040
    },
    {
      "epoch": 2.9757085729706616,
      "grad_norm": 2.71875,
      "learning_rate": 4.4990666967815964e-07,
      "loss": 0.7726,
      "step": 849050
    },
    {
      "epoch": 2.9757436204775574,
      "grad_norm": 3.21875,
      "learning_rate": 4.492576410144578e-07,
      "loss": 0.8346,
      "step": 849060
    },
    {
      "epoch": 2.9757786679844527,
      "grad_norm": 2.953125,
      "learning_rate": 4.486086123507559e-07,
      "loss": 0.8257,
      "step": 849070
    },
    {
      "epoch": 2.9758137154913484,
      "grad_norm": 3.34375,
      "learning_rate": 4.4795958368705396e-07,
      "loss": 0.84,
      "step": 849080
    },
    {
      "epoch": 2.975848762998244,
      "grad_norm": 2.984375,
      "learning_rate": 4.473105550233521e-07,
      "loss": 0.7986,
      "step": 849090
    },
    {
      "epoch": 2.97588381050514,
      "grad_norm": 2.9375,
      "learning_rate": 4.4666152635965017e-07,
      "loss": 0.8197,
      "step": 849100
    },
    {
      "epoch": 2.9759188580120353,
      "grad_norm": 2.84375,
      "learning_rate": 4.460124976959482e-07,
      "loss": 0.7767,
      "step": 849110
    },
    {
      "epoch": 2.975953905518931,
      "grad_norm": 2.703125,
      "learning_rate": 4.4536346903224633e-07,
      "loss": 0.8096,
      "step": 849120
    },
    {
      "epoch": 2.9759889530258263,
      "grad_norm": 2.75,
      "learning_rate": 4.447144403685445e-07,
      "loss": 0.772,
      "step": 849130
    },
    {
      "epoch": 2.976024000532722,
      "grad_norm": 2.796875,
      "learning_rate": 4.4406541170484254e-07,
      "loss": 0.7584,
      "step": 849140
    },
    {
      "epoch": 2.976059048039618,
      "grad_norm": 3.390625,
      "learning_rate": 4.434163830411406e-07,
      "loss": 0.8135,
      "step": 849150
    },
    {
      "epoch": 2.976094095546513,
      "grad_norm": 2.96875,
      "learning_rate": 4.4276735437743875e-07,
      "loss": 0.7997,
      "step": 849160
    },
    {
      "epoch": 2.976129143053409,
      "grad_norm": 2.484375,
      "learning_rate": 4.4211832571373686e-07,
      "loss": 0.7757,
      "step": 849170
    },
    {
      "epoch": 2.9761641905603042,
      "grad_norm": 3.046875,
      "learning_rate": 4.414692970500349e-07,
      "loss": 0.8407,
      "step": 849180
    },
    {
      "epoch": 2.9761992380672,
      "grad_norm": 2.65625,
      "learning_rate": 4.4082026838633307e-07,
      "loss": 0.8494,
      "step": 849190
    },
    {
      "epoch": 2.9762342855740958,
      "grad_norm": 2.875,
      "learning_rate": 4.401712397226311e-07,
      "loss": 0.8123,
      "step": 849200
    },
    {
      "epoch": 2.9762693330809915,
      "grad_norm": 3.0,
      "learning_rate": 4.3952221105892923e-07,
      "loss": 0.808,
      "step": 849210
    },
    {
      "epoch": 2.976304380587887,
      "grad_norm": 3.109375,
      "learning_rate": 4.388731823952273e-07,
      "loss": 0.7914,
      "step": 849220
    },
    {
      "epoch": 2.9763394280947826,
      "grad_norm": 2.609375,
      "learning_rate": 4.3822415373152544e-07,
      "loss": 0.7373,
      "step": 849230
    },
    {
      "epoch": 2.976374475601678,
      "grad_norm": 3.375,
      "learning_rate": 4.375751250678235e-07,
      "loss": 0.8536,
      "step": 849240
    },
    {
      "epoch": 2.9764095231085737,
      "grad_norm": 2.859375,
      "learning_rate": 4.369260964041216e-07,
      "loss": 0.8473,
      "step": 849250
    },
    {
      "epoch": 2.9764445706154694,
      "grad_norm": 2.8125,
      "learning_rate": 4.3627706774041976e-07,
      "loss": 0.8141,
      "step": 849260
    },
    {
      "epoch": 2.9764796181223647,
      "grad_norm": 2.96875,
      "learning_rate": 4.356280390767178e-07,
      "loss": 0.7994,
      "step": 849270
    },
    {
      "epoch": 2.9765146656292605,
      "grad_norm": 2.9375,
      "learning_rate": 4.3497901041301586e-07,
      "loss": 0.7961,
      "step": 849280
    },
    {
      "epoch": 2.976549713136156,
      "grad_norm": 2.390625,
      "learning_rate": 4.34329981749314e-07,
      "loss": 0.791,
      "step": 849290
    },
    {
      "epoch": 2.9765847606430516,
      "grad_norm": 2.296875,
      "learning_rate": 4.3368095308561213e-07,
      "loss": 0.7011,
      "step": 849300
    },
    {
      "epoch": 2.9766198081499473,
      "grad_norm": 2.609375,
      "learning_rate": 4.330319244219102e-07,
      "loss": 0.8115,
      "step": 849310
    },
    {
      "epoch": 2.976654855656843,
      "grad_norm": 2.859375,
      "learning_rate": 4.3238289575820823e-07,
      "loss": 0.8613,
      "step": 849320
    },
    {
      "epoch": 2.9766899031637384,
      "grad_norm": 2.703125,
      "learning_rate": 4.317338670945064e-07,
      "loss": 0.882,
      "step": 849330
    },
    {
      "epoch": 2.976724950670634,
      "grad_norm": 2.53125,
      "learning_rate": 4.310848384308045e-07,
      "loss": 0.8661,
      "step": 849340
    },
    {
      "epoch": 2.9767599981775295,
      "grad_norm": 2.5,
      "learning_rate": 4.3043580976710255e-07,
      "loss": 0.8056,
      "step": 849350
    },
    {
      "epoch": 2.976795045684425,
      "grad_norm": 3.53125,
      "learning_rate": 4.297867811034007e-07,
      "loss": 0.7813,
      "step": 849360
    },
    {
      "epoch": 2.976830093191321,
      "grad_norm": 2.59375,
      "learning_rate": 4.2913775243969876e-07,
      "loss": 0.7654,
      "step": 849370
    },
    {
      "epoch": 2.9768651406982163,
      "grad_norm": 2.796875,
      "learning_rate": 4.2848872377599687e-07,
      "loss": 0.7906,
      "step": 849380
    },
    {
      "epoch": 2.976900188205112,
      "grad_norm": 3.328125,
      "learning_rate": 4.2783969511229497e-07,
      "loss": 0.8165,
      "step": 849390
    },
    {
      "epoch": 2.9769352357120074,
      "grad_norm": 2.609375,
      "learning_rate": 4.271906664485931e-07,
      "loss": 0.7152,
      "step": 849400
    },
    {
      "epoch": 2.976970283218903,
      "grad_norm": 2.765625,
      "learning_rate": 4.2654163778489113e-07,
      "loss": 0.813,
      "step": 849410
    },
    {
      "epoch": 2.977005330725799,
      "grad_norm": 2.421875,
      "learning_rate": 4.258926091211892e-07,
      "loss": 0.7991,
      "step": 849420
    },
    {
      "epoch": 2.9770403782326946,
      "grad_norm": 3.75,
      "learning_rate": 4.2524358045748734e-07,
      "loss": 0.8054,
      "step": 849430
    },
    {
      "epoch": 2.97707542573959,
      "grad_norm": 2.578125,
      "learning_rate": 4.2459455179378545e-07,
      "loss": 0.8876,
      "step": 849440
    },
    {
      "epoch": 2.9771104732464857,
      "grad_norm": 2.71875,
      "learning_rate": 4.239455231300835e-07,
      "loss": 0.7962,
      "step": 849450
    },
    {
      "epoch": 2.977145520753381,
      "grad_norm": 3.5,
      "learning_rate": 4.2329649446638166e-07,
      "loss": 0.8851,
      "step": 849460
    },
    {
      "epoch": 2.9771805682602768,
      "grad_norm": 2.953125,
      "learning_rate": 4.226474658026797e-07,
      "loss": 0.8014,
      "step": 849470
    },
    {
      "epoch": 2.9772156157671725,
      "grad_norm": 3.203125,
      "learning_rate": 4.219984371389778e-07,
      "loss": 0.8007,
      "step": 849480
    },
    {
      "epoch": 2.977250663274068,
      "grad_norm": 2.71875,
      "learning_rate": 4.21349408475276e-07,
      "loss": 0.7564,
      "step": 849490
    },
    {
      "epoch": 2.9772857107809636,
      "grad_norm": 3.046875,
      "learning_rate": 4.2070037981157403e-07,
      "loss": 0.763,
      "step": 849500
    },
    {
      "epoch": 2.9773207582878594,
      "grad_norm": 2.890625,
      "learning_rate": 4.200513511478721e-07,
      "loss": 0.8673,
      "step": 849510
    },
    {
      "epoch": 2.9773558057947547,
      "grad_norm": 3.03125,
      "learning_rate": 4.194023224841702e-07,
      "loss": 0.8515,
      "step": 849520
    },
    {
      "epoch": 2.9773908533016504,
      "grad_norm": 2.96875,
      "learning_rate": 4.1875329382046835e-07,
      "loss": 0.7939,
      "step": 849530
    },
    {
      "epoch": 2.977425900808546,
      "grad_norm": 3.046875,
      "learning_rate": 4.181042651567664e-07,
      "loss": 0.8176,
      "step": 849540
    },
    {
      "epoch": 2.9774609483154415,
      "grad_norm": 2.890625,
      "learning_rate": 4.1745523649306445e-07,
      "loss": 0.7237,
      "step": 849550
    },
    {
      "epoch": 2.9774959958223373,
      "grad_norm": 3.40625,
      "learning_rate": 4.168062078293626e-07,
      "loss": 0.8488,
      "step": 849560
    },
    {
      "epoch": 2.9775310433292326,
      "grad_norm": 2.953125,
      "learning_rate": 4.161571791656607e-07,
      "loss": 0.7225,
      "step": 849570
    },
    {
      "epoch": 2.9775660908361283,
      "grad_norm": 3.359375,
      "learning_rate": 4.1550815050195877e-07,
      "loss": 0.9057,
      "step": 849580
    },
    {
      "epoch": 2.977601138343024,
      "grad_norm": 3.25,
      "learning_rate": 4.1485912183825693e-07,
      "loss": 0.7941,
      "step": 849590
    },
    {
      "epoch": 2.9776361858499194,
      "grad_norm": 2.84375,
      "learning_rate": 4.14210093174555e-07,
      "loss": 0.7901,
      "step": 849600
    },
    {
      "epoch": 2.977671233356815,
      "grad_norm": 2.84375,
      "learning_rate": 4.135610645108531e-07,
      "loss": 0.893,
      "step": 849610
    },
    {
      "epoch": 2.977706280863711,
      "grad_norm": 3.34375,
      "learning_rate": 4.1291203584715114e-07,
      "loss": 0.8361,
      "step": 849620
    },
    {
      "epoch": 2.9777413283706062,
      "grad_norm": 2.6875,
      "learning_rate": 4.122630071834493e-07,
      "loss": 0.7614,
      "step": 849630
    },
    {
      "epoch": 2.977776375877502,
      "grad_norm": 2.921875,
      "learning_rate": 4.1161397851974735e-07,
      "loss": 0.8296,
      "step": 849640
    },
    {
      "epoch": 2.9778114233843977,
      "grad_norm": 2.765625,
      "learning_rate": 4.1096494985604546e-07,
      "loss": 0.8426,
      "step": 849650
    },
    {
      "epoch": 2.977846470891293,
      "grad_norm": 3.125,
      "learning_rate": 4.103159211923436e-07,
      "loss": 0.7891,
      "step": 849660
    },
    {
      "epoch": 2.977881518398189,
      "grad_norm": 3.15625,
      "learning_rate": 4.0966689252864167e-07,
      "loss": 0.8276,
      "step": 849670
    },
    {
      "epoch": 2.977916565905084,
      "grad_norm": 2.59375,
      "learning_rate": 4.090178638649397e-07,
      "loss": 0.8267,
      "step": 849680
    },
    {
      "epoch": 2.97795161341198,
      "grad_norm": 2.875,
      "learning_rate": 4.083688352012379e-07,
      "loss": 0.7446,
      "step": 849690
    },
    {
      "epoch": 2.9779866609188756,
      "grad_norm": 3.046875,
      "learning_rate": 4.0771980653753593e-07,
      "loss": 0.7508,
      "step": 849700
    },
    {
      "epoch": 2.9780217084257714,
      "grad_norm": 2.8125,
      "learning_rate": 4.0707077787383404e-07,
      "loss": 0.7772,
      "step": 849710
    },
    {
      "epoch": 2.9780567559326667,
      "grad_norm": 3.21875,
      "learning_rate": 4.064217492101321e-07,
      "loss": 0.782,
      "step": 849720
    },
    {
      "epoch": 2.9780918034395625,
      "grad_norm": 2.734375,
      "learning_rate": 4.0577272054643025e-07,
      "loss": 0.878,
      "step": 849730
    },
    {
      "epoch": 2.978126850946458,
      "grad_norm": 3.078125,
      "learning_rate": 4.051236918827283e-07,
      "loss": 0.8219,
      "step": 849740
    },
    {
      "epoch": 2.9781618984533536,
      "grad_norm": 3.078125,
      "learning_rate": 4.044746632190264e-07,
      "loss": 0.7984,
      "step": 849750
    },
    {
      "epoch": 2.9781969459602493,
      "grad_norm": 3.015625,
      "learning_rate": 4.0382563455532457e-07,
      "loss": 0.8761,
      "step": 849760
    },
    {
      "epoch": 2.9782319934671446,
      "grad_norm": 3.046875,
      "learning_rate": 4.031766058916226e-07,
      "loss": 0.7799,
      "step": 849770
    },
    {
      "epoch": 2.9782670409740404,
      "grad_norm": 2.84375,
      "learning_rate": 4.025275772279207e-07,
      "loss": 0.8056,
      "step": 849780
    },
    {
      "epoch": 2.9783020884809357,
      "grad_norm": 2.890625,
      "learning_rate": 4.0187854856421883e-07,
      "loss": 0.7524,
      "step": 849790
    },
    {
      "epoch": 2.9783371359878315,
      "grad_norm": 3.0,
      "learning_rate": 4.0122951990051694e-07,
      "loss": 0.8112,
      "step": 849800
    },
    {
      "epoch": 2.978372183494727,
      "grad_norm": 2.78125,
      "learning_rate": 4.00580491236815e-07,
      "loss": 0.8031,
      "step": 849810
    },
    {
      "epoch": 2.978407231001623,
      "grad_norm": 3.203125,
      "learning_rate": 3.9993146257311304e-07,
      "loss": 0.776,
      "step": 849820
    },
    {
      "epoch": 2.9784422785085183,
      "grad_norm": 2.75,
      "learning_rate": 3.992824339094112e-07,
      "loss": 0.8132,
      "step": 849830
    },
    {
      "epoch": 2.978477326015414,
      "grad_norm": 2.671875,
      "learning_rate": 3.986334052457093e-07,
      "loss": 0.7625,
      "step": 849840
    },
    {
      "epoch": 2.9785123735223094,
      "grad_norm": 3.390625,
      "learning_rate": 3.9798437658200736e-07,
      "loss": 0.8048,
      "step": 849850
    },
    {
      "epoch": 2.978547421029205,
      "grad_norm": 2.734375,
      "learning_rate": 3.973353479183055e-07,
      "loss": 0.7666,
      "step": 849860
    },
    {
      "epoch": 2.978582468536101,
      "grad_norm": 2.5,
      "learning_rate": 3.966863192546036e-07,
      "loss": 0.8048,
      "step": 849870
    },
    {
      "epoch": 2.978617516042996,
      "grad_norm": 2.71875,
      "learning_rate": 3.960372905909017e-07,
      "loss": 0.8122,
      "step": 849880
    },
    {
      "epoch": 2.978652563549892,
      "grad_norm": 3.09375,
      "learning_rate": 3.9538826192719973e-07,
      "loss": 0.8283,
      "step": 849890
    },
    {
      "epoch": 2.9786876110567873,
      "grad_norm": 3.265625,
      "learning_rate": 3.947392332634979e-07,
      "loss": 0.8288,
      "step": 849900
    },
    {
      "epoch": 2.978722658563683,
      "grad_norm": 2.625,
      "learning_rate": 3.9409020459979594e-07,
      "loss": 0.6969,
      "step": 849910
    },
    {
      "epoch": 2.9787577060705788,
      "grad_norm": 2.953125,
      "learning_rate": 3.9344117593609405e-07,
      "loss": 0.8102,
      "step": 849920
    },
    {
      "epoch": 2.9787927535774745,
      "grad_norm": 2.25,
      "learning_rate": 3.927921472723922e-07,
      "loss": 0.7515,
      "step": 849930
    },
    {
      "epoch": 2.97882780108437,
      "grad_norm": 3.15625,
      "learning_rate": 3.9214311860869026e-07,
      "loss": 0.7715,
      "step": 849940
    },
    {
      "epoch": 2.9788628485912656,
      "grad_norm": 3.484375,
      "learning_rate": 3.914940899449883e-07,
      "loss": 0.8632,
      "step": 849950
    },
    {
      "epoch": 2.978897896098161,
      "grad_norm": 3.328125,
      "learning_rate": 3.9084506128128647e-07,
      "loss": 0.7902,
      "step": 849960
    },
    {
      "epoch": 2.9789329436050567,
      "grad_norm": 2.828125,
      "learning_rate": 3.901960326175846e-07,
      "loss": 0.7553,
      "step": 849970
    },
    {
      "epoch": 2.9789679911119524,
      "grad_norm": 3.453125,
      "learning_rate": 3.8954700395388263e-07,
      "loss": 0.8694,
      "step": 849980
    },
    {
      "epoch": 2.9790030386188477,
      "grad_norm": 2.703125,
      "learning_rate": 3.8889797529018074e-07,
      "loss": 0.7713,
      "step": 849990
    },
    {
      "epoch": 2.9790380861257435,
      "grad_norm": 3.046875,
      "learning_rate": 3.8824894662647884e-07,
      "loss": 0.7238,
      "step": 850000
    },
    {
      "epoch": 2.9790380861257435,
      "eval_loss": 0.7491686344146729,
      "eval_runtime": 555.4104,
      "eval_samples_per_second": 684.964,
      "eval_steps_per_second": 57.08,
      "step": 850000
    },
    {
      "epoch": 2.979073133632639,
      "grad_norm": 2.84375,
      "learning_rate": 3.875999179627769e-07,
      "loss": 0.8427,
      "step": 850010
    },
    {
      "epoch": 2.9791081811395346,
      "grad_norm": 2.640625,
      "learning_rate": 3.8695088929907505e-07,
      "loss": 0.8769,
      "step": 850020
    },
    {
      "epoch": 2.9791432286464303,
      "grad_norm": 2.71875,
      "learning_rate": 3.863018606353731e-07,
      "loss": 0.768,
      "step": 850030
    },
    {
      "epoch": 2.979178276153326,
      "grad_norm": 2.703125,
      "learning_rate": 3.856528319716712e-07,
      "loss": 0.7981,
      "step": 850040
    },
    {
      "epoch": 2.9792133236602214,
      "grad_norm": 2.5,
      "learning_rate": 3.850038033079693e-07,
      "loss": 0.8078,
      "step": 850050
    },
    {
      "epoch": 2.979248371167117,
      "grad_norm": 2.734375,
      "learning_rate": 3.843547746442674e-07,
      "loss": 0.8152,
      "step": 850060
    },
    {
      "epoch": 2.9792834186740125,
      "grad_norm": 3.0625,
      "learning_rate": 3.8370574598056553e-07,
      "loss": 0.7092,
      "step": 850070
    },
    {
      "epoch": 2.9793184661809082,
      "grad_norm": 3.453125,
      "learning_rate": 3.830567173168636e-07,
      "loss": 0.8947,
      "step": 850080
    },
    {
      "epoch": 2.979353513687804,
      "grad_norm": 3.21875,
      "learning_rate": 3.824076886531617e-07,
      "loss": 0.7622,
      "step": 850090
    },
    {
      "epoch": 2.9793885611946993,
      "grad_norm": 3.171875,
      "learning_rate": 3.817586599894598e-07,
      "loss": 0.7591,
      "step": 850100
    },
    {
      "epoch": 2.979423608701595,
      "grad_norm": 2.9375,
      "learning_rate": 3.811096313257579e-07,
      "loss": 0.8186,
      "step": 850110
    },
    {
      "epoch": 2.9794586562084904,
      "grad_norm": 2.859375,
      "learning_rate": 3.80460602662056e-07,
      "loss": 0.7777,
      "step": 850120
    },
    {
      "epoch": 2.979493703715386,
      "grad_norm": 3.09375,
      "learning_rate": 3.7981157399835406e-07,
      "loss": 0.8346,
      "step": 850130
    },
    {
      "epoch": 2.979528751222282,
      "grad_norm": 3.046875,
      "learning_rate": 3.7916254533465217e-07,
      "loss": 0.7682,
      "step": 850140
    },
    {
      "epoch": 2.9795637987291776,
      "grad_norm": 2.953125,
      "learning_rate": 3.7851351667095027e-07,
      "loss": 0.8084,
      "step": 850150
    },
    {
      "epoch": 2.979598846236073,
      "grad_norm": 2.921875,
      "learning_rate": 3.778644880072484e-07,
      "loss": 0.8019,
      "step": 850160
    },
    {
      "epoch": 2.9796338937429687,
      "grad_norm": 2.734375,
      "learning_rate": 3.772154593435465e-07,
      "loss": 0.8187,
      "step": 850170
    },
    {
      "epoch": 2.979668941249864,
      "grad_norm": 2.8125,
      "learning_rate": 3.7656643067984454e-07,
      "loss": 0.813,
      "step": 850180
    },
    {
      "epoch": 2.97970398875676,
      "grad_norm": 3.59375,
      "learning_rate": 3.7591740201614264e-07,
      "loss": 0.7807,
      "step": 850190
    },
    {
      "epoch": 2.9797390362636555,
      "grad_norm": 3.1875,
      "learning_rate": 3.7526837335244075e-07,
      "loss": 0.8739,
      "step": 850200
    },
    {
      "epoch": 2.979774083770551,
      "grad_norm": 3.234375,
      "learning_rate": 3.7461934468873885e-07,
      "loss": 0.7334,
      "step": 850210
    },
    {
      "epoch": 2.9798091312774466,
      "grad_norm": 2.46875,
      "learning_rate": 3.7397031602503696e-07,
      "loss": 0.7815,
      "step": 850220
    },
    {
      "epoch": 2.979844178784342,
      "grad_norm": 2.734375,
      "learning_rate": 3.73321287361335e-07,
      "loss": 0.7269,
      "step": 850230
    },
    {
      "epoch": 2.9798792262912377,
      "grad_norm": 2.640625,
      "learning_rate": 3.7267225869763317e-07,
      "loss": 0.8202,
      "step": 850240
    },
    {
      "epoch": 2.9799142737981335,
      "grad_norm": 2.84375,
      "learning_rate": 3.720232300339312e-07,
      "loss": 0.7496,
      "step": 850250
    },
    {
      "epoch": 2.979949321305029,
      "grad_norm": 2.984375,
      "learning_rate": 3.7137420137022933e-07,
      "loss": 0.7947,
      "step": 850260
    },
    {
      "epoch": 2.9799843688119245,
      "grad_norm": 2.875,
      "learning_rate": 3.7072517270652743e-07,
      "loss": 0.8197,
      "step": 850270
    },
    {
      "epoch": 2.9800194163188203,
      "grad_norm": 3.390625,
      "learning_rate": 3.7007614404282554e-07,
      "loss": 0.7961,
      "step": 850280
    },
    {
      "epoch": 2.9800544638257156,
      "grad_norm": 2.65625,
      "learning_rate": 3.6942711537912365e-07,
      "loss": 0.8152,
      "step": 850290
    },
    {
      "epoch": 2.9800895113326114,
      "grad_norm": 3.046875,
      "learning_rate": 3.687780867154217e-07,
      "loss": 0.8008,
      "step": 850300
    },
    {
      "epoch": 2.980124558839507,
      "grad_norm": 2.953125,
      "learning_rate": 3.681290580517198e-07,
      "loss": 0.8275,
      "step": 850310
    },
    {
      "epoch": 2.9801596063464024,
      "grad_norm": 2.8125,
      "learning_rate": 3.674800293880179e-07,
      "loss": 0.7638,
      "step": 850320
    },
    {
      "epoch": 2.980194653853298,
      "grad_norm": 3.453125,
      "learning_rate": 3.66831000724316e-07,
      "loss": 0.8217,
      "step": 850330
    },
    {
      "epoch": 2.9802297013601935,
      "grad_norm": 2.578125,
      "learning_rate": 3.661819720606141e-07,
      "loss": 0.691,
      "step": 850340
    },
    {
      "epoch": 2.9802647488670893,
      "grad_norm": 2.5,
      "learning_rate": 3.655329433969122e-07,
      "loss": 0.8289,
      "step": 850350
    },
    {
      "epoch": 2.980299796373985,
      "grad_norm": 2.90625,
      "learning_rate": 3.648839147332103e-07,
      "loss": 0.8227,
      "step": 850360
    },
    {
      "epoch": 2.9803348438808808,
      "grad_norm": 2.953125,
      "learning_rate": 3.642348860695084e-07,
      "loss": 0.8327,
      "step": 850370
    },
    {
      "epoch": 2.980369891387776,
      "grad_norm": 2.75,
      "learning_rate": 3.635858574058065e-07,
      "loss": 0.7506,
      "step": 850380
    },
    {
      "epoch": 2.980404938894672,
      "grad_norm": 3.046875,
      "learning_rate": 3.629368287421046e-07,
      "loss": 0.721,
      "step": 850390
    },
    {
      "epoch": 2.980439986401567,
      "grad_norm": 3.453125,
      "learning_rate": 3.6228780007840265e-07,
      "loss": 0.7264,
      "step": 850400
    },
    {
      "epoch": 2.980475033908463,
      "grad_norm": 3.359375,
      "learning_rate": 3.6163877141470076e-07,
      "loss": 0.8503,
      "step": 850410
    },
    {
      "epoch": 2.9805100814153587,
      "grad_norm": 3.171875,
      "learning_rate": 3.609897427509989e-07,
      "loss": 0.8972,
      "step": 850420
    },
    {
      "epoch": 2.980545128922254,
      "grad_norm": 2.625,
      "learning_rate": 3.6034071408729697e-07,
      "loss": 0.6844,
      "step": 850430
    },
    {
      "epoch": 2.9805801764291497,
      "grad_norm": 3.328125,
      "learning_rate": 3.596916854235951e-07,
      "loss": 0.8008,
      "step": 850440
    },
    {
      "epoch": 2.980615223936045,
      "grad_norm": 2.65625,
      "learning_rate": 3.590426567598931e-07,
      "loss": 0.7605,
      "step": 850450
    },
    {
      "epoch": 2.980650271442941,
      "grad_norm": 3.296875,
      "learning_rate": 3.5839362809619123e-07,
      "loss": 0.7787,
      "step": 850460
    },
    {
      "epoch": 2.9806853189498366,
      "grad_norm": 3.1875,
      "learning_rate": 3.577445994324894e-07,
      "loss": 0.79,
      "step": 850470
    },
    {
      "epoch": 2.9807203664567323,
      "grad_norm": 3.34375,
      "learning_rate": 3.5709557076878744e-07,
      "loss": 0.9049,
      "step": 850480
    },
    {
      "epoch": 2.9807554139636276,
      "grad_norm": 2.78125,
      "learning_rate": 3.5644654210508555e-07,
      "loss": 0.775,
      "step": 850490
    },
    {
      "epoch": 2.9807904614705234,
      "grad_norm": 2.40625,
      "learning_rate": 3.557975134413836e-07,
      "loss": 0.8324,
      "step": 850500
    },
    {
      "epoch": 2.9808255089774187,
      "grad_norm": 3.234375,
      "learning_rate": 3.5514848477768176e-07,
      "loss": 0.8024,
      "step": 850510
    },
    {
      "epoch": 2.9808605564843145,
      "grad_norm": 2.71875,
      "learning_rate": 3.5449945611397987e-07,
      "loss": 0.8401,
      "step": 850520
    },
    {
      "epoch": 2.9808956039912102,
      "grad_norm": 2.71875,
      "learning_rate": 3.538504274502779e-07,
      "loss": 0.7754,
      "step": 850530
    },
    {
      "epoch": 2.9809306514981055,
      "grad_norm": 2.734375,
      "learning_rate": 3.53201398786576e-07,
      "loss": 0.7957,
      "step": 850540
    },
    {
      "epoch": 2.9809656990050013,
      "grad_norm": 2.703125,
      "learning_rate": 3.5255237012287413e-07,
      "loss": 0.7219,
      "step": 850550
    },
    {
      "epoch": 2.9810007465118966,
      "grad_norm": 3.375,
      "learning_rate": 3.5190334145917224e-07,
      "loss": 0.8483,
      "step": 850560
    },
    {
      "epoch": 2.9810357940187924,
      "grad_norm": 3.265625,
      "learning_rate": 3.5125431279547034e-07,
      "loss": 0.8417,
      "step": 850570
    },
    {
      "epoch": 2.981070841525688,
      "grad_norm": 2.75,
      "learning_rate": 3.506052841317684e-07,
      "loss": 0.772,
      "step": 850580
    },
    {
      "epoch": 2.981105889032584,
      "grad_norm": 2.984375,
      "learning_rate": 3.499562554680665e-07,
      "loss": 0.7675,
      "step": 850590
    },
    {
      "epoch": 2.981140936539479,
      "grad_norm": 2.5625,
      "learning_rate": 3.493072268043646e-07,
      "loss": 0.7679,
      "step": 850600
    },
    {
      "epoch": 2.981175984046375,
      "grad_norm": 3.015625,
      "learning_rate": 3.486581981406627e-07,
      "loss": 0.8536,
      "step": 850610
    },
    {
      "epoch": 2.9812110315532703,
      "grad_norm": 2.984375,
      "learning_rate": 3.480091694769608e-07,
      "loss": 0.7839,
      "step": 850620
    },
    {
      "epoch": 2.981246079060166,
      "grad_norm": 3.0,
      "learning_rate": 3.4736014081325887e-07,
      "loss": 0.8321,
      "step": 850630
    },
    {
      "epoch": 2.981281126567062,
      "grad_norm": 2.875,
      "learning_rate": 3.46711112149557e-07,
      "loss": 0.84,
      "step": 850640
    },
    {
      "epoch": 2.981316174073957,
      "grad_norm": 2.78125,
      "learning_rate": 3.460620834858551e-07,
      "loss": 0.7769,
      "step": 850650
    },
    {
      "epoch": 2.981351221580853,
      "grad_norm": 3.390625,
      "learning_rate": 3.454130548221532e-07,
      "loss": 0.8285,
      "step": 850660
    },
    {
      "epoch": 2.981386269087748,
      "grad_norm": 2.96875,
      "learning_rate": 3.447640261584513e-07,
      "loss": 0.7557,
      "step": 850670
    },
    {
      "epoch": 2.981421316594644,
      "grad_norm": 2.796875,
      "learning_rate": 3.4411499749474935e-07,
      "loss": 0.7568,
      "step": 850680
    },
    {
      "epoch": 2.9814563641015397,
      "grad_norm": 2.875,
      "learning_rate": 3.434659688310475e-07,
      "loss": 0.7912,
      "step": 850690
    },
    {
      "epoch": 2.9814914116084354,
      "grad_norm": 2.890625,
      "learning_rate": 3.4281694016734556e-07,
      "loss": 0.8396,
      "step": 850700
    },
    {
      "epoch": 2.9815264591153308,
      "grad_norm": 3.15625,
      "learning_rate": 3.4216791150364366e-07,
      "loss": 0.7431,
      "step": 850710
    },
    {
      "epoch": 2.9815615066222265,
      "grad_norm": 2.734375,
      "learning_rate": 3.4151888283994177e-07,
      "loss": 0.8355,
      "step": 850720
    },
    {
      "epoch": 2.981596554129122,
      "grad_norm": 2.59375,
      "learning_rate": 3.408698541762398e-07,
      "loss": 0.8095,
      "step": 850730
    },
    {
      "epoch": 2.9816316016360176,
      "grad_norm": 2.828125,
      "learning_rate": 3.40220825512538e-07,
      "loss": 0.713,
      "step": 850740
    },
    {
      "epoch": 2.9816666491429134,
      "grad_norm": 3.09375,
      "learning_rate": 3.3957179684883603e-07,
      "loss": 0.8615,
      "step": 850750
    },
    {
      "epoch": 2.9817016966498087,
      "grad_norm": 2.75,
      "learning_rate": 3.3892276818513414e-07,
      "loss": 0.7763,
      "step": 850760
    },
    {
      "epoch": 2.9817367441567044,
      "grad_norm": 3.34375,
      "learning_rate": 3.3827373952143225e-07,
      "loss": 0.8003,
      "step": 850770
    },
    {
      "epoch": 2.9817717916635997,
      "grad_norm": 2.921875,
      "learning_rate": 3.3762471085773035e-07,
      "loss": 0.8419,
      "step": 850780
    },
    {
      "epoch": 2.9818068391704955,
      "grad_norm": 3.21875,
      "learning_rate": 3.3697568219402846e-07,
      "loss": 0.8318,
      "step": 850790
    },
    {
      "epoch": 2.9818418866773913,
      "grad_norm": 3.203125,
      "learning_rate": 3.363266535303265e-07,
      "loss": 0.846,
      "step": 850800
    },
    {
      "epoch": 2.981876934184287,
      "grad_norm": 2.53125,
      "learning_rate": 3.356776248666246e-07,
      "loss": 0.7885,
      "step": 850810
    },
    {
      "epoch": 2.9819119816911823,
      "grad_norm": 3.078125,
      "learning_rate": 3.350285962029227e-07,
      "loss": 0.8681,
      "step": 850820
    },
    {
      "epoch": 2.981947029198078,
      "grad_norm": 3.21875,
      "learning_rate": 3.3437956753922083e-07,
      "loss": 0.8252,
      "step": 850830
    },
    {
      "epoch": 2.9819820767049734,
      "grad_norm": 2.8125,
      "learning_rate": 3.3373053887551893e-07,
      "loss": 0.8237,
      "step": 850840
    },
    {
      "epoch": 2.982017124211869,
      "grad_norm": 2.984375,
      "learning_rate": 3.33081510211817e-07,
      "loss": 0.7251,
      "step": 850850
    },
    {
      "epoch": 2.982052171718765,
      "grad_norm": 2.921875,
      "learning_rate": 3.324324815481151e-07,
      "loss": 0.7691,
      "step": 850860
    },
    {
      "epoch": 2.9820872192256602,
      "grad_norm": 2.96875,
      "learning_rate": 3.317834528844132e-07,
      "loss": 0.7739,
      "step": 850870
    },
    {
      "epoch": 2.982122266732556,
      "grad_norm": 3.0,
      "learning_rate": 3.311344242207113e-07,
      "loss": 0.7547,
      "step": 850880
    },
    {
      "epoch": 2.9821573142394517,
      "grad_norm": 2.84375,
      "learning_rate": 3.304853955570094e-07,
      "loss": 0.7789,
      "step": 850890
    },
    {
      "epoch": 2.982192361746347,
      "grad_norm": 2.8125,
      "learning_rate": 3.2983636689330746e-07,
      "loss": 0.7679,
      "step": 850900
    },
    {
      "epoch": 2.982227409253243,
      "grad_norm": 2.859375,
      "learning_rate": 3.2918733822960557e-07,
      "loss": 0.8492,
      "step": 850910
    },
    {
      "epoch": 2.9822624567601386,
      "grad_norm": 2.890625,
      "learning_rate": 3.285383095659037e-07,
      "loss": 0.7672,
      "step": 850920
    },
    {
      "epoch": 2.982297504267034,
      "grad_norm": 2.8125,
      "learning_rate": 3.278892809022018e-07,
      "loss": 0.7572,
      "step": 850930
    },
    {
      "epoch": 2.9823325517739296,
      "grad_norm": 2.875,
      "learning_rate": 3.272402522384999e-07,
      "loss": 0.7818,
      "step": 850940
    },
    {
      "epoch": 2.982367599280825,
      "grad_norm": 3.265625,
      "learning_rate": 3.2659122357479794e-07,
      "loss": 0.7947,
      "step": 850950
    },
    {
      "epoch": 2.9824026467877207,
      "grad_norm": 2.90625,
      "learning_rate": 3.259421949110961e-07,
      "loss": 0.8642,
      "step": 850960
    },
    {
      "epoch": 2.9824376942946165,
      "grad_norm": 2.796875,
      "learning_rate": 3.2529316624739415e-07,
      "loss": 0.8458,
      "step": 850970
    },
    {
      "epoch": 2.982472741801512,
      "grad_norm": 3.078125,
      "learning_rate": 3.2464413758369226e-07,
      "loss": 0.7892,
      "step": 850980
    },
    {
      "epoch": 2.9825077893084075,
      "grad_norm": 2.8125,
      "learning_rate": 3.2399510891999036e-07,
      "loss": 0.7526,
      "step": 850990
    },
    {
      "epoch": 2.9825428368153033,
      "grad_norm": 3.1875,
      "learning_rate": 3.2334608025628847e-07,
      "loss": 0.7468,
      "step": 851000
    },
    {
      "epoch": 2.9825778843221986,
      "grad_norm": 2.28125,
      "learning_rate": 3.2269705159258657e-07,
      "loss": 0.812,
      "step": 851010
    },
    {
      "epoch": 2.9826129318290944,
      "grad_norm": 3.171875,
      "learning_rate": 3.220480229288846e-07,
      "loss": 0.7665,
      "step": 851020
    },
    {
      "epoch": 2.98264797933599,
      "grad_norm": 3.015625,
      "learning_rate": 3.2139899426518273e-07,
      "loss": 0.7448,
      "step": 851030
    },
    {
      "epoch": 2.9826830268428854,
      "grad_norm": 3.265625,
      "learning_rate": 3.2074996560148084e-07,
      "loss": 0.843,
      "step": 851040
    },
    {
      "epoch": 2.982718074349781,
      "grad_norm": 2.625,
      "learning_rate": 3.2010093693777894e-07,
      "loss": 0.7813,
      "step": 851050
    },
    {
      "epoch": 2.9827531218566765,
      "grad_norm": 3.078125,
      "learning_rate": 3.1945190827407705e-07,
      "loss": 0.7542,
      "step": 851060
    },
    {
      "epoch": 2.9827881693635723,
      "grad_norm": 3.09375,
      "learning_rate": 3.188028796103751e-07,
      "loss": 0.8694,
      "step": 851070
    },
    {
      "epoch": 2.982823216870468,
      "grad_norm": 3.109375,
      "learning_rate": 3.181538509466732e-07,
      "loss": 0.802,
      "step": 851080
    },
    {
      "epoch": 2.982858264377364,
      "grad_norm": 3.359375,
      "learning_rate": 3.175048222829713e-07,
      "loss": 0.8329,
      "step": 851090
    },
    {
      "epoch": 2.982893311884259,
      "grad_norm": 3.375,
      "learning_rate": 3.168557936192694e-07,
      "loss": 0.8467,
      "step": 851100
    },
    {
      "epoch": 2.982928359391155,
      "grad_norm": 2.875,
      "learning_rate": 3.162067649555675e-07,
      "loss": 0.7237,
      "step": 851110
    },
    {
      "epoch": 2.98296340689805,
      "grad_norm": 3.1875,
      "learning_rate": 3.155577362918656e-07,
      "loss": 0.8067,
      "step": 851120
    },
    {
      "epoch": 2.982998454404946,
      "grad_norm": 2.859375,
      "learning_rate": 3.149087076281637e-07,
      "loss": 0.7935,
      "step": 851130
    },
    {
      "epoch": 2.9830335019118417,
      "grad_norm": 3.234375,
      "learning_rate": 3.1425967896446184e-07,
      "loss": 0.7363,
      "step": 851140
    },
    {
      "epoch": 2.983068549418737,
      "grad_norm": 2.703125,
      "learning_rate": 3.136106503007599e-07,
      "loss": 0.7886,
      "step": 851150
    },
    {
      "epoch": 2.9831035969256328,
      "grad_norm": 3.140625,
      "learning_rate": 3.12961621637058e-07,
      "loss": 0.7349,
      "step": 851160
    },
    {
      "epoch": 2.983138644432528,
      "grad_norm": 3.171875,
      "learning_rate": 3.1231259297335605e-07,
      "loss": 0.7323,
      "step": 851170
    },
    {
      "epoch": 2.983173691939424,
      "grad_norm": 2.8125,
      "learning_rate": 3.1166356430965416e-07,
      "loss": 0.7548,
      "step": 851180
    },
    {
      "epoch": 2.9832087394463196,
      "grad_norm": 2.65625,
      "learning_rate": 3.110145356459523e-07,
      "loss": 0.7415,
      "step": 851190
    },
    {
      "epoch": 2.9832437869532153,
      "grad_norm": 2.84375,
      "learning_rate": 3.1036550698225037e-07,
      "loss": 0.779,
      "step": 851200
    },
    {
      "epoch": 2.9832788344601107,
      "grad_norm": 3.109375,
      "learning_rate": 3.097164783185485e-07,
      "loss": 0.8706,
      "step": 851210
    },
    {
      "epoch": 2.9833138819670064,
      "grad_norm": 3.015625,
      "learning_rate": 3.0906744965484653e-07,
      "loss": 0.8951,
      "step": 851220
    },
    {
      "epoch": 2.9833489294739017,
      "grad_norm": 3.109375,
      "learning_rate": 3.084184209911447e-07,
      "loss": 0.8782,
      "step": 851230
    },
    {
      "epoch": 2.9833839769807975,
      "grad_norm": 3.40625,
      "learning_rate": 3.077693923274428e-07,
      "loss": 0.7854,
      "step": 851240
    },
    {
      "epoch": 2.9834190244876932,
      "grad_norm": 2.5,
      "learning_rate": 3.0712036366374085e-07,
      "loss": 0.7454,
      "step": 851250
    },
    {
      "epoch": 2.9834540719945886,
      "grad_norm": 2.609375,
      "learning_rate": 3.0647133500003895e-07,
      "loss": 0.824,
      "step": 851260
    },
    {
      "epoch": 2.9834891195014843,
      "grad_norm": 2.875,
      "learning_rate": 3.0582230633633706e-07,
      "loss": 0.8352,
      "step": 851270
    },
    {
      "epoch": 2.9835241670083796,
      "grad_norm": 3.0,
      "learning_rate": 3.0517327767263516e-07,
      "loss": 0.7357,
      "step": 851280
    },
    {
      "epoch": 2.9835592145152754,
      "grad_norm": 3.28125,
      "learning_rate": 3.0452424900893327e-07,
      "loss": 0.8353,
      "step": 851290
    },
    {
      "epoch": 2.983594262022171,
      "grad_norm": 2.8125,
      "learning_rate": 3.038752203452313e-07,
      "loss": 0.8147,
      "step": 851300
    },
    {
      "epoch": 2.983629309529067,
      "grad_norm": 2.859375,
      "learning_rate": 3.0322619168152943e-07,
      "loss": 0.7801,
      "step": 851310
    },
    {
      "epoch": 2.9836643570359622,
      "grad_norm": 2.671875,
      "learning_rate": 3.0257716301782753e-07,
      "loss": 0.8504,
      "step": 851320
    },
    {
      "epoch": 2.983699404542858,
      "grad_norm": 2.90625,
      "learning_rate": 3.0192813435412564e-07,
      "loss": 0.7812,
      "step": 851330
    },
    {
      "epoch": 2.9837344520497533,
      "grad_norm": 3.1875,
      "learning_rate": 3.0127910569042375e-07,
      "loss": 0.6867,
      "step": 851340
    },
    {
      "epoch": 2.983769499556649,
      "grad_norm": 3.03125,
      "learning_rate": 3.006300770267218e-07,
      "loss": 0.8434,
      "step": 851350
    },
    {
      "epoch": 2.983804547063545,
      "grad_norm": 3.640625,
      "learning_rate": 2.999810483630199e-07,
      "loss": 0.9066,
      "step": 851360
    },
    {
      "epoch": 2.98383959457044,
      "grad_norm": 3.203125,
      "learning_rate": 2.99332019699318e-07,
      "loss": 0.8126,
      "step": 851370
    },
    {
      "epoch": 2.983874642077336,
      "grad_norm": 3.046875,
      "learning_rate": 2.986829910356161e-07,
      "loss": 0.7955,
      "step": 851380
    },
    {
      "epoch": 2.983909689584231,
      "grad_norm": 2.84375,
      "learning_rate": 2.980339623719142e-07,
      "loss": 0.7985,
      "step": 851390
    },
    {
      "epoch": 2.983944737091127,
      "grad_norm": 3.515625,
      "learning_rate": 2.973849337082123e-07,
      "loss": 0.8194,
      "step": 851400
    },
    {
      "epoch": 2.9839797845980227,
      "grad_norm": 2.484375,
      "learning_rate": 2.9673590504451043e-07,
      "loss": 0.7384,
      "step": 851410
    },
    {
      "epoch": 2.9840148321049185,
      "grad_norm": 3.109375,
      "learning_rate": 2.960868763808085e-07,
      "loss": 0.7726,
      "step": 851420
    },
    {
      "epoch": 2.984049879611814,
      "grad_norm": 2.609375,
      "learning_rate": 2.954378477171066e-07,
      "loss": 0.9176,
      "step": 851430
    },
    {
      "epoch": 2.9840849271187095,
      "grad_norm": 2.59375,
      "learning_rate": 2.947888190534047e-07,
      "loss": 0.8199,
      "step": 851440
    },
    {
      "epoch": 2.984119974625605,
      "grad_norm": 2.828125,
      "learning_rate": 2.941397903897028e-07,
      "loss": 0.8351,
      "step": 851450
    },
    {
      "epoch": 2.9841550221325006,
      "grad_norm": 2.953125,
      "learning_rate": 2.934907617260009e-07,
      "loss": 0.8126,
      "step": 851460
    },
    {
      "epoch": 2.9841900696393964,
      "grad_norm": 2.984375,
      "learning_rate": 2.9284173306229896e-07,
      "loss": 0.8365,
      "step": 851470
    },
    {
      "epoch": 2.9842251171462917,
      "grad_norm": 3.3125,
      "learning_rate": 2.9219270439859707e-07,
      "loss": 0.7601,
      "step": 851480
    },
    {
      "epoch": 2.9842601646531874,
      "grad_norm": 2.75,
      "learning_rate": 2.915436757348952e-07,
      "loss": 0.7831,
      "step": 851490
    },
    {
      "epoch": 2.9842952121600828,
      "grad_norm": 2.984375,
      "learning_rate": 2.908946470711933e-07,
      "loss": 0.7882,
      "step": 851500
    },
    {
      "epoch": 2.9843302596669785,
      "grad_norm": 2.8125,
      "learning_rate": 2.902456184074914e-07,
      "loss": 0.843,
      "step": 851510
    },
    {
      "epoch": 2.9843653071738743,
      "grad_norm": 2.921875,
      "learning_rate": 2.8959658974378944e-07,
      "loss": 0.768,
      "step": 851520
    },
    {
      "epoch": 2.98440035468077,
      "grad_norm": 2.875,
      "learning_rate": 2.8894756108008754e-07,
      "loss": 0.8485,
      "step": 851530
    },
    {
      "epoch": 2.9844354021876653,
      "grad_norm": 3.0625,
      "learning_rate": 2.8829853241638565e-07,
      "loss": 0.7887,
      "step": 851540
    },
    {
      "epoch": 2.984470449694561,
      "grad_norm": 2.546875,
      "learning_rate": 2.8764950375268376e-07,
      "loss": 0.7658,
      "step": 851550
    },
    {
      "epoch": 2.9845054972014564,
      "grad_norm": 2.671875,
      "learning_rate": 2.8700047508898186e-07,
      "loss": 0.8579,
      "step": 851560
    },
    {
      "epoch": 2.984540544708352,
      "grad_norm": 2.90625,
      "learning_rate": 2.863514464252799e-07,
      "loss": 0.7441,
      "step": 851570
    },
    {
      "epoch": 2.984575592215248,
      "grad_norm": 2.6875,
      "learning_rate": 2.85702417761578e-07,
      "loss": 0.7713,
      "step": 851580
    },
    {
      "epoch": 2.9846106397221432,
      "grad_norm": 2.734375,
      "learning_rate": 2.850533890978762e-07,
      "loss": 0.7979,
      "step": 851590
    },
    {
      "epoch": 2.984645687229039,
      "grad_norm": 3.3125,
      "learning_rate": 2.8440436043417423e-07,
      "loss": 0.8052,
      "step": 851600
    },
    {
      "epoch": 2.9846807347359343,
      "grad_norm": 2.578125,
      "learning_rate": 2.8375533177047234e-07,
      "loss": 0.809,
      "step": 851610
    },
    {
      "epoch": 2.98471578224283,
      "grad_norm": 3.375,
      "learning_rate": 2.831063031067704e-07,
      "loss": 0.9066,
      "step": 851620
    },
    {
      "epoch": 2.984750829749726,
      "grad_norm": 3.0,
      "learning_rate": 2.824572744430685e-07,
      "loss": 0.8556,
      "step": 851630
    },
    {
      "epoch": 2.9847858772566216,
      "grad_norm": 2.65625,
      "learning_rate": 2.818082457793666e-07,
      "loss": 0.7778,
      "step": 851640
    },
    {
      "epoch": 2.984820924763517,
      "grad_norm": 2.71875,
      "learning_rate": 2.811592171156647e-07,
      "loss": 0.8534,
      "step": 851650
    },
    {
      "epoch": 2.9848559722704127,
      "grad_norm": 2.65625,
      "learning_rate": 2.805101884519628e-07,
      "loss": 0.7991,
      "step": 851660
    },
    {
      "epoch": 2.984891019777308,
      "grad_norm": 3.109375,
      "learning_rate": 2.7986115978826087e-07,
      "loss": 0.7809,
      "step": 851670
    },
    {
      "epoch": 2.9849260672842037,
      "grad_norm": 3.359375,
      "learning_rate": 2.79212131124559e-07,
      "loss": 0.7784,
      "step": 851680
    },
    {
      "epoch": 2.9849611147910995,
      "grad_norm": 2.75,
      "learning_rate": 2.785631024608571e-07,
      "loss": 0.7547,
      "step": 851690
    },
    {
      "epoch": 2.984996162297995,
      "grad_norm": 3.296875,
      "learning_rate": 2.779140737971552e-07,
      "loss": 0.7915,
      "step": 851700
    },
    {
      "epoch": 2.9850312098048906,
      "grad_norm": 2.875,
      "learning_rate": 2.772650451334533e-07,
      "loss": 0.8151,
      "step": 851710
    },
    {
      "epoch": 2.985066257311786,
      "grad_norm": 2.78125,
      "learning_rate": 2.766160164697514e-07,
      "loss": 0.7558,
      "step": 851720
    },
    {
      "epoch": 2.9851013048186816,
      "grad_norm": 3.09375,
      "learning_rate": 2.759669878060495e-07,
      "loss": 0.826,
      "step": 851730
    },
    {
      "epoch": 2.9851363523255774,
      "grad_norm": 3.046875,
      "learning_rate": 2.7531795914234755e-07,
      "loss": 0.784,
      "step": 851740
    },
    {
      "epoch": 2.985171399832473,
      "grad_norm": 2.640625,
      "learning_rate": 2.7466893047864566e-07,
      "loss": 0.7835,
      "step": 851750
    },
    {
      "epoch": 2.9852064473393685,
      "grad_norm": 3.03125,
      "learning_rate": 2.7401990181494377e-07,
      "loss": 0.8098,
      "step": 851760
    },
    {
      "epoch": 2.985241494846264,
      "grad_norm": 2.75,
      "learning_rate": 2.7337087315124187e-07,
      "loss": 0.7993,
      "step": 851770
    },
    {
      "epoch": 2.9852765423531595,
      "grad_norm": 2.546875,
      "learning_rate": 2.7272184448754e-07,
      "loss": 0.809,
      "step": 851780
    },
    {
      "epoch": 2.9853115898600553,
      "grad_norm": 3.0625,
      "learning_rate": 2.7207281582383803e-07,
      "loss": 0.8377,
      "step": 851790
    },
    {
      "epoch": 2.985346637366951,
      "grad_norm": 3.3125,
      "learning_rate": 2.7142378716013614e-07,
      "loss": 0.8468,
      "step": 851800
    },
    {
      "epoch": 2.9853816848738464,
      "grad_norm": 2.703125,
      "learning_rate": 2.7077475849643424e-07,
      "loss": 0.7278,
      "step": 851810
    },
    {
      "epoch": 2.985416732380742,
      "grad_norm": 3.046875,
      "learning_rate": 2.7012572983273235e-07,
      "loss": 0.7457,
      "step": 851820
    },
    {
      "epoch": 2.9854517798876374,
      "grad_norm": 2.75,
      "learning_rate": 2.6947670116903045e-07,
      "loss": 0.7023,
      "step": 851830
    },
    {
      "epoch": 2.985486827394533,
      "grad_norm": 2.59375,
      "learning_rate": 2.688276725053285e-07,
      "loss": 0.8223,
      "step": 851840
    },
    {
      "epoch": 2.985521874901429,
      "grad_norm": 3.03125,
      "learning_rate": 2.681786438416266e-07,
      "loss": 0.7431,
      "step": 851850
    },
    {
      "epoch": 2.9855569224083247,
      "grad_norm": 2.84375,
      "learning_rate": 2.6752961517792477e-07,
      "loss": 0.827,
      "step": 851860
    },
    {
      "epoch": 2.98559196991522,
      "grad_norm": 3.265625,
      "learning_rate": 2.668805865142228e-07,
      "loss": 0.7951,
      "step": 851870
    },
    {
      "epoch": 2.985627017422116,
      "grad_norm": 3.15625,
      "learning_rate": 2.6623155785052093e-07,
      "loss": 0.8392,
      "step": 851880
    },
    {
      "epoch": 2.985662064929011,
      "grad_norm": 3.125,
      "learning_rate": 2.65582529186819e-07,
      "loss": 0.7341,
      "step": 851890
    },
    {
      "epoch": 2.985697112435907,
      "grad_norm": 2.71875,
      "learning_rate": 2.6493350052311714e-07,
      "loss": 0.7664,
      "step": 851900
    },
    {
      "epoch": 2.9857321599428026,
      "grad_norm": 2.328125,
      "learning_rate": 2.6428447185941525e-07,
      "loss": 0.7936,
      "step": 851910
    },
    {
      "epoch": 2.985767207449698,
      "grad_norm": 3.078125,
      "learning_rate": 2.636354431957133e-07,
      "loss": 0.8754,
      "step": 851920
    },
    {
      "epoch": 2.9858022549565937,
      "grad_norm": 3.1875,
      "learning_rate": 2.629864145320114e-07,
      "loss": 0.8316,
      "step": 851930
    },
    {
      "epoch": 2.985837302463489,
      "grad_norm": 3.6875,
      "learning_rate": 2.6233738586830946e-07,
      "loss": 0.8776,
      "step": 851940
    },
    {
      "epoch": 2.9858723499703848,
      "grad_norm": 2.4375,
      "learning_rate": 2.616883572046076e-07,
      "loss": 0.8682,
      "step": 851950
    },
    {
      "epoch": 2.9859073974772805,
      "grad_norm": 3.03125,
      "learning_rate": 2.610393285409057e-07,
      "loss": 0.7377,
      "step": 851960
    },
    {
      "epoch": 2.9859424449841763,
      "grad_norm": 3.09375,
      "learning_rate": 2.603902998772038e-07,
      "loss": 0.7631,
      "step": 851970
    },
    {
      "epoch": 2.9859774924910716,
      "grad_norm": 3.015625,
      "learning_rate": 2.597412712135019e-07,
      "loss": 0.8149,
      "step": 851980
    },
    {
      "epoch": 2.9860125399979673,
      "grad_norm": 3.375,
      "learning_rate": 2.590922425498e-07,
      "loss": 0.8906,
      "step": 851990
    },
    {
      "epoch": 2.9860475875048627,
      "grad_norm": 2.96875,
      "learning_rate": 2.584432138860981e-07,
      "loss": 0.7807,
      "step": 852000
    },
    {
      "epoch": 2.9860826350117584,
      "grad_norm": 3.046875,
      "learning_rate": 2.577941852223962e-07,
      "loss": 0.6997,
      "step": 852010
    },
    {
      "epoch": 2.986117682518654,
      "grad_norm": 3.40625,
      "learning_rate": 2.5714515655869425e-07,
      "loss": 0.8427,
      "step": 852020
    },
    {
      "epoch": 2.9861527300255495,
      "grad_norm": 2.734375,
      "learning_rate": 2.5649612789499236e-07,
      "loss": 0.7844,
      "step": 852030
    },
    {
      "epoch": 2.9861877775324452,
      "grad_norm": 2.875,
      "learning_rate": 2.5584709923129046e-07,
      "loss": 0.7882,
      "step": 852040
    },
    {
      "epoch": 2.9862228250393406,
      "grad_norm": 2.921875,
      "learning_rate": 2.5519807056758857e-07,
      "loss": 0.7528,
      "step": 852050
    },
    {
      "epoch": 2.9862578725462363,
      "grad_norm": 2.921875,
      "learning_rate": 2.545490419038867e-07,
      "loss": 0.7224,
      "step": 852060
    },
    {
      "epoch": 2.986292920053132,
      "grad_norm": 3.640625,
      "learning_rate": 2.5390001324018473e-07,
      "loss": 0.8188,
      "step": 852070
    },
    {
      "epoch": 2.986327967560028,
      "grad_norm": 3.25,
      "learning_rate": 2.5325098457648283e-07,
      "loss": 0.8433,
      "step": 852080
    },
    {
      "epoch": 2.986363015066923,
      "grad_norm": 2.59375,
      "learning_rate": 2.5260195591278094e-07,
      "loss": 0.7956,
      "step": 852090
    },
    {
      "epoch": 2.986398062573819,
      "grad_norm": 2.84375,
      "learning_rate": 2.5195292724907904e-07,
      "loss": 0.7569,
      "step": 852100
    },
    {
      "epoch": 2.986433110080714,
      "grad_norm": 2.734375,
      "learning_rate": 2.5130389858537715e-07,
      "loss": 0.777,
      "step": 852110
    },
    {
      "epoch": 2.98646815758761,
      "grad_norm": 2.96875,
      "learning_rate": 2.506548699216752e-07,
      "loss": 0.8594,
      "step": 852120
    },
    {
      "epoch": 2.9865032050945057,
      "grad_norm": 3.03125,
      "learning_rate": 2.5000584125797336e-07,
      "loss": 0.8482,
      "step": 852130
    },
    {
      "epoch": 2.986538252601401,
      "grad_norm": 2.828125,
      "learning_rate": 2.493568125942714e-07,
      "loss": 0.7186,
      "step": 852140
    },
    {
      "epoch": 2.986573300108297,
      "grad_norm": 2.84375,
      "learning_rate": 2.487077839305695e-07,
      "loss": 0.7774,
      "step": 852150
    },
    {
      "epoch": 2.986608347615192,
      "grad_norm": 3.53125,
      "learning_rate": 2.480587552668676e-07,
      "loss": 0.8792,
      "step": 852160
    },
    {
      "epoch": 2.986643395122088,
      "grad_norm": 3.015625,
      "learning_rate": 2.4740972660316573e-07,
      "loss": 0.8764,
      "step": 852170
    },
    {
      "epoch": 2.9866784426289836,
      "grad_norm": 3.140625,
      "learning_rate": 2.4676069793946384e-07,
      "loss": 0.8031,
      "step": 852180
    },
    {
      "epoch": 2.9867134901358794,
      "grad_norm": 3.171875,
      "learning_rate": 2.461116692757619e-07,
      "loss": 0.802,
      "step": 852190
    },
    {
      "epoch": 2.9867485376427747,
      "grad_norm": 3.328125,
      "learning_rate": 2.4546264061206e-07,
      "loss": 0.8091,
      "step": 852200
    },
    {
      "epoch": 2.9867835851496705,
      "grad_norm": 3.0,
      "learning_rate": 2.448136119483581e-07,
      "loss": 0.8133,
      "step": 852210
    },
    {
      "epoch": 2.9868186326565658,
      "grad_norm": 2.65625,
      "learning_rate": 2.441645832846562e-07,
      "loss": 0.8344,
      "step": 852220
    },
    {
      "epoch": 2.9868536801634615,
      "grad_norm": 3.078125,
      "learning_rate": 2.435155546209543e-07,
      "loss": 0.8195,
      "step": 852230
    },
    {
      "epoch": 2.9868887276703573,
      "grad_norm": 3.03125,
      "learning_rate": 2.4286652595725237e-07,
      "loss": 0.8422,
      "step": 852240
    },
    {
      "epoch": 2.9869237751772526,
      "grad_norm": 2.625,
      "learning_rate": 2.4221749729355047e-07,
      "loss": 0.7952,
      "step": 852250
    },
    {
      "epoch": 2.9869588226841484,
      "grad_norm": 2.796875,
      "learning_rate": 2.415684686298486e-07,
      "loss": 0.8017,
      "step": 852260
    },
    {
      "epoch": 2.986993870191044,
      "grad_norm": 2.65625,
      "learning_rate": 2.409194399661467e-07,
      "loss": 0.7994,
      "step": 852270
    },
    {
      "epoch": 2.9870289176979394,
      "grad_norm": 3.296875,
      "learning_rate": 2.402704113024448e-07,
      "loss": 0.857,
      "step": 852280
    },
    {
      "epoch": 2.987063965204835,
      "grad_norm": 2.78125,
      "learning_rate": 2.3962138263874284e-07,
      "loss": 0.8108,
      "step": 852290
    },
    {
      "epoch": 2.987099012711731,
      "grad_norm": 2.921875,
      "learning_rate": 2.3897235397504095e-07,
      "loss": 0.8442,
      "step": 852300
    },
    {
      "epoch": 2.9871340602186263,
      "grad_norm": 2.90625,
      "learning_rate": 2.3832332531133908e-07,
      "loss": 0.7272,
      "step": 852310
    },
    {
      "epoch": 2.987169107725522,
      "grad_norm": 3.25,
      "learning_rate": 2.3767429664763716e-07,
      "loss": 0.8287,
      "step": 852320
    },
    {
      "epoch": 2.9872041552324173,
      "grad_norm": 2.546875,
      "learning_rate": 2.3702526798393527e-07,
      "loss": 0.8007,
      "step": 852330
    },
    {
      "epoch": 2.987239202739313,
      "grad_norm": 2.96875,
      "learning_rate": 2.3637623932023334e-07,
      "loss": 0.7419,
      "step": 852340
    },
    {
      "epoch": 2.987274250246209,
      "grad_norm": 3.21875,
      "learning_rate": 2.3572721065653145e-07,
      "loss": 0.7779,
      "step": 852350
    },
    {
      "epoch": 2.9873092977531046,
      "grad_norm": 2.734375,
      "learning_rate": 2.3507818199282953e-07,
      "loss": 0.7805,
      "step": 852360
    },
    {
      "epoch": 2.98734434526,
      "grad_norm": 3.46875,
      "learning_rate": 2.3442915332912764e-07,
      "loss": 0.8005,
      "step": 852370
    },
    {
      "epoch": 2.9873793927668957,
      "grad_norm": 3.1875,
      "learning_rate": 2.3378012466542574e-07,
      "loss": 0.8823,
      "step": 852380
    },
    {
      "epoch": 2.987414440273791,
      "grad_norm": 3.03125,
      "learning_rate": 2.3313109600172382e-07,
      "loss": 0.7925,
      "step": 852390
    },
    {
      "epoch": 2.9874494877806868,
      "grad_norm": 3.046875,
      "learning_rate": 2.3248206733802193e-07,
      "loss": 0.775,
      "step": 852400
    },
    {
      "epoch": 2.9874845352875825,
      "grad_norm": 3.0625,
      "learning_rate": 2.3183303867432e-07,
      "loss": 0.8296,
      "step": 852410
    },
    {
      "epoch": 2.987519582794478,
      "grad_norm": 2.796875,
      "learning_rate": 2.311840100106181e-07,
      "loss": 0.7941,
      "step": 852420
    },
    {
      "epoch": 2.9875546303013736,
      "grad_norm": 2.78125,
      "learning_rate": 2.3053498134691624e-07,
      "loss": 0.7519,
      "step": 852430
    },
    {
      "epoch": 2.987589677808269,
      "grad_norm": 3.0,
      "learning_rate": 2.298859526832143e-07,
      "loss": 0.7155,
      "step": 852440
    },
    {
      "epoch": 2.9876247253151647,
      "grad_norm": 2.90625,
      "learning_rate": 2.292369240195124e-07,
      "loss": 0.8297,
      "step": 852450
    },
    {
      "epoch": 2.9876597728220604,
      "grad_norm": 2.953125,
      "learning_rate": 2.2858789535581048e-07,
      "loss": 0.695,
      "step": 852460
    },
    {
      "epoch": 2.987694820328956,
      "grad_norm": 2.40625,
      "learning_rate": 2.279388666921086e-07,
      "loss": 0.7728,
      "step": 852470
    },
    {
      "epoch": 2.9877298678358515,
      "grad_norm": 2.921875,
      "learning_rate": 2.2728983802840672e-07,
      "loss": 0.7772,
      "step": 852480
    },
    {
      "epoch": 2.9877649153427472,
      "grad_norm": 2.796875,
      "learning_rate": 2.2664080936470477e-07,
      "loss": 0.7827,
      "step": 852490
    },
    {
      "epoch": 2.9877999628496426,
      "grad_norm": 3.0,
      "learning_rate": 2.259917807010029e-07,
      "loss": 0.7338,
      "step": 852500
    },
    {
      "epoch": 2.9878350103565383,
      "grad_norm": 2.78125,
      "learning_rate": 2.2534275203730096e-07,
      "loss": 0.7099,
      "step": 852510
    },
    {
      "epoch": 2.987870057863434,
      "grad_norm": 2.796875,
      "learning_rate": 2.246937233735991e-07,
      "loss": 0.8248,
      "step": 852520
    },
    {
      "epoch": 2.9879051053703294,
      "grad_norm": 2.90625,
      "learning_rate": 2.240446947098972e-07,
      "loss": 0.7592,
      "step": 852530
    },
    {
      "epoch": 2.987940152877225,
      "grad_norm": 2.96875,
      "learning_rate": 2.2339566604619527e-07,
      "loss": 0.7537,
      "step": 852540
    },
    {
      "epoch": 2.9879752003841205,
      "grad_norm": 3.0,
      "learning_rate": 2.2274663738249338e-07,
      "loss": 0.8649,
      "step": 852550
    },
    {
      "epoch": 2.988010247891016,
      "grad_norm": 2.578125,
      "learning_rate": 2.2209760871879146e-07,
      "loss": 0.7184,
      "step": 852560
    },
    {
      "epoch": 2.988045295397912,
      "grad_norm": 2.765625,
      "learning_rate": 2.2144858005508957e-07,
      "loss": 0.8223,
      "step": 852570
    },
    {
      "epoch": 2.9880803429048077,
      "grad_norm": 2.25,
      "learning_rate": 2.2079955139138767e-07,
      "loss": 0.7346,
      "step": 852580
    },
    {
      "epoch": 2.988115390411703,
      "grad_norm": 3.15625,
      "learning_rate": 2.2015052272768575e-07,
      "loss": 0.8305,
      "step": 852590
    },
    {
      "epoch": 2.988150437918599,
      "grad_norm": 2.8125,
      "learning_rate": 2.1950149406398386e-07,
      "loss": 0.7819,
      "step": 852600
    },
    {
      "epoch": 2.988185485425494,
      "grad_norm": 3.265625,
      "learning_rate": 2.1885246540028194e-07,
      "loss": 0.7762,
      "step": 852610
    },
    {
      "epoch": 2.98822053293239,
      "grad_norm": 3.125,
      "learning_rate": 2.1820343673658004e-07,
      "loss": 0.8194,
      "step": 852620
    },
    {
      "epoch": 2.9882555804392856,
      "grad_norm": 2.953125,
      "learning_rate": 2.1755440807287815e-07,
      "loss": 0.7703,
      "step": 852630
    },
    {
      "epoch": 2.988290627946181,
      "grad_norm": 2.75,
      "learning_rate": 2.1690537940917623e-07,
      "loss": 0.8554,
      "step": 852640
    },
    {
      "epoch": 2.9883256754530767,
      "grad_norm": 2.734375,
      "learning_rate": 2.1625635074547433e-07,
      "loss": 0.7175,
      "step": 852650
    },
    {
      "epoch": 2.988360722959972,
      "grad_norm": 2.984375,
      "learning_rate": 2.156073220817724e-07,
      "loss": 0.8336,
      "step": 852660
    },
    {
      "epoch": 2.9883957704668678,
      "grad_norm": 3.09375,
      "learning_rate": 2.1495829341807052e-07,
      "loss": 0.7988,
      "step": 852670
    },
    {
      "epoch": 2.9884308179737635,
      "grad_norm": 2.203125,
      "learning_rate": 2.1430926475436865e-07,
      "loss": 0.7358,
      "step": 852680
    },
    {
      "epoch": 2.9884658654806593,
      "grad_norm": 2.84375,
      "learning_rate": 2.136602360906667e-07,
      "loss": 0.8089,
      "step": 852690
    },
    {
      "epoch": 2.9885009129875546,
      "grad_norm": 2.796875,
      "learning_rate": 2.1301120742696483e-07,
      "loss": 0.7855,
      "step": 852700
    },
    {
      "epoch": 2.9885359604944504,
      "grad_norm": 3.09375,
      "learning_rate": 2.123621787632629e-07,
      "loss": 0.7977,
      "step": 852710
    },
    {
      "epoch": 2.9885710080013457,
      "grad_norm": 2.875,
      "learning_rate": 2.1171315009956102e-07,
      "loss": 0.738,
      "step": 852720
    },
    {
      "epoch": 2.9886060555082414,
      "grad_norm": 2.578125,
      "learning_rate": 2.1106412143585913e-07,
      "loss": 0.756,
      "step": 852730
    },
    {
      "epoch": 2.988641103015137,
      "grad_norm": 2.890625,
      "learning_rate": 2.104150927721572e-07,
      "loss": 0.8154,
      "step": 852740
    },
    {
      "epoch": 2.9886761505220325,
      "grad_norm": 2.65625,
      "learning_rate": 2.097660641084553e-07,
      "loss": 0.817,
      "step": 852750
    },
    {
      "epoch": 2.9887111980289283,
      "grad_norm": 2.875,
      "learning_rate": 2.0911703544475336e-07,
      "loss": 0.6786,
      "step": 852760
    },
    {
      "epoch": 2.9887462455358236,
      "grad_norm": 3.046875,
      "learning_rate": 2.084680067810515e-07,
      "loss": 0.7307,
      "step": 852770
    },
    {
      "epoch": 2.9887812930427193,
      "grad_norm": 3.1875,
      "learning_rate": 2.078189781173496e-07,
      "loss": 0.8734,
      "step": 852780
    },
    {
      "epoch": 2.988816340549615,
      "grad_norm": 3.34375,
      "learning_rate": 2.0716994945364768e-07,
      "loss": 0.7842,
      "step": 852790
    },
    {
      "epoch": 2.988851388056511,
      "grad_norm": 2.921875,
      "learning_rate": 2.0652092078994579e-07,
      "loss": 0.8014,
      "step": 852800
    },
    {
      "epoch": 2.988886435563406,
      "grad_norm": 3.125,
      "learning_rate": 2.0587189212624387e-07,
      "loss": 0.7731,
      "step": 852810
    },
    {
      "epoch": 2.988921483070302,
      "grad_norm": 3.71875,
      "learning_rate": 2.0522286346254197e-07,
      "loss": 0.7523,
      "step": 852820
    },
    {
      "epoch": 2.9889565305771972,
      "grad_norm": 3.171875,
      "learning_rate": 2.0457383479884008e-07,
      "loss": 0.7586,
      "step": 852830
    },
    {
      "epoch": 2.988991578084093,
      "grad_norm": 2.9375,
      "learning_rate": 2.0392480613513816e-07,
      "loss": 0.7899,
      "step": 852840
    },
    {
      "epoch": 2.9890266255909888,
      "grad_norm": 2.6875,
      "learning_rate": 2.0327577747143626e-07,
      "loss": 0.865,
      "step": 852850
    },
    {
      "epoch": 2.989061673097884,
      "grad_norm": 3.5625,
      "learning_rate": 2.0262674880773434e-07,
      "loss": 0.7986,
      "step": 852860
    },
    {
      "epoch": 2.98909672060478,
      "grad_norm": 3.21875,
      "learning_rate": 2.0197772014403245e-07,
      "loss": 0.7933,
      "step": 852870
    },
    {
      "epoch": 2.989131768111675,
      "grad_norm": 3.015625,
      "learning_rate": 2.0132869148033058e-07,
      "loss": 0.7653,
      "step": 852880
    },
    {
      "epoch": 2.989166815618571,
      "grad_norm": 2.921875,
      "learning_rate": 2.0067966281662863e-07,
      "loss": 0.8749,
      "step": 852890
    },
    {
      "epoch": 2.9892018631254667,
      "grad_norm": 2.78125,
      "learning_rate": 2.0003063415292674e-07,
      "loss": 0.8005,
      "step": 852900
    },
    {
      "epoch": 2.9892369106323624,
      "grad_norm": 2.71875,
      "learning_rate": 1.9938160548922482e-07,
      "loss": 0.7808,
      "step": 852910
    },
    {
      "epoch": 2.9892719581392577,
      "grad_norm": 2.96875,
      "learning_rate": 1.9873257682552292e-07,
      "loss": 0.7972,
      "step": 852920
    },
    {
      "epoch": 2.9893070056461535,
      "grad_norm": 2.75,
      "learning_rate": 1.9808354816182106e-07,
      "loss": 0.8371,
      "step": 852930
    },
    {
      "epoch": 2.989342053153049,
      "grad_norm": 3.09375,
      "learning_rate": 1.974345194981191e-07,
      "loss": 0.8038,
      "step": 852940
    },
    {
      "epoch": 2.9893771006599446,
      "grad_norm": 3.4375,
      "learning_rate": 1.9678549083441724e-07,
      "loss": 0.7878,
      "step": 852950
    },
    {
      "epoch": 2.9894121481668403,
      "grad_norm": 2.609375,
      "learning_rate": 1.961364621707153e-07,
      "loss": 0.8089,
      "step": 852960
    },
    {
      "epoch": 2.9894471956737356,
      "grad_norm": 2.78125,
      "learning_rate": 1.9548743350701343e-07,
      "loss": 0.7749,
      "step": 852970
    },
    {
      "epoch": 2.9894822431806314,
      "grad_norm": 2.640625,
      "learning_rate": 1.948384048433115e-07,
      "loss": 0.7697,
      "step": 852980
    },
    {
      "epoch": 2.9895172906875267,
      "grad_norm": 2.28125,
      "learning_rate": 1.941893761796096e-07,
      "loss": 0.8083,
      "step": 852990
    },
    {
      "epoch": 2.9895523381944225,
      "grad_norm": 2.578125,
      "learning_rate": 1.935403475159077e-07,
      "loss": 0.7799,
      "step": 853000
    },
    {
      "epoch": 2.989587385701318,
      "grad_norm": 2.671875,
      "learning_rate": 1.928913188522058e-07,
      "loss": 0.7972,
      "step": 853010
    },
    {
      "epoch": 2.989622433208214,
      "grad_norm": 3.0625,
      "learning_rate": 1.922422901885039e-07,
      "loss": 0.7511,
      "step": 853020
    },
    {
      "epoch": 2.9896574807151093,
      "grad_norm": 2.859375,
      "learning_rate": 1.9159326152480198e-07,
      "loss": 0.7453,
      "step": 853030
    },
    {
      "epoch": 2.989692528222005,
      "grad_norm": 2.921875,
      "learning_rate": 1.909442328611001e-07,
      "loss": 0.8111,
      "step": 853040
    },
    {
      "epoch": 2.9897275757289004,
      "grad_norm": 3.265625,
      "learning_rate": 1.9029520419739817e-07,
      "loss": 0.8173,
      "step": 853050
    },
    {
      "epoch": 2.989762623235796,
      "grad_norm": 3.015625,
      "learning_rate": 1.896461755336963e-07,
      "loss": 0.7851,
      "step": 853060
    },
    {
      "epoch": 2.989797670742692,
      "grad_norm": 2.65625,
      "learning_rate": 1.8899714686999438e-07,
      "loss": 0.7972,
      "step": 853070
    },
    {
      "epoch": 2.989832718249587,
      "grad_norm": 2.5625,
      "learning_rate": 1.8834811820629248e-07,
      "loss": 0.7429,
      "step": 853080
    },
    {
      "epoch": 2.989867765756483,
      "grad_norm": 2.65625,
      "learning_rate": 1.8769908954259056e-07,
      "loss": 0.7166,
      "step": 853090
    },
    {
      "epoch": 2.9899028132633783,
      "grad_norm": 3.125,
      "learning_rate": 1.8705006087888867e-07,
      "loss": 0.8956,
      "step": 853100
    },
    {
      "epoch": 2.989937860770274,
      "grad_norm": 2.75,
      "learning_rate": 1.8640103221518677e-07,
      "loss": 0.7729,
      "step": 853110
    },
    {
      "epoch": 2.9899729082771698,
      "grad_norm": 2.96875,
      "learning_rate": 1.8575200355148485e-07,
      "loss": 0.764,
      "step": 853120
    },
    {
      "epoch": 2.9900079557840655,
      "grad_norm": 2.625,
      "learning_rate": 1.8510297488778296e-07,
      "loss": 0.7802,
      "step": 853130
    },
    {
      "epoch": 2.990043003290961,
      "grad_norm": 2.875,
      "learning_rate": 1.8445394622408104e-07,
      "loss": 0.8479,
      "step": 853140
    },
    {
      "epoch": 2.9900780507978566,
      "grad_norm": 3.265625,
      "learning_rate": 1.8380491756037914e-07,
      "loss": 0.7854,
      "step": 853150
    },
    {
      "epoch": 2.990113098304752,
      "grad_norm": 3.0625,
      "learning_rate": 1.8315588889667725e-07,
      "loss": 0.7277,
      "step": 853160
    },
    {
      "epoch": 2.9901481458116477,
      "grad_norm": 2.71875,
      "learning_rate": 1.8250686023297536e-07,
      "loss": 0.8244,
      "step": 853170
    },
    {
      "epoch": 2.9901831933185434,
      "grad_norm": 2.9375,
      "learning_rate": 1.8185783156927344e-07,
      "loss": 0.8642,
      "step": 853180
    },
    {
      "epoch": 2.9902182408254387,
      "grad_norm": 2.6875,
      "learning_rate": 1.8120880290557154e-07,
      "loss": 0.7326,
      "step": 853190
    },
    {
      "epoch": 2.9902532883323345,
      "grad_norm": 3.09375,
      "learning_rate": 1.8055977424186962e-07,
      "loss": 0.8463,
      "step": 853200
    },
    {
      "epoch": 2.99028833583923,
      "grad_norm": 3.0625,
      "learning_rate": 1.7991074557816773e-07,
      "loss": 0.8368,
      "step": 853210
    },
    {
      "epoch": 2.9903233833461256,
      "grad_norm": 2.671875,
      "learning_rate": 1.7926171691446583e-07,
      "loss": 0.769,
      "step": 853220
    },
    {
      "epoch": 2.9903584308530213,
      "grad_norm": 2.65625,
      "learning_rate": 1.786126882507639e-07,
      "loss": 0.8106,
      "step": 853230
    },
    {
      "epoch": 2.990393478359917,
      "grad_norm": 3.359375,
      "learning_rate": 1.7796365958706202e-07,
      "loss": 0.8433,
      "step": 853240
    },
    {
      "epoch": 2.9904285258668124,
      "grad_norm": 2.8125,
      "learning_rate": 1.773146309233601e-07,
      "loss": 0.7788,
      "step": 853250
    },
    {
      "epoch": 2.990463573373708,
      "grad_norm": 3.15625,
      "learning_rate": 1.766656022596582e-07,
      "loss": 0.8625,
      "step": 853260
    },
    {
      "epoch": 2.9904986208806035,
      "grad_norm": 3.296875,
      "learning_rate": 1.760165735959563e-07,
      "loss": 0.8686,
      "step": 853270
    },
    {
      "epoch": 2.9905336683874992,
      "grad_norm": 3.21875,
      "learning_rate": 1.753675449322544e-07,
      "loss": 0.7983,
      "step": 853280
    },
    {
      "epoch": 2.990568715894395,
      "grad_norm": 3.1875,
      "learning_rate": 1.747185162685525e-07,
      "loss": 0.8575,
      "step": 853290
    },
    {
      "epoch": 2.9906037634012903,
      "grad_norm": 2.828125,
      "learning_rate": 1.7406948760485057e-07,
      "loss": 0.8114,
      "step": 853300
    },
    {
      "epoch": 2.990638810908186,
      "grad_norm": 3.328125,
      "learning_rate": 1.7342045894114868e-07,
      "loss": 0.8367,
      "step": 853310
    },
    {
      "epoch": 2.9906738584150814,
      "grad_norm": 3.109375,
      "learning_rate": 1.7277143027744678e-07,
      "loss": 0.8283,
      "step": 853320
    },
    {
      "epoch": 2.990708905921977,
      "grad_norm": 3.140625,
      "learning_rate": 1.721224016137449e-07,
      "loss": 0.8538,
      "step": 853330
    },
    {
      "epoch": 2.990743953428873,
      "grad_norm": 2.78125,
      "learning_rate": 1.7147337295004297e-07,
      "loss": 0.7887,
      "step": 853340
    },
    {
      "epoch": 2.9907790009357687,
      "grad_norm": 3.296875,
      "learning_rate": 1.7082434428634107e-07,
      "loss": 0.7978,
      "step": 853350
    },
    {
      "epoch": 2.990814048442664,
      "grad_norm": 2.984375,
      "learning_rate": 1.7017531562263915e-07,
      "loss": 0.8783,
      "step": 853360
    },
    {
      "epoch": 2.9908490959495597,
      "grad_norm": 2.859375,
      "learning_rate": 1.6952628695893726e-07,
      "loss": 0.7434,
      "step": 853370
    },
    {
      "epoch": 2.990884143456455,
      "grad_norm": 2.875,
      "learning_rate": 1.6887725829523537e-07,
      "loss": 0.7653,
      "step": 853380
    },
    {
      "epoch": 2.990919190963351,
      "grad_norm": 3.1875,
      "learning_rate": 1.6822822963153344e-07,
      "loss": 0.8371,
      "step": 853390
    },
    {
      "epoch": 2.9909542384702466,
      "grad_norm": 3.109375,
      "learning_rate": 1.6757920096783155e-07,
      "loss": 0.8309,
      "step": 853400
    },
    {
      "epoch": 2.990989285977142,
      "grad_norm": 2.78125,
      "learning_rate": 1.6693017230412963e-07,
      "loss": 0.794,
      "step": 853410
    },
    {
      "epoch": 2.9910243334840376,
      "grad_norm": 2.625,
      "learning_rate": 1.6628114364042776e-07,
      "loss": 0.7428,
      "step": 853420
    },
    {
      "epoch": 2.991059380990933,
      "grad_norm": 2.5625,
      "learning_rate": 1.6563211497672584e-07,
      "loss": 0.7557,
      "step": 853430
    },
    {
      "epoch": 2.9910944284978287,
      "grad_norm": 2.515625,
      "learning_rate": 1.6498308631302395e-07,
      "loss": 0.792,
      "step": 853440
    },
    {
      "epoch": 2.9911294760047245,
      "grad_norm": 2.703125,
      "learning_rate": 1.6433405764932203e-07,
      "loss": 0.7873,
      "step": 853450
    },
    {
      "epoch": 2.99116452351162,
      "grad_norm": 2.734375,
      "learning_rate": 1.6368502898562013e-07,
      "loss": 0.7609,
      "step": 853460
    },
    {
      "epoch": 2.9911995710185155,
      "grad_norm": 2.90625,
      "learning_rate": 1.6303600032191824e-07,
      "loss": 0.7901,
      "step": 853470
    },
    {
      "epoch": 2.9912346185254113,
      "grad_norm": 2.703125,
      "learning_rate": 1.6238697165821632e-07,
      "loss": 0.6995,
      "step": 853480
    },
    {
      "epoch": 2.9912696660323066,
      "grad_norm": 3.65625,
      "learning_rate": 1.6173794299451442e-07,
      "loss": 0.8169,
      "step": 853490
    },
    {
      "epoch": 2.9913047135392024,
      "grad_norm": 3.375,
      "learning_rate": 1.610889143308125e-07,
      "loss": 0.8281,
      "step": 853500
    },
    {
      "epoch": 2.991339761046098,
      "grad_norm": 2.90625,
      "learning_rate": 1.604398856671106e-07,
      "loss": 0.765,
      "step": 853510
    },
    {
      "epoch": 2.9913748085529934,
      "grad_norm": 2.625,
      "learning_rate": 1.5979085700340871e-07,
      "loss": 0.7708,
      "step": 853520
    },
    {
      "epoch": 2.991409856059889,
      "grad_norm": 2.609375,
      "learning_rate": 1.5914182833970682e-07,
      "loss": 0.7414,
      "step": 853530
    },
    {
      "epoch": 2.991444903566785,
      "grad_norm": 3.203125,
      "learning_rate": 1.584927996760049e-07,
      "loss": 0.7526,
      "step": 853540
    },
    {
      "epoch": 2.9914799510736803,
      "grad_norm": 3.3125,
      "learning_rate": 1.57843771012303e-07,
      "loss": 0.7632,
      "step": 853550
    },
    {
      "epoch": 2.991514998580576,
      "grad_norm": 2.90625,
      "learning_rate": 1.5719474234860108e-07,
      "loss": 0.7739,
      "step": 853560
    },
    {
      "epoch": 2.9915500460874718,
      "grad_norm": 2.90625,
      "learning_rate": 1.565457136848992e-07,
      "loss": 0.8151,
      "step": 853570
    },
    {
      "epoch": 2.991585093594367,
      "grad_norm": 2.4375,
      "learning_rate": 1.558966850211973e-07,
      "loss": 0.7925,
      "step": 853580
    },
    {
      "epoch": 2.991620141101263,
      "grad_norm": 3.328125,
      "learning_rate": 1.5524765635749538e-07,
      "loss": 0.809,
      "step": 853590
    },
    {
      "epoch": 2.991655188608158,
      "grad_norm": 3.0625,
      "learning_rate": 1.5459862769379348e-07,
      "loss": 0.7199,
      "step": 853600
    },
    {
      "epoch": 2.991690236115054,
      "grad_norm": 2.796875,
      "learning_rate": 1.5394959903009156e-07,
      "loss": 0.7599,
      "step": 853610
    },
    {
      "epoch": 2.9917252836219497,
      "grad_norm": 3.125,
      "learning_rate": 1.5330057036638967e-07,
      "loss": 0.8376,
      "step": 853620
    },
    {
      "epoch": 2.991760331128845,
      "grad_norm": 2.828125,
      "learning_rate": 1.5265154170268777e-07,
      "loss": 0.8224,
      "step": 853630
    },
    {
      "epoch": 2.9917953786357407,
      "grad_norm": 3.046875,
      "learning_rate": 1.5200251303898585e-07,
      "loss": 0.8288,
      "step": 853640
    },
    {
      "epoch": 2.9918304261426365,
      "grad_norm": 3.265625,
      "learning_rate": 1.5135348437528396e-07,
      "loss": 0.7875,
      "step": 853650
    },
    {
      "epoch": 2.991865473649532,
      "grad_norm": 3.046875,
      "learning_rate": 1.5070445571158204e-07,
      "loss": 0.803,
      "step": 853660
    },
    {
      "epoch": 2.9919005211564276,
      "grad_norm": 2.453125,
      "learning_rate": 1.5005542704788014e-07,
      "loss": 0.7598,
      "step": 853670
    },
    {
      "epoch": 2.9919355686633233,
      "grad_norm": 3.21875,
      "learning_rate": 1.4940639838417825e-07,
      "loss": 0.7777,
      "step": 853680
    },
    {
      "epoch": 2.9919706161702186,
      "grad_norm": 3.4375,
      "learning_rate": 1.4875736972047635e-07,
      "loss": 0.8616,
      "step": 853690
    },
    {
      "epoch": 2.9920056636771144,
      "grad_norm": 2.875,
      "learning_rate": 1.4810834105677443e-07,
      "loss": 0.8028,
      "step": 853700
    },
    {
      "epoch": 2.9920407111840097,
      "grad_norm": 2.765625,
      "learning_rate": 1.4745931239307254e-07,
      "loss": 0.7816,
      "step": 853710
    },
    {
      "epoch": 2.9920757586909055,
      "grad_norm": 3.125,
      "learning_rate": 1.4681028372937062e-07,
      "loss": 0.7352,
      "step": 853720
    },
    {
      "epoch": 2.9921108061978012,
      "grad_norm": 2.546875,
      "learning_rate": 1.4616125506566872e-07,
      "loss": 0.7679,
      "step": 853730
    },
    {
      "epoch": 2.992145853704697,
      "grad_norm": 2.671875,
      "learning_rate": 1.4551222640196683e-07,
      "loss": 0.7434,
      "step": 853740
    },
    {
      "epoch": 2.9921809012115923,
      "grad_norm": 2.875,
      "learning_rate": 1.448631977382649e-07,
      "loss": 0.8221,
      "step": 853750
    },
    {
      "epoch": 2.992215948718488,
      "grad_norm": 2.609375,
      "learning_rate": 1.4421416907456301e-07,
      "loss": 0.8193,
      "step": 853760
    },
    {
      "epoch": 2.9922509962253834,
      "grad_norm": 2.625,
      "learning_rate": 1.435651404108611e-07,
      "loss": 0.8659,
      "step": 853770
    },
    {
      "epoch": 2.992286043732279,
      "grad_norm": 2.921875,
      "learning_rate": 1.4291611174715923e-07,
      "loss": 0.7784,
      "step": 853780
    },
    {
      "epoch": 2.992321091239175,
      "grad_norm": 3.09375,
      "learning_rate": 1.422670830834573e-07,
      "loss": 0.8492,
      "step": 853790
    },
    {
      "epoch": 2.99235613874607,
      "grad_norm": 3.09375,
      "learning_rate": 1.416180544197554e-07,
      "loss": 0.7589,
      "step": 853800
    },
    {
      "epoch": 2.992391186252966,
      "grad_norm": 2.765625,
      "learning_rate": 1.409690257560535e-07,
      "loss": 0.8022,
      "step": 853810
    },
    {
      "epoch": 2.9924262337598613,
      "grad_norm": 2.96875,
      "learning_rate": 1.403199970923516e-07,
      "loss": 0.7737,
      "step": 853820
    },
    {
      "epoch": 2.992461281266757,
      "grad_norm": 3.0625,
      "learning_rate": 1.396709684286497e-07,
      "loss": 0.7855,
      "step": 853830
    },
    {
      "epoch": 2.992496328773653,
      "grad_norm": 2.609375,
      "learning_rate": 1.3902193976494778e-07,
      "loss": 0.7856,
      "step": 853840
    },
    {
      "epoch": 2.9925313762805485,
      "grad_norm": 2.875,
      "learning_rate": 1.383729111012459e-07,
      "loss": 0.8166,
      "step": 853850
    },
    {
      "epoch": 2.992566423787444,
      "grad_norm": 3.203125,
      "learning_rate": 1.3772388243754397e-07,
      "loss": 0.7732,
      "step": 853860
    },
    {
      "epoch": 2.9926014712943396,
      "grad_norm": 3.140625,
      "learning_rate": 1.3707485377384207e-07,
      "loss": 0.7612,
      "step": 853870
    },
    {
      "epoch": 2.992636518801235,
      "grad_norm": 2.828125,
      "learning_rate": 1.3642582511014018e-07,
      "loss": 0.7493,
      "step": 853880
    },
    {
      "epoch": 2.9926715663081307,
      "grad_norm": 3.578125,
      "learning_rate": 1.3577679644643828e-07,
      "loss": 0.8523,
      "step": 853890
    },
    {
      "epoch": 2.9927066138150265,
      "grad_norm": 3.25,
      "learning_rate": 1.3512776778273636e-07,
      "loss": 0.761,
      "step": 853900
    },
    {
      "epoch": 2.9927416613219218,
      "grad_norm": 2.671875,
      "learning_rate": 1.3447873911903447e-07,
      "loss": 0.8133,
      "step": 853910
    },
    {
      "epoch": 2.9927767088288175,
      "grad_norm": 3.125,
      "learning_rate": 1.3382971045533255e-07,
      "loss": 0.7338,
      "step": 853920
    },
    {
      "epoch": 2.992811756335713,
      "grad_norm": 2.953125,
      "learning_rate": 1.3318068179163065e-07,
      "loss": 0.7967,
      "step": 853930
    },
    {
      "epoch": 2.9928468038426086,
      "grad_norm": 2.75,
      "learning_rate": 1.3253165312792876e-07,
      "loss": 0.7601,
      "step": 853940
    },
    {
      "epoch": 2.9928818513495044,
      "grad_norm": 2.9375,
      "learning_rate": 1.3188262446422684e-07,
      "loss": 0.7723,
      "step": 853950
    },
    {
      "epoch": 2.9929168988564,
      "grad_norm": 2.875,
      "learning_rate": 1.3123359580052494e-07,
      "loss": 0.7975,
      "step": 853960
    },
    {
      "epoch": 2.9929519463632954,
      "grad_norm": 3.0,
      "learning_rate": 1.3058456713682302e-07,
      "loss": 0.7679,
      "step": 853970
    },
    {
      "epoch": 2.992986993870191,
      "grad_norm": 3.171875,
      "learning_rate": 1.2993553847312113e-07,
      "loss": 0.8417,
      "step": 853980
    },
    {
      "epoch": 2.9930220413770865,
      "grad_norm": 2.90625,
      "learning_rate": 1.2928650980941924e-07,
      "loss": 0.835,
      "step": 853990
    },
    {
      "epoch": 2.9930570888839823,
      "grad_norm": 3.390625,
      "learning_rate": 1.2863748114571734e-07,
      "loss": 0.7515,
      "step": 854000
    },
    {
      "epoch": 2.993092136390878,
      "grad_norm": 3.015625,
      "learning_rate": 1.2798845248201542e-07,
      "loss": 0.7497,
      "step": 854010
    },
    {
      "epoch": 2.9931271838977733,
      "grad_norm": 2.65625,
      "learning_rate": 1.273394238183135e-07,
      "loss": 0.7452,
      "step": 854020
    },
    {
      "epoch": 2.993162231404669,
      "grad_norm": 3.53125,
      "learning_rate": 1.266903951546116e-07,
      "loss": 0.8362,
      "step": 854030
    },
    {
      "epoch": 2.9931972789115644,
      "grad_norm": 2.9375,
      "learning_rate": 1.260413664909097e-07,
      "loss": 0.86,
      "step": 854040
    },
    {
      "epoch": 2.99323232641846,
      "grad_norm": 2.640625,
      "learning_rate": 1.2539233782720782e-07,
      "loss": 0.7661,
      "step": 854050
    },
    {
      "epoch": 2.993267373925356,
      "grad_norm": 3.125,
      "learning_rate": 1.247433091635059e-07,
      "loss": 0.7976,
      "step": 854060
    },
    {
      "epoch": 2.9933024214322517,
      "grad_norm": 3.484375,
      "learning_rate": 1.24094280499804e-07,
      "loss": 0.7917,
      "step": 854070
    },
    {
      "epoch": 2.993337468939147,
      "grad_norm": 2.671875,
      "learning_rate": 1.2344525183610208e-07,
      "loss": 0.7798,
      "step": 854080
    },
    {
      "epoch": 2.9933725164460427,
      "grad_norm": 2.984375,
      "learning_rate": 1.227962231724002e-07,
      "loss": 0.8409,
      "step": 854090
    },
    {
      "epoch": 2.993407563952938,
      "grad_norm": 2.84375,
      "learning_rate": 1.221471945086983e-07,
      "loss": 0.8037,
      "step": 854100
    },
    {
      "epoch": 2.993442611459834,
      "grad_norm": 2.953125,
      "learning_rate": 1.2149816584499637e-07,
      "loss": 0.7395,
      "step": 854110
    },
    {
      "epoch": 2.9934776589667296,
      "grad_norm": 2.921875,
      "learning_rate": 1.2084913718129448e-07,
      "loss": 0.8294,
      "step": 854120
    },
    {
      "epoch": 2.993512706473625,
      "grad_norm": 2.984375,
      "learning_rate": 1.2020010851759256e-07,
      "loss": 0.801,
      "step": 854130
    },
    {
      "epoch": 2.9935477539805206,
      "grad_norm": 2.78125,
      "learning_rate": 1.195510798538907e-07,
      "loss": 0.7769,
      "step": 854140
    },
    {
      "epoch": 2.993582801487416,
      "grad_norm": 3.0625,
      "learning_rate": 1.1890205119018877e-07,
      "loss": 0.7842,
      "step": 854150
    },
    {
      "epoch": 2.9936178489943117,
      "grad_norm": 2.78125,
      "learning_rate": 1.1825302252648686e-07,
      "loss": 0.8319,
      "step": 854160
    },
    {
      "epoch": 2.9936528965012075,
      "grad_norm": 3.015625,
      "learning_rate": 1.1760399386278495e-07,
      "loss": 0.7266,
      "step": 854170
    },
    {
      "epoch": 2.9936879440081032,
      "grad_norm": 2.796875,
      "learning_rate": 1.1695496519908305e-07,
      "loss": 0.8138,
      "step": 854180
    },
    {
      "epoch": 2.9937229915149985,
      "grad_norm": 3.09375,
      "learning_rate": 1.1630593653538117e-07,
      "loss": 0.8022,
      "step": 854190
    },
    {
      "epoch": 2.9937580390218943,
      "grad_norm": 3.171875,
      "learning_rate": 1.1565690787167926e-07,
      "loss": 0.8965,
      "step": 854200
    },
    {
      "epoch": 2.9937930865287896,
      "grad_norm": 2.703125,
      "learning_rate": 1.1500787920797735e-07,
      "loss": 0.7952,
      "step": 854210
    },
    {
      "epoch": 2.9938281340356854,
      "grad_norm": 3.015625,
      "learning_rate": 1.1435885054427544e-07,
      "loss": 0.7431,
      "step": 854220
    },
    {
      "epoch": 2.993863181542581,
      "grad_norm": 2.84375,
      "learning_rate": 1.1370982188057354e-07,
      "loss": 0.7351,
      "step": 854230
    },
    {
      "epoch": 2.9938982290494764,
      "grad_norm": 2.578125,
      "learning_rate": 1.1306079321687164e-07,
      "loss": 0.8994,
      "step": 854240
    },
    {
      "epoch": 2.993933276556372,
      "grad_norm": 3.140625,
      "learning_rate": 1.1241176455316973e-07,
      "loss": 0.8531,
      "step": 854250
    },
    {
      "epoch": 2.9939683240632675,
      "grad_norm": 2.796875,
      "learning_rate": 1.1176273588946783e-07,
      "loss": 0.7508,
      "step": 854260
    },
    {
      "epoch": 2.9940033715701633,
      "grad_norm": 3.4375,
      "learning_rate": 1.1111370722576592e-07,
      "loss": 0.8295,
      "step": 854270
    },
    {
      "epoch": 2.994038419077059,
      "grad_norm": 2.890625,
      "learning_rate": 1.1046467856206401e-07,
      "loss": 0.7833,
      "step": 854280
    },
    {
      "epoch": 2.994073466583955,
      "grad_norm": 2.625,
      "learning_rate": 1.0981564989836213e-07,
      "loss": 0.7388,
      "step": 854290
    },
    {
      "epoch": 2.99410851409085,
      "grad_norm": 2.546875,
      "learning_rate": 1.0916662123466022e-07,
      "loss": 0.8587,
      "step": 854300
    },
    {
      "epoch": 2.994143561597746,
      "grad_norm": 2.84375,
      "learning_rate": 1.0851759257095832e-07,
      "loss": 0.8599,
      "step": 854310
    },
    {
      "epoch": 2.994178609104641,
      "grad_norm": 2.90625,
      "learning_rate": 1.0786856390725641e-07,
      "loss": 0.8087,
      "step": 854320
    },
    {
      "epoch": 2.994213656611537,
      "grad_norm": 2.84375,
      "learning_rate": 1.072195352435545e-07,
      "loss": 0.8296,
      "step": 854330
    },
    {
      "epoch": 2.9942487041184327,
      "grad_norm": 3.140625,
      "learning_rate": 1.065705065798526e-07,
      "loss": 0.7829,
      "step": 854340
    },
    {
      "epoch": 2.994283751625328,
      "grad_norm": 3.203125,
      "learning_rate": 1.059214779161507e-07,
      "loss": 0.6997,
      "step": 854350
    },
    {
      "epoch": 2.9943187991322238,
      "grad_norm": 2.8125,
      "learning_rate": 1.0527244925244879e-07,
      "loss": 0.7632,
      "step": 854360
    },
    {
      "epoch": 2.994353846639119,
      "grad_norm": 3.40625,
      "learning_rate": 1.0462342058874688e-07,
      "loss": 0.7662,
      "step": 854370
    },
    {
      "epoch": 2.994388894146015,
      "grad_norm": 3.078125,
      "learning_rate": 1.0397439192504498e-07,
      "loss": 0.7568,
      "step": 854380
    },
    {
      "epoch": 2.9944239416529106,
      "grad_norm": 2.484375,
      "learning_rate": 1.0332536326134307e-07,
      "loss": 0.756,
      "step": 854390
    },
    {
      "epoch": 2.9944589891598064,
      "grad_norm": 2.890625,
      "learning_rate": 1.0267633459764119e-07,
      "loss": 0.8193,
      "step": 854400
    },
    {
      "epoch": 2.9944940366667017,
      "grad_norm": 2.453125,
      "learning_rate": 1.0202730593393928e-07,
      "loss": 0.8133,
      "step": 854410
    },
    {
      "epoch": 2.9945290841735974,
      "grad_norm": 2.765625,
      "learning_rate": 1.0137827727023737e-07,
      "loss": 0.9147,
      "step": 854420
    },
    {
      "epoch": 2.9945641316804927,
      "grad_norm": 3.09375,
      "learning_rate": 1.0072924860653545e-07,
      "loss": 0.7969,
      "step": 854430
    },
    {
      "epoch": 2.9945991791873885,
      "grad_norm": 3.03125,
      "learning_rate": 1.0008021994283355e-07,
      "loss": 0.8671,
      "step": 854440
    },
    {
      "epoch": 2.9946342266942843,
      "grad_norm": 2.859375,
      "learning_rate": 9.943119127913166e-08,
      "loss": 0.8141,
      "step": 854450
    },
    {
      "epoch": 2.9946692742011796,
      "grad_norm": 3.0,
      "learning_rate": 9.878216261542976e-08,
      "loss": 0.795,
      "step": 854460
    },
    {
      "epoch": 2.9947043217080753,
      "grad_norm": 2.765625,
      "learning_rate": 9.813313395172785e-08,
      "loss": 0.7219,
      "step": 854470
    },
    {
      "epoch": 2.9947393692149706,
      "grad_norm": 3.015625,
      "learning_rate": 9.748410528802594e-08,
      "loss": 0.9051,
      "step": 854480
    },
    {
      "epoch": 2.9947744167218664,
      "grad_norm": 2.78125,
      "learning_rate": 9.683507662432405e-08,
      "loss": 0.7951,
      "step": 854490
    },
    {
      "epoch": 2.994809464228762,
      "grad_norm": 2.453125,
      "learning_rate": 9.618604796062214e-08,
      "loss": 0.8165,
      "step": 854500
    },
    {
      "epoch": 2.994844511735658,
      "grad_norm": 2.65625,
      "learning_rate": 9.553701929692023e-08,
      "loss": 0.8048,
      "step": 854510
    },
    {
      "epoch": 2.9948795592425532,
      "grad_norm": 2.8125,
      "learning_rate": 9.488799063321833e-08,
      "loss": 0.7766,
      "step": 854520
    },
    {
      "epoch": 2.994914606749449,
      "grad_norm": 3.0,
      "learning_rate": 9.423896196951642e-08,
      "loss": 0.764,
      "step": 854530
    },
    {
      "epoch": 2.9949496542563443,
      "grad_norm": 2.96875,
      "learning_rate": 9.358993330581452e-08,
      "loss": 0.7474,
      "step": 854540
    },
    {
      "epoch": 2.99498470176324,
      "grad_norm": 2.90625,
      "learning_rate": 9.294090464211262e-08,
      "loss": 0.7946,
      "step": 854550
    },
    {
      "epoch": 2.995019749270136,
      "grad_norm": 2.828125,
      "learning_rate": 9.229187597841072e-08,
      "loss": 0.9088,
      "step": 854560
    },
    {
      "epoch": 2.995054796777031,
      "grad_norm": 2.5625,
      "learning_rate": 9.164284731470881e-08,
      "loss": 0.8342,
      "step": 854570
    },
    {
      "epoch": 2.995089844283927,
      "grad_norm": 2.796875,
      "learning_rate": 9.099381865100691e-08,
      "loss": 0.7768,
      "step": 854580
    },
    {
      "epoch": 2.995124891790822,
      "grad_norm": 3.0,
      "learning_rate": 9.034478998730501e-08,
      "loss": 0.8041,
      "step": 854590
    },
    {
      "epoch": 2.995159939297718,
      "grad_norm": 3.203125,
      "learning_rate": 8.96957613236031e-08,
      "loss": 0.7084,
      "step": 854600
    },
    {
      "epoch": 2.9951949868046137,
      "grad_norm": 3.15625,
      "learning_rate": 8.904673265990118e-08,
      "loss": 0.8133,
      "step": 854610
    },
    {
      "epoch": 2.9952300343115095,
      "grad_norm": 2.984375,
      "learning_rate": 8.839770399619929e-08,
      "loss": 0.7571,
      "step": 854620
    },
    {
      "epoch": 2.995265081818405,
      "grad_norm": 2.859375,
      "learning_rate": 8.774867533249738e-08,
      "loss": 0.7608,
      "step": 854630
    },
    {
      "epoch": 2.9953001293253005,
      "grad_norm": 2.875,
      "learning_rate": 8.709964666879549e-08,
      "loss": 0.7589,
      "step": 854640
    },
    {
      "epoch": 2.995335176832196,
      "grad_norm": 2.765625,
      "learning_rate": 8.645061800509358e-08,
      "loss": 0.8109,
      "step": 854650
    },
    {
      "epoch": 2.9953702243390916,
      "grad_norm": 3.265625,
      "learning_rate": 8.580158934139167e-08,
      "loss": 0.8414,
      "step": 854660
    },
    {
      "epoch": 2.9954052718459874,
      "grad_norm": 3.03125,
      "learning_rate": 8.515256067768978e-08,
      "loss": 0.7944,
      "step": 854670
    },
    {
      "epoch": 2.9954403193528827,
      "grad_norm": 3.15625,
      "learning_rate": 8.450353201398787e-08,
      "loss": 0.8399,
      "step": 854680
    },
    {
      "epoch": 2.9954753668597784,
      "grad_norm": 3.296875,
      "learning_rate": 8.385450335028596e-08,
      "loss": 0.7458,
      "step": 854690
    },
    {
      "epoch": 2.9955104143666738,
      "grad_norm": 2.515625,
      "learning_rate": 8.320547468658406e-08,
      "loss": 0.8041,
      "step": 854700
    },
    {
      "epoch": 2.9955454618735695,
      "grad_norm": 3.28125,
      "learning_rate": 8.255644602288215e-08,
      "loss": 0.8478,
      "step": 854710
    },
    {
      "epoch": 2.9955805093804653,
      "grad_norm": 2.796875,
      "learning_rate": 8.190741735918026e-08,
      "loss": 0.8072,
      "step": 854720
    },
    {
      "epoch": 2.995615556887361,
      "grad_norm": 3.125,
      "learning_rate": 8.125838869547835e-08,
      "loss": 0.7417,
      "step": 854730
    },
    {
      "epoch": 2.9956506043942563,
      "grad_norm": 3.21875,
      "learning_rate": 8.060936003177645e-08,
      "loss": 0.8661,
      "step": 854740
    },
    {
      "epoch": 2.995685651901152,
      "grad_norm": 3.0625,
      "learning_rate": 7.996033136807455e-08,
      "loss": 0.8714,
      "step": 854750
    },
    {
      "epoch": 2.9957206994080474,
      "grad_norm": 2.859375,
      "learning_rate": 7.931130270437264e-08,
      "loss": 0.7782,
      "step": 854760
    },
    {
      "epoch": 2.995755746914943,
      "grad_norm": 2.96875,
      "learning_rate": 7.866227404067074e-08,
      "loss": 0.7842,
      "step": 854770
    },
    {
      "epoch": 2.995790794421839,
      "grad_norm": 2.984375,
      "learning_rate": 7.801324537696884e-08,
      "loss": 0.8629,
      "step": 854780
    },
    {
      "epoch": 2.9958258419287342,
      "grad_norm": 3.28125,
      "learning_rate": 7.736421671326693e-08,
      "loss": 0.7965,
      "step": 854790
    },
    {
      "epoch": 2.99586088943563,
      "grad_norm": 3.078125,
      "learning_rate": 7.671518804956502e-08,
      "loss": 0.7729,
      "step": 854800
    },
    {
      "epoch": 2.9958959369425253,
      "grad_norm": 3.234375,
      "learning_rate": 7.606615938586311e-08,
      "loss": 0.7763,
      "step": 854810
    },
    {
      "epoch": 2.995930984449421,
      "grad_norm": 2.828125,
      "learning_rate": 7.541713072216122e-08,
      "loss": 0.7203,
      "step": 854820
    },
    {
      "epoch": 2.995966031956317,
      "grad_norm": 2.828125,
      "learning_rate": 7.476810205845931e-08,
      "loss": 0.7277,
      "step": 854830
    },
    {
      "epoch": 2.9960010794632126,
      "grad_norm": 3.265625,
      "learning_rate": 7.41190733947574e-08,
      "loss": 0.7835,
      "step": 854840
    },
    {
      "epoch": 2.996036126970108,
      "grad_norm": 2.625,
      "learning_rate": 7.347004473105551e-08,
      "loss": 0.7594,
      "step": 854850
    },
    {
      "epoch": 2.9960711744770037,
      "grad_norm": 2.546875,
      "learning_rate": 7.28210160673536e-08,
      "loss": 0.7791,
      "step": 854860
    },
    {
      "epoch": 2.996106221983899,
      "grad_norm": 2.671875,
      "learning_rate": 7.21719874036517e-08,
      "loss": 0.7717,
      "step": 854870
    },
    {
      "epoch": 2.9961412694907947,
      "grad_norm": 3.1875,
      "learning_rate": 7.152295873994979e-08,
      "loss": 0.7479,
      "step": 854880
    },
    {
      "epoch": 2.9961763169976905,
      "grad_norm": 3.296875,
      "learning_rate": 7.087393007624788e-08,
      "loss": 0.7359,
      "step": 854890
    },
    {
      "epoch": 2.996211364504586,
      "grad_norm": 3.203125,
      "learning_rate": 7.022490141254599e-08,
      "loss": 0.7573,
      "step": 854900
    },
    {
      "epoch": 2.9962464120114816,
      "grad_norm": 2.578125,
      "learning_rate": 6.957587274884408e-08,
      "loss": 0.717,
      "step": 854910
    },
    {
      "epoch": 2.9962814595183773,
      "grad_norm": 2.59375,
      "learning_rate": 6.892684408514219e-08,
      "loss": 0.8231,
      "step": 854920
    },
    {
      "epoch": 2.9963165070252726,
      "grad_norm": 2.96875,
      "learning_rate": 6.827781542144028e-08,
      "loss": 0.7684,
      "step": 854930
    },
    {
      "epoch": 2.9963515545321684,
      "grad_norm": 3.046875,
      "learning_rate": 6.762878675773837e-08,
      "loss": 0.777,
      "step": 854940
    },
    {
      "epoch": 2.996386602039064,
      "grad_norm": 2.9375,
      "learning_rate": 6.697975809403648e-08,
      "loss": 0.7436,
      "step": 854950
    },
    {
      "epoch": 2.9964216495459595,
      "grad_norm": 4.28125,
      "learning_rate": 6.633072943033457e-08,
      "loss": 0.8227,
      "step": 854960
    },
    {
      "epoch": 2.9964566970528552,
      "grad_norm": 3.109375,
      "learning_rate": 6.568170076663266e-08,
      "loss": 0.7914,
      "step": 854970
    },
    {
      "epoch": 2.9964917445597505,
      "grad_norm": 3.046875,
      "learning_rate": 6.503267210293075e-08,
      "loss": 0.8176,
      "step": 854980
    },
    {
      "epoch": 2.9965267920666463,
      "grad_norm": 3.09375,
      "learning_rate": 6.438364343922885e-08,
      "loss": 0.8317,
      "step": 854990
    },
    {
      "epoch": 2.996561839573542,
      "grad_norm": 2.578125,
      "learning_rate": 6.373461477552695e-08,
      "loss": 0.8302,
      "step": 855000
    },
    {
      "epoch": 2.996561839573542,
      "eval_loss": 0.7497047781944275,
      "eval_runtime": 557.0875,
      "eval_samples_per_second": 682.902,
      "eval_steps_per_second": 56.908,
      "step": 855000
    },
    {
      "epoch": 2.996596887080438,
      "grad_norm": 2.90625,
      "learning_rate": 6.308558611182505e-08,
      "loss": 0.7934,
      "step": 855010
    },
    {
      "epoch": 2.996631934587333,
      "grad_norm": 3.0625,
      "learning_rate": 6.243655744812314e-08,
      "loss": 0.7577,
      "step": 855020
    },
    {
      "epoch": 2.996666982094229,
      "grad_norm": 2.828125,
      "learning_rate": 6.178752878442124e-08,
      "loss": 0.8273,
      "step": 855030
    },
    {
      "epoch": 2.996702029601124,
      "grad_norm": 2.859375,
      "learning_rate": 6.113850012071934e-08,
      "loss": 0.7834,
      "step": 855040
    },
    {
      "epoch": 2.99673707710802,
      "grad_norm": 2.59375,
      "learning_rate": 6.048947145701744e-08,
      "loss": 0.8003,
      "step": 855050
    },
    {
      "epoch": 2.9967721246149157,
      "grad_norm": 3.015625,
      "learning_rate": 5.984044279331552e-08,
      "loss": 0.7771,
      "step": 855060
    },
    {
      "epoch": 2.996807172121811,
      "grad_norm": 3.015625,
      "learning_rate": 5.919141412961362e-08,
      "loss": 0.7457,
      "step": 855070
    },
    {
      "epoch": 2.996842219628707,
      "grad_norm": 3.015625,
      "learning_rate": 5.854238546591172e-08,
      "loss": 0.8139,
      "step": 855080
    },
    {
      "epoch": 2.996877267135602,
      "grad_norm": 2.828125,
      "learning_rate": 5.789335680220981e-08,
      "loss": 0.8405,
      "step": 855090
    },
    {
      "epoch": 2.996912314642498,
      "grad_norm": 2.84375,
      "learning_rate": 5.724432813850792e-08,
      "loss": 0.777,
      "step": 855100
    },
    {
      "epoch": 2.9969473621493936,
      "grad_norm": 3.125,
      "learning_rate": 5.659529947480601e-08,
      "loss": 0.7867,
      "step": 855110
    },
    {
      "epoch": 2.9969824096562894,
      "grad_norm": 3.0,
      "learning_rate": 5.59462708111041e-08,
      "loss": 0.7396,
      "step": 855120
    },
    {
      "epoch": 2.9970174571631847,
      "grad_norm": 3.015625,
      "learning_rate": 5.52972421474022e-08,
      "loss": 0.8153,
      "step": 855130
    },
    {
      "epoch": 2.9970525046700804,
      "grad_norm": 2.765625,
      "learning_rate": 5.4648213483700294e-08,
      "loss": 0.8075,
      "step": 855140
    },
    {
      "epoch": 2.9970875521769758,
      "grad_norm": 2.53125,
      "learning_rate": 5.399918481999839e-08,
      "loss": 0.7741,
      "step": 855150
    },
    {
      "epoch": 2.9971225996838715,
      "grad_norm": 3.078125,
      "learning_rate": 5.335015615629649e-08,
      "loss": 0.7902,
      "step": 855160
    },
    {
      "epoch": 2.9971576471907673,
      "grad_norm": 3.15625,
      "learning_rate": 5.2701127492594585e-08,
      "loss": 0.8217,
      "step": 855170
    },
    {
      "epoch": 2.9971926946976626,
      "grad_norm": 2.71875,
      "learning_rate": 5.2052098828892684e-08,
      "loss": 0.7989,
      "step": 855180
    },
    {
      "epoch": 2.9972277422045583,
      "grad_norm": 3.109375,
      "learning_rate": 5.140307016519078e-08,
      "loss": 0.7989,
      "step": 855190
    },
    {
      "epoch": 2.9972627897114537,
      "grad_norm": 2.828125,
      "learning_rate": 5.075404150148887e-08,
      "loss": 0.8104,
      "step": 855200
    },
    {
      "epoch": 2.9972978372183494,
      "grad_norm": 3.0,
      "learning_rate": 5.0105012837786975e-08,
      "loss": 0.8238,
      "step": 855210
    },
    {
      "epoch": 2.997332884725245,
      "grad_norm": 3.140625,
      "learning_rate": 4.945598417408507e-08,
      "loss": 0.849,
      "step": 855220
    },
    {
      "epoch": 2.997367932232141,
      "grad_norm": 3.0,
      "learning_rate": 4.880695551038316e-08,
      "loss": 0.7566,
      "step": 855230
    },
    {
      "epoch": 2.9974029797390362,
      "grad_norm": 3.40625,
      "learning_rate": 4.815792684668126e-08,
      "loss": 0.7943,
      "step": 855240
    },
    {
      "epoch": 2.997438027245932,
      "grad_norm": 2.484375,
      "learning_rate": 4.750889818297936e-08,
      "loss": 0.7089,
      "step": 855250
    },
    {
      "epoch": 2.9974730747528273,
      "grad_norm": 2.828125,
      "learning_rate": 4.685986951927745e-08,
      "loss": 0.8291,
      "step": 855260
    },
    {
      "epoch": 2.997508122259723,
      "grad_norm": 3.046875,
      "learning_rate": 4.6210840855575544e-08,
      "loss": 0.8419,
      "step": 855270
    },
    {
      "epoch": 2.997543169766619,
      "grad_norm": 3.390625,
      "learning_rate": 4.556181219187364e-08,
      "loss": 0.7891,
      "step": 855280
    },
    {
      "epoch": 2.997578217273514,
      "grad_norm": 2.5625,
      "learning_rate": 4.491278352817174e-08,
      "loss": 0.7431,
      "step": 855290
    },
    {
      "epoch": 2.99761326478041,
      "grad_norm": 3.15625,
      "learning_rate": 4.426375486446984e-08,
      "loss": 0.7573,
      "step": 855300
    },
    {
      "epoch": 2.997648312287305,
      "grad_norm": 2.78125,
      "learning_rate": 4.3614726200767934e-08,
      "loss": 0.7813,
      "step": 855310
    },
    {
      "epoch": 2.997683359794201,
      "grad_norm": 2.859375,
      "learning_rate": 4.2965697537066026e-08,
      "loss": 0.8085,
      "step": 855320
    },
    {
      "epoch": 2.9977184073010967,
      "grad_norm": 3.0,
      "learning_rate": 4.2316668873364126e-08,
      "loss": 0.7634,
      "step": 855330
    },
    {
      "epoch": 2.9977534548079925,
      "grad_norm": 2.9375,
      "learning_rate": 4.1667640209662225e-08,
      "loss": 0.7713,
      "step": 855340
    },
    {
      "epoch": 2.997788502314888,
      "grad_norm": 2.921875,
      "learning_rate": 4.101861154596032e-08,
      "loss": 0.8132,
      "step": 855350
    },
    {
      "epoch": 2.9978235498217836,
      "grad_norm": 2.953125,
      "learning_rate": 4.036958288225841e-08,
      "loss": 0.8402,
      "step": 855360
    },
    {
      "epoch": 2.997858597328679,
      "grad_norm": 2.8125,
      "learning_rate": 3.972055421855651e-08,
      "loss": 0.7896,
      "step": 855370
    },
    {
      "epoch": 2.9978936448355746,
      "grad_norm": 3.140625,
      "learning_rate": 3.907152555485461e-08,
      "loss": 0.8107,
      "step": 855380
    },
    {
      "epoch": 2.9979286923424704,
      "grad_norm": 3.015625,
      "learning_rate": 3.842249689115271e-08,
      "loss": 0.7705,
      "step": 855390
    },
    {
      "epoch": 2.9979637398493657,
      "grad_norm": 3.015625,
      "learning_rate": 3.77734682274508e-08,
      "loss": 0.7395,
      "step": 855400
    },
    {
      "epoch": 2.9979987873562615,
      "grad_norm": 2.734375,
      "learning_rate": 3.712443956374889e-08,
      "loss": 0.7536,
      "step": 855410
    },
    {
      "epoch": 2.9980338348631568,
      "grad_norm": 2.484375,
      "learning_rate": 3.647541090004699e-08,
      "loss": 0.7453,
      "step": 855420
    },
    {
      "epoch": 2.9980688823700525,
      "grad_norm": 2.578125,
      "learning_rate": 3.582638223634509e-08,
      "loss": 0.807,
      "step": 855430
    },
    {
      "epoch": 2.9981039298769483,
      "grad_norm": 3.28125,
      "learning_rate": 3.517735357264318e-08,
      "loss": 0.8842,
      "step": 855440
    },
    {
      "epoch": 2.998138977383844,
      "grad_norm": 3.234375,
      "learning_rate": 3.4528324908941276e-08,
      "loss": 0.8088,
      "step": 855450
    },
    {
      "epoch": 2.9981740248907394,
      "grad_norm": 3.078125,
      "learning_rate": 3.3879296245239375e-08,
      "loss": 0.8394,
      "step": 855460
    },
    {
      "epoch": 2.998209072397635,
      "grad_norm": 3.515625,
      "learning_rate": 3.3230267581537474e-08,
      "loss": 0.8547,
      "step": 855470
    },
    {
      "epoch": 2.9982441199045304,
      "grad_norm": 3.484375,
      "learning_rate": 3.258123891783557e-08,
      "loss": 0.8688,
      "step": 855480
    },
    {
      "epoch": 2.998279167411426,
      "grad_norm": 3.265625,
      "learning_rate": 3.1932210254133666e-08,
      "loss": 0.885,
      "step": 855490
    },
    {
      "epoch": 2.998314214918322,
      "grad_norm": 2.984375,
      "learning_rate": 3.128318159043176e-08,
      "loss": 0.8375,
      "step": 855500
    },
    {
      "epoch": 2.9983492624252173,
      "grad_norm": 3.046875,
      "learning_rate": 3.063415292672986e-08,
      "loss": 0.7983,
      "step": 855510
    },
    {
      "epoch": 2.998384309932113,
      "grad_norm": 2.421875,
      "learning_rate": 2.9985124263027957e-08,
      "loss": 0.8009,
      "step": 855520
    },
    {
      "epoch": 2.9984193574390083,
      "grad_norm": 2.625,
      "learning_rate": 2.933609559932605e-08,
      "loss": 0.7932,
      "step": 855530
    },
    {
      "epoch": 2.998454404945904,
      "grad_norm": 3.15625,
      "learning_rate": 2.8687066935624145e-08,
      "loss": 0.8337,
      "step": 855540
    },
    {
      "epoch": 2.9984894524528,
      "grad_norm": 2.859375,
      "learning_rate": 2.8038038271922244e-08,
      "loss": 0.8298,
      "step": 855550
    },
    {
      "epoch": 2.9985244999596956,
      "grad_norm": 2.953125,
      "learning_rate": 2.738900960822034e-08,
      "loss": 0.8514,
      "step": 855560
    },
    {
      "epoch": 2.998559547466591,
      "grad_norm": 2.5,
      "learning_rate": 2.6739980944518436e-08,
      "loss": 0.8711,
      "step": 855570
    },
    {
      "epoch": 2.9985945949734867,
      "grad_norm": 3.203125,
      "learning_rate": 2.609095228081653e-08,
      "loss": 0.8285,
      "step": 855580
    },
    {
      "epoch": 2.998629642480382,
      "grad_norm": 3.203125,
      "learning_rate": 2.5441923617114628e-08,
      "loss": 0.7976,
      "step": 855590
    },
    {
      "epoch": 2.9986646899872778,
      "grad_norm": 3.0625,
      "learning_rate": 2.4792894953412723e-08,
      "loss": 0.7581,
      "step": 855600
    },
    {
      "epoch": 2.9986997374941735,
      "grad_norm": 3.3125,
      "learning_rate": 2.414386628971082e-08,
      "loss": 0.789,
      "step": 855610
    },
    {
      "epoch": 2.998734785001069,
      "grad_norm": 2.9375,
      "learning_rate": 2.3494837626008915e-08,
      "loss": 0.8601,
      "step": 855620
    },
    {
      "epoch": 2.9987698325079646,
      "grad_norm": 3.234375,
      "learning_rate": 2.284580896230701e-08,
      "loss": 0.8228,
      "step": 855630
    },
    {
      "epoch": 2.99880488001486,
      "grad_norm": 3.3125,
      "learning_rate": 2.219678029860511e-08,
      "loss": 0.841,
      "step": 855640
    },
    {
      "epoch": 2.9988399275217557,
      "grad_norm": 2.953125,
      "learning_rate": 2.1547751634903206e-08,
      "loss": 0.7287,
      "step": 855650
    },
    {
      "epoch": 2.9988749750286514,
      "grad_norm": 3.09375,
      "learning_rate": 2.0898722971201302e-08,
      "loss": 0.7562,
      "step": 855660
    },
    {
      "epoch": 2.998910022535547,
      "grad_norm": 2.703125,
      "learning_rate": 2.0249694307499398e-08,
      "loss": 0.7863,
      "step": 855670
    },
    {
      "epoch": 2.9989450700424425,
      "grad_norm": 2.375,
      "learning_rate": 1.9600665643797493e-08,
      "loss": 0.7381,
      "step": 855680
    },
    {
      "epoch": 2.9989801175493382,
      "grad_norm": 3.34375,
      "learning_rate": 1.895163698009559e-08,
      "loss": 0.7358,
      "step": 855690
    },
    {
      "epoch": 2.9990151650562336,
      "grad_norm": 2.84375,
      "learning_rate": 1.8302608316393685e-08,
      "loss": 0.804,
      "step": 855700
    },
    {
      "epoch": 2.9990502125631293,
      "grad_norm": 2.75,
      "learning_rate": 1.765357965269178e-08,
      "loss": 0.7575,
      "step": 855710
    },
    {
      "epoch": 2.999085260070025,
      "grad_norm": 2.828125,
      "learning_rate": 1.7004550988989877e-08,
      "loss": 0.777,
      "step": 855720
    },
    {
      "epoch": 2.9991203075769204,
      "grad_norm": 2.53125,
      "learning_rate": 1.6355522325287976e-08,
      "loss": 0.7727,
      "step": 855730
    },
    {
      "epoch": 2.999155355083816,
      "grad_norm": 3.046875,
      "learning_rate": 1.5706493661586072e-08,
      "loss": 0.8498,
      "step": 855740
    },
    {
      "epoch": 2.9991904025907115,
      "grad_norm": 2.984375,
      "learning_rate": 1.5057464997884168e-08,
      "loss": 0.8235,
      "step": 855750
    },
    {
      "epoch": 2.999225450097607,
      "grad_norm": 2.96875,
      "learning_rate": 1.4408436334182265e-08,
      "loss": 0.8784,
      "step": 855760
    },
    {
      "epoch": 2.999260497604503,
      "grad_norm": 2.9375,
      "learning_rate": 1.375940767048036e-08,
      "loss": 0.8377,
      "step": 855770
    },
    {
      "epoch": 2.9992955451113987,
      "grad_norm": 3.015625,
      "learning_rate": 1.3110379006778457e-08,
      "loss": 0.7806,
      "step": 855780
    },
    {
      "epoch": 2.999330592618294,
      "grad_norm": 3.296875,
      "learning_rate": 1.2461350343076551e-08,
      "loss": 0.8782,
      "step": 855790
    },
    {
      "epoch": 2.99936564012519,
      "grad_norm": 2.6875,
      "learning_rate": 1.1812321679374649e-08,
      "loss": 0.7427,
      "step": 855800
    },
    {
      "epoch": 2.999400687632085,
      "grad_norm": 2.703125,
      "learning_rate": 1.1163293015672744e-08,
      "loss": 0.8024,
      "step": 855810
    },
    {
      "epoch": 2.999435735138981,
      "grad_norm": 3.21875,
      "learning_rate": 1.051426435197084e-08,
      "loss": 0.8381,
      "step": 855820
    },
    {
      "epoch": 2.9994707826458766,
      "grad_norm": 2.875,
      "learning_rate": 9.865235688268938e-09,
      "loss": 0.8504,
      "step": 855830
    },
    {
      "epoch": 2.999505830152772,
      "grad_norm": 2.484375,
      "learning_rate": 9.216207024567034e-09,
      "loss": 0.8158,
      "step": 855840
    },
    {
      "epoch": 2.9995408776596677,
      "grad_norm": 2.921875,
      "learning_rate": 8.56717836086513e-09,
      "loss": 0.7746,
      "step": 855850
    },
    {
      "epoch": 2.999575925166563,
      "grad_norm": 2.9375,
      "learning_rate": 7.918149697163225e-09,
      "loss": 0.6902,
      "step": 855860
    },
    {
      "epoch": 2.9996109726734588,
      "grad_norm": 2.859375,
      "learning_rate": 7.269121033461323e-09,
      "loss": 0.7728,
      "step": 855870
    },
    {
      "epoch": 2.9996460201803545,
      "grad_norm": 3.34375,
      "learning_rate": 6.620092369759419e-09,
      "loss": 0.8908,
      "step": 855880
    },
    {
      "epoch": 2.9996810676872503,
      "grad_norm": 2.65625,
      "learning_rate": 5.9710637060575146e-09,
      "loss": 0.7885,
      "step": 855890
    },
    {
      "epoch": 2.9997161151941456,
      "grad_norm": 3.15625,
      "learning_rate": 5.322035042355611e-09,
      "loss": 0.7132,
      "step": 855900
    },
    {
      "epoch": 2.9997511627010414,
      "grad_norm": 2.890625,
      "learning_rate": 4.673006378653707e-09,
      "loss": 0.8292,
      "step": 855910
    },
    {
      "epoch": 2.9997862102079367,
      "grad_norm": 2.953125,
      "learning_rate": 4.023977714951803e-09,
      "loss": 0.8197,
      "step": 855920
    },
    {
      "epoch": 2.9998212577148324,
      "grad_norm": 2.734375,
      "learning_rate": 3.374949051249899e-09,
      "loss": 0.7305,
      "step": 855930
    },
    {
      "epoch": 2.999856305221728,
      "grad_norm": 2.625,
      "learning_rate": 2.725920387547996e-09,
      "loss": 0.7455,
      "step": 855940
    },
    {
      "epoch": 2.9998913527286235,
      "grad_norm": 2.921875,
      "learning_rate": 2.076891723846092e-09,
      "loss": 0.8306,
      "step": 855950
    },
    {
      "epoch": 2.9999264002355193,
      "grad_norm": 2.890625,
      "learning_rate": 1.4278630601441882e-09,
      "loss": 0.8474,
      "step": 855960
    },
    {
      "epoch": 2.9999614477424146,
      "grad_norm": 2.796875,
      "learning_rate": 7.788343964422844e-10,
      "loss": 0.7966,
      "step": 855970
    },
    {
      "epoch": 2.9999964952493103,
      "grad_norm": 2.609375,
      "learning_rate": 1.2980573274038076e-10,
      "loss": 0.8361,
      "step": 855980
    }
  ],
  "logging_steps": 10,
  "max_steps": 855981,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.703596990745305e+18,
  "train_batch_size": 12,
  "trial_name": null,
  "trial_params": null
}
